-rw-r--r--  .ahub/sam/advanced.cfg  2
-rw-r--r--  .ahub/sam/exclude.txt  52
-rw-r--r--  .ahub/tcchecker-tca/config.yaml  631
-rw-r--r--  .clang-format  34
-rw-r--r--  .gitattributes  18
-rw-r--r--  .github/workflows/check-format.yml  64
-rw-r--r--  .github/workflows/check-pr-commit.yml  58
-rw-r--r--  .github/workflows/deploy-github-pages.yml  40
-rw-r--r--  .github/workflows/run-onert-micro-unit-tests.yml  47
-rw-r--r--  .style.yapf  3
-rw-r--r--  Makefile.template  150
-rw-r--r--  README.md  5
-rw-r--r--  compiler/.ahub/tcchecker-tca/config.yaml  54
-rw-r--r--  compiler/CMakeLists.txt  18
-rw-r--r--  compiler/angkor/CMakeLists.txt  4
-rw-r--r--  compiler/angkor/include/nncc/core/ADT/feature/Overlay.h  2
-rw-r--r--  compiler/angkor/include/nncc/core/ADT/feature/Shape.h  2
-rw-r--r--  compiler/angkor/include/nncc/core/ADT/kernel/Overlay.h  2
-rw-r--r--  compiler/angkor/include/nncc/core/ADT/kernel/Shape.h  2
-rw-r--r--  compiler/angkor/include/nncc/core/ADT/tensor/Overlay.h  2
-rw-r--r--  compiler/angkor/include/nncc/core/ADT/tensor/View.h  2
-rw-r--r--  compiler/angkor/src/ADT/feature/Overlay.test.cpp  6
-rw-r--r--  compiler/angkor/src/ADT/kernel/Overlay.test.cpp  6
-rw-r--r--  compiler/angkor/src/ADT/tensor/Overlay.test.cpp  6
-rw-r--r--  compiler/arser/CMakeLists.txt  8
-rw-r--r--  compiler/arser/include/arser/arser.h  466
-rw-r--r--  compiler/arser/tests/HelpMessage.test.cpp  75
-rw-r--r--  compiler/arser/tests/Prompt.h  56
-rw-r--r--  compiler/arser/tests/arser.test.cpp  328
-rw-r--r--  compiler/bcq-tools/CMakeLists.txt  4
-rw-r--r--  compiler/bcq-tools/README.md  64
-rw-r--r--  compiler/bcq-tools/generate_bcq_metadata.py  228
-rw-r--r--  compiler/bcq-tools/generate_bcq_output_arrays  90
-rw-r--r--  compiler/bcq-tools/generate_bcq_output_arrays.py  118
-rw-r--r--  compiler/bcq-tools/preserve_bcq_info  116
-rw-r--r--  compiler/bino/include/bino.h  4
-rw-r--r--  compiler/caffegen/CMakeLists.txt  1
-rw-r--r--  compiler/caffegen/src/Driver.cpp  4
-rw-r--r--  compiler/circle-eval-diff/CMakeLists.txt  42
-rw-r--r--  compiler/circle-eval-diff/README.md  51
-rw-r--r--  compiler/circle-eval-diff/driver/Driver.cpp  185
-rw-r--r--  compiler/circle-eval-diff/include/CircleEvalDiff.h  80
-rw-r--r--  compiler/circle-eval-diff/requires.cmake  7
-rw-r--r--  compiler/circle-eval-diff/src/CircleEvalDiff.cpp  264
-rw-r--r--  compiler/circle-eval-diff/src/InputDataLoader.cpp  238
-rw-r--r--  compiler/circle-eval-diff/src/InputDataLoader.h  98
-rw-r--r--  compiler/circle-eval-diff/src/InputDataLoader.test.cpp  65
-rw-r--r--  compiler/circle-eval-diff/src/MetricPrinter.cpp  662
-rw-r--r--  compiler/circle-eval-diff/src/MetricPrinter.h  217
-rw-r--r--  compiler/circle-eval-diff/src/MetricPrinter.test.cpp  548
-rw-r--r--  compiler/circle-eval-diff/src/Tensor.cpp  124
-rw-r--r--  compiler/circle-eval-diff/src/Tensor.h  84
-rw-r--r--  compiler/circle-eval-diff/src/Tensor.test.cpp  129
-rw-r--r--  compiler/circle-execution-plan/CMakeLists.txt  32
-rw-r--r--  compiler/circle-execution-plan/README.md  28
-rw-r--r--  compiler/circle-execution-plan/pal/IScratchpadHelper.h  51
-rw-r--r--  compiler/circle-execution-plan/pal/ScratchpadHelperCMSISNN.h  187
-rw-r--r--  compiler/circle-execution-plan/pal/ScratchpadHelperLinux.h  137
-rw-r--r--  compiler/circle-execution-plan/pal/ScratchpadHelperMCU.h  88
-rw-r--r--  compiler/circle-execution-plan/pal/TargetPlatform.h  50
-rw-r--r--  compiler/circle-execution-plan/requires.cmake  4
-rw-r--r--  compiler/circle-execution-plan/src/CircleExecutionPlan.cpp  218
-rw-r--r--  compiler/circle-execution-plan/src/ExecutionPlanner.cpp  697
-rw-r--r--  compiler/circle-execution-plan/src/ExecutionPlanner.h  211
-rw-r--r--  compiler/circle-inspect/CMakeLists.txt  7
-rw-r--r--  compiler/circle-inspect/README.md  16
-rw-r--r--  compiler/circle-inspect/driver/Driver.cpp  17
-rw-r--r--  compiler/circle-inspect/requires.cmake  3
-rw-r--r--  compiler/circle-inspect/src/Dump.cpp  76
-rw-r--r--  compiler/circle-inspect/src/Dump.h  18
-rw-r--r--  compiler/circle-inspect/src/Reader.cpp  169
-rw-r--r--  compiler/circle-inspect/src/Reader.h  91
-rw-r--r--  compiler/circle-interpreter-test/CMakeLists.txt  27
-rw-r--r--  compiler/circle-interpreter-test/README.md  9
-rw-r--r--  compiler/circle-interpreter-test/requires.cmake  3
-rw-r--r--  compiler/circle-interpreter-test/src/circle-interpreter.test.cpp  241
-rw-r--r--  compiler/circle-interpreter/CMakeLists.txt  13
-rw-r--r--  compiler/circle-interpreter/requires.cmake  6
-rw-r--r--  compiler/circle-interpreter/src/CircleInterpreter.cpp  147
-rw-r--r--  compiler/circle-mpqsolver/CMakeLists.txt  48
-rw-r--r--  compiler/circle-mpqsolver/README.md  72
-rw-r--r--  compiler/circle-mpqsolver/requires.cmake  6
-rw-r--r--  compiler/circle-mpqsolver/src/CircleMPQSolver.cpp  209
-rw-r--r--  compiler/circle-mpqsolver/src/MPQSolver.cpp  31
-rw-r--r--  compiler/circle-mpqsolver/src/MPQSolver.h  60
-rw-r--r--  compiler/circle-mpqsolver/src/bisection/BisectionSolver.cpp  290
-rw-r--r--  compiler/circle-mpqsolver/src/bisection/BisectionSolver.h  88
-rw-r--r--  compiler/circle-mpqsolver/src/bisection/DepthParameterizer.cpp  95
-rw-r--r--  compiler/circle-mpqsolver/src/bisection/DepthParameterizer.h  49
-rw-r--r--  compiler/circle-mpqsolver/src/bisection/DepthParameterizer.test.cpp  95
-rw-r--r--  compiler/circle-mpqsolver/src/bisection/VISQErrorApproximator.cpp  63
-rw-r--r--  compiler/circle-mpqsolver/src/bisection/VISQErrorApproximator.h  60
-rw-r--r--  compiler/circle-mpqsolver/src/bisection/VISQErrorApproximator.test.cpp  83
-rw-r--r--  compiler/circle-mpqsolver/src/core/Dumper.cpp  160
-rw-r--r--  compiler/circle-mpqsolver/src/core/Dumper.h  113
-rw-r--r--  compiler/circle-mpqsolver/src/core/DumpingHooks.cpp  62
-rw-r--r--  compiler/circle-mpqsolver/src/core/DumpingHooks.h  84
-rw-r--r--  compiler/circle-mpqsolver/src/core/ErrorMetric.cpp  65
-rw-r--r--  compiler/circle-mpqsolver/src/core/ErrorMetric.h  55
-rw-r--r--  compiler/circle-mpqsolver/src/core/ErrorMetric.test.cpp  56
-rw-r--r--  compiler/circle-mpqsolver/src/core/Evaluator.cpp  138
-rw-r--r--  compiler/circle-mpqsolver/src/core/Evaluator.h  66
-rw-r--r--  compiler/circle-mpqsolver/src/core/Quantizer.cpp  131
-rw-r--r--  compiler/circle-mpqsolver/src/core/Quantizer.h  76
-rw-r--r--  compiler/circle-mpqsolver/src/core/Quantizer.test.cpp  107
-rw-r--r--  compiler/circle-mpqsolver/src/core/SolverHooks.cpp  17
-rw-r--r--  compiler/circle-mpqsolver/src/core/SolverHooks.h  69
-rw-r--r--  compiler/circle-mpqsolver/src/core/SolverOutput.cpp  47
-rw-r--r--  compiler/circle-mpqsolver/src/core/SolverOutput.h  58
-rw-r--r--  compiler/circle-mpqsolver/src/core/TestHelper.h  79
-rw-r--r--  compiler/circle-operator-test/CMakeLists.txt  18
-rw-r--r--  compiler/circle-operator-test/README.md  7
-rw-r--r--  compiler/circle-operator-test/requires.cmake  2
-rw-r--r--  compiler/circle-operator-test/src/circle-operator.test.cpp  248
-rw-r--r--  compiler/circle-operator/CMakeLists.txt  17
-rw-r--r--  compiler/circle-operator/README.md  70
-rw-r--r--  compiler/circle-operator/driver/Driver.cpp  112
-rw-r--r--  compiler/circle-operator/requires.cmake  4
-rw-r--r--  compiler/circle-operator/src/Dump.cpp  85
-rw-r--r--  compiler/circle-operator/src/Dump.h  45
-rw-r--r--  compiler/circle-opselector/CMakeLists.txt  39
-rw-r--r--  compiler/circle-opselector/README.md  21
-rw-r--r--  compiler/circle-opselector/driver/Driver.cpp  112
-rw-r--r--  compiler/circle-opselector/requires.cmake  6
-rw-r--r--  compiler/circle-opselector/src/ModuleIO.cpp  72
-rw-r--r--  compiler/circle-opselector/src/ModuleIO.h  33
-rw-r--r--  compiler/circle-opselector/src/ModuleIO.test.cpp  26
-rw-r--r--  compiler/circle-opselector/src/OpSelector.cpp  402
-rw-r--r--  compiler/circle-opselector/src/OpSelector.h  54
-rw-r--r--  compiler/circle-opselector/src/OpSelector.test.cpp  157
-rw-r--r--  compiler/circle-opselector/src/SelectType.h  33
-rw-r--r--  compiler/circle-opselector/src/TestHelper.h  52
-rw-r--r--  compiler/circle-part-driver/CMakeLists.txt  17
-rw-r--r--  compiler/circle-part-driver/README.md  3
-rw-r--r--  compiler/circle-part-driver/requires.cmake  6
-rw-r--r--  compiler/circle-part-driver/src/Driver.cpp  62
-rw-r--r--  compiler/circle-part-driver/src/PModelsRunner.cpp  253
-rw-r--r--  compiler/circle-part-driver/src/PModelsRunner.h  63
-rw-r--r--  compiler/circle-part-value-test/CMakeLists.txt  113
-rw-r--r--  compiler/circle-part-value-test/README.md  15
-rwxr-xr-x  compiler/circle-part-value-test/part_eval_all.sh  68
-rwxr-xr-x  compiler/circle-part-value-test/part_eval_one.py  149
-rw-r--r--  compiler/circle-part-value-test/parts/Net_InstanceNorm_003.001.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Net_InstanceNorm_003.002.part  8
-rw-r--r--  compiler/circle-part-value-test/parts/Net_InstanceNorm_003.003.part  9
-rw-r--r--  compiler/circle-part-value-test/parts/Net_InstanceNorm_003.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Net_UnpackAdd_001.001.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Net_UnpackAdd_001.002.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Net_UnpackAdd_001.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Part_Add_Sqrt_000.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Part_Add_Sqrt_Rsqrt_000.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Part_Add_Sub_000.001.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Part_Add_Sub_000.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Part_Add_Sub_001.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Part_Add_Sub_002.001.part  9
-rw-r--r--  compiler/circle-part-value-test/parts/Part_Add_Sub_002.002.part  9
-rw-r--r--  compiler/circle-part-value-test/parts/Part_If_Add_Sub_000.001.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Part_If_Add_Sub_001.001.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_000.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_001.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_002.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Part_Split_Add_000.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_000.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_001.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_002.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_003.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_Add_000.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_Add_001.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_Add_002.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_Add_003.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_Add_004.part  6
-rw-r--r--  compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_001.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_002.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_003.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Part_While_000.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/Part_While_001.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/SignatureDef_MultiOut_000.part  7
-rw-r--r--  compiler/circle-part-value-test/parts/SignatureDef_MultiOut_001.part  7
-rw-r--r--  compiler/circle-part-value-test/requires.cmake  3
-rw-r--r--  compiler/circle-part-value-test/test.lst  58
-rw-r--r--  compiler/circle-partitioner-test/CMakeLists.txt  72
-rw-r--r--  compiler/circle-partitioner-test/README.md  11
-rw-r--r--  compiler/circle-partitioner-test/parts/Net_InstanceNorm_003.part  7
-rw-r--r--  compiler/circle-partitioner-test/parts/Part_Add_SVDF_000.part  7
-rw-r--r--  compiler/circle-partitioner-test/requires.cmake  2
-rw-r--r--  compiler/circle-partitioner-test/test.lst  11
-rw-r--r--  compiler/circle-partitioner/CMakeLists.txt  18
-rw-r--r--  compiler/circle-partitioner/README.md  294
-rw-r--r--  compiler/circle-partitioner/requires.cmake  6
-rw-r--r--  compiler/circle-partitioner/src/CirclePartitioner.cpp  209
-rw-r--r--  compiler/circle-partitioner/src/HelperPath.cpp  69
-rw-r--r--  compiler/circle-partitioner/src/HelperPath.h  43
-rw-r--r--  compiler/circle-partitioner/src/PartitionExport.cpp  145
-rw-r--r--  compiler/circle-partitioner/src/PartitionExport.h  41
-rw-r--r--  compiler/circle-partitioner/src/PartitionRead.cpp  147
-rw-r--r--  compiler/circle-partitioner/src/PartitionRead.h  37
-rw-r--r--  compiler/circle-quantizer-dredd-recipe-test/CMakeLists.txt  217
-rw-r--r--  compiler/circle-quantizer-dredd-recipe-test/README.md  37
-rw-r--r--  compiler/circle-quantizer-dredd-recipe-test/requires.cmake  6
-rw-r--r--  compiler/circle-quantizer-dredd-recipe-test/test.lst  98
-rwxr-xr-x  compiler/circle-quantizer-dredd-recipe-test/testall.sh  100
-rw-r--r--  compiler/circle-quantizer/CMakeLists.txt  12
-rw-r--r--  compiler/circle-quantizer/requires.cmake  1
-rw-r--r--  compiler/circle-quantizer/src/CircleQuantizer.cpp  359
-rw-r--r--  compiler/circle-tensordump/CMakeLists.txt  11
-rw-r--r--  compiler/circle-tensordump/driver/Driver.cpp  8
-rw-r--r--  compiler/circle-tensordump/requires.cmake  2
-rw-r--r--  compiler/circle-tensordump/src/Dump.cpp  39
-rw-r--r--  compiler/circle-tensordump/src/Reader.cpp  169
-rw-r--r--  compiler/circle-tensordump/src/Reader.h  91
-rw-r--r--  compiler/circle-verify/CMakeLists.txt  7
-rw-r--r--  compiler/circle-verify/requires.cmake  2
-rw-r--r--  compiler/circle-verify/src/Driver.cpp  2
-rw-r--r--  compiler/circle2circle-dredd-recipe-test/CMakeLists.txt  6
-rw-r--r--  compiler/circle2circle-dredd-recipe-test/README.md  3
-rw-r--r--  compiler/circle2circle-dredd-recipe-test/test.lst  77
-rwxr-xr-x  compiler/circle2circle-dredd-recipe-test/testall.sh  1
-rw-r--r--  compiler/circle2circle/CMakeLists.txt  6
-rw-r--r--  compiler/circle2circle/include/Model.h  43
-rw-r--r--  compiler/circle2circle/requires.cmake  2
-rw-r--r--  compiler/circle2circle/src/Circle2Circle.cpp  421
-rw-r--r--  compiler/circle2circle/src/TestHelper.h  4
-rw-r--r--  compiler/circlechef/CMakeLists.txt  6
-rw-r--r--  compiler/circlechef/circle/CMakeLists.txt  4
-rw-r--r--  compiler/circlechef/circle/src/CircleImport.cpp  35
-rw-r--r--  compiler/circlechef/circle/src/CircleImport.h  5
-rw-r--r--  compiler/circlechef/circle/src/Convert.cpp  3
-rw-r--r--  compiler/circlechef/circle/src/RecipeChef.cpp  21
-rw-r--r--  compiler/circlechef/core/CMakeLists.txt  22
-rw-r--r--  compiler/circlechef/core/src/Convert.cpp  4
-rw-r--r--  compiler/circlechef/core/src/Convert.test.cpp  57
-rw-r--r--  compiler/circlechef/core/src/DataChef.def  23
-rw-r--r--  compiler/circlechef/core/src/ModelChef.cpp  31
-rw-r--r--  compiler/circlechef/core/src/Op/BCQFullyConnected.cpp  4
-rw-r--r--  compiler/circlechef/core/src/Op/BCQGather.cpp  2
-rw-r--r--  compiler/circlechef/core/src/Op/BatchMatMul.cpp  4
-rw-r--r--  compiler/circlechef/proto/circlechef.proto  7
-rw-r--r--  compiler/circlechef/requires.cmake  3
-rw-r--r--  compiler/circlechef/tests/CMakeLists.txt  72
-rw-r--r--  compiler/circlechef/tests/shape_signature/test.recipe  45
-rw-r--r--  compiler/circlechef/tests/shape_signature/test.reverse (renamed from tests/tools/tflite_benchmark_model/.FORMATDENY)  0
-rw-r--r--  compiler/circlechef/tests/short_int_datatype/test.recipe  32
-rw-r--r--  compiler/circlechef/tests/short_int_datatype/test.reverse  0
-rw-r--r--  compiler/circlechef/tests/string_tensor/test.recipe  31
-rw-r--r--  compiler/circlechef/tools/console/CMakeLists.txt  9
-rw-r--r--  compiler/circlechef/tools/console/Driver.cpp  10
-rw-r--r--  compiler/circlechef/tools/console/Driver.test.cpp  41
-rw-r--r--  compiler/circlechef/tools/file/Driver.cpp  6
-rw-r--r--  compiler/circlechef/tools/reverse/Driver.cpp  6
-rw-r--r--  compiler/circledump/CMakeLists.txt  12
-rw-r--r--  compiler/circledump/README.md  3
-rw-r--r--  compiler/circledump/driver/Driver.cpp  16
-rw-r--r--  compiler/circledump/include/circleread/Model.h  43
-rw-r--r--  compiler/circledump/requires.cmake  3
-rw-r--r--  compiler/circledump/src/Dump.cpp  220
-rw-r--r--  compiler/circledump/src/Load.cpp  133
-rw-r--r--  compiler/circledump/src/MetadataPrinter.cpp  119
-rw-r--r--  compiler/circledump/src/MetadataPrinter.h  62
-rw-r--r--  compiler/circledump/src/OpPrinter.cpp  142
-rw-r--r--  compiler/circledump/src/Read.cpp  169
-rw-r--r--  compiler/circledump/src/Read.h  101
-rw-r--r--  compiler/cli/CMakeLists.txt  11
-rw-r--r--  compiler/cli/src/App.test.cpp  4
-rw-r--r--  compiler/coco/core/CMakeLists.txt  2
-rw-r--r--  compiler/coco/core/include/coco/IR/FeatureShape.h  4
-rw-r--r--  compiler/coco/core/include/coco/IR/Locatable.h  2
-rw-r--r--  compiler/coco/core/include/coco/IR/Ops.h  2
-rw-r--r--  compiler/coco/core/include/coco/IR/Padding2D.h  2
-rw-r--r--  compiler/coco/core/src/ADT/PtrList.test.cpp  2
-rw-r--r--  compiler/coco/core/src/ADT/PtrManager.test.cpp  2
-rw-r--r--  compiler/coco/core/src/IR/BagManager.cpp  4
-rw-r--r--  compiler/coco/core/src/IR/BlockManager.cpp  5
-rw-r--r--  compiler/coco/core/src/IR/Conv2D.test.cpp  4
-rw-r--r--  compiler/coco/core/src/IR/Def.test.cpp  4
-rw-r--r--  compiler/coco/core/src/IR/InputManager.cpp  4
-rw-r--r--  compiler/coco/core/src/IR/Module.cpp  6
-rw-r--r--  compiler/coco/core/src/IR/ObjectManager.cpp  5
-rw-r--r--  compiler/coco/core/src/IR/OpManager.cpp  5
-rw-r--r--  compiler/coco/core/src/IR/Ops.test.cpp  4
-rw-r--r--  compiler/coco/core/src/IR/OutputManager.cpp  4
-rw-r--r--  compiler/coco/core/src/IR/Part.test.cpp  4
-rw-r--r--  compiler/coco/core/src/IR/Use.test.cpp  4
-rw-r--r--  compiler/coco/generic/CMakeLists.txt  4
-rw-r--r--  compiler/coco/generic/src/IR/Data.cpp  10
-rw-r--r--  compiler/common-artifacts/CMakeLists.txt  229
-rw-r--r--  compiler/common-artifacts/exclude.lst  135
-rw-r--r--  compiler/common-artifacts/options.lst  6
-rw-r--r--  compiler/common-artifacts/requires.cmake  2
-rw-r--r--  compiler/common-artifacts/src/TestDataGenerator.cpp  140
-rw-r--r--  compiler/crew/CMakeLists.txt  23
-rw-r--r--  compiler/crew/README.md  13
-rw-r--r--  compiler/crew/include/crew/PConfig.h  60
-rw-r--r--  compiler/crew/include/crew/PConfigIni.h  68
-rw-r--r--  compiler/crew/include/crew/PConfigIniDump.h  33
-rw-r--r--  compiler/crew/requires.cmake  1
-rw-r--r--  compiler/crew/src/PConfig.cpp  223
-rw-r--r--  compiler/crew/src/PConfigIni.cpp  225
-rw-r--r--  compiler/crew/src/PConfigIni.test.cpp  146
-rw-r--r--  compiler/crew/src/PConfigIniDump.cpp  44
-rw-r--r--  compiler/crew/src/PConfigIniDump.test.cpp  41
-rw-r--r--  compiler/crew/src/PConfigJson.cpp  116
-rw-r--r--  compiler/crew/src/PConfigJson.h  51
-rw-r--r--  compiler/crew/src/PConfigJson.test.cpp  68
-rw-r--r--  compiler/crew/src/test_read_semicolon.ini  2
-rw-r--r--  compiler/cwrap/src/Fildes.test.cpp  2
-rw-r--r--  compiler/dalgona-test/.gitignore  1
-rw-r--r--  compiler/dalgona-test/CMakeLists.txt  58
-rw-r--r--  compiler/dalgona-test/GenH5RandomInputs.py  66
-rw-r--r--  compiler/dalgona-test/RandomDataGenerator.py  44
-rw-r--r--  compiler/dalgona-test/SingleOperatorTest.py  210
-rwxr-xr-x  compiler/dalgona-test/TestSingleOp.sh  97
-rw-r--r--  compiler/dalgona-test/TestUtil.py  58
-rw-r--r--  compiler/dalgona-test/requires.cmake  3
-rw-r--r--  compiler/dalgona-test/test.lst  6
-rw-r--r--  compiler/dalgona/CMakeLists.txt  63
-rw-r--r--  compiler/dalgona/README.md  104
-rw-r--r--  compiler/dalgona/analysis/AnalysisTemplate.py  125
-rw-r--r--  compiler/dalgona/driver/Driver.cpp  90
-rw-r--r--  compiler/dalgona/include/Dalgona.h  55
-rw-r--r--  compiler/dalgona/include/PythonHooks.h  66
-rw-r--r--  compiler/dalgona/requires.cmake  6
-rw-r--r--  compiler/dalgona/src/Dalgona.cpp  276
-rw-r--r--  compiler/dalgona/src/PostOperatorHook.h  267
-rw-r--r--  compiler/dalgona/src/PreOperatorHook.h  221
-rw-r--r--  compiler/dalgona/src/PythonHooks.cpp  109
-rw-r--r--  compiler/dalgona/src/RandomUtils.cpp  43
-rw-r--r--  compiler/dalgona/src/RandomUtils.h  49
-rw-r--r--  compiler/dalgona/src/RandomUtils.test.cpp  65
-rw-r--r--  compiler/dalgona/src/StringUtils.cpp  68
-rw-r--r--  compiler/dalgona/src/StringUtils.h  34
-rw-r--r--  compiler/dalgona/src/StringUtils.test.cpp  42
-rw-r--r--  compiler/dalgona/src/Utils.cpp  197
-rw-r--r--  compiler/dalgona/src/Utils.h  58
-rw-r--r--  compiler/dio-hdf5/CMakeLists.txt  30
-rw-r--r--  compiler/dio-hdf5/README.md  29
-rw-r--r--  compiler/dio-hdf5/include/dio_hdf5/HDF5Importer.h  83
-rw-r--r--  compiler/dio-hdf5/requires.cmake  1
-rw-r--r--  compiler/dio-hdf5/src/HDF5Importer.cpp  178
-rw-r--r--  compiler/dio-hdf5/src/HDF5Importer.test.cpp  151
-rwxr-xr-x  compiler/dredd-rule-lib/rule-lib.sh  35
-rw-r--r--  compiler/embedded-import-value-test/.gitignore  1
-rw-r--r--  compiler/embedded-import-value-test/CMakeLists.txt  34
-rw-r--r--  compiler/embedded-import-value-test/README.md  13
-rwxr-xr-x  compiler/embedded-import-value-test/evalverify.sh  58
-rw-r--r--  compiler/embedded-import-value-test/requires.cmake  6
-rw-r--r--  compiler/embedded-import-value-test/src/TestDriver.cpp  242
-rw-r--r--  compiler/embedded-import-value-test/test.lst  192
-rw-r--r--  compiler/enco/CMakeLists.txt  5
-rw-r--r--  compiler/enco/cli/CMakeLists.txt  1
-rw-r--r--  compiler/enco/cli/src/Driver.cpp  5
-rw-r--r--  compiler/enco/core/CMakeLists.txt  9
-rw-r--r--  compiler/enco/core/src/ANN/Binder.h  2
-rw-r--r--  compiler/enco/core/src/ANN/Context.cpp  6
-rw-r--r--  compiler/enco/core/src/ANN/Context.test.cpp  2
-rw-r--r--  compiler/enco/core/src/ANN/IR/OperandInventory.cpp  4
-rw-r--r--  compiler/enco/core/src/ANN/IR/Operation.h  2
-rw-r--r--  compiler/enco/core/src/ANN/IR/OperationInventory.cpp  4
-rw-r--r--  compiler/enco/core/src/ANN/IR/WeightInventory.cpp  4
-rw-r--r--  compiler/enco/core/src/AsmCode.h  2
-rw-r--r--  compiler/enco/core/src/Backend.cpp  7
-rw-r--r--  compiler/enco/core/src/CodeIndex.h  2
-rw-r--r--  compiler/enco/core/src/CppGen/Host.cpp  9
-rw-r--r--  compiler/enco/core/src/CppGen/Subnet.cpp  15
-rw-r--r--  compiler/enco/core/src/Session.cpp  4
-rw-r--r--  compiler/enco/core/src/Support/Debugging.cpp  2
-rw-r--r--  compiler/enco/core/src/Transforms/FeatureUnification.cpp  5
-rw-r--r--  compiler/enco/core/src/Transforms/GlobalDataGeneration.cpp  5
-rw-r--r--  compiler/enco/core/src/Transforms/Split.cpp  36
-rw-r--r--  compiler/enco/core/src/Transforms/Split.h  2
-rw-r--r--  compiler/enco/frontend/caffe/CMakeLists.txt  9
-rw-r--r--  compiler/enco/frontend/caffe/src/Context.h  4
-rw-r--r--  compiler/enco/frontend/caffe/src/Entry.cpp  5
-rw-r--r--  compiler/enco/frontend/caffe/src/GraphBuilderRegistry.cpp  4
-rw-r--r--  compiler/enco/frontend/caffe/src/Layer/Convolution.cpp  2
-rw-r--r--  compiler/enco/frontend/tflite/CMakeLists.txt  12
-rw-r--r--  compiler/enco/frontend/tflite/src/Context.cpp  2
-rw-r--r--  compiler/enco/frontend/tflite/src/Context.h  4
-rw-r--r--  compiler/enco/frontend/tflite/src/Entry.cpp  5
-rw-r--r--  compiler/enco/frontend/tflite/src/Frontend.test.cpp  4
-rw-r--r--  compiler/enco/frontend/tflite/src/GraphBuilderRegistry.h  6
-rw-r--r--  compiler/enco/frontend/tflite/src/Op/AveragePool2D.cpp  2
-rw-r--r--  compiler/enco/frontend/tflite/src/Op/Conv2D.cpp  2
-rw-r--r--  compiler/enco/frontend/tflite/src/Op/DepthwiseConv2D.cpp  6
-rw-r--r--  compiler/enco/frontend/tflite/src/Op/MaxPool2D.cpp  2
-rw-r--r--  compiler/enco/test/basic/000/CMakeLists.txt  1
-rw-r--r--  compiler/enco/test/basic/000/enco.test.cpp  4
-rw-r--r--  compiler/enco/test/binder.cpp  4
-rw-r--r--  compiler/enco/test/caffe/CMakeLists.txt  1
-rw-r--r--  compiler/enco/test/tflite/CMakeLists.txt  1
-rw-r--r--  compiler/encodump/CMakeLists.txt  1
-rw-r--r--  compiler/encodump/src/Driver.cpp  5
-rw-r--r--  compiler/exo/CMakeLists.txt  6
-rw-r--r--  compiler/exo/requires.cmake  1
-rw-r--r--  compiler/exo/src/Circle/CircleExporter.cpp  5
-rw-r--r--  compiler/exo/src/Circle/CircleExporterImpl.cpp  4
-rw-r--r--  compiler/exo/src/Circle/CircleExporterUtils.cpp  13
-rw-r--r--  compiler/exo/src/Circle/CircleExporterUtils.h  2
-rw-r--r--  compiler/exo/src/Circle/CircleOperationExporter.cpp  78
-rw-r--r--  compiler/exo/src/Circle/CircleTypeInference.cpp  2
-rw-r--r--  compiler/exo/src/Conversion/DepthwiseConv2DConverter.cpp  8
-rw-r--r--  compiler/exo/src/Convert.cpp  52
-rw-r--r--  compiler/exo/src/Dialect/IR/CircleNodes.h  4
-rw-r--r--  compiler/exo/src/Dialect/IR/TFLNodes.h  16
-rw-r--r--  compiler/exo/src/Dialect/Service/TFLShapeInferenceRule.cpp  4
-rw-r--r--  compiler/exo/src/Dialect/Service/TFLShapeInferenceRule.test.cpp  8
-rw-r--r--  compiler/exo/src/Dialect/Service/TFLTypeInferenceRule.test.cpp  2
-rw-r--r--  compiler/exo/src/ExoFormattedGraph.h  4
-rw-r--r--  compiler/exo/src/ExoOptimize.cpp  24
-rw-r--r--  compiler/exo/src/GraphBlock.cpp  16
-rw-r--r--  compiler/exo/src/GraphBlock.h  2
-rw-r--r--  compiler/exo/src/Log.cpp  1
-rw-r--r--  compiler/exo/src/LogHelper.cpp  2
-rw-r--r--  compiler/exo/src/LoggingContext.cpp  9
-rw-r--r--  compiler/exo/src/Pass/FoldTransposeOfConstPass.cpp  2
-rw-r--r--  compiler/exo/src/Pass/FuseBiasAddPass.cpp  8
-rw-r--r--  compiler/exo/src/Pass/FuseInstanceNormPass.cpp  4
-rw-r--r--  compiler/exo/src/Pass/FuseReluPass.test.cpp  4
-rw-r--r--  compiler/exo/src/Pass/MergeConcatNodesPass.cpp  4
-rw-r--r--  compiler/exo/src/Pass/ShapeInferencePass.cpp  6
-rw-r--r--  compiler/exo/src/Pass/TypeInferencePass.cpp  6
-rw-r--r--  compiler/exo/src/ProgressReporter.h  2
-rw-r--r--  compiler/exo/src/TFLite/TFLExporter.cpp  5
-rw-r--r--  compiler/exo/src/TFLite/TFLExporterImpl.cpp  4
-rw-r--r--  compiler/exo/src/TFLite/TFLExporterImpl.test.cpp  11
-rw-r--r--  compiler/exo/src/TFLite/TFLExporterUtils.cpp  13
-rw-r--r--  compiler/exo/src/TFLite/TFLExporterUtils.h  2
-rw-r--r--  compiler/exo/src/TFLite/TFLOperationExporter.cpp  78
-rw-r--r--  compiler/exo/src/TFLite/TFLTensorExporter.cpp  4
-rw-r--r--  compiler/exo/src/TFLite/TFLTypeInference.cpp  2
-rw-r--r--  compiler/exo/src/TFLite/TFLTypeInference.test.cpp  3
-rw-r--r--  compiler/exo/src/TestGraph.h  4
-rw-r--r--  compiler/exo/src/TestHelper.h  8
-rw-r--r--  compiler/foder/CMakeLists.txt  1
-rw-r--r--  compiler/foder/include/foder/FileLoader.h  11
-rw-r--r--  compiler/gen-core/CMakeLists.txt  17
-rw-r--r--  compiler/gen-core/README.md  3
-rw-r--r--  compiler/gen-core/include/gencore/HDF5Common.h  65
-rw-r--r--  compiler/gen-core/include/gencore/HDF5Exporter.h  52
-rw-r--r--  compiler/gen-core/include/gencore/HDF5Importer.h  55
-rw-r--r--  compiler/gen-core/requires.cmake  2
-rw-r--r--  compiler/gen-core/src/HDF5Common.cpp  43
-rw-r--r--  compiler/gen-core/src/HDF5Exporter.cpp  95
-rw-r--r--  compiler/gen-core/src/HDF5Importer.cpp  85
-rw-r--r--  compiler/gen-tf-input/CMakeLists.txt  4
-rw-r--r--  compiler/gen-tf-input/README.md  11
-rw-r--r--  compiler/gen-tf-input/src/Driver.cpp  56
-rw-r--r--  compiler/gen-tf-output/CMakeLists.txt  3
-rw-r--r--  compiler/gen-tf-output/README.md  13
-rw-r--r--  compiler/gen-tf-output/src/Driver.cpp  54
-rw-r--r--  compiler/gen-tflite-output/CMakeLists.txt  3
-rw-r--r--  compiler/gen-tflite-output/README.md  14
-rw-r--r--  compiler/gen-tflite-output/src/Driver.cpp  54
-rw-r--r--  compiler/hermes-std/CMakeLists.txt  6
-rw-r--r--  compiler/hermes-std/include/hermes/ConsoleReporter.h  4
-rw-r--r--  compiler/hermes-std/src/ConsoleReporter.cpp  52
-rw-r--r--  compiler/hermes-std/src/ConsoleReporter.test.cpp  170
-rw-r--r--  compiler/hermes-std/src/EnvConfig.test.cpp  68
-rw-r--r--  compiler/hermes/CMakeLists.txt  6
-rw-r--r--  compiler/hermes/include/hermes/core/Message.h  12
-rw-r--r--  compiler/hermes/include/hermes/core/MessageBuffer.h  3
-rw-r--r--  compiler/hermes/requires.cmake  1
-rw-r--r--  compiler/hermes/src/core/MessageBuffer.cpp  12
-rw-r--r--  compiler/hermes/src/core/Source.cpp  8
-rw-r--r--  compiler/imgdata2hdf5/CMakeLists.txt  13
-rw-r--r--  compiler/imgdata2hdf5/README.md  24
-rwxr-xr-x  compiler/imgdata2hdf5/imgdata2hdf5.py  60
-rw-r--r--  compiler/kuma/src/IntervalSet.h  1
-rw-r--r--  compiler/loco/CMakeLists.txt  4
-rw-r--r--  compiler/loco/include/loco/IR/DataType.h  3
-rw-r--r--  compiler/loco/include/loco/IR/DataTypeTraits.h  43
-rw-r--r--  compiler/loco/include/loco/IR/Graph.h  1
-rw-r--r--  compiler/loco/include/loco/IR/NodeMixins.h  6
-rw-r--r--  compiler/loco/include/loco/IR/Nodes.h  56
-rw-r--r--  compiler/loco/include/loco/IR/Padding2D.h  2
-rw-r--r--  compiler/loco/requires.cmake  1
-rw-r--r--  compiler/loco/src/ADT/AnnotatedItem.test.cpp  5
-rw-r--r--  compiler/loco/src/IR/CanonicalDialect.cpp  5
-rw-r--r--  compiler/loco/src/IR/Dialect.test.cpp  4
-rw-r--r--  compiler/loco/src/IR/Graph.cpp  12
-rw-r--r--  compiler/loco/src/IR/Graph.test.cpp  15
-rw-r--r--  compiler/loco/src/IR/Nodes.test.cpp  4
-rw-r--r--  compiler/loco/src/IR/PermutingCodec.cpp  7
-rw-r--r--  compiler/loco/src/IR/Verifier.test.cpp  4
-rw-r--r--  compiler/loco/src/Service/CanonicalShapeInferenceRule.cpp  2
-rw-r--r--  compiler/loco/src/Service/CanonicalShapeInferenceRule.test.cpp  8
-rw-r--r--  compiler/loco/src/Service/GraphBuilder.h  40
-rw-r--r--  compiler/loco/src/Service/GraphTestcase.h  4
-rw-r--r--  compiler/loco/src/Service/MultiDialectShapeInferenceRule.test.cpp  4
-rw-r--r--  compiler/loco/src/Service/ShapeInference.cpp  5
-rw-r--r--  compiler/loco/src/Service/TypeInference.cpp  5
-rw-r--r--  compiler/loco/src/Service/TypeInference.test.cpp  4
-rw-r--r--  compiler/loco/src/tensorflow.test.cpp  6
-rw-r--r--  compiler/locoex-customop/CMakeLists.txt  4
-rw-r--r--  compiler/locoex-customop/requires.cmake  1
-rw-r--r--  compiler/locoex-customop/src/COpCall.cpp  2
-rw-r--r--  compiler/locoex-customop/src/COpCall.test.cpp  6
-rw-r--r--  compiler/locoex-customop/src/VariadicArityNode.test.cpp  2
-rw-r--r--  compiler/locomotiv/CMakeLists.txt  5
-rw-r--r--  compiler/locomotiv/include/locomotiv/Session.h  2
-rw-r--r--  compiler/locomotiv/requires.cmake  1
-rw-r--r--  compiler/locomotiv/src/Node/AvgPool2D.cpp  4
-rw-r--r--  compiler/locomotiv/src/Node/AvgPool2D.test.cpp  2
-rw-r--r--  compiler/locomotiv/src/Node/BiasAdd.cpp  4
-rw-r--r--  compiler/locomotiv/src/Node/Conv2D.cpp  8
-rw-r--r--  compiler/locomotiv/src/Node/Conv2D.test.cpp  2
-rw-r--r--  compiler/locomotiv/src/Node/DepthwiseConv2D.cpp  4
-rw-r--r--  compiler/locomotiv/src/Node/DepthwiseConv2D.test.cpp  2
-rw-r--r--  compiler/locomotiv/src/Node/DepthwiseFilterEncode.cpp  4
-rw-r--r--  compiler/locomotiv/src/Node/DepthwiseFilterEncode.test.cpp  2
-rw-r--r--  compiler/locomotiv/src/Node/FeatureCodec.test.cpp  4
-rw-r--r--  compiler/locomotiv/src/Node/FeatureDecode.cpp  4
-rw-r--r--  compiler/locomotiv/src/Node/FeatureEncode.cpp  4
-rw-r--r--  compiler/locomotiv/src/Node/FilterEncode.cpp  4
-rw-r--r--  compiler/locomotiv/src/Node/FilterEncode.test.cpp  4
-rw-r--r--  compiler/locomotiv/src/Node/MatrixCodec.test.cpp  4
-rw-r--r--  compiler/locomotiv/src/Node/MatrixDecode.cpp  2
-rw-r--r--  compiler/locomotiv/src/Node/MatrixEncode.cpp  2
-rw-r--r--  compiler/locomotiv/src/Node/MaxPool2D.cpp  4
-rw-r--r--  compiler/locomotiv/src/Node/MaxPool2D.test.cpp  2
-rw-r--r--  compiler/locomotiv/src/Node/Pull.cpp  9
-rw-r--r--  compiler/locomotiv/src/Node/Pull.test.cpp  12
-rw-r--r--  compiler/locomotiv/src/Node/ReLU6.cpp  55
-rw-r--r--  compiler/locomotiv/src/Node/TensorBroadcast.cpp  4
-rw-r--r--  compiler/locomotiv/src/Node/TensorConcat.cpp  2
-rw-r--r--  compiler/locomotiv/src/Node/TensorConstantPad.cpp  1
-rw-r--r--  compiler/locomotiv/src/Node/TransposedConv2D.cpp  10
-rw-r--r--  compiler/locomotiv/src/Node/TransposedConv2D.test.cpp  2
-rw-r--r--  compiler/locomotiv/src/NodeDataImpl.cpp  5
-rw-r--r--  compiler/locomotiv/src/NodeExecution.h  2
-rw-r--r--  compiler/locomotiv/src/UserData.cpp  5
-rw-r--r--  compiler/locop/CMakeLists.txt  6
-rw-r--r--  compiler/locop/src/CanonicalNodeSummaryBuilder.cpp  2
-rw-r--r--  compiler/locop/src/ExampleGraph.h  4
-rw-r--r--  compiler/locop/src/FormattedGraph.cpp  5
-rw-r--r--  compiler/locop/src/FormattedGraph.test.cpp  12
-rw-r--r--  compiler/locop/src/FormattedTensorShape.cpp  2
-rw-r--r--  compiler/locop/src/FormattedTensorShape.test.cpp  18
-rw-r--r--  compiler/locop/src/GenericNodeSummaryBuilder.test.cpp  7
-rw-r--r--  compiler/locop/src/NodeSummary.cpp  7
-rw-r--r--  compiler/logo-core/CMakeLists.txt  4
-rw-r--r--  compiler/logo-core/src/Phase.test.cpp  56
-rw-r--r--  compiler/logo-ex/CMakeLists.txt  23
-rw-r--r--  compiler/logo-ex/README.md  6
-rw-r--r--  compiler/logo-ex/include/logo/ConstantFoldingPass.h  41
-rw-r--r--  compiler/logo-ex/include/logo/PassesEx.h  24
-rw-r--r--  compiler/logo-ex/requires.cmake  3
-rw-r--r--  compiler/logo-ex/src/Passes/ConstantFoldingPass.cpp  172
-rw-r--r--  compiler/logo-ex/src/Passes/ConstantFoldingPass.test.cpp  194
-rw-r--r--  compiler/logo-ex/src/TestHelper.h  44
-rw-r--r--  compiler/logo/CMakeLists.txt  7
-rw-r--r--  compiler/logo/include/logo/ConstantFoldingPass.h  41
-rw-r--r--  compiler/logo/include/logo/Passes.h  1
-rw-r--r--  compiler/logo/requires.cmake  2
-rw-r--r--  compiler/logo/src/Passes/ConstantFoldingPass.cpp  174
-rw-r--r--  compiler/logo/src/Passes/ConstantFoldingPass.test.cpp  179
-rw-r--r--  compiler/logo/src/Passes/EmptyTestGraph.h  29
-rw-r--r--  compiler/logo/src/Passes/EmptyTestGraph.test.cpp  50
-rw-r--r--  compiler/logo/src/Passes/RemoveDeadNodePass.test.cpp  38
-rw-r--r--  compiler/logo/src/Passes/RemoveDeadNodeWithQueryPass.test.cpp  38
-rw-r--r--  compiler/logo/src/Passes/RemoveForwardNodePass.test.cpp  38
-rw-r--r--  compiler/logo/src/Passes/ReorderDecodePass.test.cpp  55
-rw-r--r--  compiler/logo/src/Passes/ResolveDuplicateReshapePass.test.cpp  38
-rw-r--r--  compiler/logo/src/Passes/ResolveRedundantReshapePass.test.cpp  38
-rw-r--r--  compiler/logo/src/Passes/SimplifyDomainConversionPass.cpp  29
-rw-r--r--  compiler/logo/src/Passes/SimplifyDomainConversionPass.test.cpp  22
-rw-r--r--  compiler/luci-compute/CMakeLists.txt  30
-rw-r--r--  compiler/luci-compute/README.md  3
-rw-r--r--  compiler/luci-eval-driver/CMakeLists.txt  10
-rw-r--r--  compiler/luci-eval-driver/requires.cmake  3
-rw-r--r--  compiler/luci-eval-driver/src/EvalDriver.cpp  147
-rw-r--r--  compiler/luci-interpreter/CMakeLists.txt  11
-rw-r--r--  compiler/luci-interpreter/README.md  158
-rw-r--r--  compiler/luci-interpreter/include/luci_interpreter/BuddyMemoryManager.h  144
-rw-r--r--  compiler/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h  35
-rw-r--r--  compiler/luci-interpreter/include/luci_interpreter/Interpreter.h  6
-rw-r--r--  compiler/luci-interpreter/include/luci_interpreter/MemoryManager.h  37
-rw-r--r--  compiler/luci-interpreter/include/luci_interpreter/SimpleMemoryManager.h  34
-rw-r--r--  compiler/luci-interpreter/include/luci_interpreter/StaticMemoryManager.h  45
-rw-r--r--  compiler/luci-interpreter/include/luci_interpreter/TestMemoryManager.h  47
-rw-r--r--  compiler/luci-interpreter/include/luci_interpreter/core/Tensor.h  56
-rw-r--r--  compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst  63
-rw-r--r--  compiler/luci-interpreter/pal/cmsisnn/PALArgMax.h  33
-rw-r--r--  compiler/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h  124
-rw-r--r--  compiler/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h  37
-rw-r--r--  compiler/luci-interpreter/pal/cmsisnn/PALConv2d.h  199
-rw-r--r--  compiler/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h  35
-rw-r--r--  compiler/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h  192
-rw-r--r--  compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h  44
-rw-r--r--  compiler/luci-interpreter/pal/cmsisnn/PALElu.h  33
-rw-r--r--  compiler/luci-interpreter/pal/cmsisnn/PALFullyConnected.h  114
-rw-r--r--  compiler/luci-interpreter/pal/cmsisnn/PALL2Normalize.h  34
-rw-r--r--  compiler/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h  33
-rw-r--r--  compiler/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h  32
-rw-r--r--  compiler/luci-interpreter/pal/cmsisnn/PALMul.h  45
-rw-r--r--  compiler/luci-interpreter/pal/cmsisnn/PALNeg.h  32
-rw-r--r--  compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h  44
-rw-r--r--  compiler/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h  37
-rw-r--r--  compiler/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h  37
-rw-r--r--  compiler/luci-interpreter/pal/cmsisnn/PALSVDF.h  190
-rw-r--r--  compiler/luci-interpreter/pal/cmsisnn/PALSoftmax.h  78
-rw-r--r--  compiler/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h  38
-rw-r--r--  compiler/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h  35
-rw-r--r--  compiler/luci-interpreter/pal/cmsisnn/PALSub.h  35
-rw-r--r--  compiler/luci-interpreter/pal/cmsisnn/PALreference_ops.h  1568
-rw-r--r--  compiler/luci-interpreter/pal/cmsisnn/pal.cmake  65
-rw-r--r--  compiler/luci-interpreter/pal/linux/KernelsToBuild.lst  87
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALArgMax.h  33
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALAveragePool2d.h  73
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALBatchMatMul.h  67
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALBatchToSpaceND.h  37
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALConv2d.h  127
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALDepthToSpace.h  35
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALDepthwiseConv2d.h  91
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALDequantize.h  34
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALElu.h  31
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALFullyConnected.h  61
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALGather.h  35
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALGelu.h  32
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALHardSwish.h  31
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALL2Normalize.h  34
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALL2Pool2D.h  33
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALLeakyRelu.h  32
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALLocalResponseNormalization.h  34
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALLogSoftmax.h  47
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALMul.h  55
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALNeg.h  32
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALQuantize.h  44
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALRelu.h  39
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALRelu6.h  39
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALResizeBilinear.h  37
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALResizeNearestNeighbor.h  37
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALSVDF.h  90
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALSlice.h  33
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALSoftmax.h  47
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALSpaceToBatchND.h  38
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALSpaceToDepth.h  35
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALSplit.h  33
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALSub.h  35
-rw-r--r--  compiler/luci-interpreter/pal/linux/PALreference_ops.h  22
-rw-r--r--  compiler/luci-interpreter/pal/linux/pal.cmake  133
-rw-r--r--  compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst  63
-rw-r--r--  compiler/luci-interpreter/pal/mcu/PALArgMax.h  33
-rw-r--r--  compiler/luci-interpreter/pal/mcu/PALAveragePool2d.h  73
-rw-r--r--  compiler/luci-interpreter/pal/mcu/PALBatchToSpaceND.h  37
-rw-r--r--  compiler/luci-interpreter/pal/mcu/PALConv2d.h  85
-rw-r--r--  compiler/luci-interpreter/pal/mcu/PALDepthToSpace.h  35
-rw-r--r--  compiler/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h  91
-rw-r--r--  compiler/luci-interpreter/pal/mcu/PALDequantize.h  44
-rw-r--r--  compiler/luci-interpreter/pal/mcu/PALElu.h  33
-rw-r--r--  compiler/luci-interpreter/pal/mcu/PALFullyConnected.h  61
-rw-r--r--  compiler/luci-interpreter/pal/mcu/PALL2Normalize.h  34
-rw-r--r--  compiler/luci-interpreter/pal/mcu/PALL2Pool2D.h  33
-rw-r--r--  compiler/luci-interpreter/pal/mcu/PALLeakyRelu.h  32
-rw-r--r--  compiler/luci-interpreter/pal/mcu/PALMul.h  45
-rw-r--r--  compiler/luci-interpreter/pal/mcu/PALNeg.h  32
-rw-r--r--  compiler/luci-interpreter/pal/mcu/PALQuantize.h  44
-rw-r--r--  compiler/luci-interpreter/pal/mcu/PALResizeBilinear.h  37
-rw-r--r--  compiler/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h  37
-rw-r--r--  compiler/luci-interpreter/pal/mcu/PALSVDF.h  258
-rw-r--r--  compiler/luci-interpreter/pal/mcu/PALSoftmax.h  62
-rw-r--r--  compiler/luci-interpreter/pal/mcu/PALSpaceToBatchND.h  38
-rw-r--r--  compiler/luci-interpreter/pal/mcu/PALSpaceToDepth.h  35
-rw-r--r--  compiler/luci-interpreter/pal/mcu/PALSub.h  35
-rw-r--r--  compiler/luci-interpreter/pal/mcu/PALreference_ops.h  1556
-rw-r--r--  compiler/luci-interpreter/pal/mcu/pal.cmake  56
-rw-r--r--  compiler/luci-interpreter/src/BuddyMemoryManager.cpp  99
-rw-r--r--  compiler/luci-interpreter/src/BuddyMemoryManager.test.cpp  69
-rw-r--r--  compiler/luci-interpreter/src/CMakeLists.txt  72
-rw-r--r--  compiler/luci-interpreter/src/Interpreter.cpp  23
-rw-r--r--  compiler/luci-interpreter/src/SimpleMemoryManager.cpp  51
-rw-r--r--  compiler/luci-interpreter/src/StaticMemoryManager.cpp  39
-rw-r--r--  compiler/luci-interpreter/src/TestMemoryManager.cpp  45
-rw-r--r--  compiler/luci-interpreter/src/core/CMakeLists.txt  14
-rw-r--r--  compiler/luci-interpreter/src/core/Kernel.h  8
-rw-r--r--  compiler/luci-interpreter/src/core/KernelParams.h  87
-rw-r--r--  compiler/luci-interpreter/src/core/RuntimeGraph.cpp  114
-rw-r--r--  compiler/luci-interpreter/src/core/RuntimeGraph.h  13
-rw-r--r--  compiler/luci-interpreter/src/core/RuntimeModule.h  5
-rw-r--r--  compiler/luci-interpreter/src/core/Tensor.cpp  16
-rw-r--r--  compiler/luci-interpreter/src/import/CMakeLists.txt  15
-rw-r--r--  compiler/luci-interpreter/src/import/GraphBuilderRegistry.cpp  33
-rw-r--r--  compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp  113
-rw-r--r--  compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.h  39
-rw-r--r--  compiler/luci-interpreter/src/kernels/Abs.cpp  63
-rw-r--r--  compiler/luci-interpreter/src/kernels/Abs.h  46
-rw-r--r--  compiler/luci-interpreter/src/kernels/Abs.test.cpp  81
-rw-r--r--  compiler/luci-interpreter/src/kernels/Add.cpp  108
-rw-r--r--  compiler/luci-interpreter/src/kernels/Add.h  2
-rw-r--r--  compiler/luci-interpreter/src/kernels/Add.test.cpp  294
-rw-r--r--  compiler/luci-interpreter/src/kernels/ArgMax.cpp  13
-rw-r--r--  compiler/luci-interpreter/src/kernels/ArgMax.test.cpp  39
-rw-r--r--  compiler/luci-interpreter/src/kernels/AveragePool2D.cpp  90
-rw-r--r--  compiler/luci-interpreter/src/kernels/AveragePool2D.h  5
-rw-r--r--  compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp  206
-rw-r--r--  compiler/luci-interpreter/src/kernels/BatchMatMul.cpp  188
-rw-r--r--  compiler/luci-interpreter/src/kernels/BatchMatMul.h  49
-rw-r--r--  compiler/luci-interpreter/src/kernels/BatchMatMul.test.cpp  272
-rw-r--r--  compiler/luci-interpreter/src/kernels/BatchToSpaceND.cpp  104
-rw-r--r--  compiler/luci-interpreter/src/kernels/BatchToSpaceND.h  45
-rw-r--r--  compiler/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp  100
-rw-r--r--  compiler/luci-interpreter/src/kernels/BinaryOpCommon.h  73
-rw-r--r--  compiler/luci-interpreter/src/kernels/CMakeLists.txt  152
-rw-r--r--  compiler/luci-interpreter/src/kernels/Cast.cpp  143
-rw-r--r--  compiler/luci-interpreter/src/kernels/Cast.h  43
-rw-r--r--  compiler/luci-interpreter/src/kernels/Cast.test.cpp  241
-rw-r--r--  compiler/luci-interpreter/src/kernels/Concatenation.cpp  35
-rw-r--r--  compiler/luci-interpreter/src/kernels/Concatenation.test.cpp  199
-rw-r--r--  compiler/luci-interpreter/src/kernels/Conv2D.cpp  334
-rw-r--r--  compiler/luci-interpreter/src/kernels/Conv2D.h  6
-rw-r--r--  compiler/luci-interpreter/src/kernels/Conv2D.test.cpp  638
-rw-r--r--  compiler/luci-interpreter/src/kernels/DepthToSpace.cpp  40
-rw-r--r--  compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp  59
-rw-r--r--  compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp  322
-rw-r--r--  compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h  5
-rw-r--r--  compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp  593
-rw-r--r--  compiler/luci-interpreter/src/kernels/Dequantize.cpp  79
-rw-r--r--  compiler/luci-interpreter/src/kernels/Dequantize.h  43
-rw-r--r--  compiler/luci-interpreter/src/kernels/Dequantize.test.cpp  149
-rw-r--r--  compiler/luci-interpreter/src/kernels/Div.cpp  152
-rw-r--r--  compiler/luci-interpreter/src/kernels/Div.h  49
-rw-r--r--  compiler/luci-interpreter/src/kernels/Div.test.cpp  230
-rw-r--r--  compiler/luci-interpreter/src/kernels/Elu.cpp  8
-rw-r--r--  compiler/luci-interpreter/src/kernels/Elu.test.cpp  49
-rw-r--r--  compiler/luci-interpreter/src/kernels/Equal.cpp  142
-rw-r--r--  compiler/luci-interpreter/src/kernels/Equal.h  54
-rw-r--r--  compiler/luci-interpreter/src/kernels/Equal.test.cpp  306
-rw-r--r--  compiler/luci-interpreter/src/kernels/Exp.cpp  56
-rw-r--r--  compiler/luci-interpreter/src/kernels/Exp.h  46
-rw-r--r--  compiler/luci-interpreter/src/kernels/Exp.test.cpp  55
-rw-r--r--  compiler/luci-interpreter/src/kernels/ExpandDims.cpp  88
-rw-r--r--  compiler/luci-interpreter/src/kernels/ExpandDims.h  44
-rw-r--r--  compiler/luci-interpreter/src/kernels/ExpandDims.test.cpp  115
-rw-r--r--  compiler/luci-interpreter/src/kernels/Fill.cpp  117
-rw-r--r--  compiler/luci-interpreter/src/kernels/Fill.h  47
-rw-r--r--  compiler/luci-interpreter/src/kernels/Fill.test.cpp  169
-rw-r--r--  compiler/luci-interpreter/src/kernels/Floor.cpp  57
-rw-r--r--  compiler/luci-interpreter/src/kernels/Floor.h  45
-rw-r--r--  compiler/luci-interpreter/src/kernels/Floor.test.cpp  76
-rw-r--r--  compiler/luci-interpreter/src/kernels/FloorDiv.cpp  85
-rw-r--r--  compiler/luci-interpreter/src/kernels/FloorDiv.h  46
-rw-r--r--  compiler/luci-interpreter/src/kernels/FloorDiv.test.cpp  147
-rw-r--r--  compiler/luci-interpreter/src/kernels/FloorMod.cpp  132
-rw-r--r--  compiler/luci-interpreter/src/kernels/FloorMod.h  47
-rw-r--r--  compiler/luci-interpreter/src/kernels/FloorMod.test.cpp  446
-rw-r--r--  compiler/luci-interpreter/src/kernels/FullyConnected.cpp  143
-rw-r--r--  compiler/luci-interpreter/src/kernels/FullyConnected.h  2
-rw-r--r--  compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp  225
-rw-r--r--  compiler/luci-interpreter/src/kernels/Gather.cpp  139
-rw-r--r--  compiler/luci-interpreter/src/kernels/Gather.h  47
-rw-r--r--  compiler/luci-interpreter/src/kernels/Gather.test.cpp  137
-rw-r--r--  compiler/luci-interpreter/src/kernels/Gelu.cpp  63
-rw-r--r--  compiler/luci-interpreter/src/kernels/Gelu.h  49
-rw-r--r--  compiler/luci-interpreter/src/kernels/Gelu.test.cpp  115
-rw-r--r--  compiler/luci-interpreter/src/kernels/Greater.cpp  142
-rw-r--r--  compiler/luci-interpreter/src/kernels/Greater.h  54
-rw-r--r--  compiler/luci-interpreter/src/kernels/Greater.test.cpp  334
-rw-r--r--  compiler/luci-interpreter/src/kernels/GreaterEqual.cpp  145
-rw-r--r--  compiler/luci-interpreter/src/kernels/GreaterEqual.h  54
-rw-r--r--  compiler/luci-interpreter/src/kernels/GreaterEqual.test.cpp  333
-rw-r--r--  compiler/luci-interpreter/src/kernels/HardSwish.cpp  52
-rw-r--r--  compiler/luci-interpreter/src/kernels/HardSwish.h  43
-rw-r--r--  compiler/luci-interpreter/src/kernels/HardSwish.test.cpp  81
-rw-r--r--  compiler/luci-interpreter/src/kernels/If.cpp  21
-rw-r--r--  compiler/luci-interpreter/src/kernels/If.test.cpp  98
-rw-r--r--  compiler/luci-interpreter/src/kernels/InstanceNorm.cpp  121
-rw-r--r--  compiler/luci-interpreter/src/kernels/InstanceNorm.h  49
-rw-r--r--  compiler/luci-interpreter/src/kernels/InstanceNorm.test.cpp  97
-rw-r--r--  compiler/luci-interpreter/src/kernels/L2Normalize.cpp  23
-rw-r--r--  compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp  92
-rw-r--r--  compiler/luci-interpreter/src/kernels/L2Pool2D.cpp  22
-rw-r--r--  compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp  147
-rw-r--r--  compiler/luci-interpreter/src/kernels/LeakyRelu.cpp  18
-rw-r--r--  compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp  95
-rw-r--r--  compiler/luci-interpreter/src/kernels/Less.cpp  142
-rw-r--r--  compiler/luci-interpreter/src/kernels/Less.h  54
-rw-r--r--  compiler/luci-interpreter/src/kernels/Less.test.cpp  334
-rw-r--r--  compiler/luci-interpreter/src/kernels/LessEqual.cpp  142
-rw-r--r--  compiler/luci-interpreter/src/kernels/LessEqual.h  54
-rw-r--r--  compiler/luci-interpreter/src/kernels/LessEqual.test.cpp  334
-rw-r--r--  compiler/luci-interpreter/src/kernels/LocalResponseNormalization.cpp  18
-rw-r--r--  compiler/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp  78
-rw-r--r--  compiler/luci-interpreter/src/kernels/Log.cpp  56
-rw-r--r--  compiler/luci-interpreter/src/kernels/Log.h  49
-rw-r--r--  compiler/luci-interpreter/src/kernels/Log.test.cpp  71
-rw-r--r--  compiler/luci-interpreter/src/kernels/LogSoftmax.cpp  92
-rw-r--r--  compiler/luci-interpreter/src/kernels/LogSoftmax.h  48
-rw-r--r--  compiler/luci-interpreter/src/kernels/LogSoftmax.test.cpp  124
-rw-r--r--  compiler/luci-interpreter/src/kernels/LogicalAnd.cpp  62
-rw-r--r--  compiler/luci-interpreter/src/kernels/LogicalAnd.h  47
-rw-r--r--  compiler/luci-interpreter/src/kernels/LogicalAnd.test.cpp  101
-rw-r--r--  compiler/luci-interpreter/src/kernels/LogicalNot.cpp  60
-rw-r--r--  compiler/luci-interpreter/src/kernels/LogicalNot.h  46
-rw-r--r--  compiler/luci-interpreter/src/kernels/LogicalNot.test.cpp  78
-rw-r--r--  compiler/luci-interpreter/src/kernels/LogicalOr.cpp  49
-rw-r--r--  compiler/luci-interpreter/src/kernels/LogicalOr.h  44
-rw-r--r--  compiler/luci-interpreter/src/kernels/LogicalOr.test.cpp  104
-rw-r--r--  compiler/luci-interpreter/src/kernels/Logistic.cpp  6
-rw-r--r--  compiler/luci-interpreter/src/kernels/Logistic.test.cpp  125
-rw-r--r--  compiler/luci-interpreter/src/kernels/MaxPool2D.cpp  50
-rw-r--r--  compiler/luci-interpreter/src/kernels/MaxPool2D.h  1
-rw-r--r--  compiler/luci-interpreter/src/kernels/MaxPool2D.test.cpp  80
-rw-r--r--  compiler/luci-interpreter/src/kernels/Maximum.cpp  65
-rw-r--r--  compiler/luci-interpreter/src/kernels/Maximum.h  47
-rw-r--r--  compiler/luci-interpreter/src/kernels/Maximum.test.cpp  82
-rw-r--r--  compiler/luci-interpreter/src/kernels/Mean.cpp  175
-rw-r--r--  compiler/luci-interpreter/src/kernels/Mean.h  8
-rw-r--r--  compiler/luci-interpreter/src/kernels/Mean.test.cpp  151
-rw-r--r--  compiler/luci-interpreter/src/kernels/Minimum.cpp  65
-rw-r--r--  compiler/luci-interpreter/src/kernels/Minimum.h  47
-rw-r--r--  compiler/luci-interpreter/src/kernels/Minimum.test.cpp  82
-rw-r--r--  compiler/luci-interpreter/src/kernels/MirrorPad.cpp  172
-rw-r--r--  compiler/luci-interpreter/src/kernels/MirrorPad.h  45
-rw-r--r--  compiler/luci-interpreter/src/kernels/MirrorPad.test.cpp  225
-rw-r--r--  compiler/luci-interpreter/src/kernels/Mul.cpp  97
-rw-r--r--  compiler/luci-interpreter/src/kernels/Mul.h  2
-rw-r--r--  compiler/luci-interpreter/src/kernels/Mul.test.cpp  245
-rw-r--r--  compiler/luci-interpreter/src/kernels/Neg.cpp  58
-rw-r--r--  compiler/luci-interpreter/src/kernels/Neg.h  46
-rw-r--r--  compiler/luci-interpreter/src/kernels/Neg.test.cpp  71
-rw-r--r--  compiler/luci-interpreter/src/kernels/NotEqual.cpp  142
-rw-r--r--  compiler/luci-interpreter/src/kernels/NotEqual.h  54
-rw-r--r--  compiler/luci-interpreter/src/kernels/NotEqual.test.cpp  306
-rw-r--r--  compiler/luci-interpreter/src/kernels/OneHot.cpp  136
-rw-r--r--  compiler/luci-interpreter/src/kernels/OneHot.h  48
-rw-r--r--  compiler/luci-interpreter/src/kernels/OneHot.test.cpp  192
-rw-r--r--  compiler/luci-interpreter/src/kernels/PRelu.cpp  211
-rw-r--r--  compiler/luci-interpreter/src/kernels/PRelu.h  59
-rw-r--r--  compiler/luci-interpreter/src/kernels/PRelu.test.cpp  397
-rw-r--r--  compiler/luci-interpreter/src/kernels/Pack.cpp  142
-rw-r--r--  compiler/luci-interpreter/src/kernels/Pack.h  46
-rw-r--r--  compiler/luci-interpreter/src/kernels/Pack.test.cpp  163
-rw-r--r--  compiler/luci-interpreter/src/kernels/Pad.cpp  16
-rw-r--r--  compiler/luci-interpreter/src/kernels/Pad.test.cpp  54
-rw-r--r--  compiler/luci-interpreter/src/kernels/PadV2.cpp  108
-rw-r--r--  compiler/luci-interpreter/src/kernels/PadV2.h  44
-rw-r--r--  compiler/luci-interpreter/src/kernels/PadV2.test.cpp  90
-rw-r--r--  compiler/luci-interpreter/src/kernels/Pow.cpp  79
-rw-r--r--  compiler/luci-interpreter/src/kernels/Pow.h  46
-rw-r--r--  compiler/luci-interpreter/src/kernels/Pow.test.cpp  140
-rw-r--r--  compiler/luci-interpreter/src/kernels/Quantize.cpp  160
-rw-r--r--  compiler/luci-interpreter/src/kernels/Quantize.h  43
-rw-r--r--  compiler/luci-interpreter/src/kernels/Quantize.test.cpp  254
-rw-r--r--  compiler/luci-interpreter/src/kernels/ReduceMax.cpp  181
-rw-r--r--  compiler/luci-interpreter/src/kernels/ReduceMax.h  50
-rw-r--r--  compiler/luci-interpreter/src/kernels/ReduceMax.test.cpp  103
-rw-r--r--  compiler/luci-interpreter/src/kernels/ReduceProd.cpp  180
-rw-r--r--  compiler/luci-interpreter/src/kernels/ReduceProd.h  50
-rw-r--r--  compiler/luci-interpreter/src/kernels/ReduceProd.test.cpp  149
-rw-r--r--  compiler/luci-interpreter/src/kernels/Relu.cpp  114
-rw-r--r--  compiler/luci-interpreter/src/kernels/Relu.h  51
-rw-r--r--  compiler/luci-interpreter/src/kernels/Relu.test.cpp  168
-rw-r--r--  compiler/luci-interpreter/src/kernels/Relu6.cpp  88
-rw-r--r--  compiler/luci-interpreter/src/kernels/Relu6.h  50
-rw-r--r--  compiler/luci-interpreter/src/kernels/Relu6.test.cpp  149
-rw-r--r--  compiler/luci-interpreter/src/kernels/Reshape.cpp  26
-rw-r--r--  compiler/luci-interpreter/src/kernels/Reshape.test.cpp  69
-rw-r--r--  compiler/luci-interpreter/src/kernels/ResizeBilinear.cpp  74
-rw-r--r--  compiler/luci-interpreter/src/kernels/ResizeBilinear.h  45
-rw-r--r--  compiler/luci-interpreter/src/kernels/ResizeBilinear.test.cpp  255
-rw-r--r--  compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp  74
-rw-r--r--  compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.h  45
-rw-r--r--  compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp  231
-rw-r--r--  compiler/luci-interpreter/src/kernels/Reverse.cpp  81
-rw-r--r--  compiler/luci-interpreter/src/kernels/Reverse.h  43
-rw-r--r--  compiler/luci-interpreter/src/kernels/Reverse.test.cpp  66
-rw-r--r--  compiler/luci-interpreter/src/kernels/ReverseV2.cpp  81
-rw-r--r--  compiler/luci-interpreter/src/kernels/ReverseV2.h  43
-rw-r--r--  compiler/luci-interpreter/src/kernels/ReverseV2.test.cpp  71
-rw-r--r--  compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp  41
-rw-r--r--  compiler/luci-interpreter/src/kernels/SVDF.cpp  217
-rw-r--r--  compiler/luci-interpreter/src/kernels/SVDF.h  56
-rw-r--r--  compiler/luci-interpreter/src/kernels/SVDF.test.cpp  341
-rw-r--r--  compiler/luci-interpreter/src/kernels/Select.cpp  100
-rw-r--r--  compiler/luci-interpreter/src/kernels/Select.h  55
-rw-r--r--  compiler/luci-interpreter/src/kernels/Select.test.cpp  106
-rw-r--r--  compiler/luci-interpreter/src/kernels/Shape.cpp  70
-rw-r--r--  compiler/luci-interpreter/src/kernels/Shape.h  46
-rw-r--r--  compiler/luci-interpreter/src/kernels/Shape.test.cpp  89
-rw-r--r--  compiler/luci-interpreter/src/kernels/Slice.cpp  20
-rw-r--r--  compiler/luci-interpreter/src/kernels/Slice.test.cpp  16
-rw-r--r--  compiler/luci-interpreter/src/kernels/Softmax.cpp  38
-rw-r--r--  compiler/luci-interpreter/src/kernels/Softmax.h  3
-rw-r--r--  compiler/luci-interpreter/src/kernels/Softmax.test.cpp  93
-rw-r--r--  compiler/luci-interpreter/src/kernels/SpaceToBatchND.cpp  103
-rw-r--r--  compiler/luci-interpreter/src/kernels/SpaceToBatchND.h  45
-rw-r--r--  compiler/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp  123
-rw-r--r--  compiler/luci-interpreter/src/kernels/SpaceToDepth.cpp  16
-rw-r--r--  compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp  13
-rw-r--r--  compiler/luci-interpreter/src/kernels/Split.cpp  14
-rw-r--r--  compiler/luci-interpreter/src/kernels/Split.test.cpp  81
-rw-r--r--  compiler/luci-interpreter/src/kernels/SplitV.cpp  111
-rw-r--r--  compiler/luci-interpreter/src/kernels/SplitV.h  49
-rw-r--r--  compiler/luci-interpreter/src/kernels/SplitV.test.cpp  112
-rw-r--r--  compiler/luci-interpreter/src/kernels/Sqrt.test.cpp  43
-rw-r--r--  compiler/luci-interpreter/src/kernels/Square.cpp  66
-rw-r--r--  compiler/luci-interpreter/src/kernels/Square.h  46
-rw-r--r--  compiler/luci-interpreter/src/kernels/Square.test.cpp  52
-rw-r--r--  compiler/luci-interpreter/src/kernels/SquaredDifference.cpp  64
-rw-r--r--  compiler/luci-interpreter/src/kernels/SquaredDifference.h  47
-rw-r--r--  compiler/luci-interpreter/src/kernels/SquaredDifference.test.cpp  78
-rw-r--r--  compiler/luci-interpreter/src/kernels/Squeeze.cpp  2
-rw-r--r--  compiler/luci-interpreter/src/kernels/Squeeze.test.cpp  30
-rw-r--r--  compiler/luci-interpreter/src/kernels/StridedSlice.cpp  11
-rw-r--r--  compiler/luci-interpreter/src/kernels/StridedSlice.test.cpp  47
-rw-r--r--  compiler/luci-interpreter/src/kernels/Sub.cpp  164
-rw-r--r--  compiler/luci-interpreter/src/kernels/Sub.h  49
-rw-r--r--  compiler/luci-interpreter/src/kernels/Sub.test.cpp  266
-rw-r--r--  compiler/luci-interpreter/src/kernels/Sum.cpp  179
-rw-r--r--  compiler/luci-interpreter/src/kernels/Sum.h  51
-rw-r--r--  compiler/luci-interpreter/src/kernels/Sum.test.cpp  145
-rw-r--r--  compiler/luci-interpreter/src/kernels/Tanh.cpp  5
-rw-r--r--  compiler/luci-interpreter/src/kernels/Tanh.test.cpp  140
-rw-r--r--  compiler/luci-interpreter/src/kernels/TestUtils.cpp  71
-rw-r--r--  compiler/luci-interpreter/src/kernels/TestUtils.h  166
-rw-r--r--  compiler/luci-interpreter/src/kernels/Transpose.cpp  10
-rw-r--r--  compiler/luci-interpreter/src/kernels/Transpose.test.cpp  94
-rw-r--r--  compiler/luci-interpreter/src/kernels/TransposeConv.cpp  337
-rw-r--r--  compiler/luci-interpreter/src/kernels/TransposeConv.h  16
-rw-r--r--  compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp  323
-rw-r--r--  compiler/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.cpp  892
-rw-r--r--  compiler/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.h  105
-rw-r--r--  compiler/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.test.cpp  565
-rw-r--r--  compiler/luci-interpreter/src/kernels/Unpack.cpp  2
-rw-r--r--  compiler/luci-interpreter/src/kernels/Unpack.test.cpp  21
-rw-r--r--  compiler/luci-interpreter/src/kernels/Utils.cpp  53
-rw-r--r--  compiler/luci-interpreter/src/kernels/Utils.h  101
-rw-r--r--  compiler/luci-interpreter/src/kernels/While.cpp  116
-rw-r--r--  compiler/luci-interpreter/src/kernels/While.h  48
-rw-r--r--  compiler/luci-interpreter/src/kernels/While.test.cpp  101
-rw-r--r--  compiler/luci-interpreter/src/loader/CMakeLists.txt  39
-rw-r--r--  compiler/luci-interpreter/src/loader/GraphLoader.cpp  186
-rw-r--r--  compiler/luci-interpreter/src/loader/GraphLoader.h  5
-rw-r--r--  compiler/luci-interpreter/src/loader/KernelBuilder.cpp  612
-rw-r--r--  compiler/luci-interpreter/src/loader/KernelBuilder.h  65
-rw-r--r--  compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp  597
-rw-r--r--  compiler/luci-interpreter/src/loader/KernelBuilderHelper.cpp  64
-rw-r--r--  compiler/luci-interpreter/src/loader/KernelBuilderHelper.h  84
-rw-r--r--  compiler/luci-interpreter/src/loader/ModuleLoader.cpp  11
-rw-r--r--  compiler/luci-interpreter/src/loader/ModuleLoader.h  5
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Abs.cpp  36
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Add.cpp  40
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/ArgMax.cpp  39
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp  64
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp  70
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp  38
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Builders.h  37
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Cast.cpp  37
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Concatenation.cpp  42
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp  66
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/DepthToSpace.cpp  39
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp  67
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp  35
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Div.cpp  39
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Elu.cpp  35
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Equal.cpp  38
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Exp.cpp  36
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/ExpandDims.cpp  37
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Fill.cpp  37
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Floor.cpp  36
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/FloorDiv.cpp  37
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/FloorMod.cpp  37
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp  42
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Gather.cpp  42
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Gelu.cpp  38
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Greater.cpp  37
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/GreaterEqual.cpp  37
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/HardSwish.cpp  35
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/If.cpp  47
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/InstanceNorm.cpp  43
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/L2Normalize.cpp  39
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/L2Pool2D.cpp  44
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/LeakyRelu.cpp  38
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Less.cpp  37
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/LessEqual.cpp  37
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp  42
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Log.cpp  36
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/LogSoftmax.cpp  36
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/LogicalAnd.cpp  37
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/LogicalNot.cpp  36
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/LogicalOr.cpp  37
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Logistic.cpp  36
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/MaxPool2D.cpp  44
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Maximum.cpp  37
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Mean.cpp  61
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Minimum.cpp  37
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/MirrorPad.cpp  40
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Mul.cpp  40
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Neg.cpp  36
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/NotEqual.cpp  37
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/OneHot.cpp  42
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/PRelu.cpp  37
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Pack.cpp  44
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Pad.cpp  37
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/PadV2.cpp  38
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Pow.cpp  38
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Quantize.cpp  36
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/ReduceMax.cpp  55
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/ReduceProd.cpp  55
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Relu.cpp  36
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Relu6.cpp  36
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Reshape.cpp  38
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp  41
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp  46
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/ReverseV2.cpp  37
-rw-r--r--  compiler/luci-interpreter/src/loader/nodes/Rsqrt.cpp  36
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/SVDF.cpp92
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Select.cpp38
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Shape.cpp39
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Slice.cpp39
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Softmax.cpp39
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp39
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp39
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Split.cpp40
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/SplitV.cpp41
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Sqrt.cpp36
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Square.cpp36
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/SquaredDifference.cpp37
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Squeeze.cpp39
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/StridedSlice.cpp47
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Sub.cpp40
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Sum.cpp54
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Tanh.cpp36
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Transpose.cpp37
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/TransposeConv.cpp62
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/UnidirectionalSequenceLSTM.cpp106
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Unpack.cpp42
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/While.cpp47
-rw-r--r--compiler/luci-pass-value-test/.gitignore1
-rw-r--r--compiler/luci-pass-value-test/CMakeLists.txt55
-rw-r--r--compiler/luci-pass-value-test/README.md20
-rwxr-xr-xcompiler/luci-pass-value-test/eval_driver.sh68
-rw-r--r--compiler/luci-pass-value-test/eval_result_verifier.py114
-rw-r--r--compiler/luci-pass-value-test/requires.cmake7
-rw-r--r--compiler/luci-pass-value-test/test.lst52
-rw-r--r--compiler/luci-value-test/.gitignore1
-rw-r--r--compiler/luci-value-test/CMakeLists.txt73
-rw-r--r--compiler/luci-value-test/README.md4
-rwxr-xr-xcompiler/luci-value-test/evalverify.sh6
-rwxr-xr-xcompiler/luci-value-test/evalverify_ref.sh63
-rwxr-xr-xcompiler/luci-value-test/evalverifytol.sh71
-rwxr-xr-xcompiler/luci-value-test/evalverifytol_ref.sh70
-rwxr-xr-xcompiler/luci-value-test/luci_eval_verifier.py122
-rwxr-xr-xcompiler/luci-value-test/luci_eval_verifier_ref.py170
-rw-r--r--compiler/luci-value-test/requires.cmake1
-rw-r--r--compiler/luci-value-test/test.lst141
-rw-r--r--compiler/luci-value-test/tester/CMakeLists.txt13
-rw-r--r--compiler/luci-value-test/tester/src/EvalTester.cpp177
-rw-r--r--compiler/luci/CMakeLists.txt21
-rw-r--r--compiler/luci/env/CMakeLists.txt8
-rw-r--r--compiler/luci/env/include/luci/UserSettings.h2
-rw-r--r--compiler/luci/env/src/UserSettings.cpp12
-rw-r--r--compiler/luci/env/src/UserSettings.test.cpp36
-rw-r--r--compiler/luci/export/CMakeLists.txt41
-rw-r--r--compiler/luci/export/include/luci/CircleFileExpContract.h2
-rw-r--r--compiler/luci/export/src/CircleBuiltinTypesExtractor.h549
-rw-r--r--compiler/luci/export/src/CircleBuiltinTypesMappingRule.h79
-rw-r--r--compiler/luci/export/src/CircleExportMetadata.cpp150
-rw-r--r--compiler/luci/export/src/CircleExportMetadata.h36
-rw-r--r--compiler/luci/export/src/CircleExporter.test.cpp137
-rw-r--r--compiler/luci/export/src/CircleExporterImpl.cpp76
-rw-r--r--compiler/luci/export/src/CircleExporterImpl.h2
-rw-r--r--compiler/luci/export/src/CircleExporterUtils.cpp146
-rw-r--r--compiler/luci/export/src/CircleExporterUtils.h12
-rw-r--r--compiler/luci/export/src/CircleOperationExporter.cpp1434
-rw-r--r--compiler/luci/export/src/CircleOperationExporter.h2
-rw-r--r--compiler/luci/export/src/CircleOperationExporterRule.cpp277
-rw-r--r--compiler/luci/export/src/CircleOperationExporterRule.h76
-rw-r--r--compiler/luci/export/src/CircleOps.lst157
-rw-r--r--compiler/luci/export/src/CircleTensorExporter.cpp320
-rw-r--r--compiler/luci/export/src/Optimize.cpp8
-rw-r--r--compiler/luci/export/src/ProgressReporter.h2
-rw-r--r--compiler/luci/export/src/SerializedData.h50
-rw-r--r--compiler/luci/export/src/TypeBridge.cpp105
-rw-r--r--compiler/luci/export/src/TypeBridge.h44
-rw-r--r--compiler/luci/import/CMakeLists.txt15
-rw-r--r--compiler/luci/import/include/luci/Import/CircleReader.h94
-rw-r--r--compiler/luci/import/include/luci/Import/GraphBuilder.h8
-rw-r--r--compiler/luci/import/include/luci/Import/GraphBuilderBase.h4
-rw-r--r--compiler/luci/import/include/luci/Import/GraphBuilderContext.h2
-rw-r--r--compiler/luci/import/include/luci/Import/GraphBuilderMultiOutput.h67
-rw-r--r--compiler/luci/import/include/luci/Import/GraphBuilderRegistry.h23
-rw-r--r--compiler/luci/import/include/luci/Import/NodeBuilder.h59
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes.h10
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleBidirectionalSequenceLSTM.h37
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleConst.h17
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleCustom.h8
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleDensify.h37
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleDequantize.h37
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleFakeQuant.h37
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleGelu.h37
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleHardSwish.h37
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleIf.h8
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleNonMaxSuppressionV4.h8
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleNonMaxSuppressionV5.h8
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleQuantize.h37
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleSVDF.h37
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleSplit.h8
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleSplitV.h8
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleTopKV2.h8
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleUnidirectionalSequenceLSTM.h37
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleUnique.h8
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleUnpack.h8
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleVariable.h37
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleWhile.h2
-rw-r--r--compiler/luci/import/include/luci/ImporterEx.h39
-rw-r--r--compiler/luci/import/src/CircleImportMetadata.cpp251
-rw-r--r--compiler/luci/import/src/CircleImportMetadata.h64
-rw-r--r--compiler/luci/import/src/CircleReader.cpp295
-rw-r--r--compiler/luci/import/src/CircleReader.test.cpp67
-rw-r--r--compiler/luci/import/src/GraphBuilder.cpp22
-rw-r--r--compiler/luci/import/src/GraphBuilderMultiOutput.cpp98
-rw-r--r--compiler/luci/import/src/GraphBuilderRegistry.cpp32
-rw-r--r--compiler/luci/import/src/Importer.cpp149
-rw-r--r--compiler/luci/import/src/Importer.test.cpp329
-rw-r--r--compiler/luci/import/src/ImporterEx.cpp61
-rw-r--r--compiler/luci/import/src/Nodes/CircleAbs.cpp5
-rw-r--r--compiler/luci/import/src/Nodes/CircleAdd.cpp5
-rw-r--r--compiler/luci/import/src/Nodes/CircleArgMax.cpp5
-rw-r--r--compiler/luci/import/src/Nodes/CircleArgMin.cpp5
-rw-r--r--compiler/luci/import/src/Nodes/CircleAveragePool2D.cpp5
-rw-r--r--compiler/luci/import/src/Nodes/CircleBCQFullyConnected.cpp14
-rw-r--r--compiler/luci/import/src/Nodes/CircleBCQGather.cpp5
-rw-r--r--compiler/luci/import/src/Nodes/CircleBatchMatMul.cpp5
-rw-r--r--compiler/luci/import/src/Nodes/CircleBidirectionalSequenceLSTM.cpp124
-rw-r--r--compiler/luci/import/src/Nodes/CircleCast.cpp19
-rw-r--r--compiler/luci/import/src/Nodes/CircleCeil.cpp10
-rw-r--r--compiler/luci/import/src/Nodes/CircleConst.cpp98
-rw-r--r--compiler/luci/import/src/Nodes/CircleConv2D.cpp5
-rw-r--r--compiler/luci/import/src/Nodes/CircleCos.cpp5
-rw-r--r--compiler/luci/import/src/Nodes/CircleCustom.cpp69
-rw-r--r--compiler/luci/import/src/Nodes/CircleDensify.cpp43
-rw-r--r--compiler/luci/import/src/Nodes/CircleDepthToSpace.cpp15
-rw-r--r--compiler/luci/import/src/Nodes/CircleDepthwiseConv2D.cpp28
-rw-r--r--compiler/luci/import/src/Nodes/CircleDequantize.cpp43
-rw-r--r--compiler/luci/import/src/Nodes/CircleDiv.cpp8
-rw-r--r--compiler/luci/import/src/Nodes/CircleElu.cpp25
-rw-r--r--compiler/luci/import/src/Nodes/CircleEqual.cpp12
-rw-r--r--compiler/luci/import/src/Nodes/CircleExp.cpp15
-rw-r--r--compiler/luci/import/src/Nodes/CircleExpandDims.cpp12
-rw-r--r--compiler/luci/import/src/Nodes/CircleFakeQuant.cpp49
-rw-r--r--compiler/luci/import/src/Nodes/CircleFill.cpp8
-rw-r--r--compiler/luci/import/src/Nodes/CircleFloor.cpp10
-rw-r--r--compiler/luci/import/src/Nodes/CircleFloorDiv.cpp31
-rw-r--r--compiler/luci/import/src/Nodes/CircleFloorMod.cpp16
-rw-r--r--compiler/luci/import/src/Nodes/CircleFullyConnected.cpp22
-rw-r--r--compiler/luci/import/src/Nodes/CircleGather.cpp10
-rw-r--r--compiler/luci/import/src/Nodes/CircleGatherNd.cpp16
-rw-r--r--compiler/luci/import/src/Nodes/CircleGelu.cpp44
-rw-r--r--compiler/luci/import/src/Nodes/CircleGreater.cpp20
-rw-r--r--compiler/luci/import/src/Nodes/CircleGreaterEqual.cpp22
-rw-r--r--compiler/luci/import/src/Nodes/CircleHardSwish.cpp41
-rw-r--r--compiler/luci/import/src/Nodes/CircleIf.cpp74
-rw-r--r--compiler/luci/import/src/Nodes/CircleInstanceNorm.cpp6
-rw-r--r--compiler/luci/import/src/Nodes/CircleL2Normalize.cpp15
-rw-r--r--compiler/luci/import/src/Nodes/CircleL2Pool2D.cpp6
-rw-r--r--compiler/luci/import/src/Nodes/CircleLeakyRelu.cpp8
-rw-r--r--compiler/luci/import/src/Nodes/CircleLess.cpp27
-rw-r--r--compiler/luci/import/src/Nodes/CircleLessEqual.cpp22
-rw-r--r--compiler/luci/import/src/Nodes/CircleLocalResponseNormalization.cpp8
-rw-r--r--compiler/luci/import/src/Nodes/CircleLog.cpp13
-rw-r--r--compiler/luci/import/src/Nodes/CircleLogSoftmax.cpp6
-rw-r--r--compiler/luci/import/src/Nodes/CircleLogicalAnd.cpp13
-rw-r--r--compiler/luci/import/src/Nodes/CircleLogicalNot.cpp9
-rw-r--r--compiler/luci/import/src/Nodes/CircleLogicalOr.cpp9
-rw-r--r--compiler/luci/import/src/Nodes/CircleLogistic.cpp13
-rw-r--r--compiler/luci/import/src/Nodes/CircleMatrixDiag.cpp17
-rw-r--r--compiler/luci/import/src/Nodes/CircleMatrixSetDiag.cpp17
-rw-r--r--compiler/luci/import/src/Nodes/CircleMaxPool2D.cpp5
-rw-r--r--compiler/luci/import/src/Nodes/CircleMean.cpp5
-rw-r--r--compiler/luci/import/src/Nodes/CircleMirrorPad.cpp6
-rw-r--r--compiler/luci/import/src/Nodes/CircleMul.cpp8
-rw-r--r--compiler/luci/import/src/Nodes/CircleNeg.cpp5
-rw-r--r--compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV4.cpp98
-rw-r--r--compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV5.cpp103
-rw-r--r--compiler/luci/import/src/Nodes/CircleNotEqual.cpp22
-rw-r--r--compiler/luci/import/src/Nodes/CircleOneHot.cpp35
-rw-r--r--compiler/luci/import/src/Nodes/CirclePRelu.cpp8
-rw-r--r--compiler/luci/import/src/Nodes/CirclePad.cpp6
-rw-r--r--compiler/luci/import/src/Nodes/CirclePadV2.cpp8
-rw-r--r--compiler/luci/import/src/Nodes/CirclePow.cpp8
-rw-r--r--compiler/luci/import/src/Nodes/CircleQuantize.cpp43
-rw-r--r--compiler/luci/import/src/Nodes/CircleRange.cpp5
-rw-r--r--compiler/luci/import/src/Nodes/CircleRank.cpp8
-rw-r--r--compiler/luci/import/src/Nodes/CircleReduceAny.cpp25
-rw-r--r--compiler/luci/import/src/Nodes/CircleReduceProd.cpp13
-rw-r--r--compiler/luci/import/src/Nodes/CircleRelu.cpp8
-rw-r--r--compiler/luci/import/src/Nodes/CircleRelu6.cpp8
-rw-r--r--compiler/luci/import/src/Nodes/CircleReluN1To1.cpp9
-rw-r--r--compiler/luci/import/src/Nodes/CircleReshape.cpp15
-rw-r--r--compiler/luci/import/src/Nodes/CircleResizeBilinear.cpp9
-rw-r--r--compiler/luci/import/src/Nodes/CircleResizeNearestNeighbor.cpp11
-rw-r--r--compiler/luci/import/src/Nodes/CircleReverseSequence.cpp24
-rw-r--r--compiler/luci/import/src/Nodes/CircleReverseV2.cpp24
-rw-r--r--compiler/luci/import/src/Nodes/CircleRound.cpp21
-rw-r--r--compiler/luci/import/src/Nodes/CircleRsqrt.cpp13
-rw-r--r--compiler/luci/import/src/Nodes/CircleSVDF.cpp64
-rw-r--r--compiler/luci/import/src/Nodes/CircleScatterNd.cpp13
-rw-r--r--compiler/luci/import/src/Nodes/CircleSegmentSum.cpp23
-rw-r--r--compiler/luci/import/src/Nodes/CircleSelect.cpp14
-rw-r--r--compiler/luci/import/src/Nodes/CircleSelectV2.cpp21
-rw-r--r--compiler/luci/import/src/Nodes/CircleShape.cpp10
-rw-r--r--compiler/luci/import/src/Nodes/CircleSin.cpp13
-rw-r--r--compiler/luci/import/src/Nodes/CircleSlice.cpp8
-rw-r--r--compiler/luci/import/src/Nodes/CircleSoftmax.cpp6
-rw-r--r--compiler/luci/import/src/Nodes/CircleSpaceToDepth.cpp7
-rw-r--r--compiler/luci/import/src/Nodes/CircleSparseToDense.cpp5
-rw-r--r--compiler/luci/import/src/Nodes/CircleSplit.cpp63
-rw-r--r--compiler/luci/import/src/Nodes/CircleSplitV.cpp76
-rw-r--r--compiler/luci/import/src/Nodes/CircleSqrt.cpp5
-rw-r--r--compiler/luci/import/src/Nodes/CircleSquare.cpp16
-rw-r--r--compiler/luci/import/src/Nodes/CircleSquaredDifference.cpp27
-rw-r--r--compiler/luci/import/src/Nodes/CircleSqueeze.cpp9
-rw-r--r--compiler/luci/import/src/Nodes/CircleStridedSlice.cpp8
-rw-r--r--compiler/luci/import/src/Nodes/CircleSub.cpp8
-rw-r--r--compiler/luci/import/src/Nodes/CircleSum.cpp5
-rw-r--r--compiler/luci/import/src/Nodes/CircleTanh.cpp13
-rw-r--r--compiler/luci/import/src/Nodes/CircleTile.cpp20
-rw-r--r--compiler/luci/import/src/Nodes/CircleTopKV2.cpp76
-rw-r--r--compiler/luci/import/src/Nodes/CircleTranspose.cpp8
-rw-r--r--compiler/luci/import/src/Nodes/CircleTransposeConv.cpp27
-rw-r--r--compiler/luci/import/src/Nodes/CircleUnidirectionalSequenceLSTM.cpp76
-rw-r--r--compiler/luci/import/src/Nodes/CircleUnique.cpp55
-rw-r--r--compiler/luci/import/src/Nodes/CircleUnpack.cpp72
-rw-r--r--compiler/luci/import/src/Nodes/CircleVariable.cpp80
-rw-r--r--compiler/luci/import/src/Nodes/CircleWhere.cpp22
-rw-r--r--compiler/luci/import/src/Nodes/CircleWhile.cpp18
-rw-r--r--compiler/luci/import/src/Nodes/CircleZerosLike.cpp8
-rw-r--r--compiler/luci/import/src/PostImport.cpp47
-rw-r--r--compiler/luci/import/src/ValidateHelpers.cpp40
-rw-r--r--compiler/luci/lang/CMakeLists.txt9
-rw-r--r--compiler/luci/lang/include/luci/IR/AttrDilation.h14
-rw-r--r--compiler/luci/lang/include/luci/IR/AttrFilter.h14
-rw-r--r--compiler/luci/lang/include/luci/IR/AttrFusedActFunc.h4
-rw-r--r--compiler/luci/lang/include/luci/IR/AttrStride.h14
-rw-r--r--compiler/luci/lang/include/luci/IR/CircleNodeDecl.h8
-rw-r--r--compiler/luci/lang/include/luci/IR/CircleNodeImpl.h4
-rw-r--r--compiler/luci/lang/include/luci/IR/CircleNodeMixins.h107
-rw-r--r--compiler/luci/lang/include/luci/IR/CircleNodeVisitor.h8
-rw-r--r--compiler/luci/lang/include/luci/IR/CircleNodes.h27
-rw-r--r--compiler/luci/lang/include/luci/IR/CircleNodes.lst266
-rw-r--r--compiler/luci/lang/include/luci/IR/CircleOpcode.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/CircleQuantParam.h4
-rw-r--r--compiler/luci/lang/include/luci/IR/DeadNodeQueryService.h (renamed from compiler/luci/lang/src/DeadNodeQueryService.h)0
-rw-r--r--compiler/luci/lang/include/luci/IR/ExecutionPlanTable.h27
-rw-r--r--compiler/luci/lang/include/luci/IR/LuciNodeMixins.h82
-rw-r--r--compiler/luci/lang/include/luci/IR/Module.h20
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleAbs.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleAdd.h4
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleArgMax.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleArgMin.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleAveragePool2D.h12
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleBCQFullyConnected.h10
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleBCQGather.h6
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleBatchMatMul.h10
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleBatchToSpaceND.h4
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleBidirectionalSequenceLSTM.h172
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleBidirectionalSequenceLSTMOut.h48
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleCast.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleCeil.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleConcatenation.h8
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleConst.h7
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleConv2D.h8
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleCos.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleCustom.h10
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleCustomOut.h7
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleDensify.h40
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleDepthToSpace.h10
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleDepthwiseConv2D.h12
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleDequantize.h40
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleDiv.h7
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleElu.h5
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleEqual.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleExp.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleExpandDims.h5
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleFakeQuant.h60
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleFill.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleFloor.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleFloorDiv.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleFloorMod.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleFullyConnected.h29
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleGather.h4
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleGatherNd.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleGelu.h47
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleGreater.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleGreaterEqual.h4
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleHardSwish.h40
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleIf.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleIfOut.h5
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleInput.h7
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleInstanceNorm.h9
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleL2Normalize.h6
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleL2Pool2D.h10
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleLeakyRelu.h8
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleLess.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleLessEqual.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleLocalResponseNormalization.h4
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleLog.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleLogSoftmax.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleLogicalAnd.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleLogicalNot.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleLogicalOr.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleLogistic.h5
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleMatrixDiag.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleMatrixSetDiag.h4
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleMaxPool2D.h10
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleMaximum.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleMean.h4
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleMinimum.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleMirrorPad.h5
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleMul.h4
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleNeg.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV4.h4
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV4Out.h7
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5.h4
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5Out.h7
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleNotEqual.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleOneHot.h4
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleOutput.h10
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CirclePRelu.h5
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CirclePad.h5
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CirclePadV2.h5
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CirclePow.h5
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleQuantize.h40
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleRange.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleRank.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleReduceAny.h4
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleReduceMax.h4
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleReduceMin.h4
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleReduceProd.h4
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleRelu.h5
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleRelu6.h5
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleReluN1To1.h5
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleReshape.h7
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleResizeBilinear.h12
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleResizeNearestNeighbor.h10
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleReverseSequence.h19
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleReverseV2.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleRound.h5
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleRsqrt.h5
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleSVDF.h70
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleScatterNd.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleSegmentSum.h5
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleSelect.h5
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleSelectV2.h5
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleShape.h5
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleSin.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleSlice.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleSoftmax.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleSpaceToBatchND.h4
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleSpaceToDepth.h10
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleSparseToDense.h4
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleSplit.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleSplitOut.h5
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleSplitV.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleSplitVOut.h7
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleSqrt.h5
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleSquare.h5
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleSquaredDifference.h7
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleSqueeze.h5
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleStridedSlice.h4
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleSub.h7
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleSum.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleTanh.h5
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleTile.h5
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleTopKV2.h5
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleTopKV2Out.h7
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleTranspose.h8
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleTransposeConv.h7
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleUnidirectionalSequenceLSTM.h115
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleUnique.h4
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleUniqueOut.h7
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleUnpack.h5
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleUnpackOut.h7
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleVariable.h39
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleWhere.h5
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleWhile.h2
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleWhileOut.h5
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleZerosLike.h8
-rw-r--r--compiler/luci/lang/include/luci/IR/SparsityParam.h233
-rw-r--r--compiler/luci/lang/src/AttrDilation.cpp36
-rw-r--r--compiler/luci/lang/src/AttrDilation.test.cpp36
-rw-r--r--compiler/luci/lang/src/AttrFilter.cpp36
-rw-r--r--compiler/luci/lang/src/AttrFilter.test.cpp36
-rw-r--r--compiler/luci/lang/src/AttrStride.cpp36
-rw-r--r--compiler/luci/lang/src/AttrStride.test.cpp36
-rw-r--r--compiler/luci/lang/src/CircleDialect.cpp3
-rw-r--r--compiler/luci/lang/src/CircleNodeMixins.cpp18
-rw-r--r--compiler/luci/lang/src/CircleNodes.cpp25
-rw-r--r--compiler/luci/lang/src/CircleQuantParam.cpp46
-rw-r--r--compiler/luci/lang/src/CircleQuantParam.test.cpp78
-rw-r--r--compiler/luci/lang/src/DeadNodeQueryService.cpp3
-rw-r--r--compiler/luci/lang/src/LuciNodeMixins.cpp18
-rw-r--r--compiler/luci/lang/src/Nodes/CircleBatchMatMul.test.cpp2
-rw-r--r--compiler/luci/lang/src/Nodes/CircleBidrectionalSequenceLSTM.test.cpp130
-rw-r--r--compiler/luci/lang/src/Nodes/CircleConst.cpp45
-rw-r--r--compiler/luci/lang/src/Nodes/CircleConst.test.cpp66
-rw-r--r--compiler/luci/lang/src/Nodes/CircleCustom.test.cpp7
-rw-r--r--compiler/luci/lang/src/Nodes/CircleDensify.test.cpp76
-rw-r--r--compiler/luci/lang/src/Nodes/CircleDequantize.test.cpp94
-rw-r--r--compiler/luci/lang/src/Nodes/CircleFakeQuant.test.cpp36
-rw-r--r--compiler/luci/lang/src/Nodes/CircleFullyConnected.test.cpp1
-rw-r--r--compiler/luci/lang/src/Nodes/CircleGelu.test.cpp81
-rw-r--r--compiler/luci/lang/src/Nodes/CircleHardSwish.test.cpp76
-rw-r--r--compiler/luci/lang/src/Nodes/CircleQuantize.test.cpp94
-rw-r--r--compiler/luci/lang/src/Nodes/CircleSVDF.test.cpp101
-rw-r--r--compiler/luci/lang/src/Nodes/CircleSparseToDense.test.cpp2
-rw-r--r--compiler/luci/lang/src/Nodes/CircleSum.test.cpp2
-rw-r--r--compiler/luci/lang/src/Nodes/CircleUnidirectionalSequenceLSTM.test.cpp100
-rw-r--r--compiler/luci/lang/src/Nodes/CircleVariable.test.cpp61
-rw-r--r--compiler/luci/log/CMakeLists.txt8
-rw-r--r--compiler/luci/log/include/luci/Log.h5
-rw-r--r--compiler/luci/log/src/Log.cpp30
-rw-r--r--compiler/luci/logex/CMakeLists.txt22
-rw-r--r--compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp282
-rw-r--r--compiler/luci/logex/src/CircleNodeSummaryBuilder.h52
-rw-r--r--compiler/luci/logex/src/CircleNodeSummaryBuilder.test.cpp317
-rw-r--r--compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp1157
-rw-r--r--compiler/luci/logex/src/CircleNodeSummaryBuilders.h836
-rw-r--r--compiler/luci/logex/src/FormattedGraph.cpp1774
-rw-r--r--compiler/luci/partition/CMakeLists.txt36
-rw-r--r--compiler/luci/partition/README.md4
-rw-r--r--compiler/luci/partition/include/luci/ConnectNode.h221
-rw-r--r--compiler/luci/partition/include/luci/Partition.h80
-rw-r--r--compiler/luci/partition/include/luci/PartitionDump.h26
-rw-r--r--compiler/luci/partition/include/luci/PartitionValidate.h31
-rw-r--r--compiler/luci/partition/src/CircleOpCode.cpp79
-rw-r--r--compiler/luci/partition/src/CircleOpCode.h31
-rw-r--r--compiler/luci/partition/src/CircleOpCode.test.cpp31
-rw-r--r--compiler/luci/partition/src/ConnectNode.cpp38
-rw-r--r--compiler/luci/partition/src/ConnectNode.test.cpp19
-rw-r--r--compiler/luci/partition/src/ConnectNode.test.h236
-rw-r--r--compiler/luci/partition/src/Nodes/CircleAbs.cpp41
-rw-r--r--compiler/luci/partition/src/Nodes/CircleAbs.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleAdd.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleAdd.test.cpp100
-rw-r--r--compiler/luci/partition/src/Nodes/CircleAddN.cpp42
-rw-r--r--compiler/luci/partition/src/Nodes/CircleAddN.test.cpp95
-rw-r--r--compiler/luci/partition/src/Nodes/CircleArgMax.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleArgMax.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleArgMin.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleArgMin.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleAveragePool2D.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleAveragePool2D.test.cpp99
-rw-r--r--compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.cpp47
-rw-r--r--compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.test.cpp106
-rw-r--r--compiler/luci/partition/src/Nodes/CircleBCQGather.cpp44
-rw-r--r--compiler/luci/partition/src/Nodes/CircleBCQGather.test.cpp96
-rw-r--r--compiler/luci/partition/src/Nodes/CircleBatchMatMul.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleBatchMatMul.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.cpp42
-rw-r--r--compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.test.cpp94
-rw-r--r--compiler/luci/partition/src/Nodes/CircleCast.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleCast.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleCeil.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleCeil.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleConcatenation.cpp42
-rw-r--r--compiler/luci/partition/src/Nodes/CircleConcatenation.test.cpp103
-rw-r--r--compiler/luci/partition/src/Nodes/CircleConst.cpp27
-rw-r--r--compiler/luci/partition/src/Nodes/CircleConv2D.cpp42
-rw-r--r--compiler/luci/partition/src/Nodes/CircleConv2D.test.cpp103
-rw-r--r--compiler/luci/partition/src/Nodes/CircleCos.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleCos.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleCustom.cpp42
-rw-r--r--compiler/luci/partition/src/Nodes/CircleCustom.test.cpp111
-rw-r--r--compiler/luci/partition/src/Nodes/CircleCustomOut.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleCustomOut.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleDensify.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleDensify.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleDepthToSpace.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleDepthToSpace.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.cpp42
-rw-r--r--compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.test.cpp103
-rw-r--r--compiler/luci/partition/src/Nodes/CircleDequantize.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleDequantize.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleDiv.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleDiv.test.cpp100
-rw-r--r--compiler/luci/partition/src/Nodes/CircleElu.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleElu.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleEqual.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleEqual.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleExp.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleExp.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleExpandDims.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleExpandDims.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFakeQuant.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFakeQuant.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFill.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFill.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFloor.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFloor.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFloorDiv.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFloorDiv.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFloorMod.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFloorMod.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFullyConnected.cpp42
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFullyConnected.test.cpp103
-rw-r--r--compiler/luci/partition/src/Nodes/CircleGather.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleGather.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleGatherNd.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleGatherNd.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleGelu.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleGelu.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleGreater.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleGreater.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleGreaterEqual.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleGreaterEqual.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleHardSwish.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleHardSwish.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleIf.cpp46
-rw-r--r--compiler/luci/partition/src/Nodes/CircleIf.test.cpp102
-rw-r--r--compiler/luci/partition/src/Nodes/CircleIfOut.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleIfOut.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleInstanceNorm.cpp42
-rw-r--r--compiler/luci/partition/src/Nodes/CircleInstanceNorm.test.cpp102
-rw-r--r--compiler/luci/partition/src/Nodes/CircleL2Normalize.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleL2Normalize.test.cpp98
-rw-r--r--compiler/luci/partition/src/Nodes/CircleL2Pool2D.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleL2Pool2D.test.cpp99
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLeakyRelu.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLeakyRelu.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLess.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLess.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLessEqual.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLessEqual.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLog.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLog.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLogSoftmax.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLogSoftmax.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLogicalAnd.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLogicalAnd.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLogicalNot.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLogicalNot.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLogicalOr.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLogicalOr.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLogistic.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLogistic.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMatrixDiag.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMatrixDiag.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMaxPool2D.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMaxPool2D.test.cpp99
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMaximum.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMaximum.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMean.cpp41
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMean.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMinimum.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMinimum.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMirrorPad.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMirrorPad.test.cpp100
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMul.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMul.test.cpp100
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNeg.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNeg.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.cpp46
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.test.cpp98
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.cpp48
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.test.cpp100
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNotEqual.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNotEqual.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleOneHot.cpp44
-rw-r--r--compiler/luci/partition/src/Nodes/CircleOneHot.test.cpp96
-rw-r--r--compiler/luci/partition/src/Nodes/CircleOutputDummy.cpp27
-rw-r--r--compiler/luci/partition/src/Nodes/CircleOutputExclude.cpp27
-rw-r--r--compiler/luci/partition/src/Nodes/CirclePRelu.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CirclePRelu.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CirclePack.cpp42
-rw-r--r--compiler/luci/partition/src/Nodes/CirclePack.test.cpp95
-rw-r--r--compiler/luci/partition/src/Nodes/CirclePad.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CirclePad.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CirclePadV2.cpp42
-rw-r--r--compiler/luci/partition/src/Nodes/CirclePadV2.test.cpp94
-rw-r--r--compiler/luci/partition/src/Nodes/CirclePow.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CirclePow.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleQuantize.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleQuantize.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRange.cpp42
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRange.test.cpp94
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRank.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRank.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReduceAny.cpp41
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReduceAny.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReduceMax.cpp41
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReduceMax.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReduceMin.cpp41
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReduceMin.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReduceProd.cpp41
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReduceProd.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRelu.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRelu.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRelu6.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRelu6.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReluN1To1.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReluN1To1.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReshape.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReshape.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleResizeBilinear.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleResizeBilinear.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReverseSequence.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReverseSequence.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReverseV2.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReverseV2.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRound.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRound.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRsqrt.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRsqrt.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSVDF.cpp47
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSVDF.test.cpp106
-rw-r--r--compiler/luci/partition/src/Nodes/CircleScatterNd.cpp42
-rw-r--r--compiler/luci/partition/src/Nodes/CircleScatterNd.test.cpp94
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSegmentSum.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSegmentSum.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSelect.cpp42
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSelect.test.cpp94
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSelectV2.cpp42
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSelectV2.test.cpp94
-rw-r--r--compiler/luci/partition/src/Nodes/CircleShape.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleShape.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSin.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSin.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSlice.cpp42
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSlice.test.cpp94
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSoftmax.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSoftmax.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.cpp42
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.test.cpp94
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSpaceToDepth.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSpaceToDepth.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSparseToDense.cpp44
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSparseToDense.test.cpp96
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSplit.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSplit.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSplitOut.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSplitOut.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSplitV.cpp42
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSplitV.test.cpp94
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSplitVOut.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSplitVOut.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSqrt.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSqrt.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSquare.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSquare.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSquaredDifference.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSquaredDifference.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSqueeze.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSqueeze.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleStridedSlice.cpp44
-rw-r--r--compiler/luci/partition/src/Nodes/CircleStridedSlice.test.cpp96
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSub.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSub.test.cpp100
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSum.cpp41
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSum.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTanh.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTanh.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTile.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTile.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTopKV2.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTopKV2.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTopKV2Out.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTopKV2Out.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTranspose.cpp40
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTranspose.test.cpp92
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTransposeConv.cpp44
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTransposeConv.test.cpp105
-rw-r--r--compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.cpp116
-rw-r--r--compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.test.cpp133
-rw-r--r--compiler/luci/partition/src/Nodes/CircleUnique.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleUnique.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleUniqueOut.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleUniqueOut.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleUnpack.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleUnpack.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleUnpackOut.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleUnpackOut.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleVariable.cpp27
-rw-r--r--compiler/luci/partition/src/Nodes/CircleWhere.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleWhere.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleWhile.cpp42
-rw-r--r--compiler/luci/partition/src/Nodes/CircleWhile.test.cpp93
-rw-r--r--compiler/luci/partition/src/Nodes/CircleWhileOut.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleWhileOut.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleZerosLike.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleZerosLike.test.cpp90
-rw-r--r--compiler/luci/partition/src/Partition.cpp61
-rw-r--r--compiler/luci/partition/src/Partition.test.cpp84
-rw-r--r--compiler/luci/partition/src/PartitionCleanup.cpp136
-rw-r--r--compiler/luci/partition/src/PartitionCleanup.h34
-rw-r--r--compiler/luci/partition/src/PartitionDump.cpp48
-rw-r--r--compiler/luci/partition/src/PartitionIR.cpp101
-rw-r--r--compiler/luci/partition/src/PartitionIR.h93
-rw-r--r--compiler/luci/partition/src/PartitionIR.test.cpp75
-rw-r--r--compiler/luci/partition/src/PartitionIRDump.cpp71
-rw-r--r--compiler/luci/partition/src/PartitionIRDump.h35
-rw-r--r--compiler/luci/partition/src/PartitionMerge.cpp261
-rw-r--r--compiler/luci/partition/src/PartitionMerge.h31
-rw-r--r--compiler/luci/partition/src/PartitionPGroups.cpp263
-rw-r--r--compiler/luci/partition/src/PartitionPGroups.h39
-rw-r--r--compiler/luci/partition/src/PartitionPGroups.test.cpp81
-rw-r--r--compiler/luci/partition/src/PartitionPModules.cpp407
-rw-r--r--compiler/luci/partition/src/PartitionPModules.h31
-rw-r--r--compiler/luci/partition/src/PartitionPModules.test.cpp83
-rw-r--r--compiler/luci/partition/src/PartitionPModulesDump.cpp47
-rw-r--r--compiler/luci/partition/src/PartitionPModulesDump.h34
-rw-r--r--compiler/luci/partition/src/PartitionValidate.cpp64
-rw-r--r--compiler/luci/pass/CMakeLists.txt28
-rw-r--r--compiler/luci/pass/include/luci/CircleOptimizer.h72
-rw-r--r--compiler/luci/pass/include/luci/CircleQuantizer.h98
-rw-r--r--compiler/luci/pass/include/luci/DynamicBatchToSingleBatch.h29
-rw-r--r--compiler/luci/pass/include/luci/ModulePass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/CircleShapeInferencePass.h42
-rw-r--r--compiler/luci/pass/include/luci/Pass/CircleTypeInferencePass.h42
-rw-r--r--compiler/luci/pass/include/luci/Pass/ConvertNCHWToNHWCPass.h60
-rw-r--r--compiler/luci/pass/include/luci/Pass/ConvertToFakeQuantizedModelPass.h39
-rw-r--r--compiler/luci/pass/include/luci/Pass/CopyQuantParamPass.h53
-rw-r--r--compiler/luci/pass/include/luci/Pass/DecomposeHardSwishPass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/DynamicBatchToSingleBatchPass.h39
-rw-r--r--compiler/luci/pass/include/luci/Pass/ExpandBroadcastConstPass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/FoldAddV2Pass.h38
-rw-r--r--compiler/luci/pass/include/luci/Pass/FoldCastPass.h38
-rw-r--r--compiler/luci/pass/include/luci/Pass/FoldDensifyPass.h38
-rw-r--r--compiler/luci/pass/include/luci/Pass/FoldDepthwiseConv2DPass.h38
-rw-r--r--compiler/luci/pass/include/luci/Pass/FoldDequantizePass.h38
-rw-r--r--compiler/luci/pass/include/luci/Pass/FoldFullyConnectedPass.h38
-rw-r--r--compiler/luci/pass/include/luci/Pass/FoldGatherPass.h38
-rw-r--r--compiler/luci/pass/include/luci/Pass/FoldSparseToDensePass.h38
-rw-r--r--compiler/luci/pass/include/luci/Pass/ForceQuantParamPass.h56
-rw-r--r--compiler/luci/pass/include/luci/Pass/ForwardReshapeToUnaryOpPass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/ForwardTransposeOpPass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/FuseActivationFunctionPass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/FuseAddWithFullyConnectedPass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/FuseAddWithTConvPass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/FuseBCQPass.h5
-rw-r--r--compiler/luci/pass/include/luci/Pass/FuseBatchNormWithConvPass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/FuseBatchNormWithDwConvPass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/FuseBatchNormWithTConvPass.h (renamed from compiler/luci/pass/include/luci/Pass/FuseBatchNormWithTConv.h)0
-rw-r--r--compiler/luci/pass/include/luci/Pass/FuseGeluPass.h39
-rw-r--r--compiler/luci/pass/include/luci/Pass/FuseMeanWithMeanPass.h38
-rw-r--r--compiler/luci/pass/include/luci/Pass/FusePReluPass.h40
-rw-r--r--compiler/luci/pass/include/luci/Pass/FusePreActivationBatchNormPass.h42
-rw-r--r--compiler/luci/pass/include/luci/Pass/FuseTransposeWithMeanPass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/MakeBatchNormGammaPositivePass.h39
-rw-r--r--compiler/luci/pass/include/luci/Pass/PropagateQParamBackwardPass.h42
-rw-r--r--compiler/luci/pass/include/luci/Pass/PropagateQParamForwardPass.h44
-rw-r--r--compiler/luci/pass/include/luci/Pass/QuantizationParameters.h11
-rw-r--r--compiler/luci/pass/include/luci/Pass/QuantizeDequantizeWeightsPass.h29
-rw-r--r--compiler/luci/pass/include/luci/Pass/QuantizePreCheckerPass.h39
-rw-r--r--compiler/luci/pass/include/luci/Pass/QuantizeWeightsPass.h70
-rw-r--r--compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h27
-rw-r--r--compiler/luci/pass/include/luci/Pass/RemoveDuplicateConstPass.h45
-rw-r--r--compiler/luci/pass/include/luci/Pass/RemoveFakeQuantPass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/RemoveQuantDequantSeqPass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/RemoveRedundantDequantizePass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/RemoveRedundantQuantizePass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/RemoveRedundantReshapePass.h39
-rw-r--r--compiler/luci/pass/include/luci/Pass/RemoveRedundantTransposePass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/RemoveUnnecessaryReshapeNetPass.h39
-rw-r--r--compiler/luci/pass/include/luci/Pass/RemoveUnnecessaryReshapePass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/RemoveUnnecessarySlicePass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/RemoveUnnecessarySplitPass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/RemoveUnnecessaryStridedSlicePass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/ReplaceMulAddWithDepthwiseConvPass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/ReplaceNonConstFCWithBatchMatMulPass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/ReplaceSubWithAddPass.h38
-rw-r--r--compiler/luci/pass/include/luci/Pass/RequantizePass.h4
-rw-r--r--compiler/luci/pass/include/luci/Pass/ResolveCustomOpMaxPoolWithArgmaxPass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/ResolveCustomOpSplitVPass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/ShapeInferencePass.h41
-rw-r--r--compiler/luci/pass/include/luci/Pass/ShuffleWeightTo16x1Float32Pass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/SparsifyTensorPass.h69
-rw-r--r--compiler/luci/pass/include/luci/Pass/SubstitutePackToReshapePass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/SubstitutePadV2ToPadPass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/SubstituteSplitVToSplitPass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/SubstituteSqueezeToReshapePass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/SubstituteStridedSliceToReshapePass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/SubstituteTransposeToReshapePass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/TransformMinMaxToRelu6Pass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/TransformMinReluToRelu6Pass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/TypeInferencePass.h42
-rw-r--r--compiler/luci/pass/include/luci/Pass/UnrollUnidirectionalSequenceLSTMPass.h37
-rw-r--r--compiler/luci/pass/src/BatchNormPatternFinder.cpp138
-rw-r--r--compiler/luci/pass/src/BatchNormPatternFinder.h43
-rw-r--r--compiler/luci/pass/src/BatchNormPatternFinder.test.cpp310
-rw-r--r--compiler/luci/pass/src/CircleOptimizer.cpp470
-rw-r--r--compiler/luci/pass/src/CircleOptimizer.test.cpp73
-rw-r--r--compiler/luci/pass/src/CircleOptimizerUtils.cpp72
-rw-r--r--compiler/luci/pass/src/CircleOptimizerUtils.h15
-rw-r--r--compiler/luci/pass/src/CircleQuantizer.cpp663
-rw-r--r--compiler/luci/pass/src/CircleQuantizer.test.cpp191
-rw-r--r--compiler/luci/pass/src/CircleShapeInferencePass.cpp91
-rw-r--r--compiler/luci/pass/src/CircleShapeInferencePass.test.cpp364
-rw-r--r--compiler/luci/pass/src/CircleTypeInferencePass.cpp61
-rw-r--r--compiler/luci/pass/src/CircleTypeInferencePass.test.cpp26
-rw-r--r--compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp1598
-rw-r--r--compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp2237
-rw-r--r--compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.cpp286
-rw-r--r--compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.test.cpp277
-rw-r--r--compiler/luci/pass/src/CopyQuantParamPass.cpp82
-rw-r--r--compiler/luci/pass/src/DecomposeHardSwishPass.cpp147
-rw-r--r--compiler/luci/pass/src/DecomposeHardSwishPass.test.cpp205
-rw-r--r--compiler/luci/pass/src/DynamicBatchToSingleBatch.cpp51
-rw-r--r--compiler/luci/pass/src/DynamicBatchToSingleBatchPass.cpp78
-rw-r--r--compiler/luci/pass/src/DynamicBatchToSingleBatchPass.test.cpp126
-rw-r--r--compiler/luci/pass/src/ExpandBroadcastConstPass.cpp178
-rw-r--r--compiler/luci/pass/src/ExpandBroadcastConstPass.test.cpp150
-rw-r--r--compiler/luci/pass/src/FoldAddV2Pass.cpp122
-rw-r--r--compiler/luci/pass/src/FoldAddV2Pass.test.cpp137
-rw-r--r--compiler/luci/pass/src/FoldCastPass.cpp107
-rw-r--r--compiler/luci/pass/src/FoldCastPass.test.cpp112
-rw-r--r--compiler/luci/pass/src/FoldDensifyPass.cpp180
-rw-r--r--compiler/luci/pass/src/FoldDensifyPass.test.cpp158
-rw-r--r--compiler/luci/pass/src/FoldDepthwiseConv2DPass.cpp243
-rw-r--r--compiler/luci/pass/src/FoldDepthwiseConv2DPass.test.cpp129
-rw-r--r--compiler/luci/pass/src/FoldDequantizePass.cpp248
-rw-r--r--compiler/luci/pass/src/FoldDequantizePass.test.cpp403
-rw-r--r--compiler/luci/pass/src/FoldFullyConnectedPass.cpp198
-rw-r--r--compiler/luci/pass/src/FoldFullyConnectedPass.test.cpp160
-rw-r--r--compiler/luci/pass/src/FoldGatherPass.cpp185
-rw-r--r--compiler/luci/pass/src/FoldGatherPass.test.cpp214
-rw-r--r--compiler/luci/pass/src/FoldSparseToDensePass.cpp142
-rw-r--r--compiler/luci/pass/src/FoldSparseToDensePass.test.cpp133
-rw-r--r--compiler/luci/pass/src/ForceQuantParamPass.cpp77
-rw-r--r--compiler/luci/pass/src/ForceQuantParamPass.test.cpp184
-rw-r--r--compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.cpp231
-rw-r--r--compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.test.cpp211
-rw-r--r--compiler/luci/pass/src/ForwardTransposeOpPass.cpp366
-rw-r--r--compiler/luci/pass/src/ForwardTransposeOpPass.test.cpp524
-rw-r--r--compiler/luci/pass/src/FuseActivationFunctionPass.cpp111
-rw-r--r--compiler/luci/pass/src/FuseActivationFunctionPass.test.cpp269
-rw-r--r--compiler/luci/pass/src/FuseAddWithFullyConnectedPass.cpp148
-rw-r--r--compiler/luci/pass/src/FuseAddWithFullyConnectedPass.test.cpp152
-rw-r--r--compiler/luci/pass/src/FuseAddWithTConvPass.cpp154
-rw-r--r--compiler/luci/pass/src/FuseAddWithTConvPass.test.cpp26
-rw-r--r--compiler/luci/pass/src/FuseBCQPass.cpp772
-rw-r--r--compiler/luci/pass/src/FuseBCQPass.test.cpp26
-rw-r--r--compiler/luci/pass/src/FuseBatchNormWithConvPass.cpp232
-rw-r--r--compiler/luci/pass/src/FuseBatchNormWithConvPass.test.cpp26
-rw-r--r--compiler/luci/pass/src/FuseBatchNormWithDwConvPass.cpp233
-rw-r--r--compiler/luci/pass/src/FuseBatchNormWithDwConvPass.test.cpp26
-rw-r--r--compiler/luci/pass/src/FuseBatchNormWithTConv.cpp159
-rw-r--r--compiler/luci/pass/src/FuseBatchNormWithTConvPass.cpp274
-rw-r--r--compiler/luci/pass/src/FuseBatchNormWithTConvPass.test.cpp26
-rw-r--r--compiler/luci/pass/src/FuseGeluPass.cpp347
-rw-r--r--compiler/luci/pass/src/FuseGeluPass.test.cpp251
-rw-r--r--compiler/luci/pass/src/FuseInstanceNormPass.cpp1157
-rw-r--r--compiler/luci/pass/src/FuseInstanceNormPass.test.cpp32
-rw-r--r--compiler/luci/pass/src/FuseMeanWithMeanPass.cpp172
-rw-r--r--compiler/luci/pass/src/FuseMeanWithMeanPass.test.cpp142
-rw-r--r--compiler/luci/pass/src/FusePReluPass.cpp202
-rw-r--r--compiler/luci/pass/src/FusePReluPass.test.cpp187
-rw-r--r--compiler/luci/pass/src/FusePreActivationBatchNormPass.cpp626
-rw-r--r--compiler/luci/pass/src/FusePreActivationBatchNormPass.test.cpp406
-rw-r--r--compiler/luci/pass/src/FusePreActivationBatchNormPassInternal.h44
-rw-r--r--compiler/luci/pass/src/FuseTransposeWithMeanPass.cpp164
-rw-r--r--compiler/luci/pass/src/FuseTransposeWithMeanPass.test.cpp147
-rw-r--r--compiler/luci/pass/src/MakeBatchNormGammaPositivePass.cpp99
-rw-r--r--compiler/luci/pass/src/MakeBatchNormGammaPositivePass.test.cpp26
-rw-r--r--compiler/luci/pass/src/ModulePhase.cpp71
-rw-r--r--compiler/luci/pass/src/ModulePhase.h67
-rw-r--r--compiler/luci/pass/src/ModulePhase.test.cpp57
-rw-r--r--compiler/luci/pass/src/PassTestGraphs.h142
-rw-r--r--compiler/luci/pass/src/ProgressReporter.cpp42
-rw-r--r--compiler/luci/pass/src/ProgressReporter.h28
-rw-r--r--compiler/luci/pass/src/PropagateConcatenationQparam.test.cpp375
-rw-r--r--compiler/luci/pass/src/PropagateQParamBackwardPass.cpp546
-rw-r--r--compiler/luci/pass/src/PropagateQParamBackwardPass.test.cpp310
-rw-r--r--compiler/luci/pass/src/PropagateQParamForwardPass.cpp199
-rw-r--r--compiler/luci/pass/src/PropagateQParamForwardPass.test.cpp260
-rw-r--r--compiler/luci/pass/src/QuantizationUtils.cpp345
-rw-r--r--compiler/luci/pass/src/QuantizationUtils.h62
-rw-r--r--compiler/luci/pass/src/QuantizeActivation.cpp296
-rw-r--r--compiler/luci/pass/src/QuantizeActivation.h169
-rw-r--r--compiler/luci/pass/src/QuantizeBias.cpp314
-rw-r--r--compiler/luci/pass/src/QuantizeBias.h56
-rw-r--r--compiler/luci/pass/src/QuantizeBias.test.cpp146
-rw-r--r--compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp559
-rw-r--r--compiler/luci/pass/src/QuantizeDequantizeWeightsPass.test.cpp41
-rw-r--r--compiler/luci/pass/src/QuantizePreCheckerPass.cpp119
-rw-r--r--compiler/luci/pass/src/QuantizePreCheckerPass.test.cpp402
-rw-r--r--compiler/luci/pass/src/QuantizeWeights.cpp563
-rw-r--r--compiler/luci/pass/src/QuantizeWeights.h55
-rw-r--r--compiler/luci/pass/src/QuantizeWeightsOnly.cpp224
-rw-r--r--compiler/luci/pass/src/QuantizeWeightsOnly.h51
-rw-r--r--compiler/luci/pass/src/QuantizeWeightsPass.cpp46
-rw-r--r--compiler/luci/pass/src/QuantizeWeightsPass.test.cpp123
-rw-r--r--compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp956
-rw-r--r--compiler/luci/pass/src/QuantizeWithMinMaxPass.test.cpp107
-rw-r--r--compiler/luci/pass/src/QuantizedModelVerifier.cpp84
-rw-r--r--compiler/luci/pass/src/QuantizedModelVerifier.h61
-rw-r--r--compiler/luci/pass/src/QuantizedModelVerifier.test.cpp2828
-rw-r--r--compiler/luci/pass/src/RemoveDuplicateConstPass.cpp225
-rw-r--r--compiler/luci/pass/src/RemoveDuplicateConstPass.test.cpp159
-rw-r--r--compiler/luci/pass/src/RemoveFakeQuantPass.cpp69
-rw-r--r--compiler/luci/pass/src/RemoveFakeQuantPass.test.cpp84
-rw-r--r--compiler/luci/pass/src/RemoveQuantDequantSeqPass.cpp77
-rw-r--r--compiler/luci/pass/src/RemoveQuantDequantSeqPass.test.cpp89
-rw-r--r--compiler/luci/pass/src/RemoveRedundantDequantizePass.cpp80
-rw-r--r--compiler/luci/pass/src/RemoveRedundantDequantizePass.test.cpp114
-rw-r--r--compiler/luci/pass/src/RemoveRedundantQuantizePass.cpp104
-rw-r--r--compiler/luci/pass/src/RemoveRedundantQuantizePass.test.cpp166
-rw-r--r--compiler/luci/pass/src/RemoveRedundantReshape.cpp72
-rw-r--r--compiler/luci/pass/src/RemoveRedundantReshape.test.cpp110
-rw-r--r--compiler/luci/pass/src/RemoveRedundantTransposePass.cpp130
-rw-r--r--compiler/luci/pass/src/RemoveRedundantTransposePass.test.cpp346
-rw-r--r--compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.cpp172
-rw-r--r--compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.test.cpp123
-rw-r--r--compiler/luci/pass/src/RemoveUnnecessaryReshapePass.cpp94
-rw-r--r--compiler/luci/pass/src/RemoveUnnecessaryReshapePass.test.cpp141
-rw-r--r--compiler/luci/pass/src/RemoveUnnecessarySlicePass.cpp111
-rw-r--r--compiler/luci/pass/src/RemoveUnnecessarySlicePass.test.cpp134
-rw-r--r--compiler/luci/pass/src/RemoveUnnecessarySplitPass.cpp64
-rw-r--r--compiler/luci/pass/src/RemoveUnnecessarySplitPass.test.cpp149
-rw-r--r--compiler/luci/pass/src/RemoveUnnecessaryStridedSlicePass.cpp124
-rw-r--r--compiler/luci/pass/src/RemoveUnnecessaryStridedSlicePass.test.cpp142
-rw-r--r--compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.cpp180
-rw-r--r--compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.test.cpp202
-rw-r--r--compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.cpp240
-rw-r--r--compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.test.cpp145
-rw-r--r--compiler/luci/pass/src/ReplaceSubWithAddPass.cpp76
-rw-r--r--compiler/luci/pass/src/ReplaceSubWithAddPass.test.cpp126
-rw-r--r--compiler/luci/pass/src/RequantizePass.cpp157
-rw-r--r--compiler/luci/pass/src/RequantizePass.test.cpp182
-rw-r--r--compiler/luci/pass/src/ResolveCustomOpAddPass.cpp41
-rw-r--r--compiler/luci/pass/src/ResolveCustomOpAddPass.test.cpp26
-rw-r--r--compiler/luci/pass/src/ResolveCustomOpBatchMatMulPass.cpp40
-rw-r--r--compiler/luci/pass/src/ResolveCustomOpBatchMatMulPass.test.cpp168
-rw-r--r--compiler/luci/pass/src/ResolveCustomOpMatMulPass.cpp104
-rw-r--r--compiler/luci/pass/src/ResolveCustomOpMatMulPass.test.cpp26
-rw-r--r--compiler/luci/pass/src/ResolveCustomOpMaxPoolWithArgmaxPass.cpp905
-rw-r--r--compiler/luci/pass/src/ResolveCustomOpMaxPoolWithArgmaxPass.test.cpp26
-rw-r--r--compiler/luci/pass/src/ResolveCustomOpSplitVPass.cpp174
-rw-r--r--compiler/luci/pass/src/ResolveCustomOpSplitVPass.test.cpp175
-rw-r--r--compiler/luci/pass/src/ShapeInferencePass.cpp44
-rw-r--r--compiler/luci/pass/src/ShuffleWeightTo16x1Float32Pass.cpp145
-rw-r--r--compiler/luci/pass/src/ShuffleWeightTo16x1Float32Pass.test.cpp173
-rw-r--r--compiler/luci/pass/src/Sparsifier.cpp229
-rw-r--r--compiler/luci/pass/src/Sparsifier.h87
-rw-r--r--compiler/luci/pass/src/Sparsifier.test.cpp195
-rw-r--r--compiler/luci/pass/src/SparsifyTensorPass.cpp123
-rw-r--r--compiler/luci/pass/src/SparsifyTensorPass.test.cpp30
-rw-r--r--compiler/luci/pass/src/SubstitutePackToReshapePass.cpp126
-rw-r--r--compiler/luci/pass/src/SubstitutePackToReshapePass.test.cpp114
-rw-r--r--compiler/luci/pass/src/SubstitutePadV2ToPadPass.cpp469
-rw-r--r--compiler/luci/pass/src/SubstitutePadV2ToPadPass.test.cpp555
-rw-r--r--compiler/luci/pass/src/SubstituteSplitVToSplitPass.cpp122
-rw-r--r--compiler/luci/pass/src/SubstituteSplitVToSplitPass.test.cpp156
-rw-r--r--compiler/luci/pass/src/SubstituteSqueezeToReshapePass.cpp184
-rw-r--r--compiler/luci/pass/src/SubstituteSqueezeToReshapePass.test.cpp208
-rw-r--r--compiler/luci/pass/src/SubstituteStridedSliceToReshapePass.cpp215
-rw-r--r--compiler/luci/pass/src/SubstituteStridedSliceToReshapePass.test.cpp277
-rw-r--r--compiler/luci/pass/src/SubstituteTransposeToReshapePass.cpp137
-rw-r--r--compiler/luci/pass/src/SubstituteTransposeToReshapePass.test.cpp120
-rw-r--r--compiler/luci/pass/src/TransformMinMaxToRelu6Pass.cpp134
-rw-r--r--compiler/luci/pass/src/TransformMinMaxToRelu6Pass.test.cpp151
-rw-r--r--compiler/luci/pass/src/TransformMinReluToRelu6Pass.cpp124
-rw-r--r--compiler/luci/pass/src/TransformMinReluToRelu6Pass.test.cpp141
-rw-r--r--compiler/luci/pass/src/TypeInferencePass.cpp42
-rw-r--r--compiler/luci/pass/src/UnrollUnidirectionalSequenceLSTMPass.cpp672
-rw-r--r--compiler/luci/pass/src/UnrollUnidirectionalSequenceLSTMPass.test.cpp211
-rw-r--r--compiler/luci/pass/src/VerifyQuantizedBiasScale.cpp105
-rw-r--r--compiler/luci/pass/src/VerifyQuantizedBiasScale.h59
-rw-r--r--compiler/luci/pass/src/VerifyQuantizedNodeGranularity.cpp38
-rw-r--r--compiler/luci/pass/src/VerifyQuantizedNodeGranularity.h633
-rw-r--r--compiler/luci/pass/src/VerifyQuantizedNodeType.cpp586
-rw-r--r--compiler/luci/pass/src/VerifyQuantizedNodeType.h160
-rw-r--r--compiler/luci/pass/src/helpers/CreateCircleConst.cpp20
-rw-r--r--compiler/luci/pass/src/helpers/CreateCircleConst.h88
-rw-r--r--compiler/luci/pass/src/helpers/InferenceCandidates.cpp45
-rw-r--r--compiler/luci/pass/src/helpers/InferenceCandidates.h34
-rw-r--r--compiler/luci/pass/src/helpers/InferenceCandidates.test.cpp122
-rw-r--r--compiler/luci/pass/src/helpers/LayerInfoMap.cpp189
-rw-r--r--compiler/luci/pass/src/helpers/LayerInfoMap.h33
-rw-r--r--compiler/luci/pass/src/helpers/LayerInfoMap.test.cpp201
-rw-r--r--compiler/luci/pass/src/helpers/NodeFiller.cpp20
-rw-r--r--compiler/luci/pass/src/helpers/NodeFiller.h130
-rw-r--r--compiler/luci/pass/src/helpers/NodeFiller.test.cpp59
-rw-r--r--compiler/luci/pass/src/helpers/SparsityFormatConverter.cpp314
-rw-r--r--compiler/luci/pass/src/helpers/SparsityFormatConverter.h130
-rw-r--r--compiler/luci/pass/src/helpers/Strings.cpp100
-rw-r--r--compiler/luci/pass/src/helpers/Strings.h45
-rw-r--r--compiler/luci/pass/src/helpers/Strings.test.cpp73
-rw-r--r--compiler/luci/pass/src/helpers/TypeMapper.cpp20
-rw-r--r--compiler/luci/pass/src/helpers/TypeMapper.h82
-rw-r--r--compiler/luci/pass/src/helpers/TypeMapper.test.cpp93
-rw-r--r--compiler/luci/pass/src/test/TestFirstNode.h43
-rw-r--r--compiler/luci/pass/src/test/TestFirstNode.test.cpp19
-rw-r--r--compiler/luci/pass/src/test/TestShape.h40
-rw-r--r--compiler/luci/pass/src/test/TestShape.test.cpp57
-rw-r--r--compiler/luci/plan/CMakeLists.txt26
-rw-r--r--compiler/luci/plan/include/luci/Plan/CircleNodeExecutionPlan.h58
-rw-r--r--compiler/luci/plan/src/CircleNodeExecutionPlan.cpp77
-rw-r--r--compiler/luci/plan/src/CircleNodeExecutionPlan.test.cpp84
-rw-r--r--compiler/luci/profile/CMakeLists.txt28
-rw-r--r--compiler/luci/profile/README.md119
-rw-r--r--compiler/luci/profile/include/luci/Profile/CircleNodeID.h35
-rw-r--r--compiler/luci/profile/include/luci/Profile/CircleNodeOrigin.h72
-rw-r--r--compiler/luci/profile/src/CircleNodeID.cpp73
-rw-r--r--compiler/luci/profile/src/CircleNodeID.test.cpp44
-rw-r--r--compiler/luci/profile/src/CircleNodeOrigin.cpp189
-rw-r--r--compiler/luci/profile/src/CircleNodeOrigin.test.cpp132
-rw-r--r--compiler/luci/requires.cmake5
-rw-r--r--compiler/luci/service/CMakeLists.txt11
-rw-r--r--compiler/luci/service/include/luci/Service/ChangeOutputs.h38
-rw-r--r--compiler/luci/service/include/luci/Service/CircleNodeClone.h40
-rw-r--r--compiler/luci/service/include/luci/Service/CircleShapeInference.h170
-rw-r--r--compiler/luci/service/include/luci/Service/CircleTypeInference.h170
-rw-r--r--compiler/luci/service/include/luci/Service/Nodes/CircleConst.h32
-rw-r--r--compiler/luci/service/include/luci/Service/ShapeDescription.h7
-rw-r--r--compiler/luci/service/include/luci/Service/Validate.h19
-rw-r--r--compiler/luci/service/src/ChangeOutputs.cpp98
-rw-r--r--compiler/luci/service/src/ChangeOutputs.test.cpp118
-rw-r--r--compiler/luci/service/src/CircleCloneNode.cpp45
-rw-r--r--compiler/luci/service/src/CircleCloneNode.h290
-rw-r--r--compiler/luci/service/src/CircleNodeClone.cpp82
-rw-r--r--compiler/luci/service/src/CircleNodeClone.test.cpp109
-rw-r--r--compiler/luci/service/src/CircleShapeInference.cpp63
-rw-r--r--compiler/luci/service/src/CircleShapeInferenceHelper.cpp51
-rw-r--r--compiler/luci/service/src/CircleShapeInferenceHelper.h50
-rw-r--r--compiler/luci/service/src/CircleShapeInferenceRule.cpp546
-rw-r--r--compiler/luci/service/src/CircleShapeInferenceRule.test.cpp626
-rw-r--r--compiler/luci/service/src/CircleTypeInference.cpp61
-rw-r--r--compiler/luci/service/src/CircleTypeInferenceHelper.cpp43
-rw-r--r--compiler/luci/service/src/CircleTypeInferenceHelper.h48
-rw-r--r--compiler/luci/service/src/CircleTypeInferenceRule.cpp306
-rw-r--r--compiler/luci/service/src/CircleTypeInferenceRule.test.cpp63
-rw-r--r--compiler/luci/service/src/Nodes/CircleAbs.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleAbs.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleAdd.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleAdd.test.cpp84
-rw-r--r--compiler/luci/service/src/Nodes/CircleAddN.cpp28
-rw-r--r--compiler/luci/service/src/Nodes/CircleAddN.test.cpp34
-rw-r--r--compiler/luci/service/src/Nodes/CircleArgMax.cpp30
-rw-r--r--compiler/luci/service/src/Nodes/CircleArgMax.test.cpp35
-rw-r--r--compiler/luci/service/src/Nodes/CircleArgMin.cpp30
-rw-r--r--compiler/luci/service/src/Nodes/CircleArgMin.test.cpp35
-rw-r--r--compiler/luci/service/src/Nodes/CircleAveragePool2D.cpp42
-rw-r--r--compiler/luci/service/src/Nodes/CircleAveragePool2D.test.cpp128
-rw-r--r--compiler/luci/service/src/Nodes/CircleBCQFullyConnected.cpp36
-rw-r--r--compiler/luci/service/src/Nodes/CircleBCQFullyConnected.test.cpp48
-rw-r--r--compiler/luci/service/src/Nodes/CircleBCQGather.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleBCQGather.test.cpp37
-rw-r--r--compiler/luci/service/src/Nodes/CircleBatchMatMul.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleBatchMatMul.test.cpp37
-rw-r--r--compiler/luci/service/src/Nodes/CircleBatchToSpaceND.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleBatchToSpaceND.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleCast.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleCast.test.cpp37
-rw-r--r--compiler/luci/service/src/Nodes/CircleCeil.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleCeil.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleConcatenation.cpp36
-rw-r--r--compiler/luci/service/src/Nodes/CircleConcatenation.test.cpp49
-rw-r--r--compiler/luci/service/src/Nodes/CircleConst.cpp118
-rw-r--r--compiler/luci/service/src/Nodes/CircleConst.test.cpp177
-rw-r--r--compiler/luci/service/src/Nodes/CircleConv2D.cpp42
-rw-r--r--compiler/luci/service/src/Nodes/CircleConv2D.test.cpp61
-rw-r--r--compiler/luci/service/src/Nodes/CircleCos.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleCos.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleCustom.cpp35
-rw-r--r--compiler/luci/service/src/Nodes/CircleCustom.test.cpp46
-rw-r--r--compiler/luci/service/src/Nodes/CircleCustomOut.cpp30
-rw-r--r--compiler/luci/service/src/Nodes/CircleCustomOut.test.cpp35
-rw-r--r--compiler/luci/service/src/Nodes/CircleDensify.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleDensify.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleDepthToSpace.cpp30
-rw-r--r--compiler/luci/service/src/Nodes/CircleDepthToSpace.test.cpp35
-rw-r--r--compiler/luci/service/src/Nodes/CircleDepthwiseConv2D.cpp43
-rw-r--r--compiler/luci/service/src/Nodes/CircleDepthwiseConv2D.test.cpp61
-rw-r--r--compiler/luci/service/src/Nodes/CircleDequantize.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleDequantize.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleDiv.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleDiv.test.cpp46
-rw-r--r--compiler/luci/service/src/Nodes/CircleElu.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleElu.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleEqual.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleEqual.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleExp.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleExp.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleExpandDims.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleExpandDims.test.cpp66
-rw-r--r--compiler/luci/service/src/Nodes/CircleFakeQuant.cpp35
-rw-r--r--compiler/luci/service/src/Nodes/CircleFakeQuant.test.cpp41
-rw-r--r--compiler/luci/service/src/Nodes/CircleFill.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleFill.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleFloor.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleFloor.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleFloorDiv.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleFloorDiv.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleFloorMod.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleFloorMod.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleFullyConnected.cpp39
-rw-r--r--compiler/luci/service/src/Nodes/CircleFullyConnected.test.cpp61
-rw-r--r--compiler/luci/service/src/Nodes/CircleGather.cpp30
-rw-r--r--compiler/luci/service/src/Nodes/CircleGather.test.cpp35
-rw-r--r--compiler/luci/service/src/Nodes/CircleGatherNd.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleGatherNd.test.cpp113
-rw-r--r--compiler/luci/service/src/Nodes/CircleGelu.cpp30
-rw-r--r--compiler/luci/service/src/Nodes/CircleGelu.test.cpp35
-rw-r--r--compiler/luci/service/src/Nodes/CircleGreater.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleGreater.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleGreaterEqual.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleGreaterEqual.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleHardSwish.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleHardSwish.test.cpp74
-rw-r--r--compiler/luci/service/src/Nodes/CircleIf.cpp30
-rw-r--r--compiler/luci/service/src/Nodes/CircleIf.test.cpp37
-rw-r--r--compiler/luci/service/src/Nodes/CircleIfOut.cpp104
-rw-r--r--compiler/luci/service/src/Nodes/CircleIfOut.test.cpp35
-rw-r--r--compiler/luci/service/src/Nodes/CircleInstanceNorm.cpp36
-rw-r--r--compiler/luci/service/src/Nodes/CircleInstanceNorm.test.cpp48
-rw-r--r--compiler/luci/service/src/Nodes/CircleL2Normalize.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleL2Normalize.test.cpp46
-rw-r--r--compiler/luci/service/src/Nodes/CircleL2Pool2D.cpp42
-rw-r--r--compiler/luci/service/src/Nodes/CircleL2Pool2D.test.cpp61
-rw-r--r--compiler/luci/service/src/Nodes/CircleLeakyRelu.cpp30
-rw-r--r--compiler/luci/service/src/Nodes/CircleLeakyRelu.test.cpp35
-rw-r--r--compiler/luci/service/src/Nodes/CircleLess.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleLess.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleLessEqual.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleLessEqual.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleLocalResponseNormalization.cpp35
-rw-r--r--compiler/luci/service/src/Nodes/CircleLocalResponseNormalization.test.cpp41
-rw-r--r--compiler/luci/service/src/Nodes/CircleLog.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleLog.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleLogSoftmax.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleLogSoftmax.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleLogicalAnd.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleLogicalAnd.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleLogicalNot.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleLogicalNot.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleLogicalOr.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleLogicalOr.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleLogistic.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleLogistic.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleMatrixDiag.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleMatrixDiag.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleMatrixSetDiag.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleMatrixSetDiag.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleMaxPool2D.cpp42
-rw-r--r--compiler/luci/service/src/Nodes/CircleMaxPool2D.test.cpp69
-rw-r--r--compiler/luci/service/src/Nodes/CircleMaximum.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleMaximum.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleMean.cpp30
-rw-r--r--compiler/luci/service/src/Nodes/CircleMean.test.cpp35
-rw-r--r--compiler/luci/service/src/Nodes/CircleMinimum.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleMinimum.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleMirrorPad.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleMirrorPad.test.cpp46
-rw-r--r--compiler/luci/service/src/Nodes/CircleMul.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleMul.test.cpp46
-rw-r--r--compiler/luci/service/src/Nodes/CircleNeg.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleNeg.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleNonMaxSuppressionV4.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleNonMaxSuppressionV4.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleNonMaxSuppressionV4Out.cpp30
-rw-r--r--compiler/luci/service/src/Nodes/CircleNonMaxSuppressionV4Out.test.cpp35
-rw-r--r--compiler/luci/service/src/Nodes/CircleNonMaxSuppressionV5.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleNonMaxSuppressionV5.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleNonMaxSuppressionV5Out.cpp30
-rw-r--r--compiler/luci/service/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp35
-rw-r--r--compiler/luci/service/src/Nodes/CircleNotEqual.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleNotEqual.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleOneHot.cpp30
-rw-r--r--compiler/luci/service/src/Nodes/CircleOneHot.test.cpp35
-rw-r--r--compiler/luci/service/src/Nodes/CircleOutputDummy.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleOutputDummy.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleOutputExclude.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleOutputExclude.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CirclePRelu.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CirclePRelu.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CirclePack.cpp30
-rw-r--r--compiler/luci/service/src/Nodes/CirclePack.test.cpp36
-rw-r--r--compiler/luci/service/src/Nodes/CirclePad.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CirclePad.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CirclePadV2.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CirclePadV2.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CirclePow.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CirclePow.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleQuantize.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleQuantize.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleRange.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleRange.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleRank.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleRank.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleReduceAny.cpp30
-rw-r--r--compiler/luci/service/src/Nodes/CircleReduceAny.test.cpp35
-rw-r--r--compiler/luci/service/src/Nodes/CircleReduceMax.cpp30
-rw-r--r--compiler/luci/service/src/Nodes/CircleReduceMax.test.cpp35
-rw-r--r--compiler/luci/service/src/Nodes/CircleReduceMin.cpp30
-rw-r--r--compiler/luci/service/src/Nodes/CircleReduceMin.test.cpp35
-rw-r--r--compiler/luci/service/src/Nodes/CircleReduceProd.cpp30
-rw-r--r--compiler/luci/service/src/Nodes/CircleReduceProd.test.cpp35
-rw-r--r--compiler/luci/service/src/Nodes/CircleRelu.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleRelu.test.cpp74
-rw-r--r--compiler/luci/service/src/Nodes/CircleRelu6.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleRelu6.test.cpp35
-rw-r--r--compiler/luci/service/src/Nodes/CircleReluN1To1.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleReluN1To1.test.cpp35
-rw-r--r--compiler/luci/service/src/Nodes/CircleReshape.cpp37
-rw-r--r--compiler/luci/service/src/Nodes/CircleReshape.test.cpp39
-rw-r--r--compiler/luci/service/src/Nodes/CircleResizeBilinear.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleResizeBilinear.test.cpp73
-rw-r--r--compiler/luci/service/src/Nodes/CircleResizeNearestNeighbor.cpp30
-rw-r--r--compiler/luci/service/src/Nodes/CircleResizeNearestNeighbor.test.cpp71
-rw-r--r--compiler/luci/service/src/Nodes/CircleReverseSequence.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleReverseSequence.test.cpp37
-rw-r--r--compiler/luci/service/src/Nodes/CircleReverseV2.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleReverseV2.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleRound.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleRound.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleRsqrt.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleRsqrt.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleSVDF.cpp37
-rw-r--r--compiler/luci/service/src/Nodes/CircleSVDF.test.cpp47
-rw-r--r--compiler/luci/service/src/Nodes/CircleScatterNd.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleScatterNd.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleSegmentSum.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleSegmentSum.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleSelect.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleSelect.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleSelectV2.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleSelectV2.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleShape.cpp30
-rw-r--r--compiler/luci/service/src/Nodes/CircleShape.test.cpp35
-rw-r--r--compiler/luci/service/src/Nodes/CircleSin.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleSin.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleSlice.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleSlice.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleSoftmax.cpp30
-rw-r--r--compiler/luci/service/src/Nodes/CircleSoftmax.test.cpp35
-rw-r--r--compiler/luci/service/src/Nodes/CircleSpaceToBatchND.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleSpaceToBatchND.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleSpaceToDepth.cpp30
-rw-r--r--compiler/luci/service/src/Nodes/CircleSpaceToDepth.test.cpp35
-rw-r--r--compiler/luci/service/src/Nodes/CircleSparseToDense.cpp30
-rw-r--r--compiler/luci/service/src/Nodes/CircleSparseToDense.test.cpp35
-rw-r--r--compiler/luci/service/src/Nodes/CircleSplit.cpp30
-rw-r--r--compiler/luci/service/src/Nodes/CircleSplit.test.cpp35
-rw-r--r--compiler/luci/service/src/Nodes/CircleSplitOut.cpp30
-rw-r--r--compiler/luci/service/src/Nodes/CircleSplitOut.test.cpp35
-rw-r--r--compiler/luci/service/src/Nodes/CircleSplitV.cpp30
-rw-r--r--compiler/luci/service/src/Nodes/CircleSplitV.test.cpp35
-rw-r--r--compiler/luci/service/src/Nodes/CircleSplitVOut.cpp30
-rw-r--r--compiler/luci/service/src/Nodes/CircleSplitVOut.test.cpp35
-rw-r--r--compiler/luci/service/src/Nodes/CircleSqrt.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleSqrt.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleSquare.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleSquare.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleSquaredDifference.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleSquaredDifference.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleSqueeze.cpp30
-rw-r--r--compiler/luci/service/src/Nodes/CircleSqueeze.test.cpp83
-rw-r--r--compiler/luci/service/src/Nodes/CircleStridedSlice.cpp36
-rw-r--r--compiler/luci/service/src/Nodes/CircleStridedSlice.test.cpp43
-rw-r--r--compiler/luci/service/src/Nodes/CircleSub.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleSub.test.cpp46
-rw-r--r--compiler/luci/service/src/Nodes/CircleSum.cpp30
-rw-r--r--compiler/luci/service/src/Nodes/CircleSum.test.cpp35
-rw-r--r--compiler/luci/service/src/Nodes/CircleTanh.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleTanh.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleTile.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleTile.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleTopKV2.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleTopKV2.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleTopKV2Out.cpp30
-rw-r--r--compiler/luci/service/src/Nodes/CircleTopKV2Out.test.cpp35
-rw-r--r--compiler/luci/service/src/Nodes/CircleTranspose.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleTranspose.test.cpp69
-rw-r--r--compiler/luci/service/src/Nodes/CircleTransposeConv.cpp38
-rw-r--r--compiler/luci/service/src/Nodes/CircleTransposeConv.test.cpp58
-rw-r--r--compiler/luci/service/src/Nodes/CircleUnidirectionalSequenceLSTM.cpp39
-rw-r--r--compiler/luci/service/src/Nodes/CircleUnidirectionalSequenceLSTM.test.cpp54
-rw-r--r--compiler/luci/service/src/Nodes/CircleUnique.cpp30
-rw-r--r--compiler/luci/service/src/Nodes/CircleUnique.test.cpp35
-rw-r--r--compiler/luci/service/src/Nodes/CircleUniqueOut.cpp30
-rw-r--r--compiler/luci/service/src/Nodes/CircleUniqueOut.test.cpp35
-rw-r--r--compiler/luci/service/src/Nodes/CircleUnpack.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleUnpack.test.cpp37
-rw-r--r--compiler/luci/service/src/Nodes/CircleUnpackOut.cpp30
-rw-r--r--compiler/luci/service/src/Nodes/CircleUnpackOut.test.cpp35
-rw-r--r--compiler/luci/service/src/Nodes/CircleVariable.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleVariable.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleWhere.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleWhere.test.cpp33
-rw-r--r--compiler/luci/service/src/Nodes/CircleWhile.cpp30
-rw-r--r--compiler/luci/service/src/Nodes/CircleWhile.test.cpp37
-rw-r--r--compiler/luci/service/src/Nodes/CircleWhileOut.cpp30
-rw-r--r--compiler/luci/service/src/Nodes/CircleWhileOut.test.cpp35
-rw-r--r--compiler/luci/service/src/Nodes/CircleZerosLike.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleZerosLike.test.cpp33
-rw-r--r--compiler/luci/service/src/ShapeDescription.cpp94
-rw-r--r--compiler/luci/service/src/ShapeDescription.test.cpp56
-rw-r--r--compiler/luci/service/src/ShapeInfer_StridedSlice.cpp304
-rw-r--r--compiler/luci/service/src/Validate.cpp349
-rw-r--r--compiler/luci/service/src/Validate.test.cpp139
-rw-r--r--compiler/luci/tester/CMakeLists.txt21
-rw-r--r--compiler/luci/tester/src/ReadModule.cpp65
-rw-r--r--compiler/luci/tester/src/ReadModule.h28
-rw-r--r--compiler/luci/tester/src/ReadTester.cpp42
-rw-r--r--compiler/luci/tester/src/ReadTester.test.cpp43
-rw-r--r--compiler/luci/tester/src/WriteTester.cpp47
-rw-r--r--compiler/luci/tester/src/WriteTester.test.cpp44
-rw-r--r--compiler/luci/testhelper/CMakeLists.txt25
-rw-r--r--compiler/luci/testhelper/README.md3
-rw-r--r--compiler/luci/testhelper/include/luci/test/TestIOGraph.h199
-rw-r--r--compiler/luci/testhelper/include/luci/test/TestShape.h40
-rw-r--r--compiler/luci/testhelper/src/TestIOGraph.test.cpp182
-rw-r--r--compiler/luci/testhelper/src/TestShape.test.cpp57
-rw-r--r--compiler/luci/tests/CMakeLists.txt33
-rw-r--r--compiler/luci/tests/test.lst32
-rw-r--r--compiler/mio-circle/CMakeLists.txt12
-rw-r--r--compiler/mio-circle/include/mio_circle/Helper.h36
-rw-r--r--compiler/mio-circle/src/Helper.cpp81
-rw-r--r--compiler/mio-circle04/CMakeLists.txt52
-rw-r--r--compiler/mio-circle04/README.md3
-rw-r--r--compiler/mio-circle04/example.cpp41
-rw-r--r--compiler/mio-circle04/include/mio_circle/Helper.h54
-rw-r--r--compiler/mio-circle04/include/mio_circle/Reader.h101
-rw-r--r--compiler/mio-circle04/src/Helper.cpp110
-rw-r--r--compiler/mio-circle04/src/Helper.test.cpp153
-rw-r--r--compiler/mio-circle04/src/Reader.cpp147
-rw-r--r--compiler/mio-circle04/src/Reader.test.cpp60
-rw-r--r--compiler/mio-circle05/CMakeLists.txt52
-rw-r--r--compiler/mio-circle05/README.md3
-rw-r--r--compiler/mio-circle05/example.cpp41
-rw-r--r--compiler/mio-circle05/include/mio_circle/Helper.h54
-rw-r--r--compiler/mio-circle05/include/mio_circle/Reader.h101
-rw-r--r--compiler/mio-circle05/src/Helper.cpp110
-rw-r--r--compiler/mio-circle05/src/Helper.test.cpp153
-rw-r--r--compiler/mio-circle05/src/Reader.cpp147
-rw-r--r--compiler/mio-circle05/src/Reader.test.cpp60
-rw-r--r--compiler/mio-circle06/CMakeLists.txt52
-rw-r--r--compiler/mio-circle06/README.md3
-rw-r--r--compiler/mio-circle06/example.cpp41
-rw-r--r--compiler/mio-circle06/include/mio_circle/Helper.h54
-rw-r--r--compiler/mio-circle06/include/mio_circle/Reader.h101
-rw-r--r--compiler/mio-circle06/src/Helper.cpp110
-rw-r--r--compiler/mio-circle06/src/Helper.test.cpp153
-rw-r--r--compiler/mio-circle06/src/Reader.cpp147
-rw-r--r--compiler/mio-circle06/src/Reader.test.cpp60
-rw-r--r--compiler/mio-tf/CMakeLists.txt2
-rw-r--r--compiler/mio-tflite/CMakeLists.txt12
-rw-r--r--compiler/mio-tflite/README.md2
-rw-r--r--compiler/mio-tflite2121/CMakeLists.txt60
-rw-r--r--compiler/mio-tflite2121/README.md3
-rw-r--r--compiler/mio-tflite2121/example.cpp41
-rw-r--r--compiler/mio-tflite2121/include/mio_tflite2121/Helper.h37
-rw-r--r--compiler/mio-tflite2121/src/Helper.cpp104
-rw-r--r--compiler/mio-tflite2121/src/Helper.test.cpp159
-rw-r--r--compiler/mio-tflite260/CMakeLists.txt69
-rw-r--r--compiler/mio-tflite260/README.md5
-rw-r--r--compiler/mio-tflite260/example.cpp41
-rw-r--r--compiler/mio-tflite260/include/mio_tflite260/Helper.h37
-rw-r--r--compiler/mio-tflite260/src/Helper.cpp104
-rw-r--r--compiler/mio-tflite260/src/Helper.test.cpp159
-rw-r--r--compiler/mio-tflite280/CMakeLists.txt59
-rw-r--r--compiler/mio-tflite280/README.md3
-rw-r--r--compiler/mio-tflite280/example.cpp41
-rw-r--r--compiler/mio-tflite280/include/mio_tflite280/Helper.h37
-rw-r--r--compiler/mio-tflite280/src/Helper.cpp104
-rw-r--r--compiler/mio-tflite280/src/Helper.test.cpp159
-rw-r--r--compiler/mir-interpreter/src/ops/Add.cpp8
-rw-r--r--compiler/mir-interpreter/src/ops/AvgPool2D.cpp4
-rw-r--r--compiler/mir-interpreter/src/ops/CappedReLU.cpp2
-rw-r--r--compiler/mir-interpreter/src/ops/Concat.cpp6
-rw-r--r--compiler/mir-interpreter/src/ops/Conv2D.cpp8
-rw-r--r--compiler/mir-interpreter/src/ops/DeConv2D.cpp4
-rw-r--r--compiler/mir-interpreter/src/ops/Gather.cpp2
-rw-r--r--compiler/mir-interpreter/src/ops/MaxPool2D.cpp4
-rw-r--r--compiler/mir-interpreter/src/ops/QuantizationHelpers.h2
-rw-r--r--compiler/mir-interpreter/src/ops/Softmax.cpp4
-rw-r--r--compiler/mir/include/mir/Graph.h4
-rw-r--r--compiler/mir/include/mir/Quantization.h2
-rw-r--r--compiler/mir/include/mir/ShapeRange.h2
-rw-r--r--compiler/mir/include/mir/TensorType.h2
-rw-r--r--compiler/mir/include/mir/ops/AvgPool2DOp.h2
-rw-r--r--compiler/mir/include/mir/ops/ConcatOp.h2
-rw-r--r--compiler/mir/include/mir/ops/Conv2DOp.h4
-rw-r--r--compiler/mir/include/mir/ops/Deconv2DOp.h4
-rw-r--r--compiler/mir/include/mir/ops/DepthwiseConv2DOp.h4
-rw-r--r--compiler/mir/include/mir/ops/FullyConnectedOp.h4
-rw-r--r--compiler/mir/include/mir/ops/GatherOp.h2
-rw-r--r--compiler/mir/include/mir/ops/MaxPool2DOp.h2
-rw-r--r--compiler/mir/include/mir/ops/PadOp.h2
-rw-r--r--compiler/mir/include/mir/ops/ReduceMeanOp.h2
-rw-r--r--compiler/mir/include/mir/ops/ReduceOp.h2
-rw-r--r--compiler/mir/include/mir/ops/ResizeOp.h4
-rw-r--r--compiler/mir/include/mir/ops/SliceOp.h2
-rw-r--r--compiler/mir/include/mir/ops/SqueezeOp.h2
-rw-r--r--compiler/mir/src/Graph.cpp13
-rw-r--r--compiler/mir/src/Operation.cpp2
-rw-r--r--compiler/mir/src/Shape.cpp4
-rw-r--r--compiler/mir/src/TensorVariant.cpp8
-rw-r--r--compiler/mir/src/mir_caffe2_importer/caffe2_importer.cpp42
-rw-r--r--compiler/mir/src/mir_caffe2_importer/caffe2_op_creator.cpp21
-rw-r--r--compiler/mir/src/mir_caffe_importer/caffe_importer.cpp120
-rw-r--r--compiler/mir/src/mir_caffe_importer/caffe_op_creator.cpp6
-rw-r--r--compiler/mir/src/mir_onnx_importer/AttributeHelpers.h4
-rw-r--r--compiler/mir/src/mir_onnx_importer/CMakeLists.txt4
-rw-r--r--compiler/mir/src/mir_onnx_importer/ConvPoolHelpers.cpp2
-rw-r--r--compiler/mir/src/mir_onnx_importer/ONNXHelpers.cpp6
-rw-r--r--compiler/mir/src/mir_onnx_importer/ONNXImporterImpl.cpp6
-rw-r--r--compiler/mir/src/mir_onnx_importer/ONNXNodeConverterRegistry.cpp4
-rw-r--r--compiler/mir/src/mir_onnx_importer/Op/AveragePool.cpp2
-rw-r--r--compiler/mir/src/mir_onnx_importer/Op/BatchNormalization.cpp2
-rw-r--r--compiler/mir/src/mir_onnx_importer/Op/Conv.cpp2
-rw-r--r--compiler/mir/src/mir_onnx_importer/Op/ConvTranspose.cpp20
-rw-r--r--compiler/mir/src/mir_onnx_importer/Op/MaxPool.cpp2
-rw-r--r--compiler/mir/src/mir_onnx_importer/Op/ReduceMean.cpp2
-rw-r--r--compiler/mir/src/mir_onnx_importer/Op/Upsample.cpp20
-rw-r--r--compiler/mir/src/mir_tflite_importer/CMakeLists.txt2
-rw-r--r--compiler/mir/src/mir_tflite_importer/tflite_importer.cpp76
-rw-r--r--compiler/mir/src/mir_tflite_importer/tflite_op_creator.cpp8
-rw-r--r--compiler/mir/src/ops/AvgPool2DOp.cpp2
-rw-r--r--compiler/mir/src/ops/Conv2DOp.cpp2
-rw-r--r--compiler/mir/src/ops/DeConv2DOp.cpp8
-rw-r--r--compiler/mir/src/ops/DepthwiseConv2DOp.cpp2
-rw-r--r--compiler/mir/src/ops/MaxPool2DOp.cpp2
-rw-r--r--compiler/mir/src/ops/PadOp.cpp2
-rw-r--r--compiler/mir/src/ops/TransposeOp.cpp4
-rw-r--r--compiler/mir/unittests/ShapeInference.cpp4
-rw-r--r--compiler/mir/unittests/ShapeRange.cpp4
-rw-r--r--compiler/mir2loco/CMakeLists.txt8
-rw-r--r--compiler/mir2loco/src/mir2loco.test.cpp71
-rw-r--r--compiler/moco-log/CMakeLists.txt1
-rw-r--r--compiler/moco-log/src/LoggingContext.cpp7
-rw-r--r--compiler/moco-tf/CMakeLists.txt4
-rw-r--r--compiler/moco-tf/requires.cmake2
-rw-r--r--compiler/moco-tf/src/BroadcastHelper.h2
-rw-r--r--compiler/moco-tf/src/Canonicalization/ConcatV2Canonicalizer.cpp1
-rw-r--r--compiler/moco-tf/src/Canonicalization/Conv2DBackpropInputCanonicalizer.cpp10
-rw-r--r--compiler/moco-tf/src/Canonicalization/Conv2DCanonicalizer.cpp2
-rw-r--r--compiler/moco-tf/src/Canonicalization/DepthwiseConv2dNativeCanonicalizer.cpp46
-rw-r--r--compiler/moco-tf/src/Canonicalization/PadCanonicalizer.cpp2
-rw-r--r--compiler/moco-tf/src/Canonicalization/Relu6Canonicalizer.cpp2
-rw-r--r--compiler/moco-tf/src/Canonicalization/ReluCanonicalizer.cpp2
-rw-r--r--compiler/moco-tf/src/Canonicalization/RsqrtCanonicalizer.cpp1
-rw-r--r--compiler/moco-tf/src/Canonicalization/SoftmaxCanonicalizer.cpp20
-rw-r--r--compiler/moco-tf/src/Canonicalization/SoftmaxCanonicalizer.h6
-rw-r--r--compiler/moco-tf/src/Canonicalization/StopGradientCanonicalizer.cpp20
-rw-r--r--compiler/moco-tf/src/Canonicalization/StopGradientCanonicalizer.h4
-rw-r--r--compiler/moco-tf/src/Canonicalization/TFPushCanonicalizer.cpp2
-rw-r--r--compiler/moco-tf/src/Canonicalization/TanhCanonicalizer.cpp2
-rw-r--r--compiler/moco-tf/src/Canonicalizer.cpp61
-rw-r--r--compiler/moco-tf/src/CodecHelper.h7
-rw-r--r--compiler/moco-tf/src/Frontend.cpp8
-rw-r--r--compiler/moco-tf/src/Knob.cpp12
-rw-r--r--compiler/moco-tf/src/LogHelper.cpp2
-rw-r--r--compiler/moco-tf/src/Op/COpCall.cpp10
-rw-r--r--compiler/moco-tf/src/Op/COpCall.h4
-rw-r--r--compiler/moco-tf/src/Op/COpCall.test.cpp5
-rw-r--r--compiler/moco-tf/src/Optimizer.cpp20
-rw-r--r--compiler/moco-tf/src/ProgressReporter.h2
-rw-r--r--compiler/moco-tf/src/TFFormattedGraph.h4
-rw-r--r--compiler/moco-tf/src/TFOptimizer.cpp22
-rw-r--r--compiler/moco-tf/src/TestHelper.test.cpp8
-rw-r--r--compiler/moco-tf/src/Transforms.h1
-rw-r--r--compiler/moco-tf/src/Transforms/ShapeInferencePass.cpp4
-rw-r--r--compiler/moco-tf/src/Transforms/TypeInferencePass.cpp4
-rw-r--r--compiler/moco-value-pbtxt-test/CMakeLists.txt7
-rw-r--r--compiler/moco/import/CMakeLists.txt1
-rw-r--r--compiler/moco/import/include/moco/Import/GraphBuilderContext.h2
-rw-r--r--compiler/moco/import/include/moco/Import/GraphBuilderRegistry.h2
-rw-r--r--compiler/moco/import/include/moco/Import/Nodes/Softmax.h4
-rw-r--r--compiler/moco/import/src/GraphBuilderRegistry.cpp66
-rw-r--r--compiler/moco/import/src/Importer.cpp10
-rw-r--r--compiler/moco/import/src/Nodes/Add.cpp5
-rw-r--r--compiler/moco/import/src/Nodes/AvgPool.cpp6
-rw-r--r--compiler/moco/import/src/Nodes/BiasAdd.cpp6
-rw-r--r--compiler/moco/import/src/Nodes/Concat.cpp6
-rw-r--r--compiler/moco/import/src/Nodes/Const.cpp2
-rw-r--r--compiler/moco/import/src/Nodes/Conv2D.cpp4
-rw-r--r--compiler/moco/import/src/Nodes/Conv2DBackpropInput.cpp7
-rw-r--r--compiler/moco/import/src/Nodes/DepthwiseConv2dNative.cpp8
-rw-r--r--compiler/moco/import/src/Nodes/FakeQuantWithMinMaxVars.cpp6
-rw-r--r--compiler/moco/import/src/Nodes/FusedBatchNorm.cpp7
-rw-r--r--compiler/moco/import/src/Nodes/Identity.cpp6
-rw-r--r--compiler/moco/import/src/Nodes/MaxPool.cpp6
-rw-r--r--compiler/moco/import/src/Nodes/Maximum.cpp5
-rw-r--r--compiler/moco/import/src/Nodes/Mean.cpp9
-rw-r--r--compiler/moco/import/src/Nodes/Mul.cpp5
-rw-r--r--compiler/moco/import/src/Nodes/Pack.cpp4
-rw-r--r--compiler/moco/import/src/Nodes/Pad.cpp5
-rw-r--r--compiler/moco/import/src/Nodes/RealDiv.cpp5
-rw-r--r--compiler/moco/import/src/Nodes/Relu.cpp4
-rw-r--r--compiler/moco/import/src/Nodes/Relu6.cpp4
-rw-r--r--compiler/moco/import/src/Nodes/Reshape.cpp4
-rw-r--r--compiler/moco/import/src/Nodes/Rsqrt.cpp6
-rw-r--r--compiler/moco/import/src/Nodes/Shape.cpp7
-rw-r--r--compiler/moco/import/src/Nodes/Softmax.cpp11
-rw-r--r--compiler/moco/import/src/Nodes/Sqrt.cpp5
-rw-r--r--compiler/moco/import/src/Nodes/SquaredDifference.cpp7
-rw-r--r--compiler/moco/import/src/Nodes/Squeeze.cpp7
-rw-r--r--compiler/moco/import/src/Nodes/StopGradient.cpp5
-rw-r--r--compiler/moco/import/src/Nodes/StridedSlice.cpp7
-rw-r--r--compiler/moco/import/src/Nodes/Sub.cpp5
-rw-r--r--compiler/moco/import/src/Nodes/Tanh.cpp5
-rw-r--r--compiler/moco/import/src/TestHelper.test.cpp9
-rw-r--r--compiler/moco/lang/CMakeLists.txt1
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFConv2DBackpropInput.h2
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFDepthwiseConv2dNative.h2
-rw-r--r--compiler/moco/lang/include/moco/IR/Nodes/TFFakeQuantWithMinMaxVars.h2
-rw-r--r--compiler/moco/lang/src/IR/TFDialect.cpp7
-rw-r--r--compiler/moco/lang/src/IR/TFNode.cpp7
-rw-r--r--compiler/moco/pass/CMakeLists.txt2
-rw-r--r--compiler/moco/pass/include/moco/Pass/Passes/ConstantFoldMul.h2
-rw-r--r--compiler/moco/pass/include/moco/Pass/Passes/ConstantFoldPack.h2
-rw-r--r--compiler/moco/pass/include/moco/Pass/Passes/ConstantFoldStridedSlice.h2
-rw-r--r--compiler/moco/pass/include/moco/Pass/Passes/FuseBinaryIntoPreceding.h2
-rw-r--r--compiler/moco/pass/include/moco/Pass/Passes/ResolveFusedBatchNorm.h2
-rw-r--r--compiler/moco/pass/src/ConstantFoldAdd.test.cpp7
-rw-r--r--compiler/moco/pass/src/ConstantFoldHelper.cpp4
-rw-r--r--compiler/moco/pass/src/ConstantFoldMul.test.cpp7
-rw-r--r--compiler/moco/pass/src/ConstantFoldPack.test.cpp5
-rw-r--r--compiler/moco/pass/src/ConstantFoldStridedSlice.test.cpp11
-rw-r--r--compiler/moco/pass/src/Passes/FuseBinaryIntoPreceding.cpp4
-rw-r--r--compiler/moco/pass/src/Passes/ResolveSquaredDifference.cpp2
-rw-r--r--compiler/moco/requires.cmake1
-rw-r--r--compiler/moco/service/CMakeLists.txt1
-rw-r--r--compiler/moco/service/src/Service/TFShapeInferenceRule.cpp2
-rw-r--r--compiler/moco/support/include/moco/Support/TFShapeInferenceHelper.h8
-rw-r--r--compiler/moco/support/src/TFShapeInferenceHelper.cpp4
-rw-r--r--compiler/morph/CMakeLists.txt8
-rw-r--r--compiler/nest/core/CMakeLists.txt8
-rw-r--r--compiler/nest/core/include/nest/expr/AddNode.h2
-rw-r--r--compiler/nest/core/include/nest/expr/DerefNode.h2
-rw-r--r--compiler/nest/core/include/nest/expr/MulNode.h2
-rw-r--r--compiler/nest/core/src/Block.test.cpp2
-rw-r--r--compiler/nest/core/src/Closure.test.cpp2
-rw-r--r--compiler/nest/core/src/Expr.test.cpp2
-rw-r--r--compiler/nest/core/src/Ret.test.cpp2
-rw-r--r--compiler/nest/core/src/expr/AddNode.test.cpp2
-rw-r--r--compiler/nest/core/src/expr/DerefNode.test.cpp2
-rw-r--r--compiler/nest/core/src/expr/MulNode.test.cpp2
-rw-r--r--compiler/nest/core/src/stmt/PushNode.test.cpp2
-rw-r--r--compiler/nike/CMakeLists.txt8
-rw-r--r--compiler/nnc/backends/acl_soft_backend/AclCppGenerator.cpp2
-rw-r--r--compiler/nnc/backends/acl_soft_backend/AclCppOpGenerator.cpp90
-rw-r--r--compiler/nnc/backends/acl_soft_backend/ArtifactModel.cpp4
-rw-r--r--compiler/nnc/backends/acl_soft_backend/ArtifactModel.h18
-rw-r--r--compiler/nnc/backends/interpreter/InterpreterBackend.cpp10
-rw-r--r--compiler/nnc/backends/soft_backend/CPPGenerator.cpp69
-rw-r--r--compiler/nnc/backends/soft_backend/ModelAnalyzer.cpp4
-rw-r--r--compiler/nnc/backends/soft_backend/ModelAnalyzer.h6
-rw-r--r--compiler/nnc/backends/soft_backend/SequencedIR.h6
-rw-r--r--compiler/nnc/backends/soft_backend/code_snippets/cpp_header_types.def2
-rw-r--r--compiler/nnc/backends/soft_backend/code_snippets/eigen.def2
-rw-r--r--compiler/nnc/driver/Options.cpp22
-rw-r--r--compiler/nnc/include/Definitions.h.in4
-rw-r--r--compiler/nnc/include/pass/PassData.h15
-rw-r--r--compiler/nnc/include/passes/optimizations/CombineTransposes.h1
-rw-r--r--compiler/nnc/include/passes/optimizations/OptimizationUtils.h10
-rw-r--r--compiler/nnc/include/support/CommandLine.h6
-rw-r--r--compiler/nnc/passes/optimizations/CombineTransposes.cpp4
-rw-r--r--compiler/nnc/passes/optimizations/ConstantFoldTranspose.cpp2
-rw-r--r--compiler/nnc/passes/optimizations/DeadCodeElimination.cpp4
-rw-r--r--compiler/nnc/passes/optimizations/FuseArithmeticOps.cpp4
-rw-r--r--compiler/nnc/passes/transformations/DataFormatSwitcher.cpp10
-rw-r--r--compiler/nnc/passes/transformations/LowerConv2D.cpp4
-rw-r--r--compiler/nnc/tests/acl_soft_backend/AclCppOperations.cpp2
-rw-r--r--compiler/nnc/tests/acl_soft_backend/artifact_cmake/main.cpp26
-rw-r--r--compiler/nnc/tests/soft_backend/CompileCPP.cpp2
-rw-r--r--compiler/nnc/tests/soft_backend/test_main.def3
-rw-r--r--compiler/nnc/unittests/acl_backend/DOMToText.cpp38
-rw-r--r--compiler/nnc/unittests/acl_backend/MIRToDOM.cpp30
-rw-r--r--compiler/nnc/unittests/optimizations/SinkTest.cpp4
-rw-r--r--compiler/nnc/unittests/soft_backend/CPPOperations.cpp45
-rw-r--r--compiler/nnc/unittests/soft_backend/ModelAnalyzer.cpp2
-rw-r--r--compiler/nnc/unittests/support/CommandLineTest.cpp48
-rw-r--r--compiler/nnc/unittests/transformations/Switcher.cpp4
-rw-r--r--compiler/nnkit-caffe/backend/CMakeLists.txt1
-rw-r--r--compiler/nnkit-caffe/backend/Module.cpp5
-rw-r--r--compiler/nnkit-intf/tensor/include/nnkit/TensorContext.h4
-rw-r--r--compiler/nnkit-misc/backend/CMakeLists.txt1
-rw-r--r--compiler/nnkit-misc/backend/src/BackendPlugin.cpp4
-rw-r--r--compiler/nnkit-mocotf/backend/Backend.cpp4
-rw-r--r--compiler/nnkit-mocotf/backend/CMakeLists.txt1
-rw-r--r--compiler/nnkit-mocotf/requires.cmake1
-rw-r--r--compiler/nnkit-mocotf/support/CMakeLists.txt1
-rw-r--r--compiler/nnkit-mocotf/support/src/Backend.cpp6
-rw-r--r--compiler/nnkit-mocotf/support/src/InputTensorContext.cpp2
-rw-r--r--compiler/nnkit-mocotf/support/src/InputTensorContext.h2
-rw-r--r--compiler/nnkit-mocotf/support/src/OutputTensorContext.cpp2
-rw-r--r--compiler/nnkit-mocotf/support/src/OutputTensorContext.h2
-rw-r--r--compiler/nnkit-onnxrt/backend/Backend.cpp4
-rw-r--r--compiler/nnkit-onnxrt/backend/CMakeLists.txt1
-rw-r--r--compiler/nnkit-onnxrt/requires.cmake1
-rw-r--r--compiler/nnkit-onnxrt/support/CMakeLists.txt1
-rw-r--r--compiler/nnkit-onnxrt/support/include/nnkit/support/onnx/TensorSet.h4
-rw-r--r--compiler/nnkit-onnxrt/support/src/Runner.cpp8
-rw-r--r--compiler/nnkit-tf/CMakeLists.txt2
-rw-r--r--compiler/nnkit-tf/backend/Backend.cpp4
-rw-r--r--compiler/nnkit-tf/backend/CMakeLists.txt1
-rw-r--r--compiler/nnkit-tf/requires.cmake1
-rw-r--r--compiler/nnkit-tf/support/CMakeLists.txt12
-rw-r--r--compiler/nnkit-tf/support/include/nnkit/support/tf/TensorContext.h2
-rw-r--r--compiler/nnkit-tf/support/include/nnkit/support/tf/TensorDataMap.h4
-rw-r--r--compiler/nnkit-tf/support/src/Backend.cpp2
-rw-r--r--compiler/nnkit-tf/support/src/Runner.cpp6
-rw-r--r--compiler/nnkit-tflite/backend/Backend.cpp7
-rw-r--r--compiler/nnkit-tflite/backend/CMakeLists.txt1
-rw-r--r--compiler/nnkit-tflite/requires.cmake1
-rw-r--r--compiler/nnkit/actions/HDF5/CMakeLists.txt2
-rw-r--r--compiler/nnkit/actions/HDF5/Export.cpp9
-rw-r--r--compiler/nnkit/actions/HDF5/Import.cpp5
-rw-r--r--compiler/nnkit/actions/builtin/CMakeLists.txt2
-rw-r--r--compiler/nnkit/actions/builtin/Randomize.cpp5
-rw-r--r--compiler/nnkit/actions/builtin/Show.cpp5
-rw-r--r--compiler/nnkit/tools/benchmark/CMakeLists.txt1
-rw-r--r--compiler/nnkit/tools/benchmark/src/Benchmark.cpp5
-rw-r--r--compiler/nnkit/tools/run/CMakeLists.txt1
-rw-r--r--compiler/nnkit/tools/run/nnkit-run.cpp13
-rw-r--r--compiler/nnop/CMakeLists.txt8
-rw-r--r--compiler/nnop/include/nnop/PadInfo.h2
-rw-r--r--compiler/nnop/include/nnop/StrideInfo.h2
-rw-r--r--compiler/nnsuite/conv/model/src/RandomModel.cpp4
-rw-r--r--compiler/nnsuite/conv/nnkit-caffe/CMakeLists.txt1
-rw-r--r--compiler/nnsuite/conv/nnkit-caffe/ConvBackend.cpp4
-rw-r--r--compiler/nnsuite/conv/nnkit-caffe/ConvBackend.test.cpp4
-rw-r--r--compiler/nnsuite/conv/nnkit-tflite/CMakeLists.txt1
-rw-r--r--compiler/nnsuite/conv/nnkit-tflite/ConvBackend.cpp10
-rw-r--r--compiler/nnsuite/conv/nnkit-tflite/ConvBackend.test.cpp4
-rw-r--r--compiler/nnsuite/conv/nnkit-tflite/Entry.cpp5
-rw-r--r--compiler/one-cmds/CMakeLists.txt162
-rw-r--r--compiler/one-cmds/dummy-driver/CMakeLists.txt111
-rw-r--r--compiler/one-cmds/dummy-driver/src/dummy-compile.cpp48
-rw-r--r--compiler/one-cmds/dummy-driver/src/dummy-infer.cpp34
-rw-r--r--compiler/one-cmds/dummy-driver/src/dummy-inferV2.cpp34
-rw-r--r--compiler/one-cmds/dummy-driver/src/dummy-onnx-ext.cpp36
-rw-r--r--compiler/one-cmds/dummy-driver/src/dummy-profile.cpp36
-rw-r--r--compiler/one-cmds/dummy-driver/src/dummyEnv-compile.cpp45
-rw-r--r--compiler/one-cmds/dummy-driver/src/dummyV2-compile.cpp51
-rw-r--r--compiler/one-cmds/dummy-driver/src/dummyV2-profile.cpp36
-rw-r--r--compiler/one-cmds/dummy-driver/src/dummyV3-profile.cpp38
-rw-r--r--compiler/one-cmds/dummy-driver/src/help-compile.cpp42
-rw-r--r--compiler/one-cmds/dummy-driver/src/help-infer.cpp42
-rw-r--r--compiler/one-cmds/dummy-driver/src/help-profile.cpp42
-rw-r--r--compiler/one-cmds/how-to-create-hdf5-dataset.txt97
-rw-r--r--compiler/one-cmds/how-to-prepare-virtualenv.txt21
-rw-r--r--compiler/one-cmds/how-to-use-one-commands.txt125
-rw-r--r--compiler/one-cmds/one-build190
-rw-r--r--compiler/one-cmds/one-build.template.cfg28
-rw-r--r--compiler/one-cmds/one-codegen253
-rw-r--r--compiler/one-cmds/one-import137
-rw-r--r--compiler/one-cmds/one-import-bcq324
-rw-r--r--compiler/one-cmds/one-import-onnx349
-rw-r--r--compiler/one-cmds/one-import-pytorch368
-rw-r--r--compiler/one-cmds/one-import-tf309
-rw-r--r--compiler/one-cmds/one-import-tflite155
-rw-r--r--compiler/one-cmds/one-infer133
-rw-r--r--compiler/one-cmds/one-init337
-rw-r--r--compiler/one-cmds/one-optimize294
-rw-r--r--compiler/one-cmds/one-pack166
-rw-r--r--compiler/one-cmds/one-partition126
-rw-r--r--compiler/one-cmds/one-prepare-venv98
-rw-r--r--compiler/one-cmds/one-prepare-venv.aarch64139
-rw-r--r--compiler/one-cmds/one-prepare-venv.u180498
-rw-r--r--compiler/one-cmds/one-profile246
-rw-r--r--compiler/one-cmds/one-quantize1017
-rw-r--r--compiler/one-cmds/onecc217
-rw-r--r--compiler/one-cmds/onecc.template.cfg152
-rw-r--r--compiler/one-cmds/onelib/CfgRunner.py121
-rw-r--r--compiler/one-cmds/onelib/Command.py54
-rw-r--r--compiler/one-cmds/onelib/OptionBuilder.py95
-rw-r--r--compiler/one-cmds/onelib/TopologicalSortHelper.py45
-rw-r--r--compiler/one-cmds/onelib/WorkflowRunner.py131
-rw-r--r--compiler/one-cmds/onelib/backends.py79
-rw-r--r--compiler/one-cmds/onelib/constant.py154
-rw-r--r--compiler/one-cmds/onelib/export_constant.py76
-rw-r--r--compiler/one-cmds/onelib/make_cmd.py105
-rw-r--r--compiler/one-cmds/onelib/utils.py251
-rwxr-xr-xcompiler/one-cmds/onnx_legalizer.py1062
-rw-r--r--compiler/one-cmds/requires.cmake3
-rw-r--r--compiler/one-cmds/tests/CMakeLists.txt152
-rw-r--r--compiler/one-cmds/tests/OONE-BUILD_014.cfg2
-rw-r--r--compiler/one-cmds/tests/OONECC_024.cfg2
-rw-r--r--compiler/one-cmds/tests/README.txt27
-rw-r--r--compiler/one-cmds/tests/one-build_001.cfg20
-rw-r--r--compiler/one-cmds/tests/one-build_001.test43
-rw-r--r--compiler/one-cmds/tests/one-build_002.cfg24
-rw-r--r--compiler/one-cmds/tests/one-build_002.test43
-rw-r--r--compiler/one-cmds/tests/one-build_003.cfg21
-rw-r--r--compiler/one-cmds/tests/one-build_003.test43
-rw-r--r--compiler/one-cmds/tests/one-build_004.cfg20
-rw-r--r--compiler/one-cmds/tests/one-build_004.test49
-rw-r--r--compiler/one-cmds/tests/one-build_005.cfg20
-rw-r--r--compiler/one-cmds/tests/one-build_005.test49
-rw-r--r--compiler/one-cmds/tests/one-build_006.cfg29
-rw-r--r--compiler/one-cmds/tests/one-build_006.test49
-rw-r--r--compiler/one-cmds/tests/one-build_007.cfg29
-rw-r--r--compiler/one-cmds/tests/one-build_007.test43
-rw-r--r--compiler/one-cmds/tests/one-build_008.cfg22
-rw-r--r--compiler/one-cmds/tests/one-build_008.test49
-rw-r--r--compiler/one-cmds/tests/one-build_009.cfg23
-rw-r--r--compiler/one-cmds/tests/one-build_009.test49
-rw-r--r--compiler/one-cmds/tests/one-build_010.cfg17
-rw-r--r--compiler/one-cmds/tests/one-build_010.test48
-rw-r--r--compiler/one-cmds/tests/one-build_011.cfg14
-rw-r--r--compiler/one-cmds/tests/one-build_011.test48
-rw-r--r--compiler/one-cmds/tests/one-build_012.cfg22
-rw-r--r--compiler/one-cmds/tests/one-build_012.test43
-rw-r--r--compiler/one-cmds/tests/one-build_013.cfg22
-rw-r--r--compiler/one-cmds/tests/one-build_013.test43
-rw-r--r--compiler/one-cmds/tests/one-build_014.cfg22
-rw-r--r--compiler/one-cmds/tests/one-build_014.test78
-rw-r--r--compiler/one-cmds/tests/one-build_neg_001.test41
-rw-r--r--compiler/one-cmds/tests/one-build_neg_002.cfg20
-rw-r--r--compiler/one-cmds/tests/one-build_neg_002.test43
-rw-r--r--compiler/one-cmds/tests/one-build_neg_003.cfg15
-rw-r--r--compiler/one-cmds/tests/one-build_neg_003.test43
-rw-r--r--compiler/one-cmds/tests/one-build_neg_004.cfg24
-rw-r--r--compiler/one-cmds/tests/one-build_neg_004.test43
-rw-r--r--compiler/one-cmds/tests/one-build_neg_005.cfg16
-rw-r--r--compiler/one-cmds/tests/one-build_neg_005.test50
-rw-r--r--compiler/one-cmds/tests/one-build_neg_006.cfg13
-rw-r--r--compiler/one-cmds/tests/one-build_neg_006.test50
-rw-r--r--compiler/one-cmds/tests/one-build_neg_007.test71
-rw-r--r--compiler/one-cmds/tests/one-build_neg_008.test43
-rw-r--r--compiler/one-cmds/tests/one-build_neg_009.test43
-rw-r--r--compiler/one-cmds/tests/one-codegen_001.test43
-rw-r--r--compiler/one-cmds/tests/one-codegen_002.test48
-rw-r--r--compiler/one-cmds/tests/one-codegen_003.test48
-rw-r--r--compiler/one-cmds/tests/one-codegen_004.test40
-rw-r--r--compiler/one-cmds/tests/one-codegen_neg_001.test41
-rw-r--r--compiler/one-cmds/tests/one-codegen_neg_002.test41
-rw-r--r--compiler/one-cmds/tests/one-codegen_neg_003.test41
-rw-r--r--compiler/one-cmds/tests/one-codegen_neg_004.cfg6
-rw-r--r--compiler/one-cmds/tests/one-codegen_neg_004.test43
-rw-r--r--compiler/one-cmds/tests/one-codegen_neg_005.test41
-rw-r--r--compiler/one-cmds/tests/one-import-bcq_001.test45
-rw-r--r--compiler/one-cmds/tests/one-import-bcq_neg_001.test49
-rw-r--r--compiler/one-cmds/tests/one-import-bcq_neg_002.test49
-rw-r--r--compiler/one-cmds/tests/one-import-bcq_neg_003.test49
-rw-r--r--compiler/one-cmds/tests/one-import-bcq_neg_004.test49
-rw-r--r--compiler/one-cmds/tests/one-import-bcq_neg_005.test48
-rw-r--r--compiler/one-cmds/tests/one-import-bcq_neg_006.test48
-rw-r--r--compiler/one-cmds/tests/one-import-bcq_neg_007.test48
-rw-r--r--compiler/one-cmds/tests/one-import-bcq_neg_008.test48
-rw-r--r--compiler/one-cmds/tests/one-import-bcq_neg_009.test48
-rw-r--r--compiler/one-cmds/tests/one-import-onnx_001.test43
-rw-r--r--compiler/one-cmds/tests/one-import-onnx_002.test71
-rw-r--r--compiler/one-cmds/tests/one-import-onnx_ext_001.test53
-rw-r--r--compiler/one-cmds/tests/one-import_001.test45
-rw-r--r--compiler/one-cmds/tests/one-import_002.cfg16
-rw-r--r--compiler/one-cmds/tests/one-import_002.test46
-rw-r--r--compiler/one-cmds/tests/one-import_003.cfg13
-rw-r--r--compiler/one-cmds/tests/one-import_003.test43
-rw-r--r--compiler/one-cmds/tests/one-import_004.cfg13
-rw-r--r--compiler/one-cmds/tests/one-import_004.test43
-rw-r--r--compiler/one-cmds/tests/one-import_005.cfg13
-rw-r--r--compiler/one-cmds/tests/one-import_005.test43
-rw-r--r--compiler/one-cmds/tests/one-import_006.test43
-rw-r--r--compiler/one-cmds/tests/one-import_neg_001.test55
-rw-r--r--compiler/one-cmds/tests/one-import_neg_002.test55
-rw-r--r--compiler/one-cmds/tests/one-import_neg_003.test58
-rw-r--r--compiler/one-cmds/tests/one-import_neg_004.test50
-rw-r--r--compiler/one-cmds/tests/one-import_neg_005.test49
-rw-r--r--compiler/one-cmds/tests/one-import_neg_006.test52
-rw-r--r--compiler/one-cmds/tests/one-import_neg_007.test49
-rw-r--r--compiler/one-cmds/tests/one-import_neg_008.test49
-rw-r--r--compiler/one-cmds/tests/one-import_neg_009.test48
-rw-r--r--compiler/one-cmds/tests/one-import_neg_010.test49
-rw-r--r--compiler/one-cmds/tests/one-infer-test-post-process.py16
-rw-r--r--compiler/one-cmds/tests/one-infer_001.test44
-rw-r--r--compiler/one-cmds/tests/one-infer_002.test50
-rw-r--r--compiler/one-cmds/tests/one-infer_003.test40
-rw-r--r--compiler/one-cmds/tests/one-infer_004.cfg3
-rw-r--r--compiler/one-cmds/tests/one-infer_004.test53
-rw-r--r--compiler/one-cmds/tests/one-infer_005.test55
-rw-r--r--compiler/one-cmds/tests/one-infer_neg_001.test41
-rw-r--r--compiler/one-cmds/tests/one-infer_neg_002.test42
-rw-r--r--compiler/one-cmds/tests/one-infer_neg_003.test56
-rw-r--r--compiler/one-cmds/tests/one-optimize_001.test43
-rw-r--r--compiler/one-cmds/tests/one-optimize_002.test44
-rw-r--r--compiler/one-cmds/tests/one-optimize_003.test62
-rw-r--r--compiler/one-cmds/tests/one-optimize_neg_001.test47
-rw-r--r--compiler/one-cmds/tests/one-optimize_neg_002.test47
-rw-r--r--compiler/one-cmds/tests/one-optimize_neg_003.test44
-rw-r--r--compiler/one-cmds/tests/one-optimize_neg_004.test48
-rw-r--r--compiler/one-cmds/tests/one-pack_001.test43
-rw-r--r--compiler/one-cmds/tests/one-pack_neg_001.test43
-rw-r--r--compiler/one-cmds/tests/one-pack_neg_002.test47
-rw-r--r--compiler/one-cmds/tests/one-pack_neg_003.test45
-rw-r--r--compiler/one-cmds/tests/one-partition_001.test47
-rw-r--r--compiler/one-cmds/tests/one-partition_neg_001.test56
-rw-r--r--compiler/one-cmds/tests/one-partition_neg_002.test47
-rw-r--r--compiler/one-cmds/tests/one-profile_001.test44
-rw-r--r--compiler/one-cmds/tests/one-profile_002.test52
-rw-r--r--compiler/one-cmds/tests/one-profile_003.test40
-rw-r--r--compiler/one-cmds/tests/one-profile_004.cfg3
-rw-r--r--compiler/one-cmds/tests/one-profile_004.test53
-rw-r--r--compiler/one-cmds/tests/one-profile_neg_001.test41
-rw-r--r--compiler/one-cmds/tests/one-profile_neg_002.test41
-rw-r--r--compiler/one-cmds/tests/one-profile_neg_003.test41
-rw-r--r--compiler/one-cmds/tests/one-profile_neg_004.cfg6
-rw-r--r--compiler/one-cmds/tests/one-profile_neg_004.test43
-rw-r--r--compiler/one-cmds/tests/one-profile_neg_005.test41
-rw-r--r--compiler/one-cmds/tests/one-quantize_001.test46
-rw-r--r--compiler/one-cmds/tests/one-quantize_002.test45
-rw-r--r--compiler/one-cmds/tests/one-quantize_003.test47
-rw-r--r--compiler/one-cmds/tests/one-quantize_004.test47
-rw-r--r--compiler/one-cmds/tests/one-quantize_005.test47
-rw-r--r--compiler/one-cmds/tests/one-quantize_006.test50
-rw-r--r--compiler/one-cmds/tests/one-quantize_007.test47
-rw-r--r--compiler/one-cmds/tests/one-quantize_008.test47
-rw-r--r--compiler/one-cmds/tests/one-quantize_009.qconf.json36
-rw-r--r--compiler/one-cmds/tests/one-quantize_009.test47
-rw-r--r--compiler/one-cmds/tests/one-quantize_010.test57
-rw-r--r--compiler/one-cmds/tests/one-quantize_011.test57
-rw-r--r--compiler/one-cmds/tests/one-quantize_012.qconf.json16
-rw-r--r--compiler/one-cmds/tests/one-quantize_012.test47
-rw-r--r--compiler/one-cmds/tests/one-quantize_013.qconf.json16
-rw-r--r--compiler/one-cmds/tests/one-quantize_013.test49
-rw-r--r--compiler/one-cmds/tests/one-quantize_014.test60
-rw-r--r--compiler/one-cmds/tests/one-quantize_015.test46
-rw-r--r--compiler/one-cmds/tests/one-quantize_016.test70
-rw-r--r--compiler/one-cmds/tests/one-quantize_017.test46
-rw-r--r--compiler/one-cmds/tests/one-quantize_018.test51
-rw-r--r--compiler/one-cmds/tests/one-quantize_019.test51
-rw-r--r--compiler/one-cmds/tests/one-quantize_020.test51
-rw-r--r--compiler/one-cmds/tests/one-quantize_021.test47
-rw-r--r--compiler/one-cmds/tests/one-quantize_neg_001.test52
-rw-r--r--compiler/one-cmds/tests/one-quantize_neg_002.test51
-rw-r--r--compiler/one-cmds/tests/one-quantize_neg_003.test50
-rw-r--r--compiler/one-cmds/tests/one-quantize_neg_004.test50
-rw-r--r--compiler/one-cmds/tests/one-quantize_neg_005.test50
-rw-r--r--compiler/one-cmds/tests/one-quantize_neg_006.test50
-rw-r--r--compiler/one-cmds/tests/one-quantize_neg_007.test50
-rw-r--r--compiler/one-cmds/tests/one-quantize_neg_008.test51
-rw-r--r--compiler/one-cmds/tests/one-quantize_neg_009.test51
-rw-r--r--compiler/one-cmds/tests/one-quantize_neg_010.test51
-rw-r--r--compiler/one-cmds/tests/one-quantize_neg_011.test51
-rw-r--r--compiler/one-cmds/tests/one-quantize_neg_012.test51
-rw-r--r--compiler/one-cmds/tests/one-quantize_neg_013.test51
-rw-r--r--compiler/one-cmds/tests/one-quantize_neg_014.test52
-rw-r--r--compiler/one-cmds/tests/one-quantize_neg_015.test52
-rw-r--r--compiler/one-cmds/tests/one-quantize_neg_016.test52
-rw-r--r--compiler/one-cmds/tests/one-quantize_neg_017.test52
-rw-r--r--compiler/one-cmds/tests/one-quantize_neg_018.test49
-rw-r--r--compiler/one-cmds/tests/one-quantize_neg_019.test50
-rw-r--r--compiler/one-cmds/tests/one-quantize_neg_020.test48
-rw-r--r--compiler/one-cmds/tests/one-quantize_neg_021.test50
-rw-r--r--compiler/one-cmds/tests/one-quantize_neg_022.test48
-rw-r--r--compiler/one-cmds/tests/one-quantize_neg_023.test49
-rw-r--r--compiler/one-cmds/tests/onecc_001.cfg20
-rw-r--r--compiler/one-cmds/tests/onecc_001.test43
-rw-r--r--compiler/one-cmds/tests/onecc_002.cfg24
-rw-r--r--compiler/one-cmds/tests/onecc_002.test43
-rw-r--r--compiler/one-cmds/tests/onecc_003.cfg21
-rw-r--r--compiler/one-cmds/tests/onecc_003.test43
-rw-r--r--compiler/one-cmds/tests/onecc_004.cfg20
-rw-r--r--compiler/one-cmds/tests/onecc_004.test49
-rw-r--r--compiler/one-cmds/tests/onecc_005.cfg20
-rw-r--r--compiler/one-cmds/tests/onecc_005.test49
-rw-r--r--compiler/one-cmds/tests/onecc_006.cfg29
-rw-r--r--compiler/one-cmds/tests/onecc_006.test49
-rw-r--r--compiler/one-cmds/tests/onecc_007.cfg29
-rw-r--r--compiler/one-cmds/tests/onecc_007.test43
-rw-r--r--compiler/one-cmds/tests/onecc_008.cfg22
-rw-r--r--compiler/one-cmds/tests/onecc_008.test49
-rw-r--r--compiler/one-cmds/tests/onecc_009.cfg23
-rw-r--r--compiler/one-cmds/tests/onecc_009.test49
-rw-r--r--compiler/one-cmds/tests/onecc_010.cfg17
-rw-r--r--compiler/one-cmds/tests/onecc_010.test48
-rw-r--r--compiler/one-cmds/tests/onecc_011.cfg14
-rw-r--r--compiler/one-cmds/tests/onecc_011.test48
-rw-r--r--compiler/one-cmds/tests/onecc_012.cfg22
-rw-r--r--compiler/one-cmds/tests/onecc_012.test43
-rw-r--r--compiler/one-cmds/tests/onecc_013.cfg7
-rw-r--r--compiler/one-cmds/tests/onecc_013.test43
-rw-r--r--compiler/one-cmds/tests/onecc_014.cfg7
-rw-r--r--compiler/one-cmds/tests/onecc_014.test43
-rw-r--r--compiler/one-cmds/tests/onecc_015.cfg5
-rw-r--r--compiler/one-cmds/tests/onecc_015.test43
-rw-r--r--compiler/one-cmds/tests/onecc_016.cfg3
-rw-r--r--compiler/one-cmds/tests/onecc_016.test43
-rw-r--r--compiler/one-cmds/tests/onecc_017.test48
-rw-r--r--compiler/one-cmds/tests/onecc_018.test49
-rw-r--r--compiler/one-cmds/tests/onecc_019.test48
-rw-r--r--compiler/one-cmds/tests/onecc_020.test54
-rw-r--r--compiler/one-cmds/tests/onecc_021.cfg14
-rw-r--r--compiler/one-cmds/tests/onecc_021.test48
-rw-r--r--compiler/one-cmds/tests/onecc_022.cfg18
-rw-r--r--compiler/one-cmds/tests/onecc_022.test43
-rw-r--r--compiler/one-cmds/tests/onecc_023.cfg15
-rw-r--r--compiler/one-cmds/tests/onecc_023.test43
-rw-r--r--compiler/one-cmds/tests/onecc_024.cfg22
-rw-r--r--compiler/one-cmds/tests/onecc_024.test78
-rw-r--r--compiler/one-cmds/tests/onecc_025.cfg20
-rw-r--r--compiler/one-cmds/tests/onecc_025.test43
-rw-r--r--compiler/one-cmds/tests/onecc_026.cfg16
-rw-r--r--compiler/one-cmds/tests/onecc_026.test47
-rw-r--r--compiler/one-cmds/tests/onecc_027.cfg15
-rw-r--r--compiler/one-cmds/tests/onecc_027.test48
-rw-r--r--compiler/one-cmds/tests/onecc_028.test43
-rw-r--r--compiler/one-cmds/tests/onecc_028.workflow.json37
-rw-r--r--compiler/one-cmds/tests/onecc_029.test43
-rw-r--r--compiler/one-cmds/tests/onecc_029.workflow.json30
-rw-r--r--compiler/one-cmds/tests/onecc_030.test49
-rw-r--r--compiler/one-cmds/tests/onecc_030.workflow.json29
-rw-r--r--compiler/one-cmds/tests/onecc_031.test49
-rw-r--r--compiler/one-cmds/tests/onecc_031.workflow.json33
-rw-r--r--compiler/one-cmds/tests/onecc_032.test49
-rw-r--r--compiler/one-cmds/tests/onecc_032.workflow.json42
-rw-r--r--compiler/one-cmds/tests/onecc_033.test43
-rw-r--r--compiler/one-cmds/tests/onecc_033.workflow.json42
-rw-r--r--compiler/one-cmds/tests/onecc_034.test49
-rw-r--r--compiler/one-cmds/tests/onecc_034.workflow.json35
-rw-r--r--compiler/one-cmds/tests/onecc_035.test48
-rw-r--r--compiler/one-cmds/tests/onecc_035.workflow.json22
-rw-r--r--compiler/one-cmds/tests/onecc_036.test48
-rw-r--r--compiler/one-cmds/tests/onecc_036.workflow.json18
-rw-r--r--compiler/one-cmds/tests/onecc_037.test43
-rw-r--r--compiler/one-cmds/tests/onecc_037.workflow.json29
-rw-r--r--compiler/one-cmds/tests/onecc_038.test43
-rw-r--r--compiler/one-cmds/tests/onecc_038.workflow.json31
-rw-r--r--compiler/one-cmds/tests/onecc_039.test49
-rw-r--r--compiler/one-cmds/tests/onecc_039.workflow.json21
-rw-r--r--compiler/one-cmds/tests/onecc_040.cfg20
-rw-r--r--compiler/one-cmds/tests/onecc_040.test43
-rw-r--r--compiler/one-cmds/tests/onecc_040.workflow.json10
-rw-r--r--compiler/one-cmds/tests/onecc_041.cfg16
-rw-r--r--compiler/one-cmds/tests/onecc_041.test59
-rw-r--r--compiler/one-cmds/tests/onecc_041.workflow.json61
-rw-r--r--compiler/one-cmds/tests/onecc_042.cfg9
-rw-r--r--compiler/one-cmds/tests/onecc_042.test55
-rw-r--r--compiler/one-cmds/tests/onecc_043.cfg14
-rw-r--r--compiler/one-cmds/tests/onecc_043.test61
-rw-r--r--compiler/one-cmds/tests/onecc_044.cfg20
-rw-r--r--compiler/one-cmds/tests/onecc_044.test74
-rw-r--r--compiler/one-cmds/tests/onecc_045.cfg13
-rw-r--r--compiler/one-cmds/tests/onecc_045.test65
-rw-r--r--compiler/one-cmds/tests/onecc_046.cfg7
-rw-r--r--compiler/one-cmds/tests/onecc_046.test58
-rw-r--r--compiler/one-cmds/tests/onecc_047.cfg7
-rw-r--r--compiler/one-cmds/tests/onecc_047.test59
-rw-r--r--compiler/one-cmds/tests/onecc_048.cfg6
-rw-r--r--compiler/one-cmds/tests/onecc_048.test49
-rw-r--r--compiler/one-cmds/tests/onecc_049.cfg7
-rw-r--r--compiler/one-cmds/tests/onecc_049.test54
-rw-r--r--compiler/one-cmds/tests/onecc_050.cfg7
-rw-r--r--compiler/one-cmds/tests/onecc_050.test54
-rw-r--r--compiler/one-cmds/tests/onecc_051.cfg6
-rw-r--r--compiler/one-cmds/tests/onecc_051.test48
-rw-r--r--compiler/one-cmds/tests/onecc_052.cfg13
-rw-r--r--compiler/one-cmds/tests/onecc_052.test74
-rw-r--r--compiler/one-cmds/tests/onecc_053.cfg13
-rw-r--r--compiler/one-cmds/tests/onecc_053.test74
-rw-r--r--compiler/one-cmds/tests/onecc_054.cfg7
-rw-r--r--compiler/one-cmds/tests/onecc_054.test65
-rw-r--r--compiler/one-cmds/tests/onecc_055.cfg6
-rw-r--r--compiler/one-cmds/tests/onecc_055.test57
-rw-r--r--compiler/one-cmds/tests/onecc_056.cfg6
-rw-r--r--compiler/one-cmds/tests/onecc_056.test48
-rw-r--r--compiler/one-cmds/tests/onecc_057.cfg7
-rw-r--r--compiler/one-cmds/tests/onecc_057.test51
-rw-r--r--compiler/one-cmds/tests/onecc_058.cfg9
-rw-r--r--compiler/one-cmds/tests/onecc_058.test41
-rw-r--r--compiler/one-cmds/tests/onecc_neg_001.test43
-rw-r--r--compiler/one-cmds/tests/onecc_neg_002.cfg20
-rw-r--r--compiler/one-cmds/tests/onecc_neg_002.test43
-rw-r--r--compiler/one-cmds/tests/onecc_neg_003.cfg15
-rw-r--r--compiler/one-cmds/tests/onecc_neg_003.test43
-rw-r--r--compiler/one-cmds/tests/onecc_neg_004.cfg24
-rw-r--r--compiler/one-cmds/tests/onecc_neg_004.test43
-rw-r--r--compiler/one-cmds/tests/onecc_neg_005.cfg16
-rw-r--r--compiler/one-cmds/tests/onecc_neg_005.test50
-rw-r--r--compiler/one-cmds/tests/onecc_neg_006.cfg13
-rw-r--r--compiler/one-cmds/tests/onecc_neg_006.test50
-rw-r--r--compiler/one-cmds/tests/onecc_neg_007.test41
-rw-r--r--compiler/one-cmds/tests/onecc_neg_008.test41
-rw-r--r--compiler/one-cmds/tests/onecc_neg_009.test71
-rw-r--r--compiler/one-cmds/tests/onecc_neg_010.test43
-rw-r--r--compiler/one-cmds/tests/onecc_neg_011.cfg13
-rw-r--r--compiler/one-cmds/tests/onecc_neg_011.test43
-rw-r--r--compiler/one-cmds/tests/onecc_neg_012.cfg14
-rw-r--r--compiler/one-cmds/tests/onecc_neg_012.test43
-rw-r--r--compiler/one-cmds/tests/onecc_neg_013.test43
-rw-r--r--compiler/one-cmds/tests/onecc_neg_014.test43
-rw-r--r--compiler/one-cmds/tests/onecc_neg_014.workflow.json3
-rw-r--r--compiler/one-cmds/tests/onecc_neg_015.test44
-rw-r--r--compiler/one-cmds/tests/onecc_neg_015.workflow.json21
-rw-r--r--compiler/one-cmds/tests/onecc_neg_016.test44
-rw-r--r--compiler/one-cmds/tests/onecc_neg_016.workflow.json21
-rw-r--r--compiler/one-cmds/tests/onecc_neg_017.test43
-rw-r--r--compiler/one-cmds/tests/onecc_neg_017.workflow.json18
-rw-r--r--compiler/one-cmds/tests/onecc_neg_018.test43
-rw-r--r--compiler/one-cmds/tests/onecc_neg_018.workflow.json24
-rw-r--r--compiler/one-cmds/tests/onecc_neg_019.test43
-rw-r--r--compiler/one-cmds/tests/onecc_neg_019.workflow.json21
-rw-r--r--compiler/one-cmds/tests/onecc_neg_020.test43
-rw-r--r--compiler/one-cmds/tests/onecc_neg_020.workflow.json21
-rw-r--r--compiler/one-cmds/tests/onecc_neg_021.test43
-rw-r--r--compiler/one-cmds/tests/onecc_neg_021.workflow.json44
-rw-r--r--compiler/one-cmds/tests/onecc_neg_022.cfg16
-rw-r--r--compiler/one-cmds/tests/onecc_neg_022.test43
-rw-r--r--compiler/one-cmds/tests/onecc_neg_022.workflow.json63
-rw-r--r--compiler/one-cmds/tests/onecc_neg_023.test43
-rw-r--r--compiler/one-cmds/tests/onecc_neg_023.workflow.json30
-rw-r--r--compiler/one-cmds/tests/onecc_neg_024.cfg20
-rw-r--r--compiler/one-cmds/tests/onecc_neg_024.test43
-rw-r--r--compiler/one-cmds/tests/onecc_neg_025.cfg20
-rw-r--r--compiler/one-cmds/tests/onecc_neg_025.test43
-rw-r--r--compiler/one-cmds/tests/onecc_neg_026.cfg13
-rw-r--r--compiler/one-cmds/tests/onecc_neg_026.test44
-rw-r--r--compiler/one-cmds/tests/onecc_neg_027.cfg7
-rw-r--r--compiler/one-cmds/tests/onecc_neg_027.test43
-rw-r--r--compiler/one-cmds/tests/onecc_neg_028.cfg9
-rw-r--r--compiler/one-cmds/tests/onecc_neg_028.test43
-rw-r--r--compiler/one-cmds/tests/onecc_neg_029.cfg6
-rw-r--r--compiler/one-cmds/tests/onecc_neg_029.test43
-rw-r--r--compiler/one-cmds/tests/onecc_neg_030.cfg6
-rw-r--r--compiler/one-cmds/tests/onecc_neg_030.test43
-rw-r--r--compiler/one-cmds/tests/onecc_neg_031.test43
-rw-r--r--compiler/one-cmds/tests/onecc_neg_031.workflow.json29
-rw-r--r--compiler/one-cmds/tests/onecc_neg_032.cfg7
-rw-r--r--compiler/one-cmds/tests/onecc_neg_032.test43
-rw-r--r--compiler/one-cmds/tests/onecc_neg_033.cfg9
-rw-r--r--compiler/one-cmds/tests/onecc_neg_033.test43
-rw-r--r--compiler/one-cmds/tests/onecc_neg_034.cfg6
-rw-r--r--compiler/one-cmds/tests/onecc_neg_034.test43
-rw-r--r--compiler/one-cmds/tests/onecc_neg_035.cfg6
-rw-r--r--compiler/one-cmds/tests/onecc_neg_035.test43
-rw-r--r--compiler/one-cmds/tests/onecc_neg_036.test43
-rw-r--r--compiler/one-cmds/tests/onecc_neg_036.workflow.json29
-rw-r--r--compiler/one-cmds/tests/onnx-operations/CMakeLists.txt86
-rw-r--r--compiler/one-cmds/tests/onnx-operations/README.md28
-rw-r--r--compiler/one-cmds/tests/onnx-operations/prepare_test_materials.sh26
-rw-r--r--compiler/one-cmds/tests/onnx_legalize_run_compare.py129
-rw-r--r--compiler/one-cmds/tests/prepare_test_materials.sh192
-rw-r--r--compiler/one-cmds/tests/preprocess_images.py38
-rw-r--r--compiler/one-cmds/tests/print_onnx_model.py20
-rw-r--r--compiler/one-cmds/tests/pytorch-operations/CMakeLists.txt109
-rw-r--r--compiler/one-cmds/tests/pytorch-operations/README.md28
-rw-r--r--compiler/one-cmds/tests/pytorch-operations/aux_generator.py83
-rw-r--r--compiler/one-cmds/tests/pytorch-operations/entire_model.test40
-rw-r--r--compiler/one-cmds/tests/pytorch-operations/example_generator.py117
-rw-r--r--compiler/one-cmds/tests/pytorch-operations/mar_state_dict_model.test40
-rw-r--r--compiler/one-cmds/tests/pytorch-operations/mar_torchscript_model.test40
-rw-r--r--compiler/one-cmds/tests/pytorch-operations/prepare_test_materials.sh26
-rw-r--r--compiler/one-cmds/tests/pytorch-operations/state_dict_model.test39
-rw-r--r--compiler/one-cmds/tests/pytorch-operations/torchscript_model.test39
-rw-r--r--compiler/one-cmds/tests/rawdata2hdf5_001.test42
-rw-r--r--compiler/one-cmds/tests/rawdata2hdf5_neg_001.test48
-rw-r--r--compiler/one-cmds/tests/rawdata2hdf5_neg_002.test44
-rw-r--r--compiler/one-cmds/tests/rawdata2hdf5_neg_003.test42
-rw-r--r--compiler/one-cmds/tests/rawdata2hdf5_neg_004.test43
-rw-r--r--compiler/one-cmds/validate-onnx2circle/CMakeLists.txt5
-rw-r--r--compiler/one-cmds/validate-onnx2circle/README.md36
-rw-r--r--compiler/one-cmds/validate-onnx2circle/validate_onnx2circle.py156
-rw-r--r--compiler/onecc-docker/README.md36
-rw-r--r--compiler/onecc-docker/debian/changelog6
-rw-r--r--compiler/onecc-docker/debian/compat1
-rw-r--r--compiler/onecc-docker/debian/control13
-rw-r--r--compiler/onecc-docker/debian/copyright3
-rw-r--r--compiler/onecc-docker/debian/onecc-docker.install2
-rw-r--r--compiler/onecc-docker/debian/onecc-docker.links1
-rw-r--r--compiler/onecc-docker/debian/rules8
-rw-r--r--compiler/onecc-docker/docker/Dockerfile26
-rw-r--r--compiler/onecc-docker/onecc-docker131
-rw-r--r--compiler/oneco/CMakeLists.txt11
-rw-r--r--compiler/oneco/requires.cmake1
-rw-r--r--compiler/oneco/src/Frontend.cpp4
-rw-r--r--compiler/oneco/src/GraphBuilder.h6
-rw-r--r--compiler/oneco/src/GraphBuilderContext.h6
-rw-r--r--compiler/oneco/src/GraphBuilderRegistry.h28
-rw-r--r--compiler/oneco/src/Op/Constant.h16
-rw-r--r--compiler/oneco/src/Op/Identity.h8
-rw-r--r--compiler/onnx-tools/CMakeLists.txt27
-rw-r--r--compiler/onnx-tools/README.md65
-rw-r--r--compiler/onnx-tools/onnx-dump.py146
-rw-r--r--compiler/onnx-tools/onnx-ops.py45
-rw-r--r--compiler/onnx2circle/CMakeLists.txt1
-rw-r--r--compiler/onnx2circle/requires.cmake1
-rw-r--r--compiler/onnx2circle/src/onnx2circle.cpp10
-rw-r--r--compiler/onnxkit/CMakeLists.txt3
-rw-r--r--compiler/onnxkit/README.md1
-rw-r--r--compiler/onnxkit/src/Main.cpp7
-rw-r--r--compiler/onnxkit/src/Support.cpp9
-rw-r--r--compiler/oops/CMakeLists.txt3
-rw-r--r--compiler/oops/include/oops/InternalExn.h8
-rw-r--r--compiler/oops/include/oops/UserExn.h4
-rw-r--r--compiler/oops/requires.cmake1
-rw-r--r--compiler/oops/src/oops.test.cpp (renamed from compiler/oops/test.cpp)0
-rw-r--r--compiler/pepper-csv2vec/CMakeLists.txt19
-rw-r--r--compiler/pepper-csv2vec/README.md3
-rw-r--r--compiler/pepper-csv2vec/include/pepper/csv2vec.h32
-rw-r--r--compiler/pepper-csv2vec/src/pepper-csv2vec.cpp58
-rw-r--r--compiler/pepper-csv2vec/src/pepper-csv2vec.test.cpp58
-rw-r--r--compiler/pepper-str/CMakeLists.txt3
-rw-r--r--compiler/pepper-str/include/pepper/str.h2
-rw-r--r--compiler/pepper-str/src/pepper-str.test.cpp (renamed from compiler/pepper-str/test.cpp)0
-rw-r--r--compiler/pepper-strcast/CMakeLists.txt4
-rw-r--r--compiler/pics/CMakeLists.txt33
-rw-r--r--compiler/pics/README.md16
-rw-r--r--compiler/pics/requires.cmake1
-rw-r--r--compiler/plier-tf/src/TestHelper.cpp2
-rw-r--r--compiler/pota-quantization-value-test/CMakeLists.txt63
-rw-r--r--compiler/pota-quantization-value-test/README.md14
-rwxr-xr-xcompiler/pota-quantization-value-test/compare_tensors.py18
-rw-r--r--compiler/pota-quantization-value-test/config_files/Add_002/channel/int16/qconf.json9
-rw-r--r--compiler/pota-quantization-value-test/config_files/Add_002/layer/uint8/qconf.json9
-rw-r--r--compiler/pota-quantization-value-test/config_files/AveragePool2D_000/channel/int16/qconf.json9
-rw-r--r--compiler/pota-quantization-value-test/config_files/AveragePool2D_000/layer/uint8/qconf.json9
-rw-r--r--compiler/pota-quantization-value-test/config_files/Concatenation_001/channel/int16/qconf.json9
-rw-r--r--compiler/pota-quantization-value-test/config_files/Concatenation_001/layer/uint8/qconf.json9
-rw-r--r--compiler/pota-quantization-value-test/config_files/Conv2D_004/channel/int16/qconf.json9
-rw-r--r--compiler/pota-quantization-value-test/config_files/Conv2D_004/layer/uint8/qconf.json9
-rw-r--r--compiler/pota-quantization-value-test/config_files/DepthwiseConv2D_002/channel/int16/qconf.json9
-rw-r--r--compiler/pota-quantization-value-test/config_files/DepthwiseConv2D_002/layer/uint8/qconf.json9
-rw-r--r--compiler/pota-quantization-value-test/config_files/FullyConnected_003/channel/int16/qconf.json9
-rw-r--r--compiler/pota-quantization-value-test/config_files/FullyConnected_003/layer/uint8/qconf.json9
-rw-r--r--compiler/pota-quantization-value-test/config_files/InstanceNorm_001/channel/int16/qconf.json9
-rw-r--r--compiler/pota-quantization-value-test/config_files/InstanceNorm_001/layer/uint8/qconf.json9
-rw-r--r--compiler/pota-quantization-value-test/config_files/MaxPool2D_000/channel/int16/qconf.json9
-rw-r--r--compiler/pota-quantization-value-test/config_files/MaxPool2D_000/layer/uint8/qconf.json9
-rw-r--r--compiler/pota-quantization-value-test/config_files/Mean_000/channel/int16/qconf.json9
-rw-r--r--compiler/pota-quantization-value-test/config_files/Mean_000/layer/uint8/qconf.json9
-rw-r--r--compiler/pota-quantization-value-test/config_files/Mul_001/channel/int16/qconf.json9
-rw-r--r--compiler/pota-quantization-value-test/config_files/Mul_001/layer/uint8/qconf.json9
-rw-r--r--compiler/pota-quantization-value-test/config_files/PRelu_001/channel/int16/qconf.json9
-rw-r--r--compiler/pota-quantization-value-test/config_files/PRelu_001/layer/uint8/qconf.json9
-rw-r--r--compiler/pota-quantization-value-test/config_files/ReLU_000/channel/int16/qconf.json9
-rw-r--r--compiler/pota-quantization-value-test/config_files/ReLU_000/layer/uint8/qconf.json9
-rw-r--r--compiler/pota-quantization-value-test/config_files/Split_000/channel/int16/qconf.json14
-rw-r--r--compiler/pota-quantization-value-test/config_files/Split_000/channel/uint8/qconf.json14
-rw-r--r--compiler/pota-quantization-value-test/config_files/TransposeConv_001/channel/int16/qconf.json9
-rw-r--r--compiler/pota-quantization-value-test/config_files/TransposeConv_001/layer/uint8/qconf.json9
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Add_002/channel/int16/quantization/ifm1.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Add_002/channel/int16/quantization/ifm2.json32
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Add_002/channel/int16/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Add_002/channel/int16/record_minmax/ifm1.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Add_002/channel/int16/record_minmax/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Add_002/layer/uint8/quantization/ifm1.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Add_002/layer/uint8/quantization/ifm2.json32
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Add_002/layer/uint8/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Add_002/layer/uint8/record_minmax/ifm1.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Add_002/layer/uint8/record_minmax/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ifm1_Quantize.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ifm2.json32
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ifm1_Quantize.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ifm2.json32
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000/channel/int16/quantization/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000/channel/int16/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000/channel/int16/record_minmax/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000/channel/int16/record_minmax/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000/layer/uint8/quantization/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000/layer/uint8/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000/layer/uint8/record_minmax/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000/layer/uint8/record_minmax/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/channel/int16/quantization/ifm_Quantize.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/channel/int16/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/layer/uint8/quantization/ifm_Quantize.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/layer/uint8/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/channel/int16/quantization/ifm1.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/channel/int16/quantization/ifm2.json28
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/channel/int16/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/channel/int16/record_minmax/ifm1.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/channel/int16/record_minmax/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/layer/uint8/quantization/ifm1.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/layer/uint8/quantization/ifm2.json28
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/layer/uint8/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/layer/uint8/record_minmax/ifm1.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/layer/uint8/record_minmax/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ifm1_Quantize.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ifm2.json28
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ifm1_Quantize.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ifm2.json28
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int16/fake_quantization/ker.json48
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int16/quantization/bias.json10
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int16/quantization/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int16/quantization/ker.json61
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int16/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int16/record_minmax/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int16/record_minmax/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int16/wo_quantization/ker.json61
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int8/wo_quantization/ker.json61
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/fake_quantization/ker.json48
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/bias.json7
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ifm_Quantize.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ker.json52
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/fake_quantization/ker.json48
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/bias.json10
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ifm_Quantize.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ker.json61
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int16/fake_quantization/ker.json34
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int16/quantization/bias.json14
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int16/quantization/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int16/quantization/ker.json53
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int16/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int16/record_minmax/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int16/record_minmax/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int16/wo_quantization/ker.json53
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int8/wo_quantization/ker.json53
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/fake_quantization/ker.json34
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/bias.json9
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ifm_Quantize.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ker.json38
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/fake_quantization/ker.json34
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/bias.json14
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ifm_Quantize.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ker.json53
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/int16/fake_quantization/weight.json76
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/int16/quantization/bias.json14
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/int16/quantization/in.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/int16/quantization/out.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/int16/quantization/weight.json95
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/int16/record_minmax/in.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/int16/record_minmax/out.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/fake_quantization/weight.json76
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/bias.json9
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/in_Quantize.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/out.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/weight.json80
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/fake_quantization/weight.json76
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/bias.json14
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/in_Quantize.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/out.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/weight.json95
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/beta.json20
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/gamma.json20
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/record_minmax/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/record_minmax/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/beta.json10
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/gamma.json10
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/record_minmax/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/record_minmax/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000/channel/int16/quantization/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000/channel/int16/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000/channel/int16/record_minmax/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000/channel/int16/record_minmax/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000/layer/uint8/quantization/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000/layer/uint8/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000/layer/uint8/record_minmax/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000/layer/uint8/record_minmax/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/channel/int16/quantization/ifm_Quantize.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/channel/int16/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/layer/uint8/quantization/ifm_Quantize.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/layer/uint8/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Mean_000/channel/int16/quantization/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Mean_000/channel/int16/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Mean_000/channel/int16/quantization/reduction_indices.json5
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Mean_000/channel/int16/record_minmax/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Mean_000/channel/int16/record_minmax/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Mean_000/layer/uint8/quantization/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Mean_000/layer/uint8/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Mean_000/layer/uint8/record_minmax/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Mean_000/layer/uint8/record_minmax/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/channel/int16/quantization/ifm_Quantize.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/channel/int16/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/ifm_Quantize.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/reduction_indices.json5
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Mul_001/channel/int16/quantization/ifm1.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Mul_001/channel/int16/quantization/ifm2.json32
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Mul_001/channel/int16/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Mul_001/channel/int16/record_minmax/ifm1.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Mul_001/channel/int16/record_minmax/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Mul_001/layer/uint8/quantization/ifm1.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Mul_001/layer/uint8/quantization/ifm2.json32
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Mul_001/layer/uint8/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Mul_001/layer/uint8/record_minmax/ifm1.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Mul_001/layer/uint8/record_minmax/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ifm1_Quantize.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ifm2.json32
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ifm1_Quantize.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ifm2.json32
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/alpha.json21
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/record_minmax/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/record_minmax/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/quantization/alpha.json21
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/quantization/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/record_minmax/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/record_minmax/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/PRelu_001/layer/uint8/quantization/alpha.json13
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/PRelu_001/layer/uint8/quantization/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/PRelu_001/layer/uint8/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/PRelu_001/layer/uint8/record_minmax/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/PRelu_001/layer/uint8/record_minmax/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/alpha.json13
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/ifm_Quantize.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/alpha.json21
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/ifm_Quantize.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/ReLU_000/channel/int16/quantization/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/ReLU_000/channel/int16/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/ReLU_000/channel/int16/record_minmax/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/ReLU_000/channel/int16/record_minmax/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/ReLU_000/layer/uint8/quantization/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/ReLU_000/layer/uint8/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/ReLU_000/layer/uint8/record_minmax/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/ReLU_000/layer/uint8/record_minmax/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/channel/int16/quantization/ifm_Quantize.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/channel/int16/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/layer/uint8/quantization/ifm_Quantize.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/layer/uint8/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/int16/quantization/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/int16/quantization/ofm1.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/int16/quantization/ofm2.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/int16/quantization/split_dim.json5
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/int16/record_minmax/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/int16/record_minmax/ofm1.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/int16/record_minmax/ofm2.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/uint8/quantization/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/uint8/quantization/ofm1.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/uint8/quantization/ofm2.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/uint8/quantization/split_dim.json5
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/uint8/record_minmax/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/uint8/record_minmax/ofm1.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/uint8/record_minmax/ofm2.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ifm_Quantize.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ofm1.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ofm2.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/split_dim.json5
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ifm_Quantize.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ofm1.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ofm2.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/split_dim.json5
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/int16/fake_quantization/ker.json48
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/int16/quantization/.json5
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/int16/quantization/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/int16/quantization/ker.json58
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/int16/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/int16/record_minmax/ifm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/int16/record_minmax/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/fake_quantization/ker.json48
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ifm_Quantize.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ker.json52
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ofm.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/fake_quantization/ker.json48
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ifm_Quantize.json4
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ker.json58
-rw-r--r--compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ofm.json4
-rwxr-xr-xcompiler/pota-quantization-value-test/gen_h5_explicit_inputs.py35
-rw-r--r--compiler/pota-quantization-value-test/requires.cmake1
-rw-r--r--compiler/pota-quantization-value-test/test.lst59
-rwxr-xr-xcompiler/pota-quantization-value-test/test_fake_wquant_with_config.sh87
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Add_002/channel/int16/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Add_002/channel/int16/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Add_002/channel/int16/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Add_002/channel/int16/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Add_002/channel/int16/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Add_002/layer/uint8/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Add_002/layer/uint8/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Add_002/layer/uint8/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Add_002/layer/uint8/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Add_002/layer/uint8/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/channel/int16/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/channel/int16/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/channel/int16/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/channel/int16/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/channel/int16/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/layer/uint8/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/layer/uint8/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/layer/uint8/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/layer/uint8/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/layer/uint8/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Concatenation_001/channel/int16/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Concatenation_001/channel/int16/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Concatenation_001/channel/int16/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Concatenation_001/channel/int16/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Concatenation_001/channel/int16/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Concatenation_001/layer/uint8/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Concatenation_001/layer/uint8/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Concatenation_001/layer/uint8/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Concatenation_001/layer/uint8/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Concatenation_001/layer/uint8/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/int16/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/int16/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/int16/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/int16/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/int16/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/int16/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/int16/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/int16/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/int16/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/int16/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/int16/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/int16/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/int16/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/int16/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/int16/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/channel/int16/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/channel/int16/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/channel/int16/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/channel/int16/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/channel/int16/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/layer/uint8/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/layer/uint8/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/layer/uint8/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/layer/uint8/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/layer/uint8/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mean_000/channel/int16/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mean_000/channel/int16/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mean_000/channel/int16/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mean_000/channel/int16/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mean_000/channel/int16/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mean_000/layer/uint8/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mean_000/layer/uint8/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mean_000/layer/uint8/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mean_000/layer/uint8/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mean_000/layer/uint8/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mul_001/channel/int16/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mul_001/channel/int16/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mul_001/channel/int16/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mul_001/channel/int16/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mul_001/channel/int16/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mul_001/layer/uint8/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mul_001/layer/uint8/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mul_001/layer/uint8/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mul_001/layer/uint8/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mul_001/layer/uint8/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/PRelu_001/layer/uint8/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/PRelu_001/layer/uint8/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/PRelu_001/layer/uint8/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/PRelu_001/layer/uint8/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/PRelu_001/layer/uint8/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/ReLU_000/channel/int16/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/ReLU_000/channel/int16/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/ReLU_000/channel/int16/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/ReLU_000/channel/int16/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/ReLU_000/channel/int16/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/ReLU_000/layer/uint8/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/ReLU_000/layer/uint8/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/ReLU_000/layer/uint8/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/ReLU_000/layer/uint8/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/ReLU_000/layer/uint8/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Split_000/channel/int16/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Split_000/channel/int16/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Split_000/channel/int16/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Split_000/channel/int16/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Split_000/channel/int16/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Split_000/channel/uint8/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Split_000/channel/uint8/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Split_000/channel/uint8/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Split_000/channel/uint8/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Split_000/channel/uint8/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/int16/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/int16/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/int16/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/int16/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/int16/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/4.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/0.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/1.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/2.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/3.txt1
-rw-r--r--compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/4.txt1
-rwxr-xr-xcompiler/pota-quantization-value-test/test_parallel_record_minmax.sh95
-rwxr-xr-xcompiler/pota-quantization-value-test/test_quantization_with_config.sh109
-rwxr-xr-xcompiler/pota-quantization-value-test/test_record_minmax.sh4
-rwxr-xr-xcompiler/pota-quantization-value-test/test_wo_quantization.sh87
-rw-r--r--compiler/pp/CMakeLists.txt5
-rw-r--r--compiler/pp/include/pp/IndentedStringBuilder.h2
-rw-r--r--compiler/rawdata2hdf5/CMakeLists.txt17
-rw-r--r--compiler/rawdata2hdf5/README.md24
-rw-r--r--compiler/rawdata2hdf5/rawdata2hdf5116
-rw-r--r--compiler/record-minmax-conversion-test/CMakeLists.txt6
-rwxr-xr-xcompiler/record-minmax-conversion-test/gen_h5_random_inputs.py12
-rwxr-xr-xcompiler/record-minmax-conversion-test/testall.sh10
-rw-r--r--compiler/record-minmax-thread-safety-test/CMakeLists.txt68
-rw-r--r--compiler/record-minmax-thread-safety-test/gen_h5_random_inputs.py54
-rw-r--r--compiler/record-minmax-thread-safety-test/requires.cmake2
-rw-r--r--compiler/record-minmax-thread-safety-test/test.lst16
-rwxr-xr-xcompiler/record-minmax-thread-safety-test/testall.sh83
-rw-r--r--compiler/record-minmax/CMakeLists.txt57
-rw-r--r--compiler/record-minmax/driver/Driver.cpp181
-rw-r--r--compiler/record-minmax/include/MinMaxComputer.h85
-rw-r--r--compiler/record-minmax/include/MinMaxObserver.h18
-rw-r--r--compiler/record-minmax/include/MinMaxVectors.h33
-rw-r--r--compiler/record-minmax/include/RecordFunction.h77
-rw-r--r--compiler/record-minmax/include/RecordMinMax.h46
-rw-r--r--compiler/record-minmax/requires.cmake2
-rw-r--r--compiler/record-minmax/src/HDF5Importer.cpp133
-rw-r--r--compiler/record-minmax/src/HDF5Importer.h82
-rw-r--r--compiler/record-minmax/src/MinMaxComputer.cpp86
-rw-r--r--compiler/record-minmax/src/MinMaxObserver.cpp61
-rw-r--r--compiler/record-minmax/src/RecordFunction.cpp97
-rw-r--r--compiler/record-minmax/src/RecordMinMax.cpp542
-rw-r--r--compiler/record-minmax/tests/MinMaxComputer.test.cpp75
-rw-r--r--compiler/record-minmax/tests/RecordFunction.test.cpp8
-rw-r--r--compiler/souschef/CMakeLists.txt9
-rw-r--r--compiler/souschef/include/souschef/Data/Explicit.h56
-rw-r--r--compiler/souschef/include/souschef/Data/Gaussian.h63
-rw-r--r--compiler/souschef/include/souschef/DataChef.def19
-rw-r--r--compiler/souschef/include/souschef/Dims.h1
-rw-r--r--compiler/souschef/src/Explicit.cpp98
-rw-r--r--compiler/souschef/src/Gaussian.cpp121
-rw-r--r--compiler/souschef/src/LexicalCast.cpp27
-rw-r--r--compiler/stdex/CMakeLists.txt16
-rw-r--r--compiler/stdex/README.md22
-rw-r--r--compiler/stdex/include/stdex/Memory.h29
-rw-r--r--compiler/stdex/include/stdex/Queue.h38
-rw-r--r--compiler/stdex/include/stdex/Set.h55
-rw-r--r--compiler/stdex/src/Memory.test.cpp60
-rw-r--r--compiler/stdex/src/Queue.test.cpp32
-rw-r--r--compiler/stdex/src/Set.test.cpp37
-rw-r--r--compiler/tf2circle-conversion-test/CMakeLists.txt4
-rw-r--r--compiler/tf2circle-dredd-pb-test/CMakeLists.txt4
-rw-r--r--compiler/tf2circle-dredd-pbtxt-test/CMakeLists.txt4
-rw-r--r--compiler/tf2circle-model-test/CMakeLists.txt4
-rw-r--r--compiler/tf2circle-value-pbtxt-remote-test/CMakeLists.txt2
-rwxr-xr-xcompiler/tf2circle-value-pbtxt-remote-test/testall.sh2
-rw-r--r--compiler/tf2circle/CMakeLists.txt1
-rw-r--r--compiler/tf2circle/requires.cmake1
-rw-r--r--compiler/tf2circle/src/CustomopConfLoader.cpp2
-rw-r--r--compiler/tf2circle/src/tf2circle.cpp12
-rw-r--r--compiler/tf2nnpkg/CMakeLists.txt1
-rw-r--r--compiler/tf2nnpkg/requires.cmake1
-rw-r--r--compiler/tf2nnpkg/src/tf2nnpkg.cpp11
-rw-r--r--compiler/tf2tflite-dredd-pb-test/CMakeLists.txt4
-rw-r--r--compiler/tf2tflite-dredd-pbtxt-test/CMakeLists.txt4
-rw-r--r--compiler/tf2tflite-value-pb-test/CMakeLists.txt4
-rw-r--r--compiler/tf2tflite-value-pbtxt-test/CMakeLists.txt4
-rw-r--r--compiler/tf2tflite/CMakeLists.txt1
-rw-r--r--compiler/tf2tflite/requires.cmake1
-rw-r--r--compiler/tf2tflite/src/CustomopConfLoader.cpp2
-rw-r--r--compiler/tf2tflite/src/Driver.cpp12
-rw-r--r--compiler/tf2tfliteV2-conversion-test/CMakeLists.txt6
-rwxr-xr-xcompiler/tf2tfliteV2/tf2tfliteV2.py71
-rw-r--r--compiler/tfgraph-xform/CMakeLists.txt328
-rw-r--r--compiler/tfgraph-xform/README.md5
-rw-r--r--compiler/tfinfo-v2/CMakeLists.txt1
-rw-r--r--compiler/tfinfo-v2/include/tfinfo-v2/TensorSignature.h2
-rw-r--r--compiler/tfinfo-v2/requires.cmake1
-rw-r--r--compiler/tfinfo-v2/src/TFInfo_v2.test.cpp4
-rw-r--r--compiler/tfinfo-v2/src/TensorInfoLoader.cpp10
-rw-r--r--compiler/tfinfo/CMakeLists.txt2
-rw-r--r--compiler/tfinfo/include/nnkit/support/tftestinfo/ParsedTensor.h6
-rw-r--r--compiler/tfinfo/requires.cmake1
-rw-r--r--compiler/tfinfo/src/TensorInfoParser.cpp3
-rw-r--r--compiler/tfkit/CMakeLists.txt1
-rw-r--r--compiler/tfkit/src/ConvertCommand.cpp9
-rw-r--r--compiler/tfkit/src/Main.cpp13
-rw-r--r--compiler/tfkit/src/PackCommand.cpp4
-rw-r--r--compiler/tfkit/src/Support.cpp9
-rw-r--r--compiler/tfkit/src/Support.hpp2
-rw-r--r--compiler/tfkit/src/UnpackCommand.cpp8
-rw-r--r--compiler/tfl-inspect/CMakeLists.txt7
-rw-r--r--compiler/tfl-inspect/driver/Driver.cpp6
-rw-r--r--compiler/tfl-inspect/requires.cmake2
-rw-r--r--compiler/tfl-inspect/src/Reader.cpp63
-rw-r--r--compiler/tfl-inspect/src/Reader.h6
-rw-r--r--compiler/tfl-verify/CMakeLists.txt6
-rw-r--r--compiler/tfl-verify/requires.cmake2
-rw-r--r--compiler/tfl-verify/src/Driver.cpp2
-rw-r--r--compiler/tflchef/CMakeLists.txt11
-rw-r--r--compiler/tflchef/core/CMakeLists.txt2
-rw-r--r--compiler/tflchef/core/src/Convert.cpp301
-rw-r--r--compiler/tflchef/core/src/Convert.h54
-rw-r--r--compiler/tflchef/core/src/CustomOp/AddV2.cpp2
-rw-r--r--compiler/tflchef/core/src/CustomOp/All.cpp2
-rw-r--r--compiler/tflchef/core/src/CustomOp/BatchMatMulV2.cpp2
-rw-r--r--compiler/tflchef/core/src/CustomOp/BroadcastTo.cpp61
-rw-r--r--compiler/tflchef/core/src/CustomOp/BroadcastTo.h49
-rw-r--r--compiler/tflchef/core/src/CustomOp/Erf.cpp58
-rw-r--r--compiler/tflchef/core/src/CustomOp/Erf.h49
-rw-r--r--compiler/tflchef/core/src/CustomOp/MatMul.cpp2
-rw-r--r--compiler/tflchef/core/src/CustomOp/MatrixBandPart.cpp2
-rw-r--r--compiler/tflchef/core/src/CustomOp/MaxPoolWithArgmax.cpp86
-rw-r--r--compiler/tflchef/core/src/CustomOp/MaxPoolWithArgmax.h49
-rw-r--r--compiler/tflchef/core/src/DataChef.def30
-rw-r--r--compiler/tflchef/core/src/ModelChef.cpp394
-rw-r--r--compiler/tflchef/core/src/Op/BidirectionalSequenceLSTM.cpp47
-rw-r--r--compiler/tflchef/core/src/Op/BidirectionalSequenceLSTM.h53
-rw-r--r--compiler/tflchef/core/src/Op/Densify.cpp29
-rw-r--r--compiler/tflchef/core/src/Op/Densify.h46
-rw-r--r--compiler/tflchef/core/src/Op/Dequantize.cpp27
-rw-r--r--compiler/tflchef/core/src/Op/Dequantize.h46
-rw-r--r--compiler/tflchef/core/src/Op/FakeQuant.cpp41
-rw-r--r--compiler/tflchef/core/src/Op/FakeQuant.h49
-rw-r--r--compiler/tflchef/core/src/Op/FullyConnected.cpp1
-rw-r--r--compiler/tflchef/core/src/Op/Gelu.cpp34
-rw-r--r--compiler/tflchef/core/src/Op/Gelu.h46
-rw-r--r--compiler/tflchef/core/src/Op/HardSwish.cpp27
-rw-r--r--compiler/tflchef/core/src/Op/HardSwish.h46
-rw-r--r--compiler/tflchef/core/src/Op/LocalResponseNormalization.h2
-rw-r--r--compiler/tflchef/core/src/Op/Quantize.cpp27
-rw-r--r--compiler/tflchef/core/src/Op/Quantize.h46
-rw-r--r--compiler/tflchef/core/src/Op/SVDF.cpp41
-rw-r--r--compiler/tflchef/core/src/Op/SVDF.h46
-rw-r--r--compiler/tflchef/core/src/Op/Squeeze.cpp2
-rw-r--r--compiler/tflchef/core/src/Op/StridedSlice.cpp6
-rw-r--r--compiler/tflchef/core/src/Op/TransposeConv.cpp7
-rw-r--r--compiler/tflchef/core/src/Op/UnidirectionalSequenceLSTM.cpp45
-rw-r--r--compiler/tflchef/core/src/Op/UnidirectionalSequenceLSTM.h53
-rw-r--r--compiler/tflchef/core/src/OpChef.def12
-rw-r--r--compiler/tflchef/core/src/OpChefs.h12
-rw-r--r--compiler/tflchef/proto/tflchef.proto139
-rw-r--r--compiler/tflchef/requires.cmake2
-rw-r--r--compiler/tflchef/tests/CMakeLists.txt43
-rw-r--r--compiler/tflchef/tests/custom_erf/test.recipe17
-rw-r--r--compiler/tflchef/tests/explicit_bool/test.recipe31
-rw-r--r--compiler/tflchef/tests/make_sparse/test.recipe44
-rw-r--r--compiler/tflchef/tests/make_sparse_f16/test.recipe54
-rw-r--r--compiler/tflchef/tests/shape_signature/test.recipe19
-rw-r--r--compiler/tflchef/tests/shape_signature/test.reverse0
-rw-r--r--compiler/tflchef/tests/short_int_datatype/test.recipe44
-rw-r--r--compiler/tflchef/tests/short_int_datatype/test.reverse0
-rw-r--r--compiler/tflchef/tests/signature_def_index/test.recipe59
-rw-r--r--compiler/tflchef/tests/signature_def_name/test.recipe59
-rw-r--r--compiler/tflchef/tests/string_tensor/test.recipe30
-rw-r--r--compiler/tflchef/tflite/CMakeLists.txt5
-rw-r--r--compiler/tflchef/tflite/src/Convert.cpp45
-rw-r--r--compiler/tflchef/tflite/src/Convert.h2
-rw-r--r--compiler/tflchef/tflite/src/FillerHelper.cpp15
-rw-r--r--compiler/tflchef/tflite/src/FillerHelper.h8
-rw-r--r--compiler/tflchef/tflite/src/Op/Add.cpp6
-rw-r--r--compiler/tflchef/tflite/src/Op/BidirectionalSequenceLSTM.cpp67
-rw-r--r--compiler/tflchef/tflite/src/Op/Dequantize.cpp40
-rw-r--r--compiler/tflchef/tflite/src/Op/FakeQuant.cpp50
-rw-r--r--compiler/tflchef/tflite/src/Op/FullyConnected.cpp11
-rw-r--r--compiler/tflchef/tflite/src/Op/Gelu.cpp46
-rw-r--r--compiler/tflchef/tflite/src/Op/HardSwish.cpp40
-rw-r--r--compiler/tflchef/tflite/src/Op/Maximum.cpp5
-rw-r--r--compiler/tflchef/tflite/src/Op/Minimum.cpp3
-rw-r--r--compiler/tflchef/tflite/src/Op/Mul.cpp6
-rw-r--r--compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.cpp2
-rw-r--r--compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp2
-rw-r--r--compiler/tflchef/tflite/src/Op/PRelu.cpp5
-rw-r--r--compiler/tflchef/tflite/src/Op/PadV2.cpp7
-rw-r--r--compiler/tflchef/tflite/src/Op/Quantize.cpp40
-rw-r--r--compiler/tflchef/tflite/src/Op/SVDF.cpp59
-rw-r--r--compiler/tflchef/tflite/src/Op/ScatterNd.cpp6
-rw-r--r--compiler/tflchef/tflite/src/Op/SegmentSum.cpp7
-rw-r--r--compiler/tflchef/tflite/src/Op/Sub.cpp6
-rw-r--r--compiler/tflchef/tflite/src/Op/TransposeConv.cpp2
-rw-r--r--compiler/tflchef/tflite/src/Op/UnidirectionalSequenceLSTM.cpp66
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Abs.h (renamed from compiler/tflchef/tflite/src/Op/Abs.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Add.h (renamed from compiler/tflchef/tflite/src/Op/Add.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/AddN.h (renamed from compiler/tflchef/tflite/src/Op/AddN.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ArgMax.h (renamed from compiler/tflchef/tflite/src/Op/ArgMax.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ArgMin.h (renamed from compiler/tflchef/tflite/src/Op/ArgMin.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/AveragePool2D.h (renamed from compiler/tflchef/tflite/src/Op/AveragePool2D.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/BatchMatMul.h (renamed from compiler/tflchef/tflite/src/Op/BatchMatMul.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/BatchToSpaceND.h (renamed from compiler/tflchef/tflite/src/Op/BatchToSpaceND.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/BidirectionalSequenceLSTM.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Cast.h (renamed from compiler/tflchef/tflite/src/Op/Cast.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Ceil.h (renamed from compiler/tflchef/tflite/src/Op/Ceil.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Concatenation.h (renamed from compiler/tflchef/tflite/src/Op/Concatenation.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Conv2D.h (renamed from compiler/tflchef/tflite/src/Op/Conv2D.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Cos.h (renamed from compiler/tflchef/tflite/src/Op/Cos.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/DepthToSpace.h (renamed from compiler/tflchef/tflite/src/Op/DepthToSpace.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/DepthwiseConv2D.h (renamed from compiler/tflchef/tflite/src/Op/DepthwiseConv2D.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Dequantize.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Div.h (renamed from compiler/tflchef/tflite/src/Op/Div.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ELU.h (renamed from compiler/tflchef/tflite/src/Op/ELU.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Equal.h (renamed from compiler/tflchef/tflite/src/Op/Equal.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Exp.h (renamed from compiler/tflchef/tflite/src/Op/Exp.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ExpandDims.h (renamed from compiler/tflchef/tflite/src/Op/ExpandDims.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/FakeQuant.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Fill.h (renamed from compiler/tflchef/tflite/src/Op/Fill.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Floor.h (renamed from compiler/tflchef/tflite/src/Op/Floor.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/FloorDiv.h (renamed from compiler/tflchef/tflite/src/Op/FloorDiv.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/FloorMod.h (renamed from compiler/tflchef/tflite/src/Op/FloorMod.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/FullyConnected.h (renamed from compiler/tflchef/tflite/src/Op/FullyConnected.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Gather.h (renamed from compiler/tflchef/tflite/src/Op/Gather.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/GatherNd.h (renamed from compiler/tflchef/tflite/src/Op/GatherNd.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Gelu.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Greater.h (renamed from compiler/tflchef/tflite/src/Op/Greater.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/GreaterEqual.h (renamed from compiler/tflchef/tflite/src/Op/GreaterEqual.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/HardSwish.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/include/L2Normalize.h (renamed from compiler/tflchef/tflite/src/Op/L2Normalize.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/L2Pool2D.h (renamed from compiler/tflchef/tflite/src/Op/L2Pool2D.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/LeakyRelu.h (renamed from compiler/tflchef/tflite/src/Op/LeakyRelu.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Less.h (renamed from compiler/tflchef/tflite/src/Op/Less.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/LessEqual.h (renamed from compiler/tflchef/tflite/src/Op/LessEqual.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/LocalResponseNormalization.h (renamed from compiler/tflchef/tflite/src/Op/LocalResponseNormalization.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Log.h (renamed from compiler/tflchef/tflite/src/Op/Log.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/LogSoftmax.h (renamed from compiler/tflchef/tflite/src/Op/LogSoftmax.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/LogicalAnd.h (renamed from compiler/tflchef/tflite/src/Op/LogicalAnd.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/LogicalNot.h (renamed from compiler/tflchef/tflite/src/Op/LogicalNot.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/LogicalOr.h (renamed from compiler/tflchef/tflite/src/Op/LogicalOr.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Logistic.h (renamed from compiler/tflchef/tflite/src/Op/Logistic.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/MatrixDiag.h (renamed from compiler/tflchef/tflite/src/Op/MatrixDiag.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/MatrixSetDiag.h (renamed from compiler/tflchef/tflite/src/Op/MatrixSetDiag.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/MaxPool2D.h (renamed from compiler/tflchef/tflite/src/Op/MaxPool2D.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Maximum.h (renamed from compiler/tflchef/tflite/src/Op/Maximum.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Mean.h (renamed from compiler/tflchef/tflite/src/Op/Mean.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Minimum.h (renamed from compiler/tflchef/tflite/src/Op/Minimum.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/MirrorPad.h (renamed from compiler/tflchef/tflite/src/Op/MirrorPad.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Mul.h (renamed from compiler/tflchef/tflite/src/Op/Mul.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Neg.h (renamed from compiler/tflchef/tflite/src/Op/Neg.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/NonMaxSuppressionV4.h (renamed from compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/NonMaxSuppressionV5.h (renamed from compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/NotEqual.h (renamed from compiler/tflchef/tflite/src/Op/NotEqual.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/OneHot.h (renamed from compiler/tflchef/tflite/src/Op/OneHot.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/PRelu.h (renamed from compiler/tflchef/tflite/src/Op/PRelu.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Pack.h (renamed from compiler/tflchef/tflite/src/Op/Pack.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Pad.h (renamed from compiler/tflchef/tflite/src/Op/Pad.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/PadV2.h (renamed from compiler/tflchef/tflite/src/Op/PadV2.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Pow.h (renamed from compiler/tflchef/tflite/src/Op/Pow.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Quantize.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Range.h (renamed from compiler/tflchef/tflite/src/Op/Range.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Rank.h (renamed from compiler/tflchef/tflite/src/Op/Rank.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ReLU.h (renamed from compiler/tflchef/tflite/src/Op/ReLU.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ReLU6.h (renamed from compiler/tflchef/tflite/src/Op/ReLU6.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ReLUN1To1.h (renamed from compiler/tflchef/tflite/src/Op/ReLUN1To1.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ReduceAny.h (renamed from compiler/tflchef/tflite/src/Op/ReduceAny.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ReduceMax.h (renamed from compiler/tflchef/tflite/src/Op/ReduceMax.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ReduceMin.h (renamed from compiler/tflchef/tflite/src/Op/ReduceMin.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ReduceProd.h (renamed from compiler/tflchef/tflite/src/Op/ReduceProd.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Reshape.h (renamed from compiler/tflchef/tflite/src/Op/Reshape.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ResizeBilinear.h (renamed from compiler/tflchef/tflite/src/Op/ResizeBilinear.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ResizeNearestNeighbor.h (renamed from compiler/tflchef/tflite/src/Op/ResizeNearestNeighbor.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ReverseSequence.h (renamed from compiler/tflchef/tflite/src/Op/ReverseSequence.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ReverseV2.h (renamed from compiler/tflchef/tflite/src/Op/ReverseV2.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Round.h (renamed from compiler/tflchef/tflite/src/Op/Round.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Rsqrt.h (renamed from compiler/tflchef/tflite/src/Op/Rsqrt.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/SVDF.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ScatterNd.h (renamed from compiler/tflchef/tflite/src/Op/ScatterNd.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/SegmentSum.h (renamed from compiler/tflchef/tflite/src/Op/SegmentSum.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Select.h (renamed from compiler/tflchef/tflite/src/Op/Select.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/SelectV2.h (renamed from compiler/tflchef/tflite/src/Op/SelectV2.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Shape.h (renamed from compiler/tflchef/tflite/src/Op/Shape.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Sin.h (renamed from compiler/tflchef/tflite/src/Op/Sin.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Slice.h (renamed from compiler/tflchef/tflite/src/Op/Slice.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Softmax.h (renamed from compiler/tflchef/tflite/src/Op/Softmax.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/SpaceToBatchND.h (renamed from compiler/tflchef/tflite/src/Op/SpaceToBatchND.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/SpaceToDepth.h (renamed from compiler/tflchef/tflite/src/Op/SpaceToDepth.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/SparseToDense.h (renamed from compiler/tflchef/tflite/src/Op/SparseToDense.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Split.h (renamed from compiler/tflchef/tflite/src/Op/Split.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/SplitV.h (renamed from compiler/tflchef/tflite/src/Op/SplitV.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Sqrt.h (renamed from compiler/tflchef/tflite/src/Op/Sqrt.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Square.h (renamed from compiler/tflchef/tflite/src/Op/Square.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/SquaredDifference.h (renamed from compiler/tflchef/tflite/src/Op/SquaredDifference.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Squeeze.h (renamed from compiler/tflchef/tflite/src/Op/Squeeze.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/StridedSlice.h (renamed from compiler/tflchef/tflite/src/Op/StridedSlice.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Sub.h (renamed from compiler/tflchef/tflite/src/Op/Sub.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Sum.h (renamed from compiler/tflchef/tflite/src/Op/Sum.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Tanh.h (renamed from compiler/tflchef/tflite/src/Op/Tanh.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Tile.h (renamed from compiler/tflchef/tflite/src/Op/Tile.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/TopKV2.h (renamed from compiler/tflchef/tflite/src/Op/TopKV2.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Transpose.h (renamed from compiler/tflchef/tflite/src/Op/Transpose.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/TransposeConv.h (renamed from compiler/tflchef/tflite/src/Op/TransposeConv.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/UnidirectionalSequenceLSTM.h39
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Unique.h (renamed from compiler/tflchef/tflite/src/Op/Unique.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Unpack.h (renamed from compiler/tflchef/tflite/src/Op/Unpack.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Where.h (renamed from compiler/tflchef/tflite/src/Op/Where.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ZerosLike.h (renamed from compiler/tflchef/tflite/src/Op/ZerosLike.h)0
-rw-r--r--compiler/tflchef/tflite/src/RecipeChef.cpp105
-rw-r--r--compiler/tflchef/tflite/src/TFliteImport.cpp37
-rw-r--r--compiler/tflchef/tflite/src/TFliteImport.h5
-rw-r--r--compiler/tflchef/tflite/src/TFliteOpChefs.h216
-rw-r--r--compiler/tflchef/tflite/src/TFliteOpRegistry.h8
-rw-r--r--compiler/tflchef/tools/console/CMakeLists.txt11
-rw-r--r--compiler/tflchef/tools/console/Driver.cpp10
-rw-r--r--compiler/tflchef/tools/console/Driver.test.cpp41
-rw-r--r--compiler/tflchef/tools/file/CMakeLists.txt2
-rw-r--r--compiler/tflchef/tools/file/Driver.cpp10
-rw-r--r--compiler/tflchef/tools/reverse/CMakeLists.txt2
-rw-r--r--compiler/tflchef/tools/reverse/Driver.cpp6
-rw-r--r--compiler/tfldump/CMakeLists.txt11
-rw-r--r--compiler/tfldump/README.md1
-rw-r--r--compiler/tfldump/driver/Driver.cpp15
-rw-r--r--compiler/tfldump/include/tflread/Model.h43
-rw-r--r--compiler/tfldump/requires.cmake3
-rw-r--r--compiler/tfldump/src/Dump.cpp202
-rw-r--r--compiler/tfldump/src/Load.cpp133
-rw-r--r--compiler/tfldump/src/OpPrinter.cpp108
-rw-r--r--compiler/tfldump/src/Read.cpp64
-rw-r--r--compiler/tfldump/src/Read.h12
-rw-r--r--compiler/tflite2circle-conversion-test/CMakeLists.txt4
-rw-r--r--compiler/tflite2circle/CMakeLists.txt11
-rw-r--r--compiler/tflite2circle/driver/Driver.cpp28
-rw-r--r--compiler/tflite2circle/include/CircleModel.h15
-rw-r--r--compiler/tflite2circle/include/TFLModel.h7
-rw-r--r--compiler/tflite2circle/requires.cmake5
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions.h7
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/AddOptions.cpp2
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/ArgMaxOptions.cpp2
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/ArgMinOptions.cpp2
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/BidirectionalSequenceLSTMOptions.cpp42
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/BidirectionalSequenceLSTMOptions.h32
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/CastOptions.cpp4
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/ConcatenationOptions.cpp2
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/Conv2DOptions.cpp2
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/DensifyOptions.cpp29
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/DensifyOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/DepthwiseConv2DOptions.cpp2
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.cpp30
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/DivOptions.cpp2
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/FakeQuantOptions.cpp35
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/FakeQuantOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/FullyConnectedOptions.cpp5
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/GeluOptions.cpp32
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/GeluOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/L2NormalizeOptions.cpp2
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/MaximumMinimumOptions.cpp2
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/MulOptions.cpp2
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/Pool2DOptions.cpp2
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/SVDFOptions.cpp41
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/SVDFOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/SubOptions.cpp2
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/TransposeConvOptions.cpp2
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/UnidirectionalSequenceLSTMOptions.cpp41
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/UnidirectionalSequenceLSTMOptions.h32
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/UniqueOptions.cpp2
-rw-r--r--compiler/tflite2circle/src/CircleModel.cpp270
-rw-r--r--compiler/tflite2circle/src/DataLookup.cpp38
-rw-r--r--compiler/tflite2circle/src/DataLookup.h26
-rw-r--r--compiler/tflite2circle/src/TFLBuiltinOptions.lst12
-rw-r--r--compiler/tflite2circle/src/TFLModel.cpp26
-rw-r--r--compiler/tflite2circle/src/TFLOperator.lst21
-rw-r--r--compiler/tfts/CMakeLists.txt5
-rw-r--r--compiler/vconone/CMakeLists.txt2
-rw-r--r--compiler/vconone/src/version.cpp2
-rw-r--r--compiler/visq-unittest/CMakeLists.txt58
-rw-r--r--compiler/visq-unittest/README.md3
-rw-r--r--compiler/visq-unittest/requires.cmake3
-rw-r--r--compiler/visq-unittest/test/__init__.py1
-rw-r--r--compiler/visq-unittest/test/testDotBuilder.py43
-rw-r--r--compiler/visq-unittest/test/testPalette.py42
-rw-r--r--compiler/visq-unittest/test/testQErrorComputer.py199
-rw-r--r--compiler/visq-unittest/test/testUtil.py55
-rw-r--r--compiler/visq/CMakeLists.txt67
-rw-r--r--compiler/visq/README.md32
-rw-r--r--compiler/visq/requires.cmake2
-rw-r--r--compiler/visq/visq382
-rw-r--r--compiler/visq/visqlib/DotBuilder.py165
-rw-r--r--compiler/visq/visqlib/DumpFP32FM.py64
-rw-r--r--compiler/visq/visqlib/DumpFakeQuantFM.py84
-rw-r--r--compiler/visq/visqlib/Palette.py70
-rw-r--r--compiler/visq/visqlib/QErrorComputer.py254
-rw-r--r--compiler/visq/visqlib/Util.py37
-rw-r--r--compute/ARMComputeEx/CMakeLists.txt2
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h12
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLArgMinMaxLayerKernelEx.h115
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h2
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLCastBoolKernel.h81
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h6
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h101
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherExKernel.h2
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h6
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.h2
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLMemsetKernel.h107
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLMultiplyScaleFactorKernel.h2
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLNegKernel.h2
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLOneHotKernel.h152
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLPadLayerKernelEx.h124
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLQuantizationSymmetricKernel.h2
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceOperationKernel.h8
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLScaleFactorSymm8Kernel.h2
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h2
-rw-r--r--compute/ARMComputeEx/arm_compute/core/CPP/kernels/CPPOneHotKernelEx.h110
-rw-r--r--compute/ARMComputeEx/arm_compute/core/NEON/NEElementwiseOperationFuncs.h16
-rw-r--r--compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEActivationLayerKernelEx.h135
-rw-r--r--compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h12
-rw-r--r--compute/ARMComputeEx/arm_compute/core/NEON/kernels/NECastBoolKernel.h94
-rw-r--r--compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h2
-rw-r--r--compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h92
-rw-r--r--compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEGatherKernelEx.h3
-rw-r--r--compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h2
-rw-r--r--compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.h2
-rw-r--r--compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h2
-rw-r--r--compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEOneHotKernel.h140
-rw-r--r--compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEQuantizationSymmetricKernel.h2
-rw-r--r--compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEReductionOperationKernelEx.h108
-rw-r--r--compute/ARMComputeEx/arm_compute/core/TypesEx.h9
-rw-r--r--compute/ARMComputeEx/arm_compute/core/UtilsEx.h2
-rw-r--r--compute/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h44
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h5
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgMinMaxLayerEx.h109
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h1
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLCastBool.h71
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h95
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h2
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h8
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h31
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h4
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h7
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h2
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h3
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLOneHot.h122
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPadLayerEx.h130
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h9
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSplitVEx.h89
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h2
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h92
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPOneHotEx.h68
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h4
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEActivationLayerEx.h103
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h2
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NECastBool.h77
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h4
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h2
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h14
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h4
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGatherEx.h1
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEHashtableLookup.h4
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h3
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEOneHot.h93
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceOperation.h12
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceSum.h4
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReductionOperationEx.h99
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h61
-rw-r--r--compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp193
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/activation_float_helpers.h96
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/arg_min_max_ex.cl564
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl4
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/cast.cl97
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl12
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/gemm.cl7210
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/gemm_helpers.h1235
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/gemmlowp.cl2733
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/gemmlowp_ex.cl10
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl12
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/helpers.h22
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h244
-rw-r--r--  compute/ARMComputeEx/src/core/CL/cl_kernels/instance_normalization_ex.cl | 20
-rw-r--r--  compute/ARMComputeEx/src/core/CL/cl_kernels/memset.cl | 88
-rw-r--r--  compute/ARMComputeEx/src/core/CL/cl_kernels/multiply_scale_factor.cl | 4
-rw-r--r--  compute/ARMComputeEx/src/core/CL/cl_kernels/one_hot.cl | 222
-rw-r--r--  compute/ARMComputeEx/src/core/CL/cl_kernels/pad_layer.cl | 346
-rw-r--r--  compute/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl | 2
-rw-r--r--  compute/ARMComputeEx/src/core/CL/cl_kernels/quantization_symm8.cl | 10
-rw-r--r--  compute/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl | 18
-rw-r--r--  compute/ARMComputeEx/src/core/CL/cl_kernels/repeat.h | 223
-rw-r--r--  compute/ARMComputeEx/src/core/CL/cl_kernels/reshape_layer.cl | 102
-rw-r--r--  compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl | 292
-rw-r--r--  compute/ARMComputeEx/src/core/CL/kernels/CLArgMinMaxLayerKernelEx.cpp | 332
-rw-r--r--  compute/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp | 36
-rw-r--r--  compute/ARMComputeEx/src/core/CL/kernels/CLCastBoolKernel.cpp | 135
-rw-r--r--  compute/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp | 17
-rw-r--r--  compute/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp | 22
-rw-r--r--  compute/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp | 21
-rw-r--r--  compute/ARMComputeEx/src/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.cpp | 14
-rw-r--r--  compute/ARMComputeEx/src/core/CL/kernels/CLMemsetKernel.cpp | 133
-rw-r--r--  compute/ARMComputeEx/src/core/CL/kernels/CLMultiplyScaleFactorKernel.cpp | 26
-rw-r--r--  compute/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp | 7
-rw-r--r--  compute/ARMComputeEx/src/core/CL/kernels/CLOneHotKernel.cpp | 189
-rw-r--r--  compute/ARMComputeEx/src/core/CL/kernels/CLPadLayerKernelEx.cpp | 292
-rw-r--r--  compute/ARMComputeEx/src/core/CL/kernels/CLQuantizationSymmetricKernel.cpp | 26
-rw-r--r--  compute/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp | 21
-rw-r--r--  compute/ARMComputeEx/src/core/CL/kernels/CLScaleFactorSymm8Kernel.cpp | 14
-rw-r--r--  compute/ARMComputeEx/src/core/CPP/kernels/CPPOneHotKernelEx.cpp | 119
-rw-r--r--  compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp | 284
-rw-r--r--  compute/ARMComputeEx/src/core/NEON/kernels/NEActivationLayerKernelEx.cpp | 730
-rw-r--r--  compute/ARMComputeEx/src/core/NEON/kernels/NEBinaryLogicalOperationKernel.cpp | 27
-rw-r--r--  compute/ARMComputeEx/src/core/NEON/kernels/NECastBoolKernel.cpp | 347
-rw-r--r--  compute/ARMComputeEx/src/core/NEON/kernels/NEEmbeddingLookupKernel.cpp | 30
-rw-r--r--  compute/ARMComputeEx/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp | 190
-rw-r--r--  compute/ARMComputeEx/src/core/NEON/kernels/NEGatherKernelEx.cpp | 143
-rw-r--r--  compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp | 51
-rw-r--r--  compute/ARMComputeEx/src/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.cpp | 225
-rw-r--r--  compute/ARMComputeEx/src/core/NEON/kernels/NEMultiplyScaleFactorKernel.cpp | 56
-rw-r--r--  compute/ARMComputeEx/src/core/NEON/kernels/NEOneHotKernel.cpp | 233
-rw-r--r--  compute/ARMComputeEx/src/core/NEON/kernels/NEQuantizationSymmetricKernel.cpp | 101
-rw-r--r--  compute/ARMComputeEx/src/core/NEON/kernels/NEReductionOperationKernelEx.cpp | 693
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLArgMinMaxLayerEx.cpp | 224
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp | 5
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLCastBool.cpp | 52
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp | 60
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp | 3
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp | 62
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp | 92
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp | 6
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLGEMMMatrixAccumulateBiasesKernel.cpp | 171
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp | 4
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp | 2
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp | 2
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp | 2
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLOneHot.cpp | 75
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp | 110
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp | 21
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLSplitVEx.cpp | 197
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp | 16
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp | 18
-rw-r--r--  compute/ARMComputeEx/src/runtime/CPP/functions/CPPOneHotEx.cpp | 54
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp | 66
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp | 7
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NECastBool.cpp | 56
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp | 3
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp | 55
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp | 87
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp | 4
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp | 3
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp | 3
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp | 10
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NEOneHot.cpp | 59
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp | 18
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp | 14
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NEReductionOperationEx.cpp | 173
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp | 59
-rw-r--r--  compute/cker/CMakeLists.txt | 24
-rw-r--r--  compute/cker/include/cker/CpuBackendThreadpool.h | 51
-rw-r--r--  compute/cker/include/cker/NeonTensorUtils.h | 620
-rw-r--r--  compute/cker/include/cker/PortableTensorUtils.h | 165
-rw-r--r--  compute/cker/include/cker/Shape.h | 18
-rw-r--r--  compute/cker/include/cker/TensorUtils.h | 112
-rw-r--r--  compute/cker/include/cker/Types.h | 99
-rw-r--r--  compute/cker/include/cker/Utils.h | 47
-rw-r--r--  compute/cker/include/cker/eigen/EigenSupport.h | 8
-rw-r--r--  compute/cker/include/cker/eigen/Utils.h | 14
-rw-r--r--  compute/cker/include/cker/eigen/eigen_convolution_helpers.h | 23
-rw-r--r--  compute/cker/include/cker/eigen/eigen_gemm_eigen.h | 95
-rw-r--r--  compute/cker/include/cker/eigen/eigen_spatial_convolutions-inl.h | 346
-rw-r--r--  compute/cker/include/cker/operation/AddN.h | 46
-rw-r--r--  compute/cker/include/cker/operation/AveragePool.h | 139
-rw-r--r--  compute/cker/include/cker/operation/BatchToSpaceND.h | 4
-rw-r--r--  compute/cker/include/cker/operation/BinaryArithmeticOps.h | 70
-rw-r--r--  compute/cker/include/cker/operation/BroadcastTo.h | 2
-rw-r--r--  compute/cker/include/cker/operation/Common.h | 4
-rw-r--r--  compute/cker/include/cker/operation/Comparison.h | 114
-rw-r--r--  compute/cker/include/cker/operation/Concatenation.h | 2
-rw-r--r--  compute/cker/include/cker/operation/Conv.h | 69
-rw-r--r--  compute/cker/include/cker/operation/DepthToSpace.h | 71
-rw-r--r--  compute/cker/include/cker/operation/DepthwiseConv.h | 253
-rw-r--r--  compute/cker/include/cker/operation/Dequantize.h | 151
-rw-r--r--  compute/cker/include/cker/operation/ELU.h | 44
-rw-r--r--  compute/cker/include/cker/operation/Einsum.h | 30
-rw-r--r--  compute/cker/include/cker/operation/Elementwise.h | 38
-rw-r--r--  compute/cker/include/cker/operation/Fill.h | 22
-rw-r--r--  compute/cker/include/cker/operation/FloorDiv.h | 82
-rw-r--r--  compute/cker/include/cker/operation/FullyConnected.h | 63
-rw-r--r--  compute/cker/include/cker/operation/FullyConnectedDense16x1.h | 134
-rw-r--r--  compute/cker/include/cker/operation/FullyConnectedSparse16x1.h | 159
-rw-r--r--  compute/cker/include/cker/operation/FusedBatchNorm.h | 10
-rw-r--r--  compute/cker/include/cker/operation/Helper/BCast.h | 4
-rw-r--r--  compute/cker/include/cker/operation/Helper/MatmulBCast.h | 6
-rw-r--r--  compute/cker/include/cker/operation/Helper/RandomDistributions.h | 22
-rw-r--r--  compute/cker/include/cker/operation/Helper/RandomOp.h | 4
-rw-r--r--  compute/cker/include/cker/operation/Helper/RandomOpCpu.h | 6
-rw-r--r--  compute/cker/include/cker/operation/Helper/Tensor.h | 30
-rw-r--r--  compute/cker/include/cker/operation/InstanceNorm.h | 4
-rw-r--r--  compute/cker/include/cker/operation/L2Normalize.h | 2
-rw-r--r--  compute/cker/include/cker/operation/LSTM.h | 371
-rw-r--r--  compute/cker/include/cker/operation/LeakyReLU.h | 47
-rw-r--r--  compute/cker/include/cker/operation/LogSoftMax.h | 6
-rw-r--r--  compute/cker/include/cker/operation/LogicalAnd.h | 80
-rw-r--r--  compute/cker/include/cker/operation/Logistic.h | 29
-rw-r--r--  compute/cker/include/cker/operation/MatrixBandPart.h | 11
-rw-r--r--  compute/cker/include/cker/operation/MaxPool.h | 12
-rw-r--r--  compute/cker/include/cker/operation/OneHot.h | 2
-rw-r--r--  compute/cker/include/cker/operation/Quantize.h | 409
-rw-r--r--  compute/cker/include/cker/operation/Range.h | 4
-rw-r--r--  compute/cker/include/cker/operation/Reduce.h | 124
-rw-r--r--  compute/cker/include/cker/operation/ReduceMean.h | 41
-rw-r--r--  compute/cker/include/cker/operation/ResizeBilinear.h | 107
-rw-r--r--  compute/cker/include/cker/operation/Round.h | 21
-rw-r--r--  compute/cker/include/cker/operation/Select.h | 4
-rw-r--r--  compute/cker/include/cker/operation/Slice.h | 12
-rw-r--r--  compute/cker/include/cker/operation/SoftMax.h | 392
-rw-r--r--  compute/cker/include/cker/operation/SpaceToBatchND.h | 6
-rw-r--r--  compute/cker/include/cker/operation/StatelessRandomUniform.h | 6
-rw-r--r--  compute/cker/include/cker/operation/StridedSlice.h | 35
-rw-r--r--  compute/cker/include/cker/operation/Tile.h | 2
-rw-r--r--  compute/cker/include/cker/operation/Transpose.h | 6
-rw-r--r--  compute/cker/include/cker/operation/TransposeConv.h | 8
-rw-r--r--  compute/cker/include/cker/operation/optimized/BinaryArithmeticOps.h | 1064
-rw-r--r--  compute/cker/include/cker/operation/optimized/Conv.h | 28
-rw-r--r--  compute/cker/include/cker/operation/optimized/DepthwiseConvFloat.h | 1250
-rw-r--r--  compute/cker/include/cker/operation/optimized/DepthwiseConvUint8.h | 304
-rw-r--r--  compute/cker/include/cker/operation/optimized/Gemm.h | 100
-rw-r--r--  compute/cker/include/cker/operation/optimized/OptimizedUtils.h | 4
-rw-r--r--  compute/cker/include/cker/operation/optimized/integer_ops/DepthwiseConvInt8.h | 2138
-rw-r--r--  compute/cker/include/cker/operation/reference/BatchMatMul.h | 5
-rw-r--r--  compute/cker/include/cker/operation/reference/BinaryArithmeticOps.h | 47
-rw-r--r--  compute/cker/include/cker/operation/reference/Conv.h | 212
-rw-r--r--  compute/cker/include/cker/operation/reference/integer_ops/DepthwiseConvHybrid.h | 122
-rw-r--r--  compute/cker/include/cker/operation/reference/integer_ops/DepthwiseConvUInt8.h | 152
-rw-r--r--  compute/cker/include/cker/ruy/RuySupport.h | 82
-rw-r--r--  compute/cker/include/cker/train/operation/FullyConnected.h | 49
-rw-r--r--  compute/cker/include/cker/train/operation/Loss.h | 77
-rw-r--r--  compute/cker/include/cker/train/operation/ReLU.h | 50
-rw-r--r--  compute/cker/src/Range.test.cc | 70
-rw-r--r--  compute/cker/src/train/FullyConnected.test.cc | 83
-rw-r--r--  compute/cker/src/train/Loss.test.cc | 201
-rw-r--r--  compute/cker/src/train/Relu.test.cc | 107
-rw-r--r--  compute/ruy/CMakeLists.txt | 11
-rw-r--r--  compute/ruy/include/ruy/NeonTensorUtils.h | 68
-rw-r--r--  compute/ruy/include/ruy/PortableTensorUtils.h | 39
-rw-r--r--  compute/ruy/include/ruy/RuySupport.h | 111
-rw-r--r--  compute/ruy/include/ruy/Shape.h | 354
-rw-r--r--  compute/ruy/include/ruy/TensorUtils.h | 37
-rw-r--r--  compute/ruy/include/ruy/Types.h | 275
-rw-r--r--  compute/ruy/include/ruy/Utils.h | 259
-rw-r--r--  compute/ruy/include/ruy/neon/neon_check.h | 51
-rw-r--r--  compute/ruy/include/ruy/operation/Conv.h | 205
-rw-r--r--  compute/ruy/include/ruy/operation/FullyConnected.h | 80
-rw-r--r--  compute/test/CMakeLists.txt | 17
-rw-r--r--  compute/test/cker/Range.cc | 72
-rw-r--r--  docs/conf.py | 2
-rw-r--r--  docs/howto/how-to-add-a-new-operation.md | 2
-rw-r--r--  docs/howto/how-to-build-compiler.md | 206
-rw-r--r--  docs/howto/how-to-build-package.md | 187
-rw-r--r--  docs/howto/how-to-build-runtime-tizen-gbs-rpi4.md | 316
-rw-r--r--  docs/howto/how-to-build-runtime.md | 195
-rw-r--r--  docs/howto/how-to-contribute.md | 5
-rw-r--r--  docs/howto/how-to-cross-build-runtime-for-aarch64.md | 10
-rw-r--r--  docs/howto/how-to-cross-build-runtime-for-arm.md | 84
-rw-r--r--  docs/howto/how-to-introduce-a-new-operation-into-runtime.md | 54
-rw-r--r--  docs/howto/how-to-use-specific-backend.md | 40
-rw-r--r--  docs/howto/index.rst | 16
-rw-r--r--  docs/release/1.10/index.rst | 13
-rw-r--r--  docs/release/1.10/release-note-1.10.0.md | 25
-rw-r--r--  docs/release/1.11/index.rst | 13
-rw-r--r--  docs/release/1.11/release-note-1.11.0.md | 21
-rw-r--r--  docs/release/1.12/index.rst | 13
-rw-r--r--  docs/release/1.12/release-note-1.12.0.md | 28
-rw-r--r--  docs/release/1.13/index.rst | 13
-rw-r--r--  docs/release/1.13/release-note-1.13.0.md | 12
-rw-r--r--  docs/release/1.14/index.rst | 13
-rw-r--r--  docs/release/1.14/release-note-1.14.0.md | 10
-rw-r--r--  docs/release/1.15/index.rst | 13
-rw-r--r--  docs/release/1.15/release-note-1.15.0.md | 42
-rw-r--r--  docs/release/1.16/index.rst | 14
-rw-r--r--  docs/release/1.16/release-note-1.16.0.md | 17
-rw-r--r--  docs/release/1.16/release-note-1.16.1.md | 7
-rw-r--r--  docs/release/1.17/index.rst | 13
-rw-r--r--  docs/release/1.17/release-note-1.17.0.md | 24
-rw-r--r--  docs/release/1.18/index.rst | 13
-rw-r--r--  docs/release/1.18/release-note-1.18.0.md | 11
-rw-r--r--  docs/release/1.19/index.rst | 13
-rw-r--r--  docs/release/1.19/release-note-1.19.0.md | 8
-rw-r--r--  docs/release/1.20/index.rst | 13
-rw-r--r--  docs/release/1.20/release-note-1.20.0.md | 34
-rw-r--r--  docs/release/1.21/index.rst | 13
-rw-r--r--  docs/release/1.21/release-note_1.21.0.md | 35
-rw-r--r--  docs/release/1.22/index.rst | 13
-rw-r--r--  docs/release/1.22/release-note-1.22.0.md | 12
-rw-r--r--  docs/release/1.22/release-note-1.22.1.md | 9
-rw-r--r--  docs/release/1.23/index.rst | 13
-rw-r--r--  docs/release/1.23/release-note-1.23.0.md | 8
-rw-r--r--  docs/release/1.24/index.rst | 13
-rw-r--r--  docs/release/1.24/release-note-1.24.0.md | 9
-rw-r--r--  docs/release/1.25/index.rst | 13
-rw-r--r--  docs/release/1.25/release-note_1.25.0.md | 17
-rw-r--r--  docs/release/1.5/index.rst | 13
-rw-r--r--  docs/release/1.6/index.rst | 13
-rw-r--r--  docs/release/1.7/index.rst | 13
-rw-r--r--  docs/release/1.7/release-note-1.7.0.md | 46
-rw-r--r--  docs/release/1.8/index.rst | 13
-rw-r--r--  docs/release/1.9/index.rst | 14
-rw-r--r--  docs/release/1.9/release-note-1.9.1.md | 9
-rw-r--r--  docs/release/index.rst | 12
-rw-r--r--  docs/release/onert-micro/0.1/release-note-0.1.0.md | 72
-rw-r--r--  docs/release/onert-micro/1.0/release-note-1.0.0.md | 12
-rw-r--r--  docs/runtime/api.md | 4
-rw-r--r--  docs/runtime/backend-api.md | 64
-rw-r--r--  docs/runtime/controlflow-operations.md | 40
-rw-r--r--  docs/runtime/index.rst | 6
-rw-r--r--  docs/runtime/supported-operations-backend.md | 208
-rw-r--r--  infra/cmake/modules/ExternalBuildTools.cmake | 25
-rw-r--r--  infra/cmake/modules/ExternalSourceTools.cmake | 60
-rw-r--r--  infra/cmake/modules/IdentifyPlatform.cmake | 20
-rw-r--r--  infra/cmake/packages/ARMComputeSourceConfig.cmake | 4
-rw-r--r--  infra/cmake/packages/AbseilConfig.cmake | 8
-rw-r--r--  infra/cmake/packages/AbseilSourceConfig.cmake | 12
-rw-r--r--  infra/cmake/packages/BoostConfig.cmake | 11
-rw-r--r--  infra/cmake/packages/BoostSourceConfig.cmake | 2
-rw-r--r--  infra/cmake/packages/CMSIS-NN-4.0.0/CMSIS-NNConfig.cmake | 14
-rw-r--r--  infra/cmake/packages/CMSIS-NN-4.0.0/CMSIS-NNConfigVersion.cmake | 10
-rw-r--r--  infra/cmake/packages/CMSIS-NN-4.1.0/CMSIS-NNConfig.cmake | 14
-rw-r--r--  infra/cmake/packages/CMSIS-NN-4.1.0/CMSIS-NNConfigVersion.cmake | 10
-rw-r--r--  infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake | 16
-rw-r--r--  infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfigVersion.cmake | 10
-rw-r--r--  infra/cmake/packages/CaffeSourceConfig.cmake | 3
-rw-r--r--  infra/cmake/packages/CpuInfoSourceConfig.cmake | 21
-rw-r--r--  infra/cmake/packages/Egl_HeadersSourceConfig.cmake | 21
-rw-r--r--  infra/cmake/packages/FarmhashSourceConfig.cmake | 3
-rw-r--r--  infra/cmake/packages/FlatBuffers-2.0/FlatBuffersConfig.cmake | 135
-rw-r--r--  infra/cmake/packages/FlatBuffers-2.0/FlatBuffersConfigVersion.cmake | 10
-rw-r--r--  infra/cmake/packages/FlatBuffersConfig.cmake | 116
-rw-r--r--  infra/cmake/packages/FlatBuffersSource-1.10/FlatBuffersSourceConfig.cmake | 21
-rw-r--r--  infra/cmake/packages/FlatBuffersSource-1.10/FlatBuffersSourceConfigVersion.cmake | 10
-rw-r--r--  infra/cmake/packages/FlatBuffersSource-1.11/FlatBuffersSourceConfig.cmake | 21
-rw-r--r--  infra/cmake/packages/FlatBuffersSource-1.11/FlatBuffersSourceConfigVersion.cmake | 10
-rw-r--r--  infra/cmake/packages/FlatBuffersSource-2.0/FlatBuffersSourceConfig.cmake | 22
-rw-r--r--  infra/cmake/packages/FlatBuffersSource-2.0/FlatBuffersSourceConfigVersion.cmake | 10
-rw-r--r--  infra/cmake/packages/FlatBuffersSourceConfig.cmake | 28
-rw-r--r--  infra/cmake/packages/FlatBuffersSourceConfigVersion.cmake | 9
-rw-r--r--  infra/cmake/packages/Fp16SourceConfig.cmake | 21
-rw-r--r--  infra/cmake/packages/FxdivSourceConfig.cmake | 21
-rw-r--r--  infra/cmake/packages/GEMMLowpSourceConfig.cmake | 3
-rw-r--r--  infra/cmake/packages/GFlagsSourceConfig.cmake | 3
-rw-r--r--  infra/cmake/packages/GTestConfig.cmake | 15
-rw-r--r--  infra/cmake/packages/GTestSourceConfig.cmake | 3
-rw-r--r--  infra/cmake/packages/GoogleDoubleConversionConfig.cmake | 52
-rw-r--r--  infra/cmake/packages/GoogleNSyncConfig.cmake | 62
-rw-r--r--  infra/cmake/packages/H5Tinit.c.linux-armv7l | 977
-rw-r--r--  infra/cmake/packages/HDF5Config.cmake | 16
-rw-r--r--  infra/cmake/packages/HDF5Source.patch | 195
-rw-r--r--  infra/cmake/packages/HDF5SourceConfig.cmake | 6
-rw-r--r--  infra/cmake/packages/JsoncppConfig.cmake | 34
-rw-r--r--  infra/cmake/packages/JsoncppSourceConfig.cmake | 19
-rw-r--r--  infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake | 16
-rw-r--r--  infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfigVersion.cmake | 10
-rw-r--r--  infra/cmake/packages/NEON2SSESourceConfig.cmake | 9
-rw-r--r--  infra/cmake/packages/NoniusSourceConfig.cmake | 2
-rw-r--r--  infra/cmake/packages/ONNXSource-1.4.1/ONNXSourceConfig.cmake | 3
-rw-r--r--  infra/cmake/packages/ONNXSource-1.6.0/ONNXSourceConfig.cmake | 3
-rw-r--r--  infra/cmake/packages/OouraFFTSourceConfig.cmake | 20
-rw-r--r--  infra/cmake/packages/Opencl_HeadersConfig.cmake | 27
-rw-r--r--  infra/cmake/packages/Opencl_HeadersSourceConfig.cmake | 22
-rw-r--r--  infra/cmake/packages/Opengl_HeadersSourceConfig.cmake | 21
-rw-r--r--  infra/cmake/packages/ProtobufConfig.cmake | 21
-rw-r--r--  infra/cmake/packages/ProtobufSource.patch | 18
-rw-r--r--  infra/cmake/packages/ProtobufSourceConfig.cmake | 6
-rw-r--r--  infra/cmake/packages/PsimdSourceConfig.cmake | 21
-rw-r--r--  infra/cmake/packages/PthreadpoolSourceConfig.cmake | 21
-rw-r--r--  infra/cmake/packages/Pybind11SourceConfig.cmake | 3
-rw-r--r--  infra/cmake/packages/PytorchSourceConfig.cmake | 3
-rw-r--r--  infra/cmake/packages/RuySourceConfig.cmake | 21
-rw-r--r--  infra/cmake/packages/TensorFlow-1.13/TensorFlowConfig.cmake | 56
-rw-r--r--  infra/cmake/packages/TensorFlow-1.13/TensorFlowConfigVersion.cmake | 10
-rw-r--r--  infra/cmake/packages/TensorFlow-1.13/TensorFlowVersionChecker.c | 25
-rw-r--r--  infra/cmake/packages/TensorFlowEigenSource-2.1.0/TensorFlowEigenSourceConfig.cmake | 3
-rw-r--r--  infra/cmake/packages/TensorFlowEigenSource-2.3.0-rc0Config.cmake | 23
-rw-r--r--  infra/cmake/packages/TensorFlowEigenSource-2.6.0/TensorFlowEigenSourceConfig.cmake | 21
-rw-r--r--  infra/cmake/packages/TensorFlowEigenSource-2.6.0/TensorFlowEigenSourceConfigVersion.cmake | 10
-rw-r--r--  infra/cmake/packages/TensorFlowEigenSource-2.8.0/TensorFlowEigenSourceConfig.cmake | 21
-rw-r--r--  infra/cmake/packages/TensorFlowEigenSource-2.8.0/TensorFlowEigenSourceConfigVersion.cmake | 10
-rw-r--r--  infra/cmake/packages/TensorFlowGEMMLowpSource-2.1.0/TensorFlowGEMMLowpSourceConfig.cmake | 3
-rw-r--r--  infra/cmake/packages/TensorFlowGEMMLowpSource-2.3.0/TensorFlowGEMMLowpSourceConfig.cmake | 3
-rw-r--r--  infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfig.cmake | 21
-rw-r--r--  infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfigVersion.cmake | 10
-rw-r--r--  infra/cmake/packages/TensorFlowGEMMLowpSource-2.8.0/TensorFlowGEMMLowpSourceConfig.cmake | 21
-rw-r--r--  infra/cmake/packages/TensorFlowGEMMLowpSource-2.8.0/TensorFlowGEMMLowpSourceConfigVersion.cmake | 10
-rw-r--r--  infra/cmake/packages/TensorFlowLite-1.12/Lite/CMakeLists.txt | 41
-rw-r--r--  infra/cmake/packages/TensorFlowLite-1.12/TensorFlowLiteConfig.cmake | 62
-rw-r--r--  infra/cmake/packages/TensorFlowLite-1.12/TensorFlowLiteConfigVersion.cmake | 9
-rw-r--r--  infra/cmake/packages/TensorFlowLite-1.13.1/Lite/CMakeLists.txt | 7
-rw-r--r--  infra/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLiteConfig.cmake | 6
-rw-r--r--  infra/cmake/packages/TensorFlowProtoText-1.12/TensorFlowProtoTextConfig.cmake | 104
-rw-r--r--  infra/cmake/packages/TensorFlowProtoText-1.12/TensorFlowProtoTextConfigVersion.cmake | 9
-rw-r--r--  infra/cmake/packages/TensorFlowProtoText-1.12/build/CMakeLists.txt | 78
-rwxr-xr-x  infra/cmake/packages/TensorFlowProtoText-1.12/make_directories.sh | 6
-rw-r--r--  infra/cmake/packages/TensorFlowProtoText-1.13.1/TensorFlowProtoTextConfig.cmake | 104
-rw-r--r--  infra/cmake/packages/TensorFlowProtoText-1.13.1/TensorFlowProtoTextConfigVersion.cmake | 9
-rw-r--r--  infra/cmake/packages/TensorFlowProtoText-1.13.1/build/CMakeLists.txt | 78
-rwxr-xr-x  infra/cmake/packages/TensorFlowProtoText-1.13.1/make_directories.sh | 6
-rw-r--r--  infra/cmake/packages/TensorFlowRuySource-2.3.0/TensorFlowRuySourceConfig.cmake | 3
-rw-r--r--  infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfig.cmake | 21
-rw-r--r--  infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfigVersion.cmake | 10
-rw-r--r--  infra/cmake/packages/TensorFlowRuySource-2.8.0/TensorFlowRuySourceConfig.cmake | 21
-rw-r--r--  infra/cmake/packages/TensorFlowRuySource-2.8.0/TensorFlowRuySourceConfigVersion.cmake | 10
-rw-r--r--  infra/cmake/packages/TensorFlowSource-1.12/TensorFlowSourceConfig.cmake | 18
-rw-r--r--  infra/cmake/packages/TensorFlowSource-1.12/TensorFlowSourceConfigVersion.cmake | 10
-rw-r--r--  infra/cmake/packages/TensorFlowSource-1.14/TensorFlowSourceConfig.cmake | 3
-rw-r--r--  infra/cmake/packages/TensorFlowSource-2.1.0/TensorFlowSourceConfig.cmake | 3
-rw-r--r--  infra/cmake/packages/TensorFlowSource-2.12.1/TensorFlowSourceConfig.cmake | 19
-rw-r--r--  infra/cmake/packages/TensorFlowSource-2.12.1/TensorFlowSourceConfigVersion.cmake | 10
-rw-r--r--  infra/cmake/packages/TensorFlowSource-2.2.0/TensorFlowSourceConfig.cmake | 3
-rw-r--r--  infra/cmake/packages/TensorFlowSource-2.3.0-rc0Config.cmake | 3
-rw-r--r--  infra/cmake/packages/TensorFlowSource-2.3.0/TensorFlowSourceConfig.cmake | 3
-rw-r--r--  infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfig.cmake | 19
-rw-r--r--  infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfigVersion.cmake | 10
-rw-r--r--  infra/cmake/packages/TensorFlowSource-2.8.0/TensorFlowSourceConfig.cmake | 19
-rw-r--r--  infra/cmake/packages/TensorFlowSource-2.8.0/TensorFlowSourceConfigVersion.cmake | 10
-rw-r--r--  infra/cmake/packages/VulkanSourceConfig.cmake | 20
-rw-r--r--  infra/cmake/packages/XnnpackSourceConfig.cmake | 21
-rw-r--r--  infra/command/build-docker-image | 29
-rw-r--r--  infra/command/docker-run | 11
-rw-r--r--  infra/command/format | 116
-rw-r--r--  infra/command/gen-coverage-report | 7
-rw-r--r--  infra/config/docker.configuration | 2
-rw-r--r--  infra/debian/compiler/changelog | 113
-rw-r--r--  infra/debian/compiler/compat | 1
-rw-r--r--  infra/debian/compiler/control | 25
-rw-r--r--  infra/debian/compiler/copyright | 3
-rw-r--r--  infra/debian/compiler/docs/one-build.1 | 96
-rw-r--r--  infra/debian/compiler/docs/one-codegen.1 | 39
-rw-r--r--  infra/debian/compiler/docs/one-import-bcq.1 | 61
-rw-r--r--  infra/debian/compiler/docs/one-import-onnx.1 | 63
-rw-r--r--  infra/debian/compiler/docs/one-import-tf.1 | 77
-rw-r--r--  infra/debian/compiler/docs/one-import-tflite.1 | 44
-rw-r--r--  infra/debian/compiler/docs/one-import.1 | 35
-rw-r--r--  infra/debian/compiler/docs/one-infer.1 | 46
-rw-r--r--  infra/debian/compiler/docs/one-optimize.1 | 222
-rw-r--r--  infra/debian/compiler/docs/one-pack.1 | 42
-rw-r--r--  infra/debian/compiler/docs/one-partition.1 | 56
-rw-r--r--  infra/debian/compiler/docs/one-profile.1 | 39
-rw-r--r--  infra/debian/compiler/docs/one-quantize.1 | 83
-rw-r--r--  infra/debian/compiler/docs/onecc.1 | 170
-rw-r--r--  infra/debian/compiler/one-compiler-dev.install | 10
-rw-r--r--  infra/debian/compiler/one-compiler-dev.links | 6
-rw-r--r--  infra/debian/compiler/one-compiler-test.install | 5
-rw-r--r--  infra/debian/compiler/one-compiler.install | 61
-rw-r--r--  infra/debian/compiler/one-compiler.links | 17
-rw-r--r--  infra/debian/compiler/one-compiler.manpages | 14
-rw-r--r--  infra/debian/compiler/postinst | 12
-rw-r--r--  infra/debian/compiler/postrm | 18
-rwxr-xr-x  infra/debian/compiler/rules | 19
-rw-r--r--  infra/debian/compiler/source/format | 1
-rw-r--r--  infra/debian/compiler/source/local-options | 2
-rw-r--r--  infra/debian/runtime/changelog | 38
-rw-r--r--  infra/debian/runtime/compat | 1
-rw-r--r--  infra/debian/runtime/control | 19
-rw-r--r--  infra/debian/runtime/copyright | 3
-rw-r--r--  infra/debian/runtime/nnfw-dev.install | 4
-rw-r--r--  infra/debian/runtime/nnfw.install | 3
-rwxr-xr-x  infra/debian/runtime/rules | 22
-rw-r--r--  infra/debian/runtime/source/format | 1
-rw-r--r--  infra/debian/runtime/source/local-options | 2
-rw-r--r--  infra/docker/Dockerfile | 66
-rw-r--r--  infra/docker/Dockerfile.1804 | 41
-rw-r--r--  infra/docker/bionic/Dockerfile | 145
-rw-r--r--  infra/docker/bionic/Dockerfile.aarch64 | 92
-rw-r--r--  infra/docker/focal/Dockerfile | 108
-rw-r--r--  infra/docker/focal/Dockerfile.aarch64 | 62
-rw-r--r--  infra/docker/jammy/Dockerfile | 60
-rw-r--r--  infra/docker/jammy/Dockerfile.aarch64 | 60
-rw-r--r--  infra/doxygen/Doxyfile | 68
-rw-r--r--  infra/nncc/CMakeLists.txt | 60
-rw-r--r--  infra/nncc/Makefile.arm32 | 152
-rw-r--r--  infra/nncc/cmake/ApplyCompileFlags.cmake | 35
-rw-r--r--  infra/nncc/cmake/CfgOptionFlags.cmake | 58
-rw-r--r--  infra/nncc/cmake/buildtool/config/arm-none-eabi-gcc.cmake | 66
-rw-r--r--  infra/nncc/cmake/buildtool/config/config_aarch64-linux.cmake | 13
-rw-r--r--  infra/nncc/cmake/buildtool/config/config_aarch64-tizen.cmake | 17
-rw-r--r--  infra/nncc/cmake/buildtool/config/config_armv7hl-tizen.cmake | 29
-rw-r--r--  infra/nncc/cmake/buildtool/config/config_armv7l-linux.cmake | 25
-rw-r--r--  infra/nncc/cmake/buildtool/config/config_armv7l-tizen.cmake | 29
-rw-r--r--  infra/nncc/cmake/buildtool/config/config_i686-tizen.cmake | 17
-rw-r--r--  infra/nncc/cmake/buildtool/config/config_linux.cmake | 11
-rw-r--r--  infra/nncc/cmake/buildtool/config/config_x86_64-tizen.cmake | 17
-rw-r--r--  infra/nncc/cmake/buildtool/cross/toolchain_armv7l-linux.cmake | 38
-rw-r--r--  infra/nncc/cmake/options/options_aarch64-darwin.cmake | 4
-rw-r--r--  infra/nncc/cmake/options/options_aarch64-linux.cmake | 4
-rw-r--r--  infra/nncc/cmake/options/options_aarch64-tizen.cmake | 4
-rw-r--r--  infra/nncc/cmake/options/options_armv7em-generic.cmake | 3
-rw-r--r--  infra/nncc/cmake/options/options_armv7hl-tizen.cmake | 5
-rw-r--r--  infra/nncc/cmake/options/options_armv7l-linux.cmake | 5
-rw-r--r--  infra/nncc/cmake/options/options_armv7l-tizen.cmake | 5
-rw-r--r--  infra/nncc/cmake/options/options_i686-tizen.cmake | 3
-rw-r--r--  infra/nncc/cmake/options/options_riscv64-tizen.cmake | 3
-rw-r--r--  infra/nncc/cmake/options/options_x86_64-darwin.cmake | 4
-rw-r--r--  infra/nncc/cmake/options/options_x86_64-linux.cmake | 3
-rw-r--r--  infra/nncc/cmake/options/options_x86_64-tizen.cmake | 3
-rw-r--r--  infra/nncc/command/utcount | 8
-rw-r--r--  infra/nncc/config/docker.configuration | 4
-rw-r--r--  infra/nnfw/CMakeLists.txt | 18
-rw-r--r--  infra/nnfw/cmake/ApplyCompileFlags.cmake | 10
-rw-r--r--  infra/nnfw/cmake/CfgOptionFlags.cmake | 42
-rw-r--r--  infra/nnfw/cmake/buildtool/config/config_aarch64-android.cmake | 3
-rw-r--r--  infra/nnfw/cmake/buildtool/config/config_armv7hl-tizen.cmake | 22
-rw-r--r--  infra/nnfw/cmake/buildtool/config/config_i686-tizen.cmake | 17
-rw-r--r--  infra/nnfw/cmake/buildtool/config/config_linux.cmake | 16
-rw-r--r--  infra/nnfw/cmake/buildtool/config/config_riscv64-tizen.cmake | 17
-rw-r--r--  infra/nnfw/cmake/buildtool/config/config_x86_64-darwin.cmake | 3
-rw-r--r--  infra/nnfw/cmake/buildtool/config/config_x86_64-tizen.cmake | 17
-rw-r--r--  infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-linux.cmake | 6
-rw-r--r--  infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-tizen.cmake | 6
-rw-r--r--  infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-linux.cmake | 6
-rw-r--r--  infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-tizen.cmake | 66
-rw-r--r--  infra/nnfw/cmake/options/options_aarch64-android.cmake | 20
-rw-r--r--  infra/nnfw/cmake/options/options_aarch64-tizen.cmake | 9
-rw-r--r--  infra/nnfw/cmake/options/options_armv7hl-tizen.cmake | 27
-rw-r--r--  infra/nnfw/cmake/options/options_armv7l-linux.cmake | 7
-rw-r--r--  infra/nnfw/cmake/options/options_armv7l-tizen.cmake | 17
-rw-r--r--  infra/nnfw/cmake/options/options_i686-tizen.cmake | 21
-rw-r--r--  infra/nnfw/cmake/options/options_riscv64-tizen.cmake | 20
-rw-r--r--  infra/nnfw/cmake/options/options_x86_64-darwin.cmake | 1
-rw-r--r--  infra/nnfw/cmake/options/options_x86_64-linux.cmake | 1
-rw-r--r--  infra/nnfw/cmake/options/options_x86_64-tizen.cmake | 21
-rw-r--r--  infra/nnfw/cmake/packages/ARMComputeConfig.cmake | 118
-rw-r--r--  infra/nnfw/cmake/packages/BoostConfig.cmake | 33
-rw-r--r--  infra/nnfw/cmake/packages/CpuInfoConfig.cmake | 39
-rw-r--r--  infra/nnfw/cmake/packages/EigenConfig.cmake | 2
-rw-r--r--  infra/nnfw/cmake/packages/FarmhashSourceConfig.cmake | 19
-rw-r--r--  infra/nnfw/cmake/packages/FlatBuffersConfig.cmake | 4
-rw-r--r--  infra/nnfw/cmake/packages/Fp16Config.cmake | 30
-rw-r--r--  infra/nnfw/cmake/packages/FxdivConfig.cmake | 29
-rw-r--r--  infra/nnfw/cmake/packages/GEMMLowpConfig.cmake | 2
-rw-r--r--  infra/nnfw/cmake/packages/GEMMLowpSourceConfig.cmake | 19
-rw-r--r--  infra/nnfw/cmake/packages/GLib2.0Config.cmake | 41
-rw-r--r--  infra/nnfw/cmake/packages/GObject2.0Config.cmake | 30
-rw-r--r--  infra/nnfw/cmake/packages/GTestConfig.cmake | 30
-rw-r--r--  infra/nnfw/cmake/packages/Gio2.0Config.cmake | 32
-rw-r--r--  infra/nnfw/cmake/packages/Giounix2.0Config.cmake | 30
-rw-r--r--  infra/nnfw/cmake/packages/LuciConfig.cmake | 43
-rw-r--r--  infra/nnfw/cmake/packages/NEON2SSESourceConfig.cmake | 19
-rw-r--r--  infra/nnfw/cmake/packages/PsimdConfig.cmake | 26
-rw-r--r--  infra/nnfw/cmake/packages/PthreadpoolConfig.cmake | 35
-rw-r--r--  infra/nnfw/cmake/packages/Ruy/CMakeLists.txt | 9
-rw-r--r--  infra/nnfw/cmake/packages/RuyConfig.cmake | 40
-rw-r--r--  infra/nnfw/cmake/packages/RuySourceConfig.cmake | 19
-rw-r--r--  infra/nnfw/cmake/packages/TRIXEngineConfig.cmake | 42
-rw-r--r--  infra/nnfw/cmake/packages/TRIXEngineConfigVersion.cmake | 104
-rw-r--r--  infra/nnfw/cmake/packages/TRIXEngineConfigVersion.extra.cpp | 24
-rw-r--r--  infra/nnfw/cmake/packages/TRIXEngineConfigVersion.major.cpp | 24
-rw-r--r--  infra/nnfw/cmake/packages/TRIXEngineConfigVersion.minor.cpp | 24
-rw-r--r--  infra/nnfw/cmake/packages/TensorFlowEigen-1.13.1/TensorFlowEigenConfig.cmake | 19
-rw-r--r--  infra/nnfw/cmake/packages/TensorFlowEigen-1.13.1/TensorFlowEigenConfigVersion.cmake | 9
-rw-r--r--  infra/nnfw/cmake/packages/TensorFlowGpuConfig.cmake | 51
-rw-r--r--  infra/nnfw/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLite/CMakeLists.txt | 62
-rw-r--r--  infra/nnfw/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLiteConfig.cmake | 73
-rw-r--r--  infra/nnfw/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLiteConfigVersion.cmake | 9
-rw-r--r--  infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/CMakeLists.txt | 123
-rw-r--r--  infra/nnfw/cmake/packages/TensorFlowLite-2.3.0Config.cmake | 100
-rw-r--r--  infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLite/CMakeLists.txt | 185
-rw-r--r--  infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLiteConfig.cmake | 96
-rw-r--r--  infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLiteConfigVersion.cmake | 9
-rw-r--r--  infra/nnfw/cmake/packages/TensorFlowLiteGpu/CMakeLists.txt | 73
-rw-r--r--  infra/nnfw/cmake/packages/XnnpackConfig.cmake | 41
-rw-r--r--  infra/nnfw/command/build | 2
-rw-r--r--  infra/nnfw/command/count-unittest | 6
-rw-r--r--  infra/nnfw/command/prepare-model | 18
-rw-r--r--  infra/nnfw/config/docker.configuration | 2
-rw-r--r--  infra/nnfw/config/gbs.conf | 41
-rw-r--r--  infra/onert-micro/CMakeLists.txt | 61
-rw-r--r--  infra/onert-micro/cmake/ApplyCompileFlags.cmake | 35
-rw-r--r--  infra/onert-micro/cmake/CfgOptionFlags.cmake | 18
-rw-r--r--  infra/onert-micro/cmake/buildtool/config/arm-none-eabi-gcc.cmake | 66
-rw-r--r--  infra/onert-micro/cmake/buildtool/config/config_linux.cmake | 11
-rw-r--r--  infra/onert-micro/cmake/buildtool/config/config_x86_64-linux.cmake | 12
-rw-r--r--  infra/onert-micro/cmake/options/options_armv7-r-generic.cmake | 3
-rw-r--r--  infra/onert-micro/cmake/options/options_armv7em-generic.cmake | 3
-rw-r--r--  infra/onert-micro/cmake/options/options_armv8-m-generic.cmake | 3
-rw-r--r--  infra/onert-micro/cmake/options/options_x86_64-linux.cmake | 3
-rw-r--r--  infra/onert-micro/utils.cmake | 53
-rw-r--r--  infra/packaging/build | 13
-rw-r--r--  infra/packaging/preset/20200630 | 4
-rw-r--r--  infra/packaging/preset/20200731_windows | 6
-rw-r--r--  infra/packaging/preset/20210406 | 55
-rw-r--r--  infra/packaging/preset/20210406_windows | 67
-rw-r--r--  infra/packaging/preset/20210706 | 55
-rw-r--r--  infra/packaging/preset/20210706_windows | 67
-rw-r--r--  infra/packaging/preset/20210910 | 55
-rw-r--r--  infra/packaging/preset/20210910_windows | 67
-rw-r--r--  infra/packaging/preset/20220323 | 64
-rw-r--r--  infra/packaging/preset/20220323_windows | 77
-rw-r--r--  infra/packaging/preset/20221125 | 66
-rw-r--r--  infra/packaging/preset/20221125_windows | 80
-rw-r--r--  infra/packaging/preset/20230413 | 66
-rw-r--r--  infra/packaging/preset/20230413_windows | 80
-rw-r--r--  infra/packaging/preset/20230907 | 66
-rw-r--r--  infra/packaging/preset/20230907_windows | 80
-rw-r--r--  infra/packaging/res/tf2nnpkg.20200630 | 33
-rw-r--r--  infra/packaging/res/tf2nnpkg.20210406 | 109
-rw-r--r--  infra/packaging/res/tf2nnpkg.20210706 | 109
-rw-r--r--  infra/packaging/res/tf2nnpkg.20210910 | 109
-rw-r--r--  infra/packaging/res/tf2nnpkg.20220323 | 109
-rw-r--r--  infra/packaging/res/tf2nnpkg.20221125 | 109
-rw-r--r--  infra/packaging/res/tf2nnpkg.20230413 | 109
-rw-r--r--  infra/packaging/res/tf2nnpkg.20230907 | 109
-rwxr-xr-x  infra/scripts/build-tcm.sh | 10
-rwxr-xr-x  infra/scripts/build_android_runtime_release.sh | 21
-rwxr-xr-x  infra/scripts/common.sh | 36
-rw-r--r--  infra/scripts/compiler_modules.sh | 25
-rwxr-xr-x  infra/scripts/docker_build_cross_aarch64_runtime.sh | 48
-rwxr-xr-x  infra/scripts/docker_build_cross_arm_runtime.sh | 48
-rwxr-xr-x  infra/scripts/docker_build_cross_arm_runtime_release.sh | 49
-rwxr-xr-x  infra/scripts/docker_build_cross_coverage.sh | 58
-rwxr-xr-x  infra/scripts/docker_build_nncc.sh | 34
-rwxr-xr-x  infra/scripts/docker_build_test_x64.sh | 49
-rwxr-xr-x  infra/scripts/docker_build_tizen_cross.sh | 50
-rwxr-xr-x  infra/scripts/docker_build_tizen_gbs.sh | 31
-rwxr-xr-x  infra/scripts/docker_collect_nnpkg_resources.sh | 15
-rwxr-xr-x  infra/scripts/docker_coverage_report.sh | 32
-rwxr-xr-x  infra/scripts/test_arm_nnpkg.sh | 3
-rwxr-xr-x  infra/scripts/test_coverage.sh | 15
-rwxr-xr-x  infra/scripts/test_ubuntu_npud.sh | 59
-rwxr-xr-x  infra/scripts/test_ubuntu_runtime.sh | 50
-rwxr-xr-x  infra/scripts/test_ubuntu_runtime_mixed.sh | 24
-rwxr-xr-x  infra/scripts/tizen_xu4_test.sh | 44
-rwxr-xr-x  infra/scripts/unittest_compiler_xml.sh | 11
-rw-r--r--  nnpackage/examples/README.md | 39
-rw-r--r--  nnpackage/examples/v1.0.0/add/add.tflite (renamed from nnpackage/examples/one_op_in_tflite/add.tflite) | bin 460 -> 460 bytes
-rw-r--r--  nnpackage/examples/v1.0.0/add/metadata/MANIFEST (renamed from nnpackage/examples/one_op_in_tflite/metadata/MANIFEST) | 0
-rw-r--r--  nnpackage/examples/v1.0.0/add_invalid_manifest/add.tflite | bin 0 -> 460 bytes
-rw-r--r--  nnpackage/examples/v1.0.0/add_invalid_manifest/metadata/MANIFEST | 7
-rw-r--r--  nnpackage/examples/v1.0.0/if_dynamic/if_dynamic.tflite | bin 0 -> 34320 bytes
-rw-r--r--  nnpackage/examples/v1.0.0/if_dynamic/metadata/MANIFEST | 7
-rw-r--r--  nnpackage/examples/v1.0.0/if_dynamic/metadata/tc/expected.h5 | bin 0 -> 11000 bytes
-rw-r--r--  nnpackage/examples/v1.0.0/if_dynamic/metadata/tc/input.h5 | bin 0 -> 12088 bytes
-rw-r--r--  nnpackage/examples/v1.0.0/while_dynamic/metadata/MANIFEST | 7
-rw-r--r--  nnpackage/examples/v1.0.0/while_dynamic/metadata/tc/expected.h5 | bin 0 -> 11000 bytes
-rw-r--r--  nnpackage/examples/v1.0.0/while_dynamic/metadata/tc/input.h5 | bin 0 -> 12088 bytes
-rw-r--r--  nnpackage/examples/v1.0.0/while_dynamic/while_dynamic.tflite | bin 0 -> 12252 bytes
-rw-r--r--  nnpackage/examples/v1.1.0/one_op_in_tflite/add.tflite | bin 0 -> 460 bytes
-rw-r--r--  nnpackage/examples/v1.1.0/one_op_in_tflite/metadata/MANIFEST | 8
-rw-r--r--  nnpackage/examples/v1.1.0/one_op_in_tflite/metadata/config.cfg | 1
-rw-r--r--  nnpackage/examples/v1.3.0/two_tflites/README.md | 28
-rw-r--r--  nnpackage/examples/v1.3.0/two_tflites/metadata/MANIFEST | 11
-rw-r--r--  nnpackage/examples/v1.3.0/two_tflites/metadata/tc/expected.h5 | bin 0 -> 1614584 bytes
-rw-r--r--  nnpackage/examples/v1.3.0/two_tflites/metadata/tc/input.h5 | bin 0 -> 611064 bytes
-rw-r--r--  nnpackage/examples/v1.3.0/two_tflites/mv1.0.tflite | bin 0 -> 4276 bytes
-rw-r--r--  nnpackage/examples/v1.3.0/two_tflites/mv1.1.tflite | bin 0 -> 2024 bytes
-rw-r--r--  nnpackage/schema/circle_schema.fbs | 273
-rw-r--r--  nnpackage/spec/10_packaging_and_manifest.md | 32
-rw-r--r--  onert-micro/CMakeLists.txt | 252
-rw-r--r--  onert-micro/README.md | 128
-rw-r--r--  onert-micro/eval-driver/CMakeLists.txt | 13
-rw-r--r--  onert-micro/eval-driver/Driver.cpp | 153
-rw-r--r--  onert-micro/externals/CMakeLists.txt | 9
-rw-r--r--  onert-micro/externals/flatbuffers/base.h | 453
-rw-r--r--  onert-micro/externals/flatbuffers/code_generators.h | 234
-rw-r--r--  onert-micro/externals/flatbuffers/flatbuffers.h | 3078
-rw-r--r--  onert-micro/externals/flatbuffers/flatc.h | 95
-rw-r--r--  onert-micro/externals/flatbuffers/flexbuffers.h | 1852
-rw-r--r--  onert-micro/externals/flatbuffers/grpc.h | 361
-rw-r--r--  onert-micro/externals/flatbuffers/hash.h | 145
-rw-r--r--  onert-micro/externals/flatbuffers/idl.h | 1145
-rw-r--r--  onert-micro/externals/flatbuffers/minireflect.h | 507
-rw-r--r--  onert-micro/externals/flatbuffers/pch/flatc_pch.h | 40
-rw-r--r--  onert-micro/externals/flatbuffers/pch/pch.h | 39
-rw-r--r--  onert-micro/externals/flatbuffers/reflection.h | 520
-rw-r--r--  onert-micro/externals/flatbuffers/reflection_generated.h | 1257
-rw-r--r--  onert-micro/externals/flatbuffers/registry.h | 140
-rw-r--r--  onert-micro/externals/flatbuffers/stl_emulation.h | 674
-rw-r--r--  onert-micro/externals/flatbuffers/util.h | 799
-rw-r--r--  onert-micro/externals/gen/circle-generated/circle/schema_generated.h | 24984
-rw-r--r--  onert-micro/helpers/GenerateKernelsListHelper.cpp | 253
-rw-r--r--  onert-micro/luci-interpreter/CMakeLists.txt | 37
-rw-r--r--  onert-micro/luci-interpreter/README.md | 158
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/Interpreter.h | 67
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/InterpreterConfigure.h | 80
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/TrainingSettings.h | 70
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/core/DataType.h | 180
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/core/ParamsType.h | 57
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/core/Tensor.h | 249
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/core/reader/CircleMicroReader.h | 192
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/core/reader/CircleMicroReaderHelper.h | 49
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/onert-micro-version.h | 26
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/TestDataBase.h | 46
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/abs/FloatAbsKernel.h | 90
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/abs/NegAbsKernel.h | 139
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/abs/TestDataAbsBase.h | 60
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/add/FloatAddKernel.h | 176
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/add/IntAddKernel.h | 285
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/add/NegAddKernel.h | 148
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/add/TestDataAddBase.h | 68
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/add_n/FloatAddNKernel.h | 121
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/add_n/NegAddNKernel.h | 94
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/add_n/TestDataAddNBase.h | 66
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/argmax/FloatArgMaxKernel.h | 95
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/argmax/NegArgMaxKernel.h | 89
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/argmax/TestDataArgMaxBase.h | 60
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/argmin/FloatArgMinKernel.h | 95
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/argmin/NegArgMinKernel.h | 89
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/argmin/TestDataArgMinBase.h | 60
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/average_pool_2d/FloatAveragePool2DKernel.h | 103
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/average_pool_2d/NegAveragePool2DKernel.h | 88
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/average_pool_2d/TestDataAveragePool2DBase.h | 60
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/concatenation/FloatConcatenationKernel.h | 105
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/concatenation/IntConcatenationKernel.h | 168
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/concatenation/NegConcatenationKernel.h | 215
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/concatenation/TestDataConcatenationBase.h | 63
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/conv2d/FloatConv2DKernel.h | 107
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/conv2d/NegConv2DKernel.h | 231
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/conv2d/TestDataConv2DBase.h | 58
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/conv2d/U8Conv2DKernel.h | 117
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/FloatDivKernel.h | 153
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/NegDivKernel.h | 149
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/TestDataDivBase.h | 68
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/elu/FloatEluKernel.h | 90
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/elu/NegEluKernel.h | 83
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/elu/TestDataEluBase.h | 60
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/equal/FloatEqualKernel.h | 281
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/equal/IntEqualKernel.h | 278
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/equal/TestDataEqualBase.h | 68
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/exp/FloatExpKernel.h | 90
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/exp/NegExpKernel.h | 84
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/exp/TestDataExpBase.h | 60
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/expand_dims/ExpandDimsKernel.h | 179
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/fill/FillKernel.h | 114
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/fill/NegFillKernel.h | 152
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/fully_connected/FloatFullyConnectedKernel.h | 115
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/fully_connected/NegFullyConnectedKernel.h | 218
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/fully_connected/TestDataFullyConnectedBase.h | 58
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/fully_connected/U8FullyConnectedKernel.h | 112
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/gather/FloatGatherKernel.h | 96
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/gather/IntGatherKernel.h | 92
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/gather/NegGatherKernel.h | 214
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/gather/TestDataGatherBase.h | 60
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/greater/FloatGreaterKernel.h | 185
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/greater/TestDataGreaterBase.h | 68
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/greater_equal/FloatGreaterEqualKernel.h | 158
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/greater_equal/TestDataGreaterEqualBase.h | 68
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/leaky_relu/FloatLeakyReLUKernel.h | 92
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/leaky_relu/NegLeakyReLUKernel.h | 85
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/leaky_relu/TestDataLeakyReLUBase.h | 60
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/less/FloatLessKernel.h | 313
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/less/IntLessKernel.h | 301
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/less/NegTestDataLessKernel.h | 91
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/less/QuantLessKernel.h | 156
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/less/TestDataLessBase.h | 68
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/less_equal/FloatLessEqualKernel.h | 185
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/less_equal/TestDataLessEqualBase.h | 68
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/logical_and/BoolLogicalAndKernel.h | 104
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/logical_and/NegLogicalAndKernel.h | 90
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/logical_and/TestDataLogicalAndBase.h | 63
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/logical_or/BoolLogicalOrKernel.h | 105
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/logical_or/NegLogicalOrKernel.h | 90
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/logical_or/TestDataLogicalOrBase.h | 63
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/logistic/FloatLogisticKernel.h | 90
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/logistic/NegLogisticKernel.h | 139
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/logistic/TestDataLogisticBase.h | 60
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/maxpool2d/FloatMaxPool2DKernel.h | 107
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/maxpool2d/NegMaxPool2DKernel.h | 201
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/maxpool2d/TestDataMaxPool2DBase.h | 60
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/mul/FloatMulKernel.h | 177
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/mul/IntMulKernel.h | 172
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/mul/NegMulKernel.h | 209
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/mul/TestDataMulBase.h | 68
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/neg/FloatNegKernel.h | 91
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/neg/NegNegKernel.h | 145
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/neg/TestDataNegBase.h | 60
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/notequal/FloatNotEqualKernel.h | 184
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/notequal/TestDataNotEqualBase.h | 68
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/pack/PackKernel.h | 270
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/pack/TestDataPackBase.h | 63
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/pad/FloatPadKernel.h | 106
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/pad/NegPadKernel.h | 92
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/pad/TestDataPadBase.h | 60
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/pad_v2/FloatPadV2Kernel.h | 110
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/pad_v2/NegPadV2Kernel.h | 97
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/pad_v2/TestDataPadV2Base.h | 60
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/reduce_common/NegReduceProdKernel.h | 150
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/reduce_common/ReduceProdKernel.h | 177
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/reduce_common/TestDataReduceCommonBase.h | 60
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/relu/FloatReLUKernel.h | 88
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/relu/NegReLUKernel.h | 83
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/relu/TestDataReLUBase.h | 60
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/relu6/FloatReLU6Kernel.h | 88
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/relu6/NegReLU6Kernel.h | 83
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/relu6/TestDataReLU6Base.h | 60
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/reshape/ReshapeKernel.h | 176
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/resize_bilinear/FloatResizeBilinearKernel.h | 190
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/resize_bilinear/NegResizeBilinearKernel.h | 461
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/resize_bilinear/TestDataResizeBilinearBase.h | 61
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/resize_bilinear/U8ResizeBilinearKernel.h | 208
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/shape/NegShapeKernel.h | 83
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/shape/ShapeKernel.h | 113
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/slice/FloatSliceKernel.h | 99
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/slice/NegSliceKernel.h | 292
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/slice/QuantS16SliceKernel.h | 106
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/slice/QuantU8SliceKernel.h | 106
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/slice/TestDataSliceBase.h | 60
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/split/FloatSplitKernel.h | 103
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/split/IntSplitKernel.h | 99
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/split/TestDataSplitBase.h | 68
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/split_v/SplitVKernel.h | 147
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/strided_slice/StridedSliceKernel.h | 130
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/sub/FloatSubKernel.h | 177
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/sub/IntSubKernel.h | 172
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/sub/NegSubKernel.h | 209
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/sub/TestDataSubBase.h | 68
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/tanh/FloatTanhKernel.h | 89
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/tanh/NegTanhKernel.h | 83
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/tanh/TestDataTanhBase.h | 60
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/transpose/TransposeKernel.h | 121
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/unidirectional_lstm/FloatUnidirectionalLSTMKernel.h | 164
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/unidirectional_lstm/QuantS8UnidirectionalLSTM.h | 265
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/unidirectional_lstm/TestDataUnidirectionalLSTMBase.h | 60
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/while/NegWhileKernel.h | 137
-rw-r--r--  onert-micro/luci-interpreter/include/luci_interpreter/test_models/while/WhileKernel.h | 162
-rw-r--r--  onert-micro/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst | 61
-rw-r--r--  onert-micro/luci-interpreter/pal/cmsisnn/PALConv2d.h | 199
-rw-r--r--  onert-micro/luci-interpreter/pal/cmsisnn/PALFullyConnected.h | 114
-rw-r--r--  onert-micro/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h | 33
-rw-r--r--  onert-micro/luci-interpreter/pal/cmsisnn/PALMul.h | 45
-rw-r--r--  onert-micro/luci-interpreter/pal/cmsisnn/PALSub.h | 35
-rw-r--r--  onert-micro/luci-interpreter/pal/cmsisnn/PALUnidirectionalSequenceLSTM.h | 243
-rw-r--r--  onert-micro/luci-interpreter/pal/cmsisnn/pal.cmake | 83
-rw-r--r--  onert-micro/luci-interpreter/pal/common/PALAbs.h | 35
-rw-r--r--  onert-micro/luci-interpreter/pal/common/PALAddCommon.h | 94
-rw-r--r--  onert-micro/luci-interpreter/pal/common/PALAddN.h | 47
-rw-r--r--  onert-micro/luci-interpreter/pal/common/PALArgMinMax.h | 73
-rw-r--r--  onert-micro/luci-interpreter/pal/common/PALAveragePool2DCommon.h | 93
-rw-r--r--  onert-micro/luci-interpreter/pal/common/PALComparisons.h | 176
-rw-r--r--  onert-micro/luci-interpreter/pal/common/PALConcatenation.h | 70
-rw-r--r--  onert-micro/luci-interpreter/pal/common/PALConv2DCommon.h | 201
-rw-r--r--  onert-micro/luci-interpreter/pal/common/PALDiv.h | 115
-rw-r--r--  onert-micro/luci-interpreter/pal/common/PALElu.h | 39
-rw-r--r--  onert-micro/luci-interpreter/pal/common/PALExp.h | 38
-rw-r--r--  onert-micro/luci-interpreter/pal/common/PALFullyConnectedCommon.h | 103
-rw-r--r--  onert-micro/luci-interpreter/pal/common/PALLogicalCommon.h | 35
-rw-r--r--  onert-micro/luci-interpreter/pal/common/PALLogistic.h | 156
-rw-r--r--  onert-micro/luci-interpreter/pal/common/PALMaxPool2DCommon.h | 146
-rw-r--r--  onert-micro/luci-interpreter/pal/common/PALMulCommon.h | 115
-rw-r--r--  onert-micro/luci-interpreter/pal/common/PALNeg.h | 46
-rw-r--r--  onert-micro/luci-interpreter/pal/common/PALPad.h | 109
-rw-r--r--  onert-micro/luci-interpreter/pal/common/PALReduceCommon.h | 114
-rw-r--r--  onert-micro/luci-interpreter/pal/common/PALReluCommon.h | 41
-rw-r--r--  onert-micro/luci-interpreter/pal/common/PALResizeBilinear.h | 135
-rw-r--r--  onert-micro/luci-interpreter/pal/common/PALSoftmax.h | 78
-rw-r--r--  onert-micro/luci-interpreter/pal/common/PALStridedSlice.h | 260
-rw-r--r--  onert-micro/luci-interpreter/pal/common/PALSub.h | 89
-rw-r--r--  onert-micro/luci-interpreter/pal/common/PALTanh.h | 121
-rw-r--r--  onert-micro/luci-interpreter/pal/common/PALTranspose.h | 104
-rw-r--r--  onert-micro/luci-interpreter/pal/common/PALUnidirectionalSequenceLSTMCommon.h | 567
-rw-r--r--  onert-micro/luci-interpreter/pal/common/PALUtils.h | 182
-rw-r--r--  onert-micro/luci-interpreter/pal/common/Params.h | 214
-rw-r--r--  onert-micro/luci-interpreter/pal/common/ProcessBroadcastShapes.h | 209
-rw-r--r--  onert-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst | 47
-rw-r--r--  onert-micro/luci-interpreter/pal/mcu/PALAdd.h | 41
-rw-r--r--  onert-micro/luci-interpreter/pal/mcu/PALAveragePool2D.h | 28
-rw-r--r--  onert-micro/luci-interpreter/pal/mcu/PALConv2d.h | 32
-rw-r--r--  onert-micro/luci-interpreter/pal/mcu/PALFullyConnected.h | 55
-rw-r--r--  onert-micro/luci-interpreter/pal/mcu/PALL2Pool2D.h | 33
-rw-r--r--  onert-micro/luci-interpreter/pal/mcu/PALMaxPool2D.h | 28
-rw-r--r--  onert-micro/luci-interpreter/pal/mcu/PALMul.h | 42
-rw-r--r--  onert-micro/luci-interpreter/pal/mcu/PALUnidirectionalSequenceLSTM.h | 82
-rw-r--r--  onert-micro/luci-interpreter/pal/mcu/pal.cmake | 7
-rw-r--r--  onert-micro/luci-interpreter/requires.cmake | 0
-rw-r--r--  onert-micro/luci-interpreter/src/CMakeLists.txt | 45
-rw-r--r--  onert-micro/luci-interpreter/src/Interpreter.cpp | 108
-rw-r--r--  onert-micro/luci-interpreter/src/core/CMakeLists.txt | 20
-rw-r--r--  onert-micro/luci-interpreter/src/core/RuntimeGraph.cpp | 443
-rw-r--r--  onert-micro/luci-interpreter/src/core/RuntimeGraph.h | 151
-rw-r--r--  onert-micro/luci-interpreter/src/core/RuntimeModule.h | 66
-rw-r--r--  onert-micro/luci-interpreter/src/core/reader/CMakeLists.txt | 12
-rw-r--r--  onert-micro/luci-interpreter/src/core/reader/CircleMicroReader.cpp | 204
-rw-r--r--  onert-micro/luci-interpreter/src/core/reader/CircleMicroReaderHelper.cpp | 52
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Abs.cpp | 90
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Abs.test.cpp | 99
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Add.cpp | 122
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Add.test.cpp | 166
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/AddN.cpp | 113
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/AddN.test.cpp | 100
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/ArgMax.cpp | 69
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/ArgMax.test.cpp | 86
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/ArgMin.cpp | 70
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/ArgMin.test.cpp | 86
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/AveragePool2D.cpp | 100
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/AveragePool2D.test.cpp | 88
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/BatchMatMul.cpp | 186
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/BatchMatMul.h | 49
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/BatchMatMul.test.cpp | 272
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/BatchToSpaceND.cpp | 104
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/BatchToSpaceND.h | 45
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp | 100
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/BinaryOpCommon.h | 215
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Builders.h | 51
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/CMakeLists.txt | 47
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Cast.cpp | 144
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Cast.h | 43
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Cast.test.cpp | 241
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Concatenation.cpp | 169
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Concatenation.test.cpp | 135
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Conv2D.cpp | 387
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Conv2D.test.cpp | 121
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/DepthToSpace.cpp | 81
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/DepthToSpace.h | 45
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/DepthToSpace.test.cpp | 115
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/DepthwiseConv2D.cpp | 450
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/DepthwiseConv2D.h | 57
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp | 622
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Dequantize.cpp | 80
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Dequantize.h | 43
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Dequantize.test.cpp | 149
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Div.cpp | 77
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Div.test.cpp | 122
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Elu.cpp | 76
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Elu.test.cpp | 86
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Equal.cpp | 97
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Equal.test.cpp | 135
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Exp.cpp | 78
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Exp.test.cpp | 87
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/ExpandDims.cpp | 99
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/ExpandDims.test.cpp | 86
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Fill.cpp | 84
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Fill.test.cpp | 97
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Floor.cpp | 58
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Floor.h | 45
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Floor.test.cpp | 76
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/FloorDiv.cpp | 86
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/FloorDiv.h | 46
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/FloorDiv.test.cpp | 147
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/FullyConnected.cpp | 231
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/FullyConnected.test.cpp | 124
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Gather.cpp | 160
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Gather.test.cpp | 122
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Greater.cpp | 88
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Greater.test.cpp | 89
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/GreaterEqual.cpp | 90
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/GreaterEqual.test.cpp | 89
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/If.cpp | 94
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/If.h | 49
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/If.test.cpp | 161
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/InstanceNorm.cpp | 122
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/InstanceNorm.h | 49
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/InstanceNorm.test.cpp | 97
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/KernelBuilder.cpp | 44
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/KernelBuilder.h | 151
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/L2Normalize.cpp | 74
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/L2Normalize.h | 46
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/L2Normalize.test.cpp | 126
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/L2Pool2D.cpp | 87
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/L2Pool2D.h | 49
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/L2Pool2D.test.cpp | 291
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/LeakyRelu.cpp | 79
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/LeakyRelu.test.cpp | 87
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Less.cpp | 153
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Less.test.cpp | 165
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/LessEqual.cpp | 89
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/LessEqual.test.cpp | 89
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/LocalResponseNormalization.cpp | 64
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/LocalResponseNormalization.h | 44
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp | 157
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/LogSoftmax.cpp | 93
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/LogSoftmax.h | 48
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/LogSoftmax.test.cpp | 124
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/LogicalAnd.cpp | 71
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/LogicalAnd.test.cpp | 95
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/LogicalNot.cpp | 61
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/LogicalNot.test.cpp | 78
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/LogicalOr.cpp | 71
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/LogicalOr.test.cpp | 95
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Logistic.cpp | 104
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Logistic.test.cpp | 101
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/MISOKernel.h | 87
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/MaxPool2D.cpp | 122
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/MaxPool2D.test.cpp | 117
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Maximum.cpp | 66
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Maximum.h | 47
-rw-r--r--  onert-micro/luci-interpreter/src/kernels/Maximum.test.cpp | 82
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Mean.cpp344
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Mean.h55
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Mean.test.cpp240
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Minimum.cpp66
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Minimum.h47
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Minimum.test.cpp82
-rw-r--r--onert-micro/luci-interpreter/src/kernels/MirrorPad.cpp172
-rw-r--r--onert-micro/luci-interpreter/src/kernels/MirrorPad.h45
-rw-r--r--onert-micro/luci-interpreter/src/kernels/MirrorPad.test.cpp225
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Mul.cpp156
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Mul.test.cpp155
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Neg.cpp79
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Neg.test.cpp100
-rw-r--r--onert-micro/luci-interpreter/src/kernels/NotEqual.cpp89
-rw-r--r--onert-micro/luci-interpreter/src/kernels/NotEqual.test.cpp89
-rw-r--r--onert-micro/luci-interpreter/src/kernels/OneHot.cpp136
-rw-r--r--onert-micro/luci-interpreter/src/kernels/OneHot.h48
-rw-r--r--onert-micro/luci-interpreter/src/kernels/OneHot.test.cpp192
-rw-r--r--onert-micro/luci-interpreter/src/kernels/PRelu.cpp210
-rw-r--r--onert-micro/luci-interpreter/src/kernels/PRelu.h59
-rw-r--r--onert-micro/luci-interpreter/src/kernels/PRelu.test.cpp397
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Pack.cpp128
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Pack.test.cpp95
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Pad.cpp32
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Pad.test.cpp86
-rw-r--r--onert-micro/luci-interpreter/src/kernels/PadCommon.cpp144
-rw-r--r--onert-micro/luci-interpreter/src/kernels/PadCommon.h33
-rw-r--r--onert-micro/luci-interpreter/src/kernels/PadV2.cpp32
-rw-r--r--onert-micro/luci-interpreter/src/kernels/PadV2.test.cpp86
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Pow.cpp77
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Pow.h46
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Pow.test.cpp140
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Quantize.cpp160
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Quantize.h43
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Quantize.test.cpp254
-rw-r--r--onert-micro/luci-interpreter/src/kernels/ReduceCommon.cpp92
-rw-r--r--onert-micro/luci-interpreter/src/kernels/ReduceCommon.test.cpp107
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Relu.cpp76
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Relu.test.cpp87
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Relu6.cpp76
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Relu6.test.cpp87
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Reshape.cpp101
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Reshape.test.cpp79
-rw-r--r--onert-micro/luci-interpreter/src/kernels/ResizeBilinear.cpp118
-rw-r--r--onert-micro/luci-interpreter/src/kernels/ResizeBilinear.h45
-rw-r--r--onert-micro/luci-interpreter/src/kernels/ResizeBilinear.test.cpp190
-rw-r--r--onert-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp75
-rw-r--r--onert-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.h45
-rw-r--r--onert-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp231
-rw-r--r--onert-micro/luci-interpreter/src/kernels/ReverseV2.cpp82
-rw-r--r--onert-micro/luci-interpreter/src/kernels/ReverseV2.h43
-rw-r--r--onert-micro/luci-interpreter/src/kernels/ReverseV2.test.cpp71
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Rsqrt.cpp66
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Rsqrt.h46
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Rsqrt.test.cpp90
-rw-r--r--onert-micro/luci-interpreter/src/kernels/SISOKernel.h57
-rw-r--r--onert-micro/luci-interpreter/src/kernels/SVDF.cpp242
-rw-r--r--onert-micro/luci-interpreter/src/kernels/SVDF.h56
-rw-r--r--onert-micro/luci-interpreter/src/kernels/SVDF.test.cpp341
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Shape.cpp46
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Shape.test.cpp86
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Slice.cpp235
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Slice.test.cpp139
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Softmax.cpp83
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Softmax.test.cpp119
-rw-r--r--onert-micro/luci-interpreter/src/kernels/SpaceToBatchND.cpp102
-rw-r--r--onert-micro/luci-interpreter/src/kernels/SpaceToBatchND.h45
-rw-r--r--onert-micro/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp123
-rw-r--r--onert-micro/luci-interpreter/src/kernels/SpaceToDepth.cpp79
-rw-r--r--onert-micro/luci-interpreter/src/kernels/SpaceToDepth.h45
-rw-r--r--onert-micro/luci-interpreter/src/kernels/SpaceToDepth.test.cpp65
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Split.cpp93
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Split.h81
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Split.test.cpp99
-rw-r--r--onert-micro/luci-interpreter/src/kernels/SplitV.cpp92
-rw-r--r--onert-micro/luci-interpreter/src/kernels/SplitV.test.cpp87
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Sqrt.cpp66
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Sqrt.h46
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Sqrt.test.cpp90
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Square.cpp66
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Square.h46
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Square.test.cpp52
-rw-r--r--onert-micro/luci-interpreter/src/kernels/SquaredDifference.cpp65
-rw-r--r--onert-micro/luci-interpreter/src/kernels/SquaredDifference.h47
-rw-r--r--onert-micro/luci-interpreter/src/kernels/SquaredDifference.test.cpp78
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Squeeze.cpp85
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Squeeze.h44
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Squeeze.test.cpp74
-rw-r--r--onert-micro/luci-interpreter/src/kernels/StridedSlice.cpp136
-rw-r--r--onert-micro/luci-interpreter/src/kernels/StridedSlice.test.cpp75
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Sub.cpp160
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Sub.test.cpp152
-rw-r--r--onert-micro/luci-interpreter/src/kernels/TISOKernel.h127
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Tanh.cpp204
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Tanh.test.cpp89
-rw-r--r--onert-micro/luci-interpreter/src/kernels/TestUtils.cpp43
-rw-r--r--onert-micro/luci-interpreter/src/kernels/TestUtils.h43
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Transpose.cpp85
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Transpose.test.cpp75
-rw-r--r--onert-micro/luci-interpreter/src/kernels/TransposeConv.cpp351
-rw-r--r--onert-micro/luci-interpreter/src/kernels/TransposeConv.h65
-rw-r--r--onert-micro/luci-interpreter/src/kernels/TransposeConv.test.cpp353
-rw-r--r--onert-micro/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.cpp446
-rw-r--r--onert-micro/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.h223
-rw-r--r--onert-micro/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.test.cpp85
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Unpack.cpp83
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Unpack.h46
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Unpack.test.cpp148
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Utils.cpp275
-rw-r--r--onert-micro/luci-interpreter/src/kernels/Utils.h334
-rw-r--r--onert-micro/luci-interpreter/src/kernels/While.cpp184
-rw-r--r--onert-micro/luci-interpreter/src/kernels/While.test.cpp87
-rw-r--r--onert-micro/luci-interpreter/src/loader/CMakeLists.txt15
-rw-r--r--onert-micro/luci-interpreter/src/loader/GraphLoader.cpp168
-rw-r--r--onert-micro/luci-interpreter/src/loader/GraphLoader.h36
-rw-r--r--onert-micro/luci-interpreter/src/loader/ModuleLoader.cpp62
-rw-r--r--onert-micro/luci-interpreter/src/loader/ModuleLoader.h37
-rw-r--r--onert-micro/luci-interpreter/src/memory_managers/BuddyMemoryManager.cpp100
-rw-r--r--onert-micro/luci-interpreter/src/memory_managers/BuddyMemoryManager.h148
-rw-r--r--onert-micro/luci-interpreter/src/memory_managers/BuddyMemoryManager.test.cpp73
-rw-r--r--onert-micro/luci-interpreter/src/memory_managers/CMakeLists.txt21
-rw-r--r--onert-micro/luci-interpreter/src/memory_managers/SimpleMemoryManager.cpp44
-rw-r--r--onert-micro/luci-interpreter/src/memory_managers/SimpleMemoryManager.h39
-rw-r--r--onert-micro/luci-interpreter/src/memory_managers/StaticMemoryManager.cpp83
-rw-r--r--onert-micro/luci-interpreter/src/memory_managers/StaticMemoryManager.h90
-rw-r--r--onert-micro/luci-interpreter/src/memory_managers/TestMemoryManager.cpp51
-rw-r--r--onert-micro/luci-interpreter/src/memory_managers/TestMemoryManager.h53
-rw-r--r--onert-micro/requires.cmake1
-rw-r--r--onert-micro/standalone/CMakeLists.txt14
-rw-r--r--onert-micro/tests/mbed-os/CMakeLists.txt194
-rw-r--r--onert-micro/tests/mbed-os/main.cpp28
-rw-r--r--onert-micro/tests/mbed-os/mbed-sources.cmake1589
-rw-r--r--onert-micro/tests/mbed-os/mbed_config.h488
-rw-r--r--onert-micro/tests/mbed-os/startup_stm32h743xx.S675
-rw-r--r--packaging/ABSEIL.tar.gzbin0 -> 2151177 bytes
-rw-r--r--packaging/CPUINFO.tar.gzbin0 -> 136288 bytes
-rw-r--r--packaging/EGL_HEADERS.tar.gzbin0 -> 74033 bytes
-rw-r--r--packaging/FARMHASH.tar.gzbin0 -> 469800 bytes
-rw-r--r--packaging/FLATBUFFERS-2.0.tar.gzbin0 -> 407104 bytes
-rw-r--r--packaging/FP16.tar.gzbin0 -> 70160 bytes
-rw-r--r--packaging/FXDIV.tar.gzbin0 -> 8914 bytes
-rw-r--r--packaging/GEMMLOWP.tar.gzbin0 -> 830362 bytes
-rw-r--r--packaging/NEON2SSE.tar.gzbin0 -> 101270 bytes
-rw-r--r--packaging/OOURAFFT.tar.gzbin0 -> 111530 bytes
-rw-r--r--packaging/OPENCL_HEADERS.tar.gzbin0 -> 60290 bytes
-rw-r--r--packaging/OPENGL_HEADERS.tar.gzbin0 -> 313232 bytes
-rw-r--r--packaging/PSIMD.tar.gzbin0 -> 6853 bytes
-rw-r--r--packaging/PTHREADPOOL.tar.gzbin0 -> 46910 bytes
-rw-r--r--packaging/TENSORFLOW-2.8.0-EIGEN.tar.gzbin0 -> 1777666 bytes
-rw-r--r--packaging/TENSORFLOW-2.8.0-GEMMLOWP.tar.gzbin0 -> 847663 bytes
-rw-r--r--packaging/TENSORFLOW-2.8.0-RUY.tar.gzbin0 -> 290633 bytes
-rw-r--r--packaging/TENSORFLOW-2.8.0.tar.gzbin0 -> 13387359 bytes
-rw-r--r--packaging/VULKAN.tar.gzbin0 -> 870795 bytes
-rw-r--r--packaging/XNNPACK.tar.gzbin0 -> 1583418 bytes
-rw-r--r--packaging/eigen.tar.gzbin2664733 -> 0 bytes
-rw-r--r--packaging/gemmlowp.tar.gzbin830368 -> 0 bytes
-rw-r--r--packaging/gtest.tar.gzbin1287188 -> 0 bytes
-rw-r--r--packaging/nnapi_test_generated.tar.gzbin819008 -> 856612 bytes
-rw-r--r--packaging/nnfw.spec271
-rw-r--r--packaging/ruy.tar.gzbin220553 -> 0 bytes
-rw-r--r--res/CircleRecipes/InstanceNorm_001/test.recipe47
-rw-r--r--res/CircleRecipes/InstanceNorm_001/test.reverse0
-rw-r--r--res/CircleRecipes/Quant_InstanceNorm_000/test.qconf.json11
-rw-r--r--res/CircleRecipes/Quant_InstanceNorm_000/test.recipe43
-rw-r--r--res/CircleRecipes/Quant_InstanceNorm_000/test.reverse0
-rw-r--r--res/CircleRecipes/Quant_InstanceNorm_000/test.rule13
-rw-r--r--res/CircleRecipes/Quant_InstanceNorm_001/test.qconf.json11
-rw-r--r--res/CircleRecipes/Quant_InstanceNorm_001/test.recipe43
-rw-r--r--res/CircleRecipes/Quant_InstanceNorm_001/test.reverse0
-rw-r--r--res/CircleRecipes/Quant_InstanceNorm_001/test.rule13
-rw-r--r--res/CircleSchema/0.3/circle_schema.fbs1137
-rw-r--r--res/CircleSchema/0.4/circle_schema.fbs1292
-rw-r--r--res/CircleSchema/0.5/circle_schema.fbs1338
-rw-r--r--res/CircleSchema/0.6/circle_schema.fbs1388
-rw-r--r--res/PyTorchExamples/.gitignore2
-rw-r--r--res/PyTorchExamples/README.md41
-rw-r--r--res/PyTorchExamples/examples/AdaptiveAvgPool2d/__init__.py18
-rw-r--r--res/PyTorchExamples/examples/AdaptiveMaxPool2d/__init__.py18
-rw-r--r--res/PyTorchExamples/examples/AvgPool2d-1/__init__.py18
-rw-r--r--res/PyTorchExamples/examples/AvgPool2d/__init__.py18
-rw-r--r--res/PyTorchExamples/examples/BatchNorm2d/__init__.py18
-rw-r--r--res/PyTorchExamples/examples/BatchToSpaceND/__init__.py49
-rw-r--r--res/PyTorchExamples/examples/Bilinear/__init__.py20
-rw-r--r--res/PyTorchExamples/examples/ConstantPad2d-1/__init__.py18
-rw-r--r--res/PyTorchExamples/examples/ConstantPad2d-2/__init__.py18
-rw-r--r--res/PyTorchExamples/examples/ConstantPad2d-3/__init__.py18
-rw-r--r--res/PyTorchExamples/examples/ConstantPad2d/__init__.py18
-rw-r--r--res/PyTorchExamples/examples/Conv2d-dil/__init__.py18
-rw-r--r--res/PyTorchExamples/examples/Conv2d-dw/__init__.py18
-rw-r--r--res/PyTorchExamples/examples/Conv2d-pad/__init__.py18
-rw-r--r--res/PyTorchExamples/examples/Conv2d-yuv2rgb/__init__.py24
-rw-r--r--res/PyTorchExamples/examples/Conv2d/__init__.py18
-rw-r--r--res/PyTorchExamples/examples/ConvTranspose2d/__init__.py18
-rw-r--r--res/PyTorchExamples/examples/ELU/__init__.py18
-rw-r--r--res/PyTorchExamples/examples/Flatten/__init__.py18
-rw-r--r--res/PyTorchExamples/examples/InstanceNorm2d/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/LPPool2d/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/LSTM-bi/__init__.py28
-rw-r--r--res/PyTorchExamples/examples/LSTM-nobias/__init__.py28
-rw-r--r--res/PyTorchExamples/examples/LSTM-noinit/__init__.py24
-rw-r--r--res/PyTorchExamples/examples/LSTM/__init__.py20
-rw-r--r--res/PyTorchExamples/examples/LeakyReLU/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/Linear/__init__.py18
-rw-r--r--res/PyTorchExamples/examples/LocalResponseNorm/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/LogSoftmax/__init__.py18
-rw-r--r--res/PyTorchExamples/examples/MaxPool2d-am/__init__.py18
-rw-r--r--res/PyTorchExamples/examples/MaxPool2d/__init__.py18
-rw-r--r--res/PyTorchExamples/examples/PReLU/__init__.py18
-rw-r--r--res/PyTorchExamples/examples/PReLUwConv1d/__init__.py19
-rw-r--r--res/PyTorchExamples/examples/PReLUwConv2d/__init__.py19
-rw-r--r--res/PyTorchExamples/examples/PixelShuffle/__init__.py18
-rw-r--r--res/PyTorchExamples/examples/RNN-bi/__init__.py27
-rw-r--r--res/PyTorchExamples/examples/RNN-nobias/__init__.py26
-rw-r--r--res/PyTorchExamples/examples/RNN-noinit/__init__.py23
-rw-r--r--res/PyTorchExamples/examples/RNN-relu/__init__.py26
-rw-r--r--res/PyTorchExamples/examples/RNN/__init__.py20
-rw-r--r--res/PyTorchExamples/examples/ReLU/__init__.py18
-rw-r--r--res/PyTorchExamples/examples/ReLU6/__init__.py18
-rw-r--r--res/PyTorchExamples/examples/Sigmoid/__init__.py18
-rw-r--r--res/PyTorchExamples/examples/Softmax/__init__.py18
-rw-r--r--res/PyTorchExamples/examples/SpaceToBatchND/__init__.py49
-rw-r--r--res/PyTorchExamples/examples/SpaceToDepth/__init__.py30
-rw-r--r--res/PyTorchExamples/examples/Tanh/__init__.py18
-rw-r--r--res/PyTorchExamples/examples/UpsamplingNearest2d/__init__.py18
-rw-r--r--res/PyTorchExamples/examples/abs/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/add/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/argmax/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/argmin/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/cat-1/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/cat/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/clamp/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/cos/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/dist/__init__.py16
-rw-r--r--res/PyTorchExamples/examples/div/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/floor/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/floor_divide/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/ge/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/gt/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/interpolate/__init__.py30
-rw-r--r--res/PyTorchExamples/examples/le/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/log/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/logical_and/__init__.py19
-rw-r--r--res/PyTorchExamples/examples/logical_or/__init__.py19
-rw-r--r--res/PyTorchExamples/examples/logical_xor/__init__.py19
-rw-r--r--res/PyTorchExamples/examples/lt/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/matmul/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/min-1/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/min/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/mul/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/ne/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/neg/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/normalize/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/permute/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/pow/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/reshape/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/rsqrt/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/sin/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/slice/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/split-1/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/split-2/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/split/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/sqrt/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/squeeze-1/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/squeeze/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/strided_slice/__init__.py25
-rw-r--r--res/PyTorchExamples/examples/sub/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/sum/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/where/__init__.py17
-rw-r--r--res/PyTorchExamples/examples/zeros_like/__init__.py17
-rwxr-xr-xres/PyTorchExamples/ptem.py80
-rw-r--r--res/TensorFlowLiteRecipes/Add_002/test.recipe32
-rw-r--r--res/TensorFlowLiteRecipes/Add_002/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Add_STR_000/test.recipe33
-rw-r--r--res/TensorFlowLiteRecipes/Add_STR_001/test.recipe34
-rw-r--r--res/TensorFlowLiteRecipes/ArgMax_004/test.recipe30
-rw-r--r--res/TensorFlowLiteRecipes/ArgMax_004/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/BroadcastTo_000/test.recipe24
-rw-r--r--res/TensorFlowLiteRecipes/Concatenation_001/test.recipe32
-rw-r--r--res/TensorFlowLiteRecipes/Concatenation_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Conv2D_005/test.recipe34
-rw-r--r--res/TensorFlowLiteRecipes/Densify_000/test.recipe44
-rw-r--r--res/TensorFlowLiteRecipes/Dequantize_000/test.recipe18
-rw-r--r--res/TensorFlowLiteRecipes/Dequantize_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Equal_U8_000/test.recipe28
-rw-r--r--res/TensorFlowLiteRecipes/Equal_U8_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/ExpandDims_004/test.recipe30
-rw-r--r--res/TensorFlowLiteRecipes/FakeQuant_000/test.recipe25
-rw-r--r--res/TensorFlowLiteRecipes/FakeQuant_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/FullyConnected_004/test.recipe69
-rw-r--r--res/TensorFlowLiteRecipes/FullyConnected_004/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/FullyConnected_005/test.recipe43
-rw-r--r--res/TensorFlowLiteRecipes/FullyConnected_006/test.recipe29
-rw-r--r--res/TensorFlowLiteRecipes/FullyConnected_006/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/FullyConnected_007/test.recipe29
-rw-r--r--res/TensorFlowLiteRecipes/FullyConnected_007/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/FullyConnected_007/test.rule7
-rw-r--r--res/TensorFlowLiteRecipes/FullyConnected_008/test.recipe29
-rw-r--r--res/TensorFlowLiteRecipes/FullyConnected_008/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/FullyConnected_008/test.rule8
-rw-r--r--res/TensorFlowLiteRecipes/FullyConnected_009/test.recipe43
-rw-r--r--res/TensorFlowLiteRecipes/FullyConnected_009/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Gather_000/test.recipe1
-rw-r--r--res/TensorFlowLiteRecipes/Gather_001/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Gather_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Gelu_000/test.recipe20
-rw-r--r--res/TensorFlowLiteRecipes/Gelu_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/GreaterEqual_U8_000/test.recipe28
-rw-r--r--res/TensorFlowLiteRecipes/GreaterEqual_U8_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Greater_U8_000/test.recipe28
-rw-r--r--res/TensorFlowLiteRecipes/Greater_U8_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/HardSwish_000/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/HardSwish_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/HardSwish_001/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/HardSwish_001/test.rule8
-rw-r--r--res/TensorFlowLiteRecipes/LessEqual_U8_000/test.recipe28
-rw-r--r--res/TensorFlowLiteRecipes/LessEqual_U8_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Less_U8_000/test.recipe28
-rw-r--r--res/TensorFlowLiteRecipes/Less_U8_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/LogSoftmax_U8_000/test.recipe21
-rw-r--r--res/TensorFlowLiteRecipes/LogSoftmax_U8_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/MaxPoolWithArgmax_000/test.recipe33
-rw-r--r--res/TensorFlowLiteRecipes/MaxPoolWithArgmax_000/test.rule17
-rw-r--r--res/TensorFlowLiteRecipes/MaxPoolWithArgmax_001/test.recipe33
-rw-r--r--res/TensorFlowLiteRecipes/MaxPoolWithArgmax_001/test.rule17
-rw-r--r--res/TensorFlowLiteRecipes/MaxPoolWithArgmax_002/test.recipe33
-rw-r--r--res/TensorFlowLiteRecipes/MaxPoolWithArgmax_002/test.rule16
-rw-r--r--res/TensorFlowLiteRecipes/Maximum_U8_000/test.recipe29
-rw-r--r--res/TensorFlowLiteRecipes/Maximum_U8_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Mean_U8_dynamic_000/test.recipe31
-rw-r--r--res/TensorFlowLiteRecipes/Mean_U8_dynamic_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Mean_dynamic_000/test.recipe29
-rw-r--r--res/TensorFlowLiteRecipes/Mean_dynamic_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Mean_dynamic_001/test.recipe29
-rw-r--r--res/TensorFlowLiteRecipes/Mean_dynamic_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Minimum_U8_000/test.recipe29
-rw-r--r--res/TensorFlowLiteRecipes/Minimum_U8_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Mul_001/test.recipe32
-rw-r--r--res/TensorFlowLiteRecipes/Mul_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Net_BroadcastTo_AddV2_000/test.recipe63
-rw-r--r--res/TensorFlowLiteRecipes/Net_BroadcastTo_AddV2_000/test.rule7
-rw-r--r--res/TensorFlowLiteRecipes/Net_BroadcastTo_AddV2_001/test.recipe63
-rw-r--r--res/TensorFlowLiteRecipes/Net_BroadcastTo_AddV2_001/test.rule7
-rw-r--r--res/TensorFlowLiteRecipes/Net_Conv_Add_Mul_000/test.recipe92
-rw-r--r--res/TensorFlowLiteRecipes/Net_Conv_Add_Mul_000/test.rule7
-rw-r--r--res/TensorFlowLiteRecipes/Net_Conv_Add_Mul_001/test.recipe92
-rw-r--r--res/TensorFlowLiteRecipes/Net_Conv_Add_Mul_001/test.rule7
-rw-r--r--res/TensorFlowLiteRecipes/Net_Conv_Add_Mul_002/test.recipe92
-rw-r--r--res/TensorFlowLiteRecipes/Net_Conv_Add_Mul_002/test.rule7
-rw-r--r--res/TensorFlowLiteRecipes/Net_Conv_Add_Mul_003/test.recipe92
-rw-r--r--res/TensorFlowLiteRecipes/Net_Conv_Add_Mul_003/test.rule7
-rw-r--r--res/TensorFlowLiteRecipes/Net_Conv_FakeQuant_000/test.recipe61
-rw-r--r--res/TensorFlowLiteRecipes/Net_Conv_FakeQuant_000/test.rule7
-rw-r--r--res/TensorFlowLiteRecipes/Net_Conv_Min_Max_000/test.recipe121
-rw-r--r--res/TensorFlowLiteRecipes/Net_Conv_Min_Max_000/test.rule8
-rw-r--r--res/TensorFlowLiteRecipes/Net_Conv_Min_Relu_000/test.recipe113
-rw-r--r--res/TensorFlowLiteRecipes/Net_Conv_Min_Relu_000/test.rule8
-rw-r--r--res/TensorFlowLiteRecipes/Net_Conv_PReluGraph_000/test.recipe145
-rw-r--r--res/TensorFlowLiteRecipes/Net_Conv_PReluGraph_000/test.rule10
-rw-r--r--res/TensorFlowLiteRecipes/Net_Conv_QuantDequant_000/test.recipe66
-rw-r--r--res/TensorFlowLiteRecipes/Net_Conv_QuantDequant_000/test.rule8
-rw-r--r--res/TensorFlowLiteRecipes/Net_Conv_Relu6_000/test.recipe85
-rw-r--r--res/TensorFlowLiteRecipes/Net_Conv_Relu6_000/test.rule6
-rw-r--r--res/TensorFlowLiteRecipes/Net_Densify_Add_000/test.recipe44
-rw-r--r--res/TensorFlowLiteRecipes/Net_Densify_Dequantize_Add_000/test.recipe54
-rw-r--r--res/TensorFlowLiteRecipes/Net_Dequantize_Add_000/test.recipe41
-rw-r--r--res/TensorFlowLiteRecipes/Net_Duplicate_Weights_000/test.recipe77
-rw-r--r--res/TensorFlowLiteRecipes/Net_Duplicate_Weights_000/test.rule8
-rw-r--r--res/TensorFlowLiteRecipes/Net_DwConv_BN_000/test.recipe91
-rw-r--r--res/TensorFlowLiteRecipes/Net_DwConv_BN_000/test.rule7
-rw-r--r--res/TensorFlowLiteRecipes/Net_DwConv_BN_001/test.recipe91
-rw-r--r--res/TensorFlowLiteRecipes/Net_DwConv_BN_001/test.rule7
-rw-r--r--res/TensorFlowLiteRecipes/Net_FullyConnected_Add_000/test.recipe66
-rw-r--r--res/TensorFlowLiteRecipes/Net_FullyConnected_Add_000/test.rule6
-rw-r--r--res/TensorFlowLiteRecipes/Net_Gather_SparseToDense_AddV2_000/test.recipe131
-rw-r--r--res/TensorFlowLiteRecipes/Net_Gelu_000/test.recipe100
-rw-r--r--res/TensorFlowLiteRecipes/Net_Gelu_000/test.rule8
-rw-r--r--res/TensorFlowLiteRecipes/Net_Gelu_001/test.recipe100
-rw-r--r--res/TensorFlowLiteRecipes/Net_Gelu_001/test.rule8
-rw-r--r--res/TensorFlowLiteRecipes/Net_InstanceNorm_001/test.rule2
-rw-r--r--res/TensorFlowLiteRecipes/Net_InstanceNorm_002/test.recipe39
-rw-r--r--res/TensorFlowLiteRecipes/Net_InstanceNorm_002/test.rule4
-rw-r--r--res/TensorFlowLiteRecipes/Net_InstanceNorm_003/test.recipe253
-rw-r--r--res/TensorFlowLiteRecipes/Net_InstanceNorm_003/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Net_InstanceNorm_004/test.recipe294
-rw-r--r--res/TensorFlowLiteRecipes/Net_InstanceNorm_004/test.rule12
-rw-r--r--res/TensorFlowLiteRecipes/Net_InstanceNorm_005/test.recipe224
-rw-r--r--res/TensorFlowLiteRecipes/Net_InstanceNorm_005/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Net_InstanceNorm_006/test.recipe283
-rw-r--r--res/TensorFlowLiteRecipes/Net_InstanceNorm_006/test.rule12
-rw-r--r--res/TensorFlowLiteRecipes/Net_InstanceNorm_007/test.recipe184
-rw-r--r--res/TensorFlowLiteRecipes/Net_InstanceNorm_007/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Net_Maximum_Minimum_000/test.recipe86
-rw-r--r--res/TensorFlowLiteRecipes/Net_Maximum_Minimum_000/test.rule7
-rw-r--r--res/TensorFlowLiteRecipes/Net_Mean_Mean_000/test.recipe47
-rw-r--r--res/TensorFlowLiteRecipes/Net_Mean_Mean_000/test.rule5
-rw-r--r--res/TensorFlowLiteRecipes/Net_Mean_Mean_001/test.recipe47
-rw-r--r--res/TensorFlowLiteRecipes/Net_Mean_Mean_001/test.rule5
-rw-r--r--res/TensorFlowLiteRecipes/Net_Mean_Transpose_Mean_000/test.recipe66
-rw-r--r--res/TensorFlowLiteRecipes/Net_Mean_Transpose_Mean_000/test.rule5
-rw-r--r--res/TensorFlowLiteRecipes/Net_Preactivation_BN_000/test.recipe171
-rw-r--r--res/TensorFlowLiteRecipes/Net_Preactivation_BN_000/test.rule8
-rw-r--r--res/TensorFlowLiteRecipes/Net_Reshape_Neg_000/test.recipe35
-rw-r--r--res/TensorFlowLiteRecipes/Net_Reshape_Reshape_000/test.recipe42
-rw-r--r--res/TensorFlowLiteRecipes/Net_Reshape_Reshape_000/test.rule5
-rw-r--r--res/TensorFlowLiteRecipes/Net_Squeeze_Squeeze_000/test.recipe29
-rw-r--r--res/TensorFlowLiteRecipes/Net_Squeeze_Squeeze_000/test.rule6
-rw-r--r--res/TensorFlowLiteRecipes/Net_StridedSlice_StridedSlice_000/test.recipe77
-rw-r--r--res/TensorFlowLiteRecipes/Net_StridedSlice_StridedSlice_000/test.rule5
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_Add_000/test.recipe94
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_Add_000/test.rule6
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_Add_001/test.recipe100
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_Add_001/test.rule6
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_Add_002/test.recipe94
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_Add_002/test.rule6
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_BN_000/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_BN_001/test.recipe124
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_BN_001/test.rule7
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_BN_002/test.recipe127
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_BN_002/test.rule8
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_BN_003/test.recipe136
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_BN_003/test.rule7
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_BN_004/test.recipe150
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_BN_004/test.rule7
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_BN_005/test.recipe127
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_BN_005/test.rule8
-rw-r--r--res/TensorFlowLiteRecipes/Net_Transpose_Abs_000/test.recipe34
-rw-r--r--res/TensorFlowLiteRecipes/Net_Transpose_Abs_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Net_Transpose_Add_000/test.recipe48
-rw-r--r--res/TensorFlowLiteRecipes/Net_Transpose_Add_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/NotEqual_U8_000/test.recipe28
-rw-r--r--res/TensorFlowLiteRecipes/NotEqual_U8_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/PRelu_001/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/PRelu_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/PadV2_001/test.recipe68
-rw-r--r--res/TensorFlowLiteRecipes/PadV2_001/test.rule8
-rw-r--r--res/TensorFlowLiteRecipes/Part_Add_SVDF_000/test.recipe82
-rw-r--r--res/TensorFlowLiteRecipes/Part_Add_Sqrt_000/test.recipe48
-rw-r--r--res/TensorFlowLiteRecipes/Part_Add_Sqrt_Rsqrt_000/test.recipe68
-rw-r--r--res/TensorFlowLiteRecipes/Part_Add_Sub_000/test.recipe67
-rw-r--r--res/TensorFlowLiteRecipes/Part_Add_Sub_001/test.recipe67
-rw-r--r--res/TensorFlowLiteRecipes/Part_Add_Sub_002/test.recipe59
-rw-r--r--res/TensorFlowLiteRecipes/Part_If_Add_Sub_000/test.recipe128
-rw-r--r--res/TensorFlowLiteRecipes/Part_If_Add_Sub_001/test.recipe204
-rw-r--r--res/TensorFlowLiteRecipes/Part_Mul_Sqrt_FC_nobias_000/test.recipe63
-rw-r--r--res/TensorFlowLiteRecipes/Part_Split_Add_000/test.recipe47
-rw-r--r--res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_000/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_001/test.recipe47
-rw-r--r--res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_002/test.recipe47
-rw-r--r--res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_003/test.recipe47
-rw-r--r--res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_004/test.recipe38
-rw-r--r--res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_Add_000/test.recipe56
-rw-r--r--res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_Add_001/test.recipe61
-rw-r--r--res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_Add_002/test.recipe71
-rw-r--r--res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_Add_003/test.recipe47
-rw-r--r--res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_Add_004/test.recipe41
-rw-r--r--res/TensorFlowLiteRecipes/Part_Tanh_FC_nobias/test.recipe42
-rw-r--r--res/TensorFlowLiteRecipes/Part_While_000/test.readme4
-rw-r--r--res/TensorFlowLiteRecipes/Part_While_000/test.recipe124
-rw-r--r--res/TensorFlowLiteRecipes/Part_While_000/test.rule5
-rw-r--r--res/TensorFlowLiteRecipes/Part_While_001/test.readme5
-rw-r--r--res/TensorFlowLiteRecipes/Part_While_001/test.recipe203
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Add_000/test.recipe36
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Add_000/test.rule10
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Add_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Add_001/test.recipe31
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Add_001/test.rule12
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Add_002/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Add_002/test.recipe31
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Add_002/test.rule12
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Add_I8_000/test.recipe66
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Add_I8_000/test.rule7
-rw-r--r--res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.recipe24
-rw-r--r--res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.recipe24
-rw-r--r--res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_AveragePool2D_I8_000/test.recipe51
-rw-r--r--res/TensorFlowLiteRecipes/Quant_AveragePool2D_I8_000/test.rule6
-rw-r--r--res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.recipe28
-rw-r--r--res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.recipe28
-rw-r--r--res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.recipe28
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.recipe28
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_000/test.recipe44
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_000/test.rule10
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_001/test.recipe44
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_002/test.recipe44
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_002/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_003/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_003/test.recipe44
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_003/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_003/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_004/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_004/test.recipe44
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_004/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_004/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_005/test.recipe44
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_005/test.rule8
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_006/test.recipe44
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_006/test.rule8
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_I8_000/test.recipe472
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_I8_000/test.rule8
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000/test.recipe92
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_001/test.qconf.json16
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_001/test.recipe92
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_001/test.rule14
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_002/test.qconf.json16
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_002/test.recipe88
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_002/test.rule14
-rw-r--r--res/TensorFlowLiteRecipes/Quant_DepthToSpace_000/test.recipe22
-rw-r--r--res/TensorFlowLiteRecipes/Quant_DepthToSpace_000/test.rule12
-rw-r--r--res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.recipe49
-rw-r--r--res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.recipe49
-rw-r--r--res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_I8_000/test.recipe473
-rw-r--r--res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_I8_000/test.rule8
-rw-r--r--res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.recipe55
-rw-r--r--res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.recipe55
-rw-r--r--res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.recipe20
-rw-r--r--res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.recipe20
-rw-r--r--res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Logistic_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Logistic_000/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Logistic_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Logistic_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Logistic_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Logistic_001/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Logistic_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Logistic_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.recipe24
-rw-r--r--res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.recipe24
-rw-r--r--res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_MaxPool2D_I8_000/test.recipe51
-rw-r--r--res/TensorFlowLiteRecipes/Quant_MaxPool2D_I8_000/test.rule6
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mean_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mean_000/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mean_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mean_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mean_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mean_001/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mean_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mean_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mean_I8_000/test.recipe63
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mean_I8_000/test.rule7
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mul_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mul_000/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mul_000/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mul_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mul_001/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mul_001/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mul_I8_000/test.recipe66
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mul_I8_000/test.rule7
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Neg_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Neg_000/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Neg_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Neg_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Neg_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Neg_001/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Neg_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Neg_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_PRelu_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_PRelu_000/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Quant_PRelu_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_PRelu_000/test.rule12
-rw-r--r--res/TensorFlowLiteRecipes/Quant_PRelu_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_PRelu_001/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Quant_PRelu_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_PRelu_001/test.rule12
-rw-r--r--res/TensorFlowLiteRecipes/Quant_PRelu_I8_000/test.recipe66
-rw-r--r--res/TensorFlowLiteRecipes/Quant_PRelu_I8_000/test.rule7
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Pad_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Pad_000/test.recipe30
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Pad_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Pad_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Pad_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Pad_001/test.recipe30
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Pad_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Pad_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU_000/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU_001/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU_I8_000/test.recipe43
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU_I8_000/test.rule6
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Reshape_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Reshape_000/test.recipe20
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Reshape_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Reshape_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Reshape_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Reshape_001/test.recipe20
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Reshape_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Reshape_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.recipe30
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.recipe30
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Slice_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Slice_000/test.recipe37
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Slice_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Slice_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Slice_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Slice_001/test.recipe37
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Slice_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Slice_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Softmax_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Softmax_000/test.recipe20
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Softmax_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Softmax_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Softmax_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Softmax_001/test.recipe20
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Softmax_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Softmax_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_SpaceToDepth_000/test.recipe22
-rw-r--r--res/TensorFlowLiteRecipes/Quant_SpaceToDepth_000/test.rule12
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Split_Add_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Split_Add_000/test.recipe47
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Split_Add_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Split_Add_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Split_Add_001/test.recipe47
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Split_Add_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Tanh_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Tanh_000/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Tanh_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Tanh_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Tanh_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Tanh_001/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Tanh_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Tanh_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.recipe55
-rw-r--r--res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.recipe55
-rw-r--r--res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_TransposeConv_I8_000/test.recipe344
-rw-r--r--res/TensorFlowLiteRecipes/Quant_TransposeConv_I8_000/test.rule8
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Transpose_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Transpose_000/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Transpose_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Transpose_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Transpose_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Transpose_001/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Transpose_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Transpose_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quantize_000/test.recipe18
-rw-r--r--res/TensorFlowLiteRecipes/Quantize_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quantize_001/test.recipe66
-rw-r--r--res/TensorFlowLiteRecipes/Quantize_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/REGRESS_ONNX_Conv_BN_001/test.recipe421
-rw-r--r--res/TensorFlowLiteRecipes/REGRESS_ONNX_Conv_BN_001/test.rule9
-rw-r--r--res/TensorFlowLiteRecipes/REGRESS_ONNX_Conv_BN_MeanMean_001/test.recipe309
-rw-r--r--res/TensorFlowLiteRecipes/REGRESS_ONNX_Conv_BN_MeanMean_001/test.rule9
-rw-r--r--res/TensorFlowLiteRecipes/REGRESS_ONNX_Conv_BN_Relu6_001/test.recipe277
-rw-r--r--res/TensorFlowLiteRecipes/REGRESS_ONNX_Conv_BN_Relu6_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/REGRESS_ONNX_Mul_Mul_000/test.recipe88
-rw-r--r--res/TensorFlowLiteRecipes/REGRESS_ONNX_Mul_Mul_000/test.rule7
-rw-r--r--res/TensorFlowLiteRecipes/ReLU6_dynamic_000/test.recipe19
-rw-r--r--res/TensorFlowLiteRecipes/ReLU6_dynamic_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/ReLUN1To1_dynamic_000/test.recipe19
-rw-r--r--res/TensorFlowLiteRecipes/ReLUN1To1_dynamic_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/ReLU_dynamic_000/test.recipe19
-rw-r--r--res/TensorFlowLiteRecipes/ReLU_dynamic_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/ReduceAny_dynamic_000/test.recipe31
-rw-r--r--res/TensorFlowLiteRecipes/ReduceAny_dynamic_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/ReduceAny_dynamic_001/test.recipe32
-rw-r--r--res/TensorFlowLiteRecipes/ReduceAny_dynamic_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/ReduceAny_dynamic_002/test.recipe31
-rw-r--r--res/TensorFlowLiteRecipes/ReduceAny_dynamic_002/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/ReduceAny_dynamic_003/test.recipe31
-rw-r--r--res/TensorFlowLiteRecipes/ReduceAny_dynamic_003/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/ReduceMax_dynamic_000/test.recipe29
-rw-r--r--res/TensorFlowLiteRecipes/ReduceMax_dynamic_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/ReduceMin_dynamic_000/test.recipe29
-rw-r--r--res/TensorFlowLiteRecipes/ReduceMin_dynamic_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/ReduceProd_dynamic_000/test.recipe31
-rw-r--r--res/TensorFlowLiteRecipes/ReduceProd_dynamic_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/ReduceProd_dynamic_001/test.recipe32
-rw-r--r--res/TensorFlowLiteRecipes/ReduceProd_dynamic_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/ReduceProd_dynamic_002/test.recipe31
-rw-r--r--res/TensorFlowLiteRecipes/ReduceProd_dynamic_002/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/ReduceProd_dynamic_003/test.recipe31
-rw-r--r--res/TensorFlowLiteRecipes/ReduceProd_dynamic_003/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/SVDF_000/test.recipe62
-rw-r--r--res/TensorFlowLiteRecipes/SVDF_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/SVDF_001/test.recipe52
-rw-r--r--res/TensorFlowLiteRecipes/SVDF_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/SignatureDef_MultiOut_000/test.recipe81
-rw-r--r--res/TensorFlowLiteRecipes/SignatureDef_MultiOut_001/test.recipe81
-rw-r--r--res/TensorFlowLiteRecipes/Slice_001/test.recipe37
-rw-r--r--res/TensorFlowLiteRecipes/Slice_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Sqrt_000/test.recipe1
-rw-r--r--res/TensorFlowLiteRecipes/Squeeze_001/test.recipe18
-rw-r--r--res/TensorFlowLiteRecipes/Squeeze_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/StridedSlice_003/test.recipe58
-rw-r--r--res/TensorFlowLiteRecipes/StridedSlice_003/test.rule6
-rw-r--r--res/TensorFlowLiteRecipes/StridedSlice_004/test.recipe46
-rw-r--r--res/TensorFlowLiteRecipes/StridedSlice_004/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Sum_dynamic_000/test.recipe29
-rw-r--r--res/TensorFlowLiteRecipes/Sum_dynamic_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Sum_dynamic_001/test.recipe29
-rw-r--r--res/TensorFlowLiteRecipes/TransposeConv_000/test.recipe1
-rw-r--r--res/TensorFlowLiteRecipes/TransposeConv_001/test.recipe1
-rw-r--r--res/TensorFlowLiteRecipes/Transpose_U8_000/test.recipe29
-rw-r--r--res/TensorFlowLiteRecipes/Transpose_U8_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_000/test.recipe185
-rw-r--r--res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_001/test.recipe323
-rw-r--r--res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_002/test.recipe236
-rw-r--r--res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_002/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_003/test.recipe193
-rw-r--r--res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_003/test.rule7
-rw-r--r--res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_004/test.recipe425
-rw-r--r--res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_004/test.rule6
-rw-r--r--res/TensorFlowLiteRecipes/Unique_000/test.recipe2
-rw-r--r--res/TensorFlowLiteRecipes/Unique_001/test.recipe2
-rw-r--r--res/TensorFlowLiteRecipes/Unique_002/test.recipe2
-rw-r--r--res/TensorFlowLiteRecipes/Unique_003/test.recipe2
-rw-r--r--res/TensorFlowLiteRecipes/Unique_U8_000/test.recipe2
-rw-r--r--res/TensorFlowLiteRecipes/Unique_U8_001/test.recipe2
-rw-r--r--res/TensorFlowLiteSchema/2.10.1/schema.fbs1306
-rw-r--r--res/TensorFlowLiteSchema/2.12.1/schema.fbs1340
-rw-r--r--res/TensorFlowLiteSchema/2.6.0/schema.fbs1240
-rw-r--r--res/TensorFlowLiteSchema/2.7.0/schema.fbs1250
-rw-r--r--res/TensorFlowLiteSchema/2.8.0/schema.fbs1264
-rw-r--r--res/TensorFlowLiteSchema/SCHEMA.lst2
-rw-r--r--res/TensorFlowPythonExamples/.gitignore1
-rw-r--r--res/TensorFlowPythonExamples/README.md14
-rw-r--r--res/TensorFlowPythonExamples/examples/AddV2/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/BatchMatMulV2/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/Bidirectional_LSTM/__init__.py8
-rw-r--r--res/TensorFlowPythonExamples/examples/GRU_unroll/__init__.py8
-rw-r--r--res/TensorFlowPythonExamples/examples/LSTM_batsize/__init__.py10
-rw-r--r--res/TensorFlowPythonExamples/examples/LSTM_retseq/__init__.py10
-rw-r--r--res/TensorFlowPythonExamples/examples/LSTM_unroll/__init__.py10
-rw-r--r--res/TensorFlowPythonExamples/examples/PadV2/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/RNN_GRUCell_unroll/__init__.py9
-rw-r--r--res/TensorFlowPythonExamples/examples/RNN_LSTMCell_unroll/__init__.py11
-rw-r--r--res/TensorFlowPythonExamples/examples/SimpleRNN_unroll/__init__.py10
-rwxr-xr-xres/TensorFlowPythonExamples/examples/abs/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/add/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/add_n/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/argmax/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/argmin/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/atrous_conv2d/__init__.py10
-rw-r--r--res/TensorFlowPythonExamples/examples/average_pool_2d/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/batch_normalization/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/batch_to_space/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/biasadd/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/cast/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/ceil/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/concat/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/cond/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/cond_1/__init__.py28
-rw-r--r--res/TensorFlowPythonExamples/examples/conv2d_1/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/conv2d_2/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/conv2d_transpose/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/cos/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/depth_to_space/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/depthwise_conv2d_1/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/depthwise_conv2d_2/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/div/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/elu/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/exp/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/expand_dims_00/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/expand_dims_01/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/expand_dims_02/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/fake_quant_with_min_max_vars/__init__.py27
-rw-r--r--res/TensorFlowPythonExamples/examples/fill/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/flatten/__init__.py7
-rwxr-xr-xres/TensorFlowPythonExamples/examples/floor/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/floordiv/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/floormod/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/fused_batch_norm/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/gather/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/gather_nd/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/greater/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/greater_equal/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/gru/__init__.py14
-rw-r--r--res/TensorFlowPythonExamples/examples/instance_norm/__init__.py24
-rw-r--r--res/TensorFlowPythonExamples/examples/l2_normalize/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/leaky_relu/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/less/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/less_equal/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/local_response_normalization/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/log/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/log_softmax/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/log_softmax_2/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/logical_and/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/logical_not/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/logical_or/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/lstm/__init__.py14
-rwxr-xr-xres/TensorFlowPythonExamples/examples/matmul/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/matrix_band_part/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/matrix_diag/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/matrix_set_diag/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/max_pool_with_argmax/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/maximum/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/minimum/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/multiply/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/negative/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/non_max_suppression_padded/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/non_max_suppression_padded_2/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores_2/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/not_equal/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/one_hot/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/pack/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/pad-reflect/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/pad/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/pow/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/prelu/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/range/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/rank/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/reduce_all/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/reduce_any/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/reduce_max/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/reduce_min/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/reduce_prod/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/relu/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/relu6/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/reshape/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/resize_bilinear/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/resize_nearest_neighbor/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/reverse_sequence/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/reverse_v2/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/rnn/__init__.py14
-rwxr-xr-xres/TensorFlowPythonExamples/examples/round/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/rsqrt/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/scatter_nd/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/segment_sum/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/shape/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/sigmoid/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/sin/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/slice/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/softmax/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/space_to_batch/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/space_to_batch_nd/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/space_to_depth/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/sparse_to_dense/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/split/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/split_2/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/sqrt/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/square/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/squared_difference/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/squeeze_1/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/squeeze_2/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/strided_slice/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/subtract/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/sum/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/tanh/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/tile/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/top_k/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/unidirectional_sequence_LSTM/__init__.py6
-rw-r--r--res/TensorFlowPythonExamples/examples/unique/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/unstack/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/where/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/where_2/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/where_v2/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/where_v2_2/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/while/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/while_2/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/while_3/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/yuv_to_rgb/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/zeros_like/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/tfpem.py43
-rw-r--r--res/TensorFlowPythonModels/examples/minimum-maximum/__init__.py15
-rwxr-xr-x[-rw-r--r--]res/TensorFlowPythonModels/tfpem.py2
-rw-r--r--runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_log.h11
-rw-r--r--runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_nativewrapper.h3
-rw-r--r--runtime/contrib/android/api/Android.mk4
-rw-r--r--runtime/contrib/android/api/Prebuilt.mk46
-rw-r--r--runtime/contrib/android/api/build.gradle32
-rw-r--r--runtime/contrib/android/api/prebuilt/Android.mk9
-rw-r--r--runtime/contrib/android/api/prebuilt/backend_cpu/Android.mk7
l---------runtime/contrib/android/api/prebuilt/backend_cpu/libbackend_cpu.so1
-rw-r--r--runtime/contrib/android/api/prebuilt/circle_loader/Android.mk7
l---------runtime/contrib/android/api/prebuilt/circle_loader/libcircle_loader.so1
-rw-r--r--runtime/contrib/android/api/prebuilt/nnfw-dev/Android.mk7
l---------runtime/contrib/android/api/prebuilt/nnfw-dev/libnnfw-dev.so1
-rw-r--r--runtime/contrib/android/api/prebuilt/onert_core/Android.mk7
l---------runtime/contrib/android/api/prebuilt/onert_core/libonert_core.so1
-rw-r--r--runtime/contrib/android/api/prebuilt/tensorflowlite_jni/Android.mk7
l---------runtime/contrib/android/api/prebuilt/tensorflowlite_jni/libtensorflowlite_jni.so1
-rw-r--r--runtime/contrib/android/api/prebuilt/tflite_loader/Android.mk7
l---------runtime/contrib/android/api/prebuilt/tflite_loader/libtflite_loader.so1
-rw-r--r--runtime/contrib/android/api/src/main/native/onert-native-api.cpp19
-rw-r--r--runtime/contrib/android/api/src/main/native/onert-native-api.h16
-rw-r--r--runtime/contrib/android_benchmark_app/CMakeLists.txt2
-rw-r--r--runtime/contrib/android_benchmark_app/cpp/ndk_main.cpp2
-rw-r--r--runtime/contrib/android_tflite/CMakeLists.txt31
-rw-r--r--runtime/contrib/android_tflite/builtin_ops_jni.cc30
-rw-r--r--runtime/contrib/heap_trace/src/cl_create_buffer_stub.cc4
-rw-r--r--runtime/contrib/heap_trace/src/memory_pool_for_symbol_searcher_internals.h2
-rw-r--r--runtime/contrib/heap_trace/src/trace.cc2
-rw-r--r--runtime/contrib/heap_trace/src/trace.h2
-rw-r--r--runtime/contrib/heap_trace/tests/src/cl_release_mem_object_interception_test.cc6
-rw-r--r--runtime/contrib/heap_trace/tests/src/malloc_interception_test.cc4
-rw-r--r--runtime/contrib/heap_trace/tests/src/realloc_interception_test.cc10
-rw-r--r--runtime/contrib/heap_trace/tests/src/symbol_searcher_test.cc2
-rw-r--r--runtime/contrib/heap_trace/tests/src/trace_test.cc16
-rw-r--r--runtime/contrib/labs/jniacl/src/jniacl_main.cc11
-rw-r--r--runtime/contrib/labs/opencl_test/src/opencl_test.cc10
-rw-r--r--runtime/contrib/labs/tflite_examples/src/conv.cpp8
-rw-r--r--runtime/contrib/style_transfer_app/CMakeLists.txt2
-rw-r--r--runtime/contrib/style_transfer_app/src/bitmap_helper.cc32
-rw-r--r--runtime/contrib/style_transfer_app/src/jpeg_helper.cc2
-rw-r--r--runtime/contrib/style_transfer_app/src/style_transfer_app.cc8
-rw-r--r--runtime/contrib/tflite_classify/src/ImageClassifier.cc6
-rw-r--r--runtime/contrib/tflite_classify/src/InferenceInterface.cc2
-rw-r--r--runtime/contrib/tflite_classify/src/tflite_classify.cc5
-rw-r--r--runtime/libs/benchmark/include/benchmark/CsvWriter.h1
-rw-r--r--runtime/libs/benchmark/include/benchmark/MemoryInfo.h40
-rw-r--r--runtime/libs/benchmark/include/benchmark/MemoryPoller.h4
-rw-r--r--runtime/libs/benchmark/include/benchmark/Phase.h1
-rw-r--r--runtime/libs/benchmark/include/benchmark/Phases.h5
-rw-r--r--runtime/libs/benchmark/include/benchmark/Result.h4
-rw-r--r--runtime/libs/benchmark/src/CsvWriter.cpp2
-rw-r--r--runtime/libs/benchmark/src/MemoryInfo.cpp169
-rw-r--r--runtime/libs/benchmark/src/MemoryPoller.cpp167
-rw-r--r--runtime/libs/benchmark/src/Phases.cpp18
-rw-r--r--runtime/libs/benchmark/src/Result.cpp32
-rw-r--r--runtime/libs/misc/CMakeLists.txt19
-rw-r--r--runtime/libs/misc/examples/tensor_index_iterator.cpp74
-rw-r--r--runtime/libs/misc/include/misc/EnvConfigSource.h41
-rw-r--r--runtime/libs/misc/include/misc/GeneralConfigSource.h44
-rw-r--r--runtime/libs/misc/include/misc/IConfigSource.h46
-rw-r--r--runtime/libs/misc/include/misc/RandomGenerator.h1
-rw-r--r--runtime/libs/misc/include/misc/feature/Index.h2
-rw-r--r--runtime/libs/misc/include/misc/feature/Shape.h2
-rw-r--r--runtime/libs/misc/include/misc/kernel/Shape.h2
-rw-r--r--runtime/libs/misc/include/misc/polymorphic_downcast.h2
-rw-r--r--runtime/libs/misc/include/misc/string_helpers.h2
-rw-r--r--runtime/libs/misc/include/misc/tensor/Object.h5
-rw-r--r--runtime/libs/misc/include/misc/tensor/Zipper.h4
-rw-r--r--runtime/libs/misc/src/EnvConfigSource.cpp40
-rw-r--r--runtime/libs/misc/src/GeneralConfigSource.cpp40
-rw-r--r--runtime/libs/misc/src/RandomGenerator.cpp28
-rw-r--r--runtime/libs/misc/src/string_helpers.test.cpp81
-rw-r--r--runtime/libs/misc/src/tensor/Comparator.cpp24
-rw-r--r--runtime/libs/misc/src/tensor/IndexEnumerator.test.cpp59
-rw-r--r--runtime/libs/misc/src/tensor/IndexIterator.test.cpp61
-rw-r--r--runtime/libs/ndarray/CMakeLists.txt15
-rw-r--r--runtime/libs/ndarray/include/ndarray/Array.h28
-rw-r--r--runtime/libs/ndarray/include/ndarray/ContiguousSpan.h2
-rw-r--r--runtime/libs/ndarray/src/Array.test.cpp452
-rw-r--r--runtime/libs/ndarray/src/ContiguousSpan.test.cpp196
-rw-r--r--runtime/libs/ndarray/src/detail/cxx14.h67
-rw-r--r--runtime/libs/ndarray/test/CMakeLists.txt17
-rw-r--r--runtime/libs/ndarray/test/ndarray_test.cpp92
-rw-r--r--runtime/libs/nnapi/CMakeLists.txt5
-rw-r--r--runtime/libs/nnapi/include/NeuralNetworksExShim.h (renamed from runtime/libs/nnapi/v1.2/include/NeuralNetworksExShim.h)0
-rw-r--r--runtime/libs/nnapi/include/NeuralNetworksLoadHelpers.h (renamed from runtime/libs/nnapi/v1.2/include/NeuralNetworksLoadHelpers.h)0
-rw-r--r--runtime/libs/nnapi/include/NeuralNetworksShim.h1554
-rw-r--r--runtime/libs/nnapi/include/NeuralNetworksTypes.h221
-rw-r--r--runtime/libs/nnapi/v1.1/CMakeLists.txt4
-rw-r--r--runtime/libs/nnapi/v1.1/include/NeuralNetworksExShim.h64
-rw-r--r--runtime/libs/nnapi/v1.1/include/NeuralNetworksLoadHelpers.h141
-rw-r--r--runtime/libs/nnapi/v1.1/include/NeuralNetworksShim.h709
-rw-r--r--runtime/libs/nnapi/v1.2/CMakeLists.txt4
-rw-r--r--runtime/libs/nnapi/v1.2/include/NeuralNetworksShim.h1136
-rw-r--r--runtime/libs/nnapi/v1.2/include/NeuralNetworksTypes.h163
-rw-r--r--runtime/libs/profiling/CMakeLists.txt1
-rw-r--r--runtime/libs/profiling/src/profiling/time.cpp8
-rw-r--r--runtime/libs/rua/anchor/CMakeLists.txt1
-rw-r--r--runtime/libs/rua/dyn/CMakeLists.txt1
-rw-r--r--runtime/libs/rua/dyn/src/DynamicBinder.cpp12
-rw-r--r--runtime/libs/tflite/CMakeLists.txt7
-rw-r--r--runtime/libs/tflite/include/tflite/Diff.h12
-rw-r--r--runtime/libs/tflite/include/tflite/FeatureView.h108
-rw-r--r--runtime/libs/tflite/include/tflite/InterpreterSession.h12
-rw-r--r--runtime/libs/tflite/include/tflite/NNAPISession.h102
-rw-r--r--runtime/libs/tflite/include/tflite/OutputIndex.h60
-rw-r--r--runtime/libs/tflite/include/tflite/Quantization.h44
-rw-r--r--runtime/libs/tflite/include/tflite/RandomInputInitializer.h46
-rw-r--r--runtime/libs/tflite/include/tflite/RandomTestRunner.h103
-rw-r--r--runtime/libs/tflite/include/tflite/Session.h4
-rw-r--r--runtime/libs/tflite/include/tflite/TensorLogger.h168
-rw-r--r--runtime/libs/tflite/include/tflite/TensorShapeUtils.h64
-rw-r--r--runtime/libs/tflite/include/tflite/TensorUtils.h54
-rw-r--r--runtime/libs/tflite/include/tflite/TensorView.h14
-rw-r--r--runtime/libs/tflite/include/tflite/interp/Builder.h53
-rw-r--r--runtime/libs/tflite/include/tflite/interp/FlatBufferBuilder.h64
-rw-r--r--runtime/libs/tflite/include/tflite/interp/FunctionBuilder.h67
-rw-r--r--runtime/libs/tflite/port/1.13.1/CMakeLists.txt14
-rw-r--r--runtime/libs/tflite/port/1.13.1/include/tflite/ext/kernels/CustomOps.h56
-rw-r--r--runtime/libs/tflite/port/1.13.1/include/tflite/ext/kernels/SquaredDifference.h76
-rw-r--r--runtime/libs/tflite/port/1.13.1/include/tflite/ext/kernels/register.h46
-rw-r--r--runtime/libs/tflite/port/1.13.1/include/tflite/ext/nnapi_delegate.h92
-rw-r--r--runtime/libs/tflite/port/1.13.1/src/kernels/SquaredDifference.cpp109
-rw-r--r--runtime/libs/tflite/port/1.13.1/src/kernels/register.cpp314
-rw-r--r--runtime/libs/tflite/port/1.13.1/src/nnapi_delegate.cpp1262
-rw-r--r--runtime/libs/tflite/port/1.13.1/src/nnapi_delegate_ex_AddOpsAndParams_lambda.inc153
-rw-r--r--runtime/libs/tflite/port/CMakeLists.txt7
-rw-r--r--runtime/libs/tflite/src/Diff.cpp82
-rw-r--r--runtime/libs/tflite/src/FeatureView.cpp70
-rw-r--r--runtime/libs/tflite/src/Quantization.cpp22
-rw-r--r--runtime/libs/tflite/src/RandomInputInitializer.cpp71
-rw-r--r--runtime/libs/tflite/src/RandomTestRunner.cpp363
-rw-r--r--runtime/libs/tflite/src/TensorShapeUtils.cpp45
-rw-r--r--runtime/libs/tflite/src/interp/FlatBufferBuilder.cpp40
-rw-r--r--runtime/libs/tflite/src/interp/FunctionBuilder.cpp34
-rw-r--r--runtime/nnapi-header/include/NeuralNetworks.h2464
-rw-r--r--runtime/nnapi-header/include/NeuralNetworksEx.h3
-rw-r--r--runtime/nnapi-header/include/NeuralNetworksExtensions.h10
-rw-r--r--runtime/onert/CMakeLists.txt7
-rw-r--r--runtime/onert/api/CMakeLists.txt15
-rw-r--r--runtime/onert/api/include/nnfw.h44
-rw-r--r--runtime/onert/api/include/nnfw_experimental.h300
-rw-r--r--runtime/onert/api/include/nnfw_internal.h9
-rw-r--r--runtime/onert/api/include/nnfw_version.h2
-rw-r--r--runtime/onert/api/src/CustomKernel.cc15
-rw-r--r--runtime/onert/api/src/CustomKernel.h17
-rw-r--r--runtime/onert/api/src/CustomKernelRegistry.cc45
-rw-r--r--runtime/onert/api/src/CustomKernelRegistry.h30
-rw-r--r--runtime/onert/api/src/nnfw_api.cc175
-rw-r--r--runtime/onert/api/src/nnfw_api_internal.cc962
-rw-r--r--runtime/onert/api/src/nnfw_api_internal.h100
-rw-r--r--runtime/onert/api/src/nnfw_debug.cc9
-rw-r--r--runtime/onert/backend/CMakeLists.txt17
-rw-r--r--runtime/onert/backend/acl_cl/Backend.h18
-rw-r--r--runtime/onert/backend/acl_cl/BackendContext.h42
-rw-r--r--runtime/onert/backend/acl_cl/CLTimer.h6
-rw-r--r--runtime/onert/backend/acl_cl/CMakeLists.txt5
-rw-r--r--runtime/onert/backend/acl_cl/Config.cc4
-rw-r--r--runtime/onert/backend/acl_cl/Config.h2
-rw-r--r--runtime/onert/backend/acl_cl/ConstantInitializer.cc67
-rw-r--r--runtime/onert/backend/acl_cl/ConstantInitializer.h7
-rw-r--r--runtime/onert/backend/acl_cl/KernelGenerator.cc829
-rw-r--r--runtime/onert/backend/acl_cl/KernelGenerator.h65
-rw-r--r--runtime/onert/backend/acl_cl/Optimizer.cc22
-rw-r--r--runtime/onert/backend/acl_cl/Optimizer.h7
-rw-r--r--runtime/onert/backend/acl_cl/TensorBuilder.h2
-rw-r--r--runtime/onert/backend/acl_cl/TensorManager.h18
-rw-r--r--runtime/onert/backend/acl_cl/acl_cl.cc15
-rw-r--r--runtime/onert/backend/acl_cl/operand/CLSubTensor.cc5
-rw-r--r--runtime/onert/backend/acl_cl/operand/CLSubTensor.h5
-rw-r--r--runtime/onert/backend/acl_cl/operand/CLTensor.cc2
-rw-r--r--runtime/onert/backend/acl_cl/operand/CLTensor.h4
-rw-r--r--runtime/onert/backend/acl_cl/operand/ICLTensor.cc15
-rw-r--r--runtime/onert/backend/acl_cl/operand/ICLTensor.h4
-rw-r--r--runtime/onert/backend/acl_common/AclActivationBuilder.h10
-rw-r--r--runtime/onert/backend/acl_common/AclBackendContext.h106
-rw-r--r--runtime/onert/backend/acl_common/AclConstantInitializer.cc97
-rw-r--r--runtime/onert/backend/acl_common/AclConstantInitializer.h187
-rw-r--r--runtime/onert/backend/acl_common/AclInternalBufferManager.h6
-rw-r--r--runtime/onert/backend/acl_common/AclKernelGen.h166
-rw-r--r--runtime/onert/backend/acl_common/AclLinearMemoryManager.h23
-rw-r--r--runtime/onert/backend/acl_common/AclMemoryManager.h18
-rw-r--r--runtime/onert/backend/acl_common/AclSubTensorAnalyzer.h30
-rw-r--r--runtime/onert/backend/acl_common/AclTensorBuilder.h160
-rw-r--r--runtime/onert/backend/acl_common/AclTensorManager.h37
-rw-r--r--runtime/onert/backend/acl_common/AclTensorRegistry.h12
-rw-r--r--runtime/onert/backend/acl_common/CMakeLists.txt1
-rw-r--r--runtime/onert/backend/acl_common/Convert.cc85
-rw-r--r--runtime/onert/backend/acl_common/Convert.h6
-rw-r--r--runtime/onert/backend/acl_common/IACLTensor.cc30
-rw-r--r--runtime/onert/backend/acl_common/IACLTensor.h31
-rw-r--r--runtime/onert/backend/acl_common/ParentInfo.h44
-rw-r--r--runtime/onert/backend/acl_common/Swizzle.h4
-rw-r--r--runtime/onert/backend/acl_neon/Backend.h18
-rw-r--r--runtime/onert/backend/acl_neon/BackendContext.h42
-rw-r--r--runtime/onert/backend/acl_neon/CMakeLists.txt5
-rw-r--r--runtime/onert/backend/acl_neon/Config.cc2
-rw-r--r--runtime/onert/backend/acl_neon/Config.h2
-rw-r--r--runtime/onert/backend/acl_neon/ConstantInitializer.cc28
-rw-r--r--runtime/onert/backend/acl_neon/ConstantInitializer.h6
-rw-r--r--runtime/onert/backend/acl_neon/KernelGenerator.cc685
-rw-r--r--runtime/onert/backend/acl_neon/KernelGenerator.h31
-rw-r--r--runtime/onert/backend/acl_neon/Optimizer.cc24
-rw-r--r--runtime/onert/backend/acl_neon/Optimizer.h7
-rw-r--r--runtime/onert/backend/acl_neon/TensorBuilder.h2
-rw-r--r--runtime/onert/backend/acl_neon/TensorManager.h12
-rw-r--r--runtime/onert/backend/acl_neon/acl_neon.cc15
-rw-r--r--runtime/onert/backend/acl_neon/operand/INETensor.h1
-rw-r--r--runtime/onert/backend/acl_neon/operand/NESubTensor.cc5
-rw-r--r--runtime/onert/backend/acl_neon/operand/NESubTensor.h5
-rw-r--r--runtime/onert/backend/acl_neon/operand/NETensor.cc2
-rw-r--r--runtime/onert/backend/acl_neon/operand/NETensor.h4
-rw-r--r--runtime/onert/backend/cl_common/CMakeLists.txt7
-rw-r--r--runtime/onert/backend/cl_common/include/cl_common/BackendContext.h236
-rw-r--r--runtime/onert/backend/cl_common/include/cl_common/LifetimeMap.h51
-rw-r--r--runtime/onert/backend/cl_common/include/cl_common/ParentInfo.h44
-rw-r--r--runtime/onert/backend/cl_common/src/LifetimeMap.cc85
-rw-r--r--runtime/onert/backend/cpu/Backend.h18
-rw-r--r--runtime/onert/backend/cpu/BackendContext.cc64
-rw-r--r--runtime/onert/backend/cpu/BackendContext.h25
-rw-r--r--runtime/onert/backend/cpu/CMakeLists.txt12
-rw-r--r--runtime/onert/backend/cpu/Config.cc2
-rw-r--r--runtime/onert/backend/cpu/Config.h2
-rw-r--r--runtime/onert/backend/cpu/ConstantInitializer.cc94
-rw-r--r--runtime/onert/backend/cpu/ConstantInitializer.h63
-rw-r--r--runtime/onert/backend/cpu/ExternalContext.h18
-rw-r--r--runtime/onert/backend/cpu/KernelGenerator.cc759
-rw-r--r--runtime/onert/backend/cpu/KernelGenerator.h88
-rw-r--r--runtime/onert/backend/cpu/StaticTensorManager.cc106
-rw-r--r--runtime/onert/backend/cpu/StaticTensorManager.h33
-rw-r--r--runtime/onert/backend/cpu/Tensor.h75
-rw-r--r--runtime/onert/backend/cpu/TensorBuilder.cc100
-rw-r--r--runtime/onert/backend/cpu/TensorBuilder.h47
-rw-r--r--runtime/onert/backend/cpu/cpu.cc15
-rw-r--r--runtime/onert/backend/cpu/ops/AddNLayer.cc69
-rw-r--r--runtime/onert/backend/cpu/ops/AddNLayer.h53
-rw-r--r--runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc32
-rw-r--r--runtime/onert/backend/cpu/ops/ArgMinMaxLayer.h8
-rw-r--r--runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc17
-rw-r--r--runtime/onert/backend/cpu/ops/BatchToSpaceNDLayer.cc11
-rw-r--r--runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc149
-rw-r--r--runtime/onert/backend/cpu/ops/BroadcastToLayer.cc17
-rw-r--r--runtime/onert/backend/cpu/ops/CompareLayer.cc49
-rw-r--r--runtime/onert/backend/cpu/ops/ConcatLayer.cc51
-rw-r--r--runtime/onert/backend/cpu/ops/ConvolutionLayer.cc212
-rw-r--r--runtime/onert/backend/cpu/ops/ConvolutionLayer.h19
-rw-r--r--runtime/onert/backend/cpu/ops/DepthToSpaceLayer.cc77
-rw-r--r--runtime/onert/backend/cpu/ops/DepthToSpaceLayer.h54
-rw-r--r--runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc255
-rw-r--r--runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h61
-rw-r--r--runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc308
-rw-r--r--runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.h105
-rw-r--r--runtime/onert/backend/cpu/ops/EinsumLayer.cc9
-rw-r--r--runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc61
-rw-r--r--runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h6
-rw-r--r--runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc82
-rw-r--r--runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.h1
-rw-r--r--runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc148
-rw-r--r--runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h4
-rw-r--r--runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc7
-rw-r--r--runtime/onert/backend/cpu/ops/ExpandDimsLayer.h4
-rw-r--r--runtime/onert/backend/cpu/ops/FillLayer.cc28
-rw-r--r--runtime/onert/backend/cpu/ops/FillLayer.h4
-rw-r--r--runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc156
-rw-r--r--runtime/onert/backend/cpu/ops/FullyConnectedLayer.h10
-rw-r--r--runtime/onert/backend/cpu/ops/FusedBatchNormLayer.cc11
-rw-r--r--runtime/onert/backend/cpu/ops/GatherLayer.cc10
-rw-r--r--runtime/onert/backend/cpu/ops/L2NormLayer.cc14
-rw-r--r--runtime/onert/backend/cpu/ops/LSTMLayer.cc319
-rw-r--r--runtime/onert/backend/cpu/ops/LSTMLayer.h129
-rw-r--r--runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc13
-rw-r--r--runtime/onert/backend/cpu/ops/MatrixBandPartLayer.cc14
-rw-r--r--runtime/onert/backend/cpu/ops/MeanLayer.cc31
-rw-r--r--runtime/onert/backend/cpu/ops/OneHotLayer.cc6
-rw-r--r--runtime/onert/backend/cpu/ops/OneHotLayer.h4
-rw-r--r--runtime/onert/backend/cpu/ops/OperationUtils.cc85
-rw-r--r--runtime/onert/backend/cpu/ops/OperationUtils.h97
-rw-r--r--runtime/onert/backend/cpu/ops/PackLayer.cc7
-rw-r--r--runtime/onert/backend/cpu/ops/PadLayer.cc53
-rw-r--r--runtime/onert/backend/cpu/ops/PoolLayer.cc73
-rw-r--r--runtime/onert/backend/cpu/ops/PowLayer.cc10
-rw-r--r--runtime/onert/backend/cpu/ops/QuantizeLayer.cc100
-rw-r--r--runtime/onert/backend/cpu/ops/QuantizeLayer.h55
-rw-r--r--runtime/onert/backend/cpu/ops/RangeLayer.cc12
-rw-r--r--runtime/onert/backend/cpu/ops/RankLayer.cc11
-rw-r--r--runtime/onert/backend/cpu/ops/ReduceLayer.cc57
-rw-r--r--runtime/onert/backend/cpu/ops/ReduceLayer.h7
-rw-r--r--runtime/onert/backend/cpu/ops/ResizeBilinearLayer.cc55
-rw-r--r--runtime/onert/backend/cpu/ops/ResizeBilinearLayer.h6
-rw-r--r--runtime/onert/backend/cpu/ops/ReverseLayer.cc9
-rw-r--r--runtime/onert/backend/cpu/ops/SelectLayer.cc15
-rw-r--r--runtime/onert/backend/cpu/ops/ShapeLayer.cc36
-rw-r--r--runtime/onert/backend/cpu/ops/ShapeLayer.h2
-rw-r--r--runtime/onert/backend/cpu/ops/SliceLayer.cc24
-rw-r--r--runtime/onert/backend/cpu/ops/SoftMaxLayer.cc110
-rw-r--r--runtime/onert/backend/cpu/ops/SoftMaxLayer.h6
-rw-r--r--runtime/onert/backend/cpu/ops/SpaceToBatchNDLayer.cc28
-rw-r--r--runtime/onert/backend/cpu/ops/SpaceToDepthLayer.cc5
-rw-r--r--runtime/onert/backend/cpu/ops/SplitLayer.cc23
-rw-r--r--runtime/onert/backend/cpu/ops/SplitLayer.h4
-rw-r--r--runtime/onert/backend/cpu/ops/SplitVLayer.cc12
-rw-r--r--runtime/onert/backend/cpu/ops/SquaredDiffLayer.cc5
-rw-r--r--runtime/onert/backend/cpu/ops/StatelessRandomUniformLayer.cc9
-rw-r--r--runtime/onert/backend/cpu/ops/StridedSliceLayer.cc19
-rw-r--r--runtime/onert/backend/cpu/ops/TileLayer.cc5
-rw-r--r--runtime/onert/backend/cpu/ops/TransposeLayer.cc34
-rw-r--r--runtime/onert/backend/cpu/ops/TransposeLayer.h6
-rw-r--r--runtime/onert/backend/cpu/ops/UnpackLayer.cc8
-rw-r--r--runtime/onert/backend/gpu_cl/Backend.h93
-rw-r--r--runtime/onert/backend/gpu_cl/BackendContext.cc117
-rw-r--r--runtime/onert/backend/gpu_cl/BackendContext.h67
-rw-r--r--runtime/onert/backend/gpu_cl/CMakeLists.txt95
-rw-r--r--runtime/onert/backend/gpu_cl/ClConstantInitializer.cc107
-rw-r--r--runtime/onert/backend/gpu_cl/ClConstantInitializer.h138
-rw-r--r--runtime/onert/backend/gpu_cl/ClFunction.h89
-rw-r--r--runtime/onert/backend/gpu_cl/Config.cc48
-rw-r--r--runtime/onert/backend/gpu_cl/Config.h50
-rw-r--r--runtime/onert/backend/gpu_cl/ConstantInitializer.cc35
-rw-r--r--runtime/onert/backend/gpu_cl/ConstantInitializer.h43
-rw-r--r--runtime/onert/backend/gpu_cl/KernelGenerator.cc709
-rw-r--r--runtime/onert/backend/gpu_cl/KernelGenerator.h85
-rw-r--r--runtime/onert/backend/gpu_cl/MemoryManager.h160
-rw-r--r--runtime/onert/backend/gpu_cl/TensorBuilder.cc135
-rw-r--r--runtime/onert/backend/gpu_cl/TensorBuilder.h106
-rw-r--r--runtime/onert/backend/gpu_cl/TensorManager.cc120
-rw-r--r--runtime/onert/backend/gpu_cl/TensorManager.h86
-rw-r--r--runtime/onert/backend/gpu_cl/TensorRegistry.h57
-rw-r--r--runtime/onert/backend/gpu_cl/Utils.h155
-rw-r--r--runtime/onert/backend/gpu_cl/gpu_cl.cc33
-rw-r--r--runtime/onert/backend/gpu_cl/operand/CLTensor.cc48
-rw-r--r--runtime/onert/backend/gpu_cl/operand/CLTensor.h67
-rw-r--r--runtime/onert/backend/gpu_cl/operand/ICLTensor.cc226
-rw-r--r--runtime/onert/backend/gpu_cl/operand/ICLTensor.h140
-rw-r--r--runtime/onert/backend/ruy/Backend.h64
-rw-r--r--runtime/onert/backend/ruy/BackendContext.cc64
-rw-r--r--runtime/onert/backend/ruy/BackendContext.h70
-rw-r--r--runtime/onert/backend/ruy/CMakeLists.txt22
-rw-r--r--runtime/onert/backend/ruy/Config.cc32
-rw-r--r--runtime/onert/backend/ruy/Config.h48
-rw-r--r--runtime/onert/backend/ruy/ExternalContext.h60
-rw-r--r--runtime/onert/backend/ruy/KernelGenerator.cc163
-rw-r--r--runtime/onert/backend/ruy/KernelGenerator.h65
-rw-r--r--runtime/onert/backend/ruy/StaticTensorManager.h35
-rw-r--r--runtime/onert/backend/ruy/Tensor.h37
-rw-r--r--runtime/onert/backend/ruy/TensorBuilder.h35
-rw-r--r--runtime/onert/backend/ruy/ops/ConvolutionLayer.cc153
-rw-r--r--runtime/onert/backend/ruy/ops/ConvolutionLayer.h90
-rw-r--r--runtime/onert/backend/ruy/ops/FullyConnectedLayer.cc103
-rw-r--r--runtime/onert/backend/ruy/ops/FullyConnectedLayer.h69
-rw-r--r--runtime/onert/backend/ruy/ops/OperationUtils.cc47
-rw-r--r--runtime/onert/backend/ruy/ops/OperationUtils.h89
-rw-r--r--runtime/onert/backend/ruy/ruy.cc24
-rw-r--r--runtime/onert/backend/train/Backend.h72
-rw-r--r--runtime/onert/backend/train/BackendContext.cc112
-rw-r--r--runtime/onert/backend/train/BackendContext.h90
-rw-r--r--runtime/onert/backend/train/CMakeLists.txt20
-rw-r--r--runtime/onert/backend/train/Config.cc32
-rw-r--r--runtime/onert/backend/train/Config.h47
-rw-r--r--runtime/onert/backend/train/ExternalContext.h35
-rw-r--r--runtime/onert/backend/train/KernelGenerator.cc260
-rw-r--r--runtime/onert/backend/train/KernelGenerator.h67
-rw-r--r--runtime/onert/backend/train/MemoryManager.h35
-rw-r--r--runtime/onert/backend/train/Tensor.h55
-rw-r--r--runtime/onert/backend/train/TensorBuilder.cc138
-rw-r--r--runtime/onert/backend/train/TensorBuilder.h77
-rw-r--r--runtime/onert/backend/train/TensorManager.cc155
-rw-r--r--runtime/onert/backend/train/TensorManager.h66
-rw-r--r--runtime/onert/backend/train/TensorRegistry.h38
-rw-r--r--runtime/onert/backend/train/ops/ConvolutionLayer.cc57
-rw-r--r--runtime/onert/backend/train/ops/ConvolutionLayer.h56
-rw-r--r--runtime/onert/backend/train/ops/ElementwiseActivationLayer.cc95
-rw-r--r--runtime/onert/backend/train/ops/ElementwiseActivationLayer.h66
-rw-r--r--runtime/onert/backend/train/ops/FullyConnectedLayer.cc209
-rw-r--r--runtime/onert/backend/train/ops/FullyConnectedLayer.h74
-rw-r--r--runtime/onert/backend/train/ops/GradientApplier.cc52
-rw-r--r--runtime/onert/backend/train/ops/GradientApplier.h54
-rw-r--r--runtime/onert/backend/train/ops/LossLayer.cc99
-rw-r--r--runtime/onert/backend/train/ops/LossLayer.h62
-rw-r--r--runtime/onert/backend/train/ops/OperationUtils.h40
-rw-r--r--runtime/onert/backend/train/ops/PoolLayer.cc72
-rw-r--r--runtime/onert/backend/train/ops/PoolLayer.h59
-rw-r--r--runtime/onert/backend/train/ops/ReshapeLayer.cc61
-rw-r--r--runtime/onert/backend/train/ops/ReshapeLayer.h62
-rw-r--r--runtime/onert/backend/train/train.cc24
-rw-r--r--runtime/onert/backend/trix/Backend.h62
-rw-r--r--runtime/onert/backend/trix/BackendContext.cc64
-rw-r--r--runtime/onert/backend/trix/BackendContext.h62
-rw-r--r--runtime/onert/backend/trix/BatchThreadPool.cc69
-rw-r--r--runtime/onert/backend/trix/BatchThreadPool.h130
-rw-r--r--runtime/onert/backend/trix/CMakeLists.txt24
-rw-r--r--runtime/onert/backend/trix/Config.cc32
-rw-r--r--runtime/onert/backend/trix/Config.h48
-rw-r--r--runtime/onert/backend/trix/Convert.cc54
-rw-r--r--runtime/onert/backend/trix/Convert.h93
-rw-r--r--runtime/onert/backend/trix/DevContext.cc313
-rw-r--r--runtime/onert/backend/trix/DevContext.h151
-rw-r--r--runtime/onert/backend/trix/KernelGenerator.cc83
-rw-r--r--runtime/onert/backend/trix/KernelGenerator.h61
-rw-r--r--runtime/onert/backend/trix/Tensor.h37
-rw-r--r--runtime/onert/backend/trix/TensorBuilder.h35
-rw-r--r--runtime/onert/backend/trix/ops/BulkLayer.cc76
-rw-r--r--runtime/onert/backend/trix/ops/BulkLayer.h62
-rw-r--r--runtime/onert/backend/trix/trix.cc24
-rw-r--r--runtime/onert/backend/xnnpack/Backend.h64
-rw-r--r--runtime/onert/backend/xnnpack/BackendContext.cc64
-rw-r--r--runtime/onert/backend/xnnpack/BackendContext.h69
-rw-r--r--runtime/onert/backend/xnnpack/CMakeLists.txt26
-rw-r--r--runtime/onert/backend/xnnpack/Config.cc44
-rw-r--r--runtime/onert/backend/xnnpack/Config.h51
-rw-r--r--runtime/onert/backend/xnnpack/ExternalContext.cc36
-rw-r--r--runtime/onert/backend/xnnpack/ExternalContext.h46
-rw-r--r--runtime/onert/backend/xnnpack/KernelGenerator.cc189
-rw-r--r--runtime/onert/backend/xnnpack/KernelGenerator.h66
-rw-r--r--runtime/onert/backend/xnnpack/StaticTensorManager.h35
-rw-r--r--runtime/onert/backend/xnnpack/Tensor.h37
-rw-r--r--runtime/onert/backend/xnnpack/TensorBuilder.h35
-rw-r--r--runtime/onert/backend/xnnpack/ops/ConvolutionLayer.cc148
-rw-r--r--runtime/onert/backend/xnnpack/ops/ConvolutionLayer.h77
-rw-r--r--runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.cc149
-rw-r--r--runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.h77
-rw-r--r--runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.cc138
-rw-r--r--runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.h61
-rw-r--r--runtime/onert/backend/xnnpack/ops/Layer.h81
-rw-r--r--runtime/onert/backend/xnnpack/ops/OperationUtils.h42
-rw-r--r--runtime/onert/backend/xnnpack/xnnpack.cc33
-rw-r--r--runtime/onert/core/CMakeLists.txt69
-rw-r--r--runtime/onert/core/include/backend/Backend.h4
-rw-r--r--runtime/onert/core/include/backend/BackendContext.h75
-rw-r--r--runtime/onert/core/include/backend/CustomKernelBuilder.h4
-rw-r--r--runtime/onert/core/include/backend/IConfig.h8
-rw-r--r--runtime/onert/core/include/backend/IConstantInitializer.h230
-rw-r--r--runtime/onert/core/include/backend/IDynamicTensorManager.h78
-rw-r--r--runtime/onert/core/include/backend/IExternalContext.h34
-rw-r--r--runtime/onert/core/include/backend/IKernelGenerator.h76
-rw-r--r--runtime/onert/core/include/backend/IMemoryManager.h49
-rw-r--r--runtime/onert/core/include/backend/IOptimizer.h51
-rw-r--r--runtime/onert/core/include/backend/IPortableTensor.h21
-rw-r--r--runtime/onert/core/include/backend/IStaticTensorManager.h35
-rw-r--r--runtime/onert/core/include/backend/ITensor.h44
-rw-r--r--runtime/onert/core/include/backend/ITensorBuilder.h124
-rw-r--r--runtime/onert/core/include/backend/ITensorManager.h52
-rw-r--r--runtime/onert/core/include/backend/ITensorRegister.h97
-rw-r--r--runtime/onert/core/include/backend/ITensorRegistry.h54
-rw-r--r--runtime/onert/core/include/backend/basic/Allocator.h56
-rw-r--r--runtime/onert/core/include/backend/basic/BackendContextHelpers.h262
-rw-r--r--runtime/onert/core/include/backend/basic/DynamicTensorManager.h74
-rw-r--r--runtime/onert/core/include/backend/basic/IMemoryPlanner.h74
-rw-r--r--runtime/onert/core/include/backend/basic/KernelGeneratorBase.h71
-rw-r--r--runtime/onert/core/include/backend/basic/MemoryManager.h75
-rw-r--r--runtime/onert/core/include/backend/basic/StaticTensorManager.h67
-rw-r--r--runtime/onert/core/include/backend/basic/Tensor.h250
-rw-r--r--runtime/onert/core/include/backend/basic/TensorBuilder.h72
-rw-r--r--runtime/onert/core/include/backend/basic/TensorRegistry.h36
-rw-r--r--runtime/onert/core/include/backend/basic/train/TrainableBackendContextHelpers.h68
-rw-r--r--runtime/onert/core/include/backend/basic/train/TrainableTensor.h98
-rw-r--r--runtime/onert/core/include/backend/cpu_common/Allocator.h56
-rw-r--r--runtime/onert/core/include/backend/cpu_common/DynamicTensorManager.h73
-rw-r--r--runtime/onert/core/include/backend/cpu_common/IMemoryPlanner.h74
-rw-r--r--runtime/onert/core/include/backend/cpu_common/MemoryManager.h74
-rw-r--r--runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h67
-rw-r--r--runtime/onert/core/include/backend/cpu_common/Tensor.h162
-rw-r--r--runtime/onert/core/include/backend/cpu_common/TensorRegistry.h36
-rw-r--r--runtime/onert/core/include/backend/train/ITensorRegistry.h246
-rw-r--r--runtime/onert/core/include/backend/train/ITrainableBackend.h42
-rw-r--r--runtime/onert/core/include/backend/train/ITrainableTensor.h53
-rw-r--r--runtime/onert/core/include/backend/train/KernelGeneratorBase.h60
-rw-r--r--runtime/onert/core/include/backend/train/TrainableBackendContext.h100
-rw-r--r--runtime/onert/core/include/compiler/BackendManager.h19
-rw-r--r--runtime/onert/core/include/compiler/CodeMap.h16
-rw-r--r--runtime/onert/core/include/compiler/Compiler.h92
-rw-r--r--runtime/onert/core/include/compiler/CompilerFactory.h49
-rw-r--r--runtime/onert/core/include/compiler/CompilerOptions.h92
-rw-r--r--runtime/onert/core/include/compiler/ExecutionBuilder.h5
-rw-r--r--runtime/onert/core/include/compiler/GraphLowerInfo.h42
-rw-r--r--runtime/onert/core/include/compiler/ICompiler.h63
-rw-r--r--runtime/onert/core/include/compiler/ILoweredGraph.h42
-rw-r--r--runtime/onert/core/include/compiler/LoweredGraph.h73
-rw-r--r--runtime/onert/core/include/compiler/OperandLowerInfo.h67
-rw-r--r--runtime/onert/core/include/compiler/OperationLowerInfo.h52
-rw-r--r--runtime/onert/core/include/compiler/PermuteFactor.h130
-rw-r--r--runtime/onert/core/include/compiler/StaticShapeInference.h138
-rw-r--r--runtime/onert/core/include/compiler/StaticShapeInferer.h192
-rw-r--r--runtime/onert/core/include/compiler/train/LoweredTrainableGraph.h80
-rw-r--r--runtime/onert/core/include/compiler/train/TrainableCodeMap.h54
-rw-r--r--runtime/onert/core/include/compiler/train/TrainingInfo.h71
-rw-r--r--runtime/onert/core/include/exec/DynamicShapeInference.h120
-rw-r--r--runtime/onert/core/include/exec/DynamicShapeInferer.h135
-rw-r--r--runtime/onert/core/include/exec/Execution.h38
-rw-r--r--runtime/onert/core/include/exec/FunctionSequence.h25
-rw-r--r--runtime/onert/core/include/exec/IExecutor.h66
-rw-r--r--runtime/onert/core/include/exec/IExecutors.h98
-rw-r--r--runtime/onert/core/include/exec/IODescription.h5
-rw-r--r--runtime/onert/core/include/exec/MinMaxMap.h39
-rw-r--r--runtime/onert/core/include/exec/train/IGradientApplier.h46
-rw-r--r--runtime/onert/core/include/exec/train/ITrainableFunction.h41
-rw-r--r--runtime/onert/core/include/exec/train/TrainableFnSequence.h52
-rw-r--r--runtime/onert/core/include/exec/train/optimizer/Optimizer.h77
-rw-r--r--runtime/onert/core/include/exec/train/optimizer/OptimizerCode.h53
-rw-r--r--runtime/onert/core/include/exec/train/optimizer/SGD.h83
-rw-r--r--runtime/onert/core/include/ir/Coordinates.h6
-rw-r--r--runtime/onert/core/include/ir/Data.h8
-rw-r--r--runtime/onert/core/include/ir/DataType.h4
-rw-r--r--runtime/onert/core/include/ir/Graph.h107
-rw-r--r--runtime/onert/core/include/ir/IGraph.h47
-rw-r--r--runtime/onert/core/include/ir/IOperation.h50
-rw-r--r--runtime/onert/core/include/ir/Index.h44
-rw-r--r--runtime/onert/core/include/ir/InternalType.h7
-rw-r--r--runtime/onert/core/include/ir/Layout.h1
-rw-r--r--runtime/onert/core/include/ir/LowerInfoMap.h42
-rw-r--r--runtime/onert/core/include/ir/Model.h183
-rw-r--r--runtime/onert/core/include/ir/NNPkg.h317
-rw-r--r--runtime/onert/core/include/ir/OpSequence.h102
-rw-r--r--runtime/onert/core/include/ir/OpSequences.h91
-rw-r--r--runtime/onert/core/include/ir/Operand.h4
-rw-r--r--runtime/onert/core/include/ir/OperandIndexSequence.h5
-rw-r--r--runtime/onert/core/include/ir/OperandInfo.h15
-rw-r--r--runtime/onert/core/include/ir/Operation.h30
-rw-r--r--runtime/onert/core/include/ir/OperationVisitor.h10
-rw-r--r--runtime/onert/core/include/ir/Operations.Include.h96
-rw-r--r--runtime/onert/core/include/ir/Operations.h4
-rw-r--r--runtime/onert/core/include/ir/Operations.lst98
-rw-r--r--runtime/onert/core/include/ir/Shape.h19
-rw-r--r--runtime/onert/core/include/ir/Sparsity.h64
-rw-r--r--runtime/onert/core/include/ir/Subgraphs.h139
-rw-r--r--runtime/onert/core/include/ir/TypeInfo.h57
-rw-r--r--runtime/onert/core/include/ir/operand/LowerInfo.h69
-rw-r--r--runtime/onert/core/include/ir/operand/PermuteFactor.h130
-rw-r--r--runtime/onert/core/include/ir/operation/AddN.h43
-rw-r--r--runtime/onert/core/include/ir/operation/ArgMax.h62
-rw-r--r--runtime/onert/core/include/ir/operation/ArgMinMax.h63
-rw-r--r--runtime/onert/core/include/ir/operation/BinaryArithmetic.h2
-rw-r--r--runtime/onert/core/include/ir/operation/Bulk.h55
-rw-r--r--runtime/onert/core/include/ir/operation/DepthwiseConv2D.h1
-rw-r--r--runtime/onert/core/include/ir/operation/DetectionPostProcess.h90
-rw-r--r--runtime/onert/core/include/ir/operation/ElementwiseBinary.h1
-rw-r--r--runtime/onert/core/include/ir/operation/ElementwiseUnary.h8
-rw-r--r--runtime/onert/core/include/ir/operation/Fill.h2
-rw-r--r--runtime/onert/core/include/ir/operation/FullyConnected.h1
-rw-r--r--runtime/onert/core/include/ir/operation/LSTM.h7
-rw-r--r--runtime/onert/core/include/ir/operation/Loss.h74
-rw-r--r--runtime/onert/core/include/ir/operation/LowerInfo.h54
-rw-r--r--runtime/onert/core/include/ir/operation/ResizeBilinear.h2
-rw-r--r--runtime/onert/core/include/ir/operation/ResizeNearestNeighbor.h2
-rw-r--r--runtime/onert/core/include/ir/operation/Split.h4
-rw-r--r--runtime/onert/core/include/ir/operation/Transpose.h15
-rw-r--r--runtime/onert/core/include/ir/train/ITrainableOperation.h49
-rw-r--r--runtime/onert/core/include/ir/train/Operations.Include.h29
-rw-r--r--runtime/onert/core/include/ir/train/Operations.lst28
-rw-r--r--runtime/onert/core/include/ir/train/TrainableGraph.h145
-rw-r--r--runtime/onert/core/include/ir/train/TrainableOperationVisitor.h43
-rw-r--r--runtime/onert/core/include/ir/train/operation/Conv2D.h51
-rw-r--r--runtime/onert/core/include/ir/train/operation/ElementwiseActivation.h52
-rw-r--r--runtime/onert/core/include/ir/train/operation/FullyConnected.h51
-rw-r--r--runtime/onert/core/include/ir/train/operation/Loss.h51
-rw-r--r--runtime/onert/core/include/ir/train/operation/Permute.h51
-rw-r--r--runtime/onert/core/include/ir/train/operation/Pool2D.h51
-rw-r--r--runtime/onert/core/include/ir/train/operation/Reshape.h51
-rw-r--r--runtime/onert/core/include/ir/train/operation/Softmax.h51
-rw-r--r--runtime/onert/core/include/ir/train/operation/UntrainableOperation.h63
-rw-r--r--runtime/onert/core/include/odc/IQuantizer.h36
-rw-r--r--runtime/onert/core/include/odc/QuantizeManager.h81
-rw-r--r--runtime/onert/core/include/util/CalculateActivationRange.h66
-rw-r--r--runtime/onert/core/include/util/Config.lst8
-rw-r--r--runtime/onert/core/include/util/ConfigSource.h9
-rw-r--r--runtime/onert/core/include/util/EnvConfigSource.h41
-rw-r--r--runtime/onert/core/include/util/Exceptions.h2
-rw-r--r--runtime/onert/core/include/util/GeneralConfigSource.h44
-rw-r--r--runtime/onert/core/include/util/IConfigSource.h46
-rw-r--r--runtime/onert/core/include/util/ITimer.h2
-rw-r--r--runtime/onert/core/include/util/Index.h13
-rw-r--r--runtime/onert/core/include/util/MinMaxMap.h47
-rw-r--r--runtime/onert/core/include/util/ObjectManager.h139
-rw-r--r--runtime/onert/core/include/util/Set.h16
-rw-r--r--runtime/onert/core/include/util/ShapeInference.h28
-rw-r--r--runtime/onert/core/include/util/TracingCtx.h81
-rw-r--r--runtime/onert/core/include/util/Utils.h55
-rw-r--r--runtime/onert/core/include/util/logging.h23
-rw-r--r--runtime/onert/core/src/backend/BackendContext.cc30
-rw-r--r--runtime/onert/core/src/backend/IConstantInitializer.cc112
-rw-r--r--runtime/onert/core/src/backend/IPortableTensor.cc29
-rw-r--r--runtime/onert/core/src/backend/ITensor.cc11
-rw-r--r--runtime/onert/core/src/backend/basic/Allocator.cc38
-rw-r--r--runtime/onert/core/src/backend/basic/BackendContextHelpers.cc17
-rw-r--r--runtime/onert/core/src/backend/basic/DynamicTensorManager.cc53
-rw-r--r--runtime/onert/core/src/backend/basic/MemoryManager.cc108
-rw-r--r--runtime/onert/core/src/backend/basic/MemoryPlanner.cc208
-rw-r--r--runtime/onert/core/src/backend/basic/MemoryPlanner.h160
-rw-r--r--runtime/onert/core/src/backend/basic/MemoryPlanner.test.cc193
-rw-r--r--runtime/onert/core/src/backend/basic/MemoryPlannerFactory.cc53
-rw-r--r--runtime/onert/core/src/backend/basic/MemoryPlannerFactory.h47
-rw-r--r--runtime/onert/core/src/backend/basic/StaticTensorManager.cc117
-rw-r--r--runtime/onert/core/src/backend/basic/Tensor.cc121
-rw-r--r--runtime/onert/core/src/backend/basic/TensorBuilder.cc91
-rw-r--r--runtime/onert/core/src/backend/basic/train/TrainableTensor.cc49
-rw-r--r--runtime/onert/core/src/backend/builtin/Backend.h109
-rw-r--r--runtime/onert/core/src/backend/builtin/BackendContext.cc58
-rw-r--r--runtime/onert/core/src/backend/builtin/BackendContext.h71
-rw-r--r--runtime/onert/core/src/backend/builtin/Config.cc37
-rw-r--r--runtime/onert/core/src/backend/builtin/Config.h53
-rw-r--r--runtime/onert/core/src/backend/builtin/ConstantInitializer.h35
-rw-r--r--runtime/onert/core/src/backend/builtin/DynamicTensorManager.h38
-rw-r--r--runtime/onert/core/src/backend/builtin/ExternalContext.h79
-rw-r--r--runtime/onert/core/src/backend/builtin/IOTensor.cc56
-rw-r--r--runtime/onert/core/src/backend/builtin/IOTensor.h97
-rw-r--r--runtime/onert/core/src/backend/builtin/KernelGenerator.cc159
-rw-r--r--runtime/onert/core/src/backend/builtin/KernelGenerator.h79
-rw-r--r--runtime/onert/core/src/backend/builtin/Tensor.h36
-rw-r--r--runtime/onert/core/src/backend/builtin/TensorBuilder.cc105
-rw-r--r--runtime/onert/core/src/backend/builtin/TensorBuilder.h79
-rw-r--r--runtime/onert/core/src/backend/builtin/TensorRegistry.h134
-rw-r--r--runtime/onert/core/src/backend/builtin/UserTensor.cc53
-rw-r--r--runtime/onert/core/src/backend/builtin/UserTensor.h81
-rw-r--r--runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc80
-rw-r--r--runtime/onert/core/src/backend/builtin/kernel/IfLayer.h62
-rw-r--r--runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc316
-rw-r--r--runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h150
-rw-r--r--runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc151
-rw-r--r--runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h67
-rw-r--r--runtime/onert/core/src/backend/builtin/train/BackendContext.cc78
-rw-r--r--runtime/onert/core/src/backend/builtin/train/BackendContext.h76
-rw-r--r--runtime/onert/core/src/backend/builtin/train/KernelGenerator.cc98
-rw-r--r--runtime/onert/core/src/backend/builtin/train/KernelGenerator.h75
-rw-r--r--runtime/onert/core/src/backend/builtin/train/Tensor.h40
-rw-r--r--runtime/onert/core/src/backend/builtin/train/TensorRegistry.h132
-rw-r--r--runtime/onert/core/src/backend/builtin/train/kernel/PermuteLayer.cc85
-rw-r--r--runtime/onert/core/src/backend/builtin/train/kernel/PermuteLayer.h60
-rw-r--r--runtime/onert/core/src/backend/controlflow/Backend.h86
-rw-r--r--runtime/onert/core/src/backend/controlflow/Config.cc37
-rw-r--r--runtime/onert/core/src/backend/controlflow/Config.h53
-rw-r--r--runtime/onert/core/src/backend/controlflow/ConstantInitializer.h52
-rw-r--r--runtime/onert/core/src/backend/controlflow/DynamicTensorManager.cc144
-rw-r--r--runtime/onert/core/src/backend/controlflow/DynamicTensorManager.h72
-rw-r--r--runtime/onert/core/src/backend/controlflow/KernelGenerator.cc171
-rw-r--r--runtime/onert/core/src/backend/controlflow/KernelGenerator.h73
-rw-r--r--runtime/onert/core/src/backend/controlflow/Tensor.h35
-rw-r--r--runtime/onert/core/src/backend/controlflow/TensorBuilder.cc127
-rw-r--r--runtime/onert/core/src/backend/controlflow/TensorBuilder.h89
-rw-r--r--runtime/onert/core/src/backend/controlflow/TensorRegistry.h134
-rw-r--r--runtime/onert/core/src/backend/controlflow/UserTensor.cc40
-rw-r--r--runtime/onert/core/src/backend/controlflow/UserTensor.h91
-rw-r--r--runtime/onert/core/src/backend/controlflow/UserTensorRegistry.h36
-rw-r--r--runtime/onert/core/src/backend/controlflow/kernel/IfLayer.cc128
-rw-r--r--runtime/onert/core/src/backend/controlflow/kernel/IfLayer.h63
-rw-r--r--runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.cc82
-rw-r--r--runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.h77
-rw-r--r--runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.cc216
-rw-r--r--runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.h64
-rw-r--r--runtime/onert/core/src/backend/cpu_common/Allocator.cc38
-rw-r--r--runtime/onert/core/src/backend/cpu_common/DynamicTensorManager.cc137
-rw-r--r--runtime/onert/core/src/backend/cpu_common/MemoryManager.cc107
-rw-r--r--runtime/onert/core/src/backend/cpu_common/MemoryPlanner.cc215
-rw-r--r--runtime/onert/core/src/backend/cpu_common/MemoryPlanner.h160
-rw-r--r--runtime/onert/core/src/backend/cpu_common/MemoryPlanner.test.cc193
-rw-r--r--runtime/onert/core/src/backend/cpu_common/MemoryPlannerFactory.cc53
-rw-r--r--runtime/onert/core/src/backend/cpu_common/MemoryPlannerFactory.h47
-rw-r--r--runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc118
-rw-r--r--runtime/onert/core/src/backend/cpu_common/Tensor.cc43
-rw-r--r--runtime/onert/core/src/compiler/BackendManager.cc130
-rw-r--r--runtime/onert/core/src/compiler/Compiler.cc341
-rw-r--r--runtime/onert/core/src/compiler/CompilerFactory.cc58
-rw-r--r--runtime/onert/core/src/compiler/CompilerHelpers.h52
-rw-r--r--runtime/onert/core/src/compiler/CompilerOptions.cc146
-rw-r--r--runtime/onert/core/src/compiler/ExecutorFactory.cc1037
-rw-r--r--runtime/onert/core/src/compiler/ExecutorFactory.h82
-rw-r--r--runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc152
-rw-r--r--runtime/onert/core/src/compiler/Fp32ToFp16Converter.h8
-rw-r--r--runtime/onert/core/src/compiler/HEScheduler.cc106
-rw-r--r--runtime/onert/core/src/compiler/HEScheduler.h50
-rw-r--r--runtime/onert/core/src/compiler/HEScheduler.test.cc572
-rw-r--r--runtime/onert/core/src/compiler/Linear.cc201
-rw-r--r--runtime/onert/core/src/compiler/Linear.h20
-rw-r--r--runtime/onert/core/src/compiler/LoweredGraph.cc578
-rw-r--r--runtime/onert/core/src/compiler/ManualScheduler.cc33
-rw-r--r--runtime/onert/core/src/compiler/ManualScheduler.h4
-rw-r--r--runtime/onert/core/src/compiler/MultiModelCompiler.cc242
-rw-r--r--runtime/onert/core/src/compiler/MultiModelCompiler.h69
-rw-r--r--runtime/onert/core/src/compiler/OperationLowerInfo.cc31
-rw-r--r--runtime/onert/core/src/compiler/OperationValidator.cc1053
-rw-r--r--runtime/onert/core/src/compiler/OperationValidator.h100
-rw-r--r--runtime/onert/core/src/compiler/ParamChecker.cc33
-rw-r--r--runtime/onert/core/src/compiler/ParamChecker.h73
-rw-r--r--runtime/onert/core/src/compiler/PermuteFactor.cc28
-rw-r--r--runtime/onert/core/src/compiler/ShapeValidator.cc1082
-rw-r--r--runtime/onert/core/src/compiler/ShapeValidator.h104
-rw-r--r--runtime/onert/core/src/compiler/StaticShapeInference.cc1096
-rw-r--r--runtime/onert/core/src/compiler/StaticShapeInferer.cc1425
-rw-r--r--runtime/onert/core/src/compiler/TensorBuilders.h78
-rw-r--r--runtime/onert/core/src/compiler/TensorRegistries.h34
-rw-r--r--runtime/onert/core/src/compiler/pass/ConstantInsertionPass.cc39
-rw-r--r--runtime/onert/core/src/compiler/pass/ConstantInsertionPass.h9
-rw-r--r--runtime/onert/core/src/compiler/pass/ConstantLoweringPass.cc23
-rw-r--r--runtime/onert/core/src/compiler/pass/ConstantLoweringPass.h2
-rw-r--r--runtime/onert/core/src/compiler/pass/ConstantOutputPass.cc68
-rw-r--r--runtime/onert/core/src/compiler/pass/ConstantOutputPass.h63
-rw-r--r--runtime/onert/core/src/compiler/pass/IPass.h41
-rw-r--r--runtime/onert/core/src/compiler/pass/LoweredOperandPass.h8
-rw-r--r--runtime/onert/core/src/compiler/pass/LoweredOperationPass.h10
-rw-r--r--runtime/onert/core/src/compiler/pass/OddOutputPass.cc90
-rw-r--r--runtime/onert/core/src/compiler/pass/OddOutputPass.h89
-rw-r--r--runtime/onert/core/src/compiler/pass/OperandPass.cc2
-rw-r--r--runtime/onert/core/src/compiler/pass/OperationPass.cc4
-rw-r--r--runtime/onert/core/src/compiler/pass/OperationPass.h4
-rw-r--r--runtime/onert/core/src/compiler/pass/Pass.h6
-rw-r--r--runtime/onert/core/src/compiler/pass/PassRunner.cc45
-rw-r--r--runtime/onert/core/src/compiler/pass/PassRunner.h53
-rw-r--r--runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc102
-rw-r--r--runtime/onert/core/src/compiler/pass/PermutationEliminationPass.h4
-rw-r--r--runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc101
-rw-r--r--runtime/onert/core/src/compiler/pass/PermutationInsertionPass.h4
-rw-r--r--runtime/onert/core/src/compiler/pass/PermutationOperationPass.cc137
-rw-r--r--runtime/onert/core/src/compiler/pass/PermutationOperationPass.h3
-rw-r--r--runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.cc64
-rw-r--r--runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.h54
-rw-r--r--runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.test.cc47
-rw-r--r--runtime/onert/core/src/compiler/train/LoweredTrainableGraph.cc285
-rw-r--r--runtime/onert/core/src/compiler/train/StaticDerivativeShapeInferer.cc150
-rw-r--r--runtime/onert/core/src/compiler/train/StaticDerivativeShapeInferer.h80
-rw-r--r--runtime/onert/core/src/compiler/train/TensorRegistries.h105
-rw-r--r--runtime/onert/core/src/compiler/train/TrainableOperationConverter.cc86
-rw-r--r--runtime/onert/core/src/compiler/train/TrainableOperationConverter.h57
-rw-r--r--runtime/onert/core/src/compiler/train/TrainingCompiler.cc299
-rw-r--r--runtime/onert/core/src/compiler/train/TrainingCompiler.h83
-rw-r--r--runtime/onert/core/src/compiler/train/UntrainableOperationConverter.cc53
-rw-r--r--runtime/onert/core/src/compiler/train/UntrainableOperationConverter.h52
-rw-r--r--runtime/onert/core/src/compiler/train/pass/LossInsertionPass.cc77
-rw-r--r--runtime/onert/core/src/compiler/train/pass/LossInsertionPass.h55
-rw-r--r--runtime/onert/core/src/compiler/train/pass/Pass.h64
-rw-r--r--runtime/onert/core/src/dumper/dot/DotBuilder.cc23
-rw-r--r--runtime/onert/core/src/dumper/dot/DotBuilder.h2
-rw-r--r--runtime/onert/core/src/dumper/dot/DotDumper.cc239
-rw-r--r--runtime/onert/core/src/dumper/dot/DotDumper.h27
-rw-r--r--runtime/onert/core/src/dumper/dot/DotSubgraphInfo.cc58
-rw-r--r--runtime/onert/core/src/dumper/dot/DotSubgraphInfo.h61
-rw-r--r--runtime/onert/core/src/dumper/dot/OperandNode.cc5
-rw-r--r--runtime/onert/core/src/dumper/dot/OperandNode.h1
-rw-r--r--runtime/onert/core/src/dumper/dot/OperationNode.cc5
-rw-r--r--runtime/onert/core/src/dumper/dot/OperationNode.h4
-rw-r--r--runtime/onert/core/src/dumper/h5/Dumper.cc34
-rw-r--r--runtime/onert/core/src/dumper/h5/Dumper.h51
-rw-r--r--runtime/onert/core/src/dumper/h5/MinMaxDumper.cc75
-rw-r--r--runtime/onert/core/src/dumper/h5/MinMaxDumper.h70
-rw-r--r--runtime/onert/core/src/dumper/text/GraphDumper.cc110
-rw-r--r--runtime/onert/core/src/dumper/text/GraphDumper.h66
-rw-r--r--runtime/onert/core/src/exec/DataflowExecutor.cc87
-rw-r--r--runtime/onert/core/src/exec/DataflowExecutor.h27
-rw-r--r--runtime/onert/core/src/exec/DynamicShapeInference.cc1015
-rw-r--r--runtime/onert/core/src/exec/DynamicShapeInferer.cc1258
-rw-r--r--runtime/onert/core/src/exec/ExecTime.cc6
-rw-r--r--runtime/onert/core/src/exec/ExecTime.h4
-rw-r--r--runtime/onert/core/src/exec/ExecTime.test.cc106
-rw-r--r--runtime/onert/core/src/exec/Execution.cc93
-rw-r--r--runtime/onert/core/src/exec/Execution.test.cc635
-rw-r--r--runtime/onert/core/src/exec/ExecutionObservee.cc28
-rw-r--r--runtime/onert/core/src/exec/ExecutionObservee.h14
-rw-r--r--runtime/onert/core/src/exec/ExecutionObservers.cc131
-rw-r--r--runtime/onert/core/src/exec/ExecutionObservers.h58
-rw-r--r--runtime/onert/core/src/exec/ExecutorBase.cc222
-rw-r--r--runtime/onert/core/src/exec/ExecutorBase.h68
-rw-r--r--runtime/onert/core/src/exec/Executors.cc649
-rw-r--r--runtime/onert/core/src/exec/Executors.h169
-rw-r--r--runtime/onert/core/src/exec/FunctionSequence.cc28
-rw-r--r--runtime/onert/core/src/exec/IPermuteFunction.cc320
-rw-r--r--runtime/onert/core/src/exec/IPermuteFunction.h378
-rw-r--r--runtime/onert/core/src/exec/IPermuteFunction.test.cc902
-rw-r--r--runtime/onert/core/src/exec/JSONExecTime.cc6
-rw-r--r--runtime/onert/core/src/exec/JSONExecTime.h18
-rw-r--r--runtime/onert/core/src/exec/LinearExecutor.cc61
-rw-r--r--runtime/onert/core/src/exec/LinearExecutor.h21
-rw-r--r--runtime/onert/core/src/exec/MinMaxRecorder.cc112
-rw-r--r--runtime/onert/core/src/exec/MinMaxRecorder.h56
-rw-r--r--runtime/onert/core/src/exec/ParallelExecutor.cc54
-rw-r--r--runtime/onert/core/src/exec/ParallelExecutor.h22
-rw-r--r--runtime/onert/core/src/exec/ParallelScheduler.cc4
-rw-r--r--runtime/onert/core/src/exec/SingleModelExecutors.cc61
-rw-r--r--runtime/onert/core/src/exec/SingleModelExecutors.h70
-rw-r--r--runtime/onert/core/src/exec/Sink.h199
-rw-r--r--runtime/onert/core/src/exec/Source.h208
-rw-r--r--runtime/onert/core/src/exec/ThreadPool.cc2
-rw-r--r--runtime/onert/core/src/exec/feature/MockTensor.h66
-rw-r--r--runtime/onert/core/src/exec/feature/nchw/Reader.h42
-rw-r--r--runtime/onert/core/src/exec/feature/nchw/Reader.test.cc85
-rw-r--r--runtime/onert/core/src/exec/feature/nchw/View.h4
-rw-r--r--runtime/onert/core/src/exec/feature/nchw/View.test.cc85
-rw-r--r--runtime/onert/core/src/exec/feature/nhwc/Reader.h41
-rw-r--r--runtime/onert/core/src/exec/feature/nhwc/Reader.test.cc86
-rw-r--r--runtime/onert/core/src/exec/feature/nhwc/View.h8
-rw-r--r--runtime/onert/core/src/exec/feature/nhwc/View.test.cc86
-rw-r--r--runtime/onert/core/src/exec/train/TrainableExecutor.cc204
-rw-r--r--runtime/onert/core/src/exec/train/TrainableExecutor.h109
-rw-r--r--runtime/onert/core/src/exec/train/TrainableExecutors.cc89
-rw-r--r--runtime/onert/core/src/exec/train/TrainableExecutors.h92
-rw-r--r--runtime/onert/core/src/exec/train/TrainableFnSequence.cc67
-rw-r--r--runtime/onert/core/src/exec/train/optimizer/OptimizerCode.cc42
-rw-r--r--runtime/onert/core/src/exec/train/optimizer/OptimizerHelpers.h47
-rw-r--r--runtime/onert/core/src/exec/train/optimizer/SGD.cc66
-rw-r--r--runtime/onert/core/src/interp/Buffer.h91
-rw-r--r--runtime/onert/core/src/interp/ExecEnv.h212
-rw-r--r--runtime/onert/core/src/interp/InterpExecutor.cc126
-rw-r--r--runtime/onert/core/src/interp/InterpExecutor.h70
-rw-r--r--runtime/onert/core/src/interp/InterpOps.lst73
-rw-r--r--runtime/onert/core/src/interp/Interpreter.cc184
-rw-r--r--runtime/onert/core/src/interp/Interpreter.h64
-rw-r--r--runtime/onert/core/src/interp/Registration.h43
-rw-r--r--runtime/onert/core/src/interp/Tensor.cc53
-rw-r--r--runtime/onert/core/src/interp/Tensor.h184
-rw-r--r--runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc205
-rw-r--r--runtime/onert/core/src/interp/operations/Concat.cc147
-rw-r--r--runtime/onert/core/src/interp/operations/Conv2D.cc151
-rw-r--r--runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc156
-rw-r--r--runtime/onert/core/src/interp/operations/ElementwiseActivations.cc161
-rw-r--r--runtime/onert/core/src/interp/operations/FullyConnected.cc136
-rw-r--r--runtime/onert/core/src/interp/operations/Gather.cc138
-rw-r--r--runtime/onert/core/src/interp/operations/InstanceNorm.cc121
-rw-r--r--runtime/onert/core/src/interp/operations/OperationUtil.h203
-rw-r--r--runtime/onert/core/src/interp/operations/Pad.cc106
-rw-r--r--runtime/onert/core/src/interp/operations/Pool2D.cc140
-rw-r--r--runtime/onert/core/src/interp/operations/Reshape.cc63
-rw-r--r--runtime/onert/core/src/interp/operations/Softmax.cc123
-rw-r--r--runtime/onert/core/src/interp/operations/TransposeConv.cc141
-rw-r--r--runtime/onert/core/src/ir/DataType.cc6
-rw-r--r--runtime/onert/core/src/ir/Graph.cc174
-rw-r--r--runtime/onert/core/src/ir/Graph.test.cc147
-rw-r--r--runtime/onert/core/src/ir/GraphIterator.cc121
-rw-r--r--runtime/onert/core/src/ir/GraphIterator.h90
-rw-r--r--runtime/onert/core/src/ir/LayoutSet.cc8
-rw-r--r--runtime/onert/core/src/ir/LayoutSet.h1
-rw-r--r--runtime/onert/core/src/ir/LayoutSet.test.cc67
-rw-r--r--runtime/onert/core/src/ir/MockNode.h47
-rw-r--r--runtime/onert/core/src/ir/OpSequence.cc95
-rw-r--r--runtime/onert/core/src/ir/OpSequences.cc124
-rw-r--r--runtime/onert/core/src/ir/Operand.cc6
-rw-r--r--runtime/onert/core/src/ir/Operand.test.cc86
-rw-r--r--runtime/onert/core/src/ir/OperandIndexSequence.cc13
-rw-r--r--runtime/onert/core/src/ir/OperandIndexSequence.test.cc52
-rw-r--r--runtime/onert/core/src/ir/Operands.cc2
-rw-r--r--runtime/onert/core/src/ir/Operands.test.cc45
-rw-r--r--runtime/onert/core/src/ir/Operation.cc21
-rw-r--r--runtime/onert/core/src/ir/Operation.test.cc98
-rw-r--r--runtime/onert/core/src/ir/OperationCloner.cc26
-rw-r--r--runtime/onert/core/src/ir/OperationCloner.h14
-rw-r--r--runtime/onert/core/src/ir/OperationDumper.cc280
-rw-r--r--runtime/onert/core/src/ir/OperationDumper.h6
-rw-r--r--runtime/onert/core/src/ir/OperationValidator.cc545
-rw-r--r--runtime/onert/core/src/ir/OperationValidator.h101
-rw-r--r--runtime/onert/core/src/ir/Operations.cc9
-rw-r--r--runtime/onert/core/src/ir/Operations.test.cc42
-rw-r--r--runtime/onert/core/src/ir/Padding.cc10
-rw-r--r--runtime/onert/core/src/ir/Shape.cc41
-rw-r--r--runtime/onert/core/src/ir/Shape.test.cc58
-rw-r--r--runtime/onert/core/src/ir/TypeInfo.cc2
-rw-r--r--runtime/onert/core/src/ir/operation/AddN.cc36
-rw-r--r--runtime/onert/core/src/ir/operation/ArgMax.cc40
-rw-r--r--runtime/onert/core/src/ir/operation/ArgMinMax.cc37
-rw-r--r--runtime/onert/core/src/ir/operation/BCQFullyConnected.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/BCQGather.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/BatchMatMul.cc2
-rw-r--r--runtime/onert/core/src/ir/operation/BatchToSpaceND.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/BinaryArithmetic.cc14
-rw-r--r--runtime/onert/core/src/ir/operation/BroadcastTo.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Bulk.cc36
-rw-r--r--runtime/onert/core/src/ir/operation/Comparison.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Concat.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Conv2D.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/ConvertFp16ToFp32.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/ConvertFp32ToFp16.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Custom.cc2
-rw-r--r--runtime/onert/core/src/ir/operation/DepthToSpace.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/DepthwiseConv2D.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/DetectionPostProcess.cc37
-rw-r--r--runtime/onert/core/src/ir/operation/Einsum.cc2
-rw-r--r--runtime/onert/core/src/ir/operation/ElementwiseActivation.cc30
-rw-r--r--runtime/onert/core/src/ir/operation/ElementwiseBinary.cc15
-rw-r--r--runtime/onert/core/src/ir/operation/ElementwiseUnary.cc42
-rw-r--r--runtime/onert/core/src/ir/operation/EmbeddingLookup.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/ExpandDims.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Fill.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/FullyConnected.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/FusedBatchNorm.cc2
-rw-r--r--runtime/onert/core/src/ir/operation/Gather.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/HashtableLookup.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/If.cc2
-rw-r--r--runtime/onert/core/src/ir/operation/InstanceNorm.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/L2Normalization.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/LSTM.cc13
-rw-r--r--runtime/onert/core/src/ir/operation/LocalResponseNormalization.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/LogSoftmax.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Loss.cc52
-rw-r--r--runtime/onert/core/src/ir/operation/LowerInfo.cc34
-rw-r--r--runtime/onert/core/src/ir/operation/MatrixBandPart.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/OneHot.cc2
-rw-r--r--runtime/onert/core/src/ir/operation/PReLU.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Pack.cc2
-rw-r--r--runtime/onert/core/src/ir/operation/Pad.cc2
-rw-r--r--runtime/onert/core/src/ir/operation/Permute.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Pool2D.cc12
-rw-r--r--runtime/onert/core/src/ir/operation/Pow.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/RNN.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Range.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Rank.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Reduce.cc20
-rw-r--r--runtime/onert/core/src/ir/operation/Reshape.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/ResizeBilinear.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Reverse.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Select.cc2
-rw-r--r--runtime/onert/core/src/ir/operation/Shape.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Slice.cc2
-rw-r--r--runtime/onert/core/src/ir/operation/Softmax.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/SpaceToBatchND.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/SpaceToDepth.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Split.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/SplitV.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/SquaredDifference.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Squeeze.cc2
-rw-r--r--runtime/onert/core/src/ir/operation/StatelessRandomUniform.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/StridedSlice.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Tile.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/TopKV2.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Transpose.cc8
-rw-r--r--runtime/onert/core/src/ir/operation/TransposeConv.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Unpack.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/While.cc3
-rw-r--r--runtime/onert/core/src/ir/train/TrainableGraph.cc145
-rw-r--r--runtime/onert/core/src/ir/train/operation/Conv2D.cc49
-rw-r--r--runtime/onert/core/src/ir/train/operation/ElementwiseActivation.cc49
-rw-r--r--runtime/onert/core/src/ir/train/operation/FullyConnected.cc49
-rw-r--r--runtime/onert/core/src/ir/train/operation/Loss.cc48
-rw-r--r--runtime/onert/core/src/ir/train/operation/Permute.cc50
-rw-r--r--runtime/onert/core/src/ir/train/operation/Pool2D.cc49
-rw-r--r--runtime/onert/core/src/ir/train/operation/Reshape.cc49
-rw-r--r--runtime/onert/core/src/ir/train/operation/Softmax.cc49
-rw-r--r--runtime/onert/core/src/ir/verifier/Verifier.cc54
-rw-r--r--runtime/onert/core/src/ir/verifier/Verifier.h11
-rw-r--r--runtime/onert/core/src/ir/verifier/Verifier.test.cc93
-rw-r--r--runtime/onert/core/src/odc/QuantizeManager.cc50
-rw-r--r--runtime/onert/core/src/odc/QuantizeManager.test.cc36
-rw-r--r--runtime/onert/core/src/odc/QuantizerLoader.cc104
-rw-r--r--runtime/onert/core/src/odc/QuantizerLoader.h89
-rw-r--r--runtime/onert/core/src/odc/QuantizerLoader.test.cc63
-rw-r--r--runtime/onert/core/src/util/ChromeTracingEventWriter.cc195
-rw-r--r--runtime/onert/core/src/util/ConfigSource.cc36
-rw-r--r--runtime/onert/core/src/util/EnvConfigSource.cc40
-rw-r--r--runtime/onert/core/src/util/EventCollector.cc77
-rw-r--r--runtime/onert/core/src/util/EventCollector.h70
-rw-r--r--runtime/onert/core/src/util/EventCollectorGlobal.cc93
-rw-r--r--runtime/onert/core/src/util/EventCollectorGlobal.h155
-rw-r--r--runtime/onert/core/src/util/EventRecorder.cc532
-rw-r--r--runtime/onert/core/src/util/EventRecorder.h63
-rw-r--r--runtime/onert/core/src/util/EventWriter.cc49
-rw-r--r--runtime/onert/core/src/util/EventWriter.h144
-rw-r--r--runtime/onert/core/src/util/GeneralConfigSource.cc45
-rw-r--r--runtime/onert/core/src/util/Index.test.cc34
-rw-r--r--runtime/onert/core/src/util/MDTableEventWriter.cc365
-rw-r--r--runtime/onert/core/src/util/ObjectManager.test.cc211
-rw-r--r--runtime/onert/core/src/util/SNPEEventWriter.cc186
-rw-r--r--runtime/onert/core/src/util/ShapeInference.cc243
-rw-r--r--runtime/onert/core/src/util/ShapeInference.test.cc544
-rw-r--r--runtime/onert/core/src/util/TracingCtx.cc30
-rw-r--r--runtime/onert/frontend/base_loader/include/base_loader.h1635
-rw-r--r--runtime/onert/frontend/circle/CMakeLists.txt7
-rw-r--r--runtime/onert/frontend/circle/include/circle_loader.h4
-rw-r--r--runtime/onert/frontend/circle/src/circle_loader.cc54
-rw-r--r--runtime/onert/frontend/circle_schema/include/circle_schema_generated.h6547
-rw-r--r--runtime/onert/frontend/nnapi/ANeuralNetworksModel.test.cc25
-rw-r--r--runtime/onert/frontend/nnapi/CMakeLists.txt2
-rw-r--r--runtime/onert/frontend/nnapi/compilation.cc4
-rw-r--r--runtime/onert/frontend/nnapi/execution.cc26
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc12
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h16
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksEvent.cc2
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc65
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h7
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc29
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.h5
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.test.cc25
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/NNAPIConvert.cc7
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc533
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/OperationFactory.h2
-rw-r--r--runtime/onert/frontend/tflite/CMakeLists.txt7
-rw-r--r--runtime/onert/frontend/tflite/include/tflite_loader.h2
-rw-r--r--runtime/onert/frontend/tflite/src/tflite_loader.cc54
-rw-r--r--runtime/onert/frontend/tflite/src/tflite_schema_generated.h5652
-rw-r--r--runtime/onert/frontend/tflite/tflite_schema.fbs237
-rw-r--r--runtime/onert/frontend/trix/CMakeLists.txt21
-rw-r--r--runtime/onert/frontend/trix/include/trix_loader.h34
-rw-r--r--runtime/onert/frontend/trix/src/trix_loader.cc270
-rw-r--r--runtime/onert/frontend/trix/src/trix_loader_dummy.cc31
-rw-r--r--runtime/onert/odc/CMakeLists.txt39
-rw-r--r--runtime/onert/odc/Quantizer.cc83
-rw-r--r--runtime/onert/odc/Quantizer.h39
-rw-r--r--runtime/onert/odc/Quantizer.test.cc42
-rw-r--r--runtime/onert/test/CMakeLists.txt15
-rw-r--r--runtime/onert/test/core/compiler/Scheduler.cc587
-rw-r--r--runtime/onert/test/core/exec/ExecInstance.cc297
-rw-r--r--runtime/onert/test/core/exec/ExecTime.test.cc105
-rw-r--r--runtime/onert/test/core/interp/ExecManager.cc361
-rw-r--r--runtime/onert/test/graph/Graph.cc52
-rw-r--r--runtime/onert/test/graph/Index.cc34
-rw-r--r--runtime/onert/test/graph/MockNode.h47
-rw-r--r--runtime/onert/test/graph/operand/IndexSet.cc52
-rw-r--r--runtime/onert/test/graph/operand/LayoutSet.cc43
-rw-r--r--runtime/onert/test/graph/operand/Set.cc45
-rw-r--r--runtime/onert/test/graph/operand/UseDef.cc85
-rw-r--r--runtime/onert/test/graph/operation/Set.cc33
-rw-r--r--runtime/onert/test/graph/operation/SetIO.cc99
-rw-r--r--runtime/onert/test/graph/verifier/Verifier.cc49
-rw-r--r--runtime/onert/test/ir/Shape.cc58
-rw-r--r--runtime/onert/test/util/ObjectManager.cc97
-rw-r--r--runtime/onert/test/util/ShapeInference.cc380
-rw-r--r--runtime/service/CMakeLists.txt1
-rw-r--r--runtime/service/npud/CMakeLists.txt42
-rw-r--r--runtime/service/npud/backend/CMakeLists.txt2
-rw-r--r--runtime/service/npud/backend/trix/CMakeLists.txt19
-rw-r--r--runtime/service/npud/backend/trix/TrixBackend.cc418
-rw-r--r--runtime/service/npud/backend/trix/TrixBackend.h130
-rw-r--r--runtime/service/npud/core/Backend.h175
-rw-r--r--runtime/service/npud/core/CMakeLists.txt29
-rw-r--r--runtime/service/npud/core/ContextManager.cc92
-rw-r--r--runtime/service/npud/core/ContextManager.h62
-rw-r--r--runtime/service/npud/core/Core.cc168
-rw-r--r--runtime/service/npud/core/Core.h61
-rw-r--r--runtime/service/npud/core/DBus.cc323
-rw-r--r--runtime/service/npud/core/DBus.h78
-rw-r--r--runtime/service/npud/core/DevManager.cc179
-rw-r--r--runtime/service/npud/core/DevManager.h63
-rw-r--r--runtime/service/npud/core/DynamicLoader.cc62
-rw-r--r--runtime/service/npud/core/DynamicLoader.h54
-rw-r--r--runtime/service/npud/core/Server.cc87
-rw-r--r--runtime/service/npud/core/Server.h67
-rw-r--r--runtime/service/npud/core/Signal.cc53
-rw-r--r--runtime/service/npud/core/Signal.h37
-rw-r--r--runtime/service/npud/core/ir/DataType.h49
-rw-r--r--runtime/service/npud/core/ir/Layout.h42
-rw-r--r--runtime/service/npud/core/main.cc39
-rw-r--r--runtime/service/npud/core/util/Config.lst23
-rw-r--r--runtime/service/npud/core/util/ConfigSource.cc126
-rw-r--r--runtime/service/npud/core/util/ConfigSource.h51
-rw-r--r--runtime/service/npud/core/util/Logging.h88
-rw-r--r--runtime/service/npud/org.tizen.npud.conf9
-rw-r--r--runtime/service/npud/org.tizen.npud.xml162
-rw-r--r--runtime/service/npud/tests/CMakeLists.txt17
-rw-r--r--runtime/service/npud/tests/core/DBus.test.cc608
-rw-r--r--runtime/service/npud/tests/core/Server.test.cc75
-rw-r--r--runtime/service/npud/tests/core/Signal.test.cc104
-rw-r--r--tests/custom_op/FillFrom/FillFrom_runner.cc4
-rw-r--r--tests/nnapi/CMakeLists.txt10
-rw-r--r--tests/nnapi/include/NeuralNetworksWrapper.h11
-rw-r--r--tests/nnapi/nnapi_gtest.skip.aarch64-android.acl_cl305
-rw-r--r--tests/nnapi/nnapi_gtest.skip.aarch64-android.acl_neon377
-rw-r--r--tests/nnapi/nnapi_gtest.skip.aarch64-android.cpu231
-rw-r--r--tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_cl47
-rw-r--r--tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_neon95
-rw-r--r--tests/nnapi/nnapi_gtest.skip.aarch64-linux.cpu100
-rw-r--r--tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_cl49
-rw-r--r--tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon97
-rw-r--r--tests/nnapi/nnapi_gtest.skip.armv7l-linux.cpu100
-rw-r--r--tests/nnapi/nnapi_gtest.skip.noarch.interp641
-rw-r--r--tests/nnapi/nnapi_gtest.skip.x86_64-linux.cpu101
-rw-r--r--tests/nnapi/specs/Ex/one_hot_ex_float_1_nnfw.mod.py48
-rw-r--r--tests/nnapi/specs/Ex/one_hot_ex_float_2_nnfw.mod.py47
-rw-r--r--tests/nnapi/specs/Ex/one_hot_ex_float_off_value_constant_zero_nnfw.mod.py45
-rw-r--r--tests/nnapi/specs/V1_1/transpose_float_1_perms_as_input_nnfw.mod.py34
-rw-r--r--tests/nnapi/specs/V1_2/argmax_3_axis_as_input_nnfw.mod.py35
-rw-r--r--tests/nnapi/specs/V1_2/argmin_1.mod.py (renamed from tests/nnapi/specs/skip/V1_2/argmin_1.mod.py)0
-rw-r--r--tests/nnapi/specs/V1_2/argmin_2.mod.py (renamed from tests/nnapi/specs/skip/V1_2/argmin_2.mod.py)0
-rw-r--r--tests/nnapi/specs/V1_2/argmin_3.mod.py (renamed from tests/nnapi/specs/skip/V1_2/argmin_3.mod.py)0
-rw-r--r--tests/nnapi/specs/V1_2/dequantize_v1_2.mod.py (renamed from tests/nnapi/specs/skip/V1_2/dequantize_v1_2.mod.py)0
-rw-r--r--tests/nnapi/specs/V1_2/equal_broadcast_float_nnfw.mod.py4
-rw-r--r--tests/nnapi/specs/V1_2/minimum_int32.mod.py32
-rw-r--r--tests/nnapi/specs/V1_2/resize_nearest_neighbor.mod.py (renamed from tests/nnapi/specs/skip/V1_2/resize_nearest_neighbor.mod.py)0
-rw-r--r--tests/nnapi/specs/V1_2/split_float_5_axis_as_input_nnfw.mod.py38
-rw-r--r--tests/nnapi/specs/V1_2/unidirectional_sequence_lstm_1step.mod.py (renamed from tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_1step.mod.py)0
-rw-r--r--tests/nnapi/specs/V1_2/unidirectional_sequence_lstm_batch_major_norm_peephole_projection.mod.py (renamed from tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_batch_major_norm_peephole_projection.mod.py)0
-rw-r--r--tests/nnapi/specs/V1_2/unidirectional_sequence_lstm_batch_major_peephole_projection_bias.mod.py (renamed from tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_batch_major_peephole_projection_bias.mod.py)0
-rw-r--r--tests/nnapi/specs/V1_2/unidirectional_sequence_lstm_dynamic_nnfw.mod.py172
-rw-r--r--tests/nnapi/specs/V1_2/unidirectional_sequence_lstm_layer_norm_cifg_peephole.mod.py (renamed from tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_layer_norm_cifg_peephole.mod.py)0
-rw-r--r--tests/nnapi/specs/V1_2/unidirectional_sequence_lstm_norm_peephole_projection.mod.py (renamed from tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_norm_peephole_projection.mod.py)0
-rw-r--r--tests/nnapi/src/TestGenerated.cpp5
-rw-r--r--tests/nnapi/src/TestMain.cpp6
-rw-r--r--tests/nnapi/src/TestNeuralNetworksWrapper.h4
-rw-r--r--tests/nnapi/src/TestValidation.cpp6
-rw-r--r--tests/nnfw_api/CMakeLists.txt41
-rw-r--r--tests/nnfw_api/README.md4
-rw-r--r--tests/nnfw_api/src/CircleGen.cc510
-rw-r--r--tests/nnfw_api/src/CircleGen.h134
-rw-r--r--tests/nnfw_api/src/GenModelTest.h307
-rw-r--r--tests/nnfw_api/src/GenModelTests.test.cc400
-rw-r--r--tests/nnfw_api/src/ModelTestDynamicTensor.cc547
-rw-r--r--tests/nnfw_api/src/ModelTestDynamicTensor.test.cc995
-rw-r--r--tests/nnfw_api/src/ModelTestInputReshaping.cc90
-rw-r--r--tests/nnfw_api/src/ModelTestInputReshaping.test.cc97
-rw-r--r--tests/nnfw_api/src/NNPackages.cc28
-rw-r--r--tests/nnfw_api/src/NNPackages.h10
-rw-r--r--tests/nnfw_api/src/RegressionTests.cc64
-rw-r--r--tests/nnfw_api/src/RegressionTests.test.cc178
-rw-r--r--tests/nnfw_api/src/ValidationTestAddModelLoaded.cc124
-rw-r--r--tests/nnfw_api/src/ValidationTestAddModelLoaded.test.cc246
-rw-r--r--tests/nnfw_api/src/ValidationTestAddSessionPrepared.cc164
-rw-r--r--tests/nnfw_api/src/ValidationTestAddSessionPrepared.test.cc201
-rw-r--r--tests/nnfw_api/src/ValidationTestFourAddModelsSetInput.test.cc (renamed from tests/nnfw_api/src/ValidationTestFourAddModelsSetInput.cc)0
-rw-r--r--tests/nnfw_api/src/ValidationTestMultipleSessions.test.cc140
-rw-r--r--tests/nnfw_api/src/ValidationTestSessionCreated.cc120
-rw-r--r--tests/nnfw_api/src/ValidationTestSessionCreated.test.cc127
-rw-r--r--tests/nnfw_api/src/ValidationTestSingleSession.cc111
-rw-r--r--tests/nnfw_api/src/ValidationTestSingleSession.test.cc131
-rw-r--r--tests/nnfw_api/src/fixtures.h94
-rw-r--r--tests/nnfw_api/src/main.cc2
-rw-r--r--tests/nnfw_api/src/one_op_tests/Add.cc103
-rw-r--r--tests/nnfw_api/src/one_op_tests/Add.test.cc301
-rw-r--r--tests/nnfw_api/src/one_op_tests/AddN.test.cc75
-rw-r--r--tests/nnfw_api/src/one_op_tests/ArgMinMax.test.cc256
-rw-r--r--tests/nnfw_api/src/one_op_tests/AveragePool2D.cc49
-rw-r--r--tests/nnfw_api/src/one_op_tests/AveragePool2D.test.cc243
-rw-r--r--tests/nnfw_api/src/one_op_tests/BatchToSpaceND.test.cc89
-rw-r--r--tests/nnfw_api/src/one_op_tests/Cast.test.cc201
-rw-r--r--tests/nnfw_api/src/one_op_tests/Concat.test.cc244
-rw-r--r--tests/nnfw_api/src/one_op_tests/Conv2D.test.cc329
-rw-r--r--tests/nnfw_api/src/one_op_tests/Cos.cc50
-rw-r--r--tests/nnfw_api/src/one_op_tests/Cos.test.cc50
-rw-r--r--tests/nnfw_api/src/one_op_tests/DepthToSpace.test.cc89
-rw-r--r--tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.test.cc567
-rw-r--r--tests/nnfw_api/src/one_op_tests/DetectionPostProcess.test.cc74
-rw-r--r--tests/nnfw_api/src/one_op_tests/Elu.test.cc48
-rw-r--r--tests/nnfw_api/src/one_op_tests/Equal.test.cc158
-rw-r--r--tests/nnfw_api/src/one_op_tests/ExpandDims.test.cc97
-rw-r--r--tests/nnfw_api/src/one_op_tests/Fill.test.cc148
-rw-r--r--tests/nnfw_api/src/one_op_tests/Floor.test.cc53
-rw-r--r--tests/nnfw_api/src/one_op_tests/FloorDiv.test.cc149
-rw-r--r--tests/nnfw_api/src/one_op_tests/FullyConnected.test.cc209
-rw-r--r--tests/nnfw_api/src/one_op_tests/Greater.test.cc144
-rw-r--r--tests/nnfw_api/src/one_op_tests/GreaterEqual.test.cc145
-rw-r--r--tests/nnfw_api/src/one_op_tests/If.test.cc132
-rw-r--r--tests/nnfw_api/src/one_op_tests/InstanceNorm.test.cc57
-rw-r--r--tests/nnfw_api/src/one_op_tests/L2Normalization.cc35
-rw-r--r--tests/nnfw_api/src/one_op_tests/L2Normalization.test.cc36
-rw-r--r--tests/nnfw_api/src/one_op_tests/LeakyRelu.cc32
-rw-r--r--tests/nnfw_api/src/one_op_tests/LeakyRelu.test.cc48
-rw-r--r--tests/nnfw_api/src/one_op_tests/Less.test.cc143
-rw-r--r--tests/nnfw_api/src/one_op_tests/LessEqual.test.cc144
-rw-r--r--tests/nnfw_api/src/one_op_tests/LogSoftmax.test.cc51
-rw-r--r--tests/nnfw_api/src/one_op_tests/Mean.test.cc64
-rw-r--r--tests/nnfw_api/src/one_op_tests/Mul.test.cc145
-rw-r--r--tests/nnfw_api/src/one_op_tests/Neg.test.cc113
-rw-r--r--tests/nnfw_api/src/one_op_tests/NotEqual.test.cc158
-rw-r--r--tests/nnfw_api/src/one_op_tests/OneHot.test.cc197
-rw-r--r--tests/nnfw_api/src/one_op_tests/Pad.cc92
-rw-r--r--tests/nnfw_api/src/one_op_tests/Pad.test.cc172
-rw-r--r--tests/nnfw_api/src/one_op_tests/PadV2.cc113
-rw-r--r--tests/nnfw_api/src/one_op_tests/PadV2.test.cc160
-rw-r--r--tests/nnfw_api/src/one_op_tests/Quantize.test.cc79
-rw-r--r--tests/nnfw_api/src/one_op_tests/Rank.cc63
-rw-r--r--tests/nnfw_api/src/one_op_tests/Rank.test.cc68
-rw-r--r--tests/nnfw_api/src/one_op_tests/Reduce.test.cc70
-rw-r--r--tests/nnfw_api/src/one_op_tests/Relu.test.cc48
-rw-r--r--tests/nnfw_api/src/one_op_tests/Relu6.test.cc48
-rw-r--r--tests/nnfw_api/src/one_op_tests/ResizeBilinear.test.cc101
-rw-r--r--tests/nnfw_api/src/one_op_tests/ResizeNearestNeighbor.cc38
-rw-r--r--tests/nnfw_api/src/one_op_tests/ResizeNearestNeighbor.test.cc39
-rw-r--r--tests/nnfw_api/src/one_op_tests/Reverse.test.cc82
-rw-r--r--tests/nnfw_api/src/one_op_tests/Select.test.cc93
-rw-r--r--tests/nnfw_api/src/one_op_tests/Shape.test.cc70
-rw-r--r--tests/nnfw_api/src/one_op_tests/Slice.test.cc187
-rw-r--r--tests/nnfw_api/src/one_op_tests/Softmax.test.cc130
-rw-r--r--tests/nnfw_api/src/one_op_tests/Split.test.cc85
-rw-r--r--tests/nnfw_api/src/one_op_tests/Sqrt.test.cc53
-rw-r--r--tests/nnfw_api/src/one_op_tests/Square.test.cc53
-rw-r--r--tests/nnfw_api/src/one_op_tests/StridedSlice.test.cc68
-rw-r--r--tests/nnfw_api/src/one_op_tests/Sub.test.cc145
-rw-r--r--tests/nnfw_api/src/one_op_tests/Tile.test.cc134
-rw-r--r--tests/nnfw_api/src/one_op_tests/Transpose.test.cc142
-rw-r--r--tests/nnfw_api/src/one_op_tests/While.cc75
-rw-r--r--tests/nnfw_api/src/one_op_tests/While.test.cc270
-rw-r--r--tests/nnfw_api/src/one_op_tests/WhileTestModel.h82
-rw-r--r--tests/scripts/CMakeLists.txt4
-rw-r--r--tests/scripts/README.md6
-rw-r--r--tests/scripts/benchmark.sh102
-rwxr-xr-xtests/scripts/benchmark_nnapi.sh245
-rwxr-xr-xtests/scripts/benchmark_nnpkg.sh98
-rwxr-xr-xtests/scripts/benchmark_ops.sh183
-rw-r--r--tests/scripts/command/nnpkg-test17
-rw-r--r--tests/scripts/command/prepare-model133
-rw-r--r--tests/scripts/command/unittest4
-rw-r--r--tests/scripts/command/verify-tflite35
-rwxr-xr-xtests/scripts/common.sh47
-rw-r--r--tests/scripts/common_android.sh76
-rw-r--r--tests/scripts/list/frameworktest_list.aarch64.acl_cl.txt46
-rw-r--r--tests/scripts/list/frameworktest_list.aarch64.acl_neon.txt41
-rw-r--r--tests/scripts/list/frameworktest_list.armv7l.acl_cl.txt46
-rw-r--r--tests/scripts/list/frameworktest_list.armv7l.acl_neon.txt42
-rw-r--r--tests/scripts/list/frameworktest_list.armv7l.cpu.txt44
-rw-r--r--tests/scripts/list/frameworktest_list.noarch.interp.txt16
-rw-r--r--tests/scripts/list/frameworktest_list.x86_64.cpu.txt21
-rw-r--r--tests/scripts/list/nnpkg_test_list.noarch.interp42
-rw-r--r--tests/scripts/list/tflite_comparator.aarch64.acl_cl.list45
-rw-r--r--tests/scripts/list/tflite_comparator.aarch64.acl_neon.list40
-rw-r--r--tests/scripts/list/tflite_comparator.aarch64.cpu.list (renamed from tests/scripts/list/frameworktest_list.aarch64.cpu.txt)0
-rw-r--r--tests/scripts/list/tflite_comparator.armv7l.acl_cl.list45
-rw-r--r--tests/scripts/list/tflite_comparator.armv7l.acl_neon.list41
-rw-r--r--tests/scripts/list/tflite_comparator.armv7l.cpu.list43
-rw-r--r--tests/scripts/list/tflite_comparator.x86_64.cpu.list20
-rw-r--r--tests/scripts/list/tflite_loader_list.aarch64.txt35
-rw-r--r--tests/scripts/list/tflite_loader_list.armv7l.txt35
-rwxr-xr-xtests/scripts/merge_result_of_benchmark_nnpkg.py2
-rw-r--r--tests/scripts/models/nnfw_api_gtest/add/config.sh1
-rw-r--r--tests/scripts/models/nnfw_api_gtest/add_invalid_manifest/config.sh1
-rw-r--r--tests/scripts/models/nnfw_api_gtest/add_no_manifest/config.sh1
-rw-r--r--tests/scripts/models/nnfw_api_gtest/add_unspecified_rank_inputs/config.sh1
-rw-r--r--tests/scripts/models/nnfw_api_gtest/dynamic_tensor_reshape/config.sh1
-rw-r--r--tests/scripts/models/nnfw_api_gtest/if_dynamic/config.sh1
-rw-r--r--tests/scripts/models/nnfw_api_gtest/input_reshaping_add/config.sh1
-rw-r--r--tests/scripts/models/nnfw_api_gtest/neg/config.sh1
-rw-r--r--tests/scripts/models/nnfw_api_gtest/unknown_dim_input_concat/config.sh1
-rw-r--r--tests/scripts/models/nnfw_api_gtest/while_dynamic/config.sh1
-rwxr-xr-xtests/scripts/models/run_test.sh131
-rw-r--r--tests/scripts/models/run_test_android.sh186
-rwxr-xr-xtests/scripts/models/tflite/MODELS/inception_nonslim/config.sh2
-rwxr-xr-xtests/scripts/models/tflite/MODELS/inception_slim/config.sh2
-rwxr-xr-xtests/scripts/models/tflite/MODELS/mobilenet/config.sh2
-rwxr-xr-xtests/scripts/models/tflite/MODELS/mobilenet_quant8/config.sh2
-rwxr-xr-xtests/scripts/models/tflite/custom/squared_difference/config.sh1
-rw-r--r--tests/scripts/onert-test11
-rwxr-xr-xtests/scripts/test-driver.sh163
-rwxr-xr-xtests/scripts/test_framework.sh104
-rwxr-xr-xtests/scripts/test_scheduler_with_profiling.sh52
-rw-r--r--tests/scripts/test_scheduler_with_profiling_android.sh230
-rw-r--r--tests/tools/nnapi_test/CMakeLists.txt14
-rw-r--r--tests/tools/nnapi_test/src/args.cc116
-rw-r--r--tests/tools/nnapi_test/src/args.h53
-rw-r--r--tests/tools/nnapi_test/src/nnapi_test.cc63
-rw-r--r--tests/tools/nnpackage_run/CMakeLists.txt45
-rw-r--r--tests/tools/nnpackage_run/README.md22
-rw-r--r--tests/tools/nnpackage_run/src/allocation.h37
-rw-r--r--tests/tools/nnpackage_run/src/args.cc291
-rw-r--r--tests/tools/nnpackage_run/src/args.h96
-rw-r--r--tests/tools/nnpackage_run/src/h5formatter.cc241
-rw-r--r--tests/tools/nnpackage_run/src/h5formatter.h43
-rw-r--r--tests/tools/nnpackage_run/src/nnfw_util.cc48
-rw-r--r--tests/tools/nnpackage_run/src/nnfw_util.h37
-rw-r--r--tests/tools/nnpackage_run/src/nnpackage_run.cc288
-rw-r--r--tests/tools/nnpackage_run/src/randomgen.cc74
-rw-r--r--tests/tools/nnpackage_run/src/randomgen.h40
-rw-r--r--tests/tools/nnpackage_run/src/types.h27
-rw-r--r--tests/tools/onert_run/CMakeLists.txt45
-rw-r--r--tests/tools/onert_run/README.md22
-rw-r--r--tests/tools/onert_run/src/allocation.h38
-rw-r--r--tests/tools/onert_run/src/args.cc393
-rw-r--r--tests/tools/onert_run/src/args.h110
-rw-r--r--tests/tools/onert_run/src/formatter.h47
-rw-r--r--tests/tools/onert_run/src/h5formatter.cc258
-rw-r--r--tests/tools/onert_run/src/h5formatter.h41
-rw-r--r--tests/tools/onert_run/src/nnfw_util.cc49
-rw-r--r--tests/tools/onert_run/src/nnfw_util.h37
-rw-r--r--tests/tools/onert_run/src/onert_run.cc390
-rw-r--r--tests/tools/onert_run/src/randomgen.cc77
-rw-r--r--tests/tools/onert_run/src/randomgen.h40
-rw-r--r--tests/tools/onert_run/src/rawformatter.cc97
-rw-r--r--tests/tools/onert_run/src/rawformatter.h40
-rw-r--r--tests/tools/onert_run/src/types.h27
-rw-r--r--tests/tools/onert_train/CMakeLists.txt60
-rw-r--r--tests/tools/onert_train/README.md13
-rw-r--r--tests/tools/onert_train/src/allocation.h46
-rw-r--r--tests/tools/onert_train/src/args.cc291
-rw-r--r--tests/tools/onert_train/src/args.h92
-rw-r--r--tests/tools/onert_train/src/formatter.h47
-rw-r--r--tests/tools/onert_train/src/h5formatter.cc258
-rw-r--r--tests/tools/onert_train/src/h5formatter.h41
-rw-r--r--tests/tools/onert_train/src/measure.h90
-rw-r--r--tests/tools/onert_train/src/nnfw_util.cc49
-rw-r--r--tests/tools/onert_train/src/nnfw_util.h37
-rw-r--r--tests/tools/onert_train/src/onert_train.cc277
-rw-r--r--tests/tools/onert_train/src/randomgen.cc77
-rw-r--r--tests/tools/onert_train/src/randomgen.h40
-rw-r--r--tests/tools/onert_train/src/rawdataloader.cc77
-rw-r--r--tests/tools/onert_train/src/rawdataloader.h51
-rw-r--r--tests/tools/onert_train/src/rawformatter.cc97
-rw-r--r--tests/tools/onert_train/src/rawformatter.h40
-rw-r--r--tests/tools/onert_train/src/types.h27
-rw-r--r--tests/tools/onert_train/test/rawdataloader.test.cc224
-rw-r--r--tests/tools/tflite_benchmark_model/CMakeLists.txt24
-rw-r--r--tests/tools/tflite_benchmark_model/README.md197
-rw-r--r--tests/tools/tflite_benchmark_model/benchmark_tflite_model.cc419
-rw-r--r--tests/tools/tflite_benchmark_model/profile_summarizer.cc161
-rw-r--r--tests/tools/tflite_benchmark_model/stats_calculator.cc317
-rw-r--r--tests/tools/tflite_comparator/CMakeLists.txt23
-rw-r--r--tests/tools/tflite_comparator/src/args.cc92
-rw-r--r--tests/tools/tflite_comparator/src/args.h (renamed from tests/tools/tflite_loader/src/args.h)0
-rw-r--r--tests/tools/tflite_comparator/src/tflite_comparator.cc398
-rw-r--r--tests/tools/tflite_loader/CMakeLists.txt23
-rw-r--r--tests/tools/tflite_loader/src/args.cc92
-rw-r--r--tests/tools/tflite_loader/src/tflite_loader.cc311
-rw-r--r--tests/tools/tflite_run/CMakeLists.txt2
-rw-r--r--tests/tools/tflite_run/src/bin_image.cc2
-rw-r--r--tests/tools/tflite_run/src/tensor_dumper.cc52
-rw-r--r--tests/tools/tflite_run/src/tensor_dumper.h8
-rw-r--r--tests/tools/tflite_run/src/tensor_loader.cc71
-rw-r--r--tests/tools/tflite_run/src/tensor_loader.h20
-rw-r--r--tests/tools/tflite_run/src/tflite_run.cc228
-rw-r--r--tests/tools/tflite_vanilla_run/CMakeLists.txt23
-rw-r--r--tests/tools/tflite_vanilla_run/src/args.cc208
-rw-r--r--tests/tools/tflite_vanilla_run/src/args.h73
-rw-r--r--tests/tools/tflite_vanilla_run/src/tensor_view.h117
-rw-r--r--tests/tools/tflite_vanilla_run/src/tflite_vanilla_run.cc276
-rw-r--r--tools/cross/aarch64/sources.list.focal11
-rw-r--r--tools/cross/aarch64/sources.list.jammy11
-rw-r--r--tools/cross/aarch64/sources.list.trusty11
-rw-r--r--tools/cross/aarch64/sources.list.xenial11
-rw-r--r--tools/cross/arm/sources.list.focal11
-rw-r--r--tools/cross/arm/sources.list.jammy11
-rw-r--r--tools/cross/arm/sources.list.trusty11
-rw-r--r--tools/cross/arm/sources.list.xenial11
-rwxr-xr-xtools/cross/armel/tizen-build-rootfs.sh43
-rwxr-xr-xtools/cross/armel/tizen-fetch.sh169
-rw-r--r--tools/cross/armel/tizen.patch18
-rwxr-xr-xtools/cross/install_rootfs.sh51
-rw-r--r--tools/generate_datafile/tf_dataset_converter/README.md66
-rw-r--r--tools/generate_datafile/tf_dataset_converter/argparser.py54
-rw-r--r--tools/generate_datafile/tf_dataset_converter/datasets.py80
-rw-r--r--tools/generate_datafile/tf_dataset_converter/main.py98
-rw-r--r--tools/generate_datafile/tf_dataset_converter/requirements.txt8
-rw-r--r--tools/kbenchmark/kernels/acl_cl/Convolution.cpp11
-rw-r--r--tools/kbenchmark/kernels/acl_cl/TransposeConv.cpp11
-rw-r--r--tools/kbenchmark/kernels/acl_neon/Convolution.cpp11
-rw-r--r--tools/kbenchmark/kernels/acl_neon/TransposeConv.cpp11
-rwxr-xr-xtools/kernel_report/kernel_report.py10
-rw-r--r--tools/model_partition_tool/Graph.py457
-rw-r--r--tools/model_partition_tool/README.md138
-rw-r--r--tools/model_partition_tool/graph_analysis.py104
-rw-r--r--tools/model_partition_tool/runtime_stats.py30
-rw-r--r--tools/model_partition_tool/test_partition.py21
-rw-r--r--tools/nnapi_quickcheck/CMakeLists.txt82
-rw-r--r--tools/nnapi_quickcheck/inc/env.h60
-rw-r--r--tools/nnapi_quickcheck/inc/memory.h34
-rw-r--r--tools/nnapi_quickcheck/lib/env.cpp50
-rw-r--r--tools/nnapi_quickcheck/lib/env.test.cpp45
-rw-r--r--tools/nnapi_quickcheck/tests/add_1.cpp159
-rw-r--r--tools/nnapi_quickcheck/tests/add_1.lst13
-rw-r--r--tools/nnapi_quickcheck/tests/add_2.cpp177
-rw-r--r--tools/nnapi_quickcheck/tests/add_2.lst13
-rw-r--r--tools/nnapi_quickcheck/tests/add_3.cpp137
-rw-r--r--tools/nnapi_quickcheck/tests/add_3.lst6
-rw-r--r--tools/nnapi_quickcheck/tests/add_4.cpp159
-rw-r--r--tools/nnapi_quickcheck/tests/add_4.lst13
-rw-r--r--tools/nnapi_quickcheck/tests/add_5.cpp152
-rw-r--r--tools/nnapi_quickcheck/tests/add_5.lst10
-rw-r--r--tools/nnapi_quickcheck/tests/add_6.cpp144
-rw-r--r--tools/nnapi_quickcheck/tests/add_6.lst8
-rw-r--r--tools/nnapi_quickcheck/tests/add_7.cpp152
-rw-r--r--tools/nnapi_quickcheck/tests/add_7.lst11
-rw-r--r--tools/nnapi_quickcheck/tests/add_8.cpp190
-rw-r--r--tools/nnapi_quickcheck/tests/add_8.lst13
-rw-r--r--tools/nnapi_quickcheck/tests/add_9.cpp187
-rw-r--r--tools/nnapi_quickcheck/tests/add_9.lst13
-rw-r--r--tools/nnapi_quickcheck/tests/add_quan_1.cpp162
-rw-r--r--tools/nnapi_quickcheck/tests/add_quan_1.lst13
-rw-r--r--tools/nnapi_quickcheck/tests/avg_pool_1.cpp150
-rw-r--r--tools/nnapi_quickcheck/tests/avg_pool_1.lst10
-rw-r--r--tools/nnapi_quickcheck/tests/avg_pool_quan_1.cpp149
-rw-r--r--tools/nnapi_quickcheck/tests/avg_pool_quan_1.lst10
-rw-r--r--tools/nnapi_quickcheck/tests/cast_1.cpp136
-rw-r--r--tools/nnapi_quickcheck/tests/cast_1.lst8
-rw-r--r--tools/nnapi_quickcheck/tests/cast_2.cpp134
-rw-r--r--tools/nnapi_quickcheck/tests/cast_2.lst8
-rw-r--r--tools/nnapi_quickcheck/tests/cast_q_to_f_1.cpp136
-rw-r--r--tools/nnapi_quickcheck/tests/cast_q_to_f_1.lst8
-rw-r--r--tools/nnapi_quickcheck/tests/concat_1.cpp161
-rw-r--r--tools/nnapi_quickcheck/tests/concat_1.lst8
-rw-r--r--tools/nnapi_quickcheck/tests/concat_quan_1.cpp163
-rw-r--r--tools/nnapi_quickcheck/tests/concat_quan_1.lst8
-rw-r--r--tools/nnapi_quickcheck/tests/conv_1.cpp207
-rw-r--r--tools/nnapi_quickcheck/tests/conv_1.lst14
-rw-r--r--tools/nnapi_quickcheck/tests/conv_quan_1.cpp211
-rw-r--r--tools/nnapi_quickcheck/tests/conv_quan_1.lst14
-rw-r--r--tools/nnapi_quickcheck/tests/dconv_1.cpp205
-rw-r--r--tools/nnapi_quickcheck/tests/dconv_1.lst16
-rw-r--r--tools/nnapi_quickcheck/tests/dconv_quan_1.cpp209
-rw-r--r--tools/nnapi_quickcheck/tests/dconv_quan_1.lst16
-rw-r--r--tools/nnapi_quickcheck/tests/dequantize_1.cpp136
-rw-r--r--tools/nnapi_quickcheck/tests/dequantize_1.lst8
-rw-r--r--tools/nnapi_quickcheck/tests/div_1.cpp159
-rw-r--r--tools/nnapi_quickcheck/tests/div_1.lst13
-rw-r--r--tools/nnapi_quickcheck/tests/div_2.cpp152
-rw-r--r--tools/nnapi_quickcheck/tests/div_2.lst10
-rw-r--r--tools/nnapi_quickcheck/tests/fully_connected_1.cpp187
-rw-r--r--tools/nnapi_quickcheck/tests/fully_connected_1.lst9
-rw-r--r--tools/nnapi_quickcheck/tests/fully_connected_quan_1.cpp189
-rw-r--r--tools/nnapi_quickcheck/tests/fully_connected_quan_1.lst9
-rw-r--r--tools/nnapi_quickcheck/tests/gather_1.cpp132
-rw-r--r--tools/nnapi_quickcheck/tests/gather_1.lst6
-rw-r--r--tools/nnapi_quickcheck/tests/gather_2.cpp136
-rw-r--r--tools/nnapi_quickcheck/tests/gather_2.lst7
-rw-r--r--tools/nnapi_quickcheck/tests/logistic_quan_1.cpp140
-rw-r--r--tools/nnapi_quickcheck/tests/logistic_quan_1.lst8
-rw-r--r--tools/nnapi_quickcheck/tests/max_pool_1.cpp156
-rw-r--r--tools/nnapi_quickcheck/tests/max_pool_1.lst17
-rw-r--r--tools/nnapi_quickcheck/tests/max_pool_quan_1.cpp158
-rw-r--r--tools/nnapi_quickcheck/tests/max_pool_quan_1.lst17
-rw-r--r--tools/nnapi_quickcheck/tests/mul_1.cpp152
-rw-r--r--tools/nnapi_quickcheck/tests/mul_1.lst10
-rw-r--r--tools/nnapi_quickcheck/tests/mul_2.cpp150
-rw-r--r--tools/nnapi_quickcheck/tests/mul_2.lst9
-rw-r--r--tools/nnapi_quickcheck/tests/mul_quan_1.cpp152
-rw-r--r--tools/nnapi_quickcheck/tests/mul_quan_1.lst10
-rw-r--r--tools/nnapi_quickcheck/tests/relu1_1.cpp121
-rw-r--r--tools/nnapi_quickcheck/tests/relu1_1.lst6
-rw-r--r--tools/nnapi_quickcheck/tests/relu6_1.cpp125
-rw-r--r--tools/nnapi_quickcheck/tests/relu6_1.lst6
-rw-r--r--tools/nnapi_quickcheck/tests/relu6_quan_1.cpp123
-rw-r--r--tools/nnapi_quickcheck/tests/relu6_quan_1.lst6
-rw-r--r--tools/nnapi_quickcheck/tests/relu_1.cpp125
-rw-r--r--tools/nnapi_quickcheck/tests/relu_1.lst6
-rw-r--r--tools/nnapi_quickcheck/tests/relu_2.cpp128
-rw-r--r--tools/nnapi_quickcheck/tests/relu_2.lst7
-rw-r--r--tools/nnapi_quickcheck/tests/relu_3.cpp131
-rw-r--r--tools/nnapi_quickcheck/tests/relu_3.lst8
-rw-r--r--tools/nnapi_quickcheck/tests/relu_quan_1.cpp123
-rw-r--r--tools/nnapi_quickcheck/tests/relu_quan_1.lst6
-rw-r--r--tools/nnapi_quickcheck/tests/reshape_1.cpp141
-rw-r--r--tools/nnapi_quickcheck/tests/reshape_1.lst7
-rw-r--r--tools/nnapi_quickcheck/tests/reshape_quan_1.cpp143
-rw-r--r--tools/nnapi_quickcheck/tests/reshape_quan_1.lst7
-rw-r--r--tools/nnapi_quickcheck/tests/resize_bilinear_1.cpp141
-rw-r--r--tools/nnapi_quickcheck/tests/resize_bilinear_1.lst10
-rw-r--r--tools/nnapi_quickcheck/tests/softmax_1.cpp120
-rw-r--r--tools/nnapi_quickcheck/tests/softmax_1.lst6
-rw-r--r--tools/nnapi_quickcheck/tests/softmax_2.cpp139
-rw-r--r--tools/nnapi_quickcheck/tests/softmax_2.lst11
-rw-r--r--tools/nnapi_quickcheck/tests/softmax_quan_1.cpp122
-rw-r--r--tools/nnapi_quickcheck/tests/softmax_quan_1.lst6
-rw-r--r--tools/nnapi_quickcheck/tests/split_1.cpp153
-rw-r--r--tools/nnapi_quickcheck/tests/split_1.lst10
-rw-r--r--tools/nnapi_quickcheck/tests/split_2.cpp153
-rw-r--r--tools/nnapi_quickcheck/tests/split_2.lst10
-rw-r--r--tools/nnapi_quickcheck/tests/split_3.cpp147
-rw-r--r--tools/nnapi_quickcheck/tests/split_3.lst8
-rw-r--r--tools/nnapi_quickcheck/tests/split_4.cpp147
-rw-r--r--tools/nnapi_quickcheck/tests/split_4.lst8
-rw-r--r--tools/nnapi_quickcheck/tests/sub_1.cpp159
-rw-r--r--tools/nnapi_quickcheck/tests/sub_1.lst13
-rw-r--r--tools/nnapi_quickcheck/tests/sub_2.cpp152
-rw-r--r--tools/nnapi_quickcheck/tests/sub_2.lst10
-rw-r--r--tools/nnapi_quickcheck/tests/sub_3.cpp144
-rw-r--r--tools/nnapi_quickcheck/tests/sub_3.lst8
-rw-r--r--tools/nnapi_quickcheck/tests/sub_4.cpp152
-rw-r--r--tools/nnapi_quickcheck/tests/sub_4.lst11
-rw-r--r--tools/nnapi_quickcheck/tests/sub_5.cpp188
-rw-r--r--tools/nnapi_quickcheck/tests/sub_5.lst13
-rw-r--r--tools/nnapi_quickcheck/tests/sub_6.cpp188
-rw-r--r--tools/nnapi_quickcheck/tests/sub_6.lst13
-rw-r--r--tools/nnapi_quickcheck/tests/tanh_1.cpp134
-rw-r--r--tools/nnapi_quickcheck/tests/tanh_1.lst8
-rw-r--r--tools/nnapi_quickcheck/tests/topk_v2_1.cpp138
-rw-r--r--tools/nnapi_quickcheck/tests/topk_v2_1.lst6
-rwxr-xr-xtools/nnpackage_tool/gen_golden/gen_golden.py13
-rw-r--r--tools/nnpackage_tool/model2nnpkg/README.md50
-rwxr-xr-xtools/nnpackage_tool/model2nnpkg/model2nnpkg.py287
-rwxr-xr-xtools/nnpackage_tool/model2nnpkg/model2nnpkg.sh135
-rw-r--r--tools/nnpackage_tool/nncc-tc-to-nnpkg-tc/README.md8
-rwxr-xr-xtools/nnpackage_tool/nncc-tc-to-nnpkg-tc/nncc-tc-to-nnpkg-tc.sh4
-rw-r--r--tools/nnpackage_tool/qnf/qnf.md35
-rw-r--r--tools/nnpackage_tool/qnf/qnf.py147
-rw-r--r--tools/nnpackage_tool/qnf/requirements.txt3
-rwxr-xr-xtools/nnpackage_tool/sth2nnpkgtc/pb2nnpkgtc.sh2
-rw-r--r--tools/nnpackage_tool/sth2nnpkgtc/tflite2nnpkgtc.md2
-rwxr-xr-xtools/nnpackage_tool/sth2nnpkgtc/tflite2nnpkgtc.sh2
-rw-r--r--tools/nnpackage_tool/tflite2circle/README.md28
-rw-r--r--tools/nnpackage_tool/tflite2circle/fuse_instance_norm.js230
-rwxr-xr-xtools/nnpackage_tool/tflite2circle/tflite2circle.sh79
-rwxr-xr-xtools/nnpackage_tool/tflite2circle/tflitejson2circlejson.py41
-rw-r--r--tools/pareto_profiler/README.md95
-rw-r--r--tools/pareto_profiler/estimator/Hlps.py257
-rw-r--r--tools/pareto_profiler/estimator/brute_force_profiler.py71
-rw-r--r--tools/pareto_profiler/estimator/hlps_sampler.py99
-rw-r--r--tools/pareto_profiler/estimator/pareto.py84
-rw-r--r--tools/pareto_profiler/estimator/profile_args.py37
-rw-r--r--tools/pareto_profiler/estimator/random_sampler.py60
-rw-r--r--tools/pareto_profiler/estimator/runner.py148
-rw-r--r--tools/pareto_profiler/estimator/utils.py201
-rw-r--r--tools/pareto_profiler/generator/gen_oplist.py165
-rw-r--r--tools/pareto_profiler/generator/operations_map.json36
-rwxr-xr-xtools/release_tool/git_release.sh47
-rwxr-xr-xtools/release_tool/onert_version.sh15
-rw-r--r--tools/stab/README.md54
-rw-r--r--tools/stab/backend_profiler.py43
-rw-r--r--tools/stab/backend_scheduler.py156
-rw-r--r--tools/stab/nnpkg_helper.py56
-rw-r--r--tools/stab/op_list.txt2
-rw-r--r--tools/stab/op_list_parser.py40
-rw-r--r--tools/stab/remote.py102
-rw-r--r--tools/stab/stab.py73
-rw-r--r--tools/tflite_accuracy/src/tflite_accuracy.cc45
-rw-r--r--tools/tflitefile_tool/README.md49
-rwxr-xr-xtools/tflitefile_tool/config_saver.py130
-rwxr-xr-xtools/tflitefile_tool/graph_stats.py84
-rw-r--r--tools/tflitefile_tool/ir/README.md5
-rw-r--r--tools/tflitefile_tool/ir/__init__.py0
-rwxr-xr-xtools/tflitefile_tool/ir/graph_stats.py59
-rw-r--r--tools/tflitefile_tool/ir/operator.py108
-rw-r--r--tools/tflitefile_tool/ir/subgraph.py170
-rw-r--r--tools/tflitefile_tool/ir/tensor.py120
-rwxr-xr-xtools/tflitefile_tool/model_parser.py97
-rwxr-xr-xtools/tflitefile_tool/model_saver.py36
-rwxr-xr-xtools/tflitefile_tool/operation.py209
-rwxr-xr-xtools/tflitefile_tool/operator_parser.py97
-rwxr-xr-xtools/tflitefile_tool/operator_printer.py59
-rwxr-xr-xtools/tflitefile_tool/operator_wrapping.py115
-rwxr-xr-xtools/tflitefile_tool/option_printer.py67
-rw-r--r--tools/tflitefile_tool/parser/__init__.py0
-rwxr-xr-xtools/tflitefile_tool/parser/model_parser.py31
-rw-r--r--tools/tflitefile_tool/parser/tflite/tflite_enum_str_maps.py40
-rwxr-xr-xtools/tflitefile_tool/parser/tflite/tflite_operator.py63
-rw-r--r--tools/tflitefile_tool/parser/tflite/tflite_option.py96
-rwxr-xr-xtools/tflitefile_tool/parser/tflite/tflite_parser.py112
-rwxr-xr-xtools/tflitefile_tool/parser/tflite/tflite_subgraph.py30
-rwxr-xr-xtools/tflitefile_tool/parser/tflite/tflite_tensor.py124
-rw-r--r--tools/tflitefile_tool/printer/__init__.py0
-rw-r--r--tools/tflitefile_tool/printer/string_builder.py175
-rwxr-xr-xtools/tflitefile_tool/printer/subgraph_printer.py106
-rw-r--r--tools/tflitefile_tool/requirements.txt2
-rw-r--r--tools/tflitefile_tool/saver/__init__.py0
-rwxr-xr-xtools/tflitefile_tool/saver/config_saver.py122
-rwxr-xr-xtools/tflitefile_tool/saver/model_saver.py36
-rwxr-xr-xtools/tflitefile_tool/select_operator.py171
-rwxr-xr-xtools/tflitefile_tool/subgraph_printer.py90
-rwxr-xr-xtools/tflitefile_tool/tensor_printer.py84
-rwxr-xr-xtools/tflitefile_tool/tensor_wrapping.py82
-rw-r--r--tools/tflitefile_tool/tests/README.md36
-rw-r--r--tools/tflitefile_tool/tests/__init__.py0
-rw-r--r--tools/tflitefile_tool/tests/main.py23
-rw-r--r--tools/tflitefile_tool/tests/test_operator.py74
-rw-r--r--tools/tflitefile_tool/tests/test_setup.py46
-rw-r--r--tools/tflitefile_tool/tests/test_string_builder.py63
-rw-r--r--tools/tflitefile_tool/tests/test_subgraph.py120
-rw-r--r--tools/tflitefile_tool/tests/test_tensor.py67
-rw-r--r--tools/tflitefile_tool/tests/test_tflite_parser.py63
7445 files changed, 444301 insertions, 83002 deletions
diff --git a/.ahub/sam/advanced.cfg b/.ahub/sam/advanced.cfg
new file mode 100644
index 000000000..4f8bf6420
--- /dev/null
+++ b/.ahub/sam/advanced.cfg
@@ -0,0 +1,2 @@
+[SamPolicy]
+preset=CCD_FOR_OO
diff --git a/.ahub/sam/exclude.txt b/.ahub/sam/exclude.txt
new file mode 100644
index 000000000..3c2b71f06
--- /dev/null
+++ b/.ahub/sam/exclude.txt
@@ -0,0 +1,52 @@
+# External code: Android NN API
+/ONE/compiler/ann-api/include/NeuralNetworks.h
+/ONE/compiler/ann-ref
+
+# Eigen
+/ONE/compiler/nnc/backends/soft_backend/code_snippets/eigen.def
+
+# Frontend test tools that are needed for release package build
+/ONE/compiler/circlechef
+/ONE/compiler/circle-verify
+/ONE/compiler/luci/tester
+
+# Exclude IR headers which have lots of similar patterns
+# TODO remove this when refactoring is possible
+/ONE/compiler/luci/lang/include/luci/IR/Nodes
+/ONE/compiler/luci/import/include/luci/Import/Nodes
+/ONE/compiler/loco/include/loco/IR
+/ONE/compiler/tflchef/tflite/src/Op/include
+
+# Exclude interpreter kernels which have similar patterns
+/ONE/compiler/luci-interpreter/src/kernels
+/ONE/compiler/locomotiv/src/Node
+
+# Test codes
+/ONE/tests
+
+# Flatbuffers generated
+/ONE/runtime/onert/frontend/circle_schema/include/circle_schema_generated.h
+/ONE/runtime/onert/frontend/tflite/src/tflite_schema_generated.h
+
+# External code: Android NN API
+/ONE/runtime/nnapi-header/include/NeuralNetworks.h
+/ONE/runtime/nnapi-header/include/NeuralNetworksExtensions.h
+
+# External code: Tensorflow lite
+/ONE/runtime/libs/nnapi
+/ONE/runtime/libs/profiling
+
+# External code: 3rd party
+/ONE/runtime/3rdparty
+
+# External code: compute libraries
+/ONE/compute
+
+# Experimental subprojects not for release
+/ONE/runtime/contrib
+
+# Downloaded externals
+/ONE/externals
+
+# Intermediate code for runtime build (refer nnfw.spec file's nncc_workspace)
+/ONE/build/nncc/
diff --git a/.ahub/tcchecker-tca/config.yaml b/.ahub/tcchecker-tca/config.yaml
index cd34d792f..12fbabefd 100644
--- a/.ahub/tcchecker-tca/config.yaml
+++ b/.ahub/tcchecker-tca/config.yaml
@@ -1,37 +1,85 @@
version: 2
test:
- - name: NN Runtime
+ - name: NN_Runtime
testCaseLanguage: CPP
testFW: GTEST
testCaseFolder:
- - ./compute/test/cker
- - ./runtime/onert/core/src/backend/cpu_common
- - ./runtime/onert/frontend/nnapi
- - ./runtime/onert/test/core/compiler
- - ./runtime/onert/test/core/exec
- - ./runtime/onert/test/core/interp
- - ./runtime/onert/test/graph
- - ./runtime/onert/test/graph/operand
- - ./runtime/onert/test/graph/operation
- - ./runtime/onert/test/graph/verifier
- - ./runtime/onert/test/ir
- - ./runtime/onert/test/util
- - ./tests/nnapi/src
- - ./tests/nnfw_api/src
- - ./tests/tools/tflite_run/src
+ - /compute/cker
+ - /runtime/libs/misc
+ - /runtime/libs/ndarray
+ - /runtime/onert
+ - /runtime/service/npud/tests
+ - /tests/nnfw_api
testFile:
- - extension: cpp
+ - extension: test.cpp
any: true
- - extension: cc
+ - extension: test.cc
any: true
-
testCase:
- condition:
- functionName:
starts:
- TEST
-
+ - TYPED_TEST
+ - excludes :
+ - Verifier.dag_checker
+ - graph_operand_LayoutSet.layout_set_operators
+ - InterpExecutorTest.executeTwoStep
+ - InterpExecutorTest.execute
+ - InterpExecutorTest.setOutput
+ - InterpExecutorTest.create_empty
+ - InterpExecutorTest.setOutputForUnspecifiedDimensions
+ - InterpExecutorTest.setInputForUnspecifiedDimensions
+ - InterpExecutorTest.setInput
+ - InterpExecutorTest.create_simple
+ - ExecTime.structure
+ - ExecTime.roundtrip_ok
+ - SchedulerTest.branched_graph_profiling_mode
+ - SchedulerTestWithExecutorParam.straight_graph_known_exec_time
+ - SchedulerTestWithExecutorParam.branched_graph_known_exec_time
+ - TFLite_test_case.simple_test
+ - ExecInstance.simple
+ - ExecInstance.twoExecution
+ - ExecInstance.twoCompile
+ - ExecInstance.async
+ - ExecInstance.twoThreads
+ - graph_operand_usedef.usedef_test
+ - Graph.inputs_and_outputs
+ - nnfw_create_session.Test_001
+ - nnfw_create_session.Negative_001
+ - WICPlanner.claim_release_test
+ - BumpPlanner.claim_test
+ - Allocator.allocate_test
+ - FirstFitPlanner.claim_release_test
+ - graph_operation_setIO.operation_setIO_concat
+ - graph_operation_setIO.operation_setIO_conv
+ - ValidationTest.neg_prepare_001
+ - ValidationTestOneOpModelLoaded.prepare_001
+ - graph_OperandIndexSequence.replace
+ - graph_OperandIndexSequence.append
+ - MODEL.model_build
+ - graph_operation_Set.operation_test
+ - graph_operand_Set.set_test
+ - ValidationTestSessionCreated.neg_load_session_001
+ - ValidationTestSessionCreated.load_session_001
+ - ShapeInference.Pool2DNodeExplicit
+ - ShapeInference.Elementwise
+ - ShapeInference.Concat
+ - ShapeInference.Pool2DNodeSame
+ - ShapeInference.IncorrectElementwise
+ - ShapeInference.Conv2D
+ - ShapeInference.Pool2DNodeValid
+ - ShapeInference.FullyConnected
+ - ShapeInference.DepthwiseConv2D
+ - ObjectManager.non_const_iterate
+ - ObjectManager.const_iterate
+ - ObjectManager.emplace
+ - ObjectManager.remove_2
+ - ObjectManager.remove_1
+ - ObjectManager.push
+ - Index.index_test
+
negativeTestCase:
- condition:
- testName:
@@ -41,3 +89,546 @@ test:
positiveTestCase:
- condition:
- inverse: negativeTestCase
+
+ - name: NN_Compiler
+ testCaseLanguage: CPP
+ testFW: GTEST
+ testCaseFolder:
+ - /compiler/angkor
+ - /compiler/arser
+ - /compiler/circle-partitioner
+ - /compiler/circle-quantizer
+ - /compiler/circle-tensordump
+ - /compiler/circle2circle
+ - /compiler/circlechef
+ - /compiler/circledump
+ - /compiler/crew
+ - /compiler/cwrap
+ - /compiler/dio-hdf5
+ - /compiler/foder
+ - /compiler/hermes
+ - /compiler/hermes-std
+ - /compiler/loco
+ - /compiler/locomotiv
+ - /compiler/locop
+ - /compiler/logo
+ - /compiler/logo-core
+ - /compiler/luci
+ - /compiler/luci-interpreter
+ - /compiler/luci-eval-driver
+ - /compiler/luci-pass-value-test
+ - /compiler/luci-value-test
+ - /compiler/mio-circle05
+ - /compiler/mio-tflite
+ - /compiler/mio-tflite260
+ - /compiler/oops
+ - /compiler/pepper-assert
+ - /compiler/pepper-csv2vec
+ - /compiler/pepper-str
+ - /compiler/pepper-strcast
+ - /compiler/pp
+ - /compiler/rawdata2hdf5
+ - /compiler/record-minmax
+ - /compiler/safemain
+ - /compiler/souschef
+ - /compiler/tflchef
+ - /compiler/tflite2circle
+ - /compiler/vconone
+
+ testFile:
+ - extension: .test.cpp
+ any: true
+
+ testCase:
+ - condition:
+ - functionName:
+ starts:
+ - TEST
+ - excludes :
+ - ConstantFolding.const_relu_to_const
+ - ConstantFolding.const_relu_to_concat
+ - ADT_TENSOR_OVERLAY.access
+ - ADT_TENSOR_OVERLAY.ctor
+ - ADT_TENSOR_OVERLAY.read
+ - NodeExecution_BiasEncode.f32
+ - NodeExecution_BiasEncode.s32
+ - NodeExecution_EltwiseDiv.f32
+ - CircleLogicalOrTest.constructor_P
+ - NodeExecution_TensorConcat.f32_2
+ - NodeExecution_TensorConcat.f32
+ - CircleShapeInferenceRuleTest.avgpool2d_valid
+ - CircleShapeInferenceRuleTest.TFAdd_shapeinf_different
+ - CircleShapeInferenceRuleTest.minimal_with_CircleRelu
+ - CircleShapeInferenceRuleTest.CircleTranspose_simple
+ - CircleShapeInferenceRuleTest.avgpool2d_same
+ - CircleConv2Dest.constructor_P
+ - ADT_TENSOR_BUFFER.access
+ - ADT_TENSOR_BUFFER.ctor
+ - CircleRelu6Test.constructor_P
+ - Circle2CircleTest.NoArg_NEG
+ - CircleInstanceNormTest.constructor
+ - ADT_KERNEL_INDEX_ENUMERATOR.iterate_full_range
+ - ADT_TENSOR_INDEX_ENUMERATOR.iterate_full_range
+ - CirclePadTest.constructor_P
+ - ADT_KERNEL_KERNEL_NHWC_LAYOUT.n_increment
+ - ADT_KERNEL_KERNEL_NHWC_LAYOUT.col_increment
+ - ADT_KERNEL_KERNEL_NHWC_LAYOUT.ch_increment
+ - ADT_KERNEL_KERNEL_NHWC_LAYOUT.row_increment
+ - ADT_TENSOR_LEXICAL_LAYOUT.last
+ - ADT_TENSOR_LEXICAL_LAYOUT.lexical_first
+ - ADT_TENSOR_LEXICAL_LAYOUT.lexical_middle
+ - FeatureShapeTest.settet_and_getter
+ - FeatureShapeTest.default_constructor
+ - INDENTED_STRING_BUILDER.usage
+ - NodeExecution_Fixed_Reduce_Mean.f32_1
+ - NodeExecution_Fixed_Reduce_Mean.f32_0
+ - CircleAbsTest.constructor
+ - CircleMaximumTest.constructor_P
+ - FORMAT.simple_string
+ - FORMAT.concat_rvalue
+ - FORMAT.concat_lvalue
+ - FORMAT.simple_number
+ - ADT_KERNEL_BUFFER.ctor
+ - ADT_KERNEL_BUFFER.access
+ - ADT_TENSOR_SHAPE.num_elements_rank_0
+ - ADT_TENSOR_SHAPE.squeeze_neg_0
+ - ADT_TENSOR_SHAPE.num_elements_zero
+ - ADT_TENSOR_SHAPE.copy
+ - ADT_TENSOR_SHAPE.eq_negative_on_unmatched_dim
+ - ADT_TENSOR_SHAPE.num_elements_nulldim
+ - ADT_TENSOR_SHAPE.eq_positive
+ - ADT_TENSOR_SHAPE.squeeze_pos
+ - ADT_TENSOR_SHAPE.resize
+ - ADT_TENSOR_SHAPE.ctor_initializer_list
+ - ADT_TENSOR_SHAPE.squeeze_neg
+ - ADT_TENSOR_SHAPE.squeeze_nested
+ - ADT_TENSOR_SHAPE.num_elements_nonzero
+ - ADT_TENSOR_SHAPE.eq_negative_on_unmatched_rank
+ - ADT_TENSOR_SHAPE.dim
+ - ADT_TENSOR_SHAPE.ctor
+ - GraphBuilderTest.Usecase_000
+ - QueueTest.take
+ - MultiDialectShapeInferenceRuleTest.test1
+ - AlgorithmTest.postorder_traversal_incomplte_graph
+ - AlgorithmTest.active_nodes
+ - AlgorithmTest.postorder_traversal_visit_once
+ - AlgorithmTest.postorder_traversal
+ - CircleSquaredDifferenceTest.constructor_P
+ - NodeShapeTest.feature_shape_constructor
+ - NodeShapeTest.filter_shape_constructor
+ - NodeShapeTest.default_constructor
+ - NodeShapeTest.copy_constructible
+ - NodeShapeTest.tensor_shape_constructor
+ - NodeShapeTest.dwfilter_shape_constructor
+ - NodeShapeTest.bias_shape_constructor
+ - ADT_KERNEL_KERNEL_NCHW_LAYOUT.n_increment
+ - ADT_KERNEL_KERNEL_NCHW_LAYOUT.col_increment
+ - ADT_KERNEL_KERNEL_NCHW_LAYOUT.row_increment
+ - ADT_KERNEL_KERNEL_NCHW_LAYOUT.ch_increment
+ - CircleEqualTest.constructor_P
+ - VerifierTest.valid_error_reporter
+ - VerifierTest.valid_minimal
+ - DataTypeTraitsTest.FLOAT32
+ - NodeExecution_EltwiseSub.f32
+ - NodeExecution_FeatureCodec.s32
+ - NodeExecution_FeatureCodec.f32
+ - ADT_TENSOR_INDEX.operator_add
+ - ADT_TENSOR_INDEX.ctor_initializer_list
+ - ADT_TENSOR_INDEX.fill
+ - ADT_TENSOR_INDEX.operator_eqaul
+ - ADT_TENSOR_INDEX.resize
+ - ADT_TENSOR_INDEX.operator_add_different_size
+ - ADT_TENSOR_INDEX.at
+ - ADT_TENSOR_INDEX.ctor
+ - ADT_TENSOR_INDEX.copy
+ - ADT_KERNEL_OVERLAY.access
+ - ADT_KERNEL_OVERLAY.read
+ - ADT_KERNEL_OVERLAY.ctor
+ - BiasShapeTest.default_constructor
+ - FildesTest.destructor
+ - FildesTest.value_constructor
+ - FildesTest.move_constructor
+ - FildesTest.default_constructor
+ - CircleGatherTest.constructor
+ - LinearV1FormatterTest.node_summary_builder_composition
+ - LinearV1FormatterTest.user_defined_node_summary_builder
+ - LinearV1FormatterTest.simple
+ - SourceTest.construct
+ - SourceTest.macro
+ - CircleFullyConnectedTest.constructor
+ - ADT_FEATURE_OVERLAY.read
+ - ADT_FEATURE_OVERLAY.access
+ - ADT_FEATURE_OVERLAY.ctor
+ - ContextTest.constructor
+ - CircleDivTest.constructor_P
+ - NodeExecution_Reshape.f32
+ - MultiDialectTypeInferenceRuleTest.test1
+ - CanonicalTypeInferenceRuleTest.relu6
+ - TypeInferenceTest.framework
+ - CanonicalTypeInferenceRuleTest.tensor_broadcast
+ - CanonicalTypeInferenceRuleTest.minimal
+ - PermutingDecoderTest.feature
+ - PemutationTest.feature
+ - PermutingEncoderTest.depthwisefilter_init
+ - PermutingDecoderTest.filter
+ - PermutingEncoderTest.depthwise_filter
+ - PermutingEncoderTest.filter
+ - PermutingEncoderTest.feature_clone
+ - PermutingEncoderTest.feature
+ - PermutingDecoderTest.depthwise_filter
+ - PemutationTest.depthwise_filter
+ - PermutingDecoderTest.feature_clone
+ - PemutationTest.filter
+ - PadTest.default_constructor_2D
+ - NodeDomain.as_annotation
+ - CirclePackTest.constructor
+ - ADT_TENSOR_LAYOUT.move
+ - ADT_TENSOR_LAYOUT.ctor
+ - ADT_TENSOR_LAYOUT.copy
+ - DepthwiseFilterShapeTest.settet_and_getter
+ - DepthwiseFilterShapeTest.default_constructor
+ - CircleTypeInferenceRuleTest.minimal_with_CircleRelu
+ - GenericNodeSummaryBuilderTest.simple
+ - LogoPassTests.pass_name_over_unnamed_pass
+ - LogoPassTests.pass_name_over_named_pass
+ - CircleReluTest.constructor_P
+ - PaddingNDTest.default_constructor_ND
+ - TensorShapeTest.copy
+ - TensorShapeTest.rank
+ - TensorShapeTest.element_count
+ - TensorShapeTest.dim
+ - TensorShapeTest.rank_update
+ - TensorShapeTest.default_constructor
+ - TensorShapeTest.initializer_list_constructor
+ - DepthwiseFilterIndexTest.settet_and_getter
+ - DepthwiseFilterIndexTest.default_constructor
+ - MemoryTest.make_unique
+ - AnnotatedItemTest.annotation
+ - NodeExecution_DepthwiseFilterEncode.f32
+ - CircleBatchToSpaceNDTest.constructor
+ - WindowTest.setter_and_getter_2D
+ - WindowTest.default_constructor_2D
+ - NodeExecution_Tanh.f32
+ - MessageBufferTest.pass_constructed_message_on_descturction
+ - NodeExecution_TensorBroadcast.f32
+ - CircleSubTest.constructor_P
+ - NodeExecution_AvgPool2D.f32_1x3x3x1_calculation
+ - NodeExecution_AvgPool2D.f32_1x4x4x1_calculation
+ - NodeExecution_Conv2D.f32_multiple_channel
+ - NodeExecution_Conv2D.f32_1x5x5x1_calculation
+ - NodeExecution_Conv2D.with_padding
+ - ADT_FEATURE_HWC_LAYOUT.W_increase
+ - ADT_FEATURE_HWC_LAYOUT.C_increase
+ - ADT_FEATURE_HWC_LAYOUT.H_increase
+ - SimplifyDomainConversionPass.FilterEncode_FilterDecode_equal_perms
+ - SimplifyDomainConversionPass.FilterEncode_FilterDecode_different_perms
+ - CircleDialectTest.get_N
+ - CircleDialectTest.get_P
+ - LINEAR_DOCUMENT.line
+ - LINEAR_DOCUMENT.lines
+ - NodeExecution_Push.f32
+ - NodeExecution_Push.s32
+ - NodeExecution_DepthwiseConv2D.f32_random_valid
+ - NodeExecution_Pad.tensor_constant_pad_6_dim
+ - NodeExecution_Pad.tensor_constant_pad_1_dim
+ - NodeExecution_Pad.tensor_constant_pad_4_dim
+ - DepthwiseConv2DTest.constructor
+ - ConstGenTest.constructor_s32
+ - TransposedConv2DTest.constructor
+ - PullTest.shape
+ - MatrixDecodeTest.constructor
+ - FilterEncodeTest.constructor
+ - AvgPool2DTest.constructor
+ - Reshape_Fixed_Test.shape
+ - TensorConcatTest.constructor
+ - EltwiseSqrtTest.constructor
+ - TensorBiasAddTest.alias
+ - EltwiseSubTest.constructor
+ - TensorBroadcastTest.mapping
+ - PullTest.constructor
+ - PushTest.shape
+ - MaxPool2DTest.pad
+ - EltwiseMulTest.constructor
+ - DepthwiseFilterEncodeTest.constructor
+ - ForwardTest.constructor
+ - MaxPool2DTest.constructor
+ - TransposeTest.perm
+ - MatMulTest.constructor
+ - FeatureBiasAddTest.constructor
+ - TensorBroadcastTest.constructor
+ - FeatureEncodeTest.constructor
+ - MatrixEncodeTest.constructor
+ - ReLUTest.constructor
+ - BiasEncodeTest.constructor
+ - FilterDecodeTest.constructor
+ - EltwiseDivTest.constructor
+ - PushTest.constructor
+ - EltwiseAddTest.constructor
+ - Conv2DTest.constructor
+ - EltwiseMaxTest.constructor
+ - Reshape_Fixed_Test.constructor
+ - TransposeTest.constructor
+ - ConstGenTest.constructor
+ - FeatureBiasAddTest.alias
+ - DepthwiseFilterDecodeTest.constructor
+ - ReLU6Test.constructor
+ - FeatureDecodeTest.constructor
+ - TensorBiasAddTest.constructor
+ - NodeExecution_ReLU6.f32
+ - CircleSqrtTest.constructor_P
+ - CircleRsqrtTest.constructor
+ - LINEAR_DOCUMENT.append_empty_string
+ - LINEAR_DOCUMENT.indent
+ - LINEAR_DOCUMENT.append_multi_line_text
+ - LINEAR_DOCUMENT.append_void
+ - LINEAR_DOCUMENT.document_append
+ - LINEAR_DOCUMENT.formatted_append
+ - LINEAR_DOCUMENT.forward_append
+ - LINEAR_DOCUMENT.reverse_append
+ - NodeData.as_s32_buffer_wrapper
+ - NodeData.as_f32_buffer_wrapper
+ - ConsoleReporterTest.constructor
+ - ConsoleReporterTest.notify
+ - NodeExecution_TensorBiasAdd.f32
+ - NodeExecution_FeatureBiasAdd.f32
+ - ADT_KERNEL_SHAPE.num_elements
+ - ADT_KERNEL_SHAPE.operator_eq
+ - ADT_KERNEL_SHAPE.ctor
+ - CircleLogicalNotTest.constructor_P
+ - CircleConcatenationTest.constructor_P
+ - ModuleTest.add_more
+ - ModuleTest.consturctor
+ - ModuleTest.add
+ - ModuleTest.add_nullptr_NEG
+ - ModuleTest.graph_index_overflow_NEG
+ - CircleArgMaxTest.constructor_P
+ - CircleReshapeTest.alloc_new_shape_P
+ - CircleReshapeTest.constructor_P
+ - CircleAddTest.constructor_P
+ - CanonicalShapeInferenceRuleTest.tensor_concat
+ - CanonicalShapeInferenceRuleTest.feature_codec
+ - CanonicalShapeInferenceRuleTest.maxpool2d
+ - CanonicalShapeInferenceRuleTest.minimal
+ - CanonicalShapeInferenceRuleTest.const_gen
+ - CanonicalShapeInferenceRuleTest.depthwiseconv2d
+ - CanonicalShapeInferenceRuleTest.infer_v2
+ - CanonicalShapeInferenceRuleTest.avgpool2d
+ - CanonicalShapeInferenceRuleTest.tensor_broadcast
+ - CanonicalShapeInferenceRuleTest.transposedconv2d
+ - CanonicalShapeInferenceRuleTest.fixed_reshape
+ - CanonicalShapeInferenceRuleTest.relu
+ - CanonicalShapeInferenceRuleTest.tensor_transpose
+ - NodeExecution_Softmax.f32
+ - CircleCosTest.constructor_P
+ - HermesTest.simple_usecase
+ - CircleMaxPool2DTest.constructor_P
+ - GraphTest.graph_node_enumeration
+ - GraphTest.graph_name
+ - GraphTest.create_input
+ - NamedTest.constructor
+ - NamedTest.setter_and_getter
+ - GraphTest.create_and_destroy_node
+ - GraphTest.graph_name_nullptr_NEG
+ - DataTypedMixinTest.constructor
+ - DataTypedMixinTest.setter_and_getter
+ - GraphTest.consturctor_with_param_node
+ - TensorShapedMixinTest.setter_and_getter
+ - GraphTest.getters_over_const_instance
+ - GraphTest.create_output
+ - GraphTest.graph_inout_enumeration
+ - StrideTest.default_constructor_2D
+ - StrideTest.setter_and_getter_2D
+ - ADT_FEATURE_CHW_LAYOUT.col_increase
+ - ADT_FEATURE_CHW_LAYOUT.ch_increase
+ - ADT_FEATURE_CHW_LAYOUT.row_increase
+ - TensorIndexTest.copy
+ - TensorIndexTest.fill
+ - TensorIndexTest.at
+ - TensorIndexTest.ctor_initializer_list
+ - TensorIndexTest.resize
+ - TensorIndexTest.ctor
+ - NodeDataImpl.as_annotation
+ - MUILTI_LINE_TEXT_UTILS.operator_shift
+ - SeverityTest.fatal
+ - SeverityTest.warn
+ - SeverityTest.error
+ - SeverityTest.info
+ - SeverityTest.verbose
+ - MessageTest.ctor
+ - MessageTextTest.multiline
+ - NodeExecution_TransposedConv2D.f32
+ - ADT_FEATURE_BUFFER.ctor
+ - ADT_FEATURE_BUFFER.access
+ - UseTest.constructor
+ - UseTest.link_node
+ - NodeExecution_FilterEncode.f32
+ - NodeExecution_FilterEncode.s32
+ - CircleTransposeTest.constructor_P
+ - DimensionTest.value_constructor
+ - DimensionTest.default_constructor
+ - DimensionTest.operator_eq
+ - DimensionTest.make_unknown_dimension
+ - DimensionTest.unset
+ - DimensionTest.set
+ - TensorShapeTest.ctor_initializer_list
+ - TensorShapeTest.eq_negative_on_unmatched_dim
+ - TensorShapeTest.copy
+ - TensorShapeTest.eq_negative_on_unmatched_rank
+ - TensorShapeTest.dim
+ - TensorShapeTest.resize
+ - TensorShapeTest.eq_positive
+ - TensorShapeTest.ctor
+ - TensorFlowLiteImport.Dummy
+ - CircleTransposeConvTest.constructor_P
+ - LOCO.identity_network
+ - CanonicalDialectTest.get
+ - FeatureIndexTest.default_constructor
+ - FeatureIndexTest.settet_and_getter
+ - ADT_FEATURE_LAYOUT.move
+ - ADT_FEATURE_LAYOUT.ctor
+ - ADT_FEATURE_LAYOUT.copy
+ - CircleSoftmaxTest.constructor_P
+ - CanonicalNodeTest.mutable_visitor
+ - CanonicalNodeTest.visitor
+ - CanonicalNodeTest.visitor_with_user_default_impl
+ - NodeExecution_ReLU.f32
+ - ShapeInferenceTest.framework
+ - NodeExecution_EltwiseSqrt.f32
+ - NodeExecution_MatrixCodec.WH_f32
+ - NodeExecution_MatrixCodec.HW_s32
+ - ADT_FEATURE_SHAPE.operator_eq
+ - ADT_FEATURE_SHAPE.ctor
+ - ADT_FEATURE_SHAPE.num_elements
+ - SET.operator_diff
+ - SET.operator_eq
+ - NodeExecution_ConstGen.s32
+ - NodeExecution_ConstGen.f32
+ - CircleMulTest.constructor_P
+ - StrCastTests.safe_strcast_int
+ - NodeExecution_EltwiseMax.f32
+ - NodeExecution_Pull.check_data_ready
+ - FormattedTensorShapeTest.BracketFormat
+ - FilterShapeTest.settet_and_getter
+ - FilterShapeTest.default_constructor
+ - NodeExecution_MaxPool2D.with_padding
+ - NodeExecution_MaxPool2D.f32_1x3x3x1_calculation
+ - NodeExecution_EltwiseAdd.f32
+ - ADT_KERNEL_LAYOUT.move
+ - ADT_KERNEL_LAYOUT.ctor
+ - ADT_KERNEL_LAYOUT.copy
+ - NodeExecution_MatMul.s32_4x2_2x6
+ - NodeExecution_MatMul.f32_2x3_3x3
+ - CircleDepthwiseConv2DTest.constructor_P
+ - NodeExecution_Forward.s32
+ - NodeExecution_Forward.f32
+ - NodeExecution_EltwiseMul.f32
+ - FilterIndexTest.default_constructor
+ - FilterIndexTest.settet_and_getter
+ - DialectTest.service
+ - Session.inference_identity
+ - Session.dtor
+ - Session.session_for_subgraph
+ - Session.set_input
+ - Session.ctor_by_range
+ - Session.graph_IO_size
+ - NodeTest.constructor
+ - NodeTest.replace_with
+ - NodeTest.succs
+ - NodeTest.preds
+ - FixedArityNodeTest.constructor
+
+ negativeTestCase:
+ - condition:
+ - testName:
+ ends:
+ - _NEG
+
+ positiveTestCase:
+ - condition:
+ - inverse: negativeTestCase
+ - name: onert-micro
+ testCaseLanguage: CPP
+ testFW: GTEST
+ testCaseFolder:
+ - /onert-micro
+
+ testFile:
+ - extension: test.cpp
+ any: true
+ - extension: test.cc
+ any: true
+ - excludes:
+ - Greater.test.cpp
+ - LeakyRelu.test.cpp
+ - Dequantize.test.cpp
+ - L2Normalize.test.cpp
+ - OneHot.test.cpp
+ - BatchToSpaceND.test.cpp
+ - BatchMatMul.test.cpp
+ - SpaceToBatchND.test.cpp
+ - LocalResponseNormalization.test.cpp
+ - LessEqual.test.cpp
+ - Minimum.test.cpp
+ - Relu6.test.cpp
+ - ResizeBilinear.test.cpp
+ - SquaredDifference.test.cpp
+ - SpaceToDepth.test.cpp
+ - SVDF.test.cpp
+ - Neg.test.cpp
+ - InstanceNorm.test.cpp
+ - MirrorPad.test.cpp
+ - Quantize.test.cpp
+ - ResizeNearestNeighbor.test.cpp
+ - LogicalNot.test.cpp
+ - Elu.test.cpp
+ - If.test.cpp
+ - ReverseV2.test.cpp
+ - Equal.test.cpp
+ - FloorDiv.test.cpp
+ - Rsqrt.test.cpp
+ - L2Pool2D.test.cpp
+ - PRelu.test.cpp
+ - TransposeConv.test.cpp
+ - ArgMax.test.cpp
+ - LogicalOr.test.cpp
+ - Div.test.cpp
+ - LogicalAnd.test.cpp
+ - Square.test.cpp
+ - AveragePool2D.test.cpp
+ - Pow.test.cpp
+ - Softmax.test.cpp
+ - NotEqual.test.cpp
+ - Cast.test.cpp
+ - Floor.test.cpp
+ - Exp.test.cpp
+ - GreaterEqual.test.cpp
+ - Maximum.test.cpp
+ - Mean.test.cpp
+ - PadV2.test.cpp
+ - Squeeze.test.cpp
+ - Pad.test.cpp
+ - DepthwiseConv2D.test.cpp
+ - Sqrt.test.cpp
+ - Relu.test.cpp
+ - LogSoftmax.test.cpp
+ - DepthToSpace.test.cpp
+ - Unpack.test.cpp
+ testCase:
+ - condition:
+ - functionName:
+ starts:
+ - TEST
+ - TYPED_TEST
+ - excludes:
+ - Verifier.dag_checker
+ negativeTestCase:
+ - condition:
+ - testName:
+ ends:
+ - _NEG
+
+ positiveTestCase:
+ - condition:
+ - testName:
+ ends:
+ - _P
diff --git a/.clang-format b/.clang-format
index 7dcf11cad..9243c9a2b 100644
--- a/.clang-format
+++ b/.clang-format
@@ -1,4 +1,3 @@
----
Language: Cpp
BasedOnStyle: Google
AccessModifierOffset: -2
@@ -21,18 +20,19 @@ AlwaysBreakTemplateDeclarations: false
BinPackArguments: true
BinPackParameters: true
BraceWrapping:
- AfterClass: true
- AfterControlStatement: true
- AfterEnum: false
- AfterFunction: true
- AfterNamespace: false
- AfterObjCDeclaration: false
- AfterStruct: true
- AfterUnion: true
- BeforeCatch: true
- BeforeElse: true
- IndentBraces: false
-BreakBeforeBraces: Allman
+ AfterClass: true
+ AfterControlStatement: true
+ AfterEnum: true
+ AfterFunction: true
+ AfterNamespace: true
+ AfterObjCDeclaration: false
+ AfterStruct: true
+ AfterUnion: false
+ AfterExternBlock: false
+ BeforeCatch: true
+ BeforeElse: true
+ IndentBraces: false
+BreakBeforeBraces: Custom
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakAfterJavaFieldAnnotations: false
@@ -40,12 +40,13 @@ BreakStringLiterals: true
ColumnLimit: 100
CommentPragmas: '^ IWYU pragma:'
ConstructorInitializerAllOnOneLineOrOnePerLine: false
-ConstructorInitializerIndentWidth: 4
-ContinuationIndentWidth: 4
+ConstructorInitializerIndentWidth: 2
+ContinuationIndentWidth: 2
Cpp11BracedListStyle: true
DerivePointerAlignment: false
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
+FixNamespaceComments: true
IncludeCategories:
- Regex: '^"(llvm|llvm-c|clang|clang-c)/'
Priority: 2
@@ -75,6 +76,7 @@ PenaltyReturnTypeOnItsOwnLine: 60
PointerAlignment: Right
ReflowComments: true
SortIncludes: false
+SortUsingDeclarations: false
SpaceAfterCStyleCast: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
@@ -86,5 +88,5 @@ SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Cpp11
-TabWidth: 4
+TabWidth: 2
UseTab: Never
diff --git a/.gitattributes b/.gitattributes
index b8eec3df8..d106893d2 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1 +1,17 @@
-tests/nnapi/specs/* linguist-detectable=false
+# Exclude from git language statistics
+tests/nnapi/specs/** linguist-detectable=false
+res/** linguist-detectable=false
+
+# Default: text file
+# - Set End-Of-Line type
+* text eol=lf
+
+# Binary - ignore text file setting
+*.bmp binary
+*.caffemodel binary
+*.h5 binary
+*.jar binary
+*.pdf binary
+*.png binary
+*.tar.gz binary
+*.tflite binary
diff --git a/.github/workflows/check-format.yml b/.github/workflows/check-format.yml
new file mode 100644
index 000000000..dcfb8d5e8
--- /dev/null
+++ b/.github/workflows/check-format.yml
@@ -0,0 +1,64 @@
+name: Check code format
+
+on:
+ push:
+ branches:
+ - master
+ - release/*
+ pull_request:
+ branches:
+ - master
+ - release/*
+
+defaults:
+ run:
+ shell: bash
+
+jobs:
+ check-format:
+ name: Check format
+ runs-on: ubuntu-20.04
+
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v3
+
+ - name: Setup python
+ uses: actions/setup-python@v4
+ with:
+ python-version: '3.x'
+
+ # C format: clang-format-8
+ # Python format: yapf==0.22.0
+ - name: Install packages
+ run: |
+ sudo apt-get install -y clang-format-8
+ python -m pip install --upgrade pip
+ pip install yapf==0.22.0
+
+ - name: Check
+ run: ./nnas format
+
+ # Upload patch file if failed
+ - name: Store archive
+ uses: actions/upload-artifact@v2
+ if: failure()
+ with:
+ name: format-patch
+ path: format.patch
+ retention-days: 3
+
+ check-copyright:
+ name: Check copyright
+ runs-on: ubuntu-20.04
+
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v3
+ with:
+ # Fetch all history and branches (fetch-depth defaults to 1)
+ # Full history is required to get file creation dates
+ fetch-depth: 0
+
+ - name: Check copyright
+ run: ./nnfw copyright-check
diff --git a/.github/workflows/check-pr-commit.yml b/.github/workflows/check-pr-commit.yml
new file mode 100644
index 000000000..7fa84b166
--- /dev/null
+++ b/.github/workflows/check-pr-commit.yml
@@ -0,0 +1,58 @@
+name: Check PR commit
+
+on:
+ pull_request:
+ branches:
+ - master
+ - release/*
+ types:
+ - opened
+ - synchronize
+ - reopened
+ - ready_for_review
+
+defaults:
+ run:
+ shell: bash
+
+jobs:
+ check-commit-message:
+ name: Check commit message
+ runs-on: ubuntu-20.04
+ # Skip draft PRs; run again when a draft is marked ready for review
+ if: github.event.pull_request.draft == false
+
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v3
+ with:
+ # Checkout PR head commit
+ # Checkout Action uses the merge commit by default
+ ref: ${{ github.event.pull_request.head.sha }}
+ # Fetch all history and branches (fetch-depth defaults to 1)
+ fetch-depth: 0
+
+ - name: Get commit body
+ run: |
+ git log origin/${GITHUB_BASE_REF}..HEAD --format=%b > commit_msg.txt
+ sed '/^$/d' commit_msg.txt > commit_body.txt
+
+ - name: Check signed-off
+ run: |
+ # Count lines containing "Signed-off-by:"
+ count=$(cat commit_body.txt | grep 'Signed-off-by:' | wc -l)
+ if [[ ! "$count" -ge "1" ]]; then
+ exit 1
+ fi
+
+ echo "Signed-off-by is OK"
+
+ - name: Check body words
+ # Run even if the "Check signed-off" step failed
+ if: ${{ always() }}
+ run: |
+ count=$(cat commit_body.txt | sed '/Signed-off-by:/d' | wc -w)
+ echo "Commit body word check: $count words"
+ if [[ "$count" -lt "5" ]]; then
+ exit 1
+ fi
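+ # e.g., a commit body holding only "Signed-off-by:" lines plus "Fix typo" (2 words)
+ # fails this check; at least 5 words of description are required.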
diff --git a/.github/workflows/deploy-github-pages.yml b/.github/workflows/deploy-github-pages.yml
new file mode 100644
index 000000000..d474a2754
--- /dev/null
+++ b/.github/workflows/deploy-github-pages.yml
@@ -0,0 +1,40 @@
+name: Build and deploy GitHub Pages
+
+on:
+ schedule:
+ # Every week (Sunday 19:30 UTC)
+ - cron: '30 19 * * SUN'
+ workflow_dispatch:
+ inputs:
+ publish:
+ description: 'Push to the GitHub Pages branch or not'
+ required: true
+ default: false
+ type: boolean
+
+jobs:
+ build_and_deploy:
+ name: 'Deploy doxygen page'
+ runs-on: 'ubuntu-latest'
+ if: github.repository_owner == 'Samsung'
+
+ steps:
+ - name: 'Checkout'
+ uses: actions/checkout@v3
+ - name: 'Generate HTML'
+ uses: mattnotmitt/doxygen-action@v1.9
+ with:
+ doxyfile-path: 'infra/doxygen/Doxyfile'
+ - name: 'Tar artifact'
+ run: tar -zcf doxygen.tar.gz -C doxygen/html ./
+ - name: 'Generate artifact'
+ uses: actions/upload-artifact@v3
+ with:
+ name: doxygen-html
+ path: doxygen.tar.gz
+ - name: 'Deploy'
+ if: ${{ github.event_name == 'schedule' || github.event.inputs.publish == 'true' }}
+ uses: JamesIves/github-pages-deploy-action@v4
+ with:
+ folder: doxygen/html
+ branch: gh-pages
diff --git a/.github/workflows/run-onert-micro-unit-tests.yml b/.github/workflows/run-onert-micro-unit-tests.yml
new file mode 100644
index 000000000..8b27e638b
--- /dev/null
+++ b/.github/workflows/run-onert-micro-unit-tests.yml
@@ -0,0 +1,47 @@
+name: Run onert-micro Unit tests
+
+on:
+ pull_request:
+ branches:
+ - master
+ - release/*
+ types:
+ - opened
+ - synchronize
+ - reopened
+ - ready_for_review
+ paths:
+ - 'onert-micro/**'
+ - '.github/workflows/run-onert-micro-unit-tests.yml'
+
+defaults:
+ run:
+ shell: bash
+
+jobs:
+ run-onert-micro-unit-tests:
+ name: Run onert-micro Unit tests
+ runs-on: ubuntu-20.04
+ # Skip on draft, check on draft -> ready
+ if: github.event.pull_request.draft == false
+
+ steps:
+ - name: Install Arm GNU Toolchain (arm-none-eabi-gcc)
+ uses: carlosperate/arm-none-eabi-gcc-action@v1
+ with:
+ release: '12.2.Rel1' # <-- The compiler release to use
+ - name: Checkout
+ uses: actions/checkout@v3
+ with:
+ # Checkout PR head commit
+ # Checkout Action uses the merge commit by default
+ ref: ${{ github.event.pull_request.head.sha }}
+ # Fetch all history and branches (fetch-depth defaults to 1)
+ fetch-depth: 0
+ - name: Build and Run Tests
+ run: |
+ mkdir build
+ cd build
+ cmake ../infra/onert-micro/ -DENABLE_ONERT_MICRO_TEST=1 -DENABLE_TEST=1
+ make -j$(nproc) luci_interpreter_kernels_micro_test
+ ./onert-micro/eval-driver/luci-interpreter/src/kernels/luci_interpreter_kernels_micro_test
diff --git a/.style.yapf b/.style.yapf
new file mode 100644
index 000000000..9b8f6243b
--- /dev/null
+++ b/.style.yapf
@@ -0,0 +1,3 @@
+[style]
+BASED_ON_STYLE = pep8
+COLUMN_LIMIT = 90
diff --git a/Makefile.template b/Makefile.template
index 1b2f564c0..7621a2f7a 100644
--- a/Makefile.template
+++ b/Makefile.template
@@ -1,3 +1,5 @@
+#!/usr/bin/make -f
+
HOST_ARCH?=$(shell uname -m)
TARGET_ARCH?=$(shell uname -m)
BUILD_TYPE?=Debug
@@ -5,7 +7,6 @@ CROSS_BUILD?=0
HOST_OS?=linux
TARGET_OS?=linux
COVERAGE_BUILD?=0
-BENCHMARK_ACL_BUILD?=0
OPTIONS?=
# make TARGET and TYPE to lowercase
@@ -22,9 +23,8 @@ else ifneq (,$(findstring aarch64,$(TARGET_ARCH_BASE)))
TARGET_ARCH_LC=aarch64
endif
ifneq (,$(findstring android,$(TARGET_OS)))
- # Anndroid only allow aarch64 target-arch
+ # Android only allows the aarch64 target-arch
TARGET_ARCH_LC=aarch64
- TARGET_OS=android
endif
# Set CROSS_BUILD=1 when ROOTFS_DIR is given, and TARGET_ARCH is different to HOST_ARCH.
ifneq ($(ROOTFS_DIR),)
@@ -38,19 +38,14 @@ ifeq ($(CROSS_BUILD),1)
OPTIONS+= -DCMAKE_TOOLCHAIN_FILE=$(TOOLCHAIN_FILE)
endif
-ifeq ($(COVERAGE_BUILD),1)
+ifneq ($(filter create-covsuite,$(MAKECMDGOALS)),)
OPTIONS+= -DENABLE_COVERAGE=ON
else
- OPTIONS+= -DENABLE_COVERAGE=OFF
-endif
-
-ifeq ($(BENCHMARK_ACL_BUILD),1)
- OPTIONS+= -DBUILD_BENCHMARK_ACL=1
-endif
-
-ifneq ($(EXT_HDF5_DIR),)
- $(info Hello $(EXT_HDF5_DIR))
- OPTIONS+= -DEXT_HDF5_DIR=$(EXT_HDF5_DIR)
+ ifeq ($(COVERAGE_BUILD),1)
+ OPTIONS+= -DENABLE_COVERAGE=ON
+ else
+ OPTIONS+= -DENABLE_COVERAGE=OFF
+ endif
endif
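+# NOTE "make create-covsuite" always configures with -DENABLE_COVERAGE=ON;
+# all other goals fall back to the COVERAGE_BUILD setting above.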
ifneq ($(EXTERNAL_VOLUME),)
@@ -87,90 +82,129 @@ WORKHOME=$(CURDIR)/Product
WORKFOLDER=$(TARGET_ARCH_LC)-$(TARGET_OS).$(BUILD_TYPE_LC)
WORKSPACE=$(WORKHOME)/$(WORKFOLDER)
-BUILD_FOLDER=$(WORKSPACE)/obj
INSTALL_PATH?=$(WORKSPACE)/out
OVERLAY_FOLDER?=$(WORKSPACE)/overlay
-BUILD_ALIAS=$(WORKHOME)/obj
INSTALL_ALIAS=$(WORKHOME)/out
TIMESTAMP_CONFIGURE=$(WORKSPACE)/CONFIGURE
TIMESTAMP_BUILD=$(WORKSPACE)/BUILD
TIMESTAMP_INSTALL=$(WORKSPACE)/INSTALL
-all: build
+###
+### Common environment variable
+###
+export NNFW_WORKSPACE=$(WORKSPACE)
+
+###
+### Common environment variable for compiler module
+###
+NNCC_FOLDER=Product/$(WORKFOLDER)/nncc
+export NNCC_WORKSPACE=$(NNCC_FOLDER)
+
+###
+### Default target
+###
+all: install
###
### Command (public)
###
+prepare-nncc: prepare_nncc_internal
+
configure: configure_internal
build: build_internal
-install: $(TIMESTAMP_INSTALL)
+install: install_all_internal
-create_package: runtime_tar_internal
+create-package: runtime_tar_internal
-create_acl_tar: acl_tar_internal
+create-aclpack: acl_tar_internal
+
+create-testsuite: test_suite_internal
+
+create-covsuite: coverage_suite_internal
clean:
rm -rf $(WORKSPACE)
distclean:
- rm -rf $(WORKSPACE)
- rm -rf externals/*.stamp
+ rm -rf Product
+ rm -rf externals
rm -rf tests/nnapi/src/generated/
+# create_package, create_acl_tar: to be removed
+create_package: runtime_tar_internal
+create_acl_tar: acl_tar_internal
+
###
### Command (internal)
###
-configure_internal:
-# TODO Remove setting EXT_ACL_FOLDER
-# Construct overlay folder directly outside (with headers?)
-ifneq ($(EXT_ACL_FOLDER),)
- mkdir -p $(OVERLAY_FOLDER)/lib
- cp $(EXT_ACL_FOLDER)/* $(OVERLAY_FOLDER)/lib
-endif
+$(WORKSPACE):
+ mkdir -p $@
- NNFW_WORKSPACE="$(WORKSPACE)" NNFW_INSTALL_PREFIX=$(INSTALL_PATH) ./nnfw configure \
+prepare_nncc_internal: $(WORKSPACE)
+ifneq ($(CROSS_BUILD),1)
+ ./nncc configure -DBUILD_GTEST=OFF -DENABLE_TEST=OFF -DEXTERNALS_BUILD_THREADS=$(NPROCS) -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \
+ -DCMAKE_INSTALL_PREFIX=$(OVERLAY_FOLDER) \
+ -DBUILD_WHITELIST="luci;foder;pepper-csv2vec;loco;locop;logo;logo-core;mio-circle06;luci-compute;oops;hermes;hermes-std;angkor;pp;pepper-strcast;pepper-str"
+ ./nncc build -j$(NPROCS)
+ cmake --install $(NNCC_FOLDER)
+# install angkor TensorIndex and oops InternalExn headers (TODO: Remove this)
+ @mkdir -p ${OVERLAY_FOLDER}/include/nncc/core/ADT/tensor
+ @mkdir -p ${OVERLAY_FOLDER}/include/oops
+ @cp compiler/angkor/include/nncc/core/ADT/tensor/Index.h ${OVERLAY_FOLDER}/include/nncc/core/ADT/tensor
+ @cp compiler/oops/include/oops/InternalExn.h ${OVERLAY_FOLDER}/include/oops
+endif
+ @echo "Done prepare-nncc"
+
+configure_internal: $(WORKSPACE)
+ifneq ($(DEBIAN_BUILD),)
+ test -d externals || mkdir -p externals
+ find packaging/ -type f -name "*.tar.gz" | xargs -i tar xf {} -C externals
+endif
+ NNFW_INSTALL_PREFIX=$(INSTALL_PATH) ./nnfw configure \
-DCMAKE_BUILD_TYPE=$(BUILD_TYPE_LC) \
-DNNFW_OVERLAY_DIR=$(OVERLAY_FOLDER) \
- -DACL_BUILD_THREADS=$(NPROCS) \
+ -DEXTERNALS_BUILD_THREADS=$(NPROCS) \
$(OPTIONS)
- touch $(TIMESTAMP_CONFIGURE)
-build_internal: $(BUILD_FOLDER)
- NNFW_WORKSPACE="$(WORKSPACE)" ./nnfw build -j $(NPROCS)
- rm -rf $(BUILD_ALIAS)
- ln -s $(BUILD_FOLDER) $(BUILD_ALIAS)
- touch $(TIMESTAMP_BUILD)
+build_internal: configure_internal
+ ./nnfw build -j $(NPROCS)
-install_internal:
- NNFW_WORKSPACE="$(WORKSPACE)" ./nnfw install
+install_internal: build_internal
+ ./nnfw install
rm -rf $(INSTALL_ALIAS)
ln -s $(INSTALL_PATH) $(INSTALL_ALIAS)
- touch $(TIMESTAMP_INSTALL)
-runtime_tar_internal: $(TIMESTAMP_BUILD) install_internal
- tar -zcf $(WORKSPACE)/nnfw-package.tar.gz -C $(INSTALL_PATH) lib
- tar -zcf $(WORKSPACE)/nnfw-devel-package.tar.gz -C $(INSTALL_PATH) include/nnfw
- tar -zcf $(WORKSPACE)/nnfw-plugin-devel-package.tar.gz -C $(INSTALL_PATH) include/onert
- tar -zcf $(WORKSPACE)/nnfw-test-package.tar.gz -C $(INSTALL_PATH) $(shell ls $(INSTALL_PATH) -I lib -I include)
+runtime_tar_internal: build_internal install_internal
+ tar -zcf $(WORKSPACE)/onert-package.tar.gz -C $(INSTALL_PATH) lib
+ tar -zcf $(WORKSPACE)/onert-devel-package.tar.gz -C $(INSTALL_PATH) include/nnfw
+ tar -zcf $(WORKSPACE)/onert-plugin-devel-package.tar.gz -C $(INSTALL_PATH) include/onert
+ tar -zcf $(WORKSPACE)/onert-test-package.tar.gz -C $(INSTALL_PATH) $(shell ls $(INSTALL_PATH) -I lib -I include)
-acl_tar_internal: $(BUILD_FOLDER)
- tar -zcf $(WORKSPACE)/nnfw-acl.tar.gz -C ${OVERLAY_FOLDER} lib/libarm_compute.so lib/libarm_compute_core.so lib/libarm_compute_graph.so
+acl_tar_internal: configure_internal
+ tar -zcf $(WORKSPACE)/onert-acl.tar.gz -C ${OVERLAY_FOLDER} lib/libarm_compute.so lib/libarm_compute_core.so lib/libarm_compute_graph.so
-install_internal_acl:
+install_acl_internal:
# Workaround to install acl for test (ignore error when there is no file to copy)
- cp $(OVERLAY_FOLDER)/lib/libarm_compute* $(INSTALL_ALIAS)/lib || true
+ @cp $(OVERLAY_FOLDER)/lib/libarm_compute*.so $(INSTALL_ALIAS)/lib 2>/dev/null || true
+
+install_luci_internal:
+ @mkdir -p $(INSTALL_ALIAS)/lib/nnfw/odc
+ @cp $(OVERLAY_FOLDER)/lib/libluci*.so $(INSTALL_ALIAS)/lib/nnfw/odc 2>/dev/null || true
+ @cp $(OVERLAY_FOLDER)/lib/libloco*.so $(INSTALL_ALIAS)/lib/nnfw/odc 2>/dev/null || true
+
+install_all_internal: install_internal install_acl_internal install_luci_internal
-build_test_suite: install_internal install_internal_acl
+test_suite_internal: install_all_internal
@echo "packaging test suite"
@rm -rf $(INSTALL_PATH)/test-suite.tar.gz
# TODO Divide runtime package, external library package, and test suite
@tar -zcf test-suite.tar.gz tests/scripts infra Product/out --dereference
@mv test-suite.tar.gz $(INSTALL_PATH)/.
-build_coverage_suite: install_internal install_internal_acl
+coverage_suite_internal: install_all_internal
@echo "packaging test-coverage suite"
@rm -rf $(INSTALL_PATH)/coverage-suite.tar.gz
@find Product -name "*.gcno" > include_lists.txt
@@ -178,17 +212,3 @@ build_coverage_suite: install_internal install_internal_acl
@tar -zcf coverage-suite.tar.gz tests/scripts infra Product/out --dereference -T include_lists.txt
@rm -rf include_lists.txt tests/scripts/build_path_depth.txt
@mv coverage-suite.tar.gz $(INSTALL_PATH)/.
-
-###
-### Timestamps
-###
-$(WORKSPACE):
- mkdir -p $@
-
-$(BUILD_FOLDER): $(WORKSPACE) configure_internal
-
-$(TIMESTAMP_CONFIGURE): configure_internal
-
-$(TIMESTAMP_BUILD): $(TIMESTAMP_CONFIGURE) build_internal
-
-$(TIMESTAMP_INSTALL): $(TIMESTAMP_BUILD) install_internal install_internal_acl
diff --git a/README.md b/README.md
index bbeb66c82..e3ed259c7 100644
--- a/README.md
+++ b/README.md
@@ -54,8 +54,3 @@ Thank you in advance!
- Please post questions, issues, or suggestions into [Issues](https://github.com/Samsung/ONE/issues). This is the best way to communicate with the developer.
- You can also have an open discussion with community members through [gitter.im](https://gitter.im/Samsung/ONE) channel.
-
-## Hall of Fame
-
-[![](https://sourcerer.io/fame/lemmaa/Samsung/ONE/images/0)](https://sourcerer.io/fame/lemmaa/Samsung/ONE/links/0)[![](https://sourcerer.io/fame/lemmaa/Samsung/ONE/images/1)](https://sourcerer.io/fame/lemmaa/Samsung/ONE/links/1)[![](https://sourcerer.io/fame/lemmaa/Samsung/ONE/images/2)](https://sourcerer.io/fame/lemmaa/Samsung/ONE/links/2)[![](https://sourcerer.io/fame/lemmaa/Samsung/ONE/images/3)](https://sourcerer.io/fame/lemmaa/Samsung/ONE/links/3)[![](https://sourcerer.io/fame/lemmaa/Samsung/ONE/images/4)](https://sourcerer.io/fame/lemmaa/Samsung/ONE/links/4)[![](https://sourcerer.io/fame/lemmaa/Samsung/ONE/images/5)](https://sourcerer.io/fame/lemmaa/Samsung/ONE/links/5)[![](https://sourcerer.io/fame/lemmaa/Samsung/ONE/images/6)](https://sourcerer.io/fame/lemmaa/Samsung/ONE/links/6)[![](https://sourcerer.io/fame/lemmaa/Samsung/ONE/images/7)](https://sourcerer.io/fame/lemmaa/Samsung/ONE/links/7)
-
diff --git a/compiler/.ahub/tcchecker-tca/config.yaml b/compiler/.ahub/tcchecker-tca/config.yaml
deleted file mode 100644
index ef681de1a..000000000
--- a/compiler/.ahub/tcchecker-tca/config.yaml
+++ /dev/null
@@ -1,54 +0,0 @@
-version: 2
-test:
- - name: NN Compiler
- testCaseLanguage: CPP
- testFW: GTEST
- testCaseFolder:
- - ./angkor
- - ./arser
- - ./circle2circle
- - ./circle-quantizer
- - ./cwrap
- - ./foder
- - ./hermes
- - ./hermes-std
- - ./loco
- - ./locomotiv
- - ./locop
- - ./logo
- - ./logo-core
- - ./luci
- - ./luci-interpreter
- - ./luci-value-test
- - ./mio-circle
- - ./mio-tflite
- - ./oops
- - ./pepper-assert
- - ./pepper-str
- - ./pepper-strcast
- - ./pp
- - ./record-minmax
- - ./safemain
- - ./souschef
- - ./stdex
- - ./tflite2circle
-
- testFile:
- - extension: .test.cpp
- any: true
-
- testCase:
- - condition:
- - functionName:
- starts:
- - TEST
-
- negativeTestCase:
- - condition:
- - testName:
- ends:
- - _NEG
-
- positiveTestCase:
- - condition:
- - inverse: negativeTestCase
diff --git a/compiler/CMakeLists.txt b/compiler/CMakeLists.txt
index 7cf12f164..ef13df857 100644
--- a/compiler/CMakeLists.txt
+++ b/compiler/CMakeLists.txt
@@ -1,3 +1,21 @@
+# get CODENAME to perform per codename actions
+# set focal as default
+set(ONE_UBUNTU_CODENAME "focal")
+find_program(LSB_RELEASE_EXEC lsb_release)
+if(LSB_RELEASE_EXEC)
+ # output should be one of 'bionic', 'focal', 'jammy'
+ # others are not tested
+ execute_process(COMMAND "${LSB_RELEASE_EXEC}" --short --codename
+ OUTPUT_VARIABLE ONE_UBUNTU_CODENAME
+ OUTPUT_STRIP_TRAILING_WHITESPACE)
+else()
+ message(STATUS "WARNING: lsb_release not found")
+endif()
+
+if(${ONE_UBUNTU_CODENAME} STREQUAL "bionic")
+ set(ONE_UBUNTU_CODENAME_BIONIC TRUE)
+endif()
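+# e.g., on Ubuntu 18.04 "lsb_release --short --codename" prints "bionic",
+# so ONE_UBUNTU_CODENAME_BIONIC is set to TRUE.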
+
# TODO Validate the argument of "requires"
function(get_project_build_order VAR)
# This file will describe the dependencies among projects
diff --git a/compiler/angkor/CMakeLists.txt b/compiler/angkor/CMakeLists.txt
index 44b5e9058..7f5cb88c2 100644
--- a/compiler/angkor/CMakeLists.txt
+++ b/compiler/angkor/CMakeLists.txt
@@ -5,7 +5,9 @@ list(REMOVE_ITEM SOURCES ${TESTS})
# NOTE STATIC is deliberately used here to allow clients to use 'angkor' without installation
add_library(angkor STATIC ${HEADERS} ${SOURCES})
-set_target_properties(angkor PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(angkor PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif (NOT NNCC_LIBRARY_NO_PIC)
set_target_properties(angkor PROPERTIES LINKER_LANGUAGE CXX)
target_include_directories(angkor PUBLIC include)
target_link_libraries(angkor PRIVATE nncc_common)
diff --git a/compiler/angkor/include/nncc/core/ADT/feature/Overlay.h b/compiler/angkor/include/nncc/core/ADT/feature/Overlay.h
index 93d86f56b..0af13c56a 100644
--- a/compiler/angkor/include/nncc/core/ADT/feature/Overlay.h
+++ b/compiler/angkor/include/nncc/core/ADT/feature/Overlay.h
@@ -34,7 +34,7 @@ template <typename T> class Overlay final : public View<T>
{
public:
explicit Overlay(const Shape &shape, const Layout &layout, T *base)
- : View<T>{shape, layout}, _base{base}
+ : View<T>{shape, layout}, _base{base}
{
// DO NOTHING
}
diff --git a/compiler/angkor/include/nncc/core/ADT/feature/Shape.h b/compiler/angkor/include/nncc/core/ADT/feature/Shape.h
index 319326308..7d086b9b7 100644
--- a/compiler/angkor/include/nncc/core/ADT/feature/Shape.h
+++ b/compiler/angkor/include/nncc/core/ADT/feature/Shape.h
@@ -35,7 +35,7 @@ class Shape
{
public:
Shape(uint32_t depth, uint32_t height, uint32_t width)
- : _depth{depth}, _height{height}, _width{width}
+ : _depth{depth}, _height{height}, _width{width}
{
// DO NOTHING
}
diff --git a/compiler/angkor/include/nncc/core/ADT/kernel/Overlay.h b/compiler/angkor/include/nncc/core/ADT/kernel/Overlay.h
index e348a8769..0684277fa 100644
--- a/compiler/angkor/include/nncc/core/ADT/kernel/Overlay.h
+++ b/compiler/angkor/include/nncc/core/ADT/kernel/Overlay.h
@@ -35,7 +35,7 @@ template <typename T, typename InputIt> class Overlay final : public View<T>
{
public:
explicit Overlay(const Shape &shape, const Layout &layout, InputIt it)
- : _impl{shape, layout}, _it{it}
+ : _impl{shape, layout}, _it{it}
{
// DO NOTHING
}
diff --git a/compiler/angkor/include/nncc/core/ADT/kernel/Shape.h b/compiler/angkor/include/nncc/core/ADT/kernel/Shape.h
index d485d526b..92f90970a 100644
--- a/compiler/angkor/include/nncc/core/ADT/kernel/Shape.h
+++ b/compiler/angkor/include/nncc/core/ADT/kernel/Shape.h
@@ -35,7 +35,7 @@ class Shape
{
public:
Shape(uint32_t count, uint32_t depth, uint32_t height, uint32_t width)
- : _count{count}, _depth{depth}, _height{height}, _width{width}
+ : _count{count}, _depth{depth}, _height{height}, _width{width}
{
// DO NOTHING
}
diff --git a/compiler/angkor/include/nncc/core/ADT/tensor/Overlay.h b/compiler/angkor/include/nncc/core/ADT/tensor/Overlay.h
index 11ee5350c..5fa36bbc9 100644
--- a/compiler/angkor/include/nncc/core/ADT/tensor/Overlay.h
+++ b/compiler/angkor/include/nncc/core/ADT/tensor/Overlay.h
@@ -32,7 +32,7 @@ template <typename T> class Overlay final : public View<T>
{
public:
explicit Overlay(const Shape &shape, const Layout &layout, T *base)
- : View<T>{shape, layout}, _base{base}
+ : View<T>{shape, layout}, _base{base}
{
// DO NOTHING
}
diff --git a/compiler/angkor/include/nncc/core/ADT/tensor/View.h b/compiler/angkor/include/nncc/core/ADT/tensor/View.h
index 4c9a91539..8407df3be 100644
--- a/compiler/angkor/include/nncc/core/ADT/tensor/View.h
+++ b/compiler/angkor/include/nncc/core/ADT/tensor/View.h
@@ -36,7 +36,7 @@ template <typename T> class View : public Reader<T>, public Accessor<T>
{
public:
explicit View(const Shape &shape, const Layout &layout)
- : _shape{shape}, _layout{std::move(layout)}
+ : _shape{shape}, _layout{std::move(layout)}
{
// DO NOTHING
}
diff --git a/compiler/angkor/src/ADT/feature/Overlay.test.cpp b/compiler/angkor/src/ADT/feature/Overlay.test.cpp
index 8ba28bf5a..1ac62f856 100644
--- a/compiler/angkor/src/ADT/feature/Overlay.test.cpp
+++ b/compiler/angkor/src/ADT/feature/Overlay.test.cpp
@@ -30,7 +30,7 @@ TEST(ADT_FEATURE_OVERLAY, ctor)
const Shape shape{4, 6, 3};
int data[4 * 6 * 3] = {
- 0,
+ 0,
};
auto overlay = make_overlay<int, CHWLayout>(shape, data);
@@ -44,7 +44,7 @@ TEST(ADT_FEATURE_OVERLAY, read)
const Shape shape{4, 6, 3};
int data[4 * 6 * 3] = {
- 0,
+ 0,
};
const auto overlay = make_overlay<int, CHWLayout>(shape, data);
@@ -60,7 +60,7 @@ TEST(ADT_FEATURE_OVERLAY, access)
const Shape shape{4, 6, 3};
int data[4 * 6 * 3] = {
- 0,
+ 0,
};
auto overlay = make_overlay<int, CHWLayout>(shape, data);
diff --git a/compiler/angkor/src/ADT/kernel/Overlay.test.cpp b/compiler/angkor/src/ADT/kernel/Overlay.test.cpp
index 4e9bd8dbd..7129fe242 100644
--- a/compiler/angkor/src/ADT/kernel/Overlay.test.cpp
+++ b/compiler/angkor/src/ADT/kernel/Overlay.test.cpp
@@ -30,7 +30,7 @@ TEST(ADT_KERNEL_OVERLAY, ctor)
const Shape shape{2, 4, 6, 3};
int data[2 * 4 * 6 * 3] = {
- 0,
+ 0,
};
auto overlay = make_overlay<int, NCHWLayout>(shape, data);
@@ -45,7 +45,7 @@ TEST(ADT_KERNEL_OVERLAY, read)
const Shape shape{2, 4, 6, 3};
int data[2 * 4 * 6 * 3] = {
- 0,
+ 0,
};
const auto overlay = make_overlay<int, NCHWLayout>(shape, data);
@@ -61,7 +61,7 @@ TEST(ADT_KERNEL_OVERLAY, access)
const Shape shape{2, 4, 6, 3};
int data[2 * 4 * 6 * 3] = {
- 0,
+ 0,
};
auto overlay = make_overlay<int, NCHWLayout>(shape, data);
diff --git a/compiler/angkor/src/ADT/tensor/Overlay.test.cpp b/compiler/angkor/src/ADT/tensor/Overlay.test.cpp
index 57cd1e6f9..d5369dffc 100644
--- a/compiler/angkor/src/ADT/tensor/Overlay.test.cpp
+++ b/compiler/angkor/src/ADT/tensor/Overlay.test.cpp
@@ -31,7 +31,7 @@ TEST(ADT_TENSOR_OVERLAY, ctor)
const Shape shape{2, 3};
int data[2 * 3] = {
- 0,
+ 0,
};
auto view = make_overlay<int, LexicalLayout>(shape, data);
@@ -43,7 +43,7 @@ TEST(ADT_TENSOR_OVERLAY, read)
const Shape shape{2, 3};
int data[2 * 3] = {
- 0,
+ 0,
};
const auto view = make_overlay<int, LexicalLayout>(shape, data);
@@ -61,7 +61,7 @@ TEST(ADT_TENSOR_OVERLAY, access)
const Shape shape{2, 3};
int data[2 * 3] = {
- 0,
+ 0,
};
auto view = make_overlay<int, LexicalLayout>(shape, data);
diff --git a/compiler/arser/CMakeLists.txt b/compiler/arser/CMakeLists.txt
index 63d19f538..b937ff6ad 100644
--- a/compiler/arser/CMakeLists.txt
+++ b/compiler/arser/CMakeLists.txt
@@ -2,14 +2,16 @@ add_library(arser INTERFACE)
# It specifies INTERFACE so that future targets linked with arser library will inherit its include directory.
# It means that a developer who want to link arser just need to add one line.
-# target_link_library(another-users-target arser)
+# target_link_libraries(another-users-target arser)
target_include_directories(arser INTERFACE include/)
+target_link_libraries(arser INTERFACE nncc_coverage)
if(NOT ENABLE_TEST)
return()
endif(NOT ENABLE_TEST)
nnas_find_package(GTest REQUIRED)
-set(TESTS "${CMAKE_CURRENT_SOURCE_DIR}/tests/arser.test.cpp")
+set(TESTS "${CMAKE_CURRENT_SOURCE_DIR}/tests/arser.test.cpp"
+ "${CMAKE_CURRENT_SOURCE_DIR}/tests/HelpMessage.test.cpp")
GTest_AddTest(arser_test ${TESTS})
-target_include_directories(arser_test PRIVATE include)
+target_link_libraries(arser_test arser)
diff --git a/compiler/arser/include/arser/arser.h b/compiler/arser/include/arser/arser.h
index 64bb557c4..43f99dc5e 100644
--- a/compiler/arser/include/arser/arser.h
+++ b/compiler/arser/include/arser/arser.h
@@ -14,6 +14,9 @@
* limitations under the License.
*/
+#ifndef __ARSER_H__
+#define __ARSER_H__
+
#include <iostream>
#include <sstream>
@@ -29,7 +32,11 @@
#include <cstring>
-namespace
+#include <cassert>
+
+namespace arser
+{
+namespace internal
{
template <typename T> T lexical_cast(const std::string &str)
@@ -41,7 +48,7 @@ template <typename T> T lexical_cast(const std::string &str)
return data;
}
-template <> bool lexical_cast(const std::string &str)
+template <> inline bool lexical_cast(const std::string &str)
{
bool data = true;
if (str == "false" || str == "False" || str == "FALSE" || str == "0")
@@ -55,7 +62,33 @@ template <> inline std::string to_string(const char *value) { return std::string
template <> inline std::string to_string(const bool value) { return value ? "true" : "false"; }
-} // namespace
+/**
+ * @brief Returns the string with its leading dashes removed.
+ *
+ * If there is no leading dash, the string is returned as is.
+ */
+inline std::string remove_dash(const std::string &str)
+{
+ std::string ret{str};
+ auto pos = ret.find_first_not_of('-');
+ if (pos == std::string::npos)
+ return ret;
+ return ret.substr(pos);
+}
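+// e.g., remove_dash("--verbose") and remove_dash("verbose") both return "verbose".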
+
+/**
+ * @brief Returns the string created by concatenating the elements of a vector with commas.
+ */
+inline std::string make_comma_concatenated(const std::vector<std::string> &vec)
+{
+ std::ostringstream oss;
+ std::copy(vec.begin(), std::prev(vec.end()), std::ostream_iterator<std::string>(oss, ", "));
+ oss << vec.back();
+ return oss.str();
+}
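+// e.g., make_comma_concatenated({"-h", "--help"}) returns "-h, --help".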
+
+} // namespace internal
+} // namespace arser
namespace arser
{
@@ -116,10 +149,41 @@ enum class DataType
class Arser;
+/**
+ * Argument
+ * ├── positional argument
+ * └── optional argument [ dash at the beginning of the string ]
+ * ├── long option [ two or more dashes ]
+ * └── short option [ one dash ]
+ *
+ * An Argument has two types - positional argument and optional argument.
+ *
+ * The two types are distinguished by whether there is a dash('-') at the beginning of the
+ * string.
+ *
+ * An optional argument in turn has two types - long option and short option - which are
+ * distinguished by the number of dashes.
+ */
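+// A short illustration of the classification above (the option names are made up):
+//   arser.add_argument("input");            // positional argument
+//   arser.add_argument("--verbose");        // optional argument, long option
+//   arser.add_argument("-v", "--verbose");  // optional argument, short and long option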
class Argument
{
public:
- explicit Argument(const std::string &arg_name) : _name{arg_name} {}
+ explicit Argument(const std::string &arg_name) : _long_name{arg_name}, _names{arg_name} {}
+ explicit Argument(const std::string &short_name, const std::string &long_name)
+ : _short_name{short_name}, _long_name{long_name}, _names{short_name, long_name}
+ {
+ }
+ explicit Argument(const std::string &short_name, const std::string &long_name,
+ const std::vector<std::string> &names)
+ : _short_name{short_name}, _long_name{long_name}, _names{names}
+ {
+ // 'names' must have 'short_name' and 'long_name'.
+ auto it = std::find(names.begin(), names.end(), short_name);
+ assert(it != names.end());
+ it = std::find(names.begin(), names.end(), long_name);
+ assert(it != names.end());
+ // to avoid an unused-variable warning.
+ (void)it;
+ }
Argument &nargs(uint32_t num)
{
@@ -174,6 +238,18 @@ public:
return *this;
}
+ Argument &accumulated(void)
+ {
+ _is_accumulated = true;
+ return *this;
+ }
+
+ Argument &accumulated(bool value)
+ {
+ _is_accumulated = value;
+ return *this;
+ }
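+ // e.g., an argument marked accumulated() keeps the values of every occurrence, so
+ // "--input a --input b" can later be read back as {"a", "b"} via Arser::get (see below).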
+
Argument &help(std::string help_message)
{
_help_message = help_message;
@@ -190,7 +266,7 @@ public:
{
if ((_nargs <= 1 && TypeName<T>::Get() == _type) ||
(_nargs > 1 && TypeName<std::vector<T>>::Get() == _type))
- _values.emplace_back(::to_string(value));
+ _values.emplace_back(internal::to_string(value));
else
{
throw std::runtime_error("Type mismatch. "
@@ -207,7 +283,7 @@ public:
if ((_nargs <= 1 && TypeName<T>::Get() == _type) ||
(_nargs > 1 && TypeName<std::vector<T>>::Get() == _type))
{
- _values.emplace_back(::to_string(value));
+ _values.emplace_back(internal::to_string(value));
default_value(values...);
}
else
@@ -222,13 +298,19 @@ public:
}
private:
- std::string _name;
- std::string _type;
+ // The '_names' vector contains all of the names specified by the user;
+ // '_long_name' and '_short_name' are selected from among them.
+ std::string _long_name;
+ std::string _short_name;
+ std::vector<std::string> _names;
+ std::string _type = "string";
std::string _help_message;
std::function<void(void)> _func;
uint32_t _nargs{1};
bool _is_required{false};
+ bool _is_accumulated{false};
std::vector<std::string> _values;
+ std::vector<std::vector<std::string>> _accum_values;
friend class Arser;
friend std::ostream &operator<<(std::ostream &, const Arser &);
@@ -238,33 +320,115 @@ class Arser
{
public:
explicit Arser(const std::string &program_description = {})
- : _program_description{program_description}
+ : _program_description{program_description}
{
- add_argument("--help").help("Show help message and exit").nargs(0);
+ add_argument("-h", "--help").help("Show help message and exit").nargs(0);
}
Argument &add_argument(const std::string &arg_name)
{
- if (arg_name.at(0) != '-')
+ if (arg_name.at(0) != '-') /* positional */
{
_positional_arg_vec.emplace_back(arg_name);
_arg_map[arg_name] = &_positional_arg_vec.back();
}
- else
+ else /* optional */
{
+ // The name of an optional argument must be at least 2 characters long,
+ // and it must not consist of dashes alone. e.g. '-', '--' are rejected.
+ if (arg_name.size() < 2)
+ {
+ throw std::runtime_error("Too short name. The length of argument name must be 2 or more.");
+ }
+ if (arg_name == "--")
+ {
+ throw std::runtime_error(
+ "Too short name. Option name must contain at least one character other than dash.");
+ }
_optional_arg_vec.emplace_back(arg_name);
+ _optional_arg_vec.back()._short_name = arg_name;
_arg_map[arg_name] = &_optional_arg_vec.back();
}
return *_arg_map[arg_name];
}
+ Argument &add_argument(const std::vector<std::string> &arg_name_vec)
+ {
+ assert(arg_name_vec.size() >= 2);
+ std::string long_opt, short_opt;
+ // find long and short option
+ for (const auto &arg_name : arg_name_vec)
+ {
+ if (arg_name.at(0) != '-')
+ {
+ throw std::runtime_error("Invalid argument. "
+ "Positional argument cannot have short option.");
+ }
+ assert(arg_name.size() >= 2);
+ if (long_opt.empty() && arg_name.at(0) == '-' && arg_name.at(1) == '-')
+ {
+ long_opt = arg_name;
+ }
+ if (short_opt.empty() && arg_name.at(0) == '-' && arg_name.at(1) != '-')
+ {
+ short_opt = arg_name;
+ }
+ }
+ // If one of the two is empty, fill it with the non-empty one for pretty printing.
+ if (long_opt.empty())
+ {
+ assert(not short_opt.empty());
+ long_opt = short_opt;
+ }
+ if (short_opt.empty())
+ {
+ assert(not long_opt.empty());
+ short_opt = long_opt;
+ }
+
+ _optional_arg_vec.emplace_back(short_opt, long_opt, arg_name_vec);
+ for (const auto &arg_name : arg_name_vec)
+ {
+ _arg_map[arg_name] = &_optional_arg_vec.back();
+ }
+ return _optional_arg_vec.back();
+ }
+
+ template <typename... Ts> Argument &add_argument(const std::string &arg_name, Ts... arg_names)
+ {
+ if (sizeof...(arg_names) == 0)
+ {
+ return add_argument(arg_name);
+ }
+ // sizeof...(arg_names) > 0
+ else
+ {
+ return add_argument(std::vector<std::string>{arg_name, arg_names...});
+ }
+ }
+
+ void validate_arguments(void)
+ {
+ // positional argument is always required.
+ for (const auto &arg : _positional_arg_vec)
+ {
+ if (arg._is_required)
+ {
+ throw std::runtime_error("Invalid arguments. Positional argument must always be required.");
+ }
+ }
+ // TODO accumulated arguments shouldn't be allowed for positional arguments.
+ // TODO accumulated arguments shouldn't be allowed for optional arguments whose `nargs` == 0.
+ }
+
void parse(int argc, char **argv)
{
+ validate_arguments();
_program_name = argv[0];
_program_name.erase(0, _program_name.find_last_of("/\\") + 1);
if (argc >= 2)
{
- if (!std::strcmp(argv[1], "--help"))
+ if (!std::strcmp(argv[1], "--help") || !std::strcmp(argv[1], "-h"))
{
std::cout << *this;
std::exit(0);
@@ -274,7 +438,7 @@ public:
for (const auto &arg : _arg_map)
{
const auto &func = arg.second->_func;
- if (func && !std::strcmp(argv[1], arg.second->_name.c_str()))
+ if (func && !std::strcmp(argv[1], arg.first.c_str()))
{
func();
std::exit(0);
@@ -327,6 +491,11 @@ public:
"You must have missed some argument.");
arg->second->_values.emplace_back(argv[c++]);
}
+ // accumulate values
+ if (arg->second->_is_accumulated)
+ {
+ arg->second->_accum_values.emplace_back(arg->second->_values);
+ }
if (arg->second->_nargs == 0)
{
// TODO std::boolalpha for true or false
@@ -345,6 +514,9 @@ public:
if (arg == _arg_map.end())
return false;
+ if (arg->second->_is_accumulated)
+ return arg->second->_accum_values.size() > 0 ? true : false;
+
return arg->second->_values.size() > 0 ? true : false;
}
@@ -352,16 +524,106 @@ public:
template <typename T> std::vector<T> get_impl(const std::string &arg_name, std::vector<T> *);
+ template <typename T>
+ std::vector<std::vector<T>> get_impl(const std::string &arg_name, std::vector<std::vector<T>> *);
+
template <typename T> T get(const std::string &arg_name);
+ friend std::ostream &operator<<(std::ostream &stream, const Arser &parser)
+ {
+ // print description
+ if (!parser._program_description.empty())
+ {
+ stream << "What " << parser._program_name << " does: " << parser._program_description
+ << "\n\n";
+ }
+ /*
+ ** print usage
+ */
+ auto print_usage_arg = [&](const arser::Argument &arg) {
+ stream << " ";
+ std::string arg_name = arser::internal::remove_dash(arg._long_name);
+ std::for_each(arg_name.begin(), arg_name.end(),
+ [&stream](const char &c) { stream << static_cast<char>(::toupper(c)); });
+ };
+ stream << "Usage: ./" << parser._program_name << " ";
+ // required optional argument
+ for (const auto &arg : parser._optional_arg_vec)
+ {
+ if (!arg._is_required)
+ continue;
+ stream << arg._short_name;
+ print_usage_arg(arg);
+ stream << " ";
+ }
+ // rest of the optional argument
+ for (const auto &arg : parser._optional_arg_vec)
+ {
+ if (arg._is_required)
+ continue;
+ stream << "[" << arg._short_name;
+ if (arg._nargs)
+ {
+ print_usage_arg(arg);
+ }
+ stream << "]"
+ << " ";
+ }
+ // positional argument
+ for (const auto &arg : parser._positional_arg_vec)
+ {
+ stream << arg._long_name << " ";
+ }
+ stream << "\n\n";
+ /*
+ ** print argument list and its help message
+ */
+ // get the length of the longest argument
+ size_t length_of_longest_arg = 0;
+ for (const auto &arg : parser._positional_arg_vec)
+ {
+ length_of_longest_arg = std::max(length_of_longest_arg,
+ arser::internal::make_comma_concatenated(arg._names).size());
+ }
+ for (const auto &arg : parser._optional_arg_vec)
+ {
+ length_of_longest_arg = std::max(length_of_longest_arg,
+ arser::internal::make_comma_concatenated(arg._names).size());
+ }
+
+ const size_t message_width = 60;
+ auto print_help_args = [&](const std::list<Argument> &args, const std::string &title) {
+ if (!args.empty())
+ {
+ stream << title << std::endl;
+ for (const auto &arg : args)
+ {
+ stream.width(length_of_longest_arg);
+ stream << std::left << arser::internal::make_comma_concatenated(arg._names) << "\t";
+ for (size_t i = 0; i < arg._help_message.length(); i += message_width)
+ {
+ if (i)
+ stream << std::string(length_of_longest_arg, ' ') << "\t";
+ stream << arg._help_message.substr(i, message_width) << std::endl;
+ }
+ }
+ std::cout << std::endl;
+ }
+ };
+ // positional argument
+ print_help_args(parser._positional_arg_vec, "[Positional argument]");
+ // optional argument
+ print_help_args(parser._optional_arg_vec, "[Optional argument]");
+
+ return stream;
+ }
+
private:
std::string _program_name;
std::string _program_description;
std::list<Argument> _positional_arg_vec;
std::list<Argument> _optional_arg_vec;
std::map<std::string, Argument *> _arg_map;
-
- friend std::ostream &operator<<(std::ostream &, const Arser &);
};
template <typename T> T Arser::get_impl(const std::string &arg_name, T *)
@@ -369,7 +631,14 @@ template <typename T> T Arser::get_impl(const std::string &arg_name, T *)
auto arg = _arg_map.find(arg_name);
if (arg == _arg_map.end())
throw std::runtime_error("Invalid argument. "
- "There is no argument you are looking for.");
+ "There is no argument you are looking for: " +
+ arg_name);
+
+ if (arg->second->_is_accumulated)
+ throw std::runtime_error(
+ "Type mismatch. "
+ "You called get using a type different from the one you specified."
+ "Accumulated argument is returned as std::vector of the specified type");
if (arg->second->_type != TypeName<T>::Get())
throw std::runtime_error("Type mismatch. "
@@ -383,7 +652,7 @@ template <typename T> T Arser::get_impl(const std::string &arg_name, T *)
"You must make sure that the argument is given before accessing it. "
"You can do it by calling arser[\"argument\"].");
- return ::lexical_cast<T>(arg->second->_values[0]);
+ return internal::lexical_cast<T>(arg->second->_values[0]);
}
template <typename T> std::vector<T> Arser::get_impl(const std::string &arg_name, std::vector<T> *)
@@ -391,7 +660,24 @@ template <typename T> std::vector<T> Arser::get_impl(const std::string &arg_name
auto arg = _arg_map.find(arg_name);
if (arg == _arg_map.end())
throw std::runtime_error("Invalid argument. "
- "There is no argument you are looking for.");
+ "There is no argument you are looking for: " +
+ arg_name);
+
+ // Accumulated arguments with scalar type (e.g., STR)
+ if (arg->second->_is_accumulated)
+ {
+ if (arg->second->_type != TypeName<T>::Get())
+ throw std::runtime_error("Type mismatch. "
+ "You called get using a type different from the one you specified.");
+
+ std::vector<T> data;
+ for (auto values : arg->second->_accum_values)
+ {
+ assert(values.size() == 1);
+ data.emplace_back(internal::lexical_cast<T>(values[0]));
+ }
+ return data;
+ }
if (arg->second->_type != TypeName<std::vector<T>>::Get())
throw std::runtime_error("Type mismatch. "
@@ -399,109 +685,71 @@ template <typename T> std::vector<T> Arser::get_impl(const std::string &arg_name
std::vector<T> data;
std::transform(arg->second->_values.begin(), arg->second->_values.end(), std::back_inserter(data),
- [](std::string str) -> T { return ::lexical_cast<T>(str); });
+ [](std::string str) -> T { return internal::lexical_cast<T>(str); });
return data;
}
+// Accumulated arguments with vector type (e.g., STR_VEC)
+template <typename T>
+std::vector<std::vector<T>> Arser::get_impl(const std::string &arg_name,
+ std::vector<std::vector<T>> *)
+{
+ auto arg = _arg_map.find(arg_name);
+ if (arg == _arg_map.end())
+ throw std::runtime_error("Invalid argument. "
+ "There is no argument you are looking for: " +
+ arg_name);
+
+ if (not arg->second->_is_accumulated)
+ throw std::runtime_error("Type mismatch. "
+ "You called get using a type different from the one you specified.");
+
+ if (arg->second->_type != TypeName<std::vector<T>>::Get())
+ throw std::runtime_error(
+ "Type mismatch. "
+ "You called get using a type different from the one you specified."
+ "Accumulated argument is returned as std::vector of the specified type");
+
+ std::vector<std::vector<T>> result;
+ for (auto values : arg->second->_accum_values)
+ {
+ std::vector<T> data;
+ std::transform(values.begin(), values.end(), std::back_inserter(data),
+ [](std::string str) -> T { return internal::lexical_cast<T>(str); });
+ result.emplace_back(data);
+ }
+
+ return result;
+}
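+// e.g., an accumulated argument declared with .nargs(2) and given as "--pad 1 2 --pad 3 4"
+// comes back from get<std::vector<std::vector<std::string>>>("--pad") as
+// {{"1", "2"}, {"3", "4"}} (an illustration; the element type follows the declared TypeName).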
+
template <typename T> T Arser::get(const std::string &arg_name)
{
return get_impl(arg_name, static_cast<T *>(nullptr));
}
-std::ostream &operator<<(std::ostream &stream, const Arser &parser)
+class Helper
{
- // print description
- if (!parser._program_description.empty())
- {
- stream << "What " << parser._program_name << " does: " << parser._program_description << "\n\n";
- }
- /*
- ** print usage
- */
- stream << "Usage: ./" << parser._program_name << " ";
- // required optional argument
- for (const auto &arg : parser._optional_arg_vec)
- {
- if (!arg._is_required)
- continue;
- stream << arg._name << " ";
- std::string arg_name = arg._name.substr(2);
- std::for_each(arg_name.begin(), arg_name.end(),
- [&stream](const char &c) { stream << static_cast<char>(::toupper(c)); });
- stream << " ";
- }
- // rest of the optional argument
- for (const auto &arg : parser._optional_arg_vec)
- {
- if (arg._is_required)
- continue;
- stream << "[" << arg._name;
- if (arg._nargs)
- {
- stream << " ";
- std::string arg_name = arg._name.substr(2);
- std::for_each(arg_name.begin(), arg_name.end(),
- [&stream](const char &c) { stream << static_cast<char>(::toupper(c)); });
- }
- stream << "]"
- << " ";
- }
- // positional arguement
- for (const auto &arg : parser._positional_arg_vec)
- {
- stream << arg._name << " ";
- }
- stream << "\n\n";
- /*
- ** print argument list and its help message
- */
- // get the length of the longest argument
- size_t length_of_longest_arg = 0;
- for (const auto &arg : parser._positional_arg_vec)
- {
- length_of_longest_arg = std::max(length_of_longest_arg, arg._name.length());
- }
- for (const auto &arg : parser._optional_arg_vec)
+public:
+ static void add_version(Arser &arser, const std::function<void(void)> &func)
{
- length_of_longest_arg = std::max(length_of_longest_arg, arg._name.length());
+ arser.add_argument("--version")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("Show version information and exit")
+ .exit_with(func);
}
- const size_t message_width = 60;
- // positional argument
- if (!parser._positional_arg_vec.empty())
+ static void add_verbose(Arser &arser)
{
- stream << "[Positional argument]" << std::endl;
- for (const auto &arg : parser._positional_arg_vec)
- {
- stream.width(length_of_longest_arg);
- stream << std::left << arg._name << "\t";
- for (size_t i = 0; i < arg._help_message.length(); i += message_width)
- {
- if (i)
- stream << std::string(length_of_longest_arg, ' ') << "\t";
- stream << arg._help_message.substr(i, message_width) << std::endl;
- }
- }
- std::cout << std::endl;
+ arser.add_argument("-V", "--verbose")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("output additional information to stdout or stderr");
}
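+ // (illustrative) e.g., after Helper::add_verbose(arser) and parsing, arser["--verbose"]
+ // can be used to check whether the flag was given.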
- // optional argument
- if (!parser._optional_arg_vec.empty())
- {
- stream << "[Optional argument]" << std::endl;
- for (const auto &arg : parser._optional_arg_vec)
- {
- stream.width(length_of_longest_arg);
- stream << std::left << arg._name << "\t";
- for (size_t i = 0; i < arg._help_message.length(); i += message_width)
- {
- if (i)
- stream << std::string(length_of_longest_arg, ' ') << "\t";
- stream << arg._help_message.substr(i, message_width) << std::endl;
- }
- }
- }
-
- return stream;
-}
+};
} // namespace arser
+
+#endif // __ARSER_H__
diff --git a/compiler/arser/tests/HelpMessage.test.cpp b/compiler/arser/tests/HelpMessage.test.cpp
new file mode 100644
index 000000000..45cf840e6
--- /dev/null
+++ b/compiler/arser/tests/HelpMessage.test.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "arser/arser.h"
+
+#include "Prompt.h"
+
+using namespace arser;
+
+/**
+ * [WARNING] DO NOT GIVE ARSER THE '-h' OR '--help' OPTION IN THE TESTS BELOW.
+ *
+ * arser exits with code 0 when the '-h' option is given, which forces googletest to pass.
+ */
+
+TEST(HelpMessageTest, Default)
+{
+ /* arrange */
+ Arser arser;
+
+ arser.add_argument("--dummy").nargs(0).help("Dummy optional argument");
+
+ std::ostringstream oss;
+ std::string expected_out = "Usage: ./arser [-h] [--dummy] \n"
+ "\n"
+ "[Optional argument]\n"
+ "-h, --help Show help message and exit\n"
+ "--dummy \tDummy optional argument\n";
+
+ test::Prompt prompt("./arser --dummy");
+ /* act */
+ arser.parse(prompt.argc(), prompt.argv());
+ oss << arser;
+
+ /* assert */
+ EXPECT_EQ(expected_out, oss.str());
+}
+
+TEST(HelpMessageTest, ShortOption)
+{
+ /* arrange */
+ Arser arser;
+
+ arser.add_argument("-v", "--verbose").nargs(0).help("Provides additional details");
+
+ std::ostringstream oss;
+ std::string expected_out = "Usage: ./arser [-h] [-v] \n"
+ "\n"
+ "[Optional argument]\n"
+ "-h, --help \tShow help message and exit\n"
+ "-v, --verbose\tProvides additional details\n";
+
+ test::Prompt prompt("./arser -v");
+ /* act */
+ arser.parse(prompt.argc(), prompt.argv());
+ oss << arser;
+
+ /* assert */
+ EXPECT_EQ(expected_out, oss.str());
+}
diff --git a/compiler/arser/tests/Prompt.h b/compiler/arser/tests/Prompt.h
new file mode 100644
index 000000000..d816f199c
--- /dev/null
+++ b/compiler/arser/tests/Prompt.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ARSER_PROMPT_H__
+#define __ARSER_PROMPT_H__
+
+#include <iterator>
+#include <sstream>
+#include <string>
+#include <vector>
+
+namespace arser
+{
+namespace test
+{
+
+class Prompt
+{
+public:
+ Prompt(const std::string &command)
+ {
+ std::istringstream iss(command);
+ std::vector<std::string> token(std::istream_iterator<std::string>{iss},
+ std::istream_iterator<std::string>());
+ _arg = std::move(token);
+ _argv.reserve(_arg.size());
+ for (const auto &t : _arg)
+ {
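+      // const_cast is safe here: argv of main() is writable by convention,
+      // and the strings stored in _arg outlive _argv.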
+ _argv.push_back(const_cast<char *>(t.data()));
+ }
+ }
+ int argc(void) const { return _argv.size(); }
+ char **argv(void) { return _argv.data(); }
+
+private:
+ std::vector<char *> _argv;
+ std::vector<std::string> _arg;
+};
+
+} // namespace test
+} // namespace arser
+
+#endif // __ARSER_PROMPT_H__
diff --git a/compiler/arser/tests/arser.test.cpp b/compiler/arser/tests/arser.test.cpp
index 28bee4238..63121b845 100644
--- a/compiler/arser/tests/arser.test.cpp
+++ b/compiler/arser/tests/arser.test.cpp
@@ -23,30 +23,9 @@
#include "arser/arser.h"
-using namespace arser;
+#include "Prompt.h"
-class Prompt
-{
-public:
- Prompt(const std::string &command)
- {
- std::istringstream iss(command);
- std::vector<std::string> token(std::istream_iterator<std::string>{iss},
- std::istream_iterator<std::string>());
- _arg = std::move(token);
- _argv.reserve(_arg.size());
- for (const auto &t : _arg)
- {
- _argv.push_back(const_cast<char *>(t.data()));
- }
- }
- int argc(void) const { return _argv.size(); }
- char **argv(void) { return _argv.data(); }
-
-private:
- std::vector<char *> _argv;
- std::vector<std::string> _arg;
-};
+using namespace arser;
TEST(BasicTest, option)
{
@@ -54,10 +33,10 @@ TEST(BasicTest, option)
Arser arser;
arser.add_argument("--verbose")
- .nargs(0)
- .help("It provides additional details as to what the executable is doing");
+ .nargs(0)
+ .help("It provides additional details as to what the executable is doing");
- Prompt prompt("./executable --verbose");
+ test::Prompt prompt("./executable --verbose");
/* act */
arser.parse(prompt.argc(), prompt.argv());
/* assert */
@@ -71,15 +50,15 @@ TEST(BasicTest, OptionalArgument)
Arser arser;
arser.add_argument("--volume")
- .nargs(1)
- .type(arser::DataType::INT32)
- .help("Set a volume as you provided.");
+ .nargs(1)
+ .type(arser::DataType::INT32)
+ .help("Set a volume as you provided.");
arser.add_argument("--frequency")
- .nargs(1)
- .type(arser::DataType::FLOAT)
- .help("Set a frequency as you provided.");
+ .nargs(1)
+ .type(arser::DataType::FLOAT)
+ .help("Set a frequency as you provided.");
- Prompt prompt("./radio --volume 5 --frequency 128.5");
+ test::Prompt prompt("./radio --volume 5 --frequency 128.5");
/* act */
arser.parse(prompt.argc(), prompt.argv());
/* assert */
@@ -93,17 +72,17 @@ TEST(BasicTest, OptionalArgument)
EXPECT_THROW(arser.get<bool>("--volume"), std::runtime_error);
}
-TEST(BasicTest, NonRequiredOptionalArgument)
+TEST(BasicTest, NonRequiredOptionalArgument_NEG)
{
/* arrange */
Arser arser;
arser.add_argument("--weight")
- .nargs(1)
- .type(arser::DataType::INT32)
- .help("Set a volume as you provided.");
+ .nargs(1)
+ .type(arser::DataType::INT32)
+ .help("Set a volume as you provided.");
- Prompt prompt("./radio"); // empty argument
+ test::Prompt prompt("./radio"); // empty argument
/* act */
arser.parse(prompt.argc(), prompt.argv());
/* assert */
@@ -111,18 +90,18 @@ TEST(BasicTest, NonRequiredOptionalArgument)
EXPECT_THROW(arser.get<int>("--weight"), std::runtime_error);
}
-TEST(BasicTest, RequiredOptionalArgument)
+TEST(BasicTest, RequiredOptionalArgument_NEG)
{
/* arrange */
Arser arser;
arser.add_argument("--volume")
- .nargs(1)
- .type(arser::DataType::INT32)
- .required()
- .help("Set a volume as you provided.");
+ .nargs(1)
+ .type(arser::DataType::INT32)
+ .required()
+ .help("Set a volume as you provided.");
- Prompt prompt("./radio");
+ test::Prompt prompt("./radio");
/* act */ /* assert */
EXPECT_THROW(arser.parse(prompt.argc(), prompt.argv()), std::runtime_error);
}
@@ -134,7 +113,7 @@ TEST(BasicTest, OptionalMultipleArgument)
arser.add_argument("--add").nargs(2).type(arser::DataType::INT32_VEC).help("Add two numbers.");
- Prompt prompt("./calculator --add 3 5");
+ test::Prompt prompt("./calculator --add 3 5");
/* act */
arser.parse(prompt.argc(), prompt.argv());
/* assert */
@@ -152,23 +131,23 @@ TEST(BasicTest, MultipleOptionalArgument)
Arser arser;
arser.add_argument("--input_path")
- .nargs(1)
- .type(arser::DataType::STR)
- .help("input path of this program.")
- .required();
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("input path of this program.")
+ .required();
arser.add_argument("--output_path")
- .nargs(1)
- .type(arser::DataType::STR)
- .help("output path of this program.")
- .required(true);
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("output path of this program.")
+ .required(true);
arser.add_argument("--training_data")
- .nargs(5)
- .type(arser::DataType::INT32_VEC)
- .help("give traning data to this program.")
- .required();
+ .nargs(5)
+ .type(arser::DataType::INT32_VEC)
+ .help("give traning data to this program.")
+ .required();
- Prompt prompt("./ml --input_path /I/am/in.put --output_path I/am/out.put "
- "--training_data 2 43 234 3 334");
+ test::Prompt prompt("./ml --input_path /I/am/in.put --output_path I/am/out.put "
+ "--training_data 2 43 234 3 334");
/* act */
arser.parse(prompt.argc(), prompt.argv());
/* assert */
@@ -191,11 +170,11 @@ TEST(BasicTest, MultipleFloatValue)
Arser arser;
arser.add_argument("--add_float")
- .nargs(2)
- .type(arser::DataType::FLOAT_VEC)
- .help("Add two float numbers.");
+ .nargs(2)
+ .type(arser::DataType::FLOAT_VEC)
+ .help("Add two float numbers.");
- Prompt prompt("./calculator --add_float 3.2 5.4");
+ test::Prompt prompt("./calculator --add_float 3.2 5.4");
/* act */
arser.parse(prompt.argc(), prompt.argv());
/* assert */
@@ -213,11 +192,11 @@ TEST(BasicTest, MultipleStringValue)
Arser arser;
arser.add_argument("--three_color")
- .nargs(3)
- .type(arser::DataType::STR_VEC)
- .help("insert your three favorite color");
+ .nargs(3)
+ .type(arser::DataType::STR_VEC)
+ .help("insert your three favorite color");
- Prompt prompt("./color_factory --three_color red blue yellow");
+ test::Prompt prompt("./color_factory --three_color red blue yellow");
/* act */
arser.parse(prompt.argc(), prompt.argv());
/* assert */
@@ -241,7 +220,7 @@ TEST(BasicTest, ExitWithFunctionCall)
arser.add_argument("--name").nargs(1).type(arser::DataType::STR).help("Name your hero");
- Prompt prompt("./hero --history");
+ test::Prompt prompt("./hero --history");
/* act */ /* assert */
EXPECT_EXIT(arser.parse(prompt.argc(), prompt.argv()), testing::ExitedWithCode(0),
"When I was young..");
@@ -255,10 +234,10 @@ TEST(BasicTest, ExitWithFunctionCallWithBind)
Arser arser;
arser.add_argument("--version")
- .help("Show version and exit")
- .exit_with(std::bind(printVersion, "1.2.0"));
+ .help("Show version and exit")
+ .exit_with(std::bind(printVersion, "1.2.0"));
- Prompt prompt("./arser --version");
+ test::Prompt prompt("./arser --version");
/* act */ /* assert */
EXPECT_EXIT(arser.parse(prompt.argc(), prompt.argv()), testing::ExitedWithCode(0),
"arser version : 1.2.0");
@@ -275,7 +254,7 @@ TEST(BasicTest, ExitWithFunctionCallWithLamda)
arser.add_argument("OS").nargs(1).type(arser::DataType::STR).help("The OS you want to boot");
- Prompt prompt("./computer --shutdown");
+ test::Prompt prompt("./computer --shutdown");
/* act */ /* assert */
EXPECT_EXIT(arser.parse(prompt.argc(), prompt.argv()), testing::ExitedWithCode(0), "Good bye..");
}
@@ -286,36 +265,36 @@ TEST(BasicTest, DefaultValue)
Arser arser;
arser.add_argument("--delivery")
- .nargs(3)
- .type(arser::DataType::STR_VEC)
- .default_value("pizza", "chicken", "hamburger")
- .help("Enter three foods that you want to deliver");
+ .nargs(3)
+ .type(arser::DataType::STR_VEC)
+ .default_value("pizza", "chicken", "hamburger")
+ .help("Enter three foods that you want to deliver");
arser.add_argument("--assistant")
- .type(arser::DataType::STR)
- .default_value("Bixby")
- .help("Enter name of your assistant");
+ .type(arser::DataType::STR)
+ .default_value("Bixby")
+ .help("Enter name of your assistant");
arser.add_argument("--sound")
- .type(arser::DataType::BOOL)
- .nargs(1)
- .default_value(true)
- .help("Sound on/off");
+ .type(arser::DataType::BOOL)
+ .nargs(1)
+ .default_value(true)
+ .help("Sound on/off");
arser.add_argument("--number")
- .type(arser::DataType::INT32_VEC)
- .nargs(4)
- .default_value(1, 2, 3, 4)
- .help("Enter the number that you want to call");
+ .type(arser::DataType::INT32_VEC)
+ .nargs(4)
+ .default_value(1, 2, 3, 4)
+ .help("Enter the number that you want to call");
arser.add_argument("--time")
- .type(arser::DataType::INT32_VEC)
- .nargs(3)
- .default_value(0, 0, 0)
- .help("Current time(H/M/S)");
+ .type(arser::DataType::INT32_VEC)
+ .nargs(3)
+ .default_value(0, 0, 0)
+ .help("Current time(H/M/S)");
arser.add_argument("--name")
- .type(arser::DataType::STR)
- .nargs(1)
- .default_value("no name")
- .help("Enter your name");
+ .type(arser::DataType::STR)
+ .nargs(1)
+ .default_value("no name")
+ .help("Enter your name");
- Prompt prompt("/phone --time 1 52 34 --name arser");
+ test::Prompt prompt("/phone --time 1 52 34 --name arser");
/* act */
arser.parse(prompt.argc(), prompt.argv());
/* assert */
@@ -342,3 +321,160 @@ TEST(BasicTest, DefaultValue)
// 1 string, 1 argument
EXPECT_EQ("arser", arser.get<std::string>("--name"));
}
+
+TEST(BasicTest, shortOption)
+{
+ /* arrange */
+ Arser arser;
+
+ arser.add_argument("--input_path", "-i")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("input path of this program.")
+ .required();
+ arser.add_argument("--output_path", "-o")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("output path of this program.")
+ .required(true);
+
+ test::Prompt prompt("./driver -i /I/am/in.put --output_path I/am/out.put");
+ /* act */
+ arser.parse(prompt.argc(), prompt.argv());
+ /* assert */
+ EXPECT_TRUE(arser["--input_path"]);
+ EXPECT_EQ("/I/am/in.put", arser.get<std::string>("--input_path"));
+ EXPECT_TRUE(arser["--output_path"]);
+ EXPECT_EQ("I/am/out.put", arser.get<std::string>("--output_path"));
+}
+
+TEST(BasicTest, shortMultipleOption)
+{
+ /* arrange */
+ Arser arser;
+
+ arser.add_argument("--input_path", "-i", "--input", "--in")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("input path of this program.")
+ .required();
+ arser.add_argument("--output_path", "-o")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("output path of this program.")
+ .required(true);
+
+ test::Prompt prompt("./driver --in /I/am/in.put -o I/am/out.put");
+ /* act */
+ arser.parse(prompt.argc(), prompt.argv());
+ /* assert */
+ EXPECT_TRUE(arser["--input"]);
+ EXPECT_EQ("/I/am/in.put", arser.get<std::string>("--input"));
+ EXPECT_TRUE(arser["--output_path"]);
+ EXPECT_EQ("I/am/out.put", arser.get<std::string>("--output_path"));
+}
+
+TEST(BasicTest, OptWithRequiredDuplicate_NEG)
+{
+ /* arrange */
+ Arser arser;
+
+ arser.add_argument("--input_path", "-i", "--input", "--in")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("input path of this program.")
+ .required();
+ arser.add_argument("--output_path", "-o")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("output path of this program.")
+ .required(true);
+
+ test::Prompt prompt("./driver --in /I/am/in.put -o I/am/out.put -i /I/am/duplicate");
+ /* act */ /* assert */
+ EXPECT_THROW(arser.parse(prompt.argc(), prompt.argv()), std::runtime_error);
+}
+
+TEST(BasicTest, OptWithNonRequiredDuplicate)
+{
+ /* arrange */
+ Arser arser;
+
+ arser.add_argument("--input_path", "-i", "--input", "--in")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("input path of this program.");
+ /* .required() */
+ arser.add_argument("--output_path", "-o")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("output path of this program.")
+ .required(true);
+
+ test::Prompt prompt("./driver --in /I/am/in.put -o I/am/out.put -i /I/am/duplicate");
+ /* act */
+ arser.parse(prompt.argc(), prompt.argv());
+ /* assert */
+ EXPECT_TRUE(arser["--input"]);
+ EXPECT_EQ("/I/am/duplicate", arser.get<std::string>("--input"));
+ EXPECT_TRUE(arser["--output_path"]);
+ EXPECT_EQ("I/am/out.put", arser.get<std::string>("--output_path"));
+}
+
+TEST(BasicTest, AccumulateVectorOptions)
+{
+ /* arrange */
+ Arser arser;
+
+ arser.add_argument("--specify").nargs(3).accumulated(true).type(arser::DataType::STR_VEC);
+
+ test::Prompt prompt("./driver --specify a b c --specify 1 2 3");
+ /* act */
+ arser.parse(prompt.argc(), prompt.argv());
+ /* assert */
+ EXPECT_TRUE(arser["--specify"]);
+
+ auto specify = arser.get<std::vector<std::vector<std::string>>>("--specify");
+ auto first = specify[0];
+ EXPECT_EQ("a", first.at(0));
+ EXPECT_EQ("b", first.at(1));
+ EXPECT_EQ("c", first.at(2));
+ auto second = specify[1];
+ EXPECT_EQ("1", second.at(0));
+ EXPECT_EQ("2", second.at(1));
+ EXPECT_EQ("3", second.at(2));
+}
+
+TEST(BasicTest, AccumulateScalarOptions)
+{
+ /* arrange */
+ Arser arser;
+
+ arser.add_argument("--specify").nargs(1).accumulated(true).type(arser::DataType::FLOAT);
+
+ test::Prompt prompt("./driver --specify 1 --specify 2");
+ /* act */
+ arser.parse(prompt.argc(), prompt.argv());
+ /* assert */
+ EXPECT_TRUE(arser["--specify"]);
+
+ auto specify = arser.get<std::vector<float>>("--specify");
+ EXPECT_EQ(1, specify.at(0));
+ EXPECT_EQ(2, specify.at(1));
+}
+
+TEST(BasicTest, AccumulateScalarOptions_WrongType_NEG)
+{
+ /* arrange */
+ Arser arser;
+
+ arser.add_argument("--specify").nargs(1).accumulated(true).type(arser::DataType::FLOAT);
+
+ test::Prompt prompt("./driver --specify 1 --specify 2");
+ /* act */
+ arser.parse(prompt.argc(), prompt.argv());
+ /* assert */
+ EXPECT_TRUE(arser["--specify"]);
+
+ EXPECT_THROW(arser.get<float>("--specify"), std::runtime_error);
+}
diff --git a/compiler/bcq-tools/CMakeLists.txt b/compiler/bcq-tools/CMakeLists.txt
index fcf01de7d..475ee04f5 100644
--- a/compiler/bcq-tools/CMakeLists.txt
+++ b/compiler/bcq-tools/CMakeLists.txt
@@ -1,6 +1,6 @@
set(BCQ_TOOLS_FILES
- generate_bcq_output_arrays
- preserve_bcq_info
+ generate_bcq_metadata.py
+ generate_bcq_output_arrays.py
)
foreach(BCQ_TOOLS IN ITEMS ${BCQ_TOOLS_FILES})
diff --git a/compiler/bcq-tools/README.md b/compiler/bcq-tools/README.md
index 18b0f4826..0acd0ba00 100644
--- a/compiler/bcq-tools/README.md
+++ b/compiler/bcq-tools/README.md
@@ -2,77 +2,69 @@
This directory includes some tools related to BCQ.
-## preserve_bcq_info
+## generate_bcq_output_arrays
### Purpose
-`preserve_bcq_info` is for preserving constant nodes which include BCQ information.
-When `.pb` file is converted to `.tflite` file by TFlite converter, constant nodes whose values are exactly same are removed and then linked to only one representative node.
-This makes us impossible to know what constant node should be linked to a node which we want to apply BCQ.
-One of the solutions is making all the same constant nodes different by inserting unique values and ignore the newly generated unique values when BCQ fusing is applied.
-`preserve_bcq_info` will generate and insert unique dummy values to the constant nodes whose values are same not to be removed by Tensorflow Lite converter.
-As a result, BCQ information will be preserved.
+To apply BCQ, BCQ information nodes should be designated as model outputs so that they stay alive even after TFLite conversion is finished.
+However, there can be too many nodes to designate by hand, and sometimes we cannot copy and paste all of them because the resulting string is too long.
+`generate_bcq_output_arrays` generates output_arrays, which include the BCQ information nodes.
### How to use
```bash
-preserve_bcq_info \
+generate_bcq_output_arrays \
--input_path /path/to/original_model.pb \
---output_path /path/to/preserved_model.pb
+--output_path /path/to/output_arrays.txt
```
### How it works
-If we add unique dummy value at the end of each constant nodes, all the constant nodes would be different. Following is an example.
-
```
-[Original Constant Nodes]
-const(value=[1, 2, 3], name='const1')
-const(value=[1, 2, 3], name='const2')
-const(value=[1, 2, 3], name='const3')
-
-[After BCQ information preserved]
+[Original BCQ information nodes]
const(value=[1, 2, 3, -1], name='const1')
const(value=[1, 2, 3, -2], name='const2')
const(value=[1, 2, 3, -3], name='const3')
-```
-For dummy values, negative values are used instead of positive values.
-This is because positive valus may be confused with original constant node values.
-For your information, unique dummy value starts from -1 and moves to -2, -3, ..., -N, where N is the number of preserved constant nodes.
+[Generated output_arrays]
+,const1,const2,const3
+```
### Caution
-- Newly generated dummy values should be ignored when the constant nodes are used.
+- Generated output_arrays will start with a comma.
-## generate_bcq_output_arrays
+## generate_bcq_metadata
### Purpose
-To apply BCQ, BCQ information nodes should be designated as model output so that they are alive even after TFLite conversion is finished.
-However, there are so many nodes to designate and sometimes we cannot copy and paste all of them because the string size is too big.
-`generate_bcq_output_arrays` is for generating output_arrays, which include BCQ information nodes.
+`generate_bcq_metadata` appends metadata, as a model output, to a model which includes BCQ information.
+The appended metadata is used for connecting BCQ-related operations and constant nodes.
### How to use
```bash
-generate_bcq_output_arrays \
+generate_bcq_metadata \
--input_path /path/to/original_model.pb \
---output_path /path/to/output_arrays.txt
+--output_path /path/to/metadata_inserted_model.pb \
+--output_arrays output1,output2,...,outputN
```
### How it works
+Metadata will be generated as described below.
```
-[Original BCQ information nodes]
-const(value=[1, 2, 3, -1], name='const1')
-const(value=[1, 2, 3, -2], name='const2')
-const(value=[1, 2, 3, -3], name='const3')
-
-[Generated output_arrays]
-,const1,const2,const3
+< Generated Metadata in BCQ version 1 >
+[0] Starting magic number = {-2e9 + 27}
+[1] Version of BCQ = {1}
+[2] The number of original model outputs = {N | N > 0}
+[3] Bundle size = {7, 8}
+[4] Ending magic number = {2e9 - 27}
```
+- BCQ version 1
+  - Two magic numbers, a starting and an ending one, indicate that the model includes BCQ metadata. To decrease the probability of an accidental value collision, a prime number is used, and it is inserted not only at the beginning but also at the end.
+  - The word **bundle** means a set of BCQ information nodes together with the BCQ-applicable operation they describe. If six BCQ information nodes are used for one operation, the six information nodes and the operation are packaged as a **bundle**, so the bundle size is 6 + 1 = 7. A worked example is shown below.
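+
+For illustration, assuming a model with a single output and six BCQ information nodes per bundle (no `bcqinfo_dequant_weight`), the metadata tensor would hold the values below, following the layout above.
+```
+[-1999999973, 1, 1, 7, 1999999973]
+ (start magic, BCQ version, number of outputs, bundle size, end magic)
+```
+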
### Caution
-- Generated output_arrays will be start with comma.
+- If there is no BCQ information in the original model, no changes will be applied.
diff --git a/compiler/bcq-tools/generate_bcq_metadata.py b/compiler/bcq-tools/generate_bcq_metadata.py
new file mode 100644
index 000000000..9281647e2
--- /dev/null
+++ b/compiler/bcq-tools/generate_bcq_metadata.py
@@ -0,0 +1,228 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import tensorflow as tf
+
+import argparse
+import os
+import sys
+
+# TODO Find a better way to suppress traceback on error
+sys.tracebacklimit = 0
+
+ONE_START_MAGICNUM = int(-2e9 + 27)
+ONE_END_MAGICNUM = int(2e9 - 27)
+
+
+def _get_parser():
+ """
+ Returns an ArgumentParser for generating BCQ metadata.
+ """
+ parser = argparse.ArgumentParser(
+ description=("Command line tool to generate metadata of BCQ nodes"))
+
+ # Input and output path.
+ parser.add_argument(
+ "-i",
+ "--input_path",
+ type=str,
+ help="Full filepath of the input file.",
+ required=True)
+ parser.add_argument(
+ "-o",
+ "--output_path",
+ type=str,
+ help="Full filepath of the output file.",
+ required=True)
+ parser.add_argument(
+ "-O",
+ "--output_arrays",
+ type=str,
+ help="Original model output arrays",
+ required=True)
+
+ return parser
+
+
+# This function is copied from
+# https://github.com/tensorflow/tensorflow/blob/r2.3/tensorflow/examples/label_image/label_image.py#L26
+def load_graph(model_file):
+ graph = tf.Graph()
+ graph_def = tf.compat.v1.GraphDef()
+
+ with open(model_file, "rb") as f:
+ graph_def.ParseFromString(f.read())
+ with graph.as_default():
+ tf.import_graph_def(graph_def, name="")
+
+ return graph
+
+
+def generate_metadata_header(original_graph, bcq_version, output_arrays):
+ # Generating metadata starts
+ metadata_values = np.array([ONE_START_MAGICNUM])
+
+ # Append BCQ version
+ metadata_values = np.append(metadata_values, bcq_version)
+
+ # Append original output count
+ output_cnt = output_arrays.count(',') + 1
+ metadata_values = np.append(metadata_values, output_cnt)
+
+ return metadata_values
+
+
+def generate_bcq_metadata_v1(flags):
+ """
+    BCQv1 contains the following metadata.
+    - The size of each BCQ information bundle
+ """
+
+ is_valid = True
+ allowed_info_names = [
+ "bcqinfo_do_w_x", "bcqinfo_alpha", "bcqinfo_packed_binary_code",
+ "bcqinfo_number_of_clusters", "bcqinfo_size_of_clusters",
+ "bcqinfo_qbits_of_clusters", "bcqinfo_dequant_weight"
+ ]
+
+ original_graph = load_graph(flags.input_path)
+ original_graph_def = original_graph.as_graph_def()
+
+ prefix_infonames_dict = {}
+
+ for node in original_graph_def.node:
+ if node.op == "Const" and "/bcqinfo_" in node.name:
+ prefix_index = node.name.index("/bcqinfo_")
+ prefix = node.name[:prefix_index]
+ infoname = node.name[prefix_index + 1:]
+
+ if infoname not in allowed_info_names:
+ is_valid = False
+ break
+
+ if prefix not in prefix_infonames_dict:
+ prefix_infonames_dict[prefix] = set()
+
+ prefix_infonames_dict[prefix].add(infoname)
+
+    # Every prefix should have the same number of BCQ information nodes
+ num_of_bcqinfo = -1
+ for key in prefix_infonames_dict:
+ infonames = prefix_infonames_dict[key]
+ if num_of_bcqinfo == -1:
+ num_of_bcqinfo = len(infonames)
+ elif num_of_bcqinfo != len(infonames):
+ is_valid = False
+
+ # The number of BCQv1 information should be 6 or 7
+ if num_of_bcqinfo != 6 and num_of_bcqinfo != 7:
+ is_valid = False
+
+ # If BCQ information is invalid, return original model
+    if not is_valid:
+ return original_graph_def
+
+ new_graph_def = tf.compat.v1.GraphDef()
+ for node in original_graph_def.node:
+ new_node = new_graph_def.node.add()
+ new_node.CopyFrom(node)
+
+ # Generate metadata header
+ metadata_values = generate_metadata_header(original_graph, 1, flags.output_arrays)
+
+ # Append metadata of BCQv1
+ metadata_values = np.append(metadata_values, num_of_bcqinfo + 1)
+
+ # Finish generating metadata
+ metadata_values = np.append(metadata_values, ONE_END_MAGICNUM)
+
+ # Generate metadata tensor
+ metadata_tensor = tf.make_tensor_proto(metadata_values, tf.int32)
+
+ new_node = new_graph_def.node.add()
+ new_node.op = "Const"
+ new_node.name = "one_compiler/bcqinfo_one_metadata"
+ new_node.attr["dtype"].CopyFrom(
+ tf.compat.v1.AttrValue(type=tf.int32.as_datatype_enum))
+ new_node.attr["value"].tensor.CopyFrom(metadata_tensor)
+ return new_graph_def
+
+
+def determine_bcq_version(flags):
+ """
+ CAUTION : For now, BCQ has only one version and thus always returns 1 when BCQ
+ information nodes are included. If new BCQ version is introduced,
+ this function must be updated accordingly.
+
+ When BCQ information does not exist, -1 is returned.
+ """
+ bcq_version = -1
+
+ original_graph = load_graph(flags.input_path)
+ original_graph_def = original_graph.as_graph_def()
+
+ for node in original_graph_def.node:
+ if node.op == "Const" and "/bcqinfo_" in node.name:
+ bcq_version = 1
+ break
+
+ return bcq_version
+
+
+def generate_bcq_metadata(flags):
+ """
+ Basic format of metadata is as following.
+ - Magic number indicating start
+ - Version of BCQ Format
+ - The number of original outputs
+ - Metadata based on each BCQ format
+ - Magic number indicating end
+ """
+ program_version = 1
+ model_version = determine_bcq_version(flags)
+
+ if model_version == 1:
+ result_graph_def = generate_bcq_metadata_v1(flags)
+ elif model_version == -1:
+ # When there is no BCQ information, do nothing
+ result_graph_def = load_graph(flags.input_path)
+ else:
+ err_msg = "BCQ version of the model(v{}) ".format(model_version)
+ err_msg += "is higher than "
+ err_msg += "the version supported by this program(v{})".format(program_version)
+ raise SystemExit(err_msg)
+
+ tf.io.write_graph(result_graph_def, '.', flags.output_path, False)
+
+
+def main():
+ # Parse argument.
+ parser = _get_parser()
+ flags = parser.parse_known_args(args=sys.argv[1:])
+
+    # Generate a new pb file in which BCQ metadata is included.
+ generate_bcq_metadata(flags[0])
+
+
+if __name__ == "__main__":
+ try:
+ main()
+ except Exception as e:
+ prog_name = os.path.basename(__file__)
+ print(f"{prog_name}: {type(e).__name__}: " + str(e))
+ sys.exit(255)
diff --git a/compiler/bcq-tools/generate_bcq_output_arrays b/compiler/bcq-tools/generate_bcq_output_arrays
deleted file mode 100644
index 48e8a9373..000000000
--- a/compiler/bcq-tools/generate_bcq_output_arrays
+++ /dev/null
@@ -1,90 +0,0 @@
-#!/usr/bin/env python3
-
-import tensorflow as tf
-
-import argparse
-import sys
-
-
-def _get_parser():
- """
- Returns an ArgumentParser for generating output_arrays.
- """
- parser = argparse.ArgumentParser(
- description=("Command line tool to generated output_arrays of BCQ nodes"))
-
- # Input and output path.
- parser.add_argument(
- "-i",
- "--input_path",
- type=str,
- help="Full filepath of the input file.",
- required=True)
- parser.add_argument(
- "-o",
- "--output_path",
- type=str,
- help="Full filepath of the output file.",
- required=True)
-
- return parser
-
-
-def load_graph(frozen_graph_filename):
- """
- Load graph from frozen pb file
- """
- with tf.compat.v1.gfile.GFile(frozen_graph_filename, "rb") as f:
- graph_def = tf.compat.v1.GraphDef()
- graph_def.ParseFromString(f.read())
- with tf.Graph().as_default() as graph:
- tf.import_graph_def(graph_def, name='')
- return graph
-
-
-def dtype2str(dtype):
- if dtype == "int32":
- return "TF_INT32"
- elif dtype == "int64":
- return "TF_INT64"
- elif dtype == "float32":
- return "TF_FLOAT"
- elif dtype == "bool":
- return "TF_BOOL"
- else:
- raise Exception("Not supported dtype")
-
-
-def print_output_arrays(flags):
- graph_model = load_graph(flags.input_path)
- graph_model_def = graph_model.as_graph_def()
- ops = graph_model.get_operations()
-
- output_names = [op.outputs[0].name for op in ops
- if op.type == "Const" and "bcqinfo_" in op.outputs[0].name]
-
- output_arrays = ""
- for output_name in output_names:
- output_arrays += ","
-
- colon_index = output_name.find(":")
- if colon_index == -1:
- output_arrays += output_name
- else:
- output_arrays += output_name[:colon_index]
-
- f = open(flags.output_path, 'w')
- f.write(output_arrays)
- f.close()
-
-
-def main():
- # Parse argument.
- parser = _get_parser()
- flags = parser.parse_known_args(args=sys.argv[1:])
-
- print_output_arrays(flags[0])
-
-
-if __name__ == "__main__":
- main()
diff --git a/compiler/bcq-tools/generate_bcq_output_arrays.py b/compiler/bcq-tools/generate_bcq_output_arrays.py
new file mode 100644
index 000000000..5d9fbe687
--- /dev/null
+++ b/compiler/bcq-tools/generate_bcq_output_arrays.py
@@ -0,0 +1,118 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import tensorflow as tf
+
+import argparse
+import sys
+
+
+# This function is copied from
+# https://github.com/tensorflow/tensorflow/blob/r2.3/tensorflow/examples/label_image/label_image.py#L26
+def load_graph(model_file):
+ graph = tf.Graph()
+ graph_def = tf.compat.v1.GraphDef()
+
+ with open(model_file, "rb") as f:
+ graph_def.ParseFromString(f.read())
+ with graph.as_default():
+ tf.import_graph_def(graph_def, name="")
+
+ return graph
+
+
+def get_bcq_version(input_path):
+ """
+ If BCQ metadata exists, BCQ version is in the second element.
+ Return -1 when the metadata is not found.
+ """
+ graph = load_graph(input_path)
+ graph_def = graph.as_graph_def()
+ for node in graph_def.node:
+ if node.op == "Const" and "one_compiler/bcqinfo_one_metadata" in node.name:
+ metadata_tensor = tf.make_ndarray(node.attr["value"].tensor)
+ return metadata_tensor[1]
+ return -1
+
+
+def get_bcqinfo_output_arrays_v1(input_path, output_arrays):
+ """
+    This function returns the output arrays of BCQ v1 information bundles.
+    Each bundle consists of one candidate operation (to which BCQ may be
+    applied) and the BCQ constant nodes related to that operation.
+ """
+ graph = load_graph(input_path)
+ ops = graph.get_operations()
+
+ # If there is a constant node named PREFIX_1/bcqinfo_alpha,
+ # it is used for applying BCQ to constant node named PREFIX_1.
+ # Collected prefixes will be used for connecting
+ # bcqinfo nodes and user operations of prefix nodes.
+ prefix_set = set()
+ has_dequant_weight = False
+ for op in ops:
+ if op.type == "Const" and "/bcqinfo_" in op.outputs[0].name:
+            # Metadata does not have a prefix
+ if "one_compiler/bcqinfo_one_metadata" in op.outputs[0].name:
+ continue
+
+ prefix_index = op.outputs[0].name.index("/bcqinfo_")
+ prefix = op.outputs[0].name[:prefix_index]
+ prefix_set.add(prefix)
+
+ # Usually, output name of op is like "outputname:0"
+ # -2 is for removing ":0"
+ infoname = op.outputs[0].name[prefix_index + 1:-2]
+ if infoname == "bcqinfo_dequant_weight":
+ has_dequant_weight = True
+
+ # the name of metadata node
+ ret_output_arrays = ['one_compiler/bcqinfo_one_metadata']
+
+ # given node from user
+ ret_output_arrays += output_arrays.split(',')
+
+ # all pairs of a constant node and related BCQ information nodes.
+ for prefix in prefix_set:
+ ret_output_arrays.append(prefix + '/bcqinfo_do_w_x')
+ ret_output_arrays.append(prefix + '/bcqinfo_alpha')
+ ret_output_arrays.append(prefix + '/bcqinfo_packed_binary_code')
+ ret_output_arrays.append(prefix + '/bcqinfo_number_of_clusters')
+ ret_output_arrays.append(prefix + '/bcqinfo_size_of_clusters')
+ ret_output_arrays.append(prefix + '/bcqinfo_qbits_of_clusters')
+ ret_output_arrays.append(prefix)
+ if has_dequant_weight:
+ ret_output_arrays.append(prefix + '/bcqinfo_dequant_weight')
+
+ return ret_output_arrays
+
+
+def get_bcq_output_arrays(input_path, output_arrays):
+ """Returns BCQ output arrays that the model from input_path has"""
+ program_version = 1
+ model_version = get_bcq_version(input_path)
+
+ if model_version == 1:
+ return get_bcqinfo_output_arrays_v1(input_path, output_arrays)
+ elif model_version == -1:
+ return output_arrays.split(',')
+ else:
+ err_msg = "BCQ version of the model(v{}) ".format(model_version)
+ err_msg += "is higher than "
+ err_msg += "the version supported by this program(v{})".format(program_version)
+ raise SystemExit(err_msg)
diff --git a/compiler/bcq-tools/preserve_bcq_info b/compiler/bcq-tools/preserve_bcq_info
deleted file mode 100644
index 2ede8d4d0..000000000
--- a/compiler/bcq-tools/preserve_bcq_info
+++ /dev/null
@@ -1,116 +0,0 @@
-#!/usr/bin/env python3
-
-import tensorflow as tf
-import numpy as np
-
-import argparse
-import sys
-
-
-def _get_parser():
- """
- Returns an ArgumentParser for preserving BCQ information.
- """
- parser = argparse.ArgumentParser(
- description=("Command line tool to preserve BCQ information"))
-
- # Input and output path.
- parser.add_argument(
- "-i",
- "--input_path",
- type=str,
- help="Full filepath of the input file.",
- required=True)
- parser.add_argument(
- "-o",
- "--output_path",
- type=str,
- help="Full filepath of the output file.",
- required=True)
-
- return parser
-
-
-def load_graph(frozen_graph_filename):
- """
- Load graph from frozen pb file
- """
- with tf.compat.v1.gfile.GFile(frozen_graph_filename, "rb") as f:
- graph_def = tf.compat.v1.GraphDef()
- graph_def.ParseFromString(f.read())
- with tf.Graph().as_default() as graph:
- tf.import_graph_def(graph_def, name='')
- return graph
-
-
-def preserve_bcq_info(flags):
- """
- Generate unique dummy value from -1 to -N.
-
- We use negative values to preserve BCQ information because
- positive values may cause some confusion with real BCQ information values.
- """
-
- class UniqueValueGen:
- def __init__(self):
- self.unique_value = -1
-
- def gen(self):
- val = self.unique_value
- self.unique_value = val - 1
- return val
-
- unique_value = UniqueValueGen()
-
- original_graph_model = load_graph(flags.input_path)
- original_graph_model_def = original_graph_model.as_graph_def()
-
- new_graph = tf.compat.v1.GraphDef()
- substitution_dict = {}
-
- DT_INT32 = None # Just for copying DT_INT32 attribute value
-
- for node in original_graph_model_def.node:
- if node.op == "Const":
- # Because bcqinfo_do_w_x is BOOL type, we cannot add dummy value at the end.
- # Therefore we should convert the type to INT32 type.
- if "/bcqinfo_do_w_x" in node.name:
- original_tensor = tf.make_ndarray(node.attr["value"].tensor)
- substitution_dict[node.name] = tf.make_tensor_proto(
- [int(original_tensor[0]), unique_value.gen()], tf.int32)
-
- preserved_bcqinfo_list = ["/bcqinfo_number_of_clusters", "/bcqinfo_size_of_clusters",
- "/bcqinfo_qbits_of_clusters"]
-
- if any(name in node.name for name in preserved_bcqinfo_list):
- original_tensor = tf.make_ndarray(
- node.attr["value"].tensor) # variable name change
- substitution_dict[node.name] = tf.make_tensor_proto(
- np.append(original_tensor, unique_value.gen()), tf.int32)
- DT_INT32 = node.attr["dtype"]
-
- for node in original_graph_model_def.node:
- if node.name in substitution_dict:
- new_node = new_graph.node.add()
- new_node.op = "Const"
- new_node.name = node.name
- new_node.attr["dtype"].CopyFrom(DT_INT32)
- new_node.attr["value"].tensor.CopyFrom(substitution_dict[node.name])
- else:
- new_node = new_graph.node.add()
- new_node.CopyFrom(node)
-
- tf.io.write_graph(new_graph, '.', flags.output_path, False)
-
-
-def main():
- # Parse argument.
- parser = _get_parser()
- flags = parser.parse_known_args(args=sys.argv[1:])
-
- # Generate a new pb file, which BCQ information is preserved.
- preserve_bcq_info(flags[0])
-
-
-if __name__ == "__main__":
- main()
diff --git a/compiler/bino/include/bino.h b/compiler/bino/include/bino.h
index fc22d1285..bf540dffe 100644
--- a/compiler/bino/include/bino.h
+++ b/compiler/bino/include/bino.h
@@ -33,8 +33,8 @@ public:
public:
template <typename T>
auto operator()(const std::pair<T, T> &p) const
- -> decltype(std::make_pair(std::declval<Callable>()(p.first),
- std::declval<Callable>()(p.second)))
+ -> decltype(std::make_pair(std::declval<Callable>()(p.first),
+ std::declval<Callable>()(p.second)))
{
return std::make_pair(f(p.first), f(p.second));
}
diff --git a/compiler/caffegen/CMakeLists.txt b/compiler/caffegen/CMakeLists.txt
index 334174dcd..b963b5294 100644
--- a/compiler/caffegen/CMakeLists.txt
+++ b/compiler/caffegen/CMakeLists.txt
@@ -7,7 +7,6 @@ endif(NOT Caffe_FOUND)
file(GLOB_RECURSE SOURCES "src/*.cpp")
add_executable(caffegen ${SOURCES})
-target_link_libraries(caffegen stdex)
target_link_libraries(caffegen cli)
# NOTE "Caffe" package provides both caffe and caffeproto target
# NOTE "caffeproto" is linked to "caffe"
diff --git a/compiler/caffegen/src/Driver.cpp b/compiler/caffegen/src/Driver.cpp
index 81b01e6f1..17e3ebb7f 100644
--- a/compiler/caffegen/src/Driver.cpp
+++ b/compiler/caffegen/src/Driver.cpp
@@ -20,12 +20,12 @@
#include "MergeCommand.h"
#include <cli/App.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <map>
#include <string>
-using stdex::make_unique;
+using std::make_unique;
int main(int argc, char **argv)
{
diff --git a/compiler/circle-eval-diff/CMakeLists.txt b/compiler/circle-eval-diff/CMakeLists.txt
new file mode 100644
index 000000000..d5a62301c
--- /dev/null
+++ b/compiler/circle-eval-diff/CMakeLists.txt
@@ -0,0 +1,42 @@
+set(DRIVER "driver/Driver.cpp")
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_executable(circle-eval-diff ${DRIVER} ${SOURCES})
+target_include_directories(circle-eval-diff PRIVATE include)
+target_include_directories(circle-eval-diff PRIVATE src)
+
+target_link_libraries(circle-eval-diff arser)
+target_link_libraries(circle-eval-diff safemain)
+target_link_libraries(circle-eval-diff foder)
+target_link_libraries(circle-eval-diff loco)
+target_link_libraries(circle-eval-diff luci_import)
+target_link_libraries(circle-eval-diff luci_lang)
+target_link_libraries(circle-eval-diff luci_interpreter)
+target_link_libraries(circle-eval-diff dio_hdf5)
+target_link_libraries(circle-eval-diff vconone)
+
+install(TARGETS circle-eval-diff DESTINATION bin)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+# circle-eval-diff is executable, so we do not link it to the test.
+# Instead, we use TEST_SOURCES to specify sources used for tests.
+set(TEST_SOURCES
+ "src/MetricPrinter.cpp"
+ "src/Tensor.cpp"
+ "src/InputDataLoader.cpp")
+
+nnas_find_package(GTest REQUIRED)
+GTest_AddTest(circle_eval_diff_test ${TESTS} ${TEST_SOURCES})
+target_include_directories(circle_eval_diff_test PRIVATE include)
+target_include_directories(circle_eval_diff_test PRIVATE src)
+target_link_libraries(circle_eval_diff_test luci_testhelper)
+target_link_libraries(circle_eval_diff_test nncc_coverage)
+target_link_libraries(circle_eval_diff_test dio_hdf5)
+target_link_libraries(circle_eval_diff_test loco)
+target_link_libraries(circle_eval_diff_test luci_lang)
diff --git a/compiler/circle-eval-diff/README.md b/compiler/circle-eval-diff/README.md
new file mode 100644
index 000000000..a3727cc6d
--- /dev/null
+++ b/compiler/circle-eval-diff/README.md
@@ -0,0 +1,51 @@
+# circle-eval-diff
+
+_circle-eval-diff_ compares inference results of two circle models.
+
+## Use cases
+
+1. _circle-eval-diff_ can be used to evaluate reconstruction errors of quantized models.
+2. _circle-eval-diff_ can be used to verify that an optimization (or any kind of value-preserving conversion) is safe.
+
+## Usage
+
+Run circle-eval-diff with the following arguments.
+
+--first_model: first model to compare (.circle).
+
+--second_model: second model to compare (.circle).
+
+--first_input_data: input data for the first model (.h5, directory). Random data will be used if this argument is not given.
+
+--second_input_data: input data for the second model (.h5, directory). Random data will be used if this argument is not given.
+
+--input_data_format: input data format (h5 (default), directory).
+
+--print_<metric>: print the given metric (one of mae, mape, mpeir, top1_match, top5_match, mse). Multiple metric flags can be given at once.
+
+```
+$ ./circle-eval-diff \
+  --first_model <first_model> \
+  --second_model <second_model> \
+  --first_input_data <first_input_data> \
+  --second_input_data <second_input_data> \
+  --input_data_format <data_format> \
+  --print_mae
+```
+
+For example,
+```
+$ ./circle-eval-diff \
+  --first_model A.circle \
+  --second_model B.circle \
+  --first_input_data A.h5 \
+  --second_input_data B.h5 \
+  --input_data_format h5 \
+  --print_mae
+```
+
+It will print MAE (Mean Absolute Error) between the inference result of A.circle with A.h5 and that of B.circle with B.h5.
+
+## Note
+
+Circle models are executed by _luci-interpreter_.
diff --git a/compiler/circle-eval-diff/driver/Driver.cpp b/compiler/circle-eval-diff/driver/Driver.cpp
new file mode 100644
index 000000000..7e63ec88c
--- /dev/null
+++ b/compiler/circle-eval-diff/driver/Driver.cpp
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleEvalDiff.h"
+
+#include <arser/arser.h>
+#include <vconone/vconone.h>
+
+#include <algorithm>
+#include <cctype>
+#include <iostream>
+
+using namespace circle_eval_diff;
+
+namespace
+{
+
+std::string to_lower_case(std::string s)
+{
+ std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) { return std::tolower(c); });
+ return s;
+}
+
+InputFormat to_input_format(const std::string &str)
+{
+ auto small_str = to_lower_case(str);
+ if (small_str.compare("h5") == 0)
+ return InputFormat::H5;
+
+ if (small_str.compare("directory") == 0 || small_str.compare("dir") == 0)
+ return InputFormat::DIR;
+
+ throw std::runtime_error("Unsupported input format.");
+}
+
+void print_version(void)
+{
+ std::cout << "circle-eval-diff version " << vconone::get_string() << std::endl;
+ std::cout << vconone::get_copyright() << std::endl;
+}
+
+} // namespace
+
+int entry(const int argc, char **argv)
+{
+ arser::Arser arser("Compare inference results of two circle models");
+
+ arser::Helper::add_version(arser, print_version);
+
+ arser.add_argument("--first_model").required(true).help("First input model filepath");
+
+ arser.add_argument("--second_model").required(true).help("Second input model filepath");
+
+ arser.add_argument("--first_input_data")
+ .help("Input data filepath for the first model. If not given, circle-eval-diff will run with "
+ "randomly generated data");
+
+ arser.add_argument("--second_input_data")
+ .help("Input data filepath for the second model. If not given, circle-eval-diff will run with "
+ "randomly generated data");
+
+ arser.add_argument("--dump_output_with_prefix")
+ .help("Dump output to files. <prefix> should be given as an argument. "
+ "Outputs are saved in <prefix>.<data_index>.first.output<output_index> and "
+ "<prefix>.<data_index>.second.output<output_index>.");
+
+ arser.add_argument("--print_mae").nargs(0).default_value(false).help("Print Mean Absolute Error");
+
+ arser.add_argument("--print_mape")
+ .nargs(0)
+ .default_value(false)
+ .help("Print Mean Absolute PercentageError");
+
+ arser.add_argument("--print_mpeir")
+ .nargs(0)
+ .default_value(false)
+ .help("Print Mean Peak Error to Interval Ratio");
+
+ arser.add_argument("--print_top1_match")
+ .nargs(0)
+ .default_value(false)
+ .help("Print Mean Top-1 Match Ratio");
+
+ arser.add_argument("--print_top5_match")
+ .nargs(0)
+ .default_value(false)
+ .help("Print Mean Top-5 Match Ratio");
+
+ arser.add_argument("--print_mse").nargs(0).default_value(false).help("Print Mean Squared Error");
+
+ arser.add_argument("--input_data_format")
+ .default_value("h5")
+ .help("Input data format. h5/hdf5 (default) or directory");
+
+ try
+ {
+ arser.parse(argc, argv);
+ }
+ catch (const std::runtime_error &err)
+ {
+ std::cout << err.what() << std::endl;
+ std::cout << arser;
+ return 255;
+ }
+
+ const auto first_model_path = arser.get<std::string>("--first_model");
+ const auto second_model_path = arser.get<std::string>("--second_model");
+
+ // Default values
+ std::string first_input_data_path;
+ std::string second_input_data_path;
+ std::string metric;
+ std::string input_data_format;
+ std::string output_prefix;
+
+ if (arser["--first_input_data"])
+ first_input_data_path = arser.get<std::string>("--first_input_data");
+
+ if (arser["--second_input_data"])
+ second_input_data_path = arser.get<std::string>("--second_input_data");
+
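+  // Input data must be given for both models or for neither (then random data is used).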
+ if (arser["--first_input_data"] != arser["--second_input_data"])
+ throw std::runtime_error("Input data path should be given for both first_model and "
+ "second_model, or neither must be given.");
+
+ if (arser["--dump_output_with_prefix"])
+ output_prefix = arser.get<std::string>("--dump_output_with_prefix");
+
+ // Set Metrics
+ std::vector<Metric> metrics;
+ if (arser["--print_mae"] and arser.get<bool>("--print_mae"))
+ {
+ metrics.emplace_back(Metric::MAE);
+ }
+ if (arser["--print_mape"] and arser.get<bool>("--print_mape"))
+ {
+ metrics.emplace_back(Metric::MAPE);
+ }
+ if (arser["--print_mpeir"] and arser.get<bool>("--print_mpeir"))
+ {
+ metrics.emplace_back(Metric::MPEIR);
+ }
+ if (arser["--print_top1_match"] and arser.get<bool>("--print_top1_match"))
+ {
+ metrics.emplace_back(Metric::MTOP1);
+ }
+ if (arser["--print_top5_match"] and arser.get<bool>("--print_top5_match"))
+ {
+ metrics.emplace_back(Metric::MTOP5);
+ }
+ if (arser["--print_mse"] and arser.get<bool>("--print_mse"))
+ {
+ metrics.emplace_back(Metric::MSE);
+ }
+
+ input_data_format = arser.get<std::string>("--input_data_format");
+
+ auto ctx = std::make_unique<CircleEvalDiff::Context>();
+ {
+ ctx->first_model_path = first_model_path;
+ ctx->second_model_path = second_model_path;
+ ctx->first_input_data_path = first_input_data_path;
+ ctx->second_input_data_path = second_input_data_path;
+ ctx->metric = metrics;
+ ctx->input_format = to_input_format(input_data_format);
+ ctx->output_prefix = output_prefix;
+ }
+
+ CircleEvalDiff ced(std::move(ctx));
+
+ ced.init();
+
+ ced.evalDiff();
+
+ return EXIT_SUCCESS;
+}
diff --git a/compiler/circle-eval-diff/include/CircleEvalDiff.h b/compiler/circle-eval-diff/include/CircleEvalDiff.h
new file mode 100644
index 000000000..7894480ac
--- /dev/null
+++ b/compiler/circle-eval-diff/include/CircleEvalDiff.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_EVAL_DIFF_H__
+#define __CIRCLE_EVAL_DIFF_H__
+
+#include <luci/IR/Module.h>
+#include <luci_interpreter/Interpreter.h>
+
+#include "InputDataLoader.h"
+#include "MetricPrinter.h"
+
+#include <string>
+#include <memory>
+#include <vector>
+
+namespace circle_eval_diff
+{
+
+// Forward declaration
+class ModuleEvalDiff;
+
+enum class Metric
+{
+ Undefined, // For debugging
+ MAE, // Mean Absolute Error
+  MAPE,  // Mean Absolute Percentage Error
+ MPEIR, // Mean Peak Error to Interval Ratio
+ MTOP1, // Mean Top-1 Match Ratio
+ MTOP5, // Mean Top-5 Match Ratio
+ MSE, // Mean Squared Error
+};
+
+class CircleEvalDiff final
+{
+public:
+ struct Context
+ {
+ std::string first_model_path;
+ std::string second_model_path;
+ std::string first_input_data_path;
+ std::string second_input_data_path;
+ std::vector<Metric> metric;
+ InputFormat input_format = InputFormat::Undefined;
+ std::string output_prefix;
+ };
+
+public:
+ CircleEvalDiff(std::unique_ptr<Context> &&ctx);
+
+ ~CircleEvalDiff();
+
+ void init();
+
+ // Evaluate two circle models for the given input data and compare the results
+ void evalDiff(void) const;
+
+private:
+ std::unique_ptr<Context> _ctx;
+ std::unique_ptr<luci::Module> _first_module;
+ std::unique_ptr<luci::Module> _second_module;
+ std::vector<std::unique_ptr<MetricPrinter>> _metrics;
+};
+
+} // namespace circle_eval_diff
+
+#endif // __CIRCLE_EVAL_DIFF_H__
diff --git a/compiler/circle-eval-diff/requires.cmake b/compiler/circle-eval-diff/requires.cmake
new file mode 100644
index 000000000..cae9b7c62
--- /dev/null
+++ b/compiler/circle-eval-diff/requires.cmake
@@ -0,0 +1,7 @@
+require("loco")
+require("luci")
+require("luci-interpreter")
+require("dio-hdf5")
+require("safemain")
+require("arser")
+require("vconone")
diff --git a/compiler/circle-eval-diff/src/CircleEvalDiff.cpp b/compiler/circle-eval-diff/src/CircleEvalDiff.cpp
new file mode 100644
index 000000000..43e026bf6
--- /dev/null
+++ b/compiler/circle-eval-diff/src/CircleEvalDiff.cpp
@@ -0,0 +1,264 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleEvalDiff.h"
+#include "InputDataLoader.h"
+#include "MetricPrinter.h"
+#include "Tensor.h"
+
+#include <foder/FileLoader.h>
+#include <luci/Importer.h>
+
+#include <fstream>
+#include <iostream>
+#include <stdexcept>
+
+namespace
+{
+
+bool same_shape(const luci::CircleNode *a, const luci::CircleNode *b)
+{
+ if (a->rank() != b->rank())
+ return false;
+
+ for (uint32_t i = 0; i < a->rank(); i++)
+ {
+ if (not(a->dim(i) == b->dim(i)))
+ return false;
+ }
+
+ return true;
+}
+
+bool same_dtype(const luci::CircleNode *a, const luci::CircleNode *b)
+{
+ return a->dtype() == b->dtype();
+}
+
+std::unique_ptr<luci::Module> import(const std::string &model_path)
+{
+ // Load model from the file
+ foder::FileLoader loader{model_path};
+ std::vector<char> model_data = loader.load();
+
+ // Verify flatbuffers
+ flatbuffers::Verifier verifier{reinterpret_cast<const uint8_t *>(model_data.data()),
+ model_data.size()};
+ if (not circle::VerifyModelBuffer(verifier))
+ {
+ throw std::runtime_error("Failed to verify circle '" + model_path + "'");
+ }
+
+ auto circle_model = circle::GetModel(model_data.data());
+
+ if (not circle_model)
+ throw std::runtime_error("Failed to load '" + model_path + "'");
+
+ auto module = luci::Importer().importModule(circle_model);
+
+ if (not module)
+ throw std::runtime_error("Failed to load '" + model_path + "'");
+
+ return module;
+}
+
+const std::vector<loco::Node *> inputs_of(const luci::Module *module)
+{
+ return loco::input_nodes(module->graph());
+}
+
+const std::vector<loco::Node *> outputs_of(const luci::Module *module)
+{
+ return loco::output_nodes(module->graph());
+}
+
+void writeDataToFile(const std::string &filename, const char *data, size_t data_size)
+{
+ std::ofstream fs(filename, std::ofstream::binary);
+ if (fs.fail())
+ throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
+ if (fs.write(data, data_size).fail())
+ {
+ throw std::runtime_error("Failed to write data to file \"" + filename + "\".\n");
+ }
+}
+
+void checkOutputs(const luci::Module *first, const luci::Module *second)
+{
+ const auto first_output = outputs_of(first);
+ const auto second_output = outputs_of(second);
+
+ if (first_output.size() != second_output.size())
+ throw std::runtime_error("Models have different output counts");
+
+ for (uint32_t i = 0; i < first_output.size(); i++)
+ {
+ const auto first_node = loco::must_cast<luci::CircleNode *>(first_output[i]);
+ const auto second_node = loco::must_cast<luci::CircleNode *>(second_output[i]);
+
+ if (not same_shape(first_node, second_node))
+ throw std::runtime_error("Output shape mismatch (" + first_node->name() + ", " +
+ second_node->name() + ")");
+
+ if (not same_dtype(first_node, second_node))
+ throw std::runtime_error("Output dtype mismatch (" + first_node->name() + ", " +
+ second_node->name() + ")");
+ }
+}
+
+} // namespace
+
+namespace circle_eval_diff
+{
+
+std::vector<std::shared_ptr<Tensor>> interpret(const luci::Module *module,
+ const InputDataLoader::Data &data)
+{
+ auto interpreter = std::make_unique<luci_interpreter::Interpreter>(module);
+
+ auto input_nodes = ::inputs_of(module);
+ auto output_nodes = ::outputs_of(module);
+
+ for (uint32_t input_idx = 0; input_idx < data.size(); input_idx++)
+ {
+ auto input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
+ assert(input_node->index() == input_idx);
+
+ auto input_data = data.at(input_idx);
+ interpreter->writeInputTensor(input_node, input_data.buffer(), input_data.byte_size());
+ }
+
+ interpreter->interpret();
+
+ std::vector<std::shared_ptr<Tensor>> outputs;
+ for (uint32_t output_idx = 0; output_idx < output_nodes.size(); output_idx++)
+ {
+ auto output_node = loco::must_cast<const luci::CircleOutput *>(output_nodes[output_idx]);
+ assert(output_node->index() == output_idx);
+
+ auto tensor = createEmptyTensor(output_node);
+ interpreter->readOutputTensor(output_node, tensor->buffer(), tensor->byte_size());
+ outputs.emplace_back(tensor);
+ }
+
+ return outputs;
+}
+
+CircleEvalDiff::CircleEvalDiff(std::unique_ptr<Context> &&ctx) : _ctx(std::move(ctx))
+{
+ // DO NOTHING
+}
+
+CircleEvalDiff::~CircleEvalDiff() = default;
+
+void CircleEvalDiff::init()
+{
+ _first_module = import(_ctx->first_model_path);
+ _second_module = import(_ctx->second_model_path);
+
+  // Check that the modules have the same output signature (dtype/shape).
+  // An exception is thrown if the signatures differ.
+ checkOutputs(_first_module.get(), _second_module.get());
+
+  // Create a metric printer for each requested metric
+  for (auto metric : _ctx->metric)
+  {
+ {
+ switch (metric)
+ {
+ case Metric::MAE:
+ {
+ _metrics.emplace_back(std::make_unique<MAEPrinter>());
+ break;
+ }
+ case Metric::MAPE:
+ {
+ _metrics.emplace_back(std::make_unique<MAPEPrinter>());
+ break;
+ }
+ case Metric::MPEIR:
+ {
+ _metrics.emplace_back(std::make_unique<MPEIRPrinter>());
+ break;
+ }
+ case Metric::MTOP1:
+ {
+ _metrics.emplace_back(std::make_unique<TopKMatchPrinter>(1));
+ break;
+ }
+ case Metric::MTOP5:
+ {
+ _metrics.emplace_back(std::make_unique<TopKMatchPrinter>(5));
+ break;
+ }
+ case Metric::MSE:
+ {
+ _metrics.emplace_back(std::make_unique<MSEPrinter>());
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported metric.");
+ }
+ _metrics.back()->init(_first_module.get(), _second_module.get());
+ }
+}
+
+void CircleEvalDiff::evalDiff(void) const
+{
+ auto first_input_loader = circle_eval_diff::makeDataLoader(
+ _ctx->first_input_data_path, _ctx->input_format, ::inputs_of(_first_module.get()));
+ auto second_input_loader = circle_eval_diff::makeDataLoader(
+ _ctx->second_input_data_path, _ctx->input_format, ::inputs_of(_second_module.get()));
+
+ for (uint32_t data_idx = 0; data_idx < first_input_loader->size(); data_idx++)
+ {
+ std::cout << "Evaluating " << data_idx << "'th data" << std::endl;
+
+ auto first_data = first_input_loader->get(data_idx);
+ auto second_data = second_input_loader->get(data_idx);
+
+ auto first_output = interpret(_first_module.get(), first_data);
+ auto second_output = interpret(_second_module.get(), second_data);
+
+ for (auto &metric : _metrics)
+ {
+ metric->accumulate(first_output, second_output);
+ }
+
+    if (_ctx->output_prefix.empty())
+ continue;
+
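+    // Dump raw outputs to <output_prefix>.<data_idx>.first.output<i> and
+    // <output_prefix>.<data_idx>.second.output<i>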
+ for (uint32_t i = 0; i < first_output.size(); i++)
+ {
+ auto out = first_output[i];
+      writeDataToFile(_ctx->output_prefix + "." + std::to_string(data_idx) + ".first.output" +
+                        std::to_string(i),
+                      reinterpret_cast<const char *>(out->buffer()), out->byte_size());
+ }
+ for (uint32_t i = 0; i < second_output.size(); i++)
+ {
+ auto out = second_output[i];
+      writeDataToFile(_ctx->output_prefix + "." + std::to_string(data_idx) + ".second.output" +
+                        std::to_string(i),
+                      reinterpret_cast<const char *>(out->buffer()), out->byte_size());
+ }
+ }
+
+ for (auto &metric : _metrics)
+ {
+ std::cout << metric.get() << std::endl;
+ }
+}
+
+} // namespace circle_eval_diff
diff --git a/compiler/circle-eval-diff/src/InputDataLoader.cpp b/compiler/circle-eval-diff/src/InputDataLoader.cpp
new file mode 100644
index 000000000..7b491a37a
--- /dev/null
+++ b/compiler/circle-eval-diff/src/InputDataLoader.cpp
@@ -0,0 +1,238 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "InputDataLoader.h"
+
+#include <dio_hdf5/HDF5Importer.h>
+#include <loco/IR/Graph.h>
+#include <luci/IR/CircleNodes.h>
+
+#include <cstring>
+#include <dirent.h>
+#include <fstream>
+#include <vector>
+
+using DataType = loco::DataType;
+using Shape = std::vector<loco::Dimension>;
+
+namespace circle_eval_diff
+{
+
+// Check the type and the shape of CircleInput
+void verifyTypeShape(const luci::CircleInput *input_node, const DataType &dtype, const Shape &shape)
+{
+ // Type check
+ if (dtype != input_node->dtype())
+ throw std::runtime_error("Wrong input type.");
+
+ if (shape.size() != input_node->rank())
+ throw std::runtime_error("Input rank mismatch.");
+
+ for (uint32_t i = 0; i < shape.size(); i++)
+ {
+ if (not(shape.at(i) == input_node->dim(i)))
+ throw std::runtime_error("Input shape mismatch.");
+ }
+}
+
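+// Return the byte size of each input node (dtype size times element count).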
+std::vector<size_t> getEachByteSizeOf(const std::vector<loco::Node *> &nodes)
+{
+ std::vector<size_t> vec;
+
+ for (const auto node : nodes)
+ {
+ const auto input_node = loco::must_cast<const luci::CircleInput *>(node);
+ const auto dtype_size = loco::size(input_node->dtype());
+ size_t element_size = 1;
+
+ for (uint32_t index = 0; index < input_node->rank(); index++)
+ {
+ element_size *= input_node->dim(index).value();
+ }
+
+ vec.push_back(element_size * dtype_size);
+ }
+
+ return vec;
+}
+
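+// Return the total byte size of all input nodes combined.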
+size_t getTotalByteSizeOf(const std::vector<loco::Node *> &nodes)
+{
+ size_t total_byte_size = 0;
+
+ for (const auto node : nodes)
+ {
+ const auto input_node = loco::must_cast<const luci::CircleInput *>(node);
+ size_t byte_size = loco::size(input_node->dtype());
+
+ for (uint32_t index = 0; index < input_node->rank(); index++)
+ {
+ byte_size *= input_node->dim(index).value();
+ }
+
+ total_byte_size += byte_size;
+ }
+
+ return total_byte_size;
+}
+
+} // namespace circle_eval_diff
+
+namespace circle_eval_diff
+{
+
+HDF5Loader::HDF5Loader(const std::string &file_path, const std::vector<loco::Node *> &input_nodes)
+ : _input_nodes{input_nodes}
+{
+ try
+ {
+ using HDF5Importer = dio::hdf5::HDF5Importer;
+
+ _hdf5 = std::make_unique<HDF5Importer>(file_path);
+ _hdf5->importGroup("value");
+ }
+ catch (const H5::Exception &e)
+ {
+ H5::Exception::printErrorStack();
+ throw std::runtime_error("HDF5 error occurred.");
+ }
+}
+
+uint32_t HDF5Loader::size(void) const { return _hdf5->numData(); }
+
+InputDataLoader::Data HDF5Loader::get(uint32_t data_idx) const
+{
+ Data data;
+ data.resize(_input_nodes.size());
+
+ for (uint32_t input_idx = 0; input_idx < _input_nodes.size(); input_idx++)
+ {
+ auto input_node = loco::must_cast<luci::CircleInput *>(_input_nodes.at(input_idx));
+ assert(input_node->index() == input_idx);
+
+    data.at(input_idx) = *createEmptyTensor(input_node);
+
+ auto input_buffer = data.at(input_idx).buffer();
+ const auto input_buffer_bytes = data.at(input_idx).byte_size();
+
+ try
+ {
+ if (_hdf5->isRawData())
+ {
+ _hdf5->readTensor(data_idx, input_idx, input_buffer, input_buffer_bytes);
+ }
+ else
+ {
+ DataType dtype;
+ Shape shape;
+ _hdf5->readTensor(data_idx, input_idx, &dtype, &shape, input_buffer, input_buffer_bytes);
+
+ // Check the type and the shape of the input data is valid
+ verifyTypeShape(input_node, dtype, shape);
+ }
+ }
+ catch (const H5::Exception &e)
+ {
+ H5::Exception::printErrorStack();
+ throw std::runtime_error("HDF5 error occurred.");
+ }
+ }
+
+ return data;
+}
+
+DirectoryLoader::DirectoryLoader(const std::string &dir_path,
+ const std::vector<loco::Node *> &input_nodes)
+ : _input_nodes{input_nodes}
+{
+ DIR *dir = opendir(dir_path.c_str());
+ if (not dir)
+ {
+ throw std::runtime_error("Cannot open directory \"" + dir_path + "\".");
+ }
+
+ struct dirent *entry = nullptr;
+ while ((entry = readdir(dir)))
+ {
+ // Skip if the entry is not a regular file
+ if (entry->d_type != DT_REG)
+ continue;
+
+ _data_paths.push_back(dir_path + "/" + entry->d_name);
+ }
+
+ closedir(dir);
+}
+
+uint32_t DirectoryLoader::size(void) const { return _data_paths.size(); }
+
+InputDataLoader::Data DirectoryLoader::get(uint32_t data_idx) const
+{
+ // Read raw data
+ const auto input_total_bytes = getTotalByteSizeOf(_input_nodes);
+ std::vector<char> input_data(input_total_bytes);
+ const auto raw_data_path = _data_paths.at(data_idx);
+ std::ifstream fs(raw_data_path, std::ifstream::binary);
+
+ if (fs.fail())
+ {
+ throw std::runtime_error("Cannot open file \"" + raw_data_path + "\".");
+ }
+ if (fs.read(input_data.data(), input_total_bytes).fail())
+ {
+ throw std::runtime_error("Failed to read raw data from file \"" + raw_data_path + "\".");
+ }
+
+ // Make Tensor from raw data
+ auto input_data_cur = input_data.data();
+
+ Data data;
+ data.resize(_input_nodes.size());
+ std::vector<size_t> input_bytes = getEachByteSizeOf(_input_nodes);
+ for (uint32_t index = 0; index < _input_nodes.size(); index++)
+ {
+ const auto input_node = loco::must_cast<const luci::CircleInput *>(_input_nodes.at(index));
+ auto &tensor = data.at(index);
+    tensor = *createEmptyTensor(input_node);
+ auto buffer = tensor.buffer();
+ std::memcpy(buffer, input_data_cur, input_bytes.at(index));
+ input_data_cur += input_bytes.at(index);
+ }
+
+ return data;
+}
+
+std::unique_ptr<InputDataLoader> makeDataLoader(const std::string &file_path,
+ const InputFormat &format,
+ const std::vector<loco::Node *> &input_nodes)
+{
+ switch (format)
+ {
+ case InputFormat::H5:
+ {
+ return std::make_unique<HDF5Loader>(file_path, input_nodes);
+ }
+ case InputFormat::DIR:
+ {
+ return std::make_unique<DirectoryLoader>(file_path, input_nodes);
+ }
+ default:
+ throw std::runtime_error{"Unsupported input format."};
+ }
+}
+
+} // namespace circle_eval_diff
diff --git a/compiler/circle-eval-diff/src/InputDataLoader.h b/compiler/circle-eval-diff/src/InputDataLoader.h
new file mode 100644
index 000000000..14921b239
--- /dev/null
+++ b/compiler/circle-eval-diff/src/InputDataLoader.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_EVAL_DIFF_INPUT_DATA_LOADER_H__
+#define __CIRCLE_EVAL_DIFF_INPUT_DATA_LOADER_H__
+
+#include <dio_hdf5/HDF5Importer.h>
+#include <loco/IR/Node.h>
+#include <luci/IR/CircleNodes.h>
+
+#include "Tensor.h"
+
+#include <memory>
+#include <string>
+
+namespace circle_eval_diff
+{
+
+void verifyTypeShape(const luci::CircleInput *input_node, const loco::DataType &dtype,
+ const std::vector<loco::Dimension> &shape);
+
+} // namespace circle_eval_diff
+
+namespace circle_eval_diff
+{
+
+enum class InputFormat
+{
+ Undefined, // For debugging
+ H5,
+ DIR, // directory
+  // TODO Implement Random input format
+};
+
+class InputDataLoader
+{
+public:
+ using Data = std::vector<Tensor>;
+
+public:
+ virtual ~InputDataLoader() = default;
+
+public:
+ virtual uint32_t size(void) const = 0;
+
+public:
+ virtual Data get(uint32_t data_idx) const = 0;
+};
+
+class HDF5Loader final : public InputDataLoader
+{
+public:
+ HDF5Loader(const std::string &file_path, const std::vector<loco::Node *> &input_nodes);
+
+public:
+ uint32_t size(void) const final;
+ Data get(uint32_t data_idx) const final;
+
+private:
+ const std::vector<loco::Node *> _input_nodes;
+ std::unique_ptr<dio::hdf5::HDF5Importer> _hdf5;
+};
+
+// Loads raw data binary files from a directory. Each file must contain
+// the data for all model inputs, concatenated in input order.
+class DirectoryLoader final : public InputDataLoader
+{
+public:
+ DirectoryLoader(const std::string &dir_path, const std::vector<loco::Node *> &input_nodes);
+
+public:
+ uint32_t size(void) const final;
+ Data get(uint32_t data_idx) const final;
+
+private:
+ const std::vector<loco::Node *> _input_nodes;
+ std::vector<std::string> _data_paths;
+};
+
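+// Create a loader matching the given input format.
+//
+// Example usage (an illustrative sketch; the file name and the loop body are
+// assumptions, not part of this API):
+//
+//   auto loader = makeDataLoader("input.h5", InputFormat::H5, input_nodes);
+//   for (uint32_t n = 0; n < loader->size(); n++)
+//   {
+//     InputDataLoader::Data data = loader->get(n);
+//     // ... feed `data` to an interpreter ...
+//   }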
+std::unique_ptr<InputDataLoader> makeDataLoader(const std::string &file_path,
+ const InputFormat &format,
+ const std::vector<loco::Node *> &input_nodes);
+
+} // namespace circle_eval_diff
+
+#endif // __CIRCLE_EVAL_DIFF_INPUT_DATA_LOADER_H__
diff --git a/compiler/circle-eval-diff/src/InputDataLoader.test.cpp b/compiler/circle-eval-diff/src/InputDataLoader.test.cpp
new file mode 100644
index 000000000..cbe78797b
--- /dev/null
+++ b/compiler/circle-eval-diff/src/InputDataLoader.test.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <luci/IR/CircleNodes.h>
+
+#include "InputDataLoader.h"
+
+using namespace circle_eval_diff;
+
+TEST(CircleEvalInputDataLoaderTest, verifyTypeShapeTest)
+{
+ luci::CircleInput input;
+ input.dtype(loco::DataType::FLOAT32);
+ input.rank(4);
+ input.dim(0).set(1);
+ input.dim(1).set(3);
+ input.dim(2).set(3);
+ input.dim(3).set(2);
+
+ loco::DataType right_data_type{loco::DataType::FLOAT32};
+ std::vector<loco::Dimension> right_shape;
+ right_shape.emplace_back(1);
+ right_shape.emplace_back(3);
+ right_shape.emplace_back(3);
+ right_shape.emplace_back(2);
+
+ EXPECT_NO_THROW(verifyTypeShape(&input, right_data_type, right_shape));
+}
+
+TEST(CircleEvalInputDataLoaderTest, verifyTypeShapeTest_NEG)
+{
+ luci::CircleInput input;
+ input.dtype(loco::DataType::FLOAT32);
+ input.rank(4);
+ input.dim(0).set(1);
+ input.dim(1).set(4);
+ input.dim(2).set(4);
+ input.dim(3).set(2);
+
+ loco::DataType right_data_type{loco::DataType::FLOAT32};
+ loco::DataType wrong_data_type{loco::DataType::FLOAT16};
+ std::vector<loco::Dimension> wrong_shape;
+ wrong_shape.emplace_back(1);
+ wrong_shape.emplace_back(3);
+ wrong_shape.emplace_back(3);
+ wrong_shape.emplace_back(2);
+
+ EXPECT_ANY_THROW(verifyTypeShape(&input, right_data_type, wrong_shape));
+ EXPECT_ANY_THROW(verifyTypeShape(&input, wrong_data_type, wrong_shape));
+}
diff --git a/compiler/circle-eval-diff/src/MetricPrinter.cpp b/compiler/circle-eval-diff/src/MetricPrinter.cpp
new file mode 100644
index 000000000..ec8408471
--- /dev/null
+++ b/compiler/circle-eval-diff/src/MetricPrinter.cpp
@@ -0,0 +1,662 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MetricPrinter.h"
+
+#include <luci/IR/CircleNode.h>
+
+#include <limits>
+#include <iostream>
+#include <cassert>
+
+using Tensor = circle_eval_diff::Tensor;
+
+#define THROW_UNLESS(COND, MSG) \
+ if (not(COND)) \
+ throw std::runtime_error(MSG);
+
+namespace
+{
+
+uint32_t num_elems(const luci::CircleNode *node)
+{
+ uint32_t res = 1;
+
+ for (uint32_t i = 0; i < node->rank(); i++)
+ res *= node->dim(i).value();
+
+ return res;
+}
+
+template <typename T> bool same_shape(const T a, const T b)
+{
+ if (a->rank() != b->rank())
+ return false;
+
+ for (uint32_t i = 0; i < a->rank(); i++)
+ {
+ if (not(a->dim(i) == b->dim(i)))
+ return false;
+ }
+
+ return true;
+}
+
+template <typename T> bool same_dtype(const T a, const T b) { return a->dtype() == b->dtype(); }
+
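+// Convert a tensor of data type DT to a FLOAT32 tensor of the same shape.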
+template <loco::DataType DT> std::shared_ptr<Tensor> to_fp32(const std::shared_ptr<Tensor> &tensor)
+{
+ assert(tensor->dtype() == DT); // FIX_CALLER_UNLESS
+
+ auto fp32_tensor = std::make_shared<Tensor>();
+ {
+ fp32_tensor->dtype(loco::DataType::FLOAT32);
+ fp32_tensor->rank(tensor->rank());
+ for (uint32_t i = 0; i < tensor->rank(); i++)
+ fp32_tensor->dim(i) = tensor->dim(i);
+
+ const auto num_elems = tensor->size<DT>();
+ fp32_tensor->size<loco::DataType::FLOAT32>(num_elems);
+ for (uint32_t i = 0; i < num_elems; i++)
+ fp32_tensor->at<loco::DataType::FLOAT32>(i) = static_cast<float>(tensor->at<DT>(i));
+ }
+ return fp32_tensor;
+}
+
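+// Return a FLOAT32 copy of the tensor (pass-through if already FLOAT32).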
+std::shared_ptr<Tensor> fp32(const std::shared_ptr<Tensor> &tensor)
+{
+ switch (tensor->dtype())
+ {
+ case loco::DataType::FLOAT32:
+ return tensor;
+ case loco::DataType::U8:
+ return to_fp32<loco::DataType::U8>(tensor);
+ case loco::DataType::S16:
+ return to_fp32<loco::DataType::S16>(tensor);
+ default:
+ throw std::runtime_error("Unsupported data type.");
+ }
+}
+
+} // namespace
+
+namespace circle_eval_diff
+{
+
+void MAEPrinter::init(const luci::Module *first, const luci::Module *second)
+{
+ THROW_UNLESS(first != nullptr, "Invalid module.");
+ THROW_UNLESS(second != nullptr, "Invalid module.");
+
+ const auto first_output = loco::output_nodes(first->graph());
+ const auto second_output = loco::output_nodes(second->graph());
+
+ assert(first_output.size() == second_output.size()); // FIX_CALLER_UNLESS
+
+ for (uint32_t i = 0; i < first_output.size(); i++)
+ {
+ const auto first_node = loco::must_cast<luci::CircleNode *>(first_output[i]);
+ const auto second_node = loco::must_cast<luci::CircleNode *>(second_output[i]);
+
+ // Create tensors to store intermediate results
+ _intermediate.emplace_back();
+ _intermediate.at(i).dtype(loco::DataType::FLOAT32);
+ // NOTE Use both first_node and second_node to avoid release build break
+ _intermediate.at(i).rank(first_node->rank());
+ uint32_t num_elems = 1;
+ for (uint32_t j = 0; j < second_node->rank(); j++)
+ {
+ _intermediate.at(i).dim(j) = second_node->dim(j);
+ num_elems *= second_node->dim(j).value();
+ }
+ _intermediate.at(i).size<loco::DataType::FLOAT32>(num_elems);
+
+    // Check that the buffer is initialized with zeros
+ for (uint32_t j = 0; j < num_elems; j++)
+ assert(_intermediate.at(i).at<loco::DataType::FLOAT32>(j) == 0.0);
+
+ // Save output names for logging
+ _output_names.emplace_back(first_node->name());
+ }
+}
+
+void MAEPrinter::accum_absolute_error(uint32_t output_idx, const std::shared_ptr<Tensor> &a,
+ const std::shared_ptr<Tensor> &b)
+{
+ assert(a->dtype() == loco::DataType::FLOAT32 and
+ b->dtype() == loco::DataType::FLOAT32); // FIX_CALLER_UNLESS
+ assert(same_shape(a.get(), b.get())); // FIX_CALLER_UNLESS
+ assert(output_idx < _intermediate.size()); // FIX_CALLER_UNLESS
+
+ for (uint32_t i = 0; i < a->size<loco::DataType::FLOAT32>(); i++)
+ {
+ _intermediate.at(output_idx).at<loco::DataType::FLOAT32>(i) +=
+ std::abs(a->at<loco::DataType::FLOAT32>(i) - b->at<loco::DataType::FLOAT32>(i));
+ }
+}
+
+void MAEPrinter::accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+ const std::vector<std::shared_ptr<Tensor>> &second)
+{
+ assert(first.size() == second.size()); // FIX_CALLER_UNLESS
+ assert(first.size() == _intermediate.size()); // FIX_CALLER_UNLESS
+
+ for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+ {
+ const auto first_output = first[output_idx];
+ const auto second_output = second[output_idx];
+
+ // Cast data to fp32 and then compute absolute error
+ const auto fp32_first_output = fp32(first_output);
+ const auto fp32_second_output = fp32(second_output);
+
+ accum_absolute_error(output_idx, fp32_first_output, fp32_second_output);
+ }
+
+ _num_data++;
+}
+
+void MAEPrinter::dump(std::ostream &os) const
+{
+ os << "Mean Absolute Error (MAE)" << std::endl;
+
+ for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+ {
+ const auto name = _output_names.at(output_idx);
+ const auto &inter = _intermediate.at(output_idx);
+ assert(inter.dtype() == loco::DataType::FLOAT32); // FIX_ME_UNLESS
+ const auto elem_count = inter.size<loco::DataType::FLOAT32>();
+
+ // Compute MAE
+ float mae = 0.0;
+ for (uint32_t elem_idx = 0; elem_idx < elem_count; elem_idx++)
+ mae += inter.at<loco::DataType::FLOAT32>(elem_idx);
+
+ mae = mae / elem_count;
+ mae = mae / _num_data;
+
+ os << "MAE for " << name << " is " << mae << std::endl;
+ }
+}
+
+// TODO Reduce code duplication with MAEPrinter
+void MAPEPrinter::init(const luci::Module *first, const luci::Module *second)
+{
+ THROW_UNLESS(first != nullptr, "Invalid module.");
+ THROW_UNLESS(second != nullptr, "Invalid module.");
+
+ const auto first_output = loco::output_nodes(first->graph());
+ const auto second_output = loco::output_nodes(second->graph());
+
+ assert(first_output.size() == second_output.size()); // FIX_CALLER_UNLESS
+
+ for (uint32_t i = 0; i < first_output.size(); i++)
+ {
+ const auto first_node = loco::must_cast<luci::CircleNode *>(first_output[i]);
+ const auto second_node = loco::must_cast<luci::CircleNode *>(second_output[i]);
+
+ // Create tensors to store intermediate results
+ _intermediate.emplace_back();
+ _intermediate.at(i).dtype(loco::DataType::FLOAT32);
+ // NOTE Use both first_node and second_node to avoid release build break
+ _intermediate.at(i).rank(first_node->rank());
+ uint32_t num_elems = 1;
+ for (uint32_t j = 0; j < second_node->rank(); j++)
+ {
+ _intermediate.at(i).dim(j) = second_node->dim(j);
+ num_elems *= second_node->dim(j).value();
+ }
+ _intermediate.at(i).size<loco::DataType::FLOAT32>(num_elems);
+
+    // Check that the buffer is initialized with zeros
+ for (uint32_t j = 0; j < num_elems; j++)
+ assert(_intermediate.at(i).at<loco::DataType::FLOAT32>(j) == 0.0);
+
+ // Save output names for logging
+ _output_names.emplace_back(first_node->name());
+ }
+}
+
+// Accumulate |(a - b) / a|
+void MAPEPrinter::accum_mean_absolute_error(uint32_t output_idx, const std::shared_ptr<Tensor> &a,
+ const std::shared_ptr<Tensor> &b)
+{
+ assert(a->dtype() == loco::DataType::FLOAT32 and
+ b->dtype() == loco::DataType::FLOAT32); // FIX_CALLER_UNLESS
+ assert(same_shape(a.get(), b.get())); // FIX_CALLER_UNLESS
+ assert(output_idx < _intermediate.size()); // FIX_CALLER_UNLESS
+
+ for (uint32_t i = 0; i < a->size<loco::DataType::FLOAT32>(); i++)
+ {
+ const auto a_val = a->at<loco::DataType::FLOAT32>(i);
+ const auto b_val = b->at<loco::DataType::FLOAT32>(i);
+ _intermediate.at(output_idx).at<loco::DataType::FLOAT32>(i) +=
+ std::abs((a_val - b_val) / a_val);
+ }
+}
+
+// Assumption
+// first: the result of fp32 model
+// second: the result of fake-quantized model
+void MAPEPrinter::accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+ const std::vector<std::shared_ptr<Tensor>> &second)
+{
+ assert(first.size() == second.size()); // FIX_CALLER_UNLESS
+ assert(first.size() == _intermediate.size()); // FIX_CALLER_UNLESS
+
+ for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+ {
+ const auto first_output = first[output_idx];
+ const auto second_output = second[output_idx];
+
+    // Cast data to fp32 and then compute absolute percentage error
+ const auto fp32_first_output = fp32(first_output);
+ const auto fp32_second_output = fp32(second_output);
+
+ accum_mean_absolute_error(output_idx, fp32_first_output, fp32_second_output);
+ }
+
+ _num_data++;
+}
+
+void MAPEPrinter::dump(std::ostream &os) const
+{
+ os << "Mean Absolute Percentage Error (MAPE)" << std::endl;
+
+ for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+ {
+ const auto name = _output_names.at(output_idx);
+ const auto &inter = _intermediate.at(output_idx);
+ assert(inter.dtype() == loco::DataType::FLOAT32); // FIX_ME_UNLESS
+ const auto elem_count = inter.size<loco::DataType::FLOAT32>();
+
+ // Compute MAPE
+ float mape = 0.0;
+ for (uint32_t elem_idx = 0; elem_idx < elem_count; elem_idx++)
+ mape += inter.at<loco::DataType::FLOAT32>(elem_idx);
+
+ mape = mape / elem_count;
+ mape = mape / _num_data;
+ mape *= 100.0;
+
+ os << "MAPE for " << name << " is " << mape << "%" << std::endl;
+ }
+}
+
+// TODO Reduce code duplication with MAEPrinter
+void MPEIRPrinter::init(const luci::Module *first, const luci::Module *second)
+{
+ THROW_UNLESS(first != nullptr, "Invalid module.");
+ THROW_UNLESS(second != nullptr, "Invalid module.");
+
+ const auto first_output = loco::output_nodes(first->graph());
+ const auto second_output = loco::output_nodes(second->graph());
+
+ assert(first_output.size() == second_output.size()); // FIX_CALLER_UNLESS
+
+ for (uint32_t i = 0; i < first_output.size(); i++)
+ {
+ const auto first_node = loco::must_cast<luci::CircleOutput *>(first_output[i]);
+ const auto second_node = loco::must_cast<luci::CircleOutput *>(second_output[i]);
+
+ // Create places to store intermediate results
+ _intermediate.emplace_back(0.0);
+
+ // Save output names for logging
+ _output_names.emplace_back(first_node->name());
+ }
+}
+
+// Accumulate PEIR (Peak Error to Interval Ratio)
+// PEIR = max(|a - b|) / (max(a) - min(a))
+// PEIR >= 0 (lower is better)
+void MPEIRPrinter::accum_peir(uint32_t output_idx, const std::shared_ptr<Tensor> &a,
+ const std::shared_ptr<Tensor> &b)
+{
+ assert(a->dtype() == loco::DataType::FLOAT32 and
+ b->dtype() == loco::DataType::FLOAT32); // FIX_CALLER_UNLESS
+ assert(same_shape(a.get(), b.get())); // FIX_CALLER_UNLESS
+ assert(output_idx < _intermediate.size()); // FIX_CALLER_UNLESS
+
+ float min = std::numeric_limits<float>::max();
+ float max = std::numeric_limits<float>::lowest();
+
+ for (uint32_t i = 0; i < a->size<loco::DataType::FLOAT32>(); i++)
+ {
+ const auto a_val = a->at<loco::DataType::FLOAT32>(i);
+ min = std::min(a_val, min);
+ max = std::max(a_val, max);
+ }
+
+ float interval = max - min;
+
+ // Corner case: All values are the same. We set interval = 1 in this case
+ if (interval == 0)
+ interval = 1.0;
+
+ float peak_error = std::numeric_limits<float>::lowest();
+
+ for (uint32_t i = 0; i < a->size<loco::DataType::FLOAT32>(); i++)
+ {
+ const auto a_val = a->at<loco::DataType::FLOAT32>(i);
+ const auto b_val = b->at<loco::DataType::FLOAT32>(i);
+ const auto error = std::abs(a_val - b_val);
+ peak_error = std::max(error, peak_error);
+ }
+
+ _intermediate.at(output_idx) += peak_error / interval;
+}
+
+// Assumption (when testing the accuracy of quantized model)
+// first: the result of fp32 model
+// second: the result of fake-quantized model
+void MPEIRPrinter::accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+ const std::vector<std::shared_ptr<Tensor>> &second)
+{
+ assert(first.size() == second.size()); // FIX_CALLER_UNLESS
+ assert(first.size() == _intermediate.size()); // FIX_CALLER_UNLESS
+
+ for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+ {
+ const auto first_output = first[output_idx];
+ const auto second_output = second[output_idx];
+
+ // Cast data to fp32 for ease of computation
+ const auto fp32_first_output = fp32(first_output);
+ const auto fp32_second_output = fp32(second_output);
+
+ accum_peir(output_idx, fp32_first_output, fp32_second_output);
+ }
+
+ _num_data++;
+}
+
+void MPEIRPrinter::dump(std::ostream &os) const
+{
+ os << "Mean Peak Error to Interval Ratio (MPEIR)" << std::endl;
+
+ for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+ {
+ const auto name = _output_names.at(output_idx);
+ const auto sum_of_peir = _intermediate.at(output_idx);
+
+ // Compute MPEIR
+ float mpeir = sum_of_peir / _num_data;
+
+ os << "MPEIR for " << name << " is " << mpeir << std::endl;
+ }
+}
+
+// TODO Reduce code duplication with MAEPrinter
+void TopKMatchPrinter::init(const luci::Module *first, const luci::Module *second)
+{
+ THROW_UNLESS(first != nullptr, "Invalid module.");
+ THROW_UNLESS(second != nullptr, "Invalid module.");
+
+ const auto first_output = loco::output_nodes(first->graph());
+ const auto second_output = loco::output_nodes(second->graph());
+
+ assert(first_output.size() == second_output.size()); // FIX_CALLER_UNLESS
+
+ for (uint32_t i = 0; i < first_output.size(); i++)
+ {
+ const auto first_node = loco::must_cast<luci::CircleOutput *>(first_output[i]);
+ const auto second_node = loco::must_cast<luci::CircleOutput *>(second_output[i]);
+
+ // Create places to store intermediate results
+ _intermediate.emplace_back(0.0);
+
+ // Save output names for logging
+ _output_names.emplace_back(first_node->name());
+
+ // If num_elems of an output is less than k,
+ // the output index is added to the skip list
+ if (num_elems(first_node) < _k)
+ {
+ std::cout << "Top-" << _k << "metric for " << first_node->name()
+ << " is ignored, because it has elements less than " << _k << std::endl;
+ _skip_output.emplace_back(i);
+ }
+ }
+}
+
+void TopKMatchPrinter::accum_topk_accuracy(uint32_t output_idx, const std::shared_ptr<Tensor> &a,
+ const std::shared_ptr<Tensor> &b)
+{
+ assert(a->dtype() == loco::DataType::FLOAT32 and
+ b->dtype() == loco::DataType::FLOAT32); // FIX_CALLER_UNLESS
+ assert(same_shape(a.get(), b.get())); // FIX_CALLER_UNLESS
+ assert(output_idx < _intermediate.size()); // FIX_CALLER_UNLESS
+
+ // Find Top-k largest elements
+ // This implementation is a variant of "Method 2 (Use temporary array)" in
+ // https://www.geeksforgeeks.org/k-largestor-smallest-elements-in-an-array/
+ // We sort top-k elements by value and index to ensure that the element with an earlier
+ // index comes first if multiple elements have the same value.
+ auto find_topk = [this](const std::shared_ptr<Tensor> &tensor) {
+ assert(_k <= tensor->size<loco::DataType::FLOAT32>()); // FIX_CALLER_UNLESS
+
+ // first: value, second: index
+ std::vector<std::pair<float, uint32_t>> topk;
+ topk.resize(_k);
+
+ // Initialize
+ for (uint32_t i = 0; i < _k; i++)
+ {
+ topk[i] = std::make_pair(tensor->at<loco::DataType::FLOAT32>(i), i);
+ }
+
+ // Input pair: (value, index)
+ // Return true if a has smaller value than b. If a and b have the same value,
+ // return true if a has larger index.
+ auto compare = [](const std::pair<float, uint32_t> &a, const std::pair<float, uint32_t> &b) {
+ if (a.first == b.first)
+ return a.second > b.second;
+
+ return a.first < b.first;
+ };
+
+ for (uint32_t i = _k; i < tensor->size<loco::DataType::FLOAT32>(); i++)
+ {
+ auto val = std::make_pair(tensor->at<loco::DataType::FLOAT32>(i), i);
+
+ auto min = std::min_element(topk.begin(), topk.end(), compare);
+ if (compare(*min, val))
+ {
+ // val is larger than min. Replace min with val.
+ auto min_index = std::distance(topk.begin(), min);
+ topk[min_index] = val;
+ }
+ }
+
+ return topk;
+ };
+
+ auto first_topk = find_topk(a);
+ auto second_topk = find_topk(b);
+
+ uint32_t matched = 0;
+ for (uint32_t i = 0; i < _k; i++)
+ {
+ for (uint32_t j = 0; j < _k; j++)
+ {
+ if (first_topk[i].second == second_topk[j].second)
+ {
+ matched++;
+ break;
+ }
+ }
+ }
+
+ float matched_ratio = static_cast<float>(matched) / _k;
+
+ _intermediate.at(output_idx) += matched_ratio;
+}
+
+bool TopKMatchPrinter::in_skip_list(uint32_t output_index) const
+{
+ for (auto skip : _skip_output)
+ {
+ if (output_index == skip)
+ return true;
+ }
+
+ return false;
+}
+
+void TopKMatchPrinter::accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+ const std::vector<std::shared_ptr<Tensor>> &second)
+{
+ assert(first.size() == second.size()); // FIX_CALLER_UNLESS
+ assert(first.size() == _intermediate.size()); // FIX_CALLER_UNLESS
+
+ for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+ {
+ if (in_skip_list(output_idx))
+ continue;
+
+ const auto first_output = first[output_idx];
+ const auto second_output = second[output_idx];
+
+ // Cast data to fp32 for ease of computation
+ const auto fp32_first_output = fp32(first_output);
+ const auto fp32_second_output = fp32(second_output);
+
+ accum_topk_accuracy(output_idx, fp32_first_output, fp32_second_output);
+ }
+
+ _num_data++;
+}
+
+void TopKMatchPrinter::dump(std::ostream &os) const
+{
+ os << "Ratio of Matched Indices between Top-" << _k << " results of the models" << std::endl;
+
+ for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+ {
+ if (in_skip_list(output_idx))
+ continue;
+
+ const auto name = _output_names.at(output_idx);
+ const auto sum_of_topk_accuracy = _intermediate.at(output_idx);
+
+ // Compute TopKMatch
+ float mean_topk = sum_of_topk_accuracy / _num_data;
+
+ os << "Mean Top-" << _k << " match ratio for " << name << " is " << mean_topk << std::endl;
+ }
+}
+
+void MSEPrinter::init(const luci::Module *first, const luci::Module *second)
+{
+ THROW_UNLESS(first != nullptr, "Invalid module.");
+ THROW_UNLESS(second != nullptr, "Invalid module.");
+
+ const auto first_output = loco::output_nodes(first->graph());
+ const auto second_output = loco::output_nodes(second->graph());
+
+ assert(first_output.size() == second_output.size()); // FIX_CALLER_UNLESS
+
+ for (uint32_t i = 0; i < first_output.size(); i++)
+ {
+ const auto first_node = loco::must_cast<luci::CircleNode *>(first_output[i]);
+ const auto second_node = loco::must_cast<luci::CircleNode *>(second_output[i]);
+
+ // Create tensors to store intermediate results
+ _intermediate.emplace_back();
+ _intermediate.at(i).dtype(loco::DataType::FLOAT32);
+ // NOTE Use both first_node and second_node to avoid release build break
+ _intermediate.at(i).rank(first_node->rank());
+ uint32_t num_elems = 1;
+ for (uint32_t j = 0; j < second_node->rank(); j++)
+ {
+ _intermediate.at(i).dim(j) = second_node->dim(j);
+ num_elems *= second_node->dim(j).value();
+ }
+ _intermediate.at(i).size<loco::DataType::FLOAT32>(num_elems);
+
+    // Check that the buffer is initialized with zeros
+ for (uint32_t j = 0; j < num_elems; j++)
+ assert(_intermediate.at(i).at<loco::DataType::FLOAT32>(j) == 0.0);
+
+ // Save output names for logging
+ _output_names.emplace_back(first_node->name());
+ }
+}
+
+void MSEPrinter::accum_squared_error(uint32_t output_idx, const std::shared_ptr<Tensor> &a,
+ const std::shared_ptr<Tensor> &b)
+{
+ assert(a->dtype() == loco::DataType::FLOAT32 and
+ b->dtype() == loco::DataType::FLOAT32); // FIX_CALLER_UNLESS
+ assert(same_shape(a.get(), b.get())); // FIX_CALLER_UNLESS
+ assert(output_idx < _intermediate.size()); // FIX_CALLER_UNLESS
+
+ for (uint32_t i = 0; i < a->size<loco::DataType::FLOAT32>(); i++)
+ {
+ _intermediate.at(output_idx).at<loco::DataType::FLOAT32>(i) +=
+ (a->at<loco::DataType::FLOAT32>(i) - b->at<loco::DataType::FLOAT32>(i)) *
+ (a->at<loco::DataType::FLOAT32>(i) - b->at<loco::DataType::FLOAT32>(i));
+ }
+}
+
+void MSEPrinter::accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+ const std::vector<std::shared_ptr<Tensor>> &second)
+{
+ assert(first.size() == second.size()); // FIX_CALLER_UNLESS
+ assert(first.size() == _intermediate.size()); // FIX_CALLER_UNLESS
+
+ for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+ {
+ const auto first_output = first[output_idx];
+ const auto second_output = second[output_idx];
+
+    // Cast data to fp32 and then compute squared error
+ const auto fp32_first_output = fp32(first_output);
+ const auto fp32_second_output = fp32(second_output);
+
+ accum_squared_error(output_idx, fp32_first_output, fp32_second_output);
+ }
+
+ _num_data++;
+}
+
+void MSEPrinter::dump(std::ostream &os) const
+{
+ os << "Mean Squared Error (MSE)" << std::endl;
+
+ for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+ {
+ const auto name = _output_names.at(output_idx);
+ const auto &inter = _intermediate.at(output_idx);
+ assert(inter.dtype() == loco::DataType::FLOAT32); // FIX_ME_UNLESS
+ const auto elem_count = inter.size<loco::DataType::FLOAT32>();
+
+ // Compute MSE
+ float mse = 0.0;
+ for (uint32_t elem_idx = 0; elem_idx < elem_count; elem_idx++)
+ mse += inter.at<loco::DataType::FLOAT32>(elem_idx);
+
+ mse = mse / elem_count;
+ mse = mse / _num_data;
+
+ os << "MSE for " << name << " is " << mse << std::endl;
+ }
+}
+
+} // namespace circle_eval_diff
+
+#undef THROW_UNLESS
diff --git a/compiler/circle-eval-diff/src/MetricPrinter.h b/compiler/circle-eval-diff/src/MetricPrinter.h
new file mode 100644
index 000000000..c8f27511c
--- /dev/null
+++ b/compiler/circle-eval-diff/src/MetricPrinter.h
@@ -0,0 +1,217 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_EVAL_DIFF_METRIC_PRINTER_H__
+#define __CIRCLE_EVAL_DIFF_METRIC_PRINTER_H__
+
+#include <luci/IR/Module.h>
+
+#include "Tensor.h"
+
+#include <vector>
+#include <iostream>
+
+namespace circle_eval_diff
+{
+
+// Class to print metrics
+// How to use?
+//
+// MetricPrinter metric;
+// metric.init(first_module, second_module); // optional initialization
+//
+// for (..) // Evaluate data one by one
+// {
+// ..
+// metric.accumulate(first_result, second_result); // accumulate results
+// }
+//
+// std::cout << &metric << std::endl; // print result
+class MetricPrinter
+{
+public:
+ virtual ~MetricPrinter() = default;
+
+ // Child class can implement this function if necessary
+ // NOTE init can be skipped
+ virtual void init(const luci::Module *, const luci::Module *) {}
+
+ // Accumulate results of comparing the first and the second model's outputs
+ virtual void accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+ const std::vector<std::shared_ptr<Tensor>> &second) = 0;
+
+ // Dump the final result of the corresponding metric
+ virtual void dump(std::ostream &os) const = 0;
+};
+
+static inline std::ostream &operator<<(std::ostream &os, const MetricPrinter *m)
+{
+ m->dump(os);
+ return os;
+}
+
+// Mean Absolute Error
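+//
+// MAE = sum(|a - b|) / (num_elements * num_data), computed per output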
+class MAEPrinter final : public MetricPrinter
+{
+public:
+ void init(const luci::Module *first, const luci::Module *second);
+
+ void accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+ const std::vector<std::shared_ptr<Tensor>> &second);
+
+ void dump(std::ostream &os) const;
+
+private:
+ void accum_absolute_error(uint32_t index, const std::shared_ptr<Tensor> &a,
+ const std::shared_ptr<Tensor> &b);
+
+private:
+ // Store accumulated sum of absolute error for each output
+ std::vector<Tensor> _intermediate;
+ std::vector<std::string> _output_names;
+ uint32_t _num_data = 0;
+};
+
+// Mean Squared Error
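+//
+// MSE = sum((a - b)^2) / (num_elements * num_data), computed per output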
+class MSEPrinter final : public MetricPrinter
+{
+public:
+ void init(const luci::Module *first, const luci::Module *second);
+
+ void accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+ const std::vector<std::shared_ptr<Tensor>> &second);
+
+ void dump(std::ostream &os) const;
+
+private:
+ void accum_squared_error(uint32_t index, const std::shared_ptr<Tensor> &a,
+ const std::shared_ptr<Tensor> &b);
+
+private:
+  // Store accumulated sum of squared error for each output
+ std::vector<Tensor> _intermediate;
+ std::vector<std::string> _output_names;
+ uint32_t _num_data = 0;
+};
+
+// Mean Absolute Percentage Error
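+//
+// MAPE = sum(|(a - b) / a|) / (num_elements * num_data) * 100 (%),
+// where `a` is the output of the first model (treated as the reference)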
+class MAPEPrinter final : public MetricPrinter
+{
+public:
+ void init(const luci::Module *first, const luci::Module *second);
+
+ void accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+ const std::vector<std::shared_ptr<Tensor>> &second);
+
+ void dump(std::ostream &os) const;
+
+private:
+ void accum_mean_absolute_error(uint32_t index, const std::shared_ptr<Tensor> &a,
+ const std::shared_ptr<Tensor> &b);
+
+private:
+  // Store accumulated sum of absolute percentage error for each output
+ std::vector<Tensor> _intermediate;
+ std::vector<std::string> _output_names;
+ uint32_t _num_data = 0;
+};
+
+// Mean Peak Error to Interval Ratio (PEIR)
+// PEIR = max(|a - b|) / (max(a) - min(a))
+// PEIR >= 0 (lower is better)
+//
+// When testing the accuracy of quantized model,
+// the first model should be the original fp32 model, and
+// the second model should be the fake-quantized fp32 model
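+//
+// For example (illustrative values), if a = [0, 2, 4] and b = [0, 3, 4],
+// interval = 4 - 0 = 4, peak error = max(|a - b|) = 1, so PEIR = 1 / 4 = 0.25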
+class MPEIRPrinter final : public MetricPrinter
+{
+public:
+ void init(const luci::Module *first, const luci::Module *second);
+
+ void accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+ const std::vector<std::shared_ptr<Tensor>> &second);
+
+ void dump(std::ostream &os) const;
+
+private:
+ void accum_peir(uint32_t index, const std::shared_ptr<Tensor> &a,
+ const std::shared_ptr<Tensor> &b);
+
+private:
+ // Store accumulated sum of PEIR for each output
+ std::vector<float> _intermediate;
+ std::vector<std::string> _output_names;
+ uint32_t _num_data = 0;
+};
+
+// Ratio of matched indices between top-k results of two models (a, b).
+//
+// top-k match = intersection(top_k_idx(a), top_k_idx(b)) / k
+// mean top-k match = sum(top-k match) / num_data
+//
+// For example,
+// num_data = 2
+// first model output = [1, 2, 3], [2, 3, 1]
+// second model output = [2, 4, 6], [3, 2, 1]
+//
+// if k = 1,
+// first model top-1 index = ([2], [1])
+// second model top-1 index = ([2], [0])
+// mean top-1 accuracy = (1 + 0) / 2 = 0.5
+//
+// if k = 2,
+// first model output = [1, 2, 3], [2, 3, 1]
+// second model output = [2, 4, 6], [3, 2, 1]
+// first model top-2 index = ([2, 1], [1, 0])
+// second model top-2 index = ([2, 1], [0, 1])
+// mean top-2 accuracy = (2 + 2) / 4 = 1
+//
+// NOTE Order of elements is ignored when comparing two top-k sets.
+// NOTE If two elements have the same value and only one can be included in top-k,
+// the one with an earlier index will be included.
+class TopKMatchPrinter : public MetricPrinter
+{
+public:
+ TopKMatchPrinter(uint32_t k) : _k(k) {}
+
+public:
+ void init(const luci::Module *first, const luci::Module *second);
+
+ void accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+ const std::vector<std::shared_ptr<Tensor>> &second);
+
+ void dump(std::ostream &os) const;
+
+private:
+ void accum_topk_accuracy(uint32_t index, const std::shared_ptr<Tensor> &a,
+ const std::shared_ptr<Tensor> &b);
+
+ // Return true if the output is in the skip list (_skip_output)
+ bool in_skip_list(uint32_t output_index) const;
+
+private:
+ const uint32_t _k = 0;
+  // Store accumulated top-k match ratio for each output
+ std::vector<float> _intermediate;
+ std::vector<std::string> _output_names;
+ uint32_t _num_data = 0;
+  // Save indices of outputs whose number of elements is less than k
+ std::vector<uint32_t> _skip_output;
+};
+
+} // namespace circle_eval_diff
+
+#endif // __CIRCLE_EVAL_DIFF_METRIC_PRINTER_H__
diff --git a/compiler/circle-eval-diff/src/MetricPrinter.test.cpp b/compiler/circle-eval-diff/src/MetricPrinter.test.cpp
new file mode 100644
index 000000000..0e71b80cc
--- /dev/null
+++ b/compiler/circle-eval-diff/src/MetricPrinter.test.cpp
@@ -0,0 +1,548 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MetricPrinter.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+using Tensor = circle_eval_diff::Tensor;
+
+namespace
+{
+
+// TODO Reduce code duplication with ResolveCustomOpMatMulPass.cpp
+template <typename T>
+luci::CircleConst *create_const_node(loco::Graph *g, const loco::DataType dtype,
+ const std::vector<uint32_t> &shape,
+ const std::vector<T> &values)
+{
+ auto node = g->nodes()->create<luci::CircleConst>();
+ node->dtype(dtype);
+ node->rank(shape.size());
+
+ uint32_t size = 1;
+ for (uint32_t i = 0; i < shape.size(); ++i)
+ {
+ node->dim(i) = shape.at(i);
+ size *= shape.at(i);
+ }
+ node->shape_status(luci::ShapeStatus::VALID);
+
+#define INIT_VALUES(DT) \
+ { \
+ node->size<DT>(size); \
+ for (uint32_t i = 0; i < values.size(); ++i) \
+ node->at<DT>(i) = values[i]; \
+ }
+
+ switch (dtype)
+ {
+ case loco::DataType::U8:
+ INIT_VALUES(loco::DataType::U8);
+ break;
+ case loco::DataType::S16:
+ INIT_VALUES(loco::DataType::S16);
+ break;
+ case loco::DataType::S32:
+ INIT_VALUES(loco::DataType::S32);
+ break;
+ case loco::DataType::FLOAT32:
+ INIT_VALUES(loco::DataType::FLOAT32)
+ break;
+ default:
+ INTERNAL_EXN("create_const_node called with unsupported type");
+ break;
+ }
+ return node;
+}
+
+/**
+ * Simple graph which adds constant (addition) to the input
+ *
+ * [Input] [Const] (addition)
+ * \ /
+ * [Add]
+ *
+ */
+class AddGraphlet
+{
+public:
+ AddGraphlet() = default;
+
+ void init(loco::Graph *g, float addition)
+ {
+ std::vector<float> addition_val;
+ for (uint32_t i = 0; i < 16; i++)
+ addition_val.push_back(addition);
+ _add_c = create_const_node(g, loco::DataType::FLOAT32, {1, 16}, addition_val);
+
+ _add = g->nodes()->create<luci::CircleAdd>();
+ _add->y(_add_c);
+ _add->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _add->dtype(loco::DataType::FLOAT32);
+ _add->shape({1, 16});
+ _add->name("add");
+ }
+
+protected:
+ luci::CircleAdd *_add = nullptr;
+ luci::CircleConst *_add_c = nullptr;
+};
+
+class AddOneGraph : public luci::test::TestIOGraph, public AddGraphlet
+{
+public:
+ AddOneGraph() = default;
+
+ void init(void)
+ {
+ luci::test::TestIOGraph::init({1, 4}, {1, 16});
+ AddGraphlet::init(g(), 1.0);
+
+ _add->x(input());
+
+ output()->from(_add);
+ }
+
+ std::unique_ptr<loco::Graph> graph(void) { return std::move(_g); }
+};
+
+class AddTwoGraph : public luci::test::TestIOGraph, public AddGraphlet
+{
+public:
+ AddTwoGraph() = default;
+
+ void init(void)
+ {
+ luci::test::TestIOGraph::init({1, 4}, {1, 16});
+ AddGraphlet::init(g(), 2.0);
+
+ _add->x(input());
+
+ output()->from(_add);
+ }
+
+ std::unique_ptr<loco::Graph> graph(void) { return std::move(_g); }
+};
+
+// Return number of elements of the node.
+uint32_t numElements(const luci::CircleNode *node)
+{
+ uint32_t num_elem = 1;
+ for (uint32_t i = 0; i < node->rank(); ++i)
+ num_elem *= node->dim(i).value();
+ return num_elem;
+}
+
+// Return a Tensor which has the same dtype and shape as the node.
+// The buffer does not contain any data yet.
+std::shared_ptr<Tensor> create_empty_tensor(const luci::CircleNode *node)
+{
+ auto tensor = std::make_shared<Tensor>();
+ {
+ tensor->dtype(node->dtype());
+ tensor->rank(node->rank());
+ for (uint32_t i = 0; i < node->rank(); i++)
+ tensor->dim(i) = node->dim(i);
+ tensor->size<loco::DataType::FLOAT32>(numElements(node));
+ }
+
+ return tensor;
+}
+
+std::shared_ptr<Tensor> output_tensor_with_value(const luci::Module *module, float value)
+{
+ auto outputs = loco::output_nodes(module->graph());
+ assert(outputs.size() == 1);
+ auto output = *outputs.begin();
+ auto output_cnode = loco::must_cast<luci::CircleNode *>(output);
+ auto tensor = create_empty_tensor(output_cnode);
+ auto tensor_size = tensor->size<loco::DataType::FLOAT32>();
+ for (uint32_t i = 0; i < tensor_size; i++)
+ {
+ tensor->at<loco::DataType::FLOAT32>(i) = value;
+ }
+ return tensor;
+}
+
+std::shared_ptr<Tensor> output_tensor_with_value(const luci::Module *module,
+ std::vector<float> &value)
+{
+ auto outputs = loco::output_nodes(module->graph());
+ assert(outputs.size() == 1);
+ auto output = *outputs.begin();
+ auto output_cnode = loco::must_cast<luci::CircleNode *>(output);
+ auto tensor = create_empty_tensor(output_cnode);
+ auto tensor_size = tensor->size<loco::DataType::FLOAT32>();
+ assert(tensor_size == value.size());
+ for (uint32_t i = 0; i < tensor_size; i++)
+ {
+ tensor->at<loco::DataType::FLOAT32>(i) = value[i];
+ }
+ return tensor;
+}
+
+} // namespace
+
+namespace circle_eval_diff
+{
+
+TEST(CircleEvalMetricPrinterTest, MAE_simple)
+{
+ luci::Module first;
+ AddOneGraph first_g;
+ first_g.init();
+
+ first.add(std::move(first_g.graph()));
+
+ luci::Module second;
+ AddTwoGraph second_g;
+ second_g.init();
+
+ second.add(std::move(second_g.graph()));
+
+ MAEPrinter mae;
+
+ mae.init(&first, &second);
+
+  // This test does not actually evaluate the modules, but creates
+  // fake results.
+ std::vector<std::shared_ptr<Tensor>> first_result;
+ {
+ auto output = output_tensor_with_value(&first, 1.0);
+ first_result.emplace_back(output);
+ }
+
+ std::vector<std::shared_ptr<Tensor>> second_result;
+ {
+ auto output = output_tensor_with_value(&second, 2.0);
+ second_result.emplace_back(output);
+ }
+
+ mae.accumulate(first_result, second_result);
+
+ std::stringstream ss;
+ mae.dump(ss);
+ std::string result = ss.str();
+
+ EXPECT_NE(std::string::npos, result.find("MAE for output_0 is 1"));
+}
+
+TEST(CircleEvalMetricPrinterTest, MAE_init_with_null_NEG)
+{
+ MAEPrinter mae;
+
+ EXPECT_ANY_THROW(mae.init(nullptr, nullptr));
+}
+
+TEST(CircleEvalMetricPrinterTest, MAPE_simple)
+{
+ luci::Module first;
+ AddOneGraph first_g;
+ first_g.init();
+
+ first.add(std::move(first_g.graph()));
+
+ luci::Module second;
+ AddTwoGraph second_g;
+ second_g.init();
+
+ second.add(std::move(second_g.graph()));
+
+ MAPEPrinter mape;
+
+ mape.init(&first, &second);
+
+  // This test does not actually evaluate the modules, but creates
+  // fake results.
+ std::vector<std::shared_ptr<Tensor>> first_result;
+ {
+ auto output = output_tensor_with_value(&first, 2.0);
+ first_result.emplace_back(output);
+ }
+
+ std::vector<std::shared_ptr<Tensor>> second_result;
+ {
+ auto output = output_tensor_with_value(&second, 1.0);
+ second_result.emplace_back(output);
+ }
+
+ mape.accumulate(first_result, second_result);
+
+ std::stringstream ss;
+ mape.dump(ss);
+ std::string result = ss.str();
+
+ EXPECT_NE(std::string::npos, result.find("MAPE for output_0 is 50%"));
+}
+
+TEST(CircleEvalMetricPrinterTest, MAPE_init_with_null_NEG)
+{
+ MAPEPrinter mape;
+
+ EXPECT_ANY_THROW(mape.init(nullptr, nullptr));
+}
+
+TEST(CircleEvalMetricPrinterTest, MPEIR_simple)
+{
+ luci::Module first;
+ AddOneGraph first_g;
+ first_g.init();
+
+ first.add(std::move(first_g.graph()));
+
+ luci::Module second;
+ AddTwoGraph second_g;
+ second_g.init();
+
+ second.add(std::move(second_g.graph()));
+
+ MPEIRPrinter mpeir;
+
+ mpeir.init(&first, &second);
+
+  // This test does not actually evaluate the modules, but creates
+  // fake results.
+ std::vector<std::shared_ptr<Tensor>> first_result;
+ {
+ std::vector<float> val;
+ val.resize(16);
+ for (uint32_t i = 0; i < 16; i++)
+ val[i] = i;
+
+ auto output = output_tensor_with_value(&first, val);
+ first_result.emplace_back(output);
+ }
+
+ std::vector<std::shared_ptr<Tensor>> second_result;
+ {
+ auto output = output_tensor_with_value(&second, 0.0);
+ second_result.emplace_back(output);
+ }
+
+ mpeir.accumulate(first_result, second_result);
+
+ std::stringstream ss;
+ mpeir.dump(ss);
+ std::string result = ss.str();
+
+ EXPECT_NE(std::string::npos, result.find("MPEIR for output_0 is 1"));
+}
+
+TEST(CircleEvalMetricPrinterTest, MPEIR_init_with_null_NEG)
+{
+ MPEIRPrinter mpeir;
+
+ EXPECT_ANY_THROW(mpeir.init(nullptr, nullptr));
+}
+
+TEST(CircleEvalMetricPrinterTest, TopK_simple)
+{
+ luci::Module first;
+ AddOneGraph first_g;
+ first_g.init();
+
+ first.add(std::move(first_g.graph()));
+
+ luci::Module second;
+ AddTwoGraph second_g;
+ second_g.init();
+
+ second.add(std::move(second_g.graph()));
+
+ TopKMatchPrinter top5(5);
+
+ top5.init(&first, &second);
+
+  // This test does not actually evaluate the modules, but creates
+  // fake results.
+ std::vector<std::shared_ptr<Tensor>> first_result;
+ {
+ std::vector<float> val;
+ val.resize(16);
+ for (uint32_t i = 0; i < 16; i++)
+ val[i] = i;
+
+ auto output = output_tensor_with_value(&first, val);
+ first_result.emplace_back(output);
+ }
+
+ std::vector<std::shared_ptr<Tensor>> second_result;
+ {
+ std::vector<float> val;
+ val.resize(16);
+ for (uint32_t i = 0; i < 16; i++)
+ val[i] = i * 2;
+ auto output = output_tensor_with_value(&second, val);
+ second_result.emplace_back(output);
+ }
+
+ top5.accumulate(first_result, second_result);
+
+ std::stringstream ss;
+ top5.dump(ss);
+ std::string result = ss.str();
+
+ EXPECT_NE(std::string::npos, result.find("Mean Top-5 match ratio for output_0 is 1"));
+}
+
+TEST(CircleEvalMetricPrinterTest, TopK_tie)
+{
+ luci::Module first;
+ AddOneGraph first_g;
+ first_g.init();
+
+ first.add(std::move(first_g.graph()));
+
+ luci::Module second;
+ AddTwoGraph second_g;
+ second_g.init();
+
+ second.add(std::move(second_g.graph()));
+
+ TopKMatchPrinter top5(5);
+
+ top5.init(&first, &second);
+
+  // This test does not actually evaluate the modules, but creates
+  // fake results.
+ std::vector<std::shared_ptr<Tensor>> first_result;
+ {
+ std::vector<float> val;
+ val.resize(16);
+ for (uint32_t i = 0; i < 16; i++)
+ val[i] = i;
+
+ auto output = output_tensor_with_value(&first, val);
+ first_result.emplace_back(output);
+ }
+
+ std::vector<std::shared_ptr<Tensor>> second_result;
+ {
+ std::vector<float> val{12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 14, 15, 16};
+
+ auto output = output_tensor_with_value(&second, val);
+ second_result.emplace_back(output);
+ }
+
+ top5.accumulate(first_result, second_result);
+
+ std::stringstream ss;
+ top5.dump(ss);
+ std::string result = ss.str();
+
+ EXPECT_NE(std::string::npos, result.find("Mean Top-5 match ratio for output_0 is 0.8"));
+}
+
+TEST(CircleEvalMetricPrinterTest, TopK_num_elem_less_than_k_NEG)
+{
+ luci::Module first;
+ AddOneGraph first_g;
+ first_g.init();
+
+ first.add(std::move(first_g.graph()));
+
+ luci::Module second;
+ AddTwoGraph second_g;
+ second_g.init();
+
+ second.add(std::move(second_g.graph()));
+
+ TopKMatchPrinter top100(100);
+
+ top100.init(&first, &second);
+
+  // This test does not actually evaluate the modules, but creates
+  // fake results.
+ std::vector<std::shared_ptr<Tensor>> first_result;
+ {
+ auto output = output_tensor_with_value(&first, 0);
+ first_result.emplace_back(output);
+ }
+
+ std::vector<std::shared_ptr<Tensor>> second_result;
+ {
+ auto output = output_tensor_with_value(&second, 0);
+ second_result.emplace_back(output);
+ }
+
+ top100.accumulate(first_result, second_result);
+
+ std::stringstream ss;
+ top100.dump(ss);
+ std::string result = ss.str();
+
+ EXPECT_EQ(std::string::npos, result.find("Mean Top-100 match ratio"));
+}
+
+TEST(CircleEvalMetricPrinterTest, TopK_init_with_null_NEG)
+{
+ TopKMatchPrinter topk(5);
+
+ EXPECT_ANY_THROW(topk.init(nullptr, nullptr));
+}
+
+TEST(CircleEvalMetricPrinterTest, MSE_simple)
+{
+ luci::Module first;
+ AddOneGraph first_g;
+ first_g.init();
+
+ first.add(std::move(first_g.graph()));
+
+ luci::Module second;
+ AddTwoGraph second_g;
+ second_g.init();
+
+ second.add(std::move(second_g.graph()));
+
+ MSEPrinter mse;
+
+ mse.init(&first, &second);
+
+  // This test does not actually evaluate the modules, but creates
+  // fake results.
+ std::vector<std::shared_ptr<Tensor>> first_result;
+ {
+ auto output = output_tensor_with_value(&first, 1.0);
+ first_result.emplace_back(output);
+ }
+
+ std::vector<std::shared_ptr<Tensor>> second_result;
+ {
+ auto output = output_tensor_with_value(&second, 2.0);
+ second_result.emplace_back(output);
+ }
+
+ mse.accumulate(first_result, second_result);
+
+ std::stringstream ss;
+ mse.dump(ss);
+ std::string result = ss.str();
+
+ EXPECT_NE(std::string::npos, result.find("MSE for output_0 is 1"));
+}
+
+TEST(CircleEvalMetricPrinterTest, MSE_init_with_null_NEG)
+{
+ MSEPrinter mse;
+
+ EXPECT_ANY_THROW(mse.init(nullptr, nullptr));
+}
+
+} // namespace circle_eval_diff
diff --git a/compiler/circle-eval-diff/src/Tensor.cpp b/compiler/circle-eval-diff/src/Tensor.cpp
new file mode 100644
index 000000000..c3efc44cd
--- /dev/null
+++ b/compiler/circle-eval-diff/src/Tensor.cpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Tensor.h"
+
+#include <luci/IR/CircleNodeDecl.h>
+
+#include <cassert>
+
+namespace
+{
+
+// Return number of elements of the node.
+uint32_t numElements(const luci::CircleNode *node)
+{
+ uint32_t num_elem = 1;
+ for (uint32_t i = 0; i < node->rank(); ++i)
+ num_elem *= node->dim(i).value();
+ return num_elem;
+}
+
+} // namespace
+
+namespace circle_eval_diff
+{
+
+#define THROW_UNLESS(COND, MSG) \
+ if (not(COND)) \
+ throw std::runtime_error(MSG);
+
+template <loco::DataType DT> uint32_t Tensor::size(void) const
+{
+ assert(dtype() == DT);
+ assert(_data.size() % sizeof(typename loco::DataTypeImpl<DT>::Type) == 0);
+ return _data.size() / sizeof(typename loco::DataTypeImpl<DT>::Type);
+}
+
+template <loco::DataType DT> void Tensor::size(uint32_t l)
+{
+ assert(dtype() == DT);
+ _data.resize(l * sizeof(typename loco::DataTypeImpl<DT>::Type));
+}
+
+template <loco::DataType DT>
+const typename loco::DataTypeImpl<DT>::Type &Tensor::at(uint32_t n) const
+{
+ assert(dtype() == DT);
+ THROW_UNLESS(n < size<DT>(), "Access to out of buffer boundary.");
+ return *(reinterpret_cast<const typename loco::DataTypeImpl<DT>::Type *>(_data.data()) + n);
+}
+
+template <loco::DataType DT> typename loco::DataTypeImpl<DT>::Type &Tensor::at(uint32_t n)
+{
+ assert(dtype() == DT);
+ THROW_UNLESS(n < size<DT>(), "Access to out of buffer boundary.");
+ return *(reinterpret_cast<typename loco::DataTypeImpl<DT>::Type *>(_data.data()) + n);
+}
+
+#undef THROW_UNLESS
+
+#define INSTANTIATE(DT) \
+ template uint32_t Tensor::size<DT>(void) const; \
+ template void Tensor::size<DT>(uint32_t); \
+ template const typename loco::DataTypeImpl<DT>::Type &Tensor::at<DT>(uint32_t) const; \
+ template typename loco::DataTypeImpl<DT>::Type &Tensor::at<DT>(uint32_t);
+
+INSTANTIATE(loco::DataType::S64);
+INSTANTIATE(loco::DataType::S32);
+INSTANTIATE(loco::DataType::S16);
+INSTANTIATE(loco::DataType::U8);
+INSTANTIATE(loco::DataType::FLOAT32);
+
+#undef INSTANTIATE
+
+// Return a Tensor with the same dtype and shape as the node.
+// The buffer is sized, but does not hold meaningful data yet.
+std::shared_ptr<Tensor> createEmptyTensor(const luci::CircleNode *node)
+{
+ auto tensor = std::make_shared<Tensor>();
+ {
+ tensor->dtype(node->dtype());
+ tensor->rank(node->rank());
+ for (uint32_t i = 0; i < node->rank(); i++)
+ tensor->dim(i) = node->dim(i);
+
+ switch (node->dtype())
+ {
+ case loco::DataType::FLOAT32:
+ tensor->size<loco::DataType::FLOAT32>(numElements(node));
+ break;
+ case loco::DataType::U8:
+ tensor->size<loco::DataType::U8>(numElements(node));
+ break;
+ case loco::DataType::S16:
+ tensor->size<loco::DataType::S16>(numElements(node));
+ break;
+ case loco::DataType::S32:
+ tensor->size<loco::DataType::S32>(numElements(node));
+ break;
+ case loco::DataType::S64:
+ tensor->size<loco::DataType::S64>(numElements(node));
+ break;
+ default:
+ throw std::runtime_error("Unsupported input tensor dtype for " + node->name());
+ }
+ }
+
+ return tensor;
+}
+
+} // namespace circle_eval_diff
diff --git a/compiler/circle-eval-diff/src/Tensor.h b/compiler/circle-eval-diff/src/Tensor.h
new file mode 100644
index 000000000..d4f65d951
--- /dev/null
+++ b/compiler/circle-eval-diff/src/Tensor.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_EVAL_DIFF_TENSOR_H__
+#define __CIRCLE_EVAL_DIFF_TENSOR_H__
+
+#include <loco.h>
+#include <luci/IR/CircleNodeDecl.h>
+
+#include <vector>
+
+namespace circle_eval_diff
+{
+
+struct TensorDataType
+{
+public:
+ const loco::DataType &dtype(void) const { return _dtype; }
+ void dtype(const loco::DataType &dtype) { _dtype = dtype; }
+
+private:
+ loco::DataType _dtype = loco::DataType::Unknown;
+};
+
+struct TensorShape
+{
+public:
+ uint32_t rank(void) const { return _dims.size(); }
+ void rank(uint32_t value) { _dims.resize(value); }
+
+ const loco::Dimension &dim(uint32_t axis) const { return _dims.at(axis); }
+ loco::Dimension &dim(uint32_t axis) { return _dims.at(axis); }
+
+ void shape(std::initializer_list<uint32_t> dims)
+ {
+ rank(dims.size());
+
+ uint32_t axis = 0;
+ for (auto d : dims)
+ {
+ dim(axis++) = d;
+ }
+ }
+
+private:
+ std::vector<loco::Dimension> _dims;
+};
+
+// Tensor has three kinds of data
+// 1. DataType (_dtype)
+// 2. Shape (_dims)
+// 3. Buffer (_data)
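+//
+// Usage sketch (see Tensor.test.cpp for full examples):
+//   Tensor t;
+//   t.dtype(loco::DataType::FLOAT32);
+//   t.shape({1, 2});
+//   t.size<loco::DataType::FLOAT32>(2); // resizes the underlying byte buffer
+//   t.at<loco::DataType::FLOAT32>(0) = 1.0f;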
+struct Tensor final : public TensorShape, public TensorDataType
+{
+public:
+ template <loco::DataType DT> uint32_t size(void) const;
+ template <loco::DataType DT> void size(uint32_t size);
+ template <loco::DataType DT> const typename loco::DataTypeImpl<DT>::Type &at(uint32_t n) const;
+ template <loco::DataType DT> typename loco::DataTypeImpl<DT>::Type &at(uint32_t n);
+ uint8_t *buffer(void) { return _data.data(); }
+ uint32_t byte_size(void) const { return _data.size(); }
+
+private:
+ std::vector<uint8_t> _data;
+};
+
+std::shared_ptr<Tensor> createEmptyTensor(const luci::CircleNode *node);
+
+} // namespace circle_eval_diff
+
+#endif // __CIRCLE_EVAL_DIFF_TENSOR_H__
diff --git a/compiler/circle-eval-diff/src/Tensor.test.cpp b/compiler/circle-eval-diff/src/Tensor.test.cpp
new file mode 100644
index 000000000..395865748
--- /dev/null
+++ b/compiler/circle-eval-diff/src/Tensor.test.cpp
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Tensor.h"
+
+#include <gtest/gtest.h>
+
+#include <luci/IR/CircleNodes.h>
+
+using Tensor = circle_eval_diff::Tensor;
+
+namespace
+{
+
+template <loco::DataType DT> void test_out_of_buffer_range()
+{
+ Tensor t;
+
+ t.shape({1, 2, 3});
+ t.dtype(DT);
+ t.size<DT>(6);
+
+ EXPECT_ANY_THROW(t.at<DT>(6));
+}
+
+template <loco::DataType DT> void test_getter_setter()
+{
+ Tensor t;
+
+ // Check shape
+ t.shape({1, 2, 3});
+ EXPECT_EQ(3, t.rank());
+ EXPECT_EQ(1, t.dim(0));
+ EXPECT_EQ(2, t.dim(1));
+ EXPECT_EQ(3, t.dim(2));
+
+ // Check dtype
+ t.dtype(DT);
+ EXPECT_EQ(DT, t.dtype());
+
+ // Check buffer
+ t.size<DT>(6);
+ EXPECT_EQ(6 * sizeof(typename loco::DataTypeImpl<DT>::Type), t.byte_size());
+ for (uint32_t i = 0; i < 6; i++)
+ t.at<DT>(i) = i;
+
+ for (uint32_t i = 0; i < 6; i++)
+ EXPECT_EQ(i, t.at<DT>(i));
+}
+
+} // namespace
+
+TEST(CircleEvalDiffTensorTest, constructor)
+{
+ Tensor t;
+
+ EXPECT_EQ(0, t.byte_size());
+ EXPECT_EQ(0, t.rank());
+ EXPECT_EQ(loco::DataType::Unknown, t.dtype());
+}
+
+TEST(CircleEvalDiffTensorTest, getter_setter)
+{
+ test_getter_setter<loco::DataType::S64>();
+ test_getter_setter<loco::DataType::S32>();
+ test_getter_setter<loco::DataType::S16>();
+ test_getter_setter<loco::DataType::U8>();
+ test_getter_setter<loco::DataType::FLOAT32>();
+
+ SUCCEED();
+}
+
+TEST(CircleEvalDiffTensorTest, out_of_shape_range_NEG)
+{
+ Tensor t;
+ t.shape({1, 2, 2, 3});
+
+ EXPECT_ANY_THROW(t.dim(4));
+}
+
+TEST(CircleEvalDiffTensorTest, out_of_buffer_range_NEG)
+{
+ test_out_of_buffer_range<loco::DataType::S64>();
+ test_out_of_buffer_range<loco::DataType::S32>();
+ test_out_of_buffer_range<loco::DataType::S16>();
+ test_out_of_buffer_range<loco::DataType::U8>();
+ test_out_of_buffer_range<loco::DataType::FLOAT32>();
+
+ SUCCEED();
+}
+
+TEST(CircleEvalDiffTensorTest, createEmptyTensorTest)
+{
+ luci::CircleInput input;
+ input.dtype(loco::DataType::FLOAT32);
+ input.rank(4);
+ input.dim(0).set(1);
+ input.dim(1).set(3);
+ input.dim(2).set(3);
+ input.dim(3).set(2);
+
+ loco::DataType right_data_type{loco::DataType::FLOAT32};
+ std::vector<loco::Dimension> right_shape;
+ right_shape.emplace_back(1);
+ right_shape.emplace_back(3);
+ right_shape.emplace_back(3);
+ right_shape.emplace_back(2);
+
+ auto tensor = circle_eval_diff::createEmptyTensor(&input);
+ EXPECT_EQ(loco::DataType::FLOAT32, tensor->dtype());
+ EXPECT_EQ(4, tensor->rank());
+ EXPECT_EQ(1, tensor->dim(0));
+ EXPECT_EQ(3, tensor->dim(1));
+ EXPECT_EQ(3, tensor->dim(2));
+ EXPECT_EQ(2, tensor->dim(3));
+}
diff --git a/compiler/circle-execution-plan/CMakeLists.txt b/compiler/circle-execution-plan/CMakeLists.txt
new file mode 100644
index 000000000..0320b52b4
--- /dev/null
+++ b/compiler/circle-execution-plan/CMakeLists.txt
@@ -0,0 +1,32 @@
+nnas_find_package(Jsoncpp)
+if(NOT Jsoncpp_FOUND)
+ message(STATUS "Build circle-execution-plan: FAILED (missing jsoncpp)")
+ return()
+endif(NOT Jsoncpp_FOUND)
+
+set(SOURCES
+ pal/IScratchpadHelper.h
+ pal/ScratchpadHelperLinux.h
+ pal/ScratchpadHelperMCU.h
+ pal/ScratchpadHelperCMSISNN.h
+ pal/TargetPlatform.h
+ src/CircleExecutionPlan.cpp
+ src/ExecutionPlanner.cpp
+ src/ExecutionPlanner.h
+ )
+
+add_executable(circle_execution_plan "${SOURCES}")
+target_include_directories(circle_execution_plan PRIVATE ${Jsoncpp_INCLUDE_DIRS})
+
+target_link_libraries(circle_execution_plan ${Jsoncpp_STATIC_LIB})
+target_link_libraries(circle_execution_plan foder)
+target_link_libraries(circle_execution_plan safemain)
+target_link_libraries(circle_execution_plan luci_env)
+target_link_libraries(circle_execution_plan luci_import)
+target_link_libraries(circle_execution_plan luci_export)
+target_link_libraries(circle_execution_plan luci_plan)
+target_link_libraries(circle_execution_plan arser)
+target_link_libraries(circle_execution_plan luci_log)
+
+target_include_directories(circle_execution_plan PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/pal")
+install(TARGETS circle_execution_plan DESTINATION bin)
diff --git a/compiler/circle-execution-plan/README.md b/compiler/circle-execution-plan/README.md
new file mode 100644
index 000000000..dbb7d4f85
--- /dev/null
+++ b/compiler/circle-execution-plan/README.md
@@ -0,0 +1,28 @@
+# circle-execution-plan
+
+The _circle-execution-plan_ tool provides a model with an "execution plan".
+
+This tool takes a circle file as input and returns a modified circle file.
+The output circle file contains plan (`CircleNodeMemoryPlan`) information for every node.
+
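+For example (paths are hypothetical; the positional arguments and flags are defined in `src/CircleExecutionPlan.cpp`):
+
+```
+$ circle_execution_plan input.circle output.circle --platform linux
+```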
+
+"execution plan" contains:
+- number which determines order in which nodes will be executed
+- memory offsets for node output tensors from the beginning of shared memory buffer
+
+In order to record and read this data, we use `luci::CircleNodeExecutionPlan`.
+
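+A minimal sketch of annotating a node, assuming the `luci` plan API used by
+`ExecutionPlanner.cpp` (the include path here is an assumption):
+
+```cpp
+#include <luci/Plan/CircleNodeExecutionPlan.h> // assumed include path
+#include <vector>
+
+void annotate(luci::CircleNode *node, uint32_t order, const std::vector<uint32_t> &offsets)
+{
+  // order: position in the execution sequence;
+  // offsets: byte offsets of the node's tensors in the shared memory buffer
+  luci::CircleNodeExecutionPlan plan(order, offsets);
+  luci::add_execution_plan(node, plan);
+}
+```
+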
+### Execution plan building
+
+To build the "execution plan" we use the `ExecutionPlanner` class.
+Its main method is `make_execution_plan()`, which finds the "execution plan" for each node and
+writes it to the node's annotations. This involves two steps:
+- determining the order in which the nodes are executed, which is stored in the `_ordered_nodes` vector.
+Currently there is only one default method, `get_default_execution_order_plan()`, which uses `loco::postorder_traversal(const std::vector<loco::Node *> &roots)`.
+  In the future we may add new methods to find the most suitable graph traversal.
+
+- determining memory offsets for nodes from the beginning of the shared memory buffer, which is stored in `_offsets`.
+Currently there is one method, `get_offsets_with_greedy_by_size()`, which implements the "Greedy by Size" algorithm described in the article https://arxiv.org/pdf/2001.03288.pdf; a toy sketch follows this list.
+  The main objective is to minimize the size of the allocated memory block.
+  In the future, other methods may be added here to determine memory offsets for nodes
+  in the best way.
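+
+A toy, self-contained sketch of the "Greedy by Size" idea (simplified: it skips the
+best-fit gap search that the real planner performs):
+
+```cpp
+#include <algorithm>
+#include <cstdint>
+#include <vector>
+
+struct Buf
+{
+  uint32_t size;        // byte size of the tensor
+  uint32_t first, last; // lifetime interval in execution order
+  uint32_t offset = 0;  // assigned offset in the shared buffer
+};
+
+// Visit buffers largest-first; place each one after every already-placed
+// buffer whose lifetime overlaps its own.
+inline uint32_t plan(std::vector<Buf> &bufs)
+{
+  std::sort(bufs.begin(), bufs.end(), [](const Buf &a, const Buf &b) { return a.size > b.size; });
+  std::vector<const Buf *> placed;
+  uint32_t total = 0;
+  for (auto &b : bufs)
+  {
+    uint32_t offset = 0;
+    for (const auto *p : placed)
+      if (p->last >= b.first && p->first <= b.last) // lifetimes overlap
+        offset = std::max(offset, p->offset + p->size);
+    b.offset = offset;
+    placed.push_back(&b);
+    total = std::max(total, b.offset + b.size);
+  }
+  return total; // required size of the shared memory buffer
+}
+```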
diff --git a/compiler/circle-execution-plan/pal/IScratchpadHelper.h b/compiler/circle-execution-plan/pal/IScratchpadHelper.h
new file mode 100644
index 000000000..f5a991526
--- /dev/null
+++ b/compiler/circle-execution-plan/pal/IScratchpadHelper.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CIRCLE_EXECUTION_PLAN_ISCRATCHPAD_HELPER_H
+#define CIRCLE_EXECUTION_PLAN_ISCRATCHPAD_HELPER_H
+
+#include <luci/IR/Nodes/CircleAveragePool2D.h>
+#include <luci/IR/Nodes/CircleBatchMatMul.h>
+#include <luci/IR/Nodes/CircleConv2D.h>
+#include <luci/IR/Nodes/CircleDepthwiseConv2D.h>
+#include <luci/IR/Nodes/CircleSVDF.h>
+#include <cstdint>
+#include <vector>
+
+namespace circle_planner
+{
+
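+// Computes the sizes of the scratchpad (temporary) buffers that a kernel may
+// need in addition to its output tensor; each target platform provides its own
+// implementation.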
+class IScratchpadHelper
+{
+public:
+ virtual uint32_t
+ ComputeScratchpadSizeAveragePool2d(const luci::CircleAveragePool2D *avg_pool) = 0;
+
+ virtual std::vector<uint32_t>
+ ComputeScratchpadSizeBatchMatMul(const luci::CircleBatchMatMul *batch_mat_mul) = 0;
+
+ virtual uint32_t ComputeScratchpadSizeConv2d(const luci::CircleConv2D *conv) = 0;
+
+ virtual uint32_t
+ ComputeScratchpadSizeDepthwiseConv2d(const luci::CircleDepthwiseConv2D *depthwise_conv) = 0;
+
+ virtual std::vector<uint32_t> ComputeScratchpadSizeSVDF(const luci::CircleSVDF *svdf) = 0;
+
+ virtual ~IScratchpadHelper() = default;
+};
+
+} // namespace circle_planner
+
+#endif // CIRCLE_EXECUTION_PLAN_ISCRATCHPAD_HELPER_H
diff --git a/compiler/circle-execution-plan/pal/ScratchpadHelperCMSISNN.h b/compiler/circle-execution-plan/pal/ScratchpadHelperCMSISNN.h
new file mode 100644
index 000000000..5369c0937
--- /dev/null
+++ b/compiler/circle-execution-plan/pal/ScratchpadHelperCMSISNN.h
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_CMSISNN_H
+#define CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_CMSISNN_H
+
+#include "IScratchpadHelper.h"
+#include <cassert>
+
+namespace circle_planner
+{
+
+namespace
+{
+
+inline int32_t computePadding(int32_t stride, int32_t dilation_rate, int32_t in_size,
+ int32_t filter_size, int32_t out_size)
+{
+ const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
+ const int32_t padding = ((out_size - 1) * stride + effective_filter_size - in_size) / 2;
+ return padding > 0 ? padding : 0;
+}
+
+} // namespace
+
+class ScratchpadHelperCMSISNN : public IScratchpadHelper
+{
+public:
+ explicit ScratchpadHelperCMSISNN(bool use_dsp) : _use_dsp(use_dsp)
+ {
+ // Do nothing
+ }
+
+ uint32_t ComputeScratchpadSizeAveragePool2d(const luci::CircleAveragePool2D *avg_pool) final
+ {
+ // Main logic of arm_avgpool_s8_get_buffer_size
+
+ const auto avg_pool_input = loco::must_cast<luci::CircleNode *>(avg_pool->value());
+
+ if (avg_pool_input->dtype() != loco::DataType::S8 or !_use_dsp)
+ return 0;
+
+ const auto depth = static_cast<int32_t>(avg_pool_input->dim(3).value());
+
+ return depth * sizeof(int32_t);
+ }
+
+ std::vector<uint32_t>
+ ComputeScratchpadSizeBatchMatMul(const luci::CircleBatchMatMul *batch_mat_mul) final
+ {
+ throw std::runtime_error("BatchMatMul is not currently supported for cmsisnn platform");
+ }
+
+ uint32_t ComputeScratchpadSizeConv2d(const luci::CircleConv2D *conv) final
+ {
+ // Main logic of arm_convolve_wrapper_s8_get_buffer_size
+
+ const auto dilation_height_factor = static_cast<int32_t>(conv->dilation()->h());
+ const auto dilation_width_factor = static_cast<int32_t>(conv->dilation()->w());
+
+ const auto conv_input = loco::must_cast<luci::CircleNode *>(conv->input());
+ const auto filter = loco::must_cast<luci::CircleNode *>(conv->filter());
+
+ if (dilation_width_factor != 1 or dilation_height_factor != 1 or
+ conv_input->dtype() != loco::DataType::S8)
+ {
+ return 0;
+ }
+
+ const auto input_depth = static_cast<int32_t>(conv_input->dim(3).value());
+
+ const auto input_height = static_cast<int32_t>(conv_input->dim(1).value());
+ const auto input_width = static_cast<int32_t>(conv_input->dim(2).value());
+
+ const auto filter_height = static_cast<int32_t>(filter->dim(1).value());
+ const auto filter_width = static_cast<int32_t>(filter->dim(2).value());
+
+ const auto stride_height = static_cast<int32_t>(conv->stride()->h());
+ const auto stride_width = static_cast<int32_t>(conv->stride()->w());
+
+ const auto output_height = static_cast<int32_t>(conv->dim(1).value());
+ const auto output_width = static_cast<int32_t>(conv->dim(2).value());
+
+ assert(conv_input->quantparam()->zerop.size() == 1);
+ assert(conv->quantparam()->zerop.size() == 1);
+
+ const auto padding_height = computePadding(stride_height, dilation_height_factor, input_height,
+ filter_height, output_height);
+ const auto padding_width =
+ computePadding(stride_width, dilation_width_factor, input_width, filter_width, output_width);
+
+ if ((padding_width == 0) && (padding_height == 0) && (input_depth % 4 == 0) &&
+ (stride_width == 1) && (stride_height == 1) && (filter_width == 1) && (filter_height == 1))
+ {
+ return 0;
+ }
+
+ if (_use_dsp)
+ {
+ return (2 * input_depth * filter_width * filter_height) * sizeof(int16_t);
+ }
+
+ return 0;
+ }
+
+ uint32_t
+ ComputeScratchpadSizeDepthwiseConv2d(const luci::CircleDepthwiseConv2D *depthwise_conv) final
+ {
+ // Main logic of arm_depthwise_conv_wrapper_s8_get_buffer_size
+
+ const auto dilation_height_factor = static_cast<int32_t>(depthwise_conv->dilation()->h());
+ const auto dilation_width_factor = static_cast<int32_t>(depthwise_conv->dilation()->w());
+
+ const auto depthwise_conv_input = loco::must_cast<luci::CircleNode *>(depthwise_conv->input());
+ const auto filter = loco::must_cast<luci::CircleNode *>(depthwise_conv->filter());
+
+ if (dilation_width_factor != 1 or dilation_height_factor != 1 or
+ depthwise_conv_input->dtype() != loco::DataType::S8)
+ {
+ return 0;
+ }
+
+ const auto input_depth = static_cast<int32_t>(depthwise_conv_input->dim(3).value());
+ const auto output_depth = static_cast<int32_t>(depthwise_conv->dim(3).value());
+ const auto batch_size = static_cast<int32_t>(depthwise_conv_input->dim(0).value());
+
+ if (input_depth != output_depth or batch_size != 1 or !_use_dsp)
+ return 0;
+
+ const auto filter_height = static_cast<int32_t>(filter->dim(1).value());
+ const auto filter_width = static_cast<int32_t>(filter->dim(2).value());
+
+ return input_depth * filter_height * filter_width * sizeof(int16_t);
+ }
+
+ std::vector<uint32_t> ComputeScratchpadSizeSVDF(const luci::CircleSVDF *svdf) final
+ {
+ const auto svdf_input = loco::must_cast<luci::CircleNode *>(svdf->input());
+ const auto weight_feature_input = loco::must_cast<luci::CircleNode *>(svdf->weight_feature());
+
+ if (svdf_input->dtype() == loco::DataType::FLOAT32 and
+ (weight_feature_input->dtype() == loco::DataType::S8 or
+ weight_feature_input->dtype() == loco::DataType::U8))
+ {
+ throw std::runtime_error("Hybrid type is not currently supported for linux platform");
+ }
+
+ std::vector<uint32_t> scratchpad_sizes;
+
+ const auto batch_size = svdf_input->dim(0).value();
+ const auto num_filters = weight_feature_input->dim(0).value();
+ const auto rank = svdf->svdf_rank();
+ const auto num_units = num_filters / rank;
+
+ if (svdf_input->dtype() == loco::DataType::S8)
+ {
+ scratchpad_sizes.push_back(batch_size * num_filters * sizeof(int32_t));
+ scratchpad_sizes.push_back(batch_size * num_units * sizeof(int32_t));
+ }
+ else
+ {
+ scratchpad_sizes.push_back(batch_size * num_filters * sizeof(float));
+ }
+
+ return scratchpad_sizes;
+ }
+
+private:
+ bool _use_dsp;
+};
+
+} // namespace circle_planner
+
+#endif // CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_CMSISNN_H
diff --git a/compiler/circle-execution-plan/pal/ScratchpadHelperLinux.h b/compiler/circle-execution-plan/pal/ScratchpadHelperLinux.h
new file mode 100644
index 000000000..811aa67c3
--- /dev/null
+++ b/compiler/circle-execution-plan/pal/ScratchpadHelperLinux.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_LINUX_H
+#define CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_LINUX_H
+
+#include "IScratchpadHelper.h"
+#include <loco/IR/DataTypeTraits.h>
+
+namespace circle_planner
+{
+
+class ScratchpadHelperLinux : public IScratchpadHelper
+{
+public:
+ uint32_t ComputeScratchpadSizeAveragePool2d(const luci::CircleAveragePool2D *avg_pool) final
+ {
+    // For the linux platform, AveragePool2d needs no scratchpad (size = 0)
+ return 0;
+ }
+
+ std::vector<uint32_t>
+ ComputeScratchpadSizeBatchMatMul(const luci::CircleBatchMatMul *batch_mat_mul) final
+ {
+ const auto lhs = loco::must_cast<luci::CircleNode *>(batch_mat_mul->x());
+ const auto rhs = loco::must_cast<luci::CircleNode *>(batch_mat_mul->y());
+
+ std::vector<uint32_t> scratchpad_sizes;
+
+ // Scratchpad for lhs
+ uint32_t scratchpad_size = 1;
+    for (uint32_t i = 0; i < lhs->rank(); ++i)
+ scratchpad_size *= lhs->dim(i).value();
+
+ scratchpad_sizes.push_back(scratchpad_size * loco::size(lhs->dtype()));
+
+ // Scratchpad for rhs
+ scratchpad_size = 1;
+    for (uint32_t i = 0; i < rhs->rank(); ++i)
+ scratchpad_size *= rhs->dim(i).value();
+
+ scratchpad_sizes.push_back(scratchpad_size * loco::size(rhs->dtype()));
+
+ return scratchpad_sizes;
+ }
+
+ uint32_t ComputeScratchpadSizeConv2d(const luci::CircleConv2D *conv) final
+ {
+ const auto conv_input = loco::must_cast<luci::CircleNode *>(conv->input());
+ const auto filter = loco::must_cast<luci::CircleNode *>(conv->filter());
+
+ const uint32_t stride_height = conv->stride()->h();
+ const uint32_t stride_width = conv->stride()->w();
+
+ const uint32_t dilation_height_factor = conv->dilation()->h();
+ const uint32_t dilation_width_factor = conv->dilation()->w();
+
+ const uint32_t filter_height = filter->dim(1).value();
+ const uint32_t filter_width = filter->dim(2).value();
+
+ const bool need_dilated_im2col = dilation_height_factor != 1 || dilation_width_factor != 1;
+ const bool need_non_dilated_im2col =
+ stride_height != 1 || stride_width != 1 || filter_height != 1 || filter_width != 1;
+ const bool need_im2col = conv_input->dtype() != loco::DataType::S16 &&
+ (need_dilated_im2col || need_non_dilated_im2col);
+
+ if (!need_im2col)
+ {
+ return 0;
+ }
+
+ const uint32_t input_depth = conv_input->dim(3).value();
+ const uint32_t batches = conv_input->dim(0).value();
+
+ const uint32_t output_height = conv->dim(1).value();
+ const uint32_t output_width = conv->dim(2).value();
+
+    return batches * output_height * output_width * input_depth * filter_height * filter_width *
+           loco::size(conv_input->dtype());
+ }
+
+ uint32_t
+ ComputeScratchpadSizeDepthwiseConv2d(const luci::CircleDepthwiseConv2D *depthwise_conv) final
+ {
+    // For the linux platform, DepthwiseConv2d needs no scratchpad (size = 0)
+ return 0;
+ }
+
+ std::vector<uint32_t> ComputeScratchpadSizeSVDF(const luci::CircleSVDF *svdf) final
+ {
+ const auto svdf_input = loco::must_cast<luci::CircleNode *>(svdf->input());
+ const auto weight_feature_input = loco::must_cast<luci::CircleNode *>(svdf->weight_feature());
+
+ if (svdf_input->dtype() == loco::DataType::FLOAT32 and
+ (weight_feature_input->dtype() == loco::DataType::S8 or
+ weight_feature_input->dtype() == loco::DataType::U8))
+ {
+ throw std::runtime_error("Hybrid type is not currently supported for linux platform");
+ }
+
+ std::vector<uint32_t> scratchpad_sizes;
+
+ const auto batch_size = svdf_input->dim(0).value();
+ const auto num_filters = weight_feature_input->dim(0).value();
+ const auto rank = svdf->svdf_rank();
+ const auto num_units = num_filters / rank;
+
+ if (svdf_input->dtype() == loco::DataType::S8)
+ {
+ scratchpad_sizes.push_back(batch_size * num_filters * sizeof(int32_t));
+ scratchpad_sizes.push_back(batch_size * num_units * sizeof(int32_t));
+ }
+ else
+ {
+ scratchpad_sizes.push_back(batch_size * num_filters * sizeof(float));
+ }
+
+ return scratchpad_sizes;
+ }
+};
+
+} // namespace circle_planner
+
+#endif // CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_LINUX_H
diff --git a/compiler/circle-execution-plan/pal/ScratchpadHelperMCU.h b/compiler/circle-execution-plan/pal/ScratchpadHelperMCU.h
new file mode 100644
index 000000000..14b41640c
--- /dev/null
+++ b/compiler/circle-execution-plan/pal/ScratchpadHelperMCU.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_MCU_H
+#define CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_MCU_H
+
+#include "IScratchpadHelper.h"
+
+namespace circle_planner
+{
+
+class ScratchpadHelperMCU : public IScratchpadHelper
+{
+public:
+ uint32_t ComputeScratchpadSizeAveragePool2d(const luci::CircleAveragePool2D *avg_pool) final
+ {
+    // For the mcu platform, AveragePool2d needs no scratchpad (size = 0)
+ return 0;
+ }
+
+ std::vector<uint32_t>
+ ComputeScratchpadSizeBatchMatMul(const luci::CircleBatchMatMul *batch_mat_mul) final
+ {
+ throw std::runtime_error("BatchMatMul is not currently supported for mcu platform");
+ }
+
+ uint32_t ComputeScratchpadSizeConv2d(const luci::CircleConv2D *) final
+ {
+    // For the mcu platform, Conv2d needs no scratchpad (size = 0)
+ return 0;
+ }
+
+ uint32_t
+ ComputeScratchpadSizeDepthwiseConv2d(const luci::CircleDepthwiseConv2D *depthwise_conv) final
+ {
+    // For the mcu platform, DepthwiseConv2d needs no scratchpad (size = 0)
+ return 0;
+ }
+
+ std::vector<uint32_t> ComputeScratchpadSizeSVDF(const luci::CircleSVDF *svdf) final
+ {
+ const auto svdf_input = loco::must_cast<luci::CircleNode *>(svdf->input());
+ const auto weight_feature_input = loco::must_cast<luci::CircleNode *>(svdf->weight_feature());
+
+ if (svdf_input->dtype() == loco::DataType::FLOAT32 and
+ (weight_feature_input->dtype() == loco::DataType::S8 or
+ weight_feature_input->dtype() == loco::DataType::U8))
+ {
+ throw std::runtime_error("Hybrid type is not currently supported for linux platform");
+ }
+
+ std::vector<uint32_t> scratchpad_sizes;
+
+ const auto batch_size = svdf_input->dim(0).value();
+ const auto num_filters = weight_feature_input->dim(0).value();
+ const auto rank = svdf->svdf_rank();
+ const auto num_units = num_filters / rank;
+
+ if (svdf_input->dtype() == loco::DataType::S8)
+ {
+ scratchpad_sizes.push_back(batch_size * num_filters * sizeof(int32_t));
+ scratchpad_sizes.push_back(batch_size * num_units * sizeof(int32_t));
+ }
+ else
+ {
+ scratchpad_sizes.push_back(batch_size * num_filters * sizeof(float));
+ }
+
+ return scratchpad_sizes;
+ }
+};
+
+} // namespace circle_planner
+
+#endif // CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_MCU_H
diff --git a/compiler/circle-execution-plan/pal/TargetPlatform.h b/compiler/circle-execution-plan/pal/TargetPlatform.h
new file mode 100644
index 000000000..7b210d608
--- /dev/null
+++ b/compiler/circle-execution-plan/pal/TargetPlatform.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CIRCLE_EXECUTION_PLAN_TARGET_PLATFORM_H
+#define CIRCLE_EXECUTION_PLAN_TARGET_PLATFORM_H
+
+namespace circle_planner
+{
+
+enum SupportedPlatformType
+{
+ LINUX,
+ MCU,
+ CMSISNN
+};
+
+enum RuntimeType
+{
+ ONERT_MICRO,
+ LUCI_INTERPRETER
+};
+
+enum AllocatingMode
+{
+ COMMON,
+ SPLIT
+};
+
+struct TargetPlatform
+{
+ SupportedPlatformType platform_type;
+ bool use_dsp;
+};
+
+} // namespace circle_planner
+
+#endif // CIRCLE_EXECUTION_PLAN_TARGET_PLATFORM_H
diff --git a/compiler/circle-execution-plan/requires.cmake b/compiler/circle-execution-plan/requires.cmake
new file mode 100644
index 000000000..76858f487
--- /dev/null
+++ b/compiler/circle-execution-plan/requires.cmake
@@ -0,0 +1,4 @@
+require(foder)
+require(safemain)
+require(luci)
+require(arser)
diff --git a/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp b/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp
new file mode 100644
index 000000000..345bc05f3
--- /dev/null
+++ b/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp
@@ -0,0 +1,218 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <foder/FileLoader.h>
+
+#include <luci/Importer.h>
+#include <luci/CircleExporter.h>
+#include <luci/CircleFileExpContract.h>
+#include "ExecutionPlanner.h"
+
+#include <arser/arser.h>
+
+#include <functional>
+#include <iostream>
+#include <string>
+#include <vector>
+#include <cstdlib>
+
+int entry(int argc, char **argv)
+{
+ arser::Arser arser("circle_execution_plan provides model with execution plan meta information");
+
+ arser.add_argument("input").help("Input circle model");
+ arser.add_argument("output").help("Output circle model");
+ arser.add_argument("--platform").default_value("linux").help("Platform name: linux mcu cmsisnn");
+ arser.add_argument("--allocating_mode")
+ .default_value("common")
+ .help("Buffer type name (only onert-micro option):"
+ "common - a single buffer is considered for all allocations"
+ "split - there are three buffers: for input,"
+ " for output and for intermediate tensors");
+ arser.add_argument("--runtime")
+ .default_value("onert_micro")
+ .help("Target runtime name: luci-interpreter onert-micro");
+ arser.add_argument("--allocate_const")
+ .nargs(1)
+ .type(arser::DataType::BOOL)
+ .required(false)
+ .default_value(false)
+ .help("Whether or not to take into account constants in memory allocation. "
+ "Default value - false, constants are not counted when allocating memory");
+ arser.add_argument("--allocate_input")
+ .nargs(1)
+ .type(arser::DataType::BOOL)
+ .required(false)
+ .default_value(true)
+ .help("Whether or not to take into account inputs in memory allocation. "
+ "Default value - true, inputs are counted when allocating memory");
+ arser.add_argument("--use_dsp")
+ .nargs(1)
+ .type(arser::DataType::BOOL)
+ .required(false)
+ .default_value(false)
+ .help("Plan with or without dsp (now can be used only with cmsisnn)");
+ arser.add_argument("--save_allocations")
+ .nargs(1)
+ .required(false)
+ .default_value("")
+ .help("Path for output JSON file to save memory allocation info. "
+ "Note: path end of file should have 'tracealloc.json' (example path: "
+ "'../exec_plan_info.tracealloc.json')");
+
+ try
+ {
+ arser.parse(argc, argv);
+ }
+ catch (const std::runtime_error &err)
+ {
+ std::cerr << err.what() << std::endl;
+ std::cout << arser;
+ return 255;
+ }
+
+ const std::string input_path = arser.get<std::string>("input");
+ const std::string output_path = arser.get<std::string>("output");
+ const std::string platform_name = arser.get<std::string>("--platform");
+ const std::string allocating_mode_name = arser.get<std::string>("--allocating_mode");
+ const std::string runtime_name = arser.get<std::string>("--runtime");
+ const bool use_dsp = arser.get<bool>("--use_dsp");
+ const bool is_allocate_const = arser.get<bool>("--allocate_const");
+ const bool is_allocate_input = arser.get<bool>("--allocate_input");
+ const std::string json_path = arser.get<std::string>("--save_allocations");
+
+ if (platform_name != "cmsisnn" && use_dsp)
+ {
+ std::cerr << "ERROR: Now use_dsp can be used only with cmsisnn" << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ circle_planner::SupportedPlatformType platform_type;
+ if (platform_name == "linux")
+ {
+ platform_type = circle_planner::SupportedPlatformType::LINUX;
+ }
+ else if (platform_name == "mcu")
+ {
+ platform_type = circle_planner::SupportedPlatformType::MCU;
+ }
+ else if (platform_name == "cmsisnn")
+ {
+ platform_type = circle_planner::SupportedPlatformType::CMSISNN;
+ }
+ else
+ {
+ std::cerr << "ERROR: Invalid platform name '" << platform_name << "'" << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ circle_planner::AllocatingMode allocating_mode;
+ if (allocating_mode_name == "split")
+ {
+ allocating_mode = circle_planner::AllocatingMode::SPLIT;
+ }
+ else if (allocating_mode_name == "common")
+ {
+ allocating_mode = circle_planner::AllocatingMode::COMMON;
+ }
+ else
+ {
+ std::cerr << "ERROR: Invalid allocation mode name '" << allocating_mode_name << "'"
+ << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ circle_planner::RuntimeType runtime_type;
+ if (runtime_name == "onert-micro")
+ {
+ runtime_type = circle_planner::RuntimeType::ONERT_MICRO;
+ }
+ else if (runtime_name == "luci-interpreter")
+ {
+ runtime_type = circle_planner::RuntimeType::LUCI_INTERPRETER;
+ }
+ else
+ {
+ std::cerr << "ERROR: Invalid runtime name '" << runtime_name << "'" << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ if (allocating_mode == circle_planner::AllocatingMode::SPLIT and
+ runtime_type == circle_planner::RuntimeType::LUCI_INTERPRETER)
+ {
+ std::cerr << "Split buffer type can only be used with onert-micro runtime" << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ bool is_save_allocations = false;
+
+ if (!json_path.empty())
+ {
+ is_save_allocations = true;
+ }
+
+ foder::FileLoader file_loader{input_path};
+ std::vector<char> model_data;
+
+ try
+ {
+ model_data = file_loader.load();
+ }
+ catch (const std::runtime_error &err)
+ {
+ std::cerr << err.what() << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(model_data.data()), model_data.size()};
+ if (!circle::VerifyModelBuffer(verifier))
+ {
+ std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ const circle::Model *circle_model = circle::GetModel(model_data.data());
+ if (circle_model == nullptr)
+ {
+ std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ // Import from input Circle file
+ luci::Importer importer;
+ auto module = importer.importModule(circle_model);
+
+ // Do main job
+ circle_planner::ExecutionPlanner execution_planner(module->graph(), {platform_type, use_dsp},
+ runtime_type, allocating_mode);
+ execution_planner.change_planning_mode(is_allocate_const, is_allocate_input, true);
+ execution_planner.make_execution_plan();
+
+ if (is_save_allocations)
+ execution_planner.create_json_allocation_file(json_path);
+
+ // Export to output Circle file
+ luci::CircleExporter exporter;
+ luci::CircleFileExpContract contract(module.get(), output_path);
+
+ if (!exporter.invoke(&contract))
+ {
+ std::cerr << "ERROR: Failed to export '" << output_path << "'" << std::endl;
+ return 255;
+ }
+
+ return 0;
+}
diff --git a/compiler/circle-execution-plan/src/ExecutionPlanner.cpp b/compiler/circle-execution-plan/src/ExecutionPlanner.cpp
new file mode 100644
index 000000000..fe028c0cb
--- /dev/null
+++ b/compiler/circle-execution-plan/src/ExecutionPlanner.cpp
@@ -0,0 +1,697 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ExecutionPlanner.h"
+#include <loco/IR/Algorithm.h>
+#include <luci/UserSettings.h>
+#include <luci/Log.h>
+
+#include <json.h>
+#include <fstream>
+
+#include <limits> // std::numeric_limits
+
+namespace circle_planner
+{
+namespace
+{
+
+constexpr uint32_t node_not_assigned = std::numeric_limits<int32_t>::max();
+
+bool isExecutableNode(const luci::CircleNode *node)
+{
+ switch (node->opcode())
+ {
+ // The following nodes denote outputs of multiple-output nodes.
+ // The list is synchronized with the same list from luci-interpreter/src/loader/GraphLoader.cpp
+ case luci::CircleOpcode::CIRCLEIFOUT:
+ case luci::CircleOpcode::CIRCLESPLITOUT:
+ case luci::CircleOpcode::CIRCLESPLITVOUT:
+ case luci::CircleOpcode::CIRCLEUNPACKOUT:
+ case luci::CircleOpcode::CIRCLEWHILEOUT:
+ return false;
+ default:
+ return true;
+ }
+}
+
+bool isTensorProducingNode(const luci::CircleNode *node)
+{
+ switch (node->opcode())
+ {
+    // The following nodes are multiple-output nodes. They do not produce tensors;
+    // the tensors are produced by the corresponding *Out nodes instead.
+    // The list is synchronized with the same list from luci-interpreter/src/loader/GraphLoader.cpp
+ case luci::CircleOpcode::IF:
+ case luci::CircleOpcode::SPLIT:
+ case luci::CircleOpcode::UNPACK:
+ return false;
+ default:
+ return true;
+ }
+}
+
+// Create the allocation entry for the current circle node in the JSON allocation info file
+void create_allocation_node(Json::Value &allocations_node,
+ AllocationNodeInformation &alloca_node_inform, uint32_t alive_till_max,
+ luci::CircleNode *circle_node)
+{
+ Json::Value allocation_node;
+ if (alloca_node_inform.size == 0)
+ return;
+
+ allocation_node["offset"] = alloca_node_inform.offset;
+ allocation_node["size"] = alloca_node_inform.size;
+ allocation_node["alive_from"] = alloca_node_inform.first_node;
+
+ if (alloca_node_inform.last_node == node_not_assigned)
+ allocation_node["alive_till"] = alive_till_max + 1;
+ else
+ allocation_node["alive_till"] = alloca_node_inform.last_node;
+
+ allocation_node["origin"] = circle_node->name();
+
+ allocations_node.append(allocation_node);
+}
+
+// TODO: Introduce inplace optimization
+bool can_be_inplace_optimization_node(luci::CircleNode *node)
+{
+ switch (node->opcode())
+ {
+ case luci::CircleOpcode::LOGISTIC:
+ case luci::CircleOpcode::RESHAPE:
+ case luci::CircleOpcode::EXPAND_DIMS:
+ return true;
+ default:
+ return false;
+ }
+}
+
+} // namespace
+
+void ExecutionPlanner::make_execution_plan_onert_micro_base()
+{
+ switch (_allocating_mode)
+ {
+ case AllocatingMode::COMMON:
+ make_execution_plan_onert_micro_common_buffer();
+ break;
+ case AllocatingMode::SPLIT:
+ make_execution_plan_onert_micro_split_buffer();
+ break;
+ default:
+ throw std::runtime_error("Unsupported buffer type\n");
+ }
+}
+
+void ExecutionPlanner::write_execution_plan(uint32_t order_offset)
+{
+ _required_size = get_offsets_with_greedy_by_size();
+
+ int32_t counter_ops = 0;
+ for (uint32_t i = 0; i < _ordered_nodes.size(); i++)
+ {
+ const auto circle_node = dynamic_cast<luci::CircleNode *>(_ordered_nodes[i]);
+ if (circle_node->opcode() != luci::CircleOpcode::CIRCLECONST and
+ circle_node->opcode() != luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE)
+ {
+ luci::CircleNodeExecutionPlan execution_plan(counter_ops + order_offset, _offsets[i]);
+ luci::add_execution_plan(loco::must_cast<luci::CircleNode *>(_ordered_nodes[i]),
+ execution_plan);
+ counter_ops++;
+ }
+ }
+}
+
+void ExecutionPlanner::make_execution_plan_onert_micro_split_buffer()
+{
+ LOGGER(l);
+
+ const auto input_size = _graph->inputs()->size();
+ const auto output_size = _graph->outputs()->size();
+
+ // Make execution plan for inputs
+ _ordered_nodes = loco::input_nodes(_graph);
+ write_execution_plan(0);
+ dump_inform();
+ VERBOSE(l, 0) << "Input graph buffer required memory = " << _required_size << std::endl;
+
+ // Clear structures for next buffer
+ _ordered_nodes.clear();
+ _alloc_node_inform_vector.clear();
+ _dealloc_node.clear();
+ _alloc_node.clear();
+ _offsets.clear();
+ _required_size = 0;
+
+ // Make execution plan for outputs
+ _ordered_nodes = loco::output_nodes(_graph);
+ write_execution_plan(input_size);
+ dump_inform();
+ VERBOSE(l, 0) << "Output graph buffer required memory = " << _required_size << std::endl;
+
+ // Clear structures for next buffer
+ _ordered_nodes.clear();
+ _alloc_node_inform_vector.clear();
+ _dealloc_node.clear();
+ _alloc_node.clear();
+ _offsets.clear();
+ _required_size = 0;
+
+  // Make execution plan for intermediate calculations
+ get_default_execution_order_plan_without_inputs_and_outputs();
+ write_execution_plan(input_size + output_size);
+ dump_inform();
+ VERBOSE(l, 0) << "Main graph buffer required memory = " << _required_size << std::endl;
+}
+
+void ExecutionPlanner::make_execution_plan_onert_micro_common_buffer()
+{
+ LOGGER(l);
+
+ get_default_execution_order_plan();
+ _required_size = get_offsets_with_greedy_by_size();
+
+  // Find the predecessors of the output nodes (the actual graph output nodes, not luci::CircleOutput)
+ const auto output_nodes = loco::output_nodes(const_cast<loco::Graph *>(_graph));
+ std::vector<loco::Node *> output_prev_nodes;
+ for (const auto output_node : output_nodes)
+ {
+ const auto prev_nodes = loco::preds(output_node);
+ std::copy(prev_nodes.begin(), prev_nodes.end(), std::back_inserter(output_prev_nodes));
+ }
+ const auto output_nodes_size = output_prev_nodes.size();
+
+ const auto inputs_nodes = loco::input_nodes(_graph);
+ const auto input_nodes_size = inputs_nodes.size();
+
+ int32_t counter_ops = 0;
+ for (uint32_t i = 0; i < _ordered_nodes.size(); i++)
+ {
+ const auto circle_node = dynamic_cast<luci::CircleNode *>(_ordered_nodes[i]);
+ // First write to input nodes
+ if (circle_node->opcode() == luci::CircleOpcode::CIRCLEINPUT)
+ {
+ // Find input_position for proper position in execution order
+ const auto input_position = std::distance(
+ inputs_nodes.begin(), std::find(inputs_nodes.begin(), inputs_nodes.end(), circle_node));
+ luci::CircleNodeExecutionPlan execution_plan(input_position, _offsets[i]);
+ luci::add_execution_plan(loco::must_cast<luci::CircleNode *>(_ordered_nodes[i]),
+ execution_plan);
+ }
+ // Second write to actual output nodes (not luci::CircleOutput)
+ else if (std::find(output_prev_nodes.begin(), output_prev_nodes.end(), circle_node) !=
+ output_prev_nodes.end())
+ {
+ // Find output_position for proper position in execution order
+ const auto output_position =
+ std::distance(output_prev_nodes.begin(),
+ std::find(output_prev_nodes.begin(), output_prev_nodes.end(), circle_node));
+ luci::CircleNodeExecutionPlan execution_plan(input_nodes_size + output_position, _offsets[i]);
+ luci::add_execution_plan(loco::must_cast<luci::CircleNode *>(_ordered_nodes[i]),
+ execution_plan);
+ }
+ // Finally write to all intermediate nodes
+ else if (circle_node->opcode() != luci::CircleOpcode::CIRCLECONST and
+ circle_node->opcode() != luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE)
+ {
+ luci::CircleNodeExecutionPlan execution_plan(
+ counter_ops + input_nodes_size + output_nodes_size, _offsets[i]);
+ luci::add_execution_plan(loco::must_cast<luci::CircleNode *>(_ordered_nodes[i]),
+ execution_plan);
+ counter_ops++;
+ }
+ }
+
+ dump_inform();
+ VERBOSE(l, 0) << "Buffer required memory = " << _required_size << std::endl;
+}
+
+void ExecutionPlanner::make_execution_plan_luci_interpreter()
+{
+ LOGGER(l);
+
+ get_default_execution_order_plan();
+ _required_size = get_offsets_with_greedy_by_size();
+ for (uint32_t i = 0; i < _ordered_nodes.size(); i++)
+ {
+ luci::CircleNodeExecutionPlan execution_plan(i, _offsets[i]);
+ luci::add_execution_plan(loco::must_cast<luci::CircleNode *>(_ordered_nodes[i]),
+ execution_plan);
+ }
+
+ VERBOSE(l, 0) << "Buffer required memory = " << _required_size << std::endl;
+ dump_inform();
+}
+
+void ExecutionPlanner::make_execution_plan()
+{
+ switch (_runtime_type)
+ {
+ case ONERT_MICRO:
+ make_execution_plan_onert_micro_base();
+ break;
+ case LUCI_INTERPRETER:
+ make_execution_plan_luci_interpreter();
+ break;
+ default:
+ throw std::runtime_error("Unsupported runtime platform\n");
+ }
+
+ auto settings = luci::UserSettings::settings();
+ settings->set(luci::UserSettings::Key::ExecutionPlanGen, true);
+}
+
+void ExecutionPlanner::create_json_allocation_file(const std::string &json_path)
+{
+ Json::Value main_tree;
+ Json::Value segments_node;
+ Json::Value allocations_node;
+
+ uint32_t alive_till_max = 0;
+
+ // Find max dealloc value to assign to nodes with node_not_assigned value
+ for (const auto elem : _dealloc_node)
+ {
+ if (alive_till_max < elem and elem != node_not_assigned)
+ alive_till_max = elem;
+ }
+
+ for (auto &alloc_node_inform : _alloc_node_inform_vector)
+ {
+ const auto node_num = alloc_node_inform.node_num;
+ const auto circle_node = loco::must_cast<luci::CircleNode *>(_ordered_nodes[node_num]);
+
+ create_allocation_node(allocations_node, alloc_node_inform, alive_till_max, circle_node);
+ }
+
+ // Create segment part
+ Json::Value segment_node;
+ segment_node["name"] = "Segment1";
+ segment_node["allocations"] = allocations_node;
+ segments_node.append(segment_node);
+
+ main_tree["schema_version"] = 1;
+ main_tree["segments"] = segments_node;
+
+ Json::StreamWriterBuilder builder;
+ const std::unique_ptr<Json::StreamWriter> writer(builder.newStreamWriter());
+
+ // Write to json file
+ std::ofstream out;
+ out.open(json_path);
+ if (out.is_open())
+ {
+ writer->write(main_tree, &out);
+ }
+}
+
+void ExecutionPlanner::get_default_execution_order_plan()
+{
+ // Get execution order in _ordered_nodes
+ _ordered_nodes = loco::postorder_traversal(loco::output_nodes(const_cast<loco::Graph *>(_graph)));
+}
+
+void ExecutionPlanner::get_default_execution_order_plan_without_inputs_and_outputs()
+{
+ // Get all nodes
+ _ordered_nodes = loco::postorder_traversal(loco::output_nodes(const_cast<loco::Graph *>(_graph)));
+
+ // Get real output nodes (not luci::CircleOutput)
+ const auto output_nodes = loco::output_nodes(const_cast<loco::Graph *>(_graph));
+ std::vector<loco::Node *> output_prev_nodes;
+ for (const auto output_node : output_nodes)
+ {
+ const auto prev_nodes = loco::preds(output_node);
+ std::copy(prev_nodes.begin(), prev_nodes.end(), std::back_inserter(output_prev_nodes));
+ }
+
+ // Remove input and real output nodes from _ordered_nodes
+ _ordered_nodes.erase(
+ std::remove_if(_ordered_nodes.begin(), _ordered_nodes.end(),
+ [&output_prev_nodes](auto node) {
+ const auto circle_node = dynamic_cast<luci::CircleNode *>(node);
+
+ return circle_node->opcode() == luci::CircleOpcode::CIRCLEINPUT or
+ circle_node->opcode() == luci::CircleOpcode::CIRCLEOUTPUT or
+ std::find(output_prev_nodes.begin(), output_prev_nodes.end(), node) !=
+ output_prev_nodes.end();
+ }),
+ _ordered_nodes.end());
+}
+
+void ExecutionPlanner::get_usage_interval()
+{
+ // Initialize vectors of first and last nodes for usage interval
+ _alloc_node.assign(_ordered_nodes.size(), node_not_assigned);
+ _dealloc_node.assign(_ordered_nodes.size(), node_not_assigned);
+
+  // Vector for counting usages
+ std::vector<int> usages_counts(_ordered_nodes.size(), 0);
+
+ auto allocate = [this](uint32_t node, uint32_t tensor) {
+ if (_alloc_node[tensor] != node_not_assigned)
+ {
+ return;
+ }
+ assert(_dealloc_node[tensor] == node_not_assigned);
+ _alloc_node[tensor] = node;
+ };
+
+ auto deallocate = [this](uint32_t node, uint32_t tensor) {
+ assert(_dealloc_node[tensor] == node_not_assigned);
+ _dealloc_node[tensor] = node;
+ };
+
+  // Increase refcounts for graph output and input nodes
+ for (auto &output_node : output_nodes(_graph))
+ {
+ auto it = std::find(_ordered_nodes.begin(), _ordered_nodes.end(), output_node);
+ if (it == _ordered_nodes.end())
+ continue;
+ size_t index = std::distance(_ordered_nodes.begin(), it);
+ usages_counts[index]++;
+ }
+
+ for (auto &input_node : input_nodes(_graph))
+ {
+ auto it = std::find(_ordered_nodes.begin(), _ordered_nodes.end(), input_node);
+ if (it == _ordered_nodes.end())
+ continue;
+ size_t index = std::distance(_ordered_nodes.begin(), it);
+ usages_counts[index]++;
+ allocate(0, index);
+ }
+
+  // Increase usage refcounts for all nodes in the _ordered_nodes vector
+ for (uint32_t i = 0; i < _ordered_nodes.size(); i++)
+ {
+ const auto node = _ordered_nodes.at(i);
+ auto prev_nodes = preds(node);
+ for (auto &prev_node : prev_nodes)
+ {
+ auto it = std::find(_ordered_nodes.begin(), _ordered_nodes.end(), prev_node);
+ size_t index = std::distance(_ordered_nodes.begin(), it);
+ usages_counts[index]++;
+ }
+ }
+
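+  // Walk the nodes in execution order: each node's output is allocated at its
+  // own step, and a tensor is deallocated at the step of its last consumer
+  // (when its usage refcount drops to zero).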
+ for (uint32_t i = 0; i < _ordered_nodes.size(); i++)
+ {
+ const auto node = _ordered_nodes.at(i);
+ auto prev_nodes = preds(node);
+ if (const auto *const_node = dynamic_cast<const luci::CircleConst *>(node))
+ {
+ allocate(0, i);
+ }
+ else if (!isExecutableNode(loco::must_cast<luci::CircleNode *>(node)))
+ {
+      // If the current node is a multi-output node, its lifetime should begin
+      // when its predecessor node starts to live
+ auto it = std::find(_ordered_nodes.begin(), _ordered_nodes.end(), *prev_nodes.begin());
+ size_t index = std::distance(_ordered_nodes.begin(), it);
+ allocate(index, i);
+ }
+ else
+ {
+ allocate(i, i);
+ }
+
+ for (auto &prev_node : prev_nodes)
+ {
+ auto it = std::find(_ordered_nodes.begin(), _ordered_nodes.end(), prev_node);
+ size_t index = std::distance(_ordered_nodes.begin(), it);
+ usages_counts[index]--;
+ if (usages_counts[index] == 0)
+ {
+ deallocate(i, index);
+ }
+ }
+ }
+}
+
+uint32_t ExecutionPlanner::get_offsets_with_greedy_by_size()
+{
+ get_usage_interval();
+ auto required_size = greedy_by_size_approach();
+
+ _offsets.resize(_ordered_nodes.size());
+ for (const auto &alloc : _alloc_node_inform_vector)
+ {
+    // Fill the offsets vector: the offset for the current node comes first,
+    // followed by the offsets for temporary (scratchpad) tensors
+ if (alloc.is_temp)
+ {
+ _offsets[alloc.node_num].push_back(alloc.offset);
+ }
+ else
+ {
+ _offsets[alloc.node_num].insert(_offsets[alloc.node_num].begin(), alloc.offset);
+ }
+ }
+ return required_size;
+}
+
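+// Greedy-by-size offset assignment (see https://arxiv.org/pdf/2001.03288.pdf):
+// visit tensors in decreasing size order and place each one into the
+// lowest-offset gap that does not overlap a live, already-placed tensor.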
+uint32_t ExecutionPlanner::greedy_by_size_approach()
+{
+ size_t result_size = 0;
+ create_alloc_node_inform_vector();
+ std::vector<AllocationNodeInformation> ordered_alloc_inform;
+ for (auto &current_node : _alloc_node_inform_vector)
+ {
+ if (current_node.size == 0)
+ {
+ current_node.offset = 0;
+ continue;
+ }
+ const uint32_t offsetNotAssigned = std::numeric_limits<uint32_t>::max();
+ size_t best_offset = offsetNotAssigned;
+ uint32_t best_offset_fit = offsetNotAssigned;
+
+ uint32_t current_offset = 0;
+
+ for (const auto &alloc_inform : ordered_alloc_inform)
+ {
+ if ((alloc_inform.last_node < current_node.first_node ||
+ alloc_inform.first_node > current_node.last_node))
+ {
+ continue;
+ }
+
+ if (current_offset + current_node.size <= alloc_inform.offset &&
+ alloc_inform.offset - current_offset < best_offset_fit)
+ {
+ best_offset = current_offset;
+ best_offset_fit = alloc_inform.offset - current_offset;
+ }
+ current_offset = std::max(current_offset, alloc_inform.offset + alloc_inform.size);
+ }
+ if (best_offset == offsetNotAssigned)
+ {
+ best_offset = current_offset;
+ }
+
+ result_size = std::max(result_size, best_offset + current_node.size);
+ current_node.offset = best_offset;
+
+ auto insertion_it =
+ std::upper_bound(ordered_alloc_inform.begin(), ordered_alloc_inform.end(), current_node);
+ ordered_alloc_inform.insert(insertion_it, current_node);
+ }
+ return result_size;
+}
+
+void ExecutionPlanner::create_alloc_node_inform_vector()
+{
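+  // Ordering for the greedy-by-size pass: tensors that stay alive for the whole
+  // graph (allocated at step 0, never deallocated) come first, then larger
+  // tensors before smaller ones, with ties broken by allocation time.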
+ auto node_compare = [this](const AllocationNodeInformation &alloc_1,
+ const AllocationNodeInformation &alloc_2) {
+ auto idx1 = alloc_1.node_num;
+ auto idx2 = alloc_2.node_num;
+
+ if (this->_alloc_node[idx1] == 0 && this->_dealloc_node[idx1] == node_not_assigned)
+ {
+ if (this->_alloc_node[idx2] == 0 && this->_dealloc_node[idx2] == node_not_assigned)
+ {
+ return idx1 < idx2;
+ }
+ return true;
+ }
+ if (this->_alloc_node[idx2] == 0 && this->_dealloc_node[idx2] == node_not_assigned)
+ {
+ return false;
+ }
+
+ auto size_1 = alloc_1.size;
+ auto size_2 = alloc_2.size;
+
+ if (size_1 != size_2)
+ {
+ return size_1 > size_2;
+ }
+ return this->_alloc_node[idx1] < this->_alloc_node[idx2];
+ };
+
+ _alloc_node_inform_vector.resize(_ordered_nodes.size());
+
+ for (size_t i = 0; i < _ordered_nodes.size(); i++)
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(_ordered_nodes[i]);
+ auto node_size = 1;
+ for (uint32_t axis = 0; axis < circle_node->rank(); ++axis)
+ {
+ node_size *= circle_node->dim(axis).value();
+ }
+ node_size *= size(circle_node->dtype());
+
+ _alloc_node_inform_vector[i].node_num = i;
+ _alloc_node_inform_vector[i].first_node = _alloc_node[i];
+ _alloc_node_inform_vector[i].last_node = _dealloc_node[i];
+
+ const auto *const_node = dynamic_cast<const luci::CircleConst *>(circle_node);
+ if (circle_node->opcode() == luci::CircleOpcode::CIRCLEINPUT && not _is_allocate_inputs)
+ {
+ _alloc_node_inform_vector[i].size = 0;
+ }
+ else if (circle_node->opcode() == luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE)
+ {
+ _alloc_node_inform_vector[i].size = 0;
+ }
+ else if (const_node && not _is_allocate_consts)
+ {
+ _alloc_node_inform_vector[i].size = 0;
+ }
+ else if (!isTensorProducingNode(circle_node))
+ {
+ _alloc_node_inform_vector[i].size = 0;
+ }
+ else
+ {
+ _alloc_node_inform_vector[i].size = node_size;
+ }
+
+    // Scratchpad, if needed
+ std::vector<uint32_t> scratchpad_sizes;
+ if (_is_allocate_scratchpads)
+ {
+ switch (circle_node->opcode())
+ {
+ case luci::CircleOpcode::AVERAGE_POOL_2D:
+ {
+ const auto avg_pool = loco::must_cast<const luci::CircleAveragePool2D *>(circle_node);
+ scratchpad_sizes.push_back(
+ _scratchpad_helper->ComputeScratchpadSizeAveragePool2d(avg_pool));
+ break;
+ }
+ case luci::CircleOpcode::BATCH_MATMUL:
+ {
+ const auto batch_mat_mul = loco::must_cast<const luci::CircleBatchMatMul *>(circle_node);
+ scratchpad_sizes = _scratchpad_helper->ComputeScratchpadSizeBatchMatMul(batch_mat_mul);
+ break;
+ }
+ case luci::CircleOpcode::CONV_2D:
+ {
+ const auto conv = loco::must_cast<const luci::CircleConv2D *>(circle_node);
+ scratchpad_sizes.push_back(_scratchpad_helper->ComputeScratchpadSizeConv2d(conv));
+ break;
+ }
+ case luci::CircleOpcode::DEPTHWISE_CONV_2D:
+ {
+ const auto depthwise_conv =
+ loco::must_cast<const luci::CircleDepthwiseConv2D *>(circle_node);
+ scratchpad_sizes.push_back(
+ _scratchpad_helper->ComputeScratchpadSizeDepthwiseConv2d(depthwise_conv));
+ break;
+ }
+ case luci::CircleOpcode::SVDF:
+ {
+ const auto svdf = loco::must_cast<const luci::CircleSVDF *>(circle_node);
+ scratchpad_sizes = _scratchpad_helper->ComputeScratchpadSizeSVDF(svdf);
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+ for (const auto scratchpad_size : scratchpad_sizes)
+ {
+ if (scratchpad_size > 0)
+ {
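+ // A scratchpad tensor lives only around the execution of node i, so its
+ // usage interval is pinned to [i - 1, i + 1].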
+ AllocationNodeInformation temp_alloc;
+
+ temp_alloc.size = scratchpad_size;
+ temp_alloc.first_node = i - 1;
+ temp_alloc.last_node = i + 1;
+ temp_alloc.node_num = i;
+ temp_alloc.is_temp = true;
+
+ _alloc_node_inform_vector.push_back(temp_alloc);
+ _alloc_node.push_back(i);
+ _dealloc_node.push_back(i);
+ }
+ }
+ }
+ // Sort _alloc_node_inform_vector with node_compare for the greedy by size approach.
+ std::sort(_alloc_node_inform_vector.begin(), _alloc_node_inform_vector.end(), node_compare);
+}
+
+void ExecutionPlanner::dump_inform()
+{
+ LOGGER(l);
+ uint32_t max_breadth = 0;
+
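+ // For every node, accumulate the sizes of all tensors that are live while it
+ // executes; the maximum breadth over all nodes is a lower bound on the buffer size.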
+ for (uint32_t i = 0; i < _ordered_nodes.size(); i++)
+ {
+ auto current_node_it = std::find_if(
+ _alloc_node_inform_vector.begin(), _alloc_node_inform_vector.end(),
+ [i](const AllocationNodeInformation &x) { return x.node_num == i && !x.is_temp; });
+ for (uint32_t j = 0; j < _ordered_nodes.size(); j++)
+ {
+ auto first_node = _alloc_node[j];
+ auto last_node = _dealloc_node[j];
+
+ auto it = std::find_if(
+ _alloc_node_inform_vector.begin(), _alloc_node_inform_vector.end(),
+ [j](const AllocationNodeInformation &x) { return x.node_num == j && !x.is_temp; });
+ if (i >= first_node && i <= last_node)
+ {
+ current_node_it->breadth += it->size;
+ }
+ }
+ if (max_breadth < current_node_it->breadth)
+ {
+ max_breadth = current_node_it->breadth;
+ }
+
+ auto node = loco::must_cast<luci::CircleNode *>(_ordered_nodes.at(i));
+ VERBOSE(l, 0) << "node_num = " << i << " node_name = " << node->name().c_str()
+ << " node_size = " << current_node_it->size
+ << " node_offset = " << current_node_it->offset
+ << " node_breadth = " << current_node_it->breadth
+ << " node_first_node = " << current_node_it->first_node
+ << " node_last_node = " << current_node_it->last_node << std::endl;
+ }
+ VERBOSE(l, 0) << "Lower bound = " << max_breadth << std::endl;
+ std::sort(_alloc_node_inform_vector.begin(), _alloc_node_inform_vector.end(),
+ [](const AllocationNodeInformation &first, const AllocationNodeInformation &second) {
+ if (first.breadth != second.breadth)
+ return first.breadth > second.breadth;
+ return first.node_num < second.node_num;
+ });
+}
+
+} // namespace circle_planner
diff --git a/compiler/circle-execution-plan/src/ExecutionPlanner.h b/compiler/circle-execution-plan/src/ExecutionPlanner.h
new file mode 100644
index 000000000..1a25518c5
--- /dev/null
+++ b/compiler/circle-execution-plan/src/ExecutionPlanner.h
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CIRCLE_EXECUTION_PLANNER_H
+#define CIRCLE_EXECUTION_PLANNER_H
+
+#include "TargetPlatform.h"
+#include "IScratchpadHelper.h"
+#include "ScratchpadHelperLinux.h"
+#include "ScratchpadHelperMCU.h"
+#include "ScratchpadHelperCMSISNN.h"
+#include <luci/IR/Module.h>
+#include <luci/Plan/CircleNodeExecutionPlan.h>
+
+namespace circle_planner
+{
+// Struct with additional information for a node; it helps to build the allocation plan for nodes.
+struct AllocationNodeInformation
+{
+
+ AllocationNodeInformation()
+ {
+ offset = 0;
+ size = 0;
+ node_num = -1;
+ first_node = -1;
+ last_node = -1;
+ is_temp = false;
+ breadth = 0;
+ }
+ // memory offset from the beginning of the buffer
+ uint32_t offset;
+ // node required size
+ uint32_t size;
+ // execution-order index assigned to the node
+ uint32_t node_num;
+ // node_num of the node at which the current node is first used;
+ // used to build the usage interval of the current node
+ uint32_t first_node;
+ // node_num of the node at which the current node is last used;
+ // used to build the usage interval of the current node
+ uint32_t last_node;
+ // whether the current node is a temporary (scratchpad) tensor
+ bool is_temp;
+ // Breadth is the sum of the sizes of all tensors live at the moment the given node executes
+ uint32_t breadth;
+
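+ // Orders nodes by offset; greedy_by_size_approach relies on this to keep its
+ // list of already-placed tensors sorted by offset.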
+ bool operator<(const AllocationNodeInformation &other) const { return offset < other.offset; }
+};
+
+class ExecutionPlanner
+{
+public:
+ ExecutionPlanner() = delete;
+ explicit ExecutionPlanner(loco::Graph *graph) : _graph(graph)
+ {
+ _scratchpad_helper = std::make_unique<ScratchpadHelperLinux>();
+ }
+
+ explicit ExecutionPlanner(loco::Graph *graph, TargetPlatform target_platform,
+ RuntimeType runtime_type, AllocatingMode allocating_mode)
+ : _graph(graph), _runtime_type(runtime_type), _allocating_mode(allocating_mode)
+ {
+ switch (target_platform.platform_type)
+ {
+ case LINUX:
+ _scratchpad_helper = std::make_unique<ScratchpadHelperLinux>();
+ break;
+ case MCU:
+ _scratchpad_helper = std::make_unique<ScratchpadHelperMCU>();
+ break;
+ case CMSISNN:
+ _scratchpad_helper = std::make_unique<ScratchpadHelperCMSISNN>(target_platform.use_dsp);
+ break;
+ default:
+ assert(false && "Use unsupported platform");
+ }
+ };
+
+ // Method provides an execution plan, which contains the execution order and
+ // memory offsets for all nodes in _graph.
+ // The plan is written into the nodes' annotations with the help of the CircleNodeExecutionPlan class.
+ void make_execution_plan();
+
+ // Method changes the planning mode:
+ // is_allocate_consts = false - constants are no longer taken into account when planning
+ // is_allocate_inputs = false - inputs are no longer taken into account when planning
+ // is_allocate_scratchpads = false - scratchpads are no longer taken into account when planning
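+ // e.g. change_planning_mode(false, true, true) plans memory as if constants
+ // occupied no space (illustrative call).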
+ void change_planning_mode(bool is_allocate_consts, bool is_allocate_inputs,
+ bool is_allocate_scratchpads)
+ {
+ _is_allocate_consts = is_allocate_consts;
+ _is_allocate_inputs = is_allocate_inputs;
+ _is_allocate_scratchpads = is_allocate_scratchpads;
+ };
+
+ void create_json_allocation_file(const std::string &json_path);
+
+private:
+ // Base function that saves the execution plan for the onert-micro runtime.
+ //
+ // NOTE: First, according to _ordered_nodes, the input nodes are written,
+ // then all outputs, and finally all nodes in execution order.
+ // Constants are not written.
+ void make_execution_plan_onert_micro_base();
+
+ // Save execution plan for luci-interpreter runtime base function.
+ void make_execution_plan_luci_interpreter();
+
+ // Save execution plan for onert-micro runtime for the common buffer type.
+ void make_execution_plan_onert_micro_common_buffer();
+
+ // Save execution plan for onert-micro runtime for the split buffer type.
+ void make_execution_plan_onert_micro_split_buffer();
+
+ // Method gets the default execution order plan and saves it in the _ordered_nodes vector.
+ // There can be different variants of the execution order; this method provides the main one.
+ void get_default_execution_order_plan();
+
+ // Method gets the default execution order plan,
+ // but without input and output nodes, and saves it in the _ordered_nodes vector.
+ void get_default_execution_order_plan_without_inputs_and_outputs();
+
+ // Method provides nodes with usage interval information.
+ void get_usage_interval();
+
+ // Method dumps execution plan information.
+ void dump_inform();
+
+ void write_execution_plan(uint32_t order_offset);
+
+ // Method finds the required offsets for all nodes from _ordered_nodes using the greedy-by-size
+ // approach, and saves them in the _offsets vector.
+ // Return: required size of the buffer.
+ uint32_t get_offsets_with_greedy_by_size();
+
+ // Implementation of the greedy-by-size approach (the algorithm is described in the
+ // "EFFICIENT MEMORY MANAGEMENT FOR DEEP NEURAL NET INFERENCE" paper) to find offsets for nodes.
+ uint32_t greedy_by_size_approach();
+
+ // Method creates and fills _alloc_node_inform_vector with usage interval info and node sizes.
+ // _is_allocate_consts = false - the size of const nodes is set to 0;
+ // _is_allocate_inputs = false - the size of input nodes is set to 0;
+ // _is_allocate_scratchpads = false - the size of scratchpad nodes is set to 0;
+ // This is used when we do not want to take const, input, or scratchpad nodes into account
+ // when determining offsets and calculating the required buffer size, e.g. for experiments.
+ void create_alloc_node_inform_vector();
+
+ // Stores additional allocation information for all nodes from _graph.
+ std::vector<AllocationNodeInformation> _alloc_node_inform_vector;
+
+ // Stores nodes in execution order.
+ std::vector<loco::Node *> _ordered_nodes;
+
+ // Stores nodes memory offsets in arena buffer.
+ std::vector<std::vector<uint32_t>> _offsets;
+
+ // Stores, for each node, the position in the _ordered_nodes vector
+ // of the node at which it is first used.
+ // For example, if the i'th position of _alloc_node stores the value j, then
+ // the node at the j'th position of _ordered_nodes is the node at which we should allocate
+ // (first use) the node at the i'th position of _ordered_nodes.
+ std::vector<uint32_t> _alloc_node;
+
+ // Stores, for each node, the position in the _ordered_nodes vector
+ // of the node at which it is last used.
+ // For example, if the i'th position of _dealloc_node stores the value j, then
+ // the node at the j'th position of _ordered_nodes is the node at which we can deallocate
+ // (last use) the node at the i'th position of _ordered_nodes.
+ std::vector<uint32_t> _dealloc_node;
+
+ loco::Graph *_graph;
+
+ // Calculates sizes of scratchpad tensors for the current platform.
+ std::unique_ptr<IScratchpadHelper> _scratchpad_helper;
+
+ // Supported runtime type
+ RuntimeType _runtime_type;
+
+ // Supported buffer type
+ AllocatingMode _allocating_mode;
+
+ // Required memory size.
+ uint32_t _required_size = 0;
+
+ // Flags for choosing different planning modes:
+ // _is_allocate_consts = false - constants are no longer taken into account when planning
+ // _is_allocate_inputs = false - input are no longer taken into account when planning
+ // _is_allocate_scratchpads = false - scratchpads are no longer taken into account when planning
+ bool _is_allocate_consts = true;
+ bool _is_allocate_inputs = true;
+ bool _is_allocate_scratchpads = true;
+};
+
+} // namespace circle_planner
+
+#endif // CIRCLE_EXECUTION_PLANNER_H
diff --git a/compiler/circle-inspect/CMakeLists.txt b/compiler/circle-inspect/CMakeLists.txt
index d0775ea2d..8edfde483 100644
--- a/compiler/circle-inspect/CMakeLists.txt
+++ b/compiler/circle-inspect/CMakeLists.txt
@@ -1,6 +1,6 @@
-if(NOT TARGET mio_circle)
+if(NOT TARGET mio_circle06)
return()
-endif(NOT TARGET mio_circle)
+endif(NOT TARGET mio_circle06)
set(DRIVER "driver/Driver.cpp")
@@ -10,5 +10,6 @@ add_executable(circle-inspect ${DRIVER} ${SOURCES})
target_include_directories(circle-inspect PRIVATE src)
target_link_libraries(circle-inspect arser)
target_link_libraries(circle-inspect foder)
-target_link_libraries(circle-inspect mio_circle)
+target_link_libraries(circle-inspect mio_circle06)
+target_link_libraries(circle-inspect mio_circle06_helper)
target_link_libraries(circle-inspect safemain)
diff --git a/compiler/circle-inspect/README.md b/compiler/circle-inspect/README.md
index 1f76c8ede..94eea7b08 100644
--- a/compiler/circle-inspect/README.md
+++ b/compiler/circle-inspect/README.md
@@ -20,3 +20,19 @@ ADD
```
To get the count of specific operator, use other tools like sort, uniq, etc.
+
+Tensors with `--tensor_dtype`
+- show the name and dtype of each tensor, one per line
+
+Example
+```
+$ circle-inspect --tensor_dtype quantized_conv2d.circle
+```
+
+Result
+```
+ifm UINT8
+weights UINT8
+bias INT32
+ofm UINT8
+```
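+
+Constants with `--constants`
+- show the name of each constant tensor, one per line
+
+Example
+```
+$ circle-inspect --constants quantized_conv2d.circle
+```
+
+Result (for the model above; `weights` and `bias` are assumed to be the constant tensors)
+```
+weights
+bias
+```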
diff --git a/compiler/circle-inspect/driver/Driver.cpp b/compiler/circle-inspect/driver/Driver.cpp
index 72cfa28a3..4fa6069c8 100644
--- a/compiler/circle-inspect/driver/Driver.cpp
+++ b/compiler/circle-inspect/driver/Driver.cpp
@@ -29,13 +29,15 @@
int entry(int argc, char **argv)
{
arser::Arser arser{
- "circle-inspect allows users to retrieve various information from a Circle model file"};
+ "circle-inspect allows users to retrieve various information from a Circle model file"};
arser.add_argument("--operators").nargs(0).help("Dump operators in circle file");
arser.add_argument("--conv2d_weight")
- .nargs(0)
- .help("Dump Conv2D series weight operators in circle file");
+ .nargs(0)
+ .help("Dump Conv2D series weight operators in circle file");
+ arser.add_argument("--constants").nargs(0).help("Dump constant tensors name");
arser.add_argument("--op_version").nargs(0).help("Dump versions of the operators in circle file");
- arser.add_argument("circle").type(arser::DataType::STR).help("Circle file to inspect");
+ arser.add_argument("--tensor_dtype").nargs(0).help("Dump dtype of tensors");
+ arser.add_argument("circle").help("Circle file to inspect");
try
{
@@ -48,7 +50,8 @@ int entry(int argc, char **argv)
return 255;
}
- if (!arser["--operators"] && !arser["--conv2d_weight"] && !arser["--op_version"])
+ if (!arser["--operators"] && !arser["--conv2d_weight"] && !arser["--op_version"] &&
+ !arser["--tensor_dtype"] && !arser["--constants"])
{
std::cout << "At least one option must be specified" << std::endl;
std::cout << arser;
@@ -63,6 +66,10 @@ int entry(int argc, char **argv)
dumps.push_back(std::make_unique<circleinspect::DumpConv2DWeight>());
if (arser["--op_version"])
dumps.push_back(std::make_unique<circleinspect::DumpOperatorVersion>());
+ if (arser["--tensor_dtype"])
+ dumps.push_back(std::make_unique<circleinspect::DumpTensorDType>());
+ if (arser["--constants"])
+ dumps.push_back(std::make_unique<circleinspect::DumpConstants>());
std::string model_file = arser.get<std::string>("circle");
diff --git a/compiler/circle-inspect/requires.cmake b/compiler/circle-inspect/requires.cmake
index 81e0f0dbd..b3a2638ef 100644
--- a/compiler/circle-inspect/requires.cmake
+++ b/compiler/circle-inspect/requires.cmake
@@ -1,3 +1,4 @@
require("arser")
-require("mio-circle")
+require("foder")
+require("mio-circle06")
require("safemain")
diff --git a/compiler/circle-inspect/src/Dump.cpp b/compiler/circle-inspect/src/Dump.cpp
index 5c71afb3f..868fc2ba8 100644
--- a/compiler/circle-inspect/src/Dump.cpp
+++ b/compiler/circle-inspect/src/Dump.cpp
@@ -15,7 +15,9 @@
*/
#include "Dump.h"
-#include "Reader.h"
+
+#include <mio_circle/Helper.h>
+#include <mio_circle/Reader.h>
#include <ostream>
@@ -24,7 +26,7 @@ namespace circleinspect
void DumpOperators::run(std::ostream &os, const circle::Model *model)
{
- circleinspect::Reader reader(model);
+ mio::circle::Reader reader(model);
const uint32_t subgraph_size = reader.num_subgraph();
@@ -50,7 +52,7 @@ void DumpOperators::run(std::ostream &os, const circle::Model *model)
namespace
{
-const circle::Operator *operator_match_output(circleinspect::Reader &reader, const int32_t tensor)
+const circle::Operator *operator_match_output(mio::circle::Reader &reader, const int32_t tensor)
{
auto ops = reader.operators();
@@ -58,7 +60,7 @@ const circle::Operator *operator_match_output(circleinspect::Reader &reader, con
{
const auto op = ops->Get(i);
- const std::vector<int32_t> &outputs = circleinspect::as_index_vector(op->outputs());
+ const std::vector<int32_t> &outputs = mio::circle::as_index_vector(op->outputs());
for (auto output : outputs)
{
@@ -69,7 +71,7 @@ const circle::Operator *operator_match_output(circleinspect::Reader &reader, con
return nullptr;
}
-size_t tensor_buffer_size(circleinspect::Reader &reader, const int32_t tensor_id)
+size_t tensor_buffer_size(mio::circle::Reader &reader, const int32_t tensor_id)
{
auto tensors = reader.tensors();
@@ -93,7 +95,7 @@ namespace circleinspect
void DumpConv2DWeight::run(std::ostream &os, const circle::Model *model)
{
- circleinspect::Reader reader(model);
+ mio::circle::Reader reader(model);
const uint32_t subgraph_size = reader.num_subgraph();
@@ -110,7 +112,7 @@ void DumpConv2DWeight::run(std::ostream &os, const circle::Model *model)
if (bc == circle::BuiltinOperator_CONV_2D || bc == circle::BuiltinOperator_DEPTHWISE_CONV_2D)
{
- const std::vector<int32_t> &inputs = circleinspect::as_index_vector(op->inputs());
+ const std::vector<int32_t> &inputs = mio::circle::as_index_vector(op->inputs());
if (inputs.size() < 2)
{
throw std::runtime_error("Operator has invalid input");
@@ -147,7 +149,7 @@ void DumpOperatorVersion::run(std::ostream &os, const circle::Model *model)
{
std::map<std::string, int32_t> op_version_map;
- circleinspect::Reader reader(model);
+ mio::circle::Reader reader(model);
// This assert is subject to be changed later
assert(reader.num_subgraph() == 1);
@@ -175,3 +177,61 @@ void DumpOperatorVersion::run(std::ostream &os, const circle::Model *model)
}
} // namespace circleinspect
+
+namespace circleinspect
+{
+
+void DumpTensorDType::run(std::ostream &os, const circle::Model *model)
+{
+ mio::circle::Reader reader(model);
+
+ const uint32_t subgraph_size = reader.num_subgraph();
+
+ for (uint32_t g = 0; g < subgraph_size; g++)
+ {
+ reader.select_subgraph(g);
+ auto tensors = reader.tensors();
+
+ for (uint32_t i = 0; i < tensors->Length(); ++i)
+ {
+ const auto tensor = tensors->Get(i);
+
+ os << reader.tensor_name(tensor) << " " << reader.tensor_dtype(tensor) << std::endl;
+ }
+ }
+}
+
+} // namespace circleinspect
+
+namespace circleinspect
+{
+
+void DumpConstants::run(std::ostream &os, const circle::Model *model)
+{
+ mio::circle::Reader reader(model);
+
+ const uint32_t subgraph_size = reader.num_subgraph();
+
+ for (uint32_t g = 0; g < subgraph_size; g++)
+ {
+ reader.select_subgraph(g);
+ auto tensors = reader.tensors();
+
+ for (uint32_t i = 0; i < tensors->Length(); ++i)
+ {
+ const auto tensor = tensors->Get(i);
+ if (tensor->is_variable())
+ continue;
+
+ auto const buffer_id = tensor->buffer();
+
+ auto const buffer_size = reader.buffer_info(buffer_id, nullptr);
+ if (buffer_size == 0)
+ continue;
+
+ os << reader.tensor_name(tensor) << std::endl;
+ }
+ }
+}
+
+} // namespace circleinspect
diff --git a/compiler/circle-inspect/src/Dump.h b/compiler/circle-inspect/src/Dump.h
index 996c421f9..7ab1ebca9 100644
--- a/compiler/circle-inspect/src/Dump.h
+++ b/compiler/circle-inspect/src/Dump.h
@@ -60,6 +60,24 @@ public:
void run(std::ostream &os, const circle::Model *model);
};
+class DumpTensorDType final : public DumpInterface
+{
+public:
+ DumpTensorDType() = default;
+
+public:
+ void run(std::ostream &os, const circle::Model *model);
+};
+
+class DumpConstants final : public DumpInterface
+{
+public:
+ DumpConstants() = default;
+
+public:
+ void run(std::ostream &os, const circle::Model *model);
+};
+
} // namespace circleinspect
#endif // __DUMP_H__
diff --git a/compiler/circle-inspect/src/Reader.cpp b/compiler/circle-inspect/src/Reader.cpp
deleted file mode 100644
index 7807db38a..000000000
--- a/compiler/circle-inspect/src/Reader.cpp
+++ /dev/null
@@ -1,169 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Reader.h"
-
-#include <sstream>
-#include <string>
-
-namespace circleinspect
-{
-
-bool is_valid(const circle::OperatorCode *opcode)
-{
- circle::BuiltinOperator code = opcode->builtin_code();
- return (circle::BuiltinOperator_MIN <= code && code <= circle::BuiltinOperator_MAX);
-}
-
-bool is_custom(const circle::OperatorCode *opcode)
-{
- circle::BuiltinOperator code = opcode->builtin_code();
- return (code == circle::BuiltinOperator_CUSTOM);
-}
-
-std::string opcode_name(const circle::OperatorCode *opcode)
-{
- assert(opcode);
-
- if (!is_valid(opcode))
- {
- std::ostringstream oss;
- oss << "(invalid)";
- return oss.str();
- }
-
- if (is_custom(opcode))
- {
- if (!opcode->custom_code())
- return "(invalid custom)";
-
- std::string custom_op = "CUSTOM(";
- custom_op += opcode->custom_code()->c_str();
- custom_op += ")";
- return custom_op;
- }
-
- circle::BuiltinOperator code = opcode->builtin_code();
- return circle::EnumNameBuiltinOperator(code);
-}
-
-const char *tensor_type(const circle::Tensor *tensor)
-{
- return circle::EnumNameTensorType(tensor->type());
-}
-
-const char *tensor_name(const circle::Tensor *tensor)
-{
- static const char *kEmptyTensorName = "(noname)";
-
- auto name = tensor->name();
- if (name)
- return name->c_str();
-
- return kEmptyTensorName;
-}
-
-Reader::Reader(const circle::Model *model)
-{
- _subgraphs = model->subgraphs();
- _buffers = model->buffers();
-
- auto opcodes = model->operator_codes();
- for (const ::circle::OperatorCode *opcode : *opcodes)
- {
- _op_codes.push_back(opcode);
- }
-}
-
-size_t Reader::buffer_info(uint32_t buf_idx, const uint8_t **buff_data)
-{
- if (buff_data != nullptr)
- {
- *buff_data = nullptr;
- }
-
- if (buf_idx == 0)
- return 0;
-
- if (auto *buffer = (*_buffers)[buf_idx])
- {
- if (auto *array = buffer->data())
- {
- if (size_t size = array->size())
- {
- if (buff_data != nullptr)
- {
- *buff_data = reinterpret_cast<const uint8_t *>(array->data());
- }
- return size;
- }
- }
- }
-
- return 0;
-}
-
-circle::BuiltinOperator Reader::builtin_code(const circle::Operator *op) const
-{
- uint32_t index = op->opcode_index();
- assert(index < _op_codes.size());
- const circle::OperatorCode *opcode = _op_codes.at(index);
-
- return opcode->builtin_code();
-}
-
-std::string Reader::opcode_name(const circle::Operator *op) const
-{
- uint32_t index = op->opcode_index();
- assert(index < _op_codes.size());
- const circle::OperatorCode *opcode = _op_codes.at(index);
-
- if (!is_valid(opcode))
- {
- std::ostringstream oss;
- oss << "(invalid: " << index << ")";
- return oss.str();
- }
-
- return circleinspect::opcode_name(opcode);
-}
-
-bool Reader::select_subgraph(uint32_t sgindex)
-{
- _tensors = nullptr;
- _operators = nullptr;
-
- _inputs.clear();
- _outputs.clear();
-
- if (_subgraphs->Length() <= sgindex)
- {
- assert(false);
- return false;
- }
-
- const circle::SubGraph *subgraph = (*_subgraphs)[sgindex];
-
- _tensors = subgraph->tensors();
- _operators = subgraph->operators();
-
- _inputs = as_index_vector(subgraph->inputs());
- _outputs = as_index_vector(subgraph->outputs());
-
- return true;
-}
-
-} // namespace circleinspect
diff --git a/compiler/circle-inspect/src/Reader.h b/compiler/circle-inspect/src/Reader.h
deleted file mode 100644
index b5a99df3f..000000000
--- a/compiler/circle-inspect/src/Reader.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __READER_H__
-#define __READER_H__
-
-#include <mio/circle/schema_generated.h>
-
-#include <map>
-#include <string>
-#include <vector>
-
-namespace circleinspect
-{
-
-template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T> *flat_array)
-{
- std::vector<T> ret(flat_array->Length());
- for (uint32_t i = 0; i < flat_array->Length(); i++)
- {
- ret[i] = flat_array->Get(i);
- }
- return ret;
-}
-
-bool is_valid(const circle::OperatorCode *opcode);
-bool is_custom(const circle::OperatorCode *opcode);
-std::string opcode_name(const circle::OperatorCode *opcode);
-const char *tensor_type(const circle::Tensor *tensor);
-const char *tensor_name(const circle::Tensor *tensor);
-
-/**
- * @brief Loads Circle file and provides helpers to access attributes
- */
-class Reader
-{
-private:
- using CircleSubGraphs_t = flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>;
- using CircleBuffers_t = flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>>;
- using CircleTensors_t = flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>;
- using CircleOperators_t = flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>;
-
-public:
- Reader(const circle::Model *model);
-
- Reader() = delete;
-
-public:
- const std::vector<const circle::OperatorCode *> &opcodes() { return _op_codes; }
- const CircleBuffers_t *buffers() { return _buffers; }
- const CircleTensors_t *tensors() { return _tensors; }
- const CircleOperators_t *operators() { return _operators; }
- const std::vector<int32_t> &inputs() const { return _inputs; }
- const std::vector<int32_t> &outputs() const { return _outputs; }
-
- uint32_t num_subgraph() const { return _subgraphs->Length(); }
-
- size_t buffer_info(uint32_t buf_idx, const uint8_t **buff_data);
- circle::BuiltinOperator builtin_code(const circle::Operator *op) const;
- std::string opcode_name(const circle::Operator *op) const;
-
-public:
- bool select_subgraph(uint32_t subgraph);
-
-private:
- const CircleSubGraphs_t *_subgraphs{nullptr};
- const CircleBuffers_t *_buffers{nullptr};
- const CircleTensors_t *_tensors{nullptr};
- const CircleOperators_t *_operators{nullptr};
-
- std::vector<const circle::OperatorCode *> _op_codes;
- std::vector<int32_t> _inputs;
- std::vector<int32_t> _outputs;
-};
-
-} // namespace circleinspect
-
-#endif // __READER_H__
diff --git a/compiler/circle-interpreter-test/CMakeLists.txt b/compiler/circle-interpreter-test/CMakeLists.txt
new file mode 100644
index 000000000..fbb0fcd45
--- /dev/null
+++ b/compiler/circle-interpreter-test/CMakeLists.txt
@@ -0,0 +1,27 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+get_target_property(ARTIFACTS_PATH testDataGenerator BINARY_DIR)
+get_target_property(CIRCLE_INTERPRETER_PATH circle-interpreter BINARY_DIR)
+set(CIRCLE_INTERPRETER_PATH "${CIRCLE_INTERPRETER_PATH}/circle-interpreter")
+
+nnas_find_package(GTest REQUIRED)
+
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+GTest_AddTest(circle-interpreter-test ${TESTS})
+
+# circle-interpreter-test uses input data generated during luci_value_test
+if(NOT CMAKE_CROSSCOMPILING)
+ set_tests_properties(circle-interpreter-test
+ PROPERTIES
+ DEPENDS luci_value_test
+ ENVIRONMENT "ARTIFACTS_PATH=${ARTIFACTS_PATH};CIRCLE_INTERPRETER_PATH=${CIRCLE_INTERPRETER_PATH}"
+ )
+else(NOT CMAKE_CROSSCOMPILING)
+ set_tests_properties(circle-interpreter-test
+ PROPERTIES
+ DEPENDS luci_value_cross_test
+ ENVIRONMENT "ARTIFACTS_PATH=${ARTIFACTS_PATH};CIRCLE_INTERPRETER_PATH=${CIRCLE_INTERPRETER_PATH}"
+ )
+endif(NOT CMAKE_CROSSCOMPILING)
diff --git a/compiler/circle-interpreter-test/README.md b/compiler/circle-interpreter-test/README.md
new file mode 100644
index 000000000..66f80e79b
--- /dev/null
+++ b/compiler/circle-interpreter-test/README.md
@@ -0,0 +1,9 @@
+# circle-interpreter-test
+
+`circle-interpreter-test` checks if _circle-interpreter_ is working as expected.
+
+Current tests include
+- input arguments test
+- output data test
+- printing help message test
+- validation of arguments and error message test
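+
+The test binary locates the test artifacts and the interpreter through the
+`ARTIFACTS_PATH` and `CIRCLE_INTERPRETER_PATH` environment variables, which CMake
+sets automatically. To run it manually, export them by hand (paths are illustrative):
+```
+$ ARTIFACTS_PATH=/path/to/common-artifacts \
+  CIRCLE_INTERPRETER_PATH=/path/to/circle-interpreter \
+  ./circle-interpreter-test
+```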
diff --git a/compiler/circle-interpreter-test/requires.cmake b/compiler/circle-interpreter-test/requires.cmake
new file mode 100644
index 000000000..5ca5749ca
--- /dev/null
+++ b/compiler/circle-interpreter-test/requires.cmake
@@ -0,0 +1,3 @@
+require("common-artifacts")
+require("circle-interpreter")
+require("luci-value-test")
diff --git a/compiler/circle-interpreter-test/src/circle-interpreter.test.cpp b/compiler/circle-interpreter-test/src/circle-interpreter.test.cpp
new file mode 100644
index 000000000..4a5e81b45
--- /dev/null
+++ b/compiler/circle-interpreter-test/src/circle-interpreter.test.cpp
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <cstdlib>
+#include <cstring>
+#include <fstream>
+#include <vector>
+
+#define READSIZE 4096
+
+class circle_interpreter_test : public ::testing::Test
+{
+protected:
+ bool initialize(void);
+ bool run(const std::string &command);
+
+protected:
+ bool compare(const std::string &file1, const std::string &file2);
+
+protected:
+ std::string _artifacts_path;
+ std::string _circle_interpreter_path;
+ std::string _result;
+};
+
+bool circle_interpreter_test::initialize(void)
+{
+ char *path = std::getenv("ARTIFACTS_PATH");
+ if (path == nullptr)
+ {
+ std::cerr << "ARTIFACTS_PATH not found" << std::endl;
+ return false;
+ }
+ _artifacts_path = path;
+
+ path = std::getenv("CIRCLE_INTERPRETER_PATH");
+ if (path == nullptr)
+ {
+ std::cerr << "CIRCLE_INTERPRETER_PATH not found" << std::endl;
+ return false;
+ }
+ _circle_interpreter_path = path;
+
+ return true;
+}
+
+bool circle_interpreter_test::run(const std::string &command)
+{
+ std::vector<char> buffer(READSIZE);
+ std::string result = "";
+ std::string cmd_err = command + " 2>&1";
+ FILE *pipe = popen(cmd_err.c_str(), "r");
+ if (!pipe)
+ {
+ return false;
+ }
+ try
+ {
+ while (fgets(&buffer[0], buffer.size(), pipe) != NULL)
+ {
+ result += &buffer[0];
+ }
+ }
+ catch (...)
+ {
+ pclose(pipe);
+ return false;
+ }
+ pclose(pipe);
+ _result = result;
+
+ std::cout << _result << std::endl;
+
+ return true;
+}
+
+bool circle_interpreter_test::compare(const std::string &file1, const std::string &file2)
+{
+ std::ifstream f1(file1.c_str(), std::ifstream::in | std::ifstream::binary);
+ std::ifstream f2(file2.c_str(), std::ifstream::in | std::ifstream::binary);
+
+ if (!f1.is_open() || !f2.is_open())
+ {
+ return false;
+ }
+
+ typedef unsigned char BYTE;
+ std::vector<BYTE> vBuffer1(READSIZE);
+ std::vector<BYTE> vBuffer2(READSIZE);
+
+ do
+ {
+ f1.read((char *)&vBuffer1[0], READSIZE);
+ std::streamsize f1_bytes = f1.gcount();
+ f2.read((char *)&vBuffer2[0], READSIZE);
+ std::streamsize f2_bytes = f2.gcount();
+
+ if (f1_bytes != f2_bytes)
+ {
+ return false;
+ }
+
+ if (!std::equal(vBuffer1.begin(), vBuffer1.end(), vBuffer2.begin()))
+ {
+ return false;
+ }
+ } while (f1.good() || f2.good());
+ return true;
+}
+
+TEST_F(circle_interpreter_test, show_help_msg)
+{
+ if (!initialize())
+ {
+ FAIL();
+ return;
+ }
+
+ std::string command = _circle_interpreter_path + " -h";
+ if (!run(command))
+ {
+ FAIL();
+ return;
+ }
+
+ const auto pos = _result.find("Usage: ./circle-interpreter");
+ ASSERT_NE(std::string::npos, pos);
+}
+
+TEST_F(circle_interpreter_test, valid_command)
+{
+ if (!initialize())
+ {
+ FAIL();
+ return;
+ }
+
+ std::string model = _artifacts_path + "/Conv2D_000.circle";
+ std::string input_prefix = _artifacts_path + "/Conv2D_000.circle.input";
+ std::string output_prefix = "/tmp/Conv2D_000.circle.output";
+ std::string generated_output = output_prefix + "0";
+ std::remove(generated_output.c_str());
+ std::string command =
+ _circle_interpreter_path + " " + model + " " + input_prefix + " " + output_prefix;
+ if (!run(command))
+ {
+ FAIL();
+ return;
+ }
+
+ std::string expected_output = _artifacts_path + "/Conv2D_000.circle.output0";
+
+ if (!compare(generated_output, expected_output))
+ {
+ FAIL();
+ return;
+ }
+}
+
+TEST_F(circle_interpreter_test, invalid_option_NEG)
+{
+ if (!initialize())
+ {
+ FAIL();
+ return;
+ }
+
+ std::string model = _artifacts_path + "/Conv2D_000.circle";
+ std::string command = _circle_interpreter_path + " " + model;
+ if (!run(command))
+ {
+ FAIL();
+ return;
+ }
+
+ const auto pos = _result.find("Invalid argument");
+ ASSERT_NE(std::string::npos, pos);
+}
+
+TEST_F(circle_interpreter_test, not_existing_model_NEG)
+{
+ if (!initialize())
+ {
+ FAIL();
+ return;
+ }
+
+ std::string not_existing_model = _artifacts_path + "/non_exist_file.foo";
+ std::string input_prefix = _artifacts_path + "/Conv2D_000.circle.input";
+ std::string output_prefix = "/tmp/Conv2D_000.circle.output";
+ std::remove(output_prefix.c_str());
+ std::string command =
+ _circle_interpreter_path + " " + not_existing_model + " " + input_prefix + " " + output_prefix;
+ if (!run(command))
+ {
+ FAIL();
+ return;
+ }
+
+ const auto pos = _result.find("Failed to load");
+ ASSERT_NE(std::string::npos, pos);
+}
+
+TEST_F(circle_interpreter_test, invalid_input_prefix_NEG)
+{
+ if (!initialize())
+ {
+ FAIL();
+ return;
+ }
+
+ std::string model = _artifacts_path + "/Conv2D_000.circle";
+ std::string input_prefix = _artifacts_path + "/non_exist_file.foo";
+ std::string output_prefix = "/tmp/Conv2D_000.circle.output";
+ std::remove(output_prefix.c_str());
+ std::string command =
+ _circle_interpreter_path + " " + model + " " + input_prefix + " " + output_prefix;
+ if (!run(command))
+ {
+ FAIL();
+ return;
+ }
+
+ const auto pos = _result.find("Cannot open file");
+ ASSERT_NE(std::string::npos, pos);
+}
diff --git a/compiler/circle-interpreter/CMakeLists.txt b/compiler/circle-interpreter/CMakeLists.txt
new file mode 100644
index 000000000..d18db3e11
--- /dev/null
+++ b/compiler/circle-interpreter/CMakeLists.txt
@@ -0,0 +1,13 @@
+set(INTERPRETER
+ src/CircleInterpreter.cpp
+ )
+
+add_executable(circle-interpreter ${INTERPRETER})
+target_link_libraries(circle-interpreter PRIVATE arser)
+target_link_libraries(circle-interpreter PRIVATE loco)
+target_link_libraries(circle-interpreter PRIVATE luci_import)
+target_link_libraries(circle-interpreter PRIVATE luci_interpreter)
+target_link_libraries(circle-interpreter PRIVATE safemain)
+target_link_libraries(circle-interpreter PRIVATE vconone)
+
+install(TARGETS circle-interpreter DESTINATION bin)
diff --git a/compiler/circle-interpreter/requires.cmake b/compiler/circle-interpreter/requires.cmake
new file mode 100644
index 000000000..a565df65b
--- /dev/null
+++ b/compiler/circle-interpreter/requires.cmake
@@ -0,0 +1,6 @@
+require("arser")
+require("loco")
+require("luci")
+require("luci-interpreter")
+require("safemain")
+require("vconone")
diff --git a/compiler/circle-interpreter/src/CircleInterpreter.cpp b/compiler/circle-interpreter/src/CircleInterpreter.cpp
new file mode 100644
index 000000000..48c29a581
--- /dev/null
+++ b/compiler/circle-interpreter/src/CircleInterpreter.cpp
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <arser/arser.h>
+#include <luci/ImporterEx.h>
+#include <luci_interpreter/Interpreter.h>
+#include <vconone/vconone.h>
+
+#include <cstdlib>
+#include <fstream>
+#include <vector>
+#include <string>
+
+namespace
+{
+
+void readDataFromFile(const std::string &filename, char *data, size_t data_size)
+{
+ std::ifstream fs(filename, std::ifstream::binary);
+ if (fs.fail())
+ throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
+ if (fs.read(data, data_size).fail())
+ throw std::runtime_error("Input tensor size mismatches with \"" + filename + "\".\n");
+ if (fs.peek() != EOF)
+ throw std::runtime_error("Input tensor size mismatches with \"" + filename + "\".\n");
+}
+
+void writeDataToFile(const std::string &filename, const char *data, size_t data_size)
+{
+ std::ofstream fs(filename, std::ofstream::binary);
+ if (fs.fail())
+ throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
+ if (fs.write(data, data_size).fail())
+ {
+ throw std::runtime_error("Failed to write data to file \"" + filename + "\".\n");
+ }
+}
+
+template <typename NodeT> size_t getTensorSize(const NodeT *node)
+{
+ uint32_t tensor_size = loco::size(node->dtype());
+ for (uint32_t i = 0; i < node->rank(); ++i)
+ tensor_size *= node->dim(i).value();
+ return tensor_size;
+}
+
+void print_version(void)
+{
+ std::cout << "circle-interpreter version " << vconone::get_string() << std::endl;
+ std::cout << vconone::get_copyright() << std::endl;
+}
+
+} // namespace
+
+/**
+ * @brief CircleInterpreter main
+ *
+ * Driver to invoke luci-interpreter
+ *
+ */
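+// Example invocation (file names are illustrative):
+//   $ circle-interpreter Add.circle Add.circle.input Add.circle.output
+// reads Add.circle.input0, Add.circle.input1, ... and writes Add.circle.output0, ...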
+int entry(int argc, char **argv)
+{
+ arser::Arser arser("Interpreter driver for circle models");
+
+ arser::Helper::add_version(arser, print_version);
+
+ arser.add_argument("model_path").help("Circle model filepath");
+ arser.add_argument("input_prefix")
+ .help("Input data filepath for circle model. "
+ "n-th input data is read from ${input_prefix}n, "
+ "for example, Add.circle.input0, Add.circle.input1");
+ arser.add_argument("output_prefix")
+ .help("Output data filepath for circle model. "
+ "Output data is written in ${output_file}n, "
+ "for example, Add.circle.output0");
+
+ try
+ {
+ arser.parse(argc, argv);
+ }
+ catch (const std::runtime_error &err)
+ {
+ std::cout << err.what() << std::endl;
+ std::cout << arser;
+ return EXIT_FAILURE;
+ }
+
+ const auto filename = arser.get<std::string>("model_path");
+ const auto input_prefix = arser.get<std::string>("input_prefix");
+ const auto output_prefix = arser.get<std::string>("output_prefix");
+
+ // Load model from the file
+ luci::ImporterEx importer;
+ std::unique_ptr<luci::Module> module = importer.importVerifyModule(filename);
+ if (module == nullptr)
+ {
+ std::cerr << "ERROR: Failed to load '" << filename << "'" << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ // Create interpreter.
+ luci_interpreter::Interpreter interpreter(module.get());
+
+ // Set input.
+ // Data for n'th input is read from ${input_prefix}n
+ // (ex: Add.circle.input0, Add.circle.input1 ..)
+ const auto input_nodes = loco::input_nodes(module->graph());
+ for (size_t i = 0; i < input_nodes.size(); i++)
+ {
+ const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[i]);
+ std::vector<char> input_data(getTensorSize(input_node));
+ readDataFromFile(std::string(input_prefix) + std::to_string(i), input_data.data(),
+ input_data.size());
+ interpreter.writeInputTensor(input_node, input_data.data(), input_data.size());
+ }
+
+ // Do inference.
+ interpreter.interpret();
+
+ // Get output.
+ const auto output_nodes = loco::output_nodes(module->graph());
+ for (uint32_t i = 0; i < module->graph()->outputs()->size(); i++)
+ {
+ const auto *output_node = loco::must_cast<const luci::CircleOutput *>(output_nodes[i]);
+ std::vector<char> output_data(getTensorSize(output_node));
+ interpreter.readOutputTensor(output_node, output_data.data(), output_data.size());
+
+ // Output data is written to ${output_prefix}n
+ // (ex: Add.circle.output0)
+ writeDataToFile(std::string(output_prefix) + std::to_string(i), output_data.data(),
+ output_data.size());
+ }
+ return EXIT_SUCCESS;
+}
diff --git a/compiler/circle-mpqsolver/CMakeLists.txt b/compiler/circle-mpqsolver/CMakeLists.txt
new file mode 100644
index 000000000..9af9fc2a3
--- /dev/null
+++ b/compiler/circle-mpqsolver/CMakeLists.txt
@@ -0,0 +1,48 @@
+nnas_find_package(Jsoncpp)
+if(NOT Jsoncpp_FOUND)
+ message(STATUS "Build circle-mpqsolver: FAILED (missing jsoncpp)")
+ return()
+endif(NOT Jsoncpp_FOUND)
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_executable(circle-mpqsolver "${SOURCES}")
+target_include_directories(circle-mpqsolver PRIVATE src)
+target_include_directories(circle-mpqsolver PRIVATE ${Jsoncpp_INCLUDE_DIRS})
+target_link_libraries(circle-mpqsolver ${Jsoncpp_STATIC_LIB})
+target_link_libraries(circle-mpqsolver arser)
+target_link_libraries(circle-mpqsolver vconone)
+target_link_libraries(circle-mpqsolver safemain)
+target_link_libraries(circle-mpqsolver luci_service)
+target_link_libraries(circle-mpqsolver luci_pass)
+target_link_libraries(circle-mpqsolver luci_interpreter)
+target_link_libraries(circle-mpqsolver dio_hdf5)
+target_link_libraries(circle-mpqsolver luci_import)
+target_link_libraries(circle-mpqsolver luci_export)
+target_link_libraries(circle-mpqsolver luci_log)
+target_link_libraries(circle-mpqsolver nncc_common)
+
+install(TARGETS circle-mpqsolver DESTINATION bin)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+# circle-mpqsolver is executable, so we do not link it to the test.
+# Instead, we use TEST_SOURCES to specify sources used for tests.
+set(TEST_SOURCES
+ "src/bisection/DepthParameterizer.cpp"
+ "src/core/Quantizer.cpp"
+ "src/bisection/VISQErrorApproximator.cpp"
+ "src/core/ErrorMetric.cpp"
+)
+
+nnas_find_package(GTest REQUIRED)
+GTest_AddTest(circle_mpqsolver_test ${TESTS} ${TEST_SOURCES})
+target_include_directories(circle_mpqsolver_test PRIVATE src)
+target_include_directories(circle_mpqsolver_test PRIVATE ${Jsoncpp_INCLUDE_DIRS})
+target_link_libraries(circle_mpqsolver_test ${Jsoncpp_STATIC_LIB})
+target_link_libraries(circle_mpqsolver_test luci_service)
+target_link_libraries(circle_mpqsolver_test luci_pass)
diff --git a/compiler/circle-mpqsolver/README.md b/compiler/circle-mpqsolver/README.md
new file mode 100644
index 000000000..aa66e28a7
--- /dev/null
+++ b/compiler/circle-mpqsolver/README.md
@@ -0,0 +1,72 @@
+# circle-mpqsolver
+_circle-mpqsolver_ provides light-weight methods for finding a high-quality mixed-precision model
+within a reasonable time.
+
+## Methods
+
+### Bisection
+A model is split into two parts: front and back. One of them is quantized to uint8 and the other to
+int16. The precision of the front and the back is determined by our proxy metric: the upper bound of
+total layer errors. (See https://github.com/Samsung/ONE/pull/10170#discussion_r1042246598 for more details)
+
+The boundary between the front and the back is decided by the depth of operators (depth: the
+distance from the input to the operator), i.e., given a depth d, layers with a depth less than d
+are included in the front, and the rest are included in the back. Bisection performs a binary
+search to find a depth which achieves a qerror less than the target qerror.
+
+In case the front is quantized to Q16, the pseudocode is the following:
+```
+ until |_depth_max_ - _depth_min_| <= 1 do
+ _current_depth_ = 0.5 * (_depth_max_ + _depth_min_)
+ if Loss(_current_depth_) < _target_loss_
+ _depth_max_ = _current_depth_
+ else
+ _depth_min_ = _current_depth_
+```
+where Loss(current_depth) is the qerror of the mixed-precision model split at current_depth.
+As every iteration halves the remaining range (|depth_max - depth_min|), the search converges in
+_~log2(max_depth)_ iterations.
+
+## Usage
+Run _circle-mpqsolver_ with the following arguments.
+
+--data: .h5 file with test data
+
+--input_model: Input float model initialized with min-max (recorded model)
+
+--output_model: Output quantized model
+
+--qerror_ratio: Target quantization error ratio. It should be in [0, 1]. 0 indicates the qerror of the full int16 model, 1 indicates the qerror of the full uint8 model. A lower `qerror_ratio` yields a more accurate solution.
+
+--bisection _mode_: whether input nodes should be quantized to Q16 precision ['auto', 'true', 'false']
+
+--visq_file: .visq.json file to be used in 'auto' mode
+
+--save_intermediate: path to the directory where all intermediate results will be saved
+
+```
+$ ./circle-mpqsolver
+ --data <.h5 data>
+ --input_model <input_recorded_model>
+ --output_model <output_model_path>
+ --qerror_ratio <optional value for reproducing target _qerror_ default is 0.5>
+ --bisection <whether input nodes should be quantized into Q16 default is 'auto'>
+ --visq_file <*.visq.json file with quantization errors>
+ --save_intermediate <intermediate_results_path>
+```
+
+For example:
+```
+$./circle-mpqsolver
+ --data dataset.h5
+ --input_model model.recorded.circle
+ --output_model model.q_opt.circle
+ --qerror_ratio 0.4f
+ --bisection true
+```
+
+It will produce _model.q_opt.circle_, which is _model.recorded.circle_ quantized to mixed precision
+using _dataset.h5_, with input nodes set to _Q16_ precision. The quantization error (_qerror_) of
+_model.q_opt.circle_ will be less than
+```
+qerror(full_q16) + qerror_ratio * (qerror(full_q8) - qerror(full_q16))
+```
+(_full_q16_ - the model quantized entirely to Q16 precision, _full_q8_ - the model quantized entirely to Q8 precision).
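+
+For example (illustrative numbers): with qerror(full_q16) = 0.01, qerror(full_q8) = 0.05 and
+`--qerror_ratio 0.4`, the solver targets a qerror below 0.01 + 0.4 * (0.05 - 0.01) = 0.026.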
diff --git a/compiler/circle-mpqsolver/requires.cmake b/compiler/circle-mpqsolver/requires.cmake
new file mode 100644
index 000000000..73492a11a
--- /dev/null
+++ b/compiler/circle-mpqsolver/requires.cmake
@@ -0,0 +1,6 @@
+require("safemain")
+require("arser")
+require("vconone")
+require("luci")
+require("luci-interpreter")
+require("dio-hdf5")
diff --git a/compiler/circle-mpqsolver/src/CircleMPQSolver.cpp b/compiler/circle-mpqsolver/src/CircleMPQSolver.cpp
new file mode 100644
index 000000000..12981be40
--- /dev/null
+++ b/compiler/circle-mpqsolver/src/CircleMPQSolver.cpp
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <arser/arser.h>
+#include <vconone/vconone.h>
+#include <luci/CircleExporter.h>
+#include <luci/CircleFileExpContract.h>
+
+#include "bisection/BisectionSolver.h"
+#include <core/SolverOutput.h>
+
+#include <iostream>
+#include <iomanip>
+
+void print_version(void)
+{
+ std::cout << "circle-mpqsolver version " << vconone::get_string() << std::endl;
+ std::cout << vconone::get_copyright() << std::endl;
+}
+
+bool handleAutoAlgorithm(arser::Arser &arser, mpqsolver::bisection::BisectionSolver &solver)
+{
+ solver.algorithm(mpqsolver::bisection::BisectionSolver::Algorithm::Auto);
+ auto data_path = arser.get<std::string>("--visq_file");
+ if (data_path.empty())
+ {
+ std::cerr << "ERROR: please provide visq_file for auto mode" << std::endl;
+ return false;
+ }
+ solver.setVisqPath(data_path);
+ return true;
+}
+
+int entry(int argc, char **argv)
+{
+ const std::string bisection_str = "--bisection";
+ const std::string save_intermediate_str = "--save_intermediate";
+
+ arser::Arser arser("circle-mpqsolver provides light-weight methods for finding a high-quality "
+ "mixed-precision model within a reasonable time.");
+
+ arser::Helper::add_version(arser, print_version);
+ arser::Helper::add_verbose(arser);
+
+ arser.add_argument("--data").required(true).help("Path to the test data");
+ arser.add_argument("--data_format").required(false).help("Test data format (default: h5)");
+
+ arser.add_argument("--qerror_ratio")
+ .type(arser::DataType::FLOAT)
+ .default_value(0.5f)
+ .help("quantization error ratio ([0, 1])");
+
+ arser.add_argument(bisection_str)
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("Single optional argument for bisection method. "
+ "Whether input node should be quantized to Q16: 'auto', 'true', 'false'.");
+
+ arser.add_argument("--input_model")
+ .required(true)
+ .help("Input float model with min max initialized");
+
+ arser.add_argument("--input_dtype")
+ .type(arser::DataType::STR)
+ .default_value("uint8")
+ .help("Data type of quantized model's inputs (default: uint8)");
+
+ arser.add_argument("--output_dtype")
+ .type(arser::DataType::STR)
+ .default_value("uint8")
+ .help("Data type of quantized model's outputs (default: uint8)");
+
+ arser.add_argument("--output_model").required(true).help("Output quantized model");
+
+ arser.add_argument("--visq_file")
+ .type(arser::DataType::STR)
+ .default_value("")
+ .required(false)
+ .help("*.visq.json file with quantization errors");
+
+ arser.add_argument(save_intermediate_str)
+ .type(arser::DataType::STR)
+ .required(false)
+ .help("path to save intermediate results");
+
+ try
+ {
+ arser.parse(argc, argv);
+ }
+ catch (const std::runtime_error &err)
+ {
+ std::cerr << err.what() << std::endl;
+ std::cout << arser;
+ return EXIT_FAILURE;
+ }
+
+ if (arser.get<bool>("--verbose"))
+ {
+ // The third parameter of setenv means REPLACE.
+ // If REPLACE is zero, it does not overwrite an existing value.
+ setenv("LUCI_LOG", "100", 0);
+ }
+
+ auto data_path = arser.get<std::string>("--data");
+ auto input_model_path = arser.get<std::string>("--input_model");
+ auto output_model_path = arser.get<std::string>("--output_model");
+ auto input_dtype = arser.get<std::string>("--input_dtype");
+ auto output_dtype = arser.get<std::string>("--output_dtype");
+
+ float qerror_ratio = arser.get<float>("--qerror_ratio");
+ if (qerror_ratio < 0.f || qerror_ratio > 1.f)
+ {
+ std::cerr << "ERROR: quantization ratio must be in [0, 1]" << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ SolverOutput::get() << ">> Searching mixed precision configuration \n"
+ << "model:" << input_model_path << "\n"
+ << "dataset: " << data_path << "\n"
+ << "input dtype: " << input_dtype << "\n"
+ << "output dtype: " << output_dtype << "\n";
+
+ if (arser[bisection_str])
+ {
+ // optimize
+ using namespace mpqsolver::bisection;
+
+ BisectionSolver solver(data_path, qerror_ratio, input_dtype, output_dtype);
+ {
+ auto value = arser.get<std::string>(bisection_str);
+ if (value == "auto")
+ {
+ SolverOutput::get() << "algorithm: bisection (auto)\n";
+ if (!handleAutoAlgorithm(arser, solver))
+ {
+ return EXIT_FAILURE;
+ }
+ }
+ else if (value == "true")
+ {
+ SolverOutput::get() << "algorithm: bisection (Q16AtFront)";
+ solver.algorithm(BisectionSolver::Algorithm::ForceQ16Front);
+ }
+ else if (value == "false")
+ {
+ SolverOutput::get() << "algorithm: bisection (Q8AtFront)";
+ solver.algorithm(BisectionSolver::Algorithm::ForceQ16Back);
+ }
+ else
+ {
+ std::cerr << "ERROR: Unrecognized option for bisection algortithm" << input_model_path
+ << std::endl;
+ return EXIT_FAILURE;
+ }
+ }
+
+ if (arser[save_intermediate_str])
+ {
+ auto data_path = arser.get<std::string>(save_intermediate_str);
+ if (!data_path.empty())
+ {
+ solver.set_save_intermediate(data_path);
+ }
+ }
+
+ SolverOutput::get() << "qerror metric: MAE\n"
+ << "target qerror ratio: " << qerror_ratio << "\n";
+
+ auto optimized = solver.run(input_model_path);
+ if (optimized == nullptr)
+ {
+ std::cerr << "ERROR: Failed to build mixed precision model" << input_model_path << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ // save optimized
+ {
+ SolverOutput::get() << "Saving output model to " << output_model_path << "\n";
+ luci::CircleExporter exporter;
+ luci::CircleFileExpContract contract(optimized.get(), output_model_path);
+ if (!exporter.invoke(&contract))
+ {
+ std::cerr << "ERROR: Failed to export mixed precision model" << input_model_path
+ << std::endl;
+ return EXIT_FAILURE;
+ }
+ }
+ }
+ else
+ {
+ std::cerr << "ERROR: Unrecognized solver" << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/compiler/circle-mpqsolver/src/MPQSolver.cpp b/compiler/circle-mpqsolver/src/MPQSolver.cpp
new file mode 100644
index 000000000..10cfbb65f
--- /dev/null
+++ b/compiler/circle-mpqsolver/src/MPQSolver.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MPQSolver.h"
+
+using namespace mpqsolver;
+
+MPQSolver::MPQSolver(const std::string &input_data_path, float qerror_ratio,
+ const std::string &input_quantization, const std::string &output_quantization)
+ : _input_data_path(input_data_path), _qerror_ratio(qerror_ratio),
+ _input_quantization(input_quantization), _output_quantization(output_quantization)
+{
+}
+
+void MPQSolver::set_save_intermediate(const std::string &save_path)
+{
+ _hooks = std::make_unique<core::DumpingHooks>(save_path);
+}
diff --git a/compiler/circle-mpqsolver/src/MPQSolver.h b/compiler/circle-mpqsolver/src/MPQSolver.h
new file mode 100644
index 000000000..6c5d25dad
--- /dev/null
+++ b/compiler/circle-mpqsolver/src/MPQSolver.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPQSOLVER_MPQSOLVER_SOLVER_H__
+#define __MPQSOLVER_MPQSOLVER_SOLVER_H__
+
+#include <core/DumpingHooks.h>
+
+#include <memory>
+#include <string>
+
+namespace mpqsolver
+{
+
+class MPQSolver
+{
+public:
+ /**
+ * @brief construct Solver using input_data_path for .h5 file,
+ * qerror_ratio to set target qerror, and input_quantization/output_quantization to set
+ * quantization type at input/output respectively
+ */
+ MPQSolver(const std::string &input_data_path, float qerror_ratio,
+ const std::string &input_quantization, const std::string &output_quantization);
+ virtual ~MPQSolver() = default;
+
+ /**
+ * @brief run solver for recorded float module at module_path
+ */
+ virtual std::unique_ptr<luci::Module> run(const std::string &module_path) = 0;
+
+ /**
+ * @brief set all intermediate artifacts to be saved
+ */
+ void set_save_intermediate(const std::string &save_path);
+
+protected:
+ std::string _input_data_path;
+ std::string _input_quantization;
+ std::string _output_quantization;
+ float _qerror_ratio = 0.f; // quantization error ratio
+ std::unique_ptr<core::DumpingHooks> _hooks;
+};
+
+} // namespace mpqsolver
+
+#endif //__MPQSOLVER_MPQ_SOLVER_H__
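Note: MPQSolver is the abstract strategy base; concrete solvers override run(). A minimal sketch of a hypothetical subclass, purely to illustrate the contract (the class name and body are not part of this patch):

    #include "MPQSolver.h"

    namespace mpqsolver
    {

    // Hypothetical no-op solver illustrating the MPQSolver interface.
    class IdentitySolver final : public MPQSolver
    {
    public:
      using MPQSolver::MPQSolver; // reuse the base constructor

      std::unique_ptr<luci::Module> run(const std::string &module_path) override
      {
        (void)module_path; // a real solver would import and search this module
        return nullptr;    // placeholder: no configuration found
      }
    };

    } // namespace mpqsolver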
diff --git a/compiler/circle-mpqsolver/src/bisection/BisectionSolver.cpp b/compiler/circle-mpqsolver/src/bisection/BisectionSolver.cpp
new file mode 100644
index 000000000..976dac550
--- /dev/null
+++ b/compiler/circle-mpqsolver/src/bisection/BisectionSolver.cpp
@@ -0,0 +1,290 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BisectionSolver.h"
+#include "DepthParameterizer.h"
+#include "VISQErrorApproximator.h"
+
+#include <core/ErrorMetric.h>
+#include <core/SolverOutput.h>
+
+#include <luci/ImporterEx.h>
+
+#include <cmath>
+#include <iostream>
+
+using namespace mpqsolver::bisection;
+
+namespace
+{
+
+/**
+ * @brief Compare errors of two disjoint subsets of a model sliced by cut_depth
+ * @return True if the front part (< cut_depth) has larger errors than the rear part (>= cut_depth)
+ */
+bool front_has_higher_error(const NodeDepthType &nodes_depth, const std::string &visq_path,
+ float cut_depth)
+{
+ SolverOutput::get() << "\n>> Running bisection(auto) algorithm\n";
+
+ VISQErrorApproximator approximator;
+ approximator.init(visq_path);
+
+ float error_at_input = 0;
+ float error_at_output = 0;
+ for (auto &iter : nodes_depth)
+ {
+ float cur_error = approximator.approximate(iter.first->name());
+ if (iter.second < cut_depth)
+ {
+ error_at_input += cur_error;
+ }
+ else
+ {
+ error_at_output += cur_error;
+ }
+ }
+
+ SolverOutput::get() << "Qerror of front half: " << error_at_input << "\n";
+ SolverOutput::get() << "Qerror of rear half: " << error_at_output << "\n";
+ if (error_at_input > error_at_output)
+ {
+ SolverOutput::get() << "Front part will be Q16, while the rear will be Q8\n";
+ }
+ else
+ {
+ SolverOutput::get() << "Front part will be Q8, while the rear will be Q16\n";
+ }
+
+ return error_at_input > error_at_output;
+}
+
+std::unique_ptr<luci::Module> read_module(const std::string &path)
+{
+ luci::ImporterEx importerex;
+ auto module = importerex.importVerifyModule(path);
+ if (module.get() == nullptr)
+ {
+ std::cerr << "ERROR: Failed to load " << path << std::endl;
+ return nullptr;
+ }
+
+ return module;
+}
+
+} // namespace
+
+BisectionSolver::BisectionSolver(const std::string &input_data_path, float qerror_ratio,
+ const std::string &input_quantization,
+ const std::string &output_quantization)
+ : MPQSolver(input_data_path, qerror_ratio, input_quantization, output_quantization)
+{
+ _quantizer = std::make_unique<core::Quantizer>(_input_quantization, _output_quantization);
+}
+
+float BisectionSolver::evaluate(const core::DatasetEvaluator &evaluator,
+ const std::string &flt_path, const std::string &def_quant,
+ core::LayerParams &layers)
+{
+ auto model = read_module(flt_path);
+ // get fake quantized model for evaluation
+ if (!_quantizer->fake_quantize(model.get(), def_quant, layers))
+ {
+ throw std::runtime_error("Failed to produce fake-quantized model.");
+ }
+
+ return evaluator.evaluate(model.get());
+}
+
+void BisectionSolver::algorithm(Algorithm algorithm) { _algorithm = algorithm; }
+
+void BisectionSolver::setVisqPath(const std::string &visq_path) { _visq_data_path = visq_path; }
+
+std::unique_ptr<luci::Module> BisectionSolver::run(const std::string &module_path)
+{
+ auto module = read_module(module_path);
+
+ float min_depth = 0.f;
+ float max_depth = 0.f;
+ NodeDepthType nodes_depth;
+ if (compute_depth(module.get(), nodes_depth, min_depth, max_depth) !=
+ ParameterizerResult::SUCCESS)
+ {
+ std::cerr << "ERROR: Invalid graph for bisectioning" << std::endl;
+ return nullptr;
+ }
+
+ SolverOutput::get() << "\n>> Computing baseline qerrors\n";
+
+ std::unique_ptr<core::MAEMetric> metric = std::make_unique<core::MAEMetric>();
+ core::DatasetEvaluator evaluator(module.get(), _input_data_path, *metric.get());
+
+ core::LayerParams layer_params;
+ float int16_qerror =
+ evaluate(evaluator, module_path, "int16" /* default quant_dtype */, layer_params);
+ SolverOutput::get() << "Full int16 model qerror: " << int16_qerror << "\n";
+
+ float uint8_qerror =
+ evaluate(evaluator, module_path, "uint8" /* default quant_dtype */, layer_params);
+ SolverOutput::get() << "Full uint8 model qerror: " << uint8_qerror << "\n";
+ _quantizer->set_hook(_hooks.get());
+ if (_hooks)
+ {
+ _hooks->on_begin_solver(module_path, uint8_qerror, int16_qerror);
+ }
+
+ if (int16_qerror > uint8_qerror)
+ {
+ throw std::runtime_error("Q8 model's qerror is less than Q16 model's qerror.");
+ }
+
+ _qerror = int16_qerror + _qerror_ratio * std::fabs(uint8_qerror - int16_qerror);
+ SolverOutput::get() << "Target qerror: " << _qerror << "\n";
+
+  if (uint8_qerror <= _qerror)
+  {
+    // no need for bisectioning, just return the Q8 model
+    if (!_quantizer->quantize(module.get(), "uint8", layer_params))
+    {
+      std::cerr << "ERROR: Failed to quantize model" << std::endl;
+      return nullptr;
+    }
+    // return early as promised above; otherwise the code would fall through
+    // into the (now unnecessary) bisection loop
+    return module;
+  }
+
+ int last_depth = -1;
+ float best_depth = -1;
+ float best_accuracy = -1;
+ core::LayerParams best_params;
+ if (module->size() != 1)
+ {
+ throw std::runtime_error("Unsupported module");
+ }
+ auto graph = module->graph(0);
+ auto active_nodes = loco::active_nodes(loco::output_nodes(graph));
+ // input and output nodes are not valid for quantization, so let's remove them
+ for (auto node : loco::input_nodes(graph))
+ {
+ active_nodes.erase(node);
+ }
+ for (auto node : loco::output_nodes(graph))
+ {
+ active_nodes.erase(node);
+ }
+
+  // let's decide whether nodes near the input are more susceptible to Q16
+  // quantization than nodes near the output
+ bool int16_front = true;
+ switch (_algorithm)
+ {
+ case Algorithm::Auto:
+ int16_front =
+ front_has_higher_error(nodes_depth, _visq_data_path, 0.5f * (max_depth + min_depth));
+ break;
+ case Algorithm::ForceQ16Front:
+ SolverOutput::get() << "Front part will be Q16, while the rear will be Q8\n";
+ int16_front = true;
+ break;
+ case Algorithm::ForceQ16Back:
+ SolverOutput::get() << "Front part will be Q8, while the rear will be Q16\n";
+ int16_front = false;
+ break;
+ }
+
+ SolverOutput::get() << "\n";
+
+ while (true)
+ {
+ if (_hooks)
+ {
+ _hooks->on_begin_iteration();
+ }
+
+ int cut_depth = static_cast<int>(std::floor(0.5f * (min_depth + max_depth)));
+
+ if (last_depth == cut_depth)
+ {
+ break;
+ }
+
+ SolverOutput::get() << "Looking for the optimal configuration in [" << min_depth << " , "
+ << max_depth << "] depth segment\n";
+
+ last_depth = cut_depth;
+
+ core::LayerParams layer_params;
+ for (auto &node : active_nodes)
+ {
+ auto cur_node = loco::must_cast<luci::CircleNode *>(node);
+ auto iter = nodes_depth.find(cur_node);
+ if (iter == nodes_depth.end())
+ {
+ continue; // to filter out nodes like weights
+ }
+
+ float depth = iter->second;
+
+ if ((depth <= cut_depth && int16_front) || (depth >= cut_depth && !int16_front))
+ {
+ auto layer_param = std::make_shared<core::LayerParam>();
+ {
+ layer_param->name = cur_node->name();
+ layer_param->dtype = "int16";
+ layer_param->granularity = "channel";
+ }
+
+ layer_params.emplace_back(layer_param);
+ }
+ }
+
+ float cur_accuracy = evaluate(evaluator, module_path, "uint8", layer_params);
+
+ if (_hooks)
+ {
+ _hooks->on_end_iteration(layer_params, "uint8", cur_accuracy);
+ }
+
+ if (cur_accuracy < _qerror)
+ {
+ SolverOutput::get() << "Qerror at depth " << cut_depth << " is " << cur_accuracy
+ << " < target qerror (" << _qerror << ")\n";
+ int16_front ? (max_depth = cut_depth) : (min_depth = cut_depth);
+ best_params = layer_params;
+ best_depth = cut_depth;
+ best_accuracy = cur_accuracy;
+ }
+ else
+ {
+ SolverOutput::get() << "Qerror at depth " << cut_depth << " is " << cur_accuracy
+ << (cur_accuracy > _qerror ? " > " : " == ") << "target qerror ("
+ << _qerror << ")\n";
+ int16_front ? (min_depth = cut_depth) : (max_depth = cut_depth);
+ }
+ }
+
+ if (_hooks)
+ {
+ _hooks->on_end_solver(best_params, "uint8", best_accuracy);
+ }
+
+ SolverOutput::get() << "Found the best configuration at depth " << best_depth << "\n";
+ if (!_quantizer->quantize(module.get(), "uint8", best_params))
+ {
+ std::cerr << "ERROR: Failed to quantize model" << std::endl;
+ return nullptr;
+ }
+
+ return module;
+}
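For intuition: the search target is interpolated between the full-Q16 and full-Q8 errors. Assuming, for illustration, int16_qerror = 0.02, uint8_qerror = 0.10 and _qerror_ratio = 0.5, then

    _qerror = 0.02 + 0.5 * |0.10 - 0.02| = 0.06

Each iteration halves the [min_depth, max_depth] segment: if the mixed configuration at cut_depth meets the 0.06 target, the Q16 region shrinks; otherwise it grows. The loop stops once cut_depth no longer changes.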
diff --git a/compiler/circle-mpqsolver/src/bisection/BisectionSolver.h b/compiler/circle-mpqsolver/src/bisection/BisectionSolver.h
new file mode 100644
index 000000000..83851c0c8
--- /dev/null
+++ b/compiler/circle-mpqsolver/src/bisection/BisectionSolver.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPQSOLVER_BISECTION_SOLVER_H__
+#define __MPQSOLVER_BISECTION_SOLVER_H__
+
+#include <core/Quantizer.h>
+#include <core/Evaluator.h>
+#include <MPQSolver.h>
+
+#include <luci/IR/Module.h>
+
+#include <memory>
+#include <string>
+
+namespace mpqsolver
+{
+namespace bisection
+{
+
+class BisectionSolver final : public MPQSolver
+{
+public:
+ /**
+ * @brief Algorithm options for running bisection algorithm
+ */
+ enum Algorithm
+ {
+ Auto,
+ ForceQ16Front,
+ ForceQ16Back,
+ };
+
+public:
+ /**
+ * @brief construct Solver using input_data_path for .h5 file,
+ * qerror_ratio to set target qerror, and input_quantization/output_quantization to set
+ * quantization type at input/output respectively
+ */
+ BisectionSolver(const std::string &input_data_path, float qerror_ratio,
+ const std::string &input_quantization, const std::string &output_quantization);
+ BisectionSolver() = delete;
+
+ /**
+ * @brief run bisection for recorded float module at module_path
+ */
+ std::unique_ptr<luci::Module> run(const std::string &module_path) override;
+
+ /**
+ * @brief set used algorithm
+ */
+ void algorithm(Algorithm algorithm);
+
+ /**
+   * @brief set visq file path to be used in 'auto' mode
+   * @details this is used to decide which way of splitting the neural network
+   *          (Q16 at the front vs. at the back) is best for accuracy.
+ */
+ void setVisqPath(const std::string &visq_path);
+
+private:
+ float evaluate(const core::DatasetEvaluator &evaluator, const std::string &module_path,
+ const std::string &def_quant, core::LayerParams &layers);
+
+private:
+ float _qerror = 0.f; // quantization error
+ Algorithm _algorithm = Algorithm::ForceQ16Front;
+ std::unique_ptr<core::Quantizer> _quantizer;
+ std::string _visq_data_path;
+};
+
+} // namespace bisection
+} // namespace mpqsolver
+
+#endif //__MPQSOLVER_BISECTION_SOLVER_H__
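A minimal driver-side usage sketch of the class above (paths and the 0.5 ratio are illustrative; Auto mode additionally needs a visq error file):

    #include "bisection/BisectionSolver.h"

    using namespace mpqsolver::bisection;

    int main()
    {
      BisectionSolver solver("data.h5", 0.5f, "uint8", "uint8");
      solver.algorithm(BisectionSolver::Algorithm::Auto);
      solver.setVisqPath("model.visq.json"); // only consulted in Auto mode
      auto module = solver.run("model.circle");
      return module ? 0 : 1;
    }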
diff --git a/compiler/circle-mpqsolver/src/bisection/DepthParameterizer.cpp b/compiler/circle-mpqsolver/src/bisection/DepthParameterizer.cpp
new file mode 100644
index 000000000..cbf1b96e8
--- /dev/null
+++ b/compiler/circle-mpqsolver/src/bisection/DepthParameterizer.cpp
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DepthParameterizer.h"
+
+namespace mpqsolver
+{
+namespace bisection
+{
+
+/**
+ * @brief compute maximal distance from graph inputs to graph nodes along with min/max values of
+ * distance and return status of computation (Assumes graph has no cycles)
+ */
+ParameterizerResult compute_depth(const luci::Module *module, NodeDepthType &nodes_depth,
+ float &min_depth, float &max_depth)
+{
+ if (module == nullptr)
+ return ParameterizerResult::FAILURE;
+
+ if (module->size() != 1)
+ return ParameterizerResult::FAILURE;
+
+ auto graph = module->graph(0);
+ if (!graph)
+ return ParameterizerResult::FAILURE;
+
+ // initializing
+ std::vector<luci::CircleNode *> to_process;
+ std::map<std::string, float> named_depth;
+ {
+ auto inputs = loco::input_nodes(graph);
+ for (auto &node : inputs)
+ {
+ auto cnode = loco::must_cast<luci::CircleNode *>(node);
+ to_process.emplace_back(cnode);
+ nodes_depth[cnode] = 0.f;
+ named_depth[cnode->name()] = 0.f;
+ }
+ }
+
+ // enumerating
+ while (!to_process.empty())
+ {
+ auto cur_node = to_process.back();
+ to_process.pop_back();
+ auto iter = nodes_depth.find(cur_node);
+ if (iter == nodes_depth.end())
+ {
+ return ParameterizerResult::FAILURE; // unexpected
+ }
+ float cur_depth = iter->second + 1;
+ // processing children
+ auto children = loco::succs(cur_node);
+ for (auto &child : children)
+ {
+ auto cichild = loco::must_cast<luci::CircleNode *>(child);
+ auto node_depth = nodes_depth.find(cichild);
+ if (node_depth == nodes_depth.end() || node_depth->second < cur_depth)
+ {
+ // initialize depth
+ nodes_depth[cichild] = cur_depth;
+ to_process.push_back(cichild);
+ named_depth[cichild->name()] = cur_depth;
+ }
+ }
+ }
+
+ // compute min/max of depth parameter
+ auto minmax = std::minmax_element(
+ nodes_depth.begin(), nodes_depth.end(),
+ [=](const std::pair<luci::CircleNode *, float> &el1,
+ const std::pair<luci::CircleNode *, float> &el2) { return el1.second < el2.second; });
+
+ min_depth = minmax.first->second;
+ max_depth = minmax.second->second;
+
+ return ParameterizerResult::SUCCESS;
+}
+
+} // namespace bisection
+} // namespace mpqsolver
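Concretely, for a trivial chain input -> conv -> output the traversal above assigns depths 0, 1 and 2, so min_depth = 0 and max_depth = 2; the unit test below verifies exactly this shape.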
diff --git a/compiler/circle-mpqsolver/src/bisection/DepthParameterizer.h b/compiler/circle-mpqsolver/src/bisection/DepthParameterizer.h
new file mode 100644
index 000000000..6a96aa1cf
--- /dev/null
+++ b/compiler/circle-mpqsolver/src/bisection/DepthParameterizer.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPQSOLVER_DEPTH_PARAMETERIZER_H__
+#define __MPQSOLVER_DEPTH_PARAMETERIZER_H__
+
+#include <luci/IR/Module.h>
+#include <luci/IR/CircleNodeDecl.h>
+
+namespace mpqsolver
+{
+namespace bisection
+{
+
+using NodeDepthType = std::map<luci::CircleNode *, float>;
+
+/**
+ * @brief status of parameterization
+ */
+enum class ParameterizerResult : int32_t
+{
+ SUCCESS = 0,
+ FAILURE = 1
+};
+
+/**
+ * @brief compute maximal distance from graph inputs to graph nodes along with min/max values of
+ * distance and return status of computation (success/failure)
+ */
+ParameterizerResult compute_depth(const luci::Module *module, NodeDepthType &nodes_depth,
+ float &min_depth, float &max_depth);
+
+} // namespace bisection
+} // namespace mpqsolver
+
+#endif //__MPQSOLVER_DEPTH_PARAMETERIZER_H__
diff --git a/compiler/circle-mpqsolver/src/bisection/DepthParameterizer.test.cpp b/compiler/circle-mpqsolver/src/bisection/DepthParameterizer.test.cpp
new file mode 100644
index 000000000..504032d6b
--- /dev/null
+++ b/compiler/circle-mpqsolver/src/bisection/DepthParameterizer.test.cpp
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "DepthParameterizer.h"
+#include <core/TestHelper.h>
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
+class NConvGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ _filter = _g->nodes()->create<luci::CircleConst>();
+ _filter->dtype(loco::DataType::FLOAT32);
+ _filter->shape({_channel_size, 1, 1, _channel_size});
+ _filter->name("conv_filter");
+
+ _bias = _g->nodes()->create<luci::CircleConst>();
+ _bias->dtype(loco::DataType::FLOAT32);
+ _bias->shape({_channel_size});
+ _bias->name("conv_bias");
+
+ _conv = _g->nodes()->create<luci::CircleConv2D>();
+ _conv->padding(luci::Padding::SAME);
+ _conv->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _conv->dtype(loco::DataType::FLOAT32);
+ _conv->shape({1, _width, _height, _channel_size});
+ _conv->name("conv");
+ _conv->filter(_filter);
+ _conv->bias(_bias);
+ _conv->input(input);
+
+ return _conv;
+ }
+
+public:
+ luci::CircleConv2D *_conv = nullptr;
+ luci::CircleConst *_filter = nullptr;
+ luci::CircleConst *_bias = nullptr;
+};
+
+} // namespace
+
+TEST(CircleMPQSolverDepthParameterizerTest, verifyResultsTest)
+{
+ auto m = luci::make_module();
+ NConvGraph g;
+ g.init();
+ auto conv = g._conv;
+ auto input = g._input;
+ auto output = g._output;
+
+ g.transfer_to(m.get());
+
+ mpqsolver::bisection::NodeDepthType nodes_depth;
+ float min_depth = std::numeric_limits<float>().max();
+ float max_depth = -std::numeric_limits<float>().max();
+ auto status = mpqsolver::bisection::compute_depth(m.get(), nodes_depth, min_depth, max_depth);
+
+ EXPECT_TRUE(status == mpqsolver::bisection::ParameterizerResult::SUCCESS);
+ EXPECT_TRUE(max_depth == 2 && min_depth == 0);
+ EXPECT_TRUE(nodes_depth[input] == min_depth);
+ EXPECT_TRUE(nodes_depth[conv] == 1);
+ EXPECT_TRUE(nodes_depth[output] == max_depth);
+}
+
+TEST(CircleMPQSolverDepthParameterizerTest, verifyResultsTest_NEG)
+{
+ auto m = luci::make_module();
+ mpqsolver::bisection::NodeDepthType nodes_depth;
+ float min_depth = std::numeric_limits<float>().max();
+ float max_depth = -std::numeric_limits<float>().max();
+ auto status = mpqsolver::bisection::compute_depth(m.get(), nodes_depth, min_depth, max_depth);
+
+ EXPECT_TRUE(status == mpqsolver::bisection::ParameterizerResult::FAILURE);
+}
diff --git a/compiler/circle-mpqsolver/src/bisection/VISQErrorApproximator.cpp b/compiler/circle-mpqsolver/src/bisection/VISQErrorApproximator.cpp
new file mode 100644
index 000000000..ee6376a48
--- /dev/null
+++ b/compiler/circle-mpqsolver/src/bisection/VISQErrorApproximator.cpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "VISQErrorApproximator.h"
+
+#include <fstream>
+#include <json.h>
+
+using namespace mpqsolver::bisection;
+
+void VISQErrorApproximator::init(const std::string &visq_data_path)
+{
+ // read file
+ std::ifstream file(visq_data_path);
+ init(file);
+}
+
+void VISQErrorApproximator::init(std::istream &visq_data)
+{
+ Json::Reader reader;
+ Json::Value completeJsonData;
+ if (!reader.parse(visq_data, completeJsonData))
+ {
+ throw std::runtime_error("Invalid visq stream");
+ }
+
+ if (!completeJsonData.isMember("error"))
+ {
+ throw std::runtime_error("No 'error' section in visq stream");
+ }
+
+ auto layers = completeJsonData["error"][0];
+ auto names = layers.getMemberNames();
+ for (auto name : names)
+ {
+ auto value = layers[name].asFloat();
+ _layer_errors[name] = value;
+ }
+}
+
+float VISQErrorApproximator::approximate(const std::string &node_name) const
+{
+ auto iter = _layer_errors.find(node_name);
+ if (iter == _layer_errors.end())
+ {
+ return 0.f;
+ }
+
+ return iter->second;
+}
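The parser above expects a top-level "error" array whose first element maps layer names to their quantization errors, i.e. data shaped like the following (layer names illustrative):

    {
      "error": [ { "conv1": 0.25, "fc1": 0.0125 } ]
    }

With such data, approximate("conv1") returns 0.25; unknown names fall back to 0.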
diff --git a/compiler/circle-mpqsolver/src/bisection/VISQErrorApproximator.h b/compiler/circle-mpqsolver/src/bisection/VISQErrorApproximator.h
new file mode 100644
index 000000000..0d963cefb
--- /dev/null
+++ b/compiler/circle-mpqsolver/src/bisection/VISQErrorApproximator.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPQSOLVER_BISECTION_VISQ_ERROR_APPROXIMATOR_H__
+#define __MPQSOLVER_BISECTION_VISQ_ERROR_APPROXIMATOR_H__
+
+#include <string>
+#include <map>
+
+namespace mpqsolver
+{
+namespace bisection
+{
+
+class VISQErrorApproximator final
+{
+public:
+ /**
+ * @brief constructor of VISQErrorApproximator
+ */
+ VISQErrorApproximator() = default;
+
+ /**
+   * @brief initialize from visq_data_path (throws on failure)
+ */
+ void init(const std::string &visq_data_path);
+
+ /**
+   * @brief approximate the error introduced by quantizing the node into Q8
+ */
+ float approximate(const std::string &node_name) const;
+
+private:
+ /**
+   * @brief initialize from visq_data (throws on failure)
+ */
+ void init(std::istream &visq_data);
+
+private:
+ std::string _visq_data_path;
+ std::map<std::string, float> _layer_errors;
+};
+
+} // namespace bisection
+} // namespace mpqsolver
+
+#endif // __MPQSOLVER_BISECTION_VISQ_ERROR_APPROXIMATOR_H__
diff --git a/compiler/circle-mpqsolver/src/bisection/VISQErrorApproximator.test.cpp b/compiler/circle-mpqsolver/src/bisection/VISQErrorApproximator.test.cpp
new file mode 100644
index 000000000..ccacb1ab7
--- /dev/null
+++ b/compiler/circle-mpqsolver/src/bisection/VISQErrorApproximator.test.cpp
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "VISQErrorApproximator.h"
+
+#include <json.h>
+#include <fstream>
+#include <gtest/gtest.h>
+
+namespace
+{
+
+void writeDataToFile(const std::string &path, const std::string &data)
+{
+ std::ofstream file;
+ file.open(path);
+ file << data;
+ file.close();
+}
+
+void makeTemporaryFile(char *name_template)
+{
+ int fd = mkstemp(name_template);
+ if (fd == -1)
+ {
+ throw std::runtime_error{"mkstemp failed"};
+ }
+}
+
+} // namespace
+
+TEST(CircleMPQSolverVISQErrorApproximatorTest, verifyResultsTest)
+{
+ static std::string errors_key = "error";
+ static std::string layer_key = "layer_0";
+ static float layer_error = 0.5f;
+ // trivial json with a single layer
+ Json::Value error_data;
+ Json::Value layer_data;
+ layer_data[layer_key] = layer_error;
+ error_data[errors_key].append(layer_data);
+
+ Json::StreamWriterBuilder builder;
+ auto data = Json::writeString(builder, error_data);
+
+ char path[] = "VISQErrorApproximator-TEST-XXXXXX";
+ makeTemporaryFile(path);
+ writeDataToFile(path, data);
+
+ mpqsolver::bisection::VISQErrorApproximator approximator;
+ EXPECT_NO_THROW(approximator.init(path));
+ EXPECT_FLOAT_EQ(approximator.approximate(layer_key), layer_error);
+ unlink(path);
+}
+
+TEST(CircleMPQSolverVISQErrorApproximatorTest, verifyResultsTest_NEG)
+{
+ Json::Value error_data;
+ // just an empty json
+ Json::StreamWriterBuilder builder;
+ auto data = Json::writeString(builder, error_data);
+
+ char path[] = "VISQErrorApproximator-TEST-NEG-XXXXXX";
+ makeTemporaryFile(path);
+ writeDataToFile(path, data);
+
+ mpqsolver::bisection::VISQErrorApproximator approximator;
+ EXPECT_THROW(approximator.init(path), std::exception);
+ unlink(path);
+}
diff --git a/compiler/circle-mpqsolver/src/core/Dumper.cpp b/compiler/circle-mpqsolver/src/core/Dumper.cpp
new file mode 100644
index 000000000..3a94cb3fa
--- /dev/null
+++ b/compiler/circle-mpqsolver/src/core/Dumper.cpp
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Dumper.h"
+
+#include <luci/CircleExporter.h>
+#include <luci/CircleFileExpContract.h>
+
+#include <json.h>
+#include <fstream>
+#include <sys/stat.h>
+
+using namespace mpqsolver::core;
+
+namespace
+{
+
+const std::string default_dtype_key = "default_quantization_dtype";
+const std::string default_granularity_key = "default_granularity";
+const std::string layers_key = "layers";
+const std::string model_key = "model_path";
+const std::string layer_name_key = "name";
+const std::string layer_dtype_key = "dtype";
+const std::string layer_granularity_key = "granularity";
+
+} // namespace
+
+Dumper::Dumper(const std::string &dir_path) : _dir_path(dir_path) {}
+
+void Dumper::set_model_path(const std::string &model_path) { _model_path = model_path; }
+
+void Dumper::dump_MPQ_configuration(const LayerParams &layers, const std::string &def_dtype,
+ const std::string &path) const
+{
+ Json::Value mpq_data;
+ mpq_data[default_dtype_key] = def_dtype;
+ mpq_data[default_granularity_key] = "channel";
+ mpq_data[model_key] = _model_path;
+
+ Json::Value layers_data;
+ for (auto &layer : layers)
+ {
+ Json::Value layer_data;
+ layer_data[layer_name_key] = layer->name;
+ layer_data[layer_granularity_key] = layer->granularity;
+ layer_data[layer_dtype_key] = layer->dtype;
+ layers_data.append(layer_data);
+ }
+ mpq_data[layers_key] = layers_data;
+
+ Json::StreamWriterBuilder builder;
+ auto data = Json::writeString(builder, mpq_data);
+
+ write_data_to_file(path, data);
+}
+
+void Dumper::prepare_directory(const std::string &dir_path) const
+{
+ struct stat sb;
+ if (stat(dir_path.c_str(), &sb) != 0 || !S_ISDIR(sb.st_mode))
+ {
+ if (mkdir(dir_path.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH) != 0)
+ {
+ throw std::runtime_error("Failed to create directory for dumping intermediate results");
+ }
+ }
+}
+
+void Dumper::dump_MPQ_configuration(const LayerParams &layers, const std::string &def_dtype,
+ int step) const
+{
+ prepare_directory(_dir_path);
+ std::string path = _dir_path + "/Configuration_" + std::to_string(step) + ".mpq.json";
+ dump_MPQ_configuration(layers, def_dtype, path);
+}
+
+void Dumper::dump_final_MPQ(const LayerParams &layers, const std::string &def_dtype) const
+{
+ prepare_directory(_dir_path);
+ std::string path = _dir_path + "/FinalConfiguration" + ".mpq.json";
+ dump_MPQ_configuration(layers, def_dtype, path);
+}
+
+void Dumper::write_data_to_file(const std::string &path, const std::string &data) const
+{
+ std::ofstream file;
+ file.open(path);
+ file << data;
+ file.close();
+}
+
+void Dumper::save_circle(luci::Module *module, std::string &path) const
+{
+ luci::CircleExporter exporter;
+ luci::CircleFileExpContract contract(module, path);
+ if (!exporter.invoke(&contract))
+ {
+ throw std::runtime_error("Failed to export circle model to " + path);
+ }
+}
+
+void Dumper::dump_quantized(luci::Module *module, uint32_t step) const
+{
+ std::string path = _dir_path + "/quantized_" + std::to_string(step) + ".mpq.circle";
+ save_circle(module, path);
+}
+
+void Dumper::dump_error(float error, const std::string &tag, const std::string &path) const
+{
+ std::ofstream file;
+ file.open(path, std::ios_base::app);
+ file << tag << " " << error << std::endl;
+ file.close();
+}
+
+void Dumper::prepare_for_error_dumping() const
+{
+ prepare_directory(_dir_path);
+ std::string path = get_error_path();
+ std::ofstream file;
+ file.open(path); // create empty
+ file.close();
+}
+
+void Dumper::dump_Q8_error(float error) const
+{
+ std::string path = get_error_path();
+ dump_error(error, "Q8", path);
+}
+
+void Dumper::dump_Q16_error(float error) const
+{
+ std::string path = get_error_path();
+ dump_error(error, "Q16", path);
+}
+
+void Dumper::dump_MPQ_error(float error, uint32_t step) const
+{
+ std::string path = get_error_path();
+ dump_error(error, std::to_string(step), path);
+}
+
+void Dumper::dump_MPQ_error(float error) const
+{
+ std::string path = get_error_path();
+ dump_error(error, "FINAL", path);
+}
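Given the keys defined above, a dumped Configuration_<step>.mpq.json looks roughly like this (values illustrative):

    {
      "default_quantization_dtype": "uint8",
      "default_granularity": "channel",
      "model_path": "model.circle",
      "layers": [
        { "name": "conv1", "dtype": "int16", "granularity": "channel" }
      ]
    }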
diff --git a/compiler/circle-mpqsolver/src/core/Dumper.h b/compiler/circle-mpqsolver/src/core/Dumper.h
new file mode 100644
index 000000000..220b54a20
--- /dev/null
+++ b/compiler/circle-mpqsolver/src/core/Dumper.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPQSOLVER_DUMPER_H__
+#define __MPQSOLVER_DUMPER_H__
+
+#include <luci/IR/Module.h>
+#include <luci/CircleQuantizer.h>
+
+#include <string>
+
+namespace mpqsolver
+{
+namespace core
+{
+
+using LayerParam = luci::CircleQuantizer::Options::LayerParam;
+using LayerParams = std::vector<std::shared_ptr<LayerParam>>;
+
+class Dumper final
+{
+public:
+ Dumper() = default;
+ Dumper(const std::string &dir_path);
+
+ /**
+ * @brief sets model path for further usage
+ */
+ void set_model_path(const std::string &model_path);
+
+ /**
+ * @brief dumps mpq configuration
+ * @param layers specific quantization parameters
+ * @param def_dtype default quantization data type
+ * @param step id of mpq configuration
+ */
+ void dump_MPQ_configuration(const LayerParams &layers, const std::string &def_dtype,
+ int step) const;
+
+ /**
+ * @brief dumps final mpq configuration
+ * @param layers specific quantization parameters
+ * @param def_dtype default quantization data type
+ */
+ void dump_final_MPQ(const LayerParams &layers, const std::string &def_dtype) const;
+
+ /**
+ * @brief dumps quantized module
+   * @param module quantized module to be dumped
+ * @param step id of quantized module
+ */
+ void dump_quantized(luci::Module *module, uint32_t step) const;
+
+ /**
+ * @brief create file for error dumping
+ */
+ void prepare_for_error_dumping() const;
+
+ /**
+ * @brief append error of Q8 quantization
+ */
+ void dump_Q8_error(float error) const;
+
+ /**
+ * @brief append error of Q16 quantization
+ */
+ void dump_Q16_error(float error) const;
+
+ /**
+ * @brief append error of mpq quantization
+ * @param error error of quantization
+ * @param step id of error
+ */
+ void dump_MPQ_error(float error, uint32_t step) const;
+
+ /**
+ * @brief dump final error
+ * @param error final error of quantization
+ */
+ void dump_MPQ_error(float error) const;
+
+private:
+ void write_data_to_file(const std::string &path, const std::string &data) const;
+ void dump_MPQ_configuration(const LayerParams &layers, const std::string &def_dtype,
+ const std::string &path) const;
+ void prepare_directory(const std::string &dir_path) const;
+ void save_circle(luci::Module *module, std::string &path) const;
+ void dump_error(float error, const std::string &tag, const std::string &path) const;
+ std::string get_error_path() const { return _dir_path + "/errors" + ".mpq.txt"; }
+
+private:
+ std::string _dir_path;
+ std::string _model_path;
+
+}; // Dumper
+
+} // namespace core
+} // namespace mpqsolver
+
+#endif //__MPQSOLVER_DUMPER_H__
diff --git a/compiler/circle-mpqsolver/src/core/DumpingHooks.cpp b/compiler/circle-mpqsolver/src/core/DumpingHooks.cpp
new file mode 100644
index 000000000..4d0522bdd
--- /dev/null
+++ b/compiler/circle-mpqsolver/src/core/DumpingHooks.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DumpingHooks.h"
+
+using namespace mpqsolver::core;
+
+DumpingHooks::DumpingHooks(const std::string &save_path)
+ : _save_path(save_path), _dumper(_save_path)
+{
+}
+
+void DumpingHooks::on_begin_solver(const std::string &model_path, float q8error, float q16error)
+{
+ _model_path = model_path;
+ _dumper.set_model_path(_model_path);
+ _dumper.prepare_for_error_dumping();
+ _dumper.dump_Q8_error(q8error);
+ _dumper.dump_Q16_error(q16error);
+}
+
+void DumpingHooks::on_begin_iteration()
+{
+ _in_iterations = true;
+ _num_of_iterations += 1;
+}
+
+void DumpingHooks::on_end_iteration(const LayerParams &layers, const std::string &def_type,
+ float error) const
+{
+ _dumper.dump_MPQ_configuration(layers, def_type, _num_of_iterations);
+ _dumper.dump_MPQ_error(error, _num_of_iterations);
+}
+
+void DumpingHooks::on_end_solver(const LayerParams &layers, const std::string &def_dtype,
+ float qerror)
+{
+ _dumper.dump_final_MPQ(layers, def_dtype);
+ _dumper.dump_MPQ_error(qerror);
+ _in_iterations = false;
+}
+
+void DumpingHooks::on_quantized(luci::Module *module) const
+{
+ if (_in_iterations)
+ {
+ _dumper.dump_quantized(module, _num_of_iterations);
+ }
+}
diff --git a/compiler/circle-mpqsolver/src/core/DumpingHooks.h b/compiler/circle-mpqsolver/src/core/DumpingHooks.h
new file mode 100644
index 000000000..c432a9a40
--- /dev/null
+++ b/compiler/circle-mpqsolver/src/core/DumpingHooks.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPQSOLVER_DUMPING_HOOKS_H__
+#define __MPQSOLVER_DUMPING_HOOKS_H__
+
+#include <luci/IR/Module.h>
+
+#include <core/Quantizer.h>
+#include <core/SolverHooks.h>
+#include <core/Dumper.h>
+
+#include <string>
+
+namespace mpqsolver
+{
+namespace core
+{
+
+/**
+ * @brief DumpingHooks is intended to save intermediate results
+ */
+class DumpingHooks final : public QuantizerHook, public SolverHooks
+{
+public:
+ /**
+ * @brief DumpingHooks constructor
+ * @param save_path directory where all intermediate data will be saved
+ */
+ DumpingHooks(const std::string &save_path);
+
+ /**
+   * @brief called on successful quantization
+ */
+ virtual void on_quantized(luci::Module *module) const override;
+
+ /**
+ * @brief called on the start of iterative search
+ */
+ virtual void on_begin_solver(const std::string &model_path, float q8error,
+ float q16error) override;
+
+ /**
+ * @brief called on the start of current iteration
+ */
+ virtual void on_begin_iteration() override;
+
+ /**
+ * @brief called at the end of current iteration
+ */
+ virtual void on_end_iteration(const LayerParams &layers, const std::string &def_dtype,
+ float error) const override;
+
+ /**
+ * @brief called at the end of iterative search
+ */
+ virtual void on_end_solver(const LayerParams &layers, const std::string &def_dtype,
+ float qerror) override;
+
+protected:
+ std::string _model_path;
+ std::string _save_path;
+ Dumper _dumper;
+ uint32_t _num_of_iterations = 0;
+ bool _in_iterations = false;
+};
+
+} // namespace core
+} // namespace mpqsolver
+
+#endif //__MPQSOLVER_DUMPING_HOOKS_H__
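In practice these hooks are not constructed directly; MPQSolver::set_save_intermediate wires them in, after which every iteration drops its configuration, error and quantized model into save_path:

    BisectionSolver solver("data.h5", 0.5f, "uint8", "uint8");
    // produces errors.mpq.txt, Configuration_<n>.mpq.json, quantized_<n>.mpq.circle, ...
    solver.set_save_intermediate("./mpq_artifacts");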
diff --git a/compiler/circle-mpqsolver/src/core/ErrorMetric.cpp b/compiler/circle-mpqsolver/src/core/ErrorMetric.cpp
new file mode 100644
index 000000000..23ddfcb7d
--- /dev/null
+++ b/compiler/circle-mpqsolver/src/core/ErrorMetric.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ErrorMetric.h"
+
+#include <loco/IR/DataType.h>
+#include <loco/IR/DataTypeTraits.h>
+
+#include <cmath>
+#include <cassert>
+
+using namespace mpqsolver::core;
+
+/**
+ * @brief compare first and second operands using MAE (Mean Absolute Error)
+ */
+float MAEMetric::compute(const WholeOutput &first, const WholeOutput &second) const
+{
+ assert(first.size() == second.size());
+
+ float error = 0.f;
+ size_t output_size = 0;
+
+ for (size_t sample_index = 0; sample_index < first.size(); ++sample_index)
+ {
+ assert(first[sample_index].size() == second[sample_index].size());
+ for (size_t out_index = 0; out_index < first[sample_index].size(); ++out_index)
+ {
+ const Buffer &first_elementary = first[sample_index][out_index];
+ const Buffer &second_elementary = second[sample_index][out_index];
+ assert(first_elementary.size() == second_elementary.size());
+ size_t cur_size = first_elementary.size() / loco::size(loco::DataType::FLOAT32);
+
+ const float *first_floats = reinterpret_cast<const float *>(first_elementary.data());
+ const float *second_floats = reinterpret_cast<const float *>(second_elementary.data());
+ for (size_t index = 0; index < cur_size; index++)
+ {
+ float ref_value = *(first_floats + index);
+ float cur_value = *(second_floats + index);
+ error += std::fabs(ref_value - cur_value);
+ }
+ output_size += cur_size;
+ }
+ }
+
+ if (output_size == 0)
+ {
+ throw std::runtime_error("nothing to compare");
+ }
+
+ return error / output_size;
+}
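In other words, with N the total number of float elements across all samples and outputs:

    MAE(first, second) = (1 / N) * sum_i |first_i - second_i|

which is exactly error / output_size above; the throw guards against division by zero on empty outputs.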
diff --git a/compiler/circle-mpqsolver/src/core/ErrorMetric.h b/compiler/circle-mpqsolver/src/core/ErrorMetric.h
new file mode 100644
index 000000000..fc535396e
--- /dev/null
+++ b/compiler/circle-mpqsolver/src/core/ErrorMetric.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPQSOLVER_CORE_ERROR_METRIC_H__
+#define __MPQSOLVER_CORE_ERROR_METRIC_H__
+
+#include <vector>
+
+namespace mpqsolver
+{
+namespace core
+{
+
+using Buffer = std::vector<char>;
+using Output = std::vector<Buffer>;
+using WholeOutput = std::vector<Output>;
+
+class ErrorMetric
+{
+public:
+ virtual ~ErrorMetric() = default;
+
+ /**
+ * @brief abstract method for comparing first and second operands
+ */
+ virtual float compute(const WholeOutput &first, const WholeOutput &second) const = 0;
+};
+
+// Mean Absolute Error
+class MAEMetric final : public ErrorMetric
+{
+public:
+ /**
+   * @brief compare first and second operands using MAE (Mean Absolute Error)
+ */
+  float compute(const WholeOutput &first, const WholeOutput &second) const override;
+};
+
+} // namespace core
+} // namespace mpqsolver
+
+#endif //__MPQSOLVER_CORE_ERROR_METRIC_H__
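ErrorMetric is the extension point for other comparison functions. A sketch of a hypothetical mean-squared-error metric, mirroring the buffer traversal used by MAEMetric (not part of this patch):

    #include "ErrorMetric.h"

    #include <stdexcept>

    class MSEMetric final : public mpqsolver::core::ErrorMetric
    {
    public:
      float compute(const mpqsolver::core::WholeOutput &first,
                    const mpqsolver::core::WholeOutput &second) const override
      {
        float error = 0.f;
        size_t count = 0;
        for (size_t s = 0; s < first.size(); ++s)
        {
          for (size_t o = 0; o < first[s].size(); ++o)
          {
            // buffers hold raw FLOAT32 data, as in MAEMetric::compute
            const auto *fa = reinterpret_cast<const float *>(first[s][o].data());
            const auto *fb = reinterpret_cast<const float *>(second[s][o].data());
            const size_t n = first[s][o].size() / sizeof(float);
            for (size_t i = 0; i < n; ++i)
              error += (fa[i] - fb[i]) * (fa[i] - fb[i]);
            count += n;
          }
        }
        if (count == 0)
          throw std::runtime_error("nothing to compare");
        return error / count;
      }
    };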
diff --git a/compiler/circle-mpqsolver/src/core/ErrorMetric.test.cpp b/compiler/circle-mpqsolver/src/core/ErrorMetric.test.cpp
new file mode 100644
index 000000000..232d9bc60
--- /dev/null
+++ b/compiler/circle-mpqsolver/src/core/ErrorMetric.test.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ErrorMetric.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleMPQSolverMAEMetricTest, verifyResultsTest)
+{
+ size_t num_elements = 512;
+ mpqsolver::core::WholeOutput target, source;
+ // let target be zero
+ {
+ std::vector<float> float_buffer(num_elements, 0.f);
+ auto const char_buffer = reinterpret_cast<char *>(float_buffer.data());
+ auto const char_buffer_size = num_elements * sizeof(float) / sizeof(char);
+ std::vector<char> buffer(char_buffer, char_buffer + char_buffer_size);
+
+ mpqsolver::core::Output out = mpqsolver::core::Output(1, buffer);
+ target = mpqsolver::core::WholeOutput(1, out);
+ }
+
+ // let source be one
+ {
+ std::vector<float> float_buffer(num_elements, 1.f);
+ auto const char_buffer = reinterpret_cast<char *>(float_buffer.data());
+ auto const char_buffer_size = num_elements * sizeof(float) / sizeof(char);
+ std::vector<char> buffer(char_buffer, char_buffer + char_buffer_size);
+ mpqsolver::core::Output out = mpqsolver::core::Output(1, buffer);
+ source = mpqsolver::core::WholeOutput(1, out);
+ }
+
+ mpqsolver::core::MAEMetric metric;
+ float value = metric.compute(target, source);
+ EXPECT_FLOAT_EQ(value, 1.f);
+}
+
+TEST(CircleMPQSolverMAEMetricTest, verifyResultsTest_NEG)
+{
+ mpqsolver::core::MAEMetric metric;
+ mpqsolver::core::WholeOutput target, source;
+ EXPECT_ANY_THROW(metric.compute(target, source));
+}
diff --git a/compiler/circle-mpqsolver/src/core/Evaluator.cpp b/compiler/circle-mpqsolver/src/core/Evaluator.cpp
new file mode 100644
index 000000000..c7afda5c2
--- /dev/null
+++ b/compiler/circle-mpqsolver/src/core/Evaluator.cpp
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Evaluator.h"
+
+#include <luci_interpreter/Interpreter.h>
+
+#include <dio_hdf5/HDF5Importer.h>
+
+using namespace mpqsolver::core;
+
+using Shape = std::vector<loco::Dimension>;
+
+namespace
+{
+
+using namespace luci;
+
+template <typename NodeT> size_t get_tensor_size(const NodeT *node)
+{
+ uint32_t tensor_size = loco::size(node->dtype());
+ for (uint32_t i = 0; i < node->rank(); ++i)
+ tensor_size *= node->dim(i).value();
+ return tensor_size;
+}
+
+WholeOutput compute_outputs(const luci::Module *module, const std::string &h5file)
+{
+ dio::hdf5::HDF5Importer importer{h5file};
+ importer.importGroup("value");
+
+ bool is_raw_data = importer.isRawData();
+
+ const auto num_records = importer.numData();
+ if (num_records == 0)
+ throw std::runtime_error("The input data file does not contain any record.");
+ const auto input_nodes = loco::input_nodes(module->graph());
+ const auto num_inputs = input_nodes.size();
+
+ WholeOutput dataset_output;
+
+ // Create interpreter.
+ luci_interpreter::Interpreter interpreter(module);
+ for (int32_t record_idx = 0; record_idx < num_records; record_idx++)
+ {
+ if (num_inputs != static_cast<uint32_t>(importer.numInputs(record_idx)))
+ throw std::runtime_error("Wrong number of inputs.");
+ for (uint32_t input_idx = 0; input_idx < num_inputs; input_idx++)
+ {
+ const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
+ assert(input_node->index() == input_idx);
+
+ std::vector<char> input_data(get_tensor_size(input_node));
+
+ if (!is_raw_data)
+ {
+ loco::DataType dtype;
+ Shape shape;
+ importer.readTensor(record_idx, input_idx, &dtype, &shape, input_data.data(),
+ input_data.size());
+ }
+ else
+ {
+ // Skip type/shape check for raw data
+ importer.readTensor(record_idx, input_idx, input_data.data(), input_data.size());
+ }
+
+ interpreter.writeInputTensor(input_node, input_data.data(), input_data.size());
+ }
+
+ interpreter.interpret();
+
+ Output nn_output;
+
+ // Get output.
+ const auto output_nodes = loco::output_nodes(module->graph());
+ for (size_t i = 0; i < module->graph()->outputs()->size(); i++)
+ {
+ const auto *output_node = loco::must_cast<const luci::CircleOutput *>(output_nodes[i]);
+ Buffer output_data(get_tensor_size(output_node));
+ interpreter.readOutputTensor(output_node, output_data.data(), output_data.size());
+ // output
+ nn_output.push_back(output_data);
+ }
+ dataset_output.push_back(nn_output);
+ }
+
+ return dataset_output;
+}
+
+} // namespace
+
+DatasetEvaluator::DatasetEvaluator(const luci::Module *ref_module, const std::string &h5file,
+ const ErrorMetric &metric)
+ : _ref_module(ref_module), _h5file(h5file), _metric(&metric)
+{
+ _ref_output = compute_outputs(_ref_module, _h5file);
+}
+
+void DatasetEvaluator::validate(const luci::Module *trgt_fq_module) const
+{
+ const auto output_nodes = loco::output_nodes(trgt_fq_module->graph());
+ for (size_t out_index = 0; out_index < output_nodes.size(); ++out_index)
+ {
+ const auto *output_node = loco::must_cast<const luci::CircleOutput *>(output_nodes[out_index]);
+ loco::DataType out_dtype = output_node->dtype();
+ if (out_dtype != loco::DataType::FLOAT32)
+ throw std::runtime_error("Unsupported output dtype " + output_node->name());
+ }
+}
+
+float DatasetEvaluator::evaluate(const luci::Module *trgt_fq_module) const
+{
+ if (trgt_fq_module == nullptr)
+ throw std::runtime_error("Invalid target module");
+
+ if (_metric == nullptr)
+ throw std::runtime_error("Invalid metric");
+
+ validate(trgt_fq_module);
+
+ const WholeOutput &cur_output = compute_outputs(trgt_fq_module, _h5file);
+ float error = _metric->compute(_ref_output, cur_output);
+ return error;
+}
diff --git a/compiler/circle-mpqsolver/src/core/Evaluator.h b/compiler/circle-mpqsolver/src/core/Evaluator.h
new file mode 100644
index 000000000..9820508bc
--- /dev/null
+++ b/compiler/circle-mpqsolver/src/core/Evaluator.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPQSOLVER_CORE_EVALUATOR_H__
+#define __MPQSOLVER_CORE_EVALUATOR_H__
+
+#include "ErrorMetric.h"
+
+#include <luci/IR/Module.h>
+#include <luci/CircleQuantizer.h>
+
+#include <string>
+#include <vector>
+
+namespace mpqsolver
+{
+namespace core
+{
+
+class DatasetEvaluator final
+{
+public:
+ /**
+ * @brief create Evaluator for comparing output of ref_module on h5file
+ */
+ DatasetEvaluator(const luci::Module *ref_module, const std::string &h5file,
+ const ErrorMetric &metric);
+ DatasetEvaluator() = delete;
+ ~DatasetEvaluator() = default;
+
+ /**
+   * @brief evaluate trgt_fq_module (fake-quantized)
+   *        and return the computed error metric
+ */
+ float evaluate(const luci::Module *trgt_fq_module) const;
+
+private:
+ /**
+ * @brief throws if there is something wrong with the module
+ */
+ void validate(const luci::Module *module) const;
+
+private:
+ const luci::Module *_ref_module = nullptr;
+ std::string _h5file;
+ WholeOutput _ref_output;
+ const ErrorMetric *_metric = nullptr;
+};
+
+} // namespace core
+} // namespace mpqsolver
+
+#endif //__MPQSOLVER_CORE_EVALUATOR_H__
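A minimal usage sketch (assuming ref_module is an imported float module and fq_module its fake-quantized counterpart; the metric must outlive the evaluator, which stores a pointer to it):

    mpqsolver::core::MAEMetric metric;
    mpqsolver::core::DatasetEvaluator evaluator(ref_module.get(), "data.h5", metric);
    float qerror = evaluator.evaluate(fq_module.get());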
diff --git a/compiler/circle-mpqsolver/src/core/Quantizer.cpp b/compiler/circle-mpqsolver/src/core/Quantizer.cpp
new file mode 100644
index 000000000..421793197
--- /dev/null
+++ b/compiler/circle-mpqsolver/src/core/Quantizer.cpp
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Quantizer.h"
+#include <luci/Service/Validate.h>
+
+#include <iostream>
+
+using namespace mpqsolver::core;
+using AlgorithmParameters = luci::CircleQuantizer::Options::AlgorithmParameters;
+using Algorithms = luci::CircleQuantizer::Options::Algorithm;
+
+namespace
+{
+
+bool make_model_fake_quantized(luci::Module *module)
+{
+ luci::CircleQuantizer quantizer;
+
+ auto options = quantizer.options();
+ options->enable(Algorithms::ConvertToFakeQuantizedModel);
+
+ for (size_t idx = 0; idx < module->size(); ++idx)
+ {
+ auto graph = module->graph(idx);
+ // quantize the graph
+ quantizer.quantize(graph);
+ if (!luci::validate(graph))
+ {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+} // namespace
+
+Quantizer::Quantizer(const std::string &input_dtype, const std::string &output_dtype)
+ : _input_dtype(input_dtype), _output_dtype(output_dtype)
+{
+}
+
+void Quantizer::set_hook(const QuantizerHook *hook) { _hook = hook; }
+
+/**
+ * @brief quantize recorded module (min/max initialized) with specified parameters
+ * returns true on success
+ */
+bool Quantizer::quantize(luci::Module *module, const std::string &quant_dtype,
+ LayerParams &layer_params)
+{
+ if (!module)
+ return false;
+
+ static const std::string default_dtype = "float32";
+ static const std::string granularity_type = "channel";
+
+ luci::CircleQuantizer quantizer;
+
+ auto options = quantizer.options();
+ options->enable(Algorithms::QuantizeWithMinMax);
+
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, default_dtype);
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, quant_dtype);
+ options->param(AlgorithmParameters::Quantize_granularity, granularity_type);
+ options->param(AlgorithmParameters::Quantize_input_type, _input_dtype);
+ options->param(AlgorithmParameters::Quantize_output_type, _output_dtype);
+ options->param(AlgorithmParameters::Quantize_TF_style_maxpool, "False");
+
+ if (!layer_params.empty())
+ {
+ try
+ {
+ options->layer_params(AlgorithmParameters::Quantize_layer_params, layer_params);
+ }
+ catch (const std::runtime_error &e)
+ {
+ std::cerr << e.what() << '\n';
+ return false;
+ }
+ }
+
+ for (size_t idx = 0; idx < module->size(); ++idx)
+ {
+ auto graph = module->graph(idx);
+ // quantize the graph
+ quantizer.quantize(graph);
+ if (!luci::validate(graph))
+ {
+ std::cerr << "ERROR: Quantized graph is invalid" << std::endl;
+ return false;
+ }
+ }
+
+ if (_hook)
+ {
+ _hook->on_quantized(module);
+ }
+
+ return true;
+}
+
+/**
+ * @brief fake_quantize recorded module (min/max initialized) with specified parameters
+ * returns true on success
+ */
+bool Quantizer::fake_quantize(luci::Module *module, const std::string &quant_dtype,
+ LayerParams &layer_params)
+{
+ if (!quantize(module, quant_dtype, layer_params))
+ return false;
+
+ if (!make_model_fake_quantized(module))
+ return false;
+
+ return true;
+}
diff --git a/compiler/circle-mpqsolver/src/core/Quantizer.h b/compiler/circle-mpqsolver/src/core/Quantizer.h
new file mode 100644
index 000000000..259d5c4b0
--- /dev/null
+++ b/compiler/circle-mpqsolver/src/core/Quantizer.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPQSOLVER_CORE_QUANTIZER_H__
+#define __MPQSOLVER_CORE_QUANTIZER_H__
+
+#include <luci/IR/Module.h>
+#include <luci/CircleQuantizer.h>
+
+#include <string>
+#include <vector>
+#include <memory>
+
+namespace mpqsolver
+{
+namespace core
+{
+
+using LayerParam = luci::CircleQuantizer::Options::LayerParam;
+using LayerParams = std::vector<std::shared_ptr<LayerParam>>;
+
+struct QuantizerHook
+{
+ /**
+   * @brief called on successful quantization
+ * @param module quantized module
+ */
+ virtual void on_quantized(luci::Module *module) const = 0;
+};
+
+class Quantizer
+{
+public:
+  Quantizer(const std::string &input_dtype, const std::string &output_dtype);
+
+  /**
+   * @brief set hook to be called at the end of quantization
+   */
+  void set_hook(const QuantizerHook *hook);
+
+ /**
+ * @brief quantize recorded module (min/max initialized) with specified parameters
+ * returns true on success
+ */
+ bool quantize(luci::Module *module, const std::string &quant_dtype, LayerParams &layer_params);
+
+ /**
+ * @brief fake_quantize recorded module (min/max initialized) with specified parameters
+ * returns true on success
+ */
+ bool fake_quantize(luci::Module *module, const std::string &quant_dtype,
+ LayerParams &layer_params);
+
+private:
+ std::string _input_dtype = "uint8";
+ std::string _output_dtype = "uint8";
+ const QuantizerHook *_hook = nullptr;
+};
+
+} // namespace core
+} // namespace mpqsolver
+
+#endif //__MPQSOLVER_CORE_QUANTIZER_H__
diff --git a/compiler/circle-mpqsolver/src/core/Quantizer.test.cpp b/compiler/circle-mpqsolver/src/core/Quantizer.test.cpp
new file mode 100644
index 000000000..7d7e74fdc
--- /dev/null
+++ b/compiler/circle-mpqsolver/src/core/Quantizer.test.cpp
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <gtest/gtest.h>
+
+#include "Quantizer.h"
+#include "TestHelper.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <cmath>
+
+namespace
+{
+
+class AddGraph final : public SimpleGraph
+{
+protected:
+ void initInput(loco::Node *input) override
+ {
+ auto ci_input = loco::must_cast<luci::CircleNode *>(input);
+ initMinMax(ci_input);
+ }
+
+ void initMinMax(luci::CircleNode *node)
+ {
+ auto qparam = std::make_unique<luci::CircleQuantParam>();
+ qparam->min.assign(1, _a_min);
+ qparam->max.assign(1, _a_max);
+ node->quantparam(std::move(qparam));
+ }
+
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ _add = _g->nodes()->create<luci::CircleAdd>();
+ _beta = _g->nodes()->create<luci::CircleConst>();
+
+ _add->dtype(loco::DataType::FLOAT32);
+ _beta->dtype(loco::DataType::FLOAT32);
+
+ uint32_t channel_size = 16;
+ _add->shape({1, _channel_size, _width, _height});
+ _beta->shape({1, _channel_size, _width, _height});
+
+ _beta->size<loco::DataType::FLOAT32>(channel_size);
+ _add->x(input);
+ _add->y(_beta);
+ _add->fusedActivationFunction(luci::FusedActFunc::NONE);
+
+ _add->name("add");
+ _beta->name("beta");
+ initMinMax(_add);
+
+ return _add;
+ }
+
+public:
+ float _a_min = -1.f;
+ float _a_max = 1.f;
+ luci::CircleAdd *_add = nullptr;
+ luci::CircleConst *_beta = nullptr;
+};
+
+} // namespace
+
+TEST(CircleMPQSolverQuantizerTest, verifyResultsTest)
+{
+ auto m = luci::make_module();
+ AddGraph g;
+ g.init();
+ auto add = g._add;
+ float range = g._a_max - g._a_min;
+ g.transfer_to(m.get());
+
+ std::string def_quant = "uint8";
+ mpqsolver::core::Quantizer quantizer(def_quant, def_quant);
+ mpqsolver::core::LayerParams params;
+ auto res = quantizer.quantize(m.get(), def_quant, params);
+ EXPECT_TRUE(res);
+ auto quant_param = add->quantparam();
+ EXPECT_TRUE(quant_param != nullptr);
+ EXPECT_TRUE(quant_param->scale.size() == 1);
+ EXPECT_FLOAT_EQ(quant_param->scale[0], range / 255.f);
+ EXPECT_TRUE(quant_param->zerop.size() == 1);
+ EXPECT_TRUE(quant_param->zerop[0] == 128);
+}
+
+TEST(CircleMPQSolverQuantizerTest, verifyResultsTest_NEG)
+{
+ std::string def_quant = "uint8";
+ mpqsolver::core::Quantizer quantizer(def_quant, def_quant);
+ mpqsolver::core::LayerParams params;
+ auto res = quantizer.quantize(nullptr, def_quant, params);
+ EXPECT_TRUE(!res);
+}
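For reference, the expected constants in `verifyResultsTest` follow from the usual asymmetric uint8 scheme over the recorded range `[-1, 1]` (assuming that is indeed the scheme applied here):

```
scale      = (max - min) / (2^8 - 1) = 2 / 255
zero_point = round(-min / scale)     = round(127.5) = 128
```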
diff --git a/compiler/circle-mpqsolver/src/core/SolverHooks.cpp b/compiler/circle-mpqsolver/src/core/SolverHooks.cpp
new file mode 100644
index 000000000..bbe1bb4a3
--- /dev/null
+++ b/compiler/circle-mpqsolver/src/core/SolverHooks.cpp
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SolverHooks.h"
diff --git a/compiler/circle-mpqsolver/src/core/SolverHooks.h b/compiler/circle-mpqsolver/src/core/SolverHooks.h
new file mode 100644
index 000000000..851a69993
--- /dev/null
+++ b/compiler/circle-mpqsolver/src/core/SolverHooks.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPQSOLVER_SOLVER_HOOKS_H__
+#define __MPQSOLVER_SOLVER_HOOKS_H__
+
+#include <luci/IR/Module.h>
+
+#include <core/Quantizer.h>
+
+#include <string>
+
+namespace mpqsolver
+{
+namespace core
+{
+
+class SolverHooks
+{
+public:
+ /**
+ * @brief called on the start of iterative search
+ * @param model_path path of original float model to quantize
+ * @param q8error error of Q8 quantization
+ * @param q16error error of Q16 quantization
+ */
+ virtual void on_begin_solver(const std::string &model_path, float q8error, float q16error) = 0;
+
+ /**
+ * @brief called on the start of current iteration
+ */
+ virtual void on_begin_iteration() = 0;
+
+ /**
+ * @brief called at the end of current iteration
+ * @param layers model nodes with specific quantization parameters
+ * @param def_dtype default quantization dtype
+ * @param error error of quantization for current iteration
+ */
+ virtual void on_end_iteration(const LayerParams &layers, const std::string &def_dtype,
+ float error) const = 0;
+
+ /**
+ * @brief called at the end of iterative search
+ * @param layers model nodes with specific quantization parameters
+ * @param def_dtype default quantization dtype
+ * @param qerror final error of quantization
+ */
+ virtual void on_end_solver(const LayerParams &layers, const std::string &def_dtype,
+ float qerror) = 0;
+};
+
+} // namespace core
+} // namespace mpqsolver
+
+#endif //__MPQSOLVER_SOLVER_HOOKS_H__
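As a rough illustration of the interface above (not part of the patch), a concrete `SolverHooks` might simply trace the search; the logging behavior here is invented for the example.

```cpp
#include "SolverHooks.h"

#include <iostream>

namespace
{

// Hypothetical hooks implementation that traces the search to stdout.
class LoggingHooks final : public mpqsolver::core::SolverHooks
{
public:
  void on_begin_solver(const std::string &model_path, float q8error, float q16error) override
  {
    std::cout << model_path << ": q8=" << q8error << " q16=" << q16error << std::endl;
  }

  void on_begin_iteration() override { _iteration++; }

  void on_end_iteration(const mpqsolver::core::LayerParams &layers, const std::string &def_dtype,
                        float error) const override
  {
    std::cout << "iteration " << _iteration << ": " << layers.size() << " layer overrides ("
              << def_dtype << "), error=" << error << std::endl;
  }

  void on_end_solver(const mpqsolver::core::LayerParams &layers, const std::string &def_dtype,
                     float qerror) override
  {
    std::cout << "done: " << layers.size() << " overrides, final error=" << qerror << std::endl;
  }

private:
  int _iteration = 0;
};

} // namespace
```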
diff --git a/compiler/circle-mpqsolver/src/core/SolverOutput.cpp b/compiler/circle-mpqsolver/src/core/SolverOutput.cpp
new file mode 100644
index 000000000..c14fe30f7
--- /dev/null
+++ b/compiler/circle-mpqsolver/src/core/SolverOutput.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SolverOutput.h"
+
+#include <iostream>
+
+SolverOutput &SolverOutput::get(void)
+{
+ static SolverOutput d;
+ return d;
+}
+
+const SolverOutput &SolverOutput::operator<<(const std::string &message) const
+{
+ if (_turn_on)
+ {
+ std::cout << message;
+ }
+
+ return *this;
+}
+
+const SolverOutput &SolverOutput::operator<<(float value) const
+{
+ if (_turn_on)
+ {
+ std::cout << value;
+ }
+
+ return *this;
+}
+
+void SolverOutput::TurnOn(bool on) { _turn_on = on; }
diff --git a/compiler/circle-mpqsolver/src/core/SolverOutput.h b/compiler/circle-mpqsolver/src/core/SolverOutput.h
new file mode 100644
index 000000000..218d3b95e
--- /dev/null
+++ b/compiler/circle-mpqsolver/src/core/SolverOutput.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPQSOLVER_SOLVER_OUTPUT_H__
+#define __MPQSOLVER_SOLVER_OUTPUT_H__
+
+#include <string>
+
+/**
+ * @brief SolverOutput prints important performance information
+ */
+class SolverOutput
+{
+private:
+ /**
+ * @brief construct SolverOutput
+ */
+ SolverOutput() = default;
+
+public:
+ /**
+ * @brief get singleton object
+ */
+ static SolverOutput &get(void);
+
+ /**
+ * @brief print string message
+ */
+ const SolverOutput &operator<<(const std::string &message) const;
+
+ /**
+ * @brief print float value
+ */
+ const SolverOutput &operator<<(float value) const;
+
+ /**
+ * @brief turn on/off actual output
+ */
+ void TurnOn(bool on);
+
+private:
+ bool _turn_on = true;
+};
+
+#endif // __MPQSOLVER_SOLVER_OUTPUT_H__
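Usage is a one-liner thanks to the singleton accessor and the const stream operators; a small sketch:

```cpp
#include "SolverOutput.h"

void report(float qerror)
{
  SolverOutput::get().TurnOn(true); // output is on by default; shown for clarity
  SolverOutput::get() << "final qerror: " << qerror << "\n";
}
```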
diff --git a/compiler/circle-mpqsolver/src/core/TestHelper.h b/compiler/circle-mpqsolver/src/core/TestHelper.h
new file mode 100644
index 000000000..f930738f9
--- /dev/null
+++ b/compiler/circle-mpqsolver/src/core/TestHelper.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPQSOLVER_TEST_HELPER_H__
+#define __MPQSOLVER_TEST_HELPER_H__
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/Module.h>
+
+class SimpleGraph
+{
+public:
+ SimpleGraph() : _g(loco::make_graph()) {}
+
+public:
+ void init()
+ {
+ _input = _g->nodes()->create<luci::CircleInput>();
+ _output = _g->nodes()->create<luci::CircleOutput>();
+ _input->name("input");
+ _output->name("output");
+
+ auto graph_input = _g->inputs()->create();
+ _input->index(graph_input->index());
+ auto graph_output = _g->outputs()->create();
+ _output->index(graph_output->index());
+
+ graph_input->dtype(loco::DataType::FLOAT32);
+ _input->dtype(loco::DataType::FLOAT32);
+ _output->dtype(loco::DataType::FLOAT32);
+ graph_output->dtype(loco::DataType::FLOAT32);
+
+ graph_input->shape({1, _channel_size, _width, _height});
+ _input->shape({1, _channel_size, _width, _height});
+ _output->shape({1, _channel_size, _width, _height});
+ graph_output->shape({1, _channel_size, _width, _height});
+
+ auto graph_body = insertGraphBody(_input);
+ _output->from(graph_body);
+
+ initInput(_input);
+ }
+
+ virtual ~SimpleGraph() = default;
+ void transfer_to(luci::Module *module)
+ {
+    // WARNING: after _g is transferred, _input and _output in
+    // SimpleGraph become dangling. They are not cleared, as this
+    // class is just a helper for unit tests.
+ module->add(std::move(_g));
+ }
+
+protected:
+ virtual loco::Node *insertGraphBody(loco::Node *input) = 0;
+  virtual void initInput(loco::Node *input) {}
+
+public:
+ std::unique_ptr<loco::Graph> _g;
+ luci::CircleInput *_input = nullptr;
+ luci::CircleOutput *_output = nullptr;
+ uint32_t _channel_size = 16;
+ uint32_t _width = 4;
+ uint32_t _height = 4;
+};
+
+#endif //__MPQSOLVER_TEST_HELPER_H__
diff --git a/compiler/circle-operator-test/CMakeLists.txt b/compiler/circle-operator-test/CMakeLists.txt
new file mode 100644
index 000000000..2ebd533b9
--- /dev/null
+++ b/compiler/circle-operator-test/CMakeLists.txt
@@ -0,0 +1,18 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+get_target_property(ARTIFACTS_PATH testDataGenerator BINARY_DIR)
+get_target_property(CIRCLE_OPERATOR_PATH circle-operator BINARY_DIR)
+set(CIRCLE_OPERATOR_PATH "${CIRCLE_OPERATOR_PATH}/circle-operator")
+
+nnas_find_package(GTest REQUIRED)
+
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+
+GTest_AddTest(circle-operator-test ${TESTS})
+
+set_tests_properties(circle-operator-test
+ PROPERTIES
+ ENVIRONMENT "ARTIFACTS_PATH=${ARTIFACTS_PATH};CIRCLE_OPERATOR_PATH=${CIRCLE_OPERATOR_PATH}"
+ )
diff --git a/compiler/circle-operator-test/README.md b/compiler/circle-operator-test/README.md
new file mode 100644
index 000000000..d07c64d2e
--- /dev/null
+++ b/compiler/circle-operator-test/README.md
@@ -0,0 +1,7 @@
+# circle-operator-test
+
+_circle-operator-test_ verifies that the _circle-operator_ tool works as expected.
+
+Current tests include
+- input arguments are handled as expected
+- output of the tool is as expected
diff --git a/compiler/circle-operator-test/requires.cmake b/compiler/circle-operator-test/requires.cmake
new file mode 100644
index 000000000..8ad3b8a64
--- /dev/null
+++ b/compiler/circle-operator-test/requires.cmake
@@ -0,0 +1,2 @@
+require("circle-operator")
+require("common-artifacts")
diff --git a/compiler/circle-operator-test/src/circle-operator.test.cpp b/compiler/circle-operator-test/src/circle-operator.test.cpp
new file mode 100644
index 000000000..351eda804
--- /dev/null
+++ b/compiler/circle-operator-test/src/circle-operator.test.cpp
@@ -0,0 +1,248 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <cstdlib>
+#include <fstream>
+#include <vector>
+
+class circle_operator_test : public ::testing::Test
+{
+protected:
+ bool initialize(void);
+ bool run(const std::string &command);
+
+protected:
+ bool load(const std::string &file);
+
+protected:
+ std::string _artifacts_path;
+ std::string _circle_operator_path;
+ std::string _result;
+};
+
+bool circle_operator_test::initialize(void)
+{
+ char *path = std::getenv("ARTIFACTS_PATH");
+ if (path == nullptr)
+ {
+ std::cerr << "ARTIFACTS_PATH not found" << std::endl;
+ return false;
+ }
+ _artifacts_path = path;
+
+ path = std::getenv("CIRCLE_OPERATOR_PATH");
+ if (path == nullptr)
+ {
+ std::cerr << "ARTIFACTS_BIN_PATH not found" << std::endl;
+ return false;
+ }
+ _circle_operator_path = path;
+
+ return true;
+}
+
+bool circle_operator_test::run(const std::string &command)
+{
+ std::vector<char> buffer(260);
+ std::string result = "";
+ std::string cmd_err = command + " 2>&1";
+ FILE *pipe = popen(cmd_err.c_str(), "r");
+ if (!pipe)
+ {
+ return false;
+ }
+ try
+ {
+ while (fgets(&buffer[0], buffer.size(), pipe) != NULL)
+ {
+ result += &buffer[0];
+ }
+ }
+ catch (...)
+ {
+ pclose(pipe);
+ return false;
+ }
+ pclose(pipe);
+ _result = result;
+
+ std::cout << _result << std::endl;
+
+ return true;
+}
+
+bool circle_operator_test::load(const std::string &file)
+{
+ std::ifstream tmp(file.c_str());
+ if (tmp.fail())
+ return false;
+
+ std::stringstream buffer;
+ buffer << tmp.rdbuf();
+ _result = buffer.str();
+ return true;
+}
+
+TEST_F(circle_operator_test, valid_names)
+{
+ if (!initialize())
+ {
+ FAIL();
+ return;
+ }
+
+ std::string model = _artifacts_path + "/Add_000.circle";
+ std::string command = _circle_operator_path + " --name " + model;
+ if (!run(command))
+ {
+ FAIL();
+ return;
+ }
+
+ const auto pos = _result.find("ofm");
+ ASSERT_NE(std::string::npos, pos);
+}
+
+TEST_F(circle_operator_test, valid_codes)
+{
+ if (!initialize())
+ {
+ FAIL();
+ return;
+ }
+
+ std::string model = _artifacts_path + "/Add_000.circle";
+ std::string command = _circle_operator_path + " --code " + model;
+ if (!run(command))
+ {
+ FAIL();
+ return;
+ }
+
+ const auto pos = _result.find("ADD");
+ ASSERT_NE(std::string::npos, pos);
+}
+
+TEST_F(circle_operator_test, invalid_option_NEG)
+{
+ if (!initialize())
+ {
+ FAIL();
+ return;
+ }
+
+ std::string model = _artifacts_path + "/Add_000.circle";
+ std::string command = _circle_operator_path + " --opname " + model;
+ if (!run(command))
+ {
+ FAIL();
+ return;
+ }
+
+ const auto pos = _result.find("Invalid argument");
+ ASSERT_NE(std::string::npos, pos);
+}
+
+TEST_F(circle_operator_test, check_code_name)
+{
+ if (!initialize())
+ {
+ FAIL();
+ return;
+ }
+
+ std::string model = _artifacts_path + "/Add_000.circle";
+ std::string command = _circle_operator_path + " --code --name " + model;
+ if (!run(command))
+ {
+ FAIL();
+ return;
+ }
+
+ const auto pos = _result.find("ofm");
+ ASSERT_NE(std::string::npos, pos);
+ const auto pos2 = _result.find("ADD");
+ ASSERT_NE(std::string::npos, pos2);
+}
+
+TEST_F(circle_operator_test, nonexist_file_NEG)
+{
+ if (!initialize())
+ {
+ FAIL();
+ return;
+ }
+
+ std::string model = _artifacts_path + "/non_exist_file.foo";
+ std::string command = _circle_operator_path + " --name " + model;
+ if (!run(command))
+ {
+ FAIL();
+ return;
+ }
+
+ const auto pos = _result.find("ERROR");
+ ASSERT_NE(std::string::npos, pos);
+}
+
+TEST_F(circle_operator_test, invalid_file_NEG)
+{
+ if (!initialize())
+ {
+ FAIL();
+ return;
+ }
+
+ std::string model = _artifacts_path + "/Add_000.recipe";
+ std::string command = _circle_operator_path + " --name " + model;
+ if (!run(command))
+ {
+ FAIL();
+ return;
+ }
+
+ const auto pos = _result.find("ERROR");
+ ASSERT_NE(std::string::npos, pos);
+}
+
+TEST_F(circle_operator_test, output_file)
+{
+ if (!initialize())
+ {
+ FAIL();
+ return;
+ }
+
+ std::string fileName("/tmp/a.txt");
+ std::remove(fileName.c_str());
+ std::string model = _artifacts_path + "/Add_000.circle";
+ std::string command = _circle_operator_path + " --code --output_path " + fileName + " " + model;
+ if (!run(command))
+ {
+ FAIL();
+ return;
+ }
+ if (!load(fileName))
+ {
+ FAIL();
+ return;
+ }
+
+ const auto pos = _result.find("ADD");
+ ASSERT_NE(std::string::npos, pos);
+}
diff --git a/compiler/circle-operator/CMakeLists.txt b/compiler/circle-operator/CMakeLists.txt
new file mode 100644
index 000000000..33d9a96d0
--- /dev/null
+++ b/compiler/circle-operator/CMakeLists.txt
@@ -0,0 +1,17 @@
+if(NOT TARGET mio_circle06)
+ return()
+endif(NOT TARGET mio_circle06)
+
+set(DRIVER "driver/Driver.cpp")
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_executable(circle-operator ${DRIVER} ${SOURCES})
+target_include_directories(circle-operator PRIVATE src)
+target_link_libraries(circle-operator arser)
+target_link_libraries(circle-operator foder)
+target_link_libraries(circle-operator mio_circle06)
+target_link_libraries(circle-operator mio_circle06_helper)
+target_link_libraries(circle-operator safemain)
+
+install(TARGETS circle-operator DESTINATION bin)
diff --git a/compiler/circle-operator/README.md b/compiler/circle-operator/README.md
new file mode 100644
index 000000000..86a923f05
--- /dev/null
+++ b/compiler/circle-operator/README.md
@@ -0,0 +1,70 @@
+# circle-operator
+
+_circle-operator_ allows users to retrieve operator information from a Circle model file.
+
+NOTE: this tool is primarily for ONE-vscode, where PartEditor needs the names and codes
+of the operators.
+
+## Operator information
+
+Operator names with `--name`
+- show operator names, one per line, in execution order
+
+Example
+```
+$ circle-operator --name model.circle
+```
+
+Result
+```
+conv1_pad/Pad
+conv1_conv/BiasAdd
+pool1_pad/Pad
+```
+
+Operator codes with `--code`
+- show operator codes, one per line, in execution order
+
+Example
+```
+$ circle-operator --code model.circle
+```
+
+Result
+```
+PAD
+CONV_2D
+PAD
+```
+
+Operators with both `--code` and `--name`
+- show operator code and name separated by `,`, one per line, in execution order
+
+Example
+```
+$ circle-operator --code --name model.circle
+```
+
+Result
+```
+PAD,conv1_pad/Pad
+CONV_2D,conv1_conv/BiasAdd
+PAD,pool1_pad/Pad
+```
+
+## Save to file
+
+Use `--output_path` to save results to a file.
+
+Example
+```
+$ circle-operator --name --output_path /tmp/result model.circle
+```
+
+Result
+```
+$ cat /tmp/result
+conv1_pad/Pad
+conv1_conv/BiasAdd
+pool1_pad/Pad
+```
diff --git a/compiler/circle-operator/driver/Driver.cpp b/compiler/circle-operator/driver/Driver.cpp
new file mode 100644
index 000000000..f5fd8073c
--- /dev/null
+++ b/compiler/circle-operator/driver/Driver.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Dump.h"
+
+#include <arser/arser.h>
+#include <foder/FileLoader.h>
+#include <fstream>
+
+#include <functional>
+#include <iostream>
+#include <map>
+#include <memory>
+#include <vector>
+#include <string>
+
+#include <cstring>
+#include <signal.h>
+
+void handle_segfault(int signal, siginfo_t *si, void *arg)
+{
+ std::cerr << "ERROR: Failed to load file" << std::endl;
+ exit(255);
+}
+
+int entry(int argc, char **argv)
+{
+ // TODO add option to dump for all sub-graphs
+ arser::Arser arser{
+ "circle-operator allows users to retrieve operator information from a Circle model file"};
+ arser.add_argument("--name").nargs(0).help("Dump operators name in circle file");
+ arser.add_argument("--code").nargs(0).help("Dump operators code in circle file");
+ arser.add_argument("--output_path").help("Save output to file (default output is console)");
+ arser.add_argument("circle").help("Circle file to dump");
+
+ try
+ {
+ arser.parse(argc, argv);
+ }
+ catch (const std::runtime_error &err)
+ {
+ std::cerr << err.what() << std::endl;
+ std::cerr << arser;
+ return 255;
+ }
+
+ cirops::DumpOption option;
+ option.names = arser["--name"];
+ option.codes = arser["--code"];
+
+ std::ofstream oFstream;
+ std::ostream *oStream = &std::cout;
+ if (arser["--output_path"])
+ {
+ auto output_path = arser.get<std::string>("--output_path");
+ oFstream.open(output_path, std::ofstream::out | std::ofstream::trunc);
+ if (oFstream.fail())
+ {
+ std::cerr << "ERROR: Failed to create output to file " << output_path << std::endl;
+ return 255;
+ }
+ oStream = &oFstream;
+ }
+
+  // hook segmentation fault
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(struct sigaction));
+ sigemptyset(&sa.sa_mask);
+ sa.sa_sigaction = handle_segfault;
+ sa.sa_flags = SA_SIGINFO;
+ sigaction(SIGSEGV, &sa, NULL);
+
+ std::string modelFile = arser.get<std::string>("circle");
+ // Load Circle model from a circle file
+ try
+ {
+ foder::FileLoader fileLoader{modelFile};
+ std::vector<char> modelData = fileLoader.load();
+ const circle::Model *circleModel = circle::GetModel(modelData.data());
+ if (circleModel == nullptr)
+ {
+ std::cerr << "ERROR: Failed to load circle '" << modelFile << "'" << std::endl;
+ return 255;
+ }
+ cirops::DumpOperators dump;
+ dump.run(*oStream, circleModel, option);
+ }
+ catch (const std::runtime_error &err)
+ {
+ std::cerr << "ERROR: " << err.what() << std::endl;
+ return 255;
+ }
+
+ if (oFstream.is_open())
+ {
+ oFstream.close();
+ }
+
+ return 0;
+}
diff --git a/compiler/circle-operator/requires.cmake b/compiler/circle-operator/requires.cmake
new file mode 100644
index 000000000..b3a2638ef
--- /dev/null
+++ b/compiler/circle-operator/requires.cmake
@@ -0,0 +1,4 @@
+require("arser")
+require("foder")
+require("mio-circle06")
+require("safemain")
diff --git a/compiler/circle-operator/src/Dump.cpp b/compiler/circle-operator/src/Dump.cpp
new file mode 100644
index 000000000..36bfe8632
--- /dev/null
+++ b/compiler/circle-operator/src/Dump.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Dump.h"
+
+#include <mio_circle/Helper.h>
+#include <mio_circle/Reader.h>
+
+#include <ostream>
+
+namespace
+{
+
+void dump_ops(std::ostream &os, mio::circle::Reader &reader, const cirops::DumpOption &option)
+{
+ auto ops = reader.operators();
+ for (uint32_t i = 0; i < ops->Length(); ++i)
+ {
+ const auto op = ops->Get(i);
+
+ if (option.all_graphs)
+ {
+ // NOTE all_graphs is false for now
+ // TODO check using '$' as split key
+ os << i << "$";
+ }
+
+ if (option.codes)
+ {
+ const auto op_name = reader.opcode_name(op);
+ os << op_name;
+ }
+ if (option.names)
+ {
+ // TODO multiple outputs?
+ const auto tensors = reader.tensors();
+ const auto output_tensors = reader.outputs(op);
+ const auto output = output_tensors.at(0);
+ const auto tensor = tensors->Get(output);
+ const std::string name = mio::circle::tensor_name(tensor);
+ if (option.codes)
+ {
+ os << ",";
+ }
+ os << name;
+ }
+ os << std::endl;
+ }
+}
+
+} // namespace
+
+namespace cirops
+{
+
+void DumpOperators::run(std::ostream &os, const circle::Model *model, const DumpOption &option)
+{
+ mio::circle::Reader reader(model);
+
+ const uint32_t subgraph_size = reader.num_subgraph();
+ for (uint32_t g = 0; g < subgraph_size; g++)
+ {
+ reader.select_subgraph(g);
+ dump_ops(os, reader, option);
+
+ if (!option.all_graphs)
+ break;
+ }
+}
+
+} // namespace cirops
diff --git a/compiler/circle-operator/src/Dump.h b/compiler/circle-operator/src/Dump.h
new file mode 100644
index 000000000..aa1d1be49
--- /dev/null
+++ b/compiler/circle-operator/src/Dump.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DUMP_H__
+#define __DUMP_H__
+
+#include <mio/circle/schema_generated.h>
+
+#include <ostream>
+
+namespace cirops
+{
+
+struct DumpOption
+{
+ bool names = false;
+ bool codes = false;
+ bool all_graphs = false;
+};
+
+class DumpOperators
+{
+public:
+ DumpOperators() = default;
+
+public:
+ void run(std::ostream &os, const circle::Model *model, const DumpOption &option);
+};
+
+} // namespace cirops
+
+#endif // __DUMP_H__
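A condensed sketch of the intended call pattern (mirroring `Driver.cpp` above, with error handling omitted); `dump_all` and its use of `all_graphs` are illustrative only.

```cpp
#include "Dump.h"

#include <foder/FileLoader.h>

#include <iostream>
#include <string>
#include <vector>

// Dump operator codes and names of a circle model to stdout.
// Assumes `path` points to a valid circle file.
void dump_all(const std::string &path)
{
  foder::FileLoader loader{path};
  std::vector<char> data = loader.load();
  const circle::Model *model = circle::GetModel(data.data());

  cirops::DumpOption option;
  option.names = true;
  option.codes = true;
  option.all_graphs = true; // NOTE Driver.cpp keeps this false for now

  cirops::DumpOperators dump;
  dump.run(std::cout, model, option);
}
```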
diff --git a/compiler/circle-opselector/CMakeLists.txt b/compiler/circle-opselector/CMakeLists.txt
new file mode 100644
index 000000000..3f7f424ea
--- /dev/null
+++ b/compiler/circle-opselector/CMakeLists.txt
@@ -0,0 +1,39 @@
+set(DRIVER "driver/Driver.cpp")
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_executable(circle-opselector ${DRIVER} ${SOURCES})
+target_include_directories(circle-opselector PRIVATE src)
+target_link_libraries(circle-opselector foder)
+target_link_libraries(circle-opselector safemain)
+target_link_libraries(circle-opselector loco)
+target_link_libraries(circle-opselector luci_import)
+target_link_libraries(circle-opselector luci_export)
+target_link_libraries(circle-opselector arser)
+target_link_libraries(circle-opselector vconone)
+target_link_libraries(circle-opselector luci_service)
+target_link_libraries(circle-opselector luci_partition)
+target_link_libraries(circle-opselector luci_profile)
+
+install(TARGETS circle-opselector DESTINATION bin)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(circle-opselector-test ${TESTS} ${SOURCES} ${DRIVER})
+target_include_directories(circle-opselector-test PRIVATE src)
+target_link_libraries(circle-opselector-test foder)
+target_link_libraries(circle-opselector-test loco)
+target_link_libraries(circle-opselector-test luci_import)
+target_link_libraries(circle-opselector-test luci_export)
+target_link_libraries(circle-opselector-test arser)
+target_link_libraries(circle-opselector-test vconone)
+target_link_libraries(circle-opselector-test luci_service)
+target_link_libraries(circle-opselector-test luci_partition)
+target_link_libraries(circle-opselector-test luci_profile)
+target_link_libraries(circle-opselector-test luci_testhelper)
diff --git a/compiler/circle-opselector/README.md b/compiler/circle-opselector/README.md
new file mode 100644
index 000000000..5ea2d32c4
--- /dev/null
+++ b/compiler/circle-opselector/README.md
@@ -0,0 +1,21 @@
+# circle-opselector
+
+`circle-opselector` is a tool for creating new circle models by selecting nodes from a model.
+
+## Example
+
+### 1. Select by id
+
+```bash
+./circle-opselector --by_id "1-3,5" input.circle output.circle
+```
+
+Then, output.circle, which contains nodes 1, 2, 3 and 5, will be created.
+
+### 2. Select by node name
+
+```bash
+./circle-opselector --by_name "Add_1,Sub_1,Concat_2" input.circle output.circle
+```
+
+Then, output.circle, which contains nodes Add_1, Sub_1 and Concat_2, will be created.
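Note: for `--by_id`, the ids are the node ids recorded in the model (luci's `CircleNodeID` profile data); as the selection code below shows, nodes without a recorded id are silently skipped.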
diff --git a/compiler/circle-opselector/driver/Driver.cpp b/compiler/circle-opselector/driver/Driver.cpp
new file mode 100644
index 000000000..5ad2b9ca3
--- /dev/null
+++ b/compiler/circle-opselector/driver/Driver.cpp
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ModuleIO.h"
+#include "OpSelector.h"
+
+#include <luci/ConnectNode.h>
+#include <luci/Profile/CircleNodeID.h>
+#include <luci/Service/CircleNodeClone.h>
+
+#include <arser/arser.h>
+#include <vconone/vconone.h>
+
+#include <iostream>
+#include <string>
+#include <vector>
+#include <algorithm>
+#include <cctype>
+#include <numeric>
+#include <sstream>
+
+void print_version(void)
+{
+ std::cout << "circle-opselector version " << vconone::get_string() << std::endl;
+ std::cout << vconone::get_copyright() << std::endl;
+}
+
+int entry(int argc, char **argv)
+{
+ // TODO Add new option names!
+
+  arser::Arser arser("circle-opselector provides a way to select operations from a circle model");
+
+ arser::Helper::add_version(arser, print_version);
+
+ // TODO Add new options!
+
+ arser.add_argument("input").help("Input circle model");
+ arser.add_argument("output").help("Output circle model");
+
+ // select option
+ arser.add_argument("--by_id").help("Input operation id to select nodes.");
+ arser.add_argument("--by_name").help("Input operation name to select nodes.");
+
+ try
+ {
+ arser.parse(argc, argv);
+ }
+ catch (const std::runtime_error &err)
+ {
+ std::cerr << err.what() << std::endl;
+ std::cout << arser;
+ return EXIT_FAILURE;
+ }
+
+ std::string input_path = arser.get<std::string>("input");
+ std::string output_path = arser.get<std::string>("output");
+
+ if (!arser["--by_id"] && !arser["--by_name"] || arser["--by_id"] && arser["--by_name"])
+ {
+ std::cerr << "ERROR: Either option '--by_id' or '--by_name' must be specified" << std::endl;
+ std::cerr << arser;
+ return EXIT_FAILURE;
+ }
+
+ // Import original circle file.
+ auto module = opselector::getModule(input_path);
+
+ // TODO support two or more subgraphs
+  if (module->size() != 1)
+  {
+    std::cerr << "ERROR: Two or more subgraphs are not supported" << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ opselector::OpSelector op_selector{module.get()};
+
+ std::unique_ptr<luci::Module> new_module;
+ std::string operator_input;
+
+ if (arser["--by_id"])
+ {
+ operator_input = arser.get<std::string>("--by_id");
+ new_module = op_selector.select_by<opselector::SelectType::ID>(operator_input);
+ }
+ if (arser["--by_name"])
+ {
+ operator_input = arser.get<std::string>("--by_name");
+ new_module = op_selector.select_by<opselector::SelectType::NAME>(operator_input);
+ }
+
+ if (not opselector::exportModule(new_module.get(), output_path))
+ {
+ std::cerr << "ERROR: Cannot export the module" << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ return 0;
+}
diff --git a/compiler/circle-opselector/requires.cmake b/compiler/circle-opselector/requires.cmake
new file mode 100644
index 000000000..dcdbcbb68
--- /dev/null
+++ b/compiler/circle-opselector/requires.cmake
@@ -0,0 +1,6 @@
+require("foder")
+require("loco")
+require("safemain")
+require("luci")
+require("arser")
+require("vconone")
diff --git a/compiler/circle-opselector/src/ModuleIO.cpp b/compiler/circle-opselector/src/ModuleIO.cpp
new file mode 100644
index 000000000..46f45ceb0
--- /dev/null
+++ b/compiler/circle-opselector/src/ModuleIO.cpp
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ModuleIO.h"
+
+#include <foder/FileLoader.h>
+
+#include <luci/Importer.h>
+#include <luci/CircleExporter.h>
+#include <luci/CircleFileExpContract.h>
+
+#include <iostream>
+
+namespace opselector
+{
+
+std::unique_ptr<luci::Module> getModule(std::string &input_path)
+{
+ // Load model from the file
+ foder::FileLoader file_loader{input_path};
+ std::vector<char> model_data = file_loader.load();
+
+ // Verify flatbuffers
+ flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(model_data.data()), model_data.size()};
+ if (!circle::VerifyModelBuffer(verifier))
+ {
+ std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl;
+ exit(EXIT_FAILURE);
+ }
+
+ const circle::Model *circle_model = circle::GetModel(model_data.data());
+ if (circle_model == nullptr)
+ {
+ std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl;
+ exit(EXIT_FAILURE);
+ }
+
+ // Import from input Circle file
+ luci::Importer importer;
+
+ return importer.importModule(circle_model);
+}
+
+bool exportModule(luci::Module *module, std::string &output_path)
+{
+ luci::CircleExporter exporter;
+
+ luci::CircleFileExpContract contract(module, output_path);
+
+ if (!exporter.invoke(&contract))
+ {
+ std::cerr << "ERROR: Failed to export '" << output_path << "'" << std::endl;
+ return false;
+ }
+
+ return true;
+}
+
+} // namespace opselector
diff --git a/compiler/circle-opselector/src/ModuleIO.h b/compiler/circle-opselector/src/ModuleIO.h
new file mode 100644
index 000000000..39c704bf3
--- /dev/null
+++ b/compiler/circle-opselector/src/ModuleIO.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_OPSELECTOR_MODULEIO_H__
+#define __CIRCLE_OPSELECTOR_MODULEIO_H__
+
+#include <luci/IR/Module.h>
+
+#include <string>
+#include <memory>
+
+namespace opselector
+{
+
+std::unique_ptr<luci::Module> getModule(std::string &input_path);
+bool exportModule(luci::Module *module, std::string &output_path);
+
+} // namespace opselector
+
+#endif // __CIRCLE_OPSELECTOR_MODULEIO_H__
diff --git a/compiler/circle-opselector/src/ModuleIO.test.cpp b/compiler/circle-opselector/src/ModuleIO.test.cpp
new file mode 100644
index 000000000..a1e5c2070
--- /dev/null
+++ b/compiler/circle-opselector/src/ModuleIO.test.cpp
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ModuleIO.h"
+
+#include <gtest/gtest.h>
+
+TEST(ModuleIOTest, Export_nullptr)
+{
+ std::string output_path = "./test.out.circle";
+
+ ASSERT_EQ(false, opselector::exportModule(nullptr, output_path));
+}
diff --git a/compiler/circle-opselector/src/OpSelector.cpp b/compiler/circle-opselector/src/OpSelector.cpp
new file mode 100644
index 000000000..09a66548d
--- /dev/null
+++ b/compiler/circle-opselector/src/OpSelector.cpp
@@ -0,0 +1,402 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OpSelector.h"
+
+#include <luci/ConnectNode.h>
+#include <luci/Profile/CircleNodeID.h>
+#include <luci/Service/CircleNodeClone.h>
+
+#include <algorithm>
+#include <cassert>
+#include <sstream>
+#include <string>
+#include <vector>
+
+namespace
+{
+
+/**
+ * @brief Tokenize given string
+ *
+ * Assumes given string looks like below.
+ *
+ * - '1,2,5,7,9'
+ * - '1-5,6,7,9,12-14'
+ * - 'tensor_a,tensor_b,tensor_d'
+ *
+ * NOTE. '1-5' is the same as '1,2,3,4,5'.
+ *
+ * WARNING. SelectType::NAME doesn't allow '-' like 'tensor_a-tensor_c'.
+ */
+std::vector<std::string> split_into_vector(const std::string &str, const char &delim)
+{
+ std::vector<std::string> ret;
+ std::istringstream is(str);
+ for (std::string item; std::getline(is, item, delim);)
+ {
+ ret.push_back(item);
+ }
+
+ // Remove empty string
+ ret.erase(std::remove_if(ret.begin(), ret.end(), [](const std::string &s) { return s.empty(); }),
+ ret.end());
+
+ return ret;
+}
+
+bool is_number(const std::string &s)
+{
+ return !s.empty() && std::find_if(s.begin(), s.end(),
+ [](unsigned char c) { return !std::isdigit(c); }) == s.end();
+}
+
+bool is_number(const std::vector<std::string> &vec)
+{
+ for (const auto &s : vec)
+ {
+ if (not::is_number(s))
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+// TODO Move this class into a separate header for reuse
+class IsMultiOutputNode final : public luci::CircleNodeVisitor<bool>
+{
+public:
+ bool visit(const luci::CircleCustom *) final { return true; }
+ bool visit(const luci::CircleIf *) final { return true; }
+ bool visit(const luci::CircleNonMaxSuppressionV4 *) final { return true; }
+ bool visit(const luci::CircleNonMaxSuppressionV5 *) final { return true; }
+ bool visit(const luci::CircleSplit *) final { return true; }
+ bool visit(const luci::CircleSplitV *) final { return true; }
+ bool visit(const luci::CircleTopKV2 *) final { return true; }
+ bool visit(const luci::CircleUnique *) final { return true; }
+ bool visit(const luci::CircleUnpack *) final { return true; }
+ bool visit(const luci::CircleWhile *) final { return true; }
+ // default is false
+ bool visit(const luci::CircleNode *) final { return false; }
+};
+
+std::unique_ptr<loco::Graph> make_graph(const std::vector<const luci::CircleNode *> &nodes)
+{
+ auto graph = loco::make_graph();
+
+ luci::CloneContext ctx;
+ // clone nodes
+ for (const auto &n : nodes)
+ {
+ auto clone = luci::clone_node(n, graph.get());
+ ctx.emplace(n, clone);
+ }
+ // set graph input
+ for (const auto &n : nodes)
+ {
+ for (uint32_t i = 0; i < n->arity(); i++)
+ {
+ auto arg = n->arg(i);
+ auto input_node = dynamic_cast<luci::CircleNode *>(arg);
+ auto ctx_it = ctx.find(input_node);
+ // check if the node already has been cloned
+ if (ctx_it != ctx.end())
+ continue;
+      // the node isn't a graph input if it is another node's input
+ if (std::find(nodes.begin(), nodes.end(), arg) != nodes.end())
+ continue;
+ auto circle_const = dynamic_cast<luci::CircleConst *>(arg);
+ if (circle_const != nullptr)
+ {
+ auto clone = luci::clone_node(circle_const, graph.get());
+ ctx.emplace(circle_const, clone);
+ }
+ else
+ {
+ // circle input
+ auto circle_input = graph->nodes()->create<luci::CircleInput>();
+ input_node = dynamic_cast<luci::CircleNode *>(arg);
+ if (not input_node)
+ {
+ throw std::runtime_error{"ERROR: Invalid graph"};
+ }
+ luci::copy_common_attributes(input_node, circle_input);
+ ctx.emplace(input_node, circle_input);
+ // graph input
+ auto graph_input = graph->inputs()->create();
+ graph_input->name(circle_input->name());
+ graph_input->dtype(circle_input->dtype());
+ // graph input shape
+ auto input_shape = std::make_unique<loco::TensorShape>();
+ input_shape->rank(circle_input->rank());
+ for (uint32_t i = 0; i < circle_input->rank(); i++)
+ {
+ if (circle_input->dim(i).known())
+ {
+            input_shape->dim(i).set(circle_input->dim(i).value());
+ }
+ }
+ graph_input->shape(std::move(input_shape));
+
+ circle_input->index(graph_input->index());
+ }
+ }
+ }
+
+ const auto original_graph = nodes.at(0)->graph();
+ const auto original_outputs = loco::output_nodes(const_cast<loco::Graph *>(original_graph));
+
+ // set graph output
+ for (auto &n : nodes)
+ {
+ auto outputs = loco::succs(n);
+ bool beingUsed = false;
+ for (const auto &o : outputs)
+ {
+ if (std::find(nodes.begin(), nodes.end(), o) != nodes.end())
+ {
+ beingUsed = true;
+ break;
+ }
+ }
+
+ bool originalOutput = false;
+ for (const auto &o : outputs)
+ {
+ if (std::find(original_outputs.begin(), original_outputs.end(), o) != original_outputs.end())
+ {
+ originalOutput = true;
+ break;
+ }
+ }
+
+    // the node isn't a graph output if it is another node's output
+ if (beingUsed and not originalOutput)
+ continue;
+
+ IsMultiOutputNode multiout_visitor;
+ bool isMultiOut = n->accept(&multiout_visitor);
+ for (auto &o : outputs)
+ {
+ const luci::CircleNode *output_node = nullptr;
+ if (isMultiOut)
+ {
+ output_node = dynamic_cast<const luci::CircleNode *>(o);
+ if (not output_node)
+ {
+ throw std::runtime_error{"ERROR: Invalid graph"};
+ }
+ }
+ else
+ {
+ output_node = n;
+ }
+ // circle output
+ auto circle_output = graph->nodes()->create<luci::CircleOutput>();
+ luci::copy_common_attributes(output_node, circle_output);
+ // connect to cloned output node
+ circle_output->from(ctx.find(output_node)->second);
+ // graph output
+ auto graph_output = graph->outputs()->create();
+ graph_output->name(output_node->name());
+ graph_output->dtype(output_node->dtype());
+ // graph output shape
+ auto output_shape = std::make_unique<loco::TensorShape>();
+ output_shape->rank(circle_output->rank());
+ for (uint32_t i = 0; i < output_shape->rank(); i++)
+ {
+ if (circle_output->dim(i).known())
+ {
+ output_shape->dim(i).set(circle_output->dim(i).value());
+ }
+ }
+ graph_output->shape(std::move(output_shape));
+
+ circle_output->index(graph_output->index());
+ if (not isMultiOut)
+ break;
+ }
+ }
+ // connect nodes
+ for (const auto &n : nodes)
+ {
+ luci::clone_connect(n, ctx);
+ }
+
+ return graph;
+}
+
+} // namespace
+
+namespace opselector
+{
+
+OpSelector::OpSelector(const luci::Module *module) : _module{module}
+{
+ if (_module->size() != 1)
+ {
+ throw std::runtime_error{"ERROR: Not support two or more subgraphs"};
+ }
+}
+
+template <>
+std::vector<const luci::CircleNode *>
+OpSelector::select_by<SelectType::ID>(const std::vector<std::string> &comma_tokens)
+{
+ std::vector<uint32_t> by_id;
+
+ for (const auto &comma_token : comma_tokens)
+ {
+ auto dash_tokens = ::split_into_vector(comma_token, '-');
+ if (not::is_number(dash_tokens))
+ {
+ throw std::runtime_error{
+ "ERROR: To select operator by id, please use these args: [0-9], '-', ','"};
+ }
+
+ // Convert string into integer
+ std::vector<uint32_t> int_tokens;
+ try
+ {
+ std::transform(dash_tokens.begin(), dash_tokens.end(), std::back_inserter(int_tokens),
+ [](const std::string &str) { return static_cast<uint32_t>(std::stoi(str)); });
+ }
+ catch (const std::out_of_range &)
+ {
+      // If input is a big integer like '123467891234', stoi throws this exception.
+ throw std::runtime_error{"ERROR: Argument is out of range."};
+ }
+ catch (...)
+ {
+ throw std::runtime_error{"ERROR: Unknown error"};
+ }
+
+ switch (int_tokens.size())
+ {
+ case 0: // inputs like "-"
+ {
+ throw std::runtime_error{"ERROR: Nothing was entered"};
+ }
+ case 1: // inputs like "1", "2"
+ {
+ by_id.push_back(int_tokens.at(0));
+ break;
+ }
+ case 2: // inputs like "1-2", "11-50"
+ {
+ for (uint32_t i = int_tokens.at(0); i <= int_tokens.at(1); i++)
+ {
+ by_id.push_back(i);
+ }
+ break;
+ }
+ default: // inputs like "1-2-3"
+ {
+ throw std::runtime_error{"ERROR: Too many '-' in str."};
+ }
+ }
+ }
+
+ loco::Graph *graph = _module->graph(0);
+ std::vector<const luci::CircleNode *> selected_nodes;
+
+ for (auto node : loco::all_nodes(graph))
+ {
+ auto cnode = loco::must_cast<const luci::CircleNode *>(node);
+
+ try
+ {
+ auto node_id = luci::get_node_id(cnode);
+ for (auto selected_id : by_id)
+ {
+ if (selected_id == node_id)
+ {
+ selected_nodes.emplace_back(cnode);
+ }
+ }
+ }
+ catch (const std::runtime_error &)
+ {
+ continue;
+ }
+ }
+
+ return selected_nodes;
+}
+
+template <>
+std::vector<const luci::CircleNode *>
+OpSelector::select_by<SelectType::NAME>(const std::vector<std::string> &tokens)
+{
+ loco::Graph *graph = _module->graph(0);
+ std::vector<const luci::CircleNode *> selected_nodes;
+
+ for (auto node : loco::all_nodes(graph))
+ {
+ auto cnode = loco::must_cast<const luci::CircleNode *>(node);
+ std::string node_name = cnode->name();
+
+ for (const auto &selected_name : tokens)
+ if (selected_name.compare(node_name) == 0) // find the selected name
+ selected_nodes.emplace_back(cnode);
+ }
+
+ return selected_nodes;
+}
+
+template <SelectType SELECT_TYPE>
+std::unique_ptr<luci::Module> OpSelector::select_by(const std::string &str)
+{
+  auto comma_tokens = ::split_into_vector(str, ',');
+  if (comma_tokens.empty())
+  {
+    throw std::runtime_error{"ERROR: Nothing was entered."};
+  }
+
+ assert(_module->size() == 1);
+
+  auto selected_nodes = select_by<SELECT_TYPE>(comma_tokens);
+
+ // multiout node should be considered
+ IsMultiOutputNode multiout_visitor;
+ std::vector<const luci::CircleNode *> output_nodes;
+ for (const auto &node : selected_nodes)
+ {
+ if (node->accept(&multiout_visitor))
+ {
+ auto outputs = loco::succs(node);
+ for (auto &o : outputs)
+ {
+ output_nodes.push_back(dynamic_cast<luci::CircleNode *>(o));
+ }
+ }
+ }
+ selected_nodes.insert(selected_nodes.end(), output_nodes.begin(), output_nodes.end());
+
+ auto new_module = std::make_unique<luci::Module>();
+ new_module->add(::make_graph(selected_nodes));
+
+ return new_module;
+}
+
+template std::unique_ptr<luci::Module>
+OpSelector::select_by<SelectType::ID>(const std::string &str);
+
+template std::unique_ptr<luci::Module>
+OpSelector::select_by<SelectType::NAME>(const std::string &str);
+
+} // namespace opselector
diff --git a/compiler/circle-opselector/src/OpSelector.h b/compiler/circle-opselector/src/OpSelector.h
new file mode 100644
index 000000000..c4366fa89
--- /dev/null
+++ b/compiler/circle-opselector/src/OpSelector.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_OPSELECTOR_OPSELECTOR_H__
+#define __CIRCLE_OPSELECTOR_OPSELECTOR_H__
+
+#include "SelectType.h"
+
+#include <luci/IR/Module.h>
+#include <luci/IR/CircleNodeDecl.h>
+
+#include <string>
+#include <vector>
+
+namespace opselector
+{
+
+class OpSelector final
+{
+private:
+ const luci::Module *_module;
+
+public:
+ OpSelector(const luci::Module *module);
+
+private:
+ template <SelectType SELECT_TYPE>
+ std::vector<const luci::CircleNode *> select_by(const std::vector<std::string> &tokens);
+
+public:
+ template <SelectType SELECT_TYPE> std::unique_ptr<luci::Module> select_by(const std::string &str);
+};
+
+extern template std::unique_ptr<luci::Module>
+OpSelector::select_by<SelectType::ID>(const std::string &str);
+extern template std::unique_ptr<luci::Module>
+OpSelector::select_by<SelectType::NAME>(const std::string &str);
+
+} // namespace opselector
+
+#endif // __CIRCLE_OPSELECTOR_OPSELECTOR_H__
diff --git a/compiler/circle-opselector/src/OpSelector.test.cpp b/compiler/circle-opselector/src/OpSelector.test.cpp
new file mode 100644
index 000000000..a5ccc03f2
--- /dev/null
+++ b/compiler/circle-opselector/src/OpSelector.test.cpp
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OpSelector.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+/**
+ * Conv-Donv graphlet
+ *
+ * [Conv]
+ * |
+ * [Donv]
+ *
+ */
+class ConvDonvGraphlet
+{
+public:
+ void init(loco::Graph *g)
+ {
+ _conv_filter = g->nodes()->create<luci::CircleConst>();
+ _conv_filter->dtype(loco::DataType::FLOAT32);
+ _conv_filter->shape({16, 1, 1, 16});
+ _conv_filter->name("conv_filter");
+
+ _conv_bias = g->nodes()->create<luci::CircleConst>();
+ _conv_bias->dtype(loco::DataType::FLOAT32);
+ _conv_bias->shape({16});
+ _conv_bias->name("conv_bias");
+
+ _conv = g->nodes()->create<luci::CircleConv2D>();
+ _conv->padding(luci::Padding::SAME);
+ _conv->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _conv->dtype(loco::DataType::FLOAT32);
+ _conv->shape({1, 4, 4, 16});
+ _conv->name("conv");
+ _conv->filter(_conv_filter);
+ _conv->bias(_conv_bias);
+
+ _dconv_filter = g->nodes()->create<luci::CircleConst>();
+ _dconv_filter->dtype(loco::DataType::FLOAT32);
+ _dconv_filter->shape({16, 1, 1, 16});
+ _dconv_filter->name("dconv_filter");
+
+ _dconv_bias = g->nodes()->create<luci::CircleConst>();
+ _dconv_bias->dtype(loco::DataType::FLOAT32);
+ _dconv_bias->shape({16});
+ _dconv_bias->name("dconv_bias");
+
+ _dconv = g->nodes()->create<luci::CircleDepthwiseConv2D>();
+ _dconv->input(_conv);
+ _dconv->depthMultiplier(1);
+ _dconv->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _dconv->dtype(loco::DataType::FLOAT32);
+ _dconv->shape({1, 4, 4, 16});
+ _dconv->padding(luci::Padding::SAME);
+ _dconv->name("dconv");
+ _dconv->filter(_dconv_filter);
+ _dconv->bias(_dconv_bias);
+ }
+
+protected:
+ luci::CircleConv2D *_conv{nullptr};
+ luci::CircleConst *_conv_filter{nullptr};
+ luci::CircleConst *_conv_bias{nullptr};
+ luci::CircleDepthwiseConv2D *_dconv{nullptr};
+ luci::CircleConst *_dconv_filter{nullptr};
+ luci::CircleConst *_dconv_bias{nullptr};
+};
+
+class ConvDonvGraph : public luci::test::TestIOGraph, public ConvDonvGraphlet
+{
+public:
+ ConvDonvGraph()
+ {
+ luci::test::TestIOGraph::init({1, 4, 4, 16}, {1, 4, 4, 16});
+ ConvDonvGraphlet::init(g());
+
+ _conv->input(input());
+
+ output()->from(_dconv);
+ }
+
+ std::unique_ptr<loco::Graph> graph(void) { return std::move(_g); }
+};
+
+} // namespace
+
+TEST(OpSelectorTest, select_by_name)
+{
+ auto m = luci::make_module();
+
+ ConvDonvGraph g;
+ g.transfer_to(m.get());
+
+ opselector::OpSelector op_selector{m.get()};
+
+ // Select conv only
+ auto conv_module = op_selector.select_by<opselector::SelectType::NAME>("conv");
+ ASSERT_EQ(1, conv_module->size());
+
+ auto conv_graph = conv_module->graph(0);
+ ASSERT_EQ(1, conv_graph->outputs()->size());
+
+ auto output_node1 = luci::output_node(conv_graph, 0);
+ auto conv = loco::must_cast<luci::CircleConv2D *>(output_node1->from());
+ EXPECT_STREQ("conv", conv->name().c_str());
+ auto conv_filter = loco::must_cast<luci::CircleConst *>(conv->filter());
+ EXPECT_STREQ("conv_filter", conv_filter->name().c_str());
+ auto conv_bias = loco::must_cast<luci::CircleConst *>(conv->bias());
+ EXPECT_STREQ("conv_bias", conv_bias->name().c_str());
+
+ // Select dconv only
+ auto dconv_module = op_selector.select_by<opselector::SelectType::NAME>("dconv");
+ ASSERT_EQ(1, dconv_module->size());
+
+ auto dconv_graph = dconv_module->graph(0);
+ ASSERT_EQ(1, dconv_graph->outputs()->size());
+
+ auto output_node2 = luci::output_node(dconv_graph, 0);
+ auto dconv = loco::must_cast<luci::CircleDepthwiseConv2D *>(output_node2->from());
+ EXPECT_STREQ("dconv", dconv->name().c_str());
+ auto dconv_filter = loco::must_cast<luci::CircleConst *>(dconv->filter());
+ EXPECT_STREQ("dconv_filter", dconv_filter->name().c_str());
+ auto dconv_bias = loco::must_cast<luci::CircleConst *>(dconv->bias());
+ EXPECT_STREQ("dconv_bias", dconv_bias->name().c_str());
+}
+
+TEST(OpSelectorTest, select_by_name_NEG)
+{
+ auto m = luci::make_module();
+
+ ConvDonvGraph g;
+ g.transfer_to(m.get());
+
+ opselector::OpSelector op_selector{m.get()};
+
+ EXPECT_ANY_THROW(op_selector.select_by<opselector::SelectType::NAME>(","));
+}
diff --git a/compiler/circle-opselector/src/SelectType.h b/compiler/circle-opselector/src/SelectType.h
new file mode 100644
index 000000000..46d5f09a9
--- /dev/null
+++ b/compiler/circle-opselector/src/SelectType.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_OPSELECTOR_SELECT_TYPE_H__
+#define __CIRCLE_OPSELECTOR_SELECT_TYPE_H__
+
+#include <string>
+
+namespace opselector
+{
+
+enum class SelectType
+{
+ ID,
+ NAME,
+};
+
+} // namespace opselector
+
+#endif // __CIRCLE_OPSELECTOR_SELECT_TYPE_H__
diff --git a/compiler/circle-opselector/src/TestHelper.h b/compiler/circle-opselector/src/TestHelper.h
new file mode 100644
index 000000000..966e2b219
--- /dev/null
+++ b/compiler/circle-opselector/src/TestHelper.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_OPSELECTOR_TEST_HELPER_H__
+#define __CIRCLE_OPSELECTOR_TEST_HELPER_H__
+
+#include <cassert>
+#include <string.h>
+
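+// Builds a mutable argv[] for driver tests; a sketch of intended use
+// (the program name and option below are hypothetical):
+//
+//   Argv<2> args;
+//   args.add("circle-opselector");
+//   args.add("--version");
+//   entry(2, args.argv());
+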
+template <size_t N> class Argv
+{
+public:
+ typedef char *pchar_t;
+
+public:
+ ~Argv()
+ {
+ for (size_t n = 0; n < _ptr; ++n)
+      delete[] _argv[n];
+ }
+
+ void add(const char *in)
+ {
+ assert(_ptr < N);
+ _argv[_ptr] = new char[strlen(in) + 1];
+ strncpy(_argv[_ptr], in, strlen(in) + 1);
+ _ptr++;
+ }
+
+ pchar_t *argv(void) { return _argv; }
+
+private:
+ pchar_t _argv[N] = {
+ nullptr,
+ };
+ size_t _ptr = 0;
+};
+
+#endif // __CIRCLE_OPSELECTOR_TEST_HELPER_H__
diff --git a/compiler/circle-part-driver/CMakeLists.txt b/compiler/circle-part-driver/CMakeLists.txt
new file mode 100644
index 000000000..cb708742c
--- /dev/null
+++ b/compiler/circle-part-driver/CMakeLists.txt
@@ -0,0 +1,17 @@
+set(SRCS_PART_TESTER
+ src/Driver.cpp
+ src/PModelsRunner.cpp
+ )
+
+add_executable(circle_part_driver ${SRCS_PART_TESTER})
+target_link_libraries(circle_part_driver foder)
+target_link_libraries(circle_part_driver loco)
+target_link_libraries(circle_part_driver luci_import)
+target_link_libraries(circle_part_driver luci_lang)
+target_link_libraries(circle_part_driver luci_log)
+target_link_libraries(circle_part_driver luci_interpreter)
+target_link_libraries(circle_part_driver crew)
+target_link_libraries(circle_part_driver safemain)
+target_link_libraries(circle_part_driver nncc_common)
+
+install(TARGETS circle_part_driver DESTINATION bin)
diff --git a/compiler/circle-part-driver/README.md b/compiler/circle-part-driver/README.md
new file mode 100644
index 000000000..d66ecf5fa
--- /dev/null
+++ b/compiler/circle-part-driver/README.md
@@ -0,0 +1,3 @@
+# circle-part-driver
+
+_circle-part-driver_ is a test driver that runs partitioned circle models.
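+
+### Usage
+
+The driver takes four arguments, matching the usage message in `src/Driver.cpp`
+(the paths below are placeholders):
+
+```
+circle_part_driver <path/to/partition/config> <num_inputs> <path/to/input/prefix> <path/to/output/file>
+```
+
+Input files are read as `<input/prefix>0`, `<input/prefix>1`, ... and outputs are
+written as `<output/file>0`, `<output/file>1`, ... with one `.shape` file per output.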
diff --git a/compiler/circle-part-driver/requires.cmake b/compiler/circle-part-driver/requires.cmake
new file mode 100644
index 000000000..72296e32f
--- /dev/null
+++ b/compiler/circle-part-driver/requires.cmake
@@ -0,0 +1,6 @@
+require("foder")
+require("loco")
+require("luci")
+require("luci-interpreter")
+require("crew")
+require("safemain")
diff --git a/compiler/circle-part-driver/src/Driver.cpp b/compiler/circle-part-driver/src/Driver.cpp
new file mode 100644
index 000000000..a39bbf187
--- /dev/null
+++ b/compiler/circle-part-driver/src/Driver.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PModelsRunner.h"
+
+#include <luci/Log.h>
+
+#include <iostream>
+
+int entry(int argc, char **argv)
+{
+ LOGGER(l);
+
+ if (argc != 5)
+ {
+ std::cerr
+ << "Usage: " << argv[0]
+ << " <path/to/partition/config> <num_inputs> <path/to/input/prefix> <path/to/output/file>\n";
+ return EXIT_FAILURE;
+ }
+  // NOTE: about input/output data file names
+  // - I/O file names are formed as filename.ext0, filename.ext1, ...
+  // NOTE: about output shape
+  // - shape file names are filename.ext0.shape, filename.ext1.shape, ...
+  //   each holding one line of text in CSV format (like H,W or N,C,H,W)
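+  //   e.g. with input_prefix "in" and 2 inputs, files "in0" and "in1" are read;
+  //   with output_file "out", files "out0", "out0.shape", ... are written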
+
+ const char *config_filename = argv[1];
+ const int32_t num_inputs = atoi(argv[2]);
+ const char *input_prefix = argv[3];
+ const char *output_file = argv[4];
+
+ prunner::PModelsRunner pmrunner;
+
+ INFO(l) << "Read config file: " << config_filename << std::endl;
+ if (not pmrunner.load_config(config_filename))
+ return EXIT_FAILURE;
+
+ INFO(l) << "Read input file: " << input_prefix << ", #inputs: " << num_inputs << std::endl;
+ pmrunner.load_inputs(input_prefix, num_inputs);
+
+ INFO(l) << "Run all partitioned models..." << std::endl;
+ if (!pmrunner.run())
+ return EXIT_FAILURE;
+
+ INFO(l) << "Save output file: " << output_file << std::endl;
+ pmrunner.save_outputs(output_file);
+
+ return EXIT_SUCCESS;
+}
diff --git a/compiler/circle-part-driver/src/PModelsRunner.cpp b/compiler/circle-part-driver/src/PModelsRunner.cpp
new file mode 100644
index 000000000..dd2ffe22d
--- /dev/null
+++ b/compiler/circle-part-driver/src/PModelsRunner.cpp
@@ -0,0 +1,253 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PModelsRunner.h"
+
+#include <luci/IR/Nodes/CircleInput.h>
+#include <luci/IR/Nodes/CircleOutput.h>
+#include <luci/Importer.h>
+#include <luci/Log.h>
+#include <luci_interpreter/Interpreter.h>
+
+#include <foder/FileLoader.h>
+#include <crew/PConfig.h>
+
+#include <fstream>
+#include <iostream>
+#include <vector>
+#include <string>
+#include <stdexcept>
+
+namespace
+{
+
+void write_file(const std::string &filename, const char *data, size_t data_size)
+{
+ std::ofstream fs(filename, std::ofstream::binary);
+ if (fs.fail())
+ throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
+ if (fs.write(data, data_size).fail())
+ {
+ throw std::runtime_error("Failed to write data to file \"" + filename + "\".\n");
+ }
+}
+
+std::unique_ptr<luci::Module> import_circle(const std::string &filename)
+{
+ std::ifstream fs(filename, std::ifstream::binary);
+ if (fs.fail())
+ {
+ throw std::runtime_error("Cannot open model file \"" + filename + "\".\n");
+ }
+ std::vector<char> model_data((std::istreambuf_iterator<char>(fs)),
+ std::istreambuf_iterator<char>());
+
+ return luci::Importer().importModule(circle::GetModel(model_data.data()));
+}
+
+void save_shape(const std::string &shape_filename, const luci::CircleOutput *output_node)
+{
+ if (output_node->rank() == 0)
+ {
+ write_file(shape_filename, "1", 1);
+ }
+ else
+ {
+ auto shape_str = std::to_string(output_node->dim(0).value());
+ for (uint32_t j = 1; j < output_node->rank(); j++)
+ {
+ shape_str += ",";
+ shape_str += std::to_string(output_node->dim(j).value());
+ }
+ write_file(shape_filename, shape_str.c_str(), shape_str.size());
+ }
+}
+
+template <typename NodeT> size_t tensor_size(const NodeT *node)
+{
+ uint32_t tsize = loco::size(node->dtype());
+ for (uint32_t i = 0; i < node->rank(); ++i)
+ {
+ assert(node->dim(i).known());
+ tsize *= node->dim(i).value();
+ }
+ return tsize;
+}
+
+} // namespace
+
+namespace prunner
+{
+
+bool PModelsRunner::load_config(const std::string &filename)
+{
+ if (!crew::read_ini(filename, _pconfig))
+ {
+ std::cerr << "ERROR: Invalid config ini file: '" << filename << "'" << std::endl;
+ return false;
+ }
+
+ for (auto &part : _pconfig.parts)
+ {
+ _models_to_run.push_back(part.model_file);
+ }
+ return true;
+}
+
+void PModelsRunner::load_inputs(const std::string &input_prefix, int32_t num_inputs)
+{
+ LOGGER(l);
+
+ auto its = _pconfig.source.inputs.begin();
+ for (int32_t i = 0; i < num_inputs; ++i, ++its)
+ {
+ std::string filename = input_prefix + std::to_string(i);
+
+ INFO(l) << "Load input data: " << filename << std::endl;
+ foder::FileLoader file_loader{filename};
+
+ std::string input_name = *its;
+ _data_stage[input_name] = file_loader.load();
+
+ INFO(l) << "Input: [" << input_name << "], size " << _data_stage[input_name].size()
+ << std::endl;
+ }
+}
+
+/**
+ * @brief Return true if all inputs of the model are ready in _data_stage
+ */
+bool PModelsRunner::is_input_ready(const RunModel &model)
+{
+ for (auto &part : _pconfig.parts)
+ {
+ if (part.model_file != model)
+ continue;
+
+ for (auto &input : part.inputs)
+ {
+ auto it = _data_stage.find(input);
+ if (it == _data_stage.end())
+ return false;
+ }
+ }
+ return true;
+}
+
+bool PModelsRunner::run(void)
+{
+ LOGGER(l);
+
+ // for each partitioned model, if the inputs of the model are ready, run the model
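+  // This is a simple worklist scheduler: repeatedly scan the remaining models,
+  // run any model whose inputs are all staged in _data_stage, and stage its
+  // outputs for downstream models. If a full scan runs no model, the partition
+  // configuration is inconsistent and we fail.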
+ do
+ {
+ bool found_model = false;
+
+ for (auto it = _models_to_run.begin(); it != _models_to_run.end(); ++it)
+ {
+ auto model_fname = *it;
+
+ INFO(l) << "Check model input ready: " << model_fname << std::endl;
+ if (is_input_ready(model_fname))
+ {
+ found_model = true;
+
+ INFO(l) << "Run model: " << model_fname << std::endl;
+ auto module = import_circle(model_fname);
+
+ luci_interpreter::Interpreter interpreter(module.get());
+
+ // Set input
+ const auto input_nodes = loco::input_nodes(module->graph());
+ int32_t num_inputs = static_cast<int32_t>(input_nodes.size());
+ for (int32_t i = 0; i < num_inputs; i++)
+ {
+ const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[i]);
+
+ auto input_name = input_node->name();
+ assert(_data_stage.find(input_name) != _data_stage.end());
+
+ auto input_data = _data_stage[input_name];
+
+ interpreter.writeInputTensor(input_node, input_data.data(), input_data.size());
+ }
+
+ // Run interpreter
+ interpreter.interpret();
+ INFO(l) << "Run model: " << model_fname << " done" << std::endl;
+
+ // Get output.
+ const auto output_nodes = loco::output_nodes(module->graph());
+ for (uint32_t i = 0; i < module->graph()->outputs()->size(); i++)
+ {
+ const auto *output_node = loco::must_cast<const luci::CircleOutput *>(output_nodes[i]);
+ auto output_name = output_node->name();
+
+ Buffer output_data(tensor_size(output_node));
+
+ interpreter.readOutputTensor(output_node, output_data.data(), output_data.size());
+
+          // Output names must be unique within the model
+ // TODO check with multiple virtual outputs
+ assert(_data_stage.find(output_name) == _data_stage.end());
+ _data_stage[output_name] = output_data;
+ }
+
+        // We've run this model; remove it from the model list
+ _models_to_run.erase(it);
+ break;
+ }
+ }
+
+ if (not found_model)
+ {
+ std::cerr << "ERROR: model partition or configuration has problems" << std::endl;
+ return false;
+ }
+ } while (not _models_to_run.empty());
+
+ return true;
+}
+
+void PModelsRunner::save_outputs(const std::string &output_file)
+{
+ LOGGER(l);
+
+ // load source model as we need to get both shape and node name
+ // TODO check for unknown shape
+ auto source_fname = _pconfig.source.model_file;
+
+ INFO(l) << "save_outputs() loading file: " << source_fname << std::endl;
+ auto module = import_circle(source_fname);
+
+ const auto output_nodes = loco::output_nodes(module->graph());
+ for (uint32_t i = 0; i < module->graph()->outputs()->size(); i++)
+ {
+ const auto *output_node = loco::must_cast<const luci::CircleOutput *>(output_nodes[i]);
+
+ auto output_name = output_node->name();
+ INFO(l) << "save_outputs() save output node: " << output_name << std::endl;
+ assert(_data_stage.find(output_name) != _data_stage.end());
+
+ auto tensor_data = _data_stage[output_name];
+ auto output_filename = output_file + std::to_string(i);
+
+ write_file(output_filename, tensor_data.data(), tensor_data.size());
+ save_shape(output_filename + ".shape", output_node);
+ }
+}
+
+} // namespace prunner
diff --git a/compiler/circle-part-driver/src/PModelsRunner.h b/compiler/circle-part-driver/src/PModelsRunner.h
new file mode 100644
index 000000000..c1a45f01c
--- /dev/null
+++ b/compiler/circle-part-driver/src/PModelsRunner.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_PRUNNER_PMODELS_RUNNER_H__
+#define __CIRCLE_PRUNNER_PMODELS_RUNNER_H__
+
+#include <crew/PConfig.h>
+
+#include <map>
+#include <string>
+#include <vector>
+
+namespace prunner
+{
+
+using Buffer = std::vector<char>;
+
+using Buffers = std::map<std::string, Buffer>;
+
+using RunModel = std::string;
+
+using RunModels = std::vector<RunModel>;
+
+/**
+ * @brief PModelsRunner runs partitioned models from input data file and stores
+ * output data to a file
+ */
+class PModelsRunner
+{
+public:
+ PModelsRunner() = default;
+
+public:
+ bool load_config(const std::string &filename);
+ void load_inputs(const std::string &input_prefix, int32_t num_inputs);
+ bool run(void);
+ void save_outputs(const std::string &output_file);
+
+private:
+ bool is_input_ready(const RunModel &model);
+
+private:
+ crew::PConfig _pconfig;
+ RunModels _models_to_run;
+ Buffers _data_stage;
+};
+
+} // namespace prunner
+
+#endif // __CIRCLE_PRUNNER_PMODELS_RUNNER_H__
diff --git a/compiler/circle-part-value-test/CMakeLists.txt b/compiler/circle-part-value-test/CMakeLists.txt
new file mode 100644
index 000000000..04ad830d3
--- /dev/null
+++ b/compiler/circle-part-value-test/CMakeLists.txt
@@ -0,0 +1,113 @@
+#
+# this project validates partitioned models produced by circle-partitioner
+# with circle-part-driver and two scripts: part_eval_all.sh and part_eval_one.py
+#
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
+
+unset(RECIPE_LIST)
+unset(PARTITION_LIST)
+unset(OUTPUT_COUNT_LIST)
+unset(TEST_DEPS)
+
+macro(add RECIPE_NAME PARTITION_NAME OUTPUT_COUNT)
+ list(APPEND RECIPE_LIST ${RECIPE_NAME})
+ list(APPEND PARTITION_LIST ${PARTITION_NAME})
+ list(APPEND OUTPUT_COUNT_LIST ${OUTPUT_COUNT})
+endmacro(add)
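+
+# e.g. add(Part_Add_Sub_000 Part_Add_Sub_000 2) tests recipe Part_Add_Sub_000 with
+# partition file Part_Add_Sub_000.part and expects 2 partitioned models;
+# the actual entries live in "test.lst"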
+
+# Read "test.lst"
+include("test.lst")
+
+list(LENGTH RECIPE_LIST RECIPE_LENGTH)
+math(EXPR RECIPE_LENGTH_M1 "${RECIPE_LENGTH} - 1")
+
+foreach(IDX RANGE ${RECIPE_LENGTH_M1})
+ list(GET RECIPE_LIST ${IDX} RECIPE_NAME)
+ list(GET PARTITION_LIST ${IDX} PARTITION_NAME)
+ list(GET OUTPUT_COUNT_LIST ${IDX} OUTPUT_COUNT)
+
+ # NOTE about the name:
+ # Use '.recipe' name for source tflite and circle files
+ # Use '.part' name for actual test folder and test files
+
+ # Output to a folder
+ set(PARTITIONER_OUTPUT_PATH "${CMAKE_CURRENT_BINARY_DIR}/${PARTITION_NAME}")
+
+ add_custom_command(OUTPUT ${PARTITIONER_OUTPUT_PATH}
+ COMMAND ${CMAKE_COMMAND} -E make_directory "${PARTITIONER_OUTPUT_PATH}"
+ COMMENT "Make directory ${PARTITIONER_OUTPUT_PATH}"
+ )
+
+ # Copy tflite
+ set(TFLITE_SRC_PATH "${ARTIFACTS_BIN_PATH}/${RECIPE_NAME}.tflite")
+ set(TFLITE_DST_PATH "${PARTITIONER_OUTPUT_PATH}/${PARTITION_NAME}.tflite")
+
+ add_custom_command(OUTPUT ${TFLITE_DST_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${TFLITE_SRC_PATH}" "${TFLITE_DST_PATH}"
+ DEPENDS ${TFLITE_SRC_PATH}
+ COMMENT "Copy ${RECIPE_NAME}.tflite"
+ )
+ list(APPEND TEST_DEPS ${TFLITE_DST_PATH})
+
+ # Copy circle
+ set(CIRCLE_SRC_PATH "${ARTIFACTS_BIN_PATH}/${RECIPE_NAME}.circle")
+ set(CIRCLE_DST_PATH "${PARTITIONER_OUTPUT_PATH}/${PARTITION_NAME}.circle")
+
+ add_custom_command(OUTPUT ${CIRCLE_DST_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${CIRCLE_SRC_PATH}" "${CIRCLE_DST_PATH}"
+ DEPENDS ${CIRCLE_SRC_PATH}
+ COMMENT "Copy ${RECIPE_NAME}.circle"
+ )
+ list(APPEND TEST_DEPS ${CIRCLE_DST_PATH})
+
+ # Copy .part
+ set(PART_FILE "${PARTITION_NAME}.part")
+ set(PART_SRC_PATH "${CMAKE_CURRENT_SOURCE_DIR}/parts/${PART_FILE}")
+ set(PART_DST_PATH "${PARTITIONER_OUTPUT_PATH}/${PART_FILE}")
+
+ add_custom_command(OUTPUT ${PART_DST_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${PART_SRC_PATH}" "${PART_DST_PATH}"
+ DEPENDS ${PART_SRC_PATH}
+ COMMENT "Copy ${PART_FILE}"
+ )
+ list(APPEND TEST_DEPS ${PART_DST_PATH})
+
+ # Partition connection file to generate
+ set(PARTITIONER_CONN_JSON "${PARTITIONER_OUTPUT_PATH}/${PARTITION_NAME}.conn.json")
+
+ # Run partitioner
+ add_custom_command(OUTPUT ${PARTITIONER_CONN_JSON}
+ COMMAND circle-partitioner "--part_file" "${PART_FILE}" "--input_file"
+ "${PARTITION_NAME}.circle" "--work_path" "${PARTITIONER_OUTPUT_PATH}"
+ DEPENDS circle-partitioner ${PART_DST_PATH} ${CIRCLE_DST_PATH}
+    COMMENT "Partition ${RECIPE_NAME}.circle with ${PART_FILE}"
+ )
+ list(APPEND TEST_DEPS ${PARTITIONER_CONN_JSON})
+
+ # Write .excnt file; expected count of output models
+ set(COUNT_FILE "${PARTITION_NAME}.excnt")
+ set(COUNT_FILE_PATH "${PARTITIONER_OUTPUT_PATH}/${COUNT_FILE}")
+ add_custom_command(OUTPUT ${COUNT_FILE_PATH}
+ COMMAND echo ${OUTPUT_COUNT} > ${COUNT_FILE_PATH}
+ DEPENDS ${PART_SRC_PATH} ${PARTITIONER_OUTPUT_PATH}
+ COMMENT "Write ${COUNT_FILE} with ${OUTPUT_COUNT}"
+ )
+ list(APPEND TEST_DEPS ${COUNT_FILE_PATH})
+endforeach(IDX)
+
+add_custom_target(circle_part_value_test_prepare ALL DEPENDS ${TEST_DEPS})
+add_dependencies(circle_part_value_test_prepare common_artifacts_deps)
+
+# run evaluation
+add_test(NAME circle_part_value_test
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/part_eval_all.sh"
+ "${CMAKE_CURRENT_BINARY_DIR}"
+ "${NNCC_OVERLAY_DIR}/venv_2_12_1"
+ "$<TARGET_FILE:circle_part_driver>"
+ ${PARTITION_LIST}
+)
diff --git a/compiler/circle-part-value-test/README.md b/compiler/circle-part-value-test/README.md
new file mode 100644
index 000000000..6322b0791
--- /dev/null
+++ b/compiler/circle-part-value-test/README.md
@@ -0,0 +1,15 @@
+# circle-part-value-test
+
+_circle-part-value-test_ evaluates partitioned models produced by circle-partitioner.
+
+### Process of evaluation
+
+The evaluation process is similar to that of _luci-value-test_:
+
+1) generate random inputs and store them as reference input file(s)
+2) execute the tflite file from common-artifacts to get reference output(s)
+3) partition the circle file with the .part file, producing models into the output folder
+4) execute the produced partitioned circle models with the reference input file(s)
+5) save the output(s) of the circle models to file(s)
+6) compare the reference output(s) with the saved output file(s)
+7) fail the test if values differ
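+
+For local debugging, the evaluation can also be driven by hand with
+`part_eval_all.sh`; a minimal sketch, assuming a typical build tree
+(the paths below are placeholders):
+
+```
+./part_eval_all.sh build/compiler/circle-part-value-test \
+                   <path/to/venv> \
+                   build/compiler/circle-part-driver/circle_part_driver \
+                   Part_Add_Sub_000
+```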
diff --git a/compiler/circle-part-value-test/part_eval_all.sh b/compiler/circle-part-value-test/part_eval_all.sh
new file mode 100755
index 000000000..ae8ae4731
--- /dev/null
+++ b/compiler/circle-part-value-test/part_eval_all.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+
+# This script verifies the basic behavior of circle-partitioner
+#
+# HOW TO USE
+#
+# ./part_eval_all.sh <path/to/work_dir> <path/to/venv_dir> <path/to/driver> <TEST 1> <TEST 2> ...
+#
+# work_dir : build directory of circle-part-value-test where test folders exist
+#            (ex: build/compiler/circle-part-value-test)
+# venv_dir : python virtual environment home directory
+# driver   : path to the circle-part-driver executable
+
+VERIFY_SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+VERIFY_SCRIPT_PATH="${VERIFY_SOURCE_PATH}/part_eval_one.py"
+WORKDIR="$1"; shift
+VIRTUALENV="$1"; shift
+CIRCLE_PART_DRIVER_PATH="$1"; shift
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+for TESTCASE in "$@"; do
+ TESTED+=("${TESTCASE}")
+
+  # for simplicity, the folder uses the same name as ${TESTCASE}
+ TESTCASE_FOLDER="${WORKDIR}/${TESTCASE}"
+
+ PASSED_TAG="${TESTCASE_FOLDER}.passed"
+ rm -f "${PASSED_TAG}"
+
+ cat > "${TESTCASE_FOLDER}.log" <(
+ exec 2>&1
+ set -ex
+
+    # chdir into the folder as the ini has relative filenames of the models
+ pushd ${TESTCASE_FOLDER}
+
+ source "${VIRTUALENV}/bin/activate"
+ "${VIRTUALENV}/bin/python" "${VERIFY_SCRIPT_PATH}" \
+ --driver "${CIRCLE_PART_DRIVER_PATH}" \
+ --name "${TESTCASE}"
+
+ if [[ $? -eq 0 ]]; then
+ touch "${PASSED_TAG}"
+ fi
+
+ popd
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("${TESTCASE}")
+ else
+ FAILED+=("${TESTCASE}")
+ fi
+done
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/circle-part-value-test/part_eval_one.py b/compiler/circle-part-value-test/part_eval_one.py
new file mode 100755
index 000000000..44661c78b
--- /dev/null
+++ b/compiler/circle-part-value-test/part_eval_one.py
@@ -0,0 +1,149 @@
+#!/usr/bin/env python3
+import numpy as np
+import tensorflow as tf
+import subprocess
+import argparse
+import traceback
+import json
+
+#
+# This script compares the execution results of the TFLite interpreter and the
+# partitioned model(s) produced from a circle model
+#
+# Basic usage for example:
+# part_eval_one.py \
+# --driver build/compiler/circle-part-driver/circle-part-driver \
+# --name test_file
+#
+parser = argparse.ArgumentParser()
+parser.add_argument('--driver', type=str, required=True)
+parser.add_argument('--name', type=str, required=True)
+args = parser.parse_args()
+
+driver = args.driver
+tflite_model = args.name + ".tflite"
+circle_model = args.name + ".circle"
+partition_conn_ini = args.name + ".conn.ini"
+partition_conn_json = args.name + ".conn.json"
+expected_count = args.name + ".excnt"
+
+# Check expected count of models from partitioning
+try:
+ with open(expected_count, "r") as expected_count_file:
+ expected_count_line = expected_count_file.readline()
+
+ expected_count_line = int(expected_count_line)
+ if expected_count_line:
+ with open(partition_conn_json) as json_file:
+ json_data = json.load(json_file)
+ parts_value = json_data["parts"]
+ if len(parts_value) != expected_count_line:
+ print("Partitioned model count differs from expected:",
+ expected_count_line)
+ quit(255)
+
+ print("Partitioned model count expected: ", expected_count_line)
+ else:
+ print("Skip expected partitioned model count check: 0")
+
+except:
+ print("Skip expected partitioned model count check: error")
+
+# Build TFLite interpreter.
+interpreter = tf.lite.Interpreter(tflite_model)
+interpreter.allocate_tensors()
+
+# Read SignatureDef and get output tensor id orders for remapping
+full_signatures = interpreter._get_full_signature_list()
+full_signatures_outputs_remap = None
+if full_signatures is not None:
+    signature_serving_default = full_signatures.get('serving_default', None)
+    if signature_serving_default is not None:
+ signature_outputs = signature_serving_default['outputs']
+
+ full_signatures_outputs_remap = []
+ for index, (key, value) in enumerate(signature_outputs.items()):
+ full_signatures_outputs_remap.append(value)
+
+# Generate random input data.
+num_inputs = len(interpreter.get_input_details())
+for i in range(num_inputs):
+ input_details = interpreter.get_input_details()[i]
+ input_details_dtype = input_details["dtype"]
+ input_details_shape = input_details["shape"]
+ if input_details_dtype == np.float32:
+ input_data = np.array(
+ np.random.random_sample(input_details_shape), input_details_dtype)
+ elif input_details_dtype == np.int16:
+ input_data = np.array(
+ np.random.randint(0, 100, size=input_details_shape), input_details_dtype)
+ elif input_details_dtype == np.uint8:
+ input_data = np.array(
+ np.random.randint(0, 256, size=input_details_shape), input_details_dtype)
+ elif input_details_dtype == np.bool_:
+ input_data = np.array(
+ np.random.choice(a=[True, False], size=input_details_shape),
+ input_details_dtype)
+ else:
+ raise SystemExit("Unsupported input dtype")
+
+ interpreter.set_tensor(input_details["index"], input_data)
+ input_data.tofile(circle_model + ".input" + str(i))
+
+# Do inference
+interpreter.invoke()
+
+# Execute circle-part-driver.
+partition_command = [
+ driver, partition_conn_ini,
+ str(num_inputs), circle_model + ".input", circle_model + ".output"
+]
+print("Run: ")
+for arg in partition_command:
+ print(" ", arg, "\\")
+print("", flush=True)
+
+subprocess.run(partition_command, check=True)
+
+# Compare the results.
+inpt_output_details = interpreter.get_output_details()
+for idx in range(len(inpt_output_details)):
+ output_details = inpt_output_details[idx]
+ output_dtype = output_details["dtype"]
+ output_data = np.fromfile(circle_model + ".output" + str(idx), output_dtype)
+ shape_file = open(circle_model + ".output" + str(idx) + ".shape", 'r')
+ output_shape = [int(i) for i in shape_file.read().split(',')]
+ luci_output_data = np.reshape(output_data, output_shape)
+ output_tensor = output_details["index"]
+    if full_signatures_outputs_remap is not None:
+ output_tensor = full_signatures_outputs_remap[idx]
+ intp_output_data = interpreter.get_tensor(output_tensor)
+    try:
+        if output_dtype == np.float32:
+            matched = np.allclose(
+                luci_output_data, intp_output_data, rtol=1.e-5, atol=1.e-5)
+        elif output_dtype in (np.uint8, np.int16, np.int32, np.int64):
+            # integer outputs must match exactly
+            matched = np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0)
+        else:
+            raise SystemExit("Unsupported data type: " + str(output_dtype))
+        if not matched:
+            raise SystemExit("Execution result of " + tflite_model +
+                             " does not match with " + circle_model)
+    except:
+        print(traceback.format_exc())
+        quit(255)
+
+quit(0)
diff --git a/compiler/circle-part-value-test/parts/Net_InstanceNorm_003.001.part b/compiler/circle-part-value-test/parts/Net_InstanceNorm_003.001.part
new file mode 100644
index 000000000..01b8c704e
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Net_InstanceNorm_003.001.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+ADD=acl_cl
diff --git a/compiler/circle-part-value-test/parts/Net_InstanceNorm_003.002.part b/compiler/circle-part-value-test/parts/Net_InstanceNorm_003.002.part
new file mode 100644
index 000000000..dc378a448
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Net_InstanceNorm_003.002.part
@@ -0,0 +1,8 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+SUB=acl_cl
+DIV=acl_cl
diff --git a/compiler/circle-part-value-test/parts/Net_InstanceNorm_003.003.part b/compiler/circle-part-value-test/parts/Net_InstanceNorm_003.003.part
new file mode 100644
index 000000000..eee3fd1d1
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Net_InstanceNorm_003.003.part
@@ -0,0 +1,9 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opname
+
+[OPNAME]
+Mean_as_variance=acl_cl
+Add_as_variance=acl_cl
+Pow=acl_cl
diff --git a/compiler/circle-part-value-test/parts/Net_InstanceNorm_003.part b/compiler/circle-part-value-test/parts/Net_InstanceNorm_003.part
new file mode 100644
index 000000000..d4d439d27
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Net_InstanceNorm_003.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+DIV=acl_cl
diff --git a/compiler/circle-part-value-test/parts/Net_UnpackAdd_001.001.part b/compiler/circle-part-value-test/parts/Net_UnpackAdd_001.001.part
new file mode 100644
index 000000000..496971e55
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Net_UnpackAdd_001.001.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,npu
+default=cpu
+comply=opcode
+
+[OPCODE]
+ADD=npu
diff --git a/compiler/circle-part-value-test/parts/Net_UnpackAdd_001.002.part b/compiler/circle-part-value-test/parts/Net_UnpackAdd_001.002.part
new file mode 100644
index 000000000..9913fea96
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Net_UnpackAdd_001.002.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,npu
+default=cpu
+comply=opcode
+
+[OPCODE]
+UNPACK=npu
diff --git a/compiler/circle-part-value-test/parts/Net_UnpackAdd_001.part b/compiler/circle-part-value-test/parts/Net_UnpackAdd_001.part
new file mode 100644
index 000000000..c63efc592
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Net_UnpackAdd_001.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,npu
+default=npu
+comply=opcode
+
+[OPCODE]
+UNPACK=cpu
diff --git a/compiler/circle-part-value-test/parts/Part_Add_Sqrt_000.part b/compiler/circle-part-value-test/parts/Part_Add_Sqrt_000.part
new file mode 100644
index 000000000..402af87e9
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_Add_Sqrt_000.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+SQRT=acl_cl
diff --git a/compiler/circle-part-value-test/parts/Part_Add_Sqrt_Rsqrt_000.part b/compiler/circle-part-value-test/parts/Part_Add_Sqrt_Rsqrt_000.part
new file mode 100644
index 000000000..c6dba9f94
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_Add_Sqrt_Rsqrt_000.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+RSQRT=acl_cl
diff --git a/compiler/circle-part-value-test/parts/Part_Add_Sub_000.001.part b/compiler/circle-part-value-test/parts/Part_Add_Sub_000.001.part
new file mode 100644
index 000000000..179cad191
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_Add_Sub_000.001.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opname
+
+[OPNAME]
+add1=acl_cl
diff --git a/compiler/circle-part-value-test/parts/Part_Add_Sub_000.part b/compiler/circle-part-value-test/parts/Part_Add_Sub_000.part
new file mode 100644
index 000000000..905137ce7
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_Add_Sub_000.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+SUB=acl_cl
diff --git a/compiler/circle-part-value-test/parts/Part_Add_Sub_001.part b/compiler/circle-part-value-test/parts/Part_Add_Sub_001.part
new file mode 100644
index 000000000..41ce4b23d
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_Add_Sub_001.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opname
+
+[OPNAME]
+some/node/add2;and/another=acl_cl
diff --git a/compiler/circle-part-value-test/parts/Part_Add_Sub_002.001.part b/compiler/circle-part-value-test/parts/Part_Add_Sub_002.001.part
new file mode 100644
index 000000000..030653e8a
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_Add_Sub_002.001.part
@@ -0,0 +1,9 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opname
+
+[OPNAME]
+add1=cpu
+add2=acl_cl
+ofm=acl_cl
diff --git a/compiler/circle-part-value-test/parts/Part_Add_Sub_002.002.part b/compiler/circle-part-value-test/parts/Part_Add_Sub_002.002.part
new file mode 100644
index 000000000..837b36269
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_Add_Sub_002.002.part
@@ -0,0 +1,9 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opname
+
+[OPNAME]
+add1=acl_cl
+add2=acl_cl
+ofm=cpu
diff --git a/compiler/circle-part-value-test/parts/Part_If_Add_Sub_000.001.part b/compiler/circle-part-value-test/parts/Part_If_Add_Sub_000.001.part
new file mode 100644
index 000000000..01b8c704e
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_If_Add_Sub_000.001.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+ADD=acl_cl
diff --git a/compiler/circle-part-value-test/parts/Part_If_Add_Sub_001.001.part b/compiler/circle-part-value-test/parts/Part_If_Add_Sub_001.001.part
new file mode 100644
index 000000000..01b8c704e
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_If_Add_Sub_001.001.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+ADD=acl_cl
diff --git a/compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_000.part b/compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_000.part
new file mode 100644
index 000000000..ad0842165
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_000.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,npu
+default=npu
+comply=opcode
+
+[OPCODE]
+MUL=npu
diff --git a/compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_001.part b/compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_001.part
new file mode 100644
index 000000000..c82b741b0
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_001.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,npu
+default=npu
+comply=opcode
+
+[OPCODE]
+SQRT=cpu
diff --git a/compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_002.part b/compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_002.part
new file mode 100644
index 000000000..d9d2a8e59
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_002.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,npu
+default=npu
+comply=opcode
+
+[OPCODE]
+FULLY_CONNECTED=cpu
diff --git a/compiler/circle-part-value-test/parts/Part_Split_Add_000.part b/compiler/circle-part-value-test/parts/Part_Split_Add_000.part
new file mode 100644
index 000000000..91af566cd
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_Split_Add_000.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,npu
+default=npu
+comply=opcode
+
+[OPCODE]
+SPLIT=cpu
diff --git a/compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_000.part b/compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_000.part
new file mode 100644
index 000000000..402af87e9
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_000.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+SQRT=acl_cl
diff --git a/compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_001.part b/compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_001.part
new file mode 100644
index 000000000..402af87e9
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_001.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+SQRT=acl_cl
diff --git a/compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_002.part b/compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_002.part
new file mode 100644
index 000000000..402af87e9
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_002.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+SQRT=acl_cl
diff --git a/compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_003.part b/compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_003.part
new file mode 100644
index 000000000..402af87e9
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_003.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+SQRT=acl_cl
diff --git a/compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_Add_000.part b/compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_Add_000.part
new file mode 100644
index 000000000..402af87e9
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_Add_000.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+SQRT=acl_cl
diff --git a/compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_Add_001.part b/compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_Add_001.part
new file mode 100644
index 000000000..402af87e9
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_Add_001.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+SQRT=acl_cl
diff --git a/compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_Add_002.part b/compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_Add_002.part
new file mode 100644
index 000000000..402af87e9
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_Add_002.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+SQRT=acl_cl
diff --git a/compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_Add_003.part b/compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_Add_003.part
new file mode 100644
index 000000000..0ec264c94
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_Add_003.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+WWW=acl_cl
diff --git a/compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_Add_004.part b/compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_Add_004.part
new file mode 100644
index 000000000..febab2246
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_Sqrt_Rsqrt_Add_004.part
@@ -0,0 +1,6 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
diff --git a/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias.part b/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias.part
new file mode 100644
index 000000000..d4d439d27
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+DIV=acl_cl
diff --git a/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_001.part b/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_001.part
new file mode 100644
index 000000000..dbd174ee1
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_001.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,npu
+default=npu
+comply=opcode
+
+[OPCODE]
+TANH=cpu
diff --git a/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_002.part b/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_002.part
new file mode 100644
index 000000000..475439a9d
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_002.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,npu
+default=cpu
+comply=opcode
+
+[OPCODE]
+FULLY_CONNECTED=npu
diff --git a/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_003.part b/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_003.part
new file mode 100644
index 000000000..d9d2a8e59
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_003.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,npu
+default=npu
+comply=opcode
+
+[OPCODE]
+FULLY_CONNECTED=cpu
diff --git a/compiler/circle-part-value-test/parts/Part_While_000.part b/compiler/circle-part-value-test/parts/Part_While_000.part
new file mode 100644
index 000000000..e469eeb26
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_While_000.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+MAXIMUM=acl_cl
diff --git a/compiler/circle-part-value-test/parts/Part_While_001.part b/compiler/circle-part-value-test/parts/Part_While_001.part
new file mode 100644
index 000000000..e469eeb26
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/Part_While_001.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+MAXIMUM=acl_cl
diff --git a/compiler/circle-part-value-test/parts/SignatureDef_MultiOut_000.part b/compiler/circle-part-value-test/parts/SignatureDef_MultiOut_000.part
new file mode 100644
index 000000000..e469eeb26
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/SignatureDef_MultiOut_000.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+MAXIMUM=acl_cl
diff --git a/compiler/circle-part-value-test/parts/SignatureDef_MultiOut_001.part b/compiler/circle-part-value-test/parts/SignatureDef_MultiOut_001.part
new file mode 100644
index 000000000..e469eeb26
--- /dev/null
+++ b/compiler/circle-part-value-test/parts/SignatureDef_MultiOut_001.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+MAXIMUM=acl_cl
diff --git a/compiler/circle-part-value-test/requires.cmake b/compiler/circle-part-value-test/requires.cmake
new file mode 100644
index 000000000..a9301f947
--- /dev/null
+++ b/compiler/circle-part-value-test/requires.cmake
@@ -0,0 +1,3 @@
+require("common-artifacts")
+require("circle-partitioner")
+require("circle-part-driver")
diff --git a/compiler/circle-part-value-test/test.lst b/compiler/circle-part-value-test/test.lst
new file mode 100644
index 000000000..b7a3f403a
--- /dev/null
+++ b/compiler/circle-part-value-test/test.lst
@@ -0,0 +1,58 @@
+# Add recipe names from /res/TensorFlowLiteRecipes to test.
+# Only add items that exist in the common-artifacts test: tflite/circle files are copied as sources.
+#
+# add(RECIPE_NAME PARTITION_NAME EXPECTED_OUTPUT_COUNT)
+# EXPECTED_OUTPUT_COUNT: use 0 to skip the expected count check
+
+add(Part_Add_Sub_000 Part_Add_Sub_000 2)
+add(Part_Sqrt_Rsqrt_000 Part_Sqrt_Rsqrt_000 2)
+add(Part_Sqrt_Rsqrt_001 Part_Sqrt_Rsqrt_001 2)
+add(Part_Sqrt_Rsqrt_002 Part_Sqrt_Rsqrt_002 4)
+add(Part_Sqrt_Rsqrt_003 Part_Sqrt_Rsqrt_003 3)
+add(Part_Sqrt_Rsqrt_Add_000 Part_Sqrt_Rsqrt_Add_000 3)
+add(Part_Sqrt_Rsqrt_Add_001 Part_Sqrt_Rsqrt_Add_001 3)
+add(Part_Sqrt_Rsqrt_Add_002 Part_Sqrt_Rsqrt_Add_002 4)
+add(Part_Sqrt_Rsqrt_Add_003 Part_Sqrt_Rsqrt_Add_003 1)
+add(Part_Sqrt_Rsqrt_Add_004 Part_Sqrt_Rsqrt_Add_004 1)
+add(Part_Add_Sqrt_000 Part_Add_Sqrt_000 3)
+add(Part_Add_Sqrt_Rsqrt_000 Part_Add_Sqrt_Rsqrt_000 3)
+add(Net_InstanceNorm_003 Net_InstanceNorm_003 3)
+add(Net_InstanceNorm_003 Net_InstanceNorm_003.001 5)
+# skip expected count for now
+add(Net_InstanceNorm_003 Net_InstanceNorm_003.002 0)
+
+# comply=opname
+add(Part_Add_Sub_000 Part_Add_Sub_000.001 3)
+add(Part_Add_Sub_001 Part_Add_Sub_001 3)
+add(Part_Add_Sub_002 Part_Add_Sub_002.001 2)
+add(Part_Add_Sub_002 Part_Add_Sub_002.002 2)
+add(Net_InstanceNorm_003 Net_InstanceNorm_003.003 3)
+
+# IF with subgraphs
+add(Part_If_Add_Sub_000 Part_If_Add_Sub_000.001 3)
+add(Part_If_Add_Sub_001 Part_If_Add_Sub_001.001 3)
+
+# WHILE with subgraphs
+add(Part_While_000 Part_While_000 3)
+add(Part_While_001 Part_While_001 3)
+
+# UNPACK with multiple outputs
+add(Net_UnpackAdd_001 Net_UnpackAdd_001 2)
+add(Net_UnpackAdd_001 Net_UnpackAdd_001.001 2)
+add(Net_UnpackAdd_001 Net_UnpackAdd_001.002 2)
+
+# Other multiple outputs
+add(Part_Split_Add_000 Part_Split_Add_000 2)
+
+# test SignatureDef, with any OPCODE
+add(SignatureDef_MultiOut_000 SignatureDef_MultiOut_000 0)
+add(SignatureDef_MultiOut_001 SignatureDef_MultiOut_001 0)
+
+# FC with nobias
+add(Part_Tanh_FC_nobias Part_Tanh_FC_nobias 1)
+add(Part_Tanh_FC_nobias Part_Tanh_FC_nobias_001 2)
+add(Part_Tanh_FC_nobias Part_Tanh_FC_nobias_002 2)
+add(Part_Tanh_FC_nobias Part_Tanh_FC_nobias_003 2)
+add(Part_Mul_Sqrt_FC_nobias_000 Part_Mul_Sqrt_FC_nobias_000_000 0)
+add(Part_Mul_Sqrt_FC_nobias_000 Part_Mul_Sqrt_FC_nobias_000_001 0)
+add(Part_Mul_Sqrt_FC_nobias_000 Part_Mul_Sqrt_FC_nobias_000_002 0)
diff --git a/compiler/circle-partitioner-test/CMakeLists.txt b/compiler/circle-partitioner-test/CMakeLists.txt
new file mode 100644
index 000000000..7b26b3ba7
--- /dev/null
+++ b/compiler/circle-partitioner-test/CMakeLists.txt
@@ -0,0 +1,72 @@
+# NOTE Tests below check that circle-partitioner performs the partitioning itself.
+# Once a test passes here, add the partition to 'circle-part-value-test' for a
+# full value test.
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
+
+unset(RECIPE_LIST)
+unset(PART_LIST)
+unset(TEST_DEPS)
+
+macro(add RECIPE_NAME PART_NAME)
+ list(APPEND RECIPE_LIST ${RECIPE_NAME})
+ list(APPEND PART_LIST ${PART_NAME})
+endmacro(add)
+
+# Read "test.lst"
+include("test.lst")
+
+list(LENGTH RECIPE_LIST RECIPE_LENGTH)
+math(EXPR RECIPE_LENGTH_M1 "${RECIPE_LENGTH} - 1")
+
+foreach(IDX RANGE ${RECIPE_LENGTH_M1})
+ list(GET RECIPE_LIST ${IDX} RECIPE_NAME)
+ list(GET PART_LIST ${IDX} PART_NAME)
+
+ set(PART_OUT_PATH "${CMAKE_CURRENT_BINARY_DIR}/${PART_NAME}")
+
+ add_custom_command(OUTPUT ${PART_OUT_PATH}
+ COMMAND ${CMAKE_COMMAND} -E make_directory "${PART_OUT_PATH}"
+ COMMENT "Make directory ${PART_OUT_PATH}"
+ )
+
+ set(CIRCLE_SRC_PATH "${ARTIFACTS_BIN_PATH}/${RECIPE_NAME}.circle")
+ set(CIRCLE_DST_PATH "${PART_OUT_PATH}/${PART_NAME}.circle")
+
+ # Copy circle
+ add_custom_command(OUTPUT ${CIRCLE_DST_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${CIRCLE_SRC_PATH}" "${CIRCLE_DST_PATH}"
+ DEPENDS ${CIRCLE_SRC_PATH}
+ COMMENT "Copy ${RECIPE_NAME}.circle"
+ )
+
+ set(PART_FILE "${PART_NAME}.part")
+ set(PART_SRC_PATH "${CMAKE_CURRENT_SOURCE_DIR}/parts/${PART_FILE}")
+ set(PART_DST_PATH "${PART_OUT_PATH}/${PART_FILE}")
+
+ # Copy .part
+ add_custom_command(OUTPUT ${PART_DST_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${PART_SRC_PATH}" "${PART_DST_PATH}"
+ DEPENDS ${PART_SRC_PATH}
+ COMMENT "Copy ${PART_FILE}"
+ )
+
+ # Run partitioner
+ set(PART_CONN_JSON "${PART_OUT_PATH}/${PART_NAME}.conn.json")
+ add_custom_command(OUTPUT ${PART_CONN_JSON}
+ COMMAND circle-partitioner "--part_file" "${PART_FILE}" "--input_file"
+ "${PART_NAME}.circle" "--work_path" "${PART_OUT_PATH}"
+ DEPENDS circle-partitioner ${CIRCLE_DST_PATH} ${PART_DST_PATH}
+    COMMENT "Partition ${RECIPE_NAME}.circle with ${PART_FILE}"
+ )
+  # NOTE this is checked at build time and not added with the 'add_test' command
+  # to reduce the scripts needed for testing. Actual value testing is done in
+  # 'circle-part-value-test'.
+
+ list(APPEND TEST_DEPS ${CIRCLE_DST_PATH} ${PART_DST_PATH} ${PART_CONN_JSON})
+endforeach(IDX)
+
+add_custom_target(circle_partitioner_test ALL DEPENDS ${TEST_DEPS})
+add_dependencies(circle_partitioner_test common_artifacts_deps)
diff --git a/compiler/circle-partitioner-test/README.md b/compiler/circle-partitioner-test/README.md
new file mode 100644
index 000000000..46da98651
--- /dev/null
+++ b/compiler/circle-partitioner-test/README.md
@@ -0,0 +1,11 @@
+# circle-partitioner-test
+
+_circle-partitioner-test_ provides tests of circle-partitioner;
+it checks that partitioning works correctly, without value testing.
+- full value testing is done with _circle-part-value-test_.
+
+The purpose of this test is to check locally how the partitioning itself is done
+before value testing. After you have checked that model partitioning works as you
+expect, you can add the test to _circle-part-value-test_.
+
+It is not necessary to commit tests of this module upstream.
diff --git a/compiler/circle-partitioner-test/parts/Net_InstanceNorm_003.part b/compiler/circle-partitioner-test/parts/Net_InstanceNorm_003.part
new file mode 100644
index 000000000..d4d439d27
--- /dev/null
+++ b/compiler/circle-partitioner-test/parts/Net_InstanceNorm_003.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+DIV=acl_cl
diff --git a/compiler/circle-partitioner-test/parts/Part_Add_SVDF_000.part b/compiler/circle-partitioner-test/parts/Part_Add_SVDF_000.part
new file mode 100644
index 000000000..01b8c704e
--- /dev/null
+++ b/compiler/circle-partitioner-test/parts/Part_Add_SVDF_000.part
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+ADD=acl_cl
diff --git a/compiler/circle-partitioner-test/requires.cmake b/compiler/circle-partitioner-test/requires.cmake
new file mode 100644
index 000000000..52685cf67
--- /dev/null
+++ b/compiler/circle-partitioner-test/requires.cmake
@@ -0,0 +1,2 @@
+require("circle-partitioner")
+require("common-artifacts")
diff --git a/compiler/circle-partitioner-test/test.lst b/compiler/circle-partitioner-test/test.lst
new file mode 100644
index 000000000..c0c185c7e
--- /dev/null
+++ b/compiler/circle-partitioner-test/test.lst
@@ -0,0 +1,11 @@
+# Add recipes in /res/TensorFlowLiteRecipes to test.
+# NOTE: only add items that exist in the common-artifacts test: circle files are
+# copied from common-artifacts.
+# Use this list before running the end-to-end test in 'circle-part-value-test'.
+# add(RECIPE_NAME PART_NAME)
+
+add(Net_InstanceNorm_003 Net_InstanceNorm_003)
+
+# NOTE SVDF partition test is done here as value test may need custom tolerance
+# TODO move Part_Add_SVDF_000 to circle-part-value-test when ready
+add(Part_Add_SVDF_000 Part_Add_SVDF_000)
diff --git a/compiler/circle-partitioner/CMakeLists.txt b/compiler/circle-partitioner/CMakeLists.txt
new file mode 100644
index 000000000..abc5d93fb
--- /dev/null
+++ b/compiler/circle-partitioner/CMakeLists.txt
@@ -0,0 +1,18 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_executable(circle-partitioner "${SOURCES}")
+target_link_libraries(circle-partitioner crew)
+target_link_libraries(circle-partitioner safemain)
+target_link_libraries(circle-partitioner luci_lang)
+target_link_libraries(circle-partitioner luci_log)
+target_link_libraries(circle-partitioner luci_import)
+target_link_libraries(circle-partitioner luci_service)
+target_link_libraries(circle-partitioner luci_pass)
+target_link_libraries(circle-partitioner luci_export)
+target_link_libraries(circle-partitioner luci_partition)
+target_link_libraries(circle-partitioner arser)
+target_link_libraries(circle-partitioner pepper_csv2vec)
+target_link_libraries(circle-partitioner vconone)
+target_link_libraries(circle-partitioner nncc_common)
+
+install(TARGETS circle-partitioner DESTINATION bin)
diff --git a/compiler/circle-partitioner/README.md b/compiler/circle-partitioner/README.md
new file mode 100644
index 000000000..760cf28d1
--- /dev/null
+++ b/compiler/circle-partitioner/README.md
@@ -0,0 +1,294 @@
+# circle-partitioner
+
+_circle-partitioner_ partitions a circle model into two or more circle models.
+
+## How circle-partitioner works
+
+_circle-partitioner_ requires three arguments for input files:
+- `--part_file`: `partition` file, with extension `.part`
+- `--input_file`: `input` circle model file
+- `--work_path`: `work` path where the input files reside; optional, CWD is used if omitted
+
+There are also options to override the `partition` file, as a helper to try things
+out without editing the `partition` file:
+- `--backends`: override `backends` of the `[partition]` section
+- `--default`: override `default` of the `[partition]` section
+
+_circle-partitioner_ reads the `partition` and `input` files from the `work` folder,
+groups nodes with the same backend, and stores each group as a new circle model in
+the `work` folder.
+
+Outputs are (1) one or more partitioned circle models and (2) a connection file that
+describes how the partitioned models should be connected to act like the source
+`input` model.
+
+Why should the input files be placed in the `work` path too?
+- this is still a work-in-progress state
+- use cases are still ambiguous
+- the original `input` model file can be used by the backend, so the `.conn` file links it as `source`
+- to keep things simple for the backend, it is better not to use relative paths for the files
+
+### `partition` file
+
+The `partition` file follows the INI format of the _crew_ project.
+
+Several example files exist in the `parts` folder of _circle-part-value-test_.
+
+This section explains the format with the `Net_InstanceNorm_003.part` file as an example.
+```ini
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+DIV=acl_cl
+```
+
+##### `[partition]` section
+
+The `[partition]` section is the main section and is read first.
+- `backends`: partition group names into which nodes are placed, in CSV format.
+- `default`: default group name, which should be one of the `backends` items.
+- `comply`: how to group nodes of the model.
+  - currently `opcode` and `opname` are supported
+  - future work: group by sequence number.
+
+##### `[OPCODE]` section
+
+This section describes how to group nodes by OPCODE type.
+Nodes with the same OPCODE are grouped to the assigned backend.
+This does not mean the number of output circle files equals the number of backends;
+the number of output circle files also depends on the network structure.
+
+For the above example, all `DIV` OPCODE nodes are grouped to the `acl_cl` backend.
+
+The `[OPCODE]` section can override the `default` backend set in the `[partition]` section by using `_`.
+
+For example, we can change the default to `cpu`:
+```
+[OPCODE]
+_=cpu
+DIV=acl_cl
+```
+
+### `circle` file
+
+Just a normal `circle` file. Partitioning currently supports a limited set of model
+properties; models with the following properties are not supported yet:
+- multiple subgraphs
+- operators with multiple output nodes, such as IF or WHILE
+
+### `work` folder
+
+The `partition` and `circle` files should reside in the `work` folder. Output files
+are generated inside this folder.
+
+### Example
+
+Typical source tree for partitioning
+```
+$ tree Net_InstanceNorm_003/
+Net_InstanceNorm_003/
+├── Net_InstanceNorm_003.circle
+└── Net_InstanceNorm_003.part
+```
+
+Command example
+```
+./circle-partitioner --part_file Net_InstanceNorm_003.part \
+--input_file Net_InstanceNorm_003.circle --work_path Net_InstanceNorm_003
+```
+
+Result of _circle-partitioner_
+```
+$ tree Net_InstanceNorm_003/
+Net_InstanceNorm_003/
+├── Net_InstanceNorm_003.00001_cpu.circle
+├── Net_InstanceNorm_003.00002_acl_cl.circle
+├── Net_InstanceNorm_003.00003_cpu.circle
+├── Net_InstanceNorm_003.circle
+├── Net_InstanceNorm_003.conn.ini
+├── Net_InstanceNorm_003.conn.json
+└── Net_InstanceNorm_003.part
+```
+
+### `Net_InstanceNorm_003.conn.ini` and `Net_InstanceNorm_003.conn.json`
+
+These two files are identical in content but in different formats.
+
+The `.conn` file provides information on how to reconstruct the partitioned models,
+`Net_InstanceNorm_003.00001_cpu.circle`, `Net_InstanceNorm_003.00002_acl_cl.circle`
+and `Net_InstanceNorm_003.00003_cpu.circle`, so that the result is identical to the
+source `Net_InstanceNorm_003.circle` model in computational results.
+
+Here, `reconstruct` means connecting the outputs and inputs of the partitioned
+models.
+
+```json
+{
+ "source" : {
+ "file" : "Net_InstanceNorm_003.circle",
+ "inputs" : [ "Input" ],
+ "outputs" : [ "Add_as_terminal" ]
+ },
+ "parts" : [
+ {
+ "file" : "Net_InstanceNorm_003.00001_cpu.circle",
+ "inputs" : [ "Input" ],
+ "outputs" : [ "Pow", "Sub" ]
+ },
+ {
+ "file" : "Net_InstanceNorm_003.00002_acl_cl.circle",
+ "inputs" : [ "Sub", "Pow" ],
+ "outputs" : [ "Div" ]
+ },
+ {
+ "file" : "Net_InstanceNorm_003.00003_cpu.circle",
+ "inputs" : [ "Div" ],
+ "outputs" : [ "Add_as_terminal" ]
+ }
+ ]
+}
+```
+The file above is in JSON format, with a `source` entry for the source model and a `parts`
+array for the partitioned models. Each entry in `parts` has `file` for the filename,
+`inputs` for input nodes and `outputs` for output nodes.
+
+From the `source` entry we can identify the inputs and outputs of the model.
+
+- Each item in `outputs` should connect to the `inputs` of another `parts` item,
+or be one of the `outputs` of the `source` model.
+- For the first model, `Net_InstanceNorm_003.00001_cpu.circle`, `inputs` is the same
+as that of the `source` model: `[ "Input" ]`.
+- Its `outputs` `[ "Pow", "Sub" ]` appear under the same names in the second model,
+`Net_InstanceNorm_003.00002_acl_cl.circle`, to which they should be connected.
+- And its `outputs` `[ "Div" ]` should be connected to the `inputs` of the
+third model, `Net_InstanceNorm_003.00003_cpu.circle`.
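+
+Assuming the `.conn.json` layout shown above, these rules can be sketched as a small
+consistency check:
+
+```python
+import json
+
+with open("Net_InstanceNorm_003/Net_InstanceNorm_003.conn.json") as f:
+    conn = json.load(f)
+
+# every part output must feed another part's inputs or be a model output
+consumed = {name for part in conn["parts"] for name in part["inputs"]}
+model_outputs = set(conn["source"]["outputs"])
+for part in conn["parts"]:
+    for out in part["outputs"]:
+        assert out in consumed or out in model_outputs, f"dangling output: {out}"
+```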
+
+### Execution example
+
+Consider partitioning with the backends of OneRT:
+- `cpu`, `acl_cl`, `acl_neon`, `ruy`, `xnnpack`
+
+Let's try with this command:
+```
+circle-partitioner \
+ --backends cpu,acl_cl --default cpu \
+ --part_file Net_InstanceNorm_003.part \
+ --input_file Net_InstanceNorm_003.circle \
+ --work_path Net_InstanceNorm_003
+```
+
+where `Net_InstanceNorm_003.part` looks like this in the initial design:
+```
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+ADD=acl_cl
+```
+In the `[partition]` section,
+- `backends` lists the available backends; it can be overridden by `--backends`
+- `default` is the default backend for OPCODEs not assigned in the `[OPCODE]` section; it can be overridden by `--default`
+- `comply` selects which grouping rule to apply: `opcode` or `opname`
+
+#### Use Op name to assign backend
+
+```
+[OPNAME]
+Reduction_indices=GPU
+```
+- Note that Op names can be very long, which may be inconvenient.
+
+### Partitioned output
+
+#### Output files
+
+After partitioning is applied, the output files will look like these:
+- `Net_InstanceNorm_003.part.00001_cpu.circle`
+- `Net_InstanceNorm_003.part.00002_acl_cl.circle`
+- `Net_InstanceNorm_003.part.00003_cpu.circle`
+- `Net_InstanceNorm_003.part.conn.ini`
+- `Net_InstanceNorm_003.part.conn.json`
+
+Assume only the `Div` node is assigned to `acl_cl`.
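+
+The numbering in these filenames follows `make_path()` in `HelperPath.cpp` later in this
+diff: a 5-digit zero-padded sequence number plus the backend name. Sketched in Python:
+
+```python
+def part_filename(stem, idx, backend, ext="circle"):
+    """part_filename('Net_InstanceNorm_003.part', 2, 'acl_cl')
+    -> 'Net_InstanceNorm_003.part.00002_acl_cl.circle'"""
+    return f"{stem}.{idx:05d}_{backend}.{ext}"
+```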
+
+#### Connection information of partitioned circle files
+
+##### Format with INI
+- `Net_InstanceNorm_003.part.conn.ini` provides the connections between the circle files.
+```
+[source]
+file=Net_InstanceNorm_003.circle
+i1=Input
+o1=Add_as_terminal
+
+[models]
+m1=Net_InstanceNorm_003.part.00001_cpu.circle
+m2=Net_InstanceNorm_003.part.00002_acl_cl.circle
+m3=Net_InstanceNorm_003.part.00003_cpu.circle
+
+[Net_InstanceNorm_003.part.00001_cpu.circle]
+file=Net_InstanceNorm_003.part.00001_cpu.circle
+i1=Input
+o1=Pow
+o2=Sub
+
+[Net_InstanceNorm_003.part.00002_acl_cl.circle]
+file=Net_InstanceNorm_003.part.00002_acl_cl.circle
+i1=Sub
+i2=Pow
+o1=Div
+
+[Net_InstanceNorm_003.part.00003_cpu.circle]
+file=Net_InstanceNorm_003.part.00003_cpu.circle
+i1=Div
+o1=Add_as_terminal
+```
+
+Predefined sections:
+- `source`: Source circle model information. Has `file` for the filename, `iN` for inputs and `oN` for outputs.
+- `models`: Partitioned circle models. Has `mN` entries for the model filenames.
+
+Partitioned model sections (one per model file):
+- `iN`: inputs of this model
+- `oN`: outputs of this model
+
+In the graph diagram, the output order of `Net_InstanceNorm_003.part.00001_cpu.circle`
+looks like `Pow,Sub`, but the `Div` Op in `Net_InstanceNorm_003.part.00002_acl_cl.circle`
+requires the order `Sub,Pow`.
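+
+Because of this, tensors should be matched by name rather than by list position. A sketch
+of reading the INI with Python's stdlib parser (keys such as `i1`, `o1` follow the example
+above):
+
+```python
+import configparser
+
+cfg = configparser.ConfigParser()
+cfg.optionxform = str  # keep tensor names case-sensitive
+cfg.read("Net_InstanceNorm_003.part.conn.ini")
+
+sec = cfg["Net_InstanceNorm_003.part.00002_acl_cl.circle"]
+inputs = [v for k, v in sec.items() if k.startswith("i")]   # ['Sub', 'Pow']
+outputs = [v for k, v in sec.items() if k.startswith("o")]  # ['Div']
+# connect by tensor name, not by position
+```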
+
+##### Format with JSON
+- The same information in JSON format, `Net_InstanceNorm_003.part.conn.json`:
+```json
+{
+ "source" : {
+ "file" : "Net_InstanceNorm_003.circle",
+ "inputs" : [ "Input" ],
+ "outputs" : [ "Add_as_terminal" ]
+ },
+ "parts" : [
+    {
+      "file" : "Net_InstanceNorm_003.part.00001_cpu.circle",
+      "inputs" : [ "Input" ],
+      "outputs" : [ "Pow", "Sub" ]
+    },
+    {
+      "file" : "Net_InstanceNorm_003.part.00002_acl_cl.circle",
+      "inputs" : [ "Sub", "Pow" ],
+      "outputs" : [ "Div" ]
+ },
+ {
+ "file" : "Net_InstanceNorm_003.part.00003_cpu.circle",
+ "inputs" : [ "Div" ],
+ "outputs" : [ "Add_as_terminal" ]
+ }
+ ]
+}
+```
+
+### Future work
+
+How to partition with multiple inputs?
diff --git a/compiler/circle-partitioner/requires.cmake b/compiler/circle-partitioner/requires.cmake
new file mode 100644
index 000000000..82d9c2b0f
--- /dev/null
+++ b/compiler/circle-partitioner/requires.cmake
@@ -0,0 +1,6 @@
+require("crew")
+require("pepper-csv2vec")
+require("safemain")
+require("luci")
+require("arser")
+require("vconone")
diff --git a/compiler/circle-partitioner/src/CirclePartitioner.cpp b/compiler/circle-partitioner/src/CirclePartitioner.cpp
new file mode 100644
index 000000000..5cecb9ae0
--- /dev/null
+++ b/compiler/circle-partitioner/src/CirclePartitioner.cpp
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionRead.h"
+#include "PartitionExport.h"
+#include "HelperPath.h"
+
+#include <luci/ImporterEx.h>
+#include <luci/Service/Validate.h>
+#include <luci/CircleExporter.h>
+#include <luci/CircleFileExpContract.h>
+#include <luci/CircleOptimizer.h>
+#include <luci/PartitionDump.h>
+#include <luci/PartitionValidate.h>
+#include <luci/Log.h>
+
+#include <pepper/csv2vec.h>
+#include <arser/arser.h>
+#include <vconone/vconone.h>
+
+#include <iostream>
+#include <string>
+
+namespace
+{
+
+const char *opt_bks = "--backends";
+const char *opt_def = "--default";
+const char *opt_part_file = "--part_file";
+const char *opt_input_file = "--input_file";
+const char *opt_work_path = "--work_path";
+
+void print_version(void)
+{
+ std::cout << "circle-partitioner version " << vconone::get_string() << std::endl;
+ std::cout << vconone::get_copyright() << std::endl;
+}
+
+void build_arser(arser::Arser &arser)
+{
+ arser::Helper::add_version(arser, print_version);
+
+ arser.add_argument(opt_bks).help("Backends in CSV to use for partitioning");
+
+ arser.add_argument(opt_def).help("Default backend to assign");
+
+ arser.add_argument(opt_part_file)
+ .required(true)
+ .help("Partition file which provides backend to assign");
+ arser.add_argument(opt_input_file).required(true).help("Input circle model filename");
+ arser.add_argument(opt_work_path)
+    .help("Work folder where partition and input files exist and output files are produced");
+}
+
+std::unique_ptr<luci::Module> load_model(const std::string &input_path)
+{
+ // Import from input Circle file
+ luci::ImporterEx importerex;
+ return importerex.importVerifyModule(input_path);
+}
+
+} // namespace
+
+int entry(int argc, char **argv)
+{
+ LOGGER(l);
+
+ arser::Arser arser("circle-partitioner provides circle model partitioning");
+
+ build_arser(arser);
+
+ try
+ {
+ arser.parse(argc, argv);
+ }
+ catch (const std::runtime_error &err)
+ {
+ std::cerr << err.what() << std::endl;
+ std::cerr << arser;
+ return EXIT_FAILURE;
+ }
+
+ std::string partition_file = arser.get<std::string>(opt_part_file);
+ std::string input_file = arser.get<std::string>(opt_input_file);
+ std::string work_folder = ".";
+
+ if (arser[opt_work_path])
+ {
+ work_folder = arser.get<std::string>(opt_work_path);
+ }
+
+ std::string partition_path = work_folder + "/" + partition_file;
+ std::string input_path = work_folder + "/" + input_file;
+
+ auto module = load_model(input_path);
+ if (module.get() == nullptr)
+ {
+ return EXIT_FAILURE;
+ }
+ // Run default shape/dtype inference before validation
+  // NOTE CircleWhileOut default shape is INVALID as it needs initial shape
+  // inference. This is because WHILE may have dynamic shape.
+ luci::CircleOptimizer optimizer;
+  (void)optimizer.options(); // need to call this to create the internal options member
+ for (size_t g = 0; g < module->size(); ++g)
+ {
+ auto graph = module->graph(g);
+ optimizer.optimize(graph);
+ }
+ if (!luci::validate(module.get()))
+ {
+ return EXIT_FAILURE;
+ }
+
+ // Read partition information
+ INFO(l) << "--- Read PartitionConfig-----------------------" << std::endl;
+ auto partition = partee::read(partition_path);
+ INFO(l) << partition << std::endl;
+
+ // override with command line arguments
+ {
+ if (arser[opt_bks])
+ {
+ auto backend_backends = arser.get<std::string>(opt_bks);
+ partition.groups = pepper::csv_to_vector<std::string>(backend_backends);
+ }
+ if (arser[opt_def])
+ {
+ partition.default_group = arser.get<std::string>(opt_def);
+ }
+ }
+ if (!luci::validate(partition))
+ {
+ // NOTE error reason/message is put to std::cerr inside validate()
+ return EXIT_FAILURE;
+ }
+
+ INFO(l) << "--- PartitionConfig final----------------------" << std::endl;
+ INFO(l) << partition << std::endl;
+
+ // apply partition to module
+ auto pms = luci::apply(module.get(), partition);
+
+ // validate partitioned modules
+ for (auto &pmodule : pms.pmodules)
+ {
+ for (size_t g = 0; g < pmodule.module->size(); ++g)
+ {
+ auto graph = pmodule.module->graph(g);
+ if (graph == nullptr)
+ {
+ std::cerr << "ERROR: Failed to create partition model" << std::endl;
+ return EXIT_FAILURE;
+ }
+ if (!luci::validate(graph))
+ {
+ std::cerr << "ERROR: Failed to create partition model" << std::endl;
+ return EXIT_FAILURE;
+ }
+ }
+ }
+
+ INFO(l) << "--- Partition Export---------------------------" << std::endl;
+ uint32_t idx = 1;
+ for (auto &pmodule : pms.pmodules)
+ {
+ // Export to output circle file
+ luci::CircleExporter exporter;
+
+ auto output_path = partee::make_path(work_folder, input_path, idx, pmodule.group);
+ pmodule.name = partee::get_filename_ext(output_path);
+ INFO(l) << "--- " << output_path << ": " << pmodule.name << std::endl;
+
+ luci::CircleFileExpContract contract(pmodule.module.get(), output_path);
+ if (!exporter.invoke(&contract))
+ {
+ std::cerr << "ERROR: Failed to export '" << output_path << "'" << std::endl;
+ return EXIT_FAILURE;
+ }
+ idx++;
+ }
+
+ INFO(l) << "--- Partition connection information-----------" << std::endl;
+ if (!partee::export_part_conn_json(work_folder, input_file, module.get(), pms))
+ {
+ return EXIT_FAILURE;
+ }
+ if (!partee::export_part_conn_ini(work_folder, input_file, module.get(), pms))
+ {
+ return EXIT_FAILURE;
+ }
+
+ INFO(l) << "--- Partition done-----------------------------" << std::endl << std::endl;
+
+ return EXIT_SUCCESS;
+}
diff --git a/compiler/circle-partitioner/src/HelperPath.cpp b/compiler/circle-partitioner/src/HelperPath.cpp
new file mode 100644
index 000000000..fc4bb2c70
--- /dev/null
+++ b/compiler/circle-partitioner/src/HelperPath.cpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "HelperPath.h"
+
+#include <cassert>
+#include <sstream>
+#include <stdlib.h>
+
+namespace partee
+{
+
+bool make_dir(const std::string &path)
+{
+ std::string command("mkdir -p ");
+ command += path;
+ int ret = ::system(command.c_str());
+ return ret == 0;
+}
+
+std::string get_filename_ext(const std::string &base)
+{
+ // find last '/' to get filename.ext
+ auto pos = base.find_last_of("/");
+ if (pos == std::string::npos)
+ return base;
+
+ return base.substr(pos + 1);
+}
+
+std::string make_path(const std::string &base, const std::string &input, uint32_t idx,
+ const std::string &backend)
+{
+ auto filename_ext = get_filename_ext(input);
+
+ // We will assume file type .circle if not given
+ // TODO maybe throw if there is no extension?
+ std::string filename = filename_ext;
+ std::string ext = "circle";
+
+ auto pos = filename_ext.find_last_of(".");
+ if (pos != std::string::npos)
+ {
+ filename = filename_ext.substr(0, pos);
+ ext = filename_ext.substr(pos + 1);
+ }
+
+ // format idx with 5 '0' paddings like '00123'
+ uint32_t length = 5;
+ auto seq = std::string(length, '0').append(std::to_string(idx));
+ auto seq_fmt = seq.substr(seq.size() - length);
+
+ return base + "/" + filename + "." + seq_fmt + "_" + backend + "." + ext;
+}
+
+} // namespace partee
diff --git a/compiler/circle-partitioner/src/HelperPath.h b/compiler/circle-partitioner/src/HelperPath.h
new file mode 100644
index 000000000..e38e3a903
--- /dev/null
+++ b/compiler/circle-partitioner/src/HelperPath.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_HELPER_PATH_H__
+#define __CIRCLE_HELPER_PATH_H__
+
+#include <string>
+
+namespace partee
+{
+
+/**
+ * @brief create folder
+ */
+bool make_dir(const std::string &path);
+
+/**
+ * @brief get filename part of base
+ */
+std::string get_filename_ext(const std::string &base);
+
+/**
+ * @brief Make file path from base and backend
+ */
+std::string make_path(const std::string &base, const std::string &input, uint32_t idx,
+ const std::string &backend);
+
+} // namespace partee
+
+#endif // __CIRCLE_HELPER_PATH_H__
diff --git a/compiler/circle-partitioner/src/PartitionExport.cpp b/compiler/circle-partitioner/src/PartitionExport.cpp
new file mode 100644
index 000000000..a61451d66
--- /dev/null
+++ b/compiler/circle-partitioner/src/PartitionExport.cpp
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionExport.h"
+#include "HelperPath.h"
+
+#include <crew/PConfig.h>
+
+#include <cassert>
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <vector>
+
+namespace
+{
+
+std::string export_file_path(const std::string &output_base, const std::string &input,
+ const std::string &ext)
+{
+ auto filename_ext = partee::get_filename_ext(input);
+ auto pos = filename_ext.find_last_of(".");
+ assert(pos > 0);
+ auto filename = filename_ext.substr(0, pos);
+ auto filepath = output_base + "/" + filename + ".conn" + ext;
+ return filepath;
+}
+
+} // namespace
+
+namespace
+{
+
+void graph_io_to_config_part(loco::Graph *graph, crew::Part &part)
+{
+ assert(graph != nullptr);
+
+ auto *gis = graph->inputs();
+ auto *gos = graph->outputs();
+ for (uint32_t i = 0; i < gis->size(); ++i)
+ {
+ auto *gi = gis->at(i);
+ assert(gi != nullptr);
+ part.inputs.push_back(gi->name());
+ }
+ for (uint32_t i = 0; i < gos->size(); ++i)
+ {
+ auto *go = gos->at(i);
+ assert(go != nullptr);
+ part.outputs.push_back(go->name());
+ }
+}
+
+void pms2config(const luci::PartedModules &pms, crew::PConfig &pconfig)
+{
+ for (auto &pmodule : pms.pmodules)
+ {
+ auto *graph = pmodule.module->graph();
+
+ crew::Part part;
+ part.model_file = pmodule.name;
+ graph_io_to_config_part(graph, part);
+
+ pconfig.parts.push_back(part);
+ }
+}
+
+} // namespace
+
+namespace partee
+{
+
+bool export_part_conn_json(const std::string &output_base, const std::string &input,
+ const luci::Module *source, luci::PartedModules &pms)
+{
+ crew::PConfig pconfig;
+
+  // TODO is using the main graph for graph I/O enough?
+ auto *graph = source->graph();
+
+ pconfig.source.model_file = input;
+ graph_io_to_config_part(graph, pconfig.source);
+
+ pms2config(pms, pconfig);
+
+ auto filepath_json = export_file_path(output_base, input, ".json");
+ std::ofstream fs(filepath_json.c_str(), std::ofstream::binary | std::ofstream::trunc);
+ if (not fs.good())
+ {
+ std::cerr << "ERROR: Failed to create file: " << filepath_json;
+ return false;
+ }
+ if (not write_json(fs, pconfig))
+ {
+ std::cerr << "ERROR: Failed to write json file: " << filepath_json;
+ return false;
+ }
+ fs.close();
+
+ return true;
+}
+
+bool export_part_conn_ini(const std::string &output_base, const std::string &input,
+ const luci::Module *source, luci::PartedModules &pms)
+{
+ crew::PConfig pconfig;
+
+  // TODO is using the main graph for graph I/O enough?
+ auto *graph = source->graph();
+
+ pconfig.source.model_file = input;
+ graph_io_to_config_part(graph, pconfig.source);
+
+ pms2config(pms, pconfig);
+
+ auto filepath_ini = export_file_path(output_base, input, ".ini");
+ std::ofstream fs(filepath_ini.c_str(), std::ofstream::binary | std::ofstream::trunc);
+ if (not fs.good())
+ {
+ std::cerr << "ERROR: Failed to create file: " << filepath_ini;
+ return false;
+ }
+ if (not write_ini(fs, pconfig))
+ {
+ std::cerr << "ERROR: Failed to write ini file: " << filepath_ini;
+ return false;
+ }
+ fs.close();
+
+ return true;
+}
+
+} // namespace partee
diff --git a/compiler/circle-partitioner/src/PartitionExport.h b/compiler/circle-partitioner/src/PartitionExport.h
new file mode 100644
index 000000000..fd287dcd3
--- /dev/null
+++ b/compiler/circle-partitioner/src/PartitionExport.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_PARTITION_EXPORT_H__
+#define __CIRCLE_PARTITION_EXPORT_H__
+
+#include <luci/Partition.h>
+
+#include <string>
+
+namespace partee
+{
+
+/**
+ * @brief This will save partition connection to json format file
+ */
+bool export_part_conn_json(const std::string &output_base, const std::string &input,
+ const luci::Module *source, luci::PartedModules &pms);
+
+/**
+ * @brief This will save partition connection to ini format file
+ */
+bool export_part_conn_ini(const std::string &output_base, const std::string &input,
+ const luci::Module *source, luci::PartedModules &pms);
+
+} // namespace partee
+
+#endif // __CIRCLE_PARTITION_EXPORT_H__
diff --git a/compiler/circle-partitioner/src/PartitionRead.cpp b/compiler/circle-partitioner/src/PartitionRead.cpp
new file mode 100644
index 000000000..1bfb2d309
--- /dev/null
+++ b/compiler/circle-partitioner/src/PartitionRead.cpp
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionRead.h"
+
+#include <crew/PConfigIni.h>
+#include <crew/PConfigIniDump.h>
+#include <luci/Log.h>
+#include <pepper/csv2vec.h>
+
+#include <stdexcept>
+
+namespace
+{
+
+using namespace partee;
+
+const char *_section_partition = "partition";
+const char *_section_OPCODE = "OPCODE";
+const char *_section_OPNAME = "OPNAME";
+
+const char *_comply_opcode = "opcode";
+const char *_comply_opname = "opname";
+
+const char *_key_backends = "backends";
+const char *_key_default = "default";
+const char *_key_comply = "comply";
+const char *_key_underscore = "_";
+
+luci::PartitionTable parse_table(const crew::Sections &sections)
+{
+ luci::PartitionTable table;
+
+ // default comply as OPCODE
+ table.comply = luci::PartitionTable::COMPLY::OPCODE;
+
+ // read main '[partition]' first
+ for (auto &section : sections)
+ {
+ if (section.name == _section_partition)
+ {
+ auto &items = section.items;
+ if (items.find(_key_backends) == items.end())
+ {
+ throw std::invalid_argument("'backends' is required");
+ }
+ if (items.find(_key_default) == items.end())
+ {
+ throw std::invalid_argument("'default' is required");
+ }
+
+ table.groups = pepper::csv_to_vector<std::string>(items.at(_key_backends));
+ table.default_group = items.at(_key_default);
+
+      if (items.find(_key_comply) == items.end())
+      {
+        throw std::invalid_argument("'comply' is required");
+      }
+
+      auto comply = items.at(_key_comply);
+
+ // check valid comply types
+ if (comply == _comply_opcode)
+ {
+ table.comply = luci::PartitionTable::COMPLY::OPCODE;
+ continue;
+ }
+ if (comply == _comply_opname)
+ {
+ table.comply = luci::PartitionTable::COMPLY::OPNAME;
+ continue;
+ }
+      throw std::runtime_error("Invalid 'comply' value: " + comply);
+ }
+ }
+
+ // read other sections
+ for (auto &section : sections)
+ {
+ if (section.name == _section_OPCODE)
+ {
+ auto &items = section.items;
+
+ for (auto &item : items)
+ {
+ if (item.first == _key_underscore)
+ {
+ if (table.comply == luci::PartitionTable::COMPLY::OPCODE)
+ table.default_group = item.second;
+ }
+ else
+ {
+ table.byopcodes.emplace(item.first, item.second);
+ }
+ }
+ }
+ else if (section.name == _section_OPNAME)
+ {
+ auto &items = section.items;
+
+ for (auto &item : items)
+ {
+ if (item.first == _key_underscore)
+ {
+ if (table.comply == luci::PartitionTable::COMPLY::OPNAME)
+ table.default_group = item.second;
+ }
+ else
+ {
+ table.byopnames.emplace(item.first, item.second);
+ }
+ }
+ }
+ }
+
+ return table;
+}
+
+} // namespace
+
+namespace partee
+{
+
+luci::PartitionTable read(const std::string &path)
+{
+ LOGGER(l);
+
+ INFO(l) << "PartitionConfig: " << path << std::endl;
+
+ auto partition_config = crew::read_ini(path);
+
+ INFO(l) << partition_config << std::endl;
+
+ auto partition_table = parse_table(partition_config);
+
+ return partition_table;
+}
+
+} // namespace partee
diff --git a/compiler/circle-partitioner/src/PartitionRead.h b/compiler/circle-partitioner/src/PartitionRead.h
new file mode 100644
index 000000000..9b07b328b
--- /dev/null
+++ b/compiler/circle-partitioner/src/PartitionRead.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_PARTITION_READ_H__
+#define __CIRCLE_PARTITION_READ_H__
+
+#include <luci/IR/Module.h>
+#include <luci/Partition.h>
+
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace partee
+{
+
+/**
+ * @brief Read and parse a partition file and return a PartitionTable
+ */
+luci::PartitionTable read(const std::string &path);
+
+} // namespace partee
+
+#endif // __CIRCLE_PARTITION_READ_H__
diff --git a/compiler/circle-quantizer-dredd-recipe-test/CMakeLists.txt b/compiler/circle-quantizer-dredd-recipe-test/CMakeLists.txt
new file mode 100644
index 000000000..adb2c0f2f
--- /dev/null
+++ b/compiler/circle-quantizer-dredd-recipe-test/CMakeLists.txt
@@ -0,0 +1,217 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_include(TargetRequire)
+
+unset(REQUIRED_TARGETS)
+list(APPEND REQUIRED_TARGETS circle-inspect)
+list(APPEND REQUIRED_TARGETS circle-verify)
+list(APPEND REQUIRED_TARGETS circle-quantizer)
+list(APPEND REQUIRED_TARGETS record-minmax)
+list(APPEND REQUIRED_TARGETS dredd_rule_lib)
+TargetRequire_Return(${REQUIRED_TARGETS})
+
+unset(TEST_DEPS)
+unset(TEST_NAMES)
+
+get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
+
+set(options USE_QCONFIG)
+set(oneValueArgs DTYPE GRANULARITY INPUT_DTYPE OUTPUT_DTYPE)
+set(multiValueArgs "")
+
+macro(Add RECIPE)
+ cmake_parse_arguments(ARG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+ set(QCONFIG_OPT "")
+ if(ARG_USE_QCONFIG)
+ set(QCONFIG_OPT "--config" "${ARTIFACTS_BIN_PATH}/${RECIPE}.qconf.json")
+ endif()
+
+ set(INPUT_DTYPE_OPT "")
+ if(ARG_INPUT_DTYPE)
+ set(INPUT_DTYPE_OPT "--input_type" "${ARG_INPUT_DTYPE}")
+ endif()
+
+ set(OUTPUT_DTYPE_OPT "")
+ if(ARG_OUTPUT_DTYPE)
+ set(OUTPUT_DTYPE_OPT "--output_type" "${ARG_OUTPUT_DTYPE}")
+ endif()
+
+ set(CIRCLE_PATH "${ARTIFACTS_BIN_PATH}/${RECIPE}.circle")
+ set(FAKE_QUANT_CIRCLE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE}.fq.circle")
+ set(RECORDED_CIRCLE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE}.recorded.circle")
+ set(QUANT_CIRCLE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE}.q.circle")
+
+ # Generate quantized .circle
+ add_custom_command(OUTPUT ${QUANT_CIRCLE_PATH}
+ COMMAND $<TARGET_FILE:circle-quantizer> --quantize_dequantize_weights float32 ${ARG_DTYPE} ${ARG_GRANULARITY} ${QCONFIG_OPT} ${CIRCLE_PATH} ${FAKE_QUANT_CIRCLE_PATH}
+ COMMAND $<TARGET_FILE:record-minmax> --input_model ${FAKE_QUANT_CIRCLE_PATH} --output_model ${RECORDED_CIRCLE_PATH}
+ COMMAND $<TARGET_FILE:circle-quantizer>
+ --quantize_with_minmax float32 ${ARG_DTYPE} ${ARG_GRANULARITY}
+ ${QCONFIG_OPT} ${RECORDED_CIRCLE_PATH} ${QUANT_CIRCLE_PATH}
+ ${INPUT_DTYPE_OPT} ${OUTPUT_DTYPE_OPT}
+ DEPENDS
+ circle-quantizer
+ record-minmax
+ ${CIRCLE_PATH}
+ COMMENT "Generate ${RECIPE}.q.circle"
+ )
+
+ list(APPEND TEST_DEPS ${QUANT_CIRCLE_PATH})
+ list(APPEND TEST_NAMES ${RECIPE})
+endmacro(Add)
+
+# Macro to generate fully fake-quantized models
+macro(AddFakeQuant RECIPE)
+ set(CIRCLE_PATH "${ARTIFACTS_BIN_PATH}/${RECIPE}.circle")
+  # NOTE We use .q.circle because it is the convention for output files (see testall.sh for more details)
+ set(FULL_FAKE_QUANT_CIRCLE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE}.q.circle")
+
+ # Generate fully fake-quantized .circle
+ add_custom_command(OUTPUT ${FULL_FAKE_QUANT_CIRCLE_PATH}
+ COMMAND $<TARGET_FILE:circle-quantizer> --fake_quantize ${CIRCLE_PATH} ${FULL_FAKE_QUANT_CIRCLE_PATH}
+ DEPENDS
+ circle-quantizer
+ ${CIRCLE_PATH}
+ COMMENT "Generate ${RECIPE}.q.circle"
+ )
+
+ list(APPEND TEST_DEPS ${FULL_FAKE_QUANT_CIRCLE_PATH})
+ list(APPEND TEST_NAMES ${RECIPE})
+endmacro(AddFakeQuant)
+
+# Macro to generate re-quantized models
+macro(AddReQuant RECIPE)
+ set(CIRCLE_PATH "${ARTIFACTS_BIN_PATH}/${RECIPE}.circle")
+  # NOTE We use .q.circle because it is the convention for output files (see testall.sh for more details)
+ set(REQUANT_CIRCLE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE}.q.circle")
+
+ # Generate re-quantized .circle
+ add_custom_command(OUTPUT ${REQUANT_CIRCLE_PATH}
+ COMMAND $<TARGET_FILE:circle-quantizer> --requantize int8 uint8 ${CIRCLE_PATH} ${REQUANT_CIRCLE_PATH}
+ DEPENDS
+ circle-quantizer
+ ${CIRCLE_PATH}
+ COMMENT "Generate ${RECIPE}.q.circle"
+ )
+
+ list(APPEND TEST_DEPS ${REQUANT_CIRCLE_PATH})
+ list(APPEND TEST_NAMES ${RECIPE})
+endmacro(AddReQuant)
+
+# Macro to quantize without quantize_dequantize_weights
+macro(AddSkipQDQW RECIPE)
+ cmake_parse_arguments(ARG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+ set(QCONFIG_OPT "")
+ if(ARG_USE_QCONFIG)
+ set(QCONFIG_OPT "--config" "${ARTIFACTS_BIN_PATH}/${RECIPE}.qconf.json")
+ endif()
+
+ set(INPUT_DTYPE_OPT "")
+ if(ARG_INPUT_DTYPE)
+ set(INPUT_DTYPE_OPT "--input_type" "${ARG_INPUT_DTYPE}")
+ endif()
+
+ set(OUTPUT_DTYPE_OPT "")
+ if(ARG_OUTPUT_DTYPE)
+ set(OUTPUT_DTYPE_OPT "--output_type" "${ARG_OUTPUT_DTYPE}")
+ endif()
+
+ set(CIRCLE_PATH "${ARTIFACTS_BIN_PATH}/${RECIPE}.circle")
+ set(RECORDED_CIRCLE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE}.recorded.circle")
+ set(QUANT_CIRCLE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE}.q.circle")
+
+ # Generate quantized .circle
+ add_custom_command(OUTPUT ${QUANT_CIRCLE_PATH}
+ COMMAND $<TARGET_FILE:record-minmax> --input_model ${CIRCLE_PATH} --output_model ${RECORDED_CIRCLE_PATH}
+ COMMAND $<TARGET_FILE:circle-quantizer>
+ --quantize_with_minmax float32 ${ARG_DTYPE} ${ARG_GRANULARITY}
+ ${QCONFIG_OPT} ${RECORDED_CIRCLE_PATH} ${QUANT_CIRCLE_PATH}
+ ${INPUT_DTYPE_OPT} ${OUTPUT_DTYPE_OPT}
+ DEPENDS
+ circle-quantizer
+ record-minmax
+ ${CIRCLE_PATH}
+ COMMENT "Generate ${RECIPE}.q.circle"
+ )
+
+ list(APPEND TEST_DEPS ${QUANT_CIRCLE_PATH})
+ list(APPEND TEST_NAMES ${RECIPE})
+endmacro(AddSkipQDQW)
+
+# Read "test.lst"
+include("test.lst")
+
+##
+## Copy testall
+##
+set(TEST_RUNNER "${CMAKE_CURRENT_BINARY_DIR}/testall.sh")
+set(TEST_RUNNER_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/testall.sh")
+
+add_custom_command(
+ OUTPUT ${TEST_RUNNER}
+ COMMAND ${CMAKE_COMMAND} -E copy "${TEST_RUNNER_SOURCE}" "${TEST_RUNNER}"
+ DEPENDS ${TEST_RUNNER_SOURCE}
+ COMMENT "Generate test runner"
+)
+
+list(APPEND TEST_DEPS "${TEST_RUNNER}")
+
+###
+### Generate test.config
+###
+set(TEST_CONFIG "${CMAKE_CURRENT_BINARY_DIR}/test.config")
+
+add_custom_command(
+ OUTPUT ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E remove -f ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'CIRCLE_INSPECT_PATH=\"$<TARGET_FILE:circle-inspect>\"' >> ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'CIRCLE_VERIFY_PATH=\"$<TARGET_FILE:circle-verify>\"' >> ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'RECORD_MINMAX_PATH=\"$<TARGET_FILE:record-minmax>\"' >> ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'CIRCLE_QUANTIZER_PATH=\"$<TARGET_FILE:circle-quantizer>\"' >> ${TEST_CONFIG}
+ DEPENDS
+ circle-inspect
+ circle-verify
+ record-minmax
+ circle-quantizer
+ COMMENT "Generate test configuration"
+)
+
+list(APPEND TEST_DEPS "${TEST_CONFIG}")
+
+#
+# copy rule-lib.sh (a library of shell script functions)
+#
+
+# getting path for rule-lib.sh in dredd-rule-lib
+get_target_property(DREDD_RULE_LIB_DIR dredd_rule_lib BINARY_DIR)
+
+set(RULE_LIB_SOURCE_PATH "${DREDD_RULE_LIB_DIR}/rule-lib.sh")
+set(RULE_LIB_BINARY_PATH "${CMAKE_CURRENT_BINARY_DIR}/rule-lib.sh")
+
+add_custom_command(
+ OUTPUT ${RULE_LIB_BINARY_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${RULE_LIB_SOURCE_PATH}" "${RULE_LIB_BINARY_PATH}"
+ DEPENDS ${RULE_LIB_SOURCE_PATH}
+ COMMENT "Generate rule lib"
+)
+
+list(APPEND TEST_DEPS "${RULE_LIB_BINARY_PATH}")
+
+# Generate dependencies
+add_custom_target(circle_quantizer_dredd_recipe_test ALL DEPENDS ${TEST_DEPS})
+add_dependencies(circle_quantizer_dredd_recipe_test common_artifacts_deps)
+
+get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
+
+# Run tests
+add_test(
+ NAME circle_quantizer_dredd_recipe_test
+ COMMAND ${TEST_RUNNER}
+ ${TEST_CONFIG}
+ ${ARTIFACTS_BIN_PATH}
+ ${TEST_NAMES}
+)
diff --git a/compiler/circle-quantizer-dredd-recipe-test/README.md b/compiler/circle-quantizer-dredd-recipe-test/README.md
new file mode 100644
index 000000000..396cf7cd8
--- /dev/null
+++ b/compiler/circle-quantizer-dredd-recipe-test/README.md
@@ -0,0 +1,37 @@
+# circle-quantizer-dredd-recipe-test
+
+It tests non-functional conditions of a quantized circle model generated by circle-quantizer.
+
+## How to add a test?
+
+1. Create a directory under `res/TensorFlowLiteRecipes/` or `res/CircleRecipes/`.
+
+2. Make a recipe (`test.recipe`) for an fp32 model under the directory.
+
+3. Make a rule (`test.rule`) with the conditions you want to test under the directory.
+(For more information on dredd-test-rules, see the _dredd-rule-lib_ module.)
+
+4. Add a test to `test.lst` in this module with the `Add` macro.
+ ```
+ Add(RECIPE_DIR DTYPE dtype GRANULARITY granularity USE_QCONFIG)
+ ```
+ - `RECIPE_DIR`: Path to the directory where the recipe file is saved.
+ - `DTYPE`: Default quantization dtype (uint8, int16)
+ - `GRANULARITY`: Quantization granularity (channel, layer)
+ - `USE_QCONFIG`: (Optional) Whether to use a quantization configuration file or not.
+ If this is set, `test.qconf.json` should exist under `RECIPE_DIR`
+
+## Example
+
+```
+# TensorFlowLiteRecipes
+res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000
+├── test.recipe # What you want to test
+├── test.rule # Non-functional conditions to be satisfied
+└── test.qconf.json # Quantization configuration file (optional)
+
+# test.lst
+...
+Add(Quant_Conv_Mul_Add_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+...
+```
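+
+Judging from the configuration parser in `CircleQuantizer.cpp` later in this diff
+(`read_layer_params`), a `test.qconf.json` might look like the following sketch; the
+layer names here are hypothetical:
+
+```json
+{
+  "layers" : [
+    { "name" : "conv_0", "dtype" : "int16", "granularity" : "channel" },
+    { "names" : [ "mul_0", "add_0" ], "dtype" : "uint8", "granularity" : "layer" }
+  ]
+}
+```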
diff --git a/compiler/circle-quantizer-dredd-recipe-test/requires.cmake b/compiler/circle-quantizer-dredd-recipe-test/requires.cmake
new file mode 100644
index 000000000..7450f7322
--- /dev/null
+++ b/compiler/circle-quantizer-dredd-recipe-test/requires.cmake
@@ -0,0 +1,6 @@
+require("circle-quantizer")
+require("record-minmax")
+require("circle-inspect")
+require("circle-verify")
+require("common-artifacts")
+require("dredd-rule-lib")
diff --git a/compiler/circle-quantizer-dredd-recipe-test/test.lst b/compiler/circle-quantizer-dredd-recipe-test/test.lst
new file mode 100644
index 000000000..309069bb8
--- /dev/null
+++ b/compiler/circle-quantizer-dredd-recipe-test/test.lst
@@ -0,0 +1,98 @@
+## EXAMPLE
+#
+# Add(RECIPE_DIR DTYPE dtype GRANULARITY granularity USE_QCONFIG(optional))
+# AddFakeQuant(RECIPE_DIR)
+#
+
+## TFLITE RECIPE
+
+# MPQ Test (default: u8, target: s16)
+Add(Quant_Add_001 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_AveragePool2D_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_BatchMatMul_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Concatenation_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Conv_003 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_DepthwiseConv2D_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_FullyConnected_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_LeakyRelu_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Logistic_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_MaxPool2D_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Mean_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Mul_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Neg_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Pad_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_PRelu_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_ReLU_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_ReLU6_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Reshape_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_ResizeBilinear_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_ResizeNearestNeighbor_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Slice_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Softmax_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Tanh_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Transpose_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_TransposeConv_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+
+# MPQ Test (default: s16, target: u8)
+Add(Quant_Add_002 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_AveragePool2D_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_BatchMatMul_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Concatenation_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Conv_004 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_DepthwiseConv2D_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_FullyConnected_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_LeakyRelu_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Logistic_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_MaxPool2D_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Mean_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Mul_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Neg_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Pad_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_PRelu_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_ReLU_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_ReLU6_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Reshape_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_ResizeBilinear_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_ResizeNearestNeighbor_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Slice_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Softmax_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Tanh_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Transpose_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_TransposeConv_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+
+Add(Quant_Conv_Mul_Add_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Conv_Mul_Add_001 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Conv_Mul_Add_002 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Split_Add_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Split_Add_001 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Conv_000 DTYPE uint8 GRANULARITY channel INPUT_DTYPE float32)
+Add(Quant_Conv_001 DTYPE uint8 GRANULARITY channel OUTPUT_DTYPE float32)
+Add(Quant_Conv_002 DTYPE uint8 GRANULARITY channel INPUT_DTYPE float32 OUTPUT_DTYPE float32)
+
+AddFakeQuant(Quant_Add_000)
+AddFakeQuant(Quant_DepthToSpace_000)
+AddFakeQuant(Quant_SpaceToDepth_000)
+
+# Requantize Test (I8 -> U8)
+AddReQuant(Quant_Add_I8_000)
+AddReQuant(Quant_AveragePool2D_I8_000)
+AddReQuant(Quant_Conv_I8_000)
+AddReQuant(Quant_DepthwiseConv2D_I8_000)
+AddReQuant(Quant_MaxPool2D_I8_000)
+AddReQuant(Quant_Mean_I8_000)
+AddReQuant(Quant_Mul_I8_000)
+AddReQuant(Quant_PRelu_I8_000)
+AddReQuant(Quant_ReLU_I8_000)
+AddReQuant(Quant_TransposeConv_I8_000)
+
+## CIRCLE RECIPE
+
+# MPQ Test (default: u8, target: s16)
+Add(Quant_InstanceNorm_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+
+# MPQ Test (default: s16, target: u8)
+Add(Quant_InstanceNorm_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+
+# Test for quantization without QuantizeDequantizeWeights
+AddSkipQDQW(Quant_Conv_005 DTYPE uint8 GRANULARITY channel)
+AddSkipQDQW(Quant_Conv_006 DTYPE int16 GRANULARITY channel)
diff --git a/compiler/circle-quantizer-dredd-recipe-test/testall.sh b/compiler/circle-quantizer-dredd-recipe-test/testall.sh
new file mode 100755
index 000000000..e5d5cf2b8
--- /dev/null
+++ b/compiler/circle-quantizer-dredd-recipe-test/testall.sh
@@ -0,0 +1,100 @@
+#!/bin/bash
+
+# Need at least 2 arguments
+if [[ $# -lt 2 ]]; then
+ echo "USAGE: $0 ..."
+ echo
+ echo "ARGUMENTS:"
+ echo " [test.config path]"
+ echo " [WORKDIR]"
+ echo " [Prefix1]"
+ echo " [Prefix2]"
+ echo " ..."
+ exit 255
+fi
+
+WORKDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+CONFIG_PATH="$1"; shift
+RESOURCE_DIR="$1"; shift
+
+source "${CONFIG_PATH}"
+
+echo "-- Found circle-inspect: ${CIRCLE_INSPECT_PATH}"
+echo "-- Found circle-verify: ${CIRCLE_VERIFY_PATH}"
+echo "-- Found circle-quantizer: ${CIRCLE_QUANTIZER_PATH}"
+echo "-- Found record-minmax: ${RECORD_MINMAX_PATH}"
+echo "-- Found common-artifacts: ${RESOURCE_DIR}"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+pushd ${WORKDIR}
+while [[ $# -ne 0 ]]; do
+ PREFIX="$1"; shift
+
+ TESTED+=("${PREFIX}")
+
+ PASSED_TAG="${PREFIX}.passed"
+
+ rm -f "${PASSED_TAG}"
+
+ cat > "${PREFIX}.log" <(
+ exec 2>&1
+
+ echo "-- Found circle: ${PREFIX}.q.circle"
+
+ # Exit immediately if any command fails
+ set -e
+ # Show commands
+ set -x
+
+ #
+ # Check if rule is satisfied
+ #
+
+ # Note: turn off 'command printing'. Otherwise printing will be so messy
+ set +x
+
+ # (COMPILED_FILE, INSPECT_PROG_PATH, VERIFY_PROG_PATH, ERROR_LOG) must be set for rule-lib.sh
+ COMPILED_FILE="${PREFIX}.q.circle"
+ INSPECT_PROG_PATH=${CIRCLE_INSPECT_PATH}
+ VERIFY_PROG_PATH=${CIRCLE_VERIFY_PATH}
+ ERROR_LOG="${PREFIX}.error"
+
+ rm -f "${ERROR_LOG}"
+
+ # in case error while running rule-lib.sh, prints error msg
+ trap 'echo "** ERROR **" ; cat "${ERROR_LOG}"' ERR
+
+ source rule-lib.sh
+ source "${RESOURCE_DIR}/${PREFIX}.rule"
+
+ # unset
+ trap - ERR
+ set -x
+
+ # At this point, the exit code of all commands is 0
+ # If not 0, execution of this script ends because of "set -e"
+ touch "${PASSED_TAG}"
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("$PREFIX")
+ else
+ FAILED+=("$PREFIX")
+ fi
+done
+popd
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/circle-quantizer/CMakeLists.txt b/compiler/circle-quantizer/CMakeLists.txt
index 5075b13d5..16e41a327 100644
--- a/compiler/circle-quantizer/CMakeLists.txt
+++ b/compiler/circle-quantizer/CMakeLists.txt
@@ -1,15 +1,23 @@
+nnas_find_package(Jsoncpp)
+if(NOT Jsoncpp_FOUND)
+  message(STATUS "Build circle-quantizer: FAILED (missing jsoncpp)")
+ return()
+endif(NOT Jsoncpp_FOUND)
+
set (SOURCES src/CircleQuantizer.cpp)
add_executable(circle-quantizer "${SOURCES}")
-target_link_libraries(circle-quantizer foder)
+target_include_directories(circle-quantizer PRIVATE ${Jsoncpp_INCLUDE_DIRS})
+
+target_link_libraries(circle-quantizer ${Jsoncpp_STATIC_LIB})
target_link_libraries(circle-quantizer safemain)
target_link_libraries(circle-quantizer oops)
target_link_libraries(circle-quantizer loco)
-target_link_libraries(circle-quantizer mio_circle)
target_link_libraries(circle-quantizer luci_import)
target_link_libraries(circle-quantizer luci_service)
target_link_libraries(circle-quantizer luci_pass)
target_link_libraries(circle-quantizer luci_export)
+target_link_libraries(circle-quantizer luci_env)
target_link_libraries(circle-quantizer arser)
target_link_libraries(circle-quantizer vconone)
diff --git a/compiler/circle-quantizer/requires.cmake b/compiler/circle-quantizer/requires.cmake
index c21e28e8d..4fcee1873 100644
--- a/compiler/circle-quantizer/requires.cmake
+++ b/compiler/circle-quantizer/requires.cmake
@@ -1,4 +1,3 @@
-require("foder")
require("loco")
require("locop")
require("safemain")
diff --git a/compiler/circle-quantizer/src/CircleQuantizer.cpp b/compiler/circle-quantizer/src/CircleQuantizer.cpp
index 54b38a170..02b96f91e 100644
--- a/compiler/circle-quantizer/src/CircleQuantizer.cpp
+++ b/compiler/circle-quantizer/src/CircleQuantizer.cpp
@@ -14,17 +14,17 @@
* limitations under the License.
*/
-#include <foder/FileLoader.h>
-
-#include <luci/Importer.h>
-#include <luci/CircleOptimizer.h>
+#include <luci/ImporterEx.h>
+#include <luci/CircleQuantizer.h>
#include <luci/Service/Validate.h>
#include <luci/CircleExporter.h>
#include <luci/CircleFileExpContract.h>
+#include <luci/UserSettings.h>
#include <oops/InternalExn.h>
#include <arser/arser.h>
#include <vconone/vconone.h>
+#include <json.h>
#include <functional>
#include <iostream>
@@ -33,8 +33,59 @@
using OptionHook = std::function<int(const char **)>;
-using Algorithms = luci::CircleOptimizer::Options::Algorithm;
-using AlgorithmParameters = luci::CircleOptimizer::Options::AlgorithmParameters;
+using LayerParam = luci::CircleQuantizer::Options::LayerParam;
+using Algorithms = luci::CircleQuantizer::Options::Algorithm;
+using AlgorithmParameters = luci::CircleQuantizer::Options::AlgorithmParameters;
+
+std::vector<std::shared_ptr<LayerParam>> read_layer_params(std::string &filename)
+{
+ Json::Value root;
+ std::ifstream ifs(filename);
+
+ // Failed to open cfg file
+ if (not ifs.is_open())
+ throw std::runtime_error("Cannot open config file. " + filename);
+
+ Json::CharReaderBuilder builder;
+ JSONCPP_STRING errs;
+
+ // Failed to parse
+ if (not parseFromStream(builder, ifs, &root, &errs))
+ throw std::runtime_error("Cannot parse config file (json format). " + errs);
+
+ auto layers = root["layers"];
+ std::vector<std::shared_ptr<LayerParam>> p;
+ for (auto layer : layers)
+ {
+ if (layer.isMember("name"))
+ {
+ auto l = std::make_shared<LayerParam>();
+ {
+ l->name = layer["name"].asString();
+ l->dtype = layer["dtype"].asString();
+ l->granularity = layer["granularity"].asString();
+ }
+ p.emplace_back(l);
+ }
+
+ // Multiple names with the same dtype & granularity
+ if (layer.isMember("names"))
+ {
+ for (auto name : layer["names"])
+ {
+ auto l = std::make_shared<LayerParam>();
+ {
+ l->name = name.asString();
+ l->dtype = layer["dtype"].asString();
+ l->granularity = layer["granularity"].asString();
+ }
+ p.emplace_back(l);
+ }
+ }
+ }
+
+ return p;
+}
void print_exclusive_options(void)
{
@@ -42,6 +93,8 @@ void print_exclusive_options(void)
std::cout << " --quantize_dequantize_weights" << std::endl;
std::cout << " --quantize_with_minmax" << std::endl;
std::cout << " --requantize" << std::endl;
+ std::cout << " --force_quantparam" << std::endl;
+ std::cout << " --quantize_weights" << std::endl;
}
void print_version(void)
@@ -52,51 +105,101 @@ void print_version(void)
int entry(int argc, char **argv)
{
- // Simple argument parser (based on map)
- std::map<std::string, OptionHook> argparse;
- luci::CircleOptimizer optimizer;
+ luci::CircleQuantizer quantizer;
- auto options = optimizer.options();
+ auto options = quantizer.options();
+ auto settings = luci::UserSettings::settings();
const std::string qdqw = "--quantize_dequantize_weights";
const std::string qwmm = "--quantize_with_minmax";
const std::string rq = "--requantize";
+ const std::string fq = "--force_quantparam";
+ const std::string cq = "--copy_quantparam";
+ const std::string fake_quant = "--fake_quantize";
+ const std::string qw = "--quantize_weights";
+ const std::string cfg = "--config";
+
+ const std::string tf_maxpool = "--TF-style_maxpool";
+
+ const std::string gpd = "--generate_profile_data";
arser::Arser arser("circle-quantizer provides circle model quantization");
- arser.add_argument("--version")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("Show version information and exit")
- .exit_with(print_version);
+ arser::Helper::add_version(arser, print_version);
+ arser::Helper::add_verbose(arser);
arser.add_argument(qdqw)
- .nargs(3)
- .type(arser::DataType::STR_VEC)
- .required(false)
- .help("Quantize-dequantize weight values required action before quantization. "
- "Three arguments required: input_dtype(float32) "
- "output_dtype(uint8) granularity(layer, channel)");
+ .nargs(3)
+ .type(arser::DataType::STR_VEC)
+ .help("Quantize-dequantize weight values required action before quantization. "
+ "Three arguments required: input_model_dtype(float32) "
+ "output_model_dtype(uint8) granularity(layer, channel)");
arser.add_argument(qwmm)
- .nargs(3)
- .type(arser::DataType::STR_VEC)
- .required(false)
- .help("Quantize with min/max values. "
- "Three arguments required: input_dtype(float32) "
- "output_dtype(uint8) granularity(layer, channel)");
+ .nargs(3)
+ .type(arser::DataType::STR_VEC)
+ .help("Quantize with min/max values. "
+ "Three arguments required: input_model_dtype(float32) "
+ "output_model_dtype(uint8) granularity(layer, channel)");
+
+ arser.add_argument(tf_maxpool)
+ .nargs(0)
+ .default_value(false)
+ .help("Force MaxPool Op to have the same input/output quantparams. NOTE: This feature can "
+ "degrade accuracy of some models");
+
+ arser.add_argument(fake_quant)
+ .nargs(0)
+ .help("Convert a quantized model to a fake-quantized model. NOTE: This feature will "
+ "generate an fp32 model.");
arser.add_argument(rq)
- .nargs(2)
- .type(arser::DataType::STR_VEC)
- .required(false)
- .help("Requantize a quantized model. "
- "Two arguments required: input_dtype(int8) "
- "output_dtype(uint8)");
-
- arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
- arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
+ .nargs(2)
+ .type(arser::DataType::STR_VEC)
+ .help("Requantize a quantized model. "
+ "Two arguments required: input_model_dtype(int8) "
+ "output_model_dtype(uint8)");
+
+ arser.add_argument(fq)
+ .nargs(3)
+ .type(arser::DataType::STR_VEC)
+ .accumulated(true)
+ .help("Write quantization parameters to the specified tensor. "
+ "Three arguments required: tensor_name(string), "
+ "scale(float) zero_point(int)");
+
+ arser.add_argument(cq)
+ .nargs(2)
+ .type(arser::DataType::STR_VEC)
+ .accumulated(true)
+    .help("Copy quantization parameter from a tensor to another tensor. "
+ "Two arguments required: source_tensor_name(string), "
+ "destination_tensor_name(string)");
+
+ arser.add_argument(qw)
+ .nargs(3)
+ .type(arser::DataType::STR_VEC)
+    .help("Quantize weights values only. "
+ "Three arguments required: input_model_dtype(float32) "
+ "output_model_dtype(int8, int16) granularity(channel)");
+
+ arser.add_argument("--input_type")
+ .help("Input type of quantized model (uint8, int16, int32, int64, float32, or bool). For "
+ "multiple inputs, "
+ "use comma-separated values. e.g., uint8,int16");
+
+ arser.add_argument("--output_type")
+ .help("Output type of quantized model (uint8, int16, int32, int64, float32, or bool). For "
+ "multiple outputs, "
+ "use comma-separated values. e.g., uint8,int16");
+
+ arser.add_argument(cfg).help("Path to the quantization configuration file");
+
+ arser.add_argument("input").help("Input circle model");
+ arser.add_argument("output").help("Output circle model");
+
+ arser.add_argument(gpd).nargs(0).required(false).default_value(false).help(
+ "This will turn on profiling data generation.");
try
{
@@ -104,18 +207,36 @@ int entry(int argc, char **argv)
}
catch (const std::runtime_error &err)
{
- std::cout << err.what() << std::endl;
+ std::cerr << err.what() << std::endl;
std::cout << arser;
return 255;
}
- if (arser[qdqw])
{
- if (arser[qwmm] || arser[rq])
+ // only one of qdqw, qwmm, rq, fq, cq, fake_quant, qw option can be used
+ int32_t opt_used = arser[qdqw] ? 1 : 0;
+ opt_used += arser[qwmm] ? 1 : 0;
+ opt_used += arser[rq] ? 1 : 0;
+ opt_used += arser[fq] ? 1 : 0;
+ opt_used += arser[cq] ? 1 : 0;
+ opt_used += arser[fake_quant] ? 1 : 0;
+ opt_used += arser[qw] ? 1 : 0;
+ if (opt_used != 1)
{
print_exclusive_options();
return 255;
}
+ }
+
+ if (arser.get<bool>("--verbose"))
+ {
+ // The third parameter of setenv means REPLACE.
+ // If REPLACE is zero, it does not overwrite an existing value.
+ setenv("LUCI_LOG", "100", 0);
+ }
+
+ if (arser[qdqw])
+ {
auto values = arser.get<std::vector<std::string>>(qdqw);
if (values.size() != 3)
{
@@ -124,18 +245,29 @@ int entry(int argc, char **argv)
}
options->enable(Algorithms::QuantizeDequantizeWeights);
- options->param(AlgorithmParameters::Quantize_input_dtype, values.at(0));
- options->param(AlgorithmParameters::Quantize_output_dtype, values.at(1));
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, values.at(0));
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, values.at(1));
options->param(AlgorithmParameters::Quantize_granularity, values.at(2));
+
+ if (arser[cfg])
+ {
+ auto filename = arser.get<std::string>(cfg);
+ try
+ {
+ auto layer_params = read_layer_params(filename);
+
+ options->layer_params(AlgorithmParameters::Quantize_layer_params, layer_params);
+ }
+ catch (const std::runtime_error &e)
+ {
+ std::cerr << e.what() << '\n';
+ return 255;
+ }
+ }
}
if (arser[qwmm])
{
- if (arser[qdqw] || arser[rq])
- {
- print_exclusive_options();
- return 255;
- }
auto values = arser.get<std::vector<std::string>>(qwmm);
if (values.size() != 3)
{
@@ -144,18 +276,40 @@ int entry(int argc, char **argv)
}
options->enable(Algorithms::QuantizeWithMinMax);
- options->param(AlgorithmParameters::Quantize_input_dtype, values.at(0));
- options->param(AlgorithmParameters::Quantize_output_dtype, values.at(1));
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, values.at(0));
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, values.at(1));
options->param(AlgorithmParameters::Quantize_granularity, values.at(2));
+
+ if (arser["--input_type"])
+ options->param(AlgorithmParameters::Quantize_input_type,
+ arser.get<std::string>("--input_type"));
+
+ if (arser["--output_type"])
+ options->param(AlgorithmParameters::Quantize_output_type,
+ arser.get<std::string>("--output_type"));
+
+ if (arser[tf_maxpool] and arser.get<bool>(tf_maxpool))
+ options->param(AlgorithmParameters::Quantize_TF_style_maxpool, "True");
+
+ if (arser[cfg])
+ {
+ auto filename = arser.get<std::string>(cfg);
+ try
+ {
+ auto layer_params = read_layer_params(filename);
+
+ options->layer_params(AlgorithmParameters::Quantize_layer_params, layer_params);
+ }
+ catch (const std::runtime_error &e)
+ {
+ std::cerr << e.what() << '\n';
+ return 255;
+ }
+ }
}
if (arser[rq])
{
- if (arser[qwmm] || arser[qdqw])
- {
- print_exclusive_options();
- return 255;
- }
auto values = arser.get<std::vector<std::string>>(rq);
if (values.size() != 2)
{
@@ -164,42 +318,99 @@ int entry(int argc, char **argv)
}
options->enable(Algorithms::Requantize);
- options->param(AlgorithmParameters::Quantize_input_dtype, values.at(0));
- options->param(AlgorithmParameters::Quantize_output_dtype, values.at(1));
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, values.at(0));
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, values.at(1));
}
- std::string input_path = arser.get<std::string>("input");
- std::string output_path = arser.get<std::string>("output");
+ if (arser[fq])
+ {
+ auto values = arser.get<std::vector<std::vector<std::string>>>(fq);
- // Load model from the file
- foder::FileLoader file_loader{input_path};
- std::vector<char> model_data = file_loader.load();
+ std::vector<std::string> tensors;
+ std::vector<std::string> scales;
+ std::vector<std::string> zero_points;
- // Verify flatbuffers
- flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(model_data.data()), model_data.size()};
- if (!circle::VerifyModelBuffer(verifier))
+ for (auto const value : values)
+ {
+ if (value.size() != 3)
+ {
+ std::cerr << arser;
+ return 255;
+ }
+
+ tensors.push_back(value[0]);
+ scales.push_back(value[1]);
+ zero_points.push_back(value[2]);
+ }
+
+ options->enable(Algorithms::ForceQuantParam);
+
+ options->params(AlgorithmParameters::Quantize_tensor_names, tensors);
+ options->params(AlgorithmParameters::Quantize_scales, scales);
+ options->params(AlgorithmParameters::Quantize_zero_points, zero_points);
+ }
+
+ if (arser[cq])
{
- std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl;
- return EXIT_FAILURE;
+ auto values = arser.get<std::vector<std::vector<std::string>>>(cq);
+
+ std::vector<std::string> src;
+ std::vector<std::string> dst;
+
+ for (auto const &value : values)
+ {
+ if (value.size() != 2)
+ {
+ std::cerr << arser;
+ return 255;
+ }
+
+ src.push_back(value[0]);
+ dst.push_back(value[1]);
+ }
+
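+ // Each pair collected above is (src tensor, dst tensor); CopyQuantParam
+ // copies quantization parameters from src to dst.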
+ options->enable(Algorithms::CopyQuantParam);
+
+ options->params(AlgorithmParameters::Quantize_src_tensor_names, src);
+ options->params(AlgorithmParameters::Quantize_dst_tensor_names, dst);
}
- const circle::Model *circle_model = circle::GetModel(model_data.data());
- if (circle_model == nullptr)
+ if (arser[fake_quant])
+ options->enable(Algorithms::ConvertToFakeQuantizedModel);
+
+ if (arser[qw])
{
- std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl;
- return EXIT_FAILURE;
+ auto values = arser.get<std::vector<std::string>>(qw);
+ if (values.size() != 3)
+ {
+ std::cerr << arser;
+ return 255;
+ }
+ options->enable(Algorithms::QuantizeWeights);
+
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, values.at(0));
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, values.at(1));
+ options->param(AlgorithmParameters::Quantize_granularity, values.at(2));
}
- // Import from input Circle file
- luci::Importer importer;
- auto module = importer.importModule(circle_model);
+ std::string input_path = arser.get<std::string>("input");
+ std::string output_path = arser.get<std::string>("output");
+
+ if (arser[gpd])
+ settings->set(luci::UserSettings::Key::ProfilingDataGen, true);
+
+ // Load model from the file
+ luci::ImporterEx importerex;
+ auto module = importerex.importVerifyModule(input_path);
+ if (module.get() == nullptr)
+ return EXIT_FAILURE;
for (size_t idx = 0; idx < module->size(); ++idx)
{
auto graph = module->graph(idx);
// quantize the graph
- optimizer.quantize(graph);
+ quantizer.quantize(graph);
if (!luci::validate(graph))
{
diff --git a/compiler/circle-tensordump/CMakeLists.txt b/compiler/circle-tensordump/CMakeLists.txt
index e55901fe2..ed6ddc408 100644
--- a/compiler/circle-tensordump/CMakeLists.txt
+++ b/compiler/circle-tensordump/CMakeLists.txt
@@ -1,8 +1,8 @@
-if(NOT TARGET mio_circle)
+if(NOT TARGET mio_circle06)
return()
-endif(NOT TARGET mio_circle)
+endif(NOT TARGET mio_circle06)
-nnas_find_package(HDF5 QUIET)
+nnas_find_package(HDF5 COMPONENTS STATIC QUIET)
if(NOT HDF5_FOUND)
message(STATUS "Build circle-tensordump: FAILED (missing HDF5)")
@@ -19,5 +19,8 @@ target_include_directories(circle-tensordump PRIVATE ${HDF5_INCLUDE_DIRS})
target_link_libraries(circle-tensordump PRIVATE ${HDF5_CXX_LIBRARIES})
target_link_libraries(circle-tensordump PRIVATE arser)
target_link_libraries(circle-tensordump PRIVATE foder)
-target_link_libraries(circle-tensordump PRIVATE mio_circle)
+target_link_libraries(circle-tensordump PRIVATE mio_circle06)
+target_link_libraries(circle-tensordump PRIVATE mio_circle06_helper)
target_link_libraries(circle-tensordump PRIVATE safemain)
+
+install(TARGETS circle-tensordump DESTINATION bin)
diff --git a/compiler/circle-tensordump/driver/Driver.cpp b/compiler/circle-tensordump/driver/Driver.cpp
index 5bab9f59e..c32dc3f5a 100644
--- a/compiler/circle-tensordump/driver/Driver.cpp
+++ b/compiler/circle-tensordump/driver/Driver.cpp
@@ -29,14 +29,12 @@
int entry(int argc, char **argv)
{
arser::Arser arser{
- "circle-tensordump allows users to retrieve tensor information from a Circle model file"};
+ "circle-tensordump allows users to retrieve tensor information from a Circle model file"};
- arser.add_argument("circle").nargs(1).type(arser::DataType::STR).help("Circle file path to dump");
+ arser.add_argument("circle").help("Circle file path to dump");
arser.add_argument("--tensors").nargs(0).help("Dump to console");
arser.add_argument("--tensors_to_hdf5")
- .nargs(1)
- .type(arser::DataType::STR)
- .help("Dump to hdf5 file. Specify hdf5 file path to be dumped");
+ .help("Dump to hdf5 file. Specify hdf5 file path to be dumped");
try
{
diff --git a/compiler/circle-tensordump/requires.cmake b/compiler/circle-tensordump/requires.cmake
index 1c754f518..b3a2638ef 100644
--- a/compiler/circle-tensordump/requires.cmake
+++ b/compiler/circle-tensordump/requires.cmake
@@ -1,4 +1,4 @@
require("arser")
require("foder")
-require("mio-circle")
+require("mio-circle06")
require("safemain")
diff --git a/compiler/circle-tensordump/src/Dump.cpp b/compiler/circle-tensordump/src/Dump.cpp
index a8d32564f..98cb5aea2 100644
--- a/compiler/circle-tensordump/src/Dump.cpp
+++ b/compiler/circle-tensordump/src/Dump.cpp
@@ -15,7 +15,8 @@
*/
#include "Dump.h"
-#include "Reader.h"
+
+#include <mio_circle/Reader.h>
#include <H5Cpp.h>
@@ -102,7 +103,7 @@ namespace circletensordump
void DumpTensors::run(std::ostream &os, const circle::Model *model, const std::string &)
{
- circletensordump::Reader reader(model);
+ mio::circle::Reader reader(model);
uint32_t num_subgraph = reader.num_subgraph();
auto buffers = reader.buffers();
@@ -113,8 +114,10 @@ void DumpTensors::run(std::ostream &os, const circle::Model *model, const std::s
auto tensors = reader.tensors();
for (const auto &tensor : *tensors)
{
+ const auto tensor_name = tensor->name();
+ std::string tensor_name_str = tensor_name ? tensor_name->str() : "no_name";
os << std::string(70, '-') << std::endl;
- os << "[" << tensor->name()->str() << "]" << std::endl;
+ os << "[" << tensor_name_str << "]" << std::endl;
auto buff_idx = tensor->buffer();
auto buff_data_ptr = reader.buffers()->Get(buff_idx)->data();
auto quant_param = tensor->quantization();
@@ -182,6 +185,14 @@ H5::PredType hdf5_dtype_cast(const circle::TensorType &circle_type)
{
return H5::PredType::NATIVE_UINT8;
}
+ case circle::TensorType_INT8:
+ {
+ return H5::PredType::NATIVE_INT8;
+ }
+ case circle::TensorType_INT16:
+ {
+ return H5::PredType::NATIVE_INT16;
+ }
case circle::TensorType_INT32:
{
return H5::PredType::NATIVE_INT32;
@@ -224,7 +235,9 @@ std::vector<hsize_t> hdf5_dims_cast(const flatbuffers::Vector<T> *data,
ret.resize(rank);
for (uint32_t d = 0; d < rank; d++)
{
- ret.at(d) = dims->Get(d);
+ if (dims->Get(d) < 0)
+ throw std::runtime_error("Dimensions shouldn't be negative");
+ ret.at(d) = static_cast<hsize_t>(dims->Get(d));
}
}
}
@@ -249,7 +262,7 @@ void write_vector_data_to_hdf5(H5::H5File &file, std::string &group_name, std::s
return;
auto dataspace = std::make_unique<H5::DataSpace>(dims.size(), dims.data());
auto dataset = std::make_unique<H5::DataSet>(
- file.createDataSet(group_name + "/" + dataset_name, type, *dataspace));
+ file.createDataSet(group_name + "/" + dataset_name, type, *dataspace));
dataset->write(data->data(), type);
}
@@ -260,7 +273,7 @@ void write_scalar_data_to_hdf5(H5::H5File &file, std::string &group_name, std::s
{
auto dataspace = std::make_unique<H5::DataSpace>(H5S_SCALAR);
auto dataset = std::make_unique<H5::DataSet>(
- file.createDataSet(group_name + "/" + dataset_name, type, *dataspace));
+ file.createDataSet(group_name + "/" + dataset_name, type, *dataspace));
dataset->write(&data, type);
}
@@ -288,7 +301,7 @@ void DumpTensorsToHdf5::run(std::ostream &os, const circle::Model *model,
const std::string &output_path)
{
// loads a circle model
- circletensordump::Reader reader(model);
+ mio::circle::Reader reader(model);
uint32_t num_subgraph = reader.num_subgraph();
// create a hdf5 file
@@ -301,10 +314,18 @@ void DumpTensorsToHdf5::run(std::ostream &os, const circle::Model *model,
auto tensors = reader.tensors();
for (const auto &tensor : *tensors)
{
+ // If the tensor does not have a name, skip it.
+ const auto tensor_name = tensor->name();
+ if (tensor_name == nullptr)
+ {
+ assert(false && "There is no tensor name");
+ continue;
+ }
+
// create a group for each tensor whose name is its tensor name
- std::string group_name = ::mangle(tensor->name()->c_str());
+ std::string group_name = ::mangle(tensor_name->c_str());
std::unique_ptr<H5::Group> tensor_group =
- std::make_unique<H5::Group>(file.createGroup(group_name));
+ std::make_unique<H5::Group>(file.createGroup(group_name));
// write a buffer data
uint32_t buff_idx = tensor->buffer();
diff --git a/compiler/circle-tensordump/src/Reader.cpp b/compiler/circle-tensordump/src/Reader.cpp
deleted file mode 100644
index 429736bfe..000000000
--- a/compiler/circle-tensordump/src/Reader.cpp
+++ /dev/null
@@ -1,169 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Reader.h"
-
-#include <sstream>
-#include <string>
-
-namespace circletensordump
-{
-
-bool is_valid(const circle::OperatorCode *opcode)
-{
- circle::BuiltinOperator code = opcode->builtin_code();
- return (circle::BuiltinOperator_MIN <= code && code <= circle::BuiltinOperator_MAX);
-}
-
-bool is_custom(const circle::OperatorCode *opcode)
-{
- circle::BuiltinOperator code = opcode->builtin_code();
- return (code == circle::BuiltinOperator_CUSTOM);
-}
-
-std::string opcode_name(const circle::OperatorCode *opcode)
-{
- assert(opcode);
-
- if (!is_valid(opcode))
- {
- std::ostringstream oss;
- oss << "(invalid)";
- return oss.str();
- }
-
- if (is_custom(opcode))
- {
- if (!opcode->custom_code())
- return "(invalid custom)";
-
- std::string custom_op = "CUSTOM(";
- custom_op += opcode->custom_code()->c_str();
- custom_op += ")";
- return custom_op;
- }
-
- circle::BuiltinOperator code = opcode->builtin_code();
- return circle::EnumNameBuiltinOperator(code);
-}
-
-const char *tensor_type(const circle::Tensor *tensor)
-{
- return circle::EnumNameTensorType(tensor->type());
-}
-
-const char *tensor_name(const circle::Tensor *tensor)
-{
- static const char *kEmptyTensorName = "(noname)";
-
- auto name = tensor->name();
- if (name)
- return name->c_str();
-
- return kEmptyTensorName;
-}
-
-Reader::Reader(const circle::Model *model)
-{
- _subgraphs = model->subgraphs();
- _buffers = model->buffers();
-
- auto opcodes = model->operator_codes();
- for (const ::circle::OperatorCode *opcode : *opcodes)
- {
- _op_codes.push_back(opcode);
- }
-}
-
-size_t Reader::buffer_info(uint32_t buf_idx, const uint8_t **buff_data)
-{
- if (buff_data != nullptr)
- {
- *buff_data = nullptr;
- }
-
- if (buf_idx == 0)
- return 0;
-
- if (auto *buffer = (*_buffers)[buf_idx])
- {
- if (auto *array = buffer->data())
- {
- if (size_t size = array->size())
- {
- if (buff_data != nullptr)
- {
- *buff_data = reinterpret_cast<const uint8_t *>(array->data());
- }
- return size;
- }
- }
- }
-
- return 0;
-}
-
-circle::BuiltinOperator Reader::builtin_code(const circle::Operator *op) const
-{
- uint32_t index = op->opcode_index();
- assert(index < _op_codes.size());
- const circle::OperatorCode *opcode = _op_codes.at(index);
-
- return opcode->builtin_code();
-}
-
-std::string Reader::opcode_name(const circle::Operator *op) const
-{
- uint32_t index = op->opcode_index();
- assert(index < _op_codes.size());
- const circle::OperatorCode *opcode = _op_codes.at(index);
-
- if (!is_valid(opcode))
- {
- std::ostringstream oss;
- oss << "(invalid: " << index << ")";
- return oss.str();
- }
-
- return circletensordump::opcode_name(opcode);
-}
-
-bool Reader::select_subgraph(uint32_t sgindex)
-{
- _tensors = nullptr;
- _operators = nullptr;
-
- _inputs.clear();
- _outputs.clear();
-
- if (_subgraphs->Length() <= sgindex)
- {
- assert(false);
- return false;
- }
-
- const circle::SubGraph *subgraph = (*_subgraphs)[sgindex];
-
- _tensors = subgraph->tensors();
- _operators = subgraph->operators();
-
- _inputs = as_index_vector(subgraph->inputs());
- _outputs = as_index_vector(subgraph->outputs());
-
- return true;
-}
-
-} // namespace circletensordump
diff --git a/compiler/circle-tensordump/src/Reader.h b/compiler/circle-tensordump/src/Reader.h
deleted file mode 100644
index bbb039552..000000000
--- a/compiler/circle-tensordump/src/Reader.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __CIRCLE_TENSORDUMP_READER_H__
-#define __CIRCLE_TENSORDUMP_READER_H__
-
-#include <mio/circle/schema_generated.h>
-
-#include <map>
-#include <string>
-#include <vector>
-
-namespace circletensordump
-{
-
-template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T> *flat_array)
-{
- std::vector<T> ret(flat_array->Length());
- for (uint32_t i = 0; i < flat_array->Length(); i++)
- {
- ret[i] = flat_array->Get(i);
- }
- return ret;
-}
-
-bool is_valid(const circle::OperatorCode *opcode);
-bool is_custom(const circle::OperatorCode *opcode);
-std::string opcode_name(const circle::OperatorCode *opcode);
-const char *tensor_type(const circle::Tensor *tensor);
-const char *tensor_name(const circle::Tensor *tensor);
-
-/**
- * @brief Loads Circle file and provides helpers to access attributes
- */
-class Reader
-{
-private:
- using CircleSubGraphs_t = flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>;
- using CircleBuffers_t = flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>>;
- using CircleTensors_t = flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>;
- using CircleOperators_t = flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>;
-
-public:
- Reader(const circle::Model *model);
-
- Reader() = delete;
-
-public:
- const std::vector<const circle::OperatorCode *> &opcodes() { return _op_codes; }
- const CircleBuffers_t *buffers() { return _buffers; }
- const CircleTensors_t *tensors() { return _tensors; }
- const CircleOperators_t *operators() { return _operators; }
- const std::vector<int32_t> &inputs() const { return _inputs; }
- const std::vector<int32_t> &outputs() const { return _outputs; }
-
- uint32_t num_subgraph() const { return _subgraphs->Length(); }
-
- size_t buffer_info(uint32_t buf_idx, const uint8_t **buff_data);
- circle::BuiltinOperator builtin_code(const circle::Operator *op) const;
- std::string opcode_name(const circle::Operator *op) const;
-
-public:
- bool select_subgraph(uint32_t subgraph);
-
-private:
- const CircleSubGraphs_t *_subgraphs{nullptr};
- const CircleBuffers_t *_buffers{nullptr};
- const CircleTensors_t *_tensors{nullptr};
- const CircleOperators_t *_operators{nullptr};
-
- std::vector<const circle::OperatorCode *> _op_codes;
- std::vector<int32_t> _inputs;
- std::vector<int32_t> _outputs;
-};
-
-} // namespace circletensordump
-
-#endif // __CIRCLE_TENSORDUMP_READER_H__
diff --git a/compiler/circle-verify/CMakeLists.txt b/compiler/circle-verify/CMakeLists.txt
index f22174865..cdf74cc26 100644
--- a/compiler/circle-verify/CMakeLists.txt
+++ b/compiler/circle-verify/CMakeLists.txt
@@ -1,13 +1,14 @@
-if(NOT TARGET mio_circle)
+if(NOT TARGET mio_circle06)
+ message(STATUS "Skip circle-verify: mio_circle06 not found")
return()
-endif(NOT TARGET mio_circle)
+endif(NOT TARGET mio_circle06)
file(GLOB_RECURSE SOURCES "src/*.cpp")
add_executable(circle-verify ${SOURCES})
target_include_directories(circle-verify PRIVATE src)
target_link_libraries(circle-verify arser)
-target_link_libraries(circle-verify mio_circle)
+target_link_libraries(circle-verify mio_circle06)
target_link_libraries(circle-verify safemain)
target_link_libraries(circle-verify cwrap)
target_link_libraries(circle-verify foder)
diff --git a/compiler/circle-verify/requires.cmake b/compiler/circle-verify/requires.cmake
index e1b7fb212..2fd44ad75 100644
--- a/compiler/circle-verify/requires.cmake
+++ b/compiler/circle-verify/requires.cmake
@@ -1,5 +1,5 @@
require("arser")
-require("mio-circle")
+require("mio-circle06")
require("safemain")
require("cwrap")
require("foder")
diff --git a/compiler/circle-verify/src/Driver.cpp b/compiler/circle-verify/src/Driver.cpp
index 7a44c65b9..c3a414701 100644
--- a/compiler/circle-verify/src/Driver.cpp
+++ b/compiler/circle-verify/src/Driver.cpp
@@ -25,7 +25,7 @@
int entry(int argc, char **argv)
{
arser::Arser arser;
- arser.add_argument("circle").type(arser::DataType::STR).help("Circle file path to verify");
+ arser.add_argument("circle").help("Circle file path to verify");
try
{
diff --git a/compiler/circle2circle-dredd-recipe-test/CMakeLists.txt b/compiler/circle2circle-dredd-recipe-test/CMakeLists.txt
index 4bcaae347..9ccfd0008 100644
--- a/compiler/circle2circle-dredd-recipe-test/CMakeLists.txt
+++ b/compiler/circle2circle-dredd-recipe-test/CMakeLists.txt
@@ -1,3 +1,7 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
nnas_include(TargetRequire)
unset(REQUIRED_TARGETS)
@@ -69,7 +73,7 @@ add_custom_command(
COMMAND ${CMAKE_COMMAND} -E remove -f ${TEST_CONFIG}
COMMAND ${CMAKE_COMMAND} -E echo 'CIRCLE_INSPECT_PATH=\"$<TARGET_FILE:circle-inspect>\"' >> ${TEST_CONFIG}
COMMAND ${CMAKE_COMMAND} -E echo 'CIRCLE_VERIFY_PATH=\"$<TARGET_FILE:circle-verify>\"' >> ${TEST_CONFIG}
- DEPENDS
+ DEPENDS
circle-inspect
circle-verify
COMMENT "Generate test configuration"
diff --git a/compiler/circle2circle-dredd-recipe-test/README.md b/compiler/circle2circle-dredd-recipe-test/README.md
index 85140a8d1..b6e03e160 100644
--- a/compiler/circle2circle-dredd-recipe-test/README.md
+++ b/compiler/circle2circle-dredd-recipe-test/README.md
@@ -2,7 +2,8 @@
It tests the non-functional conditions of the optimized circle binary resulting from circle2circle.
-This test basically refers to the _TensorFlowLiteRecipes_ resource. So you should add what you want to test to both of the resource and `test.lst`.
+This test basically refers to the _TensorFlowLiteRecipes_ resource.
+So you should add what you want to test to both the resource and `test.lst`.
## Example
diff --git a/compiler/circle2circle-dredd-recipe-test/test.lst b/compiler/circle2circle-dredd-recipe-test/test.lst
index 302c3a796..2dd24af60 100644
--- a/compiler/circle2circle-dredd-recipe-test/test.lst
+++ b/compiler/circle2circle-dredd-recipe-test/test.lst
@@ -10,13 +10,88 @@
## TFLITE RECIPE
+Add(Net_Preactivation_BN_000 PASS fuse_preactivation_batchnorm)
+Add(Net_BroadcastTo_AddV2_000 PASS resolve_customop_add)
+Add(Net_BroadcastTo_AddV2_001 PASS resolve_customop_add)
+Add(Net_Conv_Add_Mul_000 PASS fuse_batchnorm_with_conv)
+Add(Net_Conv_Add_Mul_001 PASS fuse_batchnorm_with_conv)
+Add(Net_Conv_Add_Mul_002 PASS fuse_batchnorm_with_conv)
+Add(Net_Conv_FakeQuant_000 PASS remove_fakequant)
+Add(Net_Conv_QuantDequant_000 PASS remove_quantdequant)
+Add(Net_Conv_Min_Max_000 PASS transform_min_max_to_relu6)
+Add(Net_Conv_Min_Relu_000 PASS transform_min_relu_to_relu6)
+Add(Net_Conv_PReluGraph_000 PASS fuse_prelu)
+Add(Net_Conv_Relu6_000 PASS fuse_activation_function)
+Add(Net_Duplicate_Weights_000 PASS remove_duplicate_const)
+Add(Net_DwConv_BN_000 PASS fuse_batchnorm_with_dwconv)
+Add(Net_DwConv_BN_001 PASS fuse_batchnorm_with_dwconv)
+Add(Net_FullyConnected_Add_000 PASS fold_fully_connected)
+Add(Net_Reshape_Reshape_000 PASS remove_redundant_reshape)
+Add(Net_Squeeze_Squeeze_000 PASS substitute_squeeze_to_reshape)
+Add(Net_TConv_Add_000 PASS fuse_add_with_tconv)
+Add(Net_TConv_Add_001 PASS fuse_add_with_tconv)
+Add(Net_TConv_Add_002 PASS fuse_add_with_tconv)
Add(Net_TConv_BN_000 PASS fuse_batchnorm_with_tconv)
+Add(Net_TConv_BN_001 PASS fuse_batchnorm_with_tconv)
+Add(Net_TConv_BN_002 PASS fuse_batchnorm_with_tconv)
+Add(Net_TConv_BN_003 PASS fuse_batchnorm_with_tconv)
+Add(Net_TConv_BN_004 PASS fuse_batchnorm_with_tconv)
+Add(Net_TConv_BN_005 PASS fuse_batchnorm_with_tconv)
Add(Net_InstanceNorm_001 PASS fuse_instnorm)
-Add(Net_InstanceNorm_002 PASS fuse_instnorm)
+Add(Net_InstanceNorm_003 PASS fuse_instnorm)
+Add(Net_InstanceNorm_004 PASS fuse_instnorm)
+Add(Net_InstanceNorm_005 PASS fuse_instnorm)
+Add(Net_InstanceNorm_006 PASS fuse_instnorm)
+Add(Net_InstanceNorm_007 PASS fuse_instnorm)
+Add(Net_Maximum_Minimum_000 PASS transform_min_max_to_relu6)
Add(BatchMatMulV2_000 PASS resolve_customop_batchmatmul)
Add(MatMul_000 PASS resolve_customop_matmul)
Add(DepthwiseConv2D_003 PASS)
+Add(PadV2_001 PASS substitute_padv2_to_pad)
+Add(StridedSlice_003 PASS substitute_strided_slice_to_reshape)
+Add(MaxPoolWithArgmax_000 PASS resolve_customop_max_pool_with_argmax)
+Add(MaxPoolWithArgmax_001 PASS resolve_customop_max_pool_with_argmax)
+Add(MaxPoolWithArgmax_002 PASS resolve_customop_max_pool_with_argmax)
+Add(FullyConnected_007 PASS replace_non_const_fc_with_batch_matmul)
+Add(FullyConnected_008 PASS replace_non_const_fc_with_batch_matmul)
+Add(Net_Gelu_000 PASS fuse_gelu)
+Add(Net_Gelu_001 PASS fuse_gelu)
+Add(HardSwish_001 PASS decompose_hardswish)
## CIRCLE RECIPE
Add(CircleBatchMatMul_000)
+
+# REGRESSION test
+
+Add(REGRESS_ONNX_Conv_BN_001 PASS
+ convert_nchw_to_nhwc
+ nchw_to_nhwc_input_shape
+ nchw_to_nhwc_output_shape
+ remove_redundant_transpose
+ substitute_transpose_to_reshape
+ remove_redundant_reshape
+ remove_unnecessary_reshape
+ fuse_batchnorm_with_conv)
+
+Add(REGRESS_ONNX_Conv_BN_Relu6_001 PASS
+ convert_nchw_to_nhwc
+ nchw_to_nhwc_input_shape
+ nchw_to_nhwc_output_shape
+ remove_redundant_transpose
+ transform_min_max_to_relu6
+ fuse_batchnorm_with_conv
+ fuse_activation_function)
+
+Add(REGRESS_ONNX_Conv_BN_MeanMean_001 PASS
+ convert_nchw_to_nhwc
+ nchw_to_nhwc_input_shape
+ nchw_to_nhwc_output_shape
+ remove_redundant_transpose
+ fuse_batchnorm_with_conv
+ fuse_activation_function
+ fuse_mean_with_mean
+ fuse_transpose_with_mean)
+
+Add(REGRESS_ONNX_Mul_Mul_000 PASS
+ convert_nchw_to_nhwc)
diff --git a/compiler/circle2circle-dredd-recipe-test/testall.sh b/compiler/circle2circle-dredd-recipe-test/testall.sh
index 2899587ba..94831d12c 100755
--- a/compiler/circle2circle-dredd-recipe-test/testall.sh
+++ b/compiler/circle2circle-dredd-recipe-test/testall.sh
@@ -21,7 +21,6 @@ source "${CONFIG_PATH}"
echo "-- Found circle-inspect: ${CIRCLE_INSPECT_PATH}"
echo "-- Found circle-verify: ${CIRCLE_VERIFY_PATH}"
-echo "-- Found circle2circle: ${CIRCLE2CIRCLE_PATH}"
echo "-- Found common-artifacts: ${RESOURCE_DIR}"
TESTED=()
diff --git a/compiler/circle2circle/CMakeLists.txt b/compiler/circle2circle/CMakeLists.txt
index f60c896d8..dbe485b9f 100644
--- a/compiler/circle2circle/CMakeLists.txt
+++ b/compiler/circle2circle/CMakeLists.txt
@@ -3,16 +3,13 @@ file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
add_executable(circle2circle "${SOURCES}")
-target_include_directories(circle2circle PRIVATE include)
target_include_directories(circle2circle PRIVATE src)
-target_link_libraries(circle2circle foder)
target_link_libraries(circle2circle nncc_common)
target_link_libraries(circle2circle safemain)
target_link_libraries(circle2circle oops)
target_link_libraries(circle2circle hermes)
target_link_libraries(circle2circle hermes_std)
target_link_libraries(circle2circle loco)
-target_link_libraries(circle2circle mio_circle)
target_link_libraries(circle2circle luci_env)
target_link_libraries(circle2circle luci_import)
target_link_libraries(circle2circle luci_service)
@@ -30,15 +27,12 @@ endif(NOT ENABLE_TEST)
nnas_find_package(GTest REQUIRED)
GTest_AddTest(circle2circle_test ${TESTS} ${SOURCES})
-target_include_directories(circle2circle_test PRIVATE include)
target_include_directories(circle2circle_test PRIVATE src)
-target_link_libraries(circle2circle_test foder)
target_link_libraries(circle2circle_test nncc_common)
target_link_libraries(circle2circle_test oops)
target_link_libraries(circle2circle_test hermes)
target_link_libraries(circle2circle_test hermes_std)
target_link_libraries(circle2circle_test loco)
-target_link_libraries(circle2circle_test mio_circle)
target_link_libraries(circle2circle_test luci_env)
target_link_libraries(circle2circle_test luci_import)
target_link_libraries(circle2circle_test luci_service)
diff --git a/compiler/circle2circle/include/Model.h b/compiler/circle2circle/include/Model.h
deleted file mode 100644
index 35b7e3239..000000000
--- a/compiler/circle2circle/include/Model.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __CIRCLE2CIRCLE_MODEL_H__
-#define __CIRCLE2CIRCLE_MODEL_H__
-
-#include <mio/circle/schema_generated.h>
-
-#include <memory>
-
-namespace luci
-{
-
-struct Model
-{
- virtual ~Model() = default;
-
- virtual const ::circle::Model *model(void) = 0;
-};
-
-/**
- * @brief Load Circle model (as a raw Model) from a given path
- *
- * @note May return a nullptr
- */
-std::unique_ptr<Model> load_model(const std::string &path);
-
-} // namespace luci
-
-#endif // __CIRCLE2CIRCLE_MODEL_H__
diff --git a/compiler/circle2circle/requires.cmake b/compiler/circle2circle/requires.cmake
index 36a9efd16..4e5ed0dd1 100644
--- a/compiler/circle2circle/requires.cmake
+++ b/compiler/circle2circle/requires.cmake
@@ -1,9 +1,7 @@
-require("foder")
require("loco")
require("locop")
require("logo-core")
require("safemain")
-require("mio-circle")
require("oops")
require("hermes")
require("hermes-std")
diff --git a/compiler/circle2circle/src/Circle2Circle.cpp b/compiler/circle2circle/src/Circle2Circle.cpp
index 39ceade3a..6a7be2204 100644
--- a/compiler/circle2circle/src/Circle2Circle.cpp
+++ b/compiler/circle2circle/src/Circle2Circle.cpp
@@ -14,10 +14,10 @@
* limitations under the License.
*/
-#include <foder/FileLoader.h>
-
-#include <luci/Importer.h>
+#include <luci/ImporterEx.h>
#include <luci/CircleOptimizer.h>
+#include <luci/DynamicBatchToSingleBatch.h>
+#include <luci/Service/ChangeOutputs.h>
#include <luci/Service/Validate.h>
#include <luci/CircleExporter.h>
#include <luci/CircleFileExpContract.h>
@@ -29,7 +29,10 @@
#include <functional>
#include <iostream>
+#include <sstream>
#include <string>
+#include <vector>
+#include <cstdlib>
using Algorithms = luci::CircleOptimizer::Options::Algorithm;
using AlgorithmParameters = luci::CircleOptimizer::Options::AlgorithmParameters;
@@ -40,6 +43,21 @@ void print_version(void)
std::cout << vconone::get_copyright() << std::endl;
}
+void csv_tokenize(const std::string &data, std::vector<std::string> &result)
+{
+ const char delim = ',';
+ std::string token;
+ std::stringstream ss(data);
+
+ while (std::getline(ss, token, delim))
+ result.push_back(token);
+}
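+// e.g. csv_tokenize("a,b,c", result) appends {"a", "b", "c"} to result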
+
+void add_switch(arser::Arser &arser, const char *opt, const char *desc)
+{
+ arser.add_argument(opt).nargs(0).default_value(false).help(desc);
+}
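+// A switch added this way takes no value, defaults to false, and is read
+// back later with arser.get<bool>(opt).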
+
int entry(int argc, char **argv)
{
// Simple argument parser (based on map)
@@ -50,66 +68,143 @@ int entry(int argc, char **argv)
arser::Arser arser("circle2circle provides circle model optimization and transformations");
- arser.add_argument("--version")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("Show version information and exit")
- .exit_with(print_version);
-
- arser.add_argument("--all").nargs(0).required(false).default_value(false).help(
- "Enable all optimize options");
-
- arser.add_argument("--fuse_batchnorm_with_tconv")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse BatchNorm operators to Transposed Convolution operator");
-
- arser.add_argument("--fuse_bcq")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse operators and apply Binary Coded Quantization");
-
- arser.add_argument("--fuse_instnorm")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse operators to InstanceNorm operator");
-
- arser.add_argument("--resolve_customop_add")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will convert Custom(Add) to Add operator");
-
- arser.add_argument("--resolve_customop_batchmatmul")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will convert Custom(BatchMatmul) to BatchMatmul operator");
-
- arser.add_argument("--resolve_customop_matmul")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will convert Custom(Matmul) to Matmul operator");
-
- arser.add_argument("--mute_warnings")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will turn off warning messages");
-
- arser.add_argument("--disable_validation")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will turn off operator validations. May help input model investigation.");
-
- arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
- arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
+ arser::Helper::add_version(arser, print_version);
+ arser::Helper::add_verbose(arser);
+
+ add_switch(arser, "--fold_add_v2", "This will fold AddV2 operators with constant inputs");
+ add_switch(arser, "--fold_cast", "This will fold Cast operators with constant input");
+ add_switch(arser, "--fold_densify",
+ "This will fold Densify operators with sparse constant input");
+ add_switch(arser, "--fold_dequantize", "This will fold dequantize op");
+ add_switch(arser, "--fold_dwconv",
+ "This will fold Depthwise Convolution operator with constant inputs");
+ add_switch(arser, "--fold_fully_connected",
+ "This will fold FullyConnected operator with constant inputs");
+ add_switch(arser, "--fold_gather", "This will fold Gather operator");
+ add_switch(arser, "--fold_sparse_to_dense", "This will fold SparseToDense operator");
+ add_switch(arser, "--forward_reshape_to_unaryop",
+ "This will move Reshape after UnaryOp for centain condition");
+ add_switch(arser, "--forward_transpose_op",
+ "This will move Transpose Op forward if possible (for further optimization)");
+ add_switch(arser, "--fuse_activation_function",
+ "This will fuse Activation function to a preceding operator");
+ add_switch(arser, "--fuse_add_with_fully_connected",
+ "This will fuse Add operator to FullyConnected operator");
+ add_switch(arser, "--fuse_add_with_tconv",
+ "This will fuse Add operator to Transposed Convolution operator");
+ add_switch(arser, "--fuse_batchnorm_with_conv",
+ "This will fuse BatchNorm operators to Convolution operator");
+ add_switch(arser, "--fuse_batchnorm_with_dwconv",
+ "This will fuse BatchNorm operators to Depthwise Convolution operator");
+ add_switch(arser, "--fuse_batchnorm_with_tconv",
+ "This will fuse BatchNorm operators to Transposed Convolution operator");
+ add_switch(arser, "--fuse_bcq", "This will fuse operators and apply Binary Coded Quantization");
+ add_switch(arser, "--fuse_instnorm", "This will fuse operators to InstanceNorm operator");
+ add_switch(arser, "--fuse_mean_with_mean",
+ "This will fuse two Mean operations when they follow one by one. This will fold them "
+ "into one operation and merge reduction indices.");
+ add_switch(arser, "--fuse_transpose_with_mean",
+ "This will fuse Mean operation with a preceding Transpose under certain conditions.");
+ add_switch(arser, "--make_batchnorm_gamma_positive",
+ "This will make negative gamma of BatchNorm into a small positive value (1e-10). "
+ "Note that this pass can change the execution result of the model. So, use it only "
+ "when the impact is known to be acceptable.");
+ add_switch(arser, "--fuse_preactivation_batchnorm",
+ "This will fuse BatchNorm operators of pre-activations to Convolution operator");
+ add_switch(arser, "--fuse_prelu", "This will fuse operators to PReLU operator");
+ add_switch(arser, "--fuse_gelu", "This will fuse operators to GeLU operator");
+ add_switch(arser, "--remove_duplicate_const", "This will remove all duplicate constant nodes");
+ add_switch(arser, "--remove_fakequant", "This will remove FakeQuant operators");
+ add_switch(arser, "--remove_quantdequant", "This will remove Quantize-Dequantize sequence");
+ add_switch(arser, "--remove_redundant_quantize", "This will remove redundant Quantize operators");
+ add_switch(arser, "--remove_redundant_reshape",
+ "This will fuse or remove subsequent Reshape operators");
+ add_switch(arser, "--remove_redundant_transpose",
+ "This will fuse or remove subsequent Transpose operators");
+ add_switch(arser, "--remove_unnecessary_reshape",
+ "This will remove unnecessary reshape operators");
+ add_switch(arser, "--remove_unnecessary_slice", "This will remove unnecessary slice operators");
+ add_switch(arser, "--remove_unnecessary_strided_slice",
+ "This will remove unnecessary strided slice operators");
+ add_switch(arser, "--remove_unnecessary_split", "This will remove unnecessary split operators");
+ add_switch(arser, "--replace_cw_mul_add_with_depthwise_conv",
+ "This will replace channel-wise mul/add with DepthwiseConv2D operator");
+ add_switch(arser, "--replace_sub_with_add", "This will replace sub with add operator");
+ add_switch(arser, "--resolve_customop_add", "This will convert Custom(Add) to Add operator");
+ add_switch(arser, "--resolve_customop_batchmatmul",
+ "This will convert Custom(BatchMatmul) to BatchMatmul operator");
+ add_switch(arser, "--resolve_customop_matmul",
+ "This will convert Custom(Matmul) to Matmul operator");
+ add_switch(arser, "--resolve_customop_max_pool_with_argmax",
+ "This will convert Custom(MaxPoolWithArgmax) to equivalent set of operators");
+ add_switch(arser, "--resolve_customop_splitv",
+ "This will convert Custom(SplitV) to SplitV operator");
+ add_switch(arser, "--shuffle_weight_to_16x1float32",
+ "This will convert weight format of FullyConnected to SHUFFLED16x1FLOAT32. Note that "
+ "it only converts weights whose row is a multiple of 16");
+ add_switch(arser, "--replace_non_const_fc_with_batch_matmul",
+ "Replace FullyConnected with BatchMatMul when its weight is non-constant");
+ add_switch(arser, "--substitute_pack_to_reshape",
+ "This will convert single input Pack to Reshape");
+ add_switch(arser, "--substitute_padv2_to_pad",
+ "This will convert certain condition PadV2 to Pad");
+ add_switch(arser, "--substitute_splitv_to_split",
+ "This will convert certain condition SplitV to Split operator");
+ add_switch(arser, "--substitute_squeeze_to_reshape",
+ "This will convert certain condition Squeeze to Reshape");
+ add_switch(arser, "--substitute_strided_slice_to_reshape",
+ "This will convert certain condition Strided_Slice to Reshape");
+ add_switch(arser, "--substitute_transpose_to_reshape",
+ "This will convert single input Transpose to Reshape");
+ add_switch(arser, "--expand_broadcast_const", "This will expand broadcastable constant inputs");
+ add_switch(arser, "--unroll_unidirseqlstm", "Unroll UnidirectionalSequenceLSTM operator.");
+ add_switch(arser, "--convert_nchw_to_nhwc",
+ "Experimental: This will convert NCHW operators to NHWC under the assumption that "
+ "input model is NCHW.");
+ add_switch(arser, "--nchw_to_nhwc_input_shape",
+ "Convert the input shape of the model (argument for --convert_nchw_to_nhwc).");
+ add_switch(arser, "--nchw_to_nhwc_output_shape",
+ "Convert the output shape of the model (argument for --convert_nchw_to_nhwc).");
+ add_switch(arser, "--transform_min_max_to_relu6",
+ "Transform Minimum(6)-Maximum(0) pattern to Relu6 operator");
+ add_switch(arser, "--transform_min_relu_to_relu6",
+ "Transform Minimum(6)-Relu pattern to Relu6 operator");
+ add_switch(arser, "--decompose_hardswish",
+ "Decompose HardSwish operator to Add, Mul and Relu6 operators");
+ add_switch(arser, "--mute_warnings", "This will turn off warning messages");
+ add_switch(arser, "--disable_validation",
+ "This will turn off operator validations. May help input model investigation.");
+ add_switch(arser, "--generate_profile_data", "This will turn on profiling data generation.");
+
+ // Convert dynamic batch to single batch
+ // Users should use this option only when the first dimension of a rank-4 input (NHWC or NCHW)
+ // is dynamic. Remove this comment once inputs of other ranks are supported.
+ add_switch(arser, "--dynamic_batch_to_single_batch",
+ "Convert dynamic batch size (first dimension) of inputs to 1.");
+
+ arser.add_argument("--change_outputs")
+ .help("Experimental: Change first subgraph output nodes to CSV names");
+
+ arser.add_argument("input").help("Input circle model");
+ arser.add_argument("output").help("Output circle model");
+
+ // sparsification argument
+ arser.add_argument("--sparsify_tensor").help("Tensor name that you want to sparsify");
+
+ arser.add_argument("--sparsify_traversal_order")
+ .default_value("0,1,2,3")
+ .help("Traversal order of dimensions. Default value: 0,1,2,3");
+
+ arser.add_argument("--sparsify_format")
+ .default_value("d,s")
+ .help("Format of each dimension. 'd' stands for dense, 's' stands for sparse(CSR). Default "
+ "value: d,s");
+
+ arser.add_argument("--sparsify_block_size").help("Size of each block dimension");
+
+ arser.add_argument("--sparsify_block_map")
+ .default_value("0,1")
+ .help("Map from block dimension to the original tensor dimension. Default value: 0,1");
try
{
@@ -117,78 +212,226 @@ int entry(int argc, char **argv)
}
catch (const std::runtime_error &err)
{
- std::cout << err.what() << std::endl;
+ std::cerr << err.what() << std::endl;
std::cout << arser;
return 255;
}
- if (arser.get<bool>("--all"))
+ if (arser.get<bool>("--verbose"))
{
- options->enable(Algorithms::FuseBCQ);
- options->enable(Algorithms::FuseInstanceNorm);
- options->enable(Algorithms::ResolveCustomOpAdd);
- options->enable(Algorithms::ResolveCustomOpBatchMatMul);
- options->enable(Algorithms::ResolveCustomOpMatMul);
+ // The third parameter of setenv is the overwrite flag:
+ // if it is zero, an existing value is not overwritten.
+ setenv("LUCI_LOG", "100", 0);
}
+ if (arser.get<bool>("--fold_add_v2"))
+ options->enable(Algorithms::FoldAddV2);
+ if (arser.get<bool>("--fold_cast"))
+ options->enable(Algorithms::FoldCast);
+ if (arser.get<bool>("--fold_densify"))
+ options->enable(Algorithms::FoldDensify);
+ if (arser.get<bool>("--fold_dequantize"))
+ options->enable(Algorithms::FoldDequantize);
+ if (arser.get<bool>("--fold_dwconv"))
+ options->enable(Algorithms::FoldDepthwiseConv2D);
+ if (arser.get<bool>("--fold_fully_connected"))
+ options->enable(Algorithms::FoldFullyConnected);
+ if (arser.get<bool>("--fold_gather"))
+ options->enable(Algorithms::FoldGather);
+ if (arser.get<bool>("--fold_sparse_to_dense"))
+ options->enable(Algorithms::FoldSparseToDense);
+ if (arser.get<bool>("--forward_reshape_to_unaryop"))
+ options->enable(Algorithms::ForwardReshapeToUnaryOp);
+ if (arser.get<bool>("--forward_transpose_op"))
+ options->enable(Algorithms::ForwardTransposeOp);
+ if (arser.get<bool>("--fuse_activation_function"))
+ options->enable(Algorithms::FuseActivationFunction);
+ if (arser.get<bool>("--fuse_batchnorm_with_conv"))
+ options->enable(Algorithms::FuseBatchNormWithConv);
+ if (arser.get<bool>("--fuse_add_with_fully_connected"))
+ options->enable(Algorithms::FuseAddWithFullyConnected);
+ if (arser.get<bool>("--fuse_add_with_tconv"))
+ options->enable(Algorithms::FuseAddWithTConv);
+ if (arser.get<bool>("--fuse_batchnorm_with_dwconv"))
+ options->enable(Algorithms::FuseBatchNormWithDwConv);
if (arser.get<bool>("--fuse_batchnorm_with_tconv"))
options->enable(Algorithms::FuseBatchNormWithTConv);
if (arser.get<bool>("--fuse_bcq"))
options->enable(Algorithms::FuseBCQ);
if (arser.get<bool>("--fuse_instnorm"))
options->enable(Algorithms::FuseInstanceNorm);
+ if (arser.get<bool>("--fuse_mean_with_mean"))
+ options->enable(Algorithms::FuseMeanWithMean);
+ if (arser.get<bool>("--make_batchnorm_gamma_positive"))
+ options->enable(Algorithms::MakeBatchNormGammaPositive);
+ if (arser.get<bool>("--fuse_preactivation_batchnorm"))
+ options->enable(Algorithms::FusePreActivationBatchNorm);
+ if (arser.get<bool>("--fuse_prelu"))
+ options->enable(Algorithms::FusePRelu);
+ if (arser.get<bool>("--fuse_gelu"))
+ options->enable(Algorithms::FuseGelu);
+ if (arser.get<bool>("--fuse_transpose_with_mean"))
+ options->enable(Algorithms::FuseTransposeWithMean);
+ if (arser.get<bool>("--remove_duplicate_const"))
+ options->enable(Algorithms::RemoveDuplicateConst);
+ if (arser.get<bool>("--remove_fakequant"))
+ options->enable(Algorithms::RemoveFakeQuant);
+ if (arser.get<bool>("--remove_quantdequant"))
+ options->enable(Algorithms::RemoveQuantDequantSeq);
+ if (arser.get<bool>("--remove_redundant_quantize"))
+ options->enable(Algorithms::RemoveRedundantQuantize);
+ if (arser.get<bool>("--remove_redundant_reshape"))
+ options->enable(Algorithms::RemoveRedundantReshape);
+ if (arser.get<bool>("--remove_redundant_transpose"))
+ options->enable(Algorithms::RemoveRedundantTranspose);
+ if (arser.get<bool>("--remove_unnecessary_reshape"))
+ options->enable(Algorithms::RemoveUnnecessaryReshape);
+ if (arser.get<bool>("--remove_unnecessary_slice"))
+ options->enable(Algorithms::RemoveUnnecessarySlice);
+ if (arser.get<bool>("--remove_unnecessary_strided_slice"))
+ options->enable(Algorithms::RemoveUnnecessaryStridedSlice);
+ if (arser.get<bool>("--remove_unnecessary_split"))
+ options->enable(Algorithms::RemoveUnnecessarySplit);
+ if (arser.get<bool>("--replace_cw_mul_add_with_depthwise_conv"))
+ options->enable(Algorithms::ReplaceMulAddWithDepthwiseConv);
+ if (arser.get<bool>("--replace_sub_with_add"))
+ options->enable(Algorithms::ReplaceSubWithAdd);
if (arser.get<bool>("--resolve_customop_add"))
options->enable(Algorithms::ResolveCustomOpAdd);
if (arser.get<bool>("--resolve_customop_batchmatmul"))
options->enable(Algorithms::ResolveCustomOpBatchMatMul);
if (arser.get<bool>("--resolve_customop_matmul"))
options->enable(Algorithms::ResolveCustomOpMatMul);
+ if (arser.get<bool>("--resolve_customop_max_pool_with_argmax"))
+ options->enable(Algorithms::ResolveCustomOpMaxPoolWithArgmax);
+ if (arser.get<bool>("--resolve_customop_splitv"))
+ options->enable(Algorithms::ResolveCustomOpSplitV);
+ if (arser.get<bool>("--shuffle_weight_to_16x1float32"))
+ options->enable(Algorithms::ShuffleWeightTo16x1Float32);
+ if (arser.get<bool>("--replace_non_const_fc_with_batch_matmul"))
+ options->enable(Algorithms::ReplaceNonConstFCWithBatchMatMul);
+ if (arser.get<bool>("--substitute_pack_to_reshape"))
+ options->enable(Algorithms::SubstitutePackToReshape);
+ if (arser.get<bool>("--substitute_padv2_to_pad"))
+ options->enable(Algorithms::SubstitutePadV2ToPad);
+ if (arser.get<bool>("--substitute_splitv_to_split"))
+ options->enable(Algorithms::SubstituteSplitVToSplit);
+ if (arser.get<bool>("--substitute_squeeze_to_reshape"))
+ options->enable(Algorithms::SubstituteSqueezeToReshape);
+ if (arser.get<bool>("--substitute_strided_slice_to_reshape"))
+ options->enable(Algorithms::SubstituteStridedSliceToReshape);
+ if (arser.get<bool>("--substitute_transpose_to_reshape"))
+ options->enable(Algorithms::SubstituteTransposeToReshape);
+ if (arser.get<bool>("--transform_min_max_to_relu6"))
+ options->enable(Algorithms::TransformMinMaxToRelu6Pass);
+ if (arser.get<bool>("--transform_min_relu_to_relu6"))
+ options->enable(Algorithms::TransformMinReluToRelu6Pass);
+ if (arser.get<bool>("--decompose_hardswish"))
+ options->enable(Algorithms::DecomposeHardSwishPass);
+ if (arser.get<bool>("--expand_broadcast_const"))
+ options->enable(Algorithms::ExpandBroadcastConst);
+ if (arser.get<bool>("--unroll_unidirseqlstm"))
+ options->enable(Algorithms::UnrollUnidirSeqLSTM);
if (arser.get<bool>("--mute_warnings"))
settings->set(luci::UserSettings::Key::MuteWarnings, true);
if (arser.get<bool>("--disable_validation"))
settings->set(luci::UserSettings::Key::DisableValidation, true);
+ if (arser.get<bool>("--generate_profile_data"))
+ settings->set(luci::UserSettings::Key::ProfilingDataGen, true);
std::string input_path = arser.get<std::string>("input");
std::string output_path = arser.get<std::string>("output");
- // Load model from the file
- foder::FileLoader file_loader{input_path};
- std::vector<char> model_data;
+ if (arser["--sparsify_tensor"])
+ {
+ options->enable(Algorithms::SparsifyTensorPass);
+ options->param(AlgorithmParameters::Sparsify_tensor_name,
+ arser.get<std::string>("--sparsify_tensor"));
+ options->param(AlgorithmParameters::Sparsify_traversal_order,
+ arser.get<std::string>("--sparsify_traversal_order"));
+ options->param(AlgorithmParameters::Sparsify_format,
+ arser.get<std::string>("--sparsify_format"));
+ if (arser["--sparsify_block_size"])
+ options->param(AlgorithmParameters::Sparsify_block_size,
+ arser.get<std::string>("--sparsify_block_size"));
+ else
+ {
+ std::cerr << "ERROR: Block size not provided" << std::endl;
+ return 255;
+ }
+ options->param(AlgorithmParameters::Sparsify_block_map,
+ arser.get<std::string>("--sparsify_block_map"));
+ }
- try
+ if (arser.get<bool>("--convert_nchw_to_nhwc"))
{
- model_data = file_loader.load();
+ options->enable(Algorithms::ConvertNCHWToNHWC);
+ if (arser.get<bool>("--nchw_to_nhwc_input_shape"))
+ options->param(AlgorithmParameters::NCHW_to_NHWC_input_shape, "true");
+ if (arser.get<bool>("--nchw_to_nhwc_output_shape"))
+ options->param(AlgorithmParameters::NCHW_to_NHWC_output_shape, "true");
}
- catch (const std::runtime_error &err)
+
+ // Change output nodes
+ bool change_outputs = false;
+ std::vector<std::string> new_outputs;
+ if (arser["--change_outputs"])
{
- std::cerr << err.what() << std::endl;
- return EXIT_FAILURE;
+ change_outputs = true;
+ auto csv_nodes = arser.get<std::string>("--change_outputs");
+ csv_tokenize(csv_nodes, new_outputs);
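+ // e.g. --change_outputs "conv_out,softmax_out" (hypothetical node names)
+ // yields new_outputs == {"conv_out", "softmax_out"}.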
}
- flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(model_data.data()), model_data.size()};
- if (!circle::VerifyModelBuffer(verifier))
+ bool dynamic_batch_to_single_batch = false;
+ if (arser.get<bool>("--dynamic_batch_to_single_batch"))
{
- std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl;
+ dynamic_batch_to_single_batch = true;
+ }
+
+ // Import from input Circle file
+ luci::ImporterEx importerex;
+ auto module = importerex.importVerifyModule(input_path);
+ if (module.get() == nullptr)
return EXIT_FAILURE;
+
+ // Convert dynamic batch to single batch
+ // Why here? It has to be done before 'optimize', because most optimization
+ // passes are written based on static shapes
+ if (dynamic_batch_to_single_batch)
+ {
+ luci::dynamic_batch_to_single_batch(module.get());
+
+ if (!luci::validate_shape(module.get()))
+ {
+ if (settings->get(luci::UserSettings::Key::DisableValidation))
+ std::cerr
+ << "WARNING: Invalid shape detected after converting dynamic batch to single batch"
+ << std::endl;
+ else
+ {
+ std::cerr << "ERROR: Invalid shape detected after converting dynamic batch to single batch"
+ << std::endl;
+ return 255;
+ }
+ }
}
- const circle::Model *circle_model = circle::GetModel(model_data.data());
- if (circle_model == nullptr)
+ if (change_outputs)
{
- std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl;
- return EXIT_FAILURE;
+ auto graph = module->graph(0);
+ luci::change_outputs(graph, new_outputs);
}
- // Import from input Circle file
- luci::Importer importer;
- auto module = importer.importModule(circle_model);
+ // call luci optimizations for module
+ optimizer.optimize(module.get());
for (size_t idx = 0; idx < module->size(); ++idx)
{
auto graph = module->graph(idx);
- // call luci optimizations
+ // call luci optimizations for graph
optimizer.optimize(graph);
+ optimizer.sparsify(graph);
if (!luci::validate(graph))
{
diff --git a/compiler/circle2circle/src/TestHelper.h b/compiler/circle2circle/src/TestHelper.h
index f4dbe23a9..1e055b217 100644
--- a/compiler/circle2circle/src/TestHelper.h
+++ b/compiler/circle2circle/src/TestHelper.h
@@ -39,7 +39,7 @@ public:
{
assert(_ptr < N);
_argv[_ptr] = new char[strlen(in) + 1];
- strcpy(_argv[_ptr], in);
+ strncpy(_argv[_ptr], in, strlen(in) + 1);
_ptr++;
}
@@ -47,7 +47,7 @@ public:
private:
pchar_t _argv[N] = {
- nullptr,
+ nullptr,
};
size_t _ptr = 0;
};
diff --git a/compiler/circlechef/CMakeLists.txt b/compiler/circlechef/CMakeLists.txt
index 3e2ddcbb3..56c501c24 100644
--- a/compiler/circlechef/CMakeLists.txt
+++ b/compiler/circlechef/CMakeLists.txt
@@ -1,12 +1,14 @@
nnas_find_package(Protobuf QUIET)
if(NOT Protobuf_FOUND)
+ message(STATUS "circlechef: SKIP (missing Protobuf)")
return()
endif(NOT Protobuf_FOUND)
-if(NOT TARGET mio_circle)
+if(NOT TARGET mio_circle06)
+ message(STATUS "circlechef: SKIP (missing mio-circle06)")
return()
-endif(NOT TARGET mio_circle)
+endif(NOT TARGET mio_circle06)
# Recipe Parser
add_subdirectory(proto)
diff --git a/compiler/circlechef/circle/CMakeLists.txt b/compiler/circlechef/circle/CMakeLists.txt
index 2ca016b84..cdd6040b7 100644
--- a/compiler/circlechef/circle/CMakeLists.txt
+++ b/compiler/circlechef/circle/CMakeLists.txt
@@ -4,7 +4,7 @@ add_library(circlechef_circle STATIC ${SOURCES})
target_include_directories(circlechef_circle PUBLIC include)
target_include_directories(circlechef_circle PRIVATE src)
target_link_libraries(circlechef_circle circlechef_proto)
-target_link_libraries(circlechef_circle mio_circle)
-target_link_libraries(circlechef_circle stdex)
+target_link_libraries(circlechef_circle mio_circle06)
+target_link_libraries(circlechef_circle mio_circle06_helper)
target_link_libraries(circlechef_circle cwrap)
target_link_libraries(circlechef_circle souschef)
diff --git a/compiler/circlechef/circle/src/CircleImport.cpp b/compiler/circlechef/circle/src/CircleImport.cpp
index e970fbce3..f8756ef94 100644
--- a/compiler/circlechef/circle/src/CircleImport.cpp
+++ b/compiler/circlechef/circle/src/CircleImport.cpp
@@ -18,38 +18,13 @@
#include "Convert.h"
+#include <mio_circle/Helper.h>
+
#include <sstream>
namespace circlechef
{
-const char *kEmptyTensorName = "(noname)";
-
-const char *tensor_type(const circle::Tensor *tensor)
-{
- return circle::EnumNameTensorType(tensor->type());
-}
-
-const char *tensor_name(const circle::Tensor *tensor)
-{
- auto name = tensor->name();
- if (name)
- return name->c_str();
- return kEmptyTensorName;
-}
-
-bool is_valid(const circle::OperatorCode *opcode)
-{
- circle::BuiltinOperator code = opcode->builtin_code();
- return (circle::BuiltinOperator_MIN <= code && code <= circle::BuiltinOperator_MAX);
-}
-
-bool is_custom(const circle::OperatorCode *opcode)
-{
- circle::BuiltinOperator code = opcode->builtin_code();
- return (code == circle::BuiltinOperator_CUSTOM);
-}
-
CircleImport::CircleImport(const circle::Model *model)
{
_subgraphs = model->subgraphs();
@@ -92,7 +67,7 @@ circle::BuiltinOperator CircleImport::builtin_code(const circle::Operator *op) c
assert(index < _op_codes.size());
const circle::OperatorCode *opcode = _op_codes.at(index);
- return opcode->builtin_code();
+ return mio::circle::builtin_code_neutral(opcode);
}
std::string CircleImport::opcode_name(const circle::Operator *op) const
@@ -101,14 +76,14 @@ std::string CircleImport::opcode_name(const circle::Operator *op) const
assert(index < _op_codes.size());
const circle::OperatorCode *opcode = _op_codes.at(index);
- if (!is_valid(opcode))
+ if (!mio::circle::is_valid(opcode))
{
std::ostringstream oss;
oss << "(invalid: " << index << ")";
return oss.str();
}
- if (is_custom(opcode))
+ if (mio::circle::is_custom(opcode))
{
if (!opcode->custom_code())
return "(invalid custom)";
diff --git a/compiler/circlechef/circle/src/CircleImport.h b/compiler/circlechef/circle/src/CircleImport.h
index 23ca29beb..9c1d161b6 100644
--- a/compiler/circlechef/circle/src/CircleImport.h
+++ b/compiler/circlechef/circle/src/CircleImport.h
@@ -34,11 +34,6 @@ using CircleTensors_t = flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>
using CircleBuffers_t = flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>>;
using CircleOperators_t = flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>;
-const char *tensor_type(const circle::Tensor *tensor);
-const char *tensor_name(const circle::Tensor *tensor);
-bool is_valid(const circle::OperatorCode *opcode);
-bool is_custom(const circle::OperatorCode *opcode);
-
/**
* @brief Loads TF lite file and provides helpers to access attributes
*/
diff --git a/compiler/circlechef/circle/src/Convert.cpp b/compiler/circlechef/circle/src/Convert.cpp
index 77614d9b5..248687fed 100644
--- a/compiler/circlechef/circle/src/Convert.cpp
+++ b/compiler/circlechef/circle/src/Convert.cpp
@@ -33,10 +33,11 @@ circlechef::TensorType as_circlechef_type(const circle::TensorType type)
return circlechef::UINT8;
case circle::TensorType_BOOL:
return circlechef::BOOL;
+ case circle::TensorType_INT16:
+ return circlechef::INT16;
// TODO handle other types
// TensorType_FLOAT16
// TensorType_STRING
- // TensorType_INT16
// TensorType_COMPLEX64
default:
throw std::runtime_error{"unsupported tensor type"};
diff --git a/compiler/circlechef/circle/src/RecipeChef.cpp b/compiler/circlechef/circle/src/RecipeChef.cpp
index 51326c7f8..e21bca8a6 100644
--- a/compiler/circlechef/circle/src/RecipeChef.cpp
+++ b/compiler/circlechef/circle/src/RecipeChef.cpp
@@ -15,6 +15,7 @@
*/
#include <circlechef/RecipeChef.h>
+#include <mio_circle/Helper.h>
#include "Convert.h"
#include "CircleImport.h"
@@ -42,7 +43,7 @@ void set_inputs(CircleImport *import, circlechef::Operation *operation, const ci
else
{
auto tensor = tensors->Get(input);
- std::string name = tensor_name(tensor);
+ std::string name = mio::circle::tensor_name(tensor);
operation->add_input(name);
}
}
@@ -56,7 +57,7 @@ void set_outputs(CircleImport *import, circlechef::Operation *operation, const c
for (auto output : outputs)
{
auto tensor = tensors->Get(output);
- std::string name = tensor_name(tensor);
+ std::string name = mio::circle::tensor_name(tensor);
operation->add_output(name);
}
}
@@ -108,7 +109,7 @@ std::unique_ptr<ModelRecipe> generate_recipe(const circle::Model *model)
::circlechef::Operand *operand = model_recipe->add_operand();
- operand->set_name(tensor_name(tensor));
+ operand->set_name(mio::circle::tensor_name(tensor));
operand->set_type(as_circlechef_type(tensor->type()));
std::vector<int32_t> dims = as_index_vector(tensor->shape());
@@ -184,6 +185,16 @@ std::unique_ptr<ModelRecipe> generate_recipe(const circle::Model *model)
circlechef::TensorQuantization *chef_quant = operand->mutable_quant();
chef_quant->set_quantized_dimension(quant->quantized_dimension());
}
+
+ auto shape_signature = tensor->shape_signature();
+ if (shape_signature != nullptr)
+ {
+ circlechef::ShapeSignature *chef_shape_signature = operand->mutable_shape_signature();
+ for (uint32_t i = 0; i < shape_signature->size(); ++i)
+ {
+ chef_shape_signature->add_dim(shape_signature->Get(i));
+ }
+ }
}
// add all operators
@@ -214,14 +225,14 @@ std::unique_ptr<ModelRecipe> generate_recipe(const circle::Model *model)
for (const auto input : inputs)
{
auto tensor = tensors->Get(input);
- std::string name = tensor_name(tensor);
+ std::string name = mio::circle::tensor_name(tensor);
model_recipe->add_input(name);
}
for (const auto output : outputs)
{
auto tensor = tensors->Get(output);
- std::string name = tensor_name(tensor);
+ std::string name = mio::circle::tensor_name(tensor);
model_recipe->add_output(name);
}
diff --git a/compiler/circlechef/core/CMakeLists.txt b/compiler/circlechef/core/CMakeLists.txt
index 54b3ea53d..dc1dbc4dc 100644
--- a/compiler/circlechef/core/CMakeLists.txt
+++ b/compiler/circlechef/core/CMakeLists.txt
@@ -1,9 +1,23 @@
file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
add_library(circlechef_core STATIC ${SOURCES})
target_include_directories(circlechef_core PUBLIC include)
target_include_directories(circlechef_core PRIVATE src)
-target_link_libraries(circlechef_core circlechef_proto)
-target_link_libraries(circlechef_core circlechef_log)
-target_link_libraries(circlechef_core mio_circle)
-target_link_libraries(circlechef_core souschef)
+target_link_libraries(circlechef_core PUBLIC circlechef_proto)
+target_link_libraries(circlechef_core PUBLIC circlechef_log)
+target_link_libraries(circlechef_core PUBLIC mio_circle06)
+target_link_libraries(circlechef_core PUBLIC souschef)
+target_link_libraries(circlechef_core PRIVATE nncc_coverage)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(circlechef_core_test ${TESTS})
+target_include_directories(circlechef_core_test PRIVATE src)
+target_link_libraries(circlechef_core_test circlechef_core)
+target_link_libraries(circlechef_core_test nncc_coverage)
diff --git a/compiler/circlechef/core/src/Convert.cpp b/compiler/circlechef/core/src/Convert.cpp
index 2db0a6212..6066324b0 100644
--- a/compiler/circlechef/core/src/Convert.cpp
+++ b/compiler/circlechef/core/src/Convert.cpp
@@ -62,8 +62,12 @@ circle::TensorType as_circle_tensortype(const circlechef::TensorType &value)
return circle::TensorType_UINT8;
case circlechef::INT64:
return circle::TensorType_INT64;
+ case circlechef::STRING:
+ return circle::TensorType_STRING;
case circlechef::BOOL:
return circle::TensorType_BOOL;
+ case circlechef::INT16:
+ return circle::TensorType_INT16;
default:
break;
}
diff --git a/compiler/circlechef/core/src/Convert.test.cpp b/compiler/circlechef/core/src/Convert.test.cpp
new file mode 100644
index 000000000..b17f5df44
--- /dev/null
+++ b/compiler/circlechef/core/src/Convert.test.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Convert.h"
+
+#include <gtest/gtest.h>
+
+TEST(ConvertTest, as_circle_padding)
+{
+ ASSERT_EQ(circle::Padding_SAME, as_circle_padding(circlechef::SAME));
+ ASSERT_EQ(circle::Padding_VALID, as_circle_padding(circlechef::VALID));
+}
+
+TEST(ConvertTest, as_circle_padding_NEG)
+{
+ EXPECT_THROW(as_circle_padding(static_cast<circlechef::Padding>(99)), std::runtime_error);
+}
+
+TEST(ConvertTest, as_circle_activation)
+{
+ ASSERT_EQ(circle::ActivationFunctionType_NONE, as_circle_activation(circlechef::NONE));
+ ASSERT_EQ(circle::ActivationFunctionType_RELU, as_circle_activation(circlechef::RELU));
+ ASSERT_EQ(circle::ActivationFunctionType_RELU6, as_circle_activation(circlechef::RELU6));
+}
+
+TEST(ConvertTest, as_circle_activation_NEG)
+{
+ EXPECT_THROW(as_circle_activation(static_cast<circlechef::Activation>(99)), std::runtime_error);
+}
+
+TEST(ConvertTest, as_circle_tensortype)
+{
+ ASSERT_EQ(circle::TensorType_FLOAT32, as_circle_tensortype(circlechef::FLOAT32));
+ ASSERT_EQ(circle::TensorType_INT32, as_circle_tensortype(circlechef::INT32));
+ ASSERT_EQ(circle::TensorType_UINT8, as_circle_tensortype(circlechef::UINT8));
+ ASSERT_EQ(circle::TensorType_INT64, as_circle_tensortype(circlechef::INT64));
+ ASSERT_EQ(circle::TensorType_BOOL, as_circle_tensortype(circlechef::BOOL));
+ ASSERT_EQ(circle::TensorType_INT16, as_circle_tensortype(circlechef::INT16));
+}
+
+TEST(ConvertTest, as_circle_tensortype_NEG)
+{
+ EXPECT_THROW(as_circle_tensortype(static_cast<circlechef::TensorType>(99)), std::runtime_error);
+}
diff --git a/compiler/circlechef/core/src/DataChef.def b/compiler/circlechef/core/src/DataChef.def
new file mode 100644
index 000000000..c634c047e
--- /dev/null
+++ b/compiler/circlechef/core/src/DataChef.def
@@ -0,0 +1,23 @@
+#ifndef DATA_CHEF
+#error "Define DATA_CHEF first"
+#endif // DATA_CHEF
+
+// DATA_CHEF(TYPE, NAME, FACTORY_CLASS)
+// "TYPE" SHOULD BE an enum tag of tflchef::TensorType
+DATA_CHEF(FLOAT32, constant, ConstantDataChefFactory<float>)
+DATA_CHEF(BOOL, constant, ConstantDataChefFactory<bool>)
+DATA_CHEF(UINT8, constant, ConstantDataChefFactory<uint8_t>)
+DATA_CHEF(INT16, constant, ConstantDataChefFactory<int16_t>)
+DATA_CHEF(INT32, constant, ConstantDataChefFactory<int32_t>)
+DATA_CHEF(INT64, constant, ConstantDataChefFactory<int64_t>)
+DATA_CHEF(INT64, explicit, ExplicitDataChefFactory<int64_t>)
+DATA_CHEF(INT32, explicit, ExplicitDataChefFactory<int32_t>)
+DATA_CHEF(INT16, explicit, ExplicitDataChefFactory<int16_t>)
+DATA_CHEF(UINT8, explicit, ExplicitDataChefFactory<uint8_t>)
+DATA_CHEF(BOOL, explicit, ExplicitDataChefFactory<bool>)
+DATA_CHEF(FLOAT32, explicit, ExplicitDataChefFactory<float>)
+DATA_CHEF(STRING, explicit, ExplicitDataChefFactory<std::string>)
+DATA_CHEF(FLOAT32, gaussian, GaussianFloat32DataChefFactory)
+DATA_CHEF(INT32, gaussian, GaussianInt32DataChefFactory)
+DATA_CHEF(INT16, gaussian, GaussianInt16DataChefFactory)
+DATA_CHEF(UINT8, gaussian, GaussianUint8DataChefFactory)
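DataChef.def is an X-macro table: a consumer defines DATA_CHEF(TYPE, NAME, FACTORY_CLASS), includes the file, and gets one statement per row. This is exactly how ModelChef.cpp (next hunk) populates its registries; the expansion for one row looks like:

    // From ModelChef.cpp: each row becomes one registration call.
    #define DATA_CHEF(TYPE, NAME, FACTORY_CLASS) \
      data_chef_registry(::circlechef::TYPE)     \
        .add(#NAME, std::unique_ptr<FACTORY_CLASS>(new FACTORY_CLASS()));
    #include "DataChef.def"
    #undef DATA_CHEF

    // e.g. DATA_CHEF(INT16, constant, ConstantDataChefFactory<int16_t>) expands to:
    // data_chef_registry(::circlechef::INT16)
    //   .add("constant", std::unique_ptr<ConstantDataChefFactory<int16_t>>(
    //                      new ConstantDataChefFactory<int16_t>()));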
diff --git a/compiler/circlechef/core/src/ModelChef.cpp b/compiler/circlechef/core/src/ModelChef.cpp
index aa54678ec..6c5206dfc 100644
--- a/compiler/circlechef/core/src/ModelChef.cpp
+++ b/compiler/circlechef/core/src/ModelChef.cpp
@@ -51,7 +51,7 @@ class GeneratedModelImpl final : public circlechef::GeneratedModel::Impl
{
public:
GeneratedModelImpl(std::unique_ptr<flatbuffers::FlatBufferBuilder> &&builder)
- : _builder{std::move(builder)}
+ : _builder{std::move(builder)}
{
// DO NOTHING
}
@@ -89,7 +89,9 @@ DataChefRegistry &data_chef_registry(const circlechef::TensorType &type)
static DataChefRegistry s64;
static DataChefRegistry fp32;
static DataChefRegistry u8;
+ static DataChefRegistry string;
static DataChefRegistry boolean;
+ static DataChefRegistry s16;
switch (type)
{
@@ -101,8 +103,12 @@ DataChefRegistry &data_chef_registry(const circlechef::TensorType &type)
return fp32;
case circlechef::UINT8:
return u8;
+ case circlechef::STRING:
+ return string;
case circlechef::BOOL:
return boolean;
+ case circlechef::INT16:
+ return s16;
default:
break;
}
@@ -376,6 +382,13 @@ template <typename T> void cook_graph(const T &graph, CookParams &cp)
quant_index = quant_builder.Finish();
}
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape_signature;
+ if (operand.has_shape_signature())
+ {
+ auto signature = as_dims(operand.shape_signature());
+ shape_signature = flatbuffer_builder->CreateVector(signature);
+ }
+
// Create Tensor
circle::TensorBuilder tensor_builder{*flatbuffer_builder};
@@ -385,6 +398,8 @@ template <typename T> void cook_graph(const T &graph, CookParams &cp)
tensor_builder.add_name(name);
if (operand.has_quant())
tensor_builder.add_quantization(quant_index);
+ if (operand.has_shape_signature())
+ tensor_builder.add_shape_signature(shape_signature);
// Append!
tensor_vec.emplace_back(tensor_builder.Finish());
@@ -480,15 +495,15 @@ GeneratedModel cook(const ::circlechef::ModelRecipe &model_recipe)
// Initialize Data Chef Registry
#define DATA_CHEF(TYPE, NAME, FACTORY_CLASS) \
data_chef_registry(::circlechef::TYPE) \
- .add(#NAME, std::unique_ptr<FACTORY_CLASS>(new FACTORY_CLASS()));
-#include <souschef/DataChef.def>
+ .add(#NAME, std::unique_ptr<FACTORY_CLASS>(new FACTORY_CLASS()));
+#include "DataChef.def"
#undef DATA_CHEF
//
// Create FlatBufferBuilder
//
auto flatbuffer_builder =
- std::unique_ptr<flatbuffers::FlatBufferBuilder>(new flatbuffers::FlatBufferBuilder(1024));
+ std::unique_ptr<flatbuffers::FlatBufferBuilder>(new flatbuffers::FlatBufferBuilder(1024));
// Operand-related
std::vector<flatbuffers::Offset<::circle::Buffer>> buffer_vec;
@@ -501,10 +516,14 @@ GeneratedModel cook(const ::circlechef::ModelRecipe &model_recipe)
// Create OperatorCode with Builtin Operator
std::map<circle::BuiltinOperator, int32_t> builtin_code_map =
- gather_builtincode_map(model_recipe);
+ gather_builtincode_map(model_recipe);
for (auto const &opcode : builtin_code_map)
{
circle::OperatorCodeBuilder code_builder{*flatbuffer_builder};
+ int8_t dep_code = 127; // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES
+ if (opcode.first < circle::BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES)
+ dep_code = static_cast<int8_t>(opcode.first);
+ code_builder.add_deprecated_builtin_code(dep_code);
code_builder.add_builtin_code(opcode.first);
code_builder.add_version(opcode.second);
auto code = code_builder.Finish();
@@ -583,7 +602,7 @@ GeneratedModel cook(const ::circlechef::ModelRecipe &model_recipe)
// Return "GenerateModel"
return GeneratedModel{
- std::unique_ptr<GeneratedModelImpl>(new GeneratedModelImpl(std::move(flatbuffer_builder)))};
+ std::unique_ptr<GeneratedModelImpl>(new GeneratedModelImpl(std::move(flatbuffer_builder)))};
}
} // namespace circlechef
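The OperatorCode hunk above keeps older schema readers working: the legacy deprecated_builtin_code field is an int8_t, so any operator whose enum value does not fit is recorded as the placeholder 127 (BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES) while the real value goes into builtin_code. The same clamping rule in isolation (the helper name is illustrative):

    // Sketch of the rule applied in cook() above.
    int8_t to_deprecated_code(circle::BuiltinOperator code)
    {
      if (code < circle::BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES)
        return static_cast<int8_t>(code); // fits in the legacy int8 field
      return 127;                         // placeholder for greater op codes
    }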
diff --git a/compiler/circlechef/core/src/Op/BCQFullyConnected.cpp b/compiler/circlechef/core/src/Op/BCQFullyConnected.cpp
index 4c82c52cc..497cbb86b 100644
--- a/compiler/circlechef/core/src/Op/BCQFullyConnected.cpp
+++ b/compiler/circlechef/core/src/Op/BCQFullyConnected.cpp
@@ -26,9 +26,9 @@ flatbuffers::Offset<void> BCQFullyConnectedChef::value(flatbuffers::FlatBufferBu
circle::BCQFullyConnectedOptionsBuilder bcq_fully_connected_options_builder{fbb};
bcq_fully_connected_options_builder.add_weights_hidden_size(
- operation.bcq_fully_connected_options().weights_hidden_size());
+ operation.bcq_fully_connected_options().weights_hidden_size());
bcq_fully_connected_options_builder.add_fused_activation_function(
- as_circle_activation(operation.bcq_fully_connected_options().activation()));
+ as_circle_activation(operation.bcq_fully_connected_options().activation()));
return bcq_fully_connected_options_builder.Finish().Union();
}
diff --git a/compiler/circlechef/core/src/Op/BCQGather.cpp b/compiler/circlechef/core/src/Op/BCQGather.cpp
index 08f6f611f..3b343ee66 100644
--- a/compiler/circlechef/core/src/Op/BCQGather.cpp
+++ b/compiler/circlechef/core/src/Op/BCQGather.cpp
@@ -24,7 +24,7 @@ flatbuffers::Offset<void> BCQGatherChef::value(flatbuffers::FlatBufferBuilder &f
circle::BCQGatherOptionsBuilder bcq_gather_options_builder{fbb};
bcq_gather_options_builder.add_input_hidden_size(
- operation.bcq_gather_options().input_hidden_size());
+ operation.bcq_gather_options().input_hidden_size());
bcq_gather_options_builder.add_axis(operation.bcq_gather_options().axis());
return bcq_gather_options_builder.Finish().Union();
diff --git a/compiler/circlechef/core/src/Op/BatchMatMul.cpp b/compiler/circlechef/core/src/Op/BatchMatMul.cpp
index d98c0801a..645571abe 100644
--- a/compiler/circlechef/core/src/Op/BatchMatMul.cpp
+++ b/compiler/circlechef/core/src/Op/BatchMatMul.cpp
@@ -24,9 +24,9 @@ flatbuffers::Offset<void> BatchMatMulChef::value(flatbuffers::FlatBufferBuilder
circle::BatchMatMulOptionsBuilder batch_matmul_options_options_builder{fbb};
batch_matmul_options_options_builder.add_adjoint_lhs(
- operation.batch_matmul_options().adjoint_lhs());
+ operation.batch_matmul_options().adjoint_lhs());
batch_matmul_options_options_builder.add_adjoint_rhs(
- operation.batch_matmul_options().adjoint_rhs());
+ operation.batch_matmul_options().adjoint_rhs());
return batch_matmul_options_options_builder.Finish().Union();
}
diff --git a/compiler/circlechef/proto/circlechef.proto b/compiler/circlechef/proto/circlechef.proto
index 3e5e6b168..d5e08576f 100644
--- a/compiler/circlechef/proto/circlechef.proto
+++ b/compiler/circlechef/proto/circlechef.proto
@@ -18,13 +18,19 @@ enum TensorType {
INT32 = 2;
UINT8 = 3;
INT64 = 4;
+ STRING = 5;
BOOL = 6;
+ INT16 = 7;
}
message TensorShape {
repeated uint32 dim = 3;
}
+message ShapeSignature {
+ repeated int32 dim = 1;
+}
+
message TensorFiller {
optional string tag = 1;
repeated string arg = 2;
@@ -44,6 +50,7 @@ message Operand {
optional TensorShape shape = 3;
optional TensorFiller filler = 4;
optional TensorQuantization quant = 5;
+ optional ShapeSignature shape_signature = 8;
}
// This enum value corresponds to Padding in TensorFlow Lite schema
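shape_signature follows the TensorFlow Lite convention: shape carries the concrete extents while shape_signature marks dynamic dimensions with -1, as in the shape_signature test recipe added further below. A hedged reader-side sketch (the helper is hypothetical):

    // Sketch only: a -1 entry in shape_signature marks a dynamic dimension;
    // the concrete extent for the current execution comes from shape.
    bool is_dynamic_dim(const circle::Tensor *tensor, uint32_t axis)
    {
      const auto *sig = tensor->shape_signature();
      return sig != nullptr && sig->Get(axis) == -1;
    }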
diff --git a/compiler/circlechef/requires.cmake b/compiler/circlechef/requires.cmake
index 2106146d7..67eaa278c 100644
--- a/compiler/circlechef/requires.cmake
+++ b/compiler/circlechef/requires.cmake
@@ -1,9 +1,10 @@
require("arser")
require("nnkit")
require("cwrap")
-require("mio-circle")
+require("mio-circle06")
require("safemain")
require("hermes")
require("hermes-std")
require("foder")
require("souschef")
+require("circle-verify")
diff --git a/compiler/circlechef/tests/CMakeLists.txt b/compiler/circlechef/tests/CMakeLists.txt
index 4dc58addf..7ae619f8b 100644
--- a/compiler/circlechef/tests/CMakeLists.txt
+++ b/compiler/circlechef/tests/CMakeLists.txt
@@ -3,6 +3,15 @@ set(CIRCLERECIPES_DIR "${CircleRecipes_DIR}")
file(GLOB RECIPES RELATIVE ${CIRCLERECIPES_DIR} "${CIRCLERECIPES_DIR}/*/test.recipe")
+set(CIRCLECHEF_FILE_PATH $<TARGET_FILE:circlechef-file>)
+set(CIRCLECHEF_REVERSE_PATH $<TARGET_FILE:circlechef-reverse>)
+if(DEFINED ENV{BUILD_HOST_EXEC})
+  # TODO use a better way to represent the path for host executables
+ set(CIRCLECHEF_FILE_PATH $ENV{BUILD_HOST_EXEC}/compiler/circlechef/tools/file/circlechef-file)
+ set(CIRCLECHEF_REVERSE_PATH $ENV{BUILD_HOST_EXEC}/compiler/circlechef/tools/reverse/circlechef-reverse)
+ message(STATUS "CIRCLECHEF_FILE_PATH = ${CIRCLECHEF_FILE_PATH}")
+endif(DEFINED ENV{BUILD_HOST_EXEC})
+
foreach(RECIPE IN ITEMS ${RECIPES})
get_filename_component(RECIPE_PREFIX ${RECIPE} DIRECTORY)
@@ -18,8 +27,34 @@ foreach(RECIPE IN ITEMS ${RECIPES})
# Generate .circle
add_custom_command(OUTPUT ${RECIPE_OUTPUT_FILE}
- COMMAND circlechef-file ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE}
- DEPENDS circlechef-file ${RECIPE_SOURCE_FILE}
+ COMMAND ${CIRCLECHEF_FILE_PATH} ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE}
+ DEPENDS ${CIRCLECHEF_FILE_PATH} ${RECIPE_SOURCE_FILE}
+ COMMENT "Generating ${RECIPE_OUTPUT_FILE}")
+
+ list(APPEND TESTS ${RECIPE_PREFIX})
+ list(APPEND TESTFILES ${RECIPE_OUTPUT_FILE})
+endforeach(RECIPE)
+
+# Add local files
+file(GLOB RECIPES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*/test.recipe")
+
+foreach(RECIPE IN ITEMS ${RECIPES})
+ get_filename_component(RECIPE_PREFIX ${RECIPE} DIRECTORY)
+
+ set(RECIPE_SOURCE_FILE "${RECIPE_PREFIX}.recipe")
+ set(RECIPE_OUTPUT_FILE "${RECIPE_PREFIX}.circle")
+
+ # Copy .recipe
+ add_custom_command(OUTPUT ${RECIPE_SOURCE_FILE}
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different
+ "${CMAKE_CURRENT_SOURCE_DIR}/${RECIPE}" ${RECIPE_SOURCE_FILE}
+ DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${RECIPE}"
+ COMMENT "Generating ${RECIPE_SOURCE_FILE}")
+
+ # Generate .circle
+ add_custom_command(OUTPUT ${RECIPE_OUTPUT_FILE}
+ COMMAND ${CIRCLECHEF_FILE_PATH} ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE}
+ DEPENDS ${CIRCLECHEF_FILE_PATH} ${RECIPE_SOURCE_FILE}
COMMENT "Generating ${RECIPE_OUTPUT_FILE}")
list(APPEND TESTS ${RECIPE_PREFIX})
@@ -42,16 +77,41 @@ foreach(CIRCLEFILE IN ITEMS ${GEN_CIRCLEFILES})
# Generate .gen.recipe from generated .circle
add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE}
- COMMAND circlechef-reverse ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE}
- DEPENDS circlechef-reverse ${RECIPE_OUTPUT_FILE}
+ COMMAND ${CIRCLECHEF_REVERSE_PATH} ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE}
+ DEPENDS ${CIRCLECHEF_REVERSE_PATH} ${RECIPE_OUTPUT_FILE}
COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE}")
# now we are going to generate .gen.circle from .gen.recipe
  # to check that the generated .gen.recipe file is correct by using it.
  # as weight values may differ, binary comparison is not acceptable.
add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE2}
- COMMAND circlechef-file ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2}
- DEPENDS circlechef-file ${RECIPE_GEN_OUTPUT_FILE}
+ COMMAND ${CIRCLECHEF_FILE_PATH} ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2}
+ DEPENDS ${CIRCLECHEF_FILE_PATH} ${RECIPE_GEN_OUTPUT_FILE}
+ COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE2}")
+
+ list(APPEND TESTS ${CIRCLE_PREFIX}.gen)
+ list(APPEND TESTFILES ${RECIPE_GEN_OUTPUT_FILE2})
+endforeach(CIRCLEFILE)
+
+# Test local circlechef-reverse
+file(GLOB GEN_CIRCLEFILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*/test.reverse")
+
+foreach(CIRCLEFILE IN ITEMS ${GEN_CIRCLEFILES})
+ get_filename_component(CIRCLE_PREFIX ${CIRCLEFILE} DIRECTORY)
+
+ set(RECIPE_OUTPUT_FILE "${CIRCLE_PREFIX}.circle")
+ set(RECIPE_GEN_OUTPUT_FILE "${CIRCLE_PREFIX}.gen.recipe")
+ set(RECIPE_GEN_OUTPUT_FILE2 "${CIRCLE_PREFIX}.gen.circle")
+
+ # Generate .gen.recipe from generated .circle
+ add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE}
+ COMMAND ${CIRCLECHEF_REVERSE_PATH} ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE}
+ DEPENDS ${CIRCLECHEF_REVERSE_PATH} ${RECIPE_OUTPUT_FILE}
+ COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE}")
+
+ add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE2}
+ COMMAND ${CIRCLECHEF_FILE_PATH} ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2}
+ DEPENDS ${CIRCLECHEF_FILE_PATH} ${RECIPE_GEN_OUTPUT_FILE}
COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE2}")
list(APPEND TESTS ${CIRCLE_PREFIX}.gen)
diff --git a/compiler/circlechef/tests/shape_signature/test.recipe b/compiler/circlechef/tests/shape_signature/test.recipe
new file mode 100644
index 000000000..37968ab0b
--- /dev/null
+++ b/compiler/circlechef/tests/shape_signature/test.recipe
@@ -0,0 +1,45 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 6 dim: 12 }
+ shape_signature { dim: -1 dim: 8 dim: 6 dim: 12 }
+}
+operand {
+ name: "gamma"
+ type: FLOAT32
+ shape { dim: 12 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "beta"
+ type: FLOAT32
+ shape { dim: 12 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 6 dim: 12 }
+ shape_signature { dim: -1 dim: 8 dim: 6 dim: 12 }
+}
+operation {
+ type: "InstanceNorm"
+ input: "ifm"
+ input: "gamma"
+ input: "beta"
+ output: "ofm"
+ instance_norm_options {
+ epsilon: 0.00001
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "ofm"
diff --git a/tests/tools/tflite_benchmark_model/.FORMATDENY b/compiler/circlechef/tests/shape_signature/test.reverse
index e69de29bb..e69de29bb 100644
--- a/tests/tools/tflite_benchmark_model/.FORMATDENY
+++ b/compiler/circlechef/tests/shape_signature/test.reverse
diff --git a/compiler/circlechef/tests/short_int_datatype/test.recipe b/compiler/circlechef/tests/short_int_datatype/test.recipe
new file mode 100644
index 000000000..e0f582527
--- /dev/null
+++ b/compiler/circlechef/tests/short_int_datatype/test.recipe
@@ -0,0 +1,32 @@
+operand {
+ name: "ifm1"
+ type: INT16
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "constant"
+ type: INT16
+ shape { dim: 1 dim: 4 dim: 3 dim: 4 }
+ filler {
+ tag: "gaussian"
+ arg: "3.0"
+ arg: "10.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: INT16
+ shape { dim: 1 dim: 4 dim: 4 dim: 4 }
+}
+operation {
+ type: "BatchMatMul"
+ input: "ifm1"
+ input: "constant"
+ output: "ofm"
+ batch_matmul_options {
+ adjoint_lhs: false
+ adjoint_rhs: false
+ }
+}
+input: "ifm1"
+output: "ofm"
diff --git a/compiler/circlechef/tests/short_int_datatype/test.reverse b/compiler/circlechef/tests/short_int_datatype/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/compiler/circlechef/tests/short_int_datatype/test.reverse
diff --git a/compiler/circlechef/tests/string_tensor/test.recipe b/compiler/circlechef/tests/string_tensor/test.recipe
new file mode 100644
index 000000000..8ab5bbce0
--- /dev/null
+++ b/compiler/circlechef/tests/string_tensor/test.recipe
@@ -0,0 +1,31 @@
+operand {
+ name: "ifm"
+ shape { }
+ type: STRING
+}
+operand {
+ name: "constant"
+ type: STRING
+ shape { }
+ filler {
+ tag: "explicit"
+ arg: "Hello"
+ }
+}
+operand {
+ name: "ofm"
+ type: STRING
+ shape { }
+}
+operation {
+ type: "BatchMatMul"
+ input: "ifm"
+ input: "constant"
+ output: "ofm"
+ batch_matmul_options {
+ adjoint_lhs: false
+ adjoint_rhs: false
+ }
+}
+input: "ifm"
+output: "ofm"
diff --git a/compiler/circlechef/tools/console/CMakeLists.txt b/compiler/circlechef/tools/console/CMakeLists.txt
index 10168fca3..faf0a94f0 100644
--- a/compiler/circlechef/tools/console/CMakeLists.txt
+++ b/compiler/circlechef/tools/console/CMakeLists.txt
@@ -1,3 +1,12 @@
add_executable(circlechef Driver.cpp)
target_link_libraries(circlechef circlechef_core)
target_link_libraries(circlechef safemain)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(circlechef_test Driver.test.cpp Driver.cpp)
+target_link_libraries(circlechef_test circlechef_core)
diff --git a/compiler/circlechef/tools/console/Driver.cpp b/compiler/circlechef/tools/console/Driver.cpp
index 0909f5927..6aa4c3cc5 100644
--- a/compiler/circlechef/tools/console/Driver.cpp
+++ b/compiler/circlechef/tools/console/Driver.cpp
@@ -22,7 +22,7 @@
#include <iostream>
-int entry(int argc, char **argv)
+int entry_stream(std::istream &is)
{
int32_t model_version = 1;
@@ -30,7 +30,7 @@ int entry(int argc, char **argv)
// Read a model recipe from standard input
{
- google::protobuf::io::IstreamInputStream iis{&std::cin};
+ google::protobuf::io::IstreamInputStream iis{&is};
if (!google::protobuf::TextFormat::Parse(&iis, &model_recipe))
{
std::cerr << "ERROR: Failed to parse recipe" << std::endl;
@@ -56,3 +56,9 @@ int entry(int argc, char **argv)
return 0;
}
+
+int entry(int, char **)
+{
+ // forward to entry_stream
+ return entry_stream(std::cin);
+}
diff --git a/compiler/circlechef/tools/console/Driver.test.cpp b/compiler/circlechef/tools/console/Driver.test.cpp
new file mode 100644
index 000000000..d8e4e657e
--- /dev/null
+++ b/compiler/circlechef/tools/console/Driver.test.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+// entry function under test, declared in Driver.cpp
+int entry_stream(std::istream &is);
+
+TEST(CircleChefDriverTest, entry_empty_NEG)
+{
+ std::istringstream empty_input("");
+
+ ASSERT_EQ(0, entry_stream(empty_input));
+}
+
+TEST(CircleChefDriverTest, entry_invalid_NEG)
+{
+  std::istringstream invalid_input("invalid: input");
+
+  ASSERT_NE(0, entry_stream(invalid_input));
+}
+
+TEST(CircleChefDriverTest, entry_invalid_version_NEG)
+{
+  std::istringstream invalid_version("version: 9999");
+
+  ASSERT_NE(0, entry_stream(invalid_version));
+}
diff --git a/compiler/circlechef/tools/file/Driver.cpp b/compiler/circlechef/tools/file/Driver.cpp
index bcc0c7ae9..9c4256b40 100644
--- a/compiler/circlechef/tools/file/Driver.cpp
+++ b/compiler/circlechef/tools/file/Driver.cpp
@@ -28,10 +28,8 @@
int entry(int argc, char **argv)
{
arser::Arser arser;
- arser.add_argument("recipe")
- .type(arser::DataType::STR)
- .help("Source recipe file path to convert");
- arser.add_argument("circle").type(arser::DataType::STR).help("Target circle file path");
+ arser.add_argument("recipe").help("Source recipe file path to convert");
+ arser.add_argument("circle").help("Target circle file path");
try
{
diff --git a/compiler/circlechef/tools/reverse/Driver.cpp b/compiler/circlechef/tools/reverse/Driver.cpp
index 8a2b85fc7..c8ef07c6f 100644
--- a/compiler/circlechef/tools/reverse/Driver.cpp
+++ b/compiler/circlechef/tools/reverse/Driver.cpp
@@ -25,10 +25,8 @@
int entry(int argc, char **argv)
{
arser::Arser arser;
- arser.add_argument("circle")
- .type(arser::DataType::STR)
- .help("Source circle file path to convert");
- arser.add_argument("recipe").type(arser::DataType::STR).help("Target recipe file path");
+ arser.add_argument("circle").help("Source circle file path to convert");
+ arser.add_argument("recipe").help("Target recipe file path");
try
{
diff --git a/compiler/circledump/CMakeLists.txt b/compiler/circledump/CMakeLists.txt
index fb72b1d66..b7326730a 100644
--- a/compiler/circledump/CMakeLists.txt
+++ b/compiler/circledump/CMakeLists.txt
@@ -1,6 +1,7 @@
-if(NOT TARGET mio_circle)
+if(NOT TARGET mio_circle06)
+ message(STATUS "Skip circledump: mio_circle06 not found")
return()
-endif(NOT TARGET mio_circle)
+endif(NOT TARGET mio_circle06)
set(DRIVER "driver/Driver.cpp")
@@ -9,6 +10,9 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
add_executable(circledump ${DRIVER} ${SOURCES})
target_include_directories(circledump PRIVATE include)
target_link_libraries(circledump arser)
-target_link_libraries(circledump mio_circle)
+target_link_libraries(circledump foder)
+target_link_libraries(circledump mio_circle06)
+target_link_libraries(circledump mio_circle06_helper)
target_link_libraries(circledump safemain)
-target_link_libraries(circledump flatbuffers)
+
+install(TARGETS circledump DESTINATION bin)
diff --git a/compiler/circledump/README.md b/compiler/circledump/README.md
index 686e918ac..f71194b08 100644
--- a/compiler/circledump/README.md
+++ b/compiler/circledump/README.md
@@ -65,7 +65,6 @@ O T(3) ofm
### Dependency
-- mio-circle
+- mio-circle06
- safemain
-- stdex
- FlatBuffers
diff --git a/compiler/circledump/driver/Driver.cpp b/compiler/circledump/driver/Driver.cpp
index 657f24fe0..5b0871a91 100644
--- a/compiler/circledump/driver/Driver.cpp
+++ b/compiler/circledump/driver/Driver.cpp
@@ -15,7 +15,7 @@
*/
#include <arser/arser.h>
-#include <circleread/Model.h>
+#include <foder/FileLoader.h>
#include <circledump/Dump.h>
#include <iostream>
@@ -23,7 +23,7 @@
int entry(int argc, char **argv)
{
arser::Arser arser;
- arser.add_argument("circle").type(arser::DataType::STR).help("Circle file path to dump");
+ arser.add_argument("circle").help("Circle file path to dump");
try
{
@@ -38,14 +38,10 @@ int entry(int argc, char **argv)
std::string circle_path = arser.get<std::string>("circle");
// Load Circle model from a circle file
- std::unique_ptr<circleread::Model> model = circleread::load_circle(circle_path);
- if (model == nullptr)
- {
- std::cerr << "ERROR: Failed to load circle '" << circle_path << "'" << std::endl;
- return 255;
- }
-
- const circle::Model *circlemodel = model->model();
+ foder::FileLoader fileLoader{circle_path};
+ std::vector<char> modelData = fileLoader.load();
+ const circle::Model *circlemodel = circle::GetModel(modelData.data());
if (circlemodel == nullptr)
{
std::cerr << "ERROR: Failed to load circle '" << circle_path << "'" << std::endl;
diff --git a/compiler/circledump/include/circleread/Model.h b/compiler/circledump/include/circleread/Model.h
deleted file mode 100644
index 234db8b4c..000000000
--- a/compiler/circledump/include/circleread/Model.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __CIRCLEREAD_MODEL_H__
-#define __CIRCLEREAD_MODEL_H__
-
-#include <mio/circle/schema_generated.h>
-
-#include <memory>
-
-namespace circleread
-{
-
-struct Model
-{
- virtual ~Model() = default;
-
- virtual const ::circle::Model *model(void) const = 0;
-};
-
-/**
- * @brief Load Circle model (as a raw Model) from a given path
- *
- * @note May return a nullptr
- */
-std::unique_ptr<Model> load_circle(const std::string &path);
-
-} // namespace circleread
-
-#endif // __CIRCLEREAD_MODEL_H__
diff --git a/compiler/circledump/requires.cmake b/compiler/circledump/requires.cmake
index 81e0f0dbd..b3a2638ef 100644
--- a/compiler/circledump/requires.cmake
+++ b/compiler/circledump/requires.cmake
@@ -1,3 +1,4 @@
require("arser")
-require("mio-circle")
+require("foder")
+require("mio-circle06")
require("safemain")
diff --git a/compiler/circledump/src/Dump.cpp b/compiler/circledump/src/Dump.cpp
index c695b0721..69427a20e 100644
--- a/compiler/circledump/src/Dump.cpp
+++ b/compiler/circledump/src/Dump.cpp
@@ -15,9 +15,11 @@
*/
#include <circledump/Dump.h>
+#include <mio_circle/Helper.h>
+#include <mio_circle/Reader.h>
-#include "Read.h"
#include "OpPrinter.h"
+#include "MetadataPrinter.h"
#include <ostream>
@@ -73,10 +75,34 @@ std::ostream &operator<<(std::ostream &os, const std::vector<int32_t> &vect)
return os;
}
-template <typename T> void dump_fbvect(std::ostream &os, const flatbuffers::Vector<T> *fbvect)
+template <typename T>
+void dump_fbvect(std::ostream &os, const flatbuffers::Vector<T> *fbvect, uint32_t size)
+{
+ for (uint32_t q = 0; q < size; q++)
+ {
+ if (q)
+ os << ", ";
+ os << fbvect->Get(q);
+ }
+}
+
+template <>
+void dump_fbvect(std::ostream &os, const flatbuffers::Vector<uint8_t> *fbvect, uint32_t size)
+{
+ assert(fbvect);
+ for (uint32_t q = 0; q < size; q++)
+ {
+ if (q)
+ os << ", ";
+ os << static_cast<uint32_t>(fbvect->Get(q));
+ }
+}
+
+template <typename T>
+std::ostream &operator<<(std::ostream &os, const flatbuffers::Vector<T> *fbvect)
{
if (fbvect == nullptr)
- return;
+ return os;
bool ellipsis = (fbvect->size() > 4);
auto limit_size = ellipsis ? 4 : fbvect->size();
@@ -85,26 +111,18 @@ template <typename T> void dump_fbvect(std::ostream &os, const flatbuffers::Vect
{
os << "(" << fbvect->size() << ") ";
}
- for (uint32_t q = 0; q < limit_size; q++)
- {
- if (q)
- os << ", ";
- os << fbvect->Get(q);
- }
+
+ dump_fbvect(os, fbvect, limit_size);
+
if (ellipsis)
{
os << " ... ";
}
-}
-template <typename T>
-std::ostream &operator<<(std::ostream &os, const flatbuffers::Vector<T> *fbvect)
-{
- dump_fbvect(os, fbvect);
return os;
}
-void dump_sub_graph(std::ostream &os, circleread::Reader &reader)
+void dump_sub_graph(std::ostream &os, mio::circle::Reader &reader)
{
auto tensors = reader.tensors();
auto operators = reader.operators();
@@ -124,7 +142,7 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader)
// dump operands(tensors)
os << "Operands: T(subgraph index : tensor index) TYPE (shape) (shape_signature) "
- << "B(buffer index) OperandName" << std::endl;
+ << "B(buffer index) (variable) OperandName" << std::endl;
for (uint32_t i = 0; i < tensors->Length(); ++i)
{
// TODO refactor to some better structure
@@ -132,18 +150,22 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader)
std::vector<int32_t> dims = {-1};
if (tensor->shape())
- dims = circleread::as_index_vector(tensor->shape());
+ dims = mio::circle::as_index_vector(tensor->shape());
- os << "T(" << reader.subgraph_index() << ":" << i << ") " << circleread::tensor_type(tensor)
+ os << "T(" << reader.subgraph_index() << ":" << i << ") " << mio::circle::tensor_type(tensor)
<< " ";
os << "(" << dims << ") ";
if (tensor->shape_signature())
{
- std::vector<int32_t> dims_sig = circleread::as_index_vector(tensor->shape_signature());
+ std::vector<int32_t> dims_sig = mio::circle::as_index_vector(tensor->shape_signature());
os << "(" << dims_sig << ") ";
}
os << "B(" << tensor->buffer() << ") ";
- os << circleread::tensor_name(tensor) << std::endl;
+ if (tensor->is_variable())
+ {
+ os << "(variable) ";
+ }
+ os << mio::circle::tensor_name(tensor) << std::endl;
if (auto q_params = tensor->quantization())
{
@@ -182,8 +204,90 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader)
os << std::endl;
}
}
+
+ if (const auto &s_params = tensor->sparsity())
+ {
+ std::string strsparsity = " Sparsity: ";
+ std::string strsindent(strsparsity.size(), ' ');
+ os << strsparsity;
+
+ if (s_params->traversal_order())
+ {
+ os << "traversal_order(" << s_params->traversal_order() << ") ";
+ os << std::endl << strsindent;
+ }
+ if (s_params->block_map())
+ {
+ os << "block_map(" << s_params->block_map() << ") ";
+ os << std::endl << strsindent;
+ }
+ if (const auto &dim_metadata = s_params->dim_metadata())
+ {
+ uint32_t idx = 0;
+ for (const auto &dm : *dim_metadata)
+ {
+ std::string strdm = "dim_metadata[" + std::to_string(idx++) + "]: ";
+ std::string strdm_indent = strsindent + std::string(strdm.size(), ' ');
+ os << strdm;
+
+ os << "format(" << circle::EnumNameDimensionType(dm->format()) << ") ";
+ os << std::endl << strdm_indent;
+
+ os << "dense_size(" << dm->dense_size() << ") ";
+ os << std::endl << strdm_indent;
+
+ os << "array_segments_type("
+ << circle::EnumNameSparseIndexVector(dm->array_segments_type()) << ") ";
+ os << std::endl << strdm_indent;
+
+ os << "array_segments(";
+ switch (dm->array_segments_type())
+ {
+ case circle::SparseIndexVector_NONE:
+ // DO NOTHING
+ break;
+ case circle::SparseIndexVector_Int32Vector:
+ os << dm->array_segments_as_Int32Vector()->values();
+ break;
+ case circle::SparseIndexVector_Uint16Vector:
+ os << dm->array_segments_as_Uint16Vector()->values();
+ break;
+ case circle::SparseIndexVector_Uint8Vector:
+ os << dm->array_segments_as_Uint8Vector()->values();
+ break;
+ default:
+ throw std::runtime_error("Invalid SparseIndexVector type of array_segments");
+ }
+ os << ")" << std::endl << strdm_indent;
+
+ os << "array_indices_type(" << circle::EnumNameSparseIndexVector(dm->array_indices_type())
+ << ") ";
+ os << std::endl << strdm_indent;
+
+ os << "array_indices(";
+ switch (dm->array_indices_type())
+ {
+ case circle::SparseIndexVector_NONE:
+ // DO NOTHING
+ break;
+ case circle::SparseIndexVector_Int32Vector:
+ os << dm->array_indices_as_Int32Vector()->values();
+ break;
+ case circle::SparseIndexVector_Uint16Vector:
+ os << dm->array_indices_as_Uint16Vector()->values();
+ break;
+ case circle::SparseIndexVector_Uint8Vector:
+ os << dm->array_indices_as_Uint8Vector()->values();
+ break;
+ default:
+ throw std::runtime_error("Invalid SparseIndexVector type of array_indices");
+ }
+ os << ")" << std::endl << strsindent;
+ }
+ }
+ }
+ os << std::endl;
}
- os << std::endl;
// dump operators
os << "Operators: O(subgraph index : operator index) OpCodeName " << std::endl;
@@ -195,8 +299,8 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader)
const auto op = operators->Get(i);
circle::BuiltinOperator builtincode = reader.builtin_code(op);
- const std::vector<int32_t> &inputs = circleread::as_index_vector(op->inputs());
- const std::vector<int32_t> &outputs = circleread::as_index_vector(op->outputs());
+ const std::vector<int32_t> &inputs = mio::circle::as_index_vector(op->inputs());
+ const std::vector<int32_t> &outputs = mio::circle::as_index_vector(op->outputs());
auto op_name = reader.opcode_name(op);
os << "O(" << reader.subgraph_index() << ":" << i << ") " << op_name << " ";
@@ -213,7 +317,7 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader)
if (input >= 0)
{
auto tensor = tensors->Get(input);
- os << circleread::tensor_name(tensor);
+ os << mio::circle::tensor_name(tensor);
}
os << std::endl;
}
@@ -223,7 +327,7 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader)
if (output >= 0)
{
auto tensor = tensors->Get(output);
- os << circleread::tensor_name(tensor);
+ os << mio::circle::tensor_name(tensor);
}
os << std::endl;
}
@@ -236,14 +340,14 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader)
for (const auto input : reader.inputs())
{
auto tensor = tensors->Get(input);
- std::string name = circleread::tensor_name(tensor);
+ std::string name = mio::circle::tensor_name(tensor);
os << "I T(" << reader.subgraph_index() << ":" << input << ") " << name << std::endl;
}
for (const auto output : reader.outputs())
{
auto tensor = tensors->Get(output);
- std::string name = circleread::tensor_name(tensor);
+ std::string name = mio::circle::tensor_name(tensor);
os << "O T(" << reader.subgraph_index() << ":" << output << ") " << name << std::endl;
}
@@ -252,7 +356,7 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader)
void dump_model(std::ostream &os, const circle::Model *model)
{
- circleread::Reader reader(model);
+ mio::circle::Reader reader(model);
uint32_t num_subgraph = reader.num_subgraph();
@@ -264,6 +368,8 @@ void dump_model(std::ostream &os, const circle::Model *model)
auto opcodes = reader.opcodes();
auto buffers = reader.buffers();
+ auto metadata = reader.metadata();
+ auto signaturedefs = reader.signature_defs();
// dump operator_codes
os << "Operator Codes: [order] OpCodeName (OpCode Enum)" << std::endl;
@@ -271,11 +377,14 @@ void dump_model(std::ostream &os, const circle::Model *model)
for (auto opcode : opcodes)
{
circle::BuiltinOperator op_code = opcode->builtin_code();
- auto op_name = circleread::opcode_name(opcode);
+    // cast to int32_t to print as a number; streaming int8_t would print an ASCII char
+ int32_t dp_code = static_cast<int32_t>(opcode->deprecated_builtin_code());
+
+ auto op_name = mio::circle::opcode_name(opcode);
auto op_version = opcode->version();
os << "[" << opcode_index << "] " << op_name << " (code: " << op_code
- << ", version: " << op_version << ")" << std::endl;
+ << ", dep_code: " << dp_code << ", version: " << op_version << ")" << std::endl;
opcode_index++;
}
@@ -297,6 +406,57 @@ void dump_model(std::ostream &os, const circle::Model *model)
}
os << std::endl;
+ // dump metadata
+ if (metadata != nullptr)
+ {
+ os << "metadata : B(index) name" << std::endl;
+ for (uint32_t i = 0; i < metadata->Length(); ++i)
+ {
+ const auto buff_id = metadata->Get(i)->buffer();
+ const auto metadata_name = metadata->Get(i)->name()->str();
+ os << "B(" << buff_id << ") " << metadata_name << std::endl;
+
+ const uint8_t *buff_data;
+ reader.buffer_info(buff_id, &buff_data);
+ if (auto meta_prn = MetadataPrinterRegistry::get().lookup(metadata_name))
+ {
+ meta_prn->print(buff_data, os);
+ }
+ }
+ os << std::endl;
+ }
+
+ // dump signaturedef
+ if (signaturedefs != nullptr)
+ {
+ os << "SignatureDef" << std::endl;
+ for (uint32_t i = 0; i < signaturedefs->Length(); ++i)
+ {
+ auto sign_i = signaturedefs->Get(i);
+ os << "S(" << i << ") signature_key(" << sign_i->signature_key()->c_str() << "), sub_graph("
+ << sign_i->subgraph_index() << ")" << std::endl;
+
+ auto inputs_i = sign_i->inputs();
+ for (uint32_t t = 0; t < inputs_i->Length(); ++t)
+ {
+ auto inputs_i_t = inputs_i->Get(t);
+ os << " I(" << t << ")"
+ << " T(" << sign_i->subgraph_index() << ":" << inputs_i_t->tensor_index() << ") "
+ << inputs_i_t->name()->c_str() << std::endl;
+ }
+
+ auto outputs_i = sign_i->outputs();
+ for (uint32_t t = 0; t < outputs_i->Length(); ++t)
+ {
+ auto outputs_i_t = outputs_i->Get(t);
+ os << " O(" << t << ")"
+ << " T(" << sign_i->subgraph_index() << ":" << outputs_i_t->tensor_index() << ") "
+ << outputs_i_t->name()->c_str() << std::endl;
+ }
+ }
+ os << std::endl;
+ }
+
for (uint32_t sg = 0; sg < num_subgraph; ++sg)
{
reader.select_subgraph(sg);
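The dump_fbvect split above exists because streaming a flatbuffers::Vector<uint8_t> element through operator<< prints it as a character, not a number; the uint8_t specialization widens each element first. The underlying iostream behavior:

    #include <cstdint>
    #include <iostream>

    int main()
    {
      uint8_t v = 65;
      std::cout << v << "\n";                        // prints "A"
      std::cout << static_cast<uint32_t>(v) << "\n"; // prints "65"
      return 0;
    }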
diff --git a/compiler/circledump/src/Load.cpp b/compiler/circledump/src/Load.cpp
deleted file mode 100644
index ec91ed189..000000000
--- a/compiler/circledump/src/Load.cpp
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <circleread/Model.h>
-
-#include <fcntl.h>
-#include <unistd.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-
-namespace
-{
-
-class MemoryMappedModel final : public circleread::Model
-{
-public:
- /**
- * @require fd and data SHOULD be valid
- */
- explicit MemoryMappedModel(int fd, void *data, size_t size) : _fd{fd}, _data{data}, _size{size}
- {
- // DO NOTHING
- }
-
-public:
- ~MemoryMappedModel()
- {
- munmap(_data, _size);
- close(_fd);
- }
-
-public:
- MemoryMappedModel(const MemoryMappedModel &) = delete;
- MemoryMappedModel(MemoryMappedModel &&) = delete;
-
-public:
- const ::circle::Model *model(void) const override { return ::circle::GetModel(_data); }
-
-private:
- int _fd = -1;
- void *_data = nullptr;
- size_t _size = 0;
-};
-
-class FileDescriptor final
-{
-public:
- FileDescriptor(int value) : _value{value}
- {
- // DO NOTHING
- }
-
-public:
- // NOTE Copy is not allowed
- FileDescriptor(const FileDescriptor &) = delete;
-
-public:
- // NOTE Move is allowed
- FileDescriptor(FileDescriptor &&fd) { _value = fd.release(); }
-
-public:
- ~FileDescriptor()
- {
- if (_value != -1)
- {
-      // Close on destruction
- close(_value);
- }
- }
-
-public:
- int value(void) const { return _value; }
-
-public:
- int release(void)
- {
- auto res = _value;
- _value = -1;
- return res;
- }
-
-private:
- int _value = -1;
-};
-
-} // namespace
-
-namespace circleread
-{
-
-std::unique_ptr<Model> load_circle(const std::string &path)
-{
- FileDescriptor fd = open(path.c_str(), O_RDONLY);
-
- if (fd.value() == -1)
- {
- // Return nullptr on open failure
- return nullptr;
- }
-
- struct stat st;
- if (fstat(fd.value(), &st) == -1)
- {
- // Return nullptr on fstat failure
- return nullptr;
- }
-
- auto size = st.st_size;
- auto data = mmap(nullptr, size, PROT_READ, MAP_SHARED, fd.value(), 0);
-
- if (data == MAP_FAILED)
- {
- // Return nullptr on mmap failure
- return nullptr;
- }
-
- return std::unique_ptr<circleread::Model>{new MemoryMappedModel(fd.release(), data, size)};
-}
-
-} // namespace circleread
diff --git a/compiler/circledump/src/MetadataPrinter.cpp b/compiler/circledump/src/MetadataPrinter.cpp
new file mode 100644
index 000000000..f2df9bc16
--- /dev/null
+++ b/compiler/circledump/src/MetadataPrinter.cpp
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MetadataPrinter.h"
+
+#include <cassert>
+#include <string>
+#include <vector>
+
+namespace circledump
+{
+
+class SourceTablePrinter : public MetadataPrinter
+{
+public:
+ /**
+   * The source table consists of the following parts:
+ * - [ entry_number : uint32_t ]
+ * - [ id : uint32_t ][ length : uint32_t ][ data : 'length' Bytes ] * entry_number
+ */
+ virtual void print(const uint8_t *buffer, std::ostream &os) const override
+ {
+ if (buffer)
+ {
+ os << " [node_id : node_name]" << std::endl;
+ auto cur = buffer;
+ // entry number
+ const uint32_t num = *reinterpret_cast<const uint32_t *>(cur);
+ cur += sizeof(uint32_t);
+ for (uint32_t entry = 0; entry < num; entry++)
+ {
+ // id
+ const uint32_t node_id = *reinterpret_cast<const uint32_t *>(cur);
+ cur += sizeof(uint32_t);
+ // length
+ const uint32_t len = *reinterpret_cast<const uint32_t *>(cur);
+ cur += sizeof(uint32_t);
+ assert(len != 0);
+ // data
+ // non-empty 'data' has trailing '\0'. Let's exclude it.
+ std::string node_name = std::string(cur, cur + len - 1);
+ cur += len;
+
+ // print
+ os << " [" << node_id << " : " << node_name << "]" << std::endl;
+ }
+ }
+ }
+};
+
+class OpTablePrinter : public MetadataPrinter
+{
+public:
+ /**
+   * The op table consists of the following parts:
+ * - [ entry_number : uint32_t ]
+ * - [ id : uint32_t ][ length : uint32_t ][ origin_ids : length * uint32_t ] * entry_number
+ */
+ virtual void print(const uint8_t *buffer, std::ostream &os) const override
+ {
+ if (buffer)
+ {
+ os << " [node_id : origin_ids]" << std::endl;
+ auto cur = buffer;
+ // entry number
+ const uint32_t num = *reinterpret_cast<const uint32_t *>(cur);
+ cur += sizeof(uint32_t);
+ for (uint32_t entry = 0; entry < num; entry++)
+ {
+ // id
+ const uint32_t node_id = *reinterpret_cast<const uint32_t *>(cur);
+ cur += sizeof(uint32_t);
+ // length
+ const uint32_t len = *reinterpret_cast<const uint32_t *>(cur);
+ cur += sizeof(uint32_t);
+ assert(len != 0);
+ // origin_ids
+ std::vector<uint32_t> origin_ids;
+ for (uint32_t o = 0; o < len; o++)
+ {
+ origin_ids.push_back(*reinterpret_cast<const uint32_t *>(cur));
+ cur += sizeof(uint32_t);
+ }
+
+ // print
+ os << " [" << node_id << " : ";
+ uint32_t i = 0;
+ for (const auto &id : origin_ids)
+ {
+ if (i++)
+ os << ", ";
+ os << id;
+ }
+ os << "]" << std::endl;
+ }
+ }
+ }
+};
+
+MetadataPrinterRegistry::MetadataPrinterRegistry()
+{
+ _metadata_map["ONE_source_table"] = std::make_unique<SourceTablePrinter>();
+ _metadata_map["ONE_op_table"] = std::make_unique<OpTablePrinter>();
+}
+
+} // namespace circledump
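SourceTablePrinter documents the wire format it parses: a u32 entry count, then per entry a u32 node id, a u32 length, and length bytes of '\0'-terminated name. A hypothetical encoder-side helper (the name encode_source_table is invented here) that produces buffers in the same layout, e.g. for tests:

    #include <cstdint>
    #include <map>
    #include <string>
    #include <vector>

    // Hypothetical helper mirroring the layout parsed by SourceTablePrinter:
    //   [entry_number:u32] ([id:u32][length:u32][name bytes + '\0'])*
    std::vector<uint8_t> encode_source_table(const std::map<uint32_t, std::string> &names)
    {
      std::vector<uint8_t> buf;
      auto push_u32 = [&buf](uint32_t v) {
        const auto *p = reinterpret_cast<const uint8_t *>(&v);
        buf.insert(buf.end(), p, p + sizeof(uint32_t));
      };
      push_u32(static_cast<uint32_t>(names.size()));
      for (const auto &kv : names)
      {
        push_u32(kv.first);                                    // node id
        push_u32(static_cast<uint32_t>(kv.second.size() + 1)); // length incl. '\0'
        buf.insert(buf.end(), kv.second.begin(), kv.second.end());
        buf.push_back('\0');
      }
      return buf;
    }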
diff --git a/compiler/circledump/src/MetadataPrinter.h b/compiler/circledump/src/MetadataPrinter.h
new file mode 100644
index 000000000..39d92c812
--- /dev/null
+++ b/compiler/circledump/src/MetadataPrinter.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLEDUMP_METADATA_PRINTER_H__
+#define __CIRCLEDUMP_METADATA_PRINTER_H__
+
+#include <ostream>
+#include <string>
+#include <map>
+#include <memory>
+
+namespace circledump
+{
+
+class MetadataPrinter
+{
+public:
+ virtual void print(const uint8_t * /* buffer */, std::ostream &) const = 0;
+ virtual ~MetadataPrinter() = default;
+};
+
+class MetadataPrinterRegistry
+{
+public:
+ MetadataPrinterRegistry();
+
+public:
+ const MetadataPrinter *lookup(std::string op) const
+ {
+ if (_metadata_map.find(op) == _metadata_map.end())
+ return nullptr;
+
+ return _metadata_map.at(op).get();
+ }
+
+public:
+ static MetadataPrinterRegistry &get()
+ {
+ static MetadataPrinterRegistry me;
+ return me;
+ }
+
+private:
+ std::map<std::string /* metadata name */, std::unique_ptr<MetadataPrinter>> _metadata_map;
+};
+
+} // namespace circledump
+
+#endif // __CIRCLEDUMP_METADATA_PRINTER_H__
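Lookup follows the registry-singleton pattern already used by OpPrinterRegistry: ask the singleton for a printer by metadata name and print only when one is registered, exactly as the Dump.cpp hunk above does:

    // Matches the call site added in Dump.cpp.
    if (const auto *printer =
          circledump::MetadataPrinterRegistry::get().lookup("ONE_source_table"))
      printer->print(buff_data, std::cout);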
diff --git a/compiler/circledump/src/OpPrinter.cpp b/compiler/circledump/src/OpPrinter.cpp
index a0a063e79..bfcb1ec18 100644
--- a/compiler/circledump/src/OpPrinter.cpp
+++ b/compiler/circledump/src/OpPrinter.cpp
@@ -15,7 +15,8 @@
*/
#include "OpPrinter.h"
-#include "Read.h"
+
+#include <mio_circle/Helper.h>
#include <memory>
@@ -85,6 +86,27 @@ public:
os << std::boolalpha;
os << "adjoint_lhs(" << params->adjoint_lhs() << ") ";
os << "adjoint_rhs(" << params->adjoint_rhs() << ") ";
+ os << std::noboolalpha;
+ os << std::endl;
+ }
+ }
+};
+
+class BidirectionalSequenceLSTMPrinter : public OpPrinter
+{
+public:
+ void options(const circle::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_BidirectionalSequenceLSTMOptions())
+ {
+ os << " ";
+ os << "Activation(" << EnumNameActivationFunctionType(params->fused_activation_function())
+ << ") ";
+ os << "cell_clip(" << params->cell_clip() << ") ";
+ os << "proj_clip(" << params->proj_clip() << ") ";
+ os << "time_major(" << params->time_major() << ") ";
+ os << "asymmetric_quantize_inputs(" << params->asymmetric_quantize_inputs() << ") ";
+ os << "merge_outputs(" << params->merge_outputs() << ") ";
os << std::endl;
}
}
@@ -212,7 +234,7 @@ public:
{
if (auto *reshape_params = op->builtin_options_as_ReshapeOptions())
{
- auto new_shape = circleread::as_index_vector(reshape_params->new_shape());
+ auto new_shape = mio::circle::as_index_vector(reshape_params->new_shape());
os << " ";
os << "NewShape(" << new_shape << ")";
os << std::endl;
@@ -231,6 +253,7 @@ public:
os << std::boolalpha;
os << "align_corners(" << resize_params->align_corners() << ")";
os << "half_pixel_centers(" << resize_params->half_pixel_centers() << ")";
+ os << std::noboolalpha;
os << std::endl;
}
}
@@ -246,6 +269,7 @@ public:
os << " ";
os << std::boolalpha;
os << "align_corners(" << resize_params->align_corners() << ")";
+ os << std::noboolalpha;
os << std::endl;
}
}
@@ -279,7 +303,7 @@ public:
os << "Stride.H(" << conv_params->stride_h() << ") ";
os << "DepthMultiplier(" << conv_params->depth_multiplier() << ") ";
os << "Dilation.W(" << conv_params->dilation_w_factor() << ") ";
- os << "Dilation.H(" << conv_params->dilation_h_factor() << ")";
+ os << "Dilation.H(" << conv_params->dilation_h_factor() << ") ";
os << "Activation("
<< EnumNameActivationFunctionType(conv_params->fused_activation_function()) << ") ";
os << std::endl;
@@ -287,6 +311,25 @@ public:
}
};
+class FakeQuantPrinter : public OpPrinter
+{
+public:
+ void options(const circle::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_FakeQuantOptions())
+ {
+ os << " ";
+ os << "Min(" << params->min() << ") ";
+ os << "Max(" << params->max() << ") ";
+ os << "NumBits(" << params->num_bits() << ") ";
+ os << std::boolalpha;
+ os << "NarrowRange(" << params->narrow_range() << ") ";
+ os << std::noboolalpha;
+ os << std::endl;
+ }
+ }
+};
+
class FullyConnectedPrinter : public OpPrinter
{
public:
@@ -299,6 +342,7 @@ public:
<< ") ";
os << "Activation(" << EnumNameActivationFunctionType(params->fused_activation_function())
<< ") ";
+ os << "keep_num_dims(" << params->keep_num_dims() << ") ";
os << std::endl;
}
@@ -320,6 +364,22 @@ public:
}
};
+class GeluPrinter : public OpPrinter
+{
+public:
+ void options(const circle::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_GeluOptions())
+ {
+ os << " ";
+ os << std::boolalpha;
+ os << "approximate(" << params->approximate() << ") ";
+ os << std::noboolalpha;
+ os << std::endl;
+ }
+ }
+};
+
class IfPrinter : public OpPrinter
{
public:
@@ -577,17 +637,55 @@ public:
}
};
+class SVDFPrinter : public OpPrinter
+{
+public:
+ void options(const circle::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_SVDFOptions())
+ {
+ os << " ";
+ os << "rank(" << params->rank() << ") ";
+ os << "activation(" << EnumNameActivationFunctionType(params->fused_activation_function())
+ << ") ";
+ os << "asymmetric_quantize_inputs(" << params->asymmetric_quantize_inputs() << ") ";
+ os << std::endl;
+ }
+ }
+};
+
class TransposeConvPrinter : public OpPrinter
{
public:
void options(const circle::Operator *op, std::ostream &os) const override
{
- if (auto conv_params = op->builtin_options_as_TransposeConvOptions())
+ if (auto params = op->builtin_options_as_TransposeConvOptions())
{
os << " ";
- os << "Padding(" << conv_params->padding() << ") ";
- os << "Stride.W(" << conv_params->stride_w() << ") ";
- os << "Stride.H(" << conv_params->stride_h() << ") ";
+ os << "Padding(" << params->padding() << ") ";
+ os << "Stride.W(" << params->stride_w() << ") ";
+ os << "Stride.H(" << params->stride_h() << ") ";
+ os << "Activation(" << EnumNameActivationFunctionType(params->fused_activation_function())
+ << ") ";
+ os << std::endl;
+ }
+ }
+};
+
+class UnidirectionalSequenceLSTMPrinter : public OpPrinter
+{
+public:
+ void options(const circle::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_UnidirectionalSequenceLSTMOptions())
+ {
+ os << " ";
+ os << "Activation(" << EnumNameActivationFunctionType(params->fused_activation_function())
+ << ") ";
+ os << "cell_clip(" << params->cell_clip() << ") ";
+ os << "proj_clip(" << params->proj_clip() << ") ";
+ os << "time_major(" << params->time_major() << ") ";
+ os << "asymmetric_quantize_inputs(" << params->asymmetric_quantize_inputs() << ") ";
os << std::endl;
}
}
@@ -693,6 +791,22 @@ public:
}
};
+class InstanceNormPrinter : public OpPrinter
+{
+public:
+ void options(const circle::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_InstanceNormOptions())
+ {
+ os << " ";
+ os << "epsilon(" << params->epsilon() << ") ";
+ os << "Activation(" << EnumNameActivationFunctionType(params->fused_activation_function())
+ << ") ";
+ os << std::endl;
+ }
+ }
+};
+
OpPrinterRegistry::OpPrinterRegistry()
{
_op_map[circle::BuiltinOperator_ADD] = make_unique<AddPrinter>();
@@ -701,23 +815,29 @@ OpPrinterRegistry::OpPrinterRegistry()
_op_map[circle::BuiltinOperator_ARG_MIN] = make_unique<ArgMinPrinter>();
_op_map[circle::BuiltinOperator_AVERAGE_POOL_2D] = make_unique<Pool2DPrinter>();
_op_map[circle::BuiltinOperator_BATCH_MATMUL] = make_unique<BatchMatMulPrinter>();
+ _op_map[circle::BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM] =
+ make_unique<BidirectionalSequenceLSTMPrinter>();
_op_map[circle::BuiltinOperator_CAST] = make_unique<CastPrinter>();
// There is no Option for CEIL
_op_map[circle::BuiltinOperator_CONCATENATION] = make_unique<ConcatenationPrinter>();
_op_map[circle::BuiltinOperator_CONV_2D] = make_unique<Conv2DPrinter>();
+ // There is no Option for DENSIFY
_op_map[circle::BuiltinOperator_DEPTH_TO_SPACE] = make_unique<DepthToSpacePrinter>();
_op_map[circle::BuiltinOperator_DEPTHWISE_CONV_2D] = make_unique<DepthwiseConv2DPrinter>();
+ // There is no Option for DEQUANTIZE
_op_map[circle::BuiltinOperator_DIV] = make_unique<DivPrinter>();
+ _op_map[circle::BuiltinOperator_FAKE_QUANT] = make_unique<FakeQuantPrinter>();
// There is no Option for FLOOR
// There is no Option for FLOOR_MOD
_op_map[circle::BuiltinOperator_FULLY_CONNECTED] = make_unique<FullyConnectedPrinter>();
_op_map[circle::BuiltinOperator_GATHER] = make_unique<GatherPrinter>();
+ _op_map[circle::BuiltinOperator_GELU] = make_unique<GeluPrinter>();
_op_map[circle::BuiltinOperator_IF] = make_unique<IfPrinter>();
_op_map[circle::BuiltinOperator_L2_NORMALIZATION] = make_unique<L2NormPrinter>();
_op_map[circle::BuiltinOperator_L2_POOL_2D] = make_unique<Pool2DPrinter>();
_op_map[circle::BuiltinOperator_LEAKY_RELU] = make_unique<LeakyReluPrinter>();
_op_map[circle::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION] =
- make_unique<LocalResponseNormalizationPrinter>();
+ make_unique<LocalResponseNormalizationPrinter>();
// There is no Option for LOG
// There is no Option for LOGISTIC
// There is no Option for LOG_SOFTMAX
@@ -741,7 +861,7 @@ OpPrinterRegistry::OpPrinterRegistry()
_op_map[circle::BuiltinOperator_RESHAPE] = make_unique<ReshapePrinter>();
_op_map[circle::BuiltinOperator_RESIZE_BILINEAR] = make_unique<ResizeBilinearPrinter>();
_op_map[circle::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR] =
- make_unique<ResizeNearestNeighborPrinter>();
+ make_unique<ResizeNearestNeighborPrinter>();
_op_map[circle::BuiltinOperator_REVERSE_SEQUENCE] = make_unique<ReverseSequencePrinter>();
// There is no Option for ROUND
// There is no Option for SELECT
@@ -759,8 +879,11 @@ OpPrinterRegistry::OpPrinterRegistry()
_op_map[circle::BuiltinOperator_STRIDED_SLICE] = make_unique<StridedSlicePrinter>();
_op_map[circle::BuiltinOperator_SUB] = make_unique<SubPrinter>();
_op_map[circle::BuiltinOperator_SUM] = make_unique<ReducerPrinter>();
+ _op_map[circle::BuiltinOperator_SVDF] = make_unique<SVDFPrinter>();
_op_map[circle::BuiltinOperator_TRANSPOSE_CONV] = make_unique<TransposeConvPrinter>();
// There is no Option for TOPK_V2
+ _op_map[circle::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM] =
+ make_unique<UnidirectionalSequenceLSTMPrinter>();
_op_map[circle::BuiltinOperator_UNIQUE] = make_unique<UniquePrinter>();
_op_map[circle::BuiltinOperator_WHILE] = make_unique<WhilePrinter>();
_op_map[circle::BuiltinOperator_CUSTOM] = make_unique<CustomOpPrinter>();
@@ -768,6 +891,7 @@ OpPrinterRegistry::OpPrinterRegistry()
// Circle only
_op_map[circle::BuiltinOperator_BCQ_FULLY_CONNECTED] = make_unique<BCQFullyConnectedPrinter>();
_op_map[circle::BuiltinOperator_BCQ_GATHER] = make_unique<BCQGatherPrinter>();
+ _op_map[circle::BuiltinOperator_INSTANCE_NORM] = make_unique<InstanceNormPrinter>();
}
} // namespace circledump
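Editor's note: the registry above maps each circle::BuiltinOperator value to a polymorphic printer object; operators whose builtin options carry nothing to print simply never receive an entry (hence the "There is no Option for ..." comments). A minimal, self-contained sketch of the same idea, using simplified stand-in types rather than the actual circledump classes:

#include <iostream>
#include <map>
#include <memory>

// Stand-ins for the real circledump types, for illustration only
enum class BuiltinOperator
{
  ADD,
  SUB
};

struct OpPrinter
{
  virtual ~OpPrinter() = default;
  virtual void options(std::ostream &) const {} // default: nothing to print
};

struct AddPrinter final : public OpPrinter
{
  void options(std::ostream &os) const override { os << "Activation(NONE)"; }
};

int main()
{
  std::map<BuiltinOperator, std::unique_ptr<OpPrinter>> op_map;
  op_map[BuiltinOperator::ADD] = std::make_unique<AddPrinter>();

  // An absent key yields no printer, which a dumper can treat as
  // "there is no Option for this operator"
  auto it = op_map.find(BuiltinOperator::SUB);
  if (it == op_map.end())
    std::cout << "SUB: no options" << std::endl;
  else
    it->second->options(std::cout);
  return 0;
}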
diff --git a/compiler/circledump/src/Read.cpp b/compiler/circledump/src/Read.cpp
deleted file mode 100644
index 053225536..000000000
--- a/compiler/circledump/src/Read.cpp
+++ /dev/null
@@ -1,169 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Read.h"
-
-#include <sstream>
-#include <string>
-
-namespace circleread
-{
-
-bool is_valid(const circle::OperatorCode *opcode)
-{
- circle::BuiltinOperator code = opcode->builtin_code();
- return (circle::BuiltinOperator_MIN <= code && code <= circle::BuiltinOperator_MAX);
-}
-
-bool is_custom(const circle::OperatorCode *opcode)
-{
- circle::BuiltinOperator code = opcode->builtin_code();
- return (code == circle::BuiltinOperator_CUSTOM);
-}
-
-std::string opcode_name(const circle::OperatorCode *opcode)
-{
- assert(opcode);
-
- if (!is_valid(opcode))
- {
- std::ostringstream oss;
- oss << "(invalid)";
- return oss.str();
- }
-
- if (is_custom(opcode))
- {
- if (!opcode->custom_code())
- return "(invalid custom)";
-
- std::string custom_op = "CUSTOM(";
- custom_op += opcode->custom_code()->c_str();
- custom_op += ")";
- return custom_op;
- }
-
- circle::BuiltinOperator code = opcode->builtin_code();
- return circle::EnumNameBuiltinOperator(code);
-}
-
-const char *tensor_type(const circle::Tensor *tensor)
-{
- return circle::EnumNameTensorType(tensor->type());
-}
-
-const char *tensor_name(const circle::Tensor *tensor)
-{
- static const char *kEmptyTensorName = "(noname)";
-
- auto name = tensor->name();
- if (name)
- return name->c_str();
-
- return kEmptyTensorName;
-}
-
-Reader::Reader(const circle::Model *model)
-{
- _version = model->version();
- _subgraphs = model->subgraphs();
- _buffers = model->buffers();
-
- auto opcodes = model->operator_codes();
- for (const ::circle::OperatorCode *opcode : *opcodes)
- {
- _op_codes.push_back(opcode);
- }
-}
-
-size_t Reader::buffer_info(uint32_t buf_idx, const uint8_t **buff_data)
-{
- *buff_data = nullptr;
-
- if (buf_idx == 0)
- return 0;
-
- if (auto *buffer = (*_buffers)[buf_idx])
- {
- if (auto *array = buffer->data())
- {
- if (size_t size = array->size())
- {
- *buff_data = reinterpret_cast<const uint8_t *>(array->data());
- return size;
- }
- }
- }
-
- return 0;
-}
-
-circle::BuiltinOperator Reader::builtin_code(const circle::Operator *op) const
-{
- uint32_t index = op->opcode_index();
- assert(index < _op_codes.size());
- const circle::OperatorCode *opcode = _op_codes.at(index);
-
- return opcode->builtin_code();
-}
-
-std::string Reader::opcode_name(const circle::Operator *op) const
-{
- uint32_t index = op->opcode_index();
- assert(index < _op_codes.size());
- const circle::OperatorCode *opcode = _op_codes.at(index);
-
- if (!is_valid(opcode))
- {
- std::ostringstream oss;
- oss << "(invalid: " << index << ")";
- return oss.str();
- }
-
- return circleread::opcode_name(opcode);
-}
-
-bool Reader::select_subgraph(uint32_t sgindex)
-{
- _subgraph_index = sgindex;
- _tensors = nullptr;
- _operators = nullptr;
-
- _inputs.clear();
- _outputs.clear();
-
- if (_subgraphs->Length() <= sgindex)
- {
- assert(false);
- return false;
- }
-
- const circle::SubGraph *subgraph = (*_subgraphs)[sgindex];
-
- auto name = subgraph->name();
- _subgraph_name = name ? name->c_str() : "(noname)";
-
- _tensors = subgraph->tensors();
- _operators = subgraph->operators();
- _data_format = subgraph->data_format();
-
- _inputs = as_index_vector(subgraph->inputs());
- _outputs = as_index_vector(subgraph->outputs());
-
- return true;
-}
-
-} // namespace circleread
diff --git a/compiler/circledump/src/Read.h b/compiler/circledump/src/Read.h
deleted file mode 100644
index dd1ef20b6..000000000
--- a/compiler/circledump/src/Read.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __CIRCLEREAD_READ_H__
-#define __CIRCLEREAD_READ_H__
-
-#include <mio/circle/schema_generated.h>
-
-#include <map>
-#include <string>
-#include <vector>
-
-namespace circleread
-{
-
-template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T> *flat_array)
-{
- std::vector<T> ret(flat_array->Length());
- for (uint32_t i = 0; i < flat_array->Length(); i++)
- {
- ret[i] = flat_array->Get(i);
- }
- return ret;
-}
-
-bool is_valid(const circle::OperatorCode *opcode);
-bool is_custom(const circle::OperatorCode *opcode);
-std::string opcode_name(const circle::OperatorCode *opcode);
-const char *tensor_type(const circle::Tensor *tensor);
-const char *tensor_name(const circle::Tensor *tensor);
-
-/**
- * @brief Loads Circle file and provides helpers to access attributes
- */
-class Reader
-{
-private:
- using CircleSubGraphs_t = flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>;
- using CircleBuffers_t = flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>>;
- using CircleTensors_t = flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>;
- using CircleOperators_t = flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>;
-
-public:
- Reader(const circle::Model *model);
-
- Reader() = delete;
-
-public:
- uint32_t version() const { return _version; }
-
- const std::vector<const circle::OperatorCode *> &opcodes() { return _op_codes; }
- const CircleBuffers_t *buffers() { return _buffers; }
- const CircleTensors_t *tensors() { return _tensors; }
- const CircleOperators_t *operators() { return _operators; }
- const std::vector<int32_t> &inputs() const { return _inputs; }
- const std::vector<int32_t> &outputs() const { return _outputs; }
- const circle::DataFormat &data_format() const { return _data_format; }
-
- uint32_t num_subgraph() const { return _subgraphs->Length(); }
-
- size_t buffer_info(uint32_t buf_idx, const uint8_t **buff_data);
- circle::BuiltinOperator builtin_code(const circle::Operator *op) const;
- std::string opcode_name(const circle::Operator *op) const;
-
-public:
- bool select_subgraph(uint32_t subgraph);
- const std::string &subgraph_name(void) const { return _subgraph_name; }
- uint32_t subgraph_index(void) const { return _subgraph_index; }
-
-private:
- uint32_t _version;
-
- const CircleSubGraphs_t *_subgraphs{nullptr};
- const CircleBuffers_t *_buffers{nullptr};
- const CircleTensors_t *_tensors{nullptr};
- const CircleOperators_t *_operators{nullptr};
-
- uint32_t _subgraph_index;
- std::string _subgraph_name;
- std::vector<const circle::OperatorCode *> _op_codes;
- std::vector<int32_t> _inputs;
- std::vector<int32_t> _outputs;
- circle::DataFormat _data_format;
-};
-
-} // namespace circleread
-
-#endif // __CIRCLEREAD_READ_H__
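Editor's note: the removed Reader was a thin facade over the flatbuffers-encoded model: construct it from a circle::Model, select a subgraph, then walk the operators while resolving each opcode_index through the cached operator_codes table. An illustrative sketch of that traversal against the removed header (not standalone; `model` is assumed to come from an already-loaded flatbuffer):

#include "Read.h"

#include <cstdint>
#include <iostream>

void dump_opcodes(const circle::Model *model)
{
  circleread::Reader reader(model);

  for (uint32_t g = 0; g < reader.num_subgraph(); ++g)
  {
    reader.select_subgraph(g); // binds tensors/operators of subgraph g
    for (const circle::Operator *op : *reader.operators())
    {
      // opcode_index -> OperatorCode -> readable name (or "(invalid: N)")
      std::cout << reader.opcode_name(op) << std::endl;
    }
  }
}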
diff --git a/compiler/cli/CMakeLists.txt b/compiler/cli/CMakeLists.txt
index 22948fff9..4ab0ea218 100644
--- a/compiler/cli/CMakeLists.txt
+++ b/compiler/cli/CMakeLists.txt
@@ -4,12 +4,11 @@ list(APPEND TESTS "src/App.test.cpp")
add_library(cli ${SOURCES})
target_include_directories(cli PUBLIC include)
-nnas_find_package(GTest QUIET)
-
-if(NOT GTest_FOUND)
+if(NOT ENABLE_TEST)
return()
-endif(NOT GTest_FOUND)
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest QUIET)
-GTest_AddTEst(cli_test ${TESTS})
+GTest_AddTest(cli_test ${TESTS})
target_link_libraries(cli_test cli)
-target_link_libraries(cli_test stdex)
diff --git a/compiler/cli/src/App.test.cpp b/compiler/cli/src/App.test.cpp
index fe2d44179..59e5da3bd 100644
--- a/compiler/cli/src/App.test.cpp
+++ b/compiler/cli/src/App.test.cpp
@@ -16,7 +16,7 @@
#include "cli/App.h"
-#include <stdex/Memory.h>
+#include <memory>
#include <gtest/gtest.h>
@@ -52,7 +52,7 @@ TEST(APP, run)
cli::App app("test");
std::string args;
- app.insert("record", stdex::make_unique<RecordCommand>(3, args));
+ app.insert("record", std::make_unique<RecordCommand>(3, args));
const char *argv[] = {"record", "hello", "world"};
diff --git a/compiler/coco/core/CMakeLists.txt b/compiler/coco/core/CMakeLists.txt
index 8c6844733..a81d366c9 100644
--- a/compiler/coco/core/CMakeLists.txt
+++ b/compiler/coco/core/CMakeLists.txt
@@ -7,7 +7,6 @@ target_include_directories(coco_core PUBLIC include)
# NOTE Some coco_core PUBLIC headers include angkor headers
target_link_libraries(coco_core PUBLIC angkor)
target_link_libraries(coco_core PRIVATE pepper_assert)
-target_link_libraries(coco_core PRIVATE stdex)
# Let's apply nncc common compile options
# NOTE This will enable strict compilation (warnings as error).
# Please refer to top-level CMakeLists.txt for details
@@ -22,4 +21,3 @@ nnas_find_package(GTest REQUIRED)
GTest_AddTest(coco_core_test ${TESTS})
target_link_libraries(coco_core_test coco_core)
-target_link_libraries(coco_core_test stdex)
diff --git a/compiler/coco/core/include/coco/IR/FeatureShape.h b/compiler/coco/core/include/coco/IR/FeatureShape.h
index 015fc709d..3c8e9accd 100644
--- a/compiler/coco/core/include/coco/IR/FeatureShape.h
+++ b/compiler/coco/core/include/coco/IR/FeatureShape.h
@@ -31,13 +31,13 @@ class FeatureShape : public nncc::core::ADT::feature::Shape
{
public:
FeatureShape(uint32_t depth, uint32_t height, uint32_t width)
- : Shape{depth, height, width}, _batch{1}
+ : Shape{depth, height, width}, _batch{1}
{
// DO NOTHING
}
FeatureShape(uint32_t batch, uint32_t depth, uint32_t height, uint32_t width)
- : Shape{depth, height, width}, _batch{batch}
+ : Shape{depth, height, width}, _batch{batch}
{
// DO NOTHING
}
diff --git a/compiler/coco/core/include/coco/IR/Locatable.h b/compiler/coco/core/include/coco/IR/Locatable.h
index b80a4a360..549802776 100644
--- a/compiler/coco/core/include/coco/IR/Locatable.h
+++ b/compiler/coco/core/include/coco/IR/Locatable.h
@@ -24,7 +24,7 @@ namespace coco
/**
* @brief Return the associated instruction if exists.
- */
+ */
struct Locatable
{
virtual ~Locatable() = default;
diff --git a/compiler/coco/core/include/coco/IR/Ops.h b/compiler/coco/core/include/coco/IR/Ops.h
index 01ac92b7f..39dce5272 100644
--- a/compiler/coco/core/include/coco/IR/Ops.h
+++ b/compiler/coco/core/include/coco/IR/Ops.h
@@ -407,6 +407,6 @@ public:
const Sqrt *asSqrt(void) const override { return this; }
};
-} // namesapce coco
+} // namespace coco
#endif // __COCO_IR_OPS_H__
diff --git a/compiler/coco/core/include/coco/IR/Padding2D.h b/compiler/coco/core/include/coco/IR/Padding2D.h
index b764656cc..68a3481f1 100644
--- a/compiler/coco/core/include/coco/IR/Padding2D.h
+++ b/compiler/coco/core/include/coco/IR/Padding2D.h
@@ -32,7 +32,7 @@ public:
public:
Padding2D(uint32_t top, uint32_t bottom, uint32_t left, uint32_t right)
- : _top{top}, _bottom{bottom}, _left{left}, _right{right}
+ : _top{top}, _bottom{bottom}, _left{left}, _right{right}
{
// DO NOTHING
}
diff --git a/compiler/coco/core/src/ADT/PtrList.test.cpp b/compiler/coco/core/src/ADT/PtrList.test.cpp
index dcbad8b90..904dd6e1d 100644
--- a/compiler/coco/core/src/ADT/PtrList.test.cpp
+++ b/compiler/coco/core/src/ADT/PtrList.test.cpp
@@ -25,7 +25,7 @@ namespace
struct Object
{
};
-}
+} // namespace
TEST(ADT_PTR_LIST, ctor)
{
diff --git a/compiler/coco/core/src/ADT/PtrManager.test.cpp b/compiler/coco/core/src/ADT/PtrManager.test.cpp
index bb9056f29..5a9f09d4e 100644
--- a/compiler/coco/core/src/ADT/PtrManager.test.cpp
+++ b/compiler/coco/core/src/ADT/PtrManager.test.cpp
@@ -61,7 +61,7 @@ struct ObjectManager final : public coco::PtrManager<Object>
void free(Object *o) { release(o); }
};
-}
+} // namespace
TEST(ADT_PTR_MANAGER, usecase)
{
diff --git a/compiler/coco/core/src/IR/BagManager.cpp b/compiler/coco/core/src/IR/BagManager.cpp
index 10fe69d57..8cfb0c09c 100644
--- a/compiler/coco/core/src/IR/BagManager.cpp
+++ b/compiler/coco/core/src/IR/BagManager.cpp
@@ -16,14 +16,14 @@
#include "coco/IR/BagManager.h"
-#include <stdex/Memory.h>
+#include <memory>
namespace coco
{
Bag *BagManager::create(uint32_t size)
{
- auto bag = stdex::make_unique<Bag>(size);
+ auto bag = std::make_unique<Bag>(size);
modulize(bag.get());
return take(std::move(bag));
}
diff --git a/compiler/coco/core/src/IR/BlockManager.cpp b/compiler/coco/core/src/IR/BlockManager.cpp
index 5e3b88173..d1bcacb32 100644
--- a/compiler/coco/core/src/IR/BlockManager.cpp
+++ b/compiler/coco/core/src/IR/BlockManager.cpp
@@ -16,8 +16,7 @@
#include "coco/IR/BlockManager.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
namespace coco
@@ -25,7 +24,7 @@ namespace coco
Block *BlockManager::create(void)
{
- auto blk = stdex::make_unique<Block>();
+ auto blk = std::make_unique<Block>();
modulize(blk.get());
return take(std::move(blk));
}
diff --git a/compiler/coco/core/src/IR/Conv2D.test.cpp b/compiler/coco/core/src/IR/Conv2D.test.cpp
index df0a2470b..5bf06ca9f 100644
--- a/compiler/coco/core/src/IR/Conv2D.test.cpp
+++ b/compiler/coco/core/src/IR/Conv2D.test.cpp
@@ -20,11 +20,9 @@
#include <vector>
#include <memory>
-#include <stdex/Memory.h>
-
#include <gtest/gtest.h>
-using stdex::make_unique;
+using std::make_unique;
namespace
{
diff --git a/compiler/coco/core/src/IR/Def.test.cpp b/compiler/coco/core/src/IR/Def.test.cpp
index 98455c09e..443fdcb95 100644
--- a/compiler/coco/core/src/IR/Def.test.cpp
+++ b/compiler/coco/core/src/IR/Def.test.cpp
@@ -19,13 +19,13 @@
#include "coco/IR/FeatureObject.h"
-#include <stdex/Memory.h>
+#include <memory>
#include "Producer.mock.h"
#include <gtest/gtest.h>
-using stdex::make_unique;
+using std::make_unique;
namespace
{
diff --git a/compiler/coco/core/src/IR/InputManager.cpp b/compiler/coco/core/src/IR/InputManager.cpp
index 6d5b9470b..0530deeda 100644
--- a/compiler/coco/core/src/IR/InputManager.cpp
+++ b/compiler/coco/core/src/IR/InputManager.cpp
@@ -16,14 +16,14 @@
#include "coco/IR/InputManager.h"
-#include <stdex/Memory.h>
+#include <memory>
namespace coco
{
Input *InputManager::create(const nncc::core::ADT::tensor::Shape &shape)
{
- auto input = stdex::make_unique<Input>(shape);
+ auto input = std::make_unique<Input>(shape);
modulize(input.get());
return take(std::move(input));
}
diff --git a/compiler/coco/core/src/IR/Module.cpp b/compiler/coco/core/src/IR/Module.cpp
index 0b65ceedc..0db78941c 100644
--- a/compiler/coco/core/src/IR/Module.cpp
+++ b/compiler/coco/core/src/IR/Module.cpp
@@ -16,9 +16,9 @@
#include "coco/IR/Module.h"
-#include <stdex/Memory.h>
+#include <memory>
-using stdex::make_unique;
+using std::make_unique;
namespace
{
@@ -144,7 +144,7 @@ std::unique_ptr<Module> Module::create(void)
m->_input = make_unique<coco::InputList>();
m->_output = make_unique<coco::OutputList>();
- return std::move(m);
+ return m;
}
} // namespace coco
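Editor's note: both this hunk and the Data.cpp change further below drop a redundant std::move on return. Since C++11 (with defect report CWG 1579 applied), returning a local by name already treats it as an rvalue, so the move, including a converting move such as unique_ptr<Derived> to unique_ptr<Base>, happens implicitly; the explicit std::move is at best redundant (and in the same-type case suppresses copy elision, triggering -Wpessimizing-move on newer compilers). The old "GCC 4.9 tries to copy" comment reflects a compiler that predated the defect-report fix. A minimal sketch:

#include <memory>

struct Base
{
  virtual ~Base() = default;
};
struct Derived : Base
{
};

std::unique_ptr<Base> create()
{
  auto d = std::make_unique<Derived>();
  // No std::move needed: `d` is treated as an rvalue here, so the
  // Derived -> Base converting move happens implicitly
  return d;
}

int main()
{
  auto p = create();
  return p ? 0 : 1;
}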
diff --git a/compiler/coco/core/src/IR/ObjectManager.cpp b/compiler/coco/core/src/IR/ObjectManager.cpp
index 1b7215a04..38c3a9bcc 100644
--- a/compiler/coco/core/src/IR/ObjectManager.cpp
+++ b/compiler/coco/core/src/IR/ObjectManager.cpp
@@ -19,11 +19,10 @@
#include "coco/IR/FeatureObject.h"
#include "coco/IR/KernelObject.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
-using stdex::make_unique;
+using std::make_unique;
namespace coco
{
diff --git a/compiler/coco/core/src/IR/OpManager.cpp b/compiler/coco/core/src/IR/OpManager.cpp
index c87b704fe..911f999c7 100644
--- a/compiler/coco/core/src/IR/OpManager.cpp
+++ b/compiler/coco/core/src/IR/OpManager.cpp
@@ -16,13 +16,12 @@
#include "coco/IR/OpManager.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
#include <queue>
#include <set>
-using stdex::make_unique;
+using std::make_unique;
namespace coco
{
diff --git a/compiler/coco/core/src/IR/Ops.test.cpp b/compiler/coco/core/src/IR/Ops.test.cpp
index ae979b2bf..cfbd3ca70 100644
--- a/compiler/coco/core/src/IR/Ops.test.cpp
+++ b/compiler/coco/core/src/IR/Ops.test.cpp
@@ -21,11 +21,9 @@
#include <vector>
#include <memory>
-#include <stdex/Memory.h>
-
#include <gtest/gtest.h>
-using stdex::make_unique;
+using std::make_unique;
/**
* Section: Add Op
diff --git a/compiler/coco/core/src/IR/OutputManager.cpp b/compiler/coco/core/src/IR/OutputManager.cpp
index 86b9580ac..5dd51c378 100644
--- a/compiler/coco/core/src/IR/OutputManager.cpp
+++ b/compiler/coco/core/src/IR/OutputManager.cpp
@@ -16,14 +16,14 @@
#include "coco/IR/OutputManager.h"
-#include <stdex/Memory.h>
+#include <memory>
namespace coco
{
Output *OutputManager::create(const nncc::core::ADT::tensor::Shape &shape)
{
- auto output = stdex::make_unique<Output>(shape);
+ auto output = std::make_unique<Output>(shape);
modulize(output.get());
return take(std::move(output));
}
diff --git a/compiler/coco/core/src/IR/Part.test.cpp b/compiler/coco/core/src/IR/Part.test.cpp
index 87e0e1516..4348d4db2 100644
--- a/compiler/coco/core/src/IR/Part.test.cpp
+++ b/compiler/coco/core/src/IR/Part.test.cpp
@@ -17,11 +17,11 @@
#include "coco/IR/Part.h"
#include "coco/IR/Op.h"
-#include <stdex/Memory.h>
+#include <memory>
#include <gtest/gtest.h>
-using stdex::make_unique;
+using std::make_unique;
namespace
{
diff --git a/compiler/coco/core/src/IR/Use.test.cpp b/compiler/coco/core/src/IR/Use.test.cpp
index 3191e9852..b7026385f 100644
--- a/compiler/coco/core/src/IR/Use.test.cpp
+++ b/compiler/coco/core/src/IR/Use.test.cpp
@@ -21,11 +21,11 @@
#include "Consumer.mock.h"
-#include <stdex/Memory.h>
+#include <memory>
#include <gtest/gtest.h>
-using stdex::make_unique;
+using std::make_unique;
namespace
{
diff --git a/compiler/coco/generic/CMakeLists.txt b/compiler/coco/generic/CMakeLists.txt
index 02fbf67f5..c65c84c06 100644
--- a/compiler/coco/generic/CMakeLists.txt
+++ b/compiler/coco/generic/CMakeLists.txt
@@ -5,7 +5,6 @@ list(REMOVE_ITEM SOURCES ${TESTS})
add_library(coco_generic SHARED ${SOURCES})
target_include_directories(coco_generic PUBLIC include)
target_link_libraries(coco_generic PUBLIC coco_core)
-target_link_libraries(coco_generic PRIVATE stdex)
target_link_libraries(coco_generic PRIVATE nncc_common)
if(NOT ENABLE_TEST)
@@ -17,6 +16,3 @@ nnas_find_package(GTest REQUIRED)
GTest_AddTest(coco_generic_test ${TESTS})
target_link_libraries(coco_generic_test coco_generic)
-# stdex is a PRIVATE dependency of coco_generic, and thus is not linked to coco_generic_test
-# even though coco_generic_test is linked to coco_generic
-target_link_libraries(coco_generic_test stdex)
diff --git a/compiler/coco/generic/src/IR/Data.cpp b/compiler/coco/generic/src/IR/Data.cpp
index b71947253..361dcc243 100644
--- a/compiler/coco/generic/src/IR/Data.cpp
+++ b/compiler/coco/generic/src/IR/Data.cpp
@@ -19,13 +19,12 @@
#include <nncc/core/ADT/kernel/NCHWLayout.h>
#include <nncc/core/ADT/kernel/Overlay.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <map>
using namespace nncc::core::ADT;
-using stdex::make_unique;
+using std::make_unique;
namespace
{
@@ -71,7 +70,7 @@ public:
private:
std::map<const coco::Bag *, std::unique_ptr<std::vector<uint8_t>>> _data;
};
-}
+} // namespace
namespace
{
@@ -210,8 +209,7 @@ std::unique_ptr<Data> Data::create(void)
data->_blob = std::move(blob);
data->_fp32 = std::move(fp32);
- // GCC 4.9 tries to copy data (while GCC 6.X doesn't)
- return std::move(data);
+ return data;
}
} // namespace coco
diff --git a/compiler/common-artifacts/CMakeLists.txt b/compiler/common-artifacts/CMakeLists.txt
index ec9e3cf85..2b032034a 100644
--- a/compiler/common-artifacts/CMakeLists.txt
+++ b/compiler/common-artifacts/CMakeLists.txt
@@ -1,60 +1,79 @@
#[[ Generate common python virtual environment ]]
-find_package(PythonInterp 3 QUIET)
-find_package(PythonLibs 3 QUIET)
+# NOTE find_package tries to use at least python3.8 as follows, depending on the platform version
+# Ubuntu18.04; explicitly installed python3.8 (default is python3.6)
+# Ubuntu20.04; default python3.8
+# Ubuntu22.04; default python3.10
+# refer https://github.com/Samsung/ONE/issues/9962
+find_package(PythonInterp 3.8 QUIET)
+find_package(PythonLibs 3.8 QUIET)
if(NOT ${PYTHONINTERP_FOUND})
message(STATUS "Build common-artifacts: FALSE (Python3 is missing)")
return()
endif()
-if(${PYTHON_VERSION_MINOR} LESS 3)
- message(STATUS "Build common-artifacts: FALSE (You need to install Python version higher than 3.3)")
+if(${PYTHON_VERSION_MINOR} LESS 8)
+ message(STATUS "Build common-artifacts: FALSE (You need to install Python 3.8 or higher)")
return()
endif()
-# Create python virtual environment with tensorflow 1.13.2
-set(VIRTUALENV_OVERLAY_TF_1_13_2 "${NNCC_OVERLAY_DIR}/venv_1_13_2")
-
-# Create python virtual environment with tensorflow 2.3.0
-set(VIRTUALENV_OVERLAY_TF_2_3_0 "${NNCC_OVERLAY_DIR}/venv_2_3_0")
+# Create python virtual environment with tensorflow 2.12.1
+set(VIRTUALENV_OVERLAY_TF_2_12_1 "${NNCC_OVERLAY_DIR}/venv_2_12_1")
add_custom_command(
- OUTPUT ${VIRTUALENV_OVERLAY_TF_1_13_2}
- COMMAND ${PYTHON_EXECUTABLE} -m venv ${VIRTUALENV_OVERLAY_TF_1_13_2}
-)
-
-add_custom_command(
- OUTPUT ${VIRTUALENV_OVERLAY_TF_2_3_0}
- COMMAND ${PYTHON_EXECUTABLE} -m venv ${VIRTUALENV_OVERLAY_TF_2_3_0}
+ OUTPUT ${VIRTUALENV_OVERLAY_TF_2_12_1}
+ COMMAND ${PYTHON_EXECUTABLE} -m venv ${VIRTUALENV_OVERLAY_TF_2_12_1}
)
# Create requirements.txt and install required pip packages
set(REQUIREMENTS_FILE "requirements.txt")
-set(REQUIREMENTS_OVERLAY_PATH_TF_1_13_2 "${VIRTUALENV_OVERLAY_TF_1_13_2}/${REQUIREMENTS_FILE}")
-set(REQUIREMENTS_OVERLAY_PATH_TF_2_3_0 "${VIRTUALENV_OVERLAY_TF_2_3_0}/${REQUIREMENTS_FILE}")
+set(REQUIREMENTS_OVERLAY_PATH_TF_2_12_1 "${VIRTUALENV_OVERLAY_TF_2_12_1}/${REQUIREMENTS_FILE}")
-# TODO remove version number of '--upgrade pip==20.2.1 setuptools==49.3.0'
-# NOTE adding version is for temporary hotfix of setuptools 50.x.y version
-add_custom_command(
- OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2}
- COMMAND ${CMAKE_COMMAND} -E echo "tensorflow==1.13.2" > ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2}
- COMMAND ${VIRTUALENV_OVERLAY_TF_1_13_2}/bin/python -m pip --default-timeout=1000 install --upgrade pip==20.2.1 setuptools==49.3.0
- COMMAND ${VIRTUALENV_OVERLAY_TF_1_13_2}/bin/python -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2} --upgrade
- DEPENDS ${VIRTUALENV_OVERLAY_TF_1_13_2}
-)
+set(PYTHON_OVERLAY python3)
+if(PYTHON_EXECUTABLE MATCHES python3.8)
+ set(PYTHON_OVERLAY python3.8)
+endif()
-add_custom_command(
- OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
- COMMAND ${CMAKE_COMMAND} -E remove -f ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
- COMMAND ${CMAKE_COMMAND} -E echo "tensorflow-cpu==2.3.0" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
- COMMAND ${CMAKE_COMMAND} -E echo "flatbuffers==1.12" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
- COMMAND ${VIRTUALENV_OVERLAY_TF_2_3_0}/bin/python -m pip --default-timeout=1000 install --upgrade pip==20.2.1 setuptools==49.3.0
- COMMAND ${VIRTUALENV_OVERLAY_TF_2_3_0}/bin/python -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0} --upgrade
- DEPENDS ${VIRTUALENV_OVERLAY_TF_2_3_0}
-)
+# NOTE when running behind a proxy with a self-signed certificate, the '--trusted-host' options need to be set
+set(PIP_OPTION_TRUSTED_HOST )
+if(DEFINED ENV{ONE_PIP_OPTION_TRUST_HOST})
+ set(PIP_OPTION_TRUSTED_HOST --trusted-host pypi.python.org --trusted-host files.pythonhosted.org --trusted-host pypi.org)
+endif()
+
+if(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "aarch64")
+ # NOTE the `tensorflow-cpu` package is not available for aarch64, so we use the `tensorflow` package.
+ add_custom_command(
+ OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_2_12_1}
+ COMMAND ${CMAKE_COMMAND} -E remove -f ${REQUIREMENTS_OVERLAY_PATH_TF_2_12_1}
+ COMMAND ${CMAKE_COMMAND} -E echo "tensorflow==2.12.1" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_12_1}
+ COMMAND ${CMAKE_COMMAND} -E echo "flatbuffers==23.5.26" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_12_1}
+ COMMAND ${CMAKE_COMMAND} -E echo "protobuf==4.23.3" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_12_1}
+ COMMAND ${CMAKE_COMMAND} -E echo "pydot==1.4.2" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_12_1}
+ COMMAND ${VIRTUALENV_OVERLAY_TF_2_12_1}/bin/${PYTHON_OVERLAY} -m pip --default-timeout=1000
+ ${PIP_OPTION_TRUSTED_HOST} install --upgrade pip setuptools
+ COMMAND ${VIRTUALENV_OVERLAY_TF_2_12_1}/bin/${PYTHON_OVERLAY} -m pip --default-timeout=1000
+ ${PIP_OPTION_TRUSTED_HOST} install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_12_1} --upgrade
+ DEPENDS ${VIRTUALENV_OVERLAY_TF_2_12_1}
+ )
+else(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "aarch64")
+ add_custom_command(
+ OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_2_12_1}
+ COMMAND ${CMAKE_COMMAND} -E remove -f ${REQUIREMENTS_OVERLAY_PATH_TF_2_12_1}
+ COMMAND ${CMAKE_COMMAND} -E echo "tensorflow-cpu==2.12.1" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_12_1}
+ COMMAND ${CMAKE_COMMAND} -E echo "flatbuffers==23.5.26" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_12_1}
+ COMMAND ${CMAKE_COMMAND} -E echo "protobuf==4.23.3" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_12_1}
+ COMMAND ${CMAKE_COMMAND} -E echo "pydot==1.4.2" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_12_1}
+ COMMAND ${VIRTUALENV_OVERLAY_TF_2_12_1}/bin/${PYTHON_OVERLAY} -m pip --default-timeout=1000
+ ${PIP_OPTION_TRUSTED_HOST} install --upgrade pip setuptools
+ COMMAND ${VIRTUALENV_OVERLAY_TF_2_12_1}/bin/${PYTHON_OVERLAY} -m pip --default-timeout=1000
+ ${PIP_OPTION_TRUSTED_HOST} install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_12_1} --upgrade
+ DEPENDS ${VIRTUALENV_OVERLAY_TF_2_12_1}
+ )
+endif(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "aarch64")
add_custom_target(common_artifacts_python_deps ALL
- DEPENDS ${VIRTUALENV_OVERLAY_TF_1_13_2} ${VIRTUALENV_OVERLAY_TF_2_3_0} ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2} ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
+ DEPENDS ${VIRTUALENV_OVERLAY_TF_2_12_1}
+ ${REQUIREMENTS_OVERLAY_PATH_TF_2_12_1}
)
#[[ Generate common resources ]]
@@ -75,7 +94,6 @@ target_link_libraries(testDataGenerator PRIVATE arser)
target_link_libraries(testDataGenerator PRIVATE foder)
target_link_libraries(testDataGenerator PRIVATE luci_import)
target_link_libraries(testDataGenerator PRIVATE luci_interpreter)
-target_link_libraries(testDataGenerator PRIVATE mio_circle)
target_link_libraries(testDataGenerator PRIVATE safemain)
unset(TEST_DEPS)
@@ -87,8 +105,9 @@ set(TFLITE_RECIPE_REPO "${TensorFlowLiteRecipes_DIR}")
set(CIRCLE_RECIPE_REPO "${CircleRecipes_DIR}")
set(TEST_RECIPE_FILENAME "test.recipe")
set(TEST_RULE_FILENAME "test.rule")
+set(TEST_QCONFIG_FILENAME "test.qconf.json")
-set(MODEL2NNPKG "${NNAS_PROJECT_SOURCE_DIR}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh")
+set(MODEL2NNPKG "${NNAS_PROJECT_SOURCE_DIR}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.py")
# Get test case list
unset(RECIPES)
file(GLOB TFLITE_SUBDIR RELATIVE ${TFLITE_RECIPE_REPO} ${TFLITE_RECIPE_REPO}/*)
@@ -118,12 +137,20 @@ endmacro()
include("exclude.lst")
+# TODO revise using variadic arguments
+macro(tcgenerate_option NAME OPTION ARG1 ARG2 ARG3)
+ set(TCGEN_OPT_${NAME} ${OPTION} ${ARG1} ${ARG2} ${ARG3})
+endmacro()
+
+include("options.lst")
+
foreach(RECIPE IN ITEMS ${RECIPES})
unset(OPT_FORMAT)
unset(MODEL_FORMAT)
set(RECIPE_FILE "${RECIPE}.recipe")
set(RULE_FILE "${RECIPE}.rule")
+ set(QCONFIG_FILE "${RECIPE}.qconf.json")
set(TFLITE_RECIPE_SOURCE_PATH "${TFLITE_RECIPE_REPO}/${RECIPE}/${TEST_RECIPE_FILENAME}")
set(CIRCLE_RECIPE_SOURCE_PATH "${CIRCLE_RECIPE_REPO}/${RECIPE}/${TEST_RECIPE_FILENAME}")
@@ -152,8 +179,20 @@ foreach(RECIPE IN ITEMS ${RECIPES})
set(RULE_SOURCE_PATH ${CIRCLE_RULE_SOURCE_PATH})
endif()
+ set(TFLITE_QCONFIG_SOURCE_PATH "${TFLITE_RECIPE_REPO}/${RECIPE}/${TEST_QCONFIG_FILENAME}")
+ set(CIRCLE_QCONFIG_SOURCE_PATH "${CIRCLE_RECIPE_REPO}/${RECIPE}/${TEST_QCONFIG_FILENAME}")
+
+ unset(QCONFIG_SOURCE_PATH)
+ if(EXISTS "${TFLITE_QCONFIG_SOURCE_PATH}")
+ set(QCONFIG_SOURCE_PATH ${TFLITE_QCONFIG_SOURCE_PATH})
+ endif()
+ if(EXISTS "${CIRCLE_QCONFIG_SOURCE_PATH}")
+ set(QCONFIG_SOURCE_PATH ${CIRCLE_QCONFIG_SOURCE_PATH})
+ endif()
+
set(RECIPE_BINARY_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE_FILE}")
set(RULE_BINARY_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RULE_FILE}")
+ set(QCONFIG_BINARY_PATH "${CMAKE_CURRENT_BINARY_DIR}/${QCONFIG_FILE}")
set(TFLITE_FILE "${RECIPE}.tflite")
set(TFLITE_OUTPUT_PATH "${CMAKE_CURRENT_BINARY_DIR}/${TFLITE_FILE}")
@@ -171,13 +210,23 @@ foreach(RECIPE IN ITEMS ${RECIPES})
if(DEFINED RULE_SOURCE_PATH)
# Copy .rule
add_custom_command(OUTPUT ${RULE_BINARY_PATH}
- COMMAND ${CMAKE_COMMAND} -E copy "${RULE_SOURCE_PATH}" "${RULE_BINARY_PATH}"
- DEPENDS ${RULE_SOURCE_PATH}
- COMMENT "Generate ${RULE_FILE}"
+ COMMAND ${CMAKE_COMMAND} -E copy "${RULE_SOURCE_PATH}" "${RULE_BINARY_PATH}"
+ DEPENDS ${RULE_SOURCE_PATH}
+ COMMENT "Generate ${RULE_FILE}"
)
list(APPEND TEST_DEPS ${RULE_BINARY_PATH})
endif()
+ if(DEFINED QCONFIG_SOURCE_PATH)
+ # Copy .qconf.json
+ add_custom_command(OUTPUT ${QCONFIG_BINARY_PATH}
+ COMMAND ${CMAKE_COMMAND} -E copy "${QCONFIG_SOURCE_PATH}" "${QCONFIG_BINARY_PATH}"
+ DEPENDS ${QCONFIG_SOURCE_PATH}
+ COMMENT "Generate ${QCONFIG_FILE}"
+ )
+ list(APPEND TEST_DEPS ${QCONFIG_BINARY_PATH})
+ endif()
+
if(${MODEL_FORMAT} STREQUAL "tflite")
# Generate .tflite
add_custom_command(OUTPUT ${TFLITE_OUTPUT_PATH}
@@ -188,21 +237,21 @@ foreach(RECIPE IN ITEMS ${RECIPES})
list(APPEND TEST_DEPS ${TFLITE_OUTPUT_PATH})
if(NOT DEFINED NO_CIRCLIZE_${RECIPE})
- # Generate .circle
- add_custom_command(OUTPUT ${CIRCLE_OUTPUT_PATH}
- COMMAND $<TARGET_FILE:tflite2circle> ${TFLITE_OUTPUT_PATH} ${CIRCLE_OUTPUT_PATH}
- DEPENDS $<TARGET_FILE:tflite2circle> ${TFLITE_OUTPUT_PATH}
- COMMENT "Generate ${CIRCLE_FILE}"
- )
- set(MODEL_FORMAT "circle")
- list(APPEND TEST_DEPS ${CIRCLE_OUTPUT_PATH})
+ # Generate .circle
+ add_custom_command(OUTPUT ${CIRCLE_OUTPUT_PATH}
+ COMMAND $<TARGET_FILE:tflite2circle> ${TFLITE_OUTPUT_PATH} ${CIRCLE_OUTPUT_PATH}
+ DEPENDS $<TARGET_FILE:tflite2circle> ${TFLITE_OUTPUT_PATH}
+ COMMENT "Generate ${CIRCLE_FILE}"
+ )
+ set(MODEL_FORMAT "circle")
+ list(APPEND TEST_DEPS ${CIRCLE_OUTPUT_PATH})
endif()
else()
# Generate .circle
add_custom_command(OUTPUT ${CIRCLE_OUTPUT_PATH}
- COMMAND $<TARGET_FILE:circlechef-file> ${RECIPE_BINARY_PATH} ${CIRCLE_OUTPUT_PATH}
- DEPENDS $<TARGET_FILE:circlechef-file> ${RECIPE_BINARY_PATH}
- COMMENT "Generate ${CIRCLE_FILE}"
+ COMMAND $<TARGET_FILE:circlechef-file> ${RECIPE_BINARY_PATH} ${CIRCLE_OUTPUT_PATH}
+ DEPENDS $<TARGET_FILE:circlechef-file> ${RECIPE_BINARY_PATH}
+ COMMENT "Generate ${CIRCLE_FILE}"
)
list(APPEND TEST_DEPS ${CIRCLE_OUTPUT_PATH})
endif()
@@ -213,7 +262,13 @@ foreach(RECIPE IN ITEMS ${RECIPES})
if(NOT DEFINED NO_OPTIMIZE_${RECIPE})
# Generate optimized .circle
add_custom_command(OUTPUT ${OPT_CIRCLE_OUTPUT_PATH}
- COMMAND $<TARGET_FILE:circle2circle> --all ${CIRCLE_OUTPUT_PATH} ${OPT_CIRCLE_OUTPUT_PATH}
+ # NOTE --resolve_customop_add is just added for the old -O1; it has no particular meaning here
+ # --fold_dequantize is added to fold the Tensor(FLOAT16) + DEQUANTIZE (Net_Dequantize_Add)
+ # model. FLOAT16 in general is NOT supported; only the Tensor(FLOAT16) + DEQUANTIZE
+ # sequence is accepted and folded to Tensor(FLOAT32).
+ # TODO revise giving options from the list file
+ COMMAND $<TARGET_FILE:circle2circle> --resolve_customop_add --fold_dequantize --fold_densify
+ ${CIRCLE_OUTPUT_PATH} ${OPT_CIRCLE_OUTPUT_PATH}
DEPENDS $<TARGET_FILE:circle2circle> ${CIRCLE_OUTPUT_PATH}
COMMENT "Generate ${OPT_CIRCLE_FILE}"
)
@@ -224,54 +279,52 @@ foreach(RECIPE IN ITEMS ${RECIPES})
set(MODEL_FILE "${RECIPE}${OPT_FORMAT}.${MODEL_FORMAT}")
set(MODEL_PATH "${CMAKE_CURRENT_BINARY_DIR}/${MODEL_FILE}")
set(NNPKG_FILE "${RECIPE}${OPT_FORMAT}")
- set(NNPKG_PATH "${CMAKE_CURRENT_BINARY_DIR}/${NNPKG_FILE}")
+ set(NNPKG_DIR "${CMAKE_CURRENT_BINARY_DIR}/${NNPKG_FILE}")
+ set(NNPKG_MODEL "${NNPKG_DIR}/${MODEL_FILE}")
+
+ # Generate nnpackage directory
+ add_custom_command(OUTPUT ${NNPKG_DIR}
+ COMMAND ${CMAKE_COMMAND} -E make_directory ${NNPKG_DIR}
+ DEPENDS ${MODEL_PATH}
+ COMMENT "Generate ${RECIPE} nnpackage directory"
+ )
+ list(APPEND TEST_DEPS ${NNPKG_DIR})
- add_custom_command(OUTPUT ${NNPKG_PATH}
- COMMAND ${MODEL2NNPKG} ${MODEL_PATH}
+ add_custom_command(OUTPUT ${NNPKG_MODEL}
+ COMMAND ${PYTHON_EXECUTABLE} ${MODEL2NNPKG} -m ${MODEL_PATH}
DEPENDS ${MODEL2NNPKG} ${MODEL_PATH}
COMMENT "Generate ${RECIPE} nnpackage"
)
- list(APPEND TEST_DEPS ${NNPKG_PATH})
-
- set(INPUT_HDF5_FILE "${RECIPE}${OPT_FORMAT}.input.h5")
- set(INPUT_BIN_PATH "${CMAKE_CURRENT_BINARY_DIR}/${INPUT_HDF5_FILE}")
-
- set(EXPECTED_HDF5_FILE "${RECIPE}${OPT_FORMAT}.expected.h5")
- set(EXPECTED_BIN_PATH "${CMAKE_CURRENT_BINARY_DIR}/${EXPECTED_HDF5_FILE}")
+ list(APPEND TEST_DEPS ${NNPKG_MODEL})
if(NOT DEFINED NO_TCGEN_${RECIPE})
- # Generate input.h5, expected.h5
- add_custom_command(OUTPUT ${INPUT_BIN_PATH} ${EXPECTED_BIN_PATH}
- COMMAND $<TARGET_FILE:testDataGenerator> ${MODEL_FILE}
- DEPENDS $<TARGET_FILE:testDataGenerator> ${MODEL_FILE}
- COMMENT "Generate ${INPUT_BIN_PATH} and ${EXPECTED_BIN_PATH}"
- )
-
# Generate test directory
- set(TC_DIRECTORY "${NNPKG_PATH}/metadata/tc")
+ set(TC_DIRECTORY "${NNPKG_DIR}/metadata/tc")
add_custom_command(OUTPUT ${TC_DIRECTORY}
COMMAND ${CMAKE_COMMAND} -E make_directory ${TC_DIRECTORY}
- DEPENDS ${NNPKG_PATH}
COMMENT "Generate ${RECIPE} nnpackage test directory"
)
+ list(APPEND TEST_DEPS ${TC_DIRECTORY})
- # Move input hdf5 file to test directory
- set(INPUT_NNPKG_PATH "${TC_DIRECTORY}/input.h5")
- add_custom_command(OUTPUT ${INPUT_NNPKG_PATH}
- COMMAND ${CMAKE_COMMAND} -E rename ${INPUT_BIN_PATH} ${INPUT_NNPKG_PATH}
- DEPENDS ${INPUT_BIN_PATH} ${TC_DIRECTORY}
- COMMENT "Move ${INPUT_HDF5_FILE} to nnpackage"
- )
+ # set ADDITIONAL_OPTIONS as empty (one space before closing is intentional)
+ set(ADDITIONAL_OPTIONS )
+ if(DEFINED TCGEN_OPT_${RECIPE})
+ set(ADDITIONAL_OPTIONS ${ADDITIONAL_OPTIONS} ${TCGEN_OPT_${RECIPE}})
+ endif()
- # Move expected hdf5 file to test directory
- set(EXPECTED_NNPKG_PATH "${TC_DIRECTORY}/expected.h5")
- add_custom_command(OUTPUT ${EXPECTED_NNPKG_PATH}
- COMMAND ${CMAKE_COMMAND} -E rename ${EXPECTED_BIN_PATH} ${EXPECTED_NNPKG_PATH}
- DEPENDS ${EXPECTED_BIN_PATH} ${TC_DIRECTORY}
- COMMENT "Move ${EXPECTED_HDF5_FILE} to nnpackage"
+ # Generate input.h5, expected.h5
+ set(INPUT_HDF5_FILE "${TC_DIRECTORY}/input.h5")
+ set(EXPECTED_HDF5_FILE "${TC_DIRECTORY}/expected.h5")
+ add_custom_command(OUTPUT ${INPUT_HDF5_FILE} ${EXPECTED_HDF5_FILE}
+ COMMAND $<TARGET_FILE:testDataGenerator>
+ --input_data ${INPUT_HDF5_FILE}
+ --expected_data ${EXPECTED_HDF5_FILE}
+ ${ADDITIONAL_OPTIONS}
+ ${MODEL_FILE}
+ DEPENDS $<TARGET_FILE:testDataGenerator> ${MODEL_FILE} ${TC_DIRECTORY}
+ COMMENT "Generate input.h5 and expected.h5 in ${NNPKG_FILE}/metadata/tc"
)
- list(APPEND TEST_DEPS ${TC_DIRECTORY} ${INPUT_BIN_PATH} ${EXPECTED_BIN_PATH}
- ${INPUT_NNPKG_PATH} ${EXPECTED_NNPKG_PATH})
+ list(APPEND TEST_DEPS ${INPUT_HDF5_FILE} ${EXPECTED_HDF5_FILE})
endif()
endforeach()
diff --git a/compiler/common-artifacts/exclude.lst b/compiler/common-artifacts/exclude.lst
index 886f607cf..75055225b 100644
--- a/compiler/common-artifacts/exclude.lst
+++ b/compiler/common-artifacts/exclude.lst
@@ -5,12 +5,6 @@
#[[ optimize : Exclude from circle optimization(circle2circle) ]]
## TensorFlowLiteRecipes
-optimize(Unique_000)
-optimize(Unique_001)
-optimize(Unique_002)
-optimize(Unique_003)
-optimize(Unique_U8_000)
-optimize(Unique_U8_001)
## CircleRecipes
@@ -19,12 +13,9 @@ optimize(Unique_U8_001)
tcgenerate(Abs_000)
tcgenerate(AddN_000)
tcgenerate(Add_001) # runtime doesn't support
-tcgenerate(Add_U8_000)
+tcgenerate(Add_STR_000) # STRING is not supported
+tcgenerate(Add_STR_001) # STRING is not supported
tcgenerate(All_000)
-tcgenerate(ArgMax_U8_000)
-tcgenerate(ArgMax_U8_001)
-tcgenerate(ArgMax_U8_002)
-tcgenerate(ArgMax_U8_003)
tcgenerate(ArgMin_000)
tcgenerate(ArgMin_001)
tcgenerate(ArgMin_002)
@@ -33,67 +24,39 @@ tcgenerate(ArgMin_U8_000)
tcgenerate(ArgMin_U8_001)
tcgenerate(ArgMin_U8_002)
tcgenerate(ArgMin_U8_003)
-tcgenerate(BatchMatMul_000)
tcgenerate(BatchMatMulV2_000)
tcgenerate(BatchMatMulV2_001)
tcgenerate(BatchToSpaceND_000)
-tcgenerate(Cast_000)
-tcgenerate(Cast_001)
+tcgenerate(BroadcastTo_000) # luci-interpreter doesn't support custom operator
tcgenerate(Ceil_000)
-tcgenerate(Concatenation_U8_000)
tcgenerate(Conv2D_003) # runtime doesn't support dilation
-tcgenerate(Conv2D_U8_000)
-tcgenerate(Conv2D_U8_001)
tcgenerate(Cos_000)
-tcgenerate(DepthToSpace_000)
+tcgenerate(Densify_000) # luci-interpreter doesn't support
tcgenerate(DepthwiseConv2D_001) # runtime doesn't support dilation
tcgenerate(DepthwiseConv2D_003) # runtime doesn't support dilation
-tcgenerate(DepthwiseConv2D_U8_000)
tcgenerate(DepthwiseConv2D_U8_001) # luci-interpreter doesn't support channel-wise quantization yet
-tcgenerate(Div_000)
-tcgenerate(ELU_000)
-tcgenerate(Equal_000)
-tcgenerate(Exp_000)
-tcgenerate(ExpandDims_000)
-tcgenerate(ExpandDims_001)
-tcgenerate(ExpandDims_002)
-tcgenerate(ExpandDims_003)
+tcgenerate(ExpandDims_001) # luci-interpreter doesn't support undefined shape
+tcgenerate(ExpandDims_002) # luci-interpreter doesn't support undefined shape
+tcgenerate(FakeQuant_000) # runtime and luci-interpreter doesn't support yet
tcgenerate(Fill_000)
tcgenerate(Fill_001)
-tcgenerate(Floor_000)
-tcgenerate(FloorDiv_000)
-tcgenerate(FloorDiv_001)
tcgenerate(FloorMod_000)
tcgenerate(FloorMod_001)
-tcgenerate(FullyConnected_002)
tcgenerate(FullyConnected_U8_000)
-tcgenerate(Gather_000)
tcgenerate(GatherNd_000)
tcgenerate(GatherNd_001)
-tcgenerate(Greater_000)
-tcgenerate(GreaterEqual_000)
-tcgenerate(If_000)
-tcgenerate(If_001)
-tcgenerate(L2Normalize_000) # runtime doesn't support
-tcgenerate(L2Pool2D_000) # runtime doesn't support
tcgenerate(L2Pool2D_U8_000)
-tcgenerate(LeakyRelu_000) # runtime doesn't support
-tcgenerate(Less_000)
-tcgenerate(LessEqual_000)
-tcgenerate(LocalResponseNormalization_000) # runtime doesn't support
tcgenerate(Log_000)
-tcgenerate(LogicalAnd_000)
-tcgenerate(LogicalNot_000)
-tcgenerate(LogicalOr_000)
-tcgenerate(LogSoftmax_000)
tcgenerate(MatMul_000)
tcgenerate(MatrixBandPart_000)
tcgenerate(MatrixDiag_000)
tcgenerate(MatrixSetDiag_000)
-tcgenerate(Maximum_000)
-tcgenerate(MaxPool2D_U8_000)
-tcgenerate(Mean_U8_000)
-tcgenerate(Minimum_000)
+tcgenerate(MaxPoolWithArgmax_000)
+tcgenerate(MaxPoolWithArgmax_001)
+tcgenerate(MaxPoolWithArgmax_002)
+tcgenerate(Mean_dynamic_000) # TestDataGenerator does not support unknown dimension
+tcgenerate(Mean_dynamic_001) # TestDataGenerator does not support unknown dimension
+tcgenerate(Mean_U8_dynamic_000) # TestDataGenerator does not support unknown dimension
tcgenerate(NonMaxSuppressionV4_000)
tcgenerate(NonMaxSuppressionV4_001)
tcgenerate(NonMaxSuppressionV5_000)
@@ -101,45 +64,63 @@ tcgenerate(NonMaxSuppressionV5_001)
tcgenerate(MirrorPad_000)
tcgenerate(Mul_U8_000)
tcgenerate(Neg_000)
+tcgenerate(Net_BroadcastTo_AddV2_001) # luci-interpreter doesn't support custom operator
+tcgenerate(Net_Conv_FakeQuant_000) # luci-interpreter doesn't support FakeQuant yet
tcgenerate(Net_Dangle_001)
-tcgenerate(Net_InstanceNorm_001)
-tcgenerate(Net_InstanceNorm_002)
+tcgenerate(Net_Densify_Add_000) # luci-interpreter doesn't support Densify yet
+tcgenerate(Net_Densify_Dequantize_Add_000) # luci-interpreter doesn't support Densify/Dequantize yet
+tcgenerate(Net_Gather_SparseToDense_AddV2_000) # luci-interpreter doesn't support custom operator
+tcgenerate(Net_Gelu_000) # luci-interpreter doesn't support custom operator
+tcgenerate(Net_Gelu_001) # luci-interpreter doesn't support custom operator
tcgenerate(Net_ZeroDim_001) # luci-interpreter doesn't support zero dim
-tcgenerate(NotEqual_000)
tcgenerate(OneHot_000)
tcgenerate(OneHot_001)
tcgenerate(OneHot_002)
tcgenerate(OneHot_003)
tcgenerate(Pack_000)
tcgenerate(Pack_U8_000)
-tcgenerate(Pad_U8_000)
tcgenerate(PadV2_000)
-tcgenerate(Pow_000)
-tcgenerate(PRelu_000)
+tcgenerate(Quant_Add_I8_000) # INT8 is not supported
+tcgenerate(Quant_AveragePool2D_I8_000) # INT8 is not supported
+tcgenerate(Quant_Conv_I8_000) # INT8 is not supported
+tcgenerate(Quant_DepthwiseConv2D_I8_000) # INT8 is not supported
+tcgenerate(Quant_MaxPool2D_I8_000) # INT8 is not supported
+tcgenerate(Quant_Mean_I8_000) # INT8 is not supported
+tcgenerate(Quant_Mul_I8_000) # INT8 is not supported
+tcgenerate(Quant_PRelu_I8_000) # INT8 is not supported
+tcgenerate(Quant_ReLU_I8_000) # INT8 is not supported
+tcgenerate(Quant_TransposeConv_I8_000) # INT8 is not supported
+tcgenerate(Quantize_000) # runtime and luci-interpreter doesn't support Quantize op yet
tcgenerate(Range_000)
tcgenerate(Rank_000)
tcgenerate(ReduceAny_000)
tcgenerate(ReduceAny_001)
tcgenerate(ReduceAny_002)
tcgenerate(ReduceAny_003)
+tcgenerate(ReduceAny_dynamic_000) # TestDataGenerator does not support unknown dimension
+tcgenerate(ReduceAny_dynamic_001) # TestDataGenerator does not support unknown dimension
+tcgenerate(ReduceAny_dynamic_002) # TestDataGenerator does not support unknown dimension
+tcgenerate(ReduceAny_dynamic_003) # TestDataGenerator does not support unknown dimension
tcgenerate(ReduceMax_000)
+tcgenerate(ReduceMax_dynamic_000) # TestDataGenerator does not support unknown dimension
tcgenerate(ReduceMin_000)
+tcgenerate(ReduceMin_dynamic_000) # TestDataGenerator does not support unknown dimension
tcgenerate(ReduceProd_000)
tcgenerate(ReduceProd_001)
tcgenerate(ReduceProd_002)
tcgenerate(ReduceProd_003)
-tcgenerate(ReLU_000)
-tcgenerate(ReLU6_000)
+tcgenerate(ReduceProd_dynamic_000) # TestDataGenerator does not support unknown dimension
+tcgenerate(ReduceProd_dynamic_001) # TestDataGenerator does not support unknown dimension
+tcgenerate(ReduceProd_dynamic_002) # TestDataGenerator does not support unknown dimension
+tcgenerate(ReduceProd_dynamic_003) # TestDataGenerator does not support unknown dimension
+tcgenerate(ReLU_dynamic_000) # TestDataGenerator does not support unknown dimension
+tcgenerate(ReLU6_dynamic_000) # TestDataGenerator does not support unknown dimension
tcgenerate(ReLUN1To1_000)
+tcgenerate(ReLUN1To1_dynamic_000) # TestDataGenerator does not support unknown dimension
tcgenerate(Reshape_003) # luci-interpreter doesn't support reshape without built-in option
-tcgenerate(Reshape_U8_000)
-tcgenerate(ResizeBilinear_000)
-tcgenerate(ResizeBilinear_U8_000) # luci-interpreter
-tcgenerate(ResizeNearestNeighbor_000)
tcgenerate(ReverseSequence_000)
tcgenerate(ReverseV2_000)
tcgenerate(Round_000)
-tcgenerate(Rsqrt_000)
tcgenerate(ScatterNd_000)
tcgenerate(SegmentSum_000)
tcgenerate(Select_000)
@@ -150,32 +131,23 @@ tcgenerate(SelectV2_001)
tcgenerate(SelectV2_002)
tcgenerate(Shape_000)
tcgenerate(Sin_000)
-tcgenerate(Slice_000)
-tcgenerate(Softmax_U8_000)
+tcgenerate(Slice_001) # luci-interpreter doesn't support Slice with -1
tcgenerate(SpaceToBatchND_000)
tcgenerate(SpaceToBatchND_001)
tcgenerate(SpaceToBatchND_002)
tcgenerate(SpaceToBatchND_003)
-tcgenerate(SpaceToDepth_000)
tcgenerate(SparseToDense_000)
tcgenerate(SplitV_000)
-tcgenerate(Sqrt_000)
tcgenerate(Square_000)
-tcgenerate(SquaredDifference_000)
-tcgenerate(Squeeze_000)
-tcgenerate(StridedSlice_000)
-tcgenerate(StridedSlice_001)
-tcgenerate(StridedSlice_002)
-tcgenerate(Sub_000)
-tcgenerate(Sub_001)
-tcgenerate(Sub_U8_000)
tcgenerate(Sum_000)
tcgenerate(Sum_001)
-tcgenerate(Tanh_000)
+tcgenerate(Sum_dynamic_000) # TestDataGenerator does not support unknown dimension
+tcgenerate(Sum_dynamic_001) # TestDataGenerator does not support unknown dimension
tcgenerate(Tile_000)
tcgenerate(Tile_U8_000)
tcgenerate(TopKV2_000)
tcgenerate(TopKV2_001)
tcgenerate(UnidirectionalSequenceLSTM_000) # This model is just for Op creation; it cannot run
tcgenerate(Unique_000)
tcgenerate(Unique_001)
tcgenerate(Unique_002)
@@ -184,17 +156,16 @@ tcgenerate(Unique_U8_000)
tcgenerate(Unique_U8_001)
tcgenerate(Where_000)
tcgenerate(Where_001)
-tcgenerate(While_000)
-tcgenerate(While_001)
-tcgenerate(While_002)
-tcgenerate(While_003)
+tcgenerate(While_000) # Needs luci-interpreter int32_t support for ADD, EQUAL
+tcgenerate(While_001) # Needs luci-interpreter int32_t support for ADD, EQUAL
+tcgenerate(While_002) # Needs luci-interpreter int32_t support for ADD, EQUAL
+tcgenerate(While_003) # Needs luci-interpreter int32_t support for ADD, EQUAL, and dynamic shape for WHILE
tcgenerate(YUV_TO_RGB_000)
-tcgenerate(YUV_TO_RGB_U8_000)
tcgenerate(ZerosLike_000)
## CircleRecipes
tcgenerate(BCQFullyConnected_000)
tcgenerate(BCQFullyConnected_001)
tcgenerate(BCQGather_000)
-tcgenerate(CircleBatchMatMul_000)
tcgenerate(InstanceNorm_000)
+tcgenerate(InstanceNorm_001)
diff --git a/compiler/common-artifacts/options.lst b/compiler/common-artifacts/options.lst
new file mode 100644
index 000000000..5e0ff9da5
--- /dev/null
+++ b/compiler/common-artifacts/options.lst
@@ -0,0 +1,6 @@
+## Additional Options for test recipe
+
+#[[ tcgenerate_option : add additional option(s) for generation ]]
+
+# make valid 'indices' input value
+tcgenerate_option(Gather_001 --input_range indices 0 3)
diff --git a/compiler/common-artifacts/requires.cmake b/compiler/common-artifacts/requires.cmake
index d7bed21fe..cc07e17f6 100644
--- a/compiler/common-artifacts/requires.cmake
+++ b/compiler/common-artifacts/requires.cmake
@@ -4,6 +4,6 @@ require("circlechef")
require("foder")
require("luci")
require("luci-interpreter")
-require("mio-circle")
require("safemain")
require("tflchef")
+require("tflite2circle")
diff --git a/compiler/common-artifacts/src/TestDataGenerator.cpp b/compiler/common-artifacts/src/TestDataGenerator.cpp
index 7a07dd88e..7481050c5 100644
--- a/compiler/common-artifacts/src/TestDataGenerator.cpp
+++ b/compiler/common-artifacts/src/TestDataGenerator.cpp
@@ -18,7 +18,6 @@
#include <foder/FileLoader.h>
#include <luci/Importer.h>
#include <luci_interpreter/Interpreter.h>
-#include <mio/circle/schema_generated.h>
#include <H5Cpp.h>
@@ -27,6 +26,9 @@
#include <memory>
#include <random>
#include <string>
+#include <vector>
+#include <cassert>
+#include <cstdlib>
namespace
{
@@ -34,7 +36,7 @@ namespace
uint32_t element_num(std::vector<hsize_t> &vec)
{
return static_cast<uint32_t>(
- std::accumulate(std::begin(vec), std::end(vec), 1, std::multiplies<uint32_t>()));
+ std::accumulate(std::begin(vec), std::end(vec), 1, std::multiplies<uint32_t>()));
}
H5::PredType hdf5_dtype_cast(const loco::DataType loco_dtype)
@@ -43,18 +45,22 @@ H5::PredType hdf5_dtype_cast(const loco::DataType loco_dtype)
{
case loco::DataType::U8:
return H5::PredType::NATIVE_UINT8;
+ case loco::DataType::S16:
+ return H5::PredType::NATIVE_INT16;
case loco::DataType::S32:
return H5::PredType::NATIVE_INT32;
case loco::DataType::S64:
return H5::PredType::NATIVE_INT64;
case loco::DataType::FLOAT32:
return H5::PredType::NATIVE_FLOAT;
+ case loco::DataType::BOOL:
+ return H5::PredType::NATIVE_HBOOL;
default:
throw std::runtime_error("NYI data type.");
}
}
-template <typename T> void geneate_random_data(std::mt19937 &gen, void *data, uint32_t size)
+template <typename T> void generate_random_data(std::mt19937 &gen, void *data, uint32_t size)
{
std::normal_distribution<float> distrib(0, 2); // mean(0), stddev(2)
for (uint32_t i = 0; i < size; i++)
@@ -63,6 +69,29 @@ template <typename T> void geneate_random_data(std::mt19937 &gen, void *data, ui
}
}
+template <> void generate_random_data<bool>(std::mt19937 &gen, void *data, uint32_t size)
+{
+ std::normal_distribution<float> distrib(0, 2); // mean(0), stddev(2)
+ for (uint32_t i = 0; i < size; i++)
+ {
+ static_cast<bool *>(data)[i] = distrib(gen) >= 0 ? true : false;
+ }
+}
+
+template <typename T>
+void generate_random_range(void *data, uint32_t size, int32_t range_min, int32_t range_max)
+{
+ assert(range_min <= range_max);
+
+ for (uint32_t i = 0; i < size; i++)
+ {
+ // +1 makes the value fall in the closed interval [range_min, range_max]
+ int32_t range = range_max - range_min + 1;
+ int32_t value = (rand() % range) + range_min;
+ static_cast<T *>(data)[i] = static_cast<T>(value);
+ }
+}
+
void fill_random_data(void *data, uint32_t size, loco::DataType dtype, uint32_t seed)
{
std::mt19937 gen(seed); // standard mersenne_twister_engine seeded with rd()
@@ -70,19 +99,41 @@ void fill_random_data(void *data, uint32_t size, loco::DataType dtype, uint32_t
switch (dtype)
{
case loco::DataType::U8:
- geneate_random_data<uint8_t>(gen, data, size);
+ generate_random_data<uint8_t>(gen, data, size);
+ break;
+ case loco::DataType::S16:
+ generate_random_data<int16_t>(gen, data, size);
break;
case loco::DataType::S32:
- geneate_random_data<int32_t>(gen, data, size);
+ generate_random_data<int32_t>(gen, data, size);
break;
case loco::DataType::S64:
- geneate_random_data<int64_t>(gen, data, size);
+ generate_random_data<int64_t>(gen, data, size);
break;
case loco::DataType::FLOAT32:
- geneate_random_data<float>(gen, data, size);
+ generate_random_data<float>(gen, data, size);
+ break;
+ case loco::DataType::BOOL:
+ generate_random_data<bool>(gen, data, size);
break;
default:
+ throw std::runtime_error("NYI data type.");
+ }
+}
+
+void fill_random_range(void *data, uint32_t size, loco::DataType dtype, int32_t range_min,
+ int32_t range_max)
+{
+ switch (dtype)
+ {
+ case loco::DataType::S32:
+ generate_random_range<int32_t>(data, size, range_min, range_max);
+ break;
+ case loco::DataType::S64:
+ generate_random_range<int64_t>(data, size, range_min, range_max);
break;
+ default:
+ throw std::runtime_error("NYI data type.");
}
}
@@ -91,11 +142,18 @@ void fill_random_data(void *data, uint32_t size, loco::DataType dtype, uint32_t
int entry(int argc, char **argv)
{
arser::Arser arser;
- arser.add_argument("circle").type(arser::DataType::STR).help("Circle file you want to test");
+ arser.add_argument("circle").help("Circle file you want to test");
+ arser.add_argument("--input_data").required(true).help("Path to generate input data h5 file");
+ arser.add_argument("--expected_data")
+ .required(true)
+ .help("Path to generate expected data h5 file");
arser.add_argument("--fixed_seed")
- .required(false)
- .nargs(0)
- .help("Put a fixed seed into the random number generator");
+ .nargs(0)
+ .help("Put a fixed seed into the random number generator");
+ arser.add_argument("--input_range")
+ .nargs(3)
+ .type(arser::DataType::STR_VEC)
+ .help("Set random number range [min max] for the input as 'name min max'");
try
{
@@ -109,8 +167,6 @@ int entry(int argc, char **argv)
}
std::string circle_file = arser.get<std::string>("circle");
- size_t last_dot_index = circle_file.find_last_of(".");
- std::string prefix = circle_file.substr(0, last_dot_index);
// load circle file
foder::FileLoader file_loader{circle_file};
@@ -142,27 +198,48 @@ int entry(int argc, char **argv)
* ㄴDATA ...
*/
// create random data and dump into hdf5 file
- H5::H5File input_file{prefix + ".input.h5", H5F_ACC_TRUNC};
+ H5::H5File input_file{arser.get<std::string>("--input_data"), H5F_ACC_TRUNC};
std::unique_ptr<H5::Group> input_name_group =
- std::make_unique<H5::Group>(input_file.createGroup("name"));
+ std::make_unique<H5::Group>(input_file.createGroup("name"));
std::unique_ptr<H5::Group> input_value_group =
- std::make_unique<H5::Group>(input_file.createGroup("value"));
+ std::make_unique<H5::Group>(input_file.createGroup("value"));
- H5::H5File output_file{prefix + ".expected.h5", H5F_ACC_TRUNC};
+ H5::H5File output_file{arser.get<std::string>("--expected_data"), H5F_ACC_TRUNC};
std::unique_ptr<H5::Group> output_name_group =
- std::make_unique<H5::Group>(output_file.createGroup("name"));
+ std::make_unique<H5::Group>(output_file.createGroup("name"));
std::unique_ptr<H5::Group> output_value_group =
- std::make_unique<H5::Group>(output_file.createGroup("value"));
+ std::make_unique<H5::Group>(output_file.createGroup("value"));
+
+ std::string range_name;
+ int32_t range_min = 0;
+ int32_t range_max = 0;
+ bool range_check = false;
+ bool range_input_found = false;
+ if (arser["--input_range"])
+ {
+ // NOTE limitation: we can only set one input range
+ // TODO expand this for multiple inputs
+ std::vector<std::string> values = arser.get<std::vector<std::string>>("--input_range");
+ assert(values.size() == 3);
+ range_name = values.at(0);
+ // TODO add check for valid numbers
+ range_min = std::atoi(values.at(1).c_str());
+ range_max = std::atoi(values.at(2).c_str());
+ range_check = true;
+ }
std::random_device rd; // used to obtain a seed for the random number engine
uint32_t input_index = 0;
- for (uint32_t g = 0; g < circle_model->subgraphs()->size(); g++)
+ // TODO remove indentation
{
- const auto input_nodes = loco::input_nodes(module->graph(g));
+ // NOTE we only need to prepare data for main graph (subgraph 0) as
+ // other subgraphs are invoked by the main graph
+ const auto input_nodes = loco::input_nodes(module->graph(0));
for (const auto &node : input_nodes)
{
const auto *input_node = dynamic_cast<const luci::CircleInput *>(node);
std::string name = input_node->name();
+ assert(not name.empty());
if (name.find(":") == std::string::npos)
name += ":0";
@@ -185,7 +262,7 @@ int entry(int argc, char **argv)
auto dataspace = std::make_unique<H5::DataSpace>(dims.size(), dims.data());
auto dtype = hdf5_dtype_cast(input_node->dtype());
auto dataset = std::make_unique<H5::DataSet>(
- input_file.createDataSet("value/" + std::to_string(input_index), dtype, *dataspace));
+ input_file.createDataSet("value/" + std::to_string(input_index), dtype, *dataspace));
auto data_size = ::element_num(dims);
auto dtype_size = loco::size(input_node->dtype());
@@ -193,7 +270,12 @@ int entry(int argc, char **argv)
std::vector<int8_t> data(byte_size);
// generate random data
- if (arser["--fixed_seed"])
+ if (range_name == input_node->name())
+ {
+ fill_random_range(data.data(), data_size, input_node->dtype(), range_min, range_max);
+ range_input_found = true;
+ }
+ else if (arser["--fixed_seed"])
fill_random_data(data.data(), data_size, input_node->dtype(), 0);
else
fill_random_data(data.data(), data_size, input_node->dtype(), rd());
@@ -206,13 +288,19 @@ int entry(int argc, char **argv)
}
}
+ if (range_check && not range_input_found)
+ {
+ std::cerr << "ERROR: input_range for input [" << range_name << "] not found." << std::endl;
+ return EXIT_FAILURE;
+ }
+
interpreter.interpret();
// dump output data into hdf5 file
uint32_t output_index = 0;
- for (uint32_t g = 0; g < circle_model->subgraphs()->size(); g++)
+ // TODO remove indentation
{
- const auto output_nodes = loco::output_nodes(module->graph(g));
+ const auto output_nodes = loco::output_nodes(module->graph(0));
for (const auto &node : output_nodes)
{
const auto *output_node = dynamic_cast<const luci::CircleOutput *>(node);
@@ -239,7 +327,7 @@ int entry(int argc, char **argv)
auto dataspace = std::make_unique<H5::DataSpace>(dims.size(), dims.data());
auto dtype = hdf5_dtype_cast(output_node->dtype());
auto dataset = std::make_unique<H5::DataSet>(
- output_file.createDataSet("value/" + std::to_string(output_index), dtype, *dataspace));
+ output_file.createDataSet("value/" + std::to_string(output_index), dtype, *dataspace));
uint32_t tensor_bytesize = loco::size(output_node->dtype());
tensor_bytesize *= ::element_num(dims);
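Editor's note: to make the new --input_range plumbing concrete, the option names one input plus a closed integer interval, and fill_random_range dispatches to the generate_random_range template added above. A self-contained distillation, driven the way options.lst drives it for Gather_001 (the array name and size here are illustrative):

#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <iostream>

// Same logic as the generate_random_range added above
template <typename T>
void generate_random_range(void *data, uint32_t size, int32_t range_min, int32_t range_max)
{
  assert(range_min <= range_max);
  for (uint32_t i = 0; i < size; i++)
  {
    // +1 makes the value fall in the closed interval [range_min, range_max]
    int32_t range = range_max - range_min + 1;
    int32_t value = (std::rand() % range) + range_min;
    static_cast<T *>(data)[i] = static_cast<T>(value);
  }
}

int main()
{
  int32_t indices[8]; // illustrative buffer for a Gather 'indices' input
  // the effect of `tcgenerate_option(Gather_001 --input_range indices 0 3)`
  generate_random_range<int32_t>(indices, 8, 0, 3);
  for (int32_t v : indices)
    std::cout << v << ' '; // every value lies in [0, 3]
  std::cout << std::endl;
  return 0;
}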
diff --git a/compiler/crew/CMakeLists.txt b/compiler/crew/CMakeLists.txt
new file mode 100644
index 000000000..45cda7562
--- /dev/null
+++ b/compiler/crew/CMakeLists.txt
@@ -0,0 +1,23 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(crew STATIC ${SOURCES})
+target_include_directories(crew PRIVATE src)
+target_include_directories(crew PUBLIC include)
+target_link_libraries(crew PRIVATE foder)
+target_link_libraries(crew PRIVATE nncc_common)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+configure_file("src/test_read_semicolon.ini" "test_read_semicolon.ini" COPYONLY)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(crew_test ${TESTS})
+target_include_directories(crew_test PRIVATE src)
+target_link_libraries(crew_test nncc_common)
+target_link_libraries(crew_test crew)
+target_link_libraries(crew_test foder)
diff --git a/compiler/crew/README.md b/compiler/crew/README.md
new file mode 100644
index 000000000..29691929d
--- /dev/null
+++ b/compiler/crew/README.md
@@ -0,0 +1,13 @@
+# crew
+
+_crew_ is a circle partitioning Configuration REader and Writer library.
+
+### Support formats
+
+Currently, _crew_ supports the following formats and functionalities.
+- INI read
+- INI write
+- JSON write
+
+_crew_ supports a limited portion of the JSON and INI formats, just enough to
+access circle partition configuration files.
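
As a concrete illustration of the INI layout that _crew_ reads (all file and tensor names below are hypothetical), a partition configuration consists of a `[source]` section, a `[models]` section listing model keys `m1`, `m2`, ..., and one section per listed model; each section carries a `file` key plus numbered `i*`/`o*` keys for its inputs and outputs, matching the keys consumed by `read_ini`/`read_part` in `src/PConfig.cpp` below:

```
[source]
file=net.circle
i1=input_tensor
o1=output_tensor

[models]
m1=part_one.circle
m2=part_two.circle

[part_one.circle]
file=part_one.circle
i1=input_tensor
o1=mid_tensor

[part_two.circle]
file=part_two.circle
i1=mid_tensor
o1=output_tensor
```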
diff --git a/compiler/crew/include/crew/PConfig.h b/compiler/crew/include/crew/PConfig.h
new file mode 100644
index 000000000..9ff875574
--- /dev/null
+++ b/compiler/crew/include/crew/PConfig.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CREW_PCONFIG_H__
+#define __CREW_PCONFIG_H__
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+namespace crew
+{
+
+struct Part
+{
+ std::string model_file;
+ std::vector<std::string> inputs;
+ std::vector<std::string> outputs;
+};
+
+using Parts = std::vector<Part>;
+using Source = Part;
+
+struct PConfig
+{
+ Source source;
+ Parts parts;
+};
+
+/**
+ * @brief Read config as ini file, return false if failed
+ */
+bool read_ini(const std::string &path, PConfig &config);
+
+/**
+ * @brief Write config as ini file, return false if failed
+ */
+bool write_ini(std::ostream &os, const PConfig &config);
+
+/**
+ * @brief Write config as json file, return false if failed
+ */
+bool write_json(std::ostream &os, const PConfig &config);
+
+} // namespace crew
+
+#endif // __CREW_PCONFIG_H__
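
For the same hypothetical configuration, `write_json` serializes the `PConfig` as one `source` object followed by a `parts` array, each entry carrying `file`, `inputs`, and `outputs` keys. A sketch of the output shape (exact spacing is governed by `JsonExport` in `src/PConfigJson.cpp`):

```
{
  "source" : {
    "file" : "net.circle",
    "inputs" : [ "input_tensor" ],
    "outputs" : [ "output_tensor" ]
  },
  "parts" : [
    {
      "file" : "part_one.circle",
      "inputs" : [ "input_tensor" ],
      "outputs" : [ "mid_tensor" ]
    },
    {
      "file" : "part_two.circle",
      "inputs" : [ "mid_tensor" ],
      "outputs" : [ "output_tensor" ]
    }
  ]
}
```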
diff --git a/compiler/crew/include/crew/PConfigIni.h b/compiler/crew/include/crew/PConfigIni.h
new file mode 100644
index 000000000..45a54e115
--- /dev/null
+++ b/compiler/crew/include/crew/PConfigIni.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CREW_PCONFIG_INI_H__
+#define __CREW_PCONFIG_INI_H__
+
+#include <iostream>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace crew
+{
+
+using KeyValues = std::unordered_map<std::string, std::string>;
+
+struct Section
+{
+ std::string name;
+ KeyValues items;
+};
+
+using Sections = std::vector<Section>;
+
+/**
+ * @brief Reads Config INI from a null-terminated string and returns Sections
+ */
+Sections read_ini(const char *data, size_t length);
+/**
+ * @brief Reads Config INI from a file and returns Sections
+ */
+Sections read_ini(const std::string &path);
+
+/**
+ * @brief Write Config INI with Sections to ostream
+ */
+void write_ini(std::ostream &os, const Sections &sections);
+/**
+ * @brief Write Config INI with Sections to file, throw if failed
+ */
+void write_ini(const std::string &path, const Sections &sections);
+
+/**
+ * @brief Find a section with name, empty section if not found
+ */
+Section find(const Sections &sections, const std::string &name);
+
+/**
+ * @brief Find a key-value pair from key and return value, empty string if not found
+ */
+std::string find(const Section &section, const std::string &key);
+
+} // namespace crew
+
+#endif // __CREW_PCONFIG_INI_H__
diff --git a/compiler/crew/include/crew/PConfigIniDump.h b/compiler/crew/include/crew/PConfigIniDump.h
new file mode 100644
index 000000000..0755c6b20
--- /dev/null
+++ b/compiler/crew/include/crew/PConfigIniDump.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CREW_PCONFIG_INI_DUMP_H__
+#define __CREW_PCONFIG_INI_DUMP_H__
+
+#include "PConfigIni.h"
+
+#include <iostream>
+
+namespace crew
+{
+
+void dump(std::ostream &os, const Sections &sections);
+
+} // namespace crew
+
+std::ostream &operator<<(std::ostream &os, const crew::Sections &sections);
+
+#endif // __CREW_PCONFIG_INI_DUMP_H__
diff --git a/compiler/crew/requires.cmake b/compiler/crew/requires.cmake
new file mode 100644
index 000000000..27406d465
--- /dev/null
+++ b/compiler/crew/requires.cmake
@@ -0,0 +1 @@
+require("foder")
diff --git a/compiler/crew/src/PConfig.cpp b/compiler/crew/src/PConfig.cpp
new file mode 100644
index 000000000..b8e7c3e44
--- /dev/null
+++ b/compiler/crew/src/PConfig.cpp
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "crew/PConfig.h"
+#include "crew/PConfigIni.h"
+
+#include "PConfigJson.h"
+
+#include <utility>
+
+namespace
+{
+
+bool read_part(const crew::Section &section, crew::Part &part)
+{
+ // construct Source from section_source
+ part.model_file = crew::find(section, "file");
+ if (part.model_file.empty())
+ return false;
+
+ // read inputs for Source
+ for (int32_t i = 1;; ++i)
+ {
+ std::string item = "i" + std::to_string(i);
+ std::string input = crew::find(section, item);
+ if (input.empty())
+ break;
+
+ part.inputs.push_back(input);
+ }
+ // read outputs for Source
+ for (int32_t i = 1;; ++i)
+ {
+ std::string item = "o" + std::to_string(i);
+ std::string output = crew::find(section, item);
+ if (output.empty())
+ break;
+
+ part.outputs.push_back(output);
+ }
+ return true;
+}
+
+} // namespace
+
+namespace
+{
+
+void write_part(crew::JsonExport &je, const crew::Part &part)
+{
+ std::vector<std::string> graph_inputs;
+ std::vector<std::string> graph_outputs;
+
+ for (auto &input : part.inputs)
+ {
+ graph_inputs.push_back(input);
+ }
+ for (auto &output : part.outputs)
+ {
+ graph_outputs.push_back(output);
+ }
+
+ je.key_val("file", part.model_file.c_str(), true);
+ je.key_val("inputs", graph_inputs, true);
+ je.key_val("outputs", graph_outputs, false);
+}
+
+void write_parts(crew::JsonExport &je, const crew::Parts &parts)
+{
+ uint32_t idx = 1;
+ uint32_t size = parts.size();
+ for (auto &part : parts)
+ {
+ je.open_brace();
+ write_part(je, part);
+ je.close_brace(idx < size);
+ idx++;
+ }
+}
+
+} // namespace
+
+namespace
+{
+
+void part_to_section_io(const crew::Part &part, crew::Section &section)
+{
+ uint32_t idx = 1;
+ for (auto &input : part.inputs)
+ {
+ std::string key = "i" + std::to_string(idx);
+ section.items.emplace(key, input);
+ idx++;
+ }
+ idx = 1;
+ for (auto &output : part.outputs)
+ {
+ std::string key = "o" + std::to_string(idx);
+ section.items.emplace(key, output);
+ idx++;
+ }
+}
+
+} // namespace
+
+namespace crew
+{
+
+bool read_ini(const std::string &path, PConfig &pconfig)
+{
+ auto sections = crew::read_ini(path);
+
+ auto section_source = crew::find(sections, "source");
+ auto section_models = crew::find(sections, "models");
+ if (section_source.name != "source" || section_models.name != "models")
+ {
+ return false;
+ }
+
+ if (!read_part(section_source, pconfig.source))
+ {
+ return false;
+ }
+
+ // get models list
+ std::vector<std::string> models;
+ for (int32_t i = 1;; ++i)
+ {
+ std::string item = "m" + std::to_string(i);
+ std::string model = crew::find(section_models, item);
+ if (model.empty())
+ break;
+
+ models.push_back(model);
+ }
+
+ for (auto &model : models)
+ {
+ auto section_model = crew::find(sections, model);
+
+ Part part;
+ if (!read_part(section_model, part))
+ {
+ return false;
+ }
+ pconfig.parts.push_back(part);
+ }
+
+ return true;
+}
+
+bool write_ini(std::ostream &os, const PConfig &pconfig)
+{
+ crew::Sections sections;
+
+ // make [source]
+ crew::Section section_source;
+ section_source.name = "source";
+ section_source.items["file"] = pconfig.source.model_file;
+ part_to_section_io(pconfig.source, section_source);
+ sections.push_back(section_source);
+
+ // make [models]
+ crew::Section section_models;
+ section_models.name = "models";
+ uint32_t idx = 1;
+ for (auto &part : pconfig.parts)
+ {
+ std::string key = "m" + std::to_string(idx);
+ section_models.items[key] = part.model_file;
+ idx++;
+ }
+ sections.push_back(section_models);
+
+ for (auto &part : pconfig.parts)
+ {
+ // make circle model section
+ crew::Section section_model;
+ section_model.name = part.model_file;
+ section_model.items["file"] = part.model_file;
+ part_to_section_io(part, section_model);
+ sections.push_back(section_model);
+ }
+
+ write_ini(os, sections);
+
+ return true;
+}
+
+bool write_json(std::ostream &os, const PConfig &pconfig)
+{
+ crew::JsonExport je(os);
+
+ je.open_brace();
+ {
+ je.open_brace("source");
+ write_part(je, pconfig.source);
+ je.close_brace(true);
+ }
+ {
+ je.open_bracket("parts");
+ write_parts(je, pconfig.parts);
+ je.close_bracket(false);
+ }
+ je.close_brace(false);
+
+ return true;
+}
+
+} // namespace crew
diff --git a/compiler/crew/src/PConfigIni.cpp b/compiler/crew/src/PConfigIni.cpp
new file mode 100644
index 000000000..5177843bf
--- /dev/null
+++ b/compiler/crew/src/PConfigIni.cpp
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "crew/PConfigIni.h"
+#include "crew/PConfigIniDump.h"
+
+#include <foder/FileLoader.h>
+
+#include <cassert>
+#include <cstring>
+#include <fstream>
+#include <memory>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+namespace crew
+{
+
+namespace
+{
+
+std::string filter_escape(const std::string &source)
+{
+ std::string key = source;
+
+ // if key is surrounded with quotation
+ // TODO for quotation
+
+ // if key has '\\' + ';', remove '\\'
+ auto pos = key.find("\\;");
+ while (pos != std::string::npos)
+ {
+ auto k1 = key.substr(0, pos);
+ auto k2 = key.substr(pos + 1);
+ key = k1 + k2;
+ pos = key.find("\\;");
+ }
+
+ return key;
+}
+
+} // namespace
+
+Sections read_ini(const char *data, size_t length)
+{
+ assert(data != nullptr);
+ assert(length > 0);
+
+ auto buffer = std::vector<char>();
+ buffer.resize(length + 1); // resize (not reserve) so writing through data() is valid
+ char *pbuffer = buffer.data();
+ memcpy(pbuffer, data, length);
+ // add null at end to be sure
+ *(pbuffer + length) = 0;
+
+ Sections sections;
+ Section section;
+
+ std::string string_line;
+
+ const char *delim = "\r\n";
+ const char *one_line = std::strtok(pbuffer, delim);
+ while (one_line != nullptr)
+ {
+ if (*one_line == '[')
+ {
+ if (!section.name.empty())
+ {
+ sections.push_back(section);
+ }
+ section.name.clear();
+ section.items.clear();
+
+ string_line = one_line + 1;
+ auto pos = string_line.find(']');
+ assert(pos != std::string::npos);
+ if (pos != std::string::npos)
+ {
+ section.name = string_line.substr(0, pos);
+ }
+ }
+ else if (*one_line == '#' || *one_line == ';')
+ {
+ // Comment line, do nothing
+ }
+ else if (*one_line) // string length is not 0
+ {
+ if (section.name.empty())
+ throw std::runtime_error("Invalid INI file");
+
+ string_line = one_line;
+ auto pos = string_line.find('=');
+ assert(pos != std::string::npos);
+ if (pos != std::string::npos)
+ {
+ auto key = string_line.substr(0, pos);
+ auto val = string_line.substr(pos + 1);
+ key = filter_escape(key);
+ section.items.emplace(key, val);
+ }
+ }
+
+ one_line = std::strtok(nullptr, delim);
+ }
+ if (!section.name.empty())
+ {
+ sections.push_back(section);
+ }
+
+ return sections;
+}
+
+Sections read_ini(const std::string &path)
+{
+ foder::FileLoader file_loader{path};
+ // load will throw if error while opening
+ auto ini_data = file_loader.load();
+
+ return read_ini(ini_data.data(), ini_data.size());
+}
+
+namespace
+{
+
+void replace(std::string &source, const std::string &token, const std::string &replace)
+{
+ size_t pos = 0;
+ while ((pos = source.find(token, pos)) != std::string::npos)
+ {
+ source.replace(pos, token.length(), replace);
+ pos += replace.length(); // skip past the replacement: 'token' may be a substring of 'replace'
+ }
+}
+
+Sections insert_escape(const Sections &inputs)
+{
+ Sections sections;
+
+ // for all section in sections;
+ // if key has ';' then replace with '\;'
+ for (auto &input : inputs)
+ {
+ Section section;
+ section.name = input.name;
+
+ for (auto &item : input.items)
+ {
+ auto key = item.first;
+ auto value = item.second;
+
+ replace(key, ";", "\\;");
+ section.items[key] = value;
+ }
+ sections.push_back(section);
+ }
+
+ return sections;
+}
+
+} // namespace
+
+void write_ini(std::ostream &os, const Sections &sections)
+{
+ std::stringstream ss;
+
+ auto processed = insert_escape(sections);
+
+ ss << processed;
+
+ std::string strss = ss.str();
+
+ os.write(strss.c_str(), strss.length());
+}
+
+void write_ini(const std::string &filepath, const Sections &sections)
+{
+ std::ofstream fs(filepath.c_str(), std::ofstream::binary | std::ofstream::trunc);
+ if (not fs.good())
+ {
+ std::string msg = "Failed to create file: " + filepath;
+ throw std::runtime_error(msg);
+ }
+
+ write_ini(fs, sections);
+
+ fs.close();
+}
+
+Section find(const Sections &sections, const std::string &name)
+{
+ for (auto &section : sections)
+ {
+ if (section.name == name)
+ return section;
+ }
+ Section not_found;
+ return not_found;
+}
+
+std::string find(const Section &section, const std::string &key)
+{
+ for (auto &item : section.items)
+ {
+ if (item.first == key)
+ return item.second;
+ }
+ return "";
+}
+
+} // namespace crew
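
The `insert_escape`/`filter_escape` pair above exists because ';' starts a comment in INI: a key that legitimately contains ';' is written to disk as '\;' and unescaped again on read. A minimal Python sketch of the same round-trip, for illustration only:

```python
def insert_escape(key: str) -> str:
    # on write: protect ';' so the parser does not treat it as a comment
    return key.replace(';', '\\;')


def filter_escape(key: str) -> str:
    # on read: drop the protecting backslash again
    return key.replace('\\;', ';')


key = 'keya;keyb;keyc;keyd'
assert filter_escape(insert_escape(key)) == key
```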
diff --git a/compiler/crew/src/PConfigIni.test.cpp b/compiler/crew/src/PConfigIni.test.cpp
new file mode 100644
index 000000000..c062c6937
--- /dev/null
+++ b/compiler/crew/src/PConfigIni.test.cpp
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "crew/PConfigIni.h"
+#include "crew/PConfigIniDump.h"
+
+#include <foder/FileLoader.h>
+
+#include <gtest/gtest.h>
+
+#include <sstream>
+#include <stdexcept>
+
+TEST(ConfigIniTest, read_ini_non_exist_file_NEG)
+{
+ EXPECT_THROW(crew::read_ini("/hello/world/not_a_file"), std::runtime_error);
+}
+
+TEST(ConfigIniTest, read_ini_simple)
+{
+ std::stringstream ss;
+
+ ss << "[hello]\nkey=world\n";
+
+ auto str = ss.str();
+ auto sections = crew::read_ini(str.c_str(), str.length());
+ ASSERT_EQ(1UL, sections.size());
+
+ auto its = sections.begin();
+ ASSERT_NE(sections.end(), its);
+ EXPECT_TRUE("hello" == its->name);
+ ASSERT_EQ(1UL, its->items.size());
+
+ auto it = its->items.begin();
+ ASSERT_NE(its->items.end(), it);
+ EXPECT_TRUE("key" == it->first);
+ EXPECT_TRUE("world" == it->second);
+}
+
+TEST(ConfigIniTest, read_ini_simple_NEG)
+{
+ std::stringstream ss;
+
+ ss << "key=value\nhello=world\n";
+
+ auto str = ss.str();
+
+ EXPECT_THROW(crew::read_ini(str.c_str(), str.length()), std::runtime_error);
+}
+
+TEST(ConfigIniTest, read_ini_comment)
+{
+ std::stringstream ss;
+
+ ss << "[hello]\n;comment=skip\n#comment=skip\nkey=world\n";
+
+ auto str = ss.str();
+ auto sections = crew::read_ini(str.c_str(), str.length());
+ ASSERT_EQ(1UL, sections.size());
+
+ auto its = sections.begin();
+ ASSERT_NE(sections.end(), its);
+ EXPECT_TRUE("hello" == its->name);
+ ASSERT_EQ(1UL, its->items.size());
+
+ auto it = its->items.begin();
+ ASSERT_NE(its->items.end(), it);
+ EXPECT_TRUE("key" == it->first);
+ EXPECT_TRUE("world" == it->second);
+}
+
+TEST(ConfigIniTest, write_ini_file_error_NEG)
+{
+ crew::Sections sections;
+ EXPECT_THROW(crew::write_ini("/abc/def/cannot_access", sections), std::runtime_error);
+}
+
+TEST(ConfigIniTest, read_file_escape_semicolon)
+{
+ auto sections = crew::read_ini("test_read_semicolon.ini");
+ ASSERT_EQ(1UL, sections.size());
+
+ auto its = sections.begin();
+ ASSERT_NE(sections.end(), its);
+ EXPECT_TRUE("hello" == its->name);
+ ASSERT_EQ(1UL, its->items.size());
+
+ auto it = its->items.begin();
+ ASSERT_NE(its->items.end(), it);
+
+ EXPECT_TRUE("keya;keyb;keyc;keyd" == it->first);
+ EXPECT_TRUE("world" == it->second);
+}
+
+TEST(ConfigIniTest, write_file_escape_semicolon)
+{
+ std::string path("test_write_semicolon.ini");
+
+ // save key with ';'
+ {
+ crew::Sections sections;
+ crew::Section hello;
+ hello.name = "hello";
+ hello.items["keya;keyb;keyc;keyd"] = "world";
+ sections.push_back(hello);
+ crew::write_ini(path, sections);
+ }
+
+ // load the file and check if there is '\\'
+ std::string strbuffer;
+ {
+ foder::FileLoader file_loader{path};
+ auto ini_data = file_loader.load();
+
+ auto buffer = std::vector<char>();
+ auto length = ini_data.size();
+ buffer.resize(length + 1); // resize (not reserve) so writing through data() is valid
+
+ char *pbuffer = buffer.data();
+ memcpy(pbuffer, ini_data.data(), length);
+ *(pbuffer + length) = 0;
+
+ strbuffer = pbuffer;
+ }
+ int32_t count = 0;
+ size_t pos = 0;
+ while ((pos = strbuffer.find("\\;", pos)) != std::string::npos)
+ {
+ count++;
+ pos++;
+ }
+ EXPECT_TRUE(count == 3);
+}
diff --git a/compiler/crew/src/PConfigIniDump.cpp b/compiler/crew/src/PConfigIniDump.cpp
new file mode 100644
index 000000000..5b7a1cb6d
--- /dev/null
+++ b/compiler/crew/src/PConfigIniDump.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "crew/PConfigIniDump.h"
+
+namespace crew
+{
+
+/**
+ * @brief Dump content of sections
+ */
+void dump(std::ostream &os, const Sections &sections)
+{
+ for (auto &section : sections)
+ {
+ os << "[" << section.name << "]" << std::endl;
+ for (auto &item : section.items)
+ {
+ os << item.first << "=" << item.second << std::endl;
+ }
+ os << std::endl;
+ }
+}
+
+} // namespace crew
+
+std::ostream &operator<<(std::ostream &os, const crew::Sections &sections)
+{
+ crew::dump(os, sections);
+ return os;
+}
diff --git a/compiler/crew/src/PConfigIniDump.test.cpp b/compiler/crew/src/PConfigIniDump.test.cpp
new file mode 100644
index 000000000..25cf4736b
--- /dev/null
+++ b/compiler/crew/src/PConfigIniDump.test.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "crew/PConfigIni.h"
+#include "crew/PConfigIniDump.h"
+
+#include <gtest/gtest.h>
+
+#include <sstream>
+#include <stdexcept>
+
+TEST(ConfigIniDumpTest, dump_sections)
+{
+ crew::Sections sections;
+ crew::Section section;
+
+ section.name = "hello";
+ section.items["key"] = "value";
+
+ sections.push_back(section);
+
+ std::stringstream ss;
+
+ ss << sections;
+
+ // there's extra \n at end of each section
+ ASSERT_TRUE(ss.str() == "[hello]\nkey=value\n\n");
+}
diff --git a/compiler/crew/src/PConfigJson.cpp b/compiler/crew/src/PConfigJson.cpp
new file mode 100644
index 000000000..5af0ebddd
--- /dev/null
+++ b/compiler/crew/src/PConfigJson.cpp
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PConfigJson.h"
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+namespace
+{
+
+const char _CLF = '\n'; // Line Feed control character
+const char _DQU = '\"'; // Double QUotation
+
+} // namespace
+
+namespace crew
+{
+
+void JsonExport::indent(void)
+{
+ for (uint32_t i = 0; i < _indent; ++i)
+ _os << " ";
+}
+
+void JsonExport::open_brace(void)
+{
+ indent();
+
+ _os << "{" << _CLF;
+ _indent++;
+}
+
+void JsonExport::open_brace(const std::string &key)
+{
+ indent();
+
+ _os << _DQU << key << _DQU << " : {" << _CLF;
+ _indent++;
+}
+
+void JsonExport::open_bracket(const std::string &key)
+{
+ indent();
+
+ _os << _DQU << key << _DQU << " : [" << _CLF;
+ _indent++;
+}
+
+void JsonExport::close_bracket(bool cont)
+{
+ _indent--;
+ indent();
+
+ _os << "]";
+ if (cont)
+ _os << ",";
+ _os << _CLF;
+}
+
+void JsonExport::close_brace(bool cont)
+{
+ _indent--;
+ indent();
+
+ _os << "}";
+ if (cont)
+ _os << ",";
+ _os << _CLF;
+}
+
+void JsonExport::key_val(const std::string &key, const std::string &value, bool cont)
+{
+ indent();
+
+ _os << _DQU << key << _DQU << " : " << _DQU << value << _DQU;
+ if (cont)
+ _os << ",";
+ _os << _CLF;
+}
+
+void JsonExport::key_val(const std::string &key, const std::vector<std::string> &l, bool cont)
+{
+ indent();
+
+ _os << _DQU << key << _DQU << " : [ ";
+ bool comma = false;
+ for (auto &v : l)
+ {
+ if (comma)
+ _os << ", ";
+ else
+ comma = true;
+ _os << _DQU << v << _DQU;
+ }
+ _os << " ]";
+ if (cont)
+ _os << ",";
+ _os << _CLF;
+}
+
+} // namespace crew
diff --git a/compiler/crew/src/PConfigJson.h b/compiler/crew/src/PConfigJson.h
new file mode 100644
index 000000000..c5c49d096
--- /dev/null
+++ b/compiler/crew/src/PConfigJson.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CREW_PCONFIG_JSON_H__
+#define __CREW_PCONFIG_JSON_H__
+
+#include <ostream>
+#include <string>
+#include <vector>
+
+namespace crew
+{
+
+class JsonExport
+{
+public:
+ JsonExport(std::ostream &os) : _os(os) {}
+
+private:
+ void indent(void);
+
+public:
+ void open_brace(void);
+ void open_brace(const std::string &key);
+ void open_bracket(const std::string &key);
+ void close_bracket(bool cont);
+ void close_brace(bool cont);
+ void key_val(const std::string &key, const std::string &value, bool cont);
+ void key_val(const std::string &key, const std::vector<std::string> &l, bool cont);
+
+private:
+ std::ostream &_os;
+ uint32_t _indent = 0;
+};
+
+} // namespace crew
+
+#endif // __CREW_PCONFIG_JSON_H__
diff --git a/compiler/crew/src/PConfigJson.test.cpp b/compiler/crew/src/PConfigJson.test.cpp
new file mode 100644
index 000000000..f8afabc3d
--- /dev/null
+++ b/compiler/crew/src/PConfigJson.test.cpp
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PConfigJson.h"
+
+#include <gtest/gtest.h>
+
+#include <sstream>
+
+TEST(ConfigJsonTest, empty)
+{
+ std::stringstream ss;
+ crew::JsonExport je(ss);
+
+ je.open_brace();
+ je.close_brace(true);
+
+ ASSERT_TRUE(ss.str() == "{\n},\n");
+}
+
+TEST(ConfigJsonTest, keyvalue)
+{
+ std::stringstream ss;
+ crew::JsonExport je(ss);
+
+ je.open_brace("hello");
+ je.key_val("key", "value", true);
+ je.close_brace(true);
+
+ ASSERT_TRUE(ss.str() == "\"hello\" : {\n \"key\" : \"value\",\n},\n");
+}
+
+TEST(ConfigJsonTest, keyvaluearray)
+{
+ std::stringstream ss;
+ crew::JsonExport je(ss);
+ std::vector<std::string> vs = {"1", "2"};
+
+ je.open_brace("hello");
+ je.key_val("key", vs, true);
+ je.close_brace(true);
+
+ ASSERT_TRUE(ss.str() == "\"hello\" : {\n \"key\" : [ \"1\", \"2\" ],\n},\n");
+}
+
+TEST(ConfigJsonTest, bracket)
+{
+ std::stringstream ss;
+ crew::JsonExport je(ss);
+
+ je.open_bracket("hello");
+ je.close_bracket(true);
+
+ ASSERT_TRUE(ss.str() == "\"hello\" : [\n],\n");
+}
diff --git a/compiler/crew/src/test_read_semicolon.ini b/compiler/crew/src/test_read_semicolon.ini
new file mode 100644
index 000000000..d966fb707
--- /dev/null
+++ b/compiler/crew/src/test_read_semicolon.ini
@@ -0,0 +1,2 @@
+[hello]
+keya\;keyb\;keyc\;keyd=world
diff --git a/compiler/cwrap/src/Fildes.test.cpp b/compiler/cwrap/src/Fildes.test.cpp
index f9fa20f9e..c487f064a 100644
--- a/compiler/cwrap/src/Fildes.test.cpp
+++ b/compiler/cwrap/src/Fildes.test.cpp
@@ -44,7 +44,7 @@ int make_temp(char *name_template)
return fd;
}
-} // namespace make_temp
+} // namespace
TEST(FildesTest, default_constructor)
{
diff --git a/compiler/dalgona-test/.gitignore b/compiler/dalgona-test/.gitignore
new file mode 100644
index 000000000..19f2918a4
--- /dev/null
+++ b/compiler/dalgona-test/.gitignore
@@ -0,0 +1 @@
+test.local.lst
diff --git a/compiler/dalgona-test/CMakeLists.txt b/compiler/dalgona-test/CMakeLists.txt
new file mode 100644
index 000000000..c8b9c2597
--- /dev/null
+++ b/compiler/dalgona-test/CMakeLists.txt
@@ -0,0 +1,58 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+unset(DALGONA_SINGLE_OP_TEST)
+
+macro(singleOpTest NAME)
+ list(APPEND DALGONA_SINGLE_OP_TEST ${NAME})
+endmacro(singleOpTest)
+
+# Read "test.lst"
+include("test.lst")
+# Read "test.local.lst" if exists
+include("test.local.lst" OPTIONAL)
+
+unset(TEST_DEPS)
+
+get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
+
+# Place test scripts in one place
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/GenH5RandomInputs.py" "${CMAKE_CURRENT_BINARY_DIR}/GenH5RandomInputs.py" COPYONLY)
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/SingleOperatorTest.py" "${CMAKE_CURRENT_BINARY_DIR}/SingleOperatorTest.py" COPYONLY)
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/RandomDataGenerator.py" "${CMAKE_CURRENT_BINARY_DIR}/RandomDataGenerator.py" COPYONLY)
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/TestUtil.py" "${CMAKE_CURRENT_BINARY_DIR}/TestUtil.py" COPYONLY)
+
+###
+### Generate test.config
+###
+set(TEST_CONFIG "${CMAKE_CURRENT_BINARY_DIR}/test.config")
+
+add_custom_command(
+ OUTPUT ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E remove -f ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'DALGONA_PATH=\"$<TARGET_FILE:dalgona>\"' >> ${TEST_CONFIG}
+ DEPENDS dalgona
+ COMMENT "Generate test configuration"
+)
+
+# Import pics module
+get_target_property(PICS_BIN_PATH pics BINARY_DIR)
+add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/circle
+ COMMAND ${CMAKE_COMMAND} -E create_symlink
+ ${PICS_BIN_PATH}/circle ${CMAKE_CURRENT_BINARY_DIR}/circle)
+
+list(APPEND TEST_DEPS "${TEST_CONFIG}" "${CMAKE_CURRENT_BINARY_DIR}/circle")
+
+# This enforces CMake to generate all the dependencies during "build" phase
+add_custom_target(dalgona_test_deps ALL DEPENDS ${TEST_DEPS})
+
+# Run tests
+add_test(
+ NAME dalgona_single_op_test
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/TestSingleOp.sh"
+ "${TEST_CONFIG}"
+ "${ARTIFACTS_BIN_PATH}"
+ "${NNCC_OVERLAY_DIR}/venv_2_12_1"
+ ${DALGONA_SINGLE_OP_TEST}
+)
diff --git a/compiler/dalgona-test/GenH5RandomInputs.py b/compiler/dalgona-test/GenH5RandomInputs.py
new file mode 100644
index 000000000..795cd5aa2
--- /dev/null
+++ b/compiler/dalgona-test/GenH5RandomInputs.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+
+# Copyright 2022 Samsung Electronics Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import h5py as h5
+import numpy as np
+import argparse
+
+from circle.Model import Model
+from RandomDataGenerator import RandomDataGenerator
+
+#
+# This script generates a pack of random input data (.h5) expected by the input circle model
+#
+# Basic usage:
+# gen_h5_random_inputs.py --model <path/to/circle/model> --num_data <number/of/data> --output <path/to/output/data>
+# ex: gen_h5_random_inputs.py --model add.circle --num_data 3 --output add.circle.input.h5
+# (This will create add.circle.input.h5 composed of three random inputs in the same directory as the model)
+parser = argparse.ArgumentParser()
+parser.add_argument('--model', type=str, required=True)
+parser.add_argument('--num_data', type=int, required=True)
+parser.add_argument('--output', type=str, required=True)
+args = parser.parse_args()
+
+model = args.model
+num_data = args.num_data
+output_path = args.output
+
+with open(model, 'rb') as f:
+ buf = f.read()
+ circle_model = Model.GetRootAsModel(buf, 0)
+
+# Assume one subgraph
+assert (circle_model.SubgraphsLength() == 1)
+graph = circle_model.Subgraphs(0)
+inputs = graph.InputsAsNumpy()
+
+# Create h5 file
+h5_file = h5.File(output_path, 'w')
+group = h5_file.create_group("value")
+group.attrs['desc'] = "Input data for " + model
+
+# Generate random data
+for i in range(num_data):
+ sample = group.create_group(str(i))
+ for j in range(len(inputs)):
+ input_index = inputs[j]
+ tensor = graph.Tensors(input_index)
+ g = RandomDataGenerator(tensor.ShapeAsNumpy())
+ input_data = g.gen(tensor.Type())
+ sample.create_dataset(str(j), data=input_data)
+
+h5_file.close()
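
The generated file holds a `value` group with one numbered subgroup per random sample and one numbered dataset per model input. A short sketch (the file name is taken from the usage comment above) that reads the pack back with h5py:

```python
import h5py as h5

with h5.File('add.circle.input.h5', 'r') as f:
    for i in f['value']:          # one group per generated sample
        for j in f['value'][i]:   # one dataset per model input
            data = f['value'][i][j][:]
            print('sample', i, 'input', j, data.shape, data.dtype)
```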
diff --git a/compiler/dalgona-test/RandomDataGenerator.py b/compiler/dalgona-test/RandomDataGenerator.py
new file mode 100644
index 000000000..6fa9ab043
--- /dev/null
+++ b/compiler/dalgona-test/RandomDataGenerator.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python3
+
+# Copyright 2022 Samsung Electronics Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import numpy as np
+from circle.TensorType import TensorType
+
+
+class RandomDataGenerator:
+ def __init__(self, shape):
+ self.shape = shape
+
+ def _unsupported_types(self):
+ raise RuntimeError('Unsupported data type')
+
+ def _gen_uint8(self):
+ return np.random.randint(0, high=256, size=self.shape, dtype=np.uint8)
+
+ def _gen_int16(self):
+ return np.random.randint(-32767, high=32768, size=self.shape, dtype=np.int16)
+
+ def _gen_float32(self):
+ return np.array(10 * np.random.random_sample(self.shape) - 5, np.float32)
+
+ def gen(self, dtype):
+ gen_book = dict()
+ gen_book[TensorType.UINT8] = self._gen_uint8
+ gen_book[TensorType.INT16] = self._gen_int16
+ gen_book[TensorType.FLOAT32] = self._gen_float32
+
+ return gen_book.get(dtype, self._unsupported_types)()
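
`gen` dispatches on the circle tensor type through `gen_book` and falls back to `_unsupported_types` (which raises) for any other type. A minimal usage sketch with an assumed tensor shape:

```python
from circle.TensorType import TensorType
from RandomDataGenerator import RandomDataGenerator

g = RandomDataGenerator((1, 3, 3, 2))   # hypothetical NHWC shape
u8 = g.gen(TensorType.UINT8)            # uint8 values in [0, 256)
f32 = g.gen(TensorType.FLOAT32)         # float32 values in [-5.0, 5.0)
print(u8.shape, u8.dtype, f32.dtype)
```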
diff --git a/compiler/dalgona-test/SingleOperatorTest.py b/compiler/dalgona-test/SingleOperatorTest.py
new file mode 100644
index 000000000..9de77dcf3
--- /dev/null
+++ b/compiler/dalgona-test/SingleOperatorTest.py
@@ -0,0 +1,210 @@
+#!/usr/bin/env python3
+
+# Copyright 2022 Samsung Electronics Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+""""Test for a model with a single operator"""
+
+from TestUtil import *
+
+from circle import Model
+from circle import BuiltinOptions
+from circle import BuiltinOperator
+from circle import Conv2DOptions
+from circle import DepthwiseConv2DOptions
+from circle import AddOptions
+from circle import FullyConnectedOptions
+from circle import TransposeConvOptions
+from circle import InstanceNormOptions
+from circle import SplitOptions
+
+
+class SingleOperatorTest(object):
+ def StartAnalysis(self, args):
+ """Called when the analysis starts"""
+ with open(args, 'rb') as f:
+ buffer = f.read()
+ self._model = Model.Model.GetRootAsModel(buffer, 0)
+
+ # Check model has one subgraph
+ assertTrue(self._model.SubgraphsLength() == 1, "Model has more than one subgraph")
+ graph = self._model.Subgraphs(0)
+
+ # Check model has one operator
+ assertTrue(graph.OperatorsLength() == 1, "Model has more than one operator")
+ self._op = graph.Operators(0)
+
+ def DefaultOpPost(self, name, opcode, inputs, output):
+ raise SystemExit('NYI operator: ' + str(opcode))
+
+ def testConv2D(self, padding, stride, dilation, fused_act):
+ # Check opcode
+ opcode = self._model.OperatorCodes(self._op.OpcodeIndex())
+ checkOpcode(opcode.BuiltinCode(), BuiltinOperator.BuiltinOperator.CONV_2D)
+
+ # Check option
+ checkBuiltinOptionType(self._op.BuiltinOptionsType(),
+ BuiltinOptions.BuiltinOptions.Conv2DOptions)
+
+ self._opt = self._op.BuiltinOptions()
+ opt = Conv2DOptions.Conv2DOptions()
+ opt.Init(self._opt.Bytes, self._opt.Pos)
+ checkPadding(padding, opt.Padding())
+ assertTrue(opt.StrideW() == stride['w'], "Stride_w mismatches")
+ assertTrue(opt.StrideH() == stride['h'], "Stride_h mismatches")
+ assertTrue(opt.DilationWFactor() == dilation['w'], "Dilation_w mismatches")
+ assertTrue(opt.DilationHFactor() == dilation['h'], "Dilation_h mismatches")
+ checkActivation(fused_act, opt.FusedActivationFunction())
+
+ def Conv2DPre(self, name, input, filter, bias, padding, stride, dilation, fused_act):
+ self.testConv2D(padding, stride, dilation, fused_act)
+
+ def Conv2DPost(self, name, input, filter, bias, padding, stride, dilation, output,
+ fused_act):
+ self.testConv2D(padding, stride, dilation, fused_act)
+
+ def testAdd(self, fused_act):
+ # Check opcode
+ opcode = self._model.OperatorCodes(self._op.OpcodeIndex())
+ checkOpcode(opcode.BuiltinCode(), BuiltinOperator.BuiltinOperator.ADD)
+
+ # Check option
+ checkBuiltinOptionType(self._op.BuiltinOptionsType(),
+ BuiltinOptions.BuiltinOptions.AddOptions)
+
+ self._opt = self._op.BuiltinOptions()
+ opt = AddOptions.AddOptions()
+ opt.Init(self._opt.Bytes, self._opt.Pos)
+ checkActivation(fused_act, opt.FusedActivationFunction())
+
+ def AddPre(self, name, x, y, fused_act):
+ self.testAdd(fused_act)
+
+ def AddPost(self, name, x, y, output, fused_act):
+ self.testAdd(fused_act)
+
+ def testDepthwiseConv2D(self, padding, stride, depth_multiplier, dilation, fused_act):
+ # Check opcode
+ opcode = self._model.OperatorCodes(self._op.OpcodeIndex())
+ checkOpcode(opcode.BuiltinCode(),
+ BuiltinOperator.BuiltinOperator.DEPTHWISE_CONV_2D)
+
+ # Check option
+ checkBuiltinOptionType(self._op.BuiltinOptionsType(),
+ BuiltinOptions.BuiltinOptions.DepthwiseConv2DOptions)
+
+ self._opt = self._op.BuiltinOptions()
+ opt = DepthwiseConv2DOptions.DepthwiseConv2DOptions()
+ opt.Init(self._opt.Bytes, self._opt.Pos)
+ checkPadding(padding, opt.Padding())
+ assertTrue(opt.StrideW() == stride['w'], "Stride_w mismatches")
+ assertTrue(opt.StrideH() == stride['h'], "Stride_h mismatches")
+ assertTrue(opt.DepthMultiplier() == depth_multiplier,
+ "Depth multiplier mismatches")
+ assertTrue(opt.DilationWFactor() == dilation['w'], "Dilation_w mismatches")
+ assertTrue(opt.DilationHFactor() == dilation['h'], "Dilation_h mismatches")
+ checkActivation(fused_act, opt.FusedActivationFunction())
+
+ def DepthwiseConv2DPre(self, name, input, filter, bias, padding, stride,
+ depth_multiplier, dilation, fused_act):
+ self.testDepthwiseConv2D(padding, stride, depth_multiplier, dilation, fused_act)
+
+ def DepthwiseConv2DPost(self, name, input, filter, bias, padding, stride,
+ depth_multiplier, dilation, output, fused_act):
+ self.testDepthwiseConv2D(padding, stride, depth_multiplier, dilation, fused_act)
+
+ def testFullyConnected(self, fused_act):
+ # Check opcode
+ opcode = self._model.OperatorCodes(self._op.OpcodeIndex())
+ checkOpcode(opcode.BuiltinCode(), BuiltinOperator.BuiltinOperator.FULLY_CONNECTED)
+
+ # Check option
+ checkBuiltinOptionType(self._op.BuiltinOptionsType(),
+ BuiltinOptions.BuiltinOptions.FullyConnectedOptions)
+
+ self._opt = self._op.BuiltinOptions()
+ opt = FullyConnectedOptions.FullyConnectedOptions()
+ opt.Init(self._opt.Bytes, self._opt.Pos)
+ checkActivation(fused_act, opt.FusedActivationFunction())
+
+ def FullyConnectedPre(self, name, input, weights, bias, fused_act):
+ self.testFullyConnected(fused_act)
+
+ def FullyConnectedPost(self, name, input, weights, bias, output, fused_act):
+ self.testFullyConnected(fused_act)
+
+ def testTransposeConv(self, padding, stride):
+ # Check opcode
+ opcode = self._model.OperatorCodes(self._op.OpcodeIndex())
+ checkOpcode(opcode.BuiltinCode(), BuiltinOperator.BuiltinOperator.TRANSPOSE_CONV)
+
+ # Check option
+ checkBuiltinOptionType(self._op.BuiltinOptionsType(),
+ BuiltinOptions.BuiltinOptions.TransposeConvOptions)
+
+ self._opt = self._op.BuiltinOptions()
+ opt = TransposeConvOptions.TransposeConvOptions()
+ opt.Init(self._opt.Bytes, self._opt.Pos)
+ checkPadding(padding, opt.Padding())
+ assertTrue(opt.StrideW() == stride['w'], "Stride_w mismatches")
+ assertTrue(opt.StrideH() == stride['h'], "Stride_h mismatches")
+
+ def TransposeConvPre(self, name, input, filter, output_shape, bias, padding, stride):
+ self.testTransposeConv(padding, stride)
+
+ def TransposeConvPost(self, name, input, filter, output_shape, bias, padding, stride,
+ output):
+ self.testTransposeConv(padding, stride)
+
+ def testInstanceNorm(self, epsilon, fused_act):
+ # Check opcode
+ opcode = self._model.OperatorCodes(self._op.OpcodeIndex())
+ checkOpcode(opcode.BuiltinCode(), BuiltinOperator.BuiltinOperator.INSTANCE_NORM)
+
+ # Check option
+ checkBuiltinOptionType(self._op.BuiltinOptionsType(),
+ BuiltinOptions.BuiltinOptions.InstanceNormOptions)
+
+ self._opt = self._op.BuiltinOptions()
+ opt = InstanceNormOptions.InstanceNormOptions()
+ opt.Init(self._opt.Bytes, self._opt.Pos)
+ assertTrue(opt.Epsilon() == epsilon, "epsilon mismatches")
+ checkActivation(fused_act, opt.FusedActivationFunction())
+
+ def InstanceNormPre(self, name, input, gamma, beta, epsilon, fused_act):
+ self.testInstanceNorm(epsilon, fused_act)
+
+ def InstanceNormPost(self, name, input, gamma, beta, epsilon, output, fused_act):
+ self.testInstanceNorm(epsilon, fused_act)
+
+ def testSplit(self, num_split):
+ # Check opcode
+ opcode = self._model.OperatorCodes(self._op.OpcodeIndex())
+ checkOpcode(opcode.BuiltinCode(), BuiltinOperator.BuiltinOperator.SPLIT)
+
+ # Check option
+ checkBuiltinOptionType(self._op.BuiltinOptionsType(),
+ BuiltinOptions.BuiltinOptions.SplitOptions)
+
+ self._opt = self._op.BuiltinOptions()
+ opt = SplitOptions.SplitOptions()
+ opt.Init(self._opt.Bytes, self._opt.Pos)
+ assertTrue(opt.NumSplits() == num_split, "num_split mismatches")
+
+ def SplitPre(self, name, split_dim, input, num_split):
+ self.testSplit(num_split)
+
+ def SplitPost(self, name, split_dim, input, num_split, outputs):
+ self.testSplit(num_split)
+ assertTrue(num_split == len(outputs), "num_split mismatches with outputs")
diff --git a/compiler/dalgona-test/TestSingleOp.sh b/compiler/dalgona-test/TestSingleOp.sh
new file mode 100755
index 000000000..fad26fb51
--- /dev/null
+++ b/compiler/dalgona-test/TestSingleOp.sh
@@ -0,0 +1,97 @@
+#!/bin/bash
+
+# Copyright 2022 Samsung Electronics Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+# This script tests the basic behavior of dalgona
+#
+# HOW TO USE
+#
+# ./test_single_op.sh <path/to/test.config> <path/to/work_dir> <path/to/venv> <TEST 1> <TEST 2> ...
+# test.config : set ${DALGONA_PATH}
+# work_dir : archive of common-artifacts (ex: build/compiler/common-artifacts)
+# venv : virtual environment for python execution
+
+CONFIG_PATH="$1"; shift
+BIN_PATH=$(dirname "$CONFIG_PATH")
+GEN_SCRIPT_PATH="${BIN_PATH}/GenH5RandomInputs.py"
+TEST_SCRIPT_PATH="${BIN_PATH}/SingleOperatorTest.py"
+WORKDIR="$1"; shift
+VIRTUALENV="$1"; shift
+
+source "${CONFIG_PATH}"
+
+echo "-- Found DALGONA: ${DALGONA_PATH}"
+echo "-- Found workdir: ${WORKDIR}"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+pushd "${WORKDIR}"
+for TESTCASE in "$@"; do
+ TESTED+=("${TESTCASE}")
+
+ TESTCASE_FILE="${WORKDIR}/${TESTCASE}"
+
+ PASSED_TAG="${BIN_PATH}/${TESTCASE}.passed"
+ rm -f "${PASSED_TAG}"
+
+ cat > "${BIN_PATH}/${TESTCASE}.log" <(
+ exec 2>&1
+ set -ex
+
+ # Generate random h5 input data
+ source "${VIRTUALENV}/bin/activate"
+ "${VIRTUALENV}/bin/python" "${GEN_SCRIPT_PATH}" \
+ --model "${TESTCASE_FILE}.circle" \
+ --num_data 3 \
+ --output "${BIN_PATH}/${TESTCASE}.circle.input.h5"
+ if [[ $? -ne 0 ]]; then
+ echo "FAILED TO GENERATE INPUT"
+ continue
+ fi
+
+ # Run dalgona with test script(SingleOperatorTest.py)
+ "${DALGONA_PATH}" \
+ --input_model "${TESTCASE_FILE}.circle" \
+ --input_data "${BIN_PATH}/${TESTCASE}.circle.input.h5" \
+ --analysis "${TEST_SCRIPT_PATH}" \
+ --analysis_args "${TESTCASE_FILE}.circle"
+
+ if [[ $? -eq 0 ]]; then
+ touch "${PASSED_TAG}"
+ fi
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("$TESTCASE")
+ else
+ FAILED+=("$TESTCASE")
+ fi
+done
+popd
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/dalgona-test/TestUtil.py b/compiler/dalgona-test/TestUtil.py
new file mode 100644
index 000000000..d6465283f
--- /dev/null
+++ b/compiler/dalgona-test/TestUtil.py
@@ -0,0 +1,58 @@
+# Copyright 2022 Samsung Electronics Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from circle import ActivationFunctionType
+from circle import BuiltinOptions
+from circle import Padding
+
+
+def assertTrue(cond, msg):
+ assert cond, msg
+
+
+def checkPadding(pad, exp_pad):
+ if pad == 'SAME':
+ assertTrue(exp_pad == Padding.Padding.SAME, "Padding mismatches")
+ elif pad == 'VALID':
+ assertTrue(exp_pad == Padding.Padding.VALID, "Padding mismatches")
+ else:
+ raise SystemExit('Unsupported padding')
+
+
+def checkActivation(act, exp_act):
+ act_functions = {
+ 'relu': ActivationFunctionType.ActivationFunctionType.RELU,
+ 'relu6': ActivationFunctionType.ActivationFunctionType.RELU6,
+ 'relu_n1_to_1': ActivationFunctionType.ActivationFunctionType.RELU_N1_TO_1,
+ 'tanh': ActivationFunctionType.ActivationFunctionType.TANH,
+ 'none': ActivationFunctionType.ActivationFunctionType.NONE,
+ 'sign_bit': ActivationFunctionType.ActivationFunctionType.SIGN_BIT,
+ }
+
+ try:
+ assertTrue(act_functions[act] == exp_act, "Activation function mismatches")
+ except KeyError:
+ raise SystemExit('Unsupported activation function')
+
+
+def checkOpcode(opcode, exp_opcode):
+ assertTrue(opcode == exp_opcode,
+ "Opcode mismatches (" + str(opcode) + ", " + str(exp_opcode) + ")")
+
+
+def checkBuiltinOptionType(option, exp_option):
+ assertTrue(
+ option == exp_option,
+ "Built-in option type mismatches (" + str(option) + ", " + str(exp_option) + ")")
diff --git a/compiler/dalgona-test/requires.cmake b/compiler/dalgona-test/requires.cmake
new file mode 100644
index 000000000..ab75e0904
--- /dev/null
+++ b/compiler/dalgona-test/requires.cmake
@@ -0,0 +1,3 @@
+require("dalgona")
+require("common-artifacts")
+require("pics")
diff --git a/compiler/dalgona-test/test.lst b/compiler/dalgona-test/test.lst
new file mode 100644
index 000000000..36c407042
--- /dev/null
+++ b/compiler/dalgona-test/test.lst
@@ -0,0 +1,6 @@
+singleOpTest(Conv2D_000)
+singleOpTest(Conv2D_001)
+singleOpTest(Conv2D_002)
+singleOpTest(Conv2D_003)
+singleOpTest(Split_000)
+singleOpTest(InstanceNorm_000)
diff --git a/compiler/dalgona/CMakeLists.txt b/compiler/dalgona/CMakeLists.txt
new file mode 100644
index 000000000..bd06424f6
--- /dev/null
+++ b/compiler/dalgona/CMakeLists.txt
@@ -0,0 +1,63 @@
+# NOTE find_package will try to use at least python3.8 as follows depending on platform version
+# Ubuntu18.04; explicitly installed python3.8 (default is python3.6)
+# Ubuntu20.04; default python3.8
+# Ubuntu22.04; default python3.10
+# refer https://github.com/Samsung/ONE/issues/9962
+find_package(PythonInterp 3.8 QUIET)
+find_package(PythonLibs 3.8 QUIET)
+
+if(NOT ${PYTHONINTERP_FOUND})
+ message(STATUS "Build dalgona: FAILED (Python3 is missing)")
+ return()
+endif()
+
+if(${PYTHON_VERSION_MINOR} LESS 8)
+ message(STATUS "Build dalgona: FAILED (Install Python version higher than or equal to 3.8)")
+ return()
+endif()
+
+nnas_find_package(Pybind11)
+if(NOT Pybind11_FOUND)
+ message(STATUS "Build dalgona: FAILED (Pybind11 is missing)")
+ return()
+endif(NOT Pybind11_FOUND)
+
+set(DRIVER "driver/Driver.cpp")
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_compile_options(-fvisibility=hidden)
+
+add_executable(dalgona ${DRIVER} ${SOURCES})
+target_include_directories(dalgona PRIVATE include)
+target_include_directories(dalgona PRIVATE ${PYTHON_INCLUDE_DIRS})
+target_include_directories(dalgona PRIVATE ${Pybind11_INCLUDE_DIRS})
+
+target_link_libraries(dalgona INTERFACE pybind11::embed)
+target_link_libraries(dalgona PRIVATE ${PYTHON_LIBRARIES})
+target_link_libraries(dalgona PRIVATE arser)
+target_link_libraries(dalgona PRIVATE safemain)
+target_link_libraries(dalgona PRIVATE foder)
+target_link_libraries(dalgona PRIVATE luci_import)
+target_link_libraries(dalgona PRIVATE luci_interpreter)
+target_link_libraries(dalgona PRIVATE dio_hdf5)
+target_link_libraries(dalgona PRIVATE nncc_common)
+
+install(TARGETS dalgona DESTINATION bin)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+# dalgona is an executable, so we do not link it to the test.
+# Instead, we use TEST_SOURCES to specify sources used for tests.
+set(TEST_SOURCES
+ "src/StringUtils.cpp"
+ "src/RandomUtils.cpp")
+
+nnas_find_package(GTest REQUIRED)
+GTest_AddTest(dalgona_unit_test ${TESTS} ${TEST_SOURCES})
+target_include_directories(dalgona_unit_test PRIVATE src)
+target_link_libraries(dalgona_unit_test luci_lang)
diff --git a/compiler/dalgona/README.md b/compiler/dalgona/README.md
new file mode 100644
index 000000000..0fd0f0bb4
--- /dev/null
+++ b/compiler/dalgona/README.md
@@ -0,0 +1,104 @@
+# dalgona
+
+## What is dalgona?
+
+_dalgona_ is a tool for dynamic analysis of deep neural networks.
+
+## How does it work?
+
+_dalgona_ runs a user's custom analysis code (written in Python) while performing inference. The analysis code takes the form of hooks, called before/after each operator is executed. Intermediate execution results (values of activations) are passed to the hooks, so users can analyze the distribution of activations inside them. The analysis results can be exported as files, log messages, or any other form, and used for various purposes (model compression, optimization, etc.).
+
+NOTE Inference is performed by `luci-interpreter`.
+
+## Possible applications
+- Finding quantization parameters based on the distribution of activations
+- Finding sparse activations by observing the portion of zero values
+- Finding the distribution of conditional variables in If-statement and While-statement
+- Visualization of activation data with Python libraries
+
+## Prerequisite
+- Python 3.8 (python3.8, python3.8-dev packages)
+- Circle model (target to analyze)
+- Input data of the model (HDF5 format; see _rawdata2hdf5_ or _gen_h5_explicit_inputs.py_ for more details)
+- Analysis code (Python code)
+
+## Example
+```
+dalgona \
+ --input_model model.circle \
+ --input_data data.h5 \
+ --analysis analysis/AnalysisTemplate.py
+```
+
+## Arguments
+```
+ --help Show help message and exit
+ --input_model Input model filepath (.circle)
+ --input_data Input data filepath (.h5) (if not given, random data will be used)
+ --analysis Analysis code filepath (.py)
+ --analysis_args (optional) String argument passed to the analysis code
+```
+
+## How to write analysis code?
+
+_dalgona_ provides hooks which are called before/after an operator is executed.
+Users can access tensors relevant to the corresponding operator inside the hooks.
+The information of each operator is passed as the arguments of the hook.
+For example, for a Conv2D operator, _dalgona_ provides the following hooks.
+
+```
+ def Conv2DPre(self, name, input, filter, bias, padding, stride, dilation, fused_act)
+ def Conv2DPost(self, name, input, filter, bias, padding, stride, dilation, output, fused_act)
+```
+
+`Conv2DPre`/`Conv2DPost` are called before/after Conv2D is executed, respectively. Users can write code to analyze the distribution of intermediate tensors using the provided arguments.
+
+(Note that Conv2DPost has one more argument "output", which is the execution result of the operator)
+
+Details about the arguments of each hook can be found in the section "Arguments of Hooks".
+
+We provide a template for the analysis code in `analysis/AnalysisTemplate.py`. Users can copy the template file and modify it to write their own custom analysis code.
+
+| List of hooks | Explanation |
+| --------------|------------ |
+| StartAnalysis(self) | Called when the analysis starts |
+| EndAnalysis(self) | Called when the analysis ends |
+| StartNetworkExecution(self, inputs) | Called when the execution of a network starts |
+| EndNetworkExecution(self, outputs) | Called when the execution of a network ends |
+| DefaultOpPre(self, name, opcode, inputs) | Default hook called before an operator is executed |
+| DefaultOpPost(self, name, opcode, inputs, output) | Default hook called after an operator is executed |
+| \<OPCODE\>Pre/Post | Hooks called before/after the corresponding operator is executed. |
+
+## Arguments of Hooks
+
+Arguments are implemented with built-in Python types.
+
+Tensor
+- Type: dict
+- {name:str, data: np.ndarray, quantparam: QuantParam, is_const: bool}
+
+QuantParam
+- Type: dict
+- {scale: list, zero_point: list, quantized_dimension: int}
+
+Padding
+- Type: string
+- Values: 'SAME', 'VALID'
+
+Stride
+- Type: dict
+- {w: int, h: int}
+
+Dilation
+- Type: dict
+- {w: int, h: int}
+
+FusedActivationFunction
+- Type: string
+- Values: 'none', 'relu', 'relu_n1_to_1', 'relu6'
+
+## What's different from Hook APIs in TensorFlow or PyTorch?
+
+Basically, dalgona works in the same way as hooks in TF or PyTorch. It calls user-defined functions before/after each operator is executed.
+
+A major difference is that dalgona runs with a model designed for inference (i.e., circle, which can be directly converted from tflite).
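
Tying the hook table and argument types above together, here is a minimal hypothetical analysis class that logs the output range of every Conv2D; since `output` is a Tensor dict, its `data` field is a NumPy array:

```python
import numpy as np


class ConvRangeLogger(object):
    def StartAnalysis(self, args):
        print('analysis args:', args)

    def Conv2DPost(self, name, input, filter, bias, padding, stride, dilation,
                   output, fused_act):
        # 'output' is a Tensor dict: {name, data, quantparam, is_const}
        data = output['data']
        print(name, 'min =', np.min(data), 'max =', np.max(data))
```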
diff --git a/compiler/dalgona/analysis/AnalysisTemplate.py b/compiler/dalgona/analysis/AnalysisTemplate.py
new file mode 100644
index 000000000..b1ffac458
--- /dev/null
+++ b/compiler/dalgona/analysis/AnalysisTemplate.py
@@ -0,0 +1,125 @@
+# Copyright 2022 Samsung Electronics Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""
+A template for analysis code.
+This template shows how to access the information of each operator inside hooks.
+Users can write their own hooks by modifying this file.
+
+NOTE See "Arguments of Hooks" section in README to understand argument types (Tensor, Stride, ..)
+NOTE See "tests/SingleOperatorTest.py" for more operators.
+"""
+
+
+class AnalysisTemplate(object):
+ def StartAnalysis(self, args: str):
+ """
+ Called when the analysis starts
+ args: string given by --analysis_args option
+ """
+ print("Analysis started.")
+ print("args", args)
+
+ def EndAnalysis(self):
+ """
+ Called when the analysis ends
+ """
+ print("Analysis ended.")
+
+ def StartNetworkExecution(self, inputs: list):
+ """
+ Called when the execution of a network starts
+ inputs: list of Tensor
+ """
+ print("Network execution started.")
+
+ def EndNetworkExecution(self, outputs: list):
+ """
+ Called when the execution of a network ends
+ outputs: list of Tensor
+ """
+ print("Network execution ended.")
+
+ def DefaultOpPre(self, name: str, opcode: str, inputs: list):
+ """
+ Default hook called before an operator is executed
+ name: output tensor name (string)
+ opcode: opcode name (string)
+ inputs: list of Tensor
+ """
+ print("name", name)
+ print("opcode", opcode)
+ print("inputs", inputs)
+
+ def DefaultOpPost(self, name: str, opcode: str, inputs: list, outputs: list):
+ """
+ Default hook called after an operator is executed
+ name: output tensor name (string)
+ opcode: opcode name (string)
+ inputs: list of Tensor
+ outputs: list of Tensor (dalgona always passes the outputs as a list)
+ """
+ print("name", name)
+ print("opcode", opcode)
+ print("inputs", inputs)
+ print("outputs", outputs)
+
+ def Conv2DPre(self, name: str, input: dict, filter: dict, bias: dict, padding: str,
+ stride: dict, dilation: dict, fused_act: str):
+ """
+ Called before Conv2D layer execution
+ name: output tensor name (string)
+ input: Tensor
+ filter: Tensor
+ bias: Tensor
+ padding: Padding (string)
+ stride: Stride
+ dilation: Dilation
+ fused_act: fused activation function (string)
+ """
+ print("name", name)
+ print("input", input)
+ print("filter", filter)
+ print("bias", bias)
+ print("padding", padding)
+ print("stride", stride)
+ print("dilation", dilation)
+ print("fused activation", fused_act)
+
+ def Conv2DPost(self, name: str, input: dict, filter: dict, bias: dict, padding: str,
+ stride: dict, dilation: dict, output: dict, fused_act: str):
+ """
+ Called after Conv2D layer execution
+ name: output tensor name (string)
+ input: Tensor
+ filter: Tensor
+ bias: Tensor
+ padding: Padding (string)
+ stride: Stride
+ dilation: Dilation
+ output: Tensor
+ fused_act: fused activation function (string)
+ """
+ print("name", name)
+ print("input", input)
+ print("filter", filter)
+ print("bias", bias)
+ print("padding", padding)
+ print("stride", stride)
+ print("dilation", dilation)
+ print("output shape", output['data'].shape)
+ print("output type", output['data'].dtype)
+ print("fused activation", fused_act)
diff --git a/compiler/dalgona/driver/Driver.cpp b/compiler/dalgona/driver/Driver.cpp
new file mode 100644
index 000000000..8bba0b72a
--- /dev/null
+++ b/compiler/dalgona/driver/Driver.cpp
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Dalgona.h"
+
+#include <arser/arser.h>
+#include <pybind11/embed.h>
+
+#include <cstdlib>
+#include <iostream>
+
+namespace py = pybind11;
+
+using namespace dalgona;
+
+int entry(const int argc, char **argv)
+{
+ arser::Arser arser("Dalgona: Dynamic analysis tool for DNN");
+
+ arser.add_argument("--input_model")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .required(true)
+ .help("Input model filepath (.circle)");
+
+ arser.add_argument("--input_data")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("Input data filepath (.h5) (if not given, random data will be used)");
+
+ arser.add_argument("--analysis")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .required(true)
+ .help("Analysis code filepath (.py)");
+
+ arser.add_argument("--analysis_args")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("String argument passed to the analysis code");
+
+ try
+ {
+ arser.parse(argc, argv);
+ }
+ catch (const std::runtime_error &err)
+ {
+ std::cout << err.what() << std::endl;
+ std::cout << arser;
+ return EXIT_FAILURE;
+ }
+
+ auto input_model_path = arser.get<std::string>("--input_model");
+ auto analysis_path = arser.get<std::string>("--analysis");
+ std::string analysis_args = "";
+ if (arser["--analysis_args"])
+ analysis_args = arser.get<std::string>("--analysis_args");
+
+ // Initialize python interpreter
+ py::scoped_interpreter guard{};
+
+ Dalgona dalgona;
+
+ // Initialize interpreter and operator hooks
+ dalgona.initialize(input_model_path);
+
+ // Run analysis
+ if (arser["--input_data"])
+ {
+ const auto input_data_path = arser.get<std::string>("--input_data");
+ dalgona.runAnalysisWithH5Input(input_data_path, analysis_path, analysis_args);
+ }
+ else
+ {
+ std::cout << "--input_data was not specified. Run with a random input." << std::endl;
+ dalgona.runAnalysisWithRandomInput(analysis_path, analysis_args);
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/compiler/dalgona/include/Dalgona.h b/compiler/dalgona/include/Dalgona.h
new file mode 100644
index 000000000..353e26800
--- /dev/null
+++ b/compiler/dalgona/include/Dalgona.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DALGONA_H__
+#define __DALGONA_H__
+
+#include <luci/IR/Module.h>
+#include <luci_interpreter/Interpreter.h>
+
+#include "PythonHooks.h"
+
+#include <memory>
+
+namespace dalgona
+{
+
+class Dalgona
+{
+public:
+ explicit Dalgona() = default;
+
+ ~Dalgona() = default;
+
+ void initialize(const std::string &input_model_path);
+
+ // Run analysis with hdf5 input
+ void runAnalysisWithH5Input(const std::string &input_data_path, const std::string &analysis_path,
+ const std::string &analysis_args);
+
+ // Run analysis with random input
+ void runAnalysisWithRandomInput(const std::string &analysis_path,
+ const std::string &analysis_args);
+
+private:
+ std::unique_ptr<luci::Module> _module{nullptr};
+ std::unique_ptr<luci_interpreter::Interpreter> _interpreter{nullptr};
+ std::unique_ptr<PythonHooks> _hooks{nullptr};
+};
+
+} // namespace dalgona
+
+#endif // __DALGONA_H__
diff --git a/compiler/dalgona/include/PythonHooks.h b/compiler/dalgona/include/PythonHooks.h
new file mode 100644
index 000000000..8c100aafb
--- /dev/null
+++ b/compiler/dalgona/include/PythonHooks.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DALGONA_PYTHON_HOOKS_H__
+#define __DALGONA_PYTHON_HOOKS_H__
+
+#include <loco/IR/Graph.h>
+#include <luci_interpreter/Interpreter.h>
+
+#include <pybind11/embed.h>
+
+#include <string>
+
+namespace py = pybind11;
+
+namespace dalgona
+{
+
+class PythonHooks : public luci_interpreter::ExecutionObserver
+{
+public:
+ PythonHooks(luci_interpreter::Interpreter *interpreter) : _interpreter(interpreter)
+ {
+ // Do nothing
+ }
+
+ // Called when the analysis starts
+ void importAnalysis(const std::string &analysis_path, py::object &globals,
+ const std::string &analysis_args);
+
+ // Called after the analysis is done
+ void endAnalysis();
+
+ // Called before a network is started to be executed
+ void startNetworkExecution(loco::Graph *graph);
+
+ // Called after a network is executed
+ void endNetworkExecution(loco::Graph *graph);
+
+ // Called before an operator is executed
+ void preOperatorExecute(const luci::CircleNode *node) override;
+
+ // Called after an operator is executed
+ void postOperatorExecute(const luci::CircleNode *node) override;
+
+private:
+ luci_interpreter::Interpreter *_interpreter = nullptr;
+ py::object _analysis;
+};
+
+} // namespace dalgona
+
+#endif // __DALGONA_PYTHON_HOOKS_H__
diff --git a/compiler/dalgona/requires.cmake b/compiler/dalgona/requires.cmake
new file mode 100644
index 000000000..185476b36
--- /dev/null
+++ b/compiler/dalgona/requires.cmake
@@ -0,0 +1,6 @@
+require("safemain")
+require("arser")
+require("foder")
+require("luci")
+require("luci-interpreter")
+require("dio-hdf5")
diff --git a/compiler/dalgona/src/Dalgona.cpp b/compiler/dalgona/src/Dalgona.cpp
new file mode 100644
index 000000000..1a35b6d03
--- /dev/null
+++ b/compiler/dalgona/src/Dalgona.cpp
@@ -0,0 +1,276 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Dalgona.h"
+#include "PythonHooks.h"
+#include "RandomUtils.h"
+
+#include <luci/Importer.h>
+#include <foder/FileLoader.h>
+#include <dio_hdf5/HDF5Importer.h>
+
+#include <pybind11/embed.h>
+
+#include <iostream>
+#include <limits>
+
+using Shape = std::vector<loco::Dimension>;
+using DataType = loco::DataType;
+
+namespace py = pybind11;
+
+namespace
+{
+
+uint32_t numElements(const luci::CircleNode *node)
+{
+ assert(node != nullptr); // FIX_CALLER_UNLESS
+
+ uint32_t num_elements = 1;
+ for (uint32_t i = 0; i < node->rank(); i++)
+ num_elements *= node->dim(i).value();
+
+ return num_elements;
+}
+
+// Return tensor's size in bytes
+template <typename NodeT> size_t getByteSize(const NodeT *node)
+{
+ assert(node != nullptr); // FIX_CALLER_UNLESS
+
+ uint32_t dtype_size = loco::size(node->dtype());
+ return static_cast<size_t>(dtype_size) * static_cast<size_t>(numElements(node));
+}
+
+// Throw an exception if the input has one of the following problems.
+// 1. It has an unknown dimension
+// 2. It has zero elements
+void checkInputDimension(const luci::CircleInput *input)
+{
+ assert(input != nullptr); // FIX_CALLER_UNLESS
+
+ for (uint32_t i = 0; i < input->rank(); i++)
+ if (!input->dim(i).known())
+ throw std::runtime_error(input->name() + " has unknown dimension");
+
+ if (numElements(input) == 0)
+ throw std::runtime_error(input->name() + " is a zero-sized input");
+}
+
+// Check the type and the shape of CircleInput
+// Throw an exception if type or shape does not match
+void verifyTypeShape(const luci::CircleInput *input_node, const DataType &dtype, const Shape &shape)
+{
+ assert(input_node != nullptr); // FIX_CALLER_UNLESS
+
+ // Type check
+ if (dtype != input_node->dtype())
+ throw std::runtime_error("Wrong input type.");
+
+ if (shape.size() != input_node->rank())
+ throw std::runtime_error("Input rank mismatch.");
+
+ for (uint32_t i = 0; i < shape.size(); i++)
+ {
+ if (not(shape.at(i) == input_node->dim(i)))
+ throw std::runtime_error("Input shape mismatch.");
+ }
+}
+
+} // namespace
+
+namespace dalgona
+{
+
+void Dalgona::initialize(const std::string &input_model_path)
+{
+ // Load model from the file
+ foder::FileLoader loader{input_model_path};
+ std::vector<char> model_data = loader.load();
+
+ // Verify flatbuffers
+ flatbuffers::Verifier verifier{reinterpret_cast<const uint8_t *>(model_data.data()),
+ model_data.size()};
+ if (not circle::VerifyModelBuffer(verifier))
+ throw std::runtime_error("Failed to verify circle '" + input_model_path + "'");
+
+ auto circle_model = circle::GetModel(model_data.data());
+
+ if (not circle_model)
+ throw std::runtime_error("Failed to load '" + input_model_path + "'");
+
+ _module = luci::Importer().importModule(circle_model);
+
+ if (not _module)
+ throw std::runtime_error("ERROR: Failed to load '" + input_model_path + "'");
+
+ // Initialize interpreter
+ _interpreter = std::make_unique<luci_interpreter::Interpreter>(_module.get());
+
+ _hooks = std::make_unique<PythonHooks>(_interpreter.get());
+
+ _interpreter->attachObserver(_hooks.get());
+}
+
+void Dalgona::runAnalysisWithH5Input(const std::string &input_data_path,
+ const std::string &analysis_path,
+ const std::string &analysis_args)
+{
+ py::object scope = py::module::import("__main__").attr("__dict__");
+ _hooks->importAnalysis(analysis_path, scope, analysis_args);
+
+ try
+ {
+ dio::hdf5::HDF5Importer importer(input_data_path);
+ importer.importGroup("value");
+
+ bool is_raw_data = importer.isRawData();
+
+ const auto num_records = importer.numData();
+ if (num_records == 0)
+ throw std::runtime_error("The input data file does not contain any record.");
+
+ const auto input_nodes = loco::input_nodes(_module->graph());
+ const auto num_inputs = input_nodes.size();
+
+ for (int32_t record_idx = 0; record_idx < num_records; record_idx++)
+ {
+ if (num_inputs != static_cast<uint32_t>(importer.numInputs(record_idx)))
+ throw std::runtime_error("Wrong number of inputs.");
+
+ std::cout << "Running " << record_idx << "'th data" << std::endl;
+
+ for (uint32_t input_idx = 0; input_idx < num_inputs; input_idx++)
+ {
+ const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
+ assert(input_node->index() == input_idx);
+ checkInputDimension(input_node);
+ std::vector<char> input_data(getByteSize(input_node));
+
+ if (is_raw_data)
+ {
+ // Skip type/shape check for raw data
+ importer.readTensor(record_idx, input_idx, input_data.data(), input_data.size());
+ }
+ else
+ {
+ DataType dtype;
+ Shape shape;
+ importer.readTensor(record_idx, input_idx, &dtype, &shape, input_data.data(),
+ input_data.size());
+
+ // Check the type and the shape of the input data is valid
+ verifyTypeShape(input_node, dtype, shape);
+ }
+
+ _interpreter->writeInputTensor(input_node, input_data.data(), input_data.size());
+ }
+
+ _hooks->startNetworkExecution(_module->graph());
+ _interpreter->interpret();
+ _hooks->endNetworkExecution(_module->graph());
+ }
+
+ std::cout << "Finished executing " << num_records << "'th data" << std::endl;
+ _hooks->endAnalysis();
+ }
+ catch (const H5::Exception &e)
+ {
+ H5::Exception::printErrorStack();
+ throw std::runtime_error("HDF5 error occurred.");
+ }
+}
+
+void Dalgona::runAnalysisWithRandomInput(const std::string &analysis_path,
+ const std::string &analysis_args)
+{
+ py::object scope = py::module::import("__main__").attr("__dict__");
+ _hooks->importAnalysis(analysis_path, scope, analysis_args);
+
+ const auto input_nodes = loco::input_nodes(_module->graph());
+ const auto num_inputs = input_nodes.size();
+
+ for (uint32_t input_idx = 0; input_idx < num_inputs; input_idx++)
+ {
+ const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
+ assert(input_node->index() == input_idx);
+ checkInputDimension(input_node);
+
+ uint32_t num_elems = numElements(input_node);
+ switch (input_node->dtype())
+ {
+ case DataType::FLOAT32:
+ {
+ // Synced with record-minmax (-5,5)
+ auto input_data = genRandomFloatData(num_elems, -5, 5);
+ _interpreter->writeInputTensor(input_node, input_data.data(),
+ input_data.size() * sizeof(float));
+ break;
+ }
+ case DataType::U8:
+ {
+ auto input_data = genRandomIntData<uint8_t>(num_elems, std::numeric_limits<uint8_t>::min(),
+ std::numeric_limits<uint8_t>::max());
+ _interpreter->writeInputTensor(input_node, input_data.data(),
+ input_data.size() * sizeof(uint8_t));
+ break;
+ }
+ case DataType::S16:
+ {
+ auto input_data = genRandomIntData<int16_t>(num_elems, std::numeric_limits<int16_t>::min(),
+ std::numeric_limits<int16_t>::max());
+ _interpreter->writeInputTensor(input_node, input_data.data(),
+ input_data.size() * sizeof(int16_t));
+ break;
+ }
+ case DataType::S32:
+ {
+ // Synced with record-minmax (0, 100)
+ auto input_data = genRandomIntData<int32_t>(num_elems, 0, 100);
+ _interpreter->writeInputTensor(input_node, input_data.data(),
+ input_data.size() * sizeof(int32_t));
+ break;
+ }
+ case DataType::S64:
+ {
+ // Synced with record-minmax (0, 100)
+ auto input_data = genRandomIntData<int64_t>(num_elems, 0, 100);
+ _interpreter->writeInputTensor(input_node, input_data.data(),
+ input_data.size() * sizeof(int64_t));
+ break;
+ }
+ case DataType::BOOL:
+ {
+ // Bool is represented as uint8 (0 or 1)
+ auto input_data = genRandomIntData<uint8_t>(num_elems, 0, 1);
+ _interpreter->writeInputTensor(input_node, input_data.data(),
+ input_data.size() * sizeof(uint8_t));
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported input data type in " + input_node->name());
+ }
+ }
+
+ _hooks->startNetworkExecution(_module->graph());
+ _interpreter->interpret();
+ _hooks->endNetworkExecution(_module->graph());
+
+ std::cout << "Finished executing a random input" << std::endl;
+ _hooks->endAnalysis();
+}
+
+} // namespace dalgona
diff --git a/compiler/dalgona/src/PostOperatorHook.h b/compiler/dalgona/src/PostOperatorHook.h
new file mode 100644
index 000000000..00c5d461c
--- /dev/null
+++ b/compiler/dalgona/src/PostOperatorHook.h
@@ -0,0 +1,268 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DALGONA_POST_OPERATOR_HOOK_H__
+#define __DALGONA_POST_OPERATOR_HOOK_H__
+
+#include "Utils.h"
+#include "StringUtils.h"
+
+#include <loco/IR/Node.h>
+#include <luci_interpreter/Interpreter.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+#include <pybind11/embed.h>
+#include <vector>
+
+namespace py = pybind11;
+using namespace py::literals;
+
+namespace dalgona
+{
+
+// Invoke a user-written Python hook after an operator is executed
+class PostOperatorHook final : public luci::CircleNodeVisitor<void>
+{
+
+// This macro creates three variables used for post-operator hooks.
+// 1. hook: Python function to be invoked (type: py::object)
+// 2. inputs: input data (type: std::vector of numpy array)
+// 3. output: output data (type: numpy array)
+#define POST_OPERATOR_HOOK_PROLOGUE(OP_NAME) \
+ assert(not multi_out_node(node)); \
+ if (!py::hasattr(_analysis, #OP_NAME "Post")) \
+ { \
+ visit(loco::must_cast<const luci::CircleNode *>(node)); \
+ return; \
+ } \
+ py::object hook = _analysis.attr(#OP_NAME "Post"); \
+ auto inputs = inputsPyArray(node, _interpreter); \
+ auto output = outputPyArray(node, _interpreter);
+
+// Multi-output version of POST_OPERATOR_HOOK_PROLOGUE
+#define POST_OPERATOR_HOOK_PROLOGUE_MULTI_OUTS(OP_NAME) \
+ assert(multi_out_node(node)); \
+ if (!py::hasattr(_analysis, #OP_NAME "Post")) \
+ { \
+ visit(loco::must_cast<const luci::CircleNode *>(node)); \
+ return; \
+ } \
+ py::object hook = _analysis.attr(#OP_NAME "Post"); \
+ auto inputs = inputsPyArray(node, _interpreter); \
+ auto outputs = outputsPyArray(node, _interpreter);
+
+private:
+ py::object _analysis;
+ luci_interpreter::Interpreter *_interpreter{nullptr};
+
+public:
+ explicit PostOperatorHook(py::object analysis, luci_interpreter::Interpreter *interpreter)
+ : _analysis(analysis), _interpreter(interpreter)
+ {
+ // Do nothing
+ }
+
+ // default
+ void visit(const luci::CircleNode *node)
+ {
+ if (not py::hasattr(_analysis, "DefaultOpPost"))
+ return;
+
+ py::object hook = _analysis.attr("DefaultOpPost");
+ auto inputs = inputsPyArray(node, _interpreter);
+
+ py::list input_list;
+ for (uint32_t i = 0; i < inputs.size(); i++)
+ {
+ input_list.append(inputs[i]);
+ }
+
+ py::list output_list;
+ if (multi_out_node(node))
+ {
+ auto outputs = outputsPyArray(node, _interpreter);
+ for (uint32_t i = 0; i < outputs.size(); i++)
+ {
+ output_list.append(outputs[i]);
+ }
+ }
+ else
+ {
+ auto output = outputPyArray(node, _interpreter);
+ output_list.append(output);
+ }
+
+ pySafeCall(hook,
+ node->name(), // name
+ toString(node->opcode()), // opcode
+ input_list, // list of inputs
+ output_list // list of outputs
+ );
+ }
+
+ void visit(const luci::CircleConv2D *node)
+ {
+ POST_OPERATOR_HOOK_PROLOGUE(Conv2D)
+
+ auto padding = node->padding();
+ auto stride = node->stride();
+ auto dilation = node->dilation();
+
+ auto py_stride = py::dict("w"_a = stride->w(), "h"_a = stride->h());
+ auto py_dilation = py::dict("w"_a = dilation->w(), "h"_a = dilation->h());
+
+ auto fused_act = node->fusedActivationFunction();
+
+ pySafeCall(hook,
+ node->name(), // name
+ inputs[0], // input
+ inputs[1], // filter
+ inputs[2], // bias
+ padding == luci::Padding::SAME ? "SAME" : "VALID", // padding
+ py_stride, // stride
+ py_dilation, // dilation
+ output, // output
+ toString(fused_act) // fused activation
+ );
+ }
+
+ void visit(const luci::CircleDepthwiseConv2D *node)
+ {
+ POST_OPERATOR_HOOK_PROLOGUE(DepthwiseConv2D)
+
+ auto padding = node->padding();
+ auto stride = node->stride();
+ auto dilation = node->dilation();
+ auto depthMultiplier = node->depthMultiplier();
+
+ auto py_stride = py::dict("w"_a = stride->w(), "h"_a = stride->h());
+ auto py_dilation = py::dict("w"_a = dilation->w(), "h"_a = dilation->h());
+
+ auto fused_act = node->fusedActivationFunction();
+
+ pySafeCall(hook,
+ node->name(), // name
+ inputs[0], // input
+ inputs[1], // filter
+ inputs[2], // bias
+ padding == luci::Padding::SAME ? "SAME" : "VALID", // padding
+ py_stride, // stride
+ depthMultiplier, // depthMultiplier
+ py_dilation, // dilation
+ output, // output
+ toString(fused_act) // fused activation
+ );
+ }
+
+ void visit(const luci::CircleAdd *node)
+ {
+ POST_OPERATOR_HOOK_PROLOGUE(Add)
+
+ auto fused_act = node->fusedActivationFunction();
+
+ pySafeCall(hook,
+ node->name(), // name
+ inputs[0], // x
+ inputs[1], // y
+ output, // output
+ toString(fused_act) // fused activation
+ );
+ }
+
+ void visit(const luci::CircleFullyConnected *node)
+ {
+ POST_OPERATOR_HOOK_PROLOGUE(FullyConnected)
+
+ auto fused_act = node->fusedActivationFunction();
+
+ pySafeCall(hook,
+ node->name(), // name
+ inputs[0], // input
+ inputs[1], // weights
+ inputs[2], // bias
+ output, // output
+ toString(fused_act) // fused activation
+ );
+ }
+
+ void visit(const luci::CircleTransposeConv *node)
+ {
+ POST_OPERATOR_HOOK_PROLOGUE(TransposeConv)
+
+ auto padding = node->padding();
+ auto stride = node->stride();
+
+ auto py_stride = py::dict("w"_a = stride->w(), "h"_a = stride->h());
+
+ pySafeCall(hook,
+ node->name(), // name
+ inputs[2], // input
+ inputs[1], // filter
+ inputs[0], // output shape
+ inputs.size() == 4 ? inputs[3] : none(), // bias
+ padding == luci::Padding::SAME ? "SAME" : "VALID", // padding
+ py_stride, // stride
+ output // output
+ );
+ }
+
+ void visit(const luci::CircleInstanceNorm *node)
+ {
+ POST_OPERATOR_HOOK_PROLOGUE(InstanceNorm)
+
+ auto epsilon = node->epsilon();
+
+ auto fused_act = node->fusedActivationFunction();
+
+ pySafeCall(hook,
+ node->name(), // name
+ inputs[0], // input
+ inputs[1], // gamma
+ inputs[2], // beta
+ epsilon, // epsilon
+ output, // output
+ toString(fused_act) // fused activation
+ );
+ }
+
+ void visit(const luci::CircleSplit *node)
+ {
+ POST_OPERATOR_HOOK_PROLOGUE_MULTI_OUTS(Split)
+
+ py::list output_list;
+ for (uint32_t i = 0; i < outputs.size(); i++)
+ {
+ output_list.append(outputs[i]);
+ }
+
+ auto num_split = node->num_split();
+
+ pySafeCall(hook,
+ node->name(), // name
+ inputs[0], // split_dim
+ inputs[1], // input
+ num_split, // num_split
+ output_list // list of outputs
+ );
+ }
+
+#undef POST_OPERATOR_HOOK_PROLOGUE_MULTI_OUTS
+#undef POST_OPERATOR_HOOK_PROLOGUE
+};
+
+} // namespace dalgona
+
+#endif // __DALGONA_POST_OPERATOR_HOOK_H__
diff --git a/compiler/dalgona/src/PreOperatorHook.h b/compiler/dalgona/src/PreOperatorHook.h
new file mode 100644
index 000000000..eb6a95ef6
--- /dev/null
+++ b/compiler/dalgona/src/PreOperatorHook.h
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DALGONA_PRE_OPERATOR_HOOK_H__
+#define __DALGONA_PRE_OPERATOR_HOOK_H__
+
+#include "Utils.h"
+#include "StringUtils.h"
+
+#include <loco/IR/Node.h>
+#include <luci_interpreter/Interpreter.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+#include <pybind11/embed.h>
+#include <vector>
+
+namespace py = pybind11;
+using namespace py::literals;
+
+namespace dalgona
+{
+
+// Invoke a user-written Python hook before an operator is executed
+class PreOperatorHook final : public luci::CircleNodeVisitor<void>
+{
+
+// This macro creates two variables used for pre-operator hooks.
+// 1. hook: Python function to be invoked (type: py::object)
+// 2. inputs: input data (type: std::vector of numpy array)
+#define PRE_OPERATOR_HOOK_PROLOGUE(OP_NAME) \
+ if (!py::hasattr(_analysis, #OP_NAME "Pre")) \
+ { \
+ visit(loco::must_cast<const luci::CircleNode *>(node)); \
+ return; \
+ } \
+ py::object hook = _analysis.attr(#OP_NAME "Pre"); \
+ auto inputs = inputsPyArray(node, _interpreter);
+
+private:
+ py::object _analysis;
+ luci_interpreter::Interpreter *_interpreter{nullptr};
+
+public:
+ explicit PreOperatorHook(py::object analysis, luci_interpreter::Interpreter *interpreter)
+ : _analysis(analysis), _interpreter(interpreter)
+ {
+ // Do nothing
+ }
+
+ // default
+ void visit(const luci::CircleNode *node)
+ {
+ if (not py::hasattr(_analysis, "DefaultOpPre"))
+ return;
+
+ py::object hook = _analysis.attr("DefaultOpPre");
+ auto inputs = inputsPyArray(node, _interpreter);
+
+ py::list input_list;
+ for (uint32_t i = 0; i < inputs.size(); i++)
+ {
+ input_list.append(inputs[i]);
+ }
+
+ pySafeCall(hook,
+ node->name(), // name
+ toString(node->opcode()), // opcode
+ input_list // list of inputs
+ );
+ }
+
+ void visit(const luci::CircleConv2D *node)
+ {
+ PRE_OPERATOR_HOOK_PROLOGUE(Conv2D)
+
+ auto padding = node->padding();
+ auto stride = node->stride();
+ auto dilation = node->dilation();
+
+ auto py_stride = py::dict("w"_a = stride->w(), "h"_a = stride->h());
+ auto py_dilation = py::dict("w"_a = dilation->w(), "h"_a = dilation->h());
+
+ auto fused_act = node->fusedActivationFunction();
+
+ pySafeCall(hook,
+ node->name(), // name
+ inputs[0], // input
+ inputs[1], // filter
+ inputs[2], // bias
+ padding == luci::Padding::SAME ? "SAME" : "VALID", // padding
+ py_stride, // stride
+ py_dilation, // dilation
+ toString(fused_act) // fused activation
+ );
+ }
+
+ void visit(const luci::CircleDepthwiseConv2D *node)
+ {
+ PRE_OPERATOR_HOOK_PROLOGUE(DepthwiseConv2D)
+
+ auto padding = node->padding();
+ auto stride = node->stride();
+ auto dilation = node->dilation();
+ auto depthMultiplier = node->depthMultiplier();
+
+ auto py_stride = py::dict("w"_a = stride->w(), "h"_a = stride->h());
+ auto py_dilation = py::dict("w"_a = dilation->w(), "h"_a = dilation->h());
+
+ auto fused_act = node->fusedActivationFunction();
+
+ pySafeCall(hook,
+ node->name(), // name
+ inputs[0], // input
+ inputs[1], // filter
+ inputs[2], // bias
+ padding == luci::Padding::SAME ? "SAME" : "VALID", // padding
+ py_stride, // stride
+ depthMultiplier, // depthMultiplier
+ py_dilation, // dilation
+ toString(fused_act) // fused activation
+ );
+ }
+
+ void visit(const luci::CircleAdd *node)
+ {
+ PRE_OPERATOR_HOOK_PROLOGUE(Add)
+
+ auto fused_act = node->fusedActivationFunction();
+
+ pySafeCall(hook,
+ node->name(), // name
+ inputs[0], // x
+ inputs[1], // y
+ toString(fused_act) // fused activation
+ );
+ }
+
+ void visit(const luci::CircleFullyConnected *node)
+ {
+ PRE_OPERATOR_HOOK_PROLOGUE(FullyConnected)
+
+ auto fused_act = node->fusedActivationFunction();
+
+ pySafeCall(hook,
+ node->name(), // name
+ inputs[0], // input
+ inputs[1], // weights
+ inputs[2], // bias
+ toString(fused_act) // fused activation
+ );
+ }
+
+ void visit(const luci::CircleTransposeConv *node)
+ {
+ PRE_OPERATOR_HOOK_PROLOGUE(TransposeConv)
+
+ auto padding = node->padding();
+ auto stride = node->stride();
+
+ auto py_stride = py::dict("w"_a = stride->w(), "h"_a = stride->h());
+
+ pySafeCall(hook,
+ node->name(), // name
+ inputs[2], // input
+ inputs[1], // filter
+ inputs[0], // output shape
+ inputs.size() == 4 ? inputs[3] : none(), // bias
+ padding == luci::Padding::SAME ? "SAME" : "VALID", // padding
+ py_stride // stride
+ );
+ }
+
+ void visit(const luci::CircleInstanceNorm *node)
+ {
+ PRE_OPERATOR_HOOK_PROLOGUE(InstanceNorm)
+
+ auto epsilon = node->epsilon();
+
+ auto fused_act = node->fusedActivationFunction();
+
+ pySafeCall(hook,
+ node->name(), // name
+ inputs[0], // input
+ inputs[1], // gamma
+ inputs[2], // beta
+ epsilon, // epsilon
+ toString(fused_act) // fused activation
+ );
+ }
+
+ void visit(const luci::CircleSplit *node)
+ {
+ PRE_OPERATOR_HOOK_PROLOGUE(Split)
+
+ auto num_split = node->num_split();
+
+ pySafeCall(hook,
+ node->name(), // name
+ inputs[0], // split_dim
+ inputs[1], // input
+ num_split // num_split
+ );
+ }
+
+#undef PRE_OPERATOR_HOOK_PROLOGUE
+};
+
+} // namespace dalgona
+
+#endif // __DALGONA_PRE_OPERATOR_HOOK_H__
diff --git a/compiler/dalgona/src/PythonHooks.cpp b/compiler/dalgona/src/PythonHooks.cpp
new file mode 100644
index 000000000..8447699ca
--- /dev/null
+++ b/compiler/dalgona/src/PythonHooks.cpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PythonHooks.h"
+#include "PostOperatorHook.h"
+#include "PreOperatorHook.h"
+#include "Utils.h"
+
+#include <loco/IR/Graph.h>
+
+namespace dalgona
+{
+
+void PythonHooks::preOperatorExecute(const luci::CircleNode *node)
+{
+ PreOperatorHook hook(_analysis, _interpreter);
+ node->accept(&hook);
+}
+
+void PythonHooks::postOperatorExecute(const luci::CircleNode *node)
+{
+ PostOperatorHook hook(_analysis, _interpreter);
+ node->accept(&hook);
+}
+
+void PythonHooks::importAnalysis(const std::string &analysis_path, py::object &globals,
+ const std::string &analysis_args)
+{
+ const auto base_filename = analysis_path.substr(analysis_path.find_last_of("/\\") + 1);
+ // module name must be the same with the python code
+ // ex: base_filename = MyAnalysis.py -> module_name = MyAnalysis
+ const auto module_name = base_filename.substr(0, base_filename.find_last_of('.'));
+
+ py::dict locals;
+ locals["path"] = py::cast(analysis_path);
+
+ py::eval<py::eval_statements>("import sys\n"
+ "import os\n"
+ "sys.path.append(os.path.dirname(path))\n"
+ "import " +
+ module_name +
+ "\n"
+ "analysis = " +
+ module_name + "." + module_name + "()",
+ globals, locals);
+
+ _analysis = locals["analysis"];
+
+ if (py::hasattr(_analysis, "StartAnalysis"))
+ pySafeCall(_analysis.attr("StartAnalysis"), analysis_args);
+}
+
+void PythonHooks::startNetworkExecution(loco::Graph *graph)
+{
+ if (!py::hasattr(_analysis, "StartNetworkExecution"))
+ return;
+
+ assert(graph != nullptr); // FIX_CALLER_UNLESS
+
+ const auto input_nodes = loco::input_nodes(graph);
+ py::list inputs;
+ // Assumption: input_nodes is iterated in the same order of model inputs
+ for (const auto input_node : input_nodes)
+ {
+ auto circle_node = loco::must_cast<luci::CircleInput *>(input_node);
+ inputs.append(outputPyArray(circle_node, _interpreter));
+ }
+ pySafeCall(_analysis.attr("StartNetworkExecution"), inputs);
+}
+
+void PythonHooks::endNetworkExecution(loco::Graph *graph)
+{
+ if (!py::hasattr(_analysis, "EndNetworkExecution"))
+ return;
+
+ assert(graph != nullptr); // FIX_CALLER_UNLESS
+
+ const auto output_nodes = loco::output_nodes(graph);
+ py::list outputs;
+ // Assumption: output_nodes is iterated in the same order of model outputs
+ for (const auto output_node : output_nodes)
+ {
+ auto circle_node = loco::must_cast<luci::CircleOutput *>(output_node);
+ outputs.append(
+ outputPyArray(loco::must_cast<luci::CircleNode *>(circle_node->from()), _interpreter));
+ }
+ pySafeCall(_analysis.attr("EndNetworkExecution"), outputs);
+}
+
+void PythonHooks::endAnalysis()
+{
+ if (py::hasattr(_analysis, "EndAnalysis"))
+ pySafeCall(_analysis.attr("EndAnalysis"));
+}
+
+} // namespace dalgona
diff --git a/compiler/dalgona/src/RandomUtils.cpp b/compiler/dalgona/src/RandomUtils.cpp
new file mode 100644
index 000000000..a8e32b3e1
--- /dev/null
+++ b/compiler/dalgona/src/RandomUtils.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "RandomUtils.h"
+
+#include <random>
+#include <vector>
+#include <cassert>
+
+namespace dalgona
+{
+
+std::vector<float> genRandomFloatData(uint32_t num_elements, float min, float max)
+{
+ if (min > max)
+ throw std::invalid_argument("min is greater than max");
+
+ std::random_device rd;
+ std::mt19937 gen(rd());
+ std::uniform_real_distribution<> dist(min, max);
+ std::vector<float> buffer(num_elements);
+
+ // Write random data
+ for (auto &iter : buffer)
+ iter = static_cast<float>(dist(gen));
+
+ return buffer;
+}
+
+} // namespace dalgona
diff --git a/compiler/dalgona/src/RandomUtils.h b/compiler/dalgona/src/RandomUtils.h
new file mode 100644
index 000000000..6f6f48fea
--- /dev/null
+++ b/compiler/dalgona/src/RandomUtils.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DALGONA_RANDOM_UTILS_H__
+#define __DALGONA_RANDOM_UTILS_H__
+
+#include <cstdint>
+#include <vector>
+#include <random>
+#include <stdexcept>
+
+namespace dalgona
+{
+
+template <typename T> std::vector<T> genRandomIntData(uint32_t num_elements, T min, T max)
+{
+ if (min > max)
+ throw std::invalid_argument("min is greater than max");
+
+ std::random_device rd;
+ std::mt19937 gen(rd());
+ std::uniform_int_distribution<T> dist(min, max);
+ std::vector<T> buffer(num_elements);
+
+ // Write random data
+ for (auto &iter : buffer)
+ iter = dist(gen);
+
+ return buffer;
+}
+
+std::vector<float> genRandomFloatData(uint32_t num_elements, float min, float max);
+
+} // namespace dalgona
+
+#endif // __DALGONA_RANDOM_UTILS_H__
diff --git a/compiler/dalgona/src/RandomUtils.test.cpp b/compiler/dalgona/src/RandomUtils.test.cpp
new file mode 100644
index 000000000..b04d8b83b
--- /dev/null
+++ b/compiler/dalgona/src/RandomUtils.test.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "RandomUtils.h"
+
+#include <gtest/gtest.h>
+
+using namespace dalgona;
+
+TEST(DalgonaUtilTest, gen_random_int32)
+{
+ const uint32_t num_elements = 10;
+ const int32_t min = -5;
+ const int32_t max = 5;
+ std::vector<int32_t> buffer = genRandomIntData<int32_t>(num_elements, min, max);
+
+ EXPECT_EQ(num_elements, buffer.size());
+ for (auto val : buffer)
+ {
+ EXPECT_TRUE(val >= min and val <= max);
+ }
+}
+
+TEST(DalgonaUtilTest, gen_random_int32_NEG)
+{
+ const uint32_t num_elements = 10;
+ const int32_t min = 5;
+ const int32_t max = -5;
+ EXPECT_ANY_THROW(genRandomIntData<int32_t>(num_elements, min, max));
+}
+
+TEST(DalgonaUtilTest, gen_random_float)
+{
+ const uint32_t num_elements = 10;
+ const float min = -5;
+ const float max = 5;
+ std::vector<float> buffer = genRandomFloatData(num_elements, min, max);
+
+ EXPECT_EQ(num_elements, buffer.size());
+ for (auto val : buffer)
+ {
+ EXPECT_TRUE(val >= min and val <= max);
+ }
+}
+
+TEST(DalgonaUtilTest, gen_random_float_NEG)
+{
+ const uint32_t num_elements = 10;
+ const float min = 5;
+ const float max = -5;
+ EXPECT_ANY_THROW(genRandomFloatData(num_elements, min, max));
+}
diff --git a/compiler/dalgona/src/StringUtils.cpp b/compiler/dalgona/src/StringUtils.cpp
new file mode 100644
index 000000000..423b48860
--- /dev/null
+++ b/compiler/dalgona/src/StringUtils.cpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "StringUtils.h"
+
+#include <luci/IR/CircleNodeDecl.h>
+
+#include <cassert>
+#include <stdexcept>
+
+namespace dalgona
+{
+
+const std::string toString(luci::CircleOpcode opcode)
+{
+ static const char *names[] = {
+#define CIRCLE_NODE(OPCODE, CIRCLE_CLASS) #CIRCLE_CLASS,
+#define CIRCLE_VNODE(OPCODE, CIRCLE_CLASS) #CIRCLE_CLASS,
+#include <luci/IR/CircleNodes.lst>
+#undef CIRCLE_NODE
+#undef CIRCLE_VNODE
+ };
+
+ auto const node_name = names[static_cast<int>(opcode)];
+
+ assert(std::string(node_name).substr(0, 6) == "Circle"); // FIX_ME_UNLESS
+
+ // Return substring of class name ("Circle" is sliced out)
+ // Ex: Return "Conv2D" for "CircleConv2D" node
+ return std::string(node_name).substr(6);
+}
+
+const std::string toString(luci::FusedActFunc fused_act)
+{
+ switch (fused_act)
+ {
+ case (luci::FusedActFunc::UNDEFINED):
+ return std::string("undefined");
+ case (luci::FusedActFunc::NONE):
+ return std::string("none");
+ case (luci::FusedActFunc::RELU):
+ return std::string("relu");
+ case (luci::FusedActFunc::RELU_N1_TO_1):
+ return std::string("relu_n1_to_1");
+ case (luci::FusedActFunc::RELU6):
+ return std::string("relu6");
+ case (luci::FusedActFunc::TANH):
+ return std::string("tanh");
+ case (luci::FusedActFunc::SIGN_BIT):
+ return std::string("sign_bit");
+ default:
+ throw std::runtime_error("Unsupported activation function");
+ }
+}
+
+} // namespace dalgona
diff --git a/compiler/dalgona/src/StringUtils.h b/compiler/dalgona/src/StringUtils.h
new file mode 100644
index 000000000..ad9d061ad
--- /dev/null
+++ b/compiler/dalgona/src/StringUtils.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DALGONA_STRING_UTILS_H__
+#define __DALGONA_STRING_UTILS_H__
+
+#include <luci/IR/CircleOpcode.h>
+#include <luci/IR/AttrFusedActFunc.h>
+
+#include <string>
+
+namespace dalgona
+{
+
+const std::string toString(luci::CircleOpcode opcode);
+
+const std::string toString(luci::FusedActFunc fused_act);
+
+} // namespace dalgona
+
+#endif // __DALGONA_STRING_UTILS_H__
diff --git a/compiler/dalgona/src/StringUtils.test.cpp b/compiler/dalgona/src/StringUtils.test.cpp
new file mode 100644
index 000000000..e795a4773
--- /dev/null
+++ b/compiler/dalgona/src/StringUtils.test.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "StringUtils.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/AttrFusedActFunc.h>
+
+#include <gtest/gtest.h>
+
+using namespace dalgona;
+
+TEST(DalgonaUtilTest, toString_basic)
+{
+ luci::CircleConv2D node;
+
+ EXPECT_EQ("Conv2D", toString(node.opcode()));
+}
+
+TEST(DalgonaUtilTest, toString_fused_act_func)
+{
+ EXPECT_EQ("undefined", toString(luci::FusedActFunc::UNDEFINED));
+ EXPECT_EQ("none", toString(luci::FusedActFunc::NONE));
+ EXPECT_EQ("relu", toString(luci::FusedActFunc::RELU));
+ EXPECT_EQ("relu6", toString(luci::FusedActFunc::RELU6));
+ EXPECT_EQ("relu_n1_to_1", toString(luci::FusedActFunc::RELU_N1_TO_1));
+ EXPECT_EQ("tanh", toString(luci::FusedActFunc::TANH));
+ EXPECT_EQ("sign_bit", toString(luci::FusedActFunc::SIGN_BIT));
+}
diff --git a/compiler/dalgona/src/Utils.cpp b/compiler/dalgona/src/Utils.cpp
new file mode 100644
index 000000000..a5b0bb529
--- /dev/null
+++ b/compiler/dalgona/src/Utils.cpp
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Utils.h"
+#include "StringUtils.h"
+
+#include <luci_interpreter/core/Tensor.h>
+#include <luci/IR/CircleOpcode.h>
+#include <luci/IR/CircleNodeDecl.h>
+
+#include <pybind11/numpy.h>
+#include <stdexcept>
+#include <vector>
+
+using Tensor = luci_interpreter::Tensor;
+
+namespace py = pybind11;
+using namespace py::literals;
+
+#define THROW_UNLESS(COND, MSG) \
+ if (not(COND)) \
+ throw std::runtime_error(MSG);
+
+namespace
+{
+
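+// Convert a luci-interpreter Tensor into a numpy array (py::array); returns None for zero-sized tensors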
+py::array numpyArray(const Tensor *tensor)
+{
+ assert(tensor != nullptr); // FIX_CALLER_UNLESS
+
+ const auto tensor_shape = tensor->shape();
+
+ uint32_t size = 1;
+ std::vector<uint32_t> shape(tensor_shape.num_dims());
+ for (int i = 0; i < tensor_shape.num_dims(); i++)
+ {
+ THROW_UNLESS(tensor_shape.dim(i) >= 0, "Negative dimension detected in " + tensor->name());
+
+ shape[i] = tensor_shape.dim(i);
+ size *= shape[i];
+ }
+
+ if (size == 0)
+ return py::none();
+
+ switch (tensor->element_type())
+ {
+ case loco::DataType::FLOAT32:
+ return py::array_t<float, py::array::c_style>(shape, tensor->data<float>());
+ case loco::DataType::S16:
+ return py::array_t<int16_t, py::array::c_style>(shape, tensor->data<int16_t>());
+ case loco::DataType::S32:
+ return py::array_t<int32_t, py::array::c_style>(shape, tensor->data<int32_t>());
+ case loco::DataType::S64:
+ return py::array_t<int64_t, py::array::c_style>(shape, tensor->data<int64_t>());
+ case loco::DataType::U8:
+ return py::array_t<uint8_t, py::array::c_style>(shape, tensor->data<uint8_t>());
+ default:
+ throw std::runtime_error("Unsupported data type");
+ }
+}
+
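+// Build the QuantParam dict ({scale, zero_point, quantized_dimension}) exposed to analysis code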
+py::dict quantparam(const Tensor *tensor)
+{
+ assert(tensor != nullptr); // FIX_CALLER_UNLESS
+
+ auto scale = tensor->scales();
+ auto zp = tensor->zero_points();
+
+ py::list py_scale;
+ for (auto s : scale)
+ {
+ py_scale.append(s);
+ }
+
+ py::list py_zp;
+ for (auto z : zp)
+ {
+ py_zp.append(z);
+ }
+
+ auto quantparam = py::dict("scale"_a = py_scale, "zero_point"_a = py_zp,
+ "quantized_dimension"_a = tensor->quantized_dimension());
+ return quantparam;
+}
+
+} // namespace
+
+namespace dalgona
+{
+
+py::object none() { return py::none(); }
+
+std::vector<py::dict> inputsPyArray(const luci::CircleNode *node,
+ luci_interpreter::Interpreter *interpreter)
+{
+ assert(node != nullptr); // FIX_CALLER_UNLESS
+ assert(interpreter != nullptr); // FIX_CALLER_UNLESS
+
+ std::vector<py::dict> inputs;
+ for (uint32_t i = 0; i < node->arity(); ++i)
+ {
+ const auto input_tensor = interpreter->getTensor(node->arg(i));
+ auto circle_node = static_cast<luci::CircleNode *>(node->arg(i));
+
+ // skip invalid inputs (e.g., non-existing bias in TCONV)
+ if (circle_node->opcode() == luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE)
+ continue;
+
+ auto py_input =
+ py::dict("name"_a = circle_node->name(), "data"_a = numpyArray(input_tensor),
+ "quantparam"_a = quantparam(input_tensor),
+ "is_const"_a = circle_node->opcode() == luci::CircleOpcode::CIRCLECONST);
+ inputs.push_back(py_input);
+ }
+ return inputs;
+}
+
+std::vector<py::dict> outputsPyArray(const luci::CircleNode *node,
+ luci_interpreter::Interpreter *interpreter)
+{
+ std::vector<py::dict> outputs;
+ for (auto succ : loco::succs(node))
+ {
+ const auto output_tensor = interpreter->getTensor(succ);
+ auto circle_node = static_cast<luci::CircleNode *>(succ);
+
+ auto opcode_str = toString(circle_node->opcode());
+ // Check if node is a multi-output node
+ // Assumption: Multi-output virtual nodes have an 'Out' suffix
+ // TODO Fix this if the assumption changes
+ THROW_UNLESS(opcode_str.substr(opcode_str.length() - 3) == "Out",
+ "Invalid output detected in " + node->name());
+
+ auto py_output =
+ py::dict("name"_a = circle_node->name(), "data"_a = numpyArray(output_tensor),
+ "quantparam"_a = quantparam(output_tensor),
+ "is_const"_a = circle_node->opcode() == luci::CircleOpcode::CIRCLECONST);
+ outputs.push_back(py_output);
+ }
+ return outputs;
+}
+
+// Note: Only returns 1 output
+py::dict outputPyArray(const luci::CircleNode *node, luci_interpreter::Interpreter *interpreter)
+{
+ assert(node != nullptr); // FIX_CALLER_UNLESS
+ assert(interpreter != nullptr); // FIX_CALLER_UNLESS
+
+ const auto tensor = interpreter->getTensor(node);
+
+ THROW_UNLESS(tensor != nullptr, "Null tensor detected in " + node->name());
+
+ auto py_output = py::dict("name"_a = node->name(), "data"_a = numpyArray(tensor),
+ "quantparam"_a = quantparam(tensor),
+ "is_const"_a = node->opcode() == luci::CircleOpcode::CIRCLECONST);
+ return py_output;
+}
+
+bool multi_out_node(const luci::CircleNode *node)
+{
+ switch (node->opcode())
+ {
+ // TODO Update this list when new Op is added
+ // Tip: grep "public GraphBuilderMultiOutput" in luci/import
+ case luci::CircleOpcode::BIDIRECTIONAL_SEQUENCE_LSTM:
+ case luci::CircleOpcode::CUSTOM:
+ case luci::CircleOpcode::IF:
+ case luci::CircleOpcode::NON_MAX_SUPPRESSION_V4:
+ case luci::CircleOpcode::NON_MAX_SUPPRESSION_V5:
+ case luci::CircleOpcode::SPLIT:
+ case luci::CircleOpcode::SPLIT_V:
+ case luci::CircleOpcode::TOPK_V2:
+ case luci::CircleOpcode::UNIQUE:
+ case luci::CircleOpcode::UNPACK:
+ return true;
+ default:
+ return false;
+ }
+}
+
+} // namespace dalgona
+
+#undef THROW_UNLESS
diff --git a/compiler/dalgona/src/Utils.h b/compiler/dalgona/src/Utils.h
new file mode 100644
index 000000000..7bda30745
--- /dev/null
+++ b/compiler/dalgona/src/Utils.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DALGONA_UTILS_H__
+#define __DALGONA_UTILS_H__
+
+#include <luci_interpreter/Interpreter.h>
+
+#include <pybind11/embed.h>
+
+namespace py = pybind11;
+
+namespace dalgona
+{
+
+template <typename... Args> void pySafeCall(py::object func, Args... args)
+{
+ try
+ {
+ func(args...);
+ }
+ catch (py::error_already_set &e)
+ {
+ throw std::runtime_error(e.what());
+ }
+}
+
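+// Return the Tensor (py::dict) which corresponds to node's output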
+py::dict outputPyArray(const luci::CircleNode *node, luci_interpreter::Interpreter *interpreter);
+
+// Return a vector of Tensors (py::dict) which correspond to node's inputs
+std::vector<py::dict> inputsPyArray(const luci::CircleNode *node,
+ luci_interpreter::Interpreter *interpreter);
+
+// Return a vector of Tensors (py::dict) which correspond to the outputs of a multi-out node
+// (e.g., SPLIT)
+std::vector<py::dict> outputsPyArray(const luci::CircleNode *node,
+ luci_interpreter::Interpreter *interpreter);
+
+py::object none();
+
+bool multi_out_node(const luci::CircleNode *node);
+
+} // namespace dalgona
+
+#endif // __DALGONA_UTILS_H__
diff --git a/compiler/dio-hdf5/CMakeLists.txt b/compiler/dio-hdf5/CMakeLists.txt
new file mode 100644
index 000000000..199c0d59d
--- /dev/null
+++ b/compiler/dio-hdf5/CMakeLists.txt
@@ -0,0 +1,30 @@
+nnas_find_package(HDF5 COMPONENTS STATIC QUIET)
+
+if(NOT HDF5_FOUND)
+ message(STATUS "Build dio_hdf5: FAILED (missing HDF5)")
+ return()
+endif(NOT HDF5_FOUND)
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(dio_hdf5 SHARED ${SOURCES})
+target_include_directories(dio_hdf5 PUBLIC include)
+target_include_directories(dio_hdf5 PUBLIC ${HDF5_INCLUDE_DIRS})
+target_link_libraries(dio_hdf5 PUBLIC ${HDF5_CXX_LIBRARIES})
+target_link_libraries(dio_hdf5 PUBLIC loco)
+
+install(TARGETS dio_hdf5 DESTINATION lib)
+install(DIRECTORY include/ DESTINATION include
+ FILES_MATCHING PATTERN "*.h")
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(dio_hdf5_test ${TESTS})
+target_include_directories(dio_hdf5_test PRIVATE include)
+target_link_libraries(dio_hdf5_test dio_hdf5)
diff --git a/compiler/dio-hdf5/README.md b/compiler/dio-hdf5/README.md
new file mode 100644
index 000000000..aa2398ce8
--- /dev/null
+++ b/compiler/dio-hdf5/README.md
@@ -0,0 +1,29 @@
+# dio-hdf5
+
+_dio-hdf5_ is a library that helps load HDF5 files (_dio_ stands for data I/O).
+
+The HDF5 file should have the following structure.
+
+```
+Group "/"
+ > Group <group_name>
+ > Group <data_idx>
+ > Dataset <input_idx>
+```
+
+## Example
+
+```cpp
+dio_hdf5::HDF5Importer h5{input_path};
+
+h5.importGroup("value");
+
+// Prepare buffer
+const uint32_t input_byte_size = 16;
+std::vector<char> buffer(input_byte_size);
+
+// Read the first input of the first data into the buffer
+h5.readTensor(0, 0, buffer.data(), buffer.size());
+
+DO_SOMETHING_WITH(buffer);
+```
diff --git a/compiler/dio-hdf5/include/dio_hdf5/HDF5Importer.h b/compiler/dio-hdf5/include/dio_hdf5/HDF5Importer.h
new file mode 100644
index 000000000..add441147
--- /dev/null
+++ b/compiler/dio-hdf5/include/dio_hdf5/HDF5Importer.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DIO_HDF5_H__
+#define __DIO_HDF5_H__
+
+#include <H5Cpp.h>
+
+#include <loco.h>
+
+#include <string>
+#include <vector>
+
+namespace dio
+{
+namespace hdf5
+{
+
+// HDF5Importer reads input data saved in an HDF5 file at the given path.
+// The hierarchy of the HDF5 file is as follows.
+// Group "/"
+// > Group <group_name>
+// > Group <data_idx>
+// > Dataset <input_idx>
+// data_idx : index of the data (dataset file can contain multiple data)
+// input_idx : index of the input (DNN model can have multiple inputs)
+// Ex: the j'th input of the i'th data of group 'value' can be accessed by "/value/i/j"
+class HDF5Importer final
+{
+public:
+ explicit HDF5Importer(const std::string &path);
+
+public:
+ /**
+ * @note importGroup must be called before readTensor;
+ * otherwise, readTensor will throw an exception
+ */
+ void importGroup(const std::string &group) { _group = _file.openGroup(group); }
+
+ /**
+ * @brief Read tensor data from file and store it into buffer
+ * @details A tensor in the file can be retrieved with (data_idx, input_idx)
+ * @param data_idx : index of the data
+ * @param input_idx : index of the input
+ * @param dtype : pointer to write the tensor's data type
+ * @param shape : pointer to write the tensor's shape
+ * @param buffer : pointer to write the tensor's data
+ * @param buffer_bytes : byte size of the buffer
+ */
+ void readTensor(int32_t data_idx, int32_t input_idx, loco::DataType *dtype,
+ std::vector<loco::Dimension> *shape, void *buffer, size_t buffer_bytes);
+
+ // Read a raw tensor (no type/shape is specified)
+ void readTensor(int32_t data_idx, int32_t input_idx, void *buffer, size_t buffer_bytes);
+
+ bool isRawData() { return _group.attrExists("rawData"); }
+
+ int32_t numData() { return _group.getNumObjs(); }
+
+ int32_t numInputs(int32_t data_idx);
+
+private:
+ H5::H5File _file;
+ H5::Group _group;
+};
+
+} // namespace hdf5
+} // namespace dio
+
+#endif // __DIO_HDF5_H__
diff --git a/compiler/dio-hdf5/requires.cmake b/compiler/dio-hdf5/requires.cmake
new file mode 100644
index 000000000..44f6870da
--- /dev/null
+++ b/compiler/dio-hdf5/requires.cmake
@@ -0,0 +1 @@
+require("loco")
diff --git a/compiler/dio-hdf5/src/HDF5Importer.cpp b/compiler/dio-hdf5/src/HDF5Importer.cpp
new file mode 100644
index 000000000..920899058
--- /dev/null
+++ b/compiler/dio-hdf5/src/HDF5Importer.cpp
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dio_hdf5/HDF5Importer.h"
+
+#include <H5Cpp.h>
+
+#include <string>
+#include <vector>
+#include <cassert>
+#include <stdexcept>
+
+using Shape = std::vector<loco::Dimension>;
+using DataType = loco::DataType;
+
+namespace
+{
+
+Shape toInternalShape(const H5::DataSpace &dataspace)
+{
+ int rank = dataspace.getSimpleExtentNdims();
+
+ std::vector<hsize_t> dims;
+ dims.resize(rank, 0);
+ dataspace.getSimpleExtentDims(dims.data());
+
+ Shape res;
+ for (int axis = 0; axis < rank; ++axis)
+ {
+ res.emplace_back(dims[axis]);
+ }
+
+ return res;
+}
+
+DataType toInternalDtype(const H5::DataType &h5_type)
+{
+ if (h5_type == H5::PredType::IEEE_F32BE || h5_type == H5::PredType::IEEE_F32LE)
+ {
+ return DataType::FLOAT32;
+ }
+ if (h5_type == H5::PredType::STD_I32BE || h5_type == H5::PredType::STD_I32LE)
+ {
+ return DataType::S32;
+ }
+ if (h5_type == H5::PredType::STD_I64BE || h5_type == H5::PredType::STD_I64LE)
+ {
+ return DataType::S64;
+ }
+ if (h5_type.getClass() == H5T_class_t::H5T_ENUM)
+ {
+ // We follow the numpy format
+ // In numpy 1.19.0, np.bool_ is saved as H5T_ENUM
+ // - (name, value) -> (FALSE, 0) and (TRUE, 1)
+ // - value dtype is H5T_STD_I8LE
+ // TODO Find a general way to recognize BOOL type
+ char name[10];
+ int8_t value[2] = {0, 1};
+ if (H5Tenum_nameof(h5_type.getId(), value, name, 10) < 0)
+ return DataType::Unknown;
+
+ if (std::string(name) != "FALSE")
+ return DataType::Unknown;
+
+ if (H5Tenum_nameof(h5_type.getId(), value + 1, name, 10) < 0)
+ return DataType::Unknown;
+
+ if (std::string(name) != "TRUE")
+ return DataType::Unknown;
+
+ return DataType::BOOL;
+ }
+ // TODO Support more datatypes
+ return DataType::Unknown;
+}
+
+void readTensorData(H5::DataSet &tensor, uint8_t *buffer)
+{
+ tensor.read(buffer, H5::PredType::NATIVE_UINT8);
+}
+
+void readTensorData(H5::DataSet &tensor, float *buffer)
+{
+ tensor.read(buffer, H5::PredType::NATIVE_FLOAT);
+}
+
+void readTensorData(H5::DataSet &tensor, int32_t *buffer)
+{
+ tensor.read(buffer, H5::PredType::NATIVE_INT);
+}
+
+void readTensorData(H5::DataSet &tensor, int64_t *buffer)
+{
+ tensor.read(buffer, H5::PredType::NATIVE_LONG);
+}
+
+} // namespace
+
+namespace dio
+{
+namespace hdf5
+{
+
+HDF5Importer::HDF5Importer(const std::string &path)
+{
+ if (H5::H5File::isHdf5(path) == false)
+ throw std::runtime_error("Given data file is not HDF5");
+
+ _file = H5::H5File(path, H5F_ACC_RDONLY);
+}
+
+int32_t HDF5Importer::numInputs(int32_t record_idx)
+{
+ auto records = _group.openGroup(std::to_string(record_idx));
+ return records.getNumObjs();
+}
+
+void HDF5Importer::readTensor(int32_t record_idx, int32_t input_idx, void *buffer,
+ size_t buffer_bytes)
+{
+ auto record = _group.openGroup(std::to_string(record_idx));
+ auto tensor = record.openDataSet(std::to_string(input_idx));
+
+ if (tensor.getInMemDataSize() != buffer_bytes)
+ throw std::runtime_error("Buffer size does not match with the size of tensor data");
+
+ readTensorData(tensor, static_cast<uint8_t *>(buffer));
+}
+
+void HDF5Importer::readTensor(int32_t record_idx, int32_t input_idx, DataType *dtype, Shape *shape,
+ void *buffer, size_t buffer_bytes)
+{
+ auto record = _group.openGroup(std::to_string(record_idx));
+ auto tensor = record.openDataSet(std::to_string(input_idx));
+
+ auto tensor_dtype = tensor.getDataType();
+ *dtype = toInternalDtype(tensor_dtype);
+
+ auto tensor_shape = tensor.getSpace();
+ *shape = toInternalShape(tensor_shape);
+
+ if (tensor.getInMemDataSize() != buffer_bytes)
+ throw std::runtime_error("Buffer size does not match with the size of tensor data");
+
+ switch (*dtype)
+ {
+ case DataType::FLOAT32:
+ readTensorData(tensor, static_cast<float *>(buffer));
+ break;
+ case DataType::S32:
+ readTensorData(tensor, static_cast<int32_t *>(buffer));
+ break;
+ case DataType::S64:
+ readTensorData(tensor, static_cast<int64_t *>(buffer));
+ break;
+ case DataType::BOOL:
+ readTensorData(tensor, static_cast<uint8_t *>(buffer));
+ break;
+ default:
+ throw std::runtime_error{"Unsupported data type for input data (.h5)"};
+ }
+}
+
+} // namespace hdf5
+} // namespace dio
diff --git a/compiler/dio-hdf5/src/HDF5Importer.test.cpp b/compiler/dio-hdf5/src/HDF5Importer.test.cpp
new file mode 100644
index 000000000..1433e761d
--- /dev/null
+++ b/compiler/dio-hdf5/src/HDF5Importer.test.cpp
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dio_hdf5/HDF5Importer.h"
+
+#include <loco.h>
+
+#include <H5Cpp.h>
+
+#include <cstdio>
+#include <stdexcept>
+
+#include <gtest/gtest.h>
+
+using HDF5Importer = dio::hdf5::HDF5Importer;
+using Shape = std::vector<loco::Dimension>;
+using DataType = loco::DataType;
+
+namespace
+{
+
+const std::string file_name("dio_hdf5_test.h5");
+
+void createFile()
+{
+ // If the file already exists, remove it.
+ if (auto f = fopen(file_name.c_str(), "r"))
+ {
+ fclose(f);
+ if (remove(file_name.c_str()) != 0)
+ throw std::runtime_error("Error deleting file.");
+ }
+
+ const auto rank = 3;
+ hsize_t dim[3] = {1, 2, 3};
+ H5::DataSpace space(rank, dim);
+
+ float data[] = {0, 1, 2, 3, 4, 5};
+
+ // Create test file in the current directory
+ H5::H5File file(file_name, H5F_ACC_TRUNC);
+ {
+ file.createGroup("/value");
+ file.createGroup("/value/0");
+ H5::DataSet dataset(file.createDataSet("/value/0/0", H5::PredType::IEEE_F32BE, space));
+ dataset.write(data, H5::PredType::IEEE_F32LE);
+ }
+}
+
+} // namespace
+
+TEST(dio_hdf5_test, read_with_type_shape)
+{
+ createFile();
+
+ HDF5Importer h5(::file_name);
+
+ h5.importGroup("value");
+
+ std::vector<float> buffer(6);
+
+ DataType dtype;
+ Shape shape;
+ h5.readTensor(0, 0, &dtype, &shape, buffer.data(), buffer.size() * sizeof(float));
+
+ for (uint32_t i = 0; i < 6; i++)
+ EXPECT_EQ(i, buffer[i]);
+
+ EXPECT_EQ(DataType::FLOAT32, dtype);
+ EXPECT_EQ(3, shape.size());
+ EXPECT_EQ(1, shape[0]);
+ EXPECT_EQ(2, shape[1]);
+ EXPECT_EQ(3, shape[2]);
+}
+
+TEST(dio_hdf5_test, wrong_path_NEG)
+{
+ const std::string wrong_path = "not_existing_file_for_dio_hdf5_test";
+
+ EXPECT_ANY_THROW(HDF5Importer h5(wrong_path));
+}
+
+TEST(dio_hdf5_test, wrong_group_name_NEG)
+{
+ createFile();
+
+ HDF5Importer h5(::file_name);
+
+ EXPECT_ANY_THROW(h5.importGroup("wrong"));
+}
+
+TEST(dio_hdf5_test, data_out_of_index_NEG)
+{
+ createFile();
+
+ HDF5Importer h5(::file_name);
+
+ h5.importGroup("value");
+
+ std::vector<float> buffer(6);
+
+ DataType dtype;
+ Shape shape;
+ // Read non-existing data (data_idx = 1)
+ EXPECT_ANY_THROW(
+ h5.readTensor(1, 0, &dtype, &shape, buffer.data(), buffer.size() * sizeof(float)));
+}
+
+TEST(dio_hdf5_test, input_out_of_index_NEG)
+{
+ createFile();
+
+ HDF5Importer h5(::file_name);
+
+ h5.importGroup("value");
+
+ std::vector<float> buffer(6);
+
+ DataType dtype;
+ Shape shape;
+ // Read non-existing input (input_idx = 1)
+ EXPECT_ANY_THROW(
+ h5.readTensor(0, 1, &dtype, &shape, buffer.data(), buffer.size() * sizeof(float)));
+}
+
+TEST(dio_hdf5_test, wrong_buffer_size_NEG)
+{
+ createFile();
+
+ HDF5Importer h5(::file_name);
+
+ h5.importGroup("value");
+
+ std::vector<float> buffer(6);
+
+ DataType dtype;
+ Shape shape;
+ EXPECT_ANY_THROW(h5.readTensor(0, 0, &dtype, &shape, buffer.data(), 1 /* wrong buffer size */));
+}
diff --git a/compiler/dredd-rule-lib/rule-lib.sh b/compiler/dredd-rule-lib/rule-lib.sh
index 9254cc9a7..a920e08ab 100755
--- a/compiler/dredd-rule-lib/rule-lib.sh
+++ b/compiler/dredd-rule-lib/rule-lib.sh
@@ -217,4 +217,39 @@ op_version()
echo ${ACTUAL}
}
+tensor_dtype()
+{
+ argc_check $# 1
+ file_path_check ${COMPILED_FILE}
+ file_path_check ${INSPECT_PROG_PATH}
+
+ set -o pipefail
+
+ ACTUAL=`init_error_log ; \
+ ${INSPECT_PROG_PATH} --tensor_dtype ${COMPILED_FILE} | \
+ awk -v tensor_name="$1" '{ if ($1 == tensor_name) print $2}'`
+
+ check_success_exit_code $? 0
+
+ echo ${ACTUAL}
+}
+
+const_count()
+{
+ argc_check $# 1
+ file_path_check ${COMPILED_FILE}
+ file_path_check ${INSPECT_PROG_PATH}
+
+ set -o pipefail
+
+ RESULT=`init_error_log ; ${INSPECT_PROG_PATH} --constants ${COMPILED_FILE}`
+ check_success_exit_code $? 0
+
+ # note : grep's exit code is 2 in case of error.
+ ACTUAL=`init_error_log ; echo "${RESULT}" | grep -wc "$1"`
+ check_error_exit_code $? 2
+
+ echo ${ACTUAL}
+}
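+
+# For reference, these helpers are meant to be invoked from rule files, e.g.
+# (hypothetical rule lines; actual tensor names depend on the model under test):
+#
+#   RULE "OFM_DTYPE_IS_UINT8"   $(tensor_dtype ofm) '=' UINT8
+#   RULE "HAS_ONE_CONST_KER"    $(const_count ker) '=' 1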
+
# TODO define more quality test functions
diff --git a/compiler/embedded-import-value-test/.gitignore b/compiler/embedded-import-value-test/.gitignore
new file mode 100644
index 000000000..8dbfa9012
--- /dev/null
+++ b/compiler/embedded-import-value-test/.gitignore
@@ -0,0 +1 @@
+/test.local.lst
diff --git a/compiler/embedded-import-value-test/CMakeLists.txt b/compiler/embedded-import-value-test/CMakeLists.txt
new file mode 100644
index 000000000..785edfc7d
--- /dev/null
+++ b/compiler/embedded-import-value-test/CMakeLists.txt
@@ -0,0 +1,34 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+set(SRCS_TEST_DRIVER src/TestDriver.cpp)
+
+# create driver
+add_executable(test_driver ${SRCS_TEST_DRIVER})
+target_link_libraries(test_driver PRIVATE luci_interpreter_import)
+target_link_libraries(test_driver PRIVATE luci_interpreter)
+target_link_libraries(test_driver PRIVATE safemain)
+
+unset(EMBEDDED_IMPORT_VALUE_TESTS)
+
+macro(addeval NAME)
+ list(APPEND EMBEDDED_IMPORT_VALUE_TESTS ${NAME})
+endmacro(addeval)
+
+# Read "test.lst"
+include("test.lst")
+# Read "test.local.lst" if exists
+include("test.local.lst" OPTIONAL)
+
+# Generate dependencies
+add_custom_target(embedded_import_testfiles ALL DEPENDS ${TESTFILES})
+
+get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
+
+add_test(NAME embedded_import_value_test
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/evalverify.sh"
+ "${CMAKE_CURRENT_BINARY_DIR}"
+ "${ARTIFACTS_BIN_PATH}"
+ ${EMBEDDED_IMPORT_VALUE_TESTS}
+)
diff --git a/compiler/embedded-import-value-test/README.md b/compiler/embedded-import-value-test/README.md
new file mode 100644
index 000000000..71a95486f
--- /dev/null
+++ b/compiler/embedded-import-value-test/README.md
@@ -0,0 +1,13 @@
+# embedded-import-value-test
+
+`embedded-import-value-test` checks that models imported with and without constant copying produce the same output values.
+
+The test proceeds as follows:
+
+1. Generate random input for the provided circle model.
+
+2. Import the circle model into luci in two modes:
+ - With constant copying (default mode).
+ - Without constant copying (experimental feature).
+
+3. Compare the execution results of both modes. The results must be identical.
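+
+A condensed sketch of the two import paths, mirroring `src/TestDriver.cpp`
+(`circle_model` is the parsed flatbuffer model from the driver):
+
+```cpp
+// default mode: constants are copied into the luci IR
+const auto default_source = &luci::GraphBuilderRegistry::get();
+const auto module_1 = luci::Importer(default_source).importModule(circle_model);
+
+// experimental mode: constants are used without copying
+const auto optimized_source = luci_interpreter::source_without_constant_copying();
+const auto module_2 = luci::Importer(optimized_source.get()).importModule(circle_model);
+```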
diff --git a/compiler/embedded-import-value-test/evalverify.sh b/compiler/embedded-import-value-test/evalverify.sh
new file mode 100755
index 000000000..a99e76f3e
--- /dev/null
+++ b/compiler/embedded-import-value-test/evalverify.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+# This script verifies that models imported without constant copying execute correctly in luci_interpreter
+#
+# HOW TO USE
+#
+# ./evalverify.sh <path/to/bin_dir> <path/to/work_dir> <TEST 1> <TEST 2> ...
+# bin_dir : build directory of embedded-import-value-test (ex: build/compiler/embedded-import-value-test)
+# work_dir : artifacts directory where test materials exist
+
+BINDIR="$1"; shift
+WORKDIR="$1"; shift
+TEST_DRIVER_PATH="${BINDIR}/test_driver"
+TEST_RESULT_DIR="${BINDIR}/result"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+mkdir -p "${TEST_RESULT_DIR}"
+for TESTCASE in "$@"; do
+ TESTED+=("${TESTCASE}")
+
+ TESTCASE_FILE="${WORKDIR}/${TESTCASE}"
+ TEST_RESULT_FILE="${TEST_RESULT_DIR}/${TESTCASE}"
+
+ PASSED_TAG="${TEST_RESULT_FILE}.passed"
+ rm -f "${PASSED_TAG}"
+
+ cat > "${TEST_RESULT_FILE}.log" <(
+ exec 2>&1
+ set -ex
+
+ "${TEST_DRIVER_PATH}" --model "${TESTCASE_FILE}.circle"
+
+ if [[ $? -eq 0 ]]; then
+ touch "${PASSED_TAG}"
+ fi
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("${TESTCASE}")
+ else
+ FAILED+=("${TESTCASE}")
+ fi
+done
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/embedded-import-value-test/requires.cmake b/compiler/embedded-import-value-test/requires.cmake
new file mode 100644
index 000000000..f8af5f27e
--- /dev/null
+++ b/compiler/embedded-import-value-test/requires.cmake
@@ -0,0 +1,6 @@
+require("common-artifacts")
+require("luci")
+require("luci-interpreter")
+require("safemain")
+require("oops")
+require("loco")
diff --git a/compiler/embedded-import-value-test/src/TestDriver.cpp b/compiler/embedded-import-value-test/src/TestDriver.cpp
new file mode 100644
index 000000000..63fd745eb
--- /dev/null
+++ b/compiler/embedded-import-value-test/src/TestDriver.cpp
@@ -0,0 +1,242 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <luci_interpreter/GraphBuilderRegistry.h>
+#include <luci_interpreter/Interpreter.h>
+
+#include <luci/Importer.h>
+
+#include <algorithm>
+#include <cassert>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <vector>
+#include <string>
+#include <random>
+
+namespace
+{
+
+uint32_t tensor_size_of(const luci::CircleNode *node)
+{
+ uint32_t tensor_size = loco::size(node->dtype());
+ for (uint32_t i = 0; i < node->rank(); ++i)
+ tensor_size *= node->dim(i).value();
+ return tensor_size;
+}
+
+std::vector<uint8_t> random_data_for(const luci::CircleInput *node)
+{
+ // allocate data buffer
+ std::vector<uint8_t> inputs_data(tensor_size_of(node));
+ auto *buffer = inputs_data.data();
+
+ // define size of buffer in elements
+ const auto dtype = node->dtype();
+ assert(inputs_data.size() % loco::size(dtype) == 0); // sanity check: whole number of elements
+ const auto element_count = inputs_data.size() / loco::size(dtype);
+
+ // random generator engine
+ std::random_device device;
+ std::mt19937 engine{device()};
+
+ // fill buffer with random data
+ switch (node->dtype())
+ {
+ case loco::DataType::FLOAT32:
+ {
+ auto element_buffer = reinterpret_cast<float *>(buffer);
+
+ std::uniform_real_distribution<float> distrib(-3, 3);
+ const auto generator = [&distrib, &engine]() { return distrib(engine); };
+ std::generate(element_buffer, element_buffer + element_count, generator);
+
+ break;
+ }
+ case loco::DataType::U8:
+ {
+ auto element_buffer = buffer;
+
+ std::uniform_int_distribution<uint8_t> distrib(100, 200);
+ const auto generator = [&distrib, &engine]() { return distrib(engine); };
+ std::generate(element_buffer, element_buffer + element_count, generator);
+
+ break;
+ }
+ case loco::DataType::S16:
+ {
+ auto element_buffer = reinterpret_cast<int16_t *>(buffer);
+
+ std::uniform_int_distribution<int16_t> distrib(0, 100);
+ const auto generator = [&distrib, &engine]() { return distrib(engine); };
+ std::generate(element_buffer, element_buffer + element_count, generator);
+
+ break;
+ }
+ case loco::DataType::S32:
+ {
+ auto element_buffer = reinterpret_cast<int32_t *>(buffer);
+
+ std::uniform_int_distribution<int32_t> distrib(0, 100);
+ const auto generator = [&distrib, &engine]() { return distrib(engine); };
+ std::generate(element_buffer, element_buffer + element_count, generator);
+
+ break;
+ }
+ case loco::DataType::BOOL:
+ {
+ // bool data is equivalent to uint8_t data restricted to the [0, 1] range
+ auto element_buffer = buffer;
+
+ std::uniform_int_distribution<uint8_t> distrib(0, 1);
+ const auto generator = [&distrib, &engine]() { return distrib(engine); };
+ std::generate(element_buffer, element_buffer + element_count, generator);
+
+ break;
+ }
+ default:
+ // TODO Support other dtypes
+ throw std::runtime_error("Unsupported data type, yet!");
+ }
+
+ return inputs_data;
+}
+
+} // namespace
+
+int entry(int argc, char **argv)
+{
+ // check arguments
+ if (argc != 3 || std::string(argv[1]) != "--model")
+ {
+ std::cerr << "Usage: " << argv[0] << " --model <path/to/model>" << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ // open file with model
+ const auto model_file = std::string(argv[2]);
+ std::ifstream fs(model_file, std::ifstream::binary);
+ if (fs.fail())
+ {
+ std::cerr << "Cannot open model file \"" << model_file << "\"." << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ // create constant circle model
+ const std::vector<char> model_buffer((std::istreambuf_iterator<char>(fs)),
+ std::istreambuf_iterator<char>());
+ const auto circle_model = circle::GetModel(model_buffer.data());
+
+ // create random inputs for the model
+ std::vector<std::vector<uint8_t>> inputs_data;
+ {
+ // model inputs
+ auto model = luci::Importer(nullptr).importModule(circle_model);
+ const auto inputs = loco::input_nodes(model->graph());
+
+ // create random data for each input
+ for (const auto *input : inputs)
+ {
+ const auto input_node = loco::must_cast<const luci::CircleInput *>(input);
+ inputs_data.emplace_back(random_data_for(input_node));
+ }
+ }
+
+ // interpret given module
+ const auto interpret_module_and_compute_output =
+ [&](const std::unique_ptr<luci::Module> &module) {
+ // create interpreter
+ luci_interpreter::Interpreter interpreter(module.get());
+
+ // model's input and output nodes
+ const auto input_nodes = loco::input_nodes(module->graph());
+ const auto output_nodes = loco::output_nodes(module->graph());
+
+ // set inputs
+ for (uint32_t i = 0; i < input_nodes.size(); ++i)
+ {
+ const auto input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[i]);
+ const auto &data = inputs_data.at(i);
+ interpreter.writeInputTensor(input_node, data.data(), data.size());
+ }
+
+ // do inference
+ interpreter.interpret();
+
+ // read outputs
+ std::vector<std::vector<uint8_t>> outputs_data;
+ for (const auto *node : output_nodes)
+ {
+ const auto output_node = loco::must_cast<const luci::CircleOutput *>(node);
+
+ // allocate output buffer
+ outputs_data.emplace_back(tensor_size_of(output_node));
+
+ auto &data = outputs_data.back();
+ interpreter.readOutputTensor(output_node, data.data(), data.size());
+ }
+
+ return outputs_data;
+ };
+
+ // import with copying, execute and save
+ std::vector<std::vector<uint8_t>> outputs_data_1;
+ {
+ const auto default_source = &luci::GraphBuilderRegistry::get();
+ const auto module = luci::Importer(default_source).importModule(circle_model);
+ if (not module)
+ {
+ std::cerr << "Fail to import model with constant copying." << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ outputs_data_1 = interpret_module_and_compute_output(module);
+ }
+
+ // import without copying, execute and save
+ std::vector<std::vector<uint8_t>> outputs_data_2;
+ {
+ const auto optimized_source = luci_interpreter::source_without_constant_copying();
+ const auto module = luci::Importer(optimized_source.get()).importModule(circle_model);
+ if (not module)
+ {
+ std::cerr << "Fail to import model without constant copying." << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ outputs_data_2 = interpret_module_and_compute_output(module);
+ }
+
+ // check all tensors are equal
+ assert(outputs_data_1.size() == outputs_data_2.size());
+ for (uint32_t n = 0; n < outputs_data_1.size(); ++n)
+ {
+ const auto &output_1 = outputs_data_1.at(n);
+ const auto &output_2 = outputs_data_2.at(n);
+ assert(output_1.size() == output_2.size());
+
+ for (uint32_t o = 0; o < output_1.size(); ++o)
+ {
+ if (output_1[o] != output_2[o])
+ {
+ std::cerr << "Values mismatch in model's output number " << n << std::endl;
+ return EXIT_FAILURE;
+ }
+ }
+ }
+
+ std::cout << "[TEST PASSED]" << std::endl;
+ return EXIT_SUCCESS;
+}
diff --git a/compiler/embedded-import-value-test/test.lst b/compiler/embedded-import-value-test/test.lst
new file mode 100644
index 000000000..924a60dcc
--- /dev/null
+++ b/compiler/embedded-import-value-test/test.lst
@@ -0,0 +1,192 @@
+#addeval(Abs_000)
+addeval(Add_000)
+#addeval(Add_001)
+addeval(Add_U8_000)
+#addeval(AddN_000)
+addeval(ArgMax_000)
+addeval(ArgMax_001)
+addeval(ArgMax_002)
+addeval(ArgMax_003)
+addeval(ArgMax_U8_000)
+addeval(ArgMax_U8_001)
+addeval(ArgMax_U8_002)
+addeval(ArgMax_U8_003)
+#addeval(ArgMin_000)
+#addeval(ArgMin_001)
+#addeval(ArgMin_002)
+#addeval(ArgMin_003)
+#addeval(ArgMin_U8_000)
+#addeval(ArgMin_U8_001)
+#addeval(ArgMin_U8_002)
+#addeval(ArgMin_U8_003)
+addeval(AveragePool2D_000)
+#addeval(BatchMatMul_000)
+#addeval(BatchMatMulV2_000)
+#addeval(BatchMatMulV2_001)
+#addeval(BatchToSpaceND_000)
+addeval(Cast_000)
+addeval(Cast_001)
+#addeval(Ceil_000)
+addeval(Concatenation_000)
+addeval(Concatenation_U8_000)
+addeval(Conv2D_000)
+addeval(Conv2D_001)
+addeval(Conv2D_002)
+addeval(Conv2D_003)
+addeval(Conv2D_U8_000)
+addeval(Conv2D_U8_001)
+#addeval(Cos_000)
+addeval(DepthToSpace_000)
+addeval(DepthwiseConv2D_000)
+addeval(DepthwiseConv2D_U8_000)
+#addeval(DepthwiseConv2D_U8_001)
+addeval(DepthwiseConv2D_001)
+addeval(Div_000)
+addeval(ELU_000)
+addeval(Equal_000)
+addeval(Exp_000)
+#addeval(ExpandDims_000)
+#addeval(ExpandDims_001)
+#addeval(ExpandDims_002)
+#addeval(ExpandDims_003)
+#addeval(Fill_000)
+#addeval(Fill_001)
+addeval(Floor_000)
+#addeval(FloorDiv_000)
+#addeval(FloorDiv_001)
+#addeval(FloorMod_000)
+#addeval(FloorMod_001)
+addeval(FullyConnected_000)
+addeval(FullyConnected_001)
+addeval(FullyConnected_002)
+#addeval(FullyConnected_U8_000)
+addeval(Gather_000)
+#addeval(GatherNd_000)
+#addeval(Greater_000)
+#addeval(GreaterEqual_000)
+addeval(If_000)
+addeval(If_001)
+addeval(L2Normalize_000)
+addeval(L2Pool2D_000)
+#addeval(L2Pool2D_U8_000)
+addeval(LeakyRelu_000)
+addeval(Less_000)
+addeval(LessEqual_000)
+addeval(LocalResponseNormalization_000)
+#addeval(Log_000)
+addeval(LogicalAnd_000)
+addeval(LogicalNot_000)
+addeval(LogicalOr_000)
+addeval(Logistic_000)
+addeval(LogSoftmax_000)
+#addeval(MatMul_000)
+#addeval(MatrixDiag_000)
+#addeval(MatrixSetDiag_000)
+addeval(Maximum_000)
+addeval(MaxPool2D_000)
+addeval(MaxPool2D_U8_000)
+addeval(Mean_000)
+addeval(Mean_001)
+#addeval(Mean_U8_000)
+#addeval(Minimum_000)
+#addeval(MirrorPad_000)
+addeval(Mul_000)
+#addeval(Mul_U8_000)
+addeval(Neg_000)
+addeval(NotEqual_000)
+addeval(OneHot_000)
+addeval(OneHot_001)
+addeval(OneHot_002)
+#addeval(OneHot_003)
+addeval(Pack_000)
+addeval(Pack_U8_000)
+addeval(Pad_000)
+addeval(Pad_U8_000)
+addeval(Pow_000)
+addeval(PRelu_000)
+#addeval(Range_000)
+#addeval(Rank_000)
+#addeval(ReduceAny_000)
+#addeval(ReduceAny_001)
+#addeval(ReduceAny_002)
+#addeval(ReduceAny_003)
+#addeval(ReduceMax_000)
+#addeval(ReduceMin_000)
+#addeval(ReduceProd_000)
+#addeval(ReduceProd_001)
+#addeval(ReduceProd_002)
+#addeval(ReduceProd_003)
+addeval(ReLU_000)
+addeval(ReLU6_000)
+#addeval(ReLUN1To1_000)
+addeval(Reshape_000)
+addeval(Reshape_001)
+addeval(Reshape_002)
+#addeval(Reshape_003)
+addeval(Reshape_U8_000)
+addeval(ResizeBilinear_000)
+addeval(ResizeNearestNeighbor_000)
+#addeval(ReverseSequence_000)
+#addeval(ReverseV2_000)
+#addeval(Round_000)
+addeval(Rsqrt_000)
+#addeval(ScatterNd_000)
+#addeval(SegmentSum_000)
+#addeval(Select_000)
+#addeval(Select_001)
+#addeval(Select_002)
+#addeval(SelectV2_000)
+#addeval(SelectV2_001)
+#addeval(SelectV2_002)
+#addeval(Shape_000)
+addeval(SignatureDef_MultiOut_000)
+addeval(SignatureDef_MultiOut_001)
+#addeval(Sin_000)
+addeval(Slice_000)
+addeval(Softmax_000)
+addeval(Softmax_U8_000)
+addeval(SpaceToBatchND_000)
+addeval(SpaceToBatchND_001)
+addeval(SpaceToBatchND_002)
+addeval(SpaceToBatchND_003)
+addeval(SpaceToDepth_000)
+#addeval(SparseToDense_000)
+addeval(Split_000)
+addeval(SplitV_000)
+addeval(Sqrt_000)
+addeval(Square_000)
+addeval(SquaredDifference_000)
+addeval(Squeeze_000)
+addeval(Squeeze_001)
+addeval(StridedSlice_000)
+addeval(StridedSlice_001)
+addeval(StridedSlice_002)
+addeval(Sub_000)
+addeval(Sub_U8_000)
+#addeval(Sum_000)
+#addeval(Sum_001)
+addeval(SVDF_000)
+addeval(SVDF_001)
+addeval(Tanh_000)
+#addeval(Tile_000)
+#addeval(Tile_U8_000)
+#addeval(TopKV2_000)
+#addeval(TopKV2_001)
+addeval(Transpose_000)
+addeval(TransposeConv_000)
+addeval(Unpack_000)
+addeval(Unpack_001)
+addeval(Unpack_002)
+addeval(Unpack_003)
+#addeval(Where_000)
+#addeval(Where_001)
+#addeval(While_000)
+#addeval(While_001)
+#addeval(While_002)
+#addeval(While_003)
+addeval(YUV_TO_RGB_U8_000)
+#addeval(ZerosLike_000)
+
+# Simple Network test
+addeval(Part_While_000)
+addeval(Part_While_001)
diff --git a/compiler/enco/CMakeLists.txt b/compiler/enco/CMakeLists.txt
index 17300e25e..3702f9501 100644
--- a/compiler/enco/CMakeLists.txt
+++ b/compiler/enco/CMakeLists.txt
@@ -1,4 +1,9 @@
add_subdirectory(core)
add_subdirectory(frontend)
add_subdirectory(cli)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
add_subdirectory(test)
diff --git a/compiler/enco/cli/CMakeLists.txt b/compiler/enco/cli/CMakeLists.txt
index 5a43ab655..6777f329b 100644
--- a/compiler/enco/cli/CMakeLists.txt
+++ b/compiler/enco/cli/CMakeLists.txt
@@ -5,7 +5,6 @@ target_include_directories(enco-cli PRIVATE src)
target_link_libraries(enco-cli enco_intf_cmdline)
target_link_libraries(enco-cli enco_intf_frontend)
target_link_libraries(enco-cli enco_core)
-target_link_libraries(enco-cli stdex)
target_link_libraries(enco-cli dl)
# Let's use project-wide compile options
target_link_libraries(enco-cli nncc_common)
diff --git a/compiler/enco/cli/src/Driver.cpp b/compiler/enco/cli/src/Driver.cpp
index 185bb13b9..fe6cefb8c 100644
--- a/compiler/enco/cli/src/Driver.cpp
+++ b/compiler/enco/cli/src/Driver.cpp
@@ -135,8 +135,7 @@ private:
} // namespace
-#include <stdex/Memory.h>
-
+#include <memory>
#include <map>
#include <iostream>
@@ -153,7 +152,7 @@ static int entry(int argc, char **argv)
std::map<std::string, std::function<void(const std::string &arg)>> argparse;
argparse["--frontend"] = [&](const std::string &path) {
- frontend_zone = stdex::make_unique<FrontendZone>(path);
+ frontend_zone = std::make_unique<FrontendZone>(path);
};
argparse["--frontend-arg"] = [&](const std::string &arg) { frontend_zone->append(arg); };
diff --git a/compiler/enco/core/CMakeLists.txt b/compiler/enco/core/CMakeLists.txt
index f437e687a..19a64231a 100644
--- a/compiler/enco/core/CMakeLists.txt
+++ b/compiler/enco/core/CMakeLists.txt
@@ -17,15 +17,14 @@ target_link_libraries(enco_core PUBLIC coco_generic)
# These libraries are linked for internal use, and thus do not appear in public headers.
target_link_libraries(enco_core PRIVATE pp)
target_link_libraries(enco_core PRIVATE morph)
-target_link_libraries(enco_core PRIVATE stdex)
# Let's use nncc project-wide build options
target_link_libraries(enco_core PRIVATE nncc_common)
-nnas_find_package(GTest QUIET)
-
-if(NOT GTest_FOUND)
+if(NOT ENABLE_TEST)
return()
-endif(NOT GTest_FOUND)
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest QUIET)
add_executable(enco_core_test ${TESTS})
target_include_directories(enco_core_test PRIVATE src)
diff --git a/compiler/enco/core/src/ANN/Binder.h b/compiler/enco/core/src/ANN/Binder.h
index 71b95676b..be9f705c7 100644
--- a/compiler/enco/core/src/ANN/Binder.h
+++ b/compiler/enco/core/src/ANN/Binder.h
@@ -32,7 +32,7 @@ class ANNBinder
{
public:
ANNBinder(coco::Block *block, std::unique_ptr<ann::Module> &&module)
- : _block{block}, _module{std::move(module)}
+ : _block{block}, _module{std::move(module)}
{
// DO NOTHING
}
diff --git a/compiler/enco/core/src/ANN/Context.cpp b/compiler/enco/core/src/ANN/Context.cpp
index d4d1882fa..b6d2a3d42 100644
--- a/compiler/enco/core/src/ANN/Context.cpp
+++ b/compiler/enco/core/src/ANN/Context.cpp
@@ -16,12 +16,12 @@
#include "ANN/Context.h"
-#include <stdex/Memory.h>
+#include <memory>
ANNBinder *ANNContext::create(coco::Block *blk)
{
- auto mod = stdex::make_unique<ann::Module>();
- auto obj = stdex::make_unique<ANNBinder>(blk, std::move(mod));
+ auto mod = std::make_unique<ann::Module>();
+ auto obj = std::make_unique<ANNBinder>(blk, std::move(mod));
auto ptr = obj.get();
_binders.emplace_back(std::move(obj));
diff --git a/compiler/enco/core/src/ANN/Context.test.cpp b/compiler/enco/core/src/ANN/Context.test.cpp
index 7fd26f30c..252d92290 100644
--- a/compiler/enco/core/src/ANN/Context.test.cpp
+++ b/compiler/enco/core/src/ANN/Context.test.cpp
@@ -33,7 +33,7 @@ public:
protected:
std::unique_ptr<coco::Module> m;
};
-}
+} // namespace
TEST_F(ANNContextTest, constructor)
{
diff --git a/compiler/enco/core/src/ANN/IR/OperandInventory.cpp b/compiler/enco/core/src/ANN/IR/OperandInventory.cpp
index c7ad38811..4399c3900 100644
--- a/compiler/enco/core/src/ANN/IR/OperandInventory.cpp
+++ b/compiler/enco/core/src/ANN/IR/OperandInventory.cpp
@@ -16,9 +16,9 @@
#include "ANN/IR/OperandInventory.h"
-#include <stdex/Memory.h>
+#include <memory>
-using stdex::make_unique;
+using std::make_unique;
namespace ann
{
diff --git a/compiler/enco/core/src/ANN/IR/Operation.h b/compiler/enco/core/src/ANN/IR/Operation.h
index cacc2b794..a1f1d46e2 100644
--- a/compiler/enco/core/src/ANN/IR/Operation.h
+++ b/compiler/enco/core/src/ANN/IR/Operation.h
@@ -38,7 +38,7 @@ public:
public:
Operation(const Code &code, std::initializer_list<OperandID> inputs,
std::initializer_list<OperandID> outputs)
- : _code{code}, _inputs{inputs}, _outputs{outputs}
+ : _code{code}, _inputs{inputs}, _outputs{outputs}
{
// DO NOTHING
}
diff --git a/compiler/enco/core/src/ANN/IR/OperationInventory.cpp b/compiler/enco/core/src/ANN/IR/OperationInventory.cpp
index 37d48c170..93108dfb7 100644
--- a/compiler/enco/core/src/ANN/IR/OperationInventory.cpp
+++ b/compiler/enco/core/src/ANN/IR/OperationInventory.cpp
@@ -16,9 +16,9 @@
#include "OperationInventory.h"
-#include <stdex/Memory.h>
+#include <memory>
-using stdex::make_unique;
+using std::make_unique;
namespace ann
{
diff --git a/compiler/enco/core/src/ANN/IR/WeightInventory.cpp b/compiler/enco/core/src/ANN/IR/WeightInventory.cpp
index d8809ac08..edcb16aed 100644
--- a/compiler/enco/core/src/ANN/IR/WeightInventory.cpp
+++ b/compiler/enco/core/src/ANN/IR/WeightInventory.cpp
@@ -16,9 +16,9 @@
#include "WeightInventory.h"
-#include <stdex/Memory.h>
+#include <memory>
-using stdex::make_unique;
+using std::make_unique;
namespace ann
{
diff --git a/compiler/enco/core/src/AsmCode.h b/compiler/enco/core/src/AsmCode.h
index c43892888..6d57f1851 100644
--- a/compiler/enco/core/src/AsmCode.h
+++ b/compiler/enco/core/src/AsmCode.h
@@ -27,7 +27,7 @@ class AsmCode
{
public:
AsmCode(const std::string &filename, const std::string &varname)
- : _filename{filename}, _varname{varname}
+ : _filename{filename}, _varname{varname}
{
// DO NOTHING
}
diff --git a/compiler/enco/core/src/Backend.cpp b/compiler/enco/core/src/Backend.cpp
index d4bec7447..77374fecd 100644
--- a/compiler/enco/core/src/Backend.cpp
+++ b/compiler/enco/core/src/Backend.cpp
@@ -44,13 +44,12 @@
#include "Transforms/Split.h"
#include "Transforms/GlobalDataGeneration.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <stdexcept>
#include <iostream>
#include <fstream>
-using stdex::make_unique;
+using std::make_unique;
using namespace enco;
namespace
@@ -168,7 +167,7 @@ void BackendImpl::compile(coco::Module *m, coco::Data *d)
ofs << CppCode{data_var, code(sess)} << std::endl;
}
-} // namespace enco
+} // namespace
#include <iostream>
diff --git a/compiler/enco/core/src/CodeIndex.h b/compiler/enco/core/src/CodeIndex.h
index 7f2da6463..ed8f24109 100644
--- a/compiler/enco/core/src/CodeIndex.h
+++ b/compiler/enco/core/src/CodeIndex.h
@@ -30,7 +30,7 @@ public:
public:
CodeIndex(const coco::BlockIndex &blk_ind, const coco::InstrIndex &ins_ind)
- : _blk_ind{blk_ind}, _ins_ind{ins_ind}
+ : _blk_ind{blk_ind}, _ins_ind{ins_ind}
{
}
diff --git a/compiler/enco/core/src/CppGen/Host.cpp b/compiler/enco/core/src/CppGen/Host.cpp
index 37e0583d7..63baf0b31 100644
--- a/compiler/enco/core/src/CppGen/Host.cpp
+++ b/compiler/enco/core/src/CppGen/Host.cpp
@@ -18,8 +18,7 @@
#include <pp/EnclosedDocument.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <map>
#include <string>
@@ -157,7 +156,7 @@ public:
public:
TransferLoop(uint32_t count, uint32_t src_step, uint32_t dst_step)
- : _count{count}, _step{src_step, dst_step}
+ : _count{count}, _step{src_step, dst_step}
{
// DO NOTHING
}
@@ -293,14 +292,14 @@ std::unique_ptr<pp::MultiLineText> HostBlockCompiler::compile(const coco::Block
{
InstrPrinter prn{_mem};
- auto res = stdex::make_unique<pp::LinearDocument>();
+ auto res = std::make_unique<pp::LinearDocument>();
for (auto ins = blk->instr()->head(); ins; ins = ins->next())
{
res->append(ins->accept(prn));
}
- return std::move(res);
+ return res;
}
} // namespace enco
diff --git a/compiler/enco/core/src/CppGen/Subnet.cpp b/compiler/enco/core/src/CppGen/Subnet.cpp
index 9a636c6ae..3fc14edf5 100644
--- a/compiler/enco/core/src/CppGen/Subnet.cpp
+++ b/compiler/enco/core/src/CppGen/Subnet.cpp
@@ -21,11 +21,10 @@
#include <pp/LinearDocument.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <sstream>
-using stdex::make_unique;
+using std::make_unique;
using enco::concat;
#define S(content) #content
@@ -117,7 +116,7 @@ class ScalarOperandDecl final : public CodeFragment
{
public:
ScalarOperandDecl(const std::string &model, const ann::DType &dtype)
- : _model{model}, _dtype{dtype}
+ : _model{model}, _dtype{dtype}
{
// DO NOTHING
}
@@ -150,7 +149,7 @@ class TensorOperandDecl final : public CodeFragment
public:
TensorOperandDecl(const std::string &model, const ann::DType &dtype,
const nncc::core::ADT::tensor::Shape &shape)
- : _model{model}, _dtype{dtype}, _shape{shape}
+ : _model{model}, _dtype{dtype}, _shape{shape}
{
// DO NOTHING
}
@@ -194,7 +193,7 @@ class WeightDecl final : public CodeFragment
public:
WeightDecl(const std::string &model, const ann::OperandID &id, const std::string &base,
const std::string &size)
- : _model{model}, _id{id}, _base{base}, _size{size}
+ : _model{model}, _id{id}, _base{base}, _size{size}
{
// DO NOTHING
}
@@ -374,7 +373,7 @@ std::unique_ptr<SubnetStruct> SubnetStructBuilder::build(const ANNBinder *binder
// Finalize compilation
res->ctor()->append("ANeuralNetworksCompilation_finish(", cname, ");");
- return std::move(res);
+ return res;
}
std::unique_ptr<pp::MultiLineText> SubnetBlockCompiler::compile(const ANNBinder *binder) const
@@ -416,7 +415,7 @@ std::unique_ptr<pp::MultiLineText> SubnetBlockCompiler::compile(const ANNBinder
res->append("ANeuralNetworksExecution_free(execution);");
- return std::move(res);
+ return res;
}
} // namespace enco
diff --git a/compiler/enco/core/src/Session.cpp b/compiler/enco/core/src/Session.cpp
index 034f23892..18af87ace 100644
--- a/compiler/enco/core/src/Session.cpp
+++ b/compiler/enco/core/src/Session.cpp
@@ -16,12 +16,10 @@
#include "Session.h"
-#include <stdex/Memory.h>
-
#include <map>
#include <memory>
-using stdex::make_unique;
+using std::make_unique;
namespace
{
diff --git a/compiler/enco/core/src/Support/Debugging.cpp b/compiler/enco/core/src/Support/Debugging.cpp
index bd65a27d8..9a9a7745e 100644
--- a/compiler/enco/core/src/Support/Debugging.cpp
+++ b/compiler/enco/core/src/Support/Debugging.cpp
@@ -77,7 +77,7 @@ pp::LinearDocument operator<<(const SectionBuilder &builder, Callback cb)
}
SectionBuilder section(const std::string &tag) { return SectionBuilder{tag}; }
-}
+} // namespace
/**
* SECTION: Bag
diff --git a/compiler/enco/core/src/Transforms/FeatureUnification.cpp b/compiler/enco/core/src/Transforms/FeatureUnification.cpp
index 1a7a0a8a4..9e4a8e19f 100644
--- a/compiler/enco/core/src/Transforms/FeatureUnification.cpp
+++ b/compiler/enco/core/src/Transforms/FeatureUnification.cpp
@@ -17,14 +17,13 @@
#include "FeatureUnification.h"
#include "IRUtils.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <set>
#include <vector>
#include <cassert>
-using stdex::make_unique;
+using std::make_unique;
namespace
{
diff --git a/compiler/enco/core/src/Transforms/GlobalDataGeneration.cpp b/compiler/enco/core/src/Transforms/GlobalDataGeneration.cpp
index 152477a51..cb5a0a9a9 100644
--- a/compiler/enco/core/src/Transforms/GlobalDataGeneration.cpp
+++ b/compiler/enco/core/src/Transforms/GlobalDataGeneration.cpp
@@ -18,11 +18,10 @@
#include "Split.h"
#include "Dims.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <map>
-using stdex::make_unique;
+using std::make_unique;
namespace
{
diff --git a/compiler/enco/core/src/Transforms/Split.cpp b/compiler/enco/core/src/Transforms/Split.cpp
index b57b8f882..4bb21b0a7 100644
--- a/compiler/enco/core/src/Transforms/Split.cpp
+++ b/compiler/enco/core/src/Transforms/Split.cpp
@@ -22,13 +22,13 @@
#include <coco/IR.h>
#include <nncc/core/ADT/kernel/NHWCLayout.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <map>
#include <stdexcept>
#include <functional>
-using stdex::make_unique;
+using std::make_unique;
namespace
{
@@ -337,8 +337,8 @@ public:
auto ofm = binder->addOperand<float>(_ofm);
binder->addOperation(
- ann::Operation::Code::DEPTHWISE_CONV_2D,
- {ifm, ker, bias, left, right, top, bottom, hstride, vstride, multiplier, fuse}, {ofm});
+ ann::Operation::Code::DEPTHWISE_CONV_2D,
+ {ifm, ker, bias, left, right, top, bottom, hstride, vstride, multiplier, fuse}, {ofm});
}
private:
@@ -656,7 +656,7 @@ public:
app->ofm(ofm);
app->ker(ker);
- return std::move(app);
+ return app;
}
else
{
@@ -676,7 +676,7 @@ public:
app->ofm(ofm);
app->ker(ker);
- return std::move(app);
+ return app;
}
}
}
@@ -704,7 +704,7 @@ public:
app->right(right);
app->out(out);
- return std::move(app);
+ return app;
}
}
else if (auto op = eval->op()->asMul())
@@ -731,7 +731,7 @@ public:
app->right(right);
app->out(out);
- return std::move(app);
+ return app;
}
}
else if (auto op = eval->op()->asPadF())
@@ -754,7 +754,7 @@ public:
app->ifm(ifm);
app->ofm(ofm);
- return std::move(app);
+ return app;
}
}
else if (auto maxpool = eval->op()->asMaxPool2D())
@@ -779,7 +779,7 @@ public:
app->ifm(ifm);
app->ofm(ofm);
- return std::move(app);
+ return app;
}
}
else if (auto avgpool = eval->op()->asAvgPool2D())
@@ -808,7 +808,7 @@ public:
app->ifm(ifm);
app->ofm(ofm);
- return std::move(app);
+ return app;
}
}
}
@@ -831,7 +831,7 @@ public:
app->ifm(ifm);
app->ofm(ofm);
- return std::move(app);
+ return app;
}
}
else if (auto relu6 = eval->op()->asReLU6())
@@ -853,7 +853,7 @@ public:
app->ifm(ifm);
app->ofm(ofm);
- return std::move(app);
+ return app;
}
}
else if (auto op = eval->op()->asConcatF())
@@ -880,7 +880,7 @@ public:
app->right(right);
app->out(out);
- return std::move(app);
+ return app;
}
}
else if (auto op = eval->op()->asSub())
@@ -907,7 +907,7 @@ public:
app->right(right);
app->out(out);
- return std::move(app);
+ return app;
}
}
else if (auto op = eval->op()->asDiv())
@@ -934,7 +934,7 @@ public:
app->right(right);
app->out(out);
- return std::move(app);
+ return app;
}
}
@@ -967,7 +967,7 @@ std::unique_ptr<ANNOpAppender> make_appender(coco::Instr *ins)
app->left(depth_concat->fst()->asFeature());
app->right(depth_concat->snd()->asFeature());
- return std::move(app);
+ return app;
}
// Build ANN IR from ANNConv2D instruction
@@ -986,7 +986,7 @@ std::unique_ptr<ANNOpAppender> make_appender(coco::Instr *ins)
app->ker(conv2d->ker()->asKernel());
app->bias(coco::safe_cast<coco::FeatureObject>(conv2d->bias()));
- return std::move(app);
+ return app;
}
return nullptr;
diff --git a/compiler/enco/core/src/Transforms/Split.h b/compiler/enco/core/src/Transforms/Split.h
index b4e1d7baf..85ad2684f 100644
--- a/compiler/enco/core/src/Transforms/Split.h
+++ b/compiler/enco/core/src/Transforms/Split.h
@@ -43,6 +43,6 @@ struct PhaseConstructionPass final : public Pass
void run(const SessionID &sess) const override { split_into_phases(code(sess)); }
};
-} // namespace enco;
+} // namespace enco
#endif // __SPLIT_H__
diff --git a/compiler/enco/frontend/caffe/CMakeLists.txt b/compiler/enco/frontend/caffe/CMakeLists.txt
index ce43a41d3..baf7f7bd6 100644
--- a/compiler/enco/frontend/caffe/CMakeLists.txt
+++ b/compiler/enco/frontend/caffe/CMakeLists.txt
@@ -16,13 +16,12 @@ target_link_libraries(enco_caffe_frontend enco_intf_frontend)
target_link_libraries(enco_caffe_frontend enco_intf_cmdline)
target_link_libraries(enco_caffe_frontend morph)
target_link_libraries(enco_caffe_frontend caffeproto)
-target_link_libraries(enco_caffe_frontend stdex)
-nnas_find_package(GTest QUIET)
-
-if(NOT GTest_FOUND)
+if(NOT ENABLE_TEST)
return()
-endif(NOT GTest_FOUND)
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest QUIET)
nnas_find_package(Caffe QUIET)
diff --git a/compiler/enco/frontend/caffe/src/Context.h b/compiler/enco/frontend/caffe/src/Context.h
index aca57ce6f..7cf27ead4 100644
--- a/compiler/enco/frontend/caffe/src/Context.h
+++ b/compiler/enco/frontend/caffe/src/Context.h
@@ -81,8 +81,8 @@ public:
explicit GraphBuilderContext(coco::Module *module, coco::Data *data, coco::Block *block,
ShapeContext &shape_ctx, StoreContext &bag_ctx,
WeightContext &weight_ctx)
- : _module(module), _data(data), _block(block), _shape_ctx(shape_ctx), _bag_ctx(bag_ctx),
- _weight_ctx(weight_ctx)
+ : _module(module), _data(data), _block(block), _shape_ctx(shape_ctx), _bag_ctx(bag_ctx),
+ _weight_ctx(weight_ctx)
{
// DO NOTHING
}
diff --git a/compiler/enco/frontend/caffe/src/Entry.cpp b/compiler/enco/frontend/caffe/src/Entry.cpp
index 2bdb73eac..41e174bc4 100644
--- a/compiler/enco/frontend/caffe/src/Entry.cpp
+++ b/compiler/enco/frontend/caffe/src/Entry.cpp
@@ -19,8 +19,7 @@
#include <cmdline/View.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <fstream>
#include <cassert>
@@ -28,7 +27,7 @@ extern "C" std::unique_ptr<enco::Frontend> make_frontend(const cmdline::View &cm
{
assert(cmdline.size() == 2);
- auto frontend = stdex::make_unique<Frontend>();
+ auto frontend = std::make_unique<Frontend>();
// Fill prototxt
{
diff --git a/compiler/enco/frontend/caffe/src/GraphBuilderRegistry.cpp b/compiler/enco/frontend/caffe/src/GraphBuilderRegistry.cpp
index e9db31177..d9a1c9617 100644
--- a/compiler/enco/frontend/caffe/src/GraphBuilderRegistry.cpp
+++ b/compiler/enco/frontend/caffe/src/GraphBuilderRegistry.cpp
@@ -25,9 +25,9 @@
#include "Layer/Scale.h"
#include "Layer/BatchNorm.h"
-#include <stdex/Memory.h>
+#include <memory>
-using stdex::make_unique;
+using std::make_unique;
namespace caffeimport
{
diff --git a/compiler/enco/frontend/caffe/src/Layer/Convolution.cpp b/compiler/enco/frontend/caffe/src/Layer/Convolution.cpp
index 9fb096d49..807cce44d 100644
--- a/compiler/enco/frontend/caffe/src/Layer/Convolution.cpp
+++ b/compiler/enco/frontend/caffe/src/Layer/Convolution.cpp
@@ -101,7 +101,7 @@ void ConvolutionBuilder::build(const ::caffe::LayerParameter &layer,
auto ker_dst = data->f32()->access(ker_obj);
auto ker_src = kernel::OverlayFactory<float, kernel::NCHWLayout>::make(
- ker_obj->shape(), ker_blob->mutable_data()->begin());
+ ker_obj->shape(), ker_blob->mutable_data()->begin());
for (uint32_t n = 0; n < ker_obj->shape().count(); ++n)
{
diff --git a/compiler/enco/frontend/tflite/CMakeLists.txt b/compiler/enco/frontend/tflite/CMakeLists.txt
index 77159879e..995e66f81 100644
--- a/compiler/enco/frontend/tflite/CMakeLists.txt
+++ b/compiler/enco/frontend/tflite/CMakeLists.txt
@@ -1,4 +1,4 @@
-nnas_find_package(FlatBuffers QUIET)
+nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
if(NOT FlatBuffers_FOUND)
return()
@@ -17,17 +17,15 @@ add_library(enco_tflite_frontend SHARED ${SOURCES})
target_include_directories(enco_tflite_frontend PRIVATE src)
target_link_libraries(enco_tflite_frontend enco_intf_frontend)
target_link_libraries(enco_tflite_frontend enco_intf_cmdline)
-target_link_libraries(enco_tflite_frontend flatbuffers)
target_link_libraries(enco_tflite_frontend enco_tflite_schema)
-target_link_libraries(enco_tflite_frontend stdex)
target_link_libraries(enco_tflite_frontend morph)
target_link_libraries(enco_tflite_frontend cwrap)
-nnas_find_package(GTest QUIET)
-
-if(NOT GTest_FOUND)
+if(NOT ENABLE_TEST)
return()
-endif(NOT GTest_FOUND)
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest QUIET)
add_executable(enco_tflite_frontend_test ${TESTS})
target_include_directories(enco_tflite_frontend_test PRIVATE src)
diff --git a/compiler/enco/frontend/tflite/src/Context.cpp b/compiler/enco/frontend/tflite/src/Context.cpp
index ef030dc5d..588c3c44b 100644
--- a/compiler/enco/frontend/tflite/src/Context.cpp
+++ b/compiler/enco/frontend/tflite/src/Context.cpp
@@ -48,7 +48,7 @@ void TensorContext::prepare(const tflite::SubGraph *graph)
}
TflOpCodeContext::TflOpCodeContext(
- const flatbuffers::Vector<flatbuffers::Offset<tflite::OperatorCode>> *opcodes)
+ const flatbuffers::Vector<flatbuffers::Offset<tflite::OperatorCode>> *opcodes)
{
for (const tflite::OperatorCode *opcode : *opcodes)
{
diff --git a/compiler/enco/frontend/tflite/src/Context.h b/compiler/enco/frontend/tflite/src/Context.h
index f72385f9a..caeac4ab5 100644
--- a/compiler/enco/frontend/tflite/src/Context.h
+++ b/compiler/enco/frontend/tflite/src/Context.h
@@ -135,8 +135,8 @@ public:
explicit GraphBuilderContext(coco::Module *m, coco::Data *d, coco::Block *block,
TensorBags &tensor_bags, TensorContext &tensor_context,
TflBufferContext &buffer_context, const tflite::SubGraph *graph)
- : _m(m), _d(d), _block(block), _tensor_bags(tensor_bags), _tensor_context(tensor_context),
- _buffer_context(buffer_context), _graph(graph)
+ : _m(m), _d(d), _block(block), _tensor_bags(tensor_bags), _tensor_context(tensor_context),
+ _buffer_context(buffer_context), _graph(graph)
{
// DO NOTHING
}
diff --git a/compiler/enco/frontend/tflite/src/Entry.cpp b/compiler/enco/frontend/tflite/src/Entry.cpp
index c69e18074..74d3096ab 100644
--- a/compiler/enco/frontend/tflite/src/Entry.cpp
+++ b/compiler/enco/frontend/tflite/src/Entry.cpp
@@ -19,12 +19,11 @@
#include <cmdline/View.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <fstream>
#include <cassert>
-using stdex::make_unique;
+using std::make_unique;
extern "C" std::unique_ptr<enco::Frontend> make_frontend(const cmdline::View &cmdline)
{
diff --git a/compiler/enco/frontend/tflite/src/Frontend.test.cpp b/compiler/enco/frontend/tflite/src/Frontend.test.cpp
index aee6099e7..1bc774629 100644
--- a/compiler/enco/frontend/tflite/src/Frontend.test.cpp
+++ b/compiler/enco/frontend/tflite/src/Frontend.test.cpp
@@ -16,11 +16,11 @@
#include "Frontend.h"
-#include <stdex/Memory.h>
+#include <memory>
#include <gtest/gtest.h>
-using stdex::make_unique;
+using std::make_unique;
namespace
{
diff --git a/compiler/enco/frontend/tflite/src/GraphBuilderRegistry.h b/compiler/enco/frontend/tflite/src/GraphBuilderRegistry.h
index 1ae882e89..ca4f74fc5 100644
--- a/compiler/enco/frontend/tflite/src/GraphBuilderRegistry.h
+++ b/compiler/enco/frontend/tflite/src/GraphBuilderRegistry.h
@@ -29,11 +29,11 @@
#include "Op/Div.h"
#include <schema_generated.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <map>
-using stdex::make_unique;
+using std::make_unique;
namespace tflimport
{
@@ -68,7 +68,7 @@ private:
// add GraphBuilder for each tflite operation.
_builder_map[tflite::BuiltinOperator_CONV_2D] = make_unique<Conv2DGraphBuilder>();
_builder_map[tflite::BuiltinOperator_DEPTHWISE_CONV_2D] =
- make_unique<DepthwiseConv2DGraphBuilder>();
+ make_unique<DepthwiseConv2DGraphBuilder>();
_builder_map[tflite::BuiltinOperator_AVERAGE_POOL_2D] = make_unique<AvgPool2DGraphBuilder>();
_builder_map[tflite::BuiltinOperator_MAX_POOL_2D] = make_unique<MaxPool2DGraphBuilder>();
_builder_map[tflite::BuiltinOperator_CONCATENATION] = make_unique<ConcatenationGraphBuilder>();
diff --git a/compiler/enco/frontend/tflite/src/Op/AveragePool2D.cpp b/compiler/enco/frontend/tflite/src/Op/AveragePool2D.cpp
index 16f68fcdb..6f8223f10 100644
--- a/compiler/enco/frontend/tflite/src/Op/AveragePool2D.cpp
+++ b/compiler/enco/frontend/tflite/src/Op/AveragePool2D.cpp
@@ -102,7 +102,7 @@ void AvgPool2DGraphBuilder::build(const tflite::Operator *op, GraphBuilderContex
coco_avgpool2d->stride()->horizontal(params->stride_w());
coco::Padding2D padding =
- pool2D_padding(params, ifm_shape, params->filter_width(), params->filter_height());
+ pool2D_padding(params, ifm_shape, params->filter_width(), params->filter_height());
coco_avgpool2d->pad()->top(padding.top());
coco_avgpool2d->pad()->bottom(padding.bottom());
diff --git a/compiler/enco/frontend/tflite/src/Op/Conv2D.cpp b/compiler/enco/frontend/tflite/src/Op/Conv2D.cpp
index e9516c0e9..d1f97597f 100644
--- a/compiler/enco/frontend/tflite/src/Op/Conv2D.cpp
+++ b/compiler/enco/frontend/tflite/src/Op/Conv2D.cpp
@@ -171,7 +171,7 @@ void Conv2DGraphBuilder::build(const tflite::Operator *op, GraphBuilderContext *
// fused activation
coco::FeatureObject *act_output =
- build_activation(conv_params->fused_activation_function(), blk, last_obj);
+ build_activation(conv_params->fused_activation_function(), blk, last_obj);
// Create Copy Instr of last_obj to Output Object
auto copy_ins = instr_builder(m).copy(ofm_obj, act_output);
diff --git a/compiler/enco/frontend/tflite/src/Op/DepthwiseConv2D.cpp b/compiler/enco/frontend/tflite/src/Op/DepthwiseConv2D.cpp
index e3d7b263e..bc903c380 100644
--- a/compiler/enco/frontend/tflite/src/Op/DepthwiseConv2D.cpp
+++ b/compiler/enco/frontend/tflite/src/Op/DepthwiseConv2D.cpp
@@ -138,8 +138,8 @@ void DepthwiseConv2DGraphBuilder::build(const tflite::Operator *op,
auto wc = new_shape.width() * new_shape.depth();
ker_spn[n * hwc + h * wc + w * new_shape.depth() + c] =
- buffer.ptr[tfl_n * hw * new_shape.count() + /* new_shape.count() is old c */
- h * new_shape.width() * new_shape.count() + w * new_shape.count() + tfl_c];
+ buffer.ptr[tfl_n * hw * new_shape.count() + /* new_shape.count() is old c */
+ h * new_shape.width() * new_shape.count() + w * new_shape.count() + tfl_c];
}
}
}
@@ -220,7 +220,7 @@ void DepthwiseConv2DGraphBuilder::build(const tflite::Operator *op,
// fused activation
coco::FeatureObject *act_output =
- build_activation(dconv_params->fused_activation_function(), blk, last_obj);
+ build_activation(dconv_params->fused_activation_function(), blk, last_obj);
// Create Copy Instr of last_obj to Output Object
auto copy_ins = instr_builder(m).copy(ofm_obj, act_output);
diff --git a/compiler/enco/frontend/tflite/src/Op/MaxPool2D.cpp b/compiler/enco/frontend/tflite/src/Op/MaxPool2D.cpp
index ee4406425..41e0cde17 100644
--- a/compiler/enco/frontend/tflite/src/Op/MaxPool2D.cpp
+++ b/compiler/enco/frontend/tflite/src/Op/MaxPool2D.cpp
@@ -99,7 +99,7 @@ void MaxPool2DGraphBuilder::build(const tflite::Operator *op, GraphBuilderContex
coco_maxpool2d->stride()->horizontal(params->stride_w());
coco::Padding2D padding =
- pool2D_padding(params, ifm_shape, params->filter_width(), params->filter_height());
+ pool2D_padding(params, ifm_shape, params->filter_width(), params->filter_height());
coco_maxpool2d->pad()->top(padding.top());
coco_maxpool2d->pad()->bottom(padding.bottom());
diff --git a/compiler/enco/test/basic/000/CMakeLists.txt b/compiler/enco/test/basic/000/CMakeLists.txt
index 20ba3c571..95e9cb0b0 100644
--- a/compiler/enco/test/basic/000/CMakeLists.txt
+++ b/compiler/enco/test/basic/000/CMakeLists.txt
@@ -11,7 +11,6 @@ set(LIB_TARGET ${PREFIX}-lib)
add_library(${PREFIX}-frontend SHARED enco.test.cpp)
target_link_libraries(${PREFIX}-frontend enco_intf_cmdline)
target_link_libraries(${PREFIX}-frontend enco_intf_frontend)
-target_link_libraries(${PREFIX}-frontend stdex)
# NOTE BYPRODUCTS are not specified in order to enforce source code generation
add_custom_command(OUTPUT ${GENERATED_CPP} ${GENERATED_ASM} ${GENERATED_BIN}
diff --git a/compiler/enco/test/basic/000/enco.test.cpp b/compiler/enco/test/basic/000/enco.test.cpp
index 3dbf96613..84c28d0f7 100644
--- a/compiler/enco/test/basic/000/enco.test.cpp
+++ b/compiler/enco/test/basic/000/enco.test.cpp
@@ -19,7 +19,7 @@
#include <nncc/core/ADT/tensor/LexicalLayout.h>
-#include <stdex/Memory.h>
+#include <memory>
using namespace nncc::core::ADT;
@@ -77,5 +77,5 @@ struct Frontend final : public enco::Frontend
extern "C" std::unique_ptr<enco::Frontend> make_frontend(const cmdline::View &cmdline)
{
- return stdex::make_unique<Frontend>();
+ return std::make_unique<Frontend>();
}
diff --git a/compiler/enco/test/binder.cpp b/compiler/enco/test/binder.cpp
index c8c72fc8b..f04cfa4f6 100644
--- a/compiler/enco/test/binder.cpp
+++ b/compiler/enco/test/binder.cpp
@@ -46,9 +46,9 @@ void Network_invoke(Network *net);
#include <nncc/core/ADT/tensor/LexicalLayout.h>
#include <nncc/core/ADT/tensor/Overlay.h>
-#include <stdex/Memory.h>
+#include <memory>
-using stdex::make_unique;
+using std::make_unique;
using namespace nncc::core::ADT;
namespace
diff --git a/compiler/enco/test/caffe/CMakeLists.txt b/compiler/enco/test/caffe/CMakeLists.txt
index ee49b6b28..d552d6ec8 100644
--- a/compiler/enco/test/caffe/CMakeLists.txt
+++ b/compiler/enco/test/caffe/CMakeLists.txt
@@ -123,7 +123,6 @@ foreach(PREFIX IN ITEMS ${CANDIDATES})
target_link_libraries(${BINDER_TARGET} nnkit_intf_backend)
target_link_libraries(${BINDER_TARGET} ann_api)
target_link_libraries(${BINDER_TARGET} ann_ref_static)
- target_link_libraries(${BINDER_TARGET} stdex)
set_target_properties(${BINDER_TARGET} PROPERTIES OUTPUT_NAME ${PREFIX})
list(APPEND TESTS ${PREFIX})
diff --git a/compiler/enco/test/tflite/CMakeLists.txt b/compiler/enco/test/tflite/CMakeLists.txt
index d5a96a6da..81d5ed2a2 100644
--- a/compiler/enco/test/tflite/CMakeLists.txt
+++ b/compiler/enco/test/tflite/CMakeLists.txt
@@ -90,7 +90,6 @@ foreach(PREFIX IN ITEMS ${CANDIDATES})
target_link_libraries(${BINDER_TARGET} nnkit_intf_backend)
target_link_libraries(${BINDER_TARGET} ann_api)
target_link_libraries(${BINDER_TARGET} ann_ref_static)
- target_link_libraries(${BINDER_TARGET} stdex)
set_target_properties(${BINDER_TARGET} PROPERTIES OUTPUT_NAME ${PREFIX})
list(APPEND TESTS ${PREFIX})
diff --git a/compiler/encodump/CMakeLists.txt b/compiler/encodump/CMakeLists.txt
index 58fe17a51..a4ad441b2 100644
--- a/compiler/encodump/CMakeLists.txt
+++ b/compiler/encodump/CMakeLists.txt
@@ -13,5 +13,4 @@ target_include_directories(encodump PRIVATE src)
target_link_libraries(encodump enco_intf_frontend)
target_link_libraries(encodump enco_core)
target_link_libraries(encodump safemain)
-target_link_libraries(encodump stdex)
target_link_libraries(encodump dl)
diff --git a/compiler/encodump/src/Driver.cpp b/compiler/encodump/src/Driver.cpp
index f27cbe904..2928d1d25 100644
--- a/compiler/encodump/src/Driver.cpp
+++ b/compiler/encodump/src/Driver.cpp
@@ -137,8 +137,7 @@ private:
} // namespace
-#include <stdex/Memory.h>
-
+#include <memory>
#include <map>
#include <iostream>
@@ -163,7 +162,7 @@ int entry(int argc, char **argv)
std::map<std::string, std::function<void(const std::string &arg)>> argparse;
argparse["--frontend"] = [&](const std::string &path) {
- frontend_zone = stdex::make_unique<FrontendZone>(path);
+ frontend_zone = std::make_unique<FrontendZone>(path);
};
argparse["--frontend-arg"] = [&](const std::string &arg) { frontend_zone->append(arg); };
diff --git a/compiler/exo/CMakeLists.txt b/compiler/exo/CMakeLists.txt
index 79c75ef2e..645db714c 100644
--- a/compiler/exo/CMakeLists.txt
+++ b/compiler/exo/CMakeLists.txt
@@ -1,4 +1,4 @@
-nnas_find_package(FlatBuffers QUIET)
+nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
if(NOT FlatBuffers_FOUND)
message(STATUS "Build exo: FALSE (missing FlatBuffers)")
@@ -15,7 +15,7 @@ endif(NOT TensorFlowSource_FOUND)
message(STATUS "Build exo: TRUE")
set(TFLITE_SCHEMA_DIR "${TensorFlowSource_DIR}/tensorflow/lite/schema")
-set(CIRCLE_SCHEMA_DIR "${NNAS_PROJECT_SOURCE_DIR}/nnpackage/schema")
+set(CIRCLE_SCHEMA_DIR "${NNAS_PROJECT_SOURCE_DIR}/res/CircleSchema/0.3")
FlatBuffers_Target(exo_tflite_fbs
OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen"
@@ -39,7 +39,6 @@ target_include_directories(exo PRIVATE src)
target_link_libraries(exo PUBLIC exo_tflite_fbs)
target_link_libraries(exo PUBLIC exo_circle_fbs)
target_link_libraries(exo PUBLIC loco)
-target_link_libraries(exo PRIVATE stdex)
target_link_libraries(exo PRIVATE pepper_str)
target_link_libraries(exo PRIVATE pepper_strcast)
target_link_libraries(exo PRIVATE locoex_customop)
@@ -64,7 +63,6 @@ nnas_find_package(GTest REQUIRED)
GTest_AddTest(exo_test ${TESTS})
target_include_directories(exo_test PRIVATE src)
-target_link_libraries(exo_test stdex)
target_link_libraries(exo_test pepper_str)
target_link_libraries(exo_test exo)
target_link_libraries(exo_test hermes_std)
diff --git a/compiler/exo/requires.cmake b/compiler/exo/requires.cmake
index 6378b942d..3116c5757 100644
--- a/compiler/exo/requires.cmake
+++ b/compiler/exo/requires.cmake
@@ -1,4 +1,3 @@
-require("stdex")
require("loco")
require("locoex-customop")
require("logo")
diff --git a/compiler/exo/src/Circle/CircleExporter.cpp b/compiler/exo/src/Circle/CircleExporter.cpp
index 797749090..cfcb9a258 100644
--- a/compiler/exo/src/Circle/CircleExporter.cpp
+++ b/compiler/exo/src/Circle/CircleExporter.cpp
@@ -18,16 +18,15 @@
#include "CircleExporterImpl.h"
-#include <stdex/Memory.h>
-
#include <oops/InternalExn.h>
+#include <memory>
#include <fstream>
namespace exo
{
-CircleExporter::CircleExporter(loco::Graph *graph) : _impl(stdex::make_unique<Impl>(graph))
+CircleExporter::CircleExporter(loco::Graph *graph) : _impl(std::make_unique<Impl>(graph))
{
// NOTHING TO DO
}
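
CircleExporter keeps its pimpl layout; only the allocator call changes. A sketch of why the idiom still works unchanged with std::make_unique, assuming a nested Impl as in the source (Graph is a stand-in for loco::Graph):

    #include <memory>

    struct Graph; // stand-in for loco::Graph

    class ExporterSketch
    {
    public:
      explicit ExporterSketch(Graph *graph);
      ~ExporterSketch(); // declared here, defined after Impl is complete

    private:
      struct Impl; // hidden implementation (pimpl)
      std::unique_ptr<Impl> _impl;
    };

    struct ExporterSketch::Impl
    {
      explicit Impl(Graph *g) : _graph(g) {}
      Graph *_graph;
    };

    ExporterSketch::ExporterSketch(Graph *graph) : _impl(std::make_unique<Impl>(graph))
    {
      // NOTHING TO DO
    }

    ExporterSketch::~ExporterSketch() = default; // Impl is complete here
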
diff --git a/compiler/exo/src/Circle/CircleExporterImpl.cpp b/compiler/exo/src/Circle/CircleExporterImpl.cpp
index 4cba33da1..a93931597 100644
--- a/compiler/exo/src/Circle/CircleExporterImpl.cpp
+++ b/compiler/exo/src/Circle/CircleExporterImpl.cpp
@@ -88,7 +88,7 @@ encodeOperatorCodes(FlatBufferBuilder &builder, std::unordered_map<OpCode, uint3
INTERNAL_EXN("Cannot find code for customop even though opcode is BuiltinOperator_CUSTOM");
operator_codes_vec[idx] =
- CreateOperatorCode(builder, it.first.opcode, builder.CreateString(custom_code->second));
+ CreateOperatorCode(builder, it.first.opcode, builder.CreateString(custom_code->second));
}
}
return builder.CreateVector(operator_codes_vec);
@@ -148,7 +148,7 @@ void CircleExporter::Impl::exportGraph(loco::Graph *graph)
// encode operator codes
auto operator_codes =
- encodeOperatorCodes(_builder, gd._operator_codes, gd._custom_operator_codes);
+ encodeOperatorCodes(_builder, gd._operator_codes, gd._custom_operator_codes);
// Subgraphs
Offset<SubGraph> subgraph = exportSubgraph(gd);
diff --git a/compiler/exo/src/Circle/CircleExporterUtils.cpp b/compiler/exo/src/Circle/CircleExporterUtils.cpp
index 12b204ce7..079f115f6 100644
--- a/compiler/exo/src/Circle/CircleExporterUtils.cpp
+++ b/compiler/exo/src/Circle/CircleExporterUtils.cpp
@@ -78,13 +78,13 @@ circle::Padding getOpPadding(const loco::Padding2D *pad, const loco::Stride<2> *
//
// NOTE input and output 'feature' map are shape of NHWC
bool same_padding_criterion_1 =
- (static_cast<uint32_t>(ofm._dims[1]) == (ifm._dims[1] - 1) / stride->vertical() + 1) &&
- (static_cast<uint32_t>(ofm._dims[2]) == (ifm._dims[2] - 1) / stride->horizontal() + 1);
+ (static_cast<uint32_t>(ofm._dims[1]) == (ifm._dims[1] - 1) / stride->vertical() + 1) &&
+ (static_cast<uint32_t>(ofm._dims[2]) == (ifm._dims[2] - 1) / stride->horizontal() + 1);
// For same padding, rear padding is same or bigger than front padding by at most 1
bool same_padding_criterion_2 =
- (pad->top() <= pad->bottom()) && (pad->bottom() <= pad->top() + 1) &&
- (pad->left() <= pad->right()) && (pad->right() <= pad->left() + 1);
+ (pad->top() <= pad->bottom()) && (pad->bottom() <= pad->top() + 1) &&
+ (pad->left() <= pad->right()) && (pad->right() <= pad->left() + 1);
if (same_padding_criterion_1 && same_padding_criterion_2)
return circle::Padding_SAME;
@@ -123,8 +123,7 @@ void registerGraphIOName(loco::Graph *graph, SerializedModelData &gd)
gd._data_format = circle::DataFormat::DataFormat_CHANNELS_LAST;
}
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
namespace
@@ -150,7 +149,7 @@ private:
void set_tensor_index(loco::Node *node, const TFLTensorIndex &tensor_id)
{
assert(node->annot<TFLTensorIndexAnnotation>() == nullptr);
- node->annot(stdex::make_unique<TFLTensorIndexAnnotation>(tensor_id));
+ node->annot(std::make_unique<TFLTensorIndexAnnotation>(tensor_id));
}
TFLTensorIndex get_tensor_index(loco::Node *node)
diff --git a/compiler/exo/src/Circle/CircleExporterUtils.h b/compiler/exo/src/Circle/CircleExporterUtils.h
index fdd162bae..78f0cf7ed 100644
--- a/compiler/exo/src/Circle/CircleExporterUtils.h
+++ b/compiler/exo/src/Circle/CircleExporterUtils.h
@@ -65,7 +65,7 @@ namespace circle_detail
{
/**
- * @breif Record the information of T/F Lite SubGraph and its mapping to loco
+ * @brief Record the information of T/F Lite SubGraph and its mapping to loco
*/
struct SubGraphContext
{
diff --git a/compiler/exo/src/Circle/CircleOperationExporter.cpp b/compiler/exo/src/Circle/CircleOperationExporter.cpp
index 390e2ec99..8b7337011 100644
--- a/compiler/exo/src/Circle/CircleOperationExporter.cpp
+++ b/compiler/exo/src/Circle/CircleOperationExporter.cpp
@@ -89,13 +89,19 @@ public:
void visit(loco::ReLU *) final;
void visit(loco::ReLU6 *) final;
void visit(loco::Tanh *) final;
- void visit(loco::Push *) final { /* DO NOTHING */}
- void visit(loco::Pull *) final { /* DO NOTHING */}
+ void visit(loco::Push *) final
+ { /* DO NOTHING */
+ }
+ void visit(loco::Pull *) final
+ { /* DO NOTHING */
+ }
void visit(loco::FeatureEncode *) final;
void visit(loco::FeatureDecode *) final;
void visit(loco::FilterEncode *) final;
void visit(loco::DepthwiseFilterEncode *) final;
- void visit(loco::ConstGen *) final { /* skip, everything is done in exportOpDefinedTensors */}
+ void visit(loco::ConstGen *) final
+ { /* skip, everything is done in exportOpDefinedTensors */
+ }
void visit(loco::MaxPool2D *) final;
void visit(loco::AvgPool2D *) final;
void visit(loco::Conv2D *) final;
@@ -235,7 +241,7 @@ void OperationExporter::visit(locoex::TFLFullyConnected *node)
auto inputs = builder.CreateVector(inputs_vec);
auto outputs = builder.CreateVector(outputs_vec);
auto options =
- CreateFullyConnectedOptions(builder, to_circle_actfunc(node->fusedActivationFunction()));
+ CreateFullyConnectedOptions(builder, to_circle_actfunc(node->fusedActivationFunction()));
// Make FULLY_CONNECTED operator
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
@@ -375,8 +381,8 @@ void OperationExporter::visit(locoex::TFLTranspose *node)
auto options = CreateTransposeOptions(builder);
auto op_offset =
- CreateOperator(builder, op_idx, inputs, outputs,
- circle::BuiltinOptions::BuiltinOptions_TransposeOptions, options.Union());
+ CreateOperator(builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions::BuiltinOptions_TransposeOptions, options.Union());
gd._operators.push_back(op_offset);
}
@@ -393,7 +399,7 @@ void OperationExporter::visit(locoex::TFLTransposeConv *node)
auto outputs = builder.CreateVector(outputs_vec);
circle::Padding padding = getOpPadding(node->padding());
auto options =
- CreateTransposeConvOptions(builder, padding, node->stride()->w(), node->stride()->h());
+ CreateTransposeConvOptions(builder, padding, node->stride()->w(), node->stride()->h());
// Make TRANSPOSE_CONV operator
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
@@ -405,7 +411,7 @@ template <class TFLPool2D>
void OperationExporter::export_pool_2d(TFLPool2D *node, circle::BuiltinOperator builtin_op)
{
EXO_ASSERT(builtin_op == circle::BuiltinOperator_MAX_POOL_2D ||
- builtin_op == circle::BuiltinOperator_AVERAGE_POOL_2D,
+ builtin_op == circle::BuiltinOperator_AVERAGE_POOL_2D,
"should be maxpool or avgpool");
EXO_ASSERT(node->padding() != locoex::Padding::UNDEFINED, "Padding is not set");
@@ -481,10 +487,10 @@ void OperationExporter::visit(loco::MaxPool2D *node)
auto inputs = builder.CreateVector(inputs_vec);
auto outputs = builder.CreateVector(outputs_vec);
circle::Padding padding = getOpPadding(
- node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
- auto options = CreatePool2DOptions(builder, padding, node->stride()->horizontal(),
- node->stride()->vertical(), node->window()->horizontal(),
- node->window()->vertical());
+ node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
+ auto options =
+ CreatePool2DOptions(builder, padding, node->stride()->horizontal(), node->stride()->vertical(),
+ node->window()->horizontal(), node->window()->vertical());
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
circle::BuiltinOptions_Pool2DOptions, options.Union());
gd._operators.push_back(op_offset);
@@ -501,10 +507,10 @@ void OperationExporter::visit(loco::AvgPool2D *node)
auto inputs = builder.CreateVector(inputs_vec);
auto outputs = builder.CreateVector(outputs_vec);
circle::Padding padding = getOpPadding(
- node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
- auto options = CreatePool2DOptions(builder, padding, node->stride()->horizontal(),
- node->stride()->vertical(), node->window()->horizontal(),
- node->window()->vertical());
+ node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
+ auto options =
+ CreatePool2DOptions(builder, padding, node->stride()->horizontal(), node->stride()->vertical(),
+ node->window()->horizontal(), node->window()->vertical());
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
circle::BuiltinOptions_Pool2DOptions, options.Union());
gd._operators.push_back(op_offset);
@@ -527,7 +533,7 @@ void OperationExporter::visit(loco::Conv2D *node)
std::vector<float> bias_vec_data(bias_vec_size); // initialized as zero vector
auto bias_vec_offset =
- builder.CreateVector(reinterpret_cast<uint8_t *>(bias_vec_data.data()), raw_bias_vec_size);
+ builder.CreateVector(reinterpret_cast<uint8_t *>(bias_vec_data.data()), raw_bias_vec_size);
auto bias_buffer_offset = CreateBuffer(builder, bias_vec_offset);
@@ -539,7 +545,7 @@ void OperationExporter::visit(loco::Conv2D *node)
auto name_offset = builder.CreateString("t_" + std::to_string(bias_tensor_id));
auto bias_tensor_offset =
- CreateTensor(builder, bias_vec_shape_offset, TensorType_FLOAT32, bias_buffer_id, name_offset);
+ CreateTensor(builder, bias_vec_shape_offset, TensorType_FLOAT32, bias_buffer_id, name_offset);
gd._tensors.push_back(bias_tensor_offset);
// Make input, output and options for operator
@@ -549,9 +555,9 @@ void OperationExporter::visit(loco::Conv2D *node)
auto inputs = builder.CreateVector(inputs_vec);
auto outputs = builder.CreateVector(outputs_vec);
circle::Padding padding = getOpPadding(
- node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
- auto options = CreateConv2DOptions(builder, padding, node->stride()->horizontal(),
- node->stride()->vertical());
+ node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
+ auto options =
+ CreateConv2DOptions(builder, padding, node->stride()->horizontal(), node->stride()->vertical());
// Make CONV_2D operator
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
@@ -581,7 +587,7 @@ void OperationExporter::visit(loco::TransposedConv2D *node)
}
auto outshape_vec_offset = builder.CreateVector(
- reinterpret_cast<uint8_t *>(outshape_vec_data.data()), raw_outshape_vec_size);
+ reinterpret_cast<uint8_t *>(outshape_vec_data.data()), raw_outshape_vec_size);
auto outshape_buffer_offset = CreateBuffer(builder, outshape_vec_offset);
@@ -630,7 +636,7 @@ void OperationExporter::visit(loco::DepthwiseConv2D *node)
size_t raw_bias_vec_size = bias_vec_size * sizeof(int32_t);
std::vector<float> bias_vec_data(bias_vec_size);
auto bias_vec_offset =
- builder.CreateVector(reinterpret_cast<uint8_t *>(bias_vec_data.data()), raw_bias_vec_size);
+ builder.CreateVector(reinterpret_cast<uint8_t *>(bias_vec_data.data()), raw_bias_vec_size);
auto bias_buffer_offset = CreateBuffer(builder, bias_vec_offset);
@@ -642,7 +648,7 @@ void OperationExporter::visit(loco::DepthwiseConv2D *node)
auto name_offset = builder.CreateString("t_" + std::to_string(bias_tensor_id));
auto bias_tensor_offset =
- CreateTensor(builder, bias_vec_shape_offset, TensorType_FLOAT32, bias_buffer_id, name_offset);
+ CreateTensor(builder, bias_vec_shape_offset, TensorType_FLOAT32, bias_buffer_id, name_offset);
gd._tensors.push_back(bias_tensor_offset);
std::vector<int32_t> inputs_vec{get_tensor_index(node->ifm()), get_tensor_index(node->ker()),
@@ -651,13 +657,13 @@ void OperationExporter::visit(loco::DepthwiseConv2D *node)
auto inputs = builder.CreateVector(inputs_vec);
auto outputs = builder.CreateVector(outputs_vec);
circle::Padding padding = getOpPadding(
- node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
+ node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
int32_t ifm_channel_size = ShapeInference::get(node->ifm())._dims[3];
// multiplier = bias_vec_size(output_size)/ifm_channel_size
auto options =
- CreateDepthwiseConv2DOptions(builder, padding, node->stride()->horizontal(),
- node->stride()->vertical(), bias_vec_size / ifm_channel_size);
+ CreateDepthwiseConv2DOptions(builder, padding, node->stride()->horizontal(),
+ node->stride()->vertical(), bias_vec_size / ifm_channel_size);
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
circle::BuiltinOptions_DepthwiseConv2DOptions, options.Union());
@@ -691,7 +697,7 @@ void OperationExporter::visit(loco::TensorReduce *node)
size_t raw_axes_vec_size = axes_vec_size * sizeof(int32_t);
auto axes_vec_offset =
- builder.CreateVector(reinterpret_cast<uint8_t *>(axes_vec.data()), raw_axes_vec_size);
+ builder.CreateVector(reinterpret_cast<uint8_t *>(axes_vec.data()), raw_axes_vec_size);
auto axes_buffer_offset = CreateBuffer(builder, axes_vec_offset);
@@ -703,7 +709,7 @@ void OperationExporter::visit(loco::TensorReduce *node)
auto name_offset = builder.CreateString("t_" + std::to_string(axes_tensor_id));
auto axes_tensor_offset =
- CreateTensor(builder, axes_vec_shape_offset, TensorType_INT32, axes_buffer_id, name_offset);
+ CreateTensor(builder, axes_vec_shape_offset, TensorType_INT32, axes_buffer_id, name_offset);
gd._tensors.push_back(axes_tensor_offset);
std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), axes_tensor_id};
@@ -766,7 +772,7 @@ void exportAsTranspose(loco::Node *node, FlatBufferBuilder &builder,
constexpr size_t raw_perm_vec_size = perm_vec_size * sizeof(int32_t);
auto perm_vec_offset =
- builder.CreateVector(reinterpret_cast<uint8_t *>(perm_vec_data.data()), raw_perm_vec_size);
+ builder.CreateVector(reinterpret_cast<uint8_t *>(perm_vec_data.data()), raw_perm_vec_size);
auto perm_buffer_offset = CreateBuffer(builder, perm_vec_offset);
@@ -778,7 +784,7 @@ void exportAsTranspose(loco::Node *node, FlatBufferBuilder &builder,
auto name_offset = builder.CreateString("t_" + std::to_string(perm_tensor_id));
auto perm_tensor_offset =
- CreateTensor(builder, perm_vec_shape_offset, TensorType_INT32, perm_buffer_id, name_offset);
+ CreateTensor(builder, perm_vec_shape_offset, TensorType_INT32, perm_buffer_id, name_offset);
gd._tensors.push_back(perm_tensor_offset);
// Create permutation node
@@ -792,7 +798,7 @@ void exportAsTranspose(loco::Node *node, FlatBufferBuilder &builder,
constexpr auto options_type = circle::BuiltinOptions::BuiltinOptions_TransposeOptions;
auto transpose_offset =
- CreateOperator(builder, op_idx, inputs, outputs, options_type, options.Union());
+ CreateOperator(builder, op_idx, inputs, outputs, options_type, options.Union());
gd._operators.push_back(transpose_offset);
}
@@ -878,11 +884,11 @@ void exportAsReshape(loco::Node *node, FlatBufferBuilder &builder,
// but also by input.
auto input_shape_shape_vec_offset =
- builder.CreateVector(std::vector<int32_t>{(int32_t)new_shape_vec.size()});
+ builder.CreateVector(std::vector<int32_t>{(int32_t)new_shape_vec.size()});
size_t input_shape_vec_size = new_shape_vec.size() * sizeof(int32_t);
auto input_shape_input_vec_offset =
- builder.CreateVector(reinterpret_cast<uint8_t *>(new_shape_vec.data()), input_shape_vec_size);
+ builder.CreateVector(reinterpret_cast<uint8_t *>(new_shape_vec.data()), input_shape_vec_size);
auto input_shape_buffer_offset = CreateBuffer(builder, input_shape_input_vec_offset);
const auto input_shape_buffer_id = static_cast<uint32_t>(gd._buffers.size());
@@ -891,7 +897,7 @@ void exportAsReshape(loco::Node *node, FlatBufferBuilder &builder,
auto input_shape_tensor_id = static_cast<int32_t>(gd._tensors.size());
auto name_offset = builder.CreateString("t_" + std::to_string(input_shape_tensor_id));
auto input_shape_tensor_offset = CreateTensor(
- builder, input_shape_shape_vec_offset, TensorType_INT32, input_shape_buffer_id, name_offset);
+ builder, input_shape_shape_vec_offset, TensorType_INT32, input_shape_buffer_id, name_offset);
gd._tensors.push_back(input_shape_tensor_offset);
uint32_t op_idx = gd.registerBuiltinOpcode(circle::BuiltinOperator_RESHAPE);
@@ -1093,7 +1099,7 @@ void OperationExporter::visit(loco::TensorConstantPad *node)
auto padding_shape_vec_ptr = builder.CreateVector(std::vector<int32_t>{padding_vec_size, 2});
// create tensor
auto padding_tensor_ptr =
- CreateTensor(builder, padding_shape_vec_ptr, TensorType_INT32, padding_buffer_id);
+ CreateTensor(builder, padding_shape_vec_ptr, TensorType_INT32, padding_buffer_id);
// get tensor id
const auto padding_tensor_id = static_cast<int32_t>(gd._tensors.size());
diff --git a/compiler/exo/src/Circle/CircleTypeInference.cpp b/compiler/exo/src/Circle/CircleTypeInference.cpp
index a1e92b884..d3d01b4af 100644
--- a/compiler/exo/src/Circle/CircleTypeInference.cpp
+++ b/compiler/exo/src/Circle/CircleTypeInference.cpp
@@ -31,8 +31,6 @@
#include <oops/InternalExn.h>
-#include <stdex/Memory.h>
-
#include <stdexcept>
#include <type_traits>
diff --git a/compiler/exo/src/Conversion/DepthwiseConv2DConverter.cpp b/compiler/exo/src/Conversion/DepthwiseConv2DConverter.cpp
index e3884c3cc..32ad44385 100644
--- a/compiler/exo/src/Conversion/DepthwiseConv2DConverter.cpp
+++ b/compiler/exo/src/Conversion/DepthwiseConv2DConverter.cpp
@@ -25,6 +25,8 @@
#include <loco/Service/TypeInference.h>
#include <loco/Service/ShapeInference.h>
+#include <limits>
+
namespace exo
{
@@ -75,9 +77,9 @@ bool DepthwiseConv2DConverter::convert(loco::DepthwiseConv2D *origin)
reshape->tensor(filter_dec);
int32_t new_shape[4] = {
- 1, static_cast<int32_t>(filter_shape.height().value()),
- static_cast<int32_t>(filter_shape.width().value()),
- static_cast<int32_t>(filter_shape.depth().value() * filter_shape.multiplier().value())};
+ 1, static_cast<int32_t>(filter_shape.height().value()),
+ static_cast<int32_t>(filter_shape.width().value()),
+ static_cast<int32_t>(filter_shape.depth().value() * filter_shape.multiplier().value())};
locoex::set_new_shape(reshape, new_shape, 4);
tfl_dw_conv2d->filter(reshape);
diff --git a/compiler/exo/src/Convert.cpp b/compiler/exo/src/Convert.cpp
index 45f0481f4..3a578eee8 100644
--- a/compiler/exo/src/Convert.cpp
+++ b/compiler/exo/src/Convert.cpp
@@ -32,7 +32,7 @@
#include <logo/RemoveForwardNodePass.h>
#include <logo/Phase.h>
-#include <stdex/Memory.h>
+#include <memory>
namespace exo
{
@@ -49,40 +49,40 @@ void convert_to_TFLNodes(loco::Graph *graph)
logo::Phase phase;
{
// prepare type and shape before conversion
- phase.emplace_back(stdex::make_unique<TypeInferencePass>());
- phase.emplace_back(stdex::make_unique<ShapeInferencePass>());
+ phase.emplace_back(std::make_unique<TypeInferencePass>());
+ phase.emplace_back(std::make_unique<ShapeInferencePass>());
// Add converters for canonical nodes. Note: Not all loco canonical nodes are listed.
- phase.emplace_back(stdex::make_unique<AvgPool2DConverter>());
- phase.emplace_back(stdex::make_unique<ConstGenConverter>());
- phase.emplace_back(stdex::make_unique<Conv2DConverter>());
- phase.emplace_back(stdex::make_unique<DepthwiseConv2DConverter>());
+ phase.emplace_back(std::make_unique<AvgPool2DConverter>());
+ phase.emplace_back(std::make_unique<ConstGenConverter>());
+ phase.emplace_back(std::make_unique<Conv2DConverter>());
+ phase.emplace_back(std::make_unique<DepthwiseConv2DConverter>());
// TODO loco::DepthwiseFilterEncode
- phase.emplace_back(stdex::make_unique<EltwiseAddConverter>());
- phase.emplace_back(stdex::make_unique<EltwiseDivConverter>());
- phase.emplace_back(stdex::make_unique<EltwiseMaxConverter>());
- phase.emplace_back(stdex::make_unique<EltwiseMulConverter>());
- phase.emplace_back(stdex::make_unique<EltwiseSqrtConverter>());
- phase.emplace_back(stdex::make_unique<EltwiseSubConverter>());
- phase.emplace_back(stdex::make_unique<FeatureBiasAddConverter>());
+ phase.emplace_back(std::make_unique<EltwiseAddConverter>());
+ phase.emplace_back(std::make_unique<EltwiseDivConverter>());
+ phase.emplace_back(std::make_unique<EltwiseMaxConverter>());
+ phase.emplace_back(std::make_unique<EltwiseMulConverter>());
+ phase.emplace_back(std::make_unique<EltwiseSqrtConverter>());
+ phase.emplace_back(std::make_unique<EltwiseSubConverter>());
+ phase.emplace_back(std::make_unique<FeatureBiasAddConverter>());
// TODO loco::FixedReshape
- phase.emplace_back(stdex::make_unique<MatMulConverter>());
- phase.emplace_back(stdex::make_unique<MaxPool2DConverter>());
- phase.emplace_back(stdex::make_unique<ReluConverter>());
- phase.emplace_back(stdex::make_unique<Relu6Converter>());
+ phase.emplace_back(std::make_unique<MatMulConverter>());
+ phase.emplace_back(std::make_unique<MaxPool2DConverter>());
+ phase.emplace_back(std::make_unique<ReluConverter>());
+ phase.emplace_back(std::make_unique<Relu6Converter>());
// TODO loco::Tanh
- phase.emplace_back(stdex::make_unique<TensorConcatConverter>());
+ phase.emplace_back(std::make_unique<TensorConcatConverter>());
// TODO loco::TensorBiasAdd
- phase.emplace_back(stdex::make_unique<TensorBroadcastConverter>());
- phase.emplace_back(stdex::make_unique<TensorReduceConverter>());
+ phase.emplace_back(std::make_unique<TensorBroadcastConverter>());
+ phase.emplace_back(std::make_unique<TensorReduceConverter>());
// TODO loco::TensorSoftmax
- phase.emplace_back(stdex::make_unique<TensorTransposeConverter>());
- phase.emplace_back(stdex::make_unique<TransposedConv2DConverter>());
+ phase.emplace_back(std::make_unique<TensorTransposeConverter>());
+ phase.emplace_back(std::make_unique<TransposedConv2DConverter>());
// Add optimization below
- phase.emplace_back(stdex::make_unique<logo::SimplifyDomainConversionPass>());
- phase.emplace_back(stdex::make_unique<logo::RemoveForwardNodePass>());
- phase.emplace_back(stdex::make_unique<logo::RemoveDeadNodePass>());
+ phase.emplace_back(std::make_unique<logo::SimplifyDomainConversionPass>());
+ phase.emplace_back(std::make_unique<logo::RemoveForwardNodePass>());
+ phase.emplace_back(std::make_unique<logo::RemoveDeadNodePass>());
}
logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{graph};
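
Convert.cpp is the densest instance of the migration: every pass in the pipeline is now allocated with std::make_unique. The surrounding idiom, sketched with a single pass (assuming logo's PhaseRunner exposes run() as used elsewhere in this repository):

    #include <logo/Phase.h>
    #include <logo/RemoveDeadNodePass.h>

    #include <memory>

    void run_pipeline(loco::Graph *graph)
    {
      logo::Phase phase;

      // each emplace_back used to read stdex::make_unique<...>()
      phase.emplace_back(std::make_unique<logo::RemoveDeadNodePass>());

      logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{graph};
      phase_runner.run(phase);
    }
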
diff --git a/compiler/exo/src/Dialect/IR/CircleNodes.h b/compiler/exo/src/Dialect/IR/CircleNodes.h
index 7be093103..c93bd1ab0 100644
--- a/compiler/exo/src/Dialect/IR/CircleNodes.h
+++ b/compiler/exo/src/Dialect/IR/CircleNodes.h
@@ -53,8 +53,8 @@ private:
* @brief INSTANCE_NORM in circle
*/
class CircleInstanceNorm final
- : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::INSTANCE_NORM>>,
- public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
+ : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::INSTANCE_NORM>>,
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
public:
/// @note Currently only support FLOAT32 as input node
diff --git a/compiler/exo/src/Dialect/IR/TFLNodes.h b/compiler/exo/src/Dialect/IR/TFLNodes.h
index 41a11e7c0..1642eb1f4 100644
--- a/compiler/exo/src/Dialect/IR/TFLNodes.h
+++ b/compiler/exo/src/Dialect/IR/TFLNodes.h
@@ -129,7 +129,9 @@ class TFLAveragePool2D final : public FixedArityNode<1, TFLNodeImpl<TFLOpcode::A
public TFLNodeMixin<TFLNodeTrait::FusedActFunc>
{
public:
- TFLAveragePool2D() : _padding(Padding::UNDEFINED) { /* empty */}
+ TFLAveragePool2D() : _padding(Padding::UNDEFINED)
+ { /* empty */
+ }
public:
loco::Node *value(void) const { return at(0)->node(); }
@@ -240,9 +242,9 @@ private:
* @brief DEPTHWISE_CONV_2D in TensorFlow Lite
*/
class TFLDepthwiseConv2D final
- : public FixedArityNode<3, TFLNodeImpl<TFLOpcode::DEPTHWISE_CONV_2D>>,
- public TFLNodeMixin<TFLNodeTrait::FusedActFunc>,
- public TFLNodeMixin<TFLNodeTrait::Bias>
+ : public FixedArityNode<3, TFLNodeImpl<TFLOpcode::DEPTHWISE_CONV_2D>>,
+ public TFLNodeMixin<TFLNodeTrait::FusedActFunc>,
+ public TFLNodeMixin<TFLNodeTrait::Bias>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
@@ -325,7 +327,9 @@ class TFLMaxPool2D final : public FixedArityNode<1, TFLNodeImpl<TFLOpcode::MAX_P
public TFLNodeMixin<TFLNodeTrait::FusedActFunc>
{
public:
- TFLMaxPool2D() : _padding(Padding::UNDEFINED) { /* empty */}
+ TFLMaxPool2D() : _padding(Padding::UNDEFINED)
+ { /* empty */
+ }
public:
loco::Node *value(void) const { return at(0)->node(); }
@@ -463,7 +467,7 @@ public:
};
class TFLSquaredDifference final
- : public FixedArityNode<2, TFLNodeImpl<TFLOpcode::SQUARED_DIFFERENCE>>
+ : public FixedArityNode<2, TFLNodeImpl<TFLOpcode::SQUARED_DIFFERENCE>>
{
public:
TFLSquaredDifference() = default;
diff --git a/compiler/exo/src/Dialect/Service/TFLShapeInferenceRule.cpp b/compiler/exo/src/Dialect/Service/TFLShapeInferenceRule.cpp
index f4bb10364..26cc561e1 100644
--- a/compiler/exo/src/Dialect/Service/TFLShapeInferenceRule.cpp
+++ b/compiler/exo/src/Dialect/Service/TFLShapeInferenceRule.cpp
@@ -116,7 +116,7 @@ private:
};
/**
- * @breif Expand shape x and y to same rank by align right and filling with 1
+ * @brief Expand shape x and y to same rank by align right and filling with 1
*/
void expand_rank(loco::TensorShape &x, loco::TensorShape &y)
{
@@ -136,7 +136,7 @@ void expand_rank(loco::TensorShape &x, loco::TensorShape &y)
}
/**
- * @breif Returns shape of expanded dimension of input x and y having same rank
+ * @brief Returns shape of expanded dimension of input x and y having same rank
*/
loco::TensorShape expand_dimension(const loco::TensorShape &x, const loco::TensorShape &y)
{
diff --git a/compiler/exo/src/Dialect/Service/TFLShapeInferenceRule.test.cpp b/compiler/exo/src/Dialect/Service/TFLShapeInferenceRule.test.cpp
index b68728b47..5a7e71dcf 100644
--- a/compiler/exo/src/Dialect/Service/TFLShapeInferenceRule.test.cpp
+++ b/compiler/exo/src/Dialect/Service/TFLShapeInferenceRule.test.cpp
@@ -26,8 +26,6 @@
#include <loco/Service/CanonicalShapeInferenceRule.h>
#include <loco/Service/MultiDialectShapeInferenceRule.h>
-#include <stdex/Memory.h>
-
#include <gtest/gtest.h>
TEST(TFLShapeInferenceRuleTest, minimal_with_TFLRelu)
@@ -53,7 +51,7 @@ TEST(TFLShapeInferenceRuleTest, minimal_with_TFLRelu)
loco::MultiDialectShapeInferenceRule rules;
rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
- .bind(locoex::TFLDialect::get(), &tfl_rule);
+ .bind(locoex::TFLDialect::get(), &tfl_rule);
loco::apply(&rules).to(graph.g.get());
@@ -98,7 +96,7 @@ TEST(TFLShapeInferenceRuleTest, avgpool2d_valid)
loco::MultiDialectShapeInferenceRule rules;
rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
- .bind(locoex::TFLDialect::get(), &tfl_rule);
+ .bind(locoex::TFLDialect::get(), &tfl_rule);
loco::apply(&rules).to(graph.g.get());
@@ -145,7 +143,7 @@ TEST(TFLShapeInferenceRuleTest, avgpool2d_same)
loco::MultiDialectShapeInferenceRule rules;
rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
- .bind(locoex::TFLDialect::get(), &tfl_rule);
+ .bind(locoex::TFLDialect::get(), &tfl_rule);
loco::apply(&rules).to(graph.g.get());
diff --git a/compiler/exo/src/Dialect/Service/TFLTypeInferenceRule.test.cpp b/compiler/exo/src/Dialect/Service/TFLTypeInferenceRule.test.cpp
index 9326e5e58..df7aee49c 100644
--- a/compiler/exo/src/Dialect/Service/TFLTypeInferenceRule.test.cpp
+++ b/compiler/exo/src/Dialect/Service/TFLTypeInferenceRule.test.cpp
@@ -24,8 +24,6 @@
#include <loco/IR/CanonicalDialect.h>
#include <loco/Service/TypeInference.h>
-#include <stdex/Memory.h>
-
#include <gtest/gtest.h>
TEST(TFLTypeInferenceRuleTest, minimal_with_TFLRelu)
diff --git a/compiler/exo/src/ExoFormattedGraph.h b/compiler/exo/src/ExoFormattedGraph.h
index 714e483b5..ec4173329 100644
--- a/compiler/exo/src/ExoFormattedGraph.h
+++ b/compiler/exo/src/ExoFormattedGraph.h
@@ -19,7 +19,7 @@
#include <locop/FormattedGraph.h>
-#include <stdex/Memory.h>
+#include <memory>
namespace exo
{
@@ -47,7 +47,7 @@ public:
public:
std::unique_ptr<locop::NodeSummaryBuilder> create(const locop::SymbolTable *tlb) const final
{
- return stdex::make_unique<NodeSummaryBuilder>(tlb);
+ return std::make_unique<NodeSummaryBuilder>(tlb);
}
};
diff --git a/compiler/exo/src/ExoOptimize.cpp b/compiler/exo/src/ExoOptimize.cpp
index d7278e900..752693f38 100644
--- a/compiler/exo/src/ExoOptimize.cpp
+++ b/compiler/exo/src/ExoOptimize.cpp
@@ -22,7 +22,7 @@
#include <logo/Phase.h>
-#include <stdex/Memory.h>
+#include <memory>
namespace exo
{
@@ -32,36 +32,36 @@ void optimize(loco::Graph *g)
logo::Phase phase;
{
// prepare type and shape before optimization
- phase.emplace_back(stdex::make_unique<TypeInferencePass>());
- phase.emplace_back(stdex::make_unique<ShapeInferencePass>());
+ phase.emplace_back(std::make_unique<TypeInferencePass>());
+ phase.emplace_back(std::make_unique<ShapeInferencePass>());
- phase.emplace_back(stdex::make_unique<FoldReshapeOfConstPass>());
- phase.emplace_back(stdex::make_unique<FoldTransposeOfConstPass>());
+ phase.emplace_back(std::make_unique<FoldReshapeOfConstPass>());
+ phase.emplace_back(std::make_unique<FoldTransposeOfConstPass>());
if (get<Knob::UseFuseBiasAddPass>())
{
- phase.emplace_back(stdex::make_unique<FuseBiasAddPass>());
+ phase.emplace_back(std::make_unique<FuseBiasAddPass>());
}
if (get<Knob::UseFuseInstanceNormPass>())
{
- phase.emplace_back(stdex::make_unique<FuseInstanceNormPass>());
+ phase.emplace_back(std::make_unique<FuseInstanceNormPass>());
}
if (get<Knob::UseFuseReluPass>())
{
- phase.emplace_back(stdex::make_unique<FuseReluPass>());
+ phase.emplace_back(std::make_unique<FuseReluPass>());
}
- phase.emplace_back(stdex::make_unique<FuseRsqrtPass>());
+ phase.emplace_back(std::make_unique<FuseRsqrtPass>());
if (get<Knob::UseFuseSquaredDifferencePass>())
{
- phase.emplace_back(stdex::make_unique<FuseSquaredDifferencePass>());
+ phase.emplace_back(std::make_unique<FuseSquaredDifferencePass>());
}
- phase.emplace_back(stdex::make_unique<MergeConcatNodesPass>());
+ phase.emplace_back(std::make_unique<MergeConcatNodesPass>());
- phase.emplace_back(stdex::make_unique<logo::RemoveDeadNodePass>());
+ phase.emplace_back(std::make_unique<logo::RemoveDeadNodePass>());
}
logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{g};
diff --git a/compiler/exo/src/GraphBlock.cpp b/compiler/exo/src/GraphBlock.cpp
index 0a45ce8ad..b26f2e8b6 100644
--- a/compiler/exo/src/GraphBlock.cpp
+++ b/compiler/exo/src/GraphBlock.cpp
@@ -19,7 +19,7 @@
#include "Check.h"
#include <loco.h>
-#include <stdex/Memory.h>
+#include <memory>
namespace
{
@@ -114,7 +114,7 @@ template <FeatureLayout T> loco::FeatureEncode *make_feature_encode(loco::Node *
EXO_ASSERT(input_for_encode != nullptr, "input should not be nullptr");
loco::Graph *g = input_for_encode->graph();
- auto encoder = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Feature>>();
+ auto encoder = std::make_unique<loco::PermutingEncoder<loco::Domain::Feature>>();
encoder->perm(perm<T>());
@@ -130,7 +130,7 @@ template <FeatureLayout T> loco::FeatureDecode *make_feature_decode(loco::Node *
EXO_ASSERT(input_for_decode != nullptr, "input should not be nullptr");
loco::Graph *g = input_for_decode->graph();
- auto decoder = stdex::make_unique<loco::PermutingDecoder<loco::Domain::Feature>>();
+ auto decoder = std::make_unique<loco::PermutingDecoder<loco::Domain::Feature>>();
decoder->perm(perm<T>());
@@ -146,7 +146,7 @@ template <FilterLayout T> loco::FilterEncode *make_filter_encode(loco::Node *inp
EXO_ASSERT(input_for_encode != nullptr, "filter should not be nullptr");
loco::Graph *g = input_for_encode->graph();
- auto encoder = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
+ auto encoder = std::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
encoder->perm(perm<T>());
@@ -162,7 +162,7 @@ template <FilterLayout T> loco::FilterDecode *make_filter_decode(loco::Node *inp
EXO_ASSERT(input_for_decode != nullptr, "filter should not be nullptr");
loco::Graph *g = input_for_decode->graph();
- auto decoder = stdex::make_unique<loco::PermutingDecoder<loco::Domain::Filter>>();
+ auto decoder = std::make_unique<loco::PermutingDecoder<loco::Domain::Filter>>();
decoder->perm(perm<T>());
@@ -179,7 +179,7 @@ loco::DepthwiseFilterDecode *make_dw_filter_decode(loco::Node *input_for_decode)
EXO_ASSERT(input_for_decode != nullptr, "filter should not be nullptr");
loco::Graph *g = input_for_decode->graph();
- auto decoder = stdex::make_unique<loco::PermutingDecoder<loco::Domain::DepthwiseFilter>>();
+ auto decoder = std::make_unique<loco::PermutingDecoder<loco::Domain::DepthwiseFilter>>();
decoder->perm(perm<T>());
@@ -195,7 +195,7 @@ template <MatrixLayout T> loco::MatrixEncode *make_matrix_encode(loco::Node *inp
EXO_ASSERT(input_for_encode != nullptr, "input should not be nullptr");
loco::Graph *g = input_for_encode->graph();
- auto encoder = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Matrix>>();
+ auto encoder = std::make_unique<loco::PermutingEncoder<loco::Domain::Matrix>>();
encoder->perm(perm<T>());
@@ -211,7 +211,7 @@ template <MatrixLayout T> loco::MatrixDecode *make_matrix_decode(loco::Node *inp
EXO_ASSERT(input_for_decode != nullptr, "input should not be nullptr");
loco::Graph *g = input_for_decode->graph();
- auto decoder = stdex::make_unique<loco::PermutingDecoder<loco::Domain::Matrix>>();
+ auto decoder = std::make_unique<loco::PermutingDecoder<loco::Domain::Matrix>>();
decoder->perm(perm<T>());
diff --git a/compiler/exo/src/GraphBlock.h b/compiler/exo/src/GraphBlock.h
index b771c821b..96e4b0831 100644
--- a/compiler/exo/src/GraphBlock.h
+++ b/compiler/exo/src/GraphBlock.h
@@ -72,7 +72,7 @@ template <MatrixLayout T> loco::MatrixEncode *make_matrix_encode(loco::Node *inp
/// @brief Create a loco::MatrixDecode of given layout
template <MatrixLayout T> loco::MatrixDecode *make_matrix_decode(loco::Node *input_for_decode);
-} // exo
+} // namespace exo
//
// DomainConverter
diff --git a/compiler/exo/src/Log.cpp b/compiler/exo/src/Log.cpp
index aa762968b..cbe9ecb73 100644
--- a/compiler/exo/src/Log.cpp
+++ b/compiler/exo/src/Log.cpp
@@ -17,7 +17,6 @@
#include "Log.h"
#include <hermes/ConsoleReporter.h>
-#include <stdex/Memory.h>
#include <cstdlib>
#include <iostream>
diff --git a/compiler/exo/src/LogHelper.cpp b/compiler/exo/src/LogHelper.cpp
index 7520b7ec8..153356632 100644
--- a/compiler/exo/src/LogHelper.cpp
+++ b/compiler/exo/src/LogHelper.cpp
@@ -72,7 +72,7 @@ namespace exo
FormattedGraph fmt(loco::Graph *g)
{
- auto node_summary_builder = stdex::make_unique<NodeSummaryBuilderFactory>();
+ auto node_summary_builder = std::make_unique<NodeSummaryBuilderFactory>();
return std::move(locop::fmt<locop::LinearV1>(g).with(std::move(node_summary_builder)));
}
diff --git a/compiler/exo/src/LoggingContext.cpp b/compiler/exo/src/LoggingContext.cpp
index 1c14d97b9..120a50e7b 100644
--- a/compiler/exo/src/LoggingContext.cpp
+++ b/compiler/exo/src/LoggingContext.cpp
@@ -18,7 +18,8 @@
#include "Log.h" // To use LoggerConfig
#include <hermes/ConsoleReporter.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace exo
{
@@ -30,11 +31,11 @@ hermes::Context *LoggingContext::get(void)
if (ctx == nullptr)
{
ctx = new hermes::Context;
- ctx->sinks()->append(stdex::make_unique<hermes::ConsoleReporter>());
- ctx->config(stdex::make_unique<LoggerConfig>());
+ ctx->sinks()->append(std::make_unique<hermes::ConsoleReporter>());
+ ctx->config(std::make_unique<LoggerConfig>());
}
return ctx;
}
-} // namespac exo
+} // namespace exo
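
LoggingContext::get() above is a leaky lazy singleton: the hermes::Context is built once, given a console sink and a logger config, and intentionally never freed. A sketch of the shape of that accessor (the exo-specific LoggerConfig is elided and shown only as a comment):

    #include <hermes.h>
    #include <hermes/ConsoleReporter.h>

    #include <memory>

    hermes::Context *logging_context(void)
    {
      static hermes::Context *ctx = nullptr;

      if (ctx == nullptr)
      {
        ctx = new hermes::Context;
        ctx->sinks()->append(std::make_unique<hermes::ConsoleReporter>());
        // the real code also installs the exo-specific config:
        //   ctx->config(std::make_unique<LoggerConfig>());
      }

      return ctx;
    }
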
diff --git a/compiler/exo/src/Pass/FoldTransposeOfConstPass.cpp b/compiler/exo/src/Pass/FoldTransposeOfConstPass.cpp
index 005c42944..66c99121e 100644
--- a/compiler/exo/src/Pass/FoldTransposeOfConstPass.cpp
+++ b/compiler/exo/src/Pass/FoldTransposeOfConstPass.cpp
@@ -124,7 +124,7 @@ void fold_transpose_of_const(locoex::TFLTranspose *transpose)
index_orig.at(perm->at<S32>(axis)) = index_new.at(axis);
const_new->at<FLOAT32>(l.offset(shape_new, index_new)) =
- const_orig->at<FLOAT32>(l.offset(shape_orig, index_orig));
+ const_orig->at<FLOAT32>(l.offset(shape_orig, index_orig));
}
// replace
diff --git a/compiler/exo/src/Pass/FuseBiasAddPass.cpp b/compiler/exo/src/Pass/FuseBiasAddPass.cpp
index 6338dff5d..0e797dc80 100644
--- a/compiler/exo/src/Pass/FuseBiasAddPass.cpp
+++ b/compiler/exo/src/Pass/FuseBiasAddPass.cpp
@@ -136,7 +136,7 @@ public:
Fuser(LatterT *latter)
{
static_assert(std::is_same<LatterT, locoex::TFLAdd>::value ||
- std::is_same<LatterT, locoex::TFLSub>::value,
+ std::is_same<LatterT, locoex::TFLSub>::value,
"wrong template type");
_latter = latter;
@@ -185,7 +185,7 @@ template <class LatterT> locoex::TFLConst *Fuser<LatterT>::create_fused_bias_con
for (uint32_t x = 0; x < bias->dim(0).value(); x++)
new_bias->at<loco::DataType::FLOAT32>(x) = calc<LatterT>(
- bias->at<loco::DataType::FLOAT32>(x), _const_node->at<loco::DataType::FLOAT32>(x));
+ bias->at<loco::DataType::FLOAT32>(x), _const_node->at<loco::DataType::FLOAT32>(x));
}
return new_bias;
@@ -252,14 +252,14 @@ struct Collector final : public locoex::TFLNodeMutableVisitor<void>
void setCandidate(FormerT *former, LatterT *latter, locoex::TFLConst *const_node)
{
static_assert(std::is_same<LatterT, locoex::TFLAdd>::value ||
- std::is_same<LatterT, locoex::TFLSub>::value,
+ std::is_same<LatterT, locoex::TFLSub>::value,
"wrong template type");
if (!check_act_func(former))
return;
auto depth =
- loco::shape_get(as_loco_node(former)).template as<loco::TensorShape>().dim(3).value();
+ loco::shape_get(as_loco_node(former)).template as<loco::TensorShape>().dim(3).value();
auto const_shape = loco::shape_get(const_node).template as<loco::TensorShape>();
if (const_shape.rank() == 1 and const_shape.dim(0) == depth)
diff --git a/compiler/exo/src/Pass/FuseInstanceNormPass.cpp b/compiler/exo/src/Pass/FuseInstanceNormPass.cpp
index 04d4a62cd..40aa9144f 100644
--- a/compiler/exo/src/Pass/FuseInstanceNormPass.cpp
+++ b/compiler/exo/src/Pass/FuseInstanceNormPass.cpp
@@ -291,7 +291,7 @@ bool InstanceNormPattern::matched()
CHECK_OR_FALSE(add_as_variance);
CHECK_OR_FALSE(
- fill(&mean_as_variance, &const_as_epsilon).with_commutative_args_of(add_as_variance));
+ fill(&mean_as_variance, &const_as_epsilon).with_commutative_args_of(add_as_variance));
CHECK_OR_FALSE(const_as_epsilon->dtype() == loco::DataType::FLOAT32);
// TODO Support regarding broadcast
@@ -317,7 +317,7 @@ bool InstanceNormPattern::matched()
locoex::TFLMul *mul_gamma_should_be = nullptr;
locoex::TFLMean *mean_of_ifm_should_be = nullptr;
CHECK_OR_FALSE(fill(&mul_gamma_should_be, &mean_of_ifm_should_be)
- .with_commutative_args_of(mul_as_scaled_mean));
+ .with_commutative_args_of(mul_as_scaled_mean));
CHECK_OR_FALSE(mul_gamma == mul_gamma_should_be);
CHECK_OR_FALSE(mean_of_ifm == mean_of_ifm_should_be);
#undef CHECK_OR_FALSE
diff --git a/compiler/exo/src/Pass/FuseReluPass.test.cpp b/compiler/exo/src/Pass/FuseReluPass.test.cpp
index 6f83d4dd0..fd6f88d9c 100644
--- a/compiler/exo/src/Pass/FuseReluPass.test.cpp
+++ b/compiler/exo/src/Pass/FuseReluPass.test.cpp
@@ -73,8 +73,8 @@ template <class FusedTFLType, locoex::FusedActFunc FusedActFunc> void test()
{
static_assert((std::is_same<FusedTFLType, locoex::TFLRelu>::value &&
FusedActFunc == locoex::FusedActFunc::RELU) ||
- (std::is_same<FusedTFLType, locoex::TFLRelu6>::value &&
- FusedActFunc == locoex::FusedActFunc::RELU6),
+ (std::is_same<FusedTFLType, locoex::TFLRelu6>::value &&
+ FusedActFunc == locoex::FusedActFunc::RELU6),
"wrong template type");
exo::test::TestGraph g;
diff --git a/compiler/exo/src/Pass/MergeConcatNodesPass.cpp b/compiler/exo/src/Pass/MergeConcatNodesPass.cpp
index 8945fcfce..5885332a6 100644
--- a/compiler/exo/src/Pass/MergeConcatNodesPass.cpp
+++ b/compiler/exo/src/Pass/MergeConcatNodesPass.cpp
@@ -39,8 +39,8 @@ bool canMerge(locoex::TFLConcatenation *node1, locoex::TFLConcatenation *node2)
case locoex::FusedActFunc::RELU6:
return true;
- // case locoex::FusedActFunc::TANH:
- // return false;
+ // case locoex::FusedActFunc::TANH:
+ // return false;
default:
INTERNAL_EXN_V("Unknown FusedActFunc", oops::to_uint32(node1->fusedActivationFunction()));
diff --git a/compiler/exo/src/Pass/ShapeInferencePass.cpp b/compiler/exo/src/Pass/ShapeInferencePass.cpp
index bc60f91c4..367d7da91 100644
--- a/compiler/exo/src/Pass/ShapeInferencePass.cpp
+++ b/compiler/exo/src/Pass/ShapeInferencePass.cpp
@@ -49,9 +49,9 @@ bool ShapeInferencePass::run(loco::Graph *g)
loco::MultiDialectShapeInferenceRule rules;
rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
- .bind(locoex::TFLDialect::get(), &tfl_rule)
- .bind(locoex::CircleDialect::get(), &circle_rule)
- .bind(locoex::COpDialect::get(), &cop_rule);
+ .bind(locoex::TFLDialect::get(), &tfl_rule)
+ .bind(locoex::CircleDialect::get(), &circle_rule)
+ .bind(locoex::COpDialect::get(), &cop_rule);
return loco::apply(&rules).to(g);
}
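
The shape and type inference passes above and below share the same fluent bind-chain; the clang-format change only re-indents the continuation lines. Condensed to one dialect (the TFL/Circle/COp bindings from the hunks chain on identically):

    #include <loco.h>
    #include <loco/IR/CanonicalDialect.h>
    #include <loco/Service/ShapeInference.h>
    #include <loco/Service/CanonicalShapeInferenceRule.h>
    #include <loco/Service/MultiDialectShapeInferenceRule.h>

    bool infer_shapes(loco::Graph *g)
    {
      loco::CanonicalShapeInferenceRule canonical_rule;

      loco::MultiDialectShapeInferenceRule rules;

      // bind() returns the rule itself, so dialect/rule pairs chain fluently
      rules.bind(loco::CanonicalDialect::get(), &canonical_rule);

      return loco::apply(&rules).to(g);
    }
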
diff --git a/compiler/exo/src/Pass/TypeInferencePass.cpp b/compiler/exo/src/Pass/TypeInferencePass.cpp
index 31d4f13b6..52a9d0c33 100644
--- a/compiler/exo/src/Pass/TypeInferencePass.cpp
+++ b/compiler/exo/src/Pass/TypeInferencePass.cpp
@@ -47,9 +47,9 @@ bool TypeInferencePass::run(loco::Graph *g)
loco::MultiDialectTypeInferenceRule rules;
rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
- .bind(locoex::TFLDialect::get(), &tfl_rule)
- .bind(locoex::CircleDialect::get(), &circle_rule)
- .bind(locoex::COpDialect::get(), &cop_rule);
+ .bind(locoex::TFLDialect::get(), &tfl_rule)
+ .bind(locoex::CircleDialect::get(), &circle_rule)
+ .bind(locoex::COpDialect::get(), &cop_rule);
return loco::apply(&rules).to(g);
}
diff --git a/compiler/exo/src/ProgressReporter.h b/compiler/exo/src/ProgressReporter.h
index b0f420df9..83f327309 100644
--- a/compiler/exo/src/ProgressReporter.h
+++ b/compiler/exo/src/ProgressReporter.h
@@ -28,7 +28,7 @@ class ProgressReporter : public logo::PhaseEventListener
{
public:
ProgressReporter(loco::Graph *graph, logo::PhaseStrategy strategy)
- : _graph{graph}, _strategy{strategy}
+ : _graph{graph}, _strategy{strategy}
{
// DO NOTHING
}
diff --git a/compiler/exo/src/TFLite/TFLExporter.cpp b/compiler/exo/src/TFLite/TFLExporter.cpp
index cf002b3e1..71131b725 100644
--- a/compiler/exo/src/TFLite/TFLExporter.cpp
+++ b/compiler/exo/src/TFLite/TFLExporter.cpp
@@ -18,16 +18,15 @@
#include "TFLExporterImpl.h"
-#include <stdex/Memory.h>
-
#include <oops/InternalExn.h>
+#include <memory>
#include <fstream>
namespace exo
{
-TFLExporter::TFLExporter(loco::Graph *graph) : _impl(stdex::make_unique<Impl>(graph))
+TFLExporter::TFLExporter(loco::Graph *graph) : _impl(std::make_unique<Impl>(graph))
{
// NOTHING TO DO
}
diff --git a/compiler/exo/src/TFLite/TFLExporterImpl.cpp b/compiler/exo/src/TFLite/TFLExporterImpl.cpp
index 07adbfb9d..1f6d1bd59 100644
--- a/compiler/exo/src/TFLite/TFLExporterImpl.cpp
+++ b/compiler/exo/src/TFLite/TFLExporterImpl.cpp
@@ -88,7 +88,7 @@ encodeOperatorCodes(FlatBufferBuilder &builder, std::unordered_map<OpCode, uint3
INTERNAL_EXN("Cannot find code for custom op");
operator_codes_vec[idx] =
- CreateOperatorCode(builder, it.first.opcode, builder.CreateString(custom_code->second));
+ CreateOperatorCode(builder, it.first.opcode, builder.CreateString(custom_code->second));
}
}
return builder.CreateVector(operator_codes_vec);
@@ -146,7 +146,7 @@ void TFLExporter::Impl::exportGraph(loco::Graph *graph)
// encode operator codes
auto operator_codes =
- encodeOperatorCodes(_builder, gd._operator_codes, gd._custom_operator_codes);
+ encodeOperatorCodes(_builder, gd._operator_codes, gd._custom_operator_codes);
// Subgraphs
Offset<SubGraph> subgraph = exportSubgraph(gd);
diff --git a/compiler/exo/src/TFLite/TFLExporterImpl.test.cpp b/compiler/exo/src/TFLite/TFLExporterImpl.test.cpp
index 866ede6a2..c337b38d3 100644
--- a/compiler/exo/src/TFLite/TFLExporterImpl.test.cpp
+++ b/compiler/exo/src/TFLite/TFLExporterImpl.test.cpp
@@ -23,7 +23,8 @@
#include "Knob.h"
#include <loco/IR/PermutingCodec.h>
-#include <stdex/Memory.h>
+
+#include <memory>
#include <gtest/gtest.h>
@@ -56,7 +57,7 @@ template <> loco::FeatureEncode *TFLExporterImplTests::make_node(void)
{
loco::FeatureEncode *encode_layer = graph()->nodes()->create<loco::FeatureEncode>();
- auto encoder = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Feature>>();
+ auto encoder = std::make_unique<loco::PermutingEncoder<loco::Domain::Feature>>();
(*encoder->perm())[loco::FeatureAxis::Count] = 0;
(*encoder->perm())[loco::FeatureAxis::Depth] = 1;
(*encoder->perm())[loco::FeatureAxis::Height] = 2;
@@ -70,7 +71,7 @@ template <> loco::FeatureDecode *TFLExporterImplTests::make_node(void)
{
loco::FeatureDecode *decode_layer = graph()->nodes()->create<loco::FeatureDecode>();
- auto decoder = stdex::make_unique<loco::PermutingDecoder<loco::Domain::Feature>>();
+ auto decoder = std::make_unique<loco::PermutingDecoder<loco::Domain::Feature>>();
(*decoder->perm())[loco::FeatureAxis::Count] = 0;
(*decoder->perm())[loco::FeatureAxis::Depth] = 1;
(*decoder->perm())[loco::FeatureAxis::Height] = 2;
@@ -227,7 +228,7 @@ TEST(TFLExporterImplTest, Transpose_simple)
auto bufs = (model->buffers());
auto *perm_buf =
- reinterpret_cast<const int32_t *>(bufs->Get(perm_tensor->buffer())->data()->data());
+ reinterpret_cast<const int32_t *>(bufs->Get(perm_tensor->buffer())->data()->data());
ASSERT_EQ(1, perm_buf[0]);
ASSERT_EQ(2, perm_buf[1]);
@@ -285,7 +286,7 @@ TEST(TFLExporterImplTest, Transpose_from_FilterEncode_FilterDecode)
auto bufs = (model->buffers());
auto *perm_buf =
- reinterpret_cast<const int32_t *>(bufs->Get(perm_tensor->buffer())->data()->data());
+ reinterpret_cast<const int32_t *>(bufs->Get(perm_tensor->buffer())->data()->data());
ASSERT_EQ(3, perm_buf[0]);
ASSERT_EQ(0, perm_buf[1]);
ASSERT_EQ(1, perm_buf[2]);
diff --git a/compiler/exo/src/TFLite/TFLExporterUtils.cpp b/compiler/exo/src/TFLite/TFLExporterUtils.cpp
index d35afc9aa..daec03c40 100644
--- a/compiler/exo/src/TFLite/TFLExporterUtils.cpp
+++ b/compiler/exo/src/TFLite/TFLExporterUtils.cpp
@@ -78,13 +78,13 @@ tflite::Padding getOpPadding(const loco::Padding2D *pad, const loco::Stride<2> *
//
// NOTE input and output 'feature' map are shape of NHWC
bool same_padding_criterion_1 =
- (static_cast<uint32_t>(ofm._dims[1]) == (ifm._dims[1] - 1) / stride->vertical() + 1) &&
- (static_cast<uint32_t>(ofm._dims[2]) == (ifm._dims[2] - 1) / stride->horizontal() + 1);
+ (static_cast<uint32_t>(ofm._dims[1]) == (ifm._dims[1] - 1) / stride->vertical() + 1) &&
+ (static_cast<uint32_t>(ofm._dims[2]) == (ifm._dims[2] - 1) / stride->horizontal() + 1);
// For same padding, rear padding is same or bigger than front padding by at most 1
bool same_padding_criterion_2 =
- (pad->top() <= pad->bottom()) && (pad->bottom() <= pad->top() + 1) &&
- (pad->left() <= pad->right()) && (pad->right() <= pad->left() + 1);
+ (pad->top() <= pad->bottom()) && (pad->bottom() <= pad->top() + 1) &&
+ (pad->left() <= pad->right()) && (pad->right() <= pad->left() + 1);
if (same_padding_criterion_1 && same_padding_criterion_2)
return tflite::Padding_SAME;
@@ -120,8 +120,7 @@ void registerGraphIOName(loco::Graph *graph, SerializedModelData &gd)
}
}
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
namespace
@@ -147,7 +146,7 @@ private:
void set_tensor_index(loco::Node *node, const TFLTensorIndex &tensor_id)
{
assert(node->annot<TFLTensorIndexAnnotation>() == nullptr);
- node->annot(stdex::make_unique<TFLTensorIndexAnnotation>(tensor_id));
+ node->annot(std::make_unique<TFLTensorIndexAnnotation>(tensor_id));
}
TFLTensorIndex get_tensor_index(loco::Node *node)
diff --git a/compiler/exo/src/TFLite/TFLExporterUtils.h b/compiler/exo/src/TFLite/TFLExporterUtils.h
index dbd7a52fb..f2fe6075e 100644
--- a/compiler/exo/src/TFLite/TFLExporterUtils.h
+++ b/compiler/exo/src/TFLite/TFLExporterUtils.h
@@ -65,7 +65,7 @@ namespace tflite_detail
{
/**
- * @breif Record the information of T/F Lite SubGraph and its mapping to loco
+ * @brief Record the information of T/F Lite SubGraph and its mapping to loco
*/
struct SubGraphContext
{
diff --git a/compiler/exo/src/TFLite/TFLOperationExporter.cpp b/compiler/exo/src/TFLite/TFLOperationExporter.cpp
index 79b5b6287..b7a0ffea8 100644
--- a/compiler/exo/src/TFLite/TFLOperationExporter.cpp
+++ b/compiler/exo/src/TFLite/TFLOperationExporter.cpp
@@ -81,13 +81,19 @@ public:
void visit(loco::ReLU *) final;
void visit(loco::ReLU6 *) final;
void visit(loco::Tanh *) final;
- void visit(loco::Push *) final { /* DO NOTHING */}
- void visit(loco::Pull *) final { /* DO NOTHING */}
+ void visit(loco::Push *) final
+ { /* DO NOTHING */
+ }
+ void visit(loco::Pull *) final
+ { /* DO NOTHING */
+ }
void visit(loco::FeatureEncode *) final;
void visit(loco::FeatureDecode *) final;
void visit(loco::FilterEncode *) final;
void visit(loco::DepthwiseFilterEncode *) final;
- void visit(loco::ConstGen *) final { /* skip, everything is done in exportOpDefinedTensors */}
+ void visit(loco::ConstGen *) final
+ { /* skip, everything is done in exportOpDefinedTensors */
+ }
void visit(loco::MaxPool2D *) final;
void visit(loco::AvgPool2D *) final;
void visit(loco::Conv2D *) final;
@@ -227,7 +233,7 @@ void OperationExporter::visit(locoex::TFLFullyConnected *node)
auto inputs = builder.CreateVector(inputs_vec);
auto outputs = builder.CreateVector(outputs_vec);
auto options =
- CreateFullyConnectedOptions(builder, to_tflite_actfunc(node->fusedActivationFunction()));
+ CreateFullyConnectedOptions(builder, to_tflite_actfunc(node->fusedActivationFunction()));
// Make FULLY_CONNECTED operator
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
@@ -367,8 +373,8 @@ void OperationExporter::visit(locoex::TFLTranspose *node)
auto options = CreateTransposeOptions(builder);
auto op_offset =
- CreateOperator(builder, op_idx, inputs, outputs,
- tflite::BuiltinOptions::BuiltinOptions_TransposeOptions, options.Union());
+ CreateOperator(builder, op_idx, inputs, outputs,
+ tflite::BuiltinOptions::BuiltinOptions_TransposeOptions, options.Union());
gd._operators.push_back(op_offset);
}
@@ -385,7 +391,7 @@ void OperationExporter::visit(locoex::TFLTransposeConv *node)
auto outputs = builder.CreateVector(outputs_vec);
tflite::Padding padding = getOpPadding(node->padding());
auto options =
- CreateTransposeConvOptions(builder, padding, node->stride()->w(), node->stride()->h());
+ CreateTransposeConvOptions(builder, padding, node->stride()->w(), node->stride()->h());
// Make TRANSPOSE_CONV operator
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
@@ -397,7 +403,7 @@ template <class TFLPool2D>
void OperationExporter::export_pool_2d(TFLPool2D *node, tflite::BuiltinOperator builtin_op)
{
EXO_ASSERT(builtin_op == tflite::BuiltinOperator_MAX_POOL_2D ||
- builtin_op == tflite::BuiltinOperator_AVERAGE_POOL_2D,
+ builtin_op == tflite::BuiltinOperator_AVERAGE_POOL_2D,
"should be maxpool or avgpool");
EXO_ASSERT(node->padding() != locoex::Padding::UNDEFINED, "Padding is not set");
@@ -458,10 +464,10 @@ void OperationExporter::visit(loco::MaxPool2D *node)
auto inputs = builder.CreateVector(inputs_vec);
auto outputs = builder.CreateVector(outputs_vec);
tflite::Padding padding = getOpPadding(
- node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
- auto options = CreatePool2DOptions(builder, padding, node->stride()->horizontal(),
- node->stride()->vertical(), node->window()->horizontal(),
- node->window()->vertical());
+ node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
+ auto options =
+ CreatePool2DOptions(builder, padding, node->stride()->horizontal(), node->stride()->vertical(),
+ node->window()->horizontal(), node->window()->vertical());
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
tflite::BuiltinOptions_Pool2DOptions, options.Union());
gd._operators.push_back(op_offset);
@@ -478,10 +484,10 @@ void OperationExporter::visit(loco::AvgPool2D *node)
auto inputs = builder.CreateVector(inputs_vec);
auto outputs = builder.CreateVector(outputs_vec);
tflite::Padding padding = getOpPadding(
- node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
- auto options = CreatePool2DOptions(builder, padding, node->stride()->horizontal(),
- node->stride()->vertical(), node->window()->horizontal(),
- node->window()->vertical());
+ node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
+ auto options =
+ CreatePool2DOptions(builder, padding, node->stride()->horizontal(), node->stride()->vertical(),
+ node->window()->horizontal(), node->window()->vertical());
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
tflite::BuiltinOptions_Pool2DOptions, options.Union());
gd._operators.push_back(op_offset);
@@ -504,7 +510,7 @@ void OperationExporter::visit(loco::Conv2D *node)
std::vector<float> bias_vec_data(bias_vec_size); // initialized as zero vector
auto bias_vec_offset =
- builder.CreateVector(reinterpret_cast<uint8_t *>(bias_vec_data.data()), raw_bias_vec_size);
+ builder.CreateVector(reinterpret_cast<uint8_t *>(bias_vec_data.data()), raw_bias_vec_size);
auto bias_buffer_offset = CreateBuffer(builder, bias_vec_offset);
@@ -516,7 +522,7 @@ void OperationExporter::visit(loco::Conv2D *node)
auto name_offset = builder.CreateString("t_" + std::to_string(bias_tensor_id));
auto bias_tensor_offset =
- CreateTensor(builder, bias_vec_shape_offset, TensorType_FLOAT32, bias_buffer_id, name_offset);
+ CreateTensor(builder, bias_vec_shape_offset, TensorType_FLOAT32, bias_buffer_id, name_offset);
gd._tensors.push_back(bias_tensor_offset);
// Make input, output and options for operator
@@ -526,9 +532,9 @@ void OperationExporter::visit(loco::Conv2D *node)
auto inputs = builder.CreateVector(inputs_vec);
auto outputs = builder.CreateVector(outputs_vec);
tflite::Padding padding = getOpPadding(
- node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
- auto options = CreateConv2DOptions(builder, padding, node->stride()->horizontal(),
- node->stride()->vertical());
+ node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
+ auto options =
+ CreateConv2DOptions(builder, padding, node->stride()->horizontal(), node->stride()->vertical());
// Make CONV_2D operator
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
@@ -558,7 +564,7 @@ void OperationExporter::visit(loco::TransposedConv2D *node)
}
auto outshape_vec_offset = builder.CreateVector(
- reinterpret_cast<uint8_t *>(outshape_vec_data.data()), raw_outshape_vec_size);
+ reinterpret_cast<uint8_t *>(outshape_vec_data.data()), raw_outshape_vec_size);
auto outshape_buffer_offset = CreateBuffer(builder, outshape_vec_offset);
@@ -607,7 +613,7 @@ void OperationExporter::visit(loco::DepthwiseConv2D *node)
size_t raw_bias_vec_size = bias_vec_size * sizeof(int32_t);
std::vector<float> bias_vec_data(bias_vec_size);
auto bias_vec_offset =
- builder.CreateVector(reinterpret_cast<uint8_t *>(bias_vec_data.data()), raw_bias_vec_size);
+ builder.CreateVector(reinterpret_cast<uint8_t *>(bias_vec_data.data()), raw_bias_vec_size);
auto bias_buffer_offset = CreateBuffer(builder, bias_vec_offset);
@@ -619,7 +625,7 @@ void OperationExporter::visit(loco::DepthwiseConv2D *node)
auto name_offset = builder.CreateString("t_" + std::to_string(bias_tensor_id));
auto bias_tensor_offset =
- CreateTensor(builder, bias_vec_shape_offset, TensorType_FLOAT32, bias_buffer_id, name_offset);
+ CreateTensor(builder, bias_vec_shape_offset, TensorType_FLOAT32, bias_buffer_id, name_offset);
gd._tensors.push_back(bias_tensor_offset);
std::vector<int32_t> inputs_vec{get_tensor_index(node->ifm()), get_tensor_index(node->ker()),
@@ -628,13 +634,13 @@ void OperationExporter::visit(loco::DepthwiseConv2D *node)
auto inputs = builder.CreateVector(inputs_vec);
auto outputs = builder.CreateVector(outputs_vec);
tflite::Padding padding = getOpPadding(
- node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
+ node->pad(), node->stride(), ShapeInference::get(node->ifm()), ShapeInference::get(node));
int32_t ifm_channel_size = ShapeInference::get(node->ifm())._dims[3];
// multiplier = bias_vec_size(output_size)/ifm_channel_size
auto options =
- CreateDepthwiseConv2DOptions(builder, padding, node->stride()->horizontal(),
- node->stride()->vertical(), bias_vec_size / ifm_channel_size);
+ CreateDepthwiseConv2DOptions(builder, padding, node->stride()->horizontal(),
+ node->stride()->vertical(), bias_vec_size / ifm_channel_size);
auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
tflite::BuiltinOptions_DepthwiseConv2DOptions, options.Union());
@@ -668,7 +674,7 @@ void OperationExporter::visit(loco::TensorReduce *node)
size_t raw_axes_vec_size = axes_vec_size * sizeof(int32_t);
auto axes_vec_offset =
- builder.CreateVector(reinterpret_cast<uint8_t *>(axes_vec.data()), raw_axes_vec_size);
+ builder.CreateVector(reinterpret_cast<uint8_t *>(axes_vec.data()), raw_axes_vec_size);
auto axes_buffer_offset = CreateBuffer(builder, axes_vec_offset);
@@ -680,7 +686,7 @@ void OperationExporter::visit(loco::TensorReduce *node)
auto name_offset = builder.CreateString("t_" + std::to_string(axes_tensor_id));
auto axes_tensor_offset =
- CreateTensor(builder, axes_vec_shape_offset, TensorType_INT32, axes_buffer_id, name_offset);
+ CreateTensor(builder, axes_vec_shape_offset, TensorType_INT32, axes_buffer_id, name_offset);
gd._tensors.push_back(axes_tensor_offset);
std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), axes_tensor_id};
@@ -743,7 +749,7 @@ void exportAsTranspose(loco::Node *node, FlatBufferBuilder &builder,
constexpr size_t raw_perm_vec_size = perm_vec_size * sizeof(int32_t);
auto perm_vec_offset =
- builder.CreateVector(reinterpret_cast<uint8_t *>(perm_vec_data.data()), raw_perm_vec_size);
+ builder.CreateVector(reinterpret_cast<uint8_t *>(perm_vec_data.data()), raw_perm_vec_size);
auto perm_buffer_offset = CreateBuffer(builder, perm_vec_offset);
@@ -755,7 +761,7 @@ void exportAsTranspose(loco::Node *node, FlatBufferBuilder &builder,
auto name_offset = builder.CreateString("t_" + std::to_string(perm_tensor_id));
auto perm_tensor_offset =
- CreateTensor(builder, perm_vec_shape_offset, TensorType_INT32, perm_buffer_id, name_offset);
+ CreateTensor(builder, perm_vec_shape_offset, TensorType_INT32, perm_buffer_id, name_offset);
gd._tensors.push_back(perm_tensor_offset);
// Create permutation node
@@ -769,7 +775,7 @@ void exportAsTranspose(loco::Node *node, FlatBufferBuilder &builder,
constexpr auto options_type = tflite::BuiltinOptions::BuiltinOptions_TransposeOptions;
auto transpose_offset =
- CreateOperator(builder, op_idx, inputs, outputs, options_type, options.Union());
+ CreateOperator(builder, op_idx, inputs, outputs, options_type, options.Union());
gd._operators.push_back(transpose_offset);
}
@@ -854,11 +860,11 @@ void exportAsReshape(loco::Node *node, FlatBufferBuilder &builder,
// but also by input.
auto input_shape_shape_vec_offset =
- builder.CreateVector(std::vector<int32_t>{(int32_t)new_shape_vec.size()});
+ builder.CreateVector(std::vector<int32_t>{(int32_t)new_shape_vec.size()});
size_t input_shape_vec_size = new_shape_vec.size() * sizeof(int32_t);
auto input_shape_input_vec_offset =
- builder.CreateVector(reinterpret_cast<uint8_t *>(new_shape_vec.data()), input_shape_vec_size);
+ builder.CreateVector(reinterpret_cast<uint8_t *>(new_shape_vec.data()), input_shape_vec_size);
auto input_shape_buffer_offset = CreateBuffer(builder, input_shape_input_vec_offset);
const auto input_shape_buffer_id = static_cast<uint32_t>(gd._buffers.size());
@@ -867,7 +873,7 @@ void exportAsReshape(loco::Node *node, FlatBufferBuilder &builder,
auto input_shape_tensor_id = static_cast<int32_t>(gd._tensors.size());
auto name_offset = builder.CreateString("t_" + std::to_string(input_shape_tensor_id));
auto input_shape_tensor_offset = CreateTensor(
- builder, input_shape_shape_vec_offset, TensorType_INT32, input_shape_buffer_id, name_offset);
+ builder, input_shape_shape_vec_offset, TensorType_INT32, input_shape_buffer_id, name_offset);
gd._tensors.push_back(input_shape_tensor_offset);
uint32_t op_idx = gd.registerBuiltinOpcode(tflite::BuiltinOperator_RESHAPE);
@@ -1069,7 +1075,7 @@ void OperationExporter::visit(loco::TensorConstantPad *node)
auto padding_shape_vec_ptr = builder.CreateVector(std::vector<int32_t>{padding_vec_size, 2});
// create tensor
auto padding_tensor_ptr =
- CreateTensor(builder, padding_shape_vec_ptr, TensorType_INT32, padding_buffer_id);
+ CreateTensor(builder, padding_shape_vec_ptr, TensorType_INT32, padding_buffer_id);
// get tensor id
const auto padding_tensor_id = static_cast<int32_t>(gd._tensors.size());
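
Note: the DepthwiseConv2D hunk above only reflows the call, but the comment it carries is worth spelling out — TFLite's depth_multiplier is the ratio of output channels (`bias_vec_size` here) to input channels. A minimal sketch of that arithmetic, with illustrative names not taken from the patch:

```cpp
#include <cassert>
#include <cstdint>

// depth_multiplier for a depthwise convolution: every input channel is
// expanded into `multiplier` output channels, so
//   output_channels == ifm_channels * multiplier.
int32_t depth_multiplier(int32_t output_channels, int32_t ifm_channels)
{
  assert(ifm_channels > 0);
  assert(output_channels % ifm_channels == 0);
  return output_channels / ifm_channels; // e.g. 6 outputs over 3 inputs -> 2
}
```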
diff --git a/compiler/exo/src/TFLite/TFLTensorExporter.cpp b/compiler/exo/src/TFLite/TFLTensorExporter.cpp
index 23c810ed5..2fb6f0c13 100644
--- a/compiler/exo/src/TFLite/TFLTensorExporter.cpp
+++ b/compiler/exo/src/TFLite/TFLTensorExporter.cpp
@@ -89,7 +89,7 @@ struct NoOpDetector final : public loco::CanonicalNodeMutableVisitor<bool>
bool visit(loco::FeatureEncode *node) final
{
auto encoder =
- loco::must_cast<loco::PermutingEncoder<loco::Domain::Feature> *>(node->encoder());
+ loco::must_cast<loco::PermutingEncoder<loco::Domain::Feature> *>(node->encoder());
auto perm = encoder->perm();
return isNHWC(perm);
}
@@ -97,7 +97,7 @@ struct NoOpDetector final : public loco::CanonicalNodeMutableVisitor<bool>
bool visit(loco::FeatureDecode *node) final
{
auto decoder =
- loco::must_cast<loco::PermutingDecoder<loco::Domain::Feature> *>(node->decoder());
+ loco::must_cast<loco::PermutingDecoder<loco::Domain::Feature> *>(node->decoder());
auto perm = decoder->perm();
return isNHWC(perm);
}
diff --git a/compiler/exo/src/TFLite/TFLTypeInference.cpp b/compiler/exo/src/TFLite/TFLTypeInference.cpp
index 8d6bb8d8c..56817ee3b 100644
--- a/compiler/exo/src/TFLite/TFLTypeInference.cpp
+++ b/compiler/exo/src/TFLite/TFLTypeInference.cpp
@@ -31,8 +31,6 @@
#include <oops/InternalExn.h>
-#include <stdex/Memory.h>
-
#include <stdexcept>
#include <type_traits>
diff --git a/compiler/exo/src/TFLite/TFLTypeInference.test.cpp b/compiler/exo/src/TFLite/TFLTypeInference.test.cpp
index 8a3a08da9..054dad1f1 100644
--- a/compiler/exo/src/TFLite/TFLTypeInference.test.cpp
+++ b/compiler/exo/src/TFLite/TFLTypeInference.test.cpp
@@ -18,12 +18,9 @@
#include "Pass/TypeInferencePass.h"
#include <loco/IR/PermutingCodec.h>
-#include <stdex/Memory.h>
#include <gtest/gtest.h>
-using stdex::make_unique;
-
namespace
{
diff --git a/compiler/exo/src/TestGraph.h b/compiler/exo/src/TestGraph.h
index f919cc9ae..46c2264ab 100644
--- a/compiler/exo/src/TestGraph.h
+++ b/compiler/exo/src/TestGraph.h
@@ -23,8 +23,6 @@
#include <loco.h>
-#include <stdex/Memory.h>
-
#include <cassert>
namespace exo
@@ -284,7 +282,7 @@ public:
{
filterEncode = exo::make_filter_encode<exo::FilterLayout::HWIO>(pull); // from Tensorflow
filterDecode =
- exo::make_filter_decode<exo::FilterLayout::OHWI>(filterEncode); // to Tensorflow Lite
+ exo::make_filter_decode<exo::FilterLayout::OHWI>(filterEncode); // to Tensorflow Lite
complete(filterDecode);
}
};
diff --git a/compiler/exo/src/TestHelper.h b/compiler/exo/src/TestHelper.h
index 1a3de50f5..bacaa3e5e 100644
--- a/compiler/exo/src/TestHelper.h
+++ b/compiler/exo/src/TestHelper.h
@@ -26,7 +26,7 @@
#include <loco.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <gtest/gtest.h>
@@ -54,11 +54,11 @@ public:
TypeShapeReadyPhase()
{
// Type and Shape inference is prerequisite for run other test
- _phase.emplace_back(stdex::make_unique<::exo::TypeInferencePass>());
- _phase.emplace_back(stdex::make_unique<::exo::ShapeInferencePass>());
+ _phase.emplace_back(std::make_unique<::exo::TypeInferencePass>());
+ _phase.emplace_back(std::make_unique<::exo::ShapeInferencePass>());
}
- template <typename PassT> void add_pass() { _phase.emplace_back(stdex::make_unique<PassT>()); }
+ template <typename PassT> void add_pass() { _phase.emplace_back(std::make_unique<PassT>()); }
void run(loco::Graph *g)
{
diff --git a/compiler/foder/CMakeLists.txt b/compiler/foder/CMakeLists.txt
index 6a413c61e..2e44eefa6 100644
--- a/compiler/foder/CMakeLists.txt
+++ b/compiler/foder/CMakeLists.txt
@@ -1,2 +1,3 @@
add_library(foder INTERFACE)
target_include_directories(foder INTERFACE include)
+target_link_libraries(foder INTERFACE nncc_coverage)
diff --git a/compiler/foder/include/foder/FileLoader.h b/compiler/foder/include/foder/FileLoader.h
index e2143ecf6..cdf5ddef7 100644
--- a/compiler/foder/include/foder/FileLoader.h
+++ b/compiler/foder/include/foder/FileLoader.h
@@ -14,6 +14,9 @@
* limitations under the License.
*/
+#ifndef __FODER_FILE_LOADER_H__
+#define __FODER_FILE_LOADER_H__
+
#include <fstream>
#include <vector>
@@ -30,7 +33,7 @@ public:
public:
FileLoader(const FileLoader &) = delete;
- FileLoader(FileLoader &&) = delete;
+ FileLoader &operator=(const FileLoader &) = delete;
public:
DataBuffer load(void) const
@@ -38,7 +41,7 @@ public:
std::ifstream file(_path, std::ios::binary | std::ios::in);
if (!file.good())
{
- std::string errmsg = "ERROR: Failed to open file: " + _path;
+ std::string errmsg = "Failed to open file: " + _path;
throw std::runtime_error(errmsg.c_str());
}
@@ -55,7 +58,7 @@ public:
file.read(data.data(), fileSize);
if (file.fail())
{
- std::string errmsg = "ERROR: Failed to read file: " + _path;
+ std::string errmsg = "Failed to read file: " + _path;
throw std::runtime_error(errmsg.c_str());
}
@@ -67,3 +70,5 @@ private:
};
} // namespace foder
+
+#endif // __FODER_FILE_LOADER_H__
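
Note: for reference, a minimal sketch of how the now-guarded header is meant to be consumed. It assumes the path-taking constructor and the `DataBuffer` alias (`std::vector<char>`) that this hunk elides; the file name is illustrative:

```cpp
#include <foder/FileLoader.h>

#include <iostream>
#include <stdexcept>

int main()
{
  try
  {
    const foder::FileLoader loader{"model.circle"}; // hypothetical input
    const auto buffer = loader.load();              // whole file in memory
    std::cout << buffer.size() << " bytes loaded" << std::endl;
  }
  catch (const std::runtime_error &e)
  {
    // load() throws with "Failed to open/read file: <path>" after this patch
    std::cerr << e.what() << std::endl;
    return 1;
  }
  return 0;
}
```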
diff --git a/compiler/gen-core/CMakeLists.txt b/compiler/gen-core/CMakeLists.txt
deleted file mode 100644
index 3cee4cecf..000000000
--- a/compiler/gen-core/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-nnas_find_package(HDF5 QUIET)
-
-if(NOT HDF5_FOUND)
- return()
-endif(NOT HDF5_FOUND)
-
-nnas_find_package(TensorFlow QUIET)
-
-file(GLOB_RECURSE SOURCES "src/*.cpp")
-
-add_library(gen_core STATIC ${SOURCES})
-set_target_properties(gen_core PROPERTIES POSITION_INDEPENDENT_CODE ON)
-target_include_directories(gen_core PUBLIC include)
-target_include_directories(gen_core PRIVATE ${HDF5_INCLUDE_DIRS})
-target_link_libraries(gen_core ${HDF5_CXX_LIBRARIES})
-target_link_libraries(gen_core tfinfo_v2)
-target_link_libraries(gen_core angkor)
diff --git a/compiler/gen-core/README.md b/compiler/gen-core/README.md
deleted file mode 100644
index cc98ef00b..000000000
--- a/compiler/gen-core/README.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# gen-core
-
-_gen-core_ is a common library used by _gen-tf-input_, _gen-tf-output_, and _gen-tflite-output_.
diff --git a/compiler/gen-core/include/gencore/HDF5Common.h b/compiler/gen-core/include/gencore/HDF5Common.h
deleted file mode 100644
index 87367c99c..000000000
--- a/compiler/gen-core/include/gencore/HDF5Common.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __HDF5COMMON_H__
-#define __HDF5COMMON_H__
-
-#include <string>
-
-namespace gencore
-{
-
-/**
- * @brief Construct HDF5-compatible dataset name from a given string
- *
- * When someone attempts to access the 'A/B/C' dataset, HDF5 tries to open
- * dataset C in group B in top-level group A, which means that dataset
- * names SHOULD NOT contain '/' in them.
- *
- * This mangle function replaces all the occurrences of '/' in a given
- * string with '_' to construct an HDF5-compatible dataset name.
- */
-std::string mangle(const std::string &);
-
-#if 0
-Let us assume that a tensor context includes N + 1 tensors.
-
-Then, HDF5 export will generate a HDF5 file whose structure is given as follows:
-[value group]/
- [file 0] <- A dataset that contains the value of 1st (=0) tensor
- [file 1]
- ...
- [file N]
-[name group]/
- [file 0] <- An attribute that contains the name of 1st (=0) tensor
- [file 1]
- ...
- [file N]
-#endif
-
-/// @brief Return the name of "value group"
-std::string value_grpname(void);
-/// @brief Return the name of n-th tensor dataset
-std::string value_filename(uint32_t n);
-
-/// @brief Return the name of "name group"
-std::string name_grpname(void);
-/// @brief Return the name of n-th tensor attribute
-std::string name_filename(uint32_t n);
-
-} // namespace gencore
-
-#endif // __HDF5COMMON_H__
diff --git a/compiler/gen-core/include/gencore/HDF5Exporter.h b/compiler/gen-core/include/gencore/HDF5Exporter.h
deleted file mode 100644
index 10cc1c613..000000000
--- a/compiler/gen-core/include/gencore/HDF5Exporter.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __GENCORE_HDF5EXPORTER_H__
-#define __GENCORE_HDF5EXPORTER_H__
-
-#include "HDF5Common.h"
-
-#include <angkor/TensorShape.h>
-#include <nncc/core/ADT/tensor/Reader.h>
-
-#include <H5Cpp.h>
-
-namespace gencore
-{
-
-class H5Exporter
-{
-public:
- H5Exporter(const std::string &path) : _file{path.c_str(), H5F_ACC_TRUNC}
- {
- _value_grp = _file.createGroup(value_grpname());
- _name_grp = _file.createGroup(name_grpname());
- }
-
-public:
- template <typename DT>
- void write(uint32_t nth, const std::string &name, const angkor::TensorShape &shape,
- const nncc::core::ADT::tensor::Reader<DT> &buf_reader);
-
-private:
- H5::H5File _file;
- H5::Group _value_grp;
- H5::Group _name_grp;
-};
-
-} // namespace gencore
-
-#endif // __GENCORE_HDF5EXPORTER_H__
diff --git a/compiler/gen-core/include/gencore/HDF5Importer.h b/compiler/gen-core/include/gencore/HDF5Importer.h
deleted file mode 100644
index 853744199..000000000
--- a/compiler/gen-core/include/gencore/HDF5Importer.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __GENCORE_HDF5IMPORTER_H__
-#define __GENCORE_HDF5IMPORTER_H__
-
-#include "HDF5Common.h"
-
-#include <tfinfo-v2/TensorSignature.h>
-
-#include <angkor/TensorShape.h>
-#include <nncc/core/ADT/tensor/Accessor.h>
-
-#include <H5Cpp.h>
-
-namespace gencore
-{
-
-class HDF5Importer
-{
-public:
- HDF5Importer(const std::string &path) : _file{path, H5F_ACC_RDONLY}
- {
- _value_grp = _file.openGroup(value_grpname());
- }
-
-public:
- /**
- * @brief Reads tensor data from file and store it into buf_accessor
- */
- template <typename DT>
- void read(uint32_t nth, const std::string &name, const angkor::TensorShape &shape,
- nncc::core::ADT::tensor::Accessor<DT> *buf_accessor);
-
-private:
- H5::H5File _file;
- H5::Group _value_grp;
-};
-
-} // namespace gencore
-
-#endif // __GENCORE_HDF5IMPORTER_H__
diff --git a/compiler/gen-core/requires.cmake b/compiler/gen-core/requires.cmake
deleted file mode 100644
index a424f1f4a..000000000
--- a/compiler/gen-core/requires.cmake
+++ /dev/null
@@ -1,2 +0,0 @@
-require("tfinfo-v2")
-require("angkor")
diff --git a/compiler/gen-core/src/HDF5Common.cpp b/compiler/gen-core/src/HDF5Common.cpp
deleted file mode 100644
index c254d9e1e..000000000
--- a/compiler/gen-core/src/HDF5Common.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gencore/HDF5Common.h"
-
-namespace gencore
-{
-
-std::string mangle(const std::string &name)
-{
- std::string res = name;
-
- for (uint32_t n = 0; n < res.size(); ++n)
- {
- if (res.at(n) == '/')
- {
- res.at(n) = '_';
- }
- }
-
- return res;
-}
-
-std::string value_grpname(void) { return "value"; }
-std::string value_filename(uint32_t n) { return std::to_string(n); }
-
-std::string name_grpname(void) { return "name"; }
-std::string name_filename(uint32_t n) { return std::to_string(n); }
-
-} // namespace gencore
diff --git a/compiler/gen-core/src/HDF5Exporter.cpp b/compiler/gen-core/src/HDF5Exporter.cpp
deleted file mode 100644
index 6b77710c4..000000000
--- a/compiler/gen-core/src/HDF5Exporter.cpp
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gencore/HDF5Exporter.h"
-
-#include <angkor/TensorShape.h>
-#include <nncc/core/ADT/tensor/IndexEnumerator.h>
-#include <nncc/core/ADT/tensor/LexicalLayout.h>
-#include <nncc/core/ADT/tensor/Reader.h>
-
-#include <H5Cpp.h>
-
-namespace
-{
-
-template <typename DT> H5::PredType get_h5_datatype();
-
-template <> H5::PredType get_h5_datatype<float>() { return H5::PredType::NATIVE_FLOAT; }
-
-template <typename DT> H5::PredType get_h5_store_format();
-
-template <> H5::PredType get_h5_store_format<float>() { return H5::PredType::IEEE_F32BE; }
-
-} // namespace
-
-namespace gencore
-{
-
-template <typename DT>
-void H5Exporter::write(uint32_t nth, const std::string &name, const angkor::TensorShape &shape,
- const nncc::core::ADT::tensor::Reader<DT> &buf_reader)
-{
- // Record tensor values
- {
- const auto rank = shape.rank();
-
- hsize_t dims[rank];
-
- for (uint32_t axis = 0; axis < rank; ++axis)
- {
- dims[axis] = shape.dim(axis);
- }
-
- H5::DataSpace dataspace(rank, dims);
-
- auto dataset =
- _value_grp.createDataSet(value_filename(nth), get_h5_store_format<DT>(), dataspace);
-
- DT *h5_data = new DT[nncc::core::ADT::tensor::num_elements(shape)];
- {
- using nncc::core::ADT::tensor::IndexEnumerator;
- using nncc::core::ADT::tensor::LexicalLayout;
-
- LexicalLayout layout{};
- for (IndexEnumerator e{shape}; e.valid(); e.advance())
- {
- auto i = e.current();
- h5_data[layout.offset(shape, i)] = buf_reader.at(i);
- }
- }
-
- dataset.write(h5_data, get_h5_datatype<DT>());
-
- delete[] h5_data;
- }
-
- // Record name
- {
- H5::DataSpace name_dataspace(H5S_SCALAR);
- H5::StrType name_datatype(H5::PredType::C_S1, name.size());
-
- auto name_attr = _name_grp.createAttribute(value_filename(nth), name_datatype, name_dataspace);
-
- name_attr.write(name_datatype, name);
- }
-}
-
-// template instantiation
-template void H5Exporter::write<float>(uint32_t, const std::string &, const angkor::TensorShape &,
- const nncc::core::ADT::tensor::Reader<float> &);
-
-} // namespace gencore
diff --git a/compiler/gen-core/src/HDF5Importer.cpp b/compiler/gen-core/src/HDF5Importer.cpp
deleted file mode 100644
index 83691b20b..000000000
--- a/compiler/gen-core/src/HDF5Importer.cpp
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gencore/HDF5Importer.h"
-#include "gencore/HDF5Common.h"
-
-#include <angkor/TensorShape.h>
-#include <nncc/core/ADT/tensor/IndexEnumerator.h>
-#include <nncc/core/ADT/tensor/LexicalLayout.h>
-#include <nncc/core/ADT/tensor/Overlay.h>
-#include <nncc/core/ADT/tensor/Accessor.h>
-
-#include <H5Cpp.h>
-
-#include <cassert>
-
-namespace
-{
-
-template <typename DT> H5::PredType get_h5_datatype();
-
-template <> H5::PredType get_h5_datatype<float>() { return H5::PredType::NATIVE_FLOAT; }
-
-template <typename DT> H5::PredType get_h5_store_format();
-
-template <> H5::PredType get_h5_store_format<float>() { return H5::PredType::IEEE_F32BE; }
-
-} // namespace
-
-namespace gencore
-{
-
-template <typename DT>
-void HDF5Importer::read(uint32_t nth, const std::string &name, const angkor::TensorShape &shape,
- nncc::core::ADT::tensor::Accessor<DT> *buf_accessor)
-{
- assert(buf_accessor != nullptr);
-
- try
- {
- auto dataset = _value_grp.openDataSet(value_filename(nth));
-
- assert(dataset.getDataType() == get_h5_store_format<DT>());
-
- std::vector<DT> file_buf;
- {
- file_buf.resize(nncc::core::ADT::tensor::num_elements(shape));
- dataset.read(file_buf.data(), get_h5_datatype<DT>());
- }
-
- using nncc::core::ADT::tensor::IndexEnumerator;
- using nncc::core::ADT::tensor::LexicalLayout;
-
- LexicalLayout layout{};
-
- for (IndexEnumerator e{shape}; e.valid(); e.advance())
- {
- auto i = e.current();
- buf_accessor->at(i) = file_buf[layout.offset(shape, i)];
- }
- }
- catch (const H5::FileIException &)
- {
- // Skip if data is not present in HDF5 file
- }
-}
-
-// template instantiation
-template void HDF5Importer::read<float>(uint32_t, const std::string &, const angkor::TensorShape &,
- nncc::core::ADT::tensor::Accessor<float> *);
-
-} // namespace gencore
diff --git a/compiler/gen-tf-input/CMakeLists.txt b/compiler/gen-tf-input/CMakeLists.txt
deleted file mode 100644
index 12b78b5b3..000000000
--- a/compiler/gen-tf-input/CMakeLists.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-file(GLOB_RECURSE SOURCES "src/*.cpp")
-
-# making gen-tf-input
-add_executable(gen-tf-input ${SOURCES})
diff --git a/compiler/gen-tf-input/README.md b/compiler/gen-tf-input/README.md
deleted file mode 100644
index 2ea6f71b4..000000000
--- a/compiler/gen-tf-input/README.md
+++ /dev/null
@@ -1,11 +0,0 @@
-# gen-tf-input
-
-_gen-tf-input_ generates random input data for testing in HDF5 format.
-
-# How to use
-
-Use the following to generate a file that contains random values of input tensors:
-
-```
-$ gen-tf-input <info_v2_path> <pb_path> <file_path_to_generate>
-```
diff --git a/compiler/gen-tf-input/src/Driver.cpp b/compiler/gen-tf-input/src/Driver.cpp
deleted file mode 100644
index f2ce20f16..000000000
--- a/compiler/gen-tf-input/src/Driver.cpp
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cassert>
-#include <iostream>
-
-namespace
-{
-
-void print_help()
-{
- std::cerr << "This generates a file that contains random values of input tensors" << std::endl
- << "Usage:" << std::endl
- << " gen-tf-input <info_v2_path> <pb_path> <file_path_to_generate>" << std::endl;
-}
-
-} // namespace
-
-namespace
-{
-
-void gen_input(const std::string info_v2_path, const std::string pb_path,
- const std::string input_path)
-{
- // TODO write code
- assert("Not yet written" && nullptr);
-}
-
-} // namespace
-
-int main(int argc, char **argv)
-{
- // TODO We need better args parsing in future
- if (argc != 4)
- {
- print_help();
- return 255;
- }
-
- gen_input(argv[1], argv[2], argv[3]);
-
- return 0;
-}
diff --git a/compiler/gen-tf-output/CMakeLists.txt b/compiler/gen-tf-output/CMakeLists.txt
deleted file mode 100644
index c2b91a9cd..000000000
--- a/compiler/gen-tf-output/CMakeLists.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-file(GLOB_RECURSE SOURCES "src/*.cpp")
-
-add_executable(gen-tf-output ${SOURCES})
diff --git a/compiler/gen-tf-output/README.md b/compiler/gen-tf-output/README.md
deleted file mode 100644
index ca54c75d5..000000000
--- a/compiler/gen-tf-output/README.md
+++ /dev/null
@@ -1,13 +0,0 @@
-# gen-tf-output
-
-_gen-tf-output_ generates a file containing the result of running TensorFlow in HDF5 format.
-
-# How to use
-
-Use the following:
-
-```
-$ gen-tf-output <info_v2_path> <pb_path> <input_of_TensorFlow_path> <output_path_to_generate>
-```
-
-Use _gen_tf_input_ to generate `<input_of_TensorFlow_path>` file.
diff --git a/compiler/gen-tf-output/src/Driver.cpp b/compiler/gen-tf-output/src/Driver.cpp
deleted file mode 100644
index 209651987..000000000
--- a/compiler/gen-tf-output/src/Driver.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <iostream>
-#include <stdexcept>
-#include <string>
-
-namespace
-{
-
-void print_help()
-{
- std::cerr << "This generates a file that contains result of running TensorFlow" << std::endl
- << "Usage:" << std::endl
- << "\t"
- << "gen-tf-output <info_v2_path> <pb_path> <input_of_TensorFlow_path> "
- "<output_path_to_generate>"
- << std::endl;
-}
-
-void gen_tf_output(const std::string info_v2_path, const std::string pb_path,
- const std::string input_path, const std::string output_path)
-{
- throw std::runtime_error("Not Yet Implemented");
-}
-
-} // namespace
-
-int main(int argc, char **argv)
-{
- // TODO We need better args parsing in future
- if (argc != 5)
- {
- print_help();
- return 255;
- }
-
- gen_tf_output(argv[1], argv[2], argv[3], argv[4]);
-
- return 0;
-}
diff --git a/compiler/gen-tflite-output/CMakeLists.txt b/compiler/gen-tflite-output/CMakeLists.txt
deleted file mode 100644
index 1c9d2601d..000000000
--- a/compiler/gen-tflite-output/CMakeLists.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-file(GLOB_RECURSE SOURCES "src/*.cpp")
-
-add_executable(gen-tflite-output ${SOURCES})
diff --git a/compiler/gen-tflite-output/README.md b/compiler/gen-tflite-output/README.md
deleted file mode 100644
index a9c985006..000000000
--- a/compiler/gen-tflite-output/README.md
+++ /dev/null
@@ -1,14 +0,0 @@
-# gen-tflite-output
-
-_gen-tflite-output_ generates a file containing the result of running TensorFlow Lite interpreter
-in HDF5 format.
-
-# How to use
-
-Use the following:
-
-```
-$ gen-tflite-output <tflite_file_path> <input_file_path> <output_path_to_generate>
-```
-
-Use _gen_tf_input_ to generate `<input_file_path>` file.
diff --git a/compiler/gen-tflite-output/src/Driver.cpp b/compiler/gen-tflite-output/src/Driver.cpp
deleted file mode 100644
index 90559ec2f..000000000
--- a/compiler/gen-tflite-output/src/Driver.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <iostream>
-#include <stdexcept>
-#include <string>
-
-namespace
-{
-
-void print_help()
-{
- std::cerr << "This generates a file that contains result of running TensorFlow Lite interpreter"
- << std::endl
- << "Usage:" << std::endl
- << "\t"
- << "$ gen-tflite-output <tflite_file_path> <input_file_path> <output_path_to_generate>"
- << std::endl;
-}
-
-void gen_tflite_output(const std::string tflite_path, const std::string input_path,
- const std::string output_path)
-{
- throw std::runtime_error("Not Yet Implemented");
-}
-
-} // namespace
-
-int main(int argc, char **argv)
-{
- // TODO We need better args parsing in future
- if (argc != 4)
- {
- print_help();
- return 255;
- }
-
- gen_tflite_output(argv[1], argv[2], argv[3]);
-
- return 0;
-}
diff --git a/compiler/hermes-std/CMakeLists.txt b/compiler/hermes-std/CMakeLists.txt
index c7b02e14c..673d7056c 100644
--- a/compiler/hermes-std/CMakeLists.txt
+++ b/compiler/hermes-std/CMakeLists.txt
@@ -3,10 +3,11 @@ file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
add_library(hermes_std STATIC ${SOURCES})
-set_target_properties(hermes_std PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(hermes_std PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
target_include_directories(hermes_std PUBLIC include)
target_link_libraries(hermes_std PUBLIC hermes)
-target_link_libraries(hermes_std PRIVATE stdex)
target_link_libraries(hermes_std PRIVATE pepper_strcast)
# Let's apply nncc common compile options
#
@@ -23,5 +24,4 @@ endif(NOT ENABLE_TEST)
nnas_find_package(GTest REQUIRED)
GTest_AddTest(hermes_std_test ${TESTS})
-target_link_libraries(hermes_std_test stdex)
target_link_libraries(hermes_std_test hermes_std)
diff --git a/compiler/hermes-std/include/hermes/ConsoleReporter.h b/compiler/hermes-std/include/hermes/ConsoleReporter.h
index e09dd5785..c55e46a17 100644
--- a/compiler/hermes-std/include/hermes/ConsoleReporter.h
+++ b/compiler/hermes-std/include/hermes/ConsoleReporter.h
@@ -28,6 +28,10 @@ namespace hermes
struct ConsoleReporter final : public hermes::Sink
{
void notify(const Message *m) final;
+ void set_colored_mode(bool is_colored) { _is_colored = is_colored; }
+
+private:
+ bool _is_colored = false;
};
} // namespace hermes
diff --git a/compiler/hermes-std/src/ConsoleReporter.cpp b/compiler/hermes-std/src/ConsoleReporter.cpp
index 3cc9f09ed..524ed59d8 100644
--- a/compiler/hermes-std/src/ConsoleReporter.cpp
+++ b/compiler/hermes-std/src/ConsoleReporter.cpp
@@ -17,16 +17,68 @@
#include "hermes/ConsoleReporter.h"
#include <iostream>
+#include <cstdlib>
+#include <string>
namespace hermes
{
+static constexpr const char *kTermColorRedTextCode = "\033[0;31m";
+static constexpr const char *kTermColorGreenTextCode = "\033[0;32m";
+static constexpr const char *kTermColorOrangeTextCode = "\033[0;33m";
+static constexpr const char *kTermColorBlueTextCode = "\033[0;34m";
+static constexpr const char *kTermColorMagentaTextCode = "\033[0;35m";
+static constexpr const char *kTermColorCyanTextCode = "\033[0;36m";
+static constexpr const char *kTermColorWhiteTextCode = "\033[0;37m";
+
+static constexpr const char *kTermBoldTextCode = "\033[1m";
+static constexpr const char *kTermUnderlineTextCode = "\033[4m";
+static constexpr const char *kTermInverseTextCode = "\033[7m";
+static constexpr const char *kTermBoldOffTextCode = "\033[21m";
+static constexpr const char *kTermUnderlineOffTextCode = "\033[24m";
+static constexpr const char *kTermInverseOffTextCode = "\033[27m";
+
+static constexpr const char *kTermColorResetAllCode = "\033[0m";
+
void ConsoleReporter::notify(const hermes::Message *m)
{
+ const char *env_color_p = std::getenv("ONE_HERMES_COLOR");
+ if (env_color_p)
+ {
+ auto env_color_str = std::string(env_color_p);
+ if ((env_color_str == "1") or (env_color_str == "ON"))
+ _is_colored = true;
+ }
+
+ if (_is_colored)
+ {
+ switch (m->get_severity())
+ {
+ case FATAL:
+ std::cout << kTermColorRedTextCode << kTermBoldTextCode << kTermUnderlineTextCode;
+ break;
+ case ERROR:
+ std::cout << kTermColorRedTextCode;
+ break;
+ case WARN:
+ std::cout << kTermColorOrangeTextCode;
+ break;
+ case INFO:
+ std::cout << kTermColorGreenTextCode;
+ break;
+ case VERBOSE:
+ std::cout << kTermColorResetAllCode;
+ break;
+ };
+ }
for (uint32_t n = 0; n < m->text()->lines(); ++n)
{
std::cout << m->text()->line(n) << std::endl;
}
+ if (_is_colored)
+ {
+ std::cout << kTermColorResetAllCode;
+ }
}
} // namespace hermes
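
Note: a minimal sketch of driving the new colored path, mirroring the tests below; it assumes only what this patch shows (`set_colored_mode()`, the severity-tagged `text()` overload, and the `ONE_HERMES_COLOR=1`/`ON` environment override):

```cpp
#include "hermes/ConsoleReporter.h"

#include <memory>
#include <sstream>

int main()
{
  hermes::Message m;
  {
    std::stringstream ss;
    ss << "something went wrong" << std::endl;
    m.text(std::make_unique<hermes::MessageText>(ss), hermes::ERROR);
  }

  hermes::ConsoleReporter r;
  r.set_colored_mode(true); // or export ONE_HERMES_COLOR=1 (or "ON") instead
  r.notify(&m);             // prints in red, then emits the reset code
  return 0;
}
```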
diff --git a/compiler/hermes-std/src/ConsoleReporter.test.cpp b/compiler/hermes-std/src/ConsoleReporter.test.cpp
index c2e1f1c85..d959ff3d9 100644
--- a/compiler/hermes-std/src/ConsoleReporter.test.cpp
+++ b/compiler/hermes-std/src/ConsoleReporter.test.cpp
@@ -16,8 +16,7 @@
#include "hermes/ConsoleReporter.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <sstream>
#include <gtest/gtest.h>
@@ -37,7 +36,172 @@ TEST(ConsoleReporterTest, notify)
ss << "Hello" << std::endl;
- m.text(stdex::make_unique<hermes::MessageText>(ss));
+ m.text(std::make_unique<hermes::MessageText>(ss));
+ }
+
+ hermes::ConsoleReporter r;
+
+ ASSERT_NO_THROW(r.notify(&m));
+}
+
+TEST(ConsoleReporterTest, notify_fatal)
+{
+ hermes::Message m;
+ {
+ std::stringstream ss;
+
+ ss << "This message is colored as FATAL" << std::endl;
+
+ m.text(std::make_unique<hermes::MessageText>(ss), hermes::FATAL);
+ }
+
+ hermes::ConsoleReporter r;
+
+ r.set_colored_mode(true);
+ ASSERT_NO_THROW(r.notify(&m));
+}
+
+TEST(ConsoleReporterTest, notify_error)
+{
+ hermes::Message m;
+ {
+ std::stringstream ss;
+
+ ss << "This message is colored as ERROR" << std::endl;
+
+ m.text(std::make_unique<hermes::MessageText>(ss), hermes::ERROR);
+ }
+
+ hermes::ConsoleReporter r;
+
+ r.set_colored_mode(true);
+ ASSERT_NO_THROW(r.notify(&m));
+}
+
+TEST(ConsoleReporterTest, notify_warn)
+{
+ hermes::Message m;
+ {
+ std::stringstream ss;
+
+ ss << "This message is colored as WARN" << std::endl;
+
+ m.text(std::make_unique<hermes::MessageText>(ss), hermes::WARN);
+ }
+
+ hermes::ConsoleReporter r;
+
+ r.set_colored_mode(true);
+ ASSERT_NO_THROW(r.notify(&m));
+}
+
+TEST(ConsoleReporterTest, notify_info)
+{
+ hermes::Message m;
+ {
+ std::stringstream ss;
+
+ ss << "This message is colored as INFO" << std::endl;
+
+ m.text(std::make_unique<hermes::MessageText>(ss), hermes::INFO);
+ }
+
+ hermes::ConsoleReporter r;
+
+ r.set_colored_mode(true);
+ ASSERT_NO_THROW(r.notify(&m));
+}
+
+TEST(ConsoleReporterTest, notify_verbose)
+{
+ hermes::Message m;
+ {
+ std::stringstream ss;
+
+ ss << "This message is colored as VERBOSE" << std::endl;
+
+ m.text(std::make_unique<hermes::MessageText>(ss), hermes::VERBOSE);
+ }
+
+ hermes::ConsoleReporter r;
+
+ r.set_colored_mode(true);
+ ASSERT_NO_THROW(r.notify(&m));
+}
+
+TEST(ConsoleReporterTest, notify_fatal_NEG)
+{
+ hermes::Message m;
+ {
+ std::stringstream ss;
+
+ ss << "This message is not colored as FATAL" << std::endl;
+
+ m.text(std::make_unique<hermes::MessageText>(ss), hermes::FATAL);
+ }
+
+ hermes::ConsoleReporter r;
+
+ ASSERT_NO_THROW(r.notify(&m));
+}
+
+TEST(ConsoleReporterTest, notify_error_NEG)
+{
+ hermes::Message m;
+ {
+ std::stringstream ss;
+
+ ss << "This message is not colored as ERROR" << std::endl;
+
+ m.text(std::make_unique<hermes::MessageText>(ss), hermes::ERROR);
+ }
+
+ hermes::ConsoleReporter r;
+
+ ASSERT_NO_THROW(r.notify(&m));
+}
+
+TEST(ConsoleReporterTest, notify_warn_NEG)
+{
+ hermes::Message m;
+ {
+ std::stringstream ss;
+
+ ss << "This message is not colored as WARN" << std::endl;
+
+ m.text(std::make_unique<hermes::MessageText>(ss), hermes::WARN);
+ }
+
+ hermes::ConsoleReporter r;
+
+ ASSERT_NO_THROW(r.notify(&m));
+}
+
+TEST(ConsoleReporterTest, notify_info_NEG)
+{
+ hermes::Message m;
+ {
+ std::stringstream ss;
+
+ ss << "This message is not colored as INFO" << std::endl;
+
+ m.text(std::make_unique<hermes::MessageText>(ss), hermes::INFO);
+ }
+
+ hermes::ConsoleReporter r;
+
+ ASSERT_NO_THROW(r.notify(&m));
+}
+
+TEST(ConsoleReporterTest, notify_verbose_NEG)
+{
+ hermes::Message m;
+ {
+ std::stringstream ss;
+
+ ss << "This message is not colored as VERBOSE" << std::endl;
+
+ m.text(std::make_unique<hermes::MessageText>(ss), hermes::VERBOSE);
}
hermes::ConsoleReporter r;
diff --git a/compiler/hermes-std/src/EnvConfig.test.cpp b/compiler/hermes-std/src/EnvConfig.test.cpp
new file mode 100644
index 000000000..e4b39c167
--- /dev/null
+++ b/compiler/hermes-std/src/EnvConfig.test.cpp
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hermes/EnvConfig.h"
+
+#include <hermes/core/SourceSetting.h>
+
+#include <gtest/gtest.h>
+
+#include <stdlib.h>
+
+namespace
+{
+
+class Logger final : public hermes::Source
+{
+public:
+ Logger() = default;
+ ~Logger() = default;
+};
+
+std::string env_name("TEST_CONFIG");
+
+} // namespace
+
+TEST(EnvConfigTest, constructor)
+{
+ hermes::EnvConfig<hermes::EnvFormat::BooleanNumber> ec(env_name);
+
+ SUCCEED();
+}
+
+TEST(EnvConfigTest, configure)
+{
+ Logger logger;
+ hermes::SourceSetting ss;
+ hermes::EnvConfig<hermes::EnvFormat::BooleanNumber> ec(env_name);
+
+ ec.configure(&logger, ss);
+
+ SUCCEED();
+}
+
+TEST(EnvConfigTest, configure_enabled)
+{
+ setenv(env_name.c_str(), "1", 0);
+
+ Logger logger;
+ hermes::SourceSetting ss;
+ hermes::EnvConfig<hermes::EnvFormat::BooleanNumber> ec(env_name);
+
+ ec.configure(&logger, ss);
+
+ SUCCEED();
+}
diff --git a/compiler/hermes/CMakeLists.txt b/compiler/hermes/CMakeLists.txt
index 5debfbca0..d33e2d735 100644
--- a/compiler/hermes/CMakeLists.txt
+++ b/compiler/hermes/CMakeLists.txt
@@ -3,9 +3,10 @@ file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
add_library(hermes STATIC ${SOURCES})
-set_target_properties(hermes PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(hermes PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
target_include_directories(hermes PUBLIC include)
-target_link_libraries(hermes PRIVATE stdex)
# Let's apply nncc common compile options
#
# NOTE This will enable strict compilation (warnings as error).
@@ -22,7 +23,6 @@ nnas_find_package(GTest REQUIRED)
add_executable(hermes_test ${TESTS})
target_link_libraries(hermes_test gtest_main)
-target_link_libraries(hermes_test stdex)
target_link_libraries(hermes_test hermes)
add_test(hermes_test hermes_test)
diff --git a/compiler/hermes/include/hermes/core/Message.h b/compiler/hermes/include/hermes/core/Message.h
index 28cfd7942..d76f0eb6f 100644
--- a/compiler/hermes/include/hermes/core/Message.h
+++ b/compiler/hermes/include/hermes/core/Message.h
@@ -17,6 +17,8 @@
#ifndef __HERMES_MESSAGE_H__
#define __HERMES_MESSAGE_H__
+#include "Severity.h"
+
#include <memory>
#include <sstream>
#include <string>
@@ -37,7 +39,7 @@ public:
public:
/// @brief The number of lines
uint32_t lines(void) const { return _lines.size(); }
- /// @breif The content of a specific line
+ /// @brief The content of a specific line
const std::string &line(uint32_t n) const { return _lines.at(n); }
private:
@@ -48,7 +50,6 @@ private:
* @brief Message with metadata
*
* TODO Add "Timestamp" field
- * TODO Add "Severity" field
* TODO Support extensible "attribute" annotation
*/
class Message final
@@ -58,10 +59,17 @@ public:
public:
void text(std::unique_ptr<MessageText> &&text) { _text = std::move(text); }
+ void text(std::unique_ptr<MessageText> &&text, SeverityCategory severity)
+ {
+ _text = std::move(text);
+ _severity = severity;
+ }
const MessageText *text(void) const { return _text.get(); }
+ SeverityCategory get_severity(void) const { return _severity; }
private:
std::unique_ptr<MessageText> _text;
+ SeverityCategory _severity = SeverityCategory::INFO;
};
} // namespace hermes
diff --git a/compiler/hermes/include/hermes/core/MessageBuffer.h b/compiler/hermes/include/hermes/core/MessageBuffer.h
index a2f1de74d..1e2e9b9dc 100644
--- a/compiler/hermes/include/hermes/core/MessageBuffer.h
+++ b/compiler/hermes/include/hermes/core/MessageBuffer.h
@@ -18,6 +18,7 @@
#define __HERMES_MESSAGE_BUFFER_H__
#include "hermes/core/MessageBus.h"
+#include "hermes/core/Severity.h"
#include <ostream>
#include <sstream>
@@ -34,6 +35,7 @@ class MessageBuffer final
{
public:
MessageBuffer(MessageBus *);
+ MessageBuffer(MessageBus *bus, SeverityCategory severity);
~MessageBuffer();
public:
@@ -41,6 +43,7 @@ public:
private:
MessageBus *_bus;
+ SeverityCategory _severity = SeverityCategory::INFO;
/// @brief Content buffer
std::stringstream _ss;
diff --git a/compiler/hermes/requires.cmake b/compiler/hermes/requires.cmake
index a4855289c..e69de29bb 100644
--- a/compiler/hermes/requires.cmake
+++ b/compiler/hermes/requires.cmake
@@ -1 +0,0 @@
-require("stdex")
diff --git a/compiler/hermes/src/core/MessageBuffer.cpp b/compiler/hermes/src/core/MessageBuffer.cpp
index 175a45d3f..ce1f176d9 100644
--- a/compiler/hermes/src/core/MessageBuffer.cpp
+++ b/compiler/hermes/src/core/MessageBuffer.cpp
@@ -16,7 +16,7 @@
#include "hermes/core/MessageBuffer.h"
-#include <stdex/Memory.h>
+#include <memory>
namespace hermes
{
@@ -26,13 +26,19 @@ MessageBuffer::MessageBuffer(MessageBus *bus) : _bus{bus}
// DO NOTHING
}
+MessageBuffer::MessageBuffer(MessageBus *bus, SeverityCategory severity)
+ : _bus{bus}, _severity{severity}
+{
+ // DO NOTHING
+}
+
MessageBuffer::~MessageBuffer()
{
// NOTE The current implementation is unsafe as it may throw an exception.
// TODO Find a better safe implementation.
- auto msg = stdex::make_unique<Message>();
+ auto msg = std::make_unique<Message>();
- msg->text(stdex::make_unique<MessageText>(_ss));
+ msg->text(std::make_unique<MessageText>(_ss), _severity);
_bus->post(std::move(msg));
}
diff --git a/compiler/hermes/src/core/Source.cpp b/compiler/hermes/src/core/Source.cpp
index 33f8b0570..cb60d9a31 100644
--- a/compiler/hermes/src/core/Source.cpp
+++ b/compiler/hermes/src/core/Source.cpp
@@ -16,8 +16,7 @@
#include "hermes/core/Source.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
namespace hermes
@@ -61,10 +60,9 @@ void Source::deactivate(void)
void Source::reload(const Config *c) { c->configure(this, _setting); }
-std::unique_ptr<MessageBuffer> Source::buffer(const Severity &) const
+std::unique_ptr<MessageBuffer> Source::buffer(const Severity &severity) const
{
- // TODO Pass Severity
- return stdex::make_unique<MessageBuffer>(_bus);
+ return std::make_unique<MessageBuffer>(_bus, severity.category());
}
} // namespace hermes
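
Note: taken together, the hunks above mean a buffer obtained from a `Source` now remembers its severity and stamps it on the `Message` it posts at destruction. A minimal sketch of that flow; the stream accessor is elided from the hunk and assumed here to be `os()`, and the bus pointer would come from an active `hermes::Source`:

```cpp
#include "hermes/core/MessageBuffer.h"

// Hypothetical helper: `bus` is supplied by an active hermes::Source.
void report_failure(hermes::MessageBus *bus)
{
  hermes::MessageBuffer buf{bus, hermes::SeverityCategory::ERROR};
  buf.os() << "fallback path taken"; // os() is assumed, not shown in the hunk
} // destructor fires here: the text is posted as a Message tagged ERROR
```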
diff --git a/compiler/imgdata2hdf5/CMakeLists.txt b/compiler/imgdata2hdf5/CMakeLists.txt
deleted file mode 100644
index e2d9154f5..000000000
--- a/compiler/imgdata2hdf5/CMakeLists.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-set(imgdata2hdf5_FILE "imgdata2hdf5.py")
-set(imgdata2hdf5_SRC "${CMAKE_CURRENT_SOURCE_DIR}/${imgdata2hdf5_FILE}")
-set(imgdata2hdf5_BIN "${CMAKE_CURRENT_BINARY_DIR}/${imgdata2hdf5_FILE}")
-
-add_custom_command(OUTPUT ${imgdata2hdf5_BIN}
- COMMAND ${CMAKE_COMMAND} -E copy "${imgdata2hdf5_SRC}" "${imgdata2hdf5_BIN}"
- DEPENDS ${imgdata2hdf5_SRC}
- COMMENT "Generate ${imgdata2hdf5_BIN}"
- )
-
-add_custom_target(imgdata2hdf5 ALL DEPENDS ${imgdata2hdf5_BIN})
-
-install(FILES ${imgdata2hdf5_BIN} DESTINATION bin)
diff --git a/compiler/imgdata2hdf5/README.md b/compiler/imgdata2hdf5/README.md
deleted file mode 100644
index 54743e070..000000000
--- a/compiler/imgdata2hdf5/README.md
+++ /dev/null
@@ -1,24 +0,0 @@
-# imgdata2hdf5
-
-_imgdata2hdf5_ is a tool to convert raw image data (assumed to be pre-processed) to an hdf5 file.
-
-## Prerequisite
-- Raw image data pre-processed for the corresponding DNN model
-- List of data to convert (saved in the text file)
-- Python installed with _numpy_ and _h5py_ (See docs/how-to-prepare-virtualenv.txt)
-
-## Example
-```
-python imgdata2hdf5.py \
-> --data_list=tmp/imgdata/datalist.txt
-> --output_path=tmp/imgdata/imgdata.hdf5
-```
-
-## Arguments
-```
- -h, --help Show this help message and exit
- -l DATA_LIST, --data_list DATA_LIST
- Path to the text file which lists the absolute paths of the raw image data files to be converted.
- -o OUTPUT_PATH, --output_path OUTPUT_PATH
- Path to the output hdf5 file.
-```
diff --git a/compiler/imgdata2hdf5/imgdata2hdf5.py b/compiler/imgdata2hdf5/imgdata2hdf5.py
deleted file mode 100755
index 1ff912a2f..000000000
--- a/compiler/imgdata2hdf5/imgdata2hdf5.py
+++ /dev/null
@@ -1,60 +0,0 @@
-#!/usr/bin/env python3
-import h5py as h5
-import numpy as np
-import argparse
-import glob
-import os
-
-parser = argparse.ArgumentParser()
-parser.add_argument(
- "-l",
- "--data_list",
- type=str,
- help=
- "Path to the text file which lists the absolute paths of the raw image data files to be converted.",
- required=True)
-parser.add_argument(
- "-o", "--output_path", type=str, help="Path to the output hdf5 file.", required=True)
-
-args = parser.parse_args()
-data_list = args.data_list
-output_path = args.output_path
-
-# Create h5 file
-h5_file = h5.File(output_path, 'w')
-group = h5_file.create_group("value")
-# We assume the raw input data have the correct type/shape for the corresponding model
-# If this flag is set in the hdf5 file, record-minmax will skip type/shape check
-group.attrs['rawData'] = '1'
-
-if os.path.isfile(data_list) == False:
- raise SystemExit("No such file. " + data_list)
-
-# Data list
-datalist = []
-with open(data_list, 'r') as f:
- lines = f.readlines()
- for line in lines:
- if line.strip():
- filename = line.rstrip()
- if os.path.isfile(filename):
- datalist.append(filename)
- else:
- raise SystemExit("No such file. " + filename)
-
-# Input files
-num_converted = 0
-for imgdata in datalist:
- with open(imgdata, 'rb') as f:
- sample = group.create_group(str(num_converted))
- num_converted += 1
- filename = os.path.basename(imgdata)
- sample.attrs['desc'] = filename
- raw_data = bytearray(f.read())
- # The target model is DNN for handling an input image
- sample.create_dataset('0', data=raw_data)
-
-h5_file.close()
-
-print("Raw image data have been packaged to " + output_path)
-print("Number of packaged data: " + str(num_converted))
diff --git a/compiler/kuma/src/IntervalSet.h b/compiler/kuma/src/IntervalSet.h
index 3b6c5f666..1e26581c0 100644
--- a/compiler/kuma/src/IntervalSet.h
+++ b/compiler/kuma/src/IntervalSet.h
@@ -17,6 +17,7 @@
#ifndef __KUMA_DETAILS_LIVE_INTERVAL_SET_H__
#define __KUMA_DETAILS_LIVE_INTERVAL_SET_H__
+#include <cstdint>
#include <map>
namespace kuma
diff --git a/compiler/loco/CMakeLists.txt b/compiler/loco/CMakeLists.txt
index f94052840..d885805d7 100644
--- a/compiler/loco/CMakeLists.txt
+++ b/compiler/loco/CMakeLists.txt
@@ -6,7 +6,6 @@ add_library(loco SHARED ${SOURCES})
target_include_directories(loco PUBLIC include)
# TODO Remove dependencies on angkor library
target_link_libraries(loco PUBLIC angkor)
-target_link_libraries(loco PRIVATE stdex)
# Let's apply nncc common compile options
#
# NOTE This will enable strict compilation (warnings as error).
@@ -15,6 +14,8 @@ target_link_libraries(loco PRIVATE nncc_common)
target_link_libraries(loco PUBLIC nncc_coverage)
# Q. HOW TO MAKE DEV PACKAGE(?)
install(TARGETS loco DESTINATION lib)
+install(DIRECTORY include/ DESTINATION include
+ FILES_MATCHING PATTERN "*.h" PATTERN "*.lst")
if(NOT ENABLE_TEST)
return()
@@ -24,5 +25,4 @@ endif(NOT ENABLE_TEST)
nnas_find_package(GTest REQUIRED)
GTest_AddTest(loco_test ${TESTS})
-target_link_libraries(loco_test stdex)
target_link_libraries(loco_test loco)
diff --git a/compiler/loco/include/loco/IR/DataType.h b/compiler/loco/include/loco/IR/DataType.h
index b07022bf5..b89edf29e 100644
--- a/compiler/loco/include/loco/IR/DataType.h
+++ b/compiler/loco/include/loco/IR/DataType.h
@@ -44,6 +44,9 @@ enum class DataType
// WARNING the size of Bool may vary for NN frameworks
// TODO we need to find a way to resolve this issue
BOOL, // Boolean
+
+ // WARNING STRING is NOT fully supported yet
+ STRING, // String
};
} // namespace loco
diff --git a/compiler/loco/include/loco/IR/DataTypeTraits.h b/compiler/loco/include/loco/IR/DataTypeTraits.h
index c186300de..6be46c3b3 100644
--- a/compiler/loco/include/loco/IR/DataTypeTraits.h
+++ b/compiler/loco/include/loco/IR/DataTypeTraits.h
@@ -21,6 +21,7 @@
#include <cassert>
#include <cstdint>
+#include <stdexcept>
namespace loco
{
@@ -52,6 +53,12 @@ template <> struct DataTypeImpl<DataType::S16>
using Type = int16_t;
};
+template <> struct DataTypeImpl<DataType::U16>
+{
+ // Use C++ uint16_t type for unsigned 16bit integer
+ using Type = uint16_t;
+};
+
template <> struct DataTypeImpl<DataType::S32>
{
// Use C++ int32_t type for 32bit integer
@@ -70,12 +77,31 @@ template <> struct DataTypeImpl<DataType::S64>
using Type = int64_t;
};
+template <> struct DataTypeImpl<DataType::U64>
+{
+ // Use C++ uint64_t type for unsigned 64bit integer
+ using Type = uint64_t;
+};
+
+template <> struct DataTypeImpl<DataType::FLOAT16>
+{
+ // float16 type with 16bit value, encoded with help of FP16 library
+ // https://github.com/Maratyszcza/FP16/
+ using Type = uint16_t;
+};
+
template <> struct DataTypeImpl<DataType::FLOAT32>
{
// Use C++ float type for IEEE 32-bit floating-point numbers
using Type = float;
};
+template <> struct DataTypeImpl<DataType::FLOAT64>
+{
+ // Use C++ double type for IEEE 64-bit floating-point numbers
+ using Type = double;
+};
+
// NOTE DataTypeImpl for BOOL is subject to change
template <> struct DataTypeImpl<DataType::BOOL>
{
@@ -83,6 +109,12 @@ template <> struct DataTypeImpl<DataType::BOOL>
using Type = uint8_t;
};
+template <> struct DataTypeImpl<DataType::STRING>
+{
+ // Use C++ std::string type for STRING
+ using Type = std::string;
+};
+
/**
* @brief Returns the size of the data type.
* @note If you need the size at compile time, use `sizeof(typename DataTypeImpl<DT>::Type)`.
@@ -97,16 +129,27 @@ inline uint32_t size(DataType data_type)
return sizeof(DataTypeImpl<DataType::U8>::Type);
case DataType::S16:
return sizeof(DataTypeImpl<DataType::S16>::Type);
+ case DataType::U16:
+ return sizeof(DataTypeImpl<DataType::U16>::Type);
case DataType::S32:
return sizeof(DataTypeImpl<DataType::S32>::Type);
case DataType::U32:
return sizeof(DataTypeImpl<DataType::U32>::Type);
case DataType::S64:
return sizeof(DataTypeImpl<DataType::S64>::Type);
+ case DataType::U64:
+ return sizeof(DataTypeImpl<DataType::U64>::Type);
+ case DataType::FLOAT16:
+ return sizeof(DataTypeImpl<DataType::FLOAT16>::Type);
case DataType::FLOAT32:
return sizeof(DataTypeImpl<DataType::FLOAT32>::Type);
+ case DataType::FLOAT64:
+ return sizeof(DataTypeImpl<DataType::FLOAT64>::Type);
case DataType::BOOL:
return sizeof(DataTypeImpl<DataType::BOOL>::Type);
+ case DataType::STRING:
+ // STRING is variable length. Cannot decide size by type
+ throw std::runtime_error("Invalid size call with STRING type");
default:
// TODO Support remaining data types.
assert(false);
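
Note: a minimal sketch of the widened `loco::size()` contract after this hunk; the values follow directly from the `DataTypeImpl` specializations added above:

```cpp
#include <loco/IR/DataTypeTraits.h>

#include <iostream>
#include <stdexcept>

int main()
{
  std::cout << loco::size(loco::DataType::FLOAT16) << std::endl; // 2 (uint16_t storage)
  std::cout << loco::size(loco::DataType::FLOAT64) << std::endl; // 8

  try
  {
    loco::size(loco::DataType::STRING); // variable-length: no fixed size
  }
  catch (const std::runtime_error &e)
  {
    std::cerr << e.what() << std::endl; // "Invalid size call with STRING type"
  }
  return 0;
}
```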
diff --git a/compiler/loco/include/loco/IR/Graph.h b/compiler/loco/include/loco/IR/Graph.h
index a820aba91..176e6cce1 100644
--- a/compiler/loco/include/loco/IR/Graph.h
+++ b/compiler/loco/include/loco/IR/Graph.h
@@ -264,7 +264,6 @@ struct GraphOutputIndexQueryService : public DialectService
virtual GraphOutputIndex index(const Node *node) const = 0;
};
-// TODO Use "const Graph *"
std::vector<Node *> output_nodes(Graph *);
/**
diff --git a/compiler/loco/include/loco/IR/NodeMixins.h b/compiler/loco/include/loco/IR/NodeMixins.h
index f0e34b0ba..fcadcaba7 100644
--- a/compiler/loco/include/loco/IR/NodeMixins.h
+++ b/compiler/loco/include/loco/IR/NodeMixins.h
@@ -83,7 +83,7 @@ private:
std::vector<Dimension> _dims;
};
-template <unsigned N> struct FixedArity
+template <uint32_t N> struct FixedArity
{
template <typename Base> class Mixin : public virtual Base
{
@@ -99,7 +99,7 @@ template <unsigned N> struct FixedArity
virtual ~Mixin() = default;
public:
- unsigned arity(void) const final { return N; }
+ uint32_t arity(void) const final { return N; }
Node *arg(uint32_t n) const final { return _args.at(n)->node(); }
@@ -113,7 +113,7 @@ template <unsigned N> struct FixedArity
protected:
// This API allows inherited classes to access "_args" field.
- Use *at(unsigned n) const { return _args.at(n).get(); }
+ Use *at(uint32_t n) const { return _args.at(n).get(); }
private:
std::array<std::unique_ptr<Use>, N> _args{};
diff --git a/compiler/loco/include/loco/IR/Nodes.h b/compiler/loco/include/loco/IR/Nodes.h
index fecfad28d..63b1181bb 100644
--- a/compiler/loco/include/loco/IR/Nodes.h
+++ b/compiler/loco/include/loco/IR/Nodes.h
@@ -49,7 +49,7 @@ class GraphOutput;
* @brief Make a value visible to user
*/
class Push /* to user */ final
- : public CanonicalNodeDef<CanonicalOpcode::Push, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::Push, FixedArity<1>::Mixin>
{
public:
Push() = default;
@@ -91,8 +91,8 @@ Push *push_node(Graph *g, const GraphOutputIndex &index);
* @brief Create a value from user data
*/
class Pull /* from user */ final
- : public CanonicalNodeDef<CanonicalOpcode::Pull, FixedArity<0>::Mixin,
- With<NodeTrait::TensorShape>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::Pull, FixedArity<0>::Mixin,
+ With<NodeTrait::TensorShape>::Mixin>
{
public:
Pull() = default;
@@ -213,8 +213,8 @@ public:
* }
*/
class ConstGen final
- : public CanonicalNodeDef<CanonicalOpcode::ConstGen, FixedArity<0>::Mixin,
- With<NodeTrait::DataType>::Mixin, With<NodeTrait::TensorShape>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::ConstGen, FixedArity<0>::Mixin,
+ With<NodeTrait::DataType>::Mixin, With<NodeTrait::TensorShape>::Mixin>
{
public:
ConstGen() = default;
@@ -376,7 +376,7 @@ private:
* @brief Create a feature map from a tensor
*/
class FeatureEncode final
- : public CanonicalNodeDef<CanonicalOpcode::FeatureEncode, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::FeatureEncode, FixedArity<1>::Mixin>
{
public:
Node *input(void) const { return at(0)->node(); }
@@ -395,7 +395,7 @@ private:
* @brief Create a tensor from a feature map
*/
class FeatureDecode final
- : public CanonicalNodeDef<CanonicalOpcode::FeatureDecode, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::FeatureDecode, FixedArity<1>::Mixin>
{
public:
Node *input(void) const { return at(0)->node(); }
@@ -414,7 +414,7 @@ private:
* @brief Create a filter from a tensor
*/
class FilterEncode final
- : public CanonicalNodeDef<CanonicalOpcode::FilterEncode, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::FilterEncode, FixedArity<1>::Mixin>
{
public:
Node *input(void) const { return at(0)->node(); }
@@ -433,7 +433,7 @@ private:
* @brief Create a tensor from a filter
*/
class FilterDecode final
- : public CanonicalNodeDef<CanonicalOpcode::FilterDecode, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::FilterDecode, FixedArity<1>::Mixin>
{
public:
Node *input(void) const { return at(0)->node(); }
@@ -452,7 +452,7 @@ private:
* @brief Create a depthwise filter from a tensor
*/
class DepthwiseFilterEncode final
- : public CanonicalNodeDef<CanonicalOpcode::DepthwiseFilterEncode, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::DepthwiseFilterEncode, FixedArity<1>::Mixin>
{
public:
Node *input(void) const { return at(0)->node(); }
@@ -471,7 +471,7 @@ private:
* @brief Create a tensor from a depthwise filter
*/
class DepthwiseFilterDecode final
- : public CanonicalNodeDef<CanonicalOpcode::DepthwiseFilterDecode, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::DepthwiseFilterDecode, FixedArity<1>::Mixin>
{
public:
Node *input(void) const { return at(0)->node(); }
@@ -512,8 +512,8 @@ template <ReshapeType RT> class Reshape;
*/
template <>
class Reshape<ReshapeType::Fixed> final
- : public CanonicalNodeDef<CanonicalOpcode::FixedReshape, FixedArity<1>::Mixin,
- With<NodeTrait::TensorShape>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::FixedReshape, FixedArity<1>::Mixin,
+ With<NodeTrait::TensorShape>::Mixin>
{
public:
Node *input(void) const { return at(0)->node(); }
@@ -529,7 +529,7 @@ using FixedReshape = Reshape<ReshapeType::Fixed>;
* concatenated along the given axis.
*/
class TensorConcat final
- : public CanonicalNodeDef<CanonicalOpcode::TensorConcat, FixedArity<2>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::TensorConcat, FixedArity<2>::Mixin>
{
public:
Node *lhs(void) const { return at(0)->node(); }
@@ -578,7 +578,7 @@ private:
* @brief Depthwise 2D Convolution
*/
class DepthwiseConv2D final
- : public CanonicalNodeDef<CanonicalOpcode::DepthwiseConv2D, FixedArity<2>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::DepthwiseConv2D, FixedArity<2>::Mixin>
{
public:
Node *ifm(void) const { return at(0)->node(); }
@@ -616,7 +616,7 @@ enum class ReduceFunc
* @note All the reduce functions always keep dimensions
*/
class TensorReduce final
- : public CanonicalNodeDef<CanonicalOpcode::TensorReduce, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::TensorReduce, FixedArity<1>::Mixin>
{
public:
Node *input(void) const { return at(0)->node(); }
@@ -684,7 +684,7 @@ private:
* With this, output shape is uniquely determined by all inputs and attributes.
*/
class TransposedConv2D final
- : public CanonicalNodeDef<CanonicalOpcode::TransposedConv2D, FixedArity<2>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::TransposedConv2D, FixedArity<2>::Mixin>
{
public:
Node *ifm(void) const { return at(0)->node(); }
@@ -714,11 +714,11 @@ private:
template <Domain D> class Softmax;
/**
-* @brief Computes softmax activations for Tensor domain
-*/
+ * @brief Computes softmax activations for Tensor domain
+ */
template <>
class Softmax<Domain::Tensor> final
- : public CanonicalNodeDef<CanonicalOpcode::TensorSoftmax, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::TensorSoftmax, FixedArity<1>::Mixin>
{
public:
Softmax() = default;
@@ -777,7 +777,7 @@ template <Domain D> class BiasAdd;
*/
template <>
class BiasAdd<Domain::Tensor> final
- : public CanonicalNodeDef<CanonicalOpcode::TensorBiasAdd, FixedArity<2>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::TensorBiasAdd, FixedArity<2>::Mixin>
{
public:
BiasAdd() = default;
@@ -813,7 +813,7 @@ using TensorBiasAdd = BiasAdd<Domain::Tensor>;
*/
template <>
class BiasAdd<Domain::Feature> final
- : public CanonicalNodeDef<CanonicalOpcode::FeatureBiasAdd, FixedArity<2>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::FeatureBiasAdd, FixedArity<2>::Mixin>
{
public:
BiasAdd() = default;
@@ -848,7 +848,7 @@ using FeatureBiasAdd = BiasAdd<Domain::Feature>;
* [padding.front(0) + 1 + padding.back(0), padding.front(1) + 2 + padding.back(1)] = [4,9].
*/
class TensorConstantPad final
- : public CanonicalNodeDef<CanonicalOpcode::TensorConstantPad, FixedArity<2>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::TensorConstantPad, FixedArity<2>::Mixin>
{
public:
Node *input(void) const { return at(0)->node(); }
@@ -951,7 +951,7 @@ public:
* @brief Elementwise Sqrt of input
*/
class EltwiseSqrt final
- : public CanonicalNodeDef<CanonicalOpcode::EltwiseSqrt, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::EltwiseSqrt, FixedArity<1>::Mixin>
{
public:
EltwiseSqrt() = default;
@@ -976,7 +976,7 @@ public:
* TODO Explain the operation semantics
*/
class TensorBroadcast final
- : public CanonicalNodeDef<CanonicalOpcode::TensorBroadcast, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::TensorBroadcast, FixedArity<1>::Mixin>
{
public:
TensorBroadcast() = default;
@@ -1014,7 +1014,7 @@ private:
* MatrixEncode currently requires a rank-2 Tensor as its input.
*/
class MatrixEncode final
- : public CanonicalNodeDef<CanonicalOpcode::MatrixEncode, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::MatrixEncode, FixedArity<1>::Mixin>
{
public:
MatrixEncode() = default;
@@ -1038,7 +1038,7 @@ private:
* MatrixDecode currently requires a Matrix as its input.
*/
class MatrixDecode final
- : public CanonicalNodeDef<CanonicalOpcode::MatrixDecode, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::MatrixDecode, FixedArity<1>::Mixin>
{
public:
MatrixDecode() = default;
@@ -1086,7 +1086,7 @@ public:
* Input and output belong to tensor domain.
*/
class TensorTranspose final
- : public CanonicalNodeDef<CanonicalOpcode::TensorTranspose, FixedArity<1>::Mixin>
+ : public CanonicalNodeDef<CanonicalOpcode::TensorTranspose, FixedArity<1>::Mixin>
{
public:
TensorTranspose() = default;
diff --git a/compiler/loco/include/loco/IR/Padding2D.h b/compiler/loco/include/loco/IR/Padding2D.h
index 30557a891..b50a8045f 100644
--- a/compiler/loco/include/loco/IR/Padding2D.h
+++ b/compiler/loco/include/loco/IR/Padding2D.h
@@ -32,7 +32,7 @@ public:
public:
Padding2D(uint32_t top, uint32_t bottom, uint32_t left, uint32_t right)
- : _top{top}, _bottom{bottom}, _left{left}, _right{right}
+ : _top{top}, _bottom{bottom}, _left{left}, _right{right}
{
// DO NOTHING
}
diff --git a/compiler/loco/requires.cmake b/compiler/loco/requires.cmake
new file mode 100644
index 000000000..654db88c3
--- /dev/null
+++ b/compiler/loco/requires.cmake
@@ -0,0 +1 @@
+require("angkor")
diff --git a/compiler/loco/src/ADT/AnnotatedItem.test.cpp b/compiler/loco/src/ADT/AnnotatedItem.test.cpp
index 45ca87d75..87e597f5c 100644
--- a/compiler/loco/src/ADT/AnnotatedItem.test.cpp
+++ b/compiler/loco/src/ADT/AnnotatedItem.test.cpp
@@ -17,7 +17,8 @@
#include "loco/ADT/AnnotatedItem.h"
#include <gtest/gtest.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
@@ -31,7 +32,7 @@ template <int N> struct DerivedAnnotation final : public Annotation
{
static std::unique_ptr<DerivedAnnotation<N>> make(void)
{
- return stdex::make_unique<DerivedAnnotation<N>>();
+ return std::make_unique<DerivedAnnotation<N>>();
}
};
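
This is the first of many hunks below that replace the repo-local stdex::make_unique with the C++14 standard one; only the include and the namespace change. A self-contained sketch of the replacement:

#include <memory>

struct Foo
{
  int value = 42;
};

int main()
{
  // Previously: #include <stdex/Memory.h> and stdex::make_unique<Foo>()
  auto p = std::make_unique<Foo>(); // C++14 standard equivalent
  return p->value == 42 ? 0 : 1;
}
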
diff --git a/compiler/loco/src/IR/CanonicalDialect.cpp b/compiler/loco/src/IR/CanonicalDialect.cpp
index ea956b80e..9438956f8 100644
--- a/compiler/loco/src/IR/CanonicalDialect.cpp
+++ b/compiler/loco/src/IR/CanonicalDialect.cpp
@@ -18,8 +18,7 @@
#include "loco/IR/Graph.h"
#include "loco/IR/Nodes.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
#include <stdexcept>
@@ -55,7 +54,7 @@ namespace loco
CanonicalDialect::CanonicalDialect()
{
- service<GraphOutputIndexQueryService>(stdex::make_unique<GraphOutputIndexQueryServiceImpl>());
+ service<GraphOutputIndexQueryService>(std::make_unique<GraphOutputIndexQueryServiceImpl>());
}
Dialect *CanonicalDialect::get(void)
diff --git a/compiler/loco/src/IR/Dialect.test.cpp b/compiler/loco/src/IR/Dialect.test.cpp
index 3af303375..447f443f2 100644
--- a/compiler/loco/src/IR/Dialect.test.cpp
+++ b/compiler/loco/src/IR/Dialect.test.cpp
@@ -16,7 +16,7 @@
#include "loco/IR/Dialect.h"
-#include <stdex/Memory.h>
+#include <memory>
#include <gtest/gtest.h>
@@ -31,7 +31,7 @@ TEST(DialectTest, service)
struct MockDialect final : public loco::Dialect
{
- MockDialect() { service<S1>(stdex::make_unique<S1>()); }
+ MockDialect() { service<S1>(std::make_unique<S1>()); }
};
MockDialect dialect;
diff --git a/compiler/loco/src/IR/Graph.cpp b/compiler/loco/src/IR/Graph.cpp
index 8073d4545..98b22c3b6 100644
--- a/compiler/loco/src/IR/Graph.cpp
+++ b/compiler/loco/src/IR/Graph.cpp
@@ -16,8 +16,7 @@
#include "loco/IR/Graph.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
namespace
@@ -25,7 +24,7 @@ namespace
std::unique_ptr<loco::TensorShape> make_tensor_shape(std::initializer_list<loco::Dimension> dims)
{
- auto tensor_shape = stdex::make_unique<loco::TensorShape>();
+ auto tensor_shape = std::make_unique<loco::TensorShape>();
tensor_shape->rank(dims.size());
{
@@ -50,14 +49,11 @@ void Mixin<Trait::TensorShaped>::shape(std::initializer_list<Dimension> dims)
shape(make_tensor_shape(dims));
}
-GraphInput *Graph::InputContext::create(void)
-{
- return take(stdex::make_unique<GraphInput>(size()));
-}
+GraphInput *Graph::InputContext::create(void) { return take(std::make_unique<GraphInput>(size())); }
GraphOutput *Graph::OutputContext::create(void)
{
- return take(stdex::make_unique<GraphOutput>(size()));
+ return take(std::make_unique<GraphOutput>(size()));
}
std::set<loco::Node *> all_nodes(loco::Graph *g)
diff --git a/compiler/loco/src/IR/Graph.test.cpp b/compiler/loco/src/IR/Graph.test.cpp
index ad6894f30..99de8fc40 100644
--- a/compiler/loco/src/IR/Graph.test.cpp
+++ b/compiler/loco/src/IR/Graph.test.cpp
@@ -79,6 +79,18 @@ TEST(GraphTest, create_and_destroy_node)
auto pull = g->nodes()->create<loco::Pull>();
ASSERT_NO_THROW(g->nodes()->destroy(pull));
+}
+
+TEST(GraphTest, DISABLED_create_and_destroy_node_again)
+{
+ auto g = loco::make_graph();
+
+ auto pull = g->nodes()->create<loco::Pull>();
+
+ ASSERT_NO_THROW(g->nodes()->destroy(pull));
+ // NOTE calling destroy again raises a segmentation fault
+ // see https://github.com/Samsung/ONE/issues/9968
+ // TODO fix this
ASSERT_THROW(g->nodes()->destroy(pull), std::invalid_argument);
}
@@ -108,7 +120,7 @@ namespace
{
// temp node with multiple params for ctor. loco::CanonicalOpcode::ReLU is used for simplicity
class ParamCtorNode
- : public loco::CanonicalNodeDef<loco::CanonicalOpcode::ReLU, loco::FixedArity<0>::Mixin>
+ : public loco::CanonicalNodeDef<loco::CanonicalOpcode::ReLU, loco::FixedArity<0>::Mixin>
{
public:
ParamCtorNode(int i, float f)
@@ -139,7 +151,6 @@ TEST(GraphTest, consturctor_with_param_node)
ASSERT_FLOAT_EQ(test_node->f(), 11.11);
ASSERT_NO_THROW(g->nodes()->destroy(test_node));
- ASSERT_THROW(g->nodes()->destroy(test_node), std::invalid_argument);
}
TEST(GraphTest, getters_over_const_instance)
diff --git a/compiler/loco/src/IR/Nodes.test.cpp b/compiler/loco/src/IR/Nodes.test.cpp
index 0b2210357..bd1c74253 100644
--- a/compiler/loco/src/IR/Nodes.test.cpp
+++ b/compiler/loco/src/IR/Nodes.test.cpp
@@ -523,11 +523,11 @@ TEST(TensorBroadcastTest, mapping)
{
loco::TensorBroadcast tensor_broadcast_node;
- ASSERT_EQ(false, tensor_broadcast_node.mapping()->defined(0));
+ ASSERT_FALSE(tensor_broadcast_node.mapping()->defined(0));
tensor_broadcast_node.mapping()->dim(0) = 3;
- ASSERT_EQ(true, tensor_broadcast_node.mapping()->defined(0));
+ ASSERT_TRUE(tensor_broadcast_node.mapping()->defined(0));
ASSERT_EQ(3, tensor_broadcast_node.mapping()->dim(0));
}
diff --git a/compiler/loco/src/IR/PermutingCodec.cpp b/compiler/loco/src/IR/PermutingCodec.cpp
index 2857e5e28..e9fd1fb12 100644
--- a/compiler/loco/src/IR/PermutingCodec.cpp
+++ b/compiler/loco/src/IR/PermutingCodec.cpp
@@ -16,8 +16,7 @@
#include "loco/IR/PermutingCodec.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
#include <set>
#include <stdexcept>
@@ -139,7 +138,7 @@ TensorIndex PermutingEncoder<Domain::Feature>::value(const FeatureIndex &in) con
std::unique_ptr<FeatureEncoder> PermutingEncoder<Domain::Feature>::clone(void) const
{
- return stdex::make_unique<PermutingEncoder<Domain::Feature>>(_perm);
+ return std::make_unique<PermutingEncoder<Domain::Feature>>(_perm);
}
bool PermutingEncoder<Domain::Feature>::valid(void) const { return ::valid(_perm); }
@@ -179,7 +178,7 @@ FeatureIndex PermutingDecoder<Domain::Feature>::value(const TensorIndex &in) con
std::unique_ptr<FeatureDecoder> PermutingDecoder<Domain::Feature>::clone(void) const
{
- return stdex::make_unique<PermutingDecoder<Domain::Feature>>(_perm);
+ return std::make_unique<PermutingDecoder<Domain::Feature>>(_perm);
}
bool PermutingDecoder<Domain::Feature>::valid(void) const { return ::valid(_perm); }
diff --git a/compiler/loco/src/IR/Verifier.test.cpp b/compiler/loco/src/IR/Verifier.test.cpp
index 8c40a5058..8a92a35f0 100644
--- a/compiler/loco/src/IR/Verifier.test.cpp
+++ b/compiler/loco/src/IR/Verifier.test.cpp
@@ -18,10 +18,10 @@
#include <gtest/gtest.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <vector>
-using stdex::make_unique;
+using std::make_unique;
TEST(VerifierTest, valid_minimal)
{
diff --git a/compiler/loco/src/Service/CanonicalShapeInferenceRule.cpp b/compiler/loco/src/Service/CanonicalShapeInferenceRule.cpp
index 6d5adc525..a0f0e892a 100644
--- a/compiler/loco/src/Service/CanonicalShapeInferenceRule.cpp
+++ b/compiler/loco/src/Service/CanonicalShapeInferenceRule.cpp
@@ -674,7 +674,7 @@ public:
for (uint32_t axis = 0; axis < out_shape.rank(); ++axis)
{
out_shape.dim(axis) =
- tensor_shape.dim(axis).value() + padding->front(axis) + padding->back(axis);
+ tensor_shape.dim(axis).value() + padding->front(axis) + padding->back(axis);
}
return loco::NodeShape{out_shape};
diff --git a/compiler/loco/src/Service/CanonicalShapeInferenceRule.test.cpp b/compiler/loco/src/Service/CanonicalShapeInferenceRule.test.cpp
index e88872b5d..0e0dec1a5 100644
--- a/compiler/loco/src/Service/CanonicalShapeInferenceRule.test.cpp
+++ b/compiler/loco/src/Service/CanonicalShapeInferenceRule.test.cpp
@@ -122,7 +122,7 @@ TEST(CanonicalShapeInferenceRuleTest, avgpool2d)
testcase.pull_node->shape({1, 8, 4, 3});
- testcase.encode_node->encoder(stdex::make_unique<PermutingEncoder<Domain::Feature>>(perm));
+ testcase.encode_node->encoder(std::make_unique<PermutingEncoder<Domain::Feature>>(perm));
testcase.avgpool2d_node->window()->vertical(2);
testcase.avgpool2d_node->window()->horizontal(2);
@@ -130,7 +130,7 @@ TEST(CanonicalShapeInferenceRuleTest, avgpool2d)
testcase.avgpool2d_node->stride()->vertical(2);
testcase.avgpool2d_node->stride()->horizontal(2);
- testcase.decode_node->decoder(stdex::make_unique<PermutingDecoder<Domain::Feature>>(perm));
+ testcase.decode_node->decoder(std::make_unique<PermutingDecoder<Domain::Feature>>(perm));
// Run Inference
loco::CanonicalShapeInferenceRule rule;
@@ -224,7 +224,7 @@ TEST(CanonicalShapeInferenceRuleTest, maxpool2d)
testcase.pull_node->shape({1, 8, 4, 3});
- testcase.encode_node->encoder(stdex::make_unique<PermutingEncoder<Domain::Feature>>(perm));
+ testcase.encode_node->encoder(std::make_unique<PermutingEncoder<Domain::Feature>>(perm));
testcase.maxpool2d_node->window()->vertical(2);
testcase.maxpool2d_node->window()->horizontal(2);
@@ -232,7 +232,7 @@ TEST(CanonicalShapeInferenceRuleTest, maxpool2d)
testcase.maxpool2d_node->stride()->vertical(2);
testcase.maxpool2d_node->stride()->horizontal(2);
- testcase.decode_node->decoder(stdex::make_unique<PermutingDecoder<Domain::Feature>>(perm));
+ testcase.decode_node->decoder(std::make_unique<PermutingDecoder<Domain::Feature>>(perm));
// Run Inference
loco::CanonicalShapeInferenceRule rule;
diff --git a/compiler/loco/src/Service/GraphBuilder.h b/compiler/loco/src/Service/GraphBuilder.h
index 71084673c..74eed2af8 100644
--- a/compiler/loco/src/Service/GraphBuilder.h
+++ b/compiler/loco/src/Service/GraphBuilder.h
@@ -20,10 +20,8 @@
// loco-internal headers
#include "loco/IR/Graph.h"
-// repo-internal headers
-#include <stdex/Memory.h>
-
// C++ standard headers
+#include <memory>
#include <stack>
//
@@ -90,7 +88,7 @@ public:
// "Layer" is in theory a subgraph builder.
template <typename Layer, typename... Args>
auto push(Args &&... args)
- -> decltype(static_cast<Layer *>(nullptr)->operator()(static_cast<Context *>(nullptr)))
+ -> decltype(static_cast<Layer *>(nullptr)->operator()(static_cast<Context *>(nullptr)))
{
Layer layer{std::forward<Args>(args)...};
return layer(ctx());
@@ -108,7 +106,7 @@ private:
static inline std::unique_ptr<GraphBuilder> make_graph_builder(loco::Graph *g)
{
- return stdex::make_unique<GraphBuilder>(g);
+ return std::make_unique<GraphBuilder>(g);
}
// "InputLayer" creates both GraphInput and Pull node at once
@@ -159,7 +157,7 @@ struct InputLayer final
ctx->stack()->push(pull_node);
- return stdex::make_unique<Return>(graph_input, pull_node);
+ return std::make_unique<Return>(graph_input, pull_node);
}
};
@@ -205,7 +203,7 @@ struct OutputLayer final
ctx->stack()->push(push_node);
- return stdex::make_unique<Return>(graph_output, push_node);
+ return std::make_unique<Return>(graph_output, push_node);
}
};
@@ -236,7 +234,7 @@ struct ReLULayer final
ctx->stack()->push(relu_node);
- return stdex::make_unique<Return>(relu_node);
+ return std::make_unique<Return>(relu_node);
}
};
@@ -263,7 +261,7 @@ struct ConstGenLayer final
ctx->stack()->push(const_node);
- return stdex::make_unique<Return>(const_node);
+ return std::make_unique<Return>(const_node);
}
};
@@ -283,7 +281,7 @@ struct FeatureEncodeLayer final
Return *perm(const loco::Permutation<loco::Domain::Feature> &perm)
{
using namespace loco;
- _node->encoder(stdex::make_unique<PermutingEncoder<Domain::Feature>>(perm));
+ _node->encoder(std::make_unique<PermutingEncoder<Domain::Feature>>(perm));
return this;
}
@@ -302,7 +300,7 @@ struct FeatureEncodeLayer final
ctx->stack()->push(encode_node);
- return stdex::make_unique<Return>(encode_node);
+ return std::make_unique<Return>(encode_node);
}
};
@@ -320,7 +318,7 @@ struct FeatureDecodeLayer final
Return *perm(const loco::Permutation<loco::Domain::Feature> &perm)
{
using namespace loco;
- _node->decoder(stdex::make_unique<PermutingDecoder<Domain::Feature>>(perm));
+ _node->decoder(std::make_unique<PermutingDecoder<Domain::Feature>>(perm));
return this;
}
@@ -341,7 +339,7 @@ struct FeatureDecodeLayer final
ctx->stack()->push(decode_node);
- return stdex::make_unique<Return>(decode_node);
+ return std::make_unique<Return>(decode_node);
}
};
@@ -358,7 +356,7 @@ struct FilterEncodeLayer final
public:
Return *perm(const loco::Permutation<loco::Domain::Filter> &perm)
{
- auto encoder = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
+ auto encoder = std::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
encoder->perm(perm);
_node->encoder(std::move(encoder));
return this;
@@ -379,7 +377,7 @@ struct FilterEncodeLayer final
ctx->stack()->push(encode_node);
- return stdex::make_unique<Return>(encode_node);
+ return std::make_unique<Return>(encode_node);
}
};
@@ -397,7 +395,7 @@ struct DepthwiseFilterEncodeLayer final
Return *perm(const loco::Permutation<loco::Domain::DepthwiseFilter> &perm)
{
using namespace loco;
- _node->encoder(stdex::make_unique<PermutingEncoder<Domain::DepthwiseFilter>>(perm));
+ _node->encoder(std::make_unique<PermutingEncoder<Domain::DepthwiseFilter>>(perm));
return this;
}
@@ -416,7 +414,7 @@ struct DepthwiseFilterEncodeLayer final
ctx->stack()->push(encode_node);
- return stdex::make_unique<Return>(encode_node);
+ return std::make_unique<Return>(encode_node);
}
};
@@ -446,7 +444,7 @@ struct DepthwiseConv2DLayer final
ctx->stack()->push(depthwiseconv2d_node);
- return stdex::make_unique<Return>(depthwiseconv2d_node);
+ return std::make_unique<Return>(depthwiseconv2d_node);
}
};
@@ -476,7 +474,7 @@ struct TransposedConv2DLayer final
ctx->stack()->push(tr_conv2d_node);
- return stdex::make_unique<Return>(tr_conv2d_node);
+ return std::make_unique<Return>(tr_conv2d_node);
}
};
@@ -512,7 +510,7 @@ struct FixedReshapeLayer final
ctx->stack()->push(reshape_node);
- return stdex::make_unique<Return>(reshape_node);
+ return std::make_unique<Return>(reshape_node);
}
};
@@ -540,7 +538,7 @@ struct TensorBroadcastLayer final
broadcast_node->input(ctx->stack()->pop());
ctx->stack()->push(broadcast_node);
- return stdex::make_unique<Return>(broadcast_node);
+ return std::make_unique<Return>(broadcast_node);
}
};
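
For readers unfamiliar with this header, a minimal sketch of the stack-based builder that the hunks above modernize; InputLayer, ReLULayer, and OutputLayer come from this file, while the name(...) setter on the returned handle is an assumption based on its Return pattern:

#include "GraphBuilder.h"

// Sketch: wire Pull -> ReLU -> Push through the builder's node stack.
void build_relu_graph(loco::Graph *g)
{
  auto builder = make_graph_builder(g);

  builder->push<InputLayer>()->name("data");
  builder->push<ReLULayer>();
  builder->push<OutputLayer>()->name("output");
}
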
diff --git a/compiler/loco/src/Service/GraphTestcase.h b/compiler/loco/src/Service/GraphTestcase.h
index 27b011f8d..06801e0aa 100644
--- a/compiler/loco/src/Service/GraphTestcase.h
+++ b/compiler/loco/src/Service/GraphTestcase.h
@@ -22,8 +22,6 @@
#include "GraphBuilder.h"
-#include <stdex/Memory.h>
-
enum class GraphCode
{
Identity,
@@ -278,7 +276,7 @@ public:
const_node = graph_builder->push<ConstGenLayer>()->node();
filter_encode_node =
- graph_builder->push<DepthwiseFilterEncodeLayer>()->perm(filter_perm)->node();
+ graph_builder->push<DepthwiseFilterEncodeLayer>()->perm(filter_perm)->node();
depthwiseconv2d_node = graph_builder->push<DepthwiseConv2DLayer>()->node();
diff --git a/compiler/loco/src/Service/MultiDialectShapeInferenceRule.test.cpp b/compiler/loco/src/Service/MultiDialectShapeInferenceRule.test.cpp
index 3d5a11ae4..7be41f7ee 100644
--- a/compiler/loco/src/Service/MultiDialectShapeInferenceRule.test.cpp
+++ b/compiler/loco/src/Service/MultiDialectShapeInferenceRule.test.cpp
@@ -112,8 +112,8 @@ TEST(MultiDialectShapeInferenceRuleTest, test1)
loco::MultiDialectShapeInferenceRule rules;
rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
- .bind(TestDialect<2, 3>::get(), &t23_rule)
- .bind(TestDialect<4, 5>::get(), &t45_rule);
+ .bind(TestDialect<2, 3>::get(), &t23_rule)
+ .bind(TestDialect<4, 5>::get(), &t45_rule);
loco::apply(&rules).to(g.get());
diff --git a/compiler/loco/src/Service/ShapeInference.cpp b/compiler/loco/src/Service/ShapeInference.cpp
index 84eb10963..d177a4869 100644
--- a/compiler/loco/src/Service/ShapeInference.cpp
+++ b/compiler/loco/src/Service/ShapeInference.cpp
@@ -18,8 +18,7 @@
#include "loco/IR/Algorithm.h"
#include <cassert>
-
-#include <stdex/Memory.h>
+#include <memory>
namespace
{
@@ -82,7 +81,7 @@ bool ShapeInferenceSession::to(Graph *g) const
{
if (_rule->infer(node, shape))
{
- node->annot(stdex::make_unique<ShapeAnnotation>(shape));
+ node->annot(std::make_unique<ShapeAnnotation>(shape));
changed = true;
}
}
diff --git a/compiler/loco/src/Service/TypeInference.cpp b/compiler/loco/src/Service/TypeInference.cpp
index 27d7d9a29..df038efe7 100644
--- a/compiler/loco/src/Service/TypeInference.cpp
+++ b/compiler/loco/src/Service/TypeInference.cpp
@@ -19,8 +19,7 @@
#include "loco/IR/Algorithm.h"
#include <cassert>
-
-#include <stdex/Memory.h>
+#include <memory>
namespace
{
@@ -73,7 +72,7 @@ bool TypeInferenceSession::to(Graph *g) const
{
if (_rule->infer(node, dtype))
{
- node->annot(stdex::make_unique<DataTypeAnnotation>(dtype));
+ node->annot(std::make_unique<DataTypeAnnotation>(dtype));
changed = true;
}
}
diff --git a/compiler/loco/src/Service/TypeInference.test.cpp b/compiler/loco/src/Service/TypeInference.test.cpp
index 13bcfa52b..0d2cc8864 100644
--- a/compiler/loco/src/Service/TypeInference.test.cpp
+++ b/compiler/loco/src/Service/TypeInference.test.cpp
@@ -268,8 +268,8 @@ TEST(MultiDialectTypeInferenceRuleTest, test1)
loco::MultiDialectTypeInferenceRule rules;
rules.bind(TestDialect<loco::DataType::S8>::get(), &s8_rule)
- .bind(TestDialect<loco::DataType::U8>::get(), &u8_rule)
- .bind(loco::CanonicalDialect::get(), &canon_rule);
+ .bind(TestDialect<loco::DataType::U8>::get(), &u8_rule)
+ .bind(loco::CanonicalDialect::get(), &canon_rule);
loco::apply(&rules).to(g.get());
diff --git a/compiler/loco/src/tensorflow.test.cpp b/compiler/loco/src/tensorflow.test.cpp
index f534aee7b..d905429f5 100644
--- a/compiler/loco/src/tensorflow.test.cpp
+++ b/compiler/loco/src/tensorflow.test.cpp
@@ -23,9 +23,9 @@
#include <gtest/gtest.h>
-#include <stdex/Memory.h>
+#include <memory>
-using stdex::make_unique;
+using std::make_unique;
namespace
{
@@ -65,7 +65,7 @@ loco::Permutation<loco::Domain::Filter> make_HWIO_permutation(void)
return HWIO;
}
-} // nemaspace
+} // namespace
#if 0
>>> MaxPool_Float_000 testcase
diff --git a/compiler/locoex-customop/CMakeLists.txt b/compiler/locoex-customop/CMakeLists.txt
index df1e01526..12356c81b 100644
--- a/compiler/locoex-customop/CMakeLists.txt
+++ b/compiler/locoex-customop/CMakeLists.txt
@@ -5,7 +5,7 @@ list(REMOVE_ITEM SOURCES ${TESTS})
add_library(locoex_customop SHARED ${SOURCES})
target_include_directories(locoex_customop PUBLIC include)
target_link_libraries(locoex_customop PUBLIC loco)
-target_link_libraries(locoex_customop PRIVATE stdex locop pepper_str)
+target_link_libraries(locoex_customop PRIVATE locop pepper_str)
install(TARGETS locoex_customop DESTINATION lib)
if(NOT ENABLE_TEST)
@@ -15,4 +15,4 @@ endif(NOT ENABLE_TEST)
nnas_find_package(GTest REQUIRED)
GTest_AddTest(locoex_customop_test ${TESTS})
-target_link_libraries(locoex_customop_test loco locoex_customop stdex)
+target_link_libraries(locoex_customop_test loco locoex_customop)
diff --git a/compiler/locoex-customop/requires.cmake b/compiler/locoex-customop/requires.cmake
index 9127144f2..c4240bc09 100644
--- a/compiler/locoex-customop/requires.cmake
+++ b/compiler/locoex-customop/requires.cmake
@@ -1,4 +1,3 @@
require("loco")
-require("stdex")
require("locop")
require("pepper-str")
diff --git a/compiler/locoex-customop/src/COpCall.cpp b/compiler/locoex-customop/src/COpCall.cpp
index 029914758..e86ad5c5b 100644
--- a/compiler/locoex-customop/src/COpCall.cpp
+++ b/compiler/locoex-customop/src/COpCall.cpp
@@ -57,7 +57,7 @@ std::vector<std::string> COpCall::attr_names() const
#define INSTANTIATE(AT) \
template const typename AttrTypeTrait<AT>::Type *COpCall::attr<AT>(const std::string &attr_name) \
- const;
+ const;
INSTANTIATE(COpAttrType::Float)
INSTANTIATE(COpAttrType::Int)
diff --git a/compiler/locoex-customop/src/COpCall.test.cpp b/compiler/locoex-customop/src/COpCall.test.cpp
index d5f01d22d..7bc4186e5 100644
--- a/compiler/locoex-customop/src/COpCall.test.cpp
+++ b/compiler/locoex-customop/src/COpCall.test.cpp
@@ -20,7 +20,7 @@
#include <loco/IR/Graph.h>
#include <loco/IR/Nodes.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <gtest/gtest.h>
@@ -51,8 +51,8 @@ TEST(CallTest, Test_01)
custom->input(0, inp);
custom->input(1, inp);
- custom->attr(int_attr, stdex::make_unique<COpAttrInt>(int_val));
- custom->attr(float_attr, stdex::make_unique<COpAttrFloat>(float_val));
+ custom->attr(int_attr, std::make_unique<COpAttrInt>(int_val));
+ custom->attr(float_attr, std::make_unique<COpAttrFloat>(float_val));
}
// access custom op input
diff --git a/compiler/locoex-customop/src/VariadicArityNode.test.cpp b/compiler/locoex-customop/src/VariadicArityNode.test.cpp
index a618824e5..86a9de5cd 100644
--- a/compiler/locoex-customop/src/VariadicArityNode.test.cpp
+++ b/compiler/locoex-customop/src/VariadicArityNode.test.cpp
@@ -47,7 +47,7 @@ class BinaryInputNode : public TestNode
public:
BinaryInputNode() : TestNode(2) {}
};
-}
+} // namespace
TEST(CustomOpTest, VariadicArityNode_arity_0)
{
diff --git a/compiler/locomotiv/CMakeLists.txt b/compiler/locomotiv/CMakeLists.txt
index 5c0156b78..34835e483 100644
--- a/compiler/locomotiv/CMakeLists.txt
+++ b/compiler/locomotiv/CMakeLists.txt
@@ -3,12 +3,13 @@ file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
add_library(locomotiv STATIC ${SOURCES})
-set_target_properties(locomotiv PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(locomotiv PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif (NOT NNCC_LIBRARY_NO_PIC)
target_include_directories(locomotiv PUBLIC include)
target_include_directories(locomotiv PRIVATE src)
target_link_libraries(locomotiv PUBLIC loco)
target_link_libraries(locomotiv PUBLIC angkor)
-target_link_libraries(locomotiv PRIVATE stdex)
# Let's apply nncc common compile options
#
# NOTE This will enable strict compilation (warnings as error).
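
Presumably the new NNCC_LIBRARY_NO_PIC guard (also applied to locop below) lets builds that cannot use position-independent code, e.g. some bare-metal toolchains, opt out via a cache variable such as cmake -DNNCC_LIBRARY_NO_PIC=ON, while PIC remains the default.
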
diff --git a/compiler/locomotiv/include/locomotiv/Session.h b/compiler/locomotiv/include/locomotiv/Session.h
index 3268d60b3..85c26c09c 100644
--- a/compiler/locomotiv/include/locomotiv/Session.h
+++ b/compiler/locomotiv/include/locomotiv/Session.h
@@ -51,7 +51,7 @@ public:
* @warn This approach may fail for graphs with control flow
*/
Session(loco::Graph *g, const std::vector<loco::Node *> &custom_outputs)
- : _graph(g), _outputs(custom_outputs)
+ : _graph(g), _outputs(custom_outputs)
{
// DO NOTHING
}
diff --git a/compiler/locomotiv/requires.cmake b/compiler/locomotiv/requires.cmake
index 1c09aa13d..654db88c3 100644
--- a/compiler/locomotiv/requires.cmake
+++ b/compiler/locomotiv/requires.cmake
@@ -1,2 +1 @@
require("angkor")
-require("stdex")
diff --git a/compiler/locomotiv/src/Node/AvgPool2D.cpp b/compiler/locomotiv/src/Node/AvgPool2D.cpp
index 5fdf1e725..0adabd49a 100644
--- a/compiler/locomotiv/src/Node/AvgPool2D.cpp
+++ b/compiler/locomotiv/src/Node/AvgPool2D.cpp
@@ -78,9 +78,9 @@ nncc::core::ADT::tensor::Buffer<T> avgPool2D(const loco::AvgPool2D *avgpool2d,
const uint32_t pad_right = avgpool2d->pad()->right();
const uint32_t output_height =
- compute_out_size(ifm_height, pad_top + pad_bottom, window_height, stride_height);
+ compute_out_size(ifm_height, pad_top + pad_bottom, window_height, stride_height);
const uint32_t output_width =
- compute_out_size(ifm_width, pad_left + pad_right, window_width, stride_width);
+ compute_out_size(ifm_width, pad_left + pad_right, window_width, stride_width);
// prepare output buffer
Shape output_shape{batches, output_height, output_width, depth};
diff --git a/compiler/locomotiv/src/Node/AvgPool2D.test.cpp b/compiler/locomotiv/src/Node/AvgPool2D.test.cpp
index f9863b47d..ec5f3cd82 100644
--- a/compiler/locomotiv/src/Node/AvgPool2D.test.cpp
+++ b/compiler/locomotiv/src/Node/AvgPool2D.test.cpp
@@ -84,7 +84,7 @@ void run_test(const float *ifm, const float *expected_ofm, const Shape &ifm_shap
ASSERT_TRUE(*(avgpool2d_data->shape()) == ofm_shape);
auto ofm_overlay =
- make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
+ make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
for (nncc::core::ADT::tensor::IndexEnumerator e{ofm_shape}; e.valid(); e.advance())
{
const auto &ind = e.current();
diff --git a/compiler/locomotiv/src/Node/BiasAdd.cpp b/compiler/locomotiv/src/Node/BiasAdd.cpp
index b84fa7e3c..0c45cc12f 100644
--- a/compiler/locomotiv/src/Node/BiasAdd.cpp
+++ b/compiler/locomotiv/src/Node/BiasAdd.cpp
@@ -55,7 +55,7 @@ void execute_node(loco::BiasAdd<loco::Domain::Tensor> *bias_add)
validate(input_data && bias_data, "Input not ready");
validate(locomotiv::annot_domain(bias_add->value()) == loco::Domain::Tensor &&
- locomotiv::annot_domain(bias_add->bias()) == loco::Domain::Bias,
+ locomotiv::annot_domain(bias_add->bias()) == loco::Domain::Bias,
"Wrong input domain");
std::unique_ptr<NodeData> bias_add_data = calc(input_data, bias_data, bias_add->axis());
@@ -74,7 +74,7 @@ void execute_node(loco::BiasAdd<loco::Domain::Feature> *bias_add)
validate(input_data && bias_data, "Input not ready");
validate(locomotiv::annot_domain(bias_add->value()) == loco::Domain::Feature &&
- locomotiv::annot_domain(bias_add->bias()) == loco::Domain::Bias,
+ locomotiv::annot_domain(bias_add->bias()) == loco::Domain::Bias,
"Wrong input domain");
std::unique_ptr<NodeData> bias_add_data = calc(input_data, bias_data, 3);
diff --git a/compiler/locomotiv/src/Node/Conv2D.cpp b/compiler/locomotiv/src/Node/Conv2D.cpp
index cdf0dfd56..2f9ca5a7e 100644
--- a/compiler/locomotiv/src/Node/Conv2D.cpp
+++ b/compiler/locomotiv/src/Node/Conv2D.cpp
@@ -82,9 +82,9 @@ Buffer<RET_T> calc_conv2D(const loco::Conv2D *conv2d, const Buffer<IFM_T> *input
const uint32_t pad_right = conv2d->pad()->right();
const uint32_t output_height =
- compute_out_size(input_height + pad_top + pad_bottom, filter_height, stride_height);
+ compute_out_size(input_height + pad_top + pad_bottom, filter_height, stride_height);
const uint32_t output_width =
- compute_out_size(input_width + pad_left + pad_right, filter_width, stride_width);
+ compute_out_size(input_width + pad_left + pad_right, filter_width, stride_width);
const uint32_t batches = input_shape.dim(0);
const uint32_t input_depth = input_shape.dim(3);
@@ -121,9 +121,9 @@ Buffer<RET_T> calc_conv2D(const loco::Conv2D *conv2d, const Buffer<IFM_T> *input
((unsigned)in_y < input_height))
{
auto input_value =
- input_buf->at(Index({batch, (unsigned)in_y, (unsigned)in_x, in_channel}));
+ input_buf->at(Index({batch, (unsigned)in_y, (unsigned)in_x, in_channel}));
auto filter_value =
- filter_buf->at(Index({out_channel, filter_y, filter_x, in_channel}));
+ filter_buf->at(Index({out_channel, filter_y, filter_x, in_channel}));
total += (input_value * filter_value);
}
}
diff --git a/compiler/locomotiv/src/Node/Conv2D.test.cpp b/compiler/locomotiv/src/Node/Conv2D.test.cpp
index 66e947acc..93afa79b7 100644
--- a/compiler/locomotiv/src/Node/Conv2D.test.cpp
+++ b/compiler/locomotiv/src/Node/Conv2D.test.cpp
@@ -97,7 +97,7 @@ void run_test(const float *ifm, const float *ker, const float *expected_ofm, con
ASSERT_TRUE(*(conv2d_result->shape()) == ofm_shape);
auto ofm_overlay =
- make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
+ make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
for (nncc::core::ADT::tensor::IndexEnumerator e{ofm_shape}; e.valid(); e.advance())
{
const auto &ind = e.current();
diff --git a/compiler/locomotiv/src/Node/DepthwiseConv2D.cpp b/compiler/locomotiv/src/Node/DepthwiseConv2D.cpp
index f39cd177e..a1a8e506f 100644
--- a/compiler/locomotiv/src/Node/DepthwiseConv2D.cpp
+++ b/compiler/locomotiv/src/Node/DepthwiseConv2D.cpp
@@ -89,9 +89,9 @@ Buffer<RET_T> calc_dw_conv2d(const loco::DepthwiseConv2D *dw_conv2d, const Buffe
const uint32_t pad_right = dw_conv2d->pad()->right();
const uint32_t ofm_height =
- compute_out_size(ifm_height, pad_top + pad_bottom, ker_height, stride_height);
+ compute_out_size(ifm_height, pad_top + pad_bottom, ker_height, stride_height);
const uint32_t ofm_width =
- compute_out_size(ifm_width, pad_left + pad_right, ker_width, stride_width);
+ compute_out_size(ifm_width, pad_left + pad_right, ker_width, stride_width);
const uint32_t batches = ifm_shape.dim(0);
const uint32_t ifm_depth = ifm_shape.dim(3);
diff --git a/compiler/locomotiv/src/Node/DepthwiseConv2D.test.cpp b/compiler/locomotiv/src/Node/DepthwiseConv2D.test.cpp
index 1ff333be0..8a435b6ab 100644
--- a/compiler/locomotiv/src/Node/DepthwiseConv2D.test.cpp
+++ b/compiler/locomotiv/src/Node/DepthwiseConv2D.test.cpp
@@ -97,7 +97,7 @@ void run_test(const float *ifm, const float *ker, const float *expected_ofm, con
ASSERT_TRUE(*(dw_conv2d_result->shape()) == ofm_shape);
auto ofm_overlay =
- make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
+ make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
for (nncc::core::ADT::tensor::IndexEnumerator e{ofm_shape}; e.valid(); e.advance())
{
const auto &ind = e.current();
diff --git a/compiler/locomotiv/src/Node/DepthwiseFilterEncode.cpp b/compiler/locomotiv/src/Node/DepthwiseFilterEncode.cpp
index 03f5bf833..e161287ea 100644
--- a/compiler/locomotiv/src/Node/DepthwiseFilterEncode.cpp
+++ b/compiler/locomotiv/src/Node/DepthwiseFilterEncode.cpp
@@ -59,8 +59,8 @@ std::unique_ptr<locomotiv::NodeData> dw_filter_encode(const loco::DepthwiseFilte
// Make HWCM (i.e. height, width, depth, multiplier) buffer from DepthwiseFilterShape
Buffer<T> node_buf = make_buffer<T, LexicalLayout>(
- Shape{node_shape.height().value(), node_shape.width().value(), node_shape.depth().value(),
- node_shape.multiplier().value()});
+ Shape{node_shape.height().value(), node_shape.width().value(), node_shape.depth().value(),
+ node_shape.multiplier().value()});
// Copy buffer in an order arranged by encoder
for (IndexEnumerator e{node_buf.shape()}; e.valid(); e.advance())
diff --git a/compiler/locomotiv/src/Node/DepthwiseFilterEncode.test.cpp b/compiler/locomotiv/src/Node/DepthwiseFilterEncode.test.cpp
index 5b2ec9326..44364723c 100644
--- a/compiler/locomotiv/src/Node/DepthwiseFilterEncode.test.cpp
+++ b/compiler/locomotiv/src/Node/DepthwiseFilterEncode.test.cpp
@@ -62,7 +62,7 @@ TEST(NodeExecution_DepthwiseFilterEncode, f32)
// Encoder to correctly read input tensor as MHWC
auto encoder = std::unique_ptr<loco::PermutingEncoder<loco::Domain::DepthwiseFilter>>(
- new loco::PermutingEncoder<loco::Domain::DepthwiseFilter>);
+ new loco::PermutingEncoder<loco::Domain::DepthwiseFilter>);
encoder->perm()->axis(loco::DepthwiseFilterAxis::Multiplier) = 0;
encoder->perm()->axis(loco::DepthwiseFilterAxis::Height) = 1;
encoder->perm()->axis(loco::DepthwiseFilterAxis::Width) = 2;
diff --git a/compiler/locomotiv/src/Node/FeatureCodec.test.cpp b/compiler/locomotiv/src/Node/FeatureCodec.test.cpp
index 1b6b06c13..dacd0170c 100644
--- a/compiler/locomotiv/src/Node/FeatureCodec.test.cpp
+++ b/compiler/locomotiv/src/Node/FeatureCodec.test.cpp
@@ -64,7 +64,7 @@ protected:
const loco::Permutation<loco::Domain::Feature> &perm)
{
auto encoder = std::unique_ptr<loco::PermutingEncoder<loco::Domain::Feature>>(
- new loco::PermutingEncoder<loco::Domain::Feature>);
+ new loco::PermutingEncoder<loco::Domain::Feature>);
encoder->perm(perm);
@@ -80,7 +80,7 @@ protected:
const loco::Permutation<loco::Domain::Feature> &perm)
{
auto decoder = std::unique_ptr<loco::PermutingDecoder<loco::Domain::Feature>>(
- new loco::PermutingDecoder<loco::Domain::Feature>);
+ new loco::PermutingDecoder<loco::Domain::Feature>);
decoder->perm(perm);
diff --git a/compiler/locomotiv/src/Node/FeatureDecode.cpp b/compiler/locomotiv/src/Node/FeatureDecode.cpp
index 8776e1b42..2877906f9 100644
--- a/compiler/locomotiv/src/Node/FeatureDecode.cpp
+++ b/compiler/locomotiv/src/Node/FeatureDecode.cpp
@@ -54,8 +54,8 @@ std::unique_ptr<locomotiv::NodeData> feature_decode(const loco::FeatureDecode *n
// Make tensor buffer from TensorShape
Buffer<T> node_buf =
- make_buffer<T, LexicalLayout>(Shape{node_shape.dim(0).value(), node_shape.dim(1).value(),
- node_shape.dim(2).value(), node_shape.dim(3).value()});
+ make_buffer<T, LexicalLayout>(Shape{node_shape.dim(0).value(), node_shape.dim(1).value(),
+ node_shape.dim(2).value(), node_shape.dim(3).value()});
// Copy buffer in an order arranged by decoder
for (IndexEnumerator e{node_buf.shape()}; e.valid(); e.advance())
diff --git a/compiler/locomotiv/src/Node/FeatureEncode.cpp b/compiler/locomotiv/src/Node/FeatureEncode.cpp
index 406de76ff..c3570b981 100644
--- a/compiler/locomotiv/src/Node/FeatureEncode.cpp
+++ b/compiler/locomotiv/src/Node/FeatureEncode.cpp
@@ -54,8 +54,8 @@ std::unique_ptr<locomotiv::NodeData> feature_encode(const loco::FeatureEncode *n
// Make NHWC buffer from FeatureShape
Buffer<T> node_buf =
- make_buffer<T, LexicalLayout>(Shape{node_shape.count().value(), node_shape.height().value(),
- node_shape.width().value(), node_shape.depth().value()});
+ make_buffer<T, LexicalLayout>(Shape{node_shape.count().value(), node_shape.height().value(),
+ node_shape.width().value(), node_shape.depth().value()});
// Copy buffer in an order arranged by encoder
for (IndexEnumerator e{node_buf.shape()}; e.valid(); e.advance())
diff --git a/compiler/locomotiv/src/Node/FilterEncode.cpp b/compiler/locomotiv/src/Node/FilterEncode.cpp
index 0e2ac918f..84ba681ba 100644
--- a/compiler/locomotiv/src/Node/FilterEncode.cpp
+++ b/compiler/locomotiv/src/Node/FilterEncode.cpp
@@ -54,8 +54,8 @@ std::unique_ptr<locomotiv::NodeData> filter_encode(const loco::FilterEncode *nod
// Make NHWC buffer from FilterShape
Buffer<T> node_buf =
- make_buffer<T, LexicalLayout>(Shape{node_shape.count().value(), node_shape.height().value(),
- node_shape.width().value(), node_shape.depth().value()});
+ make_buffer<T, LexicalLayout>(Shape{node_shape.count().value(), node_shape.height().value(),
+ node_shape.width().value(), node_shape.depth().value()});
// Copy buffer in an order arranged by encoder
for (IndexEnumerator e{node_buf.shape()}; e.valid(); e.advance())
diff --git a/compiler/locomotiv/src/Node/FilterEncode.test.cpp b/compiler/locomotiv/src/Node/FilterEncode.test.cpp
index dcca94993..80d108ece 100644
--- a/compiler/locomotiv/src/Node/FilterEncode.test.cpp
+++ b/compiler/locomotiv/src/Node/FilterEncode.test.cpp
@@ -62,7 +62,7 @@ TEST(NodeExecution_FilterEncode, s32)
// Encoder to correctly read input tensor as NCHW
auto encoder = std::unique_ptr<loco::PermutingEncoder<loco::Domain::Filter>>(
- new loco::PermutingEncoder<loco::Domain::Filter>);
+ new loco::PermutingEncoder<loco::Domain::Filter>);
encoder->perm()->axis(loco::FilterAxis::Count) = 0;
encoder->perm()->axis(loco::FilterAxis::Depth) = 1;
encoder->perm()->axis(loco::FilterAxis::Height) = 2;
@@ -116,7 +116,7 @@ TEST(NodeExecution_FilterEncode, f32)
// Encoder to correctly read input tensor as CHNW
auto encoder = std::unique_ptr<loco::PermutingEncoder<loco::Domain::Filter>>(
- new loco::PermutingEncoder<loco::Domain::Filter>);
+ new loco::PermutingEncoder<loco::Domain::Filter>);
encoder->perm()->axis(loco::FilterAxis::Depth) = 0;
encoder->perm()->axis(loco::FilterAxis::Height) = 1;
encoder->perm()->axis(loco::FilterAxis::Count) = 2;
diff --git a/compiler/locomotiv/src/Node/MatrixCodec.test.cpp b/compiler/locomotiv/src/Node/MatrixCodec.test.cpp
index da4afeded..7f684e41f 100644
--- a/compiler/locomotiv/src/Node/MatrixCodec.test.cpp
+++ b/compiler/locomotiv/src/Node/MatrixCodec.test.cpp
@@ -64,7 +64,7 @@ protected:
const loco::Permutation<loco::Domain::Matrix> &perm)
{
auto encoder = std::unique_ptr<loco::PermutingEncoder<loco::Domain::Matrix>>(
- new loco::PermutingEncoder<loco::Domain::Matrix>);
+ new loco::PermutingEncoder<loco::Domain::Matrix>);
encoder->perm(perm);
@@ -80,7 +80,7 @@ protected:
const loco::Permutation<loco::Domain::Matrix> &perm)
{
auto decoder = std::unique_ptr<loco::PermutingDecoder<loco::Domain::Matrix>>(
- new loco::PermutingDecoder<loco::Domain::Matrix>);
+ new loco::PermutingDecoder<loco::Domain::Matrix>);
decoder->perm(perm);
diff --git a/compiler/locomotiv/src/Node/MatrixDecode.cpp b/compiler/locomotiv/src/Node/MatrixDecode.cpp
index 0310015f1..2a65a7b74 100644
--- a/compiler/locomotiv/src/Node/MatrixDecode.cpp
+++ b/compiler/locomotiv/src/Node/MatrixDecode.cpp
@@ -52,7 +52,7 @@ std::unique_ptr<locomotiv::NodeData> matrix_decode(const loco::MatrixDecode *nod
// Make tensor buffer from TensorShape
Buffer<T> node_buf =
- make_buffer<T, LexicalLayout>(Shape{node_shape.dim(0).value(), node_shape.dim(1).value()});
+ make_buffer<T, LexicalLayout>(Shape{node_shape.dim(0).value(), node_shape.dim(1).value()});
// Copy buffer in an order arranged by decoder
for (IndexEnumerator e{node_buf.shape()}; e.valid(); e.advance())
diff --git a/compiler/locomotiv/src/Node/MatrixEncode.cpp b/compiler/locomotiv/src/Node/MatrixEncode.cpp
index e3554e15a..ac51e4256 100644
--- a/compiler/locomotiv/src/Node/MatrixEncode.cpp
+++ b/compiler/locomotiv/src/Node/MatrixEncode.cpp
@@ -54,7 +54,7 @@ std::unique_ptr<locomotiv::NodeData> matrix_encode(const loco::MatrixEncode *nod
// Make HW buffer from MatrixShape
Buffer<T> node_buf =
- make_buffer<T, LexicalLayout>(Shape{node_shape.height().value(), node_shape.width().value()});
+ make_buffer<T, LexicalLayout>(Shape{node_shape.height().value(), node_shape.width().value()});
// Copy buffer in an order arranged by encoder
for (IndexEnumerator e{node_buf.shape()}; e.valid(); e.advance())
diff --git a/compiler/locomotiv/src/Node/MaxPool2D.cpp b/compiler/locomotiv/src/Node/MaxPool2D.cpp
index 8dce1cb1e..dc626387b 100644
--- a/compiler/locomotiv/src/Node/MaxPool2D.cpp
+++ b/compiler/locomotiv/src/Node/MaxPool2D.cpp
@@ -79,9 +79,9 @@ nncc::core::ADT::tensor::Buffer<T> maxPool2D(const loco::MaxPool2D *maxpool2d,
const uint32_t pad_right = maxpool2d->pad()->right();
const uint32_t output_height =
- compute_out_size(ifm_height, pad_top + pad_bottom, window_height, stride_height);
+ compute_out_size(ifm_height, pad_top + pad_bottom, window_height, stride_height);
const uint32_t output_width =
- compute_out_size(ifm_width, pad_left + pad_right, window_width, stride_width);
+ compute_out_size(ifm_width, pad_left + pad_right, window_width, stride_width);
// prepare output buffer
Shape output_shape{batches, output_height, output_width, depth};
diff --git a/compiler/locomotiv/src/Node/MaxPool2D.test.cpp b/compiler/locomotiv/src/Node/MaxPool2D.test.cpp
index 5046d4a6e..d00282dd7 100644
--- a/compiler/locomotiv/src/Node/MaxPool2D.test.cpp
+++ b/compiler/locomotiv/src/Node/MaxPool2D.test.cpp
@@ -82,7 +82,7 @@ void run_test(const float *ifm, const float *expected_ofm, const Shape &ifm_shap
ASSERT_TRUE(*(maxpool2d_data->shape()) == ofm_shape);
auto ofm_overlay =
- make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
+ make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
for (nncc::core::ADT::tensor::IndexEnumerator e{ofm_shape}; e.valid(); e.advance())
{
const auto &ind = e.current();
diff --git a/compiler/locomotiv/src/Node/Pull.cpp b/compiler/locomotiv/src/Node/Pull.cpp
index fe5d7c2e1..e60c646bd 100644
--- a/compiler/locomotiv/src/Node/Pull.cpp
+++ b/compiler/locomotiv/src/Node/Pull.cpp
@@ -31,15 +31,6 @@ using namespace locomotiv;
void execute_node(loco::Pull *pull)
{
-// TODO Remove deprecated code
-#if 0
- validate(annot_data(pull), "Data for Pull is not ready");
-
- validate(annot_domain(pull) == loco::Domain::Tensor, "Domain for Pull is not Tensor");
-
- // DO NOTHING
-#endif
-
auto input_data = user_data(pull);
validate(input_data, "Input not ready");
diff --git a/compiler/locomotiv/src/Node/Pull.test.cpp b/compiler/locomotiv/src/Node/Pull.test.cpp
index 53e78776b..ec58c48f1 100644
--- a/compiler/locomotiv/src/Node/Pull.test.cpp
+++ b/compiler/locomotiv/src/Node/Pull.test.cpp
@@ -45,16 +45,8 @@ TEST(NodeExecution_Pull, check_data_ready)
auto pull_data = locomotiv::make_data(pull_buf);
locomotiv::user_data(pull, std::move(pull_data));
-// The behavior of Pull is now consistent with that of other nodes.
-// - annot_data and annot_domain is available after evaluating that "pull" node.
-// TODO Remove this
-#if 0
- // Domain not ready yet
- ASSERT_ANY_THROW(locomotiv::NodeExecution::get().run(pull));
-
- // Set Domain
- locomotiv::annot_domain(pull, loco::Domain::Tensor);
-#endif
+ // The behavior of Pull is now consistent with that of other nodes.
+ // - annot_data and annot_domain are available after evaluating the "pull" node.
// Valid run
ASSERT_NO_THROW(locomotiv::NodeExecution::get().run(pull));
diff --git a/compiler/locomotiv/src/Node/ReLU6.cpp b/compiler/locomotiv/src/Node/ReLU6.cpp
index 586c015fc..d237fd923 100644
--- a/compiler/locomotiv/src/Node/ReLU6.cpp
+++ b/compiler/locomotiv/src/Node/ReLU6.cpp
@@ -16,25 +16,6 @@
#include "NodeExecution.h"
-// TODO Remove deprecated code
-#if 0
-#include "NodeDataImpl.h"
-#include "NodeDomain.h"
-#include "Validation.h"
-
-#include <nncc/core/ADT/tensor/Shape.h>
-#include <nncc/core/ADT/tensor/Buffer.h>
-#include <nncc/core/ADT/tensor/IndexEnumerator.h>
-#include <nncc/core/ADT/tensor/LexicalLayout.h>
-
-using nncc::core::ADT::tensor::IndexEnumerator;
-using nncc::core::ADT::tensor::LexicalLayout;
-using nncc::core::ADT::tensor::make_buffer;
-
-#include <cassert>
-#include <stdexcept>
-#endif
-
namespace
{
@@ -47,42 +28,6 @@ namespace locomotiv
void NodeExecution::execute(loco::ReLU6 *relu6)
{
-// TODO Remove deprecated code
-#if 0
- auto input_data = annot_data(relu6->input());
-
- validate(input_data, "Input not ready");
- validate(annot_domain(relu6->input()) != loco::Domain::Unknown,
- "Input domain of ReLU is Unknown");
-
- std::unique_ptr<NodeData> relu6_data = nullptr;
-
- switch (input_data->dtype())
- {
- case loco::DataType::FLOAT32:
- {
- auto input_bufptr = input_data->as_f32_bufptr();
- auto *shape = input_data->shape();
- auto relu6_buf = make_buffer<float, LexicalLayout>(*shape);
-
- for (IndexEnumerator e{*shape}; e.valid(); e.advance())
- {
- const auto &index = e.current();
- relu6_buf.at(index) = relu6_ew(input_bufptr->at(index));
- }
-
- relu6_data = make_data(relu6_buf);
- break;
- }
- default:
- throw std::runtime_error("NYI for this DataType");
- }
-
- assert(relu6_data != nullptr);
- annot_data(relu6, std::move(relu6_data));
- annot_domain(relu6, annot_domain(relu6->input()));
-#endif
-
struct Func final : public UnaryFunc
{
float apply(float v) const final { return relu6_ew(v); }
diff --git a/compiler/locomotiv/src/Node/TensorBroadcast.cpp b/compiler/locomotiv/src/Node/TensorBroadcast.cpp
index 38e5a7aa9..682237f16 100644
--- a/compiler/locomotiv/src/Node/TensorBroadcast.cpp
+++ b/compiler/locomotiv/src/Node/TensorBroadcast.cpp
@@ -42,6 +42,10 @@ using namespace locomotiv;
void execute_node(loco::TensorBroadcast *tensor_broadcast)
{
auto input_data = annot_data(tensor_broadcast->input());
+ if (input_data == nullptr)
+ {
+ throw std::runtime_error("Annotation is required for TensorBroadcast input");
+ }
// Calculate output shape
Shape input_shape = *(input_data->shape());
diff --git a/compiler/locomotiv/src/Node/TensorConcat.cpp b/compiler/locomotiv/src/Node/TensorConcat.cpp
index 188bb635b..84da3a3e5 100644
--- a/compiler/locomotiv/src/Node/TensorConcat.cpp
+++ b/compiler/locomotiv/src/Node/TensorConcat.cpp
@@ -52,7 +52,7 @@ void execute_node(loco::TensorConcat *tensor_concat)
validate(lhs_data->dtype() == rhs_data->dtype(), "lhs and rhs of Concat should have same dtype");
validate(annot_domain(tensor_concat->lhs()) == loco::Domain::Tensor &&
- annot_domain(tensor_concat->rhs()) == loco::Domain::Tensor,
+ annot_domain(tensor_concat->rhs()) == loco::Domain::Tensor,
"Some ingredients of TensorConcat is not Tensor");
// Calculate output shape
diff --git a/compiler/locomotiv/src/Node/TensorConstantPad.cpp b/compiler/locomotiv/src/Node/TensorConstantPad.cpp
index 5d4ad5d24..fb071280f 100644
--- a/compiler/locomotiv/src/Node/TensorConstantPad.cpp
+++ b/compiler/locomotiv/src/Node/TensorConstantPad.cpp
@@ -53,6 +53,7 @@ void execute_node(loco::TensorConstantPad *pad)
auto constant_node = pad->constant();
auto constant_data = annot_data(constant_node);
+ validate(constant_data != nullptr, "constant is not found");
validate(constant_data->dtype() == input_data->dtype(), "constant and input have same data type");
validate(constant_data->shape()->rank() == 1 && constant_data->shape()->dim(0) == 1,
"constant should have one rank with one dimension at zero axis");
diff --git a/compiler/locomotiv/src/Node/TransposedConv2D.cpp b/compiler/locomotiv/src/Node/TransposedConv2D.cpp
index bec15a5df..2f3c3d089 100644
--- a/compiler/locomotiv/src/Node/TransposedConv2D.cpp
+++ b/compiler/locomotiv/src/Node/TransposedConv2D.cpp
@@ -65,7 +65,7 @@ Buffer<RET_T> calc_tr_conv2D(const loco::TransposedConv2D *tr_conv2d,
locomotiv::validate(input_shape.rank() == 4, "ifm rank must be 4");
locomotiv::validate(filter_shape.rank() == 4, "filter rank must be 4");
locomotiv::validate(input_shape.dim(3) /* depth of input */ ==
- filter_shape.dim(3) /* depth of filter */,
+ filter_shape.dim(3) /* depth of filter */,
"channel value mismatch");
const uint32_t input_height = input_shape.dim(1);
@@ -86,9 +86,9 @@ Buffer<RET_T> calc_tr_conv2D(const loco::TransposedConv2D *tr_conv2d,
// TODO Support dilations
const uint32_t output_height =
- compute_transposed_out_size(input_height, pad_top + pad_bottom, filter_height, stride_height);
+ compute_transposed_out_size(input_height, pad_top + pad_bottom, filter_height, stride_height);
const uint32_t output_width =
- compute_transposed_out_size(input_width, pad_left + pad_right, filter_width, stride_width);
+ compute_transposed_out_size(input_width, pad_left + pad_right, filter_width, stride_width);
const uint32_t batches = input_shape.dim(0);
const uint32_t input_depth = input_shape.dim(3);
@@ -131,9 +131,9 @@ Buffer<RET_T> calc_tr_conv2D(const loco::TransposedConv2D *tr_conv2d,
{
auto input_value = input_buf->at(Index({batch, in_y, in_x, in_channel}));
auto filter_value =
- filter_buf->at(Index({out_channel, filter_y, filter_x, in_channel}));
+ filter_buf->at(Index({out_channel, filter_y, filter_x, in_channel}));
output_buf.at(Index({batch, (unsigned)out_y, (unsigned)out_x, out_channel})) +=
- input_value * filter_value;
+ input_value * filter_value;
}
}
}
diff --git a/compiler/locomotiv/src/Node/TransposedConv2D.test.cpp b/compiler/locomotiv/src/Node/TransposedConv2D.test.cpp
index ef759f51b..a516ef9f2 100644
--- a/compiler/locomotiv/src/Node/TransposedConv2D.test.cpp
+++ b/compiler/locomotiv/src/Node/TransposedConv2D.test.cpp
@@ -97,7 +97,7 @@ void run_test(const float *ifm, const float *ker, const float *expected_ofm, con
ASSERT_TRUE(*(conv2d_result->shape()) == ofm_shape);
auto ofm_overlay =
- make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
+ make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
for (nncc::core::ADT::tensor::IndexEnumerator e{ofm_shape}; e.valid(); e.advance())
{
const auto &ind = e.current();
diff --git a/compiler/locomotiv/src/NodeDataImpl.cpp b/compiler/locomotiv/src/NodeDataImpl.cpp
index 2efebe5a9..9373b8dd2 100644
--- a/compiler/locomotiv/src/NodeDataImpl.cpp
+++ b/compiler/locomotiv/src/NodeDataImpl.cpp
@@ -16,8 +16,7 @@
#include "NodeDataImpl.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
namespace
@@ -59,7 +58,7 @@ template <> NodeDataImpl::NodeDataImpl(const Buffer<float> &buf)
void annot_data(loco::Node *node, std::unique_ptr<NodeData> &&data)
{
- node->annot(stdex::make_unique<NodeDataAnnotation>(std::move(data)));
+ node->annot(std::make_unique<NodeDataAnnotation>(std::move(data)));
}
const NodeData *annot_data(const loco::Node *node)
diff --git a/compiler/locomotiv/src/NodeExecution.h b/compiler/locomotiv/src/NodeExecution.h
index 363188d38..eb0608d2b 100644
--- a/compiler/locomotiv/src/NodeExecution.h
+++ b/compiler/locomotiv/src/NodeExecution.h
@@ -62,7 +62,7 @@ private:
return dynamic_cast<Derived *>(node);
}
-// clang-format off
+ // clang-format off
/**
* @brief Calculate for one specified node and update its result as NodeData.
* Abort program when its ingredients are not ready or not supported.
diff --git a/compiler/locomotiv/src/UserData.cpp b/compiler/locomotiv/src/UserData.cpp
index b658ada9b..98f761efd 100644
--- a/compiler/locomotiv/src/UserData.cpp
+++ b/compiler/locomotiv/src/UserData.cpp
@@ -16,8 +16,7 @@
#include "UserData.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
namespace
@@ -55,7 +54,7 @@ const NodeData *user_data(const loco::Node *node)
void user_data(loco::Node *node, std::unique_ptr<NodeData> &&data)
{
- node->annot(stdex::make_unique<UserDataAnnotation>(std::move(data)));
+ node->annot(std::make_unique<UserDataAnnotation>(std::move(data)));
}
void erase_user_data(loco::Node *node) { node->annot<UserDataAnnotation>(nullptr); }
diff --git a/compiler/locop/CMakeLists.txt b/compiler/locop/CMakeLists.txt
index 107ee8be8..43ec41af4 100644
--- a/compiler/locop/CMakeLists.txt
+++ b/compiler/locop/CMakeLists.txt
@@ -3,7 +3,9 @@ file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
add_library(locop STATIC ${SOURCES})
-set_target_properties(locop PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(locop PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
target_include_directories(locop PUBLIC include)
target_link_libraries(locop PUBLIC loco)
# Let's apply nncc common compile options
@@ -13,7 +15,6 @@ target_link_libraries(locop PUBLIC loco)
target_link_libraries(locop PRIVATE nncc_common)
target_link_libraries(locop PUBLIC nncc_coverage)
target_link_libraries(locop PRIVATE pp)
-target_link_libraries(locop PRIVATE stdex)
if(NOT ENABLE_TEST)
return()
@@ -23,5 +24,4 @@ endif(NOT ENABLE_TEST)
nnas_find_package(GTest REQUIRED)
GTest_AddTest(locop_test ${TESTS})
-target_link_libraries(locop_test stdex)
target_link_libraries(locop_test locop)
diff --git a/compiler/locop/src/CanonicalNodeSummaryBuilder.cpp b/compiler/locop/src/CanonicalNodeSummaryBuilder.cpp
index 61d9e8ae7..75dd39f36 100644
--- a/compiler/locop/src/CanonicalNodeSummaryBuilder.cpp
+++ b/compiler/locop/src/CanonicalNodeSummaryBuilder.cpp
@@ -25,8 +25,6 @@
#include <pp/Format.h>
-#include <stdex/Memory.h>
-
#include <map>
#include <set>
diff --git a/compiler/locop/src/ExampleGraph.h b/compiler/locop/src/ExampleGraph.h
index 76813bcd8..84010f75b 100644
--- a/compiler/locop/src/ExampleGraph.h
+++ b/compiler/locop/src/ExampleGraph.h
@@ -19,7 +19,7 @@
#include <loco.h>
-#include <stdex/Memory.h>
+#include <memory>
namespace
{
@@ -55,7 +55,7 @@ template <> std::unique_ptr<Bundle<PullPush>> make_bundle(void)
push->from(pull);
- auto res = stdex::make_unique<Bundle<PullPush>>();
+ auto res = std::make_unique<Bundle<PullPush>>();
res->g = std::move(g);
res->pull = pull;
diff --git a/compiler/locop/src/FormattedGraph.cpp b/compiler/locop/src/FormattedGraph.cpp
index bf4175768..94bfbd2f8 100644
--- a/compiler/locop/src/FormattedGraph.cpp
+++ b/compiler/locop/src/FormattedGraph.cpp
@@ -23,8 +23,7 @@
#include <pp/Format.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <map>
#include <set>
@@ -300,7 +299,7 @@ void FormattedGraphImpl<Formatter::LinearV1>::dump(std::ostream &os) const
else
{
// Use Built-in NodeSummaryBuilder otherwise
- node_summary_builder = stdex::make_unique<GenericNodeSummaryBuilder>(&symbols);
+ node_summary_builder = std::make_unique<GenericNodeSummaryBuilder>(&symbols);
}
// Print Graph Input(s)
diff --git a/compiler/locop/src/FormattedGraph.test.cpp b/compiler/locop/src/FormattedGraph.test.cpp
index aff9ebe5f..9f11a4e5d 100644
--- a/compiler/locop/src/FormattedGraph.test.cpp
+++ b/compiler/locop/src/FormattedGraph.test.cpp
@@ -17,7 +17,7 @@
#include "locop/FormattedGraph.h"
#include "ExampleGraph.h"
-#include <stdex/Memory.h>
+#include <memory>
#include <gtest/gtest.h>
@@ -42,7 +42,7 @@ TEST(LinearV1FormatterTest, user_defined_node_summary_builder)
auto bundle = make_bundle<PullPush>();
auto g = bundle->graph();
{
- bundle->push->annot(stdex::make_unique<MyAnnotation>());
+ bundle->push->annot(std::make_unique<MyAnnotation>());
}
struct MyBuilder final : public locop::NodeSummaryBuilder
@@ -63,11 +63,11 @@ TEST(LinearV1FormatterTest, user_defined_node_summary_builder)
{
std::unique_ptr<locop::NodeSummaryBuilder> create(const locop::SymbolTable *) const final
{
- return stdex::make_unique<MyBuilder>();
+ return std::make_unique<MyBuilder>();
}
};
- std::cout << locop::fmt<locop::LinearV1>(g).with(stdex::make_unique<MyFactory>()) << std::endl;
+ std::cout << locop::fmt<locop::LinearV1>(g).with(std::make_unique<MyFactory>()) << std::endl;
// TODO Check whether MyBuilder actually sees all the nodes in a graph
SUCCEED();
@@ -134,11 +134,11 @@ TEST(LinearV1FormatterTest, node_summary_builder_composition)
{
std::unique_ptr<locop::NodeSummaryBuilder> create(const locop::SymbolTable *tbl) const final
{
- return stdex::make_unique<CompositeBuilder>(tbl);
+ return std::make_unique<CompositeBuilder>(tbl);
}
};
- std::cout << locop::fmt<locop::LinearV1>(g).with(stdex::make_unique<MyFactory>()) << std::endl;
+ std::cout << locop::fmt<locop::LinearV1>(g).with(std::make_unique<MyFactory>()) << std::endl;
// TODO Check whether MyBuilder actually sees all the nodes in a graph
SUCCEED();
diff --git a/compiler/locop/src/FormattedTensorShape.cpp b/compiler/locop/src/FormattedTensorShape.cpp
index b2b6ea074..bc6310313 100644
--- a/compiler/locop/src/FormattedTensorShape.cpp
+++ b/compiler/locop/src/FormattedTensorShape.cpp
@@ -25,7 +25,7 @@ std::ostream &operator<<(std::ostream &os, const loco::Dimension &d)
return os;
}
-} // namespace
+} // namespace loco
namespace locop
{
diff --git a/compiler/locop/src/FormattedTensorShape.test.cpp b/compiler/locop/src/FormattedTensorShape.test.cpp
index fc85df3a6..626b6cc23 100644
--- a/compiler/locop/src/FormattedTensorShape.test.cpp
+++ b/compiler/locop/src/FormattedTensorShape.test.cpp
@@ -16,7 +16,7 @@
#include "locop/FormattedTensorShape.h"
-#include <stdex/Memory.h>
+#include <memory>
#include <gtest/gtest.h>
@@ -24,12 +24,26 @@ using namespace locop;
TEST(FormattedTensorShapeTest, BracketFormat)
{
- auto tensor_shape = stdex::make_unique<loco::TensorShape>();
+ auto tensor_shape = std::make_unique<loco::TensorShape>();
tensor_shape->rank(2);
tensor_shape->dim(0) = 4;
+ tensor_shape->dim(1) = 8;
std::cout << fmt<TensorShapeFormat::Bracket>(tensor_shape.get()) << std::endl;
SUCCEED();
}
+
+TEST(FormattedTensorShapeTest, PlainFormat)
+{
+ auto tensor_shape = std::make_unique<loco::TensorShape>();
+
+ tensor_shape->rank(2);
+ tensor_shape->dim(0) = 4;
+ tensor_shape->dim(1) = 8;
+
+ std::cout << fmt<TensorShapeFormat::Plain>(tensor_shape.get()) << std::endl;
+
+ SUCCEED();
+}
diff --git a/compiler/locop/src/GenericNodeSummaryBuilder.test.cpp b/compiler/locop/src/GenericNodeSummaryBuilder.test.cpp
index d688b5490..cfa82c2a2 100644
--- a/compiler/locop/src/GenericNodeSummaryBuilder.test.cpp
+++ b/compiler/locop/src/GenericNodeSummaryBuilder.test.cpp
@@ -17,8 +17,7 @@
#include "locop/GenericNodeSummaryBuilder.h"
#include "locop/FormattedGraph.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <stdexcept>
#include <gtest/gtest.h>
@@ -44,7 +43,7 @@ TEST(GenericNodeSummaryBuilderTest, simple)
{
std::unique_ptr<locop::NodeSummaryBuilder> create(const locop::SymbolTable *tbl) const final
{
- return stdex::make_unique<locop::GenericNodeSummaryBuilder>(tbl);
+ return std::make_unique<locop::GenericNodeSummaryBuilder>(tbl);
}
};
@@ -52,7 +51,7 @@ TEST(GenericNodeSummaryBuilderTest, simple)
g->nodes()->create<MockNode>();
- std::cout << locop::fmt<locop::LinearV1>(g).with(stdex::make_unique<MockFactory>()) << std::endl;
+ std::cout << locop::fmt<locop::LinearV1>(g).with(std::make_unique<MockFactory>()) << std::endl;
SUCCEED();
}
diff --git a/compiler/locop/src/NodeSummary.cpp b/compiler/locop/src/NodeSummary.cpp
index 3f8856997..20250a90f 100644
--- a/compiler/locop/src/NodeSummary.cpp
+++ b/compiler/locop/src/NodeSummary.cpp
@@ -16,8 +16,7 @@
#include "locop/NodeSummary.h"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
namespace locop
@@ -36,6 +35,6 @@ const std::string &NodeDesc::opname(void) const
return *_name;
}
-void NodeDesc::opname(const std::string &v) { _name = stdex::make_unique<std::string>(v); }
+void NodeDesc::opname(const std::string &v) { _name = std::make_unique<std::string>(v); }
-} // namespace loco
+} // namespace locop
diff --git a/compiler/logo-core/CMakeLists.txt b/compiler/logo-core/CMakeLists.txt
index 3bc71dbd0..374794f90 100644
--- a/compiler/logo-core/CMakeLists.txt
+++ b/compiler/logo-core/CMakeLists.txt
@@ -3,7 +3,9 @@ file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
add_library(logo_core STATIC ${SOURCES})
-set_target_properties(logo_core PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(logo_core PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
target_include_directories(logo_core PRIVATE src)
target_include_directories(logo_core PUBLIC include)
target_link_libraries(logo_core PUBLIC loco)
diff --git a/compiler/logo-core/src/Phase.test.cpp b/compiler/logo-core/src/Phase.test.cpp
new file mode 100644
index 000000000..2ee09101b
--- /dev/null
+++ b/compiler/logo-core/src/Phase.test.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/Phase.h>
+
+#include <loco.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+struct Bumblebee final : public logo::Pass
+{
+ const char *name(void) const final { return "Bee"; }
+ bool run(loco::Graph *) final { return false; }
+};
+
+} // namespace
+
+TEST(LogoPhaseSaturateTests, simple)
+{
+ loco::Graph g;
+ logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{&g};
+ logo::Phase phase;
+
+ phase.emplace_back(std::make_unique<Bumblebee>());
+ phase_runner.run(phase);
+
+ SUCCEED();
+}
+
+TEST(LogoPhaseRestartTests, simple)
+{
+ loco::Graph g;
+ logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{&g};
+ logo::Phase phase;
+
+ phase.emplace_back(std::make_unique<Bumblebee>());
+ phase_runner.run(phase);
+
+ SUCCEED();
+}
diff --git a/compiler/logo-ex/CMakeLists.txt b/compiler/logo-ex/CMakeLists.txt
new file mode 100644
index 000000000..31d76025e
--- /dev/null
+++ b/compiler/logo-ex/CMakeLists.txt
@@ -0,0 +1,23 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(logo_ex STATIC ${SOURCES})
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(logo_ex PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
+target_include_directories(logo_ex PRIVATE src)
+target_include_directories(logo_ex PUBLIC include)
+target_link_libraries(logo_ex PUBLIC loco)
+target_link_libraries(logo_ex PUBLIC logo_core)
+target_link_libraries(logo_ex PRIVATE locomotiv)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(logo_ex_test ${TESTS})
+target_include_directories(logo_ex_test PRIVATE src)
+target_link_libraries(logo_ex_test logo_ex)
diff --git a/compiler/logo-ex/README.md b/compiler/logo-ex/README.md
new file mode 100644
index 000000000..8ea55a202
--- /dev/null
+++ b/compiler/logo-ex/README.md
@@ -0,0 +1,6 @@
+# logo-ex
+
+_logo-ex_ provides _loco_ extended graph passes for transformation and optimization
+that get help from _locomotiv_.
+
+NOTE: this code was moved here from commit f2e7c38dcc601cb290c380d8314a3ae627923f58
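
[Editor's note] For orientation, a minimal sketch (not part of this patch) of how the relocated pass is meant to be driven. It reuses the logo::Phase / logo::PhaseRunner API exercised in compiler/logo-core/src/Phase.test.cpp above; the helper name fold_constants is illustrative only.

    #include <logo/ConstantFoldingPass.h>
    #include <logo/Phase.h>

    #include <loco.h>

    #include <memory>

    // Run the logo-ex constant-folding pass until it reports no further change.
    void fold_constants(loco::Graph *g)
    {
      logo::Phase phase;
      phase.emplace_back(std::make_unique<logo::ConstantFoldingPass>());

      // PhaseStrategy::Saturate re-runs the phase while any pass returns true,
      // which matches the `while (pass.run(...))` loops in the tests below.
      logo::PhaseRunner<logo::PhaseStrategy::Saturate> runner{g};
      runner.run(phase);
    }
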
diff --git a/compiler/logo-ex/include/logo/ConstantFoldingPass.h b/compiler/logo-ex/include/logo/ConstantFoldingPass.h
new file mode 100644
index 000000000..9143ae49b
--- /dev/null
+++ b/compiler/logo-ex/include/logo/ConstantFoldingPass.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOGO_EX_CONSTANT_FOLDING_PASS_H__
+#define __LOGO_EX_CONSTANT_FOLDING_PASS_H__
+
+#include <logo/Pass.h>
+
+#include <loco.h>
+
+namespace logo
+{
+
+/**
+ * @brief Performs constant folding optimization
+ */
+class ConstantFoldingPass : public Pass
+{
+public:
+ const char *name(void) const final { return "ConstantFoldingPass"; }
+
+public:
+ bool run(loco::Graph *graph) override;
+};
+
+} // namespace logo
+
+#endif // __LOGO_EX_CONSTANT_FOLDING_PASS_H__
diff --git a/compiler/logo-ex/include/logo/PassesEx.h b/compiler/logo-ex/include/logo/PassesEx.h
new file mode 100644
index 000000000..8bdf93bd9
--- /dev/null
+++ b/compiler/logo-ex/include/logo/PassesEx.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOGO_PASSES_EX_H__
+#define __LOGO_PASSES_EX_H__
+
+// Please keep this in alphabetical order
+
+#include <logo/ConstantFoldingPass.h>
+
+#endif // __LOGO_PASSES_EX_H__
diff --git a/compiler/logo-ex/requires.cmake b/compiler/logo-ex/requires.cmake
new file mode 100644
index 000000000..c76183353
--- /dev/null
+++ b/compiler/logo-ex/requires.cmake
@@ -0,0 +1,3 @@
+require("loco")
+require("logo-core")
+require("locomotiv")
diff --git a/compiler/logo-ex/src/Passes/ConstantFoldingPass.cpp b/compiler/logo-ex/src/Passes/ConstantFoldingPass.cpp
new file mode 100644
index 000000000..97d75458b
--- /dev/null
+++ b/compiler/logo-ex/src/Passes/ConstantFoldingPass.cpp
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/ConstantFoldingPass.h>
+
+#include <loco.h>
+#include <loco/IR/CanonicalDialect.h>
+
+#include <locomotiv/Session.h>
+
+#include <cassert>
+#include <stdexcept>
+
+namespace
+{
+
+uint64_t num_elements(const loco::NodeMixin<loco::NodeTrait::TensorShape> &shape)
+{
+ if (shape.rank() == 0)
+ {
+ return 0;
+ }
+
+ uint64_t res = 1;
+
+ for (uint32_t axis = 0; axis < shape.rank(); ++axis)
+ {
+ assert(shape.dim(axis).known());
+ res *= shape.dim(axis).value();
+ }
+
+ return res;
+}
+
+/// @brief For some ops, constant folding should not be performed. This returns true if the node
+/// is such an op.
+bool skip(const loco::Node *node)
+{
+ static std::set<uint32_t> skip_op = {
+ // TODO Current implementation works for 'Tensor' domain only. Support other domains such as
+ // `Feature`, `Filter`, `Bias`, etc.
+ static_cast<uint32_t>(loco::CanonicalOpcode::FilterEncode),
+ static_cast<uint32_t>(loco::CanonicalOpcode::FeatureEncode),
+ static_cast<uint32_t>(loco::CanonicalOpcode::BiasEncode),
+ static_cast<uint32_t>(loco::CanonicalOpcode::DepthwiseFilterEncode),
+
+ // We don't perform constant folding for Push
+ static_cast<uint32_t>(loco::CanonicalOpcode::Push),
+
+ // TensorBroadcast is a good hint for optimization
+ // TODO Let this option be controlled by driver using logo
+ static_cast<uint32_t>(loco::CanonicalOpcode::TensorBroadcast),
+ };
+
+ if (node->dialect() == loco::CanonicalDialect::get())
+ {
+ if (skip_op.find(node->opnum()) != skip_op.end())
+ return true;
+ }
+
+ return false;
+}
+
+/// @brief Checks if a node is a target of constant folding transform
+bool foldable(const loco::Node *node)
+{
+ if (node->dialect() == loco::CanonicalDialect::get())
+ {
+ if (skip(node))
+ return false;
+
+ if (node->arity() == 0) // e.g., when a node is ConstGen or Pull
+ return false;
+
+ // When all args are ConstGen, let's do Constant Folding Transforms
+ for (int i = 0; i < node->arity(); i++)
+ {
+ if (node->arg(i)->opnum() != static_cast<uint32_t>(loco::CanonicalOpcode::ConstGen))
+ return false;
+ }
+
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+}
+
+void fold(loco::Graph *graph, loco::Node *node)
+{
+ assert(foldable(node)); // sanity check to find a mistake when this function is reused later
+
+ // calculate foldable node
+ locomotiv::Session sess(graph, std::vector<loco::Node *>{node});
+ sess.infer();
+ auto data = sess.get_output(0);
+
+ assert(data != nullptr);
+
+ auto shape = data->shape();
+ auto dtype = data->dtype();
+
+ // build ConstGen
+ auto new_const = graph->nodes()->create<loco::ConstGen>();
+ {
+ new_const->dtype(dtype);
+
+ new_const->rank(shape->rank());
+ for (int d = 0; d < shape->rank(); d++)
+ new_const->dim(d) = shape->dim(d);
+
+ auto count = num_elements(*new_const);
+
+ if (dtype == loco::DataType::FLOAT32)
+ {
+ new_const->size<loco::DataType::FLOAT32>(count);
+
+ auto const_buf = data->as_f32_bufptr()->base();
+ for (int x = 0; x < count; x++)
+ new_const->at<loco::DataType::FLOAT32>(x) = const_buf[x];
+ }
+ else if (dtype == loco::DataType::S32)
+ {
+ new_const->size<loco::DataType::S32>(count);
+
+ auto const_buf = data->as_s32_bufptr()->base();
+ for (int x = 0; x < count; x++)
+ new_const->at<loco::DataType::S32>(x) = const_buf[x];
+ }
+ }
+
+ // replace node with new_const
+ loco::replace(node).with(new_const);
+}
+
+} // namespace
+
+namespace logo
+{
+
+bool ConstantFoldingPass::run(loco::Graph *graph)
+{
+ auto outputs = loco::output_nodes(graph);
+
+ bool changed = false;
+ for (auto node : loco::postorder_traversal(outputs))
+ {
+ if (foldable(node))
+ {
+ fold(graph, node);
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace logo
diff --git a/compiler/logo-ex/src/Passes/ConstantFoldingPass.test.cpp b/compiler/logo-ex/src/Passes/ConstantFoldingPass.test.cpp
new file mode 100644
index 000000000..ba571a7f6
--- /dev/null
+++ b/compiler/logo-ex/src/Passes/ConstantFoldingPass.test.cpp
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/ConstantFoldingPass.h>
+
+#include "TestHelper.h"
+
+#include <loco.h>
+
+#include <gtest/gtest.h>
+
+using namespace logo::test;
+
+TEST(ConstantFoldingTest, name)
+{
+ logo::ConstantFoldingPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(ConstantFoldingTest, run_NEG)
+{
+ loco::Graph g;
+ logo::ConstantFoldingPass pass;
+
+ ASSERT_FALSE(pass.run(&g));
+}
+
+namespace
+{
+
+/*
+ test case:
+ ConstGen ---- Relu ---- Push
+ (-3.14, 3.14) (0, 3.14)
+
+ after constant folding:
+ ConstGen ------Push
+ (0, 3.14)
+*/
+void create_net_const_relu(loco::Graph *graph)
+{
+ assert(graph);
+
+ auto const_node = graph->nodes()->create<loco::ConstGen>();
+ {
+ const_node->dtype(loco::DataType::FLOAT32);
+ const_node->rank(1);
+ const_node->dim(0) = 2;
+ const_node->size<loco::DataType::FLOAT32>(2);
+ const_node->at<loco::DataType::FLOAT32>(0) = -3.14f;
+ const_node->at<loco::DataType::FLOAT32>(1) = 3.14f;
+ }
+
+ auto relu_node = graph->nodes()->create<loco::ReLU>();
+ {
+ relu_node->input(const_node);
+ }
+
+ auto push_node = graph->nodes()->create<loco::Push>();
+ {
+ push_node->from(relu_node);
+ }
+
+ auto graph_output = graph->outputs()->create();
+ {
+ graph_output->name("output");
+ graph_output->dtype(loco::DataType::FLOAT32);
+ loco::link(graph_output, push_node);
+ }
+}
+
+} // namespace
+
+TEST(ConstantFolding, const_relu_to_const)
+{
+ auto graph = loco::make_graph();
+ create_net_const_relu(graph.get());
+
+ logo::ConstantFoldingPass pass;
+ while (pass.run(graph.get()) == true)
+ {
+ ;
+ }
+
+ auto push = logo::test::find_first_node_by_type<loco::Push>(graph.get());
+ auto const_gen = loco::must_cast<loco::ConstGen *>(push->from());
+ ASSERT_NE(const_gen, nullptr);
+
+ ASSERT_EQ(const_gen->size<loco::DataType::FLOAT32>(), 2);
+ ASSERT_EQ(const_gen->at<loco::DataType::FLOAT32>(0), 0); // result of relu(-3.14)
+ ASSERT_EQ(const_gen->at<loco::DataType::FLOAT32>(1), 3.14f);
+}
+
+namespace
+{
+
+/*
+ test case:
+ ConstGen ---- Relu ---+
+ (-1, 1) (0, 1) |
+ ConstGen ---+-- ConcatV2 ----- Push
+ (2, 3) | (0, 1, 2, 3)
+ axis(0) ---+
+
+ after constant folding:
+ ConstGen ----- Push
+ (0, 1, 2, 3)
+*/
+void create_net_const_relu_concat(loco::Graph *graph)
+{
+ assert(graph);
+
+ auto const_1_node = graph->nodes()->create<loco::ConstGen>();
+ {
+ const_1_node->dtype(loco::DataType::FLOAT32);
+ const_1_node->rank(1);
+ const_1_node->dim(0) = 2;
+ const_1_node->size<loco::DataType::FLOAT32>(2);
+ const_1_node->at<loco::DataType::FLOAT32>(0) = -1.0f;
+ const_1_node->at<loco::DataType::FLOAT32>(1) = 1.0f;
+ }
+
+ auto relu_node = graph->nodes()->create<loco::ReLU>();
+ {
+ relu_node->input(const_1_node);
+ }
+
+ auto const_2_node = graph->nodes()->create<loco::ConstGen>();
+ {
+ const_2_node->dtype(loco::DataType::FLOAT32);
+ const_2_node->rank(1);
+ const_2_node->dim(0) = 2;
+ const_2_node->size<loco::DataType::FLOAT32>(2);
+ const_2_node->at<loco::DataType::FLOAT32>(0) = 2.0f;
+ const_2_node->at<loco::DataType::FLOAT32>(1) = 3.0f;
+ }
+
+ auto concat_node = graph->nodes()->create<loco::TensorConcat>();
+ {
+ concat_node->lhs(relu_node);
+ concat_node->rhs(const_2_node);
+ concat_node->axis(0);
+ }
+
+ auto push_node = graph->nodes()->create<loco::Push>();
+ {
+ push_node->from(concat_node);
+ }
+
+ auto graph_output = graph->outputs()->create();
+ {
+ graph_output->name("output");
+ graph_output->dtype(loco::DataType::FLOAT32);
+ loco::link(graph_output, push_node);
+ }
+}
+
+} // namespace
+
+TEST(ConstantFolding, const_relu_to_concat)
+{
+ auto graph = loco::make_graph();
+ create_net_const_relu_concat(graph.get());
+
+ logo::ConstantFoldingPass pass;
+ while (pass.run(graph.get()) == true)
+ {
+ ;
+ }
+
+ auto push = logo::test::find_first_node_by_type<loco::Push>(graph.get());
+ auto const_gen = loco::must_cast<loco::ConstGen *>(push->from());
+ ASSERT_NE(const_gen, nullptr);
+
+ ASSERT_EQ(const_gen->size<loco::DataType::FLOAT32>(), 4);
+ ASSERT_EQ(const_gen->at<loco::DataType::FLOAT32>(0), 0);
+ ASSERT_EQ(const_gen->at<loco::DataType::FLOAT32>(1), 1);
+ ASSERT_EQ(const_gen->at<loco::DataType::FLOAT32>(2), 2);
+ ASSERT_EQ(const_gen->at<loco::DataType::FLOAT32>(3), 3);
+}
diff --git a/compiler/logo-ex/src/TestHelper.h b/compiler/logo-ex/src/TestHelper.h
new file mode 100644
index 000000000..07e3b20aa
--- /dev/null
+++ b/compiler/logo-ex/src/TestHelper.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TEST_HELPER_H__
+#define __TEST_HELPER_H__
+
+#include <loco.h>
+
+namespace logo
+{
+namespace test
+{
+
+template <typename T> T *find_first_node_by_type(loco::Graph *g)
+{
+ T *first_node = nullptr;
+
+ for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
+ {
+ first_node = dynamic_cast<T *>(node);
+ if (first_node != nullptr)
+ break;
+ }
+
+ return first_node;
+}
+
+} // namespace test
+} // namespace logo
+
+#endif // __TEST_HELPER_H__
diff --git a/compiler/logo/CMakeLists.txt b/compiler/logo/CMakeLists.txt
index 399cb7586..e6a6f907f 100644
--- a/compiler/logo/CMakeLists.txt
+++ b/compiler/logo/CMakeLists.txt
@@ -3,13 +3,13 @@ file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
add_library(logo STATIC ${SOURCES})
-set_target_properties(logo PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(logo PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
target_include_directories(logo PRIVATE src)
target_include_directories(logo PUBLIC include)
target_link_libraries(logo PUBLIC loco)
target_link_libraries(logo PUBLIC logo_core)
-target_link_libraries(logo PRIVATE locomotiv)
-target_link_libraries(logo PRIVATE stdex)
if(NOT ENABLE_TEST)
return()
@@ -20,4 +20,3 @@ nnas_find_package(GTest REQUIRED)
GTest_AddTest(logo_test ${TESTS})
target_include_directories(logo_test PRIVATE src)
target_link_libraries(logo_test logo)
-target_link_libraries(logo_test stdex)
diff --git a/compiler/logo/include/logo/ConstantFoldingPass.h b/compiler/logo/include/logo/ConstantFoldingPass.h
deleted file mode 100644
index 99ccdc315..000000000
--- a/compiler/logo/include/logo/ConstantFoldingPass.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LOGO_CONSTANT_FOLDING_PASS_H__
-#define __LOGO_CONSTANT_FOLDING_PASS_H__
-
-#include <logo/Pass.h>
-
-#include <loco.h>
-
-namespace logo
-{
-
-/**
- * @brief Performs constant folding optimization
- */
-class ConstantFoldingPass : public Pass
-{
-public:
- const char *name(void) const final { return "ConstantFoldingPass"; }
-
-public:
- bool run(loco::Graph *graph) override;
-};
-
-} // namespace logo
-
-#endif // __LOGO_CONSTANT_FOLDING_PASS_H__
diff --git a/compiler/logo/include/logo/Passes.h b/compiler/logo/include/logo/Passes.h
index 636251e45..06fd3212b 100644
--- a/compiler/logo/include/logo/Passes.h
+++ b/compiler/logo/include/logo/Passes.h
@@ -19,7 +19,6 @@
// Please keep this in alphabetical order
-#include <logo/ConstantFoldingPass.h>
#include <logo/RemoveDeadNodePass.h>
#include <logo/RemoveForwardNodePass.h>
#include <logo/ReorderDecodePass.h>
diff --git a/compiler/logo/requires.cmake b/compiler/logo/requires.cmake
index 9a7d14788..3e4d227cd 100644
--- a/compiler/logo/requires.cmake
+++ b/compiler/logo/requires.cmake
@@ -1,4 +1,2 @@
require("loco")
require("logo-core")
-require("locomotiv")
-require("stdex")
diff --git a/compiler/logo/src/Passes/ConstantFoldingPass.cpp b/compiler/logo/src/Passes/ConstantFoldingPass.cpp
deleted file mode 100644
index e038e7140..000000000
--- a/compiler/logo/src/Passes/ConstantFoldingPass.cpp
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <logo/ConstantFoldingPass.h>
-
-#include <loco.h>
-#include <loco/IR/CanonicalDialect.h>
-
-#include <stdex/Memory.h>
-
-#include <locomotiv/Session.h>
-
-#include <cassert>
-#include <stdexcept>
-
-namespace
-{
-
-uint64_t num_elements(const loco::NodeMixin<loco::NodeTrait::TensorShape> &shape)
-{
- if (shape.rank() == 0)
- {
- return 0;
- }
-
- uint64_t res = 1;
-
- for (uint32_t axis = 0; axis < shape.rank(); ++axis)
- {
- assert(shape.dim(axis).known());
- res *= shape.dim(axis).value();
- }
-
- return res;
-}
-
-/// @brief For some op, constant folding should not be performed. This returns true if node is such
-/// op.
-bool skip(const loco::Node *node)
-{
- static std::set<uint32_t> skip_op = {
- // TODO Current implementation works for 'Tensor' domain only. Support other domains such as
- // `Feature`, `Filter`, `Bias`, etc.
- static_cast<uint32_t>(loco::CanonicalOpcode::FilterEncode),
- static_cast<uint32_t>(loco::CanonicalOpcode::FeatureEncode),
- static_cast<uint32_t>(loco::CanonicalOpcode::BiasEncode),
- static_cast<uint32_t>(loco::CanonicalOpcode::DepthwiseFilterEncode),
-
- // We don't perform constant folding for Push
- static_cast<uint32_t>(loco::CanonicalOpcode::Push),
-
- // TensorBroadcast is a good hint for optimization
- // TODO Let this option be controlled by driver using logo
- static_cast<uint32_t>(loco::CanonicalOpcode::TensorBroadcast),
- };
-
- if (node->dialect() == loco::CanonicalDialect::get())
- {
- if (skip_op.find(node->opnum()) != skip_op.end())
- return true;
- }
-
- return false;
-}
-
-/// @brief Checks if a node is a target of constant folding transform
-bool foldable(const loco::Node *node)
-{
- if (node->dialect() == loco::CanonicalDialect::get())
- {
- if (skip(node))
- return false;
-
- if (node->arity() == 0) // e.g., when a node is e.g, ConstGen or Pull
- return false;
-
- // When all args are ConstGen, let's do Constant Folding Transforms
- for (int i = 0; i < node->arity(); i++)
- {
- if (node->arg(i)->opnum() != static_cast<uint32_t>(loco::CanonicalOpcode::ConstGen))
- return false;
- }
-
- return true;
- }
- else
- {
- return false;
- }
-}
-
-void fold(loco::Graph *graph, loco::Node *node)
-{
- assert(foldable(node)); // sanity check to find a mistake when this function is reused later
-
- // calcluate foldable node
- locomotiv::Session sess(graph, std::vector<loco::Node *>{node});
- sess.infer();
- auto data = sess.get_output(0);
-
- assert(data != nullptr);
-
- auto shape = data->shape();
- auto dtype = data->dtype();
-
- // build ConstGen
- auto new_const = graph->nodes()->create<loco::ConstGen>();
- {
- new_const->dtype(dtype);
-
- new_const->rank(shape->rank());
- for (int d = 0; d < shape->rank(); d++)
- new_const->dim(d) = shape->dim(d);
-
- auto count = num_elements(*new_const);
-
- if (dtype == loco::DataType::FLOAT32)
- {
- new_const->size<loco::DataType::FLOAT32>(count);
-
- auto const_buf = data->as_f32_bufptr()->base();
- for (int x = 0; x < count; x++)
- new_const->at<loco::DataType::FLOAT32>(x) = const_buf[x];
- }
- else if (dtype == loco::DataType::S32)
- {
- new_const->size<loco::DataType::S32>(count);
-
- auto const_buf = data->as_s32_bufptr()->base();
- for (int x = 0; x < count; x++)
- new_const->at<loco::DataType::S32>(x) = const_buf[x];
- }
- }
-
- // replace node with new_const
- loco::replace(node).with(new_const);
-}
-
-} // namespace
-
-namespace logo
-{
-
-bool ConstantFoldingPass::run(loco::Graph *graph)
-{
- auto outputs = loco::output_nodes(graph);
-
- bool changed = false;
- for (auto node : loco::postorder_traversal(outputs))
- {
- if (foldable(node))
- {
- fold(graph, node);
- changed = true;
- }
- }
-
- return changed;
-}
-
-} // namespace logo
diff --git a/compiler/logo/src/Passes/ConstantFoldingPass.test.cpp b/compiler/logo/src/Passes/ConstantFoldingPass.test.cpp
deleted file mode 100644
index b9c4942c4..000000000
--- a/compiler/logo/src/Passes/ConstantFoldingPass.test.cpp
+++ /dev/null
@@ -1,179 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <logo/ConstantFoldingPass.h>
-
-#include "TestHelper.h"
-
-#include <loco.h>
-
-#include <gtest/gtest.h>
-
-using namespace logo::test;
-
-namespace
-{
-
-/*
- test case:
- ConstGen ---- Relu ---- Push
- (-3.14, 3.14) (0, 3.14)
-
- after constant folding:
- ConstGen ------Push
- (0, 3.14)
-*/
-void create_net_const_relu(loco::Graph *graph)
-{
- assert(graph);
-
- auto const_node = graph->nodes()->create<loco::ConstGen>();
- {
- const_node->dtype(loco::DataType::FLOAT32);
- const_node->rank(1);
- const_node->dim(0) = 2;
- const_node->size<loco::DataType::FLOAT32>(2);
- const_node->at<loco::DataType::FLOAT32>(0) = -3.14f;
- const_node->at<loco::DataType::FLOAT32>(1) = 3.14f;
- }
-
- auto relu_node = graph->nodes()->create<loco::ReLU>();
- {
- relu_node->input(const_node);
- }
-
- auto push_node = graph->nodes()->create<loco::Push>();
- {
- push_node->from(relu_node);
- }
-
- auto graph_output = graph->outputs()->create();
- {
- graph_output->name("output");
- graph_output->dtype(loco::DataType::FLOAT32);
- loco::link(graph_output, push_node);
- }
-}
-
-} // namespace
-
-TEST(ConstantFolding, const_relu_to_const)
-{
- auto graph = loco::make_graph();
- create_net_const_relu(graph.get());
-
- logo::ConstantFoldingPass pass;
- while (pass.run(graph.get()) == true)
- {
- ;
- }
-
- auto push = logo::test::find_first_node_by_type<loco::Push>(graph.get());
- auto const_gen = loco::must_cast<loco::ConstGen *>(push->from());
- ASSERT_NE(const_gen, nullptr);
-
- ASSERT_EQ(const_gen->size<loco::DataType::FLOAT32>(), 2);
- ASSERT_EQ(const_gen->at<loco::DataType::FLOAT32>(0), 0); // result of relu(-3.14)
- ASSERT_EQ(const_gen->at<loco::DataType::FLOAT32>(1), 3.14f);
-}
-
-namespace
-{
-
-/*
- test case:
- ConstGen ---- Relu ---+
- (-1, 1) (0, 1) |
- ConstGen ---+-- ConcatV2 ----- Push
- (2, 3) | (0, 1, 2, 3)
- axis(0) ---+
-
- after constant folding:
- ConstGen ----- Push
- (0, 1, 2, 3)
-*/
-void create_net_const_relu_concat(loco::Graph *graph)
-{
- assert(graph);
-
- auto const_1_node = graph->nodes()->create<loco::ConstGen>();
- {
- const_1_node->dtype(loco::DataType::FLOAT32);
- const_1_node->rank(1);
- const_1_node->dim(0) = 2;
- const_1_node->size<loco::DataType::FLOAT32>(2);
- const_1_node->at<loco::DataType::FLOAT32>(0) = -1.0f;
- const_1_node->at<loco::DataType::FLOAT32>(1) = 1.0f;
- }
-
- auto relu_node = graph->nodes()->create<loco::ReLU>();
- {
- relu_node->input(const_1_node);
- }
-
- auto const_2_node = graph->nodes()->create<loco::ConstGen>();
- {
- const_2_node->dtype(loco::DataType::FLOAT32);
- const_2_node->rank(1);
- const_2_node->dim(0) = 2;
- const_2_node->size<loco::DataType::FLOAT32>(2);
- const_2_node->at<loco::DataType::FLOAT32>(0) = 2.0f;
- const_2_node->at<loco::DataType::FLOAT32>(1) = 3.0f;
- }
-
- auto concat_node = graph->nodes()->create<loco::TensorConcat>();
- {
- concat_node->lhs(relu_node);
- concat_node->rhs(const_2_node);
- concat_node->axis(0);
- }
-
- auto push_node = graph->nodes()->create<loco::Push>();
- {
- push_node->from(concat_node);
- }
-
- auto graph_output = graph->outputs()->create();
- {
- graph_output->name("output");
- graph_output->dtype(loco::DataType::FLOAT32);
- loco::link(graph_output, push_node);
- }
-}
-
-} // namespace
-
-TEST(ConstantFolding, const_relu_to_concat)
-{
- auto graph = loco::make_graph();
- create_net_const_relu_concat(graph.get());
-
- logo::ConstantFoldingPass pass;
- while (pass.run(graph.get()) == true)
- {
- ;
- }
-
- auto push = logo::test::find_first_node_by_type<loco::Push>(graph.get());
- auto const_gen = loco::must_cast<loco::ConstGen *>(push->from());
- ASSERT_NE(const_gen, nullptr);
-
- ASSERT_EQ(const_gen->size<loco::DataType::FLOAT32>(), 4);
- ASSERT_EQ(const_gen->at<loco::DataType::FLOAT32>(0), 0);
- ASSERT_EQ(const_gen->at<loco::DataType::FLOAT32>(1), 1);
- ASSERT_EQ(const_gen->at<loco::DataType::FLOAT32>(2), 2);
- ASSERT_EQ(const_gen->at<loco::DataType::FLOAT32>(3), 3);
-}
diff --git a/compiler/logo/src/Passes/EmptyTestGraph.h b/compiler/logo/src/Passes/EmptyTestGraph.h
new file mode 100644
index 000000000..67f2c8a11
--- /dev/null
+++ b/compiler/logo/src/Passes/EmptyTestGraph.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOGO_EMPTY_TEST_GRAPH_H__
+#define __LOGO_EMPTY_TEST_GRAPH_H__
+
+#include <loco.h>
+
+namespace logo
+{
+
+void create_empty_test_net(loco::Graph *graph);
+
+} // namespace logo
+
+#endif // __LOGO_EMPTY_TEST_GRAPH_H__
diff --git a/compiler/logo/src/Passes/EmptyTestGraph.test.cpp b/compiler/logo/src/Passes/EmptyTestGraph.test.cpp
new file mode 100644
index 000000000..46750b79c
--- /dev/null
+++ b/compiler/logo/src/Passes/EmptyTestGraph.test.cpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <loco.h>
+
+#include <gtest/gtest.h>
+
+namespace logo
+{
+
+void create_empty_test_net(loco::Graph *graph)
+{
+ assert(graph);
+
+ auto const_node = graph->nodes()->create<loco::ConstGen>();
+ {
+ const_node->dtype(loco::DataType::FLOAT32);
+ const_node->rank(1);
+ const_node->dim(0) = 1;
+ const_node->size<loco::DataType::FLOAT32>(1);
+ const_node->at<loco::DataType::FLOAT32>(0) = 1.0f;
+ }
+
+ auto push_node = graph->nodes()->create<loco::Push>();
+ {
+ push_node->from(const_node);
+ }
+
+ auto graph_output = graph->outputs()->create();
+ {
+ graph_output->name("output");
+ graph_output->dtype(loco::DataType::FLOAT32);
+ loco::link(graph_output, push_node);
+ }
+}
+
+} // namespace logo
diff --git a/compiler/logo/src/Passes/RemoveDeadNodePass.test.cpp b/compiler/logo/src/Passes/RemoveDeadNodePass.test.cpp
new file mode 100644
index 000000000..c0ecbdaa9
--- /dev/null
+++ b/compiler/logo/src/Passes/RemoveDeadNodePass.test.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/RemoveDeadNodePass.h>
+
+#include "EmptyTestGraph.h"
+
+#include <gtest/gtest.h>
+
+TEST(RemoveDeadNodePassTest, name)
+{
+ logo::RemoveDeadNodePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(RemoveDeadNodePassTest, run_NEG)
+{
+ loco::Graph g;
+ logo::RemoveDeadNodePass pass;
+
+ logo::create_empty_test_net(&g);
+
+ ASSERT_FALSE(pass.run(&g));
+}
diff --git a/compiler/logo/src/Passes/RemoveDeadNodeWithQueryPass.test.cpp b/compiler/logo/src/Passes/RemoveDeadNodeWithQueryPass.test.cpp
new file mode 100644
index 000000000..f14bfc30d
--- /dev/null
+++ b/compiler/logo/src/Passes/RemoveDeadNodeWithQueryPass.test.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/RemoveDeadNodeWithQueryPass.h>
+
+#include "EmptyTestGraph.h"
+
+#include <gtest/gtest.h>
+
+TEST(RemoveDeadNodeWithQueryPassTest, name)
+{
+ logo::RemoveDeadNodeWithQueryPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(RemoveDeadNodeWithQueryPassTest, run_NEG)
+{
+ loco::Graph g;
+ logo::RemoveDeadNodeWithQueryPass pass;
+
+ logo::create_empty_test_net(&g);
+
+ ASSERT_FALSE(pass.run(&g));
+}
diff --git a/compiler/logo/src/Passes/RemoveForwardNodePass.test.cpp b/compiler/logo/src/Passes/RemoveForwardNodePass.test.cpp
new file mode 100644
index 000000000..bb905aec5
--- /dev/null
+++ b/compiler/logo/src/Passes/RemoveForwardNodePass.test.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/RemoveForwardNodePass.h>
+
+#include "EmptyTestGraph.h"
+
+#include <gtest/gtest.h>
+
+TEST(RemoveForwardNodePassTest, name)
+{
+ logo::RemoveForwardNodePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(RemoveForwardNodePassTest, run_NEG)
+{
+ loco::Graph g;
+ logo::RemoveForwardNodePass pass;
+
+ logo::create_empty_test_net(&g);
+
+ ASSERT_FALSE(pass.run(&g));
+}
diff --git a/compiler/logo/src/Passes/ReorderDecodePass.test.cpp b/compiler/logo/src/Passes/ReorderDecodePass.test.cpp
new file mode 100644
index 000000000..f8e158d3a
--- /dev/null
+++ b/compiler/logo/src/Passes/ReorderDecodePass.test.cpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/ReorderDecodePass.h>
+
+#include "EmptyTestGraph.h"
+
+#include <gtest/gtest.h>
+
+TEST(ReorderDecodePassTest, TensorBiasAdd_name)
+{
+ logo::ReorderDecodePass<loco::TensorBiasAdd> pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(ReorderDecodePassTest, ReLU_name)
+{
+ logo::ReorderDecodePass<loco::ReLU> pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(ReorderDecodePassTest, TensorBiasAdd_run_NEG)
+{
+ loco::Graph g;
+ logo::ReorderDecodePass<loco::TensorBiasAdd> pass;
+
+ logo::create_empty_test_net(&g);
+
+ ASSERT_FALSE(pass.run(&g));
+}
+
+TEST(ReorderDecodePassTest, ReLU_run_NEG)
+{
+ loco::Graph g;
+ logo::ReorderDecodePass<loco::ReLU> pass;
+
+ logo::create_empty_test_net(&g);
+
+ ASSERT_FALSE(pass.run(&g));
+}
diff --git a/compiler/logo/src/Passes/ResolveDuplicateReshapePass.test.cpp b/compiler/logo/src/Passes/ResolveDuplicateReshapePass.test.cpp
new file mode 100644
index 000000000..de2df6fd5
--- /dev/null
+++ b/compiler/logo/src/Passes/ResolveDuplicateReshapePass.test.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/ResolveDuplicateReshapePass.h>
+
+#include "EmptyTestGraph.h"
+
+#include <gtest/gtest.h>
+
+TEST(ResolveDuplicateReshapePassTest, name)
+{
+ logo::ResolveDuplicateReshapePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(ResolveDuplicateReshapePassTest, run_NEG)
+{
+ loco::Graph g;
+ logo::ResolveDuplicateReshapePass pass;
+
+ logo::create_empty_test_net(&g);
+
+ ASSERT_FALSE(pass.run(&g));
+}
diff --git a/compiler/logo/src/Passes/ResolveRedundantReshapePass.test.cpp b/compiler/logo/src/Passes/ResolveRedundantReshapePass.test.cpp
new file mode 100644
index 000000000..9a7e95846
--- /dev/null
+++ b/compiler/logo/src/Passes/ResolveRedundantReshapePass.test.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/ResolveRedundantReshapePass.h>
+
+#include "EmptyTestGraph.h"
+
+#include <gtest/gtest.h>
+
+TEST(ResolveRedundantReshapePassTest, name)
+{
+ logo::ResolveRedundantReshapePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(ResolveRedundantReshapePassTest, run_NEG)
+{
+ loco::Graph g;
+ logo::ResolveRedundantReshapePass pass;
+
+ logo::create_empty_test_net(&g);
+
+ ASSERT_FALSE(pass.run(&g));
+}
diff --git a/compiler/logo/src/Passes/SimplifyDomainConversionPass.cpp b/compiler/logo/src/Passes/SimplifyDomainConversionPass.cpp
index 0bda85b6f..40ddb133b 100644
--- a/compiler/logo/src/Passes/SimplifyDomainConversionPass.cpp
+++ b/compiler/logo/src/Passes/SimplifyDomainConversionPass.cpp
@@ -20,8 +20,7 @@
#include <loco/IR/CanonicalDialect.h>
#include <loco/IR/CanonicalNode.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <set>
#include <vector>
#include <cassert>
@@ -123,9 +122,6 @@ bool SimplifyDomainConversionPass::run(loco::Graph *g)
{
using namespace loco;
- auto encoder = encode_node->encoder();
- assert(encoder != nullptr);
-
auto decode_node = dynamic_cast<loco::FeatureDecode *>(encode_node->input());
if (decode_node == nullptr)
{
@@ -133,6 +129,9 @@ bool SimplifyDomainConversionPass::run(loco::Graph *g)
}
assert(decode_node->input() != nullptr);
+ auto encoder = encode_node->encoder();
+ assert(encoder != nullptr);
+
auto decoder = decode_node->decoder();
assert(decoder != nullptr);
@@ -231,8 +230,8 @@ bool SimplifyDomainConversionPass::run(loco::Graph *g)
perm_vec[to] = from;
}
- transposeCandidates.insert(stdex::make_unique<TransposeCtx>(
- encode_node, decode_node, encode_node->input(), perm_vec));
+ transposeCandidates.insert(
+ std::make_unique<TransposeCtx>(encode_node, decode_node, encode_node->input(), perm_vec));
}
}
@@ -293,8 +292,8 @@ bool SimplifyDomainConversionPass::run(loco::Graph *g)
perm_vec[to] = from;
}
- transposeCandidates.insert(stdex::make_unique<TransposeCtx>(
- encode_node, decode_node, encode_node->input(), perm_vec));
+ transposeCandidates.insert(
+ std::make_unique<TransposeCtx>(encode_node, decode_node, encode_node->input(), perm_vec));
}
}
@@ -303,9 +302,6 @@ bool SimplifyDomainConversionPass::run(loco::Graph *g)
{
using namespace loco;
- auto encoder = encode_node->encoder();
- assert(encoder != nullptr);
-
auto decode_node = dynamic_cast<loco::MatrixDecode *>(encode_node->input());
if (decode_node == nullptr)
{
@@ -313,6 +309,9 @@ bool SimplifyDomainConversionPass::run(loco::Graph *g)
}
assert(decode_node->input() != nullptr);
+ auto encoder = encode_node->encoder();
+ assert(encoder != nullptr);
+
auto decoder = decode_node->decoder();
assert(decoder != nullptr);
@@ -377,8 +376,8 @@ bool SimplifyDomainConversionPass::run(loco::Graph *g)
perm_vec[to] = from;
}
- transposeCandidates.insert(stdex::make_unique<TransposeCtx>(
- encode_node, decode_node, encode_node->input(), perm_vec));
+ transposeCandidates.insert(
+ std::make_unique<TransposeCtx>(encode_node, decode_node, encode_node->input(), perm_vec));
}
}
@@ -397,7 +396,7 @@ bool SimplifyDomainConversionPass::run(loco::Graph *g)
TransposeCtx(loco::Node *first, loco::Node *last, loco::Node *input,
std::vector<loco::TensorAxis> perm)
- : first_node(first), last_node(last), input_node(input), perm_vec(perm)
+ : first_node(first), last_node(last), input_node(input), perm_vec(perm)
{ /* empty */
}
};
diff --git a/compiler/logo/src/Passes/SimplifyDomainConversionPass.test.cpp b/compiler/logo/src/Passes/SimplifyDomainConversionPass.test.cpp
index 9a05763b4..75a288089 100644
--- a/compiler/logo/src/Passes/SimplifyDomainConversionPass.test.cpp
+++ b/compiler/logo/src/Passes/SimplifyDomainConversionPass.test.cpp
@@ -19,10 +19,26 @@
#include "TestHelper.h"
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
#include <gtest/gtest.h>
+TEST(SimplifyDomainConversionPassTest, name)
+{
+ logo::SimplifyDomainConversionPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(SimplifyDomainConversionPassTest, run_NEG)
+{
+ loco::Graph g;
+ logo::SimplifyDomainConversionPass pass;
+
+ ASSERT_FALSE(pass.run(&g));
+}
+
namespace
{
@@ -65,7 +81,7 @@ template <FilterLayout T> loco::FilterDecode *make_filter_decode(loco::Node *inp
{
loco::Graph *g = input_for_decode->graph();
- auto decoder = stdex::make_unique<loco::PermutingDecoder<loco::Domain::Filter>>();
+ auto decoder = std::make_unique<loco::PermutingDecoder<loco::Domain::Filter>>();
decoder->perm(perm<T>());
@@ -80,7 +96,7 @@ template <FilterLayout T> loco::FilterEncode *make_filter_encode(loco::Node *inp
{
loco::Graph *g = input_for_encode->graph();
- auto encoder = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
+ auto encoder = std::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
encoder->perm(perm<T>());
diff --git a/compiler/luci-compute/CMakeLists.txt b/compiler/luci-compute/CMakeLists.txt
new file mode 100644
index 000000000..33a8573f4
--- /dev/null
+++ b/compiler/luci-compute/CMakeLists.txt
@@ -0,0 +1,30 @@
+nnas_find_package(TensorFlowSource EXACT 2.8.0 QUIET)
+nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.8.0 QUIET)
+nnas_find_package(TensorFlowRuySource EXACT 2.8.0 QUIET)
+nnas_find_package(NEON2SSESource QUIET)
+
+if(NOT TensorFlowSource_FOUND)
+ message(STATUS "Build luci-compute: FAILED (missing TensorFlowSource 2.8.0)")
+ return()
+endif(NOT TensorFlowSource_FOUND)
+
+if(NOT TensorFlowGEMMLowpSource_FOUND)
+ message(STATUS "Build luci-compute: FAILED (missing TensorFlowGEMMLowpSource 2.8.0)")
+ return()
+endif(NOT TensorFlowGEMMLowpSource_FOUND)
+
+if(NOT TensorFlowRuySource_FOUND)
+ message(STATUS "Build luci-compute: FAILED (missing TensorFlowRuySource 2.8.0)")
+ return()
+endif(NOT TensorFlowRuySource_FOUND)
+
+if(NOT NEON2SSESource_FOUND)
+ message(STATUS "Build luci-compute: FAILED (missing NEON2SSESource)")
+ return()
+endif(NOT NEON2SSESource_FOUND)
+
+add_library(luci_compute INTERFACE)
+target_include_directories(luci_compute SYSTEM INTERFACE "${TensorFlowSource_DIR}")
+target_include_directories(luci_compute SYSTEM INTERFACE "${TensorFlowGEMMLowpSource_DIR}")
+target_include_directories(luci_compute SYSTEM INTERFACE "${TensorFlowRuySource_DIR}")
+target_include_directories(luci_compute SYSTEM INTERFACE "${NEON2SSESource_DIR}")
diff --git a/compiler/luci-compute/README.md b/compiler/luci-compute/README.md
new file mode 100644
index 000000000..caa4ab454
--- /dev/null
+++ b/compiler/luci-compute/README.md
@@ -0,0 +1,3 @@
+# luci-compute
+
+_luci-compute_ provides computation kernels for _luci_ and related modules.
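
[Editor's note] A hedged illustration, not part of this patch: since luci_compute is an INTERFACE target that only exposes the TensorFlow 2.8 sources above as system include paths, a consumer that links it (in CMake, target_link_libraries(consumer PRIVATE luci_compute)) can include the TFLite internal kernel headers directly. The header path and tflite::RuntimeShape usage below are assumptions based on the TF 2.8 source layout.

    // Sketch of a luci_compute consumer; assumes TensorFlowSource_DIR points at
    // the TensorFlow 2.8 source root so TFLite internal headers resolve.
    #include <tensorflow/lite/kernels/internal/types.h>

    #include <cassert>

    int main()
    {
      // RuntimeShape is one of the small helpers these headers provide.
      const tflite::RuntimeShape shape({1, 3, 3, 2});
      assert(shape.FlatSize() == 1 * 3 * 3 * 2); // product of the dimensions
      return 0;
    }
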
diff --git a/compiler/luci-eval-driver/CMakeLists.txt b/compiler/luci-eval-driver/CMakeLists.txt
new file mode 100644
index 000000000..e8cd4f8ec
--- /dev/null
+++ b/compiler/luci-eval-driver/CMakeLists.txt
@@ -0,0 +1,10 @@
+set(SRCS_EVAL_TESTER
+ src/EvalDriver.cpp
+ )
+
+add_executable(luci_eval_driver ${SRCS_EVAL_TESTER})
+target_link_libraries(luci_eval_driver PRIVATE luci_import)
+target_link_libraries(luci_eval_driver PRIVATE luci_interpreter)
+target_link_libraries(luci_eval_driver PRIVATE safemain)
+
+install(TARGETS luci_eval_driver DESTINATION bin)
diff --git a/compiler/luci-eval-driver/requires.cmake b/compiler/luci-eval-driver/requires.cmake
new file mode 100644
index 000000000..847eb87c6
--- /dev/null
+++ b/compiler/luci-eval-driver/requires.cmake
@@ -0,0 +1,3 @@
+require("luci")
+require("luci-interpreter")
+require("safemain")
diff --git a/compiler/luci-eval-driver/src/EvalDriver.cpp b/compiler/luci-eval-driver/src/EvalDriver.cpp
new file mode 100644
index 000000000..fb48f67e2
--- /dev/null
+++ b/compiler/luci-eval-driver/src/EvalDriver.cpp
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <luci/ImporterEx.h>
+#include <luci_interpreter/Interpreter.h>
+
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <vector>
+#include <string>
+
+namespace
+{
+
+void readDataFromFile(const std::string &filename, char *data, size_t data_size)
+{
+ std::ifstream fs(filename, std::ifstream::binary);
+ if (fs.fail())
+ throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
+ if (fs.read(data, data_size).fail())
+ throw std::runtime_error("Failed to read data from file \"" + filename + "\".\n");
+}
+
+void writeDataToFile(const std::string &filename, const char *data, size_t data_size)
+{
+ std::ofstream fs(filename, std::ofstream::binary);
+ if (fs.fail())
+ throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
+ if (fs.write(data, data_size).fail())
+ {
+ throw std::runtime_error("Failed to write data to file \"" + filename + "\".\n");
+ }
+}
+
+template <typename NodeT> size_t getTensorSize(const NodeT *node)
+{
+ uint32_t tensor_size = loco::size(node->dtype());
+ for (uint32_t i = 0; i < node->rank(); ++i)
+ tensor_size *= node->dim(i).value();
+ return tensor_size;
+}
+
+} // namespace
+
+/*
+ * @brief EvalDriver main
+ *
+ * Driver for testing luci-interpreter
+ *
+ */
+int entry(int argc, char **argv)
+{
+ if (argc != 5)
+ {
+ std::cerr
+ << "Usage: " << argv[0]
+ << " <path/to/circle/model> <num_inputs> <path/to/input/prefix> <path/to/output/file>\n";
+ return EXIT_FAILURE;
+ }
+
+ const char *filename = argv[1];
+ const int32_t num_inputs = atoi(argv[2]);
+ const char *input_prefix = argv[3];
+ const char *output_file = argv[4];
+
+ // Load model from the file
+ luci::ImporterEx importer;
+ std::unique_ptr<luci::Module> module = importer.importVerifyModule(filename);
+ if (module == nullptr)
+ {
+ std::cerr << "ERROR: Failed to load '" << filename << "'" << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ // Create interpreter.
+ luci_interpreter::Interpreter interpreter(module.get());
+
+ // Set input.
+  // Data for the n-th input is read from ${input_prefix}n
+  // (ex: Add.circle.input0, Add.circle.input1 ...)
+ const auto input_nodes = loco::input_nodes(module->graph());
+ if (num_inputs != input_nodes.size())
+ {
+ // NOTE using num_inputs is actually unnecessary but is kept to preserve interface.
+ std::cerr << "ERROR: invalid num_inputs value; should be " << input_nodes.size() << std::endl;
+ return EXIT_FAILURE;
+ }
+ for (int32_t i = 0; i < num_inputs; i++)
+ {
+ const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[i]);
+ std::vector<char> input_data(getTensorSize(input_node));
+ readDataFromFile(std::string(input_prefix) + std::to_string(i), input_data.data(),
+ input_data.size());
+ interpreter.writeInputTensor(input_node, input_data.data(), input_data.size());
+ }
+
+ // Do inference.
+ interpreter.interpret();
+
+ // Get output.
+ const auto output_nodes = loco::output_nodes(module->graph());
+ for (int i = 0; i < module->graph()->outputs()->size(); i++)
+ {
+ const auto *output_node = loco::must_cast<const luci::CircleOutput *>(output_nodes[i]);
+ std::vector<char> output_data(getTensorSize(output_node));
+ interpreter.readOutputTensor(output_node, output_data.data(), output_data.size());
+
+ // Output data is written in ${output_file}
+ // (ex: Add.circle.output0)
+ // Output shape is written in ${output_file}.shape
+ // (ex: Add.circle.output0.shape)
+ writeDataToFile(std::string(output_file) + std::to_string(i), output_data.data(),
+ output_data.size());
+    // A rank-0 output tensor is a scalar; its shape is reported as (1)
+ if (output_node->rank() == 0)
+ {
+ writeDataToFile(std::string(output_file) + std::to_string(i) + ".shape", "1", 1);
+ }
+ else
+ {
+ auto shape_str = std::to_string(output_node->dim(0).value());
+ for (int j = 1; j < output_node->rank(); j++)
+ {
+ shape_str += ",";
+ shape_str += std::to_string(output_node->dim(j).value());
+ }
+ writeDataToFile(std::string(output_file) + std::to_string(i) + ".shape", shape_str.c_str(),
+ shape_str.size());
+ }
+ }
+ return EXIT_SUCCESS;
+}
diff --git a/compiler/luci-interpreter/CMakeLists.txt b/compiler/luci-interpreter/CMakeLists.txt
index 33fdc52aa..1f7acee87 100644
--- a/compiler/luci-interpreter/CMakeLists.txt
+++ b/compiler/luci-interpreter/CMakeLists.txt
@@ -1,4 +1,15 @@
set(LUCI_INTERPRETER_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include")
set(LUCI_INTERPRETER_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src")
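+
+# Platform Abstraction Layer (PAL) selection: platform builds may override
+# LUCI_INTERPRETER_PAL_DIR (e.g. to use pal/cmsisnn); pal/linux is the default.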
+if (NOT LUCI_INTERPRETER_PAL_DIR)
+ set(LUCI_INTERPRETER_PAL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/pal/linux")
+endif()
+
+set(KERNEL_REGISTER_FILE ${LUCI_INTERPRETER_PAL_DIR}/KernelsToBuild.lst)
+
+if (NOT DEFINED CUSTOM_LUCI_INTERPRETER_SUFFIX)
+ set(LUCI_INTERPRETER_SUFFIX "")
+else()
+ set(LUCI_INTERPRETER_SUFFIX ${CUSTOM_LUCI_INTERPRETER_SUFFIX})
+endif()
add_subdirectory(src)
diff --git a/compiler/luci-interpreter/README.md b/compiler/luci-interpreter/README.md
new file mode 100644
index 000000000..77ec5c81c
--- /dev/null
+++ b/compiler/luci-interpreter/README.md
@@ -0,0 +1,158 @@
+# luci-interpreter
+
+`luci-interpreter` is an inference engine for neural networks represented in luci IR.
+See the `compiler/luci/lang` directory for details about the IR.
+You can find useful infrastructure, such as the importer/exporter and optimizations, in `compiler/luci`.
+
+`luci-interpreter` provides:
+- Basic inference functionality, input setters and output getters
+- Interface for inspecting hidden interpreter state, like activation values during inference
+- Customization mechanisms to fit the interpreter to specific platforms, like MCUs
+
+Public interface headers are placed in the `luci-interpreter/include/luci_interpreter` directory.
+
+## Basic usage
+
+Minimal usage includes:
+- Setting input data
+- Running inference
+- Fetching inference results
+
+The interpreter object is reusable and can run multiple inferences.
+Elements in tensors (input/output/internal) are stored contiguously in a C-like (row-major) layout:
+this means that for a tensor t = [[0, 1], [2, 3]], t[0, 1] == 1.
+
+Input and output tensors have the same indices as in the original luci model.
+
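+A minimal sketch of the flat-offset arithmetic this layout implies (the buffer
+and dimension names here are illustrative only):
+
+``` c++
+// Row-major addressing: element t[r, c] of a rank-2 tensor with `cols`
+// columns lives at flat offset r * cols + c.
+const float t_buffer[] = {0, 1, 2, 3}; // t = [[0, 1], [2, 3]]
+const int cols = 2;
+float value = t_buffer[0 * cols + 1];  // t[0, 1] == 1
+```
+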
+**Usage example:**
+``` c++
+// Note: getTensorSize is a function that computes the tensor size in bytes;
+// it is not part of the interpreter and should be implemented by the user
+
+luci_interpreter::Interpreter interpreter(luci_module);
+
+// Set inputs
+// assuming the model has only one input and one output
+const auto input_nodes = loco::input_nodes(module->graph());
+
+const auto *input_node = dynamic_cast<const luci::CircleInput *>(input_nodes[0]);
+std::vector<char> input_data(getTensorSize(input_node));
+// Initialize input data here
+
+interpreter.writeInputTensor(input_node, input_data.data(), input_data.size());
+
+// Start inference
+interpreter.interpret();
+
+// Fetch inference results
+const auto output_nodes = loco::output_nodes(module->graph());
+const auto *output_node = dynamic_cast<const luci::CircleOutput *>(output_nodes[0]);
+std::vector<char> output_data(getTensorSize(output_node));
+interpreter.readOutputTensor(output_node, output_data.data(), output_data.size());
+```
+
+## Inspecting intermediate state
+
+The interpreter provides interfaces for inspecting its internal state during inference.
+
+This is done via an "observer" mechanism:
+- the `Interpreter` class has an `attachObserver` method, which takes a pointer to an `ExecutionObserver` object
+- `ExecutionObserver` defines several callback methods the user can override to inject custom code
+
+`ExecutionObserver` provides three callbacks:
+- `postTensorWrite` allows inspecting the contents of an output tensor after an operation has executed
+- `preOperatorExecute` notifies that the interpreter is about to execute an operation
+- `postOperatorExecute` notifies that the interpreter has finished executing an operation
+
+See `luci-interpreter/include/luci_interpreter/Interpreter.h` for details of this interface.
+
+**Usage example:**
+``` c++
+class CustomExecutionObserver: public luci_interpreter::ExecutionObserver
+{
+public:
+ void postTensorWrite(const luci::CircleNode *node, const Tensor *tensor) override
+ {
+ if (tensor->element_type() != loco::DataType::FLOAT32)
+ return;
+ for (int i = 0; i < tensor->shape().num_elements(); ++i)
+      std::cout << tensor->data<float>()[i] << ", ";
+ }
+
+  // A user observer can override only the methods it needs;
+  // the others inherit the empty implementation from the base observer.
+
+ // void preOperatorExecute(const luci::CircleNode *node);
+ // void postOperatorExecute(const luci::CircleNode *node);
+};
+
+luci_interpreter::Interpreter interpreter(module);
+CustomExecutionObserver observer;
+interpreter.attachObserver(&observer);
+
+// initialize input_data
+interpreter.writeInputTensor(input_node, input_data.data(), input_data.size());
+
+interpreter.interpret();
+```
+
+## Customizing inference
+
+### Memory manager
+
+The interpreter provides a hook for altering the default memory management mechanism.
+
+This is done via the `IMemoryManager` interface; see `luci-interpreter/include/luci_interpreter/MemoryManager.h` for the declaration.
+
+This header contains the `IMemoryManager` abstract class, which is responsible for allocation and deallocation of tensor memory.
+
+The user can construct an interpreter with one of the predefined memory managers or with a custom one.
+Note that one memory manager can be shared between multiple interpreter instances, because an interpreter does not own the manager object.
+
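+For a custom manager, a minimal sketch could look like the following (this is an
+illustration, not a class shipped with the interpreter; it relies on the `Tensor`
+accessors shown elsewhere in this document):
+
+``` c++
+class NewDeleteMemoryManager : public luci_interpreter::IMemoryManager
+{
+public:
+  void allocate_memory(luci_interpreter::Tensor &tensor) final
+  {
+    const size_t size_in_bytes =
+      tensor.shape().num_elements() * luci_interpreter::getDataTypeSize(tensor.element_type());
+    tensor.set_data_buffer(new uint8_t[size_in_bytes]);
+  }
+
+  void release_memory(luci_interpreter::Tensor &tensor) final
+  {
+    delete[] tensor.data<uint8_t>();
+    tensor.set_data_buffer(nullptr);
+  }
+};
+```
+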
+List of predefined memory managers:
+- `SimpleMemoryManager` A thin wrapper around new/delete; this is the default.
+- `TestMemoryManager` Remembers all allocations and releases them in the manager's destructor; used in kernel unit tests.
+- `BuddyMemoryManager` Implements the buddy algorithm and serves tensor allocations from an external buffer; does not use new/delete.
+- `StaticMemoryManager` Uses a precomputed memory allocation plan. Requires preparation with a MemoryPlanner, but can reduce memory consumption in restricted environments (such as MCUs).
+
+**SimpleMemoryManager usage example:**
+
+No explicit selection is needed to use this memory manager; it is the default.
+``` c++
+luci_interpreter::Interpreter interpreter(module);
+```
+
+**TestMemoryManager usage example:**
+
+``` c++
+luci_interpreter::TestMemoryManager mm;
+luci_interpreter::Interpreter interpreter(module, &mm);
+```
+
+**BuddyMemoryManager usage example:**
+
+`BuddyMemoryManager` implements a classic allocation algorithm: https://en.wikipedia.org/wiki/Buddy_memory_allocation.
+
+This allocator uses an external buffer as a memory pool, which allows allocations to be served from a static memory array.
+
+Limitations:
+- The current implementation uses only the largest power-of-two portion of the given buffer.
+
+  For example, for a 1000-byte buffer, only the lower 512 bytes will be used.
+- The current implementation can handle a memory pool of at most 4 gigabytes.
+
+``` c++
+ constexpr int buffer_size = 2048;
+ static uint8_t buffer[buffer_size];
+ luci_interpreter::BuddyMemoryManager memory_manager(buffer, buffer_size);
+ luci_interpreter::Interpreter interpreter(module.get(), &memory_manager);
+```
+
+**StaticMemoryManager usage example:**
+``` c++
+TBD when it is merged
+```
+
+## Further reading
+
+If you want to participate in development, please read `DEVELOPER.md` for SW architecture details.
diff --git a/compiler/luci-interpreter/include/luci_interpreter/BuddyMemoryManager.h b/compiler/luci-interpreter/include/luci_interpreter/BuddyMemoryManager.h
new file mode 100644
index 000000000..fec08993c
--- /dev/null
+++ b/compiler/luci-interpreter/include/luci_interpreter/BuddyMemoryManager.h
@@ -0,0 +1,144 @@
+/* Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H
+
+#include "luci_interpreter/MemoryManager.h"
+
+namespace luci_interpreter
+{
+
+class BuddyMemoryManager : public IMemoryManager
+{
+public:
+ BuddyMemoryManager(uint8_t *memory_start, int32_t memSize);
+
+ void allocate_memory(luci_interpreter::Tensor &tensor) final;
+ void release_memory(luci_interpreter::Tensor &tensor) final;
+
+private:
+ struct Block
+ {
+ Block *next_free;
+ bool is_free;
+ uint32_t size;
+ // debug field
+ Block *self;
+ };
+
+ Block *_start_block;
+ int32_t _num_blocks;
+ uint32_t _size;
+ Block *_free_blocks[32]{};
+
+ static int32_t lowerLog2(uint32_t val)
+ {
+ int32_t i = 0;
+ while (val >>= 1)
+ i++;
+
+ return i;
+ }
+
+ void addToBlocks(Block *block, int32_t l)
+ {
+ if (!block)
+ return;
+
+ block->next_free = _free_blocks[l];
+ _free_blocks[l] = block;
+ }
+
+ void removeFromBlocks(const Block *block, int32_t l)
+ {
+ if (!block)
+ return;
+
+ Block *tmp = _free_blocks[l];
+
+ if (block == tmp)
+ {
+ _free_blocks[l] = block->next_free;
+ return;
+ }
+
+ while (tmp)
+ {
+ if (tmp->next_free == block)
+ {
+ tmp->next_free = block->next_free;
+ return;
+ }
+
+ tmp = tmp->next_free;
+ }
+ }
+
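+  // Splits the free block at level l into two half-size buddies; the second
+  // half is pushed onto the free list of level l - 1.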
+ void divideBlock(Block *block, int32_t l)
+ {
+ int32_t size = ((block->size + sizeof(Block)) / 2) - sizeof(Block);
+
+ removeFromBlocks(block, l);
+
+ // there is no need to add to the free_blocks list here
+ block->is_free = true;
+ block->size = size;
+ block->self = block;
+
+ Block *buddy;
+ buddy = (Block *)((uint8_t *)block + sizeof(Block) + size);
+ buddy->is_free = true;
+ buddy->size = size;
+ buddy->self = buddy;
+
+ addToBlocks(buddy, l - 1);
+ }
+
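+  // Merges a block with its buddy if possible. The buddy is located by XOR-ing
+  // the block's offset from the pool start with its level bit: buddies are the
+  // two halves of one earlier split.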
+ Block *mergeBlock(Block *block)
+ {
+ Block *buddy;
+
+ const int32_t l = lowerLog2(block->size + sizeof(Block));
+
+ const int64_t address = ((uint8_t *)block - (uint8_t *)_start_block);
+ buddy = (Block *)((address ^ (1LL << l)) + (uint8_t *)_start_block);
+
+ if (!buddy->is_free || buddy->size != block->size)
+ return nullptr;
+
+ if (block > buddy)
+ {
+ Block *x = block;
+ block = buddy;
+ buddy = x;
+ }
+
+ removeFromBlocks(block, l);
+ removeFromBlocks(buddy, l);
+
+ block->size = block->size * 2 + sizeof(Block);
+ block->is_free = true;
+ block->self = block;
+
+ addToBlocks(block, l + 1);
+
+ return block;
+ }
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H
diff --git a/compiler/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h b/compiler/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h
new file mode 100644
index 000000000..375b1ae20
--- /dev/null
+++ b/compiler/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__
+#define __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__
+
+#include <luci/Import/GraphBuilderRegistry.h>
+
+namespace luci_interpreter
+{
+
+/**
+ * @brief Creates and returns a GraphBuilderSource that avoids copying constant
+ *        buffers from the model file.
+ *
+ * @warning Use this source only when the model buffer outlives the Interpreter.
+ */
+std::unique_ptr<luci::GraphBuilderSource> source_without_constant_copying();
+
+} // namespace luci_interpreter
+
+#endif // __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__
diff --git a/compiler/luci-interpreter/include/luci_interpreter/Interpreter.h b/compiler/luci-interpreter/include/luci_interpreter/Interpreter.h
index 7a14bf6f8..8e2f457a5 100644
--- a/compiler/luci-interpreter/include/luci_interpreter/Interpreter.h
+++ b/compiler/luci-interpreter/include/luci_interpreter/Interpreter.h
@@ -22,6 +22,7 @@
#include <luci/IR/Nodes/CircleInput.h>
#include <luci/IR/Nodes/CircleOutput.h>
+#include "luci_interpreter/MemoryManager.h"
#include <luci/IR/Module.h>
#include <memory>
@@ -51,6 +52,8 @@ class Interpreter
public:
explicit Interpreter(const luci::Module *module);
+ explicit Interpreter(const luci::Module *module, IMemoryManager *memory_manager);
+
~Interpreter();
void writeInputTensor(const luci::CircleInput *input_node, const void *data, size_t data_size);
@@ -64,6 +67,9 @@ public:
const Tensor *getTensor(const loco::Node *node) { return _node_to_tensor[node]; }
private:
+  // _default_memory_manager must be declared before _runtime_module: members are
+  // destroyed in reverse declaration order, and _runtime_module may still use the
+  // memory manager while it is being destroyed.
+ std::unique_ptr<IMemoryManager> _default_memory_manager = nullptr;
std::unique_ptr<class RuntimeModule> _runtime_module;
// Observer functionality support.
diff --git a/compiler/luci-interpreter/include/luci_interpreter/MemoryManager.h b/compiler/luci-interpreter/include/luci_interpreter/MemoryManager.h
new file mode 100644
index 000000000..f32c52095
--- /dev/null
+++ b/compiler/luci-interpreter/include/luci_interpreter/MemoryManager.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_MEMORY_MANAGER_H
+
+#include "luci_interpreter/core/DataType.h"
+#include "luci_interpreter/core/Tensor.h"
+
+namespace luci_interpreter
+{
+
+class IMemoryManager
+{
+public:
+ virtual void allocate_memory(luci_interpreter::Tensor &tensor) = 0;
+ virtual void release_memory(luci_interpreter::Tensor &tensor) = 0;
+
+ virtual ~IMemoryManager() = default;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_MEMORY_MANAGER_H
diff --git a/compiler/luci-interpreter/include/luci_interpreter/SimpleMemoryManager.h b/compiler/luci-interpreter/include/luci_interpreter/SimpleMemoryManager.h
new file mode 100644
index 000000000..658a1c609
--- /dev/null
+++ b/compiler/luci-interpreter/include/luci_interpreter/SimpleMemoryManager.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H
+
+#include "luci_interpreter/MemoryManager.h"
+
+namespace luci_interpreter
+{
+
+class SimpleMemoryManager : public IMemoryManager
+{
+public:
+ void allocate_memory(luci_interpreter::Tensor &tensor) final;
+ void release_memory(luci_interpreter::Tensor &tensor) final;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H
diff --git a/compiler/luci-interpreter/include/luci_interpreter/StaticMemoryManager.h b/compiler/luci-interpreter/include/luci_interpreter/StaticMemoryManager.h
new file mode 100644
index 000000000..ded7bde79
--- /dev/null
+++ b/compiler/luci-interpreter/include/luci_interpreter/StaticMemoryManager.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H
+
+#include "luci_interpreter/MemoryManager.h"
+
+namespace luci_interpreter
+{
+
+// Allocates tensors inside a static buffer, using offsets defined in the luci model.
+class StaticMemoryManager : public IMemoryManager
+{
+public:
+ StaticMemoryManager() = delete;
+
+ explicit StaticMemoryManager(uint8_t *buffer_ptr) : _buffer_ptr(buffer_ptr)
+ { /* Do nothing */
+ }
+
+ void allocate_memory(luci_interpreter::Tensor &tensor) final;
+ void release_memory(luci_interpreter::Tensor &tensor) final;
+
+private:
+ // Stores a pointer to the beginning of the allocated memory buffer.
+ uint8_t *_buffer_ptr;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H
diff --git a/compiler/luci-interpreter/include/luci_interpreter/TestMemoryManager.h b/compiler/luci-interpreter/include/luci_interpreter/TestMemoryManager.h
new file mode 100644
index 000000000..397bbed76
--- /dev/null
+++ b/compiler/luci-interpreter/include/luci_interpreter/TestMemoryManager.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H
+
+#include "luci_interpreter/MemoryManager.h"
+
+namespace luci_interpreter
+{
+// Memory manager for use in kernel tests. It eliminates the need to manually
+// free allocated memory in tests: the manager remembers all its allocations
+// and deletes them in its destructor.
+class TestMemoryManager : public IMemoryManager
+{
+public:
+ void allocate_memory(luci_interpreter::Tensor &tensor) final;
+ void release_memory(luci_interpreter::Tensor &tensor) final;
+
+ ~TestMemoryManager() override
+ {
+ for (auto allocation : allocations)
+ {
+ delete[] allocation;
+ }
+ }
+
+private:
+ std::vector<uint8_t *> allocations;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H
diff --git a/compiler/luci-interpreter/include/luci_interpreter/core/Tensor.h b/compiler/luci-interpreter/include/luci_interpreter/core/Tensor.h
index 4ac3d8660..ad3388785 100644
--- a/compiler/luci-interpreter/include/luci_interpreter/core/Tensor.h
+++ b/compiler/luci-interpreter/include/luci_interpreter/core/Tensor.h
@@ -101,7 +101,7 @@ public:
return _quantization.scale[0];
}
- float zero_point() const
+ int32_t zero_point() const
{
assert(_quantization.zero_point.size() == 1);
return _quantization.zero_point[0];
@@ -113,9 +113,19 @@ public:
int32_t quantized_dimension() const { return _quantization.quantized_dimension; }
- template <typename T> const T *data() const { return reinterpret_cast<const T *>(_data.get()); }
+ template <typename T> const T *data() const
+ {
+ static_assert(std::is_same<uint8_t, char>::value or
+ std::is_same<uint8_t, unsigned char>::value);
+ return reinterpret_cast<const T *>(_data);
+ }
- template <typename T> T *data() { return reinterpret_cast<T *>(_data.get()); }
+ template <typename T> T *data()
+ {
+ static_assert(std::is_same<uint8_t, char>::value or
+ std::is_same<uint8_t, unsigned char>::value);
+ return reinterpret_cast<T *>(_data);
+ }
const std::string &name() const { return _name; }
@@ -125,12 +135,50 @@ public:
void resize(const Shape &new_shape);
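+  // Binds an externally allocated buffer to this tensor (nullptr detaches it);
+  // called by memory managers instead of allocating internally.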
+ void set_data_buffer(uint8_t *buffer)
+ {
+ if (buffer == nullptr)
+ {
+ _data_allocated = false;
+ }
+ else
+ {
+ _data_allocated = true;
+ }
+ _data = buffer;
+ }
+
+ bool is_observable() const { return _is_observable; }
+
+ void set_observable(bool value) { _is_observable = value; }
+
+ bool is_allocatable() const { return _is_allocatable; }
+
+ void set_allocatable(bool value) { _is_allocatable = value; }
+
+ bool is_data_allocated() const { return _data_allocated; }
+
+ int32_t get_offset() const { return _offset; }
+
+ void set_offset(int32_t offset) { _offset = offset; }
+
private:
DataType _element_type;
Shape _shape;
AffineQuantization _quantization;
- std::unique_ptr<uint8_t[]> _data;
+ uint8_t *_data = nullptr;
std::string _name;
+ bool _data_allocated = false;
+ // Write of tensor is reported to registered Observers only if this tensor is observable
+ // This is needed for tensors used in kernel implementation, but not present in original model.
+ bool _is_observable = true;
+ // Memory manager is called for tensor only if it is "allocatable".
+ // Kernel configuration could disable allocation of some tensors if they are not needed for
+ // particular operation.
+ bool _is_allocatable = true;
+ // Used by static memory manager.
+ // Stores the offset from the beginning of the allocated memory buffer.
+ int32_t _offset = -1;
};
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst b/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst
new file mode 100644
index 000000000..fe3f73f5d
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst
@@ -0,0 +1,63 @@
+REGISTER_KERNEL(Abs)
+REGISTER_KERNEL(Add)
+REGISTER_KERNEL(ArgMax)
+REGISTER_KERNEL(AveragePool2D)
+REGISTER_KERNEL(BatchToSpaceND)
+REGISTER_KERNEL(Cast)
+REGISTER_KERNEL(Concatenation)
+REGISTER_KERNEL(Conv2D)
+REGISTER_KERNEL(DepthToSpace)
+REGISTER_KERNEL(DepthwiseConv2D)
+REGISTER_KERNEL(Dequantize)
+REGISTER_KERNEL(Div)
+REGISTER_KERNEL(Elu)
+REGISTER_KERNEL(Exp)
+REGISTER_KERNEL(ExpandDims)
+REGISTER_KERNEL(Fill)
+REGISTER_KERNEL(Floor)
+REGISTER_KERNEL(FloorDiv)
+REGISTER_KERNEL(Equal)
+REGISTER_KERNEL(FullyConnected)
+REGISTER_KERNEL(Greater)
+REGISTER_KERNEL(GreaterEqual)
+REGISTER_KERNEL(If)
+REGISTER_KERNEL(InstanceNorm)
+REGISTER_KERNEL(L2Normalize)
+REGISTER_KERNEL(L2Pool2D)
+REGISTER_KERNEL(LeakyRelu)
+REGISTER_KERNEL(Less)
+REGISTER_KERNEL(LessEqual)
+REGISTER_KERNEL(LogicalAnd)
+REGISTER_KERNEL(LogicalNot)
+REGISTER_KERNEL(LogicalOr)
+REGISTER_KERNEL(Logistic)
+REGISTER_KERNEL(Maximum)
+REGISTER_KERNEL(MaxPool2D)
+REGISTER_KERNEL(Minimum)
+REGISTER_KERNEL(MirrorPad)
+REGISTER_KERNEL(Mul)
+REGISTER_KERNEL(Neg)
+REGISTER_KERNEL(NotEqual)
+REGISTER_KERNEL(Pad)
+REGISTER_KERNEL(PadV2)
+REGISTER_KERNEL(PRelu)
+REGISTER_KERNEL(Quantize)
+REGISTER_KERNEL(Reshape)
+REGISTER_KERNEL(ResizeBilinear)
+REGISTER_KERNEL(ResizeNearestNeighbor)
+REGISTER_KERNEL(Rsqrt)
+REGISTER_KERNEL(Shape)
+REGISTER_KERNEL(Softmax)
+REGISTER_KERNEL(SpaceToBatchND)
+REGISTER_KERNEL(SpaceToDepth)
+REGISTER_KERNEL(StridedSlice)
+REGISTER_KERNEL(Sqrt)
+REGISTER_KERNEL(Square)
+REGISTER_KERNEL(SquaredDifference)
+REGISTER_KERNEL(Squeeze)
+REGISTER_KERNEL(Sub)
+REGISTER_KERNEL(SVDF)
+REGISTER_KERNEL(Tanh)
+REGISTER_KERNEL(Transpose)
+REGISTER_KERNEL(TransposeConv)
+REGISTER_KERNEL(While)
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALArgMax.h b/compiler/luci-interpreter/pal/cmsisnn/PALArgMax.h
new file mode 100644
index 000000000..21e63296d
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALArgMax.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ARGMAX_H
+#define LUCI_INTERPRETER_PAL_ARGMAX_H
+
+#include <tensorflow/lite/kernels/internal/reference/arg_min_max.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T1, typename T2, typename T3>
+static inline void ArgMinMax(const tflite::RuntimeShape &input1_shape, const T1 *input1_data,
+ const T2 *axis, const tflite::RuntimeShape &output_shape,
+ T3 *output_data, const std::greater<T1> cmp)
+{
+ tflite::reference_ops::ArgMinMax(input1_shape, input1_data, axis, output_shape, output_data, cmp);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ARGMAX_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h b/compiler/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h
new file mode 100644
index 000000000..a274afb7e
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+#define LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+#include <arm_nn_types.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void AveragePool(const tflite::PoolParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data,
+ const tflite::RuntimeShape &scratchpad_shape, T *scratchpad_data)
+{
+ {
+ // MARK: At this moment this operation is not supported
+ assert(false && "AveragePool NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)output_shape;
+ (void)output_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ }
+}
+
+template <>
+inline void AveragePool<int8_t>(const tflite::PoolParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data,
+ const tflite::RuntimeShape &scratchpad_shape,
+ int8_t *scratchpad_data)
+{
+ assert(input_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+ assert(scratchpad_data != nullptr);
+
+ const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ assert(batches == 1);
+
+ const int depth = tflite::MatchingDim(input_shape, 3, output_shape, 3);
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = 1;
+ input_dims.h = input_shape.Dims(1);
+ input_dims.w = input_shape.Dims(2);
+ input_dims.c = depth;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = 1;
+ output_dims.h = output_shape.Dims(1);
+ output_dims.w = output_shape.Dims(2);
+ output_dims.c = depth;
+
+ cmsis_nn_pool_params pool_params;
+ pool_params.stride.h = params.stride_height;
+ pool_params.stride.w = params.stride_width;
+ pool_params.padding.h = params.padding_values.height;
+ pool_params.padding.w = params.padding_values.width;
+ pool_params.activation.min = params.quantized_activation_min;
+ pool_params.activation.max = params.quantized_activation_max;
+
+ cmsis_nn_dims filter_dims;
+ filter_dims.n = 1;
+ filter_dims.h = params.filter_height;
+ filter_dims.w = params.filter_width;
+ filter_dims.c = 1;
+
+ cmsis_nn_context ctx;
+ ctx.buf = scratchpad_data;
+ ctx.size = scratchpad_shape.Dims(0);
+ auto res = arm_avgpool_s8(&ctx, &pool_params, &input_dims, input_data, &filter_dims, &output_dims,
+ output_data);
+ assert(res == ARM_MATH_SUCCESS);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &output_shape)
+
+{
+ if (input_data_type == luci_interpreter::DataType::S8)
+ {
+ assert(input_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+
+ const int32_t output_width = output_shape.Dims(2);
+ const int32_t depth = tflite::MatchingDim(input_shape, 3, output_shape, 3);
+
+ const int32_t buf_size = arm_avgpool_s8_get_buffer_size(output_width, depth);
+ auto data_type_size = static_cast<int32_t>(luci_interpreter::getDataTypeSize(input_data_type));
+
+ luci_interpreter::Shape scratchpad_shape{buf_size * data_type_size};
+ scratchpad->resize(scratchpad_shape);
+ }
+ else
+ {
+ scratchpad->set_allocatable(false);
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h b/compiler/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h
new file mode 100644
index 000000000..f8a4a8036
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+#define LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+
+#include <tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+BatchToSpaceND(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+ const tflite::RuntimeShape &unextended_input3_shape, const int32 *crops_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::BatchToSpaceND(
+ unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+ unextended_input3_shape, crops_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALConv2d.h b/compiler/luci-interpreter/pal/cmsisnn/PALConv2d.h
new file mode 100644
index 000000000..cfb84ea60
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALConv2d.h
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_CONV2D_H
+#define LUCI_INTERPRETER_PAL_CONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/conv.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h>
+#include <arm_nn_types.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+ const float *input_data, const tflite::RuntimeShape &filter_shape,
+ const float *filter_data, const tflite::RuntimeShape &bias_shape,
+ const float *bias_data, const tflite::RuntimeShape &output_shape,
+ float *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ float *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data,
+ tflite::RuntimeShape(), nullptr);
+}
+
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+ const uint8 *input_data, const tflite::RuntimeShape &filter_shape,
+ const uint8 *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+ uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ uint8 *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data, scratchpad_shape,
+ scratchpad_data, nullptr);
+}
+
+static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult,
+ const int32_t *shifts, const tflite::RuntimeShape &input_shape,
+ const int8 *input_data, const tflite::RuntimeShape &filter_shape,
+ const int8 *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+ int8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ int8 *scratchpad_data)
+{
+ if (scratchpad_data)
+ {
+ cmsis_nn_conv_params conv_params;
+ conv_params.dilation.h = params.dilation_height_factor;
+ conv_params.dilation.w = params.dilation_width_factor;
+
+ assert(conv_params.dilation.h == 1);
+ assert(conv_params.dilation.w == 1);
+
+ conv_params.input_offset = params.input_offset;
+ conv_params.output_offset = params.output_offset;
+ conv_params.stride.h = params.stride_height;
+ conv_params.stride.w = params.stride_width;
+ conv_params.padding.h = params.padding_values.height;
+ conv_params.padding.w = params.padding_values.width;
+ conv_params.activation.min = params.quantized_activation_min;
+ conv_params.activation.max = params.quantized_activation_max;
+
+ cmsis_nn_per_channel_quant_params quant_params;
+ quant_params.multiplier = const_cast<int32_t *>(mult);
+ quant_params.shift = const_cast<int32_t *>(shifts);
+
+ assert(conv_params.activation.min <= conv_params.activation.max);
+ assert(input_shape.DimensionsCount() == 4);
+ assert(filter_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+ const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
+ const int output_depth = tflite::MatchingDim(filter_shape, 0, output_shape, 3);
+ if (bias_data)
+ {
+ assert(bias_shape.FlatSize() == output_depth);
+ }
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = batch_size;
+ input_dims.h = input_shape.Dims(1);
+ input_dims.w = input_shape.Dims(2);
+ input_dims.c = input_depth;
+
+ cmsis_nn_dims filter_dims;
+ filter_dims.n = output_depth;
+ filter_dims.h = filter_shape.Dims(1);
+ filter_dims.w = filter_shape.Dims(2);
+ filter_dims.c = input_depth;
+
+ cmsis_nn_dims bias_dims;
+ bias_dims.n = 1;
+ bias_dims.h = 1;
+ bias_dims.w = 1;
+ bias_dims.c = output_depth;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = batch_size;
+ output_dims.h = output_shape.Dims(1);
+ output_dims.w = output_shape.Dims(2);
+ output_dims.c = output_depth;
+
+ cmsis_nn_context ctx;
+ ctx.buf = scratchpad_data;
+ ctx.size = scratchpad_shape.Dims(0);
+
+ auto res = arm_convolve_wrapper_s8(&ctx, &conv_params, &quant_params, &input_dims, input_data,
+ &filter_dims, filter_data, &bias_dims, bias_data,
+ &output_dims, output_data);
+ assert(res == ARM_MATH_SUCCESS);
+ }
+ else
+ {
+ tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
+ filter_shape, filter_data, bias_shape, bias_data,
+ output_shape, output_data);
+ }
+}
+
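+// Sizes the scratchpad tensor via CMSIS-NN's buffer-size query for the s8
+// convolution wrapper; other data types and dilated convolutions get no
+// scratchpad allocation.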
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::ConvParams &params,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &filter_shape,
+ const tflite::RuntimeShape &output_shape)
+{
+ cmsis_nn_conv_params conv_params;
+ conv_params.dilation.h = params.dilation_height_factor;
+ conv_params.dilation.w = params.dilation_width_factor;
+
+ if (input_data_type == loco::DataType::S8 && conv_params.dilation.h == 1 &&
+ conv_params.dilation.w == 1)
+ {
+ const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int32_t input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
+ const int32_t output_depth = tflite::MatchingDim(filter_shape, 0, output_shape, 3);
+ const int32_t filter_height = filter_shape.Dims(1);
+ const int32_t filter_width = filter_shape.Dims(2);
+ const int32_t output_height = output_shape.Dims(1);
+ const int32_t output_width = output_shape.Dims(2);
+
+ conv_params.input_offset = params.input_offset;
+ conv_params.output_offset = params.output_offset;
+ conv_params.stride.h = params.stride_height;
+ conv_params.stride.w = params.stride_width;
+ conv_params.padding.h = params.padding_values.height;
+ conv_params.padding.w = params.padding_values.width;
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = batches;
+ input_dims.h = input_shape.Dims(1);
+ input_dims.w = input_shape.Dims(2);
+ input_dims.c = input_depth;
+
+ cmsis_nn_dims filter_dims;
+ filter_dims.n = output_depth;
+ filter_dims.h = filter_height;
+ filter_dims.w = filter_width;
+ filter_dims.c = input_depth;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = batches;
+ output_dims.h = output_height;
+ output_dims.w = output_width;
+ output_dims.c = output_depth;
+
+ const int32_t buf_size = arm_convolve_wrapper_s8_get_buffer_size(&conv_params, &input_dims,
+ &filter_dims, &output_dims);
+
+ luci_interpreter::Shape scratchpad_shape{buf_size};
+ scratchpad->resize(scratchpad_shape);
+ }
+ else
+ {
+ scratchpad->set_allocatable(false);
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_CONV2D_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h b/compiler/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h
new file mode 100644
index 000000000..8463e571e
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+#define LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+
+#include <tensorflow/lite/kernels/internal/reference/depth_to_space.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void DepthToSpace(const tflite::DepthToSpaceParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape,
+ const T *input_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::DepthToSpace(op_params, unextended_input_shape, input_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h b/compiler/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h
new file mode 100644
index 000000000..120dcd803
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+#define LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+DepthwiseConvPerChannel(const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+ const int32_t *output_shift, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &filter_shape,
+ const T *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32_t *bias_data, const tflite::RuntimeShape &output_shape,
+ T *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ T *scratchpad_data)
+{
+ {
+ // MARK: At this moment this operation is not supported
+ assert(false && "DepthwiseConvPerChannel NYI");
+ (void)params;
+ (void)output_multiplier;
+ (void)output_shift;
+ (void)input_shape;
+ (void)input_data;
+ (void)filter_shape;
+ (void)filter_data;
+ (void)bias_shape;
+ (void)bias_data;
+ (void)output_shape;
+ (void)output_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ }
+}
+
+template <>
+inline void DepthwiseConvPerChannel<int8_t>(
+ const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+ const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data,
+ const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data)
+{
+ if (scratchpad_data)
+ {
+ cmsis_nn_dw_conv_params dw_conv_params;
+ dw_conv_params.dilation.h = params.dilation_height_factor;
+ dw_conv_params.dilation.w = params.dilation_width_factor;
+ assert(dw_conv_params.dilation.h == 1);
+ assert(dw_conv_params.dilation.w == 1);
+
+ dw_conv_params.input_offset = params.input_offset;
+ dw_conv_params.output_offset = params.output_offset;
+ dw_conv_params.stride.h = params.stride_height;
+ dw_conv_params.stride.w = params.stride_width;
+ dw_conv_params.padding.h = params.padding_values.height;
+ dw_conv_params.padding.w = params.padding_values.width;
+
+ dw_conv_params.activation.min = params.quantized_activation_min;
+ dw_conv_params.activation.max = params.quantized_activation_max;
+ dw_conv_params.ch_mult = params.depth_multiplier;
+
+ cmsis_nn_per_channel_quant_params quant_params;
+ int32_t output_multiplier = params.output_multiplier;
+ int32_t output_shift = params.output_shift;
+
+ quant_params.multiplier = &output_multiplier;
+ quant_params.shift = &output_shift;
+
+ assert(dw_conv_params.activation.min <= dw_conv_params.activation.max);
+ const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int output_depth = tflite::MatchingDim(filter_shape, 3, output_shape, 3);
+ if (bias_data)
+ {
+ assert(bias_shape.FlatSize() == output_depth);
+ }
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = batch_size;
+ input_dims.h = input_shape.Dims(1);
+ input_dims.w = input_shape.Dims(2);
+ input_dims.c = input_shape.Dims(3);
+
+ cmsis_nn_dims filter_dims;
+ filter_dims.n = filter_shape.Dims(0);
+ filter_dims.h = filter_shape.Dims(1);
+ filter_dims.w = filter_shape.Dims(2);
+ filter_dims.c = output_depth;
+
+ cmsis_nn_dims bias_dims;
+ bias_dims.n = 1;
+ bias_dims.h = 1;
+ bias_dims.w = 1;
+ bias_dims.c = output_depth;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = batch_size;
+ output_dims.h = output_shape.Dims(1);
+ output_dims.w = output_shape.Dims(2);
+ output_dims.c = output_depth;
+
+ cmsis_nn_context ctx;
+ ctx.buf = scratchpad_data;
+ ctx.size = scratchpad_shape.Dims(0);
+
+ auto res = arm_depthwise_conv_wrapper_s8(&ctx, &dw_conv_params, &quant_params, &input_dims,
+ input_data, &filter_dims, filter_data, &bias_dims,
+ bias_data, &output_dims, output_data);
+ assert(res == ARM_MATH_SUCCESS);
+ }
+ else
+ {
+ tflite::reference_integer_ops::DepthwiseConvPerChannel(
+ params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data);
+ }
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const tflite::DepthwiseParams &params,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &filter_shape,
+ const tflite::RuntimeShape &output_shape)
+{
+ cmsis_nn_dw_conv_params dw_conv_params;
+ dw_conv_params.dilation.h = params.dilation_height_factor;
+ dw_conv_params.dilation.w = params.dilation_width_factor;
+
+ if (input_data_type == loco::DataType::S8 && dw_conv_params.dilation.h == 1 &&
+ dw_conv_params.dilation.w == 1)
+ {
+ const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int output_depth = tflite::MatchingDim(filter_shape, 3, output_shape, 3);
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = batch_size;
+ input_dims.h = input_shape.Dims(1);
+ input_dims.w = input_shape.Dims(2);
+ input_dims.c = input_shape.Dims(3);
+
+ cmsis_nn_dims filter_dims;
+ filter_dims.n = filter_shape.Dims(0);
+ filter_dims.h = filter_shape.Dims(1);
+ filter_dims.w = filter_shape.Dims(2);
+ filter_dims.c = output_depth;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = batch_size;
+ output_dims.h = output_shape.Dims(1);
+ output_dims.w = output_shape.Dims(2);
+ output_dims.c = output_depth;
+
+ const int32_t buf_size = arm_depthwise_conv_wrapper_s8_get_buffer_size(
+ &dw_conv_params, &input_dims, &filter_dims, &output_dims);
+
+ auto data_type_size = static_cast<int32_t>(luci_interpreter::getDataTypeSize(input_data_type));
+
+ luci_interpreter::Shape scratchpad_shape{buf_size * data_type_size};
+ scratchpad->resize(scratchpad_shape);
+ }
+ else
+ {
+ scratchpad->set_allocatable(false);
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h b/compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h
new file mode 100644
index 000000000..efa6b167e
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h"
+#include "PALreference_ops.h"
+
+namespace luci_interpreter_pal
+{
+
+template <typename T>
+static inline void Dequantize(tflite::DequantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_integer_ops::Dequantize<T>(params, input_shape, input_data, output_shape,
+ output_data);
+}
+
+static inline void Dequantize(tflite::DequantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const uint8_t *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::Dequantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALElu.h b/compiler/luci-interpreter/pal/cmsisnn/PALElu.h
new file mode 100644
index 000000000..4089d0a0c
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALElu.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ELU_H
+#define LUCI_INTERPRETER_PAL_ELU_H
+
+#include <tensorflow/lite/kernels/internal/reference/elu.h>
+
+namespace luci_interpreter_pal
+{
+
+static inline void Elu(const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::Elu(input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ELU_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALFullyConnected.h b/compiler/luci-interpreter/pal/cmsisnn/PALFullyConnected.h
new file mode 100644
index 000000000..32e905761
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALFullyConnected.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+#define LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+
+#include <tensorflow/lite/kernels/internal/reference/fully_connected.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h>
+#include <arm_nnfunctions.h>
+
+#include <cassert>
+#include <memory>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void FullyConnected(const tflite::FullyConnectedParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &filter_shape, const T *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ {
+    // MARK: this operation is not supported yet
+ assert(false && "FullyConnected NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)filter_shape;
+ (void)filter_data;
+ (void)bias_shape;
+ (void)bias_data;
+ (void)output_shape;
+ (void)output_data;
+ }
+}
+
+template <>
+inline void
+FullyConnected<int8_t>(const tflite::FullyConnectedParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data)
+{
+ assert(output_shape.DimensionsCount() == 2);
+
+ const int batches = output_shape.Dims(0);
+ const int output_depth = output_shape.Dims(1);
+
+ const int filter_dim_count = filter_shape.DimensionsCount();
+ const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
+
+ cmsis_nn_fc_params fc_params;
+ fc_params.input_offset = params.input_offset;
+ fc_params.output_offset = params.output_offset;
+ fc_params.filter_offset = params.weights_offset;
+ fc_params.activation.min = params.quantized_activation_min;
+ fc_params.activation.max = params.quantized_activation_max;
+
+ cmsis_nn_per_tensor_quant_params quant_params;
+ quant_params.multiplier = params.output_multiplier;
+ quant_params.shift = params.output_shift;
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = batches;
+ input_dims.h = 1;
+ input_dims.w = 1;
+ input_dims.c = accum_depth;
+
+ cmsis_nn_dims filter_dims;
+ filter_dims.n = accum_depth;
+ filter_dims.h = 1;
+ filter_dims.w = 1;
+ filter_dims.c = output_depth;
+
+ cmsis_nn_dims bias_dims;
+ bias_dims.n = 1;
+ bias_dims.h = 1;
+ bias_dims.w = 1;
+ bias_dims.c = output_depth;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = batches;
+ output_dims.h = 1;
+ output_dims.w = 1;
+ output_dims.c = output_depth;
+
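+  // CMSIS-NN scratch-buffer protocol: query how much scratch memory the kernel
+  // needs for this filter shape, allocate it, then pass it via cmsis_nn_context.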
+ int32_t buf_size = arm_fully_connected_s8_get_buffer_size(&filter_dims);
+ auto buffer = std::make_unique<int8_t[]>(buf_size);
+ assert(buffer != nullptr);
+
+ cmsis_nn_context ctx;
+ ctx.buf = buffer.get();
+ ctx.size = buf_size;
+
+  auto res =
+    arm_fully_connected_s8(&ctx, &fc_params, &quant_params, &input_dims, input_data, &filter_dims,
+                           filter_data, &bias_dims, bias_data, &output_dims, output_data);
+  assert(res == ARM_MATH_SUCCESS);
+  (void)res; // only read by assert(); avoids an unused-variable warning in NDEBUG builds
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALL2Normalize.h b/compiler/luci-interpreter/pal/cmsisnn/PALL2Normalize.h
new file mode 100644
index 000000000..f84742a44
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALL2Normalize.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+#define LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+
+#include <tensorflow/lite/kernels/internal/reference/l2normalization.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Normalization(const tflite::L2NormalizationParams &op_params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::L2Normalization(op_params, input_shape, input_data, output_shape,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2NORMALIZE_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h b/compiler/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h
new file mode 100644
index 000000000..38a302fc6
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2POOL2D_H
+#define LUCI_INTERPRETER_PAL_L2POOL2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Pool(const tflite::PoolParams &params, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::reference_ops::L2Pool(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2POOL2D_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h b/compiler/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h
new file mode 100644
index 000000000..9ccd2224f
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_LEAKYRELU_H
+#define LUCI_INTERPRETER_PAL_LEAKYRELU_H
+
+#include <tensorflow/lite/kernels/internal/reference/leaky_relu.h>
+
+namespace luci_interpreter_pal
+{
+static inline void LeakyRelu(const tflite::LeakyReluParams &params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::LeakyRelu(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_LEAKYRELU_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALMul.h b/compiler/luci-interpreter/pal/cmsisnn/PALMul.h
new file mode 100644
index 000000000..347a97a83
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALMul.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_MUL_H
+#define LUCI_INTERPRETER_PAL_MUL_H
+
+#include <tensorflow/lite/kernels/internal/reference/mul.h>
+
+namespace luci_interpreter_pal
+{
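+// Note: this backend routes both entry points below to the broadcast-capable
+// reference kernel; BroadcastMul4DSlow also covers the no-broadcast case,
+// just more slowly than a dedicated elementwise loop would.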
+template <typename T>
+static inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data);
+}
+
+template <typename T>
+static inline void
+BroadcastMul4DSlow(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_MUL_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALNeg.h b/compiler/luci-interpreter/pal/cmsisnn/PALNeg.h
new file mode 100644
index 000000000..be5903a0c
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALNeg.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_NEG_H
+#define LUCI_INTERPRETER_PAL_NEG_H
+
+#include <tensorflow/lite/kernels/internal/reference/neg.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Negate(const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::Negate(input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_NEG_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h b/compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h
new file mode 100644
index 000000000..effb85d54
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H
+#define LUCI_INTERPRETER_PAL_QUANTIZE_H
+
+#include "PALreference_ops.h"
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Quantize(tflite::QuantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data);
+}
+
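+// Requantize rescales values from one affine quantization to another. As a
+// rough sketch of the fixed-point arithmetic (not the exact rounding used by
+// the reference implementation):
+//
+//   effective_scale ~ effective_scale_multiplier * 2^effective_scale_shift / 2^31
+//   output = clamp(output_zero_point + round((input - input_zero_point) * effective_scale))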
+template <typename Input, typename Output>
+static inline void Requantize(const Input *input_data, int32_t size,
+ int32_t effective_scale_multiplier, int32_t effective_scale_shift,
+ int32_t input_zero_point, int32_t output_zero_point,
+ Output *output_data)
+{
+ tflite::reference_ops::Requantize(input_data, size, effective_scale_multiplier,
+ effective_scale_shift, input_zero_point, output_zero_point,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h b/compiler/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h
new file mode 100644
index 000000000..cc9f0fd54
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+#define LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+
+#include <tensorflow/lite/kernels/internal/reference/resize_bilinear.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeBilinear(const tflite::ResizeBilinearParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::ResizeBilinear(op_params, unextended_input_shape, input_data,
+ output_size_shape, output_size_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h b/compiler/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h
new file mode 100644
index 000000000..f4d5a6ed3
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+#define LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+
+#include <tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeNearestNeighbor(const tflite::ResizeNearestNeighborParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::ResizeNearestNeighbor(op_params, unextended_input_shape, input_data,
+ output_size_shape, output_size_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALSVDF.h b/compiler/luci-interpreter/pal/cmsisnn/PALSVDF.h
new file mode 100644
index 000000000..a4a5b2a78
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALSVDF.h
@@ -0,0 +1,190 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SVDF_H
+#define LUCI_INTERPRETER_PAL_SVDF_H
+
+#include <arm_nn_types.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+static inline void
+IntegerSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+ const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape,
+ const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+ const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape,
+ const int32_t *bias_data, int16_t *activation_state_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data,
+ int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a,
+ int scale_2_b, int32_t input_zp, int32_t output_zp)
+{
+ const int32_t rank = params.rank;
+ const int32_t batch_size = input_shape.Dims(0);
+ const int32_t num_filters = weight_feature_shape.Dims(0);
+ const int32_t memory_size = weight_time_shape.Dims(1);
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = input_shape.Dims(0);
+ input_dims.h = input_shape.Dims(1);
+
+ cmsis_nn_dims weights_feature_dims;
+ weights_feature_dims.n = weight_feature_shape.Dims(0);
+ weights_feature_dims.h = weight_feature_shape.Dims(1);
+
+ cmsis_nn_dims weights_time_dims;
+ weights_time_dims.n = weight_time_shape.Dims(0);
+ weights_time_dims.h = weight_time_shape.Dims(1);
+
+ cmsis_nn_dims bias_dims;
+ bias_dims.n = bias_shape.Dims(0);
+
+ cmsis_nn_dims state_dims;
+ state_dims.n = batch_size;
+ state_dims.h = memory_size * num_filters;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = output_shape.Dims(0);
+ output_dims.h = output_shape.Dims(1);
+
+ cmsis_nn_svdf_params svdf_params;
+  svdf_params.rank = rank;
+ svdf_params.input_offset = input_zp;
+ svdf_params.output_offset = output_zp;
+
+ svdf_params.input_activation.min = INT16_MIN;
+ svdf_params.input_activation.max = INT16_MAX;
+
+ svdf_params.output_activation.min = INT8_MIN;
+ svdf_params.output_activation.max = INT8_MAX;
+
+ cmsis_nn_per_tensor_quant_params in_quant_params;
+ in_quant_params.multiplier = scale_1_a;
+ in_quant_params.shift = scale_1_b;
+
+ cmsis_nn_per_tensor_quant_params out_quant_params;
+ out_quant_params.multiplier = scale_2_a;
+ out_quant_params.shift = scale_2_b;
+
+ cmsis_nn_context scratch_ctx;
+ scratch_ctx.buf = scratchpad_data;
+
+ cmsis_nn_context scratch_output_ctx;
+ scratch_output_ctx.buf = output_temp_data;
+
+ arm_svdf_s8(&scratch_ctx, &scratch_output_ctx, &svdf_params, &in_quant_params, &out_quant_params,
+ &input_dims, input_data, &state_dims, activation_state_data, &weights_feature_dims,
+ weight_feature_data, &weights_time_dims, weight_time_data, &bias_dims, bias_data,
+ &output_dims, output_data);
+}
+static inline void
+FloatSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+ const float *input_data, const tflite::RuntimeShape &weight_feature_shape,
+ const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+ const float *weight_time_data, const tflite::RuntimeShape &bias_shape,
+ const float *bias_data, float *scratchpad_data, float *activation_state_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ const int32_t rank = params.rank;
+ const int32_t batch_size = input_shape.Dims(0);
+ const int32_t input_size = input_shape.Dims(1);
+ const int32_t num_filters = weight_feature_shape.Dims(0);
+ const int32_t num_units = num_filters / rank;
+ const int32_t memory_size = weight_time_shape.Dims(1);
+
+ // Left shift the activation_state.
+ {
+ float *new_state_start = activation_state_data;
+ const float *old_state_start = activation_state_data + 1;
+ const float *old_state_end = activation_state_data + batch_size * num_filters * memory_size;
+ while (old_state_start != old_state_end)
+ {
+ *new_state_start++ = *old_state_start++;
+ }
+ }
+
+ // Note: no need to clear the latest activation, matmul is not accumulative.
+
+ // Compute conv1d(inputs, weights_feature).
+ // The activation_state's rightmost column is used to save current cycle
+ // activation. This is achieved by starting at state_ptr[memory_size - 1] and
+ // having the stride equal to memory_size.
+
+ // Perform batched matrix vector multiply operation:
+ {
+ const float *matrix = weight_feature_data;
+ const float *vector = input_data;
+ float *result = &activation_state_data[memory_size - 1];
+ float *result_in_batch = result;
+ for (int i = 0; i < batch_size; ++i)
+ {
+ const float *matrix_ptr = matrix;
+ for (int j = 0; j < num_filters; ++j)
+ {
+ float dot_prod = 0.0f;
+ const float *vector_in_batch = vector + i * input_size;
+ for (int k = 0; k < input_size; ++k)
+ {
+ dot_prod += *matrix_ptr++ * *vector_in_batch++;
+ }
+ *result_in_batch = dot_prod;
+ result_in_batch += memory_size;
+ }
+ }
+ }
+
+ tflite::reference_ops::ApplyTimeWeightsBiasAndActivation(
+ batch_size, memory_size, num_filters, num_units, rank, weight_time_data, bias_data,
+ params.activation, activation_state_data, scratchpad_data, output_data);
+}
+
+static inline void SetupScratchpadTensor(
+ const luci_interpreter::DataType &input_data_type,
+ const luci_interpreter::DataType &weight_feature_data_type,
+ luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2,
+ luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4,
+ luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6,
+ const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape,
+ const int32_t batch_size, const int32_t num_filters, const int32_t num_units)
+{
+ if (input_data_type == loco::DataType::FLOAT32 &&
+ (weight_feature_data_type == loco::DataType::S8 ||
+ weight_feature_data_type == loco::DataType::U8))
+ {
+ (void)input_shape;
+ (void)weight_time_shape;
+ (void)scratchpad_3;
+ (void)scratchpad_4;
+ (void)scratchpad_5;
+ (void)scratchpad_6;
+
+ throw std::runtime_error("Hybrid type is not supported for cmsisnn");
+ }
+
+ // Resize scratchpad_1 tensor
+ scratchpad_1->resize({batch_size, num_filters});
+
+ if (input_data_type == loco::DataType::S8)
+ {
+ // Resize scratchpad_2 for full_integer op
+ scratchpad_2->resize({batch_size, num_units});
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SVDF_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALSoftmax.h b/compiler/luci-interpreter/pal/cmsisnn/PALSoftmax.h
new file mode 100644
index 000000000..6bbda4867
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALSoftmax.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SOFTMAX_H
+#define LUCI_INTERPRETER_PAL_SOFTMAX_H
+
+#include <tensorflow/lite/kernels/internal/reference/softmax.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale,
+ float beta)
+{
+ // Do nothing for mcu
+ (void)data;
+ (void)input_scale;
+ (void)beta;
+}
+
+static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta)
+{
+ int32 input_beta_multiplier;
+ int input_beta_left_shift;
+ static const int kScaledDiffIntegerBits = 5;
+ tflite::PreprocessSoftmaxScaling(beta, input_scale, kScaledDiffIntegerBits,
+ &input_beta_multiplier, &input_beta_left_shift);
+
+ params->input_multiplier = input_beta_multiplier;
+ params->input_left_shift = input_beta_left_shift;
+ params->diff_min =
+ -tflite::CalculateInputRadius(kScaledDiffIntegerBits, params->input_left_shift);
+}
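+
+// Note on diff_min: the quantized kernel works on differences (input - max),
+// which are always <= 0; diff_min is the most negative difference that still
+// produces a non-negligible exp() in fixed point, so smaller differences are
+// treated as zero probability.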
+
+template <typename T>
+static inline void Softmax(const tflite::SoftmaxParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  // MARK: this operation is not supported on mcu yet
+ assert(false && "Softmax NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)output_shape;
+ (void)output_data;
+}
+
+template <>
+inline void Softmax<int8_t>(const tflite::SoftmaxParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data)
+{
+ const int trailing_dim = input_shape.DimensionsCount() - 1;
+ const int outer_size = tflite::MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+ const int depth = tflite::MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+ const int32_t mult = params.input_multiplier;
+ const int32_t shift = params.input_left_shift;
+ const int32_t diff_min = params.diff_min;
+
+ arm_softmax_s8(input_data, outer_size, depth, mult, shift, diff_min, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SOFTMAX_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h b/compiler/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h
new file mode 100644
index 000000000..fdddaa929
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+#define LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+
+#include <tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+SpaceToBatchND(const tflite::SpaceToBatchParams &params,
+ const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+ const tflite::RuntimeShape &unextended_input3_shape, const int32 *paddings_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::SpaceToBatchND(
+ params, unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+ unextended_input3_shape, paddings_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h b/compiler/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h
new file mode 100644
index 000000000..816b7f663
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+#define LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+
+#include <tensorflow/lite/kernels/internal/reference/space_to_depth.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void SpaceToDepth(const tflite::SpaceToDepthParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape,
+ const T *input_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::SpaceToDepth(op_params, unextended_input_shape, input_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETODEPTH_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALSub.h b/compiler/luci-interpreter/pal/cmsisnn/PALSub.h
new file mode 100644
index 000000000..ea57578c6
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALSub.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SUB_H
+#define LUCI_INTERPRETER_PAL_SUB_H
+
+#include <tensorflow/lite/kernels/internal/reference/sub.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Sub(const tflite::ArithmeticParams &params,
+ const tflite::RuntimeShape &input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &input2_shape, const T *input2_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::Sub(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SUB_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALreference_ops.h b/compiler/luci-interpreter/pal/cmsisnn/PALreference_ops.h
new file mode 100644
index 000000000..813b1ec2c
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALreference_ops.h
@@ -0,0 +1,1568 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
+#define LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#include <algorithm>
+#include <cmath>
+#include <cstring>
+#include <functional>
+#include <limits>
+#include <memory>
+#include <type_traits>
+
+#include "third_party/eigen3/Eigen/Core"
+#include "fixedpoint/fixedpoint.h"
+#include "ruy/profiler/instrumentation.h" // from @ruy
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/lite/kernels/internal/reference/add.h"
+#include "tensorflow/lite/kernels/internal/reference/add_n.h"
+#include "tensorflow/lite/kernels/internal/reference/arg_min_max.h"
+#include "tensorflow/lite/kernels/internal/reference/batch_matmul.h"
+#include "tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h"
+#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
+#include "tensorflow/lite/kernels/internal/reference/cast.h"
+#include "tensorflow/lite/kernels/internal/reference/ceil.h"
+#include "tensorflow/lite/kernels/internal/reference/comparisons.h"
+#include "tensorflow/lite/kernels/internal/reference/concatenation.h"
+#include "tensorflow/lite/kernels/internal/reference/conv.h"
+#include "tensorflow/lite/kernels/internal/reference/depth_to_space.h"
+#include "tensorflow/lite/kernels/internal/reference/dequantize.h"
+#include "tensorflow/lite/kernels/internal/reference/div.h"
+#include "tensorflow/lite/kernels/internal/reference/elu.h"
+#include "tensorflow/lite/kernels/internal/reference/exp.h"
+#include "tensorflow/lite/kernels/internal/reference/fill.h"
+#include "tensorflow/lite/kernels/internal/reference/floor.h"
+#include "tensorflow/lite/kernels/internal/reference/floor_div.h"
+#include "tensorflow/lite/kernels/internal/reference/floor_mod.h"
+#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
+#include "tensorflow/lite/kernels/internal/reference/gather.h"
+#include "tensorflow/lite/kernels/internal/reference/hard_swish.h"
+#include "tensorflow/lite/kernels/internal/reference/l2normalization.h"
+#include "tensorflow/lite/kernels/internal/reference/leaky_relu.h"
+#include "tensorflow/lite/kernels/internal/reference/log_softmax.h"
+#include "tensorflow/lite/kernels/internal/reference/logistic.h"
+#include "tensorflow/lite/kernels/internal/reference/maximum_minimum.h"
+#include "tensorflow/lite/kernels/internal/reference/mul.h"
+#include "tensorflow/lite/kernels/internal/reference/neg.h"
+#include "tensorflow/lite/kernels/internal/reference/pad.h"
+#include "tensorflow/lite/kernels/internal/reference/pooling.h"
+#include "tensorflow/lite/kernels/internal/reference/prelu.h"
+#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
+#include "tensorflow/lite/kernels/internal/reference/quantize.h"
+#include "tensorflow/lite/kernels/internal/reference/reduce.h"
+#include "tensorflow/lite/kernels/internal/reference/requantize.h"
+#include "tensorflow/lite/kernels/internal/reference/resize_bilinear.h"
+#include "tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h"
+#include "tensorflow/lite/kernels/internal/reference/round.h"
+#include "tensorflow/lite/kernels/internal/reference/softmax.h"
+#include "tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h"
+#include "tensorflow/lite/kernels/internal/reference/space_to_depth.h"
+#include "tensorflow/lite/kernels/internal/reference/strided_slice.h"
+#include "tensorflow/lite/kernels/internal/reference/string_comparisons.h"
+#include "tensorflow/lite/kernels/internal/reference/sub.h"
+#include "tensorflow/lite/kernels/internal/reference/tanh.h"
+#include "tensorflow/lite/kernels/internal/reference/transpose.h"
+#include "tensorflow/lite/kernels/internal/reference/transpose_conv.h"
+#include "tensorflow/lite/kernels/internal/strided_slice_logic.h"
+#include "tensorflow/lite/kernels/internal/tensor.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+namespace tflite
+{
+
+namespace reference_ops
+{
+
+template <typename T>
+inline void Relu(const RuntimeShape &input_shape, const T *input_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ const T val = input_data[i];
+ const T lower = 0;
+ const T clamped = val < lower ? lower : val;
+ output_data[i] = clamped;
+ }
+}
+
+template <typename T>
+inline void Relu1(const RuntimeShape &input_shape, const T *input_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ ruy::profiler::ScopeLabel label("Relu1 (not fused)");
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ const T val = input_data[i];
+ const T upper = 1;
+ const T lower = -1;
+ const T clamped = val > upper ? upper : val < lower ? lower : val;
+ output_data[i] = clamped;
+ }
+}
+
+inline void Relu6(const RuntimeShape &input_shape, const float *input_data,
+ const RuntimeShape &output_shape, float *output_data)
+{
+ ruy::profiler::ScopeLabel label("Relu6 (not fused)");
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ const float val = input_data[i];
+ const float upper = 6;
+ const float lower = 0;
+ const float clamped = val > upper ? upper : val < lower ? lower : val;
+ output_data[i] = clamped;
+ }
+}
+
+template <typename T>
+inline void ReluX(const tflite::ReluParams &params, const RuntimeShape &input_shape,
+ const T *input_data, const RuntimeShape &output_shape, T *output_data)
+{
+ ruy::profiler::ScopeLabel label("Quantized ReluX (not fused)");
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ const int32 val = static_cast<int32_t>(input_data[i]);
+ int32 clamped = params.output_offset + MultiplyByQuantizedMultiplier(val - params.input_offset,
+ params.output_multiplier,
+ params.output_shift);
+ clamped = std::max(params.quantized_activation_min, clamped);
+ clamped = std::min(params.quantized_activation_max, clamped);
+ output_data[i] = static_cast<T>(clamped);
+ }
+}
+
+template <typename T>
+inline void ReluX(const tflite::ActivationParams &params, const RuntimeShape &input_shape,
+ const T *input_data, const RuntimeShape &output_shape, T *output_data)
+{
+ ruy::profiler::ScopeLabel label("Quantized ReluX (not fused)");
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ const T max_value = params.quantized_activation_max;
+ const T min_value = params.quantized_activation_min;
+ for (int i = 0; i < flat_size; ++i)
+ {
+ const T val = input_data[i];
+ const T clamped = val > max_value ? max_value : val < min_value ? min_value : val;
+ output_data[i] = clamped;
+ }
+}
+
+// TODO(jiawen): We can implement BroadcastMul on buffers of arbitrary
+// dimensionality if the runtime code does a single loop over one dimension
+// that handles broadcasting as the base case. The code generator would then
+// generate max(D1, D2) nested for loops.
+inline void BroadcastMulFivefold(const ArithmeticParams &unswitched_params,
+ const RuntimeShape &unswitched_input1_shape,
+ const uint8 *unswitched_input1_data,
+ const RuntimeShape &unswitched_input2_shape,
+ const uint8 *unswitched_input2_data,
+ const RuntimeShape &output_shape, uint8 *output_data)
+{
+ ArithmeticParams switched_params = unswitched_params;
+ switched_params.input1_offset = unswitched_params.input2_offset;
+ switched_params.input2_offset = unswitched_params.input1_offset;
+
+ const bool use_unswitched = unswitched_params.broadcast_category ==
+ tflite::BroadcastableOpCategory::kFirstInputBroadcastsFast;
+
+ const ArithmeticParams &params = use_unswitched ? unswitched_params : switched_params;
+ const uint8 *input1_data = use_unswitched ? unswitched_input1_data : unswitched_input2_data;
+ const uint8 *input2_data = use_unswitched ? unswitched_input2_data : unswitched_input1_data;
+
+ // Fivefold nested loops. The second input resets its position for each
+ // iteration of the second loop. The first input resets its position at the
+ // beginning of the fourth loop. The innermost loop is an elementwise Mul of
+ // sections of the arrays.
+ uint8 *output_data_ptr = output_data;
+ const uint8 *input1_data_ptr = input1_data;
+ const uint8 *input2_data_reset = input2_data;
+ int y0 = params.broadcast_shape[0];
+ int y1 = params.broadcast_shape[1];
+ int y2 = params.broadcast_shape[2];
+ int y3 = params.broadcast_shape[3];
+ int y4 = params.broadcast_shape[4];
+ for (int i0 = 0; i0 < y0; ++i0)
+ {
+ const uint8 *input2_data_ptr;
+ for (int i1 = 0; i1 < y1; ++i1)
+ {
+ input2_data_ptr = input2_data_reset;
+ for (int i2 = 0; i2 < y2; ++i2)
+ {
+ for (int i3 = 0; i3 < y3; ++i3)
+ {
+ MulElementwise(y4, params, input1_data_ptr, input2_data_ptr, output_data_ptr);
+ input2_data_ptr += y4;
+ output_data_ptr += y4;
+ }
+ input1_data_ptr += y4;
+ }
+ }
+ input2_data_reset = input2_data_ptr;
+ }
+}
+
+inline void Mul(const ArithmeticParams &params, const RuntimeShape &input1_shape,
+ const int16 *input1_data, const RuntimeShape &input2_shape,
+ const int16 *input2_data, const RuntimeShape &output_shape, int16 *output_data)
+{
+ ruy::profiler::ScopeLabel label("Mul/Int16");
+
+ const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
+ for (int i = 0; i < flat_size; i++)
+ {
+ // F0 uses 0 integer bits, range [-1, 1].
+ using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+
+ F0 unclamped_result = F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
+ output_data[i] = unclamped_result.raw();
+ }
+}
+
+inline void Mul(const ArithmeticParams &params, const RuntimeShape &input1_shape,
+ const int16 *input1_data, const RuntimeShape &input2_shape,
+ const int16 *input2_data, const RuntimeShape &output_shape, uint8 *output_data)
+{
+ ruy::profiler::ScopeLabel label("Mul/Int16Uint8");
+ int32 output_offset = params.output_offset;
+ int32 output_activation_min = params.quantized_activation_min;
+ int32 output_activation_max = params.quantized_activation_max;
+ TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+
+ const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
+ for (int i = 0; i < flat_size; i++)
+ {
+ // F0 uses 0 integer bits, range [-1, 1].
+ using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+
+ F0 unclamped_result = F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
+ int16 rescaled_result = gemmlowp::RoundingDivideByPOT(unclamped_result.raw(), 8);
+ int16 clamped_result = std::min<int16>(output_activation_max - output_offset, rescaled_result);
+ clamped_result = std::max<int16>(output_activation_min - output_offset, clamped_result);
+ output_data[i] = output_offset + clamped_result;
+ }
+}
+
+inline void Sub16(const ArithmeticParams &params, const RuntimeShape &input1_shape,
+ const int16_t *input1_data, const RuntimeShape &input2_shape,
+ const int16_t *input2_data, const RuntimeShape &output_shape,
+ int16_t *output_data)
+{
+ ruy::profiler::ScopeLabel label("Sub/Int16");
+ const int input1_shift = params.input1_shift;
+ const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
+ const int16 output_activation_min = params.quantized_activation_min;
+ const int16 output_activation_max = params.quantized_activation_max;
+
+ TFLITE_DCHECK(input1_shift == 0 || params.input2_shift == 0);
+ TFLITE_DCHECK_LE(input1_shift, 0);
+ TFLITE_DCHECK_LE(params.input2_shift, 0);
+ const int16 *not_shift_input = input1_shift == 0 ? input1_data : input2_data;
+ const int16 *shift_input = input1_shift == 0 ? input2_data : input1_data;
+ const int input_right_shift = input1_shift == 0 ? -params.input2_shift : -input1_shift;
+
+ if (input1_shift == 0)
+ {
+ // F0 uses 0 integer bits, range [-1, 1].
+ using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+ for (int i = 0; i < flat_size; ++i)
+ {
+ F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]);
+ F0 scaled_input =
+ F0::FromRaw(gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
+ F0 result = SaturatingSub(input_ready_scaled, scaled_input);
+ const int16 raw_output = result.raw();
+ const int16 clamped_output =
+ std::min(output_activation_max, std::max(output_activation_min, raw_output));
+ output_data[i] = clamped_output;
+ }
+ }
+ else
+ {
+ // F0 uses 0 integer bits, range [-1, 1].
+ using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+ for (int i = 0; i < flat_size; ++i)
+ {
+ F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]);
+ F0 scaled_input =
+ F0::FromRaw(gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
+ F0 result = SaturatingSub(scaled_input, input_ready_scaled);
+ const int16 raw_output = result.raw();
+ const int16 clamped_output =
+ std::min(output_activation_max, std::max(output_activation_min, raw_output));
+ output_data[i] = clamped_output;
+ }
+ }
+}
+
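+// Pack stacks `inputs_count` equally-shaped tensors along a new `axis`: the
+// output rank is the input rank plus one, and each memcpy below copies the
+// contiguous tail (all dimensions after `axis`) of one input in turn.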
+template <typename Scalar>
+void Pack(const PackParams &params, const RuntimeShape *const *input_shapes,
+ const Scalar *const *input_data, const RuntimeShape &output_shape, Scalar *output_data)
+{
+ ruy::profiler::ScopeLabel label("Pack");
+ const int dimensions = output_shape.DimensionsCount();
+ int axis = params.axis;
+ int inputs_count = params.inputs_count;
+
+ int outer_size = 1;
+ for (int i = 0; i < axis; i++)
+ {
+ outer_size *= output_shape.Dims(i);
+ }
+ int copy_size = 1;
+ for (int i = params.axis + 1; i < dimensions; i++)
+ {
+ copy_size *= output_shape.Dims(i);
+ }
+ TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size);
+
+ for (int i = 0; i < inputs_count; ++i)
+ {
+ for (int k = 0; k < outer_size; k++)
+ {
+ const Scalar *input_ptr = input_data[i] + copy_size * k;
+ int loc = k * inputs_count * copy_size + i * copy_size;
+ memcpy(output_data + loc, input_ptr, copy_size * sizeof(Scalar));
+ }
+ }
+}
+
+template <typename Scalar>
+void Unpack(const UnpackParams &params, const RuntimeShape &input_shape, const Scalar *input_data,
+ const RuntimeShape &output_shape, Scalar *const *output_datas)
+{
+ ruy::profiler::ScopeLabel label("Unpack");
+ const int dimensions = input_shape.DimensionsCount();
+ const int outputs_count = params.num_split;
+
+ int outer_size = 1;
+ int axis = params.axis;
+ if (axis < 0)
+ {
+ axis += dimensions;
+ }
+ TFLITE_DCHECK_GE(axis, 0);
+ TFLITE_DCHECK_LT(axis, dimensions);
+ for (int i = 0; i < axis; ++i)
+ {
+ outer_size *= input_shape.Dims(i);
+ }
+ int copy_size = 1;
+ for (int i = axis + 1; i < dimensions; ++i)
+ {
+ copy_size *= input_shape.Dims(i);
+ }
+ TFLITE_DCHECK_EQ(output_shape.FlatSize(), copy_size * outer_size);
+
+ for (int i = 0; i < outputs_count; ++i)
+ {
+ for (int k = 0; k < outer_size; k++)
+ {
+ Scalar *output_ptr = output_datas[i] + copy_size * k;
+ int loc = k * outputs_count * copy_size + i * copy_size;
+ memcpy(output_ptr, input_data + loc, copy_size * sizeof(Scalar));
+ }
+ }
+}
+
+template <typename Scalar>
+void PackWithScaling(const PackParams &params, const RuntimeShape *const *input_shapes,
+ const uint8 *const *input_data, const RuntimeShape &output_shape,
+ uint8 *output_data)
+{
+ ruy::profiler::ScopeLabel label("PackWithScaling");
+ const int dimensions = output_shape.DimensionsCount();
+ int axis = params.axis;
+ const int32 *input_zeropoint = params.input_zeropoint;
+ const float *input_scale = params.input_scale;
+ int inputs_count = params.inputs_count;
+ const int32 output_zeropoint = params.output_zeropoint;
+ const float output_scale = params.output_scale;
+
+ int outer_size = 1;
+ for (int i = 0; i < axis; i++)
+ {
+ outer_size *= output_shape.Dims(i);
+ }
+ int copy_size = 1;
+ for (int i = axis + 1; i < dimensions; i++)
+ {
+ copy_size *= output_shape.Dims(i);
+ }
+ TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size);
+
+ Scalar *output_ptr = output_data;
+ const float inverse_output_scale = 1.f / output_scale;
+ for (int k = 0; k < outer_size; k++)
+ {
+ for (int i = 0; i < inputs_count; ++i)
+ {
+ if (input_zeropoint[i] == output_zeropoint && input_scale[i] == output_scale)
+ {
+ memcpy(output_ptr, input_data[i] + k * copy_size, copy_size * sizeof(Scalar));
+ }
+ else
+ {
+ assert(false);
+ const float scale = input_scale[i] * inverse_output_scale;
+ const float bias = -input_zeropoint[i] * scale;
+ auto input_ptr = input_data[i];
+ for (int j = 0; j < copy_size; ++j)
+ {
+ const int value =
+ static_cast<int32_t>(std::round(input_ptr[j] * scale + bias)) + output_zeropoint;
+ output_ptr[j] = static_cast<uint8_t>(std::max(std::min(255, value), 0));
+ }
+ }
+ output_ptr += copy_size;
+ }
+ }
+}
+
+template <typename Scalar>
+void DepthConcatenation(const ConcatenationParams &params, const RuntimeShape *const *input_shapes,
+ const Scalar *const *input_data, const RuntimeShape &output_shape,
+ Scalar *output_data)
+{
+ ruy::profiler::ScopeLabel label("DepthConcatenation");
+ auto params_copy = params;
+ params_copy.axis = 3;
+ Concatenation(params_copy, input_shapes, input_data, output_shape, output_data);
+}
+
+inline void LstmCell(const LstmCellParams &params, const RuntimeShape &unextended_input_shape,
+ const float *input_data, const RuntimeShape &unextended_prev_activ_shape,
+ const float *prev_activ_data, const RuntimeShape &weights_shape,
+ const float *weights_data, const RuntimeShape &unextended_bias_shape,
+ const float *bias_data, const RuntimeShape &unextended_prev_state_shape,
+ const float *prev_state_data,
+ const RuntimeShape &unextended_output_state_shape, float *output_state_data,
+ const RuntimeShape &unextended_output_activ_shape, float *output_activ_data,
+ const RuntimeShape &unextended_concat_temp_shape, float *concat_temp_data,
+ const RuntimeShape &unextended_activ_temp_shape, float *activ_temp_data)
+{
+ TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
+ const RuntimeShape input_shape = RuntimeShape::ExtendedShape(4, unextended_input_shape);
+ const RuntimeShape prev_activ_shape = RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
+ const RuntimeShape bias_shape = RuntimeShape::ExtendedShape(4, unextended_bias_shape);
+ const RuntimeShape prev_state_shape = RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
+ const RuntimeShape output_state_shape =
+ RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
+ const RuntimeShape output_activ_shape =
+ RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
+ const RuntimeShape concat_temp_shape =
+ RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
+ const RuntimeShape activ_temp_shape = RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
+ TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
+
+ const int weights_dim_count = weights_shape.DimensionsCount();
+ const int batches = MatchingDim(input_shape, 0, prev_activ_shape, 0, prev_state_shape, 0,
+ output_state_shape, 0, output_activ_shape, 0);
+ const int height = MatchingDim(input_shape, 1, prev_activ_shape, 1, prev_state_shape, 1,
+ output_state_shape, 1, output_activ_shape, 1);
+ const int width = MatchingDim(input_shape, 2, prev_activ_shape, 2, prev_state_shape, 2,
+ output_state_shape, 2, output_activ_shape, 2);
+ const int input_depth = input_shape.Dims(3);
+ const int prev_activ_depth = prev_activ_shape.Dims(3);
+ const int total_input_depth = prev_activ_depth + input_depth;
+ TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), total_input_depth);
+ TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
+ const int intern_activ_depth = MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
+ TFLITE_DCHECK_EQ(weights_shape.FlatSize(), intern_activ_depth * total_input_depth);
+ TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
+ const int output_depth = MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
+ 3, output_activ_shape, 3);
+ TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
+
+ // Concatenate prev_activ and input data together
+ std::vector<float const *> concat_input_arrays_data;
+ std::vector<RuntimeShape const *> concat_input_arrays_shapes;
+ concat_input_arrays_data.push_back(input_data);
+ concat_input_arrays_data.push_back(prev_activ_data);
+ concat_input_arrays_shapes.push_back(&input_shape);
+ concat_input_arrays_shapes.push_back(&prev_activ_shape);
+ tflite::ConcatenationParams concat_params;
+ concat_params.axis = 3;
+ concat_params.inputs_count = concat_input_arrays_data.size();
+ Concatenation(concat_params, &(concat_input_arrays_shapes[0]), &(concat_input_arrays_data[0]),
+ concat_temp_shape, concat_temp_data);
+
+ // Fully connected
+ tflite::FullyConnectedParams fc_params;
+ fc_params.float_activation_min = std::numeric_limits<float>::lowest();
+ fc_params.float_activation_max = std::numeric_limits<float>::max();
+ FullyConnected(fc_params, concat_temp_shape, concat_temp_data, weights_shape, weights_data,
+ bias_shape, bias_data, activ_temp_shape, activ_temp_data);
+
+ // Memory state update (the LSTM "guts")
+ for (int b = 0; b < batches; ++b)
+ {
+ for (int w = 0; w < width; ++w)
+ {
+ for (int h = 0; h < height; ++h)
+ {
+ for (int c = 0; c < output_depth; ++c)
+ {
+ const float input_gate =
+ 1.f /
+ (1.f +
+ std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 0 * output_depth + c)]));
+ const float new_input =
+ std::tanh(activ_temp_data[Offset(activ_temp_shape, b, h, w, 1 * output_depth + c)]);
+ const float forget_gate =
+ 1.f /
+ (1.f +
+ std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 2 * output_depth + c)]));
+ const float output_gate =
+ 1.f /
+ (1.f +
+ std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 3 * output_depth + c)]));
+ const float new_state =
+ input_gate * new_input +
+ forget_gate * prev_state_data[Offset(prev_state_shape, b, h, w, c)];
+ output_state_data[Offset(output_state_shape, b, h, w, c)] = new_state;
+ output_activ_data[Offset(output_activ_shape, b, h, w, c)] =
+ output_gate * std::tanh(new_state);
+ }
+ }
+ }
+ }
+}
+
+// Quantized LSTM cell implementation.
+// The quantization of the input, output arrays is as follows:
+// - The input activations are quantized as uint8 on the interval
+// [-1, 127/128].
+//   The rationale for that is that this is the natural interval for output
+// activations (see next point) and these need to be concatenated together.
+// We could accommodate different ranges by re-scaling, but we empirically
+// found that setting the input activations range to be [-1, 127/128] in the
+// first place, removing the need for re-scaling, greatly improves accuracy.
+// - The output activations are quantized as uint8 on the interval
+// [-1, 127/128].
+//   The rationale for that is that the definition of an LSTM cell makes them
+// intrinsically constrained in [-1, 1]; tweaking that to [-1, 127/128]
+// makes for simpler, more accurate fixed-point arithmetic.
+// - The output-at-previous-timestep state array is naturally quantized the
+//   same way as the output activations.
+// - The internal LSTM memory (not the output-at-previous-timestep, the other
+// internal state array) is int16-quantized and may use any power-of-two,
+// symmetric range i.e. [-2^N, 2^N * 32767/32768] for any N, which we call
+// StateIntegerBits below, see the below discussion of that template
+// parameter ("The StateIntegerBits template parameter").
+// - The output of the internal fully-connected node is int16-quantized
+// on the interval [-8, 8 * 32767/32768], the rationale for which is
+// explained just below ("Why [-8, 8] for fully-connected output?").
+//
+//
+// === The StateIntegerBits template parameter ===
+//
+// The StateIntegerBits template parameter controls the fixed-point format used
+// to represent the internal memory of the LSTM cell (not the
+// output-at-previous-timestep, the other internal state array). It's currently
+// a template parameter so that the model can control that. The most typical
+// value for StateIntegerBits is 4. Other plausible values are anywhere between
+// 3 and 5. We might eventually standardize on a single supported value, e.g. 4,
+// and drop that template parameter. The reason why it can't be a runtime
+// parameter is that this controls the fixed-point format used, i.e. we need to
+// generate actually different code based on it. In particular, we generate code
+// for a fixed-point tanh() implementation for that format, which internally
+// uses a fixed-point exp() implementation, which internally uses a
+// barrel-shifter with a number of steps that depends on StateIntegerBits.
+// Another consequence of that is that a higher value of StateIntegerBits
+// results in a more expensive implementation (more barrel shifter steps
+// needed).
+//
+//
+// === Why [-8, 8] for fully-connected output? ===
+//
+// This array is only fed to Logistic and Tanh functions, for which
+// the quantized implementation will want to use fixed-point arithmetic,
+// requiring a power-of-two representation interval. Thus, we should right
+// away quantize this array to a power-of-two interval; otherwise,
+// the implementation will need to rescale it, losing any benefit that a tighter
+// representation interval might otherwise yield, while introducing some
+// numerical error and computational overhead.
+//
+// Now, Logistic and Tanh are nearly constant (nearly equal to their
+// horizontal asymptotes) outside of a small bounded interval around 0:
+//
+// Logistic(4) = 1 - 1.8e-2 Tanh(4) = 1 - 6.7e-4
+// Logistic(8) = 1 - 3.4e-4 Tanh(8) = 1 - 2.3e-7
+// Logistic(16) = 1 - 1.1e-7 Tanh(16) = 1 - 2.5e-14
+//
+// From this, we see that clamping to [-4, 4] would be too inaccurate
+// (the error of 1.8e-2 on Logistic would be felt even in 8-bit precision)
+// while clamping to [-16, 16] would make no difference even in float32.
+// However, for a fixed-point implementation in 16-bit integers, using 5
+// integer bits to represent the [-16, 16] range would leave only 11
+// fractional bits, giving an increment of 2^-11 = 4.9e-4 between consecutive
+// representable values. Notice that this is higher than the
+// worst-case clamping error with clamping to [-8, 8]: 3.4e-4 for Logistic.
+// Using [-8, 8] thus seems like the better compromise overall, enjoying
+// an increment of 2.4e-4 between representable values and a worst-case
+// clamping error of 3.4e-4, both better than the increment of 4.9e-4 with
+// [-16, 16].
+//
+// Moreover, all other things being equal, it is nice to choose the narrower
+// representation range, as that makes the implementation of fixed-point
+// math functions a little cheaper (each integer bit requires an additional
+// barrel-shifter step in the implementation of exp(-x)). That is further
+// reason to prefer [-8, 8] over [-16, 16]. The choice of [-16, 16] would make
+// sense for 32-bit float or 32-bit fixed-point quantization, but we are
+// aiming for 16-bit fixed-point quantization of these internal nodes here.
+//
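+// === A worked example of the fixed-point format ===
+//
+// For illustration (numbers only, derived from the discussion above): a
+// 16-bit value with 3 integer bits leaves 12 fractional bits, so a raw int16
+// value r represents the real number r * 2^-12. Thus r = 4096 encodes 1.0,
+// r = 32767 encodes ~7.99976, and the spacing between consecutive
+// representable values is 2^-12 ~= 2.4e-4, matching the increment quoted
+// above.
+//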
+template <int StateIntegerBits>
+inline void
+LstmCell(const LstmCellParams &params, const RuntimeShape &unextended_input_shape,
+ const uint8 *input_data_uint8, const RuntimeShape &unextended_prev_activ_shape,
+ const uint8 *prev_activ_data_uint8, const RuntimeShape &weights_shape,
+ const uint8 *weights_data_uint8, const RuntimeShape &unextended_bias_shape,
+ const int32 *bias_data_int32, const RuntimeShape &unextended_prev_state_shape,
+ const int16 *prev_state_data_int16, const RuntimeShape &unextended_output_state_shape,
+ int16 *output_state_data_int16, const RuntimeShape &unextended_output_activ_shape,
+ uint8 *output_activ_data_uint8, const RuntimeShape &unextended_concat_temp_shape,
+ uint8 *concat_temp_data_uint8, const RuntimeShape &unextended_activ_temp_shape,
+ int16 *activ_temp_data_int16, void *gemmlowp_context)
+{
+ (void)gemmlowp_context; // only used in optimized code.
+ int32 weights_zero_point = params.weights_zero_point;
+ int32 accum_multiplier = params.accum_multiplier;
+ int accum_shift = params.accum_shift;
+ TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
+ const RuntimeShape input_shape = RuntimeShape::ExtendedShape(4, unextended_input_shape);
+ const RuntimeShape prev_activ_shape = RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
+ const RuntimeShape bias_shape = RuntimeShape::ExtendedShape(4, unextended_bias_shape);
+ const RuntimeShape prev_state_shape = RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
+ const RuntimeShape output_state_shape =
+ RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
+ const RuntimeShape output_activ_shape =
+ RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
+ const RuntimeShape concat_temp_shape =
+ RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
+ const RuntimeShape activ_temp_shape = RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
+ TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
+
+  // Gather dimension information, and perform consistency checks.
+ const int weights_dim_count = weights_shape.DimensionsCount();
+ const int outer_size = MatchingFlatSizeSkipDim(input_shape, 3, prev_activ_shape, prev_state_shape,
+ output_state_shape, output_activ_shape);
+ const int input_depth = input_shape.Dims(3);
+ const int prev_activ_depth = prev_activ_shape.Dims(3);
+ const int total_input_depth = prev_activ_depth + input_depth;
+ TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), total_input_depth);
+ const int intern_activ_depth = MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
+ TFLITE_DCHECK_EQ(weights_shape.FlatSize(), intern_activ_depth * total_input_depth);
+ TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
+ TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
+ const int output_depth = MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
+ 3, output_activ_shape, 3);
+ TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
+ const int fc_batches = FlatSizeSkipDim(activ_temp_shape, 3);
+ const int fc_output_depth =
+ MatchingDim(weights_shape, weights_dim_count - 2, activ_temp_shape, 3);
+ const int fc_accum_depth = total_input_depth;
+ TFLITE_DCHECK_EQ(fc_output_depth, 4 * output_depth);
+
+ // Depth-concatenate prev_activ and input data together.
+ uint8 const *concat_input_arrays_data[2] = {input_data_uint8, prev_activ_data_uint8};
+ const RuntimeShape *concat_input_arrays_shapes[2] = {&input_shape, &prev_activ_shape};
+ tflite::ConcatenationParams concat_params;
+ concat_params.axis = 3;
+ concat_params.inputs_count = 2;
+ Concatenation(concat_params, concat_input_arrays_shapes, concat_input_arrays_data,
+ concat_temp_shape, concat_temp_data_uint8);
+
+ // Implementation of the fully connected node inside the LSTM cell.
+  // The operands are 8-bit integers, the accumulators are internally 32-bit
+  // integers, and the output is 16-bit fixed-point with 3 integer bits, so
+ // the output range is [-2^3, 2^3] == [-8, 8]. The rationale for that
+ // is explained in the function comment above.
+ for (int b = 0; b < fc_batches; ++b)
+ {
+ for (int out_c = 0; out_c < fc_output_depth; ++out_c)
+ {
+ // Internal accumulation.
+      // Initialize the accumulator with the bias value.
+ int32 accum = bias_data_int32[out_c];
+ // Accumulation loop.
+ for (int d = 0; d < fc_accum_depth; ++d)
+ {
+ int16 input_val = concat_temp_data_uint8[b * fc_accum_depth + d] - 128;
+ int16 weights_val = weights_data_uint8[out_c * fc_accum_depth + d] - weights_zero_point;
+ accum += input_val * weights_val;
+ }
+ // Down-scale the final int32 accumulator to the scale used by our
+ // (16-bit, using 3 integer bits) fixed-point format. The quantized
+ // multiplier and shift here have been pre-computed offline
+ // (e.g. by toco).
+ accum = MultiplyByQuantizedMultiplier(accum, accum_multiplier, accum_shift);
+ // Saturate, cast to int16, and store to the temporary activations array.
+ accum = std::max(-32768, std::min(32767, static_cast<int>(accum)));
+ activ_temp_data_int16[out_c + fc_output_depth * b] = accum;
+ }
+ }
+
+ // Rest of the LSTM cell: tanh and logistic math functions, and some adds
+ // and muls, all done in 16-bit fixed-point.
+ for (int b = 0; b < outer_size; ++b)
+ {
+ for (int c = 0; c < output_depth; ++c)
+ {
+ // Define the fixed-point data types that we will use here. All use
+      // int16 as the underlying integer type, i.e. all are 16-bit fixed-point.
+ // They only differ by the number of integral vs. fractional bits,
+ // determining the range of values that they can represent.
+ //
+ // F0 uses 0 integer bits, range [-1, 1].
+ // This is the return type of math functions such as tanh, logistic,
+ // whose range is in [-1, 1].
+ using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+ // F3 uses 3 integer bits, range [-8, 8].
+ // This is the range of the previous fully-connected node's output,
+ // which is our input here.
+ using F3 = gemmlowp::FixedPoint<std::int16_t, 3>;
+ // FS uses StateIntegerBits integer bits, range [-2^StateIntegerBits,
+ // 2^StateIntegerBits]. It's used to represent the internal state, whose
+ // number of integer bits is currently dictated by the model. See comment
+ // on the StateIntegerBits template parameter above.
+ using FS = gemmlowp::FixedPoint<std::int16_t, StateIntegerBits>;
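+      // For illustration: with StateIntegerBits = 4, FS has 11 fractional
+      // bits, so a raw int16 value r represents r * 2^-11 and covers roughly
+      // [-16, 16).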
+ // Implementation of input gate, using fixed-point logistic function.
+ F3 input_gate_input =
+ F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 0 * output_depth + c]);
+ F0 input_gate_output = gemmlowp::logistic(input_gate_input);
+ // Implementation of input modulation gate, using fixed-point tanh
+ // function.
+ F3 input_modulation_gate_input =
+ F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 1 * output_depth + c]);
+ F0 input_modulation_gate_output = gemmlowp::tanh(input_modulation_gate_input);
+ // Implementation of forget gate, using fixed-point logistic function.
+ F3 forget_gate_input =
+ F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 2 * output_depth + c]);
+ F0 forget_gate_output = gemmlowp::logistic(forget_gate_input);
+ // Implementation of output gate, using fixed-point logistic function.
+ F3 output_gate_input =
+ F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 3 * output_depth + c]);
+ F0 output_gate_output = gemmlowp::logistic(output_gate_input);
+ // Implementation of internal multiplication nodes, still in fixed-point.
+ F0 input_times_input_modulation = input_gate_output * input_modulation_gate_output;
+ FS prev_state = FS::FromRaw(prev_state_data_int16[b * output_depth + c]);
+ FS prev_state_times_forget_state = forget_gate_output * prev_state;
+ // Implementation of internal addition node, saturating.
+ FS new_state =
+ gemmlowp::SaturatingAdd(gemmlowp::Rescale<StateIntegerBits>(input_times_input_modulation),
+ prev_state_times_forget_state);
+ // Implementation of last internal Tanh node, still in fixed-point.
+ // Since a Tanh fixed-point implementation is specialized for a given
+      // number of integer bits, and each specialization can have a substantial
+      // code size, and we already used a Tanh above on an input with 3 integer
+ // bits, and per the table in the above function comment there is no
+ // significant accuracy to be lost by clamping to [-8, +8] for a
+ // 3-integer-bits representation, let us just do that. This helps people
+ // porting this to targets where code footprint must be minimized.
+ F3 new_state_f3 = gemmlowp::Rescale<3>(new_state);
+ F0 output_activ_int16 = output_gate_output * gemmlowp::tanh(new_state_f3);
+ // Store the new internal state back to memory, as 16-bit integers.
+ // Note: here we store the original value with StateIntegerBits, not
+ // the rescaled 3-integer-bits value fed to tanh.
+ output_state_data_int16[b * output_depth + c] = new_state.raw();
+ // Down-scale the output activations to 8-bit integers, saturating,
+ // and store back to memory.
+ int16 rescaled_output_activ = gemmlowp::RoundingDivideByPOT(output_activ_int16.raw(), 8);
+ int16 clamped_output_activ =
+ std::max<int16>(-128, std::min<int16>(127, rescaled_output_activ));
+ output_activ_data_uint8[b * output_depth + c] = 128 + clamped_output_activ;
+ }
+ }
+}
+
+template <typename Scalar>
+void Split(const SplitParams &params, const RuntimeShape &input_shape, const Scalar *input_data,
+ const RuntimeShape *const *output_shapes, Scalar *const *output_data)
+{
+ ruy::profiler::ScopeLabel label("Split");
+ const int split_dimensions = input_shape.DimensionsCount();
+ int axis = params.axis < 0 ? params.axis + split_dimensions : params.axis;
+ int outputs_count = params.num_split;
+ TFLITE_DCHECK_LT(axis, split_dimensions);
+
+ int64_t split_size = 0;
+ for (int i = 0; i < outputs_count; i++)
+ {
+ TFLITE_DCHECK_EQ(output_shapes[i]->DimensionsCount(), split_dimensions);
+ for (int j = 0; j < split_dimensions; j++)
+ {
+ if (j != axis)
+ {
+ MatchingDim(*output_shapes[i], j, input_shape, j);
+ }
+ }
+ split_size += output_shapes[i]->Dims(axis);
+ }
+ TFLITE_DCHECK_EQ(split_size, input_shape.Dims(axis));
+ int64_t outer_size = 1;
+ for (int i = 0; i < axis; ++i)
+ {
+ outer_size *= input_shape.Dims(i);
+ }
+ // For all output arrays,
+ // FlatSize() = outer_size * Dims(axis) * base_inner_size;
+ int64_t base_inner_size = 1;
+ for (int i = axis + 1; i < split_dimensions; ++i)
+ {
+ base_inner_size *= input_shape.Dims(i);
+ }
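+  // For illustration (hypothetical shapes): splitting a [2, 6, 3] input along
+  // axis 1 into two [2, 3, 3] outputs gives outer_size = 2,
+  // base_inner_size = 3, and a per-output copy_size of 3 * 3 = 9 elements.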
+
+ const Scalar *input_ptr = input_data;
+ for (int k = 0; k < outer_size; k++)
+ {
+ for (int i = 0; i < outputs_count; ++i)
+ {
+ const int copy_size = output_shapes[i]->Dims(axis) * base_inner_size;
+ memcpy(output_data[i] + k * copy_size, input_ptr, copy_size * sizeof(Scalar));
+ input_ptr += copy_size;
+ }
+ }
+}
+
+inline int NodeOffset(int b, int h, int w, int height, int width)
+{
+ return (b * height + h) * width + w;
+}
+
+inline void LocalResponseNormalization(const tflite::LocalResponseNormalizationParams &op_params,
+ const RuntimeShape &input_shape, const float *input_data,
+ const RuntimeShape &output_shape, float *output_data)
+{
+ const int trailing_dim = input_shape.DimensionsCount() - 1;
+ const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+ const int depth = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+
+ for (int i = 0; i < outer_size; ++i)
+ {
+ for (int c = 0; c < depth; ++c)
+ {
+ const int begin_input_c = std::max(0, static_cast<int>(c - op_params.range));
+ const int end_input_c = std::min(depth, static_cast<int>(c + op_params.range));
+ float accum = 0.f;
+ for (int input_c = begin_input_c; input_c < end_input_c; ++input_c)
+ {
+ const float input_val = input_data[i * depth + input_c];
+ accum += input_val * input_val;
+ }
+ const float multiplier = std::pow(op_params.bias + op_params.alpha * accum, -op_params.beta);
+ output_data[i * depth + c] = input_data[i * depth + c] * multiplier;
+ }
+ }
+}
+
+inline void Dequantize(const RuntimeShape &input_shape, const Eigen::half *input_data,
+ const RuntimeShape &output_shape, float *output_data)
+{
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ for (int i = 0; i < flat_size; i++)
+ {
+ output_data[i] = static_cast<float>(input_data[i]);
+ }
+}
+
+inline void FakeQuant(const tflite::FakeQuantParams &op_params, const RuntimeShape &input_shape,
+ const float *input_data, const RuntimeShape &output_shape, float *output_data)
+{
+ ruy::profiler::ScopeLabel label("FakeQuant");
+ float rmin = op_params.minmax.min;
+ float rmax = op_params.minmax.max;
+ int num_bits = op_params.num_bits;
+ // 0 should always be a representable value. Let's assume that the initial
+  // min/max range contains 0.
+ TFLITE_DCHECK_LE(rmin, 0.0f);
+ TFLITE_DCHECK_GE(rmax, 0.0f);
+ TFLITE_DCHECK_LT(rmin, rmax);
+
+  // Code matches TensorFlow's FakeQuantWithMinMaxArgsFunctor.
+ int quant_min = 0;
+ int quant_max = (1 << num_bits) - 1;
+ float nudged_min, nudged_max, nudged_scale;
+ NudgeQuantizationRange(rmin, rmax, quant_min, quant_max, &nudged_min, &nudged_max, &nudged_scale);
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ FakeQuantizeArray(nudged_scale, nudged_min, nudged_max, input_data, output_data, flat_size);
+}
+
+// Common subroutine for both `GatherNd` and `GatherNdString`.
+struct GatherNdHelperResult
+{
+ int n_slices;
+ int slice_size;
+ int indices_nd;
+ std::vector<int> dims_to_count;
+};
+
+// Returns common values used by both `GatherNd` and `GatherNdString`.
+inline GatherNdHelperResult GatherNdHelper(const RuntimeShape &params_shape,
+ const RuntimeShape &indices_shape)
+{
+ GatherNdHelperResult ret;
+ ret.n_slices = 1;
+ ret.slice_size = 1;
+ const int indices_dims = indices_shape.DimensionsCount();
+ ret.indices_nd = indices_shape.Dims(indices_dims - 1);
+ const int params_dims = params_shape.DimensionsCount();
+ for (int i = 0; i < indices_dims - 1; ++i)
+ {
+ ret.n_slices *= indices_shape.Dims(i);
+ }
+ for (int i = ret.indices_nd; i < params_dims; ++i)
+ {
+ ret.slice_size *= params_shape.Dims(i);
+ }
+
+ int remain_flat_size = params_shape.FlatSize();
+ ret.dims_to_count = std::vector<int>(ret.indices_nd, 0);
+ for (int i = 0; i < ret.indices_nd; ++i)
+ {
+ ret.dims_to_count[i] = remain_flat_size / params_shape.Dims(i);
+ remain_flat_size = ret.dims_to_count[i];
+ }
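+
+  // For illustration (hypothetical shapes): with params_shape = [4, 5, 6] and
+  // indices_shape = [2, 2], we get indices_nd = 2, n_slices = 2,
+  // slice_size = 6, and dims_to_count = {30, 6}; an index pair (i, j) then
+  // addresses the slice at flat offset i * 30 + j * 6 in params_data.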
+
+ return ret;
+}
+
+template <typename ParamsT, typename IndicesT = int32>
+inline void GatherNd(const RuntimeShape &params_shape, const ParamsT *params_data,
+ const RuntimeShape &indices_shape, const IndicesT *indices_data,
+ const RuntimeShape &output_shape, ParamsT *output_data)
+{
+ ruy::profiler::ScopeLabel label("GatherNd");
+
+ const GatherNdHelperResult res = GatherNdHelper(params_shape, indices_shape);
+ for (int i = 0; i < res.n_slices; ++i)
+ {
+ int from_pos = 0;
+ for (int j = 0; j < res.indices_nd; ++j)
+ {
+ from_pos += indices_data[i * res.indices_nd + j] * res.dims_to_count[j];
+ }
+ std::memcpy(output_data + i * res.slice_size, params_data + from_pos,
+ sizeof(ParamsT) * res.slice_size);
+ }
+}
+
+#ifndef TF_LITE_STATIC_MEMORY
+template <typename IndicesT = int32>
+inline void GatherNdString(const RuntimeShape &params_shape, const TfLiteTensor *params_data,
+ const RuntimeShape &indices_shape, const IndicesT *indices_data,
+ const RuntimeShape &output_shape, TfLiteTensor *output_data)
+{
+ ruy::profiler::ScopeLabel label("GatherNdString");
+
+ const GatherNdHelperResult res = GatherNdHelper(params_shape, indices_shape);
+ DynamicBuffer buffer;
+ for (int i = 0; i < res.n_slices; ++i)
+ {
+ int from_pos = 0;
+ for (int j = 0; j < res.indices_nd; ++j)
+ {
+ from_pos += indices_data[i * res.indices_nd + j] * res.dims_to_count[j];
+ }
+ for (int j = 0; j < res.slice_size; ++j)
+ {
+ buffer.AddString(GetString(params_data, from_pos + j));
+ }
+ }
+ buffer.WriteToTensor(output_data, /*new_shape=*/nullptr);
+}
+#endif
+
+template <typename IndicesT, typename UpdatesT>
+inline void ScatterNd(const RuntimeShape &indices_shape, const IndicesT *indices_data,
+ const RuntimeShape &updates_shape, const UpdatesT *updates_data,
+ const RuntimeShape &output_shape, UpdatesT *output_data)
+{
+ ruy::profiler::ScopeLabel label("ScatterNd");
+
+ int n_slices = 1;
+ int slice_size = 1;
+ const int outer_dims = indices_shape.DimensionsCount() - 1;
+ const int indices_nd = indices_shape.Dims(outer_dims);
+ const int updates_dims = updates_shape.DimensionsCount();
+ for (int i = 0; i < outer_dims; ++i)
+ {
+ n_slices *= indices_shape.Dims(i);
+ }
+ for (int i = outer_dims; i < updates_dims; ++i)
+ {
+ slice_size *= updates_shape.Dims(i);
+ }
+
+ int output_flat_size = output_shape.FlatSize();
+ int remain_flat_size = output_flat_size;
+ std::vector<int> dims_to_count(indices_nd, 0);
+ for (int i = 0; i < indices_nd; ++i)
+ {
+ dims_to_count[i] = remain_flat_size / output_shape.Dims(i);
+ remain_flat_size = dims_to_count[i];
+ }
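+  // For illustration: with output_shape = [4, 5] and indices_nd = 2,
+  // dims_to_count = {5, 1}, so an index pair (i, j) scatters its update slice
+  // to flat offset i * 5 + j.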
+
+ memset(output_data, 0, sizeof(UpdatesT) * output_flat_size);
+ for (int i = 0; i < n_slices; ++i)
+ {
+ int to_pos = 0;
+ for (int j = 0; j < indices_nd; ++j)
+ {
+ IndicesT idx = indices_data[i * indices_nd + j];
+ TFLITE_DCHECK(0 <= idx && idx < output_shape.Dims(j));
+ to_pos += idx * dims_to_count[j];
+ }
+ for (int j = 0; j < slice_size; j++)
+ {
+ output_data[to_pos + j] += updates_data[i * slice_size + j];
+ }
+ }
+}
+
+template <typename T>
+inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape,
+ const RuntimeShape &output_shape, SequentialTensorWriter<T> *writer)
+{
+ const RuntimeShape ext_shape = RuntimeShape::ExtendedShape(5, input_shape);
+ TFLITE_DCHECK_LE(op_params.begin_count, 5);
+ TFLITE_DCHECK_LE(op_params.size_count, 5);
+ const int begin_count = op_params.begin_count;
+ const int size_count = op_params.size_count;
+ // We front-pad the begin and size vectors.
+ std::array<int, 5> start;
+ std::array<int, 5> stop;
+ for (int i = 0; i < 5; ++i)
+ {
+ int padded_i = 5 - i;
+ start[i] = begin_count < padded_i ? 0 : op_params.begin[begin_count - padded_i];
+ stop[i] = (size_count < padded_i || op_params.size[size_count - padded_i] == -1)
+ ? ext_shape.Dims(i)
+ : start[i] + op_params.size[size_count - padded_i];
+ }
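+  // For illustration: with a rank-4 input extended to rank 5 and
+  // begin_count == size_count == 4, dimension 0 is the front-padded one:
+  // start[0] = 0 and stop[0] = ext_shape.Dims(0), while dimensions 1..4 take
+  // their bounds from op_params.begin and op_params.size.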
+
+ for (int i0 = start[0]; i0 < stop[0]; ++i0)
+ {
+ for (int i1 = start[1]; i1 < stop[1]; ++i1)
+ {
+ for (int i2 = start[2]; i2 < stop[2]; ++i2)
+ {
+ for (int i3 = start[3]; i3 < stop[3]; ++i3)
+ {
+ for (int i4 = start[4]; i4 < stop[4]; ++i4)
+ {
+ writer->Write(Offset(ext_shape, i0, i1, i2, i3, i4));
+ }
+ }
+ }
+ }
+ }
+}
+
+template <typename T>
+inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape,
+ const T *input_data, const RuntimeShape &output_shape, T *output_data)
+{
+ SequentialTensorWriter<T> writer(input_data, output_data);
+ return Slice(op_params, input_shape, output_shape, &writer);
+}
+
+template <typename T>
+inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape,
+ const TfLiteTensor *input, const RuntimeShape &output_shape, TfLiteTensor *output)
+{
+ SequentialTensorWriter<T> writer(input, output);
+ return Slice(op_params, input_shape, output_shape, &writer);
+}
+
+template <typename T>
+void Minimum(const RuntimeShape &input1_shape, const T *input1_data, const T *input2_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ const int flat_size = MatchingFlatSize(input1_shape, output_shape);
+
+ auto min_value = input2_data[0];
+ for (int i = 0; i < flat_size; i++)
+ {
+ output_data[i] = input1_data[i] > min_value ? min_value : input1_data[i];
+ }
+}
+
+// Convenience version that allows, for example, generated-code calls to be
+// the same as other binary ops.
+template <typename T>
+inline void Minimum(const RuntimeShape &input1_shape, const T *input1_data, const RuntimeShape &,
+ const T *input2_data, const RuntimeShape &output_shape, T *output_data)
+{
+ // Drop shape of second input: not needed.
+ Minimum(input1_shape, input1_data, input2_data, output_shape, output_data);
+}
+
+template <typename T>
+void Maximum(const RuntimeShape &input1_shape, const T *input1_data, const T *input2_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ const int flat_size = MatchingFlatSize(input1_shape, output_shape);
+
+ auto max_value = input2_data[0];
+ for (int i = 0; i < flat_size; i++)
+ {
+ output_data[i] = input1_data[i] < max_value ? max_value : input1_data[i];
+ }
+}
+
+// Convenience version that allows, for example, generated-code calls to be
+// the same as other binary ops.
+template <typename T>
+inline void Maximum(const RuntimeShape &input1_shape, const T *input1_data, const RuntimeShape &,
+ const T *input2_data, const RuntimeShape &output_shape, T *output_data)
+{
+ // Drop shape of second input: not needed.
+ Maximum(input1_shape, input1_data, input2_data, output_shape, output_data);
+}
+
+template <typename T1, typename T2, typename T3>
+void ArgMax(const RuntimeShape &input1_shape, const T1 *input1_data, const T3 *input2_data,
+ const RuntimeShape &output_shape, T2 *output_data)
+{
+ ArgMinMax(input1_shape, input1_data, input2_data, output_shape, output_data, std::greater<T1>());
+}
+
+// Convenience version that allows, for example, generated-code calls to be
+// the same as other binary ops.
+template <typename T1, typename T2, typename T3>
+inline void ArgMax(const RuntimeShape &input1_shape, const T1 *input1_data,
+ const RuntimeShape &input2_shape, const T3 *input2_data,
+ const RuntimeShape &output_shape, T2 *output_data)
+{
+ // Drop shape of second input: not needed.
+ ArgMax(input1_shape, input1_data, input2_data, output_shape, output_data);
+}
+
+template <typename D, typename T>
+void Select(const RuntimeShape &input_condition_shape, const D *input_condition_data,
+ const RuntimeShape &input_x_shape, const T *input_x_data,
+ const RuntimeShape &input_y_shape, const T *input_y_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ int64_t flatsize;
+  // Allow the select operator to run on a mix of scalar and one-element
+  // tensors.
+ if (input_condition_shape.FlatSize() == 1 && input_x_shape.FlatSize() == 1 &&
+ input_y_shape.FlatSize() == 1 && output_shape.FlatSize() == 1)
+ {
+ flatsize = 1;
+ }
+ else
+ {
+ flatsize = MatchingFlatSize(input_condition_shape, input_x_shape, input_y_shape, output_shape);
+ }
+ for (int64_t i = 0; i < flatsize; ++i)
+ {
+ output_data[i] = input_condition_data[i] ? input_x_data[i] : input_y_data[i];
+ }
+}
+
+template <typename D, typename T>
+void RankOneSelect(const RuntimeShape &input_condition_shape, const D *input_condition_data,
+ const RuntimeShape &input_x_shape, const T *input_x_data,
+ const RuntimeShape &input_y_shape, const T *input_y_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ const int64_t outer_size = input_condition_shape.FlatSize();
+ int64_t inner_size;
+ if (input_condition_shape.DimensionsCount() == 0)
+ {
+ inner_size = MatchingFlatSize(input_x_shape, input_y_shape, output_shape);
+ }
+ else
+ {
+ TFLITE_DCHECK_EQ(MatchingDim(input_x_shape, 0, input_y_shape, 0, output_shape, 0), outer_size);
+ inner_size = MatchingFlatSizeSkipDim(input_x_shape, 0, input_y_shape, output_shape);
+ }
+
+ int64_t offset = 0;
+ for (int64_t i = 0; i < outer_size; i++)
+ {
+ const T *input_data = input_condition_data[i] ? input_x_data : input_y_data;
+ memcpy(output_data + offset, input_data + offset, inner_size * sizeof(T));
+ offset += inner_size;
+ }
+}
+
+template <typename D, typename T>
+void BroadcastSelect4DSlow(const RuntimeShape &input_condition_shape, const D *input_condition_data,
+ const RuntimeShape &input_x_shape, const T *input_x_data,
+ const RuntimeShape &input_y_shape, const T *input_y_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ TFLITE_DCHECK_LE(input_condition_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(input_x_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(input_y_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(output_shape.DimensionsCount(), 4);
+
+ const RuntimeShape extended_output_shape = RuntimeShape::ExtendedShape(4, output_shape);
+
+ NdArrayDesc<4> desc_condition;
+ NdArrayDesc<4> desc_x;
+ NdArrayDesc<4> desc_y;
+ NdArrayDescsForElementwiseBroadcast(input_condition_shape, input_x_shape, input_y_shape,
+ &desc_condition, &desc_x, &desc_y);
+
+  // In TensorFlow, the dimensions are canonically named (batch_number, row,
+ // col, channel), with extents (batches, height, width, depth), with the
+ // trailing dimension changing most rapidly (channels has the smallest
+ // stride, typically 1 element).
+ //
+ // In generated C code, we store arrays with the dimensions reversed. The
+ // first dimension has smallest stride.
+ //
+  // We name our variables by their TensorFlow convention, but generate C code
+ // nesting loops such that the innermost loop has the smallest stride for
+ // the best cache behavior.
+ for (int b = 0; b < extended_output_shape.Dims(0); ++b)
+ {
+ for (int y = 0; y < extended_output_shape.Dims(1); ++y)
+ {
+ for (int x = 0; x < extended_output_shape.Dims(2); ++x)
+ {
+ for (int c = 0; c < extended_output_shape.Dims(3); ++c)
+ {
+ const int condition_index = SubscriptToIndex(desc_condition, b, y, x, c);
+ const int x_index = SubscriptToIndex(desc_x, b, y, x, c);
+ const int y_index = SubscriptToIndex(desc_y, b, y, x, c);
+ output_data[Offset(extended_output_shape, b, y, x, c)] =
+ input_condition_data[condition_index] ? input_x_data[x_index] : input_y_data[y_index];
+ }
+ }
+ }
+ }
+}
+
+template <typename D, typename T>
+void SelectTrueCoords(const RuntimeShape &input_condition_shape, const D *input_condition_data,
+ T *output_data)
+{
+ const size_t size = input_condition_shape.FlatSize();
+ if (size == 0)
+ {
+    // The flat size is zero, so there is nothing to output.
+ return;
+ }
+ const size_t cond_rank = input_condition_shape.DimensionsCount();
+
+ std::vector<int> dims_to_count(cond_rank, 0);
+ int cur_flat_size = size;
+ for (int i = 0; i < cond_rank; ++i)
+ {
+ dims_to_count[i] = cur_flat_size / input_condition_shape.Dims(i);
+ cur_flat_size = dims_to_count[i];
+ }
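+  // For illustration: a condition of shape [2, 3] yields
+  // dims_to_count = {3, 1}; a true element at flat index 4 then unravels to
+  // the coordinate (1, 1) below.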
+
+ int output_index = 0;
+ for (int i = 0; i < size; ++i)
+ {
+ if (input_condition_data[i])
+ {
+      // Insert the coordinate of the current item (row-major) into the output.
+ int flat_index = i;
+ for (int j = 0; j < cond_rank; ++j)
+ {
+ int coord_j = flat_index / dims_to_count[j];
+ output_data[output_index * cond_rank + j] = coord_j;
+ flat_index %= dims_to_count[j];
+ }
+ output_index++;
+ }
+ }
+}
+
+// For ease of implementation, the indices are always a vector of size-4 vectors.
+template <typename T, typename TI>
+inline void SparseToDense(const std::vector<std::vector<TI>> &indices, const T *values,
+ T default_value, bool value_is_scalar,
+ const RuntimeShape &unextended_output_shape, T *output_data)
+{
+ TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+ const RuntimeShape output_shape = RuntimeShape::ExtendedShape(4, unextended_output_shape);
+ const int value_count = indices.size();
+
+  // First, fill output_data with the default value.
+ const int num_elements = output_shape.FlatSize();
+ for (int i = 0; i < num_elements; ++i)
+ {
+ output_data[i] = default_value;
+ }
+
+  // Handle the scalar-value case separately to avoid checking the boolean
+  // condition inside the loop on every iteration.
+ if (value_is_scalar)
+ {
+ for (int i = 0; i < value_count; ++i)
+ {
+ const std::vector<TI> &index = indices[i];
+ TFLITE_DCHECK_EQ(index.size(), 4);
+ const T value = *values; // just use the first value.
+ output_data[Offset(output_shape, index[0], index[1], index[2], index[3])] = value;
+ }
+ return;
+ }
+
+ // Go through the values and indices to fill the sparse values.
+ for (int i = 0; i < value_count; ++i)
+ {
+ const std::vector<TI> &index = indices[i];
+ TFLITE_DCHECK_EQ(index.size(), 4);
+ const T value = values[i];
+ output_data[Offset(output_shape, index[0], index[1], index[2], index[3])] = value;
+ }
+}
+
+template <typename T>
+inline void Pow(const RuntimeShape &input1_shape, const T *input1_data,
+ const RuntimeShape &input2_shape, const T *input2_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ const int flat_size = MatchingFlatSize(input1_shape, input2_shape, output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ output_data[i] = std::pow(input1_data[i], input2_data[i]);
+ }
+}
+
+template <typename T>
+inline void BroadcastPow4DSlow(const RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const RuntimeShape &unextended_input2_shape, const T *input2_data,
+ const RuntimeShape &unextended_output_shape, T *output_data)
+{
+ TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+ const RuntimeShape output_shape = RuntimeShape::ExtendedShape(4, unextended_output_shape);
+
+ NdArrayDesc<4> desc1;
+ NdArrayDesc<4> desc2;
+ NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, unextended_input2_shape, &desc1,
+ &desc2);
+
+ for (int b = 0; b < output_shape.Dims(0); ++b)
+ {
+ for (int y = 0; y < output_shape.Dims(1); ++y)
+ {
+ for (int x = 0; x < output_shape.Dims(2); ++x)
+ {
+ for (int c = 0; c < output_shape.Dims(3); ++c)
+ {
+ auto out_idx = Offset(output_shape, b, y, x, c);
+ auto in1_idx = SubscriptToIndex(desc1, b, y, x, c);
+ auto in2_idx = SubscriptToIndex(desc2, b, y, x, c);
+ auto in1_val = input1_data[in1_idx];
+ auto in2_val = input2_data[in2_idx];
+ output_data[out_idx] = std::pow(in1_val, in2_val);
+ }
+ }
+ }
+ }
+}
+
+template <typename Scalar>
+void Reverse(int axis, const RuntimeShape &input_shape, const Scalar *input_data,
+ const RuntimeShape &output_shape, Scalar *output_data)
+{
+ ruy::profiler::ScopeLabel label("Reverse");
+
+ int outer_size = 1;
+ for (int i = 0; i < axis; ++i)
+ {
+ outer_size *= input_shape.Dims(i);
+ }
+
+ int copy_size = 1;
+ for (int i = axis + 1; i < input_shape.DimensionsCount(); ++i)
+ {
+ copy_size *= input_shape.Dims(i);
+ }
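+  // For illustration: with input shape [2, 3, 4] and axis = 1, outer_size = 2
+  // and copy_size = 4, and row j along the axis is copied to row (2 - j)
+  // within each outer slice.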
+
+ const int dims_at_axis = input_shape.Dims(axis);
+ for (int i = 0; i < outer_size; ++i)
+ {
+ for (int j = 0; j < dims_at_axis; ++j)
+ {
+ const int start_pos = (i * dims_at_axis + j) * copy_size;
+ Scalar *output_ptr = output_data + start_pos;
+ int loc = (i * dims_at_axis + dims_at_axis - j - 1) * copy_size;
+ memcpy(output_ptr, input_data + loc, copy_size * sizeof(Scalar));
+ }
+ }
+}
+
+template <typename Scalar, typename TS>
+void ReverseSequence(const TS *seq_lengths, const int seq_dim, const int batch_dim,
+ const RuntimeShape &input_shape, const Scalar *input_data,
+ const RuntimeShape &output_shape, Scalar *output_data)
+{
+ ruy::profiler::ScopeLabel label("ReverseSequence");
+
+ int outer_size = 1;
+ int outer_dim = std::min(batch_dim, seq_dim);
+ int medium_dim = std::max(batch_dim, seq_dim);
+ for (int i = 0; i < outer_dim; ++i)
+ {
+ outer_size *= input_shape.Dims(i);
+ }
+
+ int medium_size = 1;
+ for (int i = outer_dim + 1; i < medium_dim; ++i)
+ {
+ medium_size *= input_shape.Dims(i);
+ }
+
+ int copy_size = 1;
+ for (int i = medium_dim + 1; i < input_shape.DimensionsCount(); ++i)
+ {
+ copy_size *= input_shape.Dims(i);
+ }
+
+ const int dims_at_outer_dim = input_shape.Dims(outer_dim);
+ const int dims_at_medium_dim = input_shape.Dims(medium_dim);
+
+ Scalar *output_ptr;
+ if (batch_dim > seq_dim)
+ {
+ for (int i = 0; i < outer_size; ++i)
+ {
+ for (int j = 0; j < dims_at_outer_dim; ++j)
+ {
+ const int in_pos_base = (i * dims_at_outer_dim + j) * medium_size;
+ for (int p = 0; p < medium_size; ++p)
+ {
+ for (int q = 0; q < dims_at_medium_dim; ++q)
+ {
+ const int in_pos = ((in_pos_base + p) * dims_at_medium_dim + q) * copy_size;
+ const Scalar *in_ptr = input_data + in_pos;
+ int sl = seq_lengths[q] - 1;
+ if (j > sl)
+ {
+ output_ptr = output_data + in_pos;
+ }
+ else
+ {
+ const int out_pos_base = (i * dims_at_outer_dim + sl - j) * medium_size;
+ const int out_pos = ((out_pos_base + p) * dims_at_medium_dim + q) * copy_size;
+ output_ptr = output_data + out_pos;
+ }
+ memcpy(output_ptr, in_ptr, copy_size * sizeof(Scalar));
+ }
+ }
+ }
+ }
+ }
+ else if (batch_dim < seq_dim)
+ {
+ for (int i = 0; i < outer_size; ++i)
+ {
+ for (int j = 0; j < dims_at_outer_dim; ++j)
+ {
+ const int in_pos_base = (i * dims_at_outer_dim + j) * medium_size;
+ int sl = seq_lengths[j] - 1;
+ const int out_pos_base = (i * dims_at_outer_dim + j) * medium_size;
+ for (int p = 0; p < medium_size; ++p)
+ {
+ for (int q = 0; q < dims_at_medium_dim; ++q)
+ {
+ const int in_pos = ((in_pos_base + p) * dims_at_medium_dim + q) * copy_size;
+ const Scalar *in_ptr = input_data + in_pos;
+ if (q > sl)
+ {
+ output_ptr = output_data + in_pos;
+ }
+ else
+ {
+ const int out_pos = ((out_pos_base + p) * dims_at_medium_dim + sl - q) * copy_size;
+ output_ptr = output_data + out_pos;
+ }
+ memcpy(output_ptr, in_ptr, copy_size * sizeof(Scalar));
+ }
+ }
+ }
+ }
+ }
+}
+
+template <typename T>
+inline void SegmentSum(const RuntimeShape &input_shape, const T *input_data,
+ const RuntimeShape &segment_ids_shape, const int32_t *segment_ids_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ const int segment_flat_size = MatchingFlatSizeSkipDim(input_shape, 0, output_shape);
+
+ memset(output_data, 0, sizeof(T) * output_shape.FlatSize());
+
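+  // For illustration: with segment_ids = {0, 0, 1}, input rows 0 and 1 are
+  // accumulated into output row 0 and input row 2 into output row 1.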
+ for (int i = 0; i < input_shape.Dims(0); i++)
+ {
+ int output_index = segment_ids_data[i];
+ for (int j = 0; j < segment_flat_size; ++j)
+ {
+ output_data[output_index * segment_flat_size + j] += input_data[i * segment_flat_size + j];
+ }
+ }
+}
+
+} // namespace reference_ops
+} // namespace tflite
+
+#endif // LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/pal.cmake b/compiler/luci-interpreter/pal/cmsisnn/pal.cmake
new file mode 100644
index 000000000..a68b363d9
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/pal.cmake
@@ -0,0 +1,65 @@
+macro(initialize_pal)
+ nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowRuySource EXACT 2.6.0 QUIET)
+ nnas_find_package(CMSISSource EXACT 5.8.0 QUIET)
+
+ if (NOT TensorFlowSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: TensorFlow not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowGEMMLowpSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: gemmlowp not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowEigenSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Eigen not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowRuySource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Ruy not found")
+ return()
+ endif ()
+
+ if (NOT CMSISSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: CMSISSource not found")
+ return()
+ endif ()
+
+ set(PAL_INITIALIZED TRUE)
+endmacro()
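+
+# Assumed usage sketch (caller names are illustrative): a CMakeLists.txt that
+# includes this file would call initialize_pal() first and proceed only when
+# PAL_INITIALIZED is set, e.g.
+#
+#   initialize_pal()
+#   if (NOT PAL_INITIALIZED)
+#     return()
+#   endif ()
+#   add_pal_to_target(luci_interpreter_kernels)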
+
+macro(add_pal_to_target TGT)
+ target_include_directories(${TGT} PRIVATE "${PAL}")
+ target_include_directories(${TGT} PRIVATE
+ "${TensorFlowRuySource_DIR}"
+ "${TensorFlowGEMMLowpSource_DIR}"
+ "${TensorFlowEigenSource_DIR}"
+ "${TensorFlowSource_DIR}")
+ target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR})
+
+ file(GLOB_RECURSE PAL_SOURCES "${CMSISSource_DIR}/CMSIS/NN/Source/*.c")
+ list(APPEND PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc)
+ add_library(luci_interpreter_cmsisnn_pal STATIC ${PAL_SOURCES})
+ set_property(TARGET luci_interpreter_cmsisnn_pal PROPERTY POSITION_INDEPENDENT_CODE ON)
+ target_include_directories(luci_interpreter_cmsisnn_pal PRIVATE
+ "${TensorFlowRuySource_DIR}"
+ "${TensorFlowGEMMLowpSource_DIR}"
+ "${TensorFlowEigenSource_DIR}"
+ "${TensorFlowSource_DIR}"
+ )
+
+ add_subdirectory(${CMSISSource_DIR}/CMSIS/NN ${CMAKE_CURRENT_BINARY_DIR}/CMSISNN)
+ target_include_directories(luci_interpreter_cmsisnn_pal PUBLIC
+ "${CMSISSource_DIR}/CMSIS/NN/Include"
+ "${CMSISSource_DIR}/CMSIS/DSP/Include"
+ "${CMSISSource_DIR}/CMSIS/Core/Include")
+
+ target_link_libraries(${TGT} PRIVATE luci_interpreter_cmsisnn_pal)
+endmacro()
diff --git a/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst b/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst
new file mode 100644
index 000000000..e4d42de33
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst
@@ -0,0 +1,87 @@
+REGISTER_KERNEL(Abs)
+REGISTER_KERNEL(Add)
+REGISTER_KERNEL(ArgMax)
+REGISTER_KERNEL(AveragePool2D)
+REGISTER_KERNEL(BatchMatMul)
+REGISTER_KERNEL(BatchToSpaceND)
+REGISTER_KERNEL(Cast)
+REGISTER_KERNEL(Concatenation)
+REGISTER_KERNEL(Conv2D)
+REGISTER_KERNEL(DepthToSpace)
+REGISTER_KERNEL(DepthwiseConv2D)
+REGISTER_KERNEL(Dequantize)
+REGISTER_KERNEL(Div)
+REGISTER_KERNEL(Elu)
+REGISTER_KERNEL(Exp)
+REGISTER_KERNEL(ExpandDims)
+REGISTER_KERNEL(Fill)
+REGISTER_KERNEL(Floor)
+REGISTER_KERNEL(FloorDiv)
+REGISTER_KERNEL(FloorMod)
+REGISTER_KERNEL(Equal)
+REGISTER_KERNEL(FullyConnected)
+REGISTER_KERNEL(Gather)
+REGISTER_KERNEL(Gelu)
+REGISTER_KERNEL(Greater)
+REGISTER_KERNEL(GreaterEqual)
+REGISTER_KERNEL(HardSwish)
+REGISTER_KERNEL(If)
+REGISTER_KERNEL(InstanceNorm)
+REGISTER_KERNEL(L2Normalize)
+REGISTER_KERNEL(L2Pool2D)
+REGISTER_KERNEL(LeakyRelu)
+REGISTER_KERNEL(Less)
+REGISTER_KERNEL(LessEqual)
+REGISTER_KERNEL(LocalResponseNormalization)
+REGISTER_KERNEL(Log)
+REGISTER_KERNEL(LogicalAnd)
+REGISTER_KERNEL(LogicalNot)
+REGISTER_KERNEL(LogicalOr)
+REGISTER_KERNEL(Logistic)
+REGISTER_KERNEL(LogSoftmax)
+REGISTER_KERNEL(Maximum)
+REGISTER_KERNEL(MaxPool2D)
+REGISTER_KERNEL(Mean)
+REGISTER_KERNEL(Minimum)
+REGISTER_KERNEL(MirrorPad)
+REGISTER_KERNEL(Mul)
+REGISTER_KERNEL(Neg)
+REGISTER_KERNEL(NotEqual)
+REGISTER_KERNEL(OneHot)
+REGISTER_KERNEL(Pack)
+REGISTER_KERNEL(Pad)
+REGISTER_KERNEL(PadV2)
+REGISTER_KERNEL(Pow)
+REGISTER_KERNEL(PRelu)
+REGISTER_KERNEL(Quantize)
+REGISTER_KERNEL(ReduceMax)
+REGISTER_KERNEL(ReduceProd)
+REGISTER_KERNEL(Relu)
+REGISTER_KERNEL(Relu6)
+REGISTER_KERNEL(Reshape)
+REGISTER_KERNEL(ResizeBilinear)
+REGISTER_KERNEL(ResizeNearestNeighbor)
+REGISTER_KERNEL(ReverseV2)
+REGISTER_KERNEL(Rsqrt)
+REGISTER_KERNEL(Select)
+REGISTER_KERNEL(Shape)
+REGISTER_KERNEL(Slice)
+REGISTER_KERNEL(Softmax)
+REGISTER_KERNEL(SpaceToBatchND)
+REGISTER_KERNEL(SpaceToDepth)
+REGISTER_KERNEL(Split)
+REGISTER_KERNEL(SplitV)
+REGISTER_KERNEL(StridedSlice)
+REGISTER_KERNEL(Sqrt)
+REGISTER_KERNEL(Square)
+REGISTER_KERNEL(SquaredDifference)
+REGISTER_KERNEL(Squeeze)
+REGISTER_KERNEL(Sub)
+REGISTER_KERNEL(Sum)
+REGISTER_KERNEL(SVDF)
+REGISTER_KERNEL(Tanh)
+REGISTER_KERNEL(Transpose)
+REGISTER_KERNEL(TransposeConv)
+REGISTER_KERNEL(UnidirectionalSequenceLSTM)
+REGISTER_KERNEL(Unpack)
+REGISTER_KERNEL(While)
diff --git a/compiler/luci-interpreter/pal/linux/PALArgMax.h b/compiler/luci-interpreter/pal/linux/PALArgMax.h
new file mode 100644
index 000000000..21e63296d
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALArgMax.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ARGMAX_H
+#define LUCI_INTERPRETER_PAL_ARGMAX_H
+
+#include <tensorflow/lite/kernels/internal/reference/arg_min_max.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T1, typename T2, typename T3>
+static inline void ArgMinMax(const tflite::RuntimeShape &input1_shape, const T1 *input1_data,
+ const T2 *axis, const tflite::RuntimeShape &output_shape,
+ T3 *output_data, const std::greater<T1> cmp)
+{
+ tflite::reference_ops::ArgMinMax(input1_shape, input1_data, axis, output_shape, output_data, cmp);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ARGMAX_H
diff --git a/compiler/luci-interpreter/pal/linux/PALAveragePool2d.h b/compiler/luci-interpreter/pal/linux/PALAveragePool2d.h
new file mode 100644
index 000000000..cce30601f
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALAveragePool2d.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+#define LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void AveragePool(const tflite::PoolParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data,
+ const tflite::RuntimeShape &scratchpad_shape, T *scratchpad_data)
+{
+ {
+    // MARK: At this moment this operation doesn't support types other than
+    // int8_t; only the specialization below is implemented.
+ assert(false && "AveragePool NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)output_shape;
+ (void)output_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ }
+}
+
+template <>
+inline void AveragePool<int8_t>(const tflite::PoolParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data,
+ const tflite::RuntimeShape &scratchpad_shape,
+ int8_t *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+
+ tflite::reference_integer_ops::AveragePool(params, input_shape, input_data, output_shape,
+ output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &output_shape)
+
+{
+ (void)input_data_type;
+ (void)input_shape;
+ (void)output_shape;
+
+ scratchpad->set_allocatable(false);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
diff --git a/compiler/luci-interpreter/pal/linux/PALBatchMatMul.h b/compiler/luci-interpreter/pal/linux/PALBatchMatMul.h
new file mode 100644
index 000000000..3894f2d92
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALBatchMatMul.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_BATCHMATMUL_H
+#define LUCI_INTERPRETER_PAL_BATCHMATMUL_H
+
+#include <tensorflow/lite/kernels/internal/reference/batch_matmul.h>
+
+namespace luci_interpreter_pal
+{
+inline void BatchMatMul(const tflite::RuntimeShape &lhs_shape, const float *lhs_data,
+ const tflite::RuntimeShape &rhs_shape, const float *rhs_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::BatchMatMul(lhs_shape, lhs_data, rhs_shape, rhs_data, output_shape,
+ output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *lhs_scratchpad,
+ luci_interpreter::Tensor *rhs_scratchpad,
+ const tflite::RuntimeShape &lhs_shape,
+ const tflite::RuntimeShape &rhs_shape)
+{
+ // Scratchpad for transposed LHS
+ {
+ auto lhs_rank = lhs_shape.DimensionsCount();
+ luci_interpreter::Shape scratchpad_size(lhs_rank);
+ for (int i = 0; i < lhs_rank - 2; ++i)
+ {
+ scratchpad_size.dim(i) = lhs_shape.Dims(i);
+ }
+ scratchpad_size.dim(lhs_rank - 2) = lhs_shape.Dims(lhs_rank - 1);
+ scratchpad_size.dim(lhs_rank - 1) = lhs_shape.Dims(lhs_rank - 2);
+
+ lhs_scratchpad->resize(scratchpad_size);
+ }
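+  // For illustration: an LHS of shape [batch, M, K] gets a scratchpad of
+  // shape [batch, K, M], i.e. the last two dimensions swapped to hold the
+  // transposed copy.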
+ // Scratchpad for transposed RHS
+ {
+ auto rhs_rank = rhs_shape.DimensionsCount();
+ luci_interpreter::Shape scratchpad_size(rhs_rank);
+ for (int i = 0; i < rhs_rank - 2; ++i)
+ {
+ scratchpad_size.dim(i) = rhs_shape.Dims(i);
+ }
+ scratchpad_size.dim(rhs_rank - 2) = rhs_shape.Dims(rhs_rank - 1);
+ scratchpad_size.dim(rhs_rank - 1) = rhs_shape.Dims(rhs_rank - 2);
+
+ rhs_scratchpad->resize(scratchpad_size);
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_BATCHMATMUL_H
diff --git a/compiler/luci-interpreter/pal/linux/PALBatchToSpaceND.h b/compiler/luci-interpreter/pal/linux/PALBatchToSpaceND.h
new file mode 100644
index 000000000..3fe2022ed
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALBatchToSpaceND.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+#define LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+BatchToSpaceND(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+ const tflite::RuntimeShape &unextended_input3_shape, const int32 *crops_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::optimized_ops::BatchToSpaceND(
+ unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+ unextended_input3_shape, crops_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
diff --git a/compiler/luci-interpreter/pal/linux/PALConv2d.h b/compiler/luci-interpreter/pal/linux/PALConv2d.h
new file mode 100644
index 000000000..985a15f39
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALConv2d.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_CONV2D_H
+#define LUCI_INTERPRETER_PAL_CONV2D_H
+
+#include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+ const float *input_data, const tflite::RuntimeShape &filter_shape,
+ const float *filter_data, const tflite::RuntimeShape &bias_shape,
+ const float *bias_data, const tflite::RuntimeShape &output_shape,
+ float *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ float *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ if (scratchpad_data)
+ {
+ const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int32_t input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
+ const int32_t output_height = output_shape.Dims(1);
+ const int32_t output_width = output_shape.Dims(2);
+ const int32_t filter_height = filter_shape.Dims(1);
+ const int32_t filter_width = filter_shape.Dims(2);
+ tflite::RuntimeShape im2col_shape{batches, output_height, output_width,
+ input_depth * filter_height * filter_width};
+
+ tflite::optimized_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data, im2col_shape,
+ scratchpad_data);
+ }
+ else
+ tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data,
+ tflite::RuntimeShape(), nullptr);
+}
+
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+ const uint8 *input_data, const tflite::RuntimeShape &filter_shape,
+ const uint8 *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+ uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ uint8 *scratchpad_data)
+{
+ // TODO This should only be done once (although it takes only a few microseconds).
+ // Also, the user should be able to adjust the number of threads.
+ auto gemmlowp_context = std::make_unique<gemmlowp::GemmContext>();
+ gemmlowp_context->set_max_num_threads(static_cast<int>(std::thread::hardware_concurrency()));
+
+ tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data, scratchpad_shape,
+ scratchpad_data, gemmlowp_context.get());
+}
+
+static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult,
+ const int32_t *shifts, const tflite::RuntimeShape &input_shape,
+ const int8 *input_data, const tflite::RuntimeShape &filter_shape,
+ const int8 *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+ int8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ int8 *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ // TODO enable optimized version
+ tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
+ filter_shape, filter_data, bias_shape, bias_data,
+ output_shape, output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::ConvParams &params,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &filter_shape,
+ const tflite::RuntimeShape &output_shape)
+{
+ const int32_t filter_height = filter_shape.Dims(1);
+ const int32_t filter_width = filter_shape.Dims(2);
+
+ // Allocate tensor for scratchpad, if needed.
+ // The checks here should be aligned with the actual implementation.
+ const bool need_dilated_scratchpad =
+ params.dilation_height_factor != 1 || params.dilation_width_factor != 1;
+ const bool need_non_dilated_scratchpad = params.stride_height != 1 || params.stride_width != 1 ||
+ filter_height != 1 || filter_width != 1;
+ auto _need_scratchpad = input_data_type != luci_interpreter::DataType::S16 &&
+ (need_dilated_scratchpad || need_non_dilated_scratchpad);
+
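+  // For illustration: a 1x1 filter with unit strides and no dilation needs no
+  // im2col buffer, so the scratchpad is marked non-allocatable in the else
+  // branch below.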
+ if (_need_scratchpad)
+ {
+ const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int32_t input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
+ const int32_t output_height = output_shape.Dims(1);
+ const int32_t output_width = output_shape.Dims(2);
+
+ auto data_type_size = static_cast<int32_t>(luci_interpreter::getDataTypeSize(input_data_type));
+ int32_t scratchpad_size = batches * output_width * output_height * input_depth * filter_height *
+ filter_width * data_type_size;
+ luci_interpreter::Shape scratchpad_shape{scratchpad_size};
+ scratchpad->resize(scratchpad_shape);
+ }
+ else
+ {
+ scratchpad->set_allocatable(false);
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_CONV2D_H
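Note on the sizing logic in SetupScratchpadTensor above: the im2col scratchpad grows with the product of the output area and the filter volume, so it can dominate memory for large convolutions. A minimal worked example with assumed layer dimensions (illustrative only, not part of the patch):

// Back-of-the-envelope im2col scratchpad size; the layer shape is assumed.
const int32_t batches = 1, output_height = 112, output_width = 112;
const int32_t input_depth = 64, filter_height = 3, filter_width = 3;
const int32_t data_type_size = 4; // sizeof(float)
const int32_t scratchpad_size = batches * output_width * output_height * input_depth *
                                filter_height * filter_width * data_type_size;
// 1 * 112 * 112 * 64 * 3 * 3 * 4 = 28,901,376 bytes, i.e. roughly 28 MB.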
diff --git a/compiler/luci-interpreter/pal/linux/PALDepthToSpace.h b/compiler/luci-interpreter/pal/linux/PALDepthToSpace.h
new file mode 100644
index 000000000..f9ebfcfb5
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALDepthToSpace.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+#define LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void DepthToSpace(const tflite::DepthToSpaceParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape,
+ const T *input_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::optimized_ops::DepthToSpace(op_params, unextended_input_shape, input_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
diff --git a/compiler/luci-interpreter/pal/linux/PALDepthwiseConv2d.h b/compiler/luci-interpreter/pal/linux/PALDepthwiseConv2d.h
new file mode 100644
index 000000000..c9d1a2948
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALDepthwiseConv2d.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+#define LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h>
+#include <cassert>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+DepthwiseConvPerChannel(const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+ const int32_t *output_shift, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &filter_shape,
+ const T *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32_t *bias_data, const tflite::RuntimeShape &output_shape,
+ T *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ T *scratchpad_data)
+{
+  // This operation is not yet supported on this platform; silence unused parameters.
+  assert(false && "DepthwiseConvPerChannel NYI");
+  (void)params;
+  (void)output_multiplier;
+  (void)output_shift;
+  (void)input_shape;
+  (void)input_data;
+  (void)filter_shape;
+  (void)filter_data;
+  (void)bias_shape;
+  (void)bias_data;
+  (void)output_shape;
+  (void)output_data;
+  (void)scratchpad_shape;
+  (void)scratchpad_data;
+}
+
+template <>
+inline void DepthwiseConvPerChannel<int8_t>(
+ const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+ const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data,
+ const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ tflite::reference_integer_ops::DepthwiseConvPerChannel(
+ params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const tflite::DepthwiseParams &params,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &filter_shape,
+ const tflite::RuntimeShape &output_shape)
+
+{
+ (void)params;
+ (void)input_data_type;
+ (void)input_shape;
+ (void)filter_shape;
+ (void)output_shape;
+
+ scratchpad->set_allocatable(false);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
diff --git a/compiler/luci-interpreter/pal/linux/PALDequantize.h b/compiler/luci-interpreter/pal/linux/PALDequantize.h
new file mode 100644
index 000000000..3af6d0777
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALDequantize.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Dequantize(tflite::DequantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::optimized_ops::Dequantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H
diff --git a/compiler/luci-interpreter/pal/linux/PALElu.h b/compiler/luci-interpreter/pal/linux/PALElu.h
new file mode 100644
index 000000000..cb365ffd0
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALElu.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ELU_H
+#define LUCI_INTERPRETER_PAL_ELU_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Elu(const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::optimized_ops::Elu(input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ELU_H
diff --git a/compiler/luci-interpreter/pal/linux/PALFullyConnected.h b/compiler/luci-interpreter/pal/linux/PALFullyConnected.h
new file mode 100644
index 000000000..62970dbf7
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALFullyConnected.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+#define LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+
+#include <tensorflow/lite/kernels/internal/reference/fully_connected.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h>
+#include <cassert>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void FullyConnected(const tflite::FullyConnectedParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &filter_shape, const T *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  // This operation is not yet supported on this platform; silence unused parameters.
+  assert(false && "FullyConnected NYI");
+  (void)params;
+  (void)input_shape;
+  (void)input_data;
+  (void)filter_shape;
+  (void)filter_data;
+  (void)bias_shape;
+  (void)bias_data;
+  (void)output_shape;
+  (void)output_data;
+}
+
+template <>
+inline void
+FullyConnected<int8_t>(const tflite::FullyConnectedParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data)
+{
+ tflite::reference_integer_ops::FullyConnected(params, input_shape, input_data, filter_shape,
+ filter_data, bias_shape, bias_data, output_shape,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
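The shape of the code above, a primary template that asserts NYI plus a real specialization for the supported type, recurs across these PAL headers (DepthwiseConvPerChannel follows it too). Unsupported element types still compile; they only trap at runtime, as this sketch with assumed tensor arguments shows:

// Resolves to the int8_t specialization above and runs normally.
luci_interpreter_pal::FullyConnected<int8_t>(params, in_shape, in_data, w_shape, w_data,
                                             b_shape, b_data, out_shape, out_data);
// The following would also compile, but would hit
// assert(false && "FullyConnected NYI") at runtime, so callers must gate
// on the tensor's element type beforehand:
// luci_interpreter_pal::FullyConnected<int16_t>(params, ...);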
diff --git a/compiler/luci-interpreter/pal/linux/PALGather.h b/compiler/luci-interpreter/pal/linux/PALGather.h
new file mode 100644
index 000000000..49ac35f93
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALGather.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_GATHER_H
+#define LUCI_INTERPRETER_PAL_GATHER_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T, typename CoordsT = int32>
+static inline void Gather(const tflite::GatherParams &op_params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &coords_shape, const CoordsT *coords_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::optimized_ops::Gather(op_params, input_shape, input_data, coords_shape, coords_data,
+ output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_GATHER_H
diff --git a/compiler/luci-interpreter/pal/linux/PALGelu.h b/compiler/luci-interpreter/pal/linux/PALGelu.h
new file mode 100644
index 000000000..e1796e727
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALGelu.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_GELU_H
+#define LUCI_INTERPRETER_PAL_GELU_H
+
+#include <tensorflow/lite/kernels/internal/reference/gelu.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Gelu(bool approximate, const tflite::RuntimeShape &input_shape,
+ const float *input_data, const tflite::RuntimeShape &output_shape,
+ float *output_data)
+{
+ tflite::reference_ops::Gelu(input_shape, input_data, approximate, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_GELU_H
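For reference, the approximate flag chooses between the exact and tanh-approximated GELU. These are the standard definitions, not something introduced by this patch:

\mathrm{GELU}(x) = \frac{x}{2}\left(1 + \operatorname{erf}\left(\frac{x}{\sqrt{2}}\right)\right)
\qquad
\mathrm{GELU}_{\approx}(x) = \frac{x}{2}\left(1 + \tanh\left(\sqrt{\tfrac{2}{\pi}}\,\left(x + 0.044715\,x^{3}\right)\right)\right)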
diff --git a/compiler/luci-interpreter/pal/linux/PALHardSwish.h b/compiler/luci-interpreter/pal/linux/PALHardSwish.h
new file mode 100644
index 000000000..2ce7cb3a1
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALHardSwish.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_HARDSWISH_H
+#define LUCI_INTERPRETER_PAL_HARDSWISH_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void HardSwish(const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::optimized_ops::HardSwish(input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_HARDSWISH_H
diff --git a/compiler/luci-interpreter/pal/linux/PALL2Normalize.h b/compiler/luci-interpreter/pal/linux/PALL2Normalize.h
new file mode 100644
index 000000000..6c663e21f
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALL2Normalize.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+#define LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Normalization(const tflite::L2NormalizationParams &op_params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::optimized_ops::L2Normalization(op_params, input_shape, input_data, output_shape,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2NORMALIZE_H
diff --git a/compiler/luci-interpreter/pal/linux/PALL2Pool2D.h b/compiler/luci-interpreter/pal/linux/PALL2Pool2D.h
new file mode 100644
index 000000000..aac57f2b2
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALL2Pool2D.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2POOL2D_H
+#define LUCI_INTERPRETER_PAL_L2POOL2D_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Pool(const tflite::PoolParams &params, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::optimized_ops::L2Pool(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2POOL2D_H
diff --git a/compiler/luci-interpreter/pal/linux/PALLeakyRelu.h b/compiler/luci-interpreter/pal/linux/PALLeakyRelu.h
new file mode 100644
index 000000000..e8209bae6
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALLeakyRelu.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_LEAKYRELU_H
+#define LUCI_INTERPRETER_PAL_LEAKYRELU_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void LeakyRelu(const tflite::LeakyReluParams &params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::optimized_ops::LeakyRelu(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_LEAKYRELU_H
diff --git a/compiler/luci-interpreter/pal/linux/PALLocalResponseNormalization.h b/compiler/luci-interpreter/pal/linux/PALLocalResponseNormalization.h
new file mode 100644
index 000000000..54f7f0916
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALLocalResponseNormalization.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_LOCALRESPONSENORMALIZATION_H
+#define LUCI_INTERPRETER_PAL_LOCALRESPONSENORMALIZATION_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void
+LocalResponseNormalization(const tflite::LocalResponseNormalizationParams &op_params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::optimized_ops::LocalResponseNormalization(op_params, input_shape, input_data,
+ output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_LOCALRESPONSENORMALIZATION_H
diff --git a/compiler/luci-interpreter/pal/linux/PALLogSoftmax.h b/compiler/luci-interpreter/pal/linux/PALLogSoftmax.h
new file mode 100644
index 000000000..a32e3eec6
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALLogSoftmax.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_LOGSOFTMAX_H
+#define LUCI_INTERPRETER_PAL_LOGSOFTMAX_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale,
+ float beta)
+{
+ tflite::optimized_ops::PopulateSoftmaxLookupTable(data, input_scale, beta);
+}
+
+static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta)
+{
+ // Do nothing for linux
+ (void)params;
+ (void)input_scale;
+ (void)beta;
+}
+
+static inline void LogSoftmax(const tflite::SoftmaxParams &params, float input_scale,
+ const tflite::RuntimeShape &input_shape, const uint8 *input_data,
+ const tflite::RuntimeShape &output_shape, uint8 *output_data)
+{
+ tflite::optimized_ops::LogSoftmax(params, input_scale, input_shape, input_data, output_shape,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_LOGSOFTMAX_H
diff --git a/compiler/luci-interpreter/pal/linux/PALMul.h b/compiler/luci-interpreter/pal/linux/PALMul.h
new file mode 100644
index 000000000..a8a9d4abc
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALMul.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_MUL_H
+#define LUCI_INTERPRETER_PAL_MUL_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::optimized_ops::Mul(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data);
+}
+
+template <>
+inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const int64_t *input1_data, const tflite::RuntimeShape &input2_shape,
+ const int64_t *input2_data, const tflite::RuntimeShape &output_shape,
+ int64_t *output_data)
+{
+ tflite::optimized_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data);
+}
+
+template <typename T>
+static inline void
+BroadcastMul4DSlow(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::optimized_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_MUL_H
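Note that the int64_t overload above routes plain Mul through BroadcastMul4DSlow even when the shapes match, presumably because optimized_ops::Mul has no int64 kernel (an assumption; the patch does not state the reason). With assumed same-shape buffers:

// Both calls take the slow 4-D broadcast path for int64_t operands.
luci_interpreter_pal::Mul<int64_t>(params, shape, a_data, shape, b_data, shape, out_data);
luci_interpreter_pal::BroadcastMul4DSlow<int64_t>(params, shape, a_data, shape, b_data,
                                                  shape, out_data);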
diff --git a/compiler/luci-interpreter/pal/linux/PALNeg.h b/compiler/luci-interpreter/pal/linux/PALNeg.h
new file mode 100644
index 000000000..797ffee1b
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALNeg.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_NEG_H
+#define LUCI_INTERPRETER_PAL_NEG_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Negate(const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::Negate(input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_NEG_H
diff --git a/compiler/luci-interpreter/pal/linux/PALQuantize.h b/compiler/luci-interpreter/pal/linux/PALQuantize.h
new file mode 100644
index 000000000..bf1d7954e
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALQuantize.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H
+#define LUCI_INTERPRETER_PAL_QUANTIZE_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Quantize(tflite::QuantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::optimized_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+template <typename Input, typename Output>
+static inline void Requantize(const Input *input_data, int32_t size,
+ int32_t effective_scale_multiplier, int32_t effective_scale_shift,
+ int32_t input_zero_point, int32_t output_zero_point,
+ Output *output_data)
+{
+ tflite::optimized_ops::Requantize(input_data, size, effective_scale_multiplier,
+ effective_scale_shift, input_zero_point, output_zero_point,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H
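Requantize's effective_scale_multiplier/shift pair is TFLite's usual fixed-point encoding of input_scale / output_scale. A minimal sketch of deriving it with the stock helper from quantization_util.h; the scales, zero points, and buffers here are assumed:

#include <tensorflow/lite/kernels/internal/quantization_util.h>
#include <cstdint>

// Assumed example scales and zero points for an int8 -> int8 rescale.
const float input_scale = 0.5f, output_scale = 0.25f;
const int32_t input_zero_point = -1, output_zero_point = 2;

const double effective_scale = static_cast<double>(input_scale) / output_scale;
int32_t effective_scale_multiplier = 0;
int effective_scale_shift = 0;
tflite::QuantizeMultiplier(effective_scale, &effective_scale_multiplier, &effective_scale_shift);
// input_data, size, and output_data as in the surrounding kernel (assumed here).
luci_interpreter_pal::Requantize(input_data, size, effective_scale_multiplier,
                                 effective_scale_shift, input_zero_point, output_zero_point,
                                 output_data);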
diff --git a/compiler/luci-interpreter/pal/linux/PALRelu.h b/compiler/luci-interpreter/pal/linux/PALRelu.h
new file mode 100644
index 000000000..b4c715d3e
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALRelu.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RELU_H
+#define LUCI_INTERPRETER_PAL_RELU_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Relu(const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::optimized_ops::Relu(input_shape, input_data, output_shape, output_data);
+}
+
+template <typename T>
+static inline void ReluX(const tflite::ReluParams &params, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::optimized_ops::ReluX(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RELU_H
diff --git a/compiler/luci-interpreter/pal/linux/PALRelu6.h b/compiler/luci-interpreter/pal/linux/PALRelu6.h
new file mode 100644
index 000000000..bf2f91aa5
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALRelu6.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RELU6_H
+#define LUCI_INTERPRETER_PAL_RELU6_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Relu6(const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::optimized_ops::Relu6(input_shape, input_data, output_shape, output_data);
+}
+
+template <typename T>
+static inline void ReluX(const tflite::ReluParams &params, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::optimized_ops::ReluX(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RELU6_H
diff --git a/compiler/luci-interpreter/pal/linux/PALResizeBilinear.h b/compiler/luci-interpreter/pal/linux/PALResizeBilinear.h
new file mode 100644
index 000000000..7380081dc
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALResizeBilinear.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+#define LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+
+#include <tensorflow/lite/kernels/internal/optimized/resize_bilinear.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeBilinear(const tflite::ResizeBilinearParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::optimized_ops::ResizeBilinear(op_params, unextended_input_shape, input_data,
+ output_size_shape, output_size_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
diff --git a/compiler/luci-interpreter/pal/linux/PALResizeNearestNeighbor.h b/compiler/luci-interpreter/pal/linux/PALResizeNearestNeighbor.h
new file mode 100644
index 000000000..74d19265b
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALResizeNearestNeighbor.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+#define LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeNearestNeighbor(const tflite::ResizeNearestNeighborParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::optimized_ops::ResizeNearestNeighbor(op_params, unextended_input_shape, input_data,
+ output_size_shape, output_size_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
diff --git a/compiler/luci-interpreter/pal/linux/PALSVDF.h b/compiler/luci-interpreter/pal/linux/PALSVDF.h
new file mode 100644
index 000000000..0ffba14f0
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALSVDF.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SVDF_H
+#define LUCI_INTERPRETER_PAL_SVDF_H
+
+#include <tensorflow/lite/kernels/internal/reference/svdf.h>
+
+namespace luci_interpreter_pal
+{
+static inline void
+IntegerSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+ const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape,
+ const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+ const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape,
+ const int32_t *bias_data, int16_t *activation_state_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data,
+ int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a,
+ int scale_2_b, int32_t input_zp, int32_t output_zp)
+{
+ tflite::reference_ops::EvalIntegerSVDF(&params, input_shape, input_data, weight_feature_shape,
+ weight_feature_data, weight_time_shape, weight_time_data,
+ bias_shape, bias_data, activation_state_data, output_shape,
+ output_data, scratchpad_data, output_temp_data, scale_1_a,
+ scale_1_b, scale_2_a, scale_2_b, input_zp, output_zp);
+}
+static inline void
+FloatSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+ const float *input_data, const tflite::RuntimeShape &weight_feature_shape,
+ const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+ const float *weight_time_data, const tflite::RuntimeShape &bias_shape,
+ const float *bias_data, float *scratchpad_data, float *activation_state_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::EvalFloatSVDF(&params, input_shape, input_data, weight_feature_shape,
+ weight_feature_data, weight_time_shape, weight_time_data,
+ bias_shape, bias_data, scratchpad_data,
+ activation_state_data, output_shape, output_data);
+}
+
+static inline void SetupScratchpadTensor(
+ const luci_interpreter::DataType &input_data_type,
+ const luci_interpreter::DataType &weight_feature_data_type,
+ luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2,
+ luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4,
+ luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6,
+ const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape,
+ const int32_t batch_size, const int32_t num_filters, const int32_t num_units)
+{
+
+ if (input_data_type == loco::DataType::FLOAT32 &&
+ (weight_feature_data_type == loco::DataType::S8 ||
+ weight_feature_data_type == loco::DataType::U8))
+ {
+ (void)input_shape;
+ (void)weight_time_shape;
+ (void)scratchpad_3;
+ (void)scratchpad_4;
+ (void)scratchpad_5;
+ (void)scratchpad_6;
+
+ throw std::runtime_error("Hybrid type is not currently supported for linux platform");
+ }
+
+ // Resize scratchpad_1 tensor
+ scratchpad_1->resize({batch_size, num_filters});
+
+ if (input_data_type == loco::DataType::S8)
+ {
+ // Resize scratchpad_2 for full_integer op
+ scratchpad_2->resize({batch_size, num_units});
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SVDF_H
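For concreteness, assuming batch_size = 1, num_filters = 16, and num_units = 4, the resize calls above amount to:

scratchpad_1->resize({1, 16}); // {batch_size, num_filters}, allocated for every dtype
scratchpad_2->resize({1, 4});  // {batch_size, num_units}, only on the S8 full-integer path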
diff --git a/compiler/luci-interpreter/pal/linux/PALSlice.h b/compiler/luci-interpreter/pal/linux/PALSlice.h
new file mode 100644
index 000000000..640a71684
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALSlice.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SLICE_H
+#define LUCI_INTERPRETER_PAL_SLICE_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Slice(const tflite::SliceParams &op_params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::optimized_ops::Slice(op_params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SLICE_H
diff --git a/compiler/luci-interpreter/pal/linux/PALSoftmax.h b/compiler/luci-interpreter/pal/linux/PALSoftmax.h
new file mode 100644
index 000000000..b197e79d1
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALSoftmax.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SOFTMAX_H
+#define LUCI_INTERPRETER_PAL_SOFTMAX_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale,
+ float beta)
+{
+ tflite::optimized_ops::PopulateSoftmaxLookupTable(data, input_scale, beta);
+}
+
+static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta)
+{
+ // Do nothing for linux
+ (void)params;
+ (void)input_scale;
+ (void)beta;
+}
+
+template <typename In, typename Out>
+static inline void Softmax(const tflite::SoftmaxParams &params,
+ const tflite::RuntimeShape &input_shape, const In *input_data,
+ const tflite::RuntimeShape &output_shape, Out *output_data)
+{
+ tflite::optimized_ops::Softmax(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SOFTMAX_H
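A rough sketch of the intended call order on the quantized path; the scale, beta, and buffers are assumed, and the kernel is expected to own the 256-entry table that PopulateSoftmaxLookupTable fills:

const float input_scale = 0.0078125f; // assumed quantization scale
const float beta = 1.0f;
float table[256]; // lookup storage owned by the kernel; 256 entries for uint8 inputs
tflite::SoftmaxParams params{};
params.beta = beta;
params.table = table;
// Configure time: build the table once per (scale, beta) pair.
luci_interpreter_pal::PopulateSoftmaxLookupTable(&params, input_scale, beta);
luci_interpreter_pal::InitializeParams(&params, input_scale, beta); // no-op on this platform
// Execute time (shapes and data buffers as in the surrounding kernel):
luci_interpreter_pal::Softmax(params, input_shape, input_data, output_shape, output_data);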
diff --git a/compiler/luci-interpreter/pal/linux/PALSpaceToBatchND.h b/compiler/luci-interpreter/pal/linux/PALSpaceToBatchND.h
new file mode 100644
index 000000000..5e8de9ba3
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALSpaceToBatchND.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+#define LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+SpaceToBatchND(const tflite::SpaceToBatchParams &params,
+ const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+ const tflite::RuntimeShape &unextended_input3_shape, const int32 *paddings_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::optimized_ops::SpaceToBatchND(
+ params, unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+ unextended_input3_shape, paddings_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
diff --git a/compiler/luci-interpreter/pal/linux/PALSpaceToDepth.h b/compiler/luci-interpreter/pal/linux/PALSpaceToDepth.h
new file mode 100644
index 000000000..52d2a5bb1
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALSpaceToDepth.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+#define LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void SpaceToDepth(const tflite::SpaceToDepthParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape,
+ const T *input_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::optimized_ops::SpaceToDepth(op_params, unextended_input_shape, input_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETODEPTH_H
diff --git a/compiler/luci-interpreter/pal/linux/PALSplit.h b/compiler/luci-interpreter/pal/linux/PALSplit.h
new file mode 100644
index 000000000..4d8da72d8
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALSplit.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPLIT_H
+#define LUCI_INTERPRETER_PAL_SPLIT_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename Scalar>
+static inline void Split(const tflite::SplitParams &params, const tflite::RuntimeShape &input_shape,
+ const Scalar *input_data, const tflite::RuntimeShape *const *output_shapes,
+ Scalar *const *output_data)
+{
+ tflite::optimized_ops::Split(params, input_shape, input_data, output_shapes, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPLIT_H
diff --git a/compiler/luci-interpreter/pal/linux/PALSub.h b/compiler/luci-interpreter/pal/linux/PALSub.h
new file mode 100644
index 000000000..04080d619
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALSub.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SUB_H
+#define LUCI_INTERPRETER_PAL_SUB_H
+
+#include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Sub(const tflite::ArithmeticParams &params,
+ const tflite::RuntimeShape &input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &input2_shape, const T *input2_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::optimized_ops::Sub(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SUB_H
diff --git a/compiler/luci-interpreter/pal/linux/PALreference_ops.h b/compiler/luci-interpreter/pal/linux/PALreference_ops.h
new file mode 100644
index 000000000..825ebfe8e
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALreference_ops.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
+#define LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+#endif // LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
diff --git a/compiler/luci-interpreter/pal/linux/pal.cmake b/compiler/luci-interpreter/pal/linux/pal.cmake
new file mode 100644
index 000000000..28f6352bc
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/pal.cmake
@@ -0,0 +1,133 @@
+# Set the target platform to run on
+if(NOT TARGET_ARCH OR "${TARGET_ARCH}" STREQUAL "")
+ string(TOLOWER ${CMAKE_SYSTEM_PROCESSOR} TARGET_ARCH)
+else()
+ string(TOLOWER ${TARGET_ARCH} TARGET_ARCH)
+endif()
+
+# If TARGET_ARCH is arm64, normalize the name to aarch64
+if("${TARGET_ARCH}" STREQUAL "arm64")
+ set(TARGET_ARCH "aarch64")
+endif()
+
+if("${TARGET_ARCH}" STREQUAL "armv8-m")
+ set(TARGET_ARCH_BASE "arm")
+elseif("${TARGET_ARCH}" STREQUAL "armv7-r")
+ set(TARGET_ARCH_BASE "arm")
+elseif("${TARGET_ARCH}" STREQUAL "armv7em")
+ set(TARGET_ARCH_BASE "arm")
+elseif("${TARGET_ARCH}" STREQUAL "armv7l")
+ set(TARGET_ARCH_BASE "arm")
+elseif("${TARGET_ARCH}" STREQUAL "armv7hl")
+ set(TARGET_ARCH_BASE "arm")
+elseif("${TARGET_ARCH}" STREQUAL "aarch64")
+ set(TARGET_ARCH_BASE "aarch64")
+endif()
+
+macro(initialize_pal)
+ nnas_find_package(TensorFlowSource EXACT 2.8.0 QUIET)
+ nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.8.0 QUIET)
+ nnas_find_package(TensorFlowEigenSource EXACT 2.8.0 QUIET)
+ nnas_find_package(TensorFlowRuySource EXACT 2.8.0 QUIET)
+
+ if (NOT TensorFlowSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: TensorFlow not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowGEMMLowpSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: gemmlowp not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowEigenSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Eigen not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowRuySource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Ruy not found")
+ return()
+ endif ()
+
+ find_package(Threads REQUIRED)
+
+ set(PAL_INITIALIZED TRUE)
+endmacro()
+
+macro(add_pal_to_target TGT)
+ target_include_directories(${TGT} PRIVATE "${PAL}")
+ target_include_directories(${TGT} SYSTEM PRIVATE
+ "${TensorFlowRuySource_DIR}"
+ "${TensorFlowGEMMLowpSource_DIR}"
+ "${TensorFlowEigenSource_DIR}"
+ "${TensorFlowSource_DIR}")
+ target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR})
+
+  # TODO Revisit this: rather than compiling these TensorFlow Lite sources here,
+  # add the sources together with their visitors to this library.
+ set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc)
+
+ if(TARGET_ARCH_BASE STREQUAL "arm")
+ # NOTE may need to revise this list for version upgrade
+ set(PAL_SOURCES ${PAL_SOURCES}
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/optimized/cpu_check.cc
+ ${TensorFlowRuySource_DIR}/ruy/allocator.cc
+ ${TensorFlowRuySource_DIR}/ruy/block_map.cc
+ ${TensorFlowRuySource_DIR}/ruy/blocking_counter.cc
+ ${TensorFlowRuySource_DIR}/ruy/context_get_ctx.cc
+ ${TensorFlowRuySource_DIR}/ruy/cpuinfo.cc
+ ${TensorFlowRuySource_DIR}/ruy/ctx.cc
+ ${TensorFlowRuySource_DIR}/ruy/denormal.cc
+ ${TensorFlowRuySource_DIR}/ruy/frontend.cc
+ ${TensorFlowRuySource_DIR}/ruy/pack_arm.cc
+ ${TensorFlowRuySource_DIR}/ruy/prepacked_cache.cc
+ ${TensorFlowRuySource_DIR}/ruy/prepare_packed_matrices.cc
+ ${TensorFlowRuySource_DIR}/ruy/system_aligned_alloc.cc
+ ${TensorFlowRuySource_DIR}/ruy/thread_pool.cc
+ ${TensorFlowRuySource_DIR}/ruy/trmul.cc
+ ${TensorFlowRuySource_DIR}/ruy/tune.cc
+ ${TensorFlowRuySource_DIR}/ruy/wait.cc
+ ${TensorFlowRuySource_DIR}/ruy/kernel_arm32.cc
+ )
+ endif(TARGET_ARCH_BASE STREQUAL "arm")
+
+ if(TARGET_ARCH_BASE STREQUAL "aarch64")
+ # NOTE may need to revise this list for version upgrade
+ set(PAL_SOURCES ${PAL_SOURCES}
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/optimized/cpu_check.cc
+ ${TensorFlowRuySource_DIR}/ruy/allocator.cc
+ ${TensorFlowRuySource_DIR}/ruy/block_map.cc
+ ${TensorFlowRuySource_DIR}/ruy/blocking_counter.cc
+ ${TensorFlowRuySource_DIR}/ruy/context_get_ctx.cc
+ ${TensorFlowRuySource_DIR}/ruy/cpuinfo.cc
+ ${TensorFlowRuySource_DIR}/ruy/ctx.cc
+ ${TensorFlowRuySource_DIR}/ruy/denormal.cc
+ ${TensorFlowRuySource_DIR}/ruy/frontend.cc
+ ${TensorFlowRuySource_DIR}/ruy/pack_arm.cc
+ ${TensorFlowRuySource_DIR}/ruy/prepacked_cache.cc
+ ${TensorFlowRuySource_DIR}/ruy/prepare_packed_matrices.cc
+ ${TensorFlowRuySource_DIR}/ruy/system_aligned_alloc.cc
+ ${TensorFlowRuySource_DIR}/ruy/thread_pool.cc
+ ${TensorFlowRuySource_DIR}/ruy/trmul.cc
+ ${TensorFlowRuySource_DIR}/ruy/tune.cc
+ ${TensorFlowRuySource_DIR}/ruy/wait.cc
+ ${TensorFlowRuySource_DIR}/ruy/kernel_arm64.cc
+ )
+ endif(TARGET_ARCH_BASE STREQUAL "aarch64")
+
+ add_library(luci_interpreter_linux_pal STATIC ${PAL_SOURCES})
+ set_target_properties(luci_interpreter_linux_pal PROPERTIES POSITION_INDEPENDENT_CODE ON)
+ target_include_directories(luci_interpreter_linux_pal SYSTEM PRIVATE
+ "${TensorFlowRuySource_DIR}"
+ "${TensorFlowGEMMLowpSource_DIR}"
+ "${TensorFlowEigenSource_DIR}"
+ "${TensorFlowSource_DIR}"
+ )
+
+ target_link_libraries(${TGT} PRIVATE Threads::Threads luci_interpreter_linux_pal)
+endmacro()
diff --git a/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst b/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst
new file mode 100644
index 000000000..fe3f73f5d
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst
@@ -0,0 +1,63 @@
+REGISTER_KERNEL(Abs)
+REGISTER_KERNEL(Add)
+REGISTER_KERNEL(ArgMax)
+REGISTER_KERNEL(AveragePool2D)
+REGISTER_KERNEL(BatchToSpaceND)
+REGISTER_KERNEL(Cast)
+REGISTER_KERNEL(Concatenation)
+REGISTER_KERNEL(Conv2D)
+REGISTER_KERNEL(DepthToSpace)
+REGISTER_KERNEL(DepthwiseConv2D)
+REGISTER_KERNEL(Dequantize)
+REGISTER_KERNEL(Div)
+REGISTER_KERNEL(Elu)
+REGISTER_KERNEL(Exp)
+REGISTER_KERNEL(ExpandDims)
+REGISTER_KERNEL(Fill)
+REGISTER_KERNEL(Floor)
+REGISTER_KERNEL(FloorDiv)
+REGISTER_KERNEL(Equal)
+REGISTER_KERNEL(FullyConnected)
+REGISTER_KERNEL(Greater)
+REGISTER_KERNEL(GreaterEqual)
+REGISTER_KERNEL(If)
+REGISTER_KERNEL(InstanceNorm)
+REGISTER_KERNEL(L2Normalize)
+REGISTER_KERNEL(L2Pool2D)
+REGISTER_KERNEL(LeakyRelu)
+REGISTER_KERNEL(Less)
+REGISTER_KERNEL(LessEqual)
+REGISTER_KERNEL(LogicalAnd)
+REGISTER_KERNEL(LogicalNot)
+REGISTER_KERNEL(LogicalOr)
+REGISTER_KERNEL(Logistic)
+REGISTER_KERNEL(Maximum)
+REGISTER_KERNEL(MaxPool2D)
+REGISTER_KERNEL(Minimum)
+REGISTER_KERNEL(MirrorPad)
+REGISTER_KERNEL(Mul)
+REGISTER_KERNEL(Neg)
+REGISTER_KERNEL(NotEqual)
+REGISTER_KERNEL(Pad)
+REGISTER_KERNEL(PadV2)
+REGISTER_KERNEL(PRelu)
+REGISTER_KERNEL(Quantize)
+REGISTER_KERNEL(Reshape)
+REGISTER_KERNEL(ResizeBilinear)
+REGISTER_KERNEL(ResizeNearestNeighbor)
+REGISTER_KERNEL(Rsqrt)
+REGISTER_KERNEL(Shape)
+REGISTER_KERNEL(Softmax)
+REGISTER_KERNEL(SpaceToBatchND)
+REGISTER_KERNEL(SpaceToDepth)
+REGISTER_KERNEL(StridedSlice)
+REGISTER_KERNEL(Sqrt)
+REGISTER_KERNEL(Square)
+REGISTER_KERNEL(SquaredDifference)
+REGISTER_KERNEL(Squeeze)
+REGISTER_KERNEL(Sub)
+REGISTER_KERNEL(SVDF)
+REGISTER_KERNEL(Tanh)
+REGISTER_KERNEL(Transpose)
+REGISTER_KERNEL(TransposeConv)
+REGISTER_KERNEL(While)
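A list like this is consumed through the X-macro pattern: the including
translation unit defines REGISTER_KERNEL and then includes the file, much as
the interpreter's kernel builder does. A minimal sketch of the pattern (the
function below is illustrative only, not part of this change):

#include <string>
#include <vector>

std::vector<std::string> getRegisteredKernelNames()
{
  std::vector<std::string> names;
// Each REGISTER_KERNEL(Name) line in the list expands to a push_back("Name").
#define REGISTER_KERNEL(name) names.push_back(#name);
#include "KernelsToBuild.lst"
#undef REGISTER_KERNEL
  return names;
}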
diff --git a/compiler/luci-interpreter/pal/mcu/PALArgMax.h b/compiler/luci-interpreter/pal/mcu/PALArgMax.h
new file mode 100644
index 000000000..21e63296d
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALArgMax.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ARGMAX_H
+#define LUCI_INTERPRETER_PAL_ARGMAX_H
+
+#include <tensorflow/lite/kernels/internal/reference/arg_min_max.h>
+
+namespace luci_interpreter_pal
+{
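+// Thin adapter over the TFLite reference kernel; passing std::greater as the
+// comparator makes the generic ArgMinMax compute ArgMax.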
+template <typename T1, typename T2, typename T3>
+static inline void ArgMinMax(const tflite::RuntimeShape &input1_shape, const T1 *input1_data,
+ const T2 *axis, const tflite::RuntimeShape &output_shape,
+ T3 *output_data, const std::greater<T1> cmp)
+{
+ tflite::reference_ops::ArgMinMax(input1_shape, input1_data, axis, output_shape, output_data, cmp);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ARGMAX_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALAveragePool2d.h b/compiler/luci-interpreter/pal/mcu/PALAveragePool2d.h
new file mode 100644
index 000000000..cce30601f
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALAveragePool2d.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+#define LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+namespace luci_interpreter_pal
+{
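+// Only the int8_t specialization below is implemented for MCU; the generic
+// template is a stub that traps at runtime.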
+template <typename T>
+static inline void AveragePool(const tflite::PoolParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data,
+ const tflite::RuntimeShape &scratchpad_shape, T *scratchpad_data)
+{
+ // MARK: At this moment this operation is not supported on MCU
+ assert(false && "AveragePool NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)output_shape;
+ (void)output_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+}
+
+template <>
+inline void AveragePool<int8_t>(const tflite::PoolParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data,
+ const tflite::RuntimeShape &scratchpad_shape,
+ int8_t *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+
+ tflite::reference_integer_ops::AveragePool(params, input_shape, input_data, output_shape,
+ output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &output_shape)
+{
+ (void)input_data_type;
+ (void)input_shape;
+ (void)output_shape;
+
+ scratchpad->set_allocatable(false);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALBatchToSpaceND.h b/compiler/luci-interpreter/pal/mcu/PALBatchToSpaceND.h
new file mode 100644
index 000000000..f8a4a8036
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALBatchToSpaceND.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+#define LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+
+#include <tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+BatchToSpaceND(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+ const tflite::RuntimeShape &unextended_input3_shape, const int32 *crops_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::BatchToSpaceND(
+ unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+ unextended_input3_shape, crops_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALConv2d.h b/compiler/luci-interpreter/pal/mcu/PALConv2d.h
new file mode 100644
index 000000000..13976877a
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALConv2d.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_CONV2D_H
+#define LUCI_INTERPRETER_PAL_CONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/conv.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+ const float *input_data, const tflite::RuntimeShape &filter_shape,
+ const float *filter_data, const tflite::RuntimeShape &bias_shape,
+ const float *bias_data, const tflite::RuntimeShape &output_shape,
+ float *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ float *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data,
+ tflite::RuntimeShape(), nullptr);
+}
+
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+ const uint8 *input_data, const tflite::RuntimeShape &filter_shape,
+ const uint8 *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+ uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ uint8 *scratchpad_data)
+{
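+ // The reference kernel ignores the im2col buffer and backend context; the
+ // scratchpad arguments and the trailing nullptr only satisfy its signature.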
+ tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data, scratchpad_shape,
+ scratchpad_data, nullptr);
+}
+
+static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult,
+ const int32_t *shifts, const tflite::RuntimeShape &input_shape,
+ const int8 *input_data, const tflite::RuntimeShape &filter_shape,
+ const int8 *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+ int8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ int8 *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
+ filter_shape, filter_data, bias_shape, bias_data,
+ output_shape, output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::ConvParams &params,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &filter_shape,
+ const tflite::RuntimeShape &output_shape)
+{
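+ // The reference Conv kernels need no scratchpad, so keep the tensor out of
+ // memory planning.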
+ (void)input_data_type;
+ (void)params;
+ (void)input_shape;
+ (void)filter_shape;
+ (void)output_shape;
+ scratchpad->set_allocatable(false);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_CONV2D_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALDepthToSpace.h b/compiler/luci-interpreter/pal/mcu/PALDepthToSpace.h
new file mode 100644
index 000000000..8463e571e
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALDepthToSpace.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+#define LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+
+#include <tensorflow/lite/kernels/internal/reference/depth_to_space.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void DepthToSpace(const tflite::DepthToSpaceParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape,
+ const T *input_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::DepthToSpace(op_params, unextended_input_shape, input_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h b/compiler/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h
new file mode 100644
index 000000000..c9d1a2948
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+#define LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+DepthwiseConvPerChannel(const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+ const int32_t *output_shift, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &filter_shape,
+ const T *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32_t *bias_data, const tflite::RuntimeShape &output_shape,
+ T *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ T *scratchpad_data)
+{
+ // MARK: At this moment this operation is not supported on MCU
+ assert(false && "DepthwiseConvPerChannel NYI");
+ (void)params;
+ (void)output_multiplier;
+ (void)output_shift;
+ (void)input_shape;
+ (void)input_data;
+ (void)filter_shape;
+ (void)filter_data;
+ (void)bias_shape;
+ (void)bias_data;
+ (void)output_shape;
+ (void)output_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+}
+
+template <>
+inline void DepthwiseConvPerChannel<int8_t>(
+ const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+ const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data,
+ const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ tflite::reference_integer_ops::DepthwiseConvPerChannel(
+ params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const tflite::DepthwiseParams &params,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &filter_shape,
+ const tflite::RuntimeShape &output_shape)
+{
+ (void)params;
+ (void)input_data_type;
+ (void)input_shape;
+ (void)filter_shape;
+ (void)output_shape;
+
+ scratchpad->set_allocatable(false);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALDequantize.h b/compiler/luci-interpreter/pal/mcu/PALDequantize.h
new file mode 100644
index 000000000..efa6b167e
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALDequantize.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h"
+#include "PALreference_ops.h"
+
+namespace luci_interpreter_pal
+{
+
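+// The generic template covers the reference_integer_ops path (e.g. int8_t,
+// int16_t); the uint8_t overload below uses the plain reference kernel.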
+template <typename T>
+static inline void Dequantize(const tflite::DequantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_integer_ops::Dequantize<T>(params, input_shape, input_data, output_shape,
+ output_data);
+}
+
+static inline void Dequantize(const tflite::DequantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const uint8_t *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::Dequantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALElu.h b/compiler/luci-interpreter/pal/mcu/PALElu.h
new file mode 100644
index 000000000..4089d0a0c
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALElu.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ELU_H
+#define LUCI_INTERPRETER_PAL_ELU_H
+
+#include <tensorflow/lite/kernels/internal/reference/elu.h>
+
+namespace luci_interpreter_pal
+{
+
+static inline void Elu(const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::Elu(input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ELU_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALFullyConnected.h b/compiler/luci-interpreter/pal/mcu/PALFullyConnected.h
new file mode 100644
index 000000000..048624d74
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALFullyConnected.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+#define LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+
+#include <tensorflow/lite/kernels/internal/reference/fully_connected.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void FullyConnected(const tflite::FullyConnectedParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &filter_shape, const T *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ // MARK: At this moment this operation is not supported on MCU
+ assert(false && "FullyConnected NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)filter_shape;
+ (void)filter_data;
+ (void)bias_shape;
+ (void)bias_data;
+ (void)output_shape;
+ (void)output_data;
+}
+
+template <>
+inline void
+FullyConnected<int8_t>(const tflite::FullyConnectedParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data)
+{
+ tflite::reference_integer_ops::FullyConnected(params, input_shape, input_data, filter_shape,
+ filter_data, bias_shape, bias_data, output_shape,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALL2Normalize.h b/compiler/luci-interpreter/pal/mcu/PALL2Normalize.h
new file mode 100644
index 000000000..f84742a44
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALL2Normalize.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+#define LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+
+#include <tensorflow/lite/kernels/internal/reference/l2normalization.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Normalization(const tflite::L2NormalizationParams &op_params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::L2Normalization(op_params, input_shape, input_data, output_shape,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2NORMALIZE_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALL2Pool2D.h b/compiler/luci-interpreter/pal/mcu/PALL2Pool2D.h
new file mode 100644
index 000000000..38a302fc6
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALL2Pool2D.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2POOL2D_H
+#define LUCI_INTERPRETER_PAL_L2POOL2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Pool(const tflite::PoolParams &params, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::reference_ops::L2Pool(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2POOL2D_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALLeakyRelu.h b/compiler/luci-interpreter/pal/mcu/PALLeakyRelu.h
new file mode 100644
index 000000000..9ccd2224f
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALLeakyRelu.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_LEAKYRELU_H
+#define LUCI_INTERPRETER_PAL_LEAKYRELU_H
+
+#include <tensorflow/lite/kernels/internal/reference/leaky_relu.h>
+
+namespace luci_interpreter_pal
+{
+static inline void LeakyRelu(const tflite::LeakyReluParams &params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::LeakyRelu(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_LEAKYRELU_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALMul.h b/compiler/luci-interpreter/pal/mcu/PALMul.h
new file mode 100644
index 000000000..347a97a83
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALMul.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_MUL_H
+#define LUCI_INTERPRETER_PAL_MUL_H
+
+#include <tensorflow/lite/kernels/internal/reference/mul.h>
+
+namespace luci_interpreter_pal
+{
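+// Both entry points fall back to the broadcasting reference kernel; the MCU
+// PAL wires up no fast same-shape path.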
+template <typename T>
+static inline void Mul(const tflite::ArithmeticParams &params,
+ const tflite::RuntimeShape &input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &input2_shape, const T *input2_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data);
+}
+
+template <typename T>
+static inline void
+BroadcastMul4DSlow(const tflite::ArithmeticParams &params,
+ const tflite::RuntimeShape &input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &input2_shape, const T *input2_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_MUL_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALNeg.h b/compiler/luci-interpreter/pal/mcu/PALNeg.h
new file mode 100644
index 000000000..be5903a0c
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALNeg.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_NEG_H
+#define LUCI_INTERPRETER_PAL_NEG_H
+
+#include <tensorflow/lite/kernels/internal/reference/neg.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Negate(const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::Negate(input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_NEG_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALQuantize.h b/compiler/luci-interpreter/pal/mcu/PALQuantize.h
new file mode 100644
index 000000000..effb85d54
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALQuantize.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H
+#define LUCI_INTERPRETER_PAL_QUANTIZE_H
+
+#include "PALreference_ops.h"
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Quantize(const tflite::QuantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data);
+}
+
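+// Requantize rescales values that are already quantized into another
+// quantized type, using a precomputed integer multiplier/shift pair.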
+template <typename Input, typename Output>
+static inline void Requantize(const Input *input_data, int32_t size,
+ int32_t effective_scale_multiplier, int32_t effective_scale_shift,
+ int32_t input_zero_point, int32_t output_zero_point,
+ Output *output_data)
+{
+ tflite::reference_ops::Requantize(input_data, size, effective_scale_multiplier,
+ effective_scale_shift, input_zero_point, output_zero_point,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALResizeBilinear.h b/compiler/luci-interpreter/pal/mcu/PALResizeBilinear.h
new file mode 100644
index 000000000..cc9f0fd54
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALResizeBilinear.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+#define LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+
+#include <tensorflow/lite/kernels/internal/reference/resize_bilinear.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeBilinear(const tflite::ResizeBilinearParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::ResizeBilinear(op_params, unextended_input_shape, input_data,
+ output_size_shape, output_size_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h b/compiler/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h
new file mode 100644
index 000000000..f4d5a6ed3
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+#define LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+
+#include <tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeNearestNeighbor(const tflite::ResizeNearestNeighborParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::ResizeNearestNeighbor(op_params, unextended_input_shape, input_data,
+ output_size_shape, output_size_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALSVDF.h b/compiler/luci-interpreter/pal/mcu/PALSVDF.h
new file mode 100644
index 000000000..3bba668fb
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALSVDF.h
@@ -0,0 +1,258 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SVDF_H
+#define LUCI_INTERPRETER_PAL_SVDF_H
+
+#include <tensorflow/lite/kernels/internal/reference/svdf.h>
+
+namespace luci_interpreter_pal
+{
+static inline void
+IntegerSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+ const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape,
+ const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+ const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape,
+ const int32_t *bias_data, int16_t *activation_state_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data,
+ int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a,
+ int scale_2_b, int32_t input_zp, int32_t output_zp)
+{
+ const int n_rank = params.rank;
+ const int n_batch = input_shape.Dims(0);
+ const int n_input = input_shape.Dims(1);
+ const int n_filter = weight_feature_shape.Dims(0);
+ const int n_unit = n_filter / n_rank;
+ const int n_memory = weight_time_shape.Dims(1);
+
+ // Left shift the activation_state.
+ {
+ int16_t *new_state_start = activation_state_data;
+ const int16_t *old_state_start = activation_state_data + 1;
+ const int16_t *old_state_end = activation_state_data + n_batch * n_filter * n_memory;
+ while (old_state_start != old_state_end)
+ {
+ *new_state_start++ = *old_state_start++;
+ }
+ }
+
+ // Note: no need to clear the latest activation, matmul is not accumulative.
+
+ // Feature matmul.
+ {
+ const int32_t output_max = std::numeric_limits<int16_t>::max();
+ const int32_t output_min = std::numeric_limits<int16_t>::min();
+ int16_t *result_in_batch = activation_state_data + (n_memory - 1);
+ for (int b = 0; b < n_batch; b++)
+ {
+ const int8_t *matrix_ptr = weight_feature_data;
+ for (int r = 0; r < n_filter; r++)
+ {
+ int32_t dot_prod = 0;
+ const int8_t *vector_in_batch = input_data + b * n_input;
+ for (int c = 0; c < n_input; c++)
+ {
+ dot_prod += *matrix_ptr++ * (*vector_in_batch++ - input_zp);
+ }
+ dot_prod = tflite::MultiplyByQuantizedMultiplier(dot_prod, scale_1_a, scale_1_b);
+ dot_prod = std::min(std::max(output_min, dot_prod), output_max);
+ // This assumes state is symmetrically quantized. Otherwise last bit of
+ // state should be initialized to its zero point and accumulate the
+ // dot_prod.
+ // Equivalent as the following:
+ // result_in_batch = zero point, which happens to be zero.
+ // result_in_batch += dot_prod_56.
+ *result_in_batch = dot_prod;
+ result_in_batch += n_memory;
+ }
+ }
+ }
+
+ // Time.
+ {
+ for (int b = 0; b < n_batch; ++b)
+ {
+ int32_t *scratch_ptr_batch = scratchpad_data + b * n_filter;
+
+ // Perform batched vector dot product:
+ const int16_t *vector1_ptr = weight_time_data;
+ const int16_t *vector2_ptr = activation_state_data + b * n_memory * n_filter;
+
+ for (int i = 0; i < n_filter; i++)
+ {
+ *scratch_ptr_batch = 0;
+ for (int j = 0; j < n_memory; j++)
+ {
+ *scratch_ptr_batch += *vector1_ptr++ * *vector2_ptr++;
+ }
+ scratch_ptr_batch++;
+ }
+ }
+ }
+
+ // Reduce, add bias, rescale, activation.
+ {
+ // Add bias.
+ if (bias_data)
+ {
+ // Vector batch assign:
+ for (int i = 0; i < n_batch; ++i)
+ {
+ int32_t *output_ptr = output_temp_data + i * n_unit;
+ const int32_t *bias_ptr = bias_data;
+ for (int j = 0; j < n_unit; ++j)
+ {
+ *output_ptr++ = *bias_ptr++;
+ }
+ }
+ }
+ else
+ {
+ int32_t *output_ptr = output_temp_data;
+ for (int i = 0; i < n_batch * n_unit; ++i)
+ {
+ *output_ptr++ = 0;
+ }
+ }
+
+ // Reduce.
+ for (int b = 0; b < n_batch; ++b)
+ {
+ int32_t *output_temp_ptr = output_temp_data + b * n_unit;
+ int32_t *scratch_ptr_batch = scratchpad_data + b * n_filter;
+
+ // Reduction sum vector
+ for (int i = 0; i < n_unit; ++i)
+ {
+ for (int j = 0; j < n_rank; ++j)
+ {
+ output_temp_ptr[i] += *scratch_ptr_batch++;
+ }
+ }
+ }
+
+ // Rescale.
+ const int32_t output_max = std::numeric_limits<int8_t>::max();
+ const int32_t output_min = std::numeric_limits<int8_t>::min();
+ for (int i = 0; i < n_batch * n_unit; ++i)
+ {
+ int32_t x1 = output_temp_data[i];
+ int32_t x2 = tflite::MultiplyByQuantizedMultiplier(x1, scale_2_a, scale_2_b);
+ int32_t x3 = x2 + output_zp;
+ int32_t x4 = std::min(std::max(output_min, x3), output_max);
+ output_data[i] = static_cast<int8_t>(x4);
+ }
+ }
+}
+
+static inline void
+FloatSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+ const float *input_data, const tflite::RuntimeShape &weight_feature_shape,
+ const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+ const float *weight_time_data, const tflite::RuntimeShape &bias_shape,
+ const float *bias_data, float *scratchpad_data, float *activation_state_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ const int32_t rank = params.rank;
+ const int32_t batch_size = input_shape.Dims(0);
+ const int32_t input_size = input_shape.Dims(1);
+ const int32_t num_filters = weight_feature_shape.Dims(0);
+ const int32_t num_units = num_filters / rank;
+ const int32_t memory_size = weight_time_shape.Dims(1);
+
+ // Left shift the activation_state.
+ {
+ float *new_state_start = activation_state_data;
+ const float *old_state_start = activation_state_data + 1;
+ const float *old_state_end = activation_state_data + batch_size * num_filters * memory_size;
+ while (old_state_start != old_state_end)
+ {
+ *new_state_start++ = *old_state_start++;
+ }
+ }
+
+ // Note: no need to clear the latest activation, matmul is not accumulative.
+
+ // Compute conv1d(inputs, weights_feature).
+ // The activation_state's rightmost column is used to save current cycle
+ // activation. This is achieved by starting at state_ptr[memory_size - 1] and
+ // having the stride equal to memory_size.
+
+ // Perform batched matrix vector multiply operation:
+ {
+ const float *matrix = weight_feature_data;
+ const float *vector = input_data;
+ float *result = &activation_state_data[memory_size - 1];
+ float *result_in_batch = result;
+ for (int i = 0; i < batch_size; ++i)
+ {
+ const float *matrix_ptr = matrix;
+ for (int j = 0; j < num_filters; ++j)
+ {
+ float dot_prod = 0.0f;
+ const float *vector_in_batch = vector + i * input_size;
+ for (int k = 0; k < input_size; ++k)
+ {
+ dot_prod += *matrix_ptr++ * *vector_in_batch++;
+ }
+ *result_in_batch = dot_prod;
+ result_in_batch += memory_size;
+ }
+ }
+ }
+
+ tflite::reference_ops::ApplyTimeWeightsBiasAndActivation(
+ batch_size, memory_size, num_filters, num_units, rank, weight_time_data, bias_data,
+ params.activation, activation_state_data, scratchpad_data, output_data);
+}
+
+static inline void SetupScratchpadTensor(
+ const luci_interpreter::DataType &input_data_type,
+ const luci_interpreter::DataType &weight_feature_data_type,
+ luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2,
+ luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4,
+ luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6,
+ const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape,
+ const int32_t batch_size, const int32_t num_filters, const int32_t num_units)
+{
+
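+ // Float activations combined with quantized weights form the hybrid SVDF
+ // path, which the MCU PAL does not implement.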
+ if (input_data_type == loco::DataType::FLOAT32 &&
+ (weight_feature_data_type == loco::DataType::S8 ||
+ weight_feature_data_type == loco::DataType::U8))
+ {
+ (void)input_shape;
+ (void)weight_time_shape;
+ (void)scratchpad_3;
+ (void)scratchpad_4;
+ (void)scratchpad_5;
+ (void)scratchpad_6;
+
+ throw std::runtime_error("Hybrid type is not currently supported for mcu platform");
+ }
+
+ // Resize scratchpad_1 tensor
+ scratchpad_1->resize({batch_size, num_filters});
+
+ if (input_data_type == loco::DataType::S8)
+ {
+ // Resize scratchpad_2 for full_integer op
+ scratchpad_2->resize({batch_size, num_units});
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SVDF_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALSoftmax.h b/compiler/luci-interpreter/pal/mcu/PALSoftmax.h
new file mode 100644
index 000000000..9838b542d
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALSoftmax.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SOFTMAX_H
+#define LUCI_INTERPRETER_PAL_SOFTMAX_H
+
+#include <tensorflow/lite/kernels/internal/reference/softmax.h>
+
+namespace luci_interpreter_pal
+{
+static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale,
+ float beta)
+{
+ // Do nothing on MCU; the lookup-table softmax path is unused here.
+ (void)data;
+ (void)input_scale;
+ (void)beta;
+}
+
+static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta)
+{
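+ // Precompute the fixed-point multiplier/shift pair consumed by the
+ // quantized softmax reference kernel.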
+ int32 input_beta_multiplier;
+ int input_beta_left_shift;
+ static const int kScaledDiffIntegerBits = 5;
+ tflite::PreprocessSoftmaxScaling(beta, input_scale, kScaledDiffIntegerBits,
+ &input_beta_multiplier, &input_beta_left_shift);
+
+ params->input_multiplier = input_beta_multiplier;
+ params->input_left_shift = input_beta_left_shift;
+ params->diff_min =
+ -tflite::CalculateInputRadius(kScaledDiffIntegerBits, params->input_left_shift);
+}
+
+template <typename T>
+static inline void Softmax(const tflite::SoftmaxParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ // MARK: At this moment this operation is not supported on MCU
+ assert(false && "Softmax NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)output_shape;
+ (void)output_data;
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SOFTMAX_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALSpaceToBatchND.h b/compiler/luci-interpreter/pal/mcu/PALSpaceToBatchND.h
new file mode 100644
index 000000000..fdddaa929
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALSpaceToBatchND.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+#define LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+
+#include <tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+SpaceToBatchND(const tflite::SpaceToBatchParams &params,
+ const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+ const tflite::RuntimeShape &unextended_input3_shape, const int32 *paddings_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::SpaceToBatchND(
+ params, unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+ unextended_input3_shape, paddings_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALSpaceToDepth.h b/compiler/luci-interpreter/pal/mcu/PALSpaceToDepth.h
new file mode 100644
index 000000000..816b7f663
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALSpaceToDepth.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+#define LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+
+#include <tensorflow/lite/kernels/internal/reference/space_to_depth.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void SpaceToDepth(const tflite::SpaceToDepthParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape,
+ const T *input_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::SpaceToDepth(op_params, unextended_input_shape, input_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETODEPTH_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALSub.h b/compiler/luci-interpreter/pal/mcu/PALSub.h
new file mode 100644
index 000000000..ea57578c6
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALSub.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SUB_H
+#define LUCI_INTERPRETER_PAL_SUB_H
+
+#include <tensorflow/lite/kernels/internal/reference/sub.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Sub(const tflite::ArithmeticParams &params,
+ const tflite::RuntimeShape &input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &input2_shape, const T *input2_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::Sub(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SUB_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALreference_ops.h b/compiler/luci-interpreter/pal/mcu/PALreference_ops.h
new file mode 100644
index 000000000..62c720937
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALreference_ops.h
@@ -0,0 +1,1556 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
+#define LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#include <algorithm>
+#include <cmath>
+#include <cstring>
+#include <functional>
+#include <limits>
+#include <memory>
+#include <type_traits>
+
+#include "third_party/eigen3/Eigen/Core"
+#include "fixedpoint/fixedpoint.h"
+#include "ruy/profiler/instrumentation.h" // from @ruy
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/lite/kernels/internal/reference/add.h"
+#include "tensorflow/lite/kernels/internal/reference/add_n.h"
+#include "tensorflow/lite/kernels/internal/reference/arg_min_max.h"
+#include "tensorflow/lite/kernels/internal/reference/batch_matmul.h"
+#include "tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h"
+#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
+#include "tensorflow/lite/kernels/internal/reference/cast.h"
+#include "tensorflow/lite/kernels/internal/reference/ceil.h"
+#include "tensorflow/lite/kernels/internal/reference/comparisons.h"
+#include "tensorflow/lite/kernels/internal/reference/concatenation.h"
+#include "tensorflow/lite/kernels/internal/reference/conv.h"
+#include "tensorflow/lite/kernels/internal/reference/depth_to_space.h"
+#include "tensorflow/lite/kernels/internal/reference/dequantize.h"
+#include "tensorflow/lite/kernels/internal/reference/div.h"
+#include "tensorflow/lite/kernels/internal/reference/elu.h"
+#include "tensorflow/lite/kernels/internal/reference/exp.h"
+#include "tensorflow/lite/kernels/internal/reference/fill.h"
+#include "tensorflow/lite/kernels/internal/reference/floor.h"
+#include "tensorflow/lite/kernels/internal/reference/floor_div.h"
+#include "tensorflow/lite/kernels/internal/reference/floor_mod.h"
+#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
+#include "tensorflow/lite/kernels/internal/reference/gather.h"
+#include "tensorflow/lite/kernels/internal/reference/hard_swish.h"
+#include "tensorflow/lite/kernels/internal/reference/l2normalization.h"
+#include "tensorflow/lite/kernels/internal/reference/leaky_relu.h"
+#include "tensorflow/lite/kernels/internal/reference/log_softmax.h"
+#include "tensorflow/lite/kernels/internal/reference/logistic.h"
+#include "tensorflow/lite/kernels/internal/reference/maximum_minimum.h"
+#include "tensorflow/lite/kernels/internal/reference/mul.h"
+#include "tensorflow/lite/kernels/internal/reference/neg.h"
+#include "tensorflow/lite/kernels/internal/reference/pad.h"
+#include "tensorflow/lite/kernels/internal/reference/pooling.h"
+#include "tensorflow/lite/kernels/internal/reference/prelu.h"
+#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
+#include "tensorflow/lite/kernels/internal/reference/quantize.h"
+#include "tensorflow/lite/kernels/internal/reference/reduce.h"
+#include "tensorflow/lite/kernels/internal/reference/requantize.h"
+#include "tensorflow/lite/kernels/internal/reference/resize_bilinear.h"
+#include "tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h"
+#include "tensorflow/lite/kernels/internal/reference/round.h"
+#include "tensorflow/lite/kernels/internal/reference/softmax.h"
+#include "tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h"
+#include "tensorflow/lite/kernels/internal/reference/space_to_depth.h"
+#include "tensorflow/lite/kernels/internal/reference/strided_slice.h"
+#include "tensorflow/lite/kernels/internal/reference/string_comparisons.h"
+#include "tensorflow/lite/kernels/internal/reference/sub.h"
+#include "tensorflow/lite/kernels/internal/reference/tanh.h"
+#include "tensorflow/lite/kernels/internal/reference/transpose.h"
+#include "tensorflow/lite/kernels/internal/reference/transpose_conv.h"
+#include "tensorflow/lite/kernels/internal/strided_slice_logic.h"
+#include "tensorflow/lite/kernels/internal/tensor.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+namespace tflite
+{
+
+namespace reference_ops
+{
+
+template <typename T>
+inline void Relu(const RuntimeShape &input_shape, const T *input_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ const T val = input_data[i];
+ const T lower = 0;
+ const T clamped = val < lower ? lower : val;
+ output_data[i] = clamped;
+ }
+}
+
+template <typename T>
+inline void Relu1(const RuntimeShape &input_shape, const T *input_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ ruy::profiler::ScopeLabel label("Relu1 (not fused)");
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ const T val = input_data[i];
+ const T upper = 1;
+ const T lower = -1;
+ const T clamped = val > upper ? upper : val < lower ? lower : val;
+ output_data[i] = clamped;
+ }
+}
+
+inline void Relu6(const RuntimeShape &input_shape, const float *input_data,
+ const RuntimeShape &output_shape, float *output_data)
+{
+ ruy::profiler::ScopeLabel label("Relu6 (not fused)");
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ const float val = input_data[i];
+ const float upper = 6;
+ const float lower = 0;
+ const float clamped = val > upper ? upper : val < lower ? lower : val;
+ output_data[i] = clamped;
+ }
+}
+
+template <typename T>
+inline void ReluX(const tflite::ReluParams &params, const RuntimeShape &input_shape,
+ const T *input_data, const RuntimeShape &output_shape, T *output_data)
+{
+ ruy::profiler::ScopeLabel label("Quantized ReluX (not fused)");
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ const int32 val = static_cast<int32_t>(input_data[i]);
+ int32 clamped = params.output_offset + MultiplyByQuantizedMultiplier(val - params.input_offset,
+ params.output_multiplier,
+ params.output_shift);
+ clamped = std::max(params.quantized_activation_min, clamped);
+ clamped = std::min(params.quantized_activation_max, clamped);
+ output_data[i] = static_cast<T>(clamped);
+ }
+}
+
+template <typename T>
+inline void ReluX(const tflite::ActivationParams &params, const RuntimeShape &input_shape,
+ const T *input_data, const RuntimeShape &output_shape, T *output_data)
+{
+ ruy::profiler::ScopeLabel label("Quantized ReluX (not fused)");
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ const T max_value = params.quantized_activation_max;
+ const T min_value = params.quantized_activation_min;
+ for (int i = 0; i < flat_size; ++i)
+ {
+ const T val = input_data[i];
+ const T clamped = val > max_value ? max_value : val < min_value ? min_value : val;
+ output_data[i] = clamped;
+ }
+}
+
+// TODO(jiawen): We can implement BroadcastMul on buffers of arbitrary
+// dimensionality if the runtime code does a single loop over one dimension
+// that handles broadcasting as the base case. The code generator would then
+// generate max(D1, D2) nested for loops.
+inline void BroadcastMulFivefold(const ArithmeticParams &unswitched_params,
+ const RuntimeShape &unswitched_input1_shape,
+ const uint8 *unswitched_input1_data,
+ const RuntimeShape &unswitched_input2_shape,
+ const uint8 *unswitched_input2_data,
+ const RuntimeShape &output_shape, uint8 *output_data)
+{
+ ArithmeticParams switched_params = unswitched_params;
+ switched_params.input1_offset = unswitched_params.input2_offset;
+ switched_params.input2_offset = unswitched_params.input1_offset;
+
+ const bool use_unswitched = unswitched_params.broadcast_category ==
+ tflite::BroadcastableOpCategory::kFirstInputBroadcastsFast;
+
+ const ArithmeticParams &params = use_unswitched ? unswitched_params : switched_params;
+ const uint8 *input1_data = use_unswitched ? unswitched_input1_data : unswitched_input2_data;
+ const uint8 *input2_data = use_unswitched ? unswitched_input2_data : unswitched_input1_data;
+
+ // Fivefold nested loops. The second input resets its position for each
+ // iteration of the second loop. The first input resets its position at the
+ // beginning of the fourth loop. The innermost loop is an elementwise Mul of
+ // sections of the arrays.
+ uint8 *output_data_ptr = output_data;
+ const uint8 *input1_data_ptr = input1_data;
+ const uint8 *input2_data_reset = input2_data;
+ int y0 = params.broadcast_shape[0];
+ int y1 = params.broadcast_shape[1];
+ int y2 = params.broadcast_shape[2];
+ int y3 = params.broadcast_shape[3];
+ int y4 = params.broadcast_shape[4];
+ for (int i0 = 0; i0 < y0; ++i0)
+ {
+ const uint8 *input2_data_ptr;
+ for (int i1 = 0; i1 < y1; ++i1)
+ {
+ input2_data_ptr = input2_data_reset;
+ for (int i2 = 0; i2 < y2; ++i2)
+ {
+ for (int i3 = 0; i3 < y3; ++i3)
+ {
+ MulElementwise(y4, params, input1_data_ptr, input2_data_ptr, output_data_ptr);
+ input2_data_ptr += y4;
+ output_data_ptr += y4;
+ }
+ input1_data_ptr += y4;
+ }
+ }
+ input2_data_reset = input2_data_ptr;
+ }
+}
+
+inline void Mul(const ArithmeticParams &params, const RuntimeShape &input1_shape,
+ const int16 *input1_data, const RuntimeShape &input2_shape,
+ const int16 *input2_data, const RuntimeShape &output_shape, int16 *output_data)
+{
+ ruy::profiler::ScopeLabel label("Mul/Int16");
+
+ const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
+ for (int i = 0; i < flat_size; i++)
+ {
+ // F0 uses 0 integer bits, range [-1, 1].
+ using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+
+ F0 unclamped_result = F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
+ output_data[i] = unclamped_result.raw();
+ }
+}
+
+inline void Mul(const ArithmeticParams &params, const RuntimeShape &input1_shape,
+ const int16 *input1_data, const RuntimeShape &input2_shape,
+ const int16 *input2_data, const RuntimeShape &output_shape, uint8 *output_data)
+{
+ ruy::profiler::ScopeLabel label("Mul/Int16Uint8");
+ int32 output_offset = params.output_offset;
+ int32 output_activation_min = params.quantized_activation_min;
+ int32 output_activation_max = params.quantized_activation_max;
+ TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+
+ const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
+ for (int i = 0; i < flat_size; i++)
+ {
+ // F0 uses 0 integer bits, range [-1, 1].
+ using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+
+ F0 unclamped_result = F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
+ int16 rescaled_result = gemmlowp::RoundingDivideByPOT(unclamped_result.raw(), 8);
+ int16 clamped_result = std::min<int16>(output_activation_max - output_offset, rescaled_result);
+ clamped_result = std::max<int16>(output_activation_min - output_offset, clamped_result);
+ output_data[i] = output_offset + clamped_result;
+ }
+}
+
+inline void Sub16(const ArithmeticParams &params, const RuntimeShape &input1_shape,
+ const int16_t *input1_data, const RuntimeShape &input2_shape,
+ const int16_t *input2_data, const RuntimeShape &output_shape,
+ int16_t *output_data)
+{
+ ruy::profiler::ScopeLabel label("Sub/Int16");
+ const int input1_shift = params.input1_shift;
+ const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
+ const int16 output_activation_min = params.quantized_activation_min;
+ const int16 output_activation_max = params.quantized_activation_max;
+
+ TFLITE_DCHECK(input1_shift == 0 || params.input2_shift == 0);
+ TFLITE_DCHECK_LE(input1_shift, 0);
+ TFLITE_DCHECK_LE(params.input2_shift, 0);
+ const int16 *not_shift_input = input1_shift == 0 ? input1_data : input2_data;
+ const int16 *shift_input = input1_shift == 0 ? input2_data : input1_data;
+ const int input_right_shift = input1_shift == 0 ? -params.input2_shift : -input1_shift;
+
+ if (input1_shift == 0)
+ {
+ // F0 uses 0 integer bits, range [-1, 1].
+ using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+ for (int i = 0; i < flat_size; ++i)
+ {
+ F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]);
+ F0 scaled_input =
+ F0::FromRaw(gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
+ F0 result = SaturatingSub(input_ready_scaled, scaled_input);
+ const int16 raw_output = result.raw();
+ const int16 clamped_output =
+ std::min(output_activation_max, std::max(output_activation_min, raw_output));
+ output_data[i] = clamped_output;
+ }
+ }
+ else
+ {
+ // F0 uses 0 integer bits, range [-1, 1].
+ using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+ for (int i = 0; i < flat_size; ++i)
+ {
+ F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]);
+ F0 scaled_input =
+ F0::FromRaw(gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
+ F0 result = SaturatingSub(scaled_input, input_ready_scaled);
+ const int16 raw_output = result.raw();
+ const int16 clamped_output =
+ std::min(output_activation_max, std::max(output_activation_min, raw_output));
+ output_data[i] = clamped_output;
+ }
+ }
+}
+
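+// Packs (stacks) `inputs_count` tensors along a new `axis` dimension of the
+// output. For example, packing two [2, 3] inputs along axis 0 yields a
+// [2, 2, 3] output; each input contributes one `copy_size` slice per outer
+// index.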
+template <typename Scalar>
+void Pack(const PackParams &params, const RuntimeShape *const *input_shapes,
+ const Scalar *const *input_data, const RuntimeShape &output_shape, Scalar *output_data)
+{
+ ruy::profiler::ScopeLabel label("Pack");
+ const int dimensions = output_shape.DimensionsCount();
+ int axis = params.axis;
+ int inputs_count = params.inputs_count;
+
+ int outer_size = 1;
+ for (int i = 0; i < axis; i++)
+ {
+ outer_size *= output_shape.Dims(i);
+ }
+ int copy_size = 1;
+ for (int i = params.axis + 1; i < dimensions; i++)
+ {
+ copy_size *= output_shape.Dims(i);
+ }
+ TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size);
+
+ for (int i = 0; i < inputs_count; ++i)
+ {
+ for (int k = 0; k < outer_size; k++)
+ {
+ const Scalar *input_ptr = input_data[i] + copy_size * k;
+ int loc = k * inputs_count * copy_size + i * copy_size;
+ memcpy(output_data + loc, input_ptr, copy_size * sizeof(Scalar));
+ }
+ }
+}
+
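+// Inverse of Pack: splits the input along `axis` into `num_split` outputs,
+// each receiving one slice per outer index. E.g. unpacking a [2, 2, 3] input
+// along axis 0 produces two [2, 3] outputs.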
+template <typename Scalar>
+void Unpack(const UnpackParams &params, const RuntimeShape &input_shape, const Scalar *input_data,
+ const RuntimeShape &output_shape, Scalar *const *output_datas)
+{
+ ruy::profiler::ScopeLabel label("Unpack");
+ const int dimensions = input_shape.DimensionsCount();
+ const int outputs_count = params.num_split;
+
+ int outer_size = 1;
+ int axis = params.axis;
+ if (axis < 0)
+ {
+ axis += dimensions;
+ }
+ TFLITE_DCHECK_GE(axis, 0);
+ TFLITE_DCHECK_LT(axis, dimensions);
+ for (int i = 0; i < axis; ++i)
+ {
+ outer_size *= input_shape.Dims(i);
+ }
+ int copy_size = 1;
+ for (int i = axis + 1; i < dimensions; ++i)
+ {
+ copy_size *= input_shape.Dims(i);
+ }
+ TFLITE_DCHECK_EQ(output_shape.FlatSize(), copy_size * outer_size);
+
+ for (int i = 0; i < outputs_count; ++i)
+ {
+ for (int k = 0; k < outer_size; k++)
+ {
+ Scalar *output_ptr = output_datas[i] + copy_size * k;
+ int loc = k * outputs_count * copy_size + i * copy_size;
+ memcpy(output_ptr, input_data + loc, copy_size * sizeof(Scalar));
+ }
+ }
+}
+
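+// Variant of Pack for quantized uint8 inputs: slices whose zero point and
+// scale already match the output are copied verbatim; otherwise each value
+// would be requantized with scale = input_scale / output_scale (that path is
+// currently guarded by assert(false)).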
+template <typename Scalar>
+void PackWithScaling(const PackParams &params, const RuntimeShape *const *input_shapes,
+ const uint8 *const *input_data, const RuntimeShape &output_shape,
+ uint8 *output_data)
+{
+ ruy::profiler::ScopeLabel label("PackWithScaling");
+ const int dimensions = output_shape.DimensionsCount();
+ int axis = params.axis;
+ const int32 *input_zeropoint = params.input_zeropoint;
+ const float *input_scale = params.input_scale;
+ int inputs_count = params.inputs_count;
+ const int32 output_zeropoint = params.output_zeropoint;
+ const float output_scale = params.output_scale;
+
+ int outer_size = 1;
+ for (int i = 0; i < axis; i++)
+ {
+ outer_size *= output_shape.Dims(i);
+ }
+ int copy_size = 1;
+ for (int i = axis + 1; i < dimensions; i++)
+ {
+ copy_size *= output_shape.Dims(i);
+ }
+ TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size);
+
+ Scalar *output_ptr = output_data;
+ const float inverse_output_scale = 1.f / output_scale;
+ for (int k = 0; k < outer_size; k++)
+ {
+ for (int i = 0; i < inputs_count; ++i)
+ {
+ if (input_zeropoint[i] == output_zeropoint && input_scale[i] == output_scale)
+ {
+ memcpy(output_ptr, input_data[i] + k * copy_size, copy_size * sizeof(Scalar));
+ }
+ else
+ {
+ assert(false);
+ const float scale = input_scale[i] * inverse_output_scale;
+ const float bias = -input_zeropoint[i] * scale;
+ auto input_ptr = input_data[i];
+ for (int j = 0; j < copy_size; ++j)
+ {
+ const int value =
+ static_cast<int32_t>(std::round(input_ptr[j] * scale + bias)) + output_zeropoint;
+ output_ptr[j] = static_cast<uint8_t>(std::max(std::min(255, value), 0));
+ }
+ }
+ output_ptr += copy_size;
+ }
+ }
+}
+
+template <typename Scalar>
+void DepthConcatenation(const ConcatenationParams &params, const RuntimeShape *const *input_shapes,
+ const Scalar *const *input_data, const RuntimeShape &output_shape,
+ Scalar *output_data)
+{
+ ruy::profiler::ScopeLabel label("DepthConcatenation");
+ auto params_copy = params;
+ params_copy.axis = 3;
+ Concatenation(params_copy, input_shapes, input_data, output_shape, output_data);
+}
+
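+// Float LSTM cell: concatenates the input with the previous activations,
+// applies a single fully-connected layer that produces the four gate
+// pre-activations (input, new-input, forget, output), then computes
+// new_state = input_gate * new_input + forget_gate * prev_state and
+// output_activ = output_gate * tanh(new_state).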
+inline void LstmCell(const LstmCellParams &params, const RuntimeShape &unextended_input_shape,
+ const float *input_data, const RuntimeShape &unextended_prev_activ_shape,
+ const float *prev_activ_data, const RuntimeShape &weights_shape,
+ const float *weights_data, const RuntimeShape &unextended_bias_shape,
+ const float *bias_data, const RuntimeShape &unextended_prev_state_shape,
+ const float *prev_state_data,
+ const RuntimeShape &unextended_output_state_shape, float *output_state_data,
+ const RuntimeShape &unextended_output_activ_shape, float *output_activ_data,
+ const RuntimeShape &unextended_concat_temp_shape, float *concat_temp_data,
+ const RuntimeShape &unextended_activ_temp_shape, float *activ_temp_data)
+{
+ TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
+ const RuntimeShape input_shape = RuntimeShape::ExtendedShape(4, unextended_input_shape);
+ const RuntimeShape prev_activ_shape = RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
+ const RuntimeShape bias_shape = RuntimeShape::ExtendedShape(4, unextended_bias_shape);
+ const RuntimeShape prev_state_shape = RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
+ const RuntimeShape output_state_shape =
+ RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
+ const RuntimeShape output_activ_shape =
+ RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
+ const RuntimeShape concat_temp_shape =
+ RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
+ const RuntimeShape activ_temp_shape = RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
+ TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
+
+ const int weights_dim_count = weights_shape.DimensionsCount();
+ const int batches = MatchingDim(input_shape, 0, prev_activ_shape, 0, prev_state_shape, 0,
+ output_state_shape, 0, output_activ_shape, 0);
+ const int height = MatchingDim(input_shape, 1, prev_activ_shape, 1, prev_state_shape, 1,
+ output_state_shape, 1, output_activ_shape, 1);
+ const int width = MatchingDim(input_shape, 2, prev_activ_shape, 2, prev_state_shape, 2,
+ output_state_shape, 2, output_activ_shape, 2);
+ const int input_depth = input_shape.Dims(3);
+ const int prev_activ_depth = prev_activ_shape.Dims(3);
+ const int total_input_depth = prev_activ_depth + input_depth;
+ TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), total_input_depth);
+ TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
+ const int intern_activ_depth = MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
+ TFLITE_DCHECK_EQ(weights_shape.FlatSize(), intern_activ_depth * total_input_depth);
+ TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
+ const int output_depth = MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
+ 3, output_activ_shape, 3);
+ TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
+
+ // Concatenate prev_activ and input data together
+ std::vector<float const *> concat_input_arrays_data;
+ std::vector<RuntimeShape const *> concat_input_arrays_shapes;
+ concat_input_arrays_data.push_back(input_data);
+ concat_input_arrays_data.push_back(prev_activ_data);
+ concat_input_arrays_shapes.push_back(&input_shape);
+ concat_input_arrays_shapes.push_back(&prev_activ_shape);
+ tflite::ConcatenationParams concat_params;
+ concat_params.axis = 3;
+ concat_params.inputs_count = concat_input_arrays_data.size();
+ Concatenation(concat_params, &(concat_input_arrays_shapes[0]), &(concat_input_arrays_data[0]),
+ concat_temp_shape, concat_temp_data);
+
+ // Fully connected
+ tflite::FullyConnectedParams fc_params;
+ fc_params.float_activation_min = std::numeric_limits<float>::lowest();
+ fc_params.float_activation_max = std::numeric_limits<float>::max();
+ FullyConnected(fc_params, concat_temp_shape, concat_temp_data, weights_shape, weights_data,
+ bias_shape, bias_data, activ_temp_shape, activ_temp_data);
+
+ // Memory state update (the LSTM "guts")
+ for (int b = 0; b < batches; ++b)
+ {
+ for (int w = 0; w < width; ++w)
+ {
+ for (int h = 0; h < height; ++h)
+ {
+ for (int c = 0; c < output_depth; ++c)
+ {
+ const float input_gate =
+ 1.f /
+ (1.f +
+ std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 0 * output_depth + c)]));
+ const float new_input =
+ std::tanh(activ_temp_data[Offset(activ_temp_shape, b, h, w, 1 * output_depth + c)]);
+ const float forget_gate =
+ 1.f /
+ (1.f +
+ std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 2 * output_depth + c)]));
+ const float output_gate =
+ 1.f /
+ (1.f +
+ std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 3 * output_depth + c)]));
+ const float new_state =
+ input_gate * new_input +
+ forget_gate * prev_state_data[Offset(prev_state_shape, b, h, w, c)];
+ output_state_data[Offset(output_state_shape, b, h, w, c)] = new_state;
+ output_activ_data[Offset(output_activ_shape, b, h, w, c)] =
+ output_gate * std::tanh(new_state);
+ }
+ }
+ }
+ }
+}
+
+// Quantized LSTM cell implementation.
+// The quantization of the input, output arrays is as follows:
+// - The input activations are quantized as uint8 on the interval
+// [-1, 127/128].
+// The rationale for that is that this is the natural interval for output
+// activations (see next point) and these need to be concatenated together.
+// We could accommodate different ranges by re-scaling, but we empirically
+// found that setting the input activations range to be [-1, 127/128] in the
+// first place, removing the need for re-scaling, greatly improves accuracy.
+// - The output activations are quantized as uint8 on the interval
+// [-1, 127/128].
+// The rationale for that is that the definition of a LSTM cell makes them
+// intrinsically constrained in [-1, 1]; tweaking that to [-1, 127/128]
+// makes for simpler, more accurate fixed-point arithmetic.
+// - The output-at-previous-timestep state array is quantized in the same way
+// as the output activations.
+// - The internal LSTM memory (not the output-at-previous-timestep, the other
+// internal state array) is int16-quantized and may use any power-of-two,
+// symmetric range i.e. [-2^N, 2^N * 32767/32768] for any N, which we call
+// StateIntegerBits below, see the below discussion of that template
+// parameter ("The StateIntegerBits template parameter").
+// - The output of the internal fully-connected node is int16-quantized
+// on the interval [-8, 8 * 32767/32768], the rationale for which is
+// explained just below ("Why [-8, 8] for fully-connected output?").
+//
+//
+// === The StateIntegerBits template parameter ===
+//
+// The StateIntegerBits template parameter controls the fixed-point format used
+// to represent the internal memory of the LSTM cell (not the
+// output-at-previous-timestep, the other internal state array). It's currently
+// a template parameter so that the model can control that. The most typical
+// value for StateIntegerBits is 4. Other plausible values are anywhere between
+// 3 and 5. We might eventually standardize on a single supported value, e.g. 4,
+// and drop that template parameter. The reason why it can't be a runtime
+// parameter is that this controls the fixed-point format used, i.e. we need to
+// generate actually different code based on it. In particular, we generate code
+// for a fixed-point tanh() implementation for that format, which internally
+// uses a fixed-point exp() implementation, which internally uses a
+// barrel-shifter with a number of steps that depends on StateIntegerBits.
+// Another consequence of that is that a higher value of StateIntegerBits
+// results in a more expensive implementation (more barrel shifter steps
+// needed).
+//
+//
+// === Why [-8, 8] for fully-connected output? ===
+//
+// This array is only fed to Logistic and Tanh functions, for which
+// the quantized implementation will want to use fixed-point arithmetic,
+// requiring a power-of-two representation interval. Thus, we should right
+// away quantize this array to a power-of-two interval; otherwise,
+// implementation will need to rescale that, losing any benefit that a tighter
+// representation interval might otherwise yield, while introducing some
+// numerical error and computational overhead.
+//
+// Now, Logistic and Tanh
+// are nearly constant (nearly equal to their horizontal asymptotes)
+// outside of a small bounded interval around 0:
+//
+// Logistic(4) = 1 - 1.8e-2 Tanh(4) = 1 - 6.7e-4
+// Logistic(8) = 1 - 3.4e-4 Tanh(8) = 1 - 2.3e-7
+// Logistic(16) = 1 - 1.1e-7 Tanh(16) = 1 - 2.5e-14
+//
+// From this, we see that clamping to [-4, 4] would be too inaccurate
+// (the error of 1.8e-2 on Logistic would be felt even in 8bit precision)
+// while clamping to [-16, 16] would make no difference even in float32.
+// However, for a fixed-point implementation in 16-bit integers, using 5
+// integer bits to represent the [-16, 16] range would leave only 11
+// fractional bits, giving an increment of 2^-11 = 4.9e-4 between consecutive
+// representable values. Notice that this is higher than the
+// worst-case clamping error with clamping to [-8, 8]: 3.4e-4 for Logistic.
+// Using [-8, 8] thus seems like the better compromise overall, enjoying
+// an increment of 2.4e-4 between representable values and a worst-case
+// clamping error of 3.4e-4, both better than the increment of 4.9e-4 with
+// [-16, 16].
+//
+// Moreover, all other things being equal, it is nice to choose the narrower
+// representation range, as that makes the implementation of fixed-point
+// math functions a little cheaper (each integer bit requires an additional
+// barrel-shifter step in the implementation of exp(-x)). That is further
+// reason to prefer [-8, 8] over [-16, 16]. The choice of [-16, 16] would make
+// sense for 32-bit float or 32-bit fixed-point quantization, but we are
+// aiming for 16-bit fixed-point quantization of these internal nodes here.
+//
+template <int StateIntegerBits>
+inline void
+LstmCell(const LstmCellParams &params, const RuntimeShape &unextended_input_shape,
+ const uint8 *input_data_uint8, const RuntimeShape &unextended_prev_activ_shape,
+ const uint8 *prev_activ_data_uint8, const RuntimeShape &weights_shape,
+ const uint8 *weights_data_uint8, const RuntimeShape &unextended_bias_shape,
+ const int32 *bias_data_int32, const RuntimeShape &unextended_prev_state_shape,
+ const int16 *prev_state_data_int16, const RuntimeShape &unextended_output_state_shape,
+ int16 *output_state_data_int16, const RuntimeShape &unextended_output_activ_shape,
+ uint8 *output_activ_data_uint8, const RuntimeShape &unextended_concat_temp_shape,
+ uint8 *concat_temp_data_uint8, const RuntimeShape &unextended_activ_temp_shape,
+ int16 *activ_temp_data_int16, void *gemmlowp_context)
+{
+ (void)gemmlowp_context; // only used in optimized code.
+ int32 weights_zero_point = params.weights_zero_point;
+ int32 accum_multiplier = params.accum_multiplier;
+ int accum_shift = params.accum_shift;
+ TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
+ const RuntimeShape input_shape = RuntimeShape::ExtendedShape(4, unextended_input_shape);
+ const RuntimeShape prev_activ_shape = RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
+ const RuntimeShape bias_shape = RuntimeShape::ExtendedShape(4, unextended_bias_shape);
+ const RuntimeShape prev_state_shape = RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
+ const RuntimeShape output_state_shape =
+ RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
+ const RuntimeShape output_activ_shape =
+ RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
+ const RuntimeShape concat_temp_shape =
+ RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
+ const RuntimeShape activ_temp_shape = RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
+ TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
+
+ // Gather dimensions information, and perform consistency checks.
+ const int weights_dim_count = weights_shape.DimensionsCount();
+ const int outer_size = MatchingFlatSizeSkipDim(input_shape, 3, prev_activ_shape, prev_state_shape,
+ output_state_shape, output_activ_shape);
+ const int input_depth = input_shape.Dims(3);
+ const int prev_activ_depth = prev_activ_shape.Dims(3);
+ const int total_input_depth = prev_activ_depth + input_depth;
+ TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), total_input_depth);
+ const int intern_activ_depth = MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
+ TFLITE_DCHECK_EQ(weights_shape.FlatSize(), intern_activ_depth * total_input_depth);
+ TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
+ TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
+ const int output_depth = MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
+ 3, output_activ_shape, 3);
+ TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
+ const int fc_batches = FlatSizeSkipDim(activ_temp_shape, 3);
+ const int fc_output_depth =
+ MatchingDim(weights_shape, weights_dim_count - 2, activ_temp_shape, 3);
+ const int fc_accum_depth = total_input_depth;
+ TFLITE_DCHECK_EQ(fc_output_depth, 4 * output_depth);
+
+ // Depth-concatenate prev_activ and input data together.
+ uint8 const *concat_input_arrays_data[2] = {input_data_uint8, prev_activ_data_uint8};
+ const RuntimeShape *concat_input_arrays_shapes[2] = {&input_shape, &prev_activ_shape};
+ tflite::ConcatenationParams concat_params;
+ concat_params.axis = 3;
+ concat_params.inputs_count = 2;
+ Concatenation(concat_params, concat_input_arrays_shapes, concat_input_arrays_data,
+ concat_temp_shape, concat_temp_data_uint8);
+
+ // Implementation of the fully connected node inside the LSTM cell.
+ // The operands are 8-bit integers, the accumulators are internally 32bit
+ // integers, and the output is 16-bit fixed-point with 3 integer bits so
+ // the output range is [-2^3, 2^3] == [-8, 8]. The rationale for that
+ // is explained in the function comment above.
+ for (int b = 0; b < fc_batches; ++b)
+ {
+ for (int out_c = 0; out_c < fc_output_depth; ++out_c)
+ {
+ // Internal accumulation.
+ // Initialize accumulator with the bias-value.
+ int32 accum = bias_data_int32[out_c];
+ // Accumulation loop.
+ for (int d = 0; d < fc_accum_depth; ++d)
+ {
+ int16 input_val = concat_temp_data_uint8[b * fc_accum_depth + d] - 128;
+ int16 weights_val = weights_data_uint8[out_c * fc_accum_depth + d] - weights_zero_point;
+ accum += input_val * weights_val;
+ }
+ // Down-scale the final int32 accumulator to the scale used by our
+ // (16-bit, using 3 integer bits) fixed-point format. The quantized
+ // multiplier and shift here have been pre-computed offline
+ // (e.g. by toco).
+ accum = MultiplyByQuantizedMultiplier(accum, accum_multiplier, accum_shift);
+ // Saturate, cast to int16, and store to the temporary activations array.
+ accum = std::max(-32768, std::min(32767, static_cast<int>(accum)));
+ activ_temp_data_int16[out_c + fc_output_depth * b] = accum;
+ }
+ }
+
+ // Rest of the LSTM cell: tanh and logistic math functions, and some adds
+ // and muls, all done in 16-bit fixed-point.
+ for (int b = 0; b < outer_size; ++b)
+ {
+ for (int c = 0; c < output_depth; ++c)
+ {
+ // Define the fixed-point data types that we will use here. All use
+ // int16 as the underlying integer type i.e. all are 16-bit fixed-point.
+ // They only differ by the number of integral vs. fractional bits,
+ // determining the range of values that they can represent.
+ //
+ // F0 uses 0 integer bits, range [-1, 1].
+ // This is the return type of math functions such as tanh, logistic,
+ // whose range is in [-1, 1].
+ using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+ // F3 uses 3 integer bits, range [-8, 8].
+ // This is the range of the previous fully-connected node's output,
+ // which is our input here.
+ using F3 = gemmlowp::FixedPoint<std::int16_t, 3>;
+ // FS uses StateIntegerBits integer bits, range [-2^StateIntegerBits,
+ // 2^StateIntegerBits]. It's used to represent the internal state, whose
+ // number of integer bits is currently dictated by the model. See comment
+ // on the StateIntegerBits template parameter above.
+ using FS = gemmlowp::FixedPoint<std::int16_t, StateIntegerBits>;
+ // Implementation of input gate, using fixed-point logistic function.
+ F3 input_gate_input =
+ F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 0 * output_depth + c]);
+ F0 input_gate_output = gemmlowp::logistic(input_gate_input);
+ // Implementation of input modulation gate, using fixed-point tanh
+ // function.
+ F3 input_modulation_gate_input =
+ F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 1 * output_depth + c]);
+ F0 input_modulation_gate_output = gemmlowp::tanh(input_modulation_gate_input);
+ // Implementation of forget gate, using fixed-point logistic function.
+ F3 forget_gate_input =
+ F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 2 * output_depth + c]);
+ F0 forget_gate_output = gemmlowp::logistic(forget_gate_input);
+ // Implementation of output gate, using fixed-point logistic function.
+ F3 output_gate_input =
+ F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 3 * output_depth + c]);
+ F0 output_gate_output = gemmlowp::logistic(output_gate_input);
+ // Implementation of internal multiplication nodes, still in fixed-point.
+ F0 input_times_input_modulation = input_gate_output * input_modulation_gate_output;
+ FS prev_state = FS::FromRaw(prev_state_data_int16[b * output_depth + c]);
+ FS prev_state_times_forget_state = forget_gate_output * prev_state;
+ // Implementation of internal addition node, saturating.
+ FS new_state =
+ gemmlowp::SaturatingAdd(gemmlowp::Rescale<StateIntegerBits>(input_times_input_modulation),
+ prev_state_times_forget_state);
+ // Implementation of last internal Tanh node, still in fixed-point.
+ // Since a Tanh fixed-point implementation is specialized for a given
+ // number of integer bits, and each specialization can have a substantial
+ // code size, and we already used above a Tanh on an input with 3 integer
+ // bits, and per the table in the above function comment there is no
+ // significant accuracy to be lost by clamping to [-8, +8] for a
+ // 3-integer-bits representation, let us just do that. This helps people
+ // porting this to targets where code footprint must be minimized.
+ F3 new_state_f3 = gemmlowp::Rescale<3>(new_state);
+ F0 output_activ_int16 = output_gate_output * gemmlowp::tanh(new_state_f3);
+ // Store the new internal state back to memory, as 16-bit integers.
+ // Note: here we store the original value with StateIntegerBits, not
+ // the rescaled 3-integer-bits value fed to tanh.
+ output_state_data_int16[b * output_depth + c] = new_state.raw();
+ // Down-scale the output activations to 8-bit integers, saturating,
+ // and store back to memory.
+ int16 rescaled_output_activ = gemmlowp::RoundingDivideByPOT(output_activ_int16.raw(), 8);
+ int16 clamped_output_activ =
+ std::max<int16>(-128, std::min<int16>(127, rescaled_output_activ));
+ output_activ_data_uint8[b * output_depth + c] = 128 + clamped_output_activ;
+ }
+ }
+}
+
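+// Splits the input along `axis` (negative values count from the back) into
+// `num_split` outputs; the slice sizes along `axis` may differ per output but
+// must sum to the input's extent on that axis.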
+template <typename Scalar>
+void Split(const SplitParams &params, const RuntimeShape &input_shape, const Scalar *input_data,
+ const RuntimeShape *const *output_shapes, Scalar *const *output_data)
+{
+ ruy::profiler::ScopeLabel label("Split");
+ const int split_dimensions = input_shape.DimensionsCount();
+ int axis = params.axis < 0 ? params.axis + split_dimensions : params.axis;
+ int outputs_count = params.num_split;
+ TFLITE_DCHECK_LT(axis, split_dimensions);
+
+ int64_t split_size = 0;
+ for (int i = 0; i < outputs_count; i++)
+ {
+ TFLITE_DCHECK_EQ(output_shapes[i]->DimensionsCount(), split_dimensions);
+ for (int j = 0; j < split_dimensions; j++)
+ {
+ if (j != axis)
+ {
+ MatchingDim(*output_shapes[i], j, input_shape, j);
+ }
+ }
+ split_size += output_shapes[i]->Dims(axis);
+ }
+ TFLITE_DCHECK_EQ(split_size, input_shape.Dims(axis));
+ int64_t outer_size = 1;
+ for (int i = 0; i < axis; ++i)
+ {
+ outer_size *= input_shape.Dims(i);
+ }
+ // For all output arrays,
+ // FlatSize() = outer_size * Dims(axis) * base_inner_size;
+ int64_t base_inner_size = 1;
+ for (int i = axis + 1; i < split_dimensions; ++i)
+ {
+ base_inner_size *= input_shape.Dims(i);
+ }
+
+ const Scalar *input_ptr = input_data;
+ for (int k = 0; k < outer_size; k++)
+ {
+ for (int i = 0; i < outputs_count; ++i)
+ {
+ const int copy_size = output_shapes[i]->Dims(axis) * base_inner_size;
+ memcpy(output_data[i] + k * copy_size, input_ptr, copy_size * sizeof(Scalar));
+ input_ptr += copy_size;
+ }
+ }
+}
+
+inline int NodeOffset(int b, int h, int w, int height, int width)
+{
+ return (b * height + h) * width + w;
+}
+
+inline void LocalResponseNormalization(const tflite::LocalResponseNormalizationParams &op_params,
+ const RuntimeShape &input_shape, const float *input_data,
+ const RuntimeShape &output_shape, float *output_data)
+{
+ const int trailing_dim = input_shape.DimensionsCount() - 1;
+ const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+ const int depth = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+
+ for (int i = 0; i < outer_size; ++i)
+ {
+ for (int c = 0; c < depth; ++c)
+ {
+ const int begin_input_c = std::max(0, static_cast<int>(c - op_params.range));
+ const int end_input_c = std::min(depth, static_cast<int>(c + op_params.range));
+ float accum = 0.f;
+ for (int input_c = begin_input_c; input_c < end_input_c; ++input_c)
+ {
+ const float input_val = input_data[i * depth + input_c];
+ accum += input_val * input_val;
+ }
+ const float multiplier = std::pow(op_params.bias + op_params.alpha * accum, -op_params.beta);
+ output_data[i * depth + c] = input_data[i * depth + c] * multiplier;
+ }
+ }
+}
+
+inline void Dequantize(const RuntimeShape &input_shape, const Eigen::half *input_data,
+ const RuntimeShape &output_shape, float *output_data)
+{
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ for (int i = 0; i < flat_size; i++)
+ {
+ output_data[i] = static_cast<float>(input_data[i]);
+ }
+}
+
+inline void FakeQuant(const tflite::FakeQuantParams &op_params, const RuntimeShape &input_shape,
+ const float *input_data, const RuntimeShape &output_shape, float *output_data)
+{
+ ruy::profiler::ScopeLabel label("FakeQuant");
+ float rmin = op_params.minmax.min;
+ float rmax = op_params.minmax.max;
+ int num_bits = op_params.num_bits;
+ // 0 should always be a representable value. Let's assume that the initial
+ // min,max range contains 0.
+ TFLITE_DCHECK_LE(rmin, 0.0f);
+ TFLITE_DCHECK_GE(rmax, 0.0f);
+ TFLITE_DCHECK_LT(rmin, rmax);
+
+ // Code matches tensorflow's FakeQuantWithMinMaxArgsFunctor.
+ int quant_min = 0;
+ int quant_max = (1 << num_bits) - 1;
+ float nudged_min, nudged_max, nudged_scale;
+ NudgeQuantizationRange(rmin, rmax, quant_min, quant_max, &nudged_min, &nudged_max, &nudged_scale);
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ FakeQuantizeArray(nudged_scale, nudged_min, nudged_max, input_data, output_data, flat_size);
+}
+
+// Common helper data shared by `GatherNd` and `GatherNdString`.
+struct GatherNdHelperResult
+{
+ int n_slices;
+ int slice_size;
+ int indices_nd;
+ std::vector<int> dims_to_count;
+};
+
+// Computes the common values used by both `GatherNd` and `GatherNdString`.
+inline GatherNdHelperResult GatherNdHelper(const RuntimeShape &params_shape,
+ const RuntimeShape &indices_shape)
+{
+ GatherNdHelperResult ret;
+ ret.n_slices = 1;
+ ret.slice_size = 1;
+ const int indices_dims = indices_shape.DimensionsCount();
+ ret.indices_nd = indices_shape.Dims(indices_dims - 1);
+ const int params_dims = params_shape.DimensionsCount();
+ for (int i = 0; i < indices_dims - 1; ++i)
+ {
+ ret.n_slices *= indices_shape.Dims(i);
+ }
+ for (int i = ret.indices_nd; i < params_dims; ++i)
+ {
+ ret.slice_size *= params_shape.Dims(i);
+ }
+
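+ // dims_to_count[i] is the row-major stride (in elements) of params
+ // dimension i, so an N-D index flattens to a dot product with these strides.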
+ int remain_flat_size = params_shape.FlatSize();
+ ret.dims_to_count = std::vector<int>(ret.indices_nd, 0);
+ for (int i = 0; i < ret.indices_nd; ++i)
+ {
+ ret.dims_to_count[i] = remain_flat_size / params_shape.Dims(i);
+ remain_flat_size = ret.dims_to_count[i];
+ }
+
+ return ret;
+}
+
+template <typename ParamsT, typename IndicesT = int32>
+inline void GatherNd(const RuntimeShape &params_shape, const ParamsT *params_data,
+ const RuntimeShape &indices_shape, const IndicesT *indices_data,
+ const RuntimeShape &output_shape, ParamsT *output_data)
+{
+ ruy::profiler::ScopeLabel label("GatherNd");
+
+ const GatherNdHelperResult res = GatherNdHelper(params_shape, indices_shape);
+ for (int i = 0; i < res.n_slices; ++i)
+ {
+ int from_pos = 0;
+ for (int j = 0; j < res.indices_nd; ++j)
+ {
+ from_pos += indices_data[i * res.indices_nd + j] * res.dims_to_count[j];
+ }
+ std::memcpy(output_data + i * res.slice_size, params_data + from_pos,
+ sizeof(ParamsT) * res.slice_size);
+ }
+}
+
+#ifndef TF_LITE_STATIC_MEMORY
+template <typename IndicesT = int32>
+inline void GatherNdString(const RuntimeShape &params_shape, const TfLiteTensor *params_data,
+ const RuntimeShape &indices_shape, const IndicesT *indices_data,
+ const RuntimeShape &output_shape, TfLiteTensor *output_data)
+{
+ ruy::profiler::ScopeLabel label("GatherNdString");
+
+ const GatherNdHelperResult res = GatherNdHelper(params_shape, indices_shape);
+ DynamicBuffer buffer;
+ for (int i = 0; i < res.n_slices; ++i)
+ {
+ int from_pos = 0;
+ for (int j = 0; j < res.indices_nd; ++j)
+ {
+ from_pos += indices_data[i * res.indices_nd + j] * res.dims_to_count[j];
+ }
+ for (int j = 0; j < res.slice_size; ++j)
+ {
+ buffer.AddString(GetString(params_data, from_pos + j));
+ }
+ }
+ buffer.WriteToTensor(output_data, /*new_shape=*/nullptr);
+}
+#endif
+
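+// Scatters `updates` slices into a zero-initialized output at the positions
+// named by `indices`; slices that target the same index accumulate (+=),
+// matching TensorFlow's scatter_nd semantics for duplicate indices.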
+template <typename IndicesT, typename UpdatesT>
+inline void ScatterNd(const RuntimeShape &indices_shape, const IndicesT *indices_data,
+ const RuntimeShape &updates_shape, const UpdatesT *updates_data,
+ const RuntimeShape &output_shape, UpdatesT *output_data)
+{
+ ruy::profiler::ScopeLabel label("ScatterNd");
+
+ int n_slices = 1;
+ int slice_size = 1;
+ const int outer_dims = indices_shape.DimensionsCount() - 1;
+ const int indices_nd = indices_shape.Dims(outer_dims);
+ const int updates_dims = updates_shape.DimensionsCount();
+ for (int i = 0; i < outer_dims; ++i)
+ {
+ n_slices *= indices_shape.Dims(i);
+ }
+ for (int i = outer_dims; i < updates_dims; ++i)
+ {
+ slice_size *= updates_shape.Dims(i);
+ }
+
+ int output_flat_size = output_shape.FlatSize();
+ int remain_flat_size = output_flat_size;
+ std::vector<int> dims_to_count(indices_nd, 0);
+ for (int i = 0; i < indices_nd; ++i)
+ {
+ dims_to_count[i] = remain_flat_size / output_shape.Dims(i);
+ remain_flat_size = dims_to_count[i];
+ }
+
+ memset(output_data, 0, sizeof(UpdatesT) * output_flat_size);
+ for (int i = 0; i < n_slices; ++i)
+ {
+ int to_pos = 0;
+ for (int j = 0; j < indices_nd; ++j)
+ {
+ IndicesT idx = indices_data[i * indices_nd + j];
+ TFLITE_DCHECK(0 <= idx && idx < output_shape.Dims(j));
+ to_pos += idx * dims_to_count[j];
+ }
+ for (int j = 0; j < slice_size; j++)
+ {
+ output_data[to_pos + j] += updates_data[i * slice_size + j];
+ }
+ }
+}
+
+template <typename T>
+inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape,
+ const RuntimeShape &output_shape, SequentialTensorWriter<T> *writer)
+{
+ const RuntimeShape ext_shape = RuntimeShape::ExtendedShape(5, input_shape);
+ TFLITE_DCHECK_LE(op_params.begin_count, 5);
+ TFLITE_DCHECK_LE(op_params.size_count, 5);
+ const int begin_count = op_params.begin_count;
+ const int size_count = op_params.size_count;
+ // We front-pad the begin and size vectors.
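+ // E.g. with begin_count == 2 on the rank-5 extended shape, begin/size apply
+ // to the last two dimensions only; dimensions 0..2 default to the full
+ // range [0, Dims(i)).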
+ std::array<int, 5> start;
+ std::array<int, 5> stop;
+ for (int i = 0; i < 5; ++i)
+ {
+ int padded_i = 5 - i;
+ start[i] = begin_count < padded_i ? 0 : op_params.begin[begin_count - padded_i];
+ stop[i] = (size_count < padded_i || op_params.size[size_count - padded_i] == -1)
+ ? ext_shape.Dims(i)
+ : start[i] + op_params.size[size_count - padded_i];
+ }
+
+ for (int i0 = start[0]; i0 < stop[0]; ++i0)
+ {
+ for (int i1 = start[1]; i1 < stop[1]; ++i1)
+ {
+ for (int i2 = start[2]; i2 < stop[2]; ++i2)
+ {
+ for (int i3 = start[3]; i3 < stop[3]; ++i3)
+ {
+ for (int i4 = start[4]; i4 < stop[4]; ++i4)
+ {
+ writer->Write(Offset(ext_shape, i0, i1, i2, i3, i4));
+ }
+ }
+ }
+ }
+ }
+}
+
+template <typename T>
+inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape,
+ const T *input_data, const RuntimeShape &output_shape, T *output_data)
+{
+ SequentialTensorWriter<T> writer(input_data, output_data);
+ return Slice(op_params, input_shape, output_shape, &writer);
+}
+
+template <typename T>
+inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape,
+ const TfLiteTensor *input, const RuntimeShape &output_shape, TfLiteTensor *output)
+{
+ SequentialTensorWriter<T> writer(input, output);
+ return Slice(op_params, input_shape, output_shape, &writer);
+}
+
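+// Note: input2 is treated as a scalar; only its first element is read and
+// broadcast against input1.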
+template <typename T>
+void Minimum(const RuntimeShape &input1_shape, const T *input1_data, const T *input2_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ const int flat_size = MatchingFlatSize(input1_shape, output_shape);
+
+ auto min_value = input2_data[0];
+ for (int i = 0; i < flat_size; i++)
+ {
+ output_data[i] = input1_data[i] > min_value ? min_value : input1_data[i];
+ }
+}
+
+// Convenience version that allows, for example, generated-code calls to be
+// the same as other binary ops.
+template <typename T>
+inline void Minimum(const RuntimeShape &input1_shape, const T *input1_data, const RuntimeShape &,
+ const T *input2_data, const RuntimeShape &output_shape, T *output_data)
+{
+ // Drop shape of second input: not needed.
+ Minimum(input1_shape, input1_data, input2_data, output_shape, output_data);
+}
+
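+// As with Minimum above, input2 is treated as a scalar broadcast over input1.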
+template <typename T>
+void Maximum(const RuntimeShape &input1_shape, const T *input1_data, const T *input2_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ const int flat_size = MatchingFlatSize(input1_shape, output_shape);
+
+ auto max_value = input2_data[0];
+ for (int i = 0; i < flat_size; i++)
+ {
+ output_data[i] = input1_data[i] < max_value ? max_value : input1_data[i];
+ }
+}
+
+// Convenience version that allows, for example, generated-code calls to be
+// the same as other binary ops.
+template <typename T>
+inline void Maximum(const RuntimeShape &input1_shape, const T *input1_data, const RuntimeShape &,
+ const T *input2_data, const RuntimeShape &output_shape, T *output_data)
+{
+ // Drop shape of second input: not needed.
+ Maximum(input1_shape, input1_data, input2_data, output_shape, output_data);
+}
+
+template <typename T1, typename T2, typename T3>
+void ArgMax(const RuntimeShape &input1_shape, const T1 *input1_data, const T3 *input2_data,
+ const RuntimeShape &output_shape, T2 *output_data)
+{
+ ArgMinMax(input1_shape, input1_data, input2_data, output_shape, output_data, std::greater<T1>());
+}
+
+// Convenience version that allows, for example, generated-code calls to be
+// the same as other binary ops.
+template <typename T1, typename T2, typename T3>
+inline void ArgMax(const RuntimeShape &input1_shape, const T1 *input1_data,
+ const RuntimeShape &input2_shape, const T3 *input2_data,
+ const RuntimeShape &output_shape, T2 *output_data)
+{
+ // Drop shape of second input: not needed.
+ ArgMax(input1_shape, input1_data, input2_data, output_shape, output_data);
+}
+
+template <typename D, typename T>
+void Select(const RuntimeShape &input_condition_shape, const D *input_condition_data,
+ const RuntimeShape &input_x_shape, const T *input_x_data,
+ const RuntimeShape &input_y_shape, const T *input_y_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ int64_t flatsize;
+ // Allow select operator executions on mixed scalar tensors and one element
+ // tensors.
+ if (input_condition_shape.FlatSize() == 1 && input_x_shape.FlatSize() == 1 &&
+ input_y_shape.FlatSize() == 1 && output_shape.FlatSize() == 1)
+ {
+ flatsize = 1;
+ }
+ else
+ {
+ flatsize = MatchingFlatSize(input_condition_shape, input_x_shape, input_y_shape, output_shape);
+ }
+ for (int64_t i = 0; i < flatsize; ++i)
+ {
+ output_data[i] = input_condition_data[i] ? input_x_data[i] : input_y_data[i];
+ }
+}
+
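+// Select with a rank-1 (or scalar) condition: each condition element picks an
+// entire inner slice from either input_x or input_y.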
+template <typename D, typename T>
+void RankOneSelect(const RuntimeShape &input_condition_shape, const D *input_condition_data,
+ const RuntimeShape &input_x_shape, const T *input_x_data,
+ const RuntimeShape &input_y_shape, const T *input_y_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ const int64_t outer_size = input_condition_shape.FlatSize();
+ int64_t inner_size;
+ if (input_condition_shape.DimensionsCount() == 0)
+ {
+ inner_size = MatchingFlatSize(input_x_shape, input_y_shape, output_shape);
+ }
+ else
+ {
+ TFLITE_DCHECK_EQ(MatchingDim(input_x_shape, 0, input_y_shape, 0, output_shape, 0), outer_size);
+ inner_size = MatchingFlatSizeSkipDim(input_x_shape, 0, input_y_shape, output_shape);
+ }
+
+ int64_t offset = 0;
+ for (int64_t i = 0; i < outer_size; i++)
+ {
+ const T *input_data = input_condition_data[i] ? input_x_data : input_y_data;
+ memcpy(output_data + offset, input_data + offset, inner_size * sizeof(T));
+ offset += inner_size;
+ }
+}
+
+template <typename D, typename T>
+void BroadcastSelect4DSlow(const RuntimeShape &input_condition_shape, const D *input_condition_data,
+ const RuntimeShape &input_x_shape, const T *input_x_data,
+ const RuntimeShape &input_y_shape, const T *input_y_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ TFLITE_DCHECK_LE(input_condition_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(input_x_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(input_y_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(output_shape.DimensionsCount(), 4);
+
+ const RuntimeShape extended_output_shape = RuntimeShape::ExtendedShape(4, output_shape);
+
+ NdArrayDesc<4> desc_condition;
+ NdArrayDesc<4> desc_x;
+ NdArrayDesc<4> desc_y;
+ NdArrayDescsForElementwiseBroadcast(input_condition_shape, input_x_shape, input_y_shape,
+ &desc_condition, &desc_x, &desc_y);
+
+ // In TensorFlow, the dimensions are canonically named (batch_number, row,
+ // col, channel), with extents (batches, height, width, depth), with the
+ // trailing dimension changing most rapidly (channels has the smallest
+ // stride, typically 1 element).
+ //
+ // In generated C code, we store arrays with the dimensions reversed. The
+ // first dimension has smallest stride.
+ //
+ // We name our variables by their TensorFlow convention, but generate C code
+ // nesting loops such that the innermost loop has the smallest stride for
+ // the best cache behavior.
+ for (int b = 0; b < extended_output_shape.Dims(0); ++b)
+ {
+ for (int y = 0; y < extended_output_shape.Dims(1); ++y)
+ {
+ for (int x = 0; x < extended_output_shape.Dims(2); ++x)
+ {
+ for (int c = 0; c < extended_output_shape.Dims(3); ++c)
+ {
+ const int condition_index = SubscriptToIndex(desc_condition, b, y, x, c);
+ const int x_index = SubscriptToIndex(desc_x, b, y, x, c);
+ const int y_index = SubscriptToIndex(desc_y, b, y, x, c);
+ output_data[Offset(extended_output_shape, b, y, x, c)] =
+ input_condition_data[condition_index] ? input_x_data[x_index] : input_y_data[y_index];
+ }
+ }
+ }
+ }
+}
+
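+// Writes the row-major coordinates of every true element of the condition
+// tensor into `output_data` (cond_rank values per hit), similar to
+// TensorFlow's tf.where on a boolean tensor.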
+template <typename D, typename T>
+void SelectTrueCoords(const RuntimeShape &input_condition_shape, const D *input_condition_data,
+ T *output_data)
+{
+ const size_t size = input_condition_shape.FlatSize();
+ if (size == 0)
+ {
+ // The flattened size is zero, so there is nothing to output.
+ return;
+ }
+ const size_t cond_rank = input_condition_shape.DimensionsCount();
+
+ std::vector<int> dims_to_count(cond_rank, 0);
+ int cur_flat_size = size;
+ for (int i = 0; i < cond_rank; ++i)
+ {
+ dims_to_count[i] = cur_flat_size / input_condition_shape.Dims(i);
+ cur_flat_size = dims_to_count[i];
+ }
+
+ int output_index = 0;
+ for (int i = 0; i < size; ++i)
+ {
+ if (input_condition_data[i])
+ {
+ // Insert the coordinate of the current item (row major) into output.
+ int flat_index = i;
+ for (int j = 0; j < cond_rank; ++j)
+ {
+ int coord_j = flat_index / dims_to_count[j];
+ output_data[output_index * cond_rank + j] = coord_j;
+ flat_index %= dims_to_count[j];
+ }
+ output_index++;
+ }
+ }
+}
+
+// For ease of implementation, the indices are always a vector of size-4 vectors.
+template <typename T, typename TI>
+inline void SparseToDense(const std::vector<std::vector<TI>> &indices, const T *values,
+ T default_value, bool value_is_scalar,
+ const RuntimeShape &unextended_output_shape, T *output_data)
+{
+ TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+ const RuntimeShape output_shape = RuntimeShape::ExtendedShape(4, unextended_output_shape);
+ const int value_count = indices.size();
+
+ // First fill the output_data with default value.
+ const int num_elements = output_shape.FlatSize();
+ for (int i = 0; i < num_elements; ++i)
+ {
+ output_data[i] = default_value;
+ }
+
+ // Special-case the scalar-value path to avoid re-checking the boolean
+ // condition on every loop iteration.
+ if (value_is_scalar)
+ {
+ for (int i = 0; i < value_count; ++i)
+ {
+ const std::vector<TI> &index = indices[i];
+ TFLITE_DCHECK_EQ(index.size(), 4);
+ const T value = *values; // just use the first value.
+ output_data[Offset(output_shape, index[0], index[1], index[2], index[3])] = value;
+ }
+ return;
+ }
+
+ // Go through the values and indices to fill the sparse values.
+ for (int i = 0; i < value_count; ++i)
+ {
+ const std::vector<TI> &index = indices[i];
+ TFLITE_DCHECK_EQ(index.size(), 4);
+ const T value = values[i];
+ output_data[Offset(output_shape, index[0], index[1], index[2], index[3])] = value;
+ }
+}
+
+template <typename T>
+inline void Pow(const RuntimeShape &input1_shape, const T *input1_data,
+ const RuntimeShape &input2_shape, const T *input2_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ const int flat_size = MatchingFlatSize(input1_shape, input2_shape, output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ output_data[i] = std::pow(input1_data[i], input2_data[i]);
+ }
+}
+
+template <typename T>
+inline void BroadcastPow4DSlow(const RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const RuntimeShape &unextended_input2_shape, const T *input2_data,
+ const RuntimeShape &unextended_output_shape, T *output_data)
+{
+ TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+ const RuntimeShape output_shape = RuntimeShape::ExtendedShape(4, unextended_output_shape);
+
+ NdArrayDesc<4> desc1;
+ NdArrayDesc<4> desc2;
+ NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, unextended_input2_shape, &desc1,
+ &desc2);
+
+ for (int b = 0; b < output_shape.Dims(0); ++b)
+ {
+ for (int y = 0; y < output_shape.Dims(1); ++y)
+ {
+ for (int x = 0; x < output_shape.Dims(2); ++x)
+ {
+ for (int c = 0; c < output_shape.Dims(3); ++c)
+ {
+ auto out_idx = Offset(output_shape, b, y, x, c);
+ auto in1_idx = SubscriptToIndex(desc1, b, y, x, c);
+ auto in2_idx = SubscriptToIndex(desc2, b, y, x, c);
+ auto in1_val = input1_data[in1_idx];
+ auto in2_val = input2_data[in2_idx];
+ output_data[out_idx] = std::pow(in1_val, in2_val);
+ }
+ }
+ }
+ }
+}
+
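+// Reverses the input along `axis`, copying `copy_size`-element inner slices
+// in back-to-front order within each outer block.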
+template <typename Scalar>
+void Reverse(int axis, const RuntimeShape &input_shape, const Scalar *input_data,
+ const RuntimeShape &output_shape, Scalar *output_data)
+{
+ ruy::profiler::ScopeLabel label("Reverse");
+
+ int outer_size = 1;
+ for (int i = 0; i < axis; ++i)
+ {
+ outer_size *= input_shape.Dims(i);
+ }
+
+ int copy_size = 1;
+ for (int i = axis + 1; i < input_shape.DimensionsCount(); ++i)
+ {
+ copy_size *= input_shape.Dims(i);
+ }
+
+ const int dims_at_axis = input_shape.Dims(axis);
+ for (int i = 0; i < outer_size; ++i)
+ {
+ for (int j = 0; j < dims_at_axis; ++j)
+ {
+ const int start_pos = (i * dims_at_axis + j) * copy_size;
+ Scalar *output_ptr = output_data + start_pos;
+ int loc = (i * dims_at_axis + dims_at_axis - j - 1) * copy_size;
+ memcpy(output_ptr, input_data + loc, copy_size * sizeof(Scalar));
+ }
+ }
+}
+
+template <typename Scalar, typename TS>
+void ReverseSequence(const TS *seq_lengths, const int seq_dim, const int batch_dim,
+ const RuntimeShape &input_shape, const Scalar *input_data,
+ const RuntimeShape &output_shape, Scalar *output_data)
+{
+ ruy::profiler::ScopeLabel label("ReverseSequence");
+
+ int outer_size = 1;
+ int outer_dim = std::min(batch_dim, seq_dim);
+ int medium_dim = std::max(batch_dim, seq_dim);
+ for (int i = 0; i < outer_dim; ++i)
+ {
+ outer_size *= input_shape.Dims(i);
+ }
+
+ int medium_size = 1;
+ for (int i = outer_dim + 1; i < medium_dim; ++i)
+ {
+ medium_size *= input_shape.Dims(i);
+ }
+
+ int copy_size = 1;
+ for (int i = medium_dim + 1; i < input_shape.DimensionsCount(); ++i)
+ {
+ copy_size *= input_shape.Dims(i);
+ }
+
+ const int dims_at_outer_dim = input_shape.Dims(outer_dim);
+ const int dims_at_medium_dim = input_shape.Dims(medium_dim);
+
+ Scalar *output_ptr;
+ if (batch_dim > seq_dim)
+ {
+ for (int i = 0; i < outer_size; ++i)
+ {
+ for (int j = 0; j < dims_at_outer_dim; ++j)
+ {
+ const int in_pos_base = (i * dims_at_outer_dim + j) * medium_size;
+ for (int p = 0; p < medium_size; ++p)
+ {
+ for (int q = 0; q < dims_at_medium_dim; ++q)
+ {
+ const int in_pos = ((in_pos_base + p) * dims_at_medium_dim + q) * copy_size;
+ const Scalar *in_ptr = input_data + in_pos;
+ int sl = seq_lengths[q] - 1;
+ if (j > sl)
+ {
+ output_ptr = output_data + in_pos;
+ }
+ else
+ {
+ const int out_pos_base = (i * dims_at_outer_dim + sl - j) * medium_size;
+ const int out_pos = ((out_pos_base + p) * dims_at_medium_dim + q) * copy_size;
+ output_ptr = output_data + out_pos;
+ }
+ memcpy(output_ptr, in_ptr, copy_size * sizeof(Scalar));
+ }
+ }
+ }
+ }
+ }
+ else if (batch_dim < seq_dim)
+ {
+ for (int i = 0; i < outer_size; ++i)
+ {
+ for (int j = 0; j < dims_at_outer_dim; ++j)
+ {
+ const int in_pos_base = (i * dims_at_outer_dim + j) * medium_size;
+ int sl = seq_lengths[j] - 1;
+ const int out_pos_base = (i * dims_at_outer_dim + j) * medium_size;
+ for (int p = 0; p < medium_size; ++p)
+ {
+ for (int q = 0; q < dims_at_medium_dim; ++q)
+ {
+ const int in_pos = ((in_pos_base + p) * dims_at_medium_dim + q) * copy_size;
+ const Scalar *in_ptr = input_data + in_pos;
+ if (q > sl)
+ {
+ output_ptr = output_data + in_pos;
+ }
+ else
+ {
+ const int out_pos = ((out_pos_base + p) * dims_at_medium_dim + sl - q) * copy_size;
+ output_ptr = output_data + out_pos;
+ }
+ memcpy(output_ptr, in_ptr, copy_size * sizeof(Scalar));
+ }
+ }
+ }
+ }
+ }
+}
+
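+// Sums input rows that share a segment id into the corresponding output row.
+// Assumes every id in `segment_ids_data` lies in [0, output_shape.Dims(0)).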
+template <typename T>
+inline void SegmentSum(const RuntimeShape &input_shape, const T *input_data,
+ const RuntimeShape &segment_ids_shape, const int32_t *segment_ids_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ const int segment_flat_size = MatchingFlatSizeSkipDim(input_shape, 0, output_shape);
+
+ memset(output_data, 0, sizeof(T) * output_shape.FlatSize());
+
+ for (int i = 0; i < input_shape.Dims(0); i++)
+ {
+ int output_index = segment_ids_data[i];
+ for (int j = 0; j < segment_flat_size; ++j)
+ {
+ output_data[output_index * segment_flat_size + j] += input_data[i * segment_flat_size + j];
+ }
+ }
+}
+
+} // namespace reference_ops
+} // namespace tflite
+
+#endif // LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
diff --git a/compiler/luci-interpreter/pal/mcu/pal.cmake b/compiler/luci-interpreter/pal/mcu/pal.cmake
new file mode 100644
index 000000000..907d51de6
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/pal.cmake
@@ -0,0 +1,56 @@
+macro(initialize_pal)
+ nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowRuySource EXACT 2.6.0 QUIET)
+
+ if (NOT TensorFlowSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: TensorFlow not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowGEMMLowpSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: gemmlowp not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowEigenSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Eigen not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowRuySource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Ruy not found")
+ return()
+ endif ()
+ #find_package(Threads REQUIRED)
+
+ set(PAL_INITIALIZED TRUE)
+endmacro()
+
+macro(add_pal_to_target TGT)
+ target_include_directories(${TGT} PRIVATE "${PAL}")
+ target_include_directories(${TGT} PRIVATE
+ "${TensorFlowRuySource_DIR}"
+ "${TensorFlowGEMMLowpSource_DIR}"
+ "${TensorFlowEigenSource_DIR}"
+ "${TensorFlowSource_DIR}")
+ target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR})
+
+ # TODO Revisit this: instead of compiling these TensorFlow sources here,
+ # add sources with visitors in this library.
+ set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc)
+ add_library(luci_interpreter_mcu_pal STATIC ${PAL_SOURCES})
+ set_target_properties(luci_interpreter_mcu_pal PROPERTIES POSITION_INDEPENDENT_CODE ON)
+ target_include_directories(luci_interpreter_mcu_pal PRIVATE
+ "${TensorFlowRuySource_DIR}"
+ "${TensorFlowGEMMLowpSource_DIR}"
+ "${TensorFlowEigenSource_DIR}"
+ "${TensorFlowSource_DIR}"
+ )
+
+ target_link_libraries(${TGT} PRIVATE luci_interpreter_mcu_pal)
+ #target_link_libraries(${TGT} PRIVATE Threads::Threads luci_interpreter_mcu_pal)
+endmacro()
diff --git a/compiler/luci-interpreter/src/BuddyMemoryManager.cpp b/compiler/luci-interpreter/src/BuddyMemoryManager.cpp
new file mode 100644
index 000000000..14bc75efe
--- /dev/null
+++ b/compiler/luci-interpreter/src/BuddyMemoryManager.cpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/BuddyMemoryManager.h"
+
+namespace luci_interpreter
+{
+
+BuddyMemoryManager::BuddyMemoryManager(uint8_t *memory_start, int32_t memSize)
+{
+ int32_t p = lowerLog2(memSize);
+
+ // We assume that the requested size of memory does not exceed 4 GB
+ assert(p < 32);
+ memSize = 1 << p;
+
+ _start_block = reinterpret_cast<Block *>(memory_start);
+ _start_block->size = memSize - sizeof(Block);
+ _start_block->is_free = true;
+ _start_block->self = _start_block;
+ _num_blocks = 0;
+ _size = _start_block->size;
+
+ for (auto &_free_block : _free_blocks)
+ _free_block = nullptr;
+
+ addToBlocks(_start_block, p);
+}
+
+void BuddyMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
+{
+ const size_t element_size = getDataTypeSize(tensor.element_type());
+ const int32_t num_elements = tensor.shape().num_elements();
+ auto size = num_elements * element_size;
+ auto footprint = size + sizeof(Block);
+ auto l = (footprint & (footprint - 1)) == 0
+ ? lowerLog2(footprint)
+ : lowerLog2(footprint) + 1; // round up when footprint is not a power of two
+
+ while (l < 32 && !_free_blocks[l])
+ l++;
+
+ if (l >= 32)
+ {
+ throw std::runtime_error{"Memory limit exceeded"};
+ }
+
+ Block *tmp;
+ tmp = _free_blocks[l];
+ removeFromBlocks(tmp, l);
+
+ while ((tmp->size + sizeof(Block)) / 2 >= size + sizeof(Block))
+ {
+ divideBlock(tmp, l);
+ l--;
+ }
+
+ tmp->is_free = false;
+ tmp->self = tmp;
+ _num_blocks++;
+
+ auto *data = (uint8_t *)(tmp + 1);
+ tensor.set_data_buffer(data);
+}
+
+void BuddyMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
+{
+ auto data = tensor.data<void>();
+ auto *tmp = (Block *)((uint8_t *)data - sizeof(Block));
+
+ assert(tmp->self == tmp);
+
+ tmp->is_free = true;
+ addToBlocks(tmp, lowerLog2(tmp->size + sizeof(Block)));
+
+ while (tmp)
+ if (tmp->size == _size)
+ break;
+ else
+ tmp = mergeBlock(tmp);
+
+ _num_blocks--;
+ tensor.set_data_buffer(nullptr);
+}
+
+} // namespace luci_interpreter
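[editor's note] A worked instance of the size arithmetic allocate_memory performs above, assuming lowerLog2 is floor(log2); the header size used here is illustrative, not the real sizeof(Block):

#include <cstdint>
#include <cstdio>

// floor(log2(v)), matching the assumed behavior of BuddyMemoryManager::lowerLog2
static int32_t lowerLog2(uint32_t v)
{
  int32_t p = -1;
  while (v) { v >>= 1; ++p; }
  return p;
}

int main()
{
  const uint32_t header = 16;        // illustrative block header size
  const uint32_t tensor_bytes = 100; // requested tensor size
  const uint32_t footprint = tensor_bytes + header;
  // Round up to the free-list level whose block size is the next power of two.
  const bool pow2 = (footprint & (footprint - 1)) == 0;
  const int32_t level = pow2 ? lowerLog2(footprint) : lowerLog2(footprint) + 1;
  std::printf("footprint=%u -> level=%d (block of %u bytes)\n", footprint, level, 1u << level);
  // prints: footprint=116 -> level=7 (block of 128 bytes)
}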
diff --git a/compiler/luci-interpreter/src/BuddyMemoryManager.test.cpp b/compiler/luci-interpreter/src/BuddyMemoryManager.test.cpp
new file mode 100644
index 000000000..29fb767b7
--- /dev/null
+++ b/compiler/luci-interpreter/src/BuddyMemoryManager.test.cpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/BuddyMemoryManager.h"
+#include <gtest/gtest.h>
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(BuddyMemoryManager, basic)
+{
+ auto mem_pool = std::make_unique<uint8_t[]>(200);
+ auto buddy_memory_manager = std::make_unique<BuddyMemoryManager>(mem_pool.get(), 130);
+ Tensor first_tensor(DataType::U8, Shape({8}), AffineQuantization{}, "first_tensor");
+
+ buddy_memory_manager->allocate_memory(first_tensor);
+
+ uint8_t data_1[] = {1, 2, 3, 4, 5, 6, 7, 8};
+
+ first_tensor.writeData(data_1, 8);
+ uint8_t array_1[8];
+ first_tensor.readData(array_1, 8);
+ for (int i = 0; i < 8; i++)
+ {
+ EXPECT_EQ(data_1[i], array_1[i]);
+ }
+
+ Tensor second_tensor(DataType::U8, Shape({2, 5}), AffineQuantization{}, "second_tensor");
+ buddy_memory_manager->allocate_memory(second_tensor);
+
+ uint8_t data_2[2][5] = {{11, 22, 33, 44, 55}, {12, 23, 34, 45, 56}};
+ second_tensor.writeData(data_2, 10);
+
+ uint8_t array_2[2][5];
+ second_tensor.readData(array_2, 10);
+ for (int i = 0; i < 2; i++)
+ {
+ for (int j = 0; j < 5; j++)
+ {
+ EXPECT_EQ(data_2[i][j], array_2[i][j]);
+ }
+ }
+
+ buddy_memory_manager->release_memory(first_tensor);
+ EXPECT_EQ(first_tensor.data<void>(), nullptr);
+
+ buddy_memory_manager->release_memory(second_tensor);
+ EXPECT_EQ(second_tensor.data<void>(), nullptr);
+}
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/CMakeLists.txt b/compiler/luci-interpreter/src/CMakeLists.txt
index 47b68fa40..997b75a84 100644
--- a/compiler/luci-interpreter/src/CMakeLists.txt
+++ b/compiler/luci-interpreter/src/CMakeLists.txt
@@ -1,41 +1,61 @@
-nnas_find_package(TensorFlowSource EXACT 2.3.0 QUIET)
-nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.3.0 QUIET)
-nnas_find_package(TensorFlowEigenSource EXACT 2.3.0 QUIET)
-nnas_find_package(TensorFlowRuySource EXACT 2.3.0 QUIET)
+include("${LUCI_INTERPRETER_PAL_DIR}/pal.cmake")
-if (NOT TensorFlowSource_FOUND)
- message(STATUS "Skipping luci-interpreter: TensorFlow not found")
- return()
-endif ()
+initialize_pal()
-if (NOT TensorFlowGEMMLowpSource_FOUND)
- message(STATUS "Skipping luci-interpreter: gemmlowp not found")
+if (NOT PAL_INITIALIZED)
+ message("PAL Failed to initialize, skip luci-interpreter")
return()
-endif ()
+endif()
-if (NOT TensorFlowEigenSource_FOUND)
- message(STATUS "Skipping luci-interpreter: Eigen not found")
- return()
-endif ()
+message(STATUS "LUCI INTERPRETER BEGIN")
-if (NOT TensorFlowRuySource_FOUND)
- message(STATUS "Skipping luci-interpreter: Ruy not found")
- return()
-endif ()
+set(LUCI_INTERPRETER_BINARY "luci_interpreter${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_CORE "luci_interpreter_core${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_KERNELS "luci_interpreter_kernels${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_LOADER "luci_interpreter_loader${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_IMPORT "luci_interpreter_import${LUCI_INTERPRETER_SUFFIX}")
add_subdirectory(core)
+message(STATUS "LUCI INTERPRETER CORE")
add_subdirectory(kernels)
+message(STATUS "LUCI INTERPRETER KERNELS")
add_subdirectory(loader)
+message(STATUS "LUCI INTERPRETER LOADER")
+add_subdirectory(import)
+message(STATUS "LUCI INTERPRETER IMPORT")
+
+message(STATUS "LUCI INTERPTER INITALIZED")
set(SOURCES
"${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/Interpreter.h"
- Interpreter.cpp)
+ Interpreter.cpp "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/SimpleMemoryManager.h" SimpleMemoryManager.cpp
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/TestMemoryManager.h" TestMemoryManager.cpp
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/BuddyMemoryManager.h" BuddyMemoryManager.cpp
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/StaticMemoryManager.h" StaticMemoryManager.cpp)
+
+if (NOT LUCI_INTERPRETER_STATIC)
+ add_library(${LUCI_INTERPRETER_BINARY} SHARED ${SOURCES})
+else ()
+ add_library(${LUCI_INTERPRETER_BINARY} STATIC ${SOURCES})
+endif ()
-add_library(luci_interpreter SHARED ${SOURCES})
-target_include_directories(luci_interpreter PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
-target_include_directories(luci_interpreter PRIVATE "${LUCI_INTERPRETER_SOURCE_DIR}")
-target_link_libraries(luci_interpreter
- PUBLIC luci_lang luci_interpreter_loader luci_interpreter_core
+set(TEST_SOURCES BuddyMemoryManager.test.cpp)
+
+target_include_directories(${LUCI_INTERPRETER_BINARY} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
+target_include_directories(${LUCI_INTERPRETER_BINARY} PRIVATE "${LUCI_INTERPRETER_SOURCE_DIR}")
+target_link_libraries(${LUCI_INTERPRETER_BINARY}
+ PUBLIC luci_lang ${LUCI_INTERPRETER_LOADER} ${LUCI_INTERPRETER_CORE}
PRIVATE nncc_common)
-install(TARGETS luci_interpreter DESTINATION lib)
+install(TARGETS ${LUCI_INTERPRETER_BINARY} DESTINATION lib)
+install(DIRECTORY include/ DESTINATION include
+ FILES_MATCHING PATTERN "*.h")
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(buddy_manager_test ${TEST_SOURCES})
+target_link_libraries(buddy_manager_test ${LUCI_INTERPRETER_BINARY})
diff --git a/compiler/luci-interpreter/src/Interpreter.cpp b/compiler/luci-interpreter/src/Interpreter.cpp
index 639ffc1f0..8cf272efd 100644
--- a/compiler/luci-interpreter/src/Interpreter.cpp
+++ b/compiler/luci-interpreter/src/Interpreter.cpp
@@ -15,6 +15,7 @@
*/
#include "luci_interpreter/Interpreter.h"
+#include "luci_interpreter/SimpleMemoryManager.h"
#include "loader/ModuleLoader.h"
@@ -31,7 +32,7 @@ class EventNotifierImpl final : public EventNotifier
public:
EventNotifierImpl(const RuntimeToIR &runtime_to_ir,
const std::vector<ExecutionObserver *> &observers)
- : _runtime_to_ir(runtime_to_ir), _observers(observers)
+ : _runtime_to_ir(runtime_to_ir), _observers(observers)
{
}
@@ -74,7 +75,25 @@ Interpreter::Interpreter(const luci::Module *module)
_runtime_to_ir = std::make_unique<RuntimeToIR>();
_event_notifier = std::make_unique<EventNotifierImpl>(*_runtime_to_ir, _observers);
_runtime_module = std::make_unique<RuntimeModule>(_event_notifier.get());
- ModuleLoader loader(module, _runtime_module.get(), *_runtime_to_ir, _node_to_tensor);
+
+ _default_memory_manager = std::make_unique<SimpleMemoryManager>();
+
+ ModuleLoader loader(module, _runtime_module.get(), *_runtime_to_ir, _node_to_tensor,
+ _default_memory_manager.get());
+ loader.load();
+}
+
+Interpreter::Interpreter(const luci::Module *module,
+ luci_interpreter::IMemoryManager *memory_manager)
+{
+ assert(memory_manager && "Use Interpreter::Interpreter(module) constructor instead");
+
+ _runtime_to_ir = std::make_unique<RuntimeToIR>();
+ _event_notifier = std::make_unique<EventNotifierImpl>(*_runtime_to_ir, _observers);
+ _runtime_module = std::make_unique<RuntimeModule>(_event_notifier.get());
+
+ ModuleLoader loader(module, _runtime_module.get(), *_runtime_to_ir, _node_to_tensor,
+ memory_manager);
loader.load();
}
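[editor's note] A usage sketch of the new two-argument constructor, using the BuddyMemoryManager introduced in this change. The pool size is arbitrary, and interpret() is assumed to be the existing public entry point of the Interpreter class:

#include "luci_interpreter/Interpreter.h"
#include "luci_interpreter/BuddyMemoryManager.h"

#include <memory>

// `module` is a loaded luci::Module obtained elsewhere.
void run_with_buddy_manager(const luci::Module *module)
{
  auto pool = std::make_unique<uint8_t[]>(1 << 20); // 1 MiB arena, arbitrary
  luci_interpreter::BuddyMemoryManager mm(pool.get(), 1 << 20);

  // Tensors are now allocated from the buddy pool instead of the default
  // SimpleMemoryManager used by the single-argument constructor.
  luci_interpreter::Interpreter interpreter(module, &mm);
  interpreter.interpret();
}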
diff --git a/compiler/luci-interpreter/src/SimpleMemoryManager.cpp b/compiler/luci-interpreter/src/SimpleMemoryManager.cpp
new file mode 100644
index 000000000..230e39896
--- /dev/null
+++ b/compiler/luci-interpreter/src/SimpleMemoryManager.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/SimpleMemoryManager.h"
+
+namespace luci_interpreter
+{
+
+void SimpleMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
+{
+ if (!tensor.is_allocatable())
+ {
+ return;
+ }
+ if (tensor.is_data_allocated())
+ {
+ release_memory(tensor);
+ }
+ const auto element_size = getDataTypeSize(tensor.element_type());
+ const auto num_elements = tensor.shape().num_elements();
+
+ auto *data = new uint8_t[num_elements * element_size];
+ tensor.set_data_buffer(data);
+}
+
+void SimpleMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
+{
+ if (!tensor.is_data_allocated())
+ {
+ tensor.set_data_buffer(nullptr);
+ return;
+ }
+ auto data = tensor.data<uint8_t>();
+ delete[] data;
+ tensor.set_data_buffer(nullptr);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/StaticMemoryManager.cpp b/compiler/luci-interpreter/src/StaticMemoryManager.cpp
new file mode 100644
index 000000000..73a819919
--- /dev/null
+++ b/compiler/luci-interpreter/src/StaticMemoryManager.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/StaticMemoryManager.h"
+
+namespace luci_interpreter
+{
+
+void StaticMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
+{
+ if (!tensor.is_allocatable())
+ {
+ return;
+ }
+ int32_t offset = tensor.get_offset();
+ assert(offset >= 0);
+ auto tensor_ptr = _buffer_ptr + offset;
+ tensor.set_data_buffer(tensor_ptr);
+}
+
+void StaticMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
+{
+ tensor.set_data_buffer(nullptr);
+}
+
+} // namespace luci_interpreter
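[editor's note] The contract above is easy to miss: StaticMemoryManager never owns memory. A minimal sketch, assuming the offsets come from an external planner (e.g., an execution-plan tool) and _buffer_ptr is one caller-owned arena:

#include <cstdint>

// "Allocation" is just base + planned offset; "release" only clears the
// tensor's data pointer and frees nothing.
uint8_t *resolve(uint8_t *arena_base, int32_t planned_offset)
{
  return arena_base + planned_offset;
}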
diff --git a/compiler/luci-interpreter/src/TestMemoryManager.cpp b/compiler/luci-interpreter/src/TestMemoryManager.cpp
new file mode 100644
index 000000000..3beeee55c
--- /dev/null
+++ b/compiler/luci-interpreter/src/TestMemoryManager.cpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+
+void TestMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
+{
+ if (!tensor.is_allocatable())
+ {
+ return;
+ }
+ if (tensor.is_data_allocated())
+ {
+ release_memory(tensor);
+ }
+ const auto element_size = getDataTypeSize(tensor.element_type());
+ const auto num_elements = tensor.shape().num_elements();
+
+ auto *data = new uint8_t[num_elements * element_size];
+ allocations.push_back(data);
+ tensor.set_data_buffer(data);
+}
+
+void TestMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
+{
+ tensor.set_data_buffer(nullptr);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/core/CMakeLists.txt b/compiler/luci-interpreter/src/core/CMakeLists.txt
index e576dbd94..c2471e01c 100644
--- a/compiler/luci-interpreter/src/core/CMakeLists.txt
+++ b/compiler/luci-interpreter/src/core/CMakeLists.txt
@@ -9,9 +9,11 @@ set(SOURCES
RuntimeModule.h
Tensor.cpp)
-add_library(luci_interpreter_core STATIC ${SOURCES})
-set_target_properties(luci_interpreter_core PROPERTIES POSITION_INDEPENDENT_CODE ON)
-target_include_directories(luci_interpreter_core PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
-target_include_directories(luci_interpreter_core PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}")
-target_link_libraries(luci_interpreter_core PUBLIC luci_lang)
-target_link_libraries(luci_interpreter_core PRIVATE nncc_common)
+add_library(${LUCI_INTERPRETER_CORE} STATIC ${SOURCES})
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(${LUCI_INTERPRETER_CORE} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
+target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
+target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}")
+target_link_libraries(${LUCI_INTERPRETER_CORE} PUBLIC luci_lang)
+target_link_libraries(${LUCI_INTERPRETER_CORE} PRIVATE nncc_common)
diff --git a/compiler/luci-interpreter/src/core/Kernel.h b/compiler/luci-interpreter/src/core/Kernel.h
index 5f5efb219..a7c4a4218 100644
--- a/compiler/luci-interpreter/src/core/Kernel.h
+++ b/compiler/luci-interpreter/src/core/Kernel.h
@@ -29,15 +29,15 @@ class Kernel
{
protected:
Kernel(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs)
- : _inputs(std::move(inputs)), _outputs(std::move(outputs))
+ : _inputs(std::move(inputs)), _outputs(std::move(outputs))
{
}
public:
virtual ~Kernel() = default;
- std::vector<const Tensor *> getInputTensors() const { return _inputs; }
- std::vector<Tensor *> getOutputTensors() const { return _outputs; }
+ const std::vector<const Tensor *> &getInputTensors() const { return _inputs; }
+ const std::vector<Tensor *> &getOutputTensors() const { return _outputs; }
// Configures the kernel.
// This function is currently called once for each kernel during interpreter construction,
@@ -59,7 +59,7 @@ template <typename Params> class KernelWithParams : public Kernel
protected:
KernelWithParams(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs,
const Params &params)
- : Kernel(std::move(inputs), std::move(outputs)), _params(params)
+ : Kernel(std::move(inputs), std::move(outputs)), _params(params)
{
}
diff --git a/compiler/luci-interpreter/src/core/KernelParams.h b/compiler/luci-interpreter/src/core/KernelParams.h
index 65d119761..4ddbcefb8 100644
--- a/compiler/luci-interpreter/src/core/KernelParams.h
+++ b/compiler/luci-interpreter/src/core/KernelParams.h
@@ -19,6 +19,7 @@
#include <luci/IR/AttrPadding.h>
#include <luci/IR/AttrFusedActFunc.h>
+#include <luci/IR/AttrMirrorPadMode.h>
#include <luci_interpreter/core/DataType.h>
#include <cstdint>
@@ -30,6 +31,7 @@ namespace luci_interpreter
// Inject commonly used types into `luci_interpreter` namespace for convenience.
using Activation = luci::FusedActFunc;
using Padding = luci::Padding;
+using MirrorPadMode = luci::MirrorPadMode;
struct AddParams
{
@@ -41,9 +43,16 @@ struct ArgMaxParams
DataType output_type;
};
+struct BatchMatMulParams
+{
+ bool adj_x;
+ bool adj_y;
+};
+
struct ConcatenationParams
{
int axis;
+ Activation activation;
};
struct Conv2DParams
@@ -72,9 +81,32 @@ struct DepthwiseConv2DParams
Activation activation;
};
+struct DivParams
+{
+ Activation activation;
+};
+
struct FullyConnectedParams
{
Activation activation;
+ bool keep_num_dims = false;
+};
+
+struct GatherParams
+{
+ int32_t axis;
+ int32_t batch_dims;
+};
+
+struct GeluParams
+{
+ bool approximate;
+};
+
+struct InstanceNormParams
+{
+ float epsilon;
+ Activation activation;
};
struct L2NormParams
@@ -95,11 +127,27 @@ struct LocalResponseNormalizationParams
float beta;
};
+struct MirrorPadParams
+{
+ MirrorPadMode mode;
+};
+
struct MulParams
{
Activation activation;
};
+struct OneHotParams
+{
+ int32_t axis;
+};
+
+struct PackParams
+{
+ int32_t values_count;
+ int32_t axis;
+};
+
struct Pool2DParams
{
Padding padding;
@@ -115,6 +163,35 @@ struct ReducerParams
bool keep_dims;
};
+struct ResizeBilinearParams
+{
+ bool align_corners;
+ bool half_pixel_centers;
+};
+
+struct ResizeNearestNeighborParams
+{
+ bool align_corners;
+ bool half_pixel_centers;
+};
+
+struct ShapeParams
+{
+ loco::DataType out_type;
+};
+
+struct SubParams
+{
+ Activation activation;
+};
+
+struct SVDFParams
+{
+ bool asymmetric_quantize_inputs;
+ int32_t svdf_rank;
+ Activation activation;
+};
+
struct SpaceToDepthParams
{
int block_size;
@@ -144,6 +221,16 @@ struct TransposeConvParams
Padding padding;
int32_t stride_height;
int32_t stride_width;
+ Activation activation;
+};
+
+struct UnidirectionalSequenceLSTMParams
+{
+ Activation activation;
+ float cell_clip;
+ float proj_clip;
+ bool time_major;
+ bool asymmetric_quantize_inputs;
};
struct UnpackParams
diff --git a/compiler/luci-interpreter/src/core/RuntimeGraph.cpp b/compiler/luci-interpreter/src/core/RuntimeGraph.cpp
index 06f0fed15..c2f8d2ea8 100644
--- a/compiler/luci-interpreter/src/core/RuntimeGraph.cpp
+++ b/compiler/luci-interpreter/src/core/RuntimeGraph.cpp
@@ -19,10 +19,102 @@
#include "core/RuntimeModule.h"
#include <algorithm>
+#include <unordered_map>
namespace luci_interpreter
{
+class RuntimeGraph::TensorAllocPlan
+{
+ std::vector<std::vector<Tensor *>> _alloc_plan;
+ std::vector<std::vector<Tensor *>> _dealloc_plan;
+ bool _valid = false;
+ IMemoryManager *_memory_manager;
+
+public:
+ explicit TensorAllocPlan(IMemoryManager *memory_manager);
+ void invalidate() { _valid = false; }
+ bool isValid() const { return _valid; }
+ void build(const RuntimeGraph &graph);
+ void allocate(size_t kernel_index) const;
+ void deallocate(size_t kernel_index) const;
+};
+
+RuntimeGraph::TensorAllocPlan::TensorAllocPlan(IMemoryManager *memory_manager)
+ : _memory_manager(memory_manager)
+{
+}
+
+void RuntimeGraph::TensorAllocPlan::build(const RuntimeGraph &graph)
+{
+ invalidate();
+ using Lifetime = std::pair<size_t, size_t>;
+ std::unordered_map<Tensor *, Lifetime> lifetimes;
+ const size_t num_kernels = graph._kernels.size();
+ for (size_t index = 0; index < num_kernels; ++index)
+ {
+ const auto &kernel = graph._kernels[index];
+ for (const Tensor *tensor : kernel->getInputTensors())
+ {
+ auto nc_tensor = const_cast<Tensor *>(tensor);
+ if (lifetimes.count(nc_tensor) > 0)
+ lifetimes.at(nc_tensor).second = index;
+ }
+ for (Tensor *tensor : kernel->getOutputTensors())
+ {
+ assert(lifetimes.count(tensor) == 0);
+ lifetimes[tensor] = Lifetime(index, index);
+ }
+ }
+ for (const Tensor *tensor : graph.getOutputTensors())
+ {
+ auto nc_tensor = const_cast<Tensor *>(tensor);
+ if (lifetimes.count(nc_tensor) > 0)
+ lifetimes.at(nc_tensor).second = num_kernels;
+ }
+ _alloc_plan.assign(num_kernels, std::vector<Tensor *>());
+ _dealloc_plan.assign(num_kernels + 1, std::vector<Tensor *>());
+ for (const auto &item : lifetimes)
+ {
+ _alloc_plan[item.second.first].push_back(item.first);
+ _dealloc_plan[item.second.second].push_back(item.first);
+ }
+ _valid = true;
+}
+
+void RuntimeGraph::TensorAllocPlan::allocate(size_t kernel_index) const
+{
+ assert(_valid && kernel_index < _alloc_plan.size());
+ for (Tensor *tensor : _alloc_plan[kernel_index])
+ {
+ _memory_manager->allocate_memory(*tensor);
+ }
+}
+
+void RuntimeGraph::TensorAllocPlan::deallocate(size_t kernel_index) const
+{
+ assert(_valid && kernel_index < _dealloc_plan.size());
+ for (Tensor *tensor : _dealloc_plan[kernel_index])
+ {
+ _memory_manager->release_memory(*tensor);
+ }
+}
+
+RuntimeGraph::RuntimeGraph(RuntimeModule *owning_module, IMemoryManager *memory_manager)
+ : _owning_module(owning_module), _memory_manager(memory_manager),
+ _tensor_alloc_plan(std::make_unique<TensorAllocPlan>(memory_manager))
+{
+}
+
+RuntimeGraph::~RuntimeGraph()
+{
+ for (auto &tensor : _tensors)
+ {
+ if (tensor->is_data_allocated())
+ _memory_manager->release_memory(*tensor);
+ }
+}
+
Tensor *RuntimeGraph::addTensor(std::unique_ptr<Tensor> &&tensor)
{
assert(tensor != nullptr);
@@ -44,14 +136,23 @@ void RuntimeGraph::setOutputTensors(const std::vector<Tensor *> &output_tensors)
_output_tensors = output_tensors;
}
+void RuntimeGraph::configureAllocations(Tensor *tensor)
+{
+ _memory_manager->allocate_memory(*tensor);
+}
+
void RuntimeGraph::addKernel(std::unique_ptr<Kernel> &&kernel)
{
assert(kernel != nullptr);
_kernels.push_back(std::move(kernel));
+ _tensor_alloc_plan->invalidate();
}
void RuntimeGraph::execute() const
{
+ if (!_tensor_alloc_plan->isValid())
+ _tensor_alloc_plan->build(*this);
+
EventNotifier *event_notifier = _owning_module->getEventNotifier();
// Notify the observers that the input tensors have changed.
@@ -59,12 +160,14 @@ void RuntimeGraph::execute() const
{
for (const Tensor *input_tensor : getInputTensors())
{
- event_notifier->postTensorWrite(input_tensor);
+ if (input_tensor->is_observable())
+ event_notifier->postTensorWrite(input_tensor);
}
}
- for (const auto &kernel : _kernels)
+ for (size_t index = 0; index < _kernels.size(); ++index)
{
+ const auto &kernel = _kernels[index];
if (event_notifier != nullptr)
{
event_notifier->preOperatorExecute(kernel.get());
@@ -73,6 +176,10 @@ void RuntimeGraph::execute() const
// TODO The `configure` method should only be called if the outputs of an operator need to be
// resized.
kernel->configure();
+
+ // Allocate this kernel's output tensors before it runs instead of relying on automatic allocation
+ _tensor_alloc_plan->allocate(index);
+
kernel->execute();
if (event_notifier != nullptr)
@@ -82,11 +189,12 @@ void RuntimeGraph::execute() const
for (const Tensor *tensor : kernel->getOutputTensors())
{
- if (event_notifier != nullptr)
+ if (event_notifier != nullptr && tensor->is_observable())
{
event_notifier->postTensorWrite(tensor);
}
}
+ _tensor_alloc_plan->deallocate(index);
}
}
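[editor's note] To illustrate the plan that TensorAllocPlan::build produces above, a toy lifetime computation over a linear kernel list; the tensor names and container choices here are illustrative, not the interpreter's API:

#include <cstdio>
#include <map>
#include <string>
#include <utility>
#include <vector>

int main()
{
  // Two kernels in a chain: t1 -> k0 -> t2 -> k1 -> t3 (t3 is a graph output).
  using Lifetime = std::pair<size_t, size_t>; // (kernel that defines, last kernel that uses)
  std::map<std::string, Lifetime> lifetimes = {
    {"t2", {0, 1}}, // produced by k0, last consumed by k1
    {"t3", {1, 2}}, // produced by k1, kept alive past the last kernel (index 2)
  };
  // alloc_plan[k]: tensors allocated right before kernel k runs.
  // dealloc_plan[k]: tensors released right after kernel k runs.
  std::vector<std::vector<std::string>> alloc_plan(2), dealloc_plan(3);
  for (const auto &it : lifetimes)
  {
    alloc_plan[it.second.first].push_back(it.first);
    dealloc_plan[it.second.second].push_back(it.first);
  }
  std::printf("k0 allocates %s; k1 releases %s\n", alloc_plan[0][0].c_str(),
              dealloc_plan[1][0].c_str());
  // prints: k0 allocates t2; k1 releases t2
}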
diff --git a/compiler/luci-interpreter/src/core/RuntimeGraph.h b/compiler/luci-interpreter/src/core/RuntimeGraph.h
index 6ddbea4e9..8184e249d 100644
--- a/compiler/luci-interpreter/src/core/RuntimeGraph.h
+++ b/compiler/luci-interpreter/src/core/RuntimeGraph.h
@@ -18,6 +18,7 @@
#define LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H
#include "luci_interpreter/core/Tensor.h"
+#include "luci_interpreter/MemoryManager.h"
#include "core/Kernel.h"
#include <memory>
@@ -30,14 +31,21 @@ class RuntimeModule;
class RuntimeGraph
{
+private:
+ class TensorAllocPlan;
+ friend class TensorAllocPlan;
+
public:
- explicit RuntimeGraph(RuntimeModule *owning_module) : _owning_module(owning_module) {}
+ explicit RuntimeGraph(RuntimeModule *owning_module, IMemoryManager *memory_manager);
+ ~RuntimeGraph();
Tensor *addTensor(std::unique_ptr<Tensor> &&tensor);
void setInputTensors(const std::vector<Tensor *> &input_tensors);
void setOutputTensors(const std::vector<Tensor *> &output_tensors);
+ void configureAllocations(Tensor *tensor);
+
const std::vector<Tensor *> &getInputTensors() const { return _input_tensors; }
const std::vector<Tensor *> &getOutputTensors() const { return _output_tensors; }
@@ -46,6 +54,7 @@ public:
void execute() const;
private:
+ IMemoryManager *_memory_manager;
RuntimeModule *_owning_module;
std::vector<std::unique_ptr<Tensor>> _tensors;
std::vector<Tensor *> _input_tensors;
@@ -53,6 +62,8 @@ private:
// Kernels in execution order.
std::vector<std::unique_ptr<Kernel>> _kernels;
+ // Allocation/deallocation schedule: a tensor is released after the last kernel that uses it
+ std::unique_ptr<TensorAllocPlan> _tensor_alloc_plan;
};
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/core/RuntimeModule.h b/compiler/luci-interpreter/src/core/RuntimeModule.h
index dccc3a173..78873b0ec 100644
--- a/compiler/luci-interpreter/src/core/RuntimeModule.h
+++ b/compiler/luci-interpreter/src/core/RuntimeModule.h
@@ -19,6 +19,7 @@
#include "core/RuntimeGraph.h"
#include "core/EventNotifier.h"
+#include "luci_interpreter/MemoryManager.h"
#include <memory>
#include <vector>
@@ -33,9 +34,9 @@ public:
EventNotifier *getEventNotifier() const { return _event_notifier; }
- RuntimeGraph *addGraph()
+ RuntimeGraph *addGraph(IMemoryManager *memory_manager)
{
- _graphs.push_back(std::make_unique<RuntimeGraph>(this));
+ _graphs.push_back(std::make_unique<RuntimeGraph>(this, memory_manager));
return _graphs.back().get();
}
diff --git a/compiler/luci-interpreter/src/core/Tensor.cpp b/compiler/luci-interpreter/src/core/Tensor.cpp
index 4fe7479e5..3c3c5ffff 100644
--- a/compiler/luci-interpreter/src/core/Tensor.cpp
+++ b/compiler/luci-interpreter/src/core/Tensor.cpp
@@ -24,12 +24,9 @@ namespace luci_interpreter
Tensor::Tensor(DataType element_type, Shape shape, AffineQuantization quantization,
std::string name)
- : _element_type(element_type), _shape(std::move(shape)), _quantization(std::move(quantization)),
- _name(std::move(name))
+ : _element_type(element_type), _shape(std::move(shape)), _quantization(std::move(quantization)),
+ _name(std::move(name)), _data_allocated(false)
{
- const size_t element_size = getDataTypeSize(_element_type);
- const int32_t num_elements = _shape.num_elements();
- _data = std::make_unique<uint8_t[]>(num_elements * element_size);
}
void Tensor::readData(void *data_ptr, size_t data_size) const
@@ -56,13 +53,6 @@ void Tensor::writeData(const void *data_ptr, size_t data_size)
std::memcpy(data<void>(), data_ptr, data_size);
}
-void Tensor::resize(const Shape &new_shape)
-{
- _shape = new_shape;
- const size_t element_size = getDataTypeSize(_element_type);
- const int32_t num_elements = _shape.num_elements();
- // NOTE: _data can be nullptr for empty tensors
- _data = std::make_unique<uint8_t[]>(num_elements * element_size);
-}
+void Tensor::resize(const Shape &new_shape) { _shape = new_shape; }
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/import/CMakeLists.txt b/compiler/luci-interpreter/src/import/CMakeLists.txt
new file mode 100644
index 000000000..dd9733f92
--- /dev/null
+++ b/compiler/luci-interpreter/src/import/CMakeLists.txt
@@ -0,0 +1,15 @@
+set(SOURCES
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/GraphBuilderRegistry.h"
+ GraphBuilderRegistry.cpp)
+
+# include specific builders
+file(GLOB_RECURSE NODES "Nodes/*")
+list(APPEND SOURCES ${NODES})
+
+add_library(${LUCI_INTERPRETER_IMPORT} STATIC ${SOURCES})
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(${LUCI_INTERPRETER_IMPORT} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
+
+target_include_directories(${LUCI_INTERPRETER_IMPORT} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
+target_link_libraries(${LUCI_INTERPRETER_IMPORT} PUBLIC luci_import)
diff --git a/compiler/luci-interpreter/src/import/GraphBuilderRegistry.cpp b/compiler/luci-interpreter/src/import/GraphBuilderRegistry.cpp
new file mode 100644
index 000000000..a33bca6a4
--- /dev/null
+++ b/compiler/luci-interpreter/src/import/GraphBuilderRegistry.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "luci_interpreter/GraphBuilderRegistry.h"
+#include "Nodes/CircleReferencingConst.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<luci::GraphBuilderSource> source_without_constant_copying()
+{
+ auto builder = std::make_unique<luci::GraphBuilderRegistry>();
+ {
+ // override the NodeBuilder for BUFFER-type tensors
+ builder->add(std::make_unique<CircleReferencingConstNodeBuilder>());
+ }
+
+ return builder;
+}
+
+} // namespace luci_interpreter
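[editor's note] A usage sketch for the factory above. The header path, the luci::Importer constructor taking a GraphBuilderSource, and importModule are assumptions about how the source is consumed:

#include "luci_interpreter/GraphBuilderRegistry.h"

#include <luci/Importer.h>

#include <memory>

// Import a circle model so that constant tensors reference the file buffer
// instead of copying it -- useful when the model data outlives the interpreter.
std::unique_ptr<luci::Module> import_without_copies(const circle::Model *model)
{
  auto source = luci_interpreter::source_without_constant_copying();
  luci::Importer importer(source.get()); // assumed constructor taking a source
  return importer.importModule(model);
}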
diff --git a/compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp b/compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp
new file mode 100644
index 000000000..14e90f240
--- /dev/null
+++ b/compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleReferencingConst.h"
+
+#include <vector>
+
+namespace
+{
+
+// Helper struct describing the data stored in custom_options of a CircleReferencingConst node
+struct ConstDataReference
+{
+ const uint8_t *data = nullptr;
+ uint32_t size = 0;
+};
+
+} // namespace
+
+namespace luci_interpreter
+{
+using namespace luci;
+
+CircleNode *CircleReferencingConstNodeBuilder::build(TensorIndex tensor_index,
+ GraphBuilderContext *context) const
+{
+ assert(tensor_index >= 0);
+
+ const auto graph = context->graph();
+ const auto reader = context->reader();
+ const auto tensors = reader->tensors();
+ auto const const_tensor = tensors[tensor_index];
+ assert(const_tensor != nullptr);
+ if (const_tensor->is_variable())
+ {
+ // Create CircleVariable for variable
+ return nullptr;
+ }
+
+ auto const buffer = wrap(reader->buffers()[const_tensor->buffer()]->data());
+ auto const const_dims = wrap(const_tensor->shape()); // in NHWC
+ if (const_dims.empty() && buffer.empty())
+ {
+ // unknown-shape tensor or scalar tensor
+ return nullptr;
+ }
+
+ // if tensor_index is used as the output of some other operator, this is not a constant
+ auto tensoroutputs = context->tensoroutputs();
+ if (tensoroutputs->find(tensor_index))
+ {
+ // other operator output tensor
+ return nullptr;
+ }
+
+ uint32_t num_elements = 1;
+ for (uint32_t r = 0; r < const_dims.size(); ++r)
+ {
+ num_elements = num_elements * const_dims[r];
+ }
+
+ if (buffer.empty() && num_elements > 0)
+ {
+ // tensor with no backing buffer data (not a constant)
+ return nullptr;
+ }
+
+ // create CircleReferencingConst
+ auto custom_node = graph->nodes()->create<CircleCustom>(0, 1);
+ {
+ custom_node->custom_code("CircleReferencingConst");
+
+ copy_tensor_attributes(const_tensor, custom_node);
+ custom_node->shape_status(luci::ShapeStatus::VALID);
+
+ // custom options store the buffer size and a raw pointer to the buffer's data
+ {
+ std::vector<uint8_t> custom_options(sizeof(ConstDataReference));
+ {
+ auto &const_data_ref = *reinterpret_cast<ConstDataReference *>(custom_options.data());
+ const_data_ref = {buffer.data(), buffer.size()};
+ }
+ custom_node->custom_options(custom_options);
+ }
+ }
+
+ // Expose the CircleCustom node's output through a CircleCustomOut node
+ auto out_node = graph->nodes()->create<CircleCustomOut>();
+ {
+ out_node->index(0);
+ out_node->input(custom_node);
+
+ copy_tensor_attributes(const_tensor, out_node);
+ out_node->shape_status(luci::ShapeStatus::VALID);
+ }
+
+ return out_node;
+}
+
+} // namespace luci_interpreter
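[editor's note] For reference, a sketch of how a consumer could recover the buffer reference that the builder above packs into the custom options. The struct layout mirrors ConstDataReference from this file; read_reference is an illustrative helper, and memcpy is used to avoid alignment pitfalls of reinterpret_cast:

#include <cstdint>
#include <cstring>
#include <vector>

struct ConstDataReference
{
  const uint8_t *data = nullptr;
  uint32_t size = 0;
};

// Decode the raw custom_options bytes back into the stored reference.
ConstDataReference read_reference(const std::vector<uint8_t> &custom_options)
{
  ConstDataReference ref{};
  std::memcpy(&ref, custom_options.data(), sizeof(ConstDataReference));
  return ref;
}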
diff --git a/compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.h b/compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.h
new file mode 100644
index 000000000..ed8f95124
--- /dev/null
+++ b/compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__
+#define __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__
+
+#include <luci/Import/NodeBuilder.h>
+
+#include <luci/IR/Nodes/CircleConst.h>
+
+namespace luci_interpreter
+{
+using namespace luci;
+
+/**
+ * @brief Builder that creates a CircleCustom node holding a pointer to the constant data of a tensor backed by a buffer.
+ */
+class CircleReferencingConstNodeBuilder : public TypedNodeBuilder<NodeBuilderType::BUFFER>
+{
+public:
+ CircleNode *build(TensorIndex tensor_index, GraphBuilderContext *ctx) const final;
+};
+
+} // namespace luci_interpreter
+
+#endif // __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__
diff --git a/compiler/luci-interpreter/src/kernels/Abs.cpp b/compiler/luci-interpreter/src/kernels/Abs.cpp
new file mode 100644
index 000000000..5c6331501
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Abs.cpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Abs.h"
+
+#include "kernels/Utils.h"
+
+#include <cmath> // abs for float
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Abs::Abs(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Abs::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+ output()->resize(input()->shape());
+}
+
+void Abs::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ eval<float>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T> void Abs::eval() const
+{
+ const auto *input_data = input()->data<T>();
+ auto *output_data = output()->data<T>();
+
+ const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
+
+ for (int i = 0; i < size; ++i)
+ {
+ output_data[i] = std::abs(input_data[i]);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Abs.h b/compiler/luci-interpreter/src/kernels/Abs.h
new file mode 100644
index 000000000..b5b874a99
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Abs.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_ABS_H
+#define LUCI_INTERPRETER_KERNELS_ABS_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Abs : public Kernel
+{
+public:
+ Abs(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> void eval() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_ABS_H
diff --git a/compiler/luci-interpreter/src/kernels/Abs.test.cpp b/compiler/luci-interpreter/src/kernels/Abs.test.cpp
new file mode 100644
index 000000000..2c42ab75c
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Abs.test.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Abs.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<T> input_data, std::initializer_list<T> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ Abs kernel(&input_tensor, &output_tensor);
+
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST(AbsTest, FloatSimple)
+{
+ Check<float>(/*input_shape=*/{2, 3},
+ /*output_shape=*/{2, 3},
+ /*input_data=*/
+ {
+ 0.0f, -1.0f, 3.0f, // Row 1
+ 1.0f, -1.0f, -2.0f, // Row 2
+ },
+ /*output_data=*/
+ {
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, 1.0f, 2.0f, // Row 2
+ });
+
+ SUCCEED();
+}
+
+TEST(AbsTest, Type_Mismatch_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<loco::DataType::S32>({3}, {1, -3, 2}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32);
+
+ Abs kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Add.cpp b/compiler/luci-interpreter/src/kernels/Add.cpp
index 9ed155e94..d7bf3084f 100644
--- a/compiler/luci-interpreter/src/kernels/Add.cpp
+++ b/compiler/luci-interpreter/src/kernels/Add.cpp
@@ -17,6 +17,7 @@
#include "kernels/Add.h"
+#include "kernels/BinaryOpCommon.h"
#include "kernels/Utils.h"
#include <tensorflow/lite/kernels/internal/reference/add.h>
@@ -30,16 +31,22 @@ namespace kernels
{
Add::Add(const Tensor *input1, const Tensor *input2, Tensor *output, const AddParams &params)
- : KernelWithParams<AddParams>({input1, input2}, {output}, params)
+ : KernelWithParams<AddParams>({input1, input2}, {output}, params)
{
}
void Add::configure()
{
- if (input1()->element_type() != input2()->element_type())
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+ if (input1()->element_type() == DataType::S16)
{
- throw std::runtime_error("Input Tensor Data Type Mismatch.");
+ LUCI_INTERPRETER_CHECK(input1()->zero_points().size() == 1 &&
+ input2()->zero_points().size() == 1);
+ LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 &&
+ output()->zero_point() == 0);
}
+
output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
}
@@ -50,9 +57,18 @@ void Add::execute() const
case DataType::FLOAT32:
evalFloat();
break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
case DataType::U8:
evalQuantized();
break;
+ case DataType::S16:
+ evalQuantizedS16();
+ break;
default:
throw std::runtime_error("Unsupported type.");
}
@@ -60,22 +76,17 @@ void Add::execute() const
void Add::evalFloat() const
{
- float activation_min{};
- float activation_max{};
- calculateActivationRange(_params.activation, &activation_min, &activation_max);
-
tflite::ArithmeticParams params{};
- params.float_activation_min = activation_min;
- params.float_activation_max = activation_max;
+ fillArithmeticActivationRange<float>(params, _params.activation);
const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
- getTensorShape(input1()), getTensorShape(input2()), &params);
+ getTensorShape(input1()), getTensorShape(input2()), &params);
if (need_broadcast)
{
tflite::reference_ops::BroadcastAdd4DSlow(
- params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
- getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
+ params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
+ getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
}
else
{
@@ -85,6 +96,28 @@ void Add::evalFloat() const
}
}
+template <typename T> void Add::evalInteger() const
+{
+ tflite::ArithmeticParams params{};
+ fillArithmeticActivationRange<T>(params, _params.activation);
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastAdd4DSlow(
+ params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
+ getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()));
+ }
+}
+
void Add::evalQuantized() const
{
const auto input1_scale = static_cast<double>(input1()->scale());
@@ -123,14 +156,13 @@ void Add::evalQuantized() const
params.quantized_activation_max = activation_max;
const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
- getTensorShape(input1()), getTensorShape(input2()), &params);
+ getTensorShape(input1()), getTensorShape(input2()), &params);
if (need_broadcast)
{
tflite::reference_ops::BroadcastAdd4DSlow(
- params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
- getTensorShape(input2()), getTensorData<uint8_t>(input2()), getTensorShape(output()),
- getTensorData<uint8_t>(output()));
+ params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()),
+ getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output()));
}
else
{
@@ -140,5 +172,49 @@ void Add::evalQuantized() const
}
}
+void Add::evalQuantizedS16() const
+{
+ const auto input1_scale = static_cast<double>(input1()->scale());
+ const auto input2_scale = static_cast<double>(input2()->scale());
+ const auto output_scale = static_cast<double>(output()->scale());
+
+ constexpr int left_shift = 12;
+ const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
+ const double real_input1_multiplier = input1_scale / twice_max_input_scale;
+ const double real_input2_multiplier = input2_scale / twice_max_input_scale;
+ const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
+
+ int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
+ int input1_shift{}, input2_shift{}, output_shift{};
+ quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift);
+ quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift);
+ quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift);
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ auto fn = [input1_multiplier, input1_shift, //
+ input2_multiplier, input2_shift, //
+ output_multiplier, output_shift, //
+ activation_min, activation_max](int16_t input1_val, int16_t input2_val) {
+ const int32_t shifted_input1_val = static_cast<int32_t>(input1_val) << left_shift;
+ const int32_t shifted_input2_val = static_cast<int32_t>(input2_val) << left_shift;
+ const int32_t scaled_input1_val = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ shifted_input1_val, input1_multiplier, input1_shift);
+ const int32_t scaled_input2_val = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ shifted_input2_val, input2_multiplier, input2_shift);
+ const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
+ const int32_t raw_output = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ raw_sum, output_multiplier, output_shift);
+ const int32_t clamped_output = std::min(activation_max, std::max(activation_min, raw_output));
+ return static_cast<int16_t>(clamped_output);
+ };
+
+ BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<int16_t>(input1()),
+ getTensorShape(input2()), getTensorData<int16_t>(input2()),
+ getTensorShape(output()), getTensorData<int16_t>(output()), fn);
+}
+
} // namespace kernels
} // namespace luci_interpreter
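[editor's note] A worked instance of what the S16 path above computes, approximated in floating point rather than via quantizeMultiplierSmallerThanOneExp; the scales and raw values are hand-picked for illustration only:

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main()
{
  // evalQuantizedS16 rescales both inputs to a common scale, sums them, then
  // rescales to the output scale; left_shift = 12 adds headroom before the
  // integer rescale. In real-valued terms (all zero points are 0):
  const double input1_scale = 0.01, input2_scale = 0.02, output_scale = 0.02;
  const int16_t x = 300, y = -50; // raw S16 input values

  const double real = (x * input1_scale + y * input2_scale) / output_scale;
  std::printf("expected raw output ~ %d\n", static_cast<int>(real + 0.5));
  // prints: expected raw output ~ 100
  // The integer kernel reaches the same value via shifted int32 multiplies.
}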
diff --git a/compiler/luci-interpreter/src/kernels/Add.h b/compiler/luci-interpreter/src/kernels/Add.h
index a1f7e0406..91d95b6af 100644
--- a/compiler/luci-interpreter/src/kernels/Add.h
+++ b/compiler/luci-interpreter/src/kernels/Add.h
@@ -39,7 +39,9 @@ public:
private:
void evalFloat() const;
+ template <typename T> void evalInteger() const;
void evalQuantized() const;
+ void evalQuantizedS16() const;
};
} // namespace kernels
diff --git a/compiler/luci-interpreter/src/kernels/Add.test.cpp b/compiler/luci-interpreter/src/kernels/Add.test.cpp
index 705b648c8..b8b1c3089 100644
--- a/compiler/luci-interpreter/src/kernels/Add.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Add.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/Add.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -27,6 +28,14 @@ namespace
using namespace testing;
+class AddTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
// for quantized Add, the error shouldn't exceed step
float GetTolerance(float min, float max)
{
@@ -34,108 +43,94 @@ float GetTolerance(float min, float max)
return kQuantizedStep;
}
-TEST(AddTest, Uint8)
+TEST_F(AddTest, Uint8)
{
std::initializer_list<int32_t> base_shape = {2, 3, 1, 2};
std::initializer_list<float> base_data = {-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
std::initializer_list<int32_t> test_shapes[] = {
- {1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ {1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
std::initializer_list<float> test_data = {0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
std::initializer_list<int32_t> output_shapes[] = {
- {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
+ {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
std::vector<std::vector<float>> output_data = {
- {-0.1f, 2.6f, -0.7f, 2.8f, 0.7f, 3.0f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f,
- 1.0f, -0.8f, 0.4f, -0.6f, 1.8f, -0.2f, 1.4f, 3.0f, 0.8f, 3.0f, 2.2f, 3.0f,
- -1.4f, 0.3f, -2.0f, 0.5f, -0.6f, 0.9f, 0.9f, -1.9f, 0.3f, -1.7f, 1.7f, -1.3f},
- {-0.1f, 2.6f, 0.5f, 1.0f, 1.8f, -0.2f, 1.4f, 3.0f, -2.0f, 0.5f, 1.7f, -1.3f},
- {-0.1f, 2.5f, 0.0f, 2.6f, -0.7f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f,
- 1.0f, -0.9f, 1.1f, -0.8f, 0.4f, -1.5f, 1.7f, 3.0f, 2.2f, 3.0f, 2.1f, 3.0f,
- -1.1f, 0.5f, -0.6f, 1.0f, -0.7f, 0.9f, 1.2f, -1.7f, 1.7f, -1.2f, 1.6f, -1.3f},
- {-0.1f, 2.5f, 1.2f, 0.8f, 0.4f, -1.5f, 1.7f, 3.0f, -0.6f, 1.0f, 1.6f, -1.3f}};
+ {-0.1f, 2.6f, -0.7f, 2.8f, 0.7f, 3.0f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f,
+ 1.0f, -0.8f, 0.4f, -0.6f, 1.8f, -0.2f, 1.4f, 3.0f, 0.8f, 3.0f, 2.2f, 3.0f,
+ -1.4f, 0.3f, -2.0f, 0.5f, -0.6f, 0.9f, 0.9f, -1.9f, 0.3f, -1.7f, 1.7f, -1.3f},
+ {-0.1f, 2.6f, 0.5f, 1.0f, 1.8f, -0.2f, 1.4f, 3.0f, -2.0f, 0.5f, 1.7f, -1.3f},
+ {-0.1f, 2.5f, 0.0f, 2.6f, -0.7f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f,
+ 1.0f, -0.9f, 1.1f, -0.8f, 0.4f, -1.5f, 1.7f, 3.0f, 2.2f, 3.0f, 2.1f, 3.0f,
+ -1.1f, 0.5f, -0.6f, 1.0f, -0.7f, 0.9f, 1.2f, -1.7f, 1.7f, -1.2f, 1.6f, -1.3f},
+ {-0.1f, 2.5f, 1.2f, 0.8f, 0.4f, -1.5f, 1.7f, 3.0f, -0.6f, 1.0f, 1.6f, -1.3f}};
float kQuantizedTolerance = GetTolerance(-3.f, 3.f);
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-3.f, 3.f);
for (int i = 0; i < output_data.size(); i++)
{
- Tensor input1_tensor{
- getElementType<uint8_t>(), base_shape, {{quant_param.first}, {quant_param.second}}, ""};
- Tensor input2_tensor{
- getElementType<uint8_t>(), test_shapes[i], {{quant_param.first}, {quant_param.second}}, ""};
- std::vector<uint8_t> quantized_input1_value =
- quantize<uint8_t>(base_data, quant_param.first, quant_param.second);
- std::vector<uint8_t> quantized_input2_value =
- quantize<uint8_t>(test_data, quant_param.first, quant_param.second);
- input1_tensor.writeData(quantized_input1_value.data(),
- quantized_input1_value.size() * sizeof(uint8_t));
- input2_tensor.writeData(quantized_input2_value.data(),
- quantized_input2_value.size() * sizeof(uint8_t));
+ Tensor input1_tensor = makeInputTensor<DataType::U8>(
+ base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U8>(
+ test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get());
Tensor output_tensor =
- makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
+ makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
AddParams params{};
params.activation = Activation::NONE;
Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
- EXPECT_THAT(dequantize<uint8_t>(extractTensorData<uint8_t>(output_tensor),
- output_tensor.scale(), output_tensor.zero_point()),
- ElementsAreArray(ArrayFloatNear(output_data[i], kQuantizedTolerance)));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data[i], kQuantizedTolerance));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i]));
}
// Re-run with exchanged inputs.
for (int i = 0; i < output_data.size(); i++)
{
- Tensor input1_tensor{
- getElementType<uint8_t>(), test_shapes[i], {{quant_param.first}, {quant_param.second}}, ""};
- Tensor input2_tensor{
- getElementType<uint8_t>(), base_shape, {{quant_param.first}, {quant_param.second}}, ""};
- std::vector<uint8_t> quantized_input1_value =
- quantize<uint8_t>(test_data, quant_param.first, quant_param.second);
- std::vector<uint8_t> quantized_input2_value =
- quantize<uint8_t>(base_data, quant_param.first, quant_param.second);
- input1_tensor.writeData(quantized_input1_value.data(),
- quantized_input1_value.size() * sizeof(uint8_t));
- input2_tensor.writeData(quantized_input2_value.data(),
- quantized_input2_value.size() * sizeof(uint8_t));
+ Tensor input1_tensor = makeInputTensor<DataType::U8>(
+ test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U8>(
+ base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get());
Tensor output_tensor =
- makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
+ makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
AddParams params{};
params.activation = Activation::NONE;
Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
- EXPECT_THAT(dequantize<uint8_t>(extractTensorData<uint8_t>(output_tensor),
- output_tensor.scale(), output_tensor.zero_point()),
- ElementsAreArray(ArrayFloatNear(output_data[i], kQuantizedTolerance)));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data[i], kQuantizedTolerance));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i]));
}
}
-TEST(AddTest, Float)
+TEST_F(AddTest, Float)
{
Shape base_shape = {2, 3, 1, 2};
std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
std::vector<std::vector<float>> test_outputs = {
- {0.0f, 2.6f, 0.0f, 2.8f, 0.7f, 3.2f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f,
- 1.0f, 0.0f, 0.4f, 0.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.8f, 3.3f, 2.2f, 3.7f,
- 0.0f, 0.3f, 0.0f, 0.5f, 0.0f, 0.9f, 0.9f, 0.0f, 0.3f, 0.0f, 1.7f, 0.0f},
- {0.0f, 2.6f, 0.5f, 1.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.0f, 0.5f, 1.7f, 0.0f},
- {0.0f, 2.5f, 0.0f, 2.6f, 0.0f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f,
- 1.0f, 0.0f, 1.1f, 0.0f, 0.4f, 0.0f, 1.7f, 3.3f, 2.2f, 3.8f, 2.1f, 3.7f,
- 0.0f, 0.5f, 0.0f, 1.0f, 0.0f, 0.9f, 1.2f, 0.0f, 1.7f, 0.0f, 1.6f, 0.0f},
- {0.0f, 2.5f, 1.2f, 0.8f, 0.4f, 0.0f, 1.7f, 3.3f, 0.0f, 1.0f, 1.6f, 0.0f}};
+ {0.0f, 2.6f, 0.0f, 2.8f, 0.7f, 3.2f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f,
+ 1.0f, 0.0f, 0.4f, 0.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.8f, 3.3f, 2.2f, 3.7f,
+ 0.0f, 0.3f, 0.0f, 0.5f, 0.0f, 0.9f, 0.9f, 0.0f, 0.3f, 0.0f, 1.7f, 0.0f},
+ {0.0f, 2.6f, 0.5f, 1.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.0f, 0.5f, 1.7f, 0.0f},
+ {0.0f, 2.5f, 0.0f, 2.6f, 0.0f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f,
+ 1.0f, 0.0f, 1.1f, 0.0f, 0.4f, 0.0f, 1.7f, 3.3f, 2.2f, 3.8f, 2.1f, 3.7f,
+ 0.0f, 0.5f, 0.0f, 1.0f, 0.0f, 0.9f, 1.2f, 0.0f, 1.7f, 0.0f, 1.6f, 0.0f},
+ {0.0f, 2.5f, 1.2f, 0.8f, 0.4f, 0.0f, 1.7f, 3.3f, 0.0f, 1.0f, 1.6f, 0.0f}};
std::vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
for (size_t i = 0; i < test_shapes.size(); ++i)
{
- Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data);
- Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data);
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
AddParams params{};
@@ -143,17 +138,19 @@ TEST(AddTest, Float)
Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
- EXPECT_THAT(extractTensorData<float>(output_tensor),
- ::testing::ElementsAreArray(ArrayFloatNear(test_outputs[i], 0.0001f)))
- << "With shape number " << i;
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
+ << "With shape number " << i;
}
// Re-run with exchanged inputs.
for (size_t i = 0; i < test_shapes.size(); ++i)
{
- Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data);
- Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data);
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
AddParams params{};
@@ -161,18 +158,150 @@ TEST(AddTest, Float)
Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
- EXPECT_THAT(extractTensorData<float>(output_tensor),
- ::testing::ElementsAreArray(ArrayFloatNear(test_outputs[i], 0.0001f)))
- << "With shape number " << i;
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
+ << "With shape number " << i;
}
}
-TEST(AddTest, Input_Output_Type_NEG)
+template <loco::DataType DType> void CheckInteger(luci_interpreter::IMemoryManager *memory_manager)
{
- Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
- Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2});
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ Shape base_shape = {2, 3, 1, 2};
+ std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ std::vector<std::vector<dtype>> test_outputs = {
+ {3, 3, 0, 1, 0, 8, 5, 1, 0, 0, 2, 6, 8, 0, 1, 0, 5, 1,
+ 5, 4, 0, 2, 2, 9, 11, 0, 4, 0, 8, 5, 11, 2, 4, 0, 8, 7},
+ {3, 3, 0, 0, 5, 1, 5, 4, 4, 0, 8, 7},
+ {3, 6, 0, 3, 0, 0, 5, 4, 2, 1, 0, 0, 8, 0, 5, 0, 1, 0,
+ 0, 2, 2, 4, 7, 9, 6, 0, 8, 0, 13, 5, 6, 0, 8, 2, 13, 7},
+ {3, 6, 2, 1, 1, 0, 0, 2, 8, 0, 13, 7}};
+ std::vector<dtype> input1_data{-1, 2, 1, 0, 4, -5, 1, 3, 7, -1, 7, 1};
+ std::vector<dtype> input2_data{4, 1, -3, -1, 1, 6};
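+ // With RELU activation the expected outputs above are clamped below at zero.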
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+ Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DType);
+
+ AddParams params{};
+ params.activation = Activation::RELU;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+ << "With shape number " << i;
+ }
+ // Re-run with exchanged inputs.
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+ Tensor input2_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DType);
+
+ AddParams params{};
+ params.activation = Activation::RELU;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+ << "With shape number " << i;
+ }
+}
+
+TEST_F(AddTest, SInt32)
+{
+ CheckInteger<loco::DataType::S32>(_memory_manager.get());
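+ // All assertions run inside CheckInteger; SUCCEED() merely records that this test executed.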
+ SUCCEED();
+}
+
+TEST_F(AddTest, SInt64)
+{
+ CheckInteger<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(AddTest, SInt16)
+{
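+ // S16 quantization is symmetric (zero point fixed at 0); a scale of
+ // range/32767, e.g. 3.0 / 32767, maps the float range [-3, 3] onto int16.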
+ Shape base_shape = {2, 3, 1, 2};
+ std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ std::vector<std::vector<int32_t>> ref_output_shapes{
+ {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
+
+ std::vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
+ 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
+ std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
+ std::vector<std::vector<float>> ref_outputs = {
+ {0.0f, 2.6f, 0.0f, 2.8f, 0.7f, 3.2f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f,
+ 1.0f, 0.0f, 0.4f, 0.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.8f, 3.3f, 2.2f, 3.7f,
+ 0.0f, 0.3f, 0.0f, 0.5f, 0.0f, 0.9f, 0.9f, 0.0f, 0.3f, 0.0f, 1.7f, 0.0f},
+ {0.0f, 2.6f, 0.5f, 1.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.0f, 0.5f, 1.7f, 0.0f},
+ {0.0f, 2.5f, 0.0f, 2.6f, 0.0f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f,
+ 1.0f, 0.0f, 1.1f, 0.0f, 0.4f, 0.0f, 1.7f, 3.3f, 2.2f, 3.8f, 2.1f, 3.7f,
+ 0.0f, 0.5f, 0.0f, 1.0f, 0.0f, 0.9f, 1.2f, 0.0f, 1.7f, 0.0f, 1.6f, 0.0f},
+ {0.0f, 2.5f, 1.2f, 0.8f, 0.4f, 0.0f, 1.7f, 3.3f, 0.0f, 1.0f, 1.6f, 0.0f}};
+
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DataType::S16>(base_shape, 3.0 / 32767, 0, input1_data,
+ _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 1.0 / 32767, 0,
+ input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 4.0 / 32767, 0);
+ const float tolerance = output_tensor.scale();
+
+ AddParams params{};
+ params.activation = Activation::RELU;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor),
+ ::testing::ElementsAreArray(ref_output_shapes[i]))
+ << "With shape number " << i;
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance))
+ << "With shape number " << i;
+ }
+ // Re-run with exchanged inputs and different scales.
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 2.0 / 32767, 0,
+ input2_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S16>(base_shape, 4.0 / 32767, 0, input1_data,
+ _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 5.0 / 32767, 0);
+ const float tolerance = output_tensor.scale();
+
+ AddParams params{};
+ params.activation = Activation::RELU;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor),
+ ::testing::ElementsAreArray(ref_output_shapes[i]))
+ << "With shape number " << i;
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance))
+ << "With shape number " << i;
+ }
+}
+
+TEST_F(AddTest, Input_Output_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
AddParams params{};
@@ -182,20 +311,47 @@ TEST(AddTest, Input_Output_Type_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(AddTest, Invalid_Input_Type_NEG)
+TEST_F(AddTest, Invalid_Output_Type_NEG)
{
- Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1});
- Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2});
- Tensor output_tensor = makeOutputTensor(DataType::S64);
+ Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ AddParams params{};
+ params.activation = Activation::RELU;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(AddTest, Invalid_Input_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U64);
AddParams params{};
params.activation = Activation::RELU;
Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
EXPECT_ANY_THROW(kernel.execute());
}
+TEST_F(AddTest, Invalid_Quantization_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::S16>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S16>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16);
+
+ AddParams params{};
+ params.activation = Activation::NONE;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/ArgMax.cpp b/compiler/luci-interpreter/src/kernels/ArgMax.cpp
index 5c464ed09..6561a1783 100644
--- a/compiler/luci-interpreter/src/kernels/ArgMax.cpp
+++ b/compiler/luci-interpreter/src/kernels/ArgMax.cpp
@@ -16,7 +16,7 @@
#include "kernels/ArgMax.h"
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALArgMax.h"
namespace luci_interpreter
{
@@ -24,7 +24,7 @@ namespace kernels
{
ArgMax::ArgMax(const Tensor *input, const Tensor *axis, Tensor *output, const ArgMaxParams &params)
- : KernelWithParams<ArgMaxParams>({input, axis}, {output}, params)
+ : KernelWithParams<ArgMaxParams>({input, axis}, {output}, params)
{
}
@@ -60,11 +60,10 @@ void ArgMax::configure()
void ArgMax::execute() const
{
-#define TF_LITE_ARG_MAX(data_type, axis_type, output_type) \
- tflite::optimized_ops::ArgMinMax(getTensorShape(input()), getTensorData<data_type>(input()), \
- getTensorData<axis_type>(axis()), getTensorShape(output()), \
- getTensorData<output_type>(output()), \
- std::greater<data_type>())
+#define TF_LITE_ARG_MAX(data_type, axis_type, output_type) \
+ luci_interpreter_pal::ArgMinMax(getTensorShape(input()), getTensorData<data_type>(input()), \
+ getTensorData<axis_type>(axis()), getTensorShape(output()), \
+ getTensorData<output_type>(output()), std::greater<data_type>())
if (axis()->element_type() == DataType::S32)
{
switch (_params.output_type)
diff --git a/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp b/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp
index 2ab7ff0da..474f4b321 100644
--- a/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/ArgMax.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -32,18 +33,19 @@ void Check(std::initializer_list<int32_t> input_shape,
std::initializer_list<int32_t> output_shape, std::initializer_list<T1> input_data,
std::initializer_list<int32_t> dimension_data, std::initializer_list<T2> output_data)
{
-
- Tensor input_tensor{getElementType<T1>(), input_shape, {}, ""};
- input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(T1));
- Tensor dimension_tensor{DataType::S32, dimension_shape, {}, ""};
- dimension_tensor.writeData(dimension_data.begin(), dimension_data.size() * sizeof(int32_t));
-
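+ // Check() is a free helper shared by the typed tests, so it owns its own TestMemoryManager.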
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T1>();
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor dimension_tensor =
+ makeInputTensor<DataType::S32>(dimension_shape, dimension_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(getElementType<T2>());
ArgMaxParams params{};
params.output_type = getElementType<T2>();
ArgMax kernel(&input_tensor, &dimension_tensor, &output_tensor, params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<T2>(output_tensor), ::testing::ElementsAreArray(output_data));
@@ -55,7 +57,7 @@ template <typename T> class ArgMaxTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(ArgMaxTest, DataTypes);
+TYPED_TEST_SUITE(ArgMaxTest, DataTypes);
TYPED_TEST(ArgMaxTest, Simple)
{
@@ -63,14 +65,14 @@ TYPED_TEST(ArgMaxTest, Simple)
/*output_shape=*/{1, 1, 1},
/*input_data=*/
{
- 1, 9, 7, 3,
+ 1, 9, 7, 3, //
},
/*dimension_data=*/{3}, /*output_data=*/{1});
Check<TypeParam, int64_t>(/*input_shape=*/{1, 1, 1, 4}, /*dimension_shape=*/{},
/*output_shape=*/{1, 1, 1},
/*input_data=*/
{
- 1, 9, 7, 3,
+ 1, 9, 7, 3, //
},
/*dimension_data=*/{3}, /*output_data=*/{1});
}
@@ -81,30 +83,37 @@ TYPED_TEST(ArgMaxTest, MultiDimensions)
/*output_shape=*/{1, 1, 2},
/*input_data=*/
{
- 1, 2, 7, 8, 1, 9, 7, 3,
+ 1, 2, 7, 8, //
+ 1, 9, 7, 3, //
},
/*dimension_data=*/{3}, /*output_data=*/{3, 1});
Check<TypeParam, int64_t>(/*input_shape=*/{1, 1, 2, 4}, /*dimension_shape=*/{},
/*output_shape=*/{1, 1, 2},
/*input_data=*/
{
- 1, 2, 7, 8, 1, 9, 7, 3,
+ 1, 2, 7, 8, //
+ 1, 9, 7, 3, //
},
/*dimension_data=*/{3}, /*output_data=*/{3, 1});
}
TEST(ArgMaxTest, UnsupportedType_NEG)
{
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 2, 4}, {
- 1, 2, 7, 8, 1, 9, 7, 3,
- });
- Tensor dimension_tensor = makeInputTensor<DataType::S32>({}, {3});
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 2, 4},
+ {
+ 1, 2, 7, 8, //
+ 1, 9, 7, 3, //
+ },
+ memory_manager.get());
+ Tensor dimension_tensor = makeInputTensor<DataType::S32>({}, {3}, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8);
ArgMaxParams params{};
params.output_type = DataType::U8;
ArgMax kernel(&input_tensor, &dimension_tensor, &output_tensor, params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
EXPECT_ANY_THROW(kernel.execute());
}
diff --git a/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp b/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp
index cdd81d7d6..d3bade9e4 100644
--- a/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp
+++ b/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp
@@ -18,7 +18,7 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+#include "PALAveragePool2d.h"
#include <stdexcept>
@@ -28,8 +28,9 @@ namespace luci_interpreter
namespace kernels
{
-AveragePool2D::AveragePool2D(const Tensor *input, Tensor *output, const Pool2DParams &params)
- : KernelWithParams<Pool2DParams>({input}, {output}, params)
+AveragePool2D::AveragePool2D(const Tensor *input, Tensor *output, Tensor *scratchpad,
+ const Pool2DParams &params)
+ : KernelWithParams<Pool2DParams>({input}, {output, scratchpad}, params)
{
}
@@ -50,24 +51,35 @@ void AveragePool2D::configure()
const int32_t input_width = input_shape.dim(2);
const int32_t depth = input_shape.dim(3);
- const int32_t output_height = computeOutputSize(_params.padding, input_height,
- _params.filter_height, _params.stride_height);
+ const int32_t output_height =
+ computeOutputSize(_params.padding, input_height, _params.filter_height, _params.stride_height);
const int32_t output_width =
- computeOutputSize(_params.padding, input_width, _params.filter_width, _params.stride_width);
+ computeOutputSize(_params.padding, input_width, _params.filter_width, _params.stride_width);
_padding_height =
- computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height);
+ computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height);
_padding_width =
- computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width);
+ computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width);
if (input()->element_type() == DataType::U8)
{
- if (input()->scale() != output()->scale() || input()->zero_point() != output()->zero_point())
- {
- throw std::runtime_error(
- "Quantization param for Input and output must be same(scale or zero-point)");
- }
+ LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
+ LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point());
+ }
+ else if (input()->element_type() == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
+ }
+ else if (input()->element_type() == DataType::S8)
+ {
+ LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
+ LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point());
}
output()->resize({batches, output_height, output_width, depth});
+
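+ // Scratchpad requirements are backend-specific (e.g. CMSIS-NN may request one);
+ // the PAL helper decides the scratchpad shape, which may be empty.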
+ auto scratchpad = getOutputTensors()[1];
+ luci_interpreter_pal::SetupScratchpadTensor(scratchpad, input()->element_type(),
+ getTensorShape(input()), getTensorShape(output()));
}
void AveragePool2D::execute() const
@@ -80,6 +92,12 @@ void AveragePool2D::execute() const
case DataType::U8:
evalQuantized();
break;
+ case DataType::S16:
+ evalSInt16();
+ break;
+ case DataType::S8:
+ evalSInt8();
+ break;
default:
throw std::runtime_error("Unsupported type.");
}
@@ -126,5 +144,51 @@ void AveragePool2D::evalQuantized() const
getTensorData<uint8_t>(output()));
}
+void AveragePool2D::evalSInt8() const
+{
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+ tflite::PoolParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.filter_height = _params.filter_height;
+ params.filter_width = _params.filter_width;
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
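+ // The scratchpad may not be allocatable on every backend; fall back to nullptr in that case.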
+ auto scratchpad = getOutputTensors()[1];
+ int8_t *scratchpad_data = nullptr;
+ if (scratchpad->is_allocatable())
+ scratchpad_data = scratchpad->data<int8_t>();
+
+ luci_interpreter_pal::AveragePool<int8_t>(
+ params, getTensorShape(input()), getTensorData<int8_t>(input()), getTensorShape(output()),
+ getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data);
+}
+
+void AveragePool2D::evalSInt16() const
+{
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::PoolParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.filter_height = _params.filter_height;
+ params.filter_width = _params.filter_width;
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ tflite::reference_integer_ops::AveragePool(
+ params, getTensorShape(input()), getTensorData<int16_t>(input()), //
+ getTensorShape(output()), getTensorData<int16_t>(output()));
+}
+
} // namespace kernels
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/AveragePool2D.h b/compiler/luci-interpreter/src/kernels/AveragePool2D.h
index 91f212b3a..2c8fe16e7 100644
--- a/compiler/luci-interpreter/src/kernels/AveragePool2D.h
+++ b/compiler/luci-interpreter/src/kernels/AveragePool2D.h
@@ -28,7 +28,8 @@ namespace kernels
class AveragePool2D : public KernelWithParams<Pool2DParams>
{
public:
- AveragePool2D(const Tensor *input, Tensor *output, const Pool2DParams &params);
+ AveragePool2D(const Tensor *input, Tensor *output, Tensor *scratchpad,
+ const Pool2DParams &params);
const Tensor *input() const { return _inputs[0]; }
Tensor *output() const { return _outputs[0]; }
@@ -39,6 +40,8 @@ public:
private:
void evalFloat() const;
void evalQuantized() const;
+ void evalSInt16() const;
+ void evalSInt8() const;
private:
int32_t _padding_height{};
diff --git a/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp b/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp
index cc80e5e90..478bfa68e 100644
--- a/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/AveragePool2D.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -26,16 +27,26 @@ namespace
using namespace testing;
-TEST(AveragePool2DTest, Float)
+class AveragePool2DTest : public ::testing::Test
+{
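+ // Each test gets a fresh TestMemoryManager via SetUp(), so allocations never carry over between tests.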
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(AveragePool2DTest, Float)
{
Shape input_shape{1, 3, 5, 1};
std::vector<float> input_data{
- -4, -3, -2, -1, 0, //
- 1, 2, 3, 4, 5, //
- 6, 7, 8, 9, 10, //
+ -4, -3, -2, -1, 0, //
+ 1, 2, 3, 4, 5, //
+ 6, 7, 8, 9, 10, //
};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
Pool2DParams params{};
params.padding = Padding::VALID;
@@ -45,32 +56,31 @@ TEST(AveragePool2DTest, Float)
params.stride_width = 2;
params.activation = Activation::RELU6;
- AveragePool2D kernel(&input_tensor, &output_tensor, params);
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
kernel.configure();
+ _memory_manager->allocate_memory(scratchpad);
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{
- 0, 1.5, //
- 4.5, 6, //
+ 0, 1.5, //
+ 4.5, 6, //
};
- EXPECT_THAT(extractTensorData<float>(output_tensor),
- ElementsAreArray(ArrayFloatNear(ref_output_data)));
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 1}));
}
-TEST(AveragePool2DTest, Uint8_0)
+TEST_F(AveragePool2DTest, Uint8_0)
{
+ std::vector<float> input_data{
+ 0, -6, 12, 4, //
+ -3, -2, 10, 7, //
+ };
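+ // The ±15.9375 range gives an exact uint8 scale of 0.125 (31.875 / 255).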
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375f, 15.9375f);
- Tensor input_tensor{DataType::U8, {1, 2, 4, 1}, {{quant_param.first}, {quant_param.second}}, ""};
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
-
- std::vector<uint8_t> quant_input = quantize<uint8_t>(
- {
- 0, -6, 12, 4, //
- -3, -2, 10, 7, //
- },
- quant_param.first, quant_param.second);
- input_tensor.writeData(quant_input.data(), quant_input.size() * sizeof(uint8_t));
+ Tensor scratchpad(DataType::U8, Shape({}), {}, "");
Pool2DParams params{};
params.padding = Padding::VALID;
@@ -80,29 +90,28 @@ TEST(AveragePool2DTest, Uint8_0)
params.stride_width = 2;
params.activation = Activation::RELU6;
- AveragePool2D kernel(&input_tensor, &output_tensor, params);
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
kernel.configure();
+ _memory_manager->allocate_memory(scratchpad);
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
- EXPECT_THAT(dequantize(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(),
- output_tensor.zero_point()),
- ElementsAreArray(ArrayFloatNear({0.0, 6.0})));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({0.0, 6.0}));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 1}));
}
-TEST(AveragePool2DTest, Uint8_1)
+TEST_F(AveragePool2DTest, Uint8_1)
{
+ std::vector<float> input_data{
+ 0, 6, 12, 4, //
+ 3, 2, 10, 7, //
+ };
+
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375f, 15.9375f);
- Tensor input_tensor{DataType::U8, {1, 2, 4, 1}, {{quant_param.first}, {quant_param.second}}, ""};
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
-
- std::vector<uint8_t> quant_input = quantize<uint8_t>(
- {
- 0, 6, 12, 4, //
- 3, 2, 10, 7, //
- },
- quant_param.first, quant_param.second);
- input_tensor.writeData(quant_input.data(), quant_input.size() * sizeof(uint8_t));
+ Tensor scratchpad(DataType::U8, Shape({}), {}, "");
Pool2DParams params{};
params.padding = Padding::VALID;
@@ -112,26 +121,99 @@ TEST(AveragePool2DTest, Uint8_1)
params.stride_width = 2;
params.activation = Activation::RELU6;
- AveragePool2D kernel(&input_tensor, &output_tensor, params);
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad);
kernel.execute();
- EXPECT_THAT(dequantize(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(),
- output_tensor.zero_point()),
- ElementsAreArray(ArrayFloatNear({2.75, 6.0})));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({2.75, 6.0}));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 1}));
}
-TEST(AveragePool2DTest, Invalid_Input_Shape_NEG)
+TEST_F(AveragePool2DTest, SInt16)
+{
+ Shape input_shape{1, 3, 5, 1};
+ std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
+ std::vector<float> input_data{
+ -4, -3, -2, -1, 0, //
+ 1, 2, 3, 4, 5, //
+ 6, 7, 8, 9, 10, //
+ };
+ std::vector<float> ref_output_data{
+ 0, 1.5, //
+ 4.5, 6, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>(input_shape, 0.5, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+ Tensor scratchpad(DataType::S16, Shape({}), {}, "");
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 3;
+ params.stride_height = 1;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(scratchpad);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(AveragePool2DTest, SInt8)
+{
+ Shape input_shape{1, 4, 5, 1};
+ std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
+ std::vector<float> input_data{-7, -3, 0, 2, -5, 12, -15, 3, 10, 5,
+ 7, -6, -1, 9, -2, 0, -5, 11, -1, -7};
+ std::vector<float> ref_output_data{
+ 0, 2.5, //
+ 1, 1.5, //
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<int8_t>(-15.9375f, 15.9375f);
+ Tensor input_tensor = makeInputTensor<DataType::S8>(
+ input_shape, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, quant_param.first, quant_param.second);
+ Tensor scratchpad(DataType::S8, Shape({}), {}, "");
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 3;
+ params.stride_height = 2;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(scratchpad);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(AveragePool2DTest, Invalid_Input_Shape_NEG)
{
Shape input_shape{1, 3, 5};
std::vector<float> input_data{
- -4, -3, -2, -1, 0, //
- 1, 2, 3, 4, 5, //
- 6, 7, 8, 9, 10, //
+ -4, -3, -2, -1, 0, //
+ 1, 2, 3, 4, 5, //
+ 6, 7, 8, 9, 10, //
};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
Pool2DParams params{};
params.padding = Padding::VALID;
@@ -141,20 +223,22 @@ TEST(AveragePool2DTest, Invalid_Input_Shape_NEG)
params.stride_width = 2;
params.activation = Activation::RELU6;
- AveragePool2D kernel(&input_tensor, &output_tensor, params);
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(AveragePool2DTest, In_Out_Type_NEG)
+TEST_F(AveragePool2DTest, In_Out_Type_NEG)
{
Shape input_shape{1, 3, 5, 1};
std::vector<float> input_data{
- -4, -3, -2, -1, 0, //
- 1, 2, 3, 4, 5, //
- 6, 7, 8, 9, 10, //
+ -4, -3, -2, -1, 0, //
+ 1, 2, 3, 4, 5, //
+ 6, 7, 8, 9, 10, //
};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
Pool2DParams params{};
params.padding = Padding::VALID;
@@ -164,25 +248,23 @@ TEST(AveragePool2DTest, In_Out_Type_NEG)
params.stride_width = 2;
params.activation = Activation::RELU6;
- AveragePool2D kernel(&input_tensor, &output_tensor, params);
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(AveragePool2DTest, Quant_Param_NEG)
+TEST_F(AveragePool2DTest, Quant_Param_NEG)
{
+ std::vector<float> input_data{
+ 0, -6, 12, 4, //
+ -3, -2, 10, 7, //
+ };
+
std::pair<float, int32_t> quant_param1 = quantizationParams<uint8_t>(-15.9375f, 15.9375f);
std::pair<float, int32_t> quant_param2 = quantizationParams<uint8_t>(-7.875f, 7.875f);
- Tensor input_tensor{
- DataType::U8, {1, 2, 4, 1}, {{quant_param1.first}, {quant_param1.second}}, ""};
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param1.first, quant_param1.second, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param2.first, quant_param2.second);
-
- std::vector<uint8_t> quant_input = quantize<uint8_t>(
- {
- 0, -6, 12, 4, //
- -3, -2, 10, 7, //
- },
- quant_param1.first, quant_param1.second);
- input_tensor.writeData(quant_input.data(), quant_input.size() * sizeof(uint8_t));
+ Tensor scratchpad(DataType::U8, Shape({}), {}, "");
Pool2DParams params{};
params.padding = Padding::VALID;
@@ -192,7 +274,7 @@ TEST(AveragePool2DTest, Quant_Param_NEG)
params.stride_width = 2;
params.activation = Activation::RELU6;
- AveragePool2D kernel(&input_tensor, &output_tensor, params);
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
EXPECT_ANY_THROW(kernel.configure());
}
diff --git a/compiler/luci-interpreter/src/kernels/BatchMatMul.cpp b/compiler/luci-interpreter/src/kernels/BatchMatMul.cpp
new file mode 100644
index 000000000..24ca22996
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/BatchMatMul.cpp
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchMatMul.h"
+#include "kernels/Utils.h"
+
+#include "PALBatchMatMul.h"
+
+#include <tensorflow/lite/kernels/internal/reference/transpose.h>
+
+#include <stdexcept>
+
+namespace
+{
+
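+// Returns a copy of the shape with its two innermost (row/column) dimensions swapped.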
+tflite::RuntimeShape SwapRowColumnDims(const tflite::RuntimeShape &shape)
+{
+ tflite::RuntimeShape swapped_shape(shape);
+ const int32_t dims = shape.DimensionsCount();
+ swapped_shape.SetDim(dims - 2, shape.Dims(dims - 1));
+ swapped_shape.SetDim(dims - 1, shape.Dims(dims - 2));
+ return swapped_shape;
+}
+
+} // namespace
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+BatchMatMul::BatchMatMul(const Tensor *x, const Tensor *y, Tensor *output, Tensor *x_tmp,
+ Tensor *y_tmp, const BatchMatMulParams &params)
+ : KernelWithParams({x, y}, {output, x_tmp, y_tmp}, params)
+{
+}
+
+void BatchMatMul::configure()
+{
+ auto lhs = x();
+ auto rhs = y();
+ auto adj_x = params().adj_x;
+ auto adj_y = params().adj_y;
+
+ // TODO Support non-float types
+ if (lhs->element_type() != DataType::FLOAT32 || rhs->element_type() != DataType::FLOAT32)
+ throw std::runtime_error("Unsupported type.");
+
+ LUCI_INTERPRETER_CHECK(lhs->element_type() == rhs->element_type());
+
+ auto lhs_rank = lhs->shape().num_dims();
+ auto rhs_rank = rhs->shape().num_dims();
+ LUCI_INTERPRETER_CHECK(lhs_rank >= 2 && lhs_rank <= 4);
+ LUCI_INTERPRETER_CHECK(rhs_rank >= 2 && rhs_rank <= 4);
+
+ auto lhs_scratchpad = temp_lhs();
+ auto rhs_scratchpad = temp_rhs();
+ luci_interpreter_pal::SetupScratchpadTensor(lhs_scratchpad, rhs_scratchpad, getTensorShape(lhs),
+ getTensorShape(rhs));
+
+ auto output_rank = std::max(lhs_rank, rhs_rank);
+
+ auto extended_lhs_shape = tflite::RuntimeShape::ExtendedShape(output_rank, getTensorShape(lhs));
+ auto extended_rhs_shape = tflite::RuntimeShape::ExtendedShape(output_rank, getTensorShape(rhs));
+
+ // Ensure any batch dimensions obey broadcasting rules.
+ for (int i = 0; i < output_rank - 2; ++i)
+ {
+ const int lhs_dim = extended_lhs_shape.Dims(i);
+ const int rhs_dim = extended_rhs_shape.Dims(i);
+ if (lhs_dim != rhs_dim)
+ {
+ if (lhs_dim != 1)
+ {
+ LUCI_INTERPRETER_CHECK(rhs_dim == 1);
+ }
+ }
+ }
+
+ // Ensure other dimensions work for matrix multiplication.
+ int accum_dim_lhs =
+ adj_x ? extended_lhs_shape.Dims(output_rank - 2) : extended_lhs_shape.Dims(output_rank - 1);
+ int accum_dim_rhs =
+ adj_y ? extended_rhs_shape.Dims(output_rank - 1) : extended_rhs_shape.Dims(output_rank - 2);
+ LUCI_INTERPRETER_CHECK(accum_dim_lhs == accum_dim_rhs);
+
+ Shape output_shape(output_rank);
+ // Fill in any broadcast dimensions.
+ for (int i = 0; i < output_rank - 2; ++i)
+ {
+ const int lhs_dim = extended_lhs_shape.Dims(i);
+ const int rhs_dim = extended_rhs_shape.Dims(i);
+ int broadcast_dim = lhs_dim;
+ if ((lhs_dim != rhs_dim) && (lhs_dim == 1))
+ {
+ broadcast_dim = rhs_dim;
+ }
+ output_shape.dim(i) = broadcast_dim;
+ }
+ // Fill in the matmul dimensions.
+ int lhs_rows_index = adj_x ? output_rank - 1 : output_rank - 2;
+ int rhs_cols_index = adj_y ? output_rank - 2 : output_rank - 1;
+
+ output_shape.dim(output_rank - 2) = extended_lhs_shape.Dims(lhs_rows_index);
+ output_shape.dim(output_rank - 1) = extended_rhs_shape.Dims(rhs_cols_index);
+
+ output()->resize(output_shape);
+}
+
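+// Copies tensor_in into tensor_out with the two innermost dimensions transposed.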
+void TransposeRowsColumns(const Tensor *tensor_in, Tensor *tensor_out)
+{
+ tflite::RuntimeShape transposed_shape(getTensorShape(tensor_in));
+ tflite::RuntimeShape shape(getTensorShape(tensor_in));
+ tflite::TransposeParams params;
+ int rank = shape.DimensionsCount();
+ params.perm_count = rank;
+ for (int i = 0; i < rank - 2; ++i)
+ {
+ params.perm[i] = i;
+ }
+ // Transpose the last two dimensions.
+ params.perm[rank - 2] = rank - 1;
+ params.perm[rank - 1] = rank - 2;
+ transposed_shape.SetDim(rank - 1, shape.Dims(rank - 2));
+ transposed_shape.SetDim(rank - 2, shape.Dims(rank - 1));
+ switch (tensor_in->element_type())
+ {
+ case DataType::FLOAT32:
+ tflite::reference_ops::Transpose(params, shape, getTensorData<float>(tensor_in),
+ transposed_shape, getTensorData<float>(tensor_out));
+ break;
+ default:
+ throw std::runtime_error("Only suppport fp32 BatchMatMul for now.");
+ }
+}
+
+void BatchMatMul::execute() const
+{
+ auto lhs = x();
+ auto rhs = y();
+
+ bool adj_x = params().adj_x;
+ bool adj_y = params().adj_y;
+
+ auto orig_lhs_shape = getTensorShape(lhs);
+ auto orig_rhs_shape = getTensorShape(rhs);
+
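+ // Unless adj_y is set, the RHS is transposed into its scratchpad (and its shape
+ // swapped) before the call; adj_x works the opposite way for the LHS.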
+ auto rhs_tensor = adj_y ? rhs : temp_rhs();
+ auto lhs_tensor = adj_x ? temp_lhs() : lhs;
+ if (not adj_y)
+ {
+ TransposeRowsColumns(rhs, temp_rhs());
+ }
+ if (adj_x)
+ {
+ TransposeRowsColumns(lhs, temp_lhs());
+ }
+ tflite::RuntimeShape rhs_shape = adj_y ? orig_rhs_shape : SwapRowColumnDims(orig_rhs_shape);
+ tflite::RuntimeShape lhs_shape = adj_x ? orig_lhs_shape : SwapRowColumnDims(orig_lhs_shape);
+
+ switch (x()->element_type())
+ {
+ case DataType::FLOAT32:
+ luci_interpreter_pal::BatchMatMul(rhs_shape, getTensorData<float>(rhs_tensor), lhs_shape,
+ getTensorData<float>(lhs_tensor), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/BatchMatMul.h b/compiler/luci-interpreter/src/kernels/BatchMatMul.h
new file mode 100644
index 000000000..744f49795
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/BatchMatMul.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H
+#define LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class BatchMatMul : public KernelWithParams<BatchMatMulParams>
+{
+public:
+ BatchMatMul(const Tensor *x, const Tensor *y, Tensor *output, Tensor *x_tmp, Tensor *y_tmp,
+ const BatchMatMulParams &params);
+
+ const Tensor *x() const { return _inputs[0]; }
+ const Tensor *y() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ Tensor *temp_lhs() const { return _outputs[1]; }
+ Tensor *temp_rhs() const { return _outputs[2]; }
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H
diff --git a/compiler/luci-interpreter/src/kernels/BatchMatMul.test.cpp b/compiler/luci-interpreter/src/kernels/BatchMatMul.test.cpp
new file mode 100644
index 000000000..edfa3a685
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/BatchMatMul.test.cpp
@@ -0,0 +1,272 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchMatMul.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class BatchMatMulTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(BatchMatMulTest, Float)
+{
+ std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6};
+ std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 3}, lhs_data, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 4}, rhs_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(lhs_scratch);
+ _memory_manager->allocate_memory(rhs_scratch);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4}));
+}
+
+TEST_F(BatchMatMulTest, Float_SimpleRHSAdjoint)
+{
+ std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6};
+ std::vector<float> rhs_data = {7, 11, 15, 8, 12, 16, 9, 13, 17, 10, 14, 18};
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 3}, lhs_data, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 4, 3}, rhs_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = true;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(lhs_scratch);
+ _memory_manager->allocate_memory(rhs_scratch);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4}));
+}
+
+TEST_F(BatchMatMulTest, Float_SimpleLHSAdjoint)
+{
+ std::vector<float> lhs_data = {1, 4, 2, 5, 3, 6};
+ std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 2}, lhs_data, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 4}, rhs_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = true;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(lhs_scratch);
+ _memory_manager->allocate_memory(rhs_scratch);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4}));
+}
+
+TEST_F(BatchMatMulTest, Float_BatchSizeTwo)
+{
+ std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30};
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 2, 3}, lhs_data, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3, 4}, rhs_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(lhs_scratch);
+ _memory_manager->allocate_memory(rhs_scratch);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218., 560., 584., 608., 632.,
+ 767., 800., 833., 866.}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 2, 4}));
+}
+
+TEST_F(BatchMatMulTest, Float_DiffBatch)
+{
+ std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30};
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 1, 6}, lhs_data, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 6, 4}, rhs_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(lhs_scratch);
+ _memory_manager->allocate_memory(rhs_scratch);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({427., 448., 469., 490., 1039., 1096., 1153., 1210.}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 1, 4}));
+}
+
+TEST_F(BatchMatMulTest, Invalid_Shape_NEG)
+{
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 2}, {1, 2, 3, 4}, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 2}, {5, 6, 7, 8, 9, 10}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(BatchMatMulTest, Invalid_Batch_NEG)
+{
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 1, 3}, {1, 2, 3, 4, 5, 6}, _memory_manager.get());
+ Tensor rhs_tensor = makeInputTensor<DataType::FLOAT32>({3, 3, 1}, {5, 6, 7, 8, 9, 10, 11, 12, 13},
+ _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(BatchMatMulTest, Invalid_Rank_NEG)
+{
+ Tensor lhs_tensor = makeInputTensor<DataType::FLOAT32>({4}, {1, 2, 3, 4}, _memory_manager.get());
+ Tensor rhs_tensor = makeInputTensor<DataType::FLOAT32>({1, 4, 2}, {5, 6, 7, 8, 9, 10, 11, 12},
+ _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(BatchMatMulTest, Invalid_Rank2_NEG)
+{
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 1, 1, 1, 4}, {1, 2, 3, 4}, _memory_manager.get());
+ Tensor rhs_tensor = makeInputTensor<DataType::FLOAT32>({1, 4, 2}, {5, 6, 7, 8, 9, 10, 11, 12},
+ _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(BatchMatMulTest, TypeMisMatch_NEG)
+{
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 3}, {1, 2, 3, 4, 5, 6}, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 2}, {5, 6, 7, 8, 9, 10}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::U8, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/BatchToSpaceND.cpp b/compiler/luci-interpreter/src/kernels/BatchToSpaceND.cpp
new file mode 100644
index 000000000..bd315ff7b
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/BatchToSpaceND.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchToSpaceND.h"
+#include "kernels/Utils.h"
+
+#include "PALBatchToSpaceND.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+namespace
+{
+const int kInputMinDimensionNum = 3;
+const int kInputMaxDimensionNum = 4;
+} // namespace
+
+BatchToSpaceND::BatchToSpaceND(const Tensor *input, const Tensor *block_shape, const Tensor *crops,
+ Tensor *output)
+ : Kernel({input, block_shape, crops}, {output})
+{
+}
+
+void BatchToSpaceND::configure()
+{
+
+ const auto *block_shape_data = block_shape()->data<int32_t>();
+ const auto *crops_data = crops()->data<int32_t>();
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= kInputMinDimensionNum);
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= kInputMaxDimensionNum);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+ int spatial_dims_num = input()->shape().num_dims() - 2;
+
+ LUCI_INTERPRETER_CHECK(block_shape()->shape().num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(block_shape()->shape().dim(0) == spatial_dims_num);
+
+ LUCI_INTERPRETER_CHECK(crops()->shape().num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(crops()->shape().dim(0) == spatial_dims_num);
+ LUCI_INTERPRETER_CHECK(crops()->shape().dim(1) == 2);
+ for (int i = 0; i < spatial_dims_num * 2; ++i)
+ {
+ LUCI_INTERPRETER_CHECK(crops_data[i] >= 0);
+ }
+
+ Shape output_shape = Shape(input()->shape().num_dims());
+ int output_batch_size = input()->shape().dim(0);
+ for (int i = 0; i < spatial_dims_num; ++i)
+ {
+ LUCI_INTERPRETER_CHECK(output_batch_size % block_shape_data[i] == 0);
+ output_batch_size = output_batch_size / block_shape_data[i];
+ output_shape.dim(i + 1) =
+ input()->shape().dim(i + 1) * block_shape_data[i] - crops_data[i * 2] - crops_data[i * 2 + 1];
+ }
+
+ output_shape.dim(0) = output_batch_size;
+ output_shape.dim(input()->shape().num_dims() - 1) =
+ input()->shape().dim(input()->shape().num_dims() - 1);
+ output()->resize(output_shape);
+}
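+
+// Illustrative shape walk-through (numbers taken from the Simple test below):
+// for input {4, 2, 2, 1} with block_shape {2, 2} and zero crops,
+//   output_batch_size = 4 / 2 / 2 = 1
+//   output_shape      = {1, 2 * 2 - 0 - 0, 2 * 2 - 0 - 0, 1} = {1, 4, 4, 1}.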
+
+void BatchToSpaceND::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ luci_interpreter_pal::BatchToSpaceND(
+ getTensorShape(input()), getTensorData<float>(input()), getTensorShape(block_shape()),
+ getTensorData<int32_t>(block_shape()), getTensorShape(crops()),
+ getTensorData<int32_t>(crops()), getTensorShape(output()), getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ luci_interpreter_pal::BatchToSpaceND(
+ getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(block_shape()),
+ getTensorData<int32_t>(block_shape()), getTensorShape(crops()),
+ getTensorData<int32_t>(crops()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/BatchToSpaceND.h b/compiler/luci-interpreter/src/kernels/BatchToSpaceND.h
new file mode 100644
index 000000000..57703ea5d
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/BatchToSpaceND.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_BATCHTOSPACEND_H
+#define LUCI_INTERPRETER_KERNELS_BATCHTOSPACEND_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class BatchToSpaceND : public Kernel
+{
+public:
+ BatchToSpaceND(const Tensor *input, const Tensor *block_shape, const Tensor *crops,
+ Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *block_shape() const { return _inputs[1]; }
+ const Tensor *crops() const { return _inputs[2]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_BATCHTOSPACEND_H
diff --git a/compiler/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp b/compiler/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp
new file mode 100644
index 000000000..52647a763
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchToSpaceND.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> block_shape_shape,
+ std::initializer_list<int32_t> crops_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<T> input_data, std::initializer_list<int32_t> block_shape_data,
+ std::initializer_list<int32_t> crops_data, std::initializer_list<T> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor block_shape_tensor =
+ makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data, memory_manager.get());
+ Tensor crops_tensor =
+ makeInputTensor<DataType::S32>(crops_shape, crops_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+}
+
+template <typename T> class BatchToSpaceNDTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(BatchToSpaceNDTest, DataTypes);
+
+TYPED_TEST(BatchToSpaceNDTest, Simple)
+{
+ Check<TypeParam>(/*input_shape=*/{4, 2, 2, 1}, /*block_shape_shape=*/{2}, /*crops_shape=*/{2, 2},
+ /*output_shape=*/{1, 4, 4, 1},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ /*block_shape_data=*/{2, 2}, /*crops_data=*/{0, 0, 0, 0},
+ /*output_data=*/{1, 5, 2, 6, 9, 13, 10, 14, 3, 7, 4, 8, 11, 15, 12, 16});
+}
+
+TEST(BatchToSpaceNDTest, Invalid_Shape_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {3, 2, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, memory_manager.get());
+ Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2}, memory_manager.get());
+ Tensor crops_tensor = makeInputTensor<DataType::S32>({2, 2}, {0, 0, 0, 0}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(BatchToSpaceNDTest, Invalid_Crops_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {4, 2, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, memory_manager.get());
+ Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2}, memory_manager.get());
+ Tensor crops_tensor = makeInputTensor<DataType::S32>({2, 2}, {0, 0, -1, 0}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/BinaryOpCommon.h b/compiler/luci-interpreter/src/kernels/BinaryOpCommon.h
new file mode 100644
index 000000000..2d2842a9e
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/BinaryOpCommon.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_BINARYOPUTILS_H
+#define LUCI_INTERPRETER_KERNELS_BINARYOPUTILS_H
+
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Derived from tensorflow/lite/kernels/internal/reference/maximum_minimum.h (v2.3.0).
+template <typename T, typename Op, int N = 5>
+void BinaryOpBroadcastSlow(const tflite::RuntimeShape &unextended_input1_shape,
+ const T *input1_data,
+ const tflite::RuntimeShape &unextended_input2_shape,
+ const T *input2_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data,
+ Op op)
+{
+ if (unextended_input1_shape == unextended_input2_shape)
+ {
+ const int flat_size = tflite::MatchingElementsSize(
+ unextended_input1_shape, unextended_input2_shape, unextended_output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ output_data[i] = op(input1_data[i], input2_data[i]);
+ }
+ }
+ else
+ {
+ assert(unextended_input1_shape.DimensionsCount() <= N);
+ assert(unextended_input2_shape.DimensionsCount() <= N);
+ assert(unextended_output_shape.DimensionsCount() <= N);
+
+ tflite::NdArrayDesc<N> desc1{};
+ tflite::NdArrayDesc<N> desc2{};
+ tflite::NdArrayDesc<N> output_desc{};
+ tflite::NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, unextended_input2_shape,
+ &desc1, &desc2);
+ tflite::CopyDimsToDesc(tflite::RuntimeShape::ExtendedShape(N, unextended_output_shape),
+ &output_desc);
+
+ auto fn = [&](int indexes[N]) {
+ output_data[SubscriptToIndex(output_desc, indexes)] =
+ op(input1_data[SubscriptToIndex(desc1, indexes)],
+ input2_data[SubscriptToIndex(desc2, indexes)]);
+ };
+ tflite::NDOpsHelper<N>(output_desc, fn);
+ }
+}
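+
+// Hypothetical call site, for illustration only (this header does not ship it):
+// an elementwise Maximum kernel could dispatch as
+//
+//   BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<float>(input1()),
+//                         getTensorShape(input2()), getTensorData<float>(input2()),
+//                         getTensorShape(output()), getTensorData<float>(output()),
+//                         [](float a, float b) { return std::max(a, b); });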
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_BINARYOPUTILS_H
diff --git a/compiler/luci-interpreter/src/kernels/CMakeLists.txt b/compiler/luci-interpreter/src/kernels/CMakeLists.txt
index 040ac5911..9f4ba0e0b 100644
--- a/compiler/luci-interpreter/src/kernels/CMakeLists.txt
+++ b/compiler/luci-interpreter/src/kernels/CMakeLists.txt
@@ -1,125 +1,43 @@
-find_package(Threads REQUIRED)
-nnas_find_package(GTest REQUIRED)
-
set(SOURCES
- Add.h
- Add.cpp
- ArgMax.h
- ArgMax.cpp
- AveragePool2D.h
- AveragePool2D.cpp
- Concatenation.h
- Concatenation.cpp
- Conv2D.h
- Conv2D.cpp
- DepthToSpace.h
- DepthToSpace.cpp
- DepthwiseConv2D.h
- DepthwiseConv2D.cpp
- Elu.h
- Elu.cpp
- FullyConnected.h
- FullyConnected.cpp
- If.h
- If.cpp
- L2Normalize.h
- L2Normalize.cpp
- L2Pool2D.h
- L2Pool2D.cpp
- LeakyRelu.h
- LeakyRelu.cpp
- LocalResponseNormalization.h
- LocalResponseNormalization.cpp
- Logistic.h
- Logistic.cpp
- MaxPool2D.h
- MaxPool2D.cpp
- Mean.h
- Mean.cpp
- Mul.h
- Mul.cpp
- Pad.h
- Pad.cpp
- Reshape.h
- Reshape.cpp
- Reverse.h
- Reverse.cpp
- Rsqrt.h
- Rsqrt.cpp
- Slice.h
- Slice.cpp
- Softmax.h
- Softmax.cpp
- SpaceToDepth.h
- SpaceToDepth.cpp
- Split.h
- Split.cpp
- StridedSlice.h
- StridedSlice.cpp
- Sqrt.h
- Sqrt.cpp
- Squeeze.h
- Squeeze.cpp
- Tanh.h
- Tanh.cpp
- Transpose.h
- Transpose.cpp
- TransposeConv.h
- TransposeConv.cpp
- Unpack.h
- Unpack.cpp)
+ BinaryOpCommon.h
+ Utils.h
+ Utils.cpp
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/TestMemoryManager.h"
+ ${LUCI_INTERPRETER_SOURCE_DIR}/TestMemoryManager.cpp
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/SimpleMemoryManager.h"
+ ${LUCI_INTERPRETER_SOURCE_DIR}/SimpleMemoryManager.cpp)
+
+macro(REGISTER_KERNEL NODE)
+ list(APPEND SOURCES "${NODE}.h")
+ list(APPEND SOURCES "${NODE}.cpp")
+endmacro(REGISTER_KERNEL)
+
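+# KERNEL_REGISTER_FILE is expected to contain one REGISTER_KERNEL(...) entry per
+# kernel, e.g. (illustrative):
+#   REGISTER_KERNEL(Add)
+#   REGISTER_KERNEL(BatchToSpaceND)
+#   REGISTER_KERNEL(Conv2D)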
+include(${KERNEL_REGISTER_FILE})
-list(APPEND SOURCES Utils.h Utils.cpp ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc)
+add_library(${LUCI_INTERPRETER_KERNELS} STATIC ${SOURCES})
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(${LUCI_INTERPRETER_KERNELS} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
+target_include_directories(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_SOURCE_DIR})
-add_library(luci_interpreter_kernels STATIC ${SOURCES})
-set_target_properties(luci_interpreter_kernels PROPERTIES POSITION_INDEPENDENT_CODE ON)
-target_include_directories(luci_interpreter_kernels PUBLIC ${LUCI_INTERPRETER_SOURCE_DIR})
-target_include_directories(luci_interpreter_kernels SYSTEM PRIVATE
- "${TensorFlowRuySource_DIR}"
- "${TensorFlowGEMMLowpSource_DIR}"
- "${TensorFlowEigenSource_DIR}"
- "${TensorFlowSource_DIR}")
-target_link_libraries(luci_interpreter_kernels
- PUBLIC luci_interpreter_core
- PRIVATE nncc_common Threads::Threads)
+target_link_libraries(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_CORE})
+target_link_libraries(${LUCI_INTERPRETER_KERNELS} PRIVATE nncc_common)
+
+add_pal_to_target(${LUCI_INTERPRETER_KERNELS})
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+macro(REGISTER_KERNEL NODE)
+ list(APPEND TEST_SOURCES "${NODE}.test.cpp")
+endmacro(REGISTER_KERNEL)
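+# Redefining REGISTER_KERNEL means that re-including KERNEL_REGISTER_FILE below
+# now collects the matching <Kernel>.test.cpp sources instead.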
-set(TEST_SOURCES
- Add.test.cpp
- ArgMax.test.cpp
- AveragePool2D.test.cpp
- Concatenation.test.cpp
- Conv2D.test.cpp
- DepthToSpace.test.cpp
- DepthwiseConv2D.test.cpp
- Elu.test.cpp
- FullyConnected.test.cpp
- If.test.cpp
- L2Normalize.test.cpp
- L2Pool2D.test.cpp
- LeakyRelu.test.cpp
- LocalResponseNormalization.test.cpp
- Logistic.test.cpp
- MaxPool2D.test.cpp
- Mean.test.cpp
- Mul.test.cpp
- Pad.test.cpp
- Reshape.test.cpp
- Reverse.test.cpp
- Rsqrt.test.cpp
- Slice.test.cpp
- Softmax.test.cpp
- SpaceToDepth.test.cpp
- Split.test.cpp
- StridedSlice.test.cpp
- Sqrt.test.cpp
- Squeeze.test.cpp
- Tanh.test.cpp
- Transpose.test.cpp
- TransposeConv.test.cpp
- Unpack.test.cpp)
+include(${KERNEL_REGISTER_FILE})
list(APPEND TEST_SOURCES TestUtils.h TestUtils.cpp)
-GTest_AddTest(luci_interpreter_kernels_test ${TEST_SOURCES})
-target_link_libraries(luci_interpreter_kernels_test luci_interpreter_kernels)
+GTest_AddTest(${LUCI_INTERPRETER_KERNELS}_test ${TEST_SOURCES})
+target_link_libraries(${LUCI_INTERPRETER_KERNELS}_test ${LUCI_INTERPRETER_KERNELS})
diff --git a/compiler/luci-interpreter/src/kernels/Cast.cpp b/compiler/luci-interpreter/src/kernels/Cast.cpp
new file mode 100644
index 000000000..39ee725dc
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Cast.cpp
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Cast.h"
+#include "kernels/Utils.h"
+
+namespace
+{
+
+using namespace luci_interpreter;
+using namespace luci_interpreter::kernels;
+
+template <typename InT, typename OutT>
+void cast_data(const InT *in_data, OutT *out_data, uint32_t elements_count)
+{
+ std::transform(in_data, in_data + elements_count, out_data,
+ [](InT a) { return static_cast<OutT>(a); });
+}
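+
+// The two helpers below realize a double dispatch: cast_from_tensor_to_tensor
+// switches over the input element type, cast_from_pointer_to_tensor over the
+// output element type, so every supported InT/OutT pair funnels into the
+// cast_data template above.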
+
+template <typename InT> void cast_from_pointer_to_tensor(const InT *in_data, Tensor *out_tensor)
+{
+ auto const out_type = out_tensor->element_type();
+ auto const elements_count = out_tensor->shape().num_elements();
+
+ switch (out_type)
+ {
+ case loco::DataType::U8:
+ cast_data(in_data, getTensorData<uint8_t>(out_tensor), elements_count);
+ break;
+ case loco::DataType::U16:
+ cast_data(in_data, getTensorData<uint16_t>(out_tensor), elements_count);
+ break;
+ case loco::DataType::U32:
+ cast_data(in_data, getTensorData<uint32_t>(out_tensor), elements_count);
+ break;
+ case loco::DataType::U64:
+ cast_data(in_data, getTensorData<uint64_t>(out_tensor), elements_count);
+ break;
+ case loco::DataType::S8:
+ cast_data(in_data, getTensorData<int8_t>(out_tensor), elements_count);
+ break;
+ case loco::DataType::S16:
+ cast_data(in_data, getTensorData<int16_t>(out_tensor), elements_count);
+ break;
+ case loco::DataType::S32:
+ cast_data(in_data, getTensorData<int32_t>(out_tensor), elements_count);
+ break;
+ case loco::DataType::S64:
+ cast_data(in_data, getTensorData<int64_t>(out_tensor), elements_count);
+ break;
+ case loco::DataType::FLOAT32:
+ cast_data(in_data, getTensorData<float>(out_tensor), elements_count);
+ break;
+ case loco::DataType::BOOL:
+ cast_data(in_data, getTensorData<bool>(out_tensor), elements_count);
+ break;
+ default:
+ throw std::runtime_error("Unsupported output type.");
+ }
+}
+
+void cast_from_tensor_to_tensor(const Tensor *in_tensor, Tensor *out_tensor)
+{
+ auto in_type = in_tensor->element_type();
+
+ switch (in_type)
+ {
+ case loco::DataType::U8:
+ cast_from_pointer_to_tensor(getTensorData<uint8_t>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::U16:
+ cast_from_pointer_to_tensor(getTensorData<uint16_t>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::U32:
+ cast_from_pointer_to_tensor(getTensorData<uint32_t>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::U64:
+ cast_from_pointer_to_tensor(getTensorData<uint64_t>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::S8:
+ cast_from_pointer_to_tensor(getTensorData<int8_t>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::S16:
+ cast_from_pointer_to_tensor(getTensorData<int16_t>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::S32:
+ cast_from_pointer_to_tensor(getTensorData<int32_t>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::S64:
+ cast_from_pointer_to_tensor(getTensorData<int64_t>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::FLOAT32:
+ cast_from_pointer_to_tensor(getTensorData<float>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::BOOL:
+ cast_from_pointer_to_tensor(getTensorData<bool>(in_tensor), out_tensor);
+ break;
+ default:
+ throw std::runtime_error("Unsupported input type.");
+ }
+}
+
+} // namespace
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Cast::Cast(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Cast::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() != loco::DataType::Unknown);
+ LUCI_INTERPRETER_CHECK(output()->element_type() != loco::DataType::Unknown);
+
+ const Shape &shape = input()->shape();
+ output()->resize(shape);
+}
+
+void Cast::execute() const
+{
+ assert(input()->shape().num_elements() == output()->shape().num_elements());
+
+ cast_from_tensor_to_tensor(input(), output());
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Cast.h b/compiler/luci-interpreter/src/kernels/Cast.h
new file mode 100644
index 000000000..f0bd02037
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Cast.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_CAST_H
+#define LUCI_INTERPRETER_KERNELS_CAST_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Cast : public Kernel
+{
+public:
+ Cast(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_CAST_H
diff --git a/compiler/luci-interpreter/src/kernels/Cast.test.cpp b/compiler/luci-interpreter/src/kernels/Cast.test.cpp
new file mode 100644
index 000000000..4713ad34c
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Cast.test.cpp
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Cast.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T1, typename T2>
+void Check(std::initializer_list<int32_t> shape, std::initializer_list<T1> input_data,
+ std::initializer_list<T2> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType input_type = getElementType<T1>();
+ constexpr DataType output_type = getElementType<T2>();
+
+ Tensor input_tensor = makeInputTensor<input_type>(shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(output_type);
+
+ Cast kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T2>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), shape);
+}
+
+template <typename T>
+void CheckBoolTo(std::initializer_list<int32_t> shape, std::initializer_list<bool> input_data,
+ std::initializer_list<T> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType input_type = loco::DataType::BOOL;
+ constexpr DataType output_type = getElementType<T>();
+ std::vector<typename DataTypeImpl<input_type>::Type> input_data_converted;
+ for (auto elem : input_data)
+ {
+ input_data_converted.push_back(elem);
+ }
+
+ Tensor input_tensor =
+ makeInputTensor<input_type>(shape, input_data_converted, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(output_type);
+
+ Cast kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), shape);
+}
+
+template <typename T> class CastTest : public ::testing::Test
+{
+};
+
+using IntDataTypes =
+ ::testing::Types<uint8_t, uint16_t, uint32_t, uint64_t, int8_t, int16_t, int32_t, int64_t>;
+TYPED_TEST_SUITE(CastTest, IntDataTypes);
+
+TYPED_TEST(CastTest, FloatToInt)
+{
+ Check<float, TypeParam>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ 1.0f, 9.0f, 7.0f, 3.0f, //
+ },
+ /*output_data=*/
+ {
+ 1, 9, 7, 3, //
+ });
+ SUCCEED();
+}
+
+TYPED_TEST(CastTest, IntToFloat)
+{
+ Check<TypeParam, float>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ 1, 9, 7, 3, //
+ },
+ /*output_data=*/
+ {
+ 1.0f, 9.0f, 7.0f, 3.0f, //
+ });
+ SUCCEED();
+}
+
+template <typename T1, typename T2> void check_int()
+{
+ Check<T1, T2>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ 1, 9, 7, 3, //
+ },
+ /*output_data=*/
+ {
+ 1, 9, 7, 3, //
+ });
+ SUCCEED();
+}
+
+TYPED_TEST(CastTest, IntToInt)
+{
+ check_int<TypeParam, uint8_t>();
+ check_int<TypeParam, uint16_t>();
+ check_int<TypeParam, uint32_t>();
+ check_int<TypeParam, uint64_t>();
+ check_int<TypeParam, int8_t>();
+ check_int<TypeParam, int16_t>();
+ check_int<TypeParam, int32_t>();
+ check_int<TypeParam, int64_t>();
+ SUCCEED();
+}
+
+TYPED_TEST(CastTest, IntToBool)
+{
+ Check<TypeParam, bool>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ 1, 0, 7, 0, //
+ },
+ /*output_data=*/
+ {
+ true, false, true, false, //
+ });
+ SUCCEED();
+}
+
+TYPED_TEST(CastTest, BoolToInt)
+{
+ CheckBoolTo<TypeParam>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ true, false, false, true, //
+ },
+ /*output_data=*/
+ {
+ 1, 0, 0, 1, //
+ });
+ SUCCEED();
+}
+
+TEST(CastTest, FloatToBool)
+{
+ Check<float, bool>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ 1.0f, 0.0f, 7.0f, 0.0f, //
+ },
+ /*output_data=*/
+ {
+ true, false, true, false, //
+ });
+ SUCCEED();
+}
+
+TEST(CastTest, BoolToFloat)
+{
+ CheckBoolTo<float>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ true, false, false, true, //
+ },
+ /*output_data=*/
+ {
+ 1.0f, 0.0f, 0.0f, 1.0f, //
+ });
+ SUCCEED();
+}
+
+TEST(CastTest, FloatToFloat)
+{
+ Check<float, float>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ 1.0f, 0.0f, 7.0f, 0.0f, //
+ },
+ /*output_data=*/
+ {
+ 1.0f, 0.0f, 7.0f, 0.0f, //
+ });
+ SUCCEED();
+}
+
+TEST(CastTest, BoolToBool)
+{
+ CheckBoolTo<bool>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ true, true, false, false, //
+ },
+ /*output_data=*/
+ {
+ true, true, false, false, //
+ });
+ SUCCEED();
+}
+
+TEST(CastTest, UnsupportedType_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 2, 4},
+ {
+ 1, 2, 7, 8, //
+ 1, 9, 7, 3, //
+ },
+ memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::Unknown);
+
+ Cast kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+ SUCCEED();
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Concatenation.cpp b/compiler/luci-interpreter/src/kernels/Concatenation.cpp
index 812ab7609..46ee5941e 100644
--- a/compiler/luci-interpreter/src/kernels/Concatenation.cpp
+++ b/compiler/luci-interpreter/src/kernels/Concatenation.cpp
@@ -18,7 +18,7 @@
#include "kernels/Concatenation.h"
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/concatenation.h>
#include <stdexcept>
@@ -29,27 +29,30 @@ namespace kernels
Concatenation::Concatenation(std::vector<const Tensor *> inputs, Tensor *output,
const ConcatenationParams &params)
- : KernelWithParams<ConcatenationParams>(std::move(inputs), {output}, params)
+ : KernelWithParams<ConcatenationParams>(std::move(inputs), {output}, params)
{
}
void Concatenation::configure()
{
const int num_inputs = _inputs.size();
- assert(num_inputs > 0);
+ LUCI_INTERPRETER_CHECK(num_inputs > 0);
const Tensor *t0 = _inputs[0];
+ // TODO: Support concat with fused activation function
+ LUCI_INTERPRETER_CHECK(params().activation == luci::FusedActFunc::NONE);
+
int axis = _params.axis;
if (axis < 0)
axis += t0->shape().num_dims();
- assert(axis >= 0 && axis < t0->shape().num_dims());
+ LUCI_INTERPRETER_CHECK(axis >= 0 && axis < t0->shape().num_dims());
int32_t sum_axis = t0->shape().dim(axis);
for (int i = 1; i < num_inputs; ++i)
{
const Tensor *tensor = _inputs[i];
- assert(tensor->element_type() == t0->element_type());
- assert(tensor->shape().num_dims() == t0->shape().num_dims());
+ LUCI_INTERPRETER_CHECK(tensor->element_type() == t0->element_type());
+ LUCI_INTERPRETER_CHECK(tensor->shape().num_dims() == t0->shape().num_dims());
for (int d = 0; d < t0->shape().num_dims(); ++d)
{
if (d == axis)
@@ -58,7 +61,7 @@ void Concatenation::configure()
}
else
{
- assert(tensor->shape().dim(d) == t0->shape().dim(d));
+ LUCI_INTERPRETER_CHECK(tensor->shape().dim(d) == t0->shape().dim(d));
}
}
}
@@ -66,11 +69,21 @@ void Concatenation::configure()
Shape output_shape = t0->shape();
output_shape.dim(axis) = sum_axis;
- // TODO S8 type needs more checking: quantization parameters of all input tensors and the output
- // tensor should be the same. Note that there is no such requirement for U8 type.
- if (t0->element_type() == DataType::S8)
- throw std::runtime_error("Unsupported type.");
+  // If the input tensors are of INT8 type, the quantization parameters of all
+  // input tensors and of the output tensor should be the same.
+ for (auto current_tensor : _inputs)
+ {
+ if (current_tensor->element_type() == DataType::S8)
+ {
+ LUCI_INTERPRETER_CHECK(current_tensor->quantized_dimension() ==
+ output()->quantized_dimension());
+ LUCI_INTERPRETER_CHECK(current_tensor->zero_points().size() ==
+ current_tensor->scales().size());
+ LUCI_INTERPRETER_CHECK(current_tensor->zero_points() == output()->zero_points());
+ LUCI_INTERPRETER_CHECK(current_tensor->scales() == output()->scales());
+ }
+ }
output()->resize(output_shape);
}
diff --git a/compiler/luci-interpreter/src/kernels/Concatenation.test.cpp b/compiler/luci-interpreter/src/kernels/Concatenation.test.cpp
index d9a7097d0..f893b38fd 100644
--- a/compiler/luci-interpreter/src/kernels/Concatenation.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Concatenation.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/Concatenation.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -26,58 +27,242 @@ namespace
using namespace testing;
-TEST(ConcatenationTest, Float)
+class ConcatenationTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(ConcatenationTest, Float)
{
std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
- Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data);
- Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data);
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
ConcatenationParams params{};
// Try different 'axis' and expect different results.
{
params.axis = 0;
+ params.activation = luci::FusedActFunc::NONE;
Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
kernel.configure();
+ for (auto t : kernel.getOutputTensors())
+ {
+ _memory_manager->allocate_memory(*t);
+ }
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor),
- ElementsAreArray(ArrayFloatNear({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12})));
+ FloatArrayNear({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}));
}
{
params.axis = -2; // Same as '0'.
+ params.activation = luci::FusedActFunc::NONE;
Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor),
- ElementsAreArray(ArrayFloatNear({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12})));
+ FloatArrayNear({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}));
}
{
params.axis = 1;
+ params.activation = luci::FusedActFunc::NONE;
Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor),
- ElementsAreArray(ArrayFloatNear({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12})));
+ FloatArrayNear({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12}));
}
{
params.axis = -1; // Same as '1'.
+ params.activation = luci::FusedActFunc::NONE;
Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor),
- ElementsAreArray(ArrayFloatNear({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12})));
+ FloatArrayNear({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12}));
}
}
+TEST_F(ConcatenationTest, Input_Number_Check_NEG)
+{
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ ConcatenationParams params{};
+
+ params.axis = -1;
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ConcatenationTest, Invalid_Axis_NEG)
+{
+ std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+ std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ ConcatenationParams params{};
+
+ params.axis = -3;
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ConcatenationTest, Mismatching_Input_Type_NEG)
+{
+ std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+ std::vector<uint8_t> input2_data{7, 8, 9, 10, 11, 12};
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U8>({2, 3}, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ ConcatenationParams params{};
+
+ params.axis = -1;
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ConcatenationTest, Mismatching_Input_Dimension_Num_NEG)
+{
+ std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+ std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 3}, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ ConcatenationParams params{};
+
+ params.axis = -1;
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ConcatenationTest, Mismatching_Input_Dimension_NEG)
+{
+ std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+ std::vector<float> input2_data{7, 8, 9, 10, 11, 12, 13, 14, 15};
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>({3, 3}, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ ConcatenationParams params{};
+
+ params.axis = -1;
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ConcatenationTest, Int8_Mismatching_Input_Type_NEG)
+{
+ std::vector<uint8_t> input1_data{1, 2, 3, 4};
+ std::vector<int8_t> input2_data{5, 6, 7, 8};
+ Tensor input1_tensor = makeInputTensor<DataType::U8>({2, 2}, input1_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S8>({2, 2}, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8);
+ ConcatenationParams params{};
+
+ params.axis = -1;
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ConcatenationTest, Int8_Mismatching_Input_Output_Quant_Params_NEG)
+{
+ std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+ std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
+ int quantized_dimension = 3;
+ std::vector<float> scales{0.1, 0.2, 0.3};
+ std::vector<int32_t> zero_points{1, -1, 1};
+
+ Tensor input1_tensor = makeInputTensor<DataType::S8>(
+ {1, 1, 2, 3}, scales, zero_points, quantized_dimension, input1_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S8>(
+ {1, 1, 2, 3}, scales, zero_points, quantized_dimension, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, scales.at(0), zero_points.at(0));
+ ConcatenationParams params{};
+
+ params.axis = -1;
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ConcatenationTest, Int8_Mismatching_Zero_Point_NEG)
+{
+ std::vector<float> input1_data{1, 2, 3, 4};
+ std::vector<float> input2_data{5, 6, 7, 8};
+ float scale = 0.1;
+ int32_t zero_point_1 = 1;
+ int32_t zero_point_2 = -1;
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S8>({2, 2}, scale, zero_point_1, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::S8>({2, 2}, scale, zero_point_2, input2_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::S8, scale, zero_point_1);
+ ConcatenationParams params{};
+
+ params.axis = -1;
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+// TODO: Remove this test when concat w/ fused_activation is supported
+TEST_F(ConcatenationTest, With_Fused_Activation_NEG)
+{
+ std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+ std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ ConcatenationParams params{};
+
+ params.axis = 1;
+ params.activation = luci::FusedActFunc::RELU;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.cpp b/compiler/luci-interpreter/src/kernels/Conv2D.cpp
index a51fb4afc..234f95425 100644
--- a/compiler/luci-interpreter/src/kernels/Conv2D.cpp
+++ b/compiler/luci-interpreter/src/kernels/Conv2D.cpp
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -18,7 +19,7 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h>
+#include "PALConv2d.h"
#include <stdexcept>
#include <thread>
@@ -29,8 +30,8 @@ namespace kernels
{
Conv2D::Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output,
- const Conv2DParams &params)
- : KernelWithParams<Conv2DParams>({input, filter, bias}, {output}, params)
+ Tensor *scratchpad, const Conv2DParams &params)
+ : KernelWithParams<Conv2DParams>({input, filter, bias}, {output, scratchpad}, params)
{
}
@@ -44,7 +45,11 @@ void Conv2D::configure()
// (3) | uint8 uint8 int32 uint8 | quantized
// (4) | int8 int8 int32 int8 | quantized per channel
//
- // We only support (1) and (3) for now.
+ // We only support (1), (3) and (4) for now, and additionally the following:
+ // | input filter bias output |
+ // ----+---------------------------+
+ // (5) | int16 int16 int64 int16 |
+ //
if (input()->element_type() == DataType::FLOAT32 && filter()->element_type() == DataType::FLOAT32)
{
LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::FLOAT32);
@@ -53,6 +58,21 @@ void Conv2D::configure()
{
LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
}
+ else if (input()->element_type() == DataType::S8 && filter()->element_type() == DataType::S8)
+ {
+ LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
+ LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
+ static_cast<size_t>(filter()->shape().dim(0)));
+ for (auto zerop : filter()->zero_points())
+ {
+ LUCI_INTERPRETER_CHECK(zerop == 0);
+ }
+ }
+ else if (input()->element_type() == DataType::S16 && filter()->element_type() == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S64);
+ }
else
{
throw std::runtime_error("Unsupported type.");
@@ -75,11 +95,11 @@ void Conv2D::configure()
bias()->shape().dim(0) == output_depth));
const int32_t output_height =
- computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height,
- _params.dilation_height_factor);
+ computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height,
+ _params.dilation_height_factor);
const int32_t output_width =
- computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width,
- _params.dilation_width_factor);
+ computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width,
+ _params.dilation_width_factor);
_padding_height = computePadding(_params.stride_height, _params.dilation_height_factor,
input_height, filter_height, output_height);
@@ -88,20 +108,28 @@ void Conv2D::configure()
output()->resize({batches, output_height, output_width, output_depth});
- // Allocate tensor for Im2Col, if needed.
- // The checks here should be aligned with the actual implementation.
- const bool need_dilated_im2col =
- _params.dilation_height_factor != 1 || _params.dilation_width_factor != 1;
- const bool need_non_dilated_im2col = _params.stride_height != 1 || _params.stride_width != 1 ||
- filter_height != 1 || filter_width != 1;
- const bool need_im2col = need_dilated_im2col || need_non_dilated_im2col;
- if (need_im2col)
+ // Allocate tensor for scratchpad, if needed.
+ tflite::ConvParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.dilation_height_factor = _params.dilation_height_factor;
+ params.dilation_width_factor = _params.dilation_width_factor;
+ auto scratchpad = getOutputTensors()[1];
+ luci_interpreter_pal::SetupScratchpadTensor(scratchpad, input()->element_type(), params,
+ getTensorShape(input()), getTensorShape(filter()),
+ getTensorShape(output()));
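+
+  // The PAL decides whether this platform/type combination needs an im2col (or
+  // other) scratch buffer and resizes `scratchpad` accordingly; the eval paths
+  // below only touch its data when it is actually allocatable.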
+
+ switch (_params.activation)
{
- const int input_depth = input_shape.dim(3);
- Shape im2col_shape{batches, output_height, output_width,
- input_depth * filter_height * filter_width};
- _im2col =
- std::make_unique<Tensor>(input()->element_type(), im2col_shape, AffineQuantization{}, "");
+ case Activation::NONE:
+ case Activation::RELU:
+ case Activation::RELU6:
+ case Activation::RELU_N1_TO_1:
+ break;
+ default:
+ throw std::runtime_error("Unsupported fused activation");
}
}
@@ -117,7 +145,23 @@ void Conv2D::execute() const
}
throw std::runtime_error("Unsupported type.");
case DataType::U8:
- evalQuantized();
+ if (filter()->scales().size() == 1)
+ {
+ evalQuantized();
+ }
+ else if (filter()->scales().size() > 1)
+ {
+ LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
+ static_cast<size_t>(filter()->shape().dim(0)));
+ evalQuantizedPerChannel();
+ }
+ break;
+ case DataType::S8:
+ evalQuantizedS8PerChannel();
+ break;
+ case DataType::S16:
+ evalQuantizedS16();
break;
default:
throw std::runtime_error("Unsupported type.");
@@ -140,11 +184,16 @@ void Conv2D::evalFloat() const
params.float_activation_min = activation_min;
params.float_activation_max = activation_max;
- tflite::optimized_ops::Conv(params, getTensorShape(input()), getTensorData<float>(input()),
- getTensorShape(filter()), getTensorData<float>(filter()),
- getTensorShape(bias()), getTensorData<float>(bias()),
- getTensorShape(output()), getTensorData<float>(output()),
- getTensorShape(_im2col.get()), getTensorData<float>(_im2col.get()));
+ auto scratchpad = getOutputTensors()[1];
+ float *scratchpad_data = nullptr;
+ if (scratchpad->is_allocatable())
+ scratchpad_data = scratchpad->data<float>();
+
+ luci_interpreter_pal::Conv(params, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(filter()), getTensorData<float>(filter()),
+ getTensorShape(bias()), getTensorData<float>(bias()),
+ getTensorShape(output()), getTensorData<float>(output()),
+ getTensorShape(scratchpad), scratchpad_data);
}
void Conv2D::evalQuantized() const
@@ -178,16 +227,229 @@ void Conv2D::evalQuantized() const
params.quantized_activation_min = activation_min;
params.quantized_activation_max = activation_max;
- // TODO This should only be done once (although it takes only a few microseconds).
- // Also, the user should be able to adjust the number of threads.
- auto gemmlowp_context = std::make_unique<gemmlowp::GemmContext>();
- gemmlowp_context->set_max_num_threads(static_cast<int>(std::thread::hardware_concurrency()));
+ auto scratchpad = getOutputTensors()[1];
+ luci_interpreter_pal::Conv(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ getTensorShape(filter()), getTensorData<uint8_t>(filter()),
+ getTensorShape(bias()), getTensorData<int32_t>(bias()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()),
+ getTensorShape(scratchpad), getTensorData<uint8_t>(scratchpad));
+}
- tflite::optimized_ops::Conv(
- params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(filter()),
- getTensorData<uint8_t>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
- getTensorShape(output()), getTensorData<uint8_t>(output()), getTensorShape(_im2col.get()),
- getTensorData<uint8_t>(_im2col.get()), gemmlowp_context.get());
+void Conv2D::evalQuantizedPerChannel() const
+{
+ const auto *input_data = getTensorData<uint8_t>(input());
+ const auto *filter_data = getTensorData<uint8_t>(filter());
+ const auto *bias_data = getTensorData<int32_t>(bias());
+ auto *output_data = getTensorData<uint8_t>(output());
+
+ const Shape &input_shape = input()->shape();
+ const Shape &filter_shape = filter()->shape();
+ const Shape &output_shape = output()->shape();
+
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t input_depth = input_shape.dim(3);
+ const int32_t output_depth = filter_shape.dim(0);
+ const int32_t filter_height = filter_shape.dim(1);
+ const int32_t filter_width = filter_shape.dim(2);
+ const int32_t output_height = output_shape.dim(1);
+ const int32_t output_width = output_shape.dim(2);
+
+ const int32_t stride_height = _params.stride_height;
+ const int32_t stride_width = _params.stride_width;
+ const int32_t dilation_height_factor = _params.dilation_height_factor;
+ const int32_t dilation_width_factor = _params.dilation_width_factor;
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ const std::vector<double> effective_output_scale =
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+ const std::vector<ChannelQuantMultipliers> multipliers_raw =
+ quantizeMultipliers(effective_output_scale);
+ BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(multipliers_raw);
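+
+  // Requantization sketch: each output channel c is rescaled by
+  //   effective_output_scale[c] = input_scale * filter_scale[c] / output_scale,
+  // realized below as a fixed-point multiplier/shift pair, then shifted by the
+  // output zero point and clamped to the activation range.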
+
+ for (int32_t batch = 0; batch < batches; ++batch)
+ {
+ for (int32_t out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int32_t out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+ {
+ const int32_t in_y_origin = out_y * stride_height - _padding_height;
+ const int32_t in_x_origin = out_x * stride_width - _padding_width;
+ int32_t acc = 0;
+ for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
+ const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
+ if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
+ {
+ for (int32_t in_c = 0; in_c < input_depth; ++in_c)
+ {
+ const uint8_t input_val =
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+ const uint8_t filter_val =
+ filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
+ acc += static_cast<int32_t>(input_val - input()->zero_point()) *
+ static_cast<int32_t>(filter_val - filter()->zero_points()[out_c]);
+ }
+ }
+ }
+ }
+ if (bias_data)
+ {
+ acc += bias_data[out_c];
+ }
+
+ int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
+ acc, quant_multipliers[out_c].multiplier, quant_multipliers[out_c].shift);
+
+ scaled_acc += output()->zero_point();
+ scaled_acc = std::max(scaled_acc, activation_min);
+ scaled_acc = std::min(scaled_acc, activation_max);
+ output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
+ }
+ }
+ }
+ }
+}
+
+void Conv2D::evalQuantizedS8PerChannel() const
+{
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::ConvParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.dilation_height_factor = _params.dilation_height_factor;
+ params.dilation_width_factor = _params.dilation_width_factor;
+  // The kernel expects the input offset to be the negated input zero point;
+  // filter zero points are constrained to zero in configure().
+ params.input_offset = -input()->zero_point(); // Note the '-'.
+ params.weights_offset = 0; // Unused in tflite code
+ params.output_offset = output()->zero_point();
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ const std::vector<double> effective_output_scales =
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+ std::vector<ChannelQuantMultipliers> quant_multipliers =
+ quantizeMultipliers(effective_output_scales);
+
+ std::vector<int32_t> shifts;
+ std::transform(quant_multipliers.begin(), quant_multipliers.end(), std::back_inserter(shifts),
+ [](ChannelQuantMultipliers cm) { return cm.shift; });
+ std::vector<int32_t> multipliers;
+ std::transform(quant_multipliers.begin(), quant_multipliers.end(),
+ std::back_inserter(multipliers),
+ [](ChannelQuantMultipliers cm) { return cm.multiplier; });
+
+ auto scratchpad = getOutputTensors()[1];
+ int8_t *scratchpad_data = nullptr;
+ if (scratchpad->is_allocatable())
+ scratchpad_data = scratchpad->data<int8_t>();
+
+ luci_interpreter_pal::ConvPerChannel(
+ params, multipliers.data(), shifts.data(), getTensorShape(input()),
+ getTensorData<int8_t>(input()), getTensorShape(filter()), getTensorData<int8_t>(filter()),
+ getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()),
+ getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data);
+}
+
+void Conv2D::evalQuantizedS16() const
+{
+ const auto *input_data = getTensorData<int16_t>(input());
+ const auto *filter_data = getTensorData<int16_t>(filter());
+ const auto *bias_data = getTensorData<int64_t>(bias());
+ auto *output_data = getTensorData<int16_t>(output());
+
+ const Shape &input_shape = input()->shape();
+ const Shape &filter_shape = filter()->shape();
+ const Shape &output_shape = output()->shape();
+
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t input_depth = input_shape.dim(3);
+ const int32_t output_depth = filter_shape.dim(0);
+ const int32_t filter_height = filter_shape.dim(1);
+ const int32_t filter_width = filter_shape.dim(2);
+ const int32_t output_height = output_shape.dim(1);
+ const int32_t output_width = output_shape.dim(2);
+
+ const int32_t stride_height = _params.stride_height;
+ const int32_t stride_width = _params.stride_width;
+ const int32_t dilation_height_factor = _params.dilation_height_factor;
+ const int32_t dilation_width_factor = _params.dilation_width_factor;
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ const std::vector<double> effective_output_scale =
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+ const std::vector<ChannelQuantMultipliers> multipliers_raw =
+ quantizeMultipliers(effective_output_scale);
+ BroadcastableWrapper<ChannelQuantMultipliers> multipliers(multipliers_raw);
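+
+  // 16-bit path: inputs and filters are assumed to be symmetrically quantized
+  // (zero points of zero), so raw products are accumulated in int64_t and only
+  // rescaled per output channel before clamping.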
+
+ for (int32_t batch = 0; batch < batches; ++batch)
+ {
+ for (int32_t out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int32_t out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+ {
+ const int32_t in_y_origin = out_y * stride_height - _padding_height;
+ const int32_t in_x_origin = out_x * stride_width - _padding_width;
+ int64_t acc = 0;
+ for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
+ const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
+ if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
+ {
+ for (int32_t in_c = 0; in_c < input_depth; ++in_c)
+ {
+ const int16_t input_val =
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+ const int16_t filter_val =
+ filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
+ acc += static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
+ }
+ }
+ }
+ }
+ if (bias_data)
+ {
+ acc += bias_data[out_c];
+ }
+
+ int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
+ acc, multipliers[out_c].multiplier, multipliers[out_c].shift);
+
+ scaled_acc = std::max(scaled_acc, activation_min);
+ scaled_acc = std::min(scaled_acc, activation_max);
+
+ output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
+ }
+ }
+ }
+ }
}
} // namespace kernels
diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.h b/compiler/luci-interpreter/src/kernels/Conv2D.h
index 69e309852..330bf3a2a 100644
--- a/compiler/luci-interpreter/src/kernels/Conv2D.h
+++ b/compiler/luci-interpreter/src/kernels/Conv2D.h
@@ -31,7 +31,7 @@ class Conv2D : public KernelWithParams<Conv2DParams>
{
public:
Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output,
- const Conv2DParams &params);
+ Tensor *scratchpad, const Conv2DParams &params);
const Tensor *input() const { return _inputs[0]; }
const Tensor *filter() const { return _inputs[1]; }
@@ -44,9 +44,11 @@ public:
private:
void evalFloat() const;
void evalQuantized() const;
+ void evalQuantizedPerChannel() const;
+ void evalQuantizedS8PerChannel() const;
+ void evalQuantizedS16() const;
private:
- std::unique_ptr<Tensor> _im2col;
int32_t _padding_height{};
int32_t _padding_width{};
};
diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp b/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp
index 0446d9760..0fe6ef795 100644
--- a/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/Conv2D.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -26,27 +27,39 @@ namespace
using namespace testing;
-TEST(Conv2DTest, Float)
+class Conv2DTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(Conv2DTest, Float)
{
Shape input_shape{1, 4, 3, 2};
Shape filter_shape{2, 2, 2, 2};
Shape bias_shape{2};
std::vector<float> input_data{
- 1, 2, 3, 4, 5, 6, // row = 0
- 7, 8, 9, 10, 11, 12, // row = 1
- 13, 14, 15, 16, 17, 18, // row = 2
- 19, 20, 21, 22, 23, 24, // row = 3
+ 1, 2, 3, 4, 5, 6, // row = 0
+ 7, 8, 9, 10, 11, 12, // row = 1
+ 13, 14, 15, 16, 17, 18, // row = 2
+ 19, 20, 21, 22, 23, 24, // row = 3
};
std::vector<float> filter_data{
- 1, 2, -3, -4, // out = 0, row = 0
- -5, 6, -7, 8, // out = 1, row = 0
- 4, -2, 3, -1, // out = 0, row = 1
- -8, -6, 7, 5, // out = 1, row = 1
+ 1, 2, -3, -4, // out = 0, row = 0
+ -5, 6, -7, 8, // out = 0, row = 1
+ 4, -2, 3, -1, // out = 1, row = 0
+ -8, -6, 7, 5, // out = 1, row = 1
};
std::vector<float> bias_data{1, 2};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
- Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
- Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Conv2DParams params{};
@@ -57,42 +70,95 @@ TEST(Conv2DTest, Float)
params.dilation_width_factor = 1;
params.activation = Activation::RELU;
- Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
kernel.configure();
+ _memory_manager->allocate_memory(im2col);
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{
- 11, 16, 7, 20, // row = 0
- 0, 40, 0, 44, // row = 1
+ 11, 16, 7, 20, // row = 0
+ 0, 40, 0, 44, // row = 1
};
std::vector<int32_t> ref_output_shape{1, 2, 2, 2};
- EXPECT_THAT(extractTensorData<float>(output_tensor),
- ElementsAreArray(ArrayFloatNear(ref_output_data)));
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
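
A hand check of the first reference value confirms the row-major [out_channels, height, width, in_channels] filter layout (the first eight filter values are output channel 0). With stride_height = 2, out(0,0,0,0) covers input rows 0-1, columns 0-1:

  (1*1 + 2*2) + (3*(-3) + 4*(-4))       // row 0:  5 - 25
+ (7*(-5) + 8*6) + (9*(-7) + 10*8)      // row 1: 13 + 17
+ 1                                     // bias
= 11

and RELU leaves the positive values untouched, matching ref_output_data.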
-TEST(Conv2DTest, FloatCheck)
+TEST_F(Conv2DTest, FloatPointwise)
+{
+ Shape input_shape{1, 2, 2, 2};
+ Shape filter_shape{2, 1, 1, 2};
+ Shape bias_shape{2};
+ std::vector<float> input_data{
+ 1, 2, // row = 0, col = 0
+ 3, 4, // row = 0, col = 1
+ 5, 6, // row = 1, col = 0
+ 7, 8, // row = 1, col = 1
+ };
+ std::vector<float> filter_data{
+ -1, 2, // out = 0
+ -3, 4, // out = 1
+ };
+ std::vector<float> bias_data{1, 2};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
+
+ Conv2DParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 1;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(im2col);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 4, 7, 6, 9, // row = 0
+ 8, 11, 10, 13, // row = 1
+ };
+ std::vector<int32_t> ref_output_shape{1, 2, 2, 2};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(Conv2DTest, FloatCheck)
{
Shape input_shape{2, 2, 4, 1};
Shape filter_shape{3, 2, 2, 1};
Shape bias_shape{3};
std::vector<float> input_data{
- // First batch
- 1, 1, 1, 1, // row = 1
- 2, 2, 2, 2, // row = 2
- // Second batch
- 1, 2, 3, 4, // row = 1
- 1, 2, 3, 4, // row = 2
+ // First batch
+ 1, 1, 1, 1, // row = 1
+ 2, 2, 2, 2, // row = 2
+ // Second batch
+ 1, 2, 3, 4, // row = 1
+ 1, 2, 3, 4, // row = 2
};
std::vector<float> filter_data{
- 1, 2, 3, 4, // first 2x2 filter
- -1, 1, -1, 1, // second 2x2 filter
- -1, -1, 1, 1, // third 2x2 filter
+ 1, 2, 3, 4, // first 2x2 filter
+ -1, 1, -1, 1, // second 2x2 filter
+ -1, -1, 1, 1, // third 2x2 filter
};
std::vector<float> bias_data{1, 2, 3};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
- Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
- Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Conv2DParams params{};
@@ -103,57 +169,130 @@ TEST(Conv2DTest, FloatCheck)
params.dilation_width_factor = 1;
params.activation = Activation::NONE;
- Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(im2col);
kernel.execute();
std::vector<float> ref_output_data{
- 18, 2, 5, // first batch, left
- 18, 2, 5, // first batch, right
- 17, 4, 3, // second batch, left
- 37, 4, 3, // second batch, right
+ 18, 2, 5, // first batch, left
+ 18, 2, 5, // first batch, right
+ 17, 4, 3, // second batch, left
+ 37, 4, 3, // second batch, right
};
std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
- EXPECT_THAT(extractTensorData<float>(output_tensor),
- ElementsAreArray(ArrayFloatNear(ref_output_data)));
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
-TEST(Conv2DTest, Uint8)
+TEST_F(Conv2DTest, Uint8)
{
+ std::vector<float> input_data{
+ // First batch
+ 1, 1, 1, 1, // row = 1
+ 2, 2, 2, 2, // row = 2
+ // Second batch
+ 1, 2, 3, 4, // row = 1
+ 1, 2, 3, 4, // row = 2
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, // first 2x2 filter
+ -1, 1, -1, 1, // second 2x2 filter
+ -1, -1, 1, 1, // third 2x2 filter
+ };
+ std::vector<float> bias_data{1, 2, 3};
+
std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64);
std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
- Shape bias_shape = {3};
- Tensor input_tensor{
- DataType::U8, {2, 2, 4, 1}, {{input_quant_param.first}, {input_quant_param.second}}, ""};
- Tensor filter_tensor{
- DataType::U8, {3, 2, 2, 1}, {{input_quant_param.first}, {input_quant_param.second}}, ""};
- Tensor bias_tensor{
- DataType::S32, bias_shape, {{input_quant_param.first * input_quant_param.first}, {0}}, ""};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::U8>({3, 2, 2, 1}, input_quant_param.first, input_quant_param.second,
+ filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>(
+ {3}, input_quant_param.first * input_quant_param.first, 0, bias_data, _memory_manager.get());
+ Tensor im2col(DataType::U8, Shape({}), {}, "");
+ Tensor output_tensor =
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+
+ Conv2DParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 2;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::NONE;
+
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(im2col);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 18, 2, 5, // first batch, left
+ 18, 2, 5, // first batch, right
+ 17, 4, 3, // second batch, left
+ 37, 4, 3, // second batch, right
+ };
+ std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
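
quantizationParams<uint8_t>(f_min, f_max) presumably returns the usual affine pair mapping [f_min, f_max] onto [0, 255] (the exact rounding is an assumption). For the ranges used here the numbers come out cleanly:

input:  scale = (64 + 63.5) / 255 = 0.5,  zero_point = round(63.5 / 0.5) = 127
output: scale = (128 + 127) / 255 = 1.0,  zero_point = round(127 / 1.0)  = 127
bias:   scale = 0.5 * 0.5 = 0.25, zero point 0 (input_scale * filter_scale)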
+
+TEST_F(Conv2DTest, Uint8_CWQ)
+{
+ const int output_channels = 3;
+ std::vector<float> input_data{
+ // First batch
+ 1, 1, 1, 1, // row = 1
+ 2, 2, 2, 2, // row = 2
+ // Second batch
+ 1, 2, 3, 4, // row = 1
+ 1, 2, 3, 4, // row = 2
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, // first 2x2 filter
+ -1, 1, -1, 1, // second 2x2 filter
+ -1, -1, 1, 1, // third 2x2 filter
+ };
+ std::vector<float> bias_data{1, 2, 3};
+ Shape filter_shape{output_channels, 2, 2, 1};
+
+ std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(0, 4);
+ std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
+
+ std::vector<std::pair<float, int32_t>> filter_quant_params;
+ filter_quant_params.push_back(quantizationParams<uint8_t>(0, 4));
+ filter_quant_params.push_back(quantizationParams<uint8_t>(-1, 1));
+ filter_quant_params.push_back(quantizationParams<uint8_t>(-1, 1));
+
+ std::vector<float> filter_scales;
+ std::vector<int32_t> filter_zerops;
+ for (auto iter : filter_quant_params)
+ {
+ filter_scales.push_back(iter.first);
+ filter_zerops.push_back(iter.second);
+ }
+
+ std::vector<float> bias_scales;
+ for (int i = 0; i < output_channels; ++i)
+ bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first);
+ std::vector<int32_t> zerop(output_channels, 0);
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops,
+ 0, filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0,
+ bias_data, _memory_manager.get());
+ Tensor im2col(DataType::U8, Shape({}), {}, "");
Tensor output_tensor =
- makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
- std::vector<uint8_t> quantized_input = quantize<uint8_t>(
- {
- // First batch
- 1, 1, 1, 1, // row = 1
- 2, 2, 2, 2, // row = 2
- // Second batch
- 1, 2, 3, 4, // row = 1
- 1, 2, 3, 4, // row = 2
- },
- input_quant_param.first, input_quant_param.second);
- std::vector<uint8_t> quantized_filter = quantize<uint8_t>(
- {
- 1, 2, 3, 4, // first 2x2 filter
- -1, 1, -1, 1, // second 2x2 filter
- -1, -1, 1, 1, // third 2x2 filter
- },
- input_quant_param.first, input_quant_param.second);
- std::vector<int32_t> bias_data =
- quantize<int32_t>({1, 2, 3}, input_quant_param.first * input_quant_param.first, 0);
- input_tensor.writeData(quantized_input.data(), quantized_input.size() * sizeof(uint8_t));
- filter_tensor.writeData(quantized_filter.data(), quantized_filter.size() * sizeof(uint8_t));
- bias_tensor.writeData(bias_data.data(), bias_data.size() * sizeof(int32_t));
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
Conv2DParams params{};
params.padding = Padding::VALID;
@@ -163,44 +302,237 @@ TEST(Conv2DTest, Uint8)
params.dilation_width_factor = 1;
params.activation = Activation::NONE;
- Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(im2col);
kernel.execute();
std::vector<float> ref_output_data{
- 18, 2, 5, // first batch, left
- 18, 2, 5, // first batch, right
- 17, 4, 3, // second batch, left
- 37, 4, 3, // second batch, right
+ 18, 2, 5, // first batch, left
+ 18, 2, 5, // first batch, right
+ 17, 4, 3, // second batch, left
+ 37, 4, 3, // second batch, right
};
std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
- EXPECT_THAT(dequantize<uint8_t>(extractTensorData<uint8_t>(output_tensor),
- output_quant_param.first, output_quant_param.second),
- ElementsAreArray(ArrayFloatNear(ref_output_data)));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
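
The bias quantization in the CWQ tests is the invariant to note: each bias element uses scale input_scale * filter_scale[c] with zero point 0, so the integer bias can be added straight onto the raw accumulator:

real_acc  = s_in * s_w[c] * int_acc        // scale of the integer dot product
real_bias = s_bias[c] * q_bias[c]
// adding q_bias[c] directly to int_acc is exact only when
// s_bias[c] == s_in * s_w[c], which is what the loop above constructs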
-TEST(Conv2DTest, Unsupported_Type_Configure_NEG)
+TEST_F(Conv2DTest, SInt8_CWQ)
+{
+ const int output_channels = 3;
+ std::vector<float> input_data{
+ // First batch
+ 1, 1, 1, 1, // row = 1
+ 2, 2, 2, 2, // row = 2
+ // Second batch
+ 1, 2, 3, 4, // row = 1
+ 1, 2, 3, 4, // row = 2
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, // first 2x2 filter
+ -1, 1, -1, 1, // second 2x2 filter
+ -1, -1, 1, 1, // third 2x2 filter
+ };
+ std::vector<float> bias_data{1, 2, 3};
+ Shape filter_shape{output_channels, 2, 2, 1};
+
+ std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(0, 4);
+ std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-127, 128);
+
+ std::vector<std::pair<float, int32_t>> filter_quant_params;
+ filter_quant_params.push_back(std::pair<float, int32_t>(0.5, 0));
+ filter_quant_params.push_back(std::pair<float, int32_t>(0.25, 0));
+ filter_quant_params.push_back(std::pair<float, int32_t>(0.125, 0));
+
+ std::vector<float> filter_scales;
+ std::vector<int32_t> filter_zerops;
+ for (auto iter : filter_quant_params)
+ {
+ filter_scales.push_back(iter.first);
+ filter_zerops.push_back(iter.second);
+ }
+
+ std::vector<float> bias_scales;
+ for (int i = 0; i < output_channels; ++i)
+ bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first);
+ std::vector<int32_t> zerop(output_channels, 0);
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::S8>(filter_shape, filter_scales, filter_zerops,
+ 0, filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0,
+ bias_data, _memory_manager.get());
+ Tensor im2col(DataType::S8, Shape({}), {}, "");
+ Tensor output_tensor =
+ makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
+
+ Conv2DParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 2;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::NONE;
+
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(im2col);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 18, 2, 5, // first batch, left
+ 18, 2, 5, // first batch, right
+ 17, 4, 3, // second batch, left
+ 37, 4, 3, // second batch, right
+ };
+ std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
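
The per-channel filter scales here are exact powers of two, so the fixed-point decomposition sketched earlier is exact for the filter contribution (the full effective scale also folds in the input and output scales):

0.5   = 0.5 * 2^0   ->  multiplier 2^30, shift  0
0.25  = 0.5 * 2^-1  ->  multiplier 2^30, shift -1
0.125 = 0.5 * 2^-2  ->  multiplier 2^30, shift -2

The zero filter zero points are required, not incidental: the S8 per-channel paths accept only symmetric weights (compare the configure() checks in the DepthwiseConv2D hunk later in this diff).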
+
+TEST_F(Conv2DTest, SInt16)
+{
+ Shape input_shape{1, 4, 3, 2};
+ Shape filter_shape{2, 2, 2, 2};
+ Shape bias_shape{2};
+ std::vector<int32_t> ref_output_shape{1, 2, 2, 2};
+
+ std::vector<float> input_data{
+ 1, 2, 3, 4, 5, 6, // row = 0
+ 7, 8, 9, 10, 11, 12, // row = 1
+ 13, 14, 15, 16, 17, 18, // row = 2
+ 19, 20, 21, 22, 23, 24, // row = 3
+ };
+ std::vector<float> filter_data{
+ 1, 2, -3, -4, // out = 0, row = 0
+ -5, 6, -7, 8, // out = 0, row = 1
+ 4, -2, 3, -1, // out = 1, row = 0
+ -8, -6, 7, 5, // out = 1, row = 1
+ };
+ std::vector<float> bias_data{1, 2};
+ std::vector<float> ref_output_data{
+ 11, 16, 7, 20, // row = 0
+ 0, 40, 0, 44, // row = 1
+ };
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>(input_shape, 0.25, 0, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::S16>(filter_shape, 0.2, 0, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::S64>(bias_shape, 0.25 * 0.2, 0, bias_data, _memory_manager.get());
+ Tensor im2col(DataType::S16, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+
+ Conv2DParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(im2col);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
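
The SInt16 pipeline can be traced by hand for the first output, reusing the float value 11 derived for the Float test (same data). With symmetric scales s_in = 0.25, s_w = 0.2, s_out = 0.5, the effective rescale is 0.25 * 0.2 / 0.5 = 0.1:

int_acc     = 10 / (0.25 * 0.2) = 200   // integer dot product, bias excluded
q_bias      = 1 / (0.25 * 0.2)  = 20
q_out       = (200 + 20) * 0.1  = 22    // MultiplyByQuantizedMultiplier
dequantized = 22 * 0.5          = 11    // ref_output_data[0]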
+
+TEST_F(Conv2DTest, SInt16_CWQ_weights)
+{
+ Shape input_shape{1, 2, 2, 2}; // Batch x H x W x C
+ Shape filter_shape{3, 1, 1, 2}; // Out channels x H x W x In Channels
+ Shape bias_shape{3};
+ std::vector<int32_t> ref_output_shape{1, 2, 2, 3};
+
+ std::vector<float> input_data{
+ 1, 2, // row = 0, col 0
+ 3, 4, // row = 0, col 1
+ 5, 6, // row = 1, col 0
+ 7, 8, // row = 1, col 1
+ };
+ std::vector<float> filter_data{
+ 4, -3, // out = 0
+ 1, -3, // out = 1
+ 5, -3, // out = 2
+ };
+ std::vector<float> bias_data{1, 10, 5};
+ std::vector<float> ref_output_data{
+ 0, 5, 4, // row 0, col 0
+ 1, 1, 8, // row 0, col 1
+ 3, 0, 12, // row 1, col 0
+ 5, 0, 16, // row 1, col 1
+ };
+
+ float input_scale = 0.25f;
+ float output_scale = 0.05f;
+ std::vector<float> filter_scales = {0.25f, 0.2f, 0.1f};
+ std::vector<float> bias_scales;
+ for (size_t i = 0; i < filter_scales.size(); ++i)
+ bias_scales.push_back(filter_scales[i] * input_scale);
+ std::vector<int32_t> zerop = {0, 0, 0};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data, _memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0,
+ filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data,
+ _memory_manager.get());
+ Tensor im2col(DataType::S16, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::S16, output_scale, 0);
+
+ Conv2DParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 1;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(im2col);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(Conv2DTest, Unsupported_Type_Configure_NEG)
{
Shape input_shape{1, 4, 3, 2};
Shape filter_shape{2, 2, 2, 2};
Shape bias_shape{2};
std::vector<int32_t> input_data{
- 1, 2, 3, 4, 5, 6, // row = 0
- 7, 8, 9, 10, 11, 12, // row = 1
- 13, 14, 15, 16, 17, 18, // row = 2
- 19, 20, 21, 22, 23, 24, // row = 3
+ 1, 2, 3, 4, 5, 6, // row = 0
+ 7, 8, 9, 10, 11, 12, // row = 1
+ 13, 14, 15, 16, 17, 18, // row = 2
+ 19, 20, 21, 22, 23, 24, // row = 3
};
std::vector<float> filter_data{
- 1, 2, -3, -4, // out = 0, row = 0
- -5, 6, -7, 8, // out = 1, row = 0
- 4, -2, 3, -1, // out = 0, row = 1
- -8, -6, 7, 5, // out = 1, row = 1
+ 1, 2, -3, -4, // out = 0, row = 0
+ -5, 6, -7, 8, // out = 0, row = 1
+ 4, -2, 3, -1, // out = 1, row = 0
+ -8, -6, 7, 5, // out = 1, row = 1
};
std::vector<float> bias_data{1, 2};
- Tensor input_tensor = makeInputTensor<DataType::S32>(input_shape, input_data);
- Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
- Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Conv2DParams params{};
@@ -211,31 +543,34 @@ TEST(Conv2DTest, Unsupported_Type_Configure_NEG)
params.dilation_width_factor = 1;
params.activation = Activation::RELU;
- Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(Conv2DTest, Invalid_Bias_Type_NEG)
+TEST_F(Conv2DTest, Invalid_Bias_Type_NEG)
{
Shape input_shape{1, 4, 3, 2};
Shape filter_shape{2, 2, 2, 2};
Shape bias_shape{2};
std::vector<float> input_data{
- 1, 2, 3, 4, 5, 6, // row = 0
- 7, 8, 9, 10, 11, 12, // row = 1
- 13, 14, 15, 16, 17, 18, // row = 2
- 19, 20, 21, 22, 23, 24, // row = 3
+ 1, 2, 3, 4, 5, 6, // row = 0
+ 7, 8, 9, 10, 11, 12, // row = 1
+ 13, 14, 15, 16, 17, 18, // row = 2
+ 19, 20, 21, 22, 23, 24, // row = 3
};
std::vector<float> filter_data{
- 1, 2, -3, -4, // out = 0, row = 0
- -5, 6, -7, 8, // out = 1, row = 0
- 4, -2, 3, -1, // out = 0, row = 1
- -8, -6, 7, 5, // out = 1, row = 1
+ 1, 2, -3, -4, // out = 0, row = 0
+ -5, 6, -7, 8, // out = 0, row = 1
+ 4, -2, 3, -1, // out = 1, row = 0
+ -8, -6, 7, 5, // out = 1, row = 1
};
std::vector<uint8_t> bias_data{1, 2};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
- Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
- Tensor bias_tensor = makeInputTensor<DataType::U8>(bias_shape, bias_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::U8>(bias_shape, bias_data, _memory_manager.get());
+ Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Conv2DParams params{};
@@ -246,31 +581,35 @@ TEST(Conv2DTest, Invalid_Bias_Type_NEG)
params.dilation_width_factor = 1;
params.activation = Activation::RELU;
- Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(Conv2DTest, Invalid_Bias_Data_NEG)
+TEST_F(Conv2DTest, Invalid_Bias_Data_NEG)
{
Shape input_shape{1, 4, 3, 2};
Shape filter_shape{2, 2, 2, 2};
Shape bias_shape{3};
std::vector<float> input_data{
- 1, 2, 3, 4, 5, 6, // row = 0
- 7, 8, 9, 10, 11, 12, // row = 1
- 13, 14, 15, 16, 17, 18, // row = 2
- 19, 20, 21, 22, 23, 24, // row = 3
+ 1, 2, 3, 4, 5, 6, // row = 0
+ 7, 8, 9, 10, 11, 12, // row = 1
+ 13, 14, 15, 16, 17, 18, // row = 2
+ 19, 20, 21, 22, 23, 24, // row = 3
};
std::vector<float> filter_data{
- 1, 2, -3, -4, // out = 0, row = 0
- -5, 6, -7, 8, // out = 1, row = 0
- 4, -2, 3, -1, // out = 0, row = 1
- -8, -6, 7, 5, // out = 1, row = 1
+ 1, 2, -3, -4, // out = 0, row = 0
+ -5, 6, -7, 8, // out = 0, row = 1
+ 4, -2, 3, -1, // out = 1, row = 0
+ -8, -6, 7, 5, // out = 1, row = 1
};
std::vector<float> bias_data{1, 2, 3};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
- Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
- Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Conv2DParams params{};
@@ -281,31 +620,35 @@ TEST(Conv2DTest, Invalid_Bias_Data_NEG)
params.dilation_width_factor = 1;
params.activation = Activation::RELU;
- Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(Conv2DTest, Invalid_Input_Shape_NEG)
+TEST_F(Conv2DTest, Invalid_Input_Shape_NEG)
{
Shape input_shape{1, 4, 6, 1};
Shape filter_shape{2, 2, 2, 2};
Shape bias_shape{2};
std::vector<float> input_data{
- 1, 2, 3, 4, 5, 6, // row = 0
- 7, 8, 9, 10, 11, 12, // row = 1
- 13, 14, 15, 16, 17, 18, // row = 2
- 19, 20, 21, 22, 23, 24, // row = 3
+ 1, 2, 3, 4, 5, 6, // row = 0
+ 7, 8, 9, 10, 11, 12, // row = 1
+ 13, 14, 15, 16, 17, 18, // row = 2
+ 19, 20, 21, 22, 23, 24, // row = 3
};
std::vector<float> filter_data{
- 1, 2, -3, -4, // out = 0, row = 0
- -5, 6, -7, 8, // out = 1, row = 0
- 4, -2, 3, -1, // out = 0, row = 1
- -8, -6, 7, 5, // out = 1, row = 1
+ 1, 2, -3, -4, // out = 0, row = 0
+ -5, 6, -7, 8, // out = 0, row = 1
+ 4, -2, 3, -1, // out = 1, row = 0
+ -8, -6, 7, 5, // out = 1, row = 1
};
std::vector<float> bias_data{1, 2};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
- Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
- Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Conv2DParams params{};
@@ -316,7 +659,46 @@ TEST(Conv2DTest, Invalid_Input_Shape_NEG)
params.dilation_width_factor = 1;
params.activation = Activation::RELU;
- Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(Conv2DTest, Invalid_fused_act_tanh_NEG)
+{
+ Shape input_shape{1, 4, 3, 2};
+ Shape filter_shape{2, 2, 2, 2};
+ Shape bias_shape{2};
+ std::vector<float> input_data{
+ 1, 2, 3, 4, 5, 6, // row = 0
+ 7, 8, 9, 10, 11, 12, // row = 1
+ 13, 14, 15, 16, 17, 18, // row = 2
+ 19, 20, 21, 22, 23, 24, // row = 3
+ };
+ std::vector<float> filter_data{
+ 1, 2, -3, -4, // out = 0, row = 0
+ -5, 6, -7, 8, // out = 0, row = 1
+ 4, -2, 3, -1, // out = 1, row = 0
+ -8, -6, 7, 5, // out = 1, row = 1
+ };
+ std::vector<float> bias_data{1, 2};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Conv2DParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::TANH;
+
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
EXPECT_ANY_THROW(kernel.configure());
}
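
The new TANH case is rejected at configure() time because only clamp-style activations can be folded into the integer min/max bounds used by the quantized paths. A sketch of what calculateActivationRangeQuantized plausibly computes for a uint8 output (illustrative helper; the real function takes the output tensor itself):

#include <algorithm>
#include <cmath>
#include <cstdint>

// Illustrative: fused-activation clamp bounds on a uint8 output. TANH has
// no such representation, hence the configure-time throw exercised by
// Invalid_fused_act_tanh_NEG.
void activationRangeU8(Activation act, float scale, int32_t zero_point,
                       int32_t *act_min, int32_t *act_max)
{
  const auto quantize = [&](float x) {
    return zero_point + static_cast<int32_t>(std::round(x / scale));
  };
  *act_min = 0;   // uint8 lower bound
  *act_max = 255; // uint8 upper bound
  if (act == Activation::RELU)
    *act_min = std::max(*act_min, quantize(0.0f));
  else if (act == Activation::RELU6)
  {
    *act_min = std::max(*act_min, quantize(0.0f));
    *act_max = std::min(*act_max, quantize(6.0f));
  }
}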
diff --git a/compiler/luci-interpreter/src/kernels/DepthToSpace.cpp b/compiler/luci-interpreter/src/kernels/DepthToSpace.cpp
index cab63e26d..3a9acd1d4 100644
--- a/compiler/luci-interpreter/src/kernels/DepthToSpace.cpp
+++ b/compiler/luci-interpreter/src/kernels/DepthToSpace.cpp
@@ -16,7 +16,7 @@
#include "DepthToSpace.h"
#include "Utils.h"
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALDepthToSpace.h"
namespace luci_interpreter
{
@@ -24,26 +24,16 @@ namespace kernels
{
DepthToSpace::DepthToSpace(const Tensor *input, Tensor *output, const DepthToSpaceParams &params)
- : KernelWithParams<DepthToSpaceParams>({input}, {output}, params)
+ : KernelWithParams<DepthToSpaceParams>({input}, {output}, params)
{
}
void DepthToSpace::configure()
{
- if (input()->shape().num_dims() != 4)
- {
- throw std::runtime_error("Invalid input num_dims.");
- }
- if (output()->element_type() != DataType::FLOAT32 && output()->element_type() != DataType::U8 &&
- output()->element_type() != DataType::S8 && output()->element_type() != DataType::S32 &&
- output()->element_type() != DataType::S64)
- {
- throw std::runtime_error("Invalid output type");
- }
- if (input()->element_type() != output()->element_type())
- {
- throw std::runtime_error("Type mismatch on input and output.");
- }
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32 ||
+ output()->element_type() == DataType::U8);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
const int block_size = params().block_size;
const int32_t input_height = input()->shape().dim(1);
const int32_t input_width = input()->shape().dim(2);
@@ -52,9 +42,9 @@ void DepthToSpace::configure()
int32_t output_width = input_width * block_size;
int32_t output_channels = input_channels / block_size / block_size;
- assert(input_height == output_height / block_size);
- assert(input_width == output_width / block_size);
- assert(input_channels == output_channels * block_size * block_size);
+ LUCI_INTERPRETER_CHECK(input_height == output_height / block_size);
+ LUCI_INTERPRETER_CHECK(input_width == output_width / block_size);
+ LUCI_INTERPRETER_CHECK(input_channels == output_channels * block_size * block_size);
Shape output_shape(4);
output_shape.dim(0) = input()->shape().dim(0);
@@ -72,14 +62,14 @@ void DepthToSpace::execute() const
switch (input()->element_type())
{
case DataType::FLOAT32:
- tflite::optimized_ops::DepthToSpace(op_params, getTensorShape(input()),
- getTensorData<float>(input()), getTensorShape(output()),
- getTensorData<float>(output()));
+ luci_interpreter_pal::DepthToSpace(op_params, getTensorShape(input()),
+ getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
break;
case DataType::U8:
- tflite::optimized_ops::DepthToSpace(op_params, getTensorShape(input()),
- getTensorData<uint8_t>(input()), getTensorShape(output()),
- getTensorData<uint8_t>(output()));
+ luci_interpreter_pal::DepthToSpace(op_params, getTensorShape(input()),
+ getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
break;
default:
throw std::runtime_error("Unsupported Type.");
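
DepthToSpace is pure data movement; each output element's source is fixed by the block size. A reference formulation that reproduces the SimpleCase vector in the tests below (illustrative; the PAL implementations are layout-optimized):

// output(b, y, x, c) = input(b, y / bs, x / bs,
//                            c + ((y % bs) * bs + (x % bs)) * out_depth)
template <typename T>
void depthToSpaceRef(const T *in, T *out, int batches, int out_h, int out_w,
                     int out_depth, int bs)
{
  const int in_w = out_w / bs;
  const int in_d = out_depth * bs * bs;
  for (int b = 0; b < batches; ++b)
    for (int y = 0; y < out_h; ++y)
      for (int x = 0; x < out_w; ++x)
        for (int c = 0; c < out_depth; ++c)
        {
          const int in_c = c + ((y % bs) * bs + (x % bs)) * out_depth;
          const int in_idx =
            ((b * (out_h / bs) + y / bs) * in_w + x / bs) * in_d + in_c;
          const int out_idx = ((b * out_h + y) * out_w + x) * out_depth + c;
          out[out_idx] = in[in_idx];
        }
}

On the SimpleCase input {1, 1, 2, 4} with block size 2 this yields {1, 2, 5, 6, 3, 4, 7, 8}, the expected output.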
diff --git a/compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp b/compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp
index 1b805702d..88e6e07f1 100644
--- a/compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/DepthToSpace.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -31,16 +32,18 @@ template <typename T> class DepthToSpaceTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(DepthToSpaceTest, DataTypes);
+TYPED_TEST_SUITE(DepthToSpaceTest, DataTypes);
TYPED_TEST(DepthToSpaceTest, SimpleCase)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
std::vector<TypeParam> input_data{1, 2, 3, 4, 5, 6, 7, 8};
Shape input_shape{1, 1, 2, 4};
std::vector<TypeParam> output_data{1, 2, 5, 6, 3, 4, 7, 8};
std::vector<int32_t> output_shape{1, 2, 4, 1};
- Tensor input_tensor = makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>());
DepthToSpaceParams params{};
@@ -48,6 +51,7 @@ TYPED_TEST(DepthToSpaceTest, SimpleCase)
DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<TypeParam>(output_tensor),
@@ -55,6 +59,57 @@ TYPED_TEST(DepthToSpaceTest, SimpleCase)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
}
+TEST(DepthToSpaceTest, InvalidInputShape_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8};
+ Shape input_shape{1, 2, 4};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ DepthToSpaceParams params{};
+ params.block_size = 2;
+
+ DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(DepthToSpaceTest, InOutTypeMismatch_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8};
+ Shape input_shape{1, 1, 2, 4};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ DepthToSpaceParams params{};
+ params.block_size = 2;
+
+ DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(DepthToSpaceTest, InvalidBlockSize_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8};
+ Shape input_shape{1, 1, 2, 4};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ DepthToSpaceParams params{};
+ params.block_size = 3;
+
+ DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
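
InvalidBlockSize_NEG leans on the divisibility checks added to configure(): with 4 input channels and block_size 3, output_channels = 4 / 3 / 3 = 0 in integer arithmetic, so the check input_channels == output_channels * block_size * block_size compares 4 against 0 * 9 and throws.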
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp
index b01a5e086..c554c309d 100644
--- a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp
+++ b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp
@@ -18,8 +18,7 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
-#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h>
+#include "PALDepthwiseConv2d.h"
#include <stdexcept>
@@ -29,8 +28,9 @@ namespace kernels
{
DepthwiseConv2D::DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias,
- Tensor *output, const DepthwiseConv2DParams &params)
- : KernelWithParams<DepthwiseConv2DParams>({input, filter, bias}, {output}, params)
+ Tensor *output, Tensor *scratchpad,
+ const DepthwiseConv2DParams &params)
+ : KernelWithParams<DepthwiseConv2DParams>({input, filter, bias}, {output, scratchpad}, params)
{
}
@@ -45,43 +45,62 @@ void DepthwiseConv2D::configure()
// (4) | int8 int8 int32 int8 | quantized per channel
// (5) | int16 int8 int64 int16 | quantized per channel 16x8
//
- // We only support (1) and (3) for now.
+ // We only support (1), (3) and (4) for now, plus one combination not in the
+ // list above:
+ // | input filter bias output |
+ // ----+---------------------------+
+ // (6) | int16 int16 int64 int16 |
+ //
if (input()->element_type() == DataType::FLOAT32 && filter()->element_type() == DataType::FLOAT32)
{
- assert(bias() == nullptr || bias()->element_type() == DataType::FLOAT32);
+ LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::FLOAT32);
}
else if (input()->element_type() == DataType::U8 && filter()->element_type() == DataType::U8)
{
- assert(bias() == nullptr || bias()->element_type() == DataType::S32);
+ LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
+ }
+ else if (input()->element_type() == DataType::S8 && filter()->element_type() == DataType::S8)
+ {
+ LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(static_cast<uint32_t>(filter()->shape().dim(3)) ==
+ filter()->scales().size());
+ for (auto zerop : filter()->zero_points())
+ {
+ LUCI_INTERPRETER_CHECK(zerop == 0);
+ }
+ LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
+ }
+ else if (input()->element_type() == DataType::S16 && filter()->element_type() == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S64);
}
else
{
throw std::runtime_error("Unsupported type.");
}
- assert(output()->element_type() == input()->element_type());
+ LUCI_INTERPRETER_CHECK(output()->element_type() == input()->element_type());
const Shape &input_shape = input()->shape();
const Shape &filter_shape = filter()->shape();
- assert(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4);
const int32_t batches = input_shape.dim(0);
const int32_t input_height = input_shape.dim(1);
const int32_t input_width = input_shape.dim(2);
// Filter format: [1, H, W, O].
- assert(filter_shape.dim(0) == 1);
+ LUCI_INTERPRETER_CHECK(filter_shape.dim(0) == 1);
const int32_t filter_height = filter_shape.dim(1);
const int32_t filter_width = filter_shape.dim(2);
const int32_t channels_out = filter_shape.dim(3);
- assert(bias() == nullptr ||
- (bias()->shape().num_dims() == 1 && bias()->shape().dim(0) == channels_out));
+ LUCI_INTERPRETER_CHECK(bias() == nullptr || (bias()->shape().num_dims() == 1 &&
+ bias()->shape().dim(0) == channels_out));
const int32_t output_height =
- computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height,
- _params.dilation_height_factor);
+ computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height,
+ _params.dilation_height_factor);
const int32_t output_width =
- computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width,
- _params.dilation_width_factor);
+ computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width,
+ _params.dilation_width_factor);
_padding_height = computePadding(_params.stride_height, _params.dilation_height_factor,
input_height, filter_height, output_height);
@@ -89,6 +108,16 @@ void DepthwiseConv2D::configure()
filter_width, output_width);
output()->resize({batches, output_height, output_width, channels_out});
+
+ tflite::DepthwiseParams params{};
+
+ params.dilation_height_factor = _params.dilation_height_factor;
+ params.dilation_width_factor = _params.dilation_width_factor;
+
+ auto scratchpad = getOutputTensors()[1];
+ luci_interpreter_pal::SetupScratchpadTensor(scratchpad, params, input()->element_type(),
+ getTensorShape(input()), getTensorShape(filter()),
+ getTensorShape(output()));
}
void DepthwiseConv2D::execute() const
@@ -103,7 +132,23 @@ void DepthwiseConv2D::execute() const
}
throw std::runtime_error("Unsupported type.");
case DataType::U8:
- evalQuantized();
+ if (filter()->scales().size() == 1)
+ {
+ evalQuantized();
+ }
+ else if (filter()->scales().size() > 1)
+ {
+ LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
+ static_cast<size_t>(filter()->shape().dim(3)));
+ evalQuantizedPerChannel();
+ }
+ break;
+ case DataType::S8:
+ evalQuantizedS8PerChannel();
+ break;
+ case DataType::S16:
+ evalQuantizedS16();
break;
default:
throw std::runtime_error("Unsupported type.");
@@ -128,9 +173,100 @@ void DepthwiseConv2D::evalFloat() const
params.float_activation_max = activation_max;
tflite::reference_ops::DepthwiseConv(
- params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
- getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
- getTensorShape(output()), getTensorData<float>(output()));
+ params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
+ getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void DepthwiseConv2D::evalQuantizedPerChannel() const
+{
+ const auto *input_data = getTensorData<uint8_t>(input());
+ const auto *filter_data = getTensorData<uint8_t>(filter());
+ const auto *bias_data = getTensorData<int32_t>(bias());
+ auto *output_data = getTensorData<uint8_t>(output());
+
+ const Shape &input_shape = input()->shape();
+ const Shape &filter_shape = filter()->shape();
+ const Shape &output_shape = output()->shape();
+
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t input_depth = input_shape.dim(3);
+ const int32_t filter_height = filter_shape.dim(1);
+ const int32_t filter_width = filter_shape.dim(2);
+ const int32_t output_height = output_shape.dim(1);
+ const int32_t output_width = output_shape.dim(2);
+
+ const int32_t stride_height = _params.stride_height;
+ const int32_t stride_width = _params.stride_width;
+ const int32_t dilation_height_factor = _params.dilation_height_factor;
+ const int32_t dilation_width_factor = _params.dilation_width_factor;
+ const int32_t depth_multiplier = _params.depth_multiplier;
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ const std::vector<double> effective_output_scales =
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+ std::vector<ChannelQuantMultipliers> quant_multipliers_raw =
+ quantizeMultipliers(effective_output_scales);
+ BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(quant_multipliers_raw);
+
+ for (int batch = 0; batch < batches; ++batch)
+ {
+ for (int out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (int in_channel = 0; in_channel < input_depth; ++in_channel)
+ {
+ for (int m = 0; m < depth_multiplier; ++m)
+ {
+ const int output_channel = m + in_channel * depth_multiplier;
+ const int in_x_origin = (out_x * stride_width) - _padding_width;
+ const int in_y_origin = (out_y * stride_height) - _padding_height;
+ int32_t acc = 0;
+ for (int filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int in_x = in_x_origin + dilation_width_factor * filter_x;
+ const int in_y = in_y_origin + dilation_height_factor * filter_y;
+ // Zero padding by omitting the areas outside the image.
+ const bool is_point_inside_image =
+ (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);
+ if (is_point_inside_image)
+ {
+ int32_t input_val =
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_channel)];
+ int32_t filter_val =
+ filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, output_channel)];
+ acc += (filter_val - filter()->zero_points()[output_channel]) *
+ (input_val - input()->zero_point());
+ }
+ }
+ }
+ if (bias_data)
+ {
+ acc += bias_data[output_channel];
+ }
+ int32_t output_multiplier = quant_multipliers[output_channel].multiplier;
+ int output_shift = quant_multipliers[output_channel].shift;
+ int32_t scaled_acc =
+ tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+ scaled_acc += output()->zero_point();
+ scaled_acc = std::max(scaled_acc, activation_min);
+ scaled_acc = std::min(scaled_acc, activation_max);
+ output_data[calcOffset(output_shape, batch, out_y, out_x, output_channel)] =
+ static_cast<uint8_t>(scaled_acc);
+ }
+ }
+ }
+ }
+ }
}
void DepthwiseConv2D::evalQuantized() const
@@ -166,9 +302,149 @@ void DepthwiseConv2D::evalQuantized() const
params.quantized_activation_max = activation_max;
tflite::reference_ops::DepthwiseConv(
- params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(filter()),
- getTensorData<uint8_t>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
- getTensorShape(output()), getTensorData<uint8_t>(output()));
+ params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(filter()),
+ getTensorData<uint8_t>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+}
+
+void DepthwiseConv2D::evalQuantizedS8PerChannel() const
+{
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::DepthwiseParams params{};
+
+ params.padding_type = tflite::PaddingType::kSame;
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.dilation_height_factor = _params.dilation_height_factor;
+ params.dilation_width_factor = _params.dilation_width_factor;
+ params.depth_multiplier = _params.depth_multiplier;
+ // The kernel expects input and filter zero points to be negated.
+ params.input_offset = -input()->zero_point(); // Note the '-'.
+ params.weights_offset = 0;
+ params.output_offset = output()->zero_point();
+ params.output_multiplier = 1; // unused in tflite code
+ params.output_shift = 0; // unused in tflite code
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ const std::vector<double> effective_output_scales =
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+ std::vector<ChannelQuantMultipliers> quant_multipliers =
+ quantizeMultipliers(effective_output_scales);
+
+ std::vector<int32_t> shifts;
+ std::transform(quant_multipliers.begin(), quant_multipliers.end(), std::back_inserter(shifts),
+ [](ChannelQuantMultipliers cm) { return cm.shift; });
+ std::vector<int32_t> multipliers;
+ std::transform(quant_multipliers.begin(), quant_multipliers.end(),
+ std::back_inserter(multipliers),
+ [](ChannelQuantMultipliers cm) { return cm.multiplier; });
+
+ auto scratchpad = getOutputTensors()[1];
+ int8_t *scratchpad_data = nullptr;
+ if (scratchpad->is_allocatable())
+ scratchpad_data = scratchpad->data<int8_t>();
+
+ luci_interpreter_pal::DepthwiseConvPerChannel<int8_t>(
+ params, multipliers.data(), shifts.data(), getTensorShape(input()),
+ getTensorData<int8_t>(input()), getTensorShape(filter()), getTensorData<int8_t>(filter()),
+ getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()),
+ getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data);
+}
+
+void DepthwiseConv2D::evalQuantizedS16() const
+{
+ const auto *input_data = getTensorData<int16_t>(input());
+ const auto *filter_data = getTensorData<int16_t>(filter());
+ const auto *bias_data = getTensorData<int64_t>(bias());
+ auto *output_data = getTensorData<int16_t>(output());
+
+ const Shape &input_shape = input()->shape();
+ const Shape &filter_shape = filter()->shape();
+ const Shape &output_shape = output()->shape();
+
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t input_depth = input_shape.dim(3);
+ const int32_t filter_height = filter_shape.dim(1);
+ const int32_t filter_width = filter_shape.dim(2);
+ const int32_t output_height = output_shape.dim(1);
+ const int32_t output_width = output_shape.dim(2);
+
+ const int32_t stride_height = _params.stride_height;
+ const int32_t stride_width = _params.stride_width;
+ const int32_t dilation_height_factor = _params.dilation_height_factor;
+ const int32_t dilation_width_factor = _params.dilation_width_factor;
+ const int32_t depth_multiplier = _params.depth_multiplier;
+
+ const std::vector<double> effective_output_scales =
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+ std::vector<ChannelQuantMultipliers> quant_multipliers_raw =
+ quantizeMultipliers(effective_output_scales);
+
+ BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(quant_multipliers_raw);
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ for (int32_t batch = 0; batch < batches; ++batch)
+ {
+ for (int32_t out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int32_t out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (int32_t in_c = 0; in_c < input_depth; ++in_c)
+ {
+ for (int32_t m = 0; m < depth_multiplier; ++m)
+ {
+ const int32_t out_c = m + in_c * depth_multiplier;
+ const int32_t in_y_origin = out_y * stride_height - _padding_height;
+ const int32_t in_x_origin = out_x * stride_width - _padding_width;
+ int64_t acc = 0;
+ for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
+ const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
+ if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
+ {
+ const int16_t input_val =
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+ const int16_t filter_val =
+ filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, out_c)];
+ acc += static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
+ }
+ }
+ }
+ if (bias_data != nullptr)
+ {
+ acc += bias_data[out_c];
+ }
+
+ int32_t output_multiplier = quant_multipliers[out_c].multiplier;
+ int output_shift = quant_multipliers[out_c].shift;
+ int32_t scaled_acc =
+ tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+
+ scaled_acc = std::max(scaled_acc, activation_min);
+ scaled_acc = std::min(scaled_acc, activation_max);
+
+ output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
+ }
+ }
+ }
+ }
+ }
}
} // namespace kernels
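
Both per-channel loops above rely on the depthwise channel convention: the filter is laid out [1, H, W, channels_out] with channels_out = input_depth * depth_multiplier, and output channel out_c = in_c * depth_multiplier + m reads taps from input channel in_c only. A minimal sketch of the bookkeeping (illustrative names):

#include <cassert>

// Which input channel feeds a given depthwise output channel.
inline int inputChannelOf(int out_c, int depth_multiplier)
{
  assert(depth_multiplier > 0);
  return out_c / depth_multiplier;
}
// The matching filter tap for (out_c, fy, fx) is filter[0][fy][fx][out_c].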
diff --git a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h
index 62f4bff0e..3d1faf6c1 100644
--- a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h
+++ b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h
@@ -29,7 +29,7 @@ class DepthwiseConv2D : public KernelWithParams<DepthwiseConv2DParams>
{
public:
DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output,
- const DepthwiseConv2DParams &params);
+ Tensor *scratchpad, const DepthwiseConv2DParams &params);
const Tensor *input() const { return _inputs[0]; }
const Tensor *filter() const { return _inputs[1]; }
@@ -42,6 +42,9 @@ public:
private:
void evalFloat() const;
void evalQuantized() const;
+ void evalQuantizedPerChannel() const;
+ void evalQuantizedS8PerChannel() const;
+ void evalQuantizedS16() const;
private:
int32_t _padding_height{};
diff --git a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp
index a9b43d864..6b4673f3e 100644
--- a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/DepthwiseConv2D.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -26,27 +27,39 @@ namespace
using namespace testing;
-TEST(DepthwiseConv2DTest, Float)
+class DepthwiseConv2DTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(DepthwiseConv2DTest, Float)
{
Shape input_shape{1, 4, 2, 2};
Shape filter_shape{1, 2, 2, 4};
Shape bias_shape{4};
std::vector<float> input_data{
- 1, 2, 7, 8, //
- 3, 4, 9, 10, //
- 5, 6, 11, 12, //
- 13, 14, 15, 16, //
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
};
std::vector<float> filter_data{
- 1, 2, 3, 4, //
- -9, 10, -11, 12, //
- 5, 6, 7, 8, //
- 13, -14, 15, -16, //
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
};
std::vector<float> bias_data{1, 2, 3, 4};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
- Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
- Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
DepthwiseConv2DParams params{};
@@ -58,54 +71,50 @@ TEST(DepthwiseConv2DTest, Float)
params.dilation_width_factor = 1;
params.activation = Activation::RELU;
- DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
kernel.configure();
+ _memory_manager->allocate_memory(scratchpad);
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{
- 71, 0, 99, 0, //
- 167, 0, 227, 28, //
+ 71, 0, 99, 0, //
+ 167, 0, 227, 28, //
};
- EXPECT_THAT(extractTensorData<float>(output_tensor),
- ElementsAreArray(ArrayFloatNear(ref_output_data)));
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 4}));
}
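
The depthwise channel mapping can be checked on the first reference value: with 2 input channels and 4 filter channels the depth multiplier is 2, so output channel 0 reads input channel 0 only:

out(0,0,0,0) = 1*1 + 7*(-9) + 3*5 + 9*13 + 1   // taps h0w0, h0w1, h1w0, h1w1
             = 1 - 63 + 15 + 117 + 1 = 71      // ref_output_data[0]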
-TEST(DepthwiseConv2DTest, Uint8)
+TEST_F(DepthwiseConv2DTest, Uint8)
{
+ std::vector<float> input_data{
+ 1, 2, 7, 8, // column 1
+ 3, 4, 9, 10, // column 2
+ 5, 6, 11, 12, // column 3
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+
std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64);
std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
- Tensor input_tensor{
- DataType::U8, {1, 3, 2, 2}, {{input_quant_param.first}, {input_quant_param.second}}, ""};
- Tensor filter_tensor{
- DataType::U8, {1, 2, 2, 4}, {{input_quant_param.first}, {input_quant_param.second}}, ""};
- Tensor bias_tensor{
- DataType::S32, {4}, {{input_quant_param.first * input_quant_param.first}, {0}}, ""};
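+  // With the affine scheme real = scale * (q - zero_point), the (-63.5, 64)
+  // range gives scale = 127.5 / 255 = 0.5 and zero_point = 127; the output
+  // range (-127, 128) gives scale = 1.0 and zero_point = 127.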
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 3, 2, 2}, input_quant_param.first, input_quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 2, 4}, input_quant_param.first, input_quant_param.second,
+ filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>(
+ {4}, input_quant_param.first * input_quant_param.first, 0, bias_data, _memory_manager.get());
Tensor output_tensor =
- makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
-
- std::vector<uint8_t> quant_input = quantize<uint8_t>(
- {
- 1, 2, 7, 8, // column 1
- 3, 4, 9, 10, // column 2
- 5, 6, 11, 12, // column 3
- },
- input_quant_param.first, input_quant_param.second);
- std::vector<uint8_t> quant_filter = quantize<uint8_t>(
- {
- 1, 2, 3, 4, //
- -9, 10, -11, 12, //
- 5, 6, 7, 8, //
- 13, -14, 15, -16, //
- },
- input_quant_param.first, input_quant_param.second);
- std::vector<int32_t> quant_bias =
- quantize<int32_t>({1, 2, 3, 4}, input_quant_param.first * input_quant_param.first, 0);
-
- input_tensor.writeData(quant_input.data(), quant_input.size() * sizeof(uint8_t));
- filter_tensor.writeData(quant_filter.data(), quant_filter.size() * sizeof(uint8_t));
- bias_tensor.writeData(quant_bias.data(), quant_bias.size() * sizeof(int32_t));
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
DepthwiseConv2DParams params{};
params.padding = Padding::VALID;
@@ -116,20 +125,498 @@ TEST(DepthwiseConv2DTest, Uint8)
params.dilation_width_factor = 1;
params.activation = Activation::NONE;
- DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad);
kernel.execute();
std::vector<float> ref_output_data{
- 71, -34, 99, -20, //
- 91, -26, 127, -4, //
+ 71, -34, 99, -20, //
+ 91, -26, 127, -4, //
};
- EXPECT_THAT(dequantize(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(),
- output_tensor.zero_point()),
- ElementsAreArray(ArrayFloatNear(ref_output_data)));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 4}));
}
+TEST_F(DepthwiseConv2DTest, SInt16)
+{
+ Shape input_shape{1, 4, 2, 2};
+ Shape filter_shape{1, 2, 2, 4};
+ Shape bias_shape{4};
+ std::vector<int32_t> ref_output_shape{1, 2, 1, 4};
+
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ std::vector<float> ref_output_data{
+ 71, 0, 99, 0, //
+ 167, 0, 227, 28, //
+ };
+
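+  // The bias scale is input_scale * filter_scale (0.25 * 0.2), matching the
+  // scale of the input * filter products the S64 bias is added to.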
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>(input_shape, 0.25, 0, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::S16>(filter_shape, 0.2, 0, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::S64>(bias_shape, 0.25 * 0.2, 0, bias_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+ Tensor scratchpad(DataType::S64, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(DepthwiseConv2DTest, SInt16_CWQ_weights)
+{
+ const int output_channels = 4;
+ Shape input_shape{1, 4, 2, 2};
+ Shape filter_shape{1, 2, 2, output_channels};
+ Shape bias_shape{4};
+ std::vector<int32_t> ref_output_shape{1, 2, 1, output_channels};
+
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ std::vector<float> ref_output_data{
+ 71, 0, 99, 0, //
+ 167, 0, 227, 28, //
+ };
+
+ float input_scale = 0.25;
+ std::vector<float> filter_scales{0.2f, 1.f, 0.5f, 0.1f};
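+  // Channel-wise quantization (CWQ): filter channel c uses filter_scales[c],
+  // so each bias channel is quantized with filter_scales[c] * input_scale.
+  // The trailing 3 passed to makeInputTensor below is the quantized (channel)
+  // dimension of the filter.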
+ std::vector<float> bias_scales;
+ for (int i = 0; i < output_channels; ++i)
+ bias_scales.push_back(filter_scales[i] * input_scale);
+ std::vector<int32_t> zerop(4, 0);
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data, _memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 3,
+ filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data,
+ _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+ Tensor scratchpad(DataType::S16, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(DepthwiseConv2DTest, Uint8_CWQ_weights)
+{
+ const int output_channels = 4;
+ Shape input_shape{1, 3, 2, 2};
+ Shape filter_shape{1, 2, 2, output_channels};
+ Shape bias_shape{4};
+ std::vector<int32_t> ref_output_shape{1, 2, 1, output_channels};
+
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ std::vector<float> ref_output_data{
+ 71, -34, 99, -20, //
+ 91, -26, 127, -4, //
+ };
+
+ std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(0, 16);
+ std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
+
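+  // Per-channel ranges: each pair is the (min, max) of one filter output
+  // channel (channel 0 holds {1, -9, 5, 13}, hence (-9, 13)).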
+ std::vector<std::pair<float, int32_t>> filter_quant_params;
+ filter_quant_params.push_back(quantizationParams<uint8_t>(-9, 13));
+ filter_quant_params.push_back(quantizationParams<uint8_t>(-14, 10));
+ filter_quant_params.push_back(quantizationParams<uint8_t>(-11, 15));
+ filter_quant_params.push_back(quantizationParams<uint8_t>(-16, 12));
+
+ std::vector<float> filter_scales;
+ std::vector<int32_t> filter_zerops;
+ for (auto iter : filter_quant_params)
+ {
+ filter_scales.push_back(iter.first);
+ filter_zerops.push_back(iter.second);
+ }
+
+ std::vector<float> bias_scales;
+ for (int i = 0; i < output_channels; ++i)
+ bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first);
+ std::vector<int32_t> zerop(output_channels, 0);
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops,
+ 3, filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_scales, zerop, 0, bias_data,
+ _memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+ Tensor scratchpad(DataType::U8, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 1;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::NONE;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, output_quant_param.first));
+}
+
+TEST_F(DepthwiseConv2DTest, SInt8_CWQ_weights)
+{
+ const int output_channels = 4;
+ Shape input_shape{1, 3, 2, 2};
+ Shape filter_shape{1, 2, 2, output_channels};
+ Shape bias_shape{4};
+ std::vector<int32_t> ref_output_shape{1, 2, 1, output_channels};
+
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ std::vector<float> ref_output_data{
+ 71, -34, 99, -20, //
+ 91, -26, 127, -4, //
+ };
+
+ std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(-128, 127);
+ std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-127, 128);
+
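+  // Hand-picked symmetric per-channel scales; all zero points are 0.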
+ std::vector<std::pair<float, int32_t>> filter_quant_params;
+ filter_quant_params.push_back(std::pair<float, int32_t>(0.5, 0));
+ filter_quant_params.push_back(std::pair<float, int32_t>(0.25, 0));
+ filter_quant_params.push_back(std::pair<float, int32_t>(1, 0));
+ filter_quant_params.push_back(std::pair<float, int32_t>(0.125, 0));
+
+ std::vector<float> filter_scales;
+ std::vector<int32_t> filter_zerops;
+ for (auto iter : filter_quant_params)
+ {
+ filter_scales.push_back(iter.first);
+ filter_zerops.push_back(iter.second);
+ }
+
+ std::vector<float> bias_scales;
+ for (int i = 0; i < output_channels; ++i)
+ bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first);
+ std::vector<int32_t> zerop(output_channels, 0);
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::S8>(filter_shape, filter_scales, filter_zerops,
+ 3, filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_scales, zerop, 0, bias_data,
+ _memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
+ Tensor scratchpad(DataType::S8, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 1;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::NONE;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, output_quant_param.first));
+}
+
+TEST_F(DepthwiseConv2DTest, InvalidBiasType_NEG)
+{
+ Shape input_shape{1, 4, 2, 2};
+ Shape filter_shape{1, 2, 2, 4};
+ Shape bias_shape{4};
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<int32_t> bias_data{1, 2, 3, 4};
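+  // The float kernel requires a float bias, so this S32 bias must make
+  // configure() throw.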
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DepthwiseConv2DTest, InOutTypeMismatch_NEG)
+{
+ Shape input_shape{1, 4, 2, 2};
+ Shape filter_shape{1, 2, 2, 4};
+ Shape bias_shape{4};
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+ Tensor scratchpad(DataType::U8, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DepthwiseConv2DTest, InvalidInputShape_NEG)
+{
+ Shape input_shape{4, 2, 2};
+ Shape filter_shape{2, 2, 4};
+ Shape bias_shape{4};
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DepthwiseConv2DTest, InvalidFilterShape_NEG)
+{
+ Shape input_shape{1, 4, 2, 2};
+ Shape filter_shape{2, 1, 2, 4};
+ Shape bias_shape{4};
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DepthwiseConv2DTest, InvalidBiasDim_NEG)
+{
+ Shape input_shape{1, 4, 2, 2};
+ Shape filter_shape{1, 2, 4, 2};
+ Shape bias_shape{4};
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Dequantize.cpp b/compiler/luci-interpreter/src/kernels/Dequantize.cpp
new file mode 100644
index 000000000..96399e5c7
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Dequantize.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Dequantize.h"
+#include "kernels/Utils.h"
+#include "PALDequantize.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Dequantize::Dequantize(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Dequantize::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == loco::DataType::S8 ||
+ input()->element_type() == loco::DataType::U8 ||
+ input()->element_type() == loco::DataType::S16);
+
+ LUCI_INTERPRETER_CHECK(input()->scales().size() == 1);
+
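+  // S16 tensors are expected to be symmetrically quantized, so their zero
+  // point must be 0.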
+ if (input()->element_type() == loco::DataType::S16)
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0);
+
+ LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::FLOAT32);
+
+ output()->resize(input()->shape());
+}
+
+void Dequantize::execute() const
+{
+ tflite::DequantizationParams op_params;
+ op_params.zero_point = input()->zero_point();
+ op_params.scale = input()->scale();
+
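+  // Each branch computes real = scale * (q - zero_point) element-wise for its
+  // input type.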
+ switch (input()->element_type())
+ {
+ case loco::DataType::U8:
+ {
+ luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()),
+ getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ }
+ case loco::DataType::S8:
+ {
+ luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()),
+ getTensorData<int8_t>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ }
+ case loco::DataType::S16:
+ {
+ luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()),
+ getTensorData<int16_t>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Dequantize.h b/compiler/luci-interpreter/src/kernels/Dequantize.h
new file mode 100644
index 000000000..5565df0e4
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Dequantize.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H
+#define LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Dequantize : public Kernel
+{
+public:
+ Dequantize(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H
diff --git a/compiler/luci-interpreter/src/kernels/Dequantize.test.cpp b/compiler/luci-interpreter/src/kernels/Dequantize.test.cpp
new file mode 100644
index 000000000..0cab633d6
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Dequantize.test.cpp
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Dequantize.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class DequantizeTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(DequantizeTest, Uint8)
+{
+ std::vector<uint8_t> input_data{0, 1, 2, 3, 4, 251, 252, 253, 254, 255};
+
+ std::vector<float> ref_output_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64};
+
+ Tensor input_tensor(loco::DataType::U8, {2, 5}, {{0.5}, {127}}, "");
+
+ _memory_manager->allocate_memory(input_tensor);
+ input_tensor.writeData(input_data.data(), input_data.size() * sizeof(uint8_t));
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(DequantizeTest, Sint8)
+{
+ std::vector<int8_t> input_data{-128, -127, -126, -125, -124, 123, 124, 125, 126, 127};
+
+ std::vector<float> ref_output_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64};
+
+ Tensor input_tensor(loco::DataType::S8, {2, 5}, {{0.5}, {-1}}, "");
+
+ _memory_manager->allocate_memory(input_tensor);
+ input_tensor.writeData(input_data.data(), input_data.size() * sizeof(int8_t));
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(DequantizeTest, Sint16)
+{
+ std::vector<int16_t> input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131};
+
+ std::vector<float> ref_output_data{-64.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 65.5};
+
+ Tensor input_tensor(loco::DataType::S16, {2, 5}, {{0.5}, {0}}, "");
+
+ _memory_manager->allocate_memory(input_tensor);
+ input_tensor.writeData(input_data.data(), input_data.size() * sizeof(int16_t));
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(DequantizeTest, InvalidInputType_NEG)
+{
+ std::vector<float> input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DequantizeTest, InvalidOutputType_NEG)
+{
+ std::vector<int16_t> input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131};
+
+ Tensor input_tensor(loco::DataType::S16, {2, 5}, {{0.5}, {0}}, "");
+
+ _memory_manager->allocate_memory(input_tensor);
+ input_tensor.writeData(input_data.data(), input_data.size() * sizeof(int16_t));
+
+ Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DequantizeTest, InvalidInputZeroPoint_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({2, 5}, 0.5, -1, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Div.cpp b/compiler/luci-interpreter/src/kernels/Div.cpp
new file mode 100644
index 000000000..dd1532278
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Div.cpp
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Div.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/div.h>
+#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Div::Div(const Tensor *input1, const Tensor *input2, Tensor *output, const DivParams &params)
+ : KernelWithParams<DivParams>({input1, input2}, {output}, params)
+{
+}
+
+void Div::configure()
+{
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Div::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Div::evalFloat() const
+{
+ tflite::ArithmeticParams params{};
+ fillArithmeticActivationRange<float>(params, _params.activation);
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastDivSlow(
+ params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
+ getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData<float>(input1()),
+ getTensorShape(input2()), getTensorData<float>(input2()),
+ getTensorShape(output()), getTensorData<float>(output()));
+ }
+}
+
+template <typename T> void Div::evalInteger() const
+{
+ tflite::ArithmeticParams params{};
+ fillArithmeticActivationRange<T>(params, _params.activation);
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastDivSlow(
+ params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
+ getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()));
+ }
+}
+
+void Div::evalQuantized() const
+{
+ const auto input1_scale = static_cast<double>(input1()->scale());
+ const auto input2_scale = static_cast<double>(input2()->scale());
+ const auto output_scale = static_cast<double>(output()->scale());
+
+ const double real_output_multiplier = input1_scale / (input2_scale * output_scale);
+
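+  // The real factor s1 / (s2 * s_out) maps (q1 - z1) / (q2 - z2) into the
+  // output's quantized domain; quantizeMultiplier decomposes it into a Q31
+  // fixed-point multiplier and a power-of-two shift.
+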
+ int32_t output_multiplier{};
+ int output_shift{};
+
+ quantizeMultiplier(real_output_multiplier, &output_multiplier, &output_shift);
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::ArithmeticParams params{};
+
+ params.input1_offset = -input1()->zero_point(); // Note the '-'.
+ params.input2_offset = -input2()->zero_point(); // Note the '-'.
+ params.output_offset = output()->zero_point();
+ params.output_multiplier = output_multiplier;
+ params.output_shift = output_shift;
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastDivSlow(
+ params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()),
+ getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
+ getTensorShape(input2()), getTensorData<uint8_t>(input2()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Div.h b/compiler/luci-interpreter/src/kernels/Div.h
new file mode 100644
index 000000000..c1bf3e10b
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Div.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_DIV_H
+#define LUCI_INTERPRETER_KERNELS_DIV_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Div : public KernelWithParams<DivParams>
+{
+public:
+ Div(const Tensor *input1, const Tensor *input2, Tensor *output, const DivParams &params);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ template <typename T> void evalInteger() const;
+ void evalQuantized() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_DIV_H
diff --git a/compiler/luci-interpreter/src/kernels/Div.test.cpp b/compiler/luci-interpreter/src/kernels/Div.test.cpp
new file mode 100644
index 000000000..85cd8b90a
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Div.test.cpp
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Div.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class DivTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
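+// Each quantized operand can be off by up to one quantization step, so a
+// first-order error bound for the division is roughly 2 * step + step * step;
+// the quantized test below uses this as its comparison slack.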
+float GetTolerance(float min, float max)
+{
+ const float kQuantizedStep = (max - min) / 255.0f;
+ const float kQuantizedTolerance = 2.0f * kQuantizedStep + kQuantizedStep * kQuantizedStep;
+ return kQuantizedTolerance;
+}
+
+TEST_F(DivTest, Float)
+{
+ Shape base_shape = {2, 3, 1, 1};
+
+ std::vector<int32_t> output_shape = {2, 3, 1, 1};
+
+ std::vector<float> input1_data{0.3f, 2.3f, 0.9f, 0.5f, 0.8f, 1.1f};
+ std::vector<float> input2_data{0.2f, 1.6f, 0.5f, 0.4f, 1.6f, 0.4f};
+ std::vector<float> test_outputs{1.5f, 1.4375f, 1.8f, 1.25f, 0.5f, 2.75f};
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input2_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ DivParams params{};
+ params.activation = Activation::RELU;
+
+ Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST_F(DivTest, FloatBroadcast)
+{
+ Shape input1_shape = {1, 3};
+ Shape input2_shape = {3, 1};
+
+ std::vector<float> input1_data{-0.3f, 2.3f, 0.9f};
+ std::vector<float> input2_data{0.2f, 1.6f, 0.5f};
+ std::vector<float> test_outputs{0.f, 11.5f, 4.5f, 0.f, 1.4375f, 0.5625f, 0.f, 4.6f, 1.8f};
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(input1_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(input2_shape, input2_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ DivParams params{};
+ params.activation = Activation::RELU;
+
+ Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f));
+}
+
+TEST_F(DivTest, Uint8)
+{
+ Shape base_shape = {1, 2, 2, 1};
+
+ std::vector<int32_t> output_shape = {1, 2, 2, 1};
+
+ std::vector<float> input1_data = {-0.8f, -0.2f, 0.3f, 0.7f};
+ std::vector<float> input2_data = {-0.8f, 0.4f, 0.8f, 1.0f};
+ std::vector<float> test_outputs{1.0f, 0.f, 0.375f, 0.7f};
+
+ const float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.f, 1.f);
+
+ Tensor input1_tensor = makeInputTensor<DataType::U8>(
+ base_shape, quant_param.first, quant_param.second, input1_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U8>(
+ base_shape, quant_param.first, quant_param.second, input2_data, _memory_manager.get());
+
+ Tensor output_tensor =
+ makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
+
+ DivParams params{};
+ params.activation = Activation::RELU;
+
+ Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(test_outputs, kQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+template <loco::DataType DType> void checkInteger(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ Shape base_shape = {2, 3, 1, 2};
+ std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+
+ std::vector<std::vector<dtype>> test_outputs = {{5, 6, 2, 0, 10, 3, //
+ 10, 0, 4, 5, 20, 0, //
+ 0, 0, 0, 2, 0, 0, //
+ 2, 0, 1, 10, 5, 0, //
+ 2, 3, 1, 0, 5, 1, //
+ 18, 20, 7, 0, 37, 10},
+ {5, 6, 4, 5, 0, 0, 2, 0, 1, 0, 37, 10},
+ {5, 7, 4, 6, 2, 3, 10, 0, 8, 0, 4, 0,
+ 0, 0, 0, 0, 0, 0, 0, 10, 5, 0, 1, 0,
+ 0, 0, 5, 9, 1, 1, 0, 0, 37, 50, 7, 10},
+ {5, 7, 8, 0, 0, 0, 0, 10, 5, 9, 7, 10}};
+ std::vector<dtype> input1_data{20, 30, 40, -17, -4, -7, 11, -31, 10, 19, 75, 100};
+ std::vector<dtype> input2_data{4, 5, 10, -3, 2, 10};
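+  // Integer division truncates toward zero and the fused RELU clamps negative
+  // quotients to 0 (e.g. -17 / -3 -> 5, -4 / 2 -> -2 -> 0); the expected
+  // tables above follow from that.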
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+ Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DType);
+
+ DivParams params{};
+ params.activation = Activation::RELU;
+
+ Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+ << "With shape number " << i;
+ }
+}
+
+TEST_F(DivTest, SInt64)
+{
+ checkInteger<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(DivTest, SInt32)
+{
+ checkInteger<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(DivTest, Input_Output_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ DivParams params{};
+ params.activation = Activation::RELU;
+
+ Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DivTest, Invalid_Input_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U64);
+
+ DivParams params{};
+ params.activation = Activation::RELU;
+
+ Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+TEST_F(DivTest, Invalid_Output_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::S32>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ DivParams params{};
+ params.activation = Activation::RELU;
+
+ Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Elu.cpp b/compiler/luci-interpreter/src/kernels/Elu.cpp
index 5de4a1f3b..697d63be4 100644
--- a/compiler/luci-interpreter/src/kernels/Elu.cpp
+++ b/compiler/luci-interpreter/src/kernels/Elu.cpp
@@ -17,7 +17,7 @@
#include "kernels/Elu.h"
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALElu.h"
#include <stdexcept>
@@ -31,7 +31,7 @@ Elu::Elu(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
void Elu::configure()
{
- assert(input()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
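+  // Unlike assert, LUCI_INTERPRETER_CHECK stays active in release builds and
+  // throws, which makes the mismatch testable with EXPECT_ANY_THROW.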
output()->resize(input()->shape());
}
@@ -40,8 +40,8 @@ void Elu::execute() const
switch (input()->element_type())
{
case DataType::FLOAT32:
- tflite::optimized_ops::Elu(getTensorShape(input()), getTensorData<float>(input()),
- getTensorShape(output()), getTensorData<float>(output()));
+ luci_interpreter_pal::Elu(getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
break;
default:
throw std::runtime_error("Unsupported type.");
diff --git a/compiler/luci-interpreter/src/kernels/Elu.test.cpp b/compiler/luci-interpreter/src/kernels/Elu.test.cpp
index 52444cbea..814499cdb 100644
--- a/compiler/luci-interpreter/src/kernels/Elu.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Elu.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/Elu.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -29,34 +30,50 @@ using namespace testing;
void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
std::initializer_list<float> input_data, std::initializer_list<float> output_data)
{
- Tensor input_tensor{DataType::FLOAT32, input_shape, {}, ""};
- input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(float));
-
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Elu kernel(&input_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
(void)output_shape;
- EXPECT_THAT(extractTensorData<float>(output_tensor),
- ::testing::ElementsAreArray(ArrayFloatNear(output_data)));
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
}
TEST(EluTest, SimpleElu)
{
Check(
- /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
- /*input_data=*/
- {
- 0, -6, 2, -4, //
- 3, -2, 10, -0.1, //
- },
- /*output_data=*/
- {
- 0.0, -0.997521, 2.0, -0.981684, //
- 3.0, -0.864665, 10.0, -0.0951626, //
- });
+ /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
+ /*input_data=*/
+ {
+ 0, -6, 2, -4, //
+ 3, -2, 10, -0.1, //
+ },
+ /*output_data=*/
+ {
+ 0.0, -0.997521, 2.0, -0.981684, //
+ 3.0, -0.864665, 10.0, -0.0951626, //
+ });
+}
+
+TEST(EluTest, InOutTypeMismatch_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ 0, -6, 2, -4, //
+ 3, -2, 10, -0.1, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ Elu kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
}
} // namespace
diff --git a/compiler/luci-interpreter/src/kernels/Equal.cpp b/compiler/luci-interpreter/src/kernels/Equal.cpp
new file mode 100644
index 000000000..a57e127b7
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Equal.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Equal.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Equal::Equal(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {}
+
+void Equal::configure()
+{
+ LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
+ if (x()->element_type() == DataType::U8)
+ {
+ quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+ quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+ }
+ output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+void Equal::execute() const
+{
+ switch (x()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Equal::evalFloat() const
+{
+ const auto x_data = getTensorData<float>(x());
+ const auto y_data = getTensorData<float>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowEqual(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::Equal(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
+ y_data, getTensorShape(output()), output_data);
+ }
+}
+
+template <typename T> void Equal::evalInteger() const
+{
+ const auto x_data = getTensorData<T>(x());
+ const auto y_data = getTensorData<T>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowEqualNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::EqualNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+}
+
+void Equal::evalQuantized() const
+{
+ const auto x_data = getTensorData<uint8_t>(x());
+ const auto y_data = getTensorData<uint8_t>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
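+  // x and y may carry different quantization params, so both are rescaled to a
+  // common resolution (a shared left shift plus per-input fixed-point
+  // multipliers computed in configure()) before comparison.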
+ op_params.left_shift = 8;
+ op_params.input1_offset = -x()->zero_point(); // Note the '-'
+ op_params.input1_shift = _x_shift;
+ op_params.input1_multiplier = _x_multiplier;
+ op_params.input2_offset = -y()->zero_point(); // Note the '-'
+ op_params.input2_shift = _y_shift;
+ op_params.input2_multiplier = _y_multiplier;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowEqualWithScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::EqualWithScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Equal.h b/compiler/luci-interpreter/src/kernels/Equal.h
new file mode 100644
index 000000000..c9be32cc0
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Equal.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_EQUAL_H
+#define LUCI_INTERPRETER_KERNELS_EQUAL_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Equal : public Kernel
+{
+public:
+ Equal(const Tensor *x, const Tensor *y, Tensor *output);
+
+ const Tensor *x() const { return _inputs[0]; }
+ const Tensor *y() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ template <typename T> void evalInteger() const;
+ void evalQuantized() const;
+
+private:
+ int32_t _x_multiplier = 0;
+ int _x_shift = 0;
+ int32_t _y_multiplier = 0;
+ int _y_shift = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_EQUAL_H
diff --git a/compiler/luci-interpreter/src/kernels/Equal.test.cpp b/compiler/luci-interpreter/src/kernels/Equal.test.cpp
new file mode 100644
index 000000000..5870e5460
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Equal.test.cpp
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Equal.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class EqualTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(EqualTest, FloatSimple)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ -1, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, false, // Row 1
+ false, true, false, // Row 2
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
+}
+
+TEST_F(EqualTest, FloatBroadcast)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ -1, 0, 1, // Row 3
+ 0.9, 0.7, 0.5, // Row 4
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, false, // Row 1
+ false, false, false, // Row 2
+ false, false, false, // Row 3
+ true, true, true, // Row 4
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({4, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{min_value, 2, max_value};
+
+ std::vector<dtype> y_data{min_value, -2, max_value};
+
+ std::vector<bool> ref_output_data{true, false, true};
+
+ Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{
+ min_value, 2, 3, // Row 1
+ 4, 5, max_value, // Row 2
+ -1, -2, -3, // Row 3
+ min_value, -2, max_value, // Row 4
+ };
+
+ std::vector<dtype> y_data{
+ min_value, -2, max_value, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, false, // Row 1
+ false, false, true, // Row 2
+ false, true, false, // Row 3
+ true, true, true, // Row 4
+ };
+
+ Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+TEST_F(EqualTest, Int32)
+{
+ checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(EqualTest, Int64)
+{
+ checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
+// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+const float F_MIN = -128.0 / 128.0;
+const float F_MAX = 127.0 / 128.0;
+
+TEST_F(EqualTest, Uint8Quantized)
+{
+ std::vector<float> x_data{
+ 0.5, 0.5, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.5, 0.55, 0.5, // Row 1
+ -1, 0, 0.05, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, false, false, // Row 1
+ false, true, true, false, // Row 2
+ };
+
+ std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
+
+ std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 2);
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(EqualTest, Uint8QuantizedBroadcast)
+{
+ std::vector<float> x_data{
+ 0.4, -0.8, 0.7, 0.3, // Row 1
+ -0.5, 0.1, 0, 0.5, // Row 2
+ 1, 0, 0.05, -1, // Row 3
+ -1, 0.05, 0, 1, // Row 4
+ };
+
+ std::vector<float> y_data{
+ -1, 0.05, 0, 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ false, false, false, false, // Row 1
+ false, false, true, false, // Row 2
+ false, false, false, false, // Row 3
+ true, true, true, true, // Row 4
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 4, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(EqualTest, Input_Type_Mismatch_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(EqualTest, Input_Output_Type_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(EqualTest, Float_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(EqualTest, Int32_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(EqualTest, Int64_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Exp.cpp b/compiler/luci-interpreter/src/kernels/Exp.cpp
new file mode 100644
index 000000000..e7c560a88
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Exp.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Exp.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/exp.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Exp::Exp(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Exp::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ output()->resize(input()->shape());
+}
+
+void Exp::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Exp::evalFloat() const
+{
+ const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
+ tflite::reference_ops::Exp(getTensorData<float>(input()), size, getTensorData<float>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Exp.h b/compiler/luci-interpreter/src/kernels/Exp.h
new file mode 100644
index 000000000..429177375
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Exp.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_EXP_H
+#define LUCI_INTERPRETER_KERNELS_EXP_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Exp : public Kernel
+{
+public:
+ Exp(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_EXP_H
diff --git a/compiler/luci-interpreter/src/kernels/Exp.test.cpp b/compiler/luci-interpreter/src/kernels/Exp.test.cpp
new file mode 100644
index 000000000..a159d9db9
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Exp.test.cpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Exp.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(ExpTest, Float)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Shape input_shape{1, 1, 7};
+ std::vector<float> input_data{0.0f, 1.0f, -1.0f, 100.0f, -100.0f, 0.01f, -0.01f};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Exp kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<int32_t> ref_output_shape{1, 1, 7};
+ std::vector<float> ref_output_data{std::exp(0.0f), std::exp(1.0f), std::exp(-1.0f),
+ std::exp(100.0f), std::exp(-100.0f), std::exp(0.01f),
+ std::exp(-0.01f)};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/ExpandDims.cpp b/compiler/luci-interpreter/src/kernels/ExpandDims.cpp
new file mode 100644
index 000000000..ba35c99fa
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/ExpandDims.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ExpandDims.h"
+#include "kernels/Utils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+ExpandDims::ExpandDims(const Tensor *input, const Tensor *axis, Tensor *output)
+ : Kernel({input, axis}, {output})
+{
+}
+
+void ExpandDims::configure()
+{
+ int32_t axis_value;
+
+ switch (axis()->element_type())
+ {
+ case loco::DataType::S32:
+ axis_value = *getTensorData<int32_t>(axis());
+ break;
+ case loco::DataType::S64:
+ axis_value = static_cast<int32_t>(*getTensorData<int64_t>(axis()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+
+ const auto input_shape = input()->shape();
+
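+  // A negative axis counts from the back of the *output* shape, so adding
+  // (num_dims + 1) maps it to the equivalent non-negative position.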
+ if (axis_value < 0)
+ {
+ axis_value += input_shape.num_dims() + 1;
+ }
+
+ LUCI_INTERPRETER_CHECK(axis_value <= input_shape.num_dims() and axis_value >= 0);
+
+ Shape output_shape(input_shape.num_dims() + 1);
+ for (int32_t i = 0; i < output_shape.num_dims(); ++i)
+ {
+ if (i < axis_value)
+ {
+ output_shape.dim(i) = input_shape.dim(i);
+ }
+ else if (i == axis_value)
+ {
+ output_shape.dim(i) = 1;
+ }
+ else
+ {
+ LUCI_INTERPRETER_CHECK(i >= 1);
+ output_shape.dim(i) = input_shape.dim(i - 1);
+ }
+ }
+
+ output()->resize(output_shape);
+}
+
+void ExpandDims::execute() const
+{
+  // ExpandDims changes only the shape metadata, so the payload is a plain byte copy.
+ const auto *input_data = input()->data<void>();
+ auto *output_data = output()->data<void>();
+
+ const size_t element_size = getDataTypeSize(input()->element_type());
+ const int32_t num_elements = input()->shape().num_elements();
+ std::memcpy(output_data, input_data, num_elements * element_size);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/ExpandDims.h b/compiler/luci-interpreter/src/kernels/ExpandDims.h
new file mode 100644
index 000000000..e510b1160
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/ExpandDims.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H
+#define LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ExpandDims : public Kernel
+{
+public:
+ ExpandDims(const Tensor *input, const Tensor *axis, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *axis() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H
diff --git a/compiler/luci-interpreter/src/kernels/ExpandDims.test.cpp b/compiler/luci-interpreter/src/kernels/ExpandDims.test.cpp
new file mode 100644
index 000000000..df9eaccc0
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/ExpandDims.test.cpp
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ExpandDims.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class ExpandDimsTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(ExpandDimsTest, PositiveAxis)
+{
+ std::vector<int32_t> input_data{-1, 1, -2, 2};
+ std::initializer_list<int32_t> input_shape = {2, 2};
+
+ std::initializer_list<int32_t> axis_value = {0};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(input_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2}));
+}
+
+TEST_F(ExpandDimsTest, NegAxis)
+{
+ std::vector<int32_t> input_data{-1, 1, -2, 2};
+ std::initializer_list<int32_t> input_shape = {2, 2};
+
+ std::initializer_list<int32_t> axis_value = {-1};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(input_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 2, 1}));
+}
+
+TEST_F(ExpandDimsTest, InvalidAxisType_NEG)
+{
+ std::vector<int32_t> input_data{-1, 1, -2, 2};
+ std::initializer_list<int32_t> input_shape = {2, 2};
+
+ std::initializer_list<float> axis_value = {1.0};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::FLOAT32>({1}, axis_value, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ExpandDimsTest, InvalidAxisValue_NEG)
+{
+ std::vector<int32_t> input_data{-1, 1, -2, 2};
+ std::initializer_list<int32_t> input_shape = {2, 2};
+
+ std::initializer_list<int32_t> axis_value = {3};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Fill.cpp b/compiler/luci-interpreter/src/kernels/Fill.cpp
new file mode 100644
index 000000000..e09d6331a
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Fill.cpp
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Fill.h"
+#include "kernels/Utils.h"
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Fill::Fill(const Tensor *dims, const Tensor *value, Tensor *output)
+ : Kernel({dims, value}, {output})
+{
+}
+
+template <typename T> void Fill::configureShape()
+{
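+  // dims() is a 1-D tensor; its i-th element gives the size of output dimension i.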
+ const auto dims_data = getTensorData<T>(dims());
+ Shape output_shape(dims()->shape().dim(0));
+
+ for (int i = 0; i < output_shape.num_dims(); ++i)
+ {
+ T data = dims_data[i];
+ if (data < 0)
+ throw std::runtime_error("Fill dimensions must be >= 0");
+
+ output_shape.dim(i) = data;
+ }
+
+ output()->resize(output_shape);
+}
+
+void Fill::configure()
+{
+ const auto dims_shape = dims()->shape();
+ const auto value_shape = value()->shape();
+
+ // Make sure the 1st input tensor is 1-D
+ LUCI_INTERPRETER_CHECK(dims_shape.num_dims() == 1);
+
+ // Make sure the 1st input tensor is int32 or int64
+ LUCI_INTERPRETER_CHECK(dims()->element_type() == DataType::S32 or
+ dims()->element_type() == DataType::S64);
+
+ // Make sure the 2nd input tensor is a scalar
+  LUCI_INTERPRETER_CHECK(value_shape.num_dims() == 0);
+
+ // Check zero point and scale for S16 and S8
+ if (value()->element_type() == loco::DataType::S16 or
+ value()->element_type() == loco::DataType::S8)
+ {
+ LUCI_INTERPRETER_CHECK(value()->scale() == output()->scale());
+ LUCI_INTERPRETER_CHECK(value()->zero_point() == output()->zero_point());
+
+ if (value()->element_type() == loco::DataType::S16)
+ LUCI_INTERPRETER_CHECK(value()->zero_point() == 0);
+  }
+
+  // Resize output
+ switch (dims()->element_type())
+ {
+ case DataType::S32:
+ configureShape<int32_t>();
+ break;
+ case DataType::S64:
+ configureShape<int64_t>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Fill::execute() const
+{
+ switch (output()->element_type())
+ {
+ case DataType::S8:
+ tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int8_t>(value()),
+ getTensorShape(output()), getTensorData<int8_t>(output()));
+ break;
+ case DataType::S16:
+ tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int16_t>(value()),
+ getTensorShape(output()), getTensorData<int16_t>(output()));
+ break;
+ case DataType::S32:
+ tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int32_t>(value()),
+ getTensorShape(output()), getTensorData<int32_t>(output()));
+ break;
+ case DataType::S64:
+ tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int64_t>(value()),
+ getTensorShape(output()), getTensorData<int64_t>(output()));
+ break;
+ case DataType::FLOAT32:
+ tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<float>(value()),
+ getTensorShape(output()), getTensorData<float>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Fill.h b/compiler/luci-interpreter/src/kernels/Fill.h
new file mode 100644
index 000000000..184f0cb83
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Fill.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_FILL_H
+#define LUCI_INTERPRETER_KERNELS_FILL_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Fill : public Kernel
+{
+public:
+ Fill(const Tensor *dims, const Tensor *value, Tensor *output);
+
+ const Tensor *dims() const { return _inputs[0]; }
+ const Tensor *value() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> void configureShape();
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_FILL_H
diff --git a/compiler/luci-interpreter/src/kernels/Fill.test.cpp b/compiler/luci-interpreter/src/kernels/Fill.test.cpp
new file mode 100644
index 000000000..cf56df507
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Fill.test.cpp
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Fill.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class FillTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+template <typename T, DataType DT> void runFillIntKernel(IMemoryManager *memory_manager)
+{
+ Shape dims_shape{2};
+
+ std::vector<int32_t> dims_data = {2, 3};
+ std::vector<T> value_data = {5};
+
+ Tensor dims = makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, memory_manager);
+ Tensor value = makeInputTensor<DT>(/*scalar*/ {}, value_data, memory_manager);
+
+ Tensor output_tensor = makeOutputTensor(DT);
+
+ Fill kernel(&dims, &value, &output_tensor);
+
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<T> ref_output_data{5, 5, 5, 5, 5, 5};
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ref_output_data);
+
+ std::vector<int32_t> ref_output_shape{2, 3};
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+template <DataType DT> void runFillQuantIntKernel(IMemoryManager *memory_manager)
+{
+ Shape dims_shape{2};
+
+ std::vector<int32_t> dims_data = {2, 3};
+ std::vector<float> value_data = {5};
+
+ int32_t zero_point = 0;
+
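+  // Fill::configure requires zero_point == 0 for S16, so only the S8 run uses a
+  // non-zero zero point to exercise the asymmetric-quantization path.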
+ if (DT == loco::DataType::S8)
+ zero_point = 1;
+
+ Tensor dims = makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, memory_manager);
+ Tensor value = makeInputTensor<DT>(/*scalar*/ {}, /*scale*/ 0.25, /*zero_point*/ zero_point,
+ value_data, memory_manager);
+
+ Tensor output_tensor = makeOutputTensor(DT, /*scale*/ 0.25, /*zero_point*/ zero_point);
+
+ Fill kernel(&dims, &value, &output_tensor);
+
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{5, 5, 5, 5, 5, 5};
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+
+ std::vector<int32_t> ref_output_shape{2, 3};
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(FillTest, FillInt)
+{
+ // Run for int32_t input
+ runFillIntKernel<int32_t, loco::DataType::S32>(_memory_manager.get());
+ // Run for int64_t input
+ runFillIntKernel<int64_t, loco::DataType::S64>(_memory_manager.get());
+ // Run for int8_t input
+ runFillQuantIntKernel<loco::DataType::S8>(_memory_manager.get());
+ // Run for int16_t input
+ runFillQuantIntKernel<loco::DataType::S16>(_memory_manager.get());
+
+ SUCCEED();
+}
+
+TEST_F(FillTest, FillFloat)
+{
+ Shape dims_shape{3};
+
+ std::vector<int64_t> dims_data = {2, 2, 2};
+ std::vector<float> value_data = {5};
+
+ Tensor dims = makeInputTensor<loco::DataType::S64>(dims_shape, dims_data, _memory_manager.get());
+ Tensor value =
+ makeInputTensor<loco::DataType::FLOAT32>(/*scalar*/ {}, value_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32);
+
+ Fill kernel(&dims, &value, &output_tensor);
+
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{5, 5, 5, 5, 5, 5, 5, 5};
+
+ std::vector<int32_t> ref_output_shape{2, 2, 2};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), ref_output_data);
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(FillTest, Invalid_Input_Shape_NEG)
+{
+ Shape dims_shape{1, 3};
+
+ std::vector<int32_t> dims_data = {2, 2, 2};
+ std::vector<float> value_data = {5};
+
+ Tensor dims = makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, _memory_manager.get());
+ Tensor value =
+ makeInputTensor<loco::DataType::FLOAT32>(/*scalar*/ {}, value_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32);
+
+ Fill kernel(&dims, &value, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(FillTest, Invalid_Value_Shape_NEG)
+{
+ Shape dims_shape{3};
+
+ std::vector<int32_t> dims_data = {2, 2, 2};
+ std::vector<float> value_data = {5};
+
+ Tensor dims = makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, _memory_manager.get());
+ Tensor value = makeInputTensor<loco::DataType::FLOAT32>({1}, value_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32);
+
+ Fill kernel(&dims, &value, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Floor.cpp b/compiler/luci-interpreter/src/kernels/Floor.cpp
new file mode 100644
index 000000000..e3c4246cc
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Floor.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Floor.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/floor.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Floor::Floor(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Floor::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ output()->resize(input()->shape());
+}
+
+void Floor::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Floor::evalFloat() const
+{
+ tflite::reference_ops::Floor(getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Floor.h b/compiler/luci-interpreter/src/kernels/Floor.h
new file mode 100644
index 000000000..ca3ad5997
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Floor.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_FLOOR_H
+#define LUCI_INTERPRETER_KERNELS_FLOOR_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Floor : public Kernel
+{
+public:
+ Floor(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_FLOOR_H
diff --git a/compiler/luci-interpreter/src/kernels/Floor.test.cpp b/compiler/luci-interpreter/src/kernels/Floor.test.cpp
new file mode 100644
index 000000000..30076fb54
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Floor.test.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Floor.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class FloorTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(FloorTest, SimpleFloat)
+{
+ std::initializer_list<int32_t> input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ 0.2, 8.6, 2.4, 4.3, // Row 1
+ 3, 7.1, 10.5, -0.9, // Row 2
+ };
+
+ std::initializer_list<int32_t> ref_output_shape{1, 2, 4, 1};
+ std::vector<float> ref_output_data{
+ 0, 8, 2, 4, // Row 1
+ 3, 7, 10, -1, // Row 2
+ };
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Floor kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(FloorTest, Input_Output_Type_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ Floor kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/FloorDiv.cpp b/compiler/luci-interpreter/src/kernels/FloorDiv.cpp
new file mode 100644
index 000000000..a7a10a336
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/FloorDiv.cpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/FloorDiv.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/binary_function.h>
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+FloorDiv::FloorDiv(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {}
+
+void FloorDiv::configure()
+{
+ LUCI_INTERPRETER_CHECK(x()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(y()->element_type() == output()->element_type());
+
+ output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+void FloorDiv::execute() const
+{
+ switch (x()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void FloorDiv::evalFloat() const
+{
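+  // Divide in double precision before flooring so float rounding is less likely
+  // to push the quotient across an integer boundary.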
+ auto FloorDivFunc = [](float x, float y) -> float {
+ return std::floor(static_cast<double>(x) / static_cast<double>(y));
+ };
+
+ const auto x_data = getTensorData<float>(x());
+ const auto y_data = getTensorData<float>(y());
+
+ // Check the denominator
+ for (int i = 0; i < getTensorShape(y()).FlatSize(); ++i)
+ {
+ LUCI_INTERPRETER_CHECK(y_data[i] != 0);
+ }
+
+ if (x()->shape() != y()->shape())
+ {
+ tflite::reference_ops::BroadcastBinaryFunction4DSlow<float, float, float>(
+ getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+ getTensorData<float>(output()), FloorDivFunc);
+ }
+ else
+ {
+ tflite::reference_ops::BinaryFunction<float, float, float>(
+ getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+ getTensorData<float>(output()), FloorDivFunc);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/FloorDiv.h b/compiler/luci-interpreter/src/kernels/FloorDiv.h
new file mode 100644
index 000000000..e9c47d81a
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/FloorDiv.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_FLOOR_DIV_H
+#define LUCI_INTERPRETER_KERNELS_FLOOR_DIV_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class FloorDiv : public Kernel
+{
+public:
+ FloorDiv(const Tensor *x, const Tensor *y, Tensor *output);
+
+ const Tensor *x() const { return _inputs[0]; }
+ const Tensor *y() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_FLOOR_DIV_H
diff --git a/compiler/luci-interpreter/src/kernels/FloorDiv.test.cpp b/compiler/luci-interpreter/src/kernels/FloorDiv.test.cpp
new file mode 100644
index 000000000..3e1b5f18e
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/FloorDiv.test.cpp
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/FloorDiv.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class FloorDivTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(FloorDivTest, FloatSimple)
+{
+ Shape x_shape{2, 3};
+ std::vector<float> x_data{
+ 0.5, 2.4, 3.1, // Row 1
+ 1.9, -1.9, -2.8, // Row 2
+ };
+
+ Shape y_shape = x_shape;
+ std::vector<float> y_data{
+ 2.0, 0.5, 3.0, // Row 1
+ 1.0, -1.0, -2.0, // Row 2
+ };
+
+ std::vector<int32_t> ref_output_shape{2, 3};
+ std::vector<float> ref_output_data{
+ 0, 4, 1, // Row 1
+ 1, 1, 1, // Row 2
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(x_shape, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(y_shape, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(FloorDivTest, FloatBroadcast)
+{
+ Shape x_shape{1, 3};
+ std::vector<float> x_data{
+ 0.5, 2.4, -3.1, // Row 1
+ };
+
+ Shape y_shape{3, 3};
+ std::vector<float> y_data{
+ 1.0, 1.0, 1.0, // Row 1
+ 2.0, -0.5, -2.0, // Row 2
+ 0.3, 0.7, 0.9, // Row 3
+ };
+
+ std::vector<int32_t> ref_output_shape{3, 3};
+ std::vector<float> ref_output_data{
+ 0, 2, -4, // Row 1
+ 0, -5, 1, // Row 2
+ 1, 3, -4, // Row 3
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(x_shape, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(y_shape, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(FloorDivTest, DivByZero_NEG)
+{
+ Shape shape{3};
+ std::vector<float> x_data{1, 0, -1};
+ std::vector<float> y_data{0, 0, 0};
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(shape, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(shape, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+TEST_F(FloorDivTest, Input_Output_Type_Mismatch_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(FloorDivTest, Input_Type_Mismatch_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/FloorMod.cpp b/compiler/luci-interpreter/src/kernels/FloorMod.cpp
new file mode 100644
index 000000000..a64fcad3a
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/FloorMod.cpp
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/FloorMod.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/binary_function.h>
+#include <cmath>
+
+namespace
+{
+
+template <typename T> T FloorModFunc(T input1, T input2)
+{
+ struct FloatMod
+ {
+ float operator()(const float lhs, const float rhs) const { return std::fmod(lhs, rhs); }
+ };
+ using ModFunc =
+ typename std::conditional<std::is_integral<T>::value, std::modulus<T>, FloatMod>::type;
+ ModFunc mod_func;
+ T trunc_mod = mod_func(input1, input2);
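+  // Truncated mod takes the sign of input1, while floor mod must take the sign
+  // of input2; when the signs disagree, shift the remainder by input2.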
+ return (trunc_mod != 0) && ((input2 < 0) != (trunc_mod < 0)) ? (trunc_mod + input2) : trunc_mod;
+}
+
+} // namespace
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+FloorMod::FloorMod(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {}
+
+void FloorMod::configure()
+{
+ LUCI_INTERPRETER_CHECK(x()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(y()->element_type() == output()->element_type());
+
+ output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+void FloorMod::execute() const
+{
+ switch (x()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::S8:
+ evalInteger<int8_t>();
+ break;
+ case DataType::S16:
+ evalInteger<int16_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void FloorMod::evalFloat() const
+{
+ const auto x_data = getTensorData<float>(x());
+ const auto y_data = getTensorData<float>(y());
+
+ if (x()->shape() != y()->shape())
+ {
+ tflite::reference_ops::BroadcastBinaryFunction4DSlow<float, float, float>(
+ getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+      getTensorData<float>(output()), FloorModFunc<float>);
+ }
+ else
+ {
+ tflite::reference_ops::BinaryFunction<float, float, float>(
+ getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+      getTensorData<float>(output()), FloorModFunc<float>);
+ }
+}
+
+template <typename T> void FloorMod::evalInteger() const
+{
+ const auto x_data = getTensorData<T>(x());
+ const auto y_data = getTensorData<T>(y());
+
+  // Check the denominator: integer mod by zero is undefined behavior.
+  // (evalInteger is only instantiated for integral types, so no type check is needed.)
+  for (int i = 0; i < getTensorShape(y()).FlatSize(); ++i)
+  {
+    LUCI_INTERPRETER_CHECK(y_data[i] != 0);
+  }
+
+ if (x()->shape() != y()->shape())
+ {
+ tflite::reference_ops::BroadcastBinaryFunction4DSlow<T, T, T>(
+ getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+      getTensorData<T>(output()), FloorModFunc<T>);
+ }
+ else
+ {
+ tflite::reference_ops::BinaryFunction<T, T, T>(getTensorShape(x()), x_data, getTensorShape(y()),
+ y_data, getTensorShape(output()),
+                                                   getTensorData<T>(output()), FloorModFunc<T>);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/FloorMod.h b/compiler/luci-interpreter/src/kernels/FloorMod.h
new file mode 100644
index 000000000..f2d9b2ae8
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/FloorMod.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_FLOOR_MOD_H
+#define LUCI_INTERPRETER_KERNELS_FLOOR_MOD_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class FloorMod : public Kernel
+{
+public:
+ FloorMod(const Tensor *x, const Tensor *y, Tensor *output);
+
+ const Tensor *x() const { return _inputs[0]; }
+ const Tensor *y() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ template <typename T> void evalInteger() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_FLOOR_MOD_H
diff --git a/compiler/luci-interpreter/src/kernels/FloorMod.test.cpp b/compiler/luci-interpreter/src/kernels/FloorMod.test.cpp
new file mode 100644
index 000000000..123a91e3a
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/FloorMod.test.cpp
@@ -0,0 +1,446 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/FloorMod.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class FloorModTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(FloorModTest, Simple)
+{
+ Shape input1_shape{1, 2, 2, 1};
+ std::vector<int32_t> input1_data{10, 9, 11, 3};
+
+ Shape input2_shape = input1_shape;
+ std::vector<int32_t> input2_data{2, 2, 3, 4};
+
+ std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
+ std::vector<int32_t> ref_output_data{0, 1, 2, 3};
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S32>(input1_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::S32>(input2_shape, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int32_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(FloorModTest, NegativeValue)
+{
+ Shape input1_shape{1, 2, 2, 1};
+ std::vector<int32_t> input1_data{10, -9, -11, 7};
+
+ Shape input2_shape = input1_shape;
+ std::vector<int32_t> input2_data{2, 2, -3, -4};
+
+ std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
+ std::vector<int32_t> ref_output_data{0, 1, -2, -1};
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S32>(input1_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::S32>(input2_shape, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int32_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(FloorModTest, BroadcastFloorMod)
+{
+ Shape input1_shape{1, 2, 2, 1};
+ std::vector<int32_t> input1_data{
+ 10,
+ -9,
+ -11,
+ 7,
+ };
+
+ Shape input2_shape{1};
+ std::vector<int32_t> input2_data{-3};
+
+ std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
+ std::vector<int32_t> ref_output_data{-2, 0, -2, -2};
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S32>(input1_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::S32>(input2_shape, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int32_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(FloorModTest, Int64WithBroadcast)
+{
+ Shape input1_shape{1, 2, 2, 1};
+ std::vector<int64_t> input1_data{10, -9, -11, (1LL << 34) + 9};
+
+ Shape input2_shape{1};
+ std::vector<int64_t> input2_data{-(1LL << 33)};
+
+ std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
+ std::vector<int64_t> ref_output_data{-8589934582, -9, -11, -8589934583};
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S64>(input1_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::S64>(input2_shape, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int64_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(FloorModTest, FloatSimple)
+{
+ Shape input1_shape{1, 2, 2, 1};
+ std::vector<float> input1_data{10.0, 9.0, 11.0, 3.0};
+
+ Shape input2_shape = input1_shape;
+ std::vector<float> input2_data{2.0, 2.0, 3.0, 4.0};
+
+ std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
+ std::vector<float> ref_output_data{0.0, 1.0, 2.0, 3.0};
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(input1_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(input2_shape, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(FloorModTest, FloatNegativeValue)
+{
+ Shape input1_shape{1, 2, 2, 1};
+ std::vector<float> input1_data{10.0, -9.0, -11.0, 7.0};
+
+ Shape input2_shape = input1_shape;
+ std::vector<float> input2_data{2.0, 2.0, -3.0, -4.0};
+
+ std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
+ std::vector<float> ref_output_data{0.0, 1.0, -2.0, -1.0};
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(input1_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(input2_shape, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(FloorModTest, FloatBroadcast)
+{
+ Shape input1_shape{1, 2, 2, 1};
+ std::vector<float> input1_data{
+ 10.0,
+ -9.0,
+ -11.0,
+ 7.0,
+ };
+
+ Shape input2_shape{1};
+ std::vector<float> input2_data{-3.0};
+
+ std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
+ std::vector<float> ref_output_data{-2.0, 0.0, -2.0, -2.0};
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(input1_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(input2_shape, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(FloorModTest, SimpleInt16)
+{
+ Shape input1_shape{1, 2, 2, 1};
+ std::vector<int16_t> input1_data{10, 9, 11, 3};
+
+ Shape input2_shape = input1_shape;
+ std::vector<int16_t> input2_data{2, 2, 3, 4};
+
+ std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
+ std::vector<int16_t> ref_output_data{0, 1, 2, 3};
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S16>(input1_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::S16>(input2_shape, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16);
+
+ FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int16_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(FloorModTest, NegativeValueInt16)
+{
+ Shape input1_shape{1, 2, 2, 1};
+ std::vector<int16_t> input1_data{110, -9, -11, 7};
+
+ Shape input2_shape = input1_shape;
+ std::vector<int16_t> input2_data{2, 2, -3, -4};
+
+ std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
+ std::vector<int16_t> ref_output_data{0, 1, -2, -1};
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S16>(input1_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::S16>(input2_shape, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16);
+
+ FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int16_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(FloorModTest, BroadcastFloorModInt16)
+{
+ Shape input1_shape{1, 2, 2, 1};
+ std::vector<int16_t> input1_data{10, -9, -11, 7};
+
+ Shape input2_shape{1};
+ std::vector<int16_t> input2_data{-3};
+
+ std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
+ std::vector<int16_t> ref_output_data{-2, 0, -2, -2};
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S16>(input1_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::S16>(input2_shape, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16);
+
+ FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int16_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(FloorModTest, DivByZero_NEG)
+{
+ Shape shape{3};
+ std::vector<int32_t> input1_data{1, 0, -1};
+ std::vector<int32_t> input2_data{0, 0, 0};
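+  // floor_mod is undefined for a zero divisor, so execute() is expected to throw.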
+
+ Tensor input1_tensor = makeInputTensor<DataType::S32>(shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>(shape, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+TEST_F(FloorModTest, Int64DivByZero_NEG)
+{
+ Shape shape{3};
+ std::vector<int64_t> input1_data{1, 0, -1};
+ std::vector<int64_t> input2_data{0, 0, 0};
+
+ Tensor input1_tensor = makeInputTensor<DataType::S64>(shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S64>(shape, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+TEST_F(FloorModTest, Int16DivByZero_NEG)
+{
+ Shape shape{3};
+ std::vector<int16_t> input1_data{1, 0, -1};
+ std::vector<int16_t> input2_data{0, 0, 0};
+
+ Tensor input1_tensor = makeInputTensor<DataType::S16>(shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S16>(shape, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16);
+
+ FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+TEST_F(FloorModTest, Input_Output_Type_Mismatch_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8);
+
+ FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(FloorModTest, Input_Type_Mismatch_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S8>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(FloorModTest, Float_Broadcast_NEG)
+{
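+  // Shapes {2} and {3} are not broadcast-compatible, so configure() must reject the pair.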
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(FloorModTest, Int64_Broadcast_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(FloorModTest, Int32_Broadcast_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(FloorModTest, Int16_Broadcast_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::S16>({2}, {1, 2}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S16>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16);
+
+ FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(FloorModTest, UnsupportedType_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+
+ _memory_manager->allocate_memory(output_tensor);
+ ASSERT_ANY_THROW(kernel.execute());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/FullyConnected.cpp b/compiler/luci-interpreter/src/kernels/FullyConnected.cpp
index 6529c5e77..bd2bb2f35 100644
--- a/compiler/luci-interpreter/src/kernels/FullyConnected.cpp
+++ b/compiler/luci-interpreter/src/kernels/FullyConnected.cpp
@@ -18,7 +18,7 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/fully_connected.h>
+#include "PALFullyConnected.h"
#include <stdexcept>
@@ -30,33 +30,80 @@ namespace kernels
FullyConnected::FullyConnected(const Tensor *input, const Tensor *weights, const Tensor *bias,
Tensor *output, const FullyConnectedParams &params)
- : KernelWithParams<FullyConnectedParams>({input, weights, bias}, {output}, params)
+ : KernelWithParams<FullyConnectedParams>({input, weights, bias}, {output}, params)
{
}
void FullyConnected::configure()
{
- if (weights()->element_type() != DataType::FLOAT32)
+ if (weights()->element_type() == DataType::U8)
+ {
+ LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::U8);
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::U8);
+    LUCI_INTERPRETER_CHECK(!bias() || bias()->element_type() == DataType::S32);
+ }
+ else if (weights()->element_type() == DataType::FLOAT32)
+ {
+ LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::FLOAT32);
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
+    LUCI_INTERPRETER_CHECK(!bias() || bias()->element_type() == DataType::FLOAT32);
+ }
+ else if (weights()->element_type() == DataType::S8)
+ {
+ LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::S8);
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::S8);
+    LUCI_INTERPRETER_CHECK(!bias() || bias()->element_type() == DataType::S32);
+ }
+ else
+ {
throw std::runtime_error("Unsupported type.");
-
- assert(input()->element_type() == DataType::FLOAT32);
- assert(weights()->element_type() == DataType::FLOAT32);
- assert(bias() == nullptr || bias()->element_type() == DataType::FLOAT32);
+ }
const Shape &input_shape = input()->shape();
const Shape &weights_shape = weights()->shape();
- assert(weights_shape.num_dims() == 2);
- assert(bias() == nullptr || bias()->shape().num_elements() == weights_shape.dim(0));
+ LUCI_INTERPRETER_CHECK(weights_shape.num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(bias() == nullptr ||
+ bias()->shape().num_elements() == weights_shape.dim(0));
- assert(input_shape.num_elements() % weights_shape.dim(1) == 0);
+ LUCI_INTERPRETER_CHECK(input_shape.num_elements() % weights_shape.dim(1) == 0);
const int32_t batch_size = input_shape.num_elements() / weights_shape.dim(1);
const int32_t num_units = weights_shape.dim(0);
- output()->resize({batch_size, num_units});
+
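+  // keep_num_dims == false collapses the output to [batch_size, num_units]; otherwise the input
+  // rank is preserved and only the last dimension is replaced by num_units.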
+ if (params().keep_num_dims == false)
+ {
+ output()->resize({batch_size, num_units});
+ }
+ else
+ {
+ luci_interpreter::Shape output_shape(input_shape.num_dims());
+ for (int i = 0; i < input_shape.num_dims(); ++i)
+ output_shape.dim(i) = input_shape.dim(i);
+ output_shape.dim(input_shape.num_dims() - 1) = num_units;
+ output()->resize(output_shape);
+ }
}
-void FullyConnected::execute() const { evalFloat(); }
+void FullyConnected::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::U8:
+ evalQuantized();
+ break;
+ case DataType::S8:
+ evalQuantizedS8();
+ break;
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
void FullyConnected::evalFloat() const
{
@@ -70,9 +117,75 @@ void FullyConnected::evalFloat() const
params.weights_format = tflite::FullyConnectedWeightsFormat::kDefault;
tflite::reference_ops::FullyConnected(
- params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(weights()),
- getTensorData<float>(weights()), getTensorShape(bias()), getTensorData<float>(bias()),
- getTensorShape(output()), getTensorData<float>(output()));
+ params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(weights()),
+ getTensorData<float>(weights()), getTensorShape(bias()), getTensorData<float>(bias()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void FullyConnected::evalQuantized() const
+{
+ double real_multiplier = 0.0;
+ int output_shift;
+ int32_t output_activation_min;
+ int32_t output_activation_max;
+ int32_t output_multiplier;
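+  // real_multiplier = input_scale * weights_scale / output_scale; quantizeMultiplier() then
+  // splits it into a 32-bit fixed-point multiplier and a power-of-two shift.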
+ real_multiplier =
+ getQuantizedConvolutionMultipler(input()->scale(), weights()->scale(), output()->scale());
+ quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+ calculateActivationRangeQuantized(params().activation, output(), &output_activation_min,
+ &output_activation_max);
+
+ int32_t input_offset = -input()->zero_point();
+ int32_t filter_offset = -weights()->zero_point();
+ int32_t output_offset = output()->zero_point();
+
+ tflite::FullyConnectedParams op_params{};
+ op_params.input_offset = input_offset;
+ op_params.weights_offset = filter_offset;
+ op_params.output_offset = output_offset;
+ op_params.output_multiplier = output_multiplier;
+ op_params.output_shift = output_shift;
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+ op_params.lhs_cacheable = false;
+ op_params.rhs_cacheable = false;
+ tflite::reference_ops::FullyConnected(
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(weights()),
+ getTensorData<uint8_t>(weights()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+}
+
+void FullyConnected::evalQuantizedS8() const
+{
+ double real_multiplier = 0.0;
+ int output_shift;
+ int32_t output_activation_min;
+ int32_t output_activation_max;
+ int32_t output_multiplier;
+ real_multiplier =
+ getQuantizedConvolutionMultipler(input()->scale(), weights()->scale(), output()->scale());
+ quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+ calculateActivationRangeQuantized(params().activation, output(), &output_activation_min,
+ &output_activation_max);
+
+ int32_t input_offset = -input()->zero_point();
+ int32_t filter_offset = -weights()->zero_point();
+ int32_t output_offset = output()->zero_point();
+
+ tflite::FullyConnectedParams op_params{};
+ op_params.input_offset = input_offset;
+ op_params.weights_offset = filter_offset;
+ op_params.output_offset = output_offset;
+ op_params.output_multiplier = output_multiplier;
+ op_params.output_shift = output_shift;
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+ op_params.lhs_cacheable = false;
+ op_params.rhs_cacheable = false;
+ luci_interpreter_pal::FullyConnected<int8_t>(
+ op_params, getTensorShape(input()), getTensorData<int8_t>(input()), getTensorShape(weights()),
+ getTensorData<int8_t>(weights()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
+ getTensorShape(output()), getTensorData<int8_t>(output()));
}
} // namespace kernels
diff --git a/compiler/luci-interpreter/src/kernels/FullyConnected.h b/compiler/luci-interpreter/src/kernels/FullyConnected.h
index 2e3174c74..2a7c068c0 100644
--- a/compiler/luci-interpreter/src/kernels/FullyConnected.h
+++ b/compiler/luci-interpreter/src/kernels/FullyConnected.h
@@ -41,6 +41,8 @@ public:
private:
void evalFloat() const;
+ void evalQuantized() const;
+ void evalQuantizedS8() const;
};
} // namespace kernels
diff --git a/compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp b/compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp
index 8077fcb5c..4474cc4fb 100644
--- a/compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/FullyConnected.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -26,40 +27,232 @@ namespace
using namespace testing;
-TEST(FullyConnectedTest, Float)
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> weights_shape,
+ std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> weights_data,
+ std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor weights_tensor =
+ makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ FullyConnectedParams params{};
+ params.activation = Activation::RELU;
+
+ FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data));
+}
+
+template <>
+void Check<int8_t>(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> weights_shape,
+ std::initializer_list<int32_t> bias_shape,
+ std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data,
+ std::initializer_list<float> weights_data,
+ std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ const float quantized_tolerance = getTolerance(-127, 128, 255);
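+  // Assuming getTolerance(min, max, steps) returns (max - min) / steps, this allows roughly one
+  // quantization step of error after dequantization.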
+ std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(-63.5, 64);
+ std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-127, 128);
+ Tensor input_tensor =
+ makeInputTensor<DataType::S8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, memory_manager.get());
+ Tensor weights_tensor =
+ makeInputTensor<DataType::S8>(weights_shape, input_quant_param.first, input_quant_param.second,
+ weights_data, memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::S32>(bias_shape, input_quant_param.first * input_quant_param.first, 0,
+ bias_data, memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
+
+ FullyConnectedParams params{};
+ params.activation = Activation::RELU;
+
+ FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data, quantized_tolerance));
+}
+
+template <>
+void Check<uint8_t>(
+ std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> weights_shape,
+ std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> weights_data,
+ std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ const float quantized_tolerance = getTolerance(-127, 128, 255);
+ std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64);
+ std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, memory_manager.get());
+ Tensor weights_tensor =
+ makeInputTensor<DataType::U8>(weights_shape, input_quant_param.first, input_quant_param.second,
+ weights_data, memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::S32>(bias_shape, input_quant_param.first * input_quant_param.first, 0,
+ bias_data, memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+
+ FullyConnectedParams params{};
+ params.activation = Activation::RELU;
+
+ FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data, quantized_tolerance));
+}
+
+template <typename T> class FullyConnectedTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
+TYPED_TEST_SUITE(FullyConnectedTest, DataTypes);
+
+TYPED_TEST(FullyConnectedTest, Simple)
+{
+ Check<TypeParam>({3, 2, 2, 1}, {3, 6}, {3}, {2, 3},
+ {
+ -3, -5, 5, 4, 9, -2, // batch = 0
+ -3, -2, -4, 9, -8, 1, // batch = 1
+ },
+ {
+ -3, -7, 4, -4, -6, 4, // unit = 0
+ 3, 5, 2, 3, -3, -8, // unit = 1
+ -3, 7, 4, 9, 0, -5, // unit = 2
+ },
+ {-1, -5, -8},
+ {
+ 0, 0, 32, // batch = 0
+ 22, 11, 47, // batch = 1
+ });
+}
+
+TEST(FullyConnectedTest, InvalidBiasType_NEG)
{
Shape input_shape{3, 2, 2, 1};
std::vector<float> input_data{
- -3, -5, 5, 4, 9, -2, // batch = 0
- -3, -2, -4, 9, -8, 1, // batch = 1
+ -3, -5, 5, 4, 9, -2, // batch = 0
+ -3, -2, -4, 9, -8, 1, // batch = 1
};
Shape weights_shape{3, 6};
std::vector<float> weights_data{
- -3, -7, 4, -4, -6, 4, // unit = 0
- 3, 5, 2, 3, -3, -8, // unit = 1
- -3, 7, 4, 9, 0, -5, // unit = 2
+ -3, -7, 4, -4, -6, 4, // unit = 0
+ 3, 5, 2, 3, -3, -8, // unit = 1
+ -3, 7, 4, 9, 0, -5, // unit = 2
+ };
+ Shape bias_shape{3};
+ std::vector<int32_t> bias_data{-1, -5, -8};
+
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor weights_tensor =
+ makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ FullyConnectedParams params{};
+ params.activation = Activation::RELU;
+
+ FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(FullyConnectedTest, InvalidWeightShapeDim_NEG)
+{
+ Shape input_shape{3, 2, 2, 1};
+ std::vector<float> input_data{
+ -3, -5, 5, 4, 9, -2, // batch = 0
+ -3, -2, -4, 9, -8, 1, // batch = 1
+ };
+ Shape weights_shape{1, 3, 6};
+ std::vector<float> weights_data{
+ -3, -7, 4, -4, -6, 4, // unit = 0
+ 3, 5, 2, 3, -3, -8, // unit = 1
+ -3, 7, 4, 9, 0, -5, // unit = 2
};
Shape bias_shape{3};
std::vector<float> bias_data{-1, -5, -8};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
- Tensor weights_tensor = makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data);
- Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor weights_tensor =
+ makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
FullyConnectedParams params{};
params.activation = Activation::RELU;
FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
- kernel.configure();
- kernel.execute();
+ EXPECT_ANY_THROW(kernel.configure());
+}
- std::vector<float> ref_output_data{
- 0, 0, 32, // batch = 0
- 22, 11, 47, // batch = 1
+TEST(FullyConnectedTest, BiasElementNumWeightDimMismatch_NEG)
+{
+ Shape input_shape{3, 2, 2, 1};
+ std::vector<float> input_data{
+ -3, -5, 5, 4, 9, -2, // batch = 0
+ -3, -2, -4, 9, -8, 1, // batch = 1
};
- EXPECT_THAT(extractTensorData<float>(output_tensor),
- ElementsAreArray(ArrayFloatNear(ref_output_data)));
+ Shape weights_shape{6, 3};
+ std::vector<float> weights_data{
+ -3, -7, 4, // unit = 0
+ -4, -6, 4, // unit = 1
+ 3, 5, 2, // unit = 2
+ 3, -3, -8, // unit = 3
+ -3, 7, 4, // unit = 4
+ 9, 0, -5, // unit = 5
+ };
+ Shape bias_shape{3};
+ std::vector<float> bias_data{-1, -5, -8};
+
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor weights_tensor =
+ makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ FullyConnectedParams params{};
+ params.activation = Activation::RELU;
+
+ FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
}
} // namespace
diff --git a/compiler/luci-interpreter/src/kernels/Gather.cpp b/compiler/luci-interpreter/src/kernels/Gather.cpp
new file mode 100644
index 000000000..f1256660f
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Gather.cpp
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Gather.h"
+#include "kernels/Utils.h"
+#include "PALGather.h"
+
+#include <stdexcept>
+#include <cassert>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Gather::Gather(const Tensor *params, const Tensor *indices, Tensor *output,
+ const GatherParams &gparams)
+ : KernelWithParams<GatherParams>({params, indices}, {output}, gparams)
+{
+}
+
+void Gather::configure()
+{
+ if (params()->element_type() == DataType::FLOAT32)
+ {
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
+ }
+ else
+ {
+ throw std::runtime_error("Unsupported type.");
+ }
+
+ LUCI_INTERPRETER_CHECK(indices()->element_type() == DataType::S32 ||
+ indices()->element_type() == DataType::S64);
+
+  // See tensorflow/lite/kernels/gather.cc for the reference implementation.
+
+ const Shape &params_shape = params()->shape();
+ const Shape &indices_shape = indices()->shape();
+
+ int axis = _params.axis;
+ if (axis < 0)
+ {
+ axis += params_shape.num_dims();
+ }
+ LUCI_INTERPRETER_CHECK(0 <= axis && axis < params_shape.num_dims());
+
+ int batch_dims = _params.batch_dims;
+ // batch_dims should be in range: [-rank(indices), rank(indices)].
+  // A negative batch_dims is normalized by adding the rank of the indices tensor.
+ if (batch_dims < 0)
+ {
+ batch_dims += indices_shape.num_dims();
+ }
+ LUCI_INTERPRETER_CHECK(batch_dims <= axis);
+ LUCI_INTERPRETER_CHECK(0 <= batch_dims && batch_dims < params_shape.num_dims());
+ LUCI_INTERPRETER_CHECK(batch_dims <= indices_shape.num_dims());
+ for (int i = 0; i < batch_dims; ++i)
+ {
+ LUCI_INTERPRETER_CHECK(params_shape.dim(i) == indices_shape.dim(i));
+ }
+
+ const int num_dimensions = params_shape.num_dims() + indices_shape.num_dims() - 1 - batch_dims;
+
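+  // Output shape: params[0..axis) ++ indices[batch_dims..rank(indices)) ++ params[axis+1..rank(params)).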
+ Shape output_shape(num_dimensions);
+ int output_index = 0;
+ for (int i = 0; i < axis; ++i)
+ {
+ output_shape.dim(output_index++) = params_shape.dim(i);
+ }
+ for (int i = batch_dims; i < indices_shape.num_dims(); ++i)
+ {
+ output_shape.dim(output_index++) = indices_shape.dim(i);
+ }
+ for (int i = axis + 1; i < params_shape.num_dims(); ++i)
+ {
+ output_shape.dim(output_index++) = params_shape.dim(i);
+ }
+ output()->resize(output_shape);
+}
+
+void Gather::execute() const
+{
+ switch (params()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Gather::evalFloat() const
+{
+ assert(indices()->element_type() == DataType::S32 || indices()->element_type() == DataType::S64);
+
+ const auto params_data = getTensorData<float>(params());
+ auto output_data = getTensorData<float>(output());
+
+ tflite::GatherParams tparams;
+ tparams.axis = _params.axis;
+ tparams.batch_dims = _params.batch_dims;
+
+ if (indices()->element_type() == DataType::S32)
+ {
+ const auto indices_data = getTensorData<int32_t>(indices());
+
+ luci_interpreter_pal::Gather<float, int32_t>(tparams, getTensorShape(params()), params_data,
+ getTensorShape(indices()), indices_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ const auto indices_data = getTensorData<int64_t>(indices());
+
+ luci_interpreter_pal::Gather<float, int64_t>(tparams, getTensorShape(params()), params_data,
+ getTensorShape(indices()), indices_data,
+ getTensorShape(output()), output_data);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Gather.h b/compiler/luci-interpreter/src/kernels/Gather.h
new file mode 100644
index 000000000..cc02d64fb
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Gather.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_GATHER_H
+#define LUCI_INTERPRETER_KERNELS_GATHER_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Gather : public KernelWithParams<GatherParams>
+{
+public:
+ Gather(const Tensor *params, const Tensor *indices, Tensor *output, const GatherParams &gparams);
+
+ const Tensor *params() const { return _inputs[0]; }
+ const Tensor *indices() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_GATHER_H
diff --git a/compiler/luci-interpreter/src/kernels/Gather.test.cpp b/compiler/luci-interpreter/src/kernels/Gather.test.cpp
new file mode 100644
index 000000000..4b3dda708
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Gather.test.cpp
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Gather.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class GatherTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(GatherTest, Simple)
+{
+ std::vector<float> params_data{1.f, 2.f, 3.f, 4.f, 5.f, 6.f};
+ std::vector<int32_t> indices_data{1, 0, 1, 5};
+ std::vector<float> ref_output_data{2.f, 1.f, 2.f, 6.f};
+
+ Tensor params_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 6}, params_data, _memory_manager.get());
+ Tensor indices_tensor = makeInputTensor<DataType::S32>({4}, indices_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ GatherParams gparams;
+
+ gparams.axis = 1;
+ gparams.batch_dims = 0;
+
+ Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4}));
+}
+
+TEST_F(GatherTest, Simple_Batch)
+{
+ Shape params_shape = {3, 5};
+ Shape indices_shape = {3, 2};
+ std::vector<float> params_data{0., 0., 1., 0., 2., 3., 0., 0., 0., 4., 0., 5., 0., 6., 0.};
+ std::vector<int32_t> indices_data{2, 4, 0, 4, 1, 3};
+ std::vector<float> ref_output_data{1., 2., 3., 4., 5., 6.};
+
+ Tensor params_tensor =
+ makeInputTensor<DataType::FLOAT32>(params_shape, params_data, _memory_manager.get());
+ Tensor indices_tensor =
+ makeInputTensor<DataType::S32>(indices_shape, indices_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ GatherParams gparams;
+
+ gparams.axis = 1;
+ gparams.batch_dims = 1;
+
+ Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 2}));
+}
+
+TEST_F(GatherTest, Simple_NEG)
+{
+ Tensor params_tensor = makeInputTensor<DataType::S32>({1}, {1}, _memory_manager.get());
+ Tensor indices_tensor = makeInputTensor<DataType::S32>({1}, {0}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ GatherParams gparams;
+
+ Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(GatherTest, Axis_NEG)
+{
+ Tensor params_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor indices_tensor = makeInputTensor<DataType::S32>({1}, {0}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ GatherParams gparams;
+
+ gparams.axis = 100;
+ gparams.batch_dims = 0;
+
+ Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(GatherTest, Batch_NEG)
+{
+ std::vector<float> params_data{1.f, 2.f, 3.f, 4.f, 5.f, 6.f};
+ std::vector<int32_t> indices_data{1, 0, 1, 5};
+ std::vector<float> ref_output_data{2.f, 1.f, 2.f, 6.f};
+
+ Tensor params_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 6}, params_data, _memory_manager.get());
+ Tensor indices_tensor = makeInputTensor<DataType::S32>({4}, indices_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ GatherParams gparams;
+
+ gparams.axis = 0;
+ gparams.batch_dims = 1;
+
+ Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Gelu.cpp b/compiler/luci-interpreter/src/kernels/Gelu.cpp
new file mode 100644
index 000000000..44e018e0e
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Gelu.cpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Gelu.h"
+
+#include "kernels/Utils.h"
+
+#include "PALGelu.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Gelu::Gelu(const Tensor *input, Tensor *output, const GeluParams &params)
+ : KernelWithParams<GeluParams>({input}, {output}, params)
+{
+}
+
+void Gelu::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+ output()->resize(input()->shape());
+}
+
+void Gelu::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Gelu::evalFloat() const
+{
+ luci_interpreter_pal::Gelu(params().approximate, getTensorShape(input()),
+ getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Gelu.h b/compiler/luci-interpreter/src/kernels/Gelu.h
new file mode 100644
index 000000000..c7c8bd971
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Gelu.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_GELU_H
+#define LUCI_INTERPRETER_KERNELS_GELU_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Gelu : public KernelWithParams<GeluParams>
+{
+public:
+ Gelu(const Tensor *input, Tensor *output, const GeluParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_GELU_H
diff --git a/compiler/luci-interpreter/src/kernels/Gelu.test.cpp b/compiler/luci-interpreter/src/kernels/Gelu.test.cpp
new file mode 100644
index 000000000..64428098e
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Gelu.test.cpp
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Gelu.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data,
+ bool approximate)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<float>();
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ GeluParams params{};
+ params.approximate = approximate;
+
+ Gelu kernel(&input_tensor, &output_tensor, params);
+
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
+}
+
+class GeluTest : public ::testing::Test
+{
+};
+
+TEST_F(GeluTest, Simple)
+{
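+  // Reference values follow the exact formulation GELU(x) = x * Phi(x); e.g. GELU(1.0) ~= 0.841345.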
+ Check(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 3},
+ /*input_data=*/
+ {
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -1.0f, -2.0f, // Row 2
+ },
+ /*output_data=*/
+ {
+ 0.0f, 0.841345f, 2.99595f, // Row 1
+ 0.841345f, -0.158655f, -0.0455003f, // Row 2
+ },
+ /*approximate=*/false);
+
+ SUCCEED();
+}
+
+TEST_F(GeluTest, Approximate)
+{
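+  // The tanh-based approximation deviates slightly from the exact form; e.g. GELU(1.0) ~= 0.841192.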
+ Check(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 3},
+ /*input_data=*/
+ {
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -1.0f, -2.0f, // Row 2
+ },
+ /*output_data=*/
+ {
+ 0.0f, 0.841192f, 2.99636f, // Row 1
+ 0.841192f, -0.158808f, -0.0454023f, // Row 2
+ },
+ /*approximate=*/true);
+
+ SUCCEED();
+}
+
+TEST_F(GeluTest, DifferentInOutType_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 3},
+ {
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -1.0f, -2.0f, // Row 2
+ },
+ memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ GeluParams params{};
+ params.approximate = false;
+
+ Gelu kernel(&input_tensor, &output_tensor, params);
+
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Greater.cpp b/compiler/luci-interpreter/src/kernels/Greater.cpp
new file mode 100644
index 000000000..5ccae3c38
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Greater.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Greater.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Greater::Greater(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {}
+
+void Greater::configure()
+{
+ LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
+ if (x()->element_type() == DataType::U8)
+ {
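+    // Precompute the fixed-point rescale factors once; evalQuantized() reuses them per element.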
+ quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+ quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+ }
+ output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+void Greater::execute() const
+{
+ switch (x()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Greater::evalFloat() const
+{
+ const auto x_data = getTensorData<float>(x());
+ const auto y_data = getTensorData<float>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowGreater(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::Greater(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
+ y_data, getTensorShape(output()), output_data);
+ }
+}
+
+template <typename T> void Greater::evalInteger() const
+{
+ const auto x_data = getTensorData<T>(x());
+ const auto y_data = getTensorData<T>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowGreaterNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::GreaterNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+}
+
+void Greater::evalQuantized() const
+{
+ const auto x_data = getTensorData<uint8_t>(x());
+ const auto y_data = getTensorData<uint8_t>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.left_shift = 8;
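+  // Each input is offset by its zero point, left-shifted by 8 bits, and rescaled by its own
+  // multiplier, so both operands are compared in a shared fixed-point domain.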
+ op_params.input1_offset = -x()->zero_point(); // Note the '-'
+ op_params.input1_shift = _x_shift;
+ op_params.input1_multiplier = _x_multiplier;
+ op_params.input2_offset = -y()->zero_point(); // Note the '-'
+ op_params.input2_shift = _y_shift;
+ op_params.input2_multiplier = _y_multiplier;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowGreaterWithScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::GreaterWithScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Greater.h b/compiler/luci-interpreter/src/kernels/Greater.h
new file mode 100644
index 000000000..065f76d7b
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Greater.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_GREATER_H
+#define LUCI_INTERPRETER_KERNELS_GREATER_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Greater : public Kernel
+{
+public:
+ Greater(const Tensor *x, const Tensor *y, Tensor *output);
+
+ const Tensor *x() const { return _inputs[0]; }
+ const Tensor *y() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ template <typename T> void evalInteger() const;
+ void evalQuantized() const;
+
+private:
+ int32_t _x_multiplier = 0;
+ int _x_shift = 0;
+ int32_t _y_multiplier = 0;
+ int _y_shift = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_GREATER_H
diff --git a/compiler/luci-interpreter/src/kernels/Greater.test.cpp b/compiler/luci-interpreter/src/kernels/Greater.test.cpp
new file mode 100644
index 000000000..a48080124
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Greater.test.cpp
@@ -0,0 +1,334 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Greater.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class GreaterTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(GreaterTest, FloatSimple)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ -1, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ false, false, true, // Row 1
+ true, false, false, // Row 2
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
+}
+
+TEST_F(GreaterTest, FloatBroadcast)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ -1, 0, 1, // Row 3
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ false, false, true, // Row 1
+ true, false, false, // Row 2
+ false, false, true, // Row 3
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{min_value, 2, max_value};
+
+ std::vector<dtype> y_data{min_value + 1, -2, max_value};
+
+ std::vector<bool> ref_output_data{false, true, false};
+
+ Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{
+ min_value, 2, 3, // Row 1
+ 4, 5, max_value, // Row 2
+ -1, -4, -3, // Row 3
+ min_value, -2, max_value, // Row 4
+ };
+
+ std::vector<dtype> y_data{
+ min_value + 1, -2, max_value - 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, false, // Row 1
+ true, true, true, // Row 2
+ true, false, false, // Row 3
+ false, false, true, // Row 4
+ };
+
+ Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+TEST_F(GreaterTest, Int32)
+{
+ checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(GreaterTest, Int64)
+{
+ checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
+// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+const float F_MIN = -128.0 / 128.0;
+const float F_MAX = 127.0 / 128.0;
+
+TEST_F(GreaterTest, Uint8Quantized)
+{
+ std::vector<float> x_data{
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.6, 0.6, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ false, false, true, true, // Row 1
+ true, false, true, false, // Row 2
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(GreaterTest, Uint8QuantizedRescale)
+{
+ std::vector<float> x_data{
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.6, 0.6, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ false, false, true, true, // Row 1
+ true, false, true, false, // Row 2
+ };
+
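+  // Quantize x and y with different parameters so the kernel's rescaling path
+  // is exercised, not just the raw uint8 comparison.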
+ std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 3);
+
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(GreaterTest, Uint8QuantizedBroadcast)
+{
+ std::vector<float> x_data{
+ 0.4, -0.8, 0.7, 0.3, // Row 1
+ -0.5, 0.1, 0, 0.5, // Row 2
+ 1, 0, 0.05, -1, // Row 3
+ };
+
+ std::vector<float> y_data{
+ -1, 0.05, 0, 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, true, false, // Row 1
+ true, true, false, false, // Row 2
+ true, false, true, false, // Row 3
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(GreaterTest, Input_Type_Mismatch_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(GreaterTest, Input_Output_Type_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(GreaterTest, Float_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(GreaterTest, Int32_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(GreaterTest, Int64_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/GreaterEqual.cpp b/compiler/luci-interpreter/src/kernels/GreaterEqual.cpp
new file mode 100644
index 000000000..27e42c971
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/GreaterEqual.cpp
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/GreaterEqual.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+GreaterEqual::GreaterEqual(const Tensor *x, const Tensor *y, Tensor *output)
+ : Kernel({x, y}, {output})
+{
+}
+
+void GreaterEqual::configure()
+{
+ LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
+ if (x()->element_type() == DataType::U8)
+ {
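+    // Precompute fixed-point multipliers and shifts from the input scales so
+    // execute() can rescale both quantized operands without floating point.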
+ quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+ quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+ }
+ output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+void GreaterEqual::execute() const
+{
+ switch (x()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void GreaterEqual::evalFloat() const
+{
+ const auto x_data = getTensorData<float>(x());
+ const auto y_data = getTensorData<float>(y());
+ auto output_data = getTensorData<bool>(output());
+
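+  // Differing shapes mean broadcasting is required; dispatch to the slow 4-D
+  // broadcast reference kernel in that case.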
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowGreaterEqual(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::GreaterEqual(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
+ y_data, getTensorShape(output()), output_data);
+ }
+}
+
+template <typename T> void GreaterEqual::evalInteger() const
+{
+ const auto x_data = getTensorData<T>(x());
+ const auto y_data = getTensorData<T>(y());
+ auto output_data = getTensorData<bool>(output());
+
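+  // Integer operands share a common representation, so the "NoScaling"
+  // comparison variants can compare the raw values directly.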
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
+ op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+ else
+ {
+ tflite::reference_ops::GreaterEqualNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+}
+
+void GreaterEqual::evalQuantized() const
+{
+ const auto x_data = getTensorData<uint8_t>(x());
+ const auto y_data = getTensorData<uint8_t>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
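+  // Shift both operands into a higher-precision domain, then apply the
+  // per-input multiplier/shift pairs computed in configure() so tensors with
+  // different quantization parameters compare correctly.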
+ op_params.left_shift = 8;
+ op_params.input1_offset = -x()->zero_point(); // Note the '-'
+ op_params.input1_shift = _x_shift;
+ op_params.input1_multiplier = _x_multiplier;
+ op_params.input2_offset = -y()->zero_point(); // Note the '-'
+ op_params.input2_shift = _y_shift;
+ op_params.input2_multiplier = _y_multiplier;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowGreaterEqualWithScaling(
+ op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+ else
+ {
+ tflite::reference_ops::GreaterEqualWithScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/GreaterEqual.h b/compiler/luci-interpreter/src/kernels/GreaterEqual.h
new file mode 100644
index 000000000..e333c30a6
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/GreaterEqual.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_GREATER_EQUAL_H
+#define LUCI_INTERPRETER_KERNELS_GREATER_EQUAL_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class GreaterEqual : public Kernel
+{
+public:
+ GreaterEqual(const Tensor *x, const Tensor *y, Tensor *output);
+
+ const Tensor *x() const { return _inputs[0]; }
+ const Tensor *y() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ template <typename T> void evalInteger() const;
+ void evalQuantized() const;
+
+private:
+ int32_t _x_multiplier = 0;
+ int _x_shift = 0;
+ int32_t _y_multiplier = 0;
+ int _y_shift = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_GREATER_EQUAL_H
diff --git a/compiler/luci-interpreter/src/kernels/GreaterEqual.test.cpp b/compiler/luci-interpreter/src/kernels/GreaterEqual.test.cpp
new file mode 100644
index 000000000..35bf88eab
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/GreaterEqual.test.cpp
@@ -0,0 +1,333 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/GreaterEqual.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class GreaterEqualTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(GreaterEqualTest, FloatSimple)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ -1, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, true, // Row 1
+ true, true, false, // Row 2
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
+}
+
+TEST_F(GreaterEqualTest, FloatBroadcast)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ -1, 0, 1, // Row 3
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, true, // Row 1
+ true, false, false, // Row 2
+ false, false, true, // Row 3
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{min_value, 2, max_value};
+
+ std::vector<dtype> y_data{min_value + 1, -2, max_value};
+
+ std::vector<bool> ref_output_data{false, true, true};
+
+ Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{
+ min_value, 2, 3, // Row 1
+ 4, 5, max_value, // Row 2
+ -1, -4, -3, // Row 3
+ min_value, -2, max_value - 1, // Row 4
+ };
+
+ std::vector<dtype> y_data{
+ min_value + 1, -2, max_value - 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, false, // Row 1
+ true, true, true, // Row 2
+ true, false, false, // Row 3
+ false, true, true, // Row 4
+ };
+
+ Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+TEST_F(GreaterEqualTest, Int32)
+{
+ checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(GreaterEqualTest, Int64)
+{
+ checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
+// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+const float F_MIN = -128.0 / 128.0;
+const float F_MAX = 127.0 / 128.0;
+
+TEST_F(GreaterEqualTest, Uint8Quantized)
+{
+ std::vector<float> x_data{
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.6, 0.55, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, true, true, // Row 1
+ true, false, true, false, // Row 2
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(GreaterEqualTest, Uint8QuantizedRescale)
+{
+ std::vector<float> x_data{
+ 0.5, 0.5, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.5, 0.6, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, true, true, // Row 1
+ true, false, true, false, // Row 2
+ };
+
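+  // Use distinct quantization parameters for x and y so the comparison must
+  // rescale both inputs rather than compare raw uint8 values.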
+ std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5);
+
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(GreaterEqualTest, Uint8QuantizedBroadcast)
+{
+ std::vector<float> x_data{
+ 0.4, -0.8, 0.7, 0.3, // Row 1
+ -0.5, 0.1, 0, 0.5, // Row 2
+ 1, 0, 0.05, -1, // Row 3
+ };
+
+ std::vector<float> y_data{
+ -1, 0.05, 0, 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, true, false, // Row 1
+ true, true, true, false, // Row 2
+ true, false, true, false, // Row 3
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(GreaterEqualTest, Input_Type_Mismatch_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(GreaterEqualTest, Input_Output_Type_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(GreaterEqualTest, Float_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(GreaterEqualTest, Int32_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(GreaterEqualTest, Int64_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/HardSwish.cpp b/compiler/luci-interpreter/src/kernels/HardSwish.cpp
new file mode 100644
index 000000000..b1008459a
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/HardSwish.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/HardSwish.h"
+#include "kernels/Utils.h"
+
+#include "PALHardSwish.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+HardSwish::HardSwish(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void HardSwish::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ output()->resize(input()->shape());
+}
+
+void HardSwish::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
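+      // hard-swish(x) = x * relu6(x + 3) / 6, applied elementwise; the PAL
+      // header selects a platform-specific implementation of this formula.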
+ luci_interpreter_pal::HardSwish(getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/HardSwish.h b/compiler/luci-interpreter/src/kernels/HardSwish.h
new file mode 100644
index 000000000..bb9e9b653
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/HardSwish.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_HARDSWISH_H
+#define LUCI_INTERPRETER_KERNELS_HARDSWISH_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class HardSwish : public Kernel
+{
+public:
+ HardSwish(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_HARDSWISH_H
diff --git a/compiler/luci-interpreter/src/kernels/HardSwish.test.cpp b/compiler/luci-interpreter/src/kernels/HardSwish.test.cpp
new file mode 100644
index 000000000..c055fee0e
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/HardSwish.test.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/HardSwish.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ HardSwish kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
+}
+
+TEST(HardSwishTest, SimpleHardSwish)
+{
+ Check(
+ /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
+ /*input_data=*/
+ {
+ 0, -6, 2, -4, //
+ 3, -2, 10, -0.1, //
+ },
+ /*output_data=*/
+ {
+ 0, -0, 1.66667, -0, //
+ 3, -0.333333, 10, -0.0483333, //
+ });
+}
+
+TEST(HardSwishTest, InOutTypeMismatch_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ 0, -6, 2, -4, //
+ 3, -2, 10, -0.1, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ HardSwish kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/If.cpp b/compiler/luci-interpreter/src/kernels/If.cpp
index e6bdee338..971708bca 100644
--- a/compiler/luci-interpreter/src/kernels/If.cpp
+++ b/compiler/luci-interpreter/src/kernels/If.cpp
@@ -15,6 +15,7 @@
*/
#include "kernels/If.h"
+#include "kernels/Utils.h"
#include <cstring>
@@ -33,21 +34,21 @@ static std::vector<const Tensor *> joinInputs(const Tensor *cond,
If::If(const Tensor *cond, const std::vector<const Tensor *> &inputs, std::vector<Tensor *> outputs,
RuntimeGraph *then_graph, RuntimeGraph *else_graph)
- : Kernel(joinInputs(cond, inputs), std::move(outputs)), _then_graph(then_graph),
- _else_graph(else_graph)
+ : Kernel(joinInputs(cond, inputs), std::move(outputs)), _then_graph(then_graph),
+ _else_graph(else_graph)
{
}
void If::configure()
{
- assert(cond()->element_type() == DataType::BOOL);
- assert(cond()->shape().num_elements() == 1);
+ LUCI_INTERPRETER_CHECK(cond()->element_type() == DataType::BOOL);
+ LUCI_INTERPRETER_CHECK(cond()->shape().num_elements() == 1);
for (RuntimeGraph *graph : {_then_graph, _else_graph})
{
(void)graph;
- assert(graph->getInputTensors().size() == getInputTensors().size() - 1);
- assert(graph->getOutputTensors().size() == getOutputTensors().size());
+ LUCI_INTERPRETER_CHECK(graph->getInputTensors().size() == getInputTensors().size() - 1);
+ LUCI_INTERPRETER_CHECK(graph->getOutputTensors().size() == getOutputTensors().size());
}
}
@@ -62,11 +63,13 @@ void If::execute() const
// Copy kernel inputs to active graph inputs.
for (size_t i = 0; i < getInputTensors().size() - 1; ++i)
{
- assert(graph_inputs[i]->element_type() == input(i)->element_type());
+ LUCI_INTERPRETER_CHECK(graph_inputs[i]->element_type() == input(i)->element_type());
graph_inputs[i]->resize(input(i)->shape());
const int32_t num_elements = input(i)->shape().num_elements();
const std::size_t element_size = getDataTypeSize(input(i)->element_type());
+    // TODO: Think about how to allocate memory for outputs in the main graph
+ active_graph->configureAllocations(graph_inputs[i]);
std::memcpy(graph_inputs[i]->data<void>(), input(i)->data<void>(), num_elements * element_size);
}
@@ -75,8 +78,10 @@ void If::execute() const
// Copy graph outputs to kernel outputs.
for (size_t i = 0; i < getOutputTensors().size(); ++i)
{
- assert(graph_outputs[i]->element_type() == output(i)->element_type());
+ LUCI_INTERPRETER_CHECK(graph_outputs[i]->element_type() == output(i)->element_type());
output(i)->resize(graph_outputs[i]->shape());
+    // TODO: Think about how to allocate memory for outputs in the main graph
+ active_graph->configureAllocations(output(i));
const int32_t num_elements = output(i)->shape().num_elements();
const std::size_t element_size = getDataTypeSize(output(i)->element_type());
diff --git a/compiler/luci-interpreter/src/kernels/If.test.cpp b/compiler/luci-interpreter/src/kernels/If.test.cpp
index 9b3857ce3..c5f4faf75 100644
--- a/compiler/luci-interpreter/src/kernels/If.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/If.test.cpp
@@ -21,6 +21,8 @@
#include "kernels/Mul.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
namespace luci_interpreter
{
namespace kernels
@@ -30,15 +32,27 @@ namespace
using namespace testing;
-RuntimeGraph *buildAddSubgraph(RuntimeModule *module)
+class IfTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+RuntimeGraph *buildAddSubgraph(RuntimeModule *module, IMemoryManager *memory_manager)
{
- RuntimeGraph *graph = module->addGraph();
+ RuntimeGraph *graph = module->addGraph(memory_manager);
Tensor *input1 = graph->addTensor(
- std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
Tensor *input2 = graph->addTensor(
- std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
Tensor *output = graph->addTensor(
- std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+
+ memory_manager->allocate_memory(*input1);
+ memory_manager->allocate_memory(*input2);
+ memory_manager->allocate_memory(*output);
graph->setInputTensors({input1, input2});
graph->setOutputTensors({output});
@@ -50,15 +64,19 @@ RuntimeGraph *buildAddSubgraph(RuntimeModule *module)
return graph;
}
-RuntimeGraph *buildMulSubgraph(RuntimeModule *module)
+RuntimeGraph *buildMulSubgraph(RuntimeModule *module, IMemoryManager *memory_manager)
{
- RuntimeGraph *graph = module->addGraph();
+ RuntimeGraph *graph = module->addGraph(memory_manager);
Tensor *input1 = graph->addTensor(
- std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
Tensor *input2 = graph->addTensor(
- std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
Tensor *output = graph->addTensor(
- std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+
+ memory_manager->allocate_memory(*input1);
+ memory_manager->allocate_memory(*input2);
+ memory_manager->allocate_memory(*output);
graph->setInputTensors({input1, input2});
graph->setOutputTensors({output});
@@ -70,40 +88,72 @@ RuntimeGraph *buildMulSubgraph(RuntimeModule *module)
return graph;
}
-TEST(IfTest, CondTrue)
+TEST_F(IfTest, CondTrue)
{
- Tensor cond = makeInputTensor<DataType::BOOL>({1}, {true});
- Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7});
- Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2});
+ Tensor cond = makeInputTensor<DataType::BOOL>({1}, {true}, _memory_manager.get());
+ Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get());
+ Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get());
Tensor output = makeOutputTensor(DataType::FLOAT32);
RuntimeModule module(nullptr);
- RuntimeGraph *then_graph = buildAddSubgraph(&module);
- RuntimeGraph *else_graph = buildMulSubgraph(&module);
+ RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get());
+ RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get());
If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph);
kernel.configure();
+ _memory_manager->allocate_memory(output);
kernel.execute();
- EXPECT_THAT(extractTensorData<float>(output), ElementsAreArray(ArrayFloatNear({6, 9})));
+ EXPECT_THAT(extractTensorData<float>(output), FloatArrayNear({6, 9}));
}
-TEST(IfTest, CondFalse)
+TEST_F(IfTest, CondFalse)
{
- Tensor cond = makeInputTensor<DataType::BOOL>({1}, {false});
- Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7});
- Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2});
+ Tensor cond = makeInputTensor<DataType::BOOL>({1}, {false}, _memory_manager.get());
+ Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get());
+ Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get());
Tensor output = makeOutputTensor(DataType::FLOAT32);
RuntimeModule module(nullptr);
- RuntimeGraph *then_graph = buildAddSubgraph(&module);
- RuntimeGraph *else_graph = buildMulSubgraph(&module);
+ RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get());
+ RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get());
If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph);
kernel.configure();
+ _memory_manager->allocate_memory(output);
kernel.execute();
- EXPECT_THAT(extractTensorData<float>(output), ElementsAreArray(ArrayFloatNear({5, 14})));
+ EXPECT_THAT(extractTensorData<float>(output), FloatArrayNear({5, 14}));
+}
+
+TEST_F(IfTest, InvalidCondType_NEG)
+{
+ Tensor cond = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get());
+ Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get());
+ Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get());
+ Tensor output = makeOutputTensor(DataType::FLOAT32);
+
+ RuntimeModule module(nullptr);
+ RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get());
+ RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get());
+
+ If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(IfTest, InvalidCondElementNum_NEG)
+{
+ Tensor cond = makeInputTensor<DataType::BOOL>({2}, {false, true}, _memory_manager.get());
+ Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get());
+ Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get());
+ Tensor output = makeOutputTensor(DataType::FLOAT32);
+
+ RuntimeModule module(nullptr);
+ RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get());
+ RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get());
+
+ If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph);
+ EXPECT_ANY_THROW(kernel.configure());
}
} // namespace
diff --git a/compiler/luci-interpreter/src/kernels/InstanceNorm.cpp b/compiler/luci-interpreter/src/kernels/InstanceNorm.cpp
new file mode 100644
index 000000000..22a329be6
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/InstanceNorm.cpp
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/InstanceNorm.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/common.h>
+#include <cmath>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+InstanceNorm::InstanceNorm(const Tensor *input, const Tensor *gamma, const Tensor *beta,
+ Tensor *output, const InstanceNormParams &params)
+ : KernelWithParams<InstanceNormParams>({input, gamma, beta}, {output}, params)
+{
+}
+
+void InstanceNorm::configure()
+{
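+  // gamma and beta must be 1-D, either per-channel (size C, matching the last
+  // input dimension) or a single broadcast scalar (size 1).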
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(gamma()->element_type() == input()->element_type());
+ LUCI_INTERPRETER_CHECK(gamma()->shape().num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(gamma()->shape().dim(0) == input()->shape().dim(3) ||
+ gamma()->shape().dim(0) == 1);
+ LUCI_INTERPRETER_CHECK(beta()->element_type() == input()->element_type());
+ LUCI_INTERPRETER_CHECK(beta()->shape().num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(beta()->shape().dim(0) == input()->shape().dim(3) ||
+ beta()->shape().dim(0) == 1);
+ output()->resize(input()->shape());
+}
+
+void InstanceNorm::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void InstanceNorm::evalFloat() const
+{
+ float activation_min, activation_max;
+ calculateActivationRange(params().activation, &activation_min, &activation_max);
+ auto input_shape = getTensorShape(input());
+ auto output_shape = getTensorShape(output());
+ const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int32_t heights = tflite::MatchingDim(input_shape, 1, output_shape, 1);
+ const int32_t widths = tflite::MatchingDim(input_shape, 2, output_shape, 2);
+ const int32_t channels = tflite::MatchingDim(input_shape, 3, output_shape, 3);
+ const float *input_data = getTensorData<float>(input());
+ const float *gamma_data = getTensorData<float>(gamma());
+ auto gamma_shape = getTensorShape(gamma());
+ bool single_gamma = gamma_shape.DimensionsCount() == 1 && gamma_shape.Dims(0) == 1;
+ const float *beta_data = getTensorData<float>(beta());
+ auto beta_shape = getTensorShape(beta());
+ bool single_beta = beta_shape.DimensionsCount() == 1 && beta_shape.Dims(0) == 1;
+ float *output_data = getTensorData<float>(output());
+ for (int32_t batch = 0; batch < batches; batch++)
+ {
+ for (int32_t channel = 0; channel < channels; channel++)
+ {
+      double sum = 0.0;
+      double square_sum = 0.0;
+ int32_t size = heights * widths;
+ for (int32_t height = 0; height < heights; height++)
+ {
+ for (int32_t width = 0; width < widths; width++)
+ {
+ double input_val = input_data[tflite::Offset(input_shape, batch, height, width, channel)];
+ sum += input_val;
+ square_sum += (input_val * input_val);
+ }
+ }
+ double mean = sum / size;
+ double var = square_sum / size - mean * mean;
+
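+      // Fold normalization, gamma, and beta into one affine map per channel:
+      // (x - mean) / sqrt(var + eps) * gamma + beta  ==  x * a + b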
+ double gamma = single_gamma ? gamma_data[0] : gamma_data[channel];
+ double beta = single_beta ? beta_data[0] : beta_data[channel];
+ double a = gamma / (std::sqrt(var + params().epsilon));
+ double b = -mean * a + beta;
+
+ for (int32_t height = 0; height < heights; height++)
+ {
+ for (int32_t width = 0; width < widths; width++)
+ {
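+          // Input and output shapes are identical after configure(), so using
+          // output_shape to index the input tensor is safe here.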
+ double input_value =
+ input_data[tflite::Offset(output_shape, batch, height, width, channel)];
+ double output_value = input_value * a + b;
+ output_data[tflite::Offset(output_shape, batch, height, width, channel)] =
+ tflite::ActivationFunctionWithMinMax((float)output_value, activation_min,
+ activation_max);
+ }
+ }
+ }
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/InstanceNorm.h b/compiler/luci-interpreter/src/kernels/InstanceNorm.h
new file mode 100644
index 000000000..a70a84e0a
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/InstanceNorm.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_INSTANCENORM_H
+#define LUCI_INTERPRETER_KERNELS_INSTANCENORM_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class InstanceNorm : public KernelWithParams<InstanceNormParams>
+{
+public:
+ InstanceNorm(const Tensor *input, const Tensor *gamma, const Tensor *beta, Tensor *output,
+ const InstanceNormParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *gamma() const { return _inputs[1]; }
+ const Tensor *beta() const { return _inputs[2]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_INSTANCENORM_H
diff --git a/compiler/luci-interpreter/src/kernels/InstanceNorm.test.cpp b/compiler/luci-interpreter/src/kernels/InstanceNorm.test.cpp
new file mode 100644
index 000000000..04400c3c0
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/InstanceNorm.test.cpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "kernels/InstanceNorm.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class InstanceNormTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(InstanceNormTest, Simple)
+{
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 2, 1}, {1, 1, 1, 1}, _memory_manager.get());
+ Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get());
+ Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ InstanceNormParams params{};
+ params.epsilon = 0.1f;
+ params.activation = Activation::NONE;
+
+ InstanceNorm kernel(&input_tensor, &gamma_tensor, &beta_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear({2, 2, 2, 2}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 1}));
+}
+
+TEST_F(InstanceNormTest, Single_gamma_beta)
+{
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 1, 2}, {1, 1, 1, 1}, _memory_manager.get());
+ Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get());
+ Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ InstanceNormParams params{};
+ params.epsilon = 0.1f;
+ params.activation = Activation::NONE;
+
+ InstanceNorm kernel(&input_tensor, &gamma_tensor, &beta_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear({2, 2, 2, 2}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 2}));
+}
+
+TEST_F(InstanceNormTest, Wrong_gamma_beta_dim_NEG)
+{
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 1, 2}, {1, 1, 1, 1}, _memory_manager.get());
+ Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1, 1, 1}, _memory_manager.get());
+ Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({3}, {2, 2, 2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ InstanceNormParams params{};
+ params.epsilon = 0.1f;
+ params.activation = Activation::NONE;
+
+ InstanceNorm kernel(&input_tensor, &gamma_tensor, &beta_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/L2Normalize.cpp b/compiler/luci-interpreter/src/kernels/L2Normalize.cpp
index cfa535075..64222953f 100644
--- a/compiler/luci-interpreter/src/kernels/L2Normalize.cpp
+++ b/compiler/luci-interpreter/src/kernels/L2Normalize.cpp
@@ -17,7 +17,7 @@
#include "kernels/L2Normalize.h"
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALL2Normalize.h"
#include <stdexcept>
@@ -28,21 +28,22 @@ namespace kernels
{
L2Normalize::L2Normalize(const Tensor *input, Tensor *output, const L2NormParams &params)
- : KernelWithParams<L2NormParams>({input}, {output}, params)
+ : KernelWithParams<L2NormParams>({input}, {output}, params)
{
}
void L2Normalize::configure()
{
- assert(input()->shape().num_dims() <= 4);
- assert(output()->element_type() == DataType::FLOAT32 || output()->element_type() == DataType::U8);
- assert(input()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= 4);
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32 ||
+ output()->element_type() == DataType::U8);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
if (output()->element_type() == DataType::U8)
{
- assert(output()->scale() == (1. / 128.));
- assert(output()->zero_point() == 128);
+ LUCI_INTERPRETER_CHECK(output()->scale() == (1. / 128.));
+ LUCI_INTERPRETER_CHECK(output()->zero_point() == 128);
}
- assert(params().activation == Activation::NONE);
+ LUCI_INTERPRETER_CHECK(params().activation == Activation::NONE);
output()->resize(input()->shape());
}
@@ -65,9 +66,9 @@ template <typename T> void L2Normalize::eval(int32_t zero_point) const
{
tflite::L2NormalizationParams op_params{};
op_params.input_zero_point = zero_point;
- tflite::optimized_ops::L2Normalization(op_params, getTensorShape(input()),
- getTensorData<T>(input()), getTensorShape(output()),
- getTensorData<T>(output()));
+ luci_interpreter_pal::L2Normalization(op_params, getTensorShape(input()),
+ getTensorData<T>(input()), getTensorShape(output()),
+ getTensorData<T>(output()));
}
} // namespace kernels
diff --git a/compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp b/compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp
index f53eaca94..6f960e8b4 100644
--- a/compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp
@@ -16,6 +16,7 @@
*/
#include "kernels/L2Normalize.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -26,11 +27,13 @@ namespace
using namespace testing;
-TEST(L2NormalizeTest, Float)
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data)
{
- std::vector<float> input_data = {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1};
-
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, input_data);
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
L2NormParams params{};
@@ -38,16 +41,85 @@ TEST(L2NormalizeTest, Float)
L2Normalize kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+template <>
+void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data,
+ std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
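+  // Derive quantization parameters from the data range, widened to include 0
+  // so that zero is exactly representable.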
+ std::pair<float, int32_t> quant_param =
+ quantizationParams<uint8_t>(std::min(input_data) < 0 ? std::min(input_data) : 0.f,
+ std::max(input_data) > 0 ? std::max(input_data) : 0.f);
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ input_shape, quant_param.first, quant_param.second, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 128., 128);
+
+ L2NormParams params{};
+ params.activation = Activation::NONE;
+
+ L2Normalize kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
- std::vector<float> ref_output_data{-0.55, 0.3, 0.35, 0.6, -0.35, 0.05};
- EXPECT_THAT(extractTensorData<float>(output_tensor),
- ElementsAreArray(ArrayFloatNear(ref_output_data)));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data, output_tensor.scale()));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+template <typename T> class L2NormalizeTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(L2NormalizeTest, DataTypes);
+
+TYPED_TEST(L2NormalizeTest, Simple)
+{
+ Check<TypeParam>({1, 1, 1, 6}, {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1},
+ {-0.55, 0.3, 0.35, 0.6, -0.35, 0.05});
}
-// TODO Uint8Quantized
-// Implement GetDequantizedOutput Function.
-// Create Test for Uint8 Case
+TEST(L2NormalizeTest, ActivationType_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data = {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ L2NormParams params{};
+ params.activation = Activation::RELU6;
+
+ L2Normalize kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(L2NormalizeTest, InvalidOutputQuantParam_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data = {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 1, 1, 6}, 1. / 64., 127, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 64., 127);
+
+ L2NormParams params{};
+ params.activation = Activation::NONE;
+
+ L2Normalize kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
} // namespace
} // namespace kernels
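
The Simple reference data above checks out by hand: L2 normalization divides each element by the Euclidean norm, and this input was evidently chosen so that the sum of squares is exactly 4.0. A standalone sketch of the arithmetic (not the interpreter's kernel; names here are illustrative only):

#include <cmath>
#include <cstdio>
#include <vector>

// Standalone L2 normalization over a flat vector. For the test input the
// sum of squares is 1.21 + 0.36 + 0.49 + 1.44 + 0.49 + 0.01 = 4.0, so the
// norm is 2.0 and every expected output is simply input / 2.
int main()
{
  std::vector<float> in{-1.1f, 0.6f, 0.7f, 1.2f, -0.7f, 0.1f};
  float sum_sq = 0.0f;
  for (float v : in)
    sum_sq += v * v;
  const float norm = std::sqrt(sum_sq); // 2.0
  for (float v : in)
    std::printf("%g ", v / norm); // -0.55 0.3 0.35 0.6 -0.35 0.05
  return 0;
}
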
diff --git a/compiler/luci-interpreter/src/kernels/L2Pool2D.cpp b/compiler/luci-interpreter/src/kernels/L2Pool2D.cpp
index 37a6ddedc..5a88808d5 100644
--- a/compiler/luci-interpreter/src/kernels/L2Pool2D.cpp
+++ b/compiler/luci-interpreter/src/kernels/L2Pool2D.cpp
@@ -19,7 +19,7 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALL2Pool2D.h"
#include <stdexcept>
@@ -30,14 +30,14 @@ namespace kernels
{
L2Pool2D::L2Pool2D(const Tensor *input, Tensor *output, const Pool2DParams &params)
- : KernelWithParams<Pool2DParams>({input}, {output}, params)
+ : KernelWithParams<Pool2DParams>({input}, {output}, params)
{
}
void L2Pool2D::configure()
{
- assert(input()->shape().num_dims() == 4);
- assert(input()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
int batches = input()->shape().dim(0);
int height = input()->shape().dim(1);
@@ -49,13 +49,13 @@ void L2Pool2D::configure()
int out_width, out_height;
out_width = computeOutputSize(padding, width, params().filter_width, params().stride_width, 1);
out_height =
- computeOutputSize(padding, height, params().filter_height, params().stride_height, 1);
+ computeOutputSize(padding, height, params().filter_height, params().stride_height, 1);
_padding_width =
- computePadding(params().stride_width, 1, width, params().filter_width, out_width);
+ computePadding(params().stride_width, 1, width, params().filter_width, out_width);
_padding_height =
- computePadding(params().stride_height, 1, height, params().filter_height, out_height);
+ computePadding(params().stride_height, 1, height, params().filter_height, out_height);
- assert(input()->element_type() == DataType::FLOAT32);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::FLOAT32);
output()->resize({batches, out_height, out_width, channels_out});
}
@@ -75,9 +75,9 @@ void L2Pool2D::execute() const
op_params.padding_values.width = _padding_width;
op_params.float_activation_min = activation_min;
op_params.float_activation_max = activation_max;
- tflite::optimized_ops::L2Pool(op_params, getTensorShape(input()),
- getTensorData<float>(input()), getTensorShape(output()),
- getTensorData<float>(output()));
+ luci_interpreter_pal::L2Pool(op_params, getTensorShape(input()),
+ getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
break;
default:
throw std::runtime_error("Unsupported type.");
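
Swapping assert for LUCI_INTERPRETER_CHECK in configure() is what makes the negative tests below possible: an assert vanishes in release builds and aborts in debug, whereas the check must raise a catchable error for EXPECT_ANY_THROW(kernel.configure()) to pass. A minimal sketch of such a macro, assuming the usual throw-on-failure shape (the real definition in luci-interpreter may differ in name handling and message format):

#include <stdexcept>
#include <string>

// Hypothetical stand-in for LUCI_INTERPRETER_CHECK; illustrative only.
#define LUCI_INTERPRETER_CHECK_SKETCH(cond)                              \
  do                                                                     \
  {                                                                      \
    if (!(cond))                                                         \
      throw std::runtime_error(std::string(__FILE__) + ":" +             \
                               std::to_string(__LINE__) +                \
                               ": check failed: " #cond);                \
  } while (false)
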
diff --git a/compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp b/compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp
index 06bb9388f..7245456cb 100644
--- a/compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/L2Pool2D.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -27,14 +28,23 @@ namespace
using namespace testing;
-TEST(L2Pool2DTest, FloatNone)
+class L2Pool2DTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(L2Pool2DTest, FloatNone)
{
Shape input_shape{1, 2, 4, 1};
std::vector<float> input_data{
- 0, 6, 2, 4, //
- 3, 2, 10, 7, //
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Pool2DParams params{};
@@ -47,22 +57,23 @@ TEST(L2Pool2DTest, FloatNone)
L2Pool2D kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{3.5, 6.5};
- EXPECT_THAT(extractTensorData<float>(output_tensor),
- ElementsAreArray(ArrayFloatNear(ref_output_data)));
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
// TODO make a Shape checking of output_tensor.
}
-TEST(L2Pool2DTest, FloatRelu)
+TEST_F(L2Pool2DTest, FloatRelu)
{
Shape input_shape{1, 2, 4, 1};
std::vector<float> input_data{
- -1, -6, 2, 4, //
- -3, -2, 10, 7, //
+ -1, -6, 2, 4, //
+ -3, -2, 10, 7, //
};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Pool2DParams params{};
@@ -75,22 +86,23 @@ TEST(L2Pool2DTest, FloatRelu)
L2Pool2D kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{3.53553, 6.5};
- EXPECT_THAT(extractTensorData<float>(output_tensor),
- ElementsAreArray(ArrayFloatNear(ref_output_data)));
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
// TODO make a Shape checking of output_tensor.
}
-TEST(L2Pool2DTest, FloatRelu1)
+TEST_F(L2Pool2DTest, FloatRelu1)
{
Shape input_shape{1, 2, 4, 1};
std::vector<float> input_data{
- -0.1, -0.6, 2, 4, //
- -0.3, -0.2, 10, 7, //
+ -0.1, -0.6, 2, 4, //
+ -0.3, -0.2, 10, 7, //
};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Pool2DParams params{};
@@ -103,22 +115,23 @@ TEST(L2Pool2DTest, FloatRelu1)
L2Pool2D kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{0.353553, 1.0};
- EXPECT_THAT(extractTensorData<float>(output_tensor),
- ElementsAreArray(ArrayFloatNear(ref_output_data)));
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
// TODO make a Shape checking of output_tensor.
}
-TEST(L2Pool2DTest, FloatRelu6)
+TEST_F(L2Pool2DTest, FloatRelu6)
{
Shape input_shape{1, 2, 4, 1};
std::vector<float> input_data{
- -0.1, -0.6, 2, 4, //
- -0.3, -0.2, 10, 7, //
+ -0.1, -0.6, 2, 4, //
+ -0.3, -0.2, 10, 7, //
};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Pool2DParams params{};
@@ -131,22 +144,23 @@ TEST(L2Pool2DTest, FloatRelu6)
L2Pool2D kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{0.353553, 6.0};
- EXPECT_THAT(extractTensorData<float>(output_tensor),
- ElementsAreArray(ArrayFloatNear(ref_output_data)));
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
// TODO make a Shape checking of output_tensor.
}
-TEST(L2Pool2DTest, FloatPaddingSame)
+TEST_F(L2Pool2DTest, FloatPaddingSame)
{
Shape input_shape{1, 2, 4, 1};
std::vector<float> input_data{
- 0, 6, 2, 4, //
- 3, 2, 10, 7, //
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Pool2DParams params{};
@@ -159,22 +173,23 @@ TEST(L2Pool2DTest, FloatPaddingSame)
L2Pool2D kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{3.5, 6.5};
- EXPECT_THAT(extractTensorData<float>(output_tensor),
- ElementsAreArray(ArrayFloatNear(ref_output_data)));
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
// TODO make a Shape checking of output_tensor.
}
-TEST(L2Pool2DTest, FloatPaddingSameSlide1)
+TEST_F(L2Pool2DTest, FloatPaddingSameStride)
{
Shape input_shape{1, 2, 4, 1};
std::vector<float> input_data{
- 0, 6, 2, 4, //
- 3, 2, 10, 7, //
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Pool2DParams params{};
@@ -187,22 +202,24 @@ TEST(L2Pool2DTest, FloatPaddingSameSlide1)
L2Pool2D kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{3.5, 6.0, 6.5, 5.70088, 2.54951, 7.2111, 8.63134, 7.0};
- EXPECT_THAT(extractTensorData<float>(output_tensor),
- ElementsAreArray(ArrayFloatNear(ref_output_data)));
+ // NOTE with NEON+ruy, error is #1=-1.14441e-05, #6=-1.81198e-05
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, 1.0e-4f));
// TODO make a Shape checking of output_tensor.
}
-TEST(L2Pool2DTest, FloatPaddingValidSlide1)
+TEST_F(L2Pool2DTest, FloatPaddingValidStride)
{
Shape input_shape{1, 2, 4, 1};
std::vector<float> input_data{
- 0, 6, 2, 4, //
- 3, 2, 10, 7, //
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Pool2DParams params{};
@@ -215,14 +232,60 @@ TEST(L2Pool2DTest, FloatPaddingValidSlide1)
L2Pool2D kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{3.5, 6.0, 6.5};
- EXPECT_THAT(extractTensorData<float>(output_tensor),
- ElementsAreArray(ArrayFloatNear(ref_output_data)));
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
// TODO make a Shape checking of output_tensor.
}
+TEST_F(L2Pool2DTest, InvalidInputShape_NEG)
+{
+ Shape input_shape{1, 2, 4};
+ std::vector<float> input_data{
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.activation = Activation::NONE;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 1;
+ params.stride_width = 1;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(L2Pool2DTest, InvalidInputOutputType_NEG)
+{
+ Shape input_shape{1, 2, 4};
+ std::vector<float> input_data{
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.activation = Activation::NONE;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 1;
+ params.stride_width = 1;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
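
The FloatNone expectations follow directly from the definition of L2 pooling: each output is the root-mean-square of its window. Judging from the reference data, the test uses a 2x2 filter with stride 2, giving two windows whose results are exact; that exactness is why no extra tolerance is needed there, unlike the stride-1 SAME case where NEON+ruy drifts on the order of 1e-5 and the test allows 1.0e-4. A standalone check of the arithmetic:

#include <cassert>
#include <cmath>

// Root-mean-square of a 2x2 window, as L2 pooling computes it.
static float l2pool4(float a, float b, float c, float d)
{
  return std::sqrt((a * a + b * b + c * c + d * d) / 4.0f);
}

int main()
{
  // Window {0, 6, 3, 2}:  sqrt(49 / 4)  = 3.5
  // Window {2, 4, 10, 7}: sqrt(169 / 4) = 6.5
  assert(l2pool4(0, 6, 3, 2) == 3.5f);
  assert(l2pool4(2, 4, 10, 7) == 6.5f);
  return 0;
}
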
diff --git a/compiler/luci-interpreter/src/kernels/LeakyRelu.cpp b/compiler/luci-interpreter/src/kernels/LeakyRelu.cpp
index 1a26debe0..3833a55e8 100644
--- a/compiler/luci-interpreter/src/kernels/LeakyRelu.cpp
+++ b/compiler/luci-interpreter/src/kernels/LeakyRelu.cpp
@@ -18,8 +18,9 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/leaky_relu.h>
+
+#include "PALLeakyRelu.h"
#include <stdexcept>
@@ -30,13 +31,13 @@ namespace kernels
{
LeakyRelu::LeakyRelu(const Tensor *input, Tensor *output, const LeakyReluParams &params)
- : KernelWithParams<LeakyReluParams>({input}, {output}, params)
+ : KernelWithParams<LeakyReluParams>({input}, {output}, params)
{
}
void LeakyRelu::configure()
{
- assert(input()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
if (input()->element_type() == DataType::U8)
{
double alpha_multiplier = input()->scale() * params().alpha / output()->scale();
@@ -66,9 +67,8 @@ void LeakyRelu::evalFloat() const
{
tflite::LeakyReluParams op_params{};
op_params.alpha = params().alpha;
- tflite::optimized_ops::LeakyRelu(op_params, getTensorShape(input()),
- getTensorData<float>(input()), getTensorShape(output()),
- getTensorData<float>(output()));
+ luci_interpreter_pal::LeakyRelu(op_params, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
}
void LeakyRelu::evalQuantized() const
@@ -82,8 +82,8 @@ void LeakyRelu::evalQuantized() const
op_params.output_shift_identity = _output_shift_identity;
tflite::reference_ops::QuantizeLeakyRelu(
- op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(output()),
- getTensorData<uint8_t>(output()));
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
}
} // namespace kernels
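
Only the dispatch target changed here; the operation itself is the usual piecewise map, with the quantized path precomputing two fixed-point multipliers in configure() (input_scale * alpha / output_scale for negative inputs, input_scale / output_scale as the identity path). A float reference of the elementwise semantics, assumed equivalent to the PAL routine:

#include <cstddef>

// Reference semantics for float LeakyRelu: identity for non-negative
// inputs, scaling by alpha otherwise.
void leakyReluRef(const float *in, float *out, std::size_t size, float alpha)
{
  for (std::size_t i = 0; i < size; ++i)
    out[i] = in[i] >= 0.0f ? in[i] : alpha * in[i];
}
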
diff --git a/compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp b/compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp
index c79d3d6bc..0f6263b57 100644
--- a/compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/LeakyRelu.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -28,12 +29,13 @@ using namespace testing;
template <typename T>
void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
- std::initializer_list<T> input_data, std::initializer_list<T> output_data, float alpha,
- DataType element_type)
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data,
+ float alpha)
{
- Tensor input_tensor{element_type, input_shape, {}, ""};
- input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(T));
-
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(element_type);
LeakyReluParams params{};
@@ -42,32 +44,83 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int
LeakyRelu kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
- (void)output_shape;
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
}
-TEST(LeakReluTest, FloatSimple)
+template <>
+void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data,
+ std::initializer_list<float> output_data, float alpha)
{
- Check<float>(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 3}, /*input_data=*/
- {
- 0.0f, 1.0f, 3.0f, // Row 1
- 1.0f, -1.0f, -2.0f, // Row 2
- },
- /*output_data=*/
- {
- 0.0f, 1.0f, 3.0f, // Row 1
- 1.0f, -0.5f, -1.0f, // Row 2
- },
- /*alpha=*/0.5f, getElementType<float>());
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ const float quantized_tolerance = getTolerance(-8, 127.f / 16.f, 255);
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-8, 127.f / 16.f);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ input_shape, quant_param.first, quant_param.second, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ LeakyReluParams params{};
+ params.alpha = alpha;
+
+ LeakyRelu kernel(&input_tensor, &output_tensor, params);
+
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data, quantized_tolerance));
+}
+
+template <typename T> class LeakyReluTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(LeakyReluTest, DataTypes);
+
+TYPED_TEST(LeakyReluTest, Simple)
+{
+ Check<TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 3},
+ /*input_data=*/
+ {
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -1.0f, -2.0f, // Row 2
+ },
+ /*output_data=*/
+ {
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -0.5f, -1.0f, // Row 2
+ },
+ /*alpha=*/0.5f);
SUCCEED();
}
-// TODO Uint8Simple
-// Implement GetDequantizedOutput Function.
-// Create Test for Uint8 Case
+TEST(LeakyReluTest, InvalidInputOutputType_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 3},
+ {
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -1.0f, -2.0f, // Row 2
+ },
+ memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ LeakyReluParams params{};
+ params.alpha = 0.5f;
+
+ LeakyRelu kernel(&input_tensor, &output_tensor, params);
+
+ EXPECT_ANY_THROW(kernel.configure());
+}
} // namespace
} // namespace kernels
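
The uint8 specialization above leans on quantizationParams<uint8_t>(-8, 127.f / 16.f), a range picked so that the affine parameters come out exact: it spans 255/16, so the scale is 1/16 and the zero point is 128. A sketch of the standard computation, assuming the TestUtils helper follows the usual convention (its edge-case handling may differ):

#include <cmath>
#include <cstdint>
#include <utility>

// Standard uint8 affine quantization parameters from a float range.
// For (-8, 127/16): scale = (127/16 + 8) / 255 = 1/16, zero_point = 128.
std::pair<float, int32_t> uint8QuantParams(float f_min, float f_max)
{
  const float scale = (f_max - f_min) / 255.0f;
  const int32_t zero_point = static_cast<int32_t>(std::round(-f_min / scale));
  return {scale, zero_point};
}
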
diff --git a/compiler/luci-interpreter/src/kernels/Less.cpp b/compiler/luci-interpreter/src/kernels/Less.cpp
new file mode 100644
index 000000000..8d26ff297
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Less.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Less.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Less::Less(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {}
+
+void Less::configure()
+{
+ LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
+ if (x()->element_type() == DataType::U8)
+ {
+ quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+ quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+ }
+ output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+void Less::execute() const
+{
+ switch (x()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Less::evalFloat() const
+{
+ const auto x_data = getTensorData<float>(x());
+ const auto y_data = getTensorData<float>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowLess(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::Less(op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+}
+
+template <typename T> void Less::evalInteger() const
+{
+ const auto x_data = getTensorData<T>(x());
+ const auto y_data = getTensorData<T>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowLessNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::LessNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+}
+
+void Less::evalQuantized() const
+{
+ const auto x_data = getTensorData<uint8_t>(x());
+ const auto y_data = getTensorData<uint8_t>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.left_shift = 8;
+ op_params.input1_offset = -x()->zero_point(); // Note the '-'
+ op_params.input1_shift = _x_shift;
+ op_params.input1_multiplier = _x_multiplier;
+ op_params.input2_offset = -y()->zero_point(); // Note the '-'
+ op_params.input2_shift = _y_shift;
+ op_params.input2_multiplier = _y_multiplier;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowLessWithScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::LessWithScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
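
The quantized path never materializes floats: configure() folds each input's scale into a multiplier/shift pair, and at run time the operands are offset by their negated zero points, upscaled by left_shift = 8, and rescaled before an integer compare. The contract being approximated is simply a comparison of the dequantized real values; a float reference of that contract (the fixed-point mechanics belong to the TFLite routines and are not reproduced here):

#include <cstdint>

// Float reference for quantized Less: real_v = scale * (v - zero_point).
// The kernel reaches the same verdict purely in fixed point.
bool quantizedLessRef(uint8_t x, float x_scale, int32_t x_zp, uint8_t y,
                      float y_scale, int32_t y_zp)
{
  const float real_x = x_scale * (static_cast<int32_t>(x) - x_zp);
  const float real_y = y_scale * (static_cast<int32_t>(y) - y_zp);
  return real_x < real_y;
}
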
diff --git a/compiler/luci-interpreter/src/kernels/Less.h b/compiler/luci-interpreter/src/kernels/Less.h
new file mode 100644
index 000000000..e27bb689c
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Less.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LESS_H
+#define LUCI_INTERPRETER_KERNELS_LESS_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Less : public Kernel
+{
+public:
+ Less(const Tensor *x, const Tensor *y, Tensor *output);
+
+ const Tensor *x() const { return _inputs[0]; }
+ const Tensor *y() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ template <typename T> void evalInteger() const;
+ void evalQuantized() const;
+
+private:
+ int32_t _x_multiplier = 0;
+ int _x_shift = 0;
+ int32_t _y_multiplier = 0;
+ int _y_shift = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LESS_H
diff --git a/compiler/luci-interpreter/src/kernels/Less.test.cpp b/compiler/luci-interpreter/src/kernels/Less.test.cpp
new file mode 100644
index 000000000..8c5963363
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Less.test.cpp
@@ -0,0 +1,334 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Less.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class LessTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LessTest, FloatSimple)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ -1, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, false, // Row 1
+ false, false, true, // Row 2
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
+}
+
+TEST_F(LessTest, FloatBroadcast)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ -1, 0, 1, // Row 3
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, false, // Row 1
+ false, true, true, // Row 2
+ true, true, false, // Row 3
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{min_value, 2, max_value};
+
+ std::vector<dtype> y_data{min_value + 1, -2, max_value};
+
+ std::vector<bool> ref_output_data{true, false, false};
+
+ Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{
+ min_value, 2, 3, // Row 1
+ 4, 5, max_value, // Row 2
+ -1, -4, -3, // Row 3
+ min_value, -2, max_value, // Row 4
+ };
+
+ std::vector<dtype> y_data{
+ min_value + 1, -2, max_value - 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, true, // Row 1
+ false, false, false, // Row 2
+ false, true, true, // Row 3
+ true, false, false, // Row 4
+ };
+
+ Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+TEST_F(LessTest, Int32)
+{
+ checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(LessTest, Int64)
+{
+ checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
+// Choose min / max so that the 256 uint8 levels fall exactly on multiples of 1 / 128 and no rounding occurs.
+const float F_MIN = -128.0 / 128.0;
+const float F_MAX = 127.0 / 128.0;
+
+TEST_F(LessTest, Uint8Quantized)
+{
+ std::vector<float> x_data{
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.6, 0.55, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, false, false, // Row 1
+ false, true, false, true, // Row 2
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(LessTest, Uint8QuantizedRescale)
+{
+ std::vector<float> x_data{
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.6, 0.6, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, false, false, // Row 1
+ false, true, false, true, // Row 2
+ };
+
+ std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5);
+
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(LessTest, Uint8QuantizedBroadcast)
+{
+ std::vector<float> x_data{
+ 0.4, -0.8, 0.7, 0.3, // Row 1
+ -0.5, 0.1, 0, 0.5, // Row 2
+ 1, 0, 0.05, -1, // Row 3
+ };
+
+ std::vector<float> y_data{
+ -1, 0.05, 0, 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, false, true, // Row 1
+ false, false, false, true, // Row 2
+ false, true, false, true, // Row 3
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(LessTest, Input_Type_Mismatch_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessTest, Input_Output_Type_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessTest, Float_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessTest, Int32_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessTest, Int64_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
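
The min/max comment above is about exact representability. Under the affine scheme these tests use:

\[
\mathrm{scale} = \frac{F_{\mathrm{MAX}} - F_{\mathrm{MIN}}}{255} = \frac{255/128}{255} = \frac{1}{128},
\qquad
\mathrm{zero\_point} = -\frac{F_{\mathrm{MIN}}}{\mathrm{scale}} = 128
\]

Every multiple of 1/128 in [-1, 127/128] therefore survives the quantize/dequantize round trip unchanged, so the boolean reference data cannot be flipped by rounding.
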
diff --git a/compiler/luci-interpreter/src/kernels/LessEqual.cpp b/compiler/luci-interpreter/src/kernels/LessEqual.cpp
new file mode 100644
index 000000000..b474bc47a
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/LessEqual.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LessEqual.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+LessEqual::LessEqual(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {}
+
+void LessEqual::configure()
+{
+ LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
+ if (x()->element_type() == DataType::U8)
+ {
+ quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+ quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+ }
+ output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+void LessEqual::execute() const
+{
+ switch (x()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void LessEqual::evalFloat() const
+{
+ const auto x_data = getTensorData<float>(x());
+ const auto y_data = getTensorData<float>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowLessEqual(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::LessEqual(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
+ y_data, getTensorShape(output()), output_data);
+ }
+}
+
+template <typename T> void LessEqual::evalInteger() const
+{
+ const auto x_data = getTensorData<T>(x());
+ const auto y_data = getTensorData<T>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowLessEqualNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::LessEqualNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+}
+
+void LessEqual::evalQuantized() const
+{
+ const auto x_data = getTensorData<uint8_t>(x());
+ const auto y_data = getTensorData<uint8_t>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.left_shift = 8;
+ op_params.input1_offset = -x()->zero_point(); // Note the '-'
+ op_params.input1_shift = _x_shift;
+ op_params.input1_multiplier = _x_multiplier;
+ op_params.input2_offset = -y()->zero_point(); // Note the '-'
+ op_params.input2_shift = _y_shift;
+ op_params.input2_multiplier = _y_multiplier;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowLessEqualWithScaling(
+ op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+ else
+ {
+ tflite::reference_ops::LessEqualWithScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
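
Both comparison kernels delegate output-shape inference to calculateShapeForBroadcast, and the *_Broadcast_NEG tests pin down its contract: dimensions align from the trailing end, each pair must be equal or 1, and anything else (such as {2} against {3}) throws. A sketch of that contract under those assumptions (the real helper presumably lives in kernels/Utils and is not shown in this diff):

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <vector>

// NumPy-style broadcast shape inference, matching the behaviour the
// *_Broadcast_NEG tests imply; illustrative, not the actual helper.
std::vector<int32_t> broadcastShape(const std::vector<int32_t> &a,
                                    const std::vector<int32_t> &b)
{
  const std::size_t rank = std::max(a.size(), b.size());
  std::vector<int32_t> out(rank);
  for (std::size_t i = 0; i < rank; ++i)
  {
    const int32_t da = i < a.size() ? a[a.size() - 1 - i] : 1;
    const int32_t db = i < b.size() ? b[b.size() - 1 - i] : 1;
    if (da != db && da != 1 && db != 1)
      throw std::runtime_error("shapes are not broadcastable");
    out[rank - 1 - i] = std::max(da, db);
  }
  return out;
}
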
diff --git a/compiler/luci-interpreter/src/kernels/LessEqual.h b/compiler/luci-interpreter/src/kernels/LessEqual.h
new file mode 100644
index 000000000..f82ea90d4
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/LessEqual.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LESS_EQUAL_H
+#define LUCI_INTERPRETER_KERNELS_LESS_EQUAL_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class LessEqual : public Kernel
+{
+public:
+ LessEqual(const Tensor *x, const Tensor *y, Tensor *output);
+
+ const Tensor *x() const { return _inputs[0]; }
+ const Tensor *y() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ template <typename T> void evalInteger() const;
+ void evalQuantized() const;
+
+private:
+ int32_t _x_multiplier = 0;
+ int _x_shift = 0;
+ int32_t _y_multiplier = 0;
+ int _y_shift = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LESS_EQUAL_H
diff --git a/compiler/luci-interpreter/src/kernels/LessEqual.test.cpp b/compiler/luci-interpreter/src/kernels/LessEqual.test.cpp
new file mode 100644
index 000000000..b2e2fa7a1
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/LessEqual.test.cpp
@@ -0,0 +1,334 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LessEqual.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class LessEqualTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LessEqualTest, FloatSimple)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ -1, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ true, true, false, // Row 1
+ false, true, true, // Row 2
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
+}
+
+TEST_F(LessEqualTest, FloatBroadcast)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ -1, 0, 1, // Row 3
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ true, true, false, // Row 1
+ false, true, true, // Row 2
+ true, true, false, // Row 3
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{min_value, 2, max_value};
+
+ std::vector<dtype> y_data{min_value + 1, -2, max_value};
+
+ std::vector<bool> ref_output_data{true, false, true};
+
+ Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{
+ min_value, 2, 3, // Row 1
+ 4, 5, max_value, // Row 2
+ -1, -4, -3, // Row 3
+ min_value, -2, max_value, // Row 4
+ };
+
+ std::vector<dtype> y_data{
+ min_value + 1, -2, max_value - 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, true, // Row 1
+ false, false, false, // Row 2
+ false, true, true, // Row 3
+ true, true, false, // Row 4
+ };
+
+ Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+TEST_F(LessEqualTest, Int32)
+{
+ checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(LessEqualTest, Int64)
+{
+ checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
+// Choose min / max so that the 256 uint8 levels fall exactly on multiples of 1 / 128 and no rounding occurs.
+const float F_MIN = -128.0 / 128.0;
+const float F_MAX = 127.0 / 128.0;
+
+TEST_F(LessEqualTest, Uint8Quantized)
+{
+ std::vector<float> x_data{
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.6, 0.55, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ true, true, false, false, // Row 1
+ false, true, false, true, // Row 2
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(LessEqualTest, Uint8QuantizedRescale)
+{
+ std::vector<float> x_data{
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.6, 0.6, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ true, true, false, false, // Row 1
+ false, true, false, true, // Row 2
+ };
+
+ std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5);
+
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(LessEqualTest, Uint8QuantizedBroadcast)
+{
+ std::vector<float> x_data{
+ 0.4, -0.8, 0.7, 0.3, // Row 1
+ -0.5, 0.1, 0, 0.5, // Row 2
+ 1, 0, 0.05, -1, // Row 3
+ };
+
+ std::vector<float> y_data{
+ -1, 0.05, 0, 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, false, true, // Row 1
+ false, false, true, true, // Row 2
+ false, true, false, true, // Row 3
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(LessEqualTest, Input_Type_Mismatch_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessEqualTest, Input_Output_Type_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessEqualTest, Float_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessEqualTest, Int32_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessEqualTest, Int64_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.cpp b/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.cpp
index 08efa1d6a..a2bf442b0 100644
--- a/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.cpp
+++ b/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.cpp
@@ -18,7 +18,7 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALLocalResponseNormalization.h"
#include <stdexcept>
@@ -29,16 +29,16 @@ namespace kernels
{
LocalResponseNormalization::LocalResponseNormalization(
- const Tensor *input, Tensor *output, const LocalResponseNormalizationParams &params)
- : KernelWithParams<LocalResponseNormalizationParams>({input}, {output}, params)
+ const Tensor *input, Tensor *output, const LocalResponseNormalizationParams &params)
+ : KernelWithParams<LocalResponseNormalizationParams>({input}, {output}, params)
{
}
void LocalResponseNormalization::configure()
{
- assert(input()->shape().num_dims() == 4);
- assert(output()->element_type() == DataType::FLOAT32);
- assert(input()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
output()->resize(input()->shape());
}
@@ -52,9 +52,9 @@ void LocalResponseNormalization::execute() const
op_params.bias = params().bias;
op_params.alpha = params().alpha;
op_params.beta = params().beta;
- tflite::optimized_ops::LocalResponseNormalization(
- op_params, getTensorShape(input()), getTensorData<float>(input()),
- getTensorShape(output()), getTensorData<float>(output()));
+ luci_interpreter_pal::LocalResponseNormalization(
+ op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
break;
default:
throw std::runtime_error("Unsupported type.");
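
The SameAsL2Norm test name in the file that follows is literal. In the TFLite-style convention this kernel follows (alpha applied to the raw windowed sum of squares, which the expectations here are consistent with), LRN over the channel axis computes

\[
\mathrm{out}_c = \frac{\mathrm{in}_c}{\left(\mathrm{bias} + \alpha \sum_{c'=c-r}^{c+r} \mathrm{in}_{c'}^{2}\right)^{\beta}}
\]

With bias = 0, alpha = 1, beta = 0.5 and a radius covering all six channels, the denominator collapses to the Euclidean norm (exactly 2.0 for this input), which is why the expected outputs match the L2Normalize reference data.
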
diff --git a/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp b/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp
index 4191bdb29..4a9d4739f 100644
--- a/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/LocalResponseNormalization.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -27,10 +28,18 @@ namespace
using namespace testing;
-TEST(LocalResponseNormalizationTest, SameAsL2Norm)
+class LocalResponseNormalizationTest : public ::testing::Test
{
- Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LocalResponseNormalizationTest, SameAsL2Norm)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
LocalResponseNormalizationParams params{};
@@ -41,16 +50,17 @@ TEST(LocalResponseNormalizationTest, SameAsL2Norm)
LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor),
- ElementsAreArray(ArrayFloatNear({-0.55, 0.3, 0.35, 0.6, -0.35, 0.05})));
+ FloatArrayNear({-0.55, 0.3, 0.35, 0.6, -0.35, 0.05}));
}
-TEST(LocalResponseNormalizationTest, WithAlpha)
+TEST_F(LocalResponseNormalizationTest, WithAlpha)
{
- Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
LocalResponseNormalizationParams params{};
@@ -61,16 +71,17 @@ TEST(LocalResponseNormalizationTest, WithAlpha)
LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor),
- ElementsAreArray(ArrayFloatNear({-0.275, 0.15, 0.175, 0.3, -0.175, 0.025})));
+ FloatArrayNear({-0.275, 0.15, 0.175, 0.3, -0.175, 0.025}));
}
-TEST(LocalResponseNormalizationTest, WithBias)
+TEST_F(LocalResponseNormalizationTest, WithBias)
{
- Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
LocalResponseNormalizationParams params{};
@@ -81,16 +92,17 @@ TEST(LocalResponseNormalizationTest, WithBias)
LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor),
- ElementsAreArray(ArrayFloatNear({-0.22, 0.12, 0.14, 0.24, -0.14, 0.02})));
+ FloatArrayNear({-0.22, 0.12, 0.14, 0.24, -0.14, 0.02}));
}
-TEST(LocalResponseNormalizationTest, SmallRadius)
+TEST_F(LocalResponseNormalizationTest, SmallRadius)
{
- Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
LocalResponseNormalizationParams params{};
@@ -101,11 +113,43 @@ TEST(LocalResponseNormalizationTest, SmallRadius)
LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor),
- ElementsAreArray(
- ArrayFloatNear({-0.264926, 0.125109, 0.140112, 0.267261, -0.161788, 0.0244266})));
+ FloatArrayNear({-0.264926, 0.125109, 0.140112, 0.267261, -0.161788, 0.0244266}));
+}
+
+TEST_F(LocalResponseNormalizationTest, InvalidInputDimension_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = 20;
+ params.bias = 0.0;
+ params.alpha = 1.0;
+ params.beta = 0.5;
+
+ LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LocalResponseNormalizationTest, InvalidInputOutputType_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = 20;
+ params.bias = 0.0;
+ params.alpha = 1.0;
+ params.beta = 0.5;
+
+ LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
}
} // namespace
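Every test conversion in this series follows the same lifecycle: a fixture owns a `TestMemoryManager`, input tensors are built against it, and output memory is allocated between `configure()` (which computes the output shape) and `execute()` (which writes the data). A pseudocode-style sketch of that lifecycle, using the helper names exactly as they appear in the diffs; `SomeKernel` is a hypothetical stand-in for any kernel class:

  std::unique_ptr<IMemoryManager> mm = std::make_unique<TestMemoryManager>();

  Tensor in = makeInputTensor<DataType::FLOAT32>({1, 4}, {1, 2, 3, 4}, mm.get());
  Tensor out = makeOutputTensor(DataType::FLOAT32);

  SomeKernel kernel(&in, &out);
  kernel.configure();       // shape/type checks; resizes `out`
  mm->allocate_memory(out); // backing storage must exist before execute()
  kernel.execute();         // writes results into `out`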
diff --git a/compiler/luci-interpreter/src/kernels/Log.cpp b/compiler/luci-interpreter/src/kernels/Log.cpp
new file mode 100644
index 000000000..fa5f90e66
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Log.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Log.h"
+#include "kernels/Utils.h"
+
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Log::Log(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Log::configure() { output()->resize(input()->shape()); }
+
+void Log::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Log::evalFloat() const
+{
+ const auto input_data = getTensorData<float>(input());
+ const auto input_shape = input()->shape();
+ auto output_data = getTensorData<float>(output());
+ for (int64_t i = 0; i < input_shape.num_elements(); ++i)
+ {
+ output_data[i] = std::log(input_data[i]);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Log.h b/compiler/luci-interpreter/src/kernels/Log.h
new file mode 100644
index 000000000..49b293764
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Log.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LOG_H
+#define LUCI_INTERPRETER_KERNELS_LOG_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Log : public Kernel
+{
+public:
+ Log(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+
+private:
+ int32_t _output_multiplier{0};
+ int32_t _output_shift{0};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LOG_H
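Note that Log.h declares quantization members (`_output_multiplier`, `_output_shift`) that Log.cpp never touches; they read like residue from a quantized-kernel template. Functionally the kernel is a plain elementwise natural logarithm, equivalent to this standalone reference sketch (my own illustration, not project code):

  #include <cmath>
  #include <vector>

  // Reference behaviour of Log::evalFloat(): elementwise natural log.
  // E.g. {1.0f, 2.71828f, 7.38906f} -> approximately {0, 1, 2}.
  std::vector<float> log_reference(const std::vector<float> &in)
  {
    std::vector<float> out(in.size());
    for (std::size_t i = 0; i < in.size(); ++i)
      out[i] = std::log(in[i]);
    return out;
  }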
diff --git a/compiler/luci-interpreter/src/kernels/Log.test.cpp b/compiler/luci-interpreter/src/kernels/Log.test.cpp
new file mode 100644
index 000000000..3e360e098
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Log.test.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Log.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class LogTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LogTest, FloatSimple)
+{
+ std::vector<float> input_data{1, 3.1415926, 1, 1};
+
+ std::vector<float> ref_output_data{0, 1.14473, 0, 0};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 1, 4, 1}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Log kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 4, 1}));
+}
+
+TEST_F(LogTest, Invalid_Input_Type_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ Log kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/LogSoftmax.cpp b/compiler/luci-interpreter/src/kernels/LogSoftmax.cpp
new file mode 100644
index 000000000..79c315338
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/LogSoftmax.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogSoftmax.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/log_softmax.h>
+
+#include "PALLogSoftmax.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+LogSoftmax::LogSoftmax(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void LogSoftmax::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ if (input()->element_type() == DataType::U8)
+ {
+ LUCI_INTERPRETER_CHECK(output()->scale() == 16. / 256);
+ LUCI_INTERPRETER_CHECK(output()->zero_point() == 255);
+
+ tflite::SoftmaxParams params{};
+
+ params.table = _table;
+ params.beta = 1.0;
+ luci_interpreter_pal::PopulateSoftmaxLookupTable(&params, input()->scale(), params.beta);
+ }
+ output()->resize(input()->shape());
+}
+
+void LogSoftmax::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void LogSoftmax::evalFloat() const
+{
+ tflite::SoftmaxParams params{};
+ tflite::reference_ops::LogSoftmax(params, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void LogSoftmax::evalQuantized() const
+{
+ const auto input_shape = getTensorShape(input());
+ const auto output_shape = getTensorShape(output());
+ const auto input_scale = input()->scale();
+ uint8_t *output_data = getTensorData<uint8_t>(output());
+ const uint8_t *input_data = getTensorData<uint8_t>(input());
+ const float beta = 1.0;
+
+ tflite::SoftmaxParams params{};
+
+ params.table = const_cast<float *>(_table);
+ params.zero_point = output()->zero_point();
+ params.scale = output()->scale();
+
+ luci_interpreter_pal::InitializeParams(&params, input_scale, beta);
+ luci_interpreter_pal::LogSoftmax(params, input_scale, input_shape, input_data, output_shape,
+ output_data);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
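For reference, the float path computes the numerically stable form log_softmax(x_i) = (x_i - m) - log(sum_j exp(x_j - m)) with m = max_j x_j. A minimal standalone sketch (my own reference code, not the TFLite kernel) that reproduces the float expectations in the test file below, e.g. {0, -6, 2, 4} -> {-4.14297, -10.14297, -2.14297, -0.142971}:

  #include <algorithm>
  #include <cmath>
  #include <vector>

  // Numerically stable log-softmax over a single row.
  std::vector<float> log_softmax(const std::vector<float> &x)
  {
    const float m = *std::max_element(x.begin(), x.end());
    float sum = 0.f;
    for (float v : x)
      sum += std::exp(v - m); // largest term is exp(0) = 1, so no overflow
    const float log_sum = std::log(sum);
    std::vector<float> out(x.size());
    for (std::size_t i = 0; i < x.size(); ++i)
      out[i] = (x[i] - m) - log_sum;
    return out;
  }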
diff --git a/compiler/luci-interpreter/src/kernels/LogSoftmax.h b/compiler/luci-interpreter/src/kernels/LogSoftmax.h
new file mode 100644
index 000000000..18477fbe3
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/LogSoftmax.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LOGSOFTMAX_H
+#define LUCI_INTERPRETER_KERNELS_LOGSOFTMAX_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class LogSoftmax : public Kernel
+{
+public:
+ LogSoftmax(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+
+ float _table[256];
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LOGSOFTMAX_H
diff --git a/compiler/luci-interpreter/src/kernels/LogSoftmax.test.cpp b/compiler/luci-interpreter/src/kernels/LogSoftmax.test.cpp
new file mode 100644
index 000000000..50dcd5c28
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/LogSoftmax.test.cpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogSoftmax.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class LogSoftmaxTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LogSoftmaxTest, Float)
+{
+ Shape input_shape{2, 4};
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ LogSoftmax kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ -4.14297, -10.14297, -2.14297, -.142971, //
+ -7.00104, -12.00104, -.00104087, -9.00104, //
+ };
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(LogSoftmaxTest, Uint8)
+{
+ float kMin = -10;
+ float kMax = 10;
+ float kLogSoftmaxQuantizedTolerance = 16. / 256;
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(kMin, kMax);
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
+ };
+ Tensor input_tensor = makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 16. / 256, 255);
+
+ LogSoftmax kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ -4.14297, -10.14297, -2.14297, -.142971, //
+ -7.00104, -12.00104, -.00104087, -9.00104, //
+ };
+ std::vector<int32_t> ref_output_shape{2, 4};
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, kLogSoftmaxQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray({189, 93, 221, 253, 142, 63, 255, 111}));
+}
+
+TEST_F(LogSoftmaxTest, InvalidInputOutputType_NEG)
+{
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 4}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 16. / 256, 255);
+
+ LogSoftmax kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LogSoftmaxTest, InvalidOutputQuantParam_NEG)
+{
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-10, 10);
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
+ };
+ Tensor input_tensor = makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 20. / 256, 255);
+
+ LogSoftmax kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
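The fixed output quantization that `LogSoftmax::configure()` enforces (scale 16/256, zero_point 255) is a natural fit: with value = scale * (q - zero_point), uint8 q in [0, 255] covers exactly [-15.9375, 0], matching the fact that log-softmax outputs are always non-positive. A quick check of that arithmetic:

  #include <cstdint>

  constexpr float kScale = 16.f / 256.f; // 0.0625
  constexpr int kZeroPoint = 255;

  // q = 255 -> 0.0 (maximum possible log-softmax value)
  // q = 0   -> -15.9375 (most negative representable value)
  inline float dequantize(std::uint8_t q) { return kScale * (int(q) - kZeroPoint); }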
diff --git a/compiler/luci-interpreter/src/kernels/LogicalAnd.cpp b/compiler/luci-interpreter/src/kernels/LogicalAnd.cpp
new file mode 100644
index 000000000..8e7263231
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/LogicalAnd.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogicalAnd.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+LogicalAnd::LogicalAnd(const Tensor *input1, const Tensor *input2, Tensor *output)
+ : Kernel({input1, input2}, {output})
+{
+}
+
+void LogicalAnd::configure()
+{
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void LogicalAnd::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::BOOL:
+ evalLogicalAnd();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+inline void LogicalAnd::evalLogicalAnd() const
+{
+ BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<bool>(input1()),
+ getTensorShape(input2()), getTensorData<bool>(input2()),
+ getTensorShape(output()), getTensorData<bool>(output()),
+ [](bool x, bool y) { return x && y; });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/LogicalAnd.h b/compiler/luci-interpreter/src/kernels/LogicalAnd.h
new file mode 100644
index 000000000..46b889986
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/LogicalAnd.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LOGICALAND_H
+#define LUCI_INTERPRETER_KERNELS_LOGICALAND_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class LogicalAnd : public Kernel
+{
+public:
+ LogicalAnd(const Tensor *input1, const Tensor *input2, Tensor *output);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ inline void evalLogicalAnd() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LOGICALAND_H
diff --git a/compiler/luci-interpreter/src/kernels/LogicalAnd.test.cpp b/compiler/luci-interpreter/src/kernels/LogicalAnd.test.cpp
new file mode 100644
index 000000000..21b7951e0
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/LogicalAnd.test.cpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogicalAnd.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class LogicalAndTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LogicalAndTest, Basic)
+{
+ Shape input_shape{1, 1, 1, 4};
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::BOOL>(input_shape, {true, false, false, true}, _memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::BOOL>(input_shape, {true, false, true, false}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LogicalAnd kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor),
+ ::testing::ElementsAre(true, false, false, false));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4));
+}
+
+TEST_F(LogicalAndTest, Broadcast)
+{
+ Tensor input_tensor1 = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true},
+ _memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {true}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LogicalAnd kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor),
+ ::testing::ElementsAre(true, false, false, true));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4));
+}
+
+TEST_F(LogicalAndTest, MismatchInputType_NEG)
+{
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ LogicalAnd kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LogicalAndTest, InputTypeInvalid_NEG)
+{
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 1}, {0}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LogicalAnd kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
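LogicalAnd, LogicalOr and Maximum all funnel into `BinaryOpBroadcastSlow` with a two-argument lambda. That helper is not shown in this diff; as intuition for what such a broadcast fallback does, here is a deliberately simplified sketch covering the scalar-vs-vector cases exercised by the tests above ({1,1,1,4} against {1,1,1,1}). The real helper handles arbitrary multi-dimensional shapes; the shape semantics here are an assumption for illustration only.

  #include <algorithm>
  #include <cstddef>
  #include <vector>

  // Each input index advances normally, or stays pinned to 0 when that
  // input holds a single broadcastable element.
  template <typename T, typename F>
  std::vector<T> broadcast_binary(const std::vector<T> &a, const std::vector<T> &b, F op)
  {
    const std::size_t n = std::max(a.size(), b.size());
    std::vector<T> out(n);
    for (std::size_t i = 0; i < n; ++i)
      out[i] = op(a[a.size() == 1 ? 0 : i], b[b.size() == 1 ? 0 : i]);
    return out;
  }

  // Usage mirroring LogicalAnd::evalLogicalAnd():
  //   auto r = broadcast_binary<bool>({true, false, false, true}, {true},
  //                                   [](bool x, bool y) { return x && y; });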
diff --git a/compiler/luci-interpreter/src/kernels/LogicalNot.cpp b/compiler/luci-interpreter/src/kernels/LogicalNot.cpp
new file mode 100644
index 000000000..65ab961aa
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/LogicalNot.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogicalNot.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+LogicalNot::LogicalNot(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void LogicalNot::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ output()->resize(input()->shape());
+}
+
+void LogicalNot::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::BOOL:
+ evalLogicalNot();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+inline void LogicalNot::evalLogicalNot() const
+{
+ const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
+ bool *output_data = getTensorData<bool>(output());
+ const bool *input_data = getTensorData<bool>(input());
+ for (int i = 0; i < size; ++i)
+ {
+ output_data[i] = !input_data[i];
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/LogicalNot.h b/compiler/luci-interpreter/src/kernels/LogicalNot.h
new file mode 100644
index 000000000..1608fafa5
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/LogicalNot.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LOGICALNOT_H
+#define LUCI_INTERPRETER_KERNELS_LOGICALNOT_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class LogicalNot : public Kernel
+{
+public:
+ LogicalNot(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ inline void evalLogicalNot() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LOGICALNOT_H
diff --git a/compiler/luci-interpreter/src/kernels/LogicalNot.test.cpp b/compiler/luci-interpreter/src/kernels/LogicalNot.test.cpp
new file mode 100644
index 000000000..3cbf27f6b
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/LogicalNot.test.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogicalNot.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class LogicalNotTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LogicalNotTest, Basic)
+{
+ Shape input_shape{1, 1, 1, 4};
+ Tensor input_tensor =
+ makeInputTensor<DataType::BOOL>(input_shape, {true, false, false, true}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LogicalNot kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor),
+ ::testing::ElementsAre(false, true, true, false));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4));
+}
+
+TEST_F(LogicalNotTest, OutputTypeInvalid_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true},
+ _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ LogicalNot kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LogicalNotTest, InputTypeInvalid_NEG)
+{
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LogicalNot kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/LogicalOr.cpp b/compiler/luci-interpreter/src/kernels/LogicalOr.cpp
new file mode 100644
index 000000000..f289ca64f
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/LogicalOr.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogicalOr.h"
+
+#include "kernels/Utils.h"
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+LogicalOr::LogicalOr(const Tensor *input1, const Tensor *input2, Tensor *output)
+ : Kernel({input1, input2}, {output})
+{
+}
+
+void LogicalOr::configure()
+{
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == DataType::BOOL);
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void LogicalOr::execute() const
+{
+ BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<bool>(input1()),
+ getTensorShape(input2()), getTensorData<bool>(input2()),
+ getTensorShape(output()), getTensorData<bool>(output()),
+ [](bool x, bool y) { return x || y; });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/LogicalOr.h b/compiler/luci-interpreter/src/kernels/LogicalOr.h
new file mode 100644
index 000000000..88606483f
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/LogicalOr.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LOGICALOR_H
+#define LUCI_INTERPRETER_KERNELS_LOGICALOR_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class LogicalOr : public Kernel
+{
+public:
+ LogicalOr(const Tensor *input1, const Tensor *input2, Tensor *output);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LOGICALOR_H
diff --git a/compiler/luci-interpreter/src/kernels/LogicalOr.test.cpp b/compiler/luci-interpreter/src/kernels/LogicalOr.test.cpp
new file mode 100644
index 000000000..d65a69a5e
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/LogicalOr.test.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogicalOr.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class LogicalOrTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LogicalOrTest, Basic)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true},
+ _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, true, false},
+ _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LogicalOr kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor),
+ ::testing::ElementsAre(true, false, true, true));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4));
+}
+
+TEST_F(LogicalOrTest, Broadcast)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true},
+ _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false}, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LogicalOr kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor),
+ ::testing::ElementsAre(true, false, false, true));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4));
+}
+
+TEST_F(LogicalOrTest, MismatchInputType_NEG)
+{
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false}, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ LogicalOr kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LogicalOrTest, InputTypeInvalid_NEG)
+{
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 1}, {0}, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LogicalOr kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
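The Logistic hunks below pin the U8 output scale to 1/256 with zero_point 0. Since sigmoid(x) = 1 / (1 + e^{-x}) lies in (0, 1), that scale maps q in [0, 255] onto [0, 255/256], covering the whole output range with a uniform step of 1/256. A reference sigmoid for comparison with the float values visible in the test diff (e.g. sigmoid(0) = 0.5, sigmoid(1) = 0.731059):

  #include <cmath>

  // Reference logistic/sigmoid function.
  inline float sigmoid(float x) { return 1.f / (1.f + std::exp(-x)); }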
diff --git a/compiler/luci-interpreter/src/kernels/Logistic.cpp b/compiler/luci-interpreter/src/kernels/Logistic.cpp
index c7d45615c..58e4f185d 100644
--- a/compiler/luci-interpreter/src/kernels/Logistic.cpp
+++ b/compiler/luci-interpreter/src/kernels/Logistic.cpp
@@ -18,7 +18,7 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/logistic.h>
namespace luci_interpreter
{
@@ -29,10 +29,10 @@ Logistic::Logistic(const Tensor *input, Tensor *output) : Kernel({input}, {outpu
void Logistic::configure()
{
- assert(input()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
if (input()->element_type() == DataType::U8)
{
- assert(output()->scale() == 1. / 256);
+ LUCI_INTERPRETER_CHECK(output()->scale() == 1. / 256);
populateLookupTable();
}
output()->resize(input()->shape());
diff --git a/compiler/luci-interpreter/src/kernels/Logistic.test.cpp b/compiler/luci-interpreter/src/kernels/Logistic.test.cpp
index 00feddf3d..5a1ea669c 100644
--- a/compiler/luci-interpreter/src/kernels/Logistic.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Logistic.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/Logistic.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -26,31 +27,121 @@ namespace
using namespace testing;
-TEST(LogisticTest, Float)
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data)
{
- Shape input_shape{1, 2, 4, 1};
- std::vector<float> input_data{
- 0, -6, 2, 4, //
- 3, -2, 10, 1, //
- };
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
- Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<getElementType<T>()>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(getElementType<T>());
Logistic kernel(&input_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
- std::vector<float> ref_output_data{
- 0.5, 0.002473, 0.880797, 0.982014, //
- 0.952574, 0.119203, 0.999955, 0.731059, //
- };
- EXPECT_THAT(extractTensorData<float>(output_tensor),
- ElementsAreArray(ArrayFloatNear(ref_output_data)));
- // TODO make a Shape checking of output_tensor.
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+template <>
+void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data,
+ std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ std::pair<float, int32_t> input_quant_param =
+ quantizationParams<uint8_t>(std::min(input_data), std::max(input_data));
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 256, 0);
+
+ Logistic kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data, output_tensor.scale() * 2));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+template <typename T> class LogisticTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(LogisticTest, DataTypes);
+
+TYPED_TEST(LogisticTest, Simple)
+{
+ Check<TypeParam>(
+ {89}, {89},
+ {-10.0000000000, -9.7727272727, -9.5454545455, -9.3181818182, -9.0909090909, -8.8636363636,
+ -8.6363636364, -8.4090909091, -8.1818181818, -7.9545454545, -7.7272727273, -7.5000000000,
+ -7.2727272727, -7.0454545455, -6.8181818182, -6.5909090909, -6.3636363636, -6.1363636364,
+ -5.9090909091, -5.6818181818, -5.4545454545, -5.2272727273, -5.0000000000, -4.7727272727,
+ -4.5454545455, -4.3181818182, -4.0909090909, -3.8636363636, -3.6363636364, -3.4090909091,
+ -3.1818181818, -2.9545454545, -2.7272727273, -2.5000000000, -2.2727272727, -2.0454545455,
+ -1.8181818182, -1.5909090909, -1.3636363636, -1.1363636364, -0.9090909091, -0.6818181818,
+ -0.4545454545, -0.2272727273, 0.0000000000, 0.2272727273, 0.4545454545, 0.6818181818,
+ 0.9090909091, 1.1363636364, 1.3636363636, 1.5909090909, 1.8181818182, 2.0454545455,
+ 2.2727272727, 2.5000000000, 2.7272727273, 2.9545454545, 3.1818181818, 3.4090909091,
+ 3.6363636364, 3.8636363636, 4.0909090909, 4.3181818182, 4.5454545455, 4.7727272727,
+ 5.0000000000, 5.2272727273, 5.4545454545, 5.6818181818, 5.9090909091, 6.1363636364,
+ 6.3636363636, 6.5909090909, 6.8181818182, 7.0454545455, 7.2727272727, 7.5000000000,
+ 7.7272727273, 7.9545454545, 8.1818181818, 8.4090909091, 8.6363636364, 8.8636363636,
+ 9.0909090909, 9.3181818182, 9.5454545455, 9.7727272727, 10.0000000000},
+ {0.0000453979, 0.0000569815, 0.0000715205, 0.0000897689, 0.0001126729, 0.0001414198,
+ 0.0001774998, 0.0002227827, 0.0002796147, 0.0003509396, 0.0004404502, 0.0005527786,
+ 0.0006937345, 0.0008706021, 0.0010925128, 0.0013709094, 0.0017201256, 0.0021581065,
+ 0.0027073042, 0.0033957870, 0.0042586071, 0.0053394826, 0.0066928509, 0.0083863576,
+ 0.0105038445, 0.0131488902, 0.0164489307, 0.0205599431, 0.0256715863, 0.0320125562,
+ 0.0398556989, 0.0495221198, 0.0613831074, 0.0758581800, 0.0934070047, 0.1145124805,
+ 0.1396521834, 0.1692560327, 0.2036499335, 0.2429886272, 0.2871859014, 0.3358556241,
+ 0.3882805886, 0.4434251301, 0.5000000000, 0.5565748699, 0.6117194114, 0.6641443759,
+ 0.7128140986, 0.7570113728, 0.7963500665, 0.8307439673, 0.8603478166, 0.8854875195,
+ 0.9065929953, 0.9241418200, 0.9386168926, 0.9504778802, 0.9601443011, 0.9679874438,
+ 0.9743284137, 0.9794400569, 0.9835510693, 0.9868511098, 0.9894961555, 0.9916136424,
+ 0.9933071491, 0.9946605174, 0.9957413929, 0.9966042130, 0.9972926958, 0.9978418935,
+ 0.9982798744, 0.9986290906, 0.9989074872, 0.9991293979, 0.9993062655, 0.9994472214,
+ 0.9995595498, 0.9996490604, 0.9997203853, 0.9997772173, 0.9998225002, 0.9998585802,
+ 0.9998873271, 0.9999102311, 0.9999284795, 0.9999430185, 0.9999546021});
}
-// TODO Uint8
-// Need to Implement GetDequantizedOutput Function.
+TEST(LogisticTest, InvalidInputOutputType_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Shape input_shape = {1};
+ std::vector<float> input_data{10};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 256, 0);
+
+ Logistic kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(LogisticTest, InvalidQuantParam_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Shape input_shape = {2};
+ std::vector<float> input_data{-10, 10};
+ std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-10, 10);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 255, 0);
+
+ Logistic kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
} // namespace
} // namespace kernels
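For the U8 path, `Logistic::configure()` calls `populateLookupTable()`: with only 256 possible quantized inputs, the kernel can evaluate the dequantize -> sigmoid -> requantize chain once per input value and reduce `execute()` to a table lookup. A sketch of the idea; the table layout and rounding are assumptions, not the project's exact implementation:

  #include <algorithm>
  #include <array>
  #include <cmath>
  #include <cstdint>

  // Hypothetical lookup-table construction for quantized logistic.
  // Output quantization is the fixed scale 1/256, zero_point 0 enforced
  // by configure().
  std::array<std::uint8_t, 256> make_logistic_table(float in_scale, std::int32_t in_zero_point)
  {
    std::array<std::uint8_t, 256> table{};
    for (int q = 0; q < 256; ++q)
    {
      const float x = in_scale * (q - in_zero_point);
      const float y = 1.f / (1.f + std::exp(-x));
      const int out_q = static_cast<int>(std::round(y * 256.f));
      table[q] = static_cast<std::uint8_t>(std::min(out_q, 255));
    }
    return table;
  }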
diff --git a/compiler/luci-interpreter/src/kernels/MaxPool2D.cpp b/compiler/luci-interpreter/src/kernels/MaxPool2D.cpp
index afecf9058..8d9760ff2 100644
--- a/compiler/luci-interpreter/src/kernels/MaxPool2D.cpp
+++ b/compiler/luci-interpreter/src/kernels/MaxPool2D.cpp
@@ -18,6 +18,7 @@
#include "kernels/Utils.h"
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
#include <tensorflow/lite/kernels/internal/reference/pooling.h>
#include <stdexcept>
@@ -29,13 +30,13 @@ namespace kernels
{
MaxPool2D::MaxPool2D(const Tensor *input, Tensor *output, const Pool2DParams &params)
- : KernelWithParams<Pool2DParams>({input}, {output}, params)
+ : KernelWithParams<Pool2DParams>({input}, {output}, params)
{
}
void MaxPool2D::configure()
{
- assert(input()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
assert(input()->shape().num_dims() == 4);
const Shape &input_shape = input()->shape();
const int32_t batches = input_shape.dim(0);
@@ -43,21 +44,26 @@ void MaxPool2D::configure()
const int32_t input_width = input_shape.dim(2);
const int32_t depth = input_shape.dim(3);
- const int32_t output_height = computeOutputSize(_params.padding, input_height,
- _params.filter_height, _params.stride_height);
+ const int32_t output_height =
+ computeOutputSize(_params.padding, input_height, _params.filter_height, _params.stride_height);
const int32_t output_width =
- computeOutputSize(_params.padding, input_width, _params.filter_width, _params.stride_width);
+ computeOutputSize(_params.padding, input_width, _params.filter_width, _params.stride_width);
_padding_height =
- computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height);
+ computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height);
_padding_width =
- computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width);
+ computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width);
output()->resize({batches, output_height, output_width, depth});
- if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S8)
+ if (input()->element_type() == DataType::U8)
{
- assert(input()->scale() == output()->scale());
- assert(input()->zero_point() == output()->zero_point());
+ LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
+ LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point());
+ }
+ else if (input()->element_type() == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
}
}
@@ -71,6 +77,9 @@ void MaxPool2D::execute() const
case DataType::U8:
evalQuantized();
break;
+ case DataType::S16:
+ evalSInt16();
+ break;
default:
throw std::runtime_error("Unsupported type.");
}
@@ -116,5 +125,26 @@ void MaxPool2D::evalQuantized() const
getTensorShape(output()), getTensorData<uint8_t>(output()));
}
+void MaxPool2D::evalSInt16() const
+{
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::PoolParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.filter_height = _params.filter_height;
+ params.filter_width = _params.filter_width;
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ tflite::reference_integer_ops::MaxPool(
+ params, getTensorShape(input()), getTensorData<int16_t>(input()), //
+ getTensorShape(output()), getTensorData<int16_t>(output()));
+}
+
} // namespace kernels
} // namespace luci_interpreter
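The reshuffled `computeOutputSize`/`computePadding` calls implement the usual SAME/VALID pooling arithmetic. For VALID padding the output extent is floor((in - filter + stride) / stride); the SInt16 test below uses input width 5, filter width 3, stride 2, which gives output width 2. A hedged sketch of those helpers with TFLite-style semantics — the real ones live in kernels/Utils.h and these signatures are assumptions:

  #include <algorithm>

  enum class Padding { SAME, VALID };

  int computeOutputSize(Padding p, int in, int filter, int stride)
  {
    return p == Padding::SAME ? (in + stride - 1) / stride
                              : (in - filter + stride) / stride; // VALID
  }

  int computePadding(int stride, int dilation, int in, int filter, int out)
  {
    const int effective_filter = (filter - 1) * dilation + 1;
    return std::max(0, ((out - 1) * stride + effective_filter - in) / 2);
  }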
diff --git a/compiler/luci-interpreter/src/kernels/MaxPool2D.h b/compiler/luci-interpreter/src/kernels/MaxPool2D.h
index 7a59ff022..bb7666305 100644
--- a/compiler/luci-interpreter/src/kernels/MaxPool2D.h
+++ b/compiler/luci-interpreter/src/kernels/MaxPool2D.h
@@ -39,6 +39,7 @@ public:
private:
void evalFloat() const;
void evalQuantized() const;
+ void evalSInt16() const;
private:
int32_t _padding_height{};
diff --git a/compiler/luci-interpreter/src/kernels/MaxPool2D.test.cpp b/compiler/luci-interpreter/src/kernels/MaxPool2D.test.cpp
index 390255d89..44f2a222f 100644
--- a/compiler/luci-interpreter/src/kernels/MaxPool2D.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/MaxPool2D.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/MaxPool2D.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -26,15 +27,24 @@ namespace
using namespace testing;
-TEST(MaxPool2DTest, Float)
+class MaxPool2DTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MaxPool2DTest, Float)
{
Shape input_shape{1, 3, 5, 1};
std::vector<float> input_data{
- 1, -1, 0, -2, 2, //
- -7, -6, -5, -4, -3, //
- 5, 4, 3, 6, 7, //
+ 1, -1, 0, -2, 2, //
+ -7, -6, -5, -4, -3, //
+ 5, 4, 3, 6, 7, //
};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Pool2DParams params{};
@@ -47,30 +57,28 @@ TEST(MaxPool2DTest, Float)
MaxPool2D kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{
- 1, 2, //
- 5, 6, //
+ 1, 2, //
+ 5, 6, //
};
std::initializer_list<int32_t> ref_output_shape{1, 2, 2, 1};
- EXPECT_THAT(extractTensorData<float>(output_tensor),
- ElementsAreArray(ArrayFloatNear(ref_output_data)));
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
-TEST(MaxPool2DTest, Uint8)
+TEST_F(MaxPool2DTest, Uint8)
{
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375, 15.9375);
std::vector<float> input_data{
- 0, -6, 12, 4, //
- -3, -2, 10, 7, //
+ 0, -6, 12, 4, //
+ -3, -2, 10, 7, //
};
- Tensor input_tensor{DataType::U8, {1, 2, 4, 1}, {{quant_param.first}, {quant_param.second}}, ""};
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
- std::vector<uint8_t> quantize_input =
- quantize<uint8_t>(input_data, quant_param.first, quant_param.second);
- input_tensor.writeData(quantize_input.data(), quantize_input.size() * sizeof(uint8_t));
Pool2DParams params{};
params.padding = Padding::VALID;
@@ -82,14 +90,48 @@ TEST(MaxPool2DTest, Uint8)
MaxPool2D kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{0.0, 6.0};
std::initializer_list<int32_t> ref_output_shape{1, 1, 2, 1};
- EXPECT_THAT(dequantize<uint8_t>(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(),
- output_tensor.zero_point()),
- ElementsAreArray(ArrayFloatNear(ref_output_data)));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MaxPool2DTest, SInt16)
+{
+ Shape input_shape{1, 3, 5, 1};
+ std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
+ std::vector<float> input_data{
+ 1, -1, 0, -2, 2, //
+ -7, -6, -5, -4, -3, //
+ 5, 4, 3, 6, 7, //
+ };
+ std::vector<float> ref_output_data{
+ 1, 2, //
+ 5, 6, //
+ };
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>(input_shape, 0.2, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.2, 0);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 3;
+ params.stride_height = 1;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ MaxPool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
}
} // namespace
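A quick sanity check on the SInt16 case above: with scale 0.2 and zero_point 0 on both tensors, RELU6 clamps the quantized activation range to roughly [0, 6 / 0.2] = [0, 30], and the pooled float maxima {1, 2, 5, 6} quantize exactly to {5, 10, 25, 30}, so they round-trip losslessly through dequantizeTensorData. Illustrative arithmetic only:

  #include <cmath>
  #include <cstdint>

  // q = round(x / scale) with scale = 0.2, zero_point = 0.
  inline std::int16_t quantize_s16(float x) { return static_cast<std::int16_t>(std::lround(x / 0.2f)); }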
diff --git a/compiler/luci-interpreter/src/kernels/Maximum.cpp b/compiler/luci-interpreter/src/kernels/Maximum.cpp
new file mode 100644
index 000000000..b102b5e27
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Maximum.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Maximum.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Maximum::Maximum(const Tensor *input1, const Tensor *input2, Tensor *output)
+ : Kernel({input1, input2}, {output})
+{
+}
+
+void Maximum::configure()
+{
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Maximum::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalMaximum<float>();
+ break;
+ case DataType::U8:
+ evalMaximum<uint8_t>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T> inline void Maximum::evalMaximum() const
+{
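+ // BinaryOpBroadcastSlow applies the element-wise functor over the two input
+ // shapes, broadcasting where the shapes differ; for identical shapes it
+ // reduces to a flat loop over the elements.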
+ BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()),
+ [](T x, T y) { return std::max(x, y); });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Maximum.h b/compiler/luci-interpreter/src/kernels/Maximum.h
new file mode 100644
index 000000000..3c99e69c7
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Maximum.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_MAXIMUM_H
+#define LUCI_INTERPRETER_KERNELS_MAXIMUM_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Maximum : public Kernel
+{
+public:
+ Maximum(const Tensor *input1, const Tensor *input2, Tensor *output);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> inline void evalMaximum() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_MAXIMUM_H
diff --git a/compiler/luci-interpreter/src/kernels/Maximum.test.cpp b/compiler/luci-interpreter/src/kernels/Maximum.test.cpp
new file mode 100644
index 000000000..e4a505b03
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Maximum.test.cpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Maximum.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class MaximumTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MaximumTest, Float)
+{
+ Shape input_shape{3, 1, 2};
+ std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
+ std::vector<float> input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43};
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, _memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data2, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Maximum kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{1.0, 0.0, 1.0, 12.0, -2.0, -1.43};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(MaximumTest, Uint8)
+{
+ Shape input_shape{3, 1, 2};
+ std::vector<uint8_t> input_data1{1, 0, 2, 11, 2, 23};
+ std::vector<uint8_t> input_data2{0, 0, 1, 12, 255, 1};
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::U8>(input_shape, input_data1, _memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::U8>(input_shape, input_data2, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ Maximum kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<int32_t> ref_output_shape{3, 1, 2};
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray({1, 0, 2, 12, 255, 23}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Mean.cpp b/compiler/luci-interpreter/src/kernels/Mean.cpp
index 2394e2c0e..8e65e0d6d 100644
--- a/compiler/luci-interpreter/src/kernels/Mean.cpp
+++ b/compiler/luci-interpreter/src/kernels/Mean.cpp
@@ -19,7 +19,7 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/reduce.h>
#include <stdexcept>
@@ -28,7 +28,7 @@ namespace luci_interpreter
namespace kernels
{
-static void resolveAxes(const int *axes_data, int num_axes, tflite::MeanParams *params)
+static void resolveAxes(const int32_t *axes_data, int num_axes, tflite::MeanParams *params)
{
params->axis_count = num_axes;
for (int i = 0; i < num_axes; ++i)
@@ -42,7 +42,7 @@ static void resolveAxes(const int *axes_data, int num_axes, tflite::MeanParams *
}
// Returns the number of axes that will be reduced. Removes duplicates.
-static int getAxisReductionCount(const int *axes_data, int num_axes, int input_num_dims)
+static int getAxisReductionCount(const int32_t *axes_data, int num_axes, int input_num_dims)
{
int reduction_count = num_axes;
for (int i = 0; i < num_axes; ++i)
@@ -63,7 +63,7 @@ static int getAxisReductionCount(const int *axes_data, int num_axes, int input_n
return reduction_count;
}
-static Shape getOutputShape(const Shape &input_shape, const int *axes_data, int num_axes,
+static Shape getOutputShape(const Shape &input_shape, const int32_t *axes_data, int num_axes,
bool keep_dims)
{
int input_num_dims = input_shape.num_dims();
@@ -123,15 +123,22 @@ static Shape getOutputShape(const Shape &input_shape, const int *axes_data, int
}
}
-Mean::Mean(const Tensor *input, const Tensor *axes, Tensor *output, const ReducerParams &params)
- : KernelWithParams<ReducerParams>({input, axes}, {output}, params)
+Mean::Mean(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
+ Tensor *resolved_axes, Tensor *temp_sum, const ReducerParams &params)
+ : KernelWithParams<ReducerParams>({input, axes}, {output, temp_index, resolved_axes, temp_sum},
+ params)
{
}
void Mean::configure()
{
- assert(input()->element_type() == output()->element_type());
- assert(axes()->element_type() == DataType::S32);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(axes()->element_type() == DataType::S32);
+ if (input()->element_type() == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
+ }
+
const Shape &input_shape = input()->shape();
int input_num_dims = input_shape.num_dims();
@@ -144,18 +151,28 @@ void Mean::configure()
tflite::MeanParams params{};
resolveAxes(axes_data, num_axes, &params);
- const bool need_temporaries =
- !(_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
- ((params.axis[0] == 1 && params.axis[1] == 2) ||
- (params.axis[0] == 2 && params.axis[1] == 1)));
- if (need_temporaries)
- {
- _temp_index =
- std::make_unique<Tensor>(DataType::S32, Shape(input_num_dims), AffineQuantization{}, "");
- _resolved_axes =
- std::make_unique<Tensor>(DataType::S32, Shape(num_axes), AffineQuantization{}, "");
- _temp_sum = std::make_unique<Tensor>(input()->element_type(), output()->shape(),
- AffineQuantization{}, "");
+ _need_temporaries = !(
+ _params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
+ ((params.axis[0] == 1 && params.axis[1] == 2) || (params.axis[0] == 2 && params.axis[1] == 1)));
+ if (_need_temporaries)
+ {
+ auto temp_index = getOutputTensors()[1];
+ auto resolved_axes = getOutputTensors()[2];
+ auto temp_sum = getOutputTensors()[3];
+
+ temp_index->resize(Shape(input_num_dims));
+ resolved_axes->resize(Shape(num_axes));
+ temp_sum->resize(output()->shape());
+ }
+ else
+ {
+ auto temp_index = getOutputTensors()[1];
+ auto resolved_axes = getOutputTensors()[2];
+ auto temp_sum = getOutputTensors()[3];
+
+ temp_index->set_allocatable(false);
+ resolved_axes->set_allocatable(false);
+ temp_sum->set_allocatable(false);
}
}
@@ -169,6 +186,9 @@ void Mean::execute() const
case DataType::U8:
evalQuantized();
break;
+ case DataType::S16:
+ evalQuantizedS16();
+ break;
default:
throw std::runtime_error("Unsupported type.");
}
@@ -184,6 +204,10 @@ void Mean::evalFloat() const
tflite::MeanParams params{};
resolveAxes(axes_data, num_axes, &params);
+ auto temp_index = getOutputTensors()[1];
+ auto resolved_axes = getOutputTensors()[2];
+ auto temp_sum = getOutputTensors()[3];
+
// Defer to specialized implementation for 4D Mean across axes 1 & 2.
if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
((params.axis[0] == 1 && params.axis[1] == 2) ||
@@ -194,12 +218,12 @@ void Mean::evalFloat() const
}
else
{
- tflite::reference_ops::Mean(
- getTensorData<float>(input()), getTensorShape(input()).DimsData(),
- input()->shape().num_dims(), getTensorData<float>(output()),
- getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
- _params.keep_dims, getTensorData<int>(_temp_index.get()),
- getTensorData<int>(_resolved_axes.get()), getTensorData<float>(_temp_sum.get()));
+ tflite::reference_ops::Mean(getTensorData<float>(input()), getTensorShape(input()).DimsData(),
+ input()->shape().num_dims(), getTensorData<float>(output()),
+ getTensorShape(output()).DimsData(), output()->shape().num_dims(),
+ axes_data, num_axes, _params.keep_dims,
+ getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
+ getTensorData<float>(temp_sum));
}
}
@@ -213,6 +237,10 @@ void Mean::evalQuantized() const
tflite::MeanParams params{};
resolveAxes(axes_data, num_axes, &params);
+ auto temp_index = getOutputTensors()[1];
+ auto resolved_axes = getOutputTensors()[2];
+ auto temp_sum = getOutputTensors()[3];
+
// Defer to specialized implementation for 4D Mean across axes 1 & 2.
if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
((params.axis[0] == 1 && params.axis[1] == 2) ||
@@ -225,23 +253,92 @@ void Mean::evalQuantized() const
}
else if (input()->zero_point() == output()->zero_point() && input()->scale() == output()->scale())
{
- tflite::reference_ops::Mean(
- getTensorData<uint8_t>(input()), getTensorShape(input()).DimsData(),
- input()->shape().num_dims(), getTensorData<uint8_t>(output()),
- getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
- _params.keep_dims, getTensorData<int>(_temp_index.get()),
- getTensorData<int>(_resolved_axes.get()), getTensorData<int>(_temp_sum.get()));
+ tflite::reference_ops::Mean(getTensorData<uint8_t>(input()), getTensorShape(input()).DimsData(),
+ input()->shape().num_dims(), getTensorData<uint8_t>(output()),
+ getTensorShape(output()).DimsData(), output()->shape().num_dims(),
+ axes_data, num_axes, _params.keep_dims,
+ getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
+ getTensorData<int>(temp_sum));
}
else
{
tflite::reference_ops::QuantizedMeanOrSum<>(
- getTensorData<uint8_t>(input()), input()->zero_point(), input()->scale(),
- getTensorShape(input()).DimsData(), input()->shape().num_dims(),
- getTensorData<uint8_t>(output()), output()->zero_point(), output()->scale(),
- getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
- _params.keep_dims, getTensorData<int>(_temp_index.get()),
- getTensorData<int>(_resolved_axes.get()), getTensorData<int>(_temp_sum.get()),
- /*compute_sum=*/false);
+ getTensorData<uint8_t>(input()), input()->zero_point(), input()->scale(),
+ getTensorShape(input()).DimsData(), input()->shape().num_dims(),
+ getTensorData<uint8_t>(output()), output()->zero_point(), output()->scale(),
+ getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
+ _params.keep_dims, getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
+ getTensorData<int>(temp_sum),
+ /*compute_sum=*/false);
+ }
+}
+
+void Mean::evalQuantizedS16() const
+{
+ const auto *input_data = getTensorData<int16_t>(input());
+ auto *output_data = getTensorData<int16_t>(output());
+
+ const Shape &input_shape = input()->shape();
+ const Shape &output_shape = output()->shape();
+
+ const auto *axes_data = getTensorData<int32_t>(axes());
+ const int num_axes = axes()->shape().num_elements();
+
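+ // Clamp to the symmetric range [-32767, 32767]: S16 tensors here are quantized
+ // with zero point 0 (checked in configure()), so -32768 is left unused.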
+ constexpr int32_t output_min = -std::numeric_limits<int16_t>::max();
+ constexpr int32_t output_max = std::numeric_limits<int16_t>::max();
+
+ // Defer to specialized implementation for 4D Mean across axes 1 & 2.
+ if (_params.keep_dims && input_shape.num_dims() == 4 && num_axes == 2 &&
+ ((axes_data[0] == 1 && axes_data[1] == 2) || (axes_data[0] == 2 && axes_data[1] == 1)))
+ {
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t depth = input_shape.dim(3);
+ assert(output_shape.num_dims() == 4);
+ assert(output_shape.dim(0) == batches);
+ assert(output_shape.dim(1) == 1);
+ assert(output_shape.dim(2) == 1);
+ assert(output_shape.dim(3) == depth);
+
+ const double real_multiplier =
+ static_cast<double>(input()->scale()) / static_cast<double>(output()->scale());
+
+ int32_t output_multiplier{};
+ int output_shift{};
+ quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
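+ // quantizeMultiplier() splits real_multiplier into a Q31 fixed-point mantissa
+ // and a power-of-two shift, i.e. real_multiplier ~= output_multiplier * 2^(output_shift - 31),
+ // which MultiplyByQuantizedMultiplier() applies to the integer accumulator below.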
+
+ const int32_t num_elements_in_axes = input_height * input_width;
+
+ for (int32_t batch = 0; batch < batches; ++batch)
+ {
+ for (int32_t c = 0; c < depth; ++c)
+ {
+ int32_t acc = 0;
+ for (int32_t in_y = 0; in_y < input_height; ++in_y)
+ {
+ for (int32_t in_x = 0; in_x < input_width; ++in_x)
+ {
+ acc += input_data[calcOffset(input_shape, batch, in_y, in_x, c)];
+ }
+ }
+ int32_t scaled_acc =
+ tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+ // Divide by the number of elements rounding to the nearest integer.
+ scaled_acc = scaled_acc > 0
+ ? (scaled_acc + num_elements_in_axes / 2) / num_elements_in_axes
+ : (scaled_acc - num_elements_in_axes / 2) / num_elements_in_axes;
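+ // e.g. with num_elements_in_axes == 4, scaled_acc == 10 yields (10 + 2) / 4 == 3,
+ // and scaled_acc == -10 yields (-10 - 2) / 4 == -3 (rounding half away from zero).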
+
+ scaled_acc = std::max(scaled_acc, output_min);
+ scaled_acc = std::min(scaled_acc, output_max);
+
+ output_data[calcOffset(output_shape, batch, 0, 0, c)] = scaled_acc;
+ }
+ }
+ }
+ else
+ {
+ throw std::runtime_error("Unsupported configuration.");
}
}
diff --git a/compiler/luci-interpreter/src/kernels/Mean.h b/compiler/luci-interpreter/src/kernels/Mean.h
index 9cc793c72..ed07ae561 100644
--- a/compiler/luci-interpreter/src/kernels/Mean.h
+++ b/compiler/luci-interpreter/src/kernels/Mean.h
@@ -30,7 +30,8 @@ namespace kernels
class Mean : public KernelWithParams<ReducerParams>
{
public:
- Mean(const Tensor *input, const Tensor *axes, Tensor *output, const ReducerParams &params);
+ Mean(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
+ Tensor *resolved_axes, Tensor *temp_sum, const ReducerParams &params);
const Tensor *input() const { return _inputs[0]; }
const Tensor *axes() const { return _inputs[1]; }
@@ -42,11 +43,10 @@ public:
private:
void evalFloat() const;
void evalQuantized() const;
+ void evalQuantizedS16() const;
private:
- std::unique_ptr<Tensor> _temp_index;
- std::unique_ptr<Tensor> _resolved_axes;
- std::unique_ptr<Tensor> _temp_sum;
+ bool _need_temporaries = false;
};
} // namespace kernels
diff --git a/compiler/luci-interpreter/src/kernels/Mean.test.cpp b/compiler/luci-interpreter/src/kernels/Mean.test.cpp
index f4e411ca4..d2c00935a 100644
--- a/compiler/luci-interpreter/src/kernels/Mean.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Mean.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/Mean.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -27,139 +28,213 @@ namespace
using namespace testing;
-TEST(MeanTest, FloatKeepDims)
+class MeanTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MeanTest, FloatKeepDims)
{
std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
std::vector<int32_t> axis_data{0, 2};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data);
- Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
ReducerParams params{};
params.keep_dims = true;
- Mean kernel(&input_tensor, &axis_tensor, &output_tensor, params);
+ Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
+ params);
kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(temp_sum);
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{10.5, 12.5, 14.5};
std::initializer_list<int32_t> ref_output_shape{1, 3, 1};
- EXPECT_THAT(extractTensorData<float>(output_tensor),
- ElementsAreArray(ArrayFloatNear(ref_output_data)));
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
-TEST(MeanTest, FloatKeepDims4DMean)
+TEST_F(MeanTest, FloatKeepDims4DMean)
{
std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
std::vector<int32_t> axis_data{1, 2};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 3, 2}, input_data);
- Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 2, 3, 2}, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
ReducerParams params{};
params.keep_dims = true;
- Mean kernel(&input_tensor, &axis_tensor, &output_tensor, params);
+ Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
+ params);
kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(temp_sum);
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{6, 7, 18, 19};
std::initializer_list<int32_t> ref_output_shape{2, 1, 1, 2};
- EXPECT_THAT(extractTensorData<float>(output_tensor),
- ElementsAreArray(ArrayFloatNear(ref_output_data)));
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
-TEST(MeanTest, FloatNotKeepDims)
+TEST_F(MeanTest, FloatNotKeepDims)
{
std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
std::vector<int32_t> axis_data{1, 0, -3, -3};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data);
- Tensor axis_tensor = makeInputTensor<DataType::S32>({4}, axis_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({4}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
ReducerParams params{};
params.keep_dims = false;
- Mean kernel(&input_tensor, &axis_tensor, &output_tensor, params);
+ Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
+ params);
kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(temp_sum);
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{12, 13};
std::initializer_list<int32_t> ref_output_shape{2};
- EXPECT_THAT(extractTensorData<float>(output_tensor),
- ElementsAreArray(ArrayFloatNear(ref_output_data)));
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
-TEST(MeanTest, Uint8KeepDims)
+TEST_F(MeanTest, Uint8KeepDims)
{
float kQuantizedTolerance = getTolerance(-1.0, 1.0, 255);
std::vector<float> input_data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);
std::vector<int32_t> axis_data{1};
- Tensor input_tensor{DataType::U8, {3, 2}, {{quant_param.first}, {quant_param.second}}, ""};
- Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data);
+ Tensor input_tensor = makeInputTensor<DataType::U8>({3, 2}, quant_param.first, quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor temp_sum(DataType::U8, Shape({}), {}, "");
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
- std::vector<uint8_t> quantize_input =
- quantize<uint8_t>(input_data, quant_param.first, quant_param.second);
- input_tensor.writeData(quantize_input.data(), quantize_input.size() * sizeof(uint8_t));
ReducerParams params{};
params.keep_dims = true;
- Mean kernel(&input_tensor, &axis_tensor, &output_tensor, params);
+ Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
+ params);
kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(temp_sum);
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{0.3, 0.35, 0.55};
std::initializer_list<int32_t> ref_output_shape{3, 1};
- EXPECT_THAT(dequantize<uint8_t>(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(),
- output_tensor.zero_point()),
- ElementsAreArray(ArrayFloatNear(ref_output_data, kQuantizedTolerance)));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, kQuantizedTolerance));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
-TEST(MeanTest, Uint8NotKeepDims)
+TEST_F(MeanTest, Uint8NotKeepDims)
{
float kQuantizedTolerance = getTolerance(-1.0, 1.0, 255);
std::vector<float> input_data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);
std::vector<int32_t> axis_data{1};
- Tensor input_tensor{DataType::U8, {1, 3, 2}, {{quant_param.first}, {quant_param.second}}, ""};
- Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 3, 2}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
- std::vector<uint8_t> quantize_input =
- quantize<uint8_t>(input_data, quant_param.first, quant_param.second);
- input_tensor.writeData(quantize_input.data(), quantize_input.size() * sizeof(uint8_t));
ReducerParams params{};
params.keep_dims = false;
- Mean kernel(&input_tensor, &axis_tensor, &output_tensor, params);
+ Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
+ params);
kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(temp_sum);
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{0.4, 0.4};
std::initializer_list<int32_t> ref_output_shape{1, 2};
- EXPECT_THAT(dequantize<uint8_t>(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(),
- output_tensor.zero_point()),
- ElementsAreArray(ArrayFloatNear(ref_output_data, kQuantizedTolerance)));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, kQuantizedTolerance));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
+TEST_F(MeanTest, SInt16KeepDims4D)
+{
+ std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
+ 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+ std::vector<int32_t> axes_data{1, 2};
+ std::vector<float> ref_output_data{6, 7, 18, 19};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({2, 2, 3, 2}, 0.25, 0, input_data, _memory_manager.get());
+ Tensor axes_tensor = makeInputTensor<DataType::S32>({2}, axes_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.2, 0);
+
+ ReducerParams params{};
+ params.keep_dims = true;
+
+ Mean kernel(&input_tensor, &axes_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(temp_sum);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 1, 1, 2}));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Minimum.cpp b/compiler/luci-interpreter/src/kernels/Minimum.cpp
new file mode 100644
index 000000000..5d3dcde72
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Minimum.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Minimum.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Minimum::Minimum(const Tensor *input1, const Tensor *input2, Tensor *output)
+ : Kernel({input1, input2}, {output})
+{
+}
+
+void Minimum::configure()
+{
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Minimum::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalMinimum<float>();
+ break;
+ case DataType::U8:
+ evalMinimum<uint8_t>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T> inline void Minimum::evalMinimum() const
+{
+ BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()),
+ [](T x, T y) { return std::min(x, y); });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Minimum.h b/compiler/luci-interpreter/src/kernels/Minimum.h
new file mode 100644
index 000000000..5ff4035b4
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Minimum.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_MINIMUM_H
+#define LUCI_INTERPRETER_KERNELS_MINIMUM_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Minimum : public Kernel
+{
+public:
+ Minimum(const Tensor *input1, const Tensor *input2, Tensor *output);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> inline void evalMinimum() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_MINIMUM_H
diff --git a/compiler/luci-interpreter/src/kernels/Minimum.test.cpp b/compiler/luci-interpreter/src/kernels/Minimum.test.cpp
new file mode 100644
index 000000000..9a143643f
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Minimum.test.cpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Minimum.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class MinimumTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MinimumTest, Float)
+{
+ Shape input_shape{3, 1, 2};
+ std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
+ std::vector<float> input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43};
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, _memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data2, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Minimum kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{-1.0, 0.0, -1.0, 11.0, -3.0, -1.44};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(MinimumTest, Uint8)
+{
+ Shape input_shape{3, 1, 2};
+ std::vector<uint8_t> input_data1{1, 0, 2, 11, 2, 23};
+ std::vector<uint8_t> input_data2{0, 0, 1, 12, 255, 1};
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::U8>(input_shape, input_data1, _memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::U8>(input_shape, input_data2, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ Minimum kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<int32_t> ref_output_shape{3, 1, 2};
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray({0, 0, 1, 11, 2, 1}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/MirrorPad.cpp b/compiler/luci-interpreter/src/kernels/MirrorPad.cpp
new file mode 100644
index 000000000..bae1eac70
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/MirrorPad.cpp
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/MirrorPad.h"
+
+#include "kernels/Utils.h"
+
+#include <limits>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+MirrorPad::MirrorPad(const Tensor *input, const Tensor *paddings, Tensor *output,
+ const MirrorPadParams &params)
+ : KernelWithParams<MirrorPadParams>({input, paddings}, {output}, params)
+{
+}
+
+void MirrorPad::configure()
+{
+ const Shape &input_shape = input()->shape();
+ const int num_dims = input_shape.num_dims();
+
+ if (num_dims > 4)
+ throw std::runtime_error("Unsupported number of dimensions.");
+
+ assert(output()->element_type() == input()->element_type());
+ assert(paddings()->element_type() == DataType::S32);
+ // Paddings shape should be [N, 2].
+ assert(paddings()->shape().num_dims() == 2);
+ assert(paddings()->shape().dim(0) == num_dims);
+ assert(paddings()->shape().dim(1) == 2);
+
+ Shape output_shape(num_dims);
+ const auto *paddings_data = getTensorData<int32_t>(paddings());
+ for (int i = 0; i < num_dims; ++i)
+ {
+ const int32_t padding_before = paddings_data[i * 2];
+ const int32_t padding_after = paddings_data[i * 2 + 1];
+ assert(padding_before >= 0 && padding_after >= 0);
+ output_shape.dim(i) = input_shape.dim(i) + padding_before + padding_after;
+ }
+
+ output()->resize(output_shape);
+}
+
+template <typename T>
+inline void MirrorPadImpl(const Tensor &input, const Tensor &paddings, MirrorPadMode mode,
+ Tensor &output);
+
+void MirrorPad::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ {
+ MirrorPadImpl<float>(*input(), *paddings(), params().mode, *output());
+ break;
+ }
+ case DataType::U8:
+ {
+ assert(output()->zero_point() >= std::numeric_limits<uint8_t>::min());
+ assert(output()->zero_point() <= std::numeric_limits<uint8_t>::max());
+
+ MirrorPadImpl<uint8_t>(*input(), *paddings(), params().mode, *output());
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T>
+inline void MirrorPadImpl(const Tensor &input, const Tensor &paddings, MirrorPadMode mode,
+ Tensor &output)
+{
+ auto const input_dims = input.shape().num_dims();
+ auto const input_data = input.data<T>();
+ auto const paddings_data = paddings.data<int32_t>();
+ auto const output_data = output.data<T>();
+
+ auto const input_b = input_dims > 3 ? input.shape().dim(input_dims - 4) : 1;
+ auto const input_h = input_dims > 2 ? input.shape().dim(input_dims - 3) : 1;
+ auto const input_w = input_dims > 1 ? input.shape().dim(input_dims - 2) : 1;
+ auto const input_d = input.shape().dim(input_dims - 1);
+
+ auto const input_h_offset = input_d * input_w;
+ auto const input_b_offset = input_h_offset * input_h;
+
+ auto const output_b = input_dims > 3 ? output.shape().dim(input_dims - 4) : 1;
+ auto const output_h = input_dims > 2 ? output.shape().dim(input_dims - 3) : 1;
+ auto const output_w = input_dims > 1 ? output.shape().dim(input_dims - 2) : 1;
+ auto const output_d = output.shape().dim(input_dims - 1);
+
+ auto const left_b_pad = paddings_data[2 * (input_dims - 4)];
+ auto const left_h_pad = paddings_data[2 * (input_dims - 3)];
+ auto const left_w_pad = paddings_data[2 * (input_dims - 2)];
+ auto const left_d_pad = paddings_data[2 * (input_dims - 1)];
+
+ auto const right_b_pad = paddings_data[2 * (input_dims - 4) + 1];
+ auto const right_h_pad = paddings_data[2 * (input_dims - 3) + 1];
+ auto const right_w_pad = paddings_data[2 * (input_dims - 2) + 1];
+ auto const right_d_pad = paddings_data[2 * (input_dims - 1) + 1];
+
+ const auto positive_mod = [](auto a, auto b) { return (a % b + b) % b; };
+ const auto offset_index = [input_d, input_h_offset, input_b_offset](auto d, auto w, auto h,
+ auto b) {
+ return d + w * input_d + h * input_h_offset + b * input_b_offset;
+ };
+
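+ // SYMMETRIC-mode index mapping: the padded sequence repeats with period
+ // 2 * input, alternating between a mirrored and a plain copy of the input, so
+ // first decide whether i falls in a reflected period, then wrap the (possibly
+ // flipped) index into [0, input).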
+ const auto symmetric_dim = [&positive_mod](auto i, auto left_pad, auto input) {
+ bool reflected = (((i < left_pad ? i + 1 - input : i) - left_pad) / input & 1) == 1;
+ return positive_mod(reflected ? input + left_pad - i - 1 : i - left_pad, input);
+ };
+
+ const T *in_ptr = input_data;
+ T *out_ptr = output_data;
+
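+ // Walk the output in row-major order: elements in the padded border gather
+ // from a mirrored source index, while interior elements stream from in_ptr.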
+ for (int32_t b = 0; b < output_b; ++b)
+ {
+ for (int32_t h = 0; h < output_h; ++h)
+ {
+ for (int32_t w = 0; w < output_w; ++w)
+ {
+ for (int32_t d = 0; d < output_d; ++d)
+ {
+ if (b < left_b_pad || b >= output_b - right_b_pad || //
+ h < left_h_pad || h >= output_h - right_h_pad || //
+ w < left_w_pad || w >= output_w - right_w_pad || //
+ d < left_d_pad || d >= output_d - right_d_pad)
+ {
+ if (mode == MirrorPadMode::REFLECT)
+ {
+ *out_ptr++ = input_data[offset_index(
+ positive_mod(d - left_d_pad, input_d), positive_mod(w - left_w_pad, input_w),
+ positive_mod(h - left_h_pad, input_h), positive_mod(b - left_b_pad, input_b))];
+ }
+ else
+ {
+ *out_ptr++ = input_data[offset_index(
+ symmetric_dim(d, left_d_pad, input_d), symmetric_dim(w, left_w_pad, input_w),
+ symmetric_dim(h, left_h_pad, input_h), symmetric_dim(b, left_b_pad, input_b))];
+ }
+ }
+ else
+ {
+ *out_ptr++ = *in_ptr++;
+ }
+ }
+ }
+ }
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/MirrorPad.h b/compiler/luci-interpreter/src/kernels/MirrorPad.h
new file mode 100644
index 000000000..d3e6e858a
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/MirrorPad.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_MIRROR_PAD_H
+#define LUCI_INTERPRETER_KERNELS_MIRROR_PAD_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class MirrorPad : public KernelWithParams<MirrorPadParams>
+{
+public:
+ MirrorPad(const Tensor *input, const Tensor *paddings, Tensor *output,
+ const MirrorPadParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *paddings() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_MIRROR_PAD_H
diff --git a/compiler/luci-interpreter/src/kernels/MirrorPad.test.cpp b/compiler/luci-interpreter/src/kernels/MirrorPad.test.cpp
new file mode 100644
index 000000000..740d8cb22
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/MirrorPad.test.cpp
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/MirrorPad.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class MirrorPadTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ void Execute(const Tensor &input, const Tensor &padding, Tensor &output, MirrorPadMode mode)
+ {
+ MirrorPadParams params{};
+ params.mode = mode;
+
+ MirrorPad kernel(&input, &padding, &output, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output);
+ kernel.execute();
+ }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MirrorPadTest, FloatReflect)
+{
+ Shape input_shape = {1, 2, 2, 1};
+ Shape padding_shape = {4, 2};
+
+ std::vector<float> input_data{1.0f, 2.0f, //
+ 3.0f, 4.0f}; //
+ std::vector<int> padding_data{0, 0, 2, 1, 1, 2, 0, 0};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT);
+
+ std::vector<float> ref_output_data{2.0f, 1.0f, 2.0f, 1.0f, 2.0f, //
+ 4.0f, 3.0f, 4.0f, 3.0f, 4.0f, //
+ 2.0f, 1.0f, 2.0f, 1.0f, 2.0f, //
+ 4.0f, 3.0f, 4.0f, 3.0f, 4.0f, //
+ 2.0f, 1.0f, 2.0f, 1.0f, 2.0f}; //
+ std::initializer_list<int32_t> ref_output_shape{1, 5, 5, 1};
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MirrorPadTest, FloatSymmetric)
+{
+ Shape input_shape = {1, 2, 2, 1};
+ Shape padding_shape = {4, 2};
+
+ std::vector<float> input_data{1.0f, 2.0f, //
+ 3.0f, 4.0f}; //
+ std::vector<int> padding_data{0, 0, 2, 1, 1, 2, 0, 0};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC);
+
+ std::vector<float> ref_output_data{3.0, 3.0, 4.0, 4.0, 3.0, //
+ 1.0, 1.0, 2.0, 2.0, 1.0, //
+ 1.0, 1.0, 2.0, 2.0, 1.0, //
+ 3.0, 3.0, 4.0, 4.0, 3.0, //
+ 3.0, 3.0, 4.0, 4.0, 3.0}; //
+ std::initializer_list<int32_t> ref_output_shape{1, 5, 5, 1};
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MirrorPadTest, FloatSymmetric2Dim)
+{
+ Shape input_shape = {3, 1};
+ Shape padding_shape = {2, 2};
+
+ std::vector<float> input_data{1.0f, 2.0f, 3.0f};
+ std::vector<int> padding_data{1, 2, 0, 0};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC);
+
+ std::vector<float> ref_output_data{1.0, 1.0, 2.0, 3.0, 3.0, 2.0};
+ std::initializer_list<int32_t> ref_output_shape{6, 1};
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MirrorPadTest, Uint8Reflect)
+{
+ Shape input_shape = {1, 2, 3, 1};
+ Shape padding_shape = {4, 2};
+
+ float quant_tolerance = getTolerance(0.0f, 6.0f, 255);
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(0.0f, 6.0f);
+
+ std::vector<float> input_data{1.0f, 2.0f, 3.0f, //
+ 4.0f, 5.0f, 6.0f}; //
+ std::vector<int> padding_data{0, 0, 2, 1, 1, 3, 0, 0};
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ input_shape, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT);
+
+ std::vector<float> ref_output_data{
+ 3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, //
+ 6.0f, 4.0f, 5.0f, 6.0f, 4.0f, 5.0f, 6.0f, //
+ 3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, //
+ 6.0f, 4.0f, 5.0f, 6.0f, 4.0f, 5.0f, 6.0f, //
+ 3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, //
+ };
+ std::initializer_list<int32_t> ref_output_shape{1, 5, 7, 1};
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, quant_tolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MirrorPadTest, Uint8Symmetric)
+{
+ Shape input_shape = {1, 2, 3, 1};
+ Shape padding_shape = {4, 2};
+
+ float quant_tolerance = getTolerance(0.0f, 6.0f, 255);
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(0.0f, 6.0f);
+
+ std::vector<float> input_data{1.0f, 2.0f, 3.0f, //
+ 4.0f, 5.0f, 6.0f}; //
+ std::vector<int> padding_data{0, 0, 2, 1, 1, 3, 0, 0};
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ input_shape, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC);
+
+ std::vector<float> ref_output_data{
+ 4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, //
+ 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 2.0f, 1.0f, //
+ 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 2.0f, 1.0f, //
+ 4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, //
+ 4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, //
+ };
+ std::initializer_list<int32_t> ref_output_shape{1, 5, 7, 1};
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, quant_tolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MirrorPadTest, UnsupportedDim_NEG)
+{
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 1, 1, 1, 1}, {1.0f}, _memory_manager.get());
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>({5, 2}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ EXPECT_ANY_THROW(Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT));
+}
+
+TEST_F(MirrorPadTest, InvalidInputType_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor padding_tensor = makeInputTensor<DataType::S32>({1, 2}, {0, 0}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ EXPECT_ANY_THROW(Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Mul.cpp b/compiler/luci-interpreter/src/kernels/Mul.cpp
index dd31aa099..531fb4fa1 100644
--- a/compiler/luci-interpreter/src/kernels/Mul.cpp
+++ b/compiler/luci-interpreter/src/kernels/Mul.cpp
@@ -17,9 +17,11 @@
#include "kernels/Mul.h"
+#include "kernels/BinaryOpCommon.h"
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALMul.h"
+
#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
#include <stdexcept>
@@ -30,13 +32,22 @@ namespace kernels
{
Mul::Mul(const Tensor *input1, const Tensor *input2, Tensor *output, const MulParams &params)
- : KernelWithParams<MulParams>({input1, input2}, {output}, params)
+ : KernelWithParams<MulParams>({input1, input2}, {output}, params)
{
}
void Mul::configure()
{
- assert(input1()->element_type() == input2()->element_type());
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+ LUCI_INTERPRETER_CHECK(output()->element_type() == input1()->element_type());
+ if (input1()->element_type() == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(input1()->zero_points().size() == 1 &&
+ input2()->zero_points().size() == 1);
+ LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 &&
+ output()->zero_point() == 0);
+ }
+
output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
}
@@ -47,6 +58,15 @@ void Mul::execute() const
case DataType::FLOAT32:
evalFloat();
break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
+ case DataType::S16:
+ evalQuantizedS16();
+ break;
default:
throw std::runtime_error("Unsupported type.");
}
@@ -54,30 +74,77 @@ void Mul::execute() const
void Mul::evalFloat() const
{
- float activation_min{};
- float activation_max{};
- calculateActivationRange(_params.activation, &activation_min, &activation_max);
+ tflite::ArithmeticParams params{};
+ fillArithmeticActivationRange<float>(params, _params.activation);
+
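+ // ProcessBroadcastShapes() reports whether broadcasting is needed and, if so,
+ // fills `params` with the categorized broadcast shape information.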
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+ if (need_broadcast)
+ {
+ luci_interpreter_pal::BroadcastMul4DSlow(
+ params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
+ getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
+ }
+ else
+ {
+ luci_interpreter_pal::Mul(params, getTensorShape(input1()), getTensorData<float>(input1()),
+ getTensorShape(input2()), getTensorData<float>(input2()),
+ getTensorShape(output()), getTensorData<float>(output()));
+ }
+}
+
+template <typename T> void Mul::evalInteger() const
+{
tflite::ArithmeticParams params{};
- params.float_activation_min = activation_min;
- params.float_activation_max = activation_max;
+ fillArithmeticActivationRange<T>(params, _params.activation);
const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
- getTensorShape(input1()), getTensorShape(input2()), &params);
+ getTensorShape(input1()), getTensorShape(input2()), &params);
if (need_broadcast)
{
- tflite::optimized_ops::BroadcastMul4DSlow(
- params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
- getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
+ luci_interpreter_pal::BroadcastMul4DSlow(
+ params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
+ getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
}
else
{
- tflite::optimized_ops::Mul(params, getTensorShape(input1()), getTensorData<float>(input1()),
- getTensorShape(input2()), getTensorData<float>(input2()),
- getTensorShape(output()), getTensorData<float>(output()));
+ luci_interpreter_pal::Mul(params, getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()));
}
}
+void Mul::evalQuantizedS16() const
+{
+ const auto input1_scale = static_cast<double>(input1()->scale());
+ const auto input2_scale = static_cast<double>(input2()->scale());
+ const auto output_scale = static_cast<double>(output()->scale());
+
+ const double real_multiplier = input1_scale * input2_scale / output_scale;
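+ // With r = scale * q per tensor (all zero points are checked to be 0 in
+ // configure()), r_out = r1 * r2 implies q_out = (s1 * s2 / s_out) * (q1 * q2),
+ // so the raw int16 product only needs rescaling by this single multiplier.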
+
+ int32_t output_multiplier;
+ int output_shift;
+ quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ auto fn = [output_multiplier, output_shift, activation_min, activation_max](int16_t input1_val,
+ int16_t input2_val) {
+ int32_t output = static_cast<int32_t>(input1_val) * static_cast<int32_t>(input2_val);
+ output = tflite::MultiplyByQuantizedMultiplier(output, output_multiplier, output_shift);
+ output = std::max(output, activation_min);
+ output = std::min(output, activation_max);
+ return static_cast<int16_t>(output);
+ };
+
+ BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<int16_t>(input1()),
+ getTensorShape(input2()), getTensorData<int16_t>(input2()),
+ getTensorShape(output()), getTensorData<int16_t>(output()), fn);
+}
+
} // namespace kernels
} // namespace luci_interpreter
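Note on the S16 path above: it folds both input scales and the output scale into one fixed-point multiplier, real_multiplier = s1 * s2 / s_out, which quantizeMultiplier decomposes into a 31-bit integer multiplier and a power-of-two shift. Below is a minimal standalone sketch of the same arithmetic; the quantize_multiplier helper is a simplified stand-in written for illustration only, not the interpreter's implementation.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Decompose `real` into q * 2^shift, with q an int32 in [2^30, 2^31).
// Simplified stand-in for the interpreter's quantizeMultiplier helper;
// edge cases (e.g. the fraction rounding up to 1.0) are ignored here.
static void quantize_multiplier(double real, int32_t *q, int *shift)
{
  int exp = 0;
  const double frac = std::frexp(real, &exp); // real = frac * 2^exp, frac in [0.5, 1)
  *q = static_cast<int32_t>(std::round(frac * (1ll << 31)));
  *shift = exp;
}

int main()
{
  const double s1 = 3.0 / 32767, s2 = 1.0 / 32767, s_out = 4.0 / 32767;
  int32_t mult = 0;
  int shift = 0;
  quantize_multiplier(s1 * s2 / s_out, &mult, &shift); // shift == -15, mult ~ 0.75 * 2^31

  const int16_t a = 10922, b = 16384;                  // ~1.0 in s1 units, ~0.5 in s2 units
  const int64_t acc = static_cast<int64_t>(a) * b;     // raw product, fits in 64 bits
  int64_t out = (acc * mult) >> 31;                    // apply the integer multiplier
  out = shift >= 0 ? (out << shift) : (out >> -shift); // apply the power-of-two shift
  out = std::min<int64_t>(32767, std::max<int64_t>(-32768, out));
  std::printf("%.4f\n", out * s_out); // ~0.5, i.e. 1.0 * 0.5 in real terms
  return 0;
}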
diff --git a/compiler/luci-interpreter/src/kernels/Mul.h b/compiler/luci-interpreter/src/kernels/Mul.h
index e46160bcb..c0cf817df 100644
--- a/compiler/luci-interpreter/src/kernels/Mul.h
+++ b/compiler/luci-interpreter/src/kernels/Mul.h
@@ -42,6 +42,8 @@ public:
private:
void evalFloat() const;
+ template <typename T> void evalInteger() const;
+ void evalQuantizedS16() const;
};
} // namespace kernels
diff --git a/compiler/luci-interpreter/src/kernels/Mul.test.cpp b/compiler/luci-interpreter/src/kernels/Mul.test.cpp
index f2255ac3f..fc0e60614 100644
--- a/compiler/luci-interpreter/src/kernels/Mul.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Mul.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/Mul.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -27,26 +28,36 @@ namespace
using namespace testing;
-TEST(MulTest, Float)
+class MulTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MulTest, Float)
{
Shape base_shape = {2, 3, 1, 2};
std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
std::vector<std::vector<float>> test_outputs = {
- {0.00f, 0.69f, 0.12f, 1.15f, 0.00f, 2.07f, 0.18f, 0.15f, 0.00f, 0.25f, 0.90f, 0.45f,
- 0.16f, 0.00f, 0.00f, 0.00f, 0.80f, 0.00f, 0.24f, 0.84f, 0.00f, 1.40f, 1.20f, 2.52f,
- 0.00f, 0.00f, 0.64f, 0.00f, 0.00f, 0.00f, 0.14f, 0.00f, 0.00f, 0.00f, 0.70f, 0.00f},
- {0.00f, 0.69f, 0.00f, 0.25f, 0.80f, 0.00f, 0.24f, 0.84f, 0.64f, 0.00f, 0.70f, 0.00f},
- {0.00f, 0.46f, 0.00f, 0.69f, 0.12f, 0.00f, 0.18f, 0.10f, 0.27f, 0.15f, 0.00f, 0.00f,
- 0.16f, 0.00f, 0.24f, 0.00f, 0.00f, 0.44f, 0.60f, 1.40f, 1.20f, 2.80f, 1.08f, 2.52f,
- 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.35f, 0.00f, 0.70f, 0.00f, 0.63f, 0.00f},
- {0.00f, 0.46f, 0.27f, 0.15f, 0.00f, 0.44f, 0.60f, 1.40f, 0.00f, 0.00f, 0.63f, 0.00f}};
+ {0.00f, 0.69f, 0.12f, 1.15f, 0.00f, 2.07f, 0.18f, 0.15f, 0.00f, 0.25f, 0.90f, 0.45f,
+ 0.16f, 0.00f, 0.00f, 0.00f, 0.80f, 0.00f, 0.24f, 0.84f, 0.00f, 1.40f, 1.20f, 2.52f,
+ 0.00f, 0.00f, 0.64f, 0.00f, 0.00f, 0.00f, 0.14f, 0.00f, 0.00f, 0.00f, 0.70f, 0.00f},
+ {0.00f, 0.69f, 0.00f, 0.25f, 0.80f, 0.00f, 0.24f, 0.84f, 0.64f, 0.00f, 0.70f, 0.00f},
+ {0.00f, 0.46f, 0.00f, 0.69f, 0.12f, 0.00f, 0.18f, 0.10f, 0.27f, 0.15f, 0.00f, 0.00f,
+ 0.16f, 0.00f, 0.24f, 0.00f, 0.00f, 0.44f, 0.60f, 1.40f, 1.20f, 2.80f, 1.08f, 2.52f,
+ 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.35f, 0.00f, 0.70f, 0.00f, 0.63f, 0.00f},
+ {0.00f, 0.46f, 0.27f, 0.15f, 0.00f, 0.44f, 0.60f, 1.40f, 0.00f, 0.00f, 0.63f, 0.00f}};
std::vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
for (size_t i = 0; i < test_shapes.size(); ++i)
{
- Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data);
- Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data);
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
MulParams params{};
@@ -54,17 +65,19 @@ TEST(MulTest, Float)
Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
- EXPECT_THAT(extractTensorData<float>(output_tensor),
- ::testing::ElementsAreArray(ArrayFloatNear(test_outputs[i], 0.0001f)))
- << "With shape number " << i;
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
+ << "With shape number " << i;
}
// Re-run with exchanged inputs.
for (size_t i = 0; i < test_shapes.size(); ++i)
{
- Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data);
- Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data);
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
MulParams params{};
@@ -72,12 +85,206 @@ TEST(MulTest, Float)
Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
+ << "With shape number " << i;
+ }
+}
+
+template <loco::DataType DType> void checkInteger(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ Shape base_shape = {2, 3, 1, 2};
+ std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+
+ dtype max_value = std::numeric_limits<dtype>::max();
+ dtype res_max = max_value - max_value % 10;
+
+ std::vector<std::vector<dtype>> test_outputs = {
+ {8, 0, 20, 0, 4, 30, //
+ 16, 0, 40, 3, 8, 0, //
+ 0, 0, 0, 6, 0, 0, //
+ 4, 0, 10, 9, 2, 0, //
+ 40, 0, 100, 0, 20, 150, //
+ 28, 0, 70, 0, 14, res_max},
+ {8, 0, 40, 3, 0, 0, 4, 0, 100, 0, 14, res_max},
+ {8, 12, 0, 0, 20, 30, 16, 0, 0, 0, 40, 0, 0, 0, 0, 0, 0,
+ 0, 0, 9, 2, 0, 10, 0, 0, 0, 20, 30, 100, 150, 0, 0, 14, max_value / 10 * 2,
+ 70, res_max},
+ {8, 12, 0, 0, 0, 0, 0, 9, 20, 30, 70, res_max}};
+ std::vector<dtype> input1_data{2, 3, 4, -1, -3, -2, 1, -3, 10, 15, 7, max_value / 10};
+ std::vector<dtype> input2_data{4, 0, 10, -3, 2, 10};
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+ Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DType);
+
+ MulParams params{};
+ params.activation = Activation::RELU;
+
+ Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+ << "With shape number " << i;
+ }
+ // Re-run with exchanged inputs.
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+ Tensor input2_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DType);
+
+ MulParams params{};
+ params.activation = Activation::RELU;
+
+ Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+ << "With shape number " << i;
+ }
+}
+
+TEST_F(MulTest, SInt64)
+{
+ checkInteger<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(MulTest, SInt32)
+{
+ checkInteger<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(MulTest, SInt16)
+{
+ Shape base_shape = {2, 3, 1, 2};
+ std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ std::vector<std::vector<int32_t>> ref_output_shapes{
+ {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
+
+ std::vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
+ 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
+ std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
+ std::vector<std::vector<float>> ref_outputs = {
+ {0.00f, 0.69f, 0.12f, 1.15f, 0.00f, 2.07f, 0.18f, 0.15f, 0.00f, 0.25f, 0.90f, 0.45f,
+ 0.16f, 0.00f, 0.00f, 0.00f, 0.80f, 0.00f, 0.24f, 0.84f, 0.00f, 1.40f, 1.20f, 2.52f,
+ 0.00f, 0.00f, 0.64f, 0.00f, 0.00f, 0.00f, 0.14f, 0.00f, 0.00f, 0.00f, 0.70f, 0.00f},
+ {0.00f, 0.69f, 0.00f, 0.25f, 0.80f, 0.00f, 0.24f, 0.84f, 0.64f, 0.00f, 0.70f, 0.00f},
+ {0.00f, 0.46f, 0.00f, 0.69f, 0.12f, 0.00f, 0.18f, 0.10f, 0.27f, 0.15f, 0.00f, 0.00f,
+ 0.16f, 0.00f, 0.24f, 0.00f, 0.00f, 0.44f, 0.60f, 1.40f, 1.20f, 2.80f, 1.08f, 2.52f,
+ 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.35f, 0.00f, 0.70f, 0.00f, 0.63f, 0.00f},
+ {0.00f, 0.46f, 0.27f, 0.15f, 0.00f, 0.44f, 0.60f, 1.40f, 0.00f, 0.00f, 0.63f, 0.00f}};
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DataType::S16>(base_shape, 3.0 / 32767, 0, input1_data,
+ _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 1.0 / 32767, 0,
+ input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 4.0 / 32767, 0);
+ const float tolerance = output_tensor.scale() * 2;
+
+ MulParams params{};
+ params.activation = Activation::RELU;
+
+ Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
- EXPECT_THAT(extractTensorData<float>(output_tensor),
- ::testing::ElementsAreArray(ArrayFloatNear(test_outputs[i], 0.0001f)))
- << "With shape number " << i;
+ EXPECT_THAT(extractTensorShape(output_tensor),
+ ::testing::ElementsAreArray(ref_output_shapes[i]))
+ << "With shape number " << i;
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance))
+ << "With shape number " << i;
}
+ // Re-run with exchanged inputs and different scales.
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 2.0 / 32767, 0,
+ input2_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S16>(base_shape, 4.0 / 32767, 0, input1_data,
+ _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 3.0 / 32767, 0);
+ const float tolerance = output_tensor.scale() * 2;
+
+ MulParams params{};
+ params.activation = Activation::RELU;
+
+ Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor),
+ ::testing::ElementsAreArray(ref_output_shapes[i]))
+ << "With shape number " << i;
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance))
+ << "With shape number " << i;
+ }
+}
+
+TEST_F(MulTest, Input_Output_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ MulParams params{};
+ params.activation = Activation::RELU;
+
+ Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(MulTest, Invalid_Output_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ MulParams params{};
+ params.activation = Activation::RELU;
+
+ Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(MulTest, Invalid_Input_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U64);
+
+ MulParams params{};
+ params.activation = Activation::RELU;
+
+ Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+TEST_F(MulTest, Invalid_Quantization_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::S16>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S16>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16);
+
+ MulParams params{};
+ params.activation = Activation::NONE;
+
+ Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
}
} // namespace
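These tests now route all tensor allocation through the fixture's TestMemoryManager, so the output tensor must be allocated explicitly between configure() and execute(). A minimal sketch of the resulting pattern for a hypothetical extra case follows; shapes and values are illustrative, not from the commit.

TEST_F(MulTest, FloatScalarBroadcast)
{
  // {1} broadcast against {2}; values chosen for illustration only.
  Tensor a = makeInputTensor<DataType::FLOAT32>({1}, {2.f}, _memory_manager.get());
  Tensor b = makeInputTensor<DataType::FLOAT32>({2}, {3.f, -4.f}, _memory_manager.get());
  Tensor out = makeOutputTensor(DataType::FLOAT32);

  MulParams params{};
  params.activation = Activation::NONE;

  Mul kernel(&a, &b, &out, params);
  kernel.configure();                    // resolves the broadcast output shape
  _memory_manager->allocate_memory(out); // required before execute()
  kernel.execute();

  EXPECT_THAT(extractTensorData<float>(out), FloatArrayNear({6.f, -8.f}, 0.0001f));
}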
diff --git a/compiler/luci-interpreter/src/kernels/Neg.cpp b/compiler/luci-interpreter/src/kernels/Neg.cpp
new file mode 100644
index 000000000..c6fe08a9e
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Neg.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Neg.h"
+#include "kernels/Utils.h"
+
+#include "PALNeg.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Neg::Neg(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Neg::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+ output()->resize(input()->shape());
+}
+
+void Neg::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Neg::evalFloat() const
+{
+ luci_interpreter_pal::Negate(getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
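luci_interpreter_pal::Negate is resolved per backend through PALNeg.h; on a reference path it amounts to an elementwise loop over the flat buffer. A hedged sketch of the equivalent computation (not the actual PAL source):

#include <cstdint>

// Reference semantics of Negate: out[i] = -in[i] over the flat buffer.
inline void negate_ref(const float *in, float *out, int32_t flat_size)
{
  for (int32_t i = 0; i < flat_size; ++i)
    out[i] = -in[i];
}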
diff --git a/compiler/luci-interpreter/src/kernels/Neg.h b/compiler/luci-interpreter/src/kernels/Neg.h
new file mode 100644
index 000000000..69fa1a18e
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Neg.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_NEG_H
+#define LUCI_INTERPRETER_KERNELS_NEG_H
+
+#include "core/Kernel.h"
+#include <vector>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Neg : public Kernel
+{
+public:
+ Neg(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_NEG_H
diff --git a/compiler/luci-interpreter/src/kernels/Neg.test.cpp b/compiler/luci-interpreter/src/kernels/Neg.test.cpp
new file mode 100644
index 000000000..8b2bc1a82
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Neg.test.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Neg.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<T> input_data, std::initializer_list<T> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ Neg kernel(&input_tensor, &output_tensor);
+
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST(NegTest, FloatSimple)
+{
+ Check<float>(/*input_shape=*/{2, 3},
+ /*output_shape=*/{2, 3},
+ /*input_data=*/
+ {
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -1.0f, -2.0f, // Row 2
+ },
+ /*output_data=*/
+ {
+ 0.0f, -1.0f, -3.0f, // Row 1
+ -1.0f, 1.0f, 2.0f, // Row 2
+ });
+
+ SUCCEED();
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/NotEqual.cpp b/compiler/luci-interpreter/src/kernels/NotEqual.cpp
new file mode 100644
index 000000000..54e5eee34
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/NotEqual.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/NotEqual.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+NotEqual::NotEqual(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {}
+
+void NotEqual::configure()
+{
+ LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
+ if (x()->element_type() == DataType::U8)
+ {
+ quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+ quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+ }
+ output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+void NotEqual::execute() const
+{
+ switch (x()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void NotEqual::evalFloat() const
+{
+ const auto x_data = getTensorData<float>(x());
+ const auto y_data = getTensorData<float>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowNotEqual(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::NotEqual(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
+ y_data, getTensorShape(output()), output_data);
+ }
+}
+
+template <typename T> void NotEqual::evalInteger() const
+{
+ const auto x_data = getTensorData<T>(x());
+ const auto y_data = getTensorData<T>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowNotEqualNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::NotEqualNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+}
+
+void NotEqual::evalQuantized() const
+{
+ const auto x_data = getTensorData<uint8_t>(x());
+ const auto y_data = getTensorData<uint8_t>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.left_shift = 8;
+ op_params.input1_offset = -x()->zero_point(); // Note the '-'
+ op_params.input1_shift = _x_shift;
+ op_params.input1_multiplier = _x_multiplier;
+ op_params.input2_offset = -y()->zero_point(); // Note the '-'
+ op_params.input2_shift = _y_shift;
+ op_params.input2_multiplier = _y_multiplier;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowNotEqualWithScaling(
+ op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+ else
+ {
+ tflite::reference_ops::NotEqualWithScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
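On the U8 path, configure() precomputes a multiplier/shift pair per input because the two operands generally carry different scales and zero points; NotEqualWithScaling then compares them in a shared fixed-point domain (hence left_shift = 8). Restated in float for clarity, the decision it computes is sketched below; this is an illustration only, the real kernel stays in integer arithmetic throughout.

#include <cstdint>

// Each quantized value maps back to real = scale * (q - zero_point);
// the kernel's fixed-point rescaling makes the comparison behave as if
// both sides were compared on that real-valued axis.
inline bool not_equal_dequant(uint8_t x, float x_scale, int32_t x_zp,
                              uint8_t y, float y_scale, int32_t y_zp)
{
  const float real_x = x_scale * (static_cast<int32_t>(x) - x_zp);
  const float real_y = y_scale * (static_cast<int32_t>(y) - y_zp);
  return real_x != real_y;
}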
diff --git a/compiler/luci-interpreter/src/kernels/NotEqual.h b/compiler/luci-interpreter/src/kernels/NotEqual.h
new file mode 100644
index 000000000..d2aafe893
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/NotEqual.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_NOT_EQUAL_H
+#define LUCI_INTERPRETER_KERNELS_NOT_EQUAL_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class NotEqual : public Kernel
+{
+public:
+ NotEqual(const Tensor *x, const Tensor *y, Tensor *output);
+
+ const Tensor *x() const { return _inputs[0]; }
+ const Tensor *y() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ template <typename T> void evalInteger() const;
+ void evalQuantized() const;
+
+private:
+ int32_t _x_multiplier = 0;
+ int _x_shift = 0;
+ int32_t _y_multiplier = 0;
+ int _y_shift = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_NOT_EQUAL_H
diff --git a/compiler/luci-interpreter/src/kernels/NotEqual.test.cpp b/compiler/luci-interpreter/src/kernels/NotEqual.test.cpp
new file mode 100644
index 000000000..45bf4022a
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/NotEqual.test.cpp
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/NotEqual.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class NotEqualTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(NotEqualTest, FloatSimple)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ -1, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, true, // Row 1
+ true, false, true, // Row 2
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
+}
+
+TEST_F(NotEqualTest, FloatBroadcast)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ -1, 0, 1, // Row 3
+ 0.9, 0.7, 0.5, // Row 4
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, true, // Row 1
+ true, true, true, // Row 2
+ true, true, true, // Row 3
+ false, false, false, // Row 4
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({4, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{min_value, 2, max_value};
+
+ std::vector<dtype> y_data{min_value, -2, max_value};
+
+ std::vector<bool> ref_output_data{false, true, false};
+
+ Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{
+ min_value, 2, 3, // Row 1
+ 4, 5, max_value, // Row 2
+ -1, -2, -3, // Row 3
+ min_value, -2, max_value, // Row 4
+ };
+
+ std::vector<dtype> y_data{
+ min_value, -2, max_value, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, true, // Row 1
+ true, true, false, // Row 2
+ true, false, true, // Row 3
+ false, false, false, // Row 4
+ };
+
+ Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+TEST_F(NotEqualTest, Int32)
+{
+ checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(NotEqualTest, Int64)
+{
+ checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
+// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+const float F_MIN = -128.0 / 128.0;
+const float F_MAX = 127.0 / 128.0;
+
+TEST_F(NotEqualTest, Uint8Quantized)
+{
+ std::vector<float> x_data{
+ 0.5, 0.5, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.5, 0.55, 0.5, // Row 1
+ -1, 0, 0.05, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, true, true, // Row 1
+ true, false, false, true, // Row 2
+ };
+
+ std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
+
+ std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 2);
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(NotEqualTest, Uint8QuantizedBroadcast)
+{
+ std::vector<float> x_data{
+ 0.4, -0.8, 0.7, 0.3, // Row 1
+ -0.5, 0.1, 0, 0.5, // Row 2
+ 1, 0, 0.05, -1, // Row 3
+ -1, 0.05, 0, 1, // Row 4
+ };
+
+ std::vector<float> y_data{
+ -1, 0.05, 0, 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ true, true, true, true, // Row 1
+ true, true, false, true, // Row 2
+ true, true, true, true, // Row 3
+ false, false, false, false, // Row 4
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 4, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(NotEqualTest, Input_Type_Mismatch_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(NotEqualTest, Input_Output_Type_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(NotEqualTest, Float_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(NotEqualTest, Int32_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(NotEqualTest, Int64_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
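The *_Broadcast_NEG cases fail in configure() because a shape of {2} cannot broadcast against {3}: each paired dimension must be equal or 1. A minimal illustration of that rule, using a hypothetical helper rather than the interpreter's calculateShapeForBroadcast:

#include <cstdint>
#include <stdexcept>

// Broadcast rule for one dimension pair: equal, or one side is 1.
inline int32_t broadcast_dim(int32_t a, int32_t b)
{
  if (a == b) return a;
  if (a == 1) return b;
  if (b == 1) return a;
  throw std::runtime_error("Shapes are not compatible for broadcast.");
}
// broadcast_dim(2, 3) throws; broadcast_dim(1, 3) == 3.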
diff --git a/compiler/luci-interpreter/src/kernels/OneHot.cpp b/compiler/luci-interpreter/src/kernels/OneHot.cpp
new file mode 100644
index 000000000..4d3e5f2ef
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/OneHot.cpp
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/OneHot.h"
+#include "kernels/Utils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+namespace
+{
+
+template <typename T>
+void OneHotComputeImpl(const Tensor *indices_tensor, const Tensor *on_value_tensor,
+ const Tensor *off_value_tensor, int32_t depth, int32_t axis,
+ Tensor *output_tensor)
+{
+ // define input shape and correct axis
+ auto const &input_shape = indices_tensor->shape();
+ axis = axis == -1 ? input_shape.num_dims() : axis;
+
+ // TODO support other integer input types
+ auto const *indices = getTensorData<int32_t>(indices_tensor);
+ auto const on_value = getTensorData<T>(on_value_tensor)[0];
+ auto const off_value = getTensorData<T>(off_value_tensor)[0];
+ auto *output = getTensorData<T>(output_tensor);
+
+ // prefix_dim_size == # of elements before the axis
+ // depth == # of elements per axis
+ // suffix_dim_size == # of elements after the axis
+ auto prefix_dim_size = 1;
+ for (int32_t i = 0; i < axis; ++i)
+ {
+ prefix_dim_size *= input_shape.dim(i);
+ }
+ assert(prefix_dim_size > 0);
+ auto const suffix_dim_size = input_shape.num_elements() / prefix_dim_size;
+
+ // View the indices as a matrix of size:
+ // prefix_dim_size x suffix_dim_size
+ // View the output as a matrix of size:
+ // prefix_dim_size x depth x suffix_dim_size
+ // Then the output is:
+ // output(i, j, k) == (indices(i, k) == j) ? on : off
+ for (int32_t i = 0; i < prefix_dim_size; ++i)
+ for (int32_t j = 0; j < depth; ++j)
+ for (int32_t k = 0; k < suffix_dim_size; ++k, ++output)
+ *output = indices[i * suffix_dim_size + k] == j ? on_value : off_value;
+}
+
+} // namespace
+
+OneHot::OneHot(const Tensor *indices, const Tensor *depth, const Tensor *on_value,
+ const Tensor *off_value, Tensor *output, const OneHotParams &params)
+ : KernelWithParams<OneHotParams>({indices, depth, on_value, off_value}, {output}, params)
+{
+ // Do nothing
+}
+
+void OneHot::configure()
+{
+ // check types
+ LUCI_INTERPRETER_CHECK(indices()->element_type() == DataType::S32);
+ LUCI_INTERPRETER_CHECK(depth()->element_type() == DataType::S32);
+ LUCI_INTERPRETER_CHECK(on_value()->element_type() == off_value()->element_type());
+ LUCI_INTERPRETER_CHECK(output()->element_type() == on_value()->element_type());
+
+ // check shape dependent parameters
+ LUCI_INTERPRETER_CHECK(on_value()->shape().num_elements() == 1);
+ LUCI_INTERPRETER_CHECK(off_value()->shape().num_elements() == 1);
+ LUCI_INTERPRETER_CHECK(depth()->shape().num_elements() == 1);
+ LUCI_INTERPRETER_CHECK(params().axis >= -1 && params().axis <= indices()->shape().num_dims());
+
+ // define parameters that affect the output shape
+ auto const depth_value = getTensorData<int32_t>(depth())[0];
+ auto const &input_shape = indices()->shape();
+ auto const input_dims = input_shape.num_dims();
+ auto const axis = params().axis == -1 ? input_dims : params().axis;
+
+ // define output shape
+ Shape output_shape(input_shape.num_dims() + 1);
+ {
+ for (int32_t d = 0; d < axis; ++d)
+ output_shape.dim(d) = input_shape.dim(d);
+
+ output_shape.dim(axis) = depth_value;
+
+ for (int32_t d = axis + 1; d < output_shape.num_dims(); ++d)
+ output_shape.dim(d) = input_shape.dim(d - 1);
+ }
+
+ // reshape output
+ output()->resize(output_shape);
+}
+
+void OneHot::execute() const
+{
+ auto const depth_value = getTensorData<int32_t>(depth())[0];
+ auto const axis = params().axis;
+
+ switch (output()->element_type())
+ {
+ case loco::DataType::FLOAT32:
+ OneHotComputeImpl<float>(indices(), on_value(), off_value(), depth_value, axis, output());
+ break;
+ case loco::DataType::U8:
+ OneHotComputeImpl<uint8_t>(indices(), on_value(), off_value(), depth_value, axis, output());
+ break;
+ case loco::DataType::S16:
+ OneHotComputeImpl<int16_t>(indices(), on_value(), off_value(), depth_value, axis, output());
+ break;
+ default:
+ // TODO Support other data types
+ throw std::runtime_error("Not supported, yet!");
+ break;
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
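The prefix/depth/suffix view in OneHotComputeImpl is plain index flattening: output(i, j, k) is on_value exactly when indices(i, k) == j. A standalone sketch reproducing that rule for a 1-D input with axis == -1, with values chosen for illustration:

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

int main()
{
  // indices = {0, 2}, depth = 3, axis = -1 (one-hot dimension appended),
  // so prefix_dim_size = 2 and suffix_dim_size = 1; output shape is {2, 3}.
  const std::vector<int32_t> indices{0, 2};
  const int32_t depth = 3, on_value = 1, off_value = 0;

  std::vector<int32_t> output;
  for (std::size_t i = 0; i < indices.size(); ++i)              // prefix
    for (int32_t j = 0; j < depth; ++j)                         // depth
      output.push_back(indices[i] == j ? on_value : off_value); // suffix size is 1

  for (int32_t v : output)
    std::printf("%d ", v); // prints: 1 0 0 0 0 1
  return 0;
}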
diff --git a/compiler/luci-interpreter/src/kernels/OneHot.h b/compiler/luci-interpreter/src/kernels/OneHot.h
new file mode 100644
index 000000000..572f857ae
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/OneHot.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_ONEHOT_H
+#define LUCI_INTERPRETER_KERNELS_ONEHOT_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class OneHot : public KernelWithParams<OneHotParams>
+{
+public:
+ OneHot(const Tensor *indices, const Tensor *depth, const Tensor *on_value,
+ const Tensor *off_value, Tensor *output, const OneHotParams &params);
+
+ const Tensor *indices() const { return _inputs[0]; }
+ const Tensor *depth() const { return _inputs[1]; }
+ const Tensor *on_value() const { return _inputs[2]; }
+ const Tensor *off_value() const { return _inputs[3]; }
+
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_ONEHOT_H
diff --git a/compiler/luci-interpreter/src/kernels/OneHot.test.cpp b/compiler/luci-interpreter/src/kernels/OneHot.test.cpp
new file mode 100644
index 000000000..45b6968fa
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/OneHot.test.cpp
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/OneHot.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T1, typename T2>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<T1> input_data, std::initializer_list<int32_t> depth_data,
+ std::initializer_list<T2> on_value_data, std::initializer_list<T2> off_value_data,
+ int32_t axis, std::initializer_list<T2> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ constexpr auto input_type = getElementType<T1>();
+ constexpr auto output_type = getElementType<T2>();
+
+ Tensor input_tensor = makeInputTensor<input_type>(input_shape, input_data, memory_manager.get());
+ Tensor depth_tensor = makeInputTensor<DataType::S32>({}, depth_data, memory_manager.get());
+ Tensor on_value_tensor = makeInputTensor<output_type>({}, on_value_data, memory_manager.get());
+ Tensor off_value_tensor = makeInputTensor<output_type>({}, off_value_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(output_type);
+
+ OneHotParams params{};
+ params.axis = axis;
+
+ OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
+ params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+ EXPECT_THAT(extractTensorData<T2>(output_tensor), ::testing::ElementsAreArray(output_data));
+}
+
+template <typename T> class OneHotTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t, int16_t>;
+TYPED_TEST_SUITE(OneHotTest, DataTypes);
+
+TYPED_TEST(OneHotTest, BasicPattern)
+{
+ // axis 0
+ Check<int32_t, TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{4, 2, 3},
+ /*input_data=*/
+ {
+ 0, 3, 5, //
+ 7, 3, 0, //
+ },
+ /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0},
+ /*axis=*/0,
+ /*output_data=*/
+ {
+ 1, 0, 0, //
+ 0, 0, 1, //
+
+ 0, 0, 0, //
+ 0, 0, 0, //
+
+ 0, 0, 0, //
+ 0, 0, 0, //
+
+ 0, 1, 0, //
+ 0, 1, 0, //
+ });
+ // axis 1
+ Check<int32_t, TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 4, 3},
+ /*input_data=*/
+ {
+ 0, 3, 5, //
+ 7, 3, 0, //
+ },
+ /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0},
+ /*axis=*/1,
+ /*output_data=*/
+ {
+ 1, 0, 0, //
+ 0, 0, 0, //
+ 0, 0, 0, //
+ 0, 1, 0, //
+
+ 0, 0, 1, //
+ 0, 0, 0, //
+ 0, 0, 0, //
+ 0, 1, 0, //
+ });
+ // axis -1
+ Check<int32_t, TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 3, 4},
+ /*input_data=*/
+ {
+ 0, 3, 5, //
+ 7, 3, 0, //
+ },
+ /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0},
+ /*axis=*/-1,
+ /*output_data=*/
+ {
+ 1, 0, 0, 0, //
+ 0, 0, 0, 1, //
+ 0, 0, 0, 0, //
+
+ 0, 0, 0, 0, //
+ 0, 0, 0, 1, //
+ 1, 0, 0, 0, //
+ });
+}
+
+TEST(OneHotTest, UnsupportedInputType_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ // input type should be integer
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {0}, memory_manager.get());
+
+ Tensor depth_tensor = makeInputTensor<DataType::S32>({}, {1}, memory_manager.get());
+ Tensor on_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {1.0}, memory_manager.get());
+ Tensor off_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {0.0}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ OneHotParams params = {-1};
+
+ OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(OneHotTest, OutputTypeMismatch_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::S32>({1}, {0}, memory_manager.get());
+ Tensor depth_tensor = makeInputTensor<DataType::S32>({}, {1}, memory_manager.get());
+
+  // on_value, off_value, and output_tensor must all have the same type
+ Tensor on_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {1.0}, memory_manager.get());
+ Tensor off_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {0.0}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16);
+
+ OneHotParams params = {-1};
+
+ OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(OneHotTest, InvalidAxis_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::S32>({1}, {0}, memory_manager.get());
+ Tensor depth_tensor = makeInputTensor<DataType::S32>({}, {1}, memory_manager.get());
+ Tensor on_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {1.0}, memory_manager.get());
+ Tensor off_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {0.0}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ // axis should be in [-1, input_shape.rank]
+ OneHotParams params = {-2};
+
+ OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/PRelu.cpp b/compiler/luci-interpreter/src/kernels/PRelu.cpp
new file mode 100644
index 000000000..5a6b05c3a
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/PRelu.cpp
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/PRelu.h"
+
+#include "kernels/BinaryOpCommon.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/binary_function.h>
+#include <tensorflow/lite/kernels/internal/reference/prelu.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+PRelu::PRelu(const Tensor *input, const Tensor *alpha, Tensor *output)
+ : Kernel({input, alpha}, {output})
+{
+}
+
+PRelu::~PRelu()
+{
+  // Destructor is declared so the vector of quantized alpha data is destroyed properly
+}
+
+void PRelu::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(alpha()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(input()->scales().size() <= 1);
+ LUCI_INTERPRETER_CHECK(output()->scales().size() <= 1);
+
+ if (input()->element_type() == DataType::U8)
+ {
+ LUCI_INTERPRETER_CHECK(alpha()->scales().size() <= 1); // remove when CWQ kernel arrives
+ _alpha_multipliers.resize(1);
+ double alpha_multiplier = input()->scale() * alpha()->scale() / output()->scale();
+ quantizeMultiplier(alpha_multiplier, &_alpha_multipliers[0].multiplier,
+ &_alpha_multipliers[0].shift);
+ double identity_multiplier = input()->scale() / output()->scale();
+ quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity);
+ }
+ else if (input()->element_type() == DataType::S16)
+ {
+ // Common check for correctness of quant params
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
+ for (size_t channel = 0; channel < alpha()->zero_points().size(); ++channel)
+ {
+ LUCI_INTERPRETER_CHECK(alpha()->zero_points()[channel] == 0);
+ }
+ // PRelu specific checks for CWQ
+ LUCI_INTERPRETER_CHECK(alpha()->quantized_dimension() == alpha()->shape().num_dims() - 1);
+ LUCI_INTERPRETER_CHECK(static_cast<int32_t>(alpha()->scales().size()) ==
+ alpha()->shape().dim(alpha()->quantized_dimension()));
+ LUCI_INTERPRETER_CHECK(alpha()->shape().num_elements() ==
+ input()->shape().dim(input()->shape().num_dims() - 1));
+
+ // all dimension of alpha except last one should be size 1
+ for (int dim = 0; dim < alpha()->shape().num_dims() - 1; ++dim)
+ {
+ LUCI_INTERPRETER_CHECK(alpha()->shape().dim(dim) == 1);
+ }
+
+ std::vector<double> real_multipliers =
+ getQuantizedConvolutionMultiplers(input()->scale(), alpha()->scales(), output()->scale());
+
+ _alpha_multipliers = quantizeMultipliers(real_multipliers);
+
+ double identity_multiplier = input()->scale() / output()->scale();
+ quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity);
+ }
+ output()->resize(calculateShapeForBroadcast(input()->shape(), alpha()->shape()));
+}
+
+void PRelu::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ case DataType::S16:
+ evalQuantizedS16();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void PRelu::evalFloat() const
+{
+ const auto input_data = getTensorData<float>(input());
+ const auto alpha_data = getTensorData<float>(alpha());
+ const auto size = getTensorShape(input()).FlatSize();
+ auto output_data = getTensorData<float>(output());
+
+ auto PReluFunc = [](float input, float alpha) { return input >= 0.0 ? input : input * alpha; };
+
+ if (input()->shape() != alpha()->shape())
+ {
+ tflite::reference_ops::BroadcastBinaryFunction4DSlow<float, float, float>(
+ getTensorShape(input()), getTensorData<float>(input()), getTensorShape(alpha()),
+ getTensorData<float>(alpha()), getTensorShape(output()), getTensorData<float>(output()),
+ PReluFunc);
+ }
+ else
+ {
+ for (auto i = decltype(size){0}; i < size; ++i)
+ {
+ if (input_data[i] >= 0)
+ output_data[i] = input_data[i];
+ else
+ output_data[i] = input_data[i] * alpha_data[i];
+ }
+ }
+}
+
+void PRelu::evalQuantized() const
+{
+ tflite::PreluParams op_params{};
+
+ op_params.input_offset = -input()->zero_point(); // Note the '-'.
+ op_params.alpha_offset = -alpha()->zero_point(); // Note the '-'.
+ op_params.output_offset = output()->zero_point();
+ op_params.output_shift_1 = _output_shift_identity;
+ op_params.output_multiplier_1 = _output_multiplier_identity;
+ op_params.output_shift_2 = _alpha_multipliers[0].shift;
+ op_params.output_multiplier_2 = _alpha_multipliers[0].multiplier;
+
+ if (input()->shape() != alpha()->shape())
+ {
+ tflite::reference_ops::BroadcastPrelu4DSlow(
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(alpha()),
+ getTensorData<uint8_t>(alpha()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Prelu<uint8_t>(
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(alpha()),
+ getTensorData<uint8_t>(alpha()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+ }
+}
+
+static inline int16_t evalElemS16PRelu(int16_t input_val, int16_t alpha_val,
+ const ChannelQuantMultipliers &identity_mult,
+ const ChannelQuantMultipliers &alpha_mult)
+{
+ constexpr int32_t quantized_min = std::numeric_limits<int16_t>::min();
+ constexpr int32_t quantized_max = std::numeric_limits<int16_t>::max();
+
+ const int32_t output_val =
+ input_val >= 0
+ ? tflite::MultiplyByQuantizedMultiplier(static_cast<int32_t>(input_val),
+ identity_mult.multiplier, identity_mult.shift)
+ : tflite::MultiplyByQuantizedMultiplier(static_cast<int32_t>(input_val * alpha_val),
+ alpha_mult.multiplier, alpha_mult.shift);
+ const int32_t clamped_output = std::min(quantized_max, std::max(quantized_min, output_val));
+ return clamped_output;
+}
+
+void PRelu::evalQuantizedS16() const
+{
+ // Note that this kernel assumes alpha is CWQ
+ tflite::RuntimeShape input_shape = getTensorShape(input());
+ const int16_t *input_data = input()->data<int16_t>();
+ const int16_t *alpha_data = alpha()->data<int16_t>();
+ int16_t *output_data = output()->data<int16_t>();
+
+ const ChannelQuantMultipliers pos_mult{_output_shift_identity, _output_multiplier_identity};
+
+ const int last_dim = input()->shape().num_dims() - 1;
+
+ int32_t outer_dims_size = 1;
+ for (int i = 0; i < last_dim; ++i)
+ outer_dims_size *= input_shape.Dims(i);
+ int32_t quant_dim_size = input_shape.Dims(last_dim);
+
+ for (int32_t outer_dims = 0; outer_dims < outer_dims_size; ++outer_dims)
+ for (int32_t quant_channel = 0; quant_channel < quant_dim_size; ++quant_channel)
+ {
+ const ChannelQuantMultipliers &neg_mult = _alpha_multipliers[quant_channel];
+ size_t offset = static_cast<size_t>(outer_dims) * static_cast<size_t>(quant_dim_size);
+ offset += quant_channel;
+
+ output_data[offset] =
+ evalElemS16PRelu(input_data[offset], alpha_data[quant_channel], pos_mult, neg_mult);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
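evalQuantizedS16 assumes alpha is channel-wise quantized (CWQ) over the last dimension: positive inputs share one identity rescale, while each channel selects its own negative-slope multiplier from _alpha_multipliers. A float-domain sketch of the per-channel selection follows; the fixed-point details live in evalElemS16PRelu above.

#include <cstddef>

// Per-channel PRelu over a flattened [outer, channels] view, float domain.
// In the S16 kernel, the per-channel alpha multiplier plays the role of
// alpha[c] and the identity multiplier plays the pass-through branch.
inline void prelu_cwq_ref(const float *in, const float *alpha, float *out,
                          std::size_t outer, std::size_t channels)
{
  for (std::size_t o = 0; o < outer; ++o)
    for (std::size_t c = 0; c < channels; ++c)
    {
      const std::size_t idx = o * channels + c;
      out[idx] = in[idx] >= 0.f ? in[idx] : in[idx] * alpha[c]; // alpha indexed by channel only
    }
}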
diff --git a/compiler/luci-interpreter/src/kernels/PRelu.h b/compiler/luci-interpreter/src/kernels/PRelu.h
new file mode 100644
index 000000000..f7735d418
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/PRelu.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_PRELU_H
+#define LUCI_INTERPRETER_KERNELS_PRELU_H
+
+#include "core/Kernel.h"
+#include <vector>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ChannelQuantMultipliers;
+
+class PRelu : public Kernel
+{
+public:
+ PRelu(const Tensor *input, const Tensor *alpha, Tensor *output);
+
+ ~PRelu();
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *alpha() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+ void evalQuantizedS16() const;
+
+private:
+ std::vector<ChannelQuantMultipliers> _alpha_multipliers;
+ // TODO merge this into one ChannelQuantMultipliers object
+ int32_t _output_multiplier_identity = 0;
+ int _output_shift_identity = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_PRELU_H
diff --git a/compiler/luci-interpreter/src/kernels/PRelu.test.cpp b/compiler/luci-interpreter/src/kernels/PRelu.test.cpp
new file mode 100644
index 000000000..6d97382de
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/PRelu.test.cpp
@@ -0,0 +1,397 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/PRelu.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> alpha_shape,
+ std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data,
+ std::initializer_list<T> alpha_data, std::initializer_list<T> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor alpha_tensor =
+ makeInputTensor<element_type>(alpha_shape, alpha_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST(PReluTest, FloatSimple)
+{
+ Check<float>(/*input_shape=*/{2, 3}, /*alpha_shape=*/{2, 3},
+ /*output_shape=*/{2, 3},
+ /*input_data=*/
+ {
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -1.0f, -2.0f, // Row 2
+ },
+ /*alpha_data=*/
+ {
+ 0.0f, 0.5f, 0.1f, // Row 1
+ 0.0f, 0.5f, 0.1f, // Row 2
+ },
+ /*output_data=*/
+ {
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -0.5f, -0.2f, // Row 2
+ });
+
+ SUCCEED();
+}
+
+TEST(PReluTest, FloatBroadcast)
+{
+ Check<float>(/*input_shape=*/{1, 2, 2, 3}, /*alpha_shape=*/{1, 1, 3},
+ /*output_shape=*/{1, 2, 2, 3},
+ /*input_data=*/
+ {
+ 0.0f, 0.0f, 0.0f, // Row 1, Column 1
+ 1.0f, 1.0f, 1.0f, // Row 1, Column 2
+ -1.0f, -1.0f, -1.0f, // Row 2, Column 1
+ -2.0f, -2.0f, -2.0f, // Row 2, Column 2
+ },
+ /*alpha_data=*/
+ {0.0f, 1.0f, 2.0f},
+ /*output_data=*/
+ {
+ 0.0f, 0.0f, 0.0f, // Row 1, Column 1
+ 1.0f, 1.0f, 1.0f, // Row 1, Column 2
+ 0.0f, -1.0f, -2.0f, // Row 2, Column 1
+ 0.0f, -2.0f, -4.0f, // Row 2, Column 2
+ });
+
+ SUCCEED();
+}
+
+float GetTolerance(float min, float max) { return (max - min) / 255.0f; }
+
+TEST(PReluTest, Uint8Simple)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data{-0.8f, 0.2f, 0.9f, 0.7f, 0.1f, -0.4f};
+ std::vector<float> alpha_data{0.5f, 0.5f, 0.5f, 0.25f, 1.0f, 0.25f};
+ std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, 0.7f, 0.1f, -0.1f};
+
+ float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 3, 1}, quant_param.first, quant_param.second, alpha_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, kQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 3, 1}));
+
+ SUCCEED();
+}
+
+TEST(PReluTest, Uint8Broadcast)
+{
+ std::vector<float> input_data{
+ 0.0f, 0.0f, 0.0f, // Row 1, Column 1
+ 0.5f, 0.5f, 0.5f, // Row 1, Column 2
+ -1.0f, -1.0f, -1.0f, // Row 2, Column 1
+ -0.25f, -0.25f, -0.25f, // Row 2, Column 2
+ };
+ std::vector<float> alpha_data{0.0f, 0.5f, -0.5f};
+ std::vector<float> ref_output_data{
+ 0.0f, 0.0f, 0.0f, // Row 1, Column 1
+ 0.5f, 0.5f, 0.5f, // Row 1, Column 2
+ 0.0f, -0.5f, 0.5f, // Row 2, Column 1
+ 0.0f, -0.125f, 0.125f // Row 2, Column 2
+ };
+ std::vector<float> ref_quant_output_data{
+ 128, 128, 128, // Row 1, Column 1
+ 192, 192, 192, // Row 1, Column 2
+ 128, 64, 192, // Row 2, Column 1
+ 128, 112, 144 // Row 2, Column 2
+ };
+ float kQuantizedTolerance = 2 * (1. / 256);
+ const float kMin = -1;
+ const float kMax = 127.f / 128.f;
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(kMin, kMax);
+
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 2, 3}, quant_param.first, quant_param.second, input_data, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::U8>(
+ {1, 1, 3}, quant_param.first, quant_param.second, alpha_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, kQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 3}));
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_quant_output_data));
+}
+
+TEST(PReluTest, SInt16_LWQ_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ // Rewrite this test once layer-wise quantization for sint16 is supported.
+ std::vector<float> input_data(6); // data is not important
+ std::vector<float> alpha_data(6);
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 2, 3, 1}, 0.1, 0, input_data, memory_manager.get());
+ Tensor alpha_tensor =
+ makeInputTensor<DataType::S16>({1, 2, 3, 1}, 0.1, 0, alpha_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(PReluTest, SInt16_CWQ_Simple)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data{-0.8f, 0.2f, 0.9f, -0.7f, 0.1f, -0.4f};
+ std::vector<float> alpha_data{0.5f, 0.25f};
+ std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, -0.175f, 0.1f, -0.1f};
+
+ std::vector<float> alpha_scales{0.05f, 0.025f};
+ std::vector<int32_t> zerop{0, 0};
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get());
+ Tensor alpha_tensor =
+ makeInputTensor<DataType::S16>({2}, alpha_scales, zerop, 0, alpha_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.025, 0);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 3, 2}));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST(PReluTest, SInt16_CWQ_spatial_alpha_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data(6); // data is not important
+ std::vector<float> alpha_data(6);
+
+ std::vector<float> alpha_scales{0.25f, 0.05f};
+ std::vector<int32_t> zerop{0, 0};
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 3, 2}, alpha_scales, zerop, 3,
+ alpha_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(PReluTest, SInt16_CWQ_wrong_dim_quant_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data(6); // data is not important
+ std::vector<float> alpha_data(6);
+
+ std::vector<float> alpha_scales{0.25f};
+ std::vector<int32_t> zerop{0};
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 1, 2}, alpha_scales, zerop, 1,
+ alpha_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(PReluTest, SInt16_CWQ_uneven_shape1)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data{-0.8f, 0.2f, 0.9f, -0.7f, 0.1f, -0.4f};
+ std::vector<float> alpha_data{0.5f, 0.25f};
+ std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, -0.175f, 0.1f, -0.1f};
+
+ std::vector<float> alpha_scales{0.05f, 0.025f};
+ std::vector<int32_t> zerop{0, 0};
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 2}, alpha_scales, zerop, 2,
+ alpha_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.025, 0);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 3, 2}));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST(PReluTest, SInt16_CWQ_uneven_shape2)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data{
+ 0.0f, 0.0f, 0.0f, // Row 1, Column 1
+ 0.5f, 0.5f, 0.5f, // Row 1, Column 2
+ -1.0f, -1.0f, -1.0f, // Row 2, Column 1
+ -0.25f, -0.25f, -0.25f, // Row 2, Column 2
+ };
+ std::vector<float> alpha_data{0.0f, 0.5f, -0.5f};
+ std::vector<float> ref_output_data{
+ 0.0f, 0.0f, 0.0f, // Row 1, Column 1
+ 0.5f, 0.5f, 0.5f, // Row 1, Column 2
+ 0.0f, -0.5f, 0.5f, // Row 2, Column 1
+ 0.0f, -0.125f, 0.125f // Row 2, Column 2
+ };
+
+ std::vector<float> alpha_scales{1.f, 0.05f, 0.1f};
+ std::vector<int32_t> zerop{0, 0, 0};
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 2, 2, 3}, 0.01, 0, input_data, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 1, 3}, alpha_scales, zerop, 3,
+ alpha_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.001, 0);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 3}));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST(PReluTest, Input_Output_Type_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(PReluTest, Input_Alpha_Type_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::U8>({1}, {1}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(PReluTest, Invalid_Input_Type_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+TEST(PReluTest, Input_Output_U8_CWQ_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> scales{1.f, 1.f};
+ std::vector<int32_t> zerop{0, 0};
+ std::vector<float> dummy_data(4, 0.f);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
+ Tensor alpha_tensor =
+ makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
+ Tensor output_tensor =
+ makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(PReluTest, Input_Output_S16_CWQ_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> scales{1.f, 1.f};
+ std::vector<int32_t> zerop{0, 0};
+ std::vector<float> dummy_data(4, 0.f);
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
+ Tensor alpha_tensor =
+ makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
+ Tensor output_tensor =
+ makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(PReluTest, Mixing_U8_S16_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> dummy_data(4, 0.f);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({2, 2}, 1.f, 0, dummy_data, memory_manager.get());
+ Tensor alpha_tensor =
+ makeInputTensor<DataType::S16>({2, 2}, 1.f, 0, dummy_data, memory_manager.get());
+ Tensor output_tensor =
+ makeInputTensor<DataType::U8>({2, 2}, 1.f, 0, dummy_data, memory_manager.get());
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Pack.cpp b/compiler/luci-interpreter/src/kernels/Pack.cpp
new file mode 100644
index 000000000..42aab330c
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Pack.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Pack.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Pack::Pack(std::vector<const Tensor *> inputs, Tensor *output, const PackParams &params)
+ : KernelWithParams<PackParams>(std::move(inputs), {output}, params)
+{
+}
+
+void Pack::configure()
+{
+ LUCI_INTERPRETER_CHECK(_inputs.size() == static_cast<uint32_t>(params().values_count));
+ const Tensor *t0 = _inputs[0];
+ const int dimension_size = t0->shape().num_dims() + 1;
+ int axis = params().axis;
+ if (axis < 0)
+ {
+ axis += dimension_size;
+ }
+ LUCI_INTERPRETER_CHECK(axis >= 0 && axis <= t0->shape().num_dims());
+
+ if (t0->element_type() != DataType::S32 && t0->element_type() != DataType::FLOAT32 &&
+ t0->element_type() != DataType::U8 && t0->element_type() != DataType::S8 &&
+ t0->element_type() != DataType::S16 && t0->element_type() != DataType::S64)
+ {
+ throw std::runtime_error("Unsupported type.");
+ }
+
+ for (uint32_t i = 1; i < _inputs.size(); ++i)
+ {
+ const Tensor *tensor = _inputs[i];
+ LUCI_INTERPRETER_CHECK(tensor->element_type() == t0->element_type());
+ LUCI_INTERPRETER_CHECK(tensor->shape().num_dims() == t0->shape().num_dims());
+ for (int d = 0; d < t0->shape().num_dims(); ++d)
+ {
+ LUCI_INTERPRETER_CHECK(tensor->shape().dim(d) == t0->shape().dim(d));
+ }
+ }
+
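+ // The output gains one dimension of size values_count at position axis.
+ // Example: packing 4 tensors of shape {2, 3} along axis 1 yields {2, 4, 3}.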
+ Shape output_shape(dimension_size);
+ int i = 0;
+ for (int index = 0; index < dimension_size; ++index)
+ {
+ if (index == axis)
+ {
+ output_shape.dim(index) = params().values_count;
+ }
+ else
+ {
+ output_shape.dim(index) = t0->shape().dim(i++);
+ }
+ }
+
+ if (t0->element_type() == DataType::U8 || t0->element_type() == DataType::S8 ||
+ t0->element_type() == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(output()->zero_point() == t0->zero_point());
+ LUCI_INTERPRETER_CHECK(output()->scale() == t0->scale());
+ // Guarantee that every input's quantization params match the output's, as
+ // re-quantization while packing quantized tensors is not supported.
+ for (int i = 0; i < params().values_count; i++)
+ {
+ LUCI_INTERPRETER_CHECK(_inputs[i]->zero_point() == t0->zero_point());
+ LUCI_INTERPRETER_CHECK(_inputs[i]->scale() == t0->scale());
+ }
+ }
+
+ output()->resize(output_shape);
+}
+
+void Pack::execute() const
+{
+ switch (_inputs[0]->element_type())
+ {
+ case DataType::FLOAT32:
+ evalGeneric<float>();
+ break;
+ case DataType::U8:
+ evalGeneric<uint8_t>();
+ break;
+ case DataType::S8:
+ evalGeneric<int8_t>();
+ break;
+ case DataType::S16:
+ evalGeneric<int16_t>();
+ break;
+ case DataType::S32:
+ evalGeneric<int32_t>();
+ break;
+ case DataType::S64:
+ evalGeneric<int64_t>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T> void Pack::evalGeneric() const
+{
+ const Tensor *t0 = _inputs[0];
+ const int dimension_size = t0->shape().num_dims() + 1;
+ int axis = params().axis;
+ if (axis < 0)
+ {
+ axis += dimension_size;
+ }
+
+ VectorOfTensors<T, true> inputs(_inputs);
+ tflite::PackParams params{};
+ params.axis = axis;
+ params.inputs_count = _inputs.size();
+ tflite::reference_ops::Pack<T>(params, inputs.shapes(), inputs.data(), getTensorShape(output()),
+ getTensorData<T>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Pack.h b/compiler/luci-interpreter/src/kernels/Pack.h
new file mode 100644
index 000000000..4a2fcfd80
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Pack.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_PACK_H
+#define LUCI_INTERPRETER_KERNELS_PACK_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Pack : public KernelWithParams<PackParams>
+{
+public:
+ Pack(std::vector<const Tensor *> inputs, Tensor *output, const PackParams &params);
+
+ const Tensor *input(int index) const { return _inputs[index]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> void evalGeneric() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_PACK_H
diff --git a/compiler/luci-interpreter/src/kernels/Pack.test.cpp b/compiler/luci-interpreter/src/kernels/Pack.test.cpp
new file mode 100644
index 000000000..d16320b78
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Pack.test.cpp
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Pack.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::vector<std::initializer_list<int32_t>> input_shapes,
+ std::initializer_list<int32_t> output_shape, std::vector<std::vector<T>> input_datas,
+ std::initializer_list<T> output_data, int32_t axis)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+ std::vector<const Tensor *> inputs(input_datas.size());
+ std::vector<Tensor> tmp_inputs;
+ for (int i = 0; i < input_datas.size(); i++)
+ {
+ if (std::is_same<T, float>::value || std::is_same<T, int32_t>::value ||
+ std::is_same<T, int64_t>::value)
+ {
+ tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {}, ""));
+ memory_manager->allocate_memory(tmp_inputs[i]);
+ tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T));
+ }
+ else if (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value)
+ {
+ tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {{1.0f / 255}, {128}}, ""));
+ memory_manager->allocate_memory(tmp_inputs[i]);
+ tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T));
+ }
+ else
+ {
+ assert((std::is_same<T, int16_t>::value) && "unexpected data type under test");
+ tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {{1.0f}, {0}}, ""));
+ memory_manager->allocate_memory(tmp_inputs[i]);
+ tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T));
+ }
+ }
+ for (int i = 0; i < input_datas.size(); i++)
+ {
+ inputs[i] = &tmp_inputs[i];
+ }
+
+ Tensor output_tensor = makeOutputTensor(element_type);
+ if (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value)
+ {
+ output_tensor = makeOutputTensor(element_type, 1.0f / 255, 128);
+ }
+ else if (std::is_same<T, int16_t>::value)
+ {
+ output_tensor = makeOutputTensor(element_type, 1.0f, 0);
+ }
+
+ PackParams params{};
+ params.axis = axis;
+ params.values_count = input_datas.size();
+ Pack kernel(inputs, &output_tensor, params);
+
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+template <typename T> class PackTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<uint8_t, int8_t, int16_t, int32_t, int64_t, float>;
+TYPED_TEST_SUITE(PackTest, DataTypes);
+
+TYPED_TEST(PackTest, ThreeInputs)
+{
+ Check<TypeParam>(/*input_shapes=*/{{2}, {2}, {2}},
+ /*output_shape=*/{3, 2},
+ /*input_datas=*/
+ {{1, 4}, {2, 5}, {3, 6}},
+ /*output_data=*/
+ {1, 4, 2, 5, 3, 6}, /*axis=*/0);
+
+ SUCCEED();
+}
+
+TYPED_TEST(PackTest, NegAxis)
+{
+ Check<TypeParam>(/*input_shapes=*/{{2}, {2}, {2}},
+ /*output_shape=*/{2, 3},
+ /*input_datas=*/
+ {{1, 4}, {2, 5}, {3, 6}},
+ /*output_data=*/
+ {1, 2, 3, 4, 5, 6}, /*axis=*/-1);
+
+ SUCCEED();
+}
+
+TEST(Pack, MismatchingInputValuesCount_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input1_data{1, 4};
+ std::vector<float> input2_data{2, 5};
+ std::vector<float> input3_data{3, 6};
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2}, input1_data, memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2}, input2_data, memory_manager.get());
+ Tensor input3_tensor = makeInputTensor<DataType::FLOAT32>({2}, input3_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ PackParams params{};
+ {
+ params.axis = 0;
+ params.values_count = 2;
+
+ Pack kernel({&input1_tensor, &input2_tensor, &input3_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+ }
+}
+
+TEST(Pack, InvalidInputAxis_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input1_data{1, 4};
+ std::vector<float> input2_data{2, 5};
+ std::vector<float> input3_data{3, 6};
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2}, input1_data, memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2}, input2_data, memory_manager.get());
+ Tensor input3_tensor = makeInputTensor<DataType::FLOAT32>({2}, input3_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ PackParams params{};
+ {
+ params.axis = 2;
+ params.values_count = 3;
+
+ Pack kernel({&input1_tensor, &input2_tensor, &input3_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+ }
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Pad.cpp b/compiler/luci-interpreter/src/kernels/Pad.cpp
index bdf3a2a95..c07f6e310 100644
--- a/compiler/luci-interpreter/src/kernels/Pad.cpp
+++ b/compiler/luci-interpreter/src/kernels/Pad.cpp
@@ -18,7 +18,9 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/pad.h>
+
+#include <limits>
namespace luci_interpreter
{
@@ -26,7 +28,7 @@ namespace kernels
{
Pad::Pad(const Tensor *input, const Tensor *paddings, Tensor *output)
- : Kernel({input, paddings}, {output})
+ : Kernel({input, paddings}, {output})
{
}
@@ -93,6 +95,16 @@ void Pad::execute() const
getTensorData<uint8_t>(output()));
break;
}
+ case DataType::S8:
+ {
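+ // The implicit pad value is real 0, which maps to the output zero point in
+ // the quantized domain.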
+ assert(output()->zero_point() >= std::numeric_limits<int8_t>::min());
+ assert(output()->zero_point() <= std::numeric_limits<int8_t>::max());
+ const auto pad_value = static_cast<int8_t>(output()->zero_point());
+ tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<int8_t>(input()),
+ &pad_value, getTensorShape(output()),
+ getTensorData<int8_t>(output()));
+ break;
+ }
default:
throw std::runtime_error("Unsupported type.");
}
diff --git a/compiler/luci-interpreter/src/kernels/Pad.test.cpp b/compiler/luci-interpreter/src/kernels/Pad.test.cpp
index 15fcd0da3..dd3ce947c 100644
--- a/compiler/luci-interpreter/src/kernels/Pad.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Pad.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/Pad.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -30,47 +31,76 @@ float GetTolerance(float min, float max) { return (max - min) / 255.0; }
TEST(Pad, Uint8)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);
std::vector<float> input_data{-0.8, 0.2, 0.9, 0.7, 0.1, -0.3};
std::vector<int32_t> paddings_data{0, 0, 0, 2, 1, 3, 0, 0};
- Tensor input_tensor{DataType::U8, {1, 2, 3, 1}, {{quant_param.first}, {quant_param.second}}, ""};
- Tensor paddings_tensor = makeInputTensor<DataType::S32>({4, 2}, paddings_data);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get());
+ Tensor paddings_tensor =
+ makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
- std::vector<uint8_t> quantize_input =
- quantize<uint8_t>(input_data, quant_param.first, quant_param.second);
- input_tensor.writeData(quantize_input.data(), quantize_input.size() * sizeof(uint8_t));
Pad kernel(&input_tensor, &paddings_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{0, -0.8, 0.2, 0.9, 0, 0, 0, 0, 0.7, 0.1, -0.3, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
- EXPECT_THAT(dequantize(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(),
- output_tensor.zero_point()),
- ElementsAreArray(ArrayFloatNear(ref_output_data, kQuantizedTolerance)));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, kQuantizedTolerance));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 7, 1}));
}
+TEST(Pad, Int8)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+ std::pair<float, int32_t> quant_param = quantizationParams<int8_t>(-1.0f, 1.0f);
+ std::vector<float> input_data{-0.2, 0.4, 0.5, -0.7, -0.1, -0.9, 0.7, 0.1, 0.2};
+ std::vector<int32_t> paddings_data{0, 0, 1, 2, 2, 1, 0, 0};
+ Tensor input_tensor = makeInputTensor<DataType::S8>(
+ {1, 3, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get());
+ Tensor paddings_tensor =
+ makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, quant_param.first, quant_param.second);
+
+ Pad kernel(&input_tensor, &paddings_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0, 0, 0, 0, 0, 0, 0, 0, -0.2, 0.4, 0.5, 0,
+ 0, 0, -0.7, -0.1, -0.9, 0, 0, 0, 0.7, 0.1, 0.2, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, kQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 6, 6, 1}));
+}
+
TEST(Pad, Float)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
std::vector<float> input_data{1, 2, 3, 4, 5, 6};
std::vector<int32_t> paddings_data{1, 0, 0, 2, 0, 3, 0, 0};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 2, 3, 1}, input_data);
- Tensor paddings_tensor = makeInputTensor<DataType::S32>({4, 2}, paddings_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 3, 1}, input_data, memory_manager.get());
+ Tensor paddings_tensor =
+ makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Pad kernel(&input_tensor, &paddings_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 0, 0, 0, 4, 5,
6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
std::initializer_list<int32_t> ref_output_shape{2, 4, 6, 1};
- EXPECT_THAT(extractTensorData<float>(output_tensor),
- ElementsAreArray(ArrayFloatNear(ref_output_data)));
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
diff --git a/compiler/luci-interpreter/src/kernels/PadV2.cpp b/compiler/luci-interpreter/src/kernels/PadV2.cpp
new file mode 100644
index 000000000..197cdaa69
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/PadV2.cpp
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/PadV2.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/pad.h>
+
+#include <limits>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
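+// PadV2 generalizes Pad: the pad value comes from an explicit constant_values
+// tensor instead of being implicitly zero (or the zero point, for quantized
+// types).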
+PadV2::PadV2(const Tensor *input, const Tensor *paddings, const Tensor *constant_values,
+ Tensor *output)
+ : Kernel({input, paddings, constant_values}, {output})
+{
+}
+
+void PadV2::configure()
+{
+ const Shape &input_shape = input()->shape();
+ const int num_dims = input_shape.num_dims();
+
+ if (num_dims > 4)
+ throw std::runtime_error("Unsupported number of dimensions.");
+
+ assert(output()->element_type() == input()->element_type());
+ assert(paddings()->element_type() == DataType::S32);
+ assert(constant_values()->element_type() == output()->element_type());
+ // Paddings shape should be [N, 2].
+ assert(paddings()->shape().num_dims() == 2);
+ assert(paddings()->shape().dim(0) == num_dims);
+ assert(paddings()->shape().dim(1) == 2);
+ // The constant_values tensor must hold exactly one element.
+ assert(constant_values()->shape().num_elements() == 1);
+
+ Shape output_shape(num_dims);
+ const auto *paddings_data = getTensorData<int32_t>(paddings());
+ for (int i = 0; i < num_dims; ++i)
+ {
+ const int32_t padding_before = paddings_data[i * 2];
+ const int32_t padding_after = paddings_data[i * 2 + 1];
+ assert(padding_before >= 0 && padding_after >= 0);
+ output_shape.dim(i) = input_shape.dim(i) + padding_before + padding_after;
+ }
+
+ output()->resize(output_shape);
+}
+
+void PadV2::execute() const
+{
+ const int num_dims = input()->shape().num_dims();
+
+ tflite::PadParams params{};
+ params.left_padding_count = num_dims;
+ params.right_padding_count = num_dims;
+
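+ // paddings() holds one (before, after) pair per dimension: row i of the
+ // [N, 2] tensor fills left_padding[i] and right_padding[i].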
+ const auto *paddings_data = getTensorData<int32_t>(paddings());
+ for (int i = num_dims - 1; i >= 0; --i)
+ {
+ params.left_padding[i] = paddings_data[i * 2];
+ params.right_padding[i] = paddings_data[i * 2 + 1];
+ }
+
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ {
+ const auto pad_value = getTensorData<float>(constant_values())[0];
+ tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<float>(input()),
+ &pad_value, getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ }
+ case DataType::U8:
+ {
+ assert(output()->zero_point() >= std::numeric_limits<uint8_t>::min());
+ assert(output()->zero_point() <= std::numeric_limits<uint8_t>::max());
+ const auto pad_value = getTensorData<uint8_t>(constant_values())[0];
+ tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ &pad_value, getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/PadV2.h b/compiler/luci-interpreter/src/kernels/PadV2.h
new file mode 100644
index 000000000..48a31f584
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/PadV2.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_PAD_V2_H
+#define LUCI_INTERPRETER_KERNELS_PAD_V2_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class PadV2 : public Kernel
+{
+public:
+ PadV2(const Tensor *input, const Tensor *paddings, const Tensor *constant_values, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *paddings() const { return _inputs[1]; }
+ const Tensor *constant_values() const { return _inputs[2]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_PAD_V2_H
diff --git a/compiler/luci-interpreter/src/kernels/PadV2.test.cpp b/compiler/luci-interpreter/src/kernels/PadV2.test.cpp
new file mode 100644
index 000000000..41efaff06
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/PadV2.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/PadV2.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+float GetTolerance(float min, float max) { return (max - min) / 255.0f; }
+
+TEST(PadV2, Uint8)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);
+ std::vector<float> input_data{-0.8, 0.2, 0.9, 0.7, 0.1, -0.3};
+ std::vector<int32_t> paddings_data{0, 0, 0, 2, 1, 3, 0, 0};
+ std::vector<float> constant_values_data{0.5};
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get());
+ Tensor paddings_tensor =
+ makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get());
+ Tensor constant_values = makeInputTensor<DataType::U8>(
+ {1}, quant_param.first, quant_param.second, constant_values_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ PadV2 kernel(&input_tensor, &paddings_tensor, &constant_values, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data = {
+ 0.5, -0.8, 0.2, 0.9, 0.5, 0.5, 0.5, 0.5, 0.7, 0.1, -0.3, 0.5, 0.5, 0.5, //
+ 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5}; //
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, kQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 7, 1}));
+}
+
+TEST(PadV2, Float)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6};
+ std::vector<int32_t> paddings_data{1, 0, 0, 2, 0, 3, 0, 0};
+ std::vector<float> constant_values_data{7};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 3, 1}, input_data, memory_manager.get());
+ Tensor paddings_tensor =
+ makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get());
+ Tensor constant_values =
+ makeInputTensor<DataType::FLOAT32>({1}, constant_values_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ PadV2 kernel(&input_tensor, &paddings_tensor, &constant_values, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 1, 2, 3, 7, 7, 7, 4, 5,
+ 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7};
+ std::initializer_list<int32_t> ref_output_shape{2, 4, 6, 1};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Pow.cpp b/compiler/luci-interpreter/src/kernels/Pow.cpp
new file mode 100644
index 000000000..722c64024
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Pow.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Pow.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Pow::Pow(const Tensor *input1, const Tensor *input2, Tensor *output)
+ : Kernel({input1, input2}, {output})
+{
+}
+
+void Pow::configure()
+{
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Pow::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ eval<float>();
+ break;
+ case DataType::S32:
+ eval<int32_t>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T> void Pow::eval() const
+{
+ tflite::ArithmeticParams params{};
+
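+ // ProcessBroadcastShapes reports whether the operand shapes differ; if so,
+ // the slow 4-D broadcasting kernel is used, otherwise a plain elementwise
+ // loop over matching shapes.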
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastPow4DSlow(getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Pow(getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()));
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Pow.h b/compiler/luci-interpreter/src/kernels/Pow.h
new file mode 100644
index 000000000..8ff865e40
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Pow.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_POW_H
+#define LUCI_INTERPRETER_KERNELS_POW_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Pow : public Kernel
+{
+public:
+ Pow(const Tensor *input1, const Tensor *input2, Tensor *output);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> void eval() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_POW_H
diff --git a/compiler/luci-interpreter/src/kernels/Pow.test.cpp b/compiler/luci-interpreter/src/kernels/Pow.test.cpp
new file mode 100644
index 000000000..0e858115d
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Pow.test.cpp
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Pow.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class PowTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(PowTest, SimplePow)
+{
+ std::initializer_list<int32_t> base_shape = {1, 1, 3, 2};
+
+ std::vector<float> input1_data{0.3f, 2.3f, 0.9f, 0.5f, 0.8f, 1.1f};
+ std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
+ std::vector<float> test_outputs{0.786f, 1.2838f, 1.043f, 0.7071f, 0.8f, 1.08956f};
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(base_shape));
+}
+
+TEST_F(PowTest, FloatBroadcastPow)
+{
+ std::initializer_list<int32_t> input1_shape = {1, 3};
+ std::initializer_list<int32_t> input2_shape = {3, 1};
+
+ std::vector<float> input1_data{0.3f, 2.3f, 0.9f};
+ std::vector<float> input2_data{0.2f, 0.3f, 0.4f};
+ std::vector<float> test_outputs{0.786f, 1.18126f, 0.9791f, 0.6968f, 1.28386f,
+ 0.96888f, 0.6178f, 1.3953f, 0.9587f};
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(input1_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(input2_shape, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f));
+}
+
+TEST_F(PowTest, IntPow)
+{
+ std::initializer_list<int32_t> base_shape = {1, 3};
+
+ std::vector<int32_t> input_data{2, 3, 4};
+ std::vector<int32_t> test_outputs{4, 27, 256};
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S32>(base_shape, input_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::S32>(base_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(test_outputs));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(base_shape));
+}
+
+TEST_F(PowTest, Input_Output_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(PowTest, Input_Type_Mismatch_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {4}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(PowTest, Invalid_Input_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Quantize.cpp b/compiler/luci-interpreter/src/kernels/Quantize.cpp
new file mode 100644
index 000000000..0c8544a65
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Quantize.cpp
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Quantize.h"
+#include "kernels/Utils.h"
+#include "PALQuantize.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+namespace
+{
+
+template <typename input_dtype> void call_requantize(const Tensor *input, Tensor *output)
+{
+ int32_t multiplier;
+ int shift;
+
+ const double effective_output_scale = input->scale() / output->scale();
+ quantizeMultiplier(effective_output_scale, &multiplier, &shift);
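+ // Requantization computes, roughly,
+ // q_out = round((q_in - in_zp) * in_scale / out_scale) + out_zp;
+ // the scale ratio is pre-encoded as a fixed-point multiplier plus shift,
+ // e.g. a ratio of 0.5 becomes multiplier 1 << 30 with shift 0.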
+
+ const auto input_shape = getTensorShape(input);
+ const auto output_shape = getTensorShape(output);
+ const auto size = tflite::MatchingFlatSize(input_shape, output_shape);
+
+ const auto input_data = getTensorData<input_dtype>(input);
+
+ switch (output->element_type())
+ {
+ case loco::DataType::S8:
+ luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
+ output->zero_point(), getTensorData<int8_t>(output));
+ break;
+ case loco::DataType::U8:
+ luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
+ output->zero_point(), getTensorData<uint8_t>(output));
+ break;
+ case loco::DataType::S16:
+ luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
+ output->zero_point(), getTensorData<int16_t>(output));
+ break;
+ default:
+ throw std::runtime_error("Unsupported quantized type, yet!");
+ }
+}
+
+} // namespace
+
+Quantize::Quantize(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Quantize::configure()
+{
+
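+ // int16 tensors are expected to be symmetrically quantized, i.e. to have a
+ // zero point of 0.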
+ if (input()->element_type() == loco::DataType::S16)
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0);
+
+ switch (input()->element_type())
+ {
+ case loco::DataType::FLOAT32:
+ {
+ LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::U8 ||
+ output()->element_type() == loco::DataType::S8 ||
+ output()->element_type() == loco::DataType::S16);
+ break;
+ }
+ case loco::DataType::S16:
+ case loco::DataType::S8:
+ case loco::DataType::U8:
+ {
+ LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::S8 ||
+ output()->element_type() == loco::DataType::U8 ||
+ output()->element_type() == loco::DataType::S16);
+ if (output()->element_type() == loco::DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(output()->zero_point() == 0);
+ }
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported type");
+ }
+
+ output()->resize(input()->shape());
+}
+
+void Quantize::execute() const
+{
+ switch (input()->element_type())
+ {
+ case loco::DataType::FLOAT32:
+ {
+ tflite::QuantizationParams op_params;
+ op_params.zero_point = output()->zero_point();
+ op_params.scale = output()->scale();
+ const auto input_data = getTensorData<float>(input());
+
+ switch (output()->element_type())
+ {
+ case loco::DataType::S8:
+ {
+ luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
+ getTensorShape(output()), getTensorData<int8_t>(output()));
+ break;
+ }
+ case loco::DataType::U8:
+ {
+ luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
+ getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ }
+ case loco::DataType::S16:
+ {
+ luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
+ getTensorShape(output()),
+ getTensorData<int16_t>(output()));
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+ break;
+ }
+ case loco::DataType::S16:
+ {
+ call_requantize<int16_t>(input(), output());
+ break;
+ }
+ case loco::DataType::S8:
+ {
+ call_requantize<int8_t>(input(), output());
+ break;
+ }
+ case loco::DataType::U8:
+ {
+ call_requantize<uint8_t>(input(), output());
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
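
call_requantize above folds the input/output scale ratio into a fixed-point multiplier and shift via quantizeMultiplier, then rescales each element around the zero points. A minimal sketch of that decomposition, assuming the usual TFLite convention (a Q31 mantissa plus a binary exponent); the helper names here are illustrative, not the interpreter's actual API:

    #include <cmath>
    #include <cstdint>

    // Decompose scale into mantissa * 2^shift with the mantissa in [0.5, 1),
    // stored as a Q31 fixed-point integer (the real helper also handles the
    // edge case where the mantissa rounds up to exactly 1.0).
    void quantize_multiplier_sketch(double scale, int32_t *multiplier, int *shift)
    {
      const double mantissa = std::frexp(scale, shift);
      *multiplier = static_cast<int32_t>(std::round(mantissa * (1ll << 31)));
    }

    // Requantize one element: re-center on the input zero point, apply the
    // fixed-point scale with rounding, then shift to the output zero point.
    int32_t requantize_one(int32_t in, int32_t in_zp, int32_t multiplier, int shift,
                           int32_t out_zp)
    {
      const int64_t centered = static_cast<int64_t>(in) - in_zp;
      const int64_t scaled = (centered * multiplier + (1ll << (30 - shift))) >> (31 - shift);
      return static_cast<int32_t>(scaled) + out_zp;
    }
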
diff --git a/compiler/luci-interpreter/src/kernels/Quantize.h b/compiler/luci-interpreter/src/kernels/Quantize.h
new file mode 100644
index 000000000..006c5366f
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Quantize.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_QUANTIZE_H
+#define LUCI_INTERPRETER_KERNELS_QUANTIZE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Quantize : public Kernel
+{
+public:
+ Quantize(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_QUANTIZE_H
diff --git a/compiler/luci-interpreter/src/kernels/Quantize.test.cpp b/compiler/luci-interpreter/src/kernels/Quantize.test.cpp
new file mode 100644
index 000000000..22e67fe3f
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Quantize.test.cpp
@@ -0,0 +1,254 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Quantize.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class QuantizeTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(QuantizeTest, FloatUint8)
+{
+ std::vector<float> input_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64};
+
+ std::vector<uint8_t> ref_output_data{0, 1, 2, 3, 4, 251, 252, 253, 254, 255};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, /*scale*/ 0.5, /*zero_point*/ 127);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(QuantizeTest, FloatInt8)
+{
+ std::vector<float> input_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64};
+
+ std::vector<int8_t> ref_output_data{-128, -127, -126, -125, -124, 123, 124, 125, 126, 127};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(QuantizeTest, FloatInt16)
+{
+ std::vector<float> input_data{-63.5, -63, -3, -2, -1, 1, 2, 3, 63.5, 64};
+
+ std::vector<int16_t> ref_output_data{-12700, -12600, -600, -400, -200,
+ 200, 400, 600, 12700, 12800};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, /*scale*/ 0.005, /*zero_point*/ 0);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int16_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(QuantizeTest, Int16Int16)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ std::vector<int16_t> ref_output_data{2, 4, 6, 8, 10, 12, 14, 16, 18, 20};
+
+ Tensor input_tensor = makeInputTensor<DataType::S16>(
+ {1, 1, 2, 5}, /*scale*/ 1.0, /*zero_point*/ 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, /*scale*/ 0.5, /*zero_point*/ 0);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int16_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
+}
+
+TEST_F(QuantizeTest, Int8Int8)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ std::vector<int8_t> ref_output_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 19};
+
+ Tensor input_tensor = makeInputTensor<DataType::S8>(
+ {1, 1, 2, 5}, /*scale*/ 0.5, /*zero_point*/ -1, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
+}
+
+TEST_F(QuantizeTest, Uint8Uint8)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ std::vector<uint8_t> ref_output_data{129, 131, 133, 135, 137, 139, 141, 143, 145, 147};
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 1, 2, 5}, /*scale*/ 0.5, /*zero_point*/ 127, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, /*scale*/ 0.5, /*zero_point*/ 127);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
+}
+
+TEST_F(QuantizeTest, Int16Int8)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ std::vector<int8_t> ref_output_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 19};
+
+ Tensor input_tensor = makeInputTensor<DataType::S16>(
+ {1, 1, 2, 5}, /*scale*/ 1.0, /*zero_point*/ 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
+}
+
+TEST_F(QuantizeTest, InvalidInputType_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(QuantizeTest, InvalidOutputTypeForFloatInput_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 1, 2, 5}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(QuantizeTest, InvalidOutputTypeForInt16Input_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(QuantizeTest, InvalidOutputTypeForInt8Input_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S8>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(QuantizeTest, InvalidOutputTypeForUint8Input_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(QuantizeTest, InvalidInputZeroPoint_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 2, 5}, 0.5, -1, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
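
The reference data in these tests follows directly from the affine mapping q = round(x / scale) + zero_point, clamped to the target type's range. A quick standalone check of the FloatUint8 case (scale 0.5, zero point 127):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    uint8_t quantize_u8(float x, float scale, int32_t zero_point)
    {
      const int32_t q = static_cast<int32_t>(std::round(x / scale)) + zero_point;
      return static_cast<uint8_t>(std::min(255, std::max(0, q)));
    }

    // quantize_u8(-63.5f, 0.5f, 127) == 0 and quantize_u8(64.f, 0.5f, 127) == 255,
    // matching the first and last entries of ref_output_data in FloatUint8.
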
diff --git a/compiler/luci-interpreter/src/kernels/ReduceMax.cpp b/compiler/luci-interpreter/src/kernels/ReduceMax.cpp
new file mode 100644
index 000000000..d58cd1563
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/ReduceMax.cpp
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ReduceMax.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reduce.h>
+
+#include <cassert>
+#include <limits>
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Returns the number of axes that will be reduced. Removes duplicates.
+static int getAxisReductionCount(const int32_t *axes_data, int num_axes, int input_num_dims)
+{
+ int reduction_count = num_axes;
+ for (int i = 0; i < num_axes; ++i)
+ {
+ int current = axes_data[i] >= 0 ? axes_data[i] : axes_data[i] + input_num_dims;
+ assert(current >= 0 && current < input_num_dims);
+ for (int j = 0; j < i; j++)
+ {
+ int previous = axes_data[j] >= 0 ? axes_data[j] : axes_data[j] + input_num_dims;
+ // Check for a duplicate axis
+ if (current == previous)
+ {
+ --reduction_count;
+ break;
+ }
+ }
+ }
+ return reduction_count;
+}
+
+static Shape getOutputShape(const Shape &input_shape, const int32_t *axes_data, int num_axes,
+ bool keep_dims)
+{
+ int input_num_dims = input_shape.num_dims();
+ if (input_num_dims == 0)
+ {
+ return Shape(0);
+ }
+
+ if (keep_dims)
+ {
+ Shape output_shape(input_num_dims);
+ for (int idx = 0; idx < input_num_dims; ++idx)
+ {
+ bool is_axis = false;
+ for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
+ {
+ if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx)
+ {
+ is_axis = true;
+ break;
+ }
+ }
+ if (is_axis)
+ {
+ output_shape.dim(idx) = 1;
+ }
+ else
+ {
+ output_shape.dim(idx) = input_shape.dim(idx);
+ }
+ }
+ return output_shape;
+ }
+ else
+ {
+ int num_reduce_axes = getAxisReductionCount(axes_data, num_axes, input_num_dims);
+ Shape output_shape(input_num_dims - num_reduce_axes);
+ int num_skip_axes = 0;
+ for (int idx = 0; idx < input_num_dims; ++idx)
+ {
+ bool is_axis = false;
+ for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
+ {
+ if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx)
+ {
+ ++num_skip_axes;
+ is_axis = true;
+ break;
+ }
+ }
+ if (!is_axis)
+ {
+ output_shape.dim(idx - num_skip_axes) = input_shape.dim(idx);
+ }
+ }
+ return output_shape;
+ }
+}
+
+ReduceMax::ReduceMax(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
+ Tensor *resolved_axes, const ReducerParams &params)
+ : KernelWithParams<ReducerParams>({input, axes}, {output, temp_index, resolved_axes}, params)
+{
+}
+
+void ReduceMax::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(axes()->element_type() == DataType::S32);
+
+ const Shape &input_shape = input()->shape();
+ int input_num_dims = input_shape.num_dims();
+
+ const auto *axes_data = getTensorData<int32_t>(axes());
+ int num_axes = axes()->shape().num_elements();
+ LUCI_INTERPRETER_CHECK(num_axes <= 4);
+
+ // We compute output shapes in configure(), assuming that outputs have
+ // static shapes.
+ // TODO Support dynamic shape
+ Shape output_shape = getOutputShape(input_shape, axes_data, num_axes, _params.keep_dims);
+ output()->resize(output_shape);
+
+ auto temp_index = getOutputTensors()[1];
+ auto resolved_axes = getOutputTensors()[2];
+
+ temp_index->resize(Shape(input_num_dims));
+ resolved_axes->resize(Shape(num_axes));
+}
+
+void ReduceMax::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ // TODO Support quantized kernels
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void ReduceMax::evalFloat() const
+{
+ const auto *axes_data = getTensorData<int32_t>(axes());
+ int num_axes = axes()->shape().num_elements();
+
+ auto temp_index = getOutputTensors()[1];
+ auto resolved_axes = getOutputTensors()[2];
+
+ int num_resolved_axis = 0;
+ LUCI_INTERPRETER_CHECK(
+ tflite::reference_ops::ResolveAxis(input()->shape().num_dims(), axes_data, num_axes,
+ getTensorData<int>(resolved_axes), &num_resolved_axis));
+
+ float init_value = std::numeric_limits<float>::lowest();
+ tflite::reference_ops::ReduceGeneric<float>(
+ getTensorData<float>(input()), getTensorShape(input()).DimsData(), input()->shape().num_dims(),
+ getTensorData<float>(output()), getTensorShape(output()).DimsData(),
+ output()->shape().num_dims(), axes_data, num_axes, _params.keep_dims,
+ getTensorData<int>(temp_index), getTensorData<int>(resolved_axes), init_value,
+ [](const float current, const float in) -> float { return (in > current) ? in : current; });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
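
getOutputShape normalizes negative axes, drops duplicates, and removes (or pins to 1) each reduced dimension. A condensed sketch of the keep_dims == false rule, using std::set for the duplicate handling; reduced_shape is an illustrative helper, not interpreter code:

    #include <set>
    #include <vector>

    std::vector<int> reduced_shape(const std::vector<int> &dims, const std::vector<int> &axes)
    {
      std::set<int> reduce; // the set removes duplicates, like getAxisReductionCount
      for (int a : axes)
        reduce.insert(a >= 0 ? a : a + static_cast<int>(dims.size()));
      std::vector<int> out;
      for (int i = 0; i < static_cast<int>(dims.size()); ++i)
        if (reduce.count(i) == 0)
          out.push_back(dims[i]);
      return out;
    }

    // reduced_shape({4, 3, 2}, {1, 0, -3, -3}) == {2}: -3 normalizes to 0 and the
    // duplicates collapse, so only axes {0, 1} are reduced -- exactly the case the
    // FloatNotKeepDims tests below exercise.
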
diff --git a/compiler/luci-interpreter/src/kernels/ReduceMax.h b/compiler/luci-interpreter/src/kernels/ReduceMax.h
new file mode 100644
index 000000000..25a66278a
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/ReduceMax.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_REDUCE_MAX_H
+#define LUCI_INTERPRETER_KERNELS_REDUCE_MAX_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <memory>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ReduceMax : public KernelWithParams<ReducerParams>
+{
+public:
+ ReduceMax(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
+ Tensor *resolved_axes, const ReducerParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *axes() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_REDUCE_MAX_H
diff --git a/compiler/luci-interpreter/src/kernels/ReduceMax.test.cpp b/compiler/luci-interpreter/src/kernels/ReduceMax.test.cpp
new file mode 100644
index 000000000..ab688827b
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/ReduceMax.test.cpp
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ReduceMax.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class ReduceMaxTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(ReduceMaxTest, FloatNotKeepDims)
+{
+ std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
+ 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+
+ std::vector<int32_t> axis_data{1, 0, -3, -3};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({4}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ReducerParams params{};
+ params.keep_dims = false;
+
+ ReduceMax kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{23, 24};
+ std::initializer_list<int32_t> ref_output_shape{2};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(ReduceMaxTest, FloatKeepDims)
+{
+ std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
+ 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+
+ std::vector<int32_t> axis_data{0, 2};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ReducerParams params{};
+ params.keep_dims = true;
+
+ ReduceMax kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{20, 22, 24};
+ std::initializer_list<int32_t> ref_output_shape{1, 3, 1};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
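
Both expectations can be checked by brute force. For FloatKeepDims, input[i][j][k] = 6*i + 2*j + k + 1 on the {4, 3, 2} tensor, so the max over axes {0, 2} is attained at i == 3, k == 1, giving 20 + 2*j, i.e. {20, 22, 24}. The same check in code (a throwaway reference, not part of the kernel):

    #include <algorithm>
    #include <vector>

    std::vector<float> max_over_axes_0_2(const std::vector<float> &in)
    {
      std::vector<float> out(3, in[0]);
      for (int i = 0; i < 4; ++i)
        for (int j = 0; j < 3; ++j)
          for (int k = 0; k < 2; ++k)
            out[j] = std::max(out[j], in[(i * 3 + j) * 2 + k]);
      return out;
    }

    // With in = {1, 2, ..., 24} this returns {20, 22, 24}, matching ref_output_data.
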
diff --git a/compiler/luci-interpreter/src/kernels/ReduceProd.cpp b/compiler/luci-interpreter/src/kernels/ReduceProd.cpp
new file mode 100644
index 000000000..f3fc7d3f1
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/ReduceProd.cpp
@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ReduceProd.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reduce.h>
+
+#include <cassert>
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Returns the number of axes that will be reduced. Removes duplicates.
+static int getAxisReductionCount(const int32_t *axes_data, int num_axes, int input_num_dims)
+{
+ int reduction_count = num_axes;
+ for (int i = 0; i < num_axes; ++i)
+ {
+ int current = axes_data[i] >= 0 ? axes_data[i] : axes_data[i] + input_num_dims;
+ assert(current >= 0 && current < input_num_dims);
+ for (int j = 0; j < i; j++)
+ {
+ int previous = axes_data[j] >= 0 ? axes_data[j] : axes_data[j] + input_num_dims;
+ // Check for a duplicate axis
+ if (current == previous)
+ {
+ --reduction_count;
+ break;
+ }
+ }
+ }
+ return reduction_count;
+}
+
+static Shape getOutputShape(const Shape &input_shape, const int32_t *axes_data, int num_axes,
+ bool keep_dims)
+{
+ int input_num_dims = input_shape.num_dims();
+ if (input_num_dims == 0)
+ {
+ return Shape(0);
+ }
+
+ if (keep_dims)
+ {
+ Shape output_shape(input_num_dims);
+ for (int idx = 0; idx < input_num_dims; ++idx)
+ {
+ bool is_axis = false;
+ for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
+ {
+ if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx)
+ {
+ is_axis = true;
+ break;
+ }
+ }
+ if (is_axis)
+ {
+ output_shape.dim(idx) = 1;
+ }
+ else
+ {
+ output_shape.dim(idx) = input_shape.dim(idx);
+ }
+ }
+ return output_shape;
+ }
+ else
+ {
+ int num_reduce_axes = getAxisReductionCount(axes_data, num_axes, input_num_dims);
+ Shape output_shape(input_num_dims - num_reduce_axes);
+ int num_skip_axes = 0;
+ for (int idx = 0; idx < input_num_dims; ++idx)
+ {
+ bool is_axis = false;
+ for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
+ {
+ if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx)
+ {
+ ++num_skip_axes;
+ is_axis = true;
+ break;
+ }
+ }
+ if (!is_axis)
+ {
+ output_shape.dim(idx - num_skip_axes) = input_shape.dim(idx);
+ }
+ }
+ return output_shape;
+ }
+}
+
+ReduceProd::ReduceProd(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
+ Tensor *resolved_axes, const ReducerParams &params)
+ : KernelWithParams<ReducerParams>({input, axes}, {output, temp_index, resolved_axes}, params)
+{
+}
+
+void ReduceProd::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(axes()->element_type() == DataType::S32);
+
+ const Shape &input_shape = input()->shape();
+ int input_num_dims = input_shape.num_dims();
+
+ const auto *axes_data = getTensorData<int32_t>(axes());
+ int num_axes = axes()->shape().num_elements();
+ LUCI_INTERPRETER_CHECK(num_axes <= 4);
+
+ // We compute output shapes in configure(), assuming that outputs have
+ // static shapes.
+ // TODO Support dynamic shape
+ Shape output_shape = getOutputShape(input_shape, axes_data, num_axes, _params.keep_dims);
+ output()->resize(output_shape);
+
+ auto temp_index = getOutputTensors()[1];
+ auto resolved_axes = getOutputTensors()[2];
+
+ temp_index->resize(Shape(input_num_dims));
+ resolved_axes->resize(Shape(num_axes));
+}
+
+void ReduceProd::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ // TODO Support quantized kernels
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void ReduceProd::evalFloat() const
+{
+ const auto *axes_data = getTensorData<int32_t>(axes());
+ int num_axes = axes()->shape().num_elements();
+
+ auto temp_index = getOutputTensors()[1];
+ auto resolved_axes = getOutputTensors()[2];
+
+ int num_resolved_axis = 0;
+ LUCI_INTERPRETER_CHECK(
+ tflite::reference_ops::ResolveAxis(input()->shape().num_dims(), axes_data, num_axes,
+ getTensorData<int>(resolved_axes), &num_resolved_axis));
+
+ float init_value = 1.0;
+ tflite::reference_ops::ReduceGeneric<float>(
+ getTensorData<float>(input()), getTensorShape(input()).DimsData(), input()->shape().num_dims(),
+ getTensorData<float>(output()), getTensorShape(output()).DimsData(),
+ output()->shape().num_dims(), axes_data, num_axes, _params.keep_dims,
+ getTensorData<int>(temp_index), getTensorData<int>(resolved_axes), init_value,
+ [](const float current, const float in) -> float { return current * in; });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
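
ReduceProd shares the entire reduction skeleton with ReduceMax; the only moving parts are the identity element (1.0 instead of the float minimum) and the combine step (multiplication instead of max). Collapsed to a 1-D fold, the contract looks like this (a sketch, not the ReduceGeneric signature itself):

    #include <limits>
    #include <numeric>
    #include <vector>

    // Start from an identity value and combine elements pairwise.
    template <typename F> float fold(const std::vector<float> &v, float init, F combine)
    {
      return std::accumulate(v.begin(), v.end(), init, combine);
    }

    // ReduceMax:  fold(v, std::numeric_limits<float>::lowest(),
    //                  [](float a, float b) { return b > a ? b : a; });
    // ReduceProd: fold(v, 1.0f, [](float a, float b) { return a * b; });
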
diff --git a/compiler/luci-interpreter/src/kernels/ReduceProd.h b/compiler/luci-interpreter/src/kernels/ReduceProd.h
new file mode 100644
index 000000000..d2f58cc0a
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/ReduceProd.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_REDUCE_PROD_H
+#define LUCI_INTERPRETER_KERNELS_REDUCE_PROD_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <memory>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ReduceProd : public KernelWithParams<ReducerParams>
+{
+public:
+ ReduceProd(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
+ Tensor *resolved_axes, const ReducerParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *axes() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_REDUCE_PROD_H
diff --git a/compiler/luci-interpreter/src/kernels/ReduceProd.test.cpp b/compiler/luci-interpreter/src/kernels/ReduceProd.test.cpp
new file mode 100644
index 000000000..fa46f394d
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/ReduceProd.test.cpp
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ReduceProd.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class ReduceProdTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(ReduceProdTest, FloatNotKeepDims)
+{
+ std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
+ 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+
+ std::vector<int32_t> axis_data{1, 0, -3, -3};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({4}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ReducerParams params{};
+ params.keep_dims = false;
+
+ ReduceProd kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{3.162341376e+11, 1.9619905536e+12};
+ std::initializer_list<int32_t> ref_output_shape{2};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(ReduceProdTest, FloatKeepDims)
+{
+ std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
+ 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+
+ std::vector<int32_t> axis_data{0, 2};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ReducerParams params{};
+ params.keep_dims = true;
+
+ ReduceProd kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{7.74592e+06, 1.197504e+08, 6.6889152e+08};
+ std::initializer_list<int32_t> ref_output_shape{1, 3, 1};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(ReduceProdTest, Input_Output_Type_NEG)
+{
+ std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
+ 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+
+ std::vector<int32_t> axis_data{0, 2};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ ReducerParams params{};
+ params.keep_dims = true;
+
+ ReduceProd kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes,
+ params);
+
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ReduceProdTest, Invalid_Axes_Type_NEG)
+{
+ std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
+ 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+
+ std::vector<int64_t> axis_data{0, 2};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S64>({2}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ReducerParams params{};
+ params.keep_dims = true;
+
+ ReduceProd kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes,
+ params);
+
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
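
The FloatNotKeepDims reference values are exact integer products, rounded to float. Axes {1, 0, -3, -3} resolve to {0, 1}, so each output is the product over one k-slice of the {4, 3, 2} input; a quick standalone check:

    #include <cstdio>

    int main()
    {
      long double odd = 1, even = 1; // products over the k == 0 and k == 1 slices
      for (int v = 1; v <= 24; ++v)
        (v % 2 ? odd : even) *= v;
      // Prints 316234143225 and 1961990553600; as 32-bit floats these round to
      // 3.162341376e+11 and 1.9619905536e+12, the test's ref_output_data.
      std::printf("%.12Lg %.12Lg\n", odd, even);
    }
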
diff --git a/compiler/luci-interpreter/src/kernels/Relu.cpp b/compiler/luci-interpreter/src/kernels/Relu.cpp
new file mode 100644
index 000000000..747ec6cc8
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Relu.cpp
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Relu.h"
+#include "kernels/Utils.h"
+
+#include "PALRelu.h"
+
+#include <algorithm>
+#include <limits>
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Relu::Relu(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Relu::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ if (input()->element_type() == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
+ }
+
+ if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S16)
+ {
+ double multiplier = input()->scale() / output()->scale();
+ quantizeMultiplier(multiplier, &_output_multiplier, &_output_shift);
+ }
+ output()->resize(input()->shape());
+}
+
+void Relu::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ case DataType::S16:
+ evalQuantizedS16();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Relu::evalFloat() const
+{
+ const auto input_data = getTensorData<float>(input());
+ const auto input_shape = getTensorShape(input());
+ auto output_data = getTensorData<float>(output());
+ auto output_shape = getTensorShape(output());
+
+ luci_interpreter_pal::Relu(input_shape, input_data, output_shape, output_data);
+}
+
+void Relu::evalQuantized() const
+{
+ tflite::ReluParams params;
+ params.input_offset = input()->zero_point();
+ params.output_offset = output()->zero_point();
+ params.output_multiplier = _output_multiplier;
+ params.output_shift = _output_shift;
+
+ params.quantized_activation_min =
+ std::max(static_cast<int32_t>(std::numeric_limits<uint8_t>::min()), params.output_offset);
+ params.quantized_activation_max = static_cast<int32_t>(std::numeric_limits<uint8_t>::max());
+
+ luci_interpreter_pal::ReluX(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+}
+
+void Relu::evalQuantizedS16() const
+{
+ const auto *input_data = getTensorData<int16_t>(input());
+ auto *output_data = getTensorData<int16_t>(output());
+
+ constexpr int32_t output_min = 0;
+ constexpr int32_t output_max = std::numeric_limits<int16_t>::max();
+
+ const int32_t num_elements = input()->shape().num_elements();
+
+ for (int32_t i = 0; i < num_elements; ++i)
+ {
+ const int32_t input_val = input_data[i];
+ int32_t output_val =
+ tflite::MultiplyByQuantizedMultiplier(input_val, _output_multiplier, _output_shift);
+ output_val = std::max(output_val, output_min);
+ output_val = std::min(output_val, output_max);
+ output_data[i] = static_cast<int16_t>(output_val);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
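
For the S16 path the zero points are pinned to 0, so evalQuantizedS16 is just a rescale by input_scale / output_scale followed by a clamp to [0, INT16_MAX]. In the SInt16 test below, scales 0.5 and 0.25 give an exact multiplier of 2: a float 7 is stored as 14 in the input, rescaled to 28, and dequantizes back to 28 * 0.25 == 7. A float-domain sketch that deliberately ignores the fixed-point rounding details:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <limits>

    int16_t relu_s16_sketch(int16_t in, float in_scale, float out_scale)
    {
      const float rescaled = static_cast<float>(in) * in_scale / out_scale;
      const float clamped = std::min<float>(std::max(rescaled, 0.f),
                                            std::numeric_limits<int16_t>::max());
      return static_cast<int16_t>(std::lround(clamped));
    }
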
diff --git a/compiler/luci-interpreter/src/kernels/Relu.h b/compiler/luci-interpreter/src/kernels/Relu.h
new file mode 100644
index 000000000..b813f0cdf
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Relu.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_RELU_H
+#define LUCI_INTERPRETER_KERNELS_RELU_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Relu : public Kernel
+{
+public:
+ Relu(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+ void evalQuantizedS16() const;
+
+private:
+ int32_t _output_multiplier{0};
+ int32_t _output_shift{0};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_RELU_H
diff --git a/compiler/luci-interpreter/src/kernels/Relu.test.cpp b/compiler/luci-interpreter/src/kernels/Relu.test.cpp
new file mode 100644
index 000000000..bd32e3cc9
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Relu.test.cpp
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Relu.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class ReluTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(ReluTest, FloatSimple)
+{
+ std::vector<float> input_data{
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -1.0f, -2.0f, // Row 2
+ };
+
+ std::vector<float> ref_output_data{
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, 0.0f, 0.0f, // Row 2
+ };
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Relu kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
+}
+
+TEST_F(ReluTest, Uint8Quantized)
+{
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 7, 1, //
+ };
+ // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+ const float f_min = (-128.0 / 128.0) * 8;
+ const float f_max = (127.0 / 128.0) * 8;
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(f_min, f_max);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ Relu kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray({128, 128, 160, 192, 176, 128, 240, 144}));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({0, 0, 2, 4, 3, 0, 7, 1}));
+}
+
+TEST_F(ReluTest, Uint8Requantized)
+{
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 7, 1, //
+ };
+
+ // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+ const float in_min = (-128.0 / 128.0) * 8;
+ const float in_max = (127.0 / 128.0) * 8;
+ const float out_min = (0.0 / 256.0) * 8;
+ const float out_max = (255.0 / 256.0) * 8;
+
+ std::pair<float, int32_t> quant_input = quantizationParams<uint8_t>(in_min, in_max);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_input.first, quant_input.second, input_data, _memory_manager.get());
+
+ std::pair<float, int32_t> quant_output = quantizationParams<uint8_t>(out_min, out_max);
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_output.first, quant_output.second);
+
+ Relu kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray({0, 0, 64, 128, 96, 0, 224, 32}));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({0, 0, 2, 4, 3, 0, 7, 1}));
+}
+
+TEST_F(ReluTest, SInt16)
+{
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 7, 1, //
+ };
+ std::vector<float> ref_output_data{
+ 0, 0, 2, 4, //
+ 3, 0, 7, 1, //
+ };
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 2, 4, 1}, 0.5, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.25, 0);
+
+ Relu kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(ReluTest, Input_Output_Type_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ Relu kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ReluTest, Invalid_Input_Type_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ Relu kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Relu6.cpp b/compiler/luci-interpreter/src/kernels/Relu6.cpp
new file mode 100644
index 000000000..07205ed3a
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Relu6.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Relu6.h"
+#include "kernels/Utils.h"
+
+#include "PALRelu6.h"
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Relu6::Relu6(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Relu6::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+ if (input()->element_type() == DataType::U8)
+ {
+ double multiplier = input()->scale() / output()->scale();
+ quantizeMultiplier(multiplier, &_output_multiplier, &_output_shift);
+ }
+ output()->resize(input()->shape());
+}
+
+void Relu6::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Relu6::evalFloat() const
+{
+ const auto input_data = getTensorData<float>(input());
+ const auto input_shape = getTensorShape(input());
+ auto output_data = getTensorData<float>(output());
+ auto output_shape = getTensorShape(output());
+
+ luci_interpreter_pal::Relu6(input_shape, input_data, output_shape, output_data);
+}
+
+void Relu6::evalQuantized() const
+{
+ tflite::ReluParams params;
+ params.input_offset = input()->zero_point();
+ params.output_offset = output()->zero_point();
+ params.output_multiplier = _output_multiplier;
+ params.output_shift = _output_shift;
+
+ params.quantized_activation_min =
+ std::max(static_cast<int32_t>(std::numeric_limits<uint8_t>::min()), params.output_offset);
+ params.quantized_activation_max =
+ std::min(static_cast<int32_t>(std::numeric_limits<uint8_t>::max()),
+ params.output_offset + static_cast<int32_t>(std::roundf(6.f / output()->scale())));
+
+ luci_interpreter_pal::ReluX(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
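
The upper clamp computed in Relu6::configure's params is zero_point + round(6 / scale), saturated to 255. In the Uint8Quantized test below the scale is 19.921875 / 255 == 0.078125 with zero point 128, so the bound is 128 + round(76.8) == 205 -- the saturated value visible in the expected output. Spelled out:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Quantized-domain value corresponding to the real activation cap of 6.0.
    int32_t relu6_upper_bound(float scale, int32_t zero_point)
    {
      return std::min<int32_t>(
        255, zero_point + static_cast<int32_t>(std::roundf(6.f / scale)));
    }

    // relu6_upper_bound(0.078125f, 128) == 205, so the quantized inputs for 7 and 8
    // both clamp to 205 in the test's expected data.
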
diff --git a/compiler/luci-interpreter/src/kernels/Relu6.h b/compiler/luci-interpreter/src/kernels/Relu6.h
new file mode 100644
index 000000000..f5030b588
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Relu6.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_RELU6_H
+#define LUCI_INTERPRETER_KERNELS_RELU6_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Relu6 : public Kernel
+{
+public:
+ Relu6(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+
+private:
+ int32_t _output_multiplier{0};
+ int32_t _output_shift{0};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_RELU6_H
diff --git a/compiler/luci-interpreter/src/kernels/Relu6.test.cpp b/compiler/luci-interpreter/src/kernels/Relu6.test.cpp
new file mode 100644
index 000000000..af7b3f3db
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Relu6.test.cpp
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Relu6.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class Relu6Test : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(Relu6Test, FloatSimple)
+{
+ std::vector<float> input_data{
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 7.0f, -1.0f, -2.0f, // Row 2
+ };
+
+ std::vector<float> ref_output_data{
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 6.0f, 0.0f, 0.0f, // Row 2
+ };
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Relu6 kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
+}
+
+TEST_F(Relu6Test, Uint8Quantized)
+{
+ // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+ const float f_min = (-128.0 / 128.0) * 10;
+ const float f_max = (127.0 / 128.0) * 10;
+ const float tolerance = (f_max - f_min) / 255.0;
+
+ std::vector<float> input_data{
+ 0, -6, 2, 8, //
+ -2, 3, 7, 1, //
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(f_min, f_max);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ Relu6 kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray({128, 128, 154, 205, 128, 166, 205, 141}));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear({0, 0, 2, 6, 0, 3, 6, 1}, tolerance));
+}
+
+TEST_F(Relu6Test, Uint8Requantized)
+{
+ // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+ const float in_min = (-128.0 / 128.0) * 10;
+ const float in_max = (127.0 / 128.0) * 10;
+ const float out_min = (0.0 / 256.0) * 0;
+ const float out_max = (255.0 / 256.0) * 6;
+ const float tolerance = (in_max - in_min) / 255.0;
+
+ std::vector<float> input_data{
+ 0, -6, 2, 8, //
+ -2, 3, 7, 1, //
+ };
+
+ std::pair<float, int32_t> quant_input = quantizationParams<uint8_t>(in_min, in_max);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_input.first, quant_input.second, input_data, _memory_manager.get());
+
+ std::pair<float, int32_t> quant_output = quantizationParams<uint8_t>(out_min, out_max);
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_output.first, quant_output.second);
+
+ Relu6 kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray({0, 0, 87, 255, 0, 127, 255, 43}));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear({0, 0, 2, 6, 0, 3, 6, 1}, tolerance));
+}
+
+TEST_F(Relu6Test, Input_Output_Type_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ Relu6 kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(Relu6Test, Invalid_Input_Type_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ Relu6 kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Reshape.cpp b/compiler/luci-interpreter/src/kernels/Reshape.cpp
index d88b5392a..d3234e483 100644
--- a/compiler/luci-interpreter/src/kernels/Reshape.cpp
+++ b/compiler/luci-interpreter/src/kernels/Reshape.cpp
@@ -17,6 +17,8 @@
#include "kernels/Reshape.h"
+#include "kernels/Utils.h"
+
#include <cassert>
#include <cstring>
@@ -28,12 +30,26 @@ namespace kernels
static Shape extractShapeFromTensor(const Tensor *tensor)
{
- assert(tensor->element_type() == DataType::S32);
Shape shape(tensor->shape().num_elements());
- const auto *shape_data = tensor->data<int32_t>();
- for (int i = 0; i < tensor->shape().num_elements(); ++i)
+ if (tensor->element_type() == DataType::S32)
+ {
+ const auto *shape_data = tensor->data<int32_t>();
+ for (int i = 0; i < tensor->shape().num_elements(); ++i)
+ {
+ shape.dim(i) = shape_data[i];
+ }
+ }
+ else if (tensor->element_type() == DataType::S64)
+ {
+ const auto *shape_data = tensor->data<int64_t>();
+ for (int i = 0; i < tensor->shape().num_elements(); ++i)
+ {
+ shape.dim(i) = static_cast<int32_t>(shape_data[i]);
+ }
+ }
+ else
{
- shape.dim(i) = shape_data[i];
+ LUCI_INTERPRETER_CHECK(false);
}
return shape;
}
@@ -65,7 +81,7 @@ static void resolveUnknownDimension(const Shape &input_shape, Shape *output_shap
}
Reshape::Reshape(const Tensor *input, const Tensor *shape, Tensor *output)
- : Kernel({input, shape}, {output})
+ : Kernel({input, shape}, {output})
{
}
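
Two notes on the Reshape change above. First, the new S64 branch narrows each value with static_cast<int32_t>, so shape dimensions beyond INT32_MAX would wrap silently. Second, resolveUnknownDimension (visible in the hunk header) fills in a single -1 entry so the output keeps the input's element count; a minimal sketch of that logic, with illustrative names rather than the exact implementation:

#include <cassert>
#include <cstdint>
#include <vector>

// Resolve one -1 ("unknown") dimension so that the output element count
// matches the input element count. Sketch only.
static void resolveUnknown(int64_t num_input_elements, std::vector<int32_t> &dims)
{
  int unknown = -1;
  int64_t known_product = 1;
  for (size_t i = 0; i < dims.size(); ++i)
  {
    if (dims[i] == -1)
    {
      assert(unknown == -1 && "at most one -1 dimension is allowed");
      unknown = static_cast<int>(i);
    }
    else
    {
      known_product *= dims[i];
    }
  }
  if (unknown >= 0)
    dims[unknown] = static_cast<int32_t>(num_input_elements / known_product);
}

With 12 input elements and shape {2, -1, 2} this yields {2, 3, 2}, matching the UnknownDimension and SupportS64 tests below.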
diff --git a/compiler/luci-interpreter/src/kernels/Reshape.test.cpp b/compiler/luci-interpreter/src/kernels/Reshape.test.cpp
index 7255b8132..7c0522ebe 100644
--- a/compiler/luci-interpreter/src/kernels/Reshape.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Reshape.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/Reshape.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -26,42 +27,90 @@ namespace
using namespace testing;
+class ReshapeTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
// TODO Test types other than FLOAT32.
-TEST(ReshapeTest, Regular)
+TEST_F(ReshapeTest, Regular)
{
Shape input_shape{1, 2, 2, 3};
std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
Shape shape_shape{2};
std::vector<int32_t> shape_data{3, 4};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
- Tensor shape_tensor = makeInputTensor<DataType::S32>(shape_shape, shape_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor shape_tensor =
+ makeInputTensor<DataType::S32>(shape_shape, shape_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Reshape kernel(&input_tensor, &shape_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
- EXPECT_THAT(extractTensorData<float>(output_tensor),
- ElementsAreArray(ArrayFloatNear(input_data)));
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(input_data));
}
-TEST(ReshapeTest, UnknownDimension)
+TEST_F(ReshapeTest, UnknownDimension)
{
Shape input_shape{2, 1, 2, 3};
std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
Shape shape_shape{3};
std::vector<int32_t> shape_data{2, -1, 2};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
- Tensor shape_tensor = makeInputTensor<DataType::S32>(shape_shape, shape_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor shape_tensor =
+ makeInputTensor<DataType::S32>(shape_shape, shape_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Reshape kernel(&input_tensor, &shape_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(input_data));
+}
+
+TEST_F(ReshapeTest, SupportS64)
+{
+ Shape input_shape{2, 1, 2, 3};
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ Shape shape_shape{3};
+ std::vector<int64_t> shape_data{2, -1, 2};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor shape_tensor =
+ makeInputTensor<DataType::S64>(shape_shape, shape_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Reshape kernel(&input_tensor, &shape_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
- EXPECT_THAT(extractTensorData<float>(output_tensor),
- ElementsAreArray(ArrayFloatNear(input_data)));
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(input_data));
+}
+
+TEST_F(ReshapeTest, SupportS16_NEG)
+{
+ Shape input_shape{2, 1, 2, 3};
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ Shape shape_shape{3};
+ std::vector<int16_t> shape_data{2, -1, 2};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor shape_tensor =
+ makeInputTensor<DataType::S16>(shape_shape, shape_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Reshape kernel(&input_tensor, &shape_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
}
} // namespace
diff --git a/compiler/luci-interpreter/src/kernels/ResizeBilinear.cpp b/compiler/luci-interpreter/src/kernels/ResizeBilinear.cpp
new file mode 100644
index 000000000..e2ddd6a7b
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/ResizeBilinear.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ResizeBilinear.h"
+
+#include "kernels/Utils.h"
+
+#include "PALResizeBilinear.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+ResizeBilinear::ResizeBilinear(const Tensor *input, const Tensor *size, Tensor *output,
+ const ResizeBilinearParams &params)
+ : KernelWithParams<ResizeBilinearParams>({input, size}, {output}, params)
+{
+}
+
+void ResizeBilinear::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(size()->shape().num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(size()->element_type() == DataType::S32);
+ if (params().half_pixel_centers && params().align_corners)
+ throw std::runtime_error("If half_pixel_centers is True, align_corners must be False.");
+ LUCI_INTERPRETER_CHECK(size()->shape().dim(0) == 2);
+ Shape output_shape(4);
+ output_shape.dim(0) = input()->shape().dim(0);
+ output_shape.dim(1) = getTensorData<int32_t>(size())[0];
+ output_shape.dim(2) = getTensorData<int32_t>(size())[1];
+ output_shape.dim(3) = input()->shape().dim(3);
+ output()->resize(output_shape);
+}
+
+void ResizeBilinear::execute() const
+{
+ tflite::ResizeBilinearParams op_params{};
+ op_params.align_corners = params().align_corners;
+ op_params.half_pixel_centers = params().half_pixel_centers;
+ switch (output()->element_type())
+ {
+ case DataType::FLOAT32:
+ luci_interpreter_pal::ResizeBilinear(
+ op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(size()),
+ getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ luci_interpreter_pal::ResizeBilinear(
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(size()),
+ getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
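
The PAL call above ultimately follows TFLite's resize convention: each output index is mapped to a fractional input coordinate, and the kernel interpolates between the floor and ceil of that coordinate with both neighbor indices clamped into [0, in_size - 1]. A sketch of the coordinate transform under that assumption (not code from this patch):

#include <algorithm>

// Map an output pixel index to a (fractional) input coordinate, following the
// usual TFLite convention selected by align_corners / half_pixel_centers.
static float sourceCoord(int out_idx, int in_size, int out_size,
                         bool align_corners, bool half_pixel_centers)
{
  if (align_corners && out_size > 1)
    return out_idx * (in_size - 1) / static_cast<float>(out_size - 1);
  const float scale = in_size / static_cast<float>(out_size);
  return half_pixel_centers ? (out_idx + 0.5f) * scale - 0.5f : out_idx * scale;
}

Setting both flags at once is ambiguous in this convention, which is why configure() above rejects the combination.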
diff --git a/compiler/luci-interpreter/src/kernels/ResizeBilinear.h b/compiler/luci-interpreter/src/kernels/ResizeBilinear.h
new file mode 100644
index 000000000..b7bdc2ab7
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/ResizeBilinear.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_RESIZEBILINEAR_H
+#define LUCI_INTERPRETER_KERNELS_RESIZEBILINEAR_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ResizeBilinear : public KernelWithParams<ResizeBilinearParams>
+{
+public:
+ ResizeBilinear(const Tensor *input, const Tensor *shape, Tensor *output,
+ const ResizeBilinearParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *size() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_RESIZEBILINEAR_H
diff --git a/compiler/luci-interpreter/src/kernels/ResizeBilinear.test.cpp b/compiler/luci-interpreter/src/kernels/ResizeBilinear.test.cpp
new file mode 100644
index 000000000..933a1128c
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/ResizeBilinear.test.cpp
@@ -0,0 +1,255 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ResizeBilinear.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> size_shape,
+ std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data,
+ std::initializer_list<int32_t> size_data, std::initializer_list<float> output_data,
+ bool align_corners, bool half_pixel_centers)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeBilinearParams params{};
+ params.align_corners = align_corners;
+ params.half_pixel_centers = half_pixel_centers;
+
+ ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data));
+}
+
+template <>
+void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> size_shape,
+ std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data,
+ std::initializer_list<int32_t> size_data,
+ std::initializer_list<float> output_data, bool align_corners,
+ bool half_pixel_centers)
+{
+ // The TFLite example uses the uint8 values themselves, which corresponds to
+ // quantization parameters scale = 1.0f and zero point = 0.
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>(input_shape, 1.0, 0, input_data, memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 1.0, 0);
+
+ ResizeBilinearParams params{};
+ params.align_corners = align_corners;
+ params.half_pixel_centers = half_pixel_centers;
+
+ ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data, output_tensor.scale()));
+}
+
+template <typename T> class ResizeBilinearTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(ResizeBilinearTest, DataTypes);
+
+TYPED_TEST(ResizeBilinearTest, SimpleTest)
+{
+ Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ {3, 3},
+ {
+ 3, 5, 6, //
+ 7, 9, 10, //
+ 9, 11, 12, //
+ 4, 8, 10, //
+ 8, 12, 14, //
+ 10, 14, 16, //
+ },
+ false, false);
+ SUCCEED();
+}
+
+TEST(ResizeBilinearTest, HalfPixelCenterFloatTest)
+{
+ Check<float>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
+ {
+ 1, 2, //
+ 3, 4, //
+ 1, 2, //
+ 3, 4 //
+ },
+ {3, 3},
+ {
+ 1, 1.5, 2, //
+ 2, 2.5, 3, //
+ 3, 3.5, 4, //
+ 1, 1.5, 2, //
+ 2, 2.5, 3, //
+ 3, 3.5, 4, //
+ },
+ false, true);
+ SUCCEED();
+}
+
+TEST(ResizeBilinearTest, HalfPixelCenterUint8Test)
+{
+ Check<uint8_t>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 12, 16 //
+ },
+ {3, 3},
+ {
+ 2, 4, 6, //
+ 6, 7, 9, //
+ 9, 10, 12, //
+ 4, 7, 10, //
+ 8, 10, 13, //
+ 12, 14, 16, //
+ },
+ false, true);
+ SUCCEED();
+}
+
+TEST(ResizeBilinearTest, InputShapeInvalid_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeBilinearParams params{};
+ params.align_corners = false;
+ params.half_pixel_centers = false;
+
+ ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(ResizeBilinearTest, SizeShapeInvalid_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({2, 1}, {3, 3}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeBilinearParams params{};
+ params.align_corners = false;
+ params.half_pixel_centers = false;
+
+ ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(ResizeBilinearTest, SizeDimInvalid_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({3}, {3, 3, 1}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeBilinearParams params{};
+ params.align_corners = false;
+ params.half_pixel_centers = false;
+
+ ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(ResizeBilinearTest, InvalidParams_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeBilinearParams params{};
+ params.align_corners = true;
+ params.half_pixel_centers = true;
+
+ ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
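
A quick hand check of HalfPixelCenterFloatTest above, using the coordinate transform sketched earlier: with in_size = 2 and out_size = 3 the scale is 2/3, so the three source coordinates per axis are 0.5 * 2/3 - 0.5 = -1/6 (neighbors clamp to index 0), 1.5 * 2/3 - 0.5 = 0.5, and 2.5 * 2/3 - 0.5 = 7/6 (neighbors clamp to index 1). Along one axis this turns {v0, v1} into {v0, (v0 + v1) / 2, v1}; applied to rows and columns of the 2x2 block {1, 2; 3, 4} it yields exactly the expected {1, 1.5, 2}, {2, 2.5, 3}, {3, 3.5, 4}.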
diff --git a/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp b/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp
new file mode 100644
index 000000000..306cefbc2
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ResizeNearestNeighbor.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h>
+#include "PALResizeNearestNeighbor.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+ResizeNearestNeighbor::ResizeNearestNeighbor(const Tensor *input, const Tensor *size,
+ Tensor *output,
+ const ResizeNearestNeighborParams &params)
+ : KernelWithParams<ResizeNearestNeighborParams>({input, size}, {output}, params)
+{
+}
+
+void ResizeNearestNeighbor::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(size()->shape().num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(size()->element_type() == DataType::S32);
+ LUCI_INTERPRETER_CHECK(size()->shape().dim(0) == 2);
+ Shape output_shape(4);
+ output_shape.dim(0) = input()->shape().dim(0);
+ output_shape.dim(1) = getTensorData<int32_t>(size())[0];
+ output_shape.dim(2) = getTensorData<int32_t>(size())[1];
+ output_shape.dim(3) = input()->shape().dim(3);
+ output()->resize(output_shape);
+}
+
+void ResizeNearestNeighbor::execute() const
+{
+ tflite::ResizeNearestNeighborParams op_params{};
+ op_params.align_corners = params().align_corners;
+ op_params.half_pixel_centers = params().half_pixel_centers;
+ switch (output()->element_type())
+ {
+ case DataType::FLOAT32:
+ tflite::reference_ops::ResizeNearestNeighbor(
+ op_params, getTensorShape(input()), getTensorData<int32_t>(input()), getTensorShape(size()),
+ getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<int32_t>(output()));
+ break;
+ case DataType::U8:
+ luci_interpreter_pal::ResizeNearestNeighbor(
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(size()),
+ getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
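
Two things worth noting about the kernel above. The FLOAT32 branch reads the float buffers through int32_t pointers; this is safe only because nearest-neighbor resize moves 4-byte elements verbatim, so the bit-level copy preserves the float values. And the source index for every output position follows the usual TFLite convention, sketched below under that assumption (not code from this patch):

#include <algorithm>
#include <cmath>

// Nearest source index for one axis, TFLite-style.
static int nearestIndex(int out_idx, int in_size, int out_size,
                        bool align_corners, bool half_pixel_centers)
{
  const float scale = (align_corners && out_size > 1)
                        ? (in_size - 1) / static_cast<float>(out_size - 1)
                        : in_size / static_cast<float>(out_size);
  const float offset = half_pixel_centers ? 0.5f : 0.0f;
  int idx = align_corners ? static_cast<int>(std::round((out_idx + offset) * scale))
                          : static_cast<int>(std::floor((out_idx + offset) * scale));
  idx = std::min(idx, in_size - 1);
  return half_pixel_centers ? std::max(idx, 0) : idx;
}

For the 2-to-3 upscale in the tests below this produces indices {0, 1, 1} per axis in both the AlignCenterTest and HalfPixelCenterTest configurations, which is why those two cases expect the same output.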
diff --git a/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.h b/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.h
new file mode 100644
index 000000000..137d031cf
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_RESIZENEARESTNEIGHBOR_H
+#define LUCI_INTERPRETER_KERNELS_RESIZENEARESTNEIGHBOR_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ResizeNearestNeighbor : public KernelWithParams<ResizeNearestNeighborParams>
+{
+public:
+ ResizeNearestNeighbor(const Tensor *input, const Tensor *shape, Tensor *output,
+ const ResizeNearestNeighborParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *size() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_RESIZENEARESTNEIGHBOR_H
diff --git a/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp b/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp
new file mode 100644
index 000000000..7ade02a6f
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ResizeNearestNeighbor.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> size_shape,
+ std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data,
+ std::initializer_list<int32_t> size_data, std::initializer_list<float> output_data,
+ bool align_corners, bool half_pixel_centers)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeNearestNeighborParams params{};
+ params.align_corners = align_corners;
+ params.half_pixel_centers = half_pixel_centers;
+
+ ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data));
+}
+
+template <>
+void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> size_shape,
+ std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data,
+ std::initializer_list<int32_t> size_data,
+ std::initializer_list<float> output_data, bool align_corners,
+ bool half_pixel_centers)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ std::pair<float, int32_t> quant_param =
+ quantizationParams<uint8_t>(std::min(input_data) < 0 ? std::min(input_data) : 0.f,
+ std::max(input_data) > 0 ? std::max(input_data) : 0.f);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ input_shape, quant_param.first, quant_param.second, input_data, memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ ResizeNearestNeighborParams params{};
+ params.align_corners = align_corners;
+ params.half_pixel_centers = half_pixel_centers;
+
+ ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data, output_tensor.scale()));
+}
+
+template <typename T> class ResizeNearestNeighborTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(ResizeNearestNeighborTest, DataTypes);
+
+TYPED_TEST(ResizeNearestNeighborTest, SimpleTest)
+{
+ Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ {3, 3},
+ {
+ 3, 3, 6, //
+ 3, 3, 6, //
+ 9, 9, 12, //
+ 4, 4, 10, //
+ 4, 4, 10, //
+ 10, 10, 16, //
+ },
+ false, false);
+}
+
+TYPED_TEST(ResizeNearestNeighborTest, AlignCenterTest)
+{
+ Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ {3, 3},
+ {
+ 3, 6, 6, //
+ 9, 12, 12, //
+ 9, 12, 12, //
+ 4, 10, 10, //
+ 10, 16, 16, //
+ 10, 16, 16, //
+ },
+ true, false);
+}
+
+TYPED_TEST(ResizeNearestNeighborTest, HalfPixelCenterTest)
+{
+ Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ {3, 3},
+ {
+ 3, 6, 6, //
+ 9, 12, 12, //
+ 9, 12, 12, //
+ 4, 10, 10, //
+ 10, 16, 16, //
+ 10, 16, 16, //
+ },
+ false, true);
+}
+
+TEST(ResizeNearestNeighborTest, InputShapeInvalid_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeNearestNeighborParams params{};
+ params.align_corners = false;
+ params.half_pixel_centers = false;
+
+ ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(ResizeNearestNeighborTest, SizeShapeInvalid_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({2, 1}, {3, 3}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeNearestNeighborParams params{};
+ params.align_corners = false;
+ params.half_pixel_centers = false;
+
+ ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(ResizeNearestNeighborTest, SizeDimInvalid_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({3}, {3, 3, 1}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeNearestNeighborParams params{};
+ params.align_corners = false;
+ params.half_pixel_centers = false;
+
+ ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
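
One detail of the Check<uint8_t> specialization above: the requested range is widened so it always contains 0 (min clamped to <= 0, max to >= 0) before quantizationParams is called. Affine uint8 quantization needs real zero to be exactly representable; otherwise the computed zero point would fall outside [0, 255].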
diff --git a/compiler/luci-interpreter/src/kernels/Reverse.cpp b/compiler/luci-interpreter/src/kernels/Reverse.cpp
deleted file mode 100644
index a46308412..000000000
--- a/compiler/luci-interpreter/src/kernels/Reverse.cpp
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Reverse.h"
-#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-Reverse::Reverse(const Tensor *input, const Tensor *axes, Tensor *output)
- : Kernel({input, axes}, {output})
-{
-}
-
-void Reverse::configure()
-{
- assert(axes()->shape().num_dims() == 1);
- assert(input()->shape().num_dims() >= axes()->shape().num_elements());
- if (input()->element_type() != DataType::S32 && input()->element_type() != DataType::FLOAT32 &&
- input()->element_type() != DataType::U8 && input()->element_type() != DataType::S16 &&
- input()->element_type() != DataType::S64)
- {
- throw std::runtime_error("Unsupported input type.");
- }
- if (axes()->element_type() != DataType::S32)
- {
- throw std::runtime_error("Unsupported axes type.");
- }
- if (axes()->shape().num_elements() > 1)
- {
- throw std::runtime_error("Current implementation does not support more than 1 axis.");
- }
- int axis_value = getTensorData<int32_t>(axes())[0];
- if (axis_value < 0 || axis_value >= input()->shape().num_dims())
- {
- throw std::runtime_error("Invalid axes value");
- }
- assert(input()->element_type() == output()->element_type());
-
- output()->resize(input()->shape());
-}
-
-void Reverse::execute() const
-{
- int axis_value = getTensorData<int32_t>(axes())[0];
- switch (output()->element_type())
- {
- case DataType::FLOAT32:
- tflite::reference_ops::Reverse<float>(axis_value, getTensorShape(input()),
- getTensorData<float>(input()), getTensorShape(output()),
- getTensorData<float>(output()));
- break;
- case DataType::U8:
- tflite::reference_ops::Reverse<uint8_t>(
- axis_value, getTensorShape(input()), getTensorData<uint8_t>(input()),
- getTensorShape(output()), getTensorData<uint8_t>(output()));
- break;
- default:
- throw std::runtime_error("Unsupported output type");
- }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Reverse.h b/compiler/luci-interpreter/src/kernels/Reverse.h
deleted file mode 100644
index 3489dae28..000000000
--- a/compiler/luci-interpreter/src/kernels/Reverse.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_KERNELS_REVERSE_H
-#define LUCI_INTERPRETER_KERNELS_REVERSE_H
-
-#include "core/Kernel.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-class Reverse : public Kernel
-{
-public:
- Reverse(const Tensor *input, const Tensor *axes, Tensor *output);
-
- const Tensor *input() const { return _inputs[0]; }
- const Tensor *axes() const { return _inputs[1]; }
- Tensor *output() const { return _outputs[0]; }
-
- void configure() override;
- void execute() const override;
-};
-
-} // namespace kernels
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_KERNELS_REVERSE_H
diff --git a/compiler/luci-interpreter/src/kernels/Reverse.test.cpp b/compiler/luci-interpreter/src/kernels/Reverse.test.cpp
deleted file mode 100644
index 5475a8bd3..000000000
--- a/compiler/luci-interpreter/src/kernels/Reverse.test.cpp
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Reverse.h"
-#include "kernels/TestUtils.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-namespace
-{
-
-using namespace testing;
-
-template <typename T> class ReverseTest : public ::testing::Test
-{
-};
-
-using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(ReverseTest, DataTypes);
-
-TYPED_TEST(ReverseTest, MultiDimensions)
-{
- // TypeParam
- std::vector<TypeParam> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
- 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24};
- Shape input_shape{4, 3, 2};
- std::vector<int32_t> axis_data{1};
- Shape axis_shape{1};
-
- std::vector<TypeParam> output_data{5, 6, 3, 4, 1, 2, 11, 12, 9, 10, 7, 8,
- 17, 18, 15, 16, 13, 14, 23, 24, 21, 22, 19, 20};
- std::vector<int32_t> output_shape{4, 3, 2};
-
- Tensor input_tensor = makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data);
- Tensor axis_tensor = makeInputTensor<DataType::S32>(axis_shape, axis_data);
-
- Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>());
-
- Reverse kernel = Reverse(&input_tensor, &axis_tensor, &output_tensor);
- kernel.configure();
- kernel.execute();
-
- EXPECT_THAT(extractTensorData<TypeParam>(output_tensor),
- ::testing::ElementsAreArray(output_data));
- EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
-}
-
-} // namespace
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/ReverseV2.cpp b/compiler/luci-interpreter/src/kernels/ReverseV2.cpp
new file mode 100644
index 000000000..1b6a5cc3b
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/ReverseV2.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ReverseV2.h"
+#include "kernels/Utils.h"
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+ReverseV2::ReverseV2(const Tensor *input, const Tensor *axes, Tensor *output)
+ : Kernel({input, axes}, {output})
+{
+}
+
+void ReverseV2::configure()
+{
+ assert(axes()->shape().num_dims() == 1);
+ assert(input()->shape().num_dims() >= axes()->shape().num_elements());
+ if (input()->element_type() != DataType::S32 && input()->element_type() != DataType::FLOAT32 &&
+ input()->element_type() != DataType::U8 && input()->element_type() != DataType::S16 &&
+ input()->element_type() != DataType::S64)
+ {
+ throw std::runtime_error("Unsupported input type.");
+ }
+ if (axes()->element_type() != DataType::S32)
+ {
+ throw std::runtime_error("Unsupported axes type.");
+ }
+ if (axes()->shape().num_elements() > 1)
+ {
+ throw std::runtime_error("Current implementation does not support more than 1 axis.");
+ }
+ int axis_value = getTensorData<int32_t>(axes())[0];
+ if (axis_value < 0 || axis_value >= input()->shape().num_dims())
+ {
+ throw std::runtime_error("Invalid axes value");
+ }
+ assert(input()->element_type() == output()->element_type());
+
+ output()->resize(input()->shape());
+}
+
+void ReverseV2::execute() const
+{
+ int axis_value = getTensorData<int32_t>(axes())[0];
+ switch (output()->element_type())
+ {
+ case DataType::FLOAT32:
+ tflite::reference_ops::Reverse<float>(axis_value, getTensorShape(input()),
+ getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ tflite::reference_ops::Reverse<uint8_t>(
+ axis_value, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported output type");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
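
ReverseV2 above delegates to tflite::reference_ops::Reverse, which flips the tensor along the single supported axis. A minimal sketch of the same semantics for a rank-3 tensor with axis = 1 (illustrative only):

#include <vector>

// Reverse a [d0, d1, d2] tensor along axis 1: out[i][d1-1-j][k] = in[i][j][k].
template <typename T>
std::vector<T> reverseAxis1(const std::vector<T> &in, int d0, int d1, int d2)
{
  std::vector<T> out(in.size());
  for (int i = 0; i < d0; ++i)
    for (int j = 0; j < d1; ++j)
      for (int k = 0; k < d2; ++k)
        out[(i * d1 + (d1 - 1 - j)) * d2 + k] = in[(i * d1 + j) * d2 + k];
  return out;
}

With the {4, 3, 2} input from the MultiDimensions test below, this reproduces the expected output {5, 6, 3, 4, 1, 2, 11, 12, ...}.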
diff --git a/compiler/luci-interpreter/src/kernels/ReverseV2.h b/compiler/luci-interpreter/src/kernels/ReverseV2.h
new file mode 100644
index 000000000..51211c703
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/ReverseV2.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_REVERSE_H
+#define LUCI_INTERPRETER_KERNELS_REVERSE_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ReverseV2 : public Kernel
+{
+public:
+ ReverseV2(const Tensor *input, const Tensor *axes, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *axes() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_REVERSE_H
diff --git a/compiler/luci-interpreter/src/kernels/ReverseV2.test.cpp b/compiler/luci-interpreter/src/kernels/ReverseV2.test.cpp
new file mode 100644
index 000000000..c0025faca
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/ReverseV2.test.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ReverseV2.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T> class ReverseV2Test : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(ReverseV2Test, DataTypes);
+
+TYPED_TEST(ReverseV2Test, MultiDimensions)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ // TypeParam
+ std::vector<TypeParam> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+ 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24};
+ Shape input_shape{4, 3, 2};
+ std::vector<int32_t> axis_data{1};
+ Shape axis_shape{1};
+
+ std::vector<TypeParam> output_data{5, 6, 3, 4, 1, 2, 11, 12, 9, 10, 7, 8,
+ 17, 18, 15, 16, 13, 14, 23, 24, 21, 22, 19, 20};
+ std::vector<int32_t> output_shape{4, 3, 2};
+
+ Tensor input_tensor =
+ makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data, memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>(axis_shape, axis_data, memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>());
+
+ ReverseV2 kernel = ReverseV2(&input_tensor, &axis_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<TypeParam>(output_tensor),
+ ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp b/compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp
index 69b55d2f2..3c6494232 100644
--- a/compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/Rsqrt.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -29,39 +30,42 @@ using namespace testing;
void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
std::initializer_list<float> input_data, std::initializer_list<float> output_data)
{
- Tensor input_tensor{DataType::FLOAT32, input_shape, {}, ""};
- input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(float));
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Rsqrt kernel(&input_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
- EXPECT_THAT(extractTensorData<float>(output_tensor),
- ::testing::ElementsAreArray(ArrayFloatNear(output_data)));
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
}
TEST(RsqrtTest, SimpleRsqrt)
{
Check(
- /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
- /*input_data=*/
- {
- 5, 4, 8, 2, //
- 6, 7.5, 9, 0.3, //
- },
- /*output_data=*/
- {
- 0.44721360, 0.5, 0.35355339, 0.70710678, //
- 0.40824829, 0.36514837, 0.33333333, 1.8257419, //
- });
+ /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
+ /*input_data=*/
+ {
+ 5, 4, 8, 2, //
+ 6, 7.5, 9, 0.3, //
+ },
+ /*output_data=*/
+ {
+ 0.44721360, 0.5, 0.35355339, 0.70710678, //
+ 0.40824829, 0.36514837, 0.33333333, 1.8257419, //
+ });
}
TEST(RsqrtTest, Input_Output_Type_NEG)
{
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S32);
Rsqrt kernel(&input_tensor, &output_tensor);
@@ -70,11 +74,14 @@ TEST(RsqrtTest, Input_Output_Type_NEG)
TEST(RsqrtTest, Invalid_Input_Type_NEG)
{
- Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1});
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S64);
Rsqrt kernel(&input_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
EXPECT_ANY_THROW(kernel.execute());
}
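
For reference, the SimpleRsqrt expectations above are just 1/sqrt(x) evaluated elementwise: 1/sqrt(5) ≈ 0.44721360, 1/sqrt(4) = 0.5, 1/sqrt(8) ≈ 0.35355339, 1/sqrt(2) ≈ 0.70710678, and so on down to 1/sqrt(0.3) ≈ 1.8257419.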
diff --git a/compiler/luci-interpreter/src/kernels/SVDF.cpp b/compiler/luci-interpreter/src/kernels/SVDF.cpp
new file mode 100644
index 000000000..b124e242c
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/SVDF.cpp
@@ -0,0 +1,217 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SVDF.h"
+#include "kernels/Utils.h"
+#include "PALSVDF.h"
+
+#include <tensorflow/lite/kernels/internal/quantization_util.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+SVDF::SVDF(const Tensor *input, const Tensor *weight_feature, const Tensor *weight_time,
+ const Tensor *bias, const Tensor *input_activation_state, Tensor *output,
+ Tensor *scratchpad_activation_state, Tensor *scratchpad_1, Tensor *scratchpad_2,
+ Tensor *scratchpad_3, Tensor *scratchpad_4, Tensor *scratchpad_5, Tensor *scratchpad_6,
+ const SVDFParams &params)
+ : KernelWithParams<SVDFParams>({input, weight_feature, weight_time, bias, input_activation_state},
+ {output, scratchpad_activation_state, scratchpad_1, scratchpad_2,
+ scratchpad_3, scratchpad_4, scratchpad_5, scratchpad_6},
+ params)
+{
+ // Do nothing
+}
+
+void SVDF::configure()
+{
+ const Shape &input_shape = input()->shape();
+ const Shape &weight_features_shape = weight_feature()->shape();
+ const Shape &weight_time_shape = weight_time()->shape();
+
+ // Validate Input Tensor:
+ LUCI_INTERPRETER_CHECK(input()->element_type() == loco::DataType::FLOAT32 ||
+ input()->element_type() == loco::DataType::S8);
+ LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 2);
+
+ // Validate inputs and output types
+ if (input()->element_type() == loco::DataType::S8)
+ {
+ LUCI_INTERPRETER_CHECK(weight_feature()->element_type() == loco::DataType::S8);
+ LUCI_INTERPRETER_CHECK(weight_time()->element_type() == loco::DataType::S16 ||
+ weight_time()->element_type() == loco::DataType::S8);
+ if (bias())
+ LUCI_INTERPRETER_CHECK(bias()->element_type() == loco::DataType::S32);
+
+ LUCI_INTERPRETER_CHECK(input_activation_state()->element_type() == loco::DataType::S16 ||
+ input_activation_state()->element_type() == loco::DataType::S8);
+ LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::S8);
+
+ // Note: TFLite currently supports only the ReLU activation for integer SVDF.
+ LUCI_INTERPRETER_CHECK(params().activation == luci::FusedActFunc::RELU);
+ }
+ else if (weight_feature()->element_type() == loco::DataType::FLOAT32)
+ {
+ LUCI_INTERPRETER_CHECK(weight_feature()->element_type() == loco::DataType::FLOAT32);
+ LUCI_INTERPRETER_CHECK(weight_time()->element_type() == loco::DataType::FLOAT32);
+ LUCI_INTERPRETER_CHECK(input_activation_state()->element_type() == loco::DataType::FLOAT32);
+ if (bias())
+ LUCI_INTERPRETER_CHECK(bias()->element_type() == loco::DataType::FLOAT32);
+ LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::FLOAT32);
+ }
+ else if ((weight_feature()->element_type() == loco::DataType::U8 ||
+ weight_feature()->element_type() == loco::DataType::S8) &&
+ input()->element_type() == loco::DataType::FLOAT32)
+ {
+ // TODO: support hybrid SVDF op
+ throw std::runtime_error("Hybrid type is not currently supported");
+ }
+ else
+ {
+ throw std::runtime_error("Unsupported type.");
+ }
+
+ // Check that all tensor parameters are consistent with each other and with the
+ // input configuration.
+ const int rank = params().svdf_rank;
+ const int batch_size = input_shape.dim(0);
+ const int num_filters = weight_features_shape.dim(0);
+ LUCI_INTERPRETER_CHECK(rank != 0);
+ LUCI_INTERPRETER_CHECK(num_filters % rank == 0);
+
+ const int num_units = num_filters / rank;
+ const int memory_size = weight_time_shape.dim(1);
+
+ // Validate Weight_Feature Input Tensor:
+ LUCI_INTERPRETER_CHECK(weight_features_shape.num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(weight_features_shape.dim(1) == input_shape.dim(1));
+
+ // Validate Weight_Time Input Tensor:
+ LUCI_INTERPRETER_CHECK(weight_time_shape.num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(weight_time_shape.dim(0) == num_filters);
+
+ // Validate Bias
+ if (bias())
+ LUCI_INTERPRETER_CHECK(bias()->shape().dim(0) == num_units);
+
+ // Validate Input Activation State
+ LUCI_INTERPRETER_CHECK(input_activation_state()->shape().num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(input_activation_state()->shape().dim(0) == batch_size);
+ LUCI_INTERPRETER_CHECK(input_activation_state()->shape().dim(1) == memory_size * num_filters);
+
+ // Resize scratchpad_state to input_activation_state
+ auto scratchpad_activation_state = getOutputTensors()[1];
+ scratchpad_activation_state->resize({batch_size, memory_size * num_filters});
+
+ // Resize output tensor
+ output()->resize({batch_size, num_units});
+
+ luci_interpreter_pal::SetupScratchpadTensor(
+ input()->element_type(), weight_feature()->element_type(), getOutputTensors()[2],
+ getOutputTensors()[3], getOutputTensors()[4], getOutputTensors()[5], getOutputTensors()[6],
+ getOutputTensors()[7], input_shape, weight_time_shape, batch_size, num_filters, num_units);
+}
+
+void SVDF::execute() const
+{
+ switch (weight_feature()->element_type())
+ {
+ case loco::DataType::FLOAT32:
+ evalFloat();
+ break;
+ case loco::DataType::S8:
+ {
+ if (input()->element_type() == loco::DataType::S8)
+ evalInteger();
+ else
+ // TODO: support hybrid SVDF op
+ throw std::runtime_error("Hybrid type is not currently supported");
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported type");
+ }
+}
+
+void SVDF::evalInteger() const
+{
+ const auto effective_scale_1 = static_cast<double>(input()->scale() * weight_feature()->scale() /
+ input_activation_state()->scale());
+ const auto effective_scale_2 = static_cast<double>(input_activation_state()->scale() *
+ weight_time()->scale() / output()->scale());
+
+ int32_t effective_scale_1_a;
+ int effective_scale_1_b;
+ int32_t effective_scale_2_a;
+ int effective_scale_2_b;
+
+ tflite::QuantizeMultiplier(effective_scale_1, &effective_scale_1_a, &effective_scale_1_b);
+ tflite::QuantizeMultiplier(effective_scale_2, &effective_scale_2_a, &effective_scale_2_b);
+
+ TfLiteSVDFParams params_svdf{};
+ params_svdf.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs;
+ params_svdf.rank = params().svdf_rank;
+ params_svdf.activation = getTfLiteActivation(params().activation);
+
+ auto scratchpad_activation_state = getOutputTensors()[1];
+ // Note: the activation_state input variable tensor is expected to be reset to zero
+ // and to have no backing buffer, so its scratchpad copy is zero-filled here.
+ auto scratchpad_data = getTensorData<int16_t>(scratchpad_activation_state);
+ std::fill_n(scratchpad_data, scratchpad_activation_state->shape().num_elements(), 0);
+
+ auto scratchpad = getOutputTensors()[2];
+ auto output_temp = getOutputTensors()[3];
+
+ int32_t input_zp = input()->zero_point();
+ int32_t output_zp = output()->zero_point();
+ luci_interpreter_pal::IntegerSVDF(
+ params_svdf, getTensorShape(input()), getTensorData<int8_t>(input()),
+ getTensorShape(weight_feature()), getTensorData<int8_t>(weight_feature()),
+ getTensorShape(weight_time()), getTensorData<int16_t>(weight_time()), getTensorShape(bias()),
+ getTensorData<int32_t>(bias()), scratchpad_data, getTensorShape(output()),
+ getTensorData<int8_t>(output()), getTensorData<int32_t>(scratchpad),
+ getTensorData<int32_t>(output_temp), effective_scale_1_a, effective_scale_1_b,
+ effective_scale_2_a, effective_scale_2_b, input_zp, output_zp);
+}
+
+void SVDF::evalFloat() const
+{
+ TfLiteSVDFParams params_svdf{};
+ params_svdf.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs;
+ params_svdf.rank = params().svdf_rank;
+ params_svdf.activation = getTfLiteActivation(params().activation);
+
+ auto scratchpad_activation_state = getOutputTensors()[1];
+ // Note: the activation_state input variable tensor is expected to be reset to zero
+ // and to have no backing buffer, so its scratchpad copy is zero-filled here.
+ auto scratchpad_data = getTensorData<float>(scratchpad_activation_state);
+ std::fill_n(scratchpad_data, scratchpad_activation_state->shape().num_elements(), 0);
+
+ auto scratchpad_1 = getOutputTensors()[2];
+
+ luci_interpreter_pal::FloatSVDF(
+ params_svdf, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(weight_feature()), getTensorData<float>(weight_feature()),
+ getTensorShape(weight_time()), getTensorData<float>(weight_time()), getTensorShape(bias()),
+ getTensorData<float>(bias()), getTensorData<float>(scratchpad_1), scratchpad_data,
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
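
In evalInteger above, tflite::QuantizeMultiplier splits each real-valued effective scale into a 32-bit fixed-point multiplier plus a power-of-two shift, so the kernel can rescale with integer arithmetic. A rough sketch of that decomposition, assuming TFLite's Q31 convention and omitting the real implementation's edge-case handling:

#include <cmath>
#include <cstdint>

// Decompose real_scale as quantized_multiplier * 2^shift / 2^31 (sketch).
static void quantizeMultiplier(double real_scale, int32_t *quantized_multiplier, int *shift)
{
  if (real_scale == 0.0)
  {
    *quantized_multiplier = 0;
    *shift = 0;
    return;
  }
  const double q = std::frexp(real_scale, shift); // q in [0.5, 1)
  auto q31 = static_cast<int64_t>(std::round(q * (1LL << 31)));
  if (q31 == (1LL << 31)) // rounding pushed q up to 1.0
  {
    q31 /= 2;
    ++*shift;
  }
  *quantized_multiplier = static_cast<int32_t>(q31);
}

// Applied as y ~= (x * quantized_multiplier) >> (31 - shift); the rounding
// details live in TFLite's MultiplyByQuantizedMultiplier in the real kernels.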
diff --git a/compiler/luci-interpreter/src/kernels/SVDF.h b/compiler/luci-interpreter/src/kernels/SVDF.h
new file mode 100644
index 000000000..335a6cd8f
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/SVDF.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SVDF_H
+#define LUCI_INTERPRETER_KERNELS_SVDF_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class SVDF : public KernelWithParams<SVDFParams>
+{
+public:
+ SVDF(const Tensor *input, const Tensor *weight_feature, const Tensor *weight_time,
+ const Tensor *bias, const Tensor *input_activation_state, Tensor *output,
+ Tensor *scratchpad_activation_state, Tensor *scratchpad_1, Tensor *scratchpad_2,
+ Tensor *scratchpad_3, Tensor *scratchpad_4, Tensor *scratchpad_5, Tensor *scratchpad_6,
+ const SVDFParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *weight_feature() const { return _inputs[1]; }
+ const Tensor *weight_time() const { return _inputs[2]; }
+ const Tensor *bias() const { return _inputs[3]; }
+ const Tensor *input_activation_state() const { return _inputs[4]; }
+
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalInteger() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SVDF_H
diff --git a/compiler/luci-interpreter/src/kernels/SVDF.test.cpp b/compiler/luci-interpreter/src/kernels/SVDF.test.cpp
new file mode 100644
index 000000000..82bd9b009
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/SVDF.test.cpp
@@ -0,0 +1,341 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SVDF.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class SVDFTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(SVDFTest, FullIntegerTest)
+{
+ const int32_t batches = 2;
+ const int32_t input_size = 3;
+ const int32_t units = 4;
+ const int32_t memory_size = 10;
+ const int32_t rank = 1;
+ const int32_t num_filters = units * rank;
+
+ Shape input_shape{batches, input_size};
+ Shape weight_feature_shape{num_filters, input_size};
+ Shape weight_time_shape{num_filters, memory_size};
+ Shape bias_shape{units};
+ Shape activation_state_shape{batches, memory_size * num_filters};
+
+ std::vector<float> input_data{0.49837467, 0.19278903, 0.26584083,
+ 0.17660543, 0.52949083, -0.77931279};
+
+ std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347,
+ 0.22197971, 0.12416199, 0.27901134, 0.27557442,
+ 0.3905206, -0.36137494, -0.06634006, -0.10640851};
+
+ std::vector<float> weight_time_data{
+ -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156,
+ 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199,
+
+ 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518,
+ -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296,
+
+ -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236,
+ 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846,
+
+ -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166,
+ -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657};
+
+ std::vector<float> bias_data{-0.0976817, 0.15294972, 0.39635518, -0.02702999};
+
+ std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(-1, 1);
+ std::pair<float, int32_t> weight_feature_quant_param = quantizationParams<int8_t>(-0.5, 0.5);
+ std::pair<float, int32_t> weight_time_quant_param = quantizationParams<int16_t>(-1, 1);
+ std::pair<float, int32_t> bias_quant_param = quantizationParams<int32_t>(-512, 512);
+ std::pair<float, int32_t> activation_state_quant_param = quantizationParams<int16_t>(-16, 16);
+
+ std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-0.5, 0.5);
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor weight_feature_tensor = makeInputTensor<DataType::S8>(
+ weight_feature_shape, weight_feature_quant_param.first, weight_feature_quant_param.second,
+ weight_feature_data, _memory_manager.get());
+ Tensor weight_time_tensor = makeInputTensor<DataType::S16>(
+ weight_time_shape, weight_time_quant_param.first, weight_time_quant_param.second,
+ weight_time_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>(
+ bias_shape, bias_quant_param.first, bias_quant_param.second, bias_data, _memory_manager.get());
+ Tensor activation_state_tensor = makeOutputTensor(
+ DataType::S16, activation_state_quant_param.first, activation_state_quant_param.second);
+ activation_state_tensor.resize(activation_state_shape);
+ Tensor output_tensor =
+ makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
+
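+ // The scratchpad tensors start with a placeholder shape; configure() resizes
+ // the ones this kernel actually needs.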
+ Tensor scratchpad_activation_state(DataType::S16, Shape({}), {}, "");
+ Tensor scratchpad_1(DataType::S32, Shape({}), {}, "");
+ Tensor scratchpad_2(DataType::S32, Shape({}), {}, "");
+ Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+ SVDFParams params{};
+ params.activation = Activation::RELU;
+ params.asymmetric_quantize_inputs = false;
+ params.svdf_rank = rank;
+
+ SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, &bias_tensor,
+ &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+ &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad_activation_state);
+ _memory_manager->allocate_memory(scratchpad_1);
+ _memory_manager->allocate_memory(scratchpad_2);
+ _memory_manager->allocate_memory(scratchpad_3);
+ _memory_manager->allocate_memory(scratchpad_4);
+ _memory_manager->allocate_memory(scratchpad_5);
+ _memory_manager->allocate_memory(scratchpad_6);
+ kernel.execute();
+
+ std::vector<int8_t> ref_output_data{-9, 24, 31, 1, -10, 10, -3, 0};
+
+ std::vector<int32_t> ref_output_shape{batches, units};
+ EXPECT_THAT(extractTensorData<int8_t>(output_tensor), ref_output_data);
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(SVDFTest, FloatTest)
+{
+ const int32_t batches = 2;
+ const int32_t input_size = 3;
+ const int32_t units = 4;
+ const int32_t memory_size = 10;
+ const int32_t rank = 1;
+ const int32_t num_filters = units * rank;
+
+ Shape input_shape{batches, input_size};
+ Shape weight_feature_shape{num_filters, input_size};
+ Shape weight_time_shape{num_filters, memory_size};
+ Shape activation_state_shape{batches, memory_size * num_filters};
+
+ std::vector<float> input_data{0.12609188, -0.46347019, -0.89598465,
+ 0.35867718, 0.36897406, 0.73463392};
+
+ std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347,
+ 0.22197971, 0.12416199, 0.27901134, 0.27557442,
+ 0.3905206, -0.36137494, -0.06634006, -0.10640851};
+
+ std::vector<float> weight_time_data{
+ -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156,
+ 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199,
+
+ 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518,
+ -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296,
+
+ -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236,
+ 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846,
+
+ -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166,
+ -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor weight_feature_tensor = makeInputTensor<DataType::FLOAT32>(
+ weight_feature_shape, weight_feature_data, _memory_manager.get());
+ Tensor weight_time_tensor =
+ makeInputTensor<DataType::FLOAT32>(weight_time_shape, weight_time_data, _memory_manager.get());
+ Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32);
+ activation_state_tensor.resize(activation_state_shape);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+ SVDFParams params{};
+ params.activation = Activation::NONE;
+ params.asymmetric_quantize_inputs = false;
+ params.svdf_rank = rank;
+
+ SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr,
+ &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+ &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad_activation_state);
+ _memory_manager->allocate_memory(scratchpad_1);
+ _memory_manager->allocate_memory(scratchpad_2);
+ _memory_manager->allocate_memory(scratchpad_3);
+ _memory_manager->allocate_memory(scratchpad_4);
+ _memory_manager->allocate_memory(scratchpad_5);
+ _memory_manager->allocate_memory(scratchpad_6);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0.014899, -0.0517661, -0.143725, -0.00271883,
+ -0.03004015, 0.09565311, 0.1587342, 0.00784263};
+
+ std::vector<int32_t> ref_output_shape{batches, units};
+ const float tolerance = 1e-5;
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, tolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(SVDFTest, Unsupported_Type_Configure_NEG)
+{
+ const int32_t batches = 2;
+ const int32_t input_size = 3;
+ const int32_t units = 4;
+ const int32_t memory_size = 10;
+ const int32_t rank = 1;
+ const int32_t num_filters = units * rank;
+
+ Shape input_shape{batches, input_size};
+ Shape weight_feature_shape{num_filters, input_size};
+ Shape weight_time_shape{num_filters, memory_size};
+ Shape activation_state_shape{batches, memory_size * num_filters};
+
+ std::vector<int32_t> input_data{0, 1, 3, 4, 4, -2};
+
+ std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347,
+ 0.22197971, 0.12416199, 0.27901134, 0.27557442,
+ 0.3905206, -0.36137494, -0.06634006, -0.10640851};
+
+ std::vector<float> weight_time_data{
+ -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156,
+ 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199,
+
+ 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518,
+ -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296,
+
+ -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236,
+ 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846,
+
+ -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166,
+ -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+ Tensor weight_feature_tensor = makeInputTensor<DataType::FLOAT32>(
+ weight_feature_shape, weight_feature_data, _memory_manager.get());
+ Tensor weight_time_tensor =
+ makeInputTensor<DataType::FLOAT32>(weight_time_shape, weight_time_data, _memory_manager.get());
+ Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32);
+ activation_state_tensor.resize(activation_state_shape);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+ SVDFParams params{};
+ params.activation = Activation::NONE;
+ params.asymmetric_quantize_inputs = false;
+ params.svdf_rank = rank;
+
+ SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr,
+ &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+ &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(SVDFTest, Invalid_Input_Shape_NEG)
+{
+ const int32_t batches = 2;
+ const int32_t right_input_size = 3;
+ const int32_t wrong_input_size = 4;
+ const int32_t units = 4;
+ const int32_t memory_size = 10;
+ const int32_t rank = 1;
+ const int32_t num_filters = units * rank;
+
+ Shape input_shape{batches, wrong_input_size};
+ Shape weight_feature_shape{num_filters, right_input_size};
+ Shape weight_time_shape{num_filters, memory_size};
+ Shape activation_state_shape{batches, memory_size * num_filters};
+
+ std::vector<float> input_data{0, 1, 3, 2, 4, 4, -2, 1};
+
+ std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347,
+ 0.22197971, 0.12416199, 0.27901134, 0.27557442,
+ 0.3905206, -0.36137494, -0.06634006, -0.10640851};
+
+ std::vector<float> weight_time_data{
+ -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156,
+ 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199,
+
+ 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518,
+ -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296,
+
+ -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236,
+ 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846,
+
+ -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166,
+ -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor weight_feature_tensor = makeInputTensor<DataType::FLOAT32>(
+ weight_feature_shape, weight_feature_data, _memory_manager.get());
+ Tensor weight_time_tensor =
+ makeInputTensor<DataType::FLOAT32>(weight_time_shape, weight_time_data, _memory_manager.get());
+ Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32);
+ activation_state_tensor.resize(activation_state_shape);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+ SVDFParams params{};
+ params.activation = Activation::NONE;
+ params.asymmetric_quantize_inputs = false;
+ params.svdf_rank = rank;
+
+ SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr,
+ &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+ &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Select.cpp b/compiler/luci-interpreter/src/kernels/Select.cpp
new file mode 100644
index 000000000..b4ab5f621
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Select.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Select.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+// TODO: use select.h after upgrading to TF 2.10.x or above
+// #include <tensorflow/lite/kernels/internal/reference/select.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Select::Select(const Tensor *condition, const Tensor *t, const Tensor *e, Tensor *output)
+ : Kernel({condition, t, e}, {output})
+{
+ // NOTE: _requires_broadcast is only meaningful for SelectV2
+ _requires_broadcast = false;
+ _has_low_rank_input_condition = false;
+}
+
+void Select::configure()
+{
+ LUCI_INTERPRETER_CHECK(condition()->element_type() == DataType::BOOL);
+ LUCI_INTERPRETER_CHECK(t()->element_type() == e()->element_type());
+ LUCI_INTERPRETER_CHECK(t()->element_type() == output()->element_type());
+
+ auto cond_shape = condition()->shape();
+ auto cond_num_dims = cond_shape.num_dims();
+ auto t_shape = t()->shape();
+
+ bool is_input_condition_scalar = cond_num_dims == 0;
+ bool has_rank_one_input_condition = cond_num_dims == 1 && cond_shape.dim(0) == t_shape.dim(0);
+
+ _has_low_rank_input_condition = is_input_condition_scalar || has_rank_one_input_condition;
+
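+ // Select (unlike SelectV2) does not broadcast, so this call effectively
+ // validates that t() and e() have compatible shapes while computing the
+ // common output shape.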
+ output()->resize(calculateShapeForBroadcast(t()->shape(), e()->shape()));
+}
+
+void Select::execute() const
+{
+ switch (t()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ default:
+ throw std::runtime_error("Select: unsupported type.");
+ }
+}
+
+void Select::evalFloat() const
+{
+ const auto condition_shape = getTensorShape(condition());
+ const auto condition_data = getTensorData<bool>(condition());
+ const auto t_shape = getTensorShape(t());
+ const auto t_data = getTensorData<float>(t());
+ const auto e_shape = getTensorShape(e());
+ const auto e_data = getTensorData<float>(e());
+ const auto output_shape = getTensorShape(output());
+ auto output_data = getTensorData<float>(output());
+
+ if (_has_low_rank_input_condition)
+ {
+ tflite::reference_ops::RankOneSelect(condition_shape, condition_data, t_shape, t_data, e_shape,
+ e_data, output_shape, output_data);
+ }
+ else if (_requires_broadcast)
+ {
+ // TODO: support the broadcast kernel after upgrading to TF 2.10.x or above
+ assert(false);
+ }
+ else
+ {
+ tflite::reference_ops::Select(condition_shape, condition_data, t_shape, t_data, e_shape, e_data,
+ output_shape, output_data);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Select.h b/compiler/luci-interpreter/src/kernels/Select.h
new file mode 100644
index 000000000..d67b4f5fc
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Select.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SELECT_H
+#define LUCI_INTERPRETER_KERNELS_SELECT_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Select : public Kernel
+{
+public:
+ Select(const Tensor *cond, const Tensor *t, const Tensor *e, Tensor *output);
+
+ const Tensor *condition() const { return _inputs[0]; }
+ const Tensor *t() const { return _inputs[1]; }
+ const Tensor *e() const { return _inputs[2]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+
+private:
+ // for SelectV2
+ bool _requires_broadcast = false;
+ // True if input condition is scalar or input condition has rank one and
+ // matches the first dimension of other inputs.
+ bool _has_low_rank_input_condition = false;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SELECT_H
diff --git a/compiler/luci-interpreter/src/kernels/Select.test.cpp b/compiler/luci-interpreter/src/kernels/Select.test.cpp
new file mode 100644
index 000000000..f74d18dc4
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Select.test.cpp
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Select.h"
+#include "kernels/TestUtils.h"
+
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class SelectTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+std::vector<unsigned char> c_data{
+ 1, 1, 1, // Row 1
+ 0, 0, 0, // Row 2
+};
+
+std::vector<float> t_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+};
+
+std::vector<float> e_data{
+ 0.9, 0.7, 0.5, // Row 1
+ -1, 0, 1, // Row 2
+};
+
+std::vector<float> ref_output_data{
+ 0.5, 0.7, 0.9, // Row 1
+ -1, 0, 1, // Row 2
+};
+
+TEST_F(SelectTest, FloatSimple)
+{
+ Tensor c_tensor = makeInputTensor<DataType::BOOL>({2, 3}, c_data, _memory_manager.get());
+ Tensor t_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, t_data, _memory_manager.get());
+ Tensor e_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, e_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Select kernel(&c_tensor, &t_tensor, &e_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
+}
+
+TEST_F(SelectTest, Invalid_C_Type_NEG)
+{
+ std::vector<float> i_c_data{
+ 1, 1, 1, // Row 1
+ 0, 0, 0, // Row 2
+ };
+
+ Tensor c_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, i_c_data, _memory_manager.get());
+ Tensor t_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, t_data, _memory_manager.get());
+ Tensor e_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, e_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Select kernel(&c_tensor, &t_tensor, &e_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(SelectTest, Invalid_O_Type_NEG)
+{
+ Tensor c_tensor = makeInputTensor<DataType::BOOL>({2, 3}, c_data, _memory_manager.get());
+ Tensor t_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, t_data, _memory_manager.get());
+ Tensor e_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, e_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Select kernel(&c_tensor, &t_tensor, &e_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Shape.cpp b/compiler/luci-interpreter/src/kernels/Shape.cpp
new file mode 100644
index 000000000..0429fe1e5
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Shape.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Shape.h"
+#include "kernels/Utils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+ShapeKernel::ShapeKernel(const Tensor *input, Tensor *output, const ShapeParams &params)
+ : KernelWithParams<ShapeParams>({input}, {output}, params)
+{
+}
+
+void ShapeKernel::configure()
+{
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::S32 or
+ output()->element_type() == DataType::S64);
+ const auto input_shape = input()->shape();
+
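+ // The output is a rank-1 tensor with one element per input dimension.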
+ Shape output_shape(1);
+ output_shape.dim(0) = input_shape.num_dims();
+
+ output()->resize(output_shape);
+}
+
+void ShapeKernel::execute() const
+{
+ switch (params().out_type)
+ {
+ case DataType::S32:
+ evalInt<int32_t>();
+ break;
+ case DataType::S64:
+ evalInt<int64_t>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T> void ShapeKernel::evalInt() const
+{
+ const auto input_shape = input()->shape();
+
+ auto output_data = getTensorData<T>(output());
+
+ for (int i = 0; i < input_shape.num_dims(); ++i)
+ {
+ output_data[i] = input_shape.dim(i);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Shape.h b/compiler/luci-interpreter/src/kernels/Shape.h
new file mode 100644
index 000000000..cfaadec91
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Shape.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SHAPE_H
+#define LUCI_INTERPRETER_KERNELS_SHAPE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ShapeKernel : public KernelWithParams<ShapeParams>
+{
+public:
+ ShapeKernel(const Tensor *input, Tensor *output, const ShapeParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> void evalInt() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SHAPE_H
diff --git a/compiler/luci-interpreter/src/kernels/Shape.test.cpp b/compiler/luci-interpreter/src/kernels/Shape.test.cpp
new file mode 100644
index 000000000..4763e016c
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Shape.test.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Shape.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class ShapeTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+template <typename T> void runShapeKernel(loco::DataType dataType, IMemoryManager *memory_manager)
+{
+ Shape input_shape{1, 3, 1, 3, 5};
+
+ Tensor input_tensor = Tensor(loco::DataType::FLOAT32, input_shape, {}, "");
+ Tensor output_tensor = makeOutputTensor(dataType);
+
+ ShapeParams params{};
+ params.out_type = dataType;
+
+ ShapeKernel kernel(&input_tensor, &output_tensor, params);
+
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<T> ref_output_data{1, 3, 1, 3, 5};
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ref_output_data);
+
+ std::vector<int32_t> ref_output_shape{5};
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(ShapeTest, OutTypeInt)
+{
+ // Run for int32_t output
+ runShapeKernel<int32_t>(loco::DataType::S32, _memory_manager.get());
+ // Run for int64_t output
+ runShapeKernel<int64_t>(loco::DataType::S64, _memory_manager.get());
+
+ SUCCEED();
+}
+
+TEST_F(ShapeTest, Invalid_Output_Type_NEG)
+{
+ Shape input_shape{1, 3};
+
+ Tensor input_tensor = Tensor(loco::DataType::FLOAT32, input_shape, {}, "");
+ Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32);
+
+ ShapeParams params{};
+ params.out_type = loco::DataType::FLOAT32;
+
+ ShapeKernel kernel(&input_tensor, &output_tensor, params);
+
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Slice.cpp b/compiler/luci-interpreter/src/kernels/Slice.cpp
index c4bc3c57c..2fe2c5471 100644
--- a/compiler/luci-interpreter/src/kernels/Slice.cpp
+++ b/compiler/luci-interpreter/src/kernels/Slice.cpp
@@ -16,7 +16,7 @@
#include "kernels/Slice.h"
#include "Utils.h"
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALSlice.h"
#include <cassert>
#include <cstring>
@@ -29,7 +29,7 @@ namespace kernels
const int max_dim = 4;
Slice::Slice(const Tensor *input, const Tensor *begin, const Tensor *size, Tensor *output)
- : Kernel({input, begin, size}, {output})
+ : Kernel({input, begin, size}, {output})
{
}
@@ -131,14 +131,18 @@ void Slice::execute() const
switch (input()->element_type())
{
case DataType::FLOAT32:
- tflite::optimized_ops::Slice(op_params, getTensorShape(input()),
- getTensorData<float>(input()), getTensorShape(output()),
- getTensorData<float>(output()));
+ luci_interpreter_pal::Slice(op_params, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
break;
case DataType::U8:
- tflite::optimized_ops::Slice(op_params, getTensorShape(input()),
- getTensorData<uint8_t>(input()), getTensorShape(output()),
- getTensorData<uint8_t>(output()));
+ luci_interpreter_pal::Slice(op_params, getTensorShape(input()),
+ getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ case DataType::S8:
+ luci_interpreter_pal::Slice(op_params, getTensorShape(input()),
+ getTensorData<int8_t>(input()), getTensorShape(output()),
+ getTensorData<int8_t>(output()));
break;
default:
throw std::runtime_error("Unsupported input type.");
diff --git a/compiler/luci-interpreter/src/kernels/Slice.test.cpp b/compiler/luci-interpreter/src/kernels/Slice.test.cpp
index a360a29cc..517982990 100644
--- a/compiler/luci-interpreter/src/kernels/Slice.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Slice.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/Slice.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -30,11 +31,13 @@ template <typename T> class SliceTest : public ::testing::Test
{
};
-using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(SliceTest, DataTypes);
+using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
+TYPED_TEST_SUITE(SliceTest, DataTypes);
TYPED_TEST(SliceTest, SimpleTest)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
std::vector<TypeParam> input_data{1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6};
Shape input_shape{3, 2, 3, 1};
std::vector<int32_t> begin_data{1, 0, 0, 0};
@@ -44,14 +47,17 @@ TYPED_TEST(SliceTest, SimpleTest)
std::vector<TypeParam> output_data{3, 3, 3, 5, 5, 5};
std::vector<int32_t> output_shape{2, 1, 3, 1};
- Tensor input_tensor = makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data);
- Tensor begin_tensor = makeInputTensor<DataType::S32>(begin_shape, begin_data);
- Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data);
+ Tensor input_tensor =
+ makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data, memory_manager.get());
+ Tensor begin_tensor =
+ makeInputTensor<DataType::S32>(begin_shape, begin_data, memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>());
Slice kernel(&input_tensor, &begin_tensor, &size_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<TypeParam>(output_tensor),
diff --git a/compiler/luci-interpreter/src/kernels/Softmax.cpp b/compiler/luci-interpreter/src/kernels/Softmax.cpp
index 2fb7f3f2c..c230aaa70 100644
--- a/compiler/luci-interpreter/src/kernels/Softmax.cpp
+++ b/compiler/luci-interpreter/src/kernels/Softmax.cpp
@@ -19,6 +19,7 @@
#include "kernels/Utils.h"
#include <tensorflow/lite/kernels/internal/reference/softmax.h>
+#include "PALSoftmax.h"
#include <stdexcept>
@@ -29,13 +30,23 @@ namespace kernels
{
Softmax::Softmax(const Tensor *input, Tensor *output, const SoftmaxParams &params)
- : KernelWithParams<SoftmaxParams>({input}, {output}, params)
+ : KernelWithParams<SoftmaxParams>({input}, {output}, params)
{
}
void Softmax::configure()
{
- assert(input()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= 1);
+ if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S8)
+ {
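+ // For quantized softmax the output zero point is constrained: 0 for U8
+ // outputs and numeric_limits<int8_t>::min() for S8 outputs, which the two
+ // checks below encode.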
+ LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::S8 || output()->zero_point() == 0);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::U8 ||
+ output()->zero_point() == std::numeric_limits<int8_t>::min());
+ tflite::SoftmaxParams op_params{};
+ op_params.table = _table;
+ luci_interpreter_pal::PopulateSoftmaxLookupTable(&op_params, input()->scale(), params().beta);
+ }
output()->resize(input()->shape());
}
@@ -46,6 +57,12 @@ void Softmax::execute() const
case DataType::FLOAT32:
evalFloat();
break;
+ case DataType::S8:
+ evalQuantized<int8_t>();
+ break;
+ case DataType::U8:
+ evalQuantized<uint8_t>();
+ break;
default:
throw std::runtime_error("Unsupported type.");
}
@@ -53,12 +70,23 @@ void Softmax::execute() const
void Softmax::evalFloat() const
{
- tflite::SoftmaxParams params{};
- params.beta = _params.beta;
+ tflite::SoftmaxParams op_params{};
+ op_params.beta = params().beta;
- tflite::reference_ops::Softmax(params, getTensorShape(input()), getTensorData<float>(input()),
+ tflite::reference_ops::Softmax(op_params, getTensorShape(input()), getTensorData<float>(input()),
getTensorShape(output()), getTensorData<float>(output()));
}
+template <typename T> void Softmax::evalQuantized() const
+{
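+ // _table is the exp() lookup table prepared in configure() via
+ // PopulateSoftmaxLookupTable; InitializeParams gives the PAL backend a
+ // chance to finish any platform-specific setup before the Softmax call.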
+ tflite::SoftmaxParams op_params{};
+ op_params.table = const_cast<float *>(_table);
+ op_params.zero_point = output()->zero_point();
+ op_params.scale = output()->scale();
+ luci_interpreter_pal::InitializeParams(&op_params, input()->scale(), params().beta);
+ luci_interpreter_pal::Softmax(op_params, getTensorShape(input()), getTensorData<T>(input()),
+ getTensorShape(output()), getTensorData<T>(output()));
+}
+
} // namespace kernels
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Softmax.h b/compiler/luci-interpreter/src/kernels/Softmax.h
index 2e4eda492..1f281df1c 100644
--- a/compiler/luci-interpreter/src/kernels/Softmax.h
+++ b/compiler/luci-interpreter/src/kernels/Softmax.h
@@ -38,6 +38,9 @@ public:
private:
void evalFloat() const;
+ template <typename T> void evalQuantized() const;
+
+ float _table[256];
};
} // namespace kernels
diff --git a/compiler/luci-interpreter/src/kernels/Softmax.test.cpp b/compiler/luci-interpreter/src/kernels/Softmax.test.cpp
index 2193c3e83..08e70672d 100644
--- a/compiler/luci-interpreter/src/kernels/Softmax.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Softmax.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/Softmax.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -26,33 +27,89 @@ namespace
using namespace testing;
-TEST(SoftmaxTest, Float)
+template <typename T> constexpr loco::DataType toLocoDataType();
+
+template <> constexpr loco::DataType toLocoDataType<float>() { return loco::DataType::FLOAT32; }
+
+template <> constexpr loco::DataType toLocoDataType<uint8_t>() { return loco::DataType::U8; }
+
+template <> constexpr loco::DataType toLocoDataType<int8_t>() { return loco::DataType::S8; }
+
+template <typename T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data)
{
- Shape input_shape{2, 1, 2, 3};
- std::vector<float> input_data{
- 5, -9, 8, //
- -7, 2, -4, //
- 1, -2, 9, //
- 3, -6, -1, //
- };
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
- Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<toLocoDataType<T>()>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(toLocoDataType<T>());
SoftmaxParams params{};
params.beta = 0.1;
Softmax kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
- std::vector<float> ref_output_data{
- 0.38514, 0.09497, 0.51989, //
- 0.20792, 0.51141, 0.28067, //
- 0.25212, 0.18678, 0.56110, //
- 0.48149, 0.19576, 0.32275, //
- };
- EXPECT_THAT(extractTensorData<float>(output_tensor),
- ElementsAreArray(ArrayFloatNear(ref_output_data)));
+ EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+}
+
+template <typename T, std::enable_if_t<std::is_integral<T>::value, bool> = true>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ std::pair<float, int32_t> input_quant_param =
+ quantizationParams<T>(std::min<float>(std::min<float>(input_data), 0.f),
+ std::max<float>(std::max<float>(input_data), 0.f));
+ std::pair<float, int32_t> output_quant_param =
+ quantizationParams<T>(std::min<float>(std::min<float>(output_data), 0.f),
+ std::max<float>(std::max<float>(output_data), 0.f));
+ Tensor input_tensor = makeInputTensor<toLocoDataType<T>()>(input_shape, input_quant_param.first,
+ input_quant_param.second, input_data,
+ memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(toLocoDataType<T>(), output_quant_param.first, output_quant_param.second);
+
+ SoftmaxParams params{};
+ params.beta = 0.1;
+
+ Softmax kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data, output_tensor.scale()));
+}
+
+template <typename T> class SoftmaxTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
+TYPED_TEST_SUITE(SoftmaxTest, DataTypes);
+
+TYPED_TEST(SoftmaxTest, Simple)
+{
+ Check<TypeParam>({2, 1, 2, 3}, {2, 1, 2, 3},
+ {
+ 5, -9, 8, //
+ -7, 2, -4, //
+ 1, -2, 9, //
+ 3, -6, -1, //
+ },
+ {
+ 0.38514, 0.09497, 0.51989, //
+ 0.20792, 0.51141, 0.28067, //
+ 0.25212, 0.18678, 0.56110, //
+ 0.48149, 0.19576, 0.32275, //
+ });
}
} // namespace
diff --git a/compiler/luci-interpreter/src/kernels/SpaceToBatchND.cpp b/compiler/luci-interpreter/src/kernels/SpaceToBatchND.cpp
new file mode 100644
index 000000000..630cd38c4
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/SpaceToBatchND.cpp
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SpaceToBatchND.h"
+#include "kernels/Utils.h"
+
+#include "PALSpaceToBatchND.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+const int kInputMinDimensionNum = 3;
+const int kInputMaxDimensionNum = 4;
+
+} // namespace
+
+SpaceToBatchND::SpaceToBatchND(const Tensor *input, const Tensor *block_shape,
+ const Tensor *paddings, Tensor *output)
+ : Kernel({input, block_shape, paddings}, {output})
+{
+}
+
+void SpaceToBatchND::configure()
+{
+ const auto *block_shape_data = block_shape()->data<int32_t>();
+ const auto *paddings_data = paddings()->data<int32_t>();
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= kInputMinDimensionNum);
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= kInputMaxDimensionNum);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+ int spatial_dims_num = input()->shape().num_dims() - 2;
+
+ LUCI_INTERPRETER_CHECK(block_shape()->shape().num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(block_shape()->shape().dim(0) == spatial_dims_num);
+
+ LUCI_INTERPRETER_CHECK(paddings()->shape().num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(paddings()->shape().dim(0) == spatial_dims_num);
+ LUCI_INTERPRETER_CHECK(paddings()->shape().dim(1) == 2);
+
+ Shape output_shape = Shape(input()->shape().num_dims());
+ int output_batch_size = input()->shape().dim(0);
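+ // Each padded spatial dimension must be divisible by its block size:
+ // out_dim[i] = (in_dim[i] + pad_before[i] + pad_after[i]) / block[i], and
+ // the batch dimension grows by the product of all block sizes.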
+ for (int i = 0; i < spatial_dims_num; ++i)
+ {
+ int final_dim_size =
+ (input()->shape().dim(i + 1) + paddings_data[i * 2] + paddings_data[i * 2 + 1]);
+ LUCI_INTERPRETER_CHECK(final_dim_size % block_shape_data[i] == 0);
+ output_shape.dim(i + 1) = final_dim_size / block_shape_data[i];
+ output_batch_size = output_batch_size * block_shape_data[i];
+ }
+ output_shape.dim(0) = output_batch_size;
+ output_shape.dim(input()->shape().num_dims() - 1) =
+ input()->shape().dim(input()->shape().num_dims() - 1);
+ output()->resize(output_shape);
+}
+
+void SpaceToBatchND::execute() const
+{
+ tflite::SpaceToBatchParams op_params;
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ op_params.output_offset = 0;
+ luci_interpreter_pal::SpaceToBatchND(
+ op_params, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(block_shape()), getTensorData<int32_t>(block_shape()),
+ getTensorShape(paddings()), getTensorData<int32_t>(paddings()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ op_params.output_offset = output()->zero_point();
+ luci_interpreter_pal::SpaceToBatchND(
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ getTensorShape(block_shape()), getTensorData<int32_t>(block_shape()),
+ getTensorShape(paddings()), getTensorData<int32_t>(paddings()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/SpaceToBatchND.h b/compiler/luci-interpreter/src/kernels/SpaceToBatchND.h
new file mode 100644
index 000000000..0893003bb
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/SpaceToBatchND.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SPACETOBATCHND_H
+#define LUCI_INTERPRETER_KERNELS_SPACETOBATCHND_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class SpaceToBatchND : public Kernel
+{
+public:
+ SpaceToBatchND(const Tensor *input, const Tensor *block_shape, const Tensor *paddings,
+ Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *block_shape() const { return _inputs[1]; }
+ const Tensor *paddings() const { return _inputs[2]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SPACETOBATCHND_H
diff --git a/compiler/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp b/compiler/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp
new file mode 100644
index 000000000..3a8b0a812
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SpaceToBatchND.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> block_shape_shape,
+ std::initializer_list<int32_t> paddings_shape,
+ std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data,
+ std::initializer_list<int32_t> block_shape_data,
+ std::initializer_list<int32_t> paddings_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor block_shape_tensor =
+ makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data, memory_manager.get());
+ Tensor paddings_tensor =
+ makeInputTensor<DataType::S32>(paddings_shape, paddings_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+}
+
+template <>
+void Check<uint8_t>(
+ std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> block_shape_shape,
+ std::initializer_list<int32_t> paddings_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<int32_t> block_shape_data,
+ std::initializer_list<int32_t> paddings_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ std::pair<float, int32_t> input_quant_param =
+ quantizationParams<uint8_t>(std::min(input_data), std::max(input_data));
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, memory_manager.get());
+ Tensor block_shape_tensor =
+ makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data, memory_manager.get());
+ Tensor paddings_tensor =
+ makeInputTensor<DataType::S32>(paddings_shape, paddings_data, memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(DataType::U8, input_quant_param.first, input_quant_param.second);
+
+ SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data, output_tensor.scale()));
+ EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+}
+
+template <typename T> class SpaceToBatchNDTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(SpaceToBatchNDTest, DataTypes);
+
+TYPED_TEST(SpaceToBatchNDTest, Simple)
+{
+ Check<TypeParam>(/*input_shape=*/{1, 5, 2, 1}, /*block_shape_shape=*/{2},
+ /*paddings_shape=*/{2, 2},
+ /*output_shape=*/{6, 2, 2, 1},
+ /*input_data=*/{-1.0, 0.2, -0.3, 0.4, -0.5, 0.6, -0.7, 0.8, -0.9, 1.0},
+ /*block_shape_data=*/{3, 2}, /*paddings_data=*/{1, 0, 2, 0},
+ /*output_data=*/{0, 0, 0, -0.5, 0, 0, 0, 0.6, 0, -1.0, 0, -0.7,
+ 0, 0.2, 0, 0.8, 0, -0.3, 0, -0.9, 0, 0.4, 0, 1.0});
+}
+
+TEST(SpaceToBatchNDTest, Invalid_Shape_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, memory_manager.get());
+ Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2}, memory_manager.get());
+ Tensor paddings_tensor =
+ makeInputTensor<DataType::S32>({2, 2}, {0, 0, 0, 0}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/SpaceToDepth.cpp b/compiler/luci-interpreter/src/kernels/SpaceToDepth.cpp
index 6a5bd7cf8..7c29e8cb0 100644
--- a/compiler/luci-interpreter/src/kernels/SpaceToDepth.cpp
+++ b/compiler/luci-interpreter/src/kernels/SpaceToDepth.cpp
@@ -16,7 +16,7 @@
#include "SpaceToDepth.h"
#include "Utils.h"
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALSpaceToDepth.h"
namespace luci_interpreter
{
@@ -24,7 +24,7 @@ namespace kernels
{
SpaceToDepth::SpaceToDepth(const Tensor *input, Tensor *output, const SpaceToDepthParams &params)
- : KernelWithParams<SpaceToDepthParams>({input}, {output}, params)
+ : KernelWithParams<SpaceToDepthParams>({input}, {output}, params)
{
}
@@ -61,14 +61,14 @@ void SpaceToDepth::execute() const
switch (input()->element_type())
{
case DataType::FLOAT32:
- tflite::optimized_ops::SpaceToDepth(op_params, getTensorShape(input()),
- getTensorData<float>(input()), getTensorShape(output()),
- getTensorData<float>(output()));
+ luci_interpreter_pal::SpaceToDepth(op_params, getTensorShape(input()),
+ getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
break;
case DataType::U8:
- tflite::optimized_ops::SpaceToDepth(op_params, getTensorShape(input()),
- getTensorData<uint8_t>(input()), getTensorShape(output()),
- getTensorData<uint8_t>(output()));
+ luci_interpreter_pal::SpaceToDepth(op_params, getTensorShape(input()),
+ getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
break;
default:
throw std::runtime_error("Unsupported type.");
diff --git a/compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp b/compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp
index e4a0fd642..4af488618 100644
--- a/compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/SpaceToDepth.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -31,23 +32,27 @@ template <typename T> class SpaceToDepthTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(SpaceToDepthTest, DataTypes);
+TYPED_TEST_SUITE(SpaceToDepthTest, DataTypes);
TYPED_TEST(SpaceToDepthTest, SimpleCase)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ constexpr DataType element_type = getElementType<TypeParam>();
std::vector<TypeParam> input_data{1, 5, 6, 7, 2, 3, 4, 8};
Shape input_shape{1, 2, 2, 2};
- Tensor input_tensor{getElementType<TypeParam>(), input_shape, {{}, {}}, ""};
- input_tensor.writeData(input_data.data(), input_data.size() * sizeof(TypeParam));
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
std::vector<TypeParam> output_data{1, 5, 6, 7, 2, 3, 4, 8};
std::vector<int32_t> output_shape{1, 1, 1, 8};
- Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>());
+ Tensor output_tensor = makeOutputTensor(element_type);
SpaceToDepthParams params{};
params.block_size = 2;
SpaceToDepth kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<TypeParam>(output_tensor),
diff --git a/compiler/luci-interpreter/src/kernels/Split.cpp b/compiler/luci-interpreter/src/kernels/Split.cpp
index 325b1c22f..1a563f307 100644
--- a/compiler/luci-interpreter/src/kernels/Split.cpp
+++ b/compiler/luci-interpreter/src/kernels/Split.cpp
@@ -18,7 +18,7 @@
#include "Utils.h"
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALSplit.h"
namespace luci_interpreter
{
@@ -26,7 +26,7 @@ namespace kernels
{
Split::Split(const Tensor *axis, const Tensor *input, std::vector<Tensor *> outputs)
- : Kernel({axis, input}, std::move(outputs))
+ : Kernel({axis, input}, std::move(outputs))
{
}
@@ -56,11 +56,11 @@ void Split::execute() const
params.num_split = _outputs.size();
params.axis = _axis_value;
-#define TF_LITE_SPLIT(scalar) \
- { \
- VectorOfTensors<scalar, false> all_outputs(_outputs); \
- tflite::optimized_ops::Split(params, getTensorShape(input()), getTensorData<scalar>(input()), \
- all_outputs.shapes(), all_outputs.data()); \
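+// Same macro as before, now routed through luci_interpreter_pal so that a
+// platform-specific Split implementation can be selected at build time.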
+#define TF_LITE_SPLIT(scalar) \
+ { \
+ VectorOfTensors<scalar, false> all_outputs(_outputs); \
+ luci_interpreter_pal::Split(params, getTensorShape(input()), getTensorData<scalar>(input()), \
+ all_outputs.shapes(), all_outputs.data()); \
}
switch (input()->element_type())
diff --git a/compiler/luci-interpreter/src/kernels/Split.test.cpp b/compiler/luci-interpreter/src/kernels/Split.test.cpp
index 11d0b1ea9..283cd9aa9 100644
--- a/compiler/luci-interpreter/src/kernels/Split.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Split.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/Split.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -30,11 +31,14 @@ using namespace testing;
template <typename T>
void Check(int axis, int num_splits, std::initializer_list<int32_t> input_shape,
std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data,
- std::vector<std::vector<T>> output_data, DataType element_type)
+ std::vector<std::vector<T>> output_data)
{
- Tensor axis_tensor = makeInputTensor<DataType::S32>({}, {axis});
- Tensor input_tensor{element_type, input_shape, {}, ""};
- input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(T));
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ constexpr DataType element_type = getElementType<T>();
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({}, {axis}, memory_manager.get());
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
std::vector<Tensor> output_tensors;
output_tensors.reserve(num_splits);
@@ -51,6 +55,10 @@ void Check(int axis, int num_splits, std::initializer_list<int32_t> input_shape,
Split kernel(&axis_tensor, &input_tensor, std::move(output_tensor_ptrs));
kernel.configure();
+ for (int i = 0; i < num_splits; ++i)
+ {
+ memory_manager->allocate_memory(output_tensors[i]);
+ }
kernel.execute();
for (int i = 0; i < num_splits; ++i)
@@ -65,60 +73,55 @@ template <typename T> class SplitTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(SplitTest, DataTypes);
+TYPED_TEST_SUITE(SplitTest, DataTypes);
TYPED_TEST(SplitTest, FourDimensional)
{
Check<TypeParam>(/*axis=*/0, /*num_splits=*/2, {2, 2, 2, 2}, {1, 2, 2, 2},
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
{
- {1, 2, 3, 4, 5, 6, 7, 8}, //
- {9, 10, 11, 12, 13, 14, 15, 16}, //
- },
- getElementType<TypeParam>());
+ {1, 2, 3, 4, 5, 6, 7, 8}, //
+ {9, 10, 11, 12, 13, 14, 15, 16}, //
+ });
Check<TypeParam>(
- /*axis=*/1, /*num_splits=*/2, {2, 2, 2, 2}, {2, 1, 2, 2},
- {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
- {
- {1, 2, 3, 4, 9, 10, 11, 12}, //
- {5, 6, 7, 8, 13, 14, 15, 16}, //
- },
- getElementType<TypeParam>());
+ /*axis=*/1, /*num_splits=*/2, {2, 2, 2, 2}, {2, 1, 2, 2},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ {
+ {1, 2, 3, 4, 9, 10, 11, 12}, //
+ {5, 6, 7, 8, 13, 14, 15, 16}, //
+ });
Check<TypeParam>(
- /*axis=*/2, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 1, 2},
- {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
- {
- {1, 2, 5, 6, 9, 10, 13, 14}, //
- {3, 4, 7, 8, 11, 12, 15, 16}, //
- },
- getElementType<TypeParam>());
+ /*axis=*/2, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 1, 2},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ {
+ {1, 2, 5, 6, 9, 10, 13, 14}, //
+ {3, 4, 7, 8, 11, 12, 15, 16}, //
+ });
Check<TypeParam>(
- /*axis=*/3, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 2, 1},
- {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
- {
- {1, 3, 5, 7, 9, 11, 13, 15}, //
- {2, 4, 6, 8, 10, 12, 14, 16}, //
- },
- getElementType<TypeParam>());
+ /*axis=*/3, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 2, 1},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ {
+ {1, 3, 5, 7, 9, 11, 13, 15}, //
+ {2, 4, 6, 8, 10, 12, 14, 16}, //
+ });
}
TYPED_TEST(SplitTest, OneDimensional)
{
Check<TypeParam>(
- /*axis=*/0, /*num_splits=*/8, {8}, {1}, {1, 2, 3, 4, 5, 6, 7, 8},
- {{1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}}, getElementType<TypeParam>());
+ /*axis=*/0, /*num_splits=*/8, {8}, {1}, {1, 2, 3, 4, 5, 6, 7, 8},
+ {{1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}});
}
TYPED_TEST(SplitTest, NegativeAxis)
{
Check<TypeParam>(
- /*axis=*/-4, /*num_splits=*/2, {2, 2, 2, 2}, {1, 2, 2, 2},
- {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
- {
- {1, 2, 3, 4, 5, 6, 7, 8}, //
- {9, 10, 11, 12, 13, 14, 15, 16},
- },
- getElementType<TypeParam>());
+ /*axis=*/-4, /*num_splits=*/2, {2, 2, 2, 2}, {1, 2, 2, 2},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ {
+ {1, 2, 3, 4, 5, 6, 7, 8}, //
+ {9, 10, 11, 12, 13, 14, 15, 16},
+ });
}
} // namespace
diff --git a/compiler/luci-interpreter/src/kernels/SplitV.cpp b/compiler/luci-interpreter/src/kernels/SplitV.cpp
new file mode 100644
index 000000000..aa6820889
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/SplitV.cpp
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SplitV.h"
+
+#include "Utils.h"
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+SplitV::SplitV(const Tensor *input, const Tensor *size_splits, const Tensor *axis,
+ std::vector<Tensor *> outputs)
+ : Kernel({input, size_splits, axis}, std::move(outputs))
+{
+}
+
+void SplitV::configure()
+{
+ assert(axis()->shape().num_elements() == 1);
+ _axis_value = getTensorData<int32_t>(axis())[0];
+ if (_axis_value < 0)
+ _axis_value += input()->shape().num_dims();
+ assert(_axis_value >= 0 && _axis_value < input()->shape().num_dims());
+
+ auto num_split = static_cast<int32_t>(_outputs.size());
+ auto sizes_data = getTensorData<int32_t>(size_splits());
+
+ assert(size_splits()->shape().num_dims() == 1);
+
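+  // At most one entry of size_splits may be -1; it is resolved below to
+  // whatever remains along the split axis after the explicit sizes are
+  // summed (mirroring the TFLite SplitV convention).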
+ int32_t sum = 0;
+  const auto num_dims_size_splits = size_splits()->shape().dim(0);
+ int32_t count_neg_dim = 0;
+
+  for (int32_t i = 0; i < num_dims_size_splits - 1; ++i)
+ {
+ if (sizes_data[i] != -1)
+ {
+ sum += sizes_data[i];
+ }
+ else
+ {
+ count_neg_dim++;
+ }
+ }
+ assert(count_neg_dim < 2);
+ assert(size_splits()->shape().num_elements() == num_split);
+
+ auto output_shape = input()->shape();
+ for (int32_t i = 0; i < num_split; ++i)
+ {
+ if (sizes_data[i] == -1)
+ {
+ output_shape.dim(_axis_value) = input()->shape().dim(_axis_value) - sum;
+ }
+ else
+ {
+ output_shape.dim(_axis_value) = sizes_data[i];
+ }
+ _outputs[i]->resize(output_shape);
+ }
+}
+
+void SplitV::execute() const
+{
+ tflite::SplitParams params{};
+ params.num_split = _outputs.size();
+ params.axis = _axis_value;
+
+#define TF_LITE_SPLIT(scalar) \
+ { \
+ VectorOfTensors<scalar, false> all_outputs(_outputs); \
+ tflite::optimized_ops::Split(params, getTensorShape(input()), getTensorData<scalar>(input()), \
+ all_outputs.shapes(), all_outputs.data()); \
+ }
+
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ TF_LITE_SPLIT(float);
+ break;
+ case DataType::U8:
+ TF_LITE_SPLIT(uint8_t);
+ break;
+ case DataType::S16:
+ TF_LITE_SPLIT(int16_t);
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+#undef TF_LITE_SPLIT
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/SplitV.h b/compiler/luci-interpreter/src/kernels/SplitV.h
new file mode 100644
index 000000000..92f6288fb
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/SplitV.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SPLIT_V_H
+#define LUCI_INTERPRETER_KERNELS_SPLIT_V_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class SplitV : public Kernel
+{
+public:
+ SplitV(const Tensor *input, const Tensor *size_splits, const Tensor *axis,
+ std::vector<Tensor *> outputs);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *size_splits() const { return _inputs[1]; }
+ const Tensor *axis() const { return _inputs[2]; }
+ Tensor *output(int index) const { return _outputs[index]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ int32_t _axis_value{};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SPLIT_V_H
diff --git a/compiler/luci-interpreter/src/kernels/SplitV.test.cpp b/compiler/luci-interpreter/src/kernels/SplitV.test.cpp
new file mode 100644
index 000000000..035bc2122
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/SplitV.test.cpp
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SplitV.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(int axis, std::initializer_list<int32_t> splits_size,
+ std::initializer_list<int32_t> input_shape, std::initializer_list<T> input_data,
+ std::vector<std::vector<T>> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+
+ auto num_splits = static_cast<int32_t>(splits_size.size());
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor sizes_tensor =
+ makeInputTensor<DataType::S32>({num_splits}, splits_size, memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({}, {axis}, memory_manager.get());
+
+ std::vector<Tensor> output_tensors;
+ output_tensors.reserve(num_splits);
+ for (int i = 0; i < num_splits; ++i)
+ {
+ output_tensors.emplace_back(makeOutputTensor(element_type));
+ }
+
+ std::vector<Tensor *> output_tensor_ptrs(num_splits);
+ for (int i = 0; i < num_splits; ++i)
+ {
+ output_tensor_ptrs[i] = &output_tensors[i];
+ }
+
+ SplitV kernel(&input_tensor, &sizes_tensor, &axis_tensor, std::move(output_tensor_ptrs));
+ kernel.configure();
+ for (int i = 0; i < num_splits; ++i)
+ {
+ memory_manager->allocate_memory(output_tensors[i]);
+ }
+ kernel.execute();
+
+ for (int i = 0; i < num_splits; ++i)
+ {
+    EXPECT_THAT(extractTensorData<T>(output_tensors[i]),
+                ::testing::ElementsAreArray(output_data[i]));
+ }
+}
+
+template <typename T> class SplitVTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t, int16_t>;
+TYPED_TEST_SUITE(SplitVTest, DataTypes);
+
+TYPED_TEST(SplitVTest, ThreeDimensional)
+{
+ Check<TypeParam>(
+ /*axis=*/0, /*splits_size=*/{1, 2}, {3, 3, 3},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
+ {
+ {1, 2, 3, 4, 5, 6, 7, 8, 9}, //
+ {10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27} //
+ });
+ Check<TypeParam>(
+ /*axis=*/1, /*splits_size=*/{1, 2}, {3, 3, 3},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
+ {
+ {1, 2, 3, 10, 11, 12, 19, 20, 21}, //
+ {4, 5, 6, 7, 8, 9, 13, 14, 15, 16, 17, 18, 22, 23, 24, 25, 26, 27} //
+ });
+ Check<TypeParam>(
+ /*axis=*/2, /*splits_size=*/{1, 2}, {3, 3, 3},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
+ {
+ {1, 4, 7, 10, 13, 16, 19, 22, 25}, //
+ {2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18, 20, 21, 23, 24, 26, 27} //
+ });
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Sqrt.test.cpp b/compiler/luci-interpreter/src/kernels/Sqrt.test.cpp
index cdd208280..96835fbfc 100644
--- a/compiler/luci-interpreter/src/kernels/Sqrt.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Sqrt.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/Sqrt.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -29,52 +30,58 @@ using namespace testing;
void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
std::initializer_list<float> input_data, std::initializer_list<float> output_data)
{
- Tensor input_tensor{DataType::FLOAT32, input_shape, {}, ""};
- input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(float));
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Sqrt kernel(&input_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
- EXPECT_THAT(extractTensorData<float>(output_tensor),
- ::testing::ElementsAreArray(ArrayFloatNear(output_data)));
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
}
TEST(SqrtTest, SimpleSqrt)
{
Check(
- /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
- /*input_data=*/
- {
- 0, 8, 2, 4, //
- 3, 7, 10, 0.3, //
- },
- /*output_data=*/
- {
- 0.0, 2.8284271, 1.4142136, 2, //
- 1.7320508, 2.6457513, 3.1622777, 0.54772256, //
- });
+ /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
+ /*input_data=*/
+ {
+ 0, 8, 2, 4, //
+ 3, 7, 10, 0.3, //
+ },
+ /*output_data=*/
+ {
+ 0.0, 2.8284271, 1.4142136, 2, //
+ 1.7320508, 2.6457513, 3.1622777, 0.54772256, //
+ });
}
TEST(SqrtTest, Input_Output_Type_NEG)
{
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S32);
Sqrt kernel(&input_tensor, &output_tensor);
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(AddTest, Invalid_Input_Type_NEG)
+TEST(SqrtTest, Invalid_Input_Type_NEG)
{
- Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1});
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S64);
Sqrt kernel(&input_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
EXPECT_ANY_THROW(kernel.execute());
}
diff --git a/compiler/luci-interpreter/src/kernels/Square.cpp b/compiler/luci-interpreter/src/kernels/Square.cpp
new file mode 100644
index 000000000..bc71905c1
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Square.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Square.h"
+#include "kernels/Utils.h"
+
+#include <stdexcept>
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Square::Square(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Square::configure()
+{
+ if (input()->element_type() != output()->element_type())
+ {
+ throw std::runtime_error("Input/output tensor data type mismatch.");
+ }
+ output()->resize(input()->shape());
+}
+
+void Square::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Square::evalFloat() const
+{
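+  // Elementwise x * x over the flat buffer; input and output shapes match,
+  // so a single linear walk suffices.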
+ auto in = getTensorData<float>(input());
+ auto out = getTensorData<float>(output());
+ auto size = getTensorShape(input()).FlatSize();
+ for (auto i = in; i != in + size; ++i)
+ {
+ *out = (*i) * (*i);
+ ++out;
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Square.h b/compiler/luci-interpreter/src/kernels/Square.h
new file mode 100644
index 000000000..73ed5a707
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Square.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SQUARE_H
+#define LUCI_INTERPRETER_KERNELS_SQUARE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Square : public Kernel
+{
+public:
+ Square(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SQUARE_H
diff --git a/compiler/luci-interpreter/src/kernels/Square.test.cpp b/compiler/luci-interpreter/src/kernels/Square.test.cpp
new file mode 100644
index 000000000..51662dea7
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Square.test.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Square.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(SquareTest, Float)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Shape input_shape{3, 1, 2};
+ std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Square kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{1.0, 0.0, 1.0, 121.0, 4.0, 2.0736};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/SquaredDifference.cpp b/compiler/luci-interpreter/src/kernels/SquaredDifference.cpp
new file mode 100644
index 000000000..3bafeba4a
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/SquaredDifference.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SquaredDifference.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+SquaredDifference::SquaredDifference(const Tensor *input1, const Tensor *input2, Tensor *output)
+ : Kernel({input1, input2}, {output})
+{
+}
+
+void SquaredDifference::configure()
+{
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type())
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type())
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void SquaredDifference::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalSquaredDifference<float>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T> inline void SquaredDifference::evalSquaredDifference() const
+{
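+  // BinaryOpBroadcastSlow applies (x - y)^2 elementwise, broadcasting the
+  // smaller shape against the larger one (e.g. {3, 1, 2} against {1}).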
+ BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()), [](T x, T y) {
+ const T difference = x - y;
+ return difference * difference;
+ });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/SquaredDifference.h b/compiler/luci-interpreter/src/kernels/SquaredDifference.h
new file mode 100644
index 000000000..9327caf93
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/SquaredDifference.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SQUAREDDIFFERENCE_H
+#define LUCI_INTERPRETER_KERNELS_SQUAREDDIFFERENCE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class SquaredDifference : public Kernel
+{
+public:
+ SquaredDifference(const Tensor *input1, const Tensor *input2, Tensor *output);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> inline void evalSquaredDifference() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SQUAREDDIFFERENCE_H
diff --git a/compiler/luci-interpreter/src/kernels/SquaredDifference.test.cpp b/compiler/luci-interpreter/src/kernels/SquaredDifference.test.cpp
new file mode 100644
index 000000000..2819c01e2
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/SquaredDifference.test.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SquaredDifference.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(SquaredDifferenceTest, Float)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Shape input_shape{3, 1, 2};
+ std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
+ std::vector<float> input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43};
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data2, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ SquaredDifference kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{4.0, 0.0, 4.0, 1.0, 1.0, 0.0001};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST(SquaredDifferenceTest, FloatBroadcast)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Shape input_shape1{3, 1, 2};
+ Shape input_shape2{1};
+ std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
+ std::vector<float> input_data2{1.0};
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::FLOAT32>(input_shape1, input_data1, memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::FLOAT32>(input_shape2, input_data2, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ SquaredDifference kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0.0, 1.0, 4.0, 100.0, 9.0, 5.9536};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Squeeze.cpp b/compiler/luci-interpreter/src/kernels/Squeeze.cpp
index ce43ef789..4a75518c7 100644
--- a/compiler/luci-interpreter/src/kernels/Squeeze.cpp
+++ b/compiler/luci-interpreter/src/kernels/Squeeze.cpp
@@ -27,7 +27,7 @@ namespace kernels
{
Squeeze::Squeeze(const Tensor *input, Tensor *output, const SqueezeParams &params)
- : KernelWithParams<SqueezeParams>({input}, {output}, params)
+ : KernelWithParams<SqueezeParams>({input}, {output}, params)
{
}
diff --git a/compiler/luci-interpreter/src/kernels/Squeeze.test.cpp b/compiler/luci-interpreter/src/kernels/Squeeze.test.cpp
index 3a34284dd..1bc0b6459 100644
--- a/compiler/luci-interpreter/src/kernels/Squeeze.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Squeeze.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/Squeeze.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -29,20 +30,21 @@ using namespace testing;
template <typename T>
void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
std::initializer_list<T> input_data, std::initializer_list<T> output_data,
- DataType element_type, std::vector<int32_t> squeeze_dims)
+ std::initializer_list<int32_t> squeeze_dims)
{
- Tensor input_tensor{element_type, input_shape, {}, ""};
- input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(T));
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ constexpr DataType element_type = getElementType<T>();
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(element_type);
SqueezeParams params{};
- for (size_t i = 0; i < squeeze_dims.size(); i++)
- {
- params.squeeze_dims.push_back(squeeze_dims.at(i));
- }
+ params.squeeze_dims = squeeze_dims;
Squeeze kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
@@ -54,17 +56,17 @@ template <typename T> class SqueezeTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(SqueezeTest, DataTypes);
+TYPED_TEST_SUITE(SqueezeTest, DataTypes);
TYPED_TEST(SqueezeTest, TotalTest)
{
Check<TypeParam>(
- /*input_shape=*/{1, 24, 1}, /*output_shape=*/{24},
- /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
- 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
- /*output_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
- 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
- getElementType<TypeParam>(), {-1, 0});
+ /*input_shape=*/{1, 24, 1}, /*output_shape=*/{24},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+ 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
+ /*output_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+ 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
+ {-1, 0});
}
} // namespace
diff --git a/compiler/luci-interpreter/src/kernels/StridedSlice.cpp b/compiler/luci-interpreter/src/kernels/StridedSlice.cpp
index 679485439..a8730d861 100644
--- a/compiler/luci-interpreter/src/kernels/StridedSlice.cpp
+++ b/compiler/luci-interpreter/src/kernels/StridedSlice.cpp
@@ -19,7 +19,7 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/strided_slice.h>
#include <stdexcept>
@@ -31,7 +31,7 @@ namespace kernels
StridedSlice::StridedSlice(const Tensor *input, const Tensor *begin, const Tensor *end,
const Tensor *strides, Tensor *output, const StridedSliceParams &params)
- : KernelWithParams<StridedSliceParams>({input, begin, end, strides}, {output}, params)
+ : KernelWithParams<StridedSliceParams>({input, begin, end, strides}, {output}, params)
{
}
@@ -82,7 +82,7 @@ void StridedSlice::configure()
assert(stride != 0);
int32_t begin = ::tflite::strided_slice::StartForAxis(op_params, getTensorShape(input()), idx);
int32_t end =
- ::tflite::strided_slice::StopForAxis(op_params, getTensorShape(input()), idx, begin);
+ ::tflite::strided_slice::StopForAxis(op_params, getTensorShape(input()), idx, begin);
const bool shrink_axis = params().shrink_axis_mask & (1 << idx);
if (shrink_axis)
@@ -136,6 +136,11 @@ void StridedSlice::execute() const
getTensorData<uint8_t>(input()), getTensorShape(output()),
getTensorData<uint8_t>(output()));
break;
+ case DataType::S32:
+ tflite::reference_ops::StridedSlice(op_params, getTensorShape(input()),
+ getTensorData<int32_t>(input()), getTensorShape(output()),
+ getTensorData<int32_t>(output()));
+ break;
default:
throw std::runtime_error("Unsupported type.");
}
diff --git a/compiler/luci-interpreter/src/kernels/StridedSlice.test.cpp b/compiler/luci-interpreter/src/kernels/StridedSlice.test.cpp
index 5ab06e2ec..399cdebed 100644
--- a/compiler/luci-interpreter/src/kernels/StridedSlice.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/StridedSlice.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/StridedSlice.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -28,6 +29,8 @@ using namespace testing;
TEST(StridedSliceTest, Float)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
Shape input_shape{2, 3, 2};
std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
Shape begin_shape{3};
@@ -36,17 +39,15 @@ TEST(StridedSliceTest, Float)
std::vector<int32_t> end_data{1, 3, 2};
Shape strides_shape{3};
std::vector<int32_t> strides_data{1, 1, 1};
- Tensor input_tensor{DataType::FLOAT32, input_shape, {}, ""};
- Tensor begin_tensor{DataType::S32, begin_shape, {}, ""};
- Tensor end_tensor{DataType::S32, end_shape, {}, ""};
- Tensor strides_tensor{DataType::S32, strides_shape, {}, ""};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor begin_tensor =
+ makeInputTensor<DataType::S32>(begin_shape, begin_data, memory_manager.get());
+ Tensor end_tensor = makeInputTensor<DataType::S32>(end_shape, end_data, memory_manager.get());
+ Tensor strides_tensor =
+ makeInputTensor<DataType::S32>(strides_shape, strides_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
- input_tensor.writeData(input_data.data(), input_data.size() * sizeof(float));
- begin_tensor.writeData(begin_data.data(), begin_data.size() * sizeof(int32_t));
- end_tensor.writeData(end_data.data(), end_data.size() * sizeof(int32_t));
- strides_tensor.writeData(strides_data.data(), strides_data.size() * sizeof(int32_t));
-
StridedSliceParams params{};
params.begin_mask = 0;
params.end_mask = 0;
@@ -57,37 +58,36 @@ TEST(StridedSliceTest, Float)
StridedSlice kernel(&input_tensor, &begin_tensor, &end_tensor, &strides_tensor, &output_tensor,
params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<int32_t> output_shape{3, 2};
std::vector<float> output_data{1, 2, 3, 4, 5, 6};
- EXPECT_THAT(extractTensorData<float>(output_tensor),
- ElementsAreArray(ArrayFloatNear(output_data)));
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
}
TEST(StridedSliceTest, Uint8)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
Shape input_shape{2, 3, 2};
std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
- std::vector<uint8_t> quant_input_data = quantize<uint8_t>(input_data, 1.0f, 0);
Shape begin_shape{3};
std::vector<int32_t> begin_data{0, 0, 0};
Shape end_shape{3};
std::vector<int32_t> end_data{1, 3, 2};
Shape strides_shape{3};
std::vector<int32_t> strides_data{1, 1, 1};
- Tensor input_tensor{DataType::U8, input_shape, {{1.0f}, {0}}, ""};
- Tensor begin_tensor{DataType::S32, begin_shape, {}, ""};
- Tensor end_tensor{DataType::S32, end_shape, {}, ""};
- Tensor strides_tensor{DataType::S32, strides_shape, {}, ""};
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>(input_shape, 1.0f, 0, input_data, memory_manager.get());
+ Tensor begin_tensor =
+ makeInputTensor<DataType::S32>(begin_shape, begin_data, memory_manager.get());
+ Tensor end_tensor = makeInputTensor<DataType::S32>(end_shape, end_data, memory_manager.get());
+ Tensor strides_tensor =
+ makeInputTensor<DataType::S32>(strides_shape, strides_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, 1.0f, 0);
- input_tensor.writeData(quant_input_data.data(), quant_input_data.size() * sizeof(uint8_t));
- begin_tensor.writeData(begin_data.data(), begin_data.size() * sizeof(int32_t));
- end_tensor.writeData(end_data.data(), end_data.size() * sizeof(int32_t));
- strides_tensor.writeData(strides_data.data(), strides_data.size() * sizeof(int32_t));
-
StridedSliceParams params{};
params.begin_mask = 0;
params.end_mask = 0;
@@ -98,13 +98,12 @@ TEST(StridedSliceTest, Uint8)
StridedSlice kernel(&input_tensor, &begin_tensor, &end_tensor, &strides_tensor, &output_tensor,
params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<int32_t> output_shape{3, 2};
std::vector<float> output_data{1, 2, 3, 4, 5, 6};
- EXPECT_THAT(dequantize(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(),
- output_tensor.zero_point()),
- ElementsAreArray(ArrayFloatNear(output_data)));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(output_data));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
}
diff --git a/compiler/luci-interpreter/src/kernels/Sub.cpp b/compiler/luci-interpreter/src/kernels/Sub.cpp
new file mode 100644
index 000000000..1fd583c62
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Sub.cpp
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Sub.h"
+#include "kernels/Utils.h"
+
+#include "PALSub.h"
+
+#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
+
+#include <algorithm>
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Sub::Sub(const Tensor *input1, const Tensor *input2, Tensor *output, const SubParams &params)
+ : KernelWithParams<SubParams>({input1, input2}, {output}, params)
+{
+}
+
+void Sub::configure()
+{
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type())
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type())
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Sub::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Sub::evalFloat() const
+{
+ tflite::ArithmeticParams params{};
+ fillArithmeticActivationRange<float>(params, _params.activation);
+
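+  // ProcessBroadcastShapes reports whether the two input shapes differ;
+  // the slower broadcasting path is taken only when they do.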
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastSubSlow(
+ params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
+ getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
+ }
+ else
+ {
+ luci_interpreter_pal::Sub(params, getTensorShape(input1()), getTensorData<float>(input1()),
+ getTensorShape(input2()), getTensorData<float>(input2()),
+ getTensorShape(output()), getTensorData<float>(output()));
+ }
+}
+
+template <typename T> void Sub::evalInteger() const
+{
+ tflite::ArithmeticParams params{};
+ fillArithmeticActivationRange<T>(params, _params.activation);
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastSubSlow(
+ params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
+ getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Sub(params, getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()));
+ }
+}
+
+void Sub::evalQuantized() const
+{
+ const auto input1_scale = static_cast<double>(input1()->scale());
+ const auto input2_scale = static_cast<double>(input2()->scale());
+ const auto output_scale = static_cast<double>(output()->scale());
+
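+  // Standard TFLite quantized add/sub scheme: rescale both inputs to the
+  // common scale 2 * max(s1, s2), pre-shifting values left by 20 bits so the
+  // fixed-point multiplies keep precision; the output multiplier folds the
+  // shift back out.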
+ const int left_shift = 20;
+ const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
+ const double real_input1_multiplier = input1_scale / twice_max_input_scale;
+ const double real_input2_multiplier = input2_scale / twice_max_input_scale;
+ const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
+
+ int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
+ int input1_shift{}, input2_shift{}, output_shift{};
+ quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift);
+ quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift);
+ quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift);
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::ArithmeticParams params{};
+ params.left_shift = left_shift;
+ // The kernel expects inputs' zero points to be negated.
+ params.input1_offset = -input1()->zero_point(); // Note the '-'.
+ params.input1_multiplier = input1_multiplier;
+ params.input1_shift = input1_shift;
+ params.input2_offset = -input2()->zero_point(); // Note the '-'.
+ params.input2_multiplier = input2_multiplier;
+ params.input2_shift = input2_shift;
+ params.output_offset = output()->zero_point();
+ params.output_multiplier = output_multiplier;
+ params.output_shift = output_shift;
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastQuantSubSlow(
+ params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()),
+ getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Sub(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
+ getTensorShape(input2()), getTensorData<uint8_t>(input2()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Sub.h b/compiler/luci-interpreter/src/kernels/Sub.h
new file mode 100644
index 000000000..23952b3bd
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Sub.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SUB_H
+#define LUCI_INTERPRETER_KERNELS_SUB_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Sub : public KernelWithParams<SubParams>
+{
+public:
+ Sub(const Tensor *input1, const Tensor *input2, Tensor *output, const SubParams &params);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ template <typename T> void evalInteger() const;
+ void evalQuantized() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SUB_H
diff --git a/compiler/luci-interpreter/src/kernels/Sub.test.cpp b/compiler/luci-interpreter/src/kernels/Sub.test.cpp
new file mode 100644
index 000000000..9abafd49a
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Sub.test.cpp
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Sub.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+#include <algorithm>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+using std::pair;
+using std::vector;
+using std::transform;
+using std::initializer_list;
+
+class SubTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// for quantized Sub, the error shouldn't exceed one quantization step,
+// i.e. (max - min) / 255 for uint8
+float GetTolerance(float min, float max)
+{
+ float kQuantizedStep = (max - min) / 255.0;
+ return kQuantizedStep;
+}
+
+TEST_F(SubTest, Uint8)
+{
+ Shape base_shape = {2, 3, 1, 2};
+ vector<float> base_data = {-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
+ 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
+ vector<Shape> test_shapes = {{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ vector<float> test_data = {0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
+ vector<vector<int32_t>> output_shapes = {{2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
+ vector<vector<float>> output_data = {
+ {-0.5f, 2.0f, 0.1f, 1.8f, -1.3f, 1.4f, 0.7f, 0.2f, 1.3f, 0.0f, -0.1f, -0.4f,
+ 0.6f, -1.4f, 1.2f, -1.6f, -0.2f, -2.0f, 1.0f, 2.5f, 1.6f, 2.3f, 0.2f, 1.9f,
+ -1.8f, -0.3f, -1.2f, -0.5f, -2.6f, -0.9f, 0.5f, -2.5f, 1.1f, -2.7f, -0.3f, -3.0f},
+ {-0.5f, 2.0f, 1.3f, 0.0f, -0.2f, -2.0f, 1.0f, 2.5f, -1.2f, -0.5f, -0.3f, -3.0f},
+ {-0.5f, 2.1f, -0.6f, 2.0f, 0.1f, 2.7f, 0.7f, 0.3f, 0.6f, 0.2f, 1.3f, 0.9f,
+ 0.6f, -1.3f, 0.5f, -1.4f, 1.2f, -0.7f, 0.7f, 2.3f, 0.2f, 1.8f, 0.3f, 1.9f,
+ -2.1f, -0.5f, -2.6f, -1.0f, -2.5f, -0.9f, 0.2f, -2.7f, -0.3f, -3.0f, -0.2f, -3.0f},
+ {-0.5f, 2.1f, 0.6f, 0.2f, 1.2f, -0.7f, 0.7f, 2.3f, -2.6f, -1.0f, -0.2f, -3.0f}};
+
+ float kQuantizedTolerance = GetTolerance(-3.f, 3.f);
+ pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-3.f, 3.f);
+ for (size_t i = 0; i < output_data.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DataType::U8>(
+ base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U8>(
+ test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
+
+ SubParams params{};
+ params.activation = Activation::NONE;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data[i], kQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i]));
+ }
+
+  // Negate output_data before swapping the inputs, because subtraction is not commutative.
+  auto negate = [](auto &i) {
+    transform(i.begin(), i.end(), i.begin(), [](auto &value) { return value * -1.0f; });
+  };
+  std::for_each(output_data.begin(), output_data.end(), negate);
+
+ // Re-run with exchanged inputs.
+ for (size_t i = 0; i < output_data.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DataType::U8>(
+ test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U8>(
+ base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
+
+ SubParams params{};
+ params.activation = Activation::NONE;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data[i], kQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i]));
+ }
+}
+
+TEST_F(SubTest, Float)
+{
+ Shape base_shape = {2, 3, 1, 2};
+ vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ vector<vector<int32_t>> output_shapes{{2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
+ vector<vector<float>> test_outputs = {
+ {0.0f, 2.0f, 0.1f, 1.8f, 0.0f, 1.4f, 0.7f, 0.2f, 1.3f, 0.0f, 0.0f, 0.0f,
+ 0.6f, 0.0f, 1.2f, 0.0f, 0.0f, 0.0f, 1.0f, 2.5f, 1.6f, 2.3f, 0.2f, 1.9f,
+ 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.5f, 0.0f, 1.1f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 2.0f, 1.3f, 0.0f, 0.0f, 0.0f, 1.0f, 2.5f, 0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 2.1f, 0.0f, 2.0f, 0.1f, 2.7f, 0.7f, 0.3f, 0.6f, 0.2f, 1.3f, 0.9f,
+ 0.6f, 0.0f, 0.5f, 0.0f, 1.2f, 0.0f, 0.7f, 2.3f, 0.2f, 1.8f, 0.3f, 1.9f,
+ 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.2f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 2.1f, 0.6f, 0.2f, 1.2f, 0.0f, 0.7f, 2.3f, 0.0f, 0.0f, 0.0f, 0.0f}};
+
+ vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
+ 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
+ vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ SubParams params{};
+ params.activation = Activation::RELU;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
+ << "With shape number " << i;
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i]));
+ }
+}
+
+template <loco::DataType DType> void CheckInteger(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ Shape base_shape = {2, 3, 1, 2};
+ std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ std::vector<std::vector<dtype>> test_outputs = {
+ {0, 1, 2, 3, 0, 0, 0, 0, 4, 1, 0, 0, 0, 0, 7, 0, 3, 0,
+ 0, 2, 4, 4, 0, 0, 3, 0, 10, 0, 6, 0, 3, 0, 10, 2, 6, 0},
+ {0, 1, 4, 1, 3, 0, 0, 2, 10, 0, 6, 0},
+ {0, 0, 0, 1, 2, 5, 0, 0, 0, 0, 4, 3, 0, 0, 3, 0, 7, 0,
+ 2, 4, 0, 2, 0, 0, 8, 0, 6, 0, 1, 0, 8, 2, 6, 0, 1, 0},
+ {0, 0, 0, 0, 7, 0, 2, 4, 6, 0, 1, 0}};
+ std::vector<dtype> input1_data{-1, 2, 1, 0, 4, -5, 1, 3, 7, -1, 7, 1};
+ std::vector<dtype> input2_data{4, 1, -3, -1, 1, 6};
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+ Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DType);
+
+ SubParams params{};
+ params.activation = Activation::RELU;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+ << "With shape number " << i;
+ }
+}
+
+TEST_F(SubTest, SInt32)
+{
+ CheckInteger<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(SubTest, SInt64)
+{
+ CheckInteger<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(SubTest, Input_Output_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ SubParams params{};
+ params.activation = Activation::RELU;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(SubTest, Invalid_Output_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ SubParams params{};
+ params.activation = Activation::RELU;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(SubTest, Invalid_Input_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U64);
+
+ SubParams params{};
+ params.activation = Activation::RELU;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+TEST_F(SubTest, Mismatching_Input_Int_Types_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::S32>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ SubParams params{};
+ params.activation = Activation::NONE;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Sum.cpp b/compiler/luci-interpreter/src/kernels/Sum.cpp
new file mode 100644
index 000000000..645f02c36
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Sum.cpp
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Sum.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reduce.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Returns the number of axes that will be reduced, counting duplicates only once.
+static int getAxisReductionCount(const int32_t *axes_data, int num_axes, int input_num_dims)
+{
+ int reduction_count = num_axes;
+ for (int i = 0; i < num_axes; ++i)
+ {
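+ // Resolve a possibly negative axis index to its positive equivalent.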
+ int current = axes_data[i] >= 0 ? axes_data[i] : axes_data[i] + input_num_dims;
+ assert(current >= 0 && current < input_num_dims);
+ for (int j = 0; j < i; j++)
+ {
+ int previous = axes_data[j] >= 0 ? axes_data[j] : axes_data[j] + input_num_dims;
+ // If the axis duplicates an earlier one, do not count it again
+ if (current == previous)
+ {
+ --reduction_count;
+ break;
+ }
+ }
+ }
+ return reduction_count;
+}
+
+static Shape getOutputShape(const Shape &input_shape, const int32_t *axes_data, int num_axes,
+ bool keep_dims)
+{
+ int input_num_dims = input_shape.num_dims();
+ if (input_num_dims == 0)
+ {
+ return Shape(0);
+ }
+
+ if (keep_dims)
+ {
+ Shape output_shape(input_num_dims);
+ for (int idx = 0; idx < input_num_dims; ++idx)
+ {
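+ // An axis may be given in negative form; axis + input_num_dims is its positive equivalent.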
+ bool is_axis = false;
+ for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
+ {
+ if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx)
+ {
+ is_axis = true;
+ break;
+ }
+ }
+ if (is_axis)
+ {
+ output_shape.dim(idx) = 1;
+ }
+ else
+ {
+ output_shape.dim(idx) = input_shape.dim(idx);
+ }
+ }
+ return output_shape;
+ }
+ else
+ {
+ int num_reduce_axes = getAxisReductionCount(axes_data, num_axes, input_num_dims);
+ Shape output_shape(input_num_dims - num_reduce_axes);
+ int num_skip_axes = 0;
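+ // Copy the surviving dimensions, shifting them left past every reduced axis.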
+ for (int idx = 0; idx < input_num_dims; ++idx)
+ {
+ bool is_axis = false;
+ for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
+ {
+ if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx)
+ {
+ ++num_skip_axes;
+ is_axis = true;
+ break;
+ }
+ }
+ if (!is_axis)
+ {
+ output_shape.dim(idx - num_skip_axes) = input_shape.dim(idx);
+ }
+ }
+ return output_shape;
+ }
+}
+
+Sum::Sum(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
+ Tensor *resolved_axes, const ReducerParams &params)
+ : KernelWithParams<ReducerParams>({input, axes}, {output, temp_index, resolved_axes}, params)
+{
+}
+
+void Sum::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(axes()->element_type() == DataType::S32);
+
+ const Shape &input_shape = input()->shape();
+ int input_num_dims = input_shape.num_dims();
+
+ const auto *axes_data = getTensorData<int32_t>(axes());
+ int num_axes = axes()->shape().num_elements();
+ LUCI_INTERPRETER_CHECK(num_axes <= 4);
+
+ // We compute the output shapes in configure(), assuming that the outputs
+ // have static shapes
+ // TODO Support dynamic shape
+ Shape output_shape = getOutputShape(input_shape, axes_data, num_axes, _params.keep_dims);
+ output()->resize(output_shape);
+
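+ // temp_index and resolved_axes are scratch tensors used by the reference reduce kernel.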
+ auto temp_index = getOutputTensors()[1];
+ auto resolved_axes = getOutputTensors()[2];
+
+ temp_index->resize(Shape(input_num_dims));
+ resolved_axes->resize(Shape(num_axes));
+}
+
+void Sum::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Sum::evalFloat() const
+{
+ const auto *axes_data = getTensorData<int32_t>(axes());
+ int num_axes = axes()->shape().num_elements();
+
+ auto temp_index = getOutputTensors()[1];
+ auto resolved_axes = getOutputTensors()[2];
+
+ int num_resolved_axis = 0;
+ LUCI_INTERPRETER_CHECK(
+ tflite::reference_ops::ResolveAxis(input()->shape().num_dims(), axes_data, num_axes,
+ getTensorData<int>(resolved_axes), &num_resolved_axis));
+
+ float init_value = 0.0;
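+ // ReduceGeneric with addition as the reducer and 0 as the initial value yields the sum.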
+ tflite::reference_ops::ReduceGeneric<float>(
+ getTensorData<float>(input()), getTensorShape(input()).DimsData(), input()->shape().num_dims(),
+ getTensorData<float>(output()), getTensorShape(output()).DimsData(),
+ output()->shape().num_dims(), axes_data, num_axes, _params.keep_dims,
+ getTensorData<int>(temp_index), getTensorData<int>(resolved_axes), init_value,
+ [](const float current, const float in) -> float { return current + in; });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Sum.h b/compiler/luci-interpreter/src/kernels/Sum.h
new file mode 100644
index 000000000..290e0dafa
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Sum.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SUM_H
+#define LUCI_INTERPRETER_KERNELS_SUM_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <memory>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Sum : public KernelWithParams<ReducerParams>
+{
+public:
+ // TODO Add temp_sum to support quantized kernels
+ Sum(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
+ Tensor *resolved_axes, const ReducerParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *axes() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SUM_H
diff --git a/compiler/luci-interpreter/src/kernels/Sum.test.cpp b/compiler/luci-interpreter/src/kernels/Sum.test.cpp
new file mode 100644
index 000000000..e2dc3012b
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Sum.test.cpp
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Sum.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class SumTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(SumTest, FloatNotKeepDims)
+{
+ std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
+ 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+
+ std::vector<int32_t> axis_data{1, 0};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ReducerParams params{};
+ params.keep_dims = false;
+
+ Sum kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{144, 156};
+ std::initializer_list<int32_t> ref_output_shape{2};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(SumTest, FloatKeepDims)
+{
+ std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
+ 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+
+ std::vector<int32_t> axis_data{0, 2};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ReducerParams params{};
+ params.keep_dims = true;
+
+ Sum kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{84, 100, 116};
+ std::initializer_list<int32_t> ref_output_shape{1, 3, 1};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(SumTest, Input_Output_Type_NEG)
+{
+ std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
+ 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+
+ std::vector<int32_t> axis_data{0, 2};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ ReducerParams params{};
+ params.keep_dims = true;
+
+ Sum kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, params);
+
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(SumTest, Invalid_Axes_Type_NEG)
+{
+ std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
+ 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+
+ std::vector<int64_t> axis_data{0, 2};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S64>({2}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ReducerParams params{};
+ params.keep_dims = true;
+
+ Sum kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, params);
+
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Tanh.cpp b/compiler/luci-interpreter/src/kernels/Tanh.cpp
index b649d5d2f..d47a0bde9 100644
--- a/compiler/luci-interpreter/src/kernels/Tanh.cpp
+++ b/compiler/luci-interpreter/src/kernels/Tanh.cpp
@@ -17,8 +17,9 @@
#include "kernels/Tanh.h"
#include "kernels/Utils.h"
+#include <limits> // std::numeric_limits
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/tanh.h>
namespace luci_interpreter
{
@@ -29,7 +30,7 @@ Tanh::Tanh(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
void Tanh::configure()
{
- assert(input()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
if (input()->element_type() == DataType::U8)
{
populateLookupTable();
diff --git a/compiler/luci-interpreter/src/kernels/Tanh.test.cpp b/compiler/luci-interpreter/src/kernels/Tanh.test.cpp
index 392b8672d..bfae479a9 100644
--- a/compiler/luci-interpreter/src/kernels/Tanh.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Tanh.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/Tanh.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -27,29 +28,38 @@ namespace
using namespace testing;
-TEST(TanhTest, Float)
+class TanhTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(TanhTest, Float)
{
Shape input_shape{1, 2, 4, 1};
std::vector<float> input_data{
- 0, -6, 2, 4, //
- 3, -2, 10, 1, //
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Tanh kernel(&input_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{
- 0, -0.9999877, 0.9640275, 0.999329, //
- 0.99505475, -0.9640275, 1, 0.7615941, //
+ 0, -0.9999877, 0.9640275, 0.999329, //
+ 0.99505475, -0.9640275, 1, 0.7615941, //
};
- EXPECT_THAT(extractTensorData<float>(output_tensor),
- ElementsAreArray(ArrayFloatNear(ref_output_data)));
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
}
-TEST(TanhTest, Uint8)
+TEST_F(TanhTest, Uint8)
{
float kMin = -1;
float kMax = 127.f / 128.f;
@@ -57,52 +67,98 @@ TEST(TanhTest, Uint8)
std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(8 * kMin, 8 * kMax);
std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(kMin, kMax);
std::vector<float> input_data{
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
- 0, -6, 2, 4, //
- -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
};
- Tensor input_tensor{
- DataType::U8, {2, 6, 4, 1}, {{input_quant_param.first}, {input_quant_param.second}}, ""};
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({2, 6, 4, 1}, input_quant_param.first, input_quant_param.second,
+ input_data, _memory_manager.get());
Tensor output_tensor =
- makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
- std::vector<uint8_t> quantize_input =
- quantize<uint8_t>(input_data, input_quant_param.first, input_quant_param.second);
- input_tensor.writeData(quantize_input.data(), quantize_input.size() * sizeof(uint8_t));
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
Tanh kernel(&input_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{
- 0.0, -0.999987, 0.964027, 0.999329, //
- -0.999329, -0.96402, 0.99999, 0.76159, //
- 0.0, -0.999987, 0.964027, 0.999329, //
- -0.999329, -0.96402, 0.99999, 0.76159, //
- 0.0, -0.999987, 0.964027, 0.999329, //
- -0.999329, -0.96402, 0.99999, 0.76159, //
- 0.0, -0.999987, 0.964027, 0.999329, //
- -0.999329, -0.96402, 0.99999, 0.76159, //
- 0.0, -0.999987, 0.964027, 0.999329, //
- -0.999329, -0.96402, 0.99999, 0.76159, //
- 0.0, -0.999987, 0.964027, 0.999329, //
- -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
};
std::vector<int32_t> ref_output_shape{2, 6, 4, 1};
- EXPECT_THAT(dequantize<uint8_t>(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(),
- output_tensor.zero_point()),
- ElementsAreArray(ArrayFloatNear(ref_output_data, kTanhTolerance)));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data, kTanhTolerance));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
+TEST_F(TanhTest, InputTypeInvalid_NEG)
+{
+ std::vector<int64_t> input_data{
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::S64>({2, 6, 4, 1}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Tanh kernel(&input_tensor, &output_tensor);
+ _memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+TEST_F(TanhTest, InputOutputMismatch_NEG)
+{
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 6, 4, 1}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ Tanh kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/TestUtils.cpp b/compiler/luci-interpreter/src/kernels/TestUtils.cpp
index 2c8a6ae78..4d983adda 100644
--- a/compiler/luci-interpreter/src/kernels/TestUtils.cpp
+++ b/compiler/luci-interpreter/src/kernels/TestUtils.cpp
@@ -17,6 +17,8 @@
#include "kernels/TestUtils.h"
+#include <stdexcept>
+
namespace luci_interpreter
{
namespace kernels
@@ -34,7 +36,72 @@ Tensor makeOutputTensor(DataType element_type, float scale, int32_t zero_point)
return Tensor(element_type, {}, {{scale}, {zero_point}}, "");
}
-std::vector<Matcher<float>> ArrayFloatNear(const std::vector<float> &values, float max_abs_error)
+std::vector<float> dequantizeTensorData(const Tensor &tensor)
+{
+ if (tensor.element_type() == DataType::U8)
+ {
+ std::vector<uint8_t> data = extractTensorData<uint8_t>(tensor);
+ return dequantize(data.data(), data.size(), tensor.scale(), tensor.zero_point());
+ }
+ if (tensor.element_type() == DataType::S8)
+ {
+ std::vector<int8_t> data = extractTensorData<int8_t>(tensor);
+ return dequantize(data.data(), data.size(), tensor.scale(), tensor.zero_point());
+ }
+ else if (tensor.element_type() == DataType::S16)
+ {
+ // S16 quantization is symmetric, so zero point should be zero.
+ for (auto zp : tensor.zero_points())
+ {
+ (void)zp;
+ assert(zp == 0);
+ }
+
+ std::vector<int16_t> data = extractTensorData<int16_t>(tensor);
+ if (tensor.scales().size() == 1)
+ {
+ return dequantize(data.data(), data.size(), tensor.scale(), 0);
+ }
+
+ // quantized_dimension splits the shape into two parts:
+ //  - inner dimensions, which hold contiguous data sharing a single quantization scale
+ //  - outer dimensions, which cover the remaining axes
+ const Shape shape = tensor.shape();
+ const int32_t quantized_dimension = tensor.quantized_dimension();
+ assert(quantized_dimension < shape.num_dims());
+ size_t outer_dims_size = 1;
+ int32_t quant_dim_size = shape.dim(quantized_dimension);
+ size_t inner_dims_size = 1;
+ assert(quant_dim_size == tensor.scales().size());
+
+ for (int i = 0; i < quantized_dimension; ++i)
+ outer_dims_size *= shape.dim(i);
+ for (int i = quantized_dimension + 1; i < shape.num_dims(); ++i)
+ inner_dims_size *= shape.dim(i);
+
+ assert(shape.num_elements() == outer_dims_size * quant_dim_size * inner_dims_size);
+
+ std::vector<float> dequantized_data;
+ dequantized_data.reserve(shape.num_elements());
+ for (size_t outer_it = 0; outer_it < outer_dims_size; ++outer_it)
+ for (int32_t channel = 0; channel < quant_dim_size; ++channel)
+ {
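+ // Each channel covers inner_dims_size contiguous elements quantized with its own scale.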
+ float scale = tensor.scales()[channel];
+ size_t offset = inner_dims_size * (quant_dim_size * outer_it + channel);
+ std::vector<float> part_dequantized_data =
+ dequantize(data.data() + offset, inner_dims_size, scale, 0);
+ dequantized_data.insert(dequantized_data.end(), part_dequantized_data.begin(),
+ part_dequantized_data.end());
+ }
+ return dequantized_data;
+ }
+ else
+ {
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+Matcher<std::vector<float>> FloatArrayNear(const std::vector<float> &values, float max_abs_error)
{
std::vector<Matcher<float>> matchers;
matchers.reserve(values.size());
@@ -42,7 +109,7 @@ std::vector<Matcher<float>> ArrayFloatNear(const std::vector<float> &values, flo
{
matchers.emplace_back(FloatNear(v, max_abs_error));
}
- return matchers;
+ return ElementsAreArray(matchers);
}
std::vector<int32_t> extractTensorShape(const Tensor &tensor)
diff --git a/compiler/luci-interpreter/src/kernels/TestUtils.h b/compiler/luci-interpreter/src/kernels/TestUtils.h
index 5311a1949..b9c942e9a 100644
--- a/compiler/luci-interpreter/src/kernels/TestUtils.h
+++ b/compiler/luci-interpreter/src/kernels/TestUtils.h
@@ -19,8 +19,10 @@
#define LUCI_INTERPRETER_KERNELS_TESTUTILS_H
#include "luci_interpreter/core/Tensor.h"
+#include "luci_interpreter/MemoryManager.h"
#include <type_traits>
+#include <limits> // std::numeric_limits
#include <gtest/gtest.h>
#include <gmock/gmock.h>
@@ -32,14 +34,97 @@ namespace kernels
namespace testing
{
+template <typename T>
+std::vector<T> quantize(const float *data, size_t num_elements, float scale, int32_t zero_point);
+
template <DataType DT>
-Tensor makeInputTensor(const Shape &shape, const std::vector<typename DataTypeImpl<DT>::Type> &data)
+Tensor makeInputTensor(const Shape &shape, const std::vector<typename DataTypeImpl<DT>::Type> &data,
+ IMemoryManager *memory_manager)
{
Tensor tensor(DT, shape, {}, "");
+ memory_manager->allocate_memory(tensor);
tensor.writeData(data.data(), data.size() * sizeof(typename DataTypeImpl<DT>::Type));
return tensor;
}
+/**
+ * @brief Create a layer-wise quantized tensor
+ * @tparam DT base integer data type, for example DataType::U8, DataType::S16, DataType::S64
+ * @param shape desired tensor shape
+ * @param scale quantization scale
+ * @param zero_point quantization zero point; should be 0 for signed data types
+ * @param data floating-point data to quantize
+ * @param memory_manager memory manager used to allocate the tensor's storage
+ * @return created tensor
+ */
+template <DataType DT>
+Tensor makeInputTensor(const Shape &shape, float scale, int32_t zero_point,
+ const std::vector<float> &data, IMemoryManager *memory_manager)
+{
+ using NativeT = typename DataTypeImpl<DT>::Type;
+ Tensor tensor(DT, shape, {{scale}, {zero_point}}, "");
+ std::vector<NativeT> quantized_data =
+ quantize<NativeT>(data.data(), data.size(), scale, zero_point);
+ memory_manager->allocate_memory(tensor);
+ tensor.writeData(quantized_data.data(), quantized_data.size() * sizeof(NativeT));
+ return tensor;
+}
+
+/**
+ * @brief Create a channel-wise quantized tensor
+ * @tparam DT base integer data type, for example DataType::U8, DataType::S16, DataType::S64
+ * @param shape desired tensor shape
+ * @param scales quantization scales, one per channel
+ * @param zero_points quantization zero points, one per channel; should be 0 for signed data types
+ * @param quantized_dimension dimension to quantize along, usually channels/output channels
+ * @param data floating-point data to quantize
+ * @param memory_manager memory manager used to allocate the tensor's storage
+ * @return created tensor
+ */
+template <DataType DT>
+Tensor makeInputTensor(const Shape &shape, const std::vector<float> &scales,
+ const std::vector<int32_t> &zero_points, int quantized_dimension,
+ const std::vector<float> &data, IMemoryManager *memory_manager)
+{
+ using NativeT = typename DataTypeImpl<DT>::Type;
+ assert(quantized_dimension < shape.num_dims());
+ Tensor tensor(DT, shape, {scales, zero_points, quantized_dimension}, "");
+
+ // quantized_dimension splits the shape into two parts:
+ //  - inner dimensions, which hold contiguous data sharing a single quantization scale
+ //  - outer dimensions, which cover the remaining axes
+ size_t outer_dims_size = 1;
+ int32_t quant_dim_size = shape.dim(quantized_dimension);
+ size_t inner_dims_size = 1;
+ assert(quant_dim_size == scales.size());
+ assert(quant_dim_size == zero_points.size());
+
+ for (int i = 0; i < quantized_dimension; ++i)
+ outer_dims_size *= shape.dim(i);
+ for (int i = quantized_dimension + 1; i < shape.num_dims(); ++i)
+ inner_dims_size *= shape.dim(i);
+
+ assert(shape.num_elements() == outer_dims_size * quant_dim_size * inner_dims_size);
+
+ std::vector<NativeT> quantized_data;
+ quantized_data.reserve(shape.num_elements());
+ for (size_t outer_it = 0; outer_it < outer_dims_size; ++outer_it)
+ for (int32_t channel = 0; channel < quant_dim_size; ++channel)
+ {
+ int32_t zero_point = zero_points[channel];
+ float scale = scales[channel];
+ size_t offset = inner_dims_size * (quant_dim_size * outer_it + channel);
+ std::vector<NativeT> part_quantized_data =
+ quantize<NativeT>(data.data() + offset, inner_dims_size, scale, zero_point);
+ quantized_data.insert(quantized_data.end(), part_quantized_data.begin(),
+ part_quantized_data.end());
+ }
+ assert(quantized_data.size() == shape.num_elements());
+ memory_manager->allocate_memory(tensor);
+ tensor.writeData(quantized_data.data(), quantized_data.size() * sizeof(NativeT));
+ return tensor;
+}
+
Tensor makeOutputTensor(DataType element_type);
Tensor makeOutputTensor(DataType element_type, float scale, int32_t zero_point);
@@ -50,12 +135,26 @@ template <typename T> constexpr DataType getElementType()
{
if (std::is_same<T, float>::value)
return DataType::FLOAT32;
+ if (std::is_same<T, double>::value)
+ return DataType::FLOAT64;
if (std::is_same<T, uint8_t>::value)
return DataType::U8;
+ if (std::is_same<T, uint16_t>::value)
+ return DataType::U16;
+ if (std::is_same<T, uint32_t>::value)
+ return DataType::U32;
+ if (std::is_same<T, uint64_t>::value)
+ return DataType::U64;
+ if (std::is_same<T, int8_t>::value)
+ return DataType::S8;
+ if (std::is_same<T, int16_t>::value)
+ return DataType::S16;
if (std::is_same<T, int32_t>::value)
return DataType::S32;
if (std::is_same<T, int64_t>::value)
return DataType::S64;
+ if (std::is_same<T, bool>::value)
+ return DataType::BOOL;
return DataType::Unknown;
}
@@ -65,47 +164,62 @@ template <typename T> std::vector<T> extractTensorData(const Tensor &tensor)
return std::vector<T>(data_ptr, data_ptr + tensor.shape().num_elements());
}
-std::vector<::testing::Matcher<float>> ArrayFloatNear(const std::vector<float> &values,
+std::vector<float> dequantizeTensorData(const Tensor &tensor);
+
+// Array version of `::testing::FloatNear` matcher.
+::testing::Matcher<std::vector<float>> FloatArrayNear(const std::vector<float> &values,
float max_abs_error = 1.0e-5f);
template <typename T>
-inline std::vector<T> quantize(const std::vector<float> &data, float scale, int32_t zero_point)
+std::vector<T> quantize(const float *data, size_t num_elements, float scale, int32_t zero_point)
{
- assert(!std::is_floating_point<T>::value);
+ static_assert(std::is_integral<T>::value, "Integral type expected.");
+
+ float q_min{}, q_max{};
+ if (std::is_signed<T>::value)
+ {
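+ // Signed types use the symmetric range [-max(), max()], excluding lowest().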
+ q_min = -std::numeric_limits<T>::max();
+ q_max = std::numeric_limits<T>::max();
+ }
+ else
+ {
+ q_min = 0;
+ q_max = std::numeric_limits<T>::max();
+ }
+
std::vector<T> q;
- for (const auto &f : data)
+ for (size_t i = 0; i < num_elements; ++i)
{
- q.push_back(static_cast<T>(std::max<float>(
- std::numeric_limits<T>::lowest(),
- std::min<float>(std::numeric_limits<T>::max(), std::round(zero_point + (f / scale))))));
+ const auto &f = data[i];
+ q.push_back(static_cast<T>(
+ std::max<float>(q_min, std::min<float>(q_max, std::round(zero_point + (f / scale))))));
}
return q;
}
template <typename T>
-inline std::vector<float> dequantize(const std::vector<T> &data, float scale, int32_t zero_point)
+std::vector<float> dequantize(const T *data, size_t num_elements, float scale, int32_t zero_point)
{
- assert(!std::is_floating_point<T>::value);
+ static_assert(std::is_integral<T>::value, "Integral type expected.");
std::vector<float> f;
- for (const T &q : data)
+ for (size_t i = 0; i < num_elements; ++i)
{
+ const T &q = data[i];
f.push_back(scale * (q - zero_point));
}
return f;
}
+// NOTE Returns scale and zero point for _asymmetric_ range (both signed and unsigned).
template <typename T> std::pair<float, int32_t> quantizationParams(float f_min, float f_max)
{
- if (std::is_floating_point<T>::value)
- {
- return {1.0f, 0};
- }
+ static_assert(std::is_integral<T>::value, "Integral type expected.");
int32_t zero_point = 0;
- double scale = 0;
+ float scale = 0;
const T qmin = std::numeric_limits<T>::lowest();
const T qmax = std::numeric_limits<T>::max();
- const double qmin_double = qmin;
- const double qmax_double = qmax;
+ const float qmin_double = qmin;
+ const float qmax_double = qmax;
// 0 should always be a representable value. Let's assume that the initial
// min,max range contains 0.
assert(f_max >= 0);
@@ -131,16 +245,16 @@ template <typename T> std::pair<float, int32_t> quantizationParams(float f_min,
// The arithmetic error on the zero point computed from either pair
// will be roughly machine_epsilon * (sum of absolute values of terms)
// so we want to use the variant that adds the smaller terms.
- const double zero_point_from_min = qmin_double - f_min / scale;
- const double zero_point_from_max = qmax_double - f_max / scale;
+ const float zero_point_from_min = qmin_double - f_min / scale;
+ const float zero_point_from_max = qmax_double - f_max / scale;
- const double zero_point_from_min_error = std::abs(qmin_double) + std::abs(f_min / scale);
+ const float zero_point_from_min_error = std::abs(qmin_double) + std::abs(f_min / scale);
- const double zero_point_from_max_error = std::abs(qmax_double) + std::abs(f_max / scale);
+ const float zero_point_from_max_error = std::abs(qmax_double) + std::abs(f_max / scale);
- const double zero_point_double = zero_point_from_min_error < zero_point_from_max_error
- ? zero_point_from_min
- : zero_point_from_max;
+ const float zero_point_double = zero_point_from_min_error < zero_point_from_max_error
+ ? zero_point_from_min
+ : zero_point_from_max;
// Now we need to nudge the zero point to be an integer
// (our zero points are integer, and this is motivated by the requirement
@@ -168,7 +282,7 @@ template <typename T> std::pair<float, int32_t> quantizationParams(float f_min,
assert(qmin <= nudged_zero_point);
zero_point = nudged_zero_point;
// finally, return the values
- return {static_cast<float>(scale), zero_point};
+ return {scale, zero_point};
}
inline float getTolerance(float min, float max, int quantize_steps)
diff --git a/compiler/luci-interpreter/src/kernels/Transpose.cpp b/compiler/luci-interpreter/src/kernels/Transpose.cpp
index 8265d9937..802d87295 100644
--- a/compiler/luci-interpreter/src/kernels/Transpose.cpp
+++ b/compiler/luci-interpreter/src/kernels/Transpose.cpp
@@ -18,7 +18,7 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/transpose.h>
#include <stdexcept>
@@ -29,7 +29,7 @@ namespace kernels
{
Transpose::Transpose(const Tensor *input, const Tensor *perm, Tensor *output)
- : Kernel({input, perm}, {output})
+ : Kernel({input, perm}, {output})
{
}
@@ -37,7 +37,7 @@ void Transpose::configure()
{
// Transpose op only supports 1D-4D input arrays.
int dims = input()->shape().num_dims();
- const int *perm_data = getTensorData<int32_t>(perm());
+ const int32_t *perm_data = getTensorData<int32_t>(perm());
assert(input()->shape().num_dims() <= 4);
assert(input()->element_type() == output()->element_type());
@@ -58,8 +58,8 @@ void Transpose::configure()
void Transpose::execute() const
{
tflite::TransposeParams params{};
- const int *perm_data = getTensorData<int32_t>(perm());
- const int size = perm()->shape().dim(0);
+ const int32_t *perm_data = getTensorData<int32_t>(perm());
+ const int32_t size = perm()->shape().dim(0);
params.perm_count = size;
for (int i = 0; i < size; i++)
params.perm[i] = perm_data[i];
diff --git a/compiler/luci-interpreter/src/kernels/Transpose.test.cpp b/compiler/luci-interpreter/src/kernels/Transpose.test.cpp
index 87e6e2a00..43be8f8b9 100644
--- a/compiler/luci-interpreter/src/kernels/Transpose.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Transpose.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/Transpose.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -29,18 +30,18 @@ using namespace testing;
template <typename T>
void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> perm_shape,
std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data,
- std::initializer_list<int32_t> perm_data, std::initializer_list<T> output_data,
- DataType element_type)
+ std::initializer_list<int32_t> perm_data, std::initializer_list<T> output_data)
{
- Tensor input_tensor{element_type, input_shape, {}, ""};
- input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(T));
-
- Tensor perm_tensor{DataType::S32, perm_shape, {}, ""};
- perm_tensor.writeData(perm_data.begin(), perm_data.size() * sizeof(int32_t));
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor perm_tensor = makeInputTensor<DataType::S32>(perm_shape, perm_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(element_type);
Transpose kernel(&input_tensor, &perm_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
@@ -51,7 +52,7 @@ template <typename T> class TransposeTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(TransposeTest, DataTypes);
+TYPED_TEST_SUITE(TransposeTest, DataTypes);
TYPED_TEST(TransposeTest, Small3D)
{
@@ -60,56 +61,53 @@ TYPED_TEST(TransposeTest, Small3D)
12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23},
/*perm_data=*/{2, 0, 1},
/*output_data=*/{0, 4, 8, 12, 16, 20, 1, 5, 9, 13, 17, 21,
- 2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23},
- getElementType<TypeParam>());
+ 2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23});
}
TYPED_TEST(TransposeTest, Large4D)
{
Check<TypeParam>(
- /*input_shape=*/{2, 3, 4, 5}, /*perm_shape=*/{4}, /*output_shape=*/{4, 2, 3, 5},
- /*input_data=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
- 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
- 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
- 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
- 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
- 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
- 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
- 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119},
- /*perm_data=*/{2, 0, 1, 3},
- /*output_data=*/{0, 1, 2, 3, 4, 20, 21, 22, 23, 24, 40, 41, 42, 43, 44,
- 60, 61, 62, 63, 64, 80, 81, 82, 83, 84, 100, 101, 102, 103, 104,
- 5, 6, 7, 8, 9, 25, 26, 27, 28, 29, 45, 46, 47, 48, 49,
- 65, 66, 67, 68, 69, 85, 86, 87, 88, 89, 105, 106, 107, 108, 109,
- 10, 11, 12, 13, 14, 30, 31, 32, 33, 34, 50, 51, 52, 53, 54,
- 70, 71, 72, 73, 74, 90, 91, 92, 93, 94, 110, 111, 112, 113, 114,
- 15, 16, 17, 18, 19, 35, 36, 37, 38, 39, 55, 56, 57, 58, 59,
- 75, 76, 77, 78, 79, 95, 96, 97, 98, 99, 115, 116, 117, 118, 119},
- getElementType<TypeParam>());
+ /*input_shape=*/{2, 3, 4, 5}, /*perm_shape=*/{4}, /*output_shape=*/{4, 2, 3, 5},
+ /*input_data=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+ 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
+ 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
+ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119},
+ /*perm_data=*/{2, 0, 1, 3},
+ /*output_data=*/{0, 1, 2, 3, 4, 20, 21, 22, 23, 24, 40, 41, 42, 43, 44,
+ 60, 61, 62, 63, 64, 80, 81, 82, 83, 84, 100, 101, 102, 103, 104,
+ 5, 6, 7, 8, 9, 25, 26, 27, 28, 29, 45, 46, 47, 48, 49,
+ 65, 66, 67, 68, 69, 85, 86, 87, 88, 89, 105, 106, 107, 108, 109,
+ 10, 11, 12, 13, 14, 30, 31, 32, 33, 34, 50, 51, 52, 53, 54,
+ 70, 71, 72, 73, 74, 90, 91, 92, 93, 94, 110, 111, 112, 113, 114,
+ 15, 16, 17, 18, 19, 35, 36, 37, 38, 39, 55, 56, 57, 58, 59,
+ 75, 76, 77, 78, 79, 95, 96, 97, 98, 99, 115, 116, 117, 118, 119});
}
TYPED_TEST(TransposeTest, Large2D)
{
Check<TypeParam>(
- /*input_shape=*/{10, 12}, /*perm_shape=*/{2}, /*output_shape=*/{12, 10},
- /*input_data=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
- 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
- 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
- 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
- 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
- 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
- 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
- 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119},
- /*perm_data=*/{1, 0},
- /*output_data=*/{0, 12, 24, 36, 48, 60, 72, 84, 96, 108, 1, 13, 25, 37, 49,
- 61, 73, 85, 97, 109, 2, 14, 26, 38, 50, 62, 74, 86, 98, 110,
- 3, 15, 27, 39, 51, 63, 75, 87, 99, 111, 4, 16, 28, 40, 52,
- 64, 76, 88, 100, 112, 5, 17, 29, 41, 53, 65, 77, 89, 101, 113,
- 6, 18, 30, 42, 54, 66, 78, 90, 102, 114, 7, 19, 31, 43, 55,
- 67, 79, 91, 103, 115, 8, 20, 32, 44, 56, 68, 80, 92, 104, 116,
- 9, 21, 33, 45, 57, 69, 81, 93, 105, 117, 10, 22, 34, 46, 58,
- 70, 82, 94, 106, 118, 11, 23, 35, 47, 59, 71, 83, 95, 107, 119},
- getElementType<TypeParam>());
+ /*input_shape=*/{10, 12}, /*perm_shape=*/{2}, /*output_shape=*/{12, 10},
+ /*input_data=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+ 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
+ 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
+ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119},
+ /*perm_data=*/{1, 0},
+ /*output_data=*/{0, 12, 24, 36, 48, 60, 72, 84, 96, 108, 1, 13, 25, 37, 49,
+ 61, 73, 85, 97, 109, 2, 14, 26, 38, 50, 62, 74, 86, 98, 110,
+ 3, 15, 27, 39, 51, 63, 75, 87, 99, 111, 4, 16, 28, 40, 52,
+ 64, 76, 88, 100, 112, 5, 17, 29, 41, 53, 65, 77, 89, 101, 113,
+ 6, 18, 30, 42, 54, 66, 78, 90, 102, 114, 7, 19, 31, 43, 55,
+ 67, 79, 91, 103, 115, 8, 20, 32, 44, 56, 68, 80, 92, 104, 116,
+ 9, 21, 33, 45, 57, 69, 81, 93, 105, 117, 10, 22, 34, 46, 58,
+ 70, 82, 94, 106, 118, 11, 23, 35, 47, 59, 71, 83, 95, 107, 119});
}
} // namespace
diff --git a/compiler/luci-interpreter/src/kernels/TransposeConv.cpp b/compiler/luci-interpreter/src/kernels/TransposeConv.cpp
index 898bae3da..08bfbf319 100644
--- a/compiler/luci-interpreter/src/kernels/TransposeConv.cpp
+++ b/compiler/luci-interpreter/src/kernels/TransposeConv.cpp
@@ -19,9 +19,10 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/transpose_conv.h>
#include <stdexcept>
+#include <limits> // std::numeric_limits
namespace luci_interpreter
{
@@ -30,31 +31,27 @@ namespace kernels
{
TransposeConv::TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input,
- const Tensor *bias, Tensor *output, const TransposeConvParams &params)
- : KernelWithParams<TransposeConvParams>({output_shape, filter, input, bias}, {output}, params)
+ const Tensor *bias, Tensor *output, Tensor *scratch_tensor,
+ const TransposeConvParams &params)
+ : KernelWithParams<TransposeConvParams>({output_shape, filter, input, bias},
+ {output, scratch_tensor}, params)
{
}
+TransposeConv::~TransposeConv()
+{
+ // Define destructor here, to delete vector of qunatized multipliers properly
+}
+
void TransposeConv::configure()
{
assert(output_shape()->shape().num_dims() == 1);
assert(input()->shape().num_dims() == 4);
assert(filter()->shape().num_dims() == 4);
- assert(input()->element_type() == DataType::FLOAT32 || input()->element_type() == DataType::U8);
+ assert(input()->element_type() == DataType::FLOAT32 || input()->element_type() == DataType::U8 ||
+ input()->element_type() == DataType::S16);
assert(input()->element_type() == output()->element_type());
assert(input()->shape().dim(3) == filter()->shape().dim(3));
- if (input()->element_type() == DataType::U8)
- {
- _scratch_tensor =
- std::make_unique<Tensor>(DataType::S32, output()->shape(), AffineQuantization{}, "");
- double real_multiplier = 0.0;
- const double input_product_scale = input()->scale() * filter()->scale();
- assert(input_product_scale >= 0);
- real_multiplier = input_product_scale / output()->scale();
- int exponent;
- quantizeMultiplier(real_multiplier, &_output_multiplier, &exponent);
- _output_shift = -exponent;
- }
const int num_dims = output_shape()->shape().dim(0);
Shape out_shape(num_dims);
@@ -62,6 +59,36 @@ void TransposeConv::configure()
for (int i = 0; i < num_dims; i++)
out_shape.dim(i) = shape_data[i];
output()->resize(out_shape);
+
+ const int32_t filter_height = filter()->shape().dim(1);
+ const int32_t filter_width = filter()->shape().dim(2);
+ const int32_t output_height = out_shape.dim(1);
+ const int32_t output_width = out_shape.dim(2);
+
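+ // Padding is derived by treating this op's output as the input of an equivalent forward conv.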
+ const int32_t unused_output_height =
+ computeOutputSize(params().padding, output_height, filter_height, params().stride_height, 1);
+ const int32_t unused_output_width =
+ computeOutputSize(params().padding, output_width, filter_width, params().stride_width, 1);
+
+ _padding_height =
+ computePadding(params().stride_height, 1, output_height, filter_height, unused_output_height);
+ _padding_width =
+ computePadding(params().stride_width, 1, output_width, filter_width, unused_output_width);
+
+ if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S16)
+ {
+ auto scratch_tensor = getOutputTensors()[1];
+ scratch_tensor->resize(output()->shape());
+ const std::vector<double> real_multipliers =
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+ _quant_multipliers = quantizeMultipliers(real_multipliers);
+ }
+ else
+ {
+ auto scratch_tensor = getOutputTensors()[1];
+ scratch_tensor->set_allocatable(false);
+ }
}
void TransposeConv::execute() const
@@ -72,7 +99,20 @@ void TransposeConv::execute() const
evalFloat();
break;
case DataType::U8:
- evalQuantized();
+ if (filter()->scales().size() == 1)
+ {
+ evalQuantized();
+ }
+ else if (filter()->scales().size() > 1)
+ {
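+ // Multiple filter scales indicate channel-wise quantization.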
+ LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
+ static_cast<size_t>(filter()->shape().dim(0)));
+ evalQuantizedPerChannel();
+ }
+ break;
+ case DataType::S16:
+ evalQuantizedS16();
break;
default:
throw std::runtime_error("Unsupported type.");
@@ -81,74 +121,231 @@ void TransposeConv::execute() const
void TransposeConv::evalFloat() const
{
- const int width = output()->shape().dim(2);
- const int height = output()->shape().dim(1);
-
- const int filter_width = filter()->shape().dim(2);
- const int filter_height = filter()->shape().dim(1);
-
- int unused_output_height, unused_output_width;
- unused_output_width =
- computeOutputSize(params().padding, width, filter_width, params().stride_width, 1);
- unused_output_height =
- computeOutputSize(params().padding, height, filter_height, params().stride_height, 1);
- int32_t offset = 0;
tflite::ConvParams op_params{};
op_params.padding_type = tflite::PaddingType::kSame;
- op_params.padding_values.height = computePaddingWithOffset(
- params().stride_height, 1, height, filter_height, unused_output_height, &offset);
- op_params.padding_values.height_offset = offset;
- op_params.padding_values.width = computePaddingWithOffset(
- params().stride_width, 1, width, filter_width, unused_output_width, &offset);
- op_params.padding_values.width_offset = offset;
+ op_params.padding_values.height = _padding_height;
+ op_params.padding_values.width = _padding_width;
op_params.stride_height = params().stride_height;
op_params.stride_width = params().stride_width;
- op_params.output_multiplier = _output_multiplier;
- tflite::reference_ops::TransposeConv(
- op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
- getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
- getTensorShape(output()), getTensorData<float>(output()), tflite::RuntimeShape(),
- (float *)nullptr);
+ tflite::reference_ops::TransposeConv(op_params, //
+ getTensorShape(input()), getTensorData<float>(input()), //
+ getTensorShape(filter()), getTensorData<float>(filter()), //
+ getTensorShape(bias()), getTensorData<float>(bias()), //
+ getTensorShape(output()), getTensorData<float>(output()), //
+ tflite::RuntimeShape(), nullptr);
}
void TransposeConv::evalQuantized() const
{
- int32_t input_offset = -input()->zero_point();
- int32_t filter_offset = -filter()->zero_point();
- int32_t output_offset = filter()->zero_point();
- const int width = output()->shape().dim(2);
- const int height = output()->shape().dim(1);
-
- const int filter_width = filter()->shape().dim(2);
- const int filter_height = filter()->shape().dim(1);
-
- int unused_output_height, unused_output_width;
- unused_output_width =
- computeOutputSize(params().padding, width, filter_width, params().stride_width, 1);
- unused_output_height =
- computeOutputSize(params().padding, height, filter_height, params().stride_height, 1);
- int32_t offset = 0;
tflite::ConvParams op_params{};
op_params.padding_type = tflite::PaddingType::kSame;
- op_params.padding_values.height = computePaddingWithOffset(
- params().stride_height, 1, height, filter_height, unused_output_height, &offset);
- op_params.padding_values.width = computePaddingWithOffset(
- params().stride_width, 1, width, filter_width, unused_output_width, &offset);
+ op_params.padding_values.height = _padding_height;
+ op_params.padding_values.width = _padding_width;
op_params.stride_height = params().stride_height;
op_params.stride_width = params().stride_width;
- op_params.input_offset = input_offset;
- op_params.output_offset = output_offset;
- op_params.weights_offset = filter_offset;
- op_params.output_multiplier = _output_multiplier;
- op_params.output_shift = -_output_shift;
+ // The kernel expects input and filter zero points to be negated.
+ op_params.input_offset = -input()->zero_point(); // Note the '-'.
+ op_params.weights_offset = -filter()->zero_point(); // Note the '-'.
+ op_params.output_offset = output()->zero_point();
+ op_params.output_multiplier = _quant_multipliers[0].multiplier;
+ op_params.output_shift = _quant_multipliers[0].shift;
op_params.quantized_activation_min = std::numeric_limits<uint8_t>::min();
op_params.quantized_activation_max = std::numeric_limits<uint8_t>::max();
- tflite::reference_ops::TransposeConv(
- op_params, getTensorShape(input()), getTensorData<uint8>(input()), getTensorShape(filter()),
- getTensorData<uint8>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
- getTensorShape(output()), getTensorData<uint8>(output()), tflite::RuntimeShape(),
- (uint8 *)nullptr, getTensorData<int32_t>(_scratch_tensor.get()));
+ auto scratch_tensor = getOutputTensors()[1];
+
+ tflite::reference_ops::TransposeConv(op_params, //
+ getTensorShape(input()), getTensorData<uint8>(input()), //
+ getTensorShape(filter()), getTensorData<uint8>(filter()), //
+ getTensorShape(bias()), getTensorData<int32_t>(bias()), //
+ getTensorShape(output()), getTensorData<uint8>(output()), //
+ tflite::RuntimeShape(), nullptr, //
+ getTensorData<int32_t>(scratch_tensor));
+}
+
+void TransposeConv::evalQuantizedPerChannel() const
+{
+ const auto *input_data = getTensorData<uint8_t>(input());
+ const auto *filter_data = getTensorData<uint8_t>(filter());
+ const auto *bias_data = getTensorData<int32_t>(bias());
+ auto *output_data = getTensorData<uint8_t>(output());
+
+ auto scratch_tensor = getOutputTensors()[1];
+ auto *scratch_data = getTensorData<int32_t>(scratch_tensor);
+
+ const Shape &input_shape = input()->shape();
+ const Shape &filter_shape = filter()->shape();
+ const Shape &output_shape = output()->shape();
+
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t input_depth = input_shape.dim(3);
+ const int32_t output_depth = filter_shape.dim(0);
+ const int32_t filter_height = filter_shape.dim(1);
+ const int32_t filter_width = filter_shape.dim(2);
+ const int32_t output_height = output_shape.dim(1);
+ const int32_t output_width = output_shape.dim(2);
+
+ const int32_t stride_height = _params.stride_height;
+ const int32_t stride_width = _params.stride_width;
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max);
+
+ std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() * sizeof(int32_t));
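+ // First pass: scatter-accumulate zero-point-adjusted input * filter products into scratch.
+ // Second pass (below, per batch): add bias, requantize per channel, clamp to the activation range.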
+
+ BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers);
+ for (int32_t batch = 0; batch < batches; ++batch)
+ {
+ for (int32_t in_y = 0; in_y < input_height; ++in_y)
+ {
+ for (int32_t in_x = 0; in_x < input_width; ++in_x)
+ {
+ for (int32_t in_c = 0; in_c < input_depth; ++in_c)
+ {
+ const int32_t out_y_origin = in_y * stride_height - _padding_height;
+ const int32_t out_x_origin = in_x * stride_width - _padding_width;
+ for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int32_t out_x = out_x_origin + filter_x;
+ const int32_t out_y = out_y_origin + filter_y;
+ if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width))
+ {
+ for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+ {
+ const uint8_t input_val =
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+ const uint8_t filter_val =
+ filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
+ scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] +=
+ static_cast<int32_t>(input_val - input()->zero_point()) *
+ static_cast<int32_t>(filter_val - filter()->zero_points()[out_c]);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ for (int32_t out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int32_t out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+ {
+ int32_t acc = scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)];
+ if (bias_data)
+ {
+ acc += bias_data[out_c];
+ }
+
+ int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
+ acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);
+
+ scaled_acc += output()->zero_point();
+ scaled_acc = std::max(scaled_acc, activation_min);
+ scaled_acc = std::min(scaled_acc, activation_max);
+
+ output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
+ }
+ }
+ }
+ }
+}
+
+void TransposeConv::evalQuantizedS16() const
+{
+ const auto *input_data = getTensorData<int16_t>(input());
+ const auto *filter_data = getTensorData<int16_t>(filter());
+ const auto *bias_data = getTensorData<int64_t>(bias());
+ auto *output_data = getTensorData<int16_t>(output());
+
+ auto scratch_tensor = getOutputTensors()[1];
+ auto *scratch_data = getTensorData<int64_t>(scratch_tensor);
+
+ const Shape &input_shape = input()->shape();
+ const Shape &filter_shape = filter()->shape();
+ const Shape &output_shape = output()->shape();
+
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t input_depth = input_shape.dim(3);
+ const int32_t output_depth = filter_shape.dim(0);
+ const int32_t filter_height = filter_shape.dim(1);
+ const int32_t filter_width = filter_shape.dim(2);
+ const int32_t output_height = output_shape.dim(1);
+ const int32_t output_width = output_shape.dim(2);
+
+ const int32_t stride_height = _params.stride_height;
+ const int32_t stride_width = _params.stride_width;
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max);
+
+ std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() * sizeof(int64_t));
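+ // int16 products are accumulated into the int64 scratch buffer to avoid overflow before rescaling.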
+
+ BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers);
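+ // BroadcastableWrapper reuses a single multiplier for all channels when only one is given.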
+ for (int32_t batch = 0; batch < batches; ++batch)
+ {
+ for (int32_t in_y = 0; in_y < input_height; ++in_y)
+ {
+ for (int32_t in_x = 0; in_x < input_width; ++in_x)
+ {
+ for (int32_t in_c = 0; in_c < input_depth; ++in_c)
+ {
+ const int32_t out_y_origin = in_y * stride_height - _padding_height;
+ const int32_t out_x_origin = in_x * stride_width - _padding_width;
+ for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int32_t out_x = out_x_origin + filter_x;
+ const int32_t out_y = out_y_origin + filter_y;
+ if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width))
+ {
+ for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+ {
+ const int16_t input_val =
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+ const int16_t filter_val =
+ filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
+ scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] +=
+ static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ for (int32_t out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int32_t out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+ {
+ int64_t acc = scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)];
+ if (bias_data)
+ {
+ acc += bias_data[out_c];
+ }
+ int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
+ acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);
+
+ scaled_acc = std::max(scaled_acc, activation_min);
+ scaled_acc = std::min(scaled_acc, activation_max);
+
+ output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
+ }
+ }
+ }
+ }
}
} // namespace kernels
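
Editor's note: the S16 path above accumulates into an int64 scratchpad, while the uint8 paths use int32. The reason is headroom: a single int16 × int16 product is close to 2^30, and each output element sums filter_height × filter_width × input_depth of them. A quick bound (illustrative, not from the source):

  #include <cstdint>

  // One extreme product is INT16_MAX^2 ≈ 1.07e9; three of them already exceed
  // INT32_MAX ≈ 2.15e9, so int32 accumulation would overflow for all but the
  // smallest filters, whereas int64 leaves roughly 2^32 products of headroom.
  static_assert(3LL * INT16_MAX * INT16_MAX > INT32_MAX, "int32 would overflow");
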
diff --git a/compiler/luci-interpreter/src/kernels/TransposeConv.h b/compiler/luci-interpreter/src/kernels/TransposeConv.h
index 3a0eae761..cea0cf3c7 100644
--- a/compiler/luci-interpreter/src/kernels/TransposeConv.h
+++ b/compiler/luci-interpreter/src/kernels/TransposeConv.h
@@ -25,11 +25,16 @@ namespace luci_interpreter
namespace kernels
{
+class ChannelQuantMultipliers;
+
class TransposeConv : public KernelWithParams<TransposeConvParams>
{
public:
TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input,
- const Tensor *bias, Tensor *output, const TransposeConvParams &params);
+ const Tensor *bias, Tensor *output, Tensor *scratch_tensor,
+ const TransposeConvParams &params);
+
+ ~TransposeConv();
const Tensor *output_shape() const { return _inputs[0]; }
const Tensor *filter() const { return _inputs[1]; }
@@ -43,14 +48,15 @@ public:
private:
void evalFloat() const;
void evalQuantized() const;
+ void evalQuantizedPerChannel() const;
+ void evalQuantizedS16() const;
private:
- std::unique_ptr<Tensor> _scratch_tensor;
-
+ int32_t _padding_height{};
+ int32_t _padding_width{};
// The scaling factor from input to output (aka the 'real multiplier') can
// be represented as a fixed point multiplier plus a left shift.
- int32_t _output_multiplier = 0;
- int _output_shift = 0;
+ std::vector<ChannelQuantMultipliers> _quant_multipliers;
};
} // namespace kernels
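
Editor's note: this header change replaces the single (_output_multiplier, _output_shift) pair with one ChannelQuantMultipliers entry per output channel, each holding the fixed-point decomposition of real_multiplier = input_scale * filter_scale[c] / output_scale. A self-contained sketch of that decomposition, assuming the usual frexp-based approach (the interpreter's own helper in kernels/Utils may differ in details):

  #include <cmath>
  #include <cstdint>

  // Decompose scale into (multiplier, shift) so that scale ≈ multiplier * 2^(shift - 31).
  void quantizeMultiplierSketch(double scale, int32_t *multiplier, int *shift)
  {
    if (scale == 0.0)
    {
      *multiplier = 0;
      *shift = 0;
      return;
    }
    const double q = std::frexp(scale, shift); // q in [0.5, 1), scale = q * 2^shift
    int64_t q_fixed = static_cast<int64_t>(std::round(q * (1LL << 31)));
    if (q_fixed == (1LL << 31)) // rounding carried q up to 1.0; renormalize
    {
      q_fixed /= 2;
      ++(*shift);
    }
    *multiplier = static_cast<int32_t>(q_fixed);
  }
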
diff --git a/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp b/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp
index 0fbe9328b..8e9cfc6ad 100644
--- a/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/TransposeConv.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -28,41 +29,51 @@ using namespace testing;
template <typename T, typename B>
void Check(std::initializer_list<int32_t> output_shape_shape,
- std::initializer_list<int32_t> weight_shape,
- std::initializer_list<int32_t> input_data_shape,
+ std::initializer_list<int32_t> weight_shape, std::initializer_list<int32_t> input_shape,
std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape,
std::initializer_list<int32_t> output_shape_data, std::initializer_list<T> weight_data,
- std::initializer_list<T> input_data_data, std::initializer_list<B> bias_data,
+ std::initializer_list<T> input_data, std::initializer_list<B> bias_data,
std::initializer_list<T> output_data, luci::Padding padding, int32_t stride_height,
- int32_t stride_width, DataType element_type)
+ int32_t stride_width)
{
- Tensor output_shape_tensor{element_type, output_shape_shape, {}, ""};
- output_shape_tensor.writeData(output_shape_data.begin(), output_shape_data.size() * sizeof(T));
- Tensor weight_tensor{element_type, weight_shape, {}, ""};
- weight_tensor.writeData(weight_data.begin(), weight_data.size() * sizeof(T));
- Tensor input_data_tensor{element_type, input_data_shape, {}, ""};
- input_data_tensor.writeData(input_data_data.begin(), input_data_data.size() * sizeof(T));
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+ Tensor output_shape_tensor =
+ makeInputTensor<DataType::S32>(output_shape_shape, output_shape_data, memory_manager.get());
+ Tensor weight_tensor =
+ makeInputTensor<element_type>(weight_shape, weight_data, memory_manager.get());
+ Tensor input_data_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+
+ DataType scratch_data_type = element_type == DataType::S16 ? DataType::S64 : DataType::S32;
+ Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");
Tensor output_tensor = makeOutputTensor(element_type);
TransposeConvParams params{};
params.padding = padding;
params.stride_height = stride_height;
params.stride_width = stride_width;
+ params.activation = luci::FusedActFunc::NONE;
if (bias_data.size() != 0)
{
- Tensor bias_tensor = makeInputTensor<getElementType<B>()>(bias_shape, bias_data);
+ Tensor bias_tensor =
+ makeInputTensor<getElementType<B>()>(bias_shape, bias_data, memory_manager.get());
TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, &bias_tensor,
- &output_tensor, params);
+ &output_tensor, &scratch_tensor, params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
kernel.execute();
}
else
{
TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, nullptr,
- &output_tensor, params);
+ &output_tensor, &scratch_tensor, params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
kernel.execute();
}
EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
@@ -71,14 +82,13 @@ void Check(std::initializer_list<int32_t> output_shape_shape,
TEST(TransposeConvTest, FloatSimple)
{
Check<float, float>(
- /*outputShape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 1}, /*input_shape=*/{1, 4, 4, 1},
- /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*outputShape_data=*/{1, 4, 4, 1},
- /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9},
- /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
- /*bias_data=*/{},
- /*output_data=*/{29, 62, 83, 75, 99, 192, 237, 198, 207, 372, 417, 330, 263, 446, 485, 365},
- /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1,
- getElementType<float>());
+ /*output_shape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 1}, /*input_shape=*/{1, 4, 4, 1},
+ /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 4, 4, 1},
+ /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ /*bias_data=*/{},
+ /*output_data=*/{29, 62, 83, 75, 99, 192, 237, 198, 207, 372, 417, 330, 263, 446, 485, 365},
+ /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1);
SUCCEED();
}
@@ -86,16 +96,15 @@ TEST(TransposeConvTest, FloatSimple)
TEST(TransposeConvTest, FloatTwoFiltersTest)
{
Check<float, float>(
- /*outputShape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 2}, /*input_shape=*/{1, 4, 4, 2},
- /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*outputShape_data=*/{1, 4, 4, 1},
- /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18},
- /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
- 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32},
- /*bias_data=*/{},
- /*output_data=*/{184, 412, 568, 528, 678, 1347, 1689, 1434, 1494, 2715, 3057, 2442, 1968,
- 3352, 3652, 2760},
- /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1,
- getElementType<float>());
+ /*output_shape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 2}, /*input_shape=*/{1, 4, 4, 2},
+ /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 4, 4, 1},
+ /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32},
+ /*bias_data=*/{},
+ /*output_data=*/
+ {184, 412, 568, 528, 678, 1347, 1689, 1434, 1494, 2715, 3057, 2442, 1968, 3352, 3652, 2760},
+ /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1);
SUCCEED();
}
@@ -103,28 +112,246 @@ TEST(TransposeConvTest, FloatTwoFiltersTest)
TEST(TransposeConvTest, SimpleBiasTest)
{
Check<float, float>(
- /*outputShape_shape=*/{4}, /*weight_shape=*/{2, 3, 3, 1},
- /*input_shape=*/{1, 2, 2, 1},
- /*bias_shape=*/{2}, /*output_shape=*/{1, 4, 4, 1}, /*outputShape_data=*/{1, 5, 5, 2},
- /*weight_data=*/{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18},
- /*input_data=*/{1, 2, 3, 4},
- /*bias_data=*/{3, 4},
- /*output_data=*/{4, 6, 6, 8, 10, 14, 9, 12, 13, 16, 10, 12, 12, 14, 28, 32, 21,
- 24, 25, 28, 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, 24, 28, 30, 34,
- 64, 72, 39, 44, 47, 52, 42, 46, 48, 52, 106, 114, 63, 68, 71, 76},
- /*params.padding=*/luci::Padding::VALID, /*stride_height=*/2, /*stride_width=*/2,
- getElementType<float>());
+ /*output_shape_shape=*/{4}, /*weight_shape=*/{2, 3, 3, 1},
+ /*input_shape=*/{1, 2, 2, 1},
+ /*bias_shape=*/{2}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 5, 5, 2},
+ /*weight_data=*/{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18},
+ /*input_data=*/{1, 2, 3, 4},
+ /*bias_data=*/{3, 4},
+ /*output_data=*/{4, 6, 6, 8, 10, 14, 9, 12, 13, 16, 10, 12, 12, 14, 28, 32, 21,
+ 24, 25, 28, 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, 24, 28, 30, 34,
+ 64, 72, 39, 44, 47, 52, 42, 46, 48, 52, 106, 114, 63, 68, 71, 76},
+ /*params.padding=*/luci::Padding::VALID, /*stride_height=*/2, /*stride_width=*/2);
SUCCEED();
}
-// TODO Uint8Simple
-// Implement GetDequantizedOutput Function.
-// Create Test for Uint8 Case
+TEST(TransposeConvTest, UInt8)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ std::vector<float> input_data{1, 2, 3, 4};
+ std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18};
+ std::vector<float> bias_data{3, 4};
+ std::vector<int32_t> output_shape_data{1, 5, 5, 2};
+ std::vector<float> ref_output_data{
+ 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
+ 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
+ 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
+ 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
+ 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
+ };
+
+ // Choose quantization parameters carefully.
+ auto input_quant = quantizationParams<uint8_t>(-8.0, 7.9375); // s = 1 / 16, zp = 128
+ auto filter_quant = quantizationParams<uint8_t>(-24.0, 39.75); // s = 1 / 4, zp = 96
+ auto output_quant = quantizationParams<uint8_t>(-64.0, 191.0); // s = 1, zp = 64
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 2, 1}, input_quant.first, input_quant.second, input_data, memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::U8>(
+ {2, 3, 3, 1}, filter_quant.first, filter_quant.second, filter_data, memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>({2}, input_quant.first * filter_quant.first,
+ 0, bias_data, memory_manager.get());
+ Tensor output_shape_tensor =
+ makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant.first, output_quant.second);
+
+ DataType scratch_data_type =
+ input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+ Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");
+
+ TransposeConvParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 2;
+ params.activation = luci::FusedActFunc::NONE;
+
+ TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor,
+ &output_tensor, &scratch_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
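
Editor's note: the bias tensor above is quantized with scale input_scale * filter_scale and zero point 0, so the int32 bias lives in the same scale as the accumulator and can be added directly before requantization. Worked out for this test's parameters (illustrative arithmetic only):

  constexpr float input_scale = 1.0f / 16.0f; // from quantizationParams(-8.0, 7.9375)
  constexpr float filter_scale = 1.0f / 4.0f; // from quantizationParams(-24.0, 39.75)
  constexpr float bias_scale = input_scale * filter_scale; // 1/64, the accumulator scale
  // The bias value 3.0f is therefore stored as round(3.0f / bias_scale) = 192.
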
+
+TEST(TransposeConvTest, UInt8_CWQ)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ const int32_t output_channels = 2;
+ std::vector<float> input_data{1, 2, 3, 4};
+ std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18};
+ std::vector<float> bias_data{3, 4};
+ std::vector<int32_t> output_shape_data{1, 5, 5, 2};
+ std::vector<float> ref_output_data{
+ 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
+ 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
+ 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
+ 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
+ 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
+ };
+
+ // Choose quantization parameters carefully.
+ auto input_quant = quantizationParams<uint8_t>(-8.0, 7.9375); // s = 1 / 16, zp = 128
+ auto output_quant = quantizationParams<uint8_t>(-64.0, 191.0); // s = 1, zp = 64
+
+ std::vector<std::pair<float, int32_t>> filter_quant_params;
+ filter_quant_params.push_back(quantizationParams<uint8_t>(0, 17));
+ filter_quant_params.push_back(quantizationParams<uint8_t>(0, 18));
+
+ std::vector<float> filter_scales;
+ std::vector<int32_t> filter_zerops;
+ for (auto iter : filter_quant_params)
+ {
+ filter_scales.push_back(iter.first);
+ filter_zerops.push_back(iter.second);
+ }
+
+ std::vector<float> bias_scales;
+ for (int i = 0; i < output_channels; ++i)
+ bias_scales.push_back(filter_quant_params[i].first * input_quant.first);
+ std::vector<int32_t> zerop(output_channels, 0);
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 2, 1}, input_quant.first, input_quant.second, input_data, memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::U8>(
+ {output_channels, 3, 3, 1}, filter_scales, filter_zerops, 0, filter_data, memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0,
+ bias_data, memory_manager.get());
+ Tensor output_shape_tensor =
+ makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant.first, output_quant.second);
+
+ DataType scratch_data_type =
+ input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+ Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");
+
+ TransposeConvParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 2;
+ params.activation = luci::FusedActFunc::NONE;
+
+ TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor,
+ &output_tensor, &scratch_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
+ kernel.execute();
-// TODO Uint8FiltersTest
-// Implement GetDequantizedOutput Function.
-// Create Test for Uint8 Case
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
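
Editor's note: quantizationParams<uint8_t>(f_min, f_max) is assumed to derive (scale, zero_point) so that the float range [f_min, f_max] maps onto [0, 255]; with f_min = 0 the zero point lands at 0. For the two filter channels above, that gives approximately:

  constexpr float filter_scale_c0 = 17.0f / 255.0f; // ≈ 0.0667, zero_point 0
  constexpr float filter_scale_c1 = 18.0f / 255.0f; // ≈ 0.0706, zero_point 0
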
+
+TEST(TransposeConvTest, SInt16)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ std::vector<float> input_data{1, 2, 3, 4};
+ std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18};
+ std::vector<float> bias_data{3, 4};
+ std::vector<int32_t> output_shape_data{1, 5, 5, 2};
+ std::vector<float> ref_output_data{
+ 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
+ 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
+ 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
+ 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
+ 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
+ };
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 2, 2, 1}, 0.25, 0, input_data, memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::S16>({2, 3, 3, 1}, 0.2, 0, filter_data, memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::S64>({2}, 0.25 * 0.2, 0, bias_data, memory_manager.get());
+ Tensor output_shape_tensor =
+ makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+
+ DataType scratch_data_type =
+ input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+ Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");
+
+ TransposeConvParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 2;
+ params.activation = luci::FusedActFunc::NONE;
+
+ TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor,
+ &output_tensor, &scratch_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST(TransposeConvTest, SInt16_CWQ_weights)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ const int output_channels = 2;
+ const Shape input_shape{1, 2, 2, 1};
+ const Shape filter_shape{output_channels, 3, 3, 1};
+ const Shape bias_shape{output_channels};
+ std::vector<int32_t> output_shape_data{1, 5, 5, output_channels};
+
+ std::vector<float> input_data{1, 2, 3, 4};
+ std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18};
+ std::vector<float> bias_data{3, 4};
+
+ std::vector<float> ref_output_data{
+ 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
+ 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
+ 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
+ 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
+ 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
+ };
+
+ const float input_scale = 0.25;
+ const float output_scale = 0.5;
+ const std::vector<float> filter_scales{0.2f, 0.5f};
+ std::vector<float> bias_scales{filter_scales[0] * input_scale, filter_scales[1] * input_scale};
+ const std::vector<int32_t> zerop(2, 0);
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data, memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0,
+ filter_data, memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data,
+ memory_manager.get());
+ Tensor output_shape_tensor =
+ makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, output_scale, 0);
+
+ DataType scratch_data_type =
+ input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+ Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");
+
+ TransposeConvParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 2;
+ params.activation = luci::FusedActFunc::NONE;
+
+ TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor,
+ &output_tensor, &scratch_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
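
Editor's note: with channel-wise weights, each output channel gets its own effective rescale factor input_scale * filter_scale[c] / output_scale, which is what feeds the per-channel multipliers in the kernel. For the scales in this test (illustrative arithmetic):

  constexpr double effective_scale_c0 = 0.25 * 0.2 / 0.5; // = 0.1
  constexpr double effective_scale_c1 = 0.25 * 0.5 / 0.5; // = 0.25
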
} // namespace
} // namespace kernels
diff --git a/compiler/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.cpp b/compiler/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.cpp
new file mode 100644
index 000000000..f049beec4
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.cpp
@@ -0,0 +1,892 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/UnidirectionalSequenceLSTM.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/tensor_utils.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace lstm
+{
+namespace
+{
+
+using namespace tflite;
+
+void UpdateLstmCellFloat(int n_batch, int n_cell, float *cell_state, const float *input_gate,
+ float *forget_gate, const float *cell_gate, bool use_cifg, float clip)
+{
+// NOTE The tflite source is kept as is, but it fails to build with gcc-8 and
+// above because of a -Wrestrict warning on the call below.
+// TODO Remove this #pragma once the upstream code no longer triggers it.
+#pragma GCC diagnostic ignored "-Wrestrict"
+ tensor_utils::VectorVectorCwiseProduct(forget_gate, cell_state, n_batch * n_cell, cell_state);
+
+ if (use_cifg)
+ {
+ // With CIFG, input_gate = 1-forget_gate. Use the forget_gate array as
+ // scratch, as input_gate array is not allocated in this case. (Be careful
+ // not to write to the scratch before reading the forget gate data.)
+ float *scratch = forget_gate;
+ tensor_utils::Sub1Vector(forget_gate, n_batch * n_cell, scratch);
+ tensor_utils::VectorVectorCwiseProductAccumulate(cell_gate, scratch, n_batch * n_cell,
+ cell_state);
+ }
+ else
+ {
+ tensor_utils::VectorVectorCwiseProductAccumulate(cell_gate, input_gate, n_batch * n_cell,
+ cell_state);
+ }
+ if (clip > 0.0f)
+ {
+ tensor_utils::CwiseClipping(cell_state, n_batch * n_cell, clip);
+ }
+}
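
Editor's note: in the usual LSTM notation, UpdateLstmCellFloat computes the cell-state recurrence

  $c_t = f_t \odot c_{t-1} + i_t \odot g_t$, with $i_t = 1 - f_t$ in the CIFG case,

followed by clipping $c_t$ to $[-\mathrm{clip}, \mathrm{clip}]$ when clip > 0.
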
+
+void CalculateLstmOutputFloat(int n_batch, int n_cell, int n_output, const float *cell_state,
+ const float *output_gate, TfLiteFusedActivation activation,
+ const float *projection_weights, const float *projection_bias,
+ const float proj_clip, float *output_state, float *scratch)
+{
+ tensor_utils::ApplyActivationToVector(cell_state, n_batch * n_cell, activation, scratch);
+ tensor_utils::VectorVectorCwiseProduct(output_gate, scratch, n_batch * n_cell, scratch);
+
+ const bool use_projection = (projection_weights != nullptr);
+ const bool use_projection_bias = (projection_bias != nullptr);
+
+ if (use_projection)
+ {
+ if (use_projection_bias)
+ {
+ tensor_utils::VectorBatchVectorAssign(projection_bias, n_output, n_batch, output_state);
+ }
+ else
+ {
+ std::fill_n(output_state, n_batch * n_output, 0.0f);
+ }
+ tensor_utils::MatrixBatchVectorMultiplyAccumulate(projection_weights, n_output, n_cell, scratch,
+ n_batch, output_state);
+ if (proj_clip > 0.0f)
+ {
+ tensor_utils::CwiseClipping(output_state, n_batch * n_output, proj_clip);
+ }
+ }
+ else
+ {
+ std::copy_n(scratch, n_batch * n_output, output_state);
+ }
+}
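
Editor's note: CalculateLstmOutputFloat produces the hidden state

  $h_t = o_t \odot \phi(c_t)$,

where $\phi$ is the configured activation; with projection it becomes $h_t = \mathrm{clip}(W_{proj}\,(o_t \odot \phi(c_t)) + b_{proj},\ \mathrm{proj\_clip})$.
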
+
+inline void CalculateLstmGateFloat(const float *input, const float *input_to_gate_weights,
+ const float *aux_input, const float *aux_input_to_gate_weights,
+ const float *output_state,
+ const float *recurrent_to_gate_weights, const float *cell_state,
+ const float *cell_to_gate_weights,
+ const float *layer_norm_coefficients, const float *gate_bias,
+ const int n_batch, const int n_input, const int n_aux_input,
+ const int n_output, const int n_cell,
+ const TfLiteFusedActivation activation, float *gate,
+ const bool is_input_all_zeros, const bool is_aux_input_all_zeros)
+{
+ const bool use_peephole = (cell_to_gate_weights != nullptr);
+ const bool use_layer_norm = (layer_norm_coefficients != nullptr);
+
+ // Initialize the scratch buffer with the bias for a regular LSTM, or with
+ // zeros for a layer-norm LSTM.
+ if (use_layer_norm)
+ {
+ std::fill_n(gate, n_cell * n_batch, 0.0f);
+ }
+ else
+ {
+ tensor_utils::VectorBatchVectorAssign(gate_bias, n_cell, n_batch, gate);
+ }
+ // For each batch and cell: compute input_weight * input.
+ // Skip if input is all zeros.
+ if (!is_input_all_zeros)
+ {
+ tensor_utils::MatrixBatchVectorMultiplyAccumulate(input_to_gate_weights, n_cell, n_input, input,
+ n_batch, gate);
+ }
+ // For each batch and cell: compute aux_input_weight * aux_input.
+ // Skip if auxiliary input is not available or all zeros.
+ if (!is_aux_input_all_zeros)
+ {
+ tensor_utils::MatrixBatchVectorMultiplyAccumulate(aux_input_to_gate_weights, n_cell,
+ n_aux_input, aux_input, n_batch, gate);
+ }
+ // For each batch and cell: compute recurrent_weight * output_state.
+ tensor_utils::MatrixBatchVectorMultiplyAccumulate(recurrent_to_gate_weights, n_cell, n_output,
+ output_state, n_batch, gate);
+ // For each batch and cell: compute cell_weight .* cell_state (peephole LSTM)
+ if (use_peephole)
+ {
+ tensor_utils::VectorBatchVectorCwiseProductAccumulate(cell_to_gate_weights, n_cell, cell_state,
+ n_batch, gate);
+ }
+ // Do layer normalization (if layer norm LSTM)
+ if (use_layer_norm)
+ {
+ tensor_utils::MeanStddevNormalization(gate, gate, n_cell, n_batch);
+ tensor_utils::VectorBatchVectorCwiseProduct(layer_norm_coefficients, n_cell, gate, n_batch,
+ gate);
+ tensor_utils::VectorBatchVectorAdd(gate_bias, n_cell, n_batch, gate);
+ }
+ // Apply activation
+ tensor_utils::ApplyActivationToVector(gate, n_batch * n_cell, activation, gate);
+}
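
Editor's note: putting the accumulations above together, CalculateLstmGateFloat evaluates one gate as

  $\mathrm{gate} = \mathrm{act}(W_x x_t + W_{aux} a_t + W_h h_{t-1} + w_c \odot c_{t-1} + b)$,

while the layer-norm variant normalizes before scaling and biasing: $\mathrm{gate} = \mathrm{act}(\gamma \odot \mathrm{LN}(W_x x_t + W_{aux} a_t + W_h h_{t-1} + w_c \odot c_{t-1}) + b)$, i.e. the bias is added only after normalization.
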
+
+inline void LstmStepFloat(
+ const float *input_ptr, const float *input_to_input_weights_ptr,
+ const float *input_to_forget_weights_ptr, const float *input_to_cell_weights_ptr,
+ const float *input_to_output_weights_ptr, const float *aux_input_ptr,
+ const float *aux_input_to_input_weights_ptr, const float *aux_input_to_forget_weights_ptr,
+ const float *aux_input_to_cell_weights_ptr, const float *aux_input_to_output_weights_ptr,
+ const float *recurrent_to_input_weights_ptr, const float *recurrent_to_forget_weights_ptr,
+ const float *recurrent_to_cell_weights_ptr, const float *recurrent_to_output_weights_ptr,
+ const float *cell_to_input_weights_ptr, const float *cell_to_forget_weights_ptr,
+ const float *cell_to_output_weights_ptr, const float *input_layer_norm_coefficients_ptr,
+ const float *forget_layer_norm_coefficients_ptr, const float *cell_layer_norm_coefficients_ptr,
+ const float *output_layer_norm_coefficients_ptr, const float *input_gate_bias_ptr,
+ const float *forget_gate_bias_ptr, const float *cell_gate_bias_ptr,
+ const float *output_gate_bias_ptr, const float *projection_weights_ptr,
+ const float *projection_bias_ptr, const TfLiteLSTMParams *params, int n_batch, int n_cell,
+ int n_input, int n_aux_input, int n_output, int output_batch_leading_dim, float *output_state_ptr,
+ float *cell_state_ptr, float *scratch0, float *scratch1, float *scratch2, float *scratch3,
+ float *output_ptr)
+{
+ // Since we have already checked that the weights are either all present or
+ // all absent, checking the existence of only one of them gives the condition.
+ const bool use_cifg = (input_to_input_weights_ptr == nullptr);
+
+ // Make named scratch buffers.
+ float *input_gate_scratch = scratch0;
+ float *forget_gate_scratch = scratch1;
+ float *cell_gate_scratch = scratch2;
+ float *output_gate_scratch = scratch3;
+
+ // Check if inputs are all zeros so we can skip some computations.
+ const bool is_input_all_zeros = tensor_utils::IsZeroVector(input_ptr, n_batch * n_input);
+ const bool is_aux_input_all_zeros =
+ (aux_input_ptr == nullptr || tensor_utils::IsZeroVector(aux_input_ptr, n_batch * n_aux_input));
+ if (!use_cifg)
+ {
+ // Calculate the input gate. (If not CIFG.)
+ CalculateLstmGateFloat(input_ptr, input_to_input_weights_ptr, aux_input_ptr,
+ aux_input_to_input_weights_ptr, output_state_ptr,
+ recurrent_to_input_weights_ptr, cell_state_ptr,
+ cell_to_input_weights_ptr, input_layer_norm_coefficients_ptr,
+ input_gate_bias_ptr, n_batch, n_input, n_aux_input, n_output, n_cell,
+ /*activation=*/kTfLiteActSigmoid, input_gate_scratch, is_input_all_zeros,
+ is_aux_input_all_zeros);
+ }
+ // Calculate the forget gate.
+ CalculateLstmGateFloat(input_ptr, input_to_forget_weights_ptr, aux_input_ptr,
+ aux_input_to_forget_weights_ptr, output_state_ptr,
+ recurrent_to_forget_weights_ptr, cell_state_ptr,
+ cell_to_forget_weights_ptr, forget_layer_norm_coefficients_ptr,
+ forget_gate_bias_ptr, n_batch, n_input, n_aux_input, n_output, n_cell,
+ /*activation=*/kTfLiteActSigmoid, forget_gate_scratch, is_input_all_zeros,
+ is_aux_input_all_zeros);
+ // Calculate the cell update gate.
+ CalculateLstmGateFloat(
+ input_ptr, input_to_cell_weights_ptr, aux_input_ptr, aux_input_to_cell_weights_ptr,
+ output_state_ptr, recurrent_to_cell_weights_ptr, /*cell_state=*/nullptr,
+ /*cell_to_gate_weights=*/nullptr, cell_layer_norm_coefficients_ptr, cell_gate_bias_ptr, n_batch,
+ n_input, n_aux_input, n_output, n_cell, params->activation, cell_gate_scratch,
+ is_input_all_zeros, is_aux_input_all_zeros);
+ // Update the cell state.
+ UpdateLstmCellFloat(n_batch, n_cell, cell_state_ptr, input_gate_scratch, forget_gate_scratch,
+ cell_gate_scratch, use_cifg, params->cell_clip);
+ // Calculate output gate.
+ CalculateLstmGateFloat(input_ptr, input_to_output_weights_ptr, aux_input_ptr,
+ aux_input_to_output_weights_ptr, output_state_ptr,
+ recurrent_to_output_weights_ptr, cell_state_ptr,
+ cell_to_output_weights_ptr, output_layer_norm_coefficients_ptr,
+ output_gate_bias_ptr, n_batch, n_input, n_aux_input, n_output, n_cell,
+ /*activation=*/kTfLiteActSigmoid, output_gate_scratch, is_input_all_zeros,
+ is_aux_input_all_zeros);
+ // Update the output state.
+ CalculateLstmOutputFloat(n_batch, n_cell, n_output, cell_state_ptr, output_gate_scratch,
+ params->activation, projection_weights_ptr, projection_bias_ptr,
+ params->proj_clip, output_state_ptr, scratch2);
+ // Copy output state to the output. Note that the output's rows may not be
+ // contiguous (output_batch_leading_dim != n_output).
+ for (int b = 0; b < n_batch; b++)
+ {
+ std::copy_n(output_state_ptr + b * n_output, n_output,
+ output_ptr + b * output_batch_leading_dim);
+ }
+}
+
+} // namespace
+
+void EvalFloat(const Tensor *input,
+
+ const Tensor *input_to_input_weights, const Tensor *input_to_forget_weights,
+ const Tensor *input_to_cell_weights, const Tensor *input_to_output_weights,
+
+ const Tensor *recurrent_to_input_weights, const Tensor *recurrent_to_forget_weights,
+ const Tensor *recurrent_to_cell_weights, const Tensor *recurrent_to_output_weights,
+
+ const Tensor *cell_to_input_weights, const Tensor *cell_to_forget_weights,
+ const Tensor *cell_to_output_weights,
+
+ const Tensor *input_layer_norm_coefficients,
+ const Tensor *forget_layer_norm_coefficients,
+ const Tensor *cell_layer_norm_coefficients,
+ const Tensor *output_layer_norm_coefficients,
+
+ const Tensor *aux_input, const Tensor *aux_input_to_input_weights,
+ const Tensor *aux_input_to_forget_weights, const Tensor *aux_input_to_cell_weights,
+ const Tensor *aux_input_to_output_weights,
+
+ const Tensor *input_gate_bias, const Tensor *forget_gate_bias,
+ const Tensor *cell_gate_bias, const Tensor *output_gate_bias,
+
+ const Tensor *projection_weights, const Tensor *projection_bias,
+ const TfLiteLSTMParams *params,
+
+ bool forward_sequence, bool time_major, int output_offset,
+
+ Tensor *scratch_buffer, Tensor *output_state, Tensor *cell_state, Tensor *output)
+{
+ const Shape &input_shape = input->shape();
+ assert(input_shape.num_dims() >= 2 && input_shape.num_dims() <= 3);
+ int max_time, n_batch;
+ if (input_shape.num_dims() == 3)
+ {
+ max_time = (time_major) ? input_shape.dim(0) : input_shape.dim(1);
+ n_batch = (time_major) ? input_shape.dim(1) : input_shape.dim(0);
+ }
+ else
+ {
+ max_time = 1;
+ n_batch = input_shape.dim(0);
+ }
+ const int n_input = input_shape.dim(input_shape.num_dims() - 1);
+
+ int aux_input_temp = 0;
+ if (aux_input)
+ {
+ const Shape &aux_input_shape = aux_input->shape();
+ aux_input_temp = aux_input_shape.dim(aux_input_shape.num_dims() - 1);
+ }
+ const int aux_input_size = aux_input_temp;
+
+ // n_cell and n_output will be the same size when there is no projection.
+ const Shape &input_to_output_weights_shape = input_to_output_weights->shape();
+ const Shape &recurrent_to_output_weights_shape = recurrent_to_output_weights->shape();
+ const int n_cell = input_to_output_weights_shape.dim(0);
+ const int n_output = recurrent_to_output_weights_shape.dim(1);
+
+ // Since we have already checked that the weights are either all present or
+ // all absent, checking the existence of only one of them gives the condition.
+ const bool use_cifg = (input_to_input_weights == nullptr);
+
+ // Index the scratch buffers pointers to the global scratch buffer.
+ float *scratch_buffer_ptr = getTensorData<float>(scratch_buffer);
+ float *input_gate_scratch = nullptr;
+ float *cell_gate_scratch = nullptr;
+ float *forget_gate_scratch = nullptr;
+ float *output_gate_scratch = nullptr;
+ if (use_cifg)
+ {
+ cell_gate_scratch = scratch_buffer_ptr;
+ forget_gate_scratch = scratch_buffer_ptr + n_cell * n_batch;
+ output_gate_scratch = scratch_buffer_ptr + 2 * n_cell * n_batch;
+ }
+ else
+ {
+ input_gate_scratch = scratch_buffer_ptr;
+ cell_gate_scratch = scratch_buffer_ptr + n_cell * n_batch;
+ forget_gate_scratch = scratch_buffer_ptr + 2 * n_cell * n_batch;
+ output_gate_scratch = scratch_buffer_ptr + 3 * n_cell * n_batch;
+ }
+
+ const Shape &output_shape = output->shape();
+ const int output_batch_leading_dim = output_shape.dim(output_shape.num_dims() - 1);
+ if (time_major)
+ {
+ // Loop through the sequence.
+ const int input_step = n_batch * n_input;
+ const int output_step = n_batch * output_batch_leading_dim;
+ for (int t = 0; t < max_time; t++)
+ {
+ // If this is the forward_sequence, step forward, otherwise step
+ // backwards.
+ const int t_rel = forward_sequence ? t : max_time - t - 1;
+ const float *input_ptr = getTensorData<float>(input) + t_rel * input_step;
+ const float *aux_input_ptr = nullptr;
+ if (aux_input)
+ {
+ aux_input_ptr = getTensorData<float>(aux_input) + t_rel * input_step;
+ }
+ float *output_ptr = getTensorData<float>(output) + t_rel * output_step + output_offset;
+
+ LstmStepFloat(
+ input_ptr, getTensorData<float>(input_to_input_weights),
+ getTensorData<float>(input_to_forget_weights), getTensorData<float>(input_to_cell_weights),
+ getTensorData<float>(input_to_output_weights), aux_input_ptr,
+ getTensorData<float>(aux_input_to_input_weights),
+ getTensorData<float>(aux_input_to_forget_weights),
+ getTensorData<float>(aux_input_to_cell_weights),
+ getTensorData<float>(aux_input_to_output_weights),
+ getTensorData<float>(recurrent_to_input_weights),
+ getTensorData<float>(recurrent_to_forget_weights),
+ getTensorData<float>(recurrent_to_cell_weights),
+ getTensorData<float>(recurrent_to_output_weights),
+ getTensorData<float>(cell_to_input_weights), getTensorData<float>(cell_to_forget_weights),
+ getTensorData<float>(cell_to_output_weights),
+ getTensorData<float>(input_layer_norm_coefficients),
+ getTensorData<float>(forget_layer_norm_coefficients),
+ getTensorData<float>(cell_layer_norm_coefficients),
+ getTensorData<float>(output_layer_norm_coefficients), getTensorData<float>(input_gate_bias),
+ getTensorData<float>(forget_gate_bias), getTensorData<float>(cell_gate_bias),
+ getTensorData<float>(output_gate_bias), getTensorData<float>(projection_weights),
+ getTensorData<float>(projection_bias), params, n_batch, n_cell, n_input, aux_input_size,
+ n_output, output_batch_leading_dim, getTensorData<float>(output_state),
+ getTensorData<float>(cell_state), input_gate_scratch, forget_gate_scratch,
+ cell_gate_scratch, output_gate_scratch, output_ptr);
+ }
+ }
+ else
+ {
+ for (int b = 0; b < n_batch; b++)
+ {
+ const int input_step = n_input;
+ const int output_step = output_batch_leading_dim;
+ for (int t = 0; t < max_time; t++)
+ {
+ // If this is the forward_sequence, step forward, otherwise step
+ // backwards.
+ const int t_rel = forward_sequence ? t : max_time - t - 1;
+ const int time_offset = b * max_time + t_rel;
+ const float *input_ptr = getTensorData<float>(input) + time_offset * input_step;
+ const float *aux_input_ptr = nullptr;
+ if (aux_input)
+ {
+ aux_input_ptr = getTensorData<float>(aux_input) + time_offset * input_step;
+ }
+ float *output_ptr =
+ getTensorData<float>(output) + time_offset * output_step + output_offset;
+
+ // Offset the {output,cell}_state pointers to the right batch.
+ float *output_state_ptr = getTensorData<float>(output_state) + b * output_batch_leading_dim;
+ float *cell_state_ptr = getTensorData<float>(cell_state) + b * n_cell;
+ // Offset the scratch pointers to the right batch.
+ float *input_gate_scratch_ptr =
+ input_gate_scratch ? input_gate_scratch + b * n_cell : nullptr;
+ float *forget_gate_scratch_ptr = forget_gate_scratch + b * n_cell;
+ float *cell_gate_scratch_ptr = cell_gate_scratch + b * n_cell;
+ float *output_gate_scratch_ptr = output_gate_scratch + b * n_cell;
+
+ LstmStepFloat(
+ input_ptr, getTensorData<float>(input_to_input_weights),
+ getTensorData<float>(input_to_forget_weights),
+ getTensorData<float>(input_to_cell_weights),
+ getTensorData<float>(input_to_output_weights), aux_input_ptr,
+ getTensorData<float>(aux_input_to_input_weights),
+ getTensorData<float>(aux_input_to_forget_weights),
+ getTensorData<float>(aux_input_to_cell_weights),
+ getTensorData<float>(aux_input_to_output_weights),
+ getTensorData<float>(recurrent_to_input_weights),
+ getTensorData<float>(recurrent_to_forget_weights),
+ getTensorData<float>(recurrent_to_cell_weights),
+ getTensorData<float>(recurrent_to_output_weights),
+ getTensorData<float>(cell_to_input_weights), getTensorData<float>(cell_to_forget_weights),
+ getTensorData<float>(cell_to_output_weights),
+ getTensorData<float>(input_layer_norm_coefficients),
+ getTensorData<float>(forget_layer_norm_coefficients),
+ getTensorData<float>(cell_layer_norm_coefficients),
+ getTensorData<float>(output_layer_norm_coefficients),
+ getTensorData<float>(input_gate_bias), getTensorData<float>(forget_gate_bias),
+ getTensorData<float>(cell_gate_bias), getTensorData<float>(output_gate_bias),
+ getTensorData<float>(projection_weights), getTensorData<float>(projection_bias), params,
+ /*n_batch=*/1, n_cell, n_input, aux_input_size, n_output, output_batch_leading_dim,
+ output_state_ptr, cell_state_ptr, input_gate_scratch_ptr, forget_gate_scratch_ptr,
+ cell_gate_scratch_ptr, output_gate_scratch_ptr, output_ptr);
+ }
+ }
+ }
+}
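
Editor's note: the two branches above differ only in layout and pointer arithmetic. Time-major input is laid out [max_time, n_batch, n_input] and each step processes all batches at once; batch-major input is [n_batch, max_time, n_input] and each (batch, step) pair runs LstmStepFloat with n_batch = 1. Restating the offsets from the code:

  // time-major:  input_ptr = input_data + t_rel * (n_batch * n_input);
  // batch-major: input_ptr = input_data + (b * max_time + t_rel) * n_input;
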
+
+} // namespace lstm
+} // namespace kernels
+} // namespace luci_interpreter
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+UnidirectionalSequenceLSTM::UnidirectionalSequenceLSTM(
+ const Tensor *input,
+
+ const Tensor *input_to_input_weights, const Tensor *input_to_forget_weights,
+ const Tensor *input_to_cell_weights, const Tensor *input_to_output_weights,
+
+ const Tensor *recurrent_to_input_weights, const Tensor *recurrent_to_forget_weights,
+ const Tensor *recurrent_to_cell_weights, const Tensor *recurrent_to_output_weights,
+
+ const Tensor *cell_to_input_weights, const Tensor *cell_to_forget_weights,
+ const Tensor *cell_to_output_weights,
+
+ const Tensor *input_gate_bias, const Tensor *forget_gate_bias, const Tensor *cell_gate_bias,
+ const Tensor *output_gate_bias,
+
+ const Tensor *projection_weights, const Tensor *projection_bias,
+
+ const Tensor *output_state, const Tensor *cell_state, const Tensor *input_layer_norm_coefficients,
+ const Tensor *forget_layer_norm_coefficients, const Tensor *cell_layer_norm_coefficients,
+ const Tensor *output_layer_norm_coefficients,
+
+ Tensor *output, Tensor *scratchpad_1, Tensor *scratchpad_2, Tensor *scratchpad_3,
+ const UnidirectionalSequenceLSTMParams &params)
+ : KernelWithParams<UnidirectionalSequenceLSTMParams>(
+ {input,
+ input_to_input_weights,
+ input_to_forget_weights,
+ input_to_cell_weights,
+ input_to_output_weights,
+
+ recurrent_to_input_weights,
+ recurrent_to_forget_weights,
+ recurrent_to_cell_weights,
+ recurrent_to_output_weights,
+
+ cell_to_input_weights,
+ cell_to_forget_weights,
+ cell_to_output_weights,
+
+ input_gate_bias,
+ forget_gate_bias,
+ cell_gate_bias,
+ output_gate_bias,
+
+ projection_weights,
+ projection_bias,
+
+ output_state,
+ cell_state,
+
+ input_layer_norm_coefficients,
+ forget_layer_norm_coefficients,
+ cell_layer_norm_coefficients,
+ output_layer_norm_coefficients},
+ {output, scratchpad_1, scratchpad_2, scratchpad_3}, params)
+{
+ // Do nothing
+}
+
+// Check that the input tensor dimensions match each other.
+void UnidirectionalSequenceLSTM::check_input_tensor_dimensions(int n_input, int n_output,
+ int n_cell, bool use_layer_norm,
+ bool is_integer)
+{
+ // Making sure clipping parameters have valid values.
+ // == 0 means no clipping
+ // > 0 means clipping
+ LUCI_INTERPRETER_CHECK(params().cell_clip >= 0);
+ LUCI_INTERPRETER_CHECK(params().proj_clip >= 0);
+
+ if (input_to_input_weights() != nullptr)
+ {
+ const Shape &input_to_input_weights_shape = input_to_input_weights()->shape();
+ LUCI_INTERPRETER_CHECK(input_to_input_weights_shape.num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(input_to_input_weights_shape.dim(0) == n_cell);
+ LUCI_INTERPRETER_CHECK(input_to_input_weights_shape.dim(1) == n_input);
+ }
+
+ const Shape &input_to_forget_weights_shape = input_to_forget_weights()->shape();
+ LUCI_INTERPRETER_CHECK(input_to_forget_weights_shape.num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(input_to_forget_weights_shape.dim(0) == n_cell);
+ LUCI_INTERPRETER_CHECK(input_to_forget_weights_shape.dim(1) == n_input);
+
+ const Shape &input_to_cell_weights_shape = input_to_cell_weights()->shape();
+ LUCI_INTERPRETER_CHECK(input_to_cell_weights_shape.num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(input_to_cell_weights_shape.dim(0) == n_cell);
+ LUCI_INTERPRETER_CHECK(input_to_cell_weights_shape.dim(1) == n_input);
+
+ if (recurrent_to_input_weights() != nullptr)
+ {
+ const Shape &recurrent_to_input_weights_shape = recurrent_to_input_weights()->shape();
+ LUCI_INTERPRETER_CHECK(recurrent_to_input_weights_shape.num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(recurrent_to_input_weights_shape.dim(0) == n_cell);
+ LUCI_INTERPRETER_CHECK(recurrent_to_input_weights_shape.dim(1) == n_output);
+ }
+
+ const Shape &recurrent_to_forget_weights_shape = recurrent_to_forget_weights()->shape();
+ LUCI_INTERPRETER_CHECK(recurrent_to_forget_weights_shape.num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(recurrent_to_forget_weights_shape.dim(0) == n_cell);
+ LUCI_INTERPRETER_CHECK(recurrent_to_forget_weights_shape.dim(1) == n_output);
+
+ const Shape &recurrent_to_cell_weights_shape = recurrent_to_cell_weights()->shape();
+ LUCI_INTERPRETER_CHECK(recurrent_to_cell_weights_shape.num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(recurrent_to_cell_weights_shape.dim(0) == n_cell);
+ LUCI_INTERPRETER_CHECK(recurrent_to_cell_weights_shape.dim(1) == n_output);
+
+ // We make sure the input-gate's parameters are either both present (regular
+ // LSTM) or both absent (CIFG-LSTM).
+ const bool cifg_weights_all_or_none =
+ ((input_to_input_weights() != nullptr) && (recurrent_to_input_weights() != nullptr)) ||
+ ((input_to_input_weights() == nullptr) && (recurrent_to_input_weights() == nullptr));
+ LUCI_INTERPRETER_CHECK(cifg_weights_all_or_none == true);
+
+ if (cell_to_input_weights() != nullptr)
+ {
+ const Shape &cell_to_input_weights_shape = cell_to_input_weights()->shape();
+ LUCI_INTERPRETER_CHECK(cell_to_input_weights_shape.num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(cell_to_input_weights_shape.dim(0) == n_cell);
+ LUCI_INTERPRETER_CHECK(is_integer
+ ? cell_to_input_weights()->element_type() == loco::DataType::S16
+ : cell_to_input_weights()->element_type() ==
+ input_to_forget_weights()->element_type());
+ }
+
+ if (cell_to_forget_weights() != nullptr)
+ {
+ const Shape &cell_to_forget_weights_shape = cell_to_forget_weights()->shape();
+ LUCI_INTERPRETER_CHECK(cell_to_forget_weights_shape.num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(cell_to_forget_weights_shape.dim(0) == n_cell);
+ LUCI_INTERPRETER_CHECK(is_integer
+ ? cell_to_forget_weights()->element_type() == loco::DataType::S16
+ : cell_to_forget_weights()->element_type() ==
+ input_to_forget_weights()->element_type());
+ }
+
+ if (cell_to_output_weights() != nullptr)
+ {
+ const Shape &cell_to_output_weights_shape = cell_to_output_weights()->shape();
+ LUCI_INTERPRETER_CHECK(cell_to_output_weights_shape.num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(cell_to_output_weights_shape.dim(0) == n_cell);
+ LUCI_INTERPRETER_CHECK(is_integer
+ ? cell_to_output_weights()->element_type() == loco::DataType::S16
+ : cell_to_output_weights()->element_type() ==
+ input_to_forget_weights()->element_type());
+ }
+
+ // Making sure the peephole weights are there all or none.
+ const bool use_cifg = (input_to_input_weights() == nullptr);
+ const bool peephole_weights_all_or_none =
+ ((cell_to_input_weights() != nullptr || use_cifg) && (cell_to_forget_weights() != nullptr) &&
+ (cell_to_output_weights() != nullptr)) ||
+ ((cell_to_input_weights() == nullptr) && (cell_to_forget_weights() == nullptr) &&
+ (cell_to_output_weights() == nullptr));
+ LUCI_INTERPRETER_CHECK(peephole_weights_all_or_none == true);
+
+ // Make sure the input gate bias is present only when not a CIFG-LSTM.
+ if (use_cifg)
+ {
+ LUCI_INTERPRETER_CHECK(input_gate_bias() == nullptr);
+ }
+ else
+ {
+ const Shape &input_gate_bias_shape = input_gate_bias()->shape();
+ LUCI_INTERPRETER_CHECK(input_gate_bias_shape.num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(input_gate_bias_shape.dim(0) == n_cell);
+ if (is_integer)
+ {
+ LUCI_INTERPRETER_CHECK(input_gate_bias()->element_type() == loco::DataType::S32);
+ }
+ else
+ {
+ LUCI_INTERPRETER_CHECK(input_gate_bias()->element_type() == loco::DataType::FLOAT32);
+ }
+ }
+
+ const Shape &forget_gate_bias_shape = forget_gate_bias()->shape();
+ LUCI_INTERPRETER_CHECK(forget_gate_bias_shape.num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(forget_gate_bias_shape.dim(0) == n_cell);
+ if (is_integer)
+ {
+ LUCI_INTERPRETER_CHECK(forget_gate_bias()->element_type() == loco::DataType::S32);
+ }
+ else
+ {
+ LUCI_INTERPRETER_CHECK(forget_gate_bias()->element_type() == loco::DataType::FLOAT32);
+ }
+
+ const Shape &cell_gate_bias_shape = cell_gate_bias()->shape();
+ LUCI_INTERPRETER_CHECK(cell_gate_bias_shape.num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(cell_gate_bias_shape.dim(0) == n_cell);
+ if (is_integer)
+ {
+ LUCI_INTERPRETER_CHECK(cell_gate_bias()->element_type() == loco::DataType::S32);
+ }
+ else
+ {
+ LUCI_INTERPRETER_CHECK(cell_gate_bias()->element_type() == loco::DataType::FLOAT32);
+ }
+
+ const Shape &output_gate_bias_shape = output_gate_bias()->shape();
+ LUCI_INTERPRETER_CHECK(output_gate_bias_shape.num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(output_gate_bias_shape.dim(0) == n_cell);
+ if (is_integer)
+ {
+ LUCI_INTERPRETER_CHECK(output_gate_bias()->element_type() == loco::DataType::S32);
+ }
+ else
+ {
+ LUCI_INTERPRETER_CHECK(output_gate_bias()->element_type() == loco::DataType::FLOAT32);
+ }
+
+ if (projection_weights() != nullptr)
+ {
+ const Shape &projection_weights_shape = projection_weights()->shape();
+ LUCI_INTERPRETER_CHECK(projection_weights_shape.num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(projection_weights_shape.dim(0) == n_output);
+ LUCI_INTERPRETER_CHECK(projection_weights_shape.dim(1) == n_cell);
+ }
+
+ if (projection_bias() != nullptr)
+ {
+ const Shape &projection_bias_shape = projection_bias()->shape();
+ LUCI_INTERPRETER_CHECK(projection_bias_shape.num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(projection_bias_shape.dim(0) == n_output);
+ if (is_integer)
+ {
+ LUCI_INTERPRETER_CHECK(projection_bias()->element_type() == loco::DataType::S32);
+ }
+ else
+ {
+ LUCI_INTERPRETER_CHECK(projection_bias()->element_type() == loco::DataType::FLOAT32);
+ }
+ }
+
+ // Making sure the projection tensors are consistent:
+ // 1) If projection weight is not present, then projection bias should not be
+ // present.
+ // 2) If projection weight is present, then projection bias is optional.
+ // TODO(ghodrat): make sure this is correct.
+ const bool projection_tensors_consistent =
+ ((projection_weights() != nullptr) || (projection_bias() == nullptr));
+ LUCI_INTERPRETER_CHECK(projection_tensors_consistent == true);
+
+ if (use_layer_norm)
+ {
+ if (use_cifg)
+ {
+ LUCI_INTERPRETER_CHECK(input_layer_norm_coefficients() == nullptr);
+ }
+ else
+ {
+ LUCI_INTERPRETER_CHECK(input_layer_norm_coefficients() != nullptr);
+
+ const Shape &input_layer_norm_coefficients_shape = input_layer_norm_coefficients()->shape();
+ LUCI_INTERPRETER_CHECK(input_layer_norm_coefficients_shape.num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(input_layer_norm_coefficients_shape.dim(0) == n_cell);
+ if (is_integer)
+ {
+ LUCI_INTERPRETER_CHECK(input_layer_norm_coefficients()->element_type() ==
+ loco::DataType::S16);
+ }
+ else
+ {
+ LUCI_INTERPRETER_CHECK(input_layer_norm_coefficients()->element_type() ==
+ loco::DataType::FLOAT32);
+ }
+ }
+
+ const Shape &forget_layer_norm_coefficients_shape = forget_layer_norm_coefficients()->shape();
+ LUCI_INTERPRETER_CHECK(forget_layer_norm_coefficients_shape.num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(forget_layer_norm_coefficients_shape.dim(0) == n_cell);
+ if (is_integer)
+ {
+ LUCI_INTERPRETER_CHECK(forget_layer_norm_coefficients()->element_type() ==
+ loco::DataType::S16);
+ }
+ else
+ {
+ LUCI_INTERPRETER_CHECK(forget_layer_norm_coefficients()->element_type() ==
+ loco::DataType::FLOAT32);
+ }
+
+ const Shape &cell_layer_norm_coefficients_shape = cell_layer_norm_coefficients()->shape();
+ LUCI_INTERPRETER_CHECK(cell_layer_norm_coefficients_shape.num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(cell_layer_norm_coefficients_shape.dim(0) == n_cell);
+ if (is_integer)
+ {
+ LUCI_INTERPRETER_CHECK(cell_layer_norm_coefficients()->element_type() == loco::DataType::S16);
+ }
+ else
+ {
+ LUCI_INTERPRETER_CHECK(cell_layer_norm_coefficients()->element_type() ==
+ loco::DataType::FLOAT32);
+ }
+
+ const Shape &output_layer_norm_coefficients_shape = output_layer_norm_coefficients()->shape();
+ LUCI_INTERPRETER_CHECK(output_layer_norm_coefficients_shape.num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(output_layer_norm_coefficients_shape.dim(0) == n_cell);
+ if (is_integer)
+ {
+ LUCI_INTERPRETER_CHECK(output_layer_norm_coefficients()->element_type() ==
+ loco::DataType::S16);
+ }
+ else
+ {
+ LUCI_INTERPRETER_CHECK(output_layer_norm_coefficients()->element_type() ==
+ loco::DataType::FLOAT32);
+ }
+ }
+}
+
+void UnidirectionalSequenceLSTM::configure()
+{
+ LUCI_INTERPRETER_CHECK(getInputTensors().size() == 24);
+ LUCI_INTERPRETER_CHECK(getOutputTensors().size() >= 1);
+
+ // TODO support U8
+ LUCI_INTERPRETER_CHECK(input()->element_type() == loco::DataType::FLOAT32);
+ const bool is_integer = false;
+ const bool use_layer_norm = (forget_layer_norm_coefficients() != nullptr);
+
+ // Infer the batch size, number of outputs, sequence length, and number of
+ // cells from the input tensors.
+ const Shape &input_shape = input()->shape();
+ LUCI_INTERPRETER_CHECK(input_shape.num_dims() > 1);
+ const bool time_major = params().time_major;
+ const int n_batch = time_major ? input_shape.dim(1) : input_shape.dim(0);
+ // NOTE dim(2) is accessed below, so check that the input has at least 3 dims
+ LUCI_INTERPRETER_CHECK(input_shape.num_dims() > 2);
+ const int n_input = input_shape.dim(2);
+
+ const Shape &input_to_output_weights_shape = input_to_output_weights()->shape();
+ const int n_cell = input_to_output_weights_shape.dim(0);
+ LUCI_INTERPRETER_CHECK(input_to_output_weights_shape.num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(input_to_output_weights_shape.dim(1) == n_input);
+
+ const Shape &recurrent_to_output_weights_shape = recurrent_to_output_weights()->shape();
+ LUCI_INTERPRETER_CHECK(recurrent_to_output_weights_shape.num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(recurrent_to_output_weights_shape.dim(0) == n_cell);
+
+ const int n_output = recurrent_to_output_weights_shape.dim(1);
+
+ // Check that the input tensor dimensions match each other.
+ check_input_tensor_dimensions(n_input, n_output, n_cell, use_layer_norm, is_integer);
+
+ // Check the shapes of the input state tensors.
+ // These tensors may be 1D or 2D; either is fine as long as the total size is
+ // correct.
+ const Shape &output_state_shape = output_state()->shape();
+ const Shape &cell_state_shape = cell_state()->shape();
+ LUCI_INTERPRETER_CHECK(output_state_shape.num_elements() == n_batch * n_output);
+ LUCI_INTERPRETER_CHECK(cell_state_shape.num_elements() == n_batch * n_cell);
+
+ // Resize the output tensors.
+ Shape output_shape = Shape(input_shape.num_dims());
+ for (int i = 0; i < input_shape.num_dims() - 1; i++)
+ {
+ output_shape.dim(i) = input_shape.dim(i);
+ }
+ output_shape.dim(input_shape.num_dims() - 1) = n_output;
+ output()->resize(output_shape);
+
+ // TODO import integer
+
+ // output_state and cell_state are variable tensors; use scratchpads for them.
+ getOutputTensors()[1]->resize(output_state_shape);
+ getOutputTensors()[2]->resize(cell_state_shape);
+
+ const bool use_cifg = (input_to_input_weights() == nullptr);
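+ // The scratchpad stores one (n_batch x n_cell) buffer per gate: three gates
+ // under CIFG (the input gate is derived from the forget gate), four otherwise.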
+ if (use_cifg)
+ getOutputTensors()[3]->resize({n_batch, n_cell * 3});
+ else
+ getOutputTensors()[3]->resize({n_batch, n_cell * 4});
+
+ // Hybrid (float input with quantized weights) is not supported.
+ if (input_to_output_weights()->element_type() == loco::DataType::U8 &&
+ input()->element_type() == loco::DataType::FLOAT32)
+ {
+ throw std::runtime_error("Hybrid type is not currently supported");
+ }
+ // TODO support hybrid
+ // TODO support U8
+}
+
+void UnidirectionalSequenceLSTM::execute() const
+{
+ switch (input()->element_type())
+ {
+ case loco::DataType::FLOAT32:
+ evalFloat();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type");
+ }
+}
+
+void UnidirectionalSequenceLSTM::evalFloat() const
+{
+ const bool time_major = params().time_major;
+ const bool use_layer_norm = (forget_layer_norm_coefficients() != nullptr);
+
+ const Tensor *t_input_layer_norm_coefficients =
+ use_layer_norm ? input_layer_norm_coefficients() : nullptr;
+ const Tensor *t_forget_layer_norm_coefficients =
+ use_layer_norm ? forget_layer_norm_coefficients() : nullptr;
+ const Tensor *t_cell_layer_norm_coefficients =
+ use_layer_norm ? cell_layer_norm_coefficients() : nullptr;
+ const Tensor *t_output_layer_norm_coefficients =
+ use_layer_norm ? output_layer_norm_coefficients() : nullptr;
+
+ Tensor *sp_output_state = getOutputTensors()[1];
+ Tensor *sp_cell_state = getOutputTensors()[2];
+ Tensor *sp_scratch_buffer = getOutputTensors()[3];
+
+ // Note: the output_state input variable tensor is expected to be reset to
+ // zero, and this variable tensor is expected not to have its own buffer.
+ auto scratchpad_data = getTensorData<float>(sp_output_state);
+ std::fill_n(scratchpad_data, sp_output_state->shape().num_elements(), 0);
+ scratchpad_data = getTensorData<float>(sp_cell_state);
+ std::fill_n(scratchpad_data, sp_cell_state->shape().num_elements(), 0);
+ scratchpad_data = getTensorData<float>(sp_scratch_buffer);
+ std::fill_n(scratchpad_data, sp_scratch_buffer->shape().num_elements(), 0);
+
+ TfLiteLSTMParams lstm_params{};
+ lstm_params.activation = getTfLiteActivation(params().activation);
+ lstm_params.cell_clip = params().cell_clip;
+ lstm_params.proj_clip = params().proj_clip;
+ lstm_params.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs;
+
+ lstm::EvalFloat(input(), input_to_input_weights(), input_to_forget_weights(),
+ input_to_cell_weights(), input_to_output_weights(),
+
+ recurrent_to_input_weights(), recurrent_to_forget_weights(),
+ recurrent_to_cell_weights(), recurrent_to_output_weights(),
+
+ cell_to_input_weights(), cell_to_forget_weights(), cell_to_output_weights(),
+
+ t_input_layer_norm_coefficients, t_forget_layer_norm_coefficients,
+ t_cell_layer_norm_coefficients, t_output_layer_norm_coefficients,
+ /*aux_input=*/nullptr,
+ /*aux_input_to_input_weights=*/nullptr,
+ /*aux_input_to_forget_weights=*/nullptr,
+ /*aux_input_to_cell_weights=*/nullptr,
+ /*aux_input_to_output_weights=*/nullptr, input_gate_bias(), forget_gate_bias(),
+ cell_gate_bias(), output_gate_bias(),
+
+ projection_weights(), projection_bias(), &lstm_params,
+ /*forward_sequence=*/true, time_major,
+ /*output_offset=*/0, sp_scratch_buffer, sp_output_state, sp_cell_state, output());
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.h b/compiler/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.h
new file mode 100644
index 000000000..b8125111b
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_UNIDIRECTIONALSEQUENCELSTM_H
+#define LUCI_INTERPRETER_KERNELS_UNIDIRECTIONALSEQUENCELSTM_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class UnidirectionalSequenceLSTM : public KernelWithParams<UnidirectionalSequenceLSTMParams>
+{
+public:
+ UnidirectionalSequenceLSTM(
+ const Tensor *input,
+
+ const Tensor *input_to_input_weights, const Tensor *input_to_forget_weights,
+ const Tensor *input_to_cell_weights, const Tensor *input_to_output_weights,
+
+ const Tensor *recurrent_to_input_weights, const Tensor *recurrent_to_forget_weights,
+ const Tensor *recurrent_to_cell_weights, const Tensor *recurrent_to_output_weights,
+
+ const Tensor *cell_to_input_weights, const Tensor *cell_to_forget_weights,
+ const Tensor *cell_to_output_weights,
+
+ const Tensor *input_gate_bias, const Tensor *forget_gate_bias, const Tensor *cell_gate_bias,
+ const Tensor *output_gate_bias,
+
+ const Tensor *projection_weights, const Tensor *projection_bias,
+
+ const Tensor *output_state, const Tensor *cell_state,
+
+ const Tensor *input_layer_norm_coefficients, const Tensor *forget_layer_norm_coefficients,
+ const Tensor *cell_layer_norm_coefficients, const Tensor *output_layer_norm_coefficients,
+
+ Tensor *output, Tensor *scratchpad_1, Tensor *scratchpad_2, Tensor *scratchpad_3,
+ const UnidirectionalSequenceLSTMParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+
+ const Tensor *input_to_input_weights() const { return _inputs[1]; }
+ const Tensor *input_to_forget_weights() const { return _inputs[2]; }
+ const Tensor *input_to_cell_weights() const { return _inputs[3]; }
+ const Tensor *input_to_output_weights() const { return _inputs[4]; }
+
+ const Tensor *recurrent_to_input_weights() const { return _inputs[5]; }
+ const Tensor *recurrent_to_forget_weights() const { return _inputs[6]; }
+ const Tensor *recurrent_to_cell_weights() const { return _inputs[7]; }
+ const Tensor *recurrent_to_output_weights() const { return _inputs[8]; }
+
+ const Tensor *cell_to_input_weights() const { return _inputs[9]; }
+ const Tensor *cell_to_forget_weights() const { return _inputs[10]; }
+ const Tensor *cell_to_output_weights() const { return _inputs[11]; }
+
+ const Tensor *input_gate_bias() const { return _inputs[12]; }
+ const Tensor *forget_gate_bias() const { return _inputs[13]; }
+ const Tensor *cell_gate_bias() const { return _inputs[14]; }
+ const Tensor *output_gate_bias() const { return _inputs[15]; }
+
+ const Tensor *projection_weights() const { return _inputs[16]; }
+ const Tensor *projection_bias() const { return _inputs[17]; }
+
+ const Tensor *output_state() const { return _inputs[18]; }
+ const Tensor *cell_state() const { return _inputs[19]; }
+
+ const Tensor *input_layer_norm_coefficients() const { return _inputs[20]; }
+ const Tensor *forget_layer_norm_coefficients() const { return _inputs[21]; }
+ const Tensor *cell_layer_norm_coefficients() const { return _inputs[22]; }
+ const Tensor *output_layer_norm_coefficients() const { return _inputs[23]; }
+
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+
+ void check_input_tensor_dimensions(int n_input, int n_output, int n_cell, bool use_layer_norm,
+ bool is_integer);
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_UNIDIRECTIONALSEQUENCELSTM_H
diff --git a/compiler/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.test.cpp b/compiler/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.test.cpp
new file mode 100644
index 000000000..df059cfcc
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.test.cpp
@@ -0,0 +1,565 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/UnidirectionalSequenceLSTM.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class UnidirectionalSequenceLSTMTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// NOTE from NoCifgNoPeepholeNoProjectionNoClippingUnidirectionalLstmTest
+TEST_F(UnidirectionalSequenceLSTMTest, FloatTest)
+{
+ const int32_t n_batch = 1;
+ const int32_t n_input = 2;
+ const int32_t n_cell = 4;
+ const int32_t n_output = 4;
+ const int32_t sequence_length = 3;
+
+ std::vector<float> input_to_input_weights = {-0.45018822, -0.02338299, -0.0870589, -0.34550029,
+ 0.04266912, -0.15680569, -0.34856534, 0.43890524};
+
+ std::vector<float> input_to_cell_weights = {-0.50013041, 0.1370284, 0.11810488, 0.2013163,
+ -0.20583314, 0.44344562, 0.22077113, -0.29909778};
+
+ std::vector<float> input_to_forget_weights = {0.09701663, 0.20334584, -0.50592935, -0.31343272,
+ -0.40032279, 0.44781327, 0.01387155, -0.35593212};
+
+ std::vector<float> input_to_output_weights = {-0.25065863, -0.28290087, 0.04613829, 0.40525138,
+ 0.44272184, 0.03897077, -0.1556896, 0.19487578};
+
+ std::vector<float> input_gate_bias = {0., 0., 0., 0.};
+ std::vector<float> forget_gate_bias = {1., 1., 1., 1.};
+ std::vector<float> cell_gate_bias = {0., 0., 0., 0.};
+ std::vector<float> output_gate_bias = {0., 0., 0., 0.};
+
+ std::vector<float> recurrent_to_input_weights = {
+ -0.0063535, -0.2042388, 0.31454784, -0.35746509, 0.28902304, 0.08183324,
+ -0.16555229, 0.02286911, -0.13566875, 0.03034258, 0.48091322, -0.12528998,
+ 0.24077177, -0.51332325, -0.33502164, 0.10629296};
+
+ std::vector<float> recurrent_to_forget_weights = {
+ -0.48684245, -0.06655136, 0.42224967, 0.2112639, 0.27654213, 0.20864892,
+ -0.07646349, 0.45877004, 0.00141793, -0.14609534, 0.36447752, 0.09196436,
+ 0.28053468, 0.01560611, -0.20127171, -0.01140004};
+
+ std::vector<float> recurrent_to_cell_weights = {
+ -0.3407414, 0.24443203, -0.2078532, 0.26320225, 0.05695659, -0.00123841,
+ -0.4744786, -0.35869038, -0.06418842, -0.13502428, -0.501764, 0.22830659,
+ -0.46367589, 0.26016325, -0.03894562, -0.16368064};
+
+ std::vector<float> recurrent_to_output_weights = {
+ 0.43385774, -0.17194885, 0.2718237, 0.09215671, 0.24107647, -0.39835793,
+ 0.18212086, 0.01301402, 0.48572797, -0.50656658, 0.20047462, -0.20607421,
+ -0.51818722, -0.15390486, 0.0468148, 0.39922136};
+
+ Shape input_to_input_weights_shape{n_cell, n_input};
+ Shape input_to_cell_weights_shape{n_cell, n_input};
+ Shape input_to_forget_weights_shape{n_cell, n_input};
+ Shape input_to_output_weights_shape{n_cell, n_input};
+
+ Shape input_gate_bias_shape{n_cell};
+ Shape forget_gate_bias_shape{n_cell};
+ Shape cell_gate_bias_shape{n_cell};
+ Shape output_gate_bias_shape{n_cell};
+
+ Shape recurrent_to_input_weights_shape{n_cell, n_output};
+ Shape recurrent_to_cell_weights_shape{n_cell, n_output};
+ Shape recurrent_to_forget_weights_shape{n_cell, n_output};
+ Shape recurrent_to_output_weights_shape{n_cell, n_output};
+
+ Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+ input_to_input_weights_shape, input_to_input_weights, _memory_manager.get());
+ Tensor input_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+ input_to_cell_weights_shape, input_to_cell_weights, _memory_manager.get());
+ Tensor input_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+ input_to_forget_weights_shape, input_to_forget_weights, _memory_manager.get());
+ Tensor input_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+ input_to_output_weights_shape, input_to_output_weights, _memory_manager.get());
+
+ Tensor input_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+ input_gate_bias_shape, input_gate_bias, _memory_manager.get());
+ Tensor forget_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+ forget_gate_bias_shape, forget_gate_bias, _memory_manager.get());
+ Tensor cell_gate_bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(cell_gate_bias_shape, cell_gate_bias, _memory_manager.get());
+ Tensor output_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+ output_gate_bias_shape, output_gate_bias, _memory_manager.get());
+
+ Tensor recurrent_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+ recurrent_to_input_weights_shape, recurrent_to_input_weights, _memory_manager.get());
+ Tensor recurrent_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+ recurrent_to_cell_weights_shape, recurrent_to_cell_weights, _memory_manager.get());
+ Tensor recurrent_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+ recurrent_to_forget_weights_shape, recurrent_to_forget_weights, _memory_manager.get());
+ Tensor recurrent_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+ recurrent_to_output_weights_shape, recurrent_to_output_weights, _memory_manager.get());
+
+ std::vector<float> input_data{2., 3., 3., 4., 1., 1.};
+ Shape input_shape{sequence_length, n_batch, n_input};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+
+ Shape output_state_shape{n_batch, n_output};
+ Tensor output_state_tensor = makeOutputTensor(DataType::FLOAT32);
+ output_state_tensor.resize(output_state_shape);
+
+ Shape cell_state_shape{n_batch, n_cell};
+ Tensor cell_state_tensor = makeOutputTensor(DataType::FLOAT32);
+ cell_state_tensor.resize(cell_state_shape);
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+
+ UnidirectionalSequenceLSTMParams params{};
+ params.activation = Activation::TANH;
+ params.cell_clip = 0.0;
+ params.proj_clip = 0.0;
+ params.time_major = true;
+ params.asymmetric_quantize_inputs = false;
+
+ UnidirectionalSequenceLSTM kernel(
+ &input_tensor, &input_to_input_weights_tensor, &input_to_forget_weights_tensor,
+ &input_to_cell_weights_tensor, &input_to_output_weights_tensor,
+ &recurrent_to_input_weights_tensor, &recurrent_to_forget_weights_tensor,
+ &recurrent_to_cell_weights_tensor, &recurrent_to_output_weights_tensor, nullptr, nullptr,
+ nullptr, &input_gate_bias_tensor, &forget_gate_bias_tensor, &cell_gate_bias_tensor,
+ &output_gate_bias_tensor, nullptr, nullptr, &output_state_tensor, &cell_state_tensor, nullptr,
+ nullptr, nullptr, nullptr, &output_tensor, &scratchpad_1, &scratchpad_2, &scratchpad_3, params);
+
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(output_state_tensor);
+ _memory_manager->allocate_memory(cell_state_tensor);
+ _memory_manager->allocate_memory(scratchpad_1);
+ _memory_manager->allocate_memory(scratchpad_2);
+ _memory_manager->allocate_memory(scratchpad_3);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{-0.02973187, 0.1229473, 0.20885126, -0.15358765,
+ -0.03716109, 0.12507336, 0.41193449, -0.20860538,
+ -0.15053082, 0.09120187, 0.24278517, -0.12222792};
+
+ std::vector<float> ref_output_shape{sequence_length, n_batch, n_output};
+ const float tolerance = 1e-5;
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, tolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(UnidirectionalSequenceLSTMTest, FloatTest_batch)
+{
+ const int32_t n_batch = 1;
+ const int32_t n_input = 2;
+ const int32_t n_cell = 4;
+ const int32_t n_output = 4;
+ const int32_t sequence_length = 3;
+
+ std::vector<float> input_to_input_weights = {-0.45018822, -0.02338299, -0.0870589, -0.34550029,
+ 0.04266912, -0.15680569, -0.34856534, 0.43890524};
+
+ std::vector<float> input_to_cell_weights = {-0.50013041, 0.1370284, 0.11810488, 0.2013163,
+ -0.20583314, 0.44344562, 0.22077113, -0.29909778};
+
+ std::vector<float> input_to_forget_weights = {0.09701663, 0.20334584, -0.50592935, -0.31343272,
+ -0.40032279, 0.44781327, 0.01387155, -0.35593212};
+
+ std::vector<float> input_to_output_weights = {-0.25065863, -0.28290087, 0.04613829, 0.40525138,
+ 0.44272184, 0.03897077, -0.1556896, 0.19487578};
+
+ std::vector<float> input_gate_bias = {0., 0., 0., 0.};
+ std::vector<float> forget_gate_bias = {1., 1., 1., 1.};
+ std::vector<float> cell_gate_bias = {0., 0., 0., 0.};
+ std::vector<float> output_gate_bias = {0., 0., 0., 0.};
+
+ std::vector<float> recurrent_to_input_weights = {
+ -0.0063535, -0.2042388, 0.31454784, -0.35746509, 0.28902304, 0.08183324,
+ -0.16555229, 0.02286911, -0.13566875, 0.03034258, 0.48091322, -0.12528998,
+ 0.24077177, -0.51332325, -0.33502164, 0.10629296};
+
+ std::vector<float> recurrent_to_forget_weights = {
+ -0.48684245, -0.06655136, 0.42224967, 0.2112639, 0.27654213, 0.20864892,
+ -0.07646349, 0.45877004, 0.00141793, -0.14609534, 0.36447752, 0.09196436,
+ 0.28053468, 0.01560611, -0.20127171, -0.01140004};
+
+ std::vector<float> recurrent_to_cell_weights = {
+ -0.3407414, 0.24443203, -0.2078532, 0.26320225, 0.05695659, -0.00123841,
+ -0.4744786, -0.35869038, -0.06418842, -0.13502428, -0.501764, 0.22830659,
+ -0.46367589, 0.26016325, -0.03894562, -0.16368064};
+
+ std::vector<float> recurrent_to_output_weights = {
+ 0.43385774, -0.17194885, 0.2718237, 0.09215671, 0.24107647, -0.39835793,
+ 0.18212086, 0.01301402, 0.48572797, -0.50656658, 0.20047462, -0.20607421,
+ -0.51818722, -0.15390486, 0.0468148, 0.39922136};
+
+ Shape input_to_input_weights_shape{n_cell, n_input};
+ Shape input_to_cell_weights_shape{n_cell, n_input};
+ Shape input_to_forget_weights_shape{n_cell, n_input};
+ Shape input_to_output_weights_shape{n_cell, n_input};
+
+ Shape input_gate_bias_shape{n_cell};
+ Shape forget_gate_bias_shape{n_cell};
+ Shape cell_gate_bias_shape{n_cell};
+ Shape output_gate_bias_shape{n_cell};
+
+ Shape recurrent_to_input_weights_shape{n_cell, n_output};
+ Shape recurrent_to_cell_weights_shape{n_cell, n_output};
+ Shape recurrent_to_forget_weights_shape{n_cell, n_output};
+ Shape recurrent_to_output_weights_shape{n_cell, n_output};
+
+ Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+ input_to_input_weights_shape, input_to_input_weights, _memory_manager.get());
+ Tensor input_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+ input_to_cell_weights_shape, input_to_cell_weights, _memory_manager.get());
+ Tensor input_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+ input_to_forget_weights_shape, input_to_forget_weights, _memory_manager.get());
+ Tensor input_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+ input_to_output_weights_shape, input_to_output_weights, _memory_manager.get());
+
+ Tensor input_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+ input_gate_bias_shape, input_gate_bias, _memory_manager.get());
+ Tensor forget_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+ forget_gate_bias_shape, forget_gate_bias, _memory_manager.get());
+ Tensor cell_gate_bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(cell_gate_bias_shape, cell_gate_bias, _memory_manager.get());
+ Tensor output_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+ output_gate_bias_shape, output_gate_bias, _memory_manager.get());
+
+ Tensor recurrent_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+ recurrent_to_input_weights_shape, recurrent_to_input_weights, _memory_manager.get());
+ Tensor recurrent_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+ recurrent_to_cell_weights_shape, recurrent_to_cell_weights, _memory_manager.get());
+ Tensor recurrent_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+ recurrent_to_forget_weights_shape, recurrent_to_forget_weights, _memory_manager.get());
+ Tensor recurrent_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+ recurrent_to_output_weights_shape, recurrent_to_output_weights, _memory_manager.get());
+
+ std::vector<float> input_data{2., 3., 3., 4., 1., 1.};
+ Shape input_shape{n_batch, sequence_length, n_input};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+
+ Shape output_state_shape{n_batch, n_output};
+ Tensor output_state_tensor = makeOutputTensor(DataType::FLOAT32);
+ output_state_tensor.resize(output_state_shape);
+
+ Shape cell_state_shape{n_batch, n_cell};
+ Tensor cell_state_tensor = makeOutputTensor(DataType::FLOAT32);
+ cell_state_tensor.resize(cell_state_shape);
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+
+ UnidirectionalSequenceLSTMParams params{};
+ params.activation = Activation::TANH;
+ params.cell_clip = 0.0;
+ params.proj_clip = 0.0;
+ params.time_major = false;
+ params.asymmetric_quantize_inputs = false;
+
+ UnidirectionalSequenceLSTM kernel(
+ &input_tensor, &input_to_input_weights_tensor, &input_to_forget_weights_tensor,
+ &input_to_cell_weights_tensor, &input_to_output_weights_tensor,
+ &recurrent_to_input_weights_tensor, &recurrent_to_forget_weights_tensor,
+ &recurrent_to_cell_weights_tensor, &recurrent_to_output_weights_tensor, nullptr, nullptr,
+ nullptr, &input_gate_bias_tensor, &forget_gate_bias_tensor, &cell_gate_bias_tensor,
+ &output_gate_bias_tensor, nullptr, nullptr, &output_state_tensor, &cell_state_tensor, nullptr,
+ nullptr, nullptr, nullptr, &output_tensor, &output_state_tensor, &cell_state_tensor,
+ &scratchpad_1, params);
+
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(output_state_tensor);
+ _memory_manager->allocate_memory(cell_state_tensor);
+ _memory_manager->allocate_memory(scratchpad_1);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{-0.02973187, 0.1229473, 0.20885126, -0.15358765,
+ -0.03716109, 0.12507336, 0.41193449, -0.20860538,
+ -0.15053082, 0.09120187, 0.24278517, -0.12222792};
+
+ std::vector<float> ref_output_shape{n_batch, sequence_length, n_output};
+ const float tolerance = 1e-5;
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, tolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(UnidirectionalSequenceLSTMTest, FloatTest_simple)
+{
+ const int32_t n_batch = 1;
+ const int32_t n_input = 1;
+ const int32_t n_cell = 1;
+ const int32_t n_output = 1;
+ const int32_t sequence_length = 1;
+
+ std::vector<float> input_to_input_weights = {0.329067};
+ std::vector<float> input_to_forget_weights = {0.308059};
+ std::vector<float> input_to_cell_weights = {0.152916};
+ std::vector<float> input_to_output_weights = {-0.476033};
+
+ std::vector<float> input_gate_bias = {0.};
+ std::vector<float> forget_gate_bias = {1.};
+ std::vector<float> cell_gate_bias = {0.};
+ std::vector<float> output_gate_bias = {0.};
+
+ std::vector<float> recurrent_to_input_weights = {0.207806};
+ std::vector<float> recurrent_to_forget_weights = {0.028718};
+ std::vector<float> recurrent_to_cell_weights = {-0.182756};
+ std::vector<float> recurrent_to_output_weights = {-0.960517};
+
+ Shape input_to_input_weights_shape{n_cell, n_input};
+ Shape input_to_cell_weights_shape{n_cell, n_input};
+ Shape input_to_forget_weights_shape{n_cell, n_input};
+ Shape input_to_output_weights_shape{n_cell, n_input};
+
+ Shape input_gate_bias_shape{n_cell};
+ Shape forget_gate_bias_shape{n_cell};
+ Shape cell_gate_bias_shape{n_cell};
+ Shape output_gate_bias_shape{n_cell};
+
+ Shape recurrent_to_input_weights_shape{n_cell, n_output};
+ Shape recurrent_to_cell_weights_shape{n_cell, n_output};
+ Shape recurrent_to_forget_weights_shape{n_cell, n_output};
+ Shape recurrent_to_output_weights_shape{n_cell, n_output};
+
+ Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+ input_to_input_weights_shape, input_to_input_weights, _memory_manager.get());
+ Tensor input_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+ input_to_cell_weights_shape, input_to_cell_weights, _memory_manager.get());
+ Tensor input_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+ input_to_forget_weights_shape, input_to_forget_weights, _memory_manager.get());
+ Tensor input_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+ input_to_output_weights_shape, input_to_output_weights, _memory_manager.get());
+
+ Tensor input_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+ input_gate_bias_shape, input_gate_bias, _memory_manager.get());
+ Tensor forget_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+ forget_gate_bias_shape, forget_gate_bias, _memory_manager.get());
+ Tensor cell_gate_bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(cell_gate_bias_shape, cell_gate_bias, _memory_manager.get());
+ Tensor output_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+ output_gate_bias_shape, output_gate_bias, _memory_manager.get());
+
+ Tensor recurrent_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+ recurrent_to_input_weights_shape, recurrent_to_input_weights, _memory_manager.get());
+ Tensor recurrent_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+ recurrent_to_cell_weights_shape, recurrent_to_cell_weights, _memory_manager.get());
+ Tensor recurrent_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+ recurrent_to_forget_weights_shape, recurrent_to_forget_weights, _memory_manager.get());
+ Tensor recurrent_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+ recurrent_to_output_weights_shape, recurrent_to_output_weights, _memory_manager.get());
+
+ std::vector<float> input_data{0.03653763};
+ Shape input_shape{n_batch, sequence_length, n_input};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+
+ Shape output_state_shape{n_batch, n_output};
+ Tensor output_state_tensor = makeOutputTensor(DataType::FLOAT32);
+ output_state_tensor.resize(output_state_shape);
+
+ Shape cell_state_shape{n_batch, n_cell};
+ Tensor cell_state_tensor = makeOutputTensor(DataType::FLOAT32);
+ cell_state_tensor.resize(cell_state_shape);
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+
+ UnidirectionalSequenceLSTMParams params{};
+ params.activation = Activation::TANH;
+ params.cell_clip = 10.0;
+ params.proj_clip = 0.0;
+ params.time_major = false;
+ params.asymmetric_quantize_inputs = false;
+
+ UnidirectionalSequenceLSTM kernel(
+ &input_tensor, &input_to_input_weights_tensor, &input_to_forget_weights_tensor,
+ &input_to_cell_weights_tensor, &input_to_output_weights_tensor,
+ &recurrent_to_input_weights_tensor, &recurrent_to_forget_weights_tensor,
+ &recurrent_to_cell_weights_tensor, &recurrent_to_output_weights_tensor, nullptr, nullptr,
+ nullptr, &input_gate_bias_tensor, &forget_gate_bias_tensor, &cell_gate_bias_tensor,
+ &output_gate_bias_tensor, nullptr, nullptr, &output_state_tensor, &cell_state_tensor, nullptr,
+ nullptr, nullptr, nullptr, &output_tensor, &output_state_tensor, &cell_state_tensor,
+ &scratchpad_1, params);
+
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(output_state_tensor);
+ _memory_manager->allocate_memory(cell_state_tensor);
+ _memory_manager->allocate_memory(scratchpad_1);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0.00139296};
+ std::vector<float> ref_output_shape{n_batch, sequence_length, n_output};
+ const float tolerance = 1e-5;
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, tolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(UnidirectionalSequenceLSTMTest, Unsupported_Type_Configure_NEG)
+{
+ const int32_t n_batch = 1;
+ const int32_t n_input = 2;
+ const int32_t n_cell = 4;
+ const int32_t n_output = 4;
+ const int32_t sequence_length = 3;
+
+  std::vector<int8_t> input_data{2, 3, 3, 4, 1, 1}; // int8 is not supported as of now
+ Shape input_shape{sequence_length, n_batch, n_input};
+ Tensor input_tensor =
+ makeInputTensor<DataType::S8>(input_shape, input_data, _memory_manager.get());
+
+ std::vector<float> input_to_input_weights = {-0.45018822, -0.02338299, -0.0870589, -0.34550029,
+ 0.04266912, -0.15680569, -0.34856534, 0.43890524};
+ Shape input_to_input_weights_shape{n_cell, n_input};
+ Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+ input_to_input_weights_shape, input_to_input_weights, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+
+ UnidirectionalSequenceLSTMParams params{};
+ params.activation = Activation::TANH;
+ params.cell_clip = 0.0;
+ params.proj_clip = 0.0;
+ params.time_major = true;
+ params.asymmetric_quantize_inputs = false;
+
+ UnidirectionalSequenceLSTM kernel(
+ &input_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+ &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+ &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+ nullptr, nullptr, nullptr, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+ &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr,
+ &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr, nullptr,
+ nullptr, &output_tensor, &scratchpad_1, &scratchpad_2, &scratchpad_3, params);
+
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(UnidirectionalSequenceLSTMTest, Invalid_Input_Shape_NEG)
+{
+ const int32_t n_batch = 1;
+ const int32_t n_input = 2;
+ const int32_t n_cell = 4;
+ const int32_t n_output = 4;
+ const int32_t sequence_length = 3;
+
+ std::vector<float> input_data{2., 3., 3., 4., 1., 1.};
+  Shape input_shape{sequence_length, n_input}; // wrong: rank-3 {sequence_length, n_batch, n_input} is required
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+
+ std::vector<float> input_to_input_weights = {-0.45018822, -0.02338299, -0.0870589, -0.34550029,
+ 0.04266912, -0.15680569, -0.34856534, 0.43890524};
+ Shape input_to_input_weights_shape{n_cell, n_input};
+ Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+ input_to_input_weights_shape, input_to_input_weights, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+
+ UnidirectionalSequenceLSTMParams params{};
+ params.activation = Activation::TANH;
+ params.cell_clip = 0.0;
+ params.proj_clip = 0.0;
+ params.time_major = true;
+ params.asymmetric_quantize_inputs = false;
+
+ UnidirectionalSequenceLSTM kernel(
+ &input_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+ &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+ &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+ nullptr, nullptr, nullptr, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+ &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr,
+ &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr, nullptr,
+ nullptr, &output_tensor, &scratchpad_1, &scratchpad_2, &scratchpad_3, params);
+
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(UnidirectionalSequenceLSTMTest, Invalid_Input_Shape_2_NEG)
+{
+ const int32_t n_batch = 1;
+ const int32_t n_input = 2;
+ const int32_t n_cell = 4;
+ const int32_t n_output = 4;
+ const int32_t sequence_length = 3;
+
+ std::vector<float> input_data{2., 3., 3., 4., 1., 1.};
+ Shape input_shape{sequence_length, n_batch, n_input};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+
+ std::vector<float> input_to_input_weights = {-0.45018822, -0.02338299, -0.0870589, -0.34550029,
+ 0.04266912, -0.15680569, -0.34856534, 0.43890524};
+ Shape input_to_input_weights_shape{n_cell, n_input};
+ Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+ input_to_input_weights_shape, input_to_input_weights, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+
+ UnidirectionalSequenceLSTMParams params{};
+ params.activation = Activation::TANH;
+ params.cell_clip = 0.0;
+ params.proj_clip = 0.0;
+ params.time_major = true;
+ params.asymmetric_quantize_inputs = false;
+
+  // NOTE provide wrongly-shaped weight tensors
+ UnidirectionalSequenceLSTM kernel(
+ &input_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+ &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+ &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+ nullptr, nullptr, nullptr, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+ &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr,
+ &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr, nullptr,
+ nullptr, &output_tensor, &scratchpad_1, &scratchpad_2, &scratchpad_3, params);
+
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Unpack.cpp b/compiler/luci-interpreter/src/kernels/Unpack.cpp
index 834b79926..9127241c0 100644
--- a/compiler/luci-interpreter/src/kernels/Unpack.cpp
+++ b/compiler/luci-interpreter/src/kernels/Unpack.cpp
@@ -29,7 +29,7 @@ namespace kernels
{
Unpack::Unpack(const Tensor *input, std::vector<Tensor *> outputs, const UnpackParams &params)
- : KernelWithParams<UnpackParams>({input}, std::move(outputs), params)
+ : KernelWithParams<UnpackParams>({input}, std::move(outputs), params)
{
}
diff --git a/compiler/luci-interpreter/src/kernels/Unpack.test.cpp b/compiler/luci-interpreter/src/kernels/Unpack.test.cpp
index f70c5847a..9384ddc83 100644
--- a/compiler/luci-interpreter/src/kernels/Unpack.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Unpack.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/Unpack.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -32,10 +33,12 @@ void Check(int axis, Shape input_shape, std::initializer_list<T> input_data,
const std::vector<std::initializer_list<int32_t>> &exp_output_shape,
std::vector<std::initializer_list<T>> exp_output_data)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
constexpr DataType element_type = getElementType<T>();
const int num_outputs = input_shape.dim(axis < 0 ? axis + input_shape.num_dims() : axis);
- Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
std::vector<Tensor> output_tensors;
output_tensors.reserve(num_outputs);
for (int i = 0; i < num_outputs; ++i)
@@ -54,6 +57,10 @@ void Check(int axis, Shape input_shape, std::initializer_list<T> input_data,
Unpack kernel(&input_tensor, std::move(output_tensor_ptrs), params);
kernel.configure();
+ for (int i = 0; i < num_outputs; i++)
+ {
+ memory_manager->allocate_memory(output_tensors[i]);
+ }
kernel.execute();
for (int i = 0; i < num_outputs; ++i)
@@ -68,7 +75,7 @@ template <typename T> class UnpackTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(UnpackTest, DataTypes);
+TYPED_TEST_SUITE(UnpackTest, DataTypes);
TYPED_TEST(UnpackTest, ThreeOutputs)
{
@@ -121,11 +128,11 @@ TYPED_TEST(UnpackTest, ThreeDimensionsTwoOutputs)
TYPED_TEST(UnpackTest, FiveDimensionsTwoOutputs)
{
Check<TypeParam>(
- /*axis=*/2, /*input_shape=*/{2, 2, 2, 2, 1},
- /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
- /*exp_output_shape=*/{{2, 2, 2, 1}, {2, 2, 2, 1}},
- /*exp_output_data=*/
- {{1, 2, 5, 6, 9, 10, 13, 14}, {3, 4, 7, 8, 11, 12, 15, 16}});
+ /*axis=*/2, /*input_shape=*/{2, 2, 2, 2, 1},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ /*exp_output_shape=*/{{2, 2, 2, 1}, {2, 2, 2, 1}},
+ /*exp_output_data=*/
+ {{1, 2, 5, 6, 9, 10, 13, 14}, {3, 4, 7, 8, 11, 12, 15, 16}});
}
TYPED_TEST(UnpackTest, VectorToScalar)
diff --git a/compiler/luci-interpreter/src/kernels/Utils.cpp b/compiler/luci-interpreter/src/kernels/Utils.cpp
index b9e7738a9..a04dbcc0f 100644
--- a/compiler/luci-interpreter/src/kernels/Utils.cpp
+++ b/compiler/luci-interpreter/src/kernels/Utils.cpp
@@ -27,17 +27,39 @@ namespace luci_interpreter
namespace kernels
{
-void calculateActivationRange(Activation activation, float *activation_min, float *activation_max)
+TfLiteFusedActivation getTfLiteActivation(Activation activation)
+{
+ switch (activation)
+ {
+ case luci::FusedActFunc::RELU:
+ return kTfLiteActRelu;
+ case luci::FusedActFunc::RELU6:
+ return kTfLiteActRelu6;
+ case luci::FusedActFunc::RELU_N1_TO_1:
+ return kTfLiteActReluN1To1;
+ case luci::FusedActFunc::TANH:
+ return kTfLiteActTanh;
+ case luci::FusedActFunc::SIGN_BIT:
+ return kTfLiteActSignBit;
+ case luci::FusedActFunc::NONE:
+ return kTfLiteActNone;
+ default:
+ throw std::runtime_error("Unsupported activation type");
+ }
+}
+
+template <typename T>
+void calculateActivationRange(Activation activation, T *activation_min, T *activation_max)
{
switch (activation)
{
case Activation::NONE:
- *activation_min = std::numeric_limits<float>::lowest();
- *activation_max = std::numeric_limits<float>::max();
+ *activation_min = std::numeric_limits<T>::lowest();
+ *activation_max = std::numeric_limits<T>::max();
break;
case Activation::RELU:
*activation_min = 0;
- *activation_max = std::numeric_limits<float>::max();
+ *activation_max = std::numeric_limits<T>::max();
break;
case Activation::RELU_N1_TO_1:
*activation_min = -1;
@@ -52,6 +74,13 @@ void calculateActivationRange(Activation activation, float *activation_min, floa
}
}
+template void calculateActivationRange(Activation activation, float *activation_min,
+ float *activation_max);
+template void calculateActivationRange(Activation activation, int32_t *activation_min,
+ int32_t *activation_max);
+template void calculateActivationRange(Activation activation, int64_t *activation_min,
+ int64_t *activation_max);
+
static void calculateActivationRangeQuantizedImpl(Activation activation, int32_t qmin, int32_t qmax,
const Tensor *output, int32_t *activation_min,
int32_t *activation_max)
@@ -66,6 +95,7 @@ static void calculateActivationRangeQuantizedImpl(Activation activation, int32_t
switch (activation)
{
case Activation::NONE:
+ case Activation::TANH:
*activation_min = qmin;
*activation_max = qmax;
break;
@@ -89,20 +119,23 @@ static void calculateActivationRangeQuantizedImpl(Activation activation, int32_t
void calculateActivationRangeQuantized(Activation activation, const Tensor *output,
int32_t *activation_min, int32_t *activation_max)
{
+ assert(output->zero_points().size() == 1);
int32_t qmin{};
int32_t qmax{};
switch (output->element_type())
{
case DataType::U8:
- qmin = std::numeric_limits<uint8_t>::min();
+ qmin = 0;
qmax = std::numeric_limits<uint8_t>::max();
break;
case DataType::S8:
- qmin = std::numeric_limits<int8_t>::min();
+ qmin = -std::numeric_limits<int8_t>::max();
qmax = std::numeric_limits<int8_t>::max();
break;
case DataType::S16:
- qmin = std::numeric_limits<int16_t>::min();
+ // For now, assume that signed int16 type implies signed symmetric quantization.
+ assert(output->zero_point() == 0);
+ qmin = -std::numeric_limits<int16_t>::max();
qmax = std::numeric_limits<int16_t>::max();
break;
default:
@@ -171,7 +204,11 @@ Shape calculateShapeForBroadcast(const Shape &input1_shape, const Shape &input2_
{
const int32_t input1_dim = i < num_input1_dims ? input1_shape.dim(num_input1_dims - i - 1) : 1;
const int32_t input2_dim = i < num_input2_dims ? input2_shape.dim(num_input2_dims - i - 1) : 1;
- assert(input1_dim == input2_dim || input1_dim == 1 || input2_dim == 1);
+
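+    // e.g. {2, 1, 3} and {4, 3} broadcast to {2, 4, 3}, while {2, 2} and {3} cannot broadcast.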
+ bool need_broadcast = input1_dim != input2_dim;
+ bool can_broadcast = input1_dim == 1 || input2_dim == 1;
+ LUCI_INTERPRETER_CHECK(!need_broadcast || can_broadcast);
+
output_shape.dim(num_out_dims - i - 1) = std::max(input1_dim, input2_dim);
}
diff --git a/compiler/luci-interpreter/src/kernels/Utils.h b/compiler/luci-interpreter/src/kernels/Utils.h
index 7927151c6..e975585cd 100644
--- a/compiler/luci-interpreter/src/kernels/Utils.h
+++ b/compiler/luci-interpreter/src/kernels/Utils.h
@@ -21,10 +21,12 @@
#include "core/KernelParams.h"
#include "luci_interpreter/core/Tensor.h"
+#include <tensorflow/lite/kernels/internal/tensor_utils.h>
#include <tensorflow/lite/kernels/internal/types.h>
#include <cassert>
#include <cstdint>
+#include <stdexcept>
namespace luci_interpreter
{
@@ -70,11 +72,49 @@ inline int32_t computeOutputSize(Padding padding, int32_t image_size, int32_t fi
}
}
-void calculateActivationRange(Activation activation, float *activation_min, float *activation_max);
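+// Row-major flattened offset of element (d0, d1, d2, d3) in a rank-4 tensor,
+// e.g. for an NHWC shape {N, H, W, C}: offset(n, h, w, c) = ((n * H + h) * W + w) * C + c.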
+inline int32_t calcOffset(const Shape &shape, int32_t d0, int32_t d1, int32_t d2, int32_t d3)
+{
+ return ((d0 * shape.dim(1) + d1) * shape.dim(2) + d2) * shape.dim(3) + d3;
+}
+
+TfLiteFusedActivation getTfLiteActivation(Activation activation);
+
+template <typename T>
+void calculateActivationRange(Activation activation, T *activation_min, T *activation_max);
void calculateActivationRangeQuantized(Activation activation, const Tensor *output,
int32_t *activation_min, int32_t *activation_max);
+template <typename T> constexpr bool one_of_types() { return false; }
+
+// Checks if T is equal to one of {U,Other} types
+template <typename T, typename U, typename... Other> constexpr bool one_of_types()
+{
+ return std::is_same<T, U>::value || one_of_types<T, Other...>();
+}
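+// e.g. one_of_types<float, float, int32_t, int64_t>() == true,
+//      one_of_types<double, float, int32_t, int64_t>() == false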
+
+/**
+ * Fills the activation min/max parameters depending on the given data type and activation.
+ *
+ * T is a template parameter, so after optimization only the branch required for T remains.
+ *
+ * @tparam T data type of the arithmetic operation's output tensor
+ * @param p tflite ArithmeticParams to fill
+ * @param act luci_interpreter::Activation of the arithmetic operation
+ */
+template <typename T>
+void fillArithmeticActivationRange(tflite::ArithmeticParams &p, Activation act)
+{
+ static_assert(one_of_types<T, float, int32_t, int64_t>(), "Unsupported dtype");
+
+  if (std::is_same<T, float>::value)
+    calculateActivationRange(act, &p.float_activation_min, &p.float_activation_max);
+  else if (std::is_same<T, int32_t>::value)
+    calculateActivationRange(act, &p.quantized_activation_min, &p.quantized_activation_max);
+  else
+    calculateActivationRange(act, &p.int64_activation_min, &p.int64_activation_max);
+}
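+// Example: for a float arithmetic op with fused RELU,
+//   tflite::ArithmeticParams params{};
+//   fillArithmeticActivationRange<float>(params, Activation::RELU);
+// leaves params.float_activation_min == 0 and params.float_activation_max == FLT_MAX.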
+
// Decompose a double multiplier into a Q0.31 int32 representation of its
// significand, and shift representation of its exponent.
//
@@ -94,6 +134,63 @@ void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quan
Shape calculateShapeForBroadcast(const Shape &input1_shape, const Shape &input2_shape);
+inline double getQuantizedConvolutionMultipler(float input_scale, float filter_scale,
+ float output_scale)
+{
+ const double input_product_scale = static_cast<double>(input_scale * filter_scale);
+ LUCI_INTERPRETER_CHECK(input_product_scale >= 0);
+ return input_product_scale / static_cast<double>(output_scale);
+}
+
+// TODO rename getQuantizedConvolutionMultiplers to something more general;
+//      it is used for non-conv operators too
+inline std::vector<double> getQuantizedConvolutionMultiplers(float input_scale,
+ const std::vector<float> &filter_scale,
+ float output_scale)
+{
+ std::vector<double> effective_output_scales;
+ size_t n = filter_scale.size();
+ effective_output_scales.reserve(n);
+ for (size_t i = 0; i < n; ++i)
+ {
+ effective_output_scales.push_back(
+ getQuantizedConvolutionMultipler(input_scale, filter_scale[i], output_scale));
+ }
+ return effective_output_scales;
+}
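+// Example: input_scale = 0.5, per-channel filter_scale = {0.25, 0.1} and output_scale = 0.125
+// yield effective multipliers {1.0, 0.4}.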
+
+struct ChannelQuantMultipliers
+{
+ int shift;
+ int32_t multiplier;
+ ChannelQuantMultipliers() = default;
+};
+
+inline std::vector<ChannelQuantMultipliers>
+quantizeMultipliers(const std::vector<double> &effective_scale)
+{
+ size_t n = effective_scale.size();
+ std::vector<ChannelQuantMultipliers> params(n);
+ for (size_t i = 0; i < n; ++i)
+ {
+ quantizeMultiplier(effective_scale[i], &params[i].multiplier, &params[i].shift);
+ }
+ return params;
+}
+
+// Helper wrapper to hide broadcast logic
+template <typename T> class BroadcastableWrapper
+{
+public:
+ BroadcastableWrapper(const std::vector<T> &v) : _v(v), _stride(v.size() == 1 ? 0 : 1) {}
+
+ T operator[](int idx) { return _v[idx * _stride]; }
+
+private:
+ const std::vector<T> &_v;
+ int _stride;
+};
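+// A single-element vector is mapped to stride 0, so w[i] always reads element 0; this lets
+// per-tensor quantization parameters be indexed uniformly with per-channel ones, e.g.:
+//   std::vector<ChannelQuantMultipliers> per_tensor(1);
+//   BroadcastableWrapper<ChannelQuantMultipliers> w(per_tensor);
+//   w[0]; w[5]; // both return per_tensor[0]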
+
inline tflite::RuntimeShape getTensorShape(const Tensor *tensor)
{
if (tensor == nullptr)
@@ -176,7 +273,7 @@ public:
// Build with the tensors in 'tensor_list'.
explicit VectorOfQuantizedTensors(const std::vector<TensorT *> &tensor_list)
- : VectorOfTensors<uint8_t, is_const>(tensor_list)
+ : VectorOfTensors<uint8_t, is_const>(tensor_list)
{
for (TensorT *tensor : tensor_list)
{
diff --git a/compiler/luci-interpreter/src/kernels/While.cpp b/compiler/luci-interpreter/src/kernels/While.cpp
new file mode 100644
index 000000000..153bd1a99
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/While.cpp
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/While.h"
+#include "kernels/Utils.h"
+
+#include <cstring>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+namespace
+{
+
+void copy(const std::vector<const Tensor *> &src, const std::vector<Tensor *> &dst)
+{
+ for (size_t i = 0; i < src.size(); ++i)
+ {
+ LUCI_INTERPRETER_CHECK(dst[i]->element_type() == src[i]->element_type());
+ dst[i]->resize(src[i]->shape());
+
+ const int32_t num_elements = src[i]->shape().num_elements();
+ const std::size_t element_size = getDataTypeSize(src[i]->element_type());
+ std::memcpy(dst[i]->data<void>(), src[i]->data<void>(), num_elements * element_size);
+ }
+}
+
+void copy(const std::vector<Tensor *> &src, const std::vector<Tensor *> &dst)
+{
+ std::vector<const Tensor *> const_src;
+ for (const auto &t : src)
+ const_src.push_back(t);
+ copy(const_src, dst);
+}
+
+// TODO: Think about how to allocate memory for outputs in the main graph
+void configureTensorsAllocations(const std::vector<Tensor *> &tensors, RuntimeGraph *run_graph)
+{
+ for (auto tensor : tensors)
+ run_graph->configureAllocations(tensor);
+}
+
+} // namespace
+
+While::While(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs,
+ RuntimeGraph *cond_graph, RuntimeGraph *body_graph)
+ : Kernel(std::move(inputs), std::move(outputs)), _cond_graph(cond_graph), _body_graph(body_graph)
+{
+}
+
+void While::configure()
+{
+ LUCI_INTERPRETER_CHECK(_body_graph->getInputTensors().size() == getInputTensors().size());
+ LUCI_INTERPRETER_CHECK(_body_graph->getOutputTensors().size() == getOutputTensors().size());
+ LUCI_INTERPRETER_CHECK(_body_graph->getOutputTensors().size() == getInputTensors().size());
+
+ LUCI_INTERPRETER_CHECK(_cond_graph->getInputTensors().size() == getInputTensors().size());
+
+ const auto &cond_outputs = _cond_graph->getOutputTensors();
+  LUCI_INTERPRETER_CHECK(cond_outputs.size() == 1);
+ LUCI_INTERPRETER_CHECK(cond_outputs[0]->element_type() == DataType::BOOL);
+}
+
+/**
+ * @note A dynamic shape such as {1, 0, 8} may fail in tensor->data()
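+ *
+ * Roughly: state = inputs; while (cond(state)) { state = body(state); } outputs = state.
+ * Data is shuttled through the cond graph's input tensors on each iteration.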
+ */
+void While::execute() const
+{
+ const auto &cond_inputs = _cond_graph->getInputTensors();
+ const auto &cond_outputs = _cond_graph->getOutputTensors();
+
+ configureTensorsAllocations(cond_inputs, _cond_graph);
+
+ copy(getInputTensors(), cond_inputs);
+
+ const auto &body_inputs = _body_graph->getInputTensors();
+ const auto &body_outputs = _body_graph->getOutputTensors();
+
+ configureTensorsAllocations(body_inputs, _body_graph);
+
+ while (true)
+ {
+ _cond_graph->execute();
+
+ bool cond_value = cond_outputs[0]->data<bool>()[0];
+ if (!cond_value)
+ break;
+
+ copy(cond_inputs, body_inputs);
+
+ _body_graph->execute();
+
+ copy(body_outputs, cond_inputs);
+ }
+
+ copy(cond_inputs, getOutputTensors());
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/While.h b/compiler/luci-interpreter/src/kernels/While.h
new file mode 100644
index 000000000..f758df3f3
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/While.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_WHILE_H
+#define LUCI_INTERPRETER_KERNELS_WHILE_H
+
+#include "core/Kernel.h"
+#include "core/RuntimeGraph.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class While : public Kernel
+{
+public:
+ While(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs, RuntimeGraph *cond_graph,
+ RuntimeGraph *body_graph);
+
+ const Tensor *input(int index) const { return _inputs[index]; }
+ Tensor *output(int index) const { return _outputs[index]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ RuntimeGraph *const _cond_graph = nullptr;
+ RuntimeGraph *const _body_graph = nullptr;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_WHILE_H
diff --git a/compiler/luci-interpreter/src/kernels/While.test.cpp b/compiler/luci-interpreter/src/kernels/While.test.cpp
new file mode 100644
index 000000000..cb8f89130
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/While.test.cpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "core/RuntimeModule.h"
+#include "kernels/Add.h"
+#include "kernels/Less.h"
+#include "kernels/While.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+RuntimeGraph *buildCondSubgraph(RuntimeModule *module, DataType dtype, Tensor *input_cond,
+ IMemoryManager *memory_manager)
+{
+ RuntimeGraph *graph = module->addGraph(memory_manager);
+ Tensor *input =
+ graph->addTensor(std::make_unique<Tensor>(dtype, Shape{}, AffineQuantization{}, ""));
+ Tensor *output =
+ graph->addTensor(std::make_unique<Tensor>(DataType::BOOL, Shape{}, AffineQuantization{}, ""));
+
+ memory_manager->allocate_memory(*input);
+ memory_manager->allocate_memory(*output);
+
+ graph->setInputTensors({input});
+ graph->setOutputTensors({output});
+
+ graph->addKernel(std::make_unique<Less>(input, input_cond, output));
+
+ return graph;
+}
+
+RuntimeGraph *buildBodySubgraph(RuntimeModule *module, DataType dtype, Tensor *input_add,
+ IMemoryManager *memory_manager)
+{
+ RuntimeGraph *graph = module->addGraph(memory_manager);
+ Tensor *input =
+ graph->addTensor(std::make_unique<Tensor>(dtype, Shape{}, AffineQuantization{}, ""));
+ Tensor *output =
+ graph->addTensor(std::make_unique<Tensor>(dtype, Shape{}, AffineQuantization{}, ""));
+
+ memory_manager->allocate_memory(*input);
+ memory_manager->allocate_memory(*output);
+
+ graph->setInputTensors({input});
+ graph->setOutputTensors({output});
+
+ AddParams params{};
+ params.activation = Activation::NONE;
+ graph->addKernel(std::make_unique<Add>(input, input_add, output, params));
+
+ return graph;
+}
+
+TEST(WhileTest, FloatLoop10)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input = makeInputTensor<DataType::FLOAT32>({1}, {1}, memory_manager.get());
+ Tensor output = makeOutputTensor(DataType::FLOAT32);
+
+ Tensor input_cond = makeInputTensor<DataType::FLOAT32>({1}, {10}, memory_manager.get());
+ Tensor input_add = makeInputTensor<DataType::FLOAT32>({1}, {1}, memory_manager.get());
+
+ RuntimeModule module(nullptr);
+ RuntimeGraph *cond_graph =
+ buildCondSubgraph(&module, DataType::FLOAT32, &input_cond, memory_manager.get());
+ RuntimeGraph *body_graph =
+ buildBodySubgraph(&module, DataType::FLOAT32, &input_add, memory_manager.get());
+
+ While kernel({&input}, {&output}, cond_graph, body_graph);
+ kernel.configure();
+ memory_manager->allocate_memory(output);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output), FloatArrayNear({10}));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/CMakeLists.txt b/compiler/luci-interpreter/src/loader/CMakeLists.txt
index d99485d06..292771592 100644
--- a/compiler/luci-interpreter/src/loader/CMakeLists.txt
+++ b/compiler/luci-interpreter/src/loader/CMakeLists.txt
@@ -1,22 +1,39 @@
-nnas_find_package(GTest REQUIRED)
-
set(SOURCES
GraphLoader.h
GraphLoader.cpp
+ KernelBuilderHelper.h
+ KernelBuilderHelper.cpp
KernelBuilder.h
KernelBuilder.cpp
ModuleLoader.h
ModuleLoader.cpp
- RuntimeToIR.h)
+ RuntimeToIR.h
+ nodes/Builders.h)
+
+# include kernel-specific builders
+macro(REGISTER_KERNEL NODE)
+ list(APPEND SOURCES "nodes/${NODE}.cpp")
+endmacro(REGISTER_KERNEL)
+include(${KERNEL_REGISTER_FILE})
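+# KERNEL_REGISTER_FILE is expected to list the kernels to build, one per line, in the form
+# REGISTER_KERNEL(Add)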
+
+add_library(${LUCI_INTERPRETER_LOADER} STATIC ${SOURCES})
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(${LUCI_INTERPRETER_LOADER} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
+target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER_PAL_DIR}")
+target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}")
-add_library(luci_interpreter_loader STATIC ${SOURCES})
-set_target_properties(luci_interpreter_loader PROPERTIES POSITION_INDEPENDENT_CODE ON)
-target_include_directories(luci_interpreter_loader PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}")
-target_link_libraries(luci_interpreter_loader
- PUBLIC luci_lang luci_interpreter_core
- PRIVATE luci_interpreter_kernels nncc_common)
+target_link_libraries(${LUCI_INTERPRETER_LOADER}
+ PUBLIC luci_lang ${LUCI_INTERPRETER_CORE}
+ PRIVATE ${LUCI_INTERPRETER_KERNELS} nncc_common luci_plan)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
set(TEST_SOURCES KernelBuilder.test.cpp)
-GTest_AddTest(luci_interpreter_loader_test ${TEST_SOURCES})
-target_link_libraries(luci_interpreter_loader_test luci_interpreter_loader)
+GTest_AddTest(${LUCI_INTERPRETER_LOADER}_test ${TEST_SOURCES})
+target_link_libraries(${LUCI_INTERPRETER_LOADER}_test ${LUCI_INTERPRETER_LOADER})
diff --git a/compiler/luci-interpreter/src/loader/GraphLoader.cpp b/compiler/luci-interpreter/src/loader/GraphLoader.cpp
index 95c654769..ba99a579b 100644
--- a/compiler/luci-interpreter/src/loader/GraphLoader.cpp
+++ b/compiler/luci-interpreter/src/loader/GraphLoader.cpp
@@ -18,6 +18,7 @@
#include "loader/KernelBuilder.h"
+#include <luci/Plan/CircleNodeExecutionPlan.h>
#include <loco/IR/Algorithm.h>
namespace luci_interpreter
@@ -57,13 +58,41 @@ const void *getNodeData(const luci::CircleConst *node, size_t *data_size)
return getNodeDataImpl<DataType::U8>(node, data_size);
case DataType::FLOAT32:
return getNodeDataImpl<DataType::FLOAT32>(node, data_size);
+ case DataType::S8:
+ return getNodeDataImpl<DataType::S8>(node, data_size);
+ case DataType::S16:
+ return getNodeDataImpl<DataType::S16>(node, data_size);
case DataType::S32:
return getNodeDataImpl<DataType::S32>(node, data_size);
+ case DataType::S64:
+ return getNodeDataImpl<DataType::S64>(node, data_size);
+ case DataType::BOOL:
+ return getNodeDataImpl<DataType::BOOL>(node, data_size);
default:
throw std::runtime_error("Unsupported type.");
}
}
+const void *getNodeData(const luci::CircleCustom *node, size_t *data_size)
+{
+ if (node->custom_code() != "CircleReferencingConst")
+ return nullptr;
+
+  // Helper struct describing the data stored in custom_options of a CircleReferencingConst node
+ // TODO move this struct to header
+ struct ConstDataReference
+ {
+ const uint8_t *data = nullptr;
+ uint32_t size = 0;
+ };
+
+ const auto &custom_options = node->custom_options();
+ const auto &const_data_ref = *reinterpret_cast<const ConstDataReference *>(custom_options.data());
+
+ *data_size = const_data_ref.size;
+ return const_data_ref.data;
+}
+
bool isExecutableNode(const luci::CircleNode *node)
{
switch (node->opcode())
@@ -74,10 +103,30 @@ bool isExecutableNode(const luci::CircleNode *node)
case luci::CircleOpcode::CIRCLEOUTPUT:
case luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE:
// The following nodes denote outputs of multiple-output nodes.
+ case luci::CircleOpcode::CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT:
+ case luci::CircleOpcode::CIRCLECUSTOMOUT:
case luci::CircleOpcode::CIRCLEIFOUT:
+ case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV4OUT:
+ case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT:
case luci::CircleOpcode::CIRCLESPLITOUT:
+ case luci::CircleOpcode::CIRCLESPLITVOUT:
+ case luci::CircleOpcode::CIRCLETOPKV2OUT:
+ case luci::CircleOpcode::CIRCLEUNIQUEOUT:
case luci::CircleOpcode::CIRCLEUNPACKOUT:
+ case luci::CircleOpcode::CIRCLEVARIABLE:
+ case luci::CircleOpcode::CIRCLEWHILEOUT:
return false;
+    // Custom nodes may be either executable or non-executable
+ case luci::CircleOpcode::CUSTOM:
+ {
+ auto const custom_node = loco::must_cast<const luci::CircleCustom *>(node);
+
+ // TODO handle more non-executable Custom ops here
+ if (custom_node->custom_code() == "CircleReferencingConst")
+ return false;
+
+ return true;
+ }
default:
return true;
}
@@ -91,23 +140,43 @@ bool isTensorProducingNode(const luci::CircleNode *node)
case luci::CircleOpcode::CIRCLEOUTPUT:
// The following nodes are multiple-output nodes. They do not produce tensors; the tensors
// are produced by the corresponding *Out nodes instead.
+ case luci::CircleOpcode::BIDIRECTIONAL_SEQUENCE_LSTM:
+ case luci::CircleOpcode::CUSTOM:
case luci::CircleOpcode::IF:
+ case luci::CircleOpcode::NON_MAX_SUPPRESSION_V4:
+ case luci::CircleOpcode::NON_MAX_SUPPRESSION_V5:
case luci::CircleOpcode::SPLIT:
+ case luci::CircleOpcode::SPLIT_V:
+ case luci::CircleOpcode::TOPK_V2:
+ case luci::CircleOpcode::UNIQUE:
case luci::CircleOpcode::UNPACK:
+ case luci::CircleOpcode::WHILE:
return false;
default:
return true;
}
}
+bool isSupportedCustomNode(const luci::CircleNode *node)
+{
+ const auto custom_node = loco::must_cast<const luci::CircleCustom *>(node);
+
+ // TODO handle more Custom ops here
+ if (custom_node->custom_code() == "CircleReferencingConst")
+ return true;
+
+ return false;
+}
+
} // namespace
GraphLoader::GraphLoader(
- const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir,
- const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
- std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
- : _graph(graph), _runtime_graph(runtime_graph), _runtime_to_ir(runtime_to_ir),
- _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor)
+ const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir,
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
+ std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor, IMemoryManager *memory_manager)
+ : _graph(graph), _runtime_graph(runtime_graph), _runtime_to_ir(runtime_to_ir),
+ _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor),
+ _memory_manager(memory_manager)
{
}
@@ -117,24 +186,36 @@ void GraphLoader::loadTensors()
{
const auto *node = loco::must_cast<const luci::CircleNode *>(_graph->nodes()->at(i));
+ if (node->opcode() == luci::CircleOpcode::CUSTOM && !isSupportedCustomNode(node))
+ {
+ const auto *cnode = loco::must_cast<const luci::CircleCustom *>(node);
+ throw std::runtime_error("Unsupported Custom operator. " + cnode->custom_code() + " in " +
+ node->name());
+ }
+
if (!isTensorProducingNode(node))
continue;
- // Only Input and Const nodes have shapes. Shapes of intermediate tensors will be inferred.
+ // Only Input, Const, Custom and Variable nodes have shapes. Shapes of intermediate tensors will
+ // be inferred.
Shape shape{};
- if (const auto *input_node = dynamic_cast<const luci::CircleInput *>(node))
+ switch (node->opcode())
{
- shape = getNodeShape(input_node);
- }
- else if (const auto *const_node = dynamic_cast<const luci::CircleConst *>(node))
- {
- shape = getNodeShape(const_node);
+ case luci::CircleOpcode::CIRCLECONST:
+ case luci::CircleOpcode::CIRCLECUSTOMOUT:
+ case luci::CircleOpcode::CIRCLEINPUT:
+ case luci::CircleOpcode::CIRCLEVARIABLE:
+ shape = getNodeShape(node);
+ break;
+ default:
+ break;
}
AffineQuantization quantization;
if (node->quantparam() != nullptr)
{
const luci::CircleQuantParam *params = node->quantparam();
+ assert(params->scale.size() == params->zerop.size());
quantization.scale.assign(params->scale.cbegin(), params->scale.cend());
quantization.zero_point.assign(params->zerop.cbegin(), params->zerop.cend());
quantization.quantized_dimension = params->quantized_dimension;
@@ -143,12 +224,40 @@ void GraphLoader::loadTensors()
auto tensor = std::make_unique<Tensor>(node->dtype(), std::move(shape), std::move(quantization),
node->name());
+  // If the node has an execution plan, read the memory offset of its tensor
+  // from the beginning of the shared memory buffer. Used by the Static Memory Manager.
+ if (luci::has_execution_plan(node))
+ {
+ auto execution_plan = luci::get_execution_plan(node);
+ assert(!execution_plan.offsets().empty());
+ tensor->set_offset(execution_plan.offsets().front());
+ }
+
if (const auto *const_node = dynamic_cast<const luci::CircleConst *>(node))
{
size_t data_size{};
const void *const_data = getNodeData(const_node, &data_size);
if (const_data != nullptr)
+ {
+ _memory_manager->allocate_memory(*tensor);
tensor->writeData(const_data, data_size);
+ }
+ }
+ else if (const auto *custom_out_node = dynamic_cast<const luci::CircleCustomOut *>(node))
+ {
+ const auto *custom_node =
+ loco::must_cast<const luci::CircleCustom *>(custom_out_node->input());
+
+ if (custom_node->custom_code() == "CircleReferencingConst")
+ {
+ size_t data_size{};
+ const void *const_data = getNodeData(custom_node, &data_size);
+ if (const_data != nullptr)
+ {
+ _memory_manager->allocate_memory(*tensor);
+ tensor->writeData(const_data, data_size);
+ }
+ }
}
_node_to_tensor.emplace(node, tensor.get());
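The set_offset() call above is what the Static Memory Manager consumes: every tensor records a precomputed offset into one shared arena instead of owning its buffer. A minimal sketch of that idea, using illustrative names rather than the luci-interpreter API:

#include <cassert>
#include <cstdint>
#include <vector>

// Toy planned tensor: its data lives at a fixed offset inside a shared arena.
struct PlannedTensor
{
  size_t offset = 0;
  size_t size = 0;
  uint8_t *data(std::vector<uint8_t> &arena) { return arena.data() + offset; }
};

int main()
{
  std::vector<uint8_t> arena(128); // shared buffer sized ahead of time by the planner

  PlannedTensor a{0, 64};  // offsets would come from execution plan annotations
  PlannedTensor b{64, 64};

  a.data(arena)[0] = 7; // writes land in disjoint slices of the arena
  b.data(arena)[0] = 9;
  assert(arena[0] == 7 && arena[64] == 9);
  return 0;
}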
@@ -165,6 +274,7 @@ void GraphLoader::initInputOutputTensors() const
for (size_t i = 0; i < input_nodes.size(); ++i)
{
input_tensors[i] = _node_to_tensor.at(input_nodes[i]);
+ _memory_manager->allocate_memory(*input_tensors[i]);
}
_runtime_graph->setInputTensors(input_tensors);
@@ -183,16 +293,54 @@ void GraphLoader::loadOperators()
KernelBuilder kernel_builder(_graph_to_runtime_graph, _node_to_tensor);
// Create kernels for executable nodes. This has to be done in execution order.
- for (const loco::Node *loco_node :
- loco::postorder_traversal(loco::output_nodes(const_cast<loco::Graph *>(_graph))))
+ auto graph = const_cast<loco::Graph *>(_graph);
+
+ auto const graph_nodes = loco::all_nodes(graph);
+
+  // Check that every node carries an execution plan annotation.
+ bool has_execution_annotation = true;
+ auto const checking_exec_plan = [&has_execution_annotation](auto const node) {
+ const auto *circle_node = loco::must_cast<const luci::CircleNode *>(node);
+ if (!luci::has_execution_plan(circle_node))
+ has_execution_annotation = false;
+ };
+ std::for_each(begin(graph_nodes), end(graph_nodes), checking_exec_plan);
+
+ if (has_execution_annotation)
{
- const auto *node = loco::must_cast<const luci::CircleNode *>(loco_node);
+    // Build the ordered_nodes vector that stores graph nodes in execution order.
+ std::vector<const luci::CircleNode *> ordered_nodes(graph_nodes.size());
+
+ auto const filler = [&ordered_nodes](auto const node) {
+ const auto *circle_node = loco::must_cast<const luci::CircleNode *>(node);
+ auto const position = luci::get_execution_plan(circle_node).order_in_plan();
+ ordered_nodes.at(position) = circle_node;
+ };
+ std::for_each(begin(graph_nodes), end(graph_nodes), filler);
- if (isExecutableNode(node))
+ for (auto node : ordered_nodes)
+ {
+ if (isExecutableNode(node))
+ {
+ std::unique_ptr<Kernel> kernel = kernel_builder.build(node);
+ _runtime_to_ir.kernel_to_node.emplace(kernel.get(), node);
+ _runtime_graph->addKernel(std::move(kernel));
+ }
+ }
+ }
+ else
+ {
+    // If the execution order plan cannot be built,
+    // fall back to the default postorder_traversal approach.
+ for (const loco::Node *loco_node : loco::postorder_traversal(loco::output_nodes(graph)))
{
- std::unique_ptr<Kernel> kernel = node->accept(&kernel_builder);
- _runtime_to_ir.kernel_to_node.emplace(kernel.get(), node);
- _runtime_graph->addKernel(std::move(kernel));
+ const auto *node = loco::must_cast<const luci::CircleNode *>(loco_node);
+ if (isExecutableNode(node))
+ {
+ std::unique_ptr<Kernel> kernel = kernel_builder.build(node);
+ _runtime_to_ir.kernel_to_node.emplace(kernel.get(), node);
+ _runtime_graph->addKernel(std::move(kernel));
+ }
}
}
}
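The annotated path above is a plain scatter: each node knows its order_in_plan() position, so the loader drops it straight into a vector at that index and skips graph traversal entirely. A self-contained sketch of the same idea, with a plain map standing in for luci's execution-plan annotations (assuming every node has a unique order in [0, N)):

#include <cassert>
#include <string>
#include <unordered_map>
#include <vector>

int main()
{
  // Hypothetical stand-in for luci::get_execution_plan(node).order_in_plan().
  std::unordered_map<std::string, size_t> order = {{"input", 0}, {"conv", 1}, {"relu", 2}};

  // Scatter each node into its planned position, as the filler lambda does above.
  std::vector<std::string> ordered_nodes(order.size());
  for (const auto &entry : order)
    ordered_nodes.at(entry.second) = entry.first;

  assert(ordered_nodes.front() == "input" && ordered_nodes.back() == "relu");
  return 0;
}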
diff --git a/compiler/luci-interpreter/src/loader/GraphLoader.h b/compiler/luci-interpreter/src/loader/GraphLoader.h
index 89c5bcad7..fe066ecf8 100644
--- a/compiler/luci-interpreter/src/loader/GraphLoader.h
+++ b/compiler/luci-interpreter/src/loader/GraphLoader.h
@@ -19,6 +19,7 @@
#include "core/RuntimeGraph.h"
#include "loader/RuntimeToIR.h"
+#include "luci_interpreter/MemoryManager.h"
#include <loco/IR/Graph.h>
@@ -32,7 +33,8 @@ class GraphLoader
public:
GraphLoader(const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir,
const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
- std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor);
+ std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor,
+ IMemoryManager *memory_manager);
void loadTensors();
void initInputOutputTensors() const;
@@ -42,6 +44,7 @@ private:
const loco::Graph *_graph;
RuntimeGraph *_runtime_graph;
RuntimeToIR &_runtime_to_ir;
+ IMemoryManager *_memory_manager;
const std::unordered_map<const loco::Graph *, RuntimeGraph *> &_graph_to_runtime_graph;
std::unordered_map<const loco::Node *, Tensor *> &_node_to_tensor;
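The header change above injects the memory manager as a constructor dependency, so GraphLoader allocates through an interface rather than hard-coding a policy. A toy model of that pattern; the Tensor and manager here are simplified stand-ins, while the real interface lives in luci_interpreter/MemoryManager.h:

#include <cstddef>
#include <cstdint>
#include <vector>

// Simplified tensor: just a size and a backing buffer.
struct Tensor
{
  size_t byte_size = 0;
  std::vector<uint8_t> storage;
};

class IMemoryManager
{
public:
  virtual ~IMemoryManager() = default;
  virtual void allocate_memory(Tensor &tensor) = 0;
};

// One possible policy: give each tensor its own buffer on demand.
class SimpleMemoryManager : public IMemoryManager
{
public:
  void allocate_memory(Tensor &tensor) override { tensor.storage.resize(tensor.byte_size); }
};

int main()
{
  Tensor t;
  t.byte_size = 64;
  SimpleMemoryManager mm;
  mm.allocate_memory(t); // the loader calls this before writing const data
  return t.storage.size() == 64 ? 0 : 1;
}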
diff --git a/compiler/luci-interpreter/src/loader/KernelBuilder.cpp b/compiler/luci-interpreter/src/loader/KernelBuilder.cpp
index 126a1cb5b..c1e2c630a 100644
--- a/compiler/luci-interpreter/src/loader/KernelBuilder.cpp
+++ b/compiler/luci-interpreter/src/loader/KernelBuilder.cpp
@@ -15,586 +15,118 @@
*/
#include "loader/KernelBuilder.h"
+#include "loader/nodes/Builders.h"
-#include "kernels/Add.h"
-#include "kernels/ArgMax.h"
-#include "kernels/AveragePool2D.h"
-#include "kernels/Concatenation.h"
-#include "kernels/Conv2D.h"
-#include "kernels/DepthToSpace.h"
-#include "kernels/DepthwiseConv2D.h"
-#include "kernels/Elu.h"
-#include "kernels/FullyConnected.h"
-#include "kernels/If.h"
-#include "kernels/L2Normalize.h"
-#include "kernels/L2Pool2D.h"
-#include "kernels/LeakyRelu.h"
-#include "kernels/LocalResponseNormalization.h"
-#include "kernels/Logistic.h"
-#include "kernels/MaxPool2D.h"
-#include "kernels/Mean.h"
-#include "kernels/Mul.h"
-#include "kernels/Pad.h"
-#include "kernels/Reshape.h"
-#include "kernels/Reverse.h"
-#include "kernels/Rsqrt.h"
-#include "kernels/Slice.h"
-#include "kernels/Softmax.h"
-#include "kernels/SpaceToDepth.h"
-#include "kernels/Split.h"
-#include "kernels/StridedSlice.h"
-#include "kernels/Sqrt.h"
-#include "kernels/Squeeze.h"
-#include "kernels/Tanh.h"
-#include "kernels/Unpack.h"
-#include "kernels/Transpose.h"
-#include "kernels/TransposeConv.h"
+#include <luci/IR/CircleOpcode.h>
+#include <luci/IR/CircleNodeDecl.h>
#include <stdexcept>
-namespace luci_interpreter
-{
-
-template <typename CircleNodeOut>
-static std::vector<const loco::Node *> collectOutputNodes(const luci::CircleNode *node)
+namespace
{
- std::vector<const CircleNodeOut *> output_nodes;
- for (const loco::Node *loco_node : loco::succs(node))
- {
- output_nodes.push_back(loco::must_cast<const CircleNodeOut *>(loco_node));
- }
- std::sort(output_nodes.begin(), output_nodes.end(),
- [](const CircleNodeOut *node1, const CircleNodeOut *node2) {
- return node1->index() < node2->index();
- });
- return {output_nodes.cbegin(), output_nodes.cend()};
-}
-const Tensor *KernelBuilder::getInputTensor(const loco::Node *node) const
+// TODO Extract this helper function
+const std::string toString(luci::CircleOpcode opcode)
{
- const Tensor *tensor = _node_to_tensor.at(node);
- assert(tensor != nullptr);
- return tensor;
-}
+ static const char *names[] = {
+#define CIRCLE_NODE(OPCODE, CIRCLE_CLASS) #CIRCLE_CLASS,
+#define CIRCLE_VNODE(OPCODE, CIRCLE_CLASS) #CIRCLE_CLASS,
+#include <luci/IR/CircleNodes.lst>
+#undef CIRCLE_NODE
+#undef CIRCLE_VNODE
+ };
-const Tensor *KernelBuilder::getOptionalInputTensor(const loco::Node *node) const
-{
- if (dynamic_cast<const luci::CircleOutputExclude *>(node))
- {
- return nullptr;
- }
- return getInputTensor(node);
-}
+ auto const node_name = names[static_cast<int>(opcode)];
-Tensor *KernelBuilder::getOutputTensor(const loco::Node *node) const
-{
- Tensor *tensor = _node_to_tensor.at(node);
- assert(tensor != nullptr);
- return tensor;
-}
+ assert(std::string(node_name).substr(0, 6) == "Circle"); // FIX_ME_UNLESS
-std::vector<Tensor *>
-KernelBuilder::getOutputTensors(const std::vector<const loco::Node *> &nodes) const
-{
- std::vector<Tensor *> tensors;
- tensors.reserve(nodes.size());
- for (const loco::Node *node : nodes)
- tensors.push_back(getOutputTensor(node));
- return tensors;
+ // Return substring of class name ("Circle" is sliced out)
+ // Ex: Return "Conv2D" for "CircleConv2D" node
+ return std::string(node_name).substr(6);
}
-RuntimeGraph *KernelBuilder::getRuntimeGraph(const loco::Graph *graph) const
-{
- RuntimeGraph *runtime_graph = _graph_to_runtime_graph.at(graph);
- assert(runtime_graph != nullptr);
- return runtime_graph;
-}
+} // namespace
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleAdd *node)
+namespace luci_interpreter
{
- assert(node->arity() == 2);
-
- const Tensor *input1 = getInputTensor(node->x());
- const Tensor *input2 = getInputTensor(node->y());
- Tensor *output = getOutputTensor(node);
- AddParams params{};
- params.activation = node->fusedActivationFunction();
+#define CIRCLE_NODE(OPCODE, CLASS) CLASS,
+#define CIRCLE_VNODE(OPCODE, CLASS) CLASS,
- return std::make_unique<kernels::Add>(input1, input2, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleArgMax *node)
+// This enum is auxiliary.
+// It duplicates luci::CircleOpcode but is initialized with CLASS instead of OPCODE,
+// because the list of target operators is given as CLASS names.
+enum class BuilderId
{
- assert(node->arity() == 2);
- const Tensor *input = getInputTensor(node->input());
- const Tensor *axis = getInputTensor(node->dimension());
- Tensor *output = getOutputTensor(node);
+#include <luci/IR/CircleNodes.lst>
+  Size // equals the number of values in the BuilderId enum
+};
- ArgMaxParams params{};
- params.output_type = node->output_type();
+#undef CIRCLE_VNODE
+#undef CIRCLE_NODE
- return std::make_unique<kernels::ArgMax>(input, axis, output, params);
-}
+/**
+ * @brief Registry of kernel builders
+ *
+ * This class contains the mapping from opcodes to kernel builder functions.
+ */
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleAveragePool2D *node)
+class KernelBuilderRegistry
{
- assert(node->arity() == 1);
+public:
+ using KernelBuilderFunc = std::unique_ptr<Kernel>(const luci::CircleNode *,
+ KernelBuilderHelper &);
- const Tensor *input = getInputTensor(node->value());
- Tensor *output = getOutputTensor(node);
+ KernelBuilderRegistry() : _operator_builders(size_t(BuilderId::Size), nullptr)
+ {
+#define REGISTER_KERNEL(name) \
+ register_kernel_builder(BuilderId::Circle##name, build_kernel_Circle##name);
- Pool2DParams params{};
- params.padding = node->padding();
- params.filter_height = node->filter()->h();
- params.filter_width = node->filter()->w();
- params.stride_height = node->stride()->h();
- params.stride_width = node->stride()->w();
- params.activation = node->fusedActivationFunction();
+#include "KernelsToBuild.lst"
- return std::make_unique<kernels::AveragePool2D>(input, output, params);
-}
+#undef REGISTER_KERNEL
+ }
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleConcatenation *node)
-{
- std::vector<const Tensor *> inputs(node->numValues());
- for (uint32_t i = 0; i < node->numValues(); ++i)
+ KernelBuilderFunc *get_kernel_builder_func(luci::CircleOpcode opcode) const
{
- inputs[i] = getInputTensor(node->values(i));
+ return _operator_builders.at(size_t(opcode));
}
- Tensor *output = getOutputTensor(node);
-
- ConcatenationParams params{};
- params.axis = node->axis();
-
- return std::make_unique<kernels::Concatenation>(std::move(inputs), output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleConst *)
-{
- throw std::runtime_error("Const node cannot be executed.");
-}
-
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleConv2D *node)
-{
- assert(node->arity() == 3);
-
- const Tensor *input = getInputTensor(node->input());
- const Tensor *filter = getInputTensor(node->filter());
- const Tensor *bias = getInputTensor(node->bias());
- Tensor *output = getOutputTensor(node);
-
- Conv2DParams params{};
- params.padding = node->padding();
- params.stride_height = node->stride()->h();
- params.stride_width = node->stride()->w();
- params.dilation_height_factor = node->dilation()->h();
- params.dilation_width_factor = node->dilation()->w();
- params.activation = node->fusedActivationFunction();
-
- return std::make_unique<kernels::Conv2D>(input, filter, bias, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleDepthToSpace *node)
-{
- assert(node->arity() == 1);
-
- const Tensor *input = getInputTensor(node->input());
- Tensor *output = getOutputTensor(node);
-
- DepthToSpaceParams params{};
- params.block_size = node->block_size();
-
- return std::make_unique<kernels::DepthToSpace>(input, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleDepthwiseConv2D *node)
-{
- assert(node->arity() == 3);
-
- const Tensor *input = getInputTensor(node->input());
- const Tensor *filter = getInputTensor(node->filter());
- const Tensor *bias = getInputTensor(node->bias());
- Tensor *output = getOutputTensor(node);
- DepthwiseConv2DParams params{};
- params.padding = node->padding();
- params.depth_multiplier = node->depthMultiplier();
- params.stride_height = node->stride()->h();
- params.stride_width = node->stride()->w();
- params.dilation_height_factor = node->dilation()->h();
- params.dilation_width_factor = node->dilation()->w();
- params.activation = node->fusedActivationFunction();
+private:
+ std::vector<KernelBuilderFunc *> _operator_builders;
- return std::make_unique<kernels::DepthwiseConv2D>(input, filter, bias, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleElu *node)
-{
- assert(node->arity() == 1);
-
- const Tensor *input = getInputTensor(node->features());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::Elu>(input, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleFullyConnected *node)
-{
- assert(node->arity() == 3);
-
- const Tensor *input = getInputTensor(node->input());
- const Tensor *weights = getInputTensor(node->weights());
- const Tensor *bias = getOptionalInputTensor(node->bias());
- Tensor *output = getOutputTensor(node);
-
- FullyConnectedParams params{};
- params.activation = node->fusedActivationFunction();
-
- return std::make_unique<kernels::FullyConnected>(input, weights, bias, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleIf *node)
-{
- auto output_nodes = collectOutputNodes<luci::CircleIfOut>(node);
- assert(node->arity() == 1 + node->input_count());
- assert(output_nodes.size() == static_cast<size_t>(node->output_count()));
-
- const Tensor *cond = getInputTensor(node->cond());
- std::vector<const Tensor *> inputs(node->input_count());
- for (uint32_t i = 0; i < node->input_count(); ++i)
+ void register_kernel_builder(BuilderId id, KernelBuilderFunc *func)
{
- inputs[i] = getInputTensor(node->input(i));
+    // BuilderId duplicates luci::CircleOpcode,
+    // so size_t(id) equals size_t of the corresponding operation's opcode.
+ assert(size_t(id) < _operator_builders.size());
+ _operator_builders[size_t(id)] = func;
}
- std::vector<Tensor *> outputs = getOutputTensors(output_nodes);
-
- RuntimeGraph *then_graph = getRuntimeGraph(node->then_graph());
- RuntimeGraph *else_graph = getRuntimeGraph(node->else_graph());
-
- return std::make_unique<kernels::If>(cond, std::move(inputs), std::move(outputs), then_graph,
- else_graph);
-}
-
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleInput *)
-{
- throw std::runtime_error("Input node cannot be executed.");
-}
-
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleL2Normalize *node)
-{
- assert(node->arity() == 1);
-
- const Tensor *input = getInputTensor(node->x());
- Tensor *output = getOutputTensor(node);
+};
- L2NormParams params{};
- params.activation = node->fusedActivationFunction();
-
- return std::make_unique<kernels::L2Normalize>(input, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleL2Pool2D *node)
-{
- assert(node->arity() == 1);
-
- const Tensor *input = getInputTensor(node->value());
- Tensor *output = getOutputTensor(node);
-
- Pool2DParams params{};
- params.padding = node->padding();
- params.filter_height = node->filter()->h();
- params.filter_width = node->filter()->w();
- params.stride_height = node->stride()->h();
- params.stride_width = node->stride()->w();
- params.activation = node->fusedActivationFunction();
-
- return std::make_unique<kernels::L2Pool2D>(input, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleLeakyRelu *node)
-{
- assert(node->arity() == 1);
- const Tensor *input = getInputTensor(node->features());
- Tensor *output = getOutputTensor(node);
-
- LeakyReluParams params{};
- params.alpha = node->alpha();
-
- return std::make_unique<kernels::LeakyRelu>(input, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleLocalResponseNormalization *node)
-{
- assert(node->arity() == 1);
- const Tensor *input = getInputTensor(node->input());
- Tensor *output = getOutputTensor(node);
-
- LocalResponseNormalizationParams params{};
- params.radius = node->radius();
- params.bias = node->bias();
- params.alpha = node->alpha();
- params.beta = node->beta();
-
- return std::make_unique<kernels::LocalResponseNormalization>(input, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleLogistic *node)
+KernelBuilder::KernelBuilder(
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
+ const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
+ : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor)
{
- assert(node->arity() == 1);
-
- const Tensor *input = getInputTensor(node->x());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::Logistic>(input, output);
+ _builder_registry = std::make_unique<KernelBuilderRegistry>();
}
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleMaxPool2D *node)
+KernelBuilder::~KernelBuilder()
{
- assert(node->arity() == 1);
-
- const Tensor *input = getInputTensor(node->value());
- Tensor *output = getOutputTensor(node);
-
- Pool2DParams params{};
- params.padding = node->padding();
- params.filter_height = node->filter()->h();
- params.filter_width = node->filter()->w();
- params.stride_height = node->stride()->h();
- params.stride_width = node->stride()->w();
- params.activation = node->fusedActivationFunction();
-
- return std::make_unique<kernels::MaxPool2D>(input, output, params);
+  // Defined in this .cpp file to hide KernelBuilderRegistry internals.
+  // This destructor deletes _builder_registry.
}
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleMean *node)
+std::unique_ptr<Kernel> KernelBuilder::build(const luci::CircleNode *node)
{
- assert(node->arity() == 2);
-
- const Tensor *input = getInputTensor(node->input());
- const Tensor *axes = getInputTensor(node->reduction_indices());
- Tensor *output = getOutputTensor(node);
-
- ReducerParams params{};
- params.keep_dims = node->keep_dims();
-
- return std::make_unique<kernels::Mean>(input, axes, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleMul *node)
-{
- assert(node->arity() == 2);
-
- const Tensor *input1 = getInputTensor(node->x());
- const Tensor *input2 = getInputTensor(node->y());
- Tensor *output = getOutputTensor(node);
-
- MulParams params{};
- params.activation = node->fusedActivationFunction();
-
- return std::make_unique<kernels::Mul>(input1, input2, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleOutput *)
-{
- throw std::runtime_error("Output node cannot be executed.");
-}
-
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CirclePad *node)
-{
- assert(node->arity() == 2);
-
- const Tensor *input = getInputTensor(node->input());
- const Tensor *paddings = getInputTensor(node->paddings());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::Pad>(input, paddings, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleReshape *node)
-{
- assert(node->arity() == 2);
-
- const Tensor *input = getInputTensor(node->tensor());
- const Tensor *shape = getInputTensor(node->shape());
- Tensor *output = getOutputTensor(node);
-
- // NOTE 'newShape' attribute is ignored.
- return std::make_unique<kernels::Reshape>(input, shape, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleReverseV2 *node)
-{
- assert(node->arity() == 2);
-
- const Tensor *input = getInputTensor(node->tensor());
- const Tensor *axes = getInputTensor(node->axis());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::Reverse>(input, axes, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleRsqrt *node)
-{
- assert(node->arity() == 1);
-
- const Tensor *input = getInputTensor(node->x());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::Rsqrt>(input, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSlice *node)
-{
- assert(node->arity() == 3);
-
- const Tensor *input = getInputTensor(node->input());
- const Tensor *begin = getInputTensor(node->begin());
- const Tensor *size = getInputTensor(node->size());
-
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::Slice>(input, begin, size, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSoftmax *node)
-{
- assert(node->arity() == 1);
-
- const Tensor *input = getInputTensor(node->logits());
- Tensor *output = getOutputTensor(node);
-
- SoftmaxParams params{};
- params.beta = node->beta();
-
- return std::make_unique<kernels::Softmax>(input, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSpaceToDepth *node)
-{
- assert(node->arity() == 1);
- const Tensor *input = getInputTensor(node->input());
-
- Tensor *output = getOutputTensor(node);
-
- SpaceToDepthParams params{};
- params.block_size = node->block_size();
-
- return std::make_unique<kernels::SpaceToDepth>(input, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSplit *node)
-{
- auto output_nodes = collectOutputNodes<luci::CircleSplitOut>(node);
- assert(node->arity() == 2);
- assert(output_nodes.size() == static_cast<size_t>(node->num_split()));
-
- const Tensor *axis = getInputTensor(node->split_dim());
- const Tensor *input = getInputTensor(node->input());
- std::vector<Tensor *> outputs = getOutputTensors(output_nodes);
-
- // NOTE 'num_splits' attribute is ignored.
- return std::make_unique<kernels::Split>(axis, input, std::move(outputs));
-}
-
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSqrt *node)
-{
- assert(node->arity() == 1);
-
- const Tensor *input = getInputTensor(node->x());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::Sqrt>(input, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSqueeze *node)
-{
- assert(node->arity() == 1);
-
- const Tensor *input = getInputTensor(node->input());
- Tensor *output = getOutputTensor(node);
-
- SqueezeParams params{};
- params.squeeze_dims = node->squeeze_dims();
-
- return std::make_unique<kernels::Squeeze>(input, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleStridedSlice *node)
-{
- assert(node->arity() == 4);
-
- const Tensor *input = getInputTensor(node->input());
- const Tensor *begin = getInputTensor(node->begin());
- const Tensor *end = getInputTensor(node->end());
- const Tensor *strides = getInputTensor(node->strides());
-
- Tensor *output = getOutputTensor(node);
-
- StridedSliceParams params{};
- params.begin_mask = node->begin_mask();
- params.ellipsis_mask = node->ellipsis_mask();
- params.end_mask = node->end_mask();
- params.new_axis_mask = node->new_axis_mask();
- params.shrink_axis_mask = node->shrink_axis_mask();
-
- return std::make_unique<kernels::StridedSlice>(input, begin, end, strides, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTanh *node)
-{
- assert(node->arity() == 1);
-
- const Tensor *input = getInputTensor(node->x());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::Tanh>(input, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTranspose *node)
-{
- assert(node->arity() == 2);
-
- const Tensor *input = getInputTensor(node->a());
- const Tensor *perm = getInputTensor(node->perm());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::Transpose>(input, perm, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTransposeConv *node)
-{
- assert(node->arity() == 4);
-
- const Tensor *input_sizes = getInputTensor(node->inputSizes());
- const Tensor *filter = getInputTensor(node->filter());
- const Tensor *out_backprop = getInputTensor(node->outBackprop());
- const Tensor *bias = getOptionalInputTensor(node->bias());
-
- Tensor *output = getOutputTensor(node);
-
- TransposeConvParams params{};
- params.padding = node->padding();
- params.stride_height = node->stride()->h();
- params.stride_width = node->stride()->w();
-
- return std::make_unique<kernels::TransposeConv>(input_sizes, filter, out_backprop, bias, output,
- params);
-}
-
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleUnpack *node)
-{
- auto output_nodes = collectOutputNodes<luci::CircleUnpackOut>(node);
- assert(node->arity() == 1);
- assert(output_nodes.size() == static_cast<size_t>(node->num()));
-
- const Tensor *input = getInputTensor(node->value());
- std::vector<Tensor *> outputs = getOutputTensors(output_nodes);
-
- UnpackParams params{};
- params.axis = node->axis();
+ auto specific_builder = _builder_registry->get_kernel_builder_func(node->opcode());
+ if (specific_builder != nullptr)
+ return specific_builder(node, *this);
- // NOTE 'num' attribute is ignored.
- return std::make_unique<kernels::Unpack>(input, std::move(outputs), params);
+ std::string msg = "Unsupported operator: ";
+ msg += toString(node->opcode()) + " in " + std::string(node->name());
+ throw std::invalid_argument(msg.c_str());
}
} // namespace luci_interpreter
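The rewritten KernelBuilder leans on the X-macro pattern: one list (CircleNodes.lst / KernelsToBuild.lst) is included repeatedly under different CIRCLE_NODE / REGISTER_KERNEL definitions, generating the enum, the name table, and the registrations from a single source of truth. A self-contained sketch of the pattern, with the list inlined instead of kept in a .lst file:

#include <cassert>
#include <string>

// Inline stand-in for a .lst file: each entry is OP(OPCODE, CLASS).
#define OP_LIST(OP) \
  OP(ADD, CircleAdd) \
  OP(MUL, CircleMul)

// Expansion 1: an enum of opcodes.
enum class Opcode
{
#define OP(OPCODE, CLASS) OPCODE,
  OP_LIST(OP)
#undef OP
  Size
};

// Expansion 2: a parallel table of class names, indexed by opcode.
static const char *names[] = {
#define OP(OPCODE, CLASS) #CLASS,
  OP_LIST(OP)
#undef OP
};

int main()
{
  assert(std::string(names[static_cast<int>(Opcode::MUL)]) == "CircleMul");
  return 0;
}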
diff --git a/compiler/luci-interpreter/src/loader/KernelBuilder.h b/compiler/luci-interpreter/src/loader/KernelBuilder.h
index 31cb9d8fc..b1f383394 100644
--- a/compiler/luci-interpreter/src/loader/KernelBuilder.h
+++ b/compiler/luci-interpreter/src/loader/KernelBuilder.h
@@ -17,79 +17,34 @@
#ifndef LUCI_INTERPRETER_LOADER_KERNELBUILDER_H
#define LUCI_INTERPRETER_LOADER_KERNELBUILDER_H
+#include "loader/KernelBuilderHelper.h"
+
#include "core/Kernel.h"
#include "core/RuntimeGraph.h"
#include <luci/IR/CircleNodeVisitor.h>
#include <memory>
-#include <vector>
#include <unordered_map>
namespace luci_interpreter
{
-class KernelBuilder : public luci::CircleNodeVisitor<std::unique_ptr<Kernel>>
+class KernelBuilderRegistry;
+
+class KernelBuilder : public KernelBuilderHelper
{
public:
KernelBuilder(
- const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
- const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
- : _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor)
- {
- }
-
- std::unique_ptr<Kernel> visit(const luci::CircleAdd *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleArgMax *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleAveragePool2D *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleConcatenation *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleConv2D *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleConst *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleDepthToSpace *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleDepthwiseConv2D *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleElu *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleFullyConnected *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleIf *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleL2Normalize *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleL2Pool2D *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleLeakyRelu *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleLocalResponseNormalization *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleLogistic *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleInput *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleMaxPool2D *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleMean *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleMul *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleOutput *node) override;
- std::unique_ptr<Kernel> visit(const luci::CirclePad *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleReshape *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleReverseV2 *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleRsqrt *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleSlice *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleSoftmax *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleSpaceToDepth *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleSplit *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleStridedSlice *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleSqrt *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleSqueeze *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleTanh *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleTranspose *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleTransposeConv *node) override;
- std::unique_ptr<Kernel> visit(const luci::CircleUnpack *node) override;
-
-private:
- const Tensor *getInputTensor(const loco::Node *node) const;
-
- const Tensor *getOptionalInputTensor(const loco::Node *node) const;
-
- Tensor *getOutputTensor(const loco::Node *node) const;
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
+ const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor);
- std::vector<Tensor *> getOutputTensors(const std::vector<const loco::Node *> &nodes) const;
+ ~KernelBuilder();
- RuntimeGraph *getRuntimeGraph(const loco::Graph *graph) const;
+ std::unique_ptr<Kernel> build(const luci::CircleNode *node);
private:
- const std::unordered_map<const loco::Graph *, RuntimeGraph *> &_graph_to_runtime_graph;
- const std::unordered_map<const loco::Node *, Tensor *> &_node_to_tensor;
+ std::unique_ptr<KernelBuilderRegistry> _builder_registry;
};
} // namespace luci_interpreter
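Behind the new build() entry point sits a flat function-pointer table indexed by opcode, replacing the old CircleNodeVisitor. A minimal sketch of that dispatch with simplified stand-in types, not the actual luci-interpreter signatures:

#include <cassert>
#include <memory>
#include <string>
#include <vector>

struct Node { int opcode = 0; };     // stand-in for luci::CircleNode
struct Kernel { std::string name; }; // stand-in for luci_interpreter::Kernel

using BuilderFunc = std::unique_ptr<Kernel>(const Node &);

std::unique_ptr<Kernel> buildAdd(const Node &) { return std::make_unique<Kernel>(Kernel{"Add"}); }

int main()
{
  // Register each builder at the slot matching its opcode, as the registry does.
  std::vector<BuilderFunc *> registry(8, nullptr);
  registry[0] = buildAdd;

  Node node; // opcode 0
  BuilderFunc *func = registry.at(node.opcode);
  assert(func != nullptr); // a nullptr slot means "unsupported operator"
  auto kernel = func(node);
  assert(kernel->name == "Add");
  return 0;
}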
diff --git a/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp b/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp
index 4e2bc3d0b..10a01f418 100644
--- a/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp
+++ b/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp
@@ -16,35 +16,67 @@
#include "loader/GraphLoader.h"
#include "loader/KernelBuilder.h"
+#include "luci_interpreter/SimpleMemoryManager.h"
#include <kernels/Add.h>
#include <kernels/ArgMax.h>
#include <kernels/AveragePool2D.h>
+#include <kernels/BatchMatMul.h>
+#include <kernels/Cast.h>
#include <kernels/Concatenation.h>
#include <kernels/Conv2D.h>
#include <kernels/DepthToSpace.h>
#include <kernels/DepthwiseConv2D.h>
+#include <kernels/Div.h>
#include <kernels/Elu.h>
+#include <kernels/Exp.h>
+#include <kernels/Floor.h>
+#include <kernels/FloorDiv.h>
+#include <kernels/Equal.h>
#include <kernels/FullyConnected.h>
+#include <kernels/Greater.h>
+#include <kernels/GreaterEqual.h>
+#include <kernels/InstanceNorm.h>
#include <kernels/L2Normalize.h>
#include <kernels/L2Pool2D.h>
#include <kernels/LeakyRelu.h>
+#include <kernels/Less.h>
+#include <kernels/LessEqual.h>
#include <kernels/LocalResponseNormalization.h>
+#include <kernels/LogicalAnd.h>
+#include <kernels/LogicalNot.h>
+#include <kernels/LogicalOr.h>
#include <kernels/Logistic.h>
+#include <kernels/LogSoftmax.h>
+#include <kernels/Maximum.h>
#include <kernels/MaxPool2D.h>
#include <kernels/Mean.h>
+#include <kernels/Minimum.h>
#include <kernels/Mul.h>
+#include <kernels/Neg.h>
+#include <kernels/NotEqual.h>
+#include <kernels/OneHot.h>
#include <kernels/Pad.h>
+#include <kernels/PadV2.h>
+#include <kernels/Pow.h>
+#include <kernels/PRelu.h>
+#include <kernels/Relu.h>
+#include <kernels/Relu6.h>
#include <kernels/Reshape.h>
-#include <kernels/Reverse.h>
+#include <kernels/ResizeBilinear.h>
+#include <kernels/ResizeNearestNeighbor.h>
+#include <kernels/ReverseV2.h>
#include <kernels/Rsqrt.h>
#include <kernels/Slice.h>
#include <kernels/Softmax.h>
#include <kernels/SpaceToDepth.h>
#include <kernels/Split.h>
+#include <kernels/SplitV.h>
#include <kernels/Sqrt.h>
+#include <kernels/SquaredDifference.h>
#include <kernels/Squeeze.h>
#include <kernels/StridedSlice.h>
+#include <kernels/Sub.h>
#include <kernels/Tanh.h>
#include <kernels/Transpose.h>
#include <kernels/TransposeConv.h>
@@ -63,6 +95,9 @@ class KernelBuilderTest : public Test
{
protected:
luci::CircleInput *createInputNode() { return createNode<luci::CircleInput>(); }
+ void SetUp() override { _memory_manager = std::make_unique<SimpleMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
template <typename NodeT, typename... Args> NodeT *createNode(Args &&... args)
{
@@ -86,15 +121,16 @@ protected:
{
std::unordered_map<const loco::Graph *, RuntimeGraph *> graph_to_runtime_graph;
- RuntimeGraph runtime_graph(nullptr);
+ RuntimeGraph runtime_graph(nullptr, _memory_manager.get());
+ graph_to_runtime_graph[&_graph] = &runtime_graph;
RuntimeToIR runtime_to_ir;
GraphLoader graph_loader(&_graph, &runtime_graph, runtime_to_ir, graph_to_runtime_graph,
- _node_to_tensor);
+ _node_to_tensor, _memory_manager.get());
graph_loader.loadTensors();
KernelBuilder kernel_builder(graph_to_runtime_graph, _node_to_tensor);
- auto kernel = op->accept(&kernel_builder);
+ auto kernel = kernel_builder.build(op);
return std::unique_ptr<KernelT>(dynamic_cast<KernelT *>(kernel.release()));
}
@@ -175,6 +211,41 @@ TEST_F(KernelBuilderTest, AveragePool2D)
EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
}
+TEST_F(KernelBuilderTest, BatchMatMul)
+{
+ auto *lhs = createInputNode();
+ auto *rhs = createInputNode();
+
+ auto *op = createNode<luci::CircleBatchMatMul>();
+ op->x(lhs);
+ op->y(rhs);
+ op->adj_x(false);
+ op->adj_y(false);
+
+ auto kernel = buildKernel<kernels::BatchMatMul>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->x(), lhs);
+ checkTensor(kernel->y(), rhs);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().adj_x, Eq(op->adj_x()));
+ EXPECT_THAT(kernel->params().adj_y, Eq(op->adj_y()));
+}
+
+TEST_F(KernelBuilderTest, Cast)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleCast>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Cast>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
TEST_F(KernelBuilderTest, Concatenation)
{
auto *input1 = createInputNode();
@@ -192,6 +263,7 @@ TEST_F(KernelBuilderTest, Concatenation)
checkTensor(kernel->input(1), input2);
checkTensor(kernel->output(), op);
EXPECT_THAT(kernel->params().axis, Eq(op->axis()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
}
TEST_F(KernelBuilderTest, Conv2D)
@@ -279,6 +351,26 @@ TEST_F(KernelBuilderTest, DepthwiseConv2D)
EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
}
+TEST_F(KernelBuilderTest, Div)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleDiv>();
+ op->x(input1);
+ op->y(input2);
+
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::Div>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
TEST_F(KernelBuilderTest, Elu)
{
auto *input = createInputNode();
@@ -293,6 +385,68 @@ TEST_F(KernelBuilderTest, Elu)
checkTensor(kernel->output(), op);
}
+TEST_F(KernelBuilderTest, Exp)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleExp>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Exp>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Floor)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleFloor>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Floor>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, FloorDiv)
+{
+ auto *x = createInputNode();
+ auto *y = createInputNode();
+
+ auto *op = createNode<luci::CircleFloorDiv>();
+ op->x(x);
+ op->y(y);
+
+ auto kernel = buildKernel<kernels::FloorDiv>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->x(), x);
+ checkTensor(kernel->y(), y);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Equal)
+{
+ auto *x_input = createInputNode();
+ auto *y_input = createInputNode();
+
+ auto *op = createNode<luci::CircleEqual>();
+ op->x(x_input);
+ op->y(y_input);
+
+ auto kernel = buildKernel<kernels::Equal>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->x(), x_input);
+ checkTensor(kernel->y(), y_input);
+ checkTensor(kernel->output(), op);
+}
+
TEST_F(KernelBuilderTest, FullyConnected)
{
auto *input = createInputNode();
@@ -316,6 +470,65 @@ TEST_F(KernelBuilderTest, FullyConnected)
EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
}
+TEST_F(KernelBuilderTest, Greater)
+{
+ auto *x_input = createInputNode();
+ auto *y_input = createInputNode();
+
+ auto *op = createNode<luci::CircleGreater>();
+ op->x(x_input);
+ op->y(y_input);
+
+ auto kernel = buildKernel<kernels::Greater>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->x(), x_input);
+ checkTensor(kernel->y(), y_input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, GreaterEqual)
+{
+ auto *x_input = createInputNode();
+ auto *y_input = createInputNode();
+
+ auto *op = createNode<luci::CircleGreaterEqual>();
+ op->x(x_input);
+ op->y(y_input);
+
+ auto kernel = buildKernel<kernels::GreaterEqual>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->x(), x_input);
+ checkTensor(kernel->y(), y_input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, InstanceNorm)
+{
+ auto *input = createInputNode();
+ auto *gamma = createInputNode();
+ auto *beta = createInputNode();
+
+ auto *op = createNode<luci::CircleInstanceNorm>();
+ op->input(input);
+ op->gamma(gamma);
+ op->beta(beta);
+
+ op->epsilon(1e-05);
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::InstanceNorm>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->gamma(), gamma);
+ checkTensor(kernel->beta(), beta);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().epsilon, Eq(op->epsilon()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
TEST_F(KernelBuilderTest, L2Normalize)
{
auto *input = createInputNode();
@@ -377,6 +590,40 @@ TEST_F(KernelBuilderTest, LeakyRelu)
EXPECT_THAT(kernel->params().alpha, Eq(op->alpha()));
}
+TEST_F(KernelBuilderTest, Less)
+{
+ auto *x_input = createInputNode();
+ auto *y_input = createInputNode();
+
+ auto *op = createNode<luci::CircleLess>();
+ op->x(x_input);
+ op->y(y_input);
+
+ auto kernel = buildKernel<kernels::Less>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->x(), x_input);
+ checkTensor(kernel->y(), y_input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, LessEqual)
+{
+ auto *x_input = createInputNode();
+ auto *y_input = createInputNode();
+
+ auto *op = createNode<luci::CircleLessEqual>();
+ op->x(x_input);
+ op->y(y_input);
+
+ auto kernel = buildKernel<kernels::LessEqual>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->x(), x_input);
+ checkTensor(kernel->y(), y_input);
+ checkTensor(kernel->output(), op);
+}
+
TEST_F(KernelBuilderTest, LocalResponseNormalization)
{
auto *input = createInputNode();
@@ -400,6 +647,54 @@ TEST_F(KernelBuilderTest, LocalResponseNormalization)
EXPECT_THAT(kernel->params().beta, Eq(op->beta()));
}
+TEST_F(KernelBuilderTest, LogicalAnd)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleLogicalAnd>();
+ op->x(input1);
+ op->y(input2);
+
+ auto kernel = buildKernel<kernels::LogicalAnd>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, LogicalNot)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleLogicalNot>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::LogicalNot>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, LogicalOr)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleLogicalOr>();
+ op->x(input1);
+ op->y(input2);
+
+ auto kernel = buildKernel<kernels::LogicalOr>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+}
+
TEST_F(KernelBuilderTest, Logistic)
{
auto *input = createInputNode();
@@ -414,6 +709,37 @@ TEST_F(KernelBuilderTest, Logistic)
checkTensor(kernel->output(), op);
}
+TEST_F(KernelBuilderTest, LogSoftmax)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleLogSoftmax>();
+ op->logits(input);
+
+ auto kernel = buildKernel<kernels::LogSoftmax>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Maximum)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleMaximum>();
+ op->x(input1);
+ op->y(input2);
+
+ auto kernel = buildKernel<kernels::Maximum>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+}
+
TEST_F(KernelBuilderTest, MaxPool2D)
{
auto *input = createInputNode();
@@ -461,6 +787,23 @@ TEST_F(KernelBuilderTest, Mean)
EXPECT_THAT(kernel->params().keep_dims, Eq(op->keep_dims()));
}
+TEST_F(KernelBuilderTest, Minimum)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleMinimum>();
+ op->x(input1);
+ op->y(input2);
+
+ auto kernel = buildKernel<kernels::Minimum>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+}
+
TEST_F(KernelBuilderTest, Mul)
{
auto *input1 = createInputNode();
@@ -481,6 +824,62 @@ TEST_F(KernelBuilderTest, Mul)
EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
}
+TEST_F(KernelBuilderTest, Neg)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleNeg>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Neg>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, NotEqual)
+{
+ auto *x_input = createInputNode();
+ auto *y_input = createInputNode();
+
+ auto *op = createNode<luci::CircleNotEqual>();
+ op->x(x_input);
+ op->y(y_input);
+
+ auto kernel = buildKernel<kernels::NotEqual>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->x(), x_input);
+ checkTensor(kernel->y(), y_input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, OneHot)
+{
+ auto *indices = createInputNode();
+ auto *depth = createInputNode();
+ auto *on_value = createInputNode();
+ auto *off_value = createInputNode();
+ auto axis = 1;
+
+ auto *op = createNode<luci::CircleOneHot>();
+ op->indices(indices);
+ op->depth(depth);
+ op->on_value(on_value);
+ op->off_value(off_value);
+ op->axis(axis);
+
+ auto kernel = buildKernel<kernels::OneHot>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->indices(), indices);
+ checkTensor(kernel->depth(), depth);
+ checkTensor(kernel->on_value(), on_value);
+ checkTensor(kernel->off_value(), off_value);
+ EXPECT_THAT(kernel->params().axis, Eq(op->axis()));
+}
+
TEST_F(KernelBuilderTest, Pad)
{
auto *input = createInputNode();
@@ -498,6 +897,88 @@ TEST_F(KernelBuilderTest, Pad)
checkTensor(kernel->output(), op);
}
+TEST_F(KernelBuilderTest, PadV2)
+{
+ auto *input = createInputNode();
+ auto *paddings = createInputNode();
+ auto *constant_values = createInputNode();
+
+ auto *op = createNode<luci::CirclePadV2>();
+ op->input(input);
+ op->paddings(paddings);
+ op->constant_values(constant_values);
+
+ auto kernel = buildKernel<kernels::PadV2>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->paddings(), paddings);
+ checkTensor(kernel->constant_values(), constant_values);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Pow)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CirclePow>();
+ op->x(input1);
+ op->y(input2);
+
+ auto kernel = buildKernel<kernels::Pow>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, PRelu)
+{
+ auto *input = createInputNode();
+ auto *alpha = createInputNode();
+
+ auto *op = createNode<luci::CirclePRelu>();
+ op->input(input);
+ op->alpha(alpha);
+
+ auto kernel = buildKernel<kernels::PRelu>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->alpha(), alpha);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Relu)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleRelu>();
+ op->features(input);
+
+ auto kernel = buildKernel<kernels::Relu>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Relu6)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleRelu6>();
+ op->features(input);
+
+ auto kernel = buildKernel<kernels::Relu6>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
TEST_F(KernelBuilderTest, Reshape)
{
auto *input = createInputNode();
@@ -515,6 +996,48 @@ TEST_F(KernelBuilderTest, Reshape)
checkTensor(kernel->output(), op);
}
+TEST_F(KernelBuilderTest, ResizeBilinear)
+{
+ auto *input = createInputNode();
+ auto *size = createInputNode();
+
+ auto *op = createNode<luci::CircleResizeBilinear>();
+ op->input(input);
+ op->size(size);
+ op->align_corners(true);
+ op->half_pixel_centers(true);
+
+ auto kernel = buildKernel<kernels::ResizeBilinear>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->size(), size);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().align_corners, Eq(op->align_corners()));
+ EXPECT_THAT(kernel->params().half_pixel_centers, Eq(op->half_pixel_centers()));
+}
+
+TEST_F(KernelBuilderTest, ResizeNearestNeighbor)
+{
+ auto *input = createInputNode();
+ auto *size = createInputNode();
+
+ auto *op = createNode<luci::CircleResizeNearestNeighbor>();
+ op->input(input);
+ op->size(size);
+ op->align_corners(true);
+
+ auto kernel = buildKernel<kernels::ResizeNearestNeighbor>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->size(), size);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().align_corners, Eq(op->align_corners()));
+  // TODO half_pixel_centers is not yet implemented on CircleResizeNearestNeighbor;
+  // update this test once it is added.
+}
+
TEST_F(KernelBuilderTest, ReverseV2)
{
auto *input = createInputNode();
@@ -524,7 +1047,7 @@ TEST_F(KernelBuilderTest, ReverseV2)
op->tensor(input);
op->axis(axes);
- auto kernel = buildKernel<kernels::Reverse>(op);
+ auto kernel = buildKernel<kernels::ReverseV2>(op);
ASSERT_THAT(kernel, NotNull());
checkTensor(kernel->input(), input);
@@ -622,6 +1145,31 @@ TEST_F(KernelBuilderTest, Split)
checkTensor(kernel->output(1), output2);
}
+TEST_F(KernelBuilderTest, SplitV)
+{
+ auto *input = createInputNode();
+ auto *size_splits = createInputNode();
+ auto *axis = createInputNode();
+ auto *op = createNode<luci::CircleSplitV>();
+ auto *output0 = createNodeOut<luci::CircleSplitVOut>(op, 0);
+ auto *output1 = createNodeOut<luci::CircleSplitVOut>(op, 1);
+
+ op->input(input);
+ op->size_splits(size_splits);
+ op->split_dim(axis);
+
+ op->num_split(2);
+
+ auto kernel = buildKernel<kernels::SplitV>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->size_splits(), size_splits);
+ checkTensor(kernel->axis(), axis);
+ checkTensor(kernel->output(0), output0);
+ checkTensor(kernel->output(1), output1);
+}
+
TEST_F(KernelBuilderTest, Sqrt)
{
auto *input = createInputNode();
@@ -636,6 +1184,23 @@ TEST_F(KernelBuilderTest, Sqrt)
checkTensor(kernel->output(), op);
}
+TEST_F(KernelBuilderTest, SquaredDifference)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleSquaredDifference>();
+ op->x(input1);
+ op->y(input2);
+
+ auto kernel = buildKernel<kernels::SquaredDifference>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+}
+
TEST_F(KernelBuilderTest, Squeeze)
{
auto *input = createInputNode();
@@ -687,6 +1252,26 @@ TEST_F(KernelBuilderTest, StridedSlice)
EXPECT_THAT(kernel->params().shrink_axis_mask, Eq(op->shrink_axis_mask()));
}
+TEST_F(KernelBuilderTest, Sub)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleSub>();
+ op->x(input1);
+ op->y(input2);
+
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::Sub>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
TEST_F(KernelBuilderTest, Tanh)
{
auto *input = createInputNode();
@@ -734,6 +1319,7 @@ TEST_F(KernelBuilderTest, TransposeConv)
op->padding(luci::Padding::SAME);
op->stride()->h(11);
op->stride()->w(13);
+ op->fusedActivationFunction(luci::FusedActFunc::NONE);
auto kernel = buildKernel<kernels::TransposeConv>(op);
ASSERT_THAT(kernel, NotNull());
@@ -746,6 +1332,7 @@ TEST_F(KernelBuilderTest, TransposeConv)
EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
}
TEST_F(KernelBuilderTest, Unpack)
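These tests all go through buildKernel<KernelT>, which downcasts the base Kernel pointer by releasing it, dynamic_cast-ing, and rewrapping. A small sketch of that helper in isolation; note that a failed cast would leak the released object, which is tolerable in tests but worth knowing:

#include <cassert>
#include <memory>
#include <utility>

struct Kernel { virtual ~Kernel() = default; };
struct AddKernel : Kernel {};

// Rewrap a base unique_ptr as a derived one, as buildKernel does above.
template <typename KernelT> std::unique_ptr<KernelT> downcast(std::unique_ptr<Kernel> base)
{
  return std::unique_ptr<KernelT>(dynamic_cast<KernelT *>(base.release()));
}

int main()
{
  std::unique_ptr<Kernel> k = std::make_unique<AddKernel>();
  auto add = downcast<AddKernel>(std::move(k));
  assert(add != nullptr);
  return 0;
}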
diff --git a/compiler/luci-interpreter/src/loader/KernelBuilderHelper.cpp b/compiler/luci-interpreter/src/loader/KernelBuilderHelper.cpp
new file mode 100644
index 000000000..23c96a6db
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/KernelBuilderHelper.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loader/KernelBuilderHelper.h"
+
+#include <luci/IR/Nodes/CircleOutput.h>
+
+namespace luci_interpreter
+{
+
+const Tensor *KernelBuilderHelper::getInputTensor(const loco::Node *node) const
+{
+ const Tensor *tensor = _node_to_tensor.at(node);
+ assert(tensor != nullptr);
+ return tensor;
+}
+
+const Tensor *KernelBuilderHelper::getOptionalInputTensor(const loco::Node *node) const
+{
+ if (dynamic_cast<const luci::CircleOutputExclude *>(node))
+ {
+ return nullptr;
+ }
+ return getInputTensor(node);
+}
+
+Tensor *KernelBuilderHelper::getOutputTensor(const loco::Node *node) const
+{
+ Tensor *tensor = _node_to_tensor.at(node);
+ assert(tensor != nullptr);
+ return tensor;
+}
+
+std::vector<Tensor *>
+KernelBuilderHelper::getOutputTensors(const std::vector<const loco::Node *> &nodes) const
+{
+ std::vector<Tensor *> tensors;
+ tensors.reserve(nodes.size());
+ for (const loco::Node *node : nodes)
+ tensors.push_back(getOutputTensor(node));
+ return tensors;
+}
+
+RuntimeGraph *KernelBuilderHelper::getRuntimeGraph(const loco::Graph *graph) const
+{
+ RuntimeGraph *runtime_graph = _graph_to_runtime_graph.at(graph);
+ assert(runtime_graph != nullptr);
+ return runtime_graph;
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/KernelBuilderHelper.h b/compiler/luci-interpreter/src/loader/KernelBuilderHelper.h
new file mode 100644
index 000000000..d6fb253b1
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/KernelBuilderHelper.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_LOADER_KERNELBUILDER_HELPER_H
+#define LUCI_INTERPRETER_LOADER_KERNELBUILDER_HELPER_H
+
+#include "core/Kernel.h"
+#include "core/RuntimeGraph.h"
+
+#include <loco/IR/Graph.h>
+#include <loco/IR/Node.h>
+
+#include <algorithm> // std::sort in collectOutputNodes
+#include <vector>
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+
+class KernelBuilderHelper
+{
+public:
+ KernelBuilderHelper(
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
+ const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
+ : _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor)
+ {
+ }
+
+public:
+ const Tensor *getInputTensor(const loco::Node *node) const;
+ const Tensor *getOptionalInputTensor(const loco::Node *node) const;
+
+ Tensor *getOutputTensor(const loco::Node *node) const;
+ std::vector<Tensor *> getOutputTensors(const std::vector<const loco::Node *> &nodes) const;
+
+ RuntimeGraph *getRuntimeGraph(const loco::Graph *graph) const;
+
+public:
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph() const
+ {
+ return _graph_to_runtime_graph;
+ }
+
+ const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor() const
+ {
+ return _node_to_tensor;
+ }
+
+private:
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &_graph_to_runtime_graph;
+ const std::unordered_map<const loco::Node *, Tensor *> &_node_to_tensor;
+};
+
+template <typename CircleNodeOut>
+std::vector<const loco::Node *> collectOutputNodes(const loco::Node *node)
+{
+ std::vector<const CircleNodeOut *> output_nodes;
+ for (const loco::Node *loco_node : loco::succs(node))
+ {
+ output_nodes.push_back(loco::must_cast<const CircleNodeOut *>(loco_node));
+ }
+ std::sort(output_nodes.begin(), output_nodes.end(),
+ [](const CircleNodeOut *node1, const CircleNodeOut *node2) {
+ return node1->index() < node2->index();
+ });
+ return {output_nodes.cbegin(), output_nodes.cend()};
+}
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_LOADER_KERNELBUILDER_HELPER_H
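For orientation, the pattern these helpers enable looks like the following sketch of a
builder for a multi-output node. The node and kernel types are illustrative, and the
exact kernels::Split constructor order is assumed from the surrounding builders rather
than taken from this change:

    // Hypothetical builder sketch: resolve inputs via the helper, then collect the
    // CircleSplitOut successors sorted by index() and map them to output tensors.
    std::unique_ptr<Kernel> build_split_sketch(const luci::CircleSplit *node,
                                               KernelBuilderHelper &helper)
    {
      const Tensor *axis = helper.getInputTensor(node->split_dim());
      const Tensor *input = helper.getInputTensor(node->input());
      auto output_nodes = collectOutputNodes<luci::CircleSplitOut>(node);
      std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
      return std::make_unique<kernels::Split>(axis, input, std::move(outputs));
    }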
diff --git a/compiler/luci-interpreter/src/loader/ModuleLoader.cpp b/compiler/luci-interpreter/src/loader/ModuleLoader.cpp
index b9a2ae0a9..2f278b087 100644
--- a/compiler/luci-interpreter/src/loader/ModuleLoader.cpp
+++ b/compiler/luci-interpreter/src/loader/ModuleLoader.cpp
@@ -23,9 +23,10 @@ namespace luci_interpreter
ModuleLoader::ModuleLoader(const luci::Module *module, RuntimeModule *runtime_module,
RuntimeToIR &runtime_to_ir,
- std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
- : _module(module), _runtime_module(runtime_module), _runtime_to_ir(runtime_to_ir),
- _node_to_tensor(node_to_tensor)
+ std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor,
+ IMemoryManager *memory_manager)
+ : _module(module), _runtime_module(runtime_module), _runtime_to_ir(runtime_to_ir),
+ _node_to_tensor(node_to_tensor), _memory_manager(memory_manager)
{
}
@@ -35,14 +36,14 @@ void ModuleLoader::load()
// process for control flow nodes.
for (size_t i = 0; i < _module->size(); ++i)
{
- _graph_to_runtime_graph.emplace(_module->graph(i), _runtime_module->addGraph());
+ _graph_to_runtime_graph.emplace(_module->graph(i), _runtime_module->addGraph(_memory_manager));
}
for (size_t i = 0; i < _module->size(); ++i)
{
const loco::Graph *graph = _module->graph(i);
RuntimeGraph *runtime_graph = _graph_to_runtime_graph.at(graph);
GraphLoader loader(graph, runtime_graph, _runtime_to_ir, _graph_to_runtime_graph,
- _node_to_tensor);
+ _node_to_tensor, _memory_manager);
loader.loadTensors();
loader.initInputOutputTensors();
loader.loadOperators();
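With this change a single IMemoryManager instance flows from the interpreter entry
point through ModuleLoader and GraphLoader down to every RuntimeGraph. A minimal
wiring sketch, assuming some concrete IMemoryManager implementation (here a
hypothetical SimpleMemoryManager) and default-constructible loader inputs:

    // Sketch only: the manager type and surrounding setup are assumptions.
    SimpleMemoryManager memory_manager; // implements IMemoryManager
    RuntimeModule runtime_module;
    RuntimeToIR runtime_to_ir;
    std::unordered_map<const loco::Node *, Tensor *> node_to_tensor;

    ModuleLoader loader(module, &runtime_module, runtime_to_ir, node_to_tensor,
                        &memory_manager);
    loader.load(); // each RuntimeGraph is now created via addGraph(&memory_manager)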
diff --git a/compiler/luci-interpreter/src/loader/ModuleLoader.h b/compiler/luci-interpreter/src/loader/ModuleLoader.h
index 1af0ed747..11326a2ee 100644
--- a/compiler/luci-interpreter/src/loader/ModuleLoader.h
+++ b/compiler/luci-interpreter/src/loader/ModuleLoader.h
@@ -19,6 +19,7 @@
#include "core/RuntimeModule.h"
#include "loader/RuntimeToIR.h"
+#include "luci_interpreter/MemoryManager.h"
#include <luci/IR/Module.h>
@@ -32,11 +33,13 @@ class ModuleLoader
public:
ModuleLoader(const luci::Module *module, RuntimeModule *runtime_module,
RuntimeToIR &runtime_to_ir,
- std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor);
+ std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor,
+ IMemoryManager *memory_manager);
void load();
private:
+ IMemoryManager *_memory_manager;
const luci::Module *_module;
RuntimeModule *_runtime_module;
RuntimeToIR &_runtime_to_ir;
diff --git a/compiler/luci-interpreter/src/loader/nodes/Abs.cpp b/compiler/luci-interpreter/src/loader/nodes/Abs.cpp
new file mode 100644
index 000000000..394711145
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Abs.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Abs.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleAbs(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleAbs *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Abs>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Add.cpp b/compiler/luci-interpreter/src/loader/nodes/Add.cpp
new file mode 100644
index 000000000..501e84752
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Add.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Add.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleAdd(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleAdd *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ AddParams params{};
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::Add>(input1, input2, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/ArgMax.cpp b/compiler/luci-interpreter/src/loader/nodes/ArgMax.cpp
new file mode 100644
index 000000000..f3ca55744
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/ArgMax.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ArgMax.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleArgMax(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleArgMax *>(circle_node);
+ assert(node->arity() == 2);
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *axis = helper.getInputTensor(node->dimension());
+ Tensor *output = helper.getOutputTensor(node);
+
+ ArgMaxParams params{};
+ params.output_type = node->output_type();
+
+ return std::make_unique<kernels::ArgMax>(input, axis, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp b/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp
new file mode 100644
index 000000000..a8135706f
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/AveragePool2D.h"
+#include <luci/Plan/CircleNodeExecutionPlan.h>
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleAveragePool2D(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleAveragePool2D *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->value());
+ Tensor *output = helper.getOutputTensor(node);
+
+ Pool2DParams params{};
+ params.padding = node->padding();
+ params.filter_height = node->filter()->h();
+ params.filter_width = node->filter()->w();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+ params.activation = node->fusedActivationFunction();
+
+ // It is unknown what data will be stored in the scratchpad tensor,
+ // so UINT8 is used as the most general option.
+ auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, "");
+ scratchpad->set_observable(false);
+ scratchpad->set_data_buffer(nullptr);
+ // If the node has an execution plan, read the memory offset for the scratchpad
+ // temporary tensor from the beginning of the shared memory buffer.
+ // Used by the Static Memory Manager.
+ // TODO move tensor offset initialization to one place
+ if (luci::has_execution_plan(node))
+ {
+ const auto execution_plan = luci::get_execution_plan(node);
+ // Check whether the offset for the current CircleAveragePool2D temporary was found.
+ if (execution_plan.offsets().size() > 1)
+ // If so, keep this offset in the scratchpad tensor.
+ scratchpad->set_offset(execution_plan.offsets().at(1));
+ }
+ Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad));
+
+ return std::make_unique<kernels::AveragePool2D>(input, output, tmp, params);
+}
+
+} // namespace luci_interpreter
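The scratchpad pattern above (allocate a placeholder tensor, then pick up
offsets().at(1) when an execution plan is present) recurs below in BatchMatMul,
Conv2D, and DepthwiseConv2D, and the in-code TODO asks for it to be centralized.
One possible shared helper, sketched here as an illustration only and assuming the
convention that offsets().at(0) is the node's output and later entries are its
temporaries:

    // Hypothetical helper consolidating the repeated scratchpad-offset logic.
    // Note: per-kernel asserts (e.g. BatchMatMul's size() == 3) are omitted here.
    inline void set_scratchpad_offset_if_planned(const luci::CircleNode *node,
                                                 Tensor *scratchpad, size_t temp_index)
    {
      if (!luci::has_execution_plan(node))
        return;
      const auto plan = luci::get_execution_plan(node);
      if (plan.offsets().size() > temp_index)
        scratchpad->set_offset(plan.offsets().at(temp_index));
    }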
diff --git a/compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp b/compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp
new file mode 100644
index 000000000..9da2f6d93
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/BatchMatMul.h"
+#include <luci/Plan/CircleNodeExecutionPlan.h>
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleBatchMatMul(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleBatchMatMul *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *lhs = helper.getInputTensor(node->x());
+ const Tensor *rhs = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ auto lhs_scratchpad =
+ std::make_unique<Tensor>(lhs->element_type(), Shape({}), AffineQuantization{}, "");
+ lhs_scratchpad->set_observable(false);
+ lhs_scratchpad->set_data_buffer(nullptr);
+ auto rhs_scratchpad =
+ std::make_unique<Tensor>(rhs->element_type(), Shape({}), AffineQuantization{}, "");
+ rhs_scratchpad->set_observable(false);
+ rhs_scratchpad->set_data_buffer(nullptr);
+ // If the node has an execution plan, read the memory offsets for the scratchpad
+ // temporary tensors from the beginning of the shared memory buffer.
+ // Used by the Static Memory Manager.
+ // TODO move tensor offset initialization to one place
+ if (luci::has_execution_plan(node))
+ {
+ const auto execution_plan = luci::get_execution_plan(node);
+ // Check whether the offsets for the current BatchMatMul temporaries were found.
+ if (execution_plan.offsets().size() > 1)
+ {
+ assert(execution_plan.offsets().size() == 3);
+
+ // If so, keep these offsets in the scratchpad tensors.
+ lhs_scratchpad->set_offset(execution_plan.offsets().at(1));
+ rhs_scratchpad->set_offset(execution_plan.offsets().at(2));
+ }
+ }
+ Tensor *lhs_tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(lhs_scratchpad));
+ Tensor *rhs_tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(rhs_scratchpad));
+
+ BatchMatMulParams params;
+ params.adj_x = node->adj_x();
+ params.adj_y = node->adj_y();
+
+ return std::make_unique<kernels::BatchMatMul>(lhs, rhs, output, lhs_tmp, rhs_tmp, params);
+}
+
+} // namespace luci_interpreter
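Under the same offset convention, BatchMatMul's plan carries three entries (output,
lhs temporary, rhs temporary). With the hypothetical helper sketched after
AveragePool2D above, the block here would reduce to:

    // Assumes offsets() layout [output, lhs scratchpad, rhs scratchpad].
    set_scratchpad_offset_if_planned(node, lhs_scratchpad.get(), 1);
    set_scratchpad_offset_if_planned(node, rhs_scratchpad.get(), 2);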
diff --git a/compiler/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp b/compiler/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp
new file mode 100644
index 000000000..ac6ebb30f
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/BatchToSpaceND.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleBatchToSpaceND(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleBatchToSpaceND *>(circle_node);
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *block_shape = helper.getInputTensor(node->block_shape());
+ const Tensor *crops = helper.getInputTensor(node->crops());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::BatchToSpaceND>(input, block_shape, crops, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Builders.h b/compiler/luci-interpreter/src/loader/nodes/Builders.h
new file mode 100644
index 000000000..eab284008
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Builders.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H
+#define LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H
+
+#include "loader/KernelBuilderHelper.h"
+
+#include "luci/IR/CircleNodes.h"
+
+namespace luci_interpreter
+{
+
+#define REGISTER_KERNEL(name) \
+ std::unique_ptr<Kernel> build_kernel_Circle##name(const luci::CircleNode *circle_node, \
+ KernelBuilderHelper &helper);
+
+#include "KernelsToBuild.lst"
+
+#undef REGISTER_KERNEL
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H
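The REGISTER_KERNEL X-macro turns each entry of KernelsToBuild.lst into a builder
declaration whose definition lives in the per-node source files added in this change.
For example, an entry REGISTER_KERNEL(Add) expands to:

    std::unique_ptr<Kernel> build_kernel_CircleAdd(const luci::CircleNode *circle_node,
                                                   KernelBuilderHelper &helper);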
diff --git a/compiler/luci-interpreter/src/loader/nodes/Cast.cpp b/compiler/luci-interpreter/src/loader/nodes/Cast.cpp
new file mode 100644
index 000000000..a16354c96
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Cast.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Cast.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleCast(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleCast *>(circle_node);
+
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Cast>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Concatenation.cpp b/compiler/luci-interpreter/src/loader/nodes/Concatenation.cpp
new file mode 100644
index 000000000..ba2564ea2
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Concatenation.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Concatenation.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleConcatenation(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleConcatenation *>(circle_node);
+ std::vector<const Tensor *> inputs(node->numValues());
+ for (uint32_t i = 0; i < node->numValues(); ++i)
+ {
+ inputs[i] = helper.getInputTensor(node->values(i));
+ }
+ Tensor *output = helper.getOutputTensor(node);
+
+ ConcatenationParams params{};
+ params.axis = node->axis();
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::Concatenation>(std::move(inputs), output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp b/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp
new file mode 100644
index 000000000..218165e20
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Conv2D.h"
+#include <luci/Plan/CircleNodeExecutionPlan.h>
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleConv2D(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleConv2D *>(circle_node);
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *filter = helper.getInputTensor(node->filter());
+ const Tensor *bias = helper.getOptionalInputTensor(node->bias());
+ Tensor *output = helper.getOutputTensor(node);
+
+ // It is unknown what data will be stored in the scratchpad tensor,
+ // so UINT8 is used as the most general option.
+ auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, "");
+ scratchpad->set_observable(false);
+ scratchpad->set_data_buffer(nullptr);
+ // If the node has an execution plan, read the memory offset for the scratchpad
+ // temporary tensor from the beginning of the shared memory buffer.
+ // Used by the Static Memory Manager.
+ // TODO move tensor offset initialization to one place
+ if (luci::has_execution_plan(node))
+ {
+ const auto execution_plan = luci::get_execution_plan(node);
+ // Check whether the offset for the current CircleConv2D temporary was found.
+ if (execution_plan.offsets().size() > 1)
+ // If so, keep this offset in the scratchpad tensor.
+ scratchpad->set_offset(execution_plan.offsets().at(1));
+ }
+ Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad));
+
+ Conv2DParams params{};
+ params.padding = node->padding();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+ params.dilation_height_factor = node->dilation()->h();
+ params.dilation_width_factor = node->dilation()->w();
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::Conv2D>(input, filter, bias, output, tmp, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/DepthToSpace.cpp b/compiler/luci-interpreter/src/loader/nodes/DepthToSpace.cpp
new file mode 100644
index 000000000..174946367
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/DepthToSpace.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/DepthToSpace.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleDepthToSpace(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleDepthToSpace *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ Tensor *output = helper.getOutputTensor(node);
+
+ DepthToSpaceParams params{};
+ params.block_size = node->block_size();
+
+ return std::make_unique<kernels::DepthToSpace>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp b/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp
new file mode 100644
index 000000000..8af1e3b58
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/DepthwiseConv2D.h"
+#include <luci/Plan/CircleNodeExecutionPlan.h>
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleDepthwiseConv2D(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleDepthwiseConv2D *>(circle_node);
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *filter = helper.getInputTensor(node->filter());
+ const Tensor *bias = helper.getInputTensor(node->bias());
+ Tensor *output = helper.getOutputTensor(node);
+
+ DepthwiseConv2DParams params{};
+ params.padding = node->padding();
+ params.depth_multiplier = node->depthMultiplier();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+ params.dilation_height_factor = node->dilation()->h();
+ params.dilation_width_factor = node->dilation()->w();
+ params.activation = node->fusedActivationFunction();
+
+ // It is unknown what data will be stored in the scratchpad tensor,
+ // so UINT8 is used as the most general option.
+ auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, "");
+ scratchpad->set_observable(false);
+ scratchpad->set_data_buffer(nullptr);
+ // If the node has an execution plan, read the memory offset for the scratchpad
+ // temporary tensor from the beginning of the shared memory buffer.
+ // Used by the Static Memory Manager.
+ // TODO move tensor offset initialization to one place
+ if (luci::has_execution_plan(node))
+ {
+ const auto execution_plan = luci::get_execution_plan(node);
+ // Check whether the offset for the current CircleDepthwiseConv2D temporary was found.
+ if (execution_plan.offsets().size() > 1)
+ // If so, keep this offset in the scratchpad tensor.
+ scratchpad->set_offset(execution_plan.offsets().at(1));
+ }
+ Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad));
+
+ return std::make_unique<kernels::DepthwiseConv2D>(input, filter, bias, output, tmp, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp b/compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp
new file mode 100644
index 000000000..787322e9b
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Dequantize.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleDequantize(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleDequantize *>(circle_node);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Dequantize>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Div.cpp b/compiler/luci-interpreter/src/loader/nodes/Div.cpp
new file mode 100644
index 000000000..0611dfdab
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Div.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Div.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleDiv(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleDiv *>(circle_node);
+ assert(node->arity() == 2);
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ DivParams params{};
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::Div>(input1, input2, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Elu.cpp b/compiler/luci-interpreter/src/loader/nodes/Elu.cpp
new file mode 100644
index 000000000..a79985e3b
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Elu.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Elu.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleElu(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleElu *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->features());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Elu>(input, output);
+}
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Equal.cpp b/compiler/luci-interpreter/src/loader/nodes/Equal.cpp
new file mode 100644
index 000000000..59692883f
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Equal.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Equal.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleEqual(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+
+{
+ const auto *node = loco::must_cast<const luci::CircleEqual *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Equal>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Exp.cpp b/compiler/luci-interpreter/src/loader/nodes/Exp.cpp
new file mode 100644
index 000000000..30d11cb89
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Exp.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Exp.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleExp(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleExp *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Exp>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/ExpandDims.cpp b/compiler/luci-interpreter/src/loader/nodes/ExpandDims.cpp
new file mode 100644
index 000000000..9840c34e5
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/ExpandDims.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ExpandDims.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleExpandDims(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleExpandDims *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *axis = helper.getInputTensor(node->axis());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::ExpandDims>(input, axis, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Fill.cpp b/compiler/luci-interpreter/src/loader/nodes/Fill.cpp
new file mode 100644
index 000000000..3aefdf1c5
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Fill.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Fill.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleFill(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleFill *>(circle_node);
+ assert(node->arity() == 2);
+
+ const auto dims = helper.getInputTensor(node->dims());
+ const auto value = helper.getInputTensor(node->value());
+ auto output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Fill>(dims, value, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Floor.cpp b/compiler/luci-interpreter/src/loader/nodes/Floor.cpp
new file mode 100644
index 000000000..e0a223116
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Floor.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Floor.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleFloor(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleFloor *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Floor>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/FloorDiv.cpp b/compiler/luci-interpreter/src/loader/nodes/FloorDiv.cpp
new file mode 100644
index 000000000..a45d89e38
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/FloorDiv.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/FloorDiv.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleFloorDiv(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleFloorDiv *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::FloorDiv>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/FloorMod.cpp b/compiler/luci-interpreter/src/loader/nodes/FloorMod.cpp
new file mode 100644
index 000000000..a4852f13e
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/FloorMod.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/FloorMod.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleFloorMod(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleFloorMod *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::FloorMod>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp b/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp
new file mode 100644
index 000000000..b7b742b8a
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/FullyConnected.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleFullyConnected(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleFullyConnected *>(circle_node);
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *weights = helper.getInputTensor(node->weights());
+ const Tensor *bias = helper.getOptionalInputTensor(node->bias());
+ Tensor *output = helper.getOutputTensor(node);
+
+ FullyConnectedParams params{};
+ params.activation = node->fusedActivationFunction();
+ params.keep_num_dims = node->keep_num_dims();
+
+ return std::make_unique<kernels::FullyConnected>(input, weights, bias, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Gather.cpp b/compiler/luci-interpreter/src/loader/nodes/Gather.cpp
new file mode 100644
index 000000000..2ee2906e0
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Gather.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Gather.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleGather(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleGather *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *params = helper.getInputTensor(node->params());
+ const Tensor *indices = helper.getInputTensor(node->indices());
+ Tensor *output = helper.getOutputTensor(node);
+
+ GatherParams gparams{};
+ gparams.axis = node->axis();
+ // TODO support batch_dims
+ gparams.batch_dims = 0;
+
+ return std::make_unique<kernels::Gather>(params, indices, output, gparams);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Gelu.cpp b/compiler/luci-interpreter/src/loader/nodes/Gelu.cpp
new file mode 100644
index 000000000..fc77a5817
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Gelu.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Gelu.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleGelu(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleGelu *>(circle_node);
+ assert(node->arity() == 1);
+ const Tensor *input = helper.getInputTensor(node->features());
+ Tensor *output = helper.getOutputTensor(node);
+
+ GeluParams params{};
+ params.approximate = node->approximate();
+
+ return std::make_unique<kernels::Gelu>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Greater.cpp b/compiler/luci-interpreter/src/loader/nodes/Greater.cpp
new file mode 100644
index 000000000..80aa63cf0
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Greater.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Greater.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleGreater(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleGreater *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Greater>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/GreaterEqual.cpp b/compiler/luci-interpreter/src/loader/nodes/GreaterEqual.cpp
new file mode 100644
index 000000000..272f2843b
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/GreaterEqual.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/GreaterEqual.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleGreaterEqual(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleGreaterEqual *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::GreaterEqual>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/HardSwish.cpp b/compiler/luci-interpreter/src/loader/nodes/HardSwish.cpp
new file mode 100644
index 000000000..2e62f2402
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/HardSwish.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/HardSwish.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleHardSwish(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleHardSwish *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->features());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::HardSwish>(input, output);
+}
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/If.cpp b/compiler/luci-interpreter/src/loader/nodes/If.cpp
new file mode 100644
index 000000000..3ac7d4941
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/If.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/If.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleIf(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleIf *>(circle_node);
+ auto output_nodes = collectOutputNodes<luci::CircleIfOut>(node);
+ assert(node->arity() == 1 + node->input_count());
+ assert(output_nodes.size() == static_cast<size_t>(node->output_count()));
+
+ const Tensor *cond = helper.getInputTensor(node->cond());
+ std::vector<const Tensor *> inputs(node->input_count());
+ for (uint32_t i = 0; i < node->input_count(); ++i)
+ {
+ inputs[i] = helper.getInputTensor(node->input(i));
+ }
+ std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
+
+ RuntimeGraph *then_graph = helper.getRuntimeGraph(node->then_graph());
+ RuntimeGraph *else_graph = helper.getRuntimeGraph(node->else_graph());
+
+ return std::make_unique<kernels::If>(cond, std::move(inputs), std::move(outputs), then_graph,
+ else_graph);
+}
+
+} // namespace luci_interpreter
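
If is the first control-flow builder in this series: besides the usual tensor lookup it collects its graph-level outputs through CircleIfOut proxy nodes and resolves the then/else subgraphs to RuntimeGraph pointers. For orientation, this is the KernelBuilderHelper surface the loader files in this patch rely on, reconstructed from their call sites only; the authoritative declaration lives elsewhere in the loader sources and may differ in detail:

    // Reconstructed from usage in this diff -- not the authoritative header.
    class KernelBuilderHelper
    {
    public:
      // Map a producer node to the tensor feeding this kernel.
      const Tensor *getInputTensor(const loco::Node *node) const;
      // Map a node (or its output proxies) to the tensor(s) the kernel writes.
      Tensor *getOutputTensor(const loco::Node *node) const;
      std::vector<Tensor *> getOutputTensors(const std::vector<const loco::Node *> &nodes) const;
      // Resolve a subgraph for control-flow kernels such as If.
      RuntimeGraph *getRuntimeGraph(const loco::Graph *graph) const;
    };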
diff --git a/compiler/luci-interpreter/src/loader/nodes/InstanceNorm.cpp b/compiler/luci-interpreter/src/loader/nodes/InstanceNorm.cpp
new file mode 100644
index 000000000..06031e5bc
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/InstanceNorm.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/InstanceNorm.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleInstanceNorm(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleInstanceNorm *>(circle_node);
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *gamma = helper.getInputTensor(node->gamma());
+ const Tensor *beta = helper.getInputTensor(node->beta());
+
+ Tensor *output = helper.getOutputTensor(node);
+
+ InstanceNormParams params{};
+ params.epsilon = node->epsilon();
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::InstanceNorm>(input, gamma, beta, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/L2Normalize.cpp b/compiler/luci-interpreter/src/loader/nodes/L2Normalize.cpp
new file mode 100644
index 000000000..6e22e6d4e
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/L2Normalize.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/L2Normalize.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleL2Normalize(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleL2Normalize *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ L2NormParams params{};
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::L2Normalize>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/L2Pool2D.cpp b/compiler/luci-interpreter/src/loader/nodes/L2Pool2D.cpp
new file mode 100644
index 000000000..95b55896f
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/L2Pool2D.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/L2Pool2D.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleL2Pool2D(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleL2Pool2D *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->value());
+ Tensor *output = helper.getOutputTensor(node);
+
+ Pool2DParams params{};
+ params.padding = node->padding();
+ params.filter_height = node->filter()->h();
+ params.filter_width = node->filter()->w();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::L2Pool2D>(input, output, params);
+}
+
+} // namespace luci_interpreter
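
L2Pool2D here and MaxPool2D further down fill Pool2DParams field-for-field identically. A sketch of how the extraction could be shared, using only the node accessors that appear in these two files; make_pool_params is a hypothetical name, not part of this diff:

    // Hypothetical helper: both pooling builders copy the same six fields.
    template <typename PoolNodeT> Pool2DParams make_pool_params(const PoolNodeT *node)
    {
      Pool2DParams params{};
      params.padding = node->padding();
      params.filter_height = node->filter()->h();
      params.filter_width = node->filter()->w();
      params.stride_height = node->stride()->h();
      params.stride_width = node->stride()->w();
      params.activation = node->fusedActivationFunction();
      return params;
    }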
diff --git a/compiler/luci-interpreter/src/loader/nodes/LeakyRelu.cpp b/compiler/luci-interpreter/src/loader/nodes/LeakyRelu.cpp
new file mode 100644
index 000000000..bbf5067b1
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/LeakyRelu.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LeakyRelu.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLeakyRelu(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLeakyRelu *>(circle_node);
+ assert(node->arity() == 1);
+ const Tensor *input = helper.getInputTensor(node->features());
+ Tensor *output = helper.getOutputTensor(node);
+
+ LeakyReluParams params{};
+ params.alpha = node->alpha();
+
+ return std::make_unique<kernels::LeakyRelu>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Less.cpp b/compiler/luci-interpreter/src/loader/nodes/Less.cpp
new file mode 100644
index 000000000..ae914ecc9
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Less.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Less.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLess(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLess *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Less>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/LessEqual.cpp b/compiler/luci-interpreter/src/loader/nodes/LessEqual.cpp
new file mode 100644
index 000000000..f1b424b55
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/LessEqual.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LessEqual.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLessEqual(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLessEqual *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::LessEqual>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp b/compiler/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp
new file mode 100644
index 000000000..962ca2d7c
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LocalResponseNormalization.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel>
+build_kernel_CircleLocalResponseNormalization(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLocalResponseNormalization *>(circle_node);
+ assert(node->arity() == 1);
+ const Tensor *input = helper.getInputTensor(node->input());
+ Tensor *output = helper.getOutputTensor(node);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = node->radius();
+ params.bias = node->bias();
+ params.alpha = node->alpha();
+ params.beta = node->beta();
+
+ return std::make_unique<kernels::LocalResponseNormalization>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Log.cpp b/compiler/luci-interpreter/src/loader/nodes/Log.cpp
new file mode 100644
index 000000000..048e3101e
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Log.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Log.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLog(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLog *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Log>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/LogSoftmax.cpp b/compiler/luci-interpreter/src/loader/nodes/LogSoftmax.cpp
new file mode 100644
index 000000000..432204115
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/LogSoftmax.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LogSoftmax.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLogSoftmax(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLogSoftmax *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->logits());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::LogSoftmax>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/LogicalAnd.cpp b/compiler/luci-interpreter/src/loader/nodes/LogicalAnd.cpp
new file mode 100644
index 000000000..bf3cb671a
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/LogicalAnd.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LogicalAnd.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLogicalAnd(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLogicalAnd *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::LogicalAnd>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/LogicalNot.cpp b/compiler/luci-interpreter/src/loader/nodes/LogicalNot.cpp
new file mode 100644
index 000000000..fefcd9a06
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/LogicalNot.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LogicalNot.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLogicalNot(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLogicalNot *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::LogicalNot>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/LogicalOr.cpp b/compiler/luci-interpreter/src/loader/nodes/LogicalOr.cpp
new file mode 100644
index 000000000..a416cb401
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/LogicalOr.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LogicalOr.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLogicalOr(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLogicalOr *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::LogicalOr>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Logistic.cpp b/compiler/luci-interpreter/src/loader/nodes/Logistic.cpp
new file mode 100644
index 000000000..4a69deef1
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Logistic.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Logistic.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLogistic(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLogistic *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Logistic>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/MaxPool2D.cpp b/compiler/luci-interpreter/src/loader/nodes/MaxPool2D.cpp
new file mode 100644
index 000000000..f66a206ca
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/MaxPool2D.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/MaxPool2D.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMaxPool2D(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleMaxPool2D *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->value());
+ Tensor *output = helper.getOutputTensor(node);
+
+ Pool2DParams params{};
+ params.padding = node->padding();
+ params.filter_height = node->filter()->h();
+ params.filter_width = node->filter()->w();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::MaxPool2D>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Maximum.cpp b/compiler/luci-interpreter/src/loader/nodes/Maximum.cpp
new file mode 100644
index 000000000..d0bff776a
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Maximum.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Maximum.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMaximum(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleMaximum *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Maximum>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Mean.cpp b/compiler/luci-interpreter/src/loader/nodes/Mean.cpp
new file mode 100644
index 000000000..0dec63e79
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Mean.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Mean.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMean(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleMean *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *axes = helper.getInputTensor(node->reduction_indices());
+ Tensor *output = helper.getOutputTensor(node);
+
+ auto temp_index_unique =
+ std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, "");
+ temp_index_unique->set_observable(false);
+ temp_index_unique->set_data_buffer(nullptr);
+ Tensor *temp_index =
+ helper.getRuntimeGraph(node->graph())->addTensor(std::move(temp_index_unique));
+
+ auto resolved_axes_unique =
+ std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, "");
+ resolved_axes_unique->set_observable(false);
+ resolved_axes_unique->set_data_buffer(nullptr);
+ Tensor *resolved_axes =
+ helper.getRuntimeGraph(node->graph())->addTensor(std::move(resolved_axes_unique));
+
+ auto temp_sum_unique =
+ std::make_unique<Tensor>(input->element_type(), Shape({}), AffineQuantization{}, "");
+ temp_sum_unique->set_observable(false);
+ temp_sum_unique->set_data_buffer(nullptr);
+ Tensor *temp_sum = helper.getRuntimeGraph(node->graph())->addTensor(std::move(temp_sum_unique));
+
+ ReducerParams params{};
+ params.keep_dims = node->keep_dims();
+
+ return std::make_unique<kernels::Mean>(input, axes, output, temp_index, resolved_axes, temp_sum,
+ params);
+}
+
+} // namespace luci_interpreter
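
Mean is the first reducer builder in this patch to allocate scratch tensors: each temporary is created non-observable with no data buffer, ownership moves into the RuntimeGraph, and the kernel keeps only a borrowed pointer. ReduceMax and ReduceProd below repeat the same idiom (minus temp_sum). A sketch of that step in isolation, using only the Tensor and RuntimeGraph calls visible in this diff; make_scratch_tensor is a hypothetical name:

    // Hypothetical helper for the scratch-tensor idiom in Mean/ReduceMax/ReduceProd.
    static Tensor *make_scratch_tensor(RuntimeGraph *graph, DataType dtype)
    {
      auto tensor = std::make_unique<Tensor>(dtype, Shape({}), AffineQuantization{}, "");
      tensor->set_observable(false);    // internal only, never a user-visible output
      tensor->set_data_buffer(nullptr); // storage is assigned later by the runtime
      return graph->addTensor(std::move(tensor)); // graph owns it; the kernel borrows
    }

Mean would then call it three times: temp_index and resolved_axes with DataType::S32, and temp_sum with input->element_type().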
diff --git a/compiler/luci-interpreter/src/loader/nodes/Minimum.cpp b/compiler/luci-interpreter/src/loader/nodes/Minimum.cpp
new file mode 100644
index 000000000..1a49c1090
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Minimum.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Minimum.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMinimum(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleMinimum *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Minimum>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/MirrorPad.cpp b/compiler/luci-interpreter/src/loader/nodes/MirrorPad.cpp
new file mode 100644
index 000000000..b221b4574
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/MirrorPad.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/MirrorPad.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMirrorPad(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleMirrorPad *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *paddings = helper.getInputTensor(node->paddings());
+ Tensor *output = helper.getOutputTensor(node);
+
+ MirrorPadParams params{};
+ params.mode = node->mode();
+
+ return std::make_unique<kernels::MirrorPad>(input, paddings, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Mul.cpp b/compiler/luci-interpreter/src/loader/nodes/Mul.cpp
new file mode 100644
index 000000000..f9984853a
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Mul.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Mul.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMul(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleMul *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ MulParams params{};
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::Mul>(input1, input2, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Neg.cpp b/compiler/luci-interpreter/src/loader/nodes/Neg.cpp
new file mode 100644
index 000000000..9a9ecf991
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Neg.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Neg.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleNeg(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleNeg *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Neg>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/NotEqual.cpp b/compiler/luci-interpreter/src/loader/nodes/NotEqual.cpp
new file mode 100644
index 000000000..3916a5854
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/NotEqual.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/NotEqual.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleNotEqual(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleNotEqual *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::NotEqual>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/OneHot.cpp b/compiler/luci-interpreter/src/loader/nodes/OneHot.cpp
new file mode 100644
index 000000000..a40160945
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/OneHot.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/OneHot.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleOneHot(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleOneHot *>(circle_node);
+ assert(node->arity() == 4);
+
+ const Tensor *indices = helper.getInputTensor(node->indices());
+ const Tensor *depth = helper.getInputTensor(node->depth());
+ const Tensor *on_value = helper.getInputTensor(node->on_value());
+ const Tensor *off_value = helper.getInputTensor(node->off_value());
+ Tensor *output = helper.getOutputTensor(node);
+
+ OneHotParams params{};
+ params.axis = node->axis();
+
+ return std::make_unique<kernels::OneHot>(indices, depth, on_value, off_value, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/PRelu.cpp b/compiler/luci-interpreter/src/loader/nodes/PRelu.cpp
new file mode 100644
index 000000000..f3d700c95
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/PRelu.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/PRelu.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CirclePRelu(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CirclePRelu *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *alpha = helper.getInputTensor(node->alpha());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::PRelu>(input, alpha, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Pack.cpp b/compiler/luci-interpreter/src/loader/nodes/Pack.cpp
new file mode 100644
index 000000000..efc5850e0
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Pack.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Pack.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CirclePack(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CirclePack *>(circle_node);
+ assert(node->arity() == node->values_count());
+
+ std::vector<const Tensor *> inputs(node->values_count());
+ for (uint32_t i = 0; i < node->values_count(); ++i)
+ {
+ inputs[i] = helper.getInputTensor(node->values(i));
+ }
+ Tensor *output = helper.getOutputTensor(node);
+
+ PackParams params{};
+ params.axis = node->axis();
+ params.values_count = node->values_count();
+
+ return std::make_unique<kernels::Pack>(std::move(inputs), output, params);
+}
+
+} // namespace luci_interpreter
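
Pack, like If earlier in this patch, gathers a variable number of inputs with an index loop before constructing the kernel. A sketch of that loop as a standalone helper; gather_input_tensors is hypothetical, and the accessor is passed in because Pack reads values(i) while If reads input(i):

    // Hypothetical helper for the variadic-input loops in Pack and If.
    template <typename GetNodeFn>
    std::vector<const Tensor *> gather_input_tensors(KernelBuilderHelper &helper,
                                                     uint32_t count, GetNodeFn get_node)
    {
      std::vector<const Tensor *> inputs(count);
      for (uint32_t i = 0; i < count; ++i)
        inputs[i] = helper.getInputTensor(get_node(i));
      return inputs;
    }

For Pack this would read: gather_input_tensors(helper, node->values_count(), [&](uint32_t i) { return node->values(i); }).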
diff --git a/compiler/luci-interpreter/src/loader/nodes/Pad.cpp b/compiler/luci-interpreter/src/loader/nodes/Pad.cpp
new file mode 100644
index 000000000..67ce997a7
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Pad.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Pad.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CirclePad(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CirclePad *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *paddings = helper.getInputTensor(node->paddings());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Pad>(input, paddings, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/PadV2.cpp b/compiler/luci-interpreter/src/loader/nodes/PadV2.cpp
new file mode 100644
index 000000000..e378a972a
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/PadV2.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/PadV2.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CirclePadV2(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CirclePadV2 *>(circle_node);
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *paddings = helper.getInputTensor(node->paddings());
+ const Tensor *constant_values = helper.getInputTensor(node->constant_values());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::PadV2>(input, paddings, constant_values, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Pow.cpp b/compiler/luci-interpreter/src/loader/nodes/Pow.cpp
new file mode 100644
index 000000000..d32fc3dbb
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Pow.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Pow.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CirclePow(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CirclePow *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Pow>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Quantize.cpp b/compiler/luci-interpreter/src/loader/nodes/Quantize.cpp
new file mode 100644
index 000000000..cb36fb6da
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Quantize.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Quantize.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleQuantize(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleQuantize *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Quantize>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/ReduceMax.cpp b/compiler/luci-interpreter/src/loader/nodes/ReduceMax.cpp
new file mode 100644
index 000000000..1a8522dd6
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/ReduceMax.cpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ReduceMax.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleReduceMax(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleReduceMax *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *axes = helper.getInputTensor(node->reduction_indices());
+ Tensor *output = helper.getOutputTensor(node);
+
+ auto temp_index_unique =
+ std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, "");
+ temp_index_unique->set_observable(false);
+ temp_index_unique->set_data_buffer(nullptr);
+ Tensor *temp_index =
+ helper.getRuntimeGraph(node->graph())->addTensor(std::move(temp_index_unique));
+
+ auto resolved_axes_unique =
+ std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, "");
+ resolved_axes_unique->set_observable(false);
+ resolved_axes_unique->set_data_buffer(nullptr);
+ Tensor *resolved_axes =
+ helper.getRuntimeGraph(node->graph())->addTensor(std::move(resolved_axes_unique));
+
+ ReducerParams params{};
+ params.keep_dims = node->keep_dims();
+
+ return std::make_unique<kernels::ReduceMax>(input, axes, output, temp_index, resolved_axes,
+ params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/ReduceProd.cpp b/compiler/luci-interpreter/src/loader/nodes/ReduceProd.cpp
new file mode 100644
index 000000000..1610e20a9
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/ReduceProd.cpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ReduceProd.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleReduceProd(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleReduceProd *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *axes = helper.getInputTensor(node->reduction_indices());
+ Tensor *output = helper.getOutputTensor(node);
+
+ auto temp_index_unique =
+ std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, "");
+ temp_index_unique->set_observable(false);
+ temp_index_unique->set_data_buffer(nullptr);
+ Tensor *temp_index =
+ helper.getRuntimeGraph(node->graph())->addTensor(std::move(temp_index_unique));
+
+ auto resolved_axes_unique =
+ std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, "");
+ resolved_axes_unique->set_observable(false);
+ resolved_axes_unique->set_data_buffer(nullptr);
+ Tensor *resolved_axes =
+ helper.getRuntimeGraph(node->graph())->addTensor(std::move(resolved_axes_unique));
+
+ ReducerParams params{};
+ params.keep_dims = node->keep_dims();
+
+ return std::make_unique<kernels::ReduceProd>(input, axes, output, temp_index, resolved_axes,
+ params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Relu.cpp b/compiler/luci-interpreter/src/loader/nodes/Relu.cpp
new file mode 100644
index 000000000..1d64c1c4e
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Relu.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Relu.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleRelu(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleRelu *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->features());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Relu>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Relu6.cpp b/compiler/luci-interpreter/src/loader/nodes/Relu6.cpp
new file mode 100644
index 000000000..e50cd2545
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Relu6.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Relu6.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleRelu6(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleRelu6 *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->features());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Relu6>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Reshape.cpp b/compiler/luci-interpreter/src/loader/nodes/Reshape.cpp
new file mode 100644
index 000000000..76ddd88a3
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Reshape.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Reshape.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleReshape(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleReshape *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->tensor());
+ const Tensor *shape = helper.getInputTensor(node->shape());
+ Tensor *output = helper.getOutputTensor(node);
+
+ // NOTE 'newShape' attribute is ignored.
+ return std::make_unique<kernels::Reshape>(input, shape, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp b/compiler/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp
new file mode 100644
index 000000000..dc2b88ad3
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ResizeBilinear.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleResizeBilinear(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleResizeBilinear *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *size = helper.getInputTensor(node->size());
+ Tensor *output = helper.getOutputTensor(node);
+
+ ResizeBilinearParams params{};
+ params.align_corners = node->align_corners();
+ params.half_pixel_centers = node->half_pixel_centers();
+
+ return std::make_unique<kernels::ResizeBilinear>(input, size, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp b/compiler/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp
new file mode 100644
index 000000000..c7058ae78
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ResizeNearestNeighbor.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel>
+build_kernel_CircleResizeNearestNeighbor(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleResizeNearestNeighbor *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *size = helper.getInputTensor(node->size());
+ Tensor *output = helper.getOutputTensor(node);
+
+ ResizeNearestNeighborParams params{};
+ params.align_corners = node->align_corners();
+ // TODO Update half_pixel_centers once CircleResizeNearestNeighbor supports it.
+ // The current CircleResizeNearestNeighbor IR node has no half_pixel_centers
+ // attribute, so the parameter defaults to false here.
+ params.half_pixel_centers = false;
+
+ return std::make_unique<kernels::ResizeNearestNeighbor>(input, size, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/ReverseV2.cpp b/compiler/luci-interpreter/src/loader/nodes/ReverseV2.cpp
new file mode 100644
index 000000000..c1a7f5350
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/ReverseV2.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ReverseV2.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleReverseV2(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleReverseV2 *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->tensor());
+ const Tensor *axes = helper.getInputTensor(node->axis());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::ReverseV2>(input, axes, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Rsqrt.cpp b/compiler/luci-interpreter/src/loader/nodes/Rsqrt.cpp
new file mode 100644
index 000000000..0714a5dba
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Rsqrt.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Rsqrt.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleRsqrt(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleRsqrt *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Rsqrt>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/SVDF.cpp b/compiler/luci-interpreter/src/loader/nodes/SVDF.cpp
new file mode 100644
index 000000000..d172ef438
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/SVDF.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SVDF.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSVDF(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSVDF *>(circle_node);
+ assert(node->arity() == 5);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *feature = helper.getInputTensor(node->weight_feature());
+ const Tensor *time = helper.getInputTensor(node->weight_time());
+ const Tensor *bias = helper.getOptionalInputTensor(node->bias());
+ const Tensor *input_activation_state = helper.getInputTensor(node->input_activation_state());
+ Tensor *output = helper.getOutputTensor(node);
+
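+ // NOTE SVDF needs several scratchpad tensors. Each is created with a placeholder
+ // shape and registered with the runtime graph so the memory planner can allocate
+ // it; the kernel is expected to resize the scratchpads during configure.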
+ auto scratchpad_tensor = std::make_unique<Tensor>(input_activation_state->element_type(),
+ Shape({}), AffineQuantization{}, "");
+ scratchpad_tensor->set_observable(false);
+ scratchpad_tensor->set_data_buffer(nullptr);
+ Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+ DataType data_type = input->element_type() == DataType::S8 ? DataType::S32 : DataType::FLOAT32;
+
+ scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+ scratchpad_tensor->set_observable(false);
+ scratchpad_tensor->set_data_buffer(nullptr);
+ Tensor *tmp_1 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+ if (data_type == DataType::FLOAT32 &&
+ (feature->element_type() == DataType::S8 || feature->element_type() == DataType::U8))
+ {
+ data_type = feature->element_type();
+ }
+
+ scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+ scratchpad_tensor->set_observable(false);
+ scratchpad_tensor->set_data_buffer(nullptr);
+ Tensor *tmp_2 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+ data_type = DataType::FLOAT32;
+
+ scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+ scratchpad_tensor->set_observable(false);
+ scratchpad_tensor->set_data_buffer(nullptr);
+ Tensor *tmp_3 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+ scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+ scratchpad_tensor->set_observable(false);
+ scratchpad_tensor->set_data_buffer(nullptr);
+ Tensor *tmp_4 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+ scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+ scratchpad_tensor->set_observable(false);
+ scratchpad_tensor->set_data_buffer(nullptr);
+ Tensor *tmp_5 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+ scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+ scratchpad_tensor->set_observable(false);
+ scratchpad_tensor->set_data_buffer(nullptr);
+ Tensor *tmp_6 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+ SVDFParams params{};
+ params.activation = node->fusedActivationFunction();
+ params.svdf_rank = node->svdf_rank();
+ params.asymmetric_quantize_inputs = node->asymmetric_quantize_inputs();
+
+ return std::make_unique<kernels::SVDF>(input, feature, time, bias, input_activation_state, output,
+ tmp, tmp_1, tmp_2, tmp_3, tmp_4, tmp_5, tmp_6, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Select.cpp b/compiler/luci-interpreter/src/loader/nodes/Select.cpp
new file mode 100644
index 000000000..a0f18047b
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Select.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Select.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSelect(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSelect *>(circle_node);
+ assert(node->arity() == 3);
+
+ const Tensor *c = helper.getInputTensor(node->condition());
+ const Tensor *t = helper.getInputTensor(node->t());
+ const Tensor *e = helper.getInputTensor(node->e());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Select>(c, t, e, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Shape.cpp b/compiler/luci-interpreter/src/loader/nodes/Shape.cpp
new file mode 100644
index 000000000..d1edbc794
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Shape.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Shape.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleShape(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleShape *>(circle_node);
+ assert(node->arity() == 1);
+
+ const auto input = helper.getInputTensor(node->input());
+ auto output = helper.getOutputTensor(node);
+
+ ShapeParams shape_params{};
+ shape_params.out_type = node->out_type();
+
+ return std::make_unique<kernels::ShapeKernel>(input, output, shape_params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Slice.cpp b/compiler/luci-interpreter/src/loader/nodes/Slice.cpp
new file mode 100644
index 000000000..60ac6417c
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Slice.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Slice.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSlice(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSlice *>(circle_node);
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *begin = helper.getInputTensor(node->begin());
+ const Tensor *size = helper.getInputTensor(node->size());
+
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Slice>(input, begin, size, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Softmax.cpp b/compiler/luci-interpreter/src/loader/nodes/Softmax.cpp
new file mode 100644
index 000000000..f41f63f6f
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Softmax.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Softmax.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSoftmax(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSoftmax *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->logits());
+ Tensor *output = helper.getOutputTensor(node);
+
+ SoftmaxParams params{};
+ params.beta = node->beta();
+
+ return std::make_unique<kernels::Softmax>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp b/compiler/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp
new file mode 100644
index 000000000..b6e6cf516
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SpaceToBatchND.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSpaceToBatchND(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSpaceToBatchND *>(circle_node);
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *block_shape = helper.getInputTensor(node->block_shape());
+ const Tensor *paddings = helper.getInputTensor(node->paddings());
+
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::SpaceToBatchND>(input, block_shape, paddings, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp b/compiler/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp
new file mode 100644
index 000000000..63fdb95ec
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SpaceToDepth.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSpaceToDepth(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSpaceToDepth *>(circle_node);
+ assert(node->arity() == 1);
+ const Tensor *input = helper.getInputTensor(node->input());
+
+ Tensor *output = helper.getOutputTensor(node);
+
+ SpaceToDepthParams params{};
+ params.block_size = node->block_size();
+
+ return std::make_unique<kernels::SpaceToDepth>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Split.cpp b/compiler/luci-interpreter/src/loader/nodes/Split.cpp
new file mode 100644
index 000000000..3f6d4a7df
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Split.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Split.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSplit(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSplit *>(circle_node);
+ auto output_nodes = collectOutputNodes<luci::CircleSplitOut>(node);
+ assert(node->arity() == 2);
+ assert(output_nodes.size() == static_cast<size_t>(node->num_split()));
+
+ const Tensor *axis = helper.getInputTensor(node->split_dim());
+ const Tensor *input = helper.getInputTensor(node->input());
+ std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
+
+ // NOTE 'num_splits' attribute is ignored.
+ return std::make_unique<kernels::Split>(axis, input, std::move(outputs));
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/SplitV.cpp b/compiler/luci-interpreter/src/loader/nodes/SplitV.cpp
new file mode 100644
index 000000000..0788822ca
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/SplitV.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SplitV.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSplitV(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSplitV *>(circle_node);
+ auto output_nodes = collectOutputNodes<luci::CircleSplitVOut>(node);
+ assert(node->arity() == 3);
+ assert(output_nodes.size() == static_cast<size_t>(node->num_split()));
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *sizes_data = helper.getInputTensor(node->size_splits());
+ const Tensor *axis = helper.getInputTensor(node->split_dim());
+ std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
+
+ // NOTE 'num_splits' attribute is ignored.
+ return std::make_unique<kernels::SplitV>(input, sizes_data, axis, std::move(outputs));
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Sqrt.cpp b/compiler/luci-interpreter/src/loader/nodes/Sqrt.cpp
new file mode 100644
index 000000000..b9843fe0b
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Sqrt.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Sqrt.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSqrt(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSqrt *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Sqrt>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Square.cpp b/compiler/luci-interpreter/src/loader/nodes/Square.cpp
new file mode 100644
index 000000000..0ad7c1772
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Square.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Square.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSquare(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSquare *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Square>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/SquaredDifference.cpp b/compiler/luci-interpreter/src/loader/nodes/SquaredDifference.cpp
new file mode 100644
index 000000000..e4c6fd851
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/SquaredDifference.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SquaredDifference.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSquaredDifference(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSquaredDifference *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::SquaredDifference>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Squeeze.cpp b/compiler/luci-interpreter/src/loader/nodes/Squeeze.cpp
new file mode 100644
index 000000000..6885f8077
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Squeeze.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Squeeze.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSqueeze(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSqueeze *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ Tensor *output = helper.getOutputTensor(node);
+
+ SqueezeParams params{};
+ params.squeeze_dims = node->squeeze_dims();
+
+ return std::make_unique<kernels::Squeeze>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/StridedSlice.cpp b/compiler/luci-interpreter/src/loader/nodes/StridedSlice.cpp
new file mode 100644
index 000000000..359b4e3e9
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/StridedSlice.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/StridedSlice.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleStridedSlice(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleStridedSlice *>(circle_node);
+ assert(node->arity() == 4);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *begin = helper.getInputTensor(node->begin());
+ const Tensor *end = helper.getInputTensor(node->end());
+ const Tensor *strides = helper.getInputTensor(node->strides());
+
+ Tensor *output = helper.getOutputTensor(node);
+
+ StridedSliceParams params{};
+ params.begin_mask = node->begin_mask();
+ params.ellipsis_mask = node->ellipsis_mask();
+ params.end_mask = node->end_mask();
+ params.new_axis_mask = node->new_axis_mask();
+ params.shrink_axis_mask = node->shrink_axis_mask();
+
+ return std::make_unique<kernels::StridedSlice>(input, begin, end, strides, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Sub.cpp b/compiler/luci-interpreter/src/loader/nodes/Sub.cpp
new file mode 100644
index 000000000..a6252cb53
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Sub.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Sub.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSub(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSub *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ SubParams params{};
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::Sub>(input1, input2, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Sum.cpp b/compiler/luci-interpreter/src/loader/nodes/Sum.cpp
new file mode 100644
index 000000000..6dfe362c9
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Sum.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Sum.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSum(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSum *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *axes = helper.getInputTensor(node->reduction_indices());
+ Tensor *output = helper.getOutputTensor(node);
+
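+ // NOTE temp_index and resolved_axes are scratch tensors used by the reduction
+ // implementation; they are registered with the runtime graph for allocation.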
+ auto temp_index_unique =
+ std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, "");
+ temp_index_unique->set_observable(false);
+ temp_index_unique->set_data_buffer(nullptr);
+ Tensor *temp_index =
+ helper.getRuntimeGraph(node->graph())->addTensor(std::move(temp_index_unique));
+
+ auto resolved_axes_unique =
+ std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, "");
+ resolved_axes_unique->set_observable(false);
+ resolved_axes_unique->set_data_buffer(nullptr);
+ Tensor *resolved_axes =
+ helper.getRuntimeGraph(node->graph())->addTensor(std::move(resolved_axes_unique));
+
+ ReducerParams params{};
+ params.keep_dims = node->keep_dims();
+
+ return std::make_unique<kernels::Sum>(input, axes, output, temp_index, resolved_axes, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Tanh.cpp b/compiler/luci-interpreter/src/loader/nodes/Tanh.cpp
new file mode 100644
index 000000000..a58ef60a8
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Tanh.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Tanh.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleTanh(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleTanh *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Tanh>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Transpose.cpp b/compiler/luci-interpreter/src/loader/nodes/Transpose.cpp
new file mode 100644
index 000000000..ea17d8311
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Transpose.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Transpose.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleTranspose(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleTranspose *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->a());
+ const Tensor *perm = helper.getInputTensor(node->perm());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Transpose>(input, perm, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/TransposeConv.cpp b/compiler/luci-interpreter/src/loader/nodes/TransposeConv.cpp
new file mode 100644
index 000000000..72d1aecf7
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/TransposeConv.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/TransposeConv.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleTransposeConv(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleTransposeConv *>(circle_node);
+ assert(node->arity() == 4);
+
+ const Tensor *input_sizes = helper.getInputTensor(node->inputSizes());
+ const Tensor *filter = helper.getInputTensor(node->filter());
+ const Tensor *out_backprop = helper.getInputTensor(node->outBackprop());
+ const Tensor *bias = helper.getOptionalInputTensor(node->bias());
+
+ Tensor *output = helper.getOutputTensor(node);
+
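+ // NOTE the scratchpad element type follows the input type: S16 (16-bit
+ // quantized) models need 64-bit accumulation, other types use 32-bit.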
+ DataType scratch_data_type =
+ helper.getInputTensor(node)->element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+
+ auto scratch_tensor =
+ std::make_unique<Tensor>(scratch_data_type, Shape({}), AffineQuantization{}, "");
+ scratch_tensor->set_observable(false);
+ scratch_tensor->set_data_buffer(nullptr);
+ Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratch_tensor));
+
+ TransposeConvParams params{};
+ params.padding = node->padding();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+ params.activation = node->fusedActivationFunction();
+
+ // TODO support activation
+ if (params.activation != luci::FusedActFunc::NONE)
+ {
+ throw std::runtime_error("Unsupported activation of TransposeConv");
+ }
+
+ return std::make_unique<kernels::TransposeConv>(input_sizes, filter, out_backprop, bias, output,
+ tmp, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/UnidirectionalSequenceLSTM.cpp b/compiler/luci-interpreter/src/loader/nodes/UnidirectionalSequenceLSTM.cpp
new file mode 100644
index 000000000..f4cf0b869
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/UnidirectionalSequenceLSTM.cpp
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/UnidirectionalSequenceLSTM.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel>
+build_kernel_CircleUnidirectionalSequenceLSTM(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleUnidirectionalSequenceLSTM *>(circle_node);
+ assert(node->arity() == 24);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *input_to_input_weights =
+ helper.getOptionalInputTensor(node->input_to_input_weights());
+ const Tensor *input_to_cell_weights = helper.getInputTensor(node->input_to_cell_weights());
+ const Tensor *input_to_forget_weights = helper.getInputTensor(node->input_to_forget_weights());
+ const Tensor *input_to_output_weights = helper.getInputTensor(node->input_to_output_weights());
+ const Tensor *recurrent_to_input_weights =
+ helper.getOptionalInputTensor(node->recurrent_to_input_weights());
+ const Tensor *recurrent_to_cell_weights =
+ helper.getInputTensor(node->recurrent_to_cell_weights());
+ const Tensor *recurrent_to_forget_weights =
+ helper.getInputTensor(node->recurrent_to_forget_weights());
+ const Tensor *recurrent_to_output_weights =
+ helper.getInputTensor(node->recurrent_to_output_weights());
+ const Tensor *cell_to_input_weights =
+ helper.getOptionalInputTensor(node->cell_to_input_weights());
+ const Tensor *cell_to_forget_weights =
+ helper.getOptionalInputTensor(node->cell_to_forget_weights());
+ const Tensor *cell_to_output_weights =
+ helper.getOptionalInputTensor(node->cell_to_output_weights());
+ const Tensor *input_gate_bias = helper.getOptionalInputTensor(node->input_gate_bias());
+ const Tensor *forget_gate_bias = helper.getInputTensor(node->forget_gate_bias());
+ const Tensor *cell_gate_bias = helper.getInputTensor(node->cell_gate_bias());
+ const Tensor *output_gate_bias = helper.getInputTensor(node->output_gate_bias());
+ const Tensor *projection_weights = helper.getOptionalInputTensor(node->projection_weights());
+ const Tensor *projection_bias = helper.getOptionalInputTensor(node->projection_bias());
+ const Tensor *output_state = helper.getInputTensor(node->output_state());
+ const Tensor *cell_state = helper.getInputTensor(node->cell_state());
+ const Tensor *input_layer_norm_coefficients =
+ helper.getOptionalInputTensor(node->input_layer_norm_coefficients());
+ const Tensor *forget_layer_norm_coefficients =
+ helper.getOptionalInputTensor(node->forget_layer_norm_coefficients());
+ const Tensor *cell_layer_norm_coefficients =
+ helper.getOptionalInputTensor(node->cell_layer_norm_coefficients());
+ const Tensor *output_layer_norm_coefficients =
+ helper.getOptionalInputTensor(node->output_layer_norm_coefficients());
+ Tensor *output = helper.getOutputTensor(node);
+
+ // scratch pad tensors
+ // NOTE more scratch pads need to be provided to support hybrid or integer kernels
+ auto sp_output_state =
+ std::make_unique<Tensor>(output_state->element_type(), Shape({}), AffineQuantization{}, "");
+ sp_output_state->set_observable(false);
+ sp_output_state->set_data_buffer(nullptr);
+ Tensor *tmp_1 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(sp_output_state));
+
+ auto sp_cell_state =
+ std::make_unique<Tensor>(cell_state->element_type(), Shape({}), AffineQuantization{}, "");
+ sp_cell_state->set_observable(false);
+ sp_cell_state->set_data_buffer(nullptr);
+ Tensor *tmp_2 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(sp_cell_state));
+
+ auto sp_3 = std::make_unique<Tensor>(input->element_type(), Shape({}), AffineQuantization{}, "");
+ sp_3->set_observable(false);
+ sp_3->set_data_buffer(nullptr);
+ Tensor *tmp_3 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(sp_3));
+
+ UnidirectionalSequenceLSTMParams params{};
+ params.activation = node->fusedActivationFunction();
+ params.cell_clip = node->cell_clip();
+ params.proj_clip = node->proj_clip();
+ params.time_major = node->time_major();
+ params.asymmetric_quantize_inputs = node->asymmetric_quantize_inputs();
+
+ return std::make_unique<kernels::UnidirectionalSequenceLSTM>(
+ input, input_to_input_weights, input_to_forget_weights, input_to_cell_weights,
+ input_to_output_weights, recurrent_to_input_weights, recurrent_to_forget_weights,
+ recurrent_to_cell_weights, recurrent_to_output_weights, cell_to_input_weights,
+ cell_to_forget_weights, cell_to_output_weights, input_gate_bias, forget_gate_bias,
+ cell_gate_bias, output_gate_bias, projection_weights, projection_bias, output_state, cell_state,
+ input_layer_norm_coefficients, forget_layer_norm_coefficients, cell_layer_norm_coefficients,
+ output_layer_norm_coefficients, output, tmp_1, tmp_2, tmp_3, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Unpack.cpp b/compiler/luci-interpreter/src/loader/nodes/Unpack.cpp
new file mode 100644
index 000000000..a1c0d323a
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Unpack.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Unpack.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleUnpack(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleUnpack *>(circle_node);
+ auto output_nodes = collectOutputNodes<luci::CircleUnpackOut>(node);
+ assert(node->arity() == 1);
+ assert(output_nodes.size() == static_cast<size_t>(node->num()));
+
+ const Tensor *input = helper.getInputTensor(node->value());
+ std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
+
+ UnpackParams params{};
+ params.axis = node->axis();
+
+ // NOTE 'num' attribute is ignored.
+ return std::make_unique<kernels::Unpack>(input, std::move(outputs), params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/While.cpp b/compiler/luci-interpreter/src/loader/nodes/While.cpp
new file mode 100644
index 000000000..8fde6ec8a
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/While.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/While.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleWhile(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleWhile *>(circle_node);
+
+ auto output_nodes = collectOutputNodes<luci::CircleWhileOut>(node);
+ assert(node->arity() == node->input_count());
+ assert(output_nodes.size() == static_cast<size_t>(node->output_count()));
+
+ std::vector<const Tensor *> inputs(node->input_count());
+ for (uint32_t i = 0; i < node->input_count(); ++i)
+ {
+ inputs[i] = helper.getInputTensor(node->input(i));
+ }
+ std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
+
+ RuntimeGraph *cond_graph = helper.getRuntimeGraph(node->cond_graph());
+ RuntimeGraph *body_graph = helper.getRuntimeGraph(node->body_graph());
+
+ return std::make_unique<kernels::While>(std::move(inputs), std::move(outputs), cond_graph,
+ body_graph);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-pass-value-test/.gitignore b/compiler/luci-pass-value-test/.gitignore
new file mode 100644
index 000000000..8dbfa9012
--- /dev/null
+++ b/compiler/luci-pass-value-test/.gitignore
@@ -0,0 +1 @@
+/test.local.lst
diff --git a/compiler/luci-pass-value-test/CMakeLists.txt b/compiler/luci-pass-value-test/CMakeLists.txt
new file mode 100644
index 000000000..dcd242c6e
--- /dev/null
+++ b/compiler/luci-pass-value-test/CMakeLists.txt
@@ -0,0 +1,55 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+unset(TEST_DEPS)
+unset(LUCI_PASS_VALUE_TESTS)
+
+get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
+
+macro(addeval RECIPE PASS_OPTION)
+ list(APPEND LUCI_PASS_VALUE_TESTS ${RECIPE})
+
+ set(CIRCLE_FILE "${RECIPE}.circle")
+ set(CIRCLE_PATH "${ARTIFACTS_BIN_PATH}/${CIRCLE_FILE}")
+
+ set(PASS_CIRCLE_FILE "${RECIPE}.pass.circle")
+ set(PASS_CIRCLE_OUTPUT_PATH "${CMAKE_CURRENT_BINARY_DIR}/${PASS_CIRCLE_FILE}")
+
+ set(DASH_PASS_OPTION "--${PASS_OPTION}")
+ foreach(MORE_OPTIONS ${ARGN})
+ list(APPEND DASH_PASS_OPTION "--${MORE_OPTIONS}")
+ endforeach()
+ # NOTE if there are two options, 'DASH_PASS_OPTION' will be like '--option_a;--option_b'
+ # add_custom_command() will translate ';' into two arguments as '--option_a --option_b'
+ # do not use set(DASH_PASS_OPTION "${DASH_PASS_OPTION} --${ARG}")
+ # as this would become '"--option_a --option_b"', which is one string argument
+
+ # Generate optimized .circle
+ add_custom_command(OUTPUT ${PASS_CIRCLE_OUTPUT_PATH}
+ COMMAND $<TARGET_FILE:circle2circle> ${DASH_PASS_OPTION} ${CIRCLE_PATH} ${PASS_CIRCLE_OUTPUT_PATH}
+ DEPENDS $<TARGET_FILE:circle2circle> ${CIRCLE_PATH}
+ COMMENT "Generate ${PASS_CIRCLE_FILE} with ${DASH_PASS_OPTION}"
+ )
+
+ # depends
+ list(APPEND TEST_DEPS ${PASS_CIRCLE_OUTPUT_PATH})
+
+endmacro(addeval)
+
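+# Example: addeval(Net_Conv_Add_Mul_000 fuse_batchnorm_with_conv) runs circle2circle
+# with '--fuse_batchnorm_with_conv' on Net_Conv_Add_Mul_000.circle from
+# common-artifacts to produce Net_Conv_Add_Mul_000.pass.circle.
+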
+# Read "test.lst"
+include("test.lst")
+# Read "test.local.lst" if exists
+include("test.local.lst" OPTIONAL)
+
+add_custom_target(luci_pass_value_test_files ALL DEPENDS ${TEST_DEPS})
+add_dependencies(luci_pass_value_test_files common_artifacts_deps)
+
+add_test(NAME luci_pass_value_test
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/eval_driver.sh"
+ "${CMAKE_CURRENT_BINARY_DIR}"
+ "${ARTIFACTS_BIN_PATH}"
+ "${NNCC_OVERLAY_DIR}/venv_2_12_1"
+ "$<TARGET_FILE:luci_eval_driver>"
+ ${LUCI_PASS_VALUE_TESTS}
+)
diff --git a/compiler/luci-pass-value-test/README.md b/compiler/luci-pass-value-test/README.md
new file mode 100644
index 000000000..f09619da6
--- /dev/null
+++ b/compiler/luci-pass-value-test/README.md
@@ -0,0 +1,20 @@
+# luci-pass-value-test
+
+`luci-pass-value-test` validates that the execution results of a tflite model and of
+the circle model generated with a specific optimization are the same.
+
+The test proceeds as follows:
+
+Step 0: Use the tflite and circle files in the 'common-artifacts' folder as the source models.
+ - the tflite file is used to generate the reference execution result
+ - the circle file is used as the source to which the optimization is applied
+
+Step 1: Run circle2circle with the given optimization option to produce the transformed circle.
+ - "modelfile.circle" -> circle2circle -> "modelfile.pass.circle"
+
+Step 2: Run the TFLite interpreter and luci-interpreter on the source tflite and the
+ transformed circle, respectively (with the same input tensors filled with random values).
+ - "modelfile.tflite" ------> TFLite interpreter -> Execution result 1
+ - "modelfile.pass.circle" -> luci-interpreter ---> Execution result 2
+
+Step 3: Compare execution results 1 and 2. The test PASSES if the results are the same.
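+
+For example, the following line in `test.lst` checks that the results of
+`Net_Conv_Add_Mul_000` are unchanged by the `fuse_batchnorm_with_conv` pass:
+
+```
+addeval(Net_Conv_Add_Mul_000 fuse_batchnorm_with_conv)
+```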
diff --git a/compiler/luci-pass-value-test/eval_driver.sh b/compiler/luci-pass-value-test/eval_driver.sh
new file mode 100755
index 000000000..848b6419a
--- /dev/null
+++ b/compiler/luci-pass-value-test/eval_driver.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+
+# This script verifies the tflite and circle execution result values
+#
+# HOW TO USE
+#
+# ./eval_driver.sh <path/to/bin_dir> <path/to/work_dir> <path/to/venv_dir> <path/to/intp_dir>
+# <TEST 1> <TEST 2> ...
+# bin_dir : build directory of luci-pass-value-test (ex: build/compiler/luci-pass-value-test)
+# work_dir : artifacts directory where test materials exist
+# venv_dir : python virtual environment home directory
+# intp_dir : path to luci_eval_driver from luci-eval-driver
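+#
+# Example (paths are illustrative and depend on the local build tree):
+#
+#   ./eval_driver.sh build/compiler/luci-pass-value-test \
+#                    build/compiler/common-artifacts \
+#                    overlay/venv_2_12_1 \
+#                    build/compiler/luci-eval-driver/luci_eval_driver \
+#                    Net_Conv_Add_Mul_000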
+
+VERIFY_SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+VERIFY_SCRIPT_PATH="${VERIFY_SOURCE_PATH}/eval_result_verifier.py"
+BINDIR="$1"; shift
+WORKDIR="$1"; shift
+VIRTUALENV="$1"; shift
+INTERPRETER_DRIVER_PATH="$1"; shift
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+for TESTCASE in "$@"; do
+ TESTED+=("${TESTCASE}")
+
+ TESTCASE_TFLITE_FILE="${WORKDIR}/${TESTCASE}.tflite"
+ TESTCASE_CIRCLE_FILE="${BINDIR}/${TESTCASE}.pass.circle"
+ TEST_RESULT_FILE="${BINDIR}/${TESTCASE}"
+
+ PASSED_TAG="${TEST_RESULT_FILE}.passed"
+ rm -f "${PASSED_TAG}"
+
+ cat > "${TEST_RESULT_FILE}.log" <(
+ exec 2>&1
+ set -ex
+
+ source "${VIRTUALENV}/bin/activate"
+
+ "${VIRTUALENV}/bin/python" "${VERIFY_SCRIPT_PATH}" \
+ --driver "${INTERPRETER_DRIVER_PATH}" \
+ --tflite "${TESTCASE_TFLITE_FILE}" \
+ --circle "${TESTCASE_CIRCLE_FILE}"
+
+ if [[ $? -eq 0 ]]; then
+ touch "${PASSED_TAG}"
+ fi
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("${TESTCASE}")
+ else
+ FAILED+=("${TESTCASE}")
+ fi
+done
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/luci-pass-value-test/eval_result_verifier.py b/compiler/luci-pass-value-test/eval_result_verifier.py
new file mode 100644
index 000000000..0073c4db5
--- /dev/null
+++ b/compiler/luci-pass-value-test/eval_result_verifier.py
@@ -0,0 +1,114 @@
+#!/usr/bin/env python3
+import numpy as np
+import tensorflow as tf
+import subprocess
+import argparse
+import traceback
+
+#
+# This script was copied from luci-value-test, with the input arguments changed to tflite and circle paths
+#
+parser = argparse.ArgumentParser()
+parser.add_argument('--driver', type=str, required=True)
+parser.add_argument('--tflite', type=str, required=True)
+parser.add_argument('--circle', type=str, required=True)
+args = parser.parse_args()
+
+driver = args.driver
+tflite_model = args.tflite
+circle_model = args.circle
+
+# Build TFLite interpreter.
+interpreter = tf.lite.Interpreter(tflite_model)
+interpreter.allocate_tensors()
+
+# Read SignatureDef and get output tensor id orders for remapping
+full_signatures = interpreter._get_full_signature_list()
+full_signatures_outputs_remap = None
+if full_signatures is not None:
+ signature_serving_default = full_signatures.get('serving_default', None)
+ if signature_serving_default is not None:
+ signature_outputs = signature_serving_default['outputs']
+
+ full_signatures_outputs_remap = []
+ for value in signature_outputs.values():
+ full_signatures_outputs_remap.append(value)
+
+# Generate random input data.
+num_inputs = len(interpreter.get_input_details())
+for i in range(num_inputs):
+ input_details = interpreter.get_input_details()[i]
+ if input_details["dtype"] == np.float32:
+ input_data = np.array(
+ np.random.random_sample(input_details["shape"]), input_details["dtype"])
+ elif input_details["dtype"] == np.uint8:
+ input_data = np.array(
+ np.random.randint(0, 256, size=input_details["shape"]),
+ input_details["dtype"])
+ elif input_details["dtype"] == np.int16:
+ input_data = np.array(
+ np.random.randint(0, 100, size=input_details["shape"]),
+ input_details["dtype"])
+ elif input_details["dtype"] == np.bool_:
+ input_data = np.array(
+ np.random.choice(a=[True, False], size=input_details["shape"]),
+ input_details["dtype"])
+ else:
+ raise SystemExit("Unsupported input dtype")
+
+ interpreter.set_tensor(input_details["index"], input_data)
+ input_data.tofile(circle_model + ".input" + str(i))
+
+# Do inference
+interpreter.invoke()
+
+# Execute luci interpreter.
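+# The driver reads the "<circle>.input<i>" files written above and is expected to
+# produce "<circle>.output<i>" (raw tensor data) and "<circle>.output<i>.shape"
+# (comma-separated dimensions) files, which are compared against TFLite below.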
+subprocess.run(
+ [
+ driver, circle_model,
+ str(num_inputs), circle_model + ".input", circle_model + ".output"
+ ],
+ check=True)
+
+# Compare the results.
+intp_output_details = interpreter.get_output_details()
+for idx in range(len(intp_output_details)):
+ output_details = intp_output_details[idx]
+ output_data = np.fromfile(circle_model + ".output" + str(idx),
+ output_details["dtype"])
+ shape_file = open(circle_model + ".output" + str(idx) + ".shape", 'r')
+ output_shape = [int(i) for i in shape_file.read().split(',')]
+ luci_output_data = np.reshape(output_data, output_shape)
+ output_tensor = output_details["index"]
+ if full_signatures_outputs_remap != None:
+ output_tensor = full_signatures_outputs_remap[idx]
+ intp_output_data = interpreter.get_tensor(output_tensor)
+ try:
+ if output_details["dtype"] == np.uint8:
+ if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
+ raise SystemExit("Execution result of " + tflite_model +
+ " does not match with " + circle_model)
+ elif output_details["dtype"] == np.float32:
+ if np.allclose(
+ luci_output_data, intp_output_data, rtol=1.e-5, atol=1.e-5) == False:
+ raise SystemExit("Execution result of " + tflite_model +
+ " does not match with " + circle_model)
+ elif output_details["dtype"] == np.int64:
+ if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
+ raise SystemExit("Execution result of " + tflite_model +
+ " does not match with " + circle_model)
+ elif output_details["dtype"] == np.int32:
+ if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
+ raise SystemExit("Execution result of " + tflite_model +
+ " does not match with " + circle_model)
+ elif output_details["dtype"] == np.int16:
+ if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
+ raise SystemExit("Execution result of " + tflite_model +
+ " does not match with " + circle_model)
+ else:
+ raise SystemExit("Unsupported data type: ", output_details["dtype"])
+ except:
+ print(traceback.format_exc())
+ quit(255)
+
+quit(0)
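
The subprocess call above pins down a small file protocol for the eval driver: it receives the circle model, the input count, an input path prefix, and an output path prefix; the N'th input is read from `<prefix>N` as raw bytes, and the N'th output comes back as `<prefix>N` plus a `<prefix>N.shape` CSV. A sketch of reading one output back under those assumptions:

```python
import numpy as np


def read_driver_output(circle_model, idx, dtype):
    # The eval driver writes raw tensor bytes to <circle>.outputN and the
    # tensor shape as comma-separated values to <circle>.outputN.shape.
    data = np.fromfile(f"{circle_model}.output{idx}", dtype)
    with open(f"{circle_model}.output{idx}.shape") as shape_file:
        shape = [int(d) for d in shape_file.read().split(",")]
    return np.reshape(data, shape)
```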
diff --git a/compiler/luci-pass-value-test/requires.cmake b/compiler/luci-pass-value-test/requires.cmake
new file mode 100644
index 000000000..d977a51b6
--- /dev/null
+++ b/compiler/luci-pass-value-test/requires.cmake
@@ -0,0 +1,7 @@
+require("common-artifacts")
+require("luci-interpreter")
+require("safemain")
+require("oops")
+require("loco")
+require("luci-value-test")
+require("luci-eval-driver")
diff --git a/compiler/luci-pass-value-test/test.lst b/compiler/luci-pass-value-test/test.lst
new file mode 100644
index 000000000..93634b2fc
--- /dev/null
+++ b/compiler/luci-pass-value-test/test.lst
@@ -0,0 +1,52 @@
+#
+# Format:
+# addeval(MODEL PASS)
+# MODEL: tflite model file name in build/compiler/common-artifacts folder.
+# PASS: one or more Optimization Passes to test.
+#
+
+# addeval(Net_Preactivation_BN_000 fuse_preactivation_batchnorm) : value diff exists
+# --> https://github.com/Samsung/ONE/issues/5782
+addeval(Net_Conv_Add_Mul_000 fuse_batchnorm_with_conv)
+addeval(Net_Conv_Add_Mul_001 fuse_batchnorm_with_conv)
+addeval(Net_Conv_Add_Mul_002 fuse_batchnorm_with_conv)
+addeval(Net_Conv_Min_Max_000 transform_min_max_to_relu6)
+addeval(Net_Conv_Min_Relu_000 transform_min_relu_to_relu6)
+addeval(HardSwish_001 decompose_hardswish)
+addeval(Net_Conv_PReluGraph_000 fuse_prelu)
+addeval(Net_Conv_Relu6_000 fuse_activation_function)
+addeval(Net_Densify_Add_000 fold_densify)
+addeval(Net_Dequantize_Add_000 fold_dequantize)
+addeval(Net_DwConv_BN_000 fuse_batchnorm_with_dwconv)
+addeval(Net_DwConv_BN_001 fuse_batchnorm_with_dwconv)
+addeval(Net_FullyConnected_Add_000 fold_fully_connected)
+addeval(Net_Reshape_Neg_000 forward_reshape_to_unaryop)
+addeval(Net_Reshape_Reshape_000 remove_redundant_reshape)
+addeval(Net_Squeeze_Squeeze_000 substitute_squeeze_to_reshape)
+addeval(Net_TConv_Add_000 fuse_add_with_tconv)
+addeval(Net_TConv_Add_001 fuse_add_with_tconv)
+addeval(Net_TConv_Add_002 fuse_add_with_tconv)
+addeval(Net_TConv_BN_000 fuse_batchnorm_with_tconv)
+addeval(Net_TConv_BN_001 fuse_batchnorm_with_tconv)
+addeval(Net_TConv_BN_002 fuse_batchnorm_with_tconv)
+addeval(Net_TConv_BN_003 fuse_batchnorm_with_tconv)
+addeval(Net_TConv_BN_004 fuse_batchnorm_with_tconv)
+addeval(Net_TConv_BN_005 fuse_batchnorm_with_tconv)
+addeval(Net_InstanceNorm_001 fuse_instnorm)
+addeval(Net_InstanceNorm_002 fuse_instnorm)
+addeval(Net_InstanceNorm_003 fuse_instnorm)
+addeval(Net_StridedSlice_StridedSlice_000 remove_unnecessary_strided_slice)
+addeval(FullyConnected_007 replace_non_const_fc_with_batch_matmul)
+addeval(Net_Transpose_Add_000 forward_transpose_op)
+addeval(Net_Transpose_Abs_000 forward_transpose_op)
+addeval(UnidirectionalSequenceLSTM_003 unroll_unidirseqlstm)
+addeval(UnidirectionalSequenceLSTM_004 unroll_unidirseqlstm)
+
+# test for limited support for FLOAT16
+addeval(Net_Dequantize_Add_000 fold_dequantize)
+addeval(Net_Densify_Dequantize_Add_000 fold_dequantize fold_densify)
+
+# test SignatureDef, with any optimization
+#addeval(SignatureDef_MultiOut_000 fuse_instnorm)
+#addeval(SignatureDef_MultiOut_001 fuse_instnorm)
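
The `addeval(MODEL PASS)` entries are consumed by a CMake macro at build time, but the format is simple enough to sanity-check offline. A hypothetical Python helper follows; the regex and parsing rules are assumptions drawn from the format comment above, not part of the build.

```python
import re

# Matches "addeval(MODEL PASS1 PASS2 ...)"; addevaltol entries are skipped.
ADDEVAL = re.compile(r"^addeval\((\S+)((?:\s+\S+)*)\)$")


def parse_test_lst(path):
    # Yields (model, [passes]) for each active addeval(...) entry,
    # skipping blank lines and '#' comments.
    with open(path) as lst:
        for line in lst:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            match = ADDEVAL.match(line)
            if match:
                yield match.group(1), match.group(2).split()
```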
diff --git a/compiler/luci-value-test/.gitignore b/compiler/luci-value-test/.gitignore
new file mode 100644
index 000000000..8dbfa9012
--- /dev/null
+++ b/compiler/luci-value-test/.gitignore
@@ -0,0 +1 @@
+/test.local.lst
diff --git a/compiler/luci-value-test/CMakeLists.txt b/compiler/luci-value-test/CMakeLists.txt
index ec7463409..be7e881d0 100644
--- a/compiler/luci-value-test/CMakeLists.txt
+++ b/compiler/luci-value-test/CMakeLists.txt
@@ -1,9 +1,18 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
unset(LUCI_VALUE_TESTS)
+unset(LUCI_VALUE_TESTS_TOL)
macro(addeval NAME)
list(APPEND LUCI_VALUE_TESTS ${NAME})
endmacro(addeval)
+macro(addevaltol NAME RTOL ATOL)
+ list(APPEND LUCI_VALUE_TESTS_TOL ${NAME} ${RTOL} ${ATOL})
+endmacro(addevaltol)
+
# Read "test.lst"
include("test.lst")
# Read "test.local.lst" if exists
@@ -12,14 +21,60 @@ include("test.local.lst" OPTIONAL)
# Generate dependencies
add_custom_target(luci_eval_testfiles ALL DEPENDS ${TESTFILES})
-add_subdirectory(tester)
+if(NOT CMAKE_CROSSCOMPILING)
+
+ get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
+
+ add_test(NAME luci_value_test
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/evalverify.sh"
+ "${CMAKE_CURRENT_BINARY_DIR}"
+ "${ARTIFACTS_BIN_PATH}"
+ "${NNCC_OVERLAY_DIR}/venv_2_12_1"
+ "$<TARGET_FILE:luci_eval_driver>"
+ ${LUCI_VALUE_TESTS}
+ )
+
+ if(DEFINED LUCI_VALUE_TESTS_TOL)
+ add_test(NAME luci_value_tol_test
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/evalverifytol.sh"
+ "${CMAKE_CURRENT_BINARY_DIR}"
+ "${ARTIFACTS_BIN_PATH}"
+ "${NNCC_OVERLAY_DIR}/venv_2_12_1"
+ "$<TARGET_FILE:luci_eval_driver>"
+ ${LUCI_VALUE_TESTS_TOL}
+ )
+ endif()
+
+else(NOT CMAKE_CROSSCOMPILING)
+  # NOTE The target test is carried out using reference input/output data from
+  #      host test results, because it would be difficult to prepare
+  #      TensorFlow Lite for the target device.
+  #      Thus, one must run the host test first, and then run the test on the
+  #      target device with the result files from the host test.
+
+ if(NOT DEFINED ENV{BUILD_HOST_EXEC})
+ message(STATUS "BUILD_HOST_EXEC not set: Skip luci-value-test")
+ return()
+ endif(NOT DEFINED ENV{BUILD_HOST_EXEC})
+
+ set(ARTIFACTS_BIN_PATH $ENV{BUILD_HOST_EXEC}/compiler/common-artifacts)
+
+ add_test(NAME luci_value_cross_test
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/evalverify_ref.sh"
+ "${CMAKE_CURRENT_BINARY_DIR}"
+ "${ARTIFACTS_BIN_PATH}"
+ "$<TARGET_FILE:luci_eval_driver>"
+ ${LUCI_VALUE_TESTS}
+ )
-get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
+ if(DEFINED LUCI_VALUE_TESTS_TOL)
+ add_test(NAME luci_value_cross_tol_test
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/evalverifytol_ref.sh"
+ "${CMAKE_CURRENT_BINARY_DIR}"
+ "${ARTIFACTS_BIN_PATH}"
+ "$<TARGET_FILE:luci_eval_driver>"
+ ${LUCI_VALUE_TESTS_TOL}
+ )
+ endif()
-add_test(NAME luci_value_test
- COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/evalverify.sh"
- "${CMAKE_CURRENT_BINARY_DIR}"
- "${ARTIFACTS_BIN_PATH}"
- "${NNCC_OVERLAY_DIR}/venv_2_3_0"
- ${LUCI_VALUE_TESTS}
-)
+endif(NOT CMAKE_CROSSCOMPILING)
diff --git a/compiler/luci-value-test/README.md b/compiler/luci-value-test/README.md
index 90e92834b..6f1d0d54f 100644
--- a/compiler/luci-value-test/README.md
+++ b/compiler/luci-value-test/README.md
@@ -5,11 +5,15 @@
The test proceeds as follows
 Step 1: Generate tflite files and circle files from TFLite recipes (listed in test.lst).
+```
"TFLite recipe" -> tflchef -> "tflite file" -> tflite2circle -> "circle file"
+```
Step 2: Run TFLite interpreter and luci-interpreter for the generated tflite and circle, respectively.
(with the same input tensors filled with random values)
+```
circle file -> luci-interpreter -------> Execution result 1
tflite file -> TFLite interpreter -----> Execution result 2
+```
Step 3: Compare the execution result 1 and 2. The result must be the same.
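
Step 3 reduces to an elementwise comparison with dtype-appropriate tolerances. A minimal sketch of that final comparison, with illustrative tolerance values (the authoritative values live in luci_eval_verifier.py):

```python
import numpy as np


def results_match(luci_output, tflite_output, dtype):
    # Float outputs are compared with a small relative/absolute tolerance;
    # integer and bool outputs must match exactly.
    if dtype == np.float32:
        return np.allclose(luci_output, tflite_output, rtol=1e-5, atol=1e-5)
    return np.array_equal(luci_output, tflite_output)
```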
diff --git a/compiler/luci-value-test/evalverify.sh b/compiler/luci-value-test/evalverify.sh
index 12c9a459a..3d2091176 100755
--- a/compiler/luci-value-test/evalverify.sh
+++ b/compiler/luci-value-test/evalverify.sh
@@ -4,17 +4,19 @@
#
# HOW TO USE
#
-# ./evalverify.sh <path/to/bin_dir> <path/to/work_dir> <path/to/venv_dir> <TEST 1> <TEST 2> ...
+# ./evalverify.sh <path/to/bin_dir> <path/to/work_dir> <path/to/venv_dir> <path/to/eval_driver> \
+# <TEST 1> <TEST 2> ...
# bin_dir : build directory of luci-value-test (ex: build/compiler/luci-value-test)
 # work_dir : artifacts directory where test materials exist
# venv_dir : python virtual environment home directory
+# eval_driver : luci_eval_driver path for evaluation
VERIFY_SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
VERIFY_SCRIPT_PATH="${VERIFY_SOURCE_PATH}/luci_eval_verifier.py"
BINDIR="$1"; shift
WORKDIR="$1"; shift
VIRTUALENV="$1"; shift
-INTERPRETER_DRIVER_PATH="${BINDIR}/tester/luci_eval_tester"
+INTERPRETER_DRIVER_PATH="$1"; shift
TESTED=()
PASSED=()
diff --git a/compiler/luci-value-test/evalverify_ref.sh b/compiler/luci-value-test/evalverify_ref.sh
new file mode 100755
index 000000000..f1e538aa3
--- /dev/null
+++ b/compiler/luci-value-test/evalverify_ref.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+# This script verifies the basic behavior of luci interpreter
+#
+# HOW TO USE
+#
+# ./evalverify_ref.sh <path/to/bin_dir> <path/to/ref_dir> <path/to/eval_driver> \
+# <TEST 1> <TEST 2> ...
+# bin_dir : build directory of luci-value-test (ex: build/compiler/luci-value-test)
+# ref_dir : artifacts directory where reference test materials exist
+# eval_driver : luci_eval_driver path for evaluation
+
+VERIFY_SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+VERIFY_SCRIPT_PATH="${VERIFY_SOURCE_PATH}/luci_eval_verifier_ref.py"
+BINDIR="$1"; shift
+REFDIR="$1"; shift
+INTERPRETER_DRIVER_PATH="$1"; shift
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+for TESTCASE in "$@"; do
+ TESTED+=("${TESTCASE}")
+
+ TESTCASE_FILE="${REFDIR}/${TESTCASE}"
+ TEST_RESULT_FILE="${BINDIR}/${TESTCASE}"
+
+ PASSED_TAG="${TEST_RESULT_FILE}.passed"
+ rm -f "${PASSED_TAG}"
+
+ cat > "${TEST_RESULT_FILE}.log" <(
+ exec 2>&1
+ set -ex
+
+ "python3" "${VERIFY_SCRIPT_PATH}" \
+ --driver "${INTERPRETER_DRIVER_PATH}" \
+ --model_ref "${TESTCASE_FILE}" \
+ --work_path "${TEST_RESULT_FILE}"
+
+ if [[ $? -eq 0 ]]; then
+ touch "${PASSED_TAG}"
+ fi
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("${TESTCASE}")
+ else
+ FAILED+=("${TESTCASE}")
+ fi
+done
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/luci-value-test/evalverifytol.sh b/compiler/luci-value-test/evalverifytol.sh
new file mode 100755
index 000000000..92094055a
--- /dev/null
+++ b/compiler/luci-value-test/evalverifytol.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+
+# This script verifies the basic behavior of luci interpreter
+#
+# HOW TO USE
+#
+# ./evalverifytol.sh <path/to/bin_dir> <path/to/work_dir> <path/to/venv_dir> <path/to/eval_driver> \
+# <TEST 1> <RTOL 1> <ATOL 1> <TEST 2> <RTOL 2> <ATOL 2> ...
+# bin_dir : build directory of luci-value-test (ex: build/compiler/luci-value-test)
+# work_dir : artifacts directory where test materials exist
+# venv_dir : python virtual environment home directory
+# eval_driver : luci_eval_driver path for evaluation
+
+VERIFY_SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+VERIFY_SCRIPT_PATH="${VERIFY_SOURCE_PATH}/luci_eval_verifier.py"
+BINDIR="$1"; shift
+WORKDIR="$1"; shift
+VIRTUALENV="$1"; shift
+INTERPRETER_DRIVER_PATH="$1"; shift
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+while (( "$#" >= 3 )); do
+ TESTCASE=$1
+ RTOLERANCE=$2
+ ATOLERANCE=$3
+ shift 3
+
+ TESTED+=("${TESTCASE}")
+
+ TESTCASE_FILE="${WORKDIR}/${TESTCASE}"
+ TEST_RESULT_FILE="${BINDIR}/${TESTCASE}"
+
+ PASSED_TAG="${TEST_RESULT_FILE}.passed"
+ rm -f "${PASSED_TAG}"
+
+ cat > "${TEST_RESULT_FILE}.log" <(
+ exec 2>&1
+ set -ex
+
+ source "${VIRTUALENV}/bin/activate"
+ "${VIRTUALENV}/bin/python" "${VERIFY_SCRIPT_PATH}" \
+ --driver "${INTERPRETER_DRIVER_PATH}" \
+ --model "${TESTCASE_FILE}" \
+ --rtolf32 "${RTOLERANCE}" \
+ --atolf32 "${ATOLERANCE}"
+
+ if [[ $? -eq 0 ]]; then
+ touch "${PASSED_TAG}"
+ fi
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("${TESTCASE}")
+ else
+ FAILED+=("${TESTCASE}")
+ fi
+done
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
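
The `while (( "$#" >= 3 ))` loop above consumes the trailing arguments as (TEST, RTOL, ATOL) triples. For reference, the same chunking expressed in Python; a hedged sketch, not part of the harness.

```python
def chunk_tolerance_args(args):
    # Groups a flat argument list into (testcase, rtol, atol) triples,
    # mirroring the `shift 3` loop in evalverifytol.sh. Leftover arguments
    # (fewer than three) are ignored, as in the shell version.
    triples = []
    for i in range(0, len(args) - len(args) % 3, 3):
        triples.append((args[i], float(args[i + 1]), float(args[i + 2])))
    return triples


# Example: chunk_tolerance_args(["SVDF_000", "8e-3", "8e-3"])
# -> [("SVDF_000", 0.008, 0.008)]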
diff --git a/compiler/luci-value-test/evalverifytol_ref.sh b/compiler/luci-value-test/evalverifytol_ref.sh
new file mode 100755
index 000000000..cc7267b18
--- /dev/null
+++ b/compiler/luci-value-test/evalverifytol_ref.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+# This script verifies the basic behavior of luci interpreter
+#
+# HOW TO USE
+#
+# ./evalverifytol_ref.sh <path/to/bin_dir> <path/to/ref_dir> <path/to/eval_driver> \
+# <TEST 1> <RTOL 1> <ATOL 1> <TEST 2> <RTOL 2> <ATOL 2> ...
+# bin_dir : build directory of luci-value-test (ex: build/compiler/luci-value-test)
+# ref_dir : artifacts directory where reference test materials exist
+# eval_driver : luci_eval_driver path for evaluation
+
+VERIFY_SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+VERIFY_SCRIPT_PATH="${VERIFY_SOURCE_PATH}/luci_eval_verifier_ref.py"
+BINDIR="$1"; shift
+REFDIR="$1"; shift
+INTERPRETER_DRIVER_PATH="$1"; shift
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+while (( "$#" >= 3 )); do
+ TESTCASE=$1
+ RTOLERANCE=$2
+ ATOLERANCE=$3
+ shift 3
+
+ TESTED+=("${TESTCASE}")
+
+ TESTCASE_FILE="${REFDIR}/${TESTCASE}"
+ TEST_RESULT_FILE="${BINDIR}/${TESTCASE}"
+
+ PASSED_TAG="${TEST_RESULT_FILE}.passed"
+ rm -f "${PASSED_TAG}"
+
+ cat > "${TEST_RESULT_FILE}.log" <(
+ exec 2>&1
+ set -ex
+
+ "python3" "${VERIFY_SCRIPT_PATH}" \
+ --driver "${INTERPRETER_DRIVER_PATH}" \
+ --model_ref "${TESTCASE_FILE}" \
+ --work_path "${TEST_RESULT_FILE}" \
+ --rtolf32 "${RTOLERANCE}" \
+ --atolf32 "${ATOLERANCE}"
+
+ if [[ $? -eq 0 ]]; then
+ touch "${PASSED_TAG}"
+ fi
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("${TESTCASE}")
+ else
+ FAILED+=("${TESTCASE}")
+ fi
+done
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/luci-value-test/luci_eval_verifier.py b/compiler/luci-value-test/luci_eval_verifier.py
index 7a2cebb91..f74b2201f 100755
--- a/compiler/luci-value-test/luci_eval_verifier.py
+++ b/compiler/luci-value-test/luci_eval_verifier.py
@@ -9,21 +9,51 @@ import traceback
# This script compares the execution result of luci-interpreter with that of TFLite interpreter
#
# Basic usage:
-# eval_verifier.py --driver build/compiler/luci-value-test/tester/luci_eval_tester
+# eval_verifier.py --driver build/compiler/luci-eval-driver/luci_eval_driver
# --model inception_v3
parser = argparse.ArgumentParser()
parser.add_argument('--driver', type=str, required=True)
parser.add_argument('--model', type=str, required=True)
+parser.add_argument('--rtolf32', type=str, required=False)
+parser.add_argument('--atolf32', type=str, required=False)
args = parser.parse_args()
driver = args.driver
tflite_model = args.model + ".tflite"
circle_model = args.model + ".circle"
+rtolf32 = 1e-5
+atolf32 = 1e-5
+# NOTE the f32 tolerance values are reused as int tolerance values too
+rtolint = 0
+atolint = 0
+try:
+ if args.rtolf32 != None:
+ rtolf32 = float(args.rtolf32)
+ rtolint = int(rtolf32)
+ if args.atolf32 != None:
+ atolf32 = float(args.atolf32)
+ atolint = int(atolf32)
+except ValueError:
+ print("rtolf32 or atolf32 is not a number")
+ quit(128)
+
# Build TFLite interpreter.
interpreter = tf.lite.Interpreter(tflite_model)
interpreter.allocate_tensors()
+# Read SignatureDef and get output tensor id orders for remapping
+full_signatures = interpreter._get_full_signature_list()
+full_signatures_outputs_remap = None
+if full_signatures != None:
+ signature_serving_default = full_signatures.get('serving_default', None)
+ if signature_serving_default != None:
+ signature_outputs = signature_serving_default['outputs']
+
+ full_signatures_outputs_remap = []
+ for index, (key, value) in enumerate(signature_outputs.items()):
+ full_signatures_outputs_remap.append(value)
+
# Generate random input data.
num_inputs = len(interpreter.get_input_details())
for i in range(num_inputs):
@@ -31,19 +61,40 @@ for i in range(num_inputs):
if input_details["dtype"] == np.float32:
input_data = np.array(
np.random.random_sample(input_details["shape"]), input_details["dtype"])
+ input_dtype = "float32"
elif input_details["dtype"] == np.uint8:
input_data = np.array(
np.random.randint(0, 256, size=input_details["shape"]),
input_details["dtype"])
+ input_dtype = "uint8"
+ elif input_details["dtype"] == np.int16:
+ input_data = np.array(
+ np.random.randint(0, 100, size=input_details["shape"]),
+ input_details["dtype"])
+ input_dtype = "int16"
+ elif input_details["dtype"] == np.int32:
+ input_data = np.array(
+ np.random.randint(0, 100, size=input_details["shape"]),
+ input_details["dtype"])
+ input_dtype = "int32"
+ elif input_details["dtype"] == np.int64:
+ input_data = np.array(
+ np.random.randint(0, 100, size=input_details["shape"]),
+ input_details["dtype"])
+ input_dtype = "int64"
elif input_details["dtype"] == np.bool_:
input_data = np.array(
np.random.choice(a=[True, False], size=input_details["shape"]),
input_details["dtype"])
+ input_dtype = "bool"
else:
raise SystemExit("Unsupported input dtype")
interpreter.set_tensor(input_details["index"], input_data)
input_data.tofile(circle_model + ".input" + str(i))
+ input_details["shape"].tofile(circle_model + ".input" + str(i) + ".shape", sep=',')
+ with open(circle_model + ".input" + str(i) + ".dtype", 'w') as dtype_file:
+ dtype_file.write(input_dtype)
# Do inference
interpreter.invoke()
@@ -57,52 +108,77 @@ subprocess.run(
check=True)
# Compare the results.
-for idx in range(len(interpreter.get_output_details())):
- output_details = interpreter.get_output_details()[idx]
+inpt_output_details = interpreter.get_output_details()
+for idx in range(len(inpt_output_details)):
+ output_details = inpt_output_details[idx]
output_data = np.fromfile(circle_model + ".output" + str(idx),
output_details["dtype"])
shape_file = open(circle_model + ".output" + str(idx) + ".shape", 'r')
output_shape = [int(i) for i in shape_file.read().split(',')]
luci_output_data = np.reshape(output_data, output_shape)
+ output_tensor = output_details["index"]
+ if full_signatures_outputs_remap != None:
+ output_tensor = full_signatures_outputs_remap[idx]
+ intp_output_data = interpreter.get_tensor(output_tensor)
try:
if output_details["dtype"] == np.uint8:
if np.allclose(
- luci_output_data,
- interpreter.get_tensor(
- interpreter.get_output_details()[idx]["index"]),
- rtol=0,
- atol=0) == False:
+ luci_output_data, intp_output_data, rtol=rtolint,
+ atol=atolint) == False:
+ print("intp_output_data", intp_output_data)
+ print("luci_output_data", luci_output_data)
raise SystemExit("Execution result of " + tflite_model +
" does not match with " + circle_model)
+ output_dtype = "uint8"
elif output_details["dtype"] == np.float32:
if np.allclose(
- luci_output_data,
- interpreter.get_tensor(
- interpreter.get_output_details()[idx]["index"]),
- rtol=1.e-5,
- atol=1.e-5) == False:
+ luci_output_data, intp_output_data, rtol=rtolf32,
+ atol=atolf32) == False:
+ print("intp_output_data", intp_output_data)
+ print("luci_output_data", luci_output_data)
raise SystemExit("Execution result of " + tflite_model +
" does not match with " + circle_model)
+ output_dtype = "float32"
elif output_details["dtype"] == np.int64:
if np.allclose(
- luci_output_data,
- interpreter.get_tensor(
- interpreter.get_output_details()[idx]["index"]),
- rtol=0,
- atol=0) == False:
+ luci_output_data, intp_output_data, rtol=rtolint,
+ atol=atolint) == False:
+ print("intp_output_data", intp_output_data)
+ print("luci_output_data", luci_output_data)
raise SystemExit("Execution result of " + tflite_model +
" does not match with " + circle_model)
+ output_dtype = "int64"
elif output_details["dtype"] == np.int32:
if np.allclose(
- luci_output_data,
- interpreter.get_tensor(
- interpreter.get_output_details()[idx]["index"]),
- rtol=0,
- atol=0) == False:
+ luci_output_data, intp_output_data, rtol=rtolint,
+ atol=atolint) == False:
+ print("intp_output_data", intp_output_data)
+ print("luci_output_data", luci_output_data)
+ raise SystemExit("Execution result of " + tflite_model +
+ " does not match with " + circle_model)
+ output_dtype = "int32"
+ elif output_details["dtype"] == np.int16:
+ if np.allclose(
+ luci_output_data, intp_output_data, rtol=rtolint,
+ atol=atolint) == False:
+ print("intp_output_data", intp_output_data)
+ print("luci_output_data", luci_output_data)
raise SystemExit("Execution result of " + tflite_model +
" does not match with " + circle_model)
+ output_dtype = "int16"
+ elif output_details["dtype"] == np.bool_:
+ if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
+ print("intp_output_data", intp_output_data)
+ print("luci_output_data", luci_output_data)
+ raise SystemExit("Execution result of " + tflite_model +
+ " does not match with " + circle_model)
+ output_dtype = "bool"
else:
raise SystemExit("Unsupported data type: ", output_details["dtype"])
+
+ # save outputN.dtype file
+ with open(circle_model + ".output" + str(idx) + ".dtype", 'w') as dtype_file:
+ dtype_file.write(output_dtype)
except:
print(traceback.format_exc())
quit(255)
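
Every dtype branch above has the same shape: pick (rtol, atol) for the dtype, call np.allclose, and remember the dtype name for the `.dtype` file. A condensed sketch of that dispatch follows; the table mirrors the branches above but is an illustration, not a drop-in replacement.

```python
import numpy as np


def tolerances_for(dtype, rtolf32, atolf32, rtolint, atolint):
    # float32 uses the (possibly user-supplied) float tolerances; integer
    # dtypes reuse the integer tolerances; bool is always compared exactly.
    if dtype == np.float32:
        return rtolf32, atolf32
    if dtype in (np.uint8, np.int16, np.int32, np.int64):
        return rtolint, atolint
    if dtype == np.bool_:
        return 0, 0
    raise SystemExit("Unsupported data type: " + str(dtype))
```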
diff --git a/compiler/luci-value-test/luci_eval_verifier_ref.py b/compiler/luci-value-test/luci_eval_verifier_ref.py
new file mode 100755
index 000000000..3e4d93855
--- /dev/null
+++ b/compiler/luci-value-test/luci_eval_verifier_ref.py
@@ -0,0 +1,170 @@
+#!/usr/bin/env python3
+import numpy as np
+import subprocess
+import argparse
+import traceback
+import os
+
+#
+# This script compares the execution result of luci-interpreter with that from ref_model path
+#
+# Basic usage:
+# luci_eval_verifier_ref.py --driver build/compiler/luci-eval-driver/luci_eval_driver
+# --ref_model ref_model_path --model this_model_path
+# Assumption:
+# these files are expected to exist, each with the following purpose
+# - ref_model_path.circle; circle model
+# - ref_model_path.circle.inputN; N'th input numpy data
+# - ref_model_path.circle.inputN.dtype; N'th input data type in text
+# - ref_model_path.circle.inputN.shape; N'th input data shape in CSV
+# - ref_model_path.circle.outputN; N'th output numpy data
+# - ref_model_path.circle.outputN.dtype; N'th output data type in text
+# - ref_model_path.circle.outputN.shape; N'th output data shape in CSV
+
+
+def dtype_from_file(file_path):
+ with open(file_path, 'r') as dtype_file:
+ dtype_str = dtype_file.read()
+ if dtype_str == "float32":
+ return np.float32
+ if dtype_str == "uint8":
+ return np.uint8
+ if dtype_str == "int16":
+ return np.int16
+ if dtype_str == "int32":
+ return np.int32
+ if dtype_str == "int64":
+ return np.int64
+ if dtype_str == "bool":
+ return np.bool_
+ raise SystemExit("Unsupported dtype from file", dtype_str)
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--driver', type=str, required=True)
+parser.add_argument('--model_ref', type=str, required=True)
+parser.add_argument('--work_path', type=str, required=True)
+parser.add_argument('--rtolf32', type=str, required=False)
+parser.add_argument('--atolf32', type=str, required=False)
+args = parser.parse_args()
+
+driver = args.driver
+circle_model_ref = args.model_ref + ".circle"
+circle_model = args.work_path + ".circle"
+# circle_model is named this way to follow the existing luci_eval_verifier.py
+
+rtolf32 = 1e-5
+atolf32 = 1e-5
+# NOTE the f32 tolerance values are reused as int tolerance values too
+rtolint = 0
+atolint = 0
+try:
+ if args.rtolf32 != None:
+ rtolf32 = float(args.rtolf32)
+ rtolint = int(rtolf32)
+ if args.atolf32 != None:
+ atolf32 = float(args.atolf32)
+ atolint = int(atolf32)
+except ValueError:
+ print("rtolf32 or atolf32 is not a number")
+ quit(128)
+
+# get num of inputs by checking existence of model.inputN
+check_input = 0
+while True:
+ input_file_path = circle_model_ref + ".input" + str(check_input)
+ if not os.path.isfile(input_file_path):
+ num_inputs = check_input
+ break
+ check_input = check_input + 1
+
+if num_inputs == 0:
+ print("input file not exist for", circle_model_ref)
+ quit(128)
+
+# get num of outputs by checking existence of model.outputN
+check_output = 0
+while True:
+ output_file_path = circle_model_ref + ".output" + str(check_output)
+ if not os.path.isfile(output_file_path):
+ num_outputs = check_output
+ break
+ check_output = check_output + 1
+
+if num_outputs == 0:
+ print("output file not exist for", circle_model_ref)
+ quit(128)
+
+# Execute luci interpreter with reference input
+subprocess.run(
+ [
+ driver, circle_model_ref,
+ str(num_inputs), circle_model_ref + ".input", circle_model + ".output"
+ ],
+ check=True)
+
+# Compare the results.
+for idx in range(num_outputs):
+ output_dtype = dtype_from_file(circle_model_ref + ".output" + str(idx) + ".dtype")
+ shape_file = open(circle_model_ref + ".output" + str(idx) + ".shape", 'r')
+ output_shape = [int(i) for i in shape_file.read().split(',')]
+
+ output_data_ref = np.fromfile(circle_model_ref + ".output" + str(idx), output_dtype)
+ luci_output_data_ref = np.reshape(output_data_ref, output_shape)
+
+ output_data = np.fromfile(circle_model + ".output" + str(idx), output_dtype)
+ luci_output_data = np.reshape(output_data, output_shape)
+
+ try:
+ if output_dtype == np.uint8:
+ if np.allclose(
+ luci_output_data, luci_output_data_ref, rtol=rtolint,
+ atol=atolint) == False:
+ print("luci_output_data_ref", luci_output_data_ref)
+ print("luci_output_data", luci_output_data)
+ raise SystemExit("Execution result of " + circle_model_ref +
+ " does not match with " + circle_model)
+ elif output_dtype == np.float32:
+ if np.allclose(
+ luci_output_data, luci_output_data_ref, rtol=rtolf32,
+ atol=atolf32) == False:
+ print("luci_output_data_ref", luci_output_data_ref)
+ print("luci_output_data", luci_output_data)
+ raise SystemExit("Execution result of " + circle_model_ref +
+ " does not match with " + circle_model)
+ elif output_dtype == np.int64:
+ if np.allclose(
+ luci_output_data, luci_output_data_ref, rtol=rtolint,
+ atol=atolint) == False:
+ print("luci_output_data_ref", luci_output_data_ref)
+ print("luci_output_data", luci_output_data)
+ raise SystemExit("Execution result of " + circle_model_ref +
+ " does not match with " + circle_model)
+ elif output_dtype == np.int32:
+ if np.allclose(
+ luci_output_data, luci_output_data_ref, rtol=rtolint,
+ atol=atolint) == False:
+ print("luci_output_data_ref", luci_output_data_ref)
+ print("luci_output_data", luci_output_data)
+ raise SystemExit("Execution result of " + circle_model_ref +
+ " does not match with " + circle_model)
+ elif output_dtype == np.int16:
+ if np.allclose(
+ luci_output_data, luci_output_data_ref, rtol=rtolint,
+ atol=atolint) == False:
+ print("luci_output_data_ref", luci_output_data_ref)
+ print("luci_output_data", luci_output_data)
+ raise SystemExit("Execution result of " + circle_model_ref +
+ " does not match with " + circle_model)
+ elif output_dtype == np.bool_:
+ if np.allclose(
+ luci_output_data, luci_output_data_ref, rtol=0, atol=0) == False:
+ raise SystemExit("Execution result of " + circle_model_ref +
+ " does not match with " + circle_model)
+ else:
+ raise SystemExit("Unsupported data type: ", output_dtype)
+ except:
+ print(traceback.format_exc())
+ quit(255)
+
+quit(0)
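
The two counting loops above probe the filesystem until `<model>.inputN` (or `.outputN`) is missing. The same probe, restated compactly as a hypothetical helper:

```python
import itertools
import os


def count_tensors(prefix, kind):
    # Counts consecutive files named <prefix>.<kind>0, <prefix>.<kind>1, ...
    # and stops at the first gap, as luci_eval_verifier_ref.py does.
    for n in itertools.count():
        if not os.path.isfile(f"{prefix}.{kind}{n}"):
            return n


# Example: count_tensors("Add.circle", "input") -> number of input files
```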
diff --git a/compiler/luci-value-test/requires.cmake b/compiler/luci-value-test/requires.cmake
index f8af5f27e..e1a0f8367 100644
--- a/compiler/luci-value-test/requires.cmake
+++ b/compiler/luci-value-test/requires.cmake
@@ -4,3 +4,4 @@ require("luci-interpreter")
require("safemain")
require("oops")
require("loco")
+require("luci-eval-driver")
diff --git a/compiler/luci-value-test/test.lst b/compiler/luci-value-test/test.lst
index 0e5231eca..3368b6450 100644
--- a/compiler/luci-value-test/test.lst
+++ b/compiler/luci-value-test/test.lst
@@ -20,90 +20,92 @@ addeval(ArgMax_U8_003)
#addeval(ArgMin_U8_002)
#addeval(ArgMin_U8_003)
addeval(AveragePool2D_000)
-#addeval(BatchMatMul_000)
+addeval(BatchMatMul_000)
#addeval(BatchMatMulV2_000)
#addeval(BatchMatMulV2_001)
#addeval(BatchToSpaceND_000)
-#addeval(Cast_000)
-#addeval(Cast_001)
+addeval(Cast_000)
+addeval(Cast_001)
#addeval(Ceil_000)
addeval(Concatenation_000)
addeval(Concatenation_U8_000)
addeval(Conv2D_000)
addeval(Conv2D_001)
addeval(Conv2D_002)
-#addeval(Conv2D_003)
-addeval(Conv2D_U8_000)
+addeval(Conv2D_003)
+#addeval(Conv2D_U8_000) --> test with tolerance
addeval(Conv2D_U8_001)
#addeval(Cos_000)
-#addeval(DepthToSpace_000)
+addeval(DepthToSpace_000)
addeval(DepthwiseConv2D_000)
addeval(DepthwiseConv2D_U8_000)
#addeval(DepthwiseConv2D_U8_001)
addeval(DepthwiseConv2D_001)
-#addeval(Div_000)
+addeval(Div_000)
addeval(ELU_000)
-#addeval(Equal_000)
-#addeval(Exp_000)
+addeval(Equal_000)
+addeval(Exp_000)
#addeval(ExpandDims_000)
#addeval(ExpandDims_001)
#addeval(ExpandDims_002)
#addeval(ExpandDims_003)
#addeval(Fill_000)
#addeval(Fill_001)
-#addeval(Floor_000)
-#addeval(FloorDiv_000)
-#addeval(FloorDiv_001)
-#addeval(FloorMod_000)
-#addeval(FloorMod_001)
+addeval(Floor_000)
+addeval(FloorDiv_000)
+addeval(FloorDiv_001)
+addeval(FloorMod_000)
+addeval(FloorMod_001)
addeval(FullyConnected_000)
addeval(FullyConnected_001)
addeval(FullyConnected_002)
#addeval(FullyConnected_U8_000)
-#addeval(Gather_000)
+addeval(Gather_000)
#addeval(GatherNd_000)
+addeval(Gelu_000)
#addeval(Greater_000)
-#addeval(GreaterEqual_000)
+addeval(GreaterEqual_000)
+addeval(HardSwish_000)
addeval(If_000)
addeval(If_001)
addeval(L2Normalize_000)
addeval(L2Pool2D_000)
#addeval(L2Pool2D_U8_000)
addeval(LeakyRelu_000)
-#addeval(Less_000)
-#addeval(LessEqual_000)
+addeval(Less_000)
+addeval(LessEqual_000)
addeval(LocalResponseNormalization_000)
-#addeval(Log_000)
-#addeval(LogicalAnd_000)
-#addeval(LogicalNot_000)
-#addeval(LogicalOr_000)
+addeval(Log_000)
+addeval(LogicalAnd_000)
+addeval(LogicalNot_000)
+addeval(LogicalOr_000)
addeval(Logistic_000)
-#addeval(LogSoftmax_000)
+addeval(LogSoftmax_000)
#addeval(MatMul_000)
#addeval(MatrixDiag_000)
#addeval(MatrixSetDiag_000)
-#addeval(Maximum_000)
+addeval(Maximum_000)
addeval(MaxPool2D_000)
addeval(MaxPool2D_U8_000)
addeval(Mean_000)
addeval(Mean_001)
-#addeval(Mean_U8_000)
-#addeval(Minimum_000)
+addeval(Mean_U8_000)
+addeval(Minimum_000)
#addeval(MirrorPad_000)
addeval(Mul_000)
#addeval(Mul_U8_000)
-#addeval(Neg_000)
-#addeval(NotEqual_000)
-#addeval(OneHot_000)
-#addeval(OneHot_001)
-#addeval(OneHot_002)
+addeval(Neg_000)
+addeval(NotEqual_000)
+addeval(OneHot_000)
+addeval(OneHot_001)
+addeval(OneHot_002)
#addeval(OneHot_003)
-#addeval(Pack_000)
-#addeval(Pack_U8_000)
+addeval(Pack_000)
+addeval(Pack_U8_000)
addeval(Pad_000)
addeval(Pad_U8_000)
-#addeval(Pow_000)
-#addeval(PRelu_000)
+addeval(Pow_000)
+addeval(PRelu_000)
#addeval(Range_000)
#addeval(Rank_000)
#addeval(ReduceAny_000)
@@ -116,68 +118,89 @@ addeval(Pad_U8_000)
#addeval(ReduceProd_001)
#addeval(ReduceProd_002)
#addeval(ReduceProd_003)
-#addeval(ReLU_000)
-#addeval(ReLU6_000)
+addeval(ReLU_000)
+addeval(ReLU6_000)
#addeval(ReLUN1To1_000)
addeval(Reshape_000)
addeval(Reshape_001)
addeval(Reshape_002)
#addeval(Reshape_003)
addeval(Reshape_U8_000)
-#addeval(ResizeBilinear_000)
-#addeval(ResizeNearestNeighbor_000)
+addeval(ResizeBilinear_000)
+addeval(ResizeNearestNeighbor_000)
#addeval(ReverseSequence_000)
#addeval(ReverseV2_000)
#addeval(Round_000)
-#addeval(Rsqrt_000)
+addeval(Rsqrt_000)
#addeval(ScatterNd_000)
#addeval(SegmentSum_000)
-#addeval(Select_000)
-#addeval(Select_001)
-#addeval(Select_002)
+addeval(Select_000)
+addeval(Select_001)
+addeval(Select_002)
#addeval(SelectV2_000)
#addeval(SelectV2_001)
#addeval(SelectV2_002)
#addeval(Shape_000)
+addeval(SignatureDef_MultiOut_000)
+addeval(SignatureDef_MultiOut_001)
#addeval(Sin_000)
addeval(Slice_000)
addeval(Softmax_000)
-#addeval(Softmax_U8_000)
-#addeval(SpaceToBatchND_000)
-#addeval(SpaceToBatchND_001)
-#addeval(SpaceToBatchND_002)
-#addeval(SpaceToBatchND_003)
+addeval(Softmax_U8_000)
+addeval(SpaceToBatchND_000)
+addeval(SpaceToBatchND_001)
+addeval(SpaceToBatchND_002)
+addeval(SpaceToBatchND_003)
addeval(SpaceToDepth_000)
#addeval(SparseToDense_000)
addeval(Split_000)
-#addeval(SplitV_000)
-#addeval(Sqrt_000)
-#addeval(Square_000)
-#addeval(SquaredDifference_000)
+addeval(SplitV_000)
+addeval(Sqrt_000)
+addeval(Square_000)
+addeval(SquaredDifference_000)
addeval(Squeeze_000)
+addeval(Squeeze_001)
addeval(StridedSlice_000)
addeval(StridedSlice_001)
addeval(StridedSlice_002)
-#addeval(Sub_000)
-#addeval(Sub_U8_000)
-#addeval(Sum_000)
-#addeval(Sum_001)
-#addeval(Tanh_000)
+addeval(StridedSlice_003)
+addeval(StridedSlice_004)
+addeval(Sub_000)
+addeval(Sub_U8_000)
+addeval(Sum_000)
+addeval(Sum_001)
+addeval(Tanh_000)
#addeval(Tile_000)
#addeval(Tile_U8_000)
#addeval(TopKV2_000)
#addeval(TopKV2_001)
addeval(Transpose_000)
-#addeval(TransposeConv_000)
+addeval(TransposeConv_000)
+addeval(UnidirectionalSequenceLSTM_002)
+addeval(UnidirectionalSequenceLSTM_003)
+addeval(UnidirectionalSequenceLSTM_004)
addeval(Unpack_000)
addeval(Unpack_001)
addeval(Unpack_002)
addeval(Unpack_003)
#addeval(Where_000)
#addeval(Where_001)
#addeval(While_000)
#addeval(While_001)
#addeval(While_002)
#addeval(While_003)
-#addeval(YUV_TO_RGB_U8_000)
#addeval(ZerosLike_000)
+
+# Simple Network test
+addeval(Part_While_000)
+addeval(Part_While_001)
+
+# Tests with tolerance
+addevaltol(SVDF_000 8e-3 8e-3)
+addevaltol(SVDF_001 8e-3 8e-3)
+# TODO fix Conv2D_U8_000 to test without tolerance
+# refer https://github.com/Samsung/ONE/issues/11255#issuecomment-1685424361
+addevaltol(Conv2D_U8_000 1 1)
+# refer https://github.com/Samsung/ONE/issues/10438
+addevaltol(YUV_TO_RGB_U8_000 1 1)
diff --git a/compiler/luci-value-test/tester/CMakeLists.txt b/compiler/luci-value-test/tester/CMakeLists.txt
deleted file mode 100644
index f2a4ff4b6..000000000
--- a/compiler/luci-value-test/tester/CMakeLists.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-
-set(SRCS_EVAL_TESTER
- src/EvalTester.cpp
- )
-
-add_executable(luci_eval_tester ${SRCS_EVAL_TESTER})
-target_link_libraries(luci_eval_tester PRIVATE oops)
-target_link_libraries(luci_eval_tester PRIVATE loco)
-target_link_libraries(luci_eval_tester PRIVATE luci_import)
-target_link_libraries(luci_eval_tester PRIVATE luci_export)
-target_link_libraries(luci_eval_tester PRIVATE luci_lang)
-target_link_libraries(luci_eval_tester PRIVATE luci_interpreter)
-target_link_libraries(luci_eval_tester PRIVATE safemain)
diff --git a/compiler/luci-value-test/tester/src/EvalTester.cpp b/compiler/luci-value-test/tester/src/EvalTester.cpp
deleted file mode 100644
index b49602e5e..000000000
--- a/compiler/luci-value-test/tester/src/EvalTester.cpp
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <luci/Importer.h>
-#include <luci_interpreter/Interpreter.h>
-#include <luci/CircleExporter.h>
-#include <luci/CircleFileExpContract.h>
-
-#include <cstdlib>
-#include <fstream>
-#include <iostream>
-#include <vector>
-#include <map>
-#include <string>
-#include <random>
-
-namespace
-{
-
-void readDataFromFile(const std::string &filename, char *data, size_t data_size)
-{
- std::ifstream fs(filename, std::ifstream::binary);
- if (fs.fail())
- throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
- if (fs.read(data, data_size).fail())
- throw std::runtime_error("Failed to read data from file \"" + filename + "\".\n");
-}
-
-void writeDataToFile(const std::string &filename, const char *data, size_t data_size)
-{
- std::ofstream fs(filename, std::ofstream::binary);
- if (fs.fail())
- throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
- if (fs.write(data, data_size).fail())
- {
- throw std::runtime_error("Failed to write data to file \"" + filename + "\".\n");
- }
-}
-
-std::unique_ptr<luci::Module> importModel(const std::string &filename)
-{
- std::ifstream fs(filename, std::ifstream::binary);
- if (fs.fail())
- {
- throw std::runtime_error("Cannot open model file \"" + filename + "\".\n");
- }
- std::vector<char> model_data((std::istreambuf_iterator<char>(fs)),
- std::istreambuf_iterator<char>());
- return luci::Importer().importModule(circle::GetModel(model_data.data()));
-}
-
-template <typename NodeT> size_t getTensorSize(const NodeT *node)
-{
- uint32_t tensor_size = loco::size(node->dtype());
- for (uint32_t i = 0; i < node->rank(); ++i)
- tensor_size *= node->dim(i).value();
- return tensor_size;
-}
-
-} // namespace
-
-/*
- * @brief EvalTester main
- *
- * Driver for testing luci-inerpreter
- *
- */
-int entry(int argc, char **argv)
-{
- if (argc != 5)
- {
- std::cerr
- << "Usage: " << argv[0]
- << " <path/to/circle/model> <num_inputs> <path/to/input/prefix> <path/to/output/file>\n";
- return EXIT_FAILURE;
- }
-
- const char *filename = argv[1];
- const int32_t num_inputs = atoi(argv[2]);
- const char *input_prefix = argv[3];
- const char *output_file = argv[4];
- const std::string intermediate_filename = std::string(filename) + ".inter.circle";
-
- // Load model from the file
- std::unique_ptr<luci::Module> initial_module = importModel(filename);
- if (initial_module == nullptr)
- {
- std::cerr << "ERROR: Failed to load '" << filename << "'" << std::endl;
- return EXIT_FAILURE;
- }
-
- // Export to a Circle file
- luci::CircleExporter exporter;
-
- luci::CircleFileExpContract contract(initial_module.get(), intermediate_filename);
-
- if (!exporter.invoke(&contract))
- {
- std::cerr << "ERROR: Failed to export '" << intermediate_filename << "'" << std::endl;
- return EXIT_FAILURE;
- }
-
- // Import model again
- std::unique_ptr<luci::Module> module = importModel(intermediate_filename);
- if (module == nullptr)
- {
- std::cerr << "ERROR: Failed to load '" << intermediate_filename << "'" << std::endl;
- return EXIT_FAILURE;
- }
-
- // Create interpreter.
- luci_interpreter::Interpreter interpreter(module.get());
-
- // Set input.
- // Data for n'th input is read from ${input_prefix}n
- // (ex: Add.circle.input0, Add.circle.input1 ..)
- const auto input_nodes = loco::input_nodes(module->graph());
- assert(num_inputs == input_nodes.size());
- for (int32_t i = 0; i < num_inputs; i++)
- {
- const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[i]);
- std::vector<char> input_data(getTensorSize(input_node));
- readDataFromFile(std::string(input_prefix) + std::to_string(i), input_data.data(),
- input_data.size());
- interpreter.writeInputTensor(input_node, input_data.data(), input_data.size());
- }
-
- // Do inference.
- interpreter.interpret();
-
- // Get output.
- const auto output_nodes = loco::output_nodes(module->graph());
- for (int i = 0; i < module->graph()->outputs()->size(); i++)
- {
- const auto *output_node = loco::must_cast<const luci::CircleOutput *>(output_nodes[i]);
- std::vector<char> output_data(getTensorSize(output_node));
- interpreter.readOutputTensor(output_node, output_data.data(), output_data.size());
-
- // Output data is written in ${output_file}
- // (ex: Add.circle.output0)
- // Output shape is written in ${output_file}.shape
- // (ex: Add.circle.output0.shape)
- writeDataToFile(std::string(output_file) + std::to_string(i), output_data.data(),
- output_data.size());
- // In case of Tensor output is Scalar value.
- // The output tensor with rank 0 is treated as a scalar with shape (1)
- if (output_node->rank() == 0)
- {
- writeDataToFile(std::string(output_file) + std::to_string(i) + ".shape", "1", 1);
- }
- else
- {
- auto shape_str = std::to_string(output_node->dim(0).value());
- for (int j = 1; j < output_node->rank(); j++)
- {
- shape_str += ",";
- shape_str += std::to_string(output_node->dim(j).value());
- }
- writeDataToFile(std::string(output_file) + std::to_string(i) + ".shape", shape_str.c_str(),
- shape_str.size());
- }
- }
- return EXIT_SUCCESS;
-}
diff --git a/compiler/luci/CMakeLists.txt b/compiler/luci/CMakeLists.txt
index 214a1bbf2..460dc7b23 100644
--- a/compiler/luci/CMakeLists.txt
+++ b/compiler/luci/CMakeLists.txt
@@ -1,11 +1,30 @@
+# Some targets do not support dynamic linking: MCU, TrustZone applications, etc.
+# The STATIC_LUCI option allows us to compile luci and luci-related components safely
+# and suppress various CMake warnings.
+#
+# Currently this feature is used for luci-interpreter MCU builds.
+if (STATIC_LUCI)
+ set(LUCI_LIBRARY_TYPE "STATIC")
+else()
+ set(LUCI_LIBRARY_TYPE "SHARED")
+endif()
+
add_subdirectory(env)
add_subdirectory(log)
add_subdirectory(lang)
+add_subdirectory(logex)
+add_subdirectory(testhelper)
add_subdirectory(service)
add_subdirectory(pass)
-add_subdirectory(logex)
+add_subdirectory(profile)
+add_subdirectory(plan)
+add_subdirectory(partition)
add_subdirectory(import)
add_subdirectory(export)
add_subdirectory(tester)
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
add_subdirectory(tests)
diff --git a/compiler/luci/env/CMakeLists.txt b/compiler/luci/env/CMakeLists.txt
index 3d8387a47..7025db2e8 100644
--- a/compiler/luci/env/CMakeLists.txt
+++ b/compiler/luci/env/CMakeLists.txt
@@ -2,10 +2,16 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
-add_library(luci_env SHARED ${SOURCES})
+if (NOT LUCI_LIBRARY_TYPE)
+ set(LUCI_LIBRARY_TYPE "SHARED")
+endif(NOT LUCI_LIBRARY_TYPE)
+
+add_library(luci_env ${LUCI_LIBRARY_TYPE} ${SOURCES})
target_include_directories(luci_env PUBLIC include)
target_link_libraries(luci_env PRIVATE nncc_common)
install(TARGETS luci_env DESTINATION lib)
+install(DIRECTORY include/ DESTINATION include
+ FILES_MATCHING PATTERN "*.h")
if(NOT ENABLE_TEST)
return()
diff --git a/compiler/luci/env/include/luci/UserSettings.h b/compiler/luci/env/include/luci/UserSettings.h
index bcfd16071..9fe9592e5 100644
--- a/compiler/luci/env/include/luci/UserSettings.h
+++ b/compiler/luci/env/include/luci/UserSettings.h
@@ -32,6 +32,8 @@ struct UserSettings
Undefined,
MuteWarnings,
DisableValidation,
+ ProfilingDataGen,
+ ExecutionPlanGen,
};
static UserSettings *settings();
diff --git a/compiler/luci/env/src/UserSettings.cpp b/compiler/luci/env/src/UserSettings.cpp
index 27dec762d..136fee799 100644
--- a/compiler/luci/env/src/UserSettings.cpp
+++ b/compiler/luci/env/src/UserSettings.cpp
@@ -30,6 +30,8 @@ public:
private:
bool _MuteWarnings{false};
bool _DisableValidation{false};
+ bool _ProfilingDataGen{false};
+ bool _ExecutionPlanGen{false};
};
void UserSettingsImpl::set(const Key key, bool value)
@@ -42,6 +44,12 @@ void UserSettingsImpl::set(const Key key, bool value)
case Key::DisableValidation:
_DisableValidation = value;
break;
+ case Key::ProfilingDataGen:
+ _ProfilingDataGen = value;
+ break;
+ case Key::ExecutionPlanGen:
+ _ExecutionPlanGen = value;
+ break;
default:
throw std::runtime_error("Invalid key in boolean set");
break;
@@ -56,6 +64,10 @@ bool UserSettingsImpl::get(const Key key) const
return _MuteWarnings;
case Key::DisableValidation:
return _DisableValidation;
+ case Key::ProfilingDataGen:
+ return _ProfilingDataGen;
+ case Key::ExecutionPlanGen:
+ return _ExecutionPlanGen;
default:
throw std::runtime_error("Invalid key in boolean get");
break;
diff --git a/compiler/luci/env/src/UserSettings.test.cpp b/compiler/luci/env/src/UserSettings.test.cpp
index 8d9d1875b..26c606edb 100644
--- a/compiler/luci/env/src/UserSettings.test.cpp
+++ b/compiler/luci/env/src/UserSettings.test.cpp
@@ -39,6 +39,18 @@ TEST(UserSettings, MuteWarnings)
ASSERT_TRUE(settings->get(luci::UserSettings::Key::MuteWarnings));
}
+TEST(UserSettings, MuteWarnings_NEG)
+{
+ auto settings = luci::UserSettings::settings();
+ ASSERT_NE(nullptr, settings);
+
+ settings->set(luci::UserSettings::Key::MuteWarnings, false);
+ ASSERT_FALSE(settings->get(luci::UserSettings::Key::MuteWarnings));
+
+ settings->set(luci::UserSettings::Key::MuteWarnings, true);
+ ASSERT_FALSE(settings->get(luci::UserSettings::Key::DisableValidation));
+}
+
TEST(UserSettings, DisableValidation)
{
auto settings = luci::UserSettings::settings();
@@ -51,6 +63,30 @@ TEST(UserSettings, DisableValidation)
ASSERT_TRUE(settings->get(luci::UserSettings::Key::DisableValidation));
}
+TEST(UserSettings, DisableValidation_NEG)
+{
+ auto settings = luci::UserSettings::settings();
+ ASSERT_NE(nullptr, settings);
+
+ settings->set(luci::UserSettings::Key::DisableValidation, false);
+ ASSERT_FALSE(settings->get(luci::UserSettings::Key::DisableValidation));
+
+ settings->set(luci::UserSettings::Key::DisableValidation, true);
+ ASSERT_FALSE(settings->get(luci::UserSettings::Key::ProfilingDataGen));
+}
+
+TEST(UserSettings, ProfilingDataGen)
+{
+ auto settings = luci::UserSettings::settings();
+ ASSERT_NE(nullptr, settings);
+
+ settings->set(luci::UserSettings::Key::ProfilingDataGen, false);
+ ASSERT_FALSE(settings->get(luci::UserSettings::Key::ProfilingDataGen));
+
+ settings->set(luci::UserSettings::Key::ProfilingDataGen, true);
+ ASSERT_TRUE(settings->get(luci::UserSettings::Key::ProfilingDataGen));
+}
+
TEST(UserSettings, undefined_set_NEG)
{
auto settings = luci::UserSettings::settings();
diff --git a/compiler/luci/export/CMakeLists.txt b/compiler/luci/export/CMakeLists.txt
index fe4382ecd..fb0e20e00 100644
--- a/compiler/luci/export/CMakeLists.txt
+++ b/compiler/luci/export/CMakeLists.txt
@@ -1,30 +1,41 @@
file(GLOB_RECURSE SOURCES "src/*.cpp")
-# TODO enable tests
-#file(GLOB_RECURSE TESTS "src/*.test.cpp")
-#list(REMOVE_ITEM SOURCES ${TESTS})
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
-add_library(luci_export SHARED ${SOURCES})
+if (NOT LUCI_LIBRARY_TYPE)
+ set(LUCI_LIBRARY_TYPE "SHARED")
+endif(NOT LUCI_LIBRARY_TYPE)
+
+add_library(luci_export ${LUCI_LIBRARY_TYPE} ${SOURCES})
target_include_directories(luci_export PRIVATE src)
target_include_directories(luci_export PUBLIC include)
target_link_libraries(luci_export PRIVATE luci_lang)
target_link_libraries(luci_export PRIVATE luci_service)
target_link_libraries(luci_export PRIVATE luci_pass)
-target_link_libraries(luci_export PRIVATE mio_circle)
+target_link_libraries(luci_export PRIVATE mio_circle06)
target_link_libraries(luci_export PRIVATE luci_env)
target_link_libraries(luci_export PRIVATE luci_log)
target_link_libraries(luci_export PRIVATE luci_logex)
+target_link_libraries(luci_export PRIVATE luci_profile)
+target_link_libraries(luci_export PRIVATE luci_plan)
target_link_libraries(luci_export PRIVATE nncc_common)
target_link_libraries(luci_export PRIVATE locop)
target_link_libraries(luci_export PRIVATE oops)
install(TARGETS luci_export DESTINATION lib)
+install(DIRECTORY include/ DESTINATION include
+ FILES_MATCHING PATTERN "*.h")
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
-#if(NOT ENABLE_TEST)
-# return()
-#endif(NOT ENABLE_TEST)
-#
-#nnas_find_package(GTest REQUIRED)
-#
-#GTest_AddTest(luci_export_test ${TESTS})
-#target_include_directories(luci_export_test PRIVATE src)
-#target_link_libraries(luci_export_test luci_export)
-#target_link_libraries(luci_export_test oops)
+GTest_AddTest(luci_export_test ${TESTS})
+target_include_directories(luci_export_test PRIVATE src)
+target_link_libraries(luci_export_test luci_export)
+target_link_libraries(luci_export_test luci_plan)
+target_link_libraries(luci_export_test luci_lang)
+target_link_libraries(luci_export_test mio_circle06)
+target_link_libraries(luci_export_test luci_env)
+target_link_libraries(luci_export_test oops)
diff --git a/compiler/luci/export/include/luci/CircleFileExpContract.h b/compiler/luci/export/include/luci/CircleFileExpContract.h
index eeaf2d9bb..8ef1b5e0c 100644
--- a/compiler/luci/export/include/luci/CircleFileExpContract.h
+++ b/compiler/luci/export/include/luci/CircleFileExpContract.h
@@ -33,7 +33,7 @@ struct CircleFileExpContract : public luci::CircleExporter::Contract
{
public:
CircleFileExpContract(luci::Module *module, const std::string &filename)
- : _module(module), _filepath(filename)
+ : _module(module), _filepath(filename)
{
// NOTHING TO DO
}
diff --git a/compiler/luci/export/src/CircleBuiltinTypesExtractor.h b/compiler/luci/export/src/CircleBuiltinTypesExtractor.h
new file mode 100644
index 000000000..811373ffe
--- /dev/null
+++ b/compiler/luci/export/src/CircleBuiltinTypesExtractor.h
@@ -0,0 +1,549 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_BUILTIN_TYPES_EXTRACTOR_H__
+#define __CIRCLE_BUILTIN_TYPES_EXTRACTOR_H__
+
+#include "CircleExporterUtils.h"
+
+#include <luci/IR/CircleNode.h>
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+#include <flatbuffers/flexbuffers.h>
+
+namespace luci
+{
+
+// NOTE Virtual nodes are not circle builtin operators.
+// Therefore, they are not defined here.
+class BuiltinOptionsExtractor final
+ : public luci::CircleNodeMutableVisitor<flatbuffers::Offset<void>>
+{
+public:
+ BuiltinOptionsExtractor(flatbuffers::FlatBufferBuilder &builder) : _builder{builder}
+ {
+ // DO NOTHING
+ }
+
+public:
+ flatbuffers::Offset<void> visit(luci::CircleAbs *)
+ {
+ return circle::CreateAbsOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleAdd *node)
+ {
+ return circle::CreateAddOptions(_builder, to_circle_actfunc(node->fusedActivationFunction()))
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleAddN *)
+ {
+ return circle::CreateAddNOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleArgMax *node)
+ {
+ return circle::CreateArgMaxOptions(_builder, luci::to_circle_tensortype(node->output_type()))
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleArgMin *node)
+ {
+ return circle::CreateArgMinOptions(_builder, luci::to_circle_tensortype(node->output_type()))
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleAveragePool2D *node)
+ {
+ return circle::CreatePool2DOptions(_builder, getOpPadding(node->padding()), node->stride()->w(),
+ node->stride()->h(), node->filter()->w(),
+ node->filter()->h(),
+ to_circle_actfunc(node->fusedActivationFunction()))
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleBatchMatMul *node)
+ {
+ return circle::CreateBatchMatMulOptions(_builder, node->adj_x(), node->adj_y()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleBatchToSpaceND *)
+ {
+ return circle::CreateBatchToSpaceNDOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleBidirectionalSequenceLSTM *node)
+ {
+ return circle::CreateBidirectionalSequenceLSTMOptions(
+ _builder, to_circle_actfunc(node->fusedActivationFunction()), node->cell_clip(),
+ node->proj_clip(), node->merge_outputs(), node->time_major(),
+ node->asymmetric_quantize_inputs())
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleCast *node)
+ {
+ if (node->out_data_type() == loco::DataType::Unknown)
+ return _no_option;
+ else
+ return circle::CreateCastOptions(_builder, luci::to_circle_tensortype(node->in_data_type()),
+ luci::to_circle_tensortype(node->out_data_type()))
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleCeil *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleConcatenation *node)
+ {
+ return circle::CreateConcatenationOptions(_builder, node->axis(),
+ to_circle_actfunc(node->fusedActivationFunction()))
+ .Union();
+ }
+  // CircleConst is not virtual, but it is also not a builtin operator
+ // flatbuffers::Offset<void> visit(luci::CircleConst *)
+ flatbuffers::Offset<void> visit(luci::CircleConv2D *node)
+ {
+ return circle::CreateConv2DOptions(_builder, getOpPadding(node->padding()), node->stride()->w(),
+ node->stride()->h(),
+ to_circle_actfunc(node->fusedActivationFunction()),
+ node->dilation()->w(), node->dilation()->h())
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleCos *)
+ {
+ return circle::CreateCosOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleCustom *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleDensify *)
+ {
+ return circle::CreateDensifyOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleDepthToSpace *node)
+ {
+ return circle::CreateDepthToSpaceOptions(_builder, node->block_size()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleDepthwiseConv2D *node)
+ {
+ return circle::CreateDepthwiseConv2DOptions(
+ _builder, getOpPadding(node->padding()), node->stride()->w(), node->stride()->h(),
+ node->depthMultiplier(), to_circle_actfunc(node->fusedActivationFunction()),
+ node->dilation()->w(), node->dilation()->h())
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleDequantize *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleDiv *node)
+ {
+ return circle::CreateDivOptions(_builder, to_circle_actfunc(node->fusedActivationFunction()))
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleElu *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleEqual *)
+ {
+ return circle::CreateEqualOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleExp *)
+ {
+ return circle::CreateExpOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleExpandDims *)
+ {
+ return circle::CreateExpandDimsOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleFakeQuant *node)
+ {
+ return circle::CreateFakeQuantOptions(_builder, node->min(), node->max(), node->num_bits(),
+ node->narrow_range())
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleFill *)
+ {
+ return circle::CreateFillOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleFloor *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleFloorDiv *)
+ {
+ return circle::CreateFloorDivOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleFloorMod *)
+ {
+ return circle::CreateFloorModOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleFullyConnected *node)
+ {
+ return circle::CreateFullyConnectedOptions(
+ _builder, to_circle_actfunc(node->fusedActivationFunction()),
+ to_circle_weightsformat(node->weights_format()), node->keep_num_dims())
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleGather *node)
+ {
+ return circle::CreateGatherOptions(_builder, node->axis()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleGatherNd *)
+ {
+ return circle::CreateGatherNdOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleGelu *node)
+ {
+ return circle::CreateGeluOptions(_builder, node->approximate()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleGreater *)
+ {
+ return circle::CreateGreaterOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleGreaterEqual *)
+ {
+ return circle::CreateGreaterEqualOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleHardSwish *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleIf *node)
+ {
+ return circle::CreateIfOptions(_builder, node->then_branch(), node->else_branch()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleL2Normalize *node)
+ {
+ return circle::CreateL2NormOptions(_builder, to_circle_actfunc(node->fusedActivationFunction()))
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleL2Pool2D *node)
+ {
+ return circle::CreatePool2DOptions(_builder, getOpPadding(node->padding()), node->stride()->w(),
+ node->stride()->h(), node->filter()->w(),
+ node->filter()->h(),
+ to_circle_actfunc(node->fusedActivationFunction()))
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleLeakyRelu *node)
+ {
+ return circle::CreateLeakyReluOptions(_builder, node->alpha()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleLess *)
+ {
+ return circle::CreateLessOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleLessEqual *)
+ {
+ return circle::CreateLessEqualOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleLocalResponseNormalization *node)
+ {
+ return circle::CreateLocalResponseNormalizationOptions(_builder, node->radius(), node->bias(),
+ node->alpha(), node->beta())
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleLog *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleLogicalAnd *)
+ {
+ return circle::CreateLogicalAndOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleLogicalNot *)
+ {
+ return circle::CreateLogicalNotOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleLogicalOr *)
+ {
+ return circle::CreateLogicalOrOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleLogistic *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleLogSoftmax *)
+ {
+ return circle::CreateLogSoftmaxOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleMatrixDiag *)
+ {
+ return circle::CreateMatrixDiagOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleMatrixSetDiag *)
+ {
+ return circle::CreateMatrixSetDiagOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleMaximum *)
+ {
+ return circle::CreateMaximumMinimumOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleMaxPool2D *node)
+ {
+ return circle::CreatePool2DOptions(_builder, getOpPadding(node->padding()), node->stride()->w(),
+ node->stride()->h(), node->filter()->w(),
+ node->filter()->h(),
+ to_circle_actfunc(node->fusedActivationFunction()))
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleMean *node)
+ {
+ return circle::CreateReducerOptions(_builder, node->keep_dims()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleMinimum *)
+ {
+ return circle::CreateMaximumMinimumOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleMirrorPad *node)
+ {
+ return circle::CreateMirrorPadOptions(_builder, to_circle_mirrorpadmode(node->mode())).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleMul *node)
+ {
+ return circle::CreateMulOptions(_builder, to_circle_actfunc(node->fusedActivationFunction()))
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleNeg *)
+ {
+ return circle::CreateNegOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleNonMaxSuppressionV4 *)
+ {
+ return circle::CreateNonMaxSuppressionV4Options(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleNonMaxSuppressionV5 *)
+ {
+ return circle::CreateNonMaxSuppressionV5Options(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleNotEqual *)
+ {
+ return circle::CreateNotEqualOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleOneHot *node)
+ {
+ return circle::CreateOneHotOptions(_builder, node->axis()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CirclePack *node)
+ {
+ return circle::CreatePackOptions(_builder, node->values_count(), node->axis()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CirclePad *)
+ {
+ return circle::CreatePadOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CirclePadV2 *)
+ {
+ return circle::CreatePadV2Options(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CirclePow *)
+ {
+ return circle::CreatePowOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CirclePRelu *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleQuantize *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleRange *)
+ {
+ return circle::CreateRangeOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleRank *)
+ {
+ return circle::CreateRankOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleReduceAny *node)
+ {
+ return circle::CreateReducerOptions(_builder, node->keep_dims()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleReduceMax *node)
+ {
+ return circle::CreateReducerOptions(_builder, node->keep_dims()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleReduceMin *node)
+ {
+ return circle::CreateReducerOptions(_builder, node->keep_dims()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleReduceProd *node)
+ {
+ return circle::CreateReducerOptions(_builder, node->keep_dims()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleRelu *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleRelu6 *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleReluN1To1 *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleReshape *node)
+ {
+ auto new_shape = _builder.CreateVector<int32_t>(
+ node->newShape()->rank(), [node](size_t i) { return node->newShape()->dim(i); });
+ return circle::CreateReshapeOptions(_builder, new_shape).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleResizeBilinear *node)
+ {
+ return circle::CreateResizeBilinearOptions(_builder, node->align_corners(),
+ node->half_pixel_centers())
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleResizeNearestNeighbor *node)
+ {
+ return circle::CreateResizeNearestNeighborOptions(_builder, node->align_corners()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleReverseSequence *node)
+ {
+ return circle::CreateReverseSequenceOptions(_builder, node->seq_axis(), node->batch_axis())
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleReverseV2 *)
+ {
+ return circle::CreateReverseV2Options(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleRound *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleRsqrt *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleScatterNd *)
+ {
+ return circle::CreateScatterNdOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleSegmentSum *)
+ {
+ return circle::CreateSegmentSumOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleSelect *)
+ {
+ return circle::CreateSelectOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleSelectV2 *)
+ {
+ return circle::CreateSelectV2Options(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleShape *node)
+ {
+ return circle::CreateShapeOptions(_builder, luci::to_circle_tensortype(node->out_type()))
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleSin *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleSlice *)
+ {
+ return circle::CreateSliceOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleSoftmax *node)
+ {
+ return circle::CreateSoftmaxOptions(_builder, node->beta()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleSpaceToBatchND *)
+ {
+ return circle::CreateSpaceToBatchNDOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleSpaceToDepth *node)
+ {
+ return circle::CreateSpaceToDepthOptions(_builder, node->block_size()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleSparseToDense *node)
+ {
+ return circle::CreateSparseToDenseOptions(_builder, node->validate_indices()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleSplit *node)
+ {
+ return circle::CreateSplitOptions(_builder, node->num_split()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleSplitV *node)
+ {
+ return circle::CreateSplitVOptions(_builder, node->num_split()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleSqrt *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleSquare *)
+ {
+ return circle::CreateSquareOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleSquaredDifference *)
+ {
+ return circle::CreateSquaredDifferenceOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleSqueeze *node)
+ {
+ auto squeeze_dims = _builder.CreateVector<int32_t>(node->squeeze_dims());
+ return circle::CreateSqueezeOptions(_builder, squeeze_dims).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleStridedSlice *node)
+ {
+ return circle::CreateStridedSliceOptions(_builder, node->begin_mask(), node->end_mask(),
+ node->ellipsis_mask(), node->new_axis_mask(),
+ node->shrink_axis_mask())
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleSub *node)
+ {
+ return circle::CreateSubOptions(_builder, to_circle_actfunc(node->fusedActivationFunction()))
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleSum *node)
+ {
+ return circle::CreateReducerOptions(_builder, node->keep_dims()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleSVDF *node)
+ {
+ return circle::CreateSVDFOptions(_builder, node->svdf_rank(),
+ to_circle_actfunc(node->fusedActivationFunction()),
+ node->asymmetric_quantize_inputs())
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleTanh *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleTile *)
+ {
+ return circle::CreateTileOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleTopKV2 *)
+ {
+ return circle::CreateTopKV2Options(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleTranspose *)
+ {
+ return circle::CreateTransposeOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleTransposeConv *node)
+ {
+ return circle::CreateTransposeConvOptions(_builder, getOpPadding(node->padding()),
+ node->stride()->w(), node->stride()->h(),
+ to_circle_actfunc(node->fusedActivationFunction()))
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleUnidirectionalSequenceLSTM *node)
+ {
+ return circle::CreateUnidirectionalSequenceLSTMOptions(
+ _builder, to_circle_actfunc(node->fusedActivationFunction()), node->cell_clip(),
+ node->proj_clip(), node->time_major(), node->asymmetric_quantize_inputs())
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleUnique *node)
+ {
+ return circle::CreateUniqueOptions(_builder, luci::to_circle_tensortype(node->idx_out_type()))
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleUnpack *node)
+ {
+ return circle::CreateUnpackOptions(_builder, node->num(), node->axis()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleWhere *)
+ {
+ return circle::CreateWhereOptions(_builder).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleWhile *node)
+ {
+ return circle::CreateWhileOptions(_builder, node->cond_branch(), node->body_branch()).Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleZerosLike *)
+ {
+ return circle::CreateZerosLikeOptions(_builder).Union();
+ }
+ // Circle only
+ flatbuffers::Offset<void> visit(luci::CircleBCQFullyConnected *node)
+ {
+ return circle::CreateBCQFullyConnectedOptions(
+ _builder, node->weights_hidden_size(),
+ to_circle_actfunc(node->fusedActivationFunction()))
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleBCQGather *node)
+ {
+ return circle::CreateBCQGatherOptions(_builder, node->input_hidden_size(), node->axis())
+ .Union();
+ }
+ flatbuffers::Offset<void> visit(luci::CircleInstanceNorm *node)
+ {
+ return circle::CreateInstanceNormOptions(_builder, node->epsilon(),
+ to_circle_actfunc(node->fusedActivationFunction()))
+ .Union();
+ }
+
+protected:
+ flatbuffers::FlatBufferBuilder &_builder;
+
+private:
+ const flatbuffers::Offset<void> _no_option = 0;
+};
+
+} // namespace luci
+
+#endif // __CIRCLE_BUILTIN_TYPES_EXTRACTOR_H__
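A minimal sketch (not part of the diff) of how this header is driven. It assumes the visitor declared at the top of this header, outside the excerpt, is named BuiltinOptionsExtractor and is constructed over the FlatBufferBuilder; each visit() above then returns either a concrete *Options union or _no_option:

#include <flatbuffers/flatbuffers.h>
#include <luci/IR/CircleNode.h>

// Hypothetical driver: dispatch a node to the matching visit() overload.
flatbuffers::Offset<void> extract_options(flatbuffers::FlatBufferBuilder &builder,
                                          luci::CircleNode *node)
{
  luci::BuiltinOptionsExtractor extractor{builder}; // assumed constructor shape
  // Nodes mapped to _no_option yield a null Offset<void>.
  return node->accept(&extractor);
}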
diff --git a/compiler/luci/export/src/CircleBuiltinTypesMappingRule.h b/compiler/luci/export/src/CircleBuiltinTypesMappingRule.h
new file mode 100644
index 000000000..6f7c0f70e
--- /dev/null
+++ b/compiler/luci/export/src/CircleBuiltinTypesMappingRule.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_EXPORT_BUILTIN_TYPES_MAPPING_RULE_H__
+#define __CIRCLE_EXPORT_BUILTIN_TYPES_MAPPING_RULE_H__
+
+#include <luci/IR/CircleNode.h>
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+namespace luci
+{
+
+class BuiltinOperatorMappingRule final : public CircleNodeVisitor<circle::BuiltinOperator>
+{
+public:
+ BuiltinOperatorMappingRule()
+ {
+ // DO NOTHING
+ }
+
+public:
+ static BuiltinOperatorMappingRule &get()
+ {
+ static BuiltinOperatorMappingRule instance;
+ return instance;
+ }
+
+public:
+#define CIRCLE_NODE(CIRCLE_NODE, OP, OPTION) \
+ circle::BuiltinOperator visit(const CIRCLE_NODE *) final { return circle::OP; }
+// Virtual nodes are not circle builtin operators
+#define CIRCLE_VNODE(CIRCLE_NODE)
+#include "CircleOps.lst"
+#undef CIRCLE_VNODE
+#undef CIRCLE_NODE
+};
+
+class BuiltinOptionsMappingRule final : public CircleNodeVisitor<circle::BuiltinOptions>
+{
+public:
+ BuiltinOptionsMappingRule()
+ {
+ // DO NOTHING
+ }
+
+public:
+ static BuiltinOptionsMappingRule &get()
+ {
+ static BuiltinOptionsMappingRule instance;
+ return instance;
+ }
+
+public:
+#define CIRCLE_NODE(CIRCLE_NODE, OP, OPTION) \
+ circle::BuiltinOptions visit(const CIRCLE_NODE *) final { return circle::OPTION; }
+// Virtual nodes are not circle builtin operators, so they carry no builtin options
+#define CIRCLE_VNODE(CIRCLE_NODE)
+#include "CircleOps.lst"
+#undef CIRCLE_VNODE
+#undef CIRCLE_NODE
+};
+
+} // namespace luci
+
+#endif // __CIRCLE_EXPORT_BUILTIN_TYPES_MAPPING_RULE_H__
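Both rules are stateless singletons, so resolving a node's schema enums is a pair of accept() calls; this mirrors how circle_builtin_operator() and circle_builtin_options() use them later in this diff (CircleExporterUtils.cpp). A small sketch:

#include <utility>
#include <mio/circle/schema_generated.h>
#include "CircleBuiltinTypesMappingRule.h"

// Resolve both schema enums for a builtin CircleNode via double dispatch.
std::pair<circle::BuiltinOperator, circle::BuiltinOptions>
resolve_schema_enums(const luci::CircleNode *node)
{
  auto op = node->accept(&luci::BuiltinOperatorMappingRule::get());
  auto options = node->accept(&luci::BuiltinOptionsMappingRule::get());
  return {op, options};
}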
diff --git a/compiler/luci/export/src/CircleExportMetadata.cpp b/compiler/luci/export/src/CircleExportMetadata.cpp
new file mode 100644
index 000000000..25d0168ec
--- /dev/null
+++ b/compiler/luci/export/src/CircleExportMetadata.cpp
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleExportMetadata.h"
+
+#include <luci/UserSettings.h>
+
+namespace
+{
+
+void write_u32(std::vector<uint8_t> &to, uint32_t value)
+{
+ to.emplace_back(0xFF & (value >> 0 * 8));
+ to.emplace_back(0xFF & (value >> 1 * 8));
+ to.emplace_back(0xFF & (value >> 2 * 8));
+ to.emplace_back(0xFF & (value >> 3 * 8));
+}
+
+flatbuffers::Offset<circle::Metadata> metadata_offset(flatbuffers::FlatBufferBuilder &builder,
+ luci::SerializedModelData &md,
+ const std::vector<uint8_t> &data,
+ const std::string &metadata_name)
+{
+ auto buffer_id = static_cast<uint32_t>(md._buffers.size());
+ md._buffers.push_back(circle::CreateBufferDirect(builder, &data));
+ return circle::CreateMetadataDirect(builder, metadata_name.c_str(), buffer_id);
+}
+
+} // namespace
+
+namespace luci
+{
+
+// 'execution_plan_table' is encoded into binary: [entry count] then, per entry, [id][plan size][plan values...].
+const std::vector<uint8_t> CircleExportMetadata::encoded_execution_plan_table()
+{
+ std::vector<uint8_t> data;
+
+ write_u32(data, _execution_plan_table.size());
+
+ for (auto &kv : _execution_plan_table)
+ {
+ const auto id = kv.first;
+ write_u32(data, id);
+
+ const auto &plan_vector = kv.second;
+ const auto size = plan_vector.size();
+ write_u32(data, size);
+
+ for (auto elem : plan_vector)
+ {
+ write_u32(data, elem);
+ }
+ }
+
+ return data;
+}
+
+// 'source_table' is encoded into binary: [entry count] then, per entry, [id][name length + 1][name bytes + '\0'].
+const std::vector<uint8_t> CircleExportMetadata::encoded_source_table(void)
+{
+ std::vector<uint8_t> data;
+
+ write_u32(data, _source_table.size());
+
+ for (auto &kv : _source_table)
+ {
+ const auto id = kv.first;
+ write_u32(data, id);
+
+ const auto &origin_name = kv.second;
+ const auto length = origin_name.length();
+ write_u32(data, length + 1); // name + '\0'
+
+ for (uint32_t i = 0; i < length; ++i)
+ {
+ data.emplace_back(origin_name.at(i));
+ }
+ data.emplace_back('\0');
+ }
+
+ return data;
+}
+
+// 'op_table' is encoded into binary: [entry count] then, per entry, [id][origin count][origin ids...].
+const std::vector<uint8_t> CircleExportMetadata::encoded_op_table(void)
+{
+ std::vector<uint8_t> data;
+
+ write_u32(data, _op_table.size());
+
+ for (auto &kv : _op_table)
+ {
+ const auto id = kv.first;
+ write_u32(data, id);
+
+ const auto &origins = kv.second;
+ const auto node_num = origins.size();
+ write_u32(data, node_num);
+
+ for (auto origin : origins)
+ {
+ write_u32(data, origin);
+ }
+ }
+
+ return data;
+}
+
+} // namespace luci
+
+namespace luci
+{
+
+std::vector<flatbuffers::Offset<circle::Metadata>>
+createCircleMetadataVector(flatbuffers::FlatBufferBuilder &builder, luci::SerializedModelData &md)
+{
+ std::vector<flatbuffers::Offset<circle::Metadata>> metadata_vec;
+
+ auto settings = luci::UserSettings::settings();
+ if (settings->get(luci::UserSettings::Key::ProfilingDataGen))
+ {
+ metadata_vec.emplace_back(
+ metadata_offset(builder, md, md._metadata.encoded_source_table(), "ONE_source_table"));
+
+ metadata_vec.emplace_back(
+ metadata_offset(builder, md, md._metadata.encoded_op_table(), "ONE_op_table"));
+ }
+ if (settings->get(luci::UserSettings::Key::ExecutionPlanGen))
+ {
+ metadata_vec.emplace_back(metadata_offset(
+ builder, md, md._metadata.encoded_execution_plan_table(), "ONE_execution_plan_table"));
+ }
+ return metadata_vec;
+}
+
+} // namespace luci
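All three tables share one wire format: a uint32_t entry count followed by per-entry payloads, every integer little-endian (see write_u32() above). As a worked example, a sketch of a reader for the ONE_source_table payload produced by encoded_source_table(); the helper names are illustrative, not part of luci:

#include <cstdint>
#include <map>
#include <string>
#include <vector>

// Inverse of write_u32(): read a little-endian uint32_t and advance the cursor.
static uint32_t read_u32(const std::vector<uint8_t> &from, size_t &pos)
{
  uint32_t v = uint32_t(from[pos]) | (uint32_t(from[pos + 1]) << 8) |
               (uint32_t(from[pos + 2]) << 16) | (uint32_t(from[pos + 3]) << 24);
  pos += 4;
  return v;
}

// Layout per entry: [id][name length + 1][name bytes + '\0'].
std::map<uint32_t, std::string> decode_source_table(const std::vector<uint8_t> &data)
{
  size_t pos = 0;
  std::map<uint32_t, std::string> table;
  const uint32_t count = read_u32(data, pos);
  for (uint32_t n = 0; n < count; ++n)
  {
    const uint32_t id = read_u32(data, pos);
    const uint32_t len = read_u32(data, pos); // includes the trailing '\0'
    table[id] = std::string(reinterpret_cast<const char *>(&data[pos]), len - 1);
    pos += len;
  }
  return table;
}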
diff --git a/compiler/luci/export/src/CircleExportMetadata.h b/compiler/luci/export/src/CircleExportMetadata.h
new file mode 100644
index 000000000..10cda421e
--- /dev/null
+++ b/compiler/luci/export/src/CircleExportMetadata.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_EXPORT_METADATA_H__
+#define __LUCI_CIRCLE_EXPORT_METADATA_H__
+
+#include "SerializedData.h"
+
+#include <flatbuffers/flatbuffers.h>
+#include <mio/circle/schema_generated.h>
+
+namespace luci
+{
+
+/**
+ * @brief Create a vector of circle::Metadata from the model's serialized metadata
+ */
+std::vector<flatbuffers::Offset<circle::Metadata>>
+createCircleMetadataVector(flatbuffers::FlatBufferBuilder &builder, SerializedModelData &md);
+
+} // namespace luci
+
+#endif // __LUCI_CIRCLE_EXPORT_METADATA_H__
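As wired later in this diff (CircleExporterImpl.cpp), the result feeds directly into the Model table in place of the old, always-empty metadata_buffer; a condensed view:

// Inside CircleExporterImpl::exportModule() (see below): build the metadata
// entries, then hand the vector to CreateModel().
auto metadata_vec = luci::createCircleMetadataVector(_builder, md);
auto metadata = _builder.CreateVector(metadata_vec);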
diff --git a/compiler/luci/export/src/CircleExporter.test.cpp b/compiler/luci/export/src/CircleExporter.test.cpp
new file mode 100644
index 000000000..5898f9d65
--- /dev/null
+++ b/compiler/luci/export/src/CircleExporter.test.cpp
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/CircleExporter.h"
+
+#include <luci/Plan/CircleNodeExecutionPlan.h>
+#include <luci/IR/Nodes/CircleInput.h>
+#include <luci/IR/Nodes/CircleOutput.h>
+#include <luci/IR/Nodes/CircleRelu.h>
+#include <luci/UserSettings.h>
+
+#include <mio/circle/schema_generated.h>
+#include <flatbuffers/flatbuffers.h>
+
+#include <gtest/gtest.h>
+
+class SampleGraphContract : public luci::CircleExporter::Contract
+{
+public:
+ SampleGraphContract() : luci::CircleExporter::Contract(), _buffer(new std::vector<char>)
+ {
+ // create needed entities
+ _g = loco::make_graph();
+ auto graph_input = _g->inputs()->create();
+ auto graph_output = _g->outputs()->create();
+ input_node = _g->nodes()->create<luci::CircleInput>();
+ output_node = _g->nodes()->create<luci::CircleOutput>();
+ relu_node = _g->nodes()->create<luci::CircleRelu>();
+
+ // connect the nodes and bind them to the graph inputs/outputs
+ relu_node->features(input_node);
+ output_node->from(relu_node);
+ input_node->index(graph_input->index());
+ output_node->index(graph_output->index());
+
+ // Set needed properties
+ input_node->name("input");
+ output_node->name("output");
+ relu_node->name("relu");
+ input_node->dtype(loco::DataType::FLOAT32);
+
+ graph_input->shape({1, 2, 3, 4});
+ graph_input->dtype(loco::DataType::FLOAT32);
+
+ graph_output->shape({1, 2, 3, 4});
+ graph_output->dtype(loco::DataType::FLOAT32);
+ }
+
+ loco::Graph *graph(void) const override { return _g.get(); }
+
+public:
+ bool store(const char *ptr, const size_t size) const override
+ {
+ _buffer->resize(size);
+ std::copy(ptr, ptr + size, _buffer->begin());
+ return true;
+ }
+
+ const std::vector<char> &get_buffer() { return *_buffer; }
+
+public:
+ luci::CircleInput *input_node;
+ luci::CircleOutput *output_node;
+ luci::CircleRelu *relu_node;
+
+private:
+ std::unique_ptr<loco::Graph> _g;
+ std::unique_ptr<std::vector<char>> _buffer;
+};
+
+TEST(CircleExport, export_execution_plan)
+{
+ SampleGraphContract contract;
+ uint32_t reference_order = 1;
+ uint32_t reference_offset = 100u;
+ luci::add_execution_plan(contract.relu_node,
+ luci::CircleNodeExecutionPlan(reference_order, {reference_offset}));
+
+ luci::UserSettings::settings()->set(luci::UserSettings::ExecutionPlanGen, true);
+ luci::CircleExporter exporter;
+
+ exporter.invoke(&contract);
+
+ ASSERT_FALSE(contract.get_buffer().empty());
+ std::unique_ptr<circle::ModelT> model(circle::GetModel(contract.get_buffer().data())->UnPack());
+ ASSERT_NE(model.get(), nullptr);
+ ASSERT_EQ(model->metadata[0]->name, "ONE_execution_plan_table");
+ auto metadata_buffer = model->metadata[0]->buffer;
+ auto &buffer = model->buffers[metadata_buffer]->data;
+ ASSERT_EQ(buffer.size(), 20);
+ uint32_t *raw_table_contents = reinterpret_cast<uint32_t *>(buffer.data());
+
+ auto num_entries = raw_table_contents[0];
+ ASSERT_EQ(num_entries, 1);
+ auto node_id = raw_table_contents[1];
+ ASSERT_EQ(node_id, 1); // relu node is second (aka id 1) in the exporter's topological sort
+ auto node_plan_size = raw_table_contents[2];
+ ASSERT_EQ(node_plan_size, 2); // 1 for execution order, 1 for memory offset value
+ auto node_plan_order = raw_table_contents[3];
+ ASSERT_EQ(node_plan_order,
+ reference_order); // this value comes from the CircleNodeExecutionPlan initialization
+ auto node_plan_offset = raw_table_contents[4];
+ ASSERT_EQ(node_plan_offset,
+ reference_offset); // this value comes from the CircleNodeExecutionPlan initialization
+}
+
+TEST(CircleExport, export_execution_plan_nosetting_NEG)
+{
+ SampleGraphContract contract;
+ uint32_t reference_order = 1;
+ uint32_t reference_offset = 100u;
+ luci::add_execution_plan(contract.relu_node,
+ luci::CircleNodeExecutionPlan(reference_order, {reference_offset}));
+
+ luci::UserSettings::settings()->set(luci::UserSettings::ExecutionPlanGen, false);
+ luci::CircleExporter exporter;
+
+ exporter.invoke(&contract);
+
+ ASSERT_FALSE(contract.get_buffer().empty());
+ std::unique_ptr<circle::ModelT> model(circle::GetModel(contract.get_buffer().data())->UnPack());
+ ASSERT_NE(model.get(), nullptr);
+ ASSERT_EQ(model->metadata.size(), 0);
+}
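The 20-byte assertion above follows directly from the encoding in CircleExportMetadata.cpp: the plan table for this test graph is five little-endian uint32_t words.

// ONE_execution_plan_table payload for this test graph (5 * 4 = 20 bytes):
//   word 0: 1    entry count
//   word 1: 1    node id (relu is second in the topological order)
//   word 2: 2    plan length: execution order + one output-tensor offset
//   word 3: 1    reference_order
//   word 4: 100  reference_offset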
diff --git a/compiler/luci/export/src/CircleExporterImpl.cpp b/compiler/luci/export/src/CircleExporterImpl.cpp
index 860cebf6e..083add9be 100644
--- a/compiler/luci/export/src/CircleExporterImpl.cpp
+++ b/compiler/luci/export/src/CircleExporterImpl.cpp
@@ -16,11 +16,13 @@
#include "CircleExporterImpl.h"
#include "Optimize.h"
-#include "TypeBridge.h"
+#include "CircleExportMetadata.h"
#include "CircleTensorExporter.h"
#include "CircleOperationExporter.h"
#include "CircleExporterUtils.h"
+#include <luci/IR/CircleNodes.h>
+
#include <oops/InternalExn.h>
#include <mio/circle/schema_generated.h>
#include <flatbuffers/flatbuffers.h>
@@ -28,46 +30,16 @@
#include <cassert>
#include <unordered_map>
#include <string>
-#include <stdexcept>
+#include <vector>
namespace
{
-luci::CircleInput *input_node(loco::Graph *g, const loco::GraphInputIndex &index)
-{
- for (uint32_t n = 0; n < g->nodes()->size(); ++n)
- {
- if (auto input = dynamic_cast<luci::CircleInput *>(g->nodes()->at(n)))
- {
- if (input->indexed() && input->index() == index)
- {
- return input;
- }
- }
- }
- return nullptr;
-}
-
-luci::CircleOutput *output_node(loco::Graph *g, const loco::GraphOutputIndex &index)
-{
- for (uint32_t n = 0; n < g->nodes()->size(); ++n)
- {
- if (auto output = dynamic_cast<luci::CircleOutput *>(g->nodes()->at(n)))
- {
- if (output->indexed() && output->index() == index)
- {
- return output;
- }
- }
- }
- return nullptr;
-}
-
void registerGraphInputTensors(loco::Graph *graph, luci::SubGraphContext &ctx)
{
for (uint32_t n = 0; n < graph->inputs()->size(); ++n)
{
- auto node = input_node(graph, n);
+ auto node = luci::input_node(graph, n);
assert(node != nullptr);
ctx._inputs.push_back(luci::get_tensor_index(node));
}
@@ -77,7 +49,7 @@ void registerGraphOutputTensors(loco::Graph *graph, luci::SubGraphContext &ctx)
{
for (uint32_t n = 0; n < graph->outputs()->size(); ++n)
{
- auto push = output_node(graph, n);
+ auto push = luci::output_node(graph, n);
assert(push != nullptr);
auto node = push->from();
assert(node != nullptr);
@@ -107,14 +79,19 @@ encodeOperatorCodes(FlatBufferBuilder &builder, std::unordered_map<luci::OpCode,
for (auto it : opcodes)
{
uint32_t idx = it.second;
+ int8_t dep_code = 127; // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES
+ if (it.first.opcode < BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES)
+ dep_code = static_cast<int8_t>(it.first.opcode);
if (it.first.opcode != BuiltinOperator_CUSTOM)
{
- operator_codes_vec[idx] = CreateOperatorCode(builder, it.first.opcode, 0, it.first.version);
+ operator_codes_vec[idx] =
+ CreateOperatorCode(builder, dep_code, 0, it.first.version, it.first.opcode);
}
else
{
operator_codes_vec[idx] =
- CreateOperatorCode(builder, it.first.opcode, builder.CreateString(it.first.custom_code));
+ CreateOperatorCode(builder, dep_code, builder.CreateString(it.first.custom_code),
+ it.first.version, it.first.opcode);
}
}
@@ -150,9 +127,6 @@ void CircleExporterImpl::exportGraph(loco::Graph *graph)
// do graph optimization
optimize(graph);
- // copy shape/dtype inference data to CircleNode
- copy_shape_dtype(graph);
-
_builder.Clear();
SerializedModelData md;
@@ -190,16 +164,16 @@ void CircleExporterImpl::exportGraph(loco::Graph *graph)
std::string description_str = "nnpackage";
auto description = _builder.CreateString(description_str);
+ // Metadata
+ auto metadata_vec = createCircleMetadataVector(_builder, md);
+ auto metadata = _builder.CreateVector(std::vector<Offset<Metadata>>(metadata_vec));
+
// create array of buffers
auto buffers = _builder.CreateVector(md._buffers);
- // empty metadata
- std::vector<int> metadata_buffer_vec;
- auto metadata_buffer = _builder.CreateVector(metadata_buffer_vec);
-
// Model
auto model_offset = CreateModel(_builder, version, operator_codes, subgraphs, description,
- buffers, metadata_buffer);
+ buffers, 0 /* metadata_buffer */, metadata);
FinishModelBuffer(_builder, model_offset);
}
@@ -223,9 +197,6 @@ void CircleExporterImpl::exportModule(Module *module)
optimize(graph);
- // copy shape/dtype inference data to CircleNode
- copy_shape_dtype(graph);
-
SerializedGraphData gd;
// set Subgraph name
@@ -257,19 +228,20 @@ void CircleExporterImpl::exportModule(Module *module)
std::string description_str = "nnpackage";
auto description = _builder.CreateString(description_str);
+ // Metadata
+ md._metadata.source_table(module->source_table());
+ auto metadata_vec = createCircleMetadataVector(_builder, md);
+ auto metadata = _builder.CreateVector(std::vector<Offset<Metadata>>(metadata_vec));
+
// create array of buffers
auto buffers = _builder.CreateVector(md._buffers);
- // empty metadata
- std::vector<int> metadata_buffer_vec;
- auto metadata_buffer = _builder.CreateVector(metadata_buffer_vec);
-
// This version is taken from comment in fbs
constexpr uint32_t version = 0;
// Model
auto model_offset = CreateModel(_builder, version, operator_codes, subgraphs, description,
- buffers, metadata_buffer);
+ buffers, 0 /* metadata_buffer */, metadata);
FinishModelBuffer(_builder, model_offset);
}
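The dep_code clamping in encodeOperatorCodes() above mirrors the schema's operator-code evolution: the original int8 field cannot hold opcodes at or above 127, so those are written as the placeholder there while the real value goes into the newer int32 builtin_code field. A reader-side sketch, assuming the generated accessors follow the schema field names:

// Recover the effective opcode from an OperatorCode table (sketch).
int32_t effective_opcode(const circle::OperatorCode *code)
{
  // 127 == BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES
  if (code->deprecated_builtin_code() == 127)
    return code->builtin_code();
  return code->deprecated_builtin_code();
}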
diff --git a/compiler/luci/export/src/CircleExporterImpl.h b/compiler/luci/export/src/CircleExporterImpl.h
index e5d5b5a00..069f62afd 100644
--- a/compiler/luci/export/src/CircleExporterImpl.h
+++ b/compiler/luci/export/src/CircleExporterImpl.h
@@ -22,8 +22,6 @@
#include "SerializedData.h"
-#include "SerializedData.h"
-
#include <mio/circle/schema_generated.h>
#include <loco.h>
diff --git a/compiler/luci/export/src/CircleExporterUtils.cpp b/compiler/luci/export/src/CircleExporterUtils.cpp
index f097e71c5..9473c2c4e 100644
--- a/compiler/luci/export/src/CircleExporterUtils.cpp
+++ b/compiler/luci/export/src/CircleExporterUtils.cpp
@@ -15,6 +15,7 @@
*/
#include "CircleExporterUtils.h"
+#include "CircleBuiltinTypesMappingRule.h"
#include <oops/InternalExn.h>
@@ -36,6 +37,10 @@ circle::ActivationFunctionType to_circle_actfunc(luci::FusedActFunc func)
return circle::ActivationFunctionType_RELU_N1_TO_1;
case luci::FusedActFunc::RELU6:
return circle::ActivationFunctionType_RELU6;
+ case luci::FusedActFunc::TANH:
+ return circle::ActivationFunctionType_TANH;
+ case luci::FusedActFunc::SIGN_BIT:
+ return circle::ActivationFunctionType_SIGN_BIT;
default:
INTERNAL_EXN_V("trying to convert unsupported luci::FusedActFunc", oops::to_uint32(func));
}
@@ -65,6 +70,9 @@ circle::TensorType to_circle_tensortype(loco::DataType type)
case loco::DataType::BOOL:
return circle::TensorType_BOOL;
+ case loco::DataType::STRING:
+ return circle::TensorType_STRING;
+
default:
INTERNAL_EXN_V("failed to convert unsupported loco::DataType", oops::to_uint32(type));
}
@@ -83,36 +91,136 @@ circle::MirrorPadMode to_circle_mirrorpadmode(luci::MirrorPadMode mode)
}
}
-} // namespace luci
+circle::FullyConnectedOptionsWeightsFormat
+to_circle_weightsformat(luci::CircleFullyConnected::WeightsFormat format)
+{
+ switch (format)
+ {
+ case luci::CircleFullyConnected::WeightsFormat::DEFAULT:
+ return circle::FullyConnectedOptionsWeightsFormat_DEFAULT;
+ case luci::CircleFullyConnected::WeightsFormat::SHUFFLED4x16INT8:
+ return circle::FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8;
+ case luci::CircleFullyConnected::WeightsFormat::SHUFFLED16x1FLOAT32:
+ return circle::FullyConnectedOptionsWeightsFormat_SHUFFLED16x1FLOAT32;
+ default:
+ INTERNAL_EXN_V("trying to convert unsupported luci::WeightsFormat", oops::to_uint32(format));
+ }
+}
-namespace luci
+circle::DimensionType to_circle_dimensiontype(luci::DimensionType type)
{
+ switch (type)
+ {
+ case luci::DimensionType::DENSE:
+ return circle::DimensionType_DENSE;
+ case luci::DimensionType::SPARSE_CSR:
+ return circle::DimensionType_SPARSE_CSR;
+ default:
+ INTERNAL_EXN_V("trying to convert unsupported luci::DimensionType", oops::to_uint32(type));
+ }
+}
-uint32_t SerializedModelData::registerBuiltinOpcode(circle::BuiltinOperator builtin_code,
- const int32_t op_version)
+flatbuffers::Offset<void> to_circle_sparse_index_vector(flatbuffers::FlatBufferBuilder &fb,
+ const SparseIndexVector &sparse_idx_vec)
{
- assert(op_version > 0);
+ auto type = sparse_idx_vec.type();
+ switch (type)
+ {
+ case luci::SparseIndexVectorType::NONE:
+ return flatbuffers::Offset<void>();
+ case luci::SparseIndexVectorType::I32:
+ {
+ return circle::CreateInt32VectorDirect(fb, sparse_idx_vec.as_int32_vector()).Union();
+ }
+ case luci::SparseIndexVectorType::U16:
+ {
+ return circle::CreateUint16VectorDirect(fb, sparse_idx_vec.as_uint16_vector()).Union();
+ }
+ case luci::SparseIndexVectorType::U8:
+ {
+ return circle::CreateUint8VectorDirect(fb, sparse_idx_vec.as_uint8_vector()).Union();
+ }
+ default:
+ INTERNAL_EXN_V("trying to convert unsupported luci::SparseIndexVectorType",
+ oops::to_uint32(type));
+ }
+}
- auto it = _operator_codes.find(OpCode{builtin_code, "", op_version});
- if (it != _operator_codes.end())
+circle::SparseIndexVector to_circle_sparse_index_vector_type(luci::SparseIndexVectorType type)
+{
+ switch (type)
{
- return it->second;
+ case luci::SparseIndexVectorType::NONE:
+ return circle::SparseIndexVector_NONE;
+ case luci::SparseIndexVectorType::I32:
+ return circle::SparseIndexVector_Int32Vector;
+ case luci::SparseIndexVectorType::U16:
+ return circle::SparseIndexVector_Uint16Vector;
+ case luci::SparseIndexVectorType::U8:
+ return circle::SparseIndexVector_Uint8Vector;
+ default:
+ INTERNAL_EXN_V("trying to convert unsupported luci::SparseIndexVectorType",
+ oops::to_uint32(type));
}
- auto idx = static_cast<uint32_t>(_operator_codes.size());
- _operator_codes.emplace(OpCode{builtin_code, "", op_version}, idx);
- return idx;
}
-uint32_t SerializedModelData::registerCustomOpcode(const std::string &custom_code)
+circle::BuiltinOperator circle_builtin_operator(const luci::CircleNode *node)
+{
+ return node->accept(&BuiltinOperatorMappingRule::get());
+}
+
+circle::BuiltinOptions circle_builtin_options(const luci::CircleNode *node)
+{
+ if (auto cast = dynamic_cast<const luci::CircleCast *>(node))
+ {
+ return (cast->out_data_type() == loco::DataType::Unknown) ? circle::BuiltinOptions_NONE
+ : circle::BuiltinOptions_CastOptions;
+ }
+
+ return node->accept(&BuiltinOptionsMappingRule::get());
+}
+
+std::string circle_custom_code(const luci::CircleNode *node)
{
- const circle::BuiltinOperator builtin_code = circle::BuiltinOperator_CUSTOM;
- auto it = _operator_codes.find(OpCode{builtin_code, custom_code});
+ if (auto custom_node = dynamic_cast<const luci::CircleCustom *>(node))
+ {
+ return custom_node->custom_code();
+ }
+
+ return "";
+}
+
+flatbuffers::Offset<flatbuffers::Vector<uint8_t>>
+circle_custom_options(flatbuffers::FlatBufferBuilder &fb, const luci::CircleNode *node)
+{
+ if (auto custom_node = dynamic_cast<const luci::CircleCustom *>(node))
+ {
+ std::vector<uint8_t> custom_options_vec{custom_node->custom_options().begin(),
+ custom_node->custom_options().end()};
+ return fb.CreateVector(custom_options_vec);
+ }
+
+ return 0;
+}
+
+} // namespace luci
+
+namespace luci
+{
+
+uint32_t SerializedModelData::registerBuiltinOpcode(circle::BuiltinOperator builtin_code,
+ const std::string &custom_code,
+ const int32_t op_version)
+{
+ assert(op_version > 0);
+
+ auto it = _operator_codes.find(OpCode{builtin_code, custom_code, op_version});
if (it != _operator_codes.end())
{
return it->second;
}
auto idx = static_cast<uint32_t>(_operator_codes.size());
- _operator_codes.emplace(OpCode{builtin_code, custom_code}, idx);
+ _operator_codes.emplace(OpCode{builtin_code, custom_code, op_version}, idx);
return idx;
}
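With the version (and custom code) now part of the key, operator codes deduplicate per {opcode, custom_code, version} triple; a sketch of the resulting behavior:

luci::SerializedModelData md;
// Two ADD v1 registrations share one operator_code slot; ADD v2 gets a fresh one.
auto a = md.registerBuiltinOpcode(circle::BuiltinOperator_ADD, "", 1); // -> 0
auto b = md.registerBuiltinOpcode(circle::BuiltinOperator_ADD, "", 1); // -> 0 (reused)
auto c = md.registerBuiltinOpcode(circle::BuiltinOperator_ADD, "", 2); // -> 1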
@@ -131,13 +239,13 @@ circle::Padding getOpPadding(const loco::Padding2D *pad, const loco::Stride<2> *
//
// NOTE input and output 'feature' map are shape of NHWC
bool same_padding_criterion_1 =
- (static_cast<uint32_t>(ofm._dims[1]) == (ifm._dims[1] - 1) / stride->vertical() + 1) &&
- (static_cast<uint32_t>(ofm._dims[2]) == (ifm._dims[2] - 1) / stride->horizontal() + 1);
+ (static_cast<uint32_t>(ofm._dims[1]) == (ifm._dims[1] - 1) / stride->vertical() + 1) &&
+ (static_cast<uint32_t>(ofm._dims[2]) == (ifm._dims[2] - 1) / stride->horizontal() + 1);
// For same padding, rear padding is same or bigger than front padding by at most 1
bool same_padding_criterion_2 =
- (pad->top() <= pad->bottom()) && (pad->bottom() <= pad->top() + 1) &&
- (pad->left() <= pad->right()) && (pad->right() <= pad->left() + 1);
+ (pad->top() <= pad->bottom()) && (pad->bottom() <= pad->top() + 1) &&
+ (pad->left() <= pad->right()) && (pad->right() <= pad->left() + 1);
if (same_padding_criterion_1 && same_padding_criterion_2)
return circle::Padding_SAME;
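A worked instance of the two SAME-padding criteria above, with illustrative numbers:

// ifm HxW = 5x5, stride = 2x2, ofm HxW = 3x3, pad top/left = 0, bottom/right = 1
//   criterion 1: 3 == (5 - 1) / 2 + 1             -> holds on both axes
//   criterion 2: 0 <= 1 and 1 <= 0 + 1 (per axis) -> holds
// => getOpPadding() returns circle::Padding_SAME for this configuration.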
diff --git a/compiler/luci/export/src/CircleExporterUtils.h b/compiler/luci/export/src/CircleExporterUtils.h
index f9ce6d2bf..4a4c54a69 100644
--- a/compiler/luci/export/src/CircleExporterUtils.h
+++ b/compiler/luci/export/src/CircleExporterUtils.h
@@ -32,6 +32,18 @@ namespace luci
circle::ActivationFunctionType to_circle_actfunc(luci::FusedActFunc func);
circle::TensorType to_circle_tensortype(loco::DataType type);
circle::MirrorPadMode to_circle_mirrorpadmode(luci::MirrorPadMode mode);
+circle::FullyConnectedOptionsWeightsFormat
+to_circle_weightsformat(luci::CircleFullyConnected::WeightsFormat format);
+circle::DimensionType to_circle_dimensiontype(luci::DimensionType type);
+flatbuffers::Offset<void> to_circle_sparse_index_vector(flatbuffers::FlatBufferBuilder &fb,
+ const SparseIndexVector &sparse_idx_vec);
+circle::SparseIndexVector to_circle_sparse_index_vector_type(luci::SparseIndexVectorType type);
+
+circle::BuiltinOperator circle_builtin_operator(const luci::CircleNode *node);
+circle::BuiltinOptions circle_builtin_options(const luci::CircleNode *node);
+std::string circle_custom_code(const luci::CircleNode *node);
+flatbuffers::Offset<flatbuffers::Vector<uint8_t>>
+circle_custom_options(flatbuffers::FlatBufferBuilder &fb, const luci::CircleNode *node);
} // namespace luci
diff --git a/compiler/luci/export/src/CircleOperationExporter.cpp b/compiler/luci/export/src/CircleOperationExporter.cpp
index 36d61f6c9..b300a7fcf 100644
--- a/compiler/luci/export/src/CircleOperationExporter.cpp
+++ b/compiler/luci/export/src/CircleOperationExporter.cpp
@@ -15,1431 +15,53 @@
*/
#include "CircleOperationExporter.h"
-#include "CircleExporterUtils.h"
-#include "Check.h"
+#include "CircleOperationExporterRule.h"
#include <luci/IR/CircleNode.h>
-#include <luci/IR/CircleNodes.h>
-#include <luci/IR/CircleNodeVisitor.h>
-#include <luci/Service/CircleShapeInference.h>
-#include <luci/UserSettings.h>
-#include <luci/Log.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+#include <luci/Plan/CircleNodeExecutionPlan.h>
+#include <loco/IR/Algorithm.h>
-#include <loco/IR/CanonicalNodeVisitor.h>
-#include <oops/InternalExn.h>
-
-#include <flatbuffers/flexbuffers.h>
-
-using namespace flatbuffers;
-using namespace circle;
-
-namespace
-{
-
-using namespace luci;
-
-struct ExportContext
-{
- FlatBufferBuilder &builder;
- SerializedModelData &md;
- SerializedGraphData &gd;
-};
-
-/**
- * @brief Exports CircleMaxPool2D or CircleAveragePool2D
- *
- * @note CirclePool2D should be one of CircleMaxPool2D or CircleAveragePool2D
- */
-template <class CirclePool2D>
-void export_pool_2d(ExportContext &ctx, CirclePool2D *node, circle::BuiltinOperator builtin_op)
-{
- LUCI_ASSERT(builtin_op == circle::BuiltinOperator_MAX_POOL_2D ||
- builtin_op == circle::BuiltinOperator_L2_POOL_2D ||
- builtin_op == circle::BuiltinOperator_AVERAGE_POOL_2D,
- "Should be L2Pool, MaxPool or AvgPool");
- LUCI_ASSERT(node->padding() != luci::Padding::UNDEFINED, "Padding is not set");
-
- uint32_t op_idx = ctx.md.registerBuiltinOpcode(builtin_op, node->op_version());
- std::vector<int32_t> inputs_vec{get_tensor_index(node->value())};
- std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
- auto inputs = ctx.builder.CreateVector(inputs_vec);
- auto outputs = ctx.builder.CreateVector(outputs_vec);
-
- circle::Padding padding = getOpPadding(node->padding());
-
- auto options = CreatePool2DOptions(ctx.builder, padding, node->stride()->w(), node->stride()->h(),
- node->filter()->w(), node->filter()->h(),
- to_circle_actfunc(node->fusedActivationFunction()));
- auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_Pool2DOptions, options.Union());
- ctx.gd._operators.push_back(op_offset);
-}
-
-/**
- * @brief export simple nodes
- */
-void export_node(ExportContext &ctx, loco::Node *node, circle::BuiltinOperator bop,
- circle::BuiltinOptions bot, flatbuffers::Offset<void> options_offset)
-{
- uint32_t op_idx =
- ctx.md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version());
- std::vector<int32_t> inputs_vec;
- std::vector<int32_t> outputs_vec{get_tensor_index(node)};
- for (uint32_t i = 0; i < node->arity(); ++i)
- inputs_vec.push_back(get_tensor_index(node->arg(i)));
- auto inputs = ctx.builder.CreateVector(inputs_vec);
- auto outputs = ctx.builder.CreateVector(outputs_vec);
- auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, bot, options_offset);
- ctx.gd._operators.push_back(op_offset);
-}
-
-/**
- * @brief export simple nodes having void options
- */
-void export_node(ExportContext &ctx, loco::Node *node, circle::BuiltinOperator bop)
-{
- uint32_t op_idx =
- ctx.md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version());
- std::vector<int32_t> inputs_vec;
- std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
- for (uint32_t i = 0; i < node->arity(); ++i)
- inputs_vec.push_back(get_tensor_index(node->arg(i)));
- auto inputs = ctx.builder.CreateVector(inputs_vec);
- auto outputs = ctx.builder.CreateVector(outputs_vec);
- auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs);
- ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleAddN *node)
-{
- uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_ADD_N, node->op_version());
- std::vector<int32_t> inputs_vec;
- std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
-
- for (uint32_t i = 0; i < node->arity(); ++i)
- inputs_vec.push_back(get_tensor_index(node->inputs(i)));
-
- auto inputs = ctx.builder.CreateVector(inputs_vec);
- auto outputs = ctx.builder.CreateVector(outputs_vec);
- auto options = CreateAddNOptions(ctx.builder);
- auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_AddNOptions, options.Union());
- ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleCast *node)
-{
- uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_CAST, node->op_version());
- std::vector<int32_t> inputs_vec{get_tensor_index(node->x())};
- std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
- auto inputs = ctx.builder.CreateVector(inputs_vec);
- auto outputs = ctx.builder.CreateVector(outputs_vec);
-
- flatbuffers::Offset<Operator> op_offset;
- if (node->out_data_type() != loco::DataType::Unknown)
- {
- auto options = CreateCastOptions(ctx.builder, to_circle_tensortype(node->in_data_type()),
- to_circle_tensortype(node->out_data_type()));
- op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_CastOptions, options.Union());
- }
- else
- {
- op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs);
- }
- ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleConcatenation *node)
-{
- uint32_t op_idx =
- ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_CONCATENATION, node->op_version());
- std::vector<int32_t> inputs_vec;
- std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
-
- for (uint32_t i = 0; i < node->numValues(); ++i)
- inputs_vec.push_back(get_tensor_index(node->values(i)));
-
- auto inputs = ctx.builder.CreateVector(inputs_vec);
- auto outputs = ctx.builder.CreateVector(outputs_vec);
- auto options = CreateConcatenationOptions(ctx.builder, node->axis(),
- to_circle_actfunc(node->fusedActivationFunction()));
- auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_ConcatenationOptions, options.Union());
- ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleCustom *node)
-{
- auto custom_outputs = loco::succs(node);
-
- uint32_t op_idx = ctx.md.registerCustomOpcode(node->custom_code());
- std::vector<int32_t> inputs_vec;
- std::vector<int32_t> outputs_vec;
-
- for (uint32_t index = 0; index < node->numInputs(); index++)
- {
- inputs_vec.push_back(get_tensor_index(node->inputs(index)));
- }
- for (uint32_t index = 0; index < custom_outputs.size(); index++)
- {
- // store in order of index
- bool found = false;
- for (auto out : custom_outputs)
- {
- auto custom_out = loco::must_cast<luci::CircleCustomOut *>(out);
- if (custom_out->index() == static_cast<int32_t>(index))
- {
- outputs_vec.push_back(get_tensor_index(custom_out));
- found = true;
- break;
- }
- }
- if (!found)
- {
- INTERNAL_EXN("Invalid Custom output");
- }
- }
-
- auto inputs = ctx.builder.CreateVector(inputs_vec);
- auto outputs = ctx.builder.CreateVector(outputs_vec);
- flatbuffers::Offset<flatbuffers::Vector<uint8_t>> circle_custom_options;
- std::vector<uint8_t> custom_options_vec{node->custom_options().begin(),
- node->custom_options().end()};
- circle_custom_options = ctx.builder.CreateVector(custom_options_vec);
- auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, circle::BuiltinOptions_NONE,
- flatbuffers::Offset<void>(), circle_custom_options);
- ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleIf *node)
-{
- auto if_outs = loco::succs(node);
- assert(if_outs.size() == node->output_count());
-
- uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_IF, node->op_version());
- std::vector<int32_t> inputs_vec;
- std::vector<int32_t> outputs_vec;
-
- inputs_vec.push_back(get_tensor_index(node->cond()));
- for (uint32_t idx = 0; idx < node->input_count(); ++idx)
- inputs_vec.push_back(get_tensor_index(node->input(idx)));
-
- for (uint32_t idx = 0; idx < node->output_count(); ++idx)
- {
- // store in order of index
- bool found = false;
- for (auto out : if_outs)
- {
- auto if_out = loco::must_cast<luci::CircleIfOut *>(out);
- if (if_out->index() == static_cast<int32_t>(idx))
- {
- outputs_vec.push_back(get_tensor_index(if_out));
- found = true;
- break;
- }
- }
- if (!found)
- {
- INTERNAL_EXN("Invalid CircleIf output");
- }
- }
-
- auto inputs = ctx.builder.CreateVector(inputs_vec);
- auto outputs = ctx.builder.CreateVector(outputs_vec);
- auto options = CreateIfOptions(ctx.builder, node->then_branch(), node->else_branch());
- auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_IfOptions, options.Union());
- ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleNonMaxSuppressionV4 *node)
-{
- auto nms_outs = loco::succs(node);
- assert(nms_outs.size() == 2);
-
- uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_NON_MAX_SUPPRESSION_V4,
- node->op_version());
- std::vector<int32_t> inputs_vec{
- get_tensor_index(node->boxes()), get_tensor_index(node->scores()),
- get_tensor_index(node->max_output_size()), get_tensor_index(node->iou_threshold()),
- get_tensor_index(node->score_threshold()),
- };
- std::vector<int32_t> outputs_vec;
-
- for (uint32_t idx = 0; idx < nms_outs.size(); ++idx)
- {
- // store in order of index
- bool found = false;
- for (auto out : nms_outs)
- {
- auto nms_out = loco::must_cast<luci::CircleNonMaxSuppressionV4Out *>(out);
- if (nms_out->index() == static_cast<int32_t>(idx))
- {
- outputs_vec.push_back(get_tensor_index(nms_out));
- found = true;
- break;
- }
- }
- if (!found)
- {
- INTERNAL_EXN("Invalid NonMaxSuppressionV4 output");
- }
- }
-
- auto inputs = ctx.builder.CreateVector(inputs_vec);
- auto outputs = ctx.builder.CreateVector(outputs_vec);
- auto options = CreateNonMaxSuppressionV4Options(ctx.builder);
- auto op_offset =
- CreateOperator(ctx.builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_NonMaxSuppressionV4Options, options.Union());
- ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleNonMaxSuppressionV5 *node)
-{
- auto nms_outs = loco::succs(node);
- assert(nms_outs.size() == 3);
-
- uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_NON_MAX_SUPPRESSION_V5,
- node->op_version());
- std::vector<int32_t> inputs_vec{
- get_tensor_index(node->boxes()), get_tensor_index(node->scores()),
- get_tensor_index(node->max_output_size()), get_tensor_index(node->iou_threshold()),
- get_tensor_index(node->score_threshold()), get_tensor_index(node->soft_nms_sigma()),
- };
- std::vector<int32_t> outputs_vec;
-
- for (uint32_t idx = 0; idx < nms_outs.size(); ++idx)
- {
- // store in order of index
- bool found = false;
- for (auto out : nms_outs)
- {
- auto nms_out = loco::must_cast<luci::CircleNonMaxSuppressionV5Out *>(out);
- if (nms_out->index() == static_cast<int32_t>(idx))
- {
- outputs_vec.push_back(get_tensor_index(nms_out));
- found = true;
- break;
- }
- }
- if (!found)
- {
- INTERNAL_EXN("Invalid NonMaxSuppressionV5 output");
- }
- }
-
- auto inputs = ctx.builder.CreateVector(inputs_vec);
- auto outputs = ctx.builder.CreateVector(outputs_vec);
- auto options = CreateNonMaxSuppressionV5Options(ctx.builder);
- auto op_offset =
- CreateOperator(ctx.builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_NonMaxSuppressionV5Options, options.Union());
- ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleReverseV2 *node)
-{
- uint32_t op_idx =
- ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_REVERSE_V2, node->op_version());
- std::vector<int32_t> inputs_vec{get_tensor_index(node->tensor()), get_tensor_index(node->axis())};
- std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
- auto inputs = ctx.builder.CreateVector(inputs_vec);
- auto outputs = ctx.builder.CreateVector(outputs_vec);
- auto options = CreateReverseV2Options(ctx.builder);
- auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_ReverseSequenceOptions, options.Union());
- ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleSplit *node)
-{
- auto split_outs = loco::succs(node);
- assert(int32_t(split_outs.size()) == node->num_split());
-
- uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_SPLIT, node->op_version());
- // NOTE BuiltinOperator_SPLIT input is placed at second position
- std::vector<int32_t> inputs_vec{get_tensor_index(node->split_dim()),
- get_tensor_index(node->input())};
- std::vector<int32_t> outputs_vec;
-
- for (int32_t index = 0; index < node->num_split(); index++)
- {
- // store in order of index
- bool found = false;
- for (auto out : split_outs)
- {
- auto split_out = loco::must_cast<luci::CircleSplitOut *>(out);
- if (split_out->index() == index)
- {
- outputs_vec.push_back(get_tensor_index(split_out));
- found = true;
- break;
- }
- }
- if (!found)
- {
- INTERNAL_EXN("Invalid Split output");
- }
- }
-
- auto inputs = ctx.builder.CreateVector(inputs_vec);
- auto outputs = ctx.builder.CreateVector(outputs_vec);
- auto options = CreateSplitOptions(ctx.builder, node->num_split());
- auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_SplitOptions, options.Union());
- ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleSplitV *node)
-{
- auto split_outs = loco::succs(node);
- assert(int32_t(split_outs.size()) == node->num_split());
-
- uint32_t op_idx =
- ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_SPLIT_V, node->op_version());
- std::vector<int32_t> inputs_vec{get_tensor_index(node->input()),
- get_tensor_index(node->size_splits()),
- get_tensor_index(node->split_dim())};
- std::vector<int32_t> outputs_vec;
-
- for (int32_t index = 0; index < node->num_split(); index++)
- {
- // store in order of index
- bool found = false;
- for (auto out : split_outs)
- {
- auto split_out = loco::must_cast<luci::CircleSplitVOut *>(out);
- if (split_out->index() == index)
- {
- outputs_vec.push_back(get_tensor_index(split_out));
- found = true;
- break;
- }
- }
- if (!found)
- {
- INTERNAL_EXN("Invalid SplitV output");
- }
- }
-
- auto inputs = ctx.builder.CreateVector(inputs_vec);
- auto outputs = ctx.builder.CreateVector(outputs_vec);
- auto options = CreateSplitVOptions(ctx.builder, node->num_split());
- auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_SplitVOptions, options.Union());
- ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleTopKV2 *node)
+namespace luci
{
- auto topkv2_outs = loco::succs(node);
- int outs_count = int32_t(topkv2_outs.size());
- assert(outs_count == 2);
- uint32_t op_idx =
- ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_TOPK_V2, node->op_version());
- std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), get_tensor_index(node->k())};
- std::vector<int32_t> outputs_vec;
-
- for (int32_t index = 0; index < outs_count; index++)
- {
- // store in order of index
- bool found = false;
- for (auto out : topkv2_outs)
- {
- auto topkv2_out = loco::must_cast<luci::CircleTopKV2Out *>(out);
- if (topkv2_out->index() == index)
- {
- outputs_vec.push_back(get_tensor_index(topkv2_out));
- found = true;
- break;
- }
- }
- if (!found)
- {
- INTERNAL_EXN("Invalid TopKV2 output");
- }
- }
-
- auto inputs = ctx.builder.CreateVector(inputs_vec);
- auto outputs = ctx.builder.CreateVector(outputs_vec);
- auto options = CreateTopKV2Options(ctx.builder);
- auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_TopKV2Options, options.Union());
- ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleUnique *node)
+void exportNodes(loco::Graph *g, flatbuffers::FlatBufferBuilder &builder, SerializedModelData &md,
+ SerializedGraphData &gd)
{
- auto unique_outs = loco::succs(node);
- assert(int32_t(unique_outs.size()) == 2);
- uint32_t op_idx =
- ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_UNIQUE, node->op_version());
-
- std::vector<int32_t> inputs_vec{get_tensor_index(node->input())};
- std::vector<int32_t> outputs_vec;
-
- for (int32_t index = 0; index < 2; index++)
+ uint32_t node_position = 0;
+ for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
{
- // store in order of index
- bool found = false;
- for (auto out : unique_outs)
- {
- auto unique_out = loco::must_cast<luci::CircleUniqueOut *>(out);
- if (unique_out->index() == index)
- {
- outputs_vec.push_back(get_tensor_index(unique_out));
- found = true;
- break;
- }
- }
- if (!found)
- {
- INTERNAL_EXN("Invalid Unique output");
- }
- }
+ ExportContext ctx{builder, md, gd};
+ OperationExporterRule exporter_rule{ctx};
- auto inputs = ctx.builder.CreateVector(inputs_vec);
- auto outputs = ctx.builder.CreateVector(outputs_vec);
- auto options = CreateUniqueOptions(ctx.builder, to_circle_tensortype(node->idx_out_type()));
- auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_UniqueOptions, options.Union());
- ctx.gd._operators.push_back(op_offset);
-}
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ circle_node->accept(&exporter_rule);
-void export_node(ExportContext &ctx, luci::CircleUnpack *node)
-{
- LOGGER(l);
- auto settings = luci::UserSettings::settings();
+ circle_node->accept(&exporter_rule);
- auto unpack_outs = loco::succs(node);
- // NOTE real models may not use all of the outputs
- if (static_cast<int32_t>(unpack_outs.size()) != node->num())
- {
- if (settings->get(luci::UserSettings::Key::DisableValidation))
+ if (has_origin(circle_node) && ops_size != gd._operators.size())
{
- WARN(l) << "Warning: export Unpack(" << node->name() << ") 'num' not same as outputs";
- }
- else
- assert(false);
- }
-
- uint32_t op_idx =
- ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_UNPACK, node->op_version());
- std::vector<int32_t> inputs_vec{get_tensor_index(node->value())};
- std::vector<int32_t> outputs_vec;
-
- for (int32_t index = 0; index < node->num(); index++)
- {
- // store in order of index
- bool found = false;
- for (auto out : unpack_outs)
- {
- auto unpack_out = loco::must_cast<luci::CircleUnpackOut *>(out);
- if (unpack_out->index() == index)
- {
- outputs_vec.push_back(get_tensor_index(unpack_out));
- found = true;
- break;
- }
- }
- // NOTE real models may not use all of the outputs
- if (!found)
- {
- if (settings->get(luci::UserSettings::Key::DisableValidation))
+ const auto node_id = gd._operators.size() - 1;
+ for (auto source : get_origin(circle_node)->sources())
{
- WARN(l) << "Warning: export Unpack(" << node->name() << ") output " << index << " not used";
+ md._metadata.add_op_table(node_id, source->id());
}
- else
- assert(false);
}
- }
-
- auto inputs = ctx.builder.CreateVector(inputs_vec);
- auto outputs = ctx.builder.CreateVector(outputs_vec);
- auto options = CreateUnpackOptions(ctx.builder, node->num(), node->axis());
- auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_UnpackOptions, options.Union());
- ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleWhile *node)
-{
- auto while_outs = loco::succs(node);
- assert(while_outs.size() == node->output_count());
-
- uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_WHILE, node->op_version());
- std::vector<int32_t> inputs_vec;
- std::vector<int32_t> outputs_vec;
-
- for (uint32_t idx = 0; idx < node->input_count(); ++idx)
- inputs_vec.push_back(get_tensor_index(node->input(idx)));
-
- for (uint32_t idx = 0; idx < node->output_count(); ++idx)
- {
- // store in order of index
- bool found = false;
- for (auto out : while_outs)
+ if (has_execution_plan(circle_node))
{
- auto while_out = loco::must_cast<luci::CircleWhileOut *>(out);
- if (while_out->index() == static_cast<int32_t>(idx))
+ // Add this node's execution_plan information to the metadata, keyed by
+ // node_position: the order of execution followed by the offsets of the output tensors.
+ const auto execution_plan = get_execution_plan(circle_node);
+ std::vector<uint32_t> execution_plan_vector;
+ execution_plan_vector.push_back(execution_plan.order_in_plan());
+ for (auto offset : execution_plan.offsets())
{
- outputs_vec.push_back(get_tensor_index(while_out));
- found = true;
- break;
+ execution_plan_vector.push_back(offset);
}
+ md._metadata.add_execution_plan_table(node_position, execution_plan_vector);
}
- if (!found)
- {
- INTERNAL_EXN("Invalid CircleWhile output");
- }
- }
-
- auto inputs = ctx.builder.CreateVector(inputs_vec);
- auto outputs = ctx.builder.CreateVector(outputs_vec);
- auto options = CreateWhileOptions(ctx.builder, node->cond_branch(), node->body_branch());
- auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
- circle::BuiltinOptions_WhileOptions, options.Union());
- ctx.gd._operators.push_back(op_offset);
-}
-
-class OperationExporter final : public luci::CircleNodeMutableVisitor<void>,
- public loco::CanonicalNodeMutableVisitor<void>
-{
-public:
- OperationExporter(ExportContext &ctx) : _ctx{ctx}
- {
- // DO NOTHING
- }
-
-public:
- void visit(luci::CircleAbs *) final;
- void visit(luci::CircleAdd *) final;
- void visit(luci::CircleAddN *) final;
- void visit(luci::CircleArgMax *) final;
- void visit(luci::CircleArgMin *) final;
- void visit(luci::CircleAveragePool2D *) final;
- void visit(luci::CircleBatchMatMul *) final;
- void visit(luci::CircleBatchToSpaceND *) final;
- void visit(luci::CircleCast *) final;
- void visit(luci::CircleCeil *) final;
- void visit(luci::CircleConcatenation *) final;
- void visit(luci::CircleConst *) final{/* skip, everything is done in exportOpDefinedTensors */};
- void visit(luci::CircleConv2D *) final;
- void visit(luci::CircleCos *) final;
- void visit(luci::CircleCustom *) final;
- void visit(luci::CircleDepthToSpace *) final;
- void visit(luci::CircleDepthwiseConv2D *) final;
- void visit(luci::CircleDiv *) final;
- void visit(luci::CircleElu *) final;
- void visit(luci::CircleEqual *) final;
- void visit(luci::CircleExp *) final;
- void visit(luci::CircleExpandDims *) final;
- void visit(luci::CircleFill *) final;
- void visit(luci::CircleFloor *) final;
- void visit(luci::CircleFloorDiv *) final;
- void visit(luci::CircleFloorMod *) final;
- void visit(luci::CircleFullyConnected *) final;
- void visit(luci::CircleGather *) final;
- void visit(luci::CircleGatherNd *) final;
- void visit(luci::CircleGreater *) final;
- void visit(luci::CircleGreaterEqual *) final;
- void visit(luci::CircleIf *) final;
- void visit(luci::CircleL2Normalize *) final;
- void visit(luci::CircleL2Pool2D *) final;
- void visit(luci::CircleLeakyRelu *) final;
- void visit(luci::CircleLess *) final;
- void visit(luci::CircleLessEqual *) final;
- void visit(luci::CircleLocalResponseNormalization *) final;
- void visit(luci::CircleLog *) final;
- void visit(luci::CircleLogicalAnd *) final;
- void visit(luci::CircleLogicalNot *) final;
- void visit(luci::CircleLogicalOr *) final;
- void visit(luci::CircleLogistic *) final;
- void visit(luci::CircleLogSoftmax *) final;
- void visit(luci::CircleMatrixDiag *) final;
- void visit(luci::CircleMatrixSetDiag *) final;
- void visit(luci::CircleMaximum *) final;
- void visit(luci::CircleMaxPool2D *) final;
- void visit(luci::CircleMean *) final;
- void visit(luci::CircleMinimum *) final;
- void visit(luci::CircleMirrorPad *) final;
- void visit(luci::CircleMul *) final;
- void visit(luci::CircleNeg *) final;
- void visit(luci::CircleNonMaxSuppressionV4 *) final;
- void visit(luci::CircleNonMaxSuppressionV5 *) final;
- void visit(luci::CircleNotEqual *) final;
- void visit(luci::CircleOneHot *) final;
- void visit(luci::CirclePack *) final;
- void visit(luci::CirclePad *) final;
- void visit(luci::CirclePadV2 *) final;
- void visit(luci::CirclePow *) final;
- void visit(luci::CirclePRelu *) final;
- void visit(luci::CircleRange *) final;
- void visit(luci::CircleRank *) final;
- void visit(luci::CircleReduceAny *) final;
- void visit(luci::CircleReduceMax *) final;
- void visit(luci::CircleReduceMin *) final;
- void visit(luci::CircleReduceProd *) final;
- void visit(luci::CircleRelu *) final;
- void visit(luci::CircleRelu6 *) final;
- void visit(luci::CircleReluN1To1 *) final;
- void visit(luci::CircleReshape *) final;
- void visit(luci::CircleResizeBilinear *) final;
- void visit(luci::CircleResizeNearestNeighbor *) final;
- void visit(luci::CircleReverseSequence *) final;
- void visit(luci::CircleReverseV2 *) final;
- void visit(luci::CircleRound *) final;
- void visit(luci::CircleRsqrt *) final;
- void visit(luci::CircleScatterNd *) final;
- void visit(luci::CircleSegmentSum *) final;
- void visit(luci::CircleSelect *) final;
- void visit(luci::CircleSelectV2 *) final;
- void visit(luci::CircleShape *) final;
- void visit(luci::CircleSin *) final;
- void visit(luci::CircleSlice *) final;
- void visit(luci::CircleSoftmax *) final;
- void visit(luci::CircleSpaceToBatchND *) final;
- void visit(luci::CircleSpaceToDepth *) final;
- void visit(luci::CircleSparseToDense *) final;
- void visit(luci::CircleSplit *) final;
- void visit(luci::CircleSplitV *) final;
- void visit(luci::CircleSqrt *) final;
- void visit(luci::CircleSquare *) final;
- void visit(luci::CircleSquaredDifference *) final;
- void visit(luci::CircleSqueeze *) final;
- void visit(luci::CircleStridedSlice *) final;
- void visit(luci::CircleSub *) final;
- void visit(luci::CircleSum *) final;
- void visit(luci::CircleTanh *) final;
- void visit(luci::CircleTile *) final;
- void visit(luci::CircleTopKV2 *) final;
- void visit(luci::CircleTranspose *) final;
- void visit(luci::CircleTransposeConv *) final;
- void visit(luci::CircleUnique *) final;
- void visit(luci::CircleUnpack *) final;
- void visit(luci::CircleWhere *) final;
- void visit(luci::CircleWhile *) final;
- void visit(luci::CircleZerosLike *) final;
- // Circle only
- void visit(luci::CircleBCQFullyConnected *) final;
- void visit(luci::CircleBCQGather *) final;
- void visit(luci::CircleInstanceNorm *) final;
- // Virtual
- void visit(luci::CircleInput *) final {}
- void visit(luci::CircleOutput *) final {}
- void visit(luci::CircleOutputDummy *) final {}
- void visit(luci::CircleOutputExclude *) final {}
- // Virtual for multiple-outputs
- void visit(luci::CircleCustomOut *) final {}
- void visit(luci::CircleIfOut *) final {}
- void visit(luci::CircleNonMaxSuppressionV4Out *) final {}
- void visit(luci::CircleNonMaxSuppressionV5Out *) final {}
- void visit(luci::CircleSplitOut *) final {}
- void visit(luci::CircleSplitVOut *) final {}
- void visit(luci::CircleTopKV2Out *) final {}
- void visit(luci::CircleUniqueOut *) final {}
- void visit(luci::CircleUnpackOut *) final {}
- void visit(luci::CircleWhileOut *) final {}
-
-private:
- /**
- * @brief export simple nodes
- */
- void export_simple(loco::Node *node, circle::BuiltinOperator bop, circle::BuiltinOptions bot,
- flatbuffers::Offset<void> options_offset);
-
- /**
- * @brief export simple nodes having void options
- */
- void export_simple(loco::Node *node, circle::BuiltinOperator bop);
-
-private:
- ExportContext &_ctx;
-};
-
-void OperationExporter::export_simple(loco::Node *node, circle::BuiltinOperator bop,
- circle::BuiltinOptions bot,
- flatbuffers::Offset<void> options_offset)
-{
- export_node(_ctx, node, bop, bot, options_offset);
-}
-
-void OperationExporter::export_simple(loco::Node *node, circle::BuiltinOperator bop)
-{
- export_node(_ctx, node, bop);
-}
-
-void OperationExporter::visit(luci::CircleAbs *node)
-{
- export_simple(node, circle::BuiltinOperator_ABS, circle::BuiltinOptions_AbsOptions,
- CreateAbsOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleAdd *node)
-{
- export_simple(
- node, circle::BuiltinOperator_ADD, circle::BuiltinOptions_AddOptions,
- CreateAddOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
-}
-
-void OperationExporter::visit(luci::CircleAddN *node) { export_node(_ctx, node); }
-
-void OperationExporter::visit(luci::CircleArgMax *node)
-{
- export_simple(
- node, circle::BuiltinOperator_ARG_MAX, circle::BuiltinOptions_ArgMaxOptions,
- CreateArgMaxOptions(_ctx.builder, to_circle_tensortype(node->output_type())).Union());
-}
-
-void OperationExporter::visit(luci::CircleArgMin *node)
-{
- export_simple(
- node, circle::BuiltinOperator_ARG_MIN, circle::BuiltinOptions_ArgMinOptions,
- CreateArgMinOptions(_ctx.builder, to_circle_tensortype(node->output_type())).Union());
-}
-
-void OperationExporter::visit(luci::CircleAveragePool2D *node)
-{
- export_pool_2d<luci::CircleAveragePool2D>(_ctx, node, circle::BuiltinOperator_AVERAGE_POOL_2D);
-}
-
-void OperationExporter::visit(luci::CircleBatchMatMul *node)
-{
- export_simple(node, circle::BuiltinOperator_BATCH_MATMUL,
- circle::BuiltinOptions_BatchMatMulOptions,
- CreateBatchMatMulOptions(_ctx.builder, node->adj_x(), node->adj_y()).Union());
-}
-
-void OperationExporter::visit(luci::CircleCast *node) { export_node(_ctx, node); }
-
-void OperationExporter::visit(luci::CircleCeil *node)
-{
- export_simple(node, circle::BuiltinOperator_CEIL);
-}
-
-void OperationExporter::visit(luci::CircleConcatenation *node) { export_node(_ctx, node); }
-
-void OperationExporter::visit(luci::CircleBatchToSpaceND *node)
-{
- export_simple(node, circle::BuiltinOperator_BATCH_TO_SPACE_ND,
- circle::BuiltinOptions_BatchToSpaceNDOptions,
- CreateBatchToSpaceNDOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleConv2D *node)
-{
- export_simple(node, circle::BuiltinOperator_CONV_2D, circle::BuiltinOptions_Conv2DOptions,
- CreateConv2DOptions(_ctx.builder, getOpPadding(node->padding()),
- node->stride()->w(), node->stride()->h(),
- to_circle_actfunc(node->fusedActivationFunction()),
- node->dilation()->w(), node->dilation()->h())
- .Union());
-}
-
-void OperationExporter::visit(luci::CircleCos *node)
-{
- export_simple(node, circle::BuiltinOperator_COS, circle::BuiltinOptions_CosOptions,
- CreateCosOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleCustom *node) { export_node(_ctx, node); }
-
-void OperationExporter::visit(luci::CircleDepthToSpace *node)
-{
- export_simple(node, circle::BuiltinOperator_DEPTH_TO_SPACE,
- circle::BuiltinOptions_DepthToSpaceOptions,
- CreateDepthToSpaceOptions(_ctx.builder, node->block_size()).Union());
-}
-
-void OperationExporter::visit(luci::CircleDepthwiseConv2D *node)
-{
- export_simple(node, circle::BuiltinOperator_DEPTHWISE_CONV_2D,
- circle::BuiltinOptions_DepthwiseConv2DOptions,
- CreateDepthwiseConv2DOptions(_ctx.builder, getOpPadding(node->padding()),
- node->stride()->w(), node->stride()->h(),
- node->depthMultiplier(),
- to_circle_actfunc(node->fusedActivationFunction()),
- node->dilation()->w(), node->dilation()->h())
- .Union());
-}
-
-void OperationExporter::visit(luci::CircleDiv *node)
-{
- export_simple(
- node, circle::BuiltinOperator_DIV, circle::BuiltinOptions_DivOptions,
- CreateDivOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
-}
-
-void OperationExporter::visit(luci::CircleElu *node)
-{
- export_simple(node, circle::BuiltinOperator_ELU);
-}
-
-void OperationExporter::visit(luci::CircleEqual *node)
-{
- export_simple(node, circle::BuiltinOperator_EQUAL, circle::BuiltinOptions_EqualOptions,
- CreateEqualOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleExp *node)
-{
- export_simple(node, circle::BuiltinOperator_EXP, circle::BuiltinOptions_ExpOptions,
- CreateExpOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleExpandDims *node)
-{
- export_simple(node, circle::BuiltinOperator_EXPAND_DIMS, circle::BuiltinOptions_ExpandDimsOptions,
- CreateExpandDimsOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleFill *node)
-{
- export_simple(node, circle::BuiltinOperator_FILL, circle::BuiltinOptions_FillOptions,
- CreateFillOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleFloor *node)
-{
- export_simple(node, circle::BuiltinOperator_FLOOR);
-}
-void OperationExporter::visit(luci::CircleFloorDiv *node)
-{
- export_simple(node, circle::BuiltinOperator_FLOOR_DIV, circle::BuiltinOptions_FloorDivOptions,
- CreateFloorDivOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleFloorMod *node)
-{
- export_simple(node, circle::BuiltinOperator_FLOOR_MOD, circle::BuiltinOptions_FloorModOptions,
- CreateFloorModOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleFullyConnected *node)
-{
- export_simple(
- node, circle::BuiltinOperator_FULLY_CONNECTED, circle::BuiltinOptions_FullyConnectedOptions,
- CreateFullyConnectedOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction()))
- .Union());
-}
-
-void OperationExporter::visit(luci::CircleGather *node)
-{
- export_simple(node, circle::BuiltinOperator_GATHER, circle::BuiltinOptions_GatherOptions,
- CreateGatherOptions(_ctx.builder, node->axis()).Union());
-}
-
-void OperationExporter::visit(luci::CircleGatherNd *node)
-{
- export_simple(node, circle::BuiltinOperator_GATHER_ND, circle::BuiltinOptions_GatherNdOptions,
- CreateGatherNdOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleGreater *node)
-{
- export_simple(node, circle::BuiltinOperator_GREATER, circle::BuiltinOptions_GreaterOptions,
- CreateGreaterOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleGreaterEqual *node)
-{
- export_simple(node, circle::BuiltinOperator_GREATER_EQUAL,
- circle::BuiltinOptions_GreaterEqualOptions,
- CreateGreaterEqualOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleIf *node) { export_node(_ctx, node); }
-
-void OperationExporter::visit(luci::CircleL2Normalize *node)
-{
- export_simple(
- node, circle::BuiltinOperator_L2_NORMALIZATION, circle::BuiltinOptions_L2NormOptions,
- CreateL2NormOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction()))
- .Union());
-}
-
-void OperationExporter::visit(luci::CircleL2Pool2D *node)
-{
- export_pool_2d<luci::CircleL2Pool2D>(_ctx, node, circle::BuiltinOperator_L2_POOL_2D);
-}
-
-void OperationExporter::visit(luci::CircleLeakyRelu *node)
-{
- export_simple(node, circle::BuiltinOperator_LEAKY_RELU, circle::BuiltinOptions_LeakyReluOptions,
- CreateLeakyReluOptions(_ctx.builder, node->alpha()).Union());
-}
-
-void OperationExporter::visit(luci::CircleLess *node)
-{
- export_simple(node, circle::BuiltinOperator_LESS, circle::BuiltinOptions_LessOptions,
- CreateLessOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleLessEqual *node)
-{
- export_simple(node, circle::BuiltinOperator_LESS_EQUAL, circle::BuiltinOptions_LessEqualOptions,
- CreateLessEqualOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleLocalResponseNormalization *node)
-{
- export_simple(node, circle::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION,
- circle::BuiltinOptions_LocalResponseNormalizationOptions,
- CreateLocalResponseNormalizationOptions(_ctx.builder, node->radius(), node->bias(),
- node->alpha(), node->beta())
- .Union());
-}
-
-void OperationExporter::visit(luci::CircleLog *node)
-{
- export_simple(node, circle::BuiltinOperator_LOG);
-}
-
-void OperationExporter::visit(luci::CircleLogicalAnd *node)
-{
- export_simple(node, circle::BuiltinOperator_LOGICAL_AND, circle::BuiltinOptions_LogicalAndOptions,
- CreateLogicalAndOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleLogicalNot *node)
-{
- export_simple(node, circle::BuiltinOperator_LOGICAL_NOT, circle::BuiltinOptions_LogicalNotOptions,
- CreateLogicalNotOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleLogicalOr *node)
-{
- export_simple(node, circle::BuiltinOperator_LOGICAL_OR, circle::BuiltinOptions_LogicalOrOptions,
- CreateLogicalOrOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleLogistic *node)
-{
- export_simple(node, circle::BuiltinOperator_LOGISTIC);
-}
-
-void OperationExporter::visit(luci::CircleLogSoftmax *node)
-{
- export_simple(node, circle::BuiltinOperator_LOG_SOFTMAX, circle::BuiltinOptions_LogSoftmaxOptions,
- CreateLogSoftmaxOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleMatrixDiag *node)
-{
- export_simple(node, circle::BuiltinOperator_MATRIX_DIAG, circle::BuiltinOptions_MatrixDiagOptions,
- CreateMatrixDiagOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleMatrixSetDiag *node)
-{
- export_simple(node, circle::BuiltinOperator_MATRIX_SET_DIAG,
- circle::BuiltinOptions_MatrixSetDiagOptions,
- CreateMatrixSetDiagOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleMaximum *node)
-{
- export_simple(node, circle::BuiltinOperator_MAXIMUM, circle::BuiltinOptions_MaximumMinimumOptions,
- CreateMaximumMinimumOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleMaxPool2D *node)
-{
- export_pool_2d<luci::CircleMaxPool2D>(_ctx, node, circle::BuiltinOperator_MAX_POOL_2D);
-}
-
-void OperationExporter::visit(luci::CircleMean *node)
-{
- export_simple(node, circle::BuiltinOperator_MEAN, circle::BuiltinOptions_ReducerOptions,
- CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
-}
-
-void OperationExporter::visit(luci::CircleMinimum *node)
-{
- export_simple(node, circle::BuiltinOperator_MINIMUM, circle::BuiltinOptions_MaximumMinimumOptions,
- CreateMaximumMinimumOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleMirrorPad *node)
-{
- export_simple(
- node, circle::BuiltinOperator_MIRROR_PAD, circle::BuiltinOptions_MirrorPadOptions,
- CreateMirrorPadOptions(_ctx.builder, to_circle_mirrorpadmode(node->mode())).Union());
-}
-
-void OperationExporter::visit(luci::CircleMul *node)
-{
- export_simple(
- node, circle::BuiltinOperator_MUL, circle::BuiltinOptions_MulOptions,
- CreateMulOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
-}
-
-void OperationExporter::visit(luci::CircleNeg *node)
-{
- export_simple(node, circle::BuiltinOperator_NEG, circle::BuiltinOptions_NegOptions,
- CreateNegOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleNonMaxSuppressionV4 *node) { export_node(_ctx, node); }
-
-void OperationExporter::visit(luci::CircleNonMaxSuppressionV5 *node) { export_node(_ctx, node); }
-
-void OperationExporter::visit(luci::CircleNotEqual *node)
-{
- export_simple(node, circle::BuiltinOperator_NOT_EQUAL, circle::BuiltinOptions_NotEqualOptions,
- CreateNotEqualOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleOneHot *node)
-{
- export_simple(node, circle::BuiltinOperator_ONE_HOT, circle::BuiltinOptions_OneHotOptions,
- CreateOneHotOptions(_ctx.builder, node->axis()).Union());
-}
-
-void OperationExporter::visit(luci::CirclePack *node)
-{
- export_simple(node, circle::BuiltinOperator_PACK, circle::BuiltinOptions_PackOptions,
- CreatePackOptions(_ctx.builder, node->values_count(), node->axis()).Union());
-}
-
-void OperationExporter::visit(luci::CirclePad *node)
-{
- export_simple(node, circle::BuiltinOperator_PAD, circle::BuiltinOptions_PadOptions,
- CreatePadOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CirclePadV2 *node)
-{
- export_simple(node, circle::BuiltinOperator_PADV2, circle::BuiltinOptions_PadV2Options,
- CreatePadV2Options(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CirclePow *node)
-{
- export_simple(node, circle::BuiltinOperator_POW, circle::BuiltinOptions_PowOptions,
- CreatePowOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CirclePRelu *node)
-{
- export_simple(node, circle::BuiltinOperator_PRELU);
-}
-
-void OperationExporter::visit(luci::CircleRange *node)
-{
- export_simple(node, circle::BuiltinOperator_RANGE, circle::BuiltinOptions_RangeOptions,
- CreateRangeOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleRank *node)
-{
- export_simple(node, circle::BuiltinOperator_RANK, circle::BuiltinOptions_RankOptions,
- CreateRankOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleReduceAny *node)
-{
- export_simple(node, circle::BuiltinOperator_REDUCE_ANY, circle::BuiltinOptions_ReducerOptions,
- CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
-}
-
-void OperationExporter::visit(luci::CircleReduceMax *node)
-{
- export_simple(node, circle::BuiltinOperator_REDUCE_MAX, circle::BuiltinOptions_ReducerOptions,
- CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
-}
-
-void OperationExporter::visit(luci::CircleReduceMin *node)
-{
- export_simple(node, circle::BuiltinOperator_REDUCE_MIN, circle::BuiltinOptions_ReducerOptions,
- CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
-}
-
-void OperationExporter::visit(luci::CircleReduceProd *node)
-{
- export_simple(node, circle::BuiltinOperator_REDUCE_PROD, circle::BuiltinOptions_ReducerOptions,
- CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
-}
-
-void OperationExporter::visit(luci::CircleRelu *node)
-{
- export_simple(node, circle::BuiltinOperator_RELU);
-}
-
-void OperationExporter::visit(luci::CircleRelu6 *node)
-{
- export_simple(node, circle::BuiltinOperator_RELU6);
-}
-
-void OperationExporter::visit(luci::CircleReluN1To1 *node)
-{
- export_simple(node, circle::BuiltinOperator_RELU_N1_TO_1);
-}
-
-void OperationExporter::visit(luci::CircleReshape *node)
-{
- auto new_shape = _ctx.builder.CreateVector<int32_t>(
- node->newShape()->rank(), [node](size_t i) { return node->newShape()->dim(i); });
-
- export_simple(node, circle::BuiltinOperator_RESHAPE, circle::BuiltinOptions_ReshapeOptions,
- CreateReshapeOptions(_ctx.builder, new_shape).Union());
-}
-
-void OperationExporter::visit(luci::CircleResizeBilinear *node)
-{
- export_simple(
- node, circle::BuiltinOperator_RESIZE_BILINEAR, circle::BuiltinOptions_ResizeBilinearOptions,
- CreateResizeBilinearOptions(_ctx.builder, node->align_corners(), node->half_pixel_centers())
- .Union());
-}
-
-void OperationExporter::visit(luci::CircleResizeNearestNeighbor *node)
-{
- export_simple(node, circle::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
- circle::BuiltinOptions_ResizeNearestNeighborOptions,
- CreateResizeNearestNeighborOptions(_ctx.builder, node->align_corners()).Union());
-}
-
-void OperationExporter::visit(luci::CircleReverseSequence *node)
-{
- export_simple(
- node, circle::BuiltinOperator_REVERSE_SEQUENCE, circle::BuiltinOptions_ReverseSequenceOptions,
- CreateReverseSequenceOptions(_ctx.builder, node->seq_axis(), node->batch_axis()).Union());
-}
-
-void OperationExporter::visit(luci::CircleReverseV2 *node) { export_node(_ctx, node); }
-
-void OperationExporter::visit(luci::CircleRound *node)
-{
- export_simple(node, circle::BuiltinOperator_ROUND);
-}
-
-void OperationExporter::visit(luci::CircleRsqrt *node)
-{
- export_simple(node, circle::BuiltinOperator_RSQRT);
-}
-
-void OperationExporter::visit(luci::CircleScatterNd *node)
-{
- export_simple(node, circle::BuiltinOperator_SCATTER_ND, circle::BuiltinOptions_ScatterNdOptions,
- CreateScatterNdOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleSegmentSum *node)
-{
- export_simple(node, circle::BuiltinOperator_SEGMENT_SUM, circle::BuiltinOptions_SegmentSumOptions,
- CreateSegmentSumOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleSelect *node)
-{
- export_simple(node, circle::BuiltinOperator_SELECT, circle::BuiltinOptions_SelectOptions,
- CreateSelectOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleSelectV2 *node)
-{
- export_simple(node, circle::BuiltinOperator_SELECT_V2, circle::BuiltinOptions_SelectV2Options,
- CreateSelectV2Options(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleShape *node)
-{
- export_simple(node, circle::BuiltinOperator_SHAPE, circle::BuiltinOptions_ShapeOptions,
- CreateShapeOptions(_ctx.builder, to_circle_tensortype(node->out_type())).Union());
-}
-
-void OperationExporter::visit(luci::CircleSin *node)
-{
- export_simple(node, circle::BuiltinOperator_SIN);
-}
-
-void OperationExporter::visit(luci::CircleSlice *node)
-{
- export_simple(node, circle::BuiltinOperator_SLICE, circle::BuiltinOptions_SliceOptions,
- CreateSliceOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleSoftmax *node)
-{
- export_simple(node, circle::BuiltinOperator_SOFTMAX, circle::BuiltinOptions_SoftmaxOptions,
- CreateSoftmaxOptions(_ctx.builder, node->beta()).Union());
-}
-
-void OperationExporter::visit(luci::CircleSpaceToBatchND *node)
-{
- export_simple(node, circle::BuiltinOperator_SPACE_TO_BATCH_ND,
- circle::BuiltinOptions_SpaceToBatchNDOptions,
- CreateSpaceToBatchNDOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleSpaceToDepth *node)
-{
- export_simple(node, circle::BuiltinOperator_SPACE_TO_DEPTH,
- circle::BuiltinOptions_SpaceToDepthOptions,
- CreateSpaceToDepthOptions(_ctx.builder, node->block_size()).Union());
-}
-
-void OperationExporter::visit(luci::CircleSparseToDense *node)
-{
- export_simple(node, circle::BuiltinOperator_SPARSE_TO_DENSE,
- circle::BuiltinOptions_SparseToDenseOptions,
- CreateSparseToDenseOptions(_ctx.builder, node->validate_indices()).Union());
-}
-
-void OperationExporter::visit(luci::CircleSplit *node) { export_node(_ctx, node); }
-
-void OperationExporter::visit(luci::CircleSplitV *node) { export_node(_ctx, node); }
-
-void OperationExporter::visit(luci::CircleSqrt *node)
-{
- export_simple(node, circle::BuiltinOperator_SQRT);
-}
-
-void OperationExporter::visit(luci::CircleSquare *node)
-{
- export_simple(node, circle::BuiltinOperator_SQUARE, circle::BuiltinOptions_SquareOptions,
- CreateSquareOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleSquaredDifference *node)
-{
- export_simple(node, circle::BuiltinOperator_SQUARED_DIFFERENCE,
- circle::BuiltinOptions_SquaredDifferenceOptions,
- CreateSquaredDifferenceOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleSqueeze *node)
-{
- auto squeeze_dims = _ctx.builder.CreateVector<int32_t>(node->squeeze_dims());
- export_simple(node, circle::BuiltinOperator_SQUEEZE, circle::BuiltinOptions_SqueezeOptions,
- CreateSqueezeOptions(_ctx.builder, squeeze_dims).Union());
-}
-
-void OperationExporter::visit(luci::CircleStridedSlice *node)
-{
- export_simple(node, circle::BuiltinOperator_STRIDED_SLICE,
- circle::BuiltinOptions_StridedSliceOptions,
- CreateStridedSliceOptions(_ctx.builder, node->begin_mask(), node->end_mask(),
- node->ellipsis_mask(), node->new_axis_mask(),
- node->shrink_axis_mask())
- .Union());
-}
-
-void OperationExporter::visit(luci::CircleSub *node)
-{
- export_simple(
- node, circle::BuiltinOperator_SUB, circle::BuiltinOptions_SubOptions,
- CreateSubOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
-}
-
-void OperationExporter::visit(luci::CircleSum *node)
-{
- export_simple(node, circle::BuiltinOperator_SUM, circle::BuiltinOptions_ReducerOptions,
- CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
-}
-
-void OperationExporter::visit(luci::CircleTanh *node)
-{
- export_simple(node, circle::BuiltinOperator_TANH);
-}
-
-void OperationExporter::visit(luci::CircleTile *node)
-{
- export_simple(node, circle::BuiltinOperator_TILE, circle::BuiltinOptions_TileOptions,
- CreateTileOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleTopKV2 *node) { export_node(_ctx, node); }
-
-void OperationExporter::visit(luci::CircleTranspose *node)
-{
- export_simple(node, circle::BuiltinOperator_TRANSPOSE, circle::BuiltinOptions_TransposeOptions,
- CreateTransposeOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleTransposeConv *node)
-{
- export_simple(node, circle::BuiltinOperator_TRANSPOSE_CONV,
- circle::BuiltinOptions_TransposeConvOptions,
- CreateTransposeConvOptions(_ctx.builder, getOpPadding(node->padding()),
- node->stride()->w(), node->stride()->h())
- .Union());
-}
-
-void OperationExporter::visit(luci::CircleUnique *node) { export_node(_ctx, node); }
-
-void OperationExporter::visit(luci::CircleUnpack *node) { export_node(_ctx, node); }
-
-void OperationExporter::visit(luci::CircleWhere *node)
-{
- export_simple(node, circle::BuiltinOperator_WHERE, circle::BuiltinOptions_WhereOptions,
- CreateWhereOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleWhile *node) { export_node(_ctx, node); }
-
-void OperationExporter::visit(luci::CircleZerosLike *node)
-{
- export_simple(node, circle::BuiltinOperator_ZEROS_LIKE, circle::BuiltinOptions_ZerosLikeOptions,
- CreateZerosLikeOptions(_ctx.builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleBCQFullyConnected *node)
-{
- export_simple(node, circle::BuiltinOperator_BCQ_FULLY_CONNECTED,
- circle::BuiltinOptions_BCQFullyConnectedOptions,
- CreateBCQFullyConnectedOptions(_ctx.builder, node->weights_hidden_size(),
- to_circle_actfunc(node->fusedActivationFunction()))
- .Union());
-}
-
-void OperationExporter::visit(luci::CircleBCQGather *node)
-{
- export_simple(
- node, circle::BuiltinOperator_BCQ_GATHER, circle::BuiltinOptions_BCQGatherOptions,
- CreateBCQGatherOptions(_ctx.builder, node->input_hidden_size(), node->axis()).Union());
-}
-
-void OperationExporter::visit(luci::CircleInstanceNorm *node)
-{
- export_simple(node, circle::BuiltinOperator_INSTANCE_NORM,
- circle::BuiltinOptions_InstanceNormOptions,
- CreateInstanceNormOptions(_ctx.builder, node->epsilon(),
- to_circle_actfunc(node->fusedActivationFunction()))
- .Union());
-}
-
-void exportNode(loco::Node *node, flatbuffers::FlatBufferBuilder &builder, SerializedModelData &md,
- SerializedGraphData &gd)
-{
- if (auto circle_node = dynamic_cast<luci::CircleNode *>(node))
- {
- ExportContext ctx{builder, md, gd};
- OperationExporter exporter{ctx};
- circle_node->accept(&exporter);
- }
- else
- {
- INTERNAL_EXN("Node with unsupported dialect found");
- }
-}
-
-} // namespace
-
-namespace luci
-{
-
-void exportNodes(loco::Graph *g, FlatBufferBuilder &builder, SerializedModelData &md,
- SerializedGraphData &gd)
-{
- for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
- {
- exportNode(node, builder, md, gd);
+ node_position++;
}
}
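
For reference, the execution-plan metadata recorded above stores one vector per
node position, laid out as [order_in_plan, offset_0, ..., offset_{n-1}]. A minimal
consumer-side sketch of that layout (DecodedPlan and decode_plan_entry are
illustrative names, not part of luci):

#include <cassert>
#include <cstdint>
#include <vector>

// Sketch only: decodes one entry written via add_execution_plan_table() above.
struct DecodedPlan
{
  uint32_t order_in_plan;        // position of the node in the execution order
  std::vector<uint32_t> offsets; // offsets of the node's output tensors
};

DecodedPlan decode_plan_entry(const std::vector<uint32_t> &entry)
{
  assert(!entry.empty());
  DecodedPlan plan;
  plan.order_in_plan = entry[0];                       // first element: execution order
  plan.offsets.assign(entry.begin() + 1, entry.end()); // remainder: tensor offsets
  return plan;
}
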
diff --git a/compiler/luci/export/src/CircleOperationExporter.h b/compiler/luci/export/src/CircleOperationExporter.h
index de6abfc54..f2b3cfd6b 100644
--- a/compiler/luci/export/src/CircleOperationExporter.h
+++ b/compiler/luci/export/src/CircleOperationExporter.h
@@ -17,7 +17,7 @@
#ifndef __CIRCLE_OPERATION_EXPORTER_H__
#define __CIRCLE_OPERATION_EXPORTER_H__
-#include "CircleExporterUtils.h"
+#include "SerializedData.h"
#include <loco/IR/Graph.h>
diff --git a/compiler/luci/export/src/CircleOperationExporterRule.cpp b/compiler/luci/export/src/CircleOperationExporterRule.cpp
new file mode 100644
index 000000000..8dc59fa9c
--- /dev/null
+++ b/compiler/luci/export/src/CircleOperationExporterRule.cpp
@@ -0,0 +1,277 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleOperationExporterRule.h"
+#include "CircleBuiltinTypesExtractor.h"
+#include "Check.h"
+
+#include <loco/IR/Graph.h>
+#include <luci/IR/CircleNode.h>
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <oops/InternalExn.h>
+
+#include <vector>
+
+namespace
+{
+class OutputVectorExtractor final : public luci::CircleNodeMutableVisitor<std::vector<int32_t>>
+{
+public:
+ OutputVectorExtractor()
+ {
+ // DO NOTHING
+ }
+
+public:
+ std::vector<int32_t> visit(luci::CircleNode *node) final
+ {
+ std::vector<int32_t> outputs_vec{luci::get_tensor_index(node)};
+ return outputs_vec;
+ }
+
+ std::vector<int32_t> visit(luci::CircleBidirectionalSequenceLSTM *node) final
+ {
+ auto bidi_lstm_outs = loco::succs(node);
+ assert((bidi_lstm_outs.size() == 1) || (bidi_lstm_outs.size() == 2));
+
+ std::vector<int32_t> outputs_vec(bidi_lstm_outs.size());
+
+ for (auto out : bidi_lstm_outs)
+ {
+ auto bidi_lstm_out = loco::must_cast<luci::CircleBidirectionalSequenceLSTMOut *>(out);
+ if (bidi_lstm_out->index() >= int32_t(bidi_lstm_outs.size()))
+ INTERNAL_EXN("Invalid BidirectionalSequenceLSTM output");
+ outputs_vec[bidi_lstm_out->index()] = luci::get_tensor_index(bidi_lstm_out);
+ }
+
+ return outputs_vec;
+ }
+
+ std::vector<int32_t> visit(luci::CircleCustom *node) final
+ {
+ auto custom_outputs = loco::succs(node);
+ assert(custom_outputs.size() == node->numOutputs());
+
+ std::vector<int32_t> outputs_vec(node->numOutputs());
+
+ for (auto out : custom_outputs)
+ {
+ auto custom_out = loco::must_cast<luci::CircleCustomOut *>(out);
+ if (custom_out->index() >= int32_t(node->numOutputs()))
+ INTERNAL_EXN("Invalid Custom output");
+ outputs_vec[custom_out->index()] = luci::get_tensor_index(custom_out);
+ }
+
+ return outputs_vec;
+ }
+
+ std::vector<int32_t> visit(luci::CircleIf *node) final
+ {
+ auto if_outs = loco::succs(node);
+ assert(if_outs.size() == node->output_count());
+
+ std::vector<int32_t> outputs_vec(node->output_count());
+
+ for (auto out : if_outs)
+ {
+ auto if_out = loco::must_cast<luci::CircleIfOut *>(out);
+ if (if_out->index() >= int32_t(node->output_count()))
+ INTERNAL_EXN("Invalid If output");
+ outputs_vec[if_out->index()] = luci::get_tensor_index(if_out);
+ }
+
+ return outputs_vec;
+ }
+
+ std::vector<int32_t> visit(luci::CircleNonMaxSuppressionV4 *node) final
+ {
+ auto nms_outs = loco::succs(node);
+ assert(nms_outs.size() == 2);
+
+ std::vector<int32_t> outputs_vec(2);
+
+ for (auto out : nms_outs)
+ {
+ auto nms_out = loco::must_cast<luci::CircleNonMaxSuppressionV4Out *>(out);
+ if (nms_out->index() >= 2)
+ INTERNAL_EXN("Invalid NonMaxSuppressionV4 output");
+ outputs_vec[nms_out->index()] = luci::get_tensor_index(nms_out);
+ }
+
+ return outputs_vec;
+ }
+
+ std::vector<int32_t> visit(luci::CircleNonMaxSuppressionV5 *node) final
+ {
+ auto nms_outs = loco::succs(node);
+ assert(nms_outs.size() == 3);
+
+ std::vector<int32_t> outputs_vec(3);
+
+ for (auto out : nms_outs)
+ {
+ auto nms_out = loco::must_cast<luci::CircleNonMaxSuppressionV5Out *>(out);
+ if (nms_out->index() >= 3)
+ INTERNAL_EXN("Invalid NonMaxSuppressionV5 output");
+ outputs_vec[nms_out->index()] = luci::get_tensor_index(nms_out);
+ }
+
+ return outputs_vec;
+ }
+
+ std::vector<int32_t> visit(luci::CircleSplit *node) final
+ {
+ auto split_outs = loco::succs(node);
+ assert(int32_t(split_outs.size()) == node->num_split());
+
+ std::vector<int32_t> outputs_vec(node->num_split());
+
+ for (auto out : split_outs)
+ {
+ auto split_out = loco::must_cast<luci::CircleSplitOut *>(out);
+ if (split_out->index() >= node->num_split())
+ INTERNAL_EXN("Invalid Split output");
+ outputs_vec[split_out->index()] = luci::get_tensor_index(split_out);
+ }
+
+ return outputs_vec;
+ }
+
+ std::vector<int32_t> visit(luci::CircleSplitV *node) final
+ {
+ auto split_outs = loco::succs(node);
+ assert(int32_t(split_outs.size()) == node->num_split());
+
+ std::vector<int32_t> outputs_vec(node->num_split());
+
+ for (auto out : split_outs)
+ {
+ auto split_out = loco::must_cast<luci::CircleSplitVOut *>(out);
+ if (split_out->index() >= node->num_split())
+ INTERNAL_EXN("Invalid SplitV output");
+ outputs_vec[split_out->index()] = luci::get_tensor_index(split_out);
+ }
+
+ return outputs_vec;
+ }
+
+ std::vector<int32_t> visit(luci::CircleTopKV2 *node) final
+ {
+ auto topkv2_outs = loco::succs(node);
+ assert(topkv2_outs.size() == 2);
+
+ std::vector<int32_t> outputs_vec(2);
+
+ for (auto out : topkv2_outs)
+ {
+ auto topkv2_out = loco::must_cast<luci::CircleTopKV2Out *>(out);
+ if (topkv2_out->index() >= 2)
+ INTERNAL_EXN("Invalid TopKV2 output");
+ outputs_vec[topkv2_out->index()] = luci::get_tensor_index(topkv2_out);
+ }
+
+ return outputs_vec;
+ }
+
+ std::vector<int32_t> visit(luci::CircleUnique *node) final
+ {
+ auto unique_outs = loco::succs(node);
+ assert(unique_outs.size() == 2);
+
+ std::vector<int32_t> outputs_vec(2);
+
+ for (auto out : unique_outs)
+ {
+ auto unique_out = loco::must_cast<luci::CircleUniqueOut *>(out);
+ if (unique_out->index() >= 2)
+ INTERNAL_EXN("Invalid Unique output");
+ outputs_vec[unique_out->index()] = luci::get_tensor_index(unique_out);
+ }
+
+ return outputs_vec;
+ }
+
+ std::vector<int32_t> visit(luci::CircleUnpack *node) final
+ {
+ auto unpack_outs = loco::succs(node);
+ assert(int32_t(unpack_outs.size()) == node->num());
+
+ std::vector<int32_t> outputs_vec(node->num());
+
+ for (auto out : unpack_outs)
+ {
+ auto unpack_out = loco::must_cast<luci::CircleUnpackOut *>(out);
+ if (unpack_out->index() >= node->num())
+ INTERNAL_EXN("Invalid Unpack output");
+ outputs_vec[unpack_out->index()] = luci::get_tensor_index(unpack_out);
+ }
+
+ return outputs_vec;
+ }
+
+ std::vector<int32_t> visit(luci::CircleWhile *node) final
+ {
+ auto while_outs = loco::succs(node);
+ assert(while_outs.size() == node->output_count());
+
+ std::vector<int32_t> outputs_vec(node->output_count());
+
+ for (auto out : while_outs)
+ {
+ auto while_out = loco::must_cast<luci::CircleWhileOut *>(out);
+ if (while_out->index() >= int32_t(node->output_count()))
+ INTERNAL_EXN("Invalid While output");
+ outputs_vec[while_out->index()] = luci::get_tensor_index(while_out);
+ }
+
+ return outputs_vec;
+ }
+};
+
+} // namespace
+
+namespace luci
+{
+
+void OperationExporterRule::visit(luci::CircleNode *node)
+{
+ auto op_idx = _ctx.md.registerBuiltinOpcode(circle_builtin_operator(node),
+ circle_custom_code(node), node->op_version());
+
+ std::vector<int32_t> inputs_vec;
+ for (uint32_t i = 0; i < node->arity(); ++i)
+ inputs_vec.push_back(luci::get_tensor_index(node->arg(i)));
+ auto inputs = _ctx.builder.CreateVector(inputs_vec);
+
+ OutputVectorExtractor outputs_vec_extractor;
+ auto outputs_vec = node->accept(&outputs_vec_extractor);
+ auto outputs = _ctx.builder.CreateVector(outputs_vec);
+
+ auto builtin_options = circle_builtin_options(node);
+
+ luci::BuiltinOptionsExtractor builtin_options_extractor(_ctx.builder);
+ auto options_offset = node->accept(&builtin_options_extractor);
+
+ // If the node is not CircleCustom, a null offset (0) is returned
+ auto custom_options = circle_custom_options(_ctx.builder, node);
+
+ auto op_offset = circle::CreateOperator(_ctx.builder, op_idx, inputs, outputs, builtin_options,
+ options_offset, custom_options);
+ _ctx.gd._operators.push_back(op_offset);
+}
+
+} // namespace luci
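
Every multi-output visit() above follows the same single-pass pattern: size the
result to the expected output count, then place each successor's tensor index at
its declared position, rejecting out-of-range indices. The removed exporter
re-scanned the successor list once per index instead. An illustrative factoring
of the shared pattern (indexed_outputs is a hypothetical helper, not part of the
actual sources; it assumes the same includes as the file above):

template <typename OutNodeT, typename NodeT>
std::vector<int32_t> indexed_outputs(NodeT *node, int32_t count)
{
  std::vector<int32_t> outputs_vec(count);
  for (auto out : loco::succs(node))
  {
    auto typed_out = loco::must_cast<OutNodeT *>(out);
    if (typed_out->index() >= count)
      INTERNAL_EXN("Invalid output index");
    // Place the tensor index at the slot this output node declares for itself.
    outputs_vec[typed_out->index()] = luci::get_tensor_index(typed_out);
  }
  return outputs_vec;
}

With such a helper, visit(luci::CircleSplit *) would reduce to
indexed_outputs<luci::CircleSplitOut>(node, node->num_split()).
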
diff --git a/compiler/luci/export/src/CircleOperationExporterRule.h b/compiler/luci/export/src/CircleOperationExporterRule.h
new file mode 100644
index 000000000..23e7546cf
--- /dev/null
+++ b/compiler/luci/export/src/CircleOperationExporterRule.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_OPERATION_EXPORTER_RULE_H__
+#define __CIRCLE_OPERATION_EXPORTER_RULE_H__
+
+#include "CircleOperationExporter.h"
+
+#include <luci/IR/CircleNode.h>
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+namespace luci
+{
+
+struct ExportContext
+{
+ flatbuffers::FlatBufferBuilder &builder;
+ luci::SerializedModelData &md;
+ luci::SerializedGraphData &gd;
+};
+
+class OperationExporterRule final : public luci::CircleNodeMutableVisitor<void>
+{
+public:
+ OperationExporterRule(ExportContext &ctx) : _ctx{ctx}
+ {
+ // DO NOTHING
+ }
+
+public:
+ // Default export rule
+ void visit(luci::CircleNode *node) final;
+
+ // Non-virtual
+ void visit(luci::CircleConst *) final{/* skip, everything is done in exportOpDefinedTensors */};
+
+ // Virtual
+ void visit(luci::CircleInput *) final {}
+ void visit(luci::CircleOutput *) final {}
+ void visit(luci::CircleOutputDummy *) final {}
+ void visit(luci::CircleOutputExclude *) final {}
+ // Virtual for multiple-outputs
+ void visit(luci::CircleBidirectionalSequenceLSTMOut *) final {}
+ void visit(luci::CircleCustomOut *) final {}
+ void visit(luci::CircleIfOut *) final {}
+ void visit(luci::CircleNonMaxSuppressionV4Out *) final {}
+ void visit(luci::CircleNonMaxSuppressionV5Out *) final {}
+ void visit(luci::CircleSplitOut *) final {}
+ void visit(luci::CircleSplitVOut *) final {}
+ void visit(luci::CircleTopKV2Out *) final {}
+ void visit(luci::CircleUniqueOut *) final {}
+ void visit(luci::CircleUnpackOut *) final {}
+ void visit(luci::CircleVariable *) final {}
+ void visit(luci::CircleWhileOut *) final {}
+
+protected:
+ ExportContext &_ctx;
+};
+
+} // namespace luci
+
+#endif // __CIRCLE_OPERATION_EXPORTER_RULE_H__
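
Usage mirrors the updated exportNodes() earlier in this change: bind the builder
and the serialized data into an ExportContext, then dispatch each node through
the rule. A minimal sketch (export_one is an illustrative wrapper, not part of
the sources):

// Emits at most one circle::Operator for the given node.
void export_one(loco::Node *node, flatbuffers::FlatBufferBuilder &builder,
                luci::SerializedModelData &md, luci::SerializedGraphData &gd)
{
  luci::ExportContext ctx{builder, md, gd};
  luci::OperationExporterRule rule{ctx};
  // Real operators append to gd._operators; virtual nodes (CircleInput,
  // CircleSplitOut, ...) hit the empty visit() overloads and are skipped.
  loco::must_cast<luci::CircleNode *>(node)->accept(&rule);
}
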
diff --git a/compiler/luci/export/src/CircleOps.lst b/compiler/luci/export/src/CircleOps.lst
new file mode 100644
index 000000000..a047f29d7
--- /dev/null
+++ b/compiler/luci/export/src/CircleOps.lst
@@ -0,0 +1,157 @@
+#ifndef CIRCLE_NODE
+#error "Define CIRCLE_NODE"
+#endif // CIRCLE_NODE
+
+#ifndef CIRCLE_VNODE
+#error "Define CIRCLE_VNODE"
+#endif // CIRCLE_VNODE
+
+//
+// PLEASE SORT NODE DECLS IN ALPHABETICAL ORDER
+//
+// NOTE : CIRCLE_VNODE takes no additional parameters
+// because virtual nodes are not circle builtin operators.
+// Please add parameters when they become needed.
+//
+// CIRCLE_NODE(CircleNode, circle::BuiltinOperator, circle::BuiltinOptions)
+// CIRCLE_VNODE(CircleNode)
+//
+
+CIRCLE_NODE(CircleAbs, BuiltinOperator_ABS, BuiltinOptions_AbsOptions)
+CIRCLE_NODE(CircleAdd, BuiltinOperator_ADD, BuiltinOptions_AddOptions)
+CIRCLE_NODE(CircleAddN, BuiltinOperator_ADD_N, BuiltinOptions_AddNOptions)
+CIRCLE_NODE(CircleArgMax, BuiltinOperator_ARG_MAX, BuiltinOptions_ArgMaxOptions)
+CIRCLE_NODE(CircleArgMin, BuiltinOperator_ARG_MIN, BuiltinOptions_ArgMinOptions)
+CIRCLE_NODE(CircleAveragePool2D, BuiltinOperator_AVERAGE_POOL_2D, BuiltinOptions_Pool2DOptions)
+CIRCLE_NODE(CircleBatchToSpaceND, BuiltinOperator_BATCH_TO_SPACE_ND, BuiltinOptions_BatchToSpaceNDOptions)
+CIRCLE_NODE(CircleBatchMatMul, BuiltinOperator_BATCH_MATMUL, BuiltinOptions_BatchMatMulOptions)
+CIRCLE_NODE(CircleBidirectionalSequenceLSTM, BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM, BuiltinOptions_BidirectionalSequenceLSTMOptions)
+CIRCLE_NODE(CircleCast, BuiltinOperator_CAST, BuiltinOptions_CastOptions)
+CIRCLE_NODE(CircleCeil, BuiltinOperator_CEIL, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleConcatenation, BuiltinOperator_CONCATENATION, BuiltinOptions_ConcatenationOptions)
+CIRCLE_NODE(CircleConv2D, BuiltinOperator_CONV_2D, BuiltinOptions_Conv2DOptions)
+CIRCLE_NODE(CircleCos, BuiltinOperator_COS, BuiltinOptions_CosOptions)
+CIRCLE_NODE(CircleCustom, BuiltinOperator_CUSTOM, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleDensify, BuiltinOperator_DENSIFY, BuiltinOptions_DensifyOptions)
+CIRCLE_NODE(CircleDepthToSpace, BuiltinOperator_DEPTH_TO_SPACE, BuiltinOptions_DepthToSpaceOptions)
+CIRCLE_NODE(CircleDepthwiseConv2D, BuiltinOperator_DEPTHWISE_CONV_2D, BuiltinOptions_DepthwiseConv2DOptions)
+CIRCLE_NODE(CircleDequantize, BuiltinOperator_DEQUANTIZE, BuiltinOptions_DequantizeOptions)
+CIRCLE_NODE(CircleDiv, BuiltinOperator_DIV, BuiltinOptions_DivOptions)
+CIRCLE_NODE(CircleElu, BuiltinOperator_ELU, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleEqual, BuiltinOperator_EQUAL, BuiltinOptions_EqualOptions)
+CIRCLE_NODE(CircleExp, BuiltinOperator_EXP, BuiltinOptions_ExpOptions)
+CIRCLE_NODE(CircleExpandDims, BuiltinOperator_EXPAND_DIMS, BuiltinOptions_ExpandDimsOptions)
+CIRCLE_NODE(CircleFakeQuant, BuiltinOperator_FAKE_QUANT, BuiltinOptions_FakeQuantOptions)
+CIRCLE_NODE(CircleFill, BuiltinOperator_FILL, BuiltinOptions_FillOptions)
+CIRCLE_NODE(CircleFloor, BuiltinOperator_FLOOR, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleFloorDiv, BuiltinOperator_FLOOR_DIV, BuiltinOptions_FloorDivOptions)
+CIRCLE_NODE(CircleFloorMod, BuiltinOperator_FLOOR_MOD, BuiltinOptions_FloorModOptions)
+CIRCLE_NODE(CircleFullyConnected, BuiltinOperator_FULLY_CONNECTED, BuiltinOptions_FullyConnectedOptions)
+CIRCLE_NODE(CircleGather, BuiltinOperator_GATHER, BuiltinOptions_GatherOptions)
+CIRCLE_NODE(CircleGatherNd, BuiltinOperator_GATHER_ND, BuiltinOptions_GatherNdOptions)
+CIRCLE_NODE(CircleGelu, BuiltinOperator_GELU, BuiltinOptions_GeluOptions)
+CIRCLE_NODE(CircleGreater, BuiltinOperator_GREATER, BuiltinOptions_GreaterOptions)
+CIRCLE_NODE(CircleGreaterEqual, BuiltinOperator_GREATER_EQUAL, BuiltinOptions_GreaterEqualOptions)
+CIRCLE_NODE(CircleHardSwish, BuiltinOperator_HARD_SWISH, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleIf, BuiltinOperator_IF, BuiltinOptions_IfOptions)
+CIRCLE_NODE(CircleL2Normalize, BuiltinOperator_L2_NORMALIZATION, BuiltinOptions_L2NormOptions)
+CIRCLE_NODE(CircleL2Pool2D, BuiltinOperator_L2_POOL_2D, BuiltinOptions_Pool2DOptions)
+CIRCLE_NODE(CircleLeakyRelu, BuiltinOperator_LEAKY_RELU, BuiltinOptions_LeakyReluOptions)
+CIRCLE_NODE(CircleLess, BuiltinOperator_LESS, BuiltinOptions_LessOptions)
+CIRCLE_NODE(CircleLessEqual, BuiltinOperator_LESS_EQUAL, BuiltinOptions_LessEqualOptions)
+CIRCLE_NODE(CircleLocalResponseNormalization, BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION, BuiltinOptions_LocalResponseNormalizationOptions)
+CIRCLE_NODE(CircleLog, BuiltinOperator_LOG, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleLogicalAnd, BuiltinOperator_LOGICAL_AND, BuiltinOptions_LogicalAndOptions)
+CIRCLE_NODE(CircleLogicalNot, BuiltinOperator_LOGICAL_NOT, BuiltinOptions_LogicalNotOptions)
+CIRCLE_NODE(CircleLogicalOr, BuiltinOperator_LOGICAL_OR, BuiltinOptions_LogicalOrOptions)
+CIRCLE_NODE(CircleLogistic, BuiltinOperator_LOGISTIC, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleLogSoftmax, BuiltinOperator_LOG_SOFTMAX, BuiltinOptions_LogSoftmaxOptions)
+CIRCLE_NODE(CircleMatrixDiag, BuiltinOperator_MATRIX_DIAG, BuiltinOptions_MatrixDiagOptions)
+CIRCLE_NODE(CircleMatrixSetDiag, BuiltinOperator_MATRIX_SET_DIAG, BuiltinOptions_MatrixSetDiagOptions)
+CIRCLE_NODE(CircleMaximum, BuiltinOperator_MAXIMUM, BuiltinOptions_MaximumMinimumOptions)
+CIRCLE_NODE(CircleMaxPool2D, BuiltinOperator_MAX_POOL_2D, BuiltinOptions_Pool2DOptions)
+CIRCLE_NODE(CircleMean, BuiltinOperator_MEAN, BuiltinOptions_ReducerOptions)
+CIRCLE_NODE(CircleMinimum, BuiltinOperator_MINIMUM, BuiltinOptions_MaximumMinimumOptions)
+CIRCLE_NODE(CircleMirrorPad, BuiltinOperator_MIRROR_PAD, BuiltinOptions_MirrorPadOptions)
+CIRCLE_NODE(CircleMul, BuiltinOperator_MUL, BuiltinOptions_MulOptions)
+CIRCLE_NODE(CircleNeg, BuiltinOperator_NEG, BuiltinOptions_NegOptions)
+CIRCLE_NODE(CircleNonMaxSuppressionV4, BuiltinOperator_NON_MAX_SUPPRESSION_V4, BuiltinOptions_NonMaxSuppressionV4Options)
+CIRCLE_NODE(CircleNonMaxSuppressionV5, BuiltinOperator_NON_MAX_SUPPRESSION_V5, BuiltinOptions_NonMaxSuppressionV5Options)
+CIRCLE_NODE(CircleNotEqual, BuiltinOperator_NOT_EQUAL, BuiltinOptions_NotEqualOptions)
+CIRCLE_NODE(CircleOneHot, BuiltinOperator_ONE_HOT, BuiltinOptions_OneHotOptions)
+CIRCLE_NODE(CirclePack, BuiltinOperator_PACK, BuiltinOptions_PackOptions)
+CIRCLE_NODE(CirclePad, BuiltinOperator_PAD, BuiltinOptions_PadOptions)
+CIRCLE_NODE(CirclePadV2, BuiltinOperator_PADV2, BuiltinOptions_PadV2Options)
+CIRCLE_NODE(CirclePow, BuiltinOperator_POW, BuiltinOptions_PowOptions)
+CIRCLE_NODE(CirclePRelu, BuiltinOperator_PRELU, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleQuantize, BuiltinOperator_QUANTIZE, BuiltinOptions_QuantizeOptions)
+CIRCLE_NODE(CircleRange, BuiltinOperator_RANGE, BuiltinOptions_RangeOptions)
+CIRCLE_NODE(CircleRank, BuiltinOperator_RANK, BuiltinOptions_RankOptions)
+CIRCLE_NODE(CircleReduceAny, BuiltinOperator_REDUCE_ANY, BuiltinOptions_ReducerOptions)
+CIRCLE_NODE(CircleReduceMax, BuiltinOperator_REDUCE_MAX, BuiltinOptions_ReducerOptions)
+CIRCLE_NODE(CircleReduceMin, BuiltinOperator_REDUCE_MIN, BuiltinOptions_ReducerOptions)
+CIRCLE_NODE(CircleReduceProd, BuiltinOperator_REDUCE_PROD, BuiltinOptions_ReducerOptions)
+CIRCLE_NODE(CircleRelu, BuiltinOperator_RELU, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleRelu6, BuiltinOperator_RELU6, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleReluN1To1, BuiltinOperator_RELU_N1_TO_1, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleReshape, BuiltinOperator_RESHAPE, BuiltinOptions_ReshapeOptions)
+CIRCLE_NODE(CircleResizeBilinear, BuiltinOperator_RESIZE_BILINEAR, BuiltinOptions_ResizeBilinearOptions)
+CIRCLE_NODE(CircleResizeNearestNeighbor, BuiltinOperator_RESIZE_NEAREST_NEIGHBOR, BuiltinOptions_ResizeNearestNeighborOptions)
+CIRCLE_NODE(CircleReverseSequence, BuiltinOperator_REVERSE_SEQUENCE, BuiltinOptions_ReverseSequenceOptions)
+CIRCLE_NODE(CircleReverseV2, BuiltinOperator_REVERSE_V2, BuiltinOptions_ReverseV2Options)
+CIRCLE_NODE(CircleRound, BuiltinOperator_ROUND, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleRsqrt, BuiltinOperator_RSQRT, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleScatterNd, BuiltinOperator_SCATTER_ND, BuiltinOptions_ScatterNdOptions)
+CIRCLE_NODE(CircleSegmentSum, BuiltinOperator_SEGMENT_SUM, BuiltinOptions_SegmentSumOptions)
+CIRCLE_NODE(CircleSelect, BuiltinOperator_SELECT, BuiltinOptions_SelectOptions)
+CIRCLE_NODE(CircleSelectV2, BuiltinOperator_SELECT_V2, BuiltinOptions_SelectV2Options)
+CIRCLE_NODE(CircleShape, BuiltinOperator_SHAPE, BuiltinOptions_ShapeOptions)
+CIRCLE_NODE(CircleSin, BuiltinOperator_SIN, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleSlice, BuiltinOperator_SLICE, BuiltinOptions_SliceOptions)
+CIRCLE_NODE(CircleSoftmax, BuiltinOperator_SOFTMAX, BuiltinOptions_SoftmaxOptions)
+CIRCLE_NODE(CircleSpaceToBatchND, BuiltinOperator_SPACE_TO_BATCH_ND, BuiltinOptions_SpaceToBatchNDOptions)
+CIRCLE_NODE(CircleSpaceToDepth, BuiltinOperator_SPACE_TO_DEPTH, BuiltinOptions_SpaceToDepthOptions)
+CIRCLE_NODE(CircleSparseToDense, BuiltinOperator_SPARSE_TO_DENSE, BuiltinOptions_SparseToDenseOptions)
+CIRCLE_NODE(CircleSplit, BuiltinOperator_SPLIT, BuiltinOptions_SplitOptions)
+CIRCLE_NODE(CircleSplitV, BuiltinOperator_SPLIT_V, BuiltinOptions_SplitVOptions)
+CIRCLE_NODE(CircleSqrt, BuiltinOperator_SQRT, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleSquare, BuiltinOperator_SQUARE, BuiltinOptions_SquareOptions)
+CIRCLE_NODE(CircleSquaredDifference, BuiltinOperator_SQUARED_DIFFERENCE, BuiltinOptions_SquaredDifferenceOptions)
+CIRCLE_NODE(CircleSqueeze, BuiltinOperator_SQUEEZE, BuiltinOptions_SqueezeOptions)
+CIRCLE_NODE(CircleStridedSlice, BuiltinOperator_STRIDED_SLICE, BuiltinOptions_StridedSliceOptions)
+CIRCLE_NODE(CircleSub, BuiltinOperator_SUB, BuiltinOptions_SubOptions)
+CIRCLE_NODE(CircleSum, BuiltinOperator_SUM, BuiltinOptions_ReducerOptions)
+CIRCLE_NODE(CircleSVDF, BuiltinOperator_SVDF, BuiltinOptions_SVDFOptions)
+CIRCLE_NODE(CircleTanh, BuiltinOperator_TANH, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleTile, BuiltinOperator_TILE, BuiltinOptions_TileOptions)
+CIRCLE_NODE(CircleTopKV2, BuiltinOperator_TOPK_V2, BuiltinOptions_TopKV2Options)
+CIRCLE_NODE(CircleTranspose, BuiltinOperator_TRANSPOSE, BuiltinOptions_TransposeOptions)
+CIRCLE_NODE(CircleTransposeConv, BuiltinOperator_TRANSPOSE_CONV, BuiltinOptions_TransposeConvOptions)
+CIRCLE_NODE(CircleUnidirectionalSequenceLSTM, BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM, BuiltinOptions_UnidirectionalSequenceLSTMOptions)
+CIRCLE_NODE(CircleUnique, BuiltinOperator_UNIQUE, BuiltinOptions_UniqueOptions)
+CIRCLE_NODE(CircleUnpack, BuiltinOperator_UNPACK, BuiltinOptions_UnpackOptions)
+CIRCLE_NODE(CircleWhere, BuiltinOperator_WHERE, BuiltinOptions_WhereOptions)
+CIRCLE_NODE(CircleWhile, BuiltinOperator_WHILE, BuiltinOptions_WhileOptions)
+CIRCLE_NODE(CircleZerosLike, BuiltinOperator_ZEROS_LIKE, BuiltinOptions_ZerosLikeOptions)
+// Circle Only
+CIRCLE_NODE(CircleBCQFullyConnected, BuiltinOperator_BCQ_FULLY_CONNECTED, BuiltinOptions_BCQFullyConnectedOptions)
+CIRCLE_NODE(CircleBCQGather, BuiltinOperator_BCQ_GATHER, BuiltinOptions_BCQGatherOptions)
+CIRCLE_NODE(CircleInstanceNorm, BuiltinOperator_INSTANCE_NORM, BuiltinOptions_InstanceNormOptions)
+// Virtual node(s)
+CIRCLE_VNODE(CircleBidirectionalSequenceLSTMOut)
+CIRCLE_VNODE(CircleConst)
+CIRCLE_VNODE(CircleInput)
+CIRCLE_VNODE(CircleOutput)
+CIRCLE_VNODE(CircleOutputDummy)
+CIRCLE_VNODE(CircleOutputExclude)
+CIRCLE_VNODE(CircleCustomOut)
+CIRCLE_VNODE(CircleIfOut)
+CIRCLE_VNODE(CircleNonMaxSuppressionV4Out)
+CIRCLE_VNODE(CircleNonMaxSuppressionV5Out)
+CIRCLE_VNODE(CircleSplitOut)
+CIRCLE_VNODE(CircleSplitVOut)
+CIRCLE_VNODE(CircleTopKV2Out)
+CIRCLE_VNODE(CircleUniqueOut)
+CIRCLE_VNODE(CircleUnpackOut)
+CIRCLE_VNODE(CircleVariable)
+CIRCLE_VNODE(CircleWhileOut)
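
CircleOps.lst is an X-macro table: each consumer defines CIRCLE_NODE and
CIRCLE_VNODE before including the file, and the preprocessor stamps out one
entry per node type. The real consumers (CircleBuiltinTypesExtractor.h and the
circle_builtin_operator/circle_builtin_options helpers) are outside this hunk;
the sketch below only shows the general shape, under assumed names:

// Illustrative X-macro consumer; BuiltinOperatorExtractor is an assumed name.
class BuiltinOperatorExtractor final
  : public luci::CircleNodeMutableVisitor<circle::BuiltinOperator>
{
public:
#define CIRCLE_NODE(CLASS, OP, OPTIONS) \
  circle::BuiltinOperator visit(luci::CLASS *) final { return circle::OP; }
// Virtual nodes are never exported as operators, so any sentinel value works.
#define CIRCLE_VNODE(CLASS) \
  circle::BuiltinOperator visit(luci::CLASS *) final { return circle::BuiltinOperator_CUSTOM; }
#include "CircleOps.lst"
#undef CIRCLE_VNODE
#undef CIRCLE_NODE
};
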
diff --git a/compiler/luci/export/src/CircleTensorExporter.cpp b/compiler/luci/export/src/CircleTensorExporter.cpp
index dc8c2fbc9..97e81076b 100644
--- a/compiler/luci/export/src/CircleTensorExporter.cpp
+++ b/compiler/luci/export/src/CircleTensorExporter.cpp
@@ -15,7 +15,6 @@
*/
#include "CircleTensorExporter.h"
-#include "TypeBridge.h"
#include <luci/IR/CircleNodes.h>
#include <luci/IR/CircleNodeVisitor.h>
@@ -29,6 +28,8 @@
#include <loco/IR/DataTypeTraits.h>
#include <oops/InternalExn.h>
+#include <string.h>
+
using namespace circle;
using namespace flatbuffers;
@@ -37,10 +38,10 @@ namespace
using namespace luci;
-class CircleTensoInfo
+class CircleTensorInfo
{
public:
- CircleTensoInfo() = default;
+ CircleTensorInfo() = default;
public:
void name(const std::string &name) { _name = name; }
@@ -63,6 +64,12 @@ public:
luci::CircleQuantParam *quantparam(void) const { return _quantparam; }
void quantparam(luci::CircleQuantParam *qp) { _quantparam = qp; }
+ luci::SparsityParam *sparsityparam(void) const { return _sparsityparam; }
+ void sparsityparam(luci::SparsityParam *sp) { _sparsityparam = sp; }
+
+ bool is_variable(void) const { return _is_variable; }
+ void is_variable(bool v) { _is_variable = v; }
+
private:
std::string _name;
@@ -72,9 +79,34 @@ private:
luci::CircleConst *_content = nullptr;
luci::CircleQuantParam *_quantparam = nullptr;
+ luci::SparsityParam *_sparsityparam = nullptr;
+
+ bool _is_variable = false;
};
-using CircleTensorContext = std::vector<CircleTensoInfo>;
+class CircleTensorContext
+{
+public:
+ CircleTensorContext() = default;
+
+public:
+ void emplace_back(CircleTensorInfo &ti)
+ {
+ assert(_names.find(ti.name()) == _names.end());
+ _tis.emplace_back(ti);
+ _names.insert(ti.name());
+ }
+ size_t size(void) const { return _tis.size(); }
+ std::vector<CircleTensorInfo>::iterator begin(void) { return _tis.begin(); }
+ std::vector<CircleTensorInfo>::iterator end(void) { return _tis.end(); }
+
+public:
+ bool exist(const std::string &name) const { return _names.find(name) != _names.end(); }
+
+private:
+ std::vector<CircleTensorInfo> _tis;
+ std::set<std::string> _names;
+};
struct NoOpDetector final : public luci::CircleNodeMutableVisitor<bool>
{
@@ -93,22 +125,32 @@ void allocateCircleTensorInfo(CircleNode *node, CircleTensorContext &ctx)
auto tensor_index = static_cast<CircleTensorIndex>(ctx.size());
// TODO Use Graph-level metadata for Input & Output
- // auto tensor_name = "t_" + std::to_string(tensor_index);
std::string tensor_name = node->name();
- if (tensor_name.empty())
- tensor_name = "t_" + std::to_string(tensor_index);
+  // NOTE tensor_name may be empty. This assertion will alert when that happens.
+  // Currently we require that a tensor has a name.
+  // TODO if this breaks, fix the cause or permit an empty tensor_name.
+ assert(!tensor_name.empty());
+ if (ctx.exist(tensor_name))
+ {
+      // NOTE this should assign a unique name to the Tensor.
+ tensor_name = tensor_name + "_" + std::to_string(tensor_index);
+ assert(!ctx.exist(tensor_name));
+ }
INFO(l) << "[luci] Tensor for " << tensor_name << ": " << tensor_index << std::endl;
- CircleTensoInfo tensor_info;
+ CircleTensorInfo tensor_info;
tensor_info.name(tensor_name);
- tensor_info.dtype(to_circle_tensortype(luci::node_dtype(node)));
+ tensor_info.dtype(to_circle_tensortype(node->dtype()));
if (node->shape_status() == ShapeStatus::VALID)
- tensor_info.shape(to_shape_description(luci::node_shape(node)));
+ tensor_info.shape(to_shape_description(node));
tensor_info.shape_status(node->shape_status());
tensor_info.content(dynamic_cast<luci::CircleConst *>(node));
tensor_info.quantparam(node->quantparam());
+ tensor_info.sparsityparam(node->sparsityparam());
+
+ tensor_info.is_variable(dynamic_cast<luci::CircleVariable *>(node) != nullptr);
set_tensor_index(node, tensor_index);
@@ -135,19 +177,55 @@ private:
}
public:
+ bool visit(luci::CircleBidirectionalSequenceLSTMOut *) final { return true; }
+ bool visit(luci::CircleCustomOut *) final { return true; }
bool visit(luci::CircleIfOut *) final { return true; }
+ bool visit(luci::CircleNonMaxSuppressionV4Out *) final { return true; }
+ bool visit(luci::CircleNonMaxSuppressionV5Out *) final { return true; }
bool visit(luci::CircleSplitOut *) final { return true; }
bool visit(luci::CircleSplitVOut *) final { return true; }
bool visit(luci::CircleTopKV2Out *) final { return true; }
bool visit(luci::CircleUnpackOut *) final { return true; }
+ bool visit(luci::CircleUniqueOut *) final { return true; }
bool visit(luci::CircleWhileOut *) final { return true; }
+ bool visit(luci::CircleBidirectionalSequenceLSTM *node) final
+ {
+ if (node->merge_outputs())
+ {
+ store_outputs(node, 1);
+ }
+ else
+ {
+ store_outputs(node, 2);
+ }
+ return true;
+ }
+
+ bool visit(luci::CircleCustom *node) final
+ {
+ store_outputs(node, node->numOutputs());
+ return true;
+ }
+
bool visit(luci::CircleIf *node) final
{
store_outputs(node, node->output_count());
return true;
}
+ bool visit(luci::CircleNonMaxSuppressionV4 *node) final
+ {
+ store_outputs(node, 2);
+ return true;
+ }
+
+ bool visit(luci::CircleNonMaxSuppressionV5 *node) final
+ {
+ store_outputs(node, 3);
+ return true;
+ }
+
bool visit(luci::CircleSplit *node) final
{
store_outputs(node, uint32_t(node->num_split()));
@@ -172,6 +250,12 @@ public:
return true;
}
+ bool visit(luci::CircleUnique *node) final
+ {
+ store_outputs(node, 2);
+ return true;
+ }
+
bool visit(luci::CircleWhile *node) final
{
store_outputs(node, node->output_count());
@@ -226,7 +310,26 @@ flatbuffers::Offset<Vector<int32_t>> encodeShape(FlatBufferBuilder &builder,
const ShapeDescription &shape)
{
assert(shape._rank_known && "unknown number of dimensions is not supported");
- return builder.CreateVector(shape._dims);
+
+ std::vector<int32_t> encoded_shape;
+ encoded_shape.resize(shape._dims.size());
+ for (uint32_t i = 0; i < shape._dims.size(); ++i)
+ encoded_shape.at(i) = shape._dims.at(i) == -1 ? 1 : shape._dims.at(i);
+
+ return builder.CreateVector(encoded_shape);
+}
+
+flatbuffers::Offset<Vector<int32_t>> encodeShapeSignature(FlatBufferBuilder &builder,
+ const ShapeDescription &shape)
+{
+ assert(shape._rank_known && "unknown number of dimensions is not supported");
+
+  // shape_signature is set if and only if at least one of the dimensions is unknown.
+ for (uint32_t i = 0; i < shape._dims.size(); ++i)
+ if (shape._dims.at(i) == -1)
+ return builder.CreateVector(shape._dims);
+
+ return flatbuffers::Offset<Vector<int32_t>>();
}
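
For illustration, a minimal sketch (not part of the patch) of how the two encoders above interact for a tensor with one dynamic dimension; the ShapeDescription fields are the ones used in this file:

// Hedged example: dynamic first dimension marked with -1.
ShapeDescription desc;
desc._rank_known = true;
desc._dims = {-1, 224, 224, 3};

flatbuffers::FlatBufferBuilder fbb;
auto shape_offset = encodeShape(fbb, desc);              // writes {1, 224, 224, 3}
auto signature_offset = encodeShapeSignature(fbb, desc); // writes {-1, 224, 224, 3}
// For a fully static shape encodeShapeSignature returns a null offset,
// so no shape_signature field is emitted for the tensor.
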
flatbuffers::Offset<circle::Buffer> encodeOpBuffer(FlatBufferBuilder &builder)
@@ -259,12 +362,62 @@ flatbuffers::Offset<circle::Buffer> encodeOpBufferByDType(FlatBufferBuilder &bui
}
template <>
+flatbuffers::Offset<circle::Buffer>
+encodeOpBufferByDType<loco::DataType::STRING>(FlatBufferBuilder &builder, luci::CircleConst *c)
+{
+ const uint32_t count = c->size<loco::DataType::STRING>();
+ uint32_t raw_size = sizeof(int32_t) * (count + 2);
+ for (uint32_t i = 0; i < count; ++i)
+ {
+ auto &value = c->at<loco::DataType::STRING>(i);
+ raw_size += value.length();
+ }
+
+ // serialize string data
+ // int32_t count
+ // int32_t offsets[count + 1]
+ // string values[count]
+ std::vector<uint8_t> raw_data;
+  raw_data.resize(raw_size); // resize, not reserve: writing through data() below needs live elements
+
+ auto *i32d = reinterpret_cast<int32_t *>(raw_data.data());
+ int32_t start = sizeof(int32_t) * (count + 2);
+ int32_t offset = start;
+ std::vector<int32_t> offsets;
+
+ *i32d++ = count;
+ *i32d++ = start;
+ offsets.push_back(start);
+ for (uint32_t i = 0; i < count; ++i)
+ {
+ auto &value = c->at<loco::DataType::STRING>(i);
+ offset += value.length();
+ *i32d++ = offset;
+ offsets.push_back(offset);
+ }
+
+ auto *data = reinterpret_cast<uint8_t *>(i32d);
+ for (uint32_t i = 0; i < count; ++i)
+ {
+ int32_t length = offsets[i + 1] - offsets[i];
+ auto &value = c->at<loco::DataType::STRING>(i);
+ memcpy(data, value.c_str(), length);
+ data += length;
+ }
+
+ auto array_offset = builder.CreateVector(reinterpret_cast<uint8_t *>(raw_data.data()), raw_size);
+ return CreateBuffer(builder, array_offset);
+}
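
To make the layout above concrete, here is a hedged decoder sketch for the serialized string buffer (count, then count + 1 offsets measured from the buffer start, then the concatenated bytes); decodeStringBuffer is a hypothetical helper, not an API added by this patch:

#include <cstdint>
#include <string>
#include <vector>

// Hypothetical reader for the layout written above:
//   int32_t count; int32_t offsets[count + 1]; char bytes[...]
std::vector<std::string> decodeStringBuffer(const uint8_t *raw)
{
  const auto *i32d = reinterpret_cast<const int32_t *>(raw);
  const int32_t count = *i32d++;
  std::vector<std::string> values;
  for (int32_t i = 0; i < count; ++i)
  {
    const int32_t begin = i32d[i];   // offset of string i from buffer start
    const int32_t end = i32d[i + 1]; // one past its last byte
    values.emplace_back(reinterpret_cast<const char *>(raw) + begin, end - begin);
  }
  return values;
}
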
+
+template <>
flatbuffers::Offset<circle::Buffer> encodeOpBuffer(FlatBufferBuilder &builder, luci::CircleConst *c)
{
switch (c->dtype())
{
case loco::DataType::FLOAT32:
return encodeOpBufferByDType<loco::DataType::FLOAT32>(builder, c);
+ case loco::DataType::S8:
+ return encodeOpBufferByDType<loco::DataType::S8>(builder, c);
case loco::DataType::S16:
return encodeOpBufferByDType<loco::DataType::S16>(builder, c);
case loco::DataType::S32:
@@ -275,10 +428,18 @@ flatbuffers::Offset<circle::Buffer> encodeOpBuffer(FlatBufferBuilder &builder, l
return encodeOpBufferByDType<loco::DataType::U8>(builder, c);
case loco::DataType::BOOL:
return encodeOpBufferByDType<loco::DataType::BOOL>(builder, c);
+ case loco::DataType::STRING:
+ return encodeOpBufferByDType<loco::DataType::STRING>(builder, c);
default:
break;
}
+  // NOTE loco::DataType::FLOAT16 is added but we do not export this type
+  // as backends currently do not support it.
+  // FLOAT16 is currently supported only for the "Tensor(Float16) - Dequantize"
+  // sequence, so that after the 'fold_dequantize' option this Tensor is
+  // converted to FLOAT32.
+
INTERNAL_EXN_V("Unsupported datatype", oops::to_uint32(c->dtype()));
}
@@ -308,26 +469,147 @@ encodeQuantizationParameters(FlatBufferBuilder &builder, luci::CircleQuantParam
0, quantparam->quantized_dimension);
}
-void exportOpDefinedTensor(const CircleTensoInfo &info, FlatBufferBuilder &builder,
+flatbuffers::Offset<circle::SparsityParameters>
+encodeSparsityParameters(FlatBufferBuilder &builder, luci::SparsityParam *sparsityparam)
+{
+ if (sparsityparam == nullptr)
+ return 0;
+
+ std::vector<flatbuffers::Offset<circle::DimensionMetadata>> dim_metadata_vec;
+ auto luci_dim_metadata = sparsityparam->dim_metadata;
+ for (auto it : luci_dim_metadata)
+ {
+ // array_segments
+ auto circle_array_segments = to_circle_sparse_index_vector(builder, it.array_segments());
+ auto circle_array_segments_type =
+ to_circle_sparse_index_vector_type(it.array_segments().type());
+
+ // array_indices
+ auto circle_array_indices = to_circle_sparse_index_vector(builder, it.array_indices());
+ auto circle_array_indices_type = to_circle_sparse_index_vector_type(it.array_indices().type());
+ auto dim_metadata = circle::CreateDimensionMetadata(
+ builder, to_circle_dimensiontype(it.format()), it.dense_size(), circle_array_segments_type,
+ circle_array_segments, circle_array_indices_type, circle_array_indices);
+ dim_metadata_vec.emplace_back(dim_metadata);
+ }
+
+ return circle::CreateSparsityParametersDirect(builder, &sparsityparam->traversal_order,
+ &sparsityparam->block_map, &dim_metadata_vec);
+}
+
+template <loco::DataType DT> bool has_same_elements(luci::CircleConst *lhs, luci::CircleConst *rhs)
+{
+ assert(lhs->dtype() == DT);
+ assert(rhs->dtype() == DT);
+ assert(lhs->size<DT>() == rhs->size<DT>());
+
+ for (uint32_t i = 0; i < lhs->size<DT>(); ++i)
+ if (lhs->at<DT>(i) != rhs->at<DT>(i))
+ return false;
+ return true;
+}
+
+bool has_same_values(luci::CircleConst *lhs, luci::CircleConst *rhs)
+{
+ if (lhs->dtype() != rhs->dtype())
+ return false;
+
+ if (lhs->rank() != rhs->rank())
+ return false;
+
+ for (uint32_t i = 0; i < lhs->rank(); ++i)
+ if (!(lhs->dim(i) == rhs->dim(i)))
+ return false;
+
+ switch (lhs->dtype())
+ {
+ case loco::DataType::FLOAT32:
+ return has_same_elements<loco::DataType::FLOAT32>(lhs, rhs);
+
+ case loco::DataType::S8:
+ return has_same_elements<loco::DataType::S8>(lhs, rhs);
+
+ case loco::DataType::S16:
+ return has_same_elements<loco::DataType::S16>(lhs, rhs);
+
+ case loco::DataType::S32:
+ return has_same_elements<loco::DataType::S32>(lhs, rhs);
+
+ case loco::DataType::S64:
+ return has_same_elements<loco::DataType::S64>(lhs, rhs);
+
+ case loco::DataType::U8:
+ return has_same_elements<loco::DataType::U8>(lhs, rhs);
+
+ case loco::DataType::BOOL:
+ return has_same_elements<loco::DataType::BOOL>(lhs, rhs);
+
+ default:
+ break;
+ }
+
+ return false;
+}
+
+uint32_t get_buffer_id(FlatBufferBuilder &builder, SerializedModelData &md, luci::CircleConst *node)
+{
+ if (node != nullptr)
+ {
+    // When a buffer with the same values is found, reuse its buffer id.
+ for (auto key_value : md._cached_buffer_id)
+ {
+ if (has_same_values(key_value.first, node))
+ return key_value.second;
+ }
+
+    // When no buffer with the same values is found, generate a new buffer
+ auto buffer = encodeOpBuffer(builder, node);
+
+ auto buffer_id = static_cast<uint32_t>(md._buffers.size());
+ md._buffers.push_back(buffer);
+
+ // Cache the newly generated buffer id
+ md._cached_buffer_id.insert({node, buffer_id});
+
+ return buffer_id;
+ }
+ else
+ {
+ // When there is no CircleConst, there is nothing to cache.
+ // So return new buffer id.
+ auto buffer = encodeOpBuffer(builder);
+
+ auto buffer_id = static_cast<uint32_t>(md._buffers.size());
+ md._buffers.push_back(buffer);
+
+ return buffer_id;
+ }
+}
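
The net effect of get_buffer_id is that constants with identical dtype, rank, dimensions, and elements share a single buffer in the exported model. A hedged usage sketch (w1/w2 are hypothetical CircleConst nodes):

// Assume w1 and w2 are luci::CircleConst* with identical dtype/shape/values.
uint32_t id1 = get_buffer_id(builder, md, w1); // encodes a new buffer and caches it
uint32_t id2 = get_buffer_id(builder, md, w2); // cache hit through has_same_values
assert(id1 == id2);                            // both tensors reference one buffer
uint32_t id3 = get_buffer_id(builder, md, nullptr); // fresh empty buffer, not cached
(void)id3;
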
+
+void exportOpDefinedTensor(const CircleTensorInfo &info, FlatBufferBuilder &builder,
SerializedModelData &md, SerializedGraphData &gd)
{
// Create and register output tensor shape
flatbuffers::Offset<Vector<int32_t>> shape_offset;
+ flatbuffers::Offset<Vector<int32_t>> shape_signature_offset;
if (info.shape_status() == ShapeStatus::VALID)
+ {
shape_offset = encodeShape(builder, info.shape());
-
- // encode and register output tensor buffer
- auto buffer =
- info.content() == nullptr ? encodeOpBuffer(builder) : encodeOpBuffer(builder, info.content());
+ shape_signature_offset = encodeShapeSignature(builder, info.shape());
+ }
auto quantparam = encodeQuantizationParameters(builder, info.quantparam());
- auto buffer_id = static_cast<uint32_t>(md._buffers.size());
- md._buffers.push_back(buffer);
+ auto sparsityparam = encodeSparsityParameters(builder, info.sparsityparam());
+
+ auto buffer_id = get_buffer_id(builder, md, info.content());
auto name_offset = builder.CreateString(info.name());
+
+ auto is_variable = info.is_variable();
+
auto tensor_offset = CreateTensor(builder, shape_offset, info.dtype(), buffer_id, name_offset,
- quantparam, /*is_variable*/ false);
+ quantparam, is_variable, sparsityparam, shape_signature_offset);
gd._tensors.push_back(tensor_offset);
}
diff --git a/compiler/luci/export/src/Optimize.cpp b/compiler/luci/export/src/Optimize.cpp
index 6fa50b564..e59f15204 100644
--- a/compiler/luci/export/src/Optimize.cpp
+++ b/compiler/luci/export/src/Optimize.cpp
@@ -17,8 +17,8 @@
#include "Optimize.h"
#include "ProgressReporter.h"
-#include <luci/Pass/ShapeInferencePass.h>
-#include <luci/Pass/TypeInferencePass.h>
+#include <luci/Pass/CircleShapeInferencePass.h>
+#include <luci/Pass/CircleTypeInferencePass.h>
#include <logo/Phase.h>
@@ -32,8 +32,8 @@ void optimize(loco::Graph *g)
logo::Phase phase;
{
// prepare type and shape before optimization
- phase.emplace_back(std::make_unique<TypeInferencePass>());
- phase.emplace_back(std::make_unique<ShapeInferencePass>());
+ phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+ phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
// TODO add more optimization passes (with a knob)
}
diff --git a/compiler/luci/export/src/ProgressReporter.h b/compiler/luci/export/src/ProgressReporter.h
index e91f42592..5d55bcd07 100644
--- a/compiler/luci/export/src/ProgressReporter.h
+++ b/compiler/luci/export/src/ProgressReporter.h
@@ -28,7 +28,7 @@ class ProgressReporter : public logo::PhaseEventListener
{
public:
ProgressReporter(loco::Graph *graph, logo::PhaseStrategy strategy)
- : _graph{graph}, _strategy{strategy}
+ : _graph{graph}, _strategy{strategy}
{
// DO NOTHING
}
diff --git a/compiler/luci/export/src/SerializedData.h b/compiler/luci/export/src/SerializedData.h
index 251daa0ea..136a8ac49 100644
--- a/compiler/luci/export/src/SerializedData.h
+++ b/compiler/luci/export/src/SerializedData.h
@@ -19,9 +19,13 @@
#include <mio/circle/schema_generated.h>
-#include <vector>
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/ExecutionPlanTable.h>
+#include <vector>
+#include <string>
#include <unordered_map>
+#include <map>
+#include <set>
namespace luci
{
@@ -45,6 +49,40 @@ struct OpCode
}
};
+class CircleExportMetadata
+{
+public:
+ void source_table(const std::map<uint32_t, std::string> &table) { _source_table = table; }
+
+ void add_op_table(uint32_t node_id, uint32_t source_id)
+ {
+    // A model with multiple subgraphs may have duplicated node ids.
+    // For now, as we do not consider multiple subgraphs in profiling,
+    // just ignore those cases and support them in the future.
+ if (_op_table.find(node_id) == _op_table.end())
+ _op_table.emplace(node_id, std::set<uint32_t>());
+ _op_table.at(node_id).emplace(source_id);
+ }
+
+ void add_execution_plan_table(uint32_t node_id,
+ const std::vector<uint32_t> &execution_plan_inform)
+ {
+ _execution_plan_table[node_id] = execution_plan_inform;
+ }
+
+public:
+ const std::vector<uint8_t> encoded_source_table(void);
+ const std::vector<uint8_t> encoded_op_table(void);
+ const std::vector<uint8_t> encoded_execution_plan_table(void);
+
+private:
+ std::map<uint32_t, std::string> _source_table;
+ std::map<uint32_t, std::set<uint32_t>> _op_table;
+  // _execution_plan_table stores, for the node with node_id, its order of execution
+  // and memory offsets: the execution order comes first, then the memory offsets
+  // for the node's output tensors.
+ luci::ExecutionPlanTable _execution_plan_table;
+};
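
A hedged sketch of how the exporter is expected to populate CircleExportMetadata; the ids and names are made up, and only the methods declared above are used:

luci::CircleExportMetadata metadata;
// source_table: source id -> original operation name
metadata.source_table({{0, "conv1"}, {1, "relu1"}});
// op_table: exported node 5 originates from both sources (e.g., after fusion)
metadata.add_op_table(/*node_id=*/5, /*source_id=*/0);
metadata.add_op_table(/*node_id=*/5, /*source_id=*/1);
// execution plan: node 5 runs 3rd, its output tensor lives at offset 1024
metadata.add_execution_plan_table(5, {3, 1024});
// The encoded_*_table() methods then serialize these maps for circle::Metadata.
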
+
} // namespace luci
namespace std
@@ -61,7 +99,7 @@ namespace luci
{
/**
- * @breif Record the information of T/F Lite SubGraph and its mapping to loco
+ * @brief Record the information of T/F Lite SubGraph and its mapping to loco
*/
struct SubGraphContext
{
@@ -83,14 +121,18 @@ struct SerializedModelData final
std::unordered_map<OpCode, uint32_t> _operator_codes;
std::vector<flatbuffers::Offset<circle::Buffer>> _buffers;
+ CircleExportMetadata _metadata;
+
+  // This is used for de-duplicating buffers with the same values
+ std::map<luci::CircleConst *, uint32_t> _cached_buffer_id;
/**
* @brief if opcode is not registered in table of opcodes add it
* @param builtin_code
* @return idx of opcode in table of opcodes (see schema)
*/
- uint32_t registerBuiltinOpcode(circle::BuiltinOperator builtin_code, const int32_t op_version);
- uint32_t registerCustomOpcode(const std::string &custom_op);
+ uint32_t registerBuiltinOpcode(circle::BuiltinOperator builtin_code,
+ const std::string &custom_code, const int32_t op_version);
};
// Prerequisites for circle::Model object creation
diff --git a/compiler/luci/export/src/TypeBridge.cpp b/compiler/luci/export/src/TypeBridge.cpp
deleted file mode 100644
index 9ccd52376..000000000
--- a/compiler/luci/export/src/TypeBridge.cpp
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TypeBridge.h"
-
-#include "CircleExporterUtils.h"
-
-#include <luci/IR/CircleNodes.h>
-#include <luci/IR/CircleNodeVisitor.h>
-#include <luci/Service/CircleTypeInference.h>
-#include <luci/Service/CircleShapeInference.h>
-
-#include <loco/Service/TypeInference.h>
-#include <loco/Service/ShapeInference.h>
-
-namespace
-{
-
-/**
- * @brief CopySelector will return condition of copy shape/type inference to node
- */
-struct CopySelector final : public luci::CircleNodeVisitor<bool>
-{
- // return false(don't copy) for nodes that provides shape/type from nature
- bool visit(const luci::CircleInput *) final { return false; }
- bool visit(const luci::CircleConst *) final { return false; }
-
- // default is copy attributes
- bool visit(const luci::CircleNode *) { return true; }
-};
-
-} // namespace
-
-namespace luci
-{
-
-loco::TensorShape node_shape(CircleNode *node)
-{
- loco::TensorShape shape;
-
- shape.rank(node->rank());
- for (uint32_t r = 0; r < node->rank(); ++r)
- {
- shape.dim(r) = loco::Dimension(node->dim(r).value());
- }
- return shape;
-}
-
-loco::DataType node_dtype(CircleNode *node) { return node->dtype(); }
-
-void copy_shape_dtype(loco::Graph *graph)
-{
- /**
- * @note We will iterate all the nodes in the graph to include dangle nodes
- */
- auto nodes = graph->nodes();
- for (uint32_t n = 0; n < nodes->size(); ++n)
- {
- auto node = loco::must_cast<luci::CircleNode *>(nodes->at(n));
-
- CopySelector cs;
- if (node->accept(&cs))
- {
- // NOTE not all nodes have infered shape/dtype: multiple outs may not be
- // visited when outputs are not used
- // TODO fix shape inference traversal
- // NOTE when loco supports multiple outputs in nature this issue should be
- // resolved also
-
- if (loco::dtype_known(node))
- {
- node->dtype(loco::dtype_get(node));
- }
-
- if (loco::shape_known(node))
- {
- auto shape = loco::shape_get(node).as<loco::TensorShape>();
- node->rank(shape.rank());
- for (uint32_t r = 0; r < shape.rank(); ++r)
- {
- node->dim(r) = loco::Dimension(shape.dim(r).value());
- }
-
- // ShapeStatus should be update only when the status was UNDEFINED
- if (node->shape_status() == ShapeStatus::UNDEFINED)
- node->shape_status(ShapeStatus::VALID);
- }
- }
- }
-}
-
-} // namespace luci
diff --git a/compiler/luci/export/src/TypeBridge.h b/compiler/luci/export/src/TypeBridge.h
deleted file mode 100644
index a63fbce54..000000000
--- a/compiler/luci/export/src/TypeBridge.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __TYPE_BRIDGE_H__
-#define __TYPE_BRIDGE_H__
-
-#include <luci/IR/CircleNode.h>
-
-#include <loco.h>
-
-namespace luci
-{
-
-/**
- * @brief node_shape() will return loco::TensorShape of CircleNode
- */
-loco::TensorShape node_shape(CircleNode *node);
-
-/**
- * @brief node_dtype() will return loco::DataType of CircleNode
- */
-loco::DataType node_dtype(CircleNode *node);
-
-/**
- * @brief copy_shape_dtype() will copy shape and dtype inference data to CircleNode
- */
-void copy_shape_dtype(loco::Graph *graph);
-
-} // namespace luci
-
-#endif // __TYPE_BRIDGE_H__
diff --git a/compiler/luci/import/CMakeLists.txt b/compiler/luci/import/CMakeLists.txt
index 2ae00b837..2e7e88118 100644
--- a/compiler/luci/import/CMakeLists.txt
+++ b/compiler/luci/import/CMakeLists.txt
@@ -2,18 +2,28 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
-add_library(luci_import SHARED ${SOURCES})
+if (NOT LUCI_LIBRARY_TYPE)
+ set(LUCI_LIBRARY_TYPE "SHARED")
+endif(NOT LUCI_LIBRARY_TYPE)
+
+add_library(luci_import ${LUCI_LIBRARY_TYPE} ${SOURCES})
target_include_directories(luci_import PRIVATE src)
target_include_directories(luci_import PUBLIC include)
target_link_libraries(luci_import PUBLIC luci_lang)
-target_link_libraries(luci_import PUBLIC mio_circle)
+target_link_libraries(luci_import PUBLIC luci_profile)
+target_link_libraries(luci_import PUBLIC luci_plan)
+target_link_libraries(luci_import PUBLIC mio_circle06)
target_link_libraries(luci_import PRIVATE luci_env)
target_link_libraries(luci_import PRIVATE luci_log)
target_link_libraries(luci_import PRIVATE luci_logex)
target_link_libraries(luci_import PRIVATE nncc_common)
target_link_libraries(luci_import PRIVATE locop)
+target_link_libraries(luci_import PRIVATE foder)
target_link_libraries(luci_import PRIVATE oops)
+target_link_libraries(luci_import PRIVATE mio_circle06_helper)
install(TARGETS luci_import DESTINATION lib)
+install(DIRECTORY include/ DESTINATION include
+ FILES_MATCHING PATTERN "*.h")
if(NOT ENABLE_TEST)
return()
@@ -24,4 +34,3 @@ nnas_find_package(GTest REQUIRED)
GTest_AddTest(luci_import_test ${TESTS})
target_include_directories(luci_import_test PRIVATE src)
target_link_libraries(luci_import_test luci_import)
-target_link_libraries(luci_import_test oops)
diff --git a/compiler/luci/import/include/luci/Import/CircleReader.h b/compiler/luci/import/include/luci/Import/CircleReader.h
index 3d85b9e35..a0519f661 100644
--- a/compiler/luci/import/include/luci/Import/CircleReader.h
+++ b/compiler/luci/import/include/luci/Import/CircleReader.h
@@ -23,6 +23,7 @@
#include <luci/IR/AttrPadding.h>
#include <luci/IR/CircleNode.h>
#include <luci/IR/CircleQuantParam.h>
+#include <luci/IR/SparsityParam.h>
#include <loco.h>
@@ -34,65 +35,92 @@
namespace luci
{
-bool is_valid(const circle::OperatorCodeT &opcode);
-bool is_custom(const circle::OperatorCodeT &opcode);
-std::string opcode_name(const circle::OperatorCodeT &opcode);
-const char *tensor_name(const circle::TensorT &tensor);
-const circle::QuantizationParametersT *tensor_quantization(const circle::TensorT &tensor);
+const char *tensor_name(const circle::Tensor *tensor);
+const circle::QuantizationParameters *tensor_quantization(const circle::Tensor *tensor);
loco::DataType luci_datatype(circle::TensorType type);
FusedActFunc luci_actfunc(const circle::ActivationFunctionType type);
Padding luci_padding(const circle::Padding padding);
MirrorPadMode luci_mirrorpad_mode(const circle::MirrorPadMode mode);
+luci::CircleFullyConnected::WeightsFormat
+luci_weights_format(const circle::FullyConnectedOptionsWeightsFormat weights_format);
std::unique_ptr<CircleQuantParam>
-luci_quantparam(const circle::QuantizationParametersT *quantization);
+luci_quantparam(const circle::QuantizationParameters *quantization);
/// @brief Copy common tensor attributes such as name, type, etc. to node.
-void copy_tensor_attributes(const circle::TensorT &tensor, CircleNode *node);
+void copy_tensor_attributes(const circle::Tensor *tensor, CircleNode *node);
+
+std::string fb_string2std_string(const flatbuffers::String *fb_str);
/**
- * @brief Loads Circle file and provides helpers to access attributes
+ * @brief Wrapper to use a flatbuffers::Vector pointer like a std::vector
*/
-class CircleReader
+template <typename T> class VectorWrapper
{
+public:
+ explicit VectorWrapper(const flatbuffers::Vector<T> *ptr);
+
+ const T *data() const;
+ uint32_t size() const;
+
+ using iterator = typename flatbuffers::Vector<T>::const_iterator;
+ iterator begin() const;
+ iterator end() const;
+
+ using value_type = typename flatbuffers::Vector<T>::return_type;
+ value_type at(uint32_t i) const;
+ value_type operator[](uint32_t i) const;
+
+ bool null() const;
+ bool empty() const;
+
private:
- using CircleBuffers_t = std::vector<std::unique_ptr<circle::BufferT>>;
- using CircleTensors_t = std::vector<std::unique_ptr<circle::TensorT>>;
- using CircleOperators_t = std::vector<std::unique_ptr<circle::OperatorT>>;
- using CircleOperatorCodes_t = std::vector<std::unique_ptr<circle::OperatorCodeT>>;
+ const flatbuffers::Vector<T> *_vector;
+};
- using CircleSubGraphsPtr_t = flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>;
- using CircleTensorsPtr_t = flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>;
+template <typename T> VectorWrapper<T> wrap(const flatbuffers::Vector<T> *vec)
+{
+ return VectorWrapper<T>(vec);
+}
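
Typical use of the wrapper reads like std::vector access over flatbuffers storage; a minimal hedged sketch (parse/select_subgraph are assumed to have succeeded, and tensors() is the accessor declared later in this header):

luci::CircleReader reader;
// ... reader.parse(model) and reader.select_subgraph(0) assumed successful ...
auto tensors = reader.tensors(); // VectorWrapper over the subgraph's tensors
if (!tensors.null())
{
  for (uint32_t i = 0; i < tensors.size(); ++i)
  {
    const circle::Tensor *tensor = tensors.at(i); // bounds-checked element access
    (void)tensor;
  }
}
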
-public:
- CircleReader() = default;
+/**
+ * @brief Loads Circle file and provides helpers to access attributes
+ */
+class CircleReader
+{
+private: // direct API
+ using CircleBuffers = VectorWrapper<flatbuffers::Offset<circle::Buffer>>;
+ using CircleTensors = VectorWrapper<flatbuffers::Offset<circle::Tensor>>;
+ using CircleOperators = VectorWrapper<flatbuffers::Offset<circle::Operator>>;
+ using CircleOperatorCodes = VectorWrapper<flatbuffers::Offset<circle::OperatorCode>>;
+ using CircleMetadataSet = VectorWrapper<flatbuffers::Offset<circle::Metadata>>;
public:
- const CircleOperatorCodes_t &opcodes() const { return _model->operator_codes; }
- const CircleBuffers_t &buffers() const { return _model->buffers; }
- const CircleTensors_t &tensors() const { return _current_subgraph->tensors; }
- const CircleOperators_t &operators() const { return _current_subgraph->operators; }
- const std::vector<int32_t> &inputs() const { return _current_subgraph->inputs; }
- const std::vector<int32_t> &outputs() const { return _current_subgraph->outputs; }
- const std::string &name() const { return _current_subgraph->name; }
+ CircleReader() = default;
- const CircleTensorsPtr_t *tensors_ptr() const { return _tensors_ptr; }
+public: // direct API
+ CircleOperatorCodes opcodes() const { return wrap(_model->operator_codes()); }
+ CircleBuffers buffers() const { return wrap(_model->buffers()); }
+ CircleTensors tensors() const { return wrap(_current_subgraph->tensors()); }
+ CircleOperators operators() const { return wrap(_current_subgraph->operators()); }
+ VectorWrapper<int32_t> inputs() const { return wrap(_current_subgraph->inputs()); }
+ VectorWrapper<int32_t> outputs() const { return wrap(_current_subgraph->outputs()); }
+ std::string name() const { return fb_string2std_string(_current_subgraph->name()); }
+ circle::DataFormat data_format() const { return _current_subgraph->data_format(); }
+ CircleMetadataSet metadata() const { return wrap(_model->metadata()); }
- uint32_t num_subgraph() const { return _model->subgraphs.size(); }
+ uint32_t num_subgraph() const { return wrap(_model->subgraphs()).size(); }
- circle::BuiltinOperator builtin_code(const circle::OperatorT &op) const;
- std::string opcode_name(const circle::OperatorT &op) const;
+ circle::BuiltinOperator builtin_code(const circle::Operator *op) const;
+ std::string opcode_name(const circle::Operator *op) const;
public:
bool parse(const circle::Model *model);
bool select_subgraph(uint32_t subgraph);
private:
- std::unique_ptr<const circle::ModelT> _model;
- const circle::SubGraphT *_current_subgraph{nullptr};
-
- const circle::Model *_model_ptr{nullptr};
- const CircleTensorsPtr_t *_tensors_ptr{nullptr};
+ const circle::Model *_model{nullptr};
+ const circle::SubGraph *_current_subgraph{nullptr};
};
} // namespace luci
diff --git a/compiler/luci/import/include/luci/Import/GraphBuilder.h b/compiler/luci/import/include/luci/Import/GraphBuilder.h
index 548264dac..0db612652 100644
--- a/compiler/luci/import/include/luci/Import/GraphBuilder.h
+++ b/compiler/luci/import/include/luci/Import/GraphBuilder.h
@@ -33,7 +33,13 @@ class GraphBuilder : public GraphBuilderBase
public:
virtual ~GraphBuilder() = default;
- void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+ // common validate method to check number of inputs and single output
+ bool validate(const ValidateArgs &args, size_t input_cnt) const
+ {
+ return (args.op.inputs.size() == input_cnt && args.op.outputs.size() == 1);
+ }
+
+ CircleNode *build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
private:
virtual CircleNode *build_node(const circle::OperatorT &op,
diff --git a/compiler/luci/import/include/luci/Import/GraphBuilderBase.h b/compiler/luci/import/include/luci/Import/GraphBuilderBase.h
index a0cd008e0..ddd4445cd 100644
--- a/compiler/luci/import/include/luci/Import/GraphBuilderBase.h
+++ b/compiler/luci/import/include/luci/Import/GraphBuilderBase.h
@@ -19,6 +19,8 @@
#include "GraphBuilderContext.h"
+#include <luci/IR/CircleNode.h>
+
#include <mio/circle/schema_generated.h>
namespace luci
@@ -38,7 +40,7 @@ struct GraphBuilderBase
};
virtual bool validate(const ValidateArgs &) const = 0;
- virtual void build(const circle::OperatorT &op, GraphBuilderContext *context) const = 0;
+ virtual CircleNode *build(const circle::OperatorT &op, GraphBuilderContext *context) const = 0;
virtual ~GraphBuilderBase() = default;
};
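
Since build() now returns the created node, a caller can post-process it immediately; a minimal sketch under these declarations (import_one is a hypothetical helper, not part of the patch):

// Hypothetical call-site fragment showing why the return type changed.
luci::CircleNode *import_one(const luci::GraphBuilderBase &builder,
                             const circle::OperatorT &op,
                             luci::GraphBuilderContext *context)
{
  luci::CircleNode *node = builder.build(op, context);
  // With the old 'void' signature the importer had to look the node up again
  // before it could attach extra information (names, profiling ids, ...).
  return node;
}
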
diff --git a/compiler/luci/import/include/luci/Import/GraphBuilderContext.h b/compiler/luci/import/include/luci/Import/GraphBuilderContext.h
index 72e237abc..1673df43d 100644
--- a/compiler/luci/import/include/luci/Import/GraphBuilderContext.h
+++ b/compiler/luci/import/include/luci/Import/GraphBuilderContext.h
@@ -71,7 +71,7 @@ class GraphBuilderContext
public:
GraphBuilderContext(loco::Graph *g, CircleReader *reader, IndexNodeFinder *nodefinder,
IndexTensorOutputs *tensoroutputs)
- : _g(g), _reader(reader), _indexnodefinder(nodefinder), _indextensoroutputs(tensoroutputs)
+ : _g(g), _reader(reader), _indexnodefinder(nodefinder), _indextensoroutputs(tensoroutputs)
{
// DO NOTHING
}
diff --git a/compiler/luci/import/include/luci/Import/GraphBuilderMultiOutput.h b/compiler/luci/import/include/luci/Import/GraphBuilderMultiOutput.h
new file mode 100644
index 000000000..6e8791b62
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/GraphBuilderMultiOutput.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_GRAPH_BUILDER_MULTI_OUTPUT_H__
+#define __LUCI_IMPORT_GRAPH_BUILDER_MULTI_OUTPUT_H__
+
+#include "GraphBuilderContext.h"
+#include "GraphBuilderBase.h"
+
+#include <mio/circle/schema_generated.h>
+
+namespace luci
+{
+
+/**
+ * @brief Base for general multiple-output graph builders (e.g., CircleIfGraphBuilder)
+ */
+class GraphBuilderMultiOutput : public GraphBuilderBase
+{
+public:
+ virtual ~GraphBuilderMultiOutput() = default;
+
+ CircleNode *build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+
+protected:
+ struct BuildNodeArgs
+ {
+ BuildNodeArgs(const circle::OperatorT &o, GraphBuilderContext *c,
+ const std::vector<CircleNode *> &i)
+ : op(o), context(c), input_nodes(i)
+ {
+ }
+
+ const circle::OperatorT &op;
+ GraphBuilderContext *context;
+ const std::vector<CircleNode *> &input_nodes;
+ };
+
+ struct BuildOutArgs
+ {
+ BuildOutArgs(CircleNode *nd, uint32_t n) : node(nd), index(n) {}
+
+ CircleNode *node;
+ uint32_t index;
+ };
+
+private:
+ virtual CircleNode *build_node(const BuildNodeArgs &) const = 0;
+ virtual CircleNode *build_out(const BuildOutArgs &) const = 0;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_GRAPH_BUILDER_MULTI_OUTPUT_H__
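
For orientation, a hedged sketch of a concrete subclass in the style of the multi-output builders declared below; TopKV2 stands in as the example, and the wiring is illustrative rather than the actual implementation in src/Nodes/:

#include <luci/IR/CircleNodes.h>

// Illustrative two-output builder following the GraphBuilderMultiOutput contract.
class ExampleTopKV2GraphBuilder : public luci::GraphBuilderMultiOutput
{
public:
  bool validate(const ValidateArgs &args) const final
  {
    // TopKV2 takes (input, k) and produces (values, indices).
    return args.op.inputs.size() == 2 && args.op.outputs.size() == 2;
  }

private:
  luci::CircleNode *build_node(const BuildNodeArgs &bna) const final
  {
    auto *node = bna.context->graph()->nodes()->create<luci::CircleTopKV2>();
    node->input(bna.input_nodes[0]);
    node->k(bna.input_nodes[1]);
    return node;
  }

  luci::CircleNode *build_out(const BuildOutArgs &boa) const final
  {
    // One virtual *Out node per output index of the multi-output op.
    auto *out = boa.node->graph()->nodes()->create<luci::CircleTopKV2Out>();
    out->input(boa.node);
    out->index(static_cast<int32_t>(boa.index));
    return out;
  }
};
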
diff --git a/compiler/luci/import/include/luci/Import/GraphBuilderRegistry.h b/compiler/luci/import/include/luci/Import/GraphBuilderRegistry.h
index b8dc22fdd..93e34a56b 100644
--- a/compiler/luci/import/include/luci/Import/GraphBuilderRegistry.h
+++ b/compiler/luci/import/include/luci/Import/GraphBuilderRegistry.h
@@ -18,6 +18,7 @@
#define __LUCI_IMPORT_GRAPH_BUILDER_REGISTRY_H__
#include "GraphBuilderBase.h"
+#include "NodeBuilder.h"
#include <map>
@@ -32,6 +33,11 @@ struct GraphBuilderSource
* @brief Returns registered GraphBuilder pointer for operator (nullptr if not present)
*/
virtual const GraphBuilderBase *lookup(const circle::BuiltinOperator &op) const = 0;
+
+ /**
+ * @brief Returns registered NodeBuilderBase pointer for type (nullptr if not present)
+ */
+ virtual const NodeBuilderBase *lookup(const NodeBuilderType type) const = 0;
};
/**
@@ -61,6 +67,17 @@ public:
return _builder_map.at(op).get();
}
+ /**
+ * @brief Returns registered NodeBuilderBase pointer for type or nullptr if not registered
+ */
+ const NodeBuilderBase *lookup(const NodeBuilderType type) const final
+ {
+ if (_node_builders.find(type) == _node_builders.end())
+ return (_parent == nullptr) ? nullptr : _parent->lookup(type);
+
+ return _node_builders.at(type).get();
+ }
+
static GraphBuilderRegistry &get()
{
static GraphBuilderRegistry me;
@@ -73,11 +90,17 @@ public:
_builder_map[op] = std::move(builder);
}
+ void add(std::unique_ptr<NodeBuilderBase> &&builder)
+ {
+ _node_builders[builder->builder_type()] = std::move(builder);
+ }
+
private:
const GraphBuilderSource *_parent = nullptr;
private:
std::map<const circle::BuiltinOperator, std::unique_ptr<GraphBuilderBase>> _builder_map;
+ std::map<const NodeBuilderType, std::unique_ptr<NodeBuilderBase>> _node_builders;
};
} // namespace luci
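
The two lookup overloads resolve both kinds of builders from the same registry; a brief hedged sketch of the intended use:

auto &registry = luci::GraphBuilderRegistry::get();

// Operator builder for a builtin opcode (nullptr when not registered).
const luci::GraphBuilderBase *op_builder = registry.lookup(circle::BuiltinOperator_SPLIT);

// Tensor-node builder, e.g. the one creating CircleConst from buffers.
const luci::NodeBuilderBase *const_builder = registry.lookup(luci::NodeBuilderType::BUFFER);
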
diff --git a/compiler/luci/import/include/luci/Import/NodeBuilder.h b/compiler/luci/import/include/luci/Import/NodeBuilder.h
new file mode 100644
index 000000000..05f533f38
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/NodeBuilder.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_NODE_BUILDER_H__
+#define __LUCI_IMPORT_NODE_BUILDER_H__
+
+#include "GraphBuilderContext.h"
+#include "GraphBuilderBase.h"
+
+#include <mio/circle/schema_generated.h>
+
+namespace luci
+{
+
+/**
+ * @brief Tensor types which require a separate node
+ */
+enum class NodeBuilderType
+{
+ BUFFER,
+  // TODO Extend this enum if a new type of NodeBuilderBase is needed
+};
+
+/**
+ * @brief Creates nodes from given Tensor and context
+ */
+class NodeBuilderBase
+{
+public:
+ virtual CircleNode *build(TensorIndex tensor_idx, GraphBuilderContext *context) const = 0;
+ virtual NodeBuilderType builder_type() const = 0;
+ virtual ~NodeBuilderBase() = default;
+};
+
+/**
+ * @brief Placeholder for builders of tensors with different types
+ */
+template <NodeBuilderType Type> class TypedNodeBuilder : public NodeBuilderBase
+{
+public:
+ NodeBuilderType builder_type() const final { return Type; }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_NODE_BUILDER_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes.h b/compiler/luci/import/include/luci/Import/Nodes.h
index 0b21d380f..e8c8d0aae 100644
--- a/compiler/luci/import/include/luci/Import/Nodes.h
+++ b/compiler/luci/import/include/luci/Import/Nodes.h
@@ -27,6 +27,7 @@
#include "Nodes/CircleBatchToSpaceND.h"
#include "Nodes/CircleBCQFullyConnected.h"
#include "Nodes/CircleBCQGather.h"
+#include "Nodes/CircleBidirectionalSequenceLSTM.h"
#include "Nodes/CircleCast.h"
#include "Nodes/CircleCeil.h"
#include "Nodes/CircleConcatenation.h"
@@ -34,13 +35,16 @@
#include "Nodes/CircleConv2D.h"
#include "Nodes/CircleCos.h"
#include "Nodes/CircleCustom.h"
+#include "Nodes/CircleDensify.h"
#include "Nodes/CircleDepthToSpace.h"
#include "Nodes/CircleDepthwiseConv2D.h"
+#include "Nodes/CircleDequantize.h"
#include "Nodes/CircleDiv.h"
#include "Nodes/CircleElu.h"
#include "Nodes/CircleEqual.h"
#include "Nodes/CircleExp.h"
#include "Nodes/CircleExpandDims.h"
+#include "Nodes/CircleFakeQuant.h"
#include "Nodes/CircleFill.h"
#include "Nodes/CircleFloor.h"
#include "Nodes/CircleFloorDiv.h"
@@ -48,8 +52,10 @@
#include "Nodes/CircleFullyConnected.h"
#include "Nodes/CircleGather.h"
#include "Nodes/CircleGatherNd.h"
+#include "Nodes/CircleGelu.h"
#include "Nodes/CircleGreater.h"
#include "Nodes/CircleGreaterEqual.h"
+#include "Nodes/CircleHardSwish.h"
#include "Nodes/CircleIf.h"
#include "Nodes/CircleInstanceNorm.h"
#include "Nodes/CircleL2Normalize.h"
@@ -82,6 +88,7 @@
#include "Nodes/CirclePadV2.h"
#include "Nodes/CirclePow.h"
#include "Nodes/CirclePRelu.h"
+#include "Nodes/CircleQuantize.h"
#include "Nodes/CircleRange.h"
#include "Nodes/CircleRank.h"
#include "Nodes/CircleReduceAny.h"
@@ -118,13 +125,16 @@
#include "Nodes/CircleStridedSlice.h"
#include "Nodes/CircleSub.h"
#include "Nodes/CircleSum.h"
+#include "Nodes/CircleSVDF.h"
#include "Nodes/CircleTanh.h"
#include "Nodes/CircleTile.h"
#include "Nodes/CircleTopKV2.h"
#include "Nodes/CircleTranspose.h"
#include "Nodes/CircleTransposeConv.h"
+#include "Nodes/CircleUnidirectionalSequenceLSTM.h"
#include "Nodes/CircleUnique.h"
#include "Nodes/CircleUnpack.h"
+#include "Nodes/CircleVariable.h"
#include "Nodes/CircleWhere.h"
#include "Nodes/CircleWhile.h"
#include "Nodes/CircleZerosLike.h"
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleBidirectionalSequenceLSTM.h b/compiler/luci/import/include/luci/Import/Nodes/CircleBidirectionalSequenceLSTM.h
new file mode 100644
index 000000000..491517268
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleBidirectionalSequenceLSTM.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_BIDIRECTIONALSEQUENCE_LSTM_H__
+#define __LUCI_IMPORT_OP_CIRCLE_BIDIRECTIONALSEQUENCE_LSTM_H__
+
+#include "luci/Import/GraphBuilderMultiOutput.h"
+
+namespace luci
+{
+
+class CircleBidirectionalSequenceLSTMGraphBuilder : public GraphBuilderMultiOutput
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const BuildNodeArgs &) const final;
+ CircleNode *build_out(const BuildOutArgs &) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_BIDIRECTIONALSEQUENCE_LSTM_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleConst.h b/compiler/luci/import/include/luci/Import/Nodes/CircleConst.h
index 7d4f10a59..9e50ddbde 100644
--- a/compiler/luci/import/include/luci/Import/Nodes/CircleConst.h
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleConst.h
@@ -17,20 +17,21 @@
#ifndef __LUCI_IMPORT_OP_CIRCLE_CONST_H__
#define __LUCI_IMPORT_OP_CIRCLE_CONST_H__
-#include "luci/Import/GraphBuilderContext.h"
+#include "luci/Import/NodeBuilder.h"
#include <luci/IR/Nodes/CircleConst.h>
-/*
- * @note Circle does not have Const operator.
- * Methods here provide helper that creates CircleConst from
- * Tensor and Buffer in circle flatbuffer file.
- */
-
namespace luci
{
-CircleConst *create_circleconst(GraphBuilderContext *context, int32_t tensor_index);
+/**
+ * @brief Builder that creates a CircleConst node from a Tensor with a buffer.
+ */
+class CircleConstNodeBuilder : public TypedNodeBuilder<NodeBuilderType::BUFFER>
+{
+public:
+ CircleNode *build(TensorIndex tensor_index, GraphBuilderContext *ctx) const final;
+};
} // namespace luci
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleCustom.h b/compiler/luci/import/include/luci/Import/Nodes/CircleCustom.h
index 65745be4b..f0d7e303d 100644
--- a/compiler/luci/import/include/luci/Import/Nodes/CircleCustom.h
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleCustom.h
@@ -17,17 +17,19 @@
#ifndef __LUCI_IMPORT_OP_CIRCLE_CUSTOM_H__
#define __LUCI_IMPORT_OP_CIRCLE_CUSTOM_H__
-#include "luci/Import/GraphBuilder.h"
+#include "luci/Import/GraphBuilderMultiOutput.h"
namespace luci
{
-class CircleCustomGraphBuilder : public GraphBuilderBase
+class CircleCustomGraphBuilder : public GraphBuilderMultiOutput
{
public:
bool validate(const ValidateArgs &args) const final;
- void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+private:
+ CircleNode *build_node(const BuildNodeArgs &) const final;
+ CircleNode *build_out(const BuildOutArgs &) const final;
};
} // namespace luci
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleDensify.h b/compiler/luci/import/include/luci/Import/Nodes/CircleDensify.h
new file mode 100644
index 000000000..42bdac1a4
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleDensify.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_DENSIFY_H__
+#define __LUCI_IMPORT_OP_CIRCLE_DENSIFY_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleDensifyGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_DENSIFY_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleDequantize.h b/compiler/luci/import/include/luci/Import/Nodes/CircleDequantize.h
new file mode 100644
index 000000000..e25b80b0e
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleDequantize.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_DEQUANTIZE_H__
+#define __LUCI_IMPORT_OP_CIRCLE_DEQUANTIZE_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleDequantizeGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_DEQUANTIZE_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleFakeQuant.h b/compiler/luci/import/include/luci/Import/Nodes/CircleFakeQuant.h
new file mode 100644
index 000000000..9d9f7b07b
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleFakeQuant.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_FAKE_QUANT_H__
+#define __LUCI_IMPORT_OP_CIRCLE_FAKE_QUANT_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleFakeQuantGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_FAKE_QUANT_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleGelu.h b/compiler/luci/import/include/luci/Import/Nodes/CircleGelu.h
new file mode 100644
index 000000000..9be266fb1
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleGelu.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_GELU_H__
+#define __LUCI_IMPORT_OP_CIRCLE_GELU_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleGeluGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_GELU_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleHardSwish.h b/compiler/luci/import/include/luci/Import/Nodes/CircleHardSwish.h
new file mode 100644
index 000000000..7aeb0299b
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleHardSwish.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_HARDSWISH_H__
+#define __LUCI_IMPORT_OP_CIRCLE_HARDSWISH_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleHardSwishGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_HARDSWISH_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleIf.h b/compiler/luci/import/include/luci/Import/Nodes/CircleIf.h
index 8faf09cae..94052f5be 100644
--- a/compiler/luci/import/include/luci/Import/Nodes/CircleIf.h
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleIf.h
@@ -17,17 +17,19 @@
#ifndef __LUCI_IMPORT_OP_CIRCLE_IF_H__
#define __LUCI_IMPORT_OP_CIRCLE_IF_H__
-#include "luci/Import/GraphBuilderBase.h"
+#include "luci/Import/GraphBuilderMultiOutput.h"
namespace luci
{
-class CircleIfGraphBuilder : public GraphBuilderBase
+class CircleIfGraphBuilder : public GraphBuilderMultiOutput
{
public:
bool validate(const ValidateArgs &args) const final;
- void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+private:
+ CircleNode *build_node(const BuildNodeArgs &) const final;
+ CircleNode *build_out(const BuildOutArgs &) const final;
};
} // namespace luci
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleNonMaxSuppressionV4.h b/compiler/luci/import/include/luci/Import/Nodes/CircleNonMaxSuppressionV4.h
index f193aae35..4e8388b3e 100644
--- a/compiler/luci/import/include/luci/Import/Nodes/CircleNonMaxSuppressionV4.h
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleNonMaxSuppressionV4.h
@@ -17,17 +17,19 @@
#ifndef __LUCI_IMPORT_OP_CIRCLE_NON_MAX_SUPPRESSION_V4_H__
#define __LUCI_IMPORT_OP_CIRCLE_NON_MAX_SUPPRESSION_V4_H__
-#include "luci/Import/GraphBuilderBase.h"
+#include "luci/Import/GraphBuilderMultiOutput.h"
namespace luci
{
-class CircleNonMaxSuppressionV4GraphBuilder : public GraphBuilderBase
+class CircleNonMaxSuppressionV4GraphBuilder : public GraphBuilderMultiOutput
{
public:
bool validate(const ValidateArgs &args) const final;
- void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+private:
+ CircleNode *build_node(const BuildNodeArgs &) const final;
+ CircleNode *build_out(const BuildOutArgs &) const final;
};
} // namespace luci
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleNonMaxSuppressionV5.h b/compiler/luci/import/include/luci/Import/Nodes/CircleNonMaxSuppressionV5.h
index 62be0758e..4120a30eb 100644
--- a/compiler/luci/import/include/luci/Import/Nodes/CircleNonMaxSuppressionV5.h
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleNonMaxSuppressionV5.h
@@ -17,17 +17,19 @@
#ifndef __LUCI_IMPORT_OP_CIRCLE_NON_MAX_SUPPRESSION_V5_H__
#define __LUCI_IMPORT_OP_CIRCLE_NON_MAX_SUPPRESSION_V5_H__
-#include "luci/Import/GraphBuilderBase.h"
+#include "luci/Import/GraphBuilderMultiOutput.h"
namespace luci
{
-class CircleNonMaxSuppressionV5GraphBuilder : public GraphBuilderBase
+class CircleNonMaxSuppressionV5GraphBuilder : public GraphBuilderMultiOutput
{
public:
bool validate(const ValidateArgs &args) const final;
- void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+private:
+ CircleNode *build_node(const BuildNodeArgs &) const final;
+ CircleNode *build_out(const BuildOutArgs &) const final;
};
} // namespace luci
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleQuantize.h b/compiler/luci/import/include/luci/Import/Nodes/CircleQuantize.h
new file mode 100644
index 000000000..b6d52f503
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleQuantize.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_QUANTIZE_H__
+#define __LUCI_IMPORT_OP_CIRCLE_QUANTIZE_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleQuantizeGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_QUANTIZE_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleSVDF.h b/compiler/luci/import/include/luci/Import/Nodes/CircleSVDF.h
new file mode 100644
index 000000000..a91f66019
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleSVDF.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_SVDF_H__
+#define __LUCI_IMPORT_OP_CIRCLE_SVDF_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleSVDFBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_SVDF_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleSplit.h b/compiler/luci/import/include/luci/Import/Nodes/CircleSplit.h
index 3395e40fd..5b45c9a9e 100644
--- a/compiler/luci/import/include/luci/Import/Nodes/CircleSplit.h
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleSplit.h
@@ -17,17 +17,19 @@
#ifndef __LUCI_IMPORT_OP_CIRCLE_SPLIT_H__
#define __LUCI_IMPORT_OP_CIRCLE_SPLIT_H__
-#include "luci/Import/GraphBuilderBase.h"
+#include "luci/Import/GraphBuilderMultiOutput.h"
namespace luci
{
-class CircleSplitGraphBuilder : public GraphBuilderBase
+class CircleSplitGraphBuilder : public GraphBuilderMultiOutput
{
public:
bool validate(const ValidateArgs &args) const final;
- void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+private:
+ CircleNode *build_node(const BuildNodeArgs &) const final;
+ CircleNode *build_out(const BuildOutArgs &) const final;
};
} // namespace luci
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleSplitV.h b/compiler/luci/import/include/luci/Import/Nodes/CircleSplitV.h
index 3e53df362..de712f90c 100644
--- a/compiler/luci/import/include/luci/Import/Nodes/CircleSplitV.h
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleSplitV.h
@@ -17,17 +17,19 @@
#ifndef __LUCI_IMPORT_OP_CIRCLE_SPLIT_V_H__
#define __LUCI_IMPORT_OP_CIRCLE_SPLIT_V_H__
-#include "luci/Import/GraphBuilderBase.h"
+#include "luci/Import/GraphBuilderMultiOutput.h"
namespace luci
{
-class CircleSplitVGraphBuilder : public GraphBuilderBase
+class CircleSplitVGraphBuilder : public GraphBuilderMultiOutput
{
public:
bool validate(const ValidateArgs &args) const final;
- void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+private:
+ CircleNode *build_node(const BuildNodeArgs &) const final;
+ CircleNode *build_out(const BuildOutArgs &) const final;
};
} // namespace luci
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleTopKV2.h b/compiler/luci/import/include/luci/Import/Nodes/CircleTopKV2.h
index 8ec3f3311..b4ad97130 100644
--- a/compiler/luci/import/include/luci/Import/Nodes/CircleTopKV2.h
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleTopKV2.h
@@ -17,17 +17,19 @@
#ifndef __LUCI_IMPORT_OP_CIRCLE_TOPK_V2_H__
#define __LUCI_IMPORT_OP_CIRCLE_TOPK_V2_H__
-#include "luci/Import/GraphBuilderBase.h"
+#include "luci/Import/GraphBuilderMultiOutput.h"
namespace luci
{
-class CircleTopKV2GraphBuilder : public GraphBuilderBase
+class CircleTopKV2GraphBuilder : public GraphBuilderMultiOutput
{
public:
bool validate(const ValidateArgs &args) const final;
- void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+private:
+ CircleNode *build_node(const BuildNodeArgs &) const final;
+ CircleNode *build_out(const BuildOutArgs &) const final;
};
} // namespace luci
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleUnidirectionalSequenceLSTM.h b/compiler/luci/import/include/luci/Import/Nodes/CircleUnidirectionalSequenceLSTM.h
new file mode 100644
index 000000000..4cc3320dc
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleUnidirectionalSequenceLSTM.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_UNIDIRECTIONALSEQUENCELSTM_H__
+#define __LUCI_IMPORT_OP_CIRCLE_UNIDIRECTIONALSEQUENCELSTM_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleUnidirectionalSequenceLSTMGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_UNIDIRECTIONALSEQUENCELSTM_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleUnique.h b/compiler/luci/import/include/luci/Import/Nodes/CircleUnique.h
index ed5b5035d..40e75ec73 100644
--- a/compiler/luci/import/include/luci/Import/Nodes/CircleUnique.h
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleUnique.h
@@ -17,17 +17,19 @@
#ifndef __LUCI_IMPORT_OP_CIRCLE_UNIQUE_H__
#define __LUCI_IMPORT_OP_CIRCLE_UNIQUE_H__
-#include "luci/Import/GraphBuilderBase.h"
+#include "luci/Import/GraphBuilderMultiOutput.h"
namespace luci
{
-class CircleUniqueGraphBuilder : public GraphBuilderBase
+class CircleUniqueGraphBuilder : public GraphBuilderMultiOutput
{
public:
bool validate(const ValidateArgs &args) const final;
- void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+private:
+ CircleNode *build_node(const BuildNodeArgs &) const final;
+ CircleNode *build_out(const BuildOutArgs &) const final;
};
} // namespace luci
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleUnpack.h b/compiler/luci/import/include/luci/Import/Nodes/CircleUnpack.h
index f1a21de22..0b623655f 100644
--- a/compiler/luci/import/include/luci/Import/Nodes/CircleUnpack.h
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleUnpack.h
@@ -17,17 +17,19 @@
#ifndef __LUCI_IMPORT_OP_CIRCLE_UNPACK_H__
#define __LUCI_IMPORT_OP_CIRCLE_UNPACK_H__
-#include "luci/Import/GraphBuilderBase.h"
+#include "luci/Import/GraphBuilderMultiOutput.h"
namespace luci
{
-class CircleUnpackGraphBuilder : public GraphBuilderBase
+class CircleUnpackGraphBuilder : public GraphBuilderMultiOutput
{
public:
bool validate(const ValidateArgs &args) const final;
- void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+private:
+ CircleNode *build_node(const BuildNodeArgs &) const final;
+ CircleNode *build_out(const BuildOutArgs &) const final;
};
} // namespace luci
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleVariable.h b/compiler/luci/import/include/luci/Import/Nodes/CircleVariable.h
new file mode 100644
index 000000000..4d8961fa5
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleVariable.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_VARIABLE_H__
+#define __LUCI_IMPORT_OP_CIRCLE_VARIABLE_H__
+
+#include "luci/Import/GraphBuilderContext.h"
+
+#include <luci/IR/Nodes/CircleVariable.h>
+
+/*
+ * @note Circle does not have a node type for variable tensors.
+ *       The helper here creates a CircleVariable from a Tensor
+ *       whose is_variable field is true.
+ */
+
+namespace luci
+{
+
+CircleVariable *create_circlevariable(GraphBuilderContext *context, int32_t tensor_index);
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_VARIABLE_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleWhile.h b/compiler/luci/import/include/luci/Import/Nodes/CircleWhile.h
index 68c56b3c6..69d23f823 100644
--- a/compiler/luci/import/include/luci/Import/Nodes/CircleWhile.h
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleWhile.h
@@ -27,7 +27,7 @@ class CircleWhileGraphBuilder : public GraphBuilderBase
public:
bool validate(const ValidateArgs &args) const final;
- void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+ CircleNode *build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
};
} // namespace luci
diff --git a/compiler/luci/import/include/luci/ImporterEx.h b/compiler/luci/import/include/luci/ImporterEx.h
new file mode 100644
index 000000000..852d4c848
--- /dev/null
+++ b/compiler/luci/import/include/luci/ImporterEx.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORTER_EX_H__
+#define __LUCI_IMPORTER_EX_H__
+
+#include "luci/IR/Module.h"
+
+#include <memory>
+#include <string>
+
+namespace luci
+{
+
+class ImporterEx final
+{
+public:
+ ImporterEx() = default;
+
+public:
+ std::unique_ptr<Module> importVerifyModule(const std::string &input_path) const;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORTER_EX_H__
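For reference, a minimal sketch of the intended call site — a hypothetical helper, assuming importVerifyModule returns nullptr when loading, flatbuffer verification, or import fails (the header above does not spell this out):

  #include <luci/ImporterEx.h>

  std::unique_ptr<luci::Module> load_module(const std::string &input_path)
  {
    luci::ImporterEx importer;
    // load the file, verify the flatbuffer, then import to a luci::Module
    auto module = importer.importVerifyModule(input_path);
    if (module == nullptr)
      return nullptr; // assumed failure signal
    return module;
  }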
diff --git a/compiler/luci/import/src/CircleImportMetadata.cpp b/compiler/luci/import/src/CircleImportMetadata.cpp
new file mode 100644
index 000000000..fbdea8a7c
--- /dev/null
+++ b/compiler/luci/import/src/CircleImportMetadata.cpp
@@ -0,0 +1,251 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleImportMetadata.h"
+
+#include <vector>
+
+namespace
+{
+
+template <typename VECTORTYPE> uint32_t read_u32(const VECTORTYPE &buffer, uint32_t idx)
+{
+ static_assert(std::is_same<typename VECTORTYPE::value_type, uint8_t>::value, "Types mismatch!");
+
+ uint32_t val = 0;
+ val += (buffer.at(idx + 0) << 0 * 8);
+ val += (buffer.at(idx + 1) << 1 * 8);
+ val += (buffer.at(idx + 2) << 2 * 8);
+ val += (buffer.at(idx + 3) << 3 * 8);
+ return val;
+}
+
+} // namespace
+
+namespace
+{
+
+// 'source_table' is decoded to std::map<uint32_t, std::string> format.
+template <typename VECTORTYPE>
+const std::map<uint32_t, std::string> decoded_source_table(const VECTORTYPE &source_table_data)
+{
+ static_assert(std::is_same<typename VECTORTYPE::value_type, uint8_t>::value, "Types mismatch!");
+
+ std::map<uint32_t, std::string> source_id_name_map;
+ uint32_t idx = 0;
+
+ if (source_table_data.size() < 4)
+ throw std::runtime_error("Source table decode error : invalid entry number");
+
+ uint32_t entry_number = read_u32(source_table_data, idx);
+ idx += sizeof(uint32_t);
+
+ while (idx < source_table_data.size())
+ {
+ if (idx + 2 * sizeof(uint32_t) > source_table_data.size())
+ throw std::runtime_error("Source table decode error : invalid entry item");
+
+ uint32_t id = read_u32(source_table_data, idx);
+ idx += sizeof(uint32_t);
+
+ uint32_t length = read_u32(source_table_data, idx);
+ idx += sizeof(uint32_t);
+
+ if (idx + sizeof(char) * length > source_table_data.size())
+ throw std::runtime_error("Source table decode error : invalid entry data");
+
+ // The last character of the name is '\0'.
+ // However, as std::string does not rely on '\0' to mark the end of a string,
+ // we drop the terminator and do not include it in the string.
+ std::string origin_name;
+ for (uint32_t j = 0; j < length - 1; ++j)
+ origin_name += source_table_data.at(idx + j);
+ assert(source_table_data.at(idx + length - 1) == '\0');
+ idx += sizeof(char) * length;
+
+ if (source_id_name_map.insert({id, origin_name}).second == false)
+ throw std::runtime_error("Source table decode error : duplicated origin ID");
+ }
+
+ if (idx != source_table_data.size())
+ throw std::runtime_error("Source table decode error : data size invalid");
+
+ if (source_id_name_map.size() != entry_number)
+ throw std::runtime_error("Source table decode error : result size mismatch");
+
+ return source_id_name_map;
+}
+
+// 'op_table' is decoded to std::map<uint32_t, std::set<uint32_t>> format.
+template <typename VECTORTYPE>
+const std::map<uint32_t, std::set<uint32_t>> decoded_op_table(const VECTORTYPE &op_table_data)
+{
+ static_assert(std::is_same<typename VECTORTYPE::value_type, uint8_t>::value, "Types mismatch!");
+
+ std::map<uint32_t, std::set<uint32_t>> node_source_ids_map;
+ uint32_t idx = 0;
+
+ if (op_table_data.size() < 4)
+ throw std::runtime_error("Op table decode error : invalid entry number");
+
+ uint32_t entry_number = read_u32(op_table_data, idx);
+ idx += sizeof(uint32_t);
+
+ while (idx < op_table_data.size())
+ {
+ if (idx + 2 * sizeof(uint32_t) > op_table_data.size())
+ throw std::runtime_error("Op table decode error : invalid entry item");
+
+ uint32_t id = read_u32(op_table_data, idx);
+ idx += sizeof(uint32_t);
+
+ uint32_t node_num = read_u32(op_table_data, idx);
+ idx += sizeof(uint32_t);
+
+ if (idx + sizeof(uint32_t) * node_num > op_table_data.size())
+ throw std::runtime_error("Source table decode error : invalid entry data");
+
+ std::set<uint32_t> source_ids;
+ for (uint32_t j = 0; j < node_num; ++j)
+ {
+ uint32_t origin = read_u32(op_table_data, idx);
+ idx += sizeof(uint32_t);
+
+ source_ids.insert(origin);
+ }
+
+ if (node_source_ids_map.insert({id, source_ids}).second == false)
+ throw std::runtime_error("Op table decode error : duplicated origin ID");
+ }
+
+ if (idx != op_table_data.size())
+ throw std::runtime_error("Op table decode error : data size invalid");
+
+ if (node_source_ids_map.size() != entry_number)
+ throw std::runtime_error("Op table decode error : entry number invalid");
+
+ return node_source_ids_map;
+}
+
+// 'execution_plan_table' is decoded to std::map<uint32_t, std::vector<uint32_t>> format.
+template <typename VECTORTYPE>
+const luci::ExecutionPlanTable decoded_execution_plan(const VECTORTYPE &execution_plan_data)
+{
+ static_assert(std::is_same<typename VECTORTYPE::value_type, uint8_t>::value, "Types mismatch!");
+
+ luci::ExecutionPlanTable execution_plan_table;
+ uint32_t idx = 0;
+
+ if (execution_plan_data.size() < 4)
+ throw std::runtime_error("Op table decode error : invalid entry number");
+
+ uint32_t entry_number = read_u32(execution_plan_data, idx);
+ idx += sizeof(uint32_t);
+
+ while (idx < execution_plan_data.size())
+ {
+ if (idx + 2 * sizeof(uint32_t) > execution_plan_data.size())
+ throw std::runtime_error("Op table decode error : invalid entry item");
+
+ uint32_t id = read_u32(execution_plan_data, idx);
+ idx += sizeof(uint32_t);
+
+ uint32_t size = read_u32(execution_plan_data, idx);
+
+ if (size == 0)
+ throw std::runtime_error("Op table decode error : empty execution plan entry");
+
+ idx += sizeof(uint32_t);
+
+ if (idx + sizeof(uint32_t) * size > execution_plan_data.size())
+ throw std::runtime_error("Source table decode error : invalid entry data");
+
+ std::vector<uint32_t> execution_plan_vector;
+ for (uint32_t j = 0; j < size; ++j)
+ {
+ uint32_t execution_plan_inform = read_u32(execution_plan_data, idx);
+ idx += sizeof(uint32_t);
+
+ execution_plan_vector.push_back(execution_plan_inform);
+ }
+
+ if (execution_plan_table.insert({id, execution_plan_vector}).second == false)
+ throw std::runtime_error("Op table decode error : duplicated origin ID");
+ }
+
+ if (idx != execution_plan_data.size())
+ throw std::runtime_error("Op table decode error : data size invalid");
+
+ if (execution_plan_table.size() != entry_number)
+ throw std::runtime_error("Op table decode error : entry number invalid");
+
+ return execution_plan_table;
+}
+
+} // namespace
+
+namespace luci
+{
+
+CircleImportMetadata::CircleImportMetadata(const luci::CircleReader &reader)
+{
+ const auto metadata = reader.metadata();
+ for (uint32_t i = 0; i < metadata.size(); ++i)
+ {
+ const auto *meta = metadata[i];
+ assert(meta != nullptr);
+
+ assert(meta->buffer() < reader.buffers().size());
+ assert(reader.buffers()[meta->buffer()] != nullptr);
+ const auto buffer = luci::wrap(reader.buffers()[meta->buffer()]->data());
+
+ assert(meta->name() != nullptr);
+ if (meta->name()->str().compare("ONE_op_table") == 0)
+ _op_table = decoded_op_table(buffer);
+ else if (meta->name()->str().compare("ONE_source_table") == 0)
+ _source_table = decoded_source_table(buffer);
+ else if (meta->name()->str().compare("ONE_execution_plan_table") == 0)
+ _execution_plan_table = decoded_execution_plan(buffer);
+ }
+}
+
+const OriginTable CircleImportMetadata::origin_table(void)
+{
+ OriginTable origin_table;
+
+ if (_op_table.size() > 0 && _source_table.size() > 0)
+ {
+ for (auto &kv : _op_table)
+ {
+ const auto node_id = kv.first;
+ const auto &source_ids = kv.second;
+
+ std::vector<std::shared_ptr<CircleNodeOrigin>> origins;
+ for (auto source_id : source_ids)
+ {
+ const auto &source_name = _source_table.at(source_id);
+ origins.push_back(single_origin(source_id, source_name));
+ }
+
+ auto origin = composite_origin(origins);
+ origin_table.emplace(node_id, origin);
+ }
+ }
+
+ return origin_table;
+}
+
+} // namespace luci
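To make the wire format above concrete, a self-contained sketch that builds a "ONE_source_table" payload with a single entry {7, "conv"} (make_source_table_payload is a hypothetical helper, not part of this diff; little-endian u32 fields match what read_u32 implements):

  #include <cstdint>
  #include <string>
  #include <vector>

  std::vector<uint8_t> make_source_table_payload()
  {
    std::vector<uint8_t> buf;
    auto push_u32 = [&buf](uint32_t v) {
      for (uint32_t i = 0; i < 4; ++i)
        buf.push_back(static_cast<uint8_t>(v >> (8 * i)));
    };
    push_u32(1); // entry_number: one entry follows
    push_u32(7); // id: origin ID of the entry
    const std::string name = "conv";
    push_u32(static_cast<uint32_t>(name.size() + 1)); // length includes the trailing '\0'
    for (char c : name)
      buf.push_back(static_cast<uint8_t>(c));
    buf.push_back('\0');
    return buf; // decoded_source_table(buf) yields {{7, "conv"}}
  }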
diff --git a/compiler/luci/import/src/CircleImportMetadata.h b/compiler/luci/import/src/CircleImportMetadata.h
new file mode 100644
index 000000000..0e0240678
--- /dev/null
+++ b/compiler/luci/import/src/CircleImportMetadata.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_IMPORT_METADATA_H__
+#define __LUCI_CIRCLE_IMPORT_METADATA_H__
+
+#include "luci/Import/CircleReader.h"
+
+#include <luci/Profile/CircleNodeOrigin.h>
+#include <luci/IR/ExecutionPlanTable.h>
+
+#include <map>
+#include <set>
+#include <string>
+
+namespace luci
+{
+
+using OriginTable = std::map<uint32_t, std::shared_ptr<CircleNodeOrigin>>;
+
+class CircleImportMetadata
+{
+public:
+ CircleImportMetadata() = delete;
+
+ CircleImportMetadata(const luci::CircleReader &reader);
+
+public:
+ /**
+ * @brief Create origin table using _source_table and _op_table in CircleImportMetadata
+ * @note For creating origin table, both _op_table and _source_table should exist.
+ * If one of them does not exist, empty table is returned.
+ */
+ const OriginTable origin_table(void);
+
+ const std::map<uint32_t, std::string> &source_table(void) const { return _source_table; }
+
+ const luci::ExecutionPlanTable &execution_plan_table(void) const { return _execution_plan_table; }
+
+private:
+ // Decoded metadata is stored in the tables below
+ std::map<uint32_t, std::string> _source_table;
+ std::map<uint32_t, std::set<uint32_t>> _op_table;
+ // _execution_plan_table maps a node's id to its execution order followed by
+ // the offsets of its output tensors
+ luci::ExecutionPlanTable _execution_plan_table;
+};
+
+} // namespace luci
+
+#endif // __LUCI_CIRCLE_IMPORT_METADATA_H__
diff --git a/compiler/luci/import/src/CircleReader.cpp b/compiler/luci/import/src/CircleReader.cpp
index bc7f39762..a42c3f913 100644
--- a/compiler/luci/import/src/CircleReader.cpp
+++ b/compiler/luci/import/src/CircleReader.cpp
@@ -16,6 +16,9 @@
#include "luci/Import/CircleReader.h"
+#include <mio_circle/Helper.h>
+
+#include <algorithm>
#include <memory>
#include <sstream>
#include <string>
@@ -23,52 +26,20 @@
namespace luci
{
-bool is_valid(const circle::OperatorCodeT &opcode)
-{
- circle::BuiltinOperator code = opcode.builtin_code;
- return (circle::BuiltinOperator_MIN <= code && code <= circle::BuiltinOperator_MAX);
-}
-
-bool is_custom(const circle::OperatorCodeT &opcode)
-{
- circle::BuiltinOperator code = opcode.builtin_code;
- return (code == circle::BuiltinOperator_CUSTOM);
-}
-
-std::string opcode_name(const circle::OperatorCodeT &opcode)
-{
- if (!is_valid(opcode))
- {
- std::ostringstream oss;
- oss << "(invalid)";
- return oss.str();
- }
-
- if (is_custom(opcode))
- {
- if (opcode.custom_code.empty())
- return "(invalid custom)";
-
- return opcode.custom_code;
- }
-
- circle::BuiltinOperator code = opcode.builtin_code;
- return circle::EnumNameBuiltinOperator(code);
-}
-
-const char *tensor_name(const circle::TensorT &tensor)
+const char *tensor_name(const circle::Tensor *tensor)
{
- static const char *kEmptyTensorName = "(noname)";
+ assert(tensor != nullptr);
- if (!tensor.name.empty())
- return tensor.name.c_str();
+ if (tensor->name() == nullptr || std::string(tensor->name()->c_str()).empty())
+ return "(noname)";
- return kEmptyTensorName;
+ return tensor->name()->c_str();
}
-const circle::QuantizationParametersT *tensor_quantization(const circle::TensorT &tensor)
+const circle::QuantizationParameters *tensor_quantization(const circle::Tensor *tensor)
{
- return tensor.quantization.get();
+ assert(tensor != nullptr);
+ return tensor->quantization();
}
loco::DataType luci_datatype(const circle::TensorType type)
@@ -86,7 +57,7 @@ loco::DataType luci_datatype(const circle::TensorType type)
case circle::TensorType_INT64:
return loco::DataType::S64;
case circle::TensorType_STRING:
- break;
+ return loco::DataType::STRING;
case circle::TensorType_BOOL:
return loco::DataType::BOOL;
case circle::TensorType_INT16:
@@ -115,7 +86,9 @@ FusedActFunc luci_actfunc(const circle::ActivationFunctionType type)
case circle::ActivationFunctionType::ActivationFunctionType_RELU6:
return luci::FusedActFunc::RELU6;
case circle::ActivationFunctionType::ActivationFunctionType_TANH:
- break;
+ return luci::FusedActFunc::TANH;
+ case circle::ActivationFunctionType::ActivationFunctionType_SIGN_BIT:
+ return luci::FusedActFunc::SIGN_BIT;
default:
break;
}
@@ -149,6 +122,65 @@ MirrorPadMode luci_mirrorpad_mode(const circle::MirrorPadMode mode)
return MirrorPadMode::UNDEFINED;
}
+luci::CircleFullyConnected::WeightsFormat
+luci_weights_format(const circle::FullyConnectedOptionsWeightsFormat weights_format)
+{
+ switch (weights_format)
+ {
+ case circle::FullyConnectedOptionsWeightsFormat_DEFAULT:
+ return luci::CircleFullyConnected::WeightsFormat::DEFAULT;
+ case circle::FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8:
+ return luci::CircleFullyConnected::WeightsFormat::SHUFFLED4x16INT8;
+ case circle::FullyConnectedOptionsWeightsFormat_SHUFFLED16x1FLOAT32:
+ return luci::CircleFullyConnected::WeightsFormat::SHUFFLED16x1FLOAT32;
+ default:
+ throw std::runtime_error("Invalid FullyConnectedOptionsWeightsFormat");
+ }
+}
+
+DimensionType luci_dim_type(const circle::DimensionType dim_type)
+{
+ switch (dim_type)
+ {
+ case circle::DimensionType_DENSE:
+ return DimensionType::DENSE;
+ case circle::DimensionType_SPARSE_CSR:
+ return DimensionType::SPARSE_CSR;
+ default:
+ throw std::runtime_error("Invalid DimensionType");
+ }
+}
+
+SparseIndexVector
+luci_sparse_index_vector(const circle::SparseIndexVectorUnion &sparse_index_vector)
+{
+ switch (sparse_index_vector.type)
+ {
+ case circle::SparseIndexVector_NONE:
+ return SparseIndexVector{SparseIndexVectorType::NONE, nullptr};
+ case circle::SparseIndexVector_Int32Vector:
+ {
+ const auto const_vec_ptr =
+ static_cast<const void *>(&(sparse_index_vector.AsInt32Vector()->values));
+ return SparseIndexVector{SparseIndexVectorType::I32, const_vec_ptr};
+ }
+ case circle::SparseIndexVector_Uint16Vector:
+ {
+ const auto const_vec_ptr =
+ static_cast<const void *>(&(sparse_index_vector.AsUint16Vector()->values));
+ return SparseIndexVector{SparseIndexVectorType::U16, const_vec_ptr};
+ }
+ case circle::SparseIndexVector_Uint8Vector:
+ {
+ const auto const_vec_ptr =
+ static_cast<const void *>(&(sparse_index_vector.AsUint8Vector()->values));
+ return SparseIndexVector{SparseIndexVectorType::U8, const_vec_ptr};
+ }
+ default:
+ throw std::runtime_error("Invalid SparseIndexVector type");
+ }
+}
+
std::unique_ptr<CircleQuantParam>
luci_quantparam(const circle::QuantizationParametersT *quantization)
{
@@ -174,83 +206,200 @@ luci_quantparam(const circle::QuantizationParametersT *quantization)
return nullptr;
}
-void copy_tensor_attributes(const circle::TensorT &tensor, CircleNode *node)
+std::unique_ptr<CircleQuantParam> luci_quantparam(const circle::QuantizationParameters *qparams)
+{
+ // create temporary unpacked API object
+ assert(qparams != nullptr);
+ circle::QuantizationParametersT quantization;
+ qparams->UnPackTo(&quantization);
+
+ return luci_quantparam(&quantization);
+}
+
+std::unique_ptr<SparsityParam> luci_sparsityparam(const circle::SparsityParametersT *sparsity)
{
+ assert(sparsity);
+ const auto &traversal_order = sparsity->traversal_order;
+ const auto &block_map = sparsity->block_map;
+ const auto &dim_metadata = sparsity->dim_metadata;
+
+ // TODO find a condition that should return nullptr
+ auto sparsityparam = std::make_unique<SparsityParam>();
+
+ sparsityparam->traversal_order = traversal_order;
+ sparsityparam->block_map = block_map;
+ for (const auto &dm : dim_metadata)
+ {
+ sparsityparam->dim_metadata.emplace_back(luci_dim_type(dm->format), dm->dense_size,
+ luci_sparse_index_vector(dm->array_segments),
+ luci_sparse_index_vector(dm->array_indices));
+ }
+
+ return sparsityparam;
+}
+
+std::unique_ptr<SparsityParam> luci_sparsityparam(const circle::SparsityParameters *sparparam)
+{
+ // create temporary unpacked API object
+ assert(sparparam != nullptr);
+ circle::SparsityParametersT sparsity;
+ sparparam->UnPackTo(&sparsity);
+
+ return luci_sparsityparam(&sparsity);
+}
+
+void copy_tensor_attributes(const circle::Tensor *tensor, CircleNode *node)
+{
+ assert(tensor != nullptr);
+
node->name(tensor_name(tensor));
- node->dtype(luci_datatype(tensor.type));
+ node->dtype(luci_datatype(tensor->type()));
+
+ const auto tensor_shape_signature = wrap(tensor->shape_signature());
+ const auto tensor_shape = wrap(tensor->shape());
+ assert(tensor_shape_signature.size() == 0 ||
+ tensor_shape_signature.size() == tensor_shape.size());
- std::vector<int32_t> dims = tensor.shape; // in NHWC
+ const auto dims = tensor_shape; // in NHWC
node->rank(dims.size());
for (uint32_t r = 0; r < dims.size(); ++r)
{
- node->dim(r) = loco::Dimension(dims[r]);
+ if (tensor_shape_signature.size() > 0 && tensor_shape_signature.at(r) == -1)
+ node->dim(r).unset();
+ else
+ node->dim(r).set(dims[r]);
}
- const auto *quantization = tensor.quantization.get();
+ const auto quantization = tensor->quantization();
if (quantization != nullptr)
{
auto quantparam = luci_quantparam(quantization);
if (quantparam)
node->quantparam(std::move(quantparam));
}
+
+ const auto sparsity = tensor->sparsity();
+ if (sparsity != nullptr)
+ {
+ auto sparsityparam = luci_sparsityparam(sparsity);
+ if (sparsityparam)
+ node->sparsityparam(std::move(sparsityparam));
+ }
}
-circle::BuiltinOperator CircleReader::builtin_code(const circle::OperatorT &op) const
+std::string fb_string2std_string(const flatbuffers::String *fb_str)
{
- const auto &op_codes = opcodes();
- uint32_t index = op.opcode_index;
+ return fb_str == nullptr ? "" : fb_str->str();
+}
+
+circle::BuiltinOperator CircleReader::builtin_code(const circle::Operator *op) const
+{
+ assert(op != nullptr);
+
+ const auto op_codes = opcodes();
+ uint32_t index = op->opcode_index();
assert(index < op_codes.size());
- const circle::OperatorCodeT &opcode = *op_codes[index];
+ const auto opcode = op_codes[index];
+ assert(opcode != nullptr);
- return opcode.builtin_code;
+ return mio::circle::builtin_code_neutral(opcode);
}
-std::string CircleReader::opcode_name(const circle::OperatorT &op) const
+std::string CircleReader::opcode_name(const circle::Operator *op) const
{
- const auto &op_codes = opcodes();
- uint32_t index = op.opcode_index;
- assert(index < op_codes.size());
- const circle::OperatorCodeT &opcode = *op_codes[index];
+ assert(op != nullptr);
- if (!is_valid(opcode))
- {
- std::ostringstream oss;
- oss << "(invalid: " << index << ")";
- return oss.str();
- }
+ const auto op_codes = opcodes();
+ uint32_t index = op->opcode_index();
+ assert(index < op_codes.size());
+ const auto opcode = op_codes[index];
- return ::luci::opcode_name(opcode);
+ return mio::circle::opcode_name(opcode);
}
bool CircleReader::parse(const circle::Model *model)
{
assert(model != nullptr);
- _model.reset(model->UnPack());
-
// for direct pointer access
- _model_ptr = model;
+ _model = model;
return true;
}
bool CircleReader::select_subgraph(uint32_t sgindex)
{
- if (_model->subgraphs.size() <= sgindex)
+ if (num_subgraph() <= sgindex)
{
assert(false);
return false;
}
- _current_subgraph = _model->subgraphs[sgindex].get();
-
// for direct pointer access
- auto subgraphs = _model_ptr->subgraphs();
- const circle::SubGraph *subgraph = (*subgraphs)[sgindex];
+ auto subgraphs = _model->subgraphs();
+ assert(subgraphs != nullptr);
- _tensors_ptr = subgraph->tensors();
+ _current_subgraph = subgraphs->Get(sgindex);
+ assert(_current_subgraph != nullptr);
return true;
}
+template <typename T>
+VectorWrapper<T>::VectorWrapper(const flatbuffers::Vector<T> *ptr) : _vector(ptr)
+{
+ // Do nothing
+}
+
+template <typename T> uint32_t VectorWrapper<T>::size() const
+{
+ return null() ? 0 : _vector->size();
+}
+
+template <typename T> const T *VectorWrapper<T>::data() const
+{
+ return null() ? nullptr : _vector->data();
+}
+
+template <typename T> typename VectorWrapper<T>::iterator VectorWrapper<T>::begin() const
+{
+ return null() ? iterator(nullptr, 0) : _vector->begin();
+}
+
+template <typename T> typename VectorWrapper<T>::iterator VectorWrapper<T>::end() const
+{
+ return null() ? begin() : _vector->end();
+}
+
+template <typename T> typename VectorWrapper<T>::value_type VectorWrapper<T>::at(uint32_t i) const
+{
+ if (i >= size())
+ {
+ // TODO find better error message
+ throw std::range_error("Access to prohibited vector element");
+ }
+
+ return _vector->Get(i);
+}
+
+template <typename T>
+typename VectorWrapper<T>::value_type VectorWrapper<T>::operator[](uint32_t i) const
+{
+ return at(i);
+}
+
+template <typename T> bool VectorWrapper<T>::null() const { return _vector == nullptr; }
+template <typename T> bool VectorWrapper<T>::empty() const { return size() == 0; }
+
+#define REGISTER_WRAPPER(T) template class VectorWrapper<T>
+REGISTER_WRAPPER(flatbuffers::Offset<circle::SubGraph>);
+REGISTER_WRAPPER(flatbuffers::Offset<circle::Buffer>);
+REGISTER_WRAPPER(flatbuffers::Offset<circle::Tensor>);
+REGISTER_WRAPPER(flatbuffers::Offset<circle::Operator>);
+REGISTER_WRAPPER(flatbuffers::Offset<circle::OperatorCode>);
+REGISTER_WRAPPER(flatbuffers::Offset<circle::Metadata>);
+REGISTER_WRAPPER(int32_t);
+REGISTER_WRAPPER(uint8_t);
+#undef REGISTER_WRAPPER
+
} // namespace luci
diff --git a/compiler/luci/import/src/CircleReader.test.cpp b/compiler/luci/import/src/CircleReader.test.cpp
new file mode 100644
index 000000000..0ce5b600b
--- /dev/null
+++ b/compiler/luci/import/src/CircleReader.test.cpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/CircleReader.h"
+
+#include <gtest/gtest.h>
+
+TEST(VectorWrapperTest, basic_pattern)
+{
+ auto fb_builder = flatbuffers::FlatBufferBuilder();
+
+ const std::vector<int32_t> data = {1, 4, 2, 0, 7};
+ auto const vec_offset = fb_builder.CreateVector(data.data(), data.size());
+ auto const vec_pointer = GetTemporaryPointer(fb_builder, vec_offset);
+
+ auto const wrapper = luci::wrap(vec_pointer);
+
+ ASSERT_EQ(wrapper.size(), data.size());
+ ASSERT_TRUE(std::equal(wrapper.begin(), wrapper.end(), data.begin()));
+}
+
+TEST(VectorWrapperTest, wrong_data_NEG)
+{
+ auto fb_builder = flatbuffers::FlatBufferBuilder();
+
+ std::vector<int32_t> data = {1, 4, 2, 0, 7};
+ auto const vec_offset = fb_builder.CreateVector(data.data(), data.size());
+ auto const vec_pointer = GetTemporaryPointer(fb_builder, vec_offset);
+
+ auto const wrapper = luci::wrap(vec_pointer);
+
+ // change data
+ std::reverse(data.begin(), data.end());
+
+ ASSERT_EQ(wrapper.size(), data.size());
+ ASSERT_FALSE(std::equal(wrapper.begin(), wrapper.end(), data.begin()));
+}
+
+TEST(VectorWrapperTest, null_pointer)
+{
+ flatbuffers::Vector<int32_t> *vec_pointer = nullptr;
+ auto const wrapper = luci::wrap(vec_pointer);
+
+ ASSERT_TRUE(wrapper.null());
+ ASSERT_TRUE(wrapper.empty());
+}
+
+TEST(VectorWrapperTest, prohibited_access_NEG)
+{
+ flatbuffers::Vector<uint8_t> *vec_pointer = nullptr;
+ auto const wrapper = luci::wrap(vec_pointer);
+
+ ASSERT_ANY_THROW(wrapper.at(0));
+}
diff --git a/compiler/luci/import/src/GraphBuilder.cpp b/compiler/luci/import/src/GraphBuilder.cpp
index 80a9f986a..4634be772 100644
--- a/compiler/luci/import/src/GraphBuilder.cpp
+++ b/compiler/luci/import/src/GraphBuilder.cpp
@@ -21,7 +21,7 @@
namespace luci
{
-void GraphBuilder::build(const circle::OperatorT &op, GraphBuilderContext *context) const
+CircleNode *GraphBuilder::build(const circle::OperatorT &op, GraphBuilderContext *context) const
{
LOGGER(l);
@@ -29,10 +29,9 @@ void GraphBuilder::build(const circle::OperatorT &op, GraphBuilderContext *conte
const std::vector<int32_t> &inputs = op.inputs;
const std::vector<int32_t> &outputs = op.outputs;
- const auto &tensors = context->reader()->tensors();
- const auto &opcodes = context->reader()->opcodes();
- auto tensors_ptr = context->reader()->tensors_ptr();
- assert(tensors_ptr != nullptr);
+ const auto tensors = context->reader()->tensors();
+ const auto opcodes = context->reader()->opcodes();
+ assert(!tensors.null());
std::vector<CircleNode *> input_nodes;
for (const int32_t input_tensor_index : inputs)
@@ -47,7 +46,8 @@ void GraphBuilder::build(const circle::OperatorT &op, GraphBuilderContext *conte
else
{
// If there is no tensor, insert CircleOutputExclude.
- input_nodes.push_back(context->graph()->nodes()->create<luci::CircleOutputExclude>());
+ auto *node = context->graph()->nodes()->create<luci::CircleOutputExclude>();
+ input_nodes.push_back(node);
}
}
@@ -56,16 +56,18 @@ void GraphBuilder::build(const circle::OperatorT &op, GraphBuilderContext *conte
// Set up node parameters.
assert(outputs.size() == 1);
{
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
+ const auto output_tensor = tensors[outputs[0]];
+ assert(output_tensor != nullptr);
copy_tensor_attributes(output_tensor, node);
// mark shape_status
- if (tensors_ptr->Get(outputs[0])->shape() == nullptr)
+ if (output_tensor->shape() == nullptr)
node->shape_status(ShapeStatus::NOSHAPE);
else
node->shape_status(ShapeStatus::VALID);
// mark operator version
- node->op_version(opcodes[op.opcode_index].get()->version);
+ assert(opcodes[op.opcode_index] != nullptr);
+ node->op_version(opcodes[op.opcode_index]->version());
}
// Register node's only output.
@@ -73,6 +75,8 @@ void GraphBuilder::build(const circle::OperatorT &op, GraphBuilderContext *conte
{
context->nodefinder()->enroll(outputs[0], node);
}
+
+ return node;
}
} // namespace luci
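To illustrate the contract that build() relies on, a hedged sketch of a concrete single-output builder, modeled on the CircleQuantizeGraphBuilder declared earlier in this diff (its actual implementation is not part of this section):

  CircleNode *CircleQuantizeGraphBuilder::build_node(const circle::OperatorT &,
                                                     const std::vector<CircleNode *> &inputs,
                                                     loco::Graph *graph) const
  {
    // build() has already resolved input tensor indices to nodes
    auto *node = graph->nodes()->create<CircleQuantize>();
    node->input(inputs.at(0));
    // name, dtype, shape_status and op_version are set afterwards by build()
    return node;
  }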
diff --git a/compiler/luci/import/src/GraphBuilderMultiOutput.cpp b/compiler/luci/import/src/GraphBuilderMultiOutput.cpp
new file mode 100644
index 000000000..7bcb57a3b
--- /dev/null
+++ b/compiler/luci/import/src/GraphBuilderMultiOutput.cpp
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/GraphBuilderMultiOutput.h"
+
+#include <luci/Log.h>
+
+namespace luci
+{
+
+CircleNode *GraphBuilderMultiOutput::build(const circle::OperatorT &op,
+ GraphBuilderContext *context) const
+{
+ LOGGER(l);
+
+ assert(context != nullptr);
+
+ const std::vector<int32_t> &inputs = op.inputs;
+ const std::vector<int32_t> &outputs = op.outputs;
+ const auto tensors = context->reader()->tensors();
+ const auto opcodes = context->reader()->opcodes();
+ assert(!tensors.null());
+
+ std::vector<CircleNode *> input_nodes;
+ for (const int32_t input_tensor_index : inputs)
+ {
+ if (input_tensor_index >= 0)
+ {
+ auto input = context->nodefinder()->node(input_tensor_index);
+ if (input == nullptr)
+ INFO(l) << "[luci] Warning: input node is null " << input_tensor_index << std::endl;
+ input_nodes.push_back(input);
+ }
+ else
+ {
+ // If there is no tensor, insert CircleOutputExclude.
+ auto *node = context->graph()->nodes()->create<luci::CircleOutputExclude>();
+ input_nodes.push_back(node);
+ }
+ }
+
+ BuildNodeArgs bna(op, context, input_nodes);
+ auto *node = build_node(bna);
+
+ uint32_t output_count = outputs.size();
+ // NOTE CustomOp inherits GraphBuilderMultiOutput and can have 0 outputs
+ if (output_count > 0)
+ {
+ // Let's use attributes from output 0 for this node
+ const auto output_tensor = tensors[outputs[0]];
+ assert(output_tensor != nullptr);
+ node->name(tensor_name(output_tensor));
+ node->dtype(luci_datatype(output_tensor->type()));
+
+ // mark operator version
+ assert(opcodes[op.opcode_index] != nullptr);
+ node->op_version(opcodes[op.opcode_index]->version());
+
+ // NOTE Quantization is not set on the multi-output node itself but on its virtual outputs
+ }
+
+ // Create the virtual output node(s) for each output tensor
+ for (uint32_t n = 0; n < output_count; ++n)
+ {
+ const auto output_tensor = tensors[outputs[n]];
+ assert(output_tensor != nullptr);
+
+ BuildOutArgs boa(node, n);
+ auto *nodeout = build_out(boa);
+
+ copy_tensor_attributes(output_tensor, nodeout);
+ // NOTE CxxxOut nodes may share the same name
+ // mark shape_status
+ if (output_tensor->shape() == nullptr)
+ nodeout->shape_status(ShapeStatus::NOSHAPE);
+ else
+ nodeout->shape_status(ShapeStatus::VALID);
+
+ context->nodefinder()->enroll(outputs[n], nodeout);
+ }
+
+ return node;
+}
+
+} // namespace luci
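For contrast with the single-output case, a sketch of the two hooks a multi-output builder implements, loosely modeled on CircleSplitGraphBuilder (option parsing omitted; the BuildNodeArgs/BuildOutArgs member names are assumed from the constructor arguments used in build() above):

  CircleNode *CircleSplitGraphBuilder::build_node(const BuildNodeArgs &bna) const
  {
    auto *node = bna.context->graph()->nodes()->create<CircleSplit>();
    node->split_dim(bna.input_nodes[0]);
    node->input(bna.input_nodes[1]);
    return node; // attributes of output 0 are copied by GraphBuilderMultiOutput::build()
  }

  CircleNode *CircleSplitGraphBuilder::build_out(const BuildOutArgs &boa) const
  {
    // one CircleSplitOut per output tensor; the caller enrolls it with the nodefinder
    auto *nodeout = boa.node->graph()->nodes()->create<CircleSplitOut>();
    nodeout->input(boa.node);
    nodeout->index(static_cast<int32_t>(boa.index));
    return nodeout;
  }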
diff --git a/compiler/luci/import/src/GraphBuilderRegistry.cpp b/compiler/luci/import/src/GraphBuilderRegistry.cpp
index c6bcacb54..9c868320d 100644
--- a/compiler/luci/import/src/GraphBuilderRegistry.cpp
+++ b/compiler/luci/import/src/GraphBuilderRegistry.cpp
@@ -37,19 +37,23 @@ GraphBuilderRegistry::GraphBuilderRegistry()
CIRCLE_NODE(BATCH_TO_SPACE_ND, CircleBatchToSpaceNDGraphBuilder); // 37
CIRCLE_NODE(BCQ_FULLY_CONNECTED, CircleBCQFullyConnectedGraphBuilder); // 253
CIRCLE_NODE(BCQ_GATHER, CircleBCQGatherGraphBuilder); // 252
+ CIRCLE_NODE(BIDIRECTIONAL_SEQUENCE_LSTM, CircleBidirectionalSequenceLSTMGraphBuilder); // 52
CIRCLE_NODE(CAST, CircleCastGraphBuilder); // 53
CIRCLE_NODE(CEIL, CircleCeilGraphBuilder); // 104
CIRCLE_NODE(CUSTOM, CircleCustomGraphBuilder); // 32
CIRCLE_NODE(CONCATENATION, CircleConcatenationGraphBuilder); // 2
CIRCLE_NODE(CONV_2D, CircleConv2DGraphBuilder); // 3
CIRCLE_NODE(COS, CircleCosGraphBuilder); // 108
+ CIRCLE_NODE(DENSIFY, CircleDensifyGraphBuilder); // 124
CIRCLE_NODE(DEPTH_TO_SPACE, CircleDepthToSpaceGraphBuilder); // 5
CIRCLE_NODE(DEPTHWISE_CONV_2D, CircleDepthwiseConv2DGraphBuilder); // 4
+ CIRCLE_NODE(DEQUANTIZE, CircleDequantizeGraphBuilder); // 6
CIRCLE_NODE(DIV, CircleDivGraphBuilder); // 42
CIRCLE_NODE(ELU, CircleEluGraphBuilder); // 111
CIRCLE_NODE(EQUAL, CircleEqualGraphBuilder); // 71
CIRCLE_NODE(EXP, CircleExpGraphBuilder); // 47
CIRCLE_NODE(EXPAND_DIMS, CircleExpandDimsGraphBuilder); // 70
+ CIRCLE_NODE(FAKE_QUANT, CircleFakeQuantGraphBuilder); // 80
CIRCLE_NODE(FILL, CircleFillGraphBuilder); // 94
CIRCLE_NODE(FLOOR, CircleFloorGraphBuilder); // 8
CIRCLE_NODE(FLOOR_DIV, CircleFloorDivGraphBuilder); // 90
@@ -57,13 +61,15 @@ GraphBuilderRegistry::GraphBuilderRegistry()
CIRCLE_NODE(FULLY_CONNECTED, CircleFullyConnectedGraphBuilder); // 9
CIRCLE_NODE(GATHER, CircleGatherGraphBuilder); // 36
CIRCLE_NODE(GATHER_ND, CircleGatherNdGraphBuilder); // 107
+ CIRCLE_NODE(GELU, CircleGeluGraphBuilder); // 150
CIRCLE_NODE(GREATER, CircleGreaterGraphBuilder); // 61
CIRCLE_NODE(GREATER_EQUAL, CircleGreaterEqualGraphBuilder); // 62
+ CIRCLE_NODE(HARD_SWISH, CircleHardSwishGraphBuilder); // 117
CIRCLE_NODE(IF, CircleIfGraphBuilder); // 118
CIRCLE_NODE(INSTANCE_NORM, CircleInstanceNormGraphBuilder); // 254
CIRCLE_NODE(L2_NORMALIZATION, CircleL2NormalizeGraphBuilder); // 11
CIRCLE_NODE(L2_POOL_2D, CircleL2Pool2DGraphBuilder); // 12
- CIRCLE_NODE(LEAKY_RELU, CircleLeakyReluGraphBuilder); // 98,
+ CIRCLE_NODE(LEAKY_RELU, CircleLeakyReluGraphBuilder); // 98
CIRCLE_NODE(LESS, CircleLessGraphBuilder); // 58
CIRCLE_NODE(LESS_EQUAL, CircleLessEqualGraphBuilder); // 63
CIRCLE_NODE(LOCAL_RESPONSE_NORMALIZATION, CircleLocalResponseNormalizationGraphBuilder); // 13
@@ -82,15 +88,16 @@ GraphBuilderRegistry::GraphBuilderRegistry()
CIRCLE_NODE(MIRROR_PAD, CircleMirrorPadGraphBuilder); // 100
CIRCLE_NODE(MUL, CircleMulGraphBuilder); // 18
CIRCLE_NODE(NEG, CircleNegGraphBuilder); // 59
- CIRCLE_NODE(NON_MAX_SUPPRESSION_V4, CircleNonMaxSuppressionV4GraphBuilder); // 120,
- CIRCLE_NODE(NON_MAX_SUPPRESSION_V5, CircleNonMaxSuppressionV5GraphBuilder); // 121,
+ CIRCLE_NODE(NON_MAX_SUPPRESSION_V4, CircleNonMaxSuppressionV4GraphBuilder); // 120
+ CIRCLE_NODE(NON_MAX_SUPPRESSION_V5, CircleNonMaxSuppressionV5GraphBuilder); // 121
CIRCLE_NODE(NOT_EQUAL, CircleNotEqualGraphBuilder); // 72
CIRCLE_NODE(ONE_HOT, CircleOneHotGraphBuilder); // 85
CIRCLE_NODE(PACK, CirclePackGraphBuilder); // 83
CIRCLE_NODE(PAD, CirclePadGraphBuilder); // 34
CIRCLE_NODE(PADV2, CirclePadV2GraphBuilder); // 60
CIRCLE_NODE(POW, CirclePowGraphBuilder); // 78
- CIRCLE_NODE(PRELU, CirclePReluGraphBuilder); // 54,
+ CIRCLE_NODE(PRELU, CirclePReluGraphBuilder); // 54
+ CIRCLE_NODE(QUANTIZE, CircleQuantizeGraphBuilder); // 114
CIRCLE_NODE(RANGE, CircleRangeGraphBuilder); // 96
CIRCLE_NODE(RANK, CircleRankGraphBuilder); // 110
CIRCLE_NODE(REDUCE_ANY, CircleReduceAnyGraphBuilder); // 91
@@ -127,11 +134,13 @@ GraphBuilderRegistry::GraphBuilderRegistry()
CIRCLE_NODE(STRIDED_SLICE, CircleStridedSliceGraphBuilder); // 45
CIRCLE_NODE(SUB, CircleSubGraphBuilder); // 41
CIRCLE_NODE(SUM, CircleSumGraphBuilder); // 74
+ CIRCLE_NODE(SVDF, CircleSVDFBuilder); // 27
CIRCLE_NODE(TANH, CircleTanhGraphBuilder); // 28
CIRCLE_NODE(TILE, CircleTileGraphBuilder); // 69
CIRCLE_NODE(TOPK_V2, CircleTopKV2GraphBuilder); // 48
CIRCLE_NODE(TRANSPOSE, CircleTransposeGraphBuilder); // 39
CIRCLE_NODE(TRANSPOSE_CONV, CircleTransposeConvGraphBuilder); // 67
+ CIRCLE_NODE(UNIDIRECTIONAL_SEQUENCE_LSTM, CircleUnidirectionalSequenceLSTMGraphBuilder); // 44
CIRCLE_NODE(UNIQUE, CircleUniqueGraphBuilder); // 103
CIRCLE_NODE(UNPACK, CircleUnpackGraphBuilder); // 88
CIRCLE_NODE(WHERE, CircleWhereGraphBuilder); // 109
@@ -140,27 +149,26 @@ GraphBuilderRegistry::GraphBuilderRegistry()
#undef CIRCLE_NODE
- // BuiltinOperator_DEQUANTIZE = 6,
// BuiltinOperator_EMBEDDING_LOOKUP = 7,
// BuiltinOperator_HASHTABLE_LOOKUP = 10,
// BuiltinOperator_LSH_PROJECTION = 15,
// BuiltinOperator_LSTM = 16,
// BuiltinOperator_RNN = 24,
- // BuiltinOperator_SVDF = 27,
// BuiltinOperator_CONCAT_EMBEDDINGS = 29,
// BuiltinOperator_SKIP_GRAM = 30,
// BuiltinOperator_CALL = 31,
// BuiltinOperator_EMBEDDING_LOOKUP_SPARSE = 33,
// BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN = 35,
- // BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
// BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN = 46,
// BuiltinOperator_DELEGATE = 51,
- // BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM = 52,
// BuiltinOperator_ARG_MAX = 56,
- // BuiltinOperator_FAKE_QUANT = 80,
- // BuiltinOperator_QUANTIZE = 114,
- // BuiltinOperator_HARD_SWISH = 117,
- // BuiltinOperator_DENSIFY = 124,
+
+ // Register builders for nodes which are not handled by the builders registered above.
+#define CIRCLE_NODE(CLASS) add(std::make_unique<CLASS>())
+
+ CIRCLE_NODE(CircleConstNodeBuilder);
+
+#undef CIRCLE_NODE
}
} // namespace luci
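The CIRCLE_NODE(OPCODE, CLASS) form used above is defined before this hunk; its assumed expansion keys each builder by builtin operator code, while the redefined single-argument form registers tensor-driven builders by NodeBuilderType:

  // assumed shape of the earlier two-argument macro
  #define CIRCLE_NODE(OPCODE, CLASS) add(circle::BuiltinOperator_##OPCODE, std::make_unique<CLASS>())
  // e.g. CIRCLE_NODE(QUANTIZE, CircleQuantizeGraphBuilder) expands to
  //   add(circle::BuiltinOperator_QUANTIZE, std::make_unique<CircleQuantizeGraphBuilder>());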
diff --git a/compiler/luci/import/src/Importer.cpp b/compiler/luci/import/src/Importer.cpp
index ab89f3587..15de03df2 100644
--- a/compiler/luci/import/src/Importer.cpp
+++ b/compiler/luci/import/src/Importer.cpp
@@ -15,6 +15,7 @@
*/
#include "luci/Importer.h"
+#include "CircleImportMetadata.h"
#include "PostImport.h"
#include "luci/Import/GraphBuilder.h"
@@ -22,9 +23,13 @@
#include "luci/Import/GraphBuilderRegistry.h"
#include "luci/Import/CircleReader.h"
#include "luci/Import/Nodes/CircleConst.h"
+#include "luci/Import/Nodes/CircleVariable.h"
#include <luci/IR/Module.h>
#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeID.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+#include <luci/Plan/CircleNodeExecutionPlan.h>
#include <luci/Log.h>
#include <luci/LogHelper.h>
@@ -46,17 +51,18 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r
luci::GraphBuilderContext gb_context(graph, &reader, nodefinder.get(), tensoroutputs.get());
- const auto &operators = reader.operators();
- const auto &tensors = reader.tensors();
- auto tensors_ptr = reader.tensors_ptr();
- assert(tensors_ptr != nullptr);
+ const auto operators = reader.operators();
+ const auto tensors = reader.tensors();
+ assert(!tensors.null());
+ auto circle_metadata = std::make_unique<luci::CircleImportMetadata>(reader);
// build a cache to identify if a tensor is output of an operator
// if this is set, we should not create a CircleConst for this tensor
for (uint32_t i = 0; i < operators.size(); ++i)
{
- const circle::OperatorT &op = *operators[i];
- const auto &outputs = op.outputs;
+ const auto op = operators[i];
+ assert(op != nullptr);
+ const auto outputs = luci::wrap(op->outputs());
for (uint32_t j = 0; j < outputs.size(); ++j)
{
@@ -72,10 +78,11 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r
{
auto input_node = graph->nodes()->create<luci::CircleInput>();
assert(input_node != nullptr);
- const circle::TensorT &tensor = *tensors[input];
+ const auto tensor = tensors[input];
+ assert(tensor != nullptr);
luci::copy_tensor_attributes(tensor, input_node);
- if (tensors_ptr->Get(input)->shape() == nullptr)
+ if (tensor->shape() == nullptr)
input_node->shape_status(luci::ShapeStatus::NOSHAPE);
else
input_node->shape_status(luci::ShapeStatus::VALID);
@@ -96,41 +103,76 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r
// Data type
graph_input->dtype(input_node->dtype());
+ const auto tensor_shape_signature = luci::wrap(tensor->shape_signature());
+ const auto tensor_shape = luci::wrap(tensor->shape());
+ assert(tensor_shape_signature.size() == 0 ||
+ tensor_shape_signature.size() == tensor_shape.size());
+
// Shape of GraphInput
auto input_shape = std::make_unique<loco::TensorShape>();
- const std::vector<int32_t> &input_dims = tensor.shape; // in NHWC
+ const auto &input_dims = tensor_shape; // in NHWC
input_shape->rank(input_dims.size());
for (uint32_t r = 0; r < input_dims.size(); ++r)
- input_shape->dim(r) = loco::Dimension(input_dims[r]);
+ {
+ if (tensor_shape_signature.size() > 0 && tensor_shape_signature.at(r) == -1)
+ input_shape->dim(r).unset();
+ else
+ input_shape->dim(r).set(input_dims[r]);
+ }
graph_input->shape(std::move(input_shape));
}
- // Create CircleConst nodes for constant tensors.
+ // Create CircleNodes for constant tensors.
+ // NOTE Origin is intentionally not provided for constants.
+ auto const_builder = source.lookup(luci::NodeBuilderType::BUFFER);
+ if (not const_builder)
+ throw oops::UserExn("Not supported", "tensor with buffer builder");
+
for (uint32_t i = 0; i < tensors.size(); ++i)
{
- luci::CircleConst *const_node = luci::create_circleconst(&gb_context, i);
+ auto *const_node = const_builder->build(i, &gb_context);
if (const_node != nullptr)
nodefinder->enroll(i, const_node);
}
+ // Create CircleVariable nodes for variable tensors
+ // TODO Add Origin if needed, skip for now
+ for (uint32_t i = 0; i < tensors.size(); ++i)
+ {
+ luci::CircleVariable *variable_node = luci::create_circlevariable(&gb_context, i);
+ if (variable_node != nullptr)
+ nodefinder->enroll(i, variable_node);
+ }
+
// Import the operators.
// Note that operators in model are stored in execution order. This means that when importing
// an operator, its input operators have already been imported. We exploit this fact to set up
// node's inputs right after creating the node.
+ auto origin_table = circle_metadata->origin_table();
for (uint32_t i = 0; i < operators.size(); ++i)
{
- const circle::OperatorT &op = *operators[i];
+ const auto op = operators[i];
+ assert(op != nullptr);
circle::BuiltinOperator builtincode = reader.builtin_code(op);
if (const auto *builder = source.lookup(builtincode))
{
- luci::GraphBuilder::ValidateArgs args(op, reader);
+ // create temporary unpacked API object
+ circle::OperatorT oper_t;
+ op->UnPackTo(&oper_t);
+
+ luci::GraphBuilder::ValidateArgs args(oper_t, reader);
if (!builder->validate(args))
{
throw oops::UserExn("Invalid operator", reader.opcode_name(op));
}
- builder->build(op, &gb_context);
+ auto built_op = builder->build(oper_t, &gb_context);
+ set_node_id(built_op, i);
+ if (origin_table.find(i) != origin_table.end())
+ add_origin(built_op, origin_table.at(i));
+ else
+ add_origin(built_op, luci::single_origin(i, built_op->name()));
}
else
{
@@ -141,7 +183,8 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r
// graph outputs
for (auto output : reader.outputs())
{
- const circle::TensorT &tensor = *tensors[output];
+ const auto tensor = tensors[output];
+ assert(tensor != nullptr);
auto output_node = graph->nodes()->create<luci::CircleOutput>();
assert(output_node != nullptr);
@@ -158,7 +201,7 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r
output_node->from(output_dummy);
luci::copy_tensor_attributes(tensor, output_dummy);
- if (tensors_ptr->Get(output)->shape() == nullptr)
+ if (tensor->shape() == nullptr)
output_dummy->shape_status(luci::ShapeStatus::NOSHAPE);
else
output_dummy->shape_status(luci::ShapeStatus::VALID);
@@ -169,23 +212,34 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r
// set the graph output name and node object
auto graph_output = graph->outputs()->create();
std::string tname = luci::tensor_name(tensor);
- graph_output->name("output_" + tname);
+ assert(tname.length() > 0);
+ graph_output->name(tname);
luci::copy_tensor_attributes(tensor, output_node);
// Set GraphInputOutputIndex for graph
output_node->index(graph_output->index());
+ const auto tensor_shape_signature = luci::wrap(tensor->shape_signature());
+ const auto tensor_shape = luci::wrap(tensor->shape());
+ assert(tensor_shape_signature.size() == 0 ||
+ tensor_shape_signature.size() == tensor_shape.size());
+
// Shape of Output
auto output_shape = std::make_unique<loco::TensorShape>();
- const std::vector<int32_t> &output_dims = tensor.shape; // in NHWC
+ const auto &output_dims = tensor_shape; // in NHWC
output_shape->rank(output_dims.size());
for (uint32_t r = 0; r < output_dims.size(); ++r)
- output_shape->dim(r) = loco::Dimension(output_dims[r]);
+ {
+ if (tensor_shape_signature.size() > 0 && tensor_shape_signature.at(r) == -1)
+ output_shape->dim(r).unset();
+ else
+ output_shape->dim(r).set(output_dims[r]);
+ }
graph_output->shape(std::move(output_shape));
// Data type
- auto dtype = luci::luci_datatype(tensor.type);
+ auto dtype = luci::luci_datatype(tensor->type());
graph_output->dtype(dtype);
}
}
@@ -288,6 +342,59 @@ std::unique_ptr<Module> Importer::importModule(const circle::Model *model) const
post_import_graph(module.get(), reader);
+ // Initialize 'source_table'
+ auto circle_metadata = std::make_unique<luci::CircleImportMetadata>(reader);
+ if (circle_metadata->source_table().size() > 0)
+ {
+ // If there is 'source_table' metadata in circle model, copy the table.
+ module->source_table(circle_metadata->source_table());
+ }
+ else
+ {
+ // If there is no 'source_table' metadata in circle model,
+ // create new table with circle nodes.
+ std::map<uint32_t, std::string> table;
+
+ // NOTE Only first subgraph is considered
+ for (auto node : loco::all_nodes(module->graph(0)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+
+ // Virtual nodes may not have an id
+ if (!has_node_id(circle_node))
+ continue;
+
+ assert(table.find(get_node_id(circle_node)) == table.end());
+ table.insert({get_node_id(circle_node), circle_node->name()});
+ }
+
+ module->source_table(table);
+ }
+
+ // Add execution_plan annotations
+ if (circle_metadata->execution_plan_table().size() > 0)
+ {
+ auto execution_plan_table = circle_metadata->execution_plan_table();
+ auto node_position = 0;
+ for (auto node : loco::postorder_traversal(loco::output_nodes(module->graph())))
+ {
+ if (auto circle_node = dynamic_cast<luci::CircleNode *>(node))
+ {
+ if (execution_plan_table.count(node_position) == 0)
+ continue;
+
+ auto node_plan = execution_plan_table[node_position];
+ assert(node_plan.size() > 0);
+
+ luci::add_execution_plan(
+ circle_node,
+ luci::CircleNodeExecutionPlan(
+ node_plan[0], std::vector<uint32_t>(node_plan.begin() + 1, node_plan.end())));
+ }
+ node_position++;
+ }
+ }
+
return module;
}
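To make the annotation layout concrete: each decoded node_plan packs the execution order first, then the output tensor offsets, so a plan of {2, 64, 128} reads as "executed third, outputs at offsets 64 and 128". A minimal sketch (the CircleNodeExecutionPlan constructor signature is assumed from the call above):

  std::vector<uint32_t> node_plan = {2, 64, 128};
  luci::CircleNodeExecutionPlan plan(
    node_plan[0],                                                   // order in the execution plan
    std::vector<uint32_t>(node_plan.begin() + 1, node_plan.end())); // output tensor offsets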
diff --git a/compiler/luci/import/src/Importer.test.cpp b/compiler/luci/import/src/Importer.test.cpp
index 8366546f0..91e4860ea 100644
--- a/compiler/luci/import/src/Importer.test.cpp
+++ b/compiler/luci/import/src/Importer.test.cpp
@@ -16,13 +16,338 @@
#include "luci/Importer.h"
-#include <loco.h>
+#include <luci/IR/CircleNode.h>
+#include <luci/Plan/CircleNodeExecutionPlan.h>
#include <gtest/gtest.h>
+#include <mio/circle/schema_generated.h>
+#include <flatbuffers/flatbuffers.h>
-TEST(TensorFlowLiteImport, Dummy)
+TEST(CircleImport, Dummy)
{
luci::Importer import;
SUCCEED();
}
+
+// helpers for flatbuffers
+namespace
+{
+
+struct BasicCircleModel
+{
+ std::unique_ptr<circle::ModelT> model;
+
+ BasicCircleModel()
+ {
+ model = std::make_unique<circle::ModelT>();
+ model->buffers.push_back(std::make_unique<circle::BufferT>());
+ model->description = "nnpackage";
+ model->version = 0;
+ }
+
+ uint32_t add_subgraph()
+ {
+ model->subgraphs.push_back(std::make_unique<circle::SubGraphT>());
+ model->subgraphs.back()->name = "";
+ model->subgraphs.back()->data_format = circle::DataFormat_CHANNELS_LAST;
+ return model->subgraphs.size() - 1;
+ }
+
+ void add_subgraph_inputs(uint32_t subgraph_id, const std::vector<uint32_t> &inputs)
+ {
+ model->subgraphs[subgraph_id]->inputs.assign(inputs.begin(), inputs.end());
+ }
+
+ void add_subgraph_outputs(uint32_t subgraph_id, const std::vector<uint32_t> &outputs)
+ {
+ model->subgraphs[subgraph_id]->outputs.assign(outputs.begin(), outputs.end());
+ }
+
+ uint32_t add_builtin_opcode(circle::BuiltinOperator opcode)
+ {
+ uint32_t id = model->operator_codes.size();
+ model->operator_codes.push_back(std::make_unique<circle::OperatorCodeT>());
+ model->operator_codes[id]->deprecated_builtin_code = opcode;
+ model->operator_codes[id]->builtin_code = opcode;
+ model->operator_codes[id]->version = 1;
+ return id;
+ }
+
+ uint32_t add_buffer()
+ {
+ model->buffers.push_back(std::make_unique<circle::BufferT>());
+ return model->buffers.size() - 1;
+ }
+
+ uint32_t add_float_tensor(uint32_t graph_id, const std::vector<int32_t> &shape,
+ uint32_t buffer_id)
+ {
+ auto &graph = model->subgraphs[graph_id];
+ uint32_t idx = graph->tensors.size();
+ graph->tensors.push_back(std::make_unique<circle::TensorT>());
+ graph->tensors[idx]->shape = shape;
+ graph->tensors[idx]->type = circle::TensorType_FLOAT32;
+ graph->tensors[idx]->buffer = buffer_id;
+ graph->tensors[idx]->name = std::to_string(idx);
+ graph->tensors[idx]->quantization.reset(nullptr);
+ graph->tensors[idx]->is_variable = false;
+ graph->tensors[idx]->sparsity.reset(nullptr);
+ (void)graph->tensors[idx]->shape_signature;
+ return idx;
+ }
+
+ uint32_t add_builtin_operator(uint32_t graph_id, uint32_t opcode_id,
+ const std::vector<uint32_t> &inputs,
+ const std::vector<uint32_t> &outputs)
+ {
+ auto &graph = model->subgraphs[graph_id];
+ auto idx = graph->operators.size();
+ graph->operators.push_back(std::make_unique<circle::OperatorT>());
+ graph->operators[idx]->opcode_index = opcode_id;
+ graph->operators[idx]->inputs.assign(inputs.begin(), inputs.end());
+ graph->operators[idx]->outputs.assign(outputs.begin(), outputs.end());
+ graph->operators[idx]->builtin_options.Reset();
+ (void)graph->operators[idx]->custom_options;
+ graph->operators[idx]->custom_options_format = circle::CustomOptionsFormat_FLEXBUFFERS;
+ (void)graph->operators[idx]->mutating_variable_inputs;
+ (void)graph->operators[idx]->intermediates;
+ return idx;
+ }
+
+ uint32_t add_plan_metadata(uint32_t buffer_id)
+ {
+ static_assert(sizeof(uint32_t) == 4, "metadata is stored in blocks of 32-bit unsigned ints");
+ uint32_t idx = model->metadata.size();
+ model->metadata.push_back(std::make_unique<circle::MetadataT>());
+ model->metadata[idx]->name = "ONE_execution_plan_table";
+ model->metadata[idx]->buffer = buffer_id;
+ model->buffers[buffer_id]->data.resize(4);
+ auto &entries_count = *reinterpret_cast<uint32_t *>(model->buffers[buffer_id]->data.data());
+ entries_count = 0;
+ return idx;
+ }
+
+ void add_plan_entry(uint32_t plan_buffer_id, uint32_t execution_order,
+ const std::vector<uint32_t> &offsets)
+ {
+ auto &buffer = model->buffers[plan_buffer_id]->data;
+ auto old_size = buffer.size();
+ assert(old_size % 4 == 0);
+ assert(old_size > 0);
+
+ // Allocate space for new entry:
+ // 4 bytes for entry id
+ // 4 bytes for entry size
+ // 4 bytes for execution order
+ // offsets.size() * 4 bytes for offsets
+ buffer.resize(old_size + 12 + offsets.size() * 4);
+ uint32_t *number_of_entries_ptr = reinterpret_cast<uint32_t *>(buffer.data());
+ *number_of_entries_ptr += 1;
+
+ uint32_t *entry_data_ptr = reinterpret_cast<uint32_t *>(buffer.data() + old_size);
+
+ entry_data_ptr[0] = *number_of_entries_ptr - 1; // entry id
+ entry_data_ptr[1] = 1 + offsets.size(); // entry size
+ entry_data_ptr[2] = execution_order; // execution order
+ std::copy(offsets.begin(), offsets.end(), entry_data_ptr + 3);
+ }
+};
+
+struct SimpleRELUModel : public BasicCircleModel
+{
+ SimpleRELUModel()
+ {
+ auto relu_opcode_id = add_builtin_opcode(circle::BuiltinOperator_RELU);
+
+ uint32_t subgraph_id = add_subgraph();
+
+ auto input_buffer_id = add_buffer();
+ auto output_buffer_id = add_buffer();
+
+ auto input_tensor_idx = add_float_tensor(subgraph_id, {1, 2, 3, 4}, input_buffer_id);
+ auto output_tensor_idx = add_float_tensor(subgraph_id, {1, 2, 3, 4}, output_buffer_id);
+
+ add_subgraph_inputs(subgraph_id, {input_tensor_idx});
+ add_subgraph_outputs(subgraph_id, {output_tensor_idx});
+
+ add_builtin_operator(subgraph_id, relu_opcode_id, {0}, {1});
+ }
+};
+
+} // namespace
+
+/**
+ * This test checks that a one-op RELU model with an execution plan is successfully imported
+ */
+TEST(CircleImport, simple_plan)
+{
+ SimpleRELUModel model;
+ auto metadata_buffer_id = model.add_buffer();
+ model.add_plan_metadata(metadata_buffer_id);
+
+ model.add_plan_entry(metadata_buffer_id, 1, {100});
+ model.add_plan_entry(metadata_buffer_id, 2, {300});
+ model.add_plan_entry(metadata_buffer_id, 3, {200});
+
+ flatbuffers::FlatBufferBuilder fbb;
+ auto model_offset = circle::Model::Pack(fbb, model.model.get(), nullptr);
+ circle::FinishModelBuffer(fbb, model_offset);
+
+ auto model_ptr = circle::GetModel(fbb.GetBufferPointer());
+ luci::Importer import;
+
+ auto luci_module = import.importModule(model_ptr);
+
+ auto main_graph = luci_module->graph();
+ for (int i = 0; i < main_graph->nodes()->size(); ++i)
+ {
+ auto node = loco::must_cast<luci::CircleNode *>(main_graph->nodes()->at(i));
+ switch (node->opcode())
+ {
+ case luci::CircleOpcode::CIRCLEINPUT:
+ {
+ ASSERT_TRUE(luci::has_execution_plan(node));
+ auto plan = luci::get_execution_plan(node);
+ ASSERT_EQ(plan.order_in_plan(), 1);
+ ASSERT_EQ(plan.offsets().size(), 1);
+ ASSERT_EQ(plan.offsets()[0], 100);
+ break;
+ }
+ case luci::CircleOpcode::CIRCLEOUTPUT:
+ {
+ ASSERT_TRUE(luci::has_execution_plan(node));
+ auto plan = luci::get_execution_plan(node);
+ ASSERT_EQ(plan.order_in_plan(), 3);
+ ASSERT_EQ(plan.offsets().size(), 1);
+ ASSERT_EQ(plan.offsets()[0], 200);
+ break;
+ }
+ case luci::CircleOpcode::RELU:
+ {
+ ASSERT_TRUE(luci::has_execution_plan(node));
+ auto plan = luci::get_execution_plan(node);
+ ASSERT_EQ(plan.order_in_plan(), 2);
+ ASSERT_EQ(plan.offsets().size(), 1);
+ ASSERT_EQ(plan.offsets()[0], 300);
+ break;
+ }
+ default:
+ FAIL();
+ }
+ }
+}
+
+/**
+ * This test checks that a model with an incomplete execution plan is still imported successfully
+ */
+TEST(CircleImport, incomplete_plan_NEG)
+{
+ SimpleRELUModel model;
+ auto metadata_buffer_id = model.add_buffer();
+ model.add_plan_metadata(metadata_buffer_id);
+
+ model.add_plan_entry(metadata_buffer_id, 1, {100});
+
+ flatbuffers::FlatBufferBuilder fbb;
+ auto model_offset = circle::Model::Pack(fbb, model.model.get(), nullptr);
+ circle::FinishModelBuffer(fbb, model_offset);
+
+ auto model_ptr = circle::GetModel(fbb.GetBufferPointer());
+ luci::Importer import;
+
+ auto luci_module = import.importModule(model_ptr);
+
+ auto main_graph = luci_module->graph();
+ for (int i = 0; i < main_graph->nodes()->size(); ++i)
+ {
+ auto node = loco::must_cast<luci::CircleNode *>(main_graph->nodes()->at(i));
+ switch (node->opcode())
+ {
+ case luci::CircleOpcode::CIRCLEINPUT:
+ {
+ ASSERT_TRUE(luci::has_execution_plan(node));
+ auto plan = luci::get_execution_plan(node);
+ ASSERT_EQ(plan.order_in_plan(), 1);
+ ASSERT_EQ(plan.offsets().size(), 1);
+ ASSERT_EQ(plan.offsets()[0], 100);
+ break;
+ }
+ case luci::CircleOpcode::CIRCLEOUTPUT:
+ case luci::CircleOpcode::RELU:
+ {
+ ASSERT_FALSE(luci::has_execution_plan(node));
+ break;
+ }
+ default:
+ FAIL();
+ }
+ }
+}
+
+/**
+ * This test checks that a corrupted execution plan induces an exception
+ */
+TEST(CircleImport, corrupted_plan_NEG)
+{
+ SimpleRELUModel model;
+ auto metadata_buffer_id = model.add_buffer();
+ model.add_plan_metadata(metadata_buffer_id);
+
+ model.add_plan_entry(metadata_buffer_id, 1, {100});
+ model.add_plan_entry(metadata_buffer_id, 2, {300});
+ model.add_plan_entry(metadata_buffer_id, 3, {200});
+
+ // corrupt data
+ *reinterpret_cast<uint32_t *>(model.model->buffers[metadata_buffer_id]->data.data()) = 4;
+
+ flatbuffers::FlatBufferBuilder fbb;
+ auto model_offset = circle::Model::Pack(fbb, model.model.get(), nullptr);
+ circle::FinishModelBuffer(fbb, model_offset);
+
+ auto model_ptr = circle::GetModel(fbb.GetBufferPointer());
+ luci::Importer import;
+
+ ASSERT_ANY_THROW(import.importModule(model_ptr));
+}
+
+/**
+ * This test checks that an empty execution plan entry induces an exception
+ */
+TEST(CircleImport, corrupted_plan_entry_NEG)
+{
+ SimpleRELUModel model;
+ auto metadata_buffer_id = model.add_buffer();
+ model.add_plan_metadata(metadata_buffer_id);
+
+ model.add_plan_entry(metadata_buffer_id, 1, {100});
+
+ // add a corrupted entry with zero size
+ {
+ auto &buffer = model.model->buffers[metadata_buffer_id]->data;
+ auto old_size = buffer.size();
+
+ // Allocate space for new entry:
+ // 4 bytes for entry id
+ // 4 bytes for entry size
+ buffer.resize(old_size + 8);
+ uint32_t *number_of_entries_ptr = reinterpret_cast<uint32_t *>(buffer.data());
+ *number_of_entries_ptr += 1;
+
+ uint32_t *entry_data_ptr = reinterpret_cast<uint32_t *>(buffer.data() + old_size);
+
+ entry_data_ptr[0] = *number_of_entries_ptr - 1; // entry id
+ entry_data_ptr[1] = 0; // entry size
+ }
+
+ model.add_plan_entry(metadata_buffer_id, 3, {200});
+
+ flatbuffers::FlatBufferBuilder fbb;
+ auto model_offset = circle::Model::Pack(fbb, model.model.get(), nullptr);
+ circle::FinishModelBuffer(fbb, model_offset);
+
+ auto model_ptr = circle::GetModel(fbb.GetBufferPointer());
+ luci::Importer import;
+
+ ASSERT_ANY_THROW(import.importModule(model_ptr));
+}
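
Given the helpers above, the buffer assembled in the simple_plan test decodes to thirteen host-endian uint32 words:

word  0     : 3                -- number of entries
words 1..4  : 0, 2, 1, 100     -- entry 0: id, size, order 1, offset 100
words 5..8  : 1, 2, 2, 300     -- entry 1: id, size, order 2, offset 300
words 9..12 : 2, 2, 3, 200     -- entry 2: id, size, order 3, offset 200
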
diff --git a/compiler/luci/import/src/ImporterEx.cpp b/compiler/luci/import/src/ImporterEx.cpp
new file mode 100644
index 000000000..db585fd4d
--- /dev/null
+++ b/compiler/luci/import/src/ImporterEx.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Importer.h"
+#include "luci/ImporterEx.h"
+
+#include <foder/FileLoader.h>
+
+#include <memory>
+#include <iostream>
+
+namespace luci
+{
+
+std::unique_ptr<Module> ImporterEx::importVerifyModule(const std::string &input_path) const
+{
+ foder::FileLoader file_loader{input_path};
+ std::vector<char> model_data;
+
+ try
+ {
+ model_data = file_loader.load();
+ }
+ catch (const std::runtime_error &err)
+ {
+ std::cerr << err.what() << std::endl;
+ return nullptr;
+ }
+
+ flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(model_data.data()), model_data.size()};
+ if (!circle::VerifyModelBuffer(verifier))
+ {
+ std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl;
+ return nullptr;
+ }
+
+ const circle::Model *circle_model = circle::GetModel(model_data.data());
+ if (circle_model == nullptr)
+ {
+ std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl;
+ return nullptr;
+ }
+
+ Importer importer;
+ return importer.importModule(circle_model);
+}
+
+} // namespace luci
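
A minimal caller sketch for the new entry point (the file name is illustrative; this assumes ImporterEx is default-constructible, as the const member function suggests):

#include <luci/ImporterEx.h>

int main()
{
  luci::ImporterEx importer;
  // nullptr signals a load or verification failure; the error is already printed
  auto module = importer.importVerifyModule("model.circle");
  return module ? 0 : 1;
}
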
diff --git a/compiler/luci/import/src/Nodes/CircleAbs.cpp b/compiler/luci/import/src/Nodes/CircleAbs.cpp
index 3556dc7fa..2a1601a21 100644
--- a/compiler/luci/import/src/Nodes/CircleAbs.cpp
+++ b/compiler/luci/import/src/Nodes/CircleAbs.cpp
@@ -24,11 +24,8 @@ namespace luci
{
bool CircleAbsGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
// TODO Support type check
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleAbsGraphBuilder::build_node(const circle::OperatorT &,
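
This and the following builders collapse their hand-written checks into GraphBuilder::validate(args, n). The shared helper itself is not part of this hunk; judging by the checks it replaces (builders such as CircleCeil and CircleDiv drop both the input-count and the output-count test), it plausibly amounts to:

// presumed shape of the shared helper -- not shown in this patch
bool GraphBuilder::validate(const ValidateArgs &args, size_t input_cnt) const
{
  return args.op.inputs.size() == input_cnt && args.op.outputs.size() == 1;
}
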
diff --git a/compiler/luci/import/src/Nodes/CircleAdd.cpp b/compiler/luci/import/src/Nodes/CircleAdd.cpp
index b767d4af2..94cbdf081 100644
--- a/compiler/luci/import/src/Nodes/CircleAdd.cpp
+++ b/compiler/luci/import/src/Nodes/CircleAdd.cpp
@@ -25,10 +25,7 @@ namespace luci
bool CircleAddGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleAddGraphBuilder::build_node(const circle::OperatorT &op,
diff --git a/compiler/luci/import/src/Nodes/CircleArgMax.cpp b/compiler/luci/import/src/Nodes/CircleArgMax.cpp
index 10e8516f4..fd8a84289 100644
--- a/compiler/luci/import/src/Nodes/CircleArgMax.cpp
+++ b/compiler/luci/import/src/Nodes/CircleArgMax.cpp
@@ -25,10 +25,7 @@ namespace luci
bool CircleArgMaxGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleArgMaxGraphBuilder::build_node(const circle::OperatorT &op,
diff --git a/compiler/luci/import/src/Nodes/CircleArgMin.cpp b/compiler/luci/import/src/Nodes/CircleArgMin.cpp
index 5ff534dbb..63ca8db03 100644
--- a/compiler/luci/import/src/Nodes/CircleArgMin.cpp
+++ b/compiler/luci/import/src/Nodes/CircleArgMin.cpp
@@ -25,10 +25,7 @@ namespace luci
bool CircleArgMinGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleArgMinGraphBuilder::build_node(const circle::OperatorT &op,
diff --git a/compiler/luci/import/src/Nodes/CircleAveragePool2D.cpp b/compiler/luci/import/src/Nodes/CircleAveragePool2D.cpp
index ad011f71f..a351cf5e7 100644
--- a/compiler/luci/import/src/Nodes/CircleAveragePool2D.cpp
+++ b/compiler/luci/import/src/Nodes/CircleAveragePool2D.cpp
@@ -23,10 +23,7 @@ namespace luci
bool CircleAveragePool2DGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleAveragePool2DGraphBuilder::build_node(const circle::OperatorT &op,
diff --git a/compiler/luci/import/src/Nodes/CircleBCQFullyConnected.cpp b/compiler/luci/import/src/Nodes/CircleBCQFullyConnected.cpp
index 16ecebd5c..4c86399ce 100644
--- a/compiler/luci/import/src/Nodes/CircleBCQFullyConnected.cpp
+++ b/compiler/luci/import/src/Nodes/CircleBCQFullyConnected.cpp
@@ -25,10 +25,7 @@ namespace luci
bool CircleBCQFullyConnectedGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 5)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 5);
}
CircleNode *CircleBCQFullyConnectedGraphBuilder::build_node(const circle::OperatorT &op,
@@ -43,15 +40,6 @@ CircleNode *CircleBCQFullyConnectedGraphBuilder::build_node(const circle::Operat
node->bias(inputs.at(3));
node->weights_clusters(inputs.at(4));
- // TODO Find and move to appropriate place for setting optional input
- if (auto bias = dynamic_cast<luci::CircleOutputExclude *>(node->bias()))
- {
- // bias is not used for type inference, but node itself should have a type
- bias->dtype(loco::DataType::FLOAT32);
-
- // bias is not used for shape inference
- }
-
const auto *options = op.builtin_options.AsBCQFullyConnectedOptions();
node->weights_hidden_size(options->weights_hidden_size);
node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
diff --git a/compiler/luci/import/src/Nodes/CircleBCQGather.cpp b/compiler/luci/import/src/Nodes/CircleBCQGather.cpp
index 464f1ac18..ee1358197 100644
--- a/compiler/luci/import/src/Nodes/CircleBCQGather.cpp
+++ b/compiler/luci/import/src/Nodes/CircleBCQGather.cpp
@@ -25,10 +25,7 @@ namespace luci
bool CircleBCQGatherGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 4)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 4);
}
CircleNode *CircleBCQGatherGraphBuilder::build_node(const circle::OperatorT &op,
diff --git a/compiler/luci/import/src/Nodes/CircleBatchMatMul.cpp b/compiler/luci/import/src/Nodes/CircleBatchMatMul.cpp
index 330775691..390719061 100644
--- a/compiler/luci/import/src/Nodes/CircleBatchMatMul.cpp
+++ b/compiler/luci/import/src/Nodes/CircleBatchMatMul.cpp
@@ -23,10 +23,7 @@ namespace luci
bool CircleBatchMatMulGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleBatchMatMulGraphBuilder::build_node(const circle::OperatorT &op,
diff --git a/compiler/luci/import/src/Nodes/CircleBidirectionalSequenceLSTM.cpp b/compiler/luci/import/src/Nodes/CircleBidirectionalSequenceLSTM.cpp
new file mode 100644
index 000000000..c04b957bb
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleBidirectionalSequenceLSTM.cpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleBidirectionalSequenceLSTM.h"
+
+#include <luci/IR/Nodes/CircleBidirectionalSequenceLSTM.h>
+#include <luci/IR/Nodes/CircleBidirectionalSequenceLSTMOut.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CircleBidirectionalSequenceLSTMGraphBuilder::validate(const ValidateArgs &args) const
+{
+ if (args.op.inputs.size() != 48)
+ return false;
+ if (args.op.outputs.size() != 2)
+ return false;
+
+ return true;
+}
+
+CircleNode *CircleBidirectionalSequenceLSTMGraphBuilder::build_node(const BuildNodeArgs &bna) const
+{
+ auto *node = bna.context->graph()->nodes()->create<CircleBidirectionalSequenceLSTM>();
+ auto &inputs = bna.input_nodes;
+ node->input(inputs.at(0));
+
+ node->fw_input_to_input_weights(inputs.at(1)); // Optional
+ node->fw_input_to_cell_weights(inputs.at(2));
+ node->fw_input_to_forget_weights(inputs.at(3));
+ node->fw_input_to_output_weights(inputs.at(4));
+
+ node->fw_recurrent_to_input_weights(inputs.at(5)); // Optional
+ node->fw_recurrent_to_cell_weights(inputs.at(6));
+ node->fw_recurrent_to_forget_weights(inputs.at(7));
+ node->fw_recurrent_to_output_weights(inputs.at(8));
+
+ node->fw_cell_to_input_weights(inputs.at(9)); // Optional
+ node->fw_cell_to_forget_weights(inputs.at(10)); // Optional
+ node->fw_cell_to_output_weights(inputs.at(11)); // Optional
+
+ node->fw_input_gate_bias(inputs.at(12)); // Optional
+ node->fw_forget_gate_bias(inputs.at(13));
+ node->fw_cell_gate_bias(inputs.at(14));
+ node->fw_output_gate_bias(inputs.at(15));
+
+ node->fw_projection_weights(inputs.at(16)); // Optional
+ node->fw_projection_bias(inputs.at(17)); // Optional
+
+ node->bw_input_to_input_weights(inputs.at(18)); // Optional
+ node->bw_input_to_cell_weights(inputs.at(19));
+ node->bw_input_to_forget_weights(inputs.at(20));
+ node->bw_input_to_output_weights(inputs.at(21));
+
+ node->bw_recurrent_to_input_weights(inputs.at(22)); // Optional
+ node->bw_recurrent_to_cell_weights(inputs.at(23));
+ node->bw_recurrent_to_forget_weights(inputs.at(24));
+ node->bw_recurrent_to_output_weights(inputs.at(25));
+
+ node->bw_cell_to_input_weights(inputs.at(26)); // Optional
+ node->bw_cell_to_forget_weights(inputs.at(27)); // Optional
+ node->bw_cell_to_output_weights(inputs.at(28)); // Optional
+
+ node->bw_input_gate_bias(inputs.at(29)); // Optional
+ node->bw_forget_gate_bias(inputs.at(30));
+ node->bw_cell_gate_bias(inputs.at(31));
+ node->bw_output_gate_bias(inputs.at(32));
+
+ node->bw_projection_weights(inputs.at(33)); // Optional
+ node->bw_projection_bias(inputs.at(34)); // Optional
+
+ node->fw_activation_state(inputs.at(35));
+ node->fw_cell_state(inputs.at(36));
+ node->bw_activation_state(inputs.at(37));
+ node->bw_cell_state(inputs.at(38));
+
+ node->auxillary_input(inputs.at(39)); // Optional
+ node->fw_auxillary_input_to_input_weights(inputs.at(40)); // Optional
+ node->fw_auxillary_input_to_forget_weights(inputs.at(41)); // Optional
+ node->fw_auxillary_input_to_cell_weights(inputs.at(42)); // Optional
+ node->fw_auxillary_input_to_output_weights(inputs.at(43)); // Optional
+
+ node->bw_auxillary_input_to_input_weights(inputs.at(44)); // Optional
+ node->bw_auxillary_input_to_forget_weights(inputs.at(45)); // Optional
+ node->bw_auxillary_input_to_cell_weights(inputs.at(46)); // Optional
+ node->bw_auxillary_input_to_output_weights(inputs.at(47)); // Optional
+
+ const auto *options = bna.op.builtin_options.AsBidirectionalSequenceLSTMOptions();
+ node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
+ node->cell_clip(options->cell_clip);
+ node->proj_clip(options->proj_clip);
+ node->merge_outputs(options->merge_outputs);
+ node->time_major(options->time_major);
+ node->asymmetric_quantize_inputs(options->asymmetric_quantize_inputs);
+
+ return node;
+}
+
+CircleNode *CircleBidirectionalSequenceLSTMGraphBuilder::build_out(const BuildOutArgs &boa) const
+{
+ auto *nodeout = boa.node->graph()->nodes()->create<CircleBidirectionalSequenceLSTMOut>();
+
+ nodeout->input(boa.node);
+ nodeout->index(boa.index);
+
+ return nodeout;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleCast.cpp b/compiler/luci/import/src/Nodes/CircleCast.cpp
index 7bdb63044..acde823b1 100644
--- a/compiler/luci/import/src/Nodes/CircleCast.cpp
+++ b/compiler/luci/import/src/Nodes/CircleCast.cpp
@@ -30,25 +30,26 @@ bool CircleCastGraphBuilder::validate(const ValidateArgs &args) const
{
LOGGER(l);
+ if (!GraphBuilder::validate(args, 1))
+ return false;
+
auto settings = luci::UserSettings::settings();
const auto &inputs = args.op.inputs;
const auto &outputs = args.op.outputs;
- if (inputs.size() != 1)
- return false;
- if (outputs.size() != 1)
- return false;
// NOTE real models do have type mismatch
const auto *options = args.op.builtin_options.AsCastOptions();
if (options != nullptr)
{
- const auto &tensors = args.reader.tensors();
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
+ const auto tensors = args.reader.tensors();
+ const auto output_tensor = tensors[outputs[0]];
+ assert(output_tensor != nullptr);
auto name = tensor_name(output_tensor);
- const auto &tensor_in = tensors.at(inputs.at(0));
- if (tensor_in->type != options->in_data_type)
+ const auto tensor_in = tensors.at(inputs.at(0));
+ assert(tensor_in != nullptr);
+ if (tensor_in->type() != options->in_data_type)
{
if (settings->get(luci::UserSettings::Key::DisableValidation))
{
@@ -58,7 +59,7 @@ bool CircleCastGraphBuilder::validate(const ValidateArgs &args) const
return false;
}
const auto &tensor_out = tensors.at(outputs[0]);
- if (tensor_out->type != options->out_data_type)
+ if (tensor_out->type() != options->out_data_type)
{
if (settings->get(luci::UserSettings::Key::DisableValidation))
{
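
The pattern recurring from here on -- tensors() captured by value, element access followed by an assert against nullptr, and accessor calls such as type() instead of TensorT field access -- suggests the reader now exposes wrapped flatbuffers vectors rather than unpacked circle::TensorT objects. Assumed semantics, for reading the hunks below:

const auto tensors = args.reader.tensors();   // lightweight wrapper, cheap to copy by value
const auto tensor = tensors.at(inputs.at(0)); // raw pointer into the flatbuffer; may be null
assert(tensor != nullptr);                    // importer treats null as a malformed model
tensor->type();                               // flatbuffers accessor replaces TensorT field
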
diff --git a/compiler/luci/import/src/Nodes/CircleCeil.cpp b/compiler/luci/import/src/Nodes/CircleCeil.cpp
index 2e1aaa295..d439f41cd 100644
--- a/compiler/luci/import/src/Nodes/CircleCeil.cpp
+++ b/compiler/luci/import/src/Nodes/CircleCeil.cpp
@@ -25,16 +25,8 @@ namespace luci
bool CircleCeilGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
- if (inputs.size() != 1)
- return false;
- if (outputs.size() != 1)
- return false;
-
// TODO dtype check
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleCeilGraphBuilder::build_node(const circle::OperatorT &,
diff --git a/compiler/luci/import/src/Nodes/CircleConst.cpp b/compiler/luci/import/src/Nodes/CircleConst.cpp
index fad7a0757..88f2ae3d0 100644
--- a/compiler/luci/import/src/Nodes/CircleConst.cpp
+++ b/compiler/luci/import/src/Nodes/CircleConst.cpp
@@ -23,14 +23,17 @@
#include <oops/UserExn.h>
#include <cassert>
+#include <ostream>
+#include <string>
+#include <vector>
namespace
{
-std::ostream &operator<<(std::ostream &os, const std::vector<int32_t> &vect)
+std::ostream &operator<<(std::ostream &os, const luci::VectorWrapper<int32_t> &vect)
{
uint32_t seq = 0;
- for (auto &v : vect)
+ for (const auto &v : vect)
{
if (seq)
os << ", ";
@@ -40,17 +43,20 @@ std::ostream &operator<<(std::ostream &os, const std::vector<int32_t> &vect)
return os;
}
-} // namespace
-
-namespace luci
-{
+using namespace luci;
template <loco::DataType DT>
-static void copy_data(const std::vector<uint8_t> &raw_data, uint32_t num_elements,
- CircleConst *const_node)
+void copy_data(const VectorWrapper<uint8_t> &raw_data, uint32_t num_elements,
+ CircleConst *const_node)
{
using T = typename loco::DataTypeImpl<DT>::Type;
+ // TODO calculate the exact buffer size of a sparse tensor
+ if (const_node->sparsityparam())
+ {
+ num_elements = raw_data.size() / sizeof(T);
+ }
+
assert(raw_data.size() == num_elements * sizeof(T));
const auto *data = reinterpret_cast<const T *>(raw_data.data());
@@ -61,23 +67,69 @@ static void copy_data(const std::vector<uint8_t> &raw_data, uint32_t num_element
}
}
-//
-// circleconst_from_tensor() ?
-//
-CircleConst *create_circleconst(GraphBuilderContext *context, int32_t tensor_index)
+template <>
+void copy_data<loco::DataType::STRING>(const VectorWrapper<uint8_t> &raw_data,
+ uint32_t num_elements, CircleConst *const_node)
{
+ assert(const_node->sparsityparam() == nullptr);
+
+ const auto *data = reinterpret_cast<const char *>(raw_data.data());
+ const auto *i32d = reinterpret_cast<const int32_t *>(raw_data.data());
+
+ // de-serialize string data
+ // int32_t count
+ // int32_t offsets[count + 1]
+ // string values[count]
+ assert(static_cast<uint32_t>(*i32d) == num_elements);
+ i32d++; // skip count
+
+ std::vector<int32_t> offsets;
+ offsets.push_back(*i32d++);
+ for (uint32_t i = 0; i < num_elements; ++i)
+ {
+ offsets.push_back(*i32d++);
+ }
+ assert(offsets.size() == num_elements + 1);
+
+ const_node->size<loco::DataType::STRING>(num_elements);
+ for (uint32_t i = 0; i < num_elements; ++i)
+ {
+ int32_t start = offsets[i];
+ int32_t next = offsets[i + 1];
+
+ std::string value(data + start, next - start);
+ const_node->at<loco::DataType::STRING>(i) = value;
+ }
+}
+
+} // namespace
+
+namespace luci
+{
+
+CircleNode *CircleConstNodeBuilder::build(TensorIndex tensor_index,
+ GraphBuilderContext *context) const
+{
+ assert(tensor_index >= 0);
LOGGER(l);
auto graph = context->graph();
auto reader = context->reader();
- const auto &tensors = reader->tensors();
- const circle::TensorT &const_tensor = *tensors[tensor_index];
+ const auto tensors = reader->tensors();
+ const auto const_tensor = tensors[tensor_index];
+ assert(const_tensor != nullptr);
+ if (const_tensor->is_variable())
+ {
+ // Create CircleVariable for variable
+ return nullptr;
+ }
- const std::vector<uint8_t> &buffer = reader->buffers()[const_tensor.buffer]->data;
- std::vector<int32_t> const_dims = const_tensor.shape; // in NHWC
+ assert(reader->buffers()[const_tensor->buffer()] != nullptr);
+ const auto buffer = wrap(reader->buffers()[const_tensor->buffer()]->data());
+ const auto const_dims = wrap(const_tensor->shape()); // in NHWC
if (const_dims.size() == 0 && buffer.empty())
{
- // unknown shape tensor
+ // unknown shape tensor and scalar tensor
return nullptr;
}
@@ -108,12 +160,16 @@ CircleConst *create_circleconst(GraphBuilderContext *context, int32_t tensor_ind
<< const_dims << std::endl;
if (num_elements > 0)
{
- switch (luci_datatype(const_tensor.type))
+ switch (luci_datatype(const_tensor->type()))
{
case loco::DataType::FLOAT32:
copy_data<loco::DataType::FLOAT32>(buffer, num_elements, const_node);
break;
+ case loco::DataType::FLOAT16:
+ copy_data<loco::DataType::FLOAT16>(buffer, num_elements, const_node);
+ break;
+
case loco::DataType::U8:
copy_data<loco::DataType::U8>(buffer, num_elements, const_node);
break;
@@ -138,9 +194,13 @@ CircleConst *create_circleconst(GraphBuilderContext *context, int32_t tensor_ind
copy_data<loco::DataType::BOOL>(buffer, num_elements, const_node);
break;
+ case loco::DataType::STRING:
+ copy_data<loco::DataType::STRING>(buffer, num_elements, const_node);
+ break;
+
default:
throw oops::UserExn("Unsupported tensor type",
- circle::EnumNameTensorType(const_tensor.type));
+ circle::EnumNameTensorType(const_tensor->type()));
}
}
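
The STRING specialization above implies the usual TensorFlow Lite string packing. As a worked instance (values hypothetical), a constant holding {"ab", "xyz"} lays out as:

int32 count        : 2
int32 offsets[3]   : 16, 18, 21        -- byte offsets from the buffer start
char  bytes 16..20 : 'a' 'b' 'x' 'y' 'z'
-- at<STRING>(0) = bytes [16, 18) = "ab"; at<STRING>(1) = bytes [18, 21) = "xyz"
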
diff --git a/compiler/luci/import/src/Nodes/CircleConv2D.cpp b/compiler/luci/import/src/Nodes/CircleConv2D.cpp
index 9516ef16a..8cbecdc00 100644
--- a/compiler/luci/import/src/Nodes/CircleConv2D.cpp
+++ b/compiler/luci/import/src/Nodes/CircleConv2D.cpp
@@ -28,10 +28,7 @@ namespace luci
bool CircleConv2DGraphBuilder::validate(const ValidateArgs &args) const
{
// Circle Conv2D may not have a bias but we won't support this
- if (args.op.inputs.size() != 3)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 3);
}
CircleNode *CircleConv2DGraphBuilder::build_node(const circle::OperatorT &op,
diff --git a/compiler/luci/import/src/Nodes/CircleCos.cpp b/compiler/luci/import/src/Nodes/CircleCos.cpp
index 27d60c62c..9705202ee 100644
--- a/compiler/luci/import/src/Nodes/CircleCos.cpp
+++ b/compiler/luci/import/src/Nodes/CircleCos.cpp
@@ -25,10 +25,7 @@ namespace luci
bool CircleCosGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleCosGraphBuilder::build_node(const circle::OperatorT &,
diff --git a/compiler/luci/import/src/Nodes/CircleCustom.cpp b/compiler/luci/import/src/Nodes/CircleCustom.cpp
index d541ee87b..4e78d5fb7 100644
--- a/compiler/luci/import/src/Nodes/CircleCustom.cpp
+++ b/compiler/luci/import/src/Nodes/CircleCustom.cpp
@@ -27,62 +27,41 @@ bool CircleCustomGraphBuilder::validate(const ValidateArgs &) const
return true;
}
-void CircleCustomGraphBuilder::build(const circle::OperatorT &op,
- GraphBuilderContext *context) const
+CircleNode *CircleCustomGraphBuilder::build_node(const BuildNodeArgs &bna) const
{
- assert(context != nullptr);
+ uint32_t input_count = bna.op.inputs.size();
+ uint32_t output_count = bna.op.outputs.size();
- auto graph = context->graph();
+ auto *node = bna.context->graph()->nodes()->create<CircleCustom>(input_count, output_count);
- const std::vector<int32_t> &inputs = op.inputs;
- const std::vector<int32_t> &outputs = op.outputs;
- const auto &tensors = context->reader()->tensors();
- auto tensors_ptr = context->reader()->tensors_ptr();
- assert(tensors_ptr != nullptr);
-
- // Create CircleCustom
- const auto &opcodes = context->reader()->opcodes();
- const uint32_t opcode_index = op.opcode_index;
- const circle::OperatorCodeT &opcode = *opcodes[opcode_index];
-
- auto *node = graph->nodes()->create<CircleCustom>(inputs.size());
- uint32_t input_idx = 0;
- for (const int32_t input_tensor_index : inputs)
+ for (uint32_t idx = 0; idx < input_count; ++idx)
{
- node->inputs(input_idx++, context->nodefinder()->node(input_tensor_index));
+ node->inputs(idx, bna.input_nodes[idx]);
}
- node->custom_options(std::vector<uint8_t>{op.custom_options.begin(), op.custom_options.end()});
- node->custom_code(opcode.custom_code);
- // Operator version of custom is always 1, so do nothing
- uint32_t output_count = outputs.size();
+ const auto opcodes = bna.context->reader()->opcodes();
+ const uint32_t opcode_index = bna.op.opcode_index;
+ const auto opcode = opcodes[opcode_index];
+ assert(opcode != nullptr);
- assert(output_count > 0);
- {
- // Let's use attributes from output 0 for this node
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
- node->name(tensor_name(output_tensor));
- node->dtype(luci_datatype(output_tensor.type));
- }
+ node->custom_options(
+ std::vector<uint8_t>{bna.op.custom_options.begin(), bna.op.custom_options.end()});
+ assert(opcode->custom_code() != nullptr);
+ node->custom_code(opcode->custom_code()->c_str());
- // Create virtual outputs of Custom
- for (uint32_t n = 0; n < output_count; ++n)
- {
- const circle::TensorT &output_tensor = *tensors[outputs[n]];
+ // NOTE Operator version of custom is always 1
- auto *nodeout = graph->nodes()->create<CircleCustomOut>();
- copy_tensor_attributes(output_tensor, nodeout);
- // mark shape_status
- if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
- nodeout->shape_status(ShapeStatus::NOSHAPE);
- else
- nodeout->shape_status(ShapeStatus::VALID);
+ return node;
+}
+
+CircleNode *CircleCustomGraphBuilder::build_out(const BuildOutArgs &boa) const
+{
+ auto *nodeout = boa.node->graph()->nodes()->create<CircleCustomOut>();
- nodeout->input(node);
- nodeout->index(n);
+ nodeout->input(boa.node);
+ nodeout->index(boa.index);
- context->nodefinder()->enroll(outputs[n], nodeout);
- }
+ return nodeout;
}
} // namespace luci
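
Multi-output builders such as CircleCustom (and CircleIf below) now implement the build_node/build_out pair instead of a monolithic build(). The driving loop lives in the builder base class and is not part of this patch; its presumed outline:

// presumed driver in the builder base -- not shown in this patch
CircleNode *node = build_node(bna); // create the op, wire inputs, read options
for (uint32_t n = 0; n < output_count; ++n)
{
  CircleNode *out = build_out({node, n}); // virtual per-output node
  // the base copies tensor attributes (name, dtype, shape status) onto `out`
  // and enrolls it under outputs[n] in the node finder
}
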
diff --git a/compiler/luci/import/src/Nodes/CircleDensify.cpp b/compiler/luci/import/src/Nodes/CircleDensify.cpp
new file mode 100644
index 000000000..0a4b2186f
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleDensify.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleDensify.h"
+
+#include <luci/IR/Nodes/CircleDensify.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CircleDensifyGraphBuilder::validate(const ValidateArgs &args) const
+{
+ return GraphBuilder::validate(args, 1);
+}
+
+CircleNode *CircleDensifyGraphBuilder::build_node(const circle::OperatorT &,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CircleDensify>();
+ node->input(inputs.at(0));
+
+ // No options for Densify
+
+ return node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleDepthToSpace.cpp b/compiler/luci/import/src/Nodes/CircleDepthToSpace.cpp
index 49d31bb99..83fc2e37d 100644
--- a/compiler/luci/import/src/Nodes/CircleDepthToSpace.cpp
+++ b/compiler/luci/import/src/Nodes/CircleDepthToSpace.cpp
@@ -27,20 +27,17 @@ namespace luci
bool CircleDepthToSpaceGraphBuilder::validate(const ValidateArgs &args) const
{
+ if (!GraphBuilder::validate(args, 1))
+ return false;
+
const auto &inputs = args.op.inputs;
const auto &outputs = args.op.outputs;
const auto *options = args.op.builtin_options.AsDepthToSpaceOptions();
+ const auto tensors = args.reader.tensors();
+ assert(tensors[outputs[0]] != nullptr && tensors[inputs.at(0)] != nullptr);
- if (inputs.size() != 1)
- return false;
-
- if (outputs.size() != 1)
- return false;
-
- const auto &tensors = args.reader.tensors();
-
- if (tensors[outputs[0]]->type != tensors[inputs.at(0)]->type)
+ if (tensors[outputs[0]]->type() != tensors[inputs.at(0)]->type())
{
return false;
}
diff --git a/compiler/luci/import/src/Nodes/CircleDepthwiseConv2D.cpp b/compiler/luci/import/src/Nodes/CircleDepthwiseConv2D.cpp
index 53f85f2f5..a24e4160d 100644
--- a/compiler/luci/import/src/Nodes/CircleDepthwiseConv2D.cpp
+++ b/compiler/luci/import/src/Nodes/CircleDepthwiseConv2D.cpp
@@ -32,6 +32,34 @@ bool CircleDepthwiseConv2DGraphBuilder::validate(const ValidateArgs &args) const
if (args.op.outputs.size() != 1)
return false;
+ const auto tensors = args.reader.tensors();
+
+ // input shape
+ const auto input = tensors.at(args.op.inputs.at(0));
+ assert(input != nullptr);
+ const auto input_shape = wrap(input->shape());
+
+ // input shape must be rank 4
+ if (input_shape.size() != 4)
+ return false;
+
+ // filter shape
+ const auto filter = tensors.at(args.op.inputs.at(1));
+ assert(filter != nullptr);
+ const auto filter_shape = wrap(filter->shape());
+
+ // filter shape must be rank 4
+ if (filter_shape.size() != 4)
+ return false;
+
+ // multiplier
+ const auto *options = args.op.builtin_options.AsDepthwiseConv2DOptions();
+ const auto &multiplier = options->depth_multiplier;
+
+ // filter is represented as [1, H, W, C*M] where M is the multiplier.
+ if (filter_shape.at(3) != input_shape.at(3) * multiplier)
+ return false;
+
return true;
}
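
A worked instance of the new depth-multiplier check (shapes hypothetical):

input  : [1, 112, 112, 8]   -- rank 4, C = 8
options->depth_multiplier   -- M = 2
filter : [1, 3, 3, 16]      -- accepted: 16 == 8 * 2
filter : [1, 3, 3, 8]       -- rejected: 8 != 8 * 2
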
diff --git a/compiler/luci/import/src/Nodes/CircleDequantize.cpp b/compiler/luci/import/src/Nodes/CircleDequantize.cpp
new file mode 100644
index 000000000..3db546bd0
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleDequantize.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleDequantize.h"
+
+#include <luci/IR/Nodes/CircleDequantize.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CircleDequantizeGraphBuilder::validate(const ValidateArgs &args) const
+{
+ return GraphBuilder::validate(args, 1);
+}
+
+CircleNode *CircleDequantizeGraphBuilder::build_node(const circle::OperatorT &,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CircleDequantize>();
+ node->input(inputs.at(0));
+
+ // No options for Dequantize
+
+ return node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleDiv.cpp b/compiler/luci/import/src/Nodes/CircleDiv.cpp
index 615c224d7..7ea1afd95 100644
--- a/compiler/luci/import/src/Nodes/CircleDiv.cpp
+++ b/compiler/luci/import/src/Nodes/CircleDiv.cpp
@@ -23,13 +23,7 @@ namespace luci
bool CircleDivGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleDivGraphBuilder::build_node(const circle::OperatorT &op,
diff --git a/compiler/luci/import/src/Nodes/CircleElu.cpp b/compiler/luci/import/src/Nodes/CircleElu.cpp
index 919e95ee4..e5d7a4c7a 100644
--- a/compiler/luci/import/src/Nodes/CircleElu.cpp
+++ b/compiler/luci/import/src/Nodes/CircleElu.cpp
@@ -25,27 +25,32 @@ namespace luci
bool CircleEluGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
- if (outputs.size() != 1)
- return false;
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(0));
+ const auto tensors = args.reader.tensors();
+ const auto tensor = tensors.at(inputs.at(0));
+ assert(tensor != nullptr);
- switch (tensor->type)
+ switch (tensor->type())
{
+ case circle::TensorType_FLOAT64:
+ break;
case circle::TensorType_FLOAT32:
break;
+ case circle::TensorType_INT16:
+ break;
+ case circle::TensorType_UINT8:
+ break;
default:
return false;
}
- if (tensors[outputs[0]]->type != tensor->type)
+ assert(tensors[outputs[0]] != nullptr);
+ if (tensors[outputs[0]]->type() != tensor->type())
return false;
return true;
diff --git a/compiler/luci/import/src/Nodes/CircleEqual.cpp b/compiler/luci/import/src/Nodes/CircleEqual.cpp
index 1db33b8ac..b326d9b5d 100644
--- a/compiler/luci/import/src/Nodes/CircleEqual.cpp
+++ b/compiler/luci/import/src/Nodes/CircleEqual.cpp
@@ -25,16 +25,14 @@ namespace luci
bool CircleEqualGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
-
- if (inputs.size() != 2)
- {
+ if (!GraphBuilder::validate(args, 2))
return false;
- }
- const auto &tensors = args.reader.tensors();
+ const auto &inputs = args.op.inputs;
+ const auto tensors = args.reader.tensors();
- return tensors[inputs.at(0)]->type == tensors[inputs.at(1)]->type;
+ assert(tensors[inputs.at(0)] != nullptr && tensors[inputs.at(1)] != nullptr);
+ return tensors[inputs.at(0)]->type() == tensors[inputs.at(1)]->type();
}
CircleNode *CircleEqualGraphBuilder::build_node(const circle::OperatorT &,
diff --git a/compiler/luci/import/src/Nodes/CircleExp.cpp b/compiler/luci/import/src/Nodes/CircleExp.cpp
index 2c031d6b3..82c26f0e5 100644
--- a/compiler/luci/import/src/Nodes/CircleExp.cpp
+++ b/compiler/luci/import/src/Nodes/CircleExp.cpp
@@ -25,19 +25,24 @@ namespace luci
bool CircleExpGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
+ const auto &inputs = args.op.inputs;
// input type check
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(0));
- switch (tensor->type)
+ const auto tensors = args.reader.tensors();
+ const auto tensor = tensors.at(inputs.at(0));
+ assert(tensor != nullptr);
+ switch (tensor->type())
{
case circle::TensorType_FLOAT16:
case circle::TensorType_FLOAT32:
case circle::TensorType_FLOAT64:
break;
+ // Additional support for quantized tensors
+ case circle::TensorType_UINT8:
+ case circle::TensorType_INT16:
+ break;
// TODO support TensorType_COMPLEX64, complex128, bfloat16
default:
return false;
diff --git a/compiler/luci/import/src/Nodes/CircleExpandDims.cpp b/compiler/luci/import/src/Nodes/CircleExpandDims.cpp
index ab537c710..67d9b7e9e 100644
--- a/compiler/luci/import/src/Nodes/CircleExpandDims.cpp
+++ b/compiler/luci/import/src/Nodes/CircleExpandDims.cpp
@@ -25,16 +25,14 @@ namespace luci
bool CircleExpandDimsGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
-
- if (inputs.size() != 2)
- {
+ if (!GraphBuilder::validate(args, 2))
return false;
- }
- const auto &tensors = args.reader.tensors();
+ const auto &inputs = args.op.inputs;
+ const auto tensors = args.reader.tensors();
- return tensors[inputs.at(1)]->type == circle::TensorType_INT32;
+ assert(tensors[inputs.at(1)] != nullptr);
+ return tensors[inputs.at(1)]->type() == circle::TensorType_INT32;
}
CircleNode *CircleExpandDimsGraphBuilder::build_node(const circle::OperatorT &,
diff --git a/compiler/luci/import/src/Nodes/CircleFakeQuant.cpp b/compiler/luci/import/src/Nodes/CircleFakeQuant.cpp
new file mode 100644
index 000000000..7cf40b225
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleFakeQuant.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleFakeQuant.h"
+
+#include <luci/IR/Nodes/CircleFullyConnected.h>
+#include <luci/IR/Nodes/CircleOutput.h>
+
+#include <loco.h>
+#include <oops/UserExn.h>
+
+namespace luci
+{
+
+bool CircleFakeQuantGraphBuilder::validate(const ValidateArgs &args) const
+{
+ return GraphBuilder::validate(args, 1);
+}
+
+CircleNode *CircleFakeQuantGraphBuilder::build_node(const circle::OperatorT &op,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CircleFakeQuant>();
+ node->inputs(inputs.at(0));
+
+ const auto *options = op.builtin_options.AsFakeQuantOptions();
+ node->min(options->min);
+ node->max(options->max);
+ node->num_bits(options->num_bits);
+ node->narrow_range(options->narrow_range);
+
+ return node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleFill.cpp b/compiler/luci/import/src/Nodes/CircleFill.cpp
index 95d5b876b..9aacddcbe 100644
--- a/compiler/luci/import/src/Nodes/CircleFill.cpp
+++ b/compiler/luci/import/src/Nodes/CircleFill.cpp
@@ -23,13 +23,7 @@ namespace luci
bool CircleFillGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleFillGraphBuilder::build_node(const circle::OperatorT &op,
diff --git a/compiler/luci/import/src/Nodes/CircleFloor.cpp b/compiler/luci/import/src/Nodes/CircleFloor.cpp
index ce756b3b1..9651259c7 100644
--- a/compiler/luci/import/src/Nodes/CircleFloor.cpp
+++ b/compiler/luci/import/src/Nodes/CircleFloor.cpp
@@ -25,16 +25,8 @@ namespace luci
bool CircleFloorGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
- if (inputs.size() != 1)
- return false;
- if (outputs.size() != 1)
- return false;
-
// TODO dtype check
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleFloorGraphBuilder::build_node(const circle::OperatorT &,
diff --git a/compiler/luci/import/src/Nodes/CircleFloorDiv.cpp b/compiler/luci/import/src/Nodes/CircleFloorDiv.cpp
index 55f385d60..67eeddf91 100644
--- a/compiler/luci/import/src/Nodes/CircleFloorDiv.cpp
+++ b/compiler/luci/import/src/Nodes/CircleFloorDiv.cpp
@@ -25,28 +25,23 @@ namespace luci
bool CircleFloorDivGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- {
- return false;
- }
-
- if (outputs.size() != 1)
- {
+ if (!GraphBuilder::validate(args, 2))
return false;
- }
- const auto &tensors = args.reader.tensors();
- const auto &tensor_in_0 = tensors.at(inputs.at(0));
- const auto &tensor_in_1 = tensors.at(inputs.at(1));
- const auto &tensor_out = tensors.at(outputs[0]);
-
- if (tensor_in_0->type != tensor_in_1->type)
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
+ const auto tensors = args.reader.tensors();
+ const auto tensor_in_0 = tensors.at(inputs.at(0));
+ const auto tensor_in_1 = tensors.at(inputs.at(1));
+ const auto tensor_out = tensors.at(outputs[0]);
+ assert(tensor_in_0 != nullptr);
+ assert(tensor_in_1 != nullptr);
+ assert(tensor_out != nullptr);
+
+ if (tensor_in_0->type() != tensor_in_1->type())
return false;
- if (tensor_out->type != tensor_in_1->type)
+ if (tensor_out->type() != tensor_in_1->type())
{
return false;
}
diff --git a/compiler/luci/import/src/Nodes/CircleFloorMod.cpp b/compiler/luci/import/src/Nodes/CircleFloorMod.cpp
index 2101e417e..d2a275b62 100644
--- a/compiler/luci/import/src/Nodes/CircleFloorMod.cpp
+++ b/compiler/luci/import/src/Nodes/CircleFloorMod.cpp
@@ -25,17 +25,15 @@ namespace luci
bool CircleFloorModGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
- if (inputs.size() != 2)
- return false;
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 2))
return false;
- const auto &tensors = args.reader.tensors();
- const auto &tensor_in_0 = tensors.at(inputs.at(0));
- const auto &tensor_in_1 = tensors.at(inputs.at(1));
- if (tensor_in_0->type != tensor_in_1->type)
+ const auto &inputs = args.op.inputs;
+ const auto tensors = args.reader.tensors();
+ const auto tensor_in_0 = tensors.at(inputs.at(0));
+ const auto tensor_in_1 = tensors.at(inputs.at(1));
+ assert(tensor_in_0 != nullptr && tensor_in_1 != nullptr);
+ if (tensor_in_0->type() != tensor_in_1->type())
return false;
// TODO dtype check
diff --git a/compiler/luci/import/src/Nodes/CircleFullyConnected.cpp b/compiler/luci/import/src/Nodes/CircleFullyConnected.cpp
index 65a863bde..cc7be1693 100644
--- a/compiler/luci/import/src/Nodes/CircleFullyConnected.cpp
+++ b/compiler/luci/import/src/Nodes/CircleFullyConnected.cpp
@@ -27,10 +27,7 @@ namespace luci
bool CircleFullyConnectedGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 3)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 3);
}
CircleNode *CircleFullyConnectedGraphBuilder::build_node(const circle::OperatorT &op,
@@ -42,23 +39,10 @@ CircleNode *CircleFullyConnectedGraphBuilder::build_node(const circle::OperatorT
node->weights(inputs.at(1));
node->bias(inputs.at(2)); // bias is optional
- // TODO Find and move to appropriate place for setting optional input
- if (auto bias = dynamic_cast<luci::CircleOutputExclude *>(node->bias()))
- {
- // bias is not used for type inference, but node itself should have a type
- bias->dtype(loco::DataType::FLOAT32);
-
- // bias is not used for shape inference
- }
-
const auto *options = op.builtin_options.AsFullyConnectedOptions();
node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
- if (options->weights_format != circle::FullyConnectedOptionsWeightsFormat_DEFAULT)
- {
- throw oops::UserExn(
- "Unsupported weights format",
- circle::EnumNameFullyConnectedOptionsWeightsFormat(options->weights_format));
- }
+ node->weights_format(luci_weights_format(options->weights_format));
+ node->keep_num_dims(options->keep_num_dims);
return node;
}
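
Where the importer previously threw UserExn for any non-DEFAULT weights format, it now records the format on the node. Assuming luci_weights_format mirrors the schema enum, the mapping would be roughly:

circle::FullyConnectedOptionsWeightsFormat_DEFAULT             -> WeightsFormat::DEFAULT
circle::FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8    -> WeightsFormat::SHUFFLED4x16INT8
circle::FullyConnectedOptionsWeightsFormat_SHUFFLED16x1FLOAT32 -> WeightsFormat::SHUFFLED16x1FLOAT32
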
diff --git a/compiler/luci/import/src/Nodes/CircleGather.cpp b/compiler/luci/import/src/Nodes/CircleGather.cpp
index 75447a38a..8317a3340 100644
--- a/compiler/luci/import/src/Nodes/CircleGather.cpp
+++ b/compiler/luci/import/src/Nodes/CircleGather.cpp
@@ -26,18 +26,14 @@ namespace luci
bool CircleGatherGraphBuilder::validate(const ValidateArgs &args) const
{
+ if (!GraphBuilder::validate(args, 2))
+ return false;
+
const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
const auto *options = args.op.builtin_options.AsGatherOptions();
int32_t axis = options->axis;
- if (inputs.size() != 2)
- return false;
-
- if (outputs.size() != 1)
- return false;
-
if (axis < 0)
axis += inputs.size();
diff --git a/compiler/luci/import/src/Nodes/CircleGatherNd.cpp b/compiler/luci/import/src/Nodes/CircleGatherNd.cpp
index 981adbf63..d336878ad 100644
--- a/compiler/luci/import/src/Nodes/CircleGatherNd.cpp
+++ b/compiler/luci/import/src/Nodes/CircleGatherNd.cpp
@@ -27,19 +27,15 @@ namespace luci
bool CircleGatherNdGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
+ if (!GraphBuilder::validate(args, 2))
return false;
- if (outputs.size() != 1)
- return false;
-
- auto &indices_tensor = args.reader.tensors()[inputs.at(1)];
+ const auto &inputs = args.op.inputs;
+ auto indices_tensor = args.reader.tensors()[inputs.at(1)];
+ assert(indices_tensor != nullptr);
- if (!(indices_tensor->type == circle::TensorType::TensorType_INT32 ||
- indices_tensor->type == circle::TensorType::TensorType_INT64))
+ if (!(indices_tensor->type() == circle::TensorType::TensorType_INT32 ||
+ indices_tensor->type() == circle::TensorType::TensorType_INT64))
{
return false;
}
diff --git a/compiler/luci/import/src/Nodes/CircleGelu.cpp b/compiler/luci/import/src/Nodes/CircleGelu.cpp
new file mode 100644
index 000000000..89b325f82
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleGelu.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleGelu.h"
+
+#include <luci/IR/Nodes/CircleGelu.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CircleGeluGraphBuilder::validate(const ValidateArgs &args) const
+{
+ return GraphBuilder::validate(args, 1);
+}
+
+CircleNode *CircleGeluGraphBuilder::build_node(const circle::OperatorT &op,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CircleGelu>();
+ node->features(inputs.at(0));
+
+ const auto *options = op.builtin_options.AsGeluOptions();
+ node->approximate(options->approximate);
+
+ return node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleGreater.cpp b/compiler/luci/import/src/Nodes/CircleGreater.cpp
index 1ad0467e4..7f031b0ba 100644
--- a/compiler/luci/import/src/Nodes/CircleGreater.cpp
+++ b/compiler/luci/import/src/Nodes/CircleGreater.cpp
@@ -30,28 +30,26 @@ bool CircleGreaterGraphBuilder::validate(const ValidateArgs &args) const
{
LOGGER(l);
+ if (!GraphBuilder::validate(args, 2))
+ return false;
+
auto settings = luci::UserSettings::settings();
const auto &inputs = args.op.inputs;
const auto &outputs = args.op.outputs;
+ const auto tensors = args.reader.tensors();
- if (inputs.size() != 2)
- return false;
-
- if (outputs.size() != 1)
- return false;
-
- const auto &tensors = args.reader.tensors();
-
- if (tensors[inputs.at(0)]->type != tensors[inputs.at(1)]->type)
+ assert(tensors[inputs.at(0)] != nullptr && tensors[inputs.at(1)] != nullptr);
+ if (tensors[inputs.at(0)]->type() != tensors[inputs.at(1)]->type())
return false;
// NOTE: real models do have output dtype NOT BOOL
- if (tensors[outputs[0]]->type != circle::TensorType_BOOL)
+ assert(tensors[outputs[0]] != nullptr);
+ if (tensors[outputs[0]]->type() != circle::TensorType_BOOL)
{
if (settings->get(luci::UserSettings::Key::DisableValidation))
{
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
+ const auto output_tensor = tensors[outputs[0]];
auto name = tensor_name(output_tensor);
WARN(l) << "Warning: import Greater(" << name << ") output dtype is not boolean";
}
diff --git a/compiler/luci/import/src/Nodes/CircleGreaterEqual.cpp b/compiler/luci/import/src/Nodes/CircleGreaterEqual.cpp
index 0ac63b017..ac4ce62f5 100644
--- a/compiler/luci/import/src/Nodes/CircleGreaterEqual.cpp
+++ b/compiler/luci/import/src/Nodes/CircleGreaterEqual.cpp
@@ -25,27 +25,21 @@ namespace luci
bool CircleGreaterEqualGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- {
+ if (!GraphBuilder::validate(args, 2))
return false;
- }
- if (outputs.size() != 1)
- {
- return false;
- }
-
- const auto &tensors = args.reader.tensors();
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
+ const auto tensors = args.reader.tensors();
- if (tensors[inputs.at(0)]->type != tensors[inputs.at(1)]->type)
+ assert(tensors[inputs.at(0)] != nullptr && tensors[inputs.at(1)] != nullptr);
+ if (tensors[inputs.at(0)]->type() != tensors[inputs.at(1)]->type())
{
return false;
}
- return tensors[outputs[0]]->type == circle::TensorType::TensorType_BOOL;
+ assert(tensors[outputs[0]] != nullptr);
+ return tensors[outputs[0]]->type() == circle::TensorType::TensorType_BOOL;
}
CircleNode *CircleGreaterEqualGraphBuilder::build_node(const circle::OperatorT &,
diff --git a/compiler/luci/import/src/Nodes/CircleHardSwish.cpp b/compiler/luci/import/src/Nodes/CircleHardSwish.cpp
new file mode 100644
index 000000000..47fc1c92c
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleHardSwish.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleHardSwish.h"
+
+#include <luci/IR/Nodes/CircleHardSwish.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CircleHardSwishGraphBuilder::validate(const ValidateArgs &args) const
+{
+ return GraphBuilder::validate(args, 1);
+}
+
+CircleNode *CircleHardSwishGraphBuilder::build_node(const circle::OperatorT &,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CircleHardSwish>();
+ node->features(inputs.at(0));
+
+ return node;
+}
+
+} // namespace luci
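
Most validate() overrides in this patch collapse into a shared GraphBuilder::validate(args, n) helper. A minimal sketch of the check that helper is assumed to perform follows; the real definition lives in luci's GraphBuilder base class, and the single-output rule here is inferred from the call sites, not quoted from the actual code:

#include <cstddef>
#include <cstdint>
#include <vector>

// Sketch only: hypothetical stand-ins for the luci types.
struct OperatorT { std::vector<int32_t> inputs, outputs; };
struct ValidateArgs { OperatorT op; };

// Assumed contract of GraphBuilder::validate(args, n):
// exactly n inputs and a single output.
bool validate(const ValidateArgs &args, std::size_t input_cnt)
{
  return args.op.inputs.size() == input_cnt && args.op.outputs.size() == 1;
}
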
diff --git a/compiler/luci/import/src/Nodes/CircleIf.cpp b/compiler/luci/import/src/Nodes/CircleIf.cpp
index db9ffe1cd..e8a50ff32 100644
--- a/compiler/luci/import/src/Nodes/CircleIf.cpp
+++ b/compiler/luci/import/src/Nodes/CircleIf.cpp
@@ -42,12 +42,13 @@ bool CircleIfGraphBuilder::validate(const ValidateArgs &args) const
return false;
// input 0 should be BOOL type
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(0));
- if (tensor->type != circle::TensorType_BOOL)
+ const auto tensors = args.reader.tensors();
+ const auto tensor = tensors.at(inputs.at(0));
+ assert(tensor != nullptr);
+ if (tensor->type() != circle::TensorType_BOOL)
return false;
- const auto &shape = tensor->shape;
+ const auto shape = wrap(tensor->shape());
if (shape.size() != 1 && shape.size() != 0)
return false;
@@ -70,69 +71,34 @@ bool CircleIfGraphBuilder::validate(const ValidateArgs &args) const
* \- CircleIfOut --- Node ---
*/
-void CircleIfGraphBuilder::build(const circle::OperatorT &op, GraphBuilderContext *context) const
+CircleNode *CircleIfGraphBuilder::build_node(const BuildNodeArgs &bna) const
{
- assert(context != nullptr);
+ uint32_t input_count = bna.op.inputs.size() - 1;
+ uint32_t output_count = bna.op.outputs.size();
- auto graph = context->graph();
+ auto *node = bna.context->graph()->nodes()->create<CircleIf>(input_count, output_count);
- const std::vector<int32_t> &inputs = op.inputs;
- const std::vector<int32_t> &outputs = op.outputs;
- const auto &tensors = context->reader()->tensors();
- const auto &opcodes = context->reader()->opcodes();
- auto tensors_ptr = context->reader()->tensors_ptr();
- assert(tensors_ptr != nullptr);
-
- std::vector<CircleNode *> input_nodes;
- for (const int32_t input_tensor_index : inputs)
- {
- input_nodes.push_back(context->nodefinder()->node(input_tensor_index));
- }
-
- uint32_t input_count = inputs.size() - 1;
- uint32_t output_count = outputs.size();
-
- // Create CircleIf
- CircleIf *node = graph->nodes()->create<CircleIf>(input_count, output_count);
-
- node->cond(input_nodes[0]);
+ node->cond(bna.input_nodes[0]);
for (uint32_t idx = 0; idx < input_count; ++idx)
{
- node->input(idx, input_nodes[idx + 1]);
+ node->input(idx, bna.input_nodes[idx + 1]);
}
- const auto *options = op.builtin_options.AsIfOptions();
+ const auto *options = bna.op.builtin_options.AsIfOptions();
node->then_branch(options->then_subgraph_index);
node->else_branch(options->else_subgraph_index);
- assert(outputs.size() > 0);
- {
- // Lets use name of output 0 as If name
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
- node->name(tensor_name(output_tensor));
- node->op_version(opcodes[op.opcode_index].get()->version);
-
- // NOTE We don't set quantization for If itself but to virtual outputs
- }
-
- // Create virtual outputs of If
- for (uint32_t n = 0; n < output_count; ++n)
- {
- const circle::TensorT &output_tensor = *tensors[outputs[n]];
+ return node;
+}
- auto *nodeout = graph->nodes()->create<CircleIfOut>();
- copy_tensor_attributes(output_tensor, nodeout);
- // mark shape_status
- if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
- nodeout->shape_status(ShapeStatus::NOSHAPE);
- else
- nodeout->shape_status(ShapeStatus::VALID);
+CircleNode *CircleIfGraphBuilder::build_out(const BuildOutArgs &boa) const
+{
+ auto *nodeout = boa.node->graph()->nodes()->create<CircleIfOut>();
- nodeout->input(node);
- nodeout->index(n);
+ nodeout->input(boa.node);
+ nodeout->index(boa.index);
- context->nodefinder()->enroll(outputs[n], nodeout);
- }
+ return nodeout;
}
} // namespace luci
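
With this change the If importer no longer hand-rolls its virtual outputs: it supplies only build_node() and build_out(), and the shared multi-output base is assumed to drive them roughly as in the sketch below. The names (outputs, context, nodefinder) follow the removed code above; the loop itself is an assumption about the common builder, not a quote of it:

// Assumed driver loop in the shared multi-output builder (sketch, not the
// actual luci code): create the operator node once, then one virtual output
// node per output tensor, enrolling each by its tensor index.
CircleNode *node = build_node(bna);
for (uint32_t n = 0; n < static_cast<uint32_t>(outputs.size()); ++n)
{
  BuildOutArgs boa{node, n};            // hypothetical aggregate init
  CircleNode *nodeout = build_out(boa);
  context->nodefinder()->enroll(outputs[n], nodeout);
}
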
diff --git a/compiler/luci/import/src/Nodes/CircleInstanceNorm.cpp b/compiler/luci/import/src/Nodes/CircleInstanceNorm.cpp
index 6349fd3b7..977b53406 100644
--- a/compiler/luci/import/src/Nodes/CircleInstanceNorm.cpp
+++ b/compiler/luci/import/src/Nodes/CircleInstanceNorm.cpp
@@ -25,12 +25,8 @@ namespace luci
bool CircleInstanceNormGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 3)
- return false;
-
// TODO check dtypes
-
- return true;
+ return GraphBuilder::validate(args, 3);
}
CircleNode *CircleInstanceNormGraphBuilder::build_node(const circle::OperatorT &op,
diff --git a/compiler/luci/import/src/Nodes/CircleL2Normalize.cpp b/compiler/luci/import/src/Nodes/CircleL2Normalize.cpp
index e4fdc200c..7e1faedfb 100644
--- a/compiler/luci/import/src/Nodes/CircleL2Normalize.cpp
+++ b/compiler/luci/import/src/Nodes/CircleL2Normalize.cpp
@@ -25,20 +25,7 @@ namespace luci
bool CircleL2NormalizeGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 1)
- {
- return false;
- }
-
- if (outputs.size() != 1)
- {
- return false;
- }
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleL2NormalizeGraphBuilder::build_node(const circle::OperatorT &op,
diff --git a/compiler/luci/import/src/Nodes/CircleL2Pool2D.cpp b/compiler/luci/import/src/Nodes/CircleL2Pool2D.cpp
index 202d9d6fb..849c7c5ed 100644
--- a/compiler/luci/import/src/Nodes/CircleL2Pool2D.cpp
+++ b/compiler/luci/import/src/Nodes/CircleL2Pool2D.cpp
@@ -25,12 +25,8 @@ namespace luci
bool CircleL2Pool2DGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
// TODO check dtypes
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleL2Pool2DGraphBuilder::build_node(const circle::OperatorT &op,
diff --git a/compiler/luci/import/src/Nodes/CircleLeakyRelu.cpp b/compiler/luci/import/src/Nodes/CircleLeakyRelu.cpp
index ad4979f39..880fa6428 100644
--- a/compiler/luci/import/src/Nodes/CircleLeakyRelu.cpp
+++ b/compiler/luci/import/src/Nodes/CircleLeakyRelu.cpp
@@ -25,13 +25,7 @@ namespace luci
bool CircleLeakyReluGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleLeakyReluGraphBuilder::build_node(const circle::OperatorT &op,
diff --git a/compiler/luci/import/src/Nodes/CircleLess.cpp b/compiler/luci/import/src/Nodes/CircleLess.cpp
index 506036908..5c5ae51e1 100644
--- a/compiler/luci/import/src/Nodes/CircleLess.cpp
+++ b/compiler/luci/import/src/Nodes/CircleLess.cpp
@@ -25,23 +25,16 @@ namespace luci
bool CircleLessGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- {
+ if (!GraphBuilder::validate(args, 2))
return false;
- }
- if (outputs.size() != 1)
- {
- return false;
- }
-
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(0));
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
+ const auto tensors = args.reader.tensors();
+ const auto tensor = tensors.at(inputs.at(0));
+ assert(tensor != nullptr);
- switch (tensor->type)
+ switch (tensor->type())
{
case circle::TensorType_FLOAT32:
case circle::TensorType_FLOAT64:
@@ -56,12 +49,14 @@ bool CircleLessGraphBuilder::validate(const ValidateArgs &args) const
return false;
}
- if (tensors[inputs.at(1)]->type != tensor->type)
+ assert(tensors[inputs.at(1)] != nullptr);
+ if (tensors[inputs.at(1)]->type() != tensor->type())
{
return false;
}
- return tensors[outputs[0]]->type == circle::TensorType_BOOL;
+ assert(tensors[outputs[0]] != nullptr);
+ return tensors[outputs[0]]->type() == circle::TensorType_BOOL;
}
CircleNode *CircleLessGraphBuilder::build_node(const circle::OperatorT &,
diff --git a/compiler/luci/import/src/Nodes/CircleLessEqual.cpp b/compiler/luci/import/src/Nodes/CircleLessEqual.cpp
index 9b4f934a5..8a2aea8db 100644
--- a/compiler/luci/import/src/Nodes/CircleLessEqual.cpp
+++ b/compiler/luci/import/src/Nodes/CircleLessEqual.cpp
@@ -25,27 +25,21 @@ namespace luci
bool CircleLessEqualGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- {
+ if (!GraphBuilder::validate(args, 2))
return false;
- }
- if (outputs.size() != 1)
- {
- return false;
- }
-
- const auto &tensors = args.reader.tensors();
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
+ const auto tensors = args.reader.tensors();
- if (tensors[inputs.at(0)]->type != tensors[inputs.at(1)]->type)
+ assert(tensors[inputs.at(0)] != nullptr && tensors[inputs.at(1)] != nullptr);
+ if (tensors[inputs.at(0)]->type() != tensors[inputs.at(1)]->type())
{
return false;
}
- return tensors[outputs[0]]->type == circle::TensorType::TensorType_BOOL;
+ assert(tensors[outputs[0]] != nullptr);
+ return tensors[outputs[0]]->type() == circle::TensorType::TensorType_BOOL;
}
CircleNode *CircleLessEqualGraphBuilder::build_node(const circle::OperatorT &,
diff --git a/compiler/luci/import/src/Nodes/CircleLocalResponseNormalization.cpp b/compiler/luci/import/src/Nodes/CircleLocalResponseNormalization.cpp
index 0e32f62de..d03c47d12 100644
--- a/compiler/luci/import/src/Nodes/CircleLocalResponseNormalization.cpp
+++ b/compiler/luci/import/src/Nodes/CircleLocalResponseNormalization.cpp
@@ -25,16 +25,12 @@ namespace luci
bool CircleLocalResponseNormalizationGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
// TODO do attribute checks
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleLocalResponseNormalizationGraphBuilder::build_node(
- const circle::OperatorT &op, const std::vector<CircleNode *> &inputs, loco::Graph *graph) const
+ const circle::OperatorT &op, const std::vector<CircleNode *> &inputs, loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleLocalResponseNormalization>();
node->input(inputs.at(0));
diff --git a/compiler/luci/import/src/Nodes/CircleLog.cpp b/compiler/luci/import/src/Nodes/CircleLog.cpp
index 346fc43bb..f41926829 100644
--- a/compiler/luci/import/src/Nodes/CircleLog.cpp
+++ b/compiler/luci/import/src/Nodes/CircleLog.cpp
@@ -25,18 +25,17 @@ namespace luci
bool CircleLogGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 1)
- return false;
- if (args.op.outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
+ const auto &inputs = args.op.inputs;
// input type check
// Must be one of bfloat16, half, float32, float64, complex64, complex128.
// Currently circle supports half(float16), float32, float64, complex64.
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(0));
- switch (tensor->type)
+ const auto tensors = args.reader.tensors();
+ const auto tensor = tensors.at(inputs.at(0));
+ assert(tensor != nullptr);
+ switch (tensor->type())
{
case circle::TensorType_FLOAT16:
case circle::TensorType_FLOAT32:
diff --git a/compiler/luci/import/src/Nodes/CircleLogSoftmax.cpp b/compiler/luci/import/src/Nodes/CircleLogSoftmax.cpp
index ef69e868a..4361db691 100644
--- a/compiler/luci/import/src/Nodes/CircleLogSoftmax.cpp
+++ b/compiler/luci/import/src/Nodes/CircleLogSoftmax.cpp
@@ -25,12 +25,8 @@ namespace luci
bool CircleLogSoftmaxGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
// TODO do attribute checks
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleLogSoftmaxGraphBuilder::build_node(const circle::OperatorT &,
diff --git a/compiler/luci/import/src/Nodes/CircleLogicalAnd.cpp b/compiler/luci/import/src/Nodes/CircleLogicalAnd.cpp
index 7844da0f6..b61fb6f3e 100644
--- a/compiler/luci/import/src/Nodes/CircleLogicalAnd.cpp
+++ b/compiler/luci/import/src/Nodes/CircleLogicalAnd.cpp
@@ -25,16 +25,17 @@ namespace luci
bool CircleLogicalAndGraphBuilder::validate(const ValidateArgs &args) const
{
- // Only BOOL type is allowed for inputs
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 2)
+ if (!GraphBuilder::validate(args, 2))
return false;
- const auto &tensors = args.reader.tensors();
+ // Only BOOL type is allowed for inputs
+ const auto &inputs = args.op.inputs;
+ const auto tensors = args.reader.tensors();
for (auto input : inputs)
{
- const auto &tensor = tensors.at(input);
- if (tensor->type != circle::TensorType::TensorType_BOOL)
+ const auto tensor = tensors.at(input);
+ assert(tensor != nullptr);
+ if (tensor->type() != circle::TensorType::TensorType_BOOL)
return false;
}
diff --git a/compiler/luci/import/src/Nodes/CircleLogicalNot.cpp b/compiler/luci/import/src/Nodes/CircleLogicalNot.cpp
index 3758642e4..43e9ed39f 100644
--- a/compiler/luci/import/src/Nodes/CircleLogicalNot.cpp
+++ b/compiler/luci/import/src/Nodes/CircleLogicalNot.cpp
@@ -25,14 +25,15 @@ namespace luci
bool CircleLogicalNotGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
// Only BOOL type is allowed for the input
const auto &inputs = args.op.inputs;
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(0));
- if (tensor->type != circle::TensorType::TensorType_BOOL)
+ const auto tensors = args.reader.tensors();
+ const auto tensor = tensors.at(inputs.at(0));
+ assert(tensor != nullptr);
+ if (tensor->type() != circle::TensorType::TensorType_BOOL)
return false;
return true;
diff --git a/compiler/luci/import/src/Nodes/CircleLogicalOr.cpp b/compiler/luci/import/src/Nodes/CircleLogicalOr.cpp
index 1b87e6f9c..6354e7dc1 100644
--- a/compiler/luci/import/src/Nodes/CircleLogicalOr.cpp
+++ b/compiler/luci/import/src/Nodes/CircleLogicalOr.cpp
@@ -25,16 +25,17 @@ namespace luci
bool CircleLogicalOrGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
+ if (!GraphBuilder::validate(args, 2))
return false;
// Only BOOL type is allowed for inputs
const auto &inputs = args.op.inputs;
- const auto &tensors = args.reader.tensors();
+ const auto tensors = args.reader.tensors();
for (auto input : inputs)
{
- const auto &tensor = tensors.at(input);
- if (tensor->type != circle::TensorType::TensorType_BOOL)
+ const auto tensor = tensors.at(input);
+ assert(tensor != nullptr);
+ if (tensor->type() != circle::TensorType::TensorType_BOOL)
return false;
}
diff --git a/compiler/luci/import/src/Nodes/CircleLogistic.cpp b/compiler/luci/import/src/Nodes/CircleLogistic.cpp
index 9606e19cd..b0d08e039 100644
--- a/compiler/luci/import/src/Nodes/CircleLogistic.cpp
+++ b/compiler/luci/import/src/Nodes/CircleLogistic.cpp
@@ -25,15 +25,14 @@ namespace luci
bool CircleLogisticGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 1)
- return false;
- const auto &outputs = args.op.outputs;
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
- const auto &tensors = args.reader.tensors();
- if (tensors.at(inputs.at(0))->type != tensors.at(outputs[0])->type)
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
+ const auto tensors = args.reader.tensors();
+ assert(tensors.at(inputs.at(0)) != nullptr && tensors.at(outputs[0]) != nullptr);
+ if (tensors.at(inputs.at(0))->type() != tensors.at(outputs[0])->type())
return false;
return true;
diff --git a/compiler/luci/import/src/Nodes/CircleMatrixDiag.cpp b/compiler/luci/import/src/Nodes/CircleMatrixDiag.cpp
index a4a21a8b7..384b98586 100644
--- a/compiler/luci/import/src/Nodes/CircleMatrixDiag.cpp
+++ b/compiler/luci/import/src/Nodes/CircleMatrixDiag.cpp
@@ -25,19 +25,16 @@ namespace luci
bool CircleMatrixDiagGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
- if (outputs.size() != 1)
- return false;
-
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(0));
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
+ const auto tensors = args.reader.tensors();
+ const auto tensor = tensors.at(inputs.at(0));
- if (tensors[outputs[0]]->type != tensor->type)
+ assert(tensors[outputs[0]] != nullptr && tensor != nullptr);
+ if (tensors[outputs[0]]->type() != tensor->type())
return false;
return true;
diff --git a/compiler/luci/import/src/Nodes/CircleMatrixSetDiag.cpp b/compiler/luci/import/src/Nodes/CircleMatrixSetDiag.cpp
index cf0313149..64870c057 100644
--- a/compiler/luci/import/src/Nodes/CircleMatrixSetDiag.cpp
+++ b/compiler/luci/import/src/Nodes/CircleMatrixSetDiag.cpp
@@ -25,19 +25,16 @@ namespace luci
bool CircleMatrixSetDiagGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
+ if (!GraphBuilder::validate(args, 2))
return false;
- if (outputs.size() != 1)
- return false;
-
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(0));
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
+ const auto tensors = args.reader.tensors();
+ const auto tensor = tensors.at(inputs.at(0));
- if (tensors[outputs[0]]->type != tensor->type)
+ assert(tensors[outputs[0]] != nullptr && tensor != nullptr);
+ if (tensors[outputs[0]]->type() != tensor->type())
return false;
return true;
diff --git a/compiler/luci/import/src/Nodes/CircleMaxPool2D.cpp b/compiler/luci/import/src/Nodes/CircleMaxPool2D.cpp
index 4bca0f40b..5c03fff18 100644
--- a/compiler/luci/import/src/Nodes/CircleMaxPool2D.cpp
+++ b/compiler/luci/import/src/Nodes/CircleMaxPool2D.cpp
@@ -25,10 +25,7 @@ namespace luci
bool CircleMaxPool2DGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleMaxPool2DGraphBuilder::build_node(const circle::OperatorT &op,
diff --git a/compiler/luci/import/src/Nodes/CircleMean.cpp b/compiler/luci/import/src/Nodes/CircleMean.cpp
index d8fa9a53d..7882f17fc 100644
--- a/compiler/luci/import/src/Nodes/CircleMean.cpp
+++ b/compiler/luci/import/src/Nodes/CircleMean.cpp
@@ -23,10 +23,7 @@ namespace luci
bool CircleMeanGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleMeanGraphBuilder::build_node(const circle::OperatorT &op,
diff --git a/compiler/luci/import/src/Nodes/CircleMirrorPad.cpp b/compiler/luci/import/src/Nodes/CircleMirrorPad.cpp
index e0ddd4c11..e40ce2249 100644
--- a/compiler/luci/import/src/Nodes/CircleMirrorPad.cpp
+++ b/compiler/luci/import/src/Nodes/CircleMirrorPad.cpp
@@ -25,12 +25,8 @@ namespace luci
bool CircleMirrorPadGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
// TODO check others
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleMirrorPadGraphBuilder::build_node(const circle::OperatorT &op,
diff --git a/compiler/luci/import/src/Nodes/CircleMul.cpp b/compiler/luci/import/src/Nodes/CircleMul.cpp
index e3c4a7ee5..28421f8c4 100644
--- a/compiler/luci/import/src/Nodes/CircleMul.cpp
+++ b/compiler/luci/import/src/Nodes/CircleMul.cpp
@@ -23,13 +23,7 @@ namespace luci
bool CircleMulGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleMulGraphBuilder::build_node(const circle::OperatorT &op,
diff --git a/compiler/luci/import/src/Nodes/CircleNeg.cpp b/compiler/luci/import/src/Nodes/CircleNeg.cpp
index a64a69560..9dd1458f4 100644
--- a/compiler/luci/import/src/Nodes/CircleNeg.cpp
+++ b/compiler/luci/import/src/Nodes/CircleNeg.cpp
@@ -24,11 +24,8 @@ namespace luci
{
bool CircleNegGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
// TODO Support type check
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleNegGraphBuilder::build_node(const circle::OperatorT &,
diff --git a/compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV4.cpp b/compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV4.cpp
index a4ad4a53d..e86f2ba81 100644
--- a/compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV4.cpp
+++ b/compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV4.cpp
@@ -35,20 +35,26 @@ bool CircleNonMaxSuppressionV4GraphBuilder::validate(const ValidateArgs &args) c
if (outputs.size() != 2)
return false;
- const auto &tensors = args.reader.tensors();
- const auto &boxes_tensor = tensors.at(inputs[0]);
- if (boxes_tensor->shape.size() != 2)
+ const auto tensors = args.reader.tensors();
+ const auto boxes_tensor = tensors.at(inputs[0]);
+ assert(boxes_tensor != nullptr);
+ const auto boxes_tensor_shape = wrap(boxes_tensor->shape());
+ if (boxes_tensor_shape.size() != 2)
return false;
- if (boxes_tensor->shape.at(1) != 4)
+ if (boxes_tensor_shape.at(1) != 4)
return false;
- if (boxes_tensor->shape.at(0) != tensors.at(inputs[1])->shape.at(0))
+ assert(tensors.at(inputs[1]) != nullptr);
+ if (boxes_tensor_shape.at(0) != wrap(tensors.at(inputs[1])->shape()).at(0))
return false;
- if (tensors.at(inputs[2])->type != circle::TensorType_INT32)
+ assert(tensors.at(inputs[2]) != nullptr);
+ if (tensors.at(inputs[2])->type() != circle::TensorType_INT32)
return false;
- if (tensors.at(inputs[3])->type != circle::TensorType_FLOAT32)
+ assert(tensors.at(inputs[3]) != nullptr);
+ if (tensors.at(inputs[3])->type() != circle::TensorType_FLOAT32)
return false;
- if (tensors.at(inputs[4])->type != circle::TensorType_FLOAT32)
+ assert(tensors.at(inputs[4]) != nullptr);
+ if (tensors.at(inputs[4])->type() != circle::TensorType_FLOAT32)
return false;
return true;
@@ -61,63 +67,27 @@ bool CircleNonMaxSuppressionV4GraphBuilder::validate(const ValidateArgs &args) c
* We will create multiple CircleNonMaxSuppressionV4Out nodes to emulate this
*/
-void CircleNonMaxSuppressionV4GraphBuilder::build(const circle::OperatorT &op,
- GraphBuilderContext *context) const
+CircleNode *CircleNonMaxSuppressionV4GraphBuilder::build_node(const BuildNodeArgs &bna) const
{
- assert(context != nullptr);
-
- auto graph = context->graph();
-
- const std::vector<int32_t> &inputs = op.inputs;
- const std::vector<int32_t> &outputs = op.outputs;
- const auto &tensors = context->reader()->tensors();
- const auto &opcodes = context->reader()->opcodes();
- auto tensors_ptr = context->reader()->tensors_ptr();
- assert(tensors_ptr != nullptr);
-
- std::vector<CircleNode *> input_nodes;
- for (const int32_t input_tensor_index : inputs)
- {
- input_nodes.push_back(context->nodefinder()->node(input_tensor_index));
- }
-
- // Create CircleNonMaxSuppressionV4
- auto node = graph->nodes()->create<CircleNonMaxSuppressionV4>();
- node->boxes(input_nodes[0]);
- node->scores(input_nodes[1]);
- node->max_output_size(input_nodes[2]);
- node->iou_threshold(input_nodes[3]);
- node->score_threshold(input_nodes[4]);
-
- assert(outputs.size() == 2);
- {
- // Let's use name of output 0 as NonMaxSuppressionV4 name
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
- node->name(tensor_name(output_tensor));
- node->op_version(opcodes[op.opcode_index].get()->version);
-
- // NOTE We don't set quantization for NonMaxSuppressionV4 itself but to virtual outputs
- }
-
- // Create virtual outputs of NonMaxSuppressionV4
- for (size_t n = 0; n < outputs.size(); ++n)
- {
- const circle::TensorT &output_tensor = *tensors[outputs[n]];
-
- auto *nodeout = graph->nodes()->create<CircleNonMaxSuppressionV4Out>();
- copy_tensor_attributes(output_tensor, nodeout);
-
- // mark shape_status
- if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
- nodeout->shape_status(ShapeStatus::NOSHAPE);
- else
- nodeout->shape_status(ShapeStatus::VALID);
-
- nodeout->input(node);
- nodeout->index(n);
-
- context->nodefinder()->enroll(outputs[n], nodeout);
- }
+ auto node = bna.context->graph()->nodes()->create<CircleNonMaxSuppressionV4>();
+
+ node->boxes(bna.input_nodes[0]);
+ node->scores(bna.input_nodes[1]);
+ node->max_output_size(bna.input_nodes[2]);
+ node->iou_threshold(bna.input_nodes[3]);
+ node->score_threshold(bna.input_nodes[4]);
+
+ return node;
+}
+
+CircleNode *CircleNonMaxSuppressionV4GraphBuilder::build_out(const BuildOutArgs &boa) const
+{
+ auto *nodeout = boa.node->graph()->nodes()->create<CircleNonMaxSuppressionV4Out>();
+
+ nodeout->input(boa.node);
+ nodeout->index(boa.index);
+
+ return nodeout;
}
} // namespace luci
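
The tensor accesses in these hunks switch from the unpacked TensorT members (tensor->type, tensor->shape) to the direct flatbuffers accessors (tensor->type(), tensor->shape()), where both the table entry and shape() may legitimately be null; wrap() is assumed to adapt the shape pointer into an empty view in the null case. The guard pattern the new code relies on, in sketch form mirroring the hunks above:

// Sketch of the new access pattern (assumes wrap() yields an empty view
// when shape() is null).
const auto tensors = args.reader.tensors();
const auto tensor = tensors.at(inputs[0]);
assert(tensor != nullptr);                 // table entries may be null
const auto shape = wrap(tensor->shape());  // safe even when shape() == nullptr
if (shape.size() != 2)
  return false;
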
diff --git a/compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV5.cpp b/compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV5.cpp
index 241dbf5ff..a60eed4e4 100644
--- a/compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV5.cpp
+++ b/compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV5.cpp
@@ -35,22 +35,29 @@ bool CircleNonMaxSuppressionV5GraphBuilder::validate(const ValidateArgs &args) c
if (outputs.size() != 3)
return false;
- const auto &tensors = args.reader.tensors();
- const auto &boxes_tensor = tensors.at(inputs[0]);
- if (boxes_tensor->shape.size() != 2)
+ const auto tensors = args.reader.tensors();
+ const auto boxes_tensor = tensors.at(inputs[0]);
+ assert(boxes_tensor != nullptr);
+ const auto boxes_tensor_shape = wrap(boxes_tensor->shape());
+ if (boxes_tensor_shape.size() != 2)
return false;
- if (boxes_tensor->shape.at(1) != 4)
+ if (boxes_tensor_shape.at(1) != 4)
return false;
- if (boxes_tensor->shape.at(0) != tensors.at(inputs[1])->shape.at(0))
+ assert(tensors.at(inputs[1]) != nullptr);
+ if (boxes_tensor_shape.at(0) != wrap(tensors.at(inputs[1])->shape()).at(0))
return false;
- if (tensors.at(inputs[2])->type != circle::TensorType_INT32)
+ assert(tensors.at(inputs[2]) != nullptr);
+ if (tensors.at(inputs[2])->type() != circle::TensorType_INT32)
return false;
- if (tensors.at(inputs[3])->type != circle::TensorType_FLOAT32)
+ assert(tensors.at(inputs[3]) != nullptr);
+ if (tensors.at(inputs[3])->type() != circle::TensorType_FLOAT32)
return false;
- if (tensors.at(inputs[4])->type != circle::TensorType_FLOAT32)
+ assert(tensors.at(inputs[4]) != nullptr);
+ if (tensors.at(inputs[4])->type() != circle::TensorType_FLOAT32)
return false;
- if (tensors.at(inputs[5])->type != circle::TensorType_FLOAT32)
+ assert(tensors.at(inputs[5]) != nullptr);
+ if (tensors.at(inputs[5])->type() != circle::TensorType_FLOAT32)
return false;
return true;
@@ -63,64 +70,28 @@ bool CircleNonMaxSuppressionV5GraphBuilder::validate(const ValidateArgs &args) c
* We will create multiple CircleNonMaxSuppressionV5Out nodes to emulate this
*/
-void CircleNonMaxSuppressionV5GraphBuilder::build(const circle::OperatorT &op,
- GraphBuilderContext *context) const
+CircleNode *CircleNonMaxSuppressionV5GraphBuilder::build_node(const BuildNodeArgs &bna) const
{
- assert(context != nullptr);
-
- auto graph = context->graph();
-
- const std::vector<int32_t> &inputs = op.inputs;
- const std::vector<int32_t> &outputs = op.outputs;
- const auto &tensors = context->reader()->tensors();
- const auto &opcodes = context->reader()->opcodes();
- auto tensors_ptr = context->reader()->tensors_ptr();
- assert(tensors_ptr != nullptr);
-
- std::vector<CircleNode *> input_nodes;
- for (const int32_t input_tensor_index : inputs)
- {
- input_nodes.push_back(context->nodefinder()->node(input_tensor_index));
- }
-
- // Create CircleNonMaxSuppressionV5
- auto node = graph->nodes()->create<CircleNonMaxSuppressionV5>();
- node->boxes(input_nodes[0]);
- node->scores(input_nodes[1]);
- node->max_output_size(input_nodes[2]);
- node->iou_threshold(input_nodes[3]);
- node->score_threshold(input_nodes[4]);
- node->soft_nms_sigma(input_nodes[5]);
-
- assert(outputs.size() == 3);
- {
- // Let's use name of output 0 as NonMaxSuppressionV5 name
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
- node->name(tensor_name(output_tensor));
- node->op_version(opcodes[op.opcode_index].get()->version);
-
- // NOTE We don't set quantization for NonMaxSuppressionV5 itself but to virtual outputs
- }
-
- // Create virtual outputs of NonMaxSuppressionV5
- for (size_t n = 0; n < outputs.size(); ++n)
- {
- const circle::TensorT &output_tensor = *tensors[outputs[n]];
-
- auto *nodeout = graph->nodes()->create<CircleNonMaxSuppressionV5Out>();
- copy_tensor_attributes(output_tensor, nodeout);
-
- // mark shape_status
- if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
- nodeout->shape_status(ShapeStatus::NOSHAPE);
- else
- nodeout->shape_status(ShapeStatus::VALID);
-
- nodeout->input(node);
- nodeout->index(n);
-
- context->nodefinder()->enroll(outputs[n], nodeout);
- }
+ auto node = bna.context->graph()->nodes()->create<CircleNonMaxSuppressionV5>();
+
+ node->boxes(bna.input_nodes[0]);
+ node->scores(bna.input_nodes[1]);
+ node->max_output_size(bna.input_nodes[2]);
+ node->iou_threshold(bna.input_nodes[3]);
+ node->score_threshold(bna.input_nodes[4]);
+ node->soft_nms_sigma(bna.input_nodes[5]);
+
+ return node;
+}
+
+CircleNode *CircleNonMaxSuppressionV5GraphBuilder::build_out(const BuildOutArgs &boa) const
+{
+ auto *nodeout = boa.node->graph()->nodes()->create<CircleNonMaxSuppressionV5Out>();
+
+ nodeout->input(boa.node);
+ nodeout->index(boa.index);
+
+ return nodeout;
}
} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleNotEqual.cpp b/compiler/luci/import/src/Nodes/CircleNotEqual.cpp
index 77e986de1..3f5c1e033 100644
--- a/compiler/luci/import/src/Nodes/CircleNotEqual.cpp
+++ b/compiler/luci/import/src/Nodes/CircleNotEqual.cpp
@@ -25,27 +25,21 @@ namespace luci
bool CircleNotEqualGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- {
+ if (!GraphBuilder::validate(args, 2))
return false;
- }
- if (outputs.size() != 1)
- {
- return false;
- }
-
- const auto &tensors = args.reader.tensors();
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
+ const auto tensors = args.reader.tensors();
- if (tensors[inputs.at(0)]->type != tensors[inputs.at(1)]->type)
+ assert(tensors[inputs.at(0)] != nullptr && tensors[inputs.at(1)] != nullptr);
+ if (tensors[inputs.at(0)]->type() != tensors[inputs.at(1)]->type())
{
return false;
}
- return tensors[outputs[0]]->type == circle::TensorType::TensorType_BOOL;
+ assert(tensors[outputs[0]] != nullptr);
+ return tensors[outputs[0]]->type() == circle::TensorType::TensorType_BOOL;
}
CircleNode *CircleNotEqualGraphBuilder::build_node(const circle::OperatorT &,
diff --git a/compiler/luci/import/src/Nodes/CircleOneHot.cpp b/compiler/luci/import/src/Nodes/CircleOneHot.cpp
index 69294e1ed..6e5f8e16f 100644
--- a/compiler/luci/import/src/Nodes/CircleOneHot.cpp
+++ b/compiler/luci/import/src/Nodes/CircleOneHot.cpp
@@ -26,32 +26,31 @@ namespace luci
bool CircleOneHotGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
- const auto *options = args.op.builtin_options.AsOneHotOptions();
-
// Only 4 inputs are accepted
- if (inputs.size() != 4)
+ if (!GraphBuilder::validate(args, 4))
return false;
- if (outputs.size() != 1)
- return false;
-
- const auto &tensors = args.reader.tensors();
- const auto &indices = tensors.at(inputs.at(0));
- const auto &depth = tensors.at(inputs.at(1));
- const auto &on_value = tensors.at(inputs.at(2));
- const auto &off_value = tensors.at(inputs.at(3));
+ const auto &inputs = args.op.inputs;
+ const auto *options = args.op.builtin_options.AsOneHotOptions();
+ const auto tensors = args.reader.tensors();
+ const auto indices = tensors.at(inputs.at(0));
+ const auto depth = tensors.at(inputs.at(1));
+ const auto on_value = tensors.at(inputs.at(2));
+ const auto off_value = tensors.at(inputs.at(3));
+ assert(indices != nullptr);
+ assert(depth != nullptr);
+ assert(on_value != nullptr);
+ assert(off_value != nullptr);
- if (options->axis < -1 || options->axis > static_cast<int32_t>(indices->shape.size()))
+ if (options->axis < -1 || options->axis > static_cast<int32_t>(wrap(indices->shape()).size()))
return false;
- if (depth->shape.size() != 0)
+ if (wrap(depth->shape()).size() != 0)
return false;
- if (on_value->shape.size() != 0)
+ if (wrap(on_value->shape()).size() != 0)
return false;
- if (off_value->shape.size() != 0)
+ if (wrap(off_value->shape()).size() != 0)
return false;
- if (on_value->type != off_value->type)
+ if (on_value->type() != off_value->type())
return false;
return true;
diff --git a/compiler/luci/import/src/Nodes/CirclePRelu.cpp b/compiler/luci/import/src/Nodes/CirclePRelu.cpp
index c07920f7c..7c81f04bb 100644
--- a/compiler/luci/import/src/Nodes/CirclePRelu.cpp
+++ b/compiler/luci/import/src/Nodes/CirclePRelu.cpp
@@ -25,13 +25,7 @@ namespace luci
bool CirclePReluGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CirclePReluGraphBuilder::build_node(const circle::OperatorT &,
diff --git a/compiler/luci/import/src/Nodes/CirclePad.cpp b/compiler/luci/import/src/Nodes/CirclePad.cpp
index 999173b90..67dce6dee 100644
--- a/compiler/luci/import/src/Nodes/CirclePad.cpp
+++ b/compiler/luci/import/src/Nodes/CirclePad.cpp
@@ -25,12 +25,8 @@ namespace luci
bool CirclePadGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
// TODO do attribute checks
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CirclePadGraphBuilder::build_node(const circle::OperatorT &op,
diff --git a/compiler/luci/import/src/Nodes/CirclePadV2.cpp b/compiler/luci/import/src/Nodes/CirclePadV2.cpp
index 493876e68..84a45722a 100644
--- a/compiler/luci/import/src/Nodes/CirclePadV2.cpp
+++ b/compiler/luci/import/src/Nodes/CirclePadV2.cpp
@@ -25,13 +25,7 @@ namespace luci
bool CirclePadV2GraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 3)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 3);
}
CircleNode *CirclePadV2GraphBuilder::build_node(const circle::OperatorT &op,
diff --git a/compiler/luci/import/src/Nodes/CirclePow.cpp b/compiler/luci/import/src/Nodes/CirclePow.cpp
index def012614..1d2d41607 100644
--- a/compiler/luci/import/src/Nodes/CirclePow.cpp
+++ b/compiler/luci/import/src/Nodes/CirclePow.cpp
@@ -25,13 +25,7 @@ namespace luci
bool CirclePowGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CirclePowGraphBuilder::build_node(const circle::OperatorT &,
diff --git a/compiler/luci/import/src/Nodes/CircleQuantize.cpp b/compiler/luci/import/src/Nodes/CircleQuantize.cpp
new file mode 100644
index 000000000..9247a76d9
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleQuantize.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleQuantize.h"
+
+#include <luci/IR/Nodes/CircleQuantize.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CircleQuantizeGraphBuilder::validate(const ValidateArgs &args) const
+{
+ return GraphBuilder::validate(args, 1);
+}
+
+CircleNode *CircleQuantizeGraphBuilder::build_node(const circle::OperatorT &,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CircleQuantize>();
+ node->input(inputs.at(0));
+
+ // No options for Quantize
+
+ return node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleRange.cpp b/compiler/luci/import/src/Nodes/CircleRange.cpp
index 38dc44ed6..d3b5afc95 100644
--- a/compiler/luci/import/src/Nodes/CircleRange.cpp
+++ b/compiler/luci/import/src/Nodes/CircleRange.cpp
@@ -24,11 +24,8 @@ namespace luci
{
bool CircleRangeGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 3)
- return false;
-
// TODO Support type check
- return true;
+ return GraphBuilder::validate(args, 3);
}
CircleNode *CircleRangeGraphBuilder::build_node(const circle::OperatorT &,
diff --git a/compiler/luci/import/src/Nodes/CircleRank.cpp b/compiler/luci/import/src/Nodes/CircleRank.cpp
index 12658b192..afebb9509 100644
--- a/compiler/luci/import/src/Nodes/CircleRank.cpp
+++ b/compiler/luci/import/src/Nodes/CircleRank.cpp
@@ -24,13 +24,7 @@ namespace luci
{
bool CircleRankGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleRankGraphBuilder::build_node(const circle::OperatorT &,
diff --git a/compiler/luci/import/src/Nodes/CircleReduceAny.cpp b/compiler/luci/import/src/Nodes/CircleReduceAny.cpp
index 21a821951..ebe2368e0 100644
--- a/compiler/luci/import/src/Nodes/CircleReduceAny.cpp
+++ b/compiler/luci/import/src/Nodes/CircleReduceAny.cpp
@@ -23,24 +23,25 @@ namespace luci
bool CircleReduceAnyGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
- if (inputs.size() != 2)
- return false;
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 2))
return false;
- const auto &tensors = args.reader.tensors();
- const auto &tensor_0 = tensors.at(inputs.at(0));
- const auto &tensor_1 = tensors.at(inputs.at(1));
- const auto &tensor_o = tensors.at(outputs[0]);
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
+ const auto tensors = args.reader.tensors();
+ const auto tensor_0 = tensors.at(inputs.at(0));
+ const auto tensor_1 = tensors.at(inputs.at(1));
+ const auto tensor_o = tensors.at(outputs[0]);
+ assert(tensor_0 != nullptr);
+ assert(tensor_1 != nullptr);
+ assert(tensor_o != nullptr);
- if (tensor_0->type != circle::TensorType_BOOL)
+ if (tensor_0->type() != circle::TensorType_BOOL)
return false;
- if (tensor_o->type != circle::TensorType_BOOL)
+ if (tensor_o->type() != circle::TensorType_BOOL)
return false;
- switch (tensor_1->type)
+ switch (tensor_1->type())
{
case circle::TensorType_INT32:
case circle::TensorType_INT64:
diff --git a/compiler/luci/import/src/Nodes/CircleReduceProd.cpp b/compiler/luci/import/src/Nodes/CircleReduceProd.cpp
index 5f054586e..3b874b7c9 100644
--- a/compiler/luci/import/src/Nodes/CircleReduceProd.cpp
+++ b/compiler/luci/import/src/Nodes/CircleReduceProd.cpp
@@ -23,19 +23,18 @@ namespace luci
bool CircleReduceProdGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 2)
- return false;
- if (args.op.outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 2))
return false;
- const auto &tensors = args.reader.tensors();
- const auto &tensor_1 = tensors.at(inputs.at(1));
+ const auto &inputs = args.op.inputs;
+ const auto tensors = args.reader.tensors();
+ const auto tensor_1 = tensors.at(inputs.at(1));
+ assert(tensor_1 != nullptr);
// TODO check input types
// Check for reduction_indices types
- switch (tensor_1->type)
+ switch (tensor_1->type())
{
case circle::TensorType_INT32:
case circle::TensorType_INT64:
diff --git a/compiler/luci/import/src/Nodes/CircleRelu.cpp b/compiler/luci/import/src/Nodes/CircleRelu.cpp
index 8e1c32a3a..73b8ffee8 100644
--- a/compiler/luci/import/src/Nodes/CircleRelu.cpp
+++ b/compiler/luci/import/src/Nodes/CircleRelu.cpp
@@ -25,13 +25,7 @@ namespace luci
bool CircleReluGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleReluGraphBuilder::build_node(const circle::OperatorT &,
diff --git a/compiler/luci/import/src/Nodes/CircleRelu6.cpp b/compiler/luci/import/src/Nodes/CircleRelu6.cpp
index 0283d7350..ab957eda8 100644
--- a/compiler/luci/import/src/Nodes/CircleRelu6.cpp
+++ b/compiler/luci/import/src/Nodes/CircleRelu6.cpp
@@ -25,13 +25,7 @@ namespace luci
bool CircleRelu6GraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleRelu6GraphBuilder::build_node(const circle::OperatorT &,
diff --git a/compiler/luci/import/src/Nodes/CircleReluN1To1.cpp b/compiler/luci/import/src/Nodes/CircleReluN1To1.cpp
index 7f517bc0d..4987f3be2 100644
--- a/compiler/luci/import/src/Nodes/CircleReluN1To1.cpp
+++ b/compiler/luci/import/src/Nodes/CircleReluN1To1.cpp
@@ -25,15 +25,8 @@ namespace luci
bool CircleReluN1To1GraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
// TODO check dtypes
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleReluN1To1GraphBuilder::build_node(const circle::OperatorT &,
diff --git a/compiler/luci/import/src/Nodes/CircleReshape.cpp b/compiler/luci/import/src/Nodes/CircleReshape.cpp
index 996ae9d20..12da54ef7 100644
--- a/compiler/luci/import/src/Nodes/CircleReshape.cpp
+++ b/compiler/luci/import/src/Nodes/CircleReshape.cpp
@@ -30,6 +30,19 @@ bool CircleReshapeGraphBuilder::validate(const ValidateArgs &args) const
if (args.op.outputs.size() != 1)
return false;
+ // for two inputs, check if type is S32 or S64
+ if (args.op.inputs.size() == 2)
+ {
+ const auto &inputs = args.op.inputs;
+ const auto tensors = args.reader.tensors();
+ const auto tensor_in = tensors.at(inputs.at(1));
+ assert(tensor_in != nullptr);
+
+ if (tensor_in->type() != circle::TensorType::TensorType_INT32 &&
+ tensor_in->type() != circle::TensorType::TensorType_INT64)
+ return false;
+ }
+
return true;
}
@@ -53,6 +66,7 @@ static CircleNode *create_shape_node(const std::vector<int32_t> &shape, loco::Gr
{
shape_node->at<loco::DataType::S32>(i) = shape[i];
}
+ shape_node->name("Reshape/shape");
return shape_node;
}
@@ -73,6 +87,7 @@ CircleNode *CircleReshapeGraphBuilder::build_node(const circle::OperatorT &op,
shape_node = graph->nodes()->create<CircleOutputDummy>();
shape_node->dtype(loco::DataType::S32);
shape_node->rank(0);
+ shape_node->name("Reshape/dummy");
}
}
diff --git a/compiler/luci/import/src/Nodes/CircleResizeBilinear.cpp b/compiler/luci/import/src/Nodes/CircleResizeBilinear.cpp
index 0fccb7b44..c751b245c 100644
--- a/compiler/luci/import/src/Nodes/CircleResizeBilinear.cpp
+++ b/compiler/luci/import/src/Nodes/CircleResizeBilinear.cpp
@@ -16,7 +16,6 @@
#include "luci/Import/Nodes/CircleResizeBilinear.h"
-#include <luci/IR/Nodes/CircleConst.h>
#include <luci/IR/Nodes/CircleResizeBilinear.h>
namespace luci
@@ -24,13 +23,7 @@ namespace luci
bool CircleResizeBilinearGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleResizeBilinearGraphBuilder::build_node(const circle::OperatorT &op,
diff --git a/compiler/luci/import/src/Nodes/CircleResizeNearestNeighbor.cpp b/compiler/luci/import/src/Nodes/CircleResizeNearestNeighbor.cpp
index 324323f59..df7517fe9 100644
--- a/compiler/luci/import/src/Nodes/CircleResizeNearestNeighbor.cpp
+++ b/compiler/luci/import/src/Nodes/CircleResizeNearestNeighbor.cpp
@@ -16,7 +16,6 @@
#include "luci/Import/Nodes/CircleResizeNearestNeighbor.h"
-#include <luci/IR/Nodes/CircleConst.h>
#include <luci/IR/Nodes/CircleResizeNearestNeighbor.h>
namespace luci
@@ -24,17 +23,11 @@ namespace luci
bool CircleResizeNearestNeighborGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleResizeNearestNeighborGraphBuilder::build_node(
- const circle::OperatorT &op, const std::vector<CircleNode *> &inputs, loco::Graph *graph) const
+ const circle::OperatorT &op, const std::vector<CircleNode *> &inputs, loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleResizeNearestNeighbor>();
node->input(inputs.at(0));
diff --git a/compiler/luci/import/src/Nodes/CircleReverseSequence.cpp b/compiler/luci/import/src/Nodes/CircleReverseSequence.cpp
index ad11d4c63..c9cc792bb 100644
--- a/compiler/luci/import/src/Nodes/CircleReverseSequence.cpp
+++ b/compiler/luci/import/src/Nodes/CircleReverseSequence.cpp
@@ -25,20 +25,20 @@ namespace luci
bool CircleReverseSequenceGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- return false;
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 2))
return false;
- const auto &tensors = args.reader.tensors();
- const auto &tensor_in = tensors.at(inputs.at(0));
- const auto &tensor_lengths = tensors.at(inputs.at(1));
- const auto &tensor_out = tensors.at(outputs[0]);
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
+ const auto tensors = args.reader.tensors();
+ const auto tensor_in = tensors.at(inputs.at(0));
+ const auto tensor_lengths = tensors.at(inputs.at(1));
+ const auto tensor_out = tensors.at(outputs[0]);
+ assert(tensor_in != nullptr);
+ assert(tensor_lengths != nullptr);
+ assert(tensor_out != nullptr);
- switch (tensor_lengths->type)
+ switch (tensor_lengths->type())
{
case circle::TensorType_INT32:
case circle::TensorType_INT64:
@@ -47,7 +47,7 @@ bool CircleReverseSequenceGraphBuilder::validate(const ValidateArgs &args) const
return false;
}
- if (tensor_in->type != tensor_out->type)
+ if (tensor_in->type() != tensor_out->type())
return false;
return true;
diff --git a/compiler/luci/import/src/Nodes/CircleReverseV2.cpp b/compiler/luci/import/src/Nodes/CircleReverseV2.cpp
index e2e53bb4b..c19a0fdd2 100644
--- a/compiler/luci/import/src/Nodes/CircleReverseV2.cpp
+++ b/compiler/luci/import/src/Nodes/CircleReverseV2.cpp
@@ -25,20 +25,20 @@ namespace luci
bool CircleReverseV2GraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- return false;
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 2))
return false;
- const auto &tensors = args.reader.tensors();
- const auto &tensor_in = tensors.at(inputs.at(0));
- const auto &tensor_axis = tensors.at(inputs.at(1));
- const auto &tensor_out = tensors.at(outputs[0]);
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
+ const auto tensors = args.reader.tensors();
+ const auto tensor_in = tensors.at(inputs.at(0));
+ const auto tensor_axis = tensors.at(inputs.at(1));
+ const auto tensor_out = tensors.at(outputs[0]);
+ assert(tensor_in != nullptr);
+ assert(tensor_axis != nullptr);
+ assert(tensor_out != nullptr);
- switch (tensor_axis->type)
+ switch (tensor_axis->type())
{
case circle::TensorType_INT32:
case circle::TensorType_INT64:
@@ -47,7 +47,7 @@ bool CircleReverseV2GraphBuilder::validate(const ValidateArgs &args) const
return false;
}
- if (tensor_out->type != tensor_in->type)
+ if (tensor_out->type() != tensor_in->type())
return false;
return true;
diff --git a/compiler/luci/import/src/Nodes/CircleRound.cpp b/compiler/luci/import/src/Nodes/CircleRound.cpp
index ad77f9f03..08cfae6c2 100644
--- a/compiler/luci/import/src/Nodes/CircleRound.cpp
+++ b/compiler/luci/import/src/Nodes/CircleRound.cpp
@@ -25,22 +25,21 @@ namespace luci
bool CircleRoundGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 1)
- return false;
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
// Must be one of the following types
// bfloat16, half (float16), float32, float64, complex64, complex128
// Currently, circle supports float16, float32, complex64
- const auto &tensors = args.reader.tensors();
- const auto &tensor_in = tensors.at(inputs.at(0));
- const auto &tensor_out = tensors.at(outputs[0]);
+ const auto tensors = args.reader.tensors();
+ const auto tensor_in = tensors.at(inputs.at(0));
+ const auto tensor_out = tensors.at(outputs[0]);
+ assert(tensor_in != nullptr);
+ assert(tensor_out != nullptr);
- switch (tensor_in->type)
+ switch (tensor_in->type())
{
case circle::TensorType_FLOAT16:
case circle::TensorType_FLOAT32:
@@ -52,7 +51,7 @@ bool CircleRoundGraphBuilder::validate(const ValidateArgs &args) const
return false;
}
- if (tensor_out->type != tensor_in->type)
+ if (tensor_out->type() != tensor_in->type())
return false;
return true;
diff --git a/compiler/luci/import/src/Nodes/CircleRsqrt.cpp b/compiler/luci/import/src/Nodes/CircleRsqrt.cpp
index ae05fbbf9..e3bc68f8b 100644
--- a/compiler/luci/import/src/Nodes/CircleRsqrt.cpp
+++ b/compiler/luci/import/src/Nodes/CircleRsqrt.cpp
@@ -25,17 +25,20 @@ namespace luci
bool CircleRsqrtGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
+ const auto &inputs = args.op.inputs;
// Must be one of the following types
// bfloat16, half (float16), float32, float64, complex64, complex128
// Currently, circle supports float16, float32, complex64
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(0));
- switch (tensor->type)
+ const auto tensors = args.reader.tensors();
+ const auto tensor = tensors.at(inputs.at(0));
+ assert(tensor != nullptr);
+ switch (tensor->type())
{
+ case circle::TensorType_UINT8:
+ case circle::TensorType_INT16:
case circle::TensorType_FLOAT16:
case circle::TensorType_FLOAT32:
case circle::TensorType_COMPLEX64:
diff --git a/compiler/luci/import/src/Nodes/CircleSVDF.cpp b/compiler/luci/import/src/Nodes/CircleSVDF.cpp
new file mode 100644
index 000000000..ef57a132a
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleSVDF.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleSVDF.h"
+
+#include <luci/IR/Nodes/CircleSVDF.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CircleSVDFBuilder::validate(const ValidateArgs &args) const
+{
+ const auto &inputs = args.op.inputs;
+ if (!(inputs.size() == 4 || inputs.size() == 5))
+ return false;
+
+ return true;
+}
+
+CircleNode *CircleSVDFBuilder::build_node(const circle::OperatorT &op,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CircleSVDF>();
+ node->input(inputs.at(0));
+ node->weight_feature(inputs.at(1));
+ node->weight_time(inputs.at(2));
+ if (inputs.size() == 4)
+ {
+ auto *bias = graph->nodes()->create<CircleOutputExclude>();
+ node->bias(bias);
+
+ node->input_activation_state(inputs.at(3));
+ }
+ else
+ {
+ node->bias(inputs.at(3));
+ node->input_activation_state(inputs.at(4));
+ }
+
+ const auto *options = op.builtin_options.AsSVDFOptions();
+ node->svdf_rank(options->rank);
+ node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
+ node->asymmetric_quantize_inputs(options->asymmetric_quantize_inputs);
+
+ return node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleScatterNd.cpp b/compiler/luci/import/src/Nodes/CircleScatterNd.cpp
index 7f86aeb74..ebe252527 100644
--- a/compiler/luci/import/src/Nodes/CircleScatterNd.cpp
+++ b/compiler/luci/import/src/Nodes/CircleScatterNd.cpp
@@ -25,19 +25,20 @@ namespace luci
bool CircleScatterNdGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 3)
+ if (!GraphBuilder::validate(args, 3))
return false;
+ const auto &inputs = args.op.inputs;
// indices must have the same type as shape
- const auto &tensors = args.reader.tensors();
+ const auto tensors = args.reader.tensors();
- if (tensors[inputs.at(0)]->type != tensors[inputs.at(2)]->type)
+ assert(tensors[inputs.at(0)] != nullptr && tensors[inputs.at(2)] != nullptr);
+ if (tensors[inputs.at(0)]->type() != tensors[inputs.at(2)]->type())
return false;
// indices must be either int32 or int64
- if (tensors[inputs.at(0)]->type != circle::TensorType_INT32 &&
- tensors[inputs.at(0)]->type != circle::TensorType_INT64)
+ if (tensors[inputs.at(0)]->type() != circle::TensorType_INT32 &&
+ tensors[inputs.at(0)]->type() != circle::TensorType_INT64)
return false;
return true;
diff --git a/compiler/luci/import/src/Nodes/CircleSegmentSum.cpp b/compiler/luci/import/src/Nodes/CircleSegmentSum.cpp
index fb84e5d52..01d1aab44 100644
--- a/compiler/luci/import/src/Nodes/CircleSegmentSum.cpp
+++ b/compiler/luci/import/src/Nodes/CircleSegmentSum.cpp
@@ -25,19 +25,20 @@ namespace luci
bool CircleSegmentSumGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
- if (inputs.size() != 2)
- return false;
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 2))
return false;
- const auto &tensors = args.reader.tensors();
- const auto &tensor_in = tensors.at(inputs.at(0));
- const auto &tensor_out = tensors.at(outputs[0]);
- const auto &tensor_ids = tensors.at(inputs.at(1));
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
+ const auto tensors = args.reader.tensors();
+ const auto tensor_in = tensors.at(inputs.at(0));
+ const auto tensor_out = tensors.at(outputs[0]);
+ const auto tensor_ids = tensors.at(inputs.at(1));
+ assert(tensor_in != nullptr);
+ assert(tensor_out != nullptr);
+ assert(tensor_ids != nullptr);
- switch (tensor_ids->type)
+ switch (tensor_ids->type())
{
case circle::TensorType_INT32:
case circle::TensorType_INT64:
@@ -46,7 +47,7 @@ bool CircleSegmentSumGraphBuilder::validate(const ValidateArgs &args) const
return false;
}
- if (tensor_out->type != tensor_in->type)
+ if (tensor_out->type() != tensor_in->type())
{
return false;
}
diff --git a/compiler/luci/import/src/Nodes/CircleSelect.cpp b/compiler/luci/import/src/Nodes/CircleSelect.cpp
index 1e649f1e0..002f62f6c 100644
--- a/compiler/luci/import/src/Nodes/CircleSelect.cpp
+++ b/compiler/luci/import/src/Nodes/CircleSelect.cpp
@@ -25,16 +25,14 @@ namespace luci
bool CircleSelectGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
- if (inputs.size() != 3)
- return false;
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 3))
return false;
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(0));
- if (tensor->type != circle::TensorType_BOOL)
+ const auto &inputs = args.op.inputs;
+ const auto tensors = args.reader.tensors();
+ const auto tensor = tensors.at(inputs.at(0));
+ assert(tensor != nullptr);
+ if (tensor->type() != circle::TensorType_BOOL)
return false;
// TODO check dtypes for input 1, 2
diff --git a/compiler/luci/import/src/Nodes/CircleSelectV2.cpp b/compiler/luci/import/src/Nodes/CircleSelectV2.cpp
index e6dd04de0..062fdc143 100644
--- a/compiler/luci/import/src/Nodes/CircleSelectV2.cpp
+++ b/compiler/luci/import/src/Nodes/CircleSelectV2.cpp
@@ -25,21 +25,20 @@ namespace luci
bool CircleSelectV2GraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
- if (inputs.size() != 3)
- return false;
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 3))
return false;
- const auto &tensors = args.reader.tensors();
- const auto &condition = tensors.at(inputs.at(0));
- if (condition->type != circle::TensorType_BOOL)
+ const auto &inputs = args.op.inputs;
+ const auto tensors = args.reader.tensors();
+ const auto condition = tensors.at(inputs.at(0));
+ assert(condition != nullptr);
+ if (condition->type() != circle::TensorType_BOOL)
return false;
- const auto &t = tensors.at(inputs.at(1));
- const auto &e = tensors.at(inputs.at(2));
- if (t->type != e->type)
+ const auto t = tensors.at(inputs.at(1));
+ const auto e = tensors.at(inputs.at(2));
+ assert(t != nullptr && e != nullptr);
+ if (t->type() != e->type())
return false;
return true;
diff --git a/compiler/luci/import/src/Nodes/CircleShape.cpp b/compiler/luci/import/src/Nodes/CircleShape.cpp
index bd7dfc9d9..86c0bf59b 100644
--- a/compiler/luci/import/src/Nodes/CircleShape.cpp
+++ b/compiler/luci/import/src/Nodes/CircleShape.cpp
@@ -25,16 +25,8 @@ namespace luci
bool CircleShapeGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
- if (inputs.size() != 1)
- return false;
- if (outputs.size() != 1)
- return false;
-
// TODO check shape, dtype
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleShapeGraphBuilder::build_node(const circle::OperatorT &op,
diff --git a/compiler/luci/import/src/Nodes/CircleSin.cpp b/compiler/luci/import/src/Nodes/CircleSin.cpp
index 4b245ef6b..51ebf0355 100644
--- a/compiler/luci/import/src/Nodes/CircleSin.cpp
+++ b/compiler/luci/import/src/Nodes/CircleSin.cpp
@@ -25,16 +25,15 @@ namespace luci
bool CircleSinGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 1)
- return false;
- if (args.op.outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
+ const auto &inputs = args.op.inputs;
// input type check
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(0));
- switch (tensor->type)
+ const auto tensors = args.reader.tensors();
+ const auto tensor = tensors.at(inputs.at(0));
+ assert(tensor != nullptr);
+ switch (tensor->type())
{
case circle::TensorType_FLOAT16:
case circle::TensorType_FLOAT32:
diff --git a/compiler/luci/import/src/Nodes/CircleSlice.cpp b/compiler/luci/import/src/Nodes/CircleSlice.cpp
index 8601fbf21..4166040b3 100644
--- a/compiler/luci/import/src/Nodes/CircleSlice.cpp
+++ b/compiler/luci/import/src/Nodes/CircleSlice.cpp
@@ -27,14 +27,8 @@ namespace luci
bool CircleSliceGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 3)
- return false;
- if (args.op.outputs.size() != 1)
- return false;
-
// TODO check shapes and types
-
- return true;
+ return GraphBuilder::validate(args, 3);
}
CircleNode *CircleSliceGraphBuilder::build_node(const circle::OperatorT &,
diff --git a/compiler/luci/import/src/Nodes/CircleSoftmax.cpp b/compiler/luci/import/src/Nodes/CircleSoftmax.cpp
index 0ef0b5418..e79914455 100644
--- a/compiler/luci/import/src/Nodes/CircleSoftmax.cpp
+++ b/compiler/luci/import/src/Nodes/CircleSoftmax.cpp
@@ -25,12 +25,8 @@ namespace luci
bool CircleSoftmaxGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
// TODO do attribute checks
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleSoftmaxGraphBuilder::build_node(const circle::OperatorT &op,
diff --git a/compiler/luci/import/src/Nodes/CircleSpaceToDepth.cpp b/compiler/luci/import/src/Nodes/CircleSpaceToDepth.cpp
index 8ccd55dc6..2152b65c9 100644
--- a/compiler/luci/import/src/Nodes/CircleSpaceToDepth.cpp
+++ b/compiler/luci/import/src/Nodes/CircleSpaceToDepth.cpp
@@ -27,13 +27,8 @@ namespace luci
bool CircleSpaceToDepthGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 1)
- return false;
-
// TODO do attribute checks
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleSpaceToDepthGraphBuilder::build_node(const circle::OperatorT &op,
diff --git a/compiler/luci/import/src/Nodes/CircleSparseToDense.cpp b/compiler/luci/import/src/Nodes/CircleSparseToDense.cpp
index ac756b1f3..ce0688bb9 100644
--- a/compiler/luci/import/src/Nodes/CircleSparseToDense.cpp
+++ b/compiler/luci/import/src/Nodes/CircleSparseToDense.cpp
@@ -25,10 +25,7 @@ namespace luci
bool CircleSparseToDenseGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 4)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 4);
}
CircleNode *CircleSparseToDenseGraphBuilder::build_node(const circle::OperatorT &op,
diff --git a/compiler/luci/import/src/Nodes/CircleSplit.cpp b/compiler/luci/import/src/Nodes/CircleSplit.cpp
index 07b6cc939..d0a24aae3 100644
--- a/compiler/luci/import/src/Nodes/CircleSplit.cpp
+++ b/compiler/luci/import/src/Nodes/CircleSplit.cpp
@@ -58,62 +58,27 @@ bool CircleSplitGraphBuilder::validate(const ValidateArgs &args) const
* \- CircleSplitOut --- FullyConnected ---
*/
-void CircleSplitGraphBuilder::build(const circle::OperatorT &op, GraphBuilderContext *context) const
+CircleNode *CircleSplitGraphBuilder::build_node(const BuildNodeArgs &bna) const
{
- assert(context != nullptr);
+ auto node = bna.context->graph()->nodes()->create<CircleSplit>();
- auto graph = context->graph();
+ node->split_dim(bna.input_nodes[0]);
+ node->input(bna.input_nodes[1]);
- const std::vector<int32_t> &inputs = op.inputs;
- const std::vector<int32_t> &outputs = op.outputs;
- const auto &tensors = context->reader()->tensors();
- const auto &opcodes = context->reader()->opcodes();
- auto tensors_ptr = context->reader()->tensors_ptr();
- assert(tensors_ptr != nullptr);
+ const auto *options = bna.op.builtin_options.AsSplitOptions();
+ node->num_split(options->num_splits);
- std::vector<CircleNode *> input_nodes;
- for (const int32_t input_tensor_index : inputs)
- {
- input_nodes.push_back(context->nodefinder()->node(input_tensor_index));
- }
+ return node;
+}
- // Create CircleSplit
- auto node = graph->nodes()->create<CircleSplit>();
- node->split_dim(input_nodes[0]);
- node->input(input_nodes[1]);
+CircleNode *CircleSplitGraphBuilder::build_out(const BuildOutArgs &boa) const
+{
+ auto *nodeout = boa.node->graph()->nodes()->create<CircleSplitOut>();
- const auto *options = op.builtin_options.AsSplitOptions();
- node->num_split(options->num_splits);
+ nodeout->input(boa.node);
+ nodeout->index(boa.index);
- assert(outputs.size() > 0);
- assert(int32_t(outputs.size()) == options->num_splits);
- {
- // Let's use name of output 0 as Split name
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
- node->name(tensor_name(output_tensor));
- node->op_version(opcodes[op.opcode_index].get()->version);
-
- // NOTE We don't set quantization for Split itself but to virtual outputs
- }
-
- // Create virtual outputs of Split
- for (int32_t n = 0; n < options->num_splits; ++n)
- {
- const circle::TensorT &output_tensor = *tensors[outputs[n]];
-
- auto *nodeout = graph->nodes()->create<CircleSplitOut>();
- copy_tensor_attributes(output_tensor, nodeout);
- // mark shape_status
- if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
- nodeout->shape_status(ShapeStatus::NOSHAPE);
- else
- nodeout->shape_status(ShapeStatus::VALID);
-
- nodeout->input(node);
- nodeout->index(n);
-
- context->nodefinder()->enroll(outputs[n], nodeout);
- }
+ return nodeout;
}
} // namespace luci
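
The Split rewrite is the template-method refactor applied to every multi-output importer in this patch: the monolithic build() that duplicated naming, op_version, quantization, shape-status, and nodefinder bookkeeping is replaced by two small hooks, build_node() and build_out(), while the shared base class drives the loop. A miniature of that structure under simplified stand-in types (Node, Arena, and MultiOutputBuilder are illustrative):

#include <memory>
#include <vector>

// Simplified stand-ins, not the real luci interfaces.
struct Node
{
  Node *input = nullptr;
  int index = -1;
};

struct Arena // plays the role of graph->nodes()
{
  std::vector<std::unique_ptr<Node>> pool;
  Node *create()
  {
    pool.push_back(std::make_unique<Node>());
    return pool.back().get();
  }
};

struct MultiOutputBuilder
{
  virtual ~MultiOutputBuilder() = default;

  // The only parts a concrete importer still writes:
  virtual Node *build_node(Arena &g) const = 0;
  virtual Node *build_out(Arena &g) const = 0;

  // The base class drives creation and owns the per-output bookkeeping each
  // importer previously duplicated (naming, shape status, enrollment).
  Node *build(Arena &g, int output_count) const
  {
    Node *node = build_node(g);
    for (int i = 0; i < output_count; ++i)
    {
      Node *out = build_out(g);
      out->input = node; // same wiring as nodeout->input(boa.node) above
      out->index = i;
    }
    return node;
  }
};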
diff --git a/compiler/luci/import/src/Nodes/CircleSplitV.cpp b/compiler/luci/import/src/Nodes/CircleSplitV.cpp
index 7c6e83e17..76cbf7046 100644
--- a/compiler/luci/import/src/Nodes/CircleSplitV.cpp
+++ b/compiler/luci/import/src/Nodes/CircleSplitV.cpp
@@ -58,64 +58,30 @@ bool CircleSplitVGraphBuilder::validate(const ValidateArgs &args) const
* \- CircleSplitVOut --- FullyConnected ---
*/
-void CircleSplitVGraphBuilder::build(const circle::OperatorT &op,
- GraphBuilderContext *context) const
+CircleNode *CircleSplitVGraphBuilder::build_node(const BuildNodeArgs &bna) const
{
- assert(context != nullptr);
-
- auto graph = context->graph();
-
- const std::vector<int32_t> &inputs = op.inputs;
- const std::vector<int32_t> &outputs = op.outputs;
- const auto &tensors = context->reader()->tensors();
- const auto &opcodes = context->reader()->opcodes();
- auto tensors_ptr = context->reader()->tensors_ptr();
- assert(tensors_ptr != nullptr);
-
- std::vector<CircleNode *> input_nodes;
- for (const int32_t input_tensor_index : inputs)
- {
- input_nodes.push_back(context->nodefinder()->node(input_tensor_index));
- }
-
- // Create CircleSplitV
- auto node = graph->nodes()->create<CircleSplitV>();
- node->input(input_nodes[0]);
- node->size_splits(input_nodes[1]);
- node->split_dim(input_nodes[2]);
-
- const auto *options = op.builtin_options.AsSplitVOptions();
+ auto node = bna.context->graph()->nodes()->create<CircleSplitV>();
+
+ node->input(bna.input_nodes[0]);
+ node->size_splits(bna.input_nodes[1]);
+ node->split_dim(bna.input_nodes[2]);
+
+ const auto *options = bna.op.builtin_options.AsSplitVOptions();
node->num_split(options->num_splits);
- assert(outputs.size() > 0);
- assert(int32_t(outputs.size()) == options->num_splits);
- {
- // Let's use name of output 0 as Split name
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
- node->name(tensor_name(output_tensor));
- node->op_version(opcodes[op.opcode_index].get()->version);
-
- // NOTE We don't set quantization for Split itself but to virtual outputs
- }
-
- // Create virtual outputs of Split
- for (int32_t n = 0; n < options->num_splits; ++n)
- {
- const circle::TensorT &output_tensor = *tensors[outputs[n]];
-
- auto *nodeout = graph->nodes()->create<CircleSplitVOut>();
- copy_tensor_attributes(output_tensor, nodeout);
- // mark shape_status
- if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
- nodeout->shape_status(ShapeStatus::NOSHAPE);
- else
- nodeout->shape_status(ShapeStatus::VALID);
-
- nodeout->input(node);
- nodeout->index(n);
-
- context->nodefinder()->enroll(outputs[n], nodeout);
- }
+ assert(int32_t(bna.op.outputs.size()) == options->num_splits);
+
+ return node;
+}
+
+CircleNode *CircleSplitVGraphBuilder::build_out(const BuildOutArgs &boa) const
+{
+ auto *nodeout = boa.node->graph()->nodes()->create<CircleSplitVOut>();
+
+ nodeout->input(boa.node);
+ nodeout->index(boa.index);
+
+ return nodeout;
}
} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleSqrt.cpp b/compiler/luci/import/src/Nodes/CircleSqrt.cpp
index c8beaee0d..b1fdf7996 100644
--- a/compiler/luci/import/src/Nodes/CircleSqrt.cpp
+++ b/compiler/luci/import/src/Nodes/CircleSqrt.cpp
@@ -25,10 +25,7 @@ namespace luci
bool CircleSqrtGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleSqrtGraphBuilder::build_node(const circle::OperatorT &,
diff --git a/compiler/luci/import/src/Nodes/CircleSquare.cpp b/compiler/luci/import/src/Nodes/CircleSquare.cpp
index b5ba048d7..bec84b4c0 100644
--- a/compiler/luci/import/src/Nodes/CircleSquare.cpp
+++ b/compiler/luci/import/src/Nodes/CircleSquare.cpp
@@ -25,17 +25,17 @@ namespace luci
bool CircleSquareGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
- // Must be one of the following types
- // bfloat16, half (float16), float32, float64, complex64, complex128
- // Currently, circle supports float16, float32, complex64
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(0));
- switch (tensor->type)
+ const auto &inputs = args.op.inputs;
+ const auto tensors = args.reader.tensors();
+ const auto tensor = tensors.at(inputs.at(0));
+ assert(tensor != nullptr);
+ switch (tensor->type())
{
+ case circle::TensorType_UINT8:
+ case circle::TensorType_INT16:
case circle::TensorType_INT32:
case circle::TensorType_INT64:
case circle::TensorType_FLOAT16:
diff --git a/compiler/luci/import/src/Nodes/CircleSquaredDifference.cpp b/compiler/luci/import/src/Nodes/CircleSquaredDifference.cpp
index 6deae94c5..1983465d3 100644
--- a/compiler/luci/import/src/Nodes/CircleSquaredDifference.cpp
+++ b/compiler/luci/import/src/Nodes/CircleSquaredDifference.cpp
@@ -25,20 +25,17 @@ namespace luci
bool CircleSquaredDifferenceGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- return false;
-
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 2))
return false;
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
// Inputs must be one of the following types
// bfloat16, half(float16), float32, float64, int32, int64, complex64, complex128
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(0));
- switch (tensor->type)
+ const auto tensors = args.reader.tensors();
+ const auto tensor = tensors.at(inputs.at(0));
+ assert(tensor != nullptr);
+ switch (tensor->type())
{
case circle::TensorType_FLOAT16:
case circle::TensorType_FLOAT32:
@@ -48,16 +45,22 @@ bool CircleSquaredDifferenceGraphBuilder::validate(const ValidateArgs &args) con
case circle::TensorType_COMPLEX64:
break;
// TODO support bfloat16, complex128
+ // Additional support for quantized tensors
+ case circle::TensorType_UINT8:
+ case circle::TensorType_INT16:
+ break;
default:
return false;
}
// Input types must match
- if (tensors.at(inputs.at(0))->type != tensors.at(inputs.at(1))->type)
+ assert(tensors.at(inputs.at(0)) != nullptr && tensors.at(inputs.at(1)) != nullptr);
+ if (tensors.at(inputs.at(0))->type() != tensors.at(inputs.at(1))->type())
return false;
// Input and output types must match
- if (tensors.at(inputs.at(0))->type != tensors.at(outputs[0])->type)
+ assert(tensors.at(outputs[0]) != nullptr);
+ if (tensors.at(inputs.at(0))->type() != tensors.at(outputs[0])->type())
return false;
return true;
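
The extra switch cases here widen the accepted element types so quantized models pass validation; the same whitelist-by-switch idiom recurs in most validators in this file. A stand-alone sketch (TensorType is a stand-in enum):

enum class TensorType { FLOAT16, FLOAT32, FLOAT64, INT32, INT64, COMPLEX64, UINT8, INT16, BOOL };

bool is_supported_squared_difference_type(TensorType t)
{
  switch (t)
  {
    case TensorType::FLOAT16:
    case TensorType::FLOAT32:
    case TensorType::FLOAT64:
    case TensorType::INT32:
    case TensorType::INT64:
    case TensorType::COMPLEX64:
      return true;
    case TensorType::UINT8: // added for quantized tensors
    case TensorType::INT16: // added for quantized tensors
      return true;
    default:
      return false;
  }
}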
diff --git a/compiler/luci/import/src/Nodes/CircleSqueeze.cpp b/compiler/luci/import/src/Nodes/CircleSqueeze.cpp
index 32792c266..d24d8166c 100644
--- a/compiler/luci/import/src/Nodes/CircleSqueeze.cpp
+++ b/compiler/luci/import/src/Nodes/CircleSqueeze.cpp
@@ -16,7 +16,6 @@
#include "luci/Import/Nodes/CircleSqueeze.h"
-#include <luci/IR/Nodes/CircleConst.h>
#include <luci/IR/Nodes/CircleSqueeze.h>
namespace luci
@@ -24,13 +23,7 @@ namespace luci
bool CircleSqueezeGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleSqueezeGraphBuilder::build_node(const circle::OperatorT &op,
diff --git a/compiler/luci/import/src/Nodes/CircleStridedSlice.cpp b/compiler/luci/import/src/Nodes/CircleStridedSlice.cpp
index 8f943a682..ca8259cac 100644
--- a/compiler/luci/import/src/Nodes/CircleStridedSlice.cpp
+++ b/compiler/luci/import/src/Nodes/CircleStridedSlice.cpp
@@ -27,14 +27,8 @@ namespace luci
bool CircleStridedSliceGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 4)
- return false;
- if (args.op.outputs.size() != 1)
- return false;
-
// TODO check shapes and types
-
- return true;
+ return GraphBuilder::validate(args, 4);
}
CircleNode *CircleStridedSliceGraphBuilder::build_node(const circle::OperatorT &op,
diff --git a/compiler/luci/import/src/Nodes/CircleSub.cpp b/compiler/luci/import/src/Nodes/CircleSub.cpp
index 9acf83d40..c3978f218 100644
--- a/compiler/luci/import/src/Nodes/CircleSub.cpp
+++ b/compiler/luci/import/src/Nodes/CircleSub.cpp
@@ -25,13 +25,7 @@ namespace luci
bool CircleSubGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleSubGraphBuilder::build_node(const circle::OperatorT &op,
diff --git a/compiler/luci/import/src/Nodes/CircleSum.cpp b/compiler/luci/import/src/Nodes/CircleSum.cpp
index bd3cb6239..e348a62d9 100644
--- a/compiler/luci/import/src/Nodes/CircleSum.cpp
+++ b/compiler/luci/import/src/Nodes/CircleSum.cpp
@@ -23,10 +23,7 @@ namespace luci
bool CircleSumGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleSumGraphBuilder::build_node(const circle::OperatorT &op,
diff --git a/compiler/luci/import/src/Nodes/CircleTanh.cpp b/compiler/luci/import/src/Nodes/CircleTanh.cpp
index 018f5701b..80a0e887f 100644
--- a/compiler/luci/import/src/Nodes/CircleTanh.cpp
+++ b/compiler/luci/import/src/Nodes/CircleTanh.cpp
@@ -25,15 +25,14 @@ namespace luci
bool CircleTanhGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- if (inputs.size() != 1)
- return false;
- const auto &outputs = args.op.outputs;
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
- const auto &tensors = args.reader.tensors();
- if (tensors.at(inputs.at(0))->type != tensors.at(outputs[0])->type)
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
+ const auto tensors = args.reader.tensors();
+ assert(tensors.at(inputs.at(0)) != nullptr && tensors.at(outputs[0]) != nullptr);
+ if (tensors.at(inputs.at(0))->type() != tensors.at(outputs[0])->type())
return false;
return true;
diff --git a/compiler/luci/import/src/Nodes/CircleTile.cpp b/compiler/luci/import/src/Nodes/CircleTile.cpp
index bc6f320ba..c41a6ba3f 100644
--- a/compiler/luci/import/src/Nodes/CircleTile.cpp
+++ b/compiler/luci/import/src/Nodes/CircleTile.cpp
@@ -25,20 +25,17 @@ namespace luci
bool CircleTileGraphBuilder::validate(const ValidateArgs &args) const
{
- auto inputs = args.op.inputs;
- auto outputs = args.op.outputs;
-
- if (inputs.size() != 2)
- return false;
-
- if (outputs.size() != 1)
+ if (!GraphBuilder::validate(args, 2))
return false;
+ auto inputs = args.op.inputs;
+ auto outputs = args.op.outputs;
// Multiples (inputs.at(1)) must be one of the following types
// int32, int64
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(1));
- switch (tensor->type)
+ const auto tensors = args.reader.tensors();
+ const auto tensor = tensors.at(inputs.at(1));
+ assert(tensor != nullptr);
+ switch (tensor->type())
{
case circle::TensorType_INT32:
case circle::TensorType_INT64:
@@ -48,7 +45,8 @@ bool CircleTileGraphBuilder::validate(const ValidateArgs &args) const
}
// Type of input and output must be the same
- if (tensors.at(inputs.at(0))->type != tensors.at(outputs[0])->type)
+ assert(tensors.at(inputs.at(0)) != nullptr && tensors.at(outputs[0]) != nullptr);
+ if (tensors.at(inputs.at(0))->type() != tensors.at(outputs[0])->type())
return false;
return true;
diff --git a/compiler/luci/import/src/Nodes/CircleTopKV2.cpp b/compiler/luci/import/src/Nodes/CircleTopKV2.cpp
index f0677de86..9f9173738 100644
--- a/compiler/luci/import/src/Nodes/CircleTopKV2.cpp
+++ b/compiler/luci/import/src/Nodes/CircleTopKV2.cpp
@@ -35,9 +35,10 @@ bool CircleTopKV2GraphBuilder::validate(const ValidateArgs &args) const
if (outputs.size() != 2)
return false;
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(1));
- if (tensor->type != circle::TensorType_INT32)
+ const auto tensors = args.reader.tensors();
+ const auto tensor = tensors.at(inputs.at(1));
+ assert(tensor != nullptr);
+ if (tensor->type() != circle::TensorType_INT32)
return false;
return true;
@@ -59,59 +60,24 @@ bool CircleTopKV2GraphBuilder::validate(const ValidateArgs &args) const
* \- CircleTopKV2Out --- FullyConnected ---
*/
-void CircleTopKV2GraphBuilder::build(const circle::OperatorT &op,
- GraphBuilderContext *context) const
+CircleNode *CircleTopKV2GraphBuilder::build_node(const BuildNodeArgs &bna) const
{
- assert(context != nullptr);
-
- auto graph = context->graph();
-
- const std::vector<int32_t> &inputs = op.inputs;
- const std::vector<int32_t> &outputs = op.outputs;
- const auto &tensors = context->reader()->tensors();
- const auto &opcodes = context->reader()->opcodes();
- auto tensors_ptr = context->reader()->tensors_ptr();
- assert(tensors_ptr != nullptr);
-
- std::vector<CircleNode *> input_nodes;
- for (const int32_t input_tensor_index : inputs)
- {
- input_nodes.push_back(context->nodefinder()->node(input_tensor_index));
- }
-
- // Create CircleTopKV2
- auto node = graph->nodes()->create<CircleTopKV2>();
- node->input(input_nodes[0]);
- node->k(input_nodes[1]);
-
- assert(outputs.size() == 2);
- {
- // Let's use name of output 0 as TopKV2 name
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
- node->name(tensor_name(output_tensor));
- node->op_version(opcodes[op.opcode_index].get()->version);
-
- // NOTE We don't set quantization for TopKV2 itself but to virtual outputs
- }
-
- // Create virtual outputs of TopKV2
- for (size_t n = 0; n < outputs.size(); ++n)
- {
- const circle::TensorT &output_tensor = *tensors[outputs[n]];
-
- auto *nodeout = graph->nodes()->create<CircleTopKV2Out>();
- copy_tensor_attributes(output_tensor, nodeout);
- // mark shape_status
- if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
- nodeout->shape_status(ShapeStatus::NOSHAPE);
- else
- nodeout->shape_status(ShapeStatus::VALID);
-
- nodeout->input(node);
- nodeout->index(n);
-
- context->nodefinder()->enroll(outputs[n], nodeout);
- }
+ auto node = bna.context->graph()->nodes()->create<CircleTopKV2>();
+
+ node->input(bna.input_nodes[0]);
+ node->k(bna.input_nodes[1]);
+
+ return node;
+}
+
+CircleNode *CircleTopKV2GraphBuilder::build_out(const BuildOutArgs &boa) const
+{
+ auto *nodeout = boa.node->graph()->nodes()->create<CircleTopKV2Out>();
+
+ nodeout->input(boa.node);
+ nodeout->index(boa.index);
+
+ return nodeout;
}
} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleTranspose.cpp b/compiler/luci/import/src/Nodes/CircleTranspose.cpp
index cc3153085..01095239e 100644
--- a/compiler/luci/import/src/Nodes/CircleTranspose.cpp
+++ b/compiler/luci/import/src/Nodes/CircleTranspose.cpp
@@ -25,13 +25,7 @@ namespace luci
bool CircleTransposeGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 2)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 2);
}
CircleNode *CircleTransposeGraphBuilder::build_node(const circle::OperatorT &op,
diff --git a/compiler/luci/import/src/Nodes/CircleTransposeConv.cpp b/compiler/luci/import/src/Nodes/CircleTransposeConv.cpp
index c280faaf5..62326f435 100644
--- a/compiler/luci/import/src/Nodes/CircleTransposeConv.cpp
+++ b/compiler/luci/import/src/Nodes/CircleTransposeConv.cpp
@@ -31,11 +31,13 @@ bool CircleTransposeConvGraphBuilder::validate(const ValidateArgs &args) const
return false;
const auto &inputs = args.op.inputs;
- const auto &tensors = args.reader.tensors();
- const auto &filter_tensor = tensors.at(inputs.at(1));
- const auto &filter_shape = filter_tensor.get()->shape;
- const auto &ifm_tensor = tensors.at(inputs.at(2));
- const auto &ifm_shape = ifm_tensor.get()->shape;
+ const auto tensors = args.reader.tensors();
+ const auto filter_tensor = tensors.at(inputs.at(1));
+ assert(filter_tensor != nullptr);
+ const auto filter_shape = wrap(filter_tensor->shape());
+ const auto ifm_tensor = tensors.at(inputs.at(2));
+ assert(ifm_tensor != nullptr);
+ const auto ifm_shape = wrap(ifm_tensor->shape());
// ifm and filters must be 4-D tensor
if (ifm_shape.size() != 4)
@@ -45,7 +47,7 @@ bool CircleTransposeConvGraphBuilder::validate(const ValidateArgs &args) const
// input shape : [batch, height, width, in_channels]
// filters shape : [output_channels, height, weight, in_channels]
- if (ifm_tensor.get()->shape.at(3) != filter_tensor.get()->shape.at(3))
+ if (ifm_shape.at(3) != filter_shape.at(3))
return false;
return true;
@@ -61,21 +63,18 @@ CircleNode *CircleTransposeConvGraphBuilder::build_node(const circle::OperatorT
node->filter(inputs.at(1));
node->outBackprop(inputs.at(2));
if (inputs.size() == 3)
- node->bias(graph->nodes()->create<CircleOutputExclude>());
- else
- node->bias(inputs.at(3));
-
- if (auto bias = dynamic_cast<luci::CircleOutputExclude *>(node->bias()))
{
- // CircleOutputExclude doesn't need a type, but since all nodes must have a type, a dummy type
- // is inserted.
- bias->dtype(loco::DataType::FLOAT32);
+ auto *bias = graph->nodes()->create<CircleOutputExclude>();
+ node->bias(bias);
}
+ else
+ node->bias(inputs.at(3));
const auto *options = op.builtin_options.AsTransposeConvOptions();
node->padding(luci_padding(options->padding));
node->stride()->w(options->stride_w);
node->stride()->h(options->stride_h);
+ node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
return node;
}
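
The validate() change above moves shape access from the TensorT shape vector to wrap(tensor->shape()), a view over the flatbuffers vector. A minimal sketch of what such a view needs to provide for these checks, assuming (not quoting) VectorWrapper's interface:

#include <cstdint>
#include <stdexcept>

// Assumed interface of a wrap()-style view over a flatbuffers vector: tolerant
// of a null vector and bounds-checked on access. luci's VectorWrapper may
// differ in detail.
template <typename T> class VectorView
{
public:
  VectorView(const T *data, uint32_t size) : _data(data), _size(size) {}

  uint32_t size() const { return _data ? _size : 0; } // null vector -> rank 0

  T at(uint32_t i) const
  {
    if (i >= size())
      throw std::out_of_range("VectorView::at");
    return _data[i];
  }

private:
  const T *_data;
  uint32_t _size;
};

// Mirrors the 4-D and channel-count checks in the TransposeConv validator.
bool transpose_conv_shapes_ok(const VectorView<int32_t> &ifm, const VectorView<int32_t> &filter)
{
  return ifm.size() == 4 && filter.size() == 4 && ifm.at(3) == filter.at(3);
}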
diff --git a/compiler/luci/import/src/Nodes/CircleUnidirectionalSequenceLSTM.cpp b/compiler/luci/import/src/Nodes/CircleUnidirectionalSequenceLSTM.cpp
new file mode 100644
index 000000000..7ab6d6881
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleUnidirectionalSequenceLSTM.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleUnidirectionalSequenceLSTM.h"
+
+#include <luci/IR/Nodes/CircleUnidirectionalSequenceLSTM.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CircleUnidirectionalSequenceLSTMGraphBuilder::validate(const ValidateArgs &args) const
+{
+ return GraphBuilder::validate(args, 24);
+}
+
+CircleNode *CircleUnidirectionalSequenceLSTMGraphBuilder::build_node(
+ const circle::OperatorT &op, const std::vector<CircleNode *> &inputs, loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CircleUnidirectionalSequenceLSTM>();
+ node->input(inputs.at(0));
+ node->input_to_input_weights(inputs.at(1)); // Optional
+ node->input_to_forget_weights(inputs.at(2));
+ node->input_to_cell_weights(inputs.at(3));
+ node->input_to_output_weights(inputs.at(4));
+
+ node->recurrent_to_input_weights(inputs.at(5)); // Optional
+ node->recurrent_to_forget_weights(inputs.at(6));
+ node->recurrent_to_cell_weights(inputs.at(7));
+ node->recurrent_to_output_weights(inputs.at(8));
+
+ node->cell_to_input_weights(inputs.at(9)); // Optional
+ node->cell_to_forget_weights(inputs.at(10)); // Optional
+ node->cell_to_output_weights(inputs.at(11)); // Optional
+
+ node->input_gate_bias(inputs.at(12)); // Optional
+ node->forget_gate_bias(inputs.at(13));
+ node->cell_gate_bias(inputs.at(14));
+ node->output_gate_bias(inputs.at(15));
+
+ node->projection_weights(inputs.at(16)); // Optional
+ node->projection_bias(inputs.at(17)); // Optional
+
+ node->output_state(inputs.at(18));
+ node->cell_state(inputs.at(19));
+
+ node->input_layer_norm_coefficients(inputs.at(20)); // Optional
+ node->forget_layer_norm_coefficients(inputs.at(21)); // Optional
+ node->cell_layer_norm_coefficients(inputs.at(22)); // Optional
+ node->output_layer_norm_coefficients(inputs.at(23)); // Optional
+
+ const auto *options = op.builtin_options.AsUnidirectionalSequenceLSTMOptions();
+ node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
+ node->cell_clip(options->cell_clip);
+ node->proj_clip(options->proj_clip);
+ node->time_major(options->time_major);
+ node->asymmetric_quantize_inputs(options->asymmetric_quantize_inputs);
+
+ return node;
+}
+
+} // namespace luci
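
For reference, the 24-tensor input layout the new importer consumes, written as an index enum; the names and optionality follow the setters above, while the enum itself is illustrative and not part of luci:

enum UnidirectionalSequenceLSTMInput
{
  INPUT = 0,
  INPUT_TO_INPUT_WEIGHTS,      // optional
  INPUT_TO_FORGET_WEIGHTS,
  INPUT_TO_CELL_WEIGHTS,
  INPUT_TO_OUTPUT_WEIGHTS,
  RECURRENT_TO_INPUT_WEIGHTS,  // optional
  RECURRENT_TO_FORGET_WEIGHTS,
  RECURRENT_TO_CELL_WEIGHTS,
  RECURRENT_TO_OUTPUT_WEIGHTS,
  CELL_TO_INPUT_WEIGHTS,       // optional (peephole)
  CELL_TO_FORGET_WEIGHTS,      // optional (peephole)
  CELL_TO_OUTPUT_WEIGHTS,      // optional (peephole)
  INPUT_GATE_BIAS,             // optional
  FORGET_GATE_BIAS,
  CELL_GATE_BIAS,
  OUTPUT_GATE_BIAS,
  PROJECTION_WEIGHTS,          // optional
  PROJECTION_BIAS,             // optional
  OUTPUT_STATE,
  CELL_STATE,
  INPUT_LAYER_NORM_COEFFICIENTS,  // optional
  FORGET_LAYER_NORM_COEFFICIENTS, // optional
  CELL_LAYER_NORM_COEFFICIENTS,   // optional
  OUTPUT_LAYER_NORM_COEFFICIENTS, // optional; 24 inputs in total
};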
diff --git a/compiler/luci/import/src/Nodes/CircleUnique.cpp b/compiler/luci/import/src/Nodes/CircleUnique.cpp
index 5e79a2920..f6914c24a 100644
--- a/compiler/luci/import/src/Nodes/CircleUnique.cpp
+++ b/compiler/luci/import/src/Nodes/CircleUnique.cpp
@@ -35,55 +35,26 @@ bool CircleUniqueGraphBuilder::validate(const ValidateArgs &args) const
return true;
}
-void CircleUniqueGraphBuilder::build(const circle::OperatorT &op,
- GraphBuilderContext *context) const
+CircleNode *CircleUniqueGraphBuilder::build_node(const BuildNodeArgs &bna) const
{
- assert(context != nullptr);
+ auto node = bna.context->graph()->nodes()->create<CircleUnique>();
- auto graph = context->graph();
+ node->input(bna.input_nodes[0]);
- const std::vector<int32_t> &inputs = op.inputs;
- const std::vector<int32_t> &outputs = op.outputs;
- const auto &tensors = context->reader()->tensors();
- auto tensors_ptr = context->reader()->tensors_ptr();
- assert(tensors_ptr != nullptr);
+ const auto *options = bna.op.builtin_options.AsUniqueOptions();
+ node->idx_out_type(luci_datatype(options->idx_out_type));
- std::vector<CircleNode *> input_nodes;
- for (const int32_t input_tensor_index : inputs)
- {
- input_nodes.push_back(context->nodefinder()->node(input_tensor_index));
- }
-
- // Create CircleUnique
- auto node = graph->nodes()->create<CircleUnique>();
- node->input(input_nodes[0]);
-
- const auto *options = op.builtin_options.AsUniqueOptions();
- node->output_type(luci_datatype(options->idx_out_type));
-
- assert(int32_t(outputs.size()) == 2);
- // Let's use name of output 0 as Unique name
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
- node->name(tensor_name(output_tensor));
-
- // Create virtual outputs of Unique
- for (int32_t n = 0; n < 2; ++n)
- {
- const circle::TensorT &output_tensor = *tensors[outputs[n]];
+ return node;
+}
- auto *nodeout = graph->nodes()->create<CircleUniqueOut>();
- copy_tensor_attributes(output_tensor, nodeout);
- // mark shape_status
- if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
- nodeout->shape_status(ShapeStatus::NOSHAPE);
- else
- nodeout->shape_status(ShapeStatus::VALID);
+CircleNode *CircleUniqueGraphBuilder::build_out(const BuildOutArgs &boa) const
+{
+ auto *nodeout = boa.node->graph()->nodes()->create<CircleUniqueOut>();
- nodeout->input(node);
- nodeout->index(n);
+ nodeout->input(boa.node);
+ nodeout->index(boa.index);
- context->nodefinder()->enroll(outputs[n], nodeout);
- }
+ return nodeout;
}
} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleUnpack.cpp b/compiler/luci/import/src/Nodes/CircleUnpack.cpp
index 9e7f3d3e1..6b3401609 100644
--- a/compiler/luci/import/src/Nodes/CircleUnpack.cpp
+++ b/compiler/luci/import/src/Nodes/CircleUnpack.cpp
@@ -46,8 +46,8 @@ bool CircleUnpackGraphBuilder::validate(const ValidateArgs &args) const
{
if (settings->get(luci::UserSettings::Key::DisableValidation))
{
- const auto &tensors = args.reader.tensors();
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
+ const auto tensors = args.reader.tensors();
+ const auto output_tensor = tensors[outputs[0]];
auto name = tensor_name(output_tensor);
WARN(l) << "Warning: import Unpack(" << name << ") 'num' is not same as outputs used";
}
@@ -58,9 +58,10 @@ bool CircleUnpackGraphBuilder::validate(const ValidateArgs &args) const
if (options->num < 0)
return false;
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(0));
- const auto &shape = tensor->shape;
+ const auto tensors = args.reader.tensors();
+ const auto tensor = tensors.at(inputs.at(0));
+ assert(tensor != nullptr);
+ const auto shape = wrap(tensor->shape());
auto shape_size = static_cast<int32_t>(shape.size());
if (shape_size > 0)
{
@@ -88,64 +89,27 @@ bool CircleUnpackGraphBuilder::validate(const ValidateArgs &args) const
* \- CircleUnpackOut --- FullyConnected ---
*/
-void CircleUnpackGraphBuilder::build(const circle::OperatorT &op,
- GraphBuilderContext *context) const
+CircleNode *CircleUnpackGraphBuilder::build_node(const BuildNodeArgs &bna) const
{
- assert(context != nullptr);
+ auto node = bna.context->graph()->nodes()->create<CircleUnpack>();
- auto graph = context->graph();
+ node->value(bna.input_nodes[0]);
- const std::vector<int32_t> &inputs = op.inputs;
- const std::vector<int32_t> &outputs = op.outputs;
- const auto &tensors = context->reader()->tensors();
- const auto &opcodes = context->reader()->opcodes();
- auto tensors_ptr = context->reader()->tensors_ptr();
- assert(tensors_ptr != nullptr);
-
- // NOTE Unpack has only one input so running a loop is not necessary
- // This is provided as a reference for other Ops as a reference
- std::vector<CircleNode *> input_nodes;
- for (const int32_t input_tensor_index : inputs)
- {
- input_nodes.push_back(context->nodefinder()->node(input_tensor_index));
- }
-
- // Create CircleUnpack
- CircleUnpack *node = graph->nodes()->create<CircleUnpack>();
- node->value(input_nodes[0]);
-
- const auto *options = op.builtin_options.AsUnpackOptions();
+ const auto *options = bna.op.builtin_options.AsUnpackOptions();
node->num(options->num);
node->axis(options->axis);
- assert(outputs.size() > 0);
- {
- // Let's use name of output 0 as Unpack name
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
- node->name(tensor_name(output_tensor));
- node->op_version(opcodes[op.opcode_index].get()->version);
-
- // NOTE We don't set quantization for Unpack itself but to virtual outputs
- }
-
- // Create virtual outputs of Unpack
- for (int32_t n = 0; n < options->num; ++n)
- {
- const circle::TensorT &output_tensor = *tensors[outputs[n]];
+ return node;
+}
- auto *nodeout = graph->nodes()->create<CircleUnpackOut>();
- copy_tensor_attributes(output_tensor, nodeout);
- // mark shape_status
- if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
- nodeout->shape_status(ShapeStatus::NOSHAPE);
- else
- nodeout->shape_status(ShapeStatus::VALID);
+CircleNode *CircleUnpackGraphBuilder::build_out(const BuildOutArgs &boa) const
+{
+ auto *nodeout = boa.node->graph()->nodes()->create<CircleUnpackOut>();
- nodeout->input(node);
- nodeout->index(n);
+ nodeout->input(boa.node);
+ nodeout->index(boa.index);
- context->nodefinder()->enroll(outputs[n], nodeout);
- }
+ return nodeout;
}
} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleVariable.cpp b/compiler/luci/import/src/Nodes/CircleVariable.cpp
new file mode 100644
index 000000000..23ae9e7be
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleVariable.cpp
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleVariable.h"
+
+#include <luci/IR/Nodes/CircleVariable.h>
+#include <luci/Log.h>
+
+#include <cassert>
+#include <ostream>
+#include <string>
+#include <vector>
+
+namespace
+{
+
+std::ostream &operator<<(std::ostream &os, const luci::VectorWrapper<int32_t> &vect)
+{
+ uint32_t seq = 0;
+ for (const auto &v : vect)
+ {
+ if (seq)
+ os << ", ";
+ os << v;
+ seq++;
+ }
+ return os;
+}
+
+} // namespace
+
+namespace luci
+{
+
+CircleVariable *create_circlevariable(GraphBuilderContext *context, int32_t tensor_index)
+{
+ LOGGER(l);
+
+ auto graph = context->graph();
+ auto reader = context->reader();
+ const auto tensors = reader->tensors();
+ const auto variable_tensor = tensors[tensor_index];
+ assert(variable_tensor != nullptr);
+
+ if (not variable_tensor->is_variable())
+ {
+ // not a variable
+ return nullptr;
+ }
+ {
+ // check that there is no buffer data, as we don't support this for now
+ // TODO use buffer when this is enabled in Kernel
+ assert(reader->buffers()[variable_tensor->buffer()] != nullptr);
+ assert(reader->buffers()[variable_tensor->buffer()]->data() == nullptr);
+ }
+
+ auto variable_node = graph->nodes()->create<CircleVariable>();
+ copy_tensor_attributes(variable_tensor, variable_node);
+ variable_node->shape_status(luci::ShapeStatus::VALID);
+
+ INFO(l) << "[luci] NodeFinder variable node(" << tensor_index << ") -> " << variable_node << " "
+ << wrap(variable_tensor->shape()) << std::endl;
+
+ return variable_node;
+}
+
+} // namespace luci
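
Condensed, the decision create_circlevariable makes above is: only tensors flagged is_variable become CircleVariable nodes, and variables carrying constant buffer data are rejected until kernels support them. A stand-alone mirror with illustrative types (Buffer, Tensor, and should_become_variable are not the real API):

#include <cassert>
#include <cstdint>
#include <vector>

// Illustrative stand-ins for the flatbuffer accessors used above.
struct Buffer
{
  const uint8_t *data = nullptr;
};
struct Tensor
{
  bool is_variable = false;
  uint32_t buffer = 0;
};

bool should_become_variable(const Tensor &t, const std::vector<Buffer> &buffers)
{
  if (!t.is_variable)
    return false; // plain tensor: handled by the normal import path
  // variables carrying initial data are not supported yet (see TODO above)
  assert(buffers.at(t.buffer).data == nullptr);
  return true;
}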
diff --git a/compiler/luci/import/src/Nodes/CircleWhere.cpp b/compiler/luci/import/src/Nodes/CircleWhere.cpp
index f4c5f0c66..bc6199ace 100644
--- a/compiler/luci/import/src/Nodes/CircleWhere.cpp
+++ b/compiler/luci/import/src/Nodes/CircleWhere.cpp
@@ -25,23 +25,21 @@ namespace luci
bool CircleWhereGraphBuilder::validate(const ValidateArgs &args) const
{
- const auto &inputs = args.op.inputs;
- const auto &outputs = args.op.outputs;
-
- if (inputs.size() != 1)
+ if (!GraphBuilder::validate(args, 1))
return false;
- if (outputs.size() != 1)
- return false;
-
- const auto &tensors = args.reader.tensors();
- const auto &tensor_condition = tensors.at(inputs.at(0));
- const auto &tensor_out = tensors.at(outputs[0]);
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
+ const auto tensors = args.reader.tensors();
+ const auto tensor_condition = tensors.at(inputs.at(0));
+ const auto tensor_out = tensors.at(outputs[0]);
+ assert(tensor_condition != nullptr);
+ assert(tensor_out != nullptr);
- if (tensor_condition->type != circle::TensorType_BOOL)
+ if (tensor_condition->type() != circle::TensorType_BOOL)
return false;
- if (tensor_out->type != circle::TensorType_INT64)
+ if (tensor_out->type() != circle::TensorType_INT64)
return false;
return true;
diff --git a/compiler/luci/import/src/Nodes/CircleWhile.cpp b/compiler/luci/import/src/Nodes/CircleWhile.cpp
index aead25071..27a392b2a 100644
--- a/compiler/luci/import/src/Nodes/CircleWhile.cpp
+++ b/compiler/luci/import/src/Nodes/CircleWhile.cpp
@@ -58,7 +58,8 @@ bool CircleWhileGraphBuilder::validate(const ValidateArgs &args) const
* \- CircleWhileOut --- Node ---
*/
-void CircleWhileGraphBuilder::build(const circle::OperatorT &op, GraphBuilderContext *context) const
+CircleNode *CircleWhileGraphBuilder::build(const circle::OperatorT &op,
+ GraphBuilderContext *context) const
{
assert(context != nullptr);
@@ -66,8 +67,8 @@ void CircleWhileGraphBuilder::build(const circle::OperatorT &op, GraphBuilderCon
const std::vector<int32_t> &inputs = op.inputs;
const std::vector<int32_t> &outputs = op.outputs;
- const auto &tensors = context->reader()->tensors();
- const auto &opcodes = context->reader()->opcodes();
+ const auto tensors = context->reader()->tensors();
+ const auto opcodes = context->reader()->opcodes();
std::vector<CircleNode *> input_nodes;
for (const int32_t input_tensor_index : inputs)
@@ -95,9 +96,11 @@ void CircleWhileGraphBuilder::build(const circle::OperatorT &op, GraphBuilderCon
assert(outputs.size() > 0);
{
// Let's use name of output 0 as While name
- const circle::TensorT &output_tensor = *tensors[outputs[0]];
+ const auto output_tensor = tensors[outputs[0]];
+ assert(output_tensor != nullptr);
node->name(tensor_name(output_tensor));
- node->op_version(opcodes[op.opcode_index].get()->version);
+ assert(opcodes[op.opcode_index] != nullptr);
+ node->op_version(opcodes[op.opcode_index]->version());
// NOTE We don't set quantization for While itself but to virtual outputs
}
@@ -105,7 +108,8 @@ void CircleWhileGraphBuilder::build(const circle::OperatorT &op, GraphBuilderCon
// Create virtual outputs of While
for (uint32_t n = 0; n < output_count; ++n)
{
- const circle::TensorT &output_tensor = *tensors[outputs[n]];
+ const auto output_tensor = tensors[outputs[n]];
+ assert(output_tensor != nullptr);
auto *nodeout = graph->nodes()->create<CircleWhileOut>();
@@ -118,6 +122,8 @@ void CircleWhileGraphBuilder::build(const circle::OperatorT &op, GraphBuilderCon
context->nodefinder()->enroll(outputs[n], nodeout);
}
+
+ return node;
}
} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleZerosLike.cpp b/compiler/luci/import/src/Nodes/CircleZerosLike.cpp
index e60424def..ddb05e8a4 100644
--- a/compiler/luci/import/src/Nodes/CircleZerosLike.cpp
+++ b/compiler/luci/import/src/Nodes/CircleZerosLike.cpp
@@ -25,13 +25,7 @@ namespace luci
bool CircleZerosLikeGraphBuilder::validate(const ValidateArgs &args) const
{
- if (args.op.inputs.size() != 1)
- return false;
-
- if (args.op.outputs.size() != 1)
- return false;
-
- return true;
+ return GraphBuilder::validate(args, 1);
}
CircleNode *CircleZerosLikeGraphBuilder::build_node(const circle::OperatorT &,
diff --git a/compiler/luci/import/src/PostImport.cpp b/compiler/luci/import/src/PostImport.cpp
index f436b48e8..63b16bb95 100644
--- a/compiler/luci/import/src/PostImport.cpp
+++ b/compiler/luci/import/src/PostImport.cpp
@@ -130,7 +130,10 @@ private:
namespace
{
/**
- * @brief ValidateNodeProp will validate inter graph connections for each Nodes
+ * @brief ValidateNodeProp will validate inter-graph connections for each node.
+ * @note Here, only loco::GraphInput and loco::GraphOutput are validated,
+ * since this class is for checking inter-graph connections.
+ * CircleNodes such as CircleInput and CircleOutput are validated in later steps.
*/
class ValidateNodeProp final : public luci::CircleNodeMutableVisitor<void>
{
@@ -172,9 +175,19 @@ public:
auto then_graph_output = then_graph_outputs->at(then_out->index());
auto else_graph_output = else_graph_outputs->at(else_out->index());
- if (!(*then_graph_output->shape() == *else_graph_output->shape()))
+ if (then_graph_output->shape()->rank() != else_graph_output->shape()->rank())
{
- INTERNAL_EXN_V("CircleIf THEN and ELSE Graph Output shape mismatch ", idx);
+ INTERNAL_EXN_V("CircleIf THEN and ELSE Graph Output rank mismatch ", idx);
+ }
+ for (uint32_t i = 0; i < then_graph_output->shape()->rank(); ++i)
+ {
+ if (then_graph_output->shape()->dim(i).known() &&
+ else_graph_output->shape()->dim(i).known() &&
+ then_graph_output->shape()->dim(i).value() !=
+ else_graph_output->shape()->dim(i).value())
+ {
+ INTERNAL_EXN_V("CircleIf THEN and ELSE Graph Output dimension mismatch ", idx);
+ }
}
if (then_graph_output->dtype() != else_graph_output->dtype())
{
@@ -231,18 +244,20 @@ public:
auto cond_graph_input = cond_graph_inputs->at(cond_in->index());
auto body_graph_input = body_graph_inputs->at(body_in->index());
- if ((cond_in->rank() != body_in->rank()))
+ if (cond_graph_input->shape()->rank() != body_graph_input->shape()->rank())
{
- INTERNAL_EXN_V("CircleWhile COND input and BODY input shape mismatch ", idx);
+ INTERNAL_EXN_V("CircleWhile COND input and BODY input rank mismatch ", idx);
}
- if (cond_in->rank() > 0 && body_in->rank() > 0)
+ for (uint32_t i = 0; i < cond_graph_input->shape()->rank(); ++i)
{
- if (!(*cond_graph_input->shape() == *body_graph_input->shape()))
+ if (cond_graph_input->shape()->dim(i).known() &&
+ body_graph_input->shape()->dim(i).known() &&
+ cond_graph_input->shape()->dim(i).value() != body_graph_input->shape()->dim(i).value())
{
- INTERNAL_EXN_V("CircleWhile COND input and BODY input shape mismatch ", idx);
+ INTERNAL_EXN_V("CircleWhile COND input and BODY input dimension mismatch ", idx);
}
}
- if (cond_in->dtype() != body_in->dtype())
+ if (cond_graph_input->dtype() != body_graph_input->dtype())
{
INTERNAL_EXN_V("CircleWhile COND input and BODY input type mismatch ", idx);
}
@@ -257,18 +272,20 @@ public:
auto cond_graph_input = cond_graph_inputs->at(cond_in->index());
auto body_graph_output = body_graph_outputs->at(body_out->index());
- if ((cond_in->rank() != body_out->rank()))
+ if (cond_graph_input->shape()->rank() != body_graph_output->shape()->rank())
{
- INTERNAL_EXN_V("CircleWhile COND input and BODY output shape mismatch ", idx);
+ INTERNAL_EXN_V("CircleWhile COND input and BODY output rank mismatch ", idx);
}
- if (cond_in->rank() > 0 && body_out->rank() > 0)
+ for (uint32_t i = 0; i < cond_graph_input->shape()->rank(); ++i)
{
- if (!(*cond_graph_input->shape() == *body_graph_output->shape()))
+ if (cond_graph_input->shape()->dim(i).known() &&
+ body_graph_output->shape()->dim(i).known() &&
+ cond_graph_input->shape()->dim(i).value() != body_graph_output->shape()->dim(i).value())
{
- INTERNAL_EXN_V("CircleWhile COND input and BODY output shape mismatch ", idx);
+ INTERNAL_EXN_V("CircleWhile COND input and BODY output dimension mismatch ", idx);
}
}
- if (cond_in->dtype() != body_out->dtype())
+ if (cond_graph_input->dtype() != body_graph_output->dtype())
{
INTERNAL_EXN_V("CircleWhile COND input and BODY output type mismatch ", idx);
}
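
The PostImport changes replace whole-shape equality with a rank check plus a dimension-wise comparison that only fires when both sides are known, so dynamic dimensions no longer trigger false mismatches between subgraphs. The rule in isolation (Dim is a stand-in for loco's dimension type):

#include <cstddef>
#include <cstdint>
#include <vector>

// Dim stands in for loco's dimension type (known flag + value).
struct Dim
{
  bool known = false;
  uint32_t value = 0;
};

bool shapes_compatible(const std::vector<Dim> &a, const std::vector<Dim> &b)
{
  if (a.size() != b.size())
    return false; // rank mismatch stays a hard error
  for (std::size_t i = 0; i < a.size(); ++i)
  {
    // a dimension participates only when known on both sides; an unknown
    // (dynamic) dimension is treated as compatible with anything
    if (a[i].known && b[i].known && a[i].value != b[i].value)
      return false;
  }
  return true;
}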
diff --git a/compiler/luci/import/src/ValidateHelpers.cpp b/compiler/luci/import/src/ValidateHelpers.cpp
index 12a6548d6..fc027704b 100644
--- a/compiler/luci/import/src/ValidateHelpers.cpp
+++ b/compiler/luci/import/src/ValidateHelpers.cpp
@@ -26,9 +26,10 @@ bool validate_batch_space_nd(const GraphBuilderBase::ValidateArgs &args)
return false;
// input 1 and 2 should have INT32/INT64 type
- const auto &tensors = args.reader.tensors();
- const auto &tensor_1 = tensors.at(inputs.at(1));
- switch (tensor_1->type)
+ const auto tensors = args.reader.tensors();
+ const auto tensor_1 = tensors.at(inputs.at(1));
+ assert(tensor_1 != nullptr);
+ switch (tensor_1->type())
{
case circle::TensorType_INT32:
case circle::TensorType_INT64:
@@ -36,8 +37,9 @@ bool validate_batch_space_nd(const GraphBuilderBase::ValidateArgs &args)
default:
return false;
}
- const auto &tensor_2 = tensors.at(inputs.at(2));
- switch (tensor_2->type)
+ const auto tensor_2 = tensors.at(inputs.at(2));
+ assert(tensor_2 != nullptr);
+ switch (tensor_2->type())
{
case circle::TensorType_INT32:
case circle::TensorType_INT64:
@@ -47,8 +49,9 @@ bool validate_batch_space_nd(const GraphBuilderBase::ValidateArgs &args)
}
// Only support input shapes of rank 3 or 4
- const auto &tensor_0 = tensors.at(inputs.at(0));
- const auto t_0_s = tensor_0->shape.size();
+ const auto tensor_0 = tensors.at(inputs.at(0));
+ assert(tensor_0 != nullptr);
+ const auto t_0_s = wrap(tensor_0->shape()).size();
if (t_0_s != 3 && t_0_s != 4)
return false;
@@ -68,25 +71,28 @@ bool validate_minmax(const GraphBuilderBase::ValidateArgs &args)
if (outputs.size() != 1)
return false;
- const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs.at(0));
-
- switch (tensor->type)
+ const auto tensors = args.reader.tensors();
+ const auto tensor = tensors.at(inputs.at(0));
+ assert(tensor != nullptr);
+ switch (tensor->type())
{
case circle::TensorType_FLOAT16:
case circle::TensorType_FLOAT32:
case circle::TensorType_FLOAT64:
case circle::TensorType_INT32:
case circle::TensorType_INT64:
+ case circle::TensorType_UINT8:
break;
default:
return false;
}
- if (tensors[inputs.at(1)]->type != tensor->type)
+ assert(tensors[inputs.at(1)] != nullptr);
+ if (tensors[inputs.at(1)]->type() != tensor->type())
return false;
- if (tensors[outputs[0]]->type != tensor->type)
+ assert(tensors[outputs[0]] != nullptr);
+ if (tensors[outputs[0]]->type() != tensor->type())
return false;
return true;
@@ -103,10 +109,10 @@ bool validate_reduce_minmax(const GraphBuilderBase::ValidateArgs &args)
if (outputs.size() != 1)
return false;
- const auto &tensors = args.reader.tensors();
- const auto &tensor_axis = tensors.at(inputs.at(1));
-
- switch (tensor_axis->type)
+ const auto tensors = args.reader.tensors();
+ const auto tensor_axis = tensors.at(inputs.at(1));
+ assert(tensor_axis != nullptr);
+ switch (tensor_axis->type())
{
case circle::TensorType_INT32:
case circle::TensorType_INT64:
diff --git a/compiler/luci/lang/CMakeLists.txt b/compiler/luci/lang/CMakeLists.txt
index 32d0a890d..2f6ee23fa 100644
--- a/compiler/luci/lang/CMakeLists.txt
+++ b/compiler/luci/lang/CMakeLists.txt
@@ -2,15 +2,22 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
-add_library(luci_lang SHARED ${SOURCES})
+if (NOT LUCI_LIBRARY_TYPE)
+ set(LUCI_LIBRARY_TYPE "SHARED")
+endif(NOT LUCI_LIBRARY_TYPE)
+
+add_library(luci_lang ${LUCI_LIBRARY_TYPE} ${SOURCES})
target_include_directories(luci_lang PRIVATE src)
target_include_directories(luci_lang PUBLIC include)
target_link_libraries(luci_lang PUBLIC loco)
target_link_libraries(luci_lang PUBLIC oops)
+target_link_libraries(luci_lang PUBLIC nncc_coverage)
target_link_libraries(luci_lang PRIVATE logo)
target_link_libraries(luci_lang PRIVATE nncc_common)
install(TARGETS luci_lang DESTINATION lib)
+install(DIRECTORY include/ DESTINATION include
+ FILES_MATCHING PATTERN "*.h")
if(NOT ENABLE_TEST)
return()
diff --git a/compiler/luci/lang/include/luci/IR/AttrDilation.h b/compiler/luci/lang/include/luci/IR/AttrDilation.h
index c2b28d77d..ed8232576 100644
--- a/compiler/luci/lang/include/luci/IR/AttrDilation.h
+++ b/compiler/luci/lang/include/luci/IR/AttrDilation.h
@@ -27,15 +27,17 @@ class Dilation final
public:
Dilation() : _w(1), _h(1) {}
- int32_t w() const { return _w; }
- void w(int32_t w) { _w = w; }
+ uint32_t w() const { return _w; }
+ void w(uint32_t w) { _w = w; }
+ void w(int32_t w);
- int32_t h() const { return _h; }
- void h(int32_t h) { _h = h; }
+ uint32_t h() const { return _h; }
+ void h(uint32_t h) { _h = h; }
+ void h(int32_t h);
private:
- int32_t _w;
- int32_t _h;
+ uint32_t _w;
+ uint32_t _h;
};
} // namespace luci
diff --git a/compiler/luci/lang/include/luci/IR/AttrFilter.h b/compiler/luci/lang/include/luci/IR/AttrFilter.h
index 7909fa523..af9d7519f 100644
--- a/compiler/luci/lang/include/luci/IR/AttrFilter.h
+++ b/compiler/luci/lang/include/luci/IR/AttrFilter.h
@@ -27,15 +27,17 @@ class Filter final
public:
Filter() : _w(1), _h(1) {}
- int32_t w() const { return _w; }
- void w(int32_t w) { _w = w; }
+ uint32_t w() const { return _w; }
+ void w(uint32_t w) { _w = w; }
+ void w(int32_t w);
- int32_t h() const { return _h; }
- void h(int32_t h) { _h = h; }
+ uint32_t h() const { return _h; }
+ void h(uint32_t h) { _h = h; }
+ void h(int32_t h);
private:
- int32_t _w;
- int32_t _h;
+ uint32_t _w;
+ uint32_t _h;
};
} // namespace luci
diff --git a/compiler/luci/lang/include/luci/IR/AttrFusedActFunc.h b/compiler/luci/lang/include/luci/IR/AttrFusedActFunc.h
index 2abae604b..3f21d5858 100644
--- a/compiler/luci/lang/include/luci/IR/AttrFusedActFunc.h
+++ b/compiler/luci/lang/include/luci/IR/AttrFusedActFunc.h
@@ -28,7 +28,9 @@ enum class FusedActFunc
NONE,
RELU,
RELU_N1_TO_1,
- RELU6
+ RELU6,
+ TANH,
+ SIGN_BIT
};
} // namespace luci
diff --git a/compiler/luci/lang/include/luci/IR/AttrStride.h b/compiler/luci/lang/include/luci/IR/AttrStride.h
index 654967d73..6be697975 100644
--- a/compiler/luci/lang/include/luci/IR/AttrStride.h
+++ b/compiler/luci/lang/include/luci/IR/AttrStride.h
@@ -27,15 +27,17 @@ class Stride final
public:
Stride() : _w(1), _h(1) {}
- int32_t w() const { return _w; }
- void w(int32_t w) { _w = w; }
+ uint32_t w() const { return _w; }
+ void w(uint32_t w) { _w = w; }
+ void w(int32_t w);
- int32_t h() const { return _h; }
- void h(int32_t h) { _h = h; }
+ uint32_t h() const { return _h; }
+ void h(uint32_t h) { _h = h; }
+ void h(int32_t h);
private:
- int32_t _w;
- int32_t _h;
+ uint32_t _w;
+ uint32_t _h;
};
} // namespace luci
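
Dilation, Filter, and Stride all switch their storage to uint32_t while keeping an int32_t setter overload, declared in the header and defined out of line, presumably so legacy int-typed callers keep compiling but get range-checked. A plausible shape for that definition, shown for Stride::w only (the assert is an assumption, not quoted from the .cpp; h is symmetric):

#include <cassert>
#include <cstdint>

class Stride
{
public:
  uint32_t w() const { return _w; }
  void w(uint32_t w) { _w = w; }
  void w(int32_t w)
  {
    assert(w >= 0); // negative strides were never meaningful
    _w = static_cast<uint32_t>(w);
  }

private:
  uint32_t _w = 1;
};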
diff --git a/compiler/luci/lang/include/luci/IR/CircleNodeDecl.h b/compiler/luci/lang/include/luci/IR/CircleNodeDecl.h
index 967103e3c..edec9d18b 100644
--- a/compiler/luci/lang/include/luci/IR/CircleNodeDecl.h
+++ b/compiler/luci/lang/include/luci/IR/CircleNodeDecl.h
@@ -25,6 +25,7 @@
#include "CircleOpcode.h"
#include "CircleNodeVisitor.forward.h"
#include "CircleQuantParam.h"
+#include "SparsityParam.h"
#include <memory>
@@ -54,6 +55,12 @@ struct CircleNode : public loco::Node,
_quantparam = std::move(quantparam);
}
+ SparsityParam *sparsityparam(void) const { return _sparsityparam.get(); }
+ void sparsityparam(std::unique_ptr<SparsityParam> &&sparsityparam)
+ {
+ _sparsityparam = std::move(sparsityparam);
+ }
+
ShapeStatus shape_status(void) const { return _shape_status; }
void shape_status(ShapeStatus ss) { _shape_status = ss; }
@@ -63,6 +70,7 @@ struct CircleNode : public loco::Node,
private:
NodeName _name;
std::unique_ptr<CircleQuantParam> _quantparam;
+ std::unique_ptr<SparsityParam> _sparsityparam;
ShapeStatus _shape_status{ShapeStatus::UNDEFINED};
int32_t _op_version = 1;
};
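
The new sparsityparam accessors mirror the existing quantparam pattern: the node owns the parameter through a unique_ptr, callers move one in and read it back as a raw observer pointer. In isolation (SparsityParam's fields are elided; NodeWithSparsity is an illustrative stand-in):

#include <memory>
#include <utility>

struct SparsityParam { /* traversal order, block maps, per-dim metadata */ };

class NodeWithSparsity
{
public:
  SparsityParam *sparsityparam() const { return _sparsityparam.get(); }
  void sparsityparam(std::unique_ptr<SparsityParam> &&sp) { _sparsityparam = std::move(sp); }

private:
  std::unique_ptr<SparsityParam> _sparsityparam; // absent by default
};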
diff --git a/compiler/luci/lang/include/luci/IR/CircleNodeImpl.h b/compiler/luci/lang/include/luci/IR/CircleNodeImpl.h
index a6b9488db..4b3178b9b 100644
--- a/compiler/luci/lang/include/luci/IR/CircleNodeImpl.h
+++ b/compiler/luci/lang/include/luci/IR/CircleNodeImpl.h
@@ -34,8 +34,10 @@ template <typename T> T CircleNode::accept(CircleNodeVisitorBase<T> *v) const
\
case CircleOpcode::OPCODE: \
return v->visit(dynamic_cast<const CLASS *>(this));
+#define CIRCLE_VNODE CIRCLE_NODE
#include "CircleNodes.lst"
+#undef CIRCLE_VNODE
#undef CIRCLE_NODE
default:
@@ -53,8 +55,10 @@ template <typename T> T CircleNode::accept(CircleNodeMutableVisitorBase<T> *v)
\
case CircleOpcode::OPCODE: \
return v->visit(dynamic_cast<CLASS *>(this));
+#define CIRCLE_VNODE CIRCLE_NODE
#include "CircleNodes.lst"
+#undef CIRCLE_VNODE
#undef CIRCLE_NODE
default:
diff --git a/compiler/luci/lang/include/luci/IR/CircleNodeMixins.h b/compiler/luci/lang/include/luci/IR/CircleNodeMixins.h
new file mode 100644
index 000000000..158d65d90
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/CircleNodeMixins.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_NODE_MIXINS_H__
+#define __LUCI_IR_CIRCLE_NODE_MIXINS_H__
+
+#include "luci/IR/AttrFusedActFunc.h"
+
+#include <loco/IR/Node.h>
+#include <loco/IR/NodeMixins.h>
+
+#include <vector>
+
+namespace luci
+{
+
+/// @brief enumeration of mixin classes
+enum class CircleNodeTrait
+{
+ FusedActFunc,
+ Bias
+};
+
+template <CircleNodeTrait T> class CircleNodeMixin;
+
+template <> class CircleNodeMixin<CircleNodeTrait::FusedActFunc>
+{
+public:
+ CircleNodeMixin() = default;
+
+public:
+ FusedActFunc fusedActivationFunction() const { return _fused_act_fun; }
+ void fusedActivationFunction(FusedActFunc fused_act_fun) { _fused_act_fun = fused_act_fun; }
+
+private:
+ FusedActFunc _fused_act_fun = FusedActFunc::UNDEFINED;
+};
+
+/**
+ * @brief Mixin class for nodes that have a bias input
+ */
+template <> class CircleNodeMixin<CircleNodeTrait::Bias>
+{
+public:
+ CircleNodeMixin() = default;
+
+public:
+ virtual loco::Node *bias(void) const = 0; /// @brief get the input for bias.
+ virtual void bias(loco::Node *node) = 0; /// @brief set the input for bias.
+};
+
+/**
+ * @brief Nodes with the fixed number of inputs
+ *
+ * TODO Deprecate this class and use loco::FixedArity instead
+ */
+template <uint32_t N, typename Base> class FixedArityNode : public Base
+{
+public:
+ FixedArityNode()
+ {
+ _args.resize(N);
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ _args[n] = std::make_unique<loco::Use>(this);
+ }
+ }
+
+ virtual ~FixedArityNode() = default;
+
+public:
+ uint32_t arity(void) const final { return N; }
+
+ loco::Node *arg(uint32_t n) const final { return _args.at(n)->node(); }
+
+ void drop(void) final
+ {
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ _args.at(n)->node(nullptr);
+ }
+ }
+
+protected:
+ // This API allows inherited classes to access "_args" field.
+ loco::Use *at(uint32_t n) const { return _args.at(n).get(); }
+
+private:
+ std::vector<std::unique_ptr<loco::Use>> _args{};
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_NODE_MIXINS_H__
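
Putting the pieces together: a concrete node derives from FixedArityNode for its inputs and from one or more mixins for its attributes. A hedged sketch of the shape (CircleAddLike is an invented name; the real CircleAdd declared later in this diff follows the same recipe):

  class CircleAddLike final
    : public luci::FixedArityNode<2, luci::CircleNodeImpl<luci::CircleOpcode::ADD>>,
      public luci::CircleNodeMixin<luci::CircleNodeTrait::FusedActFunc>
  {
  public:
    loco::Node *x(void) const { return at(0)->node(); } // at() is the protected accessor
    void x(loco::Node *node) { at(0)->node(node); }
    loco::Node *y(void) const { return at(1)->node(); }
    void y(loco::Node *node) { at(1)->node(node); }
  };
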
diff --git a/compiler/luci/lang/include/luci/IR/CircleNodeVisitor.h b/compiler/luci/lang/include/luci/IR/CircleNodeVisitor.h
index 43339fe84..599e4bcd9 100644
--- a/compiler/luci/lang/include/luci/IR/CircleNodeVisitor.h
+++ b/compiler/luci/lang/include/luci/IR/CircleNodeVisitor.h
@@ -33,8 +33,10 @@ template <typename T> struct CircleNodeVisitorBase
virtual ~CircleNodeVisitorBase() = default;
#define CIRCLE_NODE(OPCODE, CIRCLE_CLASS) virtual T visit(const CIRCLE_CLASS *) = 0;
+#define CIRCLE_VNODE CIRCLE_NODE
#include "CircleNodes.lst"
+#undef CIRCLE_VNODE
#undef CIRCLE_NODE
};
@@ -44,9 +46,11 @@ template <typename T> struct CircleNodeVisitor : public CircleNodeVisitorBase<T>
#define CIRCLE_NODE(OPCODE, CIRCLE_CLASS) \
virtual T visit(const CIRCLE_CLASS *node) { return visit(static_cast<const CircleNode *>(node)); }
+#define CIRCLE_VNODE CIRCLE_NODE
#include "CircleNodes.lst"
+#undef CIRCLE_VNODE
#undef CIRCLE_NODE
/// @brief Default fallback
@@ -61,9 +65,11 @@ template <typename T> struct CircleNodeMutableVisitorBase
virtual ~CircleNodeMutableVisitorBase() = default;
#define CIRCLE_NODE(OPCODE, CIRCLE_CLASS) virtual T visit(CIRCLE_CLASS *) = 0;
+#define CIRCLE_VNODE CIRCLE_NODE
#include "CircleNodes.lst"
+#undef CIRCLE_VNODE
#undef CIRCLE_NODE
};
@@ -73,9 +79,11 @@ template <typename T> struct CircleNodeMutableVisitor : public CircleNodeMutable
#define CIRCLE_NODE(OPCODE, CIRCLE_CLASS) \
virtual T visit(CIRCLE_CLASS *node) { return visit(static_cast<CircleNode *>(node)); }
+#define CIRCLE_VNODE CIRCLE_NODE
#include "CircleNodes.lst"
+#undef CIRCLE_VNODE
#undef CIRCLE_NODE
/// @brief Default fallback
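
In practice a visitor overrides only the cases it cares about and lets every other opcode, virtual nodes now included, funnel into the CircleNode fallback. A hedged sketch:

  struct Conv2DCounter final : public luci::CircleNodeVisitor<uint32_t>
  {
    uint32_t visit(const luci::CircleConv2D *) final { return 1; }
    uint32_t visit(const luci::CircleNode *) final { return 0; } // default fallback
  };

  // usage: uint32_t hits = some_node->accept(&counter);
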
diff --git a/compiler/luci/lang/include/luci/IR/CircleNodes.h b/compiler/luci/lang/include/luci/IR/CircleNodes.h
index 25b86d2e9..d643b0893 100644
--- a/compiler/luci/lang/include/luci/IR/CircleNodes.h
+++ b/compiler/luci/lang/include/luci/IR/CircleNodes.h
@@ -25,20 +25,23 @@
#include "Nodes/CircleAveragePool2D.h"
#include "Nodes/CircleBatchMatMul.h"
#include "Nodes/CircleBatchToSpaceND.h"
+#include "Nodes/CircleBidirectionalSequenceLSTM.h"
#include "Nodes/CircleCast.h"
#include "Nodes/CircleCeil.h"
#include "Nodes/CircleConcatenation.h"
-#include "Nodes/CircleConst.h"
#include "Nodes/CircleConv2D.h"
#include "Nodes/CircleCos.h"
#include "Nodes/CircleCustom.h"
+#include "Nodes/CircleDensify.h"
#include "Nodes/CircleDepthToSpace.h"
#include "Nodes/CircleDepthwiseConv2D.h"
+#include "Nodes/CircleDequantize.h"
#include "Nodes/CircleDiv.h"
#include "Nodes/CircleElu.h"
#include "Nodes/CircleEqual.h"
#include "Nodes/CircleExp.h"
#include "Nodes/CircleExpandDims.h"
+#include "Nodes/CircleFakeQuant.h"
#include "Nodes/CircleFill.h"
#include "Nodes/CircleFloor.h"
#include "Nodes/CircleFloorDiv.h"
@@ -46,8 +49,10 @@
#include "Nodes/CircleFullyConnected.h"
#include "Nodes/CircleGather.h"
#include "Nodes/CircleGatherNd.h"
+#include "Nodes/CircleGelu.h"
#include "Nodes/CircleGreater.h"
#include "Nodes/CircleGreaterEqual.h"
+#include "Nodes/CircleHardSwish.h"
#include "Nodes/CircleIf.h"
#include "Nodes/CircleL2Normalize.h"
#include "Nodes/CircleL2Pool2D.h"
@@ -78,6 +83,7 @@
#include "Nodes/CirclePad.h"
#include "Nodes/CirclePadV2.h"
#include "Nodes/CirclePow.h"
+#include "Nodes/CircleQuantize.h"
#include "Nodes/CirclePRelu.h"
#include "Nodes/CircleRange.h"
#include "Nodes/CircleRank.h"
@@ -115,11 +121,13 @@
#include "Nodes/CircleStridedSlice.h"
#include "Nodes/CircleSub.h"
#include "Nodes/CircleSum.h"
+#include "Nodes/CircleSVDF.h"
#include "Nodes/CircleTanh.h"
#include "Nodes/CircleTile.h"
#include "Nodes/CircleTopKV2.h"
#include "Nodes/CircleTranspose.h"
#include "Nodes/CircleTransposeConv.h"
+#include "Nodes/CircleUnidirectionalSequenceLSTM.h"
#include "Nodes/CircleUnique.h"
#include "Nodes/CircleUnpack.h"
#include "Nodes/CircleWhere.h"
@@ -130,17 +138,21 @@
#include "Nodes/CircleBCQGather.h"
#include "Nodes/CircleInstanceNorm.h"
// Virtual nodes
+#include "Nodes/CircleConst.h"
#include "Nodes/CircleInput.h"
#include "Nodes/CircleOutput.h"
+#include "Nodes/CircleVariable.h"
+// Multi-output virtual nodes
+#include "Nodes/CircleBidirectionalSequenceLSTMOut.h"
#include "Nodes/CircleCustomOut.h"
#include "Nodes/CircleIfOut.h"
#include "Nodes/CircleNonMaxSuppressionV4Out.h"
#include "Nodes/CircleNonMaxSuppressionV5Out.h"
-#include "Nodes/CircleUnpackOut.h"
-#include "Nodes/CircleUniqueOut.h"
#include "Nodes/CircleSplitOut.h"
#include "Nodes/CircleSplitVOut.h"
#include "Nodes/CircleTopKV2Out.h"
+#include "Nodes/CircleUniqueOut.h"
+#include "Nodes/CircleUnpackOut.h"
#include "Nodes/CircleWhileOut.h"
#include <loco/IR/Graph.h>
@@ -148,15 +160,6 @@
namespace luci
{
-/**
- * @brief Set both CircleReshape's 2nd input as CircleConst, and newShape attribute
- * with same value
- * @note Shape inference for TFLReshape forces them to be same
- *
- * TODO find better place for this helper
- */
-void set_new_shape(CircleReshape *node, int32_t *base, uint32_t size);
-
/// @brief Link GraphOutput with CircleOutput node
void link(loco::GraphOutput *, CircleOutput *);
diff --git a/compiler/luci/lang/include/luci/IR/CircleNodes.lst b/compiler/luci/lang/include/luci/IR/CircleNodes.lst
index 9f0a1b16e..1646909e8 100644
--- a/compiler/luci/lang/include/luci/IR/CircleNodes.lst
+++ b/compiler/luci/lang/include/luci/IR/CircleNodes.lst
@@ -2,6 +2,10 @@
#error "Define CIRCLE_NODE"
#endif // CIRCLE_NODE
+#ifndef CIRCLE_VNODE
+#error "Define CIRCLE_VNODE"
+#endif // CIRCLE_VNODE
+
//
// PLEASE SORT NODE DECLS IN ALPHABETICAL ORDER
//
@@ -11,130 +15,142 @@
// .Input("value: T") <-- Input name is 'value'
//
-CIRCLE_NODE(ABS, luci::CircleAbs)
-CIRCLE_NODE(ADD, luci::CircleAdd)
-CIRCLE_NODE(ADD_N, luci::CircleAddN)
-CIRCLE_NODE(ARG_MAX, luci::CircleArgMax)
-CIRCLE_NODE(ARG_MIN, luci::CircleArgMin)
-CIRCLE_NODE(AVERAGE_POOL_2D, luci::CircleAveragePool2D)
-CIRCLE_NODE(BATCH_TO_SPACE_ND, luci::CircleBatchToSpaceND)
-CIRCLE_NODE(BATCHMATMUL, luci::CircleBatchMatMul)
-CIRCLE_NODE(CAST, luci::CircleCast)
-CIRCLE_NODE(CEIL, luci::CircleCeil)
-CIRCLE_NODE(CONCATENATION, luci::CircleConcatenation)
-CIRCLE_NODE(CONV_2D, luci::CircleConv2D)
-CIRCLE_NODE(COS, luci::CircleCos)
-CIRCLE_NODE(CUSTOM, luci::CircleCustom)
-CIRCLE_NODE(DEPTH_TO_SPACE, luci::CircleDepthToSpace)
-CIRCLE_NODE(DEPTHWISE_CONV_2D, luci::CircleDepthwiseConv2D)
-CIRCLE_NODE(DIV, luci::CircleDiv)
-CIRCLE_NODE(ELU, luci::CircleElu)
-CIRCLE_NODE(EQUAL, luci::CircleEqual)
-CIRCLE_NODE(EXP, luci::CircleExp)
-CIRCLE_NODE(EXPAND_DIMS, luci::CircleExpandDims)
-CIRCLE_NODE(FILL, luci::CircleFill)
-CIRCLE_NODE(FLOOR, luci::CircleFloor)
-CIRCLE_NODE(FLOOR_DIV, luci::CircleFloorDiv)
-CIRCLE_NODE(FLOOR_MOD, luci::CircleFloorMod)
-CIRCLE_NODE(FULLY_CONNECTED, luci::CircleFullyConnected)
-CIRCLE_NODE(GATHER, luci::CircleGather)
-CIRCLE_NODE(GATHER_ND, luci::CircleGatherNd)
-CIRCLE_NODE(GREATER, luci::CircleGreater)
-CIRCLE_NODE(GREATER_EQUAL, luci::CircleGreaterEqual)
-CIRCLE_NODE(IF, luci::CircleIf)
-CIRCLE_NODE(L2_NORMALIZATION, luci::CircleL2Normalize)
-CIRCLE_NODE(L2_POOL_2D, luci::CircleL2Pool2D)
-CIRCLE_NODE(LEAKY_RELU, luci::CircleLeakyRelu)
-CIRCLE_NODE(LESS, luci::CircleLess)
-CIRCLE_NODE(LESS_EQUAL, luci::CircleLessEqual)
-CIRCLE_NODE(LOCAL_RESPONSE_NORMALIZATION, luci::CircleLocalResponseNormalization)
-CIRCLE_NODE(LOG, luci::CircleLog)
-CIRCLE_NODE(LOGICAL_AND, luci::CircleLogicalAnd)
-CIRCLE_NODE(LOGICAL_NOT, luci::CircleLogicalNot)
-CIRCLE_NODE(LOGICAL_OR, luci::CircleLogicalOr)
-CIRCLE_NODE(LOGISTIC, luci::CircleLogistic)
-CIRCLE_NODE(LOG_SOFTMAX, luci::CircleLogSoftmax)
-CIRCLE_NODE(MATRIX_DIAG, luci::CircleMatrixDiag)
-CIRCLE_NODE(MAX_POOL_2D, luci::CircleMaxPool2D)
-CIRCLE_NODE(MATRIX_SET_DIAG, luci::CircleMatrixSetDiag)
-CIRCLE_NODE(MAXIMUM, luci::CircleMaximum)
-CIRCLE_NODE(MEAN, luci::CircleMean)
-CIRCLE_NODE(MINIMUM, luci::CircleMinimum)
-CIRCLE_NODE(MIRROR_PAD, luci::CircleMirrorPad)
-CIRCLE_NODE(MUL, luci::CircleMul)
-CIRCLE_NODE(NEG, luci::CircleNeg)
-CIRCLE_NODE(NON_MAX_SUPPRESSION_V4, luci::CircleNonMaxSuppressionV4)
-CIRCLE_NODE(NON_MAX_SUPPRESSION_V5, luci::CircleNonMaxSuppressionV5)
-CIRCLE_NODE(NOT_EQUAL, luci::CircleNotEqual)
-CIRCLE_NODE(ONE_HOT, luci::CircleOneHot)
-CIRCLE_NODE(PACK, luci::CirclePack)
-CIRCLE_NODE(PAD, luci::CirclePad)
-CIRCLE_NODE(PADV2, luci::CirclePadV2)
-CIRCLE_NODE(POW, luci::CirclePow)
-CIRCLE_NODE(PRELU, luci::CirclePRelu)
-CIRCLE_NODE(RANGE, luci::CircleRange)
-CIRCLE_NODE(RANK, luci::CircleRank)
-CIRCLE_NODE(REDUCE_ANY, luci::CircleReduceAny)
-CIRCLE_NODE(REDUCE_MAX, luci::CircleReduceMax)
-CIRCLE_NODE(REDUCE_MIN, luci::CircleReduceMin)
-CIRCLE_NODE(REDUCE_PROD, luci::CircleReduceProd)
-CIRCLE_NODE(RELU, luci::CircleRelu)
-CIRCLE_NODE(RELU6, luci::CircleRelu6)
-CIRCLE_NODE(RELU_N1_TO_1, luci::CircleReluN1To1)
-CIRCLE_NODE(RESHAPE, luci::CircleReshape)
-CIRCLE_NODE(RESIZE_BILINEAR, luci::CircleResizeBilinear)
-CIRCLE_NODE(RESIZE_NEAREST_NEIGHBOR, luci::CircleResizeNearestNeighbor)
-CIRCLE_NODE(REVERSE_SEQUENCE, luci::CircleReverseSequence)
-CIRCLE_NODE(REVERSE_V2, luci::CircleReverseV2)
-CIRCLE_NODE(ROUND, luci::CircleRound)
-CIRCLE_NODE(RSQRT, luci::CircleRsqrt)
-CIRCLE_NODE(SCATTER_ND, luci::CircleScatterNd)
-CIRCLE_NODE(SEGMENT_SUM, luci::CircleSegmentSum)
-CIRCLE_NODE(SELECT, luci::CircleSelect)
-CIRCLE_NODE(SELECT_V2, luci::CircleSelectV2)
-CIRCLE_NODE(SHAPE, luci::CircleShape)
-CIRCLE_NODE(SIN, luci::CircleSin)
-CIRCLE_NODE(SLICE, luci::CircleSlice)
-CIRCLE_NODE(SOFTMAX, luci::CircleSoftmax)
-CIRCLE_NODE(SPACE_TO_BATCH_ND, luci::CircleSpaceToBatchND)
-CIRCLE_NODE(SPACE_TO_DEPTH, luci::CircleSpaceToDepth)
-CIRCLE_NODE(SPARSE_TO_DENSE, luci::CircleSparseToDense)
-CIRCLE_NODE(SPLIT, luci::CircleSplit)
-CIRCLE_NODE(SPLIT_V, luci::CircleSplitV)
-CIRCLE_NODE(SQRT, luci::CircleSqrt)
-CIRCLE_NODE(SQUARE, luci::CircleSquare)
-CIRCLE_NODE(SQUARED_DIFFERENCE, luci::CircleSquaredDifference)
-CIRCLE_NODE(SQUEEZE, luci::CircleSqueeze)
-CIRCLE_NODE(STRIDED_SLICE, luci::CircleStridedSlice)
-CIRCLE_NODE(SUB, luci::CircleSub)
-CIRCLE_NODE(SUM, luci::CircleSum)
-CIRCLE_NODE(TANH, luci::CircleTanh)
-CIRCLE_NODE(TILE, luci::CircleTile)
-CIRCLE_NODE(TOPK_V2, luci::CircleTopKV2)
-CIRCLE_NODE(TRANSPOSE, luci::CircleTranspose)
-CIRCLE_NODE(TRANSPOSE_CONV, luci::CircleTransposeConv)
-CIRCLE_NODE(UNIQUE, luci::CircleUnique)
-CIRCLE_NODE(UNPACK, luci::CircleUnpack)
-CIRCLE_NODE(WHERE, luci::CircleWhere)
-CIRCLE_NODE(WHILE, luci::CircleWhile)
-CIRCLE_NODE(ZEROS_LIKE, luci::CircleZerosLike)
+CIRCLE_NODE(ABS, CircleAbs)
+CIRCLE_NODE(ADD, CircleAdd)
+CIRCLE_NODE(ADD_N, CircleAddN)
+CIRCLE_NODE(ARG_MAX, CircleArgMax)
+CIRCLE_NODE(ARG_MIN, CircleArgMin)
+CIRCLE_NODE(AVERAGE_POOL_2D, CircleAveragePool2D)
+CIRCLE_NODE(BATCH_TO_SPACE_ND, CircleBatchToSpaceND)
+CIRCLE_NODE(BATCH_MATMUL, CircleBatchMatMul)
+CIRCLE_NODE(BIDIRECTIONAL_SEQUENCE_LSTM, CircleBidirectionalSequenceLSTM)
+CIRCLE_NODE(CAST, CircleCast)
+CIRCLE_NODE(CEIL, CircleCeil)
+CIRCLE_NODE(CONCATENATION, CircleConcatenation)
+CIRCLE_NODE(CONV_2D, CircleConv2D)
+CIRCLE_NODE(COS, CircleCos)
+CIRCLE_NODE(CUSTOM, CircleCustom)
+CIRCLE_NODE(DENSIFY, CircleDensify)
+CIRCLE_NODE(DEPTH_TO_SPACE, CircleDepthToSpace)
+CIRCLE_NODE(DEPTHWISE_CONV_2D, CircleDepthwiseConv2D)
+CIRCLE_NODE(DEQUANTIZE, CircleDequantize)
+CIRCLE_NODE(DIV, CircleDiv)
+CIRCLE_NODE(ELU, CircleElu)
+CIRCLE_NODE(EQUAL, CircleEqual)
+CIRCLE_NODE(EXP, CircleExp)
+CIRCLE_NODE(EXPAND_DIMS, CircleExpandDims)
+CIRCLE_NODE(FAKE_QUANT, CircleFakeQuant)
+CIRCLE_NODE(FILL, CircleFill)
+CIRCLE_NODE(FLOOR, CircleFloor)
+CIRCLE_NODE(FLOOR_DIV, CircleFloorDiv)
+CIRCLE_NODE(FLOOR_MOD, CircleFloorMod)
+CIRCLE_NODE(FULLY_CONNECTED, CircleFullyConnected)
+CIRCLE_NODE(GATHER, CircleGather)
+CIRCLE_NODE(GATHER_ND, CircleGatherNd)
+CIRCLE_NODE(GELU, CircleGelu)
+CIRCLE_NODE(GREATER, CircleGreater)
+CIRCLE_NODE(GREATER_EQUAL, CircleGreaterEqual)
+CIRCLE_NODE(HARD_SWISH, CircleHardSwish)
+CIRCLE_NODE(IF, CircleIf)
+CIRCLE_NODE(L2_NORMALIZATION, CircleL2Normalize)
+CIRCLE_NODE(L2_POOL_2D, CircleL2Pool2D)
+CIRCLE_NODE(LEAKY_RELU, CircleLeakyRelu)
+CIRCLE_NODE(LESS, CircleLess)
+CIRCLE_NODE(LESS_EQUAL, CircleLessEqual)
+CIRCLE_NODE(LOCAL_RESPONSE_NORMALIZATION, CircleLocalResponseNormalization)
+CIRCLE_NODE(LOG, CircleLog)
+CIRCLE_NODE(LOGICAL_AND, CircleLogicalAnd)
+CIRCLE_NODE(LOGICAL_NOT, CircleLogicalNot)
+CIRCLE_NODE(LOGICAL_OR, CircleLogicalOr)
+CIRCLE_NODE(LOGISTIC, CircleLogistic)
+CIRCLE_NODE(LOG_SOFTMAX, CircleLogSoftmax)
+CIRCLE_NODE(MATRIX_DIAG, CircleMatrixDiag)
+CIRCLE_NODE(MAX_POOL_2D, CircleMaxPool2D)
+CIRCLE_NODE(MATRIX_SET_DIAG, CircleMatrixSetDiag)
+CIRCLE_NODE(MAXIMUM, CircleMaximum)
+CIRCLE_NODE(MEAN, CircleMean)
+CIRCLE_NODE(MINIMUM, CircleMinimum)
+CIRCLE_NODE(MIRROR_PAD, CircleMirrorPad)
+CIRCLE_NODE(MUL, CircleMul)
+CIRCLE_NODE(NEG, CircleNeg)
+CIRCLE_NODE(NON_MAX_SUPPRESSION_V4, CircleNonMaxSuppressionV4)
+CIRCLE_NODE(NON_MAX_SUPPRESSION_V5, CircleNonMaxSuppressionV5)
+CIRCLE_NODE(NOT_EQUAL, CircleNotEqual)
+CIRCLE_NODE(ONE_HOT, CircleOneHot)
+CIRCLE_NODE(PACK, CirclePack)
+CIRCLE_NODE(PAD, CirclePad)
+CIRCLE_NODE(PADV2, CirclePadV2)
+CIRCLE_NODE(POW, CirclePow)
+CIRCLE_NODE(PRELU, CirclePRelu)
+CIRCLE_NODE(QUANTIZE, CircleQuantize)
+CIRCLE_NODE(RANGE, CircleRange)
+CIRCLE_NODE(RANK, CircleRank)
+CIRCLE_NODE(REDUCE_ANY, CircleReduceAny)
+CIRCLE_NODE(REDUCE_MAX, CircleReduceMax)
+CIRCLE_NODE(REDUCE_MIN, CircleReduceMin)
+CIRCLE_NODE(REDUCE_PROD, CircleReduceProd)
+CIRCLE_NODE(RELU, CircleRelu)
+CIRCLE_NODE(RELU6, CircleRelu6)
+CIRCLE_NODE(RELU_N1_TO_1, CircleReluN1To1)
+CIRCLE_NODE(RESHAPE, CircleReshape)
+CIRCLE_NODE(RESIZE_BILINEAR, CircleResizeBilinear)
+CIRCLE_NODE(RESIZE_NEAREST_NEIGHBOR, CircleResizeNearestNeighbor)
+CIRCLE_NODE(REVERSE_SEQUENCE, CircleReverseSequence)
+CIRCLE_NODE(REVERSE_V2, CircleReverseV2)
+CIRCLE_NODE(ROUND, CircleRound)
+CIRCLE_NODE(RSQRT, CircleRsqrt)
+CIRCLE_NODE(SCATTER_ND, CircleScatterNd)
+CIRCLE_NODE(SEGMENT_SUM, CircleSegmentSum)
+CIRCLE_NODE(SELECT, CircleSelect)
+CIRCLE_NODE(SELECT_V2, CircleSelectV2)
+CIRCLE_NODE(SHAPE, CircleShape)
+CIRCLE_NODE(SIN, CircleSin)
+CIRCLE_NODE(SLICE, CircleSlice)
+CIRCLE_NODE(SOFTMAX, CircleSoftmax)
+CIRCLE_NODE(SPACE_TO_BATCH_ND, CircleSpaceToBatchND)
+CIRCLE_NODE(SPACE_TO_DEPTH, CircleSpaceToDepth)
+CIRCLE_NODE(SPARSE_TO_DENSE, CircleSparseToDense)
+CIRCLE_NODE(SPLIT, CircleSplit)
+CIRCLE_NODE(SPLIT_V, CircleSplitV)
+CIRCLE_NODE(SQRT, CircleSqrt)
+CIRCLE_NODE(SQUARE, CircleSquare)
+CIRCLE_NODE(SQUARED_DIFFERENCE, CircleSquaredDifference)
+CIRCLE_NODE(SQUEEZE, CircleSqueeze)
+CIRCLE_NODE(STRIDED_SLICE, CircleStridedSlice)
+CIRCLE_NODE(SUB, CircleSub)
+CIRCLE_NODE(SUM, CircleSum)
+CIRCLE_NODE(SVDF, CircleSVDF)
+CIRCLE_NODE(TANH, CircleTanh)
+CIRCLE_NODE(TILE, CircleTile)
+CIRCLE_NODE(TOPK_V2, CircleTopKV2)
+CIRCLE_NODE(TRANSPOSE, CircleTranspose)
+CIRCLE_NODE(TRANSPOSE_CONV, CircleTransposeConv)
+CIRCLE_NODE(UNIDIRECTIONAL_SEQUENCE_LSTM, CircleUnidirectionalSequenceLSTM)
+CIRCLE_NODE(UNIQUE, CircleUnique)
+CIRCLE_NODE(UNPACK, CircleUnpack)
+CIRCLE_NODE(WHERE, CircleWhere)
+CIRCLE_NODE(WHILE, CircleWhile)
+CIRCLE_NODE(ZEROS_LIKE, CircleZerosLike)
// Circle Only
-CIRCLE_NODE(BCQ_FULLY_CONNECTED, luci::CircleBCQFullyConnected)
-CIRCLE_NODE(BCQ_GATHER, luci::CircleBCQGather)
-CIRCLE_NODE(INSTANCE_NORM, luci::CircleInstanceNorm)
+CIRCLE_NODE(BCQ_FULLY_CONNECTED, CircleBCQFullyConnected)
+CIRCLE_NODE(BCQ_GATHER, CircleBCQGather)
+CIRCLE_NODE(INSTANCE_NORM, CircleInstanceNorm)
// Virtual node(s)
-CIRCLE_NODE(CIRCLECONST, luci::CircleConst)
-CIRCLE_NODE(CIRCLEINPUT, luci::CircleInput)
-CIRCLE_NODE(CIRCLEOUTPUT, luci::CircleOutput)
-CIRCLE_NODE(CIRCLEOUTPUTDUMMY, luci::CircleOutputDummy)
-CIRCLE_NODE(CIRCLEOUTPUTEXCLUDE, luci::CircleOutputExclude)
-CIRCLE_NODE(CIRCLECUSTOMOUT, luci::CircleCustomOut)
-CIRCLE_NODE(CIRCLEIFOUT, luci::CircleIfOut)
-CIRCLE_NODE(CIRCLENONMAXSUPPRESSIONV4OUT, luci::CircleNonMaxSuppressionV4Out)
-CIRCLE_NODE(CIRCLENONMAXSUPPRESSIONV5OUT, luci::CircleNonMaxSuppressionV5Out)
-CIRCLE_NODE(CIRCLESPLITOUT, luci::CircleSplitOut)
-CIRCLE_NODE(CIRCLESPLITVOUT, luci::CircleSplitVOut)
-CIRCLE_NODE(CIRCLETOPKV2OUT, luci::CircleTopKV2Out)
-CIRCLE_NODE(CIRCLEUNIQUEOUT, luci::CircleUniqueOut)
-CIRCLE_NODE(CIRCLEUNPACKOUT, luci::CircleUnpackOut)
-CIRCLE_NODE(CIRCLEWHILEOUT, luci::CircleWhileOut)
+CIRCLE_VNODE(CIRCLECONST, CircleConst)
+CIRCLE_VNODE(CIRCLEINPUT, CircleInput)
+CIRCLE_VNODE(CIRCLEOUTPUT, CircleOutput)
+CIRCLE_VNODE(CIRCLEOUTPUTDUMMY, CircleOutputDummy)
+CIRCLE_VNODE(CIRCLEOUTPUTEXCLUDE, CircleOutputExclude)
+CIRCLE_VNODE(CIRCLEVARIABLE, CircleVariable)
+// Multi-output virtual nodes
+CIRCLE_VNODE(CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT, CircleBidirectionalSequenceLSTMOut)
+CIRCLE_VNODE(CIRCLECUSTOMOUT, CircleCustomOut)
+CIRCLE_VNODE(CIRCLEIFOUT, CircleIfOut)
+CIRCLE_VNODE(CIRCLENONMAXSUPPRESSIONV4OUT, CircleNonMaxSuppressionV4Out)
+CIRCLE_VNODE(CIRCLENONMAXSUPPRESSIONV5OUT, CircleNonMaxSuppressionV5Out)
+CIRCLE_VNODE(CIRCLESPLITOUT, CircleSplitOut)
+CIRCLE_VNODE(CIRCLESPLITVOUT, CircleSplitVOut)
+CIRCLE_VNODE(CIRCLETOPKV2OUT, CircleTopKV2Out)
+CIRCLE_VNODE(CIRCLEUNIQUEOUT, CircleUniqueOut)
+CIRCLE_VNODE(CIRCLEUNPACKOUT, CircleUnpackOut)
+CIRCLE_VNODE(CIRCLEWHILEOUT, CircleWhileOut)
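
Splitting virtual nodes out into CIRCLE_VNODE lets consumers of the list treat the two groups differently; consumers that want identical handling simply alias one macro to the other, as the visitor and opcode headers in this diff do. A hedged sketch of a consumer that distinguishes them (the include path is an assumption; in-tree code includes "CircleNodes.lst" relative to the luci IR directory):

  #include <iostream>

  void dump_opcodes(void)
  {
  #define CIRCLE_NODE(OPCODE, CLASS) std::cout << "op      " << #OPCODE << '\n';
  #define CIRCLE_VNODE(OPCODE, CLASS) std::cout << "virtual " << #OPCODE << '\n';
  #include "luci/IR/CircleNodes.lst"
  #undef CIRCLE_VNODE
  #undef CIRCLE_NODE
  }
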
diff --git a/compiler/luci/lang/include/luci/IR/CircleOpcode.h b/compiler/luci/lang/include/luci/IR/CircleOpcode.h
index 703b70da2..be3069f94 100644
--- a/compiler/luci/lang/include/luci/IR/CircleOpcode.h
+++ b/compiler/luci/lang/include/luci/IR/CircleOpcode.h
@@ -23,7 +23,9 @@ namespace luci
enum class CircleOpcode
{
#define CIRCLE_NODE(OPCODE, CLASS) OPCODE,
+#define CIRCLE_VNODE CIRCLE_NODE
#include "CircleNodes.lst"
+#undef CIRCLE_VNODE
#undef CIRCLE_NODE
};
diff --git a/compiler/luci/lang/include/luci/IR/CircleQuantParam.h b/compiler/luci/lang/include/luci/IR/CircleQuantParam.h
index 694437303..8afc80a76 100644
--- a/compiler/luci/lang/include/luci/IR/CircleQuantParam.h
+++ b/compiler/luci/lang/include/luci/IR/CircleQuantParam.h
@@ -32,6 +32,10 @@ struct CircleQuantParam
int32_t quantized_dimension{0};
};
+struct CircleNode;
+
+void copy_quantparam(const luci::CircleNode *src, luci::CircleNode *dst);
+
} // namespace luci
#endif // __LUCI_IR_CIRCLEQUANTPARAM_H__
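
Only the declaration of copy_quantparam is added here; its definition lives in a source file outside this hunk. A plausible implementation, sketched under the assumption that it deep-copies the struct's vector members so the two nodes share no state:

  #include <memory>

  void copy_quantparam(const luci::CircleNode *src, luci::CircleNode *dst)
  {
    const auto *param = src->quantparam();
    if (param == nullptr)
    {
      dst->quantparam(nullptr); // clear any stale parameter on dst
      return;
    }
    auto copy = std::make_unique<luci::CircleQuantParam>();
    copy->min = param->min; // std::vector members deep-copy on assignment
    copy->max = param->max;
    copy->scale = param->scale;
    copy->zerop = param->zerop;
    copy->quantized_dimension = param->quantized_dimension;
    dst->quantparam(std::move(copy));
  }
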
diff --git a/compiler/luci/lang/src/DeadNodeQueryService.h b/compiler/luci/lang/include/luci/IR/DeadNodeQueryService.h
index d10696667..d10696667 100644
--- a/compiler/luci/lang/src/DeadNodeQueryService.h
+++ b/compiler/luci/lang/include/luci/IR/DeadNodeQueryService.h
diff --git a/compiler/luci/lang/include/luci/IR/ExecutionPlanTable.h b/compiler/luci/lang/include/luci/IR/ExecutionPlanTable.h
new file mode 100644
index 000000000..5c33c1123
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/ExecutionPlanTable.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_EXECUTION_PLAN_TABLE_H__
+#define __LUCI_EXECUTION_PLAN_TABLE_H__
+
+#include <cstdint>
+#include <map>
+#include <vector>
+
+namespace luci
+{
+
+using ExecutionPlanTable = std::map<uint32_t, std::vector<uint32_t>>;
+
+} // namespace luci
+
+#endif // __LUCI_EXECUTION_PLAN_TABLE_H__
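
The alias maps a node ID to a vector of unsigned values; what the vector holds (for example, execution order followed by buffer offsets, as circle-execution-plan uses it) is the producer's convention, not something this header enforces. A hedged sketch with an illustrative payload:

  #include <iostream>

  void fill_plan(void)
  {
    luci::ExecutionPlanTable plan;
    plan[0] = {0, 1024}; // node 0: order 0, offset 1024 (illustrative convention)
    plan[1] = {1, 4096};
    for (const auto &entry : plan)
      std::cout << "node " << entry.first << ": " << entry.second.size() << " values\n";
  }
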
diff --git a/compiler/luci/lang/include/luci/IR/LuciNodeMixins.h b/compiler/luci/lang/include/luci/IR/LuciNodeMixins.h
index c1bb0db11..2078495c6 100644
--- a/compiler/luci/lang/include/luci/IR/LuciNodeMixins.h
+++ b/compiler/luci/lang/include/luci/IR/LuciNodeMixins.h
@@ -17,90 +17,16 @@
#ifndef __LUCI_IR_LUCINODEMIXINS_H__
#define __LUCI_IR_LUCINODEMIXINS_H__
-#include "luci/IR/AttrFusedActFunc.h"
+// TODO remove this file after LuciNodeTrait and LuciNodeMixin are not used in backend
-#include <loco/IR/Node.h>
-#include <loco/IR/NodeMixins.h>
-
-#include <vector>
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
-/// @brief enumeration of mixin class
-enum class LuciNodeTrait
-{
- FusedActFunc,
- Bias
-};
-
-template <LuciNodeTrait T> class LuciNodeMixin;
-
-template <> class LuciNodeMixin<LuciNodeTrait::FusedActFunc>
-{
-public:
- LuciNodeMixin() = default;
-
-public:
- FusedActFunc fusedActivationFunction() const { return _fused_act_fun; }
- void fusedActivationFunction(FusedActFunc fused_act_fun) { _fused_act_fun = fused_act_fun; }
-
-private:
- FusedActFunc _fused_act_fun = FusedActFunc::UNDEFINED;
-};
-
-/**
- * @brief Mixin class for nodes that has a bias input
- */
-template <> class LuciNodeMixin<LuciNodeTrait::Bias>
-{
-public:
- LuciNodeMixin() = default;
-
-public:
- virtual loco::Node *bias(void) const = 0; /// @brief get the input for bias.
- virtual void bias(loco::Node *node) = 0; /// @brief set the input for bias.
-};
-
-/**
- * @brief Nodes with the fixed number of inputs
- *
- * TODO Deprecated this class, and use loco::FixedArity instead
- */
-template <unsigned N, typename Base> class FixedArityNode : public Base
-{
-public:
- FixedArityNode()
- {
- _args.resize(N);
- for (uint32_t n = 0; n < N; ++n)
- {
- _args[n] = std::make_unique<loco::Use>(this);
- }
- }
-
- virtual ~FixedArityNode() = default;
-
-public:
- unsigned arity(void) const final { return N; }
-
- loco::Node *arg(uint32_t n) const final { return _args.at(n)->node(); }
-
- void drop(void) final
- {
- for (uint32_t n = 0; n < N; ++n)
- {
- _args.at(n)->node(nullptr);
- }
- }
-
-protected:
- // This API allows inherited classes to access "_args" field.
- loco::Use *at(unsigned n) const { return _args.at(n).get(); }
+using LuciNodeTrait = CircleNodeTrait;
-private:
- std::vector<std::unique_ptr<loco::Use>> _args{};
-};
+template <LuciNodeTrait T> using LuciNodeMixin = CircleNodeMixin<T>;
} // namespace luci
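
Because the old names become aliases rather than copies, backend code that still spells LuciNodeMixin keeps compiling against the renamed types; both spellings denote the same type, as this compile-time check illustrates:

  #include <type_traits>

  static_assert(
    std::is_same<luci::LuciNodeMixin<luci::LuciNodeTrait::FusedActFunc>,
                 luci::CircleNodeMixin<luci::CircleNodeTrait::FusedActFunc>>::value,
    "the legacy alias must be transparent");
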
diff --git a/compiler/luci/lang/include/luci/IR/Module.h b/compiler/luci/lang/include/luci/IR/Module.h
index 30eac59ce..75cf67905 100644
--- a/compiler/luci/lang/include/luci/IR/Module.h
+++ b/compiler/luci/lang/include/luci/IR/Module.h
@@ -19,6 +19,7 @@
#include <loco/IR/Graph.h>
+#include <map>
#include <memory>
#include <vector>
@@ -59,8 +60,27 @@ public:
// TODO provide graph accessor with a name
+public:
+ void source_table(const std::map<uint32_t, std::string> &table) { _source_table = table; }
+
+ const std::map<uint32_t, std::string> &source_table(void) const { return _source_table; }
+
private:
std::vector<std::unique_ptr<loco::Graph>> _graphs;
+
+private:
+ /**
+ * @brief Metadata about source table for profiling
+ *
+ * @note Key is ID of node and value is name of node.
+ *
+ * If the imported circle model already contains a 'source_table',
+ * the table is stored as-is.
+ * Otherwise, a new 'source_table' is created from the imported nodes.
+ *
+ * Even if a Module has multiple subgraphs, only the first subgraph is considered.
+ */
+ std::map<uint32_t, std::string> _source_table;
};
std::unique_ptr<Module> make_module(void);
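
Round-tripping the new profiling metadata is a plain map copy. A minimal sketch (the node IDs and names are illustrative):

  #include <cassert>
  #include <map>
  #include <string>

  void tag_module(luci::Module *m)
  {
    std::map<uint32_t, std::string> table;
    table[0] = "conv1"; // key: node ID, value: node name
    table[1] = "relu1";
    m->source_table(table); // stored by copy

    assert(m->source_table().at(0) == "conv1");
  }
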
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleAbs.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleAbs.h
index 45dba15bf..7a73f37cd 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleAbs.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleAbs.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleAdd.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleAdd.h
index f26eccd1a..92563de4c 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleAdd.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleAdd.h
@@ -21,7 +21,7 @@
#include "luci/IR/CircleOpcode.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -30,7 +30,7 @@ namespace luci
* @brief ADD in Circle
*/
class CircleAdd final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::ADD>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
public:
loco::Node *x(void) const { return at(0)->node(); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleArgMax.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleArgMax.h
index dbc4b2b3a..c1e4631e4 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleArgMax.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleArgMax.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleArgMin.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleArgMin.h
index 8cb561983..b4d026201 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleArgMin.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleArgMin.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleAveragePool2D.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleAveragePool2D.h
index 0b43b40c8..4aa45c2d8 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleAveragePool2D.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleAveragePool2D.h
@@ -24,7 +24,7 @@
#include "luci/IR/AttrPadding.h"
#include "luci/IR/AttrStride.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -33,16 +33,14 @@ namespace luci
* @brief AVERAGE_POOL_2D in Circle
*/
class CircleAveragePool2D final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::AVERAGE_POOL_2D>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::AVERAGE_POOL_2D>>,
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
public:
- CircleAveragePool2D() : _padding(Padding::UNDEFINED) { /* empty */}
-
-public:
loco::Node *value(void) const { return at(0)->node(); }
void value(loco::Node *node) { at(0)->node(node); }
+public:
Padding padding() const { return _padding; }
void padding(Padding padding) { _padding = padding; }
@@ -53,7 +51,7 @@ public:
Stride *stride(void) { return &_stride; }
private:
- Padding _padding;
+ Padding _padding{Padding::UNDEFINED};
Stride _stride;
Filter _filter;
};
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleBCQFullyConnected.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleBCQFullyConnected.h
index 7d12d593a..4c164ebca 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleBCQFullyConnected.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleBCQFullyConnected.h
@@ -21,7 +21,7 @@
#include "luci/IR/CircleOpcode.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -30,9 +30,9 @@ namespace luci
* @brief BCQ_FULLY_CONNECTED in Circle
*/
class CircleBCQFullyConnected final
- : public FixedArityNode<5, CircleNodeImpl<CircleOpcode::BCQ_FULLY_CONNECTED>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>,
- public LuciNodeMixin<LuciNodeTrait::Bias>
+ : public FixedArityNode<5, CircleNodeImpl<CircleOpcode::BCQ_FULLY_CONNECTED>>,
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>,
+ public CircleNodeMixin<CircleNodeTrait::Bias>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
@@ -58,7 +58,7 @@ public:
}
private:
- int32_t _weights_hidden_size = 0;
+ int32_t _weights_hidden_size{0};
};
} // namespace luci
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleBCQGather.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleBCQGather.h
index f7638261d..1a0bf4f19 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleBCQGather.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleBCQGather.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -51,8 +51,8 @@ public:
void input_hidden_size(int32_t input_hidden_size) { _input_hidden_size = input_hidden_size; }
private:
- int32_t _axis = 0;
- int32_t _input_hidden_size = 0;
+ int32_t _axis{0};
+ int32_t _input_hidden_size{0};
};
} // namespace luci
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleBatchMatMul.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleBatchMatMul.h
index 19999924e..864b033ed 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleBatchMatMul.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleBatchMatMul.h
@@ -20,15 +20,15 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
/**
- * @brief BATCHMATMUL in Circle
+ * @brief BATCH_MATMUL in Circle
*/
-class CircleBatchMatMul final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::BATCHMATMUL>>
+class CircleBatchMatMul final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::BATCH_MATMUL>>
{
public:
loco::Node *x(void) const { return at(0)->node(); }
@@ -45,8 +45,8 @@ public:
void adj_y(bool arg) { _adj_y = arg; }
private:
- bool _adj_x = false;
- bool _adj_y = false;
+ bool _adj_x{false};
+ bool _adj_y{false};
};
} // namespace luci
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleBatchToSpaceND.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleBatchToSpaceND.h
index 67c0a2102..80fa53b8e 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleBatchToSpaceND.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleBatchToSpaceND.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -29,7 +29,7 @@ namespace luci
* @brief BATCH_TO_SPACE_ND in Circle
*/
class CircleBatchToSpaceND final
- : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::BATCH_TO_SPACE_ND>>
+ : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::BATCH_TO_SPACE_ND>>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleBidirectionalSequenceLSTM.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleBidirectionalSequenceLSTM.h
new file mode 100644
index 000000000..d16281b69
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleBidirectionalSequenceLSTM.h
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLEBIDIRECTIONALSEQUENCE_LSTM_H__
+#define __LUCI_IR_CIRCLEBIDIRECTIONALSEQUENCE_LSTM_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/AttrFusedActFunc.h"
+#include "luci/IR/CircleNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief BIDIRECTIONAL_SEQUENCE_LSTM in Circle
+ */
+class CircleBidirectionalSequenceLSTM final
+ : public FixedArityNode<48, CircleNodeImpl<CircleOpcode::BIDIRECTIONAL_SEQUENCE_LSTM>>,
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
+{
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *fw_input_to_input_weights(void) const { return at(1)->node(); }
+ void fw_input_to_input_weights(loco::Node *node) { at(1)->node(node); }
+ loco::Node *fw_input_to_forget_weights(void) const { return at(2)->node(); }
+ void fw_input_to_forget_weights(loco::Node *node) { at(2)->node(node); }
+ loco::Node *fw_input_to_cell_weights(void) const { return at(3)->node(); }
+ void fw_input_to_cell_weights(loco::Node *node) { at(3)->node(node); }
+ loco::Node *fw_input_to_output_weights(void) const { return at(4)->node(); }
+ void fw_input_to_output_weights(loco::Node *node) { at(4)->node(node); }
+
+ loco::Node *fw_recurrent_to_input_weights(void) const { return at(5)->node(); }
+ void fw_recurrent_to_input_weights(loco::Node *node) { at(5)->node(node); }
+ loco::Node *fw_recurrent_to_forget_weights(void) const { return at(6)->node(); }
+ void fw_recurrent_to_forget_weights(loco::Node *node) { at(6)->node(node); }
+ loco::Node *fw_recurrent_to_cell_weights(void) const { return at(7)->node(); }
+ void fw_recurrent_to_cell_weights(loco::Node *node) { at(7)->node(node); }
+ loco::Node *fw_recurrent_to_output_weights(void) const { return at(8)->node(); }
+ void fw_recurrent_to_output_weights(loco::Node *node) { at(8)->node(node); }
+
+ loco::Node *fw_cell_to_input_weights(void) const { return at(9)->node(); }
+ void fw_cell_to_input_weights(loco::Node *node) { at(9)->node(node); }
+ loco::Node *fw_cell_to_forget_weights(void) const { return at(10)->node(); }
+ void fw_cell_to_forget_weights(loco::Node *node) { at(10)->node(node); }
+ loco::Node *fw_cell_to_output_weights(void) const { return at(11)->node(); }
+ void fw_cell_to_output_weights(loco::Node *node) { at(11)->node(node); }
+
+ loco::Node *fw_input_gate_bias(void) const { return at(12)->node(); }
+ void fw_input_gate_bias(loco::Node *node) { at(12)->node(node); }
+ loco::Node *fw_forget_gate_bias(void) const { return at(13)->node(); }
+ void fw_forget_gate_bias(loco::Node *node) { at(13)->node(node); }
+ loco::Node *fw_cell_gate_bias(void) const { return at(14)->node(); }
+ void fw_cell_gate_bias(loco::Node *node) { at(14)->node(node); }
+ loco::Node *fw_output_gate_bias(void) const { return at(15)->node(); }
+ void fw_output_gate_bias(loco::Node *node) { at(15)->node(node); }
+
+ loco::Node *fw_projection_weights(void) const { return at(16)->node(); }
+ void fw_projection_weights(loco::Node *node) { at(16)->node(node); }
+ loco::Node *fw_projection_bias(void) const { return at(17)->node(); }
+ void fw_projection_bias(loco::Node *node) { at(17)->node(node); }
+
+ loco::Node *bw_input_to_input_weights(void) const { return at(18)->node(); }
+ void bw_input_to_input_weights(loco::Node *node) { at(18)->node(node); }
+ loco::Node *bw_input_to_forget_weights(void) const { return at(19)->node(); }
+ void bw_input_to_forget_weights(loco::Node *node) { at(19)->node(node); }
+ loco::Node *bw_input_to_cell_weights(void) const { return at(20)->node(); }
+ void bw_input_to_cell_weights(loco::Node *node) { at(20)->node(node); }
+ loco::Node *bw_input_to_output_weights(void) const { return at(21)->node(); }
+ void bw_input_to_output_weights(loco::Node *node) { at(21)->node(node); }
+
+ loco::Node *bw_recurrent_to_input_weights(void) const { return at(22)->node(); }
+ void bw_recurrent_to_input_weights(loco::Node *node) { at(22)->node(node); }
+ loco::Node *bw_recurrent_to_forget_weights(void) const { return at(23)->node(); }
+ void bw_recurrent_to_forget_weights(loco::Node *node) { at(23)->node(node); }
+ loco::Node *bw_recurrent_to_cell_weights(void) const { return at(24)->node(); }
+ void bw_recurrent_to_cell_weights(loco::Node *node) { at(24)->node(node); }
+ loco::Node *bw_recurrent_to_output_weights(void) const { return at(25)->node(); }
+ void bw_recurrent_to_output_weights(loco::Node *node) { at(25)->node(node); }
+
+ loco::Node *bw_cell_to_input_weights(void) const { return at(26)->node(); }
+ void bw_cell_to_input_weights(loco::Node *node) { at(26)->node(node); }
+ loco::Node *bw_cell_to_forget_weights(void) const { return at(27)->node(); }
+ void bw_cell_to_forget_weights(loco::Node *node) { at(27)->node(node); }
+ loco::Node *bw_cell_to_output_weights(void) const { return at(28)->node(); }
+ void bw_cell_to_output_weights(loco::Node *node) { at(28)->node(node); }
+
+ loco::Node *bw_input_gate_bias(void) const { return at(29)->node(); }
+ void bw_input_gate_bias(loco::Node *node) { at(29)->node(node); }
+ loco::Node *bw_forget_gate_bias(void) const { return at(30)->node(); }
+ void bw_forget_gate_bias(loco::Node *node) { at(30)->node(node); }
+ loco::Node *bw_cell_gate_bias(void) const { return at(31)->node(); }
+ void bw_cell_gate_bias(loco::Node *node) { at(31)->node(node); }
+ loco::Node *bw_output_gate_bias(void) const { return at(32)->node(); }
+ void bw_output_gate_bias(loco::Node *node) { at(32)->node(node); }
+
+ loco::Node *bw_projection_weights(void) const { return at(33)->node(); }
+ void bw_projection_weights(loco::Node *node) { at(33)->node(node); }
+ loco::Node *bw_projection_bias(void) const { return at(34)->node(); }
+ void bw_projection_bias(loco::Node *node) { at(34)->node(node); }
+
+ loco::Node *fw_activation_state(void) const { return at(35)->node(); }
+ void fw_activation_state(loco::Node *node) { at(35)->node(node); }
+ loco::Node *fw_cell_state(void) const { return at(36)->node(); }
+ void fw_cell_state(loco::Node *node) { at(36)->node(node); }
+
+ loco::Node *bw_activation_state(void) const { return at(37)->node(); }
+ void bw_activation_state(loco::Node *node) { at(37)->node(node); }
+ loco::Node *bw_cell_state(void) const { return at(38)->node(); }
+ void bw_cell_state(loco::Node *node) { at(38)->node(node); }
+
+ loco::Node *auxillary_input(void) const { return at(39)->node(); }
+ void auxillary_input(loco::Node *node) { at(39)->node(node); }
+ loco::Node *fw_auxillary_input_to_input_weights(void) const { return at(40)->node(); }
+ void fw_auxillary_input_to_input_weights(loco::Node *node) { at(40)->node(node); }
+ loco::Node *fw_auxillary_input_to_forget_weights(void) const { return at(41)->node(); }
+ void fw_auxillary_input_to_forget_weights(loco::Node *node) { at(41)->node(node); }
+ loco::Node *fw_auxillary_input_to_cell_weights(void) const { return at(42)->node(); }
+ void fw_auxillary_input_to_cell_weights(loco::Node *node) { at(42)->node(node); }
+ loco::Node *fw_auxillary_input_to_output_weights(void) const { return at(43)->node(); }
+ void fw_auxillary_input_to_output_weights(loco::Node *node) { at(43)->node(node); }
+ loco::Node *bw_auxillary_input_to_input_weights(void) const { return at(44)->node(); }
+ void bw_auxillary_input_to_input_weights(loco::Node *node) { at(44)->node(node); }
+ loco::Node *bw_auxillary_input_to_forget_weights(void) const { return at(45)->node(); }
+ void bw_auxillary_input_to_forget_weights(loco::Node *node) { at(45)->node(node); }
+ loco::Node *bw_auxillary_input_to_cell_weights(void) const { return at(46)->node(); }
+ void bw_auxillary_input_to_cell_weights(loco::Node *node) { at(46)->node(node); }
+ loco::Node *bw_auxillary_input_to_output_weights(void) const { return at(47)->node(); }
+ void bw_auxillary_input_to_output_weights(loco::Node *node) { at(47)->node(node); }
+
+public:
+ float cell_clip(void) const { return _cell_clip; }
+ void cell_clip(float cell_clip) { _cell_clip = cell_clip; }
+ float proj_clip(void) const { return _proj_clip; }
+ void proj_clip(float proj_clip) { _proj_clip = proj_clip; }
+ bool merge_outputs(void) const { return _merge_outputs; }
+ void merge_outputs(bool merge_outputs) { _merge_outputs = merge_outputs; }
+ bool time_major(void) const { return _time_major; }
+ void time_major(bool time_major) { _time_major = time_major; }
+ bool asymmetric_quantize_inputs(void) const { return _asymmetric_quantize_inputs; }
+ void asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
+ {
+ _asymmetric_quantize_inputs = asymmetric_quantize_inputs;
+ }
+
+private:
+ float _cell_clip{0.0f};
+ float _proj_clip{0.0f};
+ bool _merge_outputs{false};
+ bool _time_major{false};
+ bool _asymmetric_quantize_inputs{false};
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLEBIDIRECTIONALSEQUENCE_LSTM_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleBidirectionalSequenceLSTMOut.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleBidirectionalSequenceLSTMOut.h
new file mode 100644
index 000000000..fb2eb0831
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleBidirectionalSequenceLSTMOut.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_BIDIRECTIONAL_SEQUENCE_LSTM_OUT_H__
+#define __LUCI_IR_CIRCLE_BIDIRECTIONAL_SEQUENCE_LSTM_OUT_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/CircleNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief Virtual CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT in Circle
+ */
+class CircleBidirectionalSequenceLSTMOut final
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT>>
+{
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+
+public:
+ int32_t index(void) const { return _index; }
+ void index(int32_t index) { _index = index; }
+
+private:
+ int32_t _index{-1};
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_BIDIRECTIONAL_SEQUENCE_LSTM_OUT_H__
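
This follows luci's multi-output convention: the LSTM itself stays a single node, and each of its results is materialized as a virtual Out node that points back at it and records which output it represents. A hedged sketch, assuming a loco::Graph *g:

  auto *lstm = g->nodes()->create<luci::CircleBidirectionalSequenceLSTM>();

  auto *fw = g->nodes()->create<luci::CircleBidirectionalSequenceLSTMOut>();
  fw->input(lstm);
  fw->index(0); // forward output

  auto *bw = g->nodes()->create<luci::CircleBidirectionalSequenceLSTMOut>();
  bw->input(lstm);
  bw->index(1); // backward output (present when merge_outputs is false)
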
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleCast.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleCast.h
index 9a89d0b2b..0b793607f 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleCast.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleCast.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleCeil.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleCeil.h
index 8a8715dcf..3d7a7ebc7 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleCeil.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleCeil.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleConcatenation.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleConcatenation.h
index dea1a4613..2746a0a2e 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleConcatenation.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleConcatenation.h
@@ -21,7 +21,7 @@
#include "luci/IR/CircleOpcode.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
#include "luci/IR/VariadicArityNode.h"
#include <cassert>
@@ -33,12 +33,12 @@ namespace luci
* @brief CONCATENATION in Circle
*/
class CircleConcatenation final
- : public VariadicArityNode<CircleNodeImpl<CircleOpcode::CONCATENATION>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+ : public VariadicArityNode<CircleNodeImpl<CircleOpcode::CONCATENATION>>,
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
public:
CircleConcatenation(uint32_t arity)
- : VariadicArityNode<CircleNodeImpl<CircleOpcode::CONCATENATION>>(arity)
+ : VariadicArityNode<CircleNodeImpl<CircleOpcode::CONCATENATION>>(arity)
{
// TODO Support when arity is 0
assert(arity >= 1);
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleConst.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleConst.h
index 250282049..3e9a274e0 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleConst.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleConst.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
#include <loco/IR/DataTypeTraits.h>
@@ -34,9 +34,6 @@ namespace luci
class CircleConst final : public FixedArityNode<0, CircleNodeImpl<CircleOpcode::CIRCLECONST>>
{
public:
- CircleConst() = default;
-
-public:
template <loco::DataType DT> uint32_t size(void) const;
template <loco::DataType DT> void size(uint32_t size);
template <loco::DataType DT> const typename loco::DataTypeImpl<DT>::Type &at(uint32_t n) const;
@@ -47,6 +44,8 @@ public:
private:
std::vector<uint8_t> _data;
+ // TODO use _data for STRING and remove _strings
+ std::vector<std::string> _strings; // for STRING type
};
} // namespace luci
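
The templated size/at accessors select the element type from the DataType tag. A hedged usage sketch, assuming a loco::Graph *g and that a non-const at() overload returns a mutable reference:

  auto *c = g->nodes()->create<luci::CircleConst>();
  c->dtype(loco::DataType::FLOAT32);
  c->size<loco::DataType::FLOAT32>(4); // allocate 4 float elements in _data
  for (uint32_t i = 0; i < 4; ++i)
    c->at<loco::DataType::FLOAT32>(i) = 0.5f; // element access by index
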
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleConv2D.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleConv2D.h
index 13657cee4..7c390940e 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleConv2D.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleConv2D.h
@@ -24,7 +24,7 @@
#include "luci/IR/AttrStride.h"
#include "luci/IR/AttrDilation.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -33,8 +33,8 @@ namespace luci
* @brief CONV_2D in Circle
*/
class CircleConv2D final : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::CONV_2D>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>,
- public LuciNodeMixin<LuciNodeTrait::Bias>
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>,
+ public CircleNodeMixin<CircleNodeTrait::Bias>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
@@ -57,7 +57,7 @@ public:
Dilation *dilation(void) { return &_dilation; }
private:
- Padding _padding = Padding::UNDEFINED;
+ Padding _padding{Padding::UNDEFINED};
Stride _stride;
Dilation _dilation;
};
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleCos.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleCos.h
index 07ced620a..cff04906d 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleCos.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleCos.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleCustom.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleCustom.h
index 6c722b766..5709e2cd5 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleCustom.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleCustom.h
@@ -29,19 +29,22 @@ namespace luci
class CircleCustom final : public VariadicArityNode<CircleNodeImpl<CircleOpcode::CUSTOM>>
{
public:
- CircleCustom(uint32_t arity) : VariadicArityNode<CircleNodeImpl<CircleOpcode::CUSTOM>>(arity)
+ CircleCustom(uint32_t arity, uint32_t out)
+ : VariadicArityNode<CircleNodeImpl<CircleOpcode::CUSTOM>>(arity), _output_count(out)
{
- // TODO Support when arity is 0
- assert(arity >= 1);
+ // NOTE Custom can have 0 inputs or 0 outputs, but not both
+ assert(arity != 0 || out != 0);
}
public:
uint32_t numInputs(void) const { return arity(); }
+ uint32_t numOutputs(void) const { return _output_count; }
public:
Node *inputs(uint32_t index) const { return at(index)->node(); }
void inputs(uint32_t index, Node *node) { at(index)->node(node); }
+public:
const std::vector<uint8_t> &custom_options(void) const { return _custom_options; }
void custom_options(const std::vector<uint8_t> &custom_options)
{
@@ -54,6 +57,7 @@ public:
private:
std::vector<uint8_t> _custom_options;
std::string _custom_code;
+ uint32_t _output_count{0};
};
} // namespace luci
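
With the output count now part of the constructor, building a custom op and its Out node looks like this hedged sketch (g is a loco::Graph*, a and b are pre-existing nodes, and "MyCustomOp" is an invented custom code):

  #include <cassert>

  auto *custom = g->nodes()->create<luci::CircleCustom>(2 /* inputs */, 1 /* outputs */);
  custom->custom_code("MyCustomOp");
  custom->inputs(0, a);
  custom->inputs(1, b);

  auto *out = g->nodes()->create<luci::CircleCustomOut>();
  out->input(custom);
  out->index(0);
  assert(custom->numOutputs() == 1);
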
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleCustomOut.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleCustomOut.h
index 36b8e4aed..91a89c151 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleCustomOut.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleCustomOut.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -29,12 +29,9 @@ namespace luci
* @brief Virtual CIRCLECUSTOMOUT in Circle
*/
class CircleCustomOut final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLECUSTOMOUT>>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLECUSTOMOUT>>
{
public:
- CircleCustomOut() = default;
-
-public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleDensify.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleDensify.h
new file mode 100644
index 000000000..7acad0341
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleDensify.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_DENSIFY_H__
+#define __LUCI_IR_CIRCLE_DENSIFY_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/CircleNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief DENSIFY in Circle
+ */
+class CircleDensify final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::DENSIFY>>
+{
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_DENSIFY_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleDepthToSpace.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleDepthToSpace.h
index e19282b97..85b567fb7 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleDepthToSpace.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleDepthToSpace.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -29,18 +29,18 @@ namespace luci
* @brief DEPTH_TO_SPACE in Circle
*/
class CircleDepthToSpace final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::DEPTH_TO_SPACE>>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::DEPTH_TO_SPACE>>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
public:
- int block_size(void) const { return _block_size; }
- void block_size(int block_size) { _block_size = block_size; }
+ int32_t block_size(void) const { return _block_size; }
+ void block_size(int32_t block_size) { _block_size = block_size; }
private:
- int _block_size{0};
+ int32_t _block_size{0};
};
} // namespace luci
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleDepthwiseConv2D.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleDepthwiseConv2D.h
index eb058cec1..046aa5908 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleDepthwiseConv2D.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleDepthwiseConv2D.h
@@ -25,7 +25,7 @@
#include "luci/IR/AttrPadding.h"
#include "luci/IR/AttrStride.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -34,9 +34,9 @@ namespace luci
* @brief DEPTHWISE_CONV_2D in Circle
*/
class CircleDepthwiseConv2D final
- : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::DEPTHWISE_CONV_2D>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>,
- public LuciNodeMixin<LuciNodeTrait::Bias>
+ : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::DEPTHWISE_CONV_2D>>,
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>,
+ public CircleNodeMixin<CircleNodeTrait::Bias>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
@@ -62,9 +62,9 @@ public:
Dilation *dilation(void) { return &_dilation; }
private:
- Padding _padding = Padding::UNDEFINED;
+ Padding _padding{Padding::UNDEFINED};
Stride _stride;
- int32_t _depth_multiplier = 0;
+ int32_t _depth_multiplier{0};
Dilation _dilation;
};
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleDequantize.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleDequantize.h
new file mode 100644
index 000000000..c3ee44253
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleDequantize.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLEDEQUANTIZE_H__
+#define __LUCI_IR_CIRCLEDEQUANTIZE_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/CircleNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief DEQUANTIZE in Circle
+ */
+class CircleDequantize final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::DEQUANTIZE>>
+{
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLEDEQUANTIZE_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleDiv.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleDiv.h
index 1d4d3a239..fcc3f427c 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleDiv.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleDiv.h
@@ -24,7 +24,7 @@
#include "luci/IR/AttrPadding.h"
#include "luci/IR/AttrStride.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -33,12 +33,9 @@ namespace luci
* @brief DIV in Circle
*/
class CircleDiv final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::DIV>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
public:
- CircleDiv() = default;
-
-public:
loco::Node *x(void) const { return at(0)->node(); }
void x(loco::Node *node) { at(0)->node(node); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleElu.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleElu.h
index fbb2f3533..721edd9ae 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleElu.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleElu.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -31,9 +31,6 @@ namespace luci
class CircleElu final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::ELU>>
{
public:
- CircleElu() = default;
-
-public:
loco::Node *features(void) const { return at(0)->node(); }
void features(loco::Node *node) { at(0)->node(node); }
};
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleEqual.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleEqual.h
index 2087d097a..69697ac7e 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleEqual.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleEqual.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleExp.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleExp.h
index 97aecb30a..b8a5d4561 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleExp.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleExp.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleExpandDims.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleExpandDims.h
index f70219614..15bfe6a29 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleExpandDims.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleExpandDims.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -31,9 +31,6 @@ namespace luci
class CircleExpandDims final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::EXPAND_DIMS>>
{
public:
- CircleExpandDims() = default;
-
-public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleFakeQuant.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleFakeQuant.h
new file mode 100644
index 000000000..9e3159685
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleFakeQuant.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_FAKE_QUANT_H__
+#define __LUCI_IR_CIRCLE_FAKE_QUANT_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/CircleNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief FAKE_QUANT in Circle
+ * @note 'inputs' came from tf.quantization.fake_quant_with_min_max_vars
+ */
+class CircleFakeQuant final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::FAKE_QUANT>>
+{
+public:
+ loco::Node *inputs(void) const { return at(0)->node(); }
+ void inputs(loco::Node *node) { at(0)->node(node); }
+
+public:
+ float min(void) const { return _min; }
+ void min(float min) { _min = min; }
+
+ float max(void) const { return _max; }
+ void max(float max) { _max = max; }
+
+ int32_t num_bits(void) const { return _num_bits; }
+ void num_bits(int32_t num_bits) { _num_bits = num_bits; }
+
+ bool narrow_range(void) const { return _narrow_range; }
+ void narrow_range(bool narrow_range) { _narrow_range = narrow_range; }
+
+private:
+ float _min{0.0f};
+ float _max{0.0f};
+ int32_t _num_bits{0};
+ bool _narrow_range{false};
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_FAKE_QUANT_H__
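// A minimal sketch of configuring the FAKE_QUANT attributes declared above;
// `inputs_node` is an assumed producer, and the [-1, 1] / 8-bit range is
// illustrative only.
#include "luci/IR/Nodes/CircleFakeQuant.h"

void fake_quant_example(loco::Node *inputs_node)
{
  luci::CircleFakeQuant fq;
  fq.inputs(inputs_node);
  fq.min(-1.0f);          // lower bound of the fake-quant range
  fq.max(1.0f);           // upper bound of the fake-quant range
  fq.num_bits(8);         // bit width used to simulate quantization
  fq.narrow_range(false); // keep the full 2^num_bits bucket range
}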
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleFill.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleFill.h
index bfc65274a..183794d41 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleFill.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleFill.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleFloor.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleFloor.h
index 7e10547b6..ce6807e98 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleFloor.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleFloor.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleFloorDiv.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleFloorDiv.h
index ba9db010c..bf76e37b6 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleFloorDiv.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleFloorDiv.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleFloorMod.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleFloorMod.h
index 4d13717a0..1af0af758 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleFloorMod.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleFloorMod.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleFullyConnected.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleFullyConnected.h
index d78f39494..dc5aeb267 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleFullyConnected.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleFullyConnected.h
@@ -21,7 +21,7 @@
#include "luci/IR/CircleOpcode.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -30,11 +30,21 @@ namespace luci
* @brief FULLY_CONNECTED in Circle
*/
class CircleFullyConnected final
- : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::FULLY_CONNECTED>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>,
- public LuciNodeMixin<LuciNodeTrait::Bias>
+ : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::FULLY_CONNECTED>>,
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>,
+ public CircleNodeMixin<CircleNodeTrait::Bias>
{
public:
+ enum class WeightsFormat
+ {
+    UNDEFINED, // This is not defined by Circle. This was added to prevent programming errors.
+
+ DEFAULT,
+ SHUFFLED4x16INT8,
+ SHUFFLED16x1FLOAT32,
+ };
+
+public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
@@ -43,6 +53,17 @@ public:
loco::Node *bias(void) const override { return at(2)->node(); }
void bias(loco::Node *node) override { at(2)->node(node); }
+
+public:
+ WeightsFormat weights_format(void) const { return _weights_format; }
+ void weights_format(WeightsFormat weights_format) { _weights_format = weights_format; }
+
+ bool keep_num_dims(void) const { return _keep_num_dims; }
+ void keep_num_dims(bool keep_num_dims) { _keep_num_dims = keep_num_dims; }
+
+private:
+ WeightsFormat _weights_format{WeightsFormat::DEFAULT};
+ bool _keep_num_dims{false};
};
} // namespace luci
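// A short sketch of the new FULLY_CONNECTED attributes, assuming `in`, `w`, and
// `b` are existing loco::Node* producers and that the at(1) accessor is named
// weights() as elsewhere in luci. Both attributes default to WeightsFormat::DEFAULT
// and keep_num_dims(false), so only non-default choices need to be set.
#include "luci/IR/Nodes/CircleFullyConnected.h"

void fully_connected_example(loco::Node *in, loco::Node *w, loco::Node *b)
{
  luci::CircleFullyConnected fc;
  fc.input(in);
  fc.weights(w); // assumed accessor name for at(1)
  fc.bias(b);
  fc.weights_format(luci::CircleFullyConnected::WeightsFormat::SHUFFLED16x1FLOAT32);
  fc.keep_num_dims(true);
}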
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleGather.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleGather.h
index 1e8c4982a..78fa2fc28 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleGather.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleGather.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -42,7 +42,7 @@ public:
void axis(int32_t axis) { _axis = axis; }
private:
- int32_t _axis = 0;
+ int32_t _axis{0};
};
} // namespace luci
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleGatherNd.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleGatherNd.h
index 3423a8216..d6f34f1ea 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleGatherNd.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleGatherNd.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleGelu.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleGelu.h
new file mode 100644
index 000000000..badfec7cf
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleGelu.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLEGELU_H__
+#define __LUCI_IR_CIRCLEGELU_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/CircleNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief GELU in Circle
+ */
+class CircleGelu final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::GELU>>
+{
+public:
+ loco::Node *features(void) const { return at(0)->node(); }
+ void features(loco::Node *node) { at(0)->node(node); }
+
+public:
+ bool approximate(void) const { return _approximate; }
+ void approximate(bool arg) { _approximate = arg; }
+
+private:
+ bool _approximate{false};
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLEGELU_H__
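// A minimal sketch of the new attribute: approximate(true) selects the tanh-based
// GELU approximation and false the exact erf form; `features_node` is an assumed
// upstream producer.
#include "luci/IR/Nodes/CircleGelu.h"

void gelu_example(loco::Node *features_node)
{
  luci::CircleGelu gelu;
  gelu.features(features_node);
  gelu.approximate(true);
}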
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleGreater.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleGreater.h
index 040a4e338..a03b6c749 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleGreater.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleGreater.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleGreaterEqual.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleGreaterEqual.h
index 82bdab212..e435320b2 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleGreaterEqual.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleGreaterEqual.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -29,7 +29,7 @@ namespace luci
* @brief GREATER EQUAL in Circle
*/
class CircleGreaterEqual final
- : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::GREATER_EQUAL>>
+ : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::GREATER_EQUAL>>
{
public:
loco::Node *x(void) const { return at(0)->node(); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleHardSwish.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleHardSwish.h
new file mode 100644
index 000000000..18652a07d
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleHardSwish.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLEHARDSWISH_H__
+#define __LUCI_IR_CIRCLEHARDSWISH_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/CircleNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief HardSwish in Circle
+ */
+class CircleHardSwish final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::HARD_SWISH>>
+{
+public:
+ loco::Node *features(void) const { return at(0)->node(); }
+ void features(loco::Node *node) { at(0)->node(node); }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLEHARDSWISH_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleIf.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleIf.h
index 2f9eac211..1c037a406 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleIf.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleIf.h
@@ -34,7 +34,7 @@ class CircleIf final : public VariadicArityNode<CircleNodeImpl<CircleOpcode::IF>
{
public:
CircleIf(uint32_t arity, uint32_t out)
- : VariadicArityNode<CircleNodeImpl<CircleOpcode::IF>>(arity + 1), _output_count(out)
+ : VariadicArityNode<CircleNodeImpl<CircleOpcode::IF>>(arity + 1), _output_count(out)
{
assert(arity > 0);
assert(out > 0);
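// A short sketch of the variadic arity set up above: CircleIf(arity, out)
// reserves arity + 1 input slots, slot 0 for the boolean condition and slots
// 1..arity for the subgraph inputs. The cond()/input(index) accessor names are
// assumed from the rest of this header; `pred` and `x` are assumed producers.
void if_example(loco::Node *pred, loco::Node *x)
{
  luci::CircleIf if_node(1, 1); // one body input, one output
  if_node.cond(pred);           // occupies at(0)
  if_node.input(0, x);          // occupies at(1)
}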
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleIfOut.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleIfOut.h
index 3654e943b..5adaaa447 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleIfOut.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleIfOut.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -31,9 +31,6 @@ namespace luci
class CircleIfOut final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLEIFOUT>>
{
public:
- CircleIfOut() = default;
-
-public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleInput.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleInput.h
index 4a7d36a4e..e0be9aa6e 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleInput.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleInput.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
#include <loco/IR/DataTypeTraits.h>
#include <loco/IR/GraphInputIndex.h>
@@ -35,16 +35,13 @@ namespace luci
class CircleInput final : public FixedArityNode<0, CircleNodeImpl<CircleOpcode::CIRCLEINPUT>>
{
public:
- CircleInput() = default;
-
-public:
void index(const loco::GraphInputIndex &index);
loco::GraphInputIndex index(void) const;
bool indexed(void) const { return _index != -1; }
private:
- int64_t _index = -1; // Uninitialized
+ int64_t _index{-1}; // Uninitialized
};
} // namespace luci
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleInstanceNorm.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleInstanceNorm.h
index db0faa05e..65c34194d 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleInstanceNorm.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleInstanceNorm.h
@@ -21,7 +21,7 @@
#include "luci/IR/CircleOpcode.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -30,8 +30,8 @@ namespace luci
* @brief INSTANCE_NORM in Circle
*/
class CircleInstanceNorm final
- : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::INSTANCE_NORM>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+ : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::INSTANCE_NORM>>,
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
public:
/// @note Currently only support FLOAT32 as input node
@@ -44,11 +44,12 @@ public:
loco::Node *beta(void) const { return at(2)->node(); }
void beta(loco::Node *node) { at(2)->node(node); }
+public:
float epsilon() const { return _epsilon; }
void epsilon(float epsilon) { _epsilon = epsilon; }
private:
- float _epsilon = 1e-05;
+ float _epsilon{1e-05};
};
} // namespace luci
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleL2Normalize.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleL2Normalize.h
index efa932d95..eb2b372ce 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleL2Normalize.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleL2Normalize.h
@@ -21,7 +21,7 @@
#include "luci/IR/CircleOpcode.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -30,8 +30,8 @@ namespace luci
* @brief L2_NORMALIZATION in Circle
*/
class CircleL2Normalize final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::L2_NORMALIZATION>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::L2_NORMALIZATION>>,
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
public:
loco::Node *x(void) const { return at(0)->node(); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleL2Pool2D.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleL2Pool2D.h
index 7c76ee5d0..624d29e9e 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleL2Pool2D.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleL2Pool2D.h
@@ -24,7 +24,7 @@
#include "luci/IR/AttrPadding.h"
#include "luci/IR/AttrStride.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -33,15 +33,13 @@ namespace luci
* @brief L2_POOL_2D in Circle
*/
class CircleL2Pool2D final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::L2_POOL_2D>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
public:
- CircleL2Pool2D() : _padding(Padding::UNDEFINED) { /* empty */}
-
-public:
loco::Node *value(void) const { return at(0)->node(); }
void value(loco::Node *node) { at(0)->node(node); }
+public:
Padding padding() const { return _padding; }
void padding(Padding padding) { _padding = padding; }
@@ -52,7 +50,7 @@ public:
Stride *stride(void) { return &_stride; }
private:
- Padding _padding;
+ Padding _padding{Padding::UNDEFINED};
Stride _stride;
Filter _filter;
};
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleLeakyRelu.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleLeakyRelu.h
index d6ac97fc0..c8e93af91 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleLeakyRelu.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleLeakyRelu.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -31,17 +31,15 @@ namespace luci
class CircleLeakyRelu final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::LEAKY_RELU>>
{
public:
- CircleLeakyRelu() = default;
-
-public:
loco::Node *features(void) const { return at(0)->node(); }
void features(loco::Node *node) { at(0)->node(node); }
+public:
float alpha() const { return _alpha; }
void alpha(float alpha) { _alpha = alpha; }
private:
- float _alpha = 0.2f;
+ float _alpha{0.2f};
};
} // namespace luci
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleLess.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleLess.h
index cd6cf1872..7adf67842 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleLess.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleLess.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleLessEqual.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleLessEqual.h
index 4c7c6a49b..eb8962494 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleLessEqual.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleLessEqual.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleLocalResponseNormalization.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleLocalResponseNormalization.h
index 8ad2b40fd..4d324700e 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleLocalResponseNormalization.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleLocalResponseNormalization.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -29,7 +29,7 @@ namespace luci
* @brief LOCAL_RESPONSE_NORMALIZATION in Circle
*/
class CircleLocalResponseNormalization final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::LOCAL_RESPONSE_NORMALIZATION>>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::LOCAL_RESPONSE_NORMALIZATION>>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleLog.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleLog.h
index aeb13fed9..2cc57ce2d 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleLog.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleLog.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleLogSoftmax.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleLogSoftmax.h
index 5dfd2c1f9..b73ff7c2a 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleLogSoftmax.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleLogSoftmax.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleLogicalAnd.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleLogicalAnd.h
index 975f6dbc7..9943c71cd 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleLogicalAnd.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleLogicalAnd.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleLogicalNot.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleLogicalNot.h
index 749dbe518..369a3e7bf 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleLogicalNot.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleLogicalNot.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleLogicalOr.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleLogicalOr.h
index 570be57af..c54ec3ebf 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleLogicalOr.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleLogicalOr.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleLogistic.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleLogistic.h
index 8328cb328..1f95e0f77 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleLogistic.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleLogistic.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -31,9 +31,6 @@ namespace luci
class CircleLogistic final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::LOGISTIC>>
{
public:
- CircleLogistic() = default;
-
-public:
loco::Node *x(void) const { return at(0)->node(); }
void x(loco::Node *node) { at(0)->node(node); }
};
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleMatrixDiag.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleMatrixDiag.h
index dca6538c3..f8bf259f9 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleMatrixDiag.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleMatrixDiag.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleMatrixSetDiag.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleMatrixSetDiag.h
index c1f5f3023..76aeaff40 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleMatrixSetDiag.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleMatrixSetDiag.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -29,7 +29,7 @@ namespace luci
* @brief MATRIX_SET_DIAG in Circle
*/
class CircleMatrixSetDiag final
- : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::MATRIX_SET_DIAG>>
+ : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::MATRIX_SET_DIAG>>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleMaxPool2D.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleMaxPool2D.h
index 1eb6532ff..557240d54 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleMaxPool2D.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleMaxPool2D.h
@@ -24,7 +24,7 @@
#include "luci/IR/AttrPadding.h"
#include "luci/IR/AttrStride.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -33,15 +33,13 @@ namespace luci
* @brief MAX_POOL_2D in Circle
*/
class CircleMaxPool2D final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::MAX_POOL_2D>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
public:
- CircleMaxPool2D() : _padding(Padding::UNDEFINED) { /* empty */}
-
-public:
loco::Node *value(void) const { return at(0)->node(); }
void value(loco::Node *node) { at(0)->node(node); }
+public:
Padding padding() const { return _padding; }
void padding(Padding padding) { _padding = padding; }
@@ -52,7 +50,7 @@ public:
Stride *stride(void) { return &_stride; }
private:
- Padding _padding;
+ Padding _padding{Padding::UNDEFINED};
Stride _stride;
Filter _filter;
};
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleMaximum.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleMaximum.h
index 6f789bc14..317cea308 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleMaximum.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleMaximum.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleMean.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleMean.h
index 7f8aeb5aa..f56e4f4c0 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleMean.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleMean.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -42,7 +42,7 @@ public:
void keep_dims(bool keep_dims) { _keep_dims = keep_dims; }
private:
- bool _keep_dims = false;
+ bool _keep_dims{false};
};
} // namespace luci
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleMinimum.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleMinimum.h
index 79d5a6f17..959d9c93b 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleMinimum.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleMinimum.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleMirrorPad.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleMirrorPad.h
index 68db8f6f3..c69e8f7c1 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleMirrorPad.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleMirrorPad.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
#include "luci/IR/AttrMirrorPadMode.h"
namespace luci
@@ -32,9 +32,6 @@ namespace luci
class CircleMirrorPad final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::MIRROR_PAD>>
{
public:
- CircleMirrorPad() = default;
-
-public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleMul.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleMul.h
index 67e897170..85ed694b3 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleMul.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleMul.h
@@ -21,7 +21,7 @@
#include "luci/IR/CircleOpcode.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -30,7 +30,7 @@ namespace luci
* @brief MUL in Circle
*/
class CircleMul final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::MUL>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
public:
loco::Node *x(void) const { return at(0)->node(); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleNeg.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleNeg.h
index 4149ac4a7..adea3fb83 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleNeg.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleNeg.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV4.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV4.h
index 69f3368c0..b47404bb0 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV4.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV4.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -29,7 +29,7 @@ namespace luci
* @brief NON_MAX_SUPPRESSION_V4 in Circle
*/
class CircleNonMaxSuppressionV4 final
- : public FixedArityNode<5, CircleNodeImpl<CircleOpcode::NON_MAX_SUPPRESSION_V4>>
+ : public FixedArityNode<5, CircleNodeImpl<CircleOpcode::NON_MAX_SUPPRESSION_V4>>
{
public:
loco::Node *boxes(void) const { return at(0)->node(); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV4Out.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV4Out.h
index a24dc3e9c..7e6923b5e 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV4Out.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV4Out.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -29,12 +29,9 @@ namespace luci
* @brief Virtual NONMAXSUPPRESSIONV4OUT in Circle
*/
class CircleNonMaxSuppressionV4Out final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLENONMAXSUPPRESSIONV4OUT>>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLENONMAXSUPPRESSIONV4OUT>>
{
public:
- CircleNonMaxSuppressionV4Out() = default;
-
-public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5.h
index 52d682147..77086ede7 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -29,7 +29,7 @@ namespace luci
* @brief NON_MAX_SUPPRESSION_V5 in Circle
*/
class CircleNonMaxSuppressionV5 final
- : public FixedArityNode<6, CircleNodeImpl<CircleOpcode::NON_MAX_SUPPRESSION_V5>>
+ : public FixedArityNode<6, CircleNodeImpl<CircleOpcode::NON_MAX_SUPPRESSION_V5>>
{
public:
loco::Node *boxes(void) const { return at(0)->node(); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5Out.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5Out.h
index 0c6989cc7..63d061f11 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5Out.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5Out.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -29,12 +29,9 @@ namespace luci
* @brief Virtual NONMAXSUPPRESSIONV5OUT in Circle
*/
class CircleNonMaxSuppressionV5Out final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT>>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT>>
{
public:
- CircleNonMaxSuppressionV5Out() = default;
-
-public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleNotEqual.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleNotEqual.h
index cca7a5e22..add6a0747 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleNotEqual.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleNotEqual.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleOneHot.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleOneHot.h
index 665e01d48..b3eb0f436 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleOneHot.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleOneHot.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -48,7 +48,7 @@ public:
void axis(int32_t axis) { _axis = axis; }
private:
- int32_t _axis = -1;
+ int32_t _axis{-1};
};
} // namespace luci
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleOutput.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleOutput.h
index 67e55f1a1..eb02f824e 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleOutput.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleOutput.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
#include <loco/IR/GraphOutputIndex.h>
@@ -34,8 +34,6 @@ namespace luci
class CircleOutput final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLEOUTPUT>>
{
public:
- CircleOutput() = default;
-
void index(const loco::GraphOutputIndex &index);
loco::GraphOutputIndex index(void) const;
@@ -46,7 +44,7 @@ public:
void from(loco::Node *node) { at(0)->node(node); }
private:
- int64_t _index = -1; // Uninitialized
+ int64_t _index{-1}; // Uninitialized
};
/**
@@ -54,7 +52,7 @@ private:
*/
// TODO remove CircleOutputDummy
class CircleOutputDummy final
- : public FixedArityNode<0, CircleNodeImpl<CircleOpcode::CIRCLEOUTPUTDUMMY>>
+ : public FixedArityNode<0, CircleNodeImpl<CircleOpcode::CIRCLEOUTPUTDUMMY>>
{
public:
CircleOutputDummy() = default;
@@ -64,7 +62,7 @@ public:
* @brief CircleOutputExclude is used to specifying not exported nodes
*/
class CircleOutputExclude final
- : public FixedArityNode<0, CircleNodeImpl<CircleOpcode::CIRCLEOUTPUTEXCLUDE>>
+ : public FixedArityNode<0, CircleNodeImpl<CircleOpcode::CIRCLEOUTPUTEXCLUDE>>
{
public:
CircleOutputExclude() = default;
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CirclePRelu.h b/compiler/luci/lang/include/luci/IR/Nodes/CirclePRelu.h
index 693777512..3c5559db2 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CirclePRelu.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CirclePRelu.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -31,9 +31,6 @@ namespace luci
class CirclePRelu final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::PRELU>>
{
public:
- CirclePRelu() = default;
-
-public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CirclePad.h b/compiler/luci/lang/include/luci/IR/Nodes/CirclePad.h
index 31599bda0..ede217789 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CirclePad.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CirclePad.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -31,9 +31,6 @@ namespace luci
class CirclePad final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::PAD>>
{
public:
- CirclePad() = default;
-
-public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CirclePadV2.h b/compiler/luci/lang/include/luci/IR/Nodes/CirclePadV2.h
index 563cfd9a4..644e2bb27 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CirclePadV2.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CirclePadV2.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -31,9 +31,6 @@ namespace luci
class CirclePadV2 final : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::PADV2>>
{
public:
- CirclePadV2() = default;
-
-public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CirclePow.h b/compiler/luci/lang/include/luci/IR/Nodes/CirclePow.h
index 006e3dd86..40c5a829d 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CirclePow.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CirclePow.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -31,9 +31,6 @@ namespace luci
class CirclePow final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::POW>>
{
public:
- CirclePow() = default;
-
-public:
loco::Node *x(void) const { return at(0)->node(); }
void x(loco::Node *node) { at(0)->node(node); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleQuantize.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleQuantize.h
new file mode 100644
index 000000000..8018a76c2
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleQuantize.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLEQUANTIZE_H__
+#define __LUCI_IR_CIRCLEQUANTIZE_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/CircleNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief QUANTIZE in Circle
+ */
+class CircleQuantize final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::QUANTIZE>>
+{
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLEQUANTIZE_H__
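// A minimal sketch pairing the new QUANTIZE node with the DEQUANTIZE node added
// earlier in this patch; `fp32_producer` is an assumed float32 node, and the
// quantization parameters (scale/zero-point) of the intermediate are omitted.
#include "luci/IR/Nodes/CircleDequantize.h"
#include "luci/IR/Nodes/CircleQuantize.h"

void quantize_roundtrip_example(loco::Node *fp32_producer)
{
  luci::CircleQuantize quantize;
  quantize.input(fp32_producer);

  luci::CircleDequantize dequantize;
  dequantize.input(&quantize); // a CircleQuantize is itself a loco::Node
}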
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleRange.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleRange.h
index 977a37a52..56f8a2eba 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleRange.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleRange.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleRank.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleRank.h
index ba6d67f69..034f251bc 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleRank.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleRank.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleReduceAny.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleReduceAny.h
index 0456be863..c64dbbdf8 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleReduceAny.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleReduceAny.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -42,7 +42,7 @@ public:
void keep_dims(bool keep_dims) { _keep_dims = keep_dims; }
private:
- bool _keep_dims = false;
+ bool _keep_dims{false};
};
} // namespace luci
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleReduceMax.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleReduceMax.h
index 925c977e5..97cbecd08 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleReduceMax.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleReduceMax.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -42,7 +42,7 @@ public:
void keep_dims(bool keep_dims) { _keep_dims = keep_dims; }
private:
- bool _keep_dims = false;
+ bool _keep_dims{false};
};
} // namespace luci
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleReduceMin.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleReduceMin.h
index fd789ae5e..33708928f 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleReduceMin.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleReduceMin.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -42,7 +42,7 @@ public:
void keep_dims(bool keep_dims) { _keep_dims = keep_dims; }
private:
- bool _keep_dims = false;
+ bool _keep_dims{false};
};
} // namespace luci
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleReduceProd.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleReduceProd.h
index b7d226255..3689ee532 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleReduceProd.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleReduceProd.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -42,7 +42,7 @@ public:
void keep_dims(bool keep_dims) { _keep_dims = keep_dims; }
private:
- bool _keep_dims = false;
+ bool _keep_dims{false};
};
} // namespace luci
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleRelu.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleRelu.h
index 91272d2bf..6148caa03 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleRelu.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleRelu.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -31,9 +31,6 @@ namespace luci
class CircleRelu final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::RELU>>
{
public:
- CircleRelu() = default;
-
-public:
loco::Node *features(void) const { return at(0)->node(); }
void features(loco::Node *node) { at(0)->node(node); }
};
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleRelu6.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleRelu6.h
index b4274ded9..0fa25e873 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleRelu6.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleRelu6.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -31,9 +31,6 @@ namespace luci
class CircleRelu6 final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::RELU6>>
{
public:
- CircleRelu6() = default;
-
-public:
loco::Node *features(void) const { return at(0)->node(); }
void features(loco::Node *node) { at(0)->node(node); }
};
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleReluN1To1.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleReluN1To1.h
index a5c5710c2..13c0d166f 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleReluN1To1.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleReluN1To1.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -31,9 +31,6 @@ namespace luci
class CircleReluN1To1 final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::RELU_N1_TO_1>>
{
public:
- CircleReluN1To1() = default;
-
-public:
loco::Node *features(void) const { return at(0)->node(); }
void features(loco::Node *node) { at(0)->node(node); }
};
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleReshape.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleReshape.h
index b13144f7e..090df4044 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleReshape.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleReshape.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -31,14 +31,11 @@ namespace luci
class CircleReshape final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::RESHAPE>>
{
public:
- CircleReshape() = default;
-
-public:
loco::Node *tensor(void) const { return at(0)->node(); }
void tensor(loco::Node *node) { at(0)->node(node); }
// NOTE shape is optional and can be CircleConst or any other type
- // and also can be CircleOutputDummy when reshape option does not exist
+ // and also should be CircleOutputDummy when reshape option does not exist
loco::Node *shape(void) const { return at(1)->node(); }
void shape(loco::Node *node) { at(1)->node(node); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleResizeBilinear.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleResizeBilinear.h
index 3c8223338..091916a2b 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleResizeBilinear.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleResizeBilinear.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -29,18 +29,16 @@ namespace luci
* @brief RESIZE_BILINEAR in Circle
*/
class CircleResizeBilinear final
- : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::RESIZE_BILINEAR>>
+ : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::RESIZE_BILINEAR>>
{
public:
- CircleResizeBilinear() = default;
-
-public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
loco::Node *size(void) const { return at(1)->node(); }
void size(loco::Node *node) { at(1)->node(node); }
+public:
bool align_corners() const { return _align_corners; }
void align_corners(bool value) { _align_corners = value; }
@@ -48,8 +46,8 @@ public:
void half_pixel_centers(bool value) { _half_pixel_centers = value; }
private:
- bool _align_corners = false;
- bool _half_pixel_centers = false;
+ bool _align_corners{false};
+ bool _half_pixel_centers{false};
};
} // namespace luci
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleResizeNearestNeighbor.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleResizeNearestNeighbor.h
index dc32ebee7..ab880d767 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleResizeNearestNeighbor.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleResizeNearestNeighbor.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -29,23 +29,21 @@ namespace luci
* @brief RESIZE_NEAREST_NEIGHBOR in Circle
*/
class CircleResizeNearestNeighbor final
- : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::RESIZE_NEAREST_NEIGHBOR>>
+ : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::RESIZE_NEAREST_NEIGHBOR>>
{
public:
- CircleResizeNearestNeighbor() = default;
-
-public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
loco::Node *size(void) const { return at(1)->node(); }
void size(loco::Node *node) { at(1)->node(node); }
+public:
bool align_corners() const { return _align_corners; }
void align_corners(bool value) { _align_corners = value; }
private:
- bool _align_corners = false;
+ bool _align_corners{false};
};
} // namespace luci
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleReverseSequence.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleReverseSequence.h
index b0766dd3e..5f089a768 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleReverseSequence.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleReverseSequence.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -29,12 +29,9 @@ namespace luci
* @brief REVERSE_SEQUENCE in Circle
*/
class CircleReverseSequence final
- : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::REVERSE_SEQUENCE>>
+ : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::REVERSE_SEQUENCE>>
{
public:
- CircleReverseSequence() = default;
-
-public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
@@ -42,15 +39,15 @@ public:
void seq_lengths(loco::Node *node) { at(1)->node(node); }
public:
- int seq_axis(void) const { return _seq_axis; }
- void seq_axis(int seq_axis) { _seq_axis = seq_axis; }
+ int32_t seq_axis(void) const { return _seq_axis; }
+ void seq_axis(int32_t seq_axis) { _seq_axis = seq_axis; }
- int batch_axis(void) const { return _batch_axis; }
- void batch_axis(int batch_axis) { _batch_axis = batch_axis; }
+ int32_t batch_axis(void) const { return _batch_axis; }
+ void batch_axis(int32_t batch_axis) { _batch_axis = batch_axis; }
private:
- int _seq_axis{0};
- int _batch_axis{0};
+ int32_t _seq_axis{0};
+ int32_t _batch_axis{0};
};
} // namespace luci
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleReverseV2.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleReverseV2.h
index 71d9f65aa..96b6a793d 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleReverseV2.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleReverseV2.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleRound.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleRound.h
index 30296ce9e..e340266ed 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleRound.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleRound.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -31,9 +31,6 @@ namespace luci
class CircleRound final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::ROUND>>
{
public:
- CircleRound() = default;
-
-public:
loco::Node *x(void) const { return at(0)->node(); }
void x(loco::Node *node) { at(0)->node(node); }
};
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleRsqrt.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleRsqrt.h
index 873397bce..7907f326b 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleRsqrt.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleRsqrt.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -31,9 +31,6 @@ namespace luci
class CircleRsqrt final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::RSQRT>>
{
public:
- CircleRsqrt() = default;
-
-public:
loco::Node *x(void) const { return at(0)->node(); }
void x(loco::Node *node) { at(0)->node(node); }
};
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleSVDF.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleSVDF.h
new file mode 100644
index 000000000..839d11e04
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleSVDF.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_SVDF_H__
+#define __LUCI_IR_CIRCLE_SVDF_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief SVDF in Circle
+ */
+class CircleSVDF final : public FixedArityNode<5, CircleNodeImpl<CircleOpcode::SVDF>>,
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
+{
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *weight_feature(void) const { return at(1)->node(); }
+ void weight_feature(loco::Node *node) { at(1)->node(node); }
+
+ loco::Node *weight_time(void) const { return at(2)->node(); }
+ void weight_time(loco::Node *node) { at(2)->node(node); }
+
+ loco::Node *bias(void) const { return at(3)->node(); }
+ void bias(loco::Node *node) { at(3)->node(node); }
+
+ loco::Node *input_activation_state(void) const { return at(4)->node(); }
+ void input_activation_state(loco::Node *node) { at(4)->node(node); }
+
+public:
+ bool asymmetric_quantize_inputs() const { return _asymmetric_quantize_inputs; }
+ void asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
+ {
+ _asymmetric_quantize_inputs = asymmetric_quantize_inputs;
+ }
+
+ int32_t svdf_rank() const { return _rank; }
+ void svdf_rank(int32_t svdf_rank) { _rank = svdf_rank; }
+
+private:
+ bool _asymmetric_quantize_inputs = false;
+ int32_t _rank = 0;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_SVDF_H__
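A minimal usage sketch for the CircleSVDF node declared above (illustrative only, not part of this diff; `g` is an assumed `loco::Graph *` and the producer nodes are hypothetical):

  // Create the node in an existing graph and wire its five fixed-arity inputs.
  auto *svdf = g->nodes()->create<luci::CircleSVDF>();
  svdf->input(input_node);                   // at(0): main input
  svdf->weight_feature(weight_feature_node); // at(1)
  svdf->weight_time(weight_time_node);       // at(2)
  svdf->bias(bias_node);                     // at(3)
  svdf->input_activation_state(state_node);  // at(4)
  // Attributes from the header above; the activation setter comes from the
  // CircleNodeMixin<CircleNodeTrait::FusedActFunc> base.
  svdf->fusedActivationFunction(luci::FusedActFunc::RELU);
  svdf->svdf_rank(1);
  svdf->asymmetric_quantize_inputs(false);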
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleScatterNd.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleScatterNd.h
index 9f93a0a80..fda3abafc 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleScatterNd.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleScatterNd.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleSegmentSum.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleSegmentSum.h
index 416d617b2..e7227e9ee 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleSegmentSum.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleSegmentSum.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -31,9 +31,6 @@ namespace luci
class CircleSegmentSum final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::SEGMENT_SUM>>
{
public:
- CircleSegmentSum() = default;
-
-public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleSelect.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleSelect.h
index 727647168..6f778d72d 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleSelect.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleSelect.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -31,9 +31,6 @@ namespace luci
class CircleSelect final : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::SELECT>>
{
public:
- CircleSelect() = default;
-
-public:
loco::Node *condition(void) const { return at(0)->node(); }
void condition(loco::Node *node) { at(0)->node(node); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleSelectV2.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleSelectV2.h
index 7ac3c0524..7969cc2aa 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleSelectV2.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleSelectV2.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -31,9 +31,6 @@ namespace luci
class CircleSelectV2 final : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::SELECT_V2>>
{
public:
- CircleSelectV2() = default;
-
-public:
loco::Node *condition(void) const { return at(0)->node(); }
void condition(loco::Node *node) { at(0)->node(node); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleShape.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleShape.h
index ff20ce684..903894dbd 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleShape.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleShape.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -31,9 +31,6 @@ namespace luci
class CircleShape final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::SHAPE>>
{
public:
- CircleShape() = default;
-
-public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleSin.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleSin.h
index 5624db253..25dc18b0d 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleSin.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleSin.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleSlice.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleSlice.h
index a2113643d..98556d7a6 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleSlice.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleSlice.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleSoftmax.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleSoftmax.h
index 7166a329b..d10cb1682 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleSoftmax.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleSoftmax.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleSpaceToBatchND.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleSpaceToBatchND.h
index 042ebffcd..ef715c6d0 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleSpaceToBatchND.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleSpaceToBatchND.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -29,7 +29,7 @@ namespace luci
* @brief SPACE_TO_BATCH_ND in Circle
*/
class CircleSpaceToBatchND final
- : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::SPACE_TO_BATCH_ND>>
+ : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::SPACE_TO_BATCH_ND>>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleSpaceToDepth.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleSpaceToDepth.h
index 420a4cb96..387e0d80f 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleSpaceToDepth.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleSpaceToDepth.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -29,18 +29,18 @@ namespace luci
* @brief SPACE_TO_DEPTH in Circle
*/
class CircleSpaceToDepth final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::SPACE_TO_DEPTH>>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::SPACE_TO_DEPTH>>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
public:
- int block_size(void) const { return _block_size; }
- void block_size(int block_size) { _block_size = block_size; }
+ int32_t block_size(void) const { return _block_size; }
+ void block_size(int32_t block_size) { _block_size = block_size; }
private:
- int _block_size{0};
+ int32_t _block_size{0};
};
} // namespace luci
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleSparseToDense.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleSparseToDense.h
index 7e80304b0..94a20c064 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleSparseToDense.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleSparseToDense.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -29,7 +29,7 @@ namespace luci
* @brief SPARSE_TO_DENSE in Circle
*/
class CircleSparseToDense final
- : public FixedArityNode<4, CircleNodeImpl<CircleOpcode::SPARSE_TO_DENSE>>
+ : public FixedArityNode<4, CircleNodeImpl<CircleOpcode::SPARSE_TO_DENSE>>
{
public:
loco::Node *indices(void) const { return at(0)->node(); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleSplit.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleSplit.h
index 0eda19501..0cb953131 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleSplit.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleSplit.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleSplitOut.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleSplitOut.h
index 6bf4a9fef..a507740e4 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleSplitOut.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleSplitOut.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -31,9 +31,6 @@ namespace luci
class CircleSplitOut final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLESPLITOUT>>
{
public:
- CircleSplitOut() = default;
-
-public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleSplitV.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleSplitV.h
index 1b7d55534..cb02cbbcf 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleSplitV.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleSplitV.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleSplitVOut.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleSplitVOut.h
index d3b2f1e5a..adf79f30c 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleSplitVOut.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleSplitVOut.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -29,12 +29,9 @@ namespace luci
* @brief Virtual CIRCLESPLITVOUT in Circle
*/
class CircleSplitVOut final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLESPLITVOUT>>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLESPLITVOUT>>
{
public:
- CircleSplitVOut() = default;
-
-public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleSqrt.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleSqrt.h
index c96ca8498..b76bd1ad5 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleSqrt.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleSqrt.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -31,9 +31,6 @@ namespace luci
class CircleSqrt final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::SQRT>>
{
public:
- CircleSqrt() = default;
-
-public:
loco::Node *x(void) const { return at(0)->node(); }
void x(loco::Node *node) { at(0)->node(node); }
};
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleSquare.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleSquare.h
index a29edfe82..3f9228b3b 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleSquare.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleSquare.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -31,9 +31,6 @@ namespace luci
class CircleSquare final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::SQUARE>>
{
public:
- CircleSquare() = default;
-
-public:
loco::Node *x(void) const { return at(0)->node(); }
void x(loco::Node *node) { at(0)->node(node); }
};
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleSquaredDifference.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleSquaredDifference.h
index b5b39f920..355c9f3d3 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleSquaredDifference.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleSquaredDifference.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -29,12 +29,9 @@ namespace luci
* @brief SQUARED_DIFFERENCE in Circle
*/
class CircleSquaredDifference final
- : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::SQUARED_DIFFERENCE>>
+ : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::SQUARED_DIFFERENCE>>
{
public:
- CircleSquaredDifference() = default;
-
-public:
loco::Node *x(void) const { return at(0)->node(); }
void x(loco::Node *node) { at(0)->node(node); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleSqueeze.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleSqueeze.h
index f175f1411..ba71ff217 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleSqueeze.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleSqueeze.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -31,9 +31,6 @@ namespace luci
class CircleSqueeze final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::SQUEEZE>>
{
public:
- CircleSqueeze() = default;
-
-public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleStridedSlice.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleStridedSlice.h
index 98799fec1..6a4155ef1 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleStridedSlice.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleStridedSlice.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -29,7 +29,7 @@ namespace luci
* @brief STRIDED_SLICE in Circle
*/
class CircleStridedSlice final
- : public FixedArityNode<4, CircleNodeImpl<CircleOpcode::STRIDED_SLICE>>
+ : public FixedArityNode<4, CircleNodeImpl<CircleOpcode::STRIDED_SLICE>>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleSub.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleSub.h
index 08208f942..d9aaa44e5 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleSub.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleSub.h
@@ -21,7 +21,7 @@
#include "luci/IR/CircleOpcode.h"
#include "luci/IR/AttrFusedActFunc.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -30,12 +30,9 @@ namespace luci
* @brief SUB in Circle
*/
class CircleSub final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::SUB>>,
- public LuciNodeMixin<LuciNodeTrait::FusedActFunc>
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
{
public:
- CircleSub() = default;
-
-public:
loco::Node *x(void) const { return at(0)->node(); }
void x(loco::Node *node) { at(0)->node(node); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleSum.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleSum.h
index 21faa76fe..a72e18f54 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleSum.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleSum.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleTanh.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleTanh.h
index f7444921f..2036a7301 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleTanh.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleTanh.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -31,9 +31,6 @@ namespace luci
class CircleTanh final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::TANH>>
{
public:
- CircleTanh() = default;
-
-public:
loco::Node *x(void) const { return at(0)->node(); }
void x(loco::Node *node) { at(0)->node(node); }
};
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleTile.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleTile.h
index 96e1f69c6..1ec2f5e82 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleTile.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleTile.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -31,9 +31,6 @@ namespace luci
class CircleTile final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::TILE>>
{
public:
- CircleTile() = default;
-
-public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleTopKV2.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleTopKV2.h
index 3b2b5abb7..0bf78c3ee 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleTopKV2.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleTopKV2.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -31,9 +31,6 @@ namespace luci
class CircleTopKV2 final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::TOPK_V2>>
{
public:
- CircleTopKV2() = default;
-
-public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleTopKV2Out.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleTopKV2Out.h
index 5a6dd0c02..f1a6b4a41 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleTopKV2Out.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleTopKV2Out.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -29,12 +29,9 @@ namespace luci
* @brief Virtual CIRCLETOPKV2OUT in Circle
*/
class CircleTopKV2Out final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLETOPKV2OUT>>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLETOPKV2OUT>>
{
public:
- CircleTopKV2Out() = default;
-
-public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleTranspose.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleTranspose.h
index 095cd6746..72ce0738c 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleTranspose.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleTranspose.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -31,13 +31,7 @@ namespace luci
class CircleTranspose final : public FixedArityNode<2, CircleNodeImpl<CircleOpcode::TRANSPOSE>>
{
public:
- CircleTranspose() = default;
-
-public:
- /// @brief Get the input node to transpose
loco::Node *a(void) const { return at(0)->node(); }
-
- /// @brief Set the input node to transpose
void a(loco::Node *node) { at(0)->node(node); }
loco::Node *perm(void) const { return at(1)->node(); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleTransposeConv.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleTransposeConv.h
index e355102d6..8c6f04a58 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleTransposeConv.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleTransposeConv.h
@@ -22,7 +22,7 @@
#include "luci/IR/AttrPadding.h"
#include "luci/IR/AttrStride.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -34,8 +34,9 @@ namespace luci
 * 'out' actually means 'out' and 'in' of this node.
*/
class CircleTransposeConv final
- : public FixedArityNode<4, CircleNodeImpl<CircleOpcode::TRANSPOSE_CONV>>,
- public LuciNodeMixin<LuciNodeTrait::Bias>
+ : public FixedArityNode<4, CircleNodeImpl<CircleOpcode::TRANSPOSE_CONV>>,
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>,
+ public CircleNodeMixin<CircleNodeTrait::Bias>
{
public:
loco::Node *inputSizes(void) const { return at(0)->node(); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleUnidirectionalSequenceLSTM.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleUnidirectionalSequenceLSTM.h
new file mode 100644
index 000000000..7b9e445d3
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleUnidirectionalSequenceLSTM.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLEUNIDIRECTIONALSEQUENCELSTM_H__
+#define __LUCI_IR_CIRCLEUNIDIRECTIONALSEQUENCELSTM_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/AttrFusedActFunc.h"
+#include "luci/IR/CircleNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief UNIDIRECTIONAL_SEQUENCE_LSTM in Circle
+ */
+class CircleUnidirectionalSequenceLSTM final
+ : public FixedArityNode<24, CircleNodeImpl<CircleOpcode::UNIDIRECTIONAL_SEQUENCE_LSTM>>,
+ public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
+{
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *input_to_input_weights(void) const { return at(1)->node(); }
+ void input_to_input_weights(loco::Node *node) { at(1)->node(node); }
+ loco::Node *input_to_forget_weights(void) const { return at(2)->node(); }
+ void input_to_forget_weights(loco::Node *node) { at(2)->node(node); }
+ loco::Node *input_to_cell_weights(void) const { return at(3)->node(); }
+ void input_to_cell_weights(loco::Node *node) { at(3)->node(node); }
+ loco::Node *input_to_output_weights(void) const { return at(4)->node(); }
+ void input_to_output_weights(loco::Node *node) { at(4)->node(node); }
+
+ loco::Node *recurrent_to_input_weights(void) const { return at(5)->node(); }
+ void recurrent_to_input_weights(loco::Node *node) { at(5)->node(node); }
+ loco::Node *recurrent_to_forget_weights(void) const { return at(6)->node(); }
+ void recurrent_to_forget_weights(loco::Node *node) { at(6)->node(node); }
+ loco::Node *recurrent_to_cell_weights(void) const { return at(7)->node(); }
+ void recurrent_to_cell_weights(loco::Node *node) { at(7)->node(node); }
+ loco::Node *recurrent_to_output_weights(void) const { return at(8)->node(); }
+ void recurrent_to_output_weights(loco::Node *node) { at(8)->node(node); }
+
+ loco::Node *cell_to_input_weights(void) const { return at(9)->node(); }
+ void cell_to_input_weights(loco::Node *node) { at(9)->node(node); }
+ loco::Node *cell_to_forget_weights(void) const { return at(10)->node(); }
+ void cell_to_forget_weights(loco::Node *node) { at(10)->node(node); }
+ loco::Node *cell_to_output_weights(void) const { return at(11)->node(); }
+ void cell_to_output_weights(loco::Node *node) { at(11)->node(node); }
+
+ loco::Node *input_gate_bias(void) const { return at(12)->node(); }
+ void input_gate_bias(loco::Node *node) { at(12)->node(node); }
+ loco::Node *forget_gate_bias(void) const { return at(13)->node(); }
+ void forget_gate_bias(loco::Node *node) { at(13)->node(node); }
+ loco::Node *cell_gate_bias(void) const { return at(14)->node(); }
+ void cell_gate_bias(loco::Node *node) { at(14)->node(node); }
+ loco::Node *output_gate_bias(void) const { return at(15)->node(); }
+ void output_gate_bias(loco::Node *node) { at(15)->node(node); }
+
+ loco::Node *projection_weights(void) const { return at(16)->node(); }
+ void projection_weights(loco::Node *node) { at(16)->node(node); }
+ loco::Node *projection_bias(void) const { return at(17)->node(); }
+ void projection_bias(loco::Node *node) { at(17)->node(node); }
+
+ loco::Node *output_state(void) const { return at(18)->node(); }
+ void output_state(loco::Node *node) { at(18)->node(node); }
+ loco::Node *cell_state(void) const { return at(19)->node(); }
+ void cell_state(loco::Node *node) { at(19)->node(node); }
+
+ loco::Node *input_layer_norm_coefficients(void) const { return at(20)->node(); }
+ void input_layer_norm_coefficients(loco::Node *node) { at(20)->node(node); }
+ loco::Node *forget_layer_norm_coefficients(void) const { return at(21)->node(); }
+ void forget_layer_norm_coefficients(loco::Node *node) { at(21)->node(node); }
+ loco::Node *cell_layer_norm_coefficients(void) const { return at(22)->node(); }
+ void cell_layer_norm_coefficients(loco::Node *node) { at(22)->node(node); }
+ loco::Node *output_layer_norm_coefficients(void) const { return at(23)->node(); }
+ void output_layer_norm_coefficients(loco::Node *node) { at(23)->node(node); }
+
+public:
+ float cell_clip(void) const { return _cell_clip; }
+ void cell_clip(float cell_clip) { _cell_clip = cell_clip; }
+ float proj_clip(void) const { return _proj_clip; }
+ void proj_clip(float proj_clip) { _proj_clip = proj_clip; }
+ bool time_major(void) const { return _time_major; }
+ void time_major(bool time_major) { _time_major = time_major; }
+ bool asymmetric_quantize_inputs(void) const { return _asymmetric_quantize_inputs; }
+ void asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
+ {
+ _asymmetric_quantize_inputs = asymmetric_quantize_inputs;
+ }
+
+private:
+ float _cell_clip{0.0f};
+ float _proj_clip{0.0f};
+ bool _time_major{false};
+ bool _asymmetric_quantize_inputs{false};
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLEUNIDIRECTIONALSEQUENCELSTM_H__
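As with CircleSVDF above, a short sketch of the new node's API (hypothetical graph `g` and weight nodes; only a few of the 24 fixed inputs are shown):

  auto *lstm = g->nodes()->create<luci::CircleUnidirectionalSequenceLSTM>();
  lstm->input(input_node);                  // at(0)
  lstm->input_to_forget_weights(w_i2f);     // at(2)
  lstm->recurrent_to_forget_weights(w_r2f); // at(6)
  lstm->fusedActivationFunction(luci::FusedActFunc::TANH);
  lstm->cell_clip(0.0f);   // 0.0f keeps the TFLite default of no cell clipping
  lstm->time_major(false); // batch-major input layout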
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleUnique.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleUnique.h
index 719a72362..2dd48b2f9 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleUnique.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleUnique.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -36,7 +36,7 @@ public:
public:
loco::DataType idx_out_type(void) const { return _idx_out_type; }
- void output_type(loco::DataType ot) { _idx_out_type = ot; }
+ void idx_out_type(loco::DataType ot) { _idx_out_type = ot; }
private:
loco::DataType _idx_out_type{loco::DataType::S32};
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleUniqueOut.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleUniqueOut.h
index f846403e0..233351860 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleUniqueOut.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleUniqueOut.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -29,12 +29,9 @@ namespace luci
* @brief Virtual CIRCLEUNIQUEOUT in Circle
*/
class CircleUniqueOut final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLEUNIQUEOUT>>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLEUNIQUEOUT>>
{
public:
- CircleUniqueOut() = default;
-
-public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleUnpack.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleUnpack.h
index cb91d7e6a..fd0c66ce0 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleUnpack.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleUnpack.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -31,9 +31,6 @@ namespace luci
class CircleUnpack final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::UNPACK>>
{
public:
- CircleUnpack() = default;
-
-public:
loco::Node *value(void) const { return at(0)->node(); }
void value(loco::Node *node) { at(0)->node(node); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleUnpackOut.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleUnpackOut.h
index 6f24578a1..640d2f1bb 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleUnpackOut.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleUnpackOut.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -29,12 +29,9 @@ namespace luci
* @brief Virtual CIRCLEUNPACKOUT in Circle
*/
class CircleUnpackOut final
- : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLEUNPACKOUT>>
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLEUNPACKOUT>>
{
public:
- CircleUnpackOut() = default;
-
-public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleVariable.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleVariable.h
new file mode 100644
index 000000000..8c15b66c9
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleVariable.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_VARIABLE_H__
+#define __LUCI_IR_CIRCLE_VARIABLE_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/CircleNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief Virtual CircleVariable in Circle for 'variable' Tensor
+ */
+class CircleVariable final : public FixedArityNode<0, CircleNodeImpl<CircleOpcode::CIRCLEVARIABLE>>
+{
+public:
+ CircleVariable() = default;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_VARIABLE_H__
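CircleVariable is a zero-arity virtual node, so construction is the entire input API; dtype and shape come from the common CircleNode base (a sketch with an assumed `loco::Graph *g`):

  auto *var = g->nodes()->create<luci::CircleVariable>();
  var->dtype(loco::DataType::FLOAT32);
  var->rank(1);
  var->dim(0).set(4);
  var->shape_status(luci::ShapeStatus::VALID);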
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleWhere.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleWhere.h
index 51eda3d6e..8895bcbbd 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleWhere.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleWhere.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
#include <cassert>
@@ -33,9 +33,6 @@ namespace luci
class CircleWhere final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::WHERE>>
{
public:
- CircleWhere() = default;
-
-public:
loco::Node *condition() const { return at(0)->node(); }
void condition(loco::Node *node) { at(0)->node(node); }
};
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleWhile.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleWhile.h
index 40ec96414..f4154d3ab 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleWhile.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleWhile.h
@@ -34,7 +34,7 @@ class CircleWhile final : public VariadicArityNode<CircleNodeImpl<CircleOpcode::
{
public:
CircleWhile(uint32_t arity, uint32_t out)
- : VariadicArityNode<CircleNodeImpl<CircleOpcode::WHILE>>(arity), _output_count(out)
+ : VariadicArityNode<CircleNodeImpl<CircleOpcode::WHILE>>(arity), _output_count(out)
{
assert(arity > 0);
assert(out > 0);
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleWhileOut.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleWhileOut.h
index cdf617848..98efc21e5 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleWhileOut.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleWhileOut.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -31,9 +31,6 @@ namespace luci
class CircleWhileOut final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLEWHILEOUT>>
{
public:
- CircleWhileOut() = default;
-
-public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleZerosLike.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleZerosLike.h
index d3b6d272a..9302facd0 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleZerosLike.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleZerosLike.h
@@ -20,7 +20,7 @@
#include "luci/IR/CircleNodeDecl.h"
#include "luci/IR/CircleOpcode.h"
-#include "luci/IR/LuciNodeMixins.h"
+#include "luci/IR/CircleNodeMixins.h"
namespace luci
{
@@ -31,13 +31,7 @@ namespace luci
class CircleZerosLike final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::ZEROS_LIKE>>
{
public:
- CircleZerosLike() = default;
-
-public:
- /// @brief Get the input node
loco::Node *input(void) const { return at(0)->node(); }
-
- /// @brief Set the input node
void input(loco::Node *node) { at(0)->node(node); }
};
diff --git a/compiler/luci/lang/include/luci/IR/SparsityParam.h b/compiler/luci/lang/include/luci/IR/SparsityParam.h
new file mode 100644
index 000000000..6cfff67e1
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/SparsityParam.h
@@ -0,0 +1,233 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_SPARSITYPARAM_H__
+#define __LUCI_IR_SPARSITYPARAM_H__
+
+#include <cstdint>
+#include <stdexcept>
+#include <utility>
+#include <vector>
+
+namespace luci
+{
+
+enum DimensionType
+{
+ DENSE,
+ SPARSE_CSR,
+};
+
+enum SparseIndexVectorType
+{
+ NONE,
+ I32,
+ U16,
+ U8,
+};
+
+class SparseIndexVector
+{
+public:
+ SparseIndexVector() = default;
+ SparseIndexVector(const SparseIndexVectorType &type, const std::vector<int32_t> &sparse_index_vec)
+ : _type{type}
+ {
+ switch (type)
+ {
+ case SparseIndexVectorType::NONE:
+ break;
+ case SparseIndexVectorType::I32:
+ {
+ _vec_ptr = static_cast<void *>(
+ new std::vector<int32_t>(sparse_index_vec.begin(), sparse_index_vec.end()));
+ break;
+ }
+ case SparseIndexVectorType::U16:
+ {
+ auto new_vec = new std::vector<uint16_t>(sparse_index_vec.size());
+ for (uint32_t idx = 0; idx < sparse_index_vec.size(); idx++)
+ {
+ new_vec->at(idx) = static_cast<uint16_t>(sparse_index_vec.at(idx));
+ }
+ _vec_ptr = static_cast<void *>(new_vec);
+ break;
+ }
+ case SparseIndexVectorType::U8:
+ {
+ auto new_vec = new std::vector<uint8_t>(sparse_index_vec.size());
+ for (uint32_t idx = 0; idx < sparse_index_vec.size(); idx++)
+ {
+ new_vec->at(idx) = static_cast<uint8_t>(sparse_index_vec.at(idx));
+ }
+ _vec_ptr = static_cast<void *>(new_vec);
+ break;
+ }
+ default:
+        throw std::runtime_error("Invalid SparseIndexVectorType");
+ }
+ }
+
+ SparseIndexVector(SparseIndexVectorType type, const void *sparse_index_vec) : _type{type}
+ {
+ switch (type)
+ {
+ case SparseIndexVectorType::NONE:
+ break;
+ case SparseIndexVectorType::I32:
+ {
+ const std::vector<int32_t> *vec =
+ static_cast<const std::vector<int32_t> *>(sparse_index_vec);
+ _vec_ptr = static_cast<void *>(new std::vector<int32_t>(vec->begin(), vec->end()));
+ break;
+ }
+ case SparseIndexVectorType::U16:
+ {
+ const std::vector<uint16_t> *vec =
+ static_cast<const std::vector<uint16_t> *>(sparse_index_vec);
+ _vec_ptr = static_cast<void *>(new std::vector<uint16_t>(vec->begin(), vec->end()));
+ break;
+ }
+ case SparseIndexVectorType::U8:
+ {
+ const std::vector<uint8_t> *vec =
+ static_cast<const std::vector<uint8_t> *>(sparse_index_vec);
+ _vec_ptr = static_cast<void *>(new std::vector<uint8_t>(vec->begin(), vec->end()));
+ break;
+ }
+ default:
+        throw std::runtime_error("Invalid SparseIndexVectorType");
+ }
+ }
+
+ SparseIndexVector(const SparseIndexVector &sparse_index_vec)
+ : SparseIndexVector(sparse_index_vec._type, sparse_index_vec._vec_ptr)
+ {
+ }
+
+ SparseIndexVector(SparseIndexVector &&sparse_index_vec)
+ : _type{sparse_index_vec._type}, _vec_ptr{std::exchange(sparse_index_vec._vec_ptr, nullptr)}
+ {
+ }
+
+ SparseIndexVector &operator=(const SparseIndexVector &sparse_index_vec)
+ {
+ return *this = SparseIndexVector(sparse_index_vec);
+ }
+
+ SparseIndexVector &operator=(SparseIndexVector &&sparse_index_vector)
+ {
+ std::swap(_vec_ptr, sparse_index_vector._vec_ptr);
+ std::swap(_type, sparse_index_vector._type);
+ return *this;
+ }
+
+ ~SparseIndexVector()
+ {
+ switch (_type)
+ {
+ case SparseIndexVectorType::NONE:
+ break;
+ case SparseIndexVectorType::I32:
+ {
+ auto vec_ptr = static_cast<std::vector<int32_t> *>(_vec_ptr);
+ delete vec_ptr;
+ break;
+ }
+ case SparseIndexVectorType::U16:
+ {
+ auto vec_ptr = static_cast<std::vector<uint16_t> *>(_vec_ptr);
+ delete vec_ptr;
+ break;
+ }
+ case SparseIndexVectorType::U8:
+ {
+ auto vec_ptr = static_cast<std::vector<uint8_t> *>(_vec_ptr);
+ delete vec_ptr;
+ break;
+ }
+ default:
+ break;
+ }
+ _vec_ptr = nullptr;
+ _type = SparseIndexVectorType::NONE;
+ }
+
+public:
+ SparseIndexVectorType type(void) const { return _type; }
+
+public:
+ const std::vector<int32_t> *as_int32_vector(void) const
+ {
+ return _type == SparseIndexVectorType::I32 ? static_cast<const std::vector<int32_t> *>(_vec_ptr)
+ : nullptr;
+ }
+ const std::vector<uint16_t> *as_uint16_vector(void) const
+ {
+ return _type == SparseIndexVectorType::U16
+ ? static_cast<const std::vector<uint16_t> *>(_vec_ptr)
+ : nullptr;
+ }
+ const std::vector<uint8_t> *as_uint8_vector(void) const
+ {
+ return _type == SparseIndexVectorType::U8 ? static_cast<const std::vector<uint8_t> *>(_vec_ptr)
+ : nullptr;
+ }
+
+private:
+ SparseIndexVectorType _type{SparseIndexVectorType::NONE};
+ void *_vec_ptr{nullptr};
+};
+
+class DimMetaData
+{
+public:
+ DimMetaData() = delete;
+ DimMetaData(DimensionType format, int32_t dense_size) : _format{format}, _dense_size{dense_size}
+ {
+ // DO NOTHING
+ }
+ DimMetaData(DimensionType format, int32_t dense_size, const SparseIndexVector &array_segments,
+ const SparseIndexVector &array_indices)
+ : _format{format}, _dense_size{dense_size}, _array_segments{array_segments}, _array_indices{
+ array_indices}
+ {
+ // DO NOTHING
+ }
+
+public:
+ DimensionType format(void) const { return _format; }
+ int32_t dense_size(void) const { return _dense_size; }
+ const SparseIndexVector &array_segments(void) const { return _array_segments; }
+ const SparseIndexVector &array_indices(void) const { return _array_indices; }
+
+private:
+ DimensionType _format{DimensionType::DENSE};
+ int32_t _dense_size{0};
+ SparseIndexVector _array_segments;
+ SparseIndexVector _array_indices;
+};
+
+struct SparsityParam
+{
+ std::vector<int32_t> traversal_order;
+ std::vector<int32_t> block_map;
+ std::vector<DimMetaData> dim_metadata;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_SPARSITYPARAM_H__
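A sketch of how the three types above compose into a sparsity description (illustrative CSR values, not taken from this diff):

  // One dense dimension followed by one CSR-encoded dimension.
  luci::SparseIndexVector segments(luci::SparseIndexVectorType::I32,
                                   std::vector<int32_t>{0, 2, 4});
  luci::SparseIndexVector indices(luci::SparseIndexVectorType::I32,
                                  std::vector<int32_t>{0, 3, 1, 2});
  luci::SparsityParam sp;
  sp.traversal_order = {0, 1};
  sp.dim_metadata.emplace_back(luci::DimensionType::DENSE, /* dense_size */ 2);
  sp.dim_metadata.emplace_back(luci::DimensionType::SPARSE_CSR, 0, segments, indices);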
diff --git a/compiler/luci/lang/src/AttrDilation.cpp b/compiler/luci/lang/src/AttrDilation.cpp
new file mode 100644
index 000000000..a9f479502
--- /dev/null
+++ b/compiler/luci/lang/src/AttrDilation.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/AttrDilation.h"
+
+#include <cassert>
+
+namespace luci
+{
+
+void Dilation::w(int32_t w)
+{
+ assert(w >= 0);
+ _w = static_cast<uint32_t>(w);
+}
+
+void Dilation::h(int32_t h)
+{
+ assert(h >= 0);
+ _h = static_cast<uint32_t>(h);
+}
+
+} // namespace luci
diff --git a/compiler/luci/lang/src/AttrDilation.test.cpp b/compiler/luci/lang/src/AttrDilation.test.cpp
new file mode 100644
index 000000000..3e4658990
--- /dev/null
+++ b/compiler/luci/lang/src/AttrDilation.test.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/AttrDilation.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleAttrDilationTest, set)
+{
+ auto d = luci::Dilation();
+
+ d.h(10u);
+ d.w(10u);
+
+ ASSERT_EQ(d.h(), 10u);
+ ASSERT_EQ(d.w(), 10u);
+
+ d.h(10); // int32_t
+ d.w(10);
+
+ ASSERT_EQ(d.h(), 10u);
+ ASSERT_EQ(d.w(), 10u);
+}
diff --git a/compiler/luci/lang/src/AttrFilter.cpp b/compiler/luci/lang/src/AttrFilter.cpp
new file mode 100644
index 000000000..9c571e7f5
--- /dev/null
+++ b/compiler/luci/lang/src/AttrFilter.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/AttrFilter.h"
+
+#include <cassert>
+
+namespace luci
+{
+
+void Filter::w(int32_t w)
+{
+ assert(w >= 0);
+ _w = static_cast<uint32_t>(w);
+}
+
+void Filter::h(int32_t h)
+{
+ assert(h >= 0);
+ _h = static_cast<uint32_t>(h);
+}
+
+} // namespace luci
diff --git a/compiler/luci/lang/src/AttrFilter.test.cpp b/compiler/luci/lang/src/AttrFilter.test.cpp
new file mode 100644
index 000000000..06dbcacd5
--- /dev/null
+++ b/compiler/luci/lang/src/AttrFilter.test.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/AttrFilter.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleAttrFilterTest, set)
+{
+ auto f = luci::Filter();
+
+ f.h(10u);
+ f.w(10u);
+
+ ASSERT_EQ(f.h(), 10u);
+ ASSERT_EQ(f.w(), 10u);
+
+ f.h(10); // int32_t
+ f.w(10);
+
+ ASSERT_EQ(f.h(), 10u);
+ ASSERT_EQ(f.w(), 10u);
+}
diff --git a/compiler/luci/lang/src/AttrStride.cpp b/compiler/luci/lang/src/AttrStride.cpp
new file mode 100644
index 000000000..9720d12b5
--- /dev/null
+++ b/compiler/luci/lang/src/AttrStride.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/AttrStride.h"
+
+#include <cassert>
+
+namespace luci
+{
+
+void Stride::w(int32_t w)
+{
+ assert(w >= 0);
+ _w = static_cast<uint32_t>(w);
+}
+
+void Stride::h(int32_t h)
+{
+ assert(h >= 0);
+ _h = static_cast<uint32_t>(h);
+}
+
+} // namespace luci
diff --git a/compiler/luci/lang/src/AttrStride.test.cpp b/compiler/luci/lang/src/AttrStride.test.cpp
new file mode 100644
index 000000000..e91365bd5
--- /dev/null
+++ b/compiler/luci/lang/src/AttrStride.test.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/AttrStride.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleAttrStrideTest, set)
+{
+ auto s = luci::Stride();
+
+ s.h(10u);
+ s.w(10u);
+
+ ASSERT_EQ(s.h(), 10u);
+ ASSERT_EQ(s.w(), 10u);
+
+ s.h(10); // int32_t
+ s.w(10);
+
+ ASSERT_EQ(s.h(), 10u);
+ ASSERT_EQ(s.w(), 10u);
+}
diff --git a/compiler/luci/lang/src/CircleDialect.cpp b/compiler/luci/lang/src/CircleDialect.cpp
index 42ca3c917..0d315fc55 100644
--- a/compiler/luci/lang/src/CircleDialect.cpp
+++ b/compiler/luci/lang/src/CircleDialect.cpp
@@ -15,6 +15,7 @@
*/
#include "luci/IR/CircleDialect.h"
+#include "luci/IR/DeadNodeQueryService.h"
#include "luci/IR/Nodes/CircleInput.h"
#include "luci/IR/Nodes/CircleOutput.h"
@@ -22,8 +23,6 @@
#include <loco/IR/GraphInputIndex.h>
#include <loco/IR/GraphOutputIndex.h>
-#include "DeadNodeQueryService.h"
-
#include <cassert>
#include <memory>
diff --git a/compiler/luci/lang/src/CircleNodeMixins.cpp b/compiler/luci/lang/src/CircleNodeMixins.cpp
new file mode 100644
index 000000000..f72178df5
--- /dev/null
+++ b/compiler/luci/lang/src/CircleNodeMixins.cpp
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This is to validate CircleNodeMixins.h
+#include "luci/IR/CircleNodeMixins.h"
diff --git a/compiler/luci/lang/src/CircleNodes.cpp b/compiler/luci/lang/src/CircleNodes.cpp
index c77c06861..2c2688c9e 100644
--- a/compiler/luci/lang/src/CircleNodes.cpp
+++ b/compiler/luci/lang/src/CircleNodes.cpp
@@ -23,31 +23,6 @@
namespace luci
{
-void set_new_shape(CircleReshape *node, int32_t *base, uint32_t size)
-{
- // Check node does not have both of new shape infos
- LUCI_ASSERT(node->shape() == nullptr, "node already has shape input");
- LUCI_ASSERT(node->newShape()->rank() == 0, "node already has newShape attribute");
-
- const loco::DataType S32 = loco::DataType::S32;
-
- // Set 2nd input as CircleConst
- auto const_shape_node = node->graph()->nodes()->create<CircleConst>();
- const_shape_node->rank(1);
- const_shape_node->dim(0) = size;
- const_shape_node->dtype(S32);
- const_shape_node->size<S32>(size);
- const_shape_node->shape_status(luci::ShapeStatus::VALID);
- for (uint32_t axis = 0; axis < size; ++axis)
- const_shape_node->at<S32>(axis) = base[axis];
- node->shape(const_shape_node);
-
- // Set newShape attribute
- node->newShape()->rank(size);
- for (uint32_t axis = 0; axis < size; ++axis)
- node->newShape()->dim(axis) = base[axis];
-}
-
void link(loco::GraphOutput *output, CircleOutput *node) { node->index(output->index()); }
CircleOutput *output_node(loco::Graph *g, const loco::GraphOutputIndex &index)
diff --git a/compiler/luci/lang/src/CircleQuantParam.cpp b/compiler/luci/lang/src/CircleQuantParam.cpp
new file mode 100644
index 000000000..89671d3c3
--- /dev/null
+++ b/compiler/luci/lang/src/CircleQuantParam.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/CircleQuantParam.h"
+#include "luci/IR/CircleNode.h"
+
+#include <memory>
+
+namespace luci
+{
+
+/**
+ * @brief copy CircleQuantParam of src to dst
+ */
+void copy_quantparam(const luci::CircleNode *src, luci::CircleNode *dst)
+{
+ auto q = src->quantparam();
+ if (q == nullptr)
+ dst->quantparam(nullptr);
+ else
+ {
+ auto qparam = std::make_unique<luci::CircleQuantParam>();
+ qparam->scale = q->scale;
+ qparam->zerop = q->zerop;
+ qparam->min = q->min;
+ qparam->max = q->max;
+ qparam->quantized_dimension = q->quantized_dimension;
+
+ dst->quantparam(std::move(qparam));
+ }
+}
+
+} // namespace luci
diff --git a/compiler/luci/lang/src/CircleQuantParam.test.cpp b/compiler/luci/lang/src/CircleQuantParam.test.cpp
new file mode 100644
index 000000000..520ca05cc
--- /dev/null
+++ b/compiler/luci/lang/src/CircleQuantParam.test.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// NOTE any node will do for testing
+#include "luci/IR/Nodes/CircleAdd.h"
+
+#include <loco/IR/Graph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+luci::CircleAdd *build_simple_add_graph(loco::Graph *g)
+{
+ auto node = g->nodes()->create<luci::CircleAdd>();
+
+ node->name("name");
+ node->dtype(loco::DataType::FLOAT32);
+ node->rank(1);
+ node->dim(0).set(3);
+ node->shape_status(luci::ShapeStatus::VALID);
+ node->fusedActivationFunction(luci::FusedActFunc::NONE);
+
+ auto qparam = std::make_unique<luci::CircleQuantParam>();
+ qparam->scale = {1.0};
+ qparam->zerop = {0};
+ qparam->min = {0.0};
+ qparam->max = {1.0};
+ qparam->quantized_dimension = 0;
+ node->quantparam(std::move(qparam));
+
+ return node;
+}
+
+} // namespace
+
+TEST(CircleNodeCloneTest, copy_quantparam)
+{
+ auto g = loco::make_graph();
+ auto node = build_simple_add_graph(g.get());
+
+ auto copy = g->nodes()->create<luci::CircleAdd>();
+ luci::copy_quantparam(node, copy);
+
+ const auto *qparam_node = node->quantparam();
+ const auto *qparam_copy = copy->quantparam();
+ ASSERT_EQ(qparam_node->scale, qparam_copy->scale);
+ ASSERT_EQ(qparam_node->zerop, qparam_copy->zerop);
+ ASSERT_EQ(qparam_node->quantized_dimension, qparam_copy->quantized_dimension);
+}
+
+TEST(CircleNodeCloneTest, copy_quantparam_NEG)
+{
+ auto g = loco::make_graph();
+ auto node = build_simple_add_graph(g.get());
+
+ node->quantparam(nullptr);
+
+ auto copy = g->nodes()->create<luci::CircleAdd>();
+ luci::copy_quantparam(node, copy);
+
+ const auto *qparam_copy = copy->quantparam();
+ ASSERT_EQ(qparam_copy, nullptr);
+}
diff --git a/compiler/luci/lang/src/DeadNodeQueryService.cpp b/compiler/luci/lang/src/DeadNodeQueryService.cpp
index a22574c94..7dac08b5f 100644
--- a/compiler/luci/lang/src/DeadNodeQueryService.cpp
+++ b/compiler/luci/lang/src/DeadNodeQueryService.cpp
@@ -14,9 +14,8 @@
* limitations under the License.
*/
-#include "DeadNodeQueryService.h"
-
#include "luci/IR/CircleNodeVisitor.h"
+#include "luci/IR/DeadNodeQueryService.h"
#include <loco/IR/Graph.h>
diff --git a/compiler/luci/lang/src/LuciNodeMixins.cpp b/compiler/luci/lang/src/LuciNodeMixins.cpp
deleted file mode 100644
index 660cbe1a5..000000000
--- a/compiler/luci/lang/src/LuciNodeMixins.cpp
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// This is to validate LuciNodeMixins.h
-#include "luci/IR/LuciNodeMixins.h"
diff --git a/compiler/luci/lang/src/Nodes/CircleBatchMatMul.test.cpp b/compiler/luci/lang/src/Nodes/CircleBatchMatMul.test.cpp
index d7712c8dd..3859d7fca 100644
--- a/compiler/luci/lang/src/Nodes/CircleBatchMatMul.test.cpp
+++ b/compiler/luci/lang/src/Nodes/CircleBatchMatMul.test.cpp
@@ -26,7 +26,7 @@ TEST(CircleBatchMatMulTest, constructor)
luci::CircleBatchMatMul batchmatmul_node;
ASSERT_EQ(luci::CircleDialect::get(), batchmatmul_node.dialect());
- ASSERT_EQ(luci::CircleOpcode::BATCHMATMUL, batchmatmul_node.opcode());
+ ASSERT_EQ(luci::CircleOpcode::BATCH_MATMUL, batchmatmul_node.opcode());
ASSERT_EQ(nullptr, batchmatmul_node.x());
ASSERT_EQ(nullptr, batchmatmul_node.y());
diff --git a/compiler/luci/lang/src/Nodes/CircleBidrectionalSequenceLSTM.test.cpp b/compiler/luci/lang/src/Nodes/CircleBidrectionalSequenceLSTM.test.cpp
new file mode 100644
index 000000000..3f13422e5
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleBidrectionalSequenceLSTM.test.cpp
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleBidirectionalSequenceLSTM.h"
+
+#include "luci/IR/CircleDialect.h"
+#include "luci/IR/CircleNodeVisitor.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleBidirectionalSequenceLSTMTest, constructor_P)
+{
+ luci::CircleBidirectionalSequenceLSTM trc_node;
+
+ ASSERT_EQ(luci::CircleDialect::get(), trc_node.dialect());
+ ASSERT_EQ(luci::CircleOpcode::BIDIRECTIONAL_SEQUENCE_LSTM, trc_node.opcode());
+
+ ASSERT_EQ(nullptr, trc_node.input());
+
+ ASSERT_EQ(nullptr, trc_node.fw_input_to_input_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_input_to_forget_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_input_to_cell_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_input_to_output_weights());
+
+ ASSERT_EQ(nullptr, trc_node.fw_recurrent_to_input_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_recurrent_to_forget_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_recurrent_to_cell_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_recurrent_to_output_weights());
+
+ ASSERT_EQ(nullptr, trc_node.fw_cell_to_input_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_cell_to_forget_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_cell_to_output_weights());
+
+ ASSERT_EQ(nullptr, trc_node.fw_input_gate_bias());
+ ASSERT_EQ(nullptr, trc_node.fw_forget_gate_bias());
+ ASSERT_EQ(nullptr, trc_node.fw_cell_gate_bias());
+ ASSERT_EQ(nullptr, trc_node.fw_output_gate_bias());
+
+ ASSERT_EQ(nullptr, trc_node.fw_projection_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_projection_bias());
+
+ ASSERT_EQ(nullptr, trc_node.bw_input_to_input_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_input_to_forget_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_input_to_cell_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_input_to_output_weights());
+
+ ASSERT_EQ(nullptr, trc_node.bw_recurrent_to_input_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_recurrent_to_forget_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_recurrent_to_cell_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_recurrent_to_output_weights());
+
+ ASSERT_EQ(nullptr, trc_node.bw_cell_to_input_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_cell_to_forget_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_cell_to_output_weights());
+
+ ASSERT_EQ(nullptr, trc_node.bw_input_gate_bias());
+ ASSERT_EQ(nullptr, trc_node.bw_forget_gate_bias());
+ ASSERT_EQ(nullptr, trc_node.bw_cell_gate_bias());
+ ASSERT_EQ(nullptr, trc_node.bw_output_gate_bias());
+
+ ASSERT_EQ(nullptr, trc_node.bw_projection_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_projection_bias());
+
+ ASSERT_EQ(nullptr, trc_node.fw_activation_state());
+ ASSERT_EQ(nullptr, trc_node.fw_cell_state());
+ ASSERT_EQ(nullptr, trc_node.bw_activation_state());
+ ASSERT_EQ(nullptr, trc_node.bw_cell_state());
+
+ ASSERT_EQ(nullptr, trc_node.auxillary_input());
+ ASSERT_EQ(nullptr, trc_node.fw_auxillary_input_to_input_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_auxillary_input_to_forget_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_auxillary_input_to_cell_weights());
+ ASSERT_EQ(nullptr, trc_node.fw_auxillary_input_to_output_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_auxillary_input_to_input_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_auxillary_input_to_forget_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_auxillary_input_to_cell_weights());
+ ASSERT_EQ(nullptr, trc_node.bw_auxillary_input_to_output_weights());
+
+ ASSERT_EQ(luci::FusedActFunc::UNDEFINED, trc_node.fusedActivationFunction());
+ ASSERT_EQ(0.f, trc_node.cell_clip());
+ ASSERT_EQ(0.f, trc_node.proj_clip());
+ ASSERT_EQ(false, trc_node.merge_outputs());
+ ASSERT_EQ(false, trc_node.time_major());
+ ASSERT_EQ(false, trc_node.asymmetric_quantize_inputs());
+}
+
+TEST(CircleBidirectionalSequenceLSTMTest, arity_NEG)
+{
+ luci::CircleBidirectionalSequenceLSTM trc_node;
+
+ ASSERT_NO_THROW(trc_node.arg(36));
+ ASSERT_THROW(trc_node.arg(48), std::out_of_range);
+}
+
+TEST(CircleBidirectionalSequenceLSTMTest, visit_mutable_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeMutableVisitor<void>
+ {
+ };
+
+ luci::CircleBidirectionalSequenceLSTM trc_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(trc_node.accept(&tv), std::exception);
+}
+
+TEST(CircleBidirectionalSequenceLSTMTest, visit_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeVisitor<void>
+ {
+ };
+
+ luci::CircleBidirectionalSequenceLSTM trc_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(trc_node.accept(&tv), std::exception);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleConst.cpp b/compiler/luci/lang/src/Nodes/CircleConst.cpp
index 0d02d32dc..a4854ec59 100644
--- a/compiler/luci/lang/src/Nodes/CircleConst.cpp
+++ b/compiler/luci/lang/src/Nodes/CircleConst.cpp
@@ -77,7 +77,52 @@ INSTANTIATE(loco::DataType::S8);
INSTANTIATE(loco::DataType::FLOAT32);
INSTANTIATE(loco::DataType::U8);
INSTANTIATE(loco::DataType::BOOL);
+INSTANTIATE(loco::DataType::FLOAT16);
#undef INSTANTIATE
+// CircleConst implementations for loco::DataType::STRING
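+// Unlike the numeric types above, STRING values are stored in the separate
+// _strings vector, so the raw _data buffer must stay empty.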
+
+template <> uint32_t CircleConst::size<loco::DataType::STRING>(void) const
+{
+ assert(dtype() == loco::DataType::STRING);
+ assert(_data.size() == 0);
+ return _strings.size();
+}
+
+template <> void CircleConst::size<loco::DataType::STRING>(uint32_t l)
+{
+ assert(dtype() == loco::DataType::STRING);
+ assert(_data.size() == 0);
+ _strings.resize(l);
+}
+
+template <> const std::string &CircleConst::at<loco::DataType::STRING>(uint32_t n) const
+{
+ assert(dtype() == loco::DataType::STRING);
+ assert(n < _strings.size());
+ return _strings.at(n);
+}
+
+template <> std::string &CircleConst::at<loco::DataType::STRING>(uint32_t n)
+{
+ assert(dtype() == loco::DataType::STRING);
+ assert(n < _strings.size());
+ return _strings.at(n);
+}
+
+template <> const std::string &CircleConst::scalar<loco::DataType::STRING>(void) const
+{
+ assert(dtype() == loco::DataType::STRING);
+ assert(1 == _strings.size());
+ return _strings.at(0);
+}
+
+template <> std::string &CircleConst::scalar<loco::DataType::STRING>(void)
+{
+ assert(dtype() == loco::DataType::STRING);
+ assert(1 == _strings.size());
+ return _strings.at(0);
+}
+
} // namespace luci
diff --git a/compiler/luci/lang/src/Nodes/CircleConst.test.cpp b/compiler/luci/lang/src/Nodes/CircleConst.test.cpp
new file mode 100644
index 000000000..e8c892b83
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleConst.test.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleConst.h"
+
+#include "luci/IR/CircleDialect.h"
+#include "luci/IR/CircleNodeVisitor.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleConstTest, constructor)
+{
+ luci::CircleConst const_node;
+
+ ASSERT_EQ(luci::CircleDialect::get(), const_node.dialect());
+ ASSERT_EQ(luci::CircleOpcode::CIRCLECONST, const_node.opcode());
+}
+
+TEST(CircleConstTest, dtype_size)
+{
+ luci::CircleConst const_node;
+
+ const_node.dtype(loco::DataType::S32);
+ const_node.size<loco::DataType::S32>(1);
+
+ ASSERT_EQ(loco::DataType::S32, const_node.dtype());
+ ASSERT_EQ(1, const_node.size<loco::DataType::S32>());
+}
+
+TEST(CircleConstTest, scalar)
+{
+ luci::CircleConst const_node;
+
+ const_node.dtype(loco::DataType::S32);
+ const_node.size<loco::DataType::S32>(1);
+ const_node.scalar<loco::DataType::S32>() = 1;
+
+ auto const &cs = const_node.scalar<loco::DataType::S32>();
+ ASSERT_EQ(1, cs);
+}
+
+TEST(CircleConstTest, string)
+{
+ luci::CircleConst const_node;
+
+ const_node.dtype(loco::DataType::STRING);
+ const_node.size<loco::DataType::STRING>(1);
+ const_node.at<loco::DataType::STRING>(0) = std::string("Hello");
+
+ ASSERT_EQ(loco::DataType::STRING, const_node.dtype());
+ ASSERT_EQ(1, const_node.size<loco::DataType::STRING>());
+ EXPECT_TRUE(std::string("Hello") == const_node.at<loco::DataType::STRING>(0));
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleCustom.test.cpp b/compiler/luci/lang/src/Nodes/CircleCustom.test.cpp
index c07268cbf..76b70f38b 100644
--- a/compiler/luci/lang/src/Nodes/CircleCustom.test.cpp
+++ b/compiler/luci/lang/src/Nodes/CircleCustom.test.cpp
@@ -22,7 +22,7 @@
TEST(CircleCustomTest, constructor)
{
- luci::CircleCustom custom_node(2);
+ luci::CircleCustom custom_node(2, 1);
ASSERT_EQ(luci::CircleDialect::get(), custom_node.dialect());
ASSERT_EQ(luci::CircleOpcode::CUSTOM, custom_node.opcode());
@@ -33,18 +33,19 @@ TEST(CircleCustomTest, constructor)
ASSERT_EQ(2, custom_node.numInputs());
ASSERT_EQ(0, custom_node.custom_code().size());
+ ASSERT_EQ(1, custom_node.numOutputs());
}
TEST(CircleCustomTest, constructor_NEG)
{
- ASSERT_DEBUG_DEATH(luci::CircleCustom{0}, "");
+ ASSERT_DEBUG_DEATH(luci::CircleCustom(0, 0), "");
SUCCEED();
}
TEST(CircleCustomTest, invalidIndex_NEG)
{
- luci::CircleCustom custom_node(2);
+ luci::CircleCustom custom_node(2, 1);
EXPECT_ANY_THROW(custom_node.arg(5));
}
diff --git a/compiler/luci/lang/src/Nodes/CircleDensify.test.cpp b/compiler/luci/lang/src/Nodes/CircleDensify.test.cpp
new file mode 100644
index 000000000..ae83784a5
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleDensify.test.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleDensify.h"
+
+#include "luci/IR/CircleDialect.h"
+#include "luci/IR/CircleNodeVisitor.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleDensifyTest, constructor)
+{
+ luci::CircleDensify densify_node;
+
+ ASSERT_EQ(luci::CircleDialect::get(), densify_node.dialect());
+ ASSERT_EQ(luci::CircleOpcode::DENSIFY, densify_node.opcode());
+
+ ASSERT_EQ(nullptr, densify_node.input());
+}
+
+TEST(CircleDensifyTest, input_NEG)
+{
+ luci::CircleDensify densify_node;
+ luci::CircleDensify node;
+
+ densify_node.input(&node);
+ ASSERT_NE(nullptr, densify_node.input());
+
+ densify_node.input(nullptr);
+ ASSERT_EQ(nullptr, densify_node.input());
+}
+
+TEST(CircleDensifyTest, arity_NEG)
+{
+ luci::CircleDensify densify_node;
+
+ ASSERT_NO_THROW(densify_node.arg(0));
+ ASSERT_THROW(densify_node.arg(1), std::out_of_range);
+}
+
+TEST(CircleDensifyTest, visit_mutable_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeMutableVisitor<void>
+ {
+ };
+
+ luci::CircleDensify densify_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(densify_node.accept(&tv), std::exception);
+}
+
+TEST(CircleDensifyTest, visit_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeVisitor<void>
+ {
+ };
+
+ luci::CircleDensify densify_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(densify_node.accept(&tv), std::exception);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleDequantize.test.cpp b/compiler/luci/lang/src/Nodes/CircleDequantize.test.cpp
new file mode 100644
index 000000000..c3a132c60
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleDequantize.test.cpp
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleDequantize.h"
+
+#include "luci/IR/CircleDialect.h"
+#include "luci/IR/CircleNodeVisitor.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+
+TEST(CircleDequantizeTest, constructor)
+{
+ luci::CircleDequantize dequant_node;
+
+ ASSERT_EQ(luci::CircleDialect::get(), dequant_node.dialect());
+ ASSERT_EQ(luci::CircleOpcode::DEQUANTIZE, dequant_node.opcode());
+
+ ASSERT_EQ(nullptr, dequant_node.input());
+}
+
+TEST(CircleDequantizeTest, common_NEG)
+{
+ luci::CircleDequantize dequant_node;
+
+ dequant_node.name("name");
+ ASSERT_EQ("name", dequant_node.name());
+
+ auto q = std::make_unique<luci::CircleQuantParam>();
+ dequant_node.quantparam(std::move(q));
+ ASSERT_NE(nullptr, dequant_node.quantparam());
+
+ ASSERT_EQ(luci::ShapeStatus::UNDEFINED, dequant_node.shape_status());
+ dequant_node.shape_status(luci::ShapeStatus::NOSHAPE);
+ ASSERT_NE(luci::ShapeStatus::UNDEFINED, dequant_node.shape_status());
+}
+
+TEST(CircleDequantizeTest, input_NEG)
+{
+ luci::CircleDequantize dequant_node;
+ luci::CircleDequantize node;
+
+ dequant_node.input(&node);
+ ASSERT_NE(nullptr, dequant_node.input());
+
+ dequant_node.input(nullptr);
+ ASSERT_EQ(nullptr, dequant_node.input());
+}
+
+TEST(CircleDequantizeTest, arity_NEG)
+{
+ luci::CircleDequantize dequant_node;
+
+ ASSERT_NO_THROW(dequant_node.arg(0));
+ ASSERT_THROW(dequant_node.arg(1), std::out_of_range);
+}
+
+TEST(CircleDequantizeTest, visit_mutable_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeMutableVisitor<void>
+ {
+ };
+
+ luci::CircleDequantize dequant_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(dequant_node.accept(&tv), std::exception);
+}
+
+TEST(CircleDequantizeTest, visit_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeVisitor<void>
+ {
+ };
+
+ luci::CircleDequantize dequant_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(dequant_node.accept(&tv), std::exception);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleFakeQuant.test.cpp b/compiler/luci/lang/src/Nodes/CircleFakeQuant.test.cpp
new file mode 100644
index 000000000..912e40570
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleFakeQuant.test.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleFakeQuant.h"
+
+#include "luci/IR/CircleDialect.h"
+#include "luci/IR/CircleNodeVisitor.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleFakeQuantTest, constructor_P)
+{
+ luci::CircleFakeQuant fakequant;
+
+ ASSERT_EQ(fakequant.dialect(), luci::CircleDialect::get());
+ ASSERT_EQ(fakequant.opcode(), luci::CircleOpcode::FAKE_QUANT);
+
+ ASSERT_EQ(nullptr, fakequant.inputs());
+ ASSERT_EQ(0.0f, fakequant.min());
+ ASSERT_EQ(0.0f, fakequant.max());
+ ASSERT_EQ(0, fakequant.num_bits());
+ ASSERT_FALSE(fakequant.narrow_range());
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleFullyConnected.test.cpp b/compiler/luci/lang/src/Nodes/CircleFullyConnected.test.cpp
index bb0e3c51b..15a780085 100644
--- a/compiler/luci/lang/src/Nodes/CircleFullyConnected.test.cpp
+++ b/compiler/luci/lang/src/Nodes/CircleFullyConnected.test.cpp
@@ -32,6 +32,7 @@ TEST(CircleFullyConnectedTest, constructor)
ASSERT_EQ(nullptr, fc_node.weights());
ASSERT_EQ(nullptr, fc_node.bias());
ASSERT_EQ(luci::FusedActFunc::UNDEFINED, fc_node.fusedActivationFunction());
+ ASSERT_EQ(false, fc_node.keep_num_dims());
}
TEST(CircleFullyConnectedTest, input_NEG)
diff --git a/compiler/luci/lang/src/Nodes/CircleGelu.test.cpp b/compiler/luci/lang/src/Nodes/CircleGelu.test.cpp
new file mode 100644
index 000000000..b10a2b48b
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleGelu.test.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleGelu.h"
+
+#include "luci/IR/CircleDialect.h"
+#include "luci/IR/CircleNodeVisitor.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleGeluTest, constructor)
+{
+ luci::CircleGelu gelu_node;
+
+ ASSERT_EQ(luci::CircleDialect::get(), gelu_node.dialect());
+ ASSERT_EQ(luci::CircleOpcode::GELU, gelu_node.opcode());
+
+ ASSERT_EQ(nullptr, gelu_node.features());
+
+ ASSERT_EQ(false, gelu_node.approximate());
+}
+
+TEST(CircleGeluTest, input_NEG)
+{
+ luci::CircleGelu gelu_node;
+ luci::CircleGelu node;
+
+ gelu_node.features(&node);
+ ASSERT_NE(nullptr, gelu_node.features());
+
+ gelu_node.features(nullptr);
+ ASSERT_EQ(nullptr, gelu_node.features());
+
+ gelu_node.approximate(true);
+ ASSERT_NE(false, gelu_node.approximate());
+}
+
+TEST(CircleGeluTest, arity_NEG)
+{
+ luci::CircleGelu gelu_node;
+
+ ASSERT_NO_THROW(gelu_node.arg(0));
+ ASSERT_THROW(gelu_node.arg(1), std::out_of_range);
+}
+
+TEST(CircleGeluTest, visit_mutable_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeMutableVisitor<void>
+ {
+ };
+
+ luci::CircleGelu gelu_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(gelu_node.accept(&tv), std::exception);
+}
+
+TEST(CircleGeluTest, visit_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeVisitor<void>
+ {
+ };
+
+ luci::CircleGelu gelu_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(gelu_node.accept(&tv), std::exception);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleHardSwish.test.cpp b/compiler/luci/lang/src/Nodes/CircleHardSwish.test.cpp
new file mode 100644
index 000000000..7825ce75b
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleHardSwish.test.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleHardSwish.h"
+
+#include "luci/IR/CircleDialect.h"
+#include "luci/IR/CircleNodeVisitor.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleHardSwishTest, constructor_P)
+{
+ luci::CircleHardSwish hard_swish_node;
+
+ ASSERT_EQ(luci::CircleDialect::get(), hard_swish_node.dialect());
+ ASSERT_EQ(luci::CircleOpcode::HARD_SWISH, hard_swish_node.opcode());
+
+ ASSERT_EQ(nullptr, hard_swish_node.features());
+}
+
+TEST(CircleHardSwishTest, input_NEG)
+{
+ luci::CircleHardSwish hard_swish_node;
+ luci::CircleHardSwish node;
+
+ hard_swish_node.features(&node);
+ ASSERT_NE(nullptr, hard_swish_node.features());
+
+ hard_swish_node.features(nullptr);
+ ASSERT_EQ(nullptr, hard_swish_node.features());
+}
+
+TEST(CircleHardSwishTest, arity_NEG)
+{
+ luci::CircleHardSwish hard_swish_node;
+
+ ASSERT_NO_THROW(hard_swish_node.arg(0));
+ ASSERT_THROW(hard_swish_node.arg(1), std::out_of_range);
+}
+
+TEST(CircleHardSwishTest, visit_mutable_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeMutableVisitor<void>
+ {
+ };
+
+ luci::CircleHardSwish hard_swish_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(hard_swish_node.accept(&tv), std::exception);
+}
+
+TEST(CircleHardSwishTest, visit_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeVisitor<void>
+ {
+ };
+
+ luci::CircleHardSwish hard_swish_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(hard_swish_node.accept(&tv), std::exception);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleQuantize.test.cpp b/compiler/luci/lang/src/Nodes/CircleQuantize.test.cpp
new file mode 100644
index 000000000..098716fdc
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleQuantize.test.cpp
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleQuantize.h"
+
+#include "luci/IR/CircleDialect.h"
+#include "luci/IR/CircleNodeVisitor.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+
+TEST(CircleQuantizeTest, constructor)
+{
+ luci::CircleQuantize quant_node;
+
+ ASSERT_EQ(luci::CircleDialect::get(), quant_node.dialect());
+ ASSERT_EQ(luci::CircleOpcode::QUANTIZE, quant_node.opcode());
+
+ ASSERT_EQ(nullptr, quant_node.input());
+}
+
+TEST(CircleQuantizeTest, common_NEG)
+{
+ luci::CircleQuantize quant_node;
+
+ quant_node.name("name");
+ ASSERT_EQ("name", quant_node.name());
+
+ auto q = std::make_unique<luci::CircleQuantParam>();
+ quant_node.quantparam(std::move(q));
+ ASSERT_NE(nullptr, quant_node.quantparam());
+
+ ASSERT_EQ(luci::ShapeStatus::UNDEFINED, quant_node.shape_status());
+ quant_node.shape_status(luci::ShapeStatus::NOSHAPE);
+ ASSERT_NE(luci::ShapeStatus::UNDEFINED, quant_node.shape_status());
+}
+
+TEST(CircleQuantizeTest, input_NEG)
+{
+ luci::CircleQuantize quant_node;
+ luci::CircleQuantize node;
+
+ quant_node.input(&node);
+ ASSERT_NE(nullptr, quant_node.input());
+
+ quant_node.input(nullptr);
+ ASSERT_EQ(nullptr, quant_node.input());
+}
+
+TEST(CircleQuantizeTest, arity_NEG)
+{
+ luci::CircleQuantize quant_node;
+
+ ASSERT_NO_THROW(quant_node.arg(0));
+ ASSERT_THROW(quant_node.arg(1), std::out_of_range);
+}
+
+TEST(CircleQuantizeTest, visit_mutable_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeMutableVisitor<void>
+ {
+ };
+
+ luci::CircleQuantize quant_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(quant_node.accept(&tv), std::exception);
+}
+
+TEST(CircleQuantizeTest, visit_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeVisitor<void>
+ {
+ };
+
+ luci::CircleQuantize quant_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(quant_node.accept(&tv), std::exception);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleSVDF.test.cpp b/compiler/luci/lang/src/Nodes/CircleSVDF.test.cpp
new file mode 100644
index 000000000..833ae0732
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleSVDF.test.cpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleSVDF.h"
+
+#include "luci/IR/CircleDialect.h"
+#include "luci/IR/CircleNodeVisitor.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleSVDFTest, constructor)
+{
+ luci::CircleSVDF svdf_node;
+
+ ASSERT_EQ(luci::CircleDialect::get(), svdf_node.dialect());
+ ASSERT_EQ(luci::CircleOpcode::SVDF, svdf_node.opcode());
+
+ ASSERT_EQ(nullptr, svdf_node.input());
+ ASSERT_EQ(nullptr, svdf_node.weight_feature());
+ ASSERT_EQ(nullptr, svdf_node.weight_time());
+ ASSERT_EQ(nullptr, svdf_node.bias());
+ ASSERT_EQ(nullptr, svdf_node.input_activation_state());
+
+ ASSERT_EQ(false, svdf_node.asymmetric_quantize_inputs());
+ ASSERT_EQ(0, svdf_node.svdf_rank());
+}
+
+TEST(CircleSVDFTest, input_NEG)
+{
+ luci::CircleSVDF svdf_node;
+ luci::CircleSVDF node;
+
+ svdf_node.input(&node);
+ svdf_node.weight_feature(&node);
+ svdf_node.weight_time(&node);
+ svdf_node.bias(&node);
+ svdf_node.input_activation_state(&node);
+
+ ASSERT_NE(nullptr, svdf_node.input());
+ ASSERT_NE(nullptr, svdf_node.weight_feature());
+ ASSERT_NE(nullptr, svdf_node.weight_time());
+ ASSERT_NE(nullptr, svdf_node.bias());
+ ASSERT_NE(nullptr, svdf_node.input_activation_state());
+
+ svdf_node.input(nullptr);
+ svdf_node.weight_feature(nullptr);
+ svdf_node.weight_time(nullptr);
+ svdf_node.bias(nullptr);
+ svdf_node.input_activation_state(nullptr);
+
+ ASSERT_EQ(nullptr, svdf_node.input());
+ ASSERT_EQ(nullptr, svdf_node.weight_feature());
+ ASSERT_EQ(nullptr, svdf_node.weight_time());
+ ASSERT_EQ(nullptr, svdf_node.bias());
+ ASSERT_EQ(nullptr, svdf_node.input_activation_state());
+}
+
+TEST(CircleSVDFTest, arity_NEG)
+{
+ luci::CircleSVDF svdf_node;
+
+ ASSERT_NO_THROW(svdf_node.arg(4));
+ ASSERT_THROW(svdf_node.arg(5), std::out_of_range);
+}
+
+TEST(CircleSVDFTest, visit_mutable_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeMutableVisitor<void>
+ {
+ };
+
+ luci::CircleSVDF svdf_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(svdf_node.accept(&tv), std::exception);
+}
+
+TEST(CircleSVDFTest, visit_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeVisitor<void>
+ {
+ };
+
+ luci::CircleSVDF svdf_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(svdf_node.accept(&tv), std::exception);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleSparseToDense.test.cpp b/compiler/luci/lang/src/Nodes/CircleSparseToDense.test.cpp
index 03f612ba7..073be6bcb 100644
--- a/compiler/luci/lang/src/Nodes/CircleSparseToDense.test.cpp
+++ b/compiler/luci/lang/src/Nodes/CircleSparseToDense.test.cpp
@@ -33,7 +33,7 @@ TEST(CircleSparseToDenseTest, constructor)
ASSERT_EQ(nullptr, stb_node.values());
ASSERT_EQ(nullptr, stb_node.default_value());
- ASSERT_EQ(false, stb_node.validate_indices());
+ ASSERT_FALSE(stb_node.validate_indices());
}
TEST(CircleSparseToDenseTest, input_NEG)
diff --git a/compiler/luci/lang/src/Nodes/CircleSum.test.cpp b/compiler/luci/lang/src/Nodes/CircleSum.test.cpp
index 84b51d671..f9d07b200 100644
--- a/compiler/luci/lang/src/Nodes/CircleSum.test.cpp
+++ b/compiler/luci/lang/src/Nodes/CircleSum.test.cpp
@@ -30,7 +30,7 @@ TEST(CircleSumTest, constructor_P)
ASSERT_EQ(nullptr, sum_node.input());
ASSERT_EQ(nullptr, sum_node.reduction_indices());
- ASSERT_EQ(false, sum_node.keep_dims());
+ ASSERT_FALSE(sum_node.keep_dims());
}
TEST(CircleSumTest, input_NEG)
diff --git a/compiler/luci/lang/src/Nodes/CircleUnidirectionalSequenceLSTM.test.cpp b/compiler/luci/lang/src/Nodes/CircleUnidirectionalSequenceLSTM.test.cpp
new file mode 100644
index 000000000..2b10930ee
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleUnidirectionalSequenceLSTM.test.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleUnidirectionalSequenceLSTM.h"
+
+#include "luci/IR/CircleDialect.h"
+#include "luci/IR/CircleNodeVisitor.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleUnidirectionalSequenceLSTMTest, constructor_P)
+{
+ luci::CircleUnidirectionalSequenceLSTM trc_node;
+
+ ASSERT_EQ(luci::CircleDialect::get(), trc_node.dialect());
+ ASSERT_EQ(luci::CircleOpcode::UNIDIRECTIONAL_SEQUENCE_LSTM, trc_node.opcode());
+
+ ASSERT_EQ(nullptr, trc_node.input());
+
+ ASSERT_EQ(nullptr, trc_node.input_to_input_weights());
+ ASSERT_EQ(nullptr, trc_node.input_to_forget_weights());
+ ASSERT_EQ(nullptr, trc_node.input_to_cell_weights());
+ ASSERT_EQ(nullptr, trc_node.input_to_output_weights());
+
+ ASSERT_EQ(nullptr, trc_node.recurrent_to_input_weights());
+ ASSERT_EQ(nullptr, trc_node.recurrent_to_forget_weights());
+ ASSERT_EQ(nullptr, trc_node.recurrent_to_cell_weights());
+ ASSERT_EQ(nullptr, trc_node.recurrent_to_output_weights());
+
+ ASSERT_EQ(nullptr, trc_node.cell_to_input_weights());
+ ASSERT_EQ(nullptr, trc_node.cell_to_forget_weights());
+ ASSERT_EQ(nullptr, trc_node.cell_to_output_weights());
+
+ ASSERT_EQ(nullptr, trc_node.input_gate_bias());
+ ASSERT_EQ(nullptr, trc_node.forget_gate_bias());
+ ASSERT_EQ(nullptr, trc_node.cell_gate_bias());
+ ASSERT_EQ(nullptr, trc_node.output_gate_bias());
+
+ ASSERT_EQ(nullptr, trc_node.projection_weights());
+ ASSERT_EQ(nullptr, trc_node.projection_bias());
+
+ ASSERT_EQ(nullptr, trc_node.output_state());
+ ASSERT_EQ(nullptr, trc_node.cell_state());
+
+ ASSERT_EQ(nullptr, trc_node.input_layer_norm_coefficients());
+ ASSERT_EQ(nullptr, trc_node.forget_layer_norm_coefficients());
+ ASSERT_EQ(nullptr, trc_node.cell_layer_norm_coefficients());
+ ASSERT_EQ(nullptr, trc_node.output_layer_norm_coefficients());
+
+ ASSERT_EQ(luci::FusedActFunc::UNDEFINED, trc_node.fusedActivationFunction());
+ ASSERT_EQ(0.f, trc_node.cell_clip());
+ ASSERT_EQ(0.f, trc_node.proj_clip());
+ ASSERT_EQ(false, trc_node.time_major());
+ ASSERT_EQ(false, trc_node.asymmetric_quantize_inputs());
+}
+
+TEST(CircleUnidirectionalSequenceLSTMTest, arity_NEG)
+{
+ luci::CircleUnidirectionalSequenceLSTM trc_node;
+
+ ASSERT_NO_THROW(trc_node.arg(20));
+ ASSERT_THROW(trc_node.arg(24), std::out_of_range);
+}
+
+TEST(CircleUnidirectionalSequenceLSTMTest, visit_mutable_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeMutableVisitor<void>
+ {
+ };
+
+ luci::CircleUnidirectionalSequenceLSTM trc_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(trc_node.accept(&tv), std::exception);
+}
+
+TEST(CircleUnidirectionalSequenceLSTMTest, visit_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeVisitor<void>
+ {
+ };
+
+ luci::CircleUnidirectionalSequenceLSTM trc_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(trc_node.accept(&tv), std::exception);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleVariable.test.cpp b/compiler/luci/lang/src/Nodes/CircleVariable.test.cpp
new file mode 100644
index 000000000..e1864f8da
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleVariable.test.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleVariable.h"
+
+#include "luci/IR/CircleDialect.h"
+#include "luci/IR/CircleNodeVisitor.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleVariableTest, constructor)
+{
+ luci::CircleVariable var_node;
+
+ ASSERT_EQ(luci::CircleDialect::get(), var_node.dialect());
+ ASSERT_EQ(luci::CircleOpcode::CIRCLEVARIABLE, var_node.opcode());
+}
+
+TEST(CircleVariableTest, arity_NEG)
+{
+ luci::CircleVariable var_node;
+
+ ASSERT_THROW(var_node.arg(0), std::out_of_range);
+}
+
+TEST(CircleVariableTest, visit_mutable_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeMutableVisitor<void>
+ {
+ };
+
+ luci::CircleVariable var_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(var_node.accept(&tv), std::exception);
+}
+
+TEST(CircleVariableTest, visit_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeVisitor<void>
+ {
+ };
+
+ luci::CircleVariable var_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(var_node.accept(&tv), std::exception);
+}
diff --git a/compiler/luci/log/CMakeLists.txt b/compiler/luci/log/CMakeLists.txt
index 5e822871b..bbd733f1e 100644
--- a/compiler/luci/log/CMakeLists.txt
+++ b/compiler/luci/log/CMakeLists.txt
@@ -1,10 +1,16 @@
# TODO Find how to test logging framework
file(GLOB_RECURSE SOURCES "src/*.cpp")
-add_library(luci_log SHARED ${SOURCES})
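+# Build as a SHARED library unless the including project presets LUCI_LIBRARY_TYPE.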
+if (NOT LUCI_LIBRARY_TYPE)
+ set(LUCI_LIBRARY_TYPE "SHARED")
+endif(NOT LUCI_LIBRARY_TYPE)
+
+add_library(luci_log ${LUCI_LIBRARY_TYPE} ${SOURCES})
target_include_directories(luci_log PUBLIC include)
target_link_libraries(luci_log PUBLIC hermes)
target_link_libraries(luci_log PRIVATE hermes_std)
target_link_libraries(luci_log PRIVATE nncc_common)
target_link_libraries(luci_log PRIVATE luci_env)
install(TARGETS luci_log DESTINATION lib)
+install(DIRECTORY include/ DESTINATION include
+ FILES_MATCHING PATTERN "*.h")
diff --git a/compiler/luci/log/include/luci/Log.h b/compiler/luci/log/include/luci/Log.h
index e148810d8..ddb34f47f 100644
--- a/compiler/luci/log/include/luci/Log.h
+++ b/compiler/luci/log/include/luci/Log.h
@@ -48,7 +48,6 @@ public:
private:
bool _show_warn = true;
- bool _show_info = false;
int _show_verbose = 0;
};
@@ -67,8 +66,8 @@ private:
#define LOGGER(name) ::luci::Logger name{::luci::LoggingContext::get()};
// TODO Support FATAL, ERROR
-#define INFO(name) HERMES_INFO(name)
-#define WARN(name) HERMES_WARN(name)
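+// INFO and WARN are now routed through hermes verbose levels (3 and 2
+// respectively), so the LUCI_LOG level controls them together with VERBOSE.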
+#define INFO(name) HERMES_VERBOSE(name, 3)
+#define WARN(name) HERMES_VERBOSE(name, 2)
#define VERBOSE(name, lv) HERMES_VERBOSE(name, lv)
// WARNING!
diff --git a/compiler/luci/log/src/Log.cpp b/compiler/luci/log/src/Log.cpp
index c26bf307b..27049bef9 100644
--- a/compiler/luci/log/src/Log.cpp
+++ b/compiler/luci/log/src/Log.cpp
@@ -33,11 +33,6 @@ namespace
*/
template <typename T> T safecast(const char *, const T &);
-template <> bool safecast<bool>(const char *s, const bool &value)
-{
- return (s == nullptr) ? value : (std::stoi(s) != 0);
-}
-
template <> int safecast<int>(const char *s, const int &value)
{
return (s == nullptr) ? value : std::stoi(s);
@@ -68,9 +63,6 @@ LoggerConfig::LoggerConfig()
_show_warn = !settings->get(luci::UserSettings::Key::MuteWarnings);
- // Turn on info logging if LUCI_LOG is set as non-zero value
- _show_info = safecast<bool>(std::getenv("LUCI_LOG"), false);
-
// Turn on verbose logging if LUCI_LOG is set to some level
// VERBOSE(l, 1) will be visible with LUCI_LOG=2 and VERBOSE(l, 2) with LUCI_LOG=3 and so on
_show_verbose = safecast<int>(std::getenv("LUCI_LOG"), 0);
@@ -87,25 +79,15 @@ void LoggerConfig::configure(const hermes::Source *source, hermes::Source::Setti
void LoggerConfig::configure(const Logger *, hermes::Source::Setting &setting) const
{
- setting.filter(hermes::SeverityCategory::FATAL).reject_all();
- setting.filter(hermes::SeverityCategory::ERROR).reject_all();
- setting.filter(hermes::SeverityCategory::WARN).reject_all();
- setting.filter(hermes::SeverityCategory::INFO).reject_all();
- setting.filter(hermes::SeverityCategory::VERBOSE).reject_all();
-
- // TODO enable FATAL and ERROR
+ setting.reject_all();
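+  // Reject everything first, then accept each severity up to the verbose
+  // level configured via LUCI_LOG.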
+ setting.filter(hermes::SeverityCategory::FATAL).accept_upto(_show_verbose);
+ setting.filter(hermes::SeverityCategory::ERROR).accept_upto(_show_verbose);
if (_show_warn)
{
- setting.filter(hermes::SeverityCategory::WARN).accept_all();
- }
- if (_show_info)
- {
- setting.filter(hermes::SeverityCategory::INFO).accept_all();
- }
- if (_show_verbose)
- {
- setting.filter(hermes::SeverityCategory::VERBOSE).accept_upto(_show_verbose);
+ setting.filter(hermes::SeverityCategory::WARN).accept_upto(_show_verbose);
}
+ setting.filter(hermes::SeverityCategory::INFO).accept_upto(_show_verbose);
+ setting.filter(hermes::SeverityCategory::VERBOSE).accept_upto(_show_verbose);
}
} // namespace luci
diff --git a/compiler/luci/logex/CMakeLists.txt b/compiler/luci/logex/CMakeLists.txt
index fa2ea123c..b8a2111dd 100644
--- a/compiler/luci/logex/CMakeLists.txt
+++ b/compiler/luci/logex/CMakeLists.txt
@@ -1,7 +1,13 @@
# TODO Find how to test logging-ex utility
file(GLOB_RECURSE SOURCES "src/*.cpp")
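+# Exclude *.test.cpp from the library sources; they are built into luci_logex_test below.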
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
-add_library(luci_logex SHARED ${SOURCES})
+if (NOT LUCI_LIBRARY_TYPE)
+ set(LUCI_LIBRARY_TYPE "SHARED")
+endif(NOT LUCI_LIBRARY_TYPE)
+
+add_library(luci_logex ${LUCI_LIBRARY_TYPE} ${SOURCES})
target_include_directories(luci_logex PUBLIC include)
target_link_libraries(luci_logex PUBLIC loco)
target_link_libraries(luci_logex PUBLIC locop)
@@ -9,5 +15,17 @@ target_link_libraries(luci_logex PRIVATE luci_log)
target_link_libraries(luci_logex PRIVATE luci_lang)
target_link_libraries(luci_logex PRIVATE hermes_std)
target_link_libraries(luci_logex PRIVATE nncc_common)
-target_link_libraries(luci_logex PRIVATE pepper_str)
install(TARGETS luci_logex DESTINATION lib)
+install(DIRECTORY include/ DESTINATION include
+ FILES_MATCHING PATTERN "*.h")
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(luci_logex_test ${TESTS})
+target_include_directories(luci_logex_test PRIVATE src)
+target_link_libraries(luci_logex_test luci_logex)
+target_link_libraries(luci_logex_test luci_lang)
diff --git a/compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp b/compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp
new file mode 100644
index 000000000..e7f38d07b
--- /dev/null
+++ b/compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp
@@ -0,0 +1,282 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleNodeSummaryBuilder.h"
+#include "CircleNodeSummaryBuilders.h"
+
+#include <luci/IR/CircleDialect.h>
+
+#include <memory>
+
+namespace
+{
+
+std::string circle_opname(luci::CircleOpcode opcode)
+{
+ static const std::string prefix{"circle."};
+
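+  // The X-macro include below expands to one case per opcode listed in
+  // luci/IR/CircleNodes.lst, each returning "circle." + the opcode name.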
+ switch (opcode)
+ {
+#define CIRCLE_NODE(OPCODE, CLASS) \
+ case luci::CircleOpcode::OPCODE: \
+ return prefix + #OPCODE;
+#define CIRCLE_VNODE CIRCLE_NODE
+#include <luci/IR/CircleNodes.lst>
+#undef CIRCLE_VNODE
+#undef CIRCLE_NODE
+ default:
+ break;
+  }
+
+ return prefix + "Invalid";
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool CircleNodeSummaryBuilder::build(const loco::Node *node, const locop::SymbolTable *tbl,
+ locop::NodeSummary &s)
+{
+ if (node->dialect() != luci::CircleDialect::get())
+ return false;
+
+ auto ptr_to_str = [](const void *ptr) {
+ std::stringstream ss;
+ ss << ptr;
+ return ss.str();
+ };
+
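+  // Render the node shape as <d0,d1,...>, printing -1 for unknown dimensions.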
+ auto shape_to_str = [](const luci::CircleNode *node) {
+ std::stringstream ss;
+ ss << "<";
+ for (uint32_t i = 0; i < node->rank(); ++i)
+ {
+ if (i)
+ ss << ",";
+      ss << (node->dim(i).known() ? static_cast<int64_t>(node->dim(i).value()) : -1);
+ }
+ ss << ">";
+ return ss.str();
+ };
+
+ auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
+ if (const auto builder = create_builder(circle_node))
+ {
+ if (!builder->validate(circle_node))
+ {
+ s.state(locop::NodeDesc::State::Invalid);
+ return false;
+ }
+
+ auto input_names = builder->get_input_names(circle_node);
+ assert(node->arity() == input_names.size());
+ for (uint32_t i = 0; i < node->arity(); ++i)
+ s.args().append(input_names.at(i), tbl->lookup(node->arg(i)));
+
+ builder->build_attributes(circle_node, s);
+ builder->update_status(s);
+
+ s.opname(circle_opname(circle_node->opcode()));
+ s.comments().append("[" + circle_node->name() + " " + shape_to_str(circle_node) +
+ "] = " + ptr_to_str(node));
+
+ return true;
+ }
+ else
+ {
+ // When SummaryBuilder is not implemented, return false
+ return false;
+ }
+}
+
+bool CircleNodeSummaryBuilder::validate(const luci::CircleNode *) { return true; }
+
+std::vector<std::string> CircleNodeSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  // By default, a node reports no input names.
+ return std::vector<std::string>();
+}
+
+void CircleNodeSummaryBuilder::build_attributes(const luci::CircleNode *, locop::NodeSummary &)
+{
+  // By default, a node has no attributes to build.
+}
+
+void CircleNodeSummaryBuilder::update_status(locop::NodeSummary &s)
+{
+ s.state(locop::NodeDesc::State::Complete);
+}
+
+std::unique_ptr<CircleNodeSummaryBuilder>
+CircleNodeSummaryBuilder::create_builder(const luci::CircleNode *node)
+{
+ switch (node->opcode())
+ {
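+    // Each supported opcode maps to its dedicated summary builder; opcodes
+    // without a builder fall through to the default case and return nullptr.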
+#define CIRCLE_NODE(OPCODE, CLASS) \
+ case luci::CircleOpcode::OPCODE: \
+ { \
+ return std::make_unique<CLASS>(); \
+ }
+
+ CIRCLE_NODE(ABS, CircleAbsSummaryBuilder)
+ CIRCLE_NODE(ADD, CircleAddSummaryBuilder)
+ CIRCLE_NODE(ADD_N, CircleAddNSummaryBuilder)
+ CIRCLE_NODE(ARG_MAX, CircleArgMaxSummaryBuilder)
+ CIRCLE_NODE(ARG_MIN, CircleArgMinSummaryBuilder)
+ CIRCLE_NODE(AVERAGE_POOL_2D, CircleAveragePool2DSummaryBuilder)
+ CIRCLE_NODE(BATCH_MATMUL, CircleBatchMatMulSummaryBuilder)
+ CIRCLE_NODE(BATCH_TO_SPACE_ND, CircleBatchToSpaceNDSummaryBuilder)
+ CIRCLE_NODE(BCQ_FULLY_CONNECTED, CircleBCQFullyConnectedSummaryBuilder)
+ CIRCLE_NODE(BCQ_GATHER, CircleBCQGatherSummaryBuilder)
+ CIRCLE_NODE(BIDIRECTIONAL_SEQUENCE_LSTM, CircleBidirectionalSequenceLSTMSummaryBuilder)
+ CIRCLE_NODE(CAST, CircleCastSummaryBuilder)
+ CIRCLE_NODE(CEIL, CircleCeilSummaryBuilder)
+ CIRCLE_NODE(CONCATENATION, CircleConcatenationSummaryBuilder)
+ CIRCLE_NODE(CIRCLECONST, CircleConstSummaryBuilder)
+ CIRCLE_NODE(CONV_2D, CircleConv2DSummaryBuilder)
+ CIRCLE_NODE(COS, CircleCosSummaryBuilder)
+ CIRCLE_NODE(CUSTOM, CircleCustomSummaryBuilder)
+ CIRCLE_NODE(DENSIFY, CircleDensifySummaryBuilder)
+ CIRCLE_NODE(DEPTH_TO_SPACE, CircleDepthToSpaceSummaryBuilder)
+ CIRCLE_NODE(DEPTHWISE_CONV_2D, CircleDepthwiseConv2DSummaryBuilder)
+ CIRCLE_NODE(DEQUANTIZE, CircleDequantizeSummaryBuilder)
+ CIRCLE_NODE(DIV, CircleDivSummaryBuilder)
+ CIRCLE_NODE(ELU, CircleEluSummaryBuilder)
+ CIRCLE_NODE(EQUAL, CircleEqualSummaryBuilder)
+ CIRCLE_NODE(EXP, CircleExpSummaryBuilder)
+ CIRCLE_NODE(EXPAND_DIMS, CircleExpandDimsSummaryBuilder)
+ CIRCLE_NODE(FAKE_QUANT, CircleFakeQuantSummaryBuilder)
+ CIRCLE_NODE(FILL, CircleFillSummaryBuilder)
+ CIRCLE_NODE(FLOOR, CircleFloorSummaryBuilder)
+ CIRCLE_NODE(FLOOR_DIV, CircleFloorDivSummaryBuilder)
+ CIRCLE_NODE(FLOOR_MOD, CircleFloorModSummaryBuilder)
+ CIRCLE_NODE(FULLY_CONNECTED, CircleFullyConnectedSummaryBuilder)
+ CIRCLE_NODE(GATHER, CircleGatherSummaryBuilder)
+ CIRCLE_NODE(GATHER_ND, CircleGatherNdSummaryBuilder)
+ CIRCLE_NODE(GELU, CircleGeluSummaryBuilder)
+ CIRCLE_NODE(GREATER, CircleGreaterSummaryBuilder)
+ CIRCLE_NODE(GREATER_EQUAL, CircleGreaterEqualSummaryBuilder)
+ CIRCLE_NODE(HARD_SWISH, CircleHardSwishSummaryBuilder)
+ CIRCLE_NODE(IF, CircleIfSummaryBuilder)
+ CIRCLE_NODE(INSTANCE_NORM, CircleInstanceNormSummaryBuilder)
+ CIRCLE_NODE(L2_NORMALIZATION, CircleL2NormalizeSummaryBuilder)
+ CIRCLE_NODE(L2_POOL_2D, CircleL2Pool2DSummaryBuilder)
+ CIRCLE_NODE(LEAKY_RELU, CircleLeakyReluSummaryBuilder)
+ CIRCLE_NODE(LESS, CircleLessSummaryBuilder)
+ CIRCLE_NODE(LESS_EQUAL, CircleLessEqualSummaryBuilder)
+ CIRCLE_NODE(LOCAL_RESPONSE_NORMALIZATION, CircleLocalResponseNormalizationSummaryBuilder)
+ CIRCLE_NODE(LOG, CircleLogSummaryBuilder)
+ CIRCLE_NODE(LOGICAL_AND, CircleLogicalAndSummaryBuilder)
+ CIRCLE_NODE(LOGICAL_NOT, CircleLogicalNotSummaryBuilder)
+ CIRCLE_NODE(LOGICAL_OR, CircleLogicalOrSummaryBuilder)
+ CIRCLE_NODE(LOGISTIC, CircleLogisticSummaryBuilder)
+ CIRCLE_NODE(LOG_SOFTMAX, CircleLogSoftmaxSummaryBuilder)
+ CIRCLE_NODE(MATRIX_DIAG, CircleMatrixDiagSummaryBuilder)
+ CIRCLE_NODE(MATRIX_SET_DIAG, CircleMatrixSetDiagSummaryBuilder)
+ CIRCLE_NODE(MAXIMUM, CircleMaximumSummaryBuilder)
+ CIRCLE_NODE(MAX_POOL_2D, CircleMaxPool2DSummaryBuilder)
+ CIRCLE_NODE(MEAN, CircleMeanSummaryBuilder)
+ CIRCLE_NODE(MINIMUM, CircleMinimumSummaryBuilder)
+ CIRCLE_NODE(MIRROR_PAD, CircleMirrorPadSummaryBuilder)
+ CIRCLE_NODE(MUL, CircleMulSummaryBuilder)
+ CIRCLE_NODE(NEG, CircleNegSummaryBuilder)
+ CIRCLE_NODE(NON_MAX_SUPPRESSION_V4, CircleNonMaxSuppressionV4SummaryBuilder)
+ CIRCLE_NODE(NON_MAX_SUPPRESSION_V5, CircleNonMaxSuppressionV5SummaryBuilder)
+ CIRCLE_NODE(NOT_EQUAL, CircleNotEqualSummaryBuilder)
+ CIRCLE_NODE(ONE_HOT, CircleOneHotSummaryBuilder)
+ CIRCLE_NODE(PACK, CirclePackSummaryBuilder)
+ CIRCLE_NODE(PAD, CirclePadSummaryBuilder)
+ CIRCLE_NODE(PADV2, CirclePadV2SummaryBuilder)
+ CIRCLE_NODE(POW, CirclePowSummaryBuilder)
+ CIRCLE_NODE(PRELU, CirclePReluSummaryBuilder)
+ CIRCLE_NODE(QUANTIZE, CircleQuantizeSummaryBuilder)
+ CIRCLE_NODE(RANGE, CircleRangeSummaryBuilder)
+ CIRCLE_NODE(RANK, CircleRankSummaryBuilder)
+ CIRCLE_NODE(REDUCE_ANY, CircleReduceAnySummaryBuilder)
+ CIRCLE_NODE(REDUCE_MAX, CircleReduceMaxSummaryBuilder)
+ CIRCLE_NODE(REDUCE_MIN, CircleReduceMinSummaryBuilder)
+ CIRCLE_NODE(REDUCE_PROD, CircleReduceProdSummaryBuilder)
+ CIRCLE_NODE(RELU, CircleReluSummaryBuilder)
+ CIRCLE_NODE(RELU6, CircleRelu6SummaryBuilder)
+ CIRCLE_NODE(RELU_N1_TO_1, CircleReluN1To1SummaryBuilder)
+ CIRCLE_NODE(RESHAPE, CircleReshapeSummaryBuilder)
+ CIRCLE_NODE(RESIZE_BILINEAR, CircleResizeBilinearSummaryBuilder)
+ CIRCLE_NODE(RESIZE_NEAREST_NEIGHBOR, CircleResizeNearestNeighborSummaryBuilder)
+ CIRCLE_NODE(REVERSE_SEQUENCE, CircleReverseSequenceSummaryBuilder)
+ CIRCLE_NODE(REVERSE_V2, CircleReverseV2SummaryBuilder)
+ CIRCLE_NODE(ROUND, CircleRoundSummaryBuilder)
+ CIRCLE_NODE(RSQRT, CircleRsqrtSummaryBuilder)
+ CIRCLE_NODE(SCATTER_ND, CircleScatterNdSummaryBuilder)
+ CIRCLE_NODE(SEGMENT_SUM, CircleSegmentSumSummaryBuilder)
+ CIRCLE_NODE(SELECT, CircleSelectSummaryBuilder)
+ CIRCLE_NODE(SELECT_V2, CircleSelectV2SummaryBuilder)
+ CIRCLE_NODE(SHAPE, CircleShapeSummaryBuilder)
+ CIRCLE_NODE(SIN, CircleSinSummaryBuilder)
+ CIRCLE_NODE(SLICE, CircleSliceSummaryBuilder)
+ CIRCLE_NODE(SOFTMAX, CircleSoftmaxSummaryBuilder)
+ CIRCLE_NODE(SPACE_TO_BATCH_ND, CircleSpaceToBatchNDSummaryBuilder)
+ CIRCLE_NODE(SPACE_TO_DEPTH, CircleSpaceToDepthSummaryBuilder)
+ CIRCLE_NODE(SPARSE_TO_DENSE, CircleSparseToDenseSummaryBuilder)
+ CIRCLE_NODE(SPLIT, CircleSplitSummaryBuilder)
+ CIRCLE_NODE(SPLIT_V, CircleSplitVSummaryBuilder)
+ CIRCLE_NODE(SQRT, CircleSqrtSummaryBuilder)
+ CIRCLE_NODE(SQUARE, CircleSquareSummaryBuilder)
+ CIRCLE_NODE(SQUARED_DIFFERENCE, CircleSquaredDifferenceSummaryBuilder)
+ CIRCLE_NODE(SQUEEZE, CircleSqueezeSummaryBuilder)
+ CIRCLE_NODE(STRIDED_SLICE, CircleStridedSliceSummaryBuilder)
+ CIRCLE_NODE(SUB, CircleSubSummaryBuilder)
+ CIRCLE_NODE(SUM, CircleSumSummaryBuilder)
+ CIRCLE_NODE(SVDF, CircleSVDFSummaryBuilder)
+ CIRCLE_NODE(TANH, CircleTanhSummaryBuilder)
+ CIRCLE_NODE(TILE, CircleTileSummaryBuilder)
+ CIRCLE_NODE(TOPK_V2, CircleTopKV2SummaryBuilder)
+ CIRCLE_NODE(TRANSPOSE, CircleTransposeSummaryBuilder)
+ CIRCLE_NODE(TRANSPOSE_CONV, CircleTransposeConvSummaryBuilder)
+ CIRCLE_NODE(UNIDIRECTIONAL_SEQUENCE_LSTM, CircleUnidirectionalSequenceLSTMSummaryBuilder)
+ CIRCLE_NODE(UNIQUE, CircleUniqueSummaryBuilder)
+ CIRCLE_NODE(UNPACK, CircleUnpackSummaryBuilder)
+ CIRCLE_NODE(WHERE, CircleWhereSummaryBuilder)
+ CIRCLE_NODE(WHILE, CircleWhileSummaryBuilder)
+ CIRCLE_NODE(ZEROS_LIKE, CircleZerosLikeSummaryBuilder)
+
+ CIRCLE_NODE(CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT,
+ CircleBidirectionalSequenceLSTMOutSummaryBuilder)
+ CIRCLE_NODE(CIRCLECUSTOMOUT, CircleCustomOutSummaryBuilder)
+ CIRCLE_NODE(CIRCLEIFOUT, CircleIfOutSummaryBuilder)
+ CIRCLE_NODE(CIRCLEINPUT, CircleInputSummaryBuilder)
+ CIRCLE_NODE(CIRCLENONMAXSUPPRESSIONV4OUT, CircleNonMaxSuppressionV4OutSummaryBuilder)
+ CIRCLE_NODE(CIRCLENONMAXSUPPRESSIONV5OUT, CircleNonMaxSuppressionV5OutSummaryBuilder)
+ CIRCLE_NODE(CIRCLEOUTPUT, CircleOutputSummaryBuilder)
+ CIRCLE_NODE(CIRCLEOUTPUTDUMMY, CircleOutputDummySummaryBuilder)
+ CIRCLE_NODE(CIRCLEOUTPUTEXCLUDE, CircleOutputExcludeSummaryBuilder)
+ CIRCLE_NODE(CIRCLESPLITOUT, CircleSplitOutSummaryBuilder)
+ CIRCLE_NODE(CIRCLESPLITVOUT, CircleSplitVOutSummaryBuilder)
+ CIRCLE_NODE(CIRCLETOPKV2OUT, CircleTopKV2OutSummaryBuilder)
+ CIRCLE_NODE(CIRCLEUNIQUEOUT, CircleUniqueOutSummaryBuilder)
+ CIRCLE_NODE(CIRCLEUNPACKOUT, CircleUnpackOutSummaryBuilder)
+ CIRCLE_NODE(CIRCLEVARIABLE, CircleVariableSummaryBuilder)
+ CIRCLE_NODE(CIRCLEWHILEOUT, CircleWhileOutSummaryBuilder)
+
+ default:
+ return nullptr;
+
+#undef CIRCLE_NODE
+ }
+}
+
+} // namespace luci
diff --git a/compiler/luci/logex/src/CircleNodeSummaryBuilder.h b/compiler/luci/logex/src/CircleNodeSummaryBuilder.h
new file mode 100644
index 000000000..e21d77310
--- /dev/null
+++ b/compiler/luci/logex/src/CircleNodeSummaryBuilder.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_LOGEX_CIRCLE_NODE_SUMMARY_BUILDER__
+#define __LUCI_LOGEX_CIRCLE_NODE_SUMMARY_BUILDER__
+
+#include <luci/IR/CircleNode.h>
+#include <locop/NodeSummary.h>
+#include <locop/SymbolTable.h>
+
+#include <memory>
+#include <sstream>
+#include <vector>
+
+namespace luci
+{
+
+class CircleNodeSummaryBuilder
+{
+public:
+  // Virtual destructor: derived builders are owned and destroyed via this base class.
+  virtual ~CircleNodeSummaryBuilder() = default;
+  bool build(const loco::Node *node, const locop::SymbolTable *tbl, locop::NodeSummary &s);
+
+private:
+  /**
+   * @brief Template methods for building a node summary.
+   *        The default implementations assume a node without inputs.
+   */
+ virtual bool validate(const luci::CircleNode *node);
+ virtual std::vector<std::string> get_input_names(const luci::CircleNode *node);
+ virtual void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+ virtual void update_status(locop::NodeSummary &s);
+
+private:
+ std::unique_ptr<CircleNodeSummaryBuilder> create_builder(const luci::CircleNode *node);
+};
+
+} // namespace luci
+
+#endif // __LUCI_LOGEX_CIRCLE_NODE_SUMMARY_BUILDER__
diff --git a/compiler/luci/logex/src/CircleNodeSummaryBuilder.test.cpp b/compiler/luci/logex/src/CircleNodeSummaryBuilder.test.cpp
new file mode 100644
index 000000000..ae76badc6
--- /dev/null
+++ b/compiler/luci/logex/src/CircleNodeSummaryBuilder.test.cpp
@@ -0,0 +1,317 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleNodeSummaryBuilder.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <locop/NodeSummary.h>
+#include <locop/SymbolTable.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+class MockSymbolTable : public locop::SymbolTable
+{
+ std::string lookup(const loco::Node *) const override
+ {
+    return "Do nothing because it is a mock SymbolTable!";
+ }
+};
+
+class CircleNodeSummaryBuilderTest : public ::testing::Test
+{
+protected:
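+  // Runs the real CircleNodeSummaryBuilder against a mock symbol table and
+  // reports whether a summary could be built.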
+ bool mock_build(const loco::Node *node)
+ {
+ return luci::CircleNodeSummaryBuilder().build(node, &_tbl, _s);
+ }
+
+protected:
+ MockSymbolTable _tbl;
+ locop::NodeSummary _s;
+};
+
+} // namespace
+
+TEST_F(CircleNodeSummaryBuilderTest, Add_validate)
+{
+ luci::CircleAdd node;
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, Add_validate_fused_NEG)
+{
+ luci::CircleAdd node;
+ node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, AveragePool2D_validate)
+{
+ luci::CircleAveragePool2D node;
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ node.padding(luci::Padding::SAME);
+ EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, AveragePool2D_validate_fused_NEG)
+{
+ luci::CircleAveragePool2D node;
+ node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ node.padding(luci::Padding::SAME);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, AveragePool2D_validate_padding_NEG)
+{
+ luci::CircleAveragePool2D node;
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ node.padding(luci::Padding::UNDEFINED);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, BCQFullyConnected_validate)
+{
+ luci::CircleBCQFullyConnected node;
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, BCQFullyConnected_validate_fused_NEG)
+{
+ luci::CircleBCQFullyConnected node;
+ node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, Concatenation_validate)
+{
+ luci::CircleConcatenation node(2);
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, Concatenation_validate_fused_NEG)
+{
+ luci::CircleConcatenation node(2);
+ node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, Conv2D_validate)
+{
+ luci::CircleConv2D node;
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ node.padding(luci::Padding::SAME);
+ EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, Conv2D_validate_fused_NEG)
+{
+ luci::CircleConv2D node;
+ node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ node.padding(luci::Padding::SAME);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, Conv2D_validate_padding_NEG)
+{
+ luci::CircleConv2D node;
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ node.padding(luci::Padding::UNDEFINED);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, DepthwiseConv2D_validate)
+{
+ luci::CircleDepthwiseConv2D node;
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ node.padding(luci::Padding::SAME);
+ EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, DepthwiseConv2D_validate_fused_NEG)
+{
+ luci::CircleDepthwiseConv2D node;
+ node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ node.padding(luci::Padding::SAME);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, DepthwiseConv2D_validate_padding_NEG)
+{
+ luci::CircleDepthwiseConv2D node;
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ node.padding(luci::Padding::UNDEFINED);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, FullyConnected_validate)
+{
+ luci::CircleFullyConnected node;
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, FullyConnected_validate_fused_NEG)
+{
+ luci::CircleFullyConnected node;
+ node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, InstanceNorm_validate)
+{
+ luci::CircleInstanceNorm node;
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, InstanceNorm_validate_fused_NEG)
+{
+ luci::CircleInstanceNorm node;
+ node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, L2Normalize_validate)
+{
+ luci::CircleL2Normalize node;
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, L2Normalize_validate_fused_NEG)
+{
+ luci::CircleL2Normalize node;
+ node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, L2Pool2D_validate)
+{
+ luci::CircleL2Pool2D node;
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ node.padding(luci::Padding::SAME);
+ EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, L2Pool2D_validate_fused_NEG)
+{
+ luci::CircleL2Pool2D node;
+ node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ node.padding(luci::Padding::SAME);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, L2Pool2D_validate_padding_NEG)
+{
+ luci::CircleL2Pool2D node;
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ node.padding(luci::Padding::UNDEFINED);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, MaxPool2D_validate)
+{
+ luci::CircleMaxPool2D node;
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ node.padding(luci::Padding::SAME);
+ EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, MaxPool2D_validate_fused_NEG)
+{
+ luci::CircleMaxPool2D node;
+ node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ node.padding(luci::Padding::SAME);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, MaxPool2D_validate_padding_NEG)
+{
+ luci::CircleMaxPool2D node;
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ node.padding(luci::Padding::UNDEFINED);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, MirrorPad_validate)
+{
+ luci::CircleMirrorPad node;
+ node.mode(luci::MirrorPadMode::REFLECT);
+ EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, MirrorPad_validate_mirror_padding_NEG)
+{
+ luci::CircleMirrorPad node;
+ node.mode(luci::MirrorPadMode::UNDEFINED);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, Mul_validate)
+{
+ luci::CircleMul node;
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, Mul_validate_fused_NEG)
+{
+ luci::CircleMul node;
+ node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, SVDF_validate)
+{
+ luci::CircleSVDF node;
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, SVDF_validate_fused_NEG)
+{
+ luci::CircleSVDF node;
+ node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, TransposeConv_validate)
+{
+ luci::CircleTransposeConv node;
+ node.padding(luci::Padding::SAME);
+ node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, TransposeConv_validate_padding_NEG)
+{
+ luci::CircleTransposeConv node;
+ node.padding(luci::Padding::UNDEFINED);
+ EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, TransposeConv_validate_fused_NEG)
+{
+ luci::CircleTransposeConv node;
+ node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ EXPECT_FALSE(mock_build(&node));
+}
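The fixture above is also the shortest recipe for driving the dispatcher outside GoogleTest. A minimal sketch (illustration only, not part of this commit), reusing only the calls visible in the test; DummyTable is a hypothetical stand-in for the mock:

    #include "CircleNodeSummaryBuilder.h"

    #include <luci/IR/CircleNodes.h>
    #include <locop/NodeSummary.h>
    #include <locop/SymbolTable.h>

    // Hypothetical stand-in, mirroring MockSymbolTable in the test above.
    struct DummyTable : public locop::SymbolTable
    {
      std::string lookup(const loco::Node *) const override { return "?"; }
    };

    bool summarize_add()
    {
      luci::CircleAdd node;
      node.fusedActivationFunction(luci::FusedActFunc::RELU);

      DummyTable tbl;
      locop::NodeSummary s;
      // build() returns false when validate() rejects the node (e.g. an
      // UNDEFINED fused activation) and true once the summary is filled.
      return luci::CircleNodeSummaryBuilder().build(&node, &tbl, s);
    }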
diff --git a/compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp b/compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp
new file mode 100644
index 000000000..aba6a8681
--- /dev/null
+++ b/compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp
@@ -0,0 +1,1157 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleNodeSummaryBuilders.h"
+
+#include <luci/IR/CircleNode.h>
+#include <luci/IR/CircleNodes.h>
+#include <loco/IR/Node.h>
+
+#include <string>
+#include <vector>
+
+namespace
+{
+
+std::string to_str(loco::DataType type)
+{
+ switch (type)
+ {
+ case loco::DataType::U8:
+ return "UINT8";
+ case loco::DataType::U16:
+ return "UINT16";
+ case loco::DataType::U32:
+ return "UINT32";
+ case loco::DataType::U64:
+ return "UINT64";
+
+ case loco::DataType::S8:
+ return "INT8";
+ case loco::DataType::S16:
+ return "INT16";
+ case loco::DataType::S32:
+ return "INT32";
+ case loco::DataType::S64:
+ return "INT64";
+
+ case loco::DataType::FLOAT16:
+ return "FLOAT16";
+ case loco::DataType::FLOAT32:
+ return "FLOAT32";
+ case loco::DataType::FLOAT64:
+ return "FLOAT64";
+
+ case loco::DataType::BOOL:
+ return "BOOL";
+
+ default:
+ return "Error";
+ }
+}
+
+std::string to_str(float value) { return std::to_string(value); }
+
+std::string to_str(int32_t value) { return std::to_string(value); }
+
+std::string to_str(bool value) { return value ? "true" : "false"; }
+
+std::string to_str(luci::FusedActFunc fused)
+{
+ switch (fused)
+ {
+ case luci::FusedActFunc::NONE:
+ return "NONE";
+ case luci::FusedActFunc::RELU:
+ return "RELU";
+ case luci::FusedActFunc::RELU_N1_TO_1:
+ return "RELU_N1_TO_1";
+ case luci::FusedActFunc::RELU6:
+ return "RELU6";
+ case luci::FusedActFunc::TANH:
+ return "TANH";
+ case luci::FusedActFunc::SIGN_BIT:
+ return "SIGN_BIT";
+ default:
+ return "Error";
+ }
+}
+
+std::string to_str(luci::Padding padding)
+{
+ switch (padding)
+ {
+ case luci::Padding::SAME:
+ return "SAME";
+ case luci::Padding::VALID:
+ return "VALID";
+ default:
+ return "Error";
+ }
+}
+
+std::string to_str(const luci::Stride *stride)
+{
+ return std::to_string(stride->h()) + "," + std::to_string(stride->w());
+}
+
+std::string to_str(const luci::Filter *filter)
+{
+ return std::to_string(filter->h()) + "," + std::to_string(filter->w());
+}
+
+std::string to_str(luci::MirrorPadMode mode)
+{
+ switch (mode)
+ {
+ case luci::MirrorPadMode::REFLECT:
+ return "REFLECT";
+ case luci::MirrorPadMode::SYMMETRIC:
+ return "SYMMETRIC";
+ default:
+ return "Error";
+ }
+}
+
+} // namespace
+
+namespace luci
+{
+
+std::vector<std::string> CircleNodeWithXSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"x"};
+}
+
+std::vector<std::string>
+CircleNodeWithINPUTSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input"};
+}
+
+std::vector<std::string> CircleNodeWithXYSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"x", "y"};
+}
+
+std::vector<std::string>
+CircleNodeWithFEATURESSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"features"};
+}
+
+} // namespace luci
+
+namespace luci
+{
+
+bool CircleAddSummaryBuilder::validate(const luci::CircleNode *node)
+{
+ auto add = loco::must_cast<const luci::CircleAdd *>(node);
+ if (add->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return false;
+
+ return true;
+}
+
+void CircleAddSummaryBuilder::build_attributes(const luci::CircleNode *node, locop::NodeSummary &s)
+{
+ auto add = loco::must_cast<const luci::CircleAdd *>(node);
+ s.args().append("fused_activation_function", to_str(add->fusedActivationFunction()));
+}
+
+std::vector<std::string> CircleAddNSummaryBuilder::get_input_names(const luci::CircleNode *node)
+{
+ return std::vector<std::string>(node->arity(), "inputs");
+}
+
+std::vector<std::string> CircleArgMaxSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "dimension"};
+}
+
+void CircleArgMaxSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto argmax = loco::must_cast<const luci::CircleArgMax *>(node);
+ s.args().append("output_type", to_str(argmax->output_type()));
+}
+
+std::vector<std::string> CircleArgMinSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "dimension"};
+}
+
+void CircleArgMinSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto argmin = loco::must_cast<const luci::CircleArgMin *>(node);
+ s.args().append("output_type", to_str(argmin->output_type()));
+}
+
+bool CircleAveragePool2DSummaryBuilder::validate(const luci::CircleNode *node)
+{
+ auto avgpool = loco::must_cast<const luci::CircleAveragePool2D *>(node);
+ if (avgpool->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return false;
+ if (avgpool->padding() == luci::Padding::UNDEFINED)
+ return false;
+
+ return true;
+}
+
+std::vector<std::string>
+CircleAveragePool2DSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"value"};
+}
+
+void CircleAveragePool2DSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto avgpool = loco::must_cast<const luci::CircleAveragePool2D *>(node);
+ s.args().append("filter(h,w)", to_str(avgpool->filter()));
+ s.args().append("stride(h,w)", to_str(avgpool->stride()));
+ s.args().append("padding", to_str(avgpool->padding()));
+ s.args().append("fused_activation_function", to_str(avgpool->fusedActivationFunction()));
+}
+
+void CircleBatchMatMulSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto batchmatmul = loco::must_cast<const luci::CircleBatchMatMul *>(node);
+ s.args().append("adj_x", to_str(batchmatmul->adj_x()));
+ s.args().append("adj_y", to_str(batchmatmul->adj_y()));
+}
+
+std::vector<std::string>
+CircleBatchToSpaceNDSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "block_shape", "crops"};
+}
+
+bool CircleBCQFullyConnectedSummaryBuilder::validate(const luci::CircleNode *node)
+{
+ auto bcq_fc = loco::must_cast<const luci::CircleBCQFullyConnected *>(node);
+ if (bcq_fc->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return false;
+
+ return true;
+}
+
+std::vector<std::string>
+CircleBCQFullyConnectedSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "weights_scales", "weights_binary", "bias", "weights_clusters"};
+}
+
+void CircleBCQFullyConnectedSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto bcq_fc = loco::must_cast<const luci::CircleBCQFullyConnected *>(node);
+ s.args().append("fused_activation_function", to_str(bcq_fc->fusedActivationFunction()));
+ s.args().append("weights_hidden_size", std::to_string(bcq_fc->weights_hidden_size()));
+}
+
+std::vector<std::string> CircleBCQGatherSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input_scales", "input_binary", "indices", "input_clusters"};
+}
+
+void CircleBCQGatherSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto bcq_gather = loco::must_cast<const luci::CircleBCQGather *>(node);
+ s.args().append("axis", std::to_string(bcq_gather->axis()));
+ s.args().append("input_hidden_size", std::to_string(bcq_gather->input_hidden_size()));
+}
+
+std::vector<std::string>
+CircleBidirectionalSequenceLSTMSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input",
+ "fw_input_to_input_weights",
+ "fw_input_to_forget_weights",
+ "fw_input_to_cell_weights",
+ "fw_input_to_output_weights",
+ "fw_recurrent_to_input_weights",
+ "fw_recurrent_to_forget_weights",
+ "fw_recurrent_to_cell_weights",
+ "fw_recurrent_to_output_weights",
+ "fw_cell_to_input_weights",
+ "fw_cell_to_forget_weights",
+ "fw_cell_to_output_weights",
+ "fw_input_gate_bias",
+ "fw_forget_gate_bias",
+ "fw_cell_gate_bias",
+ "fw_output_gate_bias",
+ "fw_projection_weights",
+ "fw_projection_bias",
+ "bw_input_to_input_weights",
+ "bw_input_to_forget_weights",
+ "bw_input_to_cell_weights",
+ "bw_input_to_output_weights",
+ "bw_recurrent_to_input_weights",
+ "bw_recurrent_to_forget_weights",
+ "bw_recurrent_to_cell_weights",
+ "bw_recurrent_to_output_weights",
+ "bw_cell_to_input_weights",
+ "bw_cell_to_forget_weights",
+ "bw_cell_to_output_weights",
+ "bw_input_gate_bias",
+ "bw_forget_gate_bias",
+ "bw_cell_gate_bias",
+ "bw_output_gate_bias",
+ "bw_projection_weights",
+ "bw_projection_bias",
+ "fw_activation_state",
+ "fw_cell_state",
+ "bw_activation_state",
+ "bw_cell_state",
+ "auxillary_input",
+ "fw_auxillary_input_to_input_weights",
+ "fw_auxillary_input_to_forget_weights",
+ "fw_auxillary_input_to_cell_weights",
+ "fw_auxillary_input_to_output_weights",
+ "bw_auxillary_input_to_input_weights",
+ "bw_auxillary_input_to_forget_weights",
+ "bw_auxillary_input_to_cell_weights",
+ "bw_auxillary_input_to_output_weights"};
+}
+
+void CircleBidirectionalSequenceLSTMSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto lstm = loco::must_cast<const luci::CircleBidirectionalSequenceLSTM *>(node);
+ s.args().append("cell_clip", to_str(lstm->cell_clip()));
+ s.args().append("proj_clip", to_str(lstm->proj_clip()));
+ s.args().append("merge_outputs", to_str(lstm->merge_outputs()));
+ s.args().append("time_major", to_str(lstm->time_major()));
+ s.args().append("asymmetric_quantize_inputs", to_str(lstm->asymmetric_quantize_inputs()));
+}
+
+std::vector<std::string> CircleCastSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"x"};
+}
+
+void CircleCastSummaryBuilder::build_attributes(const luci::CircleNode *node, locop::NodeSummary &s)
+{
+ auto cast = loco::must_cast<const luci::CircleCast *>(node);
+ s.args().append("in_data_type", to_str(cast->in_data_type()));
+ s.args().append("out_data_type", to_str(cast->out_data_type()));
+}
+
+bool CircleConcatenationSummaryBuilder::validate(const luci::CircleNode *node)
+{
+ auto concat = loco::must_cast<const luci::CircleConcatenation *>(node);
+ if (concat->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return false;
+
+ return true;
+}
+
+std::vector<std::string>
+CircleConcatenationSummaryBuilder::get_input_names(const luci::CircleNode *node)
+{
+ return std::vector<std::string>(node->arity(), "values");
+}
+
+void CircleConcatenationSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto concat = loco::must_cast<const luci::CircleConcatenation *>(node);
+ s.args().append("axis", std::to_string(concat->axis()));
+ s.args().append("fused_activation_function", to_str(concat->fusedActivationFunction()));
+}
+
+void CircleConstSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto circonst = loco::must_cast<const luci::CircleConst *>(node);
+ s.args().append("dtype", to_str(circonst->dtype()));
+ s.args().append("rank", std::to_string(circonst->rank()));
+ std::string shape;
+ for (uint32_t r = 0; r < circonst->rank(); ++r)
+ {
+ if (!shape.empty())
+ shape += " ";
+ shape += std::to_string(circonst->dim(r).value());
+ }
+ s.args().append("shape", "[" + shape + "]");
+}
+
+void CircleConstSummaryBuilder::update_status(locop::NodeSummary &s)
+{
+ s.state(locop::NodeDesc::State::PartiallyKnown);
+}
+
+bool CircleConv2DSummaryBuilder::validate(const luci::CircleNode *node)
+{
+ auto conv2d = loco::must_cast<const luci::CircleConv2D *>(node);
+ if (conv2d->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return false;
+ if (conv2d->padding() == luci::Padding::UNDEFINED)
+ return false;
+
+ return true;
+}
+
+std::vector<std::string> CircleConv2DSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "filter", "bias"};
+}
+
+void CircleConv2DSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto conv2d = loco::must_cast<const luci::CircleConv2D *>(node);
+ s.args().append("stride(h,w)", to_str(conv2d->stride()));
+ s.args().append("dilation(h,w)", to_str(conv2d->dilation()));
+ s.args().append("padding", to_str(conv2d->padding()));
+ s.args().append("fused_activation_function", to_str(conv2d->fusedActivationFunction()));
+}
+
+std::vector<std::string> CircleCustomSummaryBuilder::get_input_names(const luci::CircleNode *node)
+{
+ auto input_names = std::vector<std::string>();
+ for (uint32_t i = 0; i < node->arity(); ++i)
+ input_names.push_back("input" + std::to_string(i));
+ return input_names;
+}
+
+void CircleCustomSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto custom = loco::must_cast<const luci::CircleCustom *>(node);
+ s.args().append("custom_code", custom->custom_code());
+}
+
+void CircleDepthToSpaceSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto depth_to_space = loco::must_cast<const luci::CircleDepthToSpace *>(node);
+ s.args().append("block_size", std::to_string(depth_to_space->block_size()));
+}
+
+bool CircleDepthwiseConv2DSummaryBuilder::validate(const luci::CircleNode *node)
+{
+ auto dw_conv2d = loco::must_cast<const luci::CircleDepthwiseConv2D *>(node);
+ if (dw_conv2d->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return false;
+ if (dw_conv2d->padding() == luci::Padding::UNDEFINED)
+ return false;
+
+ return true;
+}
+
+std::vector<std::string>
+CircleDepthwiseConv2DSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "filter", "bias"};
+}
+
+void CircleDepthwiseConv2DSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto dw_conv2d = loco::must_cast<const luci::CircleDepthwiseConv2D *>(node);
+ s.args().append("stride(h,w)", to_str(dw_conv2d->stride()));
+ s.args().append("dilation(h,w)", to_str(dw_conv2d->dilation()));
+ s.args().append("padding", to_str(dw_conv2d->padding()));
+ s.args().append("depthMultiplier", std::to_string(dw_conv2d->depthMultiplier()));
+ s.args().append("fused_activation_function", to_str(dw_conv2d->fusedActivationFunction()));
+}
+
+std::vector<std::string> CircleExpandDimsSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "axis"};
+}
+
+std::vector<std::string> CircleFakeQuantSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"inputs"};
+}
+
+void CircleFakeQuantSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto fake_quant = loco::must_cast<const luci::CircleFakeQuant *>(node);
+ s.args().append("min", std::to_string(fake_quant->min()));
+ s.args().append("max", std::to_string(fake_quant->max()));
+ s.args().append("num_bits", std::to_string(fake_quant->num_bits()));
+ s.args().append("narrow_range", to_str(fake_quant->narrow_range()));
+}
+
+std::vector<std::string> CircleFillSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"dims", "value"};
+}
+
+bool CircleFullyConnectedSummaryBuilder::validate(const luci::CircleNode *node)
+{
+ auto fc = loco::must_cast<const luci::CircleFullyConnected *>(node);
+ if (fc->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return false;
+
+ return true;
+}
+
+std::vector<std::string>
+CircleFullyConnectedSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "weights", "bias"};
+}
+
+void CircleFullyConnectedSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto fc = loco::must_cast<const luci::CircleFullyConnected *>(node);
+ s.args().append("fused_activation_function", to_str(fc->fusedActivationFunction()));
+}
+
+std::vector<std::string> CircleGatherSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"params", "indices"};
+}
+
+void CircleGatherSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto gather = loco::must_cast<const luci::CircleGather *>(node);
+ s.args().append("axis", std::to_string(gather->axis()));
+}
+
+std::vector<std::string> CircleGatherNdSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"params", "indices"};
+}
+
+void CircleGeluSummaryBuilder::build_attributes(const luci::CircleNode *node, locop::NodeSummary &s)
+{
+ auto gelu = loco::must_cast<const luci::CircleGelu *>(node);
+ s.args().append("approximate", to_str(gelu->approximate()));
+}
+
+std::vector<std::string> CircleIfSummaryBuilder::get_input_names(const luci::CircleNode *node)
+{
+ auto circle_if = loco::must_cast<const luci::CircleIf *>(node);
+
+ auto input_names = std::vector<std::string>();
+ input_names.push_back("cond");
+ for (uint32_t i = 0; i < circle_if->input_count(); ++i)
+ input_names.push_back("input");
+
+ return input_names;
+}
+
+void CircleIfSummaryBuilder::build_attributes(const luci::CircleNode *node, locop::NodeSummary &s)
+{
+ auto circle_if = loco::must_cast<const luci::CircleIf *>(node);
+
+ if (circle_if->then_graph() != nullptr)
+ s.args().append("then_graph", circle_if->then_graph()->name());
+ else
+ s.args().append("then_branch", std::to_string(circle_if->then_branch()));
+
+ if (circle_if->else_graph() != nullptr)
+ s.args().append("else_graph", circle_if->else_graph()->name());
+ else
+ s.args().append("else_branch", std::to_string(circle_if->else_branch()));
+}
+
+bool CircleInstanceNormSummaryBuilder::validate(const luci::CircleNode *node)
+{
+ auto instnorm = loco::must_cast<const luci::CircleInstanceNorm *>(node);
+ if (instnorm->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return false;
+
+ return true;
+}
+
+std::vector<std::string> CircleInstanceNormSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "gamma", "beta"};
+}
+
+void CircleInstanceNormSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto instnorm = loco::must_cast<const luci::CircleInstanceNorm *>(node);
+ s.args().append("epsilon", std::to_string(instnorm->epsilon()));
+ s.args().append("fused_activation_function", to_str(instnorm->fusedActivationFunction()));
+}
+
+bool CircleL2NormalizeSummaryBuilder::validate(const luci::CircleNode *node)
+{
+ auto l2norm = loco::must_cast<const luci::CircleL2Normalize *>(node);
+ if (l2norm->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return false;
+
+ return true;
+}
+
+std::vector<std::string> CircleL2NormalizeSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"x"};
+}
+
+void CircleL2NormalizeSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto l2norm = loco::must_cast<const luci::CircleL2Normalize *>(node);
+ s.args().append("fused_activation_function", to_str(l2norm->fusedActivationFunction()));
+}
+
+bool CircleL2Pool2DSummaryBuilder::validate(const luci::CircleNode *node)
+{
+ auto l2pool = loco::must_cast<const luci::CircleL2Pool2D *>(node);
+ if (l2pool->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return false;
+ if (l2pool->padding() == luci::Padding::UNDEFINED)
+ return false;
+
+ return true;
+}
+
+std::vector<std::string> CircleL2Pool2DSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"value"};
+}
+
+void CircleL2Pool2DSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto l2pool = loco::must_cast<const luci::CircleL2Pool2D *>(node);
+ s.args().append("filter(h,w)", to_str(l2pool->filter()));
+ s.args().append("stride(h,w)", to_str(l2pool->stride()));
+ s.args().append("padding", to_str(l2pool->padding()));
+ s.args().append("fused_activation_function", to_str(l2pool->fusedActivationFunction()));
+}
+
+void CircleLeakyReluSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto leaky_relu = loco::must_cast<const luci::CircleLeakyRelu *>(node);
+ s.args().append("alpha", std::to_string(leaky_relu->alpha()));
+}
+
+void CircleLocalResponseNormalizationSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto lrn = loco::must_cast<const luci::CircleLocalResponseNormalization *>(node);
+ s.args().append("radius", std::to_string(lrn->radius()));
+ s.args().append("bias", std::to_string(lrn->bias()));
+ s.args().append("alpha", std::to_string(lrn->alpha()));
+ s.args().append("beta", std::to_string(lrn->beta()));
+}
+
+std::vector<std::string> CircleLogSoftmaxSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"logits"};
+}
+
+std::vector<std::string> CircleMatrixDiagSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"diagonal"};
+}
+
+std::vector<std::string>
+CircleMatrixSetDiagSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "diagonal"};
+}
+
+bool CircleMaxPool2DSummaryBuilder::validate(const luci::CircleNode *node)
+{
+ auto maxpool = loco::must_cast<const luci::CircleMaxPool2D *>(node);
+ if (maxpool->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return false;
+ if (maxpool->padding() == luci::Padding::UNDEFINED)
+ return false;
+
+ return true;
+}
+
+std::vector<std::string> CircleMaxPool2DSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"value"};
+}
+
+void CircleMaxPool2DSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto maxpool = loco::must_cast<const luci::CircleMaxPool2D *>(node);
+ s.args().append("filter(h,w)", to_str(maxpool->filter()));
+ s.args().append("stride(h,w)", to_str(maxpool->stride()));
+ s.args().append("padding", to_str(maxpool->padding()));
+ s.args().append("fused_activation_function", to_str(maxpool->fusedActivationFunction()));
+}
+
+bool CircleMirrorPadSummaryBuilder::validate(const luci::CircleNode *node)
+{
+ auto mirror_pad = loco::must_cast<const luci::CircleMirrorPad *>(node);
+ if (mirror_pad->mode() == luci::MirrorPadMode::UNDEFINED)
+ return false;
+
+ return true;
+}
+
+std::vector<std::string> CircleMirrorPadSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "paddings"};
+}
+
+void CircleMirrorPadSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto mirror_pad = loco::must_cast<const luci::CircleMirrorPad *>(node);
+ s.args().append("mode", to_str(mirror_pad->mode()));
+}
+
+bool CircleMulSummaryBuilder::validate(const luci::CircleNode *node)
+{
+ auto mul = loco::must_cast<const luci::CircleMul *>(node);
+ if (mul->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return false;
+
+ return true;
+}
+
+void CircleMulSummaryBuilder::build_attributes(const luci::CircleNode *node, locop::NodeSummary &s)
+{
+ auto mul = loco::must_cast<const luci::CircleMul *>(node);
+ s.args().append("fused_activation_function", to_str(mul->fusedActivationFunction()));
+}
+
+std::vector<std::string>
+CircleNonMaxSuppressionV4SummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"boxes", "scores", "max_output_size", "iou_threshold", "score_threshold"};
+}
+
+std::vector<std::string>
+CircleNonMaxSuppressionV5SummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"boxes", "scores", "max_output_size",
+ "iou_threshold", "score_threshold", "soft_nms_sigma"};
+}
+
+std::vector<std::string> CircleOneHotSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"indices", "depth", "on_value", "off_value"};
+}
+
+void CircleOneHotSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto onehot = loco::must_cast<const luci::CircleOneHot *>(node);
+ s.args().append("axis", std::to_string(onehot->axis()));
+}
+
+std::vector<std::string> CirclePackSummaryBuilder::get_input_names(const luci::CircleNode *node)
+{
+ return std::vector<std::string>(node->arity(), "values");
+}
+
+void CirclePackSummaryBuilder::build_attributes(const luci::CircleNode *node, locop::NodeSummary &s)
+{
+ auto pack = loco::must_cast<const luci::CirclePack *>(node);
+ s.args().append("values_count", std::to_string(pack->values_count()));
+ s.args().append("axis", std::to_string(pack->axis()));
+}
+
+std::vector<std::string> CirclePadSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "paddings"};
+}
+
+std::vector<std::string> CirclePadV2SummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "paddings", "constant_values"};
+}
+
+std::vector<std::string> CirclePReluSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "alpha"};
+}
+
+std::vector<std::string> CircleRangeSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"start", "limit", "delta"};
+}
+
+std::vector<std::string> CircleReshapeSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"tensor", "shape"};
+}
+
+void CircleReshapeSummaryBuilder::update_status(locop::NodeSummary &s)
+{
+ s.state(locop::NodeDesc::State::PartiallyKnown);
+}
+
+std::vector<std::string>
+CircleResizeBilinearSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "size"};
+}
+
+void CircleResizeBilinearSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto resize_bilinear = loco::must_cast<const luci::CircleResizeBilinear *>(node);
+ s.args().append("align_corners", to_str(resize_bilinear->align_corners()));
+ s.args().append("half_pixel_centers", to_str(resize_bilinear->half_pixel_centers()));
+}
+
+std::vector<std::string>
+CircleResizeNearestNeighborSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "size"};
+}
+
+void CircleResizeNearestNeighborSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto resize_nn = loco::must_cast<const luci::CircleResizeNearestNeighbor *>(node);
+ s.args().append("align_corners", to_str(resize_nn->align_corners()));
+}
+
+std::vector<std::string>
+CircleReverseSequenceSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "seq_lengths"};
+}
+
+void CircleReverseSequenceSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto reverse_seq = loco::must_cast<const luci::CircleReverseSequence *>(node);
+ s.args().append("seq_axis", std::to_string(reverse_seq->seq_axis()));
+ s.args().append("batch_axis", std::to_string(reverse_seq->batch_axis()));
+}
+
+std::vector<std::string> CircleReverseV2SummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"tensor", "axis"};
+}
+
+std::vector<std::string> CircleScatterNdSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"indices", "updates", "shape"};
+}
+
+std::vector<std::string> CircleSegmentSumSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "segment_ids"};
+}
+
+std::vector<std::string> CircleSelectSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"condition", "t", "e"};
+}
+
+std::vector<std::string> CircleSelectV2SummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"condition", "t", "e"};
+}
+
+void CircleShapeSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto shape = loco::must_cast<const luci::CircleShape *>(node);
+ s.args().append("out_type", to_str(shape->out_type()));
+}
+
+std::vector<std::string> CircleSliceSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "begin", "size"};
+}
+
+std::vector<std::string> CircleSoftmaxSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"logits"};
+}
+
+void CircleSoftmaxSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto softmax = loco::must_cast<const luci::CircleSoftmax *>(node);
+ s.args().append("beta", to_str(softmax->beta()));
+}
+
+std::vector<std::string>
+CircleSpaceToBatchNDSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "block_shape", "paddings"};
+}
+
+void CircleSpaceToDepthSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto space_to_depth = loco::must_cast<const luci::CircleSpaceToDepth *>(node);
+ s.args().append("block_size", to_str(space_to_depth->block_size()));
+}
+
+std::vector<std::string>
+CircleSparseToDenseSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"indices", "output_shape", "values", "default_value"};
+}
+
+void CircleSparseToDenseSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto sparse_to_dense = loco::must_cast<const luci::CircleSparseToDense *>(node);
+ s.args().append("validate_indices", to_str(sparse_to_dense->validate_indices()));
+}
+
+std::vector<std::string> CircleSplitSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"split_dim", "input"};
+}
+
+void CircleSplitSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto split = loco::must_cast<const luci::CircleSplit *>(node);
+ s.args().append("num_split", std::to_string(split->num_split()));
+}
+
+std::vector<std::string> CircleSplitVSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "size_splits", "split_dim"};
+}
+
+void CircleSplitVSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto split_v = loco::must_cast<const luci::CircleSplitV *>(node);
+ s.args().append("num_split", std::to_string(split_v->num_split()));
+}
+
+void CircleSqueezeSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto squeeze = loco::must_cast<const luci::CircleSqueeze *>(node);
+
+ std::string squeeze_dims = "(";
+ for (size_t i = 0; i < squeeze->squeeze_dims().size(); ++i)
+ {
+ if (i != 0)
+ squeeze_dims += ", ";
+ squeeze_dims += std::to_string(squeeze->squeeze_dims().at(i));
+ }
+ squeeze_dims += ")";
+
+ s.args().append("squeeze_dims", squeeze_dims);
+}
+
+std::vector<std::string> CircleStridedSliceSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "begin", "end", "strides"};
+}
+
+void CircleStridedSliceSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto strided_slice = loco::must_cast<const luci::CircleStridedSlice *>(node);
+ s.args().append("begin_mask", std::to_string(strided_slice->begin_mask()));
+ s.args().append("end_mask", std::to_string(strided_slice->end_mask()));
+ s.args().append("ellipsis_mask", std::to_string(strided_slice->ellipsis_mask()));
+ s.args().append("new_axis_mask", std::to_string(strided_slice->new_axis_mask()));
+ s.args().append("shrink_axis_mask", std::to_string(strided_slice->shrink_axis_mask()));
+}
+
+bool CircleSVDFSummaryBuilder::validate(const luci::CircleNode *node)
+{
+ auto svdf = loco::must_cast<const luci::CircleSVDF *>(node);
+ if (svdf->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return false;
+
+ return true;
+}
+
+std::vector<std::string> CircleSVDFSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "weight_feature", "weight_time", "bias", "State"};
+}
+
+void CircleSVDFSummaryBuilder::build_attributes(const luci::CircleNode *node, locop::NodeSummary &s)
+{
+ auto svdf = loco::must_cast<const luci::CircleSVDF *>(node);
+ s.args().append("rank", to_str(svdf->svdf_rank()));
+ s.args().append("asymmetric_quantize_inputs", to_str(svdf->asymmetric_quantize_inputs()));
+ s.args().append("fused_activation_function", to_str(svdf->fusedActivationFunction()));
+}
+
+std::vector<std::string> CircleTileSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "multiples"};
+}
+
+std::vector<std::string> CircleTopKV2SummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input", "k"};
+}
+
+std::vector<std::string> CircleTransposeSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"a", "perm"};
+}
+
+bool CircleTransposeConvSummaryBuilder::validate(const luci::CircleNode *node)
+{
+ auto transpose_conv = loco::must_cast<const luci::CircleTransposeConv *>(node);
+ if (transpose_conv->padding() == luci::Padding::UNDEFINED)
+ return false;
+ if (transpose_conv->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return false;
+
+ return true;
+}
+
+std::vector<std::string>
+CircleTransposeConvSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"inputSizes", "filter", "outBackProp", "bias"};
+}
+
+void CircleTransposeConvSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto transpose_conv = loco::must_cast<const luci::CircleTransposeConv *>(node);
+ s.args().append("stride(h,w)", to_str(transpose_conv->stride()));
+ s.args().append("padding", to_str(transpose_conv->padding()));
+ s.args().append("fused_activation_function", to_str(transpose_conv->fusedActivationFunction()));
+}
+
+std::vector<std::string>
+CircleUnidirectionalSequenceLSTMSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"input",
+ "input_to_input_weights",
+ "input_to_forget_weights",
+ "input_to_cell_weights",
+ "input_to_output_weights",
+ "recurrent_to_input_weights",
+ "recurrent_to_forget_weights",
+ "recurrent_to_cell_weights",
+ "recurrent_to_output_weights",
+ "cell_to_input_weights",
+ "cell_to_forget_weights",
+ "cell_to_output_weights",
+ "input_gate_bias",
+ "forget_gate_bias",
+ "cell_gate_bias",
+ "output_gate_bias",
+ "projection_weights",
+ "projection_bias",
+ "output_state",
+ "cell_state",
+ "input_layer_norm_coefficients",
+ "forget_layer_norm_coefficients",
+ "cell_layer_norm_coefficients",
+ "output_layer_norm_coefficients"};
+}
+
+void CircleUnidirectionalSequenceLSTMSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto lstm = loco::must_cast<const luci::CircleUnidirectionalSequenceLSTM *>(node);
+ s.args().append("cell_clip", to_str(lstm->cell_clip()));
+ s.args().append("proj_clip", to_str(lstm->proj_clip()));
+ s.args().append("time_major", to_str(lstm->time_major()));
+ s.args().append("asymmetric_quantize_inputs", to_str(lstm->asymmetric_quantize_inputs()));
+}
+
+void CircleUniqueSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto unique = loco::must_cast<const luci::CircleUnique *>(node);
+ s.args().append("idx_out_type", to_str(unique->idx_out_type()));
+}
+
+std::vector<std::string> CircleUnpackSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"value"};
+}
+
+void CircleUnpackSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto unpack = loco::must_cast<const luci::CircleUnpack *>(node);
+ s.args().append("num", std::to_string(unpack->num()));
+ s.args().append("axis", std::to_string(unpack->axis()));
+}
+
+std::vector<std::string> CircleWhereSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"condition"};
+}
+
+std::vector<std::string> CircleWhileSummaryBuilder::get_input_names(const luci::CircleNode *node)
+{
+ auto circle_while = loco::must_cast<const luci::CircleWhile *>(node);
+
+ auto input_names = std::vector<std::string>();
+ for (uint32_t i = 0; i < circle_while->input_count(); ++i)
+ input_names.push_back("input");
+
+ return input_names;
+}
+
+void CircleWhileSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto circle_while = loco::must_cast<const luci::CircleWhile *>(node);
+
+ if (circle_while->cond_graph() != nullptr)
+ s.args().append("cond_graph", circle_while->cond_graph()->name());
+ else
+ s.args().append("cond_branch", std::to_string(circle_while->cond_branch()));
+
+ if (circle_while->body_graph() != nullptr)
+ s.args().append("body_graph", circle_while->body_graph()->name());
+ else
+ s.args().append("body_branch", std::to_string(circle_while->body_branch()));
+}
+
+std::vector<std::string> CircleOutputSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"from"};
+}
+
+std::vector<std::string> CircleTopKV2OutSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"topkv2"};
+}
+
+std::vector<std::string> CircleUniqueOutSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"unique"};
+}
+
+std::vector<std::string> CircleUnpackOutSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"unpack"};
+}
+
+std::vector<std::string> CircleWhileOutSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+ return {"while"};
+}
+
+} // namespace luci
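Because every formatter above is a file-local to_str overload, each build_attributes body stays at one args().append() per field and overload resolution picks the right printer. A minimal sketch of that dispatch, assuming it lives in this same translation unit (the helpers sit in an anonymous namespace and are not visible outside this file):

    // Sketch only: the same calls CircleConv2DSummaryBuilder::build_attributes
    // makes, spelled out with the strings the to_str overloads produce.
    luci::CircleConv2D conv;
    conv.padding(luci::Padding::VALID);
    conv.fusedActivationFunction(luci::FusedActFunc::RELU6);

    locop::NodeSummary s;
    s.args().append("padding", to_str(conv.padding()));        // "VALID"
    s.args().append("fused_activation_function",
                    to_str(conv.fusedActivationFunction()));   // "RELU6"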
diff --git a/compiler/luci/logex/src/CircleNodeSummaryBuilders.h b/compiler/luci/logex/src/CircleNodeSummaryBuilders.h
new file mode 100644
index 000000000..0bdb05d8d
--- /dev/null
+++ b/compiler/luci/logex/src/CircleNodeSummaryBuilders.h
@@ -0,0 +1,836 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_LOGEX_CIRCLE_NODE_SUMMARY_BUILDERS__
+#define __LUCI_LOGEX_CIRCLE_NODE_SUMMARY_BUILDERS__
+
+#include "CircleNodeSummaryBuilder.h"
+
+#include <luci/IR/CircleNode.h>
+
+#include <string>
+#include <vector>
+
+namespace luci
+{
+
+class CircleNodeWithXSummaryBuilder : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleNodeWithINPUTSummaryBuilder : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleNodeWithXYSummaryBuilder : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleNodeWithFEATURESSummaryBuilder : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+template <class REDUCER_NODE>
+class CircleNodeWithReducerSummaryBuilder : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *)
+ {
+ return {"input", "reduction_indices"};
+ }
+
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s)
+ {
+ auto reducer = loco::must_cast<const REDUCER_NODE *>(node);
+ s.args().append("keep_dims", reducer->keep_dims() ? "true" : "false");
+ }
+};
+
+} // namespace luci
+
+namespace luci
+{
+
+class CircleAbsSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleAddSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+private:
+ bool validate(const luci::CircleNode *node);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleAddNSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *node);
+};
+
+class CircleArgMaxSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleArgMinSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleAveragePool2DSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ bool validate(const luci::CircleNode *node);
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleBatchMatMulSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+private:
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleBatchToSpaceNDSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleBCQFullyConnectedSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ bool validate(const luci::CircleNode *node);
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleBCQGatherSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleBidirectionalSequenceLSTMSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleCastSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleCeilSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleConcatenationSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ bool validate(const luci::CircleNode *node);
+ std::vector<std::string> get_input_names(const luci::CircleNode *node);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleConstSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+ void update_status(locop::NodeSummary &s);
+};
+
+class CircleConv2DSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ bool validate(const luci::CircleNode *node);
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleCosSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleCustomSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *node);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleDensifySummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
+class CircleDepthToSpaceSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+private:
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleDepthwiseConv2DSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ bool validate(const luci::CircleNode *node);
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleDequantizeSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
+class CircleDivSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleEluSummaryBuilder final : public CircleNodeWithFEATURESSummaryBuilder
+{
+};
+
+class CircleEqualSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleExpSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleExpandDimsSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleFakeQuantSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleFillSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleFloorSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleFloorDivSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleFloorModSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleFullyConnectedSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ bool validate(const luci::CircleNode *node);
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleGatherSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleGatherNdSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleGeluSummaryBuilder final : public CircleNodeWithFEATURESSummaryBuilder
+{
+private:
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleGreaterSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleGreaterEqualSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleHardSwishSummaryBuilder final : public CircleNodeWithFEATURESSummaryBuilder
+{
+};
+
+class CircleIfSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *node);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleInstanceNormSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ bool validate(const luci::CircleNode *node);
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleL2NormalizeSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ bool validate(const luci::CircleNode *node);
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleL2Pool2DSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ bool validate(const luci::CircleNode *node);
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleLeakyReluSummaryBuilder final : public CircleNodeWithFEATURESSummaryBuilder
+{
+private:
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleLessSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleLessEqualSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleLocalResponseNormalizationSummaryBuilder final
+ : public CircleNodeWithINPUTSummaryBuilder
+{
+private:
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleLogSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleLogicalAndSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleLogicalNotSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleLogicalOrSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleLogisticSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleLogSoftmaxSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleMatrixDiagSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleMatrixSetDiagSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleMaximumSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleMaxPool2DSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ bool validate(const luci::CircleNode *node);
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleMeanSummaryBuilder final : public CircleNodeWithReducerSummaryBuilder<luci::CircleMean>
+{
+};
+
+class CircleMinimumSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleMirrorPadSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ bool validate(const luci::CircleNode *node);
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleMulSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+private:
+ bool validate(const luci::CircleNode *node);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleNegSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleNonMaxSuppressionV4SummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleNonMaxSuppressionV5SummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleNotEqualSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleOneHotSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CirclePackSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *node);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CirclePadSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CirclePadV2SummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CirclePowSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CirclePReluSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleQuantizeSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
+class CircleRangeSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleRankSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
+class CircleReduceAnySummaryBuilder final
+ : public CircleNodeWithReducerSummaryBuilder<luci::CircleReduceAny>
+{
+};
+
+class CircleReduceMaxSummaryBuilder final
+ : public CircleNodeWithReducerSummaryBuilder<luci::CircleReduceMax>
+{
+};
+
+class CircleReduceMinSummaryBuilder final
+ : public CircleNodeWithReducerSummaryBuilder<luci::CircleReduceMin>
+{
+};
+
+class CircleReduceProdSummaryBuilder final
+ : public CircleNodeWithReducerSummaryBuilder<luci::CircleReduceProd>
+{
+};
+
+class CircleReluSummaryBuilder final : public CircleNodeWithFEATURESSummaryBuilder
+{
+};
+
+class CircleRelu6SummaryBuilder final : public CircleNodeWithFEATURESSummaryBuilder
+{
+};
+
+class CircleReluN1To1SummaryBuilder final : public CircleNodeWithFEATURESSummaryBuilder
+{
+};
+
+class CircleReshapeSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void update_status(locop::NodeSummary &s);
+};
+
+class CircleResizeBilinearSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleResizeNearestNeighborSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleReverseSequenceSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleReverseV2SummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleRoundSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleRsqrtSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleScatterNdSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleSegmentSumSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleSelectSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleSelectV2SummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleShapeSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+private:
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleSinSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleSliceSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleSoftmaxSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleSpaceToBatchNDSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleSpaceToDepthSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+private:
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleSparseToDenseSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleSplitSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleSplitVSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleSqrtSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleSquareSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleSquaredDifferenceSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleSqueezeSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+private:
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleStridedSliceSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleSubSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleSumSummaryBuilder final : public CircleNodeWithReducerSummaryBuilder<luci::CircleSum>
+{
+};
+
+class CircleSVDFSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ bool validate(const luci::CircleNode *node);
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleTanhSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleTileSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleTopKV2SummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleTransposeSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleTransposeConvSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ bool validate(const luci::CircleNode *node);
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleUnidirectionalSequenceLSTMSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleUniqueSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+private:
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleUnpackSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleWhereSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleWhileSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *node);
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleZerosLikeSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
+class CircleBidirectionalSequenceLSTMOutSummaryBuilder final
+ : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
+class CircleCustomOutSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
+class CircleIfOutSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
+class CircleInputSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+};
+
+class CircleNonMaxSuppressionV4OutSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
+class CircleNonMaxSuppressionV5OutSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
+class CircleOutputSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleOutputDummySummaryBuilder final : public CircleNodeSummaryBuilder
+{
+};
+
+class CircleOutputExcludeSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+};
+
+class CircleSplitOutSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
+class CircleSplitVOutSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
+class CircleTopKV2OutSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleUniqueOutSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleUnpackOutSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleVariableSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+};
+
+class CircleWhileOutSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+ std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+} // namespace luci
+
+#endif // __LUCI_LOGEX_CIRCLE_NODE_SUMMARY_BUILDERS__
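These declarations only name each builder's hooks; the bodies live in the accompanying CircleNodeSummaryBuilders.cpp, which this hunk does not show. As a minimal sketch (assuming the virtual get_input_names()/build_attributes() hooks of the CircleNodeSummaryBuilder base declared above and luci's loco::must_cast helper), the CircleSoftmaxSummaryBuilder definitions could look like this:

  // Sketch only; the actual definitions in CircleNodeSummaryBuilders.cpp may differ.
  std::vector<std::string> CircleSoftmaxSummaryBuilder::get_input_names(const luci::CircleNode *)
  {
    // Softmax has a single input, labeled "logits" in the dump
    return {"logits"};
  }

  void CircleSoftmaxSummaryBuilder::build_attributes(const luci::CircleNode *node,
                                                     locop::NodeSummary &s)
  {
    // Safe downcast: the dispatcher invokes this builder only for SOFTMAX nodes
    auto softmax = loco::must_cast<const luci::CircleSoftmax *>(node);
    s.args().append("beta", std::to_string(softmax->beta()));
  }

This mirrors the summary_node() overload for luci::CircleSoftmax removed from FormattedGraph.cpp below (the "logits" input plus a "beta" attribute).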
diff --git a/compiler/luci/logex/src/FormattedGraph.cpp b/compiler/luci/logex/src/FormattedGraph.cpp
index bb7c73d5f..d3b2170b0 100644
--- a/compiler/luci/logex/src/FormattedGraph.cpp
+++ b/compiler/luci/logex/src/FormattedGraph.cpp
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "CircleNodeSummaryBuilder.h"
#include "luci/FormattedGraph.h"
#include <luci/IR/CircleDialect.h>
@@ -25,1777 +26,6 @@
#include <sstream>
#include <vector>
-/**
- * @brief dump std::vector<int64_t> values to stream
- */
-std::ostream &operator<<(std::ostream &os, const std::vector<int64_t> &vi64)
-{
- for (auto vi : vi64)
- {
- os << vi << " ";
- }
- return os;
-}
-
-// For TF lite
-namespace
-{
-
-const char *to_str(loco::DataType type)
-{
- switch (type)
- {
- case loco::DataType::U8:
- return "UINT8";
- case loco::DataType::U16:
- return "UINT16";
- case loco::DataType::U32:
- return "UINT32";
- case loco::DataType::U64:
- return "UINT64";
-
- case loco::DataType::S8:
- return "INT8";
- case loco::DataType::S16:
- return "INT16";
- case loco::DataType::S32:
- return "INT32";
- case loco::DataType::S64:
- return "INT64";
-
- case loco::DataType::FLOAT16:
- return "FLOAT16";
- case loco::DataType::FLOAT32:
- return "FLOAT32";
- case loco::DataType::FLOAT64:
- return "FLOAT64";
-
- case loco::DataType::BOOL:
- return "BOOL";
-
- default:
- return "Error";
- }
-}
-
-const char *to_str(bool value) { return value ? "true" : "false"; }
-
-const char *to_str(luci::FusedActFunc fused)
-{
- switch (fused)
- {
- case luci::FusedActFunc::NONE:
- return "NONE";
- case luci::FusedActFunc::RELU:
- return "RELU";
- case luci::FusedActFunc::RELU_N1_TO_1:
- return "RELU_N1_TO_1";
- case luci::FusedActFunc::RELU6:
- return "RELU6";
- default:
- return "Error";
- }
-}
-
-const char *to_str(luci::Padding padding)
-{
- switch (padding)
- {
- case luci::Padding::SAME:
- return "SAME";
- case luci::Padding::VALID:
- return "VALID";
- default:
- return "Error";
- }
-}
-
-const char *to_str(luci::MirrorPadMode mode)
-{
- switch (mode)
- {
- case luci::MirrorPadMode::REFLECT:
- return "REFLECT";
- case luci::MirrorPadMode::SYMMETRIC:
- return "SYMMETRIC";
- default:
- return "Error";
- }
-}
-
-std::string to_str(const luci::Stride *stride)
-{
- return pepper::str(stride->h(), ",", stride->w());
-}
-
-std::string to_str(const luci::Filter *filter)
-{
- return pepper::str(filter->h(), ",", filter->w());
-}
-
-std::string circle_opname(uint32_t opnum)
-{
- static const std::string prefix{"circle."};
-
- switch (static_cast<luci::CircleOpcode>(opnum))
- {
-#define CIRCLE_NODE(OPCODE, CLASS) \
- case luci::CircleOpcode::OPCODE: \
- return prefix + #OPCODE;
-#include <luci/IR/CircleNodes.lst>
-#undef CIRCLE_NODE
- default:
- break;
- };
-
- return prefix + "Invalid";
-}
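Each CIRCLE_NODE entry in <luci/IR/CircleNodes.lst> expands to one case label here, so the whole switch is generated from the operator list. For the ADD entry, for instance, the macro expands to (illustration, not part of this change):

  case luci::CircleOpcode::ADD:
    return prefix + "ADD";

so circle_opname() yields "circle.ADD" for that opcode and falls back to "circle.Invalid" for an opcode missing from the list.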
-
-// CircleNodeSummaryBuilder with default implementation
-class CircleNodeSummaryBuilderBase : public locop::NodeSummaryBuilder
-{
-public:
- CircleNodeSummaryBuilderBase(const locop::SymbolTable *tbl) : _tbl{tbl}
- {
- // DO NOTHING
- }
-
-public:
- bool build(const loco::Node *, locop::NodeSummary &s) const final;
-
-protected:
-#define CIRCLE_NODE(OPCODE, CLASS) \
- virtual bool summary(const CLASS *, locop::NodeSummary &s) const \
- { \
- s.comments().append("Emitted by Default CircleNodeSummaryBuilder"); \
- s.state(locop::NodeSummary::State::PartiallyKnown); \
- return true; \
- }
-#include <luci/IR/CircleNodes.lst>
-#undef CIRCLE_NODE
-
-protected:
- const locop::SymbolTable *tbl(void) const { return _tbl; }
-
- // Please do not use _tbl directly; use tbl() instead.
- // This will be changed to private in the near future.
-protected:
- const locop::SymbolTable *_tbl;
-};
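The same X-macro list stamps out one protected virtual summary() overload per operator, so a node class without a dedicated override still produces a PartiallyKnown summary. Expanded for luci::CircleAbs, for example, the default reads (illustration, not part of this change):

  virtual bool summary(const luci::CircleAbs *, locop::NodeSummary &s) const
  {
    s.comments().append("Emitted by Default CircleNodeSummaryBuilder");
    s.state(locop::NodeSummary::State::PartiallyKnown);
    return true;
  }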
-
-class CircleNodeSummaryBuilder final : public CircleNodeSummaryBuilderBase
-{
-public:
- CircleNodeSummaryBuilder(const locop::SymbolTable *tbl) : CircleNodeSummaryBuilderBase(tbl)
- {
- // DO NOTHING
- }
-
-private:
-#define IMPLEMENT(CLASS) bool summary(const CLASS *, locop::NodeSummary &) const final;
- IMPLEMENT(luci::CircleAbs)
- IMPLEMENT(luci::CircleAdd)
- IMPLEMENT(luci::CircleAddN)
- IMPLEMENT(luci::CircleArgMax)
- IMPLEMENT(luci::CircleArgMin)
- IMPLEMENT(luci::CircleAveragePool2D)
- IMPLEMENT(luci::CircleBatchMatMul)
- IMPLEMENT(luci::CircleBatchToSpaceND)
- IMPLEMENT(luci::CircleCast)
- IMPLEMENT(luci::CircleCeil)
- IMPLEMENT(luci::CircleConcatenation)
- IMPLEMENT(luci::CircleConst)
- IMPLEMENT(luci::CircleConv2D)
- IMPLEMENT(luci::CircleCos)
- IMPLEMENT(luci::CircleCustom)
- IMPLEMENT(luci::CircleDepthToSpace)
- IMPLEMENT(luci::CircleDepthwiseConv2D)
- IMPLEMENT(luci::CircleDiv)
- IMPLEMENT(luci::CircleElu)
- IMPLEMENT(luci::CircleExp)
- IMPLEMENT(luci::CircleExpandDims)
- IMPLEMENT(luci::CircleFill)
- IMPLEMENT(luci::CircleFloor)
- IMPLEMENT(luci::CircleFloorDiv)
- IMPLEMENT(luci::CircleFloorMod)
- IMPLEMENT(luci::CircleFullyConnected)
- IMPLEMENT(luci::CircleGather)
- IMPLEMENT(luci::CircleGatherNd)
- IMPLEMENT(luci::CircleGreater)
- IMPLEMENT(luci::CircleGreaterEqual)
- IMPLEMENT(luci::CircleIf)
- IMPLEMENT(luci::CircleL2Normalize)
- IMPLEMENT(luci::CircleLeakyRelu)
- IMPLEMENT(luci::CircleLess)
- IMPLEMENT(luci::CircleLessEqual)
- IMPLEMENT(luci::CircleLocalResponseNormalization)
- IMPLEMENT(luci::CircleLog)
- IMPLEMENT(luci::CircleLogicalAnd)
- IMPLEMENT(luci::CircleLogicalNot)
- IMPLEMENT(luci::CircleLogicalOr)
- IMPLEMENT(luci::CircleLogistic)
- IMPLEMENT(luci::CircleLogSoftmax)
- IMPLEMENT(luci::CircleMatrixDiag)
- IMPLEMENT(luci::CircleMatrixSetDiag)
- IMPLEMENT(luci::CircleMaximum)
- IMPLEMENT(luci::CircleMaxPool2D)
- IMPLEMENT(luci::CircleMean)
- IMPLEMENT(luci::CircleMinimum)
- IMPLEMENT(luci::CircleMirrorPad)
- IMPLEMENT(luci::CircleMul)
- IMPLEMENT(luci::CircleNeg)
- IMPLEMENT(luci::CircleNonMaxSuppressionV4)
- IMPLEMENT(luci::CircleNonMaxSuppressionV5)
- IMPLEMENT(luci::CircleNotEqual)
- IMPLEMENT(luci::CircleOneHot)
- IMPLEMENT(luci::CirclePack)
- IMPLEMENT(luci::CirclePad)
- IMPLEMENT(luci::CirclePadV2)
- IMPLEMENT(luci::CirclePow)
- IMPLEMENT(luci::CirclePRelu)
- IMPLEMENT(luci::CircleRange)
- IMPLEMENT(luci::CircleRank)
- IMPLEMENT(luci::CircleReduceAny)
- IMPLEMENT(luci::CircleReduceMax)
- IMPLEMENT(luci::CircleReduceMin)
- IMPLEMENT(luci::CircleReduceProd)
- IMPLEMENT(luci::CircleRelu)
- IMPLEMENT(luci::CircleRelu6)
- IMPLEMENT(luci::CircleReluN1To1)
- IMPLEMENT(luci::CircleReshape)
- IMPLEMENT(luci::CircleResizeBilinear)
- IMPLEMENT(luci::CircleResizeNearestNeighbor)
- IMPLEMENT(luci::CircleReverseSequence)
- IMPLEMENT(luci::CircleReverseV2)
- IMPLEMENT(luci::CircleRound)
- IMPLEMENT(luci::CircleRsqrt)
- IMPLEMENT(luci::CircleScatterNd)
- IMPLEMENT(luci::CircleSegmentSum)
- IMPLEMENT(luci::CircleSelect)
- IMPLEMENT(luci::CircleSelectV2)
- IMPLEMENT(luci::CircleShape)
- IMPLEMENT(luci::CircleSin)
- IMPLEMENT(luci::CircleSlice)
- IMPLEMENT(luci::CircleSoftmax)
- IMPLEMENT(luci::CircleSpaceToBatchND)
- IMPLEMENT(luci::CircleSpaceToDepth)
- IMPLEMENT(luci::CircleSparseToDense)
- IMPLEMENT(luci::CircleSplit)
- IMPLEMENT(luci::CircleSplitV)
- IMPLEMENT(luci::CircleSqrt)
- IMPLEMENT(luci::CircleSquare)
- IMPLEMENT(luci::CircleSquaredDifference)
- IMPLEMENT(luci::CircleSqueeze)
- IMPLEMENT(luci::CircleStridedSlice)
- IMPLEMENT(luci::CircleSub)
- IMPLEMENT(luci::CircleSum)
- IMPLEMENT(luci::CircleTanh)
- IMPLEMENT(luci::CircleTile)
- IMPLEMENT(luci::CircleTopKV2)
- IMPLEMENT(luci::CircleTranspose)
- IMPLEMENT(luci::CircleTransposeConv)
- IMPLEMENT(luci::CircleUnique)
- IMPLEMENT(luci::CircleUnpack)
- IMPLEMENT(luci::CircleWhere)
- IMPLEMENT(luci::CircleWhile)
- IMPLEMENT(luci::CircleZerosLike)
- // Circle Only
- IMPLEMENT(luci::CircleBCQFullyConnected)
- IMPLEMENT(luci::CircleBCQGather)
- IMPLEMENT(luci::CircleInstanceNorm)
- // Virtual nodes
- IMPLEMENT(luci::CircleInput)
- IMPLEMENT(luci::CircleOutput)
- IMPLEMENT(luci::CircleIfOut)
- IMPLEMENT(luci::CircleNonMaxSuppressionV4Out)
- IMPLEMENT(luci::CircleNonMaxSuppressionV5Out)
- IMPLEMENT(luci::CircleSplitOut)
- IMPLEMENT(luci::CircleSplitVOut)
- IMPLEMENT(luci::CircleTopKV2Out)
- IMPLEMENT(luci::CircleUniqueOut)
- IMPLEMENT(luci::CircleUnpackOut)
- IMPLEMENT(luci::CircleWhileOut)
-#undef IMPLEMENT
-};
-
-template <class CIRCLENODE>
-bool use_x(const locop::SymbolTable *tbl, const CIRCLENODE *node, locop::NodeSummary &s)
-{
- s.args().append("x", tbl->lookup(node->x()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-template <class CIRCLENODE>
-bool use_input(const locop::SymbolTable *tbl, const CIRCLENODE *node, locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-template <class CIRCLENODE>
-bool use_features(const locop::SymbolTable *tbl, const CIRCLENODE *node, locop::NodeSummary &s)
-{
- s.args().append("features", tbl->lookup(node->features()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-template <class CIRCLENODE>
-bool use_xy(const locop::SymbolTable *tbl, const CIRCLENODE *node, locop::NodeSummary &s)
-{
- s.args().append("x", tbl->lookup(node->x()));
- s.args().append("y", tbl->lookup(node->y()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-template <class CIRCLENODE>
-bool use_xy_act(const locop::SymbolTable *tbl, const CIRCLENODE *node, locop::NodeSummary &s)
-{
- assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-
- s.args().append("x", tbl->lookup(node->x()));
- s.args().append("y", tbl->lookup(node->y()));
- s.args().append("fused_activation_function", to_str(node->fusedActivationFunction()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-template <class CIRCLENODE>
-bool use_reducer(const locop::SymbolTable *tbl, const CIRCLENODE *node, locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("reduction_indices", tbl->lookup(node->reduction_indices()));
- s.args().append("keep_dims", node->keep_dims() ? "true" : "false");
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-template <class CIRCLENODE>
-bool use_ido(const locop::SymbolTable *tbl, const CIRCLENODE *node, locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("dimension", tbl->lookup(node->dimension()));
- s.args().append("output_type", to_str(node->output_type()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleAddN *node,
- locop::NodeSummary &s)
-{
- for (uint32_t i = 0; i < node->arity(); ++i)
- s.args().append("inputs", tbl->lookup(node->inputs(i)));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleAveragePool2D *node,
- locop::NodeSummary &s)
-{
- assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-
- s.args().append("value", tbl->lookup(node->value()));
- s.args().append("filter(h,w)", to_str(node->filter()));
- s.args().append("stride(h,w)", to_str(node->stride()));
- s.args().append("padding", to_str(node->padding()));
- s.args().append("fused", to_str(node->fusedActivationFunction()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBatchMatMul *node,
- locop::NodeSummary &s)
-{
- s.args().append("x", tbl->lookup(node->x()));
- s.args().append("y", tbl->lookup(node->y()));
- s.args().append("adj_x", to_str(node->adj_x()));
- s.args().append("adj_y", to_str(node->adj_y()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBatchToSpaceND *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("block_shape", tbl->lookup(node->block_shape()));
- s.args().append("crops", tbl->lookup(node->crops()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleCast *node,
- locop::NodeSummary &s)
-{
- s.args().append("x", tbl->lookup(node->x()));
- s.args().append("in_data_type", to_str(node->in_data_type()));
- s.args().append("out_data_type", to_str(node->out_data_type()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleConcatenation *node,
- locop::NodeSummary &s)
-{
- assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-
- for (uint32_t i = 0; i < node->numValues(); ++i)
- s.args().append("values", tbl->lookup(node->values(i)));
- s.args().append("axis", pepper::str(node->axis()));
- s.args().append("fused", to_str(node->fusedActivationFunction()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleConv2D *node,
- locop::NodeSummary &s)
-{
- assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
- assert(node->padding() != luci::Padding::UNDEFINED);
-
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("filter", tbl->lookup(node->filter()));
- s.args().append("bias", tbl->lookup(node->bias()));
- s.args().append("stride(h,w)", to_str(node->stride()));
- s.args().append("dilation(h,w)", to_str(node->dilation()));
- s.args().append("padding", to_str(node->padding()));
- s.args().append("fused", to_str(node->fusedActivationFunction()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleCustom *node,
- locop::NodeSummary &s)
-{
- for (uint32_t i = 0; i < node->numInputs(); i++)
- {
- s.args().append("input" + std::to_string(i), tbl->lookup(node->inputs(i)));
- }
- s.args().append("custom_code", node->custom_code());
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleDepthToSpace *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("block_size", std::to_string(node->block_size()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleDepthwiseConv2D *node,
- locop::NodeSummary &s)
-{
- assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
- assert(node->padding() != luci::Padding::UNDEFINED);
-
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("filter", tbl->lookup(node->filter()));
- s.args().append("bias", tbl->lookup(node->bias()));
- s.args().append("stride(h,w)", to_str(node->stride()));
- s.args().append("dilation(h,w)", to_str(node->dilation()));
- s.args().append("padding", to_str(node->padding()));
- s.args().append("depthMultiplier", std::to_string(node->depthMultiplier()));
- s.args().append("fused", to_str(node->fusedActivationFunction()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleExpandDims *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("axis", tbl->lookup(node->axis()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleFill *node,
- locop::NodeSummary &s)
-{
- s.args().append("dims", tbl->lookup(node->dims()));
- s.args().append("value", tbl->lookup(node->value()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleFullyConnected *node,
- locop::NodeSummary &s)
-{
- assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("weights", tbl->lookup(node->weights()));
- s.args().append("bias", tbl->lookup(node->bias()));
- s.args().append("fused", to_str(node->fusedActivationFunction()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleGather *node,
- locop::NodeSummary &s)
-{
- s.args().append("params", tbl->lookup(node->params()));
- s.args().append("indices", tbl->lookup(node->indices()));
- s.args().append("axis", pepper::str(node->axis()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleGatherNd *node,
- locop::NodeSummary &s)
-{
- s.args().append("params", tbl->lookup(node->params()));
- s.args().append("indices", tbl->lookup(node->indices()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleIf *node, locop::NodeSummary &s)
-{
- s.args().append("cond", tbl->lookup(node->cond()));
- for (uint32_t i = 0; i < node->input_count(); ++i)
- s.args().append("input", tbl->lookup(node->input(i)));
-
- if (node->then_graph() != nullptr)
- s.args().append("then_graph", node->then_graph()->name());
- else
- s.args().append("then_branch", pepper::str(node->then_branch()));
-
- if (node->else_graph() != nullptr)
- s.args().append("else_graph", node->else_graph()->name());
- else
- s.args().append("else_branch", pepper::str(node->else_branch()));
-
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleL2Normalize *node,
- locop::NodeSummary &s)
-{
- s.args().append("x", tbl->lookup(node->x()));
- s.args().append("fused_activation_function", to_str(node->fusedActivationFunction()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleLeakyRelu *node,
- locop::NodeSummary &s)
-{
- s.args().append("features", tbl->lookup(node->features()));
- s.args().append("alpha", std::to_string(node->alpha()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleLocalResponseNormalization *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("radius", pepper::str(node->radius()));
- s.args().append("bias", pepper::str(node->bias()));
- s.args().append("alpha", pepper::str(node->alpha()));
- s.args().append("beta", pepper::str(node->beta()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleLogSoftmax *node,
- locop::NodeSummary &s)
-{
- s.args().append("logits", tbl->lookup(node->logits()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMatrixDiag *node,
- locop::NodeSummary &s)
-{
- s.args().append("diagonal", tbl->lookup(node->diagonal()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMatrixSetDiag *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("diagonal", tbl->lookup(node->diagonal()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMaxPool2D *node,
- locop::NodeSummary &s)
-{
- assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-
- s.args().append("value", tbl->lookup(node->value()));
- s.args().append("filter(h,w)", to_str(node->filter()));
- s.args().append("stride(h,w)", to_str(node->stride()));
- s.args().append("padding", to_str(node->padding()));
- s.args().append("fused", to_str(node->fusedActivationFunction()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMirrorPad *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("paddings", tbl->lookup(node->paddings()));
- s.args().append("mode", to_str(node->mode()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleNonMaxSuppressionV4 *node,
- locop::NodeSummary &s)
-{
- s.args().append("boxes", tbl->lookup(node->boxes()));
- s.args().append("scores", tbl->lookup(node->scores()));
- s.args().append("max_output_size", tbl->lookup(node->max_output_size()));
- s.args().append("iou_threshold", tbl->lookup(node->iou_threshold()));
- s.args().append("score_threshold", tbl->lookup(node->score_threshold()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleNonMaxSuppressionV5 *node,
- locop::NodeSummary &s)
-{
- s.args().append("boxes", tbl->lookup(node->boxes()));
- s.args().append("scores", tbl->lookup(node->scores()));
- s.args().append("max_output_size", tbl->lookup(node->max_output_size()));
- s.args().append("iou_threshold", tbl->lookup(node->iou_threshold()));
- s.args().append("score_threshold", tbl->lookup(node->score_threshold()));
- s.args().append("soft_nms_sigma", tbl->lookup(node->soft_nms_sigma()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleOneHot *node,
- locop::NodeSummary &s)
-{
- s.args().append("indices", tbl->lookup(node->indices()));
- s.args().append("depth", tbl->lookup(node->depth()));
- s.args().append("on_value", tbl->lookup(node->on_value()));
- s.args().append("off_value", tbl->lookup(node->off_value()));
- s.args().append("axis", pepper::str(node->axis()));
-
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePack *node,
- locop::NodeSummary &s)
-{
- for (uint32_t i = 0; i < node->values_count(); ++i)
- s.args().append("values", tbl->lookup(node->values(i)));
- s.args().append("values_count", pepper::str(node->values_count()));
- s.args().append("axis", pepper::str(node->axis()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePad *node, locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("paddings", tbl->lookup(node->paddings()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePadV2 *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("paddings", tbl->lookup(node->paddings()));
- s.args().append("constant_values", tbl->lookup(node->constant_values()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePRelu *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("alpha", tbl->lookup(node->alpha()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleRange *node,
- locop::NodeSummary &s)
-{
- s.args().append("start", tbl->lookup(node->start()));
- s.args().append("limit", tbl->lookup(node->limit()));
- s.args().append("delta", tbl->lookup(node->delta()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleReshape *node,
- locop::NodeSummary &s)
-{
- s.args().append("tensor", tbl->lookup(node->tensor()));
- s.args().append("shape", tbl->lookup(node->shape()));
- // TODO Show newShape info
- s.state(locop::NodeSummary::State::PartiallyKnown);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleResizeBilinear *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("size", tbl->lookup(node->size()));
- s.args().append("align_corners", node->align_corners() ? "true" : "false");
- s.args().append("half_pixel_centers", node->half_pixel_centers() ? "true" : "false");
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleResizeNearestNeighbor *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("size", tbl->lookup(node->size()));
- s.args().append("align_corners", node->align_corners() ? "true" : "false");
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleReverseSequence *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("seq_lengths", tbl->lookup(node->seq_lengths()));
- s.args().append("seq_axis", std::to_string(node->seq_axis()));
- s.args().append("batch_axis", std::to_string(node->batch_axis()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleReverseV2 *node,
- locop::NodeSummary &s)
-{
- s.args().append("tensor", tbl->lookup(node->tensor()));
- s.args().append("axis", tbl->lookup(node->axis()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleScatterNd *node,
- locop::NodeSummary &s)
-{
- s.args().append("indices", tbl->lookup(node->indices()));
- s.args().append("updates", tbl->lookup(node->updates()));
- s.args().append("shape", tbl->lookup(node->shape()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSegmentSum *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("segment_ids", tbl->lookup(node->segment_ids()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSelect *node,
- locop::NodeSummary &s)
-{
- s.args().append("condition", tbl->lookup(node->condition()));
- s.args().append("t", tbl->lookup(node->t()));
- s.args().append("e", tbl->lookup(node->e()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSelectV2 *node,
- locop::NodeSummary &s)
-{
- s.args().append("condition", tbl->lookup(node->condition()));
- s.args().append("t", tbl->lookup(node->t()));
- s.args().append("e", tbl->lookup(node->e()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleShape *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("out_type", to_str(node->out_type()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSlice *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("begin", tbl->lookup(node->begin()));
- s.args().append("size", tbl->lookup(node->size()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSoftmax *node,
- locop::NodeSummary &s)
-{
- s.args().append("logits", tbl->lookup(node->logits()));
- s.args().append("beta", pepper::str(node->beta()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSpaceToBatchND *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("block_shape", tbl->lookup(node->block_shape()));
- s.args().append("paddings", tbl->lookup(node->paddings()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSpaceToDepth *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("block_size", pepper::str(node->block_size()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSparseToDense *node,
- locop::NodeSummary &s)
-{
- s.args().append("indices", tbl->lookup(node->indices()));
- s.args().append("output_shape", tbl->lookup(node->output_shape()));
- s.args().append("values", tbl->lookup(node->values()));
- s.args().append("default_value", tbl->lookup(node->default_value()));
- s.args().append("Validate_indices", pepper::str(node->validate_indices()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSplit *node,
- locop::NodeSummary &s)
-{
- s.args().append("split_dim", tbl->lookup(node->split_dim()));
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("num_split", pepper::str(node->num_split()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSplitV *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("size_splits", tbl->lookup(node->size_splits()));
- s.args().append("split_dim", tbl->lookup(node->split_dim()));
- s.args().append("num_split", pepper::str(node->num_split()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSqueeze *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
-
- std::stringstream ss{"("};
- for (size_t i = 0; i < node->squeeze_dims().size(); ++i)
- {
- if (i != 0)
- ss << ", ";
- ss << node->squeeze_dims()[i];
- }
- ss << ")";
- s.args().append("squeeze_dims", ss.str());
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleStridedSlice *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("begin", tbl->lookup(node->begin()));
- s.args().append("end", tbl->lookup(node->end()));
- s.args().append("strides", tbl->lookup(node->strides()));
- s.args().append("begin_mask", pepper::str(node->begin_mask()));
- s.args().append("end_mask", pepper::str(node->end_mask()));
- s.args().append("ellipsis_mask", pepper::str(node->ellipsis_mask()));
- s.args().append("new_axis_mask", pepper::str(node->new_axis_mask()));
- s.args().append("shrink_axis_mask", pepper::str(node->shrink_axis_mask()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTile *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("multiples", tbl->lookup(node->multiples()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTopKV2 *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("k", tbl->lookup(node->k()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTranspose *node,
- locop::NodeSummary &s)
-{
- s.args().append("a", tbl->lookup(node->a()));
- s.args().append("perm", tbl->lookup(node->perm()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTransposeConv *node,
- locop::NodeSummary &s)
-{
- assert(node->padding() != luci::Padding::UNDEFINED);
-
- s.args().append("inputSizes", tbl->lookup(node->inputSizes()));
- s.args().append("filter", tbl->lookup(node->filter()));
- s.args().append("outBackprop", tbl->lookup(node->outBackprop()));
- s.args().append("stride(h,w)", to_str(node->stride()));
- s.args().append("padding", to_str(node->padding()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUnique *node,
- locop::NodeSummary &s)
-{
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("idx_out_type", to_str(node->idx_out_type()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUnpack *node,
- locop::NodeSummary &s)
-{
- s.args().append("value", tbl->lookup(node->value()));
- s.args().append("num", pepper::str(node->num()));
- s.args().append("axis", pepper::str(node->axis()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleWhere *node,
- locop::NodeSummary &s)
-{
- s.args().append("condition", tbl->lookup(node->condition()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleWhile *node,
- locop::NodeSummary &s)
-{
- for (uint32_t i = 0; i < node->input_count(); ++i)
- s.args().append("input", tbl->lookup(node->input(i)));
-
- if (node->cond_graph() != nullptr)
- s.args().append("cond_graph", node->cond_graph()->name());
- else
- s.args().append("cond_branch", pepper::str(node->cond_branch()));
-
- if (node->body_graph() != nullptr)
- s.args().append("body_graph", node->body_graph()->name());
- else
- s.args().append("body_branch", pepper::str(node->body_branch()));
-
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTopKV2Out *node,
- locop::NodeSummary &s)
-{
- s.args().append("topkv2", tbl->lookup(node->input()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUniqueOut *node,
- locop::NodeSummary &s)
-{
- s.args().append("unique", tbl->lookup(node->input()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUnpackOut *node,
- locop::NodeSummary &s)
-{
- s.args().append("unpack", tbl->lookup(node->input()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleWhileOut *node,
- locop::NodeSummary &s)
-{
- s.args().append("while", tbl->lookup(node->input()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleOutput *node,
- locop::NodeSummary &s)
-{
- s.args().append("from", tbl->lookup(node->from()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBCQFullyConnected *node,
- locop::NodeSummary &s)
-{
- assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("weights_scales", tbl->lookup(node->weights_scales()));
- s.args().append("weights_binary", tbl->lookup(node->weights_binary()));
- s.args().append("bias", tbl->lookup(node->bias()));
- s.args().append("weights_clusters", tbl->lookup(node->weights_clusters()));
- s.args().append("fused", to_str(node->fusedActivationFunction()));
- s.args().append("weights_hidden_size", pepper::str(node->weights_hidden_size()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBCQGather *node,
- locop::NodeSummary &s)
-{
- s.args().append("input_scales", tbl->lookup(node->input_scales()));
- s.args().append("input_binary", tbl->lookup(node->input_binary()));
- s.args().append("indices", tbl->lookup(node->indices()));
- s.args().append("input_clusters", tbl->lookup(node->input_clusters()));
- s.args().append("axis", pepper::str(node->axis()));
- s.args().append("input_hidden_size", pepper::str(node->input_hidden_size()));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleInstanceNorm *node,
- locop::NodeSummary &s)
-{
- auto fused = node->fusedActivationFunction();
- assert(fused != luci::FusedActFunc::UNDEFINED);
-
- s.args().append("input", tbl->lookup(node->input()));
- s.args().append("gamma", tbl->lookup(node->gamma()));
- s.args().append("beta", tbl->lookup(node->beta()));
- s.args().append("epsilon", pepper::str(node->epsilon()));
- s.args().append("fused_activation_function", to_str(fused));
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool CircleNodeSummaryBuilderBase::build(const loco::Node *node, locop::NodeSummary &s) const
-{
- if (node->dialect() != luci::CircleDialect::get())
- return false;
-
-#define CIRCLE_NODE(OPCODE, CLASS) \
- if (dynamic_cast<const CLASS *>(node)) \
- { \
- s.opname(circle_opname(node->opnum())); \
- return summary(dynamic_cast<const CLASS *>(node), s); \
- }
-#include <luci/IR/CircleNodes.lst>
-#undef CIRCLE_NODE
-
- return false;
-}
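build() expands the operator list one more time, now into a chain of dynamic_cast checks: the first cast that succeeds sets the opname and forwards to the summary() overload for that concrete class. For ADD, the macro expands to (illustration, not part of this change):

  if (dynamic_cast<const luci::CircleAdd *>(node))
  {
    s.opname(circle_opname(node->opnum()));
    return summary(dynamic_cast<const luci::CircleAdd *>(node), s);
  }

Note that each candidate class is probed linearly and cast twice; this removed dispatch is superseded by the per-operator builder classes declared in CircleNodeSummaryBuilders.h above.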
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleAbs *node, locop::NodeSummary &s) const
-{
- return use_x(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleAdd *node, locop::NodeSummary &s) const
-{
- return use_xy_act(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleAddN *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleArgMax *node, locop::NodeSummary &s) const
-{
- return use_ido(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleArgMin *node, locop::NodeSummary &s) const
-{
- return use_ido(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleAveragePool2D *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleBatchMatMul *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleBatchToSpaceND *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleCast *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleCeil *node, locop::NodeSummary &s) const
-{
- return use_x(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleConcatenation *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleConst *, locop::NodeSummary &s) const
-{
- s.state(locop::NodeSummary::State::PartiallyKnown);
- return true;
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleConv2D *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleCos *node, locop::NodeSummary &s) const
-{
- return use_x(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleCustom *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleDepthToSpace *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleDepthwiseConv2D *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleDiv *node, locop::NodeSummary &s) const
-{
- return use_xy(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleElu *node, locop::NodeSummary &s) const
-{
- return use_features(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleExp *node, locop::NodeSummary &s) const
-{
- return use_x(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleExpandDims *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleFloor *node, locop::NodeSummary &s) const
-{
- return use_x(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleFloorDiv *node,
- locop::NodeSummary &s) const
-{
- return use_xy(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleFloorMod *node,
- locop::NodeSummary &s) const
-{
- return use_xy(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleFill *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleFullyConnected *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleGather *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleGatherNd *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleGreater *node, locop::NodeSummary &s) const
-{
- return use_xy(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleGreaterEqual *node,
- locop::NodeSummary &s) const
-{
- return use_xy(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleIf *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleL2Normalize *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleLess *node, locop::NodeSummary &s) const
-{
- return use_xy(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleLessEqual *node,
- locop::NodeSummary &s) const
-{
- return use_xy(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleLeakyRelu *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleLocalResponseNormalization *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleLog *node, locop::NodeSummary &s) const
-{
- return use_x(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleLogicalAnd *node,
- locop::NodeSummary &s) const
-{
- return use_xy(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleLogicalNot *node,
- locop::NodeSummary &s) const
-{
- return use_x(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleLogicalOr *node,
- locop::NodeSummary &s) const
-{
- return use_xy(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleLogistic *node,
- locop::NodeSummary &s) const
-{
- return use_x(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleLogSoftmax *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleMatrixDiag *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleMatrixSetDiag *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleMaximum *node, locop::NodeSummary &s) const
-{
- return use_xy(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleMaxPool2D *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleMean *node, locop::NodeSummary &s) const
-{
- return use_reducer(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleMinimum *node, locop::NodeSummary &s) const
-{
- return use_xy(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleMirrorPad *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleMul *node, locop::NodeSummary &s) const
-{
- return use_xy_act(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleNeg *node, locop::NodeSummary &s) const
-{
- return use_x(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleNonMaxSuppressionV4 *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleNonMaxSuppressionV5 *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleNotEqual *node,
- locop::NodeSummary &s) const
-{
- return use_xy(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleOneHot *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CirclePack *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CirclePad *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CirclePadV2 *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CirclePow *node, locop::NodeSummary &s) const
-{
- return use_xy(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CirclePRelu *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleRange *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleRank *node, locop::NodeSummary &s) const
-{
- return use_input(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleReduceAny *node,
- locop::NodeSummary &s) const
-{
- return use_reducer(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleReduceMax *node,
- locop::NodeSummary &s) const
-{
- return use_reducer(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleReduceMin *node,
- locop::NodeSummary &s) const
-{
- return use_reducer(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleReduceProd *node,
- locop::NodeSummary &s) const
-{
- return use_reducer(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleRelu *node, locop::NodeSummary &s) const
-{
- return use_features(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleRelu6 *node, locop::NodeSummary &s) const
-{
- return use_features(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleReluN1To1 *node,
- locop::NodeSummary &s) const
-{
- return use_features(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleReshape *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleResizeBilinear *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleResizeNearestNeighbor *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleReverseSequence *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleReverseV2 *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleRound *node, locop::NodeSummary &s) const
-{
- return use_x(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleRsqrt *node, locop::NodeSummary &s) const
-{
- return use_x(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleScatterNd *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleSegmentSum *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleSelect *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleSelectV2 *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleShape *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleSin *node, locop::NodeSummary &s) const
-{
- return use_x(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleSlice *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleSoftmax *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleSpaceToBatchND *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleSpaceToDepth *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleSparseToDense *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleSplit *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleSplitV *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleSqrt *node, locop::NodeSummary &s) const
-{
- return use_x(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleSquare *node, locop::NodeSummary &s) const
-{
- return use_x(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleSquaredDifference *node,
- locop::NodeSummary &s) const
-{
- return use_xy(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleSqueeze *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleStridedSlice *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleSub *node, locop::NodeSummary &s) const
-{
- return use_xy(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleSum *node, locop::NodeSummary &s) const
-{
- return use_reducer(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleTanh *node, locop::NodeSummary &s) const
-{
- return use_x(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleTile *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleTopKV2 *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleTranspose *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleTransposeConv *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleUnique *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleUnpack *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleWhere *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleWhile *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleZerosLike *node,
- locop::NodeSummary &s) const
-{
- return use_input(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleSplitOut *node,
- locop::NodeSummary &s) const
-{
- return use_input(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleSplitVOut *node,
- locop::NodeSummary &s) const
-{
- return use_input(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleTopKV2Out *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleUniqueOut *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleUnpackOut *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleIfOut *node, locop::NodeSummary &s) const
-{
- return use_input(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleNonMaxSuppressionV4Out *node,
- locop::NodeSummary &s) const
-{
- return use_input(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleNonMaxSuppressionV5Out *node,
- locop::NodeSummary &s) const
-{
- return use_input(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleWhileOut *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleInput *, locop::NodeSummary &s) const
-{
- s.state(locop::NodeSummary::State::Complete);
- return true;
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleOutput *node, locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleBCQFullyConnected *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleBCQGather *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-bool CircleNodeSummaryBuilder::summary(const luci::CircleInstanceNorm *node,
- locop::NodeSummary &s) const
-{
- return summary_node(tbl(), node, s);
-}
-
-} // namespace
-
namespace luci
{
@@ -1806,7 +36,7 @@ bool NodeSummaryBuilder::build(const loco::Node *node, locop::NodeSummary &s) co
return true;
}
- if (CircleNodeSummaryBuilder(_tbl).build(node, s))
+ if (CircleNodeSummaryBuilder().build(node, _tbl, s))
{
return true;
}
diff --git a/compiler/luci/partition/CMakeLists.txt b/compiler/luci/partition/CMakeLists.txt
new file mode 100644
index 000000000..304ef6307
--- /dev/null
+++ b/compiler/luci/partition/CMakeLists.txt
@@ -0,0 +1,36 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+if (NOT LUCI_LIBRARY_TYPE)
+ set(LUCI_LIBRARY_TYPE "SHARED")
+endif(NOT LUCI_LIBRARY_TYPE)
+
+add_library(luci_partition ${LUCI_LIBRARY_TYPE} ${SOURCES})
+target_include_directories(luci_partition PRIVATE src)
+target_include_directories(luci_partition PUBLIC include)
+target_link_libraries(luci_partition PUBLIC luci_lang)
+target_link_libraries(luci_partition PRIVATE luci_service)
+target_link_libraries(luci_partition PRIVATE luci_log)
+target_link_libraries(luci_partition PRIVATE luci_logex)
+target_link_libraries(luci_partition PRIVATE mio_circle06)
+target_link_libraries(luci_partition PRIVATE nncc_common)
+target_link_libraries(luci_partition PRIVATE pepper_csv2vec)
+target_link_libraries(luci_partition PRIVATE oops)
+
+install(TARGETS luci_partition DESTINATION lib)
+install(DIRECTORY include/ DESTINATION include
+ FILES_MATCHING PATTERN "*.h")
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(luci_partition_test ${TESTS})
+target_include_directories(luci_partition_test PRIVATE src)
+target_link_libraries(luci_partition_test luci_lang)
+target_link_libraries(luci_partition_test luci_partition)
+target_link_libraries(luci_partition_test luci_testhelper)
+target_link_libraries(luci_partition_test luci_service)
diff --git a/compiler/luci/partition/README.md b/compiler/luci/partition/README.md
new file mode 100644
index 000000000..40a46bc56
--- /dev/null
+++ b/compiler/luci/partition/README.md
@@ -0,0 +1,4 @@
+# luci-partition
+
+`luci-partition` partitions a model into two or more sub-models, together
+with a connection configuration, while preserving the computational results.
diff --git a/compiler/luci/partition/include/luci/ConnectNode.h b/compiler/luci/partition/include/luci/ConnectNode.h
new file mode 100644
index 000000000..d8cbfc6c4
--- /dev/null
+++ b/compiler/luci/partition/include/luci/ConnectNode.h
@@ -0,0 +1,221 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PARTITION_CONNECT_NODE_H__
+#define __LUCI_PARTITION_CONNECT_NODE_H__
+
+#include <luci/IR/CircleNode.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+#include <map>
+
+namespace luci
+{
+
+/**
+ * @note MapNode2Clone is used as a map from original node to cloned node
+ *       to find the inputs of a cloned node
+ *
+ * (Original) (Clone)
+ *
+ * [A] [A']
+ * | [B] | [B']
+ * | | | |
+ * \ / \ /
+ * [C] [C']
+ *
+ * From the view of [C'] we need to find [A'] and [B']. Since we know [C]
+ * from [C'], we can take the inputs of [C], namely [A] and [B], and then
+ * look up [A]->[A'] and [B]->[B'] in the map.
+ */
+using MapNode2Clone = std::map<const CircleNode * /* ORG */, CircleNode * /* CLONE */>;
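+
+// A minimal usage sketch (illustrative only; "org_node" and "cloned_node"
+// are hypothetical pointers, not part of this header):
+//
+//   MapNode2Clone map;
+//   map.emplace(org_node, cloned_node);             // register a clone
+//   CircleNode *found = map.find(org_node)->second; // look it up later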
+
+struct CloneContext
+{
+ std::pair<MapNode2Clone::iterator, bool> emplace(const CircleNode *org, CircleNode *clone)
+ {
+ return node2clone.emplace(org, clone);
+ }
+ MapNode2Clone::iterator find(const CircleNode *org) { return node2clone.find(org); }
+ MapNode2Clone::iterator end(void) { return node2clone.end(); }
+
+ MapNode2Clone::const_iterator find(const CircleNode *org) const { return node2clone.find(org); }
+ MapNode2Clone::const_iterator end(void) const { return node2clone.end(); }
+
+ MapNode2Clone node2clone;
+};
+
+class ConnectNode final : public luci::CircleNodeVisitor<void>
+{
+public:
+ ConnectNode(luci::CloneContext &clonecontext) : _clonecontext(clonecontext) {}
+
+public:
+ void visit(const luci::CircleAbs *) final;
+ void visit(const luci::CircleAdd *) final;
+ void visit(const luci::CircleAddN *) final;
+ void visit(const luci::CircleArgMax *) final;
+ void visit(const luci::CircleArgMin *) final;
+ void visit(const luci::CircleAveragePool2D *) final;
+ void visit(const luci::CircleBatchMatMul *) final;
+ void visit(const luci::CircleBatchToSpaceND *) final;
+ void visit(const luci::CircleCast *) final;
+ void visit(const luci::CircleCeil *) final;
+ void visit(const luci::CircleConcatenation *) final;
+ void visit(const luci::CircleConst *) final;
+ void visit(const luci::CircleConv2D *) final;
+ void visit(const luci::CircleCos *) final;
+ void visit(const luci::CircleCustom *) final;
+ void visit(const luci::CircleDensify *) final;
+ void visit(const luci::CircleDepthToSpace *) final;
+ void visit(const luci::CircleDepthwiseConv2D *) final;
+ void visit(const luci::CircleDequantize *) final;
+ void visit(const luci::CircleDiv *) final;
+ void visit(const luci::CircleElu *) final;
+ void visit(const luci::CircleEqual *) final;
+ void visit(const luci::CircleExp *) final;
+ void visit(const luci::CircleExpandDims *) final;
+ void visit(const luci::CircleFakeQuant *) final;
+ void visit(const luci::CircleFill *) final;
+ void visit(const luci::CircleFloor *) final;
+ void visit(const luci::CircleFloorDiv *) final;
+ void visit(const luci::CircleFloorMod *) final;
+ void visit(const luci::CircleFullyConnected *) final;
+ void visit(const luci::CircleGather *) final;
+ void visit(const luci::CircleGatherNd *) final;
+ void visit(const luci::CircleGelu *) final;
+ void visit(const luci::CircleGreater *) final;
+ void visit(const luci::CircleGreaterEqual *) final;
+ void visit(const luci::CircleHardSwish *) final;
+ void visit(const luci::CircleIf *) final;
+ void visit(const luci::CircleL2Normalize *) final;
+ void visit(const luci::CircleL2Pool2D *) final;
+ void visit(const luci::CircleLeakyRelu *) final;
+ void visit(const luci::CircleLess *) final;
+ void visit(const luci::CircleLessEqual *) final;
+ void visit(const luci::CircleLocalResponseNormalization *) final;
+ void visit(const luci::CircleLog *) final;
+ void visit(const luci::CircleLogicalAnd *) final;
+ void visit(const luci::CircleLogicalNot *) final;
+ void visit(const luci::CircleLogicalOr *) final;
+ void visit(const luci::CircleLogistic *) final;
+ void visit(const luci::CircleLogSoftmax *) final;
+ void visit(const luci::CircleMatrixDiag *) final;
+ void visit(const luci::CircleMatrixSetDiag *) final;
+ void visit(const luci::CircleMaximum *) final;
+ void visit(const luci::CircleMaxPool2D *) final;
+ void visit(const luci::CircleMean *) final;
+ void visit(const luci::CircleMinimum *) final;
+ void visit(const luci::CircleMirrorPad *) final;
+ void visit(const luci::CircleMul *) final;
+ void visit(const luci::CircleNeg *) final;
+ void visit(const luci::CircleNonMaxSuppressionV4 *) final;
+ void visit(const luci::CircleNonMaxSuppressionV5 *) final;
+ void visit(const luci::CircleNotEqual *) final;
+ void visit(const luci::CircleOneHot *) final;
+ void visit(const luci::CirclePack *) final;
+ void visit(const luci::CirclePad *) final;
+ void visit(const luci::CirclePadV2 *) final;
+ void visit(const luci::CirclePow *) final;
+ void visit(const luci::CirclePRelu *) final;
+ void visit(const luci::CircleQuantize *) final;
+ void visit(const luci::CircleRange *) final;
+ void visit(const luci::CircleRank *) final;
+ void visit(const luci::CircleReduceAny *) final;
+ void visit(const luci::CircleReduceMax *) final;
+ void visit(const luci::CircleReduceMin *) final;
+ void visit(const luci::CircleReduceProd *) final;
+ void visit(const luci::CircleRelu *) final;
+ void visit(const luci::CircleRelu6 *) final;
+ void visit(const luci::CircleReluN1To1 *) final;
+ void visit(const luci::CircleReshape *) final;
+ void visit(const luci::CircleResizeBilinear *) final;
+ void visit(const luci::CircleResizeNearestNeighbor *) final;
+ void visit(const luci::CircleReverseSequence *) final;
+ void visit(const luci::CircleReverseV2 *) final;
+ void visit(const luci::CircleRound *) final;
+ void visit(const luci::CircleRsqrt *) final;
+ void visit(const luci::CircleScatterNd *) final;
+ void visit(const luci::CircleSegmentSum *) final;
+ void visit(const luci::CircleSelect *) final;
+ void visit(const luci::CircleSelectV2 *) final;
+ void visit(const luci::CircleShape *) final;
+ void visit(const luci::CircleSin *) final;
+ void visit(const luci::CircleSlice *) final;
+ void visit(const luci::CircleSoftmax *) final;
+ void visit(const luci::CircleSpaceToBatchND *) final;
+ void visit(const luci::CircleSpaceToDepth *) final;
+ void visit(const luci::CircleSparseToDense *) final;
+ void visit(const luci::CircleSplit *) final;
+ void visit(const luci::CircleSplitV *) final;
+ void visit(const luci::CircleSqrt *) final;
+ void visit(const luci::CircleSquare *) final;
+ void visit(const luci::CircleSquaredDifference *) final;
+ void visit(const luci::CircleSqueeze *) final;
+ void visit(const luci::CircleStridedSlice *) final;
+ void visit(const luci::CircleSVDF *) final;
+ void visit(const luci::CircleSub *) final;
+ void visit(const luci::CircleSum *) final;
+ void visit(const luci::CircleTanh *) final;
+ void visit(const luci::CircleTile *) final;
+ void visit(const luci::CircleTopKV2 *) final;
+ void visit(const luci::CircleTranspose *) final;
+ void visit(const luci::CircleTransposeConv *) final;
+ void visit(const luci::CircleUnidirectionalSequenceLSTM *) final;
+ void visit(const luci::CircleUnique *) final;
+ void visit(const luci::CircleUnpack *) final;
+ void visit(const luci::CircleWhere *) final;
+ void visit(const luci::CircleWhile *) final;
+ void visit(const luci::CircleZerosLike *) final;
+
+ // Circle Only
+ void visit(const luci::CircleBCQFullyConnected *) final;
+ void visit(const luci::CircleBCQGather *) final;
+ void visit(const luci::CircleInstanceNorm *) final;
+
+ // NOTE CircleInput and CircleOutput are not handled here as they need to
+ // be linked with the graph I/O
+
+ // Virtual
+ void visit(const luci::CircleCustomOut *) final;
+ void visit(const luci::CircleIfOut *) final;
+ // void visit(const luci::CircleInput *) final;
+ void visit(const luci::CircleNonMaxSuppressionV4Out *) final;
+ void visit(const luci::CircleNonMaxSuppressionV5Out *) final;
+ // void visit(const luci::CircleOutput *) final;
+ void visit(const luci::CircleOutputDummy *) final;
+ void visit(const luci::CircleOutputExclude *) final;
+ void visit(const luci::CircleSplitOut *) final;
+ void visit(const luci::CircleSplitVOut *) final;
+ void visit(const luci::CircleTopKV2Out *) final;
+ void visit(const luci::CircleUniqueOut *) final;
+ void visit(const luci::CircleUnpackOut *) final;
+ void visit(const luci::CircleVariable *) final;
+ void visit(const luci::CircleWhileOut *) final;
+
+public:
+ luci::CircleNode *find_clone(const luci::CircleNode *node);
+
+protected:
+ luci::CloneContext &_clonecontext;
+};
+
+/**
+ * @brief Connect a cloned node to the clones of its input nodes
+ */
+void clone_connect(const luci::CircleNode *node, luci::CloneContext &clonecontext);
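+
+// Usage sketch (assumes "node" was already cloned and the clones of all of
+// its inputs were emplace()d into "ctx"; find_clone() throws otherwise):
+//
+//   luci::CloneContext ctx;
+//   // ... emplace original->clone pairs into ctx ...
+//   luci::clone_connect(node, ctx);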
+
+} // namespace luci
+
+#endif // __LUCI_PARTITION_CONNECT_NODE_H__
diff --git a/compiler/luci/partition/include/luci/Partition.h b/compiler/luci/partition/include/luci/Partition.h
new file mode 100644
index 000000000..6189ed9f2
--- /dev/null
+++ b/compiler/luci/partition/include/luci/Partition.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PARTITION_H__
+#define __LUCI_PARTITION_H__
+
+#include <luci/IR/Module.h>
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace luci
+{
+
+/**
+ * @brief PartitionTable holds partition information
+ */
+struct PartitionTable
+{
+ enum class COMPLY
+ {
+ UNDEFINED,
+ OPCODE,
+ OPNAME,
+ };
+
+ std::vector<std::string> groups;
+ std::string default_group;
+ COMPLY comply = COMPLY::UNDEFINED;
+
+ // assign by opcode name: OPCODENAME=group
+ std::unordered_map<std::string /* OPCODENAME */, std::string /* group */> byopcodes;
+
+ // assign by op name: OPNAME=group
+ std::unordered_map<std::string /* OPNAME */, std::string /* group */> byopnames;
+};
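+
+// Example of filling a PartitionTable by hand (illustrative values only):
+//
+//   luci::PartitionTable pt;
+//   pt.groups = {"cpu", "acl_cl"};
+//   pt.default_group = "cpu";
+//   pt.comply = luci::PartitionTable::COMPLY::OPCODE;
+//   pt.byopcodes["CONV_2D"] = "acl_cl"; // OPCODENAME=group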
+
+/**
+ * @brief PartedModule holds partitioned module and group name
+ */
+struct PartedModule
+{
+ std::unique_ptr<Module> module;
+ // group name used to partition this module
+ std::string group;
+
+ // unique name (filename) of this module
+ std::string name;
+};
+
+struct PartedModules
+{
+ std::vector<PartedModule> pmodules;
+
+ // TODO add connections?
+};
+
+/**
+ * @brief Partition a Module according to a PartitionTable to produce PartedModules
+ */
+PartedModules apply(Module *module, const PartitionTable &partition);
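+
+// Usage sketch ("module" and "pt" as above; illustrative only):
+//
+//   luci::PartedModules pms = luci::apply(module, pt);
+//   for (auto &pm : pms.pmodules)
+//     std::cout << pm.name << " -> " << pm.group << std::endl;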
+
+} // namespace luci
+
+#endif // __LUCI_PARTITION_H__
diff --git a/compiler/luci/partition/include/luci/PartitionDump.h b/compiler/luci/partition/include/luci/PartitionDump.h
new file mode 100644
index 000000000..f395e57bf
--- /dev/null
+++ b/compiler/luci/partition/include/luci/PartitionDump.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PARTITION_DUMP_H__
+#define __LUCI_PARTITION_DUMP_H__
+
+#include "luci/Partition.h"
+
+#include <iostream>
+
+std::ostream &operator<<(std::ostream &os, const luci::PartitionTable &table);
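+
+// Usage sketch: stream a filled PartitionTable to dump it for debugging.
+//
+//   std::cout << table << std::endl;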
+
+#endif // __LUCI_PARTITION_DUMP_H__
diff --git a/compiler/luci/partition/include/luci/PartitionValidate.h b/compiler/luci/partition/include/luci/PartitionValidate.h
new file mode 100644
index 000000000..9f910c8cc
--- /dev/null
+++ b/compiler/luci/partition/include/luci/PartitionValidate.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PARTITION_VALIDATE_H__
+#define __LUCI_PARTITION_VALIDATE_H__
+
+#include "luci/Partition.h"
+
+#include <luci/IR/Module.h>
+
+namespace luci
+{
+
+bool validate(luci::PartitionTable &partition);
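+
+// Usage sketch: check a user-filled table before calling apply().
+//
+//   if (!luci::validate(pt)) // "pt" is a PartitionTable filled elsewhere
+//     return EXIT_FAILURE;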
+
+} // namespace luci
+
+#endif // __LUCI_PARTITION_VALIDATE_H__
diff --git a/compiler/luci/partition/src/CircleOpCode.cpp b/compiler/luci/partition/src/CircleOpCode.cpp
new file mode 100644
index 000000000..86694fa40
--- /dev/null
+++ b/compiler/luci/partition/src/CircleOpCode.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleOpCode.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+#include <mio/circle/schema_generated.h>
+
+namespace
+{
+
+using namespace luci;
+using namespace circle;
+
+class QueryOpCode final : public CircleNodeVisitor<BuiltinOperator>
+{
+public:
+// NOTE only circle builtin operators have a BuiltinOperator_XXX value
+#define CIRCLE_NODE(OPCODE, CIRCLE_CLASS) \
+ BuiltinOperator visit(const CIRCLE_CLASS *) final { return BuiltinOperator_##OPCODE; }
+#define CIRCLE_VNODE(OPCODE, CIRCLE_CLASS)
+
+#include "luci/IR/CircleNodes.lst"
+#undef CIRCLE_VNODE
+#undef CIRCLE_NODE
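+
+ // For example, an entry like CIRCLE_NODE(ADD, luci::CircleAdd) in
+ // CircleNodes.lst expands to:
+ //   BuiltinOperator visit(const luci::CircleAdd *) final { return BuiltinOperator_ADD; }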
+
+ // NOTE only builtin operators should reach these visit() methods (NOT virtual nodes)
+};
+
+class QueryCircleName final : public luci::CircleNodeVisitor<const char *>
+{
+public:
+// NOTE provide names for circle virtual nodes
+#define CIRCLE_NODE(OPCODE, CIRCLE_CLASS)
+#define CIRCLE_VNODE(OPCODE, CIRCLE_CLASS) \
+ const char *visit(const CIRCLE_CLASS *) final { return #OPCODE; }
+
+#include "luci/IR/CircleNodes.lst"
+#undef CIRCLE_VNODE
+#undef CIRCLE_NODE
+
+ // default is null
+ const char *visit(const luci::CircleNode *) final { return nullptr; }
+};
+
+} // namespace
+
+namespace luci
+{
+
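+// Returns the stringified name of a node: the CIRCLE_VNODE name for virtual
+// nodes (e.g. "CIRCLEINPUT") and the builtin operator name otherwise
+// (e.g. "SQRT" for CircleSqrt, as checked in CircleOpCode.test.cpp).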
+std::string opcode_name(const CircleNode *node)
+{
+ QueryCircleName qcn;
+ auto cname = node->accept(&qcn);
+ if (cname != nullptr)
+ return std::string(cname);
+
+ QueryOpCode qoc;
+ auto opcode = node->accept(&qoc);
+ auto name = circle::EnumNameBuiltinOperator(opcode);
+ return std::string(name);
+}
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/CircleOpCode.h b/compiler/luci/partition/src/CircleOpCode.h
new file mode 100644
index 000000000..d17b09261
--- /dev/null
+++ b/compiler/luci/partition/src/CircleOpCode.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PARTITION_CIRCLE_OP_CODE_H__
+#define __LUCI_PARTITION_CIRCLE_OP_CODE_H__
+
+#include <luci/IR/CircleNode.h>
+
+#include <string>
+
+namespace luci
+{
+
+std::string opcode_name(const CircleNode *node);
+
+} // namespace luci
+
+#endif // __LUCI_PARTITION_CIRCLE_OP_CODE_H__
diff --git a/compiler/luci/partition/src/CircleOpCode.test.cpp b/compiler/luci/partition/src/CircleOpCode.test.cpp
new file mode 100644
index 000000000..d2524a2ef
--- /dev/null
+++ b/compiler/luci/partition/src/CircleOpCode.test.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleOpCode.h"
+
+// NOTE any node will do for testing
+#include <luci/IR/Nodes/CircleSqrt.h>
+
+#include <gtest/gtest.h>
+
+TEST(CircleOpCodeTest, name)
+{
+ auto g = loco::make_graph();
+ auto node = g->nodes()->create<luci::CircleSqrt>();
+
+ auto name = luci::opcode_name(node);
+ ASSERT_EQ(name, "SQRT");
+}
diff --git a/compiler/luci/partition/src/ConnectNode.cpp b/compiler/luci/partition/src/ConnectNode.cpp
new file mode 100644
index 000000000..3d8c211c0
--- /dev/null
+++ b/compiler/luci/partition/src/ConnectNode.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include <oops/UserExn.h>
+
+namespace luci
+{
+
+void clone_connect(const luci::CircleNode *node, luci::CloneContext &clonecontext)
+{
+ ConnectNode cn(clonecontext);
+ node->accept(&cn);
+}
+
+luci::CircleNode *ConnectNode::find_clone(const luci::CircleNode *node)
+{
+ auto it = _clonecontext.find(node);
+ if (it == _clonecontext.end())
+ throw oops::UserExn("Invalid node in ConnectNode");
+ return it->second;
+}
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/ConnectNode.test.cpp b/compiler/luci/partition/src/ConnectNode.test.cpp
new file mode 100644
index 000000000..a2009c654
--- /dev/null
+++ b/compiler/luci/partition/src/ConnectNode.test.cpp
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.test.h"
+
+// This file validates "ConnectNode.test.h". Please DO NOT remove this file.
diff --git a/compiler/luci/partition/src/ConnectNode.test.h b/compiler/luci/partition/src/ConnectNode.test.h
new file mode 100644
index 000000000..18bb52a20
--- /dev/null
+++ b/compiler/luci/partition/src/ConnectNode.test.h
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CONNECT_NODE_TEST_H__
+#define __CONNECT_NODE_TEST_H__
+
+#include "luci/ConnectNode.h"
+
+#include <luci/Service/CircleNodeClone.h>
+#include <luci/test/TestIOGraph.h>
+
+#include <loco/IR/Graph.h>
+
+#include <cassert>
+#include <cstdint>
+#include <initializer_list>
+#include <memory>
+#include <stdexcept>
+#include <vector>
+
+namespace luci
+{
+namespace test
+{
+
+template <unsigned N> class TestIsOGraph : public TestIsGraphlet<N>, public TestOGraphlet
+{
+public:
+ TestIsOGraph() = default;
+
+public:
+ virtual void init(const std::initializer_list<ShapeU32> shape_in, const ShapeU32 shape_out)
+ {
+ if (shape_in.size() != N)
+ throw std::runtime_error("Failed to init TestIsOGraph");
+
+ auto g = TestIsGraphlet<N>::g();
+ TestIsGraphlet<N>::init(g, shape_in);
+ TestOGraphlet::init(g, shape_out);
+ }
+};
+
+template <class T> class NodeGraphletT
+{
+public:
+ virtual void init(loco::Graph *g)
+ {
+ _node = g->nodes()->create<T>();
+ _node->dtype(loco::DataType::S32);
+ _node->name("node");
+ }
+
+ T *node(void) const { return _node; }
+
+protected:
+ T *_node{nullptr};
+};
+
+template <class T> class NodeIsGraphletT
+{
+public:
+ virtual void init(loco::Graph *g, uint32_t n)
+ {
+ _node = g->nodes()->create<T>(n);
+ _node->dtype(loco::DataType::S32);
+ _node->name("node");
+ }
+
+ T *node(void) const { return _node; }
+
+protected:
+ T *_node{nullptr};
+};
+
+template <class T> class NodeIsOsGraphletT
+{
+public:
+ virtual void init(loco::Graph *g, uint32_t n, uint32_t m)
+ {
+ _node = g->nodes()->create<T>(n, m);
+ _node->dtype(loco::DataType::S32);
+ _node->name("node");
+ }
+
+ T *node(void) const { return _node; }
+
+protected:
+ T *_node{nullptr};
+};
+
+template <unsigned N, unsigned M>
+class TestIsOsGraph : public TestIsGraphlet<N>, public TestOsGraphlet<M>
+{
+public:
+ TestIsOsGraph() = default;
+
+public:
+ virtual void init(const std::initializer_list<ShapeU32> shape_in,
+ const std::initializer_list<ShapeU32> shape_out)
+ {
+ if (shape_in.size() != N)
+ throw std::runtime_error("Failed to init TestIsOsGraph");
+ if (shape_out.size() != M)
+ throw std::runtime_error("Failed to init TestIsOsGraph");
+
+ auto g = TestIsGraphlet<N>::g();
+ TestIsGraphlet<N>::init(g, shape_in);
+ TestOsGraphlet<M>::init(g, shape_out);
+ }
+};
+
+/**
+ * @brief ConnectionTestHelper provides a common framework for testing
+ * cloned CircleNode connections
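+ *
+ * Typical flow (see Nodes/*.test.cpp): prepare_inputs() creates cloned
+ * graph inputs and registers them in the CloneContext; clone_connect()
+ * then wires a cloned node to those registered input clones.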
+ */
+class ConnectionTestHelper
+{
+public:
+ ConnectionTestHelper() { _graph_clone = loco::make_graph(); }
+
+public:
+ template <unsigned N> void prepare_inputs(TestIsOGraph<N> *isograph)
+ {
+ assert(N == isograph->num_inputs());
+
+ for (uint32_t i = 0; i < N; ++i)
+ {
+ auto *input = _graph_clone->nodes()->create<luci::CircleInput>();
+ luci::copy_common_attributes(isograph->input(i), input);
+ _clonectx.emplace(isograph->input(i), input);
+ _inputs.push_back(input);
+ }
+ }
+
+ template <unsigned N, unsigned M> void prepare_inputs(TestIsOsGraph<N, M> *isosgraph)
+ {
+ assert(N == isosgraph->num_inputs());
+ assert(M == isosgraph->num_outputs());
+
+ for (uint32_t i = 0; i < N; ++i)
+ {
+ auto *input = _graph_clone->nodes()->create<luci::CircleInput>();
+ luci::copy_common_attributes(isosgraph->input(i), input);
+ _clonectx.emplace(isosgraph->input(i), input);
+ _inputs.push_back(input);
+ }
+ }
+
+ /**
+ * @note although there is only one input, the method name keeps the 's' to keep the tests simple
+ */
+ void prepare_inputs(TestIOGraph *isograph)
+ {
+ assert(1 == isograph->num_inputs());
+
+ auto *input = _graph_clone->nodes()->create<luci::CircleInput>();
+ luci::copy_common_attributes(isograph->input(), input);
+ _clonectx.emplace(isograph->input(), input);
+ _inputs.push_back(input);
+ }
+
+ /**
+ * @note prepare_inputs_miss is for negative testing
+ */
+ template <unsigned N> void prepare_inputs_miss(TestIsOGraph<N> *isograph)
+ {
+ assert(N == isograph->num_inputs());
+
+ for (uint32_t i = 0; i < N; ++i)
+ {
+ auto *input = _graph_clone->nodes()->create<luci::CircleInput>();
+ luci::copy_common_attributes(isograph->input(i), input);
+ if (i != 0)
+ _clonectx.emplace(isograph->input(i), input);
+ _inputs.push_back(input);
+ }
+ }
+
+ template <unsigned N, unsigned M> void prepare_inputs_miss(TestIsOsGraph<N, M> *isograph)
+ {
+ assert(N == isograph->num_inputs());
+ assert(M == isograph->num_outputs());
+
+ for (uint32_t i = 0; i < N; ++i)
+ {
+ auto *input = _graph_clone->nodes()->create<luci::CircleInput>();
+ luci::copy_common_attributes(isograph->input(i), input);
+ if (i != 0)
+ _clonectx.emplace(isograph->input(i), input);
+ _inputs.push_back(input);
+ }
+ }
+
+ void prepare_inputs_miss(TestIOGraph *isograph)
+ {
+ assert(1 == isograph->num_inputs());
+
+ auto *input = _graph_clone->nodes()->create<luci::CircleInput>();
+ luci::copy_common_attributes(isograph->input(), input);
+ // _clonectx.emplace() is NOT called on purpose
+ _inputs.push_back(input);
+ }
+
+ void clone_connect(luci::CircleNode *node, luci::CircleNode *clone)
+ {
+ _clonectx.emplace(node, clone);
+
+ luci::clone_connect(node, _clonectx);
+ }
+
+public:
+ loco::Graph *graph_clone(void) { return _graph_clone.get(); }
+
+ luci::CircleNode *inputs(uint32_t idx) { return _inputs.at(idx); }
+
+protected:
+ luci::CloneContext _clonectx;
+ std::vector<luci::CircleInput *> _inputs;
+ std::unique_ptr<loco::Graph> _graph_clone; // graph for clones
+};
+
+} // namespace test
+} // namespace luci
+
+#endif // __CONNECT_NODE_TEST_H__
diff --git a/compiler/luci/partition/src/Nodes/CircleAbs.cpp b/compiler/luci/partition/src/Nodes/CircleAbs.cpp
new file mode 100644
index 000000000..a7fbc37d1
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleAbs.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+/**
+ * @note This function, like all the other connect() helpers, exists only to reduce the LOC of the ConnectNode class
+ */
+void connect(luci::ConnectNode *cn, const luci::CircleAbs *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleAbs *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+
+ cloned->x(cn->find_clone(x));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleAbs *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleAbs.test.cpp b/compiler/luci/partition/src/Nodes/CircleAbs.test.cpp
new file mode 100644
index 000000000..ac805c1af
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleAbs.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleAbs>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Abs)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleAbs *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleAbs *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_Abs_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleAbs *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleAbs *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleAdd.cpp b/compiler/luci/partition/src/Nodes/CircleAdd.cpp
new file mode 100644
index 000000000..0754be626
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleAdd.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleAdd *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleAdd *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+ luci::CircleNode *y = loco::must_cast<luci::CircleNode *>(node->y());
+
+ cloned->x(cn->find_clone(x));
+ cloned->y(cn->find_clone(y));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleAdd *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleAdd.test.cpp b/compiler/luci/partition/src/Nodes/CircleAdd.test.cpp
new file mode 100644
index 000000000..99ae52c54
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleAdd.test.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleAdd>
+{
+public:
+ NodeGraphlet() = default;
+
+public:
+ void init(loco::Graph *g) override
+ {
+ NodeGraphletT<luci::CircleAdd>::init(g);
+
+ _node->fusedActivationFunction(luci::FusedActFunc::RELU);
+ }
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input(0));
+ node()->y(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Add)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleAdd *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleAdd *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_Add_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleAdd *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleAdd *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleAddN.cpp b/compiler/luci/partition/src/Nodes/CircleAddN.cpp
new file mode 100644
index 000000000..90aaeee3a
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleAddN.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleAddN *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleAddN *>(cn->find_clone(node));
+
+ uint32_t num_inputs = cloned->arity();
+ for (uint32_t i = 0; i < num_inputs; ++i)
+ {
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->inputs(i));
+
+ cloned->inputs(i, cn->find_clone(input));
+ }
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleAddN *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleAddN.test.cpp b/compiler/luci/partition/src/Nodes/CircleAddN.test.cpp
new file mode 100644
index 000000000..37743d3a3
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleAddN.test.cpp
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeIsGraphletT<luci::CircleAddN>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<3>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<3>::init({shape, shape, shape}, shape);
+ NodeGraphlet::init(g(), 3);
+
+ for (uint32_t i = 0; i < 3; ++i)
+ {
+ node()->inputs(i, input(i));
+ }
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_AddN)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleAddN *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleAddN *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(3, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+ ASSERT_EQ(cth.inputs(2), clone->arg(2));
+}
+
+TEST(ConnectNodeTest, connect_AddN_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleAddN *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleAddN *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleArgMax.cpp b/compiler/luci/partition/src/Nodes/CircleArgMax.cpp
new file mode 100644
index 000000000..99b30d38f
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleArgMax.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleArgMax *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleArgMax *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+ luci::CircleNode *dimension = loco::must_cast<luci::CircleNode *>(node->dimension());
+
+ cloned->input(cn->find_clone(input));
+ cloned->dimension(cn->find_clone(dimension));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleArgMax *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleArgMax.test.cpp b/compiler/luci/partition/src/Nodes/CircleArgMax.test.cpp
new file mode 100644
index 000000000..77248e07e
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleArgMax.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleArgMax>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input(0));
+ node()->dimension(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_ArgMax)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleArgMax *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleArgMax *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_ArgMax_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleArgMax *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleArgMax *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleArgMin.cpp b/compiler/luci/partition/src/Nodes/CircleArgMin.cpp
new file mode 100644
index 000000000..1bb3d84e7
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleArgMin.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleArgMin *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleArgMin *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+ luci::CircleNode *dimension = loco::must_cast<luci::CircleNode *>(node->dimension());
+
+ cloned->input(cn->find_clone(input));
+ cloned->dimension(cn->find_clone(dimension));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleArgMin *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleArgMin.test.cpp b/compiler/luci/partition/src/Nodes/CircleArgMin.test.cpp
new file mode 100644
index 000000000..ed0cf030c
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleArgMin.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleArgMin>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input(0));
+ node()->dimension(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_ArgMin)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleArgMin *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleArgMin *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_ArgMin_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleArgMin *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleArgMin *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleAveragePool2D.cpp b/compiler/luci/partition/src/Nodes/CircleAveragePool2D.cpp
new file mode 100644
index 000000000..1df86c7be
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleAveragePool2D.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleAveragePool2D *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleAveragePool2D *>(cn->find_clone(node));
+
+ luci::CircleNode *value = loco::must_cast<luci::CircleNode *>(node->value());
+
+ cloned->value(cn->find_clone(value));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleAveragePool2D *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleAveragePool2D.test.cpp b/compiler/luci/partition/src/Nodes/CircleAveragePool2D.test.cpp
new file mode 100644
index 000000000..266120b92
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleAveragePool2D.test.cpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleAveragePool2D>
+{
+public:
+ NodeGraphlet() = default;
+
+public:
+ void init(loco::Graph *g) override
+ {
+ NodeGraphletT<luci::CircleAveragePool2D>::init(g);
+
+ _node->fusedActivationFunction(luci::FusedActFunc::RELU);
+ _node->padding(luci::Padding::VALID);
+ }
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->value(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_AveragePool2D)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleAveragePool2D *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleAveragePool2D *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_AveragePool2D_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleAveragePool2D *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleAveragePool2D *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.cpp b/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.cpp
new file mode 100644
index 000000000..6d50f0e31
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleBCQFullyConnected *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleBCQFullyConnected *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+ luci::CircleNode *weights_scales = loco::must_cast<luci::CircleNode *>(node->weights_scales());
+ luci::CircleNode *weights_binary = loco::must_cast<luci::CircleNode *>(node->weights_binary());
+ luci::CircleNode *bias = loco::must_cast<luci::CircleNode *>(node->bias());
+ luci::CircleNode *weights_clusters =
+ loco::must_cast<luci::CircleNode *>(node->weights_clusters());
+
+ cloned->input(cn->find_clone(input));
+ cloned->weights_scales(cn->find_clone(weights_scales));
+ cloned->weights_binary(cn->find_clone(weights_binary));
+ cloned->bias(cn->find_clone(bias));
+ cloned->weights_clusters(cn->find_clone(weights_clusters));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleBCQFullyConnected *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.test.cpp b/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.test.cpp
new file mode 100644
index 000000000..2191f5b0a
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.test.cpp
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleBCQFullyConnected>
+{
+public:
+ NodeGraphlet() = default;
+
+public:
+ void init(loco::Graph *g) override
+ {
+ NodeGraphletT<luci::CircleBCQFullyConnected>::init(g);
+
+ _node->fusedActivationFunction(luci::FusedActFunc::RELU);
+ }
+};
+
+class TestNodeGraph : public TestIsOGraph<5>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<5>::init({shape, shape, shape, shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input(0));
+ node()->weights_scales(input(1));
+ node()->weights_binary(input(2));
+ node()->bias(input(3));
+ node()->weights_clusters(input(4));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_BCQFullyConnected)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleBCQFullyConnected *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleBCQFullyConnected *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(5, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+ ASSERT_EQ(cth.inputs(2), clone->arg(2));
+ ASSERT_EQ(cth.inputs(3), clone->arg(3));
+ ASSERT_EQ(cth.inputs(4), clone->arg(4));
+}
+
+TEST(ConnectNodeTest, connect_BCQFullyConnected_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleBCQFullyConnected *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleBCQFullyConnected *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleBCQGather.cpp b/compiler/luci/partition/src/Nodes/CircleBCQGather.cpp
new file mode 100644
index 000000000..a9e810a27
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleBCQGather.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleBCQGather *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleBCQGather *>(cn->find_clone(node));
+
+ luci::CircleNode *input_scales = loco::must_cast<luci::CircleNode *>(node->input_scales());
+ luci::CircleNode *input_binary = loco::must_cast<luci::CircleNode *>(node->input_binary());
+ luci::CircleNode *indices = loco::must_cast<luci::CircleNode *>(node->indices());
+ luci::CircleNode *input_clusters = loco::must_cast<luci::CircleNode *>(node->input_clusters());
+
+ cloned->input_scales(cn->find_clone(input_scales));
+ cloned->input_binary(cn->find_clone(input_binary));
+ cloned->indices(cn->find_clone(indices));
+ cloned->input_clusters(cn->find_clone(input_clusters));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleBCQGather *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleBCQGather.test.cpp b/compiler/luci/partition/src/Nodes/CircleBCQGather.test.cpp
new file mode 100644
index 000000000..0324d85e0
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleBCQGather.test.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleBCQGather>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<4>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<4>::init({shape, shape, shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->input_scales(input(0));
+ node()->input_binary(input(1));
+ node()->indices(input(2));
+ node()->input_clusters(input(3));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_BCQGather)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleBCQGather *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleBCQGather *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(4, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+ ASSERT_EQ(cth.inputs(2), clone->arg(2));
+ ASSERT_EQ(cth.inputs(3), clone->arg(3));
+}
+
+TEST(ConnectNodeTest, connect_BCQGather_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleBCQGather *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleBCQGather *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleBatchMatMul.cpp b/compiler/luci/partition/src/Nodes/CircleBatchMatMul.cpp
new file mode 100644
index 000000000..5a459e78c
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleBatchMatMul.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleBatchMatMul *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleBatchMatMul *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+ luci::CircleNode *y = loco::must_cast<luci::CircleNode *>(node->y());
+
+ cloned->x(cn->find_clone(x));
+ cloned->y(cn->find_clone(y));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleBatchMatMul *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleBatchMatMul.test.cpp b/compiler/luci/partition/src/Nodes/CircleBatchMatMul.test.cpp
new file mode 100644
index 000000000..e6d26a6a1
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleBatchMatMul.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleBatchMatMul>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input(0));
+ node()->y(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_BatchMatMul)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleBatchMatMul *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleBatchMatMul *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_BatchMatMul_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleBatchMatMul *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleBatchMatMul *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.cpp b/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.cpp
new file mode 100644
index 000000000..40b8f7052
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleBatchToSpaceND *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleBatchToSpaceND *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+ luci::CircleNode *block_shape = loco::must_cast<luci::CircleNode *>(node->block_shape());
+ luci::CircleNode *crops = loco::must_cast<luci::CircleNode *>(node->crops());
+
+ cloned->input(cn->find_clone(input));
+ cloned->block_shape(cn->find_clone(block_shape));
+ cloned->crops(cn->find_clone(crops));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleBatchToSpaceND *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.test.cpp b/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.test.cpp
new file mode 100644
index 000000000..e9cb350b8
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.test.cpp
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleBatchToSpaceND>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<3>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<3>::init({shape, shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input(0));
+ node()->block_shape(input(1));
+ node()->crops(input(2));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_BatchToSpaceND)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleBatchToSpaceND *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleBatchToSpaceND *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(3, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+ ASSERT_EQ(cth.inputs(2), clone->arg(2));
+}
+
+TEST(ConnectNodeTest, connect_BatchToSpaceND_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleBatchToSpaceND *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleBatchToSpaceND *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleCast.cpp b/compiler/luci/partition/src/Nodes/CircleCast.cpp
new file mode 100644
index 000000000..e1301aa06
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleCast.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleCast *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleCast *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+
+ cloned->x(cn->find_clone(x));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleCast *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleCast.test.cpp b/compiler/luci/partition/src/Nodes/CircleCast.test.cpp
new file mode 100644
index 000000000..d7b679aa2
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleCast.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleCast>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Cast)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleCast *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleCast *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_Cast_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleCast *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleCast *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleCeil.cpp b/compiler/luci/partition/src/Nodes/CircleCeil.cpp
new file mode 100644
index 000000000..e7b5f5a3f
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleCeil.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleCeil *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleCeil *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+
+ cloned->x(cn->find_clone(x));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleCeil *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleCeil.test.cpp b/compiler/luci/partition/src/Nodes/CircleCeil.test.cpp
new file mode 100644
index 000000000..cb0364844
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleCeil.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleCeil>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Ceil)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleCeil *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleCeil *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_Ceil_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleCeil *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleCeil *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleConcatenation.cpp b/compiler/luci/partition/src/Nodes/CircleConcatenation.cpp
new file mode 100644
index 000000000..d895685f0
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleConcatenation.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleConcatenation *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleConcatenation *>(cn->find_clone(node));
+
+ uint32_t num_inputs = cloned->numValues();
+ for (uint32_t i = 0; i < num_inputs; ++i)
+ {
+ luci::CircleNode *value = loco::must_cast<luci::CircleNode *>(node->values(i));
+
+ cloned->values(i, cn->find_clone(value));
+ }
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleConcatenation *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleConcatenation.test.cpp b/compiler/luci/partition/src/Nodes/CircleConcatenation.test.cpp
new file mode 100644
index 000000000..b5c05e25d
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleConcatenation.test.cpp
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeIsGraphletT<luci::CircleConcatenation>
+{
+public:
+ NodeGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, uint32_t n) override
+ {
+ NodeIsGraphletT<luci::CircleConcatenation>::init(g, n);
+
+ _node->fusedActivationFunction(luci::FusedActFunc::RELU);
+ }
+};
+
+class TestNodeGraph : public TestIsOGraph<3>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<3>::init({shape, shape, shape}, shape);
+ NodeGraphlet::init(g(), 3);
+
+ for (uint32_t i = 0; i < 3; ++i)
+ {
+ node()->values(i, input(i));
+ }
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Concatenation)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleConcatenation *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleConcatenation *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(3, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+ ASSERT_EQ(cth.inputs(2), clone->arg(2));
+}
+
+TEST(ConnectNodeTest, connect_Concatenation_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleConcatenation *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleConcatenation *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleConst.cpp b/compiler/luci/partition/src/Nodes/CircleConst.cpp
new file mode 100644
index 000000000..b88f5ef4e
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleConst.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleConst *)
+{
+ // Nothing to do
+}
+
+} // namespace luci
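
Note: CircleConst is the arity-0 case of this visitor series: a constant has no incoming graph edges to rewire, and its payload (dtype, shape, values) is presumably already copied when the node itself is cloned, so the empty `visit` is intentional rather than an omission.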
diff --git a/compiler/luci/partition/src/Nodes/CircleConv2D.cpp b/compiler/luci/partition/src/Nodes/CircleConv2D.cpp
new file mode 100644
index 000000000..ca9cce18f
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleConv2D.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleConv2D *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleConv2D *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+ luci::CircleNode *filter = loco::must_cast<luci::CircleNode *>(node->filter());
+ luci::CircleNode *bias = loco::must_cast<luci::CircleNode *>(node->bias());
+
+ cloned->input(cn->find_clone(input));
+ cloned->filter(cn->find_clone(filter));
+ cloned->bias(cn->find_clone(bias));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleConv2D *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleConv2D.test.cpp b/compiler/luci/partition/src/Nodes/CircleConv2D.test.cpp
new file mode 100644
index 000000000..4596d9618
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleConv2D.test.cpp
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleConv2D>
+{
+public:
+ NodeGraphlet() = default;
+
+public:
+ void init(loco::Graph *g) override
+ {
+ NodeGraphletT<luci::CircleConv2D>::init(g);
+
+ _node->fusedActivationFunction(luci::FusedActFunc::RELU);
+ _node->padding(luci::Padding::VALID);
+ }
+};
+
+class TestNodeGraph : public TestIsOGraph<3>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<3>::init({shape, shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input(0));
+ node()->filter(input(1));
+ node()->bias(input(2));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Conv2D)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleConv2D *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleConv2D *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(3, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+ ASSERT_EQ(cth.inputs(2), clone->arg(2));
+}
+
+TEST(ConnectNodeTest, connect_Conv2D_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleConv2D *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleConv2D *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleCos.cpp b/compiler/luci/partition/src/Nodes/CircleCos.cpp
new file mode 100644
index 000000000..76b1baac3
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleCos.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleCos *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleCos *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+
+ cloned->x(cn->find_clone(x));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleCos *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleCos.test.cpp b/compiler/luci/partition/src/Nodes/CircleCos.test.cpp
new file mode 100644
index 000000000..ba806a3f9
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleCos.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleCos>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Cos)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleCos *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleCos *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_Cos_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleCos *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleCos *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleCustom.cpp b/compiler/luci/partition/src/Nodes/CircleCustom.cpp
new file mode 100644
index 000000000..cc1604876
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleCustom.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleCustom *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleCustom *>(cn->find_clone(node));
+
+ uint32_t numInputs = cloned->numInputs();
+ for (uint32_t i = 0; i < numInputs; ++i)
+ {
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->inputs(i));
+
+ cloned->inputs(i, cn->find_clone(input));
+ }
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleCustom *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleCustom.test.cpp b/compiler/luci/partition/src/Nodes/CircleCustom.test.cpp
new file mode 100644
index 000000000..f7fe86674
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleCustom.test.cpp
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+/**
+ * @note Does not use a template like the others, as only Custom has both multiple inputs and multiple outputs
+ */
+class NodeGraphlet
+{
+public:
+ NodeGraphlet() = default;
+
+public:
+ virtual void init(loco::Graph *g, uint32_t in, uint32_t out)
+ {
+ _node = g->nodes()->create<luci::CircleCustom>(in, out);
+ _node->dtype(loco::DataType::S32);
+ _node->name("node");
+ }
+
+ luci::CircleCustom *node(void) const { return _node; }
+
+protected:
+ luci::CircleCustom *_node = nullptr;
+};
+
+class TestNodeGraph : public TestIsOGraph<3>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<3>::init({shape, shape, shape}, shape);
+ NodeGraphlet::init(g(), 3, 3);
+
+ for (uint32_t i = 0; i < 3; ++i)
+ {
+ node()->inputs(i, input(i));
+ }
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Custom)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleCustom *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleCustom *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(3, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+ ASSERT_EQ(cth.inputs(2), clone->arg(2));
+}
+
+TEST(ConnectNodeTest, connect_Custom_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleCustom *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleCustom *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
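
Note: as the `@note` in the fixture above says, Custom is the one op in this series that is variadic on both sides; each output is then surfaced as a separate CircleCustomOut virtual node, which the next file wires up. A short sketch of how such a pair is typically assembled, using the `create<CircleCustom>(in, out)` constructor shown in the graphlet above; the `custom_code` value is a hypothetical operator name, and the `index` setter on the virtual output node is assumed from the common luci pattern for *Out nodes, not taken from this diff:

  #include <luci/IR/CircleNodes.h>

  // Sketch: attach a 2-in/2-out Custom node plus its CustomOut virtual outputs.
  void attach_custom(loco::Graph *g, luci::CircleNode *in0, luci::CircleNode *in1)
  {
    auto *custom = g->nodes()->create<luci::CircleCustom>(2 /* in */, 2 /* out */);
    custom->custom_code("MyCustomOp"); // hypothetical operator name
    custom->inputs(0, in0);
    custom->inputs(1, in1);

    // One virtual output node per Custom output index.
    for (uint32_t i = 0; i < 2; ++i)
    {
      auto *out = g->nodes()->create<luci::CircleCustomOut>();
      out->input(custom);                  // reads from the Custom node
      out->index(static_cast<int32_t>(i)); // assumed setter, as on other *Out nodes
    }
  }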
diff --git a/compiler/luci/partition/src/Nodes/CircleCustomOut.cpp b/compiler/luci/partition/src/Nodes/CircleCustomOut.cpp
new file mode 100644
index 000000000..0d83cffaa
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleCustomOut.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleCustomOut *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleCustomOut *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+
+ cloned->input(cn->find_clone(input));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleCustomOut *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleCustomOut.test.cpp b/compiler/luci/partition/src/Nodes/CircleCustomOut.test.cpp
new file mode 100644
index 000000000..ddd4e93f2
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleCustomOut.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleCustomOut>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_CustomOut)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleCustomOut *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleCustomOut *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_CustomOut_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleCustomOut *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleCustomOut *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleDensify.cpp b/compiler/luci/partition/src/Nodes/CircleDensify.cpp
new file mode 100644
index 000000000..cfb236a5d
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleDensify.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleDensify *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleDensify *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+
+ cloned->input(cn->find_clone(input));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleDensify *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleDensify.test.cpp b/compiler/luci/partition/src/Nodes/CircleDensify.test.cpp
new file mode 100644
index 000000000..94076a8db
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleDensify.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleDensify>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Densify)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDensify *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDensify *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_Densify_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDensify *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDensify *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleDepthToSpace.cpp b/compiler/luci/partition/src/Nodes/CircleDepthToSpace.cpp
new file mode 100644
index 000000000..c044b4c42
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleDepthToSpace.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleDepthToSpace *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleDepthToSpace *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+
+ cloned->input(cn->find_clone(input));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleDepthToSpace *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleDepthToSpace.test.cpp b/compiler/luci/partition/src/Nodes/CircleDepthToSpace.test.cpp
new file mode 100644
index 000000000..1b61a3517
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleDepthToSpace.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleDepthToSpace>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_DepthToSpace)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDepthToSpace *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDepthToSpace *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_DepthToSpace_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDepthToSpace *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDepthToSpace *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.cpp b/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.cpp
new file mode 100644
index 000000000..2bd9ab5ca
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleDepthwiseConv2D *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleDepthwiseConv2D *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+ luci::CircleNode *filter = loco::must_cast<luci::CircleNode *>(node->filter());
+ luci::CircleNode *bias = loco::must_cast<luci::CircleNode *>(node->bias());
+
+ cloned->input(cn->find_clone(input));
+ cloned->filter(cn->find_clone(filter));
+ cloned->bias(cn->find_clone(bias));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleDepthwiseConv2D *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.test.cpp b/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.test.cpp
new file mode 100644
index 000000000..02976a488
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.test.cpp
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleDepthwiseConv2D>
+{
+public:
+ NodeGraphlet() = default;
+
+public:
+ void init(loco::Graph *g) override
+ {
+ NodeGraphletT<luci::CircleDepthwiseConv2D>::init(g);
+
+ _node->fusedActivationFunction(luci::FusedActFunc::RELU);
+ _node->padding(luci::Padding::VALID);
+ }
+};
+
+class TestNodeGraph : public TestIsOGraph<3>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<3>::init({shape, shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input(0));
+ node()->filter(input(1));
+ node()->bias(input(2));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_DepthwiseConv2D)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDepthwiseConv2D *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDepthwiseConv2D *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(3, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+ ASSERT_EQ(cth.inputs(2), clone->arg(2));
+}
+
+TEST(ConnectNodeTest, connect_DepthwiseConv2D_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDepthwiseConv2D *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDepthwiseConv2D *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
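
CircleDepthwiseConv2D is the first node in this series whose graphlet overrides init() to pick concrete attribute values (RELU activation, VALID padding). The likely reason — stated as an assumption, not something this diff shows — is that node cloning refuses nodes whose required attributes are still UNDEFINED, so a default-constructed node would make clone_node() fail before the connection logic is even exercised. A sketch of such a guard, where the enum values are real luci names but the function itself is hypothetical:

// Hypothetical shape of the attribute guard inside the cloning service.
luci::CircleDepthwiseConv2D *clone_dwconv_sketch(const luci::CircleDepthwiseConv2D *node,
                                                 loco::Graph *graph)
{
  if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
    return nullptr; // refuse to clone a node with no activation chosen
  if (node->padding() == luci::Padding::UNDEFINED)
    return nullptr; // likewise for padding
  auto *cloned = graph->nodes()->create<luci::CircleDepthwiseConv2D>();
  cloned->fusedActivationFunction(node->fusedActivationFunction());
  cloned->padding(node->padding());
  // strides, dilation and depthMultiplier would be copied here as well
  return cloned;
}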
diff --git a/compiler/luci/partition/src/Nodes/CircleDequantize.cpp b/compiler/luci/partition/src/Nodes/CircleDequantize.cpp
new file mode 100644
index 000000000..ac2642bc1
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleDequantize.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleDequantize *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleDequantize *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+
+ cloned->input(cn->find_clone(input));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleDequantize *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleDequantize.test.cpp b/compiler/luci/partition/src/Nodes/CircleDequantize.test.cpp
new file mode 100644
index 000000000..d3a43d374
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleDequantize.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleDequantize>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Dequantize)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDequantize *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDequantize *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_Dequantize_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDequantize *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDequantize *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleDiv.cpp b/compiler/luci/partition/src/Nodes/CircleDiv.cpp
new file mode 100644
index 000000000..8941a4196
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleDiv.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleDiv *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleDiv *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+ luci::CircleNode *y = loco::must_cast<luci::CircleNode *>(node->y());
+
+ cloned->x(cn->find_clone(x));
+ cloned->y(cn->find_clone(y));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleDiv *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleDiv.test.cpp b/compiler/luci/partition/src/Nodes/CircleDiv.test.cpp
new file mode 100644
index 000000000..7900beafc
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleDiv.test.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleDiv>
+{
+public:
+ NodeGraphlet() = default;
+
+public:
+ void init(loco::Graph *g) override
+ {
+ NodeGraphletT<luci::CircleDiv>::init(g);
+
+ _node->fusedActivationFunction(luci::FusedActFunc::RELU);
+ }
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input(0));
+ node()->y(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Div)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDiv *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDiv *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_Div_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDiv *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDiv *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleElu.cpp b/compiler/luci/partition/src/Nodes/CircleElu.cpp
new file mode 100644
index 000000000..b77226574
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleElu.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleElu *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleElu *>(cn->find_clone(node));
+
+ luci::CircleNode *features = loco::must_cast<luci::CircleNode *>(node->features());
+
+ cloned->features(cn->find_clone(features));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleElu *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleElu.test.cpp b/compiler/luci/partition/src/Nodes/CircleElu.test.cpp
new file mode 100644
index 000000000..20b205048
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleElu.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleElu>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->features(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Elu)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleElu *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleElu *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_Elu_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleElu *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleElu *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleEqual.cpp b/compiler/luci/partition/src/Nodes/CircleEqual.cpp
new file mode 100644
index 000000000..2dc0e759b
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleEqual.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleEqual *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleEqual *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+ luci::CircleNode *y = loco::must_cast<luci::CircleNode *>(node->y());
+
+ cloned->x(cn->find_clone(x));
+ cloned->y(cn->find_clone(y));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleEqual *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleEqual.test.cpp b/compiler/luci/partition/src/Nodes/CircleEqual.test.cpp
new file mode 100644
index 000000000..c0d3bd915
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleEqual.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleEqual>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input(0));
+ node()->y(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Equal)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleEqual *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleEqual *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_Equal_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleEqual *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleEqual *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleExp.cpp b/compiler/luci/partition/src/Nodes/CircleExp.cpp
new file mode 100644
index 000000000..c1da7908a
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleExp.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleExp *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleExp *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+
+ cloned->x(cn->find_clone(x));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleExp *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleExp.test.cpp b/compiler/luci/partition/src/Nodes/CircleExp.test.cpp
new file mode 100644
index 000000000..286f205bf
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleExp.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleExp>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Exp)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleExp *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleExp *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_Exp_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleExp *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleExp *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleExpandDims.cpp b/compiler/luci/partition/src/Nodes/CircleExpandDims.cpp
new file mode 100644
index 000000000..a6ce6495c
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleExpandDims.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleExpandDims *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleExpandDims *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+ luci::CircleNode *axis = loco::must_cast<luci::CircleNode *>(node->axis());
+
+ cloned->input(cn->find_clone(input));
+ cloned->axis(cn->find_clone(axis));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleExpandDims *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleExpandDims.test.cpp b/compiler/luci/partition/src/Nodes/CircleExpandDims.test.cpp
new file mode 100644
index 000000000..37af10f52
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleExpandDims.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleExpandDims>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input(0));
+ node()->axis(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_ExpandDims)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleExpandDims *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleExpandDims *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_ExpandDims_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleExpandDims *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleExpandDims *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleFakeQuant.cpp b/compiler/luci/partition/src/Nodes/CircleFakeQuant.cpp
new file mode 100644
index 000000000..5dfaee1b5
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleFakeQuant.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleFakeQuant *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleFakeQuant *>(cn->find_clone(node));
+
+ luci::CircleNode *inputs = loco::must_cast<luci::CircleNode *>(node->inputs());
+
+ cloned->inputs(cn->find_clone(inputs));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleFakeQuant *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleFakeQuant.test.cpp b/compiler/luci/partition/src/Nodes/CircleFakeQuant.test.cpp
new file mode 100644
index 000000000..2a2ec0cff
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleFakeQuant.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleFakeQuant>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->inputs(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_FakeQuant)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleFakeQuant *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleFakeQuant *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_FakeQuant_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleFakeQuant *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleFakeQuant *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleFill.cpp b/compiler/luci/partition/src/Nodes/CircleFill.cpp
new file mode 100644
index 000000000..32688cd9b
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleFill.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleFill *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleFill *>(cn->find_clone(node));
+
+ luci::CircleNode *dims = loco::must_cast<luci::CircleNode *>(node->dims());
+ luci::CircleNode *value = loco::must_cast<luci::CircleNode *>(node->value());
+
+ cloned->dims(cn->find_clone(dims));
+ cloned->value(cn->find_clone(value));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleFill *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleFill.test.cpp b/compiler/luci/partition/src/Nodes/CircleFill.test.cpp
new file mode 100644
index 000000000..4b3872a80
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleFill.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleFill>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->dims(input(0));
+ node()->value(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Fill)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleFill *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleFill *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_Fill_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleFill *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleFill *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleFloor.cpp b/compiler/luci/partition/src/Nodes/CircleFloor.cpp
new file mode 100644
index 000000000..f7409a221
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleFloor.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleFloor *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleFloor *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+
+ cloned->x(cn->find_clone(x));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleFloor *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleFloor.test.cpp b/compiler/luci/partition/src/Nodes/CircleFloor.test.cpp
new file mode 100644
index 000000000..883d36256
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleFloor.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleFloor>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Floor)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleFloor *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleFloor *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_Floor_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleFloor *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleFloor *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleFloorDiv.cpp b/compiler/luci/partition/src/Nodes/CircleFloorDiv.cpp
new file mode 100644
index 000000000..57e435c23
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleFloorDiv.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleFloorDiv *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleFloorDiv *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+ luci::CircleNode *y = loco::must_cast<luci::CircleNode *>(node->y());
+
+ cloned->x(cn->find_clone(x));
+ cloned->y(cn->find_clone(y));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleFloorDiv *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleFloorDiv.test.cpp b/compiler/luci/partition/src/Nodes/CircleFloorDiv.test.cpp
new file mode 100644
index 000000000..1eb603c5d
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleFloorDiv.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleFloorDiv>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input(0));
+ node()->y(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_FloorDiv)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleFloorDiv *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleFloorDiv *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_FloorDiv_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleFloorDiv *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleFloorDiv *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleFloorMod.cpp b/compiler/luci/partition/src/Nodes/CircleFloorMod.cpp
new file mode 100644
index 000000000..1b942d200
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleFloorMod.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleFloorMod *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleFloorMod *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+ luci::CircleNode *y = loco::must_cast<luci::CircleNode *>(node->y());
+
+ cloned->x(cn->find_clone(x));
+ cloned->y(cn->find_clone(y));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleFloorMod *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleFloorMod.test.cpp b/compiler/luci/partition/src/Nodes/CircleFloorMod.test.cpp
new file mode 100644
index 000000000..680bf1680
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleFloorMod.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleFloorMod>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input(0));
+ node()->y(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_FloorMod)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleFloorMod *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleFloorMod *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_FloorMod_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleFloorMod *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleFloorMod *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleFullyConnected.cpp b/compiler/luci/partition/src/Nodes/CircleFullyConnected.cpp
new file mode 100644
index 000000000..206b47aec
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleFullyConnected.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleFullyConnected *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleFullyConnected *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+ luci::CircleNode *weights = loco::must_cast<luci::CircleNode *>(node->weights());
+ luci::CircleNode *bias = loco::must_cast<luci::CircleNode *>(node->bias());
+
+ cloned->input(cn->find_clone(input));
+ cloned->weights(cn->find_clone(weights));
+ cloned->bias(cn->find_clone(bias));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleFullyConnected *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleFullyConnected.test.cpp b/compiler/luci/partition/src/Nodes/CircleFullyConnected.test.cpp
new file mode 100644
index 000000000..39eea5571
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleFullyConnected.test.cpp
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleFullyConnected>
+{
+public:
+ NodeGraphlet() = default;
+
+public:
+ void init(loco::Graph *g) override
+ {
+ NodeGraphletT<luci::CircleFullyConnected>::init(g);
+
+ _node->fusedActivationFunction(luci::FusedActFunc::RELU);
+ _node->weights_format(luci::CircleFullyConnected::WeightsFormat::DEFAULT);
+ }
+};
+
+class TestNodeGraph : public TestIsOGraph<3>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<3>::init({shape, shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input(0));
+ node()->weights(input(1));
+ node()->bias(input(2));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_FullyConnected)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleFullyConnected *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleFullyConnected *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(3, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+ ASSERT_EQ(cth.inputs(2), clone->arg(2));
+}
+
+TEST(ConnectNodeTest, connect_FullyConnected_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleFullyConnected *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleFullyConnected *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleGather.cpp b/compiler/luci/partition/src/Nodes/CircleGather.cpp
new file mode 100644
index 000000000..4f059cbe4
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleGather.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleGather *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleGather *>(cn->find_clone(node));
+
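+  // find_clone() is expected to throw for nodes without a registered clone; the _NEG tests rely on this.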
+ luci::CircleNode *params = loco::must_cast<luci::CircleNode *>(node->params());
+ luci::CircleNode *indices = loco::must_cast<luci::CircleNode *>(node->indices());
+
+ cloned->params(cn->find_clone(params));
+ cloned->indices(cn->find_clone(indices));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleGather *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleGather.test.cpp b/compiler/luci/partition/src/Nodes/CircleGather.test.cpp
new file mode 100644
index 000000000..f427e0456
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleGather.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleGather>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->params(input(0));
+ node()->indices(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Gather)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleGather *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+  ASSERT_NO_THROW(loco::must_cast<luci::CircleGather *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_Gather_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleGather *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+  ASSERT_NO_THROW(loco::must_cast<luci::CircleGather *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleGatherNd.cpp b/compiler/luci/partition/src/Nodes/CircleGatherNd.cpp
new file mode 100644
index 000000000..6a9c3b47f
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleGatherNd.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleGatherNd *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleGatherNd *>(cn->find_clone(node));
+
+ luci::CircleNode *params = loco::must_cast<luci::CircleNode *>(node->params());
+ luci::CircleNode *indices = loco::must_cast<luci::CircleNode *>(node->indices());
+
+ cloned->params(cn->find_clone(params));
+ cloned->indices(cn->find_clone(indices));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleGatherNd *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleGatherNd.test.cpp b/compiler/luci/partition/src/Nodes/CircleGatherNd.test.cpp
new file mode 100644
index 000000000..0207e917d
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleGatherNd.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleGatherNd>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->params(input(0));
+ node()->indices(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_GatherNd)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleGatherNd *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+  ASSERT_NO_THROW(loco::must_cast<luci::CircleGatherNd *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_GatherNd_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleGatherNd *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+  ASSERT_NO_THROW(loco::must_cast<luci::CircleGatherNd *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleGelu.cpp b/compiler/luci/partition/src/Nodes/CircleGelu.cpp
new file mode 100644
index 000000000..74ef51cc8
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleGelu.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleGelu *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleGelu *>(cn->find_clone(node));
+
+ luci::CircleNode *features = loco::must_cast<luci::CircleNode *>(node->features());
+
+ cloned->features(cn->find_clone(features));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleGelu *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleGelu.test.cpp b/compiler/luci/partition/src/Nodes/CircleGelu.test.cpp
new file mode 100644
index 000000000..ebef3f791
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleGelu.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleGelu>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->features(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Gelu)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleGelu *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleGelu *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_Gelu_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleGelu *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleGelu *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleGreater.cpp b/compiler/luci/partition/src/Nodes/CircleGreater.cpp
new file mode 100644
index 000000000..9f4b18fde
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleGreater.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleGreater *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleGreater *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+ luci::CircleNode *y = loco::must_cast<luci::CircleNode *>(node->y());
+
+ cloned->x(cn->find_clone(x));
+ cloned->y(cn->find_clone(y));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleGreater *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleGreater.test.cpp b/compiler/luci/partition/src/Nodes/CircleGreater.test.cpp
new file mode 100644
index 000000000..61d1f5957
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleGreater.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleGreater>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input(0));
+ node()->y(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Greater)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleGreater *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleGreater *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_Greater_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleGreater *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleGreater *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleGreaterEqual.cpp b/compiler/luci/partition/src/Nodes/CircleGreaterEqual.cpp
new file mode 100644
index 000000000..76130a843
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleGreaterEqual.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleGreaterEqual *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleGreaterEqual *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+ luci::CircleNode *y = loco::must_cast<luci::CircleNode *>(node->y());
+
+ cloned->x(cn->find_clone(x));
+ cloned->y(cn->find_clone(y));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleGreaterEqual *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleGreaterEqual.test.cpp b/compiler/luci/partition/src/Nodes/CircleGreaterEqual.test.cpp
new file mode 100644
index 000000000..7e4e1ef74
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleGreaterEqual.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleGreaterEqual>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input(0));
+ node()->y(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_GreaterEqual)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleGreaterEqual *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleGreaterEqual *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_GreaterEqual_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleGreaterEqual *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleGreaterEqual *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleHardSwish.cpp b/compiler/luci/partition/src/Nodes/CircleHardSwish.cpp
new file mode 100644
index 000000000..d6903f305
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleHardSwish.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleHardSwish *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleHardSwish *>(cn->find_clone(node));
+
+ luci::CircleNode *features = loco::must_cast<luci::CircleNode *>(node->features());
+
+ cloned->features(cn->find_clone(features));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleHardSwish *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleHardSwish.test.cpp b/compiler/luci/partition/src/Nodes/CircleHardSwish.test.cpp
new file mode 100644
index 000000000..770597313
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleHardSwish.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleHardSwish>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->features(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_HardSwish)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleHardSwish *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleHardSwish *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_HardSwish_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleHardSwish *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleHardSwish *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleIf.cpp b/compiler/luci/partition/src/Nodes/CircleIf.cpp
new file mode 100644
index 000000000..45e4ec48b
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleIf.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleIf *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleIf *>(cn->find_clone(node));
+
+ luci::CircleNode *cond = loco::must_cast<luci::CircleNode *>(node->cond());
+
+ cloned->cond(cn->find_clone(cond));
+
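+  // Besides cond(), CircleIf carries a variadic input list; connect each element by index.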
+ auto input_count = node->input_count();
+ for (uint32_t in = 0; in < input_count; ++in)
+ {
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input(in));
+
+ cloned->input(in, cn->find_clone(input));
+ }
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleIf *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleIf.test.cpp b/compiler/luci/partition/src/Nodes/CircleIf.test.cpp
new file mode 100644
index 000000000..cbb766221
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleIf.test.cpp
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeIsOsGraphletT<luci::CircleIf>
+{
+public:
+ NodeGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, uint32_t n, uint32_t m) override
+ {
+    // cond() consumes one of the n inputs, so only n - 1 remain for the variadic input list
+ NodeIsOsGraphletT::init(g, n - 1, m);
+ }
+};
+
+class TestNodeGraph : public TestIsOsGraph<3, 1>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOsGraph<3, 1>::init({shape, shape, shape}, {shape});
+ NodeGraphlet::init(g(), 3, 1);
+
+ node()->cond(input(0));
+ node()->input(0, input(1));
+ node()->input(1, input(2));
+
+ output(0)->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_If)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
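+  // Templated variant for multi-output graphs (TestIsOsGraph<3, 1>).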
+ cth.prepare_inputs<3, 1>(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleIf *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleIf *>(clone));
+
+ cth.clone_connect(node, clone);
+
+  // arity(3) = cond + input(2)
+ ASSERT_EQ(3, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+ ASSERT_EQ(cth.inputs(2), clone->arg(2));
+}
+
+TEST(ConnectNodeTest, connect_If_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss<3, 1>(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleIf *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleIf *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleIfOut.cpp b/compiler/luci/partition/src/Nodes/CircleIfOut.cpp
new file mode 100644
index 000000000..2eb5dda1f
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleIfOut.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
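+// CircleIfOut is the virtual node for one output of CircleIf; only its single input edge needs reconnecting.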
+void connect(luci::ConnectNode *cn, const luci::CircleIfOut *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleIfOut *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+
+ cloned->input(cn->find_clone(input));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleIfOut *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleIfOut.test.cpp b/compiler/luci/partition/src/Nodes/CircleIfOut.test.cpp
new file mode 100644
index 000000000..ec2dde3b2
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleIfOut.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleIfOut>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_IfOut)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleIfOut *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleIfOut *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_IfOut_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleIfOut *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleIfOut *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleInstanceNorm.cpp b/compiler/luci/partition/src/Nodes/CircleInstanceNorm.cpp
new file mode 100644
index 000000000..f64ffd8b4
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleInstanceNorm.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleInstanceNorm *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleInstanceNorm *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+ luci::CircleNode *gamma = loco::must_cast<luci::CircleNode *>(node->gamma());
+ luci::CircleNode *beta = loco::must_cast<luci::CircleNode *>(node->beta());
+
+ cloned->input(cn->find_clone(input));
+ cloned->gamma(cn->find_clone(gamma));
+ cloned->beta(cn->find_clone(beta));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleInstanceNorm *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleInstanceNorm.test.cpp b/compiler/luci/partition/src/Nodes/CircleInstanceNorm.test.cpp
new file mode 100644
index 000000000..4363c6c18
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleInstanceNorm.test.cpp
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleInstanceNorm>
+{
+public:
+ NodeGraphlet() = default;
+
+public:
+ void init(loco::Graph *g) override
+ {
+ NodeGraphletT<luci::CircleInstanceNorm>::init(g);
+
+ _node->fusedActivationFunction(luci::FusedActFunc::RELU);
+ }
+};
+
+class TestNodeGraph : public TestIsOGraph<3>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<3>::init({shape, shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input(0));
+ node()->gamma(input(1));
+ node()->beta(input(2));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_InstanceNorm)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleInstanceNorm *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleInstanceNorm *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(3, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+ ASSERT_EQ(cth.inputs(2), clone->arg(2));
+}
+
+TEST(ConnectNodeTest, connect_InstanceNorm_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleInstanceNorm *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleInstanceNorm *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleL2Normalize.cpp b/compiler/luci/partition/src/Nodes/CircleL2Normalize.cpp
new file mode 100644
index 000000000..df26930ec
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleL2Normalize.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleL2Normalize *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleL2Normalize *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+
+ cloned->x(cn->find_clone(x));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleL2Normalize *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleL2Normalize.test.cpp b/compiler/luci/partition/src/Nodes/CircleL2Normalize.test.cpp
new file mode 100644
index 000000000..b114a15f0
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleL2Normalize.test.cpp
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleL2Normalize>
+{
+public:
+ NodeGraphlet() = default;
+
+public:
+ void init(loco::Graph *g) override
+ {
+ NodeGraphletT<luci::CircleL2Normalize>::init(g);
+
+ _node->fusedActivationFunction(luci::FusedActFunc::RELU);
+ }
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_L2Normalize)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleL2Normalize *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleL2Normalize *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_L2Normalize_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleL2Normalize *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleL2Normalize *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleL2Pool2D.cpp b/compiler/luci/partition/src/Nodes/CircleL2Pool2D.cpp
new file mode 100644
index 000000000..1eacddb62
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleL2Pool2D.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleL2Pool2D *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleL2Pool2D *>(cn->find_clone(node));
+
+ luci::CircleNode *value = loco::must_cast<luci::CircleNode *>(node->value());
+
+ cloned->value(cn->find_clone(value));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleL2Pool2D *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleL2Pool2D.test.cpp b/compiler/luci/partition/src/Nodes/CircleL2Pool2D.test.cpp
new file mode 100644
index 000000000..22f99d5ef
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleL2Pool2D.test.cpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleL2Pool2D>
+{
+public:
+ NodeGraphlet() = default;
+
+public:
+ void init(loco::Graph *g) override
+ {
+ NodeGraphletT<luci::CircleL2Pool2D>::init(g);
+
+ _node->fusedActivationFunction(luci::FusedActFunc::RELU);
+ _node->padding(luci::Padding::VALID);
+ }
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->value(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_L2Pool2D)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleL2Pool2D *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleL2Pool2D *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_L2Pool2D_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleL2Pool2D *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleL2Pool2D *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleLeakyRelu.cpp b/compiler/luci/partition/src/Nodes/CircleLeakyRelu.cpp
new file mode 100644
index 000000000..1702ddeb1
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleLeakyRelu.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleLeakyRelu *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleLeakyRelu *>(cn->find_clone(node));
+
+ luci::CircleNode *features = loco::must_cast<luci::CircleNode *>(node->features());
+
+ cloned->features(cn->find_clone(features));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleLeakyRelu *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleLeakyRelu.test.cpp b/compiler/luci/partition/src/Nodes/CircleLeakyRelu.test.cpp
new file mode 100644
index 000000000..71dc55ea0
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleLeakyRelu.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleLeakyRelu>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->features(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_LeakyRelu)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLeakyRelu *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLeakyRelu *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_LeakyRelu_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLeakyRelu *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLeakyRelu *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleLess.cpp b/compiler/luci/partition/src/Nodes/CircleLess.cpp
new file mode 100644
index 000000000..52726f9be
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleLess.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleLess *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleLess *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+ luci::CircleNode *y = loco::must_cast<luci::CircleNode *>(node->y());
+
+ cloned->x(cn->find_clone(x));
+ cloned->y(cn->find_clone(y));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleLess *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleLess.test.cpp b/compiler/luci/partition/src/Nodes/CircleLess.test.cpp
new file mode 100644
index 000000000..c5d194efe
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleLess.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleLess>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input(0));
+ node()->y(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Less)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLess *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLess *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_Less_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLess *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLess *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleLessEqual.cpp b/compiler/luci/partition/src/Nodes/CircleLessEqual.cpp
new file mode 100644
index 000000000..e9a3c412b
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleLessEqual.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleLessEqual *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleLessEqual *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+ luci::CircleNode *y = loco::must_cast<luci::CircleNode *>(node->y());
+
+ cloned->x(cn->find_clone(x));
+ cloned->y(cn->find_clone(y));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleLessEqual *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleLessEqual.test.cpp b/compiler/luci/partition/src/Nodes/CircleLessEqual.test.cpp
new file mode 100644
index 000000000..29f4ababa
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleLessEqual.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleLessEqual>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input(0));
+ node()->y(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_LessEqual)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLessEqual *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLessEqual *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_LessEqual_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLessEqual *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLessEqual *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.cpp b/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.cpp
new file mode 100644
index 000000000..7a00bf94f
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleLocalResponseNormalization *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleLocalResponseNormalization *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+
+ cloned->input(cn->find_clone(input));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleLocalResponseNormalization *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.test.cpp b/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.test.cpp
new file mode 100644
index 000000000..5e5723817
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleLocalResponseNormalization>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_LocalResponseNormalization)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLocalResponseNormalization *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLocalResponseNormalization *>(clone));
+
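+  // clone_connect() runs the ConnectNode visitor to rewire the clone's operands.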
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_LocalResponseNormalization_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLocalResponseNormalization *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLocalResponseNormalization *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleLog.cpp b/compiler/luci/partition/src/Nodes/CircleLog.cpp
new file mode 100644
index 000000000..676d22fc0
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleLog.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleLog *node)
+{
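+  // Locate this node's clone in the target graph, then wire the clone's
+  // single operand 'x' to the clone of the original operand.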
+ auto *cloned = loco::must_cast<luci::CircleLog *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+
+ cloned->x(cn->find_clone(x));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleLog *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleLog.test.cpp b/compiler/luci/partition/src/Nodes/CircleLog.test.cpp
new file mode 100644
index 000000000..0a2b97538
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleLog.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleLog>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
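+    // The graph input feeds the Log node, which feeds the graph output.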
+ node()->x(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Log)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLog *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLog *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_Log_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLog *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLog *>(clone));
+
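+  // Input clones were deliberately not prepared, so rewiring must throw.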
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleLogSoftmax.cpp b/compiler/luci/partition/src/Nodes/CircleLogSoftmax.cpp
new file mode 100644
index 000000000..c67b08f0f
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleLogSoftmax.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleLogSoftmax *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleLogSoftmax *>(cn->find_clone(node));
+
+ luci::CircleNode *logits = loco::must_cast<luci::CircleNode *>(node->logits());
+
+ cloned->logits(cn->find_clone(logits));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleLogSoftmax *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleLogSoftmax.test.cpp b/compiler/luci/partition/src/Nodes/CircleLogSoftmax.test.cpp
new file mode 100644
index 000000000..b6daeb781
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleLogSoftmax.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleLogSoftmax>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->logits(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_LogSoftmax)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLogSoftmax *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLogSoftmax *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_LogSoftmax_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLogSoftmax *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLogSoftmax *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleLogicalAnd.cpp b/compiler/luci/partition/src/Nodes/CircleLogicalAnd.cpp
new file mode 100644
index 000000000..1498d85ec
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleLogicalAnd.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleLogicalAnd *node)
+{
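+  // Binary operator: remap both operands, x and y, to their clones.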
+ auto *cloned = loco::must_cast<luci::CircleLogicalAnd *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+ luci::CircleNode *y = loco::must_cast<luci::CircleNode *>(node->y());
+
+ cloned->x(cn->find_clone(x));
+ cloned->y(cn->find_clone(y));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleLogicalAnd *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleLogicalAnd.test.cpp b/compiler/luci/partition/src/Nodes/CircleLogicalAnd.test.cpp
new file mode 100644
index 000000000..0b9513626
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleLogicalAnd.test.cpp
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleLogicalAnd>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
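+    // Both graph inputs share 'shape' and feed x and y respectively.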
+ node()->x(input(0));
+ node()->y(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_LogicalAnd)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLogicalAnd *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLogicalAnd *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_LogicalAnd_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLogicalAnd *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLogicalAnd *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleLogicalNot.cpp b/compiler/luci/partition/src/Nodes/CircleLogicalNot.cpp
new file mode 100644
index 000000000..f9c077e4e
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleLogicalNot.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleLogicalNot *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleLogicalNot *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+
+ cloned->x(cn->find_clone(x));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleLogicalNot *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleLogicalNot.test.cpp b/compiler/luci/partition/src/Nodes/CircleLogicalNot.test.cpp
new file mode 100644
index 000000000..88dff3651
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleLogicalNot.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleLogicalNot>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_LogicalNot)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLogicalNot *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLogicalNot *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_LogicalNot_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLogicalNot *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLogicalNot *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleLogicalOr.cpp b/compiler/luci/partition/src/Nodes/CircleLogicalOr.cpp
new file mode 100644
index 000000000..59592e41d
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleLogicalOr.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleLogicalOr *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleLogicalOr *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+ luci::CircleNode *y = loco::must_cast<luci::CircleNode *>(node->y());
+
+ cloned->x(cn->find_clone(x));
+ cloned->y(cn->find_clone(y));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleLogicalOr *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleLogicalOr.test.cpp b/compiler/luci/partition/src/Nodes/CircleLogicalOr.test.cpp
new file mode 100644
index 000000000..35f8029c0
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleLogicalOr.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleLogicalOr>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input(0));
+ node()->y(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_LogicalOr)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLogicalOr *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLogicalOr *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_LogicalOr_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLogicalOr *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLogicalOr *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleLogistic.cpp b/compiler/luci/partition/src/Nodes/CircleLogistic.cpp
new file mode 100644
index 000000000..804597bed
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleLogistic.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleLogistic *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleLogistic *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+
+ cloned->x(cn->find_clone(x));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleLogistic *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleLogistic.test.cpp b/compiler/luci/partition/src/Nodes/CircleLogistic.test.cpp
new file mode 100644
index 000000000..241d84040
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleLogistic.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleLogistic>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Logistic)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLogistic *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLogistic *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_Logistic_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLogistic *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleLogistic *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleMatrixDiag.cpp b/compiler/luci/partition/src/Nodes/CircleMatrixDiag.cpp
new file mode 100644
index 000000000..297e9f2cc
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleMatrixDiag.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleMatrixDiag *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleMatrixDiag *>(cn->find_clone(node));
+
+ luci::CircleNode *diagonal = loco::must_cast<luci::CircleNode *>(node->diagonal());
+
+ cloned->diagonal(cn->find_clone(diagonal));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleMatrixDiag *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleMatrixDiag.test.cpp b/compiler/luci/partition/src/Nodes/CircleMatrixDiag.test.cpp
new file mode 100644
index 000000000..472cab8c8
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleMatrixDiag.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleMatrixDiag>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->diagonal(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_MatrixDiag)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMatrixDiag *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMatrixDiag *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_MatrixDiag_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMatrixDiag *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMatrixDiag *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.cpp b/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.cpp
new file mode 100644
index 000000000..b327aacad
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleMatrixSetDiag *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleMatrixSetDiag *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+ luci::CircleNode *diagonal = loco::must_cast<luci::CircleNode *>(node->diagonal());
+
+ cloned->input(cn->find_clone(input));
+ cloned->diagonal(cn->find_clone(diagonal));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleMatrixSetDiag *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.test.cpp b/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.test.cpp
new file mode 100644
index 000000000..4ff797c43
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleMatrixSetDiag>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input(0));
+ node()->diagonal(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_MatrixSetDiag)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMatrixSetDiag *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMatrixSetDiag *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_MatrixSetDiag_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMatrixSetDiag *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMatrixSetDiag *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleMaxPool2D.cpp b/compiler/luci/partition/src/Nodes/CircleMaxPool2D.cpp
new file mode 100644
index 000000000..dee90e5c0
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleMaxPool2D.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleMaxPool2D *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleMaxPool2D *>(cn->find_clone(node));
+
+ luci::CircleNode *value = loco::must_cast<luci::CircleNode *>(node->value());
+
+ cloned->value(cn->find_clone(value));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleMaxPool2D *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleMaxPool2D.test.cpp b/compiler/luci/partition/src/Nodes/CircleMaxPool2D.test.cpp
new file mode 100644
index 000000000..949e0d724
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleMaxPool2D.test.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleMaxPool2D>
+{
+public:
+ NodeGraphlet() = default;
+
+public:
+ void init(loco::Graph *g) override
+ {
+ NodeGraphletT<luci::CircleMaxPool2D>::init(g);
+
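+    // clone_node() copies node attributes, so set valid values before cloning.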
+ _node->fusedActivationFunction(luci::FusedActFunc::RELU);
+ _node->padding(luci::Padding::VALID);
+ }
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->value(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_MaxPool2D)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMaxPool2D *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMaxPool2D *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_MaxPool2D_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMaxPool2D *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMaxPool2D *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleMaximum.cpp b/compiler/luci/partition/src/Nodes/CircleMaximum.cpp
new file mode 100644
index 000000000..459917e3e
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleMaximum.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleMaximum *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleMaximum *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+ luci::CircleNode *y = loco::must_cast<luci::CircleNode *>(node->y());
+
+ cloned->x(cn->find_clone(x));
+ cloned->y(cn->find_clone(y));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleMaximum *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleMaximum.test.cpp b/compiler/luci/partition/src/Nodes/CircleMaximum.test.cpp
new file mode 100644
index 000000000..e6a6d5741
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleMaximum.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleMaximum>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input(0));
+ node()->y(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Maximum)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMaximum *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMaximum *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_Maximum_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMaximum *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMaximum *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleMean.cpp b/compiler/luci/partition/src/Nodes/CircleMean.cpp
new file mode 100644
index 000000000..c704d0054
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleMean.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleMean *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleMean *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+ luci::CircleNode *reduction_indices =
+ loco::must_cast<luci::CircleNode *>(node->reduction_indices());
+
+ cloned->input(cn->find_clone(input));
+ cloned->reduction_indices(cn->find_clone(reduction_indices));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleMean *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleMean.test.cpp b/compiler/luci/partition/src/Nodes/CircleMean.test.cpp
new file mode 100644
index 000000000..838d7aea2
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleMean.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleMean>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input(0));
+ node()->reduction_indices(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Mean)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMean *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMean *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_Mean_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMean *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMean *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleMinimum.cpp b/compiler/luci/partition/src/Nodes/CircleMinimum.cpp
new file mode 100644
index 000000000..8958bf64a
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleMinimum.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleMinimum *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleMinimum *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+ luci::CircleNode *y = loco::must_cast<luci::CircleNode *>(node->y());
+
+ cloned->x(cn->find_clone(x));
+ cloned->y(cn->find_clone(y));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleMinimum *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleMinimum.test.cpp b/compiler/luci/partition/src/Nodes/CircleMinimum.test.cpp
new file mode 100644
index 000000000..a6c86a27a
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleMinimum.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleMinimum>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input(0));
+ node()->y(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Minimum)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMinimum *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMinimum *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_Minimum_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMinimum *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMinimum *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleMirrorPad.cpp b/compiler/luci/partition/src/Nodes/CircleMirrorPad.cpp
new file mode 100644
index 000000000..91c3cb97a
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleMirrorPad.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleMirrorPad *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleMirrorPad *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+ luci::CircleNode *paddings = loco::must_cast<luci::CircleNode *>(node->paddings());
+
+ cloned->input(cn->find_clone(input));
+ cloned->paddings(cn->find_clone(paddings));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleMirrorPad *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleMirrorPad.test.cpp b/compiler/luci/partition/src/Nodes/CircleMirrorPad.test.cpp
new file mode 100644
index 000000000..b837e1012
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleMirrorPad.test.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleMirrorPad>
+{
+public:
+ NodeGraphlet() = default;
+
+public:
+ void init(loco::Graph *g) override
+ {
+ NodeGraphletT<luci::CircleMirrorPad>::init(g);
+
+ _node->mode(luci::MirrorPadMode::REFLECT);
+ }
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input(0));
+ node()->paddings(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_MirrorPad)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMirrorPad *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMirrorPad *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_MirrorPad_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMirrorPad *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMirrorPad *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleMul.cpp b/compiler/luci/partition/src/Nodes/CircleMul.cpp
new file mode 100644
index 000000000..12e14728c
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleMul.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleMul *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleMul *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+ luci::CircleNode *y = loco::must_cast<luci::CircleNode *>(node->y());
+
+ cloned->x(cn->find_clone(x));
+ cloned->y(cn->find_clone(y));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleMul *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleMul.test.cpp b/compiler/luci/partition/src/Nodes/CircleMul.test.cpp
new file mode 100644
index 000000000..b316679f8
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleMul.test.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleMul>
+{
+public:
+ NodeGraphlet() = default;
+
+public:
+  void init(loco::Graph *g) override
+ {
+ NodeGraphletT<luci::CircleMul>::init(g);
+
+ _node->fusedActivationFunction(luci::FusedActFunc::RELU);
+ }
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input(0));
+ node()->y(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Mul)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMul *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMul *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_Mul_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMul *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleMul *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
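Two details are specific to the Mul test: the graphlet sets `fusedActivationFunction(RELU)` before cloning, so the positive test incidentally exercises attribute copying in `clone_node`, and the rest is the skeleton every operator test in this series repeats. The only per-operator variation is the node type, the graph base (`TestIOGraph` for single-input operators, `TestIsOGraph<N>` for N-ary ones), and the wiring in `init`. Assuming the helper signatures used above, that skeleton could be factored into one generic routine; `run_connect_test` below is a hypothetical sketch, and the files instead keep the body inline per operator, which keeps each failure report self-explanatory:

    // Sketch only; assumes the same headers these tests include
    // ("ConnectNode.test.h", <luci/Service/CircleNodeClone.h>, gtest).
    template <class CircleT, class TestGraphT> void run_connect_test(uint32_t arity)
    {
      TestGraphT tng;
      tng.init({2, 3});

      ConnectionTestHelper cth;
      cth.prepare_inputs(&tng);

      auto *node = tng.node();
      ASSERT_NO_THROW(loco::must_cast<CircleT *>(node));

      auto *clone = luci::clone_node(node, cth.graph_clone());
      ASSERT_NO_THROW(loco::must_cast<CircleT *>(clone));

      cth.clone_connect(node, clone);

      ASSERT_EQ(arity, clone->arity());
      for (uint32_t i = 0; i < arity; ++i)
        ASSERT_EQ(cth.inputs(i), clone->arg(i));
    }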
diff --git a/compiler/luci/partition/src/Nodes/CircleNeg.cpp b/compiler/luci/partition/src/Nodes/CircleNeg.cpp
new file mode 100644
index 000000000..e9dcc45cd
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleNeg.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleNeg *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleNeg *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+
+ cloned->x(cn->find_clone(x));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleNeg *node) { connect(this, node); }
+
+} // namespace luci
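The `loco::must_cast` used throughout these files is a checked downcast whose failure is a thrown exception rather than a null pointer, which is why the tests can meaningfully wrap it in `ASSERT_NO_THROW`. A sketch in the same spirit, with hypothetical stand-in node types:

    #include <stdexcept>

    struct NodeBase
    {
      virtual ~NodeBase() = default;
    };
    struct MulNode : NodeBase {};
    struct NegNode : NodeBase {};

    // In the spirit of loco::must_cast: a downcast that throws on a
    // dynamic-type mismatch instead of returning nullptr.
    template <typename Derived, typename Base> Derived checked_cast(Base base)
    {
      auto *derived = dynamic_cast<Derived>(base);
      if (derived == nullptr)
        throw std::invalid_argument("checked_cast: type mismatch");
      return derived;
    }

    int main()
    {
      NegNode neg;
      NodeBase *n = &neg;
      (void)checked_cast<NegNode *>(n); // correct kind: no throw
      try
      {
        (void)checked_cast<MulNode *>(n); // wrong kind: throws
      }
      catch (const std::invalid_argument &)
      {
      }
      return 0;
    }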
diff --git a/compiler/luci/partition/src/Nodes/CircleNeg.test.cpp b/compiler/luci/partition/src/Nodes/CircleNeg.test.cpp
new file mode 100644
index 000000000..ab13c9416
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleNeg.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleNeg>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Neg)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleNeg *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleNeg *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_Neg_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleNeg *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleNeg *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.cpp
new file mode 100644
index 000000000..88d72e12f
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleNonMaxSuppressionV4 *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleNonMaxSuppressionV4 *>(cn->find_clone(node));
+
+ luci::CircleNode *boxes = loco::must_cast<luci::CircleNode *>(node->boxes());
+ luci::CircleNode *scores = loco::must_cast<luci::CircleNode *>(node->scores());
+ luci::CircleNode *max_output_size = loco::must_cast<luci::CircleNode *>(node->max_output_size());
+ luci::CircleNode *iou_threshold = loco::must_cast<luci::CircleNode *>(node->iou_threshold());
+ luci::CircleNode *score_threshold = loco::must_cast<luci::CircleNode *>(node->score_threshold());
+
+ cloned->boxes(cn->find_clone(boxes));
+ cloned->scores(cn->find_clone(scores));
+ cloned->max_output_size(cn->find_clone(max_output_size));
+ cloned->iou_threshold(cn->find_clone(iou_threshold));
+ cloned->score_threshold(cn->find_clone(score_threshold));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleNonMaxSuppressionV4 *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.test.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.test.cpp
new file mode 100644
index 000000000..e796a14c3
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.test.cpp
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleNonMaxSuppressionV4>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<5>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<5>::init({shape, shape, shape, shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->boxes(input(0));
+ node()->scores(input(1));
+ node()->max_output_size(input(2));
+ node()->iou_threshold(input(3));
+ node()->score_threshold(input(4));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_NonMaxSuppressionV4)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleNonMaxSuppressionV4 *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleNonMaxSuppressionV4 *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(5, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+ ASSERT_EQ(cth.inputs(2), clone->arg(2));
+ ASSERT_EQ(cth.inputs(3), clone->arg(3));
+ ASSERT_EQ(cth.inputs(4), clone->arg(4));
+}
+
+TEST(ConnectNodeTest, connect_NonMaxSuppressionV4_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleNonMaxSuppressionV4 *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleNonMaxSuppressionV4 *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.cpp
new file mode 100644
index 000000000..61caa3a4c
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleNonMaxSuppressionV4Out *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleNonMaxSuppressionV4Out *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+
+ cloned->input(cn->find_clone(input));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleNonMaxSuppressionV4Out *node) { connect(this, node); }
+
+} // namespace luci
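NonMaxSuppressionV4 produces two results (the selected indices and the valid-output count), and luci models multi-result operations as one main node carrying the operation plus one `...Out` node per result. The Out node's only operand is its `input()` edge back to the main node, which is why this connect re-links a single edge while the main node's connect re-links five. An illustrative stand-in, not the luci API:

    // Illustrative decomposition of a two-result op into one main node
    // plus per-result taps, mirroring CircleNonMaxSuppressionV4 and
    // CircleNonMaxSuppressionV4Out above.
    struct NmsMain
    {
      // carries the operation and its five operands (boxes, scores, ...)
    };

    struct NmsTap
    {
      NmsMain *from = nullptr; // plays the role of node->input() above
      int index = 0;           // which of the op's results this tap selects
    };

    int main()
    {
      NmsMain nms;
      NmsTap selected_indices{&nms, 0};
      NmsTap valid_outputs{&nms, 1};
      return (selected_indices.from == valid_outputs.from) ? 0 : 1;
    }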
diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.test.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.test.cpp
new file mode 100644
index 000000000..eb04f2688
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleNonMaxSuppressionV4Out>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_NonMaxSuppressionV4Out)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleNonMaxSuppressionV4Out *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleNonMaxSuppressionV4Out *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_NonMaxSuppressionV4Out_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleNonMaxSuppressionV4Out *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleNonMaxSuppressionV4Out *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.cpp
new file mode 100644
index 000000000..3b0b755a4
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleNonMaxSuppressionV5 *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleNonMaxSuppressionV5 *>(cn->find_clone(node));
+
+ luci::CircleNode *boxes = loco::must_cast<luci::CircleNode *>(node->boxes());
+ luci::CircleNode *scores = loco::must_cast<luci::CircleNode *>(node->scores());
+ luci::CircleNode *max_output_size = loco::must_cast<luci::CircleNode *>(node->max_output_size());
+ luci::CircleNode *iou_threshold = loco::must_cast<luci::CircleNode *>(node->iou_threshold());
+ luci::CircleNode *score_threshold = loco::must_cast<luci::CircleNode *>(node->score_threshold());
+ luci::CircleNode *soft_nms_sigma = loco::must_cast<luci::CircleNode *>(node->soft_nms_sigma());
+
+ cloned->boxes(cn->find_clone(boxes));
+ cloned->scores(cn->find_clone(scores));
+ cloned->max_output_size(cn->find_clone(max_output_size));
+ cloned->iou_threshold(cn->find_clone(iou_threshold));
+ cloned->score_threshold(cn->find_clone(score_threshold));
+ cloned->soft_nms_sigma(cn->find_clone(soft_nms_sigma));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleNonMaxSuppressionV5 *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.test.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.test.cpp
new file mode 100644
index 000000000..c9c31b315
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.test.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleNonMaxSuppressionV5>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<6>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<6>::init({shape, shape, shape, shape, shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->boxes(input(0));
+ node()->scores(input(1));
+ node()->max_output_size(input(2));
+ node()->iou_threshold(input(3));
+ node()->score_threshold(input(4));
+ node()->soft_nms_sigma(input(5));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_NonMaxSuppressionV5)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleNonMaxSuppressionV5 *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleNonMaxSuppressionV5 *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(6, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+ ASSERT_EQ(cth.inputs(2), clone->arg(2));
+ ASSERT_EQ(cth.inputs(3), clone->arg(3));
+ ASSERT_EQ(cth.inputs(4), clone->arg(4));
+ ASSERT_EQ(cth.inputs(5), clone->arg(5));
+}
+
+TEST(ConnectNodeTest, connect_NonMaxSuppressionV5_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleNonMaxSuppressionV5 *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleNonMaxSuppressionV5 *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.cpp
new file mode 100644
index 000000000..3eed260c2
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleNonMaxSuppressionV5Out *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleNonMaxSuppressionV5Out *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+
+ cloned->input(cn->find_clone(input));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleNonMaxSuppressionV5Out *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp
new file mode 100644
index 000000000..2c5822fe3
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleNonMaxSuppressionV5Out>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_NonMaxSuppressionV5Out)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleNonMaxSuppressionV5Out *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleNonMaxSuppressionV5Out *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_NonMaxSuppressionV5Out_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleNonMaxSuppressionV5Out *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleNonMaxSuppressionV5Out *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleNotEqual.cpp b/compiler/luci/partition/src/Nodes/CircleNotEqual.cpp
new file mode 100644
index 000000000..29a6a43bb
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleNotEqual.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleNotEqual *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleNotEqual *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+ luci::CircleNode *y = loco::must_cast<luci::CircleNode *>(node->y());
+
+ cloned->x(cn->find_clone(x));
+ cloned->y(cn->find_clone(y));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleNotEqual *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleNotEqual.test.cpp b/compiler/luci/partition/src/Nodes/CircleNotEqual.test.cpp
new file mode 100644
index 000000000..2983e1b27
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleNotEqual.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleNotEqual>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input(0));
+ node()->y(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_NotEqual)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleNotEqual *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleNotEqual *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_NotEqual_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleNotEqual *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleNotEqual *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleOneHot.cpp b/compiler/luci/partition/src/Nodes/CircleOneHot.cpp
new file mode 100644
index 000000000..d172fb834
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleOneHot.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleOneHot *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleOneHot *>(cn->find_clone(node));
+
+ luci::CircleNode *indices = loco::must_cast<luci::CircleNode *>(node->indices());
+ luci::CircleNode *depth = loco::must_cast<luci::CircleNode *>(node->depth());
+ luci::CircleNode *on_value = loco::must_cast<luci::CircleNode *>(node->on_value());
+ luci::CircleNode *off_value = loco::must_cast<luci::CircleNode *>(node->off_value());
+
+ cloned->indices(cn->find_clone(indices));
+ cloned->depth(cn->find_clone(depth));
+ cloned->on_value(cn->find_clone(on_value));
+ cloned->off_value(cn->find_clone(off_value));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleOneHot *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleOneHot.test.cpp b/compiler/luci/partition/src/Nodes/CircleOneHot.test.cpp
new file mode 100644
index 000000000..59780e424
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleOneHot.test.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleOneHot>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<4>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<4>::init({shape, shape, shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->indices(input(0));
+ node()->depth(input(1));
+ node()->on_value(input(2));
+ node()->off_value(input(3));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_OneHot)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleOneHot *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleOneHot *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(4, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+ ASSERT_EQ(cth.inputs(2), clone->arg(2));
+ ASSERT_EQ(cth.inputs(3), clone->arg(3));
+}
+
+TEST(ConnectNodeTest, connect_OneHot_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleOneHot *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleOneHot *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleOutputDummy.cpp b/compiler/luci/partition/src/Nodes/CircleOutputDummy.cpp
new file mode 100644
index 000000000..61d7620aa
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleOutputDummy.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleOutputDummy *)
+{
+ // Nothing to do
+}
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleOutputExclude.cpp b/compiler/luci/partition/src/Nodes/CircleOutputExclude.cpp
new file mode 100644
index 000000000..36ce35077
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleOutputExclude.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleOutputExclude *)
+{
+ // Nothing to do
+}
+
+} // namespace luci
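`CircleOutputDummy` and `CircleOutputExclude` are virtual nodes with no operands, so there is genuinely nothing to re-link; the empty overloads still have to exist so the `ConnectNode` visitor covers every node kind. A minimal sketch of that pattern with stand-in types:

    #include <iostream>

    struct Mul {};
    struct OutputDummy {};

    // Every kind gets an overload; kinds with nothing to wire get an
    // intentionally empty body rather than being left unhandled.
    struct Connector
    {
      void visit(const Mul &) { std::cout << "re-link x, y\n"; }
      void visit(const OutputDummy &) { /* nothing to do */ }
    };

    int main()
    {
      Connector c;
      c.visit(Mul{});
      c.visit(OutputDummy{});
      return 0;
    }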
diff --git a/compiler/luci/partition/src/Nodes/CirclePRelu.cpp b/compiler/luci/partition/src/Nodes/CirclePRelu.cpp
new file mode 100644
index 000000000..6a2325715
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CirclePRelu.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CirclePRelu *node)
+{
+ auto *cloned = loco::must_cast<luci::CirclePRelu *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+ luci::CircleNode *alpha = loco::must_cast<luci::CircleNode *>(node->alpha());
+
+ cloned->input(cn->find_clone(input));
+ cloned->alpha(cn->find_clone(alpha));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CirclePRelu *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CirclePRelu.test.cpp b/compiler/luci/partition/src/Nodes/CirclePRelu.test.cpp
new file mode 100644
index 000000000..f2a2e2c7d
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CirclePRelu.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CirclePRelu>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input(0));
+ node()->alpha(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_PRelu)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CirclePRelu *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CirclePRelu *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_PRelu_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CirclePRelu *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CirclePRelu *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CirclePack.cpp b/compiler/luci/partition/src/Nodes/CirclePack.cpp
new file mode 100644
index 000000000..d4b49bfa9
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CirclePack.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CirclePack *node)
+{
+ auto *cloned = loco::must_cast<luci::CirclePack *>(cn->find_clone(node));
+
+ uint32_t values_count = cloned->values_count();
+ for (uint32_t i = 0; i < values_count; ++i)
+ {
+ luci::CircleNode *value = loco::must_cast<luci::CircleNode *>(node->values(i));
+
+ cloned->values(i, cn->find_clone(value));
+ }
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CirclePack *node) { connect(this, node); }
+
+} // namespace luci
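Pack is the first variadic operator in this series: instead of named operands it re-links `values_count()` slots in a loop. Note that the count is read from `cloned`, which relies on `clone_node` having already fixed the clone's arity when it copied the node. A stand-in sketch of the loop:

    #include <cassert>
    #include <unordered_map>
    #include <vector>

    struct Node
    {
      virtual ~Node() = default;
    };

    struct Pack : Node
    {
      std::vector<Node *> values; // size plays the role of values_count()
    };

    using CloneMap = std::unordered_map<const Node *, Node *>;

    // Mirrors the loop in connect(cn, node) above: slot i of the clone
    // becomes the clone of slot i of the original.
    void connect_pack(const CloneMap &clones, const Pack *node)
    {
      auto *cloned = static_cast<Pack *>(clones.at(node));
      for (std::size_t i = 0; i < cloned->values.size(); ++i)
        cloned->values[i] = clones.at(node->values[i]);
    }

    int main()
    {
      Node a, b;
      Pack p;
      p.values = {&a, &b};

      Node ca, cb;
      Pack cp;
      cp.values = {nullptr, nullptr}; // arity already copied, unwired

      CloneMap clones{{&a, &ca}, {&b, &cb}, {&p, &cp}};
      connect_pack(clones, &p);
      assert(cp.values[0] == &ca && cp.values[1] == &cb);
      return 0;
    }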
diff --git a/compiler/luci/partition/src/Nodes/CirclePack.test.cpp b/compiler/luci/partition/src/Nodes/CirclePack.test.cpp
new file mode 100644
index 000000000..665b137e8
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CirclePack.test.cpp
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeIsGraphletT<luci::CirclePack>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<3>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<3>::init({shape, shape, shape}, shape);
+ NodeGraphlet::init(g(), 3);
+
+ for (uint32_t i = 0; i < 3; ++i)
+ {
+ node()->values(i, input(i));
+ }
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Pack)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CirclePack *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CirclePack *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(3, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+ ASSERT_EQ(cth.inputs(2), clone->arg(2));
+}
+
+TEST(ConnectNodeTest, connect_Pack_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CirclePack *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CirclePack *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
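The variadic case also changes the test side: this graphlet derives from `NodeIsGraphletT` and its `init(g(), 3)` takes the input count, since a variadic node must be created with its arity fixed before the loop in `TestNodeGraph::init` can wire `values(i, input(i))`. A tiny stand-in showing why the count comes first:

    #include <vector>

    // Hypothetical analog of a variadic node: input slots are sized at
    // construction, before any wiring happens.
    struct PackSketch
    {
      explicit PackSketch(std::size_t n) : values(n, nullptr) {}
      std::vector<void *> values;
    };

    int main()
    {
      PackSketch pack(3); // plays the role of NodeGraphlet::init(g(), 3)
      for (std::size_t i = 0; i < pack.values.size(); ++i)
        pack.values[i] = &pack; // wire each slot (self-edge, sketch only)
      return 0;
    }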
diff --git a/compiler/luci/partition/src/Nodes/CirclePad.cpp b/compiler/luci/partition/src/Nodes/CirclePad.cpp
new file mode 100644
index 000000000..0a1d6f7f9
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CirclePad.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CirclePad *node)
+{
+ auto *cloned = loco::must_cast<luci::CirclePad *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+ luci::CircleNode *paddings = loco::must_cast<luci::CircleNode *>(node->paddings());
+
+ cloned->input(cn->find_clone(input));
+ cloned->paddings(cn->find_clone(paddings));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CirclePad *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CirclePad.test.cpp b/compiler/luci/partition/src/Nodes/CirclePad.test.cpp
new file mode 100644
index 000000000..72f97d6a4
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CirclePad.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CirclePad>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input(0));
+ node()->paddings(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Pad)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CirclePad *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CirclePad *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_Pad_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CirclePad *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CirclePad *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CirclePadV2.cpp b/compiler/luci/partition/src/Nodes/CirclePadV2.cpp
new file mode 100644
index 000000000..969cc271d
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CirclePadV2.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CirclePadV2 *node)
+{
+ auto *cloned = loco::must_cast<luci::CirclePadV2 *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+ luci::CircleNode *paddings = loco::must_cast<luci::CircleNode *>(node->paddings());
+ luci::CircleNode *constant_values = loco::must_cast<luci::CircleNode *>(node->constant_values());
+
+ cloned->input(cn->find_clone(input));
+ cloned->paddings(cn->find_clone(paddings));
+ cloned->constant_values(cn->find_clone(constant_values));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CirclePadV2 *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CirclePadV2.test.cpp b/compiler/luci/partition/src/Nodes/CirclePadV2.test.cpp
new file mode 100644
index 000000000..9829f6269
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CirclePadV2.test.cpp
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CirclePadV2>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<3>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<3>::init({shape, shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input(0));
+ node()->paddings(input(1));
+ node()->constant_values(input(2));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_PadV2)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CirclePadV2 *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CirclePadV2 *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(3, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+ ASSERT_EQ(cth.inputs(2), clone->arg(2));
+}
+
+TEST(ConnectNodeTest, connect_PadV2_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CirclePadV2 *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CirclePadV2 *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CirclePow.cpp b/compiler/luci/partition/src/Nodes/CirclePow.cpp
new file mode 100644
index 000000000..ce69e7402
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CirclePow.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CirclePow *node)
+{
+ auto *cloned = loco::must_cast<luci::CirclePow *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+ luci::CircleNode *y = loco::must_cast<luci::CircleNode *>(node->y());
+
+ cloned->x(cn->find_clone(x));
+ cloned->y(cn->find_clone(y));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CirclePow *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CirclePow.test.cpp b/compiler/luci/partition/src/Nodes/CirclePow.test.cpp
new file mode 100644
index 000000000..f4e49c023
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CirclePow.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CirclePow>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input(0));
+ node()->y(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Pow)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CirclePow *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CirclePow *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_Pow_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CirclePow *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CirclePow *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleQuantize.cpp b/compiler/luci/partition/src/Nodes/CircleQuantize.cpp
new file mode 100644
index 000000000..903a94e32
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleQuantize.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleQuantize *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleQuantize *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+
+ cloned->input(cn->find_clone(input));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleQuantize *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleQuantize.test.cpp b/compiler/luci/partition/src/Nodes/CircleQuantize.test.cpp
new file mode 100644
index 000000000..5ca1a6baa
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleQuantize.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleQuantize>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Quantize)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleQuantize *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleQuantize *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_Quantize_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
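+ // prepare_inputs_miss() sets up the helper without valid clones for the
+ // graph inputs, so clone_connect() is expected to throw.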
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleQuantize *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleQuantize *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleRange.cpp b/compiler/luci/partition/src/Nodes/CircleRange.cpp
new file mode 100644
index 000000000..fa1a02c71
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleRange.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
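+// Range has three operands (start, limit, delta); each one is remapped to the
+// clone of its original producer.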
+void connect(luci::ConnectNode *cn, const luci::CircleRange *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleRange *>(cn->find_clone(node));
+
+ luci::CircleNode *start = loco::must_cast<luci::CircleNode *>(node->start());
+ luci::CircleNode *limit = loco::must_cast<luci::CircleNode *>(node->limit());
+ luci::CircleNode *delta = loco::must_cast<luci::CircleNode *>(node->delta());
+
+ cloned->start(cn->find_clone(start));
+ cloned->limit(cn->find_clone(limit));
+ cloned->delta(cn->find_clone(delta));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleRange *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleRange.test.cpp b/compiler/luci/partition/src/Nodes/CircleRange.test.cpp
new file mode 100644
index 000000000..b5b0c8aa8
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleRange.test.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleRange>
+{
+public:
+ NodeGraphlet() = default;
+};
+
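+// TestIsOGraph<3> provides three graph inputs, one per Range operand
+// (start, limit, delta).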
+class TestNodeGraph : public TestIsOGraph<3>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<3>::init({shape, shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->start(input(0));
+ node()->limit(input(1));
+ node()->delta(input(2));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Range)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleRange *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleRange *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(3, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+ ASSERT_EQ(cth.inputs(2), clone->arg(2));
+}
+
+TEST(ConnectNodeTest, connect_Range_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleRange *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleRange *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleRank.cpp b/compiler/luci/partition/src/Nodes/CircleRank.cpp
new file mode 100644
index 000000000..35b4764aa
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleRank.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleRank *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleRank *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+
+ cloned->input(cn->find_clone(input));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleRank *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleRank.test.cpp b/compiler/luci/partition/src/Nodes/CircleRank.test.cpp
new file mode 100644
index 000000000..5a0a71a7e
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleRank.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleRank>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Rank)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleRank *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleRank *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_Rank_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleRank *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleRank *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleReduceAny.cpp b/compiler/luci/partition/src/Nodes/CircleReduceAny.cpp
new file mode 100644
index 000000000..262e12ac1
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleReduceAny.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
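+// Reduce-type nodes take the tensor to reduce plus reduction_indices; both
+// operands are remapped to their clones.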
+void connect(luci::ConnectNode *cn, const luci::CircleReduceAny *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleReduceAny *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+ luci::CircleNode *reduction_indices =
+ loco::must_cast<luci::CircleNode *>(node->reduction_indices());
+
+ cloned->input(cn->find_clone(input));
+ cloned->reduction_indices(cn->find_clone(reduction_indices));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleReduceAny *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleReduceAny.test.cpp b/compiler/luci/partition/src/Nodes/CircleReduceAny.test.cpp
new file mode 100644
index 000000000..45c292073
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleReduceAny.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleReduceAny>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input(0));
+ node()->reduction_indices(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_ReduceAny)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleReduceAny *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleReduceAny *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_ReduceAny_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleReduceAny *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleReduceAny *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleReduceMax.cpp b/compiler/luci/partition/src/Nodes/CircleReduceMax.cpp
new file mode 100644
index 000000000..d91c78e41
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleReduceMax.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleReduceMax *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleReduceMax *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+ luci::CircleNode *reduction_indices =
+ loco::must_cast<luci::CircleNode *>(node->reduction_indices());
+
+ cloned->input(cn->find_clone(input));
+ cloned->reduction_indices(cn->find_clone(reduction_indices));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleReduceMax *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleReduceMax.test.cpp b/compiler/luci/partition/src/Nodes/CircleReduceMax.test.cpp
new file mode 100644
index 000000000..2ad18f339
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleReduceMax.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleReduceMax>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input(0));
+ node()->reduction_indices(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_ReduceMax)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleReduceMax *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleReduceMax *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_ReduceMax_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleReduceMax *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleReduceMax *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleReduceMin.cpp b/compiler/luci/partition/src/Nodes/CircleReduceMin.cpp
new file mode 100644
index 000000000..65fca6ab3
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleReduceMin.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleReduceMin *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleReduceMin *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+ luci::CircleNode *reduction_indices =
+ loco::must_cast<luci::CircleNode *>(node->reduction_indices());
+
+ cloned->input(cn->find_clone(input));
+ cloned->reduction_indices(cn->find_clone(reduction_indices));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleReduceMin *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleReduceMin.test.cpp b/compiler/luci/partition/src/Nodes/CircleReduceMin.test.cpp
new file mode 100644
index 000000000..db48f54d7
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleReduceMin.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleReduceMin>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input(0));
+ node()->reduction_indices(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_ReduceMin)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleReduceMin *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleReduceMin *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_ReduceMin_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleReduceMin *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleReduceMin *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleReduceProd.cpp b/compiler/luci/partition/src/Nodes/CircleReduceProd.cpp
new file mode 100644
index 000000000..daac168b2
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleReduceProd.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleReduceProd *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleReduceProd *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+ luci::CircleNode *reduction_indices =
+ loco::must_cast<luci::CircleNode *>(node->reduction_indices());
+
+ cloned->input(cn->find_clone(input));
+ cloned->reduction_indices(cn->find_clone(reduction_indices));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleReduceProd *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleReduceProd.test.cpp b/compiler/luci/partition/src/Nodes/CircleReduceProd.test.cpp
new file mode 100644
index 000000000..f5f69f0ff
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleReduceProd.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleReduceProd>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input(0));
+ node()->reduction_indices(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_ReduceProd)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleReduceProd *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleReduceProd *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_ReduceProd_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleReduceProd *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleReduceProd *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleRelu.cpp b/compiler/luci/partition/src/Nodes/CircleRelu.cpp
new file mode 100644
index 000000000..63ac31ba9
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleRelu.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
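+// Relu's single operand is exposed as features(); remap it to its clone.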
+void connect(luci::ConnectNode *cn, const luci::CircleRelu *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleRelu *>(cn->find_clone(node));
+
+ luci::CircleNode *features = loco::must_cast<luci::CircleNode *>(node->features());
+
+ cloned->features(cn->find_clone(features));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleRelu *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleRelu.test.cpp b/compiler/luci/partition/src/Nodes/CircleRelu.test.cpp
new file mode 100644
index 000000000..ec4d10f09
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleRelu.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleRelu>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->features(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Relu)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleRelu *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleRelu *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_Relu_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleRelu *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleRelu *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleRelu6.cpp b/compiler/luci/partition/src/Nodes/CircleRelu6.cpp
new file mode 100644
index 000000000..c2956c456
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleRelu6.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleRelu6 *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleRelu6 *>(cn->find_clone(node));
+
+ luci::CircleNode *features = loco::must_cast<luci::CircleNode *>(node->features());
+
+ cloned->features(cn->find_clone(features));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleRelu6 *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleRelu6.test.cpp b/compiler/luci/partition/src/Nodes/CircleRelu6.test.cpp
new file mode 100644
index 000000000..e9ecbe2e6
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleRelu6.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleRelu6>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->features(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Relu6)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleRelu6 *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleRelu6 *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_Relu6_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleRelu6 *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleRelu6 *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleReluN1To1.cpp b/compiler/luci/partition/src/Nodes/CircleReluN1To1.cpp
new file mode 100644
index 000000000..1141297da
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleReluN1To1.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleReluN1To1 *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleReluN1To1 *>(cn->find_clone(node));
+
+ luci::CircleNode *features = loco::must_cast<luci::CircleNode *>(node->features());
+
+ cloned->features(cn->find_clone(features));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleReluN1To1 *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleReluN1To1.test.cpp b/compiler/luci/partition/src/Nodes/CircleReluN1To1.test.cpp
new file mode 100644
index 000000000..ae60a97e5
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleReluN1To1.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleReluN1To1>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->features(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_ReluN1To1)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleReluN1To1 *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleReluN1To1 *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_ReluN1To1_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleReluN1To1 *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleReluN1To1 *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleReshape.cpp b/compiler/luci/partition/src/Nodes/CircleReshape.cpp
new file mode 100644
index 000000000..49f7c64a7
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleReshape.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
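+// Reshape consumes a tensor and a target shape tensor; remap both operands.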
+void connect(luci::ConnectNode *cn, const luci::CircleReshape *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleReshape *>(cn->find_clone(node));
+
+ luci::CircleNode *tensor = loco::must_cast<luci::CircleNode *>(node->tensor());
+ luci::CircleNode *shape = loco::must_cast<luci::CircleNode *>(node->shape());
+
+ cloned->tensor(cn->find_clone(tensor));
+ cloned->shape(cn->find_clone(shape));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleReshape *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleReshape.test.cpp b/compiler/luci/partition/src/Nodes/CircleReshape.test.cpp
new file mode 100644
index 000000000..198cfa1b6
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleReshape.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleReshape>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->tensor(input(0));
+ node()->shape(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Reshape)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleReshape *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleReshape *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_Reshape_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleReshape *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleReshape *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleResizeBilinear.cpp b/compiler/luci/partition/src/Nodes/CircleResizeBilinear.cpp
new file mode 100644
index 000000000..41fdedf2a
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleResizeBilinear.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleResizeBilinear *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleResizeBilinear *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+ luci::CircleNode *size = loco::must_cast<luci::CircleNode *>(node->size());
+
+ cloned->input(cn->find_clone(input));
+ cloned->size(cn->find_clone(size));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleResizeBilinear *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleResizeBilinear.test.cpp b/compiler/luci/partition/src/Nodes/CircleResizeBilinear.test.cpp
new file mode 100644
index 000000000..437e448a6
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleResizeBilinear.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleResizeBilinear>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input(0));
+ node()->size(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_ResizeBilinear)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleResizeBilinear *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleResizeBilinear *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_ResizeBilinear_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleResizeBilinear *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleResizeBilinear *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.cpp b/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.cpp
new file mode 100644
index 000000000..567db4961
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleResizeNearestNeighbor *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleResizeNearestNeighbor *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+ luci::CircleNode *size = loco::must_cast<luci::CircleNode *>(node->size());
+
+ cloned->input(cn->find_clone(input));
+ cloned->size(cn->find_clone(size));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleResizeNearestNeighbor *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.test.cpp b/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.test.cpp
new file mode 100644
index 000000000..5dc99a385
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleResizeNearestNeighbor>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input(0));
+ node()->size(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_ResizeNearestNeighbor)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleResizeNearestNeighbor *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleResizeNearestNeighbor *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_ResizeNearestNeighbor_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleResizeNearestNeighbor *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleResizeNearestNeighbor *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleReverseSequence.cpp b/compiler/luci/partition/src/Nodes/CircleReverseSequence.cpp
new file mode 100644
index 000000000..348cdbb78
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleReverseSequence.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleReverseSequence *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleReverseSequence *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+ luci::CircleNode *seq_lengths = loco::must_cast<luci::CircleNode *>(node->seq_lengths());
+
+ cloned->input(cn->find_clone(input));
+ cloned->seq_lengths(cn->find_clone(seq_lengths));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleReverseSequence *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleReverseSequence.test.cpp b/compiler/luci/partition/src/Nodes/CircleReverseSequence.test.cpp
new file mode 100644
index 000000000..751910326
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleReverseSequence.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleReverseSequence>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input(0));
+ node()->seq_lengths(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_ReverseSequence)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleReverseSequence *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleReverseSequence *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_ReverseSequence_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleReverseSequence *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleReverseSequence *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleReverseV2.cpp b/compiler/luci/partition/src/Nodes/CircleReverseV2.cpp
new file mode 100644
index 000000000..4b8c4a444
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleReverseV2.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleReverseV2 *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleReverseV2 *>(cn->find_clone(node));
+
+ luci::CircleNode *tensor = loco::must_cast<luci::CircleNode *>(node->tensor());
+ luci::CircleNode *axis = loco::must_cast<luci::CircleNode *>(node->axis());
+
+ cloned->tensor(cn->find_clone(tensor));
+ cloned->axis(cn->find_clone(axis));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleReverseV2 *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleReverseV2.test.cpp b/compiler/luci/partition/src/Nodes/CircleReverseV2.test.cpp
new file mode 100644
index 000000000..351c6f2c0
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleReverseV2.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleReverseV2>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->tensor(input(0));
+ node()->axis(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_ReverseV2)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleReverseV2 *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleReverseV2 *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_ReverseV2_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleReverseV2 *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleReverseV2 *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleRound.cpp b/compiler/luci/partition/src/Nodes/CircleRound.cpp
new file mode 100644
index 000000000..97d002870
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleRound.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleRound *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleRound *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+
+ cloned->x(cn->find_clone(x));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleRound *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleRound.test.cpp b/compiler/luci/partition/src/Nodes/CircleRound.test.cpp
new file mode 100644
index 000000000..02f335dc3
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleRound.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleRound>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Round)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleRound *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleRound *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_Round_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleRound *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleRound *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleRsqrt.cpp b/compiler/luci/partition/src/Nodes/CircleRsqrt.cpp
new file mode 100644
index 000000000..44abd5ef7
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleRsqrt.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleRsqrt *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleRsqrt *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+
+ cloned->x(cn->find_clone(x));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleRsqrt *node) { connect(this, node); }
+
+} // namespace luci
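CircleRound and CircleRsqrt above, like CircleSin further below, differ only in the node type: each has a single x() input. A hypothetical template that would collapse these x()-based visitors (ours; the sources deliberately keep one translation unit per operator, and input()- or logits()-style setters would still need variants of their own):

  template <typename CircleNodeT>
  void connect_unary(luci::ConnectNode *cn, const CircleNodeT *node)
  {
    auto *cloned = loco::must_cast<CircleNodeT *>(cn->find_clone(node));
    auto *x = loco::must_cast<luci::CircleNode *>(node->x());
    cloned->x(cn->find_clone(x));
  }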
diff --git a/compiler/luci/partition/src/Nodes/CircleRsqrt.test.cpp b/compiler/luci/partition/src/Nodes/CircleRsqrt.test.cpp
new file mode 100644
index 000000000..39ae1f8f3
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleRsqrt.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleRsqrt>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Rsqrt)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleRsqrt *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleRsqrt *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_Rsqrt_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleRsqrt *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleRsqrt *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleSVDF.cpp b/compiler/luci/partition/src/Nodes/CircleSVDF.cpp
new file mode 100644
index 000000000..e2b99c49d
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSVDF.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleSVDF *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleSVDF *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+ luci::CircleNode *weight_feature = loco::must_cast<luci::CircleNode *>(node->weight_feature());
+ luci::CircleNode *weight_time = loco::must_cast<luci::CircleNode *>(node->weight_time());
+ luci::CircleNode *bias = loco::must_cast<luci::CircleNode *>(node->bias());
+ luci::CircleNode *input_activation_state =
+ loco::must_cast<luci::CircleNode *>(node->input_activation_state());
+
+ cloned->input(cn->find_clone(input));
+ cloned->weight_feature(cn->find_clone(weight_feature));
+ cloned->weight_time(cn->find_clone(weight_time));
+ cloned->bias(cn->find_clone(bias));
+ cloned->input_activation_state(cn->find_clone(input_activation_state));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleSVDF *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleSVDF.test.cpp b/compiler/luci/partition/src/Nodes/CircleSVDF.test.cpp
new file mode 100644
index 000000000..af8cd5549
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSVDF.test.cpp
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleSVDF>
+{
+public:
+ NodeGraphlet() = default;
+
+public:
+ void init(loco::Graph *g)
+ {
+ NodeGraphletT<luci::CircleSVDF>::init(g);
+
+ _node->fusedActivationFunction(luci::FusedActFunc::RELU);
+ }
+};
+
+class TestNodeGraph : public TestIsOGraph<5>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<5>::init({shape, shape, shape, shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input(0));
+ node()->weight_feature(input(1));
+ node()->weight_time(input(2));
+ node()->bias(input(3));
+ node()->input_activation_state(input(4));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_SVDF)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSVDF *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSVDF *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(5, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+ ASSERT_EQ(cth.inputs(2), clone->arg(2));
+ ASSERT_EQ(cth.inputs(3), clone->arg(3));
+ ASSERT_EQ(cth.inputs(4), clone->arg(4));
+}
+
+TEST(ConnectNodeTest, connect_SVDF_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSVDF *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSVDF *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
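CircleSVDF is the widest operator in this batch: five graph inputs plus node attributes. The graphlet's init() override sets a fused activation because attributes travel through luci::clone_node(), not through ConnectNode; visit() rewires edges only. A sketch of that division of labour (hypothetical body; the real attribute copy lives in luci's CircleNodeClone service, and svdf_rank/asymmetric_quantize_inputs are assumed attribute names):

  luci::CircleSVDF *clone_svdf_attrs(const luci::CircleSVDF *org, loco::Graph *g)
  {
    auto *cloned = g->nodes()->create<luci::CircleSVDF>();
    // attributes are copied at clone time ...
    cloned->fusedActivationFunction(org->fusedActivationFunction());
    cloned->svdf_rank(org->svdf_rank());                                   // assumed name
    cloned->asymmetric_quantize_inputs(org->asymmetric_quantize_inputs()); // assumed name
    // ... while the five inputs stay null until ConnectNode::visit() runs.
    return cloned;
  }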
diff --git a/compiler/luci/partition/src/Nodes/CircleScatterNd.cpp b/compiler/luci/partition/src/Nodes/CircleScatterNd.cpp
new file mode 100644
index 000000000..88a3ecf19
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleScatterNd.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleScatterNd *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleScatterNd *>(cn->find_clone(node));
+
+ luci::CircleNode *indices = loco::must_cast<luci::CircleNode *>(node->indices());
+ luci::CircleNode *updates = loco::must_cast<luci::CircleNode *>(node->updates());
+ luci::CircleNode *shape = loco::must_cast<luci::CircleNode *>(node->shape());
+
+ cloned->indices(cn->find_clone(indices));
+ cloned->updates(cn->find_clone(updates));
+ cloned->shape(cn->find_clone(shape));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleScatterNd *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleScatterNd.test.cpp b/compiler/luci/partition/src/Nodes/CircleScatterNd.test.cpp
new file mode 100644
index 000000000..4ce787569
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleScatterNd.test.cpp
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleScatterNd>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<3>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<3>::init({shape, shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->indices(input(0));
+ node()->updates(input(1));
+ node()->shape(input(2));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_ScatterNd)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleScatterNd *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleScatterNd *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(3, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+ ASSERT_EQ(cth.inputs(2), clone->arg(2));
+}
+
+TEST(ConnectNodeTest, connect_ScatterNd_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleScatterNd *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleScatterNd *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleSegmentSum.cpp b/compiler/luci/partition/src/Nodes/CircleSegmentSum.cpp
new file mode 100644
index 000000000..6540416c6
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSegmentSum.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleSegmentSum *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleSegmentSum *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+ luci::CircleNode *segment_ids = loco::must_cast<luci::CircleNode *>(node->segment_ids());
+
+ cloned->input(cn->find_clone(input));
+ cloned->segment_ids(cn->find_clone(segment_ids));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleSegmentSum *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleSegmentSum.test.cpp b/compiler/luci/partition/src/Nodes/CircleSegmentSum.test.cpp
new file mode 100644
index 000000000..453b7cc01
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSegmentSum.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleSegmentSum>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input(0));
+ node()->segment_ids(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_SegmentSum)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSegmentSum *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSegmentSum *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_SegmentSum_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSegmentSum *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSegmentSum *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleSelect.cpp b/compiler/luci/partition/src/Nodes/CircleSelect.cpp
new file mode 100644
index 000000000..436e95609
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSelect.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleSelect *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleSelect *>(cn->find_clone(node));
+
+ luci::CircleNode *condition = loco::must_cast<luci::CircleNode *>(node->condition());
+ luci::CircleNode *t = loco::must_cast<luci::CircleNode *>(node->t());
+ luci::CircleNode *e = loco::must_cast<luci::CircleNode *>(node->e());
+
+ cloned->condition(cn->find_clone(condition));
+ cloned->t(cn->find_clone(t));
+ cloned->e(cn->find_clone(e));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleSelect *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleSelect.test.cpp b/compiler/luci/partition/src/Nodes/CircleSelect.test.cpp
new file mode 100644
index 000000000..2a38de593
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSelect.test.cpp
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleSelect>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<3>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<3>::init({shape, shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->condition(input(0));
+ node()->t(input(1));
+ node()->e(input(2));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Select)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSelect *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSelect *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(3, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+ ASSERT_EQ(cth.inputs(2), clone->arg(2));
+}
+
+TEST(ConnectNodeTest, connect_Select_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSelect *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSelect *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleSelectV2.cpp b/compiler/luci/partition/src/Nodes/CircleSelectV2.cpp
new file mode 100644
index 000000000..a8b6ab556
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSelectV2.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleSelectV2 *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleSelectV2 *>(cn->find_clone(node));
+
+ luci::CircleNode *condition = loco::must_cast<luci::CircleNode *>(node->condition());
+ luci::CircleNode *t = loco::must_cast<luci::CircleNode *>(node->t());
+ luci::CircleNode *e = loco::must_cast<luci::CircleNode *>(node->e());
+
+ cloned->condition(cn->find_clone(condition));
+ cloned->t(cn->find_clone(t));
+ cloned->e(cn->find_clone(e));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleSelectV2 *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleSelectV2.test.cpp b/compiler/luci/partition/src/Nodes/CircleSelectV2.test.cpp
new file mode 100644
index 000000000..c2ebdbe11
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSelectV2.test.cpp
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleSelectV2>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<3>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<3>::init({shape, shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->condition(input(0));
+ node()->t(input(1));
+ node()->e(input(2));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_SelectV2)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSelectV2 *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSelectV2 *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(3, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+ ASSERT_EQ(cth.inputs(2), clone->arg(2));
+}
+
+TEST(ConnectNodeTest, connect_SelectV2_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSelectV2 *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSelectV2 *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleShape.cpp b/compiler/luci/partition/src/Nodes/CircleShape.cpp
new file mode 100644
index 000000000..2fb3dcdd8
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleShape.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleShape *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleShape *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+
+ cloned->input(cn->find_clone(input));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleShape *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleShape.test.cpp b/compiler/luci/partition/src/Nodes/CircleShape.test.cpp
new file mode 100644
index 000000000..38033a3bc
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleShape.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleShape>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Shape)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleShape *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleShape *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_Shape_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleShape *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleShape *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleSin.cpp b/compiler/luci/partition/src/Nodes/CircleSin.cpp
new file mode 100644
index 000000000..0ef605994
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSin.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleSin *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleSin *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+
+ cloned->x(cn->find_clone(x));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleSin *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleSin.test.cpp b/compiler/luci/partition/src/Nodes/CircleSin.test.cpp
new file mode 100644
index 000000000..e141b4530
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSin.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleSin>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Sin)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSin *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSin *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_Sin_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSin *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSin *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleSlice.cpp b/compiler/luci/partition/src/Nodes/CircleSlice.cpp
new file mode 100644
index 000000000..811d81f9e
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSlice.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleSlice *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleSlice *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+ luci::CircleNode *begin = loco::must_cast<luci::CircleNode *>(node->begin());
+ luci::CircleNode *size = loco::must_cast<luci::CircleNode *>(node->size());
+
+ cloned->input(cn->find_clone(input));
+ cloned->begin(cn->find_clone(begin));
+ cloned->size(cn->find_clone(size));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleSlice *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleSlice.test.cpp b/compiler/luci/partition/src/Nodes/CircleSlice.test.cpp
new file mode 100644
index 000000000..0718c7f15
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSlice.test.cpp
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleSlice>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<3>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<3>::init({shape, shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input(0));
+ node()->begin(input(1));
+ node()->size(input(2));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Slice)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSlice *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSlice *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(3, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+ ASSERT_EQ(cth.inputs(2), clone->arg(2));
+}
+
+TEST(ConnectNodeTest, connect_Slice_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSlice *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSlice *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleSoftmax.cpp b/compiler/luci/partition/src/Nodes/CircleSoftmax.cpp
new file mode 100644
index 000000000..6b08f005e
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSoftmax.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleSoftmax *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleSoftmax *>(cn->find_clone(node));
+
+ luci::CircleNode *logits = loco::must_cast<luci::CircleNode *>(node->logits());
+
+ cloned->logits(cn->find_clone(logits));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleSoftmax *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleSoftmax.test.cpp b/compiler/luci/partition/src/Nodes/CircleSoftmax.test.cpp
new file mode 100644
index 000000000..571ad80ff
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSoftmax.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleSoftmax>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->logits(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Softmax)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSoftmax *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSoftmax *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_Softmax_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSoftmax *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSoftmax *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.cpp b/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.cpp
new file mode 100644
index 000000000..dc48b36d6
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleSpaceToBatchND *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleSpaceToBatchND *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+ luci::CircleNode *block_shape = loco::must_cast<luci::CircleNode *>(node->block_shape());
+ luci::CircleNode *paddings = loco::must_cast<luci::CircleNode *>(node->paddings());
+
+ cloned->input(cn->find_clone(input));
+ cloned->block_shape(cn->find_clone(block_shape));
+ cloned->paddings(cn->find_clone(paddings));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleSpaceToBatchND *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.test.cpp b/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.test.cpp
new file mode 100644
index 000000000..0fcf22fd0
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.test.cpp
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleSpaceToBatchND>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<3>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<3>::init({shape, shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input(0));
+ node()->block_shape(input(1));
+ node()->paddings(input(2));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_SpaceToBatchND)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSpaceToBatchND *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSpaceToBatchND *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(3, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+ ASSERT_EQ(cth.inputs(2), clone->arg(2));
+}
+
+TEST(ConnectNodeTest, connect_SpaceToBatchND_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSpaceToBatchND *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSpaceToBatchND *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.cpp b/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.cpp
new file mode 100644
index 000000000..55d562f3d
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleSpaceToDepth *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleSpaceToDepth *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+
+ cloned->input(cn->find_clone(input));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleSpaceToDepth *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.test.cpp b/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.test.cpp
new file mode 100644
index 000000000..771c1f372
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleSpaceToDepth>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_SpaceToDepth)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSpaceToDepth *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSpaceToDepth *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_SpaceToDepth_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSpaceToDepth *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSpaceToDepth *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleSparseToDense.cpp b/compiler/luci/partition/src/Nodes/CircleSparseToDense.cpp
new file mode 100644
index 000000000..cc2f5e915
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSparseToDense.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleSparseToDense *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleSparseToDense *>(cn->find_clone(node));
+
+ luci::CircleNode *indices = loco::must_cast<luci::CircleNode *>(node->indices());
+ luci::CircleNode *output_shape = loco::must_cast<luci::CircleNode *>(node->output_shape());
+ luci::CircleNode *values = loco::must_cast<luci::CircleNode *>(node->values());
+ luci::CircleNode *default_value = loco::must_cast<luci::CircleNode *>(node->default_value());
+
+ cloned->indices(cn->find_clone(indices));
+ cloned->output_shape(cn->find_clone(output_shape));
+ cloned->values(cn->find_clone(values));
+ cloned->default_value(cn->find_clone(default_value));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleSparseToDense *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleSparseToDense.test.cpp b/compiler/luci/partition/src/Nodes/CircleSparseToDense.test.cpp
new file mode 100644
index 000000000..06b3814ee
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSparseToDense.test.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleSparseToDense>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<4>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<4>::init({shape, shape, shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->indices(input(0));
+ node()->output_shape(input(1));
+ node()->values(input(2));
+ node()->default_value(input(3));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_SparseToDense)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSparseToDense *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSparseToDense *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(4, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+ ASSERT_EQ(cth.inputs(2), clone->arg(2));
+ ASSERT_EQ(cth.inputs(3), clone->arg(3));
+}
+
+TEST(ConnectNodeTest, connect_SparseToDense_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSparseToDense *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSparseToDense *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
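
All of the test files added here instantiate the same scaffolding from ConnectNode.test.h: TestIsOGraph<N> (or TestIOGraph for a single input) builds a source graph with N inputs and one output, NodeGraphletT<T> creates the node under test, and ConnectionTestHelper manages a second graph holding the clones. The positive test checks that clone_connect() rewires every argument to a cloned input; the _NEG variant calls prepare_inputs_miss(), which by all appearances skips registering the input clones so that clone_connect() has nothing to find and throws. Condensed, the skeleton is (a sketch under those assumptions, again with a hypothetical CircleFoo):

TEST(ConnectNodeTest, connect_Foo_sketch)
{
  TestNodeGraph tng; // TestIOGraph + NodeGraphletT<luci::CircleFoo>
  tng.init({2, 3});  // source graph with one {2, 3} input

  ConnectionTestHelper cth;
  cth.prepare_inputs(&tng); // register a clone for every graph input

  auto *clone = luci::clone_node(tng.node(), cth.graph_clone());
  cth.clone_connect(tng.node(), clone); // dispatches ConnectNode::visit

  // Every argument of the clone must now point at a cloned input.
  ASSERT_EQ(1, clone->arity());
  ASSERT_EQ(cth.inputs(0), clone->arg(0));
}
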
diff --git a/compiler/luci/partition/src/Nodes/CircleSplit.cpp b/compiler/luci/partition/src/Nodes/CircleSplit.cpp
new file mode 100644
index 000000000..5f851f049
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSplit.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleSplit *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleSplit *>(cn->find_clone(node));
+
+ luci::CircleNode *split_dim = loco::must_cast<luci::CircleNode *>(node->split_dim());
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+
+ cloned->split_dim(cn->find_clone(split_dim));
+ cloned->input(cn->find_clone(input));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleSplit *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleSplit.test.cpp b/compiler/luci/partition/src/Nodes/CircleSplit.test.cpp
new file mode 100644
index 000000000..a4242b9ab
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSplit.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleSplit>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->split_dim(input(0));
+ node()->input(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Split)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSplit *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSplit *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_Split_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSplit *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSplit *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleSplitOut.cpp b/compiler/luci/partition/src/Nodes/CircleSplitOut.cpp
new file mode 100644
index 000000000..1a447581e
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSplitOut.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleSplitOut *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleSplitOut *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+
+ cloned->input(cn->find_clone(input));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleSplitOut *node) { connect(this, node); }
+
+} // namespace luci
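
CircleSplitOut is one of luci's virtual "out" nodes: each result of a multi-output operator such as Split gets its own node whose single operand points back at the driver node. Its connect() therefore rewires exactly one edge; CircleSplitVOut and CircleTopKV2Out later in this patch follow the same one-operand pattern, as do their single-input tests.
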
diff --git a/compiler/luci/partition/src/Nodes/CircleSplitOut.test.cpp b/compiler/luci/partition/src/Nodes/CircleSplitOut.test.cpp
new file mode 100644
index 000000000..b7cf6fc7d
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSplitOut.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleSplitOut>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_SplitOut)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSplitOut *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSplitOut *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_SplitOut_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSplitOut *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSplitOut *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleSplitV.cpp b/compiler/luci/partition/src/Nodes/CircleSplitV.cpp
new file mode 100644
index 000000000..43ebe076f
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSplitV.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleSplitV *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleSplitV *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+ luci::CircleNode *size_splits = loco::must_cast<luci::CircleNode *>(node->size_splits());
+ luci::CircleNode *split_dim = loco::must_cast<luci::CircleNode *>(node->split_dim());
+
+ cloned->input(cn->find_clone(input));
+ cloned->size_splits(cn->find_clone(size_splits));
+ cloned->split_dim(cn->find_clone(split_dim));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleSplitV *node) { connect(this, node); }
+
+} // namespace luci
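
Note that operand order is part of the contract here: for CircleSplitV the visitor rewires input, size_splits and split_dim, and the paired test pins them to arg(0), arg(1) and arg(2) respectively via ASSERT_EQ(cth.inputs(i), clone->arg(i)). A connect() that restored the right nodes in the wrong slots would still produce a connected graph, but the arity/arg assertions below would catch the swap.
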
diff --git a/compiler/luci/partition/src/Nodes/CircleSplitV.test.cpp b/compiler/luci/partition/src/Nodes/CircleSplitV.test.cpp
new file mode 100644
index 000000000..877a44759
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSplitV.test.cpp
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleSplitV>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<3>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<3>::init({shape, shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input(0));
+ node()->size_splits(input(1));
+ node()->split_dim(input(2));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_SplitV)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSplitV *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSplitV *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(3, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+ ASSERT_EQ(cth.inputs(2), clone->arg(2));
+}
+
+TEST(ConnectNodeTest, connect_SplitV_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSplitV *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSplitV *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleSplitVOut.cpp b/compiler/luci/partition/src/Nodes/CircleSplitVOut.cpp
new file mode 100644
index 000000000..4bac6c5dc
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSplitVOut.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleSplitVOut *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleSplitVOut *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+
+ cloned->input(cn->find_clone(input));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleSplitVOut *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleSplitVOut.test.cpp b/compiler/luci/partition/src/Nodes/CircleSplitVOut.test.cpp
new file mode 100644
index 000000000..b3cf4d939
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSplitVOut.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleSplitVOut>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_SplitVOut)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSplitVOut *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSplitVOut *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_SplitVOut_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSplitVOut *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSplitVOut *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleSqrt.cpp b/compiler/luci/partition/src/Nodes/CircleSqrt.cpp
new file mode 100644
index 000000000..fd6d0ec05
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSqrt.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleSqrt *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleSqrt *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+
+ cloned->x(cn->find_clone(x));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleSqrt *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleSqrt.test.cpp b/compiler/luci/partition/src/Nodes/CircleSqrt.test.cpp
new file mode 100644
index 000000000..be298835e
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSqrt.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleSqrt>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Sqrt)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSqrt *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSqrt *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_Sqrt_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSqrt *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSqrt *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleSquare.cpp b/compiler/luci/partition/src/Nodes/CircleSquare.cpp
new file mode 100644
index 000000000..56dd5440d
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSquare.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleSquare *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleSquare *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+
+ cloned->x(cn->find_clone(x));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleSquare *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleSquare.test.cpp b/compiler/luci/partition/src/Nodes/CircleSquare.test.cpp
new file mode 100644
index 000000000..a509b31b5
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSquare.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleSquare>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Square)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSquare *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSquare *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_Square_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSquare *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSquare *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleSquaredDifference.cpp b/compiler/luci/partition/src/Nodes/CircleSquaredDifference.cpp
new file mode 100644
index 000000000..e47be2c7e
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSquaredDifference.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleSquaredDifference *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleSquaredDifference *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+ luci::CircleNode *y = loco::must_cast<luci::CircleNode *>(node->y());
+
+ cloned->x(cn->find_clone(x));
+ cloned->y(cn->find_clone(y));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleSquaredDifference *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleSquaredDifference.test.cpp b/compiler/luci/partition/src/Nodes/CircleSquaredDifference.test.cpp
new file mode 100644
index 000000000..a900f1dc3
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSquaredDifference.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleSquaredDifference>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input(0));
+ node()->y(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_SquaredDifference)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSquaredDifference *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSquaredDifference *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_SquaredDifference_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSquaredDifference *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSquaredDifference *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleSqueeze.cpp b/compiler/luci/partition/src/Nodes/CircleSqueeze.cpp
new file mode 100644
index 000000000..ffe3c911b
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSqueeze.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleSqueeze *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleSqueeze *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+
+ cloned->input(cn->find_clone(input));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleSqueeze *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleSqueeze.test.cpp b/compiler/luci/partition/src/Nodes/CircleSqueeze.test.cpp
new file mode 100644
index 000000000..7a6e2bf44
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSqueeze.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleSqueeze>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Squeeze)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSqueeze *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSqueeze *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_Squeeze_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSqueeze *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSqueeze *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleStridedSlice.cpp b/compiler/luci/partition/src/Nodes/CircleStridedSlice.cpp
new file mode 100644
index 000000000..953b45107
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleStridedSlice.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleStridedSlice *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleStridedSlice *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+ luci::CircleNode *begin = loco::must_cast<luci::CircleNode *>(node->begin());
+ luci::CircleNode *end = loco::must_cast<luci::CircleNode *>(node->end());
+ luci::CircleNode *strides = loco::must_cast<luci::CircleNode *>(node->strides());
+
+ cloned->input(cn->find_clone(input));
+ cloned->begin(cn->find_clone(begin));
+ cloned->end(cn->find_clone(end));
+ cloned->strides(cn->find_clone(strides));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleStridedSlice *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleStridedSlice.test.cpp b/compiler/luci/partition/src/Nodes/CircleStridedSlice.test.cpp
new file mode 100644
index 000000000..3e950fd25
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleStridedSlice.test.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleStridedSlice>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<4>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<4>::init({shape, shape, shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input(0));
+ node()->begin(input(1));
+ node()->end(input(2));
+ node()->strides(input(3));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_StridedSlice)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleStridedSlice *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleStridedSlice *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(4, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+ ASSERT_EQ(cth.inputs(2), clone->arg(2));
+ ASSERT_EQ(cth.inputs(3), clone->arg(3));
+}
+
+TEST(ConnectNodeTest, connect_StridedSlice_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleStridedSlice *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleStridedSlice *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleSub.cpp b/compiler/luci/partition/src/Nodes/CircleSub.cpp
new file mode 100644
index 000000000..c5bea087f
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSub.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleSub *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleSub *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+ luci::CircleNode *y = loco::must_cast<luci::CircleNode *>(node->y());
+
+ cloned->x(cn->find_clone(x));
+ cloned->y(cn->find_clone(y));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleSub *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleSub.test.cpp b/compiler/luci/partition/src/Nodes/CircleSub.test.cpp
new file mode 100644
index 000000000..ca51865a7
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSub.test.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleSub>
+{
+public:
+ NodeGraphlet() = default;
+
+public:
+ void init(loco::Graph *g)
+ {
+ NodeGraphletT<luci::CircleSub>::init(g);
+
+ _node->fusedActivationFunction(luci::FusedActFunc::RELU);
+ }
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input(0));
+ node()->y(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Sub)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSub *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSub *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_Sub_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSub *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSub *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
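
Unlike the other graphlets in this patch, CircleSub's NodeGraphlet overrides init() to set a fused RELU activation, so the cloned node carries a non-default attribute. Copying attributes is luci::clone_node()'s job, while ConnectNode only restores edges; a test could make that division explicit with one extra assertion (a sketch, assuming the conventional luci getter fusedActivationFunction() for reading the attribute back):

TEST(ConnectNodeTest, connect_Sub_attributes_sketch)
{
  TestNodeGraph tng;
  tng.init({2, 3});

  ConnectionTestHelper cth;
  cth.prepare_inputs(&tng);

  auto *clone = luci::clone_node(tng.node(), cth.graph_clone());
  auto *sub = loco::must_cast<luci::CircleSub *>(clone);

  // The fused activation travels with clone_node(); clone_connect()
  // only rewires the x/y operands afterwards.
  ASSERT_EQ(luci::FusedActFunc::RELU, sub->fusedActivationFunction());
}
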
diff --git a/compiler/luci/partition/src/Nodes/CircleSum.cpp b/compiler/luci/partition/src/Nodes/CircleSum.cpp
new file mode 100644
index 000000000..e929fd090
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSum.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleSum *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleSum *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+ luci::CircleNode *reduction_indices =
+ loco::must_cast<luci::CircleNode *>(node->reduction_indices());
+
+ cloned->input(cn->find_clone(input));
+ cloned->reduction_indices(cn->find_clone(reduction_indices));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleSum *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleSum.test.cpp b/compiler/luci/partition/src/Nodes/CircleSum.test.cpp
new file mode 100644
index 000000000..21f6bbb74
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleSum.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleSum>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input(0));
+ node()->reduction_indices(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Sum)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSum *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSum *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_Sum_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSum *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleSum *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleTanh.cpp b/compiler/luci/partition/src/Nodes/CircleTanh.cpp
new file mode 100644
index 000000000..ef5c2c993
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleTanh.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleTanh *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleTanh *>(cn->find_clone(node));
+
+ luci::CircleNode *x = loco::must_cast<luci::CircleNode *>(node->x());
+
+ cloned->x(cn->find_clone(x));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleTanh *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleTanh.test.cpp b/compiler/luci/partition/src/Nodes/CircleTanh.test.cpp
new file mode 100644
index 000000000..1e2d0629c
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleTanh.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleTanh>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->x(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Tanh)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleTanh *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleTanh *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_Tanh_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleTanh *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleTanh *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleTile.cpp b/compiler/luci/partition/src/Nodes/CircleTile.cpp
new file mode 100644
index 000000000..0c217436e
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleTile.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleTile *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleTile *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+ luci::CircleNode *multiples = loco::must_cast<luci::CircleNode *>(node->multiples());
+
+ cloned->input(cn->find_clone(input));
+ cloned->multiples(cn->find_clone(multiples));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleTile *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleTile.test.cpp b/compiler/luci/partition/src/Nodes/CircleTile.test.cpp
new file mode 100644
index 000000000..9449c1fa7
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleTile.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleTile>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input(0));
+ node()->multiples(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Tile)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleTile *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleTile *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_Tile_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleTile *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleTile *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleTopKV2.cpp b/compiler/luci/partition/src/Nodes/CircleTopKV2.cpp
new file mode 100644
index 000000000..41dfa9c22
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleTopKV2.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleTopKV2 *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleTopKV2 *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+ luci::CircleNode *k = loco::must_cast<luci::CircleNode *>(node->k());
+
+ cloned->input(cn->find_clone(input));
+ cloned->k(cn->find_clone(k));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleTopKV2 *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleTopKV2.test.cpp b/compiler/luci/partition/src/Nodes/CircleTopKV2.test.cpp
new file mode 100644
index 000000000..e0c4a3a84
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleTopKV2.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleTopKV2>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input(0));
+ node()->k(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_TopKV2)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleTopKV2 *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleTopKV2 *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_TopKV2_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleTopKV2 *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleTopKV2 *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleTopKV2Out.cpp b/compiler/luci/partition/src/Nodes/CircleTopKV2Out.cpp
new file mode 100644
index 000000000..19f0fa7bf
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleTopKV2Out.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleTopKV2Out *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleTopKV2Out *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+
+ cloned->input(cn->find_clone(input));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleTopKV2Out *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleTopKV2Out.test.cpp b/compiler/luci/partition/src/Nodes/CircleTopKV2Out.test.cpp
new file mode 100644
index 000000000..ba085f6a9
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleTopKV2Out.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleTopKV2Out>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_TopKV2Out)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleTopKV2Out *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleTopKV2Out *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_TopKV2Out_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleTopKV2Out *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleTopKV2Out *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleTranspose.cpp b/compiler/luci/partition/src/Nodes/CircleTranspose.cpp
new file mode 100644
index 000000000..cbbdb0090
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleTranspose.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleTranspose *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleTranspose *>(cn->find_clone(node));
+
+ luci::CircleNode *a = loco::must_cast<luci::CircleNode *>(node->a());
+ luci::CircleNode *perm = loco::must_cast<luci::CircleNode *>(node->perm());
+
+ cloned->a(cn->find_clone(a));
+ cloned->perm(cn->find_clone(perm));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleTranspose *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleTranspose.test.cpp b/compiler/luci/partition/src/Nodes/CircleTranspose.test.cpp
new file mode 100644
index 000000000..847683844
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleTranspose.test.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleTranspose>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIsOGraph<2>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<2>::init({shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->a(input(0));
+ node()->perm(input(1));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Transpose)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleTranspose *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleTranspose *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(2, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+}
+
+TEST(ConnectNodeTest, connect_Transpose_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleTranspose *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleTranspose *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleTransposeConv.cpp b/compiler/luci/partition/src/Nodes/CircleTransposeConv.cpp
new file mode 100644
index 000000000..6b6819d59
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleTransposeConv.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleTransposeConv *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleTransposeConv *>(cn->find_clone(node));
+
+ luci::CircleNode *inputSizes = loco::must_cast<luci::CircleNode *>(node->inputSizes());
+ luci::CircleNode *filter = loco::must_cast<luci::CircleNode *>(node->filter());
+ luci::CircleNode *outBackprop = loco::must_cast<luci::CircleNode *>(node->outBackprop());
+ luci::CircleNode *bias = loco::must_cast<luci::CircleNode *>(node->bias());
+
+ cloned->inputSizes(cn->find_clone(inputSizes));
+ cloned->filter(cn->find_clone(filter));
+ cloned->outBackprop(cn->find_clone(outBackprop));
+ cloned->bias(cn->find_clone(bias));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleTransposeConv *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleTransposeConv.test.cpp b/compiler/luci/partition/src/Nodes/CircleTransposeConv.test.cpp
new file mode 100644
index 000000000..7dbdfd92f
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleTransposeConv.test.cpp
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleTransposeConv>
+{
+public:
+ NodeGraphlet() = default;
+
+public:
+ void init(loco::Graph *g) override
+ {
+ NodeGraphletT<luci::CircleTransposeConv>::init(g);
+
+ _node->padding(luci::Padding::VALID);
+ _node->fusedActivationFunction(luci::FusedActFunc::RELU);
+ }
+};
+
+class TestNodeGraph : public TestIsOGraph<4>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<4>::init({shape, shape, shape, shape}, shape);
+ NodeGraphlet::init(g());
+
+ node()->inputSizes(input(0));
+ node()->filter(input(1));
+ node()->outBackprop(input(2));
+ node()->bias(input(3));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_TransposeConv)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleTransposeConv *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleTransposeConv *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(4, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+ ASSERT_EQ(cth.inputs(1), clone->arg(1));
+ ASSERT_EQ(cth.inputs(2), clone->arg(2));
+ ASSERT_EQ(cth.inputs(3), clone->arg(3));
+}
+
+TEST(ConnectNodeTest, connect_TransposeConv_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleTransposeConv *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleTransposeConv *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
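Unlike the simpler graphlets, the TransposeConv test overrides init() to set a concrete padding and activation before cloning. This is presumably because luci::clone_node copies node attributes and is stricter about undefined enum values; leaving the defaults in place would make the clone step, rather than the connect step, the thing under test.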
diff --git a/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.cpp b/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.cpp
new file mode 100644
index 000000000..e8c834be9
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.cpp
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleUnidirectionalSequenceLSTM *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleUnidirectionalSequenceLSTM *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+
+ luci::CircleNode *input_to_input_weights =
+ loco::must_cast<luci::CircleNode *>(node->input_to_input_weights());
+ luci::CircleNode *input_to_forget_weights =
+ loco::must_cast<luci::CircleNode *>(node->input_to_forget_weights());
+ luci::CircleNode *input_to_cell_weights =
+ loco::must_cast<luci::CircleNode *>(node->input_to_cell_weights());
+ luci::CircleNode *input_to_output_weights =
+ loco::must_cast<luci::CircleNode *>(node->input_to_output_weights());
+
+ luci::CircleNode *recurrent_to_input_weights =
+ loco::must_cast<luci::CircleNode *>(node->recurrent_to_input_weights());
+ luci::CircleNode *recurrent_to_forget_weights =
+ loco::must_cast<luci::CircleNode *>(node->recurrent_to_forget_weights());
+ luci::CircleNode *recurrent_to_cell_weights =
+ loco::must_cast<luci::CircleNode *>(node->recurrent_to_cell_weights());
+ luci::CircleNode *recurrent_to_output_weights =
+ loco::must_cast<luci::CircleNode *>(node->recurrent_to_output_weights());
+
+ luci::CircleNode *cell_to_input_weights =
+ loco::must_cast<luci::CircleNode *>(node->cell_to_input_weights());
+ luci::CircleNode *cell_to_forget_weights =
+ loco::must_cast<luci::CircleNode *>(node->cell_to_forget_weights());
+ luci::CircleNode *cell_to_output_weights =
+ loco::must_cast<luci::CircleNode *>(node->cell_to_output_weights());
+
+ luci::CircleNode *input_gate_bias = loco::must_cast<luci::CircleNode *>(node->input_gate_bias());
+ luci::CircleNode *forget_gate_bias =
+ loco::must_cast<luci::CircleNode *>(node->forget_gate_bias());
+ luci::CircleNode *cell_gate_bias = loco::must_cast<luci::CircleNode *>(node->cell_gate_bias());
+ luci::CircleNode *output_gate_bias =
+ loco::must_cast<luci::CircleNode *>(node->output_gate_bias());
+
+ luci::CircleNode *projection_weights =
+ loco::must_cast<luci::CircleNode *>(node->projection_weights());
+ luci::CircleNode *projection_bias = loco::must_cast<luci::CircleNode *>(node->projection_bias());
+
+ luci::CircleNode *output_state = loco::must_cast<luci::CircleNode *>(node->output_state());
+ luci::CircleNode *cell_state = loco::must_cast<luci::CircleNode *>(node->cell_state());
+
+ luci::CircleNode *input_layer_norm_coefficients =
+ loco::must_cast<luci::CircleNode *>(node->input_layer_norm_coefficients());
+ luci::CircleNode *forget_layer_norm_coefficients =
+ loco::must_cast<luci::CircleNode *>(node->forget_layer_norm_coefficients());
+ luci::CircleNode *cell_layer_norm_coefficients =
+ loco::must_cast<luci::CircleNode *>(node->cell_layer_norm_coefficients());
+ luci::CircleNode *output_layer_norm_coefficients =
+ loco::must_cast<luci::CircleNode *>(node->output_layer_norm_coefficients());
+
+ cloned->input(cn->find_clone(input));
+
+ cloned->input_to_input_weights(cn->find_clone(input_to_input_weights));
+ cloned->input_to_forget_weights(cn->find_clone(input_to_forget_weights));
+ cloned->input_to_cell_weights(cn->find_clone(input_to_cell_weights));
+ cloned->input_to_output_weights(cn->find_clone(input_to_output_weights));
+
+ cloned->recurrent_to_input_weights(cn->find_clone(recurrent_to_input_weights));
+ cloned->recurrent_to_forget_weights(cn->find_clone(recurrent_to_forget_weights));
+ cloned->recurrent_to_cell_weights(cn->find_clone(recurrent_to_cell_weights));
+ cloned->recurrent_to_output_weights(cn->find_clone(recurrent_to_output_weights));
+
+ cloned->cell_to_input_weights(cn->find_clone(cell_to_input_weights));
+ cloned->cell_to_forget_weights(cn->find_clone(cell_to_forget_weights));
+ cloned->cell_to_output_weights(cn->find_clone(cell_to_output_weights));
+
+ cloned->input_gate_bias(cn->find_clone(input_gate_bias));
+ cloned->forget_gate_bias(cn->find_clone(forget_gate_bias));
+ cloned->cell_gate_bias(cn->find_clone(cell_gate_bias));
+ cloned->output_gate_bias(cn->find_clone(output_gate_bias));
+
+ cloned->projection_weights(cn->find_clone(projection_weights));
+ cloned->projection_bias(cn->find_clone(projection_bias));
+
+ cloned->output_state(cn->find_clone(output_state));
+ cloned->cell_state(cn->find_clone(cell_state));
+
+ cloned->input_layer_norm_coefficients(cn->find_clone(input_layer_norm_coefficients));
+ cloned->forget_layer_norm_coefficients(cn->find_clone(forget_layer_norm_coefficients));
+ cloned->cell_layer_norm_coefficients(cn->find_clone(cell_layer_norm_coefficients));
+ cloned->output_layer_norm_coefficients(cn->find_clone(output_layer_norm_coefficients));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleUnidirectionalSequenceLSTM *node) { connect(this, node); }
+
+} // namespace luci
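UnidirectionalSequenceLSTM is the largest case in this batch: twenty-four named inputs, each rewired through the same cast-then-find_clone pair. A hedged refactoring sketch (not part of this diff) showing how a local helper could collapse the repetition while keeping the typed setters:

    // remap: downcast an original input and return its registered clone
    auto remap = [cn](loco::Node *origin) {
      return cn->find_clone(loco::must_cast<luci::CircleNode *>(origin));
    };
    cloned->input(remap(node->input()));
    cloned->input_to_input_weights(remap(node->input_to_input_weights()));
    // ... and so on for the remaining named inputs.

The field-by-field version above was likely kept for symmetry with the other visitors and to keep each connection independently greppable.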
diff --git a/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.test.cpp b/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.test.cpp
new file mode 100644
index 000000000..6472b583b
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.test.cpp
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleUnidirectionalSequenceLSTM>
+{
+public:
+ NodeGraphlet() = default;
+
+public:
+ void init(loco::Graph *g) override
+ {
+ NodeGraphletT<luci::CircleUnidirectionalSequenceLSTM>::init(g);
+
+ _node->fusedActivationFunction(luci::FusedActFunc::RELU);
+ }
+};
+
+class TestNodeGraph : public TestIsOGraph<24>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOGraph<24>::init({shape, shape, shape, shape, shape, shape, shape, shape,
+ shape, shape, shape, shape, shape, shape, shape, shape,
+ shape, shape, shape, shape, shape, shape, shape, shape},
+ shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input(0));
+
+ node()->input_to_input_weights(input(1));
+ node()->input_to_forget_weights(input(2));
+ node()->input_to_cell_weights(input(3));
+ node()->input_to_output_weights(input(4));
+
+ node()->recurrent_to_input_weights(input(5));
+ node()->recurrent_to_forget_weights(input(6));
+ node()->recurrent_to_cell_weights(input(7));
+ node()->recurrent_to_output_weights(input(8));
+
+ node()->cell_to_input_weights(input(9));
+ node()->cell_to_forget_weights(input(10));
+ node()->cell_to_output_weights(input(11));
+
+ node()->input_gate_bias(input(12));
+ node()->forget_gate_bias(input(13));
+ node()->cell_gate_bias(input(14));
+ node()->output_gate_bias(input(15));
+
+ node()->projection_weights(input(16));
+ node()->projection_bias(input(17));
+
+ node()->output_state(input(18));
+ node()->cell_state(input(19));
+
+ node()->input_layer_norm_coefficients(input(20));
+ node()->forget_layer_norm_coefficients(input(21));
+ node()->cell_layer_norm_coefficients(input(22));
+ node()->output_layer_norm_coefficients(input(23));
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_UnidirectionalSequenceLSTM)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleUnidirectionalSequenceLSTM *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleUnidirectionalSequenceLSTM *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(24, clone->arity());
+ // checking all 24 inputs one by one would be repetitive; loop over them instead
+ for (uint32_t i = 0; i < 24; ++i)
+ ASSERT_EQ(cth.inputs(i), clone->arg(i));
+}
+
+TEST(ConnectNodeTest, connect_UnidirectionalSequenceLSTM_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleUnidirectionalSequenceLSTM *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleUnidirectionalSequenceLSTM *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleUnique.cpp b/compiler/luci/partition/src/Nodes/CircleUnique.cpp
new file mode 100644
index 000000000..c035b7ed7
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleUnique.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleUnique *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleUnique *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+
+ cloned->input(cn->find_clone(input));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleUnique *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleUnique.test.cpp b/compiler/luci/partition/src/Nodes/CircleUnique.test.cpp
new file mode 100644
index 000000000..910087a8b
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleUnique.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleUnique>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Unique)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleUnique *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleUnique *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_Unique_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleUnique *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleUnique *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleUniqueOut.cpp b/compiler/luci/partition/src/Nodes/CircleUniqueOut.cpp
new file mode 100644
index 000000000..23b1abaa5
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleUniqueOut.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleUniqueOut *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleUniqueOut *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+
+ cloned->input(cn->find_clone(input));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleUniqueOut *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleUniqueOut.test.cpp b/compiler/luci/partition/src/Nodes/CircleUniqueOut.test.cpp
new file mode 100644
index 000000000..954957497
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleUniqueOut.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleUniqueOut>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_UniqueOut)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleUniqueOut *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleUniqueOut *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_UniqueOut_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleUniqueOut *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleUniqueOut *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleUnpack.cpp b/compiler/luci/partition/src/Nodes/CircleUnpack.cpp
new file mode 100644
index 000000000..43ebcb418
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleUnpack.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleUnpack *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleUnpack *>(cn->find_clone(node));
+
+ luci::CircleNode *value = loco::must_cast<luci::CircleNode *>(node->value());
+
+ cloned->value(cn->find_clone(value));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleUnpack *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleUnpack.test.cpp b/compiler/luci/partition/src/Nodes/CircleUnpack.test.cpp
new file mode 100644
index 000000000..444b04373
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleUnpack.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleUnpack>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->value(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Unpack)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleUnpack *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleUnpack *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_Unpack_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleUnpack *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleUnpack *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleUnpackOut.cpp b/compiler/luci/partition/src/Nodes/CircleUnpackOut.cpp
new file mode 100644
index 000000000..ee1de153f
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleUnpackOut.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleUnpackOut *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleUnpackOut *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+
+ cloned->input(cn->find_clone(input));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleUnpackOut *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleUnpackOut.test.cpp b/compiler/luci/partition/src/Nodes/CircleUnpackOut.test.cpp
new file mode 100644
index 000000000..2aaef8d04
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleUnpackOut.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleUnpackOut>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_UnpackOut)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleUnpackOut *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleUnpackOut *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_UnpackOut_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleUnpackOut *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleUnpackOut *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleVariable.cpp b/compiler/luci/partition/src/Nodes/CircleVariable.cpp
new file mode 100644
index 000000000..e7a794a16
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleVariable.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleVariable *)
+{
+ // Nothing to do: CircleVariable is a leaf node with no inputs to reconnect
+}
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleWhere.cpp b/compiler/luci/partition/src/Nodes/CircleWhere.cpp
new file mode 100644
index 000000000..d0fc8465d
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleWhere.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleWhere *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleWhere *>(cn->find_clone(node));
+
+ luci::CircleNode *condition = loco::must_cast<luci::CircleNode *>(node->condition());
+
+ cloned->condition(cn->find_clone(condition));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleWhere *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleWhere.test.cpp b/compiler/luci/partition/src/Nodes/CircleWhere.test.cpp
new file mode 100644
index 000000000..f17131c94
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleWhere.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleWhere>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->condition(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Where)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleWhere *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleWhere *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_Where_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleWhere *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleWhere *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleWhile.cpp b/compiler/luci/partition/src/Nodes/CircleWhile.cpp
new file mode 100644
index 000000000..95b77f753
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleWhile.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleWhile *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleWhile *>(cn->find_clone(node));
+
+ auto input_count = node->input_count();
+ for (uint32_t in = 0; in < input_count; ++in)
+ {
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input(in));
+
+ cloned->input(in, cn->find_clone(input));
+ }
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleWhile *node) { connect(this, node); }
+
+} // namespace luci
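CircleWhile is the variadic case: its inputs are indexed, so the visitor loops over input_count() instead of naming fields. The tests below instantiate the smallest signature (one input, one output) via TestIsOsGraph<1, 1>; assuming the helper templates generalize as their parameters suggest, a two-input variant would differ only in the init body:

    TestIsOsGraph<2, 1>::init({shape, shape}, {shape});
    NodeGraphlet::init(g(), 2, 1);
    node()->input(0, input(0));
    node()->input(1, input(1));
    output(0)->from(node());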
diff --git a/compiler/luci/partition/src/Nodes/CircleWhile.test.cpp b/compiler/luci/partition/src/Nodes/CircleWhile.test.cpp
new file mode 100644
index 000000000..6ee7aba62
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleWhile.test.cpp
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeIsOsGraphletT<luci::CircleWhile>
+{
+public:
+ NodeGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, uint32_t n, uint32_t m) override { NodeIsOsGraphletT::init(g, n, m); }
+};
+
+class TestNodeGraph : public TestIsOsGraph<1, 1>, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIsOsGraph<1, 1>::init({shape}, {shape});
+ NodeGraphlet::init(g(), 1, 1);
+
+ node()->input(0, input(0));
+
+ output(0)->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_While)
+{
+ TestNodeGraph tng;
+ tng.init({1});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs<1, 1>(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleWhile *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleWhile *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_While_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({1});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss<1, 1>(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleWhile *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleWhile *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleWhileOut.cpp b/compiler/luci/partition/src/Nodes/CircleWhileOut.cpp
new file mode 100644
index 000000000..5cd68355c
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleWhileOut.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleWhileOut *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleWhileOut *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+
+ cloned->input(cn->find_clone(input));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleWhileOut *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleWhileOut.test.cpp b/compiler/luci/partition/src/Nodes/CircleWhileOut.test.cpp
new file mode 100644
index 000000000..f58eba031
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleWhileOut.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleWhileOut>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_WhileOut)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleWhileOut *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleWhileOut *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_WhileOut_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleWhileOut *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleWhileOut *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleZerosLike.cpp b/compiler/luci/partition/src/Nodes/CircleZerosLike.cpp
new file mode 100644
index 000000000..795d88de3
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleZerosLike.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleZerosLike *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleZerosLike *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+
+ cloned->input(cn->find_clone(input));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleZerosLike *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleZerosLike.test.cpp b/compiler/luci/partition/src/Nodes/CircleZerosLike.test.cpp
new file mode 100644
index 000000000..f887bc36f
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleZerosLike.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleZerosLike>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_ZerosLike)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleZerosLike *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleZerosLike *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_ZerosLike_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleZerosLike *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleZerosLike *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Partition.cpp b/compiler/luci/partition/src/Partition.cpp
new file mode 100644
index 000000000..cc7106ca9
--- /dev/null
+++ b/compiler/luci/partition/src/Partition.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionIR.h"
+#include "PartitionIRDump.h"
+#include "PartitionPGroups.h"
+#include "PartitionMerge.h"
+#include "PartitionCleanup.h"
+#include "PartitionPModules.h"
+#include "PartitionPModulesDump.h"
+
+#include "luci/Partition.h"
+#include "luci/Log.h"
+
+#include <cassert>
+
+namespace luci
+{
+
+/**
+ * @brief Build partitioned modules from the source module according to the partition table
+ */
+PartedModules apply(Module *source, const PartitionTable &partition)
+{
+ assert(source != nullptr);
+
+ LOGGER(l);
+
+ auto pgroups = produce_pgroups(source, partition);
+ INFO(l) << "--- Partition Graph (1)------------------------";
+ INFO(l) << pgroups.get();
+
+ auto mpgroups = merge_pgroups(pgroups.get());
+ INFO(l) << "--- Partition Graph (2)------------------------";
+ INFO(l) << mpgroups.get();
+
+ remove_unused_inputoutputs(mpgroups.get(), source);
+ INFO(l) << "--- Partition Graph (3)------------------------";
+ INFO(l) << mpgroups.get();
+
+ auto pmodules = produce_pmodules(mpgroups.get());
+ INFO(l) << "--- Modules -----------------------------------";
+ INFO(l) << &pmodules;
+
+ return pmodules;
+}
+
+} // namespace luci
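apply() is the public entry point tying the pipeline together: produce_pgroups splits the source by the partition table, merge_pgroups coalesces adjacent groups, remove_unused_inputoutputs prunes dangling group boundaries, and produce_pmodules materializes the remaining groups as Modules. A minimal usage sketch, mirroring Partition.test.cpp below:

    luci::PartitionTable pt;
    pt.default_group = "A";
    pt.comply = luci::PartitionTable::COMPLY::OPCODE;
    luci::PartedModules pms = luci::apply(&module, pt);
    // pms.pmodules holds the per-group modules, ready for separate export.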
diff --git a/compiler/luci/partition/src/Partition.test.cpp b/compiler/luci/partition/src/Partition.test.cpp
new file mode 100644
index 000000000..42fcc5189
--- /dev/null
+++ b/compiler/luci/partition/src/Partition.test.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Partition.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <luci/IR/Nodes/CircleSqrt.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class SqrtGraphlet
+{
+public:
+ SqrtGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 input_shape)
+ {
+ _sqrt = g->nodes()->create<luci::CircleSqrt>();
+ _sqrt->dtype(loco::DataType::S32);
+ _sqrt->name("sqrt");
+ }
+
+protected:
+ luci::CircleSqrt *_sqrt = nullptr;
+};
+
+class SqrtGraph : public TestIOGraph, public SqrtGraphlet
+{
+public:
+ SqrtGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ SqrtGraphlet::init(g(), shape);
+
+ _sqrt->x(input());
+
+ output()->from(_sqrt);
+ }
+};
+
+} // namespace
+
+TEST(PartitionTest, simple_apply)
+{
+ luci::Module module;
+
+ SqrtGraph g;
+ g.init({3, 3});
+ g.transfer_to(&module);
+
+ luci::PartitionTable pt;
+ pt.default_group = "A";
+ pt.comply = luci::PartitionTable::COMPLY::OPCODE;
+
+ auto pms = apply(&module, pt);
+
+ ASSERT_EQ(1, pms.pmodules.size());
+
+ auto &pm = *pms.pmodules.begin();
+ ASSERT_NE(nullptr, pm.module->graph());
+}
diff --git a/compiler/luci/partition/src/PartitionCleanup.cpp b/compiler/luci/partition/src/PartitionCleanup.cpp
new file mode 100644
index 000000000..7bf51518a
--- /dev/null
+++ b/compiler/luci/partition/src/PartitionCleanup.cpp
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionCleanup.h"
+
+#include "luci/Log.h"
+
+namespace
+{
+
+using CircleNodes = std::vector<luci::CircleNode *>;
+
+/**
+ * @note Outputs of the original source graph should be treated as used outputs
+ */
+void gather_graph_outputs(CircleNodes &nodes, const luci::Module *source)
+{
+ // graph outputs are treated as used
+ auto graph = source->graph();
+ for (uint32_t n = 0; n < graph->outputs()->size(); ++n)
+ {
+ auto output = luci::output_node(graph, n); // output is CircleOutput
+ assert(output != nullptr);
+
+ auto node = loco::must_cast<luci::CircleNode *>(output->from());
+
+ nodes.push_back(node);
+ }
+
+ // TODO add unused virtual outputs
+}
+
+/**
+ * @note If one PGroup requires an input, that input should be an output
+ * from another PGroup
+ */
+void gather_pgroups_outputs(CircleNodes &nodes, const luci::PGroups *pgroups)
+{
+ // input of a pgroup is used output
+ for (auto &pgroup : pgroups->pgroups)
+ {
+ for (auto input : pgroup->inputs)
+ {
+ nodes.push_back(input);
+ }
+ }
+}
+
+} // namespace
+
+namespace luci
+{
+
+void remove_unused_inputoutputs(luci::PGroups *pgroups, const luci::Module *source)
+{
+ assert(source != nullptr);
+ assert(pgroups != nullptr);
+
+ LOGGER(l);
+
+ INFO(l) << "--- Cleanup unused inputs/outputs";
+
+ // remove input within same pgroup
+ for (auto &pgroup : pgroups->pgroups)
+ {
+ bool changed;
+ do
+ {
+ changed = false;
+ for (auto it = pgroup->inputs.begin(); it != pgroup->inputs.end(); ++it)
+ {
+ auto input = *it;
+ if (pgroups->pgroup_of(input) == pgroup.get())
+ {
+ INFO(l) << " Cleanup input " << input->name() << " from group " << pgroup->group;
+ pgroup->inputs.erase(it);
+ changed = true;
+ break;
+ }
+ // NOTE a CircleConst can also appear as an input, as it is registered as
+ //      an input to some node and then (should be) merged into that group.
+ //      Remove this input if it is a CircleConst.
+ if (dynamic_cast<CircleConst *>(input) != nullptr)
+ {
+ INFO(l) << " Cleanup CircleConst " << input->name() << " from group " << pgroup->group;
+ pgroup->inputs.erase(it);
+ changed = true;
+ break;
+ }
+ }
+ } while (changed);
+ }
+
+ // remove unused output(s)
+ // 'used_outputs' will hold actual used outputs for all PGroups
+ CircleNodes used_outputs;
+
+ gather_graph_outputs(used_outputs, source);
+ gather_pgroups_outputs(used_outputs, pgroups);
+
+ for (auto &pgroup : pgroups->pgroups)
+ {
+ bool changed;
+ do
+ {
+ changed = false;
+ for (auto it = pgroup->outputs.begin(); it != pgroup->outputs.end(); ++it)
+ {
+ auto output = *it;
+ auto oit = std::find(used_outputs.begin(), used_outputs.end(), output);
+ if (oit == used_outputs.end())
+ {
+ INFO(l) << " Cleanup output " << output->name() << " from group " << pgroup->group;
+ pgroup->outputs.erase(it);
+ changed = true;
+ break;
+ }
+ }
+ } while (changed);
+ }
+}
+
+} // namespace luci
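
The cleanup loops above use a restart-on-mutation idiom: std::vector::erase invalidates iterators at and after the erased position, so each erase breaks out of the scan and the do/while restarts it until a full pass makes no change. A self-contained sketch of the same pattern (uses only the standard library):

    #include <vector>

    // Remove all elements matching pred, restarting the scan after every
    // erase so an invalidated iterator is never dereferenced.
    template <typename T, typename Pred> void erase_with_restart(std::vector<T> &v, Pred pred)
    {
      bool changed;
      do
      {
        changed = false;
        for (auto it = v.begin(); it != v.end(); ++it)
        {
          if (pred(*it))
          {
            v.erase(it); // 'it' is invalid from here on
            changed = true;
            break; // restart the scan
          }
        }
      } while (changed);
    }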
diff --git a/compiler/luci/partition/src/PartitionCleanup.h b/compiler/luci/partition/src/PartitionCleanup.h
new file mode 100644
index 000000000..f81b4a7cb
--- /dev/null
+++ b/compiler/luci/partition/src/PartitionCleanup.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PARTITION_CLEANUP_H__
+#define __LUCI_PARTITION_CLEANUP_H__
+
+#include "PartitionIR.h"
+
+#include <luci/IR/Module.h>
+
+namespace luci
+{
+
+/**
+ * @brief This will remove unused inputs/outputs in each pgroup of pgroups
+ */
+void remove_unused_inputoutputs(luci::PGroups *, const luci::Module *);
+
+} // namespace luci
+
+#endif // __LUCI_PARTITION_CLEANUP_H__
diff --git a/compiler/luci/partition/src/PartitionDump.cpp b/compiler/luci/partition/src/PartitionDump.cpp
new file mode 100644
index 000000000..69aec610d
--- /dev/null
+++ b/compiler/luci/partition/src/PartitionDump.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/PartitionDump.h"
+
+namespace
+{
+
+void dump(std::ostream &os, const luci::PartitionTable &table)
+{
+ os << "Backends:";
+ for (auto &group : table.groups)
+ {
+ os << " " << group;
+ if (table.default_group == group)
+ os << "(default)";
+ }
+ os << std::endl;
+
+ os << "Assign by OPCODE: " << std::endl;
+ for (auto &item : table.byopcodes)
+ os << " " << item.first << "=" << item.second << std::endl;
+
+ os << "Assign by OPNAME: " << std::endl;
+ for (auto &item : table.byopnames)
+ os << " " << item.first << "=" << item.second << std::endl;
+}
+
+} // namespace
+
+std::ostream &operator<<(std::ostream &os, const luci::PartitionTable &table)
+{
+ dump(os, table);
+ return os;
+}
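
For reference, the dump above prints a table with backends cpu (default) and npu and one assignment of each kind like this (values hypothetical):

    Backends: cpu(default) npu
    Assign by OPCODE:
     SQRT=npu
    Assign by OPNAME:
     conv1=npu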
diff --git a/compiler/luci/partition/src/PartitionIR.cpp b/compiler/luci/partition/src/PartitionIR.cpp
new file mode 100644
index 000000000..969fa7092
--- /dev/null
+++ b/compiler/luci/partition/src/PartitionIR.cpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionIR.h"
+#include "CircleOpCode.h"
+
+#include "luci/Log.h"
+
+#include <cassert>
+#include <ostream>
+#include <iostream>
+
+namespace luci
+{
+
+std::unique_ptr<PGroups> PGroups::make_copy(void) const
+{
+ auto d_pgroups = std::make_unique<luci::PGroups>();
+
+ for (auto &s_pgroup : pgroups)
+ {
+ // make a copy of s_pgroup to d_pgroup
+ std::unique_ptr<luci::PGroup> d_pgroup = std::make_unique<luci::PGroup>();
+
+ d_pgroup->group = s_pgroup->group;
+ d_pgroup->id = s_pgroup->id;
+
+ for (auto &pnode : s_pgroup->pnodes)
+ {
+ auto pnodec = std::make_unique<luci::PNode>();
+ pnodec->node = pnode->node;
+ pnodec->group = pnode->group;
+ pnodec->pgroup = d_pgroup.get();
+ d_pgroup->pnodes.push_back(std::move(pnodec));
+ }
+
+ for (auto &input : s_pgroup->inputs)
+ d_pgroup->inputs.push_back(input);
+
+ for (auto &output : s_pgroup->outputs)
+ d_pgroup->outputs.push_back(output);
+
+ // copy node2group
+ for (auto it = node2group.begin(); it != node2group.end(); ++it)
+ d_pgroups->node2group[it->first] = it->second;
+
+ // build id2pgroup
+ d_pgroups->id2pgroup[d_pgroup->id] = d_pgroup.get();
+
+ d_pgroups->pgroups.push_back(std::move(d_pgroup));
+ // note: d_pgroup is now nullptr as it's moved
+ }
+
+ return d_pgroups;
+}
+
+GroupKey PGroups::group_of(luci::CircleNode *node) const
+{
+ assert(node != nullptr);
+
+ LOGGER(l);
+
+ auto it = node2group.find(node);
+ if (it == node2group.end())
+ {
+ INFO(l) << "PGroups::group_of " << node << "(" << node->name() << ") not found" << std::endl;
+ return "";
+ }
+ return it->second;
+}
+
+const PGroup *PGroups::pgroup_of(luci::CircleNode *node) const
+{
+ assert(node != nullptr);
+
+ for (auto &pgroup : pgroups)
+ {
+ for (auto &pnode : pgroup->pnodes)
+ {
+ if (node == pnode->node)
+ return pgroup.get();
+ }
+ }
+ // node may be a graph input (CircleInput)
+ return nullptr;
+}
+
+} // namespace luci
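
The two lookups above differ in coverage: group_of() answers from the node2group map, which also records virtual and skipped nodes, while pgroup_of() only finds nodes owned by a PNode, so a graph input yields nullptr. A short sketch:

    // given: a const luci::PGroups *pgroups and a luci::CircleNode *node
    luci::GroupKey key = pgroups->group_of(node); // "" when never recorded
    if (const luci::PGroup *pg = pgroups->pgroup_of(node))
      assert(pg->group == key); // the owning PGroup carries the same group key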
diff --git a/compiler/luci/partition/src/PartitionIR.h b/compiler/luci/partition/src/PartitionIR.h
new file mode 100644
index 000000000..c91b2f2ab
--- /dev/null
+++ b/compiler/luci/partition/src/PartitionIR.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PARTITION_IR_H__
+#define __LUCI_PARTITION_IR_H__
+
+#include <luci/IR/CircleNodes.h>
+
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace luci
+{
+
+struct PGroup;
+
+using GroupKey = std::string;
+
+/**
+ * @brief Partition Node: a source CircleNode tagged with its group name
+ * @note node just points to the source luci::CircleNode, NOT the cloned node;
+ *       CloneContext is used to find the cloned node from the source node
+ */
+struct PNode
+{
+ const luci::CircleNode *node = nullptr;
+ GroupKey group;
+
+ const PGroup *pgroup = nullptr;
+};
+
+/**
+ * @brief Partition Group holding Partition Nodes of the same group, plus its I/O nodes
+ */
+struct PGroup
+{
+ std::vector<std::unique_ptr<PNode>> pnodes;
+ GroupKey group;
+ uint32_t id = 0;
+
+ // I/O while partitioning
+ std::vector<luci::CircleNode *> inputs;
+ std::vector<luci::CircleNode *> outputs;
+};
+
+struct PGroups
+{
+ std::vector<std::unique_ptr<PGroup>> pgroups;
+
+ // node2group is to find group key from source node
+ std::map<const luci::CircleNode *, GroupKey> node2group;
+
+ // id2pgroup is to find *pgroup from pgroup id
+ std::map<uint32_t, PGroup *> id2pgroup;
+
+ // default group key for reference
+ GroupKey default_group;
+
+public:
+ /**
+ * @brief return a copy of PGroups
+ */
+ std::unique_ptr<PGroups> make_copy(void) const;
+
+ /**
+ * @brief return group key of node, empty string if not found
+ */
+ GroupKey group_of(luci::CircleNode *node) const;
+
+ /**
+ * @brief return holding pgroup of node, nullptr if not found
+ */
+ const PGroup *pgroup_of(luci::CircleNode *node) const;
+};
+
+} // namespace luci
+
+#endif // __LUCI_PARTITION_IR_H__
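
Ownership summary for the structures above, shown for a single-node graph (a sketch; the id and key values follow append() in PartitionPGroups.cpp from this same commit):

    PGroups
     +- pgroups[0] : PGroup{ group="A", id=1, pnodes=[ PNode{ node=&sqrt, group="A" } ] }
     +- node2group : { &sqrt -> "A" }
     +- id2pgroup  : { 1 -> pgroups[0].get() }
     +- default_group = "A"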
diff --git a/compiler/luci/partition/src/PartitionIR.test.cpp b/compiler/luci/partition/src/PartitionIR.test.cpp
new file mode 100644
index 000000000..4c051a96d
--- /dev/null
+++ b/compiler/luci/partition/src/PartitionIR.test.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionIR.h"
+
+// NOTE any node will do for testing
+#include <luci/IR/Nodes/CircleAdd.h>
+
+#include <gtest/gtest.h>
+
+#include <memory>
+
+TEST(PartitionIRTest, PNode_ctor)
+{
+ auto g = loco::make_graph();
+ auto node = g->nodes()->create<luci::CircleAdd>();
+
+ luci::PNode pnode;
+ pnode.node = node;
+
+ ASSERT_NE(nullptr, pnode.node);
+ ASSERT_EQ(nullptr, pnode.pgroup);
+}
+
+// TODO add more tests with luci::PNode
+
+TEST(PartitionIRTest, PGroup_ctor)
+{
+ auto g = loco::make_graph();
+ auto node = g->nodes()->create<luci::CircleAdd>();
+
+ luci::PGroup pgroup;
+ auto pnode = std::make_unique<luci::PNode>();
+ pnode->node = node;
+
+ pgroup.pnodes.push_back(std::move(pnode));
+
+ ASSERT_NE(pgroup.pnodes.end(), pgroup.pnodes.begin());
+ ASSERT_EQ(0, pgroup.inputs.size());
+ ASSERT_EQ(0, pgroup.outputs.size());
+}
+
+// TODO add more tests with luci::PGroup
+
+TEST(PartitionIRTest, PGroups_ctor)
+{
+ auto g = loco::make_graph();
+ auto node = g->nodes()->create<luci::CircleAdd>();
+
+ auto pnode = std::make_unique<luci::PNode>();
+ pnode->node = node;
+
+ auto pgroup = std::make_unique<luci::PGroup>();
+ pgroup->pnodes.push_back(std::move(pnode));
+
+ luci::PGroups pgroups;
+ pgroups.pgroups.push_back(std::move(pgroup));
+
+ ASSERT_NE(pgroups.pgroups.end(), pgroups.pgroups.begin());
+}
+
+// TODO add more tests with luci::PGroups
diff --git a/compiler/luci/partition/src/PartitionIRDump.cpp b/compiler/luci/partition/src/PartitionIRDump.cpp
new file mode 100644
index 000000000..5a78d99c0
--- /dev/null
+++ b/compiler/luci/partition/src/PartitionIRDump.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionIRDump.h"
+
+#include "CircleOpCode.h"
+
+#include <iostream>
+
+namespace luci
+{
+
+void dump(std::ostream &os, const PNode *pnode)
+{
+ os << "PNode: " << pnode->group << ", " << pnode->node << ":" << luci::opcode_name(pnode->node)
+ << ":" << pnode->node->name() << std::endl;
+}
+
+void dump(std::ostream &os, const PGroup *pgroup)
+{
+ os << "--- PGroup: " << pgroup->group << std::endl;
+ os << "Input(s): [ ";
+ for (auto &node_in : pgroup->inputs)
+ os << node_in->name() << " ";
+ os << "]" << std::endl;
+ for (auto &pnode : pgroup->pnodes)
+ {
+ dump(os, pnode.get());
+ }
+ os << "Output(s): [ ";
+ for (auto &node_out : pgroup->outputs)
+ os << node_out->name() << " ";
+ os << "]" << std::endl;
+}
+
+void dump(std::ostream &os, const PGroups *pgroups)
+{
+ for (auto &pgroup : pgroups->pgroups)
+ {
+ dump(os, pgroup.get());
+ }
+ os << "--- Node2Group items: " << std::endl;
+ for (auto it = pgroups->node2group.begin(); it != pgroups->node2group.end(); ++it)
+ {
+ auto node = it->first;
+ auto &group = it->second;
+ os << " Node: " << node << "(" << luci::opcode_name(node) << "," << node->name()
+ << "): " << group << std::endl;
+ }
+}
+
+} // namespace luci
+
+std::ostream &operator<<(std::ostream &os, const luci::PGroups *pgroups)
+{
+ luci::dump(os, pgroups);
+ return os;
+}
diff --git a/compiler/luci/partition/src/PartitionIRDump.h b/compiler/luci/partition/src/PartitionIRDump.h
new file mode 100644
index 000000000..8a4b3f579
--- /dev/null
+++ b/compiler/luci/partition/src/PartitionIRDump.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PARTITION_IR_DUMP_H__
+#define __LUCI_PARTITION_IR_DUMP_H__
+
+#include "PartitionIR.h"
+
+#include <iostream>
+
+namespace luci
+{
+
+void dump(std::ostream &os, const PNode *pnode);
+void dump(std::ostream &os, const PGroup *pgroup);
+void dump(std::ostream &os, const PGroups *pgroups);
+
+} // namespace luci
+
+std::ostream &operator<<(std::ostream &os, const luci::PGroups *pgroups);
+
+#endif // __LUCI_PARTITION_IR_DUMP_H__
diff --git a/compiler/luci/partition/src/PartitionMerge.cpp b/compiler/luci/partition/src/PartitionMerge.cpp
new file mode 100644
index 000000000..aa8a827cd
--- /dev/null
+++ b/compiler/luci/partition/src/PartitionMerge.cpp
@@ -0,0 +1,261 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionMerge.h"
+
+#include <algorithm>
+
+namespace
+{
+
+/**
+ * @brief return true if an output of pgroup_i is one of the inputs of pgroup
+ */
+bool is_input_of(const luci::PGroup *pgroup_i, const luci::PGroup *pgroup)
+{
+ for (auto *output : pgroup_i->outputs)
+ {
+ for (auto *input : pgroup->inputs)
+ {
+ if (input == output)
+ return true;
+ }
+ }
+ return false;
+}
+
+/**
+ * @brief return true if there is only one input or all the inputs have the same group
+ * @note pgroups is used to find group of pgroup
+ */
+bool is_input_same(const luci::PGroup *pgroup, const luci::PGroups *pgroups)
+{
+ assert(pgroups != nullptr);
+ assert(pgroup != nullptr);
+
+ const luci::PGroup *input_pgroup = nullptr;
+ std::string group;
+ for (auto &input : pgroup->inputs)
+ {
+ // We skip the logic below for CircleConst.
+ // A CircleConst will be cloned if it is not found in the pgroup as an input.
+ // Refer to build_graph(), "add CircleConst for inputs".
+ // Reason: a CircleConst can be shared as an input to multiple nodes,
+ // where each node can be placed in a different group. In this case
+ // we need to clone the CircleConst for each group's graph.
+ if (dynamic_cast<const luci::CircleConst *>(input) != nullptr)
+ continue;
+
+ auto input_group = pgroups->group_of(input);
+ // NOTE: all the nodes should be registered and the return should be a valid group.
+ // produce_pgroups() should ensure this, except for CircleConst, inputs and outputs.
+ // assert here to catch any problem with this.
+ assert(not input_group.empty());
+ if (input_group.empty())
+ input_group = pgroups->default_group;
+
+ if (group.empty())
+ group = input_group;
+ else
+ {
+ if (group != input_group)
+ return false;
+ }
+ // if there are multiple inputs, all the inputs should be in the same pgroup
+ // https://github.com/Samsung/ONE/issues/6230#issuecomment-801618150
+ // https://github.com/Samsung/ONE/issues/6230#issuecomment-801680531
+ auto pgroup_input = pgroups->pgroup_of(input);
+ if (pgroup_input != nullptr)
+ {
+ if (input_pgroup == nullptr)
+ input_pgroup = pgroup_input;
+ else
+ {
+ if (input_pgroup->group != pgroup_input->group)
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+/**
+ * @brief return true if every output is fed only to nodes of its own group
+ * @note pgroups is used to find group of pgroup
+ * ex)
+ * /-- pgroup_user_1 (grp_1)
+ * --- pgroup
+ * \-- pgroup_user_2 (grp_2)
+ *
+ * return false if grp_1 != grp_2
+ */
+bool is_output_same(const luci::PGroup *pgroup, const luci::PGroups *pgroups)
+{
+ assert(pgroups != nullptr);
+ assert(pgroup != nullptr);
+
+ std::string group;
+ for (auto &output : pgroup->outputs)
+ {
+ // get output_group
+ auto output_group = pgroups->group_of(output);
+ assert(not output_group.empty());
+ if (output_group.empty())
+ output_group = pgroups->default_group;
+
+ // find all PGroup that uses output
+ for (auto &pgroup_user : pgroups->pgroups)
+ {
+ for (auto &user_inputs : pgroup_user->inputs)
+ {
+ if (output == user_inputs)
+ {
+ // OK, these are connected, check group is same
+ if (pgroup_user->group != output_group)
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+/**
+ * @brief merge pgroup into pgroup_i
+ * @note output of pgroup_i should be input of pgroup
+ */
+void merge_into(luci::PGroup *pgroup, luci::PGroup *pgroup_i)
+{
+ for (auto &pnode : pgroup->pnodes)
+ {
+ // update pgroup for this pnode
+ pnode->pgroup = pgroup_i;
+ assert(pnode->group == pgroup_i->group);
+
+ // we don't need to add this in topological order:
+ // all the nodes will be created first, then the connections will be made
+ pgroup_i->pnodes.push_back(std::move(pnode));
+ // note: pnode is now nullptr as it's moved into pgroup_i->pnodes
+ }
+
+ for (auto &input : pgroup->inputs)
+ {
+ // add inputs of pgroup to pgroup_i if not member of pgroup_i
+ bool found_in_pgroup_i = false;
+ for (auto &pnode : pgroup_i->pnodes)
+ {
+ if (input == pnode->node)
+ {
+ found_in_pgroup_i = true;
+ break;
+ }
+ }
+ // skip if this input is already in the inputs
+ auto fit = std::find(pgroup_i->inputs.begin(), pgroup_i->inputs.end(), input);
+ if (fit != pgroup_i->inputs.end())
+ {
+ found_in_pgroup_i = true;
+ }
+ // note: if found_in_pgroup_i were forced to false (for testing), there
+ // would be unnecessary inputs
+ if (not found_in_pgroup_i)
+ {
+ // this input may be in another pgroup
+ pgroup_i->inputs.push_back(input);
+ }
+ }
+ // add outputs of pgroup to pgroup_i outputs if not already present
+ for (auto &output : pgroup->outputs)
+ {
+ auto it = std::find(pgroup_i->outputs.begin(), pgroup_i->outputs.end(), output);
+ if (it == pgroup_i->outputs.end())
+ {
+ pgroup_i->outputs.push_back(output);
+ }
+ }
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * @brief This will merge pgroups with the same group value, in topological order
+ */
+std::unique_ptr<luci::PGroups> merge_pgroups(const luci::PGroups *s_pgroups)
+{
+ // Make a copy of pgroups to apply merge action
+ // Q) do we really need a copy?
+ auto d_pgroups = s_pgroups->make_copy();
+
+ // Merge partition graphs
+ // - This is an initial implementation that works for limited networks
+ // - if A and B are in the same group and A is an input of B -> merge B into A
+ auto &pgroups = d_pgroups->pgroups;
+ bool changed;
+ do
+ {
+ changed = false;
+ for (auto &pgroup_i : pgroups)
+ {
+ bool merged = false;
+ for (auto it = pgroups.begin(); it != pgroups.end(); ++it)
+ {
+ auto &pgroup = *it;
+
+ // skip if same object
+ if (pgroup->id == pgroup_i->id)
+ continue;
+ // skip if different group
+ if (pgroup->group != pgroup_i->group)
+ continue;
+ // skip if not connected
+ if (!is_input_of(pgroup_i.get(), pgroup.get()))
+ continue;
+ // skip if there are multiple inputs but inputs differ in group
+ if (!is_input_same(pgroup.get(), d_pgroups.get()))
+ continue;
+ // skip if pgroup has different group for other users of pgroup_i
+ if (!is_output_same(pgroup_i.get(), d_pgroups.get()))
+ continue;
+ // TODO add more conditions as needed
+
+ merge_into(pgroup.get(), pgroup_i.get());
+
+ auto eit = d_pgroups->id2pgroup.find(pgroup->id);
+ assert(eit != d_pgroups->id2pgroup.end());
+ d_pgroups->id2pgroup.erase(eit);
+
+ // remove merged pgroup from pgroups
+ pgroups.erase(it);
+
+ merged = true;
+ break;
+ }
+ if (merged)
+ {
+ changed = true;
+ break;
+ }
+ }
+ } while (changed);
+
+ return d_pgroups;
+}
+
+} // namespace luci
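
A concrete trace of the merge loop (a sketch; the two-op chain and its opcode table are hypothetical): for Input -> Sqrt -> Rsqrt -> Output with both opcodes assigned to group "cpu", produce_pgroups() yields two single-node PGroups; since Sqrt's output is Rsqrt's input and every guard above passes, one merge collapses them:

    // given: a luci::Module 'module' and a PartitionTable 'pt' mapping the
    // SQRT and RSQRT opcodes to group "cpu" (hypothetical)
    auto pgroups = luci::produce_pgroups(&module, pt); // two PGroups, both "cpu"
    auto merged = luci::merge_pgroups(pgroups.get());
    assert(merged->pgroups.size() == 1); // the Rsqrt PGroup folded into Sqrt's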
diff --git a/compiler/luci/partition/src/PartitionMerge.h b/compiler/luci/partition/src/PartitionMerge.h
new file mode 100644
index 000000000..5c9fec2d2
--- /dev/null
+++ b/compiler/luci/partition/src/PartitionMerge.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PARTITION_MERGE_H__
+#define __LUCI_PARTITION_MERGE_H__
+
+#include "PartitionIR.h"
+
+#include <memory>
+
+namespace luci
+{
+
+std::unique_ptr<luci::PGroups> merge_pgroups(const luci::PGroups *s_pgroups);
+
+} // namespace luci
+
+#endif // __LUCI_PARTITION_MERGE_H__
diff --git a/compiler/luci/partition/src/PartitionPGroups.cpp b/compiler/luci/partition/src/PartitionPGroups.cpp
new file mode 100644
index 000000000..2e95f08f7
--- /dev/null
+++ b/compiler/luci/partition/src/PartitionPGroups.cpp
@@ -0,0 +1,263 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionPGroups.h"
+#include "PartitionIR.h"
+#include "CircleOpCode.h"
+
+#include "luci/Partition.h"
+#include "luci/Log.h"
+#include "luci/LogHelper.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+#include <loco.h>
+
+namespace
+{
+
+class IsVirtualNode final : public luci::CircleNodeVisitor<bool>
+{
+public:
+ bool visit(const luci::CircleInput *) final { return true; }
+ bool visit(const luci::CircleOutput *) final { return true; }
+ // For multiple outputs
+ bool visit(const luci::CircleCustomOut *) final { return true; }
+ bool visit(const luci::CircleIfOut *) final { return true; }
+ bool visit(const luci::CircleNonMaxSuppressionV4Out *) final { return true; }
+ bool visit(const luci::CircleNonMaxSuppressionV5Out *) final { return true; }
+ bool visit(const luci::CircleSplitOut *) final { return true; }
+ bool visit(const luci::CircleSplitVOut *) final { return true; }
+ bool visit(const luci::CircleTopKV2Out *) final { return true; }
+ bool visit(const luci::CircleUniqueOut *) final { return true; }
+ bool visit(const luci::CircleUnpackOut *) final { return true; }
+ bool visit(const luci::CircleWhileOut *) final { return true; }
+ // For inputs not used
+ bool visit(const luci::CircleOutputExclude *) final { return true; }
+ bool visit(const luci::CircleVariable *) final { return true; }
+ // TODO add all virtual nodes
+
+ // default is false
+ bool visit(const luci::CircleNode *) final { return false; }
+};
+
+bool check_allocate_partition(const luci::CircleNode *node)
+{
+ IsVirtualNode query;
+ if (node->accept(&query))
+ return false;
+ /**
+ * @note About CircleConst
+ * CircleConst acts like a part of some CircleNode, and managing a
+ * CircleConst referenced multiple times is a bit difficult when it is
+ * used across different PGroups. So we treat it differently from other types.
+ * https://github.com/Samsung/ONE/issues/6230#issuecomment-809802813
+ */
+ if (dynamic_cast<const luci::CircleConst *>(node) != nullptr)
+ return false;
+ return true;
+}
+
+} // namespace
+
+namespace
+{
+
+std::string group_from_partition(const luci::CircleNode *node,
+ const luci::PartitionTable &partition)
+{
+ LOGGER(l);
+
+ auto group = partition.default_group;
+
+ std::string opcodename; // opcodename or opname
+
+ switch (partition.comply)
+ {
+ case luci::PartitionTable::COMPLY::OPCODE:
+ {
+ opcodename = luci::opcode_name(node);
+ assert(!opcodename.empty());
+
+ auto it = partition.byopcodes.find(opcodename);
+ if (it != partition.byopcodes.end())
+ group = it->second;
+ break;
+ }
+ case luci::PartitionTable::COMPLY::OPNAME:
+ {
+ opcodename = node->name();
+ assert(!opcodename.empty());
+
+ auto it = partition.byopnames.find(opcodename);
+ if (it != partition.byopnames.end())
+ group = it->second;
+ break;
+ }
+
+ default:
+ throw std::runtime_error("Unsupported partition.comply");
+ }
+
+ INFO(l) << "Op: " << node->name() << ": " << opcodename << ", " << node << ", " << group
+ << std::endl;
+
+ return group;
+}
+
+class IsVirtualInputNode final : public luci::CircleNodeVisitor<bool>
+{
+public:
+ // TODO check CircleOutputDummy
+ bool visit(const luci::CircleOutputExclude *) final { return true; }
+ bool visit(const luci::CircleVariable *) final { return true; }
+
+ // default is false
+ bool visit(const luci::CircleNode *) final { return false; }
+};
+
+class IsMultiOutputNode final : public luci::CircleNodeVisitor<bool>
+{
+public:
+ bool visit(const luci::CircleCustom *) final { return true; }
+ bool visit(const luci::CircleIf *) final { return true; }
+ bool visit(const luci::CircleNonMaxSuppressionV4 *) final { return true; }
+ bool visit(const luci::CircleNonMaxSuppressionV5 *) final { return true; }
+ bool visit(const luci::CircleSplit *) final { return true; }
+ bool visit(const luci::CircleSplitV *) final { return true; }
+ bool visit(const luci::CircleTopKV2 *) final { return true; }
+ bool visit(const luci::CircleUnique *) final { return true; }
+ bool visit(const luci::CircleUnpack *) final { return true; }
+ bool visit(const luci::CircleWhile *) final { return true; }
+ // default is false
+ bool visit(const luci::CircleNode *) final { return false; }
+};
+
+void append(luci::CircleNode *node, luci::PGroups *pgroups, const std::string &group, uint32_t idx)
+{
+ auto pgroup = std::make_unique<luci::PGroup>();
+ pgroup->group = group;
+ pgroup->id = idx + 1;
+
+ auto pnode = std::make_unique<luci::PNode>();
+ pnode->node = node;
+ pnode->group = group;
+ pnode->pgroup = pgroup.get();
+
+ pgroup->pnodes.push_back(std::move(pnode));
+
+ IsVirtualInputNode queryvi;
+ // Set input of PGroup
+ for (uint32_t in = 0; in < node->arity(); ++in)
+ {
+ auto input = loco::must_cast<luci::CircleNode *>(node->arg(in));
+ if (input->accept(&queryvi))
+ {
+ auto pnode = std::make_unique<luci::PNode>();
+ pnode->node = input;
+ pnode->group = group;
+ pnode->pgroup = pgroup.get();
+
+ pgroup->pnodes.push_back(std::move(pnode));
+
+ pgroups->node2group[input] = group;
+ }
+ else
+ {
+ // this input may be a CircleInput in the source graph
+ // --> not confident this is safe
+ pgroup->inputs.push_back(input);
+ }
+ }
+
+ IsMultiOutputNode query;
+ if (node->accept(&query))
+ {
+ // Include CircleXXXOut virtual nodes in this group
+ auto succs = loco::succs(node);
+ for (auto &succ_node : succs)
+ {
+ auto nodeout = loco::must_cast<luci::CircleNode *>(succ_node);
+
+ auto pnode = std::make_unique<luci::PNode>();
+ pnode->node = nodeout;
+ pnode->group = group;
+ pnode->pgroup = pgroup.get();
+
+ pgroup->pnodes.push_back(std::move(pnode));
+
+ pgroups->node2group[nodeout] = group;
+
+ pgroup->outputs.push_back(nodeout);
+ }
+ }
+ else
+ {
+ // Set output of PGroup: node itself
+ pgroup->outputs.push_back(node);
+ }
+
+ pgroups->node2group[node] = group;
+ pgroups->id2pgroup[pgroup->id] = pgroup.get();
+
+ pgroups->pgroups.push_back(std::move(pgroup));
+}
+
+} // namespace
+
+namespace luci
+{
+
+std::unique_ptr<luci::PGroups> produce_pgroups(const luci::Module *source,
+ const luci::PartitionTable &partition)
+{
+ assert(source != nullptr);
+ // NOTE Only main graph (subgraph index 0) will be partitioned.
+ // Other subgraphs will follow the owner (IF/WHILE/...) group
+
+ LOGGER(l);
+
+ auto pgroups = std::make_unique<luci::PGroups>();
+
+ pgroups->default_group = partition.default_group;
+
+ // Create a PGroup per CircleNode: each PGroup will have one CircleNode
+ auto graph = source->graph();
+ auto nodes = graph->nodes();
+ for (uint32_t idx = 0; idx < nodes->size(); ++idx)
+ {
+ auto node = loco::must_cast<luci::CircleNode *>(nodes->at(idx));
+
+ // check if node is a normal node that we are interested in
+ if (check_allocate_partition(node))
+ {
+ auto group = group_from_partition(node, partition);
+
+ append(node, pgroups.get(), group, idx);
+ }
+ else
+ {
+ INFO(l) << "Skip Op: " << node->name() << std::endl;
+ // record as default group
+ pgroups->node2group[node] = partition.default_group;
+ }
+ }
+
+ return pgroups;
+}
+
+} // namespace luci
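
Note how append() above treats multi-output ops: the virtual CircleXXXOut successors join the same PGroup, and they, not the op itself, become the PGroup outputs. Sketched for a two-way split:

    PGroup "A"
     +- pnodes  : [ Split, SplitOut#0, SplitOut#1 ]
     +- outputs : [ SplitOut#0, SplitOut#1 ]  // not Split itself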
diff --git a/compiler/luci/partition/src/PartitionPGroups.h b/compiler/luci/partition/src/PartitionPGroups.h
new file mode 100644
index 000000000..998e11cbd
--- /dev/null
+++ b/compiler/luci/partition/src/PartitionPGroups.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PARTITION_PGROUPS_H__
+#define __LUCI_PARTITION_PGROUPS_H__
+
+#include "PartitionIR.h"
+
+#include "luci/Partition.h"
+
+#include <luci/IR/Module.h>
+
+namespace luci
+{
+
+/**
+ * @brief This will produce a PGroups from Module and PartitionTable.
+ * @note Each PGroup will hold one CircleNode and the partition key value as its group.
+ * Only a single Graph in the Module is supported for now.
+ */
+std::unique_ptr<luci::PGroups> produce_pgroups(const luci::Module *source,
+ const luci::PartitionTable &partition);
+
+} // namespace luci
+
+#endif // __LUCI_PARTITION_PGROUPS_H__
diff --git a/compiler/luci/partition/src/PartitionPGroups.test.cpp b/compiler/luci/partition/src/PartitionPGroups.test.cpp
new file mode 100644
index 000000000..f31641be4
--- /dev/null
+++ b/compiler/luci/partition/src/PartitionPGroups.test.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionPGroups.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <luci/IR/Nodes/CircleSqrt.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class SqrtGraphlet
+{
+public:
+ SqrtGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 input_shape)
+ {
+ _sqrt = g->nodes()->create<luci::CircleSqrt>();
+ _sqrt->dtype(loco::DataType::S32);
+ _sqrt->name("sqrt");
+ }
+
+protected:
+ luci::CircleSqrt *_sqrt = nullptr;
+};
+
+class SqrtGraph : public TestIOGraph, public SqrtGraphlet
+{
+public:
+ SqrtGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ SqrtGraphlet::init(g(), shape);
+
+ _sqrt->x(input());
+
+ output()->from(_sqrt);
+ }
+};
+
+} // namespace
+
+TEST(PartitionPGroupsTest, simple_produce)
+{
+ luci::Module module;
+
+ SqrtGraph g;
+ g.init({3, 3});
+ g.transfer_to(&module);
+
+ luci::PartitionTable pt;
+ pt.default_group = "A";
+ pt.comply = luci::PartitionTable::COMPLY::OPCODE;
+
+ auto pgs = produce_pgroups(&module, pt);
+
+ ASSERT_EQ(1, pgs->pgroups.size());
+}
diff --git a/compiler/luci/partition/src/PartitionPModules.cpp b/compiler/luci/partition/src/PartitionPModules.cpp
new file mode 100644
index 000000000..9912305b3
--- /dev/null
+++ b/compiler/luci/partition/src/PartitionPModules.cpp
@@ -0,0 +1,407 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionPModules.h"
+#include "luci/ConnectNode.h"
+
+#include "luci/Service/CircleNodeClone.h"
+#include "luci/Log.h"
+
+#include <loco.h>
+
+#include <cassert>
+
+namespace
+{
+
+// forward declare
+void clone_ifnode_subgraphs(luci::PartedModule &pm, const luci::CircleIf *if_node,
+ const luci::CloneContext &clonectx);
+void clone_whilenode_subgraphs(luci::PartedModule &pm, const luci::CircleWhile *while_node,
+ const luci::CloneContext &clonectx);
+
+void add_graph_input(loco::Graph *graph, luci::CircleInput *input_node)
+{
+ assert(graph != nullptr);
+ assert(input_node != nullptr);
+
+ auto graph_input = graph->inputs()->create();
+ graph_input->name(input_node->name());
+
+ // Set GraphInputOutputIndex for graph
+ input_node->index(graph_input->index());
+
+ // Data type
+ graph_input->dtype(input_node->dtype());
+
+ // Shape of GraphInput
+ auto input_shape = std::make_unique<loco::TensorShape>();
+ input_shape->rank(input_node->rank());
+ for (uint32_t r = 0; r < input_node->rank(); ++r)
+ {
+ if (input_node->dim(r).known())
+ input_shape->dim(r).set(input_node->dim(r).value());
+ }
+ graph_input->shape(std::move(input_shape));
+}
+
+void add_graph_output(loco::Graph *graph, luci::CircleOutput *output_node)
+{
+ assert(graph != nullptr);
+ assert(output_node != nullptr);
+
+ auto graph_output = graph->outputs()->create();
+ graph_output->name(output_node->name());
+
+ // Set GraphInputOutputIndex for graph
+ output_node->index(graph_output->index());
+
+ // Data type
+ graph_output->dtype(output_node->dtype());
+
+ // Shape of GraphOutput
+ auto output_shape = std::make_unique<loco::TensorShape>();
+ output_shape->rank(output_node->rank());
+ for (uint32_t r = 0; r < output_node->rank(); ++r)
+ {
+ if (output_node->dim(r).known())
+ output_shape->dim(r).set(output_node->dim(r).value());
+ }
+ graph_output->shape(std::move(output_shape));
+}
+
+/**
+ * @brief make a clone of graph
+ */
+std::unique_ptr<loco::Graph> clone_graph(loco::Graph *graph_org, luci::CloneContext &clonectx)
+{
+ auto graph = loco::make_graph();
+ auto graph_clone = graph.get();
+ auto &graph_name = graph_org->name();
+
+ graph_clone->name(graph_name);
+
+ // clone inputs
+ auto inputs = graph_org->inputs();
+ assert(inputs);
+ for (uint32_t n = 0; n < inputs->size(); ++n)
+ {
+ auto input_org = luci::input_node(graph_org, n);
+ assert(input_org != nullptr);
+
+ auto *input_clone = graph_clone->nodes()->create<luci::CircleInput>();
+ luci::copy_common_attributes(input_org, input_clone);
+
+ add_graph_input(graph_clone, input_clone);
+ clonectx.emplace(input_org, input_clone);
+ }
+
+ // clone nodes
+ auto nodes = graph_org->nodes();
+ for (uint32_t n = 0; n < nodes->size(); ++n)
+ {
+ auto node = nodes->at(n);
+
+ // skip for CircleInput, CircleOutput
+ if (dynamic_cast<luci::CircleInput *>(node) != nullptr)
+ continue;
+ if (dynamic_cast<luci::CircleOutput *>(node) != nullptr)
+ continue;
+
+ auto node_org = loco::must_cast<luci::CircleNode *>(node);
+ assert(clonectx.find(node_org) == clonectx.end());
+
+ auto *node_clone = clone_node(node_org, graph_clone);
+ clonectx.emplace(node_org, node_clone);
+ }
+
+ // connect nodes
+ for (uint32_t n = 0; n < nodes->size(); ++n)
+ {
+ auto node = nodes->at(n);
+
+ // skip for CircleInput, CircleOutput
+ if (dynamic_cast<luci::CircleInput *>(node) != nullptr)
+ continue;
+ if (dynamic_cast<luci::CircleOutput *>(node) != nullptr)
+ continue;
+
+ auto node_org = loco::must_cast<luci::CircleNode *>(node);
+ clone_connect(node_org, clonectx);
+ }
+
+ // clone outputs
+ for (uint32_t n = 0; n < graph_org->outputs()->size(); ++n)
+ {
+ auto output_org = luci::output_node(graph_org, n);
+ assert(output_org != nullptr);
+
+ auto *output_clone = graph_clone->nodes()->create<luci::CircleOutput>();
+ luci::copy_common_attributes(output_org, output_clone);
+ // note: we don't add output_clone to clonectx.
+ // logically, output is not used as an input to any other nodes.
+ auto output_from = loco::must_cast<luci::CircleNode *>(output_org->from());
+ auto it = clonectx.find(output_from);
+ assert(it != clonectx.end());
+ output_clone->from(it->second);
+
+ add_graph_output(graph_clone, output_clone);
+ }
+
+ return graph;
+}
+
+void clone_recursive_subgraphs(luci::PartedModule &pm, loco::Graph *graph,
+ const luci::CloneContext &clonectx)
+{
+ auto nodes = graph->nodes();
+ for (uint32_t n = 0; n < nodes->size(); ++n)
+ {
+ {
+ auto if_node = dynamic_cast<luci::CircleIf *>(nodes->at(n));
+ if (if_node != nullptr)
+ {
+ clone_ifnode_subgraphs(pm, if_node, clonectx);
+ }
+ }
+ {
+ auto while_node = dynamic_cast<luci::CircleWhile *>(nodes->at(n));
+ if (while_node != nullptr)
+ {
+ clone_whilenode_subgraphs(pm, while_node, clonectx);
+ }
+ }
+ // TODO handle others
+ }
+}
+
+void clone_ifnode_subgraphs(luci::PartedModule &pm, const luci::CircleIf *if_node,
+ const luci::CloneContext &clonectx)
+{
+ assert(if_node != nullptr);
+
+ auto it = clonectx.find(if_node);
+ assert(it != clonectx.end());
+ auto if_clone = loco::must_cast<luci::CircleIf *>(it->second);
+
+ luci::CloneContext then_clonectx;
+ luci::CloneContext else_clonectx;
+
+ auto then_graph = if_node->then_graph();
+ auto else_graph = if_node->else_graph();
+
+ auto then_clone = clone_graph(then_graph, then_clonectx);
+ auto else_clone = clone_graph(else_graph, else_clonectx);
+ if_clone->then_graph(then_clone.get());
+ if_clone->else_graph(else_clone.get());
+
+ pm.module->add(std::move(then_clone));
+ int32_t then_index = pm.module->size() - 1;
+ pm.module->add(std::move(else_clone));
+ int32_t else_index = pm.module->size() - 1;
+ if_clone->then_branch(then_index);
+ if_clone->else_branch(else_index);
+
+ // do recursive copy subgraphs of CircleIf if there are any,
+ // inside then_graph or else_graph.
+ clone_recursive_subgraphs(pm, then_graph, then_clonectx);
+ clone_recursive_subgraphs(pm, else_graph, else_clonectx);
+}
+
+void clone_whilenode_subgraphs(luci::PartedModule &pm, const luci::CircleWhile *while_node,
+ const luci::CloneContext &clonectx)
+{
+ assert(while_node != nullptr);
+
+ auto it = clonectx.find(while_node);
+ assert(it != clonectx.end());
+ auto while_clone = loco::must_cast<luci::CircleWhile *>(it->second);
+
+ luci::CloneContext cond_clonectx;
+ luci::CloneContext body_clonectx;
+
+ auto cond_graph = while_node->cond_graph();
+ auto body_graph = while_node->body_graph();
+
+ auto cond_clone = clone_graph(cond_graph, cond_clonectx);
+ auto body_clone = clone_graph(body_graph, body_clonectx);
+ while_clone->cond_graph(cond_clone.get());
+ while_clone->body_graph(body_clone.get());
+
+ pm.module->add(std::move(cond_clone));
+ int32_t cond_index = pm.module->size() - 1;
+ pm.module->add(std::move(body_clone));
+ int32_t body_index = pm.module->size() - 1;
+ while_clone->cond_branch(cond_index);
+ while_clone->body_branch(body_index);
+
+ // do recursive copy subgraphs of CircleWhile if there are any,
+ // inside cond_graph or body_graph.
+ clone_recursive_subgraphs(pm, cond_graph, cond_clonectx);
+ clone_recursive_subgraphs(pm, body_graph, body_clonectx);
+}
+
+/**
+ * @brief Build loco::graph from pgroup into graph
+ */
+void build_graph(luci::PartedModule &pm, loco::Graph *graph, const luci::PGroup *pgroup)
+{
+ LOGGER(l);
+
+ luci::CloneContext clonectx;
+
+ // add input node(s)
+ for (auto *input : pgroup->inputs)
+ {
+ auto *input_clone = graph->nodes()->create<luci::CircleInput>();
+ luci::copy_common_attributes(input, input_clone);
+
+ add_graph_input(graph, input_clone);
+ clonectx.emplace(input, input_clone);
+
+ INFO(l) << "MAP: "
+ << " input(" << input << ") -> " << input_clone << "(" << input_clone->name() << ")";
+ }
+
+ // add CircleConst for inputs
+ for (auto &pnode : pgroup->pnodes)
+ {
+ auto node = pnode->node;
+ uint32_t arity = node->arity();
+ for (uint32_t a = 0; a < arity; ++a)
+ {
+ auto in_a_const = dynamic_cast<luci::CircleConst *>(node->arg(a));
+ if (in_a_const != nullptr)
+ {
+ auto it = clonectx.find(in_a_const);
+ if (it == clonectx.end())
+ {
+ auto *clone = clone_node(in_a_const, graph);
+ clonectx.emplace(in_a_const, clone);
+
+ INFO(l) << "MAP: "
+ << " const(" << in_a_const << ") -> " << clone << "(" << clone->name() << ")";
+ }
+ }
+ }
+ }
+
+ // add nodes
+ for (auto &pnode : pgroup->pnodes)
+ {
+ auto *clone = clone_node(pnode->node, graph);
+ clonectx.emplace(pnode->node, clone);
+
+ INFO(l) << "MAP: "
+ << " node(" << pnode->node << ") -> " << clone << "(" << clone->name() << ")";
+ }
+ // connect nodes
+ for (auto &pnode : pgroup->pnodes)
+ {
+ clone_connect(pnode->node, clonectx);
+ }
+
+ // add output node(s)
+ for (auto *output : pgroup->outputs)
+ {
+ auto *output_clone = graph->nodes()->create<luci::CircleOutput>();
+ luci::copy_common_attributes(output, output_clone);
+ // note: we don't add output_clone to clonectx.
+ // logically, output is not used as an input to any other nodes.
+
+ auto it = clonectx.find(output);
+ assert(it != clonectx.end());
+ output_clone->from(it->second);
+
+ add_graph_output(graph, output_clone);
+
+ INFO(l) << "MAP: "
+ << "output(" << output << ") -> " << output_clone << "(" << output_clone->name() << ")"
+ << ": from " << it->second << "(" << it->second->name() << ")";
+ }
+
+ // TODO relocate this if needed
+ // subgraphs for IF/WHILE/... nodes
+ for (auto &pnode : pgroup->pnodes)
+ {
+ {
+ auto if_node = dynamic_cast<const luci::CircleIf *>(pnode->node);
+ if (if_node != nullptr)
+ {
+ clone_ifnode_subgraphs(pm, if_node, clonectx);
+ }
+ }
+ {
+ auto while_node = dynamic_cast<const luci::CircleWhile *>(pnode->node);
+ if (while_node != nullptr)
+ {
+ clone_whilenode_subgraphs(pm, while_node, clonectx);
+ }
+ }
+ // TODO handle others
+ }
+}
+
+std::string make_name(const luci::PGroup *pgroup)
+{
+ auto &first_pnode = *pgroup->pnodes.begin();
+ auto *first_node = first_pnode->node;
+ std::string name = first_node->graph()->name();
+ name = name + "_" + pgroup->group;
+ return name;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * @brief This will produce a list of luci::Module as PartedModules from pgroups
+ */
+luci::PartedModules produce_pmodules(const luci::PGroups *pgroups)
+{
+ LOGGER(l);
+
+ luci::PartedModules pms;
+
+ for (auto &pgroup : pgroups->pgroups)
+ {
+ luci::PartedModule pm;
+ pm.module = std::make_unique<luci::Module>();
+ pm.group = pgroup->group;
+
+ // the main graph for this module
+ auto graph = loco::make_graph();
+ auto graph_ptr = graph.get();
+
+ auto graph_name = make_name(pgroup.get());
+ graph->name(graph_name);
+
+ // Add main graph so that other subgraphs can be added inside build_graph
+ pm.module->add(std::move(graph));
+
+ INFO(l) << "--- Partition Graph build----------------------";
+ INFO(l) << "--- name: " << graph_name;
+ build_graph(pm, graph_ptr, pgroup.get());
+
+ pms.pmodules.emplace_back(std::move(pm));
+ }
+
+ return pms;
+}
+
+} // namespace luci
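
One consequence of the "add CircleConst for inputs" step in build_graph() above: a CircleConst shared by nodes that land in different groups is cloned separately into each partitioned graph (sketch):

    source:  Const C ---> Add (group "A")
                    \---> Mul (group "B")

    parted:  module_A: C'  ---> Add'
             module_B: C'' ---> Mul'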
diff --git a/compiler/luci/partition/src/PartitionPModules.h b/compiler/luci/partition/src/PartitionPModules.h
new file mode 100644
index 000000000..628ada56c
--- /dev/null
+++ b/compiler/luci/partition/src/PartitionPModules.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PARTITION_PMODULES_H__
+#define __LUCI_PARTITION_PMODULES_H__
+
+#include "PartitionIR.h"
+
+#include "luci/Partition.h"
+
+namespace luci
+{
+
+luci::PartedModules produce_pmodules(const luci::PGroups *pgroups);
+
+} // namespace luci
+
+#endif // __LUCI_PARTITION_PMODULES_H__
diff --git a/compiler/luci/partition/src/PartitionPModules.test.cpp b/compiler/luci/partition/src/PartitionPModules.test.cpp
new file mode 100644
index 000000000..9b949c2de
--- /dev/null
+++ b/compiler/luci/partition/src/PartitionPModules.test.cpp
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionPModules.h"
+#include "PartitionPGroups.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <luci/IR/Nodes/CircleSqrt.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class SqrtGraphlet
+{
+public:
+ SqrtGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 input_shape)
+ {
+ _sqrt = g->nodes()->create<luci::CircleSqrt>();
+ _sqrt->dtype(loco::DataType::S32);
+ _sqrt->name("sqrt");
+ }
+
+protected:
+ luci::CircleSqrt *_sqrt = nullptr;
+};
+
+class SqrtGraph : public TestIOGraph, public SqrtGraphlet
+{
+public:
+ SqrtGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ SqrtGraphlet::init(g(), shape);
+
+ _sqrt->x(input());
+
+ output()->from(_sqrt);
+ }
+};
+
+} // namespace
+
+TEST(PartitionPModulesTest, simple_convert)
+{
+ luci::Module module;
+
+ SqrtGraph g;
+ g.init({3, 3});
+ g.transfer_to(&module);
+
+ luci::PartitionTable pt;
+ pt.default_group = "A";
+ pt.comply = luci::PartitionTable::COMPLY::OPCODE;
+
+ auto pgs = produce_pgroups(&module, pt);
+ auto pms = produce_pmodules(pgs.get());
+
+ ASSERT_EQ(1, pms.pmodules.size());
+}
diff --git a/compiler/luci/partition/src/PartitionPModulesDump.cpp b/compiler/luci/partition/src/PartitionPModulesDump.cpp
new file mode 100644
index 000000000..ee50bc6fb
--- /dev/null
+++ b/compiler/luci/partition/src/PartitionPModulesDump.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PartitionPModulesDump.h"
+
+#include "luci/LogHelper.h"
+
+#include <iostream>
+
+namespace luci
+{
+
+void dump(std::ostream &os, const PartedModule *pmodule)
+{
+ os << "--- PartedModule: " << pmodule->group << std::endl;
+ os << luci::fmt(pmodule->module->graph());
+}
+
+void dump(std::ostream &os, const PartedModules *pmodules)
+{
+ for (auto &pmodule : pmodules->pmodules)
+ {
+ dump(os, &pmodule);
+ }
+ os << std::endl;
+}
+
+} // namespace luci
+
+std::ostream &operator<<(std::ostream &os, const luci::PartedModules *pmodules)
+{
+ luci::dump(os, pmodules);
+ return os;
+}
diff --git a/compiler/luci/partition/src/PartitionPModulesDump.h b/compiler/luci/partition/src/PartitionPModulesDump.h
new file mode 100644
index 000000000..e77b235f4
--- /dev/null
+++ b/compiler/luci/partition/src/PartitionPModulesDump.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PARTITION_PMODULES_DUMP_H__
+#define __LUCI_PARTITION_PMODULES_DUMP_H__
+
+#include "luci/Partition.h"
+
+#include <iostream>
+
+namespace luci
+{
+
+void dump(std::ostream &os, const PartedModule *pmodule);
+void dump(std::ostream &os, const PartedModules *pmodules);
+
+} // namespace luci
+
+std::ostream &operator<<(std::ostream &os, const luci::PartedModules *pmodules);
+
+#endif // __LUCI_PARTITION_PMODULES_DUMP_H__
diff --git a/compiler/luci/partition/src/PartitionValidate.cpp b/compiler/luci/partition/src/PartitionValidate.cpp
new file mode 100644
index 000000000..5aceb98ca
--- /dev/null
+++ b/compiler/luci/partition/src/PartitionValidate.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/PartitionValidate.h"
+
+#include <luci/Service/Validate.h>
+
+#include <pepper/csv2vec.h>
+
+#include <iostream>
+
+namespace luci
+{
+
+bool validate(luci::PartitionTable &partition)
+{
+ if (partition.groups.size() == 0)
+ {
+ std::cerr << "There is no 'backends' information";
+ return false;
+ }
+ if (partition.default_group.empty())
+ {
+ std::cerr << "There is no 'default' backend information";
+ return false;
+ }
+ if (!pepper::is_one_of<std::string>(partition.default_group, partition.groups))
+ {
+ std::cerr << "'default' backend is not one of 'backends' item";
+ return false;
+ }
+ for (auto &byopcode : partition.byopcodes)
+ {
+ if (!pepper::is_one_of<std::string>(byopcode.second, partition.groups))
+ {
+ std::cerr << "OPCODE " << byopcode.first << " is not assigned to one of 'backends' items";
+ return false;
+ }
+ }
+ for (auto &byopname : partition.byopnames)
+ {
+ if (!pepper::is_one_of<std::string>(byopname.second, partition.groups))
+ {
+ std::cerr << "OPNAME " << byopname.first << " is not assigned to one of 'backends' items";
+ return false;
+ }
+ }
+ return true;
+}
+
+} // namespace luci
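
A short, hedged sketch of calling validate() above; the checks are exactly the four in this file, while how the PartitionTable gets populated (typically from a partition config) is outside this diff:

#include "luci/PartitionValidate.h"

#include <iostream>

int check_partition(luci::PartitionTable &partition)
{
  // Fails when: no groups, no default group, default group not among the
  // groups, or an OPCODE/OPNAME assigned to an undeclared group.
  if (!luci::validate(partition))
  {
    std::cerr << std::endl;
    return 1;
  }
  return 0;
}
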
diff --git a/compiler/luci/pass/CMakeLists.txt b/compiler/luci/pass/CMakeLists.txt
index 2c5fb3407..ac18a5f8d 100644
--- a/compiler/luci/pass/CMakeLists.txt
+++ b/compiler/luci/pass/CMakeLists.txt
@@ -1,9 +1,27 @@
+nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
+nnas_find_package(Fp16Source QUIET)
+
+if(NOT FlatBuffers_FOUND)
+ message(STATUS "FlatBuffers NOT FOUND")
+ return()
+endif(NOT FlatBuffers_FOUND)
+
+if(NOT Fp16Source_FOUND)
+ message(STATUS "Fp16Source NOT FOUND")
+ return()
+endif(NOT Fp16Source_FOUND)
+
file(GLOB_RECURSE SOURCES "src/*.cpp")
file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
-add_library(luci_pass SHARED ${SOURCES})
+if (NOT LUCI_LIBRARY_TYPE)
+ set(LUCI_LIBRARY_TYPE "SHARED")
+endif(NOT LUCI_LIBRARY_TYPE)
+
+add_library(luci_pass ${LUCI_LIBRARY_TYPE} ${SOURCES})
target_include_directories(luci_pass PRIVATE src)
+target_include_directories(luci_pass PRIVATE ${Fp16Source_DIR}/include)
target_include_directories(luci_pass PUBLIC include)
target_link_libraries(luci_pass PUBLIC loco)
target_link_libraries(luci_pass PUBLIC logo_core)
@@ -12,9 +30,15 @@ target_link_libraries(luci_pass PRIVATE luci_lang)
target_link_libraries(luci_pass PRIVATE luci_log)
target_link_libraries(luci_pass PRIVATE luci_service)
target_link_libraries(luci_pass PRIVATE luci_logex)
+target_link_libraries(luci_pass PRIVATE luci_profile)
+target_link_libraries(luci_pass PRIVATE luci_compute)
target_link_libraries(luci_pass PRIVATE nncc_common)
+target_link_libraries(luci_pass PRIVATE pepper_csv2vec)
target_link_libraries(luci_pass PRIVATE oops)
+target_link_libraries(luci_pass PRIVATE flatbuffers-2.0)
install(TARGETS luci_pass DESTINATION lib)
+install(DIRECTORY include/ DESTINATION include
+ FILES_MATCHING PATTERN "*.h")
if(NOT ENABLE_TEST)
return()
@@ -26,4 +50,6 @@ GTest_AddTest(luci_pass_test ${TESTS})
target_include_directories(luci_pass_test PRIVATE src)
target_link_libraries(luci_pass_test luci_pass)
target_link_libraries(luci_pass_test luci_lang)
+target_link_libraries(luci_pass_test luci_testhelper)
+target_link_libraries(luci_pass_test flatbuffers-2.0)
#target_link_libraries(luci_pass_test oops)
diff --git a/compiler/luci/pass/include/luci/CircleOptimizer.h b/compiler/luci/pass/include/luci/CircleOptimizer.h
index a832844f8..6ebacee39 100644
--- a/compiler/luci/pass/include/luci/CircleOptimizer.h
+++ b/compiler/luci/pass/include/luci/CircleOptimizer.h
@@ -19,6 +19,8 @@
#include <loco.h>
+#include <luci/IR/Module.h>
+
#include <string>
#include <vector>
@@ -32,22 +34,76 @@ public:
{
enum Algorithm
{
+ FuseAddWithFullyConnected,
+ FuseAddWithTConv,
+ FuseBatchNormWithConv,
+ FuseBatchNormWithDwConv,
FuseBatchNormWithTConv,
FuseBCQ,
FuseInstanceNorm,
+ FuseMeanWithMean,
+ FuseTransposeWithMean,
ResolveCustomOpAdd,
ResolveCustomOpBatchMatMul,
ResolveCustomOpMatMul,
- QuantizeDequantizeWeights,
- QuantizeWithMinMax,
- Requantize,
+ ResolveCustomOpMaxPoolWithArgmax,
+ ResolveCustomOpSplitV,
+ FoldAddV2,
+ FoldCast,
+ FoldDensify,
+ FoldDepthwiseConv2D,
+ FoldFullyConnected,
+ FoldDequantize,
+ FoldGather,
+ FoldSparseToDense,
+ ForwardReshapeToUnaryOp,
+ ForwardTransposeOp,
+ SparsifyTensorPass,
+ FusePreActivationBatchNorm,
+ MakeBatchNormGammaPositive,
+ FuseActivationFunction,
+ FusePRelu,
+ FuseGelu,
+ ShuffleWeightTo16x1Float32,
+ RemoveRedundantTranspose,
+ ReplaceMulAddWithDepthwiseConv,
+ ReplaceNonConstFCWithBatchMatMul,
+ ReplaceSubWithAdd,
+ SubstitutePackToReshape,
+ SubstitutePadV2ToPad,
+ SubstituteSplitVToSplit,
+ SubstituteSqueezeToReshape,
+ ExpandBroadcastConst,
+ ConvertNCHWToNHWC,
+ RemoveUnnecessarySlice,
+ RemoveUnnecessaryStridedSlice,
+ RemoveUnnecessarySplit,
+ RemoveUnnecessaryReshape,
+ TransformMinMaxToRelu6Pass,
+ TransformMinReluToRelu6Pass,
+ DecomposeHardSwishPass,
+ SubstituteStridedSliceToReshape,
+ SubstituteTransposeToReshape,
+ RemoveRedundantQuantize,
+ RemoveRedundantReshape,
+ RemoveFakeQuant,
+ RemoveQuantDequantSeq,
+ RemoveDuplicateConst,
+ UnrollUnidirSeqLSTM,
};
enum AlgorithmParameters
{
- Quantize_input_dtype,
- Quantize_output_dtype,
- Quantize_granularity // layer-wise or channel-wise
+ // sparsify
+ Sparsify_tensor_name,
+ Sparsify_traversal_order,
+ Sparsify_format,
+ Sparsify_block_size,
+ Sparsify_block_map,
+
+ // convert NCHW to NHWC
+ NCHW_to_NHWC_input_shape,
+ NCHW_to_NHWC_output_shape,
};
virtual ~Options() = default;
@@ -63,9 +119,11 @@ public:
Options *options(void);
public:
+ void optimize(luci::Module *) const;
+
void optimize(loco::Graph *) const;
- void quantize(loco::Graph *) const;
+ void sparsify(loco::Graph *) const;
private:
std::unique_ptr<Options> _options;
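
A hedged sketch of the new module-level optimize() entry point. The Algorithm names are taken from the enum above; the enable() accessor is assumed to match the style of CircleQuantizer::Options later in this diff, since the Options virtuals sit in an unchanged part of this header:

#include <luci/CircleOptimizer.h>

void run_optimizations(luci::Module *m)
{
  luci::CircleOptimizer optimizer;
  auto options = optimizer.options();

  // Enable a couple of the newly listed graph-level algorithms.
  options->enable(luci::CircleOptimizer::Options::Algorithm::FuseAddWithFullyConnected);
  options->enable(luci::CircleOptimizer::Options::Algorithm::RemoveRedundantReshape);

  // New overload: runs the configured passes over every graph in the module.
  optimizer.optimize(m);
}
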
diff --git a/compiler/luci/pass/include/luci/CircleQuantizer.h b/compiler/luci/pass/include/luci/CircleQuantizer.h
new file mode 100644
index 000000000..463f31790
--- /dev/null
+++ b/compiler/luci/pass/include/luci/CircleQuantizer.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_QUANTIZER_H__
+#define __LUCI_CIRCLE_QUANTIZER_H__
+
+#include <loco.h>
+
+#include <string>
+#include <vector>
+
+namespace luci
+{
+
+class CircleQuantizer final
+{
+public:
+ struct Options
+ {
+ struct LayerParam
+ {
+ std::string name;
+ std::string dtype;
+ std::string granularity;
+ };
+
+ enum Algorithm
+ {
+ QuantizeDequantizeWeights,
+ QuantizeWithMinMax,
+ Requantize,
+ CopyQuantParam,
+ ForceQuantParam,
+ ConvertToFakeQuantizedModel,
+ QuantizeWeights,
+ };
+
+ enum AlgorithmParameters
+ {
+ // quantize
+ Quantize_input_model_dtype,
+ Quantize_output_model_dtype,
+ Quantize_granularity, // layer-wise or channel-wise
+ Quantize_tensor_names,
+ Quantize_scales,
+ Quantize_zero_points,
+ Quantize_layer_params,
+
+ // copy_quantparam
+ Quantize_src_tensor_names,
+ Quantize_dst_tensor_names,
+
+ Quantize_input_type,
+ Quantize_output_type,
+ Quantize_TF_style_maxpool,
+ };
+
+ virtual ~Options() = default;
+
+ virtual void enable(Algorithm) = 0;
+ virtual bool query(Algorithm) = 0;
+ virtual void param(AlgorithmParameters, const std::string &) = 0;
+ virtual const std::string param(AlgorithmParameters) const = 0;
+ virtual void params(AlgorithmParameters, std::vector<std::string> &) = 0;
+ virtual std::vector<std::string> params(AlgorithmParameters) const = 0;
+
+ // Quantization parameters for multiple layers
+ virtual void layer_params(AlgorithmParameters, std::vector<std::shared_ptr<LayerParam>> &) = 0;
+ virtual std::vector<std::shared_ptr<LayerParam>> layer_params(AlgorithmParameters) const = 0;
+ };
+
+public:
+ // TODO maybe caller can provide Options as ctor parameters
+ Options *options(void);
+
+public:
+ void quantize(loco::Graph *) const;
+
+private:
+ std::unique_ptr<Options> _options;
+};
+
+} // namespace luci
+
+#endif // __LUCI_CIRCLE_QUANTIZER_H__
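
A hedged sketch of driving the new CircleQuantizer, using only the signatures declared above; the concrete value strings ("float32", "uint8", "channel") are assumptions, as the header documents only "layer-wise or channel-wise" for granularity:

#include <luci/CircleQuantizer.h>

void quantize_graph(loco::Graph *g)
{
  luci::CircleQuantizer quantizer;
  auto options = quantizer.options();

  options->enable(luci::CircleQuantizer::Options::Algorithm::QuantizeWithMinMax);

  // Assumed value strings; only the parameter keys come from the header.
  options->param(luci::CircleQuantizer::Options::AlgorithmParameters::Quantize_input_model_dtype,
                 "float32");
  options->param(luci::CircleQuantizer::Options::AlgorithmParameters::Quantize_output_model_dtype,
                 "uint8");
  options->param(luci::CircleQuantizer::Options::AlgorithmParameters::Quantize_granularity,
                 "channel");

  quantizer.quantize(g);
}
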
diff --git a/compiler/luci/pass/include/luci/DynamicBatchToSingleBatch.h b/compiler/luci/pass/include/luci/DynamicBatchToSingleBatch.h
new file mode 100644
index 000000000..2a02777f6
--- /dev/null
+++ b/compiler/luci/pass/include/luci/DynamicBatchToSingleBatch.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_DYNAMIC_BATCH_TO_SINGLE_BATCH_H__
+#define __LUCI_DYNAMIC_BATCH_TO_SINGLE_BATCH_H__
+
+#include <luci/IR/Module.h>
+
+namespace luci
+{
+
+void dynamic_batch_to_single_batch(luci::Module *);
+
+} // namespace luci
+
+#endif // __LUCI_DYNAMIC_BATCH_TO_SINGLE_BATCH_H__
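
Usage is a single call; a minimal sketch assuming an already-loaded module:

#include <luci/DynamicBatchToSingleBatch.h>

void to_single_batch(luci::Module *m)
{
  // Rewrites the dynamic batch dimension of the module's inputs to 1,
  // backed by the DynamicBatchToSingleBatchPass declared later in this diff.
  luci::dynamic_batch_to_single_batch(m);
}
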
diff --git a/compiler/luci/pass/include/luci/ModulePass.h b/compiler/luci/pass/include/luci/ModulePass.h
new file mode 100644
index 000000000..1835f6e0c
--- /dev/null
+++ b/compiler/luci/pass/include/luci/ModulePass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MODULE_PASS_H__
+#define __MODULE_PASS_H__
+
+#include <loco.h>
+#include <logo/Pass.h>
+
+#include <luci/IR/Module.h>
+
+namespace luci
+{
+
+class Pass : public logo::Pass
+{
+public:
+ // Run module pass and return false if nothing was changed
+ virtual bool run(luci::Module *) = 0;
+};
+
+} // namespace luci
+
+#endif // __MODULE_PASS_H__
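
A hedged sketch of a concrete module pass on the new luci::Pass base. Since logo::Pass still declares the pure virtual run(loco::Graph *), a subclass overrides both run() overloads, mirroring CircleShapeInferencePass below (the pass body here is a hypothetical no-op):

#include <luci/ModulePass.h>

class NoopModulePass final : public luci::Pass
{
public:
  const char *name(void) const final { return "example::NoopModulePass"; }

  // Module-level entry point from luci::Pass: false means "nothing changed".
  bool run(luci::Module *) final { return false; }

  // Graph-level entry point inherited from logo::Pass.
  bool run(loco::Graph *) final { return false; }
};
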
diff --git a/compiler/luci/pass/include/luci/Pass/CircleShapeInferencePass.h b/compiler/luci/pass/include/luci/Pass/CircleShapeInferencePass.h
new file mode 100644
index 000000000..21d6d09d6
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/CircleShapeInferencePass.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_SHAPE_INFERENCE_PASS_H__
+#define __LUCI_CIRCLE_SHAPE_INFERENCE_PASS_H__
+
+#include <loco.h>
+
+#include <luci/ModulePass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Pass to infer shape of circle nodes
+ */
+class CircleShapeInferencePass : public luci::Pass
+{
+public:
+ virtual const char *name(void) const { return "luci::CircleShapeInferencePass"; }
+
+public:
+ bool run(luci::Module *m);
+ bool run(loco::Graph *graph);
+};
+
+} // namespace luci
+
+#endif //__LUCI_CIRCLE_SHAPE_INFERENCE_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/CircleTypeInferencePass.h b/compiler/luci/pass/include/luci/Pass/CircleTypeInferencePass.h
new file mode 100644
index 000000000..379b44ccd
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/CircleTypeInferencePass.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_TYPE_INFERENCE_PASS_H__
+#define __LUCI_CIRCLE_TYPE_INFERENCE_PASS_H__
+
+#include <loco.h>
+
+#include <luci/ModulePass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Pass to infer type of circle nodes
+ */
+class CircleTypeInferencePass : public luci::Pass
+{
+public:
+ virtual const char *name(void) const { return "luci::CircleTypeInferencePass"; }
+
+public:
+ bool run(luci::Module *m);
+ bool run(loco::Graph *g);
+};
+
+} // namespace luci
+
+#endif //__LUCI_CIRCLE_TYPE_INFERENCE_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/ConvertNCHWToNHWCPass.h b/compiler/luci/pass/include/luci/Pass/ConvertNCHWToNHWCPass.h
new file mode 100644
index 000000000..ba2392596
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/ConvertNCHWToNHWCPass.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CONVERT_NCHW_TO_NHWC_PASS_H__
+#define __LUCI_CONVERT_NCHW_TO_NHWC_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to convert NCHW Ops to NHWC
+ *
+ * @details Find operators that use NCHW layout and make them use NHWC.
+ * Strictly speaking, it is impossible to distinguish whether
+ * an operator is using NCHW or NHWC without programmers' annotations.
+ * But we guess the data layout of each operator as much as possible
+ * based on the assumptions described in the comments.
+ * Note that this Pass does not change the execution result even
+ * for the false-positive cases.
+ */
+struct ConvertNCHWToNHWCPass final : public logo::Pass
+{
+public:
+ ConvertNCHWToNHWCPass(bool preserve_input, bool preserve_output)
+ : _preserve_input(preserve_input), _preserve_output(preserve_output)
+ {
+ // Do nothing
+ }
+
+ ConvertNCHWToNHWCPass() = delete;
+
+ virtual ~ConvertNCHWToNHWCPass() = default;
+
+ const char *name(void) const final { return "luci::ConvertNCHWToNHWCPass"; }
+
+ bool run(loco::Graph *g) final;
+
+private:
+ bool _preserve_input = false;
+ bool _preserve_output = false;
+};
+
+} // namespace luci
+
+#endif // __LUCI_CONVERT_NCHW_TO_NHWC_PASS_H__
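
A sketch of instantiating the pass above; the flag values are illustrative (keeping both graph inputs and outputs in their original layout), not documented defaults:

#include <luci/Pass/ConvertNCHWToNHWCPass.h>

bool convert_layout(loco::Graph *g)
{
  // preserve_input / preserve_output keep the graph-level I/O tensors in
  // NCHW while interior operators are rewritten to NHWC.
  luci::ConvertNCHWToNHWCPass pass(/*preserve_input=*/true, /*preserve_output=*/true);
  return pass.run(g);
}
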
diff --git a/compiler/luci/pass/include/luci/Pass/ConvertToFakeQuantizedModelPass.h b/compiler/luci/pass/include/luci/Pass/ConvertToFakeQuantizedModelPass.h
new file mode 100644
index 000000000..91dd2300e
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/ConvertToFakeQuantizedModelPass.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CONVERT_TO_FAKE_QUANTIZED_MODEL_PASS_H__
+#define __LUCI_CONVERT_TO_FAKE_QUANTIZED_MODEL_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to convert a quantized model to a fake-quantized fp32 model.
+ */
+struct ConvertToFakeQuantizedModelPass final : public logo::Pass
+{
+ ConvertToFakeQuantizedModelPass() {}
+
+ const char *name(void) const final { return "luci::ConvertToFakeQuantizedModelPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_CONVERT_TO_FAKE_QUANTIZED_MODEL_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/CopyQuantParamPass.h b/compiler/luci/pass/include/luci/Pass/CopyQuantParamPass.h
new file mode 100644
index 000000000..18c9cd56a
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/CopyQuantParamPass.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_COPY_QUANT_PARAM_PASS_H__
+#define __LUCI_COPY_QUANT_PARAM_PASS_H__
+
+#include <loco.h>
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Pass to copy quantparam (scale, zerop) of a tensor to another tensor
+ */
+class CopyQuantParamPass : public logo::Pass
+{
+public:
+ using TensorVector = std::vector<std::string>;
+
+public:
+ CopyQuantParamPass(TensorVector &src_tensors, TensorVector &dst_tensors)
+ : _src_tensors{src_tensors}, _dst_tensors{dst_tensors}
+ {
+ // DO NOTHING
+ }
+ virtual const char *name(void) const { return "luci::CopyQuantParamPass"; }
+
+public:
+ bool run(loco::Graph *graph);
+
+private:
+ TensorVector _src_tensors;
+ TensorVector _dst_tensors;
+};
+
+} // namespace luci
+
+#endif //__LUCI_COPY_QUANT_PARAM_PASS_H__
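
A sketch of wiring CopyQuantParamPass; the tensor names are placeholders, and pairing src[i] with dst[i] is an assumption drawn from the parallel-vector constructor:

#include <luci/Pass/CopyQuantParamPass.h>

bool copy_qparams(loco::Graph *g)
{
  // Hypothetical names: copy (scale, zerop) of "conv1_out" to "relu1_out".
  luci::CopyQuantParamPass::TensorVector src{"conv1_out"};
  luci::CopyQuantParamPass::TensorVector dst{"relu1_out"};

  luci::CopyQuantParamPass pass(src, dst);
  return pass.run(g);
}
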
diff --git a/compiler/luci/pass/include/luci/Pass/DecomposeHardSwishPass.h b/compiler/luci/pass/include/luci/Pass/DecomposeHardSwishPass.h
new file mode 100644
index 000000000..83c16bcee
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/DecomposeHardSwishPass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_DECOMPOSE_HARDSWISH_PASS_H__
+#define __LUCI_DECOMPOSE_HARDSWISH_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to decompose HardSwish into Add, Mul and Relu6
+ */
+struct DecomposeHardSwishPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::DecomposeHardSwishPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_DECOMPOSE_HARDSWISH_PASS_H__
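
The decomposition named by this pass is the standard identity hard_swish(x) = x * relu6(x + 3) / 6; a scalar reference sketch in plain C++ (independent of the luci IR rewrite itself):

#include <algorithm>

// relu6 clamps to [0, 6]; HardSwish then decomposes into Add, Relu6 and Mul.
float relu6(float x) { return std::min(std::max(x, 0.0f), 6.0f); }

float hard_swish_decomposed(float x) { return x * relu6(x + 3.0f) * (1.0f / 6.0f); }
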
diff --git a/compiler/luci/pass/include/luci/Pass/DynamicBatchToSingleBatchPass.h b/compiler/luci/pass/include/luci/Pass/DynamicBatchToSingleBatchPass.h
new file mode 100644
index 000000000..b3598c986
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/DynamicBatchToSingleBatchPass.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_DYNAMIC_BATCH_TO_SINGLE_BATCH_PASS_H__
+#define __LUCI_DYNAMIC_BATCH_TO_SINGLE_BATCH_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Pass to convert dynamic batch to single batch
+ */
+class DynamicBatchToSingleBatchPass : public logo::Pass
+{
+public:
+ virtual const char *name(void) const { return "luci::DynamicBatchToSingleBatchPass"; }
+
+public:
+ bool run(loco::Graph *graph);
+};
+
+} // namespace luci
+
+#endif //__LUCI_DYNAMIC_BATCH_TO_SINGLE_BATCH_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/ExpandBroadcastConstPass.h b/compiler/luci/pass/include/luci/Pass/ExpandBroadcastConstPass.h
new file mode 100644
index 000000000..5ee26b472
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/ExpandBroadcastConstPass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_EXPAND_BROADCAST_CONST_PASS_H__
+#define __LUCI_EXPAND_BROADCAST_CONST_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to remove implicit broadcasts by expanding Const nodes to their broadcast shape.
+ */
+struct ExpandBroadcastConstPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::ExpandBroadcastConstPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_EXPAND_BROADCAST_CONST_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/FoldAddV2Pass.h b/compiler/luci/pass/include/luci/Pass/FoldAddV2Pass.h
new file mode 100644
index 000000000..cd260b916
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/FoldAddV2Pass.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FOLD_ADD_V2_PASS_H__
+#define __LUCI_FOLD_ADD_V2_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fold AddV2 to a constant tensor
+ *
+ */
+struct FoldAddV2Pass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FoldAddV2Pass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FOLD_ADD_V2_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/FoldCastPass.h b/compiler/luci/pass/include/luci/Pass/FoldCastPass.h
new file mode 100644
index 000000000..5d7ce4ad3
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/FoldCastPass.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FOLD_CAST_PASS_H__
+#define __LUCI_FOLD_CAST_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fold Cast to a constant tensor
+ *
+ */
+struct FoldCastPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FoldCastPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FOLD_CAST_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/FoldDensifyPass.h b/compiler/luci/pass/include/luci/Pass/FoldDensifyPass.h
new file mode 100644
index 000000000..8ec81b1d4
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/FoldDensifyPass.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FOLD_DENSIFY_PASS_H__
+#define __LUCI_FOLD_DENSIFY_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fold Densify when its input is a sparse constant
+ *
+ */
+struct FoldDensifyPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FoldDensifyPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FOLD_DENSIFY_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/FoldDepthwiseConv2DPass.h b/compiler/luci/pass/include/luci/Pass/FoldDepthwiseConv2DPass.h
new file mode 100644
index 000000000..58e5b71a7
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/FoldDepthwiseConv2DPass.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FOLD_DEPTHWISE_CONV_2D_PASS_H__
+#define __LUCI_FOLD_DEPTHWISE_CONV_2D_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fold DepthwiseConv2D with constant input and filter into a
+ * constant tensor
+ */
+struct FoldDepthwiseConv2DPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FoldDepthwiseConv2DPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FOLD_DEPTHWISE_CONV_2D_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/FoldDequantizePass.h b/compiler/luci/pass/include/luci/Pass/FoldDequantizePass.h
new file mode 100644
index 000000000..07610d3e1
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/FoldDequantizePass.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FOLD_DEQUANTIZE_PASS_H__
+#define __LUCI_FOLD_DEQUANTIZE_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fold Dequantize whose input is a constant tensor
+ *
+ */
+struct FoldDequantizePass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FoldDequantizePass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FOLD_DEQUANTIZE_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/FoldFullyConnectedPass.h b/compiler/luci/pass/include/luci/Pass/FoldFullyConnectedPass.h
new file mode 100644
index 000000000..bd36ff149
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/FoldFullyConnectedPass.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FOLD_FULLY_CONNECTED_PASS_H__
+#define __LUCI_FOLD_FULLY_CONNECTED_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fold FullyConnected with constant input and filter into a
+ * constant tensor
+ */
+struct FoldFullyConnectedPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FoldFullyConnectedPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FOLD_FULLY_CONNECTED_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/FoldGatherPass.h b/compiler/luci/pass/include/luci/Pass/FoldGatherPass.h
new file mode 100644
index 000000000..de08c8845
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/FoldGatherPass.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FOLD_GATHER_PASS_H__
+#define __LUCI_FOLD_GATHER_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fold Gather to a constant tensor
+ *
+ */
+struct FoldGatherPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FoldGatherPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FOLD_GATHER_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/FoldSparseToDensePass.h b/compiler/luci/pass/include/luci/Pass/FoldSparseToDensePass.h
new file mode 100644
index 000000000..00d2447a5
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/FoldSparseToDensePass.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FOLD_SPARSE_TO_DENSE_PASS_H__
+#define __LUCI_FOLD_SPARSE_TO_DENSE_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fold SparseToDense to a constant tensor
+ *
+ */
+struct FoldSparseToDensePass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FoldSparseToDensePass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FOLD_SPARSE_TO_DENSE_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/ForceQuantParamPass.h b/compiler/luci/pass/include/luci/Pass/ForceQuantParamPass.h
new file mode 100644
index 000000000..752ce1d31
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/ForceQuantParamPass.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FORCE_QUANT_PARAM_PASS_H__
+#define __LUCI_FORCE_QUANT_PARAM_PASS_H__
+
+#include <loco.h>
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Pass to write quantparam (scale, zerop) to the specified tensors
+ */
+class ForceQuantParamPass : public logo::Pass
+{
+public:
+ using TensorVector = std::vector<std::string>;
+ using ScaleVector = std::vector<float>;
+ using ZPVector = std::vector<int64_t>;
+
+public:
+ ForceQuantParamPass(TensorVector &tensors, ScaleVector &scales, ZPVector &zerops)
+ : _tensors{tensors}, _scales{scales}, _zerops{zerops}
+ {
+ // DO NOTHING
+ }
+ virtual const char *name(void) const { return "luci::ForceQuantParamPass"; }
+
+public:
+ bool run(loco::Graph *graph);
+
+private:
+ TensorVector _tensors;
+ ScaleVector _scales;
+ ZPVector _zerops;
+};
+
+} // namespace luci
+
+#endif //__LUCI_FORCE_QUANT_PARAM_PASS_H__
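
A sketch of forcing quantparams with the pass above; the name and values are placeholders, with the three vectors matched index-by-index per the parallel-vector constructor:

#include <luci/Pass/ForceQuantParamPass.h>

bool force_qparams(loco::Graph *g)
{
  // Hypothetical: write scale 0.0039 and zero-point 0 to tensor "conv1_out".
  luci::ForceQuantParamPass::TensorVector tensors{"conv1_out"};
  luci::ForceQuantParamPass::ScaleVector scales{0.0039f};
  luci::ForceQuantParamPass::ZPVector zerops{0};

  luci::ForceQuantParamPass pass(tensors, scales, zerops);
  return pass.run(g);
}
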
diff --git a/compiler/luci/pass/include/luci/Pass/ForwardReshapeToUnaryOpPass.h b/compiler/luci/pass/include/luci/Pass/ForwardReshapeToUnaryOpPass.h
new file mode 100644
index 000000000..4c308e531
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/ForwardReshapeToUnaryOpPass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FORWARD_RESHAPE_TO_UNARYOP_PASS_H__
+#define __LUCI_FORWARD_RESHAPE_TO_UNARYOP_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to move a Reshape forward, past the unary op that follows it.
+ */
+struct ForwardReshapeToUnaryOpPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::ForwardReshapeToUnaryOpPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FORWARD_RESHAPE_TO_UNARYOP_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/ForwardTransposeOpPass.h b/compiler/luci/pass/include/luci/Pass/ForwardTransposeOpPass.h
new file mode 100644
index 000000000..b44b1bde1
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/ForwardTransposeOpPass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FORWARD_TRANSPOSE_OP_PASS_H__
+#define __LUCI_FORWARD_TRANSPOSE_OP_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to forward Transpose ops for further optimization.
+ */
+struct ForwardTransposeOpPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::ForwardTransposeOpPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FORWARD_TRANSPOSE_OP_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/FuseActivationFunctionPass.h b/compiler/luci/pass/include/luci/Pass/FuseActivationFunctionPass.h
new file mode 100644
index 000000000..5d05fcffa
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/FuseActivationFunctionPass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FUSE_ACTIVATION_FUNCTION_PASS_H__
+#define __LUCI_FUSE_ACTIVATION_FUNCTION_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fuse activation functions into preceding operators
+ */
+struct FuseActivationFunctionPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FuseActivationFunctionPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FUSE_ACTIVATION_FUNCTION_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/FuseAddWithFullyConnectedPass.h b/compiler/luci/pass/include/luci/Pass/FuseAddWithFullyConnectedPass.h
new file mode 100644
index 000000000..a59b644e9
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/FuseAddWithFullyConnectedPass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FUSE_ADD_WITH_FULLY_CONNECTED_PASS_H__
+#define __LUCI_FUSE_ADD_WITH_FULLY_CONNECTED_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fuse Add into FullyConnected
+ */
+struct FuseAddWithFullyConnectedPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FuseAddWithFullyConnectedPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FUSE_ADD_WITH_FULLY_CONNECTED_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/FuseAddWithTConvPass.h b/compiler/luci/pass/include/luci/Pass/FuseAddWithTConvPass.h
new file mode 100644
index 000000000..89b120397
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/FuseAddWithTConvPass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FUSE_ADD_WITH_TCONV_PASS_H__
+#define __LUCI_FUSE_ADD_WITH_TCONV_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fuse Add into CircleTransposeConv
+ */
+struct FuseAddWithTConvPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FuseAddWithTConvPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FUSE_ADD_WITH_TCONV_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/FuseBCQPass.h b/compiler/luci/pass/include/luci/Pass/FuseBCQPass.h
index 4404a9fc9..912ad4225 100644
--- a/compiler/luci/pass/include/luci/Pass/FuseBCQPass.h
+++ b/compiler/luci/pass/include/luci/Pass/FuseBCQPass.h
@@ -17,7 +17,7 @@
#ifndef __LUCI_FUSE_BCQ_PASS_H__
#define __LUCI_FUSE_BCQ_PASS_H__
-#include <logo/Pass.h>
+#include <luci/ModulePass.h>
namespace luci
{
@@ -26,10 +26,11 @@ namespace luci
* @brief Class to fuse certain pattern of subgraph into CircleBCQFullyConnected or CircleBCQGather
*
*/
-struct FuseBCQPass final : public logo::Pass
+struct FuseBCQPass final : public luci::Pass
{
const char *name(void) const final { return "luci::FuseBCQPass"; }
+ bool run(luci::Module *m) final;
bool run(loco::Graph *g) final;
};
diff --git a/compiler/luci/pass/include/luci/Pass/FuseBatchNormWithConvPass.h b/compiler/luci/pass/include/luci/Pass/FuseBatchNormWithConvPass.h
new file mode 100644
index 000000000..1ed85447b
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/FuseBatchNormWithConvPass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FUSE_BATCH_NORM_WITH_CONV_PASS_H__
+#define __LUCI_FUSE_BATCH_NORM_WITH_CONV_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fuse Batch Normalization into CircleConv
+ */
+struct FuseBatchNormWithConvPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FuseBatchNormWithConvPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FUSE_BATCH_NORM_WITH_CONV_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/FuseBatchNormWithDwConvPass.h b/compiler/luci/pass/include/luci/Pass/FuseBatchNormWithDwConvPass.h
new file mode 100644
index 000000000..32885c6b2
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/FuseBatchNormWithDwConvPass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FUSE_BATCH_NORM_WITH_DWCONV_PASS_H__
+#define __LUCI_FUSE_BATCH_NORM_WITH_DWCONV_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fuse Batch Normalization into CircleDepthWiseConv2D
+ */
+struct FuseBatchNormWithDwConvPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FuseBatchNormWithDwConvPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FUSE_BATCH_NORM_WITH_DWCONV_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/FuseBatchNormWithTConv.h b/compiler/luci/pass/include/luci/Pass/FuseBatchNormWithTConvPass.h
index d3e930a36..d3e930a36 100644
--- a/compiler/luci/pass/include/luci/Pass/FuseBatchNormWithTConv.h
+++ b/compiler/luci/pass/include/luci/Pass/FuseBatchNormWithTConvPass.h
diff --git a/compiler/luci/pass/include/luci/Pass/FuseGeluPass.h b/compiler/luci/pass/include/luci/Pass/FuseGeluPass.h
new file mode 100644
index 000000000..5fa23036c
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/FuseGeluPass.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FUSE_GELU_PASS_H__
+#define __LUCI_FUSE_GELU_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fuse a certain subgraph pattern into CircleGelu
+ *
+ * For detailed subgraph pattern to be fused, please check its implementation.
+ */
+struct FuseGeluPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FuseGeluPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FUSE_GELU_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/FuseMeanWithMeanPass.h b/compiler/luci/pass/include/luci/Pass/FuseMeanWithMeanPass.h
new file mode 100644
index 000000000..8bbce6947
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/FuseMeanWithMeanPass.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FUSE_MEAN_WITH_MEAN_PASS_H__
+#define __LUCI_FUSE_MEAN_WITH_MEAN_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fuse two consecutive Mean operations into a single Mean
+ *        with merged reduction indices
+ */
+struct FuseMeanWithMeanPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FuseMeanWithMeanPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FUSE_MEAN_WITH_MEAN_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/FusePReluPass.h b/compiler/luci/pass/include/luci/Pass/FusePReluPass.h
new file mode 100644
index 000000000..a21acf49d
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/FusePReluPass.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FUSE_PRELU_PASS_H__
+#define __LUCI_FUSE_PRELU_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fuse a certain subgraph pattern into CirclePRelu
+ * with auxiliary nodes
+ *
+ * For detailed subgraph pattern to be fused, please check its implementation.
+ */
+struct FusePReluPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FusePReluPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FUSE_PRELU_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/FusePreActivationBatchNormPass.h b/compiler/luci/pass/include/luci/Pass/FusePreActivationBatchNormPass.h
new file mode 100644
index 000000000..bd6aec7a2
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/FusePreActivationBatchNormPass.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FUSE_PRE_ACTIVATION_BATCH_NORM_PASS_H__
+#define __LUCI_FUSE_PRE_ACTIVATION_BATCH_NORM_PASS_H__
+
+#include <logo/Pass.h>
+#include <luci/IR/CircleNodes.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fuse pre-activation batch normalization
+ */
+struct FusePreActivationBatchNormPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FusePreActivationBatchNormPass"; }
+
+ bool run(loco::Graph *g) final;
+
+ std::vector<luci::CircleMul *> _mul_list;
+ std::vector<luci::CircleAdd *> _add_list;
+ std::vector<luci::CircleSub *> _sub_list; // inserted during fusion
+};
+
+} // namespace luci
+
+#endif // __LUCI_FUSE_PRE_ACTIVATION_BATCH_NORM_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/FuseTransposeWithMeanPass.h b/compiler/luci/pass/include/luci/Pass/FuseTransposeWithMeanPass.h
new file mode 100644
index 000000000..74dd04e2b
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/FuseTransposeWithMeanPass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FUSE_TRANSPOSE_WITH_MEAN_PASS_H__
+#define __LUCI_FUSE_TRANSPOSE_WITH_MEAN_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fuse Mean operation with a preceding Transpose
+ */
+struct FuseTransposeWithMeanPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FuseTransposeWithMeanPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FUSE_TRANSPOSE_WITH_MEAN_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/MakeBatchNormGammaPositivePass.h b/compiler/luci/pass/include/luci/Pass/MakeBatchNormGammaPositivePass.h
new file mode 100644
index 000000000..f00a68b45
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/MakeBatchNormGammaPositivePass.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_MAKE_BATCH_NORM_GAMMA_POSITIVE_PASS_H__
+#define __LUCI_MAKE_BATCH_NORM_GAMMA_POSITIVE_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to change negative gamma of batch normalization to a small positive value (1e-10)
+ * This pass can change the execution result of the model.
+ * So, use it only when the impact is known to be acceptable.
+ */
+struct MakeBatchNormGammaPositivePass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::MakeBatchNormGammaPositivePass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_MAKE_BATCH_NORM_GAMMA_POSITIVE_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/PropagateQParamBackwardPass.h b/compiler/luci/pass/include/luci/Pass/PropagateQParamBackwardPass.h
new file mode 100644
index 000000000..0c489fc30
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/PropagateQParamBackwardPass.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PROPAGATE_QPARAM_BACKWARD_PASS_H__
+#define __LUCI_PROPAGATE_QPARAM_BACKWARD_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to propagate quantization parameters of an operator's output back to its input
+ */
+struct PropagateQParamBackwardPass final : public logo::Pass
+{
+ PropagateQParamBackwardPass(loco::DataType output) : _output_model_dtype(output) {}
+
+ const char *name(void) const final { return "luci::PropagateQParamBackwardPass"; }
+
+ bool run(loco::Graph *g) final;
+
+private:
+ loco::DataType _output_model_dtype;
+};
+
+} // namespace luci
+
+#endif // __LUCI_PROPAGATE_QPARAM_BACKWARD_PASS_H__
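A minimal usage sketch, with the output model dtype chosen for illustration:

    #include <luci/Pass/PropagateQParamBackwardPass.h>

    void propagate_qparam_backward(loco::Graph *g)
    {
      // The constructor takes the dtype of the quantized output model (e.g. U8).
      luci::PropagateQParamBackwardPass pass(loco::DataType::U8);
      pass.run(g);
    }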
diff --git a/compiler/luci/pass/include/luci/Pass/PropagateQParamForwardPass.h b/compiler/luci/pass/include/luci/Pass/PropagateQParamForwardPass.h
new file mode 100644
index 000000000..952bd9614
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/PropagateQParamForwardPass.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PROPAGATE_QPARAM_FORWARD_PASS_H__
+#define __LUCI_PROPAGATE_QPARAM_FORWARD_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to propagate quantization parameters of an operator's input forward to its output
+ */
+struct PropagateQParamForwardPass final : public logo::Pass
+{
+ PropagateQParamForwardPass(bool TF_style_maxpool) : _TF_style_maxpool(TF_style_maxpool) {}
+
+ PropagateQParamForwardPass() {}
+
+ const char *name(void) const final { return "luci::PropagateQParamForwardPass"; }
+
+ bool run(loco::Graph *g) final;
+
+private:
+ bool _TF_style_maxpool = false;
+};
+
+} // namespace luci
+
+#endif // __LUCI_PROPAGATE_QPARAM_FORWARD_PASS_H__
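A usage sketch of the flagged constructor; judging by the name, TF_style_maxpool opts into TensorFlow's convention that MaxPool reuses its input's quantization parameters (a hedged reading, not confirmed by this header alone):

    #include <luci/Pass/PropagateQParamForwardPass.h>

    void propagate_qparam_forward(loco::Graph *g, bool tf_style_maxpool)
    {
      // Default-constructed pass leaves _TF_style_maxpool = false.
      luci::PropagateQParamForwardPass pass(tf_style_maxpool);
      pass.run(g);
    }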
diff --git a/compiler/luci/pass/include/luci/Pass/QuantizationParameters.h b/compiler/luci/pass/include/luci/Pass/QuantizationParameters.h
index 5c9cd427f..30c8db058 100644
--- a/compiler/luci/pass/include/luci/Pass/QuantizationParameters.h
+++ b/compiler/luci/pass/include/luci/Pass/QuantizationParameters.h
@@ -17,6 +17,10 @@
#ifndef __LUCI_QUANTIZATION_PARAMETERS_H__
#define __LUCI_QUANTIZATION_PARAMETERS_H__
+#include <loco.h>
+
+#include <string>
+
namespace luci
{
@@ -26,6 +30,13 @@ enum QuantizationGranularity
ChannelWise = 1,
};
+struct LayerInfo
+{
+ std::string name;
+ loco::DataType dtype;
+ QuantizationGranularity granularity;
+};
+
} // namespace luci
#endif // __LUCI_QUANTIZATION_PARAMETERS_H__
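LayerInfo carries a per-layer override of quantization dtype and granularity, consumed via the layers_info fields of the pass Contexts below. A sketch of building one (the layer name is hypothetical):

    #include <luci/Pass/QuantizationParameters.h>

    #include <vector>

    std::vector<luci::LayerInfo> make_layer_overrides(void)
    {
      luci::LayerInfo info;
      info.name = "conv_1"; // hypothetical layer name, for illustration
      info.dtype = loco::DataType::S16;
      info.granularity = luci::QuantizationGranularity::ChannelWise;
      return {info};
    }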
diff --git a/compiler/luci/pass/include/luci/Pass/QuantizeDequantizeWeightsPass.h b/compiler/luci/pass/include/luci/Pass/QuantizeDequantizeWeightsPass.h
index 713b88f9d..1825ee1aa 100644
--- a/compiler/luci/pass/include/luci/Pass/QuantizeDequantizeWeightsPass.h
+++ b/compiler/luci/pass/include/luci/Pass/QuantizeDequantizeWeightsPass.h
@@ -32,21 +32,38 @@ namespace luci
class QuantizeDequantizeWeightsPass : public logo::Pass
{
public:
- QuantizeDequantizeWeightsPass(loco::DataType input_dtype, loco::DataType output_dtype,
- QuantizationGranularity granularity)
- : _input_dtype{input_dtype}, _output_dtype{output_dtype}, _granularity{granularity}
+ struct Context
+ {
+ loco::DataType input_model_dtype = loco::DataType::Unknown;
+ loco::DataType output_model_dtype = loco::DataType::Unknown;
+ QuantizationGranularity granularity = QuantizationGranularity::ChannelWise;
+ std::vector<LayerInfo> layers_info;
+ };
+
+public:
+ QuantizeDequantizeWeightsPass(std::unique_ptr<Context> &&ctx) : _ctx{std::move(ctx)}
{
// DO NOTHING
}
+
+public:
+ QuantizeDequantizeWeightsPass(loco::DataType input_model_dtype, loco::DataType output_model_dtype,
+ QuantizationGranularity granularity)
+ {
+ _ctx = std::make_unique<Context>();
+ {
+ _ctx->input_model_dtype = input_model_dtype;
+ _ctx->output_model_dtype = output_model_dtype;
+ _ctx->granularity = granularity;
+ }
+ }
virtual const char *name(void) const { return "luci::QuantizeDequantizeWeightsPass"; }
public:
bool run(loco::Graph *graph);
private:
- loco::DataType _input_dtype;
- loco::DataType _output_dtype;
- QuantizationGranularity _granularity;
+ std::unique_ptr<Context> _ctx;
};
} // namespace luci
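With the new Context-based constructor, a fake-quantization run can be configured as below (a sketch; the dtypes are chosen for illustration):

    #include <luci/Pass/QuantizeDequantizeWeightsPass.h>

    #include <memory>

    void fake_quantize_weights(loco::Graph *g)
    {
      auto ctx = std::make_unique<luci::QuantizeDequantizeWeightsPass::Context>();
      ctx->input_model_dtype = loco::DataType::FLOAT32;
      ctx->output_model_dtype = loco::DataType::U8;
      ctx->granularity = luci::QuantizationGranularity::ChannelWise;

      luci::QuantizeDequantizeWeightsPass pass(std::move(ctx));
      pass.run(g);
    }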
diff --git a/compiler/luci/pass/include/luci/Pass/QuantizePreCheckerPass.h b/compiler/luci/pass/include/luci/Pass/QuantizePreCheckerPass.h
new file mode 100644
index 000000000..c852f88e0
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/QuantizePreCheckerPass.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_QUANTIZE_PRE_CHECKER_PASS_H__
+#define __LUCI_QUANTIZE_PRE_CHECKER_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Pass to verify that the input model is in a form acceptable to the quantizer
+ */
+class QuantizePreCheckerPass : public logo::Pass
+{
+public:
+ const char *name(void) const final { return "luci::QuantizePreCheckerPass"; }
+
+public:
+ bool run(loco::Graph *graph) final;
+};
+
+} // namespace luci
+
+#endif //__LUCI_QUANTIZE_PRE_CHECKER_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/QuantizeWeightsPass.h b/compiler/luci/pass/include/luci/Pass/QuantizeWeightsPass.h
new file mode 100644
index 000000000..646597312
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/QuantizeWeightsPass.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_QUANTIZE_WEIGHTS_PASS_H__
+#define __LUCI_QUANTIZE_WEIGHTS_PASS_H__
+
+#include <loco.h>
+
+#include <logo/Pass.h>
+
+#include <luci/Pass/QuantizationParameters.h>
+
+namespace luci
+{
+
+/**
+ * @brief Pass to quantize weights
+ */
+class QuantizeWeightsPass : public logo::Pass
+{
+public:
+ struct Context
+ {
+ loco::DataType input_model_dtype = loco::DataType::Unknown;
+ loco::DataType output_model_dtype = loco::DataType::Unknown;
+ QuantizationGranularity granularity = QuantizationGranularity::ChannelWise;
+ };
+
+public:
+ QuantizeWeightsPass(std::unique_ptr<Context> &&ctx) : _ctx{std::move(ctx)}
+ {
+ // DO NOTHING
+ }
+
+public:
+ QuantizeWeightsPass(loco::DataType input_model_dtype, loco::DataType output_model_dtype,
+ QuantizationGranularity granularity)
+ {
+ _ctx = std::make_unique<Context>();
+ {
+ _ctx->input_model_dtype = input_model_dtype;
+ _ctx->output_model_dtype = output_model_dtype;
+ _ctx->granularity = granularity;
+ }
+ }
+ virtual const char *name(void) const { return "luci::QuantizeWeightsPass"; }
+
+public:
+ bool run(loco::Graph *graph);
+
+private:
+ std::unique_ptr<Context> _ctx;
+};
+
+} // namespace luci
+
+#endif //__LUCI_QUANTIZE_WEIGHTS_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h b/compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h
index bb0d0ff40..6874046f0 100644
--- a/compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h
+++ b/compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h
@@ -23,6 +23,8 @@
#include <luci/Pass/QuantizationParameters.h>
+#include <vector>
+
namespace luci
{
@@ -32,21 +34,34 @@ namespace luci
class QuantizeWithMinMaxPass : public logo::Pass
{
public:
- QuantizeWithMinMaxPass(loco::DataType input_dtype, loco::DataType output_dtype,
- QuantizationGranularity granularity)
- : _input_dtype{input_dtype}, _output_dtype{output_dtype}, _granularity{granularity}
+ struct Context
+ {
+ loco::DataType input_model_dtype = loco::DataType::Unknown;
+ loco::DataType output_model_dtype = loco::DataType::Unknown;
+ QuantizationGranularity granularity = QuantizationGranularity::ChannelWise;
+ std::vector<loco::DataType> input_types;
+ std::vector<loco::DataType> output_types;
+ bool TF_style_maxpool = false;
+ std::vector<LayerInfo> layers_info;
+ };
+
+public:
+ QuantizeWithMinMaxPass(std::unique_ptr<Context> &&ctx) : _ctx{std::move(ctx)}
{
// DO NOTHING
}
+
virtual const char *name(void) const { return "luci::QuantizeWithMinMaxPass"; }
public:
bool run(loco::Graph *graph);
private:
- loco::DataType _input_dtype;
- loco::DataType _output_dtype;
- QuantizationGranularity _granularity;
+ void set_input_type(loco::Graph *graph) const;
+ void set_output_type(loco::Graph *graph) const;
+
+private:
+ std::unique_ptr<Context> _ctx;
};
} // namespace luci
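The extended Context mirrors the quantizer's user-facing options; a configuration sketch with illustrative dtypes:

    #include <luci/Pass/QuantizeWithMinMaxPass.h>

    #include <memory>

    void quantize_with_minmax(loco::Graph *g)
    {
      auto ctx = std::make_unique<luci::QuantizeWithMinMaxPass::Context>();
      ctx->input_model_dtype = loco::DataType::FLOAT32;
      ctx->output_model_dtype = loco::DataType::U8;
      ctx->granularity = luci::QuantizationGranularity::ChannelWise;
      ctx->input_types = {loco::DataType::U8};  // one entry per graph input
      ctx->output_types = {loco::DataType::U8}; // one entry per graph output

      luci::QuantizeWithMinMaxPass pass(std::move(ctx));
      pass.run(g);
    }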
diff --git a/compiler/luci/pass/include/luci/Pass/RemoveDuplicateConstPass.h b/compiler/luci/pass/include/luci/Pass/RemoveDuplicateConstPass.h
new file mode 100644
index 000000000..000cdcc43
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/RemoveDuplicateConstPass.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REMOVE_DUPLICATE_CONST_PASS_H__
+#define __LUCI_REMOVE_DUPLICATE_CONST_PASS_H__
+
+#include <luci/IR/CircleNodes.h>
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to remove duplicate Const nodes.
+ */
+struct RemoveDuplicateConstPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::RemoveDuplicateConstPass"; }
+
+ bool run(loco::Graph *g) final;
+
+private:
+ bool remove_duplicate_const();
+
+ template <loco::DataType DT> void add_to_map(luci::CircleConst *const_node);
+
+ std::map<float, std::vector<CircleConst *>> _sum_to_const;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REMOVE_DUPLICATE_CONST_PASS_H__
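The private _sum_to_const member suggests the dedup strategy: bucket constants by a cheap key (the sum of their elements) and compare element-wise only within a bucket. A standalone sketch of that idea, independent of the luci types:

    #include <map>
    #include <numeric>
    #include <vector>

    using Buffer = std::vector<float>;

    // Group buffers by element sum; exact element-wise comparison is still
    // required inside each bucket before two buffers count as duplicates.
    std::map<float, std::vector<const Buffer *>> bucket_by_sum(const std::vector<Buffer> &bufs)
    {
      std::map<float, std::vector<const Buffer *>> buckets;
      for (const auto &b : bufs)
        buckets[std::accumulate(b.begin(), b.end(), 0.0f)].push_back(&b);
      return buckets;
    }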
diff --git a/compiler/luci/pass/include/luci/Pass/RemoveFakeQuantPass.h b/compiler/luci/pass/include/luci/Pass/RemoveFakeQuantPass.h
new file mode 100644
index 000000000..b477c8733
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/RemoveFakeQuantPass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REMOVE_FAKEQUANT_PASS_H__
+#define __LUCI_REMOVE_FAKEQUANT_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to remove FakeQuant nodes.
+ */
+struct RemoveFakeQuantPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::RemoveFakeQuantPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REMOVE_FAKEQUANT_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/RemoveQuantDequantSeqPass.h b/compiler/luci/pass/include/luci/Pass/RemoveQuantDequantSeqPass.h
new file mode 100644
index 000000000..3eac626c2
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/RemoveQuantDequantSeqPass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REMOVE_QUANTDEQUANTSEQ_PASS_H__
+#define __LUCI_REMOVE_QUANTDEQUANTSEQ_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to remove Quantize-Dequantize sequences.
+ */
+struct RemoveQuantDequantSeqPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::RemoveQuantDequantSeqPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REMOVE_QUANTDEQUANTSEQ_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/RemoveRedundantDequantizePass.h b/compiler/luci/pass/include/luci/Pass/RemoveRedundantDequantizePass.h
new file mode 100644
index 000000000..2deb75297
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/RemoveRedundantDequantizePass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REMOVE_REDUNDANT_DEQUANTIZE_PASS_H__
+#define __LUCI_REMOVE_REDUNDANT_DEQUANTIZE_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to remove redundant dequantize operations
+ */
+struct RemoveRedundantDequantizePass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::RemoveRedundantDequantizePass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REMOVE_REDUNDANT_DEQUANTIZE_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/RemoveRedundantQuantizePass.h b/compiler/luci/pass/include/luci/Pass/RemoveRedundantQuantizePass.h
new file mode 100644
index 000000000..3e76bcdc3
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/RemoveRedundantQuantizePass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REMOVE_REDUNDANT_QUANTIZE_PASS_H__
+#define __LUCI_REMOVE_REDUNDANT_QUANTIZE_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to remove redundant quantize operations
+ */
+struct RemoveRedundantQuantizePass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::RemoveRedundantQuantizePass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REMOVE_REDUNDANT_QUANTIZE_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/RemoveRedundantReshapePass.h b/compiler/luci/pass/include/luci/Pass/RemoveRedundantReshapePass.h
new file mode 100644
index 000000000..458ffc094
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/RemoveRedundantReshapePass.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REMOVE_REDUNDANT_RESHAPE_PASS_H__
+#define __LUCI_REMOVE_REDUNDANT_RESHAPE_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to merge consecutive Reshape nodes into a single Reshape node.
+ * @details This class replaces two consecutive Reshape nodes with a single Reshape node.
+ * As a Reshape operation only changes the shape, not the buffer, the former Reshape is unnecessary.
+ */
+struct RemoveRedundantReshapePass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::RemoveRedundantReshapePass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REMOVE_REDUNDANT_RESHAPE_PASS_H__
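The rewrite in graph terms is Reshape(Reshape(x, s1), s2) -> Reshape(x, s2); a scalar sketch of why only the last shape matters:

    #include <cstdint>
    #include <vector>

    // The intermediate shape s1 is unobservable because Reshape never
    // touches the underlying buffer, so folding keeps only the final shape.
    std::vector<int32_t> fold_reshapes(const std::vector<int32_t> & /*s1*/,
                                       const std::vector<int32_t> &s2)
    {
      return s2; // only the last target shape survives
    }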
diff --git a/compiler/luci/pass/include/luci/Pass/RemoveRedundantTransposePass.h b/compiler/luci/pass/include/luci/Pass/RemoveRedundantTransposePass.h
new file mode 100644
index 000000000..ca20da5ac
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/RemoveRedundantTransposePass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REMOVE_REDUNDANT_TRANSPOSE_H__
+#define __LUCI_REMOVE_REDUNDANT_TRANSPOSE_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fuse or remove consecutive Transpose operators
+ */
+struct RemoveRedundantTransposePass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::RemoveRedundantTransposePass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REMOVE_REDUNDANT_TRANSPOSE_H__
diff --git a/compiler/luci/pass/include/luci/Pass/RemoveUnnecessaryReshapeNetPass.h b/compiler/luci/pass/include/luci/Pass/RemoveUnnecessaryReshapeNetPass.h
new file mode 100644
index 000000000..19948a31c
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/RemoveUnnecessaryReshapeNetPass.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REMOVE_UNNECESSARY_RESHAPE_NET_PASS_H__
+#define __LUCI_REMOVE_UNNECESSARY_RESHAPE_NET_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to remove unnecessary Reshape nodes.
+ * @details This class will remove unnecessary pre/post-Reshape nodes.
+ * See https://github.com/Samsung/ONE/issues/9600 for more details.
+ */
+struct RemoveUnnecessaryReshapeNetPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::RemoveUnnecessaryReshapeNetPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REMOVE_UNNECESSARY_RESHAPE_NET_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/RemoveUnnecessaryReshapePass.h b/compiler/luci/pass/include/luci/Pass/RemoveUnnecessaryReshapePass.h
new file mode 100644
index 000000000..8fca35e5b
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/RemoveUnnecessaryReshapePass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REMOVE_UNNECESSARY_RESHAPE_PASS_H__
+#define __LUCI_REMOVE_UNNECESSARY_RESHAPE_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to remove an unnecessary Reshape node (whose input and output shapes are the same).
+ */
+struct RemoveUnnecessaryReshapePass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::RemoveUnnecessaryReshapePass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REMOVE_UNNECESSARY_RESHAPE_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/RemoveUnnecessarySlicePass.h b/compiler/luci/pass/include/luci/Pass/RemoveUnnecessarySlicePass.h
new file mode 100644
index 000000000..a3b0f2f8c
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/RemoveUnnecessarySlicePass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REMOVE_NO_EFFECT_SLICE_PASS_H__
+#define __LUCI_REMOVE_NO_EFFECT_SLICE_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to remove an unnecessary Slice node (whose input and output are the same).
+ */
+struct RemoveUnnecessarySlicePass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::RemoveUnnecessarySlicePass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REMOVE_NO_EFFECT_SLICE_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/RemoveUnnecessarySplitPass.h b/compiler/luci/pass/include/luci/Pass/RemoveUnnecessarySplitPass.h
new file mode 100644
index 000000000..0d9330fe7
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/RemoveUnnecessarySplitPass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REMOVE_UNNECESSARY_SPLIT_PASS_H__
+#define __LUCI_REMOVE_UNNECESSARY_SPLIT_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to remove an unnecessary Split op
+ */
+struct RemoveUnnecessarySplitPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::RemoveUnnecessarySplitPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REMOVE_UNNECESSARY_SPLIT_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/RemoveUnnecessaryStridedSlicePass.h b/compiler/luci/pass/include/luci/Pass/RemoveUnnecessaryStridedSlicePass.h
new file mode 100644
index 000000000..0f6a61d43
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/RemoveUnnecessaryStridedSlicePass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REMOVE_UNNECESSARY_STRIDED_SLICE_PASS_H__
+#define __LUCI_REMOVE_UNNECESSARY_STRIDED_SLICE_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to remove an unnecessary StridedSlice node (whose input and output are the same).
+ */
+struct RemoveUnnecessaryStridedSlicePass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::RemoveUnnecessaryStridedSlicePass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REMOVE_UNNECESSARY_STRIDED_SLICE_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/ReplaceMulAddWithDepthwiseConvPass.h b/compiler/luci/pass/include/luci/Pass/ReplaceMulAddWithDepthwiseConvPass.h
new file mode 100644
index 000000000..5dbcc8f5b
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/ReplaceMulAddWithDepthwiseConvPass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REPLACE_MUL_ADD_WITH_DEPTHWISE_CONV_PASS_H__
+#define __LUCI_REPLACE_MUL_ADD_WITH_DEPTHWISE_CONV_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to replace channel-wise mul/add with CircleDepthwiseConv2D
+ */
+struct ReplaceMulAddWithDepthwiseConvPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::ReplaceMulAddWithDepthwiseConvPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REPLACE_MUL_ADD_WITH_DEPTHWISE_CONV_PASS_H__
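The identity behind this rewrite: a per-channel y[c] = x[c] * gamma[c] + beta[c] is exactly a 1x1 DepthwiseConv2D whose per-channel filter value is gamma[c] and whose bias is beta[c]. A scalar sketch of the equivalence:

    // One output element in channel c: both forms compute the same value;
    // the pass rewires kernel = gamma and bias = beta.
    float mul_add_form(float x, float gamma, float beta) { return x * gamma + beta; }
    float depthwise_form(float x, float kernel, float bias) { return x * kernel + bias; }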
diff --git a/compiler/luci/pass/include/luci/Pass/ReplaceNonConstFCWithBatchMatMulPass.h b/compiler/luci/pass/include/luci/Pass/ReplaceNonConstFCWithBatchMatMulPass.h
new file mode 100644
index 000000000..24e16ec49
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/ReplaceNonConstFCWithBatchMatMulPass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REPLACE_NONCONST_FC_WITH_BATCH_MATMUL_PASS_H__
+#define __LUCI_REPLACE_NONCONST_FC_WITH_BATCH_MATMUL_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to replace "FC with non-const weight" with Batched MatMul
+ */
+struct ReplaceNonConstFCWithBatchMatMulPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::ReplaceNonConstFCWithBatchMatMulPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REPLACE_NONCONST_FC_WITH_BATCH_MATMUL_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/ReplaceSubWithAddPass.h b/compiler/luci/pass/include/luci/Pass/ReplaceSubWithAddPass.h
new file mode 100644
index 000000000..4878f728f
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/ReplaceSubWithAddPass.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REPLACE_SUB_WITH_ADD_PASS_H__
+#define __LUCI_REPLACE_SUB_WITH_ADD_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to replace Sub with Add
+ *
+ */
+struct ReplaceSubWithAddPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::ReplaceSubWithAddPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REPLACE_SUB_WITH_ADD_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/RequantizePass.h b/compiler/luci/pass/include/luci/Pass/RequantizePass.h
index 2442b24ea..50b9073b5 100644
--- a/compiler/luci/pass/include/luci/Pass/RequantizePass.h
+++ b/compiler/luci/pass/include/luci/Pass/RequantizePass.h
@@ -27,13 +27,13 @@ namespace luci
{
/**
- * @brief Pass to quantize weights
+ * @brief Pass to re-quantize graph (ex: int8 -> uint8)
*/
class RequantizePass : public logo::Pass
{
public:
RequantizePass(loco::DataType input_dtype, loco::DataType output_dtype)
- : _input_dtype{input_dtype}, _output_dtype{output_dtype}
+ : _input_dtype{input_dtype}, _output_dtype{output_dtype}
{
// DO NOTHING
}
diff --git a/compiler/luci/pass/include/luci/Pass/ResolveCustomOpMaxPoolWithArgmaxPass.h b/compiler/luci/pass/include/luci/Pass/ResolveCustomOpMaxPoolWithArgmaxPass.h
new file mode 100644
index 000000000..ab8fce8fe
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/ResolveCustomOpMaxPoolWithArgmaxPass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_RESOLVE_CUSTOM_OP_MAXPOOL_WITH_ARGMAX_PASS_H__
+#define __LUCI_RESOLVE_CUSTOM_OP_MAXPOOL_WITH_ARGMAX_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to resolve the custom op MaxPoolWithArgmax into a subgraph of Circle's MaxPool and ArgMax.
+ */
+struct ResolveCustomOpMaxPoolWithArgmaxPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::ResolveCustomOpMaxPoolWithArgmaxPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_RESOLVE_CUSTOM_OP_MAXPOOL_WITH_ARGMAX_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/ResolveCustomOpSplitVPass.h b/compiler/luci/pass/include/luci/Pass/ResolveCustomOpSplitVPass.h
new file mode 100644
index 000000000..d4f0147e8
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/ResolveCustomOpSplitVPass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_RESOLVE_CUSTOM_OP_SPLIT_V_PASS_H__
+#define __LUCI_RESOLVE_CUSTOM_OP_SPLIT_V_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to resolve a certain custom op into a SplitV op of the Circle schema.
+ */
+struct ResolveCustomOpSplitVPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::ResolveCustomOpSplitVPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_RESOLVE_CUSTOM_OP_SPLIT_V_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/ShapeInferencePass.h b/compiler/luci/pass/include/luci/Pass/ShapeInferencePass.h
deleted file mode 100644
index 86bb2ab42..000000000
--- a/compiler/luci/pass/include/luci/Pass/ShapeInferencePass.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_SHAPE_INFERENCE_PASS_H__
-#define __LUCI_SHAPE_INFERENCE_PASS_H__
-
-#include <loco.h>
-
-#include <logo/Pass.h>
-
-namespace luci
-{
-
-/**
- * @brief Pass to infer shape of nodes
- */
-class ShapeInferencePass : public logo::Pass
-{
-public:
- virtual const char *name(void) const { return "luci::ShapeInferencePass"; }
-
-public:
- bool run(loco::Graph *graph);
-};
-
-} // namespace luci
-
-#endif //__LUCI_SHAPE_INFERENCE_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/ShuffleWeightTo16x1Float32Pass.h b/compiler/luci/pass/include/luci/Pass/ShuffleWeightTo16x1Float32Pass.h
new file mode 100644
index 000000000..3d84f5133
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/ShuffleWeightTo16x1Float32Pass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_SHUFFLE_WEIGHT_TO_16X1_FLOAT32_PASS_H__
+#define __LUCI_SHUFFLE_WEIGHT_TO_16X1_FLOAT32_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to convert weight format of FullyConnected to SHUFFLED16x1FLOAT32
+ */
+struct ShuffleWeightTo16x1Float32Pass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::ShuffleWeightTo16x1Float32Pass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_SHUFFLE_WEIGHT_TO_16X1_FLOAT32_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/SparsifyTensorPass.h b/compiler/luci/pass/include/luci/Pass/SparsifyTensorPass.h
new file mode 100644
index 000000000..0ce142c55
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/SparsifyTensorPass.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_SPARSIFY_TENSOR_PASS_H__
+#define __LUCI_SPARSIFY_TENSOR_PASS_H__
+
+#include <logo/Pass.h>
+
+#include <luci/IR/SparsityParam.h>
+
+namespace luci
+{
+
+class CircleConst;
+
+/**
+ * @brief Pass to sparsify tensor
+ */
+struct SparsifyTensorPass final : public logo::Pass
+{
+public:
+ SparsifyTensorPass(const std::string &tensor_name, const std::vector<int32_t> &traversal_order,
+ const std::vector<DimensionType> &format,
+ const std::vector<int32_t> &block_size, const std::vector<int32_t> &block_map)
+ : _tensor_name{tensor_name}, _traversal_order{traversal_order}, _format{format},
+ _block_size{block_size}, _block_map{block_map}
+ {
+ // DO NOTHING
+ }
+
+public:
+ const char *name(void) const final { return "luci::SparsifyTensorPass"; }
+
+ bool run(loco::Graph *g) final;
+
+ template <loco::DataType DT> void sparsify_tensor(luci::CircleConst *cop);
+
+private:
+ // Tensor name that the pass will sparsify
+ std::string _tensor_name;
+ std::vector<int32_t> _traversal_order;
+ std::vector<DimensionType> _format;
+ std::vector<int32_t> _block_size;
+ std::vector<int32_t> _block_map;
+};
+
+extern template void
+SparsifyTensorPass::sparsify_tensor<loco::DataType::S32>(luci::CircleConst *cop);
+extern template void
+SparsifyTensorPass::sparsify_tensor<loco::DataType::S8>(luci::CircleConst *cop);
+extern template void
+SparsifyTensorPass::sparsify_tensor<loco::DataType::FLOAT32>(luci::CircleConst *cop);
+
+} // namespace luci
+
+#endif // __LUCI_SPARSIFY_TENSOR_PASS_H__
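A construction sketch; the tensor name is hypothetical, and the DimensionType values are assumed to follow luci/IR/SparsityParam.h (DENSE / SPARSE_CSR):

    #include <luci/Pass/SparsifyTensorPass.h>

    void sparsify_fc_weights(loco::Graph *g)
    {
      luci::SparsifyTensorPass pass("fc_weights",               // hypothetical tensor name
                                    /*traversal_order=*/{0, 1}, // row-major traversal
                                    /*format=*/{luci::DimensionType::DENSE,
                                                luci::DimensionType::SPARSE_CSR},
                                    /*block_size=*/{},          // no block sparsity
                                    /*block_map=*/{});
      pass.run(g);
    }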
diff --git a/compiler/luci/pass/include/luci/Pass/SubstitutePackToReshapePass.h b/compiler/luci/pass/include/luci/Pass/SubstitutePackToReshapePass.h
new file mode 100644
index 000000000..36d13f19f
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/SubstitutePackToReshapePass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_SUBSTITUTE_PACK_TO_RESHAPE_PASS_H__
+#define __LUCI_SUBSTITUTE_PACK_TO_RESHAPE_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to substitute a Pack with a single input with a single Reshape node.
+ */
+struct SubstitutePackToReshapePass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::SubstitutePackToReshapePass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_SUBSTITUTE_PACK_TO_RESHAPE_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/SubstitutePadV2ToPadPass.h b/compiler/luci/pass/include/luci/Pass/SubstitutePadV2ToPadPass.h
new file mode 100644
index 000000000..c4ecc1086
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/SubstitutePadV2ToPadPass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_SUBSTITUTE_PADV2_TO_PAD_PASS_H__
+#define __LUCI_SUBSTITUTE_PADV2_TO_PAD_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to substitute PadV2 with Pad under certain conditions.
+ */
+struct SubstitutePadV2ToPadPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::SubstitutePadV2ToPadPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_SUBSTITUTE_PADV2_TO_PAD_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/SubstituteSplitVToSplitPass.h b/compiler/luci/pass/include/luci/Pass/SubstituteSplitVToSplitPass.h
new file mode 100644
index 000000000..8c8900159
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/SubstituteSplitVToSplitPass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_SUBSTITUTE_SPLIT_V_TO_SPLIT_PASS_H__
+#define __LUCI_SUBSTITUTE_SPLIT_V_TO_SPLIT_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to substitute certain SplitV nodes with Split.
+ */
+struct SubstituteSplitVToSplitPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::SubstituteSplitVToSplitPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_SUBSTITUTE_SPLIT_V_TO_SPLIT_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/SubstituteSqueezeToReshapePass.h b/compiler/luci/pass/include/luci/Pass/SubstituteSqueezeToReshapePass.h
new file mode 100644
index 000000000..d8df6ac3f
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/SubstituteSqueezeToReshapePass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_SUBSTITUTE_SQUEEZE_TO_RESHAPE_PASS_H__
+#define __LUCI_SUBSTITUTE_SQUEEZE_TO_RESHAPE_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to substitute Squeeze with a Reshape node under certain conditions.
+ */
+struct SubstituteSqueezeToReshapePass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::SubstituteSqueezeToReshapePass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_SUBSTITUTE_SQUEEZE_TO_RESHAPE_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/SubstituteStridedSliceToReshapePass.h b/compiler/luci/pass/include/luci/Pass/SubstituteStridedSliceToReshapePass.h
new file mode 100644
index 000000000..a8229a6cd
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/SubstituteStridedSliceToReshapePass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_SUBSTITUTE_STRIDED_SLICE_TO_RESHAPE_PASS_H__
+#define __LUCI_SUBSTITUTE_STRIDED_SLICE_TO_RESHAPE_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to substitute a StridedSlice that meets certain conditions with a single Reshape node.
+ */
+struct SubstituteStridedSliceToReshapePass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::SubstituteStridedSliceToReshapePass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_SUBSTITUTE_STRIDED_SLICE_TO_RESHAPE_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/SubstituteTransposeToReshapePass.h b/compiler/luci/pass/include/luci/Pass/SubstituteTransposeToReshapePass.h
new file mode 100644
index 000000000..ee708585a
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/SubstituteTransposeToReshapePass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_SUBSTITUTE_TRANSPOSE_TO_RESHAPE_PASS_H__
+#define __LUCI_SUBSTITUTE_TRANSPOSE_TO_RESHAPE_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to substitute a Transpose that meets certain input shape conditions with a single Reshape node.
+ */
+struct SubstituteTransposeToReshapePass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::SubstituteTransposeToReshapePass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_SUBSTITUTE_TRANSPOSE_TO_RESHAPE_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/TransformMinMaxToRelu6Pass.h b/compiler/luci/pass/include/luci/Pass/TransformMinMaxToRelu6Pass.h
new file mode 100644
index 000000000..9ea39ee4e
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/TransformMinMaxToRelu6Pass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_TRANSFORM_MIN_MAX_TO_RELU6_PASS_H__
+#define __LUCI_TRANSFORM_MIN_MAX_TO_RELU6_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to transform Maximum(Minimum(input, 6), 0) to Relu6
+ */
+struct TransformMinMaxToRelu6Pass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::TransformMinMaxToRelu6Pass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_TRANSFORM_MIN_MAX_TO_RELU6_PASS_H__
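The equivalence the pass relies on, checked as a scalar function:

    #include <algorithm>

    // Maximum(Minimum(x, 6), 0) clamps x to [0, 6], which is exactly ReLU6.
    float relu6_via_min_max(float x)
    {
      return std::max(std::min(x, 6.0f), 0.0f);
    }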
diff --git a/compiler/luci/pass/include/luci/Pass/TransformMinReluToRelu6Pass.h b/compiler/luci/pass/include/luci/Pass/TransformMinReluToRelu6Pass.h
new file mode 100644
index 000000000..2c83e6e85
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/TransformMinReluToRelu6Pass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_TRANSFORM_MIN_RELU_TO_RELU6_PASS_H__
+#define __LUCI_TRANSFORM_MIN_RELU_TO_RELU6_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to transform Relu(Minimum(input, 6)) to Relu6
+ */
+struct TransformMinReluToRelu6Pass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::TransformMinReluToRelu6Pass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_TRANSFORM_MIN_RELU_TO_RELU6_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/TypeInferencePass.h b/compiler/luci/pass/include/luci/Pass/TypeInferencePass.h
deleted file mode 100644
index c607ac63f..000000000
--- a/compiler/luci/pass/include/luci/Pass/TypeInferencePass.h
+++ /dev/null
@@ -1,42 +0,0 @@
-
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_TYPE_INFERENCE_PASS_H__
-#define __LUCI_TYPE_INFERENCE_PASS_H__
-
-#include <loco.h>
-
-#include <logo/Pass.h>
-
-namespace luci
-{
-
-/**
- * @brief Pass to infer type of nodes
- */
-class TypeInferencePass : public logo::Pass
-{
-public:
- virtual const char *name(void) const { return "luci::TypeInferencePass"; }
-
-public:
- bool run(loco::Graph *graph);
-};
-
-} // namespace luci
-
-#endif //__LUCI_TYPE_INFERENCE_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/UnrollUnidirectionalSequenceLSTMPass.h b/compiler/luci/pass/include/luci/Pass/UnrollUnidirectionalSequenceLSTMPass.h
new file mode 100644
index 000000000..fd5a708e8
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/UnrollUnidirectionalSequenceLSTMPass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_UNROLL_UNIDIRECTIONALSEQUENCELSTM_PASS_H__
+#define __LUCI_UNROLL_UNIDIRECTIONALSEQUENCELSTM_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to unroll UnidirectionalSequenceLSTM
+ */
+struct UnrollUnidirectionalSequenceLSTMPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::UnrollUnidirectionalSequenceLSTMPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_UNROLL_UNIDIRECTIONALSEQUENCELSTM_PASS_H__
diff --git a/compiler/luci/pass/src/BatchNormPatternFinder.cpp b/compiler/luci/pass/src/BatchNormPatternFinder.cpp
new file mode 100644
index 000000000..e3f126b15
--- /dev/null
+++ b/compiler/luci/pass/src/BatchNormPatternFinder.cpp
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BatchNormPatternFinder.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace luci
+{
+
+bool is_batchnorm_add(const luci::CircleAdd *add, luci::CircleMul *&mul, luci::CircleConst *&beta)
+{
+ auto x = loco::must_cast<luci::CircleNode *>(add->x());
+ auto y = loco::must_cast<luci::CircleNode *>(add->y());
+
+ luci::CircleMul *pred = nullptr;
+ luci::CircleConst *constant = nullptr;
+
+ if (x->opcode() == luci::CircleOpcode::CIRCLECONST && y->opcode() == luci::CircleOpcode::MUL)
+ {
+ pred = loco::must_cast<luci::CircleMul *>(y);
+ constant = loco::must_cast<luci::CircleConst *>(x);
+ }
+ else if (x->opcode() == luci::CircleOpcode::MUL && y->opcode() == luci::CircleOpcode::CIRCLECONST)
+ {
+ pred = loco::must_cast<luci::CircleMul *>(x);
+ constant = loco::must_cast<luci::CircleConst *>(y);
+ }
+ else
+ {
+ return false;
+ }
+
+ uint32_t channel_dim = 0;
+
+ if (constant->rank() == 1)
+ {
+ channel_dim = constant->dim(0).value();
+ }
+ else if (constant->rank() == 4)
+ {
+ for (uint32_t i = 0; i < 3; i++)
+ {
+ if (constant->dim(i).value() != 1)
+ return false;
+ }
+ channel_dim = constant->dim(3).value();
+ }
+ else
+ {
+ return false;
+ }
+
+ // Assumption: Layout is channel-last
+ if (!(channel_dim == add->dim(add->rank() - 1)))
+ return false;
+
+ mul = pred;
+ beta = constant;
+ return true;
+}
+
+bool is_batchnorm_add(const luci::CircleAdd *add)
+{
+ // for dummy mul and beta
+ luci::CircleMul *mul = nullptr;
+ luci::CircleConst *beta = nullptr;
+
+ return is_batchnorm_add(add, mul, beta);
+}
+
+bool is_batchnorm_mul(const luci::CircleMul *mul, luci::CircleNode *&pred_node,
+ luci::CircleConst *&gamma)
+{
+ auto x = dynamic_cast<luci::CircleConst *>(mul->x());
+ auto y = dynamic_cast<luci::CircleConst *>(mul->y());
+
+ luci::CircleNode *pred = nullptr;
+ luci::CircleConst *constant = nullptr;
+
+ if (x != nullptr && y == nullptr)
+ {
+ pred = loco::must_cast<luci::CircleNode *>(mul->y());
+ constant = x;
+ }
+ else if (x == nullptr && y != nullptr)
+ {
+ pred = loco::must_cast<luci::CircleNode *>(mul->x());
+ constant = y;
+ }
+ else
+ {
+ return false;
+ }
+
+ uint32_t channel_dim = 0;
+
+ if (constant->rank() == 1)
+ {
+ channel_dim = constant->dim(0).value();
+ }
+ else if (constant->rank() == 4)
+ {
+ for (uint32_t i = 0; i < 3; i++)
+ {
+ if (constant->dim(i).value() != 1)
+ return false;
+ }
+ channel_dim = constant->dim(3).value();
+ }
+ else
+ {
+ return false;
+ }
+
+ // Assumption: Layout is channel-last
+ if (!(channel_dim == mul->dim(mul->rank() - 1)))
+ return false;
+
+ pred_node = pred;
+ gamma = constant;
+ return true;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/BatchNormPatternFinder.h b/compiler/luci/pass/src/BatchNormPatternFinder.h
new file mode 100644
index 000000000..58cdbb464
--- /dev/null
+++ b/compiler/luci/pass/src/BatchNormPatternFinder.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PASS_BATCH_NORM_PATTERN_FINDER_H__
+#define __LUCI_PASS_BATCH_NORM_PATTERN_FINDER_H__
+
+#include <luci/IR/CircleNodes.h>
+
+namespace luci
+{
+
+/**
+ * @brief Find Mul-Add pattern and return the Mul node and beta constant of the BatchNorm
+ */
+bool is_batchnorm_add(const luci::CircleAdd *add, luci::CircleMul *&mul, luci::CircleConst *&beta);
+
+/**
+ * @brief Find Mul-Add pattern
+ */
+bool is_batchnorm_add(const luci::CircleAdd *add);
+
+/**
+ * @brief Find Const-Mul pattern and return the predecessor node and gamma constant of the BatchNorm
+ */
+bool is_batchnorm_mul(const luci::CircleMul *mul, luci::CircleNode *&pred_node,
+ luci::CircleConst *&gamma);
+
+} // namespace luci
+
+#endif // __LUCI_PASS_BATCH_NORM_PATTERN_FINDER_H__
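A hedged sketch of how a fusion pass might chain these helpers, walking from an Add up through its Mul predecessor; the function name try_match_batchnorm is illustrative, not part of this API:

    // Recover (pred, gamma, beta) from a Mul-Add BatchNorm pattern
    bool try_match_batchnorm(luci::CircleAdd *add)
    {
      luci::CircleMul *mul = nullptr;
      luci::CircleConst *beta = nullptr;
      if (!luci::is_batchnorm_add(add, mul, beta))
        return false;

      luci::CircleNode *pred = nullptr;
      luci::CircleConst *gamma = nullptr;
      if (!luci::is_batchnorm_mul(mul, pred, gamma))
        return false;

      // Here add == mul(pred, gamma) + beta, where gamma/beta have shape {C}
      // or {1, 1, 1, C} matching the channel-last dimension of the outputs.
      return true;
    }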
diff --git a/compiler/luci/pass/src/BatchNormPatternFinder.test.cpp b/compiler/luci/pass/src/BatchNormPatternFinder.test.cpp
new file mode 100644
index 000000000..cc8c5615f
--- /dev/null
+++ b/compiler/luci/pass/src/BatchNormPatternFinder.test.cpp
@@ -0,0 +1,310 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BatchNormPatternFinder.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace luci
+{
+namespace test
+{
+
+/**
+ * @brief Graphlet with Add and Const as beta from BatchNorm
+ */
+class AddBetaGraphlet
+{
+public:
+ AddBetaGraphlet() = default;
+
+ void init(loco::Graph *g, const ShapeU32 shape, luci::FusedActFunc actf)
+ {
+ _add = g->nodes()->create<luci::CircleAdd>();
+ _add_beta = g->nodes()->create<luci::CircleConst>();
+
+ _add->dtype(loco::DataType::FLOAT32);
+ _add_beta->dtype(loco::DataType::FLOAT32);
+
+ _add->fusedActivationFunction(actf);
+
+ assert(shape.size() > 0);
+ auto last_it = std::prev(shape.end(), 1);
+ auto channel_size = *last_it;
+
+ _add->shape(shape);
+ set_beta_shape(channel_size);
+ _add_beta->size<loco::DataType::FLOAT32>(channel_size);
+ for (uint32_t i = 0; i < channel_size; i++)
+ _add_beta->at<loco::DataType::FLOAT32>(i) = i;
+
+ _add->name("add");
+ _add_beta->name("add_beta");
+ }
+
+public:
+ luci::CircleAdd *add() { return _add; }
+
+protected:
+ virtual void set_beta_shape(uint32_t channel) = 0;
+
+protected:
+ luci::CircleAdd *_add = nullptr;
+ luci::CircleConst *_add_beta = nullptr;
+};
+
+class AddRank1BetaGraphlet : public AddBetaGraphlet
+{
+ void set_beta_shape(uint32_t channel) final { _add_beta->shape({channel}); }
+};
+
+class AddRank4BetaGraphlet : public AddBetaGraphlet
+{
+ void set_beta_shape(uint32_t channel) final { _add_beta->shape({1, 1, 1, channel}); }
+};
+
+/**
+ * @brief Graphlet with Mul and Const as gamma from BatchNorm
+ */
+class MulGammaGraphlet
+{
+public:
+ MulGammaGraphlet() = default;
+
+ void init(loco::Graph *g, const ShapeU32 shape, luci::FusedActFunc actf)
+ {
+ _mul = g->nodes()->create<luci::CircleMul>();
+ _mul_gamma = g->nodes()->create<luci::CircleConst>();
+
+ _mul->dtype(loco::DataType::FLOAT32);
+ _mul_gamma->dtype(loco::DataType::FLOAT32);
+
+ _mul->fusedActivationFunction(actf);
+
+ assert(shape.size() > 0);
+ auto last_it = std::prev(shape.end(), 1);
+ auto channel_size = *last_it;
+
+ _mul->shape(shape);
+ set_gamma_shape(channel_size);
+ _mul_gamma->size<loco::DataType::FLOAT32>(channel_size);
+ for (uint32_t i = 0; i < channel_size; i++)
+ _mul_gamma->at<loco::DataType::FLOAT32>(i) = i;
+
+ _mul->name("mul");
+ _mul_gamma->name("mul_gamma");
+ }
+
+public:
+ luci::CircleMul *mul(void) { return _mul; }
+
+protected:
+ virtual void set_gamma_shape(uint32_t channel) = 0;
+
+protected:
+ luci::CircleMul *_mul = nullptr;
+ luci::CircleConst *_mul_gamma = nullptr;
+};
+
+class MulRank1GammaGraphlet : public MulGammaGraphlet
+{
+ void set_gamma_shape(uint32_t channel) final { _mul_gamma->shape({channel}); }
+};
+
+class MulRank4GammaGraphlet : public MulGammaGraphlet
+{
+ void set_gamma_shape(uint32_t channel) final { _mul_gamma->shape({1, 1, 1, channel}); }
+};
+
+/**
+ * @brief Graph of Mul-Add pattern from BatchNorm
+ */
+class MulAddGraph : public TestIOGraph, public AddRank1BetaGraphlet, public MulRank1GammaGraphlet
+{
+public:
+ MulAddGraph() = default;
+
+ void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+ {
+ TestIOGraph::init(shape_in, shape_out);
+ MulRank1GammaGraphlet::init(g(), shape_in, luci::FusedActFunc::NONE);
+ AddRank1BetaGraphlet::init(g(), shape_out, luci::FusedActFunc::RELU);
+
+ // connect network
+ _mul->x(input());
+ _mul->y(_mul_gamma);
+ _add->x(_mul);
+ _add->y(_add_beta);
+ output()->from(_add);
+ }
+};
+
+class MulAddRank4Graph : public TestIOGraph,
+ public AddRank4BetaGraphlet,
+ public MulRank4GammaGraphlet
+{
+public:
+ MulAddRank4Graph() = default;
+
+ void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+ {
+ TestIOGraph::init(shape_in, shape_out);
+ MulRank4GammaGraphlet::init(g(), shape_in, luci::FusedActFunc::NONE);
+ AddRank4BetaGraphlet::init(g(), shape_out, luci::FusedActFunc::RELU);
+
+ // connect network
+ _mul->x(input());
+ _mul->y(_mul_gamma);
+ _add->x(_mul);
+ _add->y(_add_beta);
+ output()->from(_add);
+ }
+};
+
+/**
+ * @brief Graph of Add with Const
+ */
+class AddGraph : public TestIOGraph, public AddRank1BetaGraphlet
+{
+public:
+ AddGraph() = default;
+
+ void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+ {
+ TestIOGraph::init(shape_in, shape_out);
+ AddRank1BetaGraphlet::init(g(), shape_in, luci::FusedActFunc::RELU);
+
+ // connect network
+ _add->x(input());
+ _add->y(_add_beta);
+ output()->from(_add);
+ }
+};
+
+class AddRank4Graph : public TestIOGraph, public AddRank4BetaGraphlet
+{
+public:
+ AddRank4Graph() = default;
+
+ void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+ {
+ TestIOGraph::init(shape_in, shape_out);
+ AddRank4BetaGraphlet::init(g(), shape_in, luci::FusedActFunc::RELU);
+
+ // connect network
+ _add->x(input());
+ _add->y(_add_beta);
+ output()->from(_add);
+ }
+};
+
+} // namespace test
+} // namespace luci
+
+class BatchNormPatternFinderMulAddTest : public ::testing::Test
+{
+public:
+ BatchNormPatternFinderMulAddTest() = default;
+
+protected:
+ luci::test::MulAddGraph _mag;
+ luci::test::MulAddRank4Graph _mag_r4;
+};
+
+class BatchNormPatternFinderAddTest : public ::testing::Test
+{
+public:
+ BatchNormPatternFinderAddTest() = default;
+
+protected:
+ luci::test::AddGraph _ag;
+ luci::test::AddRank4Graph _ag_r4;
+};
+
+TEST_F(BatchNormPatternFinderMulAddTest, is_batchnorm_add)
+{
+ _mag.init({1, 16, 16, 4}, {1, 16, 16, 4});
+
+ luci::CircleMul *mul = nullptr;
+ luci::CircleConst *beta = nullptr;
+
+ auto res = luci::is_batchnorm_add(_mag.add(), mul, beta);
+ ASSERT_TRUE(res);
+ ASSERT_NE(nullptr, mul);
+ ASSERT_NE(nullptr, beta);
+}
+
+TEST_F(BatchNormPatternFinderMulAddTest, is_batchnorm_add2)
+{
+ _mag.init({1, 16, 16, 4}, {1, 16, 16, 4});
+
+ auto res = luci::is_batchnorm_add(_mag.add());
+ ASSERT_TRUE(res);
+}
+
+TEST_F(BatchNormPatternFinderMulAddTest, is_batchnorm_add_rank4)
+{
+ _mag_r4.init({1, 16, 16, 4}, {1, 16, 16, 4});
+
+ luci::CircleMul *mul = nullptr;
+ luci::CircleConst *beta = nullptr;
+
+ auto res = luci::is_batchnorm_add(_mag_r4.add(), mul, beta);
+ ASSERT_TRUE(res);
+ ASSERT_NE(nullptr, mul);
+ ASSERT_NE(nullptr, beta);
+}
+
+TEST_F(BatchNormPatternFinderAddTest, is_batchnorm_add_NEG)
+{
+ _ag.init({1, 16, 16, 4}, {1, 16, 16, 4});
+
+ luci::CircleMul *mul = nullptr;
+ luci::CircleConst *beta = nullptr;
+
+ auto res = luci::is_batchnorm_add(_ag.add(), mul, beta);
+ ASSERT_FALSE(res);
+}
+
+TEST_F(BatchNormPatternFinderMulAddTest, is_batchnorm_mul)
+{
+ _mag.init({1, 16, 16, 4}, {1, 16, 16, 4});
+
+ luci::CircleNode *pred = nullptr;
+ luci::CircleConst *gamma = nullptr;
+
+ auto res = luci::is_batchnorm_mul(_mag.mul(), pred, gamma);
+ ASSERT_TRUE(res);
+ ASSERT_NE(nullptr, pred);
+ ASSERT_NE(nullptr, gamma);
+}
+
+TEST_F(BatchNormPatternFinderMulAddTest, is_batchnorm_mul_rank4)
+{
+ _mag_r4.init({1, 16, 16, 4}, {1, 16, 16, 4});
+
+ luci::CircleNode *pred = nullptr;
+ luci::CircleConst *gamma = nullptr;
+
+ auto res = luci::is_batchnorm_mul(_mag_r4.mul(), pred, gamma);
+ ASSERT_TRUE(res);
+ ASSERT_NE(nullptr, pred);
+ ASSERT_NE(nullptr, gamma);
+}
diff --git a/compiler/luci/pass/src/CircleOptimizer.cpp b/compiler/luci/pass/src/CircleOptimizer.cpp
index 2ee759b4e..b011581af 100644
--- a/compiler/luci/pass/src/CircleOptimizer.cpp
+++ b/compiler/luci/pass/src/CircleOptimizer.cpp
@@ -16,30 +16,80 @@
#include "luci/CircleOptimizer.h"
-#include "luci/Pass/FuseBatchNormWithTConv.h"
+#include "luci/Pass/ConvertNCHWToNHWCPass.h"
+#include "luci/Pass/ExpandBroadcastConstPass.h"
+#include "luci/Pass/FoldAddV2Pass.h"
+#include "luci/Pass/FoldCastPass.h"
+#include "luci/Pass/FoldDensifyPass.h"
+#include "luci/Pass/FoldDepthwiseConv2DPass.h"
+#include "luci/Pass/FoldDequantizePass.h"
+#include "luci/Pass/FoldFullyConnectedPass.h"
+#include "luci/Pass/FoldGatherPass.h"
+#include "luci/Pass/FoldSparseToDensePass.h"
+#include "luci/Pass/ForwardReshapeToUnaryOpPass.h"
+#include "luci/Pass/ForwardTransposeOpPass.h"
+#include "luci/Pass/FuseActivationFunctionPass.h"
+#include "luci/Pass/FuseAddWithFullyConnectedPass.h"
+#include "luci/Pass/FuseAddWithTConvPass.h"
+#include "luci/Pass/FuseBatchNormWithConvPass.h"
+#include "luci/Pass/FuseBatchNormWithDwConvPass.h"
+#include "luci/Pass/FuseBatchNormWithTConvPass.h"
#include "luci/Pass/FuseBCQPass.h"
#include "luci/Pass/FuseInstanceNormPass.h"
+#include "luci/Pass/FuseMeanWithMeanPass.h"
+#include "luci/Pass/FusePreActivationBatchNormPass.h"
+#include "luci/Pass/FusePReluPass.h"
+#include "luci/Pass/FuseGeluPass.h"
+#include "luci/Pass/FuseTransposeWithMeanPass.h"
+#include "luci/Pass/MakeBatchNormGammaPositivePass.h"
+#include "luci/Pass/RemoveDuplicateConstPass.h"
+#include "luci/Pass/RemoveFakeQuantPass.h"
+#include "luci/Pass/RemoveQuantDequantSeqPass.h"
+#include "luci/Pass/RemoveRedundantReshapePass.h"
+#include "luci/Pass/RemoveRedundantTransposePass.h"
+#include "luci/Pass/RemoveRedundantQuantizePass.h"
+#include "luci/Pass/RemoveUnnecessaryReshapePass.h"
+#include "luci/Pass/RemoveUnnecessaryReshapeNetPass.h"
+#include "luci/Pass/RemoveUnnecessarySlicePass.h"
+#include "luci/Pass/RemoveUnnecessaryStridedSlicePass.h"
+#include "luci/Pass/RemoveUnnecessarySplitPass.h"
+#include "luci/Pass/ReplaceNonConstFCWithBatchMatMulPass.h"
+#include "luci/Pass/ReplaceMulAddWithDepthwiseConvPass.h"
+#include "luci/Pass/ReplaceSubWithAddPass.h"
#include "luci/Pass/ResolveCustomOpAddPass.h"
#include "luci/Pass/ResolveCustomOpBatchMatMulPass.h"
#include "luci/Pass/ResolveCustomOpMatMulPass.h"
-#include "luci/Pass/RequantizePass.h"
-#include "luci/Pass/QuantizeWithMinMaxPass.h"
-#include "luci/Pass/QuantizeDequantizeWeightsPass.h"
+#include "luci/Pass/ResolveCustomOpMaxPoolWithArgmaxPass.h"
+#include "luci/Pass/ResolveCustomOpSplitVPass.h"
+#include "luci/Pass/SparsifyTensorPass.h"
+#include "luci/Pass/ShuffleWeightTo16x1Float32Pass.h"
+#include "luci/Pass/SubstitutePackToReshapePass.h"
+#include "luci/Pass/SubstitutePadV2ToPadPass.h"
+#include "luci/Pass/SubstituteSplitVToSplitPass.h"
+#include "luci/Pass/SubstituteSqueezeToReshapePass.h"
+#include "luci/Pass/SubstituteStridedSliceToReshapePass.h"
+#include "luci/Pass/SubstituteTransposeToReshapePass.h"
+#include "luci/Pass/TransformMinMaxToRelu6Pass.h"
+#include "luci/Pass/TransformMinReluToRelu6Pass.h"
+#include "luci/Pass/DecomposeHardSwishPass.h"
+#include "luci/Pass/UnrollUnidirectionalSequenceLSTMPass.h"
// TODO add more passes
-#include "luci/Pass/ShapeInferencePass.h"
-#include "luci/Pass/TypeInferencePass.h"
+#include "luci/Pass/CircleShapeInferencePass.h"
+#include "luci/Pass/CircleTypeInferencePass.h"
// logo passes
#include <logo/RemoveDeadNodeWithQueryPass.h>
+#include "ModulePhase.h"
#include "ProgressReporter.h"
-#include "CircleOptimizerUtils.h"
#include <luci/IR/CircleNodes.h>
#include <logo/Phase.h>
+#include <pepper/csv2vec.h>
#include <memory>
+#include <sstream>
namespace
{
@@ -88,6 +138,46 @@ bool OptimizeOptionsImpl::query(Algorithm algo)
return true;
}
+// TODO Make a struct for args
+void convert_nchw_to_nhwc(loco::Graph *g, bool preserve_input, bool preserve_output, bool fuse_fc,
+ bool fuse_gelu)
+{
+ logo::Phase phase;
+
+ phase.emplace_back(std::make_unique<logo::RemoveDeadNodeWithQueryPass>());
+ phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+ phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
+
+ // Resolve custom Ops
+ phase.emplace_back(std::make_unique<luci::ResolveCustomOpAddPass>());
+ phase.emplace_back(std::make_unique<luci::ResolveCustomOpBatchMatMulPass>());
+ phase.emplace_back(std::make_unique<luci::ResolveCustomOpMatMulPass>());
+ phase.emplace_back(std::make_unique<luci::ResolveCustomOpMaxPoolWithArgmaxPass>());
+ phase.emplace_back(std::make_unique<luci::ResolveCustomOpSplitVPass>());
+
+ // Fuse FullyConnected with Add
+  // Why do we perform FuseAddWithFullyConnectedPass before ConvertNCHWToNHWCPass?
+  // FullyConnected Op's layout is not changed by ConvertNCHWToNHWCPass, while
+  // Add Op's layout is changed from NCHW to NHWC.
+  // This would prevent fusing Add and FullyConnected after ConvertNCHWToNHWC.
+ if (fuse_fc)
+ phase.emplace_back(std::make_unique<luci::FuseAddWithFullyConnectedPass>());
+
+ // Fuse decomposed ops to Gelu Op
+  // Why here? ConvertNCHWToNHWCPass inserts additional Ops, so it is better to fuse
+ // Gelu in advance.
+ if (fuse_gelu)
+ phase.emplace_back(std::make_unique<luci::FuseGeluPass>());
+
+ phase.emplace_back(
+ std::make_unique<luci::ConvertNCHWToNHWCPass>(preserve_input, preserve_output));
+
+ ProgressReporter prog(g, logo::PhaseStrategy::Restart);
+ logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{g};
+ phase_runner.attach(&prog);
+ phase_runner.run(phase);
+}
+
} // namespace
namespace luci
@@ -103,11 +193,50 @@ CircleOptimizer::Options *CircleOptimizer::options(void)
return _options.get();
}
+void CircleOptimizer::optimize(luci::Module *m) const
+{
+ luci::Phase phase;
+
+  // The following passes are needed whenever other passes create or modify nodes.
+ phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+ phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
+
+ if (_options->query(Options::Algorithm::FuseBCQ))
+ {
+ phase.emplace_back(std::make_unique<FuseBCQPass>());
+ }
+
+ ModuleProgressReporter prog(m, logo::PhaseStrategy::Restart);
+ PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{m};
+ phase_runner.attach(&prog);
+ phase_runner.run(phase);
+}
+
void CircleOptimizer::optimize(loco::Graph *g) const
{
logo::Phase phase;
+ // Conversion from NCHW to NHWC is done first to avoid interference with other optimizations.
+ if (_options->query(Options::Algorithm::ConvertNCHWToNHWC))
+ {
+ bool preserve_input =
+ _options->param(Options::AlgorithmParameters::NCHW_to_NHWC_input_shape) != "true";
+ bool preserve_output =
+ _options->param(Options::AlgorithmParameters::NCHW_to_NHWC_output_shape) != "true";
+
+ bool fuse_fc = _options->query(Options::Algorithm::FuseAddWithFullyConnected);
+ bool fuse_gelu = _options->query(Options::Algorithm::FuseGelu);
+
+ convert_nchw_to_nhwc(g, preserve_input, preserve_output, fuse_fc, fuse_gelu);
+ }
+
/* TRANSFORM DECLARATION BEGIN */
+ phase.emplace_back(std::make_unique<logo::RemoveDeadNodeWithQueryPass>());
+
+  // The following passes are needed whenever other passes create or modify nodes.
+ phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+ phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
+
if (_options->query(Options::Algorithm::ResolveCustomOpAdd))
{
phase.emplace_back(std::make_unique<luci::ResolveCustomOpAddPass>());
@@ -120,128 +249,255 @@ void CircleOptimizer::optimize(loco::Graph *g) const
{
phase.emplace_back(std::make_unique<luci::ResolveCustomOpMatMulPass>());
}
+ if (_options->query(Options::Algorithm::FuseMeanWithMean))
+ {
+ phase.emplace_back(std::make_unique<FuseMeanWithMeanPass>());
+ }
+ if (_options->query(Options::Algorithm::ResolveCustomOpMaxPoolWithArgmax))
+ {
+ phase.emplace_back(std::make_unique<luci::ResolveCustomOpMaxPoolWithArgmaxPass>());
+ }
+ if (_options->query(Options::Algorithm::ResolveCustomOpSplitV))
+ {
+ phase.emplace_back(std::make_unique<luci::ResolveCustomOpSplitVPass>());
+ }
if (_options->query(Options::Algorithm::FuseInstanceNorm))
{
phase.emplace_back(std::make_unique<FuseInstanceNormPass>());
}
- if (_options->query(Options::Algorithm::FuseBCQ))
+ if (_options->query(Options::Algorithm::FuseBatchNormWithConv))
{
- phase.emplace_back(std::make_unique<FuseBCQPass>());
+ phase.emplace_back(std::make_unique<FuseBatchNormWithConvPass>());
+ }
+ if (_options->query(Options::Algorithm::FuseBatchNormWithDwConv))
+ {
+ phase.emplace_back(std::make_unique<FuseBatchNormWithDwConvPass>());
}
if (_options->query(Options::Algorithm::FuseBatchNormWithTConv))
{
phase.emplace_back(std::make_unique<FuseBatchNormWithTConvPass>());
}
+ if (_options->query(Options::Algorithm::FuseAddWithFullyConnected))
+ {
+ phase.emplace_back(std::make_unique<FuseAddWithFullyConnectedPass>());
+ }
+ if (_options->query(Options::Algorithm::FuseAddWithTConv))
+ {
+ phase.emplace_back(std::make_unique<FuseAddWithTConvPass>());
+ }
+ if (_options->query(Options::Algorithm::FuseActivationFunction))
+ {
+ phase.emplace_back(std::make_unique<FuseActivationFunctionPass>());
+ }
+ if (_options->query(Options::Algorithm::FusePRelu))
+ {
+ phase.emplace_back(std::make_unique<FusePReluPass>());
+ }
+ if (_options->query(Options::Algorithm::FuseGelu))
+ {
+ phase.emplace_back(std::make_unique<FuseGeluPass>());
+ }
+ if (_options->query(Options::Algorithm::FuseTransposeWithMean))
+ {
+ phase.emplace_back(std::make_unique<FuseTransposeWithMeanPass>());
+ }
+ if (_options->query(Options::Algorithm::FoldAddV2))
+ {
+ phase.emplace_back(std::make_unique<luci::FoldAddV2Pass>());
+ }
+ if (_options->query(Options::Algorithm::FoldCast))
+ {
+ phase.emplace_back(std::make_unique<luci::FoldCastPass>());
+ }
+ if (_options->query(Options::Algorithm::FoldDensify))
+ {
+ phase.emplace_back(std::make_unique<luci::FoldDensifyPass>());
+ }
+ if (_options->query(Options::Algorithm::FoldDepthwiseConv2D))
+ {
+ phase.emplace_back(std::make_unique<luci::FoldDepthwiseConv2DPass>());
+ }
+ if (_options->query(Options::Algorithm::FoldDequantize))
+ {
+ phase.emplace_back(std::make_unique<luci::FoldDequantizePass>());
+ }
+ if (_options->query(Options::Algorithm::FoldFullyConnected))
+ {
+ phase.emplace_back(std::make_unique<luci::FoldFullyConnectedPass>());
+ }
+ if (_options->query(Options::Algorithm::FoldGather))
+ {
+ phase.emplace_back(std::make_unique<luci::FoldGatherPass>());
+ }
+ if (_options->query(Options::Algorithm::FoldSparseToDense))
+ {
+ phase.emplace_back(std::make_unique<luci::FoldSparseToDensePass>());
+ }
+ if (_options->query(Options::Algorithm::FusePreActivationBatchNorm))
+ {
+ phase.emplace_back(std::make_unique<luci::FusePreActivationBatchNormPass>());
+ }
+ if (_options->query(Options::Algorithm::MakeBatchNormGammaPositive))
+ {
+ phase.emplace_back(std::make_unique<luci::MakeBatchNormGammaPositivePass>());
+ }
+ if (_options->query(Options::Algorithm::ShuffleWeightTo16x1Float32))
+ {
+ phase.emplace_back(std::make_unique<luci::ShuffleWeightTo16x1Float32Pass>());
+ }
+ if (_options->query(Options::Algorithm::ExpandBroadcastConst))
+ {
+ phase.emplace_back(std::make_unique<luci::ExpandBroadcastConstPass>());
+ }
+ if (_options->query(Options::Algorithm::RemoveDuplicateConst))
+ {
+ phase.emplace_back(std::make_unique<luci::RemoveDuplicateConstPass>());
+ }
+ if (_options->query(Options::Algorithm::RemoveFakeQuant))
+ {
+ phase.emplace_back(std::make_unique<luci::RemoveFakeQuantPass>());
+ }
+ if (_options->query(Options::Algorithm::RemoveQuantDequantSeq))
+ {
+ phase.emplace_back(std::make_unique<luci::RemoveQuantDequantSeqPass>());
+ }
+ if (_options->query(Options::Algorithm::RemoveUnnecessaryReshape))
+ {
+ phase.emplace_back(std::make_unique<luci::RemoveUnnecessaryReshapePass>());
+ phase.emplace_back(std::make_unique<luci::RemoveUnnecessaryReshapeNetPass>());
+ }
+ if (_options->query(Options::Algorithm::RemoveUnnecessarySlice))
+ {
+ phase.emplace_back(std::make_unique<luci::RemoveUnnecessarySlicePass>());
+ }
+ if (_options->query(Options::Algorithm::RemoveUnnecessaryStridedSlice))
+ {
+ phase.emplace_back(std::make_unique<luci::RemoveUnnecessaryStridedSlicePass>());
+ }
+ if (_options->query(Options::Algorithm::RemoveUnnecessarySplit))
+ {
+ phase.emplace_back(std::make_unique<luci::RemoveUnnecessarySplitPass>());
+ }
+ if (_options->query(Options::Algorithm::RemoveRedundantReshape))
+ {
+ phase.emplace_back(std::make_unique<luci::RemoveRedundantReshapePass>());
+ }
+ if (_options->query(Options::Algorithm::RemoveRedundantTranspose))
+ {
+ phase.emplace_back(std::make_unique<luci::RemoveRedundantTransposePass>());
+ }
+ if (_options->query(Options::Algorithm::RemoveRedundantQuantize))
+ {
+ phase.emplace_back(std::make_unique<luci::RemoveRedundantQuantizePass>());
+ }
+ if (_options->query(Options::Algorithm::ReplaceNonConstFCWithBatchMatMul))
+ {
+ phase.emplace_back(std::make_unique<luci::ReplaceNonConstFCWithBatchMatMulPass>());
+ }
+ if (_options->query(Options::Algorithm::ReplaceMulAddWithDepthwiseConv))
+ {
+ phase.emplace_back(std::make_unique<luci::ReplaceMulAddWithDepthwiseConvPass>());
+ }
+ if (_options->query(Options::Algorithm::ReplaceSubWithAdd))
+ {
+ phase.emplace_back(std::make_unique<luci::ReplaceSubWithAddPass>());
+ }
+ if (_options->query(Options::Algorithm::SubstitutePackToReshape))
+ {
+ phase.emplace_back(std::make_unique<luci::SubstitutePackToReshapePass>());
+ }
+ if (_options->query(Options::Algorithm::SubstitutePadV2ToPad))
+ {
+ phase.emplace_back(std::make_unique<luci::SubstitutePadV2ToPadPass>());
+ }
+ if (_options->query(Options::Algorithm::SubstituteSplitVToSplit))
+ {
+ phase.emplace_back(std::make_unique<luci::SubstituteSplitVToSplitPass>());
+ }
+ if (_options->query(Options::Algorithm::SubstituteSqueezeToReshape))
+ {
+ phase.emplace_back(std::make_unique<luci::SubstituteSqueezeToReshapePass>());
+ }
+ if (_options->query(Options::Algorithm::SubstituteStridedSliceToReshape))
+ {
+ phase.emplace_back(std::make_unique<luci::SubstituteStridedSliceToReshapePass>());
+ }
+ if (_options->query(Options::Algorithm::SubstituteTransposeToReshape))
+ {
+ phase.emplace_back(std::make_unique<luci::SubstituteTransposeToReshapePass>());
+ }
+ if (_options->query(Options::Algorithm::TransformMinMaxToRelu6Pass))
+ {
+ phase.emplace_back(std::make_unique<luci::TransformMinMaxToRelu6Pass>());
+ }
+ if (_options->query(Options::Algorithm::TransformMinReluToRelu6Pass))
+ {
+ phase.emplace_back(std::make_unique<luci::TransformMinReluToRelu6Pass>());
+ }
+ if (_options->query(Options::Algorithm::DecomposeHardSwishPass))
+ {
+ phase.emplace_back(std::make_unique<luci::DecomposeHardSwishPass>());
+ }
+ if (_options->query(Options::Algorithm::UnrollUnidirSeqLSTM))
+ {
+ phase.emplace_back(std::make_unique<luci::UnrollUnidirectionalSequenceLSTMPass>());
+ }
+ // Forward Reshape/Transpose is done after
+ // 1. SubstituteXXXToReshape
+ // 2. RemoveRedundantReshape/Transpose
+ // See https://github.com/Samsung/ONE/pull/10596 for more details
+ if (_options->query(Options::Algorithm::ForwardReshapeToUnaryOp))
+ {
+ phase.emplace_back(std::make_unique<luci::ForwardReshapeToUnaryOpPass>());
+ }
+ if (_options->query(Options::Algorithm::ForwardTransposeOp))
+ {
+ phase.emplace_back(std::make_unique<luci::ForwardTransposeOpPass>());
+ }
- // Shape inference is needed for added nodes doing above transformations
- phase.emplace_back(std::make_unique<luci::ShapeInferencePass>());
- phase.emplace_back(std::make_unique<luci::TypeInferencePass>());
- phase.emplace_back(std::make_unique<logo::RemoveDeadNodeWithQueryPass>());
/* TRANSFORM DECLARATION END */
- ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
- logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
+ ProgressReporter prog(g, logo::PhaseStrategy::Restart);
+ logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{g};
phase_runner.attach(&prog);
phase_runner.run(phase);
}
-void CircleOptimizer::quantize(loco::Graph *g) const
+void CircleOptimizer::sparsify(loco::Graph *g) const
{
- // Fake quantization of weights
- if (_options->query(Options::Algorithm::QuantizeDequantizeWeights))
+ if (_options->query(Options::Algorithm::SparsifyTensorPass))
{
- static const std::vector<std::string> fakeq_supported_input_dtype{"float32"};
- static const std::vector<std::string> fakeq_supported_output_dtype{"uint8"};
- static const std::vector<std::string> fakeq_supported_granularity{"layer", "channel"};
-
- auto input_dtype = _options->param(Options::AlgorithmParameters::Quantize_input_dtype);
- auto output_dtype = _options->param(Options::AlgorithmParameters::Quantize_output_dtype);
- auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity);
-
- if (!in_array(to_lower_case(input_dtype), fakeq_supported_input_dtype))
- throw std::runtime_error("Unsupported input type. List of supported input type: " +
- to_string(fakeq_supported_input_dtype));
-
- if (!in_array(to_lower_case(output_dtype), fakeq_supported_output_dtype))
- throw std::runtime_error("Unsupported output type. List of supported output type: " +
- to_string(fakeq_supported_output_dtype));
-
- if (!in_array(to_lower_case(granularity), fakeq_supported_granularity))
- throw std::runtime_error("Unsupported granularity. List of supported granularity: " +
- to_string(fakeq_supported_granularity));
-
- // Clear existing quantparams before doing fake quantization
- for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ std::string tensor_name = _options->param(Options::AlgorithmParameters::Sparsify_tensor_name);
+    std::string str_traversal_order =
+ _options->param(Options::AlgorithmParameters::Sparsify_traversal_order);
+ std::string str_format = _options->param(Options::AlgorithmParameters::Sparsify_format);
+ std::string str_block_size = _options->param(Options::AlgorithmParameters::Sparsify_block_size);
+ std::string str_block_map = _options->param(Options::AlgorithmParameters::Sparsify_block_map);
+
+ // traversal order
+    std::vector<int32_t> traversal_order = pepper::csv_to_vector<int32_t>(str_traversal_order);
+ // format
+ std::vector<DimensionType> format;
+ std::istringstream is(str_format);
+ for (char c; is >> c;)
{
- auto circle_node = loco::must_cast<luci::CircleNode *>(node);
- if (circle_node->quantparam() != nullptr)
- circle_node->quantparam(nullptr);
+ assert(c != ',');
+ if (c == 'd')
+ format.push_back(DimensionType::DENSE);
+ else if (c == 's')
+ format.push_back(DimensionType::SPARSE_CSR);
+ if (is.peek() == ',')
+ is.ignore();
}
-
- luci::QuantizeDequantizeWeightsPass fake_quantizer(
- str_to_dtype(input_dtype), str_to_dtype(output_dtype), str_to_granularity(granularity));
- fake_quantizer.run(g);
- }
-
- // Actual quantization of weights, bias, and activation
- if (_options->query(Options::Algorithm::QuantizeWithMinMax))
- {
- static const std::vector<std::string> qwmm_supported_input_dtype{"float32"};
- static const std::vector<std::string> qwmm_supported_output_dtype{"uint8"};
- static const std::vector<std::string> qwmm_supported_granularity{"layer", "channel"};
-
- auto input_dtype = _options->param(Options::AlgorithmParameters::Quantize_input_dtype);
- auto output_dtype = _options->param(Options::AlgorithmParameters::Quantize_output_dtype);
- auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity);
-
- if (!in_array(to_lower_case(input_dtype), qwmm_supported_input_dtype))
- throw std::runtime_error("Unsupported input type. List of supported input types: " +
- to_string(qwmm_supported_input_dtype));
-
- if (!in_array(to_lower_case(output_dtype), qwmm_supported_output_dtype))
- throw std::runtime_error("Unsupported output type. List of supported output types: " +
- to_string(qwmm_supported_output_dtype));
-
- if (!in_array(to_lower_case(granularity), qwmm_supported_granularity))
- throw std::runtime_error("Unsupported granularity. List of supported granularity: " +
- to_string(qwmm_supported_granularity));
-
- luci::QuantizeWithMinMaxPass quantizer(str_to_dtype(input_dtype), str_to_dtype(output_dtype),
- str_to_granularity(granularity));
- quantizer.run(g);
+ // block size
+ std::vector<int32_t> block_size = pepper::csv_to_vector<int32_t>(str_block_size);
+ // block map
+ std::vector<int32_t> block_map = pepper::csv_to_vector<int32_t>(str_block_map);
+
+ luci::SparsifyTensorPass sparsifier{tensor_name, traversal_order, format, block_size,
+ block_map};
+ sparsifier.run(g);
}
-
- // Requantize
- if (_options->query(Options::Algorithm::Requantize))
- {
- static const std::vector<std::string> rq_supported_input_dtype{"int8"};
- static const std::vector<std::string> rq_supported_output_dtype{"uint8"};
-
- auto input_dtype = _options->param(Options::AlgorithmParameters::Quantize_input_dtype);
- auto output_dtype = _options->param(Options::AlgorithmParameters::Quantize_output_dtype);
-
- if (!in_array(to_lower_case(input_dtype), rq_supported_input_dtype))
- throw std::runtime_error("Unsupported input type. List of supported input types: " +
- to_string(rq_supported_input_dtype));
-
- if (!in_array(to_lower_case(output_dtype), rq_supported_output_dtype))
- throw std::runtime_error("Unsupported output type. List of supported output types: " +
- to_string(rq_supported_output_dtype));
-
- luci::RequantizePass requantizer(str_to_dtype(input_dtype), str_to_dtype(output_dtype));
- requantizer.run(g);
- }
-
- logo::Phase phase;
-
- // Do Shape/Type inference
- phase.emplace_back(std::make_unique<luci::ShapeInferencePass>());
- phase.emplace_back(std::make_unique<luci::TypeInferencePass>());
-
- ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
- logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
- phase_runner.attach(&prog);
- phase_runner.run(phase);
}
} // namespace luci
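As a usage sketch, the conversion above is requested through the public Options API (the same enable/param interface exercised by the test below); passing the literal string "true" means the boundary shapes are converted rather than preserved:

    luci::CircleOptimizer optimizer;
    auto options = optimizer.options();
    options->enable(luci::CircleOptimizer::Options::Algorithm::ConvertNCHWToNHWC);
    // "true" -> do NOT preserve the NCHW shape of graph inputs/outputs
    options->param(
      luci::CircleOptimizer::Options::AlgorithmParameters::NCHW_to_NHWC_input_shape, "true");
    options->param(
      luci::CircleOptimizer::Options::AlgorithmParameters::NCHW_to_NHWC_output_shape, "true");
    optimizer.optimize(g); // g: loco::Graph * of the loaded Circle model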
diff --git a/compiler/luci/pass/src/CircleOptimizer.test.cpp b/compiler/luci/pass/src/CircleOptimizer.test.cpp
new file mode 100644
index 000000000..041fc7d75
--- /dev/null
+++ b/compiler/luci/pass/src/CircleOptimizer.test.cpp
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/CircleOptimizer.h"
+
+#include <gtest/gtest.h>
+
+using namespace luci;
+using Algorithms = luci::CircleOptimizer::Options::Algorithm;
+using AlgorithmParameters = luci::CircleOptimizer::Options::AlgorithmParameters;
+
+TEST(CircleOptimizerTest, optimize_algorithms)
+{
+ loco::Graph g;
+ luci::CircleOptimizer o;
+
+ auto options = o.options();
+
+  // NOTE these are enabled here to increase code coverage
+ // TODO add more if needed
+ options->enable(Algorithms::FoldAddV2);
+ options->enable(Algorithms::FoldCast);
+ options->enable(Algorithms::FoldDepthwiseConv2D);
+ options->enable(Algorithms::FoldDequantize);
+ options->enable(Algorithms::FoldSparseToDense);
+ options->enable(Algorithms::FusePreActivationBatchNorm);
+ options->enable(Algorithms::MakeBatchNormGammaPositive);
+ options->enable(Algorithms::ShuffleWeightTo16x1Float32);
+ options->enable(Algorithms::RemoveUnnecessaryReshape);
+ options->enable(Algorithms::RemoveUnnecessarySlice);
+ options->enable(Algorithms::RemoveUnnecessarySplit);
+ options->enable(Algorithms::ReplaceMulAddWithDepthwiseConv);
+ options->enable(Algorithms::SubstituteStridedSliceToReshape);
+ options->enable(Algorithms::SubstituteTransposeToReshape);
+ options->enable(Algorithms::ConvertNCHWToNHWC);
+ options->enable(Algorithms::ExpandBroadcastConst);
+
+ o.optimize(&g);
+
+ SUCCEED();
+}
+
+TEST(CircleOptimizerTest, sparsify_simple)
+{
+ loco::Graph g;
+ luci::CircleOptimizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::SparsifyTensorPass);
+ options->param(AlgorithmParameters::Sparsify_tensor_name, "dummy");
+ options->param(AlgorithmParameters::Sparsify_traversal_order, "dummy");
+ options->param(AlgorithmParameters::Sparsify_format, "ds");
+ options->param(AlgorithmParameters::Sparsify_block_size, "1,1");
+ options->param(AlgorithmParameters::Sparsify_block_map, "1,1");
+
+ o.sparsify(&g);
+
+ SUCCEED();
+}
diff --git a/compiler/luci/pass/src/CircleOptimizerUtils.cpp b/compiler/luci/pass/src/CircleOptimizerUtils.cpp
index ffc372392..127573db4 100644
--- a/compiler/luci/pass/src/CircleOptimizerUtils.cpp
+++ b/compiler/luci/pass/src/CircleOptimizerUtils.cpp
@@ -16,74 +16,18 @@
#include "CircleOptimizerUtils.h"
-namespace luci
-{
-
-bool in_array(const std::string &str, const std::vector<std::string> &array)
-{
- return std::find(array.begin(), array.end(), str) != array.end();
-}
+#include <luci/IR/CircleNode.h>
-std::string to_string(const std::vector<std::string> &strings)
-{
- assert(!strings.empty());
-
- std::string res;
- for (unsigned int i = 0; i < strings.size() - 1; i++)
- res += strings[i] + ", ";
-
- res += strings[strings.size() - 1];
- return res;
-}
-
-std::string to_lower_case(std::string s)
-{
- std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) { return std::tolower(c); });
- return s;
-}
-
-loco::DataType str_to_dtype(const std::string &str)
+namespace luci
{
- if (to_lower_case(str).compare("uint8") == 0)
- return loco::DataType::U8;
- if (to_lower_case(str).compare("uint16") == 0)
- return loco::DataType::U16;
- if (to_lower_case(str).compare("uint32") == 0)
- return loco::DataType::U32;
- if (to_lower_case(str).compare("uint64") == 0)
- return loco::DataType::U64;
-
- if (to_lower_case(str).compare("int8") == 0)
- return loco::DataType::S8;
- if (to_lower_case(str).compare("int16") == 0)
- return loco::DataType::S16;
- if (to_lower_case(str).compare("int32") == 0)
- return loco::DataType::S32;
- if (to_lower_case(str).compare("int64") == 0)
- return loco::DataType::S64;
-
- if (to_lower_case(str).compare("float16") == 0)
- return loco::DataType::FLOAT16;
- if (to_lower_case(str).compare("float32") == 0)
- return loco::DataType::FLOAT32;
- if (to_lower_case(str).compare("float64") == 0)
- return loco::DataType::FLOAT64;
- if (to_lower_case(str).compare("bool") == 0)
- return loco::DataType::BOOL;
-
- return loco::DataType::Unknown;
-}
-
-QuantizationGranularity str_to_granularity(const std::string &str)
+bool has_dynamic_shape(const loco::Node *node)
{
- if (to_lower_case(str).compare("layer") == 0)
- return QuantizationGranularity::LayerWise;
-
- if (to_lower_case(str).compare("channel") == 0)
- return QuantizationGranularity::ChannelWise;
-
- throw std::runtime_error("Quantization granularity must be either 'layer' or 'channel'");
+ const auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
+ for (uint32_t i = 0; i < circle_node->rank(); ++i)
+ if (!circle_node->dim(i).known())
+ return true;
+ return false;
}
} // namespace luci
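A small sketch of the intended use of this helper, e.g. guarding a transformation that requires fully static shapes (the surrounding loop is hypothetical):

    for (auto node : loco::active_nodes(loco::output_nodes(g)))
    {
      if (luci::has_dynamic_shape(node))
        continue; // at least one dimension is unknown; skip this node
      // ... transform nodes whose shapes are fully known ...
    }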
diff --git a/compiler/luci/pass/src/CircleOptimizerUtils.h b/compiler/luci/pass/src/CircleOptimizerUtils.h
index 7e577a05f..e04942bfa 100644
--- a/compiler/luci/pass/src/CircleOptimizerUtils.h
+++ b/compiler/luci/pass/src/CircleOptimizerUtils.h
@@ -17,25 +17,12 @@
#ifndef __LUCI_CIRCLE_OPTIMIZER_UTILS_H__
#define __LUCI_CIRCLE_OPTIMIZER_UTILS_H__
-#include "luci/Pass/QuantizeDequantizeWeightsPass.h"
-#include "luci/Pass/QuantizeWithMinMaxPass.h"
-
#include <loco.h>
-#include <algorithm>
-
namespace luci
{
-bool in_array(const std::string &, const std::vector<std::string> &);
-
-std::string to_string(const std::vector<std::string> &);
-
-std::string to_lower_case(std::string);
-
-loco::DataType str_to_dtype(const std::string &);
-
-QuantizationGranularity str_to_granularity(const std::string &);
+bool has_dynamic_shape(const loco::Node *node);
} // namespace luci
diff --git a/compiler/luci/pass/src/CircleQuantizer.cpp b/compiler/luci/pass/src/CircleQuantizer.cpp
new file mode 100644
index 000000000..6db26d179
--- /dev/null
+++ b/compiler/luci/pass/src/CircleQuantizer.cpp
@@ -0,0 +1,663 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/CircleQuantizer.h"
+
+#include "luci/Pass/CopyQuantParamPass.h"
+#include "luci/Pass/ForceQuantParamPass.h"
+#include "luci/Pass/PropagateQParamForwardPass.h"
+#include "luci/Pass/RequantizePass.h"
+#include "luci/Pass/ConvertToFakeQuantizedModelPass.h"
+#include "luci/Pass/FoldDequantizePass.h"
+#include "luci/Pass/RemoveRedundantDequantizePass.h"
+#include "luci/Pass/QuantizePreCheckerPass.h"
+#include "luci/Pass/QuantizeWithMinMaxPass.h"
+#include "luci/Pass/QuantizeDequantizeWeightsPass.h"
+#include "luci/Pass/QuantizeWeightsPass.h"
+
+#include "luci/Pass/CircleShapeInferencePass.h"
+#include "luci/Pass/CircleTypeInferencePass.h"
+
+// logo passes
+#include <logo/RemoveDeadNodeWithQueryPass.h>
+
+#include "ProgressReporter.h"
+#include "helpers/Strings.h"
+
+#include "QuantizedModelVerifier.h"
+
+#include <luci/IR/CircleNode.h>
+#include <logo/Phase.h>
+#include <pepper/csv2vec.h>
+
+#include <memory>
+
+namespace
+{
+
+using namespace luci;
+using LayerParam = luci::CircleQuantizer::Options::LayerParam;
+
+// This function updates the user-given input_type to match the input signature of the graph.
+// If the user gives only one input_type, it is expanded to the number of graph inputs.
+void canonicalize_input_type(loco::Graph *g, std::vector<loco::DataType> &input_type)
+{
+ if (g == nullptr)
+ return;
+
+ const auto inputs = g->inputs();
+
+ assert(inputs); // FIX_CALLER_UNLESS
+
+  // Check validity of the number of input dtypes given by the user
+ if (input_type.size() != 1 and input_type.size() != inputs->size())
+ {
+ throw std::runtime_error(
+ "Invalid number of input dtype. The number of input dtype should be 1 or "
+ "the same as the number of graph inputs.");
+ }
+
+ // Handle the case when a user gives only one input dtype
+ if (input_type.size() == 1)
+ {
+ const auto user_given_dtype = input_type[0];
+ input_type.clear();
+
+ // Expand input dtype to the number of graph inputs
+    // Since the quantizer can only quantize float32, user_given_dtype is applied only to float32 inputs
+ auto input_nodes = loco::input_nodes(g);
+ for (uint32_t i = 0; i < input_nodes.size(); i++)
+ {
+ auto input = loco::must_cast<luci::CircleInput *>(input_nodes[i]);
+
+ if (input->dtype() == loco::DataType::FLOAT32)
+ input_type.push_back(user_given_dtype);
+ else
+ input_type.push_back(input->dtype());
+ }
+ }
+
+ // Finally, check validity of input_type
+ // input_type is valid if
+ // C1. for non-float32 model input, input_type == model's input dtype
+ // or
+ // C2. for float32 model input, input_type == uint8, int16, or float32
+ auto input_nodes = loco::input_nodes(g);
+ for (uint32_t i = 0; i < input_nodes.size(); i++)
+ {
+ auto input = loco::must_cast<luci::CircleInput *>(input_nodes[i]);
+ assert(i == input->index()); // FIX_ME_UNLESS
+
+ if (input->dtype() != loco::DataType::FLOAT32)
+ {
+ // C1
+ if (input->dtype() != input_type[i])
+ throw std::runtime_error(
+ "Input dtype of " + input->name() +
+ " is invalid. It has to be the same with the model's input dtype.");
+ }
+ else
+ {
+ // C2
+ if (input_type[i] != loco::DataType::FLOAT32 and input_type[i] != loco::DataType::U8 and
+ input_type[i] != loco::DataType::S16)
+ {
+ throw std::runtime_error("Input dtype of " + input->name() +
+ " is invalid. For float32 input, the input dtype after "
+ "quantization must be one of uint8, int16, or float32.");
+ }
+ }
+ }
+}
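// Worked example of the expansion rule above (hypothetical graph `g` whose two
// inputs are a float32 tensor and an int32 index tensor):
//   std::vector<loco::DataType> input_type{loco::DataType::U8};
//   canonicalize_input_type(g, input_type);
// The float32 input takes the user-given dtype and the non-float32 input keeps
// its own dtype, so input_type becomes {U8, S32} and the C1/C2 checks pass.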
+
+// This function updates the user-given output_type to match the output signature of the graph.
+// If the user gives only one output_type, it is expanded to the number of graph outputs.
+// NOTE This function is almost the same as canonicalize_input_type, but it is written as a
+//      separate function for more precise error messaging.
+// TODO Find a way to reduce the duplicated code
+void canonicalize_output_type(loco::Graph *g, std::vector<loco::DataType> &output_type)
+{
+ if (g == nullptr)
+ return;
+
+ const auto outputs = g->outputs();
+
+ assert(outputs); // FIX_CALLER_UNLESS
+
+  // Check validity of the number of output dtypes given by the user
+ if (output_type.size() != 1 and output_type.size() != outputs->size())
+ {
+ throw std::runtime_error(
+ "Invalid number of output dtype. The number of output dtype should be 1 or "
+ "the same as the number of graph outputs.");
+ }
+
+ // Handle the case when a user gives only one output dtype
+ if (output_type.size() == 1)
+ {
+ const auto user_given_dtype = output_type[0];
+ output_type.clear();
+
+ // Expand output dtype to the number of graph outputs
+    // If the dtype of a graph output is float32, it is replaced with user_given_dtype;
+    // otherwise, it is left unchanged.
+ auto output_nodes = loco::output_nodes(g);
+ for (uint32_t i = 0; i < output_nodes.size(); i++)
+ {
+ auto output = loco::must_cast<luci::CircleOutput *>(output_nodes[i]);
+
+ if (output->dtype() == loco::DataType::FLOAT32)
+ output_type.push_back(user_given_dtype);
+ else
+ output_type.push_back(output->dtype());
+ }
+ }
+
+ // Finally, check validity of output_type
+ // output_type is valid if
+ // C1. for non-float32 model output, output_type == model's output dtype
+ // or
+ // C2. for float32 model output, output_type == uint8, int16, or float32
+ auto output_nodes = loco::output_nodes(g);
+ for (uint32_t i = 0; i < output_nodes.size(); i++)
+ {
+ auto output = loco::must_cast<luci::CircleOutput *>(output_nodes[i]);
+ assert(i == output->index()); // FIX_ME_UNLESS
+
+ if (output->dtype() != loco::DataType::FLOAT32)
+ {
+ // C1
+ if (output->dtype() != output_type[i])
+ throw std::runtime_error(
+ "Output dtype of " + output->name() +
+ " is invalid. It has to be the same with the model's output dtype.");
+ }
+ else
+ {
+ // C2
+ if (output_type[i] != loco::DataType::FLOAT32 and output_type[i] != loco::DataType::U8 and
+ output_type[i] != loco::DataType::S16)
+ {
+ throw std::runtime_error("Output dtype of " + output->name() +
+ " is invalid. For float32 output, the output dtype after "
+ "quantization must be one of uint8, int16, or float32.");
+ }
+ }
+ }
+}
+
+template <typename T> T lexical_cast(const std::string &str)
+{
+ std::istringstream ss;
+ ss.str(str);
+ T data;
+ ss >> data;
+ return data;
+}
+
+template <typename T> std::vector<T> lexical_cast(std::vector<std::string> &sv)
+{
+ std::vector<T> result;
+ std::transform(sv.begin(), sv.end(), std::back_inserter(result),
+ [](std::string str) -> T { return lexical_cast<T>(str); });
+ return result;
+}
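// For illustration, the two overloads compose as follows (values arbitrary):
//   int32_t n = lexical_cast<int32_t>("42");             // n == 42
//   std::vector<std::string> sv{"1", "2", "3"};
//   std::vector<int32_t> iv = lexical_cast<int32_t>(sv); // iv == {1, 2, 3}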
+
+class QuantizeOptionsImpl final : public luci::CircleQuantizer::Options
+{
+public:
+ void enable(Algorithm) final;
+ void param(AlgorithmParameters, const std::string &) final;
+ const std::string param(AlgorithmParameters) const final;
+ void params(AlgorithmParameters, std::vector<std::string> &) final;
+ std::vector<std::string> params(AlgorithmParameters) const final;
+ void layer_params(AlgorithmParameters, std::vector<std::shared_ptr<LayerParam>> &) final;
+ std::vector<std::shared_ptr<LayerParam>> layer_params(AlgorithmParameters) const final;
+ bool query(Algorithm) final;
+
+private:
+ std::vector<Algorithm> _algorithms;
+ std::map<AlgorithmParameters, const std::string> _algorithm_params;
+ std::map<AlgorithmParameters, std::vector<std::string>> _multiple_params;
+ std::map<AlgorithmParameters, std::vector<std::shared_ptr<LayerParam>>> _layer_params;
+};
+
+void QuantizeOptionsImpl::enable(Algorithm algo) { _algorithms.push_back(algo); }
+
+void QuantizeOptionsImpl::param(AlgorithmParameters param, const std::string &str)
+{
+ _algorithm_params.insert(std::pair<AlgorithmParameters, const std::string>(param, str));
+}
+
+const std::string QuantizeOptionsImpl::param(AlgorithmParameters param) const
+{
+ auto param_str = _algorithm_params.find(param);
+ if (param_str != _algorithm_params.end())
+ {
+ return param_str->second;
+ }
+ else
+ {
+ return std::string();
+ }
+}
+
+void QuantizeOptionsImpl::params(AlgorithmParameters param, std::vector<std::string> &vec)
+{
+ _multiple_params[param] = vec;
+}
+
+std::vector<std::string> QuantizeOptionsImpl::params(AlgorithmParameters param) const
+{
+ auto param_vec = _multiple_params.find(param);
+ if (param_vec != _multiple_params.end())
+ {
+ return param_vec->second;
+ }
+ else
+ {
+ return std::vector<std::string>();
+ }
+}
+
+void QuantizeOptionsImpl::layer_params(AlgorithmParameters param,
+ std::vector<std::shared_ptr<LayerParam>> &vec)
+{
+ _layer_params[param] = vec;
+}
+
+std::vector<std::shared_ptr<LayerParam>>
+QuantizeOptionsImpl::layer_params(AlgorithmParameters param) const
+{
+ auto param_vec = _layer_params.find(param);
+ if (param_vec != _layer_params.end())
+ {
+ return param_vec->second;
+ }
+ else
+ {
+ return std::vector<std::shared_ptr<LayerParam>>();
+ }
+}
+
+bool QuantizeOptionsImpl::query(Algorithm algo)
+{
+ std::vector<Algorithm>::iterator it = std::find(_algorithms.begin(), _algorithms.end(), algo);
+ if (it == _algorithms.end())
+ return false;
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+CircleQuantizer::Options *CircleQuantizer::options(void)
+{
+ if (_options == nullptr)
+ {
+ _options = std::make_unique<QuantizeOptionsImpl>();
+ }
+
+ return _options.get();
+}
+
+void CircleQuantizer::quantize(loco::Graph *g) const
+{
+ // Fake quantization of weights
+ if (_options->query(Options::Algorithm::QuantizeDequantizeWeights))
+ {
+ static const std::vector<std::string> fakeq_supported_input_model_dtype{"float32"};
+ static const std::vector<std::string> fakeq_supported_output_model_dtype{"uint8", "int16"};
+ static const std::vector<std::string> fakeq_supported_granularity{"layer", "channel"};
+
+ auto input_model_dtype =
+ _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
+ auto output_model_dtype =
+ _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);
+ auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity);
+ auto layer_params = _options->layer_params(Options::AlgorithmParameters::Quantize_layer_params);
+
+ if (!in_array(to_lower_case(input_model_dtype), fakeq_supported_input_model_dtype))
+ throw std::runtime_error("Unsupported input type. List of supported input type: " +
+ to_string(fakeq_supported_input_model_dtype));
+
+ if (!in_array(to_lower_case(output_model_dtype), fakeq_supported_output_model_dtype))
+ throw std::runtime_error("Unsupported output type. List of supported output type: " +
+ to_string(fakeq_supported_output_model_dtype));
+
+ if (!in_array(to_lower_case(granularity), fakeq_supported_granularity))
+ throw std::runtime_error("Unsupported granularity. List of supported granularity: " +
+ to_string(fakeq_supported_granularity));
+
+ if (str_to_granularity(granularity) == QuantizationGranularity::LayerWise &&
+ str_to_dtype(output_model_dtype) != loco::DataType::U8)
+ throw std::runtime_error("Layer-wise quantization only supports uint8 dtype.");
+
+ // Check dtype/granularity of layer params
+ for (auto layer_param : layer_params)
+ {
+ const auto &name = layer_param->name;
+ if (!in_array(to_lower_case(layer_param->dtype), fakeq_supported_output_model_dtype))
+ {
+ throw std::runtime_error("Unsupported dtype in " + name + ". List of supported dtype: " +
+ to_string(fakeq_supported_output_model_dtype));
+ }
+ if (!in_array(to_lower_case(layer_param->granularity), fakeq_supported_granularity))
+ {
+ throw std::runtime_error(
+ "Unsupported granularity in " + name +
+ ". List of supported granularity: " + to_string(fakeq_supported_granularity));
+ }
+ }
+
+ // Clear existing quantparams before doing fake quantization
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ if (circle_node->quantparam() != nullptr)
+ circle_node->quantparam(nullptr);
+ }
+
+ auto ctx = std::make_unique<luci::QuantizeDequantizeWeightsPass::Context>();
+ {
+ ctx->input_model_dtype = str_to_dtype(input_model_dtype);
+ ctx->output_model_dtype = str_to_dtype(output_model_dtype);
+ ctx->granularity = str_to_granularity(granularity);
+
+ for (auto layer_param : layer_params)
+ {
+ LayerInfo info;
+ {
+ info.name = layer_param->name;
+ info.dtype = str_to_dtype(layer_param->dtype);
+ info.granularity = str_to_granularity(layer_param->granularity);
+ }
+ ctx->layers_info.emplace_back(info);
+ }
+ }
+
+ luci::QuantizeDequantizeWeightsPass fake_quantizer(std::move(ctx));
+
+ fake_quantizer.run(g);
+ }
+
+ // Actual quantization of weights, bias, and activation
+ if (_options->query(Options::Algorithm::QuantizeWithMinMax))
+ {
+ static const std::vector<std::string> qwmm_supported_input_model_dtype{"float32"};
+ static const std::vector<std::string> qwmm_supported_output_model_dtype{"uint8", "int16"};
+ static const std::vector<std::string> qwmm_supported_granularity{"layer", "channel"};
+ static const std::vector<std::string> qwmm_supported_input_type{"uint8", "int16", "int32",
+ "int64", "float32", "bool"};
+ static const std::vector<std::string> qwmm_supported_output_type{"uint8", "int16", "int32",
+ "int64", "float32", "bool"};
+
+ auto input_model_dtype =
+ _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
+ auto output_model_dtype =
+ _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);
+ auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity);
+ auto input_type = _options->param(Options::AlgorithmParameters::Quantize_input_type);
+ if (input_type.empty())
+ input_type = output_model_dtype;
+ auto output_type = _options->param(Options::AlgorithmParameters::Quantize_output_type);
+ if (output_type.empty())
+ output_type = output_model_dtype;
+
+ auto input_type_vec = pepper::csv_to_vector<std::string>(input_type);
+ auto output_type_vec = pepper::csv_to_vector<std::string>(output_type);
+
+ bool TF_style_maxpool =
+ _options->param(Options::AlgorithmParameters::Quantize_TF_style_maxpool) == "True";
+
+ auto layer_params = _options->layer_params(Options::AlgorithmParameters::Quantize_layer_params);
+
+ if (!in_array(to_lower_case(input_model_dtype), qwmm_supported_input_model_dtype))
+ throw std::runtime_error("Unsupported input type. List of supported input types: " +
+ to_string(qwmm_supported_input_model_dtype));
+
+ if (!in_array(to_lower_case(output_model_dtype), qwmm_supported_output_model_dtype))
+ throw std::runtime_error("Unsupported output type. List of supported output types: " +
+ to_string(qwmm_supported_output_model_dtype));
+
+ if (!in_array(to_lower_case(granularity), qwmm_supported_granularity))
+ throw std::runtime_error("Unsupported granularity. List of supported granularity: " +
+ to_string(qwmm_supported_granularity));
+
+ for (const auto &dtype : input_type_vec)
+ {
+ if (!in_array(to_lower_case(dtype), qwmm_supported_input_type))
+ throw std::runtime_error("Unsupported input type. List of supported input types: " +
+ to_string(qwmm_supported_input_type));
+ }
+
+ for (const auto &dtype : output_type_vec)
+ {
+ if (!in_array(to_lower_case(dtype), qwmm_supported_output_type))
+ throw std::runtime_error("Unsupported output type. List of supported output types: " +
+ to_string(qwmm_supported_output_type));
+ }
+
+ if (str_to_granularity(granularity) == QuantizationGranularity::LayerWise &&
+ str_to_dtype(output_model_dtype) != loco::DataType::U8)
+ throw std::runtime_error("Layer-wise quantization only supports uint8 dtype.");
+
+ // Check dtype/granularity of layer params
+ for (auto layer_param : layer_params)
+ {
+ auto name = layer_param->name;
+ if (!in_array(to_lower_case(layer_param->dtype), qwmm_supported_output_model_dtype))
+ {
+ throw std::runtime_error("Unsupported dtype in " + name + ". List of supported dtype: " +
+ to_string(qwmm_supported_output_model_dtype));
+ }
+ if (!in_array(to_lower_case(layer_param->granularity), qwmm_supported_granularity))
+ {
+ throw std::runtime_error(
+ "Unsupported granularity in " + name +
+ ". List of supported granularity: " + to_string(qwmm_supported_granularity));
+ }
+ }
+
+ auto input_types = str_vec_to_dtype_vec(input_type_vec);
+ auto output_types = str_vec_to_dtype_vec(output_type_vec);
+
+ // Canonicalize user-given input/output_type (match with # of inputs/outputs)
+ canonicalize_input_type(g, input_types);
+ canonicalize_output_type(g, output_types);
+
+ // Input model checker for quantization
+ luci::QuantizePreCheckerPass input_model_checker{};
+ input_model_checker.run(g);
+
+ auto ctx = std::make_unique<luci::QuantizeWithMinMaxPass::Context>();
+ {
+ ctx->input_model_dtype = str_to_dtype(input_model_dtype);
+ ctx->output_model_dtype = str_to_dtype(output_model_dtype);
+ ctx->granularity = str_to_granularity(granularity);
+ ctx->input_types = input_types;
+ ctx->output_types = output_types;
+ ctx->TF_style_maxpool = TF_style_maxpool;
+
+ for (auto layer_param : layer_params)
+ {
+ LayerInfo info;
+ {
+ info.name = layer_param->name;
+ info.dtype = str_to_dtype(layer_param->dtype);
+ info.granularity = str_to_granularity(layer_param->granularity);
+ }
+ ctx->layers_info.emplace_back(info);
+ }
+ }
+
+ luci::QuantizeWithMinMaxPass quantizer(std::move(ctx));
+
+ quantizer.run(g);
+
+ auto verify_ctx = std::make_unique<luci::QuantizedModelVerifier::Context>();
+ {
+ verify_ctx->output_model_dtype = str_to_dtype(output_model_dtype);
+ verify_ctx->granularity = str_to_granularity(granularity);
+ verify_ctx->input_types = input_types;
+ verify_ctx->output_types = output_types;
+ verify_ctx->TF_style_maxpool = TF_style_maxpool;
+
+ for (auto layer_param : layer_params)
+ {
+ LayerInfo info;
+ {
+ info.name = layer_param->name;
+ info.dtype = str_to_dtype(layer_param->dtype);
+ info.granularity = str_to_granularity(layer_param->granularity);
+ }
+ verify_ctx->layers_info.emplace_back(info);
+ }
+ }
+
+ // Verify the type/granularity of the quantized model
+ luci::QuantizedModelVerifier verifier(std::move(verify_ctx));
+
+ verifier.verify(g);
+ }
+
+ if (_options->query(Options::Algorithm::QuantizeWeights))
+ {
+ static const std::vector<std::string> qw_supported_input_model_dtype{"float32"};
+ static const std::vector<std::string> qw_supported_output_model_dtype{"int8", "int16"};
+ static const std::vector<std::string> qw_supported_granularity{"channel"};
+
+ auto input_model_dtype =
+ _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
+ auto output_model_dtype =
+ _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);
+ auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity);
+
+ if (!in_array(to_lower_case(input_model_dtype), qw_supported_input_model_dtype))
+ throw std::runtime_error("Unsupported input type. List of supported input type: " +
+ to_string(qw_supported_input_model_dtype));
+
+ if (!in_array(to_lower_case(output_model_dtype), qw_supported_output_model_dtype))
+ throw std::runtime_error("Unsupported output type. List of supported output type: " +
+ to_string(qw_supported_output_model_dtype));
+
+ if (!in_array(to_lower_case(granularity), qw_supported_granularity))
+ throw std::runtime_error("Unsupported granularity. List of supported granularity: " +
+ to_string(qw_supported_granularity));
+ auto ctx = std::make_unique<luci::QuantizeWeightsPass::Context>();
+ {
+ ctx->input_model_dtype = str_to_dtype(input_model_dtype);
+ ctx->output_model_dtype = str_to_dtype(output_model_dtype);
+ ctx->granularity = str_to_granularity(granularity);
+ }
+ luci::QuantizeWeightsPass weights_quantizer(std::move(ctx));
+
+ weights_quantizer.run(g);
+ }
+
+ // Requantize
+ if (_options->query(Options::Algorithm::Requantize))
+ {
+ static const std::vector<std::string> rq_supported_input_model_dtype{"int8"};
+ static const std::vector<std::string> rq_supported_output_model_dtype{"uint8"};
+
+ auto input_model_dtype =
+ _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
+ auto output_model_dtype =
+ _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);
+
+ if (!in_array(to_lower_case(input_model_dtype), rq_supported_input_model_dtype))
+ throw std::runtime_error("Unsupported input type. List of supported input types: " +
+ to_string(rq_supported_input_model_dtype));
+
+ if (!in_array(to_lower_case(output_model_dtype), rq_supported_output_model_dtype))
+ throw std::runtime_error("Unsupported output type. List of supported output types: " +
+ to_string(rq_supported_output_model_dtype));
+
+ luci::RequantizePass requantizer(str_to_dtype(input_model_dtype),
+ str_to_dtype(output_model_dtype));
+ requantizer.run(g);
+ }
+
+ // Forcibly write quantparam to the specified tensors
+ // NOTE Only per-tensor (not per-channel) qparam can be written
+ if (_options->query(Options::Algorithm::ForceQuantParam))
+ {
+ ForceQuantParamPass::TensorVector tensors =
+ _options->params(Options::AlgorithmParameters::Quantize_tensor_names);
+ auto str_scales = _options->params(Options::AlgorithmParameters::Quantize_scales);
+ auto str_zero_points = _options->params(Options::AlgorithmParameters::Quantize_zero_points);
+
+ // Cast scales/zero_points to proper types
+ ForceQuantParamPass::ScaleVector scales = lexical_cast<float>(str_scales);
+ ForceQuantParamPass::ZPVector zero_points = lexical_cast<int64_t>(str_zero_points);
+
+ ForceQuantParamPass fq(tensors, scales, zero_points);
+ fq.run(g);
+ }
+
+ // Copy quantparam of a tensor to another tensor
+ if (_options->query(Options::Algorithm::CopyQuantParam))
+ {
+ CopyQuantParamPass::TensorVector src_tensors =
+ _options->params(Options::AlgorithmParameters::Quantize_src_tensor_names);
+ CopyQuantParamPass::TensorVector dst_tensors =
+ _options->params(Options::AlgorithmParameters::Quantize_dst_tensor_names);
+
+ CopyQuantParamPass cq(src_tensors, dst_tensors);
+ cq.run(g);
+ }
+
+ // Convert quantized model to fake-quantized model
+ if (_options->query(Options::Algorithm::ConvertToFakeQuantizedModel))
+ {
+ luci::ConvertToFakeQuantizedModelPass fake_quantizer;
+ fake_quantizer.run(g);
+
+ logo::Phase phase;
+
+ // Default passes
+ phase.emplace_back(std::make_unique<logo::RemoveDeadNodeWithQueryPass>());
+ phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+ phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
+
+ // Remove redundant Dequantize Ops generated during fake quantization
+ phase.emplace_back(std::make_unique<luci::RemoveRedundantDequantizePass>());
+ // Fold Dequantize Ops generated during fake quantization
+ phase.emplace_back(std::make_unique<luci::FoldDequantizePass>());
+
+ ProgressReporter prog(g, logo::PhaseStrategy::Restart);
+ logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{g};
+ phase_runner.attach(&prog);
+ phase_runner.run(phase);
+ }
+
+ logo::Phase phase;
+
+ // Do Shape/Type inference
+ phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+ phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
+
+ ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
+ logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
+ phase_runner.attach(&prog);
+ phase_runner.run(phase);
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/CircleQuantizer.test.cpp b/compiler/luci/pass/src/CircleQuantizer.test.cpp
new file mode 100644
index 000000000..5766d5fe5
--- /dev/null
+++ b/compiler/luci/pass/src/CircleQuantizer.test.cpp
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/CircleQuantizer.h"
+
+#include <gtest/gtest.h>
+
+using namespace luci;
+using Algorithms = luci::CircleQuantizer::Options::Algorithm;
+using AlgorithmParameters = luci::CircleQuantizer::Options::AlgorithmParameters;
+
+TEST(CircleQuantizerTest, quantize_quantdequant_simple)
+{
+ loco::Graph g;
+ luci::CircleQuantizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::QuantizeDequantizeWeights);
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
+ options->param(AlgorithmParameters::Quantize_granularity, "layer");
+
+ o.quantize(&g);
+
+ SUCCEED();
+}
+
+TEST(CircleQuantizerTest, quantize_quantdequant_input_NEG)
+{
+ loco::Graph g;
+ luci::CircleQuantizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::QuantizeDequantizeWeights);
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, "invalid");
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
+ options->param(AlgorithmParameters::Quantize_granularity, "layer");
+
+ EXPECT_THROW(o.quantize(&g), std::runtime_error);
+}
+
+TEST(CircleQuantizerTest, quantize_quantdequant_output_NEG)
+{
+ loco::Graph g;
+ luci::CircleQuantizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::QuantizeDequantizeWeights);
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, "invalid");
+ options->param(AlgorithmParameters::Quantize_granularity, "layer");
+
+ EXPECT_THROW(o.quantize(&g), std::runtime_error);
+}
+
+TEST(CircleQuantizerTest, quantize_quantdequant_gran_NEG)
+{
+ loco::Graph g;
+ luci::CircleQuantizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::QuantizeDequantizeWeights);
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
+ options->param(AlgorithmParameters::Quantize_granularity, "invalid");
+
+ EXPECT_THROW(o.quantize(&g), std::runtime_error);
+}
+
+TEST(CircleQuantizerTest, quantize_minmax_simple)
+{
+ loco::Graph g;
+ luci::CircleQuantizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::QuantizeWithMinMax);
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
+ options->param(AlgorithmParameters::Quantize_granularity, "layer");
+
+ o.quantize(&g);
+
+ SUCCEED();
+}
+
+TEST(CircleQuantizerTest, quantize_minmax_input_NEG)
+{
+ loco::Graph g;
+ luci::CircleQuantizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::QuantizeWithMinMax);
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, "invalid");
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
+ options->param(AlgorithmParameters::Quantize_granularity, "layer");
+
+ EXPECT_THROW(o.quantize(&g), std::runtime_error);
+}
+
+TEST(CircleQuantizerTest, quantize_minmax_output_NEG)
+{
+ loco::Graph g;
+ luci::CircleQuantizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::QuantizeWithMinMax);
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, "invalid");
+ options->param(AlgorithmParameters::Quantize_granularity, "layer");
+
+ EXPECT_THROW(o.quantize(&g), std::runtime_error);
+}
+
+TEST(CircleQuantizerTest, quantize_minmax_gran_NEG)
+{
+ loco::Graph g;
+ luci::CircleQuantizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::QuantizeWithMinMax);
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
+ options->param(AlgorithmParameters::Quantize_granularity, "invalid");
+
+ EXPECT_THROW(o.quantize(&g), std::runtime_error);
+}
+
+TEST(CircleQuantizerTest, quantize_requant_simple)
+{
+ loco::Graph g;
+ luci::CircleQuantizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::Requantize);
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, "int8");
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
+
+ o.quantize(&g);
+
+ SUCCEED();
+}
+
+TEST(CircleQuantizerTest, quantize_requant_input_NEG)
+{
+ loco::Graph g;
+ luci::CircleQuantizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::Requantize);
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, "invalid");
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
+
+ EXPECT_THROW(o.quantize(&g), std::runtime_error);
+}
+
+TEST(CircleQuantizerTest, quantize_requant_output_NEG)
+{
+ loco::Graph g;
+ luci::CircleQuantizer o;
+
+ auto options = o.options();
+
+ options->enable(Algorithms::Requantize);
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, "int8");
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, "invalid");
+
+ EXPECT_THROW(o.quantize(&g), std::runtime_error);
+}
diff --git a/compiler/luci/pass/src/CircleShapeInferencePass.cpp b/compiler/luci/pass/src/CircleShapeInferencePass.cpp
new file mode 100644
index 000000000..ddab22421
--- /dev/null
+++ b/compiler/luci/pass/src/CircleShapeInferencePass.cpp
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "helpers/InferenceCandidates.h"
+
+#include "luci/Pass/CircleShapeInferencePass.h"
+
+#include <luci/Service/CircleShapeInference.h>
+
+#include <loco.h>
+
+namespace
+{
+
+bool is_same_shape(luci::CircleNode *node, const loco::TensorShape &shape)
+{
+ if (node->shape_status() != luci::ShapeStatus::VALID)
+ return false;
+
+ if (node->rank() != shape.rank())
+ return false;
+
+ for (uint32_t i = 0; i < node->rank(); ++i)
+ {
+ if (node->dim(i).known() != shape.dim(i).known())
+ return false;
+
+ if (node->dim(i).value() != shape.dim(i).value())
+ return false;
+ }
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool CircleShapeInferencePass::run(luci::Module *m)
+{
+ bool changed = false;
+
+ for (size_t g = 0; g < m->size(); ++g)
+ {
+ if (run(m->graph(g)))
+ changed = true;
+ }
+
+ return changed;
+}
+
+bool CircleShapeInferencePass::run(loco::Graph *g)
+{
+ luci::sinf::Rule shape_infer_rule;
+ bool changed = false;
+
+ for (auto node : inference_candidates(g))
+ {
+ loco::TensorShape shape;
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+
+ if (shape_infer_rule.infer(circle_node, shape) && !is_same_shape(circle_node, shape))
+ {
+ circle_node->rank(shape.rank());
+ for (uint32_t i = 0; i < shape.rank(); ++i)
+ circle_node->dim(i) = shape.dim(i);
+
+ circle_node->shape_status(luci::ShapeStatus::VALID);
+
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/CircleShapeInferencePass.test.cpp b/compiler/luci/pass/src/CircleShapeInferencePass.test.cpp
new file mode 100644
index 000000000..cb3f1fe5f
--- /dev/null
+++ b/compiler/luci/pass/src/CircleShapeInferencePass.test.cpp
@@ -0,0 +1,364 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/CircleShapeInferencePass.h"
+
+#include <loco.h>
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+TEST(CircleShapeInferencePassTest, name)
+{
+ luci::CircleShapeInferencePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+/**
+ * This test checks whether shape inference is done in topological order.
+ *
+ * When perm() of "transpose1" is changed from "old_perm" to "new_perm"
+ * by some luci/Pass as in the diagram below, the shape_status of "transpose1"
+ * is still VALID even though its shape should change.
+ * If "transpose2" is visited before the shape of "transpose1" is updated,
+ * "transpose2" can reference the shape of "relu", which is not updated yet.
+ * The shape of "transpose2" then becomes 3x5x5x1, which causes an error at "conv2d".
+ *
+ * <Initial graph>
+ * 4x1x1x3
+ * [old_perm] ----------+ [filter] ----------+
+ * (0,2,1,3) | |
+ * | [bias] ----------+
+ * | |
+ * input ------> [transpose1] ------> [relu] ------> [conv2d] ------> output
+ * 1x5x5x3 1x5x5x3 1x5x5x3 1x5x5x4
+ *
+ *
+ * <Right after transformation>
+ * 4x1x1x3
+ * [new_perm] ----------+-----------------------------------+ [filter] ------+
+ * (3,2,1,0) | | |
+ * | | [bias] ------+
+ * | | |
+ * input ------> [transpose1] ------> [relu] ------> [transpose2] ------> [conv2d] ------> output
+ * 1x5x5x3 1x5x5x3 1x5x5x3 ? 1x5x5x4
+ *
+ *
+ * <Expected result>
+ * 4x1x1x3
+ * [new_perm] ----------+-----------------------------------+ [filter] ------+
+ * (3,2,1,0) | | |
+ * | | [bias] ------+
+ * | | |
+ * input ------> [transpose1] ------> [relu] ------> [transpose2] ------> [conv2d] ------> output
+ * 1x5x5x3 3x5x5x1 3x5x5x1 1x5x5x3 1x5x5x4
+ *
+ */
+TEST(CircleShapeInferencePassTest, original_node_change)
+{
+ luci::CircleShapeInferencePass pass;
+ auto g = loco::make_graph();
+
+ // Wrapped in a lambda so ASSERT_NO_THROW can check the whole loop
+ auto shape_inference_run = [&]() {
+ while (pass.run(g.get()) == true)
+ ;
+ };
+
+ // Create nodes to make relu traversed first
+ auto input = g->nodes()->create<luci::CircleInput>();
+ auto relu = g->nodes()->create<luci::CircleRelu>();
+ auto old_perm = g->nodes()->create<luci::CircleConst>();
+ auto transpose1 = g->nodes()->create<luci::CircleTranspose>();
+ auto filter = g->nodes()->create<luci::CircleConst>();
+ auto bias = g->nodes()->create<luci::CircleConst>();
+ auto conv2d = g->nodes()->create<luci::CircleConv2D>();
+ auto output = g->nodes()->create<luci::CircleOutput>();
+ auto new_perm = g->nodes()->create<luci::CircleConst>();
+ auto transpose2 = g->nodes()->create<luci::CircleTranspose>();
+
+ // Build up initial graph
+ auto graph_input = g->inputs()->create();
+ graph_input->shape({1, 5, 5, 3});
+
+ input->index(graph_input->index());
+ input->shape({1, 5, 5, 3});
+ input->shape_status(luci::ShapeStatus::VALID);
+
+ old_perm->dtype(loco::DataType::S32);
+ old_perm->size<loco::DataType::S32>(4);
+ old_perm->shape({4});
+ old_perm->at<loco::DataType::S32>(0) = 0;
+ old_perm->at<loco::DataType::S32>(1) = 2;
+ old_perm->at<loco::DataType::S32>(2) = 1;
+ old_perm->at<loco::DataType::S32>(3) = 3;
+ old_perm->shape_status(luci::ShapeStatus::VALID);
+
+ transpose1->a(input);
+ transpose1->perm(old_perm);
+
+ relu->features(transpose1);
+
+ filter->dtype(loco::DataType::FLOAT32);
+ filter->size<loco::DataType::FLOAT32>(4 * 1 * 1 * 3);
+ filter->shape({4, 1, 1, 3});
+ filter->shape_status(luci::ShapeStatus::VALID);
+
+ bias->dtype(loco::DataType::FLOAT32);
+ bias->size<loco::DataType::FLOAT32>(4);
+ bias->shape({4});
+ bias->shape_status(luci::ShapeStatus::VALID);
+
+ conv2d->input(relu);
+ conv2d->filter(filter);
+ conv2d->bias(bias);
+ conv2d->padding(luci::Padding::VALID);
+ conv2d->stride()->h(1);
+ conv2d->stride()->w(1);
+ conv2d->dilation()->h(1);
+ conv2d->dilation()->w(1);
+
+ output->from(conv2d);
+ auto graph_output = g->outputs()->create();
+ output->index(graph_output->index());
+ graph_output->shape({1, 5, 5, 4});
+
+ ASSERT_NO_THROW(shape_inference_run());
+
+ // Transform graph
+ new_perm->dtype(loco::DataType::S32);
+ new_perm->size<loco::DataType::S32>(4);
+ new_perm->shape({4});
+ new_perm->at<loco::DataType::S32>(0) = 3;
+ new_perm->at<loco::DataType::S32>(1) = 2;
+ new_perm->at<loco::DataType::S32>(2) = 1;
+ new_perm->at<loco::DataType::S32>(3) = 0;
+ new_perm->shape_status(luci::ShapeStatus::VALID);
+
+ transpose1->perm(new_perm);
+
+ transpose2->a(relu);
+ transpose2->perm(new_perm);
+
+ conv2d->input(transpose2);
+
+ ASSERT_NO_THROW(shape_inference_run());
+
+ // Check result of shape inference is correct
+ ASSERT_EQ(3, transpose1->dim(0).value());
+ ASSERT_EQ(5, transpose1->dim(1).value());
+ ASSERT_EQ(5, transpose1->dim(2).value());
+ ASSERT_EQ(1, transpose1->dim(3).value());
+
+ ASSERT_EQ(3, relu->dim(0).value());
+ ASSERT_EQ(5, relu->dim(1).value());
+ ASSERT_EQ(5, relu->dim(2).value());
+ ASSERT_EQ(1, relu->dim(3).value());
+
+ ASSERT_EQ(1, transpose2->dim(0).value());
+ ASSERT_EQ(5, transpose2->dim(1).value());
+ ASSERT_EQ(5, transpose2->dim(2).value());
+ ASSERT_EQ(3, transpose2->dim(3).value());
+
+ ASSERT_EQ(1, conv2d->dim(0).value());
+ ASSERT_EQ(5, conv2d->dim(1).value());
+ ASSERT_EQ(5, conv2d->dim(2).value());
+ ASSERT_EQ(4, conv2d->dim(3).value());
+
+ SUCCEED();
+}
+
+/**
+ * This test checks the case where the imported shape is wrong.
+ *
+ * Even though "concat1" has a wrong shape at first, the correct shape should be inferred.
+ *
+ * <Initial graph>
+ *
+ * 1x1x1x1
+ * input1 ------+ 8x7x6x5
+ * +-----> [concat1] ------+
+ * input2 ------+ (axis=3) | 1x1x2x3
+ * 1x1x1x2 +------> [concat2] ------> output
+ * | (axis=2)
+ * 1x1x1x3 |
+ * input3 ------------------------------+
+ *
+ *
+ * <Expected result>
+ *
+ * 1x1x1x1
+ * input1 ------+ 1x1x1x3
+ * +-----> [concat1] ------+
+ * input2 ------+ (axis=3) | 1x1x2x3
+ * 1x1x1x2 +------> [concat2] ------> output
+ * | (axis=2)
+ * 1x1x1x3 |
+ * input3 ------------------------------+
+ */
+TEST(CircleShapeInferencePassTest, wrong_imported_shape)
+{
+ luci::CircleShapeInferencePass pass;
+ auto g = loco::make_graph();
+
+ // Wrapped in a lambda so ASSERT_NO_THROW can check the whole loop
+ auto shape_inference_run = [&]() {
+ while (pass.run(g.get()) == true)
+ ;
+ };
+
+ // Create nodes to make concat2 traversed first
+ auto concat2 = g->nodes()->create<luci::CircleConcatenation>(2);
+ auto concat1 = g->nodes()->create<luci::CircleConcatenation>(2);
+ auto input1 = g->nodes()->create<luci::CircleInput>();
+ auto input2 = g->nodes()->create<luci::CircleInput>();
+ auto input3 = g->nodes()->create<luci::CircleInput>();
+
+ // Build up initial graph
+ auto graph_input1 = g->inputs()->create();
+ auto graph_input2 = g->inputs()->create();
+ auto graph_input3 = g->inputs()->create();
+ graph_input1->shape({1, 1, 1, 1});
+ graph_input2->shape({1, 1, 1, 2});
+ graph_input3->shape({1, 1, 1, 3});
+
+ input1->index(graph_input1->index());
+ input1->shape({1, 1, 1, 1});
+ input1->shape_status(luci::ShapeStatus::VALID);
+
+ input2->index(graph_input2->index());
+ input2->shape({1, 1, 1, 2});
+ input2->shape_status(luci::ShapeStatus::VALID);
+
+ input3->index(graph_input3->index());
+ input3->shape({1, 1, 1, 3});
+ input3->shape_status(luci::ShapeStatus::VALID);
+
+ concat1->values(0, input1);
+ concat1->values(1, input2);
+ concat1->axis(3);
+ concat1->shape({8, 7, 6, 5}); // Intentionally set wrong shape
+ concat1->shape_status(luci::ShapeStatus::VALID);
+
+ concat2->values(0, concat1);
+ concat2->values(1, input3);
+ concat2->axis(2);
+
+ auto output = g->nodes()->create<luci::CircleOutput>();
+ output->from(concat2);
+ auto graph_output = g->outputs()->create();
+ output->index(graph_output->index());
+ graph_output->shape({1, 1, 2, 3});
+
+ ASSERT_NO_THROW(shape_inference_run());
+
+ // Check result of shape inference is correct
+ ASSERT_EQ(1, concat1->dim(0).value());
+ ASSERT_EQ(1, concat1->dim(1).value());
+ ASSERT_EQ(1, concat1->dim(2).value());
+ ASSERT_EQ(3, concat1->dim(3).value());
+
+ ASSERT_EQ(1, concat2->dim(0).value());
+ ASSERT_EQ(1, concat2->dim(1).value());
+ ASSERT_EQ(2, concat2->dim(2).value());
+ ASSERT_EQ(3, concat2->dim(3).value());
+
+ SUCCEED();
+}
+
+/**
+ * This test checks that shapes are inferred even for virtual operations that are
+ * not used for the graph output.
+ *
+ * Although "split_out2" is not used for the graph output, its shape should be inferred.
+ *
+ * <Initial graph>
+ *
+ *
+ * 1x6 +----> [split_out1] ----> output
+ * input ------> [split] -----+
+ * (split_dim=1) +----> [split_out2]
+ * (num_split=2)
+ *
+ *
+ * <Expected result>
+ * 1x3 1x3
+ * 1x6 +----> [split_out1] ----> output
+ * input ------> [split] -----+
+ * (split_dim=1) +----> [split_out2]
+ * (num_split=2) 1x3
+ */
+TEST(CircleShapeInferencePassTest, not_used_virtual_op)
+{
+ luci::CircleShapeInferencePass pass;
+ auto g = loco::make_graph();
+
+ // Wrapped in a lambda so ASSERT_NO_THROW can check the whole loop
+ auto shape_inference_run = [&]() {
+ while (pass.run(g.get()) == true)
+ ;
+ };
+
+ // Create nodes
+ auto input = g->nodes()->create<luci::CircleInput>();
+ auto split = g->nodes()->create<luci::CircleSplit>();
+ auto split_out1 = g->nodes()->create<luci::CircleSplitOut>();
+ auto split_out2 = g->nodes()->create<luci::CircleSplitOut>();
+ auto split_dim = g->nodes()->create<luci::CircleConst>();
+
+ // Build up initial graph
+ auto graph_input1 = g->inputs()->create();
+ graph_input1->shape({1, 6});
+
+ input->index(graph_input1->index());
+ input->shape({1, 6});
+ input->shape_status(luci::ShapeStatus::VALID);
+
+ split_dim->dtype(loco::DataType::S32);
+ split_dim->size<loco::DataType::S32>(1);
+ split_dim->shape({1});
+ split_dim->at<loco::DataType::S32>(0) = 1;
+ split_dim->shape_status(luci::ShapeStatus::VALID);
+
+ split->split_dim(split_dim);
+ split->input(input);
+ split->num_split(2);
+
+ split_out1->input(split);
+ split_out1->index(0);
+
+ split_out2->input(split);
+ split_out2->index(1);
+
+ auto output = g->nodes()->create<luci::CircleOutput>();
+ output->from(split_out1);
+ auto graph_output = g->outputs()->create();
+ output->index(graph_output->index());
+ graph_output->shape({1, 3});
+
+ ASSERT_NO_THROW(shape_inference_run());
+
+ // Check result of shape inference is correct
+ ASSERT_EQ(1, split_out1->dim(0).value());
+ ASSERT_EQ(3, split_out1->dim(1).value());
+
+ ASSERT_EQ(1, split_out2->dim(0).value());
+ ASSERT_EQ(3, split_out2->dim(1).value());
+
+ SUCCEED();
+}
diff --git a/compiler/luci/pass/src/CircleTypeInferencePass.cpp b/compiler/luci/pass/src/CircleTypeInferencePass.cpp
new file mode 100644
index 000000000..fb3755ffa
--- /dev/null
+++ b/compiler/luci/pass/src/CircleTypeInferencePass.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "helpers/InferenceCandidates.h"
+
+#include "luci/Pass/CircleTypeInferencePass.h"
+
+#include <luci/Service/CircleTypeInference.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CircleTypeInferencePass::run(luci::Module *m)
+{
+ bool changed = false;
+
+ for (size_t g = 0; g < m->size(); ++g)
+ {
+ if (run(m->graph(g)))
+ changed = true;
+ }
+
+ return changed;
+}
+
+bool CircleTypeInferencePass::run(loco::Graph *g)
+{
+ luci::tinf::Rule type_infer_rule;
+ bool changed = false;
+
+ for (auto node : inference_candidates(g))
+ {
+ loco::DataType dtype;
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+
+ if (type_infer_rule.infer(circle_node, dtype) && circle_node->dtype() != dtype)
+ {
+ circle_node->dtype(dtype);
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/CircleTypeInferencePass.test.cpp b/compiler/luci/pass/src/CircleTypeInferencePass.test.cpp
new file mode 100644
index 000000000..415424a6f
--- /dev/null
+++ b/compiler/luci/pass/src/CircleTypeInferencePass.test.cpp
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/CircleTypeInferencePass.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleTypeInferencePassTest, name)
+{
+ luci::CircleTypeInferencePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
diff --git a/compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp
new file mode 100644
index 000000000..ac4320246
--- /dev/null
+++ b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp
@@ -0,0 +1,1598 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ConvertNCHWToNHWCPass.h"
+#include "CircleOptimizerUtils.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+#include <luci/Service/Nodes/CircleConst.h>
+#include <luci/Log.h>
+
+#include <functional>
+
+namespace
+{
+
+// Return true if `from` can be broadcast to `to`
+// `to`'s shape is [N, C, H, W]
+bool broadcastable(const luci::CircleConst *from, const luci::CircleNode *to)
+{
+ assert(to->rank() == 4); // FIX_CALLER_UNLESS
+
+ const auto from_rank = from->rank();
+ if (from_rank > 4)
+ return false;
+
+ // Scalar is always broadcastable
+ if (from_rank == 0)
+ return true;
+
+ for (uint32_t i = 1; i <= from_rank; i++)
+ {
+ auto to_index = 4 - i;
+ auto from_index = from_rank - i;
+
+ if (from->dim(from_index).value() != to->dim(to_index).value() and
+ from->dim(from_index).value() != 1)
+ return false;
+ }
+
+ return true;
+}
+
+// Return a rank-4 version of node: the node itself if it already has rank 4,
+// otherwise a clone whose shape is padded with 1s at the front.
+// node must have rank less than or equal to 4.
+// For example, [2] -> [1, 1, 1, 2]
+luci::CircleConst *expand_to_rank_4(luci::CircleConst *node)
+{
+ auto original_rank = node->rank();
+
+ assert(original_rank <= 4); // FIX_CALLER_UNLESS
+
+ if (original_rank == 4)
+ return node;
+
+ std::vector<uint32_t> original_shape;
+ for (uint32_t i = 0; i < original_rank; i++)
+ {
+ original_shape.emplace_back(node->dim(i).value());
+ }
+
+ auto cloned = luci::clone(node);
+ cloned->name(cloned->name() + "_rank4");
+
+ cloned->rank(4);
+ for (uint32_t i = 0; i < (4 - original_rank); i++)
+ cloned->dim(i) = 1;
+
+ for (uint32_t i = 0; i < original_rank; i++)
+ cloned->dim(i + (4 - original_rank)) = original_shape.at(i);
+
+ return cloned;
+}
+
+bool is_output(const loco::Node *node)
+{
+ auto cnode = loco::must_cast<const luci::CircleNode *>(node);
+ auto opcode = cnode->opcode();
+ return opcode == luci::CircleOpcode::CIRCLEOUTPUT ||
+ opcode == luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE;
+}
+
+bool is_same_shape(const luci::CircleNode *node, const std::vector<loco::Dimension> &shape)
+{
+ if (not node)
+ return false;
+
+ if (shape.size() != node->rank())
+ return false;
+
+ for (uint32_t i = 0; i < shape.size(); i++)
+ {
+ if (not(node->dim(i) == shape[i]))
+ return false;
+ }
+ return true;
+}
+
+enum class DataFormat
+{
+ NCHW,
+ NHWC
+};
+
+/**
+ * @brief Set annotation for DataFormat (NCHW, NHWC)
+ *
+ * @note DataFormatAnnotation will live longer than this Pass (until the
+ * annotated loco::Node is erased). So, do not use large data in the
+ * annotation to avoid excessive memory usage.
+ */
+class DataFormatAnnotation final : public loco::NodeAnnotation
+{
+public:
+ DataFormatAnnotation(const DataFormat &format) : _format{format}
+ {
+ // DO NOTHING
+ }
+
+public:
+ const DataFormat &format(void) const { return _format; }
+
+private:
+ DataFormat _format;
+};
+
+void set_data_format(loco::Node *node, const DataFormat &format)
+{
+ node->annot(std::make_unique<DataFormatAnnotation>(format));
+}
+
+DataFormat get_data_format(loco::Node *node)
+{
+ assert(node->annot<DataFormatAnnotation>() != nullptr);
+ return node->annot<DataFormatAnnotation>()->format();
+}
+
+bool has_data_format(loco::Node *node) { return node->annot<DataFormatAnnotation>() != nullptr; }
+
+bool check_4d_transpose(loco::Node *node, const std::vector<int32_t> indices)
+{
+ assert(indices.size() == 4);
+
+ auto trans = dynamic_cast<luci::CircleTranspose *>(node);
+ if (not trans)
+ return false;
+
+ if (not trans->perm())
+ return false;
+
+ auto perm = dynamic_cast<luci::CircleConst *>(trans->perm());
+ // Only const perm is supported
+ if (not perm)
+ return false;
+
+ if (perm->dtype() != loco::DataType::S32)
+ return false;
+
+ if (perm->size<loco::DataType::S32>() != 4)
+ return false;
+
+ for (uint32_t i = 0; i < 4; i++)
+ {
+ if (perm->at<loco::DataType::S32>(i) != indices[i])
+ return false;
+ }
+
+ return true;
+}
+
+luci::CircleTranspose *create_4d_transpose(luci::CircleNode *node,
+ const std::vector<int32_t> indices)
+{
+ assert(indices.size() == 4);
+
+ auto name = node->name();
+ assert(name.length() > 0);
+
+ auto perm = node->graph()->nodes()->create<luci::CircleConst>();
+ perm->dtype(loco::DataType::S32);
+ perm->size<loco::DataType::S32>(4);
+ perm->rank(1);
+ perm->dim(0) = 4;
+ for (uint32_t i = 0; i < 4; i++)
+ perm->at<loco::DataType::S32>(i) = indices[i];
+ perm->shape_status(luci::ShapeStatus::VALID);
+
+ auto make_string = [](const std::vector<int32_t> &nums) {
+ std::string str;
+ for (auto num : nums)
+ {
+ if (str.length() > 0)
+ str += ".";
+ str += std::to_string(num);
+ }
+ return str;
+ };
+
+ auto str_indices = make_string(indices);
+
+ perm->name(name + "/Transpose_" + str_indices + "/perm");
+
+ auto trans = node->graph()->nodes()->create<luci::CircleTranspose>();
+ trans->perm(perm);
+ trans->name(name + "/Transpose_" + str_indices);
+ luci::add_origin(trans, luci::get_origin(node));
+
+ return trans;
+}
+
+luci::CircleTranspose *create_Nd_transpose(luci::CircleNode *node,
+ const std::vector<int32_t> indices)
+{
+ auto name = node->name();
+ assert(name.length() > 0);
+
+ auto perm = node->graph()->nodes()->create<luci::CircleConst>();
+ perm->dtype(loco::DataType::S32);
+ perm->size<loco::DataType::S32>(indices.size());
+ perm->rank(1);
+ perm->dim(0) = indices.size();
+ for (uint32_t i = 0; i < indices.size(); i++)
+ perm->at<loco::DataType::S32>(i) = indices[i];
+ perm->shape_status(luci::ShapeStatus::VALID);
+
+ auto make_string = [](const std::vector<int32_t> &nums) {
+ std::string str;
+ for (auto num : nums)
+ {
+ if (str.length() > 0)
+ str += ".";
+ str += std::to_string(num);
+ }
+ return str;
+ };
+
+ auto str_indices = make_string(indices);
+
+ perm->name(name + "/Transpose_" + str_indices + "/perm");
+
+ auto trans = node->graph()->nodes()->create<luci::CircleTranspose>();
+ trans->perm(perm);
+ trans->name(name + "/Transpose_" + str_indices);
+ luci::add_origin(trans, luci::get_origin(node));
+
+ return trans;
+}
+
+int32_t nchw_axis_to_nhwc(int32_t axis)
+{
+ uint32_t pos_axis = axis >= 0 ? static_cast<uint32_t>(axis) : static_cast<uint32_t>(axis + 4);
+ static const uint32_t to_nhwc[4] = {0, 3, 1, 2};
+ if (pos_axis > 3)
+ throw std::runtime_error("Concat axis must be in range [-4, 4)");
+ return to_nhwc[pos_axis];
+}
+
+luci::CircleTranspose *create_post_transpose(luci::CircleNode *node)
+{
+ return create_4d_transpose(node, {0, 3, 1, 2});
+}
+
+luci::CircleTranspose *create_pre_transpose(luci::CircleNode *node)
+{
+ return create_4d_transpose(node, {0, 2, 3, 1});
+}
+
+bool check_4d_reshape(loco::Node *node, const std::vector<int32_t> indices)
+{
+ assert(indices.size() == 4); // FIX_CALLER_UNLESS
+
+ auto reshape = dynamic_cast<luci::CircleReshape *>(node);
+ if (not reshape)
+ return false;
+
+ if (reshape->rank() != 4)
+ return false;
+
+ auto input = loco::must_cast<luci::CircleNode *>(reshape->tensor());
+ if (input->shape_status() != luci::ShapeStatus::VALID)
+ return false;
+
+ if (input->rank() != 4)
+ return false;
+
+ if (reshape->shape_status() != luci::ShapeStatus::VALID)
+ return false;
+
+ if (!(input->dim(0) == reshape->dim(indices[0])) ||
+ !(input->dim(1) == reshape->dim(indices[1])) ||
+ !(input->dim(2) == reshape->dim(indices[2])) || !(input->dim(3) == reshape->dim(indices[3])))
+ return false;
+
+ return true;
+}
+
+// Check if Reshape that converts NCHW -> NHWC
+bool is_pre_reshape(loco::Node *node) { return check_4d_reshape(node, {0, 3, 1, 2}); }
+
+// Check if Reshape that converts NHWC -> NCHW
+bool is_post_reshape(loco::Node *node) { return check_4d_reshape(node, {0, 2, 3, 1}); }
+
+bool is_post_transpose(loco::Node *node) { return check_4d_transpose(node, {0, 3, 1, 2}); }
+
+bool is_pre_transpose(loco::Node *node) { return check_4d_transpose(node, {0, 2, 3, 1}); }
+
+uint32_t cal_offset(const loco::TensorShape &dimension, const uint32_t *indices)
+{
+ return indices[0] * dimension.dim(1).value() * dimension.dim(2).value() *
+ dimension.dim(3).value() +
+ indices[1] * dimension.dim(2).value() * dimension.dim(3).value() +
+ indices[2] * dimension.dim(3).value() + indices[3];
+}
+
+luci::CircleConst *create_NHWC_paddings(luci::CircleConst *paddings)
+{
+ // paddings shape is (4,2) (it was checked by is_NCHW)
+ assert(paddings != nullptr);
+ assert(paddings->rank() == 2);
+ assert(paddings->dim(0).value() == 4);
+ assert(paddings->dim(1).value() == 2);
+
+ // paddings for idx 0~3 are 0 (checked by is_NCHW)
+ assert(paddings->at<loco::DataType::S32>(0) == 0);
+ assert(paddings->at<loco::DataType::S32>(1) == 0);
+ assert(paddings->at<loco::DataType::S32>(2) == 0);
+ assert(paddings->at<loco::DataType::S32>(3) == 0);
+
+ auto name = paddings->name();
+ assert(name.length() > 0);
+
+ auto nhwc_paddings = paddings->graph()->nodes()->create<luci::CircleConst>();
+ nhwc_paddings->dtype(loco::DataType::S32);
+ nhwc_paddings->shape({4, 2});
+ nhwc_paddings->shape_status(luci::ShapeStatus::VALID);
+ nhwc_paddings->size<loco::DataType::S32>(4 * 2);
+ nhwc_paddings->name(name + "_NHWC");
+
+ for (uint32_t dim = 0; dim < 4; dim++)
+ {
+ for (uint32_t i = 0; i < 2; i++)
+ {
+ int32_t data = 0;
+
+ if (dim == 1)
+ {
+ // get third dimension (H in NCHW)
+ data = paddings->at<loco::DataType::S32>(2 * 2 + i);
+ }
+ else if (dim == 2)
+ {
+ // get fourth dimension (W in NCHW)
+ data = paddings->at<loco::DataType::S32>(3 * 2 + i);
+ }
+
+ nhwc_paddings->at<loco::DataType::S32>(dim * 2 + i) = data;
+ }
+ }
+ return nhwc_paddings;
+}
+
+luci::CircleConst *create_NHWC_rindices(luci::CircleConst *rindices)
+{
+ assert(rindices != nullptr); // FIX_CALLER_UNLESS
+
+ if (rindices->dtype() != loco::DataType::S32)
+ return nullptr;
+
+ auto nhwc_rindices = luci::clone(rindices);
+ auto name = rindices->name();
+ assert(name.length() > 0); // FIX_CALLER_UNLESS
+ nhwc_rindices->name(name + "_NHWC");
+
+ auto size = nhwc_rindices->size<loco::DataType::S32>();
+ for (uint32_t i = 0; i < size; i++)
+ {
+ nhwc_rindices->at<loco::DataType::S32>(i) =
+ nchw_axis_to_nhwc(rindices->at<loco::DataType::S32>(i));
+ }
+
+ return nhwc_rindices;
+}
+
+luci::CircleConst *create_NHWC_from_NCHW(luci::CircleConst *constant)
+{
+ LOGGER(l);
+ assert(constant->rank() == 4);
+
+ // TODO: Support non-float types
+ if (constant->dtype() != loco::DataType::FLOAT32)
+ {
+ INFO(l) << "Non-float type constant: " << constant->name() << std::endl;
+ return nullptr;
+ }
+
+ loco::TensorShape nchw_dimension{constant->dim(0), constant->dim(1), constant->dim(2),
+ constant->dim(3)};
+ loco::TensorShape nhwc_dimension{constant->dim(0), constant->dim(2), constant->dim(3),
+ constant->dim(1)};
+
+ auto name = constant->name();
+ assert(name.length() > 0);
+
+ auto nhwc_const = constant->graph()->nodes()->create<luci::CircleConst>();
+ nhwc_const->dtype(constant->dtype());
+ nhwc_const->rank(4);
+ nhwc_const->dim(0).set(constant->dim(0).value());
+ nhwc_const->dim(1).set(constant->dim(2).value());
+ nhwc_const->dim(2).set(constant->dim(3).value());
+ nhwc_const->dim(3).set(constant->dim(1).value());
+ nhwc_const->shape_status(luci::ShapeStatus::VALID);
+ nhwc_const->size<loco::DataType::FLOAT32>(constant->size<loco::DataType::FLOAT32>());
+ nhwc_const->name(name + "_NHWC");
+
+ for (uint32_t n = 0; n < nchw_dimension.dim(0).value(); n++)
+ {
+ for (uint32_t c = 0; c < nchw_dimension.dim(1).value(); c++)
+ {
+ for (uint32_t h = 0; h < nchw_dimension.dim(2).value(); h++)
+ {
+ for (uint32_t w = 0; w < nchw_dimension.dim(3).value(); w++)
+ {
+ uint32_t nchw_indices[4] = {n, c, h, w};
+ uint32_t nhwc_indices[4] = {n, h, w, c};
+ auto data =
+ constant->at<loco::DataType::FLOAT32>(cal_offset(nchw_dimension, nchw_indices));
+ nhwc_const->at<loco::DataType::FLOAT32>(cal_offset(nhwc_dimension, nhwc_indices)) = data;
+ }
+ }
+ }
+ }
+ return nhwc_const;
+}
+
+// NOTE Following conditions can be extended later
+//
+// Find PAD with an NCHW pattern described below
+// - Paddings shape : [4, 2]
+// - Paddings value : [[0, 0], [0, 0], [h_t, h_b], [w_t, w_b]]
+bool is_NCHW(const luci::CirclePad *node)
+{
+ const auto paddings = dynamic_cast<luci::CircleConst *>(node->paddings());
+ // Non-const paddings is not supported
+ if (paddings == nullptr)
+ return false;
+
+ if (paddings->rank() != 2)
+ return false;
+
+ if (paddings->dim(0).value() != 4 || paddings->dim(1).value() != 2)
+ return false;
+
+ // Only check the first two dimensions
+ for (uint32_t dim = 0; dim < 2; dim++)
+ {
+ for (uint32_t i = 0; i < 2; i++)
+ {
+ auto data = paddings->at<loco::DataType::S32>(dim * 2 + i);
+ if (data != 0)
+ return false;
+ }
+ }
+
+ return true;
+}
+
+// NOTE Copied from is_NCHW(CirclePad)
+bool is_NCHW(const luci::CirclePadV2 *node)
+{
+ const auto paddings = dynamic_cast<luci::CircleConst *>(node->paddings());
+ // Non-const paddings is not supported
+ if (paddings == nullptr)
+ return false;
+
+ if (paddings->rank() != 2)
+ return false;
+
+ if (paddings->dim(0).value() != 4 || paddings->dim(1).value() != 2)
+ return false;
+
+ // Only check the first two dimensions
+ for (uint32_t dim = 0; dim < 2; dim++)
+ {
+ for (uint32_t i = 0; i < 2; i++)
+ {
+ auto data = paddings->at<loco::DataType::S32>(dim * 2 + i);
+ if (data != 0)
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool is_const(const loco::Node *node)
+{
+ return dynamic_cast<const luci::CircleConst *>(node) != nullptr;
+}
+
+bool is_scalar_const(const loco::Node *node)
+{
+ auto const_node = dynamic_cast<const luci::CircleConst *>(node);
+ if (not const_node)
+ return false;
+
+ const auto const_rank = const_node->rank();
+ // shape of scalar
+ // 1. rank = 0
+ // 2. rank = 1, dimension = 1
+ if (const_rank == 0)
+ return true;
+
+ if (const_rank == 1 && const_node->dim(0).value() == 1)
+ return true;
+
+ return false;
+}
+
+// NOTE Following conditions can be extended later
+//
+// Find MUL with an NCHW pattern described below
+// - Input (non-constant) shape : [N, C, H, W]
+// - Input (constant) shape : broadcastable to [N, C, H, W]
+// - Output shape : [N, C, H, W]
+bool is_NCHW_with_const(const luci::CircleMul *node, luci::CircleNode *&pred_node,
+ luci::CircleConst *&multiplier)
+{
+ auto x = dynamic_cast<luci::CircleConst *>(node->x());
+ auto y = dynamic_cast<luci::CircleConst *>(node->y());
+
+ if (x != nullptr && y == nullptr)
+ {
+ pred_node = loco::must_cast<luci::CircleNode *>(node->y());
+ multiplier = x;
+ }
+ else if (x == nullptr && y != nullptr)
+ {
+ pred_node = loco::must_cast<luci::CircleNode *>(node->x());
+ multiplier = y;
+ }
+ else
+ {
+ // Ignore unless MUL has exactly one constant input.
+ return false;
+ }
+
+ if (pred_node->rank() != 4)
+ return false;
+
+ if (not broadcastable(multiplier, node))
+ return false;
+
+ multiplier = expand_to_rank_4(multiplier);
+
+ return true;
+}
+
+// We assume ADD with const input is NCHW if,
+// Input shape: (N, C, H, W)
+// Output shape: (N, C, H, W)
+// 1. Const shape is (1, C, 1, 1), (N, C, H, W) or a scalar (1)
+// 2. Input, Output, Const have the same C.
+bool is_NCHW_with_const(const luci::CircleAdd *node, luci::CircleNode *&pred_node,
+ luci::CircleConst *&beta)
+{
+ auto x = dynamic_cast<luci::CircleConst *>(node->x());
+ auto y = dynamic_cast<luci::CircleConst *>(node->y());
+
+ if (x != nullptr && y == nullptr)
+ {
+ pred_node = loco::must_cast<luci::CircleNode *>(node->y());
+ beta = x;
+ }
+ else if (x == nullptr && y != nullptr)
+ {
+ pred_node = loco::must_cast<luci::CircleNode *>(node->x());
+ beta = y;
+ }
+ else
+ {
+ // Ignore unless ADD has exactly one constant input.
+ return false;
+ }
+
+ if (pred_node->rank() != 4)
+ return false;
+
+ if (not broadcastable(beta, node))
+ return false;
+
+ beta = expand_to_rank_4(beta);
+
+ return true;
+}
+
+// We assume SUB with const input is NCHW if,
+// Input shape: (N, C, H, W)
+// Output shape: (N, C, H, W)
+// 1. Const shape is (1, C, 1, 1), (N, C, H, W) or a scalar (1)
+// 2. Input, Output, Const have the same C.
+bool is_NCHW_with_const(const luci::CircleSub *node, const luci::CircleNode *pred_node,
+ const luci::CircleConst *subtract)
+{
+ assert(pred_node != nullptr);
+ assert(subtract != nullptr);
+
+ if (pred_node->rank() != 4)
+ return false;
+
+ const auto const_rank = subtract->rank();
+ // Support Rank 4 or scalar (rank 0 or 1)
+ if (const_rank != 4 && const_rank != 0 && const_rank != 1)
+ return false;
+
+ const auto input_cdim = pred_node->dim(1);
+ const auto output_cdim = node->dim(1);
+
+ if (const_rank == 4)
+ {
+ bool supported_shape = false;
+
+ // Check subtract is (1, C, 1, 1)
+ if (is_same_shape(subtract, {1, node->dim(1), 1, 1}))
+ supported_shape = true;
+
+ // Check subtract is (N, C, H, W)
+ if (is_same_shape(subtract, {node->dim(0), node->dim(1), node->dim(2), node->dim(3)}))
+ supported_shape = true;
+
+ return supported_shape;
+ }
+ return input_cdim == output_cdim;
+}
+
+template <class T> bool convert_unary_features(T *node)
+{
+ const auto pred_node = loco::must_cast<luci::CircleNode *>(node->features());
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(pred_node);
+ node->features(pre_trans);
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ auto post_trans = create_post_transpose(node);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+
+ return true;
+}
+
+template <class T> bool convert_unary_x(T *node)
+{
+ const auto pred_node = loco::must_cast<luci::CircleNode *>(node->x());
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(pred_node);
+ node->x(pre_trans);
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ auto post_trans = create_post_transpose(node);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+
+ return true;
+}
+
+template <class T> bool convert_unary_logits(T *node)
+{
+ const auto pred_node = loco::must_cast<luci::CircleNode *>(node->logits());
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(pred_node);
+ node->logits(pre_trans);
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ auto post_trans = create_post_transpose(node);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+
+ return true;
+}
+
+class ConvertNCHWToNHWC final : public luci::CircleNodeMutableVisitor<bool>
+{
+ // Default
+ bool visit(luci::CircleNode *node)
+ {
+ throw std::runtime_error(node->name() + " is an unsupported operator.");
+ }
+
+ bool visit(luci::CircleInput *node)
+ {
+ const auto n = node->dim(0);
+ const auto c = node->dim(1);
+ const auto h = node->dim(2);
+ const auto w = node->dim(3);
+
+ node->dim(1) = h;
+ node->dim(2) = w;
+ node->dim(3) = c;
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ // Insert post-transpose
+ auto post_trans = create_post_transpose(node);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+
+ // Update graph input
+ auto graph_inputs = node->graph()->inputs();
+ auto graph_input = graph_inputs->at(node->index());
+ graph_input->shape({n, h, w, c});
+
+ return true;
+ }
+
+ bool visit(luci::CircleOutput *node)
+ {
+ // Insert pre-transpose
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(node->from());
+
+ node->from(pre_trans);
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ // Update graph output
+ const auto n = node->dim(0).value();
+ const auto c = node->dim(1).value();
+ const auto h = node->dim(2).value();
+ const auto w = node->dim(3).value();
+
+ auto graph_outputs = node->graph()->outputs();
+ auto graph_output = graph_outputs->at(node->index());
+ graph_output->shape({n, h, w, c});
+
+ return true;
+ }
+
+ bool visit(luci::CircleAdd *node)
+ {
+ luci::CircleNode *pred_node = nullptr;
+ luci::CircleConst *beta = nullptr;
+
+ if (is_NCHW_with_const(node, pred_node, beta))
+ {
+ assert(beta->rank() == 4); // FIX is_NCHW_with_const unless
+ auto nhwc_const = create_NHWC_from_NCHW(beta);
+ if (nhwc_const == nullptr)
+ return false;
+ node->y(nhwc_const);
+
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(pred_node);
+ node->x(pre_trans);
+ }
+ else if (beta == nullptr)
+ {
+ // Both inputs are not constant.
+ // In this case, we cannot distinguish NCHW from NHWC,
+ // so just insert Transpose Ops.
+ auto pre_trans_x = create_pre_transpose(node);
+ pre_trans_x->a(node->x());
+ node->x(pre_trans_x);
+
+ auto pre_trans_y = create_pre_transpose(node);
+ pre_trans_y->a(node->y());
+ node->y(pre_trans_y);
+ }
+ else
+ {
+ return false;
+ }
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ auto post_trans = create_post_transpose(node);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+ return true;
+ }
+
+ bool visit(luci::CircleConcatenation *node)
+ {
+ const auto num_values = node->numValues();
+ for (uint32_t i = 0; i < num_values; i++)
+ {
+ auto pred_node = loco::must_cast<luci::CircleNode *>(node->values(i));
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(pred_node);
+ node->values(i, pre_trans);
+ }
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ node->axis(nchw_axis_to_nhwc(node->axis()));
+
+ auto post_trans = create_post_transpose(node);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+
+ return true;
+ }
+
+ bool visit(luci::CircleElu *node) { return convert_unary_features<luci::CircleElu>(node); }
+
+ bool visit(luci::CircleGelu *node) { return convert_unary_features<luci::CircleGelu>(node); }
+
+ bool visit(luci::CircleLeakyRelu *node)
+ {
+ return convert_unary_features<luci::CircleLeakyRelu>(node);
+ }
+
+ bool visit(luci::CircleLogistic *node) { return convert_unary_x<luci::CircleLogistic>(node); }
+
+ bool visit(luci::CircleMaximum *node)
+ {
+ if ((not is_const(node->x())) and is_scalar_const(node->y()))
+ {
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(node->x());
+ node->x(pre_trans);
+ }
+ else if (is_scalar_const(node->x()) and (not is_const(node->y())))
+ {
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(node->y());
+ node->y(pre_trans);
+ }
+ else if ((not is_const(node->x())) and (not is_const(node->y())))
+ {
+ auto pre_trans_x = create_pre_transpose(node);
+ pre_trans_x->a(node->x());
+ node->x(pre_trans_x);
+
+ auto pre_trans_y = create_pre_transpose(node);
+ pre_trans_y->a(node->y());
+ node->y(pre_trans_y);
+ }
+ else
+ {
+ // TODO support other cases
+ return false;
+ }
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ auto post_trans = create_post_transpose(node);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+ return true;
+ }
+
+ bool visit(luci::CircleMean *node)
+ {
+ auto input = loco::must_cast<luci::CircleNode *>(node->input());
+ if (input->rank() != 4)
+ return false;
+
+ auto rindices = dynamic_cast<luci::CircleConst *>(node->reduction_indices());
+ if (not rindices)
+ return false;
+
+ auto nhwc_rindices = create_NHWC_rindices(rindices);
+ if (not nhwc_rindices)
+ return false;
+
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(input);
+ node->input(pre_trans);
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ node->reduction_indices(nhwc_rindices);
+
+ if (node->keep_dims())
+ {
+ auto post_trans = create_post_transpose(node);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+
+ return true;
+ }
+
+ // node->keep_dims() == false
+ // 1D output never needs a transpose
+ if (node->rank() <= 1)
+ return true;
+
+ std::vector<bool> reduced_dims_nhwc(4, false);
+ uint32_t num_reduced_indices = nhwc_rindices->size<loco::DataType::S32>();
+
+ for (uint32_t ri = 0; ri < num_reduced_indices; ++ri)
+ {
+ reduced_dims_nhwc[nhwc_rindices->at<loco::DataType::S32>(ri)] = true;
+ }
+
+ // if channel dimension has been reduced, we don't need a transpose
+ if (reduced_dims_nhwc[3])
+ return true;
+
+ // likewise, if both space dimensions are reduced, no transpose is needed
+ if (reduced_dims_nhwc[1] && reduced_dims_nhwc[2])
+ return true;
+
+ std::vector<int32_t> post_trans_ind;
+ // case 1: only N is reduced
+ if (num_reduced_indices == 1 && reduced_dims_nhwc[0])
+ post_trans_ind = {2, 0, 1};
+
+ // case 2: only H or W is reduced
+ if (num_reduced_indices == 1 && (reduced_dims_nhwc[1] || reduced_dims_nhwc[2]))
+ post_trans_ind = {0, 2, 1};
+
+ // case 3: N and either H or W are reduced
+ if (num_reduced_indices == 2)
+ post_trans_ind = {1, 0};
+
+ auto post_trans = create_Nd_transpose(node, post_trans_ind);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+
+ return true;
+ }
+
+ bool visit(luci::CircleMinimum *node)
+ {
+ if ((not is_const(node->x())) and is_scalar_const(node->y()))
+ {
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(node->x());
+ node->x(pre_trans);
+ }
+ else if (is_scalar_const(node->x()) and (not is_const(node->y())))
+ {
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(node->y());
+ node->y(pre_trans);
+ }
+ else
+ {
+ // TODO support other cases
+ return false;
+ }
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ auto post_trans = create_post_transpose(node);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+ return true;
+ }
+
+ bool visit(luci::CircleMul *node)
+ {
+ LOGGER(l);
+
+ luci::CircleNode *pred_node = nullptr;
+ luci::CircleConst *multiplier = nullptr;
+
+ if (is_NCHW_with_const(node, pred_node, multiplier))
+ {
+ assert(multiplier->rank() == 4); // FIX is_NCHW_with_const unless
+ auto nhwc_const = create_NHWC_from_NCHW(multiplier);
+ if (nhwc_const == nullptr)
+ return false;
+ node->y(nhwc_const);
+
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(pred_node);
+ node->x(pre_trans);
+ }
+ else if (multiplier == nullptr)
+ {
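+ // Neither x nor y is a constant handled by is_NCHW_with_const,
+ // so insert a pre-Transpose on both inputs.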
+ // Only rank-4 inputs are supported
+ auto input_x = loco::must_cast<luci::CircleNode *>(node->x());
+ if (input_x->rank() != 4)
+ return false;
+ auto input_y = loco::must_cast<luci::CircleNode *>(node->y());
+ if (input_y->rank() != 4)
+ return false;
+
+ auto pre_trans_x = create_pre_transpose(node);
+ pre_trans_x->a(input_x);
+ node->x(pre_trans_x);
+
+ auto pre_trans_y = create_pre_transpose(node);
+ pre_trans_y->a(input_y);
+ node->y(pre_trans_y);
+ }
+ else
+ {
+ return false;
+ }
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ auto post_trans = create_post_transpose(node);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+ return true;
+ }
+
+ bool visit(luci::CircleNeg *node) { return convert_unary_x<luci::CircleNeg>(node); }
+
+ bool visit(luci::CirclePad *node)
+ {
+ if (!is_NCHW(node))
+ return false;
+
+ const auto pred_node = loco::must_cast<luci::CircleNode *>(node->input());
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(pred_node);
+ node->input(pre_trans);
+
+ auto nchw_paddings = loco::must_cast<luci::CircleConst *>(node->paddings());
+ const auto nhwc_paddings = create_NHWC_paddings(nchw_paddings);
+ node->paddings(nhwc_paddings);
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ auto post_trans = create_post_transpose(node);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+
+ return true;
+ }
+
+ bool visit(luci::CirclePadV2 *node)
+ {
+ if (!is_NCHW(node))
+ return false;
+
+ const auto pred_node = loco::must_cast<luci::CircleNode *>(node->input());
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(pred_node);
+ node->input(pre_trans);
+
+ auto nchw_paddings = loco::must_cast<luci::CircleConst *>(node->paddings());
+ const auto nhwc_paddings = create_NHWC_paddings(nchw_paddings);
+ node->paddings(nhwc_paddings);
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ auto post_trans = create_post_transpose(node);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+
+ return true;
+ }
+
+ // TODO Reduce duplicate code with CircleMean
+ bool visit(luci::CircleReduceMax *node)
+ {
+ auto input = loco::must_cast<luci::CircleNode *>(node->input());
+ if (input->rank() != 4)
+ return false;
+
+ auto rindices = dynamic_cast<luci::CircleConst *>(node->reduction_indices());
+ if (not rindices)
+ return false;
+
+ auto nhwc_rindices = create_NHWC_rindices(rindices);
+ if (not nhwc_rindices)
+ return false;
+
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(input);
+ node->input(pre_trans);
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ node->reduction_indices(nhwc_rindices);
+
+ if (node->keep_dims())
+ {
+ auto post_trans = create_post_transpose(node);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+
+ return true;
+ }
+
+ // The code below handles the case where node->keep_dims() == false
+ // 1D output never needs a transpose
+ if (node->rank() <= 1)
+ return true;
+
+ std::vector<bool> reduced_dims_nhwc(4, false);
+ uint32_t num_reduced_indices = nhwc_rindices->size<loco::DataType::S32>();
+
+ for (uint32_t ri = 0; ri < num_reduced_indices; ++ri)
+ {
+ reduced_dims_nhwc[nhwc_rindices->at<loco::DataType::S32>(ri)] = true;
+ }
+
+ // if channel dimension has been reduced, we don't need a transpose
+ if (reduced_dims_nhwc[3])
+ return true;
+
+ // likewise, if both space dimensions are reduced, no transpose is needed
+ if (reduced_dims_nhwc[1] && reduced_dims_nhwc[2])
+ return true;
+
+ std::vector<int32_t> post_trans_ind;
+ // case 1: only N is reduced
+ if (num_reduced_indices == 1 && reduced_dims_nhwc[0])
+ post_trans_ind = {2, 0, 1};
+
+ // case 2: only H or W is reduced
+ if (num_reduced_indices == 1 && (reduced_dims_nhwc[1] || reduced_dims_nhwc[2]))
+ post_trans_ind = {0, 2, 1};
+
+ // case 3: N and either H or W are reduced
+ if (num_reduced_indices == 2)
+ post_trans_ind = {1, 0};
+
+ auto post_trans = create_Nd_transpose(node, post_trans_ind);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+
+ return true;
+ }
+
+ // TODO Reduce duplicate code with CircleReduceMax
+ bool visit(luci::CircleReduceMin *node)
+ {
+ auto input = loco::must_cast<luci::CircleNode *>(node->input());
+ if (input->rank() != 4)
+ return false;
+
+ auto rindices = dynamic_cast<luci::CircleConst *>(node->reduction_indices());
+ if (not rindices)
+ return false;
+
+ auto nhwc_rindices = create_NHWC_rindices(rindices);
+ if (not nhwc_rindices)
+ return false;
+
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(input);
+ node->input(pre_trans);
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ node->reduction_indices(nhwc_rindices);
+
+ if (node->keep_dims())
+ {
+ auto post_trans = create_post_transpose(node);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+
+ return true;
+ }
+
+ // The code below handles the case where node->keep_dims() == false
+ // 1D output never needs a transpose
+ if (node->rank() <= 1)
+ return true;
+
+ std::vector<bool> reduced_dims_nhwc(4, false);
+ uint32_t num_reduced_indices = nhwc_rindices->size<loco::DataType::S32>();
+
+ for (uint32_t ri = 0; ri < num_reduced_indices; ++ri)
+ {
+ reduced_dims_nhwc[nhwc_rindices->at<loco::DataType::S32>(ri)] = true;
+ }
+
+ // if channel dimension has been reduced, we don't need a transpose
+ if (reduced_dims_nhwc[3])
+ return true;
+
+ // likewise, if both space dimensions are reduced, no transpose is needed
+ if (reduced_dims_nhwc[1] && reduced_dims_nhwc[2])
+ return true;
+
+ std::vector<int32_t> post_trans_ind;
+ // case 1: only N is reduced
+ if (num_reduced_indices == 1 && reduced_dims_nhwc[0])
+ post_trans_ind = {2, 0, 1};
+
+ // case 2: only H or W is reduced
+ if (num_reduced_indices == 1 && (reduced_dims_nhwc[1] || reduced_dims_nhwc[2]))
+ post_trans_ind = {0, 2, 1};
+
+ // case 3: N and either H or W are reduced
+ if (num_reduced_indices == 2)
+ post_trans_ind = {1, 0};
+
+ auto post_trans = create_Nd_transpose(node, post_trans_ind);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+
+ return true;
+ }
+
+ bool visit(luci::CircleRelu *node) { return convert_unary_features<luci::CircleRelu>(node); }
+
+ bool visit(luci::CircleRelu6 *node) { return convert_unary_features<luci::CircleRelu6>(node); }
+
+ bool visit(luci::CircleRsqrt *node) { return convert_unary_x<luci::CircleRsqrt>(node); }
+
+ bool visit(luci::CircleSplitV *node)
+ {
+ // Change split dimension
+ auto axis = dynamic_cast<luci::CircleConst *>(node->split_dim());
+ if (not axis)
+ return false;
+
+ if (axis->dtype() != loco::DataType::S32)
+ return false;
+
+ if (axis->size<loco::DataType::S32>() != 1)
+ return false;
+
+ axis->at<loco::DataType::S32>(0) = nchw_axis_to_nhwc(axis->at<loco::DataType::S32>(0));
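+ // e.g., split_dim 1 (C in NCHW) becomes 3 (C in NHWC)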
+
+ // Insert pre-transpose
+ const auto pred_node = loco::must_cast<luci::CircleNode *>(node->input());
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(pred_node);
+ node->input(pre_trans);
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ // Insert post-transposes
+ for (auto succ : loco::succs(node))
+ {
+ auto svo = loco::must_cast<luci::CircleSplitVOut *>(succ);
+
+ auto post_trans = create_post_transpose(svo);
+ loco::replace(svo).with(post_trans);
+ post_trans->a(svo);
+ }
+
+ return true;
+ }
+
+ bool visit(luci::CircleSquaredDifference *node)
+ {
+ // TODO support CircleConst input
+ if (dynamic_cast<luci::CircleConst *>(node->x()) != nullptr)
+ return false;
+ if (dynamic_cast<luci::CircleConst *>(node->y()) != nullptr)
+ return false;
+
+ auto input_x = loco::must_cast<luci::CircleNode *>(node->x());
+ if (input_x->rank() != 4)
+ return false;
+ auto input_y = loco::must_cast<luci::CircleNode *>(node->y());
+ if (input_y->rank() != 4)
+ return false;
+
+ auto pre_trans_x = create_pre_transpose(node);
+ pre_trans_x->a(input_x);
+ node->x(pre_trans_x);
+
+ auto pre_trans_y = create_pre_transpose(node);
+ pre_trans_y->a(input_y);
+ node->y(pre_trans_y);
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ auto post_trans = create_post_transpose(node);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+ return true;
+ }
+
+ bool visit(luci::CircleSub *node)
+ {
+ luci::CircleNode *pred_node = nullptr;
+ luci::CircleConst *subtract = nullptr;
+
+ auto const_x = dynamic_cast<luci::CircleConst *>(node->x());
+ auto const_y = dynamic_cast<luci::CircleConst *>(node->y());
+
+ if (const_x != nullptr && const_y == nullptr)
+ {
+ // case of subtract - pred_node
+ pred_node = loco::must_cast<luci::CircleNode *>(node->y());
+ subtract = const_x;
+
+ if (!is_NCHW_with_const(node, pred_node, subtract))
+ return false;
+
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(pred_node);
+
+ if (subtract->rank() == 4)
+ {
+ auto nhwc_const = create_NHWC_from_NCHW(subtract);
+ if (nhwc_const == nullptr)
+ return false;
+ node->x(nhwc_const);
+ }
+ node->y(pre_trans);
+ }
+ else if (const_x == nullptr && const_y != nullptr)
+ {
+ // case of pred_node - subtract
+ pred_node = loco::must_cast<luci::CircleNode *>(node->x());
+ subtract = const_y;
+
+ if (!is_NCHW_with_const(node, pred_node, subtract))
+ return false;
+
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(pred_node);
+
+ if (subtract->rank() == 4)
+ {
+ auto nhwc_const = create_NHWC_from_NCHW(subtract);
+ if (nhwc_const == nullptr)
+ return false;
+ node->y(nhwc_const);
+ }
+
+ node->x(pre_trans);
+ }
+ else if (const_x == nullptr && const_y == nullptr)
+ {
+ // Neither input is constant.
+ // In this case, we cannot distinguish NCHW from NHWC,
+ // so just insert Transpose Ops.
+ // Only rank-4 inputs are supported.
+ auto input_x = loco::must_cast<luci::CircleNode *>(node->x());
+ if (input_x->rank() != 4)
+ return false;
+ auto input_y = loco::must_cast<luci::CircleNode *>(node->y());
+ if (input_y->rank() != 4)
+ return false;
+
+ auto pre_trans_x = create_pre_transpose(node);
+ pre_trans_x->a(input_x);
+ node->x(pre_trans_x);
+
+ auto pre_trans_y = create_pre_transpose(node);
+ pre_trans_y->a(input_y);
+ node->y(pre_trans_y);
+ }
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ auto post_trans = create_post_transpose(node);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+ return true;
+ }
+};
+
+} // namespace
+
+namespace luci
+{
+
+bool ConvertNCHWToNHWCPass::run(loco::Graph *g)
+{
+ LOGGER(l);
+ INFO(l) << "ConvertNCHWToNHWCPass Start" << std::endl;
+
+ // Annotate NHWC operators
+ // NHWC operators are detected by pattern matching
+ //
+ // Pattern
+ // pre-Transpose (or pre-Reshape) + [intermediate Ops] + post-Transpose (or post-Reshape)
+ //
+ // [intermediate Ops] are annotated as NHWC
+ //
+ // NOTE A single pre-Transpose/Reshape can have multiple post-Transpose/Reshape.
+ // For example,
+ // pre-Transpose --- [intermediate Ops] --- post-Transpose
+ // |
+ // +--[intermediate Ops] --- post-Transpose
+ //
+ // NOTE Intermediate Ops SHOULD NOT contain pre-Transpose/Reshape
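+ // For example, the following pattern is NOT annotated, because it is not
+ // closed with a post-Transpose/Reshape (is_closed becomes false):
+ //
+ // pre-Transpose --- [intermediate Ops] --- Output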
+ for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
+ {
+ if (has_data_format(node))
+ continue;
+
+ if (is_pre_transpose(node) || is_pre_reshape(node))
+ {
+ std::set<loco::Node *> intermediate;
+
+ // Variable to check intermediate Ops contain pre-Transpose/Reshape
+ bool has_pre = false;
+
+ // Variable to check the pattern is closed with post-Transpose/Reshape
+ bool is_closed = true;
+
+ // For recursive call of lambda
+ std::function<void(loco::Node *)> collect_intermediate;
+ collect_intermediate = [&](loco::Node *n) {
+ for (auto succ : loco::succs(n))
+ {
+ // Skip unnecessary traversal
+ if (intermediate.find(succ) != intermediate.end())
+ continue;
+
+ // Exit condition
+ if (is_post_transpose(succ) || is_post_reshape(succ))
+ continue;
+
+ if (is_pre_transpose(succ) || is_pre_reshape(succ))
+ {
+ has_pre = true;
+ break;
+ }
+
+ if (is_output(succ))
+ {
+ is_closed = false;
+ break;
+ }
+
+ intermediate.emplace(succ);
+
+ collect_intermediate(succ);
+ }
+ };
+
+ collect_intermediate(node);
+
+ if (has_pre or not is_closed)
+ continue;
+
+ for (auto inter : intermediate)
+ {
+ if (not has_data_format(inter))
+ set_data_format(inter, DataFormat::NHWC);
+ }
+ }
+ }
+
+ // Annotate NCHW operators
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ switch (circle_node->opcode())
+ {
+ // List of supported Ops
+ case luci::CircleOpcode::CIRCLEINPUT:
+ if (!_preserve_input && !has_data_format(node))
+ {
+ set_data_format(node, DataFormat::NCHW);
+ }
+ break;
+ case luci::CircleOpcode::CIRCLEOUTPUT:
+ if (!_preserve_output && !has_data_format(node))
+ {
+ set_data_format(node, DataFormat::NCHW);
+ }
+ break;
+ // SOFTMAX, LOG_SOFTMAX are not converted, because
+ // tflite/circle assumes the last channel is always axis
+ case luci::CircleOpcode::ADD:
+ case luci::CircleOpcode::CONCATENATION:
+ case luci::CircleOpcode::ELU:
+ case luci::CircleOpcode::GELU:
+ case luci::CircleOpcode::LEAKY_RELU:
+ case luci::CircleOpcode::LOGISTIC:
+ case luci::CircleOpcode::MAXIMUM:
+ case luci::CircleOpcode::MEAN:
+ case luci::CircleOpcode::MINIMUM:
+ case luci::CircleOpcode::MUL:
+ case luci::CircleOpcode::NEG:
+ case luci::CircleOpcode::PAD:
+ case luci::CircleOpcode::PADV2:
+ case luci::CircleOpcode::REDUCE_MAX:
+ case luci::CircleOpcode::REDUCE_MIN:
+ case luci::CircleOpcode::RELU:
+ case luci::CircleOpcode::RELU6:
+ case luci::CircleOpcode::RSQRT:
+ case luci::CircleOpcode::SPLIT_V:
+ case luci::CircleOpcode::SQUARED_DIFFERENCE:
+ case luci::CircleOpcode::SUB:
+ if (!has_data_format(node))
+ {
+ set_data_format(node, DataFormat::NCHW);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (!has_data_format(node))
+ {
+ // Unsupported Op
+ continue;
+ }
+ else if (get_data_format(node) == DataFormat::NHWC)
+ {
+ // Already converted to NHWC
+ continue;
+ }
+ else if (has_dynamic_shape(node))
+ {
+ // This pass only works for static-shaped node
+ INFO(l) << "Skip the node with a dynamic shape." << std::endl;
+ continue;
+ }
+ else
+ {
+ ConvertNCHWToNHWC converter;
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ if (circle_node->rank() != 4)
+ {
+ // TODO replace the check above with the input rank check, and remove the condition below
+ if (not dynamic_cast<luci::CircleMean *>(node) and
+ not dynamic_cast<luci::CircleReduceMax *>(node) and
+ not dynamic_cast<luci::CircleReduceMin *>(node))
+ continue;
+ }
+
+ if (circle_node->accept(&converter))
+ {
+ set_data_format(node, DataFormat::NHWC);
+ changed = true;
+ }
+ else
+ {
+ continue;
+ }
+ }
+ }
+
+ INFO(l) << "ConvertNCHWToNHWCPass End" << std::endl;
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp
new file mode 100644
index 000000000..85648cf2c
--- /dev/null
+++ b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp
@@ -0,0 +1,2237 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/Phase.h>
+
+#include <luci/test/TestIOGraph.h>
+
+#include "luci/Pass/ConvertNCHWToNHWCPass.h"
+#include "luci/Pass/CircleShapeInferencePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+using namespace luci::test;
+
+namespace
+{
+
+/**
+ * Graph with a single Op (example: Add).
+ *
+ * BEFORE
+ * - All Ops including Input/Output are NCHW.
+ *
+ * [Input] [beta]
+ * | /
+ * [Add]
+ * |
+ * [Output]
+ *
+ * AFTER
+ * - All Ops including Input/Output are NHWC.
+ *
+ * [Input]
+ * |
+ * [Transpose]
+ * |
+ * [Transpose] [beta]
+ * | /
+ * [Add]
+ * |
+ * [Transpose]
+ * |
+ * [Transpose]
+ * |
+ * [Output]
+ */
+class SimpleGraph
+{
+public:
+ SimpleGraph() = default;
+
+public:
+ void init()
+ {
+ input = g.nodes()->create<luci::CircleInput>();
+ output = g.nodes()->create<luci::CircleOutput>();
+ input->name("input");
+ output->name("output");
+
+ auto graph_input = g.inputs()->create();
+ input->index(graph_input->index());
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+
+ graph_input->dtype(loco::DataType::FLOAT32);
+ input->dtype(loco::DataType::FLOAT32);
+ output->dtype(loco::DataType::FLOAT32);
+ graph_output->dtype(loco::DataType::FLOAT32);
+
+ uint32_t channel_size = 16;
+ graph_input->shape({1, channel_size, 4, 4});
+ input->shape({1, channel_size, 4, 4});
+ output->shape({1, channel_size, 4, 4});
+ graph_output->shape({1, channel_size, 4, 4});
+
+ auto graph_body = insertGraphBody(input);
+ output->from(graph_body);
+ }
+
+ virtual ~SimpleGraph() = default;
+
+protected:
+ virtual loco::Node *insertGraphBody(loco::Node *input) = 0;
+
+public:
+ loco::Graph g;
+ luci::CircleInput *input = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+class AddGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ add = g.nodes()->create<luci::CircleAdd>();
+ beta = g.nodes()->create<luci::CircleConst>();
+
+ add->dtype(loco::DataType::FLOAT32);
+ beta->dtype(loco::DataType::FLOAT32);
+
+ uint32_t channel_size = 16;
+ add->shape({1, channel_size, 4, 4});
+ beta->shape({1, channel_size, 1, 1});
+
+ beta->size<loco::DataType::FLOAT32>(channel_size);
+ for (uint32_t i = 0; i < channel_size; i++)
+ {
+ beta->at<loco::DataType::FLOAT32>(i) = i;
+ }
+
+ add->x(input);
+ add->y(beta);
+
+ add->name("add");
+ beta->name("beta");
+
+ return add;
+ }
+
+public:
+ void update_const_shape_to_nchw(void)
+ {
+ uint32_t channel_size = 16;
+ beta->shape({1, channel_size, 4, 4});
+
+ beta->size<loco::DataType::FLOAT32>(channel_size * 4 * 4);
+ for (uint32_t i = 0; i < channel_size * 4 * 4; i++)
+ {
+ beta->at<loco::DataType::FLOAT32>(i) = i;
+ }
+ }
+
+public:
+ luci::CircleAdd *add = nullptr;
+ luci::CircleConst *beta = nullptr;
+};
+
+class NHWCReluGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ relu = g.nodes()->create<luci::CircleRelu>();
+ pre_reshape = g.nodes()->create<luci::CircleReshape>();
+ post_reshape = g.nodes()->create<luci::CircleReshape>();
+ pre_shape = g.nodes()->create<luci::CircleConst>();
+ post_shape = g.nodes()->create<luci::CircleConst>();
+
+ pre_shape->dtype(loco::DataType::S32);
+ post_shape->dtype(loco::DataType::S32);
+
+ uint32_t channel_size = 16;
+ auto in = loco::must_cast<luci::CircleNode *>(input);
+ in->shape({1, channel_size, 4, 4});
+ pre_shape->shape({4});
+ post_shape->shape({4});
+
+ pre_shape->size<loco::DataType::S32>(4);
+ pre_shape->at<loco::DataType::S32>(0) = 1;
+ pre_shape->at<loco::DataType::S32>(1) = 4;
+ pre_shape->at<loco::DataType::S32>(2) = 4;
+ pre_shape->at<loco::DataType::S32>(3) = channel_size;
+
+ post_shape->size<loco::DataType::S32>(4);
+ post_shape->at<loco::DataType::S32>(0) = 1;
+ post_shape->at<loco::DataType::S32>(1) = channel_size;
+ post_shape->at<loco::DataType::S32>(2) = 4;
+ post_shape->at<loco::DataType::S32>(3) = 4;
+
+ pre_reshape->tensor(input);
+ pre_reshape->shape(pre_shape);
+
+ relu->features(pre_reshape);
+
+ post_reshape->tensor(relu);
+ post_reshape->shape(post_shape);
+
+ relu->name("Relu");
+ pre_reshape->name("pre-reshape");
+ post_reshape->name("post-reshape");
+
+ return post_reshape;
+ }
+
+public:
+ luci::CircleRelu *relu = nullptr;
+ luci::CircleReshape *pre_reshape = nullptr;
+ luci::CircleReshape *post_reshape = nullptr;
+ luci::CircleConst *pre_shape = nullptr;
+ luci::CircleConst *post_shape = nullptr;
+};
+
+/**
+ * Graph with pre-Reshape but no post-Transpose/Reshape.
+ *
+ * BEFORE
+ * [Input]
+ * |
+ * [Pre-Reshape]
+ * |
+ * [Relu]
+ * |
+ * [Output]
+ *
+ * AFTER
+ * [Input]
+ * |
+ * [Pre-Reshape]
+ * |
+ * [Pre-Transpose]
+ * |
+ * [Relu]
+ * |
+ * [Post-Transpose]
+ * |
+ * [Output]
+ */
+class NoPostReshapeGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ relu = g.nodes()->create<luci::CircleRelu>();
+ pre_reshape = g.nodes()->create<luci::CircleReshape>();
+ pre_shape = g.nodes()->create<luci::CircleConst>();
+
+ pre_shape->dtype(loco::DataType::S32);
+
+ uint32_t channel_size = 16;
+ auto in = loco::must_cast<luci::CircleNode *>(input);
+ in->shape({1, channel_size, 4, 4});
+ pre_shape->shape({4});
+
+ pre_shape->size<loco::DataType::S32>(4);
+ pre_shape->at<loco::DataType::S32>(0) = 1;
+ pre_shape->at<loco::DataType::S32>(1) = 4;
+ pre_shape->at<loco::DataType::S32>(2) = 4;
+ pre_shape->at<loco::DataType::S32>(3) = channel_size;
+
+ pre_reshape->tensor(input);
+ pre_reshape->shape(pre_shape);
+ relu->features(pre_reshape);
+
+ relu->name("Relu");
+ pre_reshape->name("pre-reshape");
+
+ return relu;
+ }
+
+public:
+ luci::CircleRelu *relu = nullptr;
+ luci::CircleReshape *pre_reshape = nullptr;
+ luci::CircleConst *pre_shape = nullptr;
+};
+
+/**
+ * Graph with two pre-Reshapes
+ *
+ * BEFORE
+ * [Input]
+ * |
+ * [Pre-Reshape]
+ * |
+ * [Relu]
+ * |
+ * [Pre-Reshape]
+ * |
+ * [Post-Reshape]
+ * |
+ * [Output]
+ *
+ * AFTER
+ * [Input]
+ * |
+ * [Pre-Reshape]
+ * |
+ * [Pre-Transpose]
+ * |
+ * [Relu]
+ * |
+ * [Post-Transpose]
+ * |
+ * [Pre-Reshape]
+ * |
+ * [Post-Reshape]
+ * |
+ * [Output]
+ */
+class ReluNotClosedGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ relu = g.nodes()->create<luci::CircleRelu>();
+ pre_reshape = g.nodes()->create<luci::CircleReshape>();
+ pre_reshape_2 = g.nodes()->create<luci::CircleReshape>();
+ post_reshape = g.nodes()->create<luci::CircleReshape>();
+ pre_shape = g.nodes()->create<luci::CircleConst>();
+ pre_shape_2 = g.nodes()->create<luci::CircleConst>();
+ post_shape = g.nodes()->create<luci::CircleConst>();
+
+ pre_shape->dtype(loco::DataType::S32);
+ pre_shape_2->dtype(loco::DataType::S32);
+ post_shape->dtype(loco::DataType::S32);
+
+ uint32_t channel_size = 16;
+ auto in = loco::must_cast<luci::CircleNode *>(input);
+ in->shape({1, channel_size, 4, 4});
+ pre_shape->shape({4});
+ pre_shape_2->shape({4});
+ post_shape->shape({4});
+
+ pre_shape->size<loco::DataType::S32>(4);
+ pre_shape->at<loco::DataType::S32>(0) = 1;
+ pre_shape->at<loco::DataType::S32>(1) = 4;
+ pre_shape->at<loco::DataType::S32>(2) = 4;
+ pre_shape->at<loco::DataType::S32>(3) = channel_size;
+
+ pre_shape_2->size<loco::DataType::S32>(4);
+ pre_shape_2->at<loco::DataType::S32>(0) = 1;
+ pre_shape_2->at<loco::DataType::S32>(1) = 4;
+ pre_shape_2->at<loco::DataType::S32>(2) = channel_size;
+ pre_shape_2->at<loco::DataType::S32>(3) = 4;
+
+ post_shape->size<loco::DataType::S32>(4);
+ post_shape->at<loco::DataType::S32>(0) = 1;
+ post_shape->at<loco::DataType::S32>(1) = 4;
+ post_shape->at<loco::DataType::S32>(2) = 4;
+ post_shape->at<loco::DataType::S32>(3) = channel_size;
+
+ pre_reshape->tensor(input);
+ pre_reshape->shape(pre_shape);
+
+ relu->features(pre_reshape);
+
+ pre_reshape_2->tensor(relu);
+ pre_reshape_2->shape(pre_shape_2);
+
+ post_reshape->tensor(pre_reshape_2);
+ post_reshape->shape(post_shape);
+
+ relu->name("Relu");
+ pre_reshape->name("pre-reshape");
+ pre_reshape->name("pre-reshape-2");
+ post_reshape->name("post-reshape");
+
+ return post_reshape;
+ }
+
+public:
+ luci::CircleRelu *relu = nullptr;
+ luci::CircleReshape *pre_reshape = nullptr;
+ luci::CircleReshape *pre_reshape_2 = nullptr;
+ luci::CircleReshape *post_reshape = nullptr;
+ luci::CircleConst *pre_shape = nullptr;
+ luci::CircleConst *pre_shape_2 = nullptr;
+ luci::CircleConst *post_shape = nullptr;
+};
+
+class AddScalarGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ add = g.nodes()->create<luci::CircleAdd>();
+ beta = g.nodes()->create<luci::CircleConst>();
+
+ add->dtype(loco::DataType::FLOAT32);
+ beta->dtype(loco::DataType::FLOAT32);
+
+ uint32_t channel_size = 16;
+ add->shape({1, channel_size, 4, 4});
+ beta->shape({1});
+
+ beta->size<loco::DataType::FLOAT32>(1);
+ beta->at<loco::DataType::FLOAT32>(0) = 3.14;
+
+ add->x(input);
+ add->y(beta);
+
+ add->name("add");
+ beta->name("beta");
+
+ return add;
+ }
+
+public:
+ luci::CircleAdd *add = nullptr;
+ luci::CircleConst *beta = nullptr;
+};
+
+class ConcatenationGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ concat = g.nodes()->create<luci::CircleConcatenation>(2);
+ concat->values(0, input);
+ concat->axis(1);
+
+ input2 = g.nodes()->create<luci::CircleConst>();
+ input2->dtype(loco::DataType::FLOAT32);
+ input2->shape({1, 16, 4, 4});
+ input2->size<loco::DataType::FLOAT32>(16 * 4 * 4);
+ for (uint32_t i = 0; i < 16 * 4 * 4; i++)
+ {
+ input2->at<loco::DataType::FLOAT32>(i) = i;
+ }
+ concat->values(1, input2);
+
+ concat->name("concat");
+ input2->name("input2");
+
+ return concat;
+ }
+
+public:
+ luci::CircleConcatenation *concat = nullptr;
+ luci::CircleConst *input2 = nullptr;
+};
+
+class EluGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ elu = g.nodes()->create<luci::CircleElu>();
+ elu->features(input);
+ elu->name("elu");
+
+ return elu;
+ }
+
+public:
+ luci::CircleElu *elu = nullptr;
+};
+
+class LeakyReluGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ leakyrelu = g.nodes()->create<luci::CircleLeakyRelu>();
+ leakyrelu->features(input);
+ leakyrelu->name("leakyrelu");
+
+ return leakyrelu;
+ }
+
+public:
+ luci::CircleLeakyRelu *leakyrelu = nullptr;
+};
+
+class LogisticGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ logistic = g.nodes()->create<luci::CircleLogistic>();
+ logistic->x(input);
+ logistic->name("logistic");
+
+ return logistic;
+ }
+
+public:
+ luci::CircleLogistic *logistic = nullptr;
+};
+
+class MaximumGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ max = g.nodes()->create<luci::CircleMaximum>();
+ limit = g.nodes()->create<luci::CircleConst>();
+
+ max->dtype(loco::DataType::FLOAT32);
+ limit->dtype(loco::DataType::FLOAT32);
+
+ max->shape({1, 16, 4, 4});
+ limit->shape({});
+
+ limit->size<loco::DataType::FLOAT32>(1);
+ limit->at<loco::DataType::FLOAT32>(0) = 100;
+
+ max->x(input);
+ max->y(limit);
+
+ max->name("max");
+ limit->name("limit");
+
+ return max;
+ }
+
+public:
+ luci::CircleMaximum *max = nullptr;
+ luci::CircleConst *limit = nullptr;
+};
+
+class MaximumNonConstGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ max = g.nodes()->create<luci::CircleMaximum>();
+ max->dtype(loco::DataType::FLOAT32);
+ max->shape({1, 16, 4, 4});
+
+ max->x(input);
+ max->y(input);
+
+ max->name("max");
+
+ return max;
+ }
+
+public:
+ luci::CircleMaximum *max = nullptr;
+};
+
+static constexpr std::initializer_list<uint32_t> kDefaultShape = {1, 16, 1, 1};
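+// NOTE kDefaultShape is a constexpr object with static storage duration, so
+// the initializer_list members below that copy it keep referring to a live
+// backing array.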
+
+class MeanGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ mean = g.nodes()->create<luci::CircleMean>();
+ rindices = g.nodes()->create<luci::CircleConst>();
+
+ mean->dtype(loco::DataType::FLOAT32);
+ rindices->dtype(loco::DataType::S32);
+
+ mean->shape(_shape);
+ rindices->shape({static_cast<uint32_t>(_axes.size())});
+
+ rindices->size<loco::DataType::S32>(_axes.size());
+ for (uint32_t i = 0; i < _axes.size(); ++i)
+ {
+ rindices->at<loco::DataType::S32>(i) = _axes[i];
+ }
+
+ mean->input(input);
+ mean->reduction_indices(rindices);
+ mean->keep_dims(_keep_dims);
+
+ mean->name("mean");
+ rindices->name("rindices");
+
+ return mean;
+ }
+
+public:
+ void keep_dims(bool val) { _keep_dims = val; }
+ void axes(std::vector<int32_t> val) { _axes = val; }
+ void shape(std::initializer_list<uint32_t> val) { _shape = val; }
+
+public:
+ luci::CircleMean *mean = nullptr;
+ luci::CircleConst *rindices = nullptr;
+
+private:
+ bool _keep_dims = true;
+ std::vector<int32_t> _axes = {2, 3};
+ std::initializer_list<uint32_t> _shape = kDefaultShape;
+};
+
+class MinimumGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ min = g.nodes()->create<luci::CircleMinimum>();
+ limit = g.nodes()->create<luci::CircleConst>();
+
+ min->dtype(loco::DataType::FLOAT32);
+ limit->dtype(loco::DataType::FLOAT32);
+
+ min->shape({1, 16, 4, 4});
+ limit->shape({});
+
+ limit->size<loco::DataType::FLOAT32>(1);
+ limit->at<loco::DataType::FLOAT32>(0) = 100;
+
+ min->x(input);
+ min->y(limit);
+
+ min->name("min");
+ limit->name("limit");
+
+ return min;
+ }
+
+public:
+ luci::CircleMinimum *min = nullptr;
+ luci::CircleConst *limit = nullptr;
+};
+
+class MulGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ mul = g.nodes()->create<luci::CircleMul>();
+ multiplier = g.nodes()->create<luci::CircleConst>();
+
+ mul->dtype(loco::DataType::FLOAT32);
+ multiplier->dtype(loco::DataType::FLOAT32);
+
+ uint32_t channel_size = 16;
+ mul->shape({1, channel_size, 4, 4});
+ multiplier->shape({1, channel_size, 1, 1});
+
+ multiplier->size<loco::DataType::FLOAT32>(channel_size);
+ for (uint32_t i = 0; i < channel_size; i++)
+ {
+ multiplier->at<loco::DataType::FLOAT32>(i) = i;
+ }
+
+ mul->x(input);
+ mul->y(multiplier);
+
+ mul->name("mul");
+ multiplier->name("multiplier");
+
+ return mul;
+ }
+
+public:
+ void update_const_shape_to_nchw(void)
+ {
+ uint32_t channel_size = 16;
+ multiplier->shape({1, channel_size, 4, 4});
+
+ multiplier->size<loco::DataType::FLOAT32>(channel_size * 4 * 4);
+ for (uint32_t i = 0; i < channel_size * 4 * 4; i++)
+ {
+ multiplier->at<loco::DataType::FLOAT32>(i) = i;
+ }
+ }
+
+public:
+ luci::CircleMul *mul = nullptr;
+ luci::CircleConst *multiplier = nullptr;
+};
+
+class MulScalarGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ mul = g.nodes()->create<luci::CircleMul>();
+ multiplier = g.nodes()->create<luci::CircleConst>();
+
+ mul->dtype(loco::DataType::FLOAT32);
+ multiplier->dtype(loco::DataType::FLOAT32);
+
+ uint32_t channel_size = 16;
+ mul->shape({1, channel_size, 4, 4});
+ multiplier->shape({1});
+
+ multiplier->size<loco::DataType::FLOAT32>(1);
+ multiplier->at<loco::DataType::FLOAT32>(0) = 2;
+
+ mul->x(input);
+ mul->y(multiplier);
+
+ mul->name("mul");
+ multiplier->name("multiplier");
+
+ return mul;
+ }
+
+public:
+ luci::CircleMul *mul = nullptr;
+ luci::CircleConst *multiplier = nullptr;
+};
+
+class MulBothNormGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ mul = g.nodes()->create<luci::CircleMul>();
+
+ mul->dtype(loco::DataType::FLOAT32);
+
+ uint32_t channel_size = 16;
+ mul->shape({1, channel_size, 4, 4});
+
+ mul->x(input);
+ mul->y(input);
+
+ mul->name("mul");
+
+ return mul;
+ }
+
+public:
+ luci::CircleMul *mul = nullptr;
+};
+
+class NegGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ neg = g.nodes()->create<luci::CircleNeg>();
+ neg->x(input);
+ neg->name("neg");
+
+ return neg;
+ }
+
+public:
+ luci::CircleNeg *neg = nullptr;
+};
+
+class PadGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ pad = g.nodes()->create<luci::CirclePad>();
+ paddings = g.nodes()->create<luci::CircleConst>();
+
+ pad->dtype(loco::DataType::FLOAT32);
+ paddings->dtype(loco::DataType::S32);
+
+ uint32_t channel_size = 16;
+ pad->shape({1, channel_size, 4, 4});
+ paddings->shape({4, 2});
+
+ // paddings data (NCHW)
+ // [[0,0], [0,0], [1,1], [2,2]]
+ paddings->size<loco::DataType::S32>(8);
+ for (uint32_t dim = 0; dim < 4; dim++)
+ {
+ for (uint32_t i = 0; i < 2; i++)
+ {
+ int32_t data = 0;
+
+ if (dim == 2)
+ data = 1;
+ else if (dim == 3)
+ data = 2;
+
+ paddings->at<loco::DataType::S32>(dim * 2 + i) = data;
+ }
+ }
+
+ pad->input(input);
+ pad->paddings(paddings);
+
+ pad->name("pad");
+ paddings->name("paddings");
+
+ return pad;
+ }
+
+public:
+ luci::CirclePad *pad = nullptr;
+ luci::CircleConst *paddings = nullptr;
+};
+
+class PadV2Graph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ pad = g.nodes()->create<luci::CirclePadV2>();
+ paddings = g.nodes()->create<luci::CircleConst>();
+ const_value = g.nodes()->create<luci::CircleConst>();
+
+ pad->dtype(loco::DataType::FLOAT32);
+ paddings->dtype(loco::DataType::S32);
+ const_value->dtype(loco::DataType::FLOAT32);
+
+ uint32_t channel_size = 16;
+ pad->shape({1, channel_size, 4, 4});
+ paddings->shape({4, 2});
+ const_value->shape({1});
+
+ // paddings data (NCHW)
+ // [[0,0], [0,0], [1,1], [2,2]]
+ paddings->size<loco::DataType::S32>(8);
+ for (uint32_t dim = 0; dim < 4; dim++)
+ {
+ for (uint32_t i = 0; i < 2; i++)
+ {
+ int32_t data = 0;
+
+ if (dim == 2)
+ data = 1;
+ else if (dim == 3)
+ data = 2;
+
+ paddings->at<loco::DataType::S32>(dim * 2 + i) = data;
+ }
+ }
+
+ const_value->size<loco::DataType::FLOAT32>(1);
+ const_value->at<loco::DataType::FLOAT32>(0) = -3.4;
+
+ pad->input(input);
+ pad->paddings(paddings);
+ pad->constant_values(const_value);
+
+ pad->name("padV2");
+ paddings->name("paddings");
+ const_value->name("constant_values");
+
+ return pad;
+ }
+
+public:
+ luci::CirclePadV2 *pad = nullptr;
+ luci::CircleConst *paddings = nullptr;
+ luci::CircleConst *const_value = nullptr;
+};
+
+class ReduceMaxGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ rm = g.nodes()->create<luci::CircleReduceMax>();
+ rindices = g.nodes()->create<luci::CircleConst>();
+
+ rm->dtype(loco::DataType::FLOAT32);
+ rindices->dtype(loco::DataType::S32);
+
+ rm->shape(_shape);
+ rindices->shape({static_cast<uint32_t>(_axes.size())});
+
+ rindices->size<loco::DataType::S32>(_axes.size());
+ for (uint32_t i = 0; i < _axes.size(); ++i)
+ {
+ rindices->at<loco::DataType::S32>(i) = _axes[i];
+ }
+
+ rm->input(input);
+ rm->reduction_indices(rindices);
+ rm->keep_dims(_keep_dims);
+
+ rm->name("reduce_max");
+ rindices->name("rindices");
+
+ return rm;
+ }
+
+public:
+ void keep_dims(bool val) { _keep_dims = val; }
+ void axes(std::vector<int32_t> val) { _axes = val; }
+ void shape(std::initializer_list<uint32_t> val) { _shape = val; }
+
+public:
+ luci::CircleReduceMax *rm = nullptr;
+ luci::CircleConst *rindices = nullptr;
+
+private:
+ bool _keep_dims = true;
+ std::vector<int32_t> _axes = {2, 3};
+ std::initializer_list<uint32_t> _shape = kDefaultShape;
+};
+
+class ReduceMinGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ rm = g.nodes()->create<luci::CircleReduceMin>();
+ rindices = g.nodes()->create<luci::CircleConst>();
+
+ rm->dtype(loco::DataType::FLOAT32);
+ rindices->dtype(loco::DataType::S32);
+
+ rm->shape(_shape);
+ rindices->shape({static_cast<uint32_t>(_axes.size())});
+
+ rindices->size<loco::DataType::S32>(_axes.size());
+ for (uint32_t i = 0; i < _axes.size(); ++i)
+ {
+ rindices->at<loco::DataType::S32>(i) = _axes[i];
+ }
+
+ rm->input(input);
+ rm->reduction_indices(rindices);
+ rm->keep_dims(_keep_dims);
+
+ rm->name("reduce_max");
+ rindices->name("rindices");
+
+ return rm;
+ }
+
+public:
+ void keep_dims(bool val) { _keep_dims = val; }
+ void axes(std::vector<int32_t> val) { _axes = val; }
+ void shape(std::initializer_list<uint32_t> val) { _shape = val; }
+
+public:
+ luci::CircleReduceMin *rm = nullptr;
+ luci::CircleConst *rindices = nullptr;
+
+private:
+ bool _keep_dims = true;
+ std::vector<int32_t> _axes = {2, 3};
+ std::initializer_list<uint32_t> _shape = kDefaultShape;
+};
+
+class ReluGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ relu = g.nodes()->create<luci::CircleRelu>();
+ relu->features(input);
+ relu->name("Relu");
+
+ return relu;
+ }
+
+public:
+ luci::CircleRelu *relu = nullptr;
+};
+
+class Relu6Graph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ relu6 = g.nodes()->create<luci::CircleRelu6>();
+ relu6->features(input);
+ relu6->name("relu6");
+
+ return relu6;
+ }
+
+public:
+ luci::CircleRelu6 *relu6 = nullptr;
+};
+
+class RsqrtGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ rsqrt = g.nodes()->create<luci::CircleRsqrt>();
+ rsqrt->x(input);
+ rsqrt->name("rsqrt");
+
+ return rsqrt;
+ }
+
+public:
+ luci::CircleRsqrt *rsqrt = nullptr;
+};
+
+class SplitVGraphlet
+{
+public:
+ SplitVGraphlet() = default;
+
+public:
+ void init(loco::Graph *g)
+ {
+ // CircleSplitV
+ _splitv = g->nodes()->create<luci::CircleSplitV>();
+ _splitv->shape({1, 2, 2, 192});
+ _splitv->dtype(loco::DataType::FLOAT32);
+ _splitv->name("splitv");
+
+ // CircleConst
+ auto size_splits = g->nodes()->create<luci::CircleConst>();
+ size_splits->dtype(loco::DataType::S32);
+ size_splits->shape({3});
+ size_splits->size<loco::DataType::S32>(3);
+ size_splits->at<loco::DataType::S32>(0) = 32;
+ size_splits->at<loco::DataType::S32>(1) = 32;
+ size_splits->at<loco::DataType::S32>(2) = 128;
+
+ // CircleConst
+ auto split_dim = g->nodes()->create<luci::CircleConst>();
+ split_dim->dtype(loco::DataType::S32);
+ split_dim->rank(0);
+ split_dim->size<loco::DataType::S32>(1);
+ split_dim->scalar<loco::DataType::S32>() = 3;
+
+ _splitv->size_splits(size_splits);
+ _splitv->split_dim(split_dim);
+ _splitv->num_split(3);
+
+ // CircleSplitVOut
+ _splitv_out1 = g->nodes()->create<luci::CircleSplitVOut>();
+ _splitv_out1->shape({1, 2, 2, 32});
+ _splitv_out1->dtype(loco::DataType::FLOAT32);
+ _splitv_out1->index(0);
+ _splitv_out1->input(_splitv);
+ _splitv_out1->name("splitv_out1");
+
+ // CircleSplitVOut
+ _splitv_out2 = g->nodes()->create<luci::CircleSplitVOut>();
+ _splitv_out2->shape({1, 2, 2, 32});
+ _splitv_out2->dtype(loco::DataType::FLOAT32);
+ _splitv_out2->index(1);
+ _splitv_out2->input(_splitv);
+ _splitv_out2->name("splitv_out2");
+
+ // CircleSplitVOut
+ _splitv_out3 = g->nodes()->create<luci::CircleSplitVOut>();
+ _splitv_out3->shape({1, 2, 2, 128});
+ _splitv_out3->dtype(loco::DataType::FLOAT32);
+ _splitv_out3->index(2);
+ _splitv_out3->input(_splitv);
+ _splitv_out3->name("splitv_out3");
+ }
+
+public:
+ luci::CircleSplitV *splitv() { return _splitv; }
+
+protected:
+ luci::CircleSplitV *_splitv = nullptr;
+ luci::CircleSplitVOut *_splitv_out1 = nullptr;
+ luci::CircleSplitVOut *_splitv_out2 = nullptr;
+ luci::CircleSplitVOut *_splitv_out3 = nullptr;
+};
+
+class SplitVGraph : public TestIGraphlet, public TestOsGraphlet<3>, public SplitVGraphlet
+{
+public:
+ SplitVGraph() = default;
+
+ void init(void)
+ {
+ TestIGraphlet::init(g(), {1, 2, 2, 192});
+ TestOsGraphlet<3>::init(g(), {{1, 2, 2, 32}, {1, 2, 2, 32}, {1, 2, 2, 128}});
+ SplitVGraphlet::init(g());
+
+ // connect graph
+ _splitv->input(input());
+
+ output(0)->from(_splitv_out1);
+ output(1)->from(_splitv_out2);
+ output(2)->from(_splitv_out3);
+ }
+};
+
+class SquaredDifferenceGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ sqdiff = g.nodes()->create<luci::CircleSquaredDifference>();
+ sqdiff->x(input);
+ sqdiff->y(input);
+ sqdiff->name("sqdiff");
+
+ return sqdiff;
+ }
+
+public:
+ luci::CircleSquaredDifference *sqdiff = nullptr;
+};
+
+class SubGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ sub = g.nodes()->create<luci::CircleSub>();
+ beta = g.nodes()->create<luci::CircleConst>();
+
+ sub->dtype(loco::DataType::FLOAT32);
+ beta->dtype(loco::DataType::FLOAT32);
+
+ uint32_t channel_size = 16;
+ sub->shape({1, channel_size, 4, 4});
+ beta->shape({1, channel_size, 1, 1});
+
+ beta->size<loco::DataType::FLOAT32>(channel_size);
+ for (uint32_t i = 0; i < channel_size; i++)
+ {
+ beta->at<loco::DataType::FLOAT32>(i) = i;
+ }
+
+ sub->x(input);
+ sub->y(beta);
+
+ sub->name("sub");
+ beta->name("beta");
+
+ return sub;
+ }
+
+public:
+ void update_const_shape_to_nchw(void)
+ {
+ uint32_t channel_size = 16;
+ beta->shape({1, channel_size, 4, 4});
+
+ beta->size<loco::DataType::FLOAT32>(channel_size * 4 * 4);
+ for (uint32_t i = 0; i < channel_size * 4 * 4; i++)
+ {
+ beta->at<loco::DataType::FLOAT32>(i) = i;
+ }
+ }
+
+public:
+ luci::CircleSub *sub = nullptr;
+ luci::CircleConst *beta = nullptr;
+};
+
+class SubScalarGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ sub = g.nodes()->create<luci::CircleSub>();
+ beta = g.nodes()->create<luci::CircleConst>();
+
+ sub->dtype(loco::DataType::FLOAT32);
+ beta->dtype(loco::DataType::FLOAT32);
+
+ uint32_t channel_size = 16;
+ sub->shape({1, channel_size, 4, 4});
+ beta->shape({1});
+
+ beta->size<loco::DataType::FLOAT32>(1);
+ beta->at<loco::DataType::FLOAT32>(0) = 5;
+
+ sub->x(beta);
+ sub->y(input);
+
+ sub->name("sub");
+ beta->name("beta");
+
+ return sub;
+ }
+
+public:
+ luci::CircleSub *sub = nullptr;
+ luci::CircleConst *beta = nullptr;
+};
+
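+// The helpers below verify the permutations of the inserted Transpose Ops:
+// {0, 2, 3, 1} converts NCHW to NHWC (pre), and {0, 3, 1, 2} converts
+// NHWC back to NCHW (post).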
+void check_pre_trans(loco::Node *node)
+{
+ auto pre_trans = dynamic_cast<luci::CircleTranspose *>(node);
+ EXPECT_NE(nullptr, pre_trans);
+ auto pre_trans_perm = dynamic_cast<luci::CircleConst *>(pre_trans->perm());
+ EXPECT_NE(nullptr, pre_trans_perm);
+ EXPECT_EQ(1, pre_trans_perm->rank());
+ EXPECT_EQ(4, pre_trans_perm->dim(0).value());
+ EXPECT_EQ(loco::DataType::S32, pre_trans_perm->dtype());
+ EXPECT_EQ(0, pre_trans_perm->at<loco::DataType::S32>(0));
+ EXPECT_EQ(2, pre_trans_perm->at<loco::DataType::S32>(1));
+ EXPECT_EQ(3, pre_trans_perm->at<loco::DataType::S32>(2));
+ EXPECT_EQ(1, pre_trans_perm->at<loco::DataType::S32>(3));
+}
+
+void check_post_trans(loco::Node *node)
+{
+ auto post_trans = dynamic_cast<luci::CircleTranspose *>(node);
+ EXPECT_NE(nullptr, post_trans);
+ auto post_trans_perm = dynamic_cast<luci::CircleConst *>(post_trans->perm());
+ EXPECT_NE(nullptr, post_trans_perm);
+ EXPECT_EQ(1, post_trans_perm->rank());
+ EXPECT_EQ(4, post_trans_perm->dim(0).value());
+ EXPECT_EQ(loco::DataType::S32, post_trans_perm->dtype());
+ EXPECT_EQ(0, post_trans_perm->at<loco::DataType::S32>(0));
+ EXPECT_EQ(3, post_trans_perm->at<loco::DataType::S32>(1));
+ EXPECT_EQ(1, post_trans_perm->at<loco::DataType::S32>(2));
+ EXPECT_EQ(2, post_trans_perm->at<loco::DataType::S32>(3));
+}
+
+void run_phase(loco::Graph *g, bool preserve_input, bool preserve_output)
+{
+ logo::Phase phase;
+
+ // Default passes.
+ phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+
+ // Pass to test
+ phase.emplace_back(
+ std::make_unique<luci::ConvertNCHWToNHWCPass>(preserve_input, preserve_output));
+
+ logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{g};
+ phase_runner.run(phase);
+}
+
+} // namespace
+
+TEST(ConvertNCHWToNHWCPassTest, name)
+{
+ luci::ConvertNCHWToNHWCPass pass(false, false);
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(ConvertNCHWToNHWC, Add)
+{
+ AddGraph g;
+ g.init();
+
+ run_phase(&g.g, false, false);
+
+ auto input_succs = loco::succs(g.input);
+ EXPECT_EQ(1, input_succs.size());
+ check_post_trans(*input_succs.begin());
+
+ check_pre_trans(g.add->x());
+
+ auto add_succs = loco::succs(g.add);
+ EXPECT_EQ(1, add_succs.size());
+ check_post_trans(*add_succs.begin());
+
+ uint32_t channel_size = 16;
+ auto new_beta = dynamic_cast<luci::CircleConst *>(g.add->y());
+ EXPECT_NE(nullptr, new_beta);
+ EXPECT_EQ(4, new_beta->rank());
+ EXPECT_EQ(1, new_beta->dim(0).value());
+ EXPECT_EQ(1, new_beta->dim(1).value());
+ EXPECT_EQ(1, new_beta->dim(2).value());
+ EXPECT_EQ(channel_size, new_beta->dim(3).value());
+
+ check_pre_trans(g.output->from());
+}
+
+TEST(ConvertNCHWToNHWC, Add_NCHW_const)
+{
+ AddGraph g;
+ g.init();
+ g.update_const_shape_to_nchw();
+
+ run_phase(&g.g, false, false);
+
+ check_pre_trans(g.add->x());
+
+ auto add_succs = loco::succs(g.add);
+ EXPECT_EQ(1, add_succs.size());
+ check_post_trans(*add_succs.begin());
+
+ uint32_t channel_size = 16;
+ auto new_beta = dynamic_cast<luci::CircleConst *>(g.add->y());
+ EXPECT_NE(nullptr, new_beta);
+ EXPECT_EQ(4, new_beta->rank());
+ EXPECT_EQ(1, new_beta->dim(0).value());
+ EXPECT_EQ(4, new_beta->dim(1).value());
+ EXPECT_EQ(4, new_beta->dim(2).value());
+ EXPECT_EQ(channel_size, new_beta->dim(3).value());
+}
+
+TEST(ConvertNCHWToNHWC, NHWC_Relu)
+{
+ // Relu is already NHWC, so it should not be converted
+ // i.e., the graph is not changed
+ NHWCReluGraph g;
+ g.init();
+
+ run_phase(&g.g, false, false);
+
+ EXPECT_EQ(g.pre_reshape, g.relu->features());
+
+ auto relu_succs = loco::succs(g.relu);
+ EXPECT_EQ(1, relu_succs.size());
+ EXPECT_EQ(g.post_reshape, *relu_succs.begin());
+}
+
+TEST(ConvertNCHWToNHWC, AddScalar)
+{
+ AddScalarGraph g;
+ g.init();
+
+ run_phase(&g.g, false, false);
+
+ auto input_succs = loco::succs(g.input);
+ EXPECT_EQ(1, input_succs.size());
+ check_post_trans(*input_succs.begin());
+
+ check_pre_trans(g.add->x());
+
+ auto add_succs = loco::succs(g.add);
+ EXPECT_EQ(1, add_succs.size());
+ check_post_trans(*add_succs.begin());
+
+ auto new_beta = dynamic_cast<luci::CircleConst *>(g.add->y());
+ EXPECT_NE(nullptr, new_beta);
+ EXPECT_EQ(4, new_beta->rank());
+ EXPECT_EQ(1, new_beta->dim(0).value());
+ EXPECT_EQ(1, new_beta->dim(1).value());
+ EXPECT_EQ(1, new_beta->dim(2).value());
+ EXPECT_EQ(1, new_beta->dim(3).value());
+
+ check_pre_trans(g.output->from());
+}
+
+TEST(ConvertNCHWToNHWC, Concatenation)
+{
+ ConcatenationGraph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.concat->values(0));
+ check_pre_trans(g.concat->values(1));
+
+ auto concat_succs = loco::succs(g.concat);
+ EXPECT_EQ(1, concat_succs.size());
+ check_post_trans(*concat_succs.begin());
+
+ // Check concat shape, axis
+ EXPECT_EQ(1, g.concat->dim(0).value());
+ EXPECT_EQ(4, g.concat->dim(1).value());
+ EXPECT_EQ(4, g.concat->dim(2).value());
+ EXPECT_EQ(32, g.concat->dim(3).value());
+ EXPECT_EQ(3, g.concat->axis());
+}
+
+TEST(ConvertNCHWToNHWC, Elu)
+{
+ EluGraph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.elu->features());
+
+ auto elu_succs = loco::succs(g.elu);
+ EXPECT_EQ(1, elu_succs.size());
+ check_post_trans(*elu_succs.begin());
+
+ // Check elu shape
+ EXPECT_EQ(1, g.elu->dim(0).value());
+ EXPECT_EQ(4, g.elu->dim(1).value());
+ EXPECT_EQ(4, g.elu->dim(2).value());
+ EXPECT_EQ(16, g.elu->dim(3).value());
+}
+
+TEST(ConvertNCHWToNHWC, LeakyRelu)
+{
+ LeakyReluGraph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.leakyrelu->features());
+
+ auto leakyrelu_succs = loco::succs(g.leakyrelu);
+ EXPECT_EQ(1, leakyrelu_succs.size());
+ check_post_trans(*leakyrelu_succs.begin());
+
+ // Check leakyrelu shape
+ EXPECT_EQ(1, g.leakyrelu->dim(0).value());
+ EXPECT_EQ(4, g.leakyrelu->dim(1).value());
+ EXPECT_EQ(4, g.leakyrelu->dim(2).value());
+ EXPECT_EQ(16, g.leakyrelu->dim(3).value());
+}
+
+TEST(ConvertNCHWToNHWC, Logistic)
+{
+ LogisticGraph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.logistic->x());
+
+ auto logistic_succs = loco::succs(g.logistic);
+ EXPECT_EQ(1, logistic_succs.size());
+ check_post_trans(*logistic_succs.begin());
+
+ // Check logistic shape
+ EXPECT_EQ(1, g.logistic->dim(0).value());
+ EXPECT_EQ(4, g.logistic->dim(1).value());
+ EXPECT_EQ(4, g.logistic->dim(2).value());
+ EXPECT_EQ(16, g.logistic->dim(3).value());
+}
+
+TEST(ConvertNCHWToNHWC, Maximum)
+{
+ MaximumGraph g;
+ g.init();
+
+ run_phase(&g.g, false, false);
+
+ auto input_succs = loco::succs(g.input);
+ EXPECT_EQ(1, input_succs.size());
+ check_post_trans(*input_succs.begin());
+
+ check_pre_trans(g.max->x());
+
+ auto max_succs = loco::succs(g.max);
+ EXPECT_EQ(1, max_succs.size());
+ check_post_trans(*max_succs.begin());
+
+ check_pre_trans(g.output->from());
+}
+
+TEST(ConvertNCHWToNHWC, Maximum_non_scalar_NEG)
+{
+ MaximumGraph g;
+ g.init();
+
+ g.limit->shape({3});
+
+ luci::ConvertNCHWToNHWCPass pass(true, true);
+ EXPECT_FALSE(pass.run(&g.g));
+}
+
+TEST(ConvertNCHWToNHWC, MaximumNonConst)
+{
+ MaximumNonConstGraph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.max->x());
+ check_pre_trans(g.max->y());
+
+ auto max_succs = loco::succs(g.max);
+ EXPECT_EQ(1, max_succs.size());
+ check_post_trans(*max_succs.begin());
+}
+
+TEST(ConvertNCHWToNHWC, Mean)
+{
+ MeanGraph g;
+ g.init();
+
+ run_phase(&g.g, false, false);
+
+ check_pre_trans(g.mean->input());
+
+ auto mean_succs = loco::succs(g.mean);
+ EXPECT_EQ(1, mean_succs.size());
+ check_post_trans(*mean_succs.begin());
+
+ auto new_rindices = dynamic_cast<luci::CircleConst *>(g.mean->reduction_indices());
+ EXPECT_NE(nullptr, new_rindices);
+ EXPECT_EQ(1, new_rindices->rank());
+ EXPECT_EQ(2, new_rindices->dim(0).value());
+ EXPECT_EQ(2, new_rindices->size<loco::DataType::S32>());
+ EXPECT_EQ(1, new_rindices->at<loco::DataType::S32>(0));
+ EXPECT_EQ(2, new_rindices->at<loco::DataType::S32>(1));
+}
+
+TEST(ConvertNCHWToNHWC, Mean_keep_dims_false)
+{
+ struct TC
+ {
+ std::vector<int32_t> nchw_ind;
+ std::vector<int32_t> nhwc_ind;
+ std::initializer_list<uint32_t> shape;
+ bool needs_transpose = false;
+ };
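+ // Each TC maps NCHW reduction indices to the NHWC indices expected after
+ // conversion, gives the expected output shape, and states whether a
+ // post-Transpose must be inserted (none is needed when the channel dim is
+ // reduced, or when both spatial dims are reduced).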
+
+ uint32_t n = 1;
+ uint32_t c = 16;
+ uint32_t h = 4;
+ uint32_t w = 4;
+
+ std::vector<TC> test_cases{{{0}, {0}, {c, h, w}, true}, {{1}, {3}, {n, h, w}, false},
+ {{2}, {1}, {n, c, w}, true}, {{3}, {2}, {n, c, h}, true},
+ {{0, 1}, {0, 3}, {h, w}, false}, {{0, 2}, {0, 1}, {c, w}, true},
+ {{0, 3}, {0, 2}, {c, h}, true}, {{1, 2}, {3, 1}, {n, w}, false},
+ {{1, 3}, {3, 2}, {n, h}, false}, {{2, 3}, {1, 2}, {n, c}, false},
+ {{0, 1, 2}, {0, 3, 1}, {w}, false}};
+
+ for (auto &tc : test_cases)
+ {
+ MeanGraph g;
+ g.keep_dims(false);
+ g.axes(tc.nchw_ind);
+ g.shape(tc.shape);
+ g.init();
+
+ run_phase(&g.g, false, true);
+
+ check_pre_trans(g.mean->input());
+
+ auto mean_succs = loco::succs(g.mean);
+ EXPECT_EQ(1, mean_succs.size());
+ if (tc.needs_transpose)
+ {
+ EXPECT_NE(nullptr, dynamic_cast<luci::CircleTranspose *>(*mean_succs.begin()));
+ }
+ else
+ {
+ EXPECT_NE(nullptr, dynamic_cast<luci::CircleOutput *>(*mean_succs.begin()));
+ }
+
+ auto new_rindices = dynamic_cast<luci::CircleConst *>(g.mean->reduction_indices());
+ EXPECT_NE(nullptr, new_rindices);
+ EXPECT_EQ(1, new_rindices->rank());
+ EXPECT_EQ(tc.nhwc_ind.size(), new_rindices->dim(0).value());
+ EXPECT_EQ(tc.nhwc_ind.size(), new_rindices->size<loco::DataType::S32>());
+ for (uint32_t i = 0; i < tc.nhwc_ind.size(); ++i)
+ {
+ EXPECT_EQ(tc.nhwc_ind[i], new_rindices->at<loco::DataType::S32>(i));
+ }
+ }
+}
+
+TEST(ConvertNCHWToNHWC, ConvertNCHWToNHWC_Mean_keep_dims_false_NEG)
+{
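+ // The input is rank 3, so the CircleMean visitor must bail out and the
+ // reduction indices must remain untouched.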
+ loco::Graph g;
+ auto input = g.nodes()->create<luci::CircleInput>();
+ auto output = g.nodes()->create<luci::CircleOutput>();
+ input->name("input");
+ output->name("output");
+
+ auto graph_input = g.inputs()->create();
+ input->index(graph_input->index());
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+
+ graph_input->dtype(loco::DataType::FLOAT32);
+ input->dtype(loco::DataType::FLOAT32);
+ output->dtype(loco::DataType::FLOAT32);
+ graph_output->dtype(loco::DataType::FLOAT32);
+
+ uint32_t channel_size = 16;
+ graph_input->shape({channel_size, 4, 4});
+ input->shape({channel_size, 4, 4});
+ output->shape({channel_size});
+ graph_output->shape({channel_size});
+
+ auto mean = g.nodes()->create<luci::CircleMean>();
+ auto rindices = g.nodes()->create<luci::CircleConst>();
+
+ mean->dtype(loco::DataType::FLOAT32);
+ rindices->dtype(loco::DataType::S32);
+
+ mean->shape({channel_size});
+ rindices->shape({2});
+
+ rindices->size<loco::DataType::S32>(2);
+ rindices->at<loco::DataType::S32>(0) = 1;
+ rindices->at<loco::DataType::S32>(1) = 2;
+
+ mean->input(input);
+ mean->reduction_indices(rindices);
+ mean->keep_dims(false);
+
+ mean->name("mean");
+ rindices->name("rindices");
+
+ output->from(mean);
+
+ run_phase(&g, true, true);
+
+ auto new_rindices = dynamic_cast<luci::CircleConst *>(mean->reduction_indices());
+ EXPECT_NE(nullptr, new_rindices);
+ EXPECT_EQ(1, new_rindices->rank());
+ EXPECT_EQ(2, new_rindices->dim(0).value());
+ EXPECT_EQ(2, new_rindices->size<loco::DataType::S32>());
+ EXPECT_EQ(1, new_rindices->at<loco::DataType::S32>(0));
+ EXPECT_EQ(2, new_rindices->at<loco::DataType::S32>(1));
+}
+
+TEST(ConvertNCHWToNHWC, Minimum)
+{
+ MinimumGraph g;
+ g.init();
+
+ run_phase(&g.g, false, false);
+
+ auto input_succs = loco::succs(g.input);
+ EXPECT_EQ(1, input_succs.size());
+ check_post_trans(*input_succs.begin());
+
+ check_pre_trans(g.min->x());
+
+ auto min_succs = loco::succs(g.min);
+ EXPECT_EQ(1, min_succs.size());
+ check_post_trans(*min_succs.begin());
+
+ check_pre_trans(g.output->from());
+}
+
+TEST(ConvertNCHWToNHWC, Minimum_non_scalar_NEG)
+{
+ MinimumGraph g;
+ g.init();
+
+ g.limit->shape({3});
+
+ luci::ConvertNCHWToNHWCPass pass(true, true);
+ EXPECT_FALSE(pass.run(&g.g));
+}
+
+TEST(ConvertNCHWToNHWC, Mul)
+{
+ MulGraph g;
+ g.init();
+
+ run_phase(&g.g, false, false);
+
+ auto input_succs = loco::succs(g.input);
+ EXPECT_EQ(1, input_succs.size());
+ check_post_trans(*input_succs.begin());
+
+ check_pre_trans(g.mul->x());
+
+ auto mul_succs = loco::succs(g.mul);
+ EXPECT_EQ(1, mul_succs.size());
+ check_post_trans(*mul_succs.begin());
+
+ uint32_t channel_size = 16;
+ auto new_multiplier = dynamic_cast<luci::CircleConst *>(g.mul->y());
+ EXPECT_NE(nullptr, new_multiplier);
+ EXPECT_EQ(4, new_multiplier->rank());
+ EXPECT_EQ(1, new_multiplier->dim(0).value());
+ EXPECT_EQ(1, new_multiplier->dim(1).value());
+ EXPECT_EQ(1, new_multiplier->dim(2).value());
+ EXPECT_EQ(channel_size, new_multiplier->dim(3).value());
+
+ check_pre_trans(g.output->from());
+}
+
+TEST(ConvertNCHWToNHWC, Mul_NCHW_const)
+{
+ MulGraph g;
+ g.init();
+ g.update_const_shape_to_nchw();
+
+ run_phase(&g.g, false, false);
+
+ check_pre_trans(g.mul->x());
+
+ auto mul_succs = loco::succs(g.mul);
+ EXPECT_EQ(1, mul_succs.size());
+ check_post_trans(*mul_succs.begin());
+
+ uint32_t channel_size = 16;
+ auto new_multiplier = dynamic_cast<luci::CircleConst *>(g.mul->y());
+ EXPECT_NE(nullptr, new_multiplier);
+ EXPECT_EQ(4, new_multiplier->rank());
+ EXPECT_EQ(1, new_multiplier->dim(0).value());
+ EXPECT_EQ(4, new_multiplier->dim(1).value());
+ EXPECT_EQ(4, new_multiplier->dim(2).value());
+ EXPECT_EQ(channel_size, new_multiplier->dim(3).value());
+}
+
+TEST(ConvertNCHWToNHWC, MulScalar)
+{
+ MulScalarGraph g;
+ g.init();
+
+ run_phase(&g.g, false, false);
+
+ auto input_succs = loco::succs(g.input);
+ EXPECT_EQ(1, input_succs.size());
+ check_post_trans(*input_succs.begin());
+
+ check_pre_trans(g.mul->x());
+
+ auto mul_succs = loco::succs(g.mul);
+ EXPECT_EQ(1, mul_succs.size());
+ check_post_trans(*mul_succs.begin());
+
+ auto new_multiplier = dynamic_cast<luci::CircleConst *>(g.mul->y());
+ EXPECT_NE(nullptr, new_multiplier);
+ EXPECT_EQ(4, new_multiplier->rank());
+ EXPECT_EQ(1, new_multiplier->dim(0).value());
+ EXPECT_EQ(1, new_multiplier->dim(1).value());
+ EXPECT_EQ(1, new_multiplier->dim(2).value());
+ EXPECT_EQ(1, new_multiplier->dim(3).value());
+
+ check_pre_trans(g.output->from());
+}
+
+TEST(ConvertNCHWToNHWC, MulBothNorm)
+{
+ MulBothNormGraph g;
+ g.init();
+
+ run_phase(&g.g, false, false);
+
+ auto input_succs = loco::succs(g.input);
+ EXPECT_EQ(1, input_succs.size());
+ check_post_trans(*input_succs.begin());
+
+ check_pre_trans(g.mul->x());
+ check_pre_trans(g.mul->y());
+
+ auto mul_succs = loco::succs(g.mul);
+ EXPECT_EQ(1, mul_succs.size());
+ check_post_trans(*mul_succs.begin());
+
+ check_pre_trans(g.output->from());
+}
+
+TEST(ConvertNCHWToNHWC, Neg)
+{
+ NegGraph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.neg->x());
+
+ auto neg_succs = loco::succs(g.neg);
+ EXPECT_EQ(1, neg_succs.size());
+ check_post_trans(*neg_succs.begin());
+
+ // Check neg shape
+ EXPECT_EQ(1, g.neg->dim(0).value());
+ EXPECT_EQ(4, g.neg->dim(1).value());
+ EXPECT_EQ(4, g.neg->dim(2).value());
+ EXPECT_EQ(16, g.neg->dim(3).value());
+}
+
+TEST(ConvertNCHWToNHWC, Pad)
+{
+ PadGraph g;
+ g.init();
+
+ run_phase(&g.g, false, false);
+
+ auto input_succs = loco::succs(g.input);
+ EXPECT_EQ(1, input_succs.size());
+ check_post_trans(*input_succs.begin());
+
+ check_pre_trans(g.pad->input());
+
+ auto pad_succs = loco::succs(g.pad);
+ EXPECT_EQ(1, pad_succs.size());
+ check_post_trans(*pad_succs.begin());
+
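+ // The expected NHWC paddings below correspond to reordering the rows
+ // from (N, C, H, W) to (N, H, W, C) order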
+ auto new_paddings = dynamic_cast<luci::CircleConst *>(g.pad->paddings());
+ EXPECT_NE(nullptr, new_paddings);
+ EXPECT_EQ(2, new_paddings->rank());
+ EXPECT_EQ(4, new_paddings->dim(0).value());
+ EXPECT_EQ(2, new_paddings->dim(1).value());
+ EXPECT_EQ(0, new_paddings->at<loco::DataType::S32>(0));
+ EXPECT_EQ(0, new_paddings->at<loco::DataType::S32>(1));
+ EXPECT_EQ(1, new_paddings->at<loco::DataType::S32>(2));
+ EXPECT_EQ(1, new_paddings->at<loco::DataType::S32>(3));
+ EXPECT_EQ(2, new_paddings->at<loco::DataType::S32>(4));
+ EXPECT_EQ(2, new_paddings->at<loco::DataType::S32>(5));
+ EXPECT_EQ(0, new_paddings->at<loco::DataType::S32>(6));
+ EXPECT_EQ(0, new_paddings->at<loco::DataType::S32>(7));
+
+ check_pre_trans(g.output->from());
+}
+
+TEST(ConvertNCHWToNHWC, PadV2)
+{
+ PadV2Graph g;
+ g.init();
+
+ run_phase(&g.g, false, false);
+
+ check_pre_trans(g.pad->input());
+
+ auto pad_succs = loco::succs(g.pad);
+ EXPECT_EQ(1, pad_succs.size());
+ check_post_trans(*pad_succs.begin());
+
+ auto new_paddings = dynamic_cast<luci::CircleConst *>(g.pad->paddings());
+ EXPECT_NE(nullptr, new_paddings);
+ EXPECT_EQ(2, new_paddings->rank());
+ EXPECT_EQ(4, new_paddings->dim(0).value());
+ EXPECT_EQ(2, new_paddings->dim(1).value());
+ EXPECT_EQ(0, new_paddings->at<loco::DataType::S32>(0));
+ EXPECT_EQ(0, new_paddings->at<loco::DataType::S32>(1));
+ EXPECT_EQ(1, new_paddings->at<loco::DataType::S32>(2));
+ EXPECT_EQ(1, new_paddings->at<loco::DataType::S32>(3));
+ EXPECT_EQ(2, new_paddings->at<loco::DataType::S32>(4));
+ EXPECT_EQ(2, new_paddings->at<loco::DataType::S32>(5));
+ EXPECT_EQ(0, new_paddings->at<loco::DataType::S32>(6));
+ EXPECT_EQ(0, new_paddings->at<loco::DataType::S32>(7));
+}
+
+TEST(ConvertNCHWToNHWC, Unknown_Shape_NEG)
+{
+ AddGraph g;
+ g.init();
+
+ // Unknown shape
+ g.input->dim(0).unset();
+ g.add->dim(0).unset();
+ g.output->dim(0).unset();
+
+ luci::ConvertNCHWToNHWCPass pass(false, false);
+ EXPECT_FALSE(pass.run(&g.g));
+}
+
+TEST(ConvertNCHWToNHWC, Preserve_Input_Output)
+{
+ // Preserve input
+ {
+ AddGraph g;
+ g.init();
+
+ run_phase(&g.g, true, false);
+
+ // Check input shape
+ EXPECT_EQ(1, g.input->dim(0).value());
+ EXPECT_EQ(16, g.input->dim(1).value());
+ EXPECT_EQ(4, g.input->dim(2).value());
+ EXPECT_EQ(4, g.input->dim(3).value());
+
+ // Check output shape
+ EXPECT_EQ(1, g.output->dim(0).value());
+ EXPECT_EQ(4, g.output->dim(1).value());
+ EXPECT_EQ(4, g.output->dim(2).value());
+ EXPECT_EQ(16, g.output->dim(3).value());
+ }
+
+ // Preserve output
+ {
+ AddGraph g;
+ g.init();
+
+ run_phase(&g.g, false, true);
+
+ // Check input shape
+ EXPECT_EQ(1, g.input->dim(0).value());
+ EXPECT_EQ(4, g.input->dim(1).value());
+ EXPECT_EQ(4, g.input->dim(2).value());
+ EXPECT_EQ(16, g.input->dim(3).value());
+
+ // Check output shape
+ EXPECT_EQ(1, g.output->dim(0).value());
+ EXPECT_EQ(16, g.output->dim(1).value());
+ EXPECT_EQ(4, g.output->dim(2).value());
+ EXPECT_EQ(4, g.output->dim(3).value());
+ }
+
+ // Preserve both input and output
+ {
+ AddGraph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ // Check input shape
+ EXPECT_EQ(1, g.input->dim(0).value());
+ EXPECT_EQ(16, g.input->dim(1).value());
+ EXPECT_EQ(4, g.input->dim(2).value());
+ EXPECT_EQ(4, g.input->dim(3).value());
+
+ // Check output shape
+ EXPECT_EQ(1, g.output->dim(0).value());
+ EXPECT_EQ(16, g.output->dim(1).value());
+ EXPECT_EQ(4, g.output->dim(2).value());
+ EXPECT_EQ(4, g.output->dim(3).value());
+ }
+}
+
+TEST(ConvertNCHWToNHWC, ReduceMax)
+{
+ ReduceMaxGraph g;
+ g.init();
+
+ run_phase(&g.g, false, false);
+
+ check_pre_trans(g.rm->input());
+
+ auto rm_succs = loco::succs(g.rm);
+ EXPECT_EQ(1, rm_succs.size());
+ check_post_trans(*rm_succs.begin());
+
+ auto new_rindices = dynamic_cast<luci::CircleConst *>(g.rm->reduction_indices());
+ EXPECT_NE(nullptr, new_rindices);
+ EXPECT_EQ(1, new_rindices->rank());
+ EXPECT_EQ(2, new_rindices->dim(0).value());
+ EXPECT_EQ(2, new_rindices->size<loco::DataType::S32>());
+ EXPECT_EQ(1, new_rindices->at<loco::DataType::S32>(0));
+ EXPECT_EQ(2, new_rindices->at<loco::DataType::S32>(1));
+}
+
+TEST(ConvertNCHWToNHWC, ReduceMax_keep_dims_false)
+{
+ struct TC
+ {
+ std::vector<int32_t> nchw_ind;
+ std::vector<int32_t> nhwc_ind;
+ std::initializer_list<uint32_t> shape;
+ bool needs_transpose = false;
+ };
+
+ uint32_t n = 1;
+ uint32_t c = 16;
+ uint32_t h = 4;
+ uint32_t w = 4;
+
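+ // Axis mapping from NCHW (N, C, H, W) to NHWC (N, H, W, C):
+ // 0 -> 0, 1 -> 3, 2 -> 1, 3 -> 2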
+ std::vector<TC> test_cases{{{0}, {0}, {c, h, w}, true}, {{1}, {3}, {n, h, w}, false},
+ {{2}, {1}, {n, c, w}, true}, {{3}, {2}, {n, c, h}, true},
+ {{0, 1}, {0, 3}, {h, w}, false}, {{0, 2}, {0, 1}, {c, w}, true},
+ {{0, 3}, {0, 2}, {c, h}, true}, {{1, 2}, {3, 1}, {n, w}, false},
+ {{1, 3}, {3, 2}, {n, h}, false}, {{2, 3}, {1, 2}, {n, c}, false},
+ {{0, 1, 2}, {0, 3, 1}, {w}, false}};
+
+ for (auto &tc : test_cases)
+ {
+ ReduceMaxGraph g;
+ g.keep_dims(false);
+ g.axes(tc.nchw_ind);
+ g.shape(tc.shape);
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.rm->input());
+
+ auto rm_succs = loco::succs(g.rm);
+ EXPECT_EQ(1, rm_succs.size());
+ if (tc.needs_transpose)
+ {
+ EXPECT_NE(nullptr, dynamic_cast<luci::CircleTranspose *>(*rm_succs.begin()));
+ }
+ else
+ {
+ EXPECT_NE(nullptr, dynamic_cast<luci::CircleOutput *>(*rm_succs.begin()));
+ }
+
+ auto new_rindices = dynamic_cast<luci::CircleConst *>(g.rm->reduction_indices());
+ EXPECT_NE(nullptr, new_rindices);
+ EXPECT_EQ(1, new_rindices->rank());
+ EXPECT_EQ(tc.nhwc_ind.size(), new_rindices->dim(0).value());
+ EXPECT_EQ(tc.nhwc_ind.size(), new_rindices->size<loco::DataType::S32>());
+ for (uint32_t i = 0; i < tc.nhwc_ind.size(); ++i)
+ {
+ EXPECT_EQ(tc.nhwc_ind[i], new_rindices->at<loco::DataType::S32>(i));
+ }
+ }
+}
+
+TEST(ConvertNCHWToNHWC, ReduceMin)
+{
+ ReduceMinGraph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.rm->input());
+
+ auto rm_succs = loco::succs(g.rm);
+ EXPECT_EQ(1, rm_succs.size());
+ check_post_trans(*rm_succs.begin());
+
+ auto new_rindices = dynamic_cast<luci::CircleConst *>(g.rm->reduction_indices());
+ EXPECT_NE(nullptr, new_rindices);
+ EXPECT_EQ(1, new_rindices->rank());
+ EXPECT_EQ(2, new_rindices->dim(0).value());
+ EXPECT_EQ(2, new_rindices->size<loco::DataType::S32>());
+ EXPECT_EQ(1, new_rindices->at<loco::DataType::S32>(0));
+ EXPECT_EQ(2, new_rindices->at<loco::DataType::S32>(1));
+}
+
+TEST(ConvertNCHWToNHWC, ReduceMin_keep_dims_false)
+{
+ struct TC
+ {
+ std::vector<int32_t> nchw_ind;
+ std::vector<int32_t> nhwc_ind;
+ std::initializer_list<uint32_t> shape;
+ bool needs_transpose = false;
+ };
+
+ uint32_t n = 1;
+ uint32_t c = 16;
+ uint32_t h = 4;
+ uint32_t w = 4;
+
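+ // Same NCHW -> NHWC axis mapping as in the ReduceMax test above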
+ std::vector<TC> test_cases{{{0}, {0}, {c, h, w}, true}, {{1}, {3}, {n, h, w}, false},
+ {{2}, {1}, {n, c, w}, true}, {{3}, {2}, {n, c, h}, true},
+ {{0, 1}, {0, 3}, {h, w}, false}, {{0, 2}, {0, 1}, {c, w}, true},
+ {{0, 3}, {0, 2}, {c, h}, true}, {{1, 2}, {3, 1}, {n, w}, false},
+ {{1, 3}, {3, 2}, {n, h}, false}, {{2, 3}, {1, 2}, {n, c}, false},
+ {{0, 1, 2}, {0, 3, 1}, {w}, false}};
+
+ for (auto &tc : test_cases)
+ {
+ ReduceMinGraph g;
+ g.keep_dims(false);
+ g.axes(tc.nchw_ind);
+ g.shape(tc.shape);
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.rm->input());
+
+ auto rm_succs = loco::succs(g.rm);
+ EXPECT_EQ(1, rm_succs.size());
+ if (tc.needs_transpose)
+ {
+ EXPECT_NE(nullptr, dynamic_cast<luci::CircleTranspose *>(*rm_succs.begin()));
+ }
+ else
+ {
+ EXPECT_NE(nullptr, dynamic_cast<luci::CircleOutput *>(*rm_succs.begin()));
+ }
+
+ auto new_rindices = dynamic_cast<luci::CircleConst *>(g.rm->reduction_indices());
+ EXPECT_NE(nullptr, new_rindices);
+ EXPECT_EQ(1, new_rindices->rank());
+ EXPECT_EQ(tc.nhwc_ind.size(), new_rindices->dim(0).value());
+ EXPECT_EQ(tc.nhwc_ind.size(), new_rindices->size<loco::DataType::S32>());
+ for (uint32_t i = 0; i < tc.nhwc_ind.size(); ++i)
+ {
+ EXPECT_EQ(tc.nhwc_ind[i], new_rindices->at<loco::DataType::S32>(i));
+ }
+ }
+}
+
+TEST(ConvertNCHWToNHWC, Relu)
+{
+ ReluGraph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.relu->features());
+
+ auto relu_succs = loco::succs(g.relu);
+ EXPECT_EQ(1, relu_succs.size());
+ check_post_trans(*relu_succs.begin());
+
+ // Check relu shape
+ EXPECT_EQ(1, g.relu->dim(0).value());
+ EXPECT_EQ(4, g.relu->dim(1).value());
+ EXPECT_EQ(4, g.relu->dim(2).value());
+ EXPECT_EQ(16, g.relu->dim(3).value());
+}
+
+TEST(ConvertNCHWToNHWC, Relu6)
+{
+ Relu6Graph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.relu6->features());
+
+ auto relu6_succs = loco::succs(g.relu6);
+ EXPECT_EQ(1, relu6_succs.size());
+ check_post_trans(*relu6_succs.begin());
+
+ // Check relu6 shape
+ EXPECT_EQ(1, g.relu6->dim(0).value());
+ EXPECT_EQ(4, g.relu6->dim(1).value());
+ EXPECT_EQ(4, g.relu6->dim(2).value());
+ EXPECT_EQ(16, g.relu6->dim(3).value());
+}
+
+TEST(ConvertNCHWToNHWC, Rsqrt)
+{
+ RsqrtGraph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.rsqrt->x());
+
+ auto rsqrt_succs = loco::succs(g.rsqrt);
+ EXPECT_EQ(1, rsqrt_succs.size());
+ check_post_trans(*rsqrt_succs.begin());
+
+ // Check rsqrt shape
+ EXPECT_EQ(1, g.rsqrt->dim(0).value());
+ EXPECT_EQ(4, g.rsqrt->dim(1).value());
+ EXPECT_EQ(4, g.rsqrt->dim(2).value());
+ EXPECT_EQ(16, g.rsqrt->dim(3).value());
+}
+
+TEST(ConvertNCHWToNHWC, SplitV)
+{
+ SplitVGraph g;
+ g.init();
+
+ run_phase(g.g(), true, true);
+
+ check_pre_trans(g.splitv()->input());
+
+ auto splitv_succs = loco::succs(g.splitv());
+ for (auto svo : splitv_succs)
+ {
+ for (auto succ : loco::succs(svo))
+ {
+ check_post_trans(succ);
+ }
+ }
+
+ // Check splitv shape
+ EXPECT_EQ(1, g.splitv()->dim(0).value());
+ EXPECT_EQ(2, g.splitv()->dim(1).value());
+ EXPECT_EQ(192, g.splitv()->dim(2).value());
+ EXPECT_EQ(2, g.splitv()->dim(3).value());
+
+ // Check axis
+ auto axis = dynamic_cast<luci::CircleConst *>(g.splitv()->split_dim());
+ EXPECT_NE(nullptr, axis);
+ EXPECT_EQ(1, axis->size<loco::DataType::S32>());
+ EXPECT_EQ(2, axis->at<loco::DataType::S32>(0));
+}
+
+TEST(ConvertNCHWToNHWC, SquaredDifference)
+{
+ SquaredDifferenceGraph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.sqdiff->x());
+ check_pre_trans(g.sqdiff->y());
+
+ auto sqdiff_succs = loco::succs(g.sqdiff);
+ EXPECT_EQ(1, sqdiff_succs.size());
+ check_post_trans(*sqdiff_succs.begin());
+}
+
+TEST(ConvertNCHWToNHWC, Sub)
+{
+ SubGraph g;
+ g.init();
+
+ run_phase(&g.g, false, false);
+
+ auto input_succs = loco::succs(g.input);
+ EXPECT_EQ(1, input_succs.size());
+ check_post_trans(*input_succs.begin());
+
+ check_pre_trans(g.sub->x());
+
+ auto sub_succs = loco::succs(g.sub);
+ EXPECT_EQ(1, sub_succs.size());
+ check_post_trans(*sub_succs.begin());
+
+ uint32_t channel_size = 16;
+ auto new_beta = dynamic_cast<luci::CircleConst *>(g.sub->y());
+ EXPECT_NE(nullptr, new_beta);
+ EXPECT_EQ(4, new_beta->rank());
+ EXPECT_EQ(1, new_beta->dim(0).value());
+ EXPECT_EQ(1, new_beta->dim(1).value());
+ EXPECT_EQ(1, new_beta->dim(2).value());
+ EXPECT_EQ(channel_size, new_beta->dim(3).value());
+
+ check_pre_trans(g.output->from());
+}
+
+TEST(ConvertNCHWToNHWC, Sub_NCHW_const)
+{
+ SubGraph g;
+ g.init();
+ g.update_const_shape_to_nchw();
+
+ run_phase(&g.g, false, false);
+
+ check_pre_trans(g.sub->x());
+
+ auto sub_succs = loco::succs(g.sub);
+ EXPECT_EQ(1, sub_succs.size());
+ check_post_trans(*sub_succs.begin());
+
+ uint32_t channel_size = 16;
+ auto new_beta = dynamic_cast<luci::CircleConst *>(g.sub->y());
+ EXPECT_NE(nullptr, new_beta);
+ EXPECT_EQ(4, new_beta->rank());
+ EXPECT_EQ(1, new_beta->dim(0).value());
+ EXPECT_EQ(4, new_beta->dim(1).value());
+ EXPECT_EQ(4, new_beta->dim(2).value());
+ EXPECT_EQ(channel_size, new_beta->dim(3).value());
+}
+
+TEST(ConvertNCHWToNHWC, SubScalar)
+{
+ SubScalarGraph g;
+ g.init();
+
+ run_phase(&g.g, false, false);
+
+ auto input_succs = loco::succs(g.input);
+ EXPECT_EQ(1, input_succs.size());
+ check_post_trans(*input_succs.begin());
+
+ check_pre_trans(g.sub->y());
+
+ auto sub_succs = loco::succs(g.sub);
+ EXPECT_EQ(1, sub_succs.size());
+ check_post_trans(*sub_succs.begin());
+
+ auto new_beta = dynamic_cast<luci::CircleConst *>(g.sub->x());
+ EXPECT_NE(nullptr, new_beta);
+ EXPECT_EQ(1, new_beta->rank());
+
+ check_pre_trans(g.output->from());
+}
+
+TEST(ConvertNCHWToNHWC, Not_Closed_Case1_NEG)
+{
+ NoPostReshapeGraph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.relu->features());
+
+ auto relu_succs = loco::succs(g.relu);
+ EXPECT_EQ(1, relu_succs.size());
+ check_post_trans(*relu_succs.begin());
+}
+
+TEST(ConvertNCHWToNHWC, Not_Closed_Case2_NEG)
+{
+ ReluNotClosedGraph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.relu->features());
+
+ auto relu_succs = loco::succs(g.relu);
+ EXPECT_EQ(1, relu_succs.size());
+ check_post_trans(*relu_succs.begin());
+}
diff --git a/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.cpp b/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.cpp
new file mode 100644
index 000000000..ae5ab1519
--- /dev/null
+++ b/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.cpp
@@ -0,0 +1,286 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ConvertToFakeQuantizedModelPass.h"
+#include "luci/Pass/QuantizationParameters.h"
+
+#include "QuantizationUtils.h"
+
+#include <luci/Profile/CircleNodeOrigin.h>
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Log.h>
+
+namespace
+{
+
+// Create Quantize Op whose dtype/shape/qparam are the same with node
+luci::CircleQuantize *create_quantize(luci::CircleNode *node)
+{
+ auto quantize = node->graph()->nodes()->create<luci::CircleQuantize>();
+ // DESIGN NOTE: Why use '_FQ_Quantize' instead of '_Quantize'?
+ // '_Quantize' is used in mixed-precision quantization,
+ // so we add '_FQ' to distinguish this Op from the mixed-precision one
+ quantize->name(node->name() + "_FQ_Quantize");
+ quantize->dtype(node->dtype());
+ quantize->rank(node->rank());
+ for (uint32_t i = 0; i < node->rank(); i++)
+ quantize->dim(i).set(node->dim(i).value());
+
+ quantize->shape_status(luci::ShapeStatus::VALID);
+
+ copy_quantparam(node, quantize);
+
+ luci::add_origin(quantize, luci::get_origin(node));
+
+ return quantize;
+}
+
+// Create Dequantize Op whose shape is the same with node
+luci::CircleDequantize *create_dequantize(luci::CircleNode *node)
+{
+ auto dequantize = node->graph()->nodes()->create<luci::CircleDequantize>();
+ // DESIGN NOTE: Why use '_FQ_Dequantize' instead of '_Dequantize'?
+ // '_Dequantize' is used in mixed-precision quantization,
+ // so we add '_FQ' to distinguish this Op from the mixed-precision one
+ dequantize->name(node->name() + "_FQ_Dequantize");
+ dequantize->dtype(loco::DataType::FLOAT32);
+ dequantize->rank(node->rank());
+ for (uint32_t i = 0; i < node->rank(); i++)
+ dequantize->dim(i).set(node->dim(i).value());
+
+ dequantize->shape_status(luci::ShapeStatus::VALID);
+
+ luci::add_origin(dequantize, luci::get_origin(node));
+
+ return dequantize;
+}
+
+// Return true if node is a quantized activation
+// 1. dtype is u8 or s16
+// 2. node has qparam
+bool is_quant_act(const luci::CircleNode *node)
+{
+ if (node->dtype() != loco::DataType::U8 and node->dtype() != loco::DataType::S16)
+ return false;
+
+ if (not node->quantparam())
+ return false;
+
+ return true;
+}
+
+// Return true if node is a quantized const
+// 1. dtype is not fp32
+// 2. node has qparam
+// NOTE Quantized const can have the following types
+// u8 (weights, activation), s16 (weights, activation), s32 (bias), s64 (bias)
+bool is_quant_const(const luci::CircleConst *node)
+{
+ if (node->dtype() == loco::DataType::FLOAT32)
+ return false;
+
+ if (not node->quantparam())
+ return false;
+
+ return true;
+}
+
+// Insert dequantize Op after node
+void insert_dequantize(loco::Node *lnode)
+{
+ auto node = loco::must_cast<luci::CircleNode *>(lnode);
+ auto dequant = create_dequantize(node);
+ loco::replace(node).with(dequant);
+ dequant->input(node);
+}
+
+// Insert quantize Op after node and return the quantize Op
+luci::CircleQuantize *insert_quantize(loco::Node *lnode)
+{
+ auto node = loco::must_cast<luci::CircleNode *>(lnode);
+ auto quant = create_quantize(node);
+ loco::replace(node).with(quant);
+ quant->input(node);
+ return quant;
+}
+
+// Dequantize node
+void dequantize(luci::CircleNode *node)
+{
+ node->dtype(loco::DataType::FLOAT32);
+ node->quantparam(nullptr);
+}
+
+// Do fake quantization on quantized activation
+// 1. Insert Quantize-Dequantize Ops
+// 2. Update dtype/quantparam of node
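+//
+// e.g. an u8 node N with qparam (s, zp) becomes
+//   N (fp32) -> Quantize (u8, s, zp) -> Dequantize (fp32)
+// and N's users are rewired to consume the Dequantize output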
+void fq_activation(luci::CircleNode *node)
+{
+ if (not is_quant_act(node))
+ return;
+
+ auto quant = insert_quantize(node);
+ insert_dequantize(quant);
+
+ dequantize(node);
+}
+
+#define RETURN_UNLESS(COND) \
+ if (not(COND)) \
+ return;
+
+// Visitor to do fake quantization for each Op
+// For non-const activation, insert Quantize-Dequantize after the ofm
+// For quantized const, insert Dequantize after the const
+struct FakeQuantize final : public luci::CircleNodeMutableVisitor<void>
+{
+ void visit(luci::CircleNode *node)
+ {
+ throw std::runtime_error("Unsupported op for fake quantization in " + node->name());
+ }
+
+ void visit(luci::CircleInput *node)
+ {
+ RETURN_UNLESS(is_quant_act(node));
+
+ auto quant = insert_quantize(node);
+ insert_dequantize(quant);
+
+ dequantize(node);
+
+ // Update graph input
+ const auto inputs = node->graph()->inputs();
+ auto graph_input = inputs->at(node->index());
+ graph_input->dtype(loco::DataType::FLOAT32);
+ }
+
+ void visit(luci::CircleOutput *node)
+ {
+ RETURN_UNLESS(is_quant_act(node));
+
+ dequantize(node);
+
+ // Update graph output
+ const auto outputs = node->graph()->outputs();
+ auto graph_output = outputs->at(node->index());
+ graph_output->dtype(loco::DataType::FLOAT32);
+ }
+
+ // For quantized const, insert Dequantize Op
+ void visit(luci::CircleConst *node)
+ {
+ RETURN_UNLESS(is_quant_const(node));
+
+ insert_dequantize(node);
+ }
+
+ // For non-const activation, insert Quantize-Dequantize Ops
+ // and dequantize the node
+ void visit(luci::CircleAbs *node) { fq_activation(node); }
+ void visit(luci::CircleAdd *node) { fq_activation(node); }
+ void visit(luci::CircleAveragePool2D *node) { fq_activation(node); }
+ void visit(luci::CircleBatchMatMul *node) { fq_activation(node); }
+ void visit(luci::CircleConv2D *node) { fq_activation(node); }
+ void visit(luci::CircleDepthwiseConv2D *node) { fq_activation(node); }
+ void visit(luci::CircleDiv *node) { fq_activation(node); }
+ void visit(luci::CircleFullyConnected *node) { fq_activation(node); }
+ void visit(luci::CircleGelu *node) { fq_activation(node); }
+ void visit(luci::CircleInstanceNorm *node) { fq_activation(node); }
+ void visit(luci::CircleLeakyRelu *node) { fq_activation(node); }
+ void visit(luci::CircleLogistic *node) { fq_activation(node); }
+ void visit(luci::CircleLogSoftmax *node) { fq_activation(node); }
+ void visit(luci::CircleMaxPool2D *node) { fq_activation(node); }
+ void visit(luci::CircleMul *node) { fq_activation(node); }
+ void visit(luci::CircleNeg *node) { fq_activation(node); }
+ void visit(luci::CirclePad *node) { fq_activation(node); }
+ void visit(luci::CirclePRelu *node) { fq_activation(node); }
+ void visit(luci::CircleMean *node) { fq_activation(node); }
+ void visit(luci::CircleReduceProd *node) { fq_activation(node); }
+ void visit(luci::CircleReduceMax *node) { fq_activation(node); }
+ void visit(luci::CircleRelu *node) { fq_activation(node); }
+ void visit(luci::CircleRelu6 *node) { fq_activation(node); }
+ void visit(luci::CircleResizeBilinear *node) { fq_activation(node); }
+ void visit(luci::CircleResizeNearestNeighbor *node) { fq_activation(node); }
+ void visit(luci::CircleRsqrt *node) { fq_activation(node); }
+ void visit(luci::CircleSoftmax *node) { fq_activation(node); }
+ void visit(luci::CircleSqrt *node) { fq_activation(node); }
+ void visit(luci::CircleSquaredDifference *node) { fq_activation(node); }
+ void visit(luci::CircleSub *node) { fq_activation(node); }
+ void visit(luci::CircleSum *node) { fq_activation(node); }
+ void visit(luci::CircleTanh *node) { fq_activation(node); }
+ void visit(luci::CircleTransposeConv *node) { fq_activation(node); }
+
+ // For Ops that do not change the value of input, do nothing
+ // (dtype will be automatically updated by type inference)
+ void visit(luci::CircleCast *) {}
+ void visit(luci::CircleConcatenation *) {}
+ void visit(luci::CircleDepthToSpace *) {}
+ void visit(luci::CircleGather *) {}
+ void visit(luci::CircleSlice *) {}
+ void visit(luci::CircleStridedSlice *) {}
+ void visit(luci::CircleReshape *) {}
+ void visit(luci::CircleSpaceToDepth *) {}
+ void visit(luci::CircleSplit *) {}
+ void visit(luci::CircleSplitOut *) {}
+ void visit(luci::CircleSplitV *) {}
+ void visit(luci::CircleSplitVOut *) {}
+ void visit(luci::CircleTranspose *) {}
+ void visit(luci::CirclePack *) {}
+ void visit(luci::CircleUnpack *) {}
+ void visit(luci::CircleUnpackOut *) {}
+
+ // For Ops that return indices, fake quantization is unnecessary
+ void visit(luci::CircleArgMax *) {}
+
+ // Virtual node
+ void visit(luci::CircleOutputExclude *) {}
+
+ void visit(luci::CircleQuantize *node)
+ {
+ RETURN_UNLESS(is_quant_act(node));
+
+ insert_dequantize(node);
+ }
+
+ // Dequantize Op does nothing in fp32 model
+ void visit(luci::CircleDequantize *) {}
+};
+
+#undef RETURN_UNLESS
+
+} // namespace
+
+namespace luci
+{
+
+bool ConvertToFakeQuantizedModelPass::run(loco::Graph *g)
+{
+ LOGGER(l);
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ INFO(l) << "ConvertToFakeQuantizedModelPass visit node: " << circle_node->name() << std::endl;
+
+ FakeQuantize fq;
+ circle_node->accept(&fq);
+ }
+
+ // One time run
+ return false;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.test.cpp b/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.test.cpp
new file mode 100644
index 000000000..560d68a74
--- /dev/null
+++ b/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.test.cpp
@@ -0,0 +1,277 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/Phase.h>
+
+#include "luci/Pass/ConvertToFakeQuantizedModelPass.h"
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+// Check the below pattern
+// Quantize (scale, zp) -> Dequantize (node)
+void check_q_dq(loco::Node *node, float scale, int64_t zp)
+{
+ auto dequant = dynamic_cast<luci::CircleDequantize *>(node);
+ ASSERT_NE(nullptr, dequant);
+ auto quant = dynamic_cast<luci::CircleQuantize *>(dequant->input());
+ ASSERT_NE(nullptr, quant);
+ auto qparam = quant->quantparam();
+ ASSERT_NE(nullptr, qparam);
+ EXPECT_EQ(scale, qparam->scale[0]);
+ EXPECT_EQ(zp, qparam->zerop[0]);
+}
+
+// Check the below pattern
+// Dequantize (node)
+void check_dq(loco::Node *node)
+{
+ auto dequant = dynamic_cast<luci::CircleDequantize *>(node);
+ EXPECT_TRUE(dequant != nullptr);
+}
+
+void set_qparam(luci::CircleNode *node, float scale, int64_t zp)
+{
+ auto qparam = std::make_unique<luci::CircleQuantParam>();
+ {
+ qparam->scale.push_back(scale);
+ qparam->zerop.push_back(zp);
+ }
+ node->quantparam(std::move(qparam));
+}
+
+/**
+ * SimpleGraph for testing
+ * - Child class should implement insertGraphBody()
+ *
+ * Example (U8ConvGraph inherits SimpleGraph and creates a Conv2D Op)
+ *
+ * BEFORE
+ * - A model is quantized (ex: u8)
+ *
+ * [Input(u8)] [Filter(u8)] [Bias(s32)]
+ * \ | /
+ * \ | /
+ * \ | /
+ * [Conv2D(u8)]
+ * |
+ * [Output(u8)]
+ *
+ * AFTER
+ * - Ops are converted to fp32
+ * - Quantize/Dequantize Ops are inserted properly
+ * - Q-DQ is inserted after non-const activation
+ * - DQ is inserted after const
+ *
+ * [Input(u8)]
+ * |
+ * [Quant(u8)] [Filter(u8)] [Bias(s32)]
+ * | | |
+ * [Dequant(fp32)] [Dequant(fp32)] [Dequant(fp32)]
+ * \ | /
+ * \ | /
+ * \ | /
+ * [Conv2D(fp32)]
+ * |
+ * [Quant(u8)]
+ * |
+ * [Dequant(fp32)]
+ * |
+ * [Output(fp32)]
+ */
+template <loco::DataType T> class SimpleGraph
+{
+public:
+ void init()
+ {
+ input = g.nodes()->create<luci::CircleInput>();
+ output = g.nodes()->create<luci::CircleOutput>();
+ input->name("input");
+ output->name("output");
+
+ auto graph_input = g.inputs()->create();
+ input->index(graph_input->index());
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+
+ graph_input->dtype(T);
+ input->dtype(T);
+ output->dtype(T);
+ graph_output->dtype(T);
+
+ graph_input->shape({1, 4, 4, 4});
+ input->shape({1, 4, 4, 4});
+ output->shape({1, 4, 4, 4});
+ graph_output->shape({1, 4, 4, 4});
+
+ set_qparam(input, 1.0, 0);
+ set_qparam(output, 1.0, 0);
+
+ auto graph_body = insertGraphBody(input);
+ output->from(graph_body);
+ }
+
+ virtual ~SimpleGraph() = default;
+
+protected:
+ virtual loco::Node *insertGraphBody(loco::Node *input) = 0;
+
+public:
+ loco::Graph g;
+ luci::CircleInput *input = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+class U8ConvGraph final : public SimpleGraph<loco::DataType::U8>
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ conv = g.nodes()->create<luci::CircleConv2D>();
+ weights = g.nodes()->create<luci::CircleConst>();
+ bias = g.nodes()->create<luci::CircleConst>();
+
+ conv->dtype(loco::DataType::U8);
+ weights->dtype(loco::DataType::U8);
+ bias->dtype(loco::DataType::S32);
+
+ conv->shape({1, 4, 4, 4});
+ weights->shape({4, 1, 1, 4});
+ bias->shape({4});
+
+ weights->size<loco::DataType::U8>(16);
+ for (uint32_t i = 0; i < 16; i++)
+ weights->at<loco::DataType::U8>(i) = i;
+
+ bias->size<loco::DataType::S32>(4);
+ for (uint32_t i = 0; i < 4; i++)
+ bias->at<loco::DataType::S32>(i) = i;
+
+ set_qparam(conv, 2.0, 127);
+ set_qparam(weights, 2.0, 127);
+ set_qparam(bias, 2.0, 127);
+
+ conv->input(input);
+ conv->filter(weights);
+ conv->bias(bias);
+
+ conv->name("conv");
+ weights->name("weights");
+ bias->name("bias");
+
+ return conv;
+ }
+
+public:
+ luci::CircleConv2D *conv = nullptr;
+ luci::CircleConst *weights = nullptr;
+ luci::CircleConst *bias = nullptr;
+};
+
+class FP32ConvGraph final : public SimpleGraph<loco::DataType::FLOAT32>
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ conv = g.nodes()->create<luci::CircleConv2D>();
+ weights = g.nodes()->create<luci::CircleConst>();
+ bias = g.nodes()->create<luci::CircleConst>();
+
+ conv->dtype(loco::DataType::FLOAT32);
+ weights->dtype(loco::DataType::FLOAT32);
+ bias->dtype(loco::DataType::FLOAT32);
+
+ conv->shape({1, 4, 4, 4});
+ weights->shape({4, 1, 1, 4});
+ bias->shape({4});
+
+ weights->size<loco::DataType::FLOAT32>(16);
+ for (uint32_t i = 0; i < 16; i++)
+ weights->at<loco::DataType::FLOAT32>(i) = i;
+
+ bias->size<loco::DataType::FLOAT32>(4);
+ for (uint32_t i = 0; i < 4; i++)
+ bias->at<loco::DataType::FLOAT32>(i) = i;
+
+ conv->input(input);
+ conv->filter(weights);
+ conv->bias(bias);
+
+ conv->name("conv");
+ weights->name("weights");
+ bias->name("bias");
+
+ return conv;
+ }
+
+public:
+ luci::CircleConv2D *conv = nullptr;
+ luci::CircleConst *weights = nullptr;
+ luci::CircleConst *bias = nullptr;
+};
+
+} // namespace
+
+TEST(ConvertToFakeQuantizedModelTest, U8Conv2D)
+{
+ U8ConvGraph g;
+ g.init();
+
+ luci::ConvertToFakeQuantizedModelPass fq;
+ fq.run(&g.g);
+
+ // Check ifm
+ check_q_dq(g.conv->input(), 1.0, 0);
+
+ // Check weights
+ check_dq(g.conv->filter());
+
+ // Check bias
+ check_dq(g.conv->bias());
+
+ // Check ofm
+ check_q_dq(g.output->from(), 2.0, 127);
+
+ SUCCEED();
+}
+
+TEST(ConvertToFakeQuantizedModelTest, F32Conv2D_NEG)
+{
+ FP32ConvGraph g;
+ g.init();
+
+ luci::ConvertToFakeQuantizedModelPass fq;
+ fq.run(&g.g);
+
+ uint32_t dequant_count = 0;
+ uint32_t quant_count = 0;
+
+ for (auto node : loco::active_nodes(loco::output_nodes(&g.g)))
+ {
+ auto cnode = loco::must_cast<luci::CircleNode *>(node);
+ auto opcode = cnode->opcode();
+ if (opcode == luci::CircleOpcode::DEQUANTIZE)
+ dequant_count++;
+ if (opcode == luci::CircleOpcode::QUANTIZE)
+ quant_count++;
+ }
+
+ // Check no quant/dequant Op is inserted
+ EXPECT_EQ(0, quant_count);
+ EXPECT_EQ(0, dequant_count);
+}
diff --git a/compiler/luci/pass/src/CopyQuantParamPass.cpp b/compiler/luci/pass/src/CopyQuantParamPass.cpp
new file mode 100644
index 000000000..0984fe85c
--- /dev/null
+++ b/compiler/luci/pass/src/CopyQuantParamPass.cpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/CopyQuantParamPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Log.h>
+
+namespace luci
+{
+
+namespace
+{
+
+struct SrcDst
+{
+ CircleNode *src = nullptr;
+ CircleNode *dst = nullptr;
+};
+
+} // namespace
+
+bool CopyQuantParamPass::run(loco::Graph *g)
+{
+ LOGGER(l);
+
+ INFO(l) << "CopyQuantParamPass Start" << std::endl;
+
+ if (_src_tensors.size() != _dst_tensors.size())
+ throw std::runtime_error("The numbers of Source/Destination tensors do not match.");
+
+ // Return src/dst CircleNodes
+ auto get_src_dst = [&g](std::string src, std::string dst) {
+ SrcDst src_dst;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto const cnode = loco::must_cast<CircleNode *>(node);
+ auto const name = cnode->name();
+ if (name == src)
+ src_dst.src = cnode;
+
+ if (name == dst)
+ src_dst.dst = cnode;
+ }
+ return src_dst;
+ };
+
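+ // e.g. with src_tensors = {"conv1"} and dst_tensors = {"conv2"}
+ // (hypothetical tensor names), conv1's quantparam is copied onto conv2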
+ for (uint32_t i = 0; i < _src_tensors.size(); i++)
+ {
+ auto &src = _src_tensors[i];
+ auto &dst = _dst_tensors[i];
+
+ auto nodes = get_src_dst(src, dst);
+ if (not nodes.src)
+ throw std::runtime_error("The tensor named " + src + " does not exist.");
+
+ if (not nodes.dst)
+ throw std::runtime_error("The tensor named " + dst + " does not exist.");
+
+ copy_quantparam(nodes.src, nodes.dst);
+
+ INFO(l) << "Quantparam of " << src << " is copied to " << dst << std::endl;
+ }
+
+ INFO(l) << "CopyQuantParamPass End" << std::endl;
+
+ return false; // one time run
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/DecomposeHardSwishPass.cpp b/compiler/luci/pass/src/DecomposeHardSwishPass.cpp
new file mode 100644
index 000000000..bd99d2de0
--- /dev/null
+++ b/compiler/luci/pass/src/DecomposeHardSwishPass.cpp
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/DecomposeHardSwishPass.h"
+
+#include "helpers/NodeFiller.h"
+#include "helpers/TypeMapper.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+namespace
+{
+/**
+ * BEFORE
+ * [CircleNode]
+ * |
+ * |
+ * [CircleHardSwish]
+ * |
+ * |
+ * [CircleNode]
+ *
+ *
+ * AFTER
+ *
+ * [CircleNode] [CircleConst]
+ * | \ /
+ * | \ /
+ * | [CircleAdd]
+ * | |
+ * | |
+ * \ [CircleRelu6] [CircleConst]
+ * \ \ /
+ * \ \ /
+ * \ [CircleMul]
+ * \ /
+ * \ /
+ * [CircleMul]
+ * |
+ * |
+ * [CircleNode]
+ *
+ */
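+// HardSwish(x) = x * ReLU6(x + 3) / 6, so the decomposition computes
+// Mul(x, Mul(Relu6(Add(x, 3)), 1/6))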
+bool decompose_hardswish(luci::CircleHardSwish *hardswish)
+{
+ if (not hardswish)
+ return false;
+
+ if (hardswish->dtype() != loco::DataType::FLOAT32)
+ return false;
+
+ auto g = hardswish->graph();
+
+ auto name = hardswish->name();
+ assert(name.length() > 0);
+
+ // Create a const for CircleAdd operation
+ auto add_const = g->nodes()->create<luci::CircleConst>();
+ add_const->shape({}); // scalar
+ add_const->dtype(loco::DataType::FLOAT32);
+ add_const->rank(0);
+ add_const->size<loco::DataType::FLOAT32>(1);
+ add_const->at<loco::DataType::FLOAT32>(0) = 3.;
+ add_const->name(name + "/Add/const");
+ luci::add_origin(add_const, luci::get_origin(hardswish));
+
+ // Create an Add operation
+ auto add = g->nodes()->create<luci::CircleAdd>();
+ add->fusedActivationFunction(luci::FusedActFunc::NONE);
+ add->x(hardswish->features());
+ add->y(add_const);
+ add->name(name + "/Add");
+ luci::add_origin(add, luci::get_origin(hardswish));
+
+ // Create a Relu6 operation
+ auto relu6 = g->nodes()->create<luci::CircleRelu6>();
+ relu6->features(add);
+ relu6->name(name + "/Relu6");
+ luci::add_origin(relu6, luci::get_origin(hardswish));
+
+ // Create a const for CircleMul operation
+ auto mul_const = g->nodes()->create<luci::CircleConst>();
+ mul_const->shape({}); // scalar
+ mul_const->dtype(loco::DataType::FLOAT32);
+ mul_const->rank(0);
+ mul_const->size<loco::DataType::FLOAT32>(1);
+ mul_const->at<loco::DataType::FLOAT32>(0) = 1. / 6.;
+ mul_const->name(name + "/Mul/const");
+ luci::add_origin(mul_const, luci::get_origin(hardswish));
+
+ // Create first Mul operation
+ auto mul1 = g->nodes()->create<luci::CircleMul>();
+ mul1->fusedActivationFunction(luci::FusedActFunc::NONE);
+ mul1->x(relu6);
+ mul1->y(mul_const);
+ mul1->name(name + "/Mul1");
+ luci::add_origin(mul1, luci::get_origin(hardswish));
+
+ // Create second Mul operation
+ auto mul2 = g->nodes()->create<luci::CircleMul>();
+ mul2->fusedActivationFunction(luci::FusedActFunc::NONE);
+ mul2->x(hardswish->features());
+ mul2->y(mul1);
+ mul2->name(name + "/Mul2");
+ luci::add_origin(mul2, luci::get_origin(hardswish));
+
+ replace(hardswish).with(mul2);
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool DecomposeHardSwishPass::run(loco::Graph *g)
+{
+ bool changed = false;
+
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto hardswish = dynamic_cast<luci::CircleHardSwish *>(node))
+ {
+ if (decompose_hardswish(hardswish))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/DecomposeHardSwishPass.test.cpp b/compiler/luci/pass/src/DecomposeHardSwishPass.test.cpp
new file mode 100644
index 000000000..d51a07fdc
--- /dev/null
+++ b/compiler/luci/pass/src/DecomposeHardSwishPass.test.cpp
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/DecomposeHardSwishPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+/**
+ * HardSwish graph
+ *
+ * [CircleInput]
+ * |
+ * |
+ * [CircleHardSwish]
+ * |
+ * |
+ * [CircleOutput]
+ */
+struct HardSwishGraph
+{
+ loco::Graph _g;
+ luci::CircleInput *_input = nullptr;
+ luci::CircleHardSwish *_hardswish = nullptr;
+ luci::CircleOutput *_output = nullptr;
+};
+
+class DecomposeHardSwishPass : public ::testing::Test
+{
+protected:
+ void MakeGraph()
+ {
+ const int N = 1;
+ const int H = 4;
+ const int W = 4;
+ const int C = 3;
+
+ // graph input and output
+ auto graph_input = _hardswish_g._g.inputs()->create();
+ auto graph_output = _hardswish_g._g.outputs()->create();
+
+ // CircleInput
+ _hardswish_g._input = _hardswish_g._g.nodes()->create<luci::CircleInput>();
+ _hardswish_g._input->index(graph_input->index());
+ _hardswish_g._input->shape({N, H, W, C});
+ _hardswish_g._input->dtype(loco::DataType::FLOAT32);
+ _hardswish_g._input->name("input");
+
+ // CircleHardSwish
+ _hardswish_g._hardswish = _hardswish_g._g.nodes()->create<luci::CircleHardSwish>();
+ _hardswish_g._hardswish->features(_hardswish_g._input);
+ _hardswish_g._hardswish->shape({N, H, W, C});
+ _hardswish_g._hardswish->dtype(loco::DataType::FLOAT32);
+ _hardswish_g._hardswish->name("hardswish");
+
+ // CircleOutput
+ _hardswish_g._output = _hardswish_g._g.nodes()->create<luci::CircleOutput>();
+ _hardswish_g._output->index(graph_output->index());
+ _hardswish_g._output->from(_hardswish_g._hardswish);
+ _hardswish_g._output->shape({N, H, W, C});
+ _hardswish_g._output->dtype(loco::DataType::FLOAT32);
+ _hardswish_g._output->name("output");
+ }
+
+ void MakeInt32Graph()
+ {
+ const int N = 1;
+ const int H = 4;
+ const int W = 4;
+ const int C = 3;
+
+ // graph input and output
+ auto graph_input = _hardswish_int32_g._g.inputs()->create();
+ auto graph_output = _hardswish_int32_g._g.outputs()->create();
+
+ // CircleInput
+ _hardswish_int32_g._input = _hardswish_int32_g._g.nodes()->create<luci::CircleInput>();
+ _hardswish_int32_g._input->index(graph_input->index());
+ _hardswish_int32_g._input->shape({N, H, W, C});
+ _hardswish_int32_g._input->dtype(loco::DataType::S32);
+ _hardswish_int32_g._input->name("input");
+
+ // CircleHardSwish
+ _hardswish_int32_g._hardswish = _hardswish_int32_g._g.nodes()->create<luci::CircleHardSwish>();
+ _hardswish_int32_g._hardswish->features(_hardswish_int32_g._input);
+ _hardswish_int32_g._hardswish->shape({N, H, W, C});
+ _hardswish_int32_g._hardswish->dtype(loco::DataType::S32);
+ _hardswish_int32_g._hardswish->name("hardswish");
+
+ // CircleOutput
+ _hardswish_int32_g._output = _hardswish_int32_g._g.nodes()->create<luci::CircleOutput>();
+ _hardswish_int32_g._output->index(graph_output->index());
+ _hardswish_int32_g._output->from(_hardswish_int32_g._hardswish);
+ _hardswish_int32_g._output->shape({N, H, W, C});
+ _hardswish_int32_g._output->dtype(loco::DataType::S32);
+ _hardswish_int32_g._output->name("output");
+ }
+
+ virtual void SetUp()
+ {
+ MakeGraph();
+ MakeInt32Graph();
+ }
+
+protected:
+ luci::DecomposeHardSwishPass _pass;
+ HardSwishGraph _hardswish_g;
+ HardSwishGraph _hardswish_int32_g;
+};
+
+} // namespace
+
+TEST_F(DecomposeHardSwishPass, name)
+{
+ auto const name = _pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+/**
+ * Decomposed graph looks like below.
+ *
+ * [CircleInput] [CircleConst]
+ * | \ /
+ * | \ /
+ * | [CircleAdd]
+ * | |
+ * | |
+ * \ [CircleRelu6] [CircleConst]
+ * \ \ /
+ * \ \ /
+ * \ [CircleMul]
+ * \ /
+ * \ /
+ * [CircleMul]
+ * |
+ * |
+ * [CircleOutput]
+ *
+ */
+TEST_F(DecomposeHardSwishPass, simple_test)
+{
+ auto ret = _pass.run(&_hardswish_g._g);
+ EXPECT_TRUE(ret);
+
+ auto mul2 = dynamic_cast<luci::CircleMul *>(_hardswish_g._output->from());
+ EXPECT_NE(nullptr, mul2);
+
+ auto input2 = dynamic_cast<luci::CircleInput *>(mul2->x());
+ EXPECT_NE(nullptr, input2);
+
+ auto mul1 = dynamic_cast<luci::CircleMul *>(mul2->y());
+ EXPECT_NE(nullptr, mul1);
+
+ auto relu6 = dynamic_cast<luci::CircleRelu6 *>(mul1->x());
+ EXPECT_NE(nullptr, relu6);
+
+ auto mul_const = dynamic_cast<luci::CircleConst *>(mul1->y());
+ EXPECT_NE(nullptr, mul_const);
+ EXPECT_FLOAT_EQ(1. / 6., mul_const->at<loco::DataType::FLOAT32>(0));
+
+ auto add = dynamic_cast<luci::CircleAdd *>(relu6->features());
+ EXPECT_NE(nullptr, add);
+
+ auto input1 = dynamic_cast<luci::CircleInput *>(add->x());
+ EXPECT_NE(nullptr, input1);
+
+ auto add_const = dynamic_cast<luci::CircleConst *>(add->y());
+ EXPECT_NE(nullptr, add_const);
+ EXPECT_FLOAT_EQ(3., add_const->at<loco::DataType::FLOAT32>(0));
+}
+
+TEST_F(DecomposeHardSwishPass, check_last_node)
+{
+ auto ret = _pass.run(&_hardswish_g._g);
+ EXPECT_TRUE(ret);
+
+ auto hardswish = dynamic_cast<luci::CircleHardSwish *>(_hardswish_g._output->from());
+ EXPECT_EQ(nullptr, hardswish);
+}
+
+TEST_F(DecomposeHardSwishPass, wrong_condition_NEG)
+{
+ auto ret = _pass.run(&_hardswish_int32_g._g);
+ EXPECT_FALSE(ret);
+
+ auto hardswish = dynamic_cast<luci::CircleHardSwish *>(_hardswish_int32_g._output->from());
+ EXPECT_NE(nullptr, hardswish);
+}
diff --git a/compiler/luci/pass/src/DynamicBatchToSingleBatch.cpp b/compiler/luci/pass/src/DynamicBatchToSingleBatch.cpp
new file mode 100644
index 000000000..86876063a
--- /dev/null
+++ b/compiler/luci/pass/src/DynamicBatchToSingleBatch.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/DynamicBatchToSingleBatch.h"
+
+#include "luci/Pass/DynamicBatchToSingleBatchPass.h"
+#include "luci/Pass/CircleShapeInferencePass.h"
+
+#include "ProgressReporter.h"
+
+#include <logo/Phase.h>
+
+namespace luci
+{
+
+void dynamic_batch_to_single_batch(luci::Module *m)
+{
+ assert(m); // FIX CALLER UNLESS
+
+ for (uint32_t i = 0; i < m->size(); i++)
+ {
+ auto g = m->graph(i);
+
+ logo::Phase phase;
+
+ phase.emplace_back(std::make_unique<luci::DynamicBatchToSingleBatchPass>());
+
+ // Needed to infer shapes of other nodes
+ phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+
+ ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
+ logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
+ phase_runner.attach(&prog);
+ phase_runner.run(phase);
+ }
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/DynamicBatchToSingleBatchPass.cpp b/compiler/luci/pass/src/DynamicBatchToSingleBatchPass.cpp
new file mode 100644
index 000000000..59a9f5ab3
--- /dev/null
+++ b/compiler/luci/pass/src/DynamicBatchToSingleBatchPass.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/DynamicBatchToSingleBatchPass.h"
+
+#include <luci/IR/CircleNode.h>
+#include <loco.h>
+
+namespace luci
+{
+
+bool DynamicBatchToSingleBatchPass::run(loco::Graph *g)
+{
+ assert(g); // FIX CALLER UNLESS
+
+ bool changed = false;
+
+ auto graph_inputs = g->inputs();
+
+ // Assume the first dimension is batch dimension
+ const uint32_t BATCH_DIM = 0;
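+ // e.g. an input of shape (unknown, H, W, C) becomes (1, H, W, C)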
+
+ for (auto node : loco::input_nodes(g))
+ {
+ auto input_node = loco::must_cast<luci::CircleInput *>(node);
+
+ if (input_node->rank() == 0)
+ continue;
+
+ // Skip if batch dimension is known
+ if (input_node->dim(BATCH_DIM).known())
+ continue;
+
+ if (input_node->rank() != 4)
+ {
+ // Limit use only for rank 4 inputs (for NHWC and NCHW)
+ // TODO Enable this if necessary
+ throw std::runtime_error("First dimension of input is unknown, but its rank is not 4.");
+ }
+
+ // 'set' will make the dimension known
+ input_node->dim(BATCH_DIM).set(1);
+
+ // Update graph input
+ auto graph_input = graph_inputs->at(input_node->index());
+ auto graph_input_shape = graph_input->shape();
+ auto tensor_shape = std::make_unique<loco::TensorShape>();
+ {
+ tensor_shape->rank(graph_input_shape->rank());
+ for (uint32_t i = 0; i < tensor_shape->rank(); i++)
+ {
+ tensor_shape->dim(i) = graph_input_shape->dim(i);
+ }
+ tensor_shape->dim(BATCH_DIM).set(1);
+ }
+
+ graph_input->shape(std::move(tensor_shape));
+
+ changed = true;
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/DynamicBatchToSingleBatchPass.test.cpp b/compiler/luci/pass/src/DynamicBatchToSingleBatchPass.test.cpp
new file mode 100644
index 000000000..f19f57d17
--- /dev/null
+++ b/compiler/luci/pass/src/DynamicBatchToSingleBatchPass.test.cpp
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/DynamicBatchToSingleBatchPass.h"
+
+#include <loco.h>
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+std::unique_ptr<loco::TensorShape> make_tshape(std::initializer_list<uint32_t> dims)
+{
+ auto tensor_shape = std::make_unique<loco::TensorShape>();
+ {
+ tensor_shape->rank(dims.size());
+ uint32_t axis = 0;
+ for (auto it = dims.begin(); it != dims.end(); ++it)
+ {
+ tensor_shape->dim(axis++) = *it;
+ }
+ }
+
+ return tensor_shape;
+}
+
+} // namespace
+
+TEST(DynamicBatchToSingleBatchPassTest, simple)
+{
+ luci::DynamicBatchToSingleBatchPass pass;
+
+ auto g = loco::make_graph();
+
+ auto graph_input = g->inputs()->create();
+ {
+ auto tensor_shape = make_tshape({1, 5, 5, 3});
+ tensor_shape->dim(0).unset();
+ graph_input->shape(std::move(tensor_shape));
+ }
+
+ // Create the input node
+ auto input = g->nodes()->create<luci::CircleInput>();
+ {
+ input->index(0);
+ input->shape({1, 5, 5, 3});
+ input->dim(0).unset();
+ }
+
+ EXPECT_FALSE(graph_input->shape()->dim(0).known());
+ EXPECT_FALSE(input->dim(0).known());
+
+ EXPECT_TRUE(pass.run(g.get()));
+
+ // Check that the batch dimension is now known
+ EXPECT_TRUE(graph_input->shape()->dim(0).known());
+ EXPECT_EQ(1, graph_input->shape()->dim(0));
+ EXPECT_TRUE(input->dim(0).known());
+ EXPECT_EQ(1, input->dim(0));
+}
+
+TEST(DynamicBatchToSingleBatchPassTest, simple_NEG)
+{
+ luci::DynamicBatchToSingleBatchPass pass;
+
+ auto g = loco::make_graph();
+
+ auto graph_input = g->inputs()->create();
+ {
+ graph_input->shape({1, 5, 5, 3});
+ }
+
+ // Create the input node
+ auto input = g->nodes()->create<luci::CircleInput>();
+ {
+ input->index(0);
+ input->shape({1, 5, 5, 3});
+ }
+
+ EXPECT_FALSE(pass.run(g.get()));
+}
+
+// Remove this test if we support rank 1 in this pass
+TEST(DynamicBatchToSingleBatchPassTest, rank1_NEG)
+{
+ luci::DynamicBatchToSingleBatchPass pass;
+
+ auto g = loco::make_graph();
+
+ auto graph_input = g->inputs()->create();
+ {
+ auto tensor_shape = make_tshape({1});
+ tensor_shape->dim(0).unset();
+ graph_input->shape(std::move(tensor_shape));
+ }
+
+ // Create the input node
+ auto input = g->nodes()->create<luci::CircleInput>();
+ {
+ input->index(0);
+ input->shape({1});
+ input->dim(0).unset();
+ }
+
+ EXPECT_FALSE(graph_input->shape()->dim(0).known());
+ EXPECT_FALSE(input->dim(0).known());
+
+ // Rank 1 is unsupported for now
+ EXPECT_ANY_THROW(pass.run(g.get()));
+}
diff --git a/compiler/luci/pass/src/ExpandBroadcastConstPass.cpp b/compiler/luci/pass/src/ExpandBroadcastConstPass.cpp
new file mode 100644
index 000000000..25fb9f171
--- /dev/null
+++ b/compiler/luci/pass/src/ExpandBroadcastConstPass.cpp
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ExpandBroadcastConstPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Log.h>
+
+#include <type_traits>
+
+namespace
+{
+
+luci::CircleConst *create_expanded_constant(luci::CircleConst *node, luci::CircleNode *successor)
+{
+ LOGGER(l);
+
+ if (successor->rank() != node->rank())
+ return nullptr;
+
+ std::vector<uint32_t> broadcast_dims;
+ for (uint32_t dim = 0; dim < node->rank(); ++dim)
+ {
+ if (node->dim(dim) == successor->dim(dim))
+ continue;
+
+ if (node->dim(dim) == 1)
+ broadcast_dims.push_back(dim);
+ }
+
+ if (broadcast_dims.size() != 1 || broadcast_dims.back() != node->rank() - 1)
+ {
+ WARN(l) << "NYI: Only depth broadcast removal is supported";
+ return nullptr;
+ }
+
+ auto constant = node->graph()->nodes()->create<luci::CircleConst>();
+ constant->name(node->name());
+ constant->dtype(node->dtype());
+ constant->rank(node->rank());
+ constant->shape_status(luci::ShapeStatus::VALID);
+
+ uint32_t node_size = node->size<loco::DataType::FLOAT32>();
+ uint32_t constant_size = 1;
+ for (uint32_t i = 0; i < successor->rank(); ++i)
+ {
+ constant->dim(i).set(successor->dim(i).value());
+ constant_size *= constant->dim(i).value();
+ }
+ constant->size<loco::DataType::FLOAT32>(constant_size);
+
+ auto const node_data = &node->at<loco::DataType::FLOAT32>(0);
+ auto const constant_data = &constant->at<loco::DataType::FLOAT32>(0);
+
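+ // Fill the expanded buffer with 'depth' contiguous copies of the
+ // original H x W values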
+ auto const successor_depth = successor->dim(successor->rank() - 1).value();
+ for (uint32_t d = 0; d < successor_depth; ++d)
+ std::copy(node_data, node_data + node_size, constant_data + d * node_size);
+
+ return constant;
+}
+
+template <typename N> bool expand_node_input(luci::CircleConst *node, luci::CircleNode *successor)
+{
+ static_assert(std::is_base_of<luci::CircleNode, N>::value,
+ "Successor node should have CircleNode base");
+
+ auto const successor_node = loco::must_cast<N *>(successor);
+ auto const successor_x = loco::must_cast<luci::CircleNode *>(successor_node->x());
+ auto const successor_y = loco::must_cast<luci::CircleNode *>(successor_node->y());
+
+ luci::CircleConst *expanded_const = nullptr;
+
+ if (node == successor_x)
+ {
+ expanded_const = create_expanded_constant(node, successor_y);
+
+ if (expanded_const == nullptr)
+ return false;
+
+ successor_node->x(expanded_const);
+ }
+ else if (node == successor_y)
+ {
+ expanded_const = create_expanded_constant(node, successor_x);
+
+ if (expanded_const == nullptr)
+ return false;
+
+ successor_node->y(expanded_const);
+ }
+
+ return true;
+}
+
+/**
+ * Expand constants following broadcasting rules for binary input nodes (Add, Mul, etc.)
+ *
+ * BEFORE
+ *
+ * [CircleInput] [CircleConst (H x W x 1)]
+ * | |
+ * [CircleAdd]
+ *
+ * AFTER
+ *
+ * [CircleInput] [CircleConst (H x W x D)]
+ * | |
+ * [CircleAdd]
+ */
+bool expand_broadcast_const(luci::CircleConst *node)
+{
+ if (node->dtype() != loco::DataType::FLOAT32)
+ return false; // Unsupported data type
+
+ bool changed = false;
+
+ for (auto successor : loco::succs(node))
+ {
+ auto const circle_successor = loco::must_cast<luci::CircleNode *>(successor);
+ switch (circle_successor->opcode())
+ {
+ case luci::CircleOpcode::ADD:
+ if (expand_node_input<luci::CircleAdd>(node, circle_successor))
+ changed = true;
+ break;
+ case luci::CircleOpcode::MUL:
+ if (expand_node_input<luci::CircleMul>(node, circle_successor))
+ changed = true;
+ break;
+ case luci::CircleOpcode::DIV:
+ if (expand_node_input<luci::CircleDiv>(node, circle_successor))
+ changed = true;
+ break;
+ default:
+ break; // Unsupported successor node
+ }
+ }
+
+ return changed;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * Broadcast expanding for Const nodes
+ **/
+bool ExpandBroadcastConstPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto const_node = dynamic_cast<luci::CircleConst *>(node);
+ if (const_node == nullptr)
+ continue;
+
+ if (expand_broadcast_const(const_node))
+ changed = true;
+ }
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/ExpandBroadcastConstPass.test.cpp b/compiler/luci/pass/src/ExpandBroadcastConstPass.test.cpp
new file mode 100644
index 000000000..5df1b72dc
--- /dev/null
+++ b/compiler/luci/pass/src/ExpandBroadcastConstPass.test.cpp
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ExpandBroadcastConstPass.h"
+#include "PassTestGraphs.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <limits> // std::numeric_limits
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+class ExpandBroadcastConstTest : public ::testing::Test
+{
+public:
+ ExpandBroadcastConstTest()
+ {
+ _x = _g.nodes()->create<luci::CircleInput>();
+ _y = _g.nodes()->create<luci::CircleConst>();
+ _add = _g.nodes()->create<luci::CircleAdd>();
+ _output = _g.nodes()->create<luci::CircleOutput>();
+
+ auto graph_input = _g.inputs()->create();
+ graph_input->dtype(loco::DataType::FLOAT32);
+ graph_input->shape({1, H, W, D});
+ _x->index(graph_input->index());
+ _x->dtype(graph_input->dtype());
+ _x->shape({1, H, W, D});
+
+ auto graph_output = _g.outputs()->create();
+ graph_output->dtype(loco::DataType::FLOAT32);
+ graph_output->shape({1, H, W, D});
+ _output->index(graph_output->index());
+ _output->dtype(graph_output->dtype());
+ _output->shape({1, H, W, D});
+
+ _y->dtype(loco::DataType::FLOAT32);
+ _y->shape({1, H, W, 1});
+ _y->size<loco::DataType::FLOAT32>(16);
+
+ _add->dtype(loco::DataType::FLOAT32);
+ _add->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _add->x(_x);
+ _add->y(_y);
+ _add->shape({1, H, W, D});
+
+ _output->from(_add);
+
+ _x->name("input");
+ _output->name("output");
+ }
+
+protected:
+ uint32_t const H = 4;
+ uint32_t const W = 4;
+ uint32_t const D = 3;
+
+protected:
+ loco::Graph _g;
+ luci::CircleAdd *_add = nullptr;
+ luci::CircleInput *_x = nullptr;
+ luci::CircleConst *_y = nullptr;
+ luci::CircleOutput *_output = nullptr;
+};
+
+} // namespace
+
+TEST_F(ExpandBroadcastConstTest, name)
+{
+ luci::ExpandBroadcastConstPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST_F(ExpandBroadcastConstTest, remove_broadcast)
+{
+ for (uint32_t i = 0; i < H * W; ++i)
+ _y->at<loco::DataType::FLOAT32>(i) = static_cast<float>(i);
+
+ luci::ExpandBroadcastConstPass pass;
+ ASSERT_TRUE(pass.run(&_g));
+
+ auto broadcasted_const = dynamic_cast<luci::CircleConst *>(_add->y());
+ ASSERT_NE(broadcasted_const, nullptr);
+
+ EXPECT_EQ(broadcasted_const->dtype(), loco::DataType::FLOAT32);
+ EXPECT_EQ(broadcasted_const->dim(1).value(), H);
+ EXPECT_EQ(broadcasted_const->dim(2).value(), W);
+ EXPECT_EQ(broadcasted_const->dim(3).value(), D);
+ EXPECT_EQ(broadcasted_const->size<loco::DataType::FLOAT32>(), H * W * D);
+
+ for (uint32_t i = 0; i < H * W; ++i)
+ {
+ for (uint32_t d = 0; d < D; ++d)
+ {
+ EXPECT_NEAR(broadcasted_const->at<loco::DataType::FLOAT32>(i + H * W * d),
+ static_cast<float>(i), std::numeric_limits<float>::min());
+ }
+ }
+}
+
+TEST_F(ExpandBroadcastConstTest, remove_broadcast_multiple_successors)
+{
+ auto const circle_sqrt = _g.nodes()->create<luci::CircleSqrt>();
+ circle_sqrt->dtype(loco::DataType::FLOAT32);
+ circle_sqrt->shape({1, H, W, 1});
+ circle_sqrt->x(_y);
+
+ luci::ExpandBroadcastConstPass pass;
+ ASSERT_TRUE(pass.run(&_g));
+
+ auto broadcasted_const = dynamic_cast<luci::CircleConst *>(_add->y());
+ auto original_const = dynamic_cast<luci::CircleConst *>(circle_sqrt->x());
+
+ ASSERT_NE(broadcasted_const, nullptr);
+ EXPECT_EQ(broadcasted_const->dtype(), loco::DataType::FLOAT32);
+ EXPECT_EQ(broadcasted_const->dim(3).value(), D);
+ EXPECT_EQ(broadcasted_const->size<loco::DataType::FLOAT32>(), H * W * D);
+
+  // Check that the other successor (Sqrt) still uses the original constant
+ ASSERT_NE(original_const, nullptr);
+ EXPECT_EQ(original_const->dtype(), loco::DataType::FLOAT32);
+ EXPECT_EQ(original_const->dim(3).value(), 1);
+ EXPECT_EQ(original_const->size<loco::DataType::FLOAT32>(), H * W * 1);
+}
+
+TEST_F(ExpandBroadcastConstTest, broadcast_impossible_NEG)
+{
+ _y->shape({1, H, W, 2});
+ _y->size<loco::DataType::FLOAT32>(H * W * (D - 1));
+
+ luci::ExpandBroadcastConstPass pass;
+ ASSERT_FALSE(pass.run(&_g));
+}
diff --git a/compiler/luci/pass/src/FoldAddV2Pass.cpp b/compiler/luci/pass/src/FoldAddV2Pass.cpp
new file mode 100644
index 000000000..20c1022f8
--- /dev/null
+++ b/compiler/luci/pass/src/FoldAddV2Pass.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldAddV2Pass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <iostream>
+
+namespace
+{
+
+bool same_shape(const luci::CircleConst *x, const luci::CircleConst *y)
+{
+ if (x->rank() != y->rank())
+ return false;
+
+ for (uint32_t i = 0; i < x->rank(); i++)
+ {
+ if (!(x->dim(i) == y->dim(i)))
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * Fold AddV2 to const if both inputs are const
+ **/
+template <loco::DataType T> bool fold_add_v2(luci::CircleCustom *add_v2)
+{
+ // This should hold for AddV2
+ if (add_v2->numInputs() != 2)
+ return false;
+
+ // Check first input is const
+ auto x = dynamic_cast<luci::CircleConst *>(add_v2->inputs(0));
+ if (not x)
+ return false;
+
+ // Check second input is const
+ auto y = dynamic_cast<luci::CircleConst *>(add_v2->inputs(1));
+ if (not y)
+ return false;
+
+ if (x->dtype() != y->dtype())
+ return false;
+
+ if (!same_shape(x, y))
+ return false;
+
+ auto name_x = x->name();
+ auto name_y = y->name();
+ assert(name_x.length() > 0);
+ assert(name_y.length() > 0);
+ auto constant = add_v2->graph()->nodes()->create<luci::CircleConst>();
+ constant->dtype(x->dtype());
+ constant->rank(x->rank());
+ for (uint32_t i = 0; i < x->rank(); i++)
+ constant->dim(i).set(x->dim(i).value());
+
+ const auto size = x->size<T>();
+ constant->size<T>(size);
+ for (uint32_t i = 0; i < size; i++)
+ constant->at<T>(i) = x->at<T>(i) + y->at<T>(i);
+
+ constant->shape_status(luci::ShapeStatus::VALID);
+ constant->name(name_x + ";" + name_y);
+
+ for (auto succ : loco::succs(add_v2))
+ {
+ auto custom_out = loco::must_cast<luci::CircleCustomOut *>(succ);
+ loco::replace(custom_out).with(constant);
+ }
+
+ return true;
+}
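+
+// e.g. (mirrors the S64 unit test): x = {1, 2, 3} and y = {1, 2, 3} fold into
+// a new const {2, 4, 6}, and every CircleCustomOut user of the AddV2 is
+// redirected to it.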
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * Constant Folding for AddV2 Op
+ **/
+bool FoldAddV2Pass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto custom = dynamic_cast<luci::CircleCustom *>(node))
+ {
+ if (custom->custom_code() == "AddV2")
+ {
+        // TODO Support more data types
+ if (custom->dtype() == loco::DataType::S64)
+ {
+ if (fold_add_v2<loco::DataType::S64>(custom))
+ changed = true;
+ }
+ }
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FoldAddV2Pass.test.cpp b/compiler/luci/pass/src/FoldAddV2Pass.test.cpp
new file mode 100644
index 000000000..200fcc093
--- /dev/null
+++ b/compiler/luci/pass/src/FoldAddV2Pass.test.cpp
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldAddV2Pass.h"
+#include "PassTestGraphs.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+/**
+ * Graph has an AddV2 Op with constant inputs
+ *
+ * BEFORE
+ *
+ * [CircleConst] [CircleConst]
+ * | |
+ * [CircleCustom (AddV2)]
+ * |
+ * [CircleCustomOut]
+ *
+ * AFTER
+ *
+ * [CircleConst]
+ */
+template <loco::DataType T> class FoldAddV2Test : public luci::ConstantFoldingAddTestGraph
+{
+public:
+ FoldAddV2Test(std::initializer_list<uint32_t> shape) : luci::ConstantFoldingAddTestGraph(shape, T)
+ {
+ _addV2 = _g.nodes()->template create<luci::CircleCustom>(2, 1);
+ _x = _g.nodes()->template create<luci::CircleConst>();
+ _y = _g.nodes()->template create<luci::CircleConst>();
+ _addV2_out = _g.nodes()->template create<luci::CircleCustomOut>();
+
+ _addV2->dtype(T);
+ _x->dtype(T);
+ _y->dtype(T);
+ _addV2_out->dtype(T);
+
+ _addV2->shape(shape);
+ _x->shape(shape);
+ _y->shape(shape);
+ _addV2_out->shape(shape);
+
+ uint32_t num_elems = 1;
+ for (auto dim = shape.begin(); dim != shape.end(); dim++)
+ num_elems *= *dim;
+
+ _x->size<T>(num_elems);
+ _y->size<T>(num_elems);
+
+ for (uint32_t i = 0; i < num_elems; i++)
+ {
+ _x->at<T>(i) = i + 1;
+ _y->at<T>(i) = i + 1;
+ }
+
+ _addV2->custom_code("AddV2");
+ _addV2->inputs(0, _x);
+ _addV2->inputs(1, _y);
+ _addV2_out->input(_addV2);
+
+ _addV2->name("addV2");
+ _x->name("x");
+ _y->name("y");
+ }
+
+ loco::Node *createFoldedPattern() override { return _addV2_out; }
+
+ virtual ~FoldAddV2Test() = default;
+
+protected:
+ luci::CircleCustom *_addV2 = nullptr;
+ luci::CircleCustomOut *_addV2_out = nullptr;
+ luci::CircleConst *_x = nullptr;
+ luci::CircleConst *_y = nullptr;
+};
+
+class FoldS64AddV2Test : public FoldAddV2Test<loco::DataType::S64>, public ::testing::Test
+{
+public:
+ FoldS64AddV2Test() : FoldAddV2Test<loco::DataType::S64>({3}) {}
+
+ virtual void SetUp() { init(); }
+};
+
+} // namespace
+
+TEST(FoldAddV2PassTest, name)
+{
+ luci::FoldAddV2Pass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST_F(FoldS64AddV2Test, fold_addV2)
+{
+ luci::FoldAddV2Pass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+ // Check type, shape, values of folded const
+ EXPECT_EQ(loco::DataType::S64, folded_const->dtype());
+ EXPECT_EQ(1, folded_const->rank());
+ EXPECT_EQ(3, folded_const->dim(0).value());
+ EXPECT_EQ(2, folded_const->at<loco::DataType::S64>(0));
+ EXPECT_EQ(4, folded_const->at<loco::DataType::S64>(1));
+ EXPECT_EQ(6, folded_const->at<loco::DataType::S64>(2));
+}
+
+TEST_F(FoldS64AddV2Test, input_type_mismatch_NEG)
+{
+ _x->dtype(loco::DataType::S32);
+
+ luci::FoldAddV2Pass pass;
+ EXPECT_FALSE(pass.run(graph()));
+}
diff --git a/compiler/luci/pass/src/FoldCastPass.cpp b/compiler/luci/pass/src/FoldCastPass.cpp
new file mode 100644
index 000000000..00b86fe48
--- /dev/null
+++ b/compiler/luci/pass/src/FoldCastPass.cpp
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldCastPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
+luci::CircleConst *cast_const(luci::CircleConst *node, loco::DataType from_dtype,
+ loco::DataType to_dtype)
+{
+ assert(node->dtype() == from_dtype);
+
+ auto name = node->name();
+ assert(name.length() > 0);
+ auto constant = node->graph()->nodes()->create<luci::CircleConst>();
+ constant->dtype(to_dtype);
+ constant->rank(node->rank());
+ uint32_t num_elems = 1;
+ for (uint32_t i = 0; i < node->rank(); i++)
+ {
+ constant->dim(i).set(node->dim(i).value());
+ num_elems *= node->dim(i).value();
+ }
+
+ constant->shape_status(luci::ShapeStatus::VALID);
+
+ // TODO: Support more data types
+ if (from_dtype == loco::DataType::S64)
+ {
+ if (to_dtype == loco::DataType::S32)
+ {
+ constant->size<loco::DataType::S32>(num_elems);
+ for (uint32_t i = 0; i < num_elems; i++)
+ constant->at<loco::DataType::S32>(i) =
+ static_cast<int32_t>(node->at<loco::DataType::S64>(i));
+
+ constant->name(name + "_S32");
+ return constant;
+ }
+ return nullptr;
+ }
+
+ return nullptr;
+}
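+
+// NOTE the S64 -> S32 conversion above is a plain narrowing static_cast, so
+// out-of-range values are not saturated; e.g. an S64 const {1, 2, 3} simply
+// becomes an S32 const {1, 2, 3} (see the unit test).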
+
+/**
+ * Fold Cast to const if it has const input
+ **/
+bool fold_cast(luci::CircleCast *cast)
+{
+ // Check cast has const input
+ auto const_x = dynamic_cast<luci::CircleConst *>(cast->x());
+ if (not const_x)
+ return false;
+
+ const auto in_dtype = const_x->dtype();
+ const auto out_dtype = cast->dtype();
+
+ auto casted_const = cast_const(const_x, in_dtype, out_dtype);
+ if (not casted_const)
+ return false;
+
+ loco::replace(cast).with(casted_const);
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * Constant Folding for Cast Op
+ **/
+bool FoldCastPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto cast = dynamic_cast<luci::CircleCast *>(node))
+ {
+ if (fold_cast(cast))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FoldCastPass.test.cpp b/compiler/luci/pass/src/FoldCastPass.test.cpp
new file mode 100644
index 000000000..da33e4379
--- /dev/null
+++ b/compiler/luci/pass/src/FoldCastPass.test.cpp
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldCastPass.h"
+#include "PassTestGraphs.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+template <loco::DataType FromT, loco::DataType ToT>
+class FoldCastTest : public luci::ConstantFoldingAddTestGraph
+{
+public:
+ FoldCastTest(std::initializer_list<uint32_t> shape)
+ : luci::ConstantFoldingAddTestGraph(shape, ToT)
+ {
+ _cast = _g.nodes()->template create<luci::CircleCast>();
+ _x = _g.nodes()->template create<luci::CircleConst>();
+
+ _cast->dtype(ToT);
+ _x->dtype(FromT);
+
+ _cast->shape(shape);
+ _x->shape(shape);
+
+ uint32_t num_elems = 1;
+ for (auto dim = shape.begin(); dim != shape.end(); dim++)
+ num_elems *= *dim;
+
+ _x->size<FromT>(num_elems);
+ for (uint32_t i = 0; i < num_elems; i++)
+ _x->at<FromT>(i) = i + 1;
+
+ _cast->x(_x);
+
+ _cast->name("cast");
+ _x->name("x");
+ }
+
+ loco::Node *createFoldedPattern() override { return _cast; }
+
+protected:
+ luci::CircleCast *_cast = nullptr;
+ luci::CircleConst *_x = nullptr;
+};
+
+/**
+ * Graph that has a Cast Op with constant input
+ *
+ * BEFORE
+ *
+ * [CircleConst]
+ * |
+ * [Cast]
+ *
+ * AFTER
+ *
+ * [CircleConst]
+ *
+ */
+class FoldS64ToS32CastTest : public FoldCastTest<loco::DataType::S64, loco::DataType::S32>,
+ public ::testing::Test
+{
+public:
+ FoldS64ToS32CastTest() : FoldCastTest<loco::DataType::S64, loco::DataType::S32>({3}) {}
+
+ virtual void SetUp() { init(); }
+};
+
+} // namespace
+
+TEST(FoldCastPassTest, name)
+{
+ luci::FoldCastPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST_F(FoldS64ToS32CastTest, fold_cast_s64_to_s32)
+{
+ luci::FoldCastPass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+ // Check type, shape, values of folded const
+ EXPECT_EQ(loco::DataType::S32, folded_const->dtype());
+ EXPECT_EQ(1, folded_const->rank());
+ EXPECT_EQ(3, folded_const->dim(0).value());
+ EXPECT_EQ(1, folded_const->at<loco::DataType::S32>(0));
+ EXPECT_EQ(2, folded_const->at<loco::DataType::S32>(1));
+ EXPECT_EQ(3, folded_const->at<loco::DataType::S32>(2));
+}
diff --git a/compiler/luci/pass/src/FoldDensifyPass.cpp b/compiler/luci/pass/src/FoldDensifyPass.cpp
new file mode 100644
index 000000000..5ddc743e5
--- /dev/null
+++ b/compiler/luci/pass/src/FoldDensifyPass.cpp
@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldDensifyPass.h"
+#include "helpers/SparsityFormatConverter.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+#include <cassert>
+#include <vector>
+
+namespace
+{
+
+bool is_foldable_const(luci::CircleConst *node)
+{
+ if (node->sparsityparam() == nullptr)
+ return false;
+
+ if (node->dtype() == loco::DataType::FLOAT32)
+ return true;
+ if (node->dtype() == loco::DataType::FLOAT16)
+ return true;
+
+ return false;
+}
+
+luci::CircleConst *densified_const_node(luci::CircleConst *const_node)
+{
+ assert(const_node->sparsityparam());
+
+ auto name = const_node->name();
+ assert(name.length() > 0);
+ auto g = const_node->graph();
+ auto new_const_node = g->nodes()->create<luci::CircleConst>();
+
+ new_const_node->dtype(const_node->dtype());
+ new_const_node->rank(const_node->rank());
+
+ uint32_t dim_size = 1;
+ std::vector<int> dense_shape;
+ for (uint32_t i = 0; i < new_const_node->rank(); ++i)
+ {
+ assert(const_node->dim(i).known());
+ new_const_node->dim(i) = const_node->dim(i);
+
+ uint32_t value = const_node->dim(i).value();
+ dim_size *= value;
+ dense_shape.emplace_back(static_cast<int32_t>(value));
+ }
+
+ if (const_node->dtype() == loco::DataType::FLOAT32)
+ new_const_node->size<loco::DataType::FLOAT32>(dim_size);
+ else
+ {
+ assert(const_node->dtype() == loco::DataType::FLOAT16);
+ new_const_node->size<loco::DataType::FLOAT16>(dim_size);
+ }
+
+ new_const_node->shape_status(luci::ShapeStatus::VALID);
+ new_const_node->name(name + "_DS");
+
+ if (const_node->dtype() == loco::DataType::FLOAT32)
+ {
+ auto const_items = const_node->size<loco::DataType::FLOAT32>();
+ auto f_data = std::make_unique<float[]>(const_items);
+ for (size_t i = 0; i < const_items; ++i)
+ f_data[i] = const_node->at<loco::DataType::FLOAT32>(i);
+
+ sparsity::TfLiteSparsity sp = to_tflite_sparsity(const_node->sparsityparam());
+ sparsity::FormatConverter<float> converter(dense_shape, sp);
+ converter.SparseToDense(f_data.get());
+ const auto &data_dense = converter.GetData();
+ assert(data_dense.size() == dim_size);
+
+ for (uint32_t i = 0; i < dim_size; ++i)
+ new_const_node->at<loco::DataType::FLOAT32>(i) = data_dense[i];
+
+ luci::freeTfLiteSparsity(sp);
+ }
+ else
+ {
+ assert(const_node->dtype() == loco::DataType::FLOAT16);
+
+ auto const_items = const_node->size<loco::DataType::FLOAT16>();
+ auto f_data = std::make_unique<uint16_t[]>(const_items);
+ for (size_t i = 0; i < const_items; ++i)
+ f_data[i] = const_node->at<loco::DataType::FLOAT16>(i);
+
+ // Primitive type for FLOAT16 is UINT16
+ sparsity::TfLiteSparsity sp = to_tflite_sparsity(const_node->sparsityparam());
+ sparsity::FormatConverter<uint16_t> converter(dense_shape, sp);
+ converter.SparseToDense(f_data.get());
+ const auto &data_dense = converter.GetData();
+ assert(data_dense.size() == dim_size);
+ for (uint32_t i = 0; i < dim_size; ++i)
+ new_const_node->at<loco::DataType::FLOAT16>(i) = data_dense[i];
+
+ luci::freeTfLiteSparsity(sp);
+ }
+
+ return new_const_node;
+}
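+
+// e.g. (mirrors the unit test): a 4x4 CSR-encoded const whose payload holds
+// only the four nonzeros {1.0, 2.0, 3.0, 4.0} becomes a dense 16-element
+// const with those values on the diagonal and 0.0 elsewhere.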
+
+/**
+ * @brief Fold Densify if its input is a sparse constant
+ */
+bool fold_densify(luci::CircleDensify *densify)
+{
+ auto const_input = dynamic_cast<luci::CircleConst *>(densify->input());
+ if (not const_input)
+ return false;
+
+ if (not is_foldable_const(const_input))
+ return false;
+
+ auto dense_const = densified_const_node(const_input);
+ assert(dense_const);
+
+ loco::replace(densify).with(dense_const);
+ luci::add_origin(dense_const, luci::composite_origin(
+ {luci::get_origin(densify), luci::get_origin(const_input)}));
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * BEFORE
+ *
+ * [CircleConst](sparse)
+ * |
+ * [CircleDensify]
+ * |
+ * [CircleNode]
+ * |
+ *
+ * AFTER
+ *
+ * [CircleConst](dense) [CircleConst](sparse)
+ * | |
+ * [CircleNode] [CircleDensify]
+ * |
+ */
+bool FoldDensifyPass::run(loco::Graph *g)
+{
+ bool changed = false;
+
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto densify = dynamic_cast<luci::CircleDensify *>(node))
+ {
+ if (fold_densify(densify))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FoldDensifyPass.test.cpp b/compiler/luci/pass/src/FoldDensifyPass.test.cpp
new file mode 100644
index 000000000..2f9736f49
--- /dev/null
+++ b/compiler/luci/pass/src/FoldDensifyPass.test.cpp
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldDensifyPass.h"
+#include "PassTestGraphs.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+class FoldDensifyPassGraph : public luci::ConstantFoldingAddTestGraph
+{
+public:
+ FoldDensifyPassGraph(std::initializer_list<uint32_t> shape)
+ : luci::ConstantFoldingAddTestGraph(shape, loco::DataType::FLOAT32)
+ {
+ _densify = _g.nodes()->create<luci::CircleDensify>();
+ _x = _g.nodes()->create<luci::CircleConst>();
+
+ _densify->dtype(loco::DataType::FLOAT32);
+ _x->dtype(loco::DataType::FLOAT32);
+
+ _densify->shape(shape);
+ _x->shape(shape);
+
+ _densify->input(_x);
+
+ _densify->name("densify");
+ _x->name("x");
+ }
+
+ loco::Node *createFoldedPattern() override { return _densify; }
+
+public:
+ void fill_const_dense(void)
+ {
+ uint32_t num_elems = 1;
+ for (uint32_t r = 0; r < _x->rank(); ++r)
+ num_elems *= _x->dim(r).value();
+
+ _x->size<loco::DataType::FLOAT32>(num_elems);
+ for (uint32_t i = 0; i < num_elems; i++)
+ _x->at<loco::DataType::FLOAT32>(i) = static_cast<float>(i + 1);
+ }
+
+ void fill_const_sparse(void)
+ {
+ // fill 4x4 of
+ // [[1 0 0 0]
+ // [0 2 0 0]
+ // [0 0 3 0]
+ // [0 0 0 4]]
+
+ // values of 1.0, 2.0, 3.0, 4.0
+ uint32_t udata[] = {0x3f800000, 0x40000000, 0x40400000, 0x40800000};
+ float *fdata = reinterpret_cast<float *>(udata);
+
+ _x->size<loco::DataType::FLOAT32>(4);
+ for (uint32_t i = 0; i < 4; i++)
+ _x->at<loco::DataType::FLOAT32>(i) = fdata[i];
+
+ auto sparsityparam = std::make_unique<luci::SparsityParam>();
+ sparsityparam->traversal_order = std::vector<int32_t>({0, 1});
+ sparsityparam->block_map = std::vector<int32_t>({});
+
+ auto dm0 = luci::DimMetaData(luci::DimensionType::DENSE, 4);
+
+ std::vector<int32_t> as_vec = {0, 1, 2, 3, 4};
+ std::vector<int32_t> ai_vec = {0, 1, 2, 3};
+ auto as = luci::SparseIndexVector(luci::SparseIndexVectorType::I32, as_vec);
+ auto ai = luci::SparseIndexVector(luci::SparseIndexVectorType::I32, ai_vec);
+ auto dm1 = luci::DimMetaData(luci::DimensionType::SPARSE_CSR, 0, as, ai);
+ sparsityparam->dim_metadata.emplace_back(dm0);
+ sparsityparam->dim_metadata.emplace_back(dm1);
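+    // In CSR terms: dm0 marks dimension 0 as dense with 4 rows, as_vec acts
+    // as the row-pointer array (row r owns entries [as_vec[r], as_vec[r+1]))
+    // and ai_vec holds the column index of each stored value - one entry per
+    // row, at columns 0..3, i.e. the diagonal filled above.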
+
+ _x->sparsityparam(std::move(sparsityparam));
+ }
+
+protected:
+ luci::CircleDensify *_densify = nullptr;
+ luci::CircleConst *_x = nullptr;
+};
+
+class FoldDensifyPassGraphTest : public FoldDensifyPassGraph, public ::testing::Test
+{
+public:
+ FoldDensifyPassGraphTest() : FoldDensifyPassGraph({4, 4}) {}
+
+ virtual void SetUp() { init(); }
+};
+
+} // namespace
+
+TEST(FoldDensifyPassGraph, name)
+{
+ luci::FoldDensifyPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST_F(FoldDensifyPassGraphTest, no_sparsity_param_NEG)
+{
+ fill_const_dense();
+
+ luci::FoldDensifyPass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_EQ(nullptr, folded_const);
+}
+
+TEST_F(FoldDensifyPassGraphTest, sparsity_param)
+{
+ fill_const_sparse();
+
+ luci::FoldDensifyPass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+ EXPECT_EQ(2, folded_const->rank());
+ EXPECT_EQ(4, folded_const->dim(0).value());
+ EXPECT_EQ(4, folded_const->dim(1).value());
+ EXPECT_EQ(16, folded_const->size<loco::DataType::FLOAT32>());
+ for (int y = 0; y < 4; ++y)
+ {
+ for (int x = 0; x < 4; ++x)
+ {
+ float ovalue = folded_const->at<loco::DataType::FLOAT32>(y * 4 + x);
+ float fvalue = 0.0;
+ if (x == y)
+ {
+ // diagonal position
+ fvalue = static_cast<float>(y + 1);
+ }
+ EXPECT_EQ(fvalue, ovalue);
+ }
+ }
+}
diff --git a/compiler/luci/pass/src/FoldDepthwiseConv2DPass.cpp b/compiler/luci/pass/src/FoldDepthwiseConv2DPass.cpp
new file mode 100644
index 000000000..33f9f1d77
--- /dev/null
+++ b/compiler/luci/pass/src/FoldDepthwiseConv2DPass.cpp
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldDepthwiseConv2DPass.h"
+
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/AttrFusedActFunc.h>
+
+#include <luci/Log.h>
+
+#include <limits> // std::numeric_limits
+
+namespace
+{
+
+// TODO Share activation min/max and compute_input/output code with luci-interpreter
+
+bool compute_output(uint32_t *output_size, luci::Padding padding, int32_t image_size,
+ int32_t filter_size, int32_t stride, int32_t dilation_rate)
+{
+ auto const effective_filter_size = (filter_size - 1) * dilation_rate + 1;
+ switch (padding)
+ {
+ case luci::Padding::SAME:
+ *output_size = (image_size + stride - 1) / stride;
+ return true;
+
+ case luci::Padding::VALID:
+ *output_size = (image_size + stride - effective_filter_size) / stride;
+ return true;
+
+ default:
+ {
+ LOGGER(l);
+ WARN(l) << "Unsupported padding: " << uint32_t(padding);
+ return false;
+ }
+ }
+}
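+
+// e.g. with stride 1 and dilation 1: SAME on a 4-wide image yields
+// (4 + 1 - 1) / 1 = 4, while VALID with a 3-wide filter yields
+// (4 + 1 - 3) / 1 = 2.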
+
+uint32_t compute_padding(int32_t stride, int32_t dilation_rate, int32_t in_size,
+ int32_t filter_size, int32_t out_size)
+{
+ auto const effective_filter_size = (filter_size - 1) * dilation_rate + 1;
+ auto const padding = ((out_size - 1) * stride + effective_filter_size - in_size) / 2;
+ return padding > 0 ? padding : 0;
+}
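+
+// e.g. stride 1, dilation 1, in_size 4, filter_size 3, out_size 4 (SAME):
+// ((4 - 1) * 1 + 3 - 4) / 2 = 1 pixel of padding on each side.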
+
+bool set_kernel_parameters(tflite::DepthwiseParams *params, luci::CircleDepthwiseConv2D *node,
+ uint32_t padding_height, uint32_t padding_width)
+{
+ switch (node->fusedActivationFunction())
+ {
+ case luci::FusedActFunc::NONE:
+ case luci::FusedActFunc::TANH:
+ params->float_activation_min = std::numeric_limits<float>::lowest();
+ params->float_activation_max = std::numeric_limits<float>::max();
+ break;
+ case luci::FusedActFunc::RELU:
+ params->float_activation_min = 0;
+ params->float_activation_max = std::numeric_limits<float>::max();
+ break;
+ case luci::FusedActFunc::RELU_N1_TO_1:
+ params->float_activation_min = -1;
+ params->float_activation_max = 1;
+ break;
+ case luci::FusedActFunc::RELU6:
+ params->float_activation_min = 0;
+ params->float_activation_max = 6;
+ break;
+ default:
+ {
+ LOGGER(l);
+ WARN(l) << "Unsupported activation: " << uint32_t(node->fusedActivationFunction());
+ return false;
+ }
+ }
+
+ params->stride_height = node->stride()->h();
+ params->stride_width = node->stride()->w();
+ params->dilation_height_factor = node->dilation()->h();
+ params->dilation_width_factor = node->dilation()->w();
+ params->depth_multiplier = node->depthMultiplier();
+
+ params->padding_values.height = padding_height;
+ params->padding_values.width = padding_width;
+
+ return true;
+}
+
+/**
+ * Fold DepthwiseConv2D with constant input and filter into a constant tensor
+ *
+ * BEFORE
+ *
+ * [CircleConst] [CircleConst]
+ * | |
+ * [CircleDepthwiseConv2D]
+ *
+ * AFTER
+ *
+ * [CircleConst]
+ */
+bool fold_depthwise_conv_2d(luci::CircleDepthwiseConv2D *node)
+{
+ LOGGER(l);
+
+ auto const input = dynamic_cast<luci::CircleConst *>(node->input());
+
+ if (input == nullptr)
+ return false; // Constant input is required for folding
+
+ auto const filter = dynamic_cast<luci::CircleConst *>(node->filter());
+
+ if (filter == nullptr)
+ return false; // Constant filter is required for folding
+
+ if (filter->dim(0).value() != 1)
+ return false; // Unsupported batch size
+
+ auto const bias = dynamic_cast<luci::CircleConst *>(node->bias());
+
+ if (bias == nullptr)
+ return false; // Constant bias is required for folding
+
+ auto const input_batches = input->dim(0).value();
+ auto const input_height = input->dim(1).value();
+ auto const input_width = input->dim(2).value();
+ auto const input_depth = input->dim(3).value();
+
+ auto const filter_height = filter->dim(1).value();
+ auto const filter_width = filter->dim(2).value();
+ auto const filter_channels_out = filter->dim(3).value();
+
+ if (filter_channels_out % input_depth != 0)
+ return false; // Wrong input/output depth ratio
+
+ if (node->depthMultiplier() != static_cast<int32_t>(filter_channels_out / input_depth))
+ return false; // Wrong depth multiplier value
+
+ if (bias->rank() != 1 || bias->dim(0).value() != filter_channels_out)
+ return false; // Unsupported bias value
+
+ uint32_t output_height = 0;
+ uint32_t output_width = 0;
+
+ if (!compute_output(&output_height, node->padding(), input_height, filter_height,
+ node->stride()->h(), node->dilation()->h()))
+ return false; // Unsupported output parameters
+
+ if (!compute_output(&output_width, node->padding(), input_width, filter_width,
+ node->stride()->w(), node->dilation()->w()))
+ return false; // Unsupported output parameters
+
+ auto const padding_height = compute_padding(node->stride()->h(), node->dilation()->h(),
+ input_height, filter_height, output_height);
+ auto const padding_width = compute_padding(node->stride()->w(), node->dilation()->w(),
+ input_width, filter_width, output_width);
+
+ tflite::DepthwiseParams params{};
+
+ if (!set_kernel_parameters(&params, node, padding_height, padding_width))
+ return false; // Unsupported kernel parameter values
+
+ auto constant = node->graph()->nodes()->create<luci::CircleConst>();
+ constant->name(node->name());
+ constant->dtype(node->dtype());
+ constant->rank(node->rank());
+ constant->shape_status(luci::ShapeStatus::VALID);
+ for (uint32_t i = 0; i < node->rank(); ++i)
+ constant->dim(i).set(node->dim(i).value());
+
+ constant->size<loco::DataType::FLOAT32>(input_batches * output_height * output_width *
+ filter_channels_out);
+
+ auto const input_data = &input->at<loco::DataType::FLOAT32>(0);
+ auto const filter_data = &filter->at<loco::DataType::FLOAT32>(0);
+ auto const bias_data = &bias->at<loco::DataType::FLOAT32>(0);
+ auto const constant_data = &constant->at<loco::DataType::FLOAT32>(0);
+
+ auto tensor_shape = [](luci::CircleNode *node) {
+ tflite::RuntimeShape runtime_shape(node->rank());
+ for (uint32_t i = 0; i < node->rank(); ++i)
+ runtime_shape.SetDim(i, node->dim(i).value());
+ return runtime_shape;
+ };
+
+ tflite::reference_ops::DepthwiseConv(params, tensor_shape(input), input_data,
+ tensor_shape(filter), filter_data, tensor_shape(bias),
+ bias_data, tensor_shape(constant), constant_data);
+
+ loco::replace(node).with(constant);
+
+ return true;
+}
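+
+// e.g. (mirrors the unit test): a 1x4x4x1 input filled with 0.5, a 1x1x1x1
+// filter of 0.5 and a zero bias fold into a 1x4x4x1 const filled with 0.25.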
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * Constant Folding for DepthwiseConv2D Op
+ **/
+bool FoldDepthwiseConv2DPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto depthwise_conv2d = dynamic_cast<CircleDepthwiseConv2D *>(node);
+
+ if (depthwise_conv2d == nullptr)
+ continue;
+
+ switch (depthwise_conv2d->dtype())
+ {
+ case loco::DataType::FLOAT32:
+        if (fold_depthwise_conv_2d(depthwise_conv2d))
+          changed = true;
+ break;
+ default:
+ break;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FoldDepthwiseConv2DPass.test.cpp b/compiler/luci/pass/src/FoldDepthwiseConv2DPass.test.cpp
new file mode 100644
index 000000000..36cae0437
--- /dev/null
+++ b/compiler/luci/pass/src/FoldDepthwiseConv2DPass.test.cpp
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldDepthwiseConv2DPass.h"
+#include "PassTestGraphs.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <limits> // std::numeric_limits
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+/**
+ * Graph has a DepthwiseConv2D Op with constant inputs
+ *
+ * BEFORE
+ *
+ * [CircleConst] [CircleConst]
+ * | |
+ * [CircleDepthwiseConv2D]
+ *
+ * AFTER
+ *
+ * [CircleConst]
+ */
+class FoldDepthwiseConv2DTest : public luci::ConstantFoldingTestGraph, public ::testing::Test
+{
+public:
+ FoldDepthwiseConv2DTest() : luci::ConstantFoldingTestGraph({1, 4, 4, 1}, loco::DataType::FLOAT32)
+ {
+ _dconv = _g.nodes()->create<luci::CircleDepthwiseConv2D>();
+ _dconv_input = _g.nodes()->create<luci::CircleConst>();
+ _dconv_filter = _g.nodes()->create<luci::CircleConst>();
+ _dconv_bias = _g.nodes()->create<luci::CircleConst>();
+
+ _dconv->dtype(loco::DataType::FLOAT32);
+ _dconv->padding(luci::Padding::VALID);
+ _dconv->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _dconv->input(_dconv_input);
+ _dconv->filter(_dconv_filter);
+ _dconv->bias(_dconv_bias);
+ _dconv->shape({1, 4, 4, 1});
+ _dconv->stride()->h(1);
+ _dconv->stride()->w(1);
+ _dconv->depthMultiplier(1);
+
+ _dconv_input->dtype(loco::DataType::FLOAT32);
+ _dconv_input->shape({1, 4, 4, 1});
+ _dconv_input->size<loco::DataType::FLOAT32>(16);
+
+ _dconv_filter->dtype(loco::DataType::FLOAT32);
+ _dconv_filter->shape({1, 1, 1, 1});
+ _dconv_filter->size<loco::DataType::FLOAT32>(1);
+
+ _dconv_bias->dtype(loco::DataType::FLOAT32);
+ _dconv_bias->shape({1});
+ _dconv_bias->size<loco::DataType::FLOAT32>(1);
+
+ _output->from(_dconv);
+ }
+
+protected:
+ void init() final {}
+
+protected:
+ loco::Node *createFoldedPattern() final { return nullptr; }
+
+protected:
+ luci::CircleConst *getFoldedPattern() final
+ {
+ return loco::must_cast<luci::CircleConst *>(_output->from());
+ }
+
+protected:
+ luci::CircleDepthwiseConv2D *_dconv = nullptr;
+ luci::CircleConst *_dconv_input = nullptr;
+ luci::CircleConst *_dconv_filter = nullptr;
+ luci::CircleConst *_dconv_bias = nullptr;
+};
+
+} // namespace
+
+TEST(FoldDepthwiseConv2DPass, name)
+{
+ luci::FoldDepthwiseConv2DPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST_F(FoldDepthwiseConv2DTest, fold_depthwise_conv2d)
+{
+ for (uint32_t i = 0; i < 16; ++i)
+ _dconv_input->at<loco::DataType::FLOAT32>(i) = 0.5;
+ _dconv_filter->at<loco::DataType::FLOAT32>(0) = 0.5;
+
+ luci::FoldDepthwiseConv2DPass pass;
+ ASSERT_TRUE(pass.run(&_g));
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_EQ(folded_const->dtype(), loco::DataType::FLOAT32);
+ EXPECT_NEAR(folded_const->at<loco::DataType::FLOAT32>(0), 0.25,
+ std::numeric_limits<float>::min());
+ EXPECT_NEAR(folded_const->at<loco::DataType::FLOAT32>(15), 0.25,
+ std::numeric_limits<float>::min());
+}
+
+TEST_F(FoldDepthwiseConv2DTest, fold_non_constant_NEG)
+{
+ _dconv->input(_input);
+
+ luci::FoldDepthwiseConv2DPass pass;
+ ASSERT_FALSE(pass.run(&_g));
+}
diff --git a/compiler/luci/pass/src/FoldDequantizePass.cpp b/compiler/luci/pass/src/FoldDequantizePass.cpp
new file mode 100644
index 000000000..b6526deb0
--- /dev/null
+++ b/compiler/luci/pass/src/FoldDequantizePass.cpp
@@ -0,0 +1,248 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldDequantizePass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+#include <fp16.h>
+
+namespace
+{
+
+bool is_hybrid_kernel_supported(loco::Node *node)
+{
+ if (dynamic_cast<luci::CircleFullyConnected *>(node) != nullptr)
+ return true;
+
+ return false;
+}
+
+bool is_foldable_const(luci::CircleConst *node)
+{
+ if (node->dtype() == loco::DataType::FLOAT16)
+ return true;
+
+ if (node->quantparam() == nullptr)
+ return false;
+
+ if (node->dtype() == loco::DataType::S8)
+ return true;
+ if (node->dtype() == loco::DataType::U8)
+ return true;
+ if (node->dtype() == loco::DataType::S16)
+ return true;
+ if (node->dtype() == loco::DataType::S32)
+ return true;
+ if (node->dtype() == loco::DataType::S64)
+ return true;
+
+ return false;
+}
+
+luci::CircleConst *dequantized_const_node(luci::CircleConst *const_node)
+{
+ auto name = const_node->name();
+ assert(name.length() > 0);
+ auto g = const_node->graph();
+ auto new_const_node = g->nodes()->create<luci::CircleConst>();
+
+ new_const_node->dtype(loco::DataType::FLOAT32);
+ new_const_node->rank(const_node->rank());
+ uint32_t dim_size = 1;
+ for (uint32_t i = 0; i < new_const_node->rank(); ++i)
+ {
+ new_const_node->dim(i) = const_node->dim(i);
+ dim_size *= const_node->dim(i).value();
+ }
+ new_const_node->size<loco::DataType::FLOAT32>(dim_size);
+ new_const_node->shape_status(luci::ShapeStatus::VALID);
+ new_const_node->name(name + "_DQ");
+
+ if (const_node->dtype() == loco::DataType::FLOAT16)
+ {
+ for (uint32_t i = 0; i < new_const_node->size<loco::DataType::FLOAT32>(); ++i)
+ {
+ auto raw = const_node->at<loco::DataType::FLOAT16>(i);
+ new_const_node->at<loco::DataType::FLOAT32>(i) = fp16_ieee_to_fp32_value(raw);
+ }
+ return new_const_node;
+ }
+
+ if (const_node->quantparam() == nullptr)
+ {
+ throw std::runtime_error("Given constant node has no quantization parameter");
+ }
+
+ const int32_t q_dim = const_node->quantparam()->quantized_dimension;
+ // For scalar, q_dim_value is 1
+  // For non-scalar, q_dim_value is the size of the quantized dimension
+ const int32_t q_dim_value = const_node->rank() == 0 ? 1 : const_node->dim(q_dim).value();
+
+ int32_t right_count = q_dim_value;
+ for (uint32_t i = q_dim + 1; i < const_node->rank(); ++i)
+ right_count *= const_node->dim(i).value();
+
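+  // The loop below computes float = (q - zerop[qd]) * scale[qd], where
+  // qd = (i % right_count) / (right_count / q_dim_value) is the index along
+  // the quantized dimension; e.g. for shape (2, 2, 2) with q_dim = 1,
+  // right_count = 4, so flat index 5 -> (5 % 4) / 2 = 0 -> channel 0.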
+ for (uint32_t i = 0; i < new_const_node->size<loco::DataType::FLOAT32>(); ++i)
+ {
+ uint32_t qd = (i % right_count) / (right_count / q_dim_value);
+ if (qd >= const_node->quantparam()->zerop.size())
+ qd = 0;
+
+ switch (const_node->dtype())
+ {
+ case loco::DataType::S8:
+ new_const_node->at<loco::DataType::FLOAT32>(i) =
+ static_cast<float>(const_node->at<loco::DataType::S8>(i) -
+ const_node->quantparam()->zerop.at(qd)) *
+ const_node->quantparam()->scale.at(qd);
+ break;
+ case loco::DataType::S16:
+ new_const_node->at<loco::DataType::FLOAT32>(i) =
+ static_cast<float>(const_node->at<loco::DataType::S16>(i) -
+ const_node->quantparam()->zerop.at(qd)) *
+ const_node->quantparam()->scale.at(qd);
+ break;
+ case loco::DataType::S32:
+ new_const_node->at<loco::DataType::FLOAT32>(i) =
+ static_cast<float>(const_node->at<loco::DataType::S32>(i) -
+ const_node->quantparam()->zerop.at(qd)) *
+ const_node->quantparam()->scale.at(qd);
+ break;
+ case loco::DataType::S64:
+ new_const_node->at<loco::DataType::FLOAT32>(i) =
+ static_cast<float>(const_node->at<loco::DataType::S64>(i) -
+ const_node->quantparam()->zerop.at(qd)) *
+ const_node->quantparam()->scale.at(qd);
+ break;
+ case loco::DataType::U8:
+ new_const_node->at<loco::DataType::FLOAT32>(i) =
+ static_cast<float>(const_node->at<loco::DataType::U8>(i) -
+ const_node->quantparam()->zerop.at(qd)) *
+ const_node->quantparam()->scale.at(qd);
+ break;
+ default:
+ throw std::runtime_error("Not supported dtype for FoldDequantizePass");
+ }
+ }
+
+ return new_const_node;
+}
+
+bool replace_const_node(loco::Node *node, luci::CircleConst *const_node)
+{
+ if (auto gather = dynamic_cast<luci::CircleGather *>(node))
+ {
+ gather->params(dequantized_const_node(const_node));
+ gather->dtype(loco::DataType::FLOAT32);
+ return true;
+ }
+ else
+ {
+ // TODO Support more ops
+ return false;
+ }
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ *
+ * Folding pattern 1 - When the input of Dequantize is a foldable constant
+ *
+ * [Before]
+ * quantized_const_input ---------- Dequantize ---------- Op ---
+ * +-- Op1_with_quant_input ---
+ * +-- Op2_with_quant_input ---
+ *
+ * [After]
+ * dequantized_const_input -------------------------------- Op ---
+ *
+ * quantized_const_input ----- Op1_with_quant_input ---
+ * +-- Op2_with_quant_input ---
+ *
+ *
+ * Folding pattern 2 - When the input of Dequantize is an op fed by a quantized constant
+ *
+ * [Before]
+ * quantized_const_input ----- Gather ----- Dequantize --- Op ---
+ * +-- Op1_with_quant_input ---
+ * +-- Op2_with_quant_input ---
+ *
+ * [After]
+ * dequantized_const_input ------Gather -------------------- Op ---
+ *
+ * quantized_const_input ----- Op1_with_quant_input ---
+ * +-- Op2_with_quant_input ---
+ *
+ *
+ */
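+// NOTE Pattern 2 is currently limited to Gather users - the only op handled
+// by replace_const_node() above.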
+bool FoldDequantizePass::run(loco::Graph *g)
+{
+ bool changed = false;
+
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto circle_dequant = dynamic_cast<luci::CircleDequantize *>(node))
+ {
+ if (auto const_input = dynamic_cast<luci::CircleConst *>(circle_dequant->input()))
+ {
+        // Pattern 1 - When the input of Dequantize is a foldable constant
+ if (is_foldable_const(const_input))
+ {
+ loco::replace(circle_dequant).with(dequantized_const_node(const_input));
+ changed = true;
+ }
+ }
+ }
+ else if (auto const_node = dynamic_cast<luci::CircleConst *>(node))
+ {
+ if (is_foldable_const(const_node))
+ {
+ for (auto const_node_user : loco::succs(const_node))
+ {
+          // If the user supports hybrid kernels, do not dequantize its input
+ if (is_hybrid_kernel_supported(const_node_user))
+ continue;
+
+ auto users = loco::succs(const_node_user);
+ if (users.size() > 1)
+ continue;
+
+          // Pattern 2 - When the input of Dequantize is an op fed by a quantized constant
+ if (auto dequant = dynamic_cast<luci::CircleDequantize *>(*users.begin()))
+ {
+ if (replace_const_node(const_node_user, const_node))
+ {
+ loco::replace(dequant).with(const_node_user);
+ luci::add_origin(loco::must_cast<luci::CircleNode *>(const_node_user),
+ luci::get_origin(dequant));
+ changed = true;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FoldDequantizePass.test.cpp b/compiler/luci/pass/src/FoldDequantizePass.test.cpp
new file mode 100644
index 000000000..87dff5dc0
--- /dev/null
+++ b/compiler/luci/pass/src/FoldDequantizePass.test.cpp
@@ -0,0 +1,403 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldDequantizePass.h"
+#include "PassTestGraphs.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+template <loco::DataType DT>
+class FoldDequantizeTest : public luci::ConstantFoldingAddTestGraph, public ::testing::Test
+{
+public:
+ FoldDequantizeTest() : luci::ConstantFoldingAddTestGraph({2, 2, 2}, DT) {}
+
+ virtual void SetUp() { init(); }
+
+ loco::Node *createFoldedPattern() override
+ {
+ _dequantize = _g.nodes()->template create<luci::CircleDequantize>();
+ _input = _g.nodes()->template create<luci::CircleConst>();
+
+ _dequantize->dtype(loco::DataType::FLOAT32);
+ _input->dtype(DT);
+
+ _input->shape({2, 2, 2});
+
+ _input->size<DT>(8);
+ _input->at<DT>(0) = 0;
+ _input->at<DT>(1) = 1;
+ _input->at<DT>(2) = 2;
+ _input->at<DT>(3) = 3;
+ _input->at<DT>(4) = 4;
+ _input->at<DT>(5) = 5;
+ _input->at<DT>(6) = 6;
+ _input->at<DT>(7) = 7;
+
+ auto qparam = std::make_unique<luci::CircleQuantParam>();
+ qparam->quantized_dimension = 1;
+ qparam->scale.push_back(5.0);
+ qparam->scale.push_back(10.0);
+ qparam->zerop.push_back(1);
+ qparam->zerop.push_back(2);
+ _input->quantparam(std::move(qparam));
+
+ _dequantize->input(_input);
+
+ _dequantize->name("dequantize");
+ _input->name("input");
+
+ return _dequantize;
+ }
+
+ void createScalarPattern()
+ {
+ _input->rank(0);
+ _input->size<DT>(1);
+ _input->at<DT>(0) = 1;
+
+ auto qparam = std::make_unique<luci::CircleQuantParam>();
+ qparam->quantized_dimension = 0;
+ qparam->scale.push_back(1.0);
+ qparam->zerop.push_back(0);
+ _input->quantparam(std::move(qparam));
+ }
+
+ void createNotFoldablePattern() { _input->quantparam(nullptr); }
+
+protected:
+ luci::CircleDequantize *_dequantize = nullptr;
+ luci::CircleConst *_input = nullptr;
+};
+
+class S8FoldDequantizeTest : public FoldDequantizeTest<loco::DataType::S8>
+{
+};
+
+class S16FoldDequantizeTest : public FoldDequantizeTest<loco::DataType::S16>
+{
+};
+
+class S32FoldDequantizeTest : public FoldDequantizeTest<loco::DataType::S32>
+{
+};
+
+class S64FoldDequantizeTest : public FoldDequantizeTest<loco::DataType::S64>
+{
+};
+
+class U8FoldDequantizeTest : public FoldDequantizeTest<loco::DataType::U8>
+{
+};
+
+class F16FoldDequantizeTest : public luci::ConstantFoldingTestGraph, public ::testing::Test
+{
+public:
+ F16FoldDequantizeTest() : ConstantFoldingTestGraph({2, 2}, loco::DataType::FLOAT16) {}
+
+ virtual void SetUp() { init(); }
+
+ loco::Node *createFoldedPattern() override
+ {
+ const auto DT = loco::DataType::FLOAT16;
+ _dequantize = _g.nodes()->create<luci::CircleDequantize>();
+ _f16const = _g.nodes()->create<luci::CircleConst>();
+
+ _dequantize->dtype(loco::DataType::FLOAT32);
+ _f16const->dtype(DT);
+
+ _f16const->shape({2, 2});
+
+ _f16const->size<loco::DataType::FLOAT16>(4);
+ _f16const->at<DT>(0) = 49408; // -2.5f
+ _f16const->at<DT>(1) = 47104; // -0.5f
+ _f16const->at<DT>(2) = 0; // 0.0f
+ _f16const->at<DT>(3) = 15872; // 1.5f
+    // NOTE To get the uint16_t bit pattern of a float16 value, use
+    // fp16_ieee_from_fp32_value() as in compiler/souschef/src/Gaussian.cpp
+    // (GaussianFloat16DataChef::generate()):
+    //   uint16_t value = fp16_ieee_from_fp32_value(-2.5);
+    //   printf("-2.5 = %u\r\n", value);
+
+ _dequantize->input(_f16const);
+
+ _dequantize->name("dequantize");
+ _f16const->name("input");
+
+ _output->from(_dequantize);
+
+ return _dequantize;
+ }
+
+ void createNotFoldablePattern() { _dequantize->input(_input); }
+
+protected:
+ luci::CircleConst *getFoldedPattern() override
+ {
+ return dynamic_cast<luci::CircleConst *>(_output->from());
+ }
+
+ void init() override { createFoldedPattern(); }
+
+protected:
+ luci::CircleDequantize *_dequantize = nullptr;
+ luci::CircleConst *_f16const = nullptr;
+};
+
+} // namespace
+
+TEST(FoldDequantizePassTest, name)
+{
+ luci::FoldDequantizePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST_F(U8FoldDequantizeTest, fold_dequant_basic)
+{
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+  // Check type, shape, values of folded const
+ EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype());
+ EXPECT_EQ(3, folded_const->rank());
+ EXPECT_EQ(2, folded_const->dim(0).value());
+ EXPECT_EQ(2, folded_const->dim(1).value());
+ EXPECT_EQ(2, folded_const->dim(2).value());
+ EXPECT_EQ(-5.0, folded_const->at<loco::DataType::FLOAT32>(0));
+ EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(1));
+ EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(2));
+ EXPECT_EQ(10.0, folded_const->at<loco::DataType::FLOAT32>(3));
+ EXPECT_EQ(15.0, folded_const->at<loco::DataType::FLOAT32>(4));
+ EXPECT_EQ(20.0, folded_const->at<loco::DataType::FLOAT32>(5));
+ EXPECT_EQ(40.0, folded_const->at<loco::DataType::FLOAT32>(6));
+ EXPECT_EQ(50.0, folded_const->at<loco::DataType::FLOAT32>(7));
+}
+
+TEST_F(U8FoldDequantizeTest, fold_dequant_basic_NEG)
+{
+ createNotFoldablePattern();
+
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_EQ(nullptr, folded_const);
+}
+
+TEST_F(S8FoldDequantizeTest, fold_dequant_basic)
+{
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+  // Check type, shape, values of folded const
+ EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype());
+ EXPECT_EQ(3, folded_const->rank());
+ EXPECT_EQ(2, folded_const->dim(0).value());
+ EXPECT_EQ(2, folded_const->dim(1).value());
+ EXPECT_EQ(2, folded_const->dim(2).value());
+ EXPECT_EQ(-5.0, folded_const->at<loco::DataType::FLOAT32>(0));
+ EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(1));
+ EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(2));
+ EXPECT_EQ(10.0, folded_const->at<loco::DataType::FLOAT32>(3));
+ EXPECT_EQ(15.0, folded_const->at<loco::DataType::FLOAT32>(4));
+ EXPECT_EQ(20.0, folded_const->at<loco::DataType::FLOAT32>(5));
+ EXPECT_EQ(40.0, folded_const->at<loco::DataType::FLOAT32>(6));
+ EXPECT_EQ(50.0, folded_const->at<loco::DataType::FLOAT32>(7));
+}
+
+TEST_F(S8FoldDequantizeTest, fold_dequant_basic_NEG)
+{
+ createNotFoldablePattern();
+
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_EQ(nullptr, folded_const);
+}
+
+TEST_F(S16FoldDequantizeTest, fold_dequant_basic)
+{
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+  // Check type, shape, values of folded const
+ EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype());
+ EXPECT_EQ(3, folded_const->rank());
+ EXPECT_EQ(2, folded_const->dim(0).value());
+ EXPECT_EQ(2, folded_const->dim(1).value());
+ EXPECT_EQ(2, folded_const->dim(2).value());
+ EXPECT_EQ(-5.0, folded_const->at<loco::DataType::FLOAT32>(0));
+ EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(1));
+ EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(2));
+ EXPECT_EQ(10.0, folded_const->at<loco::DataType::FLOAT32>(3));
+ EXPECT_EQ(15.0, folded_const->at<loco::DataType::FLOAT32>(4));
+ EXPECT_EQ(20.0, folded_const->at<loco::DataType::FLOAT32>(5));
+ EXPECT_EQ(40.0, folded_const->at<loco::DataType::FLOAT32>(6));
+ EXPECT_EQ(50.0, folded_const->at<loco::DataType::FLOAT32>(7));
+}
+
+TEST_F(S16FoldDequantizeTest, fold_dequant_basic_NEG)
+{
+ createNotFoldablePattern();
+
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_EQ(nullptr, folded_const);
+}
+
+TEST_F(S32FoldDequantizeTest, fold_dequant_basic)
+{
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+  // Check type, shape, values of folded const
+ EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype());
+ EXPECT_EQ(3, folded_const->rank());
+ EXPECT_EQ(2, folded_const->dim(0).value());
+ EXPECT_EQ(2, folded_const->dim(1).value());
+ EXPECT_EQ(2, folded_const->dim(2).value());
+ EXPECT_EQ(-5.0, folded_const->at<loco::DataType::FLOAT32>(0));
+ EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(1));
+ EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(2));
+ EXPECT_EQ(10.0, folded_const->at<loco::DataType::FLOAT32>(3));
+ EXPECT_EQ(15.0, folded_const->at<loco::DataType::FLOAT32>(4));
+ EXPECT_EQ(20.0, folded_const->at<loco::DataType::FLOAT32>(5));
+ EXPECT_EQ(40.0, folded_const->at<loco::DataType::FLOAT32>(6));
+ EXPECT_EQ(50.0, folded_const->at<loco::DataType::FLOAT32>(7));
+}
+
+TEST_F(S32FoldDequantizeTest, fold_dequant_basic_NEG)
+{
+ createNotFoldablePattern();
+
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_EQ(nullptr, folded_const);
+}
+
+TEST_F(S64FoldDequantizeTest, fold_dequant_basic)
+{
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+  // Check type, shape, values of folded const
+ EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype());
+ EXPECT_EQ(3, folded_const->rank());
+ EXPECT_EQ(2, folded_const->dim(0).value());
+ EXPECT_EQ(2, folded_const->dim(1).value());
+ EXPECT_EQ(2, folded_const->dim(2).value());
+ EXPECT_EQ(-5.0, folded_const->at<loco::DataType::FLOAT32>(0));
+ EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(1));
+ EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(2));
+ EXPECT_EQ(10.0, folded_const->at<loco::DataType::FLOAT32>(3));
+ EXPECT_EQ(15.0, folded_const->at<loco::DataType::FLOAT32>(4));
+ EXPECT_EQ(20.0, folded_const->at<loco::DataType::FLOAT32>(5));
+ EXPECT_EQ(40.0, folded_const->at<loco::DataType::FLOAT32>(6));
+ EXPECT_EQ(50.0, folded_const->at<loco::DataType::FLOAT32>(7));
+}
+
+TEST_F(S64FoldDequantizeTest, fold_dequant_basic_NEG)
+{
+ createNotFoldablePattern();
+
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_EQ(nullptr, folded_const);
+}
+
+TEST_F(U8FoldDequantizeTest, fold_dequant_scalar)
+{
+ createScalarPattern();
+
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+ // Check type, shape, values of folded const
+ EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype());
+ EXPECT_EQ(0, folded_const->rank());
+ EXPECT_EQ(1.0, folded_const->at<loco::DataType::FLOAT32>(0));
+}
+
+TEST_F(F16FoldDequantizeTest, fold_dequant_basic)
+{
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+  // Check type, shape, values of folded const
+ EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype());
+ EXPECT_EQ(2, folded_const->rank());
+ EXPECT_EQ(2, folded_const->dim(0).value());
+ EXPECT_EQ(2, folded_const->dim(1).value());
+ EXPECT_EQ(-2.5, folded_const->at<loco::DataType::FLOAT32>(0));
+ EXPECT_EQ(-0.5, folded_const->at<loco::DataType::FLOAT32>(1));
+ EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(2));
+ EXPECT_EQ(1.5, folded_const->at<loco::DataType::FLOAT32>(3));
+}
+
+TEST_F(F16FoldDequantizeTest, fold_dequant_basic_NEG)
+{
+ createNotFoldablePattern();
+
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_EQ(nullptr, folded_const);
+}
diff --git a/compiler/luci/pass/src/FoldFullyConnectedPass.cpp b/compiler/luci/pass/src/FoldFullyConnectedPass.cpp
new file mode 100644
index 000000000..a3bca7eda
--- /dev/null
+++ b/compiler/luci/pass/src/FoldFullyConnectedPass.cpp
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldFullyConnectedPass.h"
+
+#include <tensorflow/lite/kernels/internal/reference/fully_connected.h>
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/AttrFusedActFunc.h>
+
+#include <luci/Log.h>
+
+#include <limits> // std::numeric_limits
+
+namespace
+{
+
+bool set_kernel_parameters(tflite::FullyConnectedParams *params, luci::CircleFullyConnected *node)
+{
+ switch (node->fusedActivationFunction())
+ {
+ case luci::FusedActFunc::NONE:
+ case luci::FusedActFunc::TANH:
+ params->float_activation_min = std::numeric_limits<float>::lowest();
+ params->float_activation_max = std::numeric_limits<float>::max();
+ break;
+ case luci::FusedActFunc::RELU:
+ params->float_activation_min = 0;
+ params->float_activation_max = std::numeric_limits<float>::max();
+ break;
+ case luci::FusedActFunc::RELU_N1_TO_1:
+ params->float_activation_min = -1;
+ params->float_activation_max = 1;
+ break;
+ case luci::FusedActFunc::RELU6:
+ params->float_activation_min = 0;
+ params->float_activation_max = 6;
+ break;
+ default:
+ {
+ LOGGER(l);
+ WARN(l) << "Unsupported activation: " << uint32_t(node->fusedActivationFunction());
+ return false;
+ }
+ }
+
+ assert(node->weights_format() ==
+ luci::CircleFullyConnected::WeightsFormat::DEFAULT); // FIX_CALLER_UNLESS
+ params->weights_format = tflite::FullyConnectedWeightsFormat::kDefault;
+
+ return true;
+}
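+// e.g. RELU6 clamps the folded float output to [0, 6] and RELU to
+// [0, +max]; NONE and TANH use the full float range (no clamping).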
+
+#define RETURN_FALSE_UNLESS(cond) \
+ if (not(cond)) \
+ return false;
+
+/**
+ * Fold FullyConnected with constant input and filter into a constant tensor
+ *
+ * BEFORE
+ *
+ * [CircleConst] [CircleConst]
+ * | |
+ * [CircleFullyConnected]
+ *
+ * AFTER
+ *
+ * [CircleConst]
+ */
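+// A minimal worked sketch (hypothetical values, not taken from the pass):
+// with input of shape [1, 2] = {1, 2}, weights of shape [3, 2] =
+// {{1, 0}, {0, 1}, {1, 1}} and no bias, the fold computes
+// out[b][u] = sum_k input[b][k] * weights[u][k], i.e. a constant
+// of shape [1, 3] = {1, 2, 3}.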
+bool fold_fully_connected(luci::CircleFullyConnected *node)
+{
+ RETURN_FALSE_UNLESS(node != nullptr);
+
+ LOGGER(l);
+
+ auto const input = dynamic_cast<luci::CircleConst *>(node->input());
+ auto const weights = dynamic_cast<luci::CircleConst *>(node->weights());
+ auto const bias = dynamic_cast<luci::CircleConst *>(node->bias());
+ auto const no_bias = dynamic_cast<luci::CircleOutputExclude *>(node->bias());
+
+ RETURN_FALSE_UNLESS(input != nullptr);
+ RETURN_FALSE_UNLESS(weights != nullptr);
+ RETURN_FALSE_UNLESS(node->weights_format() == luci::CircleFullyConnected::WeightsFormat::DEFAULT);
+ RETURN_FALSE_UNLESS(bias != nullptr or no_bias != nullptr);
+
+ RETURN_FALSE_UNLESS(input->dtype() == loco::DataType::FLOAT32);
+ RETURN_FALSE_UNLESS(weights->dtype() == loco::DataType::FLOAT32);
+ if (bias)
+ RETURN_FALSE_UNLESS(bias->dtype() == loco::DataType::FLOAT32);
+
+ auto const input_elems = input->size<loco::DataType::FLOAT32>();
+
+ RETURN_FALSE_UNLESS(weights->rank() == 2);
+ RETURN_FALSE_UNLESS(input_elems % weights->dim(1).value() == 0);
+ auto const batch_size = input_elems / weights->dim(1).value();
+ auto const num_units = weights->dim(0).value();
+
+ if (bias)
+ RETURN_FALSE_UNLESS(bias->size<loco::DataType::FLOAT32>() == num_units);
+
+ tflite::FullyConnectedParams params{};
+ if (!set_kernel_parameters(&params, node))
+ return false; // Unsupported kernel parameter values
+
+ std::vector<uint32_t> output_shape;
+ if (node->keep_num_dims() == false)
+ {
+ output_shape.push_back(batch_size);
+ output_shape.push_back(num_units);
+ }
+ else
+ {
+ output_shape.resize(input->rank());
+ for (uint32_t i = 0; i < input->rank(); i++)
+ output_shape[i] = input->dim(i).value();
+ output_shape[input->rank() - 1] = num_units;
+ }
+
+ auto constant = node->graph()->nodes()->create<luci::CircleConst>();
+ {
+ constant->name(node->name());
+ constant->dtype(node->dtype());
+ constant->rank(node->rank());
+ constant->shape_status(luci::ShapeStatus::VALID);
+ uint32_t num_elem = 1;
+ for (uint32_t i = 0; i < node->rank(); ++i)
+ {
+ constant->dim(i).set(node->dim(i).value());
+ num_elem *= node->dim(i).value();
+ }
+ constant->size<loco::DataType::FLOAT32>(num_elem);
+ }
+
+ auto tensor_shape = [](luci::CircleNode *node) {
+ if (node == nullptr)
+ return tflite::RuntimeShape();
+
+ tflite::RuntimeShape runtime_shape(node->rank());
+ for (uint32_t i = 0; i < node->rank(); ++i)
+ runtime_shape.SetDim(i, node->dim(i).value());
+ return runtime_shape;
+ };
+
+ auto tensor_data = [](luci::CircleConst *node) -> float * {
+ if (node == nullptr)
+ return nullptr;
+
+ return &node->at<loco::DataType::FLOAT32>(0);
+ };
+
+ tflite::reference_ops::FullyConnected(
+ params, tensor_shape(input), tensor_data(input), tensor_shape(weights), tensor_data(weights),
+ tensor_shape(bias), tensor_data(bias), tensor_shape(constant), tensor_data(constant));
+
+ loco::replace(node).with(constant);
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * Constant Folding for FullyConnected Op
+ **/
+bool FoldFullyConnectedPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto fc = dynamic_cast<CircleFullyConnected *>(node);
+
+ if (fold_fully_connected(fc))
+ changed = true;
+ }
+
+ return changed;
+}
+
+} // namespace luci
+
+#undef RETURN_FALSE_UNLESS
diff --git a/compiler/luci/pass/src/FoldFullyConnectedPass.test.cpp b/compiler/luci/pass/src/FoldFullyConnectedPass.test.cpp
new file mode 100644
index 000000000..a8e64a24b
--- /dev/null
+++ b/compiler/luci/pass/src/FoldFullyConnectedPass.test.cpp
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldFullyConnectedPass.h"
+#include "PassTestGraphs.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <limits> // std::numeric_limits
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+/**
+ * Graph has a FullyConnected Op with constant inputs
+ *
+ * BEFORE
+ *
+ * [CircleConst] [CircleConst]
+ * | |
+ * [CircleFullyConnected]
+ *
+ * AFTER
+ *
+ * [CircleConst]
+ */
+class FoldFullyConnectedTest : public luci::ConstantFoldingTestGraph, public ::testing::Test
+{
+#define INPUT_DIM 80
+#define NUM_UNITS 32
+
+public:
+ FoldFullyConnectedTest() : luci::ConstantFoldingTestGraph({INPUT_DIM}, loco::DataType::FLOAT32)
+ {
+ _fc = _g.nodes()->create<luci::CircleFullyConnected>();
+ _fc_input = _g.nodes()->create<luci::CircleConst>();
+ _fc_weights = _g.nodes()->create<luci::CircleConst>();
+ _fc_bias = _g.nodes()->create<luci::CircleConst>();
+
+ _fc->dtype(loco::DataType::FLOAT32);
+ _fc->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _fc->input(_fc_input);
+ _fc->weights(_fc_weights);
+ _fc->bias(_fc_bias);
+ _fc->shape({NUM_UNITS});
+ _fc->weights_format(luci::CircleFullyConnected::WeightsFormat::DEFAULT);
+ _fc->keep_num_dims(true);
+
+ _fc_input->dtype(loco::DataType::FLOAT32);
+ _fc_input->shape({INPUT_DIM});
+ _fc_input->size<loco::DataType::FLOAT32>(INPUT_DIM);
+
+ _fc_weights->dtype(loco::DataType::FLOAT32);
+ _fc_weights->shape({NUM_UNITS, INPUT_DIM});
+ _fc_weights->size<loco::DataType::FLOAT32>(NUM_UNITS * INPUT_DIM);
+
+ _fc_bias->dtype(loco::DataType::FLOAT32);
+ _fc_bias->shape({1, NUM_UNITS});
+ _fc_bias->size<loco::DataType::FLOAT32>(NUM_UNITS);
+
+ for (uint32_t i = 0; i < INPUT_DIM; ++i)
+ _fc_input->at<loco::DataType::FLOAT32>(i) = 1.0;
+
+ for (uint32_t i = 0; i < INPUT_DIM * NUM_UNITS; ++i)
+ _fc_weights->at<loco::DataType::FLOAT32>(i) = 1.0;
+
+ for (uint32_t i = 0; i < NUM_UNITS; ++i)
+ _fc_bias->at<loco::DataType::FLOAT32>(i) = 0.0;
+
+ _output->from(_fc);
+ }
+
+protected:
+ void init() final {}
+
+protected:
+ loco::Node *createFoldedPattern() final { return nullptr; }
+
+protected:
+ luci::CircleConst *getFoldedPattern() final
+ {
+ return loco::must_cast<luci::CircleConst *>(_output->from());
+ }
+
+protected:
+ luci::CircleFullyConnected *_fc = nullptr;
+ luci::CircleConst *_fc_input = nullptr;
+ luci::CircleConst *_fc_weights = nullptr;
+ luci::CircleConst *_fc_bias = nullptr;
+#undef INPUT_DIM
+#undef NUM_UNITS
+};
+
+} // namespace
+
+TEST_F(FoldFullyConnectedTest, fold_fc)
+{
+ luci::FoldFullyConnectedPass pass;
+ ASSERT_TRUE(pass.run(&_g));
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_EQ(folded_const->dtype(), loco::DataType::FLOAT32);
+ EXPECT_EQ(1, folded_const->rank());
+ EXPECT_EQ(32, folded_const->dim(0));
+ EXPECT_EQ(32, folded_const->size<loco::DataType::FLOAT32>());
+ for (uint32_t i = 0; i < 32; ++i)
+ EXPECT_NEAR(folded_const->at<loco::DataType::FLOAT32>(i), 80,
+ std::numeric_limits<float>::min());
+}
+
+TEST_F(FoldFullyConnectedTest, fold_fc_no_bias)
+{
+ auto no_bias = _g.nodes()->create<luci::CircleOutputExclude>();
+ _fc->bias(no_bias);
+
+ luci::FoldFullyConnectedPass pass;
+ ASSERT_TRUE(pass.run(&_g));
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype());
+ EXPECT_EQ(1, folded_const->rank());
+ EXPECT_EQ(32, folded_const->dim(0));
+ EXPECT_EQ(32, folded_const->size<loco::DataType::FLOAT32>());
+ for (uint32_t i = 0; i < 32; ++i)
+ EXPECT_NEAR(folded_const->at<loco::DataType::FLOAT32>(i), 80,
+ std::numeric_limits<float>::min());
+}
+
+TEST_F(FoldFullyConnectedTest, fold_fc_NEG)
+{
+ auto new_fc = _g.nodes()->create<luci::CircleFullyConnected>();
+ _fc->input(new_fc);
+
+ luci::FoldFullyConnectedPass pass;
+ ASSERT_FALSE(pass.run(&_g));
+}
+
+TEST_F(FoldFullyConnectedTest, fold_fc_weight_format_NEG)
+{
+ _fc->weights_format(luci::CircleFullyConnected::WeightsFormat::SHUFFLED4x16INT8);
+
+ luci::FoldFullyConnectedPass pass;
+ ASSERT_FALSE(pass.run(&_g));
+}
diff --git a/compiler/luci/pass/src/FoldGatherPass.cpp b/compiler/luci/pass/src/FoldGatherPass.cpp
new file mode 100644
index 000000000..f179d74bd
--- /dev/null
+++ b/compiler/luci/pass/src/FoldGatherPass.cpp
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldGatherPass.h"
+#include "CircleOptimizerUtils.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
+/**
+ * Fold to const if
+ *
+ * 1. params: const and dtype = S32 or S64
+ * 2. indices: const and dtype = S32 or S64
+ *
+ * BEFORE
+ *
+ * [CircleConst] [CircleConst]
+ * | |
+ * +---------[Gather]---------+
+ *
+ * AFTER
+ *
+ * [CircleConst]
+ *
+ **/
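+// Worked example (mirrors S32FoldGatherTwoDimsTest): params of shape [2, 3]
+// = {0, 1, 2, 3, 4, 5}, indices = {2, 1} and axis = 1 fold to a constant of
+// shape [2, 2] = {2, 1, 5, 4}.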
+template <loco::DataType InputT, loco::DataType IndexT>
+bool fold_gather(luci::CircleGather *gather_node)
+{
+ const auto params = loco::must_cast<luci::CircleConst *>(gather_node->params());
+ const auto indices = loco::must_cast<luci::CircleConst *>(gather_node->indices());
+
+ const auto rank = params->rank();
+ auto axis = gather_node->axis();
+ if (axis < 0)
+ {
+ axis += static_cast<int32_t>(rank);
+ }
+
+ if (axis < 0 or axis >= static_cast<int32_t>(rank))
+ throw std::runtime_error("Unsupported axis value");
+
+ const auto name = gather_node->name();
+ assert(name.length() > 0);
+
+ auto constant = gather_node->graph()->nodes()->create<luci::CircleConst>();
+ constant->dtype(InputT);
+ constant->name(name + "_folded");
+
+ constant->rank(rank + indices->rank() - 1);
+
+ assert(constant->rank() > 0);
+
+ std::vector<uint32_t> shape;
+ for (uint32_t i = 0; i < rank; ++i)
+ {
+ if (i != static_cast<uint32_t>(axis))
+ {
+ const auto dim = params->dim(i).value();
+ shape.push_back(dim);
+ }
+ else
+ {
+ for (uint32_t j = 0; j < indices->rank(); ++j)
+ {
+ const auto dim = indices->dim(j).value();
+ shape.push_back(dim);
+ }
+ }
+ }
+
+ uint32_t size = 1;
+ for (uint32_t i = 0; i < shape.size(); ++i)
+ {
+ constant->dim(i).set(shape.at(i));
+ size *= shape.at(i);
+ }
+
+ constant->size<InputT>(size);
+
+ uint32_t outer_size = 1;
+ for (uint32_t i = 0; i < static_cast<uint32_t>(axis); ++i)
+ {
+ outer_size *= params->dim(i).value();
+ }
+
+ uint32_t inner_size = 1;
+ for (uint32_t i = axis + 1; i < rank; ++i)
+ {
+ inner_size *= params->dim(i).value();
+ }
+
+ uint32_t coord_size = 1;
+ for (uint32_t i = 0; i < indices->rank(); ++i)
+ {
+ coord_size *= indices->dim(i).value();
+ }
+
+ const auto axis_size = params->dim(axis).value();
+
+ // Copy one inner block per gathered coordinate:
+ // out[outer][i][inner] = params[outer][indices[i]][inner]
+ for (uint32_t outer = 0; outer < outer_size; ++outer)
+ {
+ for (uint32_t i = 0; i < coord_size; ++i)
+ {
+ for (uint32_t inner = 0; inner < inner_size; ++inner)
+ {
+ constant->at<InputT>((outer * coord_size + i) * inner_size + inner) =
+ params->at<InputT>((outer * axis_size + indices->at<IndexT>(i)) * inner_size + inner);
+ }
+ }
+ }
+ loco::replace(gather_node).with(constant);
+
+ return true;
+}
+
+bool fold_gather(luci::CircleGather *gather_node)
+{
+ const auto params = dynamic_cast<luci::CircleConst *>(gather_node->params());
+ if (not params)
+ return false;
+
+ const auto indices = dynamic_cast<luci::CircleConst *>(gather_node->indices());
+ if (not indices)
+ return false;
+
+ // TODO: support more types
+ if (params->dtype() != loco::DataType::S32 and params->dtype() != loco::DataType::S64)
+ return false;
+
+ if (indices->dtype() != loco::DataType::S32 and indices->dtype() != loco::DataType::S64)
+ throw std::runtime_error("Unsupported type");
+
+ if (params->dtype() == loco::DataType::S64)
+ {
+ if (indices->dtype() == loco::DataType::S64)
+ return fold_gather<loco::DataType::S64, loco::DataType::S64>(gather_node);
+ else
+ return fold_gather<loco::DataType::S64, loco::DataType::S32>(gather_node);
+ }
+ else
+ {
+ if (indices->dtype() == loco::DataType::S64)
+ return fold_gather<loco::DataType::S32, loco::DataType::S64>(gather_node);
+ else
+ return fold_gather<loco::DataType::S32, loco::DataType::S32>(gather_node);
+ }
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * Constant Folding for Gather Op
+ **/
+bool FoldGatherPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto gather_node = dynamic_cast<luci::CircleGather *>(node))
+ {
+ if (fold_gather(gather_node))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FoldGatherPass.test.cpp b/compiler/luci/pass/src/FoldGatherPass.test.cpp
new file mode 100644
index 000000000..b02c034a5
--- /dev/null
+++ b/compiler/luci/pass/src/FoldGatherPass.test.cpp
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldGatherPass.h"
+#include "PassTestGraphs.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+/**
+ *
+ * Graph that has a Gather S64 Op with const inputs
+ *
+ * BEFORE
+ * params: [Const] (shape: [3], values: [1, 2, 3])
+ * indices: [Const] (shape: [1], values: [1])
+ *
+ * [params] [indices]
+ * | |
+ * ---[Gather]---
+ *
+ * AFTER
+ * [Const] (shape: [1], values: [2])
+ *
+ */
+class S64FoldGatherSimpleTest : public luci::ConstantFoldingAddTestGraph, public ::testing::Test
+{
+public:
+ S64FoldGatherSimpleTest() : luci::ConstantFoldingAddTestGraph({1}, loco::DataType::S64) {}
+
+ virtual void SetUp() { init(); }
+
+ loco::Node *createFoldedPattern() override
+ {
+ _gather = _g.nodes()->create<luci::CircleGather>();
+ _params = _g.nodes()->create<luci::CircleConst>();
+ _indices = _g.nodes()->create<luci::CircleConst>();
+
+ _gather->dtype(loco::DataType::S64);
+ _params->dtype(loco::DataType::S64);
+ _indices->dtype(loco::DataType::S64);
+
+ _params->shape({3});
+ _indices->shape({1});
+
+ _params->size<loco::DataType::S64>(3);
+ _params->at<loco::DataType::S64>(0) = 1;
+ _params->at<loco::DataType::S64>(1) = 2;
+ _params->at<loco::DataType::S64>(2) = 3;
+
+ _indices->size<loco::DataType::S64>(1);
+ _indices->at<loco::DataType::S64>(0) = 1;
+
+ _gather->params(_params);
+ _gather->indices(_indices);
+
+ _gather->name("gather");
+ _params->name("params");
+ _indices->name("indices");
+
+ return _gather;
+ }
+
+protected:
+ luci::CircleGather *_gather = nullptr;
+ luci::CircleConst *_params = nullptr;
+ luci::CircleConst *_indices = nullptr;
+};
+
+/**
+ *
+ * Graph that has a Gather S32 Op with axis = 1 and with const inputs
+ *
+ * BEFORE
+ * params: [Const] (shape: [2, 3], values: [0, 1, 2, 3, 4, 5])
+ * indices: [Const] (shape: [2], values: [2, 1])
+ *
+ * [params] [indices]
+ * | |
+ * ---[Gather]---
+ *
+ * AFTER
+ * [Const] (shape: [2, 2], values: [2, 1, 5, 4])
+ *
+ */
+
+class S32FoldGatherTwoDimsTest : public luci::ConstantFoldingAddTestGraph, public ::testing::Test
+{
+public:
+ S32FoldGatherTwoDimsTest() : luci::ConstantFoldingAddTestGraph({4, 2}, loco::DataType::S32) {}
+
+ virtual void SetUp() { init(); }
+
+ loco::Node *createFoldedPattern() override
+ {
+ _gather = _g.nodes()->create<luci::CircleGather>();
+ _params = _g.nodes()->create<luci::CircleConst>();
+ _indices = _g.nodes()->create<luci::CircleConst>();
+
+ _gather->dtype(loco::DataType::S32);
+ _params->dtype(loco::DataType::S32);
+ _indices->dtype(loco::DataType::S32);
+
+ _params->shape({2, 3});
+ _indices->shape({2});
+
+ _params->size<loco::DataType::S32>(6);
+ _params->at<loco::DataType::S32>(0) = 0;
+ _params->at<loco::DataType::S32>(1) = 1;
+ _params->at<loco::DataType::S32>(2) = 2;
+ _params->at<loco::DataType::S32>(3) = 3;
+ _params->at<loco::DataType::S32>(4) = 4;
+ _params->at<loco::DataType::S32>(5) = 5;
+
+ _indices->size<loco::DataType::S32>(2);
+ _indices->at<loco::DataType::S32>(0) = 2;
+ _indices->at<loco::DataType::S32>(1) = 1;
+
+ _gather->params(_params);
+ _gather->indices(_indices);
+
+ _gather->axis(1);
+
+ _gather->name("gather");
+ _params->name("params");
+ _indices->name("indices");
+
+ return _gather;
+ }
+
+protected:
+ luci::CircleGather *_gather = nullptr;
+ luci::CircleConst *_params = nullptr;
+ luci::CircleConst *_indices = nullptr;
+};
+
+} // namespace
+
+TEST(FoldGatherTest, name)
+{
+ luci::FoldGatherPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST_F(S64FoldGatherSimpleTest, fold_gather_simple)
+{
+ luci::FoldGatherPass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+ // Check type, shape, values of folded const
+ EXPECT_EQ(loco::DataType::S64, folded_const->dtype());
+ EXPECT_EQ(1, folded_const->rank());
+ EXPECT_EQ(1, folded_const->dim(0).value());
+ EXPECT_EQ(2, folded_const->at<loco::DataType::S64>(0));
+}
+
+TEST_F(S32FoldGatherTwoDimsTest, fold_gather_with_two_dim)
+{
+ luci::FoldGatherPass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+ // Check type, shape, values of folded const
+ EXPECT_EQ(loco::DataType::S32, folded_const->dtype());
+ EXPECT_EQ(2, folded_const->rank());
+ EXPECT_EQ(2, folded_const->dim(0).value());
+ EXPECT_EQ(2, folded_const->dim(1).value());
+
+ EXPECT_EQ(2, folded_const->at<loco::DataType::S32>(0));
+ EXPECT_EQ(1, folded_const->at<loco::DataType::S32>(1));
+ EXPECT_EQ(5, folded_const->at<loco::DataType::S32>(2));
+ EXPECT_EQ(4, folded_const->at<loco::DataType::S32>(3));
+}
+
+TEST_F(S64FoldGatherSimpleTest, illegal_input_NEG)
+{
+ _indices->dtype(loco::DataType::FLOAT32);
+
+ luci::FoldGatherPass pass;
+ EXPECT_ANY_THROW(pass.run(graph()));
+}
+
+TEST_F(S64FoldGatherSimpleTest, illegal_axis_NEG)
+{
+ _gather->axis(1);
+
+ luci::FoldGatherPass pass;
+ EXPECT_ANY_THROW(pass.run(graph()));
+}
diff --git a/compiler/luci/pass/src/FoldSparseToDensePass.cpp b/compiler/luci/pass/src/FoldSparseToDensePass.cpp
new file mode 100644
index 000000000..ed60d8899
--- /dev/null
+++ b/compiler/luci/pass/src/FoldSparseToDensePass.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldSparseToDensePass.h"
+#include "CircleOptimizerUtils.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <limits>
+
+namespace
+{
+
+/**
+ * Fold to const if
+ *
+ * 1. indices has 0-sized static shape such as [0]
+ * (i.e., output is filled with default value)
+ * 2. default_value: const scalar
+ * 3. output_shape: const
+ *
+ * TODO: Support more general patterns
+ **/
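+// Example (mirrors S64SparseToDenseZeroIndicesTest): indices of shape [0, 1],
+// output_shape = {3} and scalar default_value 2 fold to the constant
+// {2, 2, 2}.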
+template <loco::DataType IndexT, loco::DataType ValueT>
+bool fold_sparse_to_dense(luci::CircleSparseToDense *stod)
+{
+ const auto indices = loco::must_cast<luci::CircleNode *>(stod->indices());
+ const auto default_value = loco::must_cast<luci::CircleConst *>(stod->default_value());
+ const auto output_shape = loco::must_cast<luci::CircleConst *>(stod->output_shape());
+
+ bool has_zero = false;
+ for (uint32_t i = 0; i < indices->rank(); i++)
+ {
+ if (indices->dim(i).known() && indices->dim(i).value() == 0)
+ has_zero = true;
+ }
+ if (!has_zero)
+ return false;
+
+ if (default_value->rank() != 0 || default_value->size<ValueT>() != 1)
+ return false;
+
+ auto rank = output_shape->size<IndexT>();
+ std::vector<uint32_t> shape;
+ for (uint32_t i = 0; i < rank; i++)
+ {
+ auto dim = output_shape->at<IndexT>(i);
+ assert(dim >= 0 && dim <= std::numeric_limits<uint32_t>::max());
+ if (!(dim >= 0 && dim <= std::numeric_limits<uint32_t>::max()))
+ return false;
+
+ shape.push_back(dim);
+ }
+
+ auto name = stod->name();
+ assert(name.length() > 0);
+ auto constant = stod->graph()->nodes()->create<luci::CircleConst>();
+ constant->dtype(default_value->dtype());
+ constant->rank(rank);
+ uint32_t dim_size = 1;
+ for (uint32_t i = 0; i < rank; i++)
+ {
+ constant->dim(i).set(shape[i]);
+ dim_size *= shape[i];
+ }
+
+ constant->size<ValueT>(dim_size);
+ const auto value = default_value->scalar<ValueT>();
+ for (uint32_t i = 0; i < dim_size; i++)
+ constant->at<ValueT>(i) = value;
+
+ constant->shape_status(luci::ShapeStatus::VALID);
+ constant->name(name + "_D");
+
+ loco::replace(stod).with(constant);
+
+ return true;
+}
+
+bool fold_sparse_to_dense(luci::CircleSparseToDense *stod)
+{
+ auto indices = loco::must_cast<luci::CircleNode *>(stod->indices());
+ auto default_value = dynamic_cast<luci::CircleConst *>(stod->default_value());
+ if (not default_value)
+ return false;
+
+ auto output_shape = dynamic_cast<luci::CircleConst *>(stod->output_shape());
+ if (not output_shape)
+ return false;
+
+ // Illegal input check
+ if (indices->dtype() != output_shape->dtype())
+ throw std::runtime_error("indices and output_shape of SparseToDense must have the same dtype");
+
+ // TODO: Support more data types
+ if (indices->dtype() == loco::DataType::S64)
+ {
+ if (default_value->dtype() == loco::DataType::S64)
+ {
+ return fold_sparse_to_dense<loco::DataType::S64, loco::DataType::S64>(stod);
+ }
+ }
+ return false;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * Constant Folding for SparseToDense Op
+ **/
+bool FoldSparseToDensePass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto stod = dynamic_cast<luci::CircleSparseToDense *>(node))
+ {
+ if (fold_sparse_to_dense(stod))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FoldSparseToDensePass.test.cpp b/compiler/luci/pass/src/FoldSparseToDensePass.test.cpp
new file mode 100644
index 000000000..7c6dcb033
--- /dev/null
+++ b/compiler/luci/pass/src/FoldSparseToDensePass.test.cpp
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldSparseToDensePass.h"
+#include "PassTestGraphs.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+/**
+ * Graph that has a SparseToDense Op with zero-sized indices
+ *
+ * BEFORE
+ * - shape of indices: [0,1]
+ * - output_shape: [3]
+ * - default_value: scalar 2
+ *
+ * [indices] [output_shape] [values] [default_value]
+ * | | | |
+ * +------[SparseToDense]------+
+ *
+ * AFTER
+ *
+ * [Const] (shape: [3], values: [2, 2, 2])
+ *
+ */
+class S64SparseToDenseZeroIndicesTest : public luci::ConstantFoldingAddTestGraph,
+ public ::testing::Test
+{
+public:
+ S64SparseToDenseZeroIndicesTest() : luci::ConstantFoldingAddTestGraph({3}, loco::DataType::S64) {}
+
+ virtual void SetUp() { init(); }
+
+ loco::Node *createFoldedPattern() override
+ {
+ _stod = _g.nodes()->create<luci::CircleSparseToDense>();
+ _indices = _g.nodes()->create<luci::CircleConst>();
+ _output_shape = _g.nodes()->create<luci::CircleConst>();
+ _values = _g.nodes()->create<luci::CircleConst>();
+ _default_value = _g.nodes()->create<luci::CircleConst>();
+
+ _stod->dtype(loco::DataType::S64);
+ _indices->dtype(loco::DataType::S64);
+ _output_shape->dtype(loco::DataType::S64);
+ _values->dtype(loco::DataType::S64);
+ _default_value->dtype(loco::DataType::S64);
+
+ _indices->shape({0, 1});
+ _output_shape->shape({1});
+ _values->shape({0});
+ _default_value->rank(0);
+
+ _indices->size<loco::DataType::S64>(0);
+ _output_shape->size<loco::DataType::S64>(1);
+ _output_shape->at<loco::DataType::S64>(0) = 3;
+ _values->size<loco::DataType::S64>(0);
+ _default_value->size<loco::DataType::S64>(1);
+ _default_value->at<loco::DataType::S64>(0) = 2;
+
+ _stod->indices(_indices);
+ _stod->output_shape(_output_shape);
+ _stod->values(_values);
+ _stod->default_value(_default_value);
+
+ _stod->name("stod");
+ _indices->name("indices");
+ _output_shape->name("output_shape");
+ _values->name("values");
+ _default_value->name("default_value");
+
+ return _stod;
+ }
+
+protected:
+ luci::CircleSparseToDense *_stod = nullptr;
+ luci::CircleConst *_indices = nullptr;
+ luci::CircleConst *_output_shape = nullptr;
+ luci::CircleConst *_values = nullptr;
+ luci::CircleConst *_default_value = nullptr;
+};
+
+} // namespace
+
+TEST(FoldSparseToDensePassTest, name)
+{
+ luci::FoldSparseToDensePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST_F(S64SparseToDenseZeroIndicesTest, fold_stod_with_zero_indices)
+{
+ luci::FoldSparseToDensePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+ // Check type, shape, values of folded const
+ EXPECT_EQ(loco::DataType::S64, folded_const->dtype());
+ EXPECT_EQ(1, folded_const->rank());
+ EXPECT_EQ(3, folded_const->dim(0).value());
+ EXPECT_EQ(2, folded_const->at<loco::DataType::S64>(0));
+ EXPECT_EQ(2, folded_const->at<loco::DataType::S64>(1));
+ EXPECT_EQ(2, folded_const->at<loco::DataType::S64>(2));
+}
+
+TEST_F(S64SparseToDenseZeroIndicesTest, illegal_input_NEG)
+{
+ _indices->dtype(loco::DataType::S32);
+
+ luci::FoldSparseToDensePass pass;
+ EXPECT_ANY_THROW(pass.run(graph()));
+}
diff --git a/compiler/luci/pass/src/ForceQuantParamPass.cpp b/compiler/luci/pass/src/ForceQuantParamPass.cpp
new file mode 100644
index 000000000..32d482fc1
--- /dev/null
+++ b/compiler/luci/pass/src/ForceQuantParamPass.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ForceQuantParamPass.h"
+#include "luci/Profile/CircleNodeID.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Log.h>
+
+namespace luci
+{
+
+namespace
+{
+
+void set_qparam(luci::CircleNode *node, float scale, int64_t zp)
+{
+ assert(node); // FIX_CALLER_UNLESS
+
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->scale.push_back(scale);
+ quantparam->zerop.push_back(zp);
+
+ node->quantparam(std::move(quantparam));
+}
+
+} // namespace
+
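+// Usage sketch (values mirror ForceQuantParamPass.test.cpp):
+//   luci::ForceQuantParamPass pass({"input", "add"}, {2.0, 3.0}, {4, 8});
+//   pass.run(g); // forces scale/zerop of the named tensors; throws if a
+//                // name is not found in the graph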
+bool ForceQuantParamPass::run(loco::Graph *g)
+{
+ LOGGER(l);
+ INFO(l) << "ForceQuantParamPass Start" << std::endl;
+
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto const cnode = loco::must_cast<CircleNode *>(node);
+ auto const name = cnode->name();
+ auto target = std::find(_tensors.begin(), _tensors.end(), name);
+ if (target == _tensors.end())
+ continue;
+
+ auto index = target - _tensors.begin();
+ auto scale = _scales[index];
+ auto zp = _zerops[index];
+ set_qparam(cnode, scale, zp);
+
+ _tensors.erase(_tensors.begin() + index);
+ _scales.erase(_scales.begin() + index);
+ _zerops.erase(_zerops.begin() + index);
+ }
+
+ if (_tensors.size() > 0)
+ {
+ std::string msg;
+ for (auto const &t : _tensors)
+ msg += "Tensor does not exist: " + t + ".\n";
+ msg += "Please check tensor name.\n";
+ throw std::runtime_error(msg);
+ }
+
+ INFO(l) << "ForceQuantParamPass End" << std::endl;
+ return false; // one time run
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/ForceQuantParamPass.test.cpp b/compiler/luci/pass/src/ForceQuantParamPass.test.cpp
new file mode 100644
index 000000000..a9da7c25e
--- /dev/null
+++ b/compiler/luci/pass/src/ForceQuantParamPass.test.cpp
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ForceQuantParamPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using TensorVector = luci::ForceQuantParamPass::TensorVector;
+using ScaleVector = luci::ForceQuantParamPass::ScaleVector;
+using ZPVector = luci::ForceQuantParamPass::ZPVector;
+
+std::unique_ptr<luci::CircleQuantParam> make_qparam(float scale, int64_t zp)
+{
+ auto qparam = std::make_unique<luci::CircleQuantParam>();
+ qparam->scale.push_back(scale);
+ qparam->zerop.push_back(zp);
+
+ return qparam;
+}
+
+bool check_per_tensor_qparam(luci::CircleNode *node, float scale, int64_t zp)
+{
+ assert(node); // FIX_CALLER_UNLESS
+
+ auto qparam = node->quantparam();
+ if (qparam->scale.size() != 1)
+ return false;
+
+ if (qparam->scale[0] != scale)
+ return false;
+
+ if (qparam->zerop.size() != 1)
+ return false;
+
+ if (qparam->zerop[0] != zp)
+ return false;
+
+ return true;
+}
+
+/**
+ * Graph with a single input and a single output.
+ *
+ * [Input]
+ * |
+ * (graph body) -> implemented by insertGraphBody()
+ * |
+ * [Output]
+ *
+ */
+class SISOGraph
+{
+public:
+ SISOGraph() = default;
+
+public:
+ void init()
+ {
+ input = g.nodes()->create<luci::CircleInput>();
+ output = g.nodes()->create<luci::CircleOutput>();
+ input->name("input");
+ output->name("output");
+
+ auto graph_input = g.inputs()->create();
+ input->index(graph_input->index());
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+
+ graph_input->dtype(loco::DataType::U8);
+ input->dtype(loco::DataType::U8);
+ output->dtype(loco::DataType::U8);
+ graph_output->dtype(loco::DataType::U8);
+
+ input->quantparam(make_qparam(0.1, 11));
+ output->quantparam(make_qparam(0.2, 12));
+
+ uint32_t channel_size = 16;
+ graph_input->shape({1, channel_size, 4, 4});
+ input->shape({1, channel_size, 4, 4});
+ output->shape({1, channel_size, 4, 4});
+ graph_output->shape({1, channel_size, 4, 4});
+
+ auto graph_body = insertGraphBody(input);
+ output->from(graph_body);
+ }
+
+ virtual ~SISOGraph() = default;
+
+protected:
+ virtual loco::Node *insertGraphBody(loco::Node *input) = 0;
+
+public:
+ loco::Graph g;
+ luci::CircleInput *input = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+class AddGraph final : public SISOGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ add = g.nodes()->create<luci::CircleAdd>();
+ beta = g.nodes()->create<luci::CircleConst>();
+
+ add->dtype(loco::DataType::U8);
+ beta->dtype(loco::DataType::U8);
+ add->quantparam(make_qparam(0.1, 11));
+ beta->quantparam(make_qparam(0.2, 12));
+
+ uint32_t channel_size = 16;
+ add->shape({1, 4, 4, channel_size});
+ beta->shape({1, 1, 1, channel_size});
+
+ beta->size<loco::DataType::U8>(channel_size);
+ for (uint32_t i = 0; i < channel_size; i++)
+ {
+ beta->at<loco::DataType::U8>(i) = i;
+ }
+
+ add->x(input);
+ add->y(beta);
+
+ add->name("add");
+ beta->name("beta");
+
+ return add;
+ }
+
+public:
+ luci::CircleAdd *add = nullptr;
+ luci::CircleConst *beta = nullptr;
+};
+
+} // namespace
+
+TEST(ForceQuantParamPassTest, simple)
+{
+ TensorVector tensors{"input", "add"};
+ ScaleVector scales{2.0, 3.0};
+ ZPVector zerops{4, 8};
+
+ luci::ForceQuantParamPass pass(tensors, scales, zerops);
+
+ AddGraph g;
+ g.init();
+
+ pass.run(&g.g);
+
+ EXPECT_TRUE(check_per_tensor_qparam(g.input, 2.0, 4));
+ EXPECT_TRUE(check_per_tensor_qparam(g.add, 3.0, 8));
+}
+
+TEST(ForceQuantParamPassTest, name_mismatch_NEG)
+{
+ TensorVector tensors{"no_exist"};
+ ScaleVector scales{2.0};
+ ZPVector zerops{4};
+
+ luci::ForceQuantParamPass pass(tensors, scales, zerops);
+
+ AddGraph g;
+ g.init();
+
+ EXPECT_THROW(pass.run(&g.g), std::runtime_error);
+}
diff --git a/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.cpp b/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.cpp
new file mode 100644
index 000000000..3494a6e60
--- /dev/null
+++ b/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.cpp
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ForwardReshapeToUnaryOpPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Log.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+#include <luci/Service/CircleShapeInference.h>
+#include <luci/Service/Nodes/CircleConst.h>
+#include <luci/Service/CircleNodeClone.h>
+
+namespace
+{
+
+luci::CircleReshape *as_reshape(loco::Node *node)
+{
+ return dynamic_cast<luci::CircleReshape *>(node);
+}
+
+luci::CircleConst *clone_shape(luci::CircleReshape *reshape)
+{
+ const auto shape = dynamic_cast<luci::CircleConst *>(reshape->shape());
+ // only support CircleConst for now
+ if (shape == nullptr)
+ return nullptr;
+
+ // NOTE tflite and circle only supports S32
+ // TODO just check with assert() after import handles this
+ auto dtype = shape->dtype();
+ if (dtype != loco::DataType::S32)
+ return nullptr;
+
+ return luci::clone(shape);
+}
+
+void copy_shape(luci::CircleReshape *reshape, luci::CircleReshape *new_reshape)
+{
+ auto ns_rank = reshape->newShape()->rank();
+ new_reshape->newShape()->rank(ns_rank);
+ for (uint32_t r = 0; r < ns_rank; ++r)
+ new_reshape->newShape()->dim(r) = reshape->newShape()->dim(r);
+}
+
+luci::CircleReshape *create_cloned_reshape(luci::CircleReshape *reshape)
+{
+ assert(reshape != nullptr); // FIX_CALLER_UNLESS
+
+ luci::CircleConst *cloned_shape = clone_shape(reshape);
+ if (cloned_shape == nullptr)
+ return nullptr;
+
+ auto cloned_node = luci::clone_node(reshape, reshape->graph());
+ if (cloned_node == nullptr)
+ return nullptr;
+
+ auto new_reshape = loco::must_cast<luci::CircleReshape *>(cloned_node);
+ new_reshape->shape(cloned_shape);
+ new_reshape->name(reshape->name() + "_C");
+ luci::add_origin(new_reshape, luci::get_origin(reshape));
+
+ return new_reshape;
+}
+
+bool forward_reshape(luci::CircleReshape *reshape, luci::CircleAbs *abs)
+{
+ assert(reshape != nullptr); // FIX_CALLER_UNLESS
+ assert(abs != nullptr); // FIX_CALLER_UNLESS
+
+ auto new_reshape = create_cloned_reshape(reshape);
+ if (not new_reshape)
+ return false;
+
+ // reconnect network
+ loco::replace(abs).with(new_reshape);
+ abs->x(reshape->tensor());
+ new_reshape->tensor(abs);
+
+ // Do shape inference for this node again.
+ abs->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ return true;
+}
+
+bool forward_reshape(luci::CircleReshape *reshape, luci::CircleNeg *neg)
+{
+ assert(reshape != nullptr);
+ assert(neg != nullptr);
+
+ luci::CircleConst *cloned_shape = clone_shape(reshape);
+ if (cloned_shape == nullptr)
+ return false;
+
+ auto name = reshape->name();
+ assert(name.length() > 0);
+ loco::Graph *graph = neg->graph();
+ // create reshape placed after neg
+ luci::CircleReshape *new_reshape = graph->nodes()->create<luci::CircleReshape>();
+ copy_shape(reshape, new_reshape);
+ new_reshape->shape(cloned_shape);
+ new_reshape->name(name + "_C");
+ luci::add_origin(new_reshape, luci::get_origin(reshape));
+
+ // reconnect network
+ loco::replace(neg).with(new_reshape);
+ neg->x(reshape->tensor());
+ new_reshape->tensor(neg);
+
+ // Do shape inference for this node again.
+ neg->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ return true;
+}
+
+bool forward_reshape(luci::CircleReshape *reshape, luci::CircleLogistic *logit)
+{
+ assert(reshape != nullptr); // FIX_CALLER_UNLESS
+ assert(logit != nullptr); // FIX_CALLER_UNLESS
+
+ auto new_reshape = create_cloned_reshape(reshape);
+ if (not new_reshape)
+ return false;
+
+ // reconnect network
+ loco::replace(logit).with(new_reshape);
+ logit->x(reshape->tensor());
+ new_reshape->tensor(logit);
+
+ // Do shape inference for this node again.
+ logit->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ return true;
+}
+
+class ForwardReshape final : public luci::CircleNodeMutableVisitor<bool>
+{
+protected:
+ bool visit(luci::CircleNode *node)
+ {
+ LOGGER(l);
+ INFO(l) << "ForwardReshape: Unsupported operator: " << node->name() << std::endl;
+ return false;
+ }
+
+ bool visit(luci::CircleAbs *node)
+ {
+ auto reshape = as_reshape(node->x());
+ if (reshape == nullptr)
+ return false;
+ return forward_reshape(reshape, node);
+ }
+
+ bool visit(luci::CircleNeg *node)
+ {
+ auto reshape = as_reshape(node->x());
+ if (reshape == nullptr)
+ return false;
+ return forward_reshape(reshape, node);
+ }
+
+ bool visit(luci::CircleLogistic *node)
+ {
+ auto reshape = as_reshape(node->x());
+ if (reshape == nullptr)
+ return false;
+
+ return forward_reshape(reshape, node);
+ }
+ // TODO add more unary operators
+};
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * BEFORE
+ * |
+ * [CircleNode] [CircleConst]
+ * | /
+ * [CircleReshape]
+ * / |
+ * [CircleNode] [(UnaryOp)]
+ * | | \
+ * | | [CircleNode]
+ * | | |
+ *
+ * UnaryOp: CircleNeg, ...
+ *
+ * AFTER
+ * |
+ * [CircleConst] [CircleNode]
+ * | / |
+ * [CircleReshape] [(UnaryOp)] [CircleConst]
+ * | | /
+ * [CircleNode] [CircleReshape]
+ * | | \
+ * | | [CircleNode]
+ * | | |
+ *
+ * Note: a new [CircleReshape] is added after [(UnaryOp)]
+ */
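+// e.g. (illustrative) Neg(Reshape(x, s)) is rewritten to Reshape(Neg(x), s);
+// since Neg is elementwise, both graphs compute the same values.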
+bool ForwardReshapeToUnaryOpPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ ForwardReshape forward;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ if (circle_node->accept(&forward))
+ changed = true;
+ }
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.test.cpp b/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.test.cpp
new file mode 100644
index 000000000..373513270
--- /dev/null
+++ b/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.test.cpp
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ForwardReshapeToUnaryOpPass.h"
+#include "luci/Pass/CircleShapeInferencePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+#include <vector>
+
+namespace
+{
+
+using namespace luci::test;
+
+class ReshapeNegGraphlet
+{
+public:
+ ReshapeNegGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 shape_in, const ShapeU32 shape_out)
+ {
+ std::vector<uint32_t> shape_out_v = shape_out;
+
+ _reshape_shape = g->nodes()->create<luci::CircleConst>();
+ _reshape = g->nodes()->create<luci::CircleReshape>();
+ _neg = g->nodes()->create<luci::CircleNeg>();
+
+ _reshape_shape->dtype(loco::DataType::S32);
+ _reshape_shape->rank(1);
+ _reshape_shape->dim(0).set(shape_out_v.size());
+ _reshape_shape->shape_status(luci::ShapeStatus::VALID);
+ // values
+ const auto size = shape_out_v.size();
+ _reshape_shape->size<loco::DataType::S32>(size);
+ for (uint32_t i = 0; i < size; i++)
+ _reshape_shape->at<loco::DataType::S32>(i) = shape_out_v[i];
+
+ _reshape_shape->name("reshape_shape");
+ _reshape->name("reshape");
+ _neg->name("neg");
+ }
+
+protected:
+ luci::CircleReshape *_reshape = nullptr;
+ luci::CircleNeg *_neg = nullptr;
+ luci::CircleConst *_reshape_shape = nullptr;
+};
+
+// TODO Reduce duplicate code with ReshapeNegGraphlet
+class ReshapeLogisticGraphlet
+{
+public:
+ ReshapeLogisticGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 shape_in, const ShapeU32 shape_out)
+ {
+ std::vector<uint32_t> shape_out_v = shape_out;
+
+ _reshape_shape = g->nodes()->create<luci::CircleConst>();
+ _reshape = g->nodes()->create<luci::CircleReshape>();
+ _logistic = g->nodes()->create<luci::CircleLogistic>();
+
+ _reshape_shape->dtype(loco::DataType::S32);
+ _reshape_shape->rank(1);
+ _reshape_shape->dim(0).set(shape_out_v.size());
+ _reshape_shape->shape_status(luci::ShapeStatus::VALID);
+ // values
+ const auto size = shape_out_v.size();
+ _reshape_shape->size<loco::DataType::S32>(size);
+ for (uint32_t i = 0; i < size; i++)
+ _reshape_shape->at<loco::DataType::S32>(i) = shape_out_v[i];
+
+ _reshape_shape->name("reshape_shape");
+ _reshape->name("reshape");
+ _logistic->name("logistic");
+ }
+
+protected:
+ luci::CircleReshape *_reshape = nullptr;
+ luci::CircleLogistic *_logistic = nullptr;
+ luci::CircleConst *_reshape_shape = nullptr;
+};
+
+class ForwardReshapeToNegGraph : public TestIOGraph, public ReshapeNegGraphlet
+{
+public:
+ ForwardReshapeToNegGraph() = default;
+
+public:
+ void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+ {
+ TestIOGraph::init(shape_in, shape_out);
+ ReshapeNegGraphlet::init(g(), shape_in, shape_out);
+
+ // connect network
+ _reshape->tensor(input());
+ _reshape->shape(_reshape_shape);
+ _neg->x(_reshape);
+
+ output()->from(_neg);
+ }
+};
+
+class ForwardReshapeToLogisticGraph : public TestIOGraph, public ReshapeLogisticGraphlet
+{
+public:
+ ForwardReshapeToLogisticGraph() = default;
+
+public:
+ void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+ {
+ TestIOGraph::init(shape_in, shape_out);
+ ReshapeLogisticGraphlet::init(g(), shape_in, shape_out);
+
+ // connect network
+ _reshape->tensor(input());
+ _reshape->shape(_reshape_shape);
+ _logistic->x(_reshape);
+
+ output()->from(_logistic);
+ }
+};
+
+class ForwardReshapeToNegGraphTest : public ::testing::Test
+{
+public:
+ ForwardReshapeToNegGraphTest() = default;
+
+ void run_pass(void)
+ {
+ while (_pass.run(_graph.g()))
+ ;
+ }
+
+protected:
+ ForwardReshapeToNegGraph _graph;
+ luci::ForwardReshapeToUnaryOpPass _pass;
+};
+
+class ForwardReshapeToLogisticGraphTest : public ::testing::Test
+{
+public:
+ ForwardReshapeToLogisticGraphTest() = default;
+
+ void run_pass(void)
+ {
+ while (_pass.run(_graph.g()))
+ ;
+ }
+
+protected:
+ ForwardReshapeToLogisticGraph _graph;
+ luci::ForwardReshapeToUnaryOpPass _pass;
+};
+
+} // namespace
+
+TEST(ForwardReshapeToUnaryOpPassTest, name)
+{
+ luci::ForwardReshapeToUnaryOpPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST_F(ForwardReshapeToNegGraphTest, simple_forward)
+{
+ _graph.init({2, 2, 2}, {2, 4});
+
+ run_pass();
+
+ auto reshape = dynamic_cast<luci::CircleReshape *>(_graph.output()->from());
+ auto neg = dynamic_cast<luci::CircleNeg *>(_graph.output()->from());
+ ASSERT_NE(nullptr, reshape);
+ ASSERT_EQ(nullptr, neg);
+ neg = dynamic_cast<luci::CircleNeg *>(reshape->tensor());
+ ASSERT_NE(nullptr, neg);
+}
+
+TEST_F(ForwardReshapeToLogisticGraphTest, forward)
+{
+ _graph.init({2, 2, 2}, {2, 4});
+
+ run_pass();
+
+ auto reshape = dynamic_cast<luci::CircleReshape *>(_graph.output()->from());
+ auto log = dynamic_cast<luci::CircleLogistic *>(_graph.output()->from());
+ ASSERT_NE(nullptr, reshape);
+ ASSERT_EQ(nullptr, log);
+ log = dynamic_cast<luci::CircleLogistic *>(reshape->tensor());
+ ASSERT_NE(nullptr, log);
+}
diff --git a/compiler/luci/pass/src/ForwardTransposeOpPass.cpp b/compiler/luci/pass/src/ForwardTransposeOpPass.cpp
new file mode 100644
index 000000000..c76d73344
--- /dev/null
+++ b/compiler/luci/pass/src/ForwardTransposeOpPass.cpp
@@ -0,0 +1,366 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ForwardTransposeOpPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+#include <luci/Service/Nodes/CircleConst.h>
+#include <luci/Service/CircleNodeClone.h>
+
+using namespace luci;
+
+namespace
+{
+
+// Create new Transpose Op including perm
+// Return nullptr if failed
+CircleTranspose *create_cloned_transpose(CircleTranspose *transpose)
+{
+ assert(transpose != nullptr); // FIX_CALLER_UNLESS
+
+ auto perm = dynamic_cast<CircleConst *>(transpose->perm());
+ if (not perm)
+ return nullptr;
+
+ CircleConst *cloned_perm = clone(perm);
+ if (cloned_perm == nullptr)
+ return nullptr;
+
+ cloned_perm->name(perm->name() + "_C");
+ luci::add_origin(cloned_perm, luci::get_origin(perm));
+
+ auto cloned_node = clone_node(transpose, transpose->graph());
+ if (cloned_node == nullptr)
+ return nullptr;
+
+ auto new_transpose = loco::must_cast<luci::CircleTranspose *>(cloned_node);
+ new_transpose->perm(cloned_perm);
+ new_transpose->name(transpose->name() + "_C");
+ luci::add_origin(new_transpose, luci::get_origin(transpose));
+
+ return new_transpose;
+}
+
+uint32_t cal_offset(const std::vector<uint32_t> &shape, const std::vector<uint32_t> &indices)
+{
+ assert(shape.size() == indices.size()); // FIX_CALLER_UNLESS
+
+ uint32_t offset = 0;
+ for (uint32_t i = 0; i < indices.size(); i++)
+ {
+ uint32_t index = indices[i];
+ for (uint32_t j = shape.size() - 1; j > i; j--)
+ {
+ index *= shape[j];
+ }
+ offset += index;
+ }
+ return offset;
+}
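+// e.g. for row-major shape {2, 3, 4} and indices {1, 2, 3}:
+// offset = 1 * (3 * 4) + 2 * 4 + 3 = 23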
+
+// Return reverse-transpose of 'node'
+// i.e., Transpose(return value) = node
+CircleConst *reverse_transposed(CircleConst *node, std::vector<uint32_t> &t)
+{
+ assert(node->rank() == t.size()); // FIX_CALLER_UNLESS
+ assert(node->rank() == 4); // FIX_CALLER_UNLESS
+
+ std::vector<uint32_t> orig_shape(node->rank());
+ std::vector<uint32_t> new_shape(node->rank());
+
+ for (uint32_t i = 0; i < node->rank(); i++)
+ {
+ assert(t[i] < node->rank()); // FIX_CALLER_UNLESS
+
+ orig_shape[i] = node->dim(i).value();
+ new_shape[t[i]] = node->dim(i).value();
+ }
+
+ auto clone_const = clone(node);
+ for (uint32_t i = 0; i < node->rank(); i++)
+ clone_const->dim(i).set(new_shape[i]);
+
+ clone_const->name(clone_const->name() + "_r_transposed");
+ add_origin(clone_const, luci::get_origin(node));
+
+ for (uint32_t n = 0; n < clone_const->dim(0).value(); n++)
+ {
+ for (uint32_t h = 0; h < clone_const->dim(1).value(); h++)
+ {
+ for (uint32_t w = 0; w < clone_const->dim(2).value(); w++)
+ {
+ for (uint32_t c = 0; c < clone_const->dim(3).value(); c++)
+ {
+ std::vector<uint32_t> new_indices{n, h, w, c};
+ std::vector<uint32_t> orig_indices{new_indices[t[0]], new_indices[t[1]],
+ new_indices[t[2]], new_indices[t[3]]};
+
+ const auto data = node->at<loco::DataType::FLOAT32>(cal_offset(orig_shape, orig_indices));
+ clone_const->at<loco::DataType::FLOAT32>(cal_offset(new_shape, new_indices)) = data;
+ }
+ }
+ }
+ }
+
+ return clone_const;
+}
+
+bool check_rank_four(const CircleConst *c) { return c->rank() == 4; }
+
+// Return true if below conditions are met
+// 1. t->perm() is CircleConst
+// 2. t->perm() is S32
+bool check_perm(const CircleTranspose *t)
+{
+ auto perm = dynamic_cast<CircleConst *>(t->perm());
+ if (not perm)
+ return false;
+
+ switch (perm->dtype())
+ {
+ case loco::DataType::S32:
+ for (uint32_t i = 0; i < perm->size<loco::DataType::S32>(); i++)
+ {
+ auto data = perm->at<loco::DataType::S32>(i);
+ // TODO Support not normalized index
+ if (data < 0 or data >= static_cast<int32_t>(t->rank()))
+ return false;
+ }
+ break;
+ // TODO Support S64 data type
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+#define RETURN_FALSE_UNLESS(COND) \
+ if (not(COND)) \
+ return false;
+
+// Elementwise Binary Operator with const
+class EBOWithConstPattern final : public CircleNodeMutableVisitor<bool>
+{
+private:
+ template <typename CIRCLE_OP_PTR> bool has_pattern(CIRCLE_OP_PTR node)
+ {
+ if (auto x = dynamic_cast<luci::CircleConst *>(node->x()))
+ {
+ if (auto y = dynamic_cast<luci::CircleTranspose *>(node->y()))
+ {
+ RETURN_FALSE_UNLESS(check_rank_four(x));
+ RETURN_FALSE_UNLESS(check_perm(y));
+
+ auto new_const = gen_new_const(y, x);
+ assert(new_const); // FIX_ME_UNLESS
+
+ auto new_transpose = create_cloned_transpose(y);
+ assert(new_transpose); // FIX_ME_UNLESS
+
+ // Reconnect network
+ node->x(new_const);
+ node->y(y->a());
+ loco::replace(node).with(new_transpose);
+ new_transpose->a(node);
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ return true;
+ }
+ }
+
+ if (auto y = dynamic_cast<luci::CircleConst *>(node->y()))
+ {
+ if (auto x = dynamic_cast<luci::CircleTranspose *>(node->x()))
+ {
+ RETURN_FALSE_UNLESS(check_rank_four(y));
+ RETURN_FALSE_UNLESS(check_perm(x));
+
+ auto new_const = gen_new_const(x, y);
+ assert(new_const); // FIX_ME_UNLESS
+
+ auto new_transpose = create_cloned_transpose(x);
+ assert(new_transpose); // FIX_ME_UNLESS
+
+ // Reconnect network
+ node->y(new_const);
+ node->x(x->a());
+ loco::replace(node).with(new_transpose);
+ new_transpose->a(node);
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+public:
+ // Default
+ bool visit(luci::CircleNode *) { return false; }
+
+ bool visit(luci::CircleAdd *node) { return has_pattern(node); }
+
+ bool visit(luci::CircleMul *node) { return has_pattern(node); }
+
+private:
+ // Return a new const node after Transpose Op is forwarded
+ // Return nullptr if unsupported cases
+ CircleConst *gen_new_const(CircleTranspose *t, CircleConst *c)
+ {
+ const auto perm = dynamic_cast<CircleConst *>(t->perm());
+
+ // Only support constant perm
+ if (not perm)
+ return nullptr;
+
+ std::vector<uint32_t> perm_data;
+ switch (perm->dtype())
+ {
+ case loco::DataType::S32:
+ for (uint32_t i = 0; i < perm->size<loco::DataType::S32>(); i++)
+ {
+ auto data = perm->at<loco::DataType::S32>(i);
+ assert(data >= 0 and data < static_cast<int32_t>(t->rank()));
+ perm_data.emplace_back(static_cast<uint32_t>(data));
+ }
+ break;
+ // TODO Support S64 data type
+ default:
+ return nullptr;
+ }
+
+ assert(perm_data.size() == t->rank()); // FIX_CALLER_UNLESS
+
+ return reverse_transposed(c, perm_data);
+ }
+};
+
+// Elementwise Unary Operator
+class EwUnaryPattern final : public CircleNodeMutableVisitor<bool>
+{
+private:
+ // input is 'x'
+ template <typename CIRCLE_OP_PTR> bool has_pattern_x(CIRCLE_OP_PTR node)
+ {
+ if (auto x = dynamic_cast<luci::CircleTranspose *>(node->x()))
+ {
+ RETURN_FALSE_UNLESS(check_perm(x));
+
+ auto new_transpose = create_cloned_transpose(x);
+ assert(new_transpose); // FIX_ME_UNLESS
+
+ // Reconnect network
+ node->x(x->a());
+ loco::replace(node).with(new_transpose);
+ new_transpose->a(node);
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ return true;
+ }
+
+ return false;
+ }
+
+public:
+ // Default
+ bool visit(luci::CircleNode *) { return false; }
+
+ bool visit(luci::CircleAbs *node) { return has_pattern_x(node); }
+};
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * BEFORE
+ * |
+ * [CircleNode] [CircleConst]
+ * | /
+ * [CircleTranspose] [CircleConst]
+ * / | /
+ * [CircleNode] [(BinaryOp)]
+ * | | \
+ * | | [CircleNode]
+ * | | |
+ *
+ * BinaryOp: CircleAdd, CircleMul, ...
+ *
+ * |
+ * [CircleNode] [CircleConst]
+ * | /
+ * [CircleTranspose]
+ * / |
+ * [CircleNode] [(UnaryOp)]
+ * | | \
+ * | | [CircleNode]
+ * | | |
+ *
+ * UnaryOp: CircleAbs, ...
+ *
+ * AFTER
+ * |
+ * [CircleConst] [CircleNode] [CircleConst(updated)]
+ * | / | /
+ * [CircleTranspose] [(BinaryOp)] [CircleConst]
+ * | | /
+ * [CircleNode] [CircleTranspose]
+ * | | \
+ * | | [CircleNode]
+ * | | |
+ *
+ * |
+ * [CircleConst] [CircleNode]
+ * | / |
+ * [CircleTranspose] [(UnaryOp)] [CircleConst]
+ * | | /
+ * [CircleNode] [CircleTranspose]
+ * | | \
+ * | | [CircleNode]
+ * | | |
+ *
+ * Note: a new [CircleTranspose] is added after [(BinaryOp)] / [(UnaryOp)]
+ */
+bool ForwardTransposeOpPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ EBOWithConstPattern eboc;
+ EwUnaryPattern ewu;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ if (circle_node->accept(&eboc))
+ changed = true;
+ else if (circle_node->accept(&ewu))
+ changed = true;
+ }
+ return changed;
+}
+
+#undef RETURN_FALSE_UNLESS
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/ForwardTransposeOpPass.test.cpp b/compiler/luci/pass/src/ForwardTransposeOpPass.test.cpp
new file mode 100644
index 000000000..2d061c2a3
--- /dev/null
+++ b/compiler/luci/pass/src/ForwardTransposeOpPass.test.cpp
@@ -0,0 +1,524 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ForwardTransposeOpPass.h"
+#include "luci/Pass/CircleShapeInferencePass.h"
+
+#include <logo/Phase.h>
+#include <luci/IR/CircleNodes.h>
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+#include <vector>
+
+namespace
+{
+
+using namespace luci::test;
+
+template <typename T> class TransposeBinaryOpGraphlet
+{
+public:
+ TransposeBinaryOpGraphlet() = default;
+
+public:
+ virtual ~TransposeBinaryOpGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 shape_in, const ShapeU32 perm)
+ {
+ std::vector<uint32_t> shape_in_v = shape_in;
+ std::vector<uint32_t> perm_v = perm;
+
+ assert(shape_in_v.size() == perm_v.size()); // FIX_CALLER_UNLESS
+
+ _perm = g->nodes()->create<luci::CircleConst>();
+ _const = g->nodes()->create<luci::CircleConst>();
+ _transpose = g->nodes()->create<luci::CircleTranspose>();
+ _binary = g->nodes()->create<T>();
+
+ _perm->dtype(loco::DataType::S32);
+ _perm->rank(1);
+ _perm->dim(0).set(perm_v.size());
+ _perm->shape_status(luci::ShapeStatus::VALID);
+
+ _const->dtype(loco::DataType::FLOAT32);
+ _const->rank(shape_in_v.size());
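+ // _const takes the shape of the Transpose output: dim(i) = shape_in[perm[i]]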
+ for (uint32_t i = 0; i < shape_in_v.size(); i++)
+ _const->dim(i).set(shape_in_v[perm_v[i]]);
+ _const->shape_status(luci::ShapeStatus::VALID);
+
+ // values
+ const auto size = perm_v.size();
+ _perm->size<loco::DataType::S32>(size);
+ for (uint32_t i = 0; i < size; i++)
+ _perm->at<loco::DataType::S32>(i) = perm_v[i];
+
+ uint32_t elems = 1;
+ for (uint32_t i = 0; i < size; i++)
+ elems *= shape_in_v[i];
+
+ _const->size<loco::DataType::FLOAT32>(elems);
+ for (uint32_t i = 0; i < elems; i++)
+ _const->at<loco::DataType::FLOAT32>(i) = i;
+
+ _perm->name("transpose_perm");
+ _transpose->name("transpose");
+ _binary->name("binary");
+ }
+
+ luci::CircleTranspose *transpose(void) { return _transpose; }
+
+ void switch_xy(void)
+ {
+ assert(_binary); // FIX_CALLER_UNLESS
+ auto temp = _binary->x();
+ _binary->x(_binary->y());
+ _binary->y(temp);
+ }
+
+protected:
+ luci::CircleTranspose *_transpose = nullptr;
+ T *_binary = nullptr;
+ luci::CircleConst *_perm = nullptr;
+ luci::CircleConst *_const = nullptr;
+};
+
+using TransposeAddGraphlet = TransposeBinaryOpGraphlet<luci::CircleAdd>;
+using TransposeMulGraphlet = TransposeBinaryOpGraphlet<luci::CircleMul>;
+
+class ForwardTransposeToAddGraph : public TestIOGraph, public TransposeAddGraphlet
+{
+public:
+ void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+ {
+ TestIOGraph::init(shape_in, shape_out);
+ TransposeAddGraphlet::init(g(), shape_in, shape_out);
+
+ // connect network
+ _transpose->a(input());
+ _transpose->perm(_perm);
+ _binary->x(_transpose);
+ _binary->y(_const);
+
+ output()->from(_binary);
+ }
+};
+
+class ForwardTransposeToAddInvalidGraph : public TestIOGraph, public TransposeAddGraphlet
+{
+public:
+ void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+ {
+ TestIOGraph::init(shape_in, shape_out);
+ TransposeAddGraphlet::init(g(), shape_in, shape_out);
+
+ // connect network
+ _transpose->a(input());
+ _transpose->perm(_perm);
+ _binary->x(_transpose);
+ _binary->y(_transpose);
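+ // Both operands are the same Transpose; without a constant operand the
+ // pass must leave this graph unchanged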
+
+ output()->from(_binary);
+ }
+};
+
+class ForwardTransposeToMulGraph : public TestIOGraph, public TransposeMulGraphlet
+{
+public:
+ void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+ {
+ TestIOGraph::init(shape_in, shape_out);
+ TransposeMulGraphlet::init(g(), shape_in, shape_out);
+
+ // connect network
+ _transpose->a(input());
+ _transpose->perm(_perm);
+ _binary->x(_transpose);
+ _binary->y(_const);
+
+ output()->from(_binary);
+ }
+};
+
+void run_phase(loco::Graph *g)
+{
+ logo::Phase phase;
+
+ // Default passes.
+ phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+
+ // Pass to test
+ phase.emplace_back(std::make_unique<luci::ForwardTransposeOpPass>());
+
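+ // The Restart strategy re-runs the whole phase whenever a pass changes the
+ // graph, so shape inference is refreshed after the Transpose is forwarded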
+ logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{g};
+ phase_runner.run(phase);
+}
+
+class ForwardTransposeToAddGraphTest : public ::testing::Test
+{
+public:
+ void run_pass(void) { run_phase(_graph.g()); }
+
+protected:
+ ForwardTransposeToAddGraph _graph;
+};
+
+class ForwardTransposeToAddGraphNegTest : public ::testing::Test
+{
+public:
+ void run_pass(void) { run_phase(_graph.g()); }
+
+protected:
+ ForwardTransposeToAddInvalidGraph _graph;
+};
+
+class ForwardTransposeToMulGraphTest : public ::testing::Test
+{
+public:
+ void run_pass(void) { run_phase(_graph.g()); }
+
+protected:
+ ForwardTransposeToMulGraph _graph;
+};
+
+} // namespace
+
+TEST_F(ForwardTransposeToAddGraphTest, forward_add_xy)
+{
+ _graph.init({1, 64, 51, 1}, {0, 3, 2, 1});
+
+ run_pass();
+
+ auto transpose = dynamic_cast<luci::CircleTranspose *>(_graph.output()->from());
+ EXPECT_NE(nullptr, transpose);
+ EXPECT_EQ(4, transpose->rank());
+ EXPECT_EQ(1, transpose->dim(0).value());
+ EXPECT_EQ(1, transpose->dim(1).value());
+ EXPECT_EQ(51, transpose->dim(2).value());
+ EXPECT_EQ(64, transpose->dim(3).value());
+
+ auto add = dynamic_cast<luci::CircleAdd *>(transpose->a());
+ EXPECT_NE(nullptr, add);
+ EXPECT_EQ(4, add->rank());
+ EXPECT_EQ(1, add->dim(0).value());
+ EXPECT_EQ(64, add->dim(1).value());
+ EXPECT_EQ(51, add->dim(2).value());
+ EXPECT_EQ(1, add->dim(3).value());
+
+ auto add_const = dynamic_cast<luci::CircleConst *>(add->y());
+ EXPECT_NE(nullptr, add_const);
+ EXPECT_EQ(4, add_const->rank());
+ EXPECT_EQ(1, add_const->dim(0).value());
+ EXPECT_EQ(64, add_const->dim(1).value());
+ EXPECT_EQ(51, add_const->dim(2).value());
+ EXPECT_EQ(1, add_const->dim(3).value());
+}
+
+TEST_F(ForwardTransposeToAddGraphTest, forward_add_yx)
+{
+ _graph.init({1, 64, 51, 1}, {0, 3, 2, 1});
+ _graph.switch_xy();
+
+ run_pass();
+
+ auto transpose = dynamic_cast<luci::CircleTranspose *>(_graph.output()->from());
+ EXPECT_NE(nullptr, transpose);
+ EXPECT_EQ(4, transpose->rank());
+ EXPECT_EQ(1, transpose->dim(0).value());
+ EXPECT_EQ(1, transpose->dim(1).value());
+ EXPECT_EQ(51, transpose->dim(2).value());
+ EXPECT_EQ(64, transpose->dim(3).value());
+
+ auto add = dynamic_cast<luci::CircleAdd *>(transpose->a());
+ EXPECT_NE(nullptr, add);
+ EXPECT_EQ(4, add->rank());
+ EXPECT_EQ(1, add->dim(0).value());
+ EXPECT_EQ(64, add->dim(1).value());
+ EXPECT_EQ(51, add->dim(2).value());
+ EXPECT_EQ(1, add->dim(3).value());
+
+ auto add_const = dynamic_cast<luci::CircleConst *>(add->x());
+ EXPECT_NE(nullptr, add_const);
+ EXPECT_EQ(4, add_const->rank());
+ EXPECT_EQ(1, add_const->dim(0).value());
+ EXPECT_EQ(64, add_const->dim(1).value());
+ EXPECT_EQ(51, add_const->dim(2).value());
+ EXPECT_EQ(1, add_const->dim(3).value());
+}
+
+TEST_F(ForwardTransposeToMulGraphTest, forward_mul_xy)
+{
+ _graph.init({1, 64, 51, 1}, {0, 3, 2, 1});
+
+ run_pass();
+
+ auto transpose = dynamic_cast<luci::CircleTranspose *>(_graph.output()->from());
+ EXPECT_NE(nullptr, transpose);
+ EXPECT_EQ(4, transpose->rank());
+ EXPECT_EQ(1, transpose->dim(0).value());
+ EXPECT_EQ(1, transpose->dim(1).value());
+ EXPECT_EQ(51, transpose->dim(2).value());
+ EXPECT_EQ(64, transpose->dim(3).value());
+
+ auto mul = dynamic_cast<luci::CircleMul *>(transpose->a());
+ EXPECT_NE(nullptr, mul);
+ EXPECT_EQ(4, mul->rank());
+ EXPECT_EQ(1, mul->dim(0).value());
+ EXPECT_EQ(64, mul->dim(1).value());
+ EXPECT_EQ(51, mul->dim(2).value());
+ EXPECT_EQ(1, mul->dim(3).value());
+
+ auto mul_const = dynamic_cast<luci::CircleConst *>(mul->y());
+ EXPECT_NE(nullptr, mul_const);
+ EXPECT_EQ(4, mul_const->rank());
+ EXPECT_EQ(1, mul_const->dim(0).value());
+ EXPECT_EQ(64, mul_const->dim(1).value());
+ EXPECT_EQ(51, mul_const->dim(2).value());
+ EXPECT_EQ(1, mul_const->dim(3).value());
+}
+
+TEST_F(ForwardTransposeToMulGraphTest, forward_mul_yx)
+{
+ _graph.init({1, 64, 51, 1}, {0, 3, 2, 1});
+ _graph.switch_xy();
+
+ run_pass();
+
+ auto transpose = dynamic_cast<luci::CircleTranspose *>(_graph.output()->from());
+ EXPECT_NE(nullptr, transpose);
+ EXPECT_EQ(4, transpose->rank());
+ EXPECT_EQ(1, transpose->dim(0).value());
+ EXPECT_EQ(1, transpose->dim(1).value());
+ EXPECT_EQ(51, transpose->dim(2).value());
+ EXPECT_EQ(64, transpose->dim(3).value());
+
+ auto mul = dynamic_cast<luci::CircleMul *>(transpose->a());
+ EXPECT_NE(nullptr, mul);
+ EXPECT_EQ(4, mul->rank());
+ EXPECT_EQ(1, mul->dim(0).value());
+ EXPECT_EQ(64, mul->dim(1).value());
+ EXPECT_EQ(51, mul->dim(2).value());
+ EXPECT_EQ(1, mul->dim(3).value());
+
+ auto mul_const = dynamic_cast<luci::CircleConst *>(mul->x());
+ EXPECT_NE(nullptr, mul_const);
+ EXPECT_EQ(4, mul_const->rank());
+ EXPECT_EQ(1, mul_const->dim(0).value());
+ EXPECT_EQ(64, mul_const->dim(1).value());
+ EXPECT_EQ(51, mul_const->dim(2).value());
+ EXPECT_EQ(1, mul_const->dim(3).value());
+}
+
+TEST_F(ForwardTransposeToAddGraphTest, forward_transpose_add_NEG)
+{
+ _graph.init({1, 64, 51, 1}, {0, 3, 2, 1});
+
+ // Remove add
+ _graph.output()->from(_graph.transpose());
+
+ luci::ForwardTransposeOpPass pass;
+ EXPECT_FALSE(pass.run(_graph.g()));
+}
+
+TEST_F(ForwardTransposeToAddGraphNegTest, forward_transpose_add_non_const_NEG)
+{
+ _graph.init({1, 64, 51, 1}, {0, 3, 2, 1});
+
+ luci::ForwardTransposeOpPass pass;
+ EXPECT_FALSE(pass.run(_graph.g()));
+}
+
+TEST_F(ForwardTransposeToMulGraphTest, forward_transpose_mul_NEG)
+{
+ _graph.init({1, 64, 51, 1}, {0, 3, 2, 1});
+
+ // Remove mul
+ _graph.output()->from(_graph.transpose());
+
+ luci::ForwardTransposeOpPass pass;
+ EXPECT_FALSE(pass.run(_graph.g()));
+}
+
+// Unary
+
+namespace
+{
+
+template <typename T> class TransposeUnaryOpGraphlet
+{
+public:
+ TransposeUnaryOpGraphlet() = default;
+
+public:
+ virtual ~TransposeUnaryOpGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 shape_in, const ShapeU32 perm)
+ {
+ std::vector<uint32_t> shape_in_v = shape_in;
+ std::vector<uint32_t> perm_v = perm;
+
+ assert(shape_in_v.size() == perm_v.size()); // FIX_CALLER_UNLESS
+
+ _perm = g->nodes()->create<luci::CircleConst>();
+ _const = g->nodes()->create<luci::CircleConst>();
+ _transpose = g->nodes()->create<luci::CircleTranspose>();
+ _unary = g->nodes()->create<T>();
+
+ _perm->dtype(loco::DataType::S32);
+ _perm->rank(1);
+ _perm->dim(0).set(perm_v.size());
+ _perm->shape_status(luci::ShapeStatus::VALID);
+
+ _const->dtype(loco::DataType::FLOAT32);
+ _const->rank(shape_in_v.size());
+ for (uint32_t i = 0; i < shape_in_v.size(); i++)
+ _const->dim(i).set(shape_in_v[perm_v[i]]);
+ _const->shape_status(luci::ShapeStatus::VALID);
+
+ // values
+ const auto size = perm_v.size();
+ _perm->size<loco::DataType::S32>(size);
+ for (uint32_t i = 0; i < size; i++)
+ _perm->at<loco::DataType::S32>(i) = perm_v[i];
+
+ uint32_t elems = 1;
+ for (uint32_t i = 0; i < size; i++)
+ elems *= shape_in_v[i];
+
+ _const->size<loco::DataType::FLOAT32>(elems);
+ for (uint32_t i = 0; i < elems; i++)
+ _const->at<loco::DataType::FLOAT32>(i) = i;
+
+ _perm->name("transpose_perm");
+ _transpose->name("transpose");
+ _unary->name("_unary");
+ }
+
+ luci::CircleTranspose *transpose(void) { return _transpose; }
+
+protected:
+ luci::CircleTranspose *_transpose = nullptr;
+ T *_unary = nullptr;
+ luci::CircleConst *_perm = nullptr;
+ luci::CircleConst *_const = nullptr;
+};
+
+using TransposeAbsGraphlet = TransposeUnaryOpGraphlet<luci::CircleAbs>;
+
+class ForwardTransposeToAbsGraph : public TestIOGraph, public TransposeAbsGraphlet
+{
+public:
+ void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+ {
+ TestIOGraph::init(shape_in, shape_out);
+ TransposeAbsGraphlet::init(g(), shape_in, shape_out);
+
+ // connect network
+ _transpose->a(input());
+ _transpose->perm(_perm);
+ _unary->x(_transpose);
+
+ output()->from(_unary);
+ }
+};
+
+class ForwardTransposeToAbsInvalidGraph : public TestIOGraph, public TransposeAbsGraphlet
+{
+public:
+ void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+ {
+ TestIOGraph::init(shape_in, shape_out);
+ TransposeAbsGraphlet::init(g(), shape_in, shape_out);
+
+ _relu = g()->nodes()->create<luci::CircleRelu>();
+ _relu->dtype(loco::DataType::FLOAT32);
+ _relu->name("relu");
+
+ // connect network
+ _relu->features(input());
+ _unary->x(_relu);
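+ // The unary op is fed by Relu instead of Transpose, so the pass must not fire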
+
+ output()->from(_unary);
+ }
+
+protected:
+ luci::CircleRelu *_relu = nullptr;
+};
+
+class ForwardTransposeToAbsGraphTest : public ::testing::Test
+{
+public:
+ void run_pass(void) { run_phase(_graph.g()); }
+
+protected:
+ ForwardTransposeToAbsGraph _graph;
+};
+
+class ForwardTransposeToAbsGraphNegTest : public ::testing::Test
+{
+public:
+ void run_pass(void) { run_phase(_graph.g()); }
+
+protected:
+ ForwardTransposeToAbsInvalidGraph _graph;
+};
+
+} // namespace
+
+TEST_F(ForwardTransposeToAbsGraphTest, forward_abs_x)
+{
+ _graph.init({1, 64, 51, 1}, {0, 3, 2, 1});
+
+ run_pass();
+
+ auto transpose = dynamic_cast<luci::CircleTranspose *>(_graph.output()->from());
+ EXPECT_NE(nullptr, transpose);
+ EXPECT_EQ(4, transpose->rank());
+ EXPECT_EQ(1, transpose->dim(0).value());
+ EXPECT_EQ(1, transpose->dim(1).value());
+ EXPECT_EQ(51, transpose->dim(2).value());
+ EXPECT_EQ(64, transpose->dim(3).value());
+
+ auto abs = dynamic_cast<luci::CircleAbs *>(transpose->a());
+ EXPECT_NE(nullptr, abs);
+ EXPECT_EQ(4, abs->rank());
+ EXPECT_EQ(1, abs->dim(0).value());
+ EXPECT_EQ(64, abs->dim(1).value());
+ EXPECT_EQ(51, abs->dim(2).value());
+ EXPECT_EQ(1, abs->dim(3).value());
+}
+
+TEST_F(ForwardTransposeToAbsGraphTest, forward_transpose_abs_NEG)
+{
+ _graph.init({1, 64, 51, 1}, {0, 3, 2, 1});
+
+ // Remove abs
+ _graph.output()->from(_graph.transpose());
+
+ luci::ForwardTransposeOpPass pass;
+ EXPECT_FALSE(pass.run(_graph.g()));
+}
+
+TEST_F(ForwardTransposeToAbsGraphNegTest, forward_transpose_abs_non_transpose_NEG)
+{
+ _graph.init({1, 64, 51, 1}, {0, 3, 2, 1});
+
+ luci::ForwardTransposeOpPass pass;
+ EXPECT_FALSE(pass.run(_graph.g()));
+}
diff --git a/compiler/luci/pass/src/FuseActivationFunctionPass.cpp b/compiler/luci/pass/src/FuseActivationFunctionPass.cpp
new file mode 100644
index 000000000..868ccd140
--- /dev/null
+++ b/compiler/luci/pass/src/FuseActivationFunctionPass.cpp
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseActivationFunctionPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeMixins.h>
+#include <luci/IR/CircleOpcode.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+namespace luci
+{
+
+bool fuse_activation_function(luci::CircleNode *node)
+{
+ auto preds = loco::preds(node);
+ assert(preds.size() == 1);
+
+ auto pred_node = static_cast<luci::CircleNode *>(*preds.begin());
+ if (loco::succs(pred_node).size() != 1)
+ return false;
+
+ auto node_with_fused_act =
+ dynamic_cast<luci::CircleNodeMixin<luci::CircleNodeTrait::FusedActFunc> *>(pred_node);
+ if (node_with_fused_act == nullptr)
+ return false;
+
+ // TODO remove this work-around
+ // This skips fusion for Concatenation as luci-interpreter does not support it yet
+ if (dynamic_cast<luci::CircleConcatenation *>(pred_node) != nullptr)
+ return false;
+ // TODO remove this work-around
+ // This skips fusion for TransposeConv as backends do not support it yet
+ // NOTE remove this when XpSepActFromTransposeConvOpPass is removed
+ if (dynamic_cast<luci::CircleTransposeConv *>(pred_node) != nullptr)
+ return false;
+
+ auto fused_act = node_with_fused_act->fusedActivationFunction();
+
+ luci::FusedActFunc target_func = luci::FusedActFunc::UNDEFINED;
+
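+ // Folding is legal only when composing the two activations equals a single
+ // supported one, e.g. Relu(Relu6(x)) == Relu6(x) and Relu6(Relu(x)) == Relu6(x)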
+ auto opcode = node->opcode();
+ if (opcode == luci::CircleOpcode::RELU)
+ {
+ if (fused_act == luci::FusedActFunc::NONE || fused_act == luci::FusedActFunc::RELU)
+ target_func = luci::FusedActFunc::RELU;
+ else if (fused_act == luci::FusedActFunc::RELU6)
+ target_func = luci::FusedActFunc::RELU6;
+ else
+ return false;
+ }
+ else if (opcode == luci::CircleOpcode::RELU6)
+ {
+ if (fused_act == luci::FusedActFunc::NONE || fused_act == luci::FusedActFunc::RELU ||
+ fused_act == luci::FusedActFunc::RELU6)
+ target_func = luci::FusedActFunc::RELU6;
+ else
+ return false;
+ }
+ else if (opcode == luci::CircleOpcode::RELU_N1_TO_1)
+ {
+ if (fused_act == luci::FusedActFunc::NONE || fused_act == luci::FusedActFunc::RELU_N1_TO_1)
+ target_func = luci::FusedActFunc::RELU_N1_TO_1;
+ else
+ return false;
+ }
+ else
+ return false;
+
+ node_with_fused_act->fusedActivationFunction(target_func);
+ luci::add_origin(pred_node, luci::get_origin(node));
+ loco::replace(node).with(pred_node);
+
+ node->drop();
+
+ return true;
+}
+
+bool FuseActivationFunctionPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = static_cast<luci::CircleNode *>(node);
+ auto opcode = circle_node->opcode();
+ // TANH is excluded because CONV fused with TANH is not supported in luci-interpreter
+ if (opcode == luci::CircleOpcode::RELU || opcode == luci::CircleOpcode::RELU6 ||
+ opcode == luci::CircleOpcode::RELU_N1_TO_1)
+ {
+ if (fuse_activation_function(circle_node))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FuseActivationFunctionPass.test.cpp b/compiler/luci/pass/src/FuseActivationFunctionPass.test.cpp
new file mode 100644
index 000000000..9e0a80df1
--- /dev/null
+++ b/compiler/luci/pass/src/FuseActivationFunctionPass.test.cpp
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseActivationFunctionPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+/**
+ * Simple graph for test
+ *
+ * BEFORE
+ *
+ * [Conv1]
+ * |
+ * [Activation func]
+ * |
+ * [Conv2]
+ *
+ * AFTER
+ *
+ * [Conv1 + Activation func]
+ * |
+ * [Conv2]
+ *
+ */
+class ConvReluConvGraphlet
+{
+public:
+ ConvReluConvGraphlet() = default;
+
+ void init(loco::Graph *g)
+ {
+ _conv1 = g->nodes()->create<luci::CircleConv2D>();
+ _conv2 = g->nodes()->create<luci::CircleConv2D>();
+ _relu = g->nodes()->create<luci::CircleRelu>();
+ _conv1_f = g->nodes()->create<luci::CircleConst>();
+ _conv1_b = g->nodes()->create<luci::CircleConst>();
+ _conv2_f = g->nodes()->create<luci::CircleConst>();
+ _conv2_b = g->nodes()->create<luci::CircleConst>();
+
+ _conv1->fusedActivationFunction(luci::FusedActFunc::NONE);
+
+ _conv1->name("conv1");
+ _conv2->name("conv2");
+ _relu->name("relu");
+ _conv1_f->name("conv1f");
+ _conv1_b->name("conv1b");
+ _conv2_f->name("conv2f");
+ _conv2_b->name("conv2b");
+ }
+
+public:
+ luci::CircleRelu *relu() { return _relu; }
+ luci::CircleConv2D *conv1() { return _conv1; }
+ luci::CircleConv2D *conv2() { return _conv2; }
+
+protected:
+ luci::CircleConv2D *_conv1 = nullptr;
+ luci::CircleConv2D *_conv2 = nullptr;
+ luci::CircleRelu *_relu = nullptr;
+ luci::CircleConst *_conv1_f = nullptr;
+ luci::CircleConst *_conv1_b = nullptr;
+ luci::CircleConst *_conv2_f = nullptr;
+ luci::CircleConst *_conv2_b = nullptr;
+};
+
+class ConvTanhConvGraphlet
+{
+public:
+ ConvTanhConvGraphlet() = default;
+
+ void init(loco::Graph *g)
+ {
+ _conv1 = g->nodes()->create<luci::CircleConv2D>();
+ _conv2 = g->nodes()->create<luci::CircleConv2D>();
+ _tanh = g->nodes()->create<luci::CircleTanh>();
+ _conv1_f = g->nodes()->create<luci::CircleConst>();
+ _conv1_b = g->nodes()->create<luci::CircleConst>();
+ _conv2_f = g->nodes()->create<luci::CircleConst>();
+ _conv2_b = g->nodes()->create<luci::CircleConst>();
+
+ _conv1->fusedActivationFunction(luci::FusedActFunc::NONE);
+
+ _conv1->name("conv1");
+ _conv2->name("conv2");
+ _tanh->name("tanh");
+ _conv1_f->name("conv1f");
+ _conv1_b->name("conv1b");
+ _conv2_f->name("conv2f");
+ _conv2_b->name("conv2b");
+ }
+
+public:
+ luci::CircleTanh *tanh() { return _tanh; }
+ luci::CircleConv2D *conv1() { return _conv1; }
+ luci::CircleConv2D *conv2() { return _conv2; }
+
+protected:
+ luci::CircleConv2D *_conv1 = nullptr;
+ luci::CircleConv2D *_conv2 = nullptr;
+ luci::CircleTanh *_tanh = nullptr;
+ luci::CircleConst *_conv1_f = nullptr;
+ luci::CircleConst *_conv1_b = nullptr;
+ luci::CircleConst *_conv2_f = nullptr;
+ luci::CircleConst *_conv2_b = nullptr;
+};
+
+class FuseActTestGraph : public TestIOGraph, public ConvReluConvGraphlet
+{
+public:
+ FuseActTestGraph() = default;
+
+ void init(void)
+ {
+ TestIOGraph::init({1}, {1});
+ ConvReluConvGraphlet::init(g());
+
+ _conv1->input(input());
+ _conv1->filter(_conv1_f);
+ _conv1->bias(_conv1_b);
+
+ _relu->features(_conv1);
+
+ _conv2->input(_relu);
+ _conv2->filter(_conv2_f);
+ _conv2->bias(_conv2_b);
+
+ output()->from(_conv2);
+ }
+};
+
+class FuseTanhActTestGraph : public TestIOGraph, public ConvTanhConvGraphlet
+{
+public:
+ FuseTanhActTestGraph() = default;
+
+ void init(void)
+ {
+ TestIOGraph::init({1}, {1});
+ ConvTanhConvGraphlet::init(g());
+
+ _conv1->input(input());
+ _conv1->filter(_conv1_f);
+ _conv1->bias(_conv1_b);
+
+ _tanh->x(_conv1);
+
+ _conv2->input(_tanh);
+ _conv2->filter(_conv2_f);
+ _conv2->bias(_conv2_b);
+
+ output()->from(_conv2);
+ }
+};
+
+class ConvHasMultiSuccGraph : public TestIOGraph, public ConvReluConvGraphlet
+{
+public:
+ ConvHasMultiSuccGraph() = default;
+
+ void init(void)
+ {
+ TestIOGraph::init({1}, {1});
+ ConvReluConvGraphlet::init(g());
+
+ _conv1->input(input());
+ _conv1->filter(_conv1_f);
+ _conv1->bias(_conv1_b);
+
+ _relu->features(_conv1);
+
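+ // conv1 feeds both relu and conv2, so it has two successors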
+ _conv2->input(_conv1);
+ _conv2->filter(_conv2_f);
+ _conv2->bias(_conv2_b);
+
+ output()->from(_relu); // output comes from relu, keeping it active
+ }
+};
+
+// TODO use ::testing::Test
+
+} // namespace
+
+TEST(FuseActivationFunctionPassTest, name)
+{
+ luci::FuseActivationFunctionPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(FuseActivationFunctionPassTest, fuse_activation_function)
+{
+ FuseActTestGraph g;
+ luci::FuseActivationFunctionPass pass;
+
+ g.init();
+
+ EXPECT_TRUE(pass.run(g.g()));
+ EXPECT_EQ(g.conv1(), g.conv2()->input());
+}
+
+TEST(FuseActivationFunctionPassTest, fuse_activation_function_dup_relu)
+{
+ FuseActTestGraph g;
+ luci::FuseActivationFunctionPass pass;
+
+ g.init();
+ g.conv1()->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ EXPECT_TRUE(pass.run(g.g()));
+ EXPECT_EQ(g.conv1(), g.conv2()->input());
+}
+
+TEST(FuseActivationFunctionPassTest, fuse_activation_function_mulsucc_NEG)
+{
+ ConvHasMultiSuccGraph g;
+ luci::FuseActivationFunctionPass pass;
+
+ g.init();
+
+ // Conv2D feeding Relu has multiple successors
+ EXPECT_FALSE(pass.run(g.g()));
+}
+
+TEST(FuseActivationFunctionPassTest, fuse_activation_function_tanh_NEG)
+{
+ FuseActTestGraph g;
+ luci::FuseActivationFunctionPass pass;
+
+ g.init();
+ g.conv1()->fusedActivationFunction(luci::FusedActFunc::TANH);
+
+ // Conv2D feeding Relu already has an incompatible fused activation (TANH)
+ EXPECT_FALSE(pass.run(g.g()));
+}
+
+TEST(FuseActivationFunctionPassTest, fuse_tanh_NEG)
+{
+ FuseTanhActTestGraph g;
+ luci::FuseActivationFunctionPass pass;
+
+ g.init();
+
+ // Tanh should not be fused
+ // This can be changed when CONV+TANH is supported by luci-interpreter
+ EXPECT_FALSE(pass.run(g.g()));
+}
diff --git a/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.cpp b/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.cpp
new file mode 100644
index 000000000..1d4a2e3bf
--- /dev/null
+++ b/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.cpp
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseAddWithFullyConnectedPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Service/Nodes/CircleConst.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+namespace
+{
+/**
+ * Fuse Add to FullyConnected if the added value is a channel-wise (last dimension) constant
+ *
+ * BEFORE
+ * |
+ * [CircleFullyConnected]
+ * |
+ * [CircleAdd]
+ * |
+ *
+ * AFTER
+ * |
+ * [CircleFullyConnected] [CircleAdd] (dead)
+ * |
+ *
+ */
+bool fuse_add_with_fc(luci::CircleFullyConnected *fc)
+{
+ if (not fc)
+ return false;
+
+ if (fc->dtype() != loco::DataType::FLOAT32)
+ return false;
+
+ if (fc->fusedActivationFunction() != luci::FusedActFunc::NONE)
+ return false;
+
+ auto weights = dynamic_cast<luci::CircleConst *>(fc->weights());
+ if (not weights)
+ return false;
+
+ // Get add node
+ auto fc_output = loco::succs(fc);
+ if (fc_output.size() != 1)
+ return false;
+
+ auto add = dynamic_cast<luci::CircleAdd *>(*fc_output.begin());
+ if (not add)
+ return false;
+ if (add->dtype() != loco::DataType::FLOAT32)
+ return false;
+
+ // Get addition
+ auto addition = add->x() == fc ? dynamic_cast<luci::CircleConst *>(add->y())
+ : dynamic_cast<luci::CircleConst *>(add->x());
+
+ // Non-const addition
+ if (not addition)
+ return false;
+
+ auto rank = addition->rank();
+ // TODO Support scalar addition
+ if (rank == 0)
+ return false;
+
+ for (uint32_t i = 0; i < rank - 1; i++)
+ {
+ if (addition->dim(i).value() != 1)
+ return false;
+ }
+ // Check that the last dimension of addition matches the number of neurons of FC
+ if (not(addition->dim(rank - 1) == weights->dim(0)))
+ return false;
+
+ auto bias = loco::must_cast<luci::CircleNode *>(fc->bias());
+
+ // We only support (1) a constant bias or (2) no bias
+ // If bias is neither (1) nor (2), it would be a feature map
+ if (bias->opcode() != luci::CircleOpcode::CIRCLECONST and
+ bias->opcode() != luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE)
+ return false;
+
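+ // The rewrite uses FC(x; W, b) + c == FC(x; W, b + c): clone the addition and
+ // fold any existing constant bias values into it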
+ auto fused_bias = luci::clone(addition);
+
+ // Add existing bias values
+ if (auto const_bias = dynamic_cast<luci::CircleConst *>(fc->bias()))
+ {
+ assert(const_bias->dtype() == loco::DataType::FLOAT32);
+
+ auto bias_size = fused_bias->size<loco::DataType::FLOAT32>();
+ assert(bias_size == const_bias->size<loco::DataType::FLOAT32>());
+ for (uint32_t i = 0; i < bias_size; i++)
+ fused_bias->at<loco::DataType::FLOAT32>(i) += const_bias->at<loco::DataType::FLOAT32>(i);
+ }
+
+ // At this point, it is guaranteed that fused_bias's shape is [1, 1, ..., N] or [N]
+ // where N is weights->dim(0).
+ // The shape is normalized to [N] to become the bias of FC
+ fused_bias->rank(1);
+ fused_bias->dim(0) = weights->dim(0);
+
+ fc->bias(fused_bias);
+ fc->fusedActivationFunction(add->fusedActivationFunction());
+
+ // set origin
+ luci::add_origin(fc, luci::get_origin(add));
+
+ replace(add).with(fc);
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool FuseAddWithFullyConnectedPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto fc = dynamic_cast<luci::CircleFullyConnected *>(node);
+ if (not fc)
+ continue;
+
+ if (fuse_add_with_fc(fc))
+ changed = true;
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.test.cpp b/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.test.cpp
new file mode 100644
index 000000000..b132c6bd9
--- /dev/null
+++ b/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.test.cpp
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseAddWithFullyConnectedPass.h"
+
+#include "helpers/CreateCircleConst.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+/**
+ * Simple graph for test
+ *
+ * BEFORE
+ *
+ * [FC]
+ * |
+ * [Add w/ Relu]
+ *
+ * AFTER
+ *
+ * [FC w/ Relu] (bias updated)
+ *
+ */
+class FCAddGraphlet
+{
+public:
+ FCAddGraphlet() = default;
+
+ void init(loco::Graph *g)
+ {
+ std::vector<float> weights_val(16 * 4);
+ _fc_f = luci::create_const_node(g, loco::DataType::FLOAT32, {16, 4}, weights_val);
+
+ std::vector<float> bias_val(16);
+ _fc_b = luci::create_const_node(g, loco::DataType::FLOAT32, {1, 16}, bias_val);
+
+ _fc = g->nodes()->create<luci::CircleFullyConnected>();
+ _fc->weights(_fc_f);
+ _fc->bias(_fc_b);
+ _fc->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _fc->dtype(loco::DataType::FLOAT32);
+ _fc->shape({1, 16});
+ _fc->name("fc");
+
+ std::vector<float> addition_val;
+ for (uint32_t i = 0; i < 16; i++)
+ addition_val.push_back(static_cast<float>(i));
+ _add_c = luci::create_const_node(g, loco::DataType::FLOAT32, {1, 16}, addition_val);
+
+ _add = g->nodes()->create<luci::CircleAdd>();
+ _add->x(_fc);
+ _add->y(_add_c);
+ _add->fusedActivationFunction(luci::FusedActFunc::RELU);
+ _add->dtype(loco::DataType::FLOAT32);
+ _add->shape({1, 16});
+ _add->name("add");
+ }
+
+public:
+ luci::CircleFullyConnected *fc() { return _fc; }
+
+public:
+ void to_fm_bias(void)
+ {
+ assert(_fc != nullptr); // FIX_ME_UNLESS
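+ // Replace the constant bias with another FC node (a feature map) so the
+ // fusion precondition on the bias fails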
+
+ auto new_fc = _fc->graph()->nodes()->create<luci::CircleFullyConnected>();
+ _fc->bias(new_fc);
+ }
+
+protected:
+ luci::CircleFullyConnected *_fc = nullptr;
+ luci::CircleAdd *_add = nullptr;
+ luci::CircleConst *_fc_f = nullptr;
+ luci::CircleConst *_fc_b = nullptr;
+ luci::CircleConst *_add_c = nullptr;
+};
+
+class FuseAddWithFCTestGraph : public TestIOGraph, public FCAddGraphlet
+{
+public:
+ FuseAddWithFCTestGraph() = default;
+
+ void init(void)
+ {
+ TestIOGraph::init({1, 4}, {1, 16});
+ FCAddGraphlet::init(g());
+
+ _fc->input(input());
+
+ output()->from(_add);
+ }
+};
+
+class FuseAddWithFullyConnectedPassTest : public ::testing::Test
+{
+public:
+ FuseAddWithFCTestGraph g;
+ luci::FuseAddWithFullyConnectedPass pass;
+};
+
+} // namespace
+
+TEST_F(FuseAddWithFullyConnectedPassTest, simple_test)
+{
+ g.init();
+
+ auto ret = pass.run(g.g());
+ EXPECT_EQ(true, ret);
+
+ auto fc = dynamic_cast<luci::CircleFullyConnected *>(g.output()->from());
+ EXPECT_NE(nullptr, fc);
+
+ auto bias = loco::must_cast<luci::CircleConst *>(g.fc()->bias());
+ for (uint32_t i = 0; i < bias->size<loco::DataType::FLOAT32>(); i++)
+ {
+ EXPECT_EQ(i, bias->at<loco::DataType::FLOAT32>(i));
+ }
+}
+
+TEST_F(FuseAddWithFullyConnectedPassTest, fm_bias_NEG)
+{
+ g.init();
+
+ // Bias is a feature map. Add is not fused.
+ g.to_fm_bias();
+
+ auto ret = pass.run(g.g());
+ EXPECT_EQ(false, ret);
+}
diff --git a/compiler/luci/pass/src/FuseAddWithTConvPass.cpp b/compiler/luci/pass/src/FuseAddWithTConvPass.cpp
new file mode 100644
index 000000000..d8e9f11f5
--- /dev/null
+++ b/compiler/luci/pass/src/FuseAddWithTConvPass.cpp
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseAddWithTConvPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+namespace
+{
+/**
+ * Fuse Add to TransposeConv if possible
+ *
+ * BEFORE
+ * |
+ * [CircleConst] [CircleTransposeConv]
+ * \ |
+ * [CircleAdd]
+ * |
+ *
+ * AFTER
+ * |
+ * [CircleConst] |
+ * \ |
+ * [CircleTransposeConv] [CircleAdd]
+ * |
+ * ([CircleRelu/Relu6])
+ * |
+ *
+ * Note: CircleRelu/Relu6 is inserted when the Add has a fused ReLU/ReLU6 activation
+ */
+bool fuse_add_with_tconv(luci::CircleTransposeConv *tconv)
+{
+ // skip if tconv has fused activation
+ if (tconv->fusedActivationFunction() != luci::FusedActFunc::NONE)
+ return false;
+ // check whether it has bias or not. This optimization works only if it doesn't.
+ auto bias = dynamic_cast<luci::CircleOutputExclude *>(tconv->bias());
+ if (not bias)
+ return false;
+
+ // get weight of tconv
+ auto filter = dynamic_cast<luci::CircleConst *>(tconv->filter());
+ if (not filter)
+ return false;
+ if (filter->dtype() != loco::DataType::FLOAT32)
+ return false;
+
+ // get add node
+ auto tconv_output = loco::succs(tconv);
+ assert(tconv_output.size() == 1);
+ auto add = dynamic_cast<luci::CircleAdd *>(*tconv_output.begin());
+ if (not add)
+ return false;
+ if (add->dtype() != loco::DataType::FLOAT32)
+ return false;
+ if (add->fusedActivationFunction() != luci::FusedActFunc::NONE &&
+ add->fusedActivationFunction() != luci::FusedActFunc::RELU6 &&
+ add->fusedActivationFunction() != luci::FusedActFunc::RELU)
+ return false;
+
+ // get addition
+ luci::CircleConst *addition = nullptr;
+ if (add->x() == tconv)
+ addition = dynamic_cast<luci::CircleConst *>(add->y());
+ else
+ addition = dynamic_cast<luci::CircleConst *>(add->x());
+
+ if (not addition)
+ return false;
+
+ // addition dim(0) == tconv filter channel dim
+ if (addition->rank() != 1)
+ return false;
+ auto addition_dim = addition->dim(0).value();
+ auto filter_channel_dim = filter->dim(0).value();
+ if (filter_channel_dim != addition_dim)
+ return false;
+
+ // fuse addition with transposed conv
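+ // TConv(x) + c, with c constant per output channel, equals TConv(x) whose bias is c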
+ tconv->bias(addition);
+
+ if (add->fusedActivationFunction() == luci::FusedActFunc::RELU6)
+ {
+ auto name = addition->name();
+ assert(name.length() > 0);
+ // separate relu op from add op
+ auto relu = add->graph()->nodes()->create<luci::CircleRelu6>();
+ relu->features(tconv);
+ relu->name(name + "/Relu6");
+ luci::add_origin(relu, luci::get_origin(add));
+
+ // remove add node
+ replace(add).with(relu);
+ }
+ else if (add->fusedActivationFunction() == luci::FusedActFunc::RELU)
+ {
+ auto name = addition->name();
+ assert(name.length() > 0);
+ // separate relu op from add op
+ auto relu = add->graph()->nodes()->create<luci::CircleRelu>();
+ relu->features(tconv);
+ relu->name(name + "/Relu");
+ luci::add_origin(relu, luci::get_origin(add));
+
+ // remove add node
+ replace(add).with(relu);
+ }
+ else
+ {
+ replace(add).with(tconv);
+ }
+
+ // set origin
+ luci::add_origin(tconv, luci::get_origin(add));
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool FuseAddWithTConvPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto tconv = dynamic_cast<luci::CircleTransposeConv *>(node);
+ if (not tconv)
+ continue;
+
+ if (fuse_add_with_tconv(tconv))
+ changed = true;
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FuseAddWithTConvPass.test.cpp b/compiler/luci/pass/src/FuseAddWithTConvPass.test.cpp
new file mode 100644
index 000000000..8748d73ef
--- /dev/null
+++ b/compiler/luci/pass/src/FuseAddWithTConvPass.test.cpp
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseAddWithTConvPass.h"
+
+#include <gtest/gtest.h>
+
+TEST(FuseAddWithTConvPassTest, name)
+{
+ luci::FuseAddWithTConvPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
diff --git a/compiler/luci/pass/src/FuseBCQPass.cpp b/compiler/luci/pass/src/FuseBCQPass.cpp
index 7aa2e3e80..3f8f700a9 100644
--- a/compiler/luci/pass/src/FuseBCQPass.cpp
+++ b/compiler/luci/pass/src/FuseBCQPass.cpp
@@ -17,85 +17,88 @@
#include "luci/Pass/FuseBCQPass.h"
#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+#include <luci/Log.h>
#include <cassert>
-#include <string>
#include <set>
namespace
{
-/**
- * @brief Circle nodes including BCQ information and a circle node to which BCQ will be applied
- * are connected with their name. And their names include common prefix.
- * However, after pb file is converted to tflite file, some nodes' name are changed.
- * Thus this function will return original common prefix.
- *
- * @note All the re-naming rule of TFLite converter is not figured out.
- * Therefore, if new naming rule is detected, this function should be updated.
- */
-const std::string node_name_prefix(luci::NodeName node_name)
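+// Check whether `after` (a const in the converted graph) holds the same values
+// as `before` (the original weight registered as FUSABLE_OP metadata), up to
+// the transpose/reshape implied by do_w_x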
+bool is_fusable_const(luci::CircleConst *before, luci::CircleConst *after, bool do_w_x)
{
- std::string prefix = node_name;
+ if (after->dtype() != loco::DataType::FLOAT32)
+ return false;
- if (prefix.find("/ReadVariableOp/resource") != std::string::npos)
- {
- const auto start_index = prefix.find("/ReadVariableOp/resource");
+ if (after->rank() != 2)
+ return false;
- const auto left_prefix = prefix.substr(0, start_index);
- const auto right_prefix = prefix.substr(start_index + 24);
+ if (after->size<loco::DataType::FLOAT32>() != before->size<loco::DataType::FLOAT32>())
+ return false;
- prefix = left_prefix + right_prefix;
- }
+ auto after_dim0 = after->dim(0).value();
+ auto after_dim1 = after->dim(1).value();
- if (prefix.find("Tensordot/") != std::string::npos)
+ if (before->rank() == 2)
{
- const auto index = prefix.find("Tensordot/");
- prefix = prefix.substr(0, index - 1);
- }
- else if (prefix.find("/MatMul") != std::string::npos)
- {
- const auto index = prefix.find("/MatMul");
- prefix = prefix.substr(0, index);
- }
- else if (prefix.find("kernel/") != std::string::npos)
- {
- const auto index = prefix.find("kernel/");
- prefix = prefix.substr(0, index - 1);
- }
- else if (prefix.find("/bcqinfo_") != std::string::npos)
- {
- const auto index = prefix.find("/bcqinfo_");
- prefix = prefix.substr(0, index);
- }
+ if (do_w_x)
+ {
+ // Check for [dim0, dim1] --> [dim0, dim1]
+ if (!(after->dim(0) == before->dim(0) && after->dim(1) == before->dim(1)))
+ return false;
- return prefix;
-}
+ for (uint32_t i = 0; i < after->size<loco::DataType::FLOAT32>(); ++i)
+ if (after->at<loco::DataType::FLOAT32>(i) != before->at<loco::DataType::FLOAT32>(i))
+ return false;
+ }
+ else
+ {
+ // Check for [dim0, dim1] --> [dim1, dim0]
+ if (!(after->dim(0) == before->dim(1) && after->dim(1) == before->dim(0)))
+ return false;
-/**
- * @brief Create CircleOutputExclude operation, which has same shape and dtype with
- * original circle_node.
- */
-luci::CircleOutputExclude *createNoOp(luci::CircleNode *circle_node)
-{
- auto graph = circle_node->graph();
- auto noOp = graph->nodes()->create<luci::CircleOutputExclude>();
+ for (uint32_t i = 0; i < after_dim0; ++i)
+ for (uint32_t j = 0; j < after_dim1; ++j)
+ if (after->at<loco::DataType::FLOAT32>(i * after_dim1 + j) !=
+ before->at<loco::DataType::FLOAT32>(j * after_dim0 + i))
+ return false;
+ }
- if (circle_node->shape_status() == luci::ShapeStatus::VALID)
- {
- noOp->dtype(circle_node->dtype());
- noOp->rank(circle_node->rank());
- for (uint32_t i = 0; i < circle_node->rank(); ++i)
- noOp->dim(i) = circle_node->dim(i);
+ return true;
}
- else
+ else if (before->rank() == 3)
{
- // For type inference
- noOp->dtype(loco::DataType::FLOAT32);
+ if (do_w_x)
+ {
+ // This case has not been found yet.
+ return false;
+ }
+ else
+ {
+ // When Einsum op is converted to FullyConnected, original rank can be 3.
+ auto before_dim0 = before->dim(0).value();
+ auto before_dim1 = before->dim(1).value();
+ auto before_dim2 = before->dim(2).value();
+
+ // Check if [dim0, dim1, dim2] --> [dim2, dim0 * dim1] or
+ // [dim0, dim1, dim2] --> [dim1 * dim2, dim0]
+ if ((after_dim0 == before_dim1 * before_dim2 && after_dim1 == before_dim0) ||
+ (after_dim0 == before_dim2 && after_dim1 == before_dim0 * before_dim1))
+ {
+ for (uint32_t i = 0; i < after_dim0; ++i)
+ for (uint32_t j = 0; j < after_dim1; ++j)
+ if (after->at<loco::DataType::FLOAT32>(i * after_dim1 + j) !=
+ before->at<loco::DataType::FLOAT32>(j * after_dim0 + i))
+ return false;
+ }
+ }
+
+ return true;
}
- return noOp;
-};
+ return false;
+}
} // namespace
@@ -108,72 +111,128 @@ template <int32_t V> class BCQFuser;
template <> class BCQFuser<1>
{
public:
- bool fuseBCQ(loco::Graph *g)
+ BCQFuser<1>(int32_t original_output_cnt, int32_t bundle_cnt)
+ : _original_output_cnt{original_output_cnt}, _bundle_cnt{bundle_cnt}
{
- bool changed = false;
+ // Do nothing
+ }
- for (auto node : loco::all_nodes(g))
+public:
+ void register_bcq_info(loco::Graph *g)
+ {
+ for (auto node : loco::output_nodes(g))
{
- if (auto circle_const = dynamic_cast<luci::CircleConst *>(node))
+ auto output_node = loco::must_cast<luci::CircleOutput *>(node);
+
+ /**
+ * The first output of the model is metadata for BCQ. Refer to the following example.
+ *
+ * When original_output_cnt is 2,
+ * BCQ_METADATA, original_output_1, original_output_2, BCQ_INFO_1, ...
+ */
+ if ((int)output_node->index() > _original_output_cnt)
{
- add_BCQ_info_node(circle_const);
+ const auto prefix = (output_node->index() - (_original_output_cnt + 1)) / (_bundle_cnt);
+ const MetadataType metadata_type = static_cast<MetadataType>(
+ (output_node->index() - (_original_output_cnt + 1)) % (_bundle_cnt));
+ const auto circle_node = loco::must_cast<luci::CircleNode *>(output_node->from());
+ add_BCQ_info_node(prefix, metadata_type, circle_node);
}
}
+ }
+ bool fuseBCQ(loco::Graph *g)
+ {
if (!is_bcqinfo_valid())
return false;
- for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
{
+ // Fuse Gather to BCQGather
if (auto gather = dynamic_cast<luci::CircleGather *>(node))
{
- auto params = dynamic_cast<luci::CircleConst *>(gather->params());
- if (params != nullptr && has_BCQ_info(params))
+ if (auto params = dynamic_cast<luci::CircleConst *>(gather->params()))
{
+ auto prefix = get_prefix_of_const(params);
+ if (prefix == -1 || !is_valid_prefix(prefix))
+ continue;
+
+ auto name = gather->name();
+ assert(name.length() > 0);
+
auto bcq_gather = g->nodes()->create<luci::CircleBCQGather>();
+ luci::add_origin(bcq_gather, luci::get_origin(gather));
bcq_gather->op_version(1);
- bcq_gather->input_scales(get_alpha(params));
- bcq_gather->input_binary(get_packed_binary_code(params));
+ bcq_gather->input_scales(alpha(g, prefix));
+ bcq_gather->input_binary(packed_binary_code(g, prefix));
bcq_gather->indices(gather->indices());
- bcq_gather->input_clusters(packed_clusters(params));
-
- // input_binary shape : [output_size, hidden_size]
- const auto binary_hidden_size =
- loco::must_cast<luci::CircleConst *>(bcq_gather->input_binary())->dim(1).value() * 32;
- bcq_gather->input_hidden_size(binary_hidden_size);
+ bcq_gather->input_clusters(packed_clusters(g, prefix));
+ bcq_gather->name(name + "/BCQGather");
- if (do_w_x(params))
+ if (_do_w_x[prefix]->at<loco::DataType::BOOL>(0))
{
+ bcq_gather->input_hidden_size(params->dim(1).value());
bcq_gather->axis(gather->axis());
+ loco::replace(gather).with(bcq_gather);
}
else
{
+ bcq_gather->input_hidden_size(params->dim(0).value());
const auto axis_transpose = (gather->axis() == 0) ? 1 : 0;
bcq_gather->axis(axis_transpose);
- }
- loco::replace(gather).with(bcq_gather);
+ const auto indices_rank =
+ loco::must_cast<luci::CircleNode *>(gather->indices())->rank();
- changed = true;
+ auto perm = g->nodes()->create<luci::CircleConst>();
+ perm->dtype(loco::DataType::S32);
+ perm->size<loco::DataType::S32>(1 + indices_rank);
+ perm->rank(1);
+ perm->dim(0) = 1 + indices_rank;
+ for (uint32_t idx = 0; idx < indices_rank; ++idx)
+ perm->at<loco::DataType::S32>(idx) = idx + 1;
+ perm->at<loco::DataType::S32>(indices_rank) = 0;
+ perm->shape_status(luci::ShapeStatus::VALID);
+ perm->name(name + "/Transpose/perm");
+
+ auto output_transpose = g->nodes()->create<luci::CircleTranspose>();
+ luci::add_origin(output_transpose, luci::get_origin(gather));
+ output_transpose->a(bcq_gather);
+ output_transpose->perm(perm);
+ output_transpose->name(name + "/Transpose");
+
+ loco::replace(gather).with(output_transpose);
+ }
+
+ return true;
}
}
- else if (auto fully_connected = dynamic_cast<luci::CircleFullyConnected *>(node))
+
+ // Fuse FullyConnected to BCQFullyConnected
+ if (auto fully_connected = dynamic_cast<luci::CircleFullyConnected *>(node))
{
- auto weights = dynamic_cast<luci::CircleConst *>(fully_connected->weights());
- if (weights != nullptr && has_BCQ_info(weights))
+ if (auto weights = dynamic_cast<luci::CircleConst *>(fully_connected->weights()))
{
+ auto prefix = get_prefix_of_const(weights);
+ if (prefix == -1 || !is_valid_prefix(prefix))
+ continue;
+
+ auto name = fully_connected->name();
+ assert(name.length() > 0);
+
auto bcq_fc = g->nodes()->create<luci::CircleBCQFullyConnected>();
+ luci::add_origin(bcq_fc, luci::get_origin(fully_connected));
bcq_fc->op_version(1);
- bcq_fc->weights_scales(get_alpha(weights));
- bcq_fc->weights_binary(get_packed_binary_code(weights));
+ bcq_fc->weights_scales(alpha(g, prefix));
+ bcq_fc->weights_binary(packed_binary_code(g, prefix));
bcq_fc->bias(fully_connected->bias());
- bcq_fc->weights_clusters(packed_clusters(weights));
+ bcq_fc->weights_clusters(packed_clusters(g, prefix));
bcq_fc->fusedActivationFunction(fully_connected->fusedActivationFunction());
+ bcq_fc->name(name + "/BCQFullyConnected");
loco::Node *bcq_input = fully_connected->input();
- int32_t batch_rank = 0;
// If input of BCQFullyConnected has more than rank 2, we should reshape it as rank 2
const auto original_input = loco::must_cast<luci::CircleNode *>(fully_connected->input());
@@ -186,217 +245,331 @@ public:
new_shape->rank(1);
new_shape->dim(0) = 2;
- auto batch_size = 1;
- for (uint32_t i = 0; i < original_input->rank() - 1; ++i)
- batch_size *= original_input->dim(i).value();
-
- new_shape->at<loco::DataType::S32>(0) = batch_size;
- new_shape->at<loco::DataType::S32>(1) =
- original_input->dim(original_input->rank() - 1).value();
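+ // -1 lets Reshape infer the flattened batch size; dim 1 is pinned to the
+ // weight's hidden size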
+ new_shape->at<loco::DataType::S32>(0) = -1;
+ new_shape->at<loco::DataType::S32>(1) = weights->dim(1).value();
new_shape->shape_status(luci::ShapeStatus::VALID);
+ new_shape->name(name + "/Reshape/shape");
auto reshape = g->nodes()->create<luci::CircleReshape>();
+ luci::add_origin(reshape, luci::get_origin(fully_connected));
reshape->tensor(original_input);
reshape->shape(new_shape);
+ reshape->name(name + "/Reshape");
bcq_input = reshape;
- batch_rank = original_input->rank() - 2;
}
// If x_w formation, we should insert Transpose in front and back of BCQFullyConnected
- if (do_w_x(weights))
- {
- const auto binary_hidden_size =
- loco::must_cast<luci::CircleNode *>(fully_connected->input())
- ->dim(batch_rank)
- .value();
- bcq_fc->weights_hidden_size(binary_hidden_size);
- bcq_fc->input(bcq_input);
- loco::replace(fully_connected).with(bcq_fc);
- }
- else
- {
- const auto binary_hidden_size =
- loco::must_cast<luci::CircleNode *>(fully_connected->input())
- ->dim(1 + batch_rank)
- .value();
- bcq_fc->weights_hidden_size(binary_hidden_size);
+ bcq_fc->weights_hidden_size(weights->dim(1).value());
+
+ auto perm = g->nodes()->create<luci::CircleConst>();
+ perm->dtype(loco::DataType::S32);
+ perm->size<loco::DataType::S32>(2);
+ perm->rank(1);
+ perm->dim(0) = 2;
+ perm->at<loco::DataType::S32>(0) = 1;
+ perm->at<loco::DataType::S32>(1) = 0;
+ perm->shape_status(luci::ShapeStatus::VALID);
+ perm->name(name + "/Transpose/perm");
+
+ auto input_transpose = g->nodes()->create<luci::CircleTranspose>();
+ luci::add_origin(input_transpose, luci::get_origin(fully_connected));
+ input_transpose->a(bcq_input);
+ input_transpose->perm(perm);
+ input_transpose->name(name + "_input/Transpose");
+
+ bcq_fc->input(input_transpose);
+
+ auto output_transpose = g->nodes()->create<luci::CircleTranspose>();
+ luci::add_origin(output_transpose, luci::get_origin(fully_connected));
+ output_transpose->a(bcq_fc);
+ output_transpose->perm(perm);
+ output_transpose->name(name + "_output/Transpose");
+
+ loco::replace(fully_connected).with(output_transpose);
+
+ return true;
+ }
+ else if (auto weights_as_input =
+ dynamic_cast<luci::CircleConst *>(fully_connected->input()))
+ {
+ auto prefix = get_prefix_of_const(weights_as_input);
+ if (prefix == -1 || !is_valid_prefix(prefix))
+ continue;
+
+ assert(_do_w_x[prefix]->at<loco::DataType::BOOL>(0) == true);
+
+ auto name = weights_as_input->name();
+ assert(name.length() > 0);
+
+ auto perm = g->nodes()->create<luci::CircleConst>();
+ perm->dtype(loco::DataType::S32);
+ perm->size<loco::DataType::S32>(2);
+ perm->rank(1);
+ perm->dim(0) = 2;
+ perm->at<loco::DataType::S32>(0) = 1;
+ perm->at<loco::DataType::S32>(1) = 0;
+ perm->shape_status(luci::ShapeStatus::VALID);
+ perm->name(name + "/Transpose/perm");
+
+ auto input_transpose = g->nodes()->create<luci::CircleTranspose>();
+ luci::add_origin(input_transpose, luci::get_origin(fully_connected));
+ input_transpose->a(fully_connected->weights());
+ input_transpose->perm(perm);
+ input_transpose->name(name + "/Transpose");
- auto perm = g->nodes()->create<luci::CircleConst>();
- perm->dtype(loco::DataType::S32);
- perm->size<loco::DataType::S32>(2);
- perm->rank(1);
- perm->dim(0) = 2;
- perm->at<loco::DataType::S32>(0) = 1;
- perm->at<loco::DataType::S32>(1) = 0;
- perm->shape_status(luci::ShapeStatus::VALID);
+ auto bcq_fc = g->nodes()->create<luci::CircleBCQFullyConnected>();
+ luci::add_origin(bcq_fc, luci::get_origin(fully_connected));
- auto input_transpose = g->nodes()->create<luci::CircleTranspose>();
- input_transpose->a(bcq_input);
- input_transpose->perm(perm);
+ assert(dynamic_cast<luci::CircleOutputExclude *>(fully_connected->bias()) != nullptr);
- bcq_fc->input(input_transpose);
+ bcq_fc->op_version(1);
+ bcq_fc->weights_scales(alpha(g, prefix));
+ bcq_fc->weights_binary(packed_binary_code(g, prefix));
+ bcq_fc->bias(fully_connected->bias());
+ bcq_fc->weights_clusters(packed_clusters(g, prefix));
+ bcq_fc->fusedActivationFunction(fully_connected->fusedActivationFunction());
- auto output_transpose = g->nodes()->create<luci::CircleTranspose>();
- output_transpose->a(bcq_fc);
- output_transpose->perm(perm);
+ bcq_fc->weights_hidden_size(weights_as_input->dim(1).value());
+ bcq_fc->input(input_transpose);
+ bcq_fc->name(name + "/BCQFullyConnected");
- loco::replace(fully_connected).with(output_transpose);
- }
+ loco::replace(fully_connected).with(bcq_fc);
- changed = true;
+ return true;
}
}
}
- if (changed)
- clear_BCQ_nodes();
-
- return changed;
+ return false;
}
private:
- void add_BCQ_info_node(luci::CircleConst *node)
+ enum MetadataType
+ {
+ DO_W_X,
+ ALPHA,
+ BINARY_CODE,
+ NUM_OF_CLUSTERS,
+ SIZE_OF_CLUSTERS,
+ QBITS_OF_CLUSTERS,
+ FUSABLE_OP,
+ DEQUANT_WEIGHT,
+ };
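+ // The enum order must match the per-bundle output layout decoded in
+ // register_bcq_info: metadata_type = (index - original_output_cnt - 1) % bundle_cnt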
+
+ void add_BCQ_info_node(int32_t prefix, MetadataType metadata_type, luci::CircleNode *node)
{
- const auto node_name = node->name();
- const auto prefix = node_name_prefix(node_name);
-
- // If bcqinfo_* nodes are held by Reshape operation,
- // shape of bcqinfo_* nodes are copied to `shape` input of Reshape operation.
- // Then the name becomes bcqinfo_*_copy_shape.
- // We should prevent this node not to added to bcq information.
- if (node_name.find("_copy_shape") != std::string::npos)
+ if (metadata_type == MetadataType::FUSABLE_OP)
+ {
+ _fusable_op[prefix] = node;
return;
+ }
- if (node_name.find("bcqinfo_do_w_x") != std::string::npos)
- _do_w_x[prefix] = node;
- else if (node_name.find("bcqinfo_alpha") != std::string::npos)
- _alpha[prefix] = node;
- else if (node_name.find("bcqinfo_packed_binary_code") != std::string::npos)
- _packed_binary_code[prefix] = node;
- else if (node_name.find("bcqinfo_number_of_clusters") != std::string::npos)
- _number_of_clusters[prefix] = node;
- else if (node_name.find("bcqinfo_size_of_clusters") != std::string::npos)
- _size_of_clusters[prefix] = node;
- else if (node_name.find("bcqinfo_qbits_of_clusters") != std::string::npos)
- _qbits_of_clusters[prefix] = node;
- else if (node_name.find("bcqinfo_dequant_weight") != std::string::npos)
- _dequant_weight[prefix] = node;
- }
+ luci::CircleConst *const_node;
- bool has_BCQ_info(luci::CircleConst *node)
- {
- const auto prefix = node_name_prefix(node->name());
- bool has_info = true;
-
- has_info &= (_do_w_x.find(prefix) != _do_w_x.end());
- has_info &= (_alpha.find(prefix) != _alpha.end());
- has_info &= (_packed_binary_code.find(prefix) != _packed_binary_code.end());
- has_info &= (_number_of_clusters.find(prefix) != _number_of_clusters.end());
- has_info &= (_size_of_clusters.find(prefix) != _size_of_clusters.end());
- has_info &= (_qbits_of_clusters.find(prefix) != _qbits_of_clusters.end());
- // bcqinfo_dequant_weight is just for validation, so not always exists.
-
- return has_info;
+ // The converter in TensorFlow v1.x sometimes generates a Reshape op
+ if (auto reshape = dynamic_cast<luci::CircleReshape *>(node))
+ const_node = loco::must_cast<luci::CircleConst *>(reshape->tensor());
+ else
+ const_node = loco::must_cast<luci::CircleConst *>(node);
+
+ if (metadata_type == MetadataType::DO_W_X)
+ _do_w_x[prefix] = const_node;
+ else if (metadata_type == MetadataType::ALPHA)
+ _alpha[prefix] = const_node;
+ else if (metadata_type == MetadataType::BINARY_CODE)
+ _packed_binary_code[prefix] = const_node;
+ else if (metadata_type == MetadataType::NUM_OF_CLUSTERS)
+ _number_of_clusters[prefix] = const_node;
+ else if (metadata_type == MetadataType::SIZE_OF_CLUSTERS)
+ _size_of_clusters[prefix] = const_node;
+ else if (metadata_type == MetadataType::QBITS_OF_CLUSTERS)
+ _qbits_of_clusters[prefix] = const_node;
+ else
+ _dequant_weight[prefix] = const_node;
}
- /**
- * @brief Exclude BCQ information nodes which are used for fusing BCQ operations
- * from graph output by using CircleOutputExclude
- */
- void clear_BCQ_nodes()
+ int32_t get_prefix_of_const(luci::CircleConst *w_after)
{
- auto clear_nodes = [](std::map<std::string, luci::CircleConst *> &nodes) {
- for (auto &n : nodes)
- {
- auto node = n.second;
+ for (auto n : _fusable_op)
+ {
+ auto prefix = n.first;
+ auto w_before = loco::must_cast<luci::CircleConst *>(n.second);
+ if (is_fusable_const(w_before, w_after, _do_w_x[prefix]->at<loco::DataType::BOOL>(0)))
+ return prefix;
+ }
- for (auto s : loco::succs(node))
- {
- if (auto outnode = dynamic_cast<luci::CircleOutput *>(s))
- {
- outnode->from(createNoOp(node));
- }
- else if (auto reshape_node = dynamic_cast<luci::CircleReshape *>(s))
- {
- for (auto o : loco::succs(reshape_node))
- {
- auto circle_output = loco::must_cast<luci::CircleOutput *>(o);
- circle_output->from(createNoOp(reshape_node));
- }
- }
- }
- }
- };
-
- clear_nodes(_do_w_x);
- clear_nodes(_alpha);
- clear_nodes(_packed_binary_code);
- clear_nodes(_number_of_clusters);
- clear_nodes(_size_of_clusters);
- clear_nodes(_qbits_of_clusters);
- clear_nodes(_dequant_weight);
+ return -1;
}
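+
+ // NOTE get_prefix_of_const() matches a weight constant found in the graph
+ // against the weights registered above, using is_fusable_const() with the
+ // corresponding do_w_x flag; -1 means no registered bundle matches.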
bool is_bcqinfo_valid()
{
- // do_w_x should be int32 or bool type
+ LOGGER(l);
+
for (auto n : _do_w_x)
{
- if (n.second->dtype() != loco::DataType::BOOL && n.second->dtype() != loco::DataType::S32)
+ // do_w_x should be BOOL type
+ if (n.second->dtype() != loco::DataType::BOOL)
+ {
+ WARN(l) << "FuseBCQPass : do_w_x has wrong type" << std::endl;
+ return false;
+ }
+ }
+
+ for (auto n : _alpha)
+ {
+ // alpha should be FLOAT32 type
+ if (n.second->dtype() != loco::DataType::FLOAT32)
+ {
+ WARN(l) << "FuseBCQPass : alpha has wrong type" << std::endl;
+ return false;
+ }
+ }
+
+ for (auto n : _packed_binary_code)
+ {
+ // packed_binary_code should be INT32 type
+ if (n.second->dtype() != loco::DataType::S32)
+ {
+ WARN(l) << "FuseBCQPass : packed_binary_code has wrong type" << std::endl;
+ return false;
+ }
+ }
+
+ for (auto n : _number_of_clusters)
+ {
+ // number_of_clusters should be INT32 type
+ if (n.second->dtype() != loco::DataType::S32)
+ {
+ WARN(l) << "FuseBCQPass : number_of_clusters has wrong type" << std::endl;
+ return false;
+ }
+ }
+
+ for (auto n : _size_of_clusters)
+ {
+ // size_of_clusters should be INT32 type
+ if (n.second->dtype() != loco::DataType::S32)
+ {
+ WARN(l) << "FuseBCQPass : size_of_clusters has wrong type" << std::endl;
+ return false;
+ }
+ }
+
+ for (auto n : _qbits_of_clusters)
+ {
+ // qbits_of_clusters should be INT32 type
+ if (n.second->dtype() != loco::DataType::S32)
+ {
+ WARN(l) << "FuseBCQPass : qbits_of_clusters has wrong type" << std::endl;
return false;
+ }
}
+ for (auto n : _fusable_op)
+ {
+ // fusable_op should be FLOAT32 type
+ if (n.second->dtype() != loco::DataType::FLOAT32)
+ {
+ WARN(l) << "FuseBCQPass : fusable_op has wrong type" << std::endl;
+ return false;
+ }
+ }
+
+ // As dequant_weight is not used for fusing, skip validation.
+
return true;
}
-private:
- bool do_w_x(luci::CircleConst *node)
+ bool is_valid_prefix(int32_t prefix)
{
- const auto prefix = node_name_prefix(node->name());
+ LOGGER(l);
- if (_do_w_x[prefix]->dtype() == loco::DataType::S32)
- return _do_w_x[prefix]->at<loco::DataType::S32>(0) == 1;
- else
- return _do_w_x[prefix]->at<loco::DataType::BOOL>(0);
- }
+ if (_do_w_x.find(prefix) == _do_w_x.end())
+ {
+ WARN(l) << "do_w_x is not found" << std::endl;
+ return false;
+ }
- luci::CircleConst *get_alpha(luci::CircleConst *node)
- {
- const auto prefix = node_name_prefix(node->name());
- return _alpha[prefix];
- }
+ if (_alpha.find(prefix) == _alpha.end())
+ {
+ WARN(l) << "alpha is not found" << std::endl;
+ return false;
+ }
- luci::CircleConst *get_packed_binary_code(luci::CircleConst *node)
- {
- const auto prefix = node_name_prefix(node->name());
- return _packed_binary_code[prefix];
- }
+ if (_packed_binary_code.find(prefix) == _packed_binary_code.end())
+ {
+ WARN(l) << "packed_binary_code is not found" << std::endl;
+ return false;
+ }
- luci::CircleConst *get_number_of_clusters(luci::CircleConst *node)
- {
- const auto prefix = node_name_prefix(node->name());
- return _number_of_clusters[prefix];
+ if (_number_of_clusters.find(prefix) == _number_of_clusters.end())
+ {
+ WARN(l) << "number_of_clusters is not found" << std::endl;
+ return false;
+ }
+
+ if (_size_of_clusters.find(prefix) == _size_of_clusters.end())
+ {
+ WARN(l) << "size_of_clusters is not found" << std::endl;
+ return false;
+ }
+
+ if (_qbits_of_clusters.find(prefix) == _qbits_of_clusters.end())
+ {
+ WARN(l) << "qbits_of_clusters is not found" << std::endl;
+ return false;
+ }
+
+ if (_fusable_op.find(prefix) == _fusable_op.end())
+ {
+ WARN(l) << "fusable_op is not found" << std::endl;
+ return false;
+ }
+
+ // As dequant_weight is not used for fusing, skip validation.
+
+ return true;
}
- luci::CircleConst *get_size_of_clusters(luci::CircleConst *node)
+private:
+ luci::CircleConst *alpha(loco::Graph *graph, int32_t prefix)
{
- const auto prefix = node_name_prefix(node->name());
- return _size_of_clusters[prefix];
+ auto new_alpha = graph->nodes()->create<luci::CircleConst>();
+
+ new_alpha->dtype(loco::DataType::FLOAT32);
+ new_alpha->size<loco::DataType::FLOAT32>(_alpha[prefix]->size<loco::DataType::FLOAT32>());
+ new_alpha->rank(1);
+ new_alpha->dim(0) = _alpha[prefix]->dim(0);
+ for (uint32_t i = 0; i < _alpha[prefix]->size<loco::DataType::FLOAT32>(); ++i)
+ new_alpha->at<loco::DataType::FLOAT32>(i) = _alpha[prefix]->at<loco::DataType::FLOAT32>(i);
+ new_alpha->shape_status(luci::ShapeStatus::VALID);
+
+ return new_alpha;
}
- luci::CircleConst *get_qbits_of_clusters(luci::CircleConst *node)
+ luci::CircleConst *packed_binary_code(loco::Graph *graph, int32_t prefix)
{
- const auto prefix = node_name_prefix(node->name());
- return _qbits_of_clusters[prefix];
+ auto new_beta = graph->nodes()->create<luci::CircleConst>();
+
+ new_beta->dtype(loco::DataType::S32);
+ new_beta->size<loco::DataType::S32>(_packed_binary_code[prefix]->size<loco::DataType::S32>());
+ new_beta->rank(2);
+ new_beta->dim(0) = _packed_binary_code[prefix]->dim(0);
+ new_beta->dim(1) = _packed_binary_code[prefix]->dim(1);
+ for (uint32_t i = 0; i < _packed_binary_code[prefix]->size<loco::DataType::S32>(); ++i)
+ new_beta->at<loco::DataType::S32>(i) =
+ _packed_binary_code[prefix]->at<loco::DataType::S32>(i);
+ new_beta->shape_status(luci::ShapeStatus::VALID);
+
+ return new_beta;
}
- luci::CircleConst *packed_clusters(luci::CircleConst *node)
+ luci::CircleConst *packed_clusters(loco::Graph *graph, int32_t prefix)
{
- auto graph = node->graph();
- auto qbits_of_clusters = get_qbits_of_clusters(node);
- auto size_of_clusters = get_size_of_clusters(node);
- const auto number_of_clusters = get_number_of_clusters(node)->at<loco::DataType::S32>(0);
+ auto qbits_of_clusters = _qbits_of_clusters[prefix];
+ auto size_of_clusters = _size_of_clusters[prefix];
+ const auto number_of_clusters = _number_of_clusters[prefix]->at<loco::DataType::S32>(0);
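+
+ // Pack the two per-cluster arrays as consecutive {qbits, size} pairs:
+ // element 2 * i is qbits_of_clusters[i], element 2 * i + 1 is
+ // size_of_clusters[i].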
auto packed_clusters = graph->nodes()->create<luci::CircleConst>();
packed_clusters->dtype(loco::DataType::S32);
@@ -409,22 +582,27 @@ private:
for (int i = 0; i < number_of_clusters; ++i)
{
packed_clusters->at<loco::DataType::S32>(i * 2) =
- qbits_of_clusters->at<loco::DataType::S32>(i);
+ qbits_of_clusters->at<loco::DataType::S32>(i);
packed_clusters->at<loco::DataType::S32>(i * 2 + 1) =
- size_of_clusters->at<loco::DataType::S32>(i);
+ size_of_clusters->at<loco::DataType::S32>(i);
}
return packed_clusters;
}
private:
- std::map<std::string, luci::CircleConst *> _do_w_x;
- std::map<std::string, luci::CircleConst *> _alpha;
- std::map<std::string, luci::CircleConst *> _packed_binary_code;
- std::map<std::string, luci::CircleConst *> _number_of_clusters;
- std::map<std::string, luci::CircleConst *> _size_of_clusters;
- std::map<std::string, luci::CircleConst *> _qbits_of_clusters;
- std::map<std::string, luci::CircleConst *> _dequant_weight;
+ std::map<int32_t, luci::CircleConst *> _do_w_x;
+ std::map<int32_t, luci::CircleConst *> _alpha;
+ std::map<int32_t, luci::CircleConst *> _packed_binary_code;
+ std::map<int32_t, luci::CircleConst *> _number_of_clusters;
+ std::map<int32_t, luci::CircleConst *> _size_of_clusters;
+ std::map<int32_t, luci::CircleConst *> _qbits_of_clusters;
+ std::map<int32_t, luci::CircleConst *> _dequant_weight;
+ std::map<int32_t, luci::CircleNode *> _fusable_op;
+
+private:
+ int32_t _original_output_cnt = 0;
+ int32_t _bundle_cnt = 0;
};
} // namespace
@@ -432,45 +610,89 @@ private:
namespace luci
{
-bool FuseBCQPass::run(loco::Graph *g)
+bool FuseBCQPass::run(luci::Module *m)
{
bool changed = false;
- // Find BCQ version information and check validity.
- luci::CircleConst *version_node = nullptr;
- for (auto node : loco::all_nodes(g))
+ const int32_t start_magicnum = -2e9 + 27;
+ const int32_t end_magicnum = 2e9 - 27;
+
+ loco::Graph *main_graph = m->graph(0);
+
+ luci::CircleConst *metadata_node = nullptr;
+ for (auto node : loco::output_nodes(main_graph))
{
- if (auto circle_const = dynamic_cast<luci::CircleConst *>(node))
+ auto output_node = loco::must_cast<luci::CircleOutput *>(node);
+
+ // Metadata node should be the first output
+ if (output_node->index() != 0)
+ continue;
+
+ // Metadata should be a constant whose dtype is S32
+ auto const_node = dynamic_cast<luci::CircleConst *>(output_node->from());
+ if (const_node == nullptr || const_node->dtype() != loco::DataType::S32)
+ continue;
+
+ // Metadata has at least four elements
+ const auto element_cnt = const_node->size<loco::DataType::S32>();
+ if (element_cnt < 4)
+ continue;
+
+ // Metadata has magic numbers at the first and last positions
+ const auto start_value = const_node->at<loco::DataType::S32>(0);
+ const auto end_value = const_node->at<loco::DataType::S32>(element_cnt - 1);
+ if (start_value == start_magicnum && end_value == end_magicnum)
{
- if (circle_const->name().find("/bcqinfo_version") != std::string::npos)
- {
- // There should be only one bcqinfo_version in the model
- if (version_node != nullptr)
- {
- assert(false && "Multiple version information found");
- return false;
- }
-
- version_node = circle_const;
- }
+ metadata_node = const_node;
+ break;
}
}
- // If version node is not found, regard it as version 1.
- int32_t bcq_version = (version_node != nullptr) ? version_node->at<loco::DataType::S32>(0) : 1;
+ if (metadata_node != nullptr)
+ {
+ const auto bcq_version = metadata_node->at<loco::DataType::S32>(1);
+ const auto original_output_cnt = metadata_node->at<loco::DataType::S32>(2);
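+
+ // Metadata layout (S32 constant): [0] start magic number, [1] BCQ version,
+ // [2] original output count, [3] bundle count (version 1 only), ...,
+ // [last] end magic number.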
- if (bcq_version == 1)
- changed = BCQFuser<1>().fuseBCQ(g);
- else
- assert(false && "Not supported BCQ version");
+ if (bcq_version == 1)
+ {
+ const auto bundle_cnt = metadata_node->at<loco::DataType::S32>(3);
- if (changed && version_node != nullptr)
- {
- // If BCQ is applied and version node was found, remove the node.
- loco::replace(version_node).with(createNoOp(version_node));
+ BCQFuser<1> fuser{original_output_cnt, bundle_cnt};
+ fuser.register_bcq_info(main_graph);
+
+ for (size_t g = 0; g < m->size(); ++g)
+ if (fuser.fuseBCQ(m->graph(g)))
+ changed = true;
+ }
+ else
+ {
+ LOGGER(l);
+ WARN(l) << "Unsupported BCQ version found." << std::endl;
+ }
+
+ // If fusion made no change, detach all BCQ information nodes from the outputs
+ if (changed == false)
+ {
+ for (auto node : loco::output_nodes(main_graph))
+ {
+ auto output_node = loco::must_cast<luci::CircleOutput *>(node);
+ if (output_node->index() == 0 || (int)output_node->index() > original_output_cnt)
+ {
+ auto noOp = main_graph->nodes()->create<luci::CircleOutputExclude>();
+ output_node->from(noOp);
+ changed = true;
+ }
+ }
+ }
}
return changed;
}
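+
+// NOTE BCQ metadata is attached to the outputs of the module's main graph,
+// so fusion is driven from run(luci::Module *) above; the per-graph overload
+// below intentionally does nothing.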
+bool FuseBCQPass::run(loco::Graph *)
+{
+ // Do nothing for graph
+ return false;
+}
+
} // namespace luci
diff --git a/compiler/luci/pass/src/FuseBCQPass.test.cpp b/compiler/luci/pass/src/FuseBCQPass.test.cpp
new file mode 100644
index 000000000..73677affd
--- /dev/null
+++ b/compiler/luci/pass/src/FuseBCQPass.test.cpp
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseBCQPass.h"
+
+#include <gtest/gtest.h>
+
+TEST(FuseBCQPassTest, name)
+{
+ luci::FuseBCQPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
diff --git a/compiler/luci/pass/src/FuseBatchNormWithConvPass.cpp b/compiler/luci/pass/src/FuseBatchNormWithConvPass.cpp
new file mode 100644
index 000000000..062da7058
--- /dev/null
+++ b/compiler/luci/pass/src/FuseBatchNormWithConvPass.cpp
@@ -0,0 +1,232 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseBatchNormWithConvPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+namespace
+{
+/**
+ * Fuse Mul-Add to Conv2D if possible.
+ *
+ * NOTE TF's BatchNormalization is converted to Mul and Add.
+ *
+ * BEFORE
+ * | [CircleConst]
+ * | / [CircleConst]
+ * | / /
+ * [CircleConv2D] [CircleConst]
+ * | /
+ * [CircleMul] [CircleConst]
+ * | /
+ * [CircleAdd]
+ * |
+ *
+ * AFTER
+ * | [CircleConst]
+ * +--------------+ / [CircleConst]
+ * | | / /
+ * | [CircleConv2D] [CircleConst]
+ * [CircleConst] | | /
+ * [CircleConst] \ | [CircleMul] [CircleConst]
+ * \ \ | | /
+ * [CircleConv2D] [CircleAdd]
+ * |
+ */
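+// With y = Add(Mul(Conv2D(x, W, b), scale), shift) and per-channel scale and
+// shift of shape [1, 1, 1, out_channel], the fusion computes
+//   W'[c][h][w][i] = W[c][h][w][i] * scale[c]
+//   b'[c]          = b[c] * scale[c] + shift[c]
+// so that Conv2D(x, W', b') equals y element-wise.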
+bool fused_batch_norm_with_conv(luci::CircleAdd *add)
+{
+ luci::CircleMul *mul = nullptr;
+ luci::CircleConst *shift = nullptr;
+ if (auto add_lhs = dynamic_cast<luci::CircleMul *>(add->x()))
+ {
+ mul = add_lhs;
+ shift = dynamic_cast<luci::CircleConst *>(add->y());
+ }
+ else if (auto add_rhs = dynamic_cast<luci::CircleMul *>(add->y()))
+ {
+ mul = add_rhs;
+ shift = dynamic_cast<luci::CircleConst *>(add->x());
+ }
+
+ // If CircleMul is not found or constant operand of CircleAdd is not found,
+ // this pass cannot be applied.
+ if (mul == nullptr || shift == nullptr)
+ return false;
+
+ // If FusedActivationFunction of mul is not none, this pass cannot be applied.
+ if (mul->fusedActivationFunction() != luci::FusedActFunc::NONE)
+ return false;
+
+ // To apply this pass, shape of shift should be [1, 1, 1, out_channel].
+ if (shift->rank() != 4)
+ return false;
+ for (uint32_t i = 0; i < 3; ++i)
+ if (shift->dim(i).value() != 1)
+ return false;
+
+ luci::CircleConv2D *conv = nullptr;
+ luci::CircleConst *scale = nullptr;
+ if (auto mul_lhs = dynamic_cast<luci::CircleConv2D *>(mul->x()))
+ {
+ conv = mul_lhs;
+ scale = dynamic_cast<luci::CircleConst *>(mul->y());
+ }
+ else if (auto mul_rhs = dynamic_cast<luci::CircleConv2D *>(mul->y()))
+ {
+ conv = mul_rhs;
+ scale = dynamic_cast<luci::CircleConst *>(mul->x());
+ }
+
+ // If CircleConv2D is not found or constant operand of CircleMul is not found,
+ // this pass cannot be applied.
+ if (conv == nullptr || scale == nullptr)
+ return false;
+
+ // To apply this pass, shape of scale should be [1, 1, 1, out_channel].
+ if (scale->rank() != 4)
+ return false;
+ for (uint32_t i = 0; i < 3; ++i)
+ if (scale->dim(i).value() != 1)
+ return false;
+
+ // If FusedActivationFunction of conv is not none, this pass cannot be applied.
+ if (conv->fusedActivationFunction() != luci::FusedActFunc::NONE)
+ return false;
+
+ luci::CircleConst *filter = dynamic_cast<luci::CircleConst *>(conv->filter());
+ luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(conv->bias());
+
+ // If filter or bias of conv is not const, this pass cannot be applied.
+ if (filter == nullptr || bias == nullptr)
+ return false;
+
+ // If the dtype of filter differs from that of scale or shift, multiplication may be impossible.
+ if (filter->dtype() != scale->dtype())
+ return false;
+ if (filter->dtype() != shift->dtype())
+ return false;
+
+ // TODO Support more data type
+ if (filter->dtype() != loco::DataType::FLOAT32)
+ return false;
+
+ // Output channel dimensions should be the same. If not, this pass cannot be applied.
+ if (filter->dim(0).value() != scale->dim(3).value())
+ return false;
+ if (filter->dim(0).value() != shift->dim(3).value())
+ return false;
+
+ auto name = add->name();
+ assert(name.length() > 0);
+
+ luci::CircleConv2D *fused_conv = add->graph()->nodes()->create<luci::CircleConv2D>();
+ luci::CircleConst *fused_filter = add->graph()->nodes()->create<luci::CircleConst>();
+ luci::CircleConst *fused_bias = add->graph()->nodes()->create<luci::CircleConst>();
+
+ uint32_t filter_out_channel = filter->dim(0).value();
+ uint32_t filter_height = filter->dim(1).value();
+ uint32_t filter_width = filter->dim(2).value();
+ uint32_t filter_in_channel = filter->dim(3).value();
+
+ // Copy filter
+ fused_filter->dtype(filter->dtype());
+ fused_filter->size<loco::DataType::FLOAT32>(filter->size<loco::DataType::FLOAT32>());
+ fused_filter->rank(4);
+ fused_filter->dim(0).set(filter_out_channel);
+ fused_filter->dim(1).set(filter_height);
+ fused_filter->dim(2).set(filter_width);
+ fused_filter->dim(3).set(filter_in_channel);
+ fused_filter->shape_status(luci::ShapeStatus::VALID);
+ fused_filter->name(name + "/Conv2D/filter");
+
+ // Fuse scale to new filter
+ for (uint32_t c = 0; c < filter_out_channel; c++)
+ {
+ for (uint32_t h = 0; h < filter_height; h++)
+ {
+ for (uint32_t w = 0; w < filter_width; w++)
+ {
+ for (uint32_t b = 0; b < filter_in_channel; b++)
+ {
+ uint32_t offset = c * filter_height * filter_width * filter_in_channel +
+ h * filter_width * filter_in_channel + w * filter_in_channel + b;
+ fused_filter->at<loco::DataType::FLOAT32>(offset) =
+ filter->at<loco::DataType::FLOAT32>(offset) * scale->at<loco::DataType::FLOAT32>(c);
+ }
+ }
+ }
+ }
+
+ // Copy bias
+ assert(bias->rank() == 1);
+ assert(bias->dim(0).value() == filter_out_channel);
+ fused_bias->dtype(bias->dtype());
+ fused_bias->size<loco::DataType::FLOAT32>(bias->size<loco::DataType::FLOAT32>());
+ fused_bias->rank(1);
+ fused_bias->dim(0).set(filter_out_channel);
+ fused_bias->shape_status(luci::ShapeStatus::VALID);
+ fused_bias->name(name + "/Conv2D/bias");
+
+ // Fuse scale and shift to bias
+ for (uint32_t b = 0; b < filter_out_channel; ++b)
+ {
+ fused_bias->at<loco::DataType::FLOAT32>(b) =
+ bias->at<loco::DataType::FLOAT32>(b) * scale->at<loco::DataType::FLOAT32>(b) +
+ shift->at<loco::DataType::FLOAT32>(b);
+ }
+
+ // Set attributes of fused_conv
+ fused_conv->input(conv->input());
+ fused_conv->filter(fused_filter);
+ fused_conv->bias(fused_bias);
+ fused_conv->fusedActivationFunction(add->fusedActivationFunction());
+ fused_conv->padding(conv->padding());
+ fused_conv->stride()->h(conv->stride()->h());
+ fused_conv->stride()->w(conv->stride()->w());
+ fused_conv->dilation()->h(conv->dilation()->h());
+ fused_conv->dilation()->w(conv->dilation()->w());
+ fused_conv->name(name + "/Conv2D");
+ luci::add_origin(fused_conv, luci::composite_origin({luci::get_origin(add), luci::get_origin(mul),
+ luci::get_origin(conv)}));
+
+ replace(add).with(fused_conv);
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool FuseBatchNormWithConvPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto add = dynamic_cast<luci::CircleAdd *>(node))
+ {
+ if (fused_batch_norm_with_conv(add))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FuseBatchNormWithConvPass.test.cpp b/compiler/luci/pass/src/FuseBatchNormWithConvPass.test.cpp
new file mode 100644
index 000000000..96bc2bd35
--- /dev/null
+++ b/compiler/luci/pass/src/FuseBatchNormWithConvPass.test.cpp
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseBatchNormWithConvPass.h"
+
+#include <gtest/gtest.h>
+
+TEST(FuseBatchNormWithConvPassTest, name)
+{
+ luci::FuseBatchNormWithConvPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
diff --git a/compiler/luci/pass/src/FuseBatchNormWithDwConvPass.cpp b/compiler/luci/pass/src/FuseBatchNormWithDwConvPass.cpp
new file mode 100644
index 000000000..64e8daa69
--- /dev/null
+++ b/compiler/luci/pass/src/FuseBatchNormWithDwConvPass.cpp
@@ -0,0 +1,233 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseBatchNormWithDwConvPass.h"
+
+#include "helpers/NodeFiller.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+namespace
+{
+/**
+ * Fuse Mul-Add to DepthwiseConv2D if possible.
+ *
+ * NOTE TF's BatchNormalization is converted to Mul and Add.
+ *
+ * BEFORE
+ * | [CircleConst]
+ * | / [CircleConst]
+ * | / /
+ * [CircleDepthwiseConv2D] [CircleConst]
+ * | /
+ * [CircleMul] [CircleConst]
+ * | /
+ * [CircleAdd]
+ * |
+ *
+ * AFTER
+ * | [CircleConst]
+ * +-------------------------------------+ / [CircleConst]
+ * | | / /
+ * | [CircleDepthwiseConv2D] [CircleConst]
+ * | [CircleConst] | /
+ * | / [CircleConst] [CircleMul] [CircleConst]
+ * | / / | /
+ * [CircleDepthwiseConv2D] [CircleAdd]
+ * |
+ *
+ */
+
+/**
+ * @brief Check shape is [x] or [1, 1, 1, x]
+ */
+bool is_scale_shift_shape(luci::CircleConst *node)
+{
+ auto rank = node->rank();
+ if (rank != 1 && rank != 4)
+ return false;
+ for (uint32_t r = 0; r < rank - 1; ++r)
+ {
+ if (node->dim(r).value() != 1)
+ return false;
+ }
+ return true;
+}
+
+bool fused_batch_norm_with_dwconv(luci::CircleAdd *add)
+{
+ assert(add != nullptr);
+
+ // Find the pattern of CircleDepthwiseConv2D - CircleMul - CircleAdd
+ luci::CircleConst *scale = nullptr;
+ luci::CircleConst *shift = nullptr;
+ luci::CircleDepthwiseConv2D *dwconv = nullptr;
+ luci::CircleMul *mul = nullptr;
+ if (not luci::fill(&shift, &mul).with_commutative_args_of(add))
+ return false;
+ if (not luci::fill(&scale, &dwconv).with_commutative_args_of(mul))
+ return false;
+
+ // check scale and shift constant attributes
+ // scale and shift can be [x] or [1, 1, 1, x]
+ if (not is_scale_shift_shape(scale))
+ return false;
+ if (not is_scale_shift_shape(shift))
+ return false;
+
+ // check mul, add attributes
+ if (mul->dtype() != loco::DataType::FLOAT32)
+ return false;
+ if (mul->fusedActivationFunction() != luci::FusedActFunc::NONE)
+ return false;
+ if (add->dtype() != loco::DataType::FLOAT32)
+ return false;
+
+ // get weight of dwconv
+ auto filter = dynamic_cast<luci::CircleConst *>(dwconv->filter());
+ if (not filter)
+ return false;
+ if (filter->dtype() != loco::DataType::FLOAT32)
+ return false;
+ if (filter->rank() != 4)
+ return false;
+
+ // check attributes of dwconv
+ if (dwconv->fusedActivationFunction() != luci::FusedActFunc::NONE)
+ return false;
+ if (dwconv->depthMultiplier() < 0) // can this happen?
+ return false;
+
+ // get bias of dwconv
+ auto bias = dynamic_cast<luci::CircleConst *>(dwconv->bias());
+ if (not bias)
+ return false;
+ if (bias->dtype() != loco::DataType::FLOAT32)
+ return false;
+ if (bias->rank() != 1)
+ return false;
+
+ // filter is represented as [1, H, W, C*M] where M is the depth multiplier.
+ auto filter_out_chn = filter->dim(3).value();
+ auto multiplier = static_cast<uint32_t>(dwconv->depthMultiplier());
+ auto srank = scale->rank(); // as rank can be 1 or 4
+ if (filter_out_chn != scale->dim(srank - 1).value() * multiplier)
+ return false;
+ srank = shift->rank();
+ if (filter_out_chn != shift->dim(srank - 1).value() * multiplier)
+ return false;
+ auto channel = filter_out_chn / multiplier;
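+ // With depth multiplier M, output channel c of the [1, H, W, C*M] filter is
+ // scaled by scale[c / M]; e.g. for C = 2, M = 2, output channels {0, 1} use
+ // scale[0] and {2, 3} use scale[1].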
+
+ auto name = add->name();
+ assert(name.length() > 0);
+
+ loco::Graph *graph = add->graph();
+ luci::CircleDepthwiseConv2D *fused_dwconv = graph->nodes()->create<luci::CircleDepthwiseConv2D>();
+ luci::CircleConst *fused_filter = graph->nodes()->create<luci::CircleConst>();
+ luci::CircleConst *fused_bias = graph->nodes()->create<luci::CircleConst>();
+
+ auto filter_in_chn = filter->dim(0).value();
+ auto filter_height = filter->dim(1).value();
+ auto filter_width = filter->dim(2).value();
+ assert(filter_in_chn == 1);
+
+ // Copy filter shape
+ fused_filter->dtype(filter->dtype());
+ fused_filter->size<loco::DataType::FLOAT32>(filter->size<loco::DataType::FLOAT32>());
+ fused_filter->rank(4);
+ fused_filter->dim(0).set(filter_in_chn);
+ fused_filter->dim(1).set(filter_height);
+ fused_filter->dim(2).set(filter_width);
+ fused_filter->dim(3).set(filter_out_chn);
+ fused_filter->shape_status(luci::ShapeStatus::VALID);
+ fused_filter->name(name + "/DepthwiseConv2D/filter");
+
+ // fused filter weight = filter weight * mul(scale); shift is folded into the bias below
+ for (uint32_t b = 0; b < filter_in_chn; b++)
+ {
+ for (uint32_t h = 0; h < filter_height; h++)
+ {
+ for (uint32_t w = 0; w < filter_width; w++)
+ {
+ for (uint32_t c = 0; c < filter_out_chn; c++)
+ {
+ uint32_t offset = b * filter_height * filter_width * filter_out_chn +
+ h * filter_width * filter_out_chn + w * filter_out_chn + c;
+ uint32_t chn = c / multiplier;
+ fused_filter->at<loco::DataType::FLOAT32>(offset) =
+ filter->at<loco::DataType::FLOAT32>(offset) * scale->at<loco::DataType::FLOAT32>(chn);
+ }
+ }
+ }
+ }
+
+ // Fuse bias with scale and shift
+ fused_bias->dtype(shift->dtype());
+ fused_bias->size<loco::DataType::FLOAT32>(shift->size<loco::DataType::FLOAT32>());
+ fused_bias->rank(1);
+ fused_bias->dim(0).set(channel);
+ fused_bias->shape_status(luci::ShapeStatus::VALID);
+ for (uint32_t c = 0; c < channel; ++c)
+ {
+ fused_bias->at<loco::DataType::FLOAT32>(c) =
+ bias->at<loco::DataType::FLOAT32>(c) * scale->at<loco::DataType::FLOAT32>(c) +
+ shift->at<loco::DataType::FLOAT32>(c);
+ }
+ fused_bias->name(name + "/DepthwiseConv2D/bias");
+
+ // set new dwconv properties
+ fused_dwconv->input(dwconv->input());
+ fused_dwconv->filter(fused_filter);
+ fused_dwconv->bias(fused_bias);
+ fused_dwconv->fusedActivationFunction(add->fusedActivationFunction());
+ fused_dwconv->padding(dwconv->padding());
+ fused_dwconv->stride()->h(dwconv->stride()->h());
+ fused_dwconv->stride()->w(dwconv->stride()->w());
+ fused_dwconv->depthMultiplier(dwconv->depthMultiplier());
+ fused_dwconv->dilation()->h(dwconv->dilation()->h());
+ fused_dwconv->dilation()->w(dwconv->dilation()->w());
+ fused_dwconv->name(name + "/DepthwiseConv2D");
+ luci::add_origin(fused_dwconv,
+ luci::composite_origin(
+ {luci::get_origin(add), luci::get_origin(mul), luci::get_origin(dwconv)}));
+
+ replace(add).with(fused_dwconv);
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool FuseBatchNormWithDwConvPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto add = dynamic_cast<luci::CircleAdd *>(node))
+ {
+ if (fused_batch_norm_with_dwconv(add))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FuseBatchNormWithDwConvPass.test.cpp b/compiler/luci/pass/src/FuseBatchNormWithDwConvPass.test.cpp
new file mode 100644
index 000000000..3030a7306
--- /dev/null
+++ b/compiler/luci/pass/src/FuseBatchNormWithDwConvPass.test.cpp
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseBatchNormWithDwConvPass.h"
+
+#include <gtest/gtest.h>
+
+TEST(FuseBatchNormWithDwConvPassTest, name)
+{
+ luci::FuseBatchNormWithDwConvPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
diff --git a/compiler/luci/pass/src/FuseBatchNormWithTConv.cpp b/compiler/luci/pass/src/FuseBatchNormWithTConv.cpp
deleted file mode 100644
index e39455b1a..000000000
--- a/compiler/luci/pass/src/FuseBatchNormWithTConv.cpp
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci/Pass/FuseBatchNormWithTConv.h"
-
-#include <luci/IR/CircleNodes.h>
-
-namespace
-{
-/**
- * NOTE TF's fusedBatchNorm is converted to mul and add of Circle.
- *
- * BEFORE
- *
- * [CircleTransposeConv]
- * |
- * [mul]
- * |
- * [add]
- * AFTER
- *
- * [CircleTransposeConv]
- */
-bool fused_batch_norm_with_tconv(luci::CircleTransposeConv *tconv)
-{
- // check whether it has bias or not. This optimization works only if it doesn't.
- auto bias = dynamic_cast<luci::CircleOutputExclude *>(tconv->bias());
- if (not bias)
- return false;
-
- // get weight of tconv
- auto filter = dynamic_cast<luci::CircleConst *>(tconv->filter());
- if (not filter)
- return false;
- if (filter->dtype() != loco::DataType::FLOAT32)
- return false;
-
- // get mul node
- auto tconv_output = loco::succs(tconv);
- assert(tconv_output.size() == 1);
- auto mul = dynamic_cast<luci::CircleMul *>(*tconv_output.begin());
- if (not mul)
- return false;
- if (mul->dtype() != loco::DataType::FLOAT32)
- return false;
-
- // get add node
- auto mul_output = loco::succs(mul);
- assert(mul_output.size() == 1);
- auto add = dynamic_cast<luci::CircleAdd *>(*mul_output.begin());
- if (not add)
- return false;
- if (add->dtype() != loco::DataType::FLOAT32)
- return false;
- if (add->fusedActivationFunction() != luci::FusedActFunc::NONE &&
- add->fusedActivationFunction() != luci::FusedActFunc::RELU6)
- return false;
-
- // get scale of batchnorm
- auto scale = dynamic_cast<luci::CircleConst *>(mul->y());
- if (not scale)
- return false;
-
- // scale dim(0) == tconv filter channel dim
- if (filter->rank() != 4)
- return false;
- auto filter_channel_dim = filter->dim(3).value();
- if (scale->rank() != 1)
- return false;
- auto scale_dim = scale->dim(0).value();
- if (filter_channel_dim != scale_dim)
- return false;
-
- // get shift of batchnorm
- auto shift = dynamic_cast<luci::CircleConst *>(add->y());
- if (not shift)
- return false;
-
- // shift dim(0) == tconv filter channel dim
- if (shift->rank() != 1)
- return false;
- auto shift_dim = shift->dim(0).value();
- if (filter_channel_dim != shift_dim)
- return false;
-
- // filter weight = filter weight * mul(scale) + add(shift)
- uint32_t filter_batch_dim = filter->dim(0).value();
- uint32_t filter_height_dim = filter->dim(1).value();
- uint32_t filter_width_dim = filter->dim(2).value();
- for (uint32_t c = 0; c < filter_channel_dim; c++)
- {
- for (uint32_t n = 0; n < filter_batch_dim; n++)
- {
- for (uint32_t h = 0; h < filter_height_dim; h++)
- {
- for (uint32_t w = 0; w < filter_width_dim; w++)
- {
- uint32_t offset = n * filter_height_dim * filter_width_dim * filter_channel_dim +
- h * filter_width_dim * filter_channel_dim + w * filter_channel_dim + c;
- filter->at<loco::DataType::FLOAT32>(offset) *= scale->at<loco::DataType::FLOAT32>(c);
- }
- }
- }
- }
-
- // fuse shift with transposed conv
- tconv->bias(shift);
-
- if (add->fusedActivationFunction() == luci::FusedActFunc::RELU6)
- {
- // separate relu op from add op
- auto relu = add->graph()->nodes()->create<luci::CircleRelu6>();
- relu->features(tconv);
-
- // remove mul node
- replace(add).with(relu);
- }
- else
- {
- replace(add).with(tconv);
- }
-
- return true;
-}
-
-} // namespace
-
-namespace luci
-{
-
-bool FuseBatchNormWithTConvPass::run(loco::Graph *g)
-{
- bool changed = false;
- for (auto node : loco::active_nodes(loco::output_nodes(g)))
- {
- auto tconv = dynamic_cast<luci::CircleTransposeConv *>(node);
- if (not tconv)
- continue;
-
- changed |= fused_batch_norm_with_tconv(tconv);
- }
-
- return changed;
-}
-
-} // namespace luci
diff --git a/compiler/luci/pass/src/FuseBatchNormWithTConvPass.cpp b/compiler/luci/pass/src/FuseBatchNormWithTConvPass.cpp
new file mode 100644
index 000000000..919ce6edc
--- /dev/null
+++ b/compiler/luci/pass/src/FuseBatchNormWithTConvPass.cpp
@@ -0,0 +1,274 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseBatchNormWithTConvPass.h"
+
+#include "helpers/NodeFiller.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+namespace
+{
+
+template <class CIRCLENODE>
+void replace_with_relu(luci::CircleNode *target, luci::CircleNode *feature,
+ const std::string &relu_name)
+{
+ assert(target != nullptr);
+ assert(feature != nullptr);
+
+ auto relu = target->graph()->nodes()->create<CIRCLENODE>();
+ relu->features(feature);
+ relu->name(relu_name);
+ luci::add_origin(relu, luci::get_origin(target));
+
+ replace(target).with(relu);
+}
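+
+// Usage, as in the activation switch of fused_batch_norm_with_tconv below:
+//   replace_with_relu<luci::CircleRelu6>(add, fused_tconv, name + "/Relu6");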
+
+} // namespace
+
+namespace
+{
+/**
+ * Fuse Mul-Add to TransposeConv if possible.
+ *
+ * NOTE TF's BatchNormalization is converted to Mul and Add.
+ *
+ * BEFORE
+ * | [CircleConst]/[CircleOutputExclude]
+ * | / [CircleConst]
+ * | / /
+ * [CircleTransposeConv] [CircleConst]
+ * | /
+ * [CircleMul] [CircleConst]
+ * | /
+ * [CircleAdd]
+ * |
+ *
+ * AFTER
+ * | [CircleConst]/[CircleOutputExclude]
+ * +-------------------------------------+ / [CircleConst]
+ * | | / /
+ * | [CircleTransposeConv] [CircleConst]
+ * | [CircleConst] | /
+ * | / [CircleConst] [CircleMul] [CircleConst]
+ * | / / | /
+ * [CircleTransposeConv] [CircleAdd]
+ * |
+ * ([CircleRelu]/[CircleRelu6])
+ * |
+ *
+ * Note: CircleRelu or CircleRelu6 is inserted if Add activation is ReLU/ReLU6
+ */
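+// As with the Conv2D fusion, the rewrite computes, per output channel c,
+//   W'[c][h][w][i] = W[c][h][w][i] * scale[c]
+//   b'[c]          = shift[c] + bias[c] * scale[c]   (bias term if present)
+// and folds the Mul/Add pair into the new TransposeConv.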
+bool fused_batch_norm_with_tconv(luci::CircleAdd *add)
+{
+ assert(add != nullptr);
+
+ // Find the pattern of CircleTransposeConv - CircleMul - CircleAdd
+ luci::CircleConst *scale = nullptr;
+ luci::CircleConst *shift = nullptr;
+ luci::CircleTransposeConv *tconv = nullptr;
+ luci::CircleMul *mul = nullptr;
+ if (not luci::fill(&shift, &mul).with_commutative_args_of(add))
+ return false;
+ if (not luci::fill(&scale, &tconv).with_commutative_args_of(mul))
+ return false;
+ // skip if tconv has fused activation
+ if (tconv->fusedActivationFunction() != luci::FusedActFunc::NONE)
+ return false;
+
+ // check scale and shift constant attributes
+ // TODO maybe rank check is not needed
+ if (scale->rank() != 1 && scale->rank() != 4)
+ return false;
+ if (shift->rank() != 1 && shift->rank() != 4)
+ return false;
+ // check mul, add attributes
+ if (mul->dtype() != loco::DataType::FLOAT32)
+ return false;
+ if (add->dtype() != loco::DataType::FLOAT32)
+ return false;
+ if (add->fusedActivationFunction() != luci::FusedActFunc::NONE &&
+ add->fusedActivationFunction() != luci::FusedActFunc::RELU6 &&
+ add->fusedActivationFunction() != luci::FusedActFunc::RELU)
+ return false;
+
+ // tconv bias is optional
+ auto bias = dynamic_cast<luci::CircleConst *>(tconv->bias());
+
+ // get weight of tconv
+ auto filter = dynamic_cast<luci::CircleConst *>(tconv->filter());
+ if (not filter)
+ return false;
+ if (filter->dtype() != loco::DataType::FLOAT32)
+ return false;
+ if (filter->rank() != 4)
+ return false;
+
+ auto filter_out_chn = filter->dim(0).value();
+ // allow scale/shift and bias shapes of [N] or [1,1,1,N]; BN works channel-wise
+ auto srank = scale->rank() - 1;
+ if (filter_out_chn != scale->dim(srank).value())
+ return false;
+ for (uint32_t d = 0; d < srank; ++d)
+ {
+ if (1 != scale->dim(d).value())
+ return false;
+ }
+ srank = shift->rank() - 1;
+ if (filter_out_chn != shift->dim(srank).value())
+ return false;
+ for (uint32_t d = 0; d < srank; ++d)
+ {
+ if (1 != shift->dim(d).value())
+ return false;
+ }
+ if (bias)
+ {
+ if (bias->dtype() != loco::DataType::FLOAT32)
+ return false;
+ srank = bias->rank() - 1;
+ if (filter_out_chn != bias->dim(srank).value())
+ return false;
+ for (uint32_t d = 0; d < srank; ++d)
+ {
+ if (1 != bias->dim(d).value())
+ return false;
+ }
+ }
+
+ auto name = add->name();
+ assert(name.length() > 0);
+
+ loco::Graph *graph = add->graph();
+ luci::CircleTransposeConv *fused_tconv = graph->nodes()->create<luci::CircleTransposeConv>();
+ luci::CircleConst *fused_filter = graph->nodes()->create<luci::CircleConst>();
+ luci::CircleConst *fused_bias = graph->nodes()->create<luci::CircleConst>();
+
+ auto filter_height = filter->dim(1).value();
+ auto filter_width = filter->dim(2).value();
+ auto filter_in_chn = filter->dim(3).value();
+
+ // Copy filter shape
+ fused_filter->dtype(filter->dtype());
+ fused_filter->size<loco::DataType::FLOAT32>(filter->size<loco::DataType::FLOAT32>());
+ fused_filter->rank(4);
+ fused_filter->dim(0).set(filter_out_chn);
+ fused_filter->dim(1).set(filter_height);
+ fused_filter->dim(2).set(filter_width);
+ fused_filter->dim(3).set(filter_in_chn);
+ fused_filter->shape_status(luci::ShapeStatus::VALID);
+ fused_filter->name(name + "/TransposeConv/filter");
+
+ // fused filter weight = filter weight * mul(scale); shift goes into the bias below
+ for (uint32_t c = 0; c < filter_out_chn; c++)
+ {
+ for (uint32_t h = 0; h < filter_height; h++)
+ {
+ for (uint32_t w = 0; w < filter_width; w++)
+ {
+ for (uint32_t b = 0; b < filter_in_chn; b++)
+ {
+ uint32_t offset = c * filter_height * filter_width * filter_in_chn +
+ h * filter_width * filter_in_chn + w * filter_in_chn + b;
+ fused_filter->at<loco::DataType::FLOAT32>(offset) =
+ filter->at<loco::DataType::FLOAT32>(offset) * scale->at<loco::DataType::FLOAT32>(c);
+ }
+ }
+ }
+ }
+
+ // Copy fused_bias from shift
+ fused_bias->dtype(shift->dtype());
+ fused_bias->size<loco::DataType::FLOAT32>(shift->size<loco::DataType::FLOAT32>());
+ fused_bias->rank(1);
+ fused_bias->dim(0).set(filter_out_chn);
+ fused_bias->shape_status(luci::ShapeStatus::VALID);
+ for (uint32_t c = 0; c < filter_out_chn; ++c)
+ {
+ fused_bias->at<loco::DataType::FLOAT32>(c) = shift->at<loco::DataType::FLOAT32>(c);
+ if (bias != nullptr)
+ {
+ fused_bias->at<loco::DataType::FLOAT32>(c) +=
+ bias->at<loco::DataType::FLOAT32>(c) * scale->at<loco::DataType::FLOAT32>(c);
+ }
+ }
+ fused_bias->name(name + "/TransposeConv/bias");
+
+ // set new tconv properties
+ fused_tconv->inputSizes(tconv->inputSizes());
+ fused_tconv->filter(fused_filter);
+ fused_tconv->outBackprop(tconv->outBackprop());
+ fused_tconv->bias(fused_bias);
+ fused_tconv->padding(tconv->padding());
+ fused_tconv->stride()->h(tconv->stride()->h());
+ fused_tconv->stride()->w(tconv->stride()->w());
+ fused_tconv->name(name + "/TransposeConv");
+ // TODO set activation from Add and drop the trailing Relu/Relu6 Op
+ // when all of our backends support fused activation of TransposeConv
+ fused_tconv->fusedActivationFunction(luci::FusedActFunc::NONE);
+ luci::add_origin(fused_tconv,
+ luci::composite_origin(
+ {luci::get_origin(add), luci::get_origin(mul), luci::get_origin(tconv)}));
+ if (bias != nullptr)
+ {
+ luci::add_origin(fused_tconv, luci::get_origin(bias));
+ }
+
+ switch (add->fusedActivationFunction())
+ {
+ case luci::FusedActFunc::RELU6:
+ replace_with_relu<luci::CircleRelu6>(add, fused_tconv, name + "/Relu6");
+ break;
+
+ case luci::FusedActFunc::RELU:
+ replace_with_relu<luci::CircleRelu>(add, fused_tconv, name + "/Relu");
+ break;
+
+ case luci::FusedActFunc::NONE:
+ replace(add).with(fused_tconv);
+ break;
+
+ default:
+ assert(false);
+ break;
+ }
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool FuseBatchNormWithTConvPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto add = dynamic_cast<luci::CircleAdd *>(node))
+ {
+ if (fused_batch_norm_with_tconv(add))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FuseBatchNormWithTConvPass.test.cpp b/compiler/luci/pass/src/FuseBatchNormWithTConvPass.test.cpp
new file mode 100644
index 000000000..051100dc9
--- /dev/null
+++ b/compiler/luci/pass/src/FuseBatchNormWithTConvPass.test.cpp
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseBatchNormWithTConvPass.h"
+
+#include <gtest/gtest.h>
+
+TEST(FuseBatchNormWithTConvPassTest, name)
+{
+ luci::FuseBatchNormWithTConvPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
diff --git a/compiler/luci/pass/src/FuseGeluPass.cpp b/compiler/luci/pass/src/FuseGeluPass.cpp
new file mode 100644
index 000000000..e3e7cecb3
--- /dev/null
+++ b/compiler/luci/pass/src/FuseGeluPass.cpp
@@ -0,0 +1,347 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseGeluPass.h"
+#include "helpers/NodeFiller.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/Profile/CircleNodeOrigin.h>
+#include <luci/Service/CircleNodeClone.h>
+
+#include <cmath>
+
+#include <cassert>
+
+// Helper to fuse Gelu
+namespace
+{
+
+// Approximate float equality with an absolute tolerance
+bool same(float a, float b) { return std::fabs(a - b) < 1e-5; }
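+// NOTE the 1e-5 tolerance absorbs FP32 rounding of constants such as
+// 1 / sqrt(2) = 0.70710678..., which the matchers compare against sqrtf(0.5f).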
+
+class GeluPatternBase
+{
+public:
+ GeluPatternBase(luci::CircleMul *candidate) { _pattern_last_node = candidate; }
+
+ virtual ~GeluPatternBase() = default;
+
+public:
+ virtual bool matched() = 0;
+
+public:
+ luci::CircleNode *_ifm = nullptr;
+ luci::CircleMul *_mul_sqrt = nullptr;
+ luci::CircleCustom *_erf = nullptr;
+ luci::CircleCustomOut *_erf_out = nullptr;
+ luci::CircleAdd *_add_one = nullptr;
+ luci::CircleMul *_mul = nullptr;
+ luci::CircleMul *_mul_half = nullptr;
+ luci::CircleConst *_const_sqrt = nullptr;
+ luci::CircleConst *_const_one = nullptr;
+ luci::CircleConst *_const_half = nullptr;
+ luci::CircleMul *_pattern_last_node = nullptr;
+};
+
+/**
+ * Below diagram shows Gelu pattern to fuse.
+ * - Gelu(x) = 0.5 * x * (1.0 + erf(x / sqrt(2.0)))
+ * - the below pattern will be replaced with one Gelu
+ *
+ * [In]
+ * |
+ * V
+ * +---- ifm
+ * | |
+ * | V
+ * | mul_sqrt (1/sqrt(2) = 0.707106..)
+ * | |
+ * | V
+ * | erf
+ * | |
+ * | V
+ * | add_one (1.0)
+ * | |
+ * | V
+ * +---> mul
+ * |
+ * V
+ * mul_half (0.5)
+ * |
+ * V
+ * [Out]
+ *
+ */
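+// Worked example: for x = 1, mul_sqrt yields 0.70710678, erf(0.70710678) =
+// 0.6826895, add_one yields 1.6826895, and mul_half gives
+// 0.5 * 1 * 1.6826895 = 0.8413447, i.e. Gelu(1).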
+class GeluPattern1 final : public GeluPatternBase
+{
+public:
+ GeluPattern1(luci::CircleMul *candidate) : GeluPatternBase(candidate)
+ {
+ assert(candidate);
+ _mul_half = candidate;
+ }
+
+public:
+ bool matched() override;
+};
+
+/**
+ * Below diagram shows Gelu pattern to fuse.
+ * - Gelu(x) = 0.5 * x * (1.0 + erf(x / sqrt(2.0)))
+ * - the below pattern will be replaced with one Gelu
+ *
+ * [In]
+ * |
+ * V
+ * +----------- ifm
+ * | |
+ * | V
+ * | mul_sqrt (1/sqrt(2) = 0.707106..)
+ * | |
+ * | V
+ * | erf
+ * mul_half (0.5) |
+ * | V
+ * | add_one (1.0)
+ * | |
+ * | V
+ * +----------> mul
+ * |
+ * |
+ * V
+ * [Out]
+ *
+ */
+class GeluPattern2 final : public GeluPatternBase
+{
+public:
+ GeluPattern2(luci::CircleMul *candidate) : GeluPatternBase(candidate)
+ {
+ assert(candidate);
+ _mul = candidate;
+ }
+
+ ~GeluPattern2() override = default;
+
+public:
+ bool matched() override;
+};
+
+#define CHECK_OR_FALSE(condition) \
+ if (not(condition)) \
+ return false;
+
+bool GeluPattern1::matched()
+{
+ // check pattern
+ CHECK_OR_FALSE(luci::fill(&_mul, &_const_half).with_commutative_args_of(_mul_half));
+ CHECK_OR_FALSE(luci::fill(&_ifm, &_add_one).with_commutative_args_of(_mul));
+ CHECK_OR_FALSE(luci::fill(&_erf_out, &_const_one).with_commutative_args_of(_add_one));
+
+ if (auto erf = dynamic_cast<luci::CircleCustom *>(_erf_out->input()))
+ _erf = erf;
+
+ CHECK_OR_FALSE(_erf != nullptr);
+
+ // Check erf
+ CHECK_OR_FALSE(_erf->custom_code() == "Erf");
+ CHECK_OR_FALSE(_erf->numInputs() == 1);
+ CHECK_OR_FALSE(_erf->numOutputs() == 1);
+
+ if (auto mul_sqrt = dynamic_cast<luci::CircleMul *>(_erf->inputs(0)))
+ _mul_sqrt = mul_sqrt;
+
+ CHECK_OR_FALSE(_mul_sqrt != nullptr);
+
+ CHECK_OR_FALSE(luci::fill(&_ifm, &_const_sqrt).with_commutative_args_of(_mul_sqrt));
+
+ CHECK_OR_FALSE(_mul_sqrt->x() == _ifm);
+ CHECK_OR_FALSE(_mul->x() == _ifm);
+
+ // Check Activation to be NONE
+ CHECK_OR_FALSE(_mul_sqrt->fusedActivationFunction() == luci::FusedActFunc::NONE);
+ CHECK_OR_FALSE(_add_one->fusedActivationFunction() == luci::FusedActFunc::NONE);
+ CHECK_OR_FALSE(_mul->fusedActivationFunction() == luci::FusedActFunc::NONE);
+ CHECK_OR_FALSE(_mul_half->fusedActivationFunction() == luci::FusedActFunc::NONE);
+
+ // check _const_sqrt condition
+ CHECK_OR_FALSE(_const_sqrt->dtype() == loco::DataType::FLOAT32);
+ CHECK_OR_FALSE(_const_sqrt->size<loco::DataType::FLOAT32>() == 1);
+ CHECK_OR_FALSE(::same(_const_sqrt->at<loco::DataType::FLOAT32>(0), sqrtf(0.5f)));
+
+ // check if _const_half is 0.5 (fp32)
+ CHECK_OR_FALSE(_const_half->dtype() == loco::DataType::FLOAT32);
+ CHECK_OR_FALSE(_const_half->size<loco::DataType::FLOAT32>() == 1);
+ CHECK_OR_FALSE(_const_half->at<loco::DataType::FLOAT32>(0) == 0.5);
+
+ // check _const_one condition
+ CHECK_OR_FALSE(_const_one->dtype() == loco::DataType::FLOAT32);
+ CHECK_OR_FALSE(_const_one->size<loco::DataType::FLOAT32>() == 1);
+ CHECK_OR_FALSE(_const_one->at<loco::DataType::FLOAT32>(0) == 1);
+
+ return true;
+}
+
+bool GeluPattern2::matched()
+{
+ // check pattern
+ CHECK_OR_FALSE(luci::fill(&_mul_half, &_add_one).with_commutative_args_of(_mul));
+ CHECK_OR_FALSE(luci::fill(&_ifm, &_const_half).with_commutative_args_of(_mul_half));
+ CHECK_OR_FALSE(luci::fill(&_erf_out, &_const_one).with_commutative_args_of(_add_one));
+
+ CHECK_OR_FALSE(_mul_half->x() == _ifm);
+
+ if (auto erf = dynamic_cast<luci::CircleCustom *>(_erf_out->input()))
+ _erf = erf;
+
+ CHECK_OR_FALSE(_erf != nullptr);
+
+ // Check erf
+ CHECK_OR_FALSE(_erf->custom_code() == "Erf");
+ CHECK_OR_FALSE(_erf->numInputs() == 1);
+ CHECK_OR_FALSE(_erf->numOutputs() == 1);
+
+ if (auto mul_sqrt = dynamic_cast<luci::CircleMul *>(_erf->inputs(0)))
+ _mul_sqrt = mul_sqrt;
+
+ CHECK_OR_FALSE(_mul_sqrt != nullptr);
+
+ CHECK_OR_FALSE(luci::fill(&_ifm, &_const_sqrt).with_commutative_args_of(_mul_sqrt));
+
+ CHECK_OR_FALSE(_mul_sqrt->x() == _ifm);
+
+ // Check Activation to be NONE
+ CHECK_OR_FALSE(_mul_sqrt->fusedActivationFunction() == luci::FusedActFunc::NONE);
+ CHECK_OR_FALSE(_add_one->fusedActivationFunction() == luci::FusedActFunc::NONE);
+ CHECK_OR_FALSE(_mul->fusedActivationFunction() == luci::FusedActFunc::NONE);
+ CHECK_OR_FALSE(_mul_half->fusedActivationFunction() == luci::FusedActFunc::NONE);
+
+ // check _const_sqrt condition
+ CHECK_OR_FALSE(_const_sqrt->dtype() == loco::DataType::FLOAT32);
+ CHECK_OR_FALSE(_const_sqrt->size<loco::DataType::FLOAT32>() == 1);
+ CHECK_OR_FALSE(::same(_const_sqrt->at<loco::DataType::FLOAT32>(0), sqrtf(0.5f)));
+
+ // check if _const_half is 0.5 (fp32)
+ CHECK_OR_FALSE(_const_half->dtype() == loco::DataType::FLOAT32);
+ CHECK_OR_FALSE(_const_half->size<loco::DataType::FLOAT32>() == 1);
+ CHECK_OR_FALSE(_const_half->at<loco::DataType::FLOAT32>(0) == 0.5);
+
+ // check _const_one condition
+ CHECK_OR_FALSE(_const_one->dtype() == loco::DataType::FLOAT32);
+ CHECK_OR_FALSE(_const_one->size<loco::DataType::FLOAT32>() == 1);
+ CHECK_OR_FALSE(_const_one->at<loco::DataType::FLOAT32>(0) == 1);
+
+ return true;
+}
+
+#undef CHECK_OR_FALSE
+
+class FuseGelu final
+{
+public:
+ FuseGelu(const GeluPatternBase *p) : _p(p) {}
+
+public:
+ void apply(void);
+
+private:
+ luci::CircleGelu *create_gelu(loco::Graph *graph);
+
+private:
+ const GeluPatternBase *_p;
+};
+
+luci::CircleGelu *FuseGelu::create_gelu(loco::Graph *graph)
+{
+ assert(graph);
+
+ auto gelu = graph->nodes()->create<luci::CircleGelu>();
+ gelu->features(_p->_ifm);
+ // TODO Support approximate = True pattern
+ gelu->approximate(false);
+ gelu->name(_p->_pattern_last_node->name() + "_gelu");
+ return gelu;
+}
+
+void FuseGelu::apply()
+{
+ auto graph = _p->_pattern_last_node->graph();
+
+ auto gelu = create_gelu(graph);
+
+ // set origin
+ std::vector<std::shared_ptr<luci::CircleNodeOrigin>> origin_vec{
+ luci::get_origin(_p->_mul_sqrt), luci::get_origin(_p->_erf), luci::get_origin(_p->_add_one),
+ luci::get_origin(_p->_mul), luci::get_origin(_p->_mul_half)};
+
+ luci::add_origin(gelu, luci::composite_origin(origin_vec));
+
+ replace(_p->_pattern_last_node).with(gelu);
+}
+
+} // namespace
+
+namespace
+{
+
+bool fuse_gelu(luci::CircleMul *mul)
+{
+ assert(mul);
+
+ // check first pattern
+ GeluPattern1 pattern(mul);
+ if (pattern.matched())
+ {
+ FuseGelu fuse(&pattern);
+ fuse.apply();
+ return true;
+ }
+
+ // check second pattern
+ GeluPattern2 pattern2(mul);
+ if (pattern2.matched())
+ {
+ FuseGelu fuse(&pattern2);
+ fuse.apply();
+ return true;
+ }
+ return false;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool FuseGeluPass::run(loco::Graph *g)
+{
+ bool changed = false;
+
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto mul = dynamic_cast<luci::CircleMul *>(node);
+ if (not mul)
+ continue;
+
+ if (fuse_gelu(mul))
+ changed = true;
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FuseGeluPass.test.cpp b/compiler/luci/pass/src/FuseGeluPass.test.cpp
new file mode 100644
index 000000000..db6f6993a
--- /dev/null
+++ b/compiler/luci/pass/src/FuseGeluPass.test.cpp
@@ -0,0 +1,251 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseGeluPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+
+#include <cmath>
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class GeluGraphlet
+{
+public:
+ GeluGraphlet() = default;
+
+ void init(loco::Graph *g)
+ {
+ _ifm = g->nodes()->create<luci::CircleAbs>();
+ _mul_sqrt = g->nodes()->create<luci::CircleMul>();
+ _erf = g->nodes()->create<luci::CircleCustom>(1, 1);
+ _erf_out = g->nodes()->create<luci::CircleCustomOut>();
+ _add_one = g->nodes()->create<luci::CircleAdd>();
+ _mul = g->nodes()->create<luci::CircleMul>();
+ _mul_half = g->nodes()->create<luci::CircleMul>();
+ _const_sqrt = g->nodes()->create<luci::CircleConst>();
+ _const_one = g->nodes()->create<luci::CircleConst>();
+ _const_half = g->nodes()->create<luci::CircleConst>();
+
+ _mul->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _mul_sqrt->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _mul_half->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _add_one->fusedActivationFunction(luci::FusedActFunc::NONE);
+
+ _ifm->name("ifm");
+ _mul_sqrt->name("mul_sqrt");
+ _erf->name("erf");
+ _erf_out->name("erf_out");
+ _add_one->name("add_one");
+ _mul->name("mul");
+ _mul_half->name("mul_half");
+ _const_one->name("const_one");
+ _const_sqrt->name("const_sqrt");
+ _const_half->name("const_half");
+
+ _erf->custom_code("Erf");
+
+ _const_sqrt->dtype(loco::DataType::FLOAT32);
+ _const_sqrt->size<loco::DataType::FLOAT32>(1);
+ _const_sqrt->shape({1});
+ _const_sqrt->at<loco::DataType::FLOAT32>(0) = sqrtf(0.5f);
+ _const_sqrt->shape_status(luci::ShapeStatus::VALID);
+
+ _const_one->dtype(loco::DataType::FLOAT32);
+ _const_one->size<loco::DataType::FLOAT32>(1);
+ _const_one->shape({1});
+ _const_one->at<loco::DataType::FLOAT32>(0) = 1.0;
+ _const_one->shape_status(luci::ShapeStatus::VALID);
+
+ _const_half->dtype(loco::DataType::FLOAT32);
+ _const_half->size<loco::DataType::FLOAT32>(1);
+ _const_half->shape({1});
+ _const_half->at<loco::DataType::FLOAT32>(0) = 0.5;
+ _const_half->shape_status(luci::ShapeStatus::VALID);
+ }
+
+ void invalid_half() { _const_half->at<loco::DataType::FLOAT32>(0) = 0.1; }
+ void invalid_act() { _add_one->fusedActivationFunction(luci::FusedActFunc::RELU); }
+
+protected:
+ luci::CircleAbs *_ifm = nullptr;
+ luci::CircleMul *_mul_sqrt = nullptr;
+ luci::CircleCustom *_erf = nullptr;
+ luci::CircleCustomOut *_erf_out = nullptr;
+ luci::CircleAdd *_add_one = nullptr;
+ luci::CircleMul *_mul = nullptr;
+ luci::CircleMul *_mul_half = nullptr;
+ luci::CircleConst *_const_sqrt = nullptr;
+ luci::CircleConst *_const_one = nullptr;
+ luci::CircleConst *_const_half = nullptr;
+};
+
+class FuseGeluTestGraph1 : public TestIOGraph, public GeluGraphlet
+{
+public:
+ FuseGeluTestGraph1() = default;
+
+ void init(void)
+ {
+ TestIOGraph::init({1}, {1});
+ GeluGraphlet::init(g());
+
+ _ifm->x(input());
+ _mul_sqrt->x(_ifm);
+ _mul_sqrt->y(_const_sqrt);
+ _erf->inputs(0, _mul_sqrt);
+ _erf_out->input(_erf);
+ _add_one->x(_erf_out);
+ _add_one->y(_const_one);
+ _mul->x(_ifm);
+ _mul->y(_add_one);
+ _mul_half->x(_mul);
+ _mul_half->y(_const_half);
+
+ output()->from(_mul_half);
+ }
+};
+
+class FuseGeluTestGraph2 : public TestIOGraph, public GeluGraphlet
+{
+public:
+ FuseGeluTestGraph2() = default;
+
+ void init(void)
+ {
+ TestIOGraph::init({1}, {1});
+ GeluGraphlet::init(g());
+
+ _ifm->x(input());
+ _mul_sqrt->x(_ifm);
+ _mul_sqrt->y(_const_sqrt);
+ _erf->inputs(0, _mul_sqrt);
+ _erf_out->input(_erf);
+ _add_one->x(_erf_out);
+ _add_one->y(_const_one);
+ _mul_half->x(_ifm);
+ _mul_half->y(_const_half);
+ _mul->x(_mul_half);
+ _mul->y(_add_one);
+
+ output()->from(_mul);
+ }
+};
+
+class FuseGeluTestNegGraph : public TestIOGraph, public GeluGraphlet
+{
+public:
+ FuseGeluTestNegGraph() = default;
+
+ void init(void)
+ {
+ TestIOGraph::init({1}, {1});
+ GeluGraphlet::init(g());
+
+ _ifm->x(input());
+ _mul_sqrt->x(_ifm);
+ // NOTE y is incorrect (should be _const_sqrt)
+ _mul_sqrt->y(_ifm);
+ _erf->inputs(0, _mul_sqrt);
+ _erf_out->input(_erf);
+ _add_one->x(_erf_out);
+ _add_one->y(_const_one);
+ _mul->x(_ifm);
+ _mul->y(_add_one);
+ _mul_half->x(_mul);
+ _mul_half->y(_const_half);
+
+ output()->from(_mul_half);
+ }
+};
+
+} // namespace
+
+TEST(FuseGeluPassTest, name)
+{
+ luci::FuseGeluPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(FuseGeluPassTest, fuse_pattern1)
+{
+ FuseGeluTestGraph1 g;
+ luci::FuseGeluPass pass;
+
+ g.init();
+
+ EXPECT_TRUE(pass.run(g.g()));
+}
+
+TEST(FuseGeluPassTest, fuse_pattern2)
+{
+ FuseGeluTestGraph2 g;
+ luci::FuseGeluPass pass;
+
+ g.init();
+
+ EXPECT_TRUE(pass.run(g.g()));
+}
+
+TEST(FuseGeluPassTest, fuse_invalid_half_NEG)
+{
+ FuseGeluTestNegGraph g;
+ luci::FuseGeluPass pass;
+
+ g.init();
+ g.invalid_half();
+
+ EXPECT_FALSE(pass.run(g.g()));
+}
+
+TEST(FuseGeluPassTest, fuse_pattern2_invalid_half_NEG)
+{
+ FuseGeluTestGraph2 g;
+ luci::FuseGeluPass pass;
+
+ g.init();
+ g.invalid_half();
+
+ EXPECT_FALSE(pass.run(g.g()));
+}
+
+TEST(FuseGeluPassTest, fuse_invalid_act_NEG)
+{
+ FuseGeluTestNegGraph g;
+ luci::FuseGeluPass pass;
+
+ g.init();
+ g.invalid_act();
+
+ EXPECT_FALSE(pass.run(g.g()));
+}
+
+TEST(FuseGeluPassTest, fuse_NEG)
+{
+ FuseGeluTestNegGraph g;
+ luci::FuseGeluPass pass;
+
+ g.init();
+
+ EXPECT_FALSE(pass.run(g.g()));
+}
diff --git a/compiler/luci/pass/src/FuseInstanceNormPass.cpp b/compiler/luci/pass/src/FuseInstanceNormPass.cpp
index ad8765c41..10a651e35 100644
--- a/compiler/luci/pass/src/FuseInstanceNormPass.cpp
+++ b/compiler/luci/pass/src/FuseInstanceNormPass.cpp
@@ -15,105 +15,17 @@
*/
#include "luci/Pass/FuseInstanceNormPass.h"
+#include "helpers/NodeFiller.h"
#include "FuseInstanceNormPassInternal.h"
#include <luci/IR/CircleNodes.h>
-#include <loco/Service/ShapeInference.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+#include <luci/Service/CircleNodeClone.h>
#include <cassert>
#include <set>
-// Helper to find commutative node's arguments
-namespace
-{
-
-/**
- * INTRODUCTION
- * Binary operation f(x,y) is 'commutative' when
- * f(x,y) == f(y,x) holds for all x, y.
- * For examples, ADD, MUL and SQUARED_DIFFERENCE are commutative.
- * These helpers make it easy to find commutative arguemnts of commtative node.
- *
- * HOW TO USE
- * COMM_NODE *node;
- * ARG_TYPE_1 *arg1;
- * ARG_TYPE_2 *arg2;
- *
- * bool ok = fill(&arg1, &arg2).with_commutative_args_of(node);
- *
- * Result
- * If 'node's commutative argument types are actually {ARG_TYPE_1, ARG_TYPE_2}
- * (as a set), 'arg1' and 'arg2' set as actual 'node's arguemnts with matching
- * type, and return value 'ok' is true.
- * Otherwise, 'arg1' and 'arg2' not changed, 'ok' is false.
- */
-
-template <class ARG_TYPE_1, class ARG_TYPE_2> class NodeFiller final
-{
-public:
- NodeFiller(ARG_TYPE_1 **arg_1, ARG_TYPE_2 **arg_2) : _arg_1(arg_1), _arg_2(arg_2)
- {
- // DO NOTHING
- }
-
- /**
- * @return true When 'node's argument types are 'ARG_TYPE_1' and 'ARG_TYPE_2'
- * In such case, it assign '_arg_1' and '_arg_2' to actual arguments
- *
- * @return false When 'node's argument types are NOT matched with 'ARG_TYPE_*'
- * In such case, it does not amend '_arg_1' and '_arg_2'
- *
- * @require COMM_NODE has member x() and y()
- */
- template <class COMM_NODE> bool with_commutative_args_of(const COMM_NODE *node);
-
-private:
- ARG_TYPE_1 **_arg_1;
- ARG_TYPE_2 **_arg_2;
-};
-
-template <class ARG_TYPE_1, class ARG_TYPE_2>
-inline NodeFiller<ARG_TYPE_1, ARG_TYPE_2> fill(ARG_TYPE_1 **arg_1, ARG_TYPE_2 **arg_2)
-{
- return NodeFiller<ARG_TYPE_1, ARG_TYPE_2>{arg_1, arg_2};
-}
-
-template <class ARG_TYPE_1, class ARG_TYPE_2>
-template <class COMM_NODE>
-bool NodeFiller<ARG_TYPE_1, ARG_TYPE_2>::with_commutative_args_of(const COMM_NODE *node)
-{
- // Case 1) X == ARG_TYPE_1 / Y == ARG_TYPE_2
- {
- auto x = dynamic_cast<ARG_TYPE_1 *>(node->x());
- auto y = dynamic_cast<ARG_TYPE_2 *>(node->y());
-
- if (x && y)
- {
- *_arg_1 = x;
- *_arg_2 = y;
- return true;
- }
- }
-
- // Case 2) X == ARG_TYPE_2 / Y == ARG_TYPE_1
- {
- auto x = dynamic_cast<ARG_TYPE_2 *>(node->x());
- auto y = dynamic_cast<ARG_TYPE_1 *>(node->y());
-
- if (x && y)
- {
- *_arg_1 = y;
- *_arg_2 = x;
- return true;
- }
- }
-
- return false;
-}
-
-} // namespace
-
// Helper to check detail
/// @return true When node has shape of '1 x .. x 1 x depth'
@@ -129,32 +41,15 @@ bool is_1D_with_dummy_dim(luci::CircleConst *node, uint32_t depth)
return node->dim(axis).value() == depth;
}
-/// @return true if node shape consists of ones, except the one before the last dim: 1,...1,depth,1
-bool is_quasi_1D_with_dummy_dim(luci::CircleConst *node, uint32_t depth)
-{
- auto rank = node->rank();
- // minimal accepted shape is [1 x depth x 1]
- if (rank < 3)
- return false;
- const auto depth_axis = rank - 2;
- for (uint32_t axis = 0; axis < rank; ++axis)
- {
- if (axis != depth_axis && node->dim(axis).value() != 1)
- return false;
- }
- return node->dim(depth_axis).value() == depth;
-}
-
-bool is_instance_mean_v0(luci::CircleMean *mean)
+bool is_instance_mean_v1(luci::CircleMean *mean)
{
//
// CHECK 1) input is rank 4
//
- auto input = mean->input();
- if (not loco::shape_known(input))
+ auto input = loco::must_cast<luci::CircleNode *>(mean->input());
+ if (input->shape_status() != luci::ShapeStatus::VALID)
return false;
- auto input_shape = loco::shape_get(input).as<loco::TensorShape>();
- if (input_shape.rank() != 4)
+ if (input->rank() != 4)
return false;
//
@@ -190,52 +85,22 @@ bool is_instance_mean_v0(luci::CircleMean *mean)
return mean->keep_dims();
}
-bool is_instance_mean_v1(luci::CircleMean *mean)
+/// @return true when node is a 1D FLOAT32 const with 'channel_size' elements
+bool is_1D_float32_const(const luci::CircleConst *node, uint32_t channel_size)
{
- //
- // CHECK 1) input is rank 5 (NHWCX)
- //
- auto input = mean->input();
- if (not loco::shape_known(input))
- return false;
- auto input_shape = loco::shape_get(input).as<loco::TensorShape>();
- if (input_shape.rank() != 5)
+ if (node->rank() != 1)
return false;
- //
- // CHECK 2) 'reduction indices' is CircleConst of value [1,2,4], that is HWX of NHWCX input shape
- //
- // TODO Support equivalent case, like [-3,-2]
- // TODO Support non-Const case?
- // TODO What if input is NCHW format in Circle?
- auto red_indices = dynamic_cast<luci::CircleConst *>(mean->reduction_indices());
- if (not red_indices)
- return false;
- if (red_indices->rank() != 1)
+ if (node->dim(0).value() != channel_size)
return false;
- std::set<int32_t> red_indices_set;
- // TODO Currently only support S32, support other types
- if (red_indices->dtype() != loco::DataType::S32)
+ if (node->dtype() != loco::DataType::FLOAT32)
return false;
- for (uint32_t i = 0; i < red_indices->dim(0).value(); ++i)
- red_indices_set.insert(red_indices->at<loco::DataType::S32>(i));
- if (red_indices_set.size() != 3)
- return false;
- if (red_indices_set.find(1) == red_indices_set.end())
- return false;
- if (red_indices_set.find(2) == red_indices_set.end())
- return false;
- if (red_indices_set.find(4) == red_indices_set.end())
+ if (node->size<loco::DataType::FLOAT32>() != channel_size)
return false;
- //
- // CHECK 3) keep_dims == true (?)
- //
- // We only have case of 'keep_dims == true' so far, but it might be okay with 'keep_dims == false'
- // TODO Check this fact, and if true, return true regardless of keep_dims
- return mean->keep_dims();
+ return true;
}
// Helper to fuse Instance Norm
@@ -255,6 +120,7 @@ namespace
*
* TODO support other semantically same patterns for instance norm
*
+ * Version_1
* [In]
* |
* V
@@ -289,33 +155,138 @@ namespace
* V
* [Out]
*-------------------------------------------------------------------
+ * Version_2
+ * [In]
+ * |
+ * V
+ * +----+-------------- ifm
+ * | | (reduction |
+ * | | indices) |
+ * | | | |
+ * | V V |
+ * | mean_of_ifm |
+ * | | V
+ * | +-------> sqdiff (reduction indices)
+ * V | | |
+ * sub <----+ V |
+ * | mean_as_variance <---+ const_as_epsilon
+ * | | |
+ * | V |
+ * | add_as_variance <------------+
+ * | | (0.5)
+ * | V |
+ * | pow <---+
+ * | |
+ * V |
+ * div <------------------+ const_as_gamma
+ * | |
+ * V |
+ * mul_gamma <----------------------+
+ * | const_as_beta
+ * V |
+ * add_as_terminal <--------+
+ * |
+ * V
+ * [Out]
+ *-------------------------------------------------------------------
+ * Version_3
+ * [In]
+ * |
+ * V
+ * +----+-------------- ifm ---+
+ * | | (reduction | | (reduction
+ * | | indices) | | indices)
+ * | | | | | |
+ * | V V | V V
+ * | mean_of_ifm | mean_of_ifm_2
+ * | | | |
+ * V | V |
+ * sub <----+ sub_2 <---+
+ * | |
+ * | V
+ * | square
+ * | | (reduction indices)
+ * | | |
+ * | V |
+ * | mean_as_variance <---+
+ * | |
+ * | V
+ * | sqrt const_as_epsilon
+ * | | |
+ * | V |
+ * | add_as_variance <---+
+ * | |
+ * V |
+ * div <------------------+ const_as_gamma
+ * | |
+ * V |
+ * mul_gamma <----------------------+
+ * | const_as_beta
+ * V |
+ * add_as_terminal <--------+
+ * |
+ * V
+ * [Out]
+ *-------------------------------------------------------------------
+ * Version_4
+ * - mul_gamma and add_as_terminal are removed for const_as_gamma = 1.0
+ * and const_as_beta = 0.0
+ * [In]
+ * |
+ * V
+ * +----+-------------- ifm ---+
+ * | | (reduction | | (reduction
+ * | | indices) | | indices)
+ * | | | | | |
+ * | V V | V V
+ * | mean_of_ifm | mean_of_ifm_2
+ * | | | |
+ * V | V |
+ * sub <----+ sub_2 <---+
+ * | |
+ * | V
+ * | square
+ * | | (reduction indices)
+ * | | |
+ * | V |
+ * | mean_as_variance <---+
+ * | |
+ * | V
+ * | sqrt const_as_epsilon
+ * | | |
+ * | V |
+ * | add_as_variance <---+
+ * | |
+ * V |
+ * div <------------------+
+ * |
+ * V
+ * [Out]
+ *-------------------------------------------------------------------
+ * Version_5
* [In]
* |
* V
- * ifm
- * |
- * V
- * +---------reshape_of_ifm ----+ (reduction indicies)
- * | | | |
- * | | V V
- * | | mean_of_reshape -------------+
- * | V | |
- * | sqdiff <--+ (reduction indicies) |
- * | | | |
- * | V | |
- * | mean_as_variance <---+ const_as_epsilon |
- * | | | |
- * | V | |
- * | add_as_variance <--------+ |
- * | | |
- * | V |
- * | rsqrt const_as_gamma |
- * | | | |
- * | V | |
- * | mul_gamma <--+ |
- * | | | |
- * V V V |
- * mul_as_scaled_reshape mul_as_scaled_mean <-----------+
+ * +----------- ifm -----+ (reduction indices)
+ * | | | |
+ * | | V V
+ * | | mean_of_ifm ----------------+
+ * | V | |
+ * | sqdiff <--+ (reduction indices) |
+ * | | | |
+ * | V | |
+ * | mean_as_variance <---+ const_as_epsilon |
+ * | | | |
+ * | V | |
+ * | add_as_variance <--------+ |
+ * | | |
+ * | V |
+ * | rsqrt |
+ * | | |
+ * | +--+--+ |
+ * | | | |
+ * V V V |
+ * mul_as_scaled_ifm mul_as_scaled_mean <-------------+
* | |
* | const_as_beta |
* | | V
@@ -324,9 +295,6 @@ namespace
* add_as_terminal <----------+
* |
* V
- * reshape_as_terminal
- * |
- * V
* [Out]
*/
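+// NOTE (illustrative): every version above evaluates the same expression,
+//   out = const_as_gamma * (ifm - mean(ifm)) / sqrt(var(ifm) + const_as_epsilon) + const_as_beta
+// and they differ only in how 1/sqrt(.) is spelled (RSQRT, POW(., 0.5) or DIV by SQRT)
+// and in whether gamma/beta are elided (Version_4 assumes gamma = 1.0, beta = 0.0).
+// A scalar sketch of the fused op (an assumption, for intuition only):
+//   float instance_norm(float x, float m, float v, float g, float b, float e)
+//   { return g * (x - m) / std::sqrt(v + e) + b; }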
class InstanceNormPattern final
@@ -334,8 +302,12 @@ class InstanceNormPattern final
public:
enum PatternVersion
{
- Version_0,
- Version_1
+ Version_Unknown,
+ Version_1,
+ Version_2,
+ Version_3,
+ Version_4,
+ Version_5,
};
InstanceNormPattern(luci::CircleAdd *candidate, PatternVersion pv)
@@ -345,17 +317,35 @@ public:
_pv = pv;
}
+ InstanceNormPattern(luci::CircleDiv *candidate, PatternVersion pv)
+ {
+ assert(candidate);
+ div = candidate;
+ _pv = pv;
+ }
+
+private:
+ bool condition_common_1_5(uint32_t ifm_channel_depth);
+ bool condition_common_3_4();
+
+private:
+ template <enum PatternVersion> bool match();
+
public:
bool matched();
bool matched() const { return _matched; }
+ PatternVersion version() const { return _pv; }
+
public:
// Context
loco::Node *ifm = nullptr;
luci::CircleReshape *reshape_of_ifm = nullptr;
luci::CircleMean *mean_of_ifm = nullptr;
+ luci::CircleMean *mean_of_ifm_2 = nullptr;
luci::CircleMean *mean_of_reshape = nullptr;
luci::CircleSquaredDifference *sqdiff = nullptr;
+ luci::CircleSquare *square = nullptr;
luci::CircleMean *mean_as_variance = nullptr;
luci::CircleConst *const_as_epsilon = nullptr;
luci::CircleAdd *add_as_variance = nullptr;
@@ -367,217 +357,760 @@ public:
luci::CircleMul *mul_as_scaled_reshape = nullptr;
luci::CircleConst *const_as_beta = nullptr;
luci::CircleSub *sub = nullptr;
+ luci::CircleSub *sub_2 = nullptr;
luci::CircleAdd *add_as_terminal = nullptr;
+ luci::CirclePow *pow = nullptr;
+ luci::CircleSqrt *sqrt = nullptr;
+ luci::CircleDiv *div = nullptr;
private:
bool _matched = false;
PatternVersion _pv;
};
-bool InstanceNormPattern::matched()
-{
- if (_matched)
- return true;
-
#define CHECK_OR_FALSE(condition) \
if (not(condition)) \
return false;
- // Check order is DFS
-
- if (_pv == PatternVersion::Version_0)
- {
- CHECK_OR_FALSE(fill(&mul_as_scaled_ifm, &sub).with_commutative_args_of(add_as_terminal));
- CHECK_OR_FALSE(fill(&ifm, &mul_gamma).with_commutative_args_of(mul_as_scaled_ifm));
- }
- if (_pv == PatternVersion::Version_1)
- {
- CHECK_OR_FALSE(fill(&mul_as_scaled_reshape, &sub).with_commutative_args_of(add_as_terminal));
- CHECK_OR_FALSE(
- fill(&reshape_of_ifm, &mul_gamma).with_commutative_args_of(mul_as_scaled_reshape));
- ifm = reshape_of_ifm->tensor();
- }
-
- CHECK_OR_FALSE(loco::shape_known(ifm));
- auto ifm_shape = loco::shape_get(ifm);
- CHECK_OR_FALSE(ifm_shape.domain() == loco::Domain::Tensor);
- auto ifm_tensor_shape = ifm_shape.as<loco::TensorShape>();
- CHECK_OR_FALSE(ifm_tensor_shape.rank() == 4);
- uint32_t ifm_channel_depth = ifm_tensor_shape.dim(3).value();
-
- CHECK_OR_FALSE(fill(&rsqrt, &const_as_gamma).with_commutative_args_of(mul_gamma));
-
- if (_pv == PatternVersion::Version_0)
- {
- CHECK_OR_FALSE(is_1D_with_dummy_dim(const_as_gamma, ifm_channel_depth));
- }
- if (_pv == PatternVersion::Version_1)
- {
- CHECK_OR_FALSE(is_quasi_1D_with_dummy_dim(const_as_gamma, ifm_channel_depth));
- }
-
+bool InstanceNormPattern::condition_common_1_5(uint32_t ifm_channel_depth)
+{
add_as_variance = dynamic_cast<luci::CircleAdd *>(rsqrt->x());
CHECK_OR_FALSE(add_as_variance);
CHECK_OR_FALSE(
- fill(&mean_as_variance, &const_as_epsilon).with_commutative_args_of(add_as_variance));
+ luci::fill(&mean_as_variance, &const_as_epsilon).with_commutative_args_of(add_as_variance));
CHECK_OR_FALSE(const_as_epsilon->dtype() == loco::DataType::FLOAT32);
// TODO Support regarding broadcast
CHECK_OR_FALSE(const_as_epsilon->size<loco::DataType::FLOAT32>() == 1);
- if (_pv == PatternVersion::Version_0)
- {
- CHECK_OR_FALSE(is_instance_mean_v0(mean_as_variance));
- }
- if (_pv == PatternVersion::Version_1)
- {
- CHECK_OR_FALSE(is_instance_mean_v1(mean_as_variance));
- }
+ CHECK_OR_FALSE(is_instance_mean_v1(mean_as_variance));
sqdiff = dynamic_cast<luci::CircleSquaredDifference *>(mean_as_variance->input());
CHECK_OR_FALSE(sqdiff);
- if (_pv == PatternVersion::Version_0)
- {
- loco::Node *ifm_should_be = nullptr;
- CHECK_OR_FALSE(fill(&ifm_should_be, &mean_of_ifm).with_commutative_args_of(sqdiff));
- CHECK_OR_FALSE(ifm == ifm_should_be);
- CHECK_OR_FALSE(is_instance_mean_v0(mean_of_ifm));
- CHECK_OR_FALSE(ifm == mean_of_ifm->input());
- }
- if (_pv == PatternVersion::Version_1)
- {
- loco::Node *reshape_should_be = nullptr;
- CHECK_OR_FALSE(fill(&reshape_should_be, &mean_of_reshape).with_commutative_args_of(sqdiff));
- CHECK_OR_FALSE(reshape_of_ifm == reshape_should_be);
- CHECK_OR_FALSE(is_instance_mean_v1(mean_of_reshape));
- CHECK_OR_FALSE(reshape_of_ifm == mean_of_reshape->input());
- }
+ loco::Node *ifm_should_be = nullptr;
+ CHECK_OR_FALSE(luci::fill(&ifm_should_be, &mean_of_ifm).with_commutative_args_of(sqdiff));
+ CHECK_OR_FALSE(ifm == ifm_should_be);
+ CHECK_OR_FALSE(is_instance_mean_v1(mean_of_ifm));
+ CHECK_OR_FALSE(ifm == mean_of_ifm->input());
const_as_beta = dynamic_cast<luci::CircleConst *>(sub->x());
CHECK_OR_FALSE(const_as_beta);
+ CHECK_OR_FALSE(is_1D_with_dummy_dim(const_as_beta, ifm_channel_depth));
- if (_pv == PatternVersion::Version_0)
- {
- CHECK_OR_FALSE(is_1D_with_dummy_dim(const_as_beta, ifm_channel_depth));
- }
- if (_pv == PatternVersion::Version_1)
- {
- CHECK_OR_FALSE(is_quasi_1D_with_dummy_dim(const_as_beta, ifm_channel_depth));
- }
+ return true;
+}
+
+bool InstanceNormPattern::condition_common_3_4()
+{
+ // check left sub
+ ifm = sub->x();
+ CHECK_OR_FALSE(ifm);
+
+ luci::CircleNode *ifm_node = loco::must_cast<luci::CircleNode *>(ifm);
+ CHECK_OR_FALSE(ifm_node->rank() == 4);
+ CHECK_OR_FALSE(ifm_node->dim(3).known());
+
+ mean_of_ifm = dynamic_cast<luci::CircleMean *>(sub->y());
+ CHECK_OR_FALSE(mean_of_ifm);
+ CHECK_OR_FALSE(ifm == mean_of_ifm->input());
+
+ // continue search from add_as_variance
+ CHECK_OR_FALSE(luci::fill(&sqrt, &const_as_epsilon).with_commutative_args_of(add_as_variance));
+ CHECK_OR_FALSE(const_as_epsilon->dtype() == loco::DataType::FLOAT32);
+ // TODO Support regarding broadcast
+ CHECK_OR_FALSE(const_as_epsilon->size<loco::DataType::FLOAT32>() == 1);
+
+ mean_as_variance = dynamic_cast<luci::CircleMean *>(sqrt->x());
+ CHECK_OR_FALSE(mean_as_variance);
+
+ square = dynamic_cast<luci::CircleSquare *>(mean_as_variance->input());
+ CHECK_OR_FALSE(square);
+
+ sub_2 = dynamic_cast<luci::CircleSub *>(square->x());
+ CHECK_OR_FALSE(sub_2);
+ CHECK_OR_FALSE(ifm == sub_2->x());
+
+ mean_of_ifm_2 = dynamic_cast<luci::CircleMean *>(sub_2->y());
+ CHECK_OR_FALSE(mean_of_ifm_2);
+ CHECK_OR_FALSE(ifm == mean_of_ifm_2->input());
+
+ loco::Node *ifm_should_be = nullptr;
+ luci::CircleMean *mean_of_ifm_2_should_be = nullptr;
+ CHECK_OR_FALSE(
+ luci::fill(&ifm_should_be, &mean_of_ifm_2_should_be).with_commutative_args_of(sub_2));
+ CHECK_OR_FALSE(ifm == ifm_should_be);
+ CHECK_OR_FALSE(mean_of_ifm_2 == mean_of_ifm_2_should_be);
+
+ return true;
+}
+
+template <> bool InstanceNormPattern::match<InstanceNormPattern::PatternVersion::Version_1>()
+{
+ CHECK_OR_FALSE(luci::fill(&mul_as_scaled_ifm, &sub).with_commutative_args_of(add_as_terminal));
+ CHECK_OR_FALSE(luci::fill(&ifm, &mul_gamma).with_commutative_args_of(mul_as_scaled_ifm));
+
+ auto ifm_circle = loco::must_cast<luci::CircleNode *>(ifm);
+ CHECK_OR_FALSE(ifm_circle->shape_status() == luci::ShapeStatus::VALID);
+ CHECK_OR_FALSE(ifm_circle->rank() == 4);
+ CHECK_OR_FALSE(ifm_circle->dim(3).known());
+ uint32_t ifm_channel_depth = ifm_circle->dim(3).value();
+
+ CHECK_OR_FALSE(luci::fill(&rsqrt, &const_as_gamma).with_commutative_args_of(mul_gamma));
+
+ CHECK_OR_FALSE(is_1D_with_dummy_dim(const_as_gamma, ifm_channel_depth));
+
+ CHECK_OR_FALSE(condition_common_1_5(ifm_channel_depth));
+
+ luci::CircleMul *mul_gamma_should_be = nullptr;
+ luci::CircleMean *mean_of_ifm_should_be = nullptr;
mul_as_scaled_mean = dynamic_cast<luci::CircleMul *>(sub->y());
CHECK_OR_FALSE(mul_as_scaled_mean);
+ CHECK_OR_FALSE(luci::fill(&mul_gamma_should_be, &mean_of_ifm_should_be)
+ .with_commutative_args_of(mul_as_scaled_mean));
+ CHECK_OR_FALSE(mul_gamma == mul_gamma_should_be);
+ CHECK_OR_FALSE(mean_of_ifm == mean_of_ifm_should_be);
- luci::CircleMul *mul_gamma_should_be = nullptr;
+ _matched = true;
+ return true;
+}
+
+template <> bool InstanceNormPattern::match<InstanceNormPattern::PatternVersion::Version_2>()
+{
+ CHECK_OR_FALSE(luci::fill(&mul_gamma, &const_as_beta).with_commutative_args_of(add_as_terminal));
+ CHECK_OR_FALSE(luci::fill(&div, &const_as_gamma).with_commutative_args_of(mul_gamma));
+
+ sub = dynamic_cast<luci::CircleSub *>(div->x());
+ CHECK_OR_FALSE(sub);
+
+ ifm = sub->x();
+ CHECK_OR_FALSE(ifm);
+
+ luci::CircleNode *ifm_node = loco::must_cast<luci::CircleNode *>(ifm);
+ CHECK_OR_FALSE(ifm_node->rank() == 4);
+ CHECK_OR_FALSE(ifm_node->dim(3).known());
+ uint32_t ifm_channel_depth = ifm_node->dim(3).value();
+
+ mean_of_ifm = dynamic_cast<luci::CircleMean *>(sub->y());
+ CHECK_OR_FALSE(mean_of_ifm);
+
+ CHECK_OR_FALSE(ifm == mean_of_ifm->input());
+
+ pow = dynamic_cast<luci::CirclePow *>(div->y());
+ CHECK_OR_FALSE(pow);
+
+ add_as_variance = dynamic_cast<luci::CircleAdd *>(pow->x());
+ CHECK_OR_FALSE(add_as_variance);
+
+ luci::CircleConst *zero_point_five = dynamic_cast<luci::CircleConst *>(pow->y());
+ CHECK_OR_FALSE(zero_point_five);
+ CHECK_OR_FALSE(zero_point_five->dtype() == loco::DataType::FLOAT32);
+ // TODO Support regarding broadcast
+ CHECK_OR_FALSE(zero_point_five->size<loco::DataType::FLOAT32>() == 1);
+ CHECK_OR_FALSE(zero_point_five->at<loco::DataType::FLOAT32>(0) == 0.5);
+
+ CHECK_OR_FALSE(
+ luci::fill(&mean_as_variance, &const_as_epsilon).with_commutative_args_of(add_as_variance));
+ CHECK_OR_FALSE(const_as_epsilon->dtype() == loco::DataType::FLOAT32);
+ // TODO Support regarding broadcast
+ CHECK_OR_FALSE(const_as_epsilon->size<loco::DataType::FLOAT32>() == 1);
+
+ CHECK_OR_FALSE(is_instance_mean_v1(mean_as_variance));
+
+ sqdiff = dynamic_cast<luci::CircleSquaredDifference *>(mean_as_variance->input());
+ CHECK_OR_FALSE(sqdiff);
+
+ loco::Node *ifm_should_be = nullptr;
luci::CircleMean *mean_of_ifm_should_be = nullptr;
- luci::CircleMean *mean_of_reshape_should_be = nullptr;
+ CHECK_OR_FALSE(
+ luci::fill(&ifm_should_be, &mean_of_ifm_should_be).with_commutative_args_of(sqdiff));
+ CHECK_OR_FALSE(ifm == ifm_should_be);
+ CHECK_OR_FALSE(mean_of_ifm == mean_of_ifm_should_be);
- if (_pv == PatternVersion::Version_0)
- {
- CHECK_OR_FALSE(fill(&mul_gamma_should_be, &mean_of_ifm_should_be)
- .with_commutative_args_of(mul_as_scaled_mean));
- CHECK_OR_FALSE(mul_gamma == mul_gamma_should_be);
- CHECK_OR_FALSE(mean_of_ifm == mean_of_ifm_should_be);
- }
- if (_pv == PatternVersion::Version_1)
- {
- CHECK_OR_FALSE(fill(&mul_gamma_should_be, &mean_of_reshape_should_be)
- .with_commutative_args_of(mul_as_scaled_mean));
- CHECK_OR_FALSE(mul_gamma == mul_gamma_should_be);
- CHECK_OR_FALSE(mean_of_reshape == mean_of_reshape_should_be);
- }
+ // Check for channel size
+ CHECK_OR_FALSE(is_1D_float32_const(const_as_gamma, ifm_channel_depth));
+ CHECK_OR_FALSE(is_1D_float32_const(const_as_beta, ifm_channel_depth));
+
+ _matched = true;
+ return true;
+}
+
+template <> bool InstanceNormPattern::match<InstanceNormPattern::PatternVersion::Version_3>()
+{
+ CHECK_OR_FALSE(luci::fill(&mul_gamma, &const_as_beta).with_commutative_args_of(add_as_terminal));
+ CHECK_OR_FALSE(luci::fill(&div, &const_as_gamma).with_commutative_args_of(mul_gamma));
+ CHECK_OR_FALSE(luci::fill(&sub, &add_as_variance).with_commutative_args_of(div));
+
+ CHECK_OR_FALSE(condition_common_3_4());
+
+ _matched = true;
+ return true;
+}
+
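+// Creates a rank-1, single-element FLOAT32 const holding 'value'; despite the
+// name it is used below for both the synthetic gamma (1.0f) and beta (0.0f).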
+luci::CircleConst *make_const_one(loco::Graph *graph, float value)
+{
+ auto const_one = graph->nodes()->create<luci::CircleConst>();
+ const_one->dtype(loco::DataType::FLOAT32);
+ const_one->rank(1);
+ const_one->size<loco::DataType::FLOAT32>(1);
+ const_one->at<loco::DataType::FLOAT32>(0) = value;
+ return const_one;
+}
+
+template <> bool InstanceNormPattern::match<InstanceNormPattern::PatternVersion::Version_4>()
+{
+ CHECK_OR_FALSE(div);
+ CHECK_OR_FALSE(luci::fill(&sub, &add_as_variance).with_commutative_args_of(div));
+
+ CHECK_OR_FALSE(condition_common_3_4());
+
+ assert(const_as_gamma == nullptr);
+ assert(const_as_beta == nullptr);
+ assert(mul_gamma == nullptr);
+ assert(add_as_terminal == nullptr);
+
+ // create 1.0 gamma and 0.0 beta
+ auto graph = div->graph();
+ const_as_gamma = make_const_one(graph, 1.0f);
+ const_as_beta = make_const_one(graph, 0.0f);
+ const_as_gamma->name(div->name() + "/gamma");
+ const_as_beta->name(div->name() + "/beta");
+
+ _matched = true;
+ return true;
+}
+
+template <> bool InstanceNormPattern::match<InstanceNormPattern::PatternVersion::Version_5>()
+{
+ CHECK_OR_FALSE(luci::fill(&mul_as_scaled_ifm, &sub).with_commutative_args_of(add_as_terminal));
+ CHECK_OR_FALSE(luci::fill(&ifm, &rsqrt).with_commutative_args_of(mul_as_scaled_ifm));
+
+ auto ifm_circle = loco::must_cast<luci::CircleNode *>(ifm);
+ CHECK_OR_FALSE(ifm_circle->shape_status() == luci::ShapeStatus::VALID);
+ CHECK_OR_FALSE(ifm_circle->rank() == 4);
+ CHECK_OR_FALSE(ifm_circle->dim(3).known());
+ uint32_t ifm_channel_depth = ifm_circle->dim(3).value();
+
+ CHECK_OR_FALSE(condition_common_1_5(ifm_channel_depth));
+
+ luci::CircleRsqrt *rsqrt_should_be = nullptr;
+ luci::CircleMean *mean_of_ifm_should_be = nullptr;
+
+ mul_as_scaled_mean = dynamic_cast<luci::CircleMul *>(sub->y());
+ CHECK_OR_FALSE(mul_as_scaled_mean);
+ CHECK_OR_FALSE(luci::fill(&rsqrt_should_be, &mean_of_ifm_should_be)
+ .with_commutative_args_of(mul_as_scaled_mean));
+ CHECK_OR_FALSE(rsqrt == rsqrt_should_be);
+ CHECK_OR_FALSE(mean_of_ifm == mean_of_ifm_should_be);
+
+ // mul_gamma is absent
+ // const_as_gamma assume to be 1.0
+ auto graph = add_as_terminal->graph();
+ const_as_gamma = make_const_one(graph, 1.0f);
+ const_as_gamma->name(add_as_terminal->name() + "/gamma");
-#undef CHECK_OR_FALSE
_matched = true;
return true;
}
+bool InstanceNormPattern::matched()
+{
+ if (_matched)
+ return true;
+
+ // Check order is DFS
+
+ switch (_pv)
+ {
+ case PatternVersion::Version_1:
+ return match<PatternVersion::Version_1>();
+ case PatternVersion::Version_2:
+ return match<PatternVersion::Version_2>();
+ case PatternVersion::Version_3:
+ return match<PatternVersion::Version_3>();
+ case PatternVersion::Version_4:
+ return match<PatternVersion::Version_4>();
+ case PatternVersion::Version_5:
+ return match<PatternVersion::Version_5>();
+
+ default:
+ break;
+ }
+
+ throw std::runtime_error("Invalid InstanceNorm PatternVersion.");
+}
+
+#undef CHECK_OR_FALSE
+
/**
* Instance norm pattern would be fused like following diagram:
*
- * [In] --------------------------- CircleInstanceNorm --- [Out]
- * / /
- * const_as_gamma --- TFLReshape --- /
- * /
- * const_as_beta ---- TFLReshape ---
+ * [In] -------------- CircleInstanceNorm --- [Out]
+ * / /
+ * const_as_gamma ---- /
+ * /
+ * const_as_beta -----
*
* Note
* - 'const_as_gamma' and 'const_as_beta' are from original graph
* - Value of 'const_as_epsilon' would be copied to CircleInstanceNorm's attribute
- * - TFLReshape is added as CircleInstanceNorm only accept 1D tensor
+ * - The shapes of the two CircleConst nodes are updated, as CircleInstanceNorm only accepts 1D tensors
* - 'CircleConst --- TFLReshape' is expected to be fused in constant folding for Reshape
*/
-void fuse_instance_norm(const InstanceNormPattern &p)
+
+class FuseInstanceNorm final
{
- assert(p.matched());
+public:
+ FuseInstanceNorm(const InstanceNormPattern &p) : _p(p) {}
- auto graph = p.add_as_terminal->graph();
+public:
+ void apply(void);
- // Make reshape for gamma & beta
- auto reshape_gamma = graph->nodes()->create<luci::CircleReshape>();
- auto reshape_beta = graph->nodes()->create<luci::CircleReshape>();
- {
- auto ifm_shape = loco::shape_get(p.ifm).as<loco::TensorShape>();
- uint32_t ifm_channel_depth = ifm_shape.dim(3).value();
+private:
+ template <InstanceNormPattern::PatternVersion> void apply(void);
- int32_t new_shape[1] = {static_cast<int32_t>(ifm_channel_depth)};
+private:
+ void reshape_gamma_beta(void);
+ luci::CircleInstanceNorm *create_inst_norm(loco::Graph *graph);
- reshape_gamma->tensor(p.const_as_gamma);
- reshape_beta->tensor(p.const_as_beta);
+private:
+ const InstanceNormPattern &_p;
+};
- luci::set_new_shape(reshape_gamma, new_shape, 1);
- luci::set_new_shape(reshape_beta, new_shape, 1);
+void FuseInstanceNorm::reshape_gamma_beta()
+{
+ // Versions 1, 3 and 5 need to reshape gamma/beta to rank-1
+ {
+ _p.const_as_gamma->rank(1);
+ _p.const_as_gamma->dim(0).set(_p.const_as_gamma->size<loco::DataType::FLOAT32>());
+ _p.const_as_beta->rank(1);
+ _p.const_as_beta->dim(0).set(_p.const_as_beta->size<loco::DataType::FLOAT32>());
+
+ _p.const_as_gamma->shape_status(luci::ShapeStatus::UNDEFINED);
+ _p.const_as_beta->shape_status(luci::ShapeStatus::UNDEFINED);
}
+}
+luci::CircleInstanceNorm *FuseInstanceNorm::create_inst_norm(loco::Graph *graph)
+{
// Make Instance Norm to replace
auto instance_norm = graph->nodes()->create<luci::CircleInstanceNorm>();
- instance_norm->input(p.ifm);
- instance_norm->gamma(reshape_gamma);
- instance_norm->beta(reshape_beta);
- float epsilon = p.const_as_epsilon->at<loco::DataType::FLOAT32>(0);
+ instance_norm->input(_p.ifm);
+ instance_norm->gamma(_p.const_as_gamma);
+ instance_norm->beta(_p.const_as_beta);
+ float epsilon = _p.const_as_epsilon->at<loco::DataType::FLOAT32>(0);
instance_norm->epsilon(epsilon);
- instance_norm->fusedActivationFunction(p.add_as_terminal->fusedActivationFunction());
+ if (_p.add_as_terminal != nullptr)
+ {
+ instance_norm->fusedActivationFunction(_p.add_as_terminal->fusedActivationFunction());
+ // NOTE unique name should be assigned in export
+ instance_norm->name("FusedInstanceNorm/" + _p.add_as_terminal->name());
+ }
+ else
+ {
+ // VERSION_4
+ assert(_p.div != nullptr);
+ instance_norm->fusedActivationFunction(_p.div->fusedActivationFunction());
+ instance_norm->name("FusedInstanceNorm/" + _p.div->name());
+ }
- replace(p.add_as_terminal).with(instance_norm);
+ return instance_norm;
+}
+
+template <> void FuseInstanceNorm::apply<InstanceNormPattern::PatternVersion::Version_1>()
+{
+ auto graph = _p.add_as_terminal->graph();
+
+ reshape_gamma_beta();
+
+ auto instance_norm = create_inst_norm(graph);
+
+ // set origin
+ std::vector<std::shared_ptr<luci::CircleNodeOrigin>> origin_vec{
+ luci::get_origin(_p.mean_of_ifm),
+ luci::get_origin(_p.sqdiff),
+ luci::get_origin(_p.mean_as_variance),
+ luci::get_origin(_p.add_as_variance),
+ luci::get_origin(_p.rsqrt),
+ luci::get_origin(_p.mul_gamma),
+ luci::get_origin(_p.mul_as_scaled_ifm),
+ luci::get_origin(_p.mul_as_scaled_mean),
+ luci::get_origin(_p.sub),
+ luci::get_origin(_p.add_as_terminal)};
+
+ luci::add_origin(instance_norm, luci::composite_origin(origin_vec));
+
+ replace(_p.add_as_terminal).with(instance_norm);
+}
+
+template <> void FuseInstanceNorm::apply<InstanceNormPattern::PatternVersion::Version_2>()
+{
+ auto graph = _p.add_as_terminal->graph();
+
+ auto instance_norm = create_inst_norm(graph);
+
+ // set origin
+ std::vector<std::shared_ptr<luci::CircleNodeOrigin>> origin_vec{
+ luci::get_origin(_p.mean_of_ifm),
+ luci::get_origin(_p.sqdiff),
+ luci::get_origin(_p.mean_as_variance),
+ luci::get_origin(_p.add_as_variance),
+ luci::get_origin(_p.pow),
+ luci::get_origin(_p.sub),
+ luci::get_origin(_p.div),
+ luci::get_origin(_p.mul_gamma),
+ luci::get_origin(_p.add_as_terminal)};
+
+ luci::add_origin(instance_norm, luci::composite_origin(origin_vec));
+
+ replace(_p.add_as_terminal).with(instance_norm);
+}
+
+template <> void FuseInstanceNorm::apply<InstanceNormPattern::PatternVersion::Version_3>()
+{
+ auto graph = _p.add_as_terminal->graph();
+
+ reshape_gamma_beta();
+
+ auto instance_norm = create_inst_norm(graph);
+
+ // set origin
+ std::vector<std::shared_ptr<luci::CircleNodeOrigin>> origin_vec{
+ luci::get_origin(_p.mean_of_ifm),
+ luci::get_origin(_p.sub),
+ luci::get_origin(_p.mean_of_ifm_2),
+ luci::get_origin(_p.sub_2),
+ luci::get_origin(_p.square),
+ luci::get_origin(_p.mean_as_variance),
+ luci::get_origin(_p.sqrt),
+ luci::get_origin(_p.add_as_variance),
+ luci::get_origin(_p.div),
+ luci::get_origin(_p.mul_gamma),
+ luci::get_origin(_p.add_as_terminal)};
+
+ luci::add_origin(instance_norm, luci::composite_origin(origin_vec));
+
+ replace(_p.add_as_terminal).with(instance_norm);
+}
+
+template <> void FuseInstanceNorm::apply<InstanceNormPattern::PatternVersion::Version_4>()
+{
+ auto graph = _p.div->graph();
+
+ auto instance_norm = create_inst_norm(graph);
+
+ // set origin
+ std::vector<std::shared_ptr<luci::CircleNodeOrigin>> origin_vec{
+ luci::get_origin(_p.mean_of_ifm),
+ luci::get_origin(_p.sub),
+ luci::get_origin(_p.mean_of_ifm_2),
+ luci::get_origin(_p.sub_2),
+ luci::get_origin(_p.square),
+ luci::get_origin(_p.mean_as_variance),
+ luci::get_origin(_p.sqrt),
+ luci::get_origin(_p.add_as_variance),
+ luci::get_origin(_p.div)};
+
+ luci::add_origin(instance_norm, luci::composite_origin(origin_vec));
+
+ replace(_p.div).with(instance_norm);
+}
+
+template <> void FuseInstanceNorm::apply<InstanceNormPattern::PatternVersion::Version_5>()
+{
+ auto graph = _p.add_as_terminal->graph();
+
+ reshape_gamma_beta();
+
+ auto instance_norm = create_inst_norm(graph);
+
+ // set origin
+ std::vector<std::shared_ptr<luci::CircleNodeOrigin>> origin_vec{
+ luci::get_origin(_p.mean_of_ifm),
+ luci::get_origin(_p.sqdiff),
+ luci::get_origin(_p.mean_as_variance),
+ luci::get_origin(_p.add_as_variance),
+ luci::get_origin(_p.rsqrt),
+ luci::get_origin(_p.mul_as_scaled_ifm),
+ luci::get_origin(_p.mul_as_scaled_mean),
+ luci::get_origin(_p.sub),
+ luci::get_origin(_p.add_as_terminal)};
+
+ luci::add_origin(instance_norm, luci::composite_origin(origin_vec));
+
+ replace(_p.add_as_terminal).with(instance_norm);
+}
+
+void FuseInstanceNorm::apply()
+{
+ assert(_p.matched());
+
+ switch (_p.version())
+ {
+ case InstanceNormPattern::PatternVersion::Version_1:
+ apply<InstanceNormPattern::PatternVersion::Version_1>();
+ break;
+ case InstanceNormPattern::PatternVersion::Version_2:
+ apply<InstanceNormPattern::PatternVersion::Version_2>();
+ break;
+ case InstanceNormPattern::PatternVersion::Version_3:
+ apply<InstanceNormPattern::PatternVersion::Version_3>();
+ break;
+ case InstanceNormPattern::PatternVersion::Version_4:
+ apply<InstanceNormPattern::PatternVersion::Version_4>();
+ break;
+ case InstanceNormPattern::PatternVersion::Version_5:
+ apply<InstanceNormPattern::PatternVersion::Version_5>();
+ break;
+
+ default:
+ break;
+ }
}
} // namespace
-namespace luci
+namespace
{
-bool FuseInstanceNormPass::run(loco::Graph *g)
+class PostFusion final
+{
+public:
+ PostFusion(luci::CircleInstanceNorm *inst_norm) : _inst_norm(inst_norm) {}
+
+private:
+ uint32_t input_channel(void);
+
+ luci::CircleConst *match_const_channel(luci::CircleConst *, uint32_t);
+ bool match_const_gamma_channel(void);
+ bool match_const_beta_channel(void);
+
+public:
+ bool process(void);
+
+private:
+ luci::CircleInstanceNorm *_inst_norm = nullptr;
+};
+
+/**
+ * @brief return the channel (C) value, or 0 if the input shape status is not valid
+ */
+uint32_t PostFusion::input_channel(void)
+{
+ auto input = dynamic_cast<luci::CircleNode *>(_inst_norm->input());
+ if (input == nullptr)
+ return 0;
+ if (input->shape_status() != luci::ShapeStatus::VALID)
+ return 0;
+
+ auto input_rank = input->rank();
+ if (input_rank < 1)
+ return 0;
+
+ // assume channel-last
+ return input->dim(input_rank - 1).value();
+}
+
+/**
+ * @brief return a new CircleConst broadcast to C channels if input_const has channel 1 (!= C), else nullptr
+ */
+luci::CircleConst *PostFusion::match_const_channel(luci::CircleConst *input_const, uint32_t C)
+{
+ luci::CircleConst *new_input_const = nullptr;
+
+ auto input_chn = input_const->dim(0).value();
+ if (input_chn == 1 && input_chn != C)
+ {
+ float value = input_const->at<loco::DataType::FLOAT32>(0);
+ auto clone = luci::clone_node(input_const, input_const->graph());
+
+ new_input_const = loco::must_cast<luci::CircleConst *>(clone);
+ new_input_const->rank(1);
+ new_input_const->dim(0).set(C);
+ new_input_const->size<loco::DataType::FLOAT32>(C);
+ for (uint32_t c = 0; c < C; ++c)
+ new_input_const->at<loco::DataType::FLOAT32>(c) = value;
+ }
+
+ return new_input_const;
+}
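+// Example (illustrative): a gamma const of shape [1] holding 1.0f with input
+// channel C == 32 is cloned into a shape [32] const filled with 1.0f; consts
+// whose channel dim already equals C, or is not 1, are left as is (nullptr).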
+
+/**
+ * @brief Broadcast gamma to match input channel if CircleConst
+ */
+bool PostFusion::match_const_gamma_channel(void)
+{
+ auto const_as_gamma = dynamic_cast<luci::CircleConst *>(_inst_norm->gamma());
+ if (const_as_gamma == nullptr)
+ return false;
+
+ auto C = input_channel();
+ if (C == 0)
+ return false;
+
+ auto new_const_as_gamma = match_const_channel(const_as_gamma, C);
+ if (new_const_as_gamma == nullptr)
+ return false;
+
+ _inst_norm->gamma(new_const_as_gamma);
+
+ return true;
+}
+
+/**
+ * @brief Broadcast beta to match input channel if CircleConst
+ */
+bool PostFusion::match_const_beta_channel(void)
+{
+ auto const_as_beta = dynamic_cast<luci::CircleConst *>(_inst_norm->beta());
+ if (const_as_beta == nullptr)
+ return false;
+
+ auto C = input_channel();
+ if (C == 0)
+ return false;
+
+ auto new_const_as_beta = match_const_channel(const_as_beta, C);
+ if (new_const_as_beta == nullptr)
+ return false;
+
+ _inst_norm->beta(new_const_as_beta);
+
+ return true;
+}
+
+bool PostFusion::process(void)
{
bool changed = false;
- luci::CircleAdd *add;
- InstanceNormPattern::PatternVersion pv;
- for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ if (match_const_gamma_channel())
+ changed = true;
+ if (match_const_beta_channel())
+ changed = true;
+
+ return changed;
+}
+
+} // namespace
+
+namespace
+{
+
+bool is_add_input_mul_const(luci::CircleAdd *add)
+{
+ luci::CircleMul *p_mul = nullptr;
+ luci::CircleConst *p_const = nullptr;
+
+ return luci::fill(&p_mul, &p_const).with_commutative_args_of(add);
+}
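+// Dispatch heuristic (illustrative): Version_2/3 terminate in ADD(MUL, CONST),
+// so that shape selects the Version_2 path first; otherwise Version_1 is tried.
+// Each path has a fallback (Version_1 -> Version_5, Version_2 -> Version_3).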
+
+bool fuse_instance_norm(luci::CircleAdd *add)
+{
+ InstanceNormPattern::PatternVersion pv = InstanceNormPattern::PatternVersion::Version_1;
+
+ if (is_add_input_mul_const(add))
+ pv = InstanceNormPattern::PatternVersion::Version_2;
+
+ InstanceNormPattern pattern(add, pv);
+ if (pattern.matched())
{
- auto reshape = dynamic_cast<luci::CircleReshape *>(node);
- if (not reshape)
+ FuseInstanceNorm fuse(pattern);
+ fuse.apply();
+ return true;
+ }
+
+ if (pv == InstanceNormPattern::PatternVersion::Version_1)
+ {
+ // if Version_1 failed, try with Version_5
+ pv = InstanceNormPattern::PatternVersion::Version_5;
+ InstanceNormPattern pattern(add, pv);
+ if (pattern.matched())
{
- add = dynamic_cast<luci::CircleAdd *>(node);
- if (not add)
- continue;
- pv = InstanceNormPattern::PatternVersion::Version_0;
+ FuseInstanceNorm fuse(pattern);
+ fuse.apply();
+ return true;
}
- else
+ }
+ else if (pv == InstanceNormPattern::PatternVersion::Version_2)
+ {
+ // if Version_2 failed, try with Version_3
+ pv = InstanceNormPattern::PatternVersion::Version_3;
+ InstanceNormPattern pattern(add, pv);
+ if (pattern.matched())
{
- add = dynamic_cast<luci::CircleAdd *>(reshape->tensor());
- if (not add)
- continue;
- pv = InstanceNormPattern::PatternVersion::Version_1;
+ FuseInstanceNorm fuse(pattern);
+ fuse.apply();
+ return true;
}
+ }
- InstanceNormPattern pattern(add, pv);
- if (not pattern.matched())
+ return false;
+}
+
+bool fuse_instance_norm(luci::CircleDiv *div)
+{
+ InstanceNormPattern::PatternVersion pv = InstanceNormPattern::PatternVersion::Version_4;
+
+ InstanceNormPattern pattern(div, pv);
+ if (pattern.matched())
+ {
+ FuseInstanceNorm fuse(pattern);
+ fuse.apply();
+ return true;
+ }
+
+ return false;
+}
+
+bool post_fusion(luci::CircleInstanceNorm *inst_norm)
+{
+ PostFusion postfusion(inst_norm);
+
+ return postfusion.process();
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool FuseInstanceNormPass::run(loco::Graph *g)
+{
+ bool changed = false;
+
+ // Check Version_1, Version_2, Version_3, Version_5
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto add = dynamic_cast<luci::CircleAdd *>(node);
+ if (not add)
continue;
- fuse_instance_norm(pattern);
- changed = true;
+ if (fuse_instance_norm(add))
+ changed = true;
+ }
+
+ // Check Version_4 (from DIV); this pattern is not rooted at an ADD
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto div = dynamic_cast<luci::CircleDiv *>(node);
+ if (not div)
+ continue;
+
+ if (fuse_instance_norm(div))
+ changed = true;
+ }
+
+ // Post processing of FuseInstanceNorm
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto inst_norm = dynamic_cast<luci::CircleInstanceNorm *>(node);
+ if (not inst_norm)
+ continue;
+
+ if (post_fusion(inst_norm))
+ changed = true;
}
return changed;
diff --git a/compiler/luci/pass/src/FuseInstanceNormPass.test.cpp b/compiler/luci/pass/src/FuseInstanceNormPass.test.cpp
index 3037f3def..83489fdea 100644
--- a/compiler/luci/pass/src/FuseInstanceNormPass.test.cpp
+++ b/compiler/luci/pass/src/FuseInstanceNormPass.test.cpp
@@ -16,6 +16,8 @@
#include "FuseInstanceNormPassInternal.h"
+#include "luci/Pass/FuseInstanceNormPass.h"
+
#include <vector>
#include <gtest/gtest.h>
@@ -34,31 +36,9 @@ void setShape(luci::CircleNode &node, const std::vector<int> &v)
} // namespace
-TEST(FuseInstanceNormPass, is_quasi_1D_with_dummy_dim)
+TEST(FuseInstanceNormPassTest, name)
{
- luci::CircleConst const_node;
-
- setShape(const_node, {});
- EXPECT_FALSE(is_quasi_1D_with_dummy_dim(&const_node, 8));
-
- setShape(const_node, {1});
- EXPECT_FALSE(is_quasi_1D_with_dummy_dim(&const_node, 8));
-
- setShape(const_node, {8});
- EXPECT_FALSE(is_quasi_1D_with_dummy_dim(&const_node, 8));
-
- setShape(const_node, {1, 2, 1, 8, 1});
- EXPECT_FALSE(is_quasi_1D_with_dummy_dim(&const_node, 8));
-
- setShape(const_node, {8, 3});
- EXPECT_FALSE(is_quasi_1D_with_dummy_dim(&const_node, 8));
-
- setShape(const_node, {8, 1});
- EXPECT_FALSE(is_quasi_1D_with_dummy_dim(&const_node, 8));
-
- setShape(const_node, {1, 8, 1});
- EXPECT_TRUE(is_quasi_1D_with_dummy_dim(&const_node, 8));
-
- setShape(const_node, {1, 1, 1, 8, 1});
- EXPECT_TRUE(is_quasi_1D_with_dummy_dim(&const_node, 8));
+ luci::FuseInstanceNormPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
}
diff --git a/compiler/luci/pass/src/FuseMeanWithMeanPass.cpp b/compiler/luci/pass/src/FuseMeanWithMeanPass.cpp
new file mode 100644
index 000000000..75113f94b
--- /dev/null
+++ b/compiler/luci/pass/src/FuseMeanWithMeanPass.cpp
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseMeanWithMeanPass.h"
+
+#include <luci/IR/CircleNode.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+namespace
+{
+/**
+ * Fuse two Mean operations into one Mean operation with merged reduction indices
+ *
+ * BEFORE
+ * |
+ * [CircleMean, axis<1>]
+ * |
+ * [CircleMean, axis<1>]
+ * |
+ *
+ * AFTER
+ * |
+ * [CircleMean, axis<1,2>] [CircleMean, axis<1>]
+ * | |
+ * [CircleMean, axis<1>]
+ *
+ */
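+// Worked example (illustrative): for an input of rank 4, a previous Mean over
+// axis {1} leaves input axes [0, 2, 3]; a following Mean over its axis {1}
+// then refers to original axis 2, so the fused reduction indices become {1, 2}.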
+luci::CircleConst *create_fused_indices(luci::CircleConst *indices,
+ const std::set<uint32_t> &indices_set)
+{
+ auto name = indices->name();
+
+ auto fused_indices_const = indices->graph()->nodes()->create<luci::CircleConst>();
+ fused_indices_const->dtype(indices->dtype());
+ fused_indices_const->rank(1);
+ fused_indices_const->dim(0) = indices_set.size();
+ fused_indices_const->size<loco::DataType::S32>(indices_set.size());
+ fused_indices_const->shape_status(luci::ShapeStatus::VALID);
+ fused_indices_const->name(name);
+
+ auto curr_index = 0;
+ for (auto it = indices_set.begin(); it != indices_set.end(); it++)
+ {
+ fused_indices_const->at<loco::DataType::S32>(curr_index) = *it;
+ curr_index++;
+ }
+
+ return fused_indices_const;
+}
+
+bool fuse_mean_with_mean(luci::CircleMean *mean)
+{
+ // Get reduction indices of current CircleMean operation.
+ auto indices = dynamic_cast<luci::CircleConst *>(mean->reduction_indices());
+ if (not indices)
+ return false;
+ assert(indices->dtype() == loco::DataType::S32);
+
+ // Check whether previous node is CircleMean operation or not.
+ auto prev_mean = dynamic_cast<luci::CircleMean *>(mean->input());
+ if (not prev_mean)
+ return false;
+
+ // Check whether the input rank of the previous CircleMean operation is less than 2.
+ // This optimization works only when the rank is at least 2.
+ auto input = loco::must_cast<luci::CircleNode *>(prev_mean->input());
+ if (input->shape_status() != luci::ShapeStatus::VALID)
+ return false;
+ auto input_rank = input->rank();
+ if (input_rank < 2)
+ return false;
+
+ // Check whether the current CircleMean and the previous CircleMean
+ // have the same keep_dims parameter.
+ // If they differ, keep the graph unchanged.
+ if (mean->keep_dims() != prev_mean->keep_dims())
+ return false;
+
+ // Get reduction indices of previous CircleMean operation.
+ auto prev_indices = dynamic_cast<luci::CircleConst *>(prev_mean->reduction_indices());
+ if (not prev_indices)
+ return false;
+ assert(prev_indices->dtype() == loco::DataType::S32);
+
+ // Get sizes of indices of current CircleMean operation and previous CircleMean operation.
+ auto indices_size = indices->size<loco::DataType::S32>();
+ auto prev_indices_size = prev_indices->size<loco::DataType::S32>();
+
+ // Get set of indices of previous CircleMean operation.
+ std::set<uint32_t> indices_set;
+ for (uint32_t i = 0; i < prev_indices_size; i++)
+ {
+ auto index = prev_indices->at<loco::DataType::S32>(i);
+ if (index < 0)
+ index += input_rank;
+ indices_set.insert(index);
+ }
+
+ // Get the vector of input axes that remain untouched
+ // after the previous CircleMean operation.
+ std::vector<uint32_t> input_indices_vector;
+ for (uint32_t i = 0; i < input_rank; i++)
+ {
+ if (indices_set.find(i) == indices_set.end())
+ input_indices_vector.push_back(i);
+ }
+
+ // Get final set of merged indices.
+ for (uint32_t i = 0; i < indices_size; i++)
+ {
+ auto index = indices->at<loco::DataType::S32>(i);
+ if (index < 0)
+ index += input_rank;
+ indices_set.insert(input_indices_vector.at(index));
+ }
+
+ // Create merged indices.
+ auto fused_indices_const = create_fused_indices(indices, indices_set);
+
+ auto name = mean->name();
+ assert(name.length() > 0);
+
+ // Create and configure new CircleMean operation.
+ auto fused_mean = mean->graph()->nodes()->create<luci::CircleMean>();
+ fused_mean->reduction_indices(fused_indices_const);
+ fused_mean->input(prev_mean->input());
+ fused_mean->keep_dims(mean->keep_dims());
+ fused_mean->name(name + "/Mean");
+
+ // Replace old CircleMean operations with new CircleMean operation with merged indices.
+ replace(mean).with(fused_mean);
+ luci::add_origin(fused_mean,
+ luci::composite_origin({luci::get_origin(mean), luci::get_origin(prev_mean)}));
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool FuseMeanWithMeanPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto mean = dynamic_cast<luci::CircleMean *>(node);
+ if (not mean)
+ continue;
+
+ if (fuse_mean_with_mean(mean))
+ changed = true;
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FuseMeanWithMeanPass.test.cpp b/compiler/luci/pass/src/FuseMeanWithMeanPass.test.cpp
new file mode 100644
index 000000000..30511e2d6
--- /dev/null
+++ b/compiler/luci/pass/src/FuseMeanWithMeanPass.test.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseMeanWithMeanPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+/**
+ * Simple graph for test
+ *
+ * BEFORE
+ * |
+ * [CircleMean, axis<1>]
+ * |
+ * [CircleMean, axis<1>]
+ * |
+ *
+ * AFTER
+ * |
+ * [CircleMean, axis<1,2>] [CircleMean, axis<1>]
+ * | |
+ * [CircleMean, axis<1>]
+ *
+ */
+class MeansGraphlet
+{
+public:
+ MeansGraphlet() = default;
+
+ void init(loco::Graph *g)
+ {
+ _mean1 = g->nodes()->create<luci::CircleMean>();
+ _mean2 = g->nodes()->create<luci::CircleMean>();
+ _indices1 = g->nodes()->create<luci::CircleConst>();
+ _indices2 = g->nodes()->create<luci::CircleConst>();
+
+ _mean1->name("mean1");
+ _mean2->name("mean2");
+ _indices1->name("indices1");
+ _indices2->name("indices2");
+ }
+
+public:
+ luci::CircleMean *mean1() { return _mean1; }
+ luci::CircleMean *mean2() { return _mean2; }
+
+protected:
+ luci::CircleMean *_mean1 = nullptr;
+ luci::CircleMean *_mean2 = nullptr;
+ luci::CircleConst *_indices1 = nullptr;
+ luci::CircleConst *_indices2 = nullptr;
+};
+
+class FuseMeanWithMeanTestGraph : public TestIOGraph, public MeansGraphlet
+{
+public:
+ FuseMeanWithMeanTestGraph() = default;
+
+ void init(void)
+ {
+ TestIOGraph::init({1, 64, 20, 32}, {1, 20});
+ MeansGraphlet::init(g());
+
+ _indices1->rank(1);
+ _indices1->dtype(loco::DataType::S32);
+ _indices1->size<loco::DataType::S32>(1);
+ _indices1->at<loco::DataType::S32>(0) = static_cast<int32_t>(1);
+ _indices1->shape_status(luci::ShapeStatus::VALID);
+
+ _indices2->rank(1);
+ _indices2->dtype(loco::DataType::S32);
+ _indices2->size<loco::DataType::S32>(1);
+ _indices2->at<loco::DataType::S32>(0) = static_cast<int32_t>(2);
+ _indices2->shape_status(luci::ShapeStatus::VALID);
+
+ _mean1->input(input());
+ _mean1->reduction_indices(_indices1);
+
+ _mean2->input(_mean1);
+ _mean2->reduction_indices(_indices2);
+
+ output()->from(_mean2);
+ }
+};
+
+} // namespace
+
+TEST(FuseMeanWithMeanPassTest, name)
+{
+ luci::FuseMeanWithMeanPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(FuseMeanWithMeanPassTest, fuse_mean_with_mean)
+{
+ FuseMeanWithMeanTestGraph g;
+ luci::FuseMeanWithMeanPass pass;
+
+ g.init();
+
+ EXPECT_TRUE(pass.run(g.g()));
+}
+
+TEST(FuseMeanWithMeanPassTest, fuse_mean_with_mean_NEG)
+{
+ FuseMeanWithMeanTestGraph g;
+ luci::FuseMeanWithMeanPass pass;
+
+ g.init();
+
+ // Add a CircleRelu operation between the CircleMean operations
+ auto relu = g.g()->nodes()->create<luci::CircleRelu>();
+ relu->name("relu");
+ relu->features(g.mean1());
+ g.mean2()->input(relu);
+
+ // Due to the CircleRelu operation, the pass will not be applied
+ EXPECT_FALSE(pass.run(g.g()));
+}
diff --git a/compiler/luci/pass/src/FusePReluPass.cpp b/compiler/luci/pass/src/FusePReluPass.cpp
new file mode 100644
index 000000000..a5ce60ebf
--- /dev/null
+++ b/compiler/luci/pass/src/FusePReluPass.cpp
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FusePReluPass.h"
+#include "helpers/NodeFiller.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/Profile/CircleNodeOrigin.h>
+#include <luci/Service/CircleNodeClone.h>
+
+#include <cassert>
+
+// Helper to fuse PRelu
+namespace
+{
+
+/**
+ * Below diagram shows PRelu pattern to fuse.
+ * - this pattern will be replaced with one PRelu
+ *
+ * [In]
+ * |
+ * V
+ * +---- ifm ----+
+ * | | |
+ * | | V
+ * | | abs
+ * | V |
+ * | sub <---+
+ * | |
+ * | V
+ * | mul_alpha (alpha of PRelu)
+ * | |
+ * V V
+ * relu mul_half (0.5)
+ * | |
+ * | V
+ * +---> add
+ * |
+ * V
+ * [Out]
+ *
+ */
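+// NOTE (illustrative): the pattern is equivalent to PRelu because, with
+// mul_half == 0.5,
+//   0.5 * (x - |x|) == min(x, 0)
+// so relu(x) + alpha * 0.5 * (x - |x|) == max(x, 0) + alpha * min(x, 0) == prelu(x).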
+class PReluPattern final
+{
+public:
+ PReluPattern(luci::CircleAdd *candidate)
+ {
+ assert(candidate);
+ _add_ofm = candidate;
+ }
+
+public:
+ bool matched();
+
+public:
+ luci::CircleNode *_ifm = nullptr;
+ luci::CircleRelu *_relu = nullptr;
+ luci::CircleAbs *_abs = nullptr;
+ luci::CircleSub *_sub = nullptr;
+ luci::CircleMul *_mul_alpha = nullptr;
+ luci::CircleMul *_mul_half = nullptr;
+ luci::CircleAdd *_add_ofm = nullptr;
+ luci::CircleConst *_const_alpha = nullptr;
+ luci::CircleConst *_const_half = nullptr;
+};
+
+#define CHECK_OR_FALSE(condition) \
+ if (not(condition)) \
+ return false;
+
+bool PReluPattern::matched()
+{
+ // check pattern
+ CHECK_OR_FALSE(luci::fill(&_relu, &_mul_half).with_commutative_args_of(_add_ofm));
+ CHECK_OR_FALSE(luci::fill(&_mul_alpha, &_const_half).with_commutative_args_of(_mul_half));
+ CHECK_OR_FALSE(luci::fill(&_sub, &_const_alpha).with_commutative_args_of(_mul_alpha));
+
+ CHECK_OR_FALSE(luci::fill(&_ifm, &_abs).with_args_of(_sub));
+
+ CHECK_OR_FALSE(_relu->features() == _ifm);
+ CHECK_OR_FALSE(_abs->x() == _ifm);
+
+ // Check Activation to be NONE
+ CHECK_OR_FALSE(_sub->fusedActivationFunction() == luci::FusedActFunc::NONE);
+ CHECK_OR_FALSE(_mul_alpha->fusedActivationFunction() == luci::FusedActFunc::NONE);
+ CHECK_OR_FALSE(_mul_half->fusedActivationFunction() == luci::FusedActFunc::NONE);
+ CHECK_OR_FALSE(_add_ofm->fusedActivationFunction() == luci::FusedActFunc::NONE);
+
+ // TODO support other types?
+ // check if _const_half is really FLOAT32 & 0.5
+ CHECK_OR_FALSE(_const_half->dtype() == loco::DataType::FLOAT32);
+ CHECK_OR_FALSE(_const_half->size<loco::DataType::FLOAT32>() == 1);
+ CHECK_OR_FALSE(_const_half->at<loco::DataType::FLOAT32>(0) == 0.5);
+
+ // check _const_alpha condition
+ CHECK_OR_FALSE(_const_alpha->dtype() == loco::DataType::FLOAT32);
+ // TODO add more if needed
+
+ return true;
+}
+
+#undef CHECK_OR_FALSE
+
+class FusePRelu final
+{
+public:
+ FusePRelu(const PReluPattern &p) : _p(p) {}
+
+public:
+ void apply(void);
+
+private:
+ luci::CirclePRelu *create_prelu(loco::Graph *graph);
+
+private:
+ const PReluPattern &_p;
+};
+
+luci::CirclePRelu *FusePRelu::create_prelu(loco::Graph *graph)
+{
+ assert(graph);
+
+ auto prelu = graph->nodes()->create<luci::CirclePRelu>();
+ prelu->input(_p._ifm);
+ prelu->alpha(_p._const_alpha);
+ prelu->name(_p._add_ofm->name() + "_prelu");
+ return prelu;
+}
+
+void FusePRelu::apply()
+{
+ auto graph = _p._add_ofm->graph();
+
+ auto prelu = create_prelu(graph);
+
+ // set origin
+ std::vector<std::shared_ptr<luci::CircleNodeOrigin>> origin_vec{
+ luci::get_origin(_p._relu), luci::get_origin(_p._abs), luci::get_origin(_p._sub),
+ luci::get_origin(_p._mul_alpha), luci::get_origin(_p._mul_half), luci::get_origin(_p._add_ofm)};
+
+ luci::add_origin(prelu, luci::composite_origin(origin_vec));
+
+ replace(_p._add_ofm).with(prelu);
+}
+
+} // namespace
+
+namespace
+{
+
+bool fuse_prelu(luci::CircleAdd *add)
+{
+ assert(add);
+
+ PReluPattern pattern(add);
+ if (pattern.matched())
+ {
+ FusePRelu fuse(pattern);
+ fuse.apply();
+ return true;
+ }
+ return false;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool FusePReluPass::run(loco::Graph *g)
+{
+ bool changed = false;
+
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto add = dynamic_cast<luci::CircleAdd *>(node);
+ if (not add)
+ continue;
+
+ if (fuse_prelu(add))
+ changed = true;
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FusePReluPass.test.cpp b/compiler/luci/pass/src/FusePReluPass.test.cpp
new file mode 100644
index 000000000..209fe3911
--- /dev/null
+++ b/compiler/luci/pass/src/FusePReluPass.test.cpp
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FusePReluPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class PReluGraphlet
+{
+public:
+ PReluGraphlet() = default;
+
+ void init(loco::Graph *g)
+ {
+ _abs = g->nodes()->create<luci::CircleAbs>();
+ _sub = g->nodes()->create<luci::CircleSub>();
+ _mul_alpha = g->nodes()->create<luci::CircleMul>();
+ _mul_half = g->nodes()->create<luci::CircleMul>();
+ _relu = g->nodes()->create<luci::CircleRelu>();
+ _add = g->nodes()->create<luci::CircleAdd>();
+ _const_alpha = g->nodes()->create<luci::CircleConst>();
+ _const_half = g->nodes()->create<luci::CircleConst>();
+
+ _sub->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _mul_alpha->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _mul_half->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _add->fusedActivationFunction(luci::FusedActFunc::NONE);
+
+ _abs->name("abs");
+ _sub->name("sub");
+ _mul_alpha->name("mul_alpha");
+ _mul_half->name("mul_half");
+ _relu->name("relu");
+ _add->name("add");
+ _const_alpha->name("const_alpha");
+ _const_half->name("const_half");
+
+ _const_alpha->dtype(loco::DataType::FLOAT32);
+ _const_alpha->size<loco::DataType::FLOAT32>(1);
+ _const_alpha->shape({1});
+ _const_alpha->at<loco::DataType::FLOAT32>(0) = 0.1;
+ _const_alpha->shape_status(luci::ShapeStatus::VALID);
+
+ _const_half->dtype(loco::DataType::FLOAT32);
+ _const_half->size<loco::DataType::FLOAT32>(1);
+ _const_half->shape({1});
+ _const_half->at<loco::DataType::FLOAT32>(0) = 0.5;
+ _const_half->shape_status(luci::ShapeStatus::VALID);
+ }
+
+ void invalid_half() { _const_half->at<loco::DataType::FLOAT32>(0) = 0.1; }
+ void invalid_act() { _add->fusedActivationFunction(luci::FusedActFunc::RELU); }
+
+protected:
+ luci::CircleAbs *_abs = nullptr;
+ luci::CircleSub *_sub = nullptr;
+ luci::CircleMul *_mul_alpha = nullptr;
+ luci::CircleMul *_mul_half = nullptr;
+ luci::CircleRelu *_relu = nullptr;
+ luci::CircleAdd *_add = nullptr;
+ luci::CircleConst *_const_alpha = nullptr;
+ luci::CircleConst *_const_half = nullptr;
+};
+
+class FusePReluTestGraph : public TestIOGraph, public PReluGraphlet
+{
+public:
+ FusePReluTestGraph() = default;
+
+ void init(void)
+ {
+ TestIOGraph::init({1}, {1});
+ PReluGraphlet::init(g());
+
+ _relu->features(input());
+ _abs->x(input());
+ _sub->x(input());
+ _sub->y(_abs);
+ _mul_alpha->x(_sub);
+ _mul_alpha->y(_const_alpha);
+ _mul_half->x(_mul_alpha);
+ _mul_half->y(_const_half);
+ _add->x(_relu);
+ _add->y(_mul_half);
+
+ output()->from(_add);
+ }
+};
+
+class FusePReluTestNegGraph : public TestIOGraph, public PReluGraphlet
+{
+public:
+ FusePReluTestNegGraph() = default;
+
+ void init(void)
+ {
+ TestIOGraph::init({1}, {1});
+ PReluGraphlet::init(g());
+
+ _relu->features(input());
+ _abs->x(input());
+ // NOTE x and y are incorrect
+ _sub->x(_abs);
+ _sub->y(input());
+ _mul_alpha->x(_sub);
+ _mul_alpha->y(_const_alpha);
+ _mul_half->x(_mul_alpha);
+ _mul_half->y(_const_half);
+ _add->x(_relu);
+ _add->y(_mul_half);
+
+ output()->from(_add);
+ }
+};
+
+} // namespace
+
+TEST(FusePReluPassTest, name)
+{
+ luci::FusePReluPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(FusePReluPassTest, fuse)
+{
+ FusePReluTestGraph g;
+ luci::FusePReluPass pass;
+
+ g.init();
+
+ EXPECT_TRUE(pass.run(g.g()));
+}
+
+TEST(FusePReluPassTest, fuse_invalid_half_NEG)
+{
+  // start from the valid graph so that only the invalid 'half' constant
+  // breaks the pattern match
+  FusePReluTestGraph g;
+ luci::FusePReluPass pass;
+
+ g.init();
+ g.invalid_half();
+
+ EXPECT_FALSE(pass.run(g.g()));
+}
+
+TEST(FusePReluPassTest, fuse_invalid_act_NEG)
+{
+  // start from the valid graph so that only the invalid activation breaks
+  // the pattern match
+  FusePReluTestGraph g;
+ luci::FusePReluPass pass;
+
+ g.init();
+ g.invalid_act();
+
+ EXPECT_FALSE(pass.run(g.g()));
+}
+
+TEST(FusePReluPassTest, fuse_NEG)
+{
+ FusePReluTestNegGraph g;
+ luci::FusePReluPass pass;
+
+ g.init();
+
+ EXPECT_FALSE(pass.run(g.g()));
+}
diff --git a/compiler/luci/pass/src/FusePreActivationBatchNormPass.cpp b/compiler/luci/pass/src/FusePreActivationBatchNormPass.cpp
new file mode 100644
index 000000000..469fcddbb
--- /dev/null
+++ b/compiler/luci/pass/src/FusePreActivationBatchNormPass.cpp
@@ -0,0 +1,626 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FusePreActivationBatchNormPass.h"
+#include "FusePreActivationBatchNormPassInternal.h"
+#include "BatchNormPatternFinder.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Log.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+#include <stdexcept>
+
+namespace
+{
+
+// Check if all elements are non-negative
+bool is_non_negative(const luci::CircleConst *node)
+{
+ assert(node->dtype() == loco::DataType::FLOAT32);
+
+ uint32_t size = node->size<loco::DataType::FLOAT32>();
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ if (node->at<loco::DataType::FLOAT32>(i) < 0)
+ return false;
+ }
+ return true;
+}
+
+const luci::CircleConv2D *get_forward_conv2d(const luci::CircleNode *node, uint32_t channel_size)
+{
+ auto opcode = node->opcode();
+ if (opcode == luci::CircleOpcode::CONV_2D)
+ {
+ auto conv = loco::must_cast<const luci::CircleConv2D *>(node);
+ auto filter = dynamic_cast<luci::CircleConst *>(conv->filter());
+ if (filter == nullptr)
+ return nullptr;
+
+ if (filter->rank() != 4)
+ return nullptr;
+
+ if (filter->dim(3).value() != channel_size)
+ return nullptr;
+
+ if (loco::succs(filter).size() != 1)
+ return nullptr;
+
+ return conv;
+ }
+ // MUL can be fused with CONV across MEAN
+ // i.e., MUL-MEAN-CONV -> MEAN-CONV
+ // This is for handling the last part of ResNetV2
+ else if (opcode == luci::CircleOpcode::MEAN)
+ {
+ auto mean = loco::must_cast<const luci::CircleMean *>(node);
+ auto axis = mean->reduction_indices();
+ auto axis_const = dynamic_cast<luci::CircleConst *>(axis);
+ if (not axis_const)
+ return nullptr;
+
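+    // NOTE reduction indices are compared as-is; negative axis values
+    // (e.g. -1 for the last axis) are not normalized before the check below.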
+ assert(axis_const->dtype() == loco::DataType::S32);
+ auto axis_size = axis_const->size<loco::DataType::S32>();
+ for (uint32_t i = 0; i < axis_size; ++i)
+ {
+ // Reduction axis must not be the channel index
+ // Assumption: Layout is channel-last
+ if (axis_const->at<loco::DataType::S32>(i) == static_cast<int32_t>(node->rank() - 1))
+ return nullptr;
+ }
+
+ auto succ = loco::succs(node);
+ if (succ.size() != 1)
+ return nullptr;
+
+ auto succ_node = loco::must_cast<luci::CircleNode *>(*succ.begin());
+
+ return get_forward_conv2d(succ_node, channel_size);
+ }
+ else
+ {
+ return nullptr;
+ }
+}
+
+void update_conv_weights_with_gamma(const luci::CircleConv2D *conv, const luci::CircleConst *gamma)
+{
+ assert(conv != nullptr);
+ assert(gamma != nullptr);
+ auto filter = loco::must_cast<luci::CircleConst *>(conv->filter());
+
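+  // Filter layout is OHWI: gamma scales this Conv's input per channel, so
+  // each filter element is multiplied by the gamma of its input channel (i).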
+ uint32_t filter_out_dim = filter->dim(0).value();
+ uint32_t filter_height_dim = filter->dim(1).value();
+ uint32_t filter_width_dim = filter->dim(2).value();
+ uint32_t filter_in_dim = filter->dim(3).value();
+ for (uint32_t o = 0; o < filter_out_dim; o++)
+ {
+ for (uint32_t h = 0; h < filter_height_dim; h++)
+ {
+ for (uint32_t w = 0; w < filter_width_dim; w++)
+ {
+ for (uint32_t i = 0; i < filter_in_dim; i++)
+ {
+ uint32_t offset = o * filter_height_dim * filter_width_dim * filter_in_dim +
+ h * filter_width_dim * filter_in_dim + w * filter_in_dim + i;
+ filter->at<loco::DataType::FLOAT32>(offset) *= gamma->at<loco::DataType::FLOAT32>(i);
+ }
+ }
+ }
+ }
+}
+
+// Find CONV_2D that can be fused with ADD
+luci::CircleConv2D *get_backward_conv2d(luci::CircleNode *node, uint32_t channel_size)
+{
+ // Stop searching when meeting a node used by multiple nodes
+ if (loco::succs(node).size() != 1)
+ return nullptr;
+
+ auto opcode = node->opcode();
+ if (opcode == luci::CircleOpcode::CONV_2D)
+ {
+ auto conv = loco::must_cast<luci::CircleConv2D *>(node);
+ auto filter = dynamic_cast<luci::CircleConst *>(conv->filter());
+
+ if (filter == nullptr)
+ return nullptr;
+
+ if (filter->rank() != 4)
+ return nullptr;
+
+ if (filter->dim(0).value() != channel_size)
+ return nullptr;
+
+ if (loco::succs(filter).size() != 1)
+ return nullptr;
+
+ return conv;
+ }
+ else if (opcode == luci::CircleOpcode::MAX_POOL_2D || opcode == luci::CircleOpcode::PAD ||
+ opcode == luci::CircleOpcode::ADD)
+ {
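+    // MAX_POOL/PAD/ADD are treated as "passable": the per-channel bias
+    // update is assumed to commute with them, so keep searching through
+    // their inputs.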
+ auto preds = loco::preds(node);
+ for (auto pred : preds)
+ {
+ auto pred_conv = get_backward_conv2d(loco::must_cast<luci::CircleNode *>(pred), channel_size);
+ if (pred_conv != nullptr)
+ return pred_conv;
+ }
+ return nullptr;
+ }
+ else
+ {
+ return nullptr;
+ }
+}
+
+bool update_conv_bias_with_beta(luci::CircleConv2D *conv, const luci::CircleConst *beta,
+ bool add_beta)
+{
+ assert(beta->rank() == 1);
+ auto size = beta->dim(0).value();
+ auto bias = dynamic_cast<luci::CircleConst *>(conv->bias());
+
+ auto name = conv->name();
+ assert(name.length() > 0);
+
+ if (bias == nullptr)
+ {
+ bias = conv->graph()->nodes()->create<luci::CircleConst>();
+ bias->dtype(loco::DataType::FLOAT32);
+ bias->rank(1);
+ bias->dim(0).set(size);
+ bias->size<loco::DataType::FLOAT32>(size);
+ bias->name(name + "/bias");
+ conv->bias(bias);
+ }
+ else
+ {
+ if (bias->rank() != 1)
+ return false;
+
+ if (loco::succs(bias).size() != 1)
+ return false;
+
+ if (size != bias->dim(0).value())
+ return false;
+ }
+
+ for (uint32_t i = 0; i < size; i++)
+ {
+ if (add_beta)
+ bias->at<loco::DataType::FLOAT32>(i) += beta->at<loco::DataType::FLOAT32>(i);
+ else
+ bias->at<loco::DataType::FLOAT32>(i) -= beta->at<loco::DataType::FLOAT32>(i);
+ }
+ return true;
+}
+
+luci::CircleSub *insert_sub(luci::CircleNode *pred, luci::CircleConst *beta)
+{
+ auto name = pred->name();
+ assert(name.length() > 0);
+
+ auto sub = pred->graph()->nodes()->create<luci::CircleSub>();
+ sub->fusedActivationFunction(luci::FusedActFunc::NONE);
+ sub->name(name + "/Sub");
+
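+  // Rewire all users of `pred` to the new SUB first; only then make `pred`
+  // the SUB's input, so that replace() does not turn the SUB into its own
+  // operand.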
+ loco::replace(pred).with(sub);
+
+ sub->x(pred);
+ sub->y(beta);
+
+ return sub;
+}
+
+luci::CircleAdd *get_forward_add(luci::CircleNode *node)
+{
+ auto opcode = node->opcode();
+ if (opcode == luci::CircleOpcode::ADD)
+ {
+ auto add = loco::must_cast<luci::CircleAdd *>(node);
+ return add;
+ }
+ else if (opcode == luci::CircleOpcode::MAX_POOL_2D)
+ {
+ auto succ = loco::succs(node);
+ if (succ.size() != 1)
+ return nullptr;
+
+ auto succ_node = loco::must_cast<luci::CircleNode *>(*succ.begin());
+ return get_forward_add(succ_node);
+ }
+
+ return nullptr;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * Fuse SUB with CONV
+ *
+ * BEFORE
+ *
+ * beta [Sub]
+ * |
+ * [Passable Op] [Conv] bias
+ * \ /
+ * [Add]
+ *
+ * AFTER
+ *
+ * [Passable Op] [Conv] bias - beta
+ * \ /
+ * [Add]
+ */
+bool fuse_sub_with_conv(luci::CircleSub *sub)
+{
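+  // (x - beta) + (conv_out + bias) == x + (conv_out + (bias - beta)); a
+  // per-channel beta also commutes with an intervening MAX_POOL, so the SUB
+  // can be folded into the CONV bias on the other input of the ADD.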
+ luci::CircleAdd *add = nullptr;
+ luci::CircleConv2D *conv = nullptr;
+ auto succs = loco::succs(sub);
+ if (succs.size() != 1)
+ return false;
+
+ add = get_forward_add(loco::must_cast<luci::CircleNode *>(*succs.begin()));
+ if (add == nullptr)
+ return false;
+
+ conv = dynamic_cast<luci::CircleConv2D *>(add->x());
+ if (conv == nullptr)
+ conv = dynamic_cast<luci::CircleConv2D *>(add->y());
+
+ if (conv == nullptr)
+ return false;
+
+  auto beta = loco::must_cast<luci::CircleConst *>(sub->y());
+ if (!update_conv_bias_with_beta(conv, beta, false))
+ return false;
+
+ luci::add_origin(conv, luci::get_origin(sub));
+
+ auto pred = sub->x();
+ loco::replace(sub).with(pred);
+
+ sub->drop();
+
+ return true;
+}
+
+/**
+ * Fuse ADD with the preceding CONV
+ *
+ * BEFORE
+ *
+ * [Conv] bias
+ * |
+ * [Passable Op] (None, Max pool, Pad, etc)
+ * |
+ * [Add] beta
+ *
+ * AFTER
+ *
+ * [Conv] bias + beta
+ * |
+ * [Passable Op]
+ *
+ * A special case where SUB is newly inserted
+ *
+ * BEFORE
+ *
+ * [Conv] bias
+ * \ /
+ * [Add]
+ * / \
+ * [Add] beta
+ *
+ * AFTER
+ *
+ * [Conv] bias + beta
+ * \ /
+ * [Add]
+ * / \
+ * beta [Sub]
+ */
+bool fuse_add_with_conv(luci::CircleAdd *add, std::vector<luci::CircleSub *> &sub_list)
+{
+ auto x = dynamic_cast<luci::CircleConst *>(add->x());
+ auto y = dynamic_cast<luci::CircleConst *>(add->y());
+
+ luci::CircleNode *pred = nullptr;
+ luci::CircleConst *beta = nullptr;
+
+ if (x != nullptr && y == nullptr)
+ {
+ pred = loco::must_cast<luci::CircleNode *>(add->y());
+ beta = x;
+ }
+ else if (x == nullptr && y != nullptr)
+ {
+ pred = loco::must_cast<luci::CircleNode *>(add->x());
+ beta = y;
+ }
+ else
+ {
+ return false;
+ }
+
+ assert(beta->rank() == 1);
+
+ auto channel_size = beta->dim(0).value();
+ auto conv = get_backward_conv2d(pred, channel_size);
+
+ if (conv != nullptr)
+ {
+ if (!update_conv_bias_with_beta(conv, beta, true))
+ return false;
+
+ luci::add_origin(conv, luci::get_origin(add));
+ loco::replace(add).with(pred);
+ add->drop();
+
+ return true;
+ }
+ // A special case shown at the residual blocks of ResNetV2
+ // TODO: Handle this with get_backward_conv2d
+ else if (pred->opcode() == luci::CircleOpcode::ADD)
+ {
+ auto pred_add = loco::must_cast<luci::CircleAdd *>(pred);
+ conv = get_backward_conv2d(loco::must_cast<luci::CircleNode *>(pred_add->y()), channel_size);
+ if (conv == nullptr)
+ conv = get_backward_conv2d(loco::must_cast<luci::CircleNode *>(pred_add->x()), channel_size);
+
+ if (conv == nullptr)
+ return false;
+
+ if (!update_conv_bias_with_beta(conv, beta, true))
+ return false;
+
+ luci::add_origin(conv, luci::get_origin(add));
+
+    auto relu = *loco::succs(add).begin();
+    auto relu_node = loco::must_cast<luci::CircleRelu *>(relu);
+
+ loco::replace(add).with(pred);
+
+ add->drop();
+
+ sub_list.push_back(insert_sub(pred, beta));
+ luci::add_origin(sub_list.back(), luci::get_origin(add));
+
+ relu_node->features(pred);
+
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * Fuse MUL with the next CONV
+ *
+ * BEFORE
+ *
+ * [Mul] gamma
+ * |
+ * [Relu]
+ * / \
+ * W1 [Conv] [Conv] W2
+ *
+ * AFTER
+ *
+ * [Relu]
+ * / \
+ * gamma X W1 [Conv] [Conv] gamma X W2
+ */
+bool fuse_mul_with_conv(luci::CircleMul *mul)
+{
+ luci::CircleNode *pred_node = nullptr;
+ luci::CircleConst *gamma = nullptr;
+
+ if (!is_batchnorm_mul(mul, pred_node, gamma))
+ return false;
+
+ auto mul_succ = loco::succs(mul);
+ assert(mul_succ.size() == 1);
+
+ auto relu = loco::must_cast<luci::CircleRelu *>(*mul_succ.begin());
+
+ auto channel_size = gamma->dim(0).value();
+
+ bool fusable = true;
+ auto relu_succ = loco::succs(relu);
+ for (auto s : relu_succ)
+ {
+ auto conv = get_forward_conv2d(loco::must_cast<luci::CircleNode *>(s), channel_size);
+ if (conv == nullptr)
+ fusable = false;
+ }
+
+ if (fusable)
+ {
+ for (auto s : relu_succ)
+ {
+ // Find the next CONV
+ auto conv = get_forward_conv2d(loco::must_cast<luci::CircleNode *>(s), channel_size);
+
+ // Update CONV weights
+ update_conv_weights_with_gamma(conv, gamma);
+
+ // Update origin
+ // TODO need to remove const
+ luci::add_origin(const_cast<luci::CircleConv2D *>(conv),
+ luci::get_origin(loco::must_cast<luci::CircleNode *>(mul)));
+ }
+
+ loco::replace(mul).with(pred_node);
+ relu->features(pred_node);
+
+ mul->drop();
+
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * Swap MUL/ADD if they are from batch normalization
+ *
+ * BEFORE
+ * [Mul] gamma
+ * |
+ * [Add + Relu] beta
+ *
+ * AFTER
+ * [Add] beta/gamma
+ * |
+ * [Mul] gamma
+ * |
+ * [Relu]
+ */
+bool swap_mul_add(luci::CircleAdd *add, std::vector<luci::CircleMul *> &mul_list,
+ std::vector<luci::CircleAdd *> &add_list)
+{
+ luci::CircleNode *pred_node = nullptr;
+ luci::CircleMul *mul = nullptr;
+ luci::CircleConst *beta = nullptr;
+ luci::CircleConst *gamma = nullptr;
+
+ if (!is_batchnorm_add(add, mul, beta))
+ return false;
+ if (add->fusedActivationFunction() != luci::FusedActFunc::RELU)
+ return false;
+
+ if (loco::succs(mul).size() != 1)
+ return false;
+
+ if (!is_batchnorm_mul(mul, pred_node, gamma))
+ return false;
+
+ if (beta->dtype() != loco::DataType::FLOAT32 || gamma->dtype() != loco::DataType::FLOAT32)
+ throw std::runtime_error("FusePreActivationBatchNormPass only supports Float32 model");
+
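+  // Gamma must be non-negative: the follow-up fusion hoists the MUL across
+  // the inserted RELU, and relu(gamma * x) == gamma * relu(x) only holds
+  // when gamma >= 0.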
+ if (!is_non_negative(gamma))
+ return false;
+
+ // Insert Relu at the bottom
+ auto name = add->name();
+ assert(name.length() > 0);
+
+ auto relu = add->graph()->nodes()->create<luci::CircleRelu>();
+ relu->features(mul);
+ relu->name(name + "/Relu");
+ luci::add_origin(relu, luci::get_origin(add));
+ loco::replace(add).with(relu);
+
+ // Replace beta <- beta / gamma
+ if (add->x() == beta)
+ {
+ add->y(pred_node);
+ }
+ else
+ {
+ add->x(pred_node);
+ }
+ add->fusedActivationFunction(luci::FusedActFunc::NONE);
+ uint32_t size = beta->size<loco::DataType::FLOAT32>();
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ auto b = beta->at<loco::DataType::FLOAT32>(i);
+ auto g = gamma->at<loco::DataType::FLOAT32>(i);
+ if (b == g)
+ {
+ beta->at<loco::DataType::FLOAT32>(i) = 1;
+ }
+ else
+ {
+ // If g is 0, we use a small value (empirically determined)
+ if (g == 0)
+ g = 1e-10;
+ beta->at<loco::DataType::FLOAT32>(i) = b / g;
+ }
+ }
+
+ if (mul->x() == gamma)
+ {
+ mul->y(add);
+ }
+ else
+ {
+ mul->x(add);
+ }
+
+ mul_list.push_back(mul);
+ add_list.push_back(add);
+
+ return true;
+}
+
+bool FusePreActivationBatchNormPass::run(loco::Graph *g)
+{
+ LOGGER(l);
+ bool changed = false;
+
+ // Step 1. Swap MUL <-> ADD
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto add = dynamic_cast<luci::CircleAdd *>(node);
+ if (add == nullptr)
+ continue;
+
+ if (swap_mul_add(add, _mul_list, _add_list))
+ changed = true;
+ }
+
+ INFO(l) << "[FusePreActivationBatchNorm] Target pre-activations: " << _mul_list.size()
+ << std::endl;
+
+ // Valid pattern was not detected. Fast exit.
+ if (!changed)
+ return false;
+
+ // Step 2. Fuse MUL with the next CONV
+ for (auto const &mul : _mul_list)
+ {
+ if (fuse_mul_with_conv(mul))
+ INFO(l) << "[FusePreActivationBatchNorm] Fused MUL: " << mul->name() << std::endl;
+ }
+
+ // Step 3. Fuse ADD with the preceding CONV and insert SUB
+ for (auto const &add : _add_list)
+ {
+ if (fuse_add_with_conv(add, _sub_list))
+ INFO(l) << "[FusePreActivationBatchNorm] Fused ADD: " << add->name() << std::endl;
+ }
+
+  INFO(l) << "[FusePreActivationBatchNorm] " << _sub_list.size() << " SUB nodes were added."
+          << std::endl;
+
+ // Step 4. Fuse SUB to CONV (SUB -> ADD <- CONV pattern)
+ for (auto const &sub : _sub_list)
+ {
+ if (fuse_sub_with_conv(sub))
+ INFO(l) << "[FusePreActivationBatchNorm] Fused SUB: " << sub->name() << std::endl;
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FusePreActivationBatchNormPass.test.cpp b/compiler/luci/pass/src/FusePreActivationBatchNormPass.test.cpp
new file mode 100644
index 000000000..3d5791c9e
--- /dev/null
+++ b/compiler/luci/pass/src/FusePreActivationBatchNormPass.test.cpp
@@ -0,0 +1,406 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FusePreActivationBatchNormPassInternal.h"
+
+#include "luci/Pass/FusePreActivationBatchNormPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <math.h>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+/**
+ * Simple graph for test
+ *
+ * BEFORE
+ *
+ * [Conv] W + bias
+ * \ [Conv]
+ * \ /
+ * [Add]
+ * / \
+ * / [Mul] gamma
+ * | |
+ * | [Add+Relu] beta
+ * | |
+ * | [Conv] W + bias
+ * \ /
+ * [Add]
+ *
+ * AFTER
+ *
+ * [Conv] W + (bias + beta/gamma)
+ * \ [Conv]
+ * \ /
+ * [Add]
+ * / \
+ * | [Relu]
+ * | |
+ * | [Conv] (gamma * W) + (bias - beta/gamma)
+ * \ /
+ * [Add]
+ *
+ */
+class SimpleGraph
+{
+public:
+ SimpleGraph()
+ {
+ pred_conv = g.nodes()->create<luci::CircleConv2D>();
+ pred_conv_filter = g.nodes()->create<luci::CircleConst>();
+ pred_conv_bias = g.nodes()->create<luci::CircleConst>();
+ pred_conv2 = g.nodes()->create<luci::CircleConv2D>();
+ pred_conv2_filter = g.nodes()->create<luci::CircleConst>();
+ pred_conv2_bias = g.nodes()->create<luci::CircleConst>();
+ pred_add = g.nodes()->create<luci::CircleAdd>();
+ mul = g.nodes()->create<luci::CircleMul>();
+ mul_gamma = g.nodes()->create<luci::CircleConst>();
+ add = g.nodes()->create<luci::CircleAdd>();
+ add_beta = g.nodes()->create<luci::CircleConst>();
+ conv = g.nodes()->create<luci::CircleConv2D>();
+ conv_filter = g.nodes()->create<luci::CircleConst>();
+ conv_bias = g.nodes()->create<luci::CircleConst>();
+ succ_add = g.nodes()->create<luci::CircleAdd>();
+
+ pred_conv->dtype(loco::DataType::FLOAT32);
+ pred_conv_filter->dtype(loco::DataType::FLOAT32);
+ pred_conv_bias->dtype(loco::DataType::FLOAT32);
+ pred_conv2->dtype(loco::DataType::FLOAT32);
+ pred_conv2_filter->dtype(loco::DataType::FLOAT32);
+ pred_conv2_bias->dtype(loco::DataType::FLOAT32);
+ pred_add->dtype(loco::DataType::FLOAT32);
+ mul->dtype(loco::DataType::FLOAT32);
+ mul_gamma->dtype(loco::DataType::FLOAT32);
+ add->dtype(loco::DataType::FLOAT32);
+ add->fusedActivationFunction(luci::FusedActFunc::RELU);
+ add_beta->dtype(loco::DataType::FLOAT32);
+ conv->dtype(loco::DataType::FLOAT32);
+ conv_filter->dtype(loco::DataType::FLOAT32);
+ conv_bias->dtype(loco::DataType::FLOAT32);
+ succ_add->dtype(loco::DataType::FLOAT32);
+
+ pred_conv->shape({1, 4, 4, 16});
+ pred_conv_filter->shape({16, 1, 1, 16});
+ pred_conv_bias->shape({16});
+ pred_conv2->shape({1, 4, 4, 16});
+ pred_conv2_filter->shape({16, 1, 1, 16});
+ pred_conv2_bias->shape({16});
+ pred_add->shape({1, 4, 4, 16});
+ mul->shape({1, 4, 4, 16});
+ mul_gamma->shape({16});
+ add->shape({1, 4, 4, 16});
+ add_beta->shape({16});
+ conv->shape({1, 4, 4, 16});
+ conv_filter->shape({16, 1, 1, 16});
+ conv_bias->shape({16});
+ succ_add->shape({1, 4, 4, 16});
+
+ pred_conv->filter(pred_conv_filter);
+ pred_conv->bias(pred_conv_bias);
+ pred_conv2->filter(pred_conv2_filter);
+ pred_conv2->bias(pred_conv2_bias);
+ pred_add->x(pred_conv);
+ pred_add->y(pred_conv2);
+ mul->x(pred_add);
+ mul->y(mul_gamma);
+ add->x(mul);
+ add->y(add_beta);
+ conv->input(add);
+ conv->filter(conv_filter);
+ conv->bias(conv_bias);
+ succ_add->x(pred_add);
+ succ_add->y(conv);
+
+ uint32_t channel_size = 16;
+ uint32_t out_size = 16;
+ add_beta->size<loco::DataType::FLOAT32>(channel_size);
+ mul_gamma->size<loco::DataType::FLOAT32>(channel_size);
+ conv_filter->size<loco::DataType::FLOAT32>(channel_size * out_size);
+ conv_bias->size<loco::DataType::FLOAT32>(out_size);
+ pred_conv_bias->size<loco::DataType::FLOAT32>(channel_size);
+ pred_conv2_bias->size<loco::DataType::FLOAT32>(channel_size);
+ for (uint32_t i = 0; i < channel_size; i++)
+ {
+ add_beta->at<loco::DataType::FLOAT32>(i) = i;
+ mul_gamma->at<loco::DataType::FLOAT32>(i) = i;
+ pred_conv_bias->at<loco::DataType::FLOAT32>(i) = i;
+ pred_conv2_bias->at<loco::DataType::FLOAT32>(i) = i;
+ conv_bias->at<loco::DataType::FLOAT32>(i) = i;
+ for (uint32_t j = 0; j < out_size; j++)
+ {
+ conv_filter->at<loco::DataType::FLOAT32>(i * out_size + j) = i * out_size + j;
+ }
+ }
+
+ pred_conv->name("pred_conv");
+ pred_conv_filter->name("pred_conv_filter");
+ pred_conv_bias->name("pred_conv_bias");
+ pred_conv2->name("pred_conv2");
+ pred_conv2_filter->name("pred_conv2_filter");
+ pred_conv2_bias->name("pred_conv2_bias");
+ pred_add->name("pred_add");
+ mul->name("mul");
+ mul_gamma->name("mul_gamma");
+ add->name("add");
+ add_beta->name("add_beta");
+ conv->name("conv");
+ conv_filter->name("conv_filter");
+ conv_bias->name("conv_bias");
+ succ_add->name("succ_add");
+ }
+
+public:
+ loco::Graph g;
+ luci::CircleConv2D *pred_conv = nullptr;
+ luci::CircleConst *pred_conv_filter = nullptr;
+ luci::CircleConst *pred_conv_bias = nullptr;
+ luci::CircleConv2D *pred_conv2 = nullptr;
+ luci::CircleConst *pred_conv2_filter = nullptr;
+ luci::CircleConst *pred_conv2_bias = nullptr;
+ luci::CircleAdd *pred_add = nullptr;
+ luci::CircleMul *mul = nullptr;
+ luci::CircleConst *mul_gamma = nullptr;
+ luci::CircleAdd *add = nullptr;
+ luci::CircleConst *add_beta = nullptr;
+ luci::CircleConv2D *conv = nullptr;
+ luci::CircleConst *conv_filter = nullptr;
+ luci::CircleConst *conv_bias = nullptr;
+ luci::CircleAdd *succ_add = nullptr;
+};
+
+} // namespace
+
+TEST(FusePreActivationBatchNormPassTest, name)
+{
+ luci::FusePreActivationBatchNormPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(FusePreActivationBatchNorm, swap_mul_add)
+{
+ SimpleGraph g;
+  uint32_t channel_size = 16;
+ std::vector<luci::CircleMul *> mul_list;
+ std::vector<luci::CircleAdd *> add_list;
+
+ EXPECT_TRUE(luci::swap_mul_add(g.add, mul_list, add_list));
+ EXPECT_EQ(1, mul_list.size());
+ EXPECT_EQ(1, add_list.size());
+ EXPECT_EQ(g.mul, mul_list[0]);
+ EXPECT_EQ(g.add, add_list[0]);
+
+ for (uint32_t i = 0; i < channel_size; ++i)
+ {
+ float beta = g.add_beta->at<loco::DataType::FLOAT32>(i);
+ float gamma = g.mul_gamma->at<loco::DataType::FLOAT32>(i);
+ EXPECT_FLOAT_EQ(1.0, beta);
+ EXPECT_FLOAT_EQ(i, gamma);
+ }
+
+  auto relu = dynamic_cast<luci::CircleRelu *>(g.conv->input());
+  ASSERT_NE(nullptr, relu);
+
+ EXPECT_EQ(g.mul, relu->features());
+ EXPECT_EQ(g.add, g.mul->x());
+ EXPECT_EQ(luci::FusedActFunc::NONE, g.add->fusedActivationFunction());
+ EXPECT_EQ(g.pred_add, g.add->x());
+}
+
+TEST(FusePreActivationBatchNorm, swap_mul_add_NEG)
+{
+ SimpleGraph g;
+ std::vector<luci::CircleMul *> mul_list;
+ std::vector<luci::CircleAdd *> add_list;
+
+ // Add does not have fused activation
+ g.add->fusedActivationFunction(luci::FusedActFunc::NONE);
+ EXPECT_FALSE(luci::swap_mul_add(g.add, mul_list, add_list));
+ EXPECT_EQ(0, mul_list.size());
+ EXPECT_EQ(0, add_list.size());
+ g.add->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ // Add is element-wise
+ g.add_beta->shape({1, 4, 4, 16});
+ EXPECT_FALSE(luci::swap_mul_add(g.add, mul_list, add_list));
+ EXPECT_EQ(0, mul_list.size());
+ EXPECT_EQ(0, add_list.size());
+ g.add_beta->shape({16});
+
+ // Mul is element-wise
+ g.mul_gamma->shape({1, 4, 4, 16});
+ EXPECT_FALSE(luci::swap_mul_add(g.add, mul_list, add_list));
+ EXPECT_EQ(0, mul_list.size());
+ EXPECT_EQ(0, add_list.size());
+ g.mul_gamma->shape({16});
+
+ // Negative gamma
+ g.mul_gamma->at<loco::DataType::FLOAT32>(0) = -10;
+ EXPECT_FALSE(luci::swap_mul_add(g.add, mul_list, add_list));
+ EXPECT_EQ(0, mul_list.size());
+ EXPECT_EQ(0, add_list.size());
+}
+
+TEST(FusePreActivationBatchNorm, fuse_mul_with_conv)
+{
+ SimpleGraph g;
+  uint32_t channel_size = 16;
+  uint32_t out_size = 16;
+ std::vector<luci::CircleMul *> mul_list;
+ std::vector<luci::CircleAdd *> add_list;
+
+ EXPECT_TRUE(luci::swap_mul_add(g.add, mul_list, add_list));
+
+ EXPECT_TRUE(luci::fuse_mul_with_conv(g.mul));
+ for (uint32_t o = 0; o < out_size; o++)
+ {
+ for (uint32_t c = 0; c < channel_size; c++)
+ {
+ auto val = g.conv_filter->at<loco::DataType::FLOAT32>(o * channel_size + c);
+ auto gamma = g.mul_gamma->at<loco::DataType::FLOAT32>(c);
+ EXPECT_FLOAT_EQ((o * channel_size + c) * gamma, val);
+ }
+ }
+
+  auto relu = dynamic_cast<luci::CircleRelu *>(g.conv->input());
+  ASSERT_NE(nullptr, relu);
+ EXPECT_EQ(g.add, relu->features());
+}
+
+TEST(FusePreActivationBatchNorm, fuse_mul_with_conv_NEG)
+{
+ SimpleGraph g;
+ std::vector<luci::CircleMul *> mul_list;
+ std::vector<luci::CircleAdd *> add_list;
+
+ EXPECT_TRUE(luci::swap_mul_add(g.add, mul_list, add_list));
+
+ // Non-conv layer uses the output of relu
+ auto relu = static_cast<luci::CircleRelu *>(g.conv->input());
+ auto fc = g.g.nodes()->create<luci::CircleFullyConnected>();
+ fc->input(relu);
+ EXPECT_FALSE(luci::fuse_mul_with_conv(g.mul));
+}
+
+TEST(FusePreActivationBatchNorm, fuse_add_with_conv)
+{
+ SimpleGraph g;
+  uint32_t channel_size = 16;
+ std::vector<luci::CircleMul *> mul_list;
+ std::vector<luci::CircleAdd *> add_list;
+ std::vector<luci::CircleSub *> sub_list;
+
+ EXPECT_TRUE(luci::swap_mul_add(g.add, mul_list, add_list));
+ EXPECT_TRUE(luci::fuse_mul_with_conv(g.mul));
+ EXPECT_TRUE(luci::fuse_add_with_conv(g.add, sub_list));
+
+ for (uint32_t c = 0; c < channel_size; c++)
+ {
+ auto bias = g.pred_conv2_bias->at<loco::DataType::FLOAT32>(c);
+ EXPECT_FLOAT_EQ(c + 1.0, bias);
+ }
+
+  auto relu = dynamic_cast<luci::CircleRelu *>(g.conv->input());
+  ASSERT_NE(nullptr, relu);
+ EXPECT_EQ(g.pred_add, relu->features());
+ EXPECT_EQ(g.pred_conv, g.pred_add->x());
+ EXPECT_EQ(g.pred_conv2, g.pred_add->y());
+
+  auto sub = sub_list[0];
+ EXPECT_EQ(sub, g.succ_add->x());
+ EXPECT_EQ(g.pred_add, sub->x());
+ for (uint32_t c = 0; c < channel_size; c++)
+ {
+ auto beta = static_cast<luci::CircleConst *>(sub->y());
+ EXPECT_FLOAT_EQ(1.0, beta->at<loco::DataType::FLOAT32>(c));
+ }
+}
+
+TEST(FusePreActivationBatchNorm, fuse_add_with_conv_NEG)
+{
+ SimpleGraph g;
+ std::vector<luci::CircleMul *> mul_list;
+ std::vector<luci::CircleAdd *> add_list;
+ std::vector<luci::CircleSub *> sub_list;
+
+ EXPECT_TRUE(luci::swap_mul_add(g.add, mul_list, add_list));
+ EXPECT_TRUE(luci::fuse_mul_with_conv(g.mul));
+
+ // No conv layer to fuse add
+ auto fc1 = g.g.nodes()->create<luci::CircleFullyConnected>();
+ auto fc2 = g.g.nodes()->create<luci::CircleFullyConnected>();
+ g.pred_add->x(fc1);
+ g.pred_add->y(fc2);
+ EXPECT_FALSE(luci::fuse_add_with_conv(g.add, sub_list));
+ EXPECT_EQ(0, sub_list.size());
+}
+
+TEST(FusePreActivationBatchNorm, fuse_sub_with_conv)
+{
+ SimpleGraph g;
+  uint32_t channel_size = 16;
+ std::vector<luci::CircleMul *> mul_list;
+ std::vector<luci::CircleAdd *> add_list;
+ std::vector<luci::CircleSub *> sub_list;
+
+ EXPECT_TRUE(luci::swap_mul_add(g.add, mul_list, add_list));
+ EXPECT_TRUE(luci::fuse_mul_with_conv(g.mul));
+ EXPECT_TRUE(luci::fuse_add_with_conv(g.add, sub_list));
+ EXPECT_TRUE(luci::fuse_sub_with_conv(sub_list[0]));
+
+ for (uint32_t c = 0; c < channel_size; c++)
+ {
+ auto bias = g.conv_bias->at<loco::DataType::FLOAT32>(c);
+ EXPECT_FLOAT_EQ(c - 1.0, bias);
+ }
+
+ EXPECT_EQ(g.pred_add, g.succ_add->x());
+ EXPECT_EQ(g.conv, g.succ_add->y());
+}
+
+TEST(FusePreActivationBatchNorm, fuse_sub_with_conv_NEG)
+{
+ SimpleGraph g;
+ std::vector<luci::CircleMul *> mul_list;
+ std::vector<luci::CircleAdd *> add_list;
+ std::vector<luci::CircleSub *> sub_list;
+
+ EXPECT_TRUE(luci::swap_mul_add(g.add, mul_list, add_list));
+ EXPECT_TRUE(luci::fuse_mul_with_conv(g.mul));
+ EXPECT_TRUE(luci::fuse_add_with_conv(g.add, sub_list));
+
+ // No suitable pattern (relu was inserted between add and conv)
+ auto relu = g.g.nodes()->create<luci::CircleRelu>();
+ relu->features(g.conv);
+ g.succ_add->y(relu);
+ EXPECT_FALSE(luci::fuse_sub_with_conv(sub_list[0]));
+ g.succ_add->y(g.conv);
+ relu->drop();
+
+ // No suitable pattern (add was replaced with mul)
+ auto mul = g.g.nodes()->create<luci::CircleMul>();
+ mul->x(sub_list[0]);
+ mul->y(g.conv);
+ g.succ_add->drop();
+ EXPECT_FALSE(luci::fuse_sub_with_conv(sub_list[0]));
+}
diff --git a/compiler/luci/pass/src/FusePreActivationBatchNormPassInternal.h b/compiler/luci/pass/src/FusePreActivationBatchNormPassInternal.h
new file mode 100644
index 000000000..5c24d8ba6
--- /dev/null
+++ b/compiler/luci/pass/src/FusePreActivationBatchNormPassInternal.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_FUSE_PRE_ACTIVATION_BATCH_NORM_PASS_INTERNAL_H__
+#define __LUCI_CIRCLE_FUSE_PRE_ACTIVATION_BATCH_NORM_PASS_INTERNAL_H__
+
+#include <luci/IR/CircleNodes.h>
+
+#include <vector>
+
+namespace luci
+{
+
+/// Swap MUL/ADD if they are from batch normalization
+/// @return true if success
+bool swap_mul_add(luci::CircleAdd *add, std::vector<luci::CircleMul *> &mul_list,
+                  std::vector<luci::CircleAdd *> &add_list);
+
+/// Fuse MUL with the next CONV if possible
+/// @return true if success
+bool fuse_mul_with_conv(luci::CircleMul *mul);
+
+/// Fuse ADD with the preceding CONV if possible
+/// @return true if success
+bool fuse_add_with_conv(luci::CircleAdd *add, std::vector<luci::CircleSub *> &sub_list);
+
+/// Fuse SUB with CONV if possible
+/// @return true if success
+bool fuse_sub_with_conv(luci::CircleSub *sub);
+
+} // namespace luci
+
+#endif // __LUCI_CIRCLE_FUSE_PRE_ACTIVATION_BATCH_NORM_PASS_INTERNAL_H__
diff --git a/compiler/luci/pass/src/FuseTransposeWithMeanPass.cpp b/compiler/luci/pass/src/FuseTransposeWithMeanPass.cpp
new file mode 100644
index 000000000..e5fac5af9
--- /dev/null
+++ b/compiler/luci/pass/src/FuseTransposeWithMeanPass.cpp
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseTransposeWithMeanPass.h"
+
+#include <luci/IR/CircleNode.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+#include <luci/Service/Nodes/CircleConst.h>
+
+#include <algorithm>
+#include <vector>
+
+namespace
+{
+
+/**
+ * Fuse Transpose with Mean if possible
+ *
+ * BEFORE
+ * |
+ * [CircleTranspose, perm<0, 2, 3, 1>]
+ * |
+ * [CircleMean, axis<3>]
+ * |
+ *
+ * AFTER
+ * | |
+ * [CircleMean, axis<1>] [CircleTranspose, perm<0, 2, 3, 1>]
+ * | |
+ * [CircleMean, axis<3>]
+ *
+ */
+
+/**
+ * @brief Create a const for fused reduction indices
+ */
+luci::CircleConst *create_fused_indices(luci::CircleConst *rindices,
+ const std::vector<uint32_t> &fused_rindices)
+{
+ assert(rindices != nullptr); // FIX_CALLER_UNLESS
+
+ if (rindices->dtype() != loco::DataType::S32)
+ return nullptr;
+
+ assert(fused_rindices.size() == rindices->size<loco::DataType::S32>());
+
+ auto fused_rindices_const = luci::clone(rindices);
+ auto name = rindices->name();
+ assert(name.length() > 0); // FIX_CALLER_UNLESS
+ fused_rindices_const->name(name + "_fused");
+
+ for (uint32_t i = 0; i < fused_rindices.size(); ++i)
+ {
+ fused_rindices_const->at<loco::DataType::S32>(i) = fused_rindices.at(i);
+ }
+
+ return fused_rindices_const;
+}
+
+bool const_has_value_s32(const luci::CircleConst *circle_const, int32_t value)
+{
+ if (circle_const->dtype() != loco::DataType::S32)
+ return false;
+
+ uint32_t size = circle_const->size<loco::DataType::S32>();
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ if (circle_const->at<loco::DataType::S32>(i) == value)
+ return true;
+ }
+
+ return false;
+}
+
+bool fuse_transpose_with_mean(luci::CircleMean *mean)
+{
+ auto transpose = dynamic_cast<luci::CircleTranspose *>(mean->input());
+ if (not transpose)
+ return false;
+
+ // Get reduction indices of CircleMean operation.
+ auto rindices = dynamic_cast<luci::CircleConst *>(mean->reduction_indices());
+ if (not rindices)
+ return false;
+
+ if (rindices->dtype() != loco::DataType::S32)
+ return false;
+
+  if (mean->keep_dims())
+ return false;
+
+ auto perm = dynamic_cast<luci::CircleConst *>(transpose->perm());
+ if (not perm)
+ return false;
+
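+  // perm maps each transposed axis to its original axis. Translate the
+  // reduced axes back to original coordinates; the surviving axes must stay
+  // in ascending original order, or removing the Transpose would still
+  // permute the output.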
+ std::vector<uint32_t> axes_after_reduction;
+ std::vector<uint32_t> orig_reduced_axes;
+ for (uint32_t axis = 0; axis < perm->size<loco::DataType::S32>(); ++axis)
+ {
+ uint32_t original_axis = static_cast<uint32_t>(perm->at<loco::DataType::S32>(axis));
+
+ if (const_has_value_s32(rindices, axis))
+ {
+ orig_reduced_axes.push_back(original_axis);
+ continue;
+ }
+
+ axes_after_reduction.push_back(original_axis);
+ }
+
+ if (not std::is_sorted(axes_after_reduction.begin(), axes_after_reduction.end()))
+ return false;
+
+ auto fused_rindices = create_fused_indices(rindices, orig_reduced_axes);
+ if (not fused_rindices)
+ return false;
+
+ // Create and configure new CircleMean operation.
+ auto fused_mean = mean->graph()->nodes()->create<luci::CircleMean>();
+ fused_mean->reduction_indices(fused_rindices);
+ fused_mean->input(transpose->a());
+ fused_mean->keep_dims(false);
+ fused_mean->name(mean->name() + "/Transpose");
+
+ // Replace old CircleMean operation with new CircleMean operation with merged indices.
+ replace(mean).with(fused_mean);
+ luci::add_origin(fused_mean,
+ luci::composite_origin({luci::get_origin(mean), luci::get_origin(transpose)}));
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool FuseTransposeWithMeanPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto mean = dynamic_cast<luci::CircleMean *>(node);
+ if (not mean)
+ continue;
+
+ if (fuse_transpose_with_mean(mean))
+ changed = true;
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FuseTransposeWithMeanPass.test.cpp b/compiler/luci/pass/src/FuseTransposeWithMeanPass.test.cpp
new file mode 100644
index 000000000..f015d1cf7
--- /dev/null
+++ b/compiler/luci/pass/src/FuseTransposeWithMeanPass.test.cpp
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseTransposeWithMeanPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+/**
+ * Simple graph for test
+ *
+ * BEFORE
+ * |
+ * [CircleTranspose, perm<0, 2, 3, 1>]
+ * |
+ * [CircleMean, axis<3>]
+ * |
+ *
+ * AFTER
+ *      |                          |
+ * [CircleMean, axis<1>] [CircleTranspose, perm<0, 2, 3, 1>]
+ * | |
+ * [CircleMean, axis<3>]
+ *
+ */
+class FuseTransposeWithMeanTestGraph : public TestIOGraph
+{
+public:
+ FuseTransposeWithMeanTestGraph() = default;
+
+ void init(void)
+ {
+ TestIOGraph::init({1, 64, 20, 32}, {1, 20, 32});
+
+ _mean = g()->nodes()->create<luci::CircleMean>();
+ _transpose = g()->nodes()->create<luci::CircleTranspose>();
+ _indices = g()->nodes()->create<luci::CircleConst>();
+ _perm = g()->nodes()->create<luci::CircleConst>();
+
+ _mean->name("mean");
+ _transpose->name("transpose");
+ _indices->name("indices");
+ _perm->name("perm");
+
+ _indices->rank(1);
+ _indices->dtype(loco::DataType::S32);
+ _indices->size<loco::DataType::S32>(1);
+ _indices->at<loco::DataType::S32>(0) = static_cast<int32_t>(3);
+ _indices->dim(0) = 1;
+ _indices->shape_status(luci::ShapeStatus::VALID);
+
+ _perm->rank(1);
+ _perm->dtype(loco::DataType::S32);
+ _perm->size<loco::DataType::S32>(4);
+ _perm->dim(0) = 4;
+ _perm->at<loco::DataType::S32>(0) = static_cast<int32_t>(0);
+ _perm->at<loco::DataType::S32>(1) = static_cast<int32_t>(2);
+ _perm->at<loco::DataType::S32>(2) = static_cast<int32_t>(3);
+ _perm->at<loco::DataType::S32>(3) = static_cast<int32_t>(1);
+ _perm->shape_status(luci::ShapeStatus::VALID);
+
+ _transpose->a(input());
+ _transpose->perm(_perm);
+
+ _mean->input(_transpose);
+ _mean->reduction_indices(_indices);
+
+ output()->from(_mean);
+ }
+
+ luci::CircleTranspose *transpose(void) const { return _transpose; }
+ luci::CircleMean *mean(void) const { return _mean; }
+
+private:
+ luci::CircleTranspose *_transpose = nullptr;
+ luci::CircleMean *_mean = nullptr;
+ luci::CircleConst *_indices = nullptr;
+ luci::CircleConst *_perm = nullptr;
+};
+
+} // namespace
+
+TEST(FuseTransposeWithMeanPassTest, name)
+{
+ luci::FuseTransposeWithMeanPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(FuseTransposeWithMeanPassTest, fuse_transpose_with_mean)
+{
+ FuseTransposeWithMeanTestGraph g;
+ luci::FuseTransposeWithMeanPass pass;
+
+ g.init();
+
+ EXPECT_TRUE(pass.run(g.g()));
+
+ auto fused_mean = dynamic_cast<luci::CircleMean *>(g.output()->from());
+ EXPECT_NE(nullptr, fused_mean);
+
+ auto rindices = dynamic_cast<luci::CircleConst *>(fused_mean->reduction_indices());
+ EXPECT_NE(nullptr, rindices);
+
+ EXPECT_EQ(1, rindices->rank());
+ EXPECT_EQ(1, rindices->dim(0));
+ EXPECT_EQ(1, rindices->size<loco::DataType::S32>());
+ EXPECT_EQ(1, rindices->at<loco::DataType::S32>(0));
+}
+
+TEST(FuseTransposeWithMeanPassTest, fuse_transpose_with_mean_NEG)
+{
+ FuseTransposeWithMeanTestGraph g;
+ luci::FuseTransposeWithMeanPass pass;
+
+ g.init();
+
+ // Add CircleRelu operation between CircleMean and Transpose
+ auto relu = g.g()->nodes()->create<luci::CircleRelu>();
+ relu->name("relu");
+ relu->features(g.transpose());
+ g.mean()->input(relu);
+
+ // Due to the CircleRelu operation, pass will not be applied
+ EXPECT_FALSE(pass.run(g.g()));
+}
diff --git a/compiler/luci/pass/src/MakeBatchNormGammaPositivePass.cpp b/compiler/luci/pass/src/MakeBatchNormGammaPositivePass.cpp
new file mode 100644
index 000000000..96776dc92
--- /dev/null
+++ b/compiler/luci/pass/src/MakeBatchNormGammaPositivePass.cpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/MakeBatchNormGammaPositivePass.h"
+
+#include "BatchNormPatternFinder.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
+// Update negative gamma to positive (1e-10)
+bool negative_gamma_to_positive(luci::CircleConst *gamma)
+{
+ assert(gamma->dtype() == loco::DataType::FLOAT32);
+
+ bool changed = false;
+ uint32_t size = gamma->size<loco::DataType::FLOAT32>();
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ if (gamma->at<loco::DataType::FLOAT32>(i) < 0)
+ {
+ gamma->at<loco::DataType::FLOAT32>(i) = 1e-10;
+ changed = true;
+ }
+ }
+ return changed;
+}
+
+bool make_positive_gamma(luci::CircleAdd *add)
+{
+ luci::CircleMul *mul = nullptr;
+ luci::CircleConst *beta = nullptr;
+ luci::CircleConst *gamma = nullptr;
+ luci::CircleNode *pred = nullptr;
+
+ if (!is_batchnorm_add(add, mul, beta))
+ return false;
+
+ if (loco::succs(mul).size() != 1)
+ return false;
+
+ if (!is_batchnorm_mul(mul, pred, gamma))
+ return false;
+ assert(pred == add);
+ // Only support Relu
+ if (add->fusedActivationFunction() != luci::FusedActFunc::RELU)
+ return false;
+
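+  // NOTE this is a lossy rewrite: outputs may change wherever a negative
+  // gamma originally produced a positive pre-activation.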
+ return negative_gamma_to_positive(gamma);
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * Change negative gamma values of Mul-Add (as BatchNorm) to a small positive value (1e-10)
+ *
+ * PATTERN:
+ * |
+ * [CircleNode] [CircleConst](as gamma)
+ * | |
+ * [CircleMul] [CircleConst]
+ * | |
+ * [CircleAdd]
+ * |
+ */
+bool MakeBatchNormGammaPositivePass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto add = dynamic_cast<luci::CircleAdd *>(node);
+ if (add == nullptr)
+ continue;
+
+ if (make_positive_gamma(add))
+ changed = true;
+ }
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/MakeBatchNormGammaPositivePass.test.cpp b/compiler/luci/pass/src/MakeBatchNormGammaPositivePass.test.cpp
new file mode 100644
index 000000000..83093edc8
--- /dev/null
+++ b/compiler/luci/pass/src/MakeBatchNormGammaPositivePass.test.cpp
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/MakeBatchNormGammaPositivePass.h"
+
+#include <gtest/gtest.h>
+
+TEST(MakeBatchNormGammaPositivePassTest, name)
+{
+ luci::MakeBatchNormGammaPositivePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
diff --git a/compiler/luci/pass/src/ModulePhase.cpp b/compiler/luci/pass/src/ModulePhase.cpp
new file mode 100644
index 000000000..46819a0f7
--- /dev/null
+++ b/compiler/luci/pass/src/ModulePhase.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ModulePhase.h"
+
+namespace luci
+{
+
+void PhaseRunner<logo::PhaseStrategy::Saturate>::run(const Phase &phase) const
+{
+ notifyPhaseBegin();
+
+ for (bool changed = true; changed;)
+ {
+ changed = false;
+
+ for (auto &pass : phase)
+ {
+ notifyPassBegin(pass.get());
+
+ bool pass_changed = pass->run(_module);
+ changed = changed || pass_changed;
+
+ notifyPassEnd(pass.get(), pass_changed);
+ }
+ }
+
+ notifyPhaseEnd();
+}
+
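+// Unlike Saturate, Restart aborts the current sweep as soon as any pass
+// changes the module and starts again from the first pass in the phase.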
+void PhaseRunner<logo::PhaseStrategy::Restart>::run(const Phase &phase) const
+{
+ notifyPhaseBegin();
+
+ for (bool changed = true; changed;)
+ {
+ changed = false;
+
+ for (auto &pass : phase)
+ {
+ notifyPassBegin(pass.get());
+
+ bool pass_changed = pass->run(_module);
+ changed = changed || pass_changed;
+
+ notifyPassEnd(pass.get(), pass_changed);
+
+ if (changed)
+ {
+ break;
+ }
+ }
+ }
+
+ notifyPhaseEnd();
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/ModulePhase.h b/compiler/luci/pass/src/ModulePhase.h
new file mode 100644
index 000000000..05966cc29
--- /dev/null
+++ b/compiler/luci/pass/src/ModulePhase.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MODULE_PHASE_H__
+#define __MODULE_PHASE_H__
+
+#include <luci/ModulePass.h>
+
+#include <logo/Phase.h>
+
+#include <memory>
+#include <vector>
+
+namespace luci
+{
+
+using Phase = std::vector<std::unique_ptr<Pass>>;
+
+template <logo::PhaseStrategy S> class PhaseRunner;
+
+template <>
+class PhaseRunner<logo::PhaseStrategy::Saturate> final : public logo::PhaseRunnerMixinObservable
+{
+public:
+ PhaseRunner(luci::Module *module) : _module{module}
+ {
+ // DO NOTHING
+ }
+
+public:
+ void run(const Phase &) const;
+
+private:
+ luci::Module *_module;
+};
+
+template <>
+class PhaseRunner<logo::PhaseStrategy::Restart> final : public logo::PhaseRunnerMixinObservable
+{
+public:
+ PhaseRunner(luci::Module *module) : _module{module}
+ {
+ // DO NOTHING
+ }
+
+public:
+ void run(const Phase &) const;
+
+private:
+ luci::Module *_module;
+};
+
+} // namespace luci
+
+#endif // __MODULE_PHASE_H__
diff --git a/compiler/luci/pass/src/ModulePhase.test.cpp b/compiler/luci/pass/src/ModulePhase.test.cpp
new file mode 100644
index 000000000..5d92c59f4
--- /dev/null
+++ b/compiler/luci/pass/src/ModulePhase.test.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ModulePhase.h"
+
+#include "luci/Pass/CircleShapeInferencePass.h"
+
+#include <loco.h>
+
+#include <gtest/gtest.h>
+
+TEST(ModulePhaseTest, saturate)
+{
+ auto m = luci::make_module();
+ auto g = loco::make_graph();
+ m->add(std::move(g));
+
+ luci::Phase phase;
+
+ // Any Pass will do for testing
+ phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+
+ luci::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{m.get()};
+ phase_runner.run(phase);
+
+ SUCCEED();
+}
+
+TEST(ModulePhaseTest, restart)
+{
+ auto m = luci::make_module();
+ auto g = loco::make_graph();
+ m->add(std::move(g));
+
+ luci::Phase phase;
+
+ // Any Pass will do for testing
+ phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+
+ luci::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{m.get()};
+ phase_runner.run(phase);
+
+ SUCCEED();
+}
diff --git a/compiler/luci/pass/src/PassTestGraphs.h b/compiler/luci/pass/src/PassTestGraphs.h
new file mode 100644
index 000000000..f5ae24f0b
--- /dev/null
+++ b/compiler/luci/pass/src/PassTestGraphs.h
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PASS_TEST_GRAPHS_H__
+#define __LUCI_PASS_TEST_GRAPHS_H__
+
+#include <loco.h>
+#include <luci/IR/CircleNodes.h>
+
+#include <memory>
+#include <vector>
+
+namespace luci
+{
+
+/**
+ * ConstantFoldingTestGraph is a base class for testing
+ * constant folding passes. It creates Input and Output
+ * in the below graph. Child classes must implement Connector
+ * and Folded pattern.
+ *
+ * [Input] [Folded pattern] (Implemented by child class)
+ * \ /
+ * [Connector] (Implemented by child class)
+ * |
+ * [Output]
+ *
+ * Connector should satisfy the below conditions
+ * - Input type == Output type == Folded pattern type
+ * - Input shape == Output shape == Folded pattern shape
+ *
+ * For example, Add, Mul, Sub, .. can be a Connector
+ */
+class ConstantFoldingTestGraph
+{
+public:
+ ConstantFoldingTestGraph(std::vector<uint32_t> input_shape, loco::DataType input_dtype)
+ {
+ _input = _g.nodes()->create<luci::CircleInput>();
+ _output = _g.nodes()->create<luci::CircleOutput>();
+
+ auto graph_input = _g.inputs()->create();
+ _input->index(graph_input->index());
+ auto graph_output = _g.outputs()->create();
+ _output->index(graph_output->index());
+
+ graph_input->dtype(input_dtype);
+ graph_output->dtype(input_dtype);
+ _input->dtype(input_dtype);
+ _output->dtype(input_dtype);
+
+ auto input_tensor_shape = std::make_unique<loco::TensorShape>();
+ input_tensor_shape->rank(input_shape.size());
+    for (size_t i = 0; i < input_shape.size(); i++)
+ input_tensor_shape->dim(i).set(input_shape[i]);
+ graph_input->shape(std::move(input_tensor_shape));
+
+ auto output_tensor_shape = std::make_unique<loco::TensorShape>();
+ output_tensor_shape->rank(input_shape.size());
+    for (size_t i = 0; i < input_shape.size(); i++)
+ output_tensor_shape->dim(i).set(input_shape[i]);
+ graph_output->shape(std::move(output_tensor_shape));
+
+ _input->rank(input_shape.size());
+    for (size_t i = 0; i < input_shape.size(); i++)
+ _input->dim(i).set(input_shape[i]);
+
+ _output->rank(input_shape.size());
+    for (size_t i = 0; i < input_shape.size(); i++)
+ _output->dim(i).set(input_shape[i]);
+
+ _input->name("input");
+ _output->name("output");
+ }
+
+ virtual void init() = 0;
+
+ virtual ~ConstantFoldingTestGraph() = default;
+
+ virtual loco::Node *createFoldedPattern() = 0;
+
+ virtual luci::CircleConst *getFoldedPattern() = 0;
+
+ loco::Graph *graph() { return &_g; }
+
+  // NOTE these members are protected (not public), so they keep the _ prefix
+protected:
+ loco::Graph _g;
+ luci::CircleInput *_input = nullptr;
+ luci::CircleOutput *_output = nullptr;
+};
+
+/**
+ * ConstantFoldingAddTestGraph is a ConstantFoldingTestGraph
+ * whose Connector is Add.
+ */
+class ConstantFoldingAddTestGraph : public ConstantFoldingTestGraph
+{
+protected:
+ ConstantFoldingAddTestGraph(std::vector<uint32_t> input_shape, loco::DataType input_dtype)
+ : ConstantFoldingTestGraph(input_shape, input_dtype)
+ {
+ _add = _g.nodes()->create<luci::CircleAdd>();
+ _add->dtype(input_dtype);
+
+ _add->rank(input_shape.size());
+    for (size_t i = 0; i < input_shape.size(); i++)
+ _add->dim(i).set(input_shape[i]);
+
+ _add->x(_input);
+
+ _output->from(_add);
+
+ _add->name("add");
+ }
+
+protected:
+ void init() override { _add->y(createFoldedPattern()); }
+
+protected:
+ luci::CircleConst *getFoldedPattern() override
+ {
+ return dynamic_cast<luci::CircleConst *>(_add->y());
+ }
+
+protected:
+ luci::CircleAdd *_add = nullptr;
+};
+
+} // namespace luci
+
+#endif // __LUCI_PASS_TEST_GRAPHS_H__
diff --git a/compiler/luci/pass/src/ProgressReporter.cpp b/compiler/luci/pass/src/ProgressReporter.cpp
index dcf47aba6..515739dc7 100644
--- a/compiler/luci/pass/src/ProgressReporter.cpp
+++ b/compiler/luci/pass/src/ProgressReporter.cpp
@@ -81,4 +81,46 @@ void ProgressReporter::notify(const logo::PhaseEventInfo<logo::PhaseEvent::PassE
INFO(prime) << luci::fmt(graph());
}
+void ModuleProgressReporter::notify(const logo::PhaseEventInfo<logo::PhaseEvent::PhaseBegin> *)
+{
+ LOGGER(prime);
+
+ INFO(prime) << "==============================================================";
+ INFO(prime) << "ModulePhaseRunner<" << to_str(strategy()) << ">";
+ INFO(prime) << "Initial graphs";
+ for (size_t g = 0; g < module()->size(); ++g)
+ {
+ INFO(prime) << "graphs #" << g;
+ INFO(prime) << luci::fmt(module()->graph(g));
+ }
+}
+
+void ModuleProgressReporter::notify(const logo::PhaseEventInfo<logo::PhaseEvent::PhaseEnd> *)
+{
+ LOGGER(prime);
+
+ INFO(prime) << "ModulePhaseRunner<" << to_str(strategy()) << "> - done";
+}
+
+void ModuleProgressReporter::notify(const logo::PhaseEventInfo<logo::PhaseEvent::PassBegin> *info)
+{
+ LOGGER(prime);
+
+ INFO(prime) << "--------------------------------------------------------------";
+ INFO(prime) << "Before " << logo::pass_name(info->pass());
+}
+
+void ModuleProgressReporter::notify(const logo::PhaseEventInfo<logo::PhaseEvent::PassEnd> *info)
+{
+ LOGGER(prime);
+
+ INFO(prime) << "After " << logo::pass_name(info->pass())
+ << " (changed: " << to_char(info->changed()) << ")";
+ for (size_t g = 0; g < module()->size(); ++g)
+ {
+ INFO(prime) << "graphs #" << g;
+ INFO(prime) << luci::fmt(module()->graph(g));
+ }
+}
+
} // namespace luci
diff --git a/compiler/luci/pass/src/ProgressReporter.h b/compiler/luci/pass/src/ProgressReporter.h
index bd2ba9849..8c6c95e65 100644
--- a/compiler/luci/pass/src/ProgressReporter.h
+++ b/compiler/luci/pass/src/ProgressReporter.h
@@ -21,6 +21,8 @@
#include <loco.h>
+#include <luci/IR/Module.h>
+
namespace luci
{
@@ -28,7 +30,7 @@ class ProgressReporter : public logo::PhaseEventListener
{
public:
ProgressReporter(loco::Graph *graph, logo::PhaseStrategy strategy)
- : _graph{graph}, _strategy{strategy}
+ : _graph{graph}, _strategy{strategy}
{
// DO NOTHING
}
@@ -48,6 +50,30 @@ private:
logo::PhaseStrategy _strategy;
};
+class ModuleProgressReporter : public logo::PhaseEventListener
+{
+public:
+ ModuleProgressReporter(luci::Module *module, logo::PhaseStrategy strategy)
+ : _module{module}, _strategy{strategy}
+ {
+ // DO NOTHING
+ }
+
+public:
+ void notify(const logo::PhaseEventInfo<logo::PhaseEvent::PhaseBegin> *) override;
+ void notify(const logo::PhaseEventInfo<logo::PhaseEvent::PhaseEnd> *) override;
+ void notify(const logo::PhaseEventInfo<logo::PhaseEvent::PassBegin> *) override;
+ void notify(const logo::PhaseEventInfo<logo::PhaseEvent::PassEnd> *) override;
+
+public:
+ luci::Module *module(void) const { return _module; }
+ logo::PhaseStrategy strategy(void) const { return _strategy; }
+
+private:
+ luci::Module *_module;
+ logo::PhaseStrategy _strategy;
+};
+
} // namespace luci
#endif // __LUCI_PROGRESSREPORTER_H__
diff --git a/compiler/luci/pass/src/PropagateConcatenationQparam.test.cpp b/compiler/luci/pass/src/PropagateConcatenationQparam.test.cpp
new file mode 100644
index 000000000..68136b244
--- /dev/null
+++ b/compiler/luci/pass/src/PropagateConcatenationQparam.test.cpp
@@ -0,0 +1,375 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "QuantizationUtils.h"
+
+#include <luci/IR/CircleQuantParam.h>
+
+#include <math.h>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+void addQuantParam(luci::CircleNode &node, const std::vector<float> &scale,
+ const std::vector<int64_t> &zp)
+{
+ assert(node.quantparam() == nullptr);
+
+ auto quantparam = std::make_unique<luci::CircleQuantParam>();
+ quantparam->scale = scale;
+ quantparam->zerop = zp;
+ node.quantparam(std::move(quantparam));
+}
+
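+// Reference affine quantization for expected values: q = round(f / scale) + zero_point
+// NOTE saturation to the target integer range is omitted; test inputs are expected to fit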
+int32_t quantize(float f, luci::CircleQuantParam *qparam)
+{
+ float scale = qparam->scale[0];
+ int64_t zp = qparam->zerop[0];
+
+ return std::round(f / scale) + zp;
+}
+
+class SimpleConcatGraph
+{
+public:
+ SimpleConcatGraph(loco::DataType quant_type)
+ {
+ concat_node.dtype(quant_type);
+ concat_node.fusedActivationFunction(luci::FusedActFunc::NONE);
+ input_1.dtype(quant_type);
+ input_2.dtype(quant_type);
+
+ concat_node.values(0, &input_1);
+ concat_node.values(1, &input_2);
+
+ if (quant_type == loco::DataType::U8)
+ {
+ addQuantParam(concat_node, {3.14}, {77});
+ addQuantParam(input_1, {1.0}, {1});
+ addQuantParam(input_2, {2.0}, {2});
+ }
+ else if (quant_type == loco::DataType::S16)
+ {
+ addQuantParam(concat_node, {3.14}, {0});
+ addQuantParam(input_1, {1.0}, {0});
+ addQuantParam(input_2, {2.0}, {0});
+ }
+ else
+ {
+ throw std::runtime_error("Unsupported quantization type");
+ }
+ }
+
+ ~SimpleConcatGraph()
+ {
+ concat_node.values(0, nullptr);
+ concat_node.values(1, nullptr);
+ }
+
+public:
+ luci::CircleConcatenation concat_node{2};
+ luci::CircleConv2D input_1;
+ luci::CircleConv2D input_2;
+};
+
+class SubsequentConcatGraph
+{
+public:
+ SubsequentConcatGraph(loco::DataType quant_type)
+ {
+ concat_node.dtype(quant_type);
+ concat_node.fusedActivationFunction(luci::FusedActFunc::NONE);
+ input_1.dtype(quant_type);
+ input_2.dtype(quant_type);
+
+ concat_node.values(0, &input_1);
+ concat_node.values(1, &input_2);
+
+ if (quant_type == loco::DataType::U8)
+ {
+ addQuantParam(concat_node, {3.14}, {77});
+ addQuantParam(input_1, {1.0}, {1});
+ addQuantParam(input_2, {2.0}, {2});
+ }
+ else if (quant_type == loco::DataType::S16)
+ {
+ addQuantParam(concat_node, {3.14}, {0});
+ addQuantParam(input_1, {1.0}, {0});
+ addQuantParam(input_2, {2.0}, {0});
+ }
+ else
+ {
+ throw std::runtime_error("Unsupported quantization type");
+ }
+ }
+
+ ~SubsequentConcatGraph()
+ {
+ concat_node.values(0, nullptr);
+ concat_node.values(1, nullptr);
+ }
+
+public:
+ luci::CircleConcatenation concat_node{2};
+ luci::CircleConcatenation input_1{2};
+ luci::CircleConv2D input_2;
+};
+
+class ConstInputConcatGraph
+{
+public:
+ ConstInputConcatGraph(loco::DataType quant_type)
+ {
+ concat_node = g.nodes()->create<luci::CircleConcatenation>(2);
+ input_1 = g.nodes()->create<luci::CircleConst>();
+ input_2 = g.nodes()->create<luci::CircleConv2D>();
+
+ concat_node->dtype(quant_type);
+ concat_node->fusedActivationFunction(luci::FusedActFunc::NONE);
+ input_1->dtype(loco::DataType::FLOAT32);
+ input_1->size<loco::DataType::FLOAT32>(5);
+ for (int i = 0; i < 5; i++)
+ {
+ // Set data {-2, -1, 0, 1, 2}
+ input_1->at<loco::DataType::FLOAT32>(i) = i - 2.0;
+ }
+
+ input_2->dtype(quant_type);
+
+ concat_node->values(0, input_1);
+ concat_node->values(1, input_2);
+
+ if (quant_type == loco::DataType::U8)
+ {
+ addQuantParam(*concat_node, {0.1}, {10});
+ addQuantParam(*input_2, {2.0}, {2});
+ }
+ else if (quant_type == loco::DataType::S16)
+ {
+ addQuantParam(*concat_node, {0.1}, {0});
+ addQuantParam(*input_2, {2.0}, {0});
+ }
+ else
+ {
+ throw std::runtime_error("Unsupported quantization type");
+ }
+ }
+
+public:
+ loco::Graph g;
+ luci::CircleConcatenation *concat_node = nullptr;
+ luci::CircleConst *input_1 = nullptr;
+ luci::CircleConv2D *input_2 = nullptr;
+};
+
+} // namespace
+
+TEST(PropagateConcatenationQparam, propagate_concat_quantparam_u8)
+{
+ // Check cases where qparam of concat_node is propagated
+ // (1) normal case: qparam is propagated to input_1 and input_2
+ // (2) input used by other Op: input_1 is an input of input_2. qparam is propagated only to
+ // input_2
+ // (3) subsequent concat: input_1 is concat. qparam is propagated to subsequent concat
+ // (4) const input: input_1 is const. constant values are quantized
+
+ // normal case: qparam of concat_node is propagated to input_1 and input_2
+ SimpleConcatGraph g(loco::DataType::U8);
+ luci::propagate_concat_quantparam(&g.concat_node);
+ EXPECT_FLOAT_EQ(3.14, g.concat_node.quantparam()->scale[0]);
+ EXPECT_EQ(77, g.concat_node.quantparam()->zerop[0]);
+ EXPECT_FLOAT_EQ(3.14, g.input_1.quantparam()->scale[0]);
+ EXPECT_EQ(77, g.input_1.quantparam()->zerop[0]);
+ EXPECT_FLOAT_EQ(3.14, g.input_2.quantparam()->scale[0]);
+ EXPECT_EQ(77, g.input_2.quantparam()->zerop[0]);
+
+ // input_1 is an input of input_2. qparam is propagated only to input_2
+ SimpleConcatGraph g2(loco::DataType::U8);
+ g2.input_2.input(&g2.input_1);
+ luci::propagate_concat_quantparam(&g2.concat_node);
+ EXPECT_FLOAT_EQ(3.14, g2.concat_node.quantparam()->scale[0]);
+ EXPECT_EQ(77, g2.concat_node.quantparam()->zerop[0]);
+ EXPECT_FLOAT_EQ(1.0, g2.input_1.quantparam()->scale[0]);
+ EXPECT_EQ(1, g2.input_1.quantparam()->zerop[0]);
+ EXPECT_FLOAT_EQ(3.14, g2.input_2.quantparam()->scale[0]);
+ EXPECT_EQ(77, g2.input_2.quantparam()->zerop[0]);
+
+ // input_1 is concat. qparam is propagated to subsequent concat
+ SubsequentConcatGraph sg(loco::DataType::U8);
+ luci::propagate_concat_quantparam(&sg.concat_node);
+ EXPECT_FLOAT_EQ(3.14, sg.concat_node.quantparam()->scale[0]);
+ EXPECT_EQ(77, sg.concat_node.quantparam()->zerop[0]);
+ EXPECT_FLOAT_EQ(3.14, sg.input_1.quantparam()->scale[0]);
+ EXPECT_EQ(77, sg.input_1.quantparam()->zerop[0]);
+ EXPECT_FLOAT_EQ(3.14, sg.input_2.quantparam()->scale[0]);
+ EXPECT_EQ(77, sg.input_2.quantparam()->zerop[0]);
+
+ // input_1 is const. const values are quantized with the qparam of concat
+ ConstInputConcatGraph cg(loco::DataType::U8);
+ luci::propagate_concat_quantparam(cg.concat_node);
+ EXPECT_FLOAT_EQ(0.1, cg.concat_node->quantparam()->scale[0]);
+ EXPECT_EQ(10, cg.concat_node->quantparam()->zerop[0]);
+ const auto cg_input_1 = loco::must_cast<luci::CircleConst *>(cg.concat_node->values(0));
+ EXPECT_FLOAT_EQ(0.1, cg_input_1->quantparam()->scale[0]);
+ EXPECT_EQ(10, cg_input_1->quantparam()->zerop[0]);
+ EXPECT_FLOAT_EQ(0.1, cg.input_2->quantparam()->scale[0]);
+ EXPECT_EQ(10, cg.input_2->quantparam()->zerop[0]);
+ EXPECT_EQ(loco::DataType::U8, cg_input_1->dtype());
+ EXPECT_EQ(0, cg_input_1->at<loco::DataType::U8>(0));
+ EXPECT_EQ(0, cg_input_1->at<loco::DataType::U8>(1));
+ EXPECT_EQ(10, cg_input_1->at<loco::DataType::U8>(2));
+ EXPECT_EQ(20, cg_input_1->at<loco::DataType::U8>(3));
+ EXPECT_EQ(30, cg_input_1->at<loco::DataType::U8>(4));
+}
+
+TEST(PropagateConcatenationQparam, propagate_concat_quantparam_u8_NEG)
+{
+ // Check negative cases where qparam is not propagated
+ // (1) concat has fused activation function
+ // (2) concat has fused activation function and input is const
+
+ SimpleConcatGraph g(loco::DataType::U8);
+
+ // concat has fused activation function
+ g.concat_node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ luci::propagate_concat_quantparam(&g.concat_node);
+ EXPECT_FLOAT_EQ(3.14, g.concat_node.quantparam()->scale[0]);
+ EXPECT_EQ(77, g.concat_node.quantparam()->zerop[0]);
+ EXPECT_FLOAT_EQ(1.0, g.input_1.quantparam()->scale[0]);
+ EXPECT_EQ(1, g.input_1.quantparam()->zerop[0]);
+ EXPECT_FLOAT_EQ(2.0, g.input_2.quantparam()->scale[0]);
+ EXPECT_EQ(2, g.input_2.quantparam()->zerop[0]);
+ g.concat_node.fusedActivationFunction(luci::FusedActFunc::NONE);
+
+ // concat has fused activation function and input_1 is const.
+  // const values are quantized using their own min/max
+ ConstInputConcatGraph cg(loco::DataType::U8);
+ cg.concat_node->fusedActivationFunction(luci::FusedActFunc::RELU);
+ luci::propagate_concat_quantparam(cg.concat_node);
+ EXPECT_FLOAT_EQ(0.1, cg.concat_node->quantparam()->scale[0]);
+ EXPECT_EQ(10, cg.concat_node->quantparam()->zerop[0]);
+ const auto cg_input_1 = loco::must_cast<luci::CircleConst *>(cg.concat_node->values(0));
+ EXPECT_FLOAT_EQ(0.015686275, cg_input_1->quantparam()->scale[0]);
+ EXPECT_EQ(128, cg_input_1->quantparam()->zerop[0]);
+ EXPECT_FLOAT_EQ(2.0, cg.input_2->quantparam()->scale[0]);
+ EXPECT_EQ(2, cg.input_2->quantparam()->zerop[0]);
+ EXPECT_EQ(loco::DataType::U8, cg_input_1->dtype());
+ EXPECT_EQ(quantize(-2, cg_input_1->quantparam()), cg_input_1->at<loco::DataType::U8>(0));
+ EXPECT_EQ(quantize(-1, cg_input_1->quantparam()), cg_input_1->at<loco::DataType::U8>(1));
+ EXPECT_EQ(quantize(0, cg_input_1->quantparam()), cg_input_1->at<loco::DataType::U8>(2));
+ EXPECT_EQ(quantize(1, cg_input_1->quantparam()), cg_input_1->at<loco::DataType::U8>(3));
+ EXPECT_EQ(quantize(2, cg_input_1->quantparam()), cg_input_1->at<loco::DataType::U8>(4));
+}
+
+TEST(PropagateConcatenationQparam, propagate_concat_quantparam_i16)
+{
+ // Check cases where qparam of concat_node is propagated
+ // (1) normal case: qparam is propagated to input_1 and input_2
+ // (2) input used by other Op: input_1 is an input of input_2. qparam is propagated only to
+ // input_2
+ // (3) subsequent concat: input_1 is concat. qparam is propagated to subsequent concat
+ // (4) const input: input_1 is const. constant values are quantized
+
+ // normal case: qparam of concat_node is propagated to input_1 and input_2
+ SimpleConcatGraph g(loco::DataType::S16);
+ luci::propagate_concat_quantparam(&g.concat_node);
+ EXPECT_FLOAT_EQ(3.14, g.concat_node.quantparam()->scale[0]);
+ EXPECT_EQ(0, g.concat_node.quantparam()->zerop[0]);
+ EXPECT_FLOAT_EQ(3.14, g.input_1.quantparam()->scale[0]);
+ EXPECT_EQ(0, g.input_1.quantparam()->zerop[0]);
+ EXPECT_FLOAT_EQ(3.14, g.input_2.quantparam()->scale[0]);
+ EXPECT_EQ(0, g.input_2.quantparam()->zerop[0]);
+
+ // input_1 is an input of input_2. qparam is propagated only to input_2
+ SimpleConcatGraph g2(loco::DataType::S16);
+ g2.input_2.input(&g2.input_1);
+ luci::propagate_concat_quantparam(&g2.concat_node);
+ EXPECT_FLOAT_EQ(3.14, g2.concat_node.quantparam()->scale[0]);
+ EXPECT_EQ(0, g2.concat_node.quantparam()->zerop[0]);
+ EXPECT_FLOAT_EQ(1.0, g2.input_1.quantparam()->scale[0]);
+ EXPECT_EQ(0, g2.input_1.quantparam()->zerop[0]);
+ EXPECT_FLOAT_EQ(3.14, g2.input_2.quantparam()->scale[0]);
+ EXPECT_EQ(0, g2.input_2.quantparam()->zerop[0]);
+
+  // input_1 is concat. qparam is propagated to subsequent concat
+ SubsequentConcatGraph sg(loco::DataType::S16);
+ luci::propagate_concat_quantparam(&sg.concat_node);
+ EXPECT_FLOAT_EQ(3.14, sg.concat_node.quantparam()->scale[0]);
+ EXPECT_EQ(0, sg.concat_node.quantparam()->zerop[0]);
+ EXPECT_FLOAT_EQ(3.14, sg.input_1.quantparam()->scale[0]);
+ EXPECT_EQ(0, sg.input_1.quantparam()->zerop[0]);
+ EXPECT_FLOAT_EQ(3.14, sg.input_2.quantparam()->scale[0]);
+ EXPECT_EQ(0, sg.input_2.quantparam()->zerop[0]);
+
+ // input_1 is const. const values are quantized with the qparam of concat
+ ConstInputConcatGraph cg(loco::DataType::S16);
+ luci::propagate_concat_quantparam(cg.concat_node);
+ EXPECT_FLOAT_EQ(0.1, cg.concat_node->quantparam()->scale[0]);
+ EXPECT_EQ(0, cg.concat_node->quantparam()->zerop[0]);
+ const auto cg_input_1 = loco::must_cast<luci::CircleConst *>(cg.concat_node->values(0));
+ EXPECT_FLOAT_EQ(0.1, cg_input_1->quantparam()->scale[0]);
+ EXPECT_EQ(0, cg_input_1->quantparam()->zerop[0]);
+ EXPECT_FLOAT_EQ(0.1, cg.input_2->quantparam()->scale[0]);
+ EXPECT_EQ(0, cg.input_2->quantparam()->zerop[0]);
+ EXPECT_EQ(loco::DataType::S16, cg_input_1->dtype());
+ EXPECT_EQ(-20, cg_input_1->at<loco::DataType::S16>(0));
+ EXPECT_EQ(-10, cg_input_1->at<loco::DataType::S16>(1));
+ EXPECT_EQ(0, cg_input_1->at<loco::DataType::S16>(2));
+ EXPECT_EQ(10, cg_input_1->at<loco::DataType::S16>(3));
+ EXPECT_EQ(20, cg_input_1->at<loco::DataType::S16>(4));
+}
+
+TEST(PropagateConcatenationQparam, propagate_concat_quantparam_i16_NEG)
+{
+ // Check negative cases where qparam is not propagated
+ // (1) concat has fused activation function
+ // (2) concat has fused activation function and input is const
+
+ SimpleConcatGraph g(loco::DataType::S16);
+
+ // concat has fused activation function
+ g.concat_node.fusedActivationFunction(luci::FusedActFunc::RELU);
+ luci::propagate_concat_quantparam(&g.concat_node);
+ EXPECT_FLOAT_EQ(3.14, g.concat_node.quantparam()->scale[0]);
+ EXPECT_EQ(0, g.concat_node.quantparam()->zerop[0]);
+ EXPECT_FLOAT_EQ(1.0, g.input_1.quantparam()->scale[0]);
+ EXPECT_EQ(0, g.input_1.quantparam()->zerop[0]);
+ EXPECT_FLOAT_EQ(2.0, g.input_2.quantparam()->scale[0]);
+ EXPECT_EQ(0, g.input_2.quantparam()->zerop[0]);
+ g.concat_node.fusedActivationFunction(luci::FusedActFunc::NONE);
+
+ // concat has fused activation function and input_1 is const.
+  // const values are quantized using their own min/max
+ ConstInputConcatGraph cg(loco::DataType::S16);
+ cg.concat_node->fusedActivationFunction(luci::FusedActFunc::RELU);
+ luci::propagate_concat_quantparam(cg.concat_node);
+ EXPECT_FLOAT_EQ(0.1, cg.concat_node->quantparam()->scale[0]);
+ EXPECT_EQ(0, cg.concat_node->quantparam()->zerop[0]);
+ const auto cg_input_1 = loco::must_cast<luci::CircleConst *>(cg.concat_node->values(0));
+ EXPECT_FLOAT_EQ(0.000061037, cg_input_1->quantparam()->scale[0]);
+ EXPECT_EQ(0, cg_input_1->quantparam()->zerop[0]);
+ EXPECT_FLOAT_EQ(2.0, cg.input_2->quantparam()->scale[0]);
+ EXPECT_EQ(0, cg.input_2->quantparam()->zerop[0]);
+ EXPECT_EQ(loco::DataType::S16, cg_input_1->dtype());
+ EXPECT_EQ(quantize(-2, cg_input_1->quantparam()), cg_input_1->at<loco::DataType::S16>(0));
+ EXPECT_EQ(quantize(-1, cg_input_1->quantparam()), cg_input_1->at<loco::DataType::S16>(1));
+ EXPECT_EQ(quantize(0, cg_input_1->quantparam()), cg_input_1->at<loco::DataType::S16>(2));
+ EXPECT_EQ(quantize(1, cg_input_1->quantparam()), cg_input_1->at<loco::DataType::S16>(3));
+ EXPECT_EQ(quantize(2, cg_input_1->quantparam()), cg_input_1->at<loco::DataType::S16>(4));
+}
diff --git a/compiler/luci/pass/src/PropagateQParamBackwardPass.cpp b/compiler/luci/pass/src/PropagateQParamBackwardPass.cpp
new file mode 100644
index 000000000..18617e3b7
--- /dev/null
+++ b/compiler/luci/pass/src/PropagateQParamBackwardPass.cpp
@@ -0,0 +1,546 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/PropagateQParamBackwardPass.h"
+#include "QuantizationUtils.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Service/Nodes/CircleConst.h>
+#include <luci/Log.h>
+
+#include <cmath>
+#include <limits>
+
+namespace
+{
+
+// Return true if node is a virtual node
+bool virtual_op(const luci::CircleOpcode opcode)
+{
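+  // X-macro over CircleNodes.lst: CIRCLE_NODE entries expand to real Ops (return false),
+  // CIRCLE_VNODE entries to virtual Ops (return true)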
+ switch (opcode)
+ {
+#define CIRCLE_NODE(OPCODE, CIRCLE_CLASS) \
+ case luci::CircleOpcode::OPCODE: \
+ return false;
+#define CIRCLE_VNODE(OPCODE, CIRCLE_CLASS) \
+ case luci::CircleOpcode::OPCODE: \
+ return true;
+#include <luci/IR/CircleNodes.lst>
+#undef CIRCLE_NODE
+#undef CIRCLE_VNODE
+ default:
+ throw std::runtime_error("Unknown opcode detected");
+ }
+}
+
+void quant_const_values(luci::CircleConst *const_node, float scaling_factor, float zerop,
+ loco::DataType quant_type)
+{
+ uint32_t size = const_node->size<loco::DataType::FLOAT32>();
+
+ const float scaling_factor_inv = 1.0 / scaling_factor;
+ std::vector<int32_t> quantized_values(size);
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ auto data = static_cast<double>(const_node->at<loco::DataType::FLOAT32>(i));
+ double quantized_data = std::round(data * scaling_factor_inv) + zerop;
+ constexpr double int_max = static_cast<double>(std::numeric_limits<int32_t>::max());
+ constexpr double int_min = static_cast<double>(std::numeric_limits<int32_t>::min());
+ quantized_data = std::min(int_max, std::max(int_min, quantized_data));
+
+ quantized_values[i] = static_cast<int32_t>(quantized_data);
+ }
+
+ switch (quant_type)
+ {
+ case loco::DataType::U8:
+ const_node->dtype(loco::DataType::U8); // change the type of tensor
+ const_node->size<loco::DataType::U8>(size); // resize tensor
+ for (uint32_t i = 0; i < size; ++i)
+ const_node->at<loco::DataType::U8>(i) = std::min(255, std::max(0, quantized_values[i]));
+ break;
+ case loco::DataType::S16:
+ assert(zerop == 0);
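+      // NOTE S16 clamps to the symmetric range [-32767, 32767]; -32768 is excluded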
+ const_node->dtype(loco::DataType::S16); // change the type of tensor
+ const_node->size<loco::DataType::S16>(size); // resize tensor
+ for (uint32_t i = 0; i < size; ++i)
+ const_node->at<loco::DataType::S16>(i) =
+ std::min(32767, std::max(-32767, quantized_values[i]));
+ break;
+ default:
+ throw std::runtime_error("Unsupported data type");
+ }
+}
+
+void overwrite_quantparam(const luci::CircleNode *source, luci::CircleNode *target)
+{
+ auto source_qparam = source->quantparam();
+ if (source_qparam == nullptr)
+ throw std::runtime_error("source quantparam is not found during overwrite");
+
+ auto target_qparam = target->quantparam();
+ if (target_qparam == nullptr)
+ {
+ auto quantparam = std::make_unique<luci::CircleQuantParam>();
+ target->quantparam(std::move(quantparam));
+ target_qparam = target->quantparam();
+
+ if (target_qparam == nullptr)
+ throw std::runtime_error("Creating new quant param failed");
+ }
+ target_qparam->min = source_qparam->min;
+ target_qparam->max = source_qparam->max;
+ target_qparam->scale = source_qparam->scale;
+ target_qparam->zerop = source_qparam->zerop;
+ target_qparam->quantized_dimension = source_qparam->quantized_dimension;
+}
+
+/**
+ * Tells whether pad_v2 quantization should ignore the padding value.
+ * In that case, the padding const is quantized with the input's parameters and possibly clipped.
+ */
+bool ignore_pad_v2_const_quantization(const luci::CirclePadV2 *pad)
+{
+ // This is a workaround to quantize pad generated from MaxPoolWithArgmax operation properly
+ // TODO use metadata hints to detect this case
+ auto const_value_node = dynamic_cast<const luci::CircleConst *>(pad->arg(2));
+ if (!const_value_node)
+ return false;
+ if (const_value_node->dtype() == loco::DataType::FLOAT32)
+ {
+ float const_value = const_value_node->at<loco::DataType::FLOAT32>(0);
+ if (const_value == std::numeric_limits<float>::lowest())
+ return true;
+ }
+ return false;
+}
+
+/** EXAMPLE
+ *
+ * BEFORE
+ *
+ * [CircleNode] [CircleConst]
+ * (qparam1) (FP32)
+ * \ /
+ * \ /
+ * [CirclePack]
+ * (qparam2)
+ *
+ * AFTER
+ *
+ * [CircleNode] [CircleConst] [CircleConst] <- Dead node
+ * (qparam2) (qparam2) (FP32)
+ * \ /
+ * \ /
+ * [CirclePack]
+ * (qparam2)
+ *
+ * NOTE Quantization parameter of CirclePack (qparam2) is propagated to the inputs.
+ */
+void propagate_pack_quantparam(luci::CirclePack *pack)
+{
+ assert(pack->quantparam() != nullptr);
+
+ const auto num_inputs = pack->values_count();
+
+ for (uint32_t i = 0; i < num_inputs; i++)
+ {
+ auto node = loco::must_cast<luci::CircleNode *>(pack->arg(i));
+
+ // Quantize constant values
+ if (node->opcode() == luci::CircleOpcode::CIRCLECONST)
+ {
+ luci::CircleConst *const_node = loco::must_cast<luci::CircleConst *>(node);
+ if (const_node->dtype() != loco::DataType::FLOAT32)
+ throw std::runtime_error("Unsupported data type for constant input of pack Op");
+
+ const auto pack_qparam = pack->quantparam();
+ if (pack_qparam == nullptr)
+ throw std::runtime_error("quantparam of pack is not found during propagation");
+
+ assert(pack_qparam->scale.size() == 1);
+ assert(pack_qparam->zerop.size() == 1);
+ const auto scaling_factor = pack_qparam->scale[0];
+ const auto zerop = pack_qparam->zerop[0];
+
+ auto new_const = luci::clone(const_node);
+ quant_const_values(new_const, scaling_factor, zerop, pack->dtype());
+ pack->values(i, new_const);
+ overwrite_quantparam(pack, new_const);
+ }
+ else
+ {
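+      // Do not touch an input that feeds other nodes; overwriting its qparam
+      // would also change those other consumers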
+ const auto succs = loco::succs(node);
+ if (succs.size() > 1)
+ continue;
+
+ // Non-const input must have been quantized
+ assert(node->quantparam() != nullptr);
+ overwrite_quantparam(pack, node);
+ }
+ }
+}
+
+/** EXAMPLE
+ *
+ * BEFORE
+ *
+ * [CircleNode] [CircleConst] [CircleConst] [CircleNode]
+ * (S32) (S32) (FP32) (U8 qparam1)
+ * \ \ / /
+ * \ \ / /
+ * \ \ / /
+ * -------[CircleOneHot]-------
+ * (U8 qparam2)
+ *
+ * AFTER
+ *
+ * [CircleNode] [CircleConst] [CircleConst] [CircleNode] [CircleConst] <- Dead node
+ * (S32) (S32) (U8 qparam2) (U8 qparam2) (FP32)
+ * \ \ / /
+ * \ \ / /
+ * \ \ / /
+ * -------[CircleOneHot]-------
+ * (U8 qparam2)
+ *
+ * NOTE Quantization parameter of CircleOneHot (qparam2) is propagated to on_value/off_value.
+ */
+void propagate_one_hot_quantparam(luci::CircleOneHot *one_hot)
+{
+ assert(one_hot->quantparam() != nullptr);
+
+ // Propagate quantization parameters from output to inputs,
+  // to fit both input and constant_value in one quant range.
+ auto quant_input = [one_hot](void (luci::CircleOneHot::*arg_setter)(loco::Node *),
+ loco::Node *(luci::CircleOneHot::*arg_getter)() const) {
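+    // arg_setter/arg_getter are pointer-to-member functions, so the same lambda
+    // can quantize both on_value and off_value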
+ auto node = loco::must_cast<luci::CircleNode *>((one_hot->*arg_getter)());
+
+ // Quantize constant values
+ if (node->opcode() == luci::CircleOpcode::CIRCLECONST)
+ {
+ luci::CircleConst *const_node = loco::must_cast<luci::CircleConst *>(node);
+ if (is_quantized(const_node))
+ return;
+
+ if (const_node->dtype() != loco::DataType::FLOAT32)
+ throw std::runtime_error("Unsupported data type for constant input of OneHot Op");
+
+ const auto qparam = one_hot->quantparam();
+ if (qparam == nullptr)
+ throw std::runtime_error("quantparam of OneHot is not found during propagation");
+
+ assert(qparam->scale.size() == 1);
+ const auto scaling_factor = qparam->scale.at(0);
+ const auto zerop = qparam->zerop.at(0);
+
+ auto new_const = luci::clone(const_node);
+ quant_const_values(new_const, scaling_factor, zerop, one_hot->dtype());
+ overwrite_quantparam(one_hot, new_const);
+ (one_hot->*arg_setter)(new_const);
+ }
+ else
+ {
+ const auto succs = loco::succs(node);
+ if (succs.size() > 1)
+ return;
+
+ // Non-const input must have been quantized
+ assert(node->quantparam() != nullptr);
+ overwrite_quantparam(one_hot, node);
+ }
+ };
+
+ quant_input(&luci::CircleOneHot::on_value, &luci::CircleOneHot::on_value);
+ quant_input(&luci::CircleOneHot::off_value, &luci::CircleOneHot::off_value);
+}
+
+} // namespace
+
+namespace luci
+{
+
+/** BEFORE
+ *
+ * [CircleNode] [CircleConst]
+ * (U8 qparam1) (FP32)
+ * \ /
+ * \ /
+ * [CircleConcatenation]
+ * (U8 qparam2)
+ *
+ * AFTER
+ * [CircleNode] [CircleConst] [CircleConst] <- Dead node
+ * (U8 qparam2) (U8 qparam2) (FP32)
+ * \ /
+ * \ /
+ * [CircleConcatenation]
+ * (U8 qparam2)
+ */
+void propagate_concat_quantparam(luci::CircleConcatenation *concat)
+{
+ assert(concat->quantparam() != nullptr);
+
+ const auto num_inputs = concat->numValues();
+
+ // Quantize const inputs using their values if concat has fused act function
+ if (concat->fusedActivationFunction() != luci::FusedActFunc::NONE)
+ {
+ for (uint32_t i = 0; i < num_inputs; i++)
+ {
+ auto node = concat->arg(i);
+ auto const_node = dynamic_cast<luci::CircleConst *>(node);
+ if (const_node != nullptr)
+ {
+ auto new_const = luci::clone(const_node);
+ quant_const(new_const, concat->dtype());
+ concat->values(i, new_const);
+ }
+ }
+ return;
+ }
+
+ for (uint32_t i = 0; i < num_inputs; i++)
+ {
+ auto node = loco::must_cast<luci::CircleNode *>(concat->arg(i));
+
+ // Quantize constant values
+ if (node->opcode() == luci::CircleOpcode::CIRCLECONST)
+ {
+ luci::CircleConst *const_node = loco::must_cast<luci::CircleConst *>(node);
+
+ const auto concat_qparam = concat->quantparam();
+ assert(concat_qparam->scale.size() == 1);
+ const auto scaling_factor = concat_qparam->scale[0];
+ const auto zerop = concat_qparam->zerop[0];
+
+ auto new_const = luci::clone(const_node);
+ quant_const_values(new_const, scaling_factor, zerop, concat->dtype());
+ concat->values(i, new_const);
+ overwrite_quantparam(concat, new_const);
+ }
+ else
+ {
+ const auto succs = loco::succs(node);
+ if (succs.size() > 1)
+ continue;
+
+ // Non-const input must have been quantized
+ assert(node->quantparam() != nullptr);
+ overwrite_quantparam(concat, node);
+ }
+ }
+}
+
+/** BEFORE
+ *
+ * [CircleNode] [CircleConst] [CircleConst]
+ * (U8 qparam1) (S32) (FP32)
+ * \ | /
+ * \ | /
+ * [CirclePadV2]
+ * (U8 qparam2)
+ *
+ * AFTER (case 1)
+ *
+ * By default qparam is propagated from output to inputs to meet backend requirements.
+ *
+ * [CircleNode] [CircleConst] [CircleConst] [CircleConst] <- Dead node
+ * (U8 qparam2) (S32) (U8 qparam2) (FP32)
+ * \ | /
+ * \ | /
+ * [CirclePadV2]
+ * (U8 qparam2)
+ *
+ * AFTER (case 2)
+ *
+ * In case the padding value is the lowest float value,
+ * qparam is propagated from the input to the output and the constant.
+ *
+ * This is a special case for pads constructed by optimizations, needed to guarantee that
+ * the extremely large negative constant does not stretch the output quantization range.
+ *
+ * [CircleNode] [CircleConst] [CircleConst] [CircleConst] <- Dead node
+ * (U8 qparam1) (S32) (U8 qparam1) (FP32)
+ * \ | /
+ * \ | /
+ * [CirclePadV2]
+ * (U8 qparam1)
+ */
+void propagate_pad_v2_quantparam(luci::CirclePadV2 *pad_v2)
+{
+ if (ignore_pad_v2_const_quantization(pad_v2))
+ {
+    // propagate input quantization parameters from the input to the output and the padding const value
+ auto pad_v2_input = loco::must_cast<luci::CircleNode *>(pad_v2->arg(0));
+ overwrite_quantparam(pad_v2_input, pad_v2);
+
+ auto const_value_node = loco::must_cast<luci::CircleConst *>(
+ pad_v2->arg(2)); // FIX ignore_pad_v2_const_quantization UNLESS
+ auto new_const = luci::clone(const_value_node);
+
+ const auto pad_v2_input_qparam = pad_v2_input->quantparam();
+ assert(pad_v2_input_qparam != nullptr);
+ assert(pad_v2_input_qparam->scale.size() == 1);
+ const auto scaling_factor = pad_v2_input_qparam->scale.at(0);
+ const auto zerop = pad_v2_input_qparam->zerop.at(0);
+
+ quant_const_values(new_const, scaling_factor, zerop, pad_v2->dtype());
+ overwrite_quantparam(pad_v2_input, new_const);
+ pad_v2->constant_values(new_const);
+ return;
+ }
+
+  // Propagate quantization parameters from output to inputs,
+  // to fit both input and constant_value in one quant range.
+ auto quant_input = [pad_v2](void (CirclePadV2::*arg_setter)(loco::Node *), uint32_t arg) {
+ auto node = loco::must_cast<luci::CircleNode *>(pad_v2->arg(arg));
+
+ // Quantize constant values
+ if (node->opcode() == luci::CircleOpcode::CIRCLECONST)
+ {
+ luci::CircleConst *const_node = loco::must_cast<luci::CircleConst *>(node);
+ if (is_quantized(const_node))
+ return;
+
+ if (const_node->dtype() != loco::DataType::FLOAT32)
+ throw std::runtime_error("Unsupported data type for constant input of PadV2 Op");
+
+ const auto pad_v2_qparam = pad_v2->quantparam();
+ if (pad_v2_qparam == nullptr)
+ throw std::runtime_error("quantparam of PadV2 is not found during propagation");
+
+ assert(pad_v2_qparam->scale.size() == 1);
+ const auto scaling_factor = pad_v2_qparam->scale.at(0);
+ const auto zerop = pad_v2_qparam->zerop.at(0);
+
+ auto new_const = luci::clone(const_node);
+ quant_const_values(new_const, scaling_factor, zerop, pad_v2->dtype());
+ overwrite_quantparam(pad_v2, new_const);
+ (pad_v2->*arg_setter)(new_const);
+ }
+ else
+ {
+ const auto succs = loco::succs(node);
+ if (succs.size() > 1)
+ return;
+
+ // Non-const input must have been quantized
+ assert(node->quantparam() != nullptr);
+ overwrite_quantparam(pad_v2, node);
+ }
+ };
+
+ quant_input(&CirclePadV2::input, 0);
+ quant_input(&CirclePadV2::constant_values, 2);
+}
+
+} // namespace luci
+
+namespace
+{
+
+// Visitor to propagate quantization parameters backwards
+struct PropagateQParamBackward final : public luci::CircleNodeMutableVisitor<void>
+{
+ void visit(luci::CircleNode *) {}
+
+ void visit(luci::CircleConcatenation *node) { propagate_concat_quantparam(node); }
+
+ void visit(luci::CircleOneHot *node) { propagate_one_hot_quantparam(node); }
+
+ void visit(luci::CirclePack *node) { propagate_pack_quantparam(node); }
+
+ void visit(luci::CirclePadV2 *node) { propagate_pad_v2_quantparam(node); }
+
+ // Propagate qparam for non-value changing Ops
+ // (ex: Reshape, Transpose, etc.)
+ // TODO Add more Ops
+
+ void visit(luci::CircleReshape *node)
+ {
+ auto input_node = loco::must_cast<luci::CircleNode *>(node->tensor());
+
+ // Do not propagate qparam if input node has multiple users
+ if (loco::succs(input_node).size() > 1)
+ return;
+
+ const auto input_opcode = input_node->opcode();
+
+ // Do not propagate qparam if input node is virtual Op (except CIRCLEINPUT)
+ // Why? It is not safe to propagate qparam to some virtual nodes. For example,
+    // const nodes and multi-out nodes. Let's block them for now.
+ // TODO Revisit this condition
+ if (virtual_op(input_opcode) and input_opcode != luci::CircleOpcode::CIRCLEINPUT)
+ return;
+
+ overwrite_quantparam(node, input_node);
+ }
+
+ void visit(luci::CircleTranspose *node)
+ {
+ auto input_node = loco::must_cast<luci::CircleNode *>(node->a());
+
+ // Do not propagate qparam if input node has multiple users
+ if (loco::succs(input_node).size() > 1)
+ return;
+
+ const auto input_opcode = input_node->opcode();
+
+ // Do not propagate qparam if input node is virtual Op (except CIRCLEINPUT)
+ // Why? It is not safe to propagate qparam to some virtual nodes. For example,
+    // const nodes and multi-out nodes. Let's block them for now.
+ // TODO Revisit this condition
+ if (virtual_op(input_opcode) and input_opcode != luci::CircleOpcode::CIRCLEINPUT)
+ return;
+
+ overwrite_quantparam(node, input_node);
+ }
+};
+
+} // namespace
+
+namespace luci
+{
+
+bool PropagateQParamBackwardPass::run(loco::Graph *g)
+{
+ LOGGER(l);
+
+ // We use reverse post-order traversal as qparam is propagated backward
+ auto nodes = loco::postorder_traversal(loco::output_nodes(g));
+ std::reverse(nodes.begin(), nodes.end());
+ for (auto node : nodes)
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ INFO(l) << "PropagateQParamBackwardPass visit node: " << circle_node->name() << std::endl;
+
+ // We can't propagate non-existent qparam
+ if (circle_node->quantparam() == nullptr)
+ continue;
+
+ PropagateQParamBackward pqb;
+ circle_node->accept(&pqb);
+ }
+
+ // This pass is only run once, so return false
+  // TODO Refactor not to return a meaningless value
+ return false;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/PropagateQParamBackwardPass.test.cpp b/compiler/luci/pass/src/PropagateQParamBackwardPass.test.cpp
new file mode 100644
index 000000000..04573cc45
--- /dev/null
+++ b/compiler/luci/pass/src/PropagateQParamBackwardPass.test.cpp
@@ -0,0 +1,310 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/PropagateQParamBackwardPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+using namespace luci;
+
+namespace
+{
+
+void set_qparam(luci::CircleNode *node, float scale, int64_t zp)
+{
+ auto qparam = std::make_unique<luci::CircleQuantParam>();
+ qparam->scale.emplace_back(scale);
+ qparam->zerop.emplace_back(zp);
+
+ node->quantparam(std::move(qparam));
+}
+
+/**
+ * @brief Base Test Graph
+ */
+struct TestGraph
+{
+public:
+  virtual void init(void) = 0;
+  virtual ~TestGraph() = default;
+};
+
+/**
+ * Graph with two concats
+ *
+ * [CircleInput] [CircleConst]
+ * \ /
+ * [CircleConcatenation] [CircleConst]
+ * | |
+ * [CircleConcatenation]
+ * |
+ * [CircleOutput]
+ *
+ * BEFORE
+ * - Concat1 and Concat2 have different qparams
+ *
+ * AFTER
+ * - All Ops have the same qparam
+ */
+struct SubsequentConcatGraph : public TestGraph
+{
+public:
+ void init(void) final
+ {
+ // graph input and output
+ auto graph_input = g.inputs()->create();
+ auto graph_output = g.outputs()->create();
+
+ // input
+ input = g.nodes()->create<luci::CircleInput>();
+ input->index(graph_input->index());
+ input->shape({1, 4, 4, 3});
+ input->dtype(loco::DataType::U8);
+ set_qparam(input, 1.0, 1);
+
+ // const1
+ const1 = g.nodes()->create<luci::CircleConst>();
+ const1->shape({1, 4, 4, 3});
+ const1->dtype(loco::DataType::FLOAT32);
+ const1->size<loco::DataType::FLOAT32>(48);
+ for (uint32_t i = 0; i < 48; i++)
+ const1->at<loco::DataType::FLOAT32>(i) = i;
+
+ // concat1
+ concat1 = g.nodes()->create<luci::CircleConcatenation>(2);
+ concat1->shape({1, 4, 4, 6});
+ concat1->dtype(loco::DataType::U8);
+ set_qparam(concat1, 2.0, 2);
+ concat1->values(0, input);
+ concat1->values(1, const1);
+ concat1->fusedActivationFunction(luci::FusedActFunc::NONE);
+
+ // const2
+ const2 = g.nodes()->create<luci::CircleConst>();
+ const2->shape({1, 4, 4, 3});
+ const2->dtype(loco::DataType::FLOAT32);
+ const2->size<loco::DataType::FLOAT32>(48);
+ for (uint32_t i = 0; i < 48; i++)
+ const2->at<loco::DataType::FLOAT32>(i) = i;
+
+ // concat2
+ concat2 = g.nodes()->create<luci::CircleConcatenation>(2);
+ concat2->shape({1, 4, 4, 9});
+ concat2->dtype(loco::DataType::U8);
+ set_qparam(concat2, 3.0, 3);
+ concat2->values(0, concat1);
+ concat2->values(1, const2);
+ concat2->fusedActivationFunction(luci::FusedActFunc::NONE);
+
+ // output
+ output = g.nodes()->create<luci::CircleOutput>();
+ output->index(graph_output->index());
+ output->from(concat2);
+ output->shape({1, 4, 4, 9});
+ output->dtype(loco::DataType::U8);
+ set_qparam(output, 3.0, 3);
+ }
+
+public:
+ loco::Graph g;
+ CircleInput *input = nullptr;
+ CircleConcatenation *concat1 = nullptr;
+ CircleConcatenation *concat2 = nullptr;
+ CircleConst *const1 = nullptr;
+ CircleConst *const2 = nullptr;
+ CircleOutput *output = nullptr;
+};
+
+/**
+ * BEFORE
+ *
+ * [Input]
+ * |
+ * [Conv] (qparam 1)
+ * |
+ * [Reshape] (qparam 2)
+ * |
+ * [Output]
+ *
+ * AFTER
+ *
+ * [Input]
+ * |
+ * [Conv] (qparam 2)
+ * |
+ * [Reshape] (qparam 2)
+ * |
+ * [Output]
+ */
+class ConvReshapeGraph
+{
+public:
+ ConvReshapeGraph()
+ {
+ input = g.nodes()->create<luci::CircleInput>();
+ conv = g.nodes()->create<luci::CircleConv2D>();
+ reshape = g.nodes()->create<luci::CircleReshape>();
+ output = g.nodes()->create<luci::CircleOutput>();
+
+ auto graph_input = g.inputs()->create();
+ input->index(graph_input->index());
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+
+ set_qparam(conv, 2.0, 2);
+ set_qparam(reshape, 1.0, 1);
+
+ conv->input(input);
+ reshape->tensor(conv);
+ output->from(reshape);
+ }
+
+public:
+ loco::Graph g;
+ luci::CircleInput *input = nullptr;
+ luci::CircleConv2D *conv = nullptr;
+ luci::CircleReshape *reshape = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+/**
+ * BEFORE
+ *
+ * [Input]
+ * |
+ * [Conv] (qparam 1)
+ * |
+ * +---------------------+
+ * | |
+ * [Reshape] (qparam 2) [Output]
+ * |
+ * [Output]
+ *
+ * AFTER (qparam is not propagated as Conv has multiple users)
+ *
+ * [Input]
+ * |
+ * [Conv] (qparam 1)
+ * |
+ * +---------------------+
+ * | |
+ * [Reshape] (qparam 2) [Output]
+ * |
+ * [Output]
+ */
+class ConvReshapeMultiOutGraph
+{
+public:
+ ConvReshapeMultiOutGraph()
+ {
+ input = g.nodes()->create<luci::CircleInput>();
+ conv = g.nodes()->create<luci::CircleConv2D>();
+ reshape = g.nodes()->create<luci::CircleReshape>();
+ output1 = g.nodes()->create<luci::CircleOutput>();
+ output2 = g.nodes()->create<luci::CircleOutput>();
+
+ auto graph_input = g.inputs()->create();
+ input->index(graph_input->index());
+ auto graph_output1 = g.outputs()->create();
+ output1->index(graph_output1->index());
+ auto graph_output2 = g.outputs()->create();
+ output2->index(graph_output2->index());
+
+ set_qparam(conv, 2.0, 2);
+ set_qparam(reshape, 1.0, 1);
+
+ conv->input(input);
+ reshape->tensor(conv);
+ output1->from(reshape);
+ output2->from(conv);
+ }
+
+public:
+ loco::Graph g;
+ luci::CircleInput *input = nullptr;
+ luci::CircleConv2D *conv = nullptr;
+ luci::CircleReshape *reshape = nullptr;
+ luci::CircleOutput *output1 = nullptr;
+ luci::CircleOutput *output2 = nullptr;
+};
+
+} // namespace
+
+TEST(PropagateQParamBackwardPassTest, name)
+{
+ luci::PropagateQParamBackwardPass pass(loco::DataType::U8);
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(PropagateQParamBackwardPassTest, subsequent_propagation)
+{
+ SubsequentConcatGraph graph;
+
+ graph.init();
+
+ luci::PropagateQParamBackwardPass pass(loco::DataType::U8);
+
+ pass.run(&graph.g);
+
+ EXPECT_EQ(3.0, graph.concat2->quantparam()->scale[0]);
+ EXPECT_EQ(3, graph.concat2->quantparam()->zerop[0]);
+
+ auto const2 = loco::must_cast<CircleNode *>(graph.concat2->values(1));
+ EXPECT_EQ(3.0, const2->quantparam()->scale[0]);
+ EXPECT_EQ(3, const2->quantparam()->zerop[0]);
+
+ EXPECT_EQ(3.0, graph.concat1->quantparam()->scale[0]);
+ EXPECT_EQ(3, graph.concat1->quantparam()->zerop[0]);
+
+ auto const1 = loco::must_cast<CircleNode *>(graph.concat1->values(1));
+ EXPECT_EQ(3.0, const1->quantparam()->scale[0]);
+ EXPECT_EQ(3, const1->quantparam()->zerop[0]);
+
+ EXPECT_EQ(3.0, graph.input->quantparam()->scale[0]);
+ EXPECT_EQ(3, graph.input->quantparam()->zerop[0]);
+}
+
+TEST(PropagateQParamBackwardPassTest, reshape)
+{
+ ConvReshapeGraph graph;
+
+ EXPECT_NE(graph.conv->quantparam()->scale, graph.reshape->quantparam()->scale);
+ EXPECT_NE(graph.conv->quantparam()->zerop, graph.reshape->quantparam()->zerop);
+
+ luci::PropagateQParamBackwardPass pass(loco::DataType::U8);
+
+ pass.run(&graph.g);
+
+ EXPECT_EQ(graph.conv->quantparam()->scale, graph.reshape->quantparam()->scale);
+ EXPECT_EQ(graph.conv->quantparam()->zerop, graph.reshape->quantparam()->zerop);
+}
+
+TEST(PropagateQParamBackwardPassTest, reshape_multi_use_NEG)
+{
+ ConvReshapeMultiOutGraph graph;
+
+ EXPECT_NE(graph.conv->quantparam()->scale, graph.reshape->quantparam()->scale);
+ EXPECT_NE(graph.conv->quantparam()->zerop, graph.reshape->quantparam()->zerop);
+
+ luci::PropagateQParamBackwardPass pass(loco::DataType::U8);
+
+ pass.run(&graph.g);
+
+ EXPECT_NE(graph.conv->quantparam()->scale, graph.reshape->quantparam()->scale);
+ EXPECT_NE(graph.conv->quantparam()->zerop, graph.reshape->quantparam()->zerop);
+}
diff --git a/compiler/luci/pass/src/PropagateQParamForwardPass.cpp b/compiler/luci/pass/src/PropagateQParamForwardPass.cpp
new file mode 100644
index 000000000..aaadb2864
--- /dev/null
+++ b/compiler/luci/pass/src/PropagateQParamForwardPass.cpp
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/PropagateQParamForwardPass.h"
+
+#include "QuantizationUtils.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Log.h>
+
+#include <iostream>
+
+namespace
+{
+
+bool copy_qparam(luci::CircleQuantParam *src, luci::CircleQuantParam *dst)
+{
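+  // Return true only when dst actually changes, so callers can run the pass to a fixpoint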
+ assert(src->scale.size() == dst->scale.size());
+ assert(src->zerop.size() == dst->zerop.size());
+
+ // src and dst have the same qparam
+ if (std::equal(src->scale.begin(), src->scale.end(), dst->scale.begin()) &&
+ std::equal(src->zerop.begin(), src->zerop.end(), dst->zerop.begin()) &&
+ src->quantized_dimension == dst->quantized_dimension)
+ return false;
+
+ dst->scale.assign(src->scale.begin(), src->scale.end());
+ dst->zerop.assign(src->zerop.begin(), src->zerop.end());
+ dst->quantized_dimension = src->quantized_dimension;
+ return true;
+}
+
+bool copy_qparam(luci::CircleNode *src, luci::CircleNode *dst)
+{
+ // Skip nodes that do not have quantparams
+ auto src_qparam = src->quantparam();
+ if (not src_qparam)
+ return false;
+
+ auto dst_qparam = dst->quantparam();
+ if (not dst_qparam)
+ return false;
+
+ return copy_qparam(src_qparam, dst_qparam);
+}
+
+// Visitor to propagate quantization parameters
+struct PropagateQParamForward final : public luci::CircleNodeMutableVisitor<bool>
+{
+ PropagateQParamForward() = default;
+
+ bool visit(luci::CircleNode *) { return false; }
+
+ bool visit(luci::CircleGather *node)
+ {
+ auto input_node = loco::must_cast<luci::CircleNode *>(node->params());
+ return copy_qparam(input_node, node);
+ }
+
+ bool visit(luci::CircleReshape *node)
+ {
+ auto input_node = loco::must_cast<luci::CircleNode *>(node->tensor());
+ return copy_qparam(input_node, node);
+ }
+
+ bool visit(luci::CircleTranspose *node)
+ {
+ auto input_node = loco::must_cast<luci::CircleNode *>(node->a());
+ return copy_qparam(input_node, node);
+ }
+
+ bool visit(luci::CircleStridedSlice *node)
+ {
+ auto input_node = loco::must_cast<luci::CircleNode *>(node->input());
+ return copy_qparam(input_node, node);
+ }
+
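+  // Multi-output Ops (Split, SplitV, Unpack) expose results through virtual *Out
+  // nodes; copy qparam from the originating Op's input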
+ bool visit(luci::CircleSplitOut *node)
+ {
+ auto split = loco::must_cast<luci::CircleSplit *>(node->input());
+ auto input_node = loco::must_cast<luci::CircleNode *>(split->input());
+ return copy_qparam(input_node, node);
+ }
+
+ bool visit(luci::CircleSplitVOut *node)
+ {
+ auto splitv = loco::must_cast<luci::CircleSplitV *>(node->input());
+ auto input_node = loco::must_cast<luci::CircleNode *>(splitv->input());
+ return copy_qparam(input_node, node);
+ }
+
+ bool visit(luci::CircleUnpackOut *node)
+ {
+ auto unpack = loco::must_cast<luci::CircleUnpack *>(node->input());
+ auto input_node = loco::must_cast<luci::CircleNode *>(unpack->value());
+ return copy_qparam(input_node, node);
+ }
+
+ // Propagate qparam across Quantize op to ensure
+ // special qparams (pre-defined values, integer scale)
+ bool visit(luci::CircleQuantize *node)
+ {
+ auto input_node = loco::must_cast<luci::CircleNode *>(node->input());
+
+ // Skip if input_node is not quantized activation
+ if (input_node->dtype() != loco::DataType::U8 and input_node->dtype() != loco::DataType::S16)
+ return false;
+
+    // If input_node and node have the same dtype, the Quantize op
+    // does rescaling, not requantization for mixed precision
+ if (input_node->dtype() == node->dtype())
+ return false;
+
+ assert(node->dtype() == loco::DataType::U8 or node->dtype() == loco::DataType::S16);
+
+ auto prev_qparam = node->quantparam();
+ assert(prev_qparam);
+ assert(prev_qparam->scale.size() == 1);
+ assert(prev_qparam->zerop.size() == 1);
+
+ const auto prev_scale = prev_qparam->scale[0];
+ const auto prev_zerop = prev_qparam->zerop[0];
+
+ auto qtype = luci::activation_qtype(input_node);
+ switch (qtype)
+ {
+ case luci::ActivationQType::PreDefinedLogistic:
+ case luci::ActivationQType::PreDefinedTanh:
+ case luci::ActivationQType::PreDefinedSoftmax:
+ node->quantparam(luci::make_predefined_qparam(qtype, node->dtype()));
+ break;
+ case luci::ActivationQType::IntScale:
+ luci::set_int_scale(node);
+ break;
+ default:
+        // This assert ensures this switch-statement handles all ActivationQTypes
+ // TODO Find a better design to remove coupling with ActivationQType
+ assert(qtype == luci::ActivationQType::MinMax);
+ break;
+ }
+
+ assert(node->quantparam());
+ assert(node->quantparam()->scale.size() == 1);
+ assert(node->quantparam()->zerop.size() == 1);
+
+ const auto scale = node->quantparam()->scale[0];
+ const auto zerop = node->quantparam()->zerop[0];
+
+ // Compare qparam with saved values to detect update
+ return scale != prev_scale or zerop != prev_zerop;
+ }
+};
+
+} // namespace
+
+namespace luci
+{
+
+bool PropagateQParamForwardPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ LOGGER(l);
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ INFO(l) << "PropagateQParamForwardPass visit node: " << circle_node->name() << std::endl;
+
+ PropagateQParamForward pqp;
+ if (circle_node->accept(&pqp))
+ changed = true;
+
+ if (_TF_style_maxpool)
+ {
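+      // TF-style MaxPool shares its input's qparam, so ifm/ofm get identical quantization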
+ if (auto maxpool = dynamic_cast<luci::CircleMaxPool2D *>(node))
+ {
+ auto input = loco::must_cast<luci::CircleNode *>(maxpool->value());
+ copy_qparam(input, maxpool);
+ }
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/PropagateQParamForwardPass.test.cpp b/compiler/luci/pass/src/PropagateQParamForwardPass.test.cpp
new file mode 100644
index 000000000..a734c0873
--- /dev/null
+++ b/compiler/luci/pass/src/PropagateQParamForwardPass.test.cpp
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/PropagateQParamForwardPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+void addQuantParam(luci::CircleNode *node, const std::vector<float> &scale,
+ const std::vector<int64_t> &zp)
+{
+ assert(node->quantparam() == nullptr);
+
+ auto quantparam = std::make_unique<luci::CircleQuantParam>();
+ quantparam->scale = scale;
+ quantparam->zerop = zp;
+ node->quantparam(std::move(quantparam));
+}
+
+/**
+ * Simple graph for test
+ *
+ * BEFORE
+ *
+ * [Conv] (qparam 1)
+ * |
+ * [Reshape] (qparam 2)
+ *
+ * AFTER
+ *
+ * [Conv] (qparam 2)
+ * |
+ * [Reshape] (qparam 2)
+ *
+ */
+class SimpleGraph
+{
+public:
+ SimpleGraph()
+ {
+ input = g.nodes()->create<luci::CircleInput>();
+ conv = g.nodes()->create<luci::CircleConv2D>();
+ reshape = g.nodes()->create<luci::CircleReshape>();
+ output = g.nodes()->create<luci::CircleOutput>();
+
+ auto graph_input = g.inputs()->create();
+ input->index(graph_input->index());
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+
+ addQuantParam(conv, {0.1, 0.2, 0.3}, {0, 10, 20});
+ addQuantParam(reshape, {0.2, 0.4, 0.6}, {-10, 0, 10});
+
+ conv->input(input);
+ reshape->tensor(conv);
+ output->from(reshape);
+ }
+
+public:
+ loco::Graph g;
+ luci::CircleInput *input = nullptr;
+ luci::CircleConv2D *conv = nullptr;
+ luci::CircleReshape *reshape = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+/**
+ * Test graph for forward propagation in Quantize Op
+ *
+ * BEFORE
+ *
+ * [Tanh U8] (qparam 1 - pre-defined for U8)
+ * |
+ * [Quantize S16] (qparam 2 - not pre-defined value)
+ *
+ * AFTER
+ *
+ * [Tanh U8] (qparam 1 - pre-defined for U8)
+ * |
+ * [Quantize S16] (qparam 3 - pre-defined for S16)
+ *
+ */
+class TanhQuantizeGraph
+{
+public:
+ TanhQuantizeGraph()
+ {
+ input = g.nodes()->create<luci::CircleInput>();
+ tanh = g.nodes()->create<luci::CircleTanh>();
+ quantize = g.nodes()->create<luci::CircleQuantize>();
+ output = g.nodes()->create<luci::CircleOutput>();
+
+ auto graph_input = g.inputs()->create();
+ input->index(graph_input->index());
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+
+ tanh->dtype(loco::DataType::U8);
+ quantize->dtype(loco::DataType::S16);
+
+ addQuantParam(tanh, {2.0f / 256.0f}, {128}); // pre-defined qparam for U8
+ addQuantParam(quantize, {1.0}, {0}); // not pre-defined values
+
+ tanh->x(input);
+ quantize->input(tanh);
+ output->from(quantize);
+ }
+
+public:
+ loco::Graph g;
+ luci::CircleInput *input = nullptr;
+ luci::CircleTanh *tanh = nullptr;
+ luci::CircleQuantize *quantize = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+/**
+ * Test graph for forward propagation in Quantize Op
+ *
+ * BEFORE
+ *
+ * [Floor U8] (qparam 1 - int scale)
+ * |
+ * [Quantize S16] (qparam 2 - not int scale)
+ *
+ * AFTER
+ *
+ * [Floor U8] (qparam 1 - int scale)
+ * |
+ * [Quantize S16] (qparam 3 - int scale)
+ *
+ */
+class FloorQuantizeGraph
+{
+public:
+ FloorQuantizeGraph()
+ {
+ input = g.nodes()->create<luci::CircleInput>();
+ floor = g.nodes()->create<luci::CircleFloor>();
+ quantize = g.nodes()->create<luci::CircleQuantize>();
+ output = g.nodes()->create<luci::CircleOutput>();
+
+ auto graph_input = g.inputs()->create();
+ input->index(graph_input->index());
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+
+ floor->dtype(loco::DataType::U8);
+ quantize->dtype(loco::DataType::S16);
+
+ addQuantParam(floor, {4.0f}, {128}); // int scale
+ addQuantParam(quantize, {0.3}, {0}); // not int scale
+
+ floor->x(input);
+ quantize->input(floor);
+ output->from(quantize);
+ }
+
+public:
+ loco::Graph g;
+ luci::CircleInput *input = nullptr;
+ luci::CircleFloor *floor = nullptr;
+ luci::CircleQuantize *quantize = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+} // namespace
+
+TEST(PropagateQParamForwardPassTest, name)
+{
+ luci::PropagateQParamForwardPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(PropagateQParamForward, simple)
+{
+ SimpleGraph g;
+
+ luci::PropagateQParamForwardPass pass;
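+  // Run to a fixpoint; run() returns true as long as any qparam changed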
+ while (pass.run(&g.g))
+ ;
+
+ EXPECT_FLOAT_EQ(0.1, g.reshape->quantparam()->scale[0]);
+ EXPECT_FLOAT_EQ(0.2, g.reshape->quantparam()->scale[1]);
+ EXPECT_FLOAT_EQ(0.3, g.reshape->quantparam()->scale[2]);
+ EXPECT_EQ(0, g.reshape->quantparam()->zerop[0]);
+ EXPECT_EQ(10, g.reshape->quantparam()->zerop[1]);
+ EXPECT_EQ(20, g.reshape->quantparam()->zerop[2]);
+}
+
+TEST(PropagateQParamForward, wrong_op_NEG)
+{
+ SimpleGraph g;
+ g.output->from(g.conv);
+ g.reshape->drop();
+
+ luci::PropagateQParamForwardPass pass;
+ while (pass.run(&g.g))
+ ;
+
+ EXPECT_FLOAT_EQ(0.1, g.conv->quantparam()->scale[0]);
+ EXPECT_FLOAT_EQ(0.2, g.conv->quantparam()->scale[1]);
+ EXPECT_FLOAT_EQ(0.3, g.conv->quantparam()->scale[2]);
+ EXPECT_EQ(0, g.conv->quantparam()->zerop[0]);
+ EXPECT_EQ(10, g.conv->quantparam()->zerop[1]);
+ EXPECT_EQ(20, g.conv->quantparam()->zerop[2]);
+}
+
+TEST(PropagateQParamForward, tanh_predefined_value)
+{
+ TanhQuantizeGraph g;
+
+ luci::PropagateQParamForwardPass pass;
+ while (pass.run(&g.g))
+ ;
+
+ EXPECT_FLOAT_EQ(1.0f / 32768.0f, g.quantize->quantparam()->scale[0]);
+}
+
+TEST(PropagateQParamForward, floor_int_scale)
+{
+ FloorQuantizeGraph g;
+
+ luci::PropagateQParamForwardPass pass;
+ while (pass.run(&g.g))
+ ;
+
+ EXPECT_FLOAT_EQ(1.0f, g.quantize->quantparam()->scale[0]);
+}
+
+TEST(PropagateQParamForward, same_dtype_NEG)
+{
+ FloorQuantizeGraph g;
+ g.quantize->dtype(loco::DataType::U8);
+
+ luci::PropagateQParamForwardPass pass;
+ while (pass.run(&g.g))
+ ;
+
+ // Qparam is not propagated as ifm/ofm of Quantize Op have the same dtype
+ EXPECT_FLOAT_EQ(0.3f, g.quantize->quantparam()->scale[0]);
+}
diff --git a/compiler/luci/pass/src/QuantizationUtils.cpp b/compiler/luci/pass/src/QuantizationUtils.cpp
index e18690605..3e3cdde34 100644
--- a/compiler/luci/pass/src/QuantizationUtils.cpp
+++ b/compiler/luci/pass/src/QuantizationUtils.cpp
@@ -20,10 +20,22 @@
#include <iostream>
#include <cmath>
+#include <limits>
namespace luci
{
+bool is_quantized(const CircleNode *node)
+{
+ return node->quantparam() != nullptr &&
+ (node->dtype() == loco::DataType::U8 || // activation, weight (uint8 quant)
+ node->dtype() == loco::DataType::S16 || // activation, weight (int16 quant)
+ node->dtype() == loco::DataType::S32 || // bias (uint8 quant)
+ node->dtype() == loco::DataType::S64); // bias (int16 quant)
+}
+
+bool is_fp32(const CircleNode *node) { return node->dtype() == loco::DataType::FLOAT32; }
+
uint8_t fp32_to_uint8_cast(float f)
{
assert(std::numeric_limits<uint8_t>::min() <= f);
@@ -31,13 +43,73 @@ uint8_t fp32_to_uint8_cast(float f)
return static_cast<uint8_t>(f);
}
-void compute_sym_scale_zp(float min, float max, float &scaling_factor, int64_t &zp,
- float &nudged_min, float &nudged_max)
+void asymmetric_wquant_with_minmax_per_layer(CircleConst *node, float min, float max,
+ float &scaling_factor, int64_t &zp, float &nudged_min,
+ float &nudged_max)
{
- assert(min != max);
+ const int32_t kMinScale = 0;
+ const int32_t kMaxScale = 255;
+
+ uint32_t size = node->size<loco::DataType::FLOAT32>();
+ compute_asym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
+ const float scaling_factor_inv = 1.0 / scaling_factor;
+ std::vector<int32_t> quantized_values(size);
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ // clipping
+ auto data = node->at<loco::DataType::FLOAT32>(i);
+ data = data < nudged_min ? nudged_min : data;
+ data = data > nudged_max ? nudged_max : data;
+ quantized_values[i] =
+ static_cast<int32_t>(std::round((data - nudged_min) * scaling_factor_inv));
+ }
+
+ node->dtype(loco::DataType::U8); // change the type of tensor
+ node->size<loco::DataType::U8>(size); // resize tensor
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ }
+}
+void symmetric_wquant_with_minmax_per_layer(CircleConst *node, float min, float max,
+ float &scaling_factor, float &nudged_min,
+ float &nudged_max)
+{
const int32_t kMaxScale = std::numeric_limits<int16_t>::max();
const int32_t kMinScale = -kMaxScale;
+
+ uint32_t size = node->size<loco::DataType::FLOAT32>();
+ compute_sym_scale(min, max, scaling_factor, nudged_min, nudged_max);
+ const float scaling_factor_inv = 1.0 / scaling_factor;
+ std::vector<int32_t> quantized_values(size);
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ // clipping
+ auto data = node->at<loco::DataType::FLOAT32>(i);
+ data = data < nudged_min ? nudged_min : data;
+ data = data > nudged_max ? nudged_max : data;
+ quantized_values[i] = static_cast<int32_t>(std::round(data * scaling_factor_inv));
+ }
+
+ node->dtype(loco::DataType::S16); // change the type of tensor
+ node->size<loco::DataType::S16>(size); // resize tensor
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ node->at<loco::DataType::S16>(i) =
+ std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ }
+}
+
+void compute_sym_scale(float min, float max, float &scaling_factor, float &nudged_min,
+ float &nudged_max, loco::DataType out_type)
+{
+ assert(min <= max);
+ assert(out_type == loco::DataType::S8 || out_type == loco::DataType::S16);
+
+ const int32_t kMaxScale = (out_type == loco::DataType::S16) ? std::numeric_limits<int16_t>::max()
+ : std::numeric_limits<int8_t>::max();
+ const int32_t kMinScale = -kMaxScale;
const double qmin_double = kMinScale;
const double qmax_double = kMaxScale;
const double rmin = std::fmin(0, min);
@@ -52,9 +124,13 @@ void compute_sym_scale_zp(float min, float max, float &scaling_factor, int64_t &
scale_factor_from_max_side = rmax / qmax_double;
scaling_factor = scale_factor_from_min_side > scale_factor_from_max_side
- ? scale_factor_from_min_side
- : scale_factor_from_max_side;
- zp = 0;
+ ? scale_factor_from_min_side
+ : scale_factor_from_max_side;
+
+  // Protect scale from becoming too small to avoid overflow/underflow
+ const float kMinScalingFactor = (out_type == loco::DataType::S16) ? 1e-8 : 1e-5;
+ scaling_factor = std::max(scaling_factor, kMinScalingFactor);
+
nudged_min = static_cast<float>(qmin_double * scaling_factor);
nudged_max = static_cast<float>(qmax_double * scaling_factor);
}
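For reference, the symmetric-scale arithmetic above can be exercised standalone. A minimal sketch with illustrative min/max values (not part of the patch):

#include <algorithm>
#include <cmath>
#include <cstdio>

int main()
{
  const float min = -2.0f, max = 3.0f;          // example recorded range
  const double qmin = -32767.0, qmax = 32767.0; // symmetric int16 range
  const double rmin = std::fmin(0, min), rmax = std::fmax(0, max);
  double scale = std::max(rmin / qmin, rmax / qmax);
  scale = std::max(scale, 1e-8); // same S16 lower bound as above
  std::printf("scale=%e nudged=[%f, %f]\n", scale, qmin * scale, qmax * scale);
  // scale ~= 9.155e-05, nudged range ~= [-3.0, 3.0]
  return 0;
}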
@@ -120,11 +196,32 @@ void compute_asym_scale_zp(float min, float max, float &scaling_factor, int64_t
zp = nudged_zero_point;
}
-bool get_channel_dim_index(CircleConst *node, loco::TensorShape &dimension, int &channel_dim_index)
+bool get_channel_dim_index(CircleConst *node, loco::TensorShape &dimension,
+ int32_t &channel_dim_index)
{
auto succs = loco::succs(node);
- if (succs.size() != 1) // assume weights is used by only one node
- return false;
+
+ // opcode is initialized to CIRCLEINPUT, because
+ // CIRCLEINPUT should never be the successor of any node
+  // (this is checked with the assert in the loop body)
+ luci::CircleOpcode opcode = luci::CircleOpcode::CIRCLEINPUT;
+ for (auto out : succs)
+ {
+ const auto circle_node = static_cast<CircleNode *>(out);
+ assert(circle_node->opcode() != luci::CircleOpcode::CIRCLEINPUT);
+
+ if (opcode == luci::CircleOpcode::CIRCLEINPUT)
+ {
+ opcode = circle_node->opcode();
+ }
+ else
+ {
+ // Node is used by multiple layers with different opcodes
+      // We do not handle such cases
+ if (opcode != circle_node->opcode())
+ return false;
+ }
+ }
for (auto out : succs)
{
@@ -178,9 +275,237 @@ bool get_channel_dim_index(CircleConst *node, loco::TensorShape &dimension, int
uint32_t cal_offset(loco::TensorShape &dimension, uint32_t *indices)
{
return indices[0] * dimension.dim(1).value() * dimension.dim(2).value() *
- dimension.dim(3).value() +
+ dimension.dim(3).value() +
indices[1] * dimension.dim(2).value() * dimension.dim(3).value() +
indices[2] * dimension.dim(3).value() + indices[3];
}
+// Activation (ofm) qtype is determined in different ways.
+// 1. Pre-defined values: Some Ops have pre-defined qparams (ex: LOGISTIC, TANH)
+// 2. Integer scale: Output of some Ops should be integers (ex: FLOOR, CEIL)
+// 3. Activation qtype of input: Some Ops propagate qparam from input to output (ex: QUANTIZE,
+// TRANSPOSE, etc. See PropagateQParamForwardPass.cpp for more details).
+ActivationQType activation_qtype(const CircleNode *node)
+{
+ auto fused_act_node = dynamic_cast<const CircleNodeMixin<CircleNodeTrait::FusedActFunc> *>(node);
+ if (fused_act_node && fused_act_node->fusedActivationFunction() == FusedActFunc::TANH)
+ return ActivationQType::PreDefinedTanh;
+
+#define RETURN_INPUT_ACTIVATION_QTYPE(CLASS, INPUT) \
+ { \
+ auto n = loco::must_cast<const CLASS *>(node); \
+ auto input = loco::must_cast<CircleNode *>(n->INPUT()); \
+ return activation_qtype(input); \
+ }
+
+ switch (node->opcode())
+ {
+ case CircleOpcode::LOGISTIC:
+ return ActivationQType::PreDefinedLogistic;
+ case CircleOpcode::TANH:
+ return ActivationQType::PreDefinedTanh;
+ case CircleOpcode::SOFTMAX:
+ return ActivationQType::PreDefinedSoftmax;
+ case CircleOpcode::FLOOR:
+ case CircleOpcode::FLOOR_DIV:
+ case CircleOpcode::FLOOR_MOD:
+ case CircleOpcode::CEIL:
+ return ActivationQType::IntScale;
+ case CircleOpcode::GATHER:
+ RETURN_INPUT_ACTIVATION_QTYPE(CircleGather, params);
+ case CircleOpcode::RESHAPE:
+ RETURN_INPUT_ACTIVATION_QTYPE(CircleReshape, tensor);
+ case CircleOpcode::TRANSPOSE:
+ RETURN_INPUT_ACTIVATION_QTYPE(CircleTranspose, a);
+ case CircleOpcode::STRIDED_SLICE:
+ RETURN_INPUT_ACTIVATION_QTYPE(CircleStridedSlice, input);
+ case CircleOpcode::SPLIT:
+ RETURN_INPUT_ACTIVATION_QTYPE(CircleSplit, input);
+ case CircleOpcode::CIRCLESPLITOUT:
+ RETURN_INPUT_ACTIVATION_QTYPE(CircleSplitOut, input);
+ case CircleOpcode::SPLIT_V:
+ RETURN_INPUT_ACTIVATION_QTYPE(CircleSplitV, input);
+ case CircleOpcode::CIRCLESPLITVOUT:
+ RETURN_INPUT_ACTIVATION_QTYPE(CircleSplitVOut, input);
+ case CircleOpcode::UNPACK:
+ RETURN_INPUT_ACTIVATION_QTYPE(CircleUnpack, value);
+ case CircleOpcode::CIRCLEUNPACKOUT:
+ RETURN_INPUT_ACTIVATION_QTYPE(CircleUnpackOut, input);
+ case CircleOpcode::QUANTIZE:
+ RETURN_INPUT_ACTIVATION_QTYPE(CircleQuantize, input);
+ default:
+ break;
+ }
+
+#undef RETURN_INPUT_ACTIVATION_QTYPE
+
+ return ActivationQType::MinMax;
+}
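A hypothetical caller sketch (simplified; the real dispatch lives in the quantization pass and may differ) showing how the returned ActivationQType drives the choice between pre-defined qparams, integer scales, and recorded min/max:

void quantize_output_qparam(luci::CircleNode *node, loco::DataType dtype)
{
  const auto qtype = luci::activation_qtype(node);
  switch (qtype)
  {
    case luci::ActivationQType::PreDefinedLogistic:
    case luci::ActivationQType::PreDefinedTanh:
    case luci::ActivationQType::PreDefinedSoftmax:
      node->quantparam(luci::make_predefined_qparam(qtype, dtype));
      break;
    case luci::ActivationQType::IntScale:
      luci::set_int_scale(node); // round the recorded scale to a positive integer
      break;
    default:
      break; // MinMax: quantized elsewhere from recorded min/max
  }
}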
+
+std::unique_ptr<CircleQuantParam> make_predefined_qparam(ActivationQType qtype,
+ loco::DataType dtype)
+{
+ auto qparam = std::make_unique<CircleQuantParam>();
+
+ auto set_qparam = [&qparam](float scale, int64_t zp) {
+ qparam->scale.emplace_back(scale);
+ qparam->zerop.emplace_back(zp);
+ };
+
+ switch (qtype)
+ {
+ case ActivationQType::PreDefinedLogistic:
+ if (dtype == loco::DataType::U8)
+ set_qparam(1.0f / 256.0f, 0);
+ else
+ {
+ assert(dtype == loco::DataType::S16);
+ set_qparam(1.0f / 32768.0f, 0);
+ }
+ break;
+ case ActivationQType::PreDefinedTanh:
+ if (dtype == loco::DataType::U8)
+ set_qparam(2.0f / 256.0f, 128);
+ else
+ {
+ assert(dtype == loco::DataType::S16);
+ set_qparam(1.0f / 32768.0f, 0);
+ }
+ break;
+ case ActivationQType::PreDefinedSoftmax:
+ if (dtype == loco::DataType::U8)
+ set_qparam(1.0f / 255.0f, 0);
+ else
+ {
+ assert(dtype == loco::DataType::S16);
+ set_qparam(1.0f / 32767.0f, 0);
+ }
+ break;
+ default:
+ throw std::runtime_error("Unsupported opcode with pre-defined qparam");
+ }
+ return qparam;
+}
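As a quick sanity check of the constants encoded above (a sketch using the declarations from QuantizationUtils.h):

auto qp = luci::make_predefined_qparam(luci::ActivationQType::PreDefinedTanh, loco::DataType::U8);
assert(qp->scale[0] == 2.0f / 256.0f); // tanh output range [-1, 1] over 256 steps
assert(qp->zerop[0] == 128);           // zero maps to the middle of [0, 255]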
+
+// For nodes with integer outputs, we use an integer scale
+void set_int_scale(luci::CircleNode *node)
+{
+ assert(node); // FIX_CALLER_UNLESS
+
+ auto qparam = node->quantparam();
+ assert(qparam); // FIX_CALLER_UNLESS
+ assert(qparam->scale.size() == 1); // FIX_CALLER_UNLESS
+
+ auto fp_scale = qparam->scale[0];
+ qparam->scale[0] = fp_scale < 1 ? 1.0f : std::round(fp_scale);
+}
+
+void quant_const(luci::CircleConst *node, loco::DataType quant_type)
+{
+ assert(node->dtype() == loco::DataType::FLOAT32);
+
+ float min = std::numeric_limits<float>::max();
+ float max = std::numeric_limits<float>::lowest();
+ for (uint32_t i = 0; i < node->size<loco::DataType::FLOAT32>(); i++)
+ {
+ auto data = node->at<loco::DataType::FLOAT32>(i);
+ min = data < min ? data : min;
+ max = data > max ? data : max;
+ }
+
+ float scaling_factor{0.0};
+ int64_t zp{0};
+ float nudged_min{0.0};
+ float nudged_max{0.0};
+
+ switch (quant_type)
+ {
+ case loco::DataType::U8:
+ asymmetric_wquant_with_minmax_per_layer(node, min, max, scaling_factor, zp, nudged_min,
+ nudged_max);
+ break;
+ case loco::DataType::S16:
+ symmetric_wquant_with_minmax_per_layer(node, min, max, scaling_factor, nudged_min,
+ nudged_max);
+ break;
+ default:
+ throw std::runtime_error("Unsupported data type");
+ }
+
+ auto quantparam = std::make_unique<luci::CircleQuantParam>();
+ quantparam->scale.push_back(scaling_factor);
+ quantparam->zerop.push_back(zp);
+ node->quantparam(std::move(quantparam));
+}
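Hypothetical usage (make_fp32_const is an assumed test helper, not part of the patch): a const holding values in [-1.0, 2.0] quantized to uint8 ends up with scale ~ 3.0/255 and zero point ~ 85, per compute_asym_scale_zp:

luci::CircleConst *c = make_fp32_const({-1.0f, 0.5f, 2.0f}); // assumed helper
luci::quant_const(c, loco::DataType::U8);
// c->dtype() == loco::DataType::U8
// c->quantparam()->scale[0] ~= 3.0f / 255 (~0.01176)
// c->quantparam()->zerop[0] ~= 85         (= round(1.0 / 0.01176))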
+
+namespace
+{
+
+// TODO move this to a more global helper file
+int nbits(loco::DataType dt) noexcept
+{
+ switch (dt)
+ {
+ case loco::DataType::S8:
+ case loco::DataType::U8:
+ return 8;
+ case loco::DataType::S16:
+ case loco::DataType::U16:
+ case loco::DataType::FLOAT16:
+ return 16;
+ case loco::DataType::S32:
+ case loco::DataType::U32:
+ case loco::DataType::FLOAT32:
+ return 32;
+ case loco::DataType::S64:
+ return 64;
+ default:
+ return 64; // a safe large default
+ }
+}
+
+// TODO Check if the metric is valid
+// Returns true if [min,max] is poorly representable
+bool range_check(float min, float max, loco::DataType dtype)
+{
+ float thresh = 1.5f;
+ return log2f(max) - log2f(min) > nbits(dtype) * thresh;
+}
+
+bool warn_scale_zp(float scale, int64_t zp, luci::CircleNode *n)
+{
+ float min, max;
+ // estimate min/max
+ switch (n->dtype())
+ {
+ case loco::DataType::U8:
+ min = scale * (0 - zp);
+ max = scale * (255 - zp);
+ break;
+ case loco::DataType::S16:
+ min = scale * (-32767);
+ max = scale * (32767);
+ break;
+ default:
+ return false;
+ }
+ return range_check(min, max, n->dtype());
+}
+
+} // namespace
+
+void warn_accuracy_with_range(luci::CircleNode *n)
+{
+ LOGGER(l);
+ auto qp = n->quantparam();
+ auto k = qp->zerop.size();
+ for (uint32_t i = 0; i < k; i++)
+ {
+ if (warn_scale_zp(qp->scale[i], qp->zerop[i], n))
+      WARN(l) << "Quantization of the " << i << "-th channel of " << n->name()
+              << " may cause accuracy issues" << std::endl;
+ }
+}
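In other words, the heuristic warns when log2(max/min) > 1.5 * nbits(dtype), i.e. when the reconstructed range spans more than 12 bits of dynamic range for U8 (24 for S16). Note that log2f of a zero or negative minimum yields -inf or NaN, so the check is only meaningful for strictly positive ranges; this is presumably why the metric carries a TODO above.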
+
} // namespace luci
diff --git a/compiler/luci/pass/src/QuantizationUtils.h b/compiler/luci/pass/src/QuantizationUtils.h
index ec0e86df8..93c4045b5 100644
--- a/compiler/luci/pass/src/QuantizationUtils.h
+++ b/compiler/luci/pass/src/QuantizationUtils.h
@@ -23,16 +23,72 @@
namespace luci
{
-void compute_sym_scale_zp(float min, float max, float &scaling_factor, int64_t &zp,
- float &nudged_min, float &nudged_max);
+// Compute scale using given min/max for symmetric quantization (int8/int16)
+void compute_sym_scale(float min, float max, float &scaling_factor, float &nudged_min,
+ float &nudged_max, loco::DataType out_type = loco::DataType::S16);
+// Compute scale/zp using given min/max for asymmetric quantization (uint8)
void compute_asym_scale_zp(float min, float max, float &scaling_factor, int64_t &zp,
float &nudged_min, float &nudged_max);
-bool get_channel_dim_index(CircleConst *node, loco::TensorShape &dimension, int &channel_dim_index);
+// Asymmetric per-layer quantization of weights (const tensor) using given min/max values
+// NOTE: in-place update of node data
+void asymmetric_wquant_with_minmax_per_layer(CircleConst *node, float min, float max,
+ float &scaling_factor, int64_t &zp, float &nudged_min,
+ float &nudged_max);
+// Symmetric per-layer quantization of weights (const tensor) using given min/max values
+// NOTE: in-place update of node data
+void symmetric_wquant_with_minmax_per_layer(CircleConst *node, float min, float max,
+ float &scaling_factor, float &nudged_min,
+ float &nudged_max);
+
+// Helper function to get channel dimension
+// TODO Embed this function into iterate_per_channel
+bool get_channel_dim_index(CircleConst *node, loco::TensorShape &dimension,
+ int32_t &channel_dim_index);
+
+// Calculate offset of the given indices in dimension
uint32_t cal_offset(loco::TensorShape &dimension, uint32_t *indices);
+// Backward propagation of concatenation qparam
+void propagate_concat_quantparam(luci::CircleConcatenation *concat);
+
+// Backward propagation of pad_v2 qparam
+void propagate_pad_v2_quantparam(luci::CirclePadV2 *pad_v2);
+
+// Return true if the node is quantized
+bool is_quantized(const CircleNode *node);
+
+// Return true if the node is fp32
+bool is_fp32(const CircleNode *node);
+
+enum ActivationQType
+{
+ MinMax, // Quantize using recorded min/max
+ PreDefinedLogistic, // Quantize using pre-defined values
+ PreDefinedTanh, // Quantize using pre-defined values
+ PreDefinedSoftmax, // Quantize using pre-defined values
+ IntScale, // Round scale to a positive integer
+};
+
+ActivationQType activation_qtype(const CircleNode *node);
+
+// Create qparam with pre-defined values for special operators
+std::unique_ptr<CircleQuantParam> make_predefined_qparam(CircleNode *node, loco::DataType dtype);
+std::unique_ptr<CircleQuantParam> make_predefined_qparam(ActivationQType qtype,
+ loco::DataType dtype);
+
+// Update node's scale to a positive integer (for special Ops e.g., Floor, Ceil)
+void set_int_scale(luci::CircleNode *node);
+
+// Quantize const tensor using its min/max values
+void quant_const(luci::CircleConst *node, loco::DataType quant_type);
+
+// Check that a node is quantized without significant loss of precision;
+// Emits warnings to log with WARN
+void warn_accuracy_with_range(luci::CircleNode *n);
+
} // namespace luci
#endif // __LUCI_QUANTIZATION_UTILS_H__
diff --git a/compiler/luci/pass/src/QuantizeActivation.cpp b/compiler/luci/pass/src/QuantizeActivation.cpp
new file mode 100644
index 000000000..913450083
--- /dev/null
+++ b/compiler/luci/pass/src/QuantizeActivation.cpp
@@ -0,0 +1,296 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "QuantizeActivation.h"
+#include "QuantizationUtils.h"
+
+#include <luci/Service/Nodes/CircleConst.h>
+#include <luci/Log.h>
+
+#include <algorithm>
+#include <cmath>
+
+using namespace luci;
+
+namespace
+{
+
+bool has_min_max(const CircleNode *node)
+{
+ return node->quantparam() && !node->quantparam()->min.empty() && !node->quantparam()->max.empty();
+}
+
+} // namespace
+
+// QuantizeActivation
+namespace luci
+{
+
+void QuantizeActivation::visit(luci::CircleNode *node)
+{
+ LOGGER(l);
+ INFO(l) << "QuantizeActivation visit node: " << node->name() << std::endl;
+
+ // Check if node is fp32
+ if (not is_fp32(node))
+ return;
+
+ // Check if this is const (const activation is handled by QuantizeConstInputActivation)
+ // NOTE QuantizePreChecker guarantees weights/bias are const.
+ // Update this code when we accept non-const weights/bias.
+ if (node->opcode() == luci::CircleOpcode::CIRCLECONST)
+ return;
+
+ // Check if this is activation
+ // We assume min/max are recorded only for activations
+ if (has_min_max(node))
+ {
+ // Quantize using recorded min/max
+ auto quantparam = node->quantparam();
+ assert(quantparam);
+ assert(quantparam->min.size() == 1); // only support layer-wise quant
+ assert(quantparam->max.size() == 1); // only support layer-wise quant
+ auto min = quantparam->min[0];
+ auto max = quantparam->max[0];
+
+ float scaling_factor{0};
+ int64_t zp{0};
+ float nudged_min{0};
+ float nudged_max{0};
+
+ if (output_type == loco::DataType::U8)
+ {
+ compute_asym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
+ node->dtype(loco::DataType::U8);
+ }
+ else
+ {
+ compute_sym_scale(min, max, scaling_factor, nudged_min, nudged_max);
+ node->dtype(loco::DataType::S16);
+ }
+
+ node->quantparam()->scale.push_back(scaling_factor);
+ node->quantparam()->zerop.push_back(zp);
+ }
+ // Fix special attributes
+ if (node->opcode() == luci::CircleOpcode::CAST)
+ {
+ auto *cast = loco::must_cast<luci::CircleCast *>(node);
+ auto *cast_input = loco::must_cast<luci::CircleNode *>(cast->x());
+
+ // make sure that cast_input is already quantized
+ assert(cast_input->dtype() != loco::DataType::FLOAT32);
+ cast->in_data_type(cast_input->dtype());
+ cast->out_data_type(cast->dtype());
+ }
+}
+
+} // namespace luci
+
+// QuantizeSpecialActivation
+namespace luci
+{
+
+void QuantizeSpecialActivation::visit(luci::CircleNode *node)
+{
+ // Nodes fused with activation functions which need special quantization
+ auto fused_act_node = dynamic_cast<CircleNodeMixin<CircleNodeTrait::FusedActFunc> *>(node);
+ if (fused_act_node != nullptr && fused_act_node->fusedActivationFunction() == FusedActFunc::TANH)
+ {
+ auto qparam = make_predefined_qparam(luci::ActivationQType::PreDefinedTanh, output_type);
+ node->quantparam(std::move(qparam));
+ }
+}
+
+void QuantizeSpecialActivation::visit(luci::CircleLogistic *node)
+{
+ auto qparam = make_predefined_qparam(luci::ActivationQType::PreDefinedLogistic, output_type);
+ node->quantparam(std::move(qparam));
+}
+
+void QuantizeSpecialActivation::visit(luci::CircleTanh *node)
+{
+ auto qparam = make_predefined_qparam(luci::ActivationQType::PreDefinedTanh, output_type);
+ node->quantparam(std::move(qparam));
+}
+
+void QuantizeSpecialActivation::visit(luci::CircleSoftmax *node)
+{
+ auto qparam = make_predefined_qparam(luci::ActivationQType::PreDefinedSoftmax, output_type);
+ node->quantparam(std::move(qparam));
+}
+
+void QuantizeSpecialActivation::visit(luci::CircleFloor *node)
+{
+ assert(activation_qtype(node) == luci::ActivationQType::IntScale);
+ set_int_scale(node);
+}
+
+void QuantizeSpecialActivation::visit(luci::CircleFloorDiv *node)
+{
+ assert(activation_qtype(node) == luci::ActivationQType::IntScale);
+ set_int_scale(node);
+}
+
+void QuantizeSpecialActivation::visit(luci::CircleFloorMod *node)
+{
+ assert(activation_qtype(node) == luci::ActivationQType::IntScale);
+ set_int_scale(node);
+}
+
+void QuantizeSpecialActivation::visit(luci::CircleCeil *node)
+{
+ assert(activation_qtype(node) == luci::ActivationQType::IntScale);
+ set_int_scale(node);
+}
+
+} // namespace luci
+
+// QuantizeConstInputActivation
+namespace luci
+{
+
+// Default behavior (NYI)
+void QuantizeConstInputActivation::visit(luci::CircleNode *node)
+{
+ for (uint32_t i = 0; i < node->arity(); i++)
+ {
+ auto input_node = node->arg(i);
+ auto const_node = dynamic_cast<luci::CircleConst *>(input_node);
+ if (const_node != nullptr)
+ {
+ std::string msg = "Unsupported Op for const inputs: " + node->name();
+ throw std::runtime_error(msg);
+ }
+ }
+}
+
+// INPUT_NAME is the only activation of NODE
+#define QUANTIZE_SINGLE_CONST_INPUT(NODE, INPUT_NAME) \
+ void QuantizeConstInputActivation::visit(NODE *node) \
+ { \
+ auto input = node->INPUT_NAME(); \
+ auto const_node = dynamic_cast<luci::CircleConst *>(input); \
+ if (const_node && is_fp32(const_node)) \
+ { \
+ auto new_const = luci::clone(const_node); \
+ quant_const(new_const, _output_type); \
+ node->INPUT_NAME(new_const); \
+ } \
+ }
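For illustration (not part of the patch), the single-input macro expands for luci::CircleAbs to roughly:

void QuantizeConstInputActivation::visit(luci::CircleAbs *node)
{
  auto input = node->x();
  auto const_node = dynamic_cast<luci::CircleConst *>(input);
  if (const_node && is_fp32(const_node))
  {
    auto new_const = luci::clone(const_node); // copy, so other users keep the fp32 data
    quant_const(new_const, _output_type);     // quantize the copy in place
    node->x(new_const);                       // rewire this node to the quantized copy
  }
}

Cloning before quantizing keeps a const that is shared by several nodes from being mutated behind their backs.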
+
+// INPUT_NAME1 and INPUT_NAME2 are the only activations of NODE
+#define QUANTIZE_TWO_CONST_INPUTS(NODE, INPUT_NAME1, INPUT_NAME2) \
+ void QuantizeConstInputActivation::visit(NODE *node) \
+ { \
+ auto input1 = node->INPUT_NAME1(); \
+ auto const_node1 = dynamic_cast<luci::CircleConst *>(input1); \
+ if (const_node1 && is_fp32(const_node1)) \
+ { \
+ auto new_const1 = luci::clone(const_node1); \
+ quant_const(new_const1, _output_type); \
+ node->INPUT_NAME1(new_const1); \
+ } \
+ auto input2 = node->INPUT_NAME2(); \
+ auto const_node2 = dynamic_cast<luci::CircleConst *>(input2); \
+ if (const_node2 && is_fp32(const_node2)) \
+ { \
+ auto new_const2 = luci::clone(const_node2); \
+ quant_const(new_const2, _output_type); \
+ node->INPUT_NAME2(new_const2); \
+ } \
+ }
+
+// Ops that receive a single activation as an input
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleAbs, x)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleArgMax, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleArgMin, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleBatchToSpaceND, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleDepthToSpace, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleElu, features)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleExp, x)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleFloor, x)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleGather, params)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleGelu, features)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleLocalResponseNormalization, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleLogistic, x)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleMean, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleMirrorPad, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CirclePad, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleReduceAny, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleReduceProd, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleReduceMax, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleReduceMin, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleReshape, tensor)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleResizeBilinear, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleResizeNearestNeighbor, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleReverseSequence, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleRsqrt, x)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleSlice, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleSoftmax, logits)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleSpaceToBatchND, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleSpaceToDepth, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleSplit, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleSplitV, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleSqrt, x)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleSqueeze, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleStridedSlice, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleSum, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleTanh, x)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleTile, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleTopKV2, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleTranspose, a)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleUnpack, value)
+
+// Ops that receive two activations as inputs
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleAdd, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleBatchMatMul, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleDiv, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleEqual, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleFloorDiv, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleFloorMod, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleGreater, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleGreaterEqual, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleLess, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleLessEqual, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleMaximum, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleMinimum, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleMul, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleNotEqual, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CirclePow, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleSub, x, y)
+
+// AddN has an arbitrary number of inputs
+void QuantizeConstInputActivation::visit(luci::CircleAddN *node)
+{
+ auto arity = node->arity();
+ for (uint32_t i = 0; i < arity; i++)
+ {
+ auto input_node = node->inputs(i);
+ auto const_node = dynamic_cast<luci::CircleConst *>(input_node);
+ if (const_node && is_fp32(const_node))
+ {
+ auto new_const = luci::clone(const_node);
+ quant_const(new_const, _output_type);
+ node->inputs(i, new_const);
+ }
+ }
+}
+
+#undef QUANTIZE_SINGLE_CONST_INPUT
+#undef QUANTIZE_TWO_CONST_INPUTS
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/QuantizeActivation.h b/compiler/luci/pass/src/QuantizeActivation.h
new file mode 100644
index 000000000..ba3bc59f2
--- /dev/null
+++ b/compiler/luci/pass/src/QuantizeActivation.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_QUANTIZATION_ACTIVATION_H__
+#define __LUCI_QUANTIZATION_ACTIVATION_H__
+
+#include <luci/IR/CircleNodeVisitor.h>
+
+namespace luci
+{
+
+/**
+ * @brief Quantize non-const activation using recorded min/max values
+ */
+struct QuantizeActivation final : public luci::CircleNodeMutableVisitor<void>
+{
+ QuantizeActivation(loco::DataType input, loco::DataType output)
+ : input_type(input), output_type(output)
+ {
+ }
+
+ loco::DataType input_type;
+ loco::DataType output_type;
+
+ // Quantize each node using recorded min/max
+ void visit(luci::CircleNode *node);
+};
+
+/**
+ * @brief Quantize non-const activation using pre-defined scale/zp for special Ops
+ */
+struct QuantizeSpecialActivation final : public luci::CircleNodeMutableVisitor<void>
+{
+ QuantizeSpecialActivation(loco::DataType input, loco::DataType output)
+ : input_type(input), output_type(output)
+ {
+ }
+
+ loco::DataType input_type;
+ loco::DataType output_type;
+
+ void visit(luci::CircleNode *node);
+ void visit(luci::CircleLogistic *node);
+ void visit(luci::CircleTanh *node);
+ void visit(luci::CircleSoftmax *node);
+ void visit(luci::CircleFloor *node);
+ void visit(luci::CircleFloorDiv *node);
+ void visit(luci::CircleFloorMod *node);
+ void visit(luci::CircleCeil *node);
+};
+
+// Quantize constant input activation of a node
+// The input of a node is quantized if it is
+// 1. Constant (instance of CircleConst*)
+// 2. Activation (other inputs, e.g., weights, bias, axis, etc., should not be quantized here)
+struct QuantizeConstInputActivation final : public luci::CircleNodeMutableVisitor<void>
+{
+ QuantizeConstInputActivation(loco::DataType output_type) : _output_type(output_type) {}
+
+private:
+ loco::DataType _output_type;
+
+// Skip NODE
+#define SKIP(NODE) \
+ void visit(NODE *) {}
+
+ // Handled in QuantizeWeights and QuantizeBias
+ SKIP(luci::CircleConv2D)
+ SKIP(luci::CircleDepthwiseConv2D)
+ SKIP(luci::CircleFullyConnected)
+ SKIP(luci::CircleInstanceNorm)
+ SKIP(luci::CirclePRelu)
+ SKIP(luci::CircleTransposeConv)
+
+ // Handled in PropagateQParamBackwardPass
+ SKIP(luci::CircleConcatenation)
+ SKIP(luci::CirclePadV2)
+ SKIP(luci::CirclePack)
+ SKIP(luci::CircleOneHot)
+
+ // Inputs of logical Ops are bool, thus not quantized
+ SKIP(luci::CircleLogicalOr)
+ SKIP(luci::CircleLogicalAnd)
+ SKIP(luci::CircleLogicalNot)
+
+#undef SKIP
+
+ // Default behavior (NYI)
+ void visit(luci::CircleNode *node);
+
+ // Ops that receive a single activation as an input
+ void visit(luci::CircleAbs *node);
+ void visit(luci::CircleArgMax *node);
+ void visit(luci::CircleArgMin *node);
+ void visit(luci::CircleBatchToSpaceND *node);
+ void visit(luci::CircleDepthToSpace *node);
+ void visit(luci::CircleElu *node);
+ void visit(luci::CircleExp *node);
+ void visit(luci::CircleFloor *node);
+ void visit(luci::CircleGather *node);
+ void visit(luci::CircleGelu *node);
+ void visit(luci::CircleLocalResponseNormalization *node);
+ void visit(luci::CircleLogistic *node);
+ void visit(luci::CircleMean *node);
+ void visit(luci::CircleMirrorPad *node);
+ void visit(luci::CirclePad *node);
+ void visit(luci::CircleReduceAny *node);
+ void visit(luci::CircleReduceProd *node);
+ void visit(luci::CircleReduceMax *node);
+ void visit(luci::CircleReduceMin *node);
+ void visit(luci::CircleReshape *node);
+ void visit(luci::CircleResizeBilinear *node);
+ void visit(luci::CircleResizeNearestNeighbor *node);
+ void visit(luci::CircleReverseSequence *node);
+ void visit(luci::CircleRsqrt *node);
+ void visit(luci::CircleSlice *node);
+ void visit(luci::CircleSoftmax *node);
+ void visit(luci::CircleSpaceToBatchND *node);
+ void visit(luci::CircleSpaceToDepth *node);
+ void visit(luci::CircleSplit *node);
+ void visit(luci::CircleSplitV *node);
+ void visit(luci::CircleSqrt *node);
+ void visit(luci::CircleSqueeze *node);
+ void visit(luci::CircleStridedSlice *node);
+ void visit(luci::CircleSum *node);
+ void visit(luci::CircleTanh *node);
+ void visit(luci::CircleTile *node);
+ void visit(luci::CircleTopKV2 *node);
+ void visit(luci::CircleTranspose *node);
+ void visit(luci::CircleUnpack *node);
+
+ // Ops that receive two activations as inputs
+ void visit(luci::CircleAdd *node);
+ void visit(luci::CircleBatchMatMul *node);
+ void visit(luci::CircleDiv *node);
+ void visit(luci::CircleEqual *node);
+ void visit(luci::CircleFloorDiv *node);
+ void visit(luci::CircleFloorMod *node);
+ void visit(luci::CircleGreater *node);
+ void visit(luci::CircleGreaterEqual *node);
+ void visit(luci::CircleLess *node);
+ void visit(luci::CircleLessEqual *node);
+ void visit(luci::CircleMaximum *node);
+ void visit(luci::CircleMinimum *node);
+ void visit(luci::CircleMul *node);
+ void visit(luci::CircleNotEqual *node);
+ void visit(luci::CirclePow *node);
+ void visit(luci::CircleSub *node);
+
+  // AddN has an arbitrary number of inputs
+ void visit(luci::CircleAddN *node);
+};
+
+} // namespace luci
+
+#endif // __LUCI_QUANTIZATION_ACTIVATION_H__
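A sketch of how these visitors are typically driven (simplified; the actual driver lives in the quantization pass and may differ):

#include <loco.h>
#include <luci/IR/CircleNodes.h>

void run_quantize_activation(loco::Graph *g, loco::DataType in, loco::DataType out)
{
  for (auto node : loco::active_nodes(loco::output_nodes(g)))
  {
    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
    luci::QuantizeActivation qa(in, out);
    circle_node->accept(&qa); // dispatches to the matching visit() overload
  }
}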
diff --git a/compiler/luci/pass/src/QuantizeBias.cpp b/compiler/luci/pass/src/QuantizeBias.cpp
new file mode 100644
index 000000000..de97a14dd
--- /dev/null
+++ b/compiler/luci/pass/src/QuantizeBias.cpp
@@ -0,0 +1,314 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "QuantizeBias.h"
+#include "QuantizationUtils.h"
+
+#include <luci/Service/Nodes/CircleConst.h>
+#include <luci/Log.h>
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+
+using namespace luci;
+
+namespace
+{
+
+// struct to carry Input/Weights/Bias
+struct IWB
+{
+ CircleNode *input = nullptr;
+ CircleNode *weights = nullptr;
+ CircleConst *bias = nullptr;
+
+ IWB(loco::Node *i, loco::Node *w, loco::Node *b)
+ {
+ input = dynamic_cast<luci::CircleNode *>(i);
+ weights = dynamic_cast<luci::CircleNode *>(w);
+ bias = dynamic_cast<luci::CircleConst *>(b);
+ }
+
+  // Return true if bias can be quantized with valid input and weights
+ operator bool()
+ {
+ if (bias == nullptr || is_quantized(bias))
+ return false;
+ if (input == nullptr || weights == nullptr)
+ return false;
+ return true;
+ }
+};
+
+// Create a new const node from an existing node.
+// The new node has the following characteristics
+// type: T
+// shape: same with 'node' (given as an argument)
+// buffer size: 'size' (given as an argument)
+// Note that contents are not filled in this function.
+template <loco::DataType T>
+luci::CircleConst *create_empty_const_from(luci::CircleConst *node, uint32_t size)
+{
+ auto new_node = node->graph()->nodes()->create<CircleConst>();
+ // TODO: We don't have any naming convention for quantized nodes yet.
+ // Fix this when we have one.
+ new_node->name(node->name());
+ new_node->dtype(T);
+ new_node->rank(node->rank());
+ for (uint32_t i = 0; i < node->rank(); i++)
+ new_node->dim(i).set(node->dim(i).value());
+
+ new_node->size<T>(size);
+ new_node->shape_status(luci::ShapeStatus::VALID);
+
+ return new_node;
+}
+
+CircleConst *asym_quant_bias_per_layer(CircleConst *node, float input_scale, float weight_scale,
+ float *scaling_factor, int64_t *zp)
+{
+ float scale = input_scale * weight_scale;
+ const float scaling_factor_inv = (scale == 0) ? 0 : 1.0 / scale;
+
+ uint32_t size = node->size<loco::DataType::FLOAT32>();
+ std::vector<int32_t> quantized_values(size);
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ quantized_values[i] =
+ static_cast<int32_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
+ }
+
+ auto new_bias = create_empty_const_from<loco::DataType::S32>(node, size);
+
+ const int32_t kMinScale = std::numeric_limits<int32_t>::lowest();
+ const int32_t kMaxScale = std::numeric_limits<int32_t>::max();
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ new_bias->at<loco::DataType::S32>(i) =
+ std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ }
+ *scaling_factor = scale;
+ *zp = 0;
+
+ return new_bias;
+}
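The bias scale is simply the product of the input and weight scales; a quick numeric sketch with illustrative values:

const float input_scale = 0.1f, weight_scale = 0.02f, bias_value = 0.05f;
const float bias_scale = input_scale * weight_scale; // 0.002
const int32_t q = static_cast<int32_t>(std::round(bias_value / bias_scale)); // 25
// Dequantizing: 25 * 0.002 == 0.05, recovering the original bias exactly here.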
+
+CircleConst *quant_bias_per_channel(CircleConst *node, float input_scale,
+ std::vector<float> &weight_scale,
+ std::vector<float> &scaling_factor, std::vector<int64_t> &zp)
+{
+ float scaling_factor_inv{0};
+
+ uint32_t size = node->size<loco::DataType::FLOAT32>();
+ std::vector<int32_t> quantized_values(size);
+
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ scaling_factor[i] = input_scale * weight_scale[i];
+ scaling_factor_inv = (scaling_factor[i] == 0) ? 0 : 1.0 / scaling_factor[i];
+ quantized_values[i] =
+ static_cast<int32_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
+ zp[i] = 0;
+ }
+
+ auto new_bias = create_empty_const_from<loco::DataType::S32>(node, size);
+
+ const int32_t kMinScale = std::numeric_limits<int32_t>::lowest();
+ const int32_t kMaxScale = std::numeric_limits<int32_t>::max();
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ new_bias->at<loco::DataType::S32>(i) =
+ std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ }
+
+ return new_bias;
+}
+
+CircleConst *int16_quant_bias_per_channel(CircleConst *node, float input_scale,
+ std::vector<float> &weight_scale,
+ std::vector<float> &scaling_factor,
+ std::vector<int64_t> &zp)
+{
+ float scaling_factor_inv{0};
+
+ uint32_t size = node->size<loco::DataType::FLOAT32>();
+ std::vector<int64_t> quantized_values(size);
+
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ scaling_factor[i] = input_scale * weight_scale[i];
+ scaling_factor_inv = (scaling_factor[i] == 0) ? 0 : 1.0 / scaling_factor[i];
+ quantized_values[i] =
+ static_cast<int64_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
+ zp[i] = 0;
+ }
+
+ auto new_bias = create_empty_const_from<loco::DataType::S64>(node, size);
+
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ new_bias->at<loco::DataType::S64>(i) = quantized_values[i];
+ }
+
+ return new_bias;
+}
+
+} // namespace
+
+namespace luci
+{
+
+// Return a quantized bias node
+CircleConst *QuantizeBias::quantized_bias(CircleNode *input, const CircleNode *weight,
+ CircleNode *bias)
+{
+ auto const_bias = loco::must_cast<luci::CircleConst *>(bias);
+ assert(const_bias->dtype() == loco::DataType::FLOAT32);
+
+ // If input is const, it is quantized here, not in QuantizeActivation
+ if (auto const_input = dynamic_cast<luci::CircleConst *>(input))
+ {
+ quant_const(const_input, output_type);
+ }
+
+ CircleConst *new_bias = nullptr;
+
+ if (granularity == QuantizationGranularity::ChannelWise)
+ {
+ auto input_q = input->quantparam();
+ assert(input_q);
+    assert(input_q->scale.size() == 1); // input scale is layer-wise
+ auto input_scale = input_q->scale[0];
+
+    assert(weight->quantparam() != nullptr); // weight scale is channel-wise
+ auto weight_scale = weight->quantparam()->scale;
+
+ uint32_t size = const_bias->size<loco::DataType::FLOAT32>();
+ assert(size == weight_scale.size());
+ std::vector<float> scaling_factor(size);
+ std::vector<int64_t> zp(size);
+
+ if (const_bias->rank() == 0)
+ {
+ // TODO Support quantization of scalar bias
+ throw std::runtime_error("Quantization of scalar bias is not yet supported (" +
+ const_bias->name() + ")");
+ }
+ if (size != const_bias->dim(const_bias->rank() - 1).value())
+ {
+ throw std::runtime_error(const_bias->name() +
+ " (bias) should have the shape of [1, 1, .. 1, channel]");
+ }
+
+ if (output_type == loco::DataType::U8)
+ {
+ new_bias = quant_bias_per_channel(const_bias, input_scale, weight_scale, scaling_factor, zp);
+ }
+ else if (output_type == loco::DataType::S16)
+ {
+ new_bias =
+ int16_quant_bias_per_channel(const_bias, input_scale, weight_scale, scaling_factor, zp);
+ }
+ else
+ {
+ throw std::runtime_error("Unsupported quantization type.");
+ }
+
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->scale = scaling_factor;
+ quantparam->zerop = zp;
+ quantparam->quantized_dimension = const_bias->rank() - 1;
+ assert(new_bias->quantparam() == nullptr); // bias should not be quantized before
+ new_bias->quantparam(std::move(quantparam));
+
+ return new_bias;
+ }
+ else
+ {
+ auto input_q = input->quantparam();
+ assert(input_q);
+ assert(input_q->scale.size() == 1); // Only support per-layer quant
+ auto input_scale = input_q->scale[0];
+
+ auto weight_q = weight->quantparam();
+ assert(weight_q);
+ assert(weight_q->scale.size() == 1); // Only support per-layer quant
+ auto weight_scale = weight_q->scale[0];
+
+ float scaling_factor{0};
+ int64_t zp{0};
+ new_bias =
+ asym_quant_bias_per_layer(const_bias, input_scale, weight_scale, &scaling_factor, &zp);
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->scale.push_back(scaling_factor);
+ quantparam->zerop.push_back(zp);
+ assert(new_bias->quantparam() == nullptr); // bias should not be quantized before
+ new_bias->quantparam(std::move(quantparam));
+
+ return new_bias;
+ }
+}
+
+void QuantizeBias::visit(luci::CircleConv2D *node)
+{
+ LOGGER(l);
+ INFO(l) << "QuantizeBias QuantizeBias::visit node: " << node->name() << std::endl;
+
+ if (auto iwb = IWB(node->input(), node->filter(), node->bias()))
+ {
+ auto new_bias = quantized_bias(iwb.input, iwb.weights, iwb.bias);
+ node->bias(new_bias);
+ }
+}
+
+void QuantizeBias::visit(luci::CircleDepthwiseConv2D *node)
+{
+ LOGGER(l);
+ INFO(l) << "QuantizeBias QuantizeBias::visit node: " << node->name() << std::endl;
+
+ if (auto iwb = IWB(node->input(), node->filter(), node->bias()))
+ {
+ auto new_bias = quantized_bias(iwb.input, iwb.weights, iwb.bias);
+ node->bias(new_bias);
+ }
+}
+
+void QuantizeBias::visit(luci::CircleTransposeConv *node)
+{
+ LOGGER(l);
+ INFO(l) << "QuantizeBias QuantizeBias::visit node: " << node->name() << std::endl;
+
+ if (auto iwb = IWB(node->outBackprop(), node->filter(), node->bias()))
+ {
+ auto new_bias = quantized_bias(iwb.input, iwb.weights, iwb.bias);
+ node->bias(new_bias);
+ }
+}
+
+void QuantizeBias::visit(luci::CircleFullyConnected *node)
+{
+ LOGGER(l);
+ INFO(l) << "QuantizeBias visit node: " << node->name() << std::endl;
+
+ if (auto iwb = IWB(node->input(), node->weights(), node->bias()))
+ {
+ auto new_bias = quantized_bias(iwb.input, iwb.weights, iwb.bias);
+ node->bias(new_bias);
+ }
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/QuantizeBias.h b/compiler/luci/pass/src/QuantizeBias.h
new file mode 100644
index 000000000..8de09df72
--- /dev/null
+++ b/compiler/luci/pass/src/QuantizeBias.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_QUANTIZE_BIAS_H__
+#define __LUCI_QUANTIZE_BIAS_H__
+
+#include <luci/Pass/QuantizationParameters.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+namespace luci
+{
+
+/**
+ * @brief QuantizeBias quantizes tensors for bias
+ * @details Use input/weights scale to quantize values
+ */
+struct QuantizeBias final : public luci::CircleNodeMutableVisitor<void>
+{
+ QuantizeBias(loco::DataType input, loco::DataType output, QuantizationGranularity gr)
+ : input_type(input), output_type(output), granularity(gr)
+ {
+ }
+
+ loco::DataType input_type;
+ loco::DataType output_type;
+ QuantizationGranularity granularity;
+
+private:
+ // Return a quantized bias node
+ CircleConst *quantized_bias(CircleNode *input, const CircleNode *weight, CircleNode *bias);
+
+ void visit(luci::CircleConv2D *node);
+ void visit(luci::CircleDepthwiseConv2D *node);
+ void visit(luci::CircleTransposeConv *node);
+ void visit(luci::CircleFullyConnected *node);
+
+ // Default behavior
+ void visit(luci::CircleNode *) {}
+};
+
+} // namespace luci
+
+#endif // __LUCI_QUANTIZE_BIAS_H__
diff --git a/compiler/luci/pass/src/QuantizeBias.test.cpp b/compiler/luci/pass/src/QuantizeBias.test.cpp
new file mode 100644
index 000000000..9030f59e9
--- /dev/null
+++ b/compiler/luci/pass/src/QuantizeBias.test.cpp
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "QuantizeBias.h"
+
+#include "helpers/CreateCircleConst.h"
+
+#include <luci/test/TestIOGraph.h>
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleQuantParam.h>
+
+#include <gtest/gtest.h>
+
+using namespace luci;
+
+namespace
+{
+
+using namespace luci::test;
+
+/**
+ * Simple graph for test
+ *
+ * BEFORE
+ *
+ * [IFM] [WEIGHTS] [BIAS(FP32)]
+ * \ | /
+ * [FC]
+ * |
+ * [OFM]
+ *
+ * AFTER
+ *
+ * [IFM] [WEIGHTS] [BIAS(Quantized)]
+ * \ | /
+ * [FC]
+ * |
+ * [OFM]
+ */
+struct Q8FCGraphlet
+{
+public:
+ Q8FCGraphlet() = default;
+ virtual ~Q8FCGraphlet() = default;
+
+ void init(loco::Graph *g, const ShapeU32 out_shape, const ShapeU32 w_shape,
+ const ShapeU32 bias_shape, const float bv)
+ {
+ _fc = g->nodes()->create<luci::CircleFullyConnected>();
+ _fc->input(_x);
+ _x->dtype(loco::DataType::U8);
+ {
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->scale.push_back(1.0);
+ quantparam->zerop.push_back(0);
+ quantparam->quantized_dimension = 0;
+ _x->quantparam(std::move(quantparam));
+ }
+
+ auto weights = create_const_node<uint8_t>(g, loco::DataType::U8, w_shape, 1.0);
+ auto w_qparam = std::make_unique<CircleQuantParam>();
+ std::vector<float> w_scale(weights->dim(0).value(), 1.0);
+ std::vector<int64_t> w_zp(weights->dim(0).value(), 0);
+ w_qparam->scale = w_scale;
+ w_qparam->zerop = w_zp;
+ w_qparam->quantized_dimension = 0;
+ weights->quantparam(std::move(w_qparam));
+ _fc->weights(weights);
+ _fc->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _fc->dtype(loco::DataType::U8);
+ _fc->shape(out_shape);
+ _fc->bias(create_const_node(g, loco::DataType::FLOAT32, bias_shape, bv));
+ _fc->name("fc");
+ {
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->scale.push_back(1.0);
+ quantparam->zerop.push_back(0);
+ quantparam->quantized_dimension = 0;
+ _fc->quantparam(std::move(quantparam));
+ }
+ }
+
+public:
+ luci::CircleFullyConnected *fc() { return _fc; }
+
+protected:
+ luci::CircleFullyConnected *_fc = nullptr;
+ luci::CircleInput *_x = nullptr;
+};
+
+struct Q8FCGraph final : public TestIGraphlet, public TestOGraphlet, public Q8FCGraphlet
+{
+ void init(const ShapeU32 in_shape, const ShapeU32 w_shape, const ShapeU32 out_shape,
+ const ShapeU32 bias_shape, const float bv)
+ {
+ TestIGraphlet::init(g(), in_shape);
+ TestOGraphlet::init(g(), out_shape);
+ _x = input();
+ Q8FCGraphlet::init(g(), out_shape, w_shape, bias_shape, bv);
+ output()->from(_fc);
+ }
+};
+
+class CQ8QuantizeBiasFCTest : public ::testing::Test
+{
+public:
+ Q8FCGraph g;
+ luci::QuantizeBias qb{loco::DataType::FLOAT32, loco::DataType::U8,
+ luci::QuantizationGranularity::ChannelWise};
+};
+
+} // namespace
+
+TEST_F(CQ8QuantizeBiasFCTest, fully_connected)
+{
+ g.init({1, 18, 80}, {256, 80}, {18, 256}, {1, 256}, 1);
+ g.fc()->accept(&qb);
+
+ auto bias = loco::must_cast<CircleConst *>(g.fc()->bias());
+ auto qparam = bias->quantparam();
+
+ EXPECT_NE(nullptr, qparam);
+ EXPECT_EQ(256, qparam->scale.size());
+ EXPECT_EQ(256, qparam->zerop.size());
+ EXPECT_EQ(1, qparam->quantized_dimension);
+}
+
+TEST_F(CQ8QuantizeBiasFCTest, wrong_bias_shape_NEG)
+{
+ g.init({1, 18, 80}, {256, 80}, {18, 256}, {1, 2, 128}, 1);
+ EXPECT_ANY_THROW(g.fc()->accept(&qb)); // Wrong bias shape
+}
diff --git a/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp b/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp
index c492234c7..f8989c9e0 100644
--- a/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp
+++ b/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp
@@ -16,41 +16,40 @@
#include "luci/Pass/QuantizeDequantizeWeightsPass.h"
#include "QuantizationUtils.h"
+#include "helpers/LayerInfoMap.h"
#include <luci/IR/CircleNodes.h>
#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Service/Nodes/CircleConst.h>
#include <luci/Log.h>
#include <loco/IR/TensorShape.h>
#include <iostream>
#include <cmath>
-
-namespace luci
-{
+#include <functional>
+#include <limits>
namespace
{
-void cal_minmax_per_channel(CircleConst *node, std::vector<float> &min, std::vector<float> &max)
+using namespace luci;
+using IterFunc = std::function<void(uint32_t *, loco::TensorShape &, int32_t)>;
+
+void iterate_per_channel(CircleConst *node, IterFunc func)
{
loco::TensorShape dimension;
dimension.rank(4);
uint32_t indices[4] = {
- 0,
+ 0,
};
- int channel_dim_index{0};
- int size{0};
+ int32_t channel_dim_index{0};
if (!get_channel_dim_index(node, dimension, channel_dim_index))
{
assert(false);
return;
}
- size = dimension.dim(channel_dim_index).value();
- std::vector<bool> has_min_max_value(size, false);
- min.resize(size);
- max.resize(size);
for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
{
for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
@@ -59,80 +58,91 @@ void cal_minmax_per_channel(CircleConst *node, std::vector<float> &min, std::vec
{
for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
{
- int channel_idx = indices[channel_dim_index];
- auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
- if (has_min_max_value[channel_idx])
- {
- min[channel_idx] = data < min[channel_idx] ? data : min[channel_idx];
- max[channel_idx] = data > max[channel_idx] ? data : max[channel_idx];
- }
- else
- {
- min[channel_idx] = data;
- max[channel_idx] = data;
- has_min_max_value[channel_idx] = true;
- }
+ func(indices, dimension, channel_dim_index);
}
}
}
}
}
-void sym_wquant_per_channel(CircleConst *node, std::vector<float> &min, std::vector<float> &max,
- std::vector<float> &scaling_factor, std::vector<int64_t> &zp,
- std::vector<float> &nudged_min, std::vector<float> &nudged_max)
-{
- assert(node->dtype() == loco::DataType::FLOAT32);
- const int32_t kMaxScale = std::numeric_limits<int16_t>::max();
- const int32_t kMinScale = -kMaxScale;
+} // namespace
- uint32_t size = node->size<loco::DataType::FLOAT32>();
- std::vector<int32_t> quantized_values(size);
+namespace luci
+{
- for (size_t i = 0; i < min.size(); ++i)
- {
- compute_sym_scale_zp(min[i], max[i], scaling_factor[i], zp[i], nudged_min[i], nudged_max[i]);
- }
+namespace
+{
+void cal_minmax_per_channel(CircleConst *node, std::vector<float> &min, std::vector<float> &max)
+{
loco::TensorShape dimension;
dimension.rank(4);
- uint32_t indices[4] = {
- 0,
- };
- int channel_dim_index{0};
+ int32_t channel_dim_index{0};
if (!get_channel_dim_index(node, dimension, channel_dim_index))
{
assert(false);
return;
}
+ auto size = dimension.dim(channel_dim_index).value();
- for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
- {
- for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
+ std::vector<bool> has_min_max_value(size, false);
+ min.resize(size);
+ max.resize(size);
+
+ auto cal_minmax = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
+ int channel_idx = indices[channel_dim_index];
+ auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+ if (has_min_max_value[channel_idx])
{
- for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
- {
- for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
- {
- int channel_idx = indices[channel_dim_index];
- const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
- auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
- data = data < nudged_min[channel_idx] ? nudged_min[channel_idx] : data;
- data = data > nudged_max[channel_idx] ? nudged_max[channel_idx] : data;
- quantized_values[cal_offset(dimension, indices)] =
- static_cast<int32_t>(std::round(data * scaling_factor_inv));
- }
- }
+ min[channel_idx] = data < min[channel_idx] ? data : min[channel_idx];
+ max[channel_idx] = data > max[channel_idx] ? data : max[channel_idx];
+ }
+ else
+ {
+ min[channel_idx] = data;
+ max[channel_idx] = data;
+ has_min_max_value[channel_idx] = true;
}
+ };
+
+ iterate_per_channel(node, cal_minmax);
+}
+
+void sym_wquant_per_channel(CircleConst *node, std::vector<float> &min, std::vector<float> &max,
+ std::vector<float> &scaling_factor, std::vector<float> &nudged_min,
+ std::vector<float> &nudged_max)
+{
+ assert(node->dtype() == loco::DataType::FLOAT32);
+ const int32_t kMaxScale = std::numeric_limits<int16_t>::max();
+ const int32_t kMinScale = -kMaxScale;
+
+ uint32_t size = node->size<loco::DataType::FLOAT32>();
+ std::vector<int32_t> quantized_values(size);
+
+ for (size_t i = 0; i < min.size(); ++i)
+ {
+ compute_sym_scale(min[i], max[i], scaling_factor[i], nudged_min[i], nudged_max[i]);
}
+ auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
+ int channel_idx = indices[channel_dim_index];
+ const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
+ auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+ data = data < nudged_min[channel_idx] ? nudged_min[channel_idx] : data;
+ data = data > nudged_max[channel_idx] ? nudged_max[channel_idx] : data;
+ quantized_values[cal_offset(dimension, indices)] =
+ static_cast<int32_t>(std::round(data * scaling_factor_inv));
+ };
+
+ iterate_per_channel(node, quantize);
+
node->dtype(loco::DataType::S16); // change the type of tensor
node->size<loco::DataType::S16>(size); // resize tensor
for (uint32_t i = 0; i < size; ++i)
{
node->at<loco::DataType::S16>(i) =
- std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
}
}
@@ -142,35 +152,14 @@ void sym_wdequant_per_channel(CircleConst *node, std::vector<float> &scaling_fac
uint32_t size = node->size<loco::DataType::S16>();
std::vector<float> dequantized_values(size);
- loco::TensorShape dimension;
- dimension.rank(4);
- uint32_t indices[4] = {
- 0,
+ auto dequantize = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
+ int channel_idx = indices[channel_dim_index];
+ auto data = node->at<loco::DataType::S16>(cal_offset(dimension, indices));
+ dequantized_values[cal_offset(dimension, indices)] =
+ static_cast<float>(data) * scaling_factor[channel_idx];
};
- int channel_dim_index{0};
- if (!get_channel_dim_index(node, dimension, channel_dim_index))
- {
- assert(false);
- return;
- }
-
- for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
- {
- for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
- {
- for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
- {
- for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
- {
- int channel_idx = indices[channel_dim_index];
- auto data = node->at<loco::DataType::S16>(cal_offset(dimension, indices));
- dequantized_values[cal_offset(dimension, indices)] =
- static_cast<float>(data) * scaling_factor[channel_idx];
- }
- }
- }
- }
+ iterate_per_channel(node, dequantize);
node->dtype(loco::DataType::FLOAT32); // change the type of tensor
node->size<loco::DataType::FLOAT32>(size); // resize tensor
@@ -198,38 +187,17 @@ void asymmetric_wquant_per_channel(CircleConst *node, std::vector<float> &min,
compute_asym_scale_zp(min[i], max[i], scaling_factor[i], zp[i], nudged_min[i], nudged_max[i]);
}
- loco::TensorShape dimension;
- dimension.rank(4);
- uint32_t indices[4] = {
- 0,
+ auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
+ int channel_idx = indices[channel_dim_index];
+ const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
+ auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+ data = data < nudged_min[channel_idx] ? nudged_min[channel_idx] : data;
+ data = data > nudged_max[channel_idx] ? nudged_max[channel_idx] : data;
+ quantized_values[cal_offset(dimension, indices)] =
+ static_cast<int32_t>(std::round((data - nudged_min[channel_idx]) * scaling_factor_inv));
};
- int channel_dim_index{0};
- if (!get_channel_dim_index(node, dimension, channel_dim_index))
- {
- assert(false);
- return;
- }
-
- for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
- {
- for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
- {
- for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
- {
- for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
- {
- int channel_idx = indices[channel_dim_index];
- const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
- auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
- data = data < nudged_min[channel_idx] ? nudged_min[channel_idx] : data;
- data = data > nudged_max[channel_idx] ? nudged_max[channel_idx] : data;
- quantized_values[cal_offset(dimension, indices)] = static_cast<int32_t>(
- std::round((data - nudged_min[channel_idx]) * scaling_factor_inv));
- }
- }
- }
- }
+ iterate_per_channel(node, quantize);
node->dtype(loco::DataType::U8); // change the type of tensor
node->size<loco::DataType::U8>(size); // resize tensor
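In symbols, the `quantize` lambda above, together with the final clamping loop, computes for each weight f in channel c with scale s_c and nudged range [m̃_c, M̃_c]:

```latex
q = \operatorname{clamp}\!\left(\operatorname{round}\!\left(
  \frac{\operatorname{clip}(f,\ \widetilde{m}_c,\ \widetilde{M}_c) - \widetilde{m}_c}{s_c}
\right),\ 0,\ 255\right),
\qquad
f' = s_c\, q + \widetilde{m}_c
```

where f' is the value restored by the dequantize hunk that follows.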
@@ -246,35 +214,14 @@ void asymmetric_wdequant_per_channel(CircleConst *node, std::vector<float> &scal
uint32_t size = node->size<loco::DataType::U8>();
std::vector<float> dequantized_values(size);
- loco::TensorShape dimension;
- dimension.rank(4);
- uint32_t indices[4] = {
- 0,
+ auto dequantize = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
+ int channel_idx = indices[channel_dim_index];
+ auto data = node->at<loco::DataType::U8>(cal_offset(dimension, indices));
+ dequantized_values[cal_offset(dimension, indices)] =
+ static_cast<float>(data) * scaling_factor[channel_idx] + nudged_min[channel_idx];
};
- int channel_dim_index{0};
- if (!get_channel_dim_index(node, dimension, channel_dim_index))
- {
- assert(false);
- return;
- }
-
- for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
- {
- for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
- {
- for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
- {
- for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
- {
- int channel_idx = indices[channel_dim_index];
- auto data = node->at<loco::DataType::U8>(cal_offset(dimension, indices));
- dequantized_values[cal_offset(dimension, indices)] =
- static_cast<float>(data) * scaling_factor[channel_idx] + nudged_min[channel_idx];
- }
- }
- }
- }
+ iterate_per_channel(node, dequantize);
node->dtype(loco::DataType::FLOAT32); // change the type of tensor
node->size<loco::DataType::FLOAT32>(size); // resize tensor
@@ -284,36 +231,6 @@ void asymmetric_wdequant_per_channel(CircleConst *node, std::vector<float> &scal
}
}
-void asymmetric_wquant_with_minmax_per_layer(CircleConst *node, float min, float max,
- float &scaling_factor, int64_t &zp, float &nudged_min,
- float &nudged_max)
-{
-
- const int32_t kMinScale = 0;
- const int32_t kMaxScale = 255;
-
- uint32_t size = node->size<loco::DataType::FLOAT32>();
- compute_asym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
- const float scaling_factor_inv = 1.0 / scaling_factor;
- std::vector<int32_t> quantized_values(size);
- for (uint32_t i = 0; i < size; ++i)
- {
- // clipping
- auto data = node->at<loco::DataType::FLOAT32>(i);
- data = data < nudged_min ? nudged_min : data;
- data = data > nudged_max ? nudged_max : data;
- quantized_values[i] =
- static_cast<int32_t>(std::round((data - nudged_min) * scaling_factor_inv));
- }
-
- node->dtype(loco::DataType::U8); // change the type of tensor
- node->size<loco::DataType::U8>(size); // resize tensor
- for (uint32_t i = 0; i < size; ++i)
- {
- node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
- }
-}
-
void asymmetric_wdequant_with_minmax_per_layer(CircleConst *node, float scaling_factor,
float nudged_min)
{
@@ -333,54 +250,15 @@ void asymmetric_wdequant_with_minmax_per_layer(CircleConst *node, float scaling_
}
}
-bool is_quantized(const CircleNode *node)
-{
- return node->dtype() == loco::DataType::U8 || // activation, weight
- node->dtype() == loco::DataType::S16 || // activation, weight
- node->dtype() == loco::DataType::S32; // bias
-}
-
-// Check if node is weights of conv2d, transpose_conv2d, depthwise_conv2d, or fully_connected layer
-bool is_weights(CircleNode *node)
-{
- auto circle_const = dynamic_cast<CircleConst *>(node);
- if (circle_const == nullptr)
- return false;
-
- auto succs = loco::succs(node);
- if (succs.size() != 1) // assume weights is used by only one node
- return false;
-
- for (auto out : succs)
- {
- auto conv = dynamic_cast<CircleConv2D *>(out);
- if (conv != nullptr && conv->filter() == circle_const && circle_const->rank() == 4)
- return true;
-
- auto dw_conv = dynamic_cast<CircleDepthwiseConv2D *>(out);
- if (dw_conv != nullptr && dw_conv->filter() == circle_const && circle_const->rank() == 4)
- return true;
-
- auto tw_conv = dynamic_cast<CircleTransposeConv *>(out);
- if (tw_conv != nullptr && tw_conv->filter() == circle_const && circle_const->rank() == 4)
- return true;
-
- auto fc = dynamic_cast<CircleFullyConnected *>(out);
- if (fc != nullptr && fc->weights() == circle_const && circle_const->rank() == 2)
- return true;
- }
- return false;
-}
-
/**
* @brief QuantizeDequantizeWeights quantizes and dequantizes tensors for weights
* @details Find min/max values on the fly, quantize the model, and dequantize the model
*/
-struct QuantizeDequantizeWeights final : public luci::CircleNodeMutableVisitor<bool>
+struct QuantizeDequantizeWeights final : public luci::CircleNodeMutableVisitor<void>
{
QuantizeDequantizeWeights(loco::DataType input, loco::DataType output,
QuantizationGranularity granularity)
- : input_type(input), output_type(output), granularity(granularity)
+ : input_type(input), output_type(output), granularity(granularity)
{
}
@@ -388,88 +266,164 @@ struct QuantizeDequantizeWeights final : public luci::CircleNodeMutableVisitor<b
loco::DataType output_type;
QuantizationGranularity granularity;
- // Quantize and dequantize input tensors of each node
- bool visit(luci::CircleNode *node)
+private:
+ // Fake quantize weights (Only u8 quantization is supported for LWQ)
+ void fake_quantize_lwq(luci::CircleConst *weights) const
{
- assert(output_type == loco::DataType::U8 || output_type == loco::DataType::S16);
- LOGGER(l);
- INFO(l) << "QuantizeDequantizeWeights visit node: " << node->name() << std::endl;
- auto arity = node->arity();
- for (uint32_t i = 0; i < arity; i++)
+ assert(output_type == loco::DataType::U8); // FIX_CALLER_UNLESS
+
+ // Find min/max per layer
+ float min = std::numeric_limits<float>::max();
+ float max = std::numeric_limits<float>::lowest();
+ for (uint32_t i = 0; i < weights->size<loco::DataType::FLOAT32>(); i++)
{
- auto input_node = node->arg(i);
- auto circle_node = loco::must_cast<luci::CircleNode *>(input_node);
+ auto data = weights->at<loco::DataType::FLOAT32>(i);
+ min = data < min ? data : min;
+ max = data > max ? data : max;
+ }
+ float scaling_factor{0};
+ int64_t zp{0};
+ float nudged_min{0};
+ float nudged_max{0};
+
+ asymmetric_wquant_with_minmax_per_layer(weights, min, max, scaling_factor, zp, nudged_min,
+ nudged_max);
+ asymmetric_wdequant_with_minmax_per_layer(weights, scaling_factor, nudged_min);
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->min.push_back(nudged_min);
+ quantparam->max.push_back(nudged_max);
+ quantparam->scale.push_back(scaling_factor);
+ quantparam->zerop.push_back(zp);
+ weights->quantparam(std::move(quantparam));
+ }
- // Check if this is already quantized
- if (is_quantized(circle_node))
- continue;
+private:
+ // Fake quantize weights (u8/s16 quantization is supported for CWQ)
+ void fake_quantize_cwq(luci::CircleConst *weights) const
+ {
+ assert(output_type == loco::DataType::U8 ||
+ output_type == loco::DataType::S16); // FIX_CALLER_UNLESS
- if (is_weights(circle_node))
- {
- auto circle_const = loco::must_cast<luci::CircleConst *>(circle_node);
+ // Find min/max per channel
+ std::vector<float> min;
+ std::vector<float> max;
- // Find min/max per channel-wise
- if (granularity == QuantizationGranularity::ChannelWise)
- {
- std::vector<float> min;
- std::vector<float> max;
-
- cal_minmax_per_channel(circle_const, min, max);
-
- std::vector<float> nudged_min(min.size());
- std::vector<float> nudged_max(min.size());
- std::vector<float> scaling_factor(min.size());
- std::vector<int64_t> zp(min.size());
-
- if (output_type == loco::DataType::U8)
- {
- asymmetric_wquant_per_channel(circle_const, min, max, scaling_factor, zp, nudged_min,
- nudged_max);
- asymmetric_wdequant_per_channel(circle_const, scaling_factor, nudged_min);
- }
- else
- {
- sym_wquant_per_channel(circle_const, min, max, scaling_factor, zp, nudged_min,
- nudged_max);
- sym_wdequant_per_channel(circle_const, scaling_factor);
- }
-
- auto quantparam = std::make_unique<CircleQuantParam>();
- quantparam->min = nudged_min;
- quantparam->max = nudged_max;
- quantparam->scale = scaling_factor;
- quantparam->zerop = zp;
- circle_node->quantparam(std::move(quantparam));
- }
- // Find min/max per layer-wise
- else
- {
- float min = std::numeric_limits<float>::max();
- float max = std::numeric_limits<float>::lowest();
- for (uint32_t i = 0; i < circle_const->size<loco::DataType::FLOAT32>(); i++)
- {
- auto data = circle_const->at<loco::DataType::FLOAT32>(i);
- min = data < min ? data : min;
- max = data > max ? data : max;
- }
- float scaling_factor{0};
- int64_t zp{0};
- float nudged_min{0};
- float nudged_max{0};
-
- asymmetric_wquant_with_minmax_per_layer(circle_const, min, max, scaling_factor, zp,
- nudged_min, nudged_max);
- asymmetric_wdequant_with_minmax_per_layer(circle_const, scaling_factor, nudged_min);
- auto quantparam = std::make_unique<CircleQuantParam>();
- quantparam->min.push_back(nudged_min);
- quantparam->max.push_back(nudged_max);
- quantparam->scale.push_back(scaling_factor);
- quantparam->zerop.push_back(zp);
- circle_node->quantparam(std::move(quantparam));
- }
- }
+ cal_minmax_per_channel(weights, min, max);
+
+ std::vector<float> nudged_min(min.size());
+ std::vector<float> nudged_max(min.size());
+ std::vector<float> scaling_factor(min.size());
+ std::vector<int64_t> zp(min.size());
+
+ if (output_type == loco::DataType::U8)
+ {
+ asymmetric_wquant_per_channel(weights, min, max, scaling_factor, zp, nudged_min, nudged_max);
+ asymmetric_wdequant_per_channel(weights, scaling_factor, nudged_min);
+ }
+ else
+ {
+ sym_wquant_per_channel(weights, min, max, scaling_factor, nudged_min, nudged_max);
+ sym_wdequant_per_channel(weights, scaling_factor);
+ }
+
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->min = nudged_min;
+ quantparam->max = nudged_max;
+ quantparam->scale = scaling_factor;
+ quantparam->zerop = zp;
+ weights->quantparam(std::move(quantparam));
+ }
+
+private:
+ void fake_quantize(luci::CircleConst *weights) const
+ {
+ switch (granularity)
+ {
+ case luci::QuantizationGranularity::ChannelWise:
+ fake_quantize_cwq(weights);
+ break;
+ case luci::QuantizationGranularity::LayerWise:
+ fake_quantize_lwq(weights);
+ break;
+ default:
+ throw std::invalid_argument("Unsupported granularity");
}
- return false;
+ }
+
+private:
+ // Check if
+ // 1. node is const
+ // 2. node's dtype is float32
+ bool is_quantizable(loco::Node *node)
+ {
+ auto const_node = dynamic_cast<luci::CircleConst *>(node);
+ if (not const_node)
+ return false;
+
+ // Skip if this is not float32
+ if (const_node->dtype() != loco::DataType::FLOAT32)
+ return false;
+
+ return true;
+ }
+
+ // Default behavior (Do nothing)
+ void visit(luci::CircleNode *) {}
+
+ void visit(luci::CircleConv2D *node)
+ {
+ LOGGER(l);
+ INFO(l) << "QuantizeDequantizeWeights visit node: " << node->name() << std::endl;
+
+ if (not is_quantizable(node->filter()))
+ return;
+
+ auto weights = loco::must_cast<luci::CircleConst *>(node->filter());
+ auto new_weights = luci::clone(weights);
+ node->filter(new_weights);
+ fake_quantize(new_weights);
+ }
+
+ void visit(luci::CircleDepthwiseConv2D *node)
+ {
+ LOGGER(l);
+ INFO(l) << "QuantizeDequantizeWeights visit node: " << node->name() << std::endl;
+
+ if (not is_quantizable(node->filter()))
+ return;
+
+ auto weights = loco::must_cast<luci::CircleConst *>(node->filter());
+ auto new_weights = luci::clone(weights);
+ node->filter(new_weights);
+ fake_quantize(new_weights);
+ }
+
+ void visit(luci::CircleTransposeConv *node)
+ {
+ LOGGER(l);
+ INFO(l) << "QuantizeDequantizeWeights visit node: " << node->name() << std::endl;
+
+ if (not is_quantizable(node->filter()))
+ return;
+
+ auto weights = loco::must_cast<luci::CircleConst *>(node->filter());
+ auto new_weights = luci::clone(weights);
+ node->filter(new_weights);
+ fake_quantize(new_weights);
+ }
+
+ void visit(luci::CircleFullyConnected *node)
+ {
+ LOGGER(l);
+ INFO(l) << "QuantizeDequantizeWeights visit node: " << node->name() << std::endl;
+
+ if (not is_quantizable(node->weights()))
+ return;
+
+ auto weights = loco::must_cast<luci::CircleConst *>(node->weights());
+ auto new_weights = luci::clone(weights);
+ node->weights(new_weights);
+ fake_quantize(new_weights);
}
};
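The rewritten visitor replaces the generic weights scan with one visit per supported op, and each visit clones the const before touching it: the node is rebound to the clone and only the clone is fake-quantized, so a const feeding several nodes is never mutated behind their backs (the removed code instead bailed out unless the weights had exactly one user). A toy standalone illustration of the clone-before-mutate rule, not luci code:

```cpp
#include <memory>
#include <vector>

struct Tensor
{
  std::vector<float> data;
};

struct Conv
{
  std::shared_ptr<Tensor> filter;
};

// Stand-in for fake quantization: any in-place mutation would do here.
void fake_quantize(Tensor &t)
{
  for (auto &v : t.data)
    v = static_cast<float>(static_cast<int>(v * 4.0f)) / 4.0f;
}

int main()
{
  auto shared = std::make_shared<Tensor>(Tensor{{0.1f, 0.9f}});
  Conv a{shared}, b{shared}; // two nodes share one const

  // Quantize a's filter only: clone first, rebind, then mutate the clone.
  a.filter = std::make_shared<Tensor>(*shared);
  fake_quantize(*a.filter);

  // b.filter still sees the untouched float weights.
  return 0;
}
```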
@@ -480,11 +434,36 @@ bool QuantizeDequantizeWeightsPass::run(loco::Graph *g)
LOGGER(l);
INFO(l) << "QuantizeDequantizeWeightsPass Start" << std::endl;
+ auto info_by_name = layer_info_map(g, _ctx->layers_info);
+
+ auto quantize_dtype = [&](const luci::CircleNode *node) {
+ auto iter = info_by_name.find(node->name());
+
+ // Return designated quantization dtype
+ if (iter != info_by_name.end())
+ return iter->second.dtype;
+
+ // Return default quantization dtype
+ return _ctx->output_model_dtype;
+ };
+
+ auto quantize_granularity = [&](const luci::CircleNode *node) {
+ auto iter = info_by_name.find(node->name());
+
+ // Return designated quantization granularity
+ if (iter != info_by_name.end())
+ return iter->second.granularity;
+
+ // Return default quantization granularity
+ return _ctx->granularity;
+ };
+
// Quantize weights
for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
- QuantizeDequantizeWeights qw(_input_dtype, _output_dtype, _granularity);
auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ QuantizeDequantizeWeights qw(_ctx->input_model_dtype, quantize_dtype(circle_node),
+ quantize_granularity(circle_node));
circle_node->accept(&qw);
}
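run() now consults layer_info_map so that individual layers can override the pass-wide dtype and granularity by name, falling back to the Context defaults otherwise. A minimal sketch of the lookup-with-default pattern; LayerInfo and its fields are placeholders here, not the luci declarations:

```cpp
#include <string>
#include <unordered_map>

enum class DType
{
  U8,
  S16
};

struct LayerInfo
{
  DType dtype;
};

// Designated per-layer dtype if present, pass-wide default otherwise.
DType quantize_dtype(const std::unordered_map<std::string, LayerInfo> &info_by_name,
                     const std::string &name, DType default_dtype)
{
  auto iter = info_by_name.find(name);
  return iter != info_by_name.end() ? iter->second.dtype : default_dtype;
}

int main()
{
  std::unordered_map<std::string, LayerInfo> info{{"conv1", {DType::S16}}};
  auto a = quantize_dtype(info, "conv1", DType::U8); // S16 (designated)
  auto b = quantize_dtype(info, "conv2", DType::U8); // U8 (default)
  (void)a;
  (void)b;
  return 0;
}
```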
diff --git a/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.test.cpp b/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.test.cpp
new file mode 100644
index 000000000..15f5ca7ac
--- /dev/null
+++ b/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.test.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/QuantizeDequantizeWeightsPass.h"
+
+#include <gtest/gtest.h>
+
+TEST(QuantizeDequantizeWeightsPassTest, name)
+{
+ luci::QuantizeDequantizeWeightsPass pass(loco::DataType::FLOAT32, loco::DataType::U8,
+ luci::QuantizationGranularity::LayerWise);
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(QuantizeDequantizeWeightsPassTest, name_ctx)
+{
+ auto ctx = std::make_unique<luci::QuantizeDequantizeWeightsPass::Context>();
+ {
+ ctx->input_model_dtype = loco::DataType::FLOAT32;
+ ctx->output_model_dtype = loco::DataType::U8;
+ ctx->granularity = luci::QuantizationGranularity::LayerWise;
+ }
+
+ luci::QuantizeDequantizeWeightsPass pass(std::move(ctx));
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
diff --git a/compiler/luci/pass/src/QuantizePreCheckerPass.cpp b/compiler/luci/pass/src/QuantizePreCheckerPass.cpp
new file mode 100644
index 000000000..4b3b7e330
--- /dev/null
+++ b/compiler/luci/pass/src/QuantizePreCheckerPass.cpp
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/QuantizePreCheckerPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+#include <luci/Log.h>
+
+namespace luci
+{
+
+namespace
+{
+
+void check_const_opcode(luci::CircleNode *node)
+{
+ if (node == nullptr)
+ return;
+
+ if (node->opcode() != luci::CircleOpcode::CIRCLECONST and
+ node->opcode() != luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE)
+ {
+ throw std::runtime_error("Unsupported non const input " + node->name());
+ }
+}
+
+struct ConstInputChecker final : public luci::CircleNodeMutableVisitor<void>
+{
+// INPUT_NAME is the name of the const input for the current NODE
+#define CHECK_NODE_WITH_ONE_INPUT_CONST(NODE, INPUT_NAME) \
+ void visit(NODE *node) \
+ { \
+ const auto input = dynamic_cast<luci::CircleNode *>(node->INPUT_NAME()); \
+ check_const_opcode(input); \
+ }
+
+// INPUT_NAME_1 and INPUT_NAME_2 are the names of the const inputs for the current NODE
+#define CHECK_NODE_WITH_TWO_INPUT_CONST(NODE, INPUT_NAME_1, INPUT_NAME_2) \
+ void visit(NODE *node) \
+ { \
+ const auto input_1 = dynamic_cast<luci::CircleNode *>(node->INPUT_NAME_1()); \
+ const auto input_2 = dynamic_cast<luci::CircleNode *>(node->INPUT_NAME_2()); \
+ \
+ check_const_opcode(input_1); \
+ check_const_opcode(input_2); \
+ }
+
+// INPUT_NAME_1, INPUT_NAME_2 and INPUT_NAME_3 are the names of the const inputs for the current NODE
+#define CHECK_NODE_WITH_THREE_INPUT_CONST(NODE, INPUT_NAME_1, INPUT_NAME_2, INPUT_NAME_3) \
+ void visit(NODE *node) \
+ { \
+ const auto input_1 = dynamic_cast<luci::CircleNode *>(node->INPUT_NAME_1()); \
+ const auto input_2 = dynamic_cast<luci::CircleNode *>(node->INPUT_NAME_2()); \
+ const auto input_3 = dynamic_cast<luci::CircleNode *>(node->INPUT_NAME_3()); \
+ \
+ check_const_opcode(input_1); \
+ check_const_opcode(input_2); \
+ check_const_opcode(input_3); \
+ }
+
+ // Skip other circle nodes
+ void visit(luci::CircleNode *) {}
+
+ // Ops that receive one const node as input
+ CHECK_NODE_WITH_ONE_INPUT_CONST(luci::CirclePRelu, alpha)
+
+ // Ops that receive two const nodes as inputs
+ CHECK_NODE_WITH_TWO_INPUT_CONST(luci::CircleConv2D, filter, bias)
+ CHECK_NODE_WITH_TWO_INPUT_CONST(luci::CircleDepthwiseConv2D, filter, bias)
+ CHECK_NODE_WITH_TWO_INPUT_CONST(luci::CircleFullyConnected, weights, bias)
+ CHECK_NODE_WITH_TWO_INPUT_CONST(luci::CircleInstanceNorm, gamma, beta)
+
+ // Ops that receive three const nodes as inputs
+ CHECK_NODE_WITH_THREE_INPUT_CONST(luci::CircleTransposeConv, inputSizes, filter, bias)
+
+#undef CHECK_NODE_WITH_ONE_INPUT_CONST
+#undef CHECK_NODE_WITH_TWO_INPUT_CONST
+#undef CHECK_NODE_WITH_THREE_INPUT_CONST
+};
+
+} // namespace
+
+/**
+ * Verify the input model has the form acceptable by quantizer
+ */
+bool QuantizePreCheckerPass::run(loco::Graph *g)
+{
+ LOGGER(l);
+ INFO(l) << "QuantizePreCheckerPass Start" << std::endl;
+
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ // Check const inputs
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ ConstInputChecker checker{};
+ circle_node->accept(&checker);
+ }
+
+ INFO(l) << "QuantizePreCheckerPass End" << std::endl;
+
+ return false; // one time run
+}
+
+} // namespace luci
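For readability, the CHECK_NODE_WITH_*_INPUT_CONST macros above stamp out one visit overload per op. Expanded by hand, CHECK_NODE_WITH_TWO_INPUT_CONST(luci::CircleConv2D, filter, bias) becomes roughly the following member of ConstInputChecker (formatting differs from the raw preprocessor output):

```cpp
void visit(luci::CircleConv2D *node)
{
  const auto input_1 = dynamic_cast<luci::CircleNode *>(node->filter());
  const auto input_2 = dynamic_cast<luci::CircleNode *>(node->bias());

  check_const_opcode(input_1);
  check_const_opcode(input_2);
}
```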
diff --git a/compiler/luci/pass/src/QuantizePreCheckerPass.test.cpp b/compiler/luci/pass/src/QuantizePreCheckerPass.test.cpp
new file mode 100644
index 000000000..8f6a96f33
--- /dev/null
+++ b/compiler/luci/pass/src/QuantizePreCheckerPass.test.cpp
@@ -0,0 +1,402 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/QuantizePreCheckerPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+class SimpleConv2DGraph
+{
+public:
+ SimpleConv2DGraph(bool make_valid)
+ {
+ conv2d_node = g.nodes()->create<luci::CircleConv2D>();
+ input_1 = g.nodes()->create<luci::CircleInput>();
+ filter = g.nodes()->create<luci::CircleConst>();
+
+ conv2d_node->input(input_1);
+ conv2d_node->filter(filter);
+
+ if (make_valid)
+ {
+ bias = g.nodes()->create<luci::CircleConst>();
+ conv2d_node->bias(bias);
+ }
+ else
+ {
+ input_2 = g.nodes()->create<luci::CircleInput>();
+ conv2d_node->bias(input_2);
+ }
+
+ output = g.nodes()->create<luci::CircleOutput>();
+
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+
+ output->from(conv2d_node);
+ }
+
+public:
+ loco::Graph g;
+
+private:
+ luci::CircleConv2D *conv2d_node = nullptr;
+ luci::CircleInput *input_1 = nullptr;
+ luci::CircleInput *input_2 = nullptr;
+ luci::CircleConst *filter = nullptr;
+ luci::CircleConst *bias = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+class SimpleDepthConv2DGraph
+{
+public:
+ SimpleDepthConv2DGraph(bool make_valid)
+ {
+ depth_conv2d_node = g.nodes()->create<luci::CircleDepthwiseConv2D>();
+ input_1 = g.nodes()->create<luci::CircleInput>();
+ filter = g.nodes()->create<luci::CircleConst>();
+
+ depth_conv2d_node->input(input_1);
+ depth_conv2d_node->filter(filter);
+
+ if (make_valid)
+ {
+ bias = g.nodes()->create<luci::CircleConst>();
+ depth_conv2d_node->bias(bias);
+ }
+ else
+ {
+ input_2 = g.nodes()->create<luci::CircleInput>();
+ depth_conv2d_node->bias(input_2);
+ }
+
+ output = g.nodes()->create<luci::CircleOutput>();
+
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+
+ output->from(depth_conv2d_node);
+ }
+
+public:
+ loco::Graph g;
+
+private:
+ luci::CircleDepthwiseConv2D *depth_conv2d_node = nullptr;
+ luci::CircleInput *input_1 = nullptr;
+ luci::CircleInput *input_2 = nullptr;
+ luci::CircleConst *filter = nullptr;
+ luci::CircleConst *bias = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+class SimpleFCGraph
+{
+public:
+ SimpleFCGraph(bool make_valid)
+ {
+ fc_node = g.nodes()->create<luci::CircleFullyConnected>();
+ input_1 = g.nodes()->create<luci::CircleInput>();
+ weights = g.nodes()->create<luci::CircleConst>();
+
+ fc_node->input(input_1);
+ fc_node->weights(weights);
+
+ if (make_valid)
+ {
+ bias = g.nodes()->create<luci::CircleConst>();
+ fc_node->bias(bias);
+ }
+ else
+ {
+ input_2 = g.nodes()->create<luci::CircleInput>();
+ fc_node->bias(input_2);
+ }
+
+ output = g.nodes()->create<luci::CircleOutput>();
+
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+
+ output->from(fc_node);
+ }
+
+public:
+ loco::Graph g;
+
+private:
+ luci::CircleFullyConnected *fc_node = nullptr;
+ luci::CircleInput *input_1 = nullptr;
+ luci::CircleInput *input_2 = nullptr;
+ luci::CircleConst *weights = nullptr;
+ luci::CircleConst *bias = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+class SimpleInstanceNormGraph
+{
+public:
+ SimpleInstanceNormGraph(bool make_valid)
+ {
+ instance_norm_node = g.nodes()->create<luci::CircleInstanceNorm>();
+ input_1 = g.nodes()->create<luci::CircleInput>();
+ gamma = g.nodes()->create<luci::CircleConst>();
+
+ instance_norm_node->input(input_1);
+ instance_norm_node->gamma(gamma);
+
+ if (make_valid)
+ {
+ beta = g.nodes()->create<luci::CircleConst>();
+ instance_norm_node->beta(beta);
+ }
+ else
+ {
+ input_2 = g.nodes()->create<luci::CircleInput>();
+ instance_norm_node->beta(input_2);
+ }
+
+ output = g.nodes()->create<luci::CircleOutput>();
+
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+
+ output->from(instance_norm_node);
+ }
+
+public:
+ loco::Graph g;
+
+private:
+ luci::CircleInstanceNorm *instance_norm_node = nullptr;
+ luci::CircleInput *input_1 = nullptr;
+ luci::CircleInput *input_2 = nullptr;
+ luci::CircleConst *gamma = nullptr;
+ luci::CircleConst *beta = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+class SimpleTransposeConvGraph
+{
+public:
+ SimpleTransposeConvGraph(bool make_valid)
+ {
+ transpose_conv = g.nodes()->create<luci::CircleTransposeConv>();
+ input_1 = g.nodes()->create<luci::CircleInput>();
+
+ input_sizes = g.nodes()->create<luci::CircleConst>();
+ filter = g.nodes()->create<luci::CircleConst>();
+
+ transpose_conv->outBackprop(input_1);
+ transpose_conv->filter(filter);
+ transpose_conv->inputSizes(input_sizes);
+ transpose_conv->fusedActivationFunction(luci::FusedActFunc::NONE);
+
+ if (make_valid)
+ {
+ bias = g.nodes()->create<luci::CircleConst>();
+ transpose_conv->bias(bias);
+ }
+ else
+ {
+ input_2 = g.nodes()->create<luci::CircleInput>();
+ transpose_conv->bias(input_2);
+ }
+
+ output = g.nodes()->create<luci::CircleOutput>();
+
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+
+ output->from(transpose_conv);
+ }
+
+public:
+ loco::Graph g;
+
+private:
+ luci::CircleTransposeConv *transpose_conv = nullptr;
+ luci::CircleInput *input_1 = nullptr;
+ luci::CircleInput *input_2 = nullptr;
+ luci::CircleConst *input_sizes = nullptr;
+ luci::CircleConst *filter = nullptr;
+ luci::CircleConst *bias = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+class SimplePReluGraph
+{
+public:
+ SimplePReluGraph(bool make_valid)
+ {
+ prelu = g.nodes()->create<luci::CirclePRelu>();
+ input_1 = g.nodes()->create<luci::CircleInput>();
+
+ prelu->input(input_1);
+
+ if (make_valid)
+ {
+ alpha = g.nodes()->create<luci::CircleConst>();
+ prelu->alpha(alpha);
+ }
+ else
+ {
+ input_2 = g.nodes()->create<luci::CircleInput>();
+ prelu->alpha(input_2);
+ }
+
+ output = g.nodes()->create<luci::CircleOutput>();
+
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+
+ output->from(prelu);
+ }
+
+public:
+ loco::Graph g;
+
+private:
+ luci::CirclePRelu *prelu = nullptr;
+ luci::CircleInput *input_1 = nullptr;
+ luci::CircleInput *input_2 = nullptr;
+ luci::CircleConst *alpha = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+TEST(QuantizePreCheckerPassTest, name)
+{
+ luci::QuantizePreCheckerPass pass{};
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+// Test Conv2d
+TEST(QuantizePreCheckerPassTest, conv2d)
+{
+ SimpleConv2DGraph valid_graph(true);
+
+ luci::QuantizePreCheckerPass checker{};
+
+ EXPECT_NO_THROW(checker.run(&valid_graph.g));
+}
+
+TEST(QuantizePreCheckerPassTest, conv2d_NEG)
+{
+ SimpleConv2DGraph invalid_graph(false);
+
+ luci::QuantizePreCheckerPass checker{};
+
+ EXPECT_ANY_THROW(checker.run(&invalid_graph.g));
+}
+
+// Test DepthwiseConv2d
+TEST(QuantizePreCheckerPassTest, depthwise_conv2d)
+{
+ SimpleDepthConv2DGraph valid_graph(true);
+
+ luci::QuantizePreCheckerPass checker{};
+
+ EXPECT_NO_THROW(checker.run(&valid_graph.g));
+}
+
+TEST(QuantizePreCheckerPassTest, depthwise_conv2d_NEG)
+{
+ SimpleDepthConv2DGraph invalid_graph(false);
+
+ luci::QuantizePreCheckerPass checker{};
+
+ EXPECT_ANY_THROW(checker.run(&invalid_graph.g));
+}
+
+// Test FullyConnected
+TEST(QuantizePreCheckerPassTest, fully_connected)
+{
+ SimpleFCGraph valid_graph(true);
+
+ luci::QuantizePreCheckerPass checker{};
+
+ EXPECT_NO_THROW(checker.run(&valid_graph.g));
+}
+
+TEST(QuantizePreCheckerPassTest, fully_connected_NEG)
+{
+ SimpleFCGraph invalid_graph(false);
+
+ luci::QuantizePreCheckerPass checker{};
+
+ EXPECT_ANY_THROW(checker.run(&invalid_graph.g));
+}
+
+// Test InstanceNorm
+TEST(QuantizePreCheckerPassTest, instance_norm)
+{
+ SimpleInstanceNormGraph valid_graph(true);
+
+ luci::QuantizePreCheckerPass checker{};
+
+ EXPECT_NO_THROW(checker.run(&valid_graph.g));
+}
+
+TEST(QuantizePreCheckerPassTest, instance_norm_NEG)
+{
+ SimpleInstanceNormGraph invalid_graph(false);
+
+ luci::QuantizePreCheckerPass checker{};
+
+ EXPECT_ANY_THROW(checker.run(&invalid_graph.g));
+}
+
+// Test TransposeConv
+TEST(QuantizePreCheckerPassTest, transpose_conv)
+{
+ SimpleTransposeConvGraph valid_graph(true);
+
+ luci::QuantizePreCheckerPass checker{};
+
+ EXPECT_NO_THROW(checker.run(&valid_graph.g));
+}
+
+TEST(QuantizePreCheckerPassTest, transpose_conv_NEG)
+{
+ SimpleTransposeConvGraph invalid_graph(false);
+
+ luci::QuantizePreCheckerPass checker{};
+
+ EXPECT_ANY_THROW(checker.run(&invalid_graph.g));
+}
+
+// Test PRelu
+TEST(QuantizePreCheckerPassTest, prelu)
+{
+ SimplePReluGraph valid_graph(true);
+
+ luci::QuantizePreCheckerPass checker{};
+
+ EXPECT_NO_THROW(checker.run(&valid_graph.g));
+}
+
+TEST(QuantizePreCheckerPassTest, prelu_NEG)
+{
+ SimplePReluGraph invalid_graph(false);
+
+ luci::QuantizePreCheckerPass checker{};
+
+ EXPECT_ANY_THROW(checker.run(&invalid_graph.g));
+}
diff --git a/compiler/luci/pass/src/QuantizeWeights.cpp b/compiler/luci/pass/src/QuantizeWeights.cpp
new file mode 100644
index 000000000..59329c19e
--- /dev/null
+++ b/compiler/luci/pass/src/QuantizeWeights.cpp
@@ -0,0 +1,563 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "QuantizeWeights.h"
+#include "QuantizationUtils.h"
+
+#include <luci/Service/Nodes/CircleConst.h>
+#include <luci/Log.h>
+
+#include <cmath>
+#include <vector>
+#include <functional>
+#include <limits>
+
+using namespace luci;
+
+namespace
+{
+
+using IterFunc = std::function<void(uint32_t *, loco::TensorShape &, int32_t)>;
+
+void iterate_per_channel(CircleConst *node, int32_t &channel_dim_index, IterFunc func)
+{
+ loco::TensorShape dimension;
+ dimension.rank(4);
+ uint32_t indices[4] = {
+ 0,
+ };
+
+ if (!get_channel_dim_index(node, dimension, channel_dim_index))
+ {
+ assert(false);
+ return;
+ }
+
+ for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
+ {
+ for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
+ {
+ for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
+ {
+ for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
+ {
+ func(indices, dimension, channel_dim_index);
+ }
+ }
+ }
+ }
+}
+
+void asym_wquant_per_channel(CircleConst *node, std::vector<float> &min,
+ std::vector<float> &scaling_factor, int32_t &channel_dim_index)
+{
+ assert(node->dtype() == loco::DataType::FLOAT32);
+
+ const int32_t kMinScale = 0;
+ const int32_t kMaxScale = 255;
+
+ uint32_t size = node->size<loco::DataType::FLOAT32>();
+ std::vector<int32_t> quantized_values(size);
+
+ auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int32_t channel_dim_index) {
+ int channel_idx = indices[channel_dim_index];
+ const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
+ auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+ quantized_values[cal_offset(dimension, indices)] =
+ static_cast<int32_t>(std::round((data - min[channel_idx]) * scaling_factor_inv));
+ };
+
+ iterate_per_channel(node, channel_dim_index, quantize);
+
+ node->dtype(loco::DataType::U8); // change the type of tensor
+ node->size<loco::DataType::U8>(size); // resize tensor
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ }
+}
+
+// TODO Reduce duplicate code with QuantizeDequantizeWeights
+void sym_wquant_per_channel(CircleConst *node, std::vector<float> &min, std::vector<float> &max,
+ std::vector<float> &scaling_factor, std::vector<float> &nudged_min,
+ std::vector<float> &nudged_max, int32_t &channel_dim_index)
+{
+ assert(node->dtype() == loco::DataType::FLOAT32);
+ const int32_t kMaxScale = std::numeric_limits<int16_t>::max();
+ const int32_t kMinScale = -kMaxScale;
+
+ uint32_t size = node->size<loco::DataType::FLOAT32>();
+ std::vector<int32_t> quantized_values(size);
+
+ for (size_t i = 0; i < min.size(); ++i)
+ {
+ compute_sym_scale(min[i], max[i], scaling_factor[i], nudged_min[i], nudged_max[i]);
+ }
+
+ auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
+ int channel_idx = indices[channel_dim_index];
+ const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
+ auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+ data = data < nudged_min[channel_idx] ? nudged_min[channel_idx] : data;
+ data = data > nudged_max[channel_idx] ? nudged_max[channel_idx] : data;
+ quantized_values[cal_offset(dimension, indices)] =
+ static_cast<int32_t>(std::round(data * scaling_factor_inv));
+ };
+
+ iterate_per_channel(node, channel_dim_index, quantize);
+
+ node->dtype(loco::DataType::S16); // change the type of tensor
+ node->size<loco::DataType::S16>(size); // resize tensor
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ node->at<loco::DataType::S16>(i) =
+ std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ }
+}
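compute_sym_scale itself is not part of this patch; under the usual symmetric scheme, consistent with the ±32767 clamp above, it would derive the scale from the channel's absolute maximum:

```latex
s_c = \frac{\max(\lvert m_c \rvert,\ \lvert M_c \rvert)}{32767},
\qquad
q = \operatorname{clamp}\!\left(\operatorname{round}\!\left(\frac{f}{s_c}\right),\ -32767,\ 32767\right),
\qquad
f' \approx s_c\, q
```

Note that kMinScale is -32767 rather than the int16 minimum of -32768, keeping the range symmetric around zero.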
+
+void cal_minmax_per_channel(CircleConst *node, std::vector<float> &min, std::vector<float> &max,
+ int32_t &channel_dim_index)
+{
+ loco::TensorShape dimension;
+ dimension.rank(4);
+
+ if (!get_channel_dim_index(node, dimension, channel_dim_index))
+ {
+ throw std::runtime_error("Failed to find channel index in " + node->name());
+ }
+ auto size = dimension.dim(channel_dim_index).value();
+
+ std::vector<bool> has_min_max_value(size, false);
+ min.resize(size);
+ max.resize(size);
+
+ auto cal_minmax = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
+ int channel_idx = indices[channel_dim_index];
+ auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+ if (has_min_max_value[channel_idx])
+ {
+ min[channel_idx] = data < min[channel_idx] ? data : min[channel_idx];
+ max[channel_idx] = data > max[channel_idx] ? data : max[channel_idx];
+ }
+ else
+ {
+ min[channel_idx] = data;
+ max[channel_idx] = data;
+ has_min_max_value[channel_idx] = true;
+ }
+ };
+
+ iterate_per_channel(node, channel_dim_index, cal_minmax);
+}
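cal_minmax_per_channel seeds each channel from its first visited element via the has_min_max_value flags rather than sentinel initial values. A standalone sketch of the same per-channel min/max bookkeeping over a flat buffer whose last dimension is the channel (sentinel form, for brevity):

```cpp
#include <cstdint>
#include <limits>
#include <vector>

void minmax_per_channel(const std::vector<float> &data, uint32_t channels,
                        std::vector<float> &min, std::vector<float> &max)
{
  min.assign(channels, std::numeric_limits<float>::max());
  max.assign(channels, std::numeric_limits<float>::lowest());
  for (uint32_t i = 0; i < data.size(); ++i)
  {
    const uint32_t c = i % channels; // channel is the innermost dimension
    min[c] = data[i] < min[c] ? data[i] : min[c];
    max[c] = data[i] > max[c] ? data[i] : max[c];
  }
}

int main()
{
  std::vector<float> w{-1.0f, 2.0f, 0.5f, 3.0f}; // two elements per channel
  std::vector<float> min, max;
  minmax_per_channel(w, 2, min, max);
  // min == {-1.0, 2.0}, max == {0.5, 3.0}
  return 0;
}
```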
+
+void asymmetric_wquant_per_channel(CircleConst *node, std::vector<float> &min,
+ std::vector<float> &max, std::vector<float> &scaling_factor,
+ std::vector<int64_t> &zp, std::vector<float> &nudged_min,
+ std::vector<float> &nudged_max, int32_t &channel_dim_index)
+{
+ assert(node->dtype() == loco::DataType::FLOAT32);
+
+ const int32_t kMinScale = 0;
+ const int32_t kMaxScale = 255;
+
+ uint32_t size = node->size<loco::DataType::FLOAT32>();
+ std::vector<int32_t> quantized_values(size);
+
+ for (size_t i = 0; i < min.size(); ++i)
+ {
+ compute_asym_scale_zp(min[i], max[i], scaling_factor[i], zp[i], nudged_min[i], nudged_max[i]);
+ }
+
+ auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
+ int channel_idx = indices[channel_dim_index];
+ const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
+ auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+ data = data < nudged_min[channel_idx] ? nudged_min[channel_idx] : data;
+ data = data > nudged_max[channel_idx] ? nudged_max[channel_idx] : data;
+ quantized_values[cal_offset(dimension, indices)] =
+ static_cast<int32_t>(std::round((data - nudged_min[channel_idx]) * scaling_factor_inv));
+ };
+
+ iterate_per_channel(node, channel_dim_index, quantize);
+
+ node->dtype(loco::DataType::U8); // change the type of tensor
+ node->size<loco::DataType::U8>(size); // resize tensor
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ }
+}
+
+void sym_wquant_per_channel(CircleConst *node, std::vector<float> &scaling_factor,
+ int32_t &channel_dim_index)
+{
+ assert(node->dtype() == loco::DataType::FLOAT32);
+
+ const int32_t kMaxScale = std::numeric_limits<int16_t>::max();
+ const int32_t kMinScale = -kMaxScale;
+
+ uint32_t size = node->size<loco::DataType::FLOAT32>();
+ std::vector<int32_t> quantized_values(size);
+
+ auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int32_t channel_dim_index) {
+ int channel_idx = indices[channel_dim_index];
+ const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
+ auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+ quantized_values[cal_offset(dimension, indices)] =
+ static_cast<int32_t>(std::round(data * scaling_factor_inv));
+ };
+
+ iterate_per_channel(node, channel_dim_index, quantize);
+
+ node->dtype(loco::DataType::S16); // change the type of tensor
+ node->size<loco::DataType::S16>(size); // resize tensor
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ node->at<loco::DataType::S16>(i) =
+ std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ }
+}
+
+void asym_wquant_per_layer(CircleConst *node, float min, float scaling_factor)
+{
+ const int32_t kMinScale = 0;
+ const int32_t kMaxScale = 255;
+
+ uint32_t size = node->size<loco::DataType::FLOAT32>();
+
+ const float scaling_factor_inv = 1.0 / scaling_factor;
+ std::vector<int32_t> quantized_values(size);
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ auto data = node->at<loco::DataType::FLOAT32>(i);
+ quantized_values[i] = static_cast<int32_t>(std::round((data - min) * scaling_factor_inv));
+ }
+
+ node->dtype(loco::DataType::U8); // change the type of tensor
+ node->size<loco::DataType::U8>(size); // resize tensor
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ }
+}
+
+// Quantize const per channel
+//
+// The last dimension of the const is the channel dimension
+// All other dimensions of the const should be 1
+// So a 'single value' is quantized per channel
+//
+// Quantization spec (f: fp value, q: quantized value)
+//
+// uint8
+// Positive f: f = f * (q - 0) [q = 1, scale = f, zp = 0]
+// Negative f: f = (-f) * (q - 1) [q = 0, scale = -f, zp = 1]
+//
+// int16
+// Positive f: f = f * (q - 0) [q = 1, scale = f, zp = 0]
+// Negative f: f = (-f) * (q - 0) [q = -1, scale = -f, zp = 0]
+void quant_const_per_channel(CircleConst *node, loco::DataType quant_type)
+{
+ assert(node->dtype() == loco::DataType::FLOAT32);
+ assert(node->rank() > 0);
+
+ for (uint32_t i = 0; i < node->rank() - 1; i++)
+ {
+ // Caller should call this function only when the condition below is satisfied
+ if (node->dim(i).value() != 1)
+ throw std::runtime_error("Non-channel dimension of const node must be 1");
+ }
+
+ uint32_t size = node->size<loco::DataType::FLOAT32>();
+ assert(size == node->dim(node->rank() - 1).value());
+
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->quantized_dimension = node->rank() - 1;
+ std::vector<int32_t> quantized_data(size);
+
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ auto data = node->at<loco::DataType::FLOAT32>(i);
+ if (quant_type == loco::DataType::U8)
+ {
+ if (data >= 0)
+ {
+ quantparam->scale.push_back(data);
+ quantparam->zerop.push_back(0);
+ quantized_data[i] = 1;
+ }
+ else
+ {
+ quantparam->scale.push_back(-data);
+ quantparam->zerop.push_back(1);
+ quantized_data[i] = 0;
+ }
+ }
+ else if (quant_type == loco::DataType::S16)
+ {
+ if (data >= 0)
+ {
+ quantparam->scale.push_back(data);
+ quantized_data[i] = 1;
+ }
+ else
+ {
+ quantparam->scale.push_back(-data);
+ quantized_data[i] = -1;
+ }
+ quantparam->zerop.push_back(0);
+ }
+ }
+ node->quantparam(std::move(quantparam));
+
+ switch (quant_type)
+ {
+ case loco::DataType::U8:
+ node->dtype(loco::DataType::U8);
+ node->size<loco::DataType::U8>(size);
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ assert(quantized_data[i] == 0 || quantized_data[i] == 1);
+ node->at<loco::DataType::U8>(i) = quantized_data[i];
+ }
+ break;
+ case loco::DataType::S16:
+ node->dtype(loco::DataType::S16);
+ node->size<loco::DataType::S16>(size);
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ assert(quantized_data[i] == -1 || quantized_data[i] == 1);
+ node->at<loco::DataType::S16>(i) = quantized_data[i];
+ }
+ break;
+ default:
+ throw std::runtime_error("Unsupported data type");
+ }
+}
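Working the quantization spec from the comment above through concrete values (the numbers are illustrative):

```latex
\text{u8}:\quad f = 0.25 \Rightarrow (q, s, z) = (1,\ 0.25,\ 0),\quad s\,(q - z) = 0.25;
\qquad f = -0.5 \Rightarrow (q, s, z) = (0,\ 0.5,\ 1),\quad 0.5\,(0 - 1) = -0.5
```

```latex
\text{s16}:\quad f = -0.5 \Rightarrow (q, s, z) = (-1,\ 0.5,\ 0),\quad 0.5\,(-1 - 0) = -0.5
```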
+
+} // namespace
+
+namespace luci
+{
+
+void QuantizeWeights::quantize_weights(luci::CircleConst *weights)
+{
+ // Find min/max per channel
+ if (granularity == QuantizationGranularity::ChannelWise)
+ {
+ auto quantparam = weights->quantparam();
+ if (quantparam == nullptr)
+ {
+ // Find min/max on the fly
+ // NOTE This is for the case when QuantizeDequantizeWeights is skipped
+ // TODO Reduce duplicate code
+ std::vector<float> min;
+ std::vector<float> max;
+ int32_t channel_dim_index = 0;
+
+ cal_minmax_per_channel(weights, min, max, channel_dim_index);
+
+ std::vector<float> nudged_min(min.size());
+ std::vector<float> nudged_max(min.size());
+ std::vector<float> scaling_factor(min.size());
+ std::vector<int64_t> zp(min.size());
+
+ if (output_type == loco::DataType::U8)
+ {
+ asymmetric_wquant_per_channel(weights, min, max, scaling_factor, zp, nudged_min, nudged_max,
+ channel_dim_index);
+ }
+ else
+ {
+ sym_wquant_per_channel(weights, min, max, scaling_factor, nudged_min, nudged_max,
+ channel_dim_index);
+ }
+
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->scale = scaling_factor;
+ quantparam->zerop = zp;
+ quantparam->quantized_dimension = channel_dim_index;
+ weights->quantparam(std::move(quantparam));
+
+ return;
+ }
+
+ auto min = quantparam->min;
+ auto scaling_factor = quantparam->scale;
+ int32_t channel_dim_index = 0;
+
+ if (output_type == loco::DataType::U8)
+ {
+ asym_wquant_per_channel(weights, min, scaling_factor, channel_dim_index);
+ }
+ else
+ {
+ sym_wquant_per_channel(weights, scaling_factor, channel_dim_index);
+ }
+ quantparam->min.clear();
+ quantparam->max.clear();
+ quantparam->quantized_dimension = channel_dim_index;
+ }
+ // Find min/max per layer
+ else
+ {
+ auto quantparam = weights->quantparam();
+ if (quantparam == nullptr)
+ {
+ // Find min/max on the fly
+ // NOTE This is for the case when QuantizeDequantizeWeights is skipped
+ // TODO Reduce duplicate code
+ float min = std::numeric_limits<float>::max();
+ float max = std::numeric_limits<float>::lowest();
+ for (uint32_t i = 0; i < weights->size<loco::DataType::FLOAT32>(); i++)
+ {
+ auto data = weights->at<loco::DataType::FLOAT32>(i);
+ min = data < min ? data : min;
+ max = data > max ? data : max;
+ }
+ float scaling_factor{0};
+ int64_t zp{0};
+ float nudged_min{0};
+ float nudged_max{0};
+
+ asymmetric_wquant_with_minmax_per_layer(weights, min, max, scaling_factor, zp, nudged_min,
+ nudged_max);
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->scale.push_back(scaling_factor);
+ quantparam->zerop.push_back(zp);
+ weights->quantparam(std::move(quantparam));
+ return;
+ }
+
+ // Quantize using recorded quantparam
+ assert(quantparam != nullptr);
+ assert(quantparam->min.size() == 1); // only support layer-wise quant
+ assert(quantparam->scale.size() == 1); // only support layer-wise quant
+ auto min = quantparam->min[0];
+ auto scaling_factor = quantparam->scale[0];
+ asym_wquant_per_layer(weights, min, scaling_factor);
+ quantparam->min.clear();
+ quantparam->max.clear();
+ }
+}
+void QuantizeWeights::visit(luci::CircleConv2D *node)
+{
+ LOGGER(l);
+ INFO(l) << "QuantizeWeights QuantizeWeights::visit node: " << node->name() << std::endl;
+
+ auto weights = loco::must_cast<luci::CircleConst *>(node->filter());
+ if (!is_quantized(weights))
+ {
+ auto new_weights = luci::clone(weights);
+ node->filter(new_weights);
+ quantize_weights(new_weights);
+ }
+}
+
+void QuantizeWeights::visit(luci::CircleDepthwiseConv2D *node)
+{
+ LOGGER(l);
+ INFO(l) << "QuantizeWeights QuantizeWeights::visit node: " << node->name() << std::endl;
+
+ auto weights = loco::must_cast<luci::CircleConst *>(node->filter());
+ if (!is_quantized(weights))
+ {
+ auto new_weights = luci::clone(weights);
+ node->filter(new_weights);
+ quantize_weights(new_weights);
+ }
+}
+
+void QuantizeWeights::visit(luci::CircleInstanceNorm *node)
+{
+ LOGGER(l);
+ INFO(l) << "QuantizeWeights QuantizeWeights::visit node: " << node->name() << std::endl;
+
+ auto gamma = loco::must_cast<luci::CircleConst *>(node->gamma());
+ auto beta = loco::must_cast<luci::CircleConst *>(node->beta());
+
+ if (!is_quantized(gamma))
+ {
+ assert(gamma->dtype() == loco::DataType::FLOAT32);
+ auto new_gamma = luci::clone(gamma);
+ if (granularity == QuantizationGranularity::LayerWise)
+ quant_const(new_gamma, output_type);
+ else if (granularity == QuantizationGranularity::ChannelWise)
+ quant_const_per_channel(new_gamma, output_type);
+ node->gamma(new_gamma);
+ }
+ if (!is_quantized(beta))
+ {
+ assert(beta->dtype() == loco::DataType::FLOAT32);
+ auto new_beta = luci::clone(beta);
+ if (granularity == QuantizationGranularity::LayerWise)
+ quant_const(new_beta, output_type);
+ else if (granularity == QuantizationGranularity::ChannelWise)
+ quant_const_per_channel(new_beta, output_type);
+ node->beta(new_beta);
+ }
+}
+
+void QuantizeWeights::visit(luci::CirclePRelu *node)
+{
+ LOGGER(l);
+ INFO(l) << "QuantizeWeights QuantizeWeights::visit node: " << node->name() << std::endl;
+
+ auto alpha = loco::must_cast<luci::CircleConst *>(node->alpha());
+
+ if (!is_quantized(alpha))
+ {
+ assert(alpha->dtype() == loco::DataType::FLOAT32);
+ auto new_alpha = luci::clone(alpha);
+ if (granularity == QuantizationGranularity::LayerWise)
+ quant_const(new_alpha, output_type);
+ else if (granularity == QuantizationGranularity::ChannelWise)
+ quant_const_per_channel(new_alpha, output_type);
+ node->alpha(new_alpha);
+ }
+}
+
+void QuantizeWeights::visit(luci::CircleTransposeConv *node)
+{
+ LOGGER(l);
+ INFO(l) << "QuantizeWeights QuantizeWeights::visit node: " << node->name() << std::endl;
+
+ auto weights = loco::must_cast<luci::CircleConst *>(node->filter());
+ if (!is_quantized(weights))
+ {
+ auto new_weights = luci::clone(weights);
+ node->filter(new_weights);
+ quantize_weights(new_weights);
+ }
+}
+
+void QuantizeWeights::visit(luci::CircleFullyConnected *node)
+{
+ LOGGER(l);
+ INFO(l) << "QuantizeWeights QuantizeWeights::visit node: " << node->name() << std::endl;
+
+ auto weights = loco::must_cast<luci::CircleConst *>(node->weights());
+ if (!is_quantized(weights))
+ {
+ auto new_weights = luci::clone(weights);
+ node->weights(new_weights);
+ quantize_weights(new_weights);
+ }
+}
+
+void QuantizeWeights::visit(luci::CircleNode *) {}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/QuantizeWeights.h b/compiler/luci/pass/src/QuantizeWeights.h
new file mode 100644
index 000000000..f62cd40f3
--- /dev/null
+++ b/compiler/luci/pass/src/QuantizeWeights.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_QUANTIZE_WEIGHTS_H__
+#define __LUCI_QUANTIZE_WEIGHTS_H__
+
+#include <luci/Pass/QuantizationParameters.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+namespace luci
+{
+
+/**
+ * @brief QuantizeWeights quantizes tensors for weights
+ * @details Find min/max values on the fly and then quantize
+ */
+struct QuantizeWeights final : public luci::CircleNodeMutableVisitor<void>
+{
+ QuantizeWeights(loco::DataType input, loco::DataType output, QuantizationGranularity gr)
+ : input_type(input), output_type(output), granularity(gr)
+ {
+ }
+
+ loco::DataType input_type;
+ loco::DataType output_type;
+ QuantizationGranularity granularity;
+
+private:
+ void quantize_weights(luci::CircleConst *weights);
+
+ void visit(luci::CircleConv2D *node);
+ void visit(luci::CircleDepthwiseConv2D *node);
+ void visit(luci::CircleInstanceNorm *node);
+ void visit(luci::CirclePRelu *node);
+ void visit(luci::CircleTransposeConv *node);
+ void visit(luci::CircleFullyConnected *node);
+ void visit(luci::CircleNode *);
+};
+
+} // namespace luci
+
+#endif // __LUCI_QUANTIZE_WEIGHTS_H__
diff --git a/compiler/luci/pass/src/QuantizeWeightsOnly.cpp b/compiler/luci/pass/src/QuantizeWeightsOnly.cpp
new file mode 100644
index 000000000..e69a7b6a8
--- /dev/null
+++ b/compiler/luci/pass/src/QuantizeWeightsOnly.cpp
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "QuantizeWeightsOnly.h"
+#include "QuantizationUtils.h"
+
+#include <luci/Service/Nodes/CircleConst.h>
+#include <luci/Log.h>
+
+#include <cmath>
+#include <vector>
+#include <functional>
+#include <limits>
+
+using namespace luci;
+
+namespace
+{
+
+using IterFunc = std::function<void(uint32_t *, loco::TensorShape &, int32_t)>;
+
+void iterate_per_channel(CircleConst *node, int32_t &channel_dim_index, IterFunc func)
+{
+ loco::TensorShape dimension;
+ dimension.rank(4);
+ uint32_t indices[4] = {
+ 0,
+ };
+
+ if (!get_channel_dim_index(node, dimension, channel_dim_index))
+ {
+ assert(false);
+ return;
+ }
+
+ for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
+ {
+ for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
+ {
+ for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
+ {
+ for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
+ {
+ func(indices, dimension, channel_dim_index);
+ }
+ }
+ }
+ }
+}
+
+// TODO Reduce duplicate code with QuantizeDequantizeWeights
+template <loco::DataType out_type>
+void sym_wquant_per_channel(CircleConst *node, std::vector<float> &min, std::vector<float> &max,
+ std::vector<float> &scaling_factor, std::vector<float> &nudged_min,
+ std::vector<float> &nudged_max, int32_t &channel_dim_index)
+{
+ assert(node->dtype() == loco::DataType::FLOAT32);
+ assert(out_type == loco::DataType::S8 || out_type == loco::DataType::S16);
+ const int32_t kMaxScale = (out_type == loco::DataType::S8) ? std::numeric_limits<int8_t>::max()
+ : std::numeric_limits<int16_t>::max();
+ const int32_t kMinScale = -kMaxScale;
+
+ uint32_t size = node->size<loco::DataType::FLOAT32>();
+ std::vector<int32_t> quantized_values(size);
+
+ for (size_t i = 0; i < min.size(); ++i)
+ {
+ compute_sym_scale(min[i], max[i], scaling_factor[i], nudged_min[i], nudged_max[i], out_type);
+ }
+
+ auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
+ int channel_idx = indices[channel_dim_index];
+ const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
+ auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+ data = data < nudged_min[channel_idx] ? nudged_min[channel_idx] : data;
+ data = data > nudged_max[channel_idx] ? nudged_max[channel_idx] : data;
+ quantized_values[cal_offset(dimension, indices)] =
+ static_cast<int32_t>(std::round(data * scaling_factor_inv));
+ };
+
+ iterate_per_channel(node, channel_dim_index, quantize);
+
+ node->dtype(out_type); // change the type of tensor
+ node->size<out_type>(size); // resize tensor
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ node->at<out_type>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ }
+}
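Templating on out_type lets the integer range be fixed at compile time while sharing one implementation between s8 and s16. A standalone sketch of the dispatch, where Q stands in for the storage type selected by loco::DataType:

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
#include <type_traits>

template <typename Q> int32_t sym_quantize(float data, float scale)
{
  static_assert(std::is_same<Q, int8_t>::value || std::is_same<Q, int16_t>::value,
                "only s8/s16 are supported");
  const int32_t kMax = std::numeric_limits<Q>::max(); // 127 or 32767
  const int32_t kMin = -kMax;                         // symmetric range
  const int32_t q = static_cast<int32_t>(std::round(data / scale));
  return std::min(kMax, std::max(kMin, q));
}

int main()
{
  auto a = sym_quantize<int8_t>(1.0f, 1.0f / 127);    // 127
  auto b = sym_quantize<int16_t>(1.0f, 1.0f / 32767); // 32767
  (void)a;
  (void)b;
  return 0;
}
```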
+
+void cal_minmax_per_channel(CircleConst *node, std::vector<float> &min, std::vector<float> &max,
+ int32_t &channel_dim_index)
+{
+ loco::TensorShape dimension;
+ dimension.rank(4);
+
+ if (!get_channel_dim_index(node, dimension, channel_dim_index))
+ {
+ throw std::runtime_error("Failed to find channel index in " + node->name());
+ }
+ auto size = dimension.dim(channel_dim_index).value();
+
+ std::vector<bool> has_min_max_value(size, false);
+ min.resize(size);
+ max.resize(size);
+
+ auto cal_minmax = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
+ int channel_idx = indices[channel_dim_index];
+ auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+ if (has_min_max_value[channel_idx])
+ {
+ min[channel_idx] = data < min[channel_idx] ? data : min[channel_idx];
+ max[channel_idx] = data > max[channel_idx] ? data : max[channel_idx];
+ }
+ else
+ {
+ min[channel_idx] = data;
+ max[channel_idx] = data;
+ has_min_max_value[channel_idx] = true;
+ }
+ };
+
+ iterate_per_channel(node, channel_dim_index, cal_minmax);
+}
+
+} // namespace
+
+namespace luci
+{
+
+void QuantizeWeightsOnly::quantize_weights(luci::CircleConst *weights)
+{
+ // Find min/max per channel
+ if (granularity == QuantizationGranularity::ChannelWise)
+ {
+ auto quantparam = weights->quantparam();
+ if (quantparam == nullptr)
+ {
+ // Find min/max on the fly
+ // NOTE This is for the case when QuantizeDequantizeWeights is skipped
+ // TODO Reduce duplicate code
+ std::vector<float> min;
+ std::vector<float> max;
+ int32_t channel_dim_index = 0;
+
+ cal_minmax_per_channel(weights, min, max, channel_dim_index);
+
+ std::vector<float> nudged_min(min.size());
+ std::vector<float> nudged_max(min.size());
+ std::vector<float> scaling_factor(min.size());
+ std::vector<int64_t> zp(min.size());
+
+ if (output_type == loco::DataType::S8)
+ {
+ sym_wquant_per_channel<loco::DataType::S8>(weights, min, max, scaling_factor, nudged_min,
+ nudged_max, channel_dim_index);
+ }
+ else if (output_type == loco::DataType::S16)
+ {
+ sym_wquant_per_channel<loco::DataType::S16>(weights, min, max, scaling_factor, nudged_min,
+ nudged_max, channel_dim_index);
+ }
+ else
+ {
+ throw std::runtime_error("Weights-only quantization supports s8 and s16");
+ }
+
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->scale = scaling_factor;
+ quantparam->zerop = zp;
+ quantparam->quantized_dimension = channel_dim_index;
+ weights->quantparam(std::move(quantparam));
+
+ return;
+ }
+ }
+ else
+ throw std::runtime_error("Weights-only quantization does not support layer-wise");
+}
+
+void QuantizeWeightsOnly::visit(luci::CircleConv2D *node)
+{
+ LOGGER(l);
+ INFO(l) << "QuantizeWeightsOnly visits node: " << node->name() << std::endl;
+
+ auto weights = loco::must_cast<luci::CircleConst *>(node->filter());
+ if (!is_quantized(weights))
+ {
+ auto new_weights = luci::clone(weights);
+ node->filter(new_weights);
+ quantize_weights(new_weights);
+ }
+}
+
+void QuantizeWeightsOnly::visit(luci::CircleDepthwiseConv2D *node)
+{
+ LOGGER(l);
+ INFO(l) << "QuantizeWeightsOnly visits node: " << node->name() << std::endl;
+
+ auto weights = loco::must_cast<luci::CircleConst *>(node->filter());
+ if (!is_quantized(weights))
+ {
+ auto new_weights = luci::clone(weights);
+ node->filter(new_weights);
+ quantize_weights(new_weights);
+ }
+}
+
+void QuantizeWeightsOnly::visit(luci::CircleNode *) {}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/QuantizeWeightsOnly.h b/compiler/luci/pass/src/QuantizeWeightsOnly.h
new file mode 100644
index 000000000..ff6ad3261
--- /dev/null
+++ b/compiler/luci/pass/src/QuantizeWeightsOnly.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_QUANTIZE_WEIGHTS_ONLY_H__
+#define __LUCI_QUANTIZE_WEIGHTS_ONLY_H__
+
+#include <luci/Pass/QuantizationParameters.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+namespace luci
+{
+
+/**
+ * @brief QuantizeWeightsOnly quantizes weight tensors
+ * @details Finds min/max values on the fly and then quantizes
+ */
+struct QuantizeWeightsOnly final : public luci::CircleNodeMutableVisitor<void>
+{
+ QuantizeWeightsOnly(loco::DataType input, loco::DataType output, QuantizationGranularity gr)
+ : input_type(input), output_type(output), granularity(gr)
+ {
+ }
+
+ loco::DataType input_type;
+ loco::DataType output_type;
+ QuantizationGranularity granularity;
+
+private:
+ void quantize_weights(luci::CircleConst *weights);
+
+ void visit(luci::CircleConv2D *node);
+ void visit(luci::CircleDepthwiseConv2D *node);
+ void visit(luci::CircleNode *);
+};
+
+} // namespace luci
+
+#endif // __LUCI_QUANTIZE_WEIGHTS_ONLY_H__
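A minimal usage sketch for this visitor, assuming a luci::CircleConv2D *conv is at
hand (it mirrors what QuantizeWeightsPass::run does for every active node):

    luci::QuantizeWeightsOnly qw(loco::DataType::FLOAT32, loco::DataType::S8,
                                 luci::QuantizationGranularity::ChannelWise);
    conv->accept(&qw); // dispatches to visit(luci::CircleConv2D *)

Ops without a dedicated visit() overload fall through to visit(luci::CircleNode *),
which is a no-op, so unrelated nodes are left untouched.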
diff --git a/compiler/luci/pass/src/QuantizeWeightsPass.cpp b/compiler/luci/pass/src/QuantizeWeightsPass.cpp
new file mode 100644
index 000000000..9ac203e77
--- /dev/null
+++ b/compiler/luci/pass/src/QuantizeWeightsPass.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/QuantizeWeightsPass.h"
+#include "QuantizeWeightsOnly.h"
+#include "QuantizationUtils.h"
+
+#include <luci/Log.h>
+
+namespace luci
+{
+
+bool QuantizeWeightsPass::run(loco::Graph *g)
+{
+ LOGGER(l);
+ INFO(l) << "QuantizeWeightsPass Start" << std::endl;
+
+ if (_ctx->input_model_dtype != loco::DataType::FLOAT32)
+ throw std::runtime_error("Weights-only quantization supports float32 input only");
+
+ // Quantize weights
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ QuantizeWeightsOnly qw(_ctx->input_model_dtype, _ctx->output_model_dtype, _ctx->granularity);
+ circle_node->accept(&qw);
+ }
+
+ INFO(l) << "QuantizeWeightsPass End" << std::endl;
+ return false; // one time run
+}
+
+} // namespace luci
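A minimal driver sketch, assuming a loaded loco::Graph *g (the Context fields mirror
those exercised by the test below):

    auto ctx = std::make_unique<luci::QuantizeWeightsPass::Context>();
    ctx->input_model_dtype = loco::DataType::FLOAT32;
    ctx->output_model_dtype = loco::DataType::S8;
    ctx->granularity = luci::QuantizationGranularity::ChannelWise;

    luci::QuantizeWeightsPass pass(std::move(ctx));
    pass.run(g); // returns false so a PhaseRunner will not re-run the pass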
diff --git a/compiler/luci/pass/src/QuantizeWeightsPass.test.cpp b/compiler/luci/pass/src/QuantizeWeightsPass.test.cpp
new file mode 100644
index 000000000..058e029ab
--- /dev/null
+++ b/compiler/luci/pass/src/QuantizeWeightsPass.test.cpp
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/QuantizeWeightsPass.h"
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+struct QuantizeWeightsPassTest : public ::testing::Test
+{
+ /**
+ * nconv graph
+ *
+ * [CircleInput]
+ * |
+ * |
+ * [CircleConv2D]
+ * |
+ * |
+ * [CircleOutput]
+ */
+ void MakeGraph()
+ {
+ const int N = 1;
+ const int H = 4;
+ const int W = 4;
+ const int C = 3; // IC = OC
+
+ // graph input and output
+ auto graph_input = _g.inputs()->create();
+ auto graph_output = _g.outputs()->create();
+
+ // CircleInput
+ auto input = _g.nodes()->create<luci::CircleInput>();
+ input->index(graph_input->index());
+ input->shape({N, H, W, C});
+ input->dtype(loco::DataType::FLOAT32);
+ input->name("input");
+
+ // CircleConv2D
+ auto conv = _g.nodes()->create<luci::CircleConv2D>();
+ conv->input(input);
+ auto bias = _g.nodes()->create<luci::CircleConst>();
+ bias->dtype(loco::DataType::FLOAT32);
+ bias->shape({C});
+ bias->name("conv_bias");
+ conv->bias(bias);
+ auto weight = _g.nodes()->create<luci::CircleConst>();
+ weight->dtype(loco::DataType::FLOAT32);
+ weight->shape({C, H, W, C});
+ weight->size<loco::DataType::FLOAT32>(C * H * W * C);
+ conv->filter(weight);
+ conv->padding(luci::Padding::SAME);
+ conv->fusedActivationFunction(luci::FusedActFunc::NONE);
+ conv->dtype(loco::DataType::FLOAT32);
+ conv->name("nconv");
+
+ // CircleOutput
+ auto output = _g.nodes()->create<luci::CircleOutput>();
+ output->index(graph_output->index());
+ output->from(conv);
+ output->shape({N, H, W, C});
+ output->dtype(loco::DataType::FLOAT32);
+ output->name("output");
+ }
+ virtual void SetUp() { MakeGraph(); }
+ loco::Graph _g;
+};
+
+} // namespace
+
+TEST_F(QuantizeWeightsPassTest, name)
+{
+ luci::QuantizeWeightsPass pass(loco::DataType::FLOAT32, loco::DataType::S8,
+ luci::QuantizationGranularity::ChannelWise);
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST_F(QuantizeWeightsPassTest, name_ctx)
+{
+ auto ctx = std::make_unique<luci::QuantizeWeightsPass::Context>();
+ {
+ ctx->input_model_dtype = loco::DataType::FLOAT32;
+ ctx->output_model_dtype = loco::DataType::S8;
+ ctx->granularity = luci::QuantizationGranularity::ChannelWise;
+ }
+
+ luci::QuantizeWeightsPass pass(std::move(ctx));
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST_F(QuantizeWeightsPassTest, run_input_U8_NEG)
+{
+ loco::Graph g;
+ luci::QuantizeWeightsPass pass(loco::DataType::U8, loco::DataType::S8,
+ luci::QuantizationGranularity::ChannelWise);
+ EXPECT_THROW(pass.run(&_g), std::runtime_error);
+}
+
+TEST_F(QuantizeWeightsPassTest, run_output_f32_NEG)
+{
+ loco::Graph g;
+ luci::QuantizeWeightsPass pass(loco::DataType::FLOAT32, loco::DataType::FLOAT32,
+ luci::QuantizationGranularity::ChannelWise);
+ EXPECT_THROW(pass.run(&_g), std::runtime_error);
+}
diff --git a/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp
index 60c1cdd72..4f4edaf36 100644
--- a/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp
+++ b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp
@@ -15,533 +15,657 @@
*/
#include "luci/Pass/QuantizeWithMinMaxPass.h"
+#include "luci/Pass/PropagateQParamForwardPass.h"
+#include "luci/Pass/PropagateQParamBackwardPass.h"
+#include "luci/Pass/RemoveRedundantQuantizePass.h"
+#include "QuantizeActivation.h"
+#include "QuantizeWeights.h"
+#include "QuantizeBias.h"
#include "QuantizationUtils.h"
+#include "ProgressReporter.h"
+#include "helpers/LayerInfoMap.h"
#include <luci/IR/CircleNodes.h>
#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Service/Nodes/CircleConst.h>
+#include <luci/Profile/CircleNodeOrigin.h>
#include <luci/Log.h>
-
-#include <oops/UserExn.h>
+#include <logo/Phase.h>
#include <iostream>
#include <cmath>
-namespace luci
-{
-
namespace
{
-// Check if the node is the bias of Conv2D, DepthwiseConv2D, or FullyConnected layer
-// If true, return <input, weight> pair of the successor node (used to quantize bias)
-// If flase, return <nullptr, nullptr>
-std::pair<loco::Node *, loco::Node *> get_input_weight_of_bias(CircleNode *node)
-{
- auto circle_const = dynamic_cast<CircleConst *>(node);
- if (circle_const == nullptr)
- return std::make_pair(nullptr, nullptr);
-
- auto succs = loco::succs(node);
- if (succs.size() != 1) // assume bias is used by only one node
- return std::make_pair(nullptr, nullptr);
+using namespace luci;
- for (auto out : succs)
+bool use_predefined_values(ActivationQType qtype)
+{
+ switch (qtype)
{
- auto conv = dynamic_cast<CircleConv2D *>(out);
- if (conv != nullptr && conv->bias() == circle_const)
- {
- assert(conv->input() != nullptr);
- assert(conv->filter() != nullptr);
- return std::make_pair(conv->input(), conv->filter());
- }
- auto dw_conv = dynamic_cast<CircleDepthwiseConv2D *>(out);
- if (dw_conv != nullptr && dw_conv->bias() == circle_const)
- {
- assert(dw_conv->input() != nullptr);
- assert(dw_conv->filter() != nullptr);
- return std::make_pair(dw_conv->input(), dw_conv->filter());
- }
- auto fc = dynamic_cast<CircleFullyConnected *>(out);
- if (fc != nullptr && fc->bias() == circle_const)
- {
- assert(fc->input() != nullptr);
- assert(fc->weights() != nullptr);
- return std::make_pair(fc->input(), fc->weights());
- }
+ case ActivationQType::PreDefinedLogistic:
+ case ActivationQType::PreDefinedTanh:
+ case ActivationQType::PreDefinedSoftmax:
+ return true;
+ default:
+ // This ensures this switch-statement handles all ActivationQTypes
+ assert(qtype == ActivationQType::IntScale or qtype == ActivationQType::MinMax);
+ break;
}
- return std::make_pair(nullptr, nullptr);
+
+ return false;
}
-void asym_quant_bias_per_layer(CircleConst *node, float input_scale, float weight_scale,
- float *scaling_factor, int64_t *zp)
+// Create a Quantize Op whose
+// dtype is out_type
+// shape is the same as node's
+// qparam is computed according to node's qtype
+luci::CircleQuantize *create_quantize_op(luci::CircleNode *node, loco::DataType out_type)
{
- float scale = input_scale * weight_scale;
- const float scaling_factor_inv = (scale == 0) ? 0 : 1.0 / scale;
+ auto quantize = node->graph()->nodes()->create<CircleQuantize>();
+ quantize->name(node->name() + "_Quantize");
+ quantize->dtype(out_type);
+ quantize->rank(node->rank());
+ for (uint32_t i = 0; i < node->rank(); i++)
+ quantize->dim(i).set(node->dim(i).value());
- uint32_t size = node->size<loco::DataType::FLOAT32>();
- std::vector<int32_t> quantized_values(size);
- for (uint32_t i = 0; i < size; ++i)
- {
- quantized_values[i] =
- static_cast<int32_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
- }
+ quantize->shape_status(luci::ShapeStatus::VALID);
- node->dtype(loco::DataType::S32); // change the type of tensor
- node->size<loco::DataType::S32>(size); // resize tensor
- const int32_t kMinScale = std::numeric_limits<int32_t>::lowest();
- const int32_t kMaxScale = std::numeric_limits<int32_t>::max();
- for (uint32_t i = 0; i < size; ++i)
+ auto qparam = node->quantparam();
+ assert(qparam); // FIX_CALLER_UNLESS
+
+ auto qtype = luci::activation_qtype(node);
+ if (use_predefined_values(qtype))
{
- node->at<loco::DataType::S32>(i) =
- std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ quantize->quantparam(luci::make_predefined_qparam(qtype, out_type));
+ return quantize;
}
- *scaling_factor = scale;
- *zp = 0;
-}
-void quant_bias_per_channel(CircleConst *node, float input_scale, std::vector<float> &weight_scale,
- std::vector<float> &scaling_factor, std::vector<int64_t> &zp)
-{
- float scaling_factor_inv{0};
+ assert(qtype == ActivationQType::MinMax or qtype == ActivationQType::IntScale);
- uint32_t size = node->size<loco::DataType::FLOAT32>();
- std::vector<int32_t> quantized_values(size);
+ assert(qparam->min.size() == 1); // FIX_CALLER_UNLESS
+ assert(qparam->max.size() == 1); // FIX_CALLER_UNLESS
+ auto min = qparam->min[0];
+ auto max = qparam->max[0];
- for (uint32_t i = 0; i < size; ++i)
+ float scaling_factor{0};
+ int64_t zp{0};
+ float nudged_min{0};
+ float nudged_max{0};
+
+ if (out_type == loco::DataType::U8)
{
- scaling_factor[i] = input_scale * weight_scale[i];
- scaling_factor_inv = (scaling_factor[i] == 0) ? 0 : 1.0 / scaling_factor[i];
- quantized_values[i] =
- static_cast<int32_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
- zp[i] = 0;
+ compute_asym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
}
-
- node->dtype(loco::DataType::S32); // change the type of tensor
- node->size<loco::DataType::S32>(size); // resize tensor
- const int32_t kMinScale = std::numeric_limits<int32_t>::lowest();
- const int32_t kMaxScale = std::numeric_limits<int32_t>::max();
- for (uint32_t i = 0; i < size; ++i)
+ else
{
- node->at<loco::DataType::S32>(i) =
- std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ assert(out_type == loco::DataType::S16);
+ compute_sym_scale(min, max, scaling_factor, nudged_min, nudged_max);
}
-}
-bool has_min_max(const CircleNode *node)
-{
- return node->quantparam() && !node->quantparam()->min.empty() && !node->quantparam()->max.empty();
-}
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->scale.push_back(scaling_factor);
+ quantparam->zerop.push_back(zp);
+ // Save original min/max (not nudged_min/max). Nudged min/max
+ // is different from the real min/max values, causing wrong
+ // qparam when quantization dtype is changed.
+ quantparam->min.push_back(min);
+ quantparam->max.push_back(max);
-bool is_quantized(const CircleNode *node)
-{
- return node->dtype() == loco::DataType::U8 || // activation, weight
- node->dtype() == loco::DataType::S32; // bias
+ quantize->quantparam(std::move(quantparam));
+
+ if (qtype == ActivationQType::IntScale)
+ set_int_scale(quantize);
+
+ return quantize;
}
-void sym_wquant_per_channel(CircleConst *node, std::vector<float> &scaling_factor,
- int32_t &channel_dim_index)
+// Create a Dequantize Op whose shape is the same as node's
+luci::CircleDequantize *create_dequantize(luci::CircleNode *node)
{
- assert(node->dtype() == loco::DataType::FLOAT32);
+ auto dequantize = node->graph()->nodes()->create<luci::CircleDequantize>();
+ dequantize->name(node->name() + "_Dequantize");
+ dequantize->dtype(loco::DataType::FLOAT32);
+ dequantize->rank(node->rank());
+ for (uint32_t i = 0; i < node->rank(); i++)
+ dequantize->dim(i).set(node->dim(i).value());
- const int32_t kMaxScale = std::numeric_limits<int16_t>::max();
- const int32_t kMinScale = -kMaxScale;
+ dequantize->shape_status(luci::ShapeStatus::VALID);
- uint32_t size = node->size<loco::DataType::FLOAT32>();
- std::vector<int32_t> quantized_values(size);
+ luci::add_origin(dequantize, luci::get_origin(node));
- loco::TensorShape dimension;
- dimension.rank(4);
- uint32_t indices[4] = {
- 0,
- };
+ return dequantize;
+}
+
+} // namespace
+
+namespace luci
+{
- if (!get_channel_dim_index(node, dimension, channel_dim_index))
+namespace
+{
+
+/**
+ * Insert Quantize operator for mixed-precision quantization
+ * 1. Before input feature map (only for non-const)
+ * 2. After output feature map
+ *
+ * For example, if default_dtype = U8 and op_dtype = S16,
+ * 1. Quantize (U8->S16) is inserted before ifm
+ * 2. Quantize (S16->U8) is inserted after ofm
+ *
+ * Why not insert a Quantize Op for a const ifm?
+ * We quantize const tensors in a single step to preserve precision.
+ * For example, if default dtype = U8, op_dtype = S16, and op is CONV2D,
+ * we directly quantize weights to 16 bits, not 8->16 bits.
+ */
+struct InsertQuantizeOp final : public luci::CircleNodeMutableVisitor<void>
+{
+ InsertQuantizeOp(loco::DataType default_dtype, loco::DataType op_dtype)
+ : _default_dtype(default_dtype), _op_dtype(op_dtype)
{
- assert(false);
- return;
+ assert(default_dtype != op_dtype); // FIX_CALLER_UNLESS
}
- for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
+private:
+ loco::DataType _default_dtype;
+ loco::DataType _op_dtype;
+
+private:
+ luci::CircleQuantize *create_in_quantize(loco::Node *in, loco::Node *origin)
{
- for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
- {
- for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
- {
- for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
- {
- int channel_idx = indices[channel_dim_index];
- const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
- auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
- quantized_values[cal_offset(dimension, indices)] =
- static_cast<int32_t>(std::round(data * scaling_factor_inv));
- }
- }
- }
+ auto input = loco::must_cast<luci::CircleNode *>(in);
+ if (input->opcode() == luci::CircleOpcode::CIRCLECONST)
+ return nullptr;
+
+ // input is not quantizable (ex: index)
+ if (input->quantparam() == nullptr)
+ return nullptr;
+
+ auto input_quant = create_quantize_op(input, _op_dtype);
+ input_quant->input(input);
+ auto origin_node = loco::must_cast<luci::CircleNode *>(origin);
+ luci::add_origin(input_quant, luci::get_origin(origin_node));
+ return input_quant;
}
- node->dtype(loco::DataType::S16); // change the type of tensor
- node->size<loco::DataType::S16>(size); // resize tensor
- for (uint32_t i = 0; i < size; ++i)
+ void insert_out_quantize(loco::Node *node)
{
- node->at<loco::DataType::S16>(i) =
- std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
- }
-}
+ auto output = loco::must_cast<luci::CircleNode *>(node);
+ assert(output->opcode() != luci::CircleOpcode::CIRCLECONST); // FIX_CALLER_UNLESS
-void asym_wquant_per_channel(CircleConst *node, std::vector<float> &min,
- std::vector<float> &scaling_factor, int32_t &channel_dim_index)
-{
- assert(node->dtype() == loco::DataType::FLOAT32);
+ // output is not quantizable (ex: index)
+ if (output->quantparam() == nullptr)
+ return;
- const int32_t kMinScale = 0;
- const int32_t kMaxScale = 255;
+ auto output_quant = create_quantize_op(output, _default_dtype);
- uint32_t size = node->size<loco::DataType::FLOAT32>();
- std::vector<int32_t> quantized_values(size);
+ luci::add_origin(output_quant, luci::get_origin(output));
+ loco::replace(node).with(output_quant);
+ output_quant->input(node);
+ }
- loco::TensorShape dimension;
- dimension.rank(4);
- uint32_t indices[4] = {
- 0,
- };
+// INPUT_NAME is the only activation of NODE
+#define INSERT_QUANTIZE_TO_UNARY_OP(NODE, INPUT_NAME) \
+ void visit(NODE *node) \
+ { \
+ if (auto input_quant = create_in_quantize(node->INPUT_NAME(), node)) \
+ node->INPUT_NAME(input_quant); \
+ \
+ insert_out_quantize(node); \
+ }
- if (!get_channel_dim_index(node, dimension, channel_dim_index))
- {
- assert(false);
- return;
+// INPUT_NAME is the only activation of NODE
+#define INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(NODE, INPUT_NAME, OUT_NAME) \
+ void visit(NODE *node) \
+ { \
+ if (auto input_quant = create_in_quantize(node->INPUT_NAME(), node)) \
+ node->INPUT_NAME(input_quant); \
+ \
+ auto out_nodes = loco::succs(node); \
+ for (auto out_node : out_nodes) \
+ { \
+ auto out_circle = loco::must_cast<OUT_NAME *>(out_node); \
+ insert_out_quantize(out_circle); \
+ } \
}
- for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
- {
- for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
- {
- for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
- {
- for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
- {
- int channel_idx = indices[channel_dim_index];
- const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
- auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
- quantized_values[cal_offset(dimension, indices)] =
- static_cast<int32_t>(std::round((data - min[channel_idx]) * scaling_factor_inv));
- }
- }
- }
+// INPUT_NAME1 and INPUT_NAME2 are the only activations of NODE
+#define INSERT_QUANTIZE_TO_BINARY_OP(NODE, INPUT_NAME1, INPUT_NAME2) \
+ void visit(NODE *node) \
+ { \
+ if (auto input1_quant = create_in_quantize(node->INPUT_NAME1(), node)) \
+ node->INPUT_NAME1(input1_quant); \
+ \
+ if (auto input2_quant = create_in_quantize(node->INPUT_NAME2(), node)) \
+ node->INPUT_NAME2(input2_quant); \
+ \
+ insert_out_quantize(node); \
}
- node->dtype(loco::DataType::U8); // change the type of tensor
- node->size<loco::DataType::U8>(size); // resize tensor
- for (uint32_t i = 0; i < size; ++i)
+ // Default behavior (Ops not listed below are NYI)
+ void visit(luci::CircleNode *node)
{
- node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ throw std::runtime_error("Unsupported Op for mixed-precision quantization. Layer name: " +
+ node->name());
}
-}
-void asym_wquant_per_layer(CircleConst *node, float min, float scaling_factor)
-{
- const int32_t kMinScale = 0;
- const int32_t kMaxScale = 255;
+ // Skip output layer
+ void visit(luci::CircleOutput *) {}
+ void visit(luci::CircleSplitVOut *) {}
+ void visit(luci::CircleSplitOut *) {}
+ void visit(luci::CircleTopKV2Out *) {}
+ void visit(luci::CircleUniqueOut *) {}
+ void visit(luci::CircleUnpackOut *) {}
+
+ // Ops that receive a single activation as an input
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleAbs, x)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleAveragePool2D, value)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleBatchToSpaceND, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleConv2D, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleDepthToSpace, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleDepthwiseConv2D, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleElu, features)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleExp, x)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleFloor, x)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleFullyConnected, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleGather, params)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleGelu, features)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleInstanceNorm, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleLeakyRelu, features)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleLocalResponseNormalization, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleLogistic, x)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleMaxPool2D, value)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleMean, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleMirrorPad, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleNeg, x)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CirclePad, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CirclePadV2, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CirclePRelu, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReduceProd, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReduceMax, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReduceMin, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleRelu, features)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleRelu6, features)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReshape, tensor)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleResizeBilinear, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleResizeNearestNeighbor, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReverseSequence, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleRsqrt, x)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSlice, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSoftmax, logits)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSpaceToBatchND, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSpaceToDepth, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSqueeze, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSqrt, x)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleStridedSlice, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSum, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleTanh, x)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleTile, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleTranspose, a)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleTransposeConv, outBackprop)
+
+ // Ops that receive two activations as inputs
+ INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleAdd, x, y)
+ INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleBatchMatMul, x, y)
+ INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleDiv, x, y)
+ INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleFloorDiv, x, y)
+ INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleMaximum, x, y)
+ INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleMinimum, x, y)
+ INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleMul, x, y)
+ INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleOneHot, on_value, off_value)
+ INSERT_QUANTIZE_TO_BINARY_OP(luci::CirclePow, x, y)
+ INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleSub, x, y)
+
+ // Multiple-output ops that receive one activation as input
+ INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(luci::CircleSplit, input, luci::CircleSplitOut)
+ INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(luci::CircleSplitV, input, luci::CircleSplitVOut)
+ INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(luci::CircleTopKV2, input, luci::CircleTopKV2Out)
+ INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(luci::CircleUnique, input, luci::CircleUniqueOut)
+ INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(luci::CircleUnpack, value, luci::CircleUnpackOut)
+
+ // AddN has arbitrary number of inputs
+ void visit(luci::CircleAddN *node)
+ {
+ auto arity = node->arity();
+ for (uint32_t i = 0; i < arity; i++)
+ {
+ if (auto input_quant = create_in_quantize(node->inputs(i), node))
+ node->inputs(i, input_quant);
+ }
- uint32_t size = node->size<loco::DataType::FLOAT32>();
+ insert_out_quantize(node);
+ }
- const float scaling_factor_inv = 1.0 / scaling_factor;
- std::vector<int32_t> quantized_values(size);
- for (uint32_t i = 0; i < size; ++i)
+ // Concat has arbitrary number of inputs
+ void visit(luci::CircleConcatenation *node)
{
- auto data = node->at<loco::DataType::FLOAT32>(i);
- quantized_values[i] = static_cast<int32_t>(std::round((data - min) * scaling_factor_inv));
+ auto arity = node->arity();
+ for (uint32_t i = 0; i < arity; i++)
+ {
+ if (auto input_quant = create_in_quantize(node->values(i), node))
+ node->values(i, input_quant);
+ }
+
+ insert_out_quantize(node);
}
- node->dtype(loco::DataType::U8); // change the type of tensor
- node->size<loco::DataType::U8>(size); // resize tensor
- for (uint32_t i = 0; i < size; ++i)
+ // Pack has arbitrary number of inputs
+ void visit(luci::CirclePack *node)
{
- node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ auto arity = node->arity();
+ for (uint32_t i = 0; i < arity; i++)
+ {
+ if (auto input_quant = create_in_quantize(node->values(i), node))
+ node->values(i, input_quant);
+ }
+
+ insert_out_quantize(node);
}
-}
-// Check if node is weights of conv2d, depthwise_conv2d, or fully_connected layer
-bool is_weights(CircleNode *node)
+#undef INSERT_QUANTIZE_TO_UNARY_OP
+#undef INSERT_QUANTIZE_TO_BINARY_OP
+#undef INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP
+};
+
+} // namespace
+
+void QuantizeWithMinMaxPass::set_input_type(loco::Graph *g) const
{
- auto circle_const = dynamic_cast<CircleConst *>(node);
- if (circle_const == nullptr)
- return false;
+ auto inputs = g->inputs();
- auto succs = loco::succs(node);
- if (succs.size() != 1) // assume weights is used by only one node
- return false;
+ assert(inputs); // FIX_CALLER_UNLESS
+ assert(inputs->size() == _ctx->input_types.size()); // FIX_CALLER_UNLESS
- for (auto out : succs)
+ // NOTE loco::input_nodes returns input nodes following the order of InputIndex
+ auto input_nodes = loco::input_nodes(g);
+ for (uint32_t i = 0; i < input_nodes.size(); i++)
{
- auto conv = dynamic_cast<CircleConv2D *>(out);
- if (conv != nullptr && conv->filter() == circle_const)
- return true;
+ auto input = loco::must_cast<luci::CircleInput *>(input_nodes[i]);
+ assert(i == input->index()); // Fix input_type logic
- auto dw_conv = dynamic_cast<CircleDepthwiseConv2D *>(out);
- if (dw_conv != nullptr && dw_conv->filter() == circle_const)
- return true;
+ const auto user_given_dtype = _ctx->input_types[i];
- auto t_conv = dynamic_cast<CircleTransposeConv *>(out);
- if (t_conv != nullptr && t_conv->filter() == circle_const && circle_const->rank() == 4)
- return true;
+ if (input->dtype() == user_given_dtype)
+ continue;
- auto fc = dynamic_cast<CircleFullyConnected *>(out);
- if (fc != nullptr && fc->weights() == circle_const)
- return true;
- }
- return false;
-}
+ // Bool type is not quantizable
+ if (input->dtype() == loco::DataType::BOOL)
+ continue;
+ if (input->dtype() == loco::DataType::S32)
+ continue;
+ if (input->dtype() == loco::DataType::S64)
+ continue;
-/**
- * @brief QuantizeActivation quantizes tensors for activations
- * @details Quantize using recorded min/max values
- */
-struct QuantizeActivation final : public luci::CircleNodeMutableVisitor<bool>
-{
- QuantizeActivation(loco::DataType input, loco::DataType output)
- : input_type(input), output_type(output)
- {
- }
+ // Insert Quantize Op
+ auto quant_op = create_quantize_op(input, input->dtype());
+ loco::replace(input).with(quant_op);
+ quant_op->input(input);
- loco::DataType input_type;
- loco::DataType output_type;
-
- // Quantize input tensors of each node
- bool visit(luci::CircleNode *node)
- {
- LOGGER(l);
- INFO(l) << "QuantizeActivation visit node: " << node->name() << std::endl;
- auto arity = node->arity();
- for (uint32_t i = 0; i < arity; i++)
+ // TODO Set a proper origin (Quantize should have its own Origin)
{
- auto input_node = node->arg(i);
- auto circle_node = loco::must_cast<luci::CircleNode *>(input_node);
+ auto succs = loco::succs(quant_op);
+ assert(succs.size() > 0);
+ auto succ = loco::must_cast<luci::CircleNode *>(*succs.begin());
+ luci::add_origin(quant_op, luci::get_origin(succ));
+ }
- // Check if this is already quantized
- if (is_quantized(circle_node))
- continue;
+ // Update qparam of input
+ // This step is skipped if input_type is float32
+ if (user_given_dtype != loco::DataType::FLOAT32)
+ {
+ auto quantparam = input->quantparam();
+ assert(quantparam);
+ assert(quantparam->min.size() == 1); // only support layer-wise quant
+ assert(quantparam->max.size() == 1); // only support layer-wise quant
+ auto min = quantparam->min[0];
+ auto max = quantparam->max[0];
- // Check if this is bias (bias is quantized later)
- auto iw = get_input_weight_of_bias(circle_node);
- if (iw.first != nullptr && iw.second != nullptr)
- continue;
+ float scaling_factor{0};
+ int64_t zp{0};
+ float nudged_min{0};
+ float nudged_max{0};
- // Check if this is activation
- // We assume min/max are recorded only for activations
- if (has_min_max(circle_node) && !is_weights(circle_node))
+ if (user_given_dtype == loco::DataType::U8)
+ {
+ compute_asym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
+ }
+ else
{
- // Quantize using recorded min/max
- auto quantparam = circle_node->quantparam();
- assert(quantparam->min.size() == 1); // only support layer-wise quant
- assert(quantparam->max.size() == 1); // only support layer-wise quant
- auto min = quantparam->min[0];
- auto max = quantparam->max[0];
-
- float scaling_factor{0};
- int64_t zp{0};
- float nudged_min{0};
- float nudged_max{0};
-
- if (output_type == loco::DataType::U8)
- {
- compute_asym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
- circle_node->dtype(loco::DataType::U8);
- }
- else
- {
- compute_sym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
- circle_node->dtype(loco::DataType::S16);
- }
-
- circle_node->quantparam()->min.clear();
- circle_node->quantparam()->max.clear();
- circle_node->quantparam()->scale.push_back(scaling_factor);
- circle_node->quantparam()->zerop.push_back(zp);
+ assert(user_given_dtype == loco::DataType::S16);
+ compute_sym_scale(min, max, scaling_factor, nudged_min, nudged_max);
}
+ input->quantparam()->scale[0] = scaling_factor;
+ input->quantparam()->zerop[0] = zp;
}
- return false;
- }
-};
-struct QuantizeBias final : public luci::CircleNodeMutableVisitor<bool>
-{
- QuantizeBias(loco::DataType input, loco::DataType output, QuantizationGranularity gr)
- : input_type(input), output_type(output), granularity(gr)
- {
+ // Update dtype of input
+ input->dtype(user_given_dtype);
+
+ auto graph_input = inputs->at(input->index());
+ graph_input->dtype(user_given_dtype);
}
+}
- loco::DataType input_type;
- loco::DataType output_type;
- QuantizationGranularity granularity;
+void QuantizeWithMinMaxPass::set_output_type(loco::Graph *g) const
+{
+ auto outputs = g->outputs();
+ assert(outputs); // FIX_CALLER_UNLESS
+ assert(outputs->size() == _ctx->output_types.size()); // Fix CircleQuantizer unless
- // Quantize bias node
- bool visit(luci::CircleNode *node)
+ // NOTE loco::output_nodes returns output nodes following the order of OutputIndex
+ auto output_nodes = loco::output_nodes(g);
+ for (uint32_t i = 0; i < output_nodes.size(); i++)
{
- // Check if this is already quantized
- if (is_quantized(node))
- return false;
-
- // Check if this is bias
- auto iw = get_input_weight_of_bias(node);
- if (iw.first == nullptr || iw.second == nullptr)
- return false;
+ auto output = loco::must_cast<luci::CircleOutput *>(output_nodes[i]);
+ assert(i == output->index()); // Fix output_type logic
- auto input = loco::must_cast<luci::CircleNode *>(iw.first);
- auto weight = loco::must_cast<luci::CircleNode *>(iw.second);
+ const auto user_given_dtype = _ctx->output_types[i];
- if (granularity == QuantizationGranularity::ChannelWise)
- {
- assert(input->quantparam()->scale.size() == 1); // input scale's layer-wise
- auto input_scale = input->quantparam()->scale[0];
-
- assert(weight->quantparam() != nullptr); // weight scale's channel-wise
- auto weight_scale = weight->quantparam()->scale;
+ if (output->dtype() == user_given_dtype)
+ continue;
- auto circle_const = loco::must_cast<luci::CircleConst *>(node);
+ // Bool type is not quantizable
+ if (output->dtype() == loco::DataType::BOOL)
+ continue;
- uint32_t size = circle_const->size<loco::DataType::FLOAT32>();
- assert(size == weight_scale.size());
- std::vector<float> scaling_factor(size);
- std::vector<int64_t> zp(size);
+ auto from = loco::must_cast<luci::CircleNode *>(output->from());
- quant_bias_per_channel(circle_const, input_scale, weight_scale, scaling_factor, zp);
+ // The last Op is not quantizable (ex: ArgMax)
+ if (not from->quantparam())
+ continue;
- auto quantparam = std::make_unique<CircleQuantParam>();
- quantparam->scale = scaling_factor;
- quantparam->zerop = zp;
- assert(circle_const->quantparam() == nullptr); // bias should not be quantized before
- circle_const->quantparam(std::move(quantparam));
+ // Insert Dequantize Op for float32 output_type
+ if (user_given_dtype == loco::DataType::FLOAT32)
+ {
+ auto dequant_op = create_dequantize(from);
+ dequant_op->input(from);
+ output->from(dequant_op);
}
else
{
- assert(input->quantparam()->scale.size() == 1); // Only support per-layer quant
- auto input_scale = input->quantparam()->scale[0];
+ // Insert Quantize Op for non-float32 output_type
+ auto quant_op = create_quantize_op(from, user_given_dtype);
+ quant_op->input(from);
+ output->from(quant_op);
- assert(weight->quantparam()->scale.size() == 1); // Only support per-layer quant
- auto weight_scale = weight->quantparam()->scale[0];
-
- auto circle_const = loco::must_cast<luci::CircleConst *>(node);
- float scaling_factor{0};
- int64_t zp{0};
- asym_quant_bias_per_layer(circle_const, input_scale, weight_scale, &scaling_factor, &zp);
- auto quantparam = std::make_unique<CircleQuantParam>();
- quantparam->scale.push_back(scaling_factor);
- quantparam->zerop.push_back(zp);
- assert(circle_const->quantparam() == nullptr); // bias should not be quantized before
- circle_const->quantparam(std::move(quantparam));
+ // TODO Set a proper origin (Quantize should have its own Origin)
+ luci::add_origin(quant_op, luci::get_origin(from));
}
- return false;
+
+ // Update dtype of output
+ output->dtype(user_given_dtype);
+
+ auto graph_output = outputs->at(output->index());
+ graph_output->dtype(user_given_dtype);
}
-};
+}
/**
- * @brief QuantizeWeights quantizes tensors for weights
- * @details Find min/max values on the fly and then quantize
+ * How does QuantizeWithMinMax work?
+ *
+ * We categorize tensors into four groups:
+ * - Activation: Feature maps (both Const/Non-const)
+ * - Weights: Const tensors of specific Ops (Conv, FC, ...)
+ * - Bias: Const tensors of specific Ops (Conv, FC, ...)
+ * - Others: padding value, one_hot value, axis, ..
+ *
+ * Activations are quantized in different ways:
+ * 1. For non-constant activation, quantize using recorded min/max
+ * 2. For constant activation, quantize using min/max of its value
+ * 3. For some Ops (ex: pad_v2), output qparam is used as input qparam (backward propagation)
+ * 4. For some Ops (ex: reshape), input qparam is used as output qparam (forward propagation)
+ * 5. For some Ops (ex: tanh), output qparam has pre-defined values
+ *
+ * Weights are quantized using the min/max of their values
+ *
+ * Bias is quantized using the input scale (s_i) and the weights scale (s_w)
+ * - Therefore, activations and weights should be quantized before bias
+ *
+ * Overall Quantization Steps
+ * 1. Quantize Activation
+ * - Quantize using recorded min/max (QuantizeActivation)
+ * - Insert Quantize Ops for mixed-precision quantization (InsertQuantizeOp)
+ * - Remove redundant Quantize Ops (RemoveRedundantQuantizePass)
+ * - Propagate qparam backward (PropagateQParamBackwardPass)
+ * - Quantize const inputs (QuantizeConstInputActivation)
+ * - Quantize using pre-defined values (QuantizeSpecialActivation)
+ * - Propagate qparam forward (PropagateQParamForwardPass)
+ * 2. Quantize Weights
+ * 3. Quantize Bias
+ * 4. Set input dtype
+ * 5. Set output dtype
+ *
+ * Why was the quantization sequence determined as above?
+ * - Activation and weights should be quantized before bias (1->2->3). Input/Output
+ * dtype can be updated at the end (4->5).
+ * - During activation quantization,
+ * - Backward propagation is performed earlier than forward propagation. This allows
+ * backward-propagated qparam to be overwritten during forward propagation.
+ * We made this decision because Ops for forward propagation (reshape, transpose, ..)
+ * are more common than those for backward propagation. TODO Check this decision is safe.
+ * - QuantizeSpecialActivation is called before forward propagation to make sure that
+ * the pre-defined qparam values are propagated.
*/
-struct QuantizeWeights final : public luci::CircleNodeMutableVisitor<bool>
+bool QuantizeWithMinMaxPass::run(loco::Graph *g)
{
- QuantizeWeights(loco::DataType input, loco::DataType output, QuantizationGranularity gr)
- : input_type(input), output_type(output), granularity(gr)
+ LOGGER(l);
+ INFO(l) << "QuantizeWithMinMaxPass Start" << std::endl;
+
+ auto info_by_name = layer_info_map(g, _ctx->layers_info);
+
+ auto quantize_dtype = [&](const luci::CircleNode *node) {
+ auto iter = info_by_name.find(node->name());
+
+ // Return designated quantization dtype
+ if (iter != info_by_name.end())
+ return iter->second.dtype;
+
+ // Return default quantization dtype
+ return _ctx->output_model_dtype;
+ };
+
+ auto quantize_granularity = [&](const luci::CircleNode *node) {
+ auto iter = info_by_name.find(node->name());
+
+ // Return designated quantization granularity
+ if (iter != info_by_name.end())
+ return iter->second.granularity;
+
+ // Return default quantization granularity
+ return _ctx->granularity;
+ };
+
+ // Quantize activation
+ // Why all_nodes?
+ // Models can have inactive (unused) inputs.
+ // We do not reject such models, but quantize them too
+ for (auto node : loco::all_nodes(g))
{
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ QuantizeActivation qa(_ctx->input_model_dtype, quantize_dtype(circle_node));
+ circle_node->accept(&qa);
}
- loco::DataType input_type;
- loco::DataType output_type;
- QuantizationGranularity granularity;
-
- // Quantize input tensors of each node
- bool visit(luci::CircleNode *node)
+ // Insert Quantize Op
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
- LOGGER(l);
- INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl;
- auto arity = node->arity();
- for (uint32_t i = 0; i < arity; i++)
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ auto op_dtype = quantize_dtype(circle_node);
+ if (op_dtype != _ctx->output_model_dtype)
{
- auto input_node = node->arg(i);
- auto circle_node = loco::must_cast<luci::CircleNode *>(input_node);
+ InsertQuantizeOp iqo(_ctx->output_model_dtype, op_dtype);
+ circle_node->accept(&iqo);
+ }
+ }
- // Check if this is already quantized
- if (is_quantized(circle_node))
- continue;
+ // Remove redundant Quantize Op
+ {
+ logo::Phase phase;
- if (is_weights(circle_node))
- {
- auto circle_const = loco::must_cast<luci::CircleConst *>(circle_node);
-
- // Find min/max per channel-wise
- if (granularity == QuantizationGranularity::ChannelWise)
- {
- auto quantparam = circle_node->quantparam();
- if (quantparam == nullptr)
- {
- assert(false && "quantparam is nullptr");
- return false;
- }
-
- auto min = quantparam->min;
- auto scaling_factor = quantparam->scale;
- int32_t channel_dim_index = 0;
-
- if (output_type == loco::DataType::U8)
- {
- asym_wquant_per_channel(circle_const, min, scaling_factor, channel_dim_index);
- }
- else
- {
- sym_wquant_per_channel(circle_const, scaling_factor, channel_dim_index);
- }
- quantparam->min.clear();
- quantparam->max.clear();
- quantparam->quantized_dimension = channel_dim_index;
- }
- // Find min/max per layer-wise
- else
- {
- // Quantize using recorded quantparam
- auto quantparam = circle_node->quantparam();
- assert(quantparam != nullptr);
- assert(quantparam->min.size() == 1); // only support layer-wise quant
- assert(quantparam->scale.size() == 1); // only support layer-wise quant
- auto min = quantparam->min[0];
- auto scaling_factor = quantparam->scale[0];
- asym_wquant_per_layer(circle_const, min, scaling_factor);
- quantparam->min.clear();
- quantparam->max.clear();
- }
- }
- }
- return false;
+ phase.emplace_back(std::make_unique<luci::RemoveRedundantQuantizePass>());
+
+ ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
+ logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
+ phase_runner.attach(&prog);
+ phase_runner.run(phase);
}
-};
-} // namespace
+ // Backward propagation of activation qparam
+ {
+ PropagateQParamBackwardPass pqbp(_ctx->output_model_dtype);
+ pqbp.run(g);
+ }
-bool QuantizeWithMinMaxPass::run(loco::Graph *g)
-{
- LOGGER(l);
- INFO(l) << "QuantizeWithMinMaxPass Start" << std::endl;
+ // Quantize const input activation
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ QuantizeConstInputActivation qcia(quantize_dtype(circle_node));
+ circle_node->accept(&qcia);
+ }
- // Quantize activation
+ // Update qparam of output of special Ops
for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
- QuantizeActivation qa(_input_dtype, _output_dtype);
auto circle_node = loco::must_cast<luci::CircleNode *>(node);
- circle_node->accept(&qa);
+
+ // At this point, all activations should already be quantized.
+ // Un-quantized nodes are not the quantization target (ex: int32 tensor),
+ // so we skip them
+ if (circle_node->quantparam() == nullptr)
+ continue;
+
+ QuantizeSpecialActivation qsa(_ctx->input_model_dtype, quantize_dtype(circle_node));
+ circle_node->accept(&qsa);
}
+ // Forward propagation of activation qparam
+ logo::Phase phase;
+
+ phase.emplace_back(std::make_unique<luci::PropagateQParamForwardPass>(_ctx->TF_style_maxpool));
+
+ ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
+ logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
+ phase_runner.attach(&prog);
+ phase_runner.run(phase);
+
// Quantize weights
for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
- QuantizeWeights qw(_input_dtype, _output_dtype, _granularity);
auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ QuantizeWeights qw(_ctx->input_model_dtype, quantize_dtype(circle_node),
+ quantize_granularity(circle_node));
circle_node->accept(&qw);
}
// Quantize bias
for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
- QuantizeBias qb(_input_dtype, _output_dtype, _granularity);
auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ QuantizeBias qb(_ctx->input_model_dtype, quantize_dtype(circle_node),
+ quantize_granularity(circle_node));
circle_node->accept(&qb);
}
@@ -550,11 +674,41 @@ bool QuantizeWithMinMaxPass::run(loco::Graph *g)
for (auto node : loco::output_nodes(g))
{
auto circle_node = loco::must_cast<luci::CircleOutput *>(node);
- if (static_cast<luci::CircleNode *>(circle_node->from())->dtype() == _output_dtype)
+ if (static_cast<luci::CircleNode *>(circle_node->from())->dtype() == _ctx->output_model_dtype)
{
- circle_node->dtype(_output_dtype);
+ circle_node->dtype(_ctx->output_model_dtype);
auto graph_output = graph_outputs->at(circle_node->index());
- graph_output->dtype(_output_dtype);
+ graph_output->dtype(_ctx->output_model_dtype);
+ }
+ }
+
+ // Set input type
+ set_input_type(g);
+
+ // Set output type
+ set_output_type(g);
+
+ // Remove redundant Quantize Op
+ {
+ logo::Phase phase;
+
+ phase.emplace_back(std::make_unique<luci::RemoveRedundantQuantizePass>());
+
+ ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
+ logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
+ phase_runner.attach(&prog);
+ phase_runner.run(phase);
+ }
+
+ // Remove min/max values
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ if (auto qparam = circle_node->quantparam())
+ {
+ warn_accuracy_with_range(circle_node);
+ qparam->min.clear();
+ qparam->max.clear();
}
}
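For reference, create_quantize_op and set_input_type above delegate the U8 case to
compute_asym_scale_zp. A sketch of the standard asymmetric affine mapping it is
assumed to implement, where real = scale * (q - zerop) (hypothetical code; the real
routine, including min/max nudging, lives in QuantizationUtils):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    void asym_scale_zp_sketch(float min, float max, float &scale, int64_t &zerop)
    {
      // The representable range must contain 0.0f so that zero is exact.
      min = std::min(min, 0.0f);
      max = std::max(max, 0.0f);
      scale = (max - min) / 255.0f; // U8 has 256 levels
      const float zp_f = (scale != 0.0f) ? -min / scale : 0.0f;
      zerop = static_cast<int64_t>(std::round(std::min(255.0f, std::max(0.0f, zp_f))));
    }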
diff --git a/compiler/luci/pass/src/QuantizeWithMinMaxPass.test.cpp b/compiler/luci/pass/src/QuantizeWithMinMaxPass.test.cpp
new file mode 100644
index 000000000..49c2d4652
--- /dev/null
+++ b/compiler/luci/pass/src/QuantizeWithMinMaxPass.test.cpp
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/QuantizeWithMinMaxPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+class SimpleConcatGraph
+{
+public:
+ SimpleConcatGraph(loco::DataType quant_type)
+ {
+ concat_node = g.nodes()->create<luci::CircleConcatenation>(2);
+ input_1 = g.nodes()->create<luci::CircleConst>();
+ input_2 = g.nodes()->create<luci::CircleConst>();
+
+ concat_node->dtype(quant_type);
+ concat_node->fusedActivationFunction(luci::FusedActFunc::NONE);
+ input_1->dtype(quant_type);
+ input_2->dtype(quant_type);
+
+ concat_node->values(0, input_1);
+ concat_node->values(1, input_2);
+ }
+
+ ~SimpleConcatGraph()
+ {
+ concat_node->values(0, nullptr);
+ concat_node->values(1, nullptr);
+ }
+
+public:
+ loco::Graph g;
+ luci::CircleConcatenation *concat_node = nullptr;
+ luci::CircleConst *input_1 = nullptr;
+ luci::CircleConst *input_2 = nullptr;
+};
+
+TEST(QuantizeWithMinMaxPassTest, name)
+{
+ auto ctx = std::make_unique<luci::QuantizeWithMinMaxPass::Context>();
+ {
+ ctx->input_model_dtype = loco::DataType::FLOAT32;
+ ctx->output_model_dtype = loco::DataType::U8;
+ ctx->granularity = luci::QuantizationGranularity::LayerWise;
+ }
+
+ luci::QuantizeWithMinMaxPass pass(std::move(ctx));
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+// Test concat of integer tensors
+// Integer tensors are not quantized
+TEST(QuantizeWithMinMaxPassTest, int_concat)
+{
+ SimpleConcatGraph g(loco::DataType::S32);
+
+ auto ctx = std::make_unique<luci::QuantizeWithMinMaxPass::Context>();
+ {
+ ctx->input_model_dtype = loco::DataType::FLOAT32;
+ ctx->output_model_dtype = loco::DataType::U8;
+ ctx->granularity = luci::QuantizationGranularity::LayerWise;
+ }
+
+ luci::QuantizeWithMinMaxPass qwmm(std::move(ctx));
+
+ qwmm.run(&g.g);
+
+ EXPECT_EQ(nullptr, g.concat_node->quantparam());
+ EXPECT_EQ(nullptr, g.input_1->quantparam());
+ EXPECT_EQ(nullptr, g.input_2->quantparam());
+}
+
+TEST(QuantizeWithMinMaxPassTest, inactive_input)
+{
+ SimpleConcatGraph g(loco::DataType::FLOAT32);
+
+ // Unused input
+ g.g.nodes()->create<luci::CircleInput>();
+
+ auto ctx = std::make_unique<luci::QuantizeWithMinMaxPass::Context>();
+ {
+ ctx->input_model_dtype = loco::DataType::FLOAT32;
+ ctx->output_model_dtype = loco::DataType::U8;
+ ctx->granularity = luci::QuantizationGranularity::LayerWise;
+ }
+
+ luci::QuantizeWithMinMaxPass qwmm(std::move(ctx));
+
+ EXPECT_NO_THROW(qwmm.run(&g.g));
+}
diff --git a/compiler/luci/pass/src/QuantizedModelVerifier.cpp b/compiler/luci/pass/src/QuantizedModelVerifier.cpp
new file mode 100644
index 000000000..684d5d48a
--- /dev/null
+++ b/compiler/luci/pass/src/QuantizedModelVerifier.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "QuantizedModelVerifier.h"
+
+#include "VerifyQuantizedNodeGranularity.h"
+#include "VerifyQuantizedNodeType.h"
+#include "VerifyQuantizedBiasScale.h"
+#include "helpers/LayerInfoMap.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+namespace luci
+{
+
+void QuantizedModelVerifier::verify(loco::Graph *g)
+{
+ if (_ctx->granularity != Granularity::ChannelWise && _ctx->granularity != Granularity::LayerWise)
+ throw std::runtime_error("Unsupported granularity");
+
+ auto info_by_name = layer_info_map(g, _ctx->layers_info);
+
+ auto quantize_dtype = [&](const luci::CircleNode *node) {
+ auto iter = info_by_name.find(node->name());
+
+ // Return designated quantization dtype
+ if (iter != info_by_name.end())
+ return iter->second.dtype;
+
+ // Return default quantization dtype
+ return _ctx->output_model_dtype;
+ };
+
+ auto quantize_granularity = [&](const luci::CircleNode *node) {
+ auto iter = info_by_name.find(node->name());
+
+ // Return designated quantization granularity
+ if (iter != info_by_name.end())
+ return iter->second.granularity;
+
+ // Return default quantization granularity
+ return _ctx->granularity;
+ };
+
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+
+ auto node_name = [&circle_node]() {
+ if (circle_node->name().length() == 0)
+ return std::string("(noname)");
+
+ return circle_node->name();
+ };
+
+ // Verify Type
+ if (!VerifyQuantizedNodeType::create(quantize_dtype(circle_node))->verify(circle_node))
+ throw std::runtime_error("Wrong data type detected in " + node_name());
+
+ // Verify Granularity
+ if (!circle_node->accept(
+ VerifyQuantizedNodeGranularity::create(quantize_granularity(circle_node)).get()))
+ throw std::runtime_error("Wrong granularity detected in " + node_name());
+
+ // Verify Bias scale
+ if (!VerifyQuantizedBiasScale::create()->verify(circle_node))
+ throw std::runtime_error("Wrong bias scale detected in " + node_name());
+ }
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/QuantizedModelVerifier.h b/compiler/luci/pass/src/QuantizedModelVerifier.h
new file mode 100644
index 000000000..d9bea434d
--- /dev/null
+++ b/compiler/luci/pass/src/QuantizedModelVerifier.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_QUANTIZED_MODEL_VERIFIER_H__
+#define __LUCI_QUANTIZED_MODEL_VERIFIER_H__
+
+#include "luci/Pass/QuantizationParameters.h"
+
+#include <loco.h>
+
+#include <memory>
+
+namespace luci
+{
+
+/**
+ * @brief Class to verify quantized model
+ *
+ * TODO Move this to luci/service
+ */
+struct QuantizedModelVerifier
+{
+public:
+ struct Context
+ {
+ loco::DataType output_model_dtype = loco::DataType::Unknown;
+ QuantizationGranularity granularity = QuantizationGranularity::ChannelWise;
+ std::vector<loco::DataType> input_types;
+ std::vector<loco::DataType> output_types;
+ bool TF_style_maxpool = false;
+ std::vector<LayerInfo> layers_info;
+ };
+
+public:
+ QuantizedModelVerifier(std::unique_ptr<Context> &&ctx) : _ctx{std::move(ctx)}
+ {
+ // DO NOTHING
+ }
+
+ void verify(loco::Graph *g);
+
+private:
+ std::unique_ptr<Context> _ctx;
+};
+
+} // namespace luci
+
+#endif // __LUCI_QUANTIZED_MODEL_VERIFIER_H__
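A minimal usage sketch, assuming an already-quantized loco::Graph *g (the same
pattern appears in the test helpers below):

    auto ctx = std::make_unique<luci::QuantizedModelVerifier::Context>();
    ctx->output_model_dtype = loco::DataType::U8;
    ctx->granularity = luci::QuantizationGranularity::LayerWise;
    ctx->input_types = {loco::DataType::U8};
    ctx->output_types = {loco::DataType::U8};

    luci::QuantizedModelVerifier verifier(std::move(ctx));
    verifier.verify(g); // throws std::runtime_error on a type, granularity, or bias-scale mismatch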
diff --git a/compiler/luci/pass/src/QuantizedModelVerifier.test.cpp b/compiler/luci/pass/src/QuantizedModelVerifier.test.cpp
new file mode 100644
index 000000000..ae02edb3d
--- /dev/null
+++ b/compiler/luci/pass/src/QuantizedModelVerifier.test.cpp
@@ -0,0 +1,2828 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "QuantizedModelVerifier.h"
+
+#include "luci/Pass/QuantizeWithMinMaxPass.h"
+#include "luci/Pass/QuantizationParameters.h"
+#include "luci/Pass/CircleTypeInferencePass.h"
+
+#include <logo/Phase.h>
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+using Type = loco::DataType;
+using Granularity = luci::QuantizationGranularity;
+
+namespace
+{
+
+/**
+ * @brief A helper function to create dummy const node
+ */
+template <Type T> luci::CircleConst *create_dummy_const(loco::Graph *g, luci::test::ShapeU32 shape)
+{
+ auto node = g->nodes()->create<luci::CircleConst>();
+ {
+ node->dtype(T);
+ node->shape(shape);
+ node->size<T>(luci::test::num_elements(shape));
+
+ for (int32_t i = 0; i < luci::test::num_elements(shape); i++)
+ {
+ // DESIGN NOTE
+ //
+ // Filling with any random numbers is fine
+ // Q. Should it include negative numbers?
+ switch (T)
+ {
+ case Type::FLOAT32:
+ // Fill with index
+ node->at<T>(i) = static_cast<float>(i);
+ break;
+ case Type::BOOL:
+ // Fill by flip
+ node->at<T>(i) = (i % 2) ? true : false;
+ break;
+ case Type::U8:
+ // Fill with index
+ node->at<T>(i) = static_cast<uint8_t>(i);
+ break;
+ case Type::S16:
+ // Fill with index
+ node->at<T>(i) = static_cast<int16_t>(i);
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ return node;
+}
+
+/**
+ * @brief A helper function to create a const node with values
+ */
+template <Type DT, typename T>
+luci::CircleConst *create_const(loco::Graph *g, luci::test::ShapeU32 shape,
+ std::initializer_list<T> values)
+{
+ auto node = g->nodes()->create<luci::CircleConst>();
+ {
+ node->dtype(DT);
+ node->shape(shape);
+ node->size<DT>(luci::test::num_elements(shape));
+
+ assert(values.size() == node->size<DT>());
+
+ uint32_t index = 0;
+ for (auto val : values)
+ {
+ node->at<DT>(index++) = static_cast<T>(val);
+ }
+ }
+
+ return node;
+}
+
+void insert_scale_zp(luci::CircleNode *node, float scale, int64_t zp)
+{
+ auto qparam = node->quantparam();
+ assert(qparam != nullptr); // FIX_CALLER_UNLESS
+ qparam->scale.push_back(scale);
+ qparam->zerop.push_back(zp);
+}
+
+void run_phase(loco::Graph *g, Type quantized_dtype, Granularity granularity)
+{
+ logo::Phase phase;
+
+ // Default passes.
+ phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
+
+ auto ctx = std::make_unique<luci::QuantizeWithMinMaxPass::Context>();
+ {
+ ctx->input_model_dtype = loco::DataType::FLOAT32;
+ ctx->output_model_dtype = quantized_dtype;
+ ctx->granularity = granularity;
+ // Test graph has only one input/output
+ ctx->input_types = {quantized_dtype};
+ ctx->output_types = {quantized_dtype};
+ }
+
+ phase.emplace_back(std::make_unique<luci::QuantizeWithMinMaxPass>(std::move(ctx)));
+
+ logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{g};
+ phase_runner.run(phase);
+}
+
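+// Same as above, but with a caller-provided Context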
+void run_phase(loco::Graph *g, std::unique_ptr<luci::QuantizeWithMinMaxPass::Context> &&ctx)
+{
+ logo::Phase phase;
+
+ // Default passes.
+ phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
+
+ phase.emplace_back(std::make_unique<luci::QuantizeWithMinMaxPass>(std::move(ctx)));
+
+ logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{g};
+ phase_runner.run(phase);
+}
+
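+// Quantize the graph, then check it with QuantizedModelVerifier
+// (verify() throws if the quantized model is malformed)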
+void quantize_and_verify(loco::Graph *g, Type quantized_dtype, Granularity granularity)
+{
+ run_phase(g, quantized_dtype, granularity);
+
+ auto ctx = std::make_unique<luci::QuantizedModelVerifier::Context>();
+ {
+ ctx->output_model_dtype = quantized_dtype;
+ ctx->granularity = granularity;
+ // Test graph has only one input/output
+ ctx->input_types = {quantized_dtype};
+ ctx->output_types = {quantized_dtype};
+ }
+
+ luci::QuantizedModelVerifier verifier(std::move(ctx));
+ verifier.verify(g);
+}
+
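+// Quantize with per-layer info (the layer named "test" gets the opposite dtype),
+// then verify with the same layer info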
+void quantize_and_verify_with_layer_info(loco::Graph *g, Type quantized_dtype,
+ Granularity granularity)
+{
+ // A layer named "test" has dtype different from quantized_dtype
+ luci::LayerInfo info;
+ {
+ info.name = "test";
+ // dtype is different from quantized_dtype
+ info.dtype = quantized_dtype == Type::U8 ? Type::S16 : Type::U8;
+ info.granularity = Granularity::ChannelWise;
+ }
+
+ // Do quantization
+ {
+ auto ctx = std::make_unique<luci::QuantizeWithMinMaxPass::Context>();
+ {
+ ctx->input_model_dtype = Type::FLOAT32;
+ ctx->output_model_dtype = quantized_dtype;
+ ctx->granularity = granularity;
+ // Test graph has only one input/output
+ ctx->input_types = {quantized_dtype};
+ ctx->output_types = {quantized_dtype};
+ ctx->TF_style_maxpool = false;
+ ctx->layers_info.push_back(info);
+ }
+
+ run_phase(g, std::move(ctx));
+ }
+
+ // Do verification
+ {
+ auto ctx = std::make_unique<luci::QuantizedModelVerifier::Context>();
+ {
+ ctx->output_model_dtype = quantized_dtype;
+ ctx->granularity = granularity;
+ ctx->input_types = {quantized_dtype};
+ ctx->output_types = {quantized_dtype};
+ ctx->TF_style_maxpool = false;
+ ctx->layers_info.push_back(info);
+ }
+
+ luci::QuantizedModelVerifier verifier(std::move(ctx));
+ verifier.verify(g);
+ }
+}
+
+// Helper function to reduce duplicated test code
+// Assumption: g->output()->from() is the target node
+void quantize_and_verify_with_wrong_type(luci::test::TestIOGraph *g, Type quantized_dtype,
+ Granularity granularity, Type wrong_dtype)
+{
+ run_phase(g->g(), quantized_dtype, granularity);
+
+ auto node = loco::must_cast<luci::CircleNode *>(g->output()->from());
+ node->dtype(wrong_dtype);
+
+ auto ctx = std::make_unique<luci::QuantizedModelVerifier::Context>();
+ {
+ ctx->output_model_dtype = quantized_dtype;
+ ctx->granularity = granularity;
+ // Test graph has only one input/output
+ ctx->input_types = {quantized_dtype};
+ ctx->output_types = {quantized_dtype};
+ }
+
+ luci::QuantizedModelVerifier verifier(std::move(ctx));
+ verifier.verify(g->g());
+}
+
+// Helper function to reduce duplicated test code
+// Assumption: g->output()->from() is the target node
+void quantize_and_verify_with_wrong_granularity(luci::test::TestIOGraph *g, Type quantized_dtype,
+ Granularity granularity)
+{
+ run_phase(g->g(), quantized_dtype, granularity);
+
+ auto node = loco::must_cast<luci::CircleNode *>(g->output()->from());
+ insert_scale_zp(node, 1.0, 1);
+
+ auto ctx = std::make_unique<luci::QuantizedModelVerifier::Context>();
+ {
+ ctx->output_model_dtype = quantized_dtype;
+ ctx->granularity = granularity;
+ // Test graph has only one input/output
+ ctx->input_types = {quantized_dtype};
+ ctx->output_types = {quantized_dtype};
+ }
+
+ luci::QuantizedModelVerifier verifier(std::move(ctx));
+ verifier.verify(g->g());
+}
+
+// Set min/max for all non-const nodes in the graph
+void set_minmax_to_non_const(loco::Graph *g, float min, float max)
+{
+ for (auto node : loco::all_nodes(g))
+ {
+ auto const_node = dynamic_cast<luci::CircleConst *>(node);
+ if (const_node != nullptr)
+ continue;
+
+ // Min/Max is not recorded for ArgMax
+ // See MinMaxObserver.cpp in record_minmax module
+ auto argmax_node = dynamic_cast<luci::CircleArgMax *>(node);
+ if (argmax_node != nullptr)
+ continue;
+
+ // Min/Max is not recorded for Split
+ // See MinMaxObserver.cpp in record_minmax module
+ auto split_node = dynamic_cast<luci::CircleSplit *>(node);
+ if (split_node != nullptr)
+ continue;
+
+ // Min/Max is not recorded for SplitV
+ // See MinMaxObserver.cpp in record_minmax module
+ auto splitv_node = dynamic_cast<luci::CircleSplitV *>(node);
+ if (splitv_node != nullptr)
+ continue;
+
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ auto qparam = std::make_unique<luci::CircleQuantParam>();
+ {
+ qparam->min.emplace_back(min);
+ qparam->max.emplace_back(max);
+ }
+ circle_node->quantparam(std::move(qparam));
+ }
+}
+
+/**
+ * @brief Simple Test Graph
+ * @note
+ * The simple test graph's nodes are initialized with
+ * simple shapes and values.
+ */
+class SimpleTestGraph : public luci::test::TestIOGraph
+{
+public:
+ virtual void init(void) = 0;
+};
+
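+// Test graph whose input/output dtype is parameterized
+// (used for the integer Add/Mul tests below)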
+class TypedTestGraph : public luci::test::TestIOGraph
+{
+protected:
+ void init(Type T, const luci::test::ShapeU32 shape_in, const luci::test::ShapeU32 shape_out)
+ {
+ TestIOGraph::init(shape_in, shape_out);
+
+ input()->dtype(T);
+ output()->dtype(T);
+
+ g()->inputs()->at(0)->dtype(T);
+ g()->outputs()->at(0)->dtype(T);
+ }
+
+public:
+ virtual void init(void) = 0;
+};
+
+class InstanceNormTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _gamma = create_dummy_const<Type::FLOAT32>(g(), {32});
+ _beta = create_dummy_const<Type::FLOAT32>(g(), {32});
+ _instnorm = g()->nodes()->create<luci::CircleInstanceNorm>();
+ {
+ _instnorm->input(input());
+ _instnorm->gamma(_gamma);
+ _instnorm->beta(_beta);
+ _instnorm->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _instnorm->name("test");
+ }
+ output()->from(_instnorm);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+public:
+ loco::Node *gamma(void) const { return _instnorm->gamma(); }
+ loco::Node *beta(void) const { return _instnorm->beta(); }
+
+private:
+ luci::CircleInstanceNorm *_instnorm = nullptr;
+ luci::CircleConst *_gamma = nullptr;
+ luci::CircleConst *_beta = nullptr;
+};
+
+class LogisticTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _logistic = g()->nodes()->create<luci::CircleLogistic>();
+ {
+ _logistic->x(input());
+ _logistic->name("test");
+ }
+ output()->from(_logistic);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+private:
+ luci::CircleLogistic *_logistic = nullptr;
+};
+
+class LocalResponseNormalizationTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({1, 2, 2, 32}, {1, 2, 2, 32});
+ _lrn = g()->nodes()->create<luci::CircleLocalResponseNormalization>();
+ {
+ _lrn->input(input());
+ _lrn->name("test");
+ }
+ output()->from(_lrn);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+private:
+ luci::CircleLocalResponseNormalization *_lrn = nullptr;
+};
+
+class SoftmaxTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _softmax = g()->nodes()->create<luci::CircleSoftmax>();
+ {
+ _softmax->logits(input());
+ _softmax->beta(0.1);
+ _softmax->name("test");
+ }
+ output()->from(_softmax);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+private:
+ luci::CircleSoftmax *_softmax = nullptr;
+};
+
+class SpaceToBatchNDTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({1, 2, 2, 1}, {4, 1, 1, 1});
+ _block_shape = create_dummy_const<Type::S32>(g(), {2});
+ for (uint32_t i = 0; i < 2; i++)
+ _block_shape->at<Type::S32>(i) = 2;
+
+ _paddings = create_dummy_const<Type::S32>(g(), {2, 2});
+ for (uint32_t i = 0; i < 4; i++)
+ _paddings->at<Type::S32>(i) = 0;
+
+ _stob = g()->nodes()->create<luci::CircleSpaceToBatchND>();
+ {
+ _stob->input(input());
+ _stob->block_shape(_block_shape);
+ _stob->paddings(_paddings);
+ _stob->name("test");
+ }
+ output()->from(_stob);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+private:
+ luci::CircleSpaceToBatchND *_stob = nullptr;
+ luci::CircleConst *_block_shape = nullptr;
+ luci::CircleConst *_paddings = nullptr;
+};
+
+class SpaceToDepthTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({1, 2, 2, 1}, {1, 1, 1, 4});
+ _stod = g()->nodes()->create<luci::CircleSpaceToDepth>();
+ {
+ _stod->input(input());
+ _stod->block_size(2);
+ _stod->name("test");
+ }
+ output()->from(_stod);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+private:
+ luci::CircleSpaceToDepth *_stod = nullptr;
+};
+
+template <Type indexT> class SliceTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _begin = g()->nodes()->template create<luci::CircleConst>();
+ {
+ _begin->dtype(indexT);
+ }
+ _size = g()->nodes()->template create<luci::CircleConst>();
+ {
+ _size->dtype(indexT);
+ }
+ _slice = g()->nodes()->template create<luci::CircleSlice>();
+ {
+ _slice->input(input());
+ _slice->begin(_begin);
+ _slice->size(_size);
+ _slice->name("test");
+ }
+ output()->from(_slice);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+private:
+ luci::CircleSlice *_slice = nullptr;
+ luci::CircleConst *_begin = nullptr;
+ luci::CircleConst *_size = nullptr;
+};
+
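+// NOTE Multi-output Ops (Split/SplitV/Unpack) inherit TestIOGraph directly;
+// the graph output comes from the corresponding *Out node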
+class SplitTestGraph final : public luci::test::TestIOGraph
+{
+public:
+ void init(void)
+ {
+ TestIOGraph::init({1, 32}, {32});
+ _split_dim = create_dummy_const<Type::S32>(g(), {1});
+ _split = g()->nodes()->create<luci::CircleSplit>();
+ {
+ _split->input(input());
+ _split->split_dim(_split_dim);
+ }
+ _split_o1 = g()->nodes()->create<luci::CircleSplitOut>();
+ {
+ _split_o1->input(_split);
+ _split_o1->index(0);
+ }
+
+ output()->from(_split_o1);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+private:
+ luci::CircleSplit *_split = nullptr;
+ luci::CircleSplitOut *_split_o1 = nullptr;
+ luci::CircleConst *_split_dim = nullptr;
+};
+
+class SplitVTestGraph final : public luci::test::TestIOGraph
+{
+public:
+ void init(void)
+ {
+ TestIOGraph::init({1, 32}, {32});
+ _size_splits = create_dummy_const<Type::S32>(g(), {1});
+ _split_dim = create_dummy_const<Type::S32>(g(), {1});
+ _splitv = g()->nodes()->create<luci::CircleSplitV>();
+ {
+ _splitv->input(input());
+ _splitv->size_splits(_size_splits);
+ _splitv->split_dim(_split_dim);
+ }
+ _splitv_o1 = g()->nodes()->create<luci::CircleSplitVOut>();
+ {
+ _splitv_o1->input(_splitv);
+ _splitv_o1->index(0);
+ }
+
+ output()->from(_splitv_o1);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+private:
+ luci::CircleSplitV *_splitv = nullptr;
+ luci::CircleSplitVOut *_splitv_o1 = nullptr;
+ luci::CircleConst *_size_splits = nullptr;
+ luci::CircleConst *_split_dim = nullptr;
+};
+
+class StridedSliceTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _begin = g()->nodes()->create<luci::CircleConst>();
+ {
+ _begin->dtype(Type::S32);
+ }
+ _end = g()->nodes()->create<luci::CircleConst>();
+ {
+ _end->dtype(Type::S32);
+ }
+ _strides = g()->nodes()->create<luci::CircleConst>();
+ {
+ _strides->dtype(Type::S32);
+ }
+ _slice = g()->nodes()->create<luci::CircleStridedSlice>();
+ {
+ _slice->input(input());
+ _slice->begin(_begin);
+ _slice->end(_end);
+ _slice->strides(_strides);
+ _slice->name("test");
+ }
+ output()->from(_slice);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+private:
+ luci::CircleStridedSlice *_slice = nullptr;
+ luci::CircleConst *_begin = nullptr;
+ luci::CircleConst *_end = nullptr;
+ luci::CircleConst *_strides = nullptr;
+};
+
+class SumTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({4, 3, 2}, {2});
+
+ _axis = create_const<Type::S32, int32_t>(g(), {2}, {1, 0});
+ _sum = g()->nodes()->create<luci::CircleSum>();
+ {
+ _sum->input(input());
+ _sum->reduction_indices(_axis);
+ _sum->name("test");
+ _sum->keep_dims(false);
+ }
+ output()->from(_sum);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+private:
+ luci::CircleSum *_sum = nullptr;
+ luci::CircleConst *_axis = nullptr;
+};
+
+class ReshapeTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _shape = g()->nodes()->create<luci::CircleConst>();
+ {
+ _shape->dtype(Type::S32);
+ }
+ _reshape = g()->nodes()->create<luci::CircleReshape>();
+ {
+ _reshape->tensor(input());
+ _reshape->shape(_shape);
+ _reshape->name("test");
+ }
+ output()->from(_reshape);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+private:
+ luci::CircleReshape *_reshape = nullptr;
+ luci::CircleConst *_shape = nullptr;
+};
+
+class TanhTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _tanh = g()->nodes()->create<luci::CircleTanh>();
+ {
+ _tanh->x(input());
+ _tanh->name("test");
+ }
+ output()->from(_tanh);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+private:
+ luci::CircleTanh *_tanh = nullptr;
+};
+
+class FloorTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _floor = g()->nodes()->create<luci::CircleFloor>();
+ {
+ _floor->x(input());
+ _floor->name("test");
+ }
+ output()->from(_floor);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+private:
+ luci::CircleFloor *_floor = nullptr;
+};
+
+template <Type indexT> class ArgMaxTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {1});
+ // output dtype is float by default, but ArgMax should have indexType (s32/s64)
+ output()->dtype(indexT);
+ _dimension = g()->nodes()->template create<luci::CircleConst>();
+ {
+ _dimension->dtype(indexT);
+ }
+ _argmax = g()->nodes()->template create<luci::CircleArgMax>();
+ {
+ _argmax->input(input());
+ _argmax->dimension(_dimension);
+ _argmax->output_type(indexT);
+ _argmax->dtype(indexT);
+ }
+ output()->from(_argmax);
+
+ set_minmax_to_non_const(g(), -1, 1);
+
+ // Sync output dtype with graph's output dtype
+ g()->outputs()->at(0)->dtype(output()->dtype());
+ }
+
+public:
+  // NOTE: Do not override `luci::CircleNode* input(void)` accidentally
+ loco::Node *input_argmax(void) { return _argmax->input(); }
+ loco::Node *dimension(void) { return _argmax->dimension(); }
+
+private:
+ luci::CircleArgMax *_argmax = nullptr;
+ luci::CircleConst *_dimension = nullptr;
+};
+
+class BatchToSpaceNDTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _block_shape = g()->nodes()->create<luci::CircleConst>();
+ {
+ _block_shape->dtype(Type::S32);
+ }
+ _crops = g()->nodes()->create<luci::CircleConst>();
+ {
+ _crops->dtype(Type::S32);
+ }
+ _btos = g()->nodes()->create<luci::CircleBatchToSpaceND>();
+ {
+ _btos->input(input());
+ _btos->block_shape(_block_shape);
+ _btos->crops(_crops);
+ _btos->name("test");
+ }
+ output()->from(_btos);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+private:
+ luci::CircleBatchToSpaceND *_btos = nullptr;
+ luci::CircleConst *_block_shape = nullptr;
+ luci::CircleConst *_crops = nullptr;
+};
+
+class DepthToSpaceTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({1, 1, 1, 4}, {1, 2, 2, 1});
+ _dtos = g()->nodes()->create<luci::CircleDepthToSpace>();
+ {
+ _dtos->input(input());
+ _dtos->block_size(2);
+ _dtos->name("test");
+ }
+ output()->from(_dtos);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+private:
+ luci::CircleDepthToSpace *_dtos = nullptr;
+};
+
+class PackTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({16}, {32});
+ _param = create_dummy_const<Type::FLOAT32>(g(), {16});
+ _pack = g()->nodes()->create<luci::CirclePack>(2);
+ {
+ _pack->values(0, input());
+ _pack->values(1, _param);
+ _pack->axis(0);
+ _pack->name("test");
+ }
+ output()->from(_pack);
+
+ set_minmax_to_non_const(g(), -1, 1);
+
+ // Set min/max of the input
+    // Pack's qparam will be propagated, overwriting the input's qparam
+ auto input = loco::must_cast<luci::CircleNode *>(pack()->values(0));
+ auto qp = input->quantparam();
+ qp->min[0] = -0.5;
+ qp->max[0] = 0.5;
+ }
+
+public:
+ luci::CirclePack *pack(void) { return _pack; }
+
+private:
+ luci::CirclePack *_pack = nullptr;
+ luci::CircleConst *_param = nullptr;
+};
+
+class PadTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _paddings = g()->nodes()->create<luci::CircleConst>();
+ {
+ _paddings->dtype(Type::S32);
+ }
+ _pad = g()->nodes()->create<luci::CirclePad>();
+ {
+ _pad->input(input());
+ _pad->paddings(_paddings);
+ _pad->name("test");
+ }
+ output()->from(_pad);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+private:
+ luci::CirclePad *_pad = nullptr;
+ luci::CircleConst *_paddings = nullptr;
+};
+
+class PadV2TestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _paddings = g()->nodes()->create<luci::CircleConst>();
+ {
+ _paddings->dtype(Type::S32);
+ }
+ _constant_values = create_dummy_const<Type::FLOAT32>(g(), {1});
+ _pad = g()->nodes()->create<luci::CirclePadV2>();
+ {
+ _pad->input(input());
+ _pad->paddings(_paddings);
+ _pad->constant_values(_constant_values);
+ _pad->name("test");
+ }
+ output()->from(_pad);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+private:
+ luci::CirclePadV2 *_pad = nullptr;
+ luci::CircleConst *_paddings = nullptr;
+ luci::CircleConst *_constant_values = nullptr;
+};
+
+class MirrorPadTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _paddings = g()->nodes()->create<luci::CircleConst>();
+ {
+ _paddings->dtype(Type::S32);
+ }
+ _constant_values = create_dummy_const<Type::FLOAT32>(g(), {1});
+ _mirror_pad = g()->nodes()->create<luci::CircleMirrorPad>();
+ {
+ _mirror_pad->input(input());
+ _mirror_pad->paddings(_paddings);
+ _mirror_pad->mode(luci::MirrorPadMode::REFLECT);
+ _mirror_pad->name("test");
+ }
+ output()->from(_mirror_pad);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+private:
+ luci::CircleMirrorPad *_mirror_pad = nullptr;
+ luci::CircleConst *_paddings = nullptr;
+ luci::CircleConst *_constant_values = nullptr;
+};
+
+class TransposeTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _perm = g()->nodes()->create<luci::CircleConst>();
+ {
+ _perm->dtype(Type::S32);
+ }
+ _transpose = g()->nodes()->create<luci::CircleTranspose>();
+ {
+ _transpose->a(input());
+ _transpose->perm(_perm);
+ _transpose->name("test");
+ }
+ output()->from(_transpose);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+private:
+ luci::CircleTranspose *_transpose = nullptr;
+ luci::CircleConst *_perm = nullptr;
+};
+
+class ConcatenationTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({16}, {32});
+ _param = create_dummy_const<Type::FLOAT32>(g(), {16});
+ _concat = g()->nodes()->create<luci::CircleConcatenation>(2);
+ {
+ _concat->values(0, input());
+ _concat->values(1, _param);
+ _concat->axis(0);
+ _concat->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _concat->name("test");
+ }
+ output()->from(_concat);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+private:
+ luci::CircleConcatenation *_concat = nullptr;
+ luci::CircleConst *_param = nullptr;
+};
+
+template <Type indexT> class OneHotTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32, 10});
+ {
+ // input dtype is float by default, but OneHot's input should have indexType (s32/s64)
+ input()->dtype(indexT);
+ }
+
+ _depth = g()->nodes()->template create<luci::CircleConst>();
+ {
+ _depth->dtype(loco::DataType::S32);
+ }
+
+ _on_value = g()->nodes()->template create<luci::CircleConst>();
+ {
+ _on_value->dtype(loco::DataType::FLOAT32);
+ }
+
+ _off_value = g()->nodes()->template create<luci::CircleConst>();
+ {
+ _off_value->dtype(loco::DataType::FLOAT32);
+ }
+
+ _one_hot = g()->nodes()->template create<luci::CircleOneHot>();
+ {
+ _one_hot->indices(input());
+ _one_hot->depth(_depth);
+ _one_hot->on_value(_on_value);
+ _one_hot->off_value(_off_value);
+ _one_hot->axis(-1);
+ _one_hot->dtype(loco::DataType::FLOAT32);
+ _one_hot->name("test");
+ }
+ output()->from(_one_hot);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+private:
+ luci::CircleOneHot *_one_hot = nullptr;
+ luci::CircleConst *_depth = nullptr;
+ luci::CircleConst *_on_value = nullptr;
+ luci::CircleConst *_off_value = nullptr;
+};
+
+// Test graph for comparison Ops
+// GREATER, GREATER_EQUAL, LESS, LESS_EQUAL, EQUAL, NOT_EQUAL
+template <class Op> class ComparisonOpTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ output()->dtype(loco::DataType::BOOL);
+ _y = create_dummy_const<Type::FLOAT32>(g(), {32});
+ _op = g()->nodes()->template create<Op>();
+ {
+ _op->x(input());
+ _op->y(_y);
+ _op->dtype(loco::DataType::BOOL);
+ }
+ output()->from(_op);
+
+ set_minmax_to_non_const(g(), -1, 1);
+
+ // Sync output dtype with graph's output dtype
+ g()->outputs()->at(0)->dtype(output()->dtype());
+ }
+
+ loco::Node *x(void) const { return _op->x(); }
+ loco::Node *y(void) const { return _op->y(); }
+
+private:
+ Op *_op = nullptr;
+ luci::CircleConst *_y = nullptr;
+};
+
+// Test graph for binary logical Ops
+// LOGICAL_OR, LOGICAL_AND
+template <class Op> class BinaryLogicalOpTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ input()->dtype(loco::DataType::BOOL);
+ output()->dtype(loco::DataType::BOOL);
+ _y = create_dummy_const<Type::BOOL>(g(), {32});
+ _op = g()->nodes()->template create<Op>();
+ {
+ _op->x(input());
+ _op->y(_y);
+ _op->dtype(loco::DataType::BOOL);
+ }
+ output()->from(_op);
+
+ set_minmax_to_non_const(g(), -1, 1);
+
+ // Sync output dtype with graph's output dtype
+ g()->outputs()->at(0)->dtype(output()->dtype());
+ }
+
+ loco::Node *x(void) const { return _op->x(); }
+ loco::Node *y(void) const { return _op->y(); }
+
+private:
+ Op *_op = nullptr;
+ luci::CircleConst *_y = nullptr;
+};
+
+class DivTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+
+ _const = create_dummy_const<Type::FLOAT32>(g(), {32});
+ _div = g()->nodes()->create<luci::CircleDiv>();
+ {
+ _div->x(input());
+ _div->y(_const);
+ _div->name("test");
+ }
+ output()->from(_div);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+ loco::Node *x() { return _div->x(); }
+
+ loco::Node *y() { return _div->y(); }
+
+private:
+ luci::CircleDiv *_div = nullptr;
+ luci::CircleConst *_const = nullptr;
+};
+
+class FloorDivTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+
+ _const = create_dummy_const<Type::FLOAT32>(g(), {32});
+ _floor_div = g()->nodes()->create<luci::CircleFloorDiv>();
+ {
+ _floor_div->x(input());
+ _floor_div->y(_const);
+ _floor_div->name("test");
+ }
+ output()->from(_floor_div);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+ loco::Node *x() { return _floor_div->x(); }
+
+ loco::Node *y() { return _floor_div->y(); }
+
+private:
+ luci::CircleFloorDiv *_floor_div = nullptr;
+ luci::CircleConst *_const = nullptr;
+};
+
+class RsqrtTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _rsqrt = g()->nodes()->create<luci::CircleRsqrt>();
+ {
+ _rsqrt->x(input());
+ _rsqrt->name("test");
+ }
+ output()->from(_rsqrt);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+private:
+ luci::CircleRsqrt *_rsqrt = nullptr;
+};
+
+class SqrtTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _sqrt = g()->nodes()->create<luci::CircleSqrt>();
+ {
+ _sqrt->x(input());
+ _sqrt->name("test");
+ }
+ output()->from(_sqrt);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+private:
+ luci::CircleSqrt *_sqrt = nullptr;
+};
+
+class EluTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+ _elu = g()->nodes()->create<luci::CircleElu>();
+ {
+ _elu->features(input());
+ _elu->name("test");
+ }
+ output()->from(_elu);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+private:
+ luci::CircleElu *_elu = nullptr;
+};
+
+class PowTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+
+ _const = create_dummy_const<Type::FLOAT32>(g(), {32});
+ _pow = g()->nodes()->create<luci::CirclePow>();
+ {
+ _pow->x(input());
+ _pow->y(_const);
+ _pow->name("test");
+ }
+ output()->from(_pow);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+ loco::Node *x() { return _pow->x(); }
+
+ loco::Node *y() { return _pow->y(); }
+
+private:
+ luci::CirclePow *_pow = nullptr;
+ luci::CircleConst *_const = nullptr;
+};
+
+class ReduceMaxTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({4, 3, 2}, {2});
+
+ _axis = create_const<Type::S32, int32_t>(g(), {4}, {1, 0, -3, -3});
+ _reduce_max = g()->nodes()->create<luci::CircleReduceMax>();
+ {
+ _reduce_max->input(input());
+ _reduce_max->reduction_indices(_axis);
+ _reduce_max->name("test");
+ _reduce_max->keep_dims(false);
+ }
+ output()->from(_reduce_max);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+private:
+ luci::CircleReduceMax *_reduce_max = nullptr;
+ luci::CircleConst *_axis = nullptr;
+};
+
+class ResizeBilinearTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({1, 4, 4, 1}, {1, 8, 8, 1});
+
+ _size = create_const<Type::S32, int32_t>(g(), {2}, {8, 8});
+ _resize_bilinear = g()->nodes()->create<luci::CircleResizeBilinear>();
+ {
+ _resize_bilinear->input(input());
+ _resize_bilinear->size(_size);
+ _resize_bilinear->name("test");
+ }
+ output()->from(_resize_bilinear);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+private:
+ luci::CircleResizeBilinear *_resize_bilinear = nullptr;
+ luci::CircleConst *_size = nullptr;
+};
+
+class ResizeNearestNeighborTestGraph final : public luci::test::TestIOGraph
+{
+public:
+ void init(void)
+ {
+ TestIOGraph::init({1, 4, 4, 1}, {1, 8, 8, 1});
+
+ _size = create_const<Type::S32, int32_t>(g(), {2}, {8, 8});
+ _resize_nearest_neighbor = g()->nodes()->create<luci::CircleResizeNearestNeighbor>();
+ {
+ _resize_nearest_neighbor->input(input());
+ _resize_nearest_neighbor->size(_size);
+ _resize_nearest_neighbor->name("test");
+ }
+ output()->from(_resize_nearest_neighbor);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+private:
+ luci::CircleResizeNearestNeighbor *_resize_nearest_neighbor = nullptr;
+ luci::CircleConst *_size = nullptr;
+};
+
+class UnpackTestGraph final : public luci::test::TestIOGraph
+{
+public:
+ void init(void)
+ {
+ TestIOGraph::init({1, 32}, {32});
+ _unpack = g()->nodes()->create<luci::CircleUnpack>();
+ {
+ _unpack->value(input());
+ _unpack->axis(0);
+ _unpack->num(1);
+ }
+ _unpack_o1 = g()->nodes()->create<luci::CircleUnpackOut>();
+ {
+ _unpack_o1->input(_unpack);
+ _unpack_o1->index(0);
+ }
+
+ output()->from(_unpack_o1);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+private:
+ luci::CircleUnpack *_unpack = nullptr;
+ luci::CircleUnpackOut *_unpack_o1 = nullptr;
+};
+
+class MulTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+
+ _const = create_dummy_const<Type::FLOAT32>(g(), {32});
+ _mul = g()->nodes()->create<luci::CircleMul>();
+ {
+ _mul->x(input());
+ _mul->y(_const);
+ _mul->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _mul->name("test");
+ }
+ output()->from(_mul);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+ loco::Node *x() { return _mul->x(); }
+ loco::Node *y() { return _mul->y(); }
+
+private:
+ luci::CircleMul *_mul = nullptr;
+ luci::CircleConst *_const = nullptr;
+};
+
+template <Type T> class IntMulTestGraph final : public TypedTestGraph
+{
+public:
+ void init(void) override
+ {
+ TypedTestGraph::init(T, {32}, {32});
+
+ _const = create_dummy_const<T>(g(), {32});
+ _mul = g()->nodes()->template create<luci::CircleMul>();
+ {
+ _mul->x(input());
+ _mul->y(_const);
+ _mul->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _mul->name("test");
+ _mul->dtype(T);
+ }
+ output()->from(_mul);
+ }
+
+ loco::Node *x() { return _mul->x(); }
+ loco::Node *y() { return _mul->y(); }
+
+private:
+ luci::CircleMul *_mul = nullptr;
+ luci::CircleConst *_const = nullptr;
+};
+
+class AddTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({32}, {32});
+
+ _const = create_dummy_const<Type::FLOAT32>(g(), {32});
+ _add = g()->nodes()->create<luci::CircleAdd>();
+ {
+ _add->x(input());
+ _add->y(_const);
+ _add->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _add->name("test");
+ }
+ output()->from(_add);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+ loco::Node *x() { return _add->x(); }
+ loco::Node *y() { return _add->y(); }
+
+private:
+ luci::CircleAdd *_add = nullptr;
+ luci::CircleConst *_const = nullptr;
+};
+
+template <Type T> class IntAddTestGraph final : public TypedTestGraph
+{
+public:
+ void init(void) override
+ {
+ TypedTestGraph::init(T, {32}, {32});
+
+ _const = create_dummy_const<T>(g(), {32});
+ _add = g()->nodes()->template create<luci::CircleAdd>();
+ {
+ _add->x(input());
+ _add->y(_const);
+ _add->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _add->name("test");
+ _add->dtype(T);
+ }
+ output()->from(_add);
+ }
+
+ loco::Node *x() { return _add->x(); }
+ loco::Node *y() { return _add->y(); }
+
+private:
+ luci::CircleAdd *_add = nullptr;
+ luci::CircleConst *_const = nullptr;
+};
+
+} // namespace
+
+// Quantize and verify with given configurations
+#define TEST_WITH_GRAPH(graph, type, granularity) \
+ do \
+ { \
+ graph g; \
+ g.init(); \
+ EXPECT_NO_THROW(quantize_and_verify(g.g(), type, granularity)); \
+ } while (0)
+
+// Quantize and verify with layer info
+#define TEST_WITH_LAYER_INFO(graph, type, granularity) \
+ do \
+ { \
+ graph g; \
+ g.init(); \
+ EXPECT_NO_THROW(quantize_and_verify_with_layer_info(g.g(), type, granularity)); \
+ } while (0)
+
+// Quantize and verify with wrong type
+#define TEST_WITH_WRONG_TYPE(graph, type, granularity, wrong_dtype) \
+ do \
+ { \
+ graph g; \
+ g.init(); \
+ EXPECT_ANY_THROW(quantize_and_verify_with_wrong_type(&g, type, granularity, wrong_dtype)); \
+ } while (0)
+
+// Quantize and verify with wrong granularity
+#define TEST_WITH_WRONG_GRANULARITY(graph, type, granularity) \
+ do \
+ { \
+ graph g; \
+ g.init(); \
+ EXPECT_ANY_THROW(quantize_and_verify_with_wrong_granularity(&g, type, granularity)); \
+ } while (0)
+
+// Quantize and verify with wrong type
+// Users can specify the test target
+#define TEST_WITH_WRONG_TYPE_TARGET(graph, type, granularity_, wrong_dtype, target) \
+ do \
+ { \
+ graph g; \
+ g.init(); \
+ auto node = loco::must_cast<luci::CircleNode *>(target); \
+ run_phase(g.g(), type, granularity_); \
+ auto after_node = loco::must_cast<luci::CircleNode *>(target); \
+ after_node->dtype(wrong_dtype); \
+ auto ctx = std::make_unique<luci::QuantizedModelVerifier::Context>(); \
+ { \
+ ctx->output_model_dtype = type; \
+ ctx->granularity = granularity_; \
+ ctx->input_types = {type}; \
+ ctx->output_types = {type}; \
+ } \
+ luci::QuantizedModelVerifier verifier(std::move(ctx)); \
+ EXPECT_ANY_THROW(verifier.verify(g.g())); \
+ } while (0)
+
+// Quantize and verify with wrong granularity
+// Users can specify the test target
+#define TEST_WITH_WRONG_GRANULARITY_TARGET(graph, type, granularity_, target) \
+ do \
+ { \
+ graph g; \
+ g.init(); \
+ auto node = loco::must_cast<luci::CircleNode *>(target); \
+ run_phase(g.g(), type, granularity_); \
+ auto after_node = loco::must_cast<luci::CircleNode *>(target); \
+ insert_scale_zp(after_node, 1.0, 1); \
+ auto ctx = std::make_unique<luci::QuantizedModelVerifier::Context>(); \
+ { \
+ ctx->output_model_dtype = type; \
+ ctx->granularity = granularity_; \
+ ctx->input_types = {type}; \
+ ctx->output_types = {type}; \
+ } \
+ luci::QuantizedModelVerifier verifier(std::move(ctx)); \
+ EXPECT_ANY_THROW(verifier.verify(g.g())); \
+ } while (0)
+
+// Test a local helper function
+TEST(QuantizedModelVerifierTest, LocalCreateDummyConst)
+{
+ loco::Graph g;
+
+ EXPECT_NO_THROW(create_dummy_const<Type::FLOAT32>(&g, {32, 32}));
+}
+
+TEST(QuantizedModelVerifierTest, LocalCreateConst)
+{
+ loco::Graph g;
+ std::initializer_list<float> values = {0.1, 0, -5, 100};
+ luci::CircleConst *node = create_const<Type::FLOAT32, float>(&g, {2, 2}, values);
+
+ uint32_t index = 0;
+ for (auto val : values)
+ {
+ EXPECT_EQ(node->at<Type::FLOAT32>(index++), val);
+ }
+}
+
+TEST(QuantizedModelVerifierTest, InstanceNorm)
+{
+ TEST_WITH_GRAPH(InstanceNormTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(InstanceNormTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(InstanceNormTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(InstanceNormTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(InstanceNormTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(InstanceNormTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, InstanceNorm_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(InstanceNormTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(InstanceNormTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(InstanceNormTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, InstanceNorm_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(InstanceNormTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(InstanceNormTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(InstanceNormTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, LocalResponseNormalization)
+{
+ TEST_WITH_GRAPH(LocalResponseNormalizationTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(LocalResponseNormalizationTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(LocalResponseNormalizationTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(LocalResponseNormalizationTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(LocalResponseNormalizationTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(LocalResponseNormalizationTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, LocalResponseNormalization_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(LocalResponseNormalizationTestGraph, Type::U8, Granularity::LayerWise,
+ Type::S16);
+ TEST_WITH_WRONG_TYPE(LocalResponseNormalizationTestGraph, Type::U8, Granularity::ChannelWise,
+ Type::S16);
+ TEST_WITH_WRONG_TYPE(LocalResponseNormalizationTestGraph, Type::S16, Granularity::ChannelWise,
+ Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, LocalResponseNormalization_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(LocalResponseNormalizationTestGraph, Type::U8,
+ Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(LocalResponseNormalizationTestGraph, Type::U8,
+ Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(LocalResponseNormalizationTestGraph, Type::S16,
+ Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Logistic)
+{
+ TEST_WITH_GRAPH(LogisticTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(LogisticTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(LogisticTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(LogisticTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(LogisticTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(LogisticTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Logistic_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(LogisticTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(LogisticTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(LogisticTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Logistic_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(LogisticTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(LogisticTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(LogisticTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Softmax)
+{
+ TEST_WITH_GRAPH(SoftmaxTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(SoftmaxTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(SoftmaxTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(SoftmaxTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(SoftmaxTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(SoftmaxTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Softmax_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(SoftmaxTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SoftmaxTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SoftmaxTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Softmax_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(SoftmaxTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(SoftmaxTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(SoftmaxTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, SpaceToBatchND)
+{
+ TEST_WITH_GRAPH(SpaceToBatchNDTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(SpaceToBatchNDTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(SpaceToBatchNDTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(SpaceToBatchNDTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(SpaceToBatchNDTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(SpaceToBatchNDTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, SpaceToBatchND_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(SpaceToBatchNDTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SpaceToBatchNDTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SpaceToBatchNDTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, SpaceToBatchND_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(SpaceToBatchNDTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(SpaceToBatchNDTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(SpaceToBatchNDTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, SpaceToDepth)
+{
+ TEST_WITH_GRAPH(SpaceToDepthTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(SpaceToDepthTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(SpaceToDepthTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(SpaceToDepthTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(SpaceToDepthTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(SpaceToDepthTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, SpaceToDepth_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(SpaceToDepthTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SpaceToDepthTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SpaceToDepthTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, SpaceToDepth_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(SpaceToDepthTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(SpaceToDepthTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(SpaceToDepthTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Slice)
+{
+ TEST_WITH_GRAPH(SliceTestGraph<Type::S32>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(SliceTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(SliceTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_GRAPH(SliceTestGraph<Type::S64>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(SliceTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(SliceTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(SliceTestGraph<Type::S32>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(SliceTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(SliceTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(SliceTestGraph<Type::S64>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(SliceTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(SliceTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Slice_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(SliceTestGraph<Type::S32>, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SliceTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SliceTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise, Type::U8);
+
+ TEST_WITH_WRONG_TYPE(SliceTestGraph<Type::S64>, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SliceTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SliceTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Slice_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(SliceTestGraph<Type::S32>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(SliceTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(SliceTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_WRONG_GRANULARITY(SliceTestGraph<Type::S64>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(SliceTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(SliceTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Split)
+{
+ TEST_WITH_GRAPH(SplitTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(SplitTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(SplitTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Split_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(SplitTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SplitTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SplitTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Split_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(SplitTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(SplitTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(SplitTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, SplitV)
+{
+ TEST_WITH_GRAPH(SplitVTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(SplitVTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(SplitVTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, SplitV_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(SplitVTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SplitVTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SplitVTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, SplitV_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(SplitVTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(SplitVTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(SplitVTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, StridedSlice)
+{
+ TEST_WITH_GRAPH(StridedSliceTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(StridedSliceTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(StridedSliceTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(StridedSliceTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(StridedSliceTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(StridedSliceTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, StridedSlice_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(StridedSliceTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(StridedSliceTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(StridedSliceTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, StridedSlice_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(StridedSliceTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(StridedSliceTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(StridedSliceTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Sum)
+{
+ TEST_WITH_GRAPH(SumTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(SumTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(SumTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(SumTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(SumTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(SumTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Sum_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(SumTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SumTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SumTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Sum_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(SumTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(SumTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(SumTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, ArgMax)
+{
+ TEST_WITH_GRAPH(ArgMaxTestGraph<Type::S32>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(ArgMaxTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(ArgMaxTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_GRAPH(ArgMaxTestGraph<Type::S64>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(ArgMaxTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(ArgMaxTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, ArgMax_wrong_input_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(ArgMaxTestGraph<Type::S32>, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(ArgMaxTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(ArgMaxTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise, Type::U8);
+
+ TEST_WITH_WRONG_TYPE(ArgMaxTestGraph<Type::S64>, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(ArgMaxTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(ArgMaxTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, ArgMax_wrong_dimension_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE_TARGET(ArgMaxTestGraph<Type::S32>, Type::U8, Granularity::LayerWise,
+ Type::S16, g.dimension());
+ TEST_WITH_WRONG_TYPE_TARGET(ArgMaxTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise,
+ Type::S16, g.dimension());
+ TEST_WITH_WRONG_TYPE_TARGET(ArgMaxTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise,
+ Type::U8, g.dimension());
+
+ TEST_WITH_WRONG_TYPE_TARGET(ArgMaxTestGraph<Type::S64>, Type::U8, Granularity::LayerWise,
+ Type::S16, g.dimension());
+ TEST_WITH_WRONG_TYPE_TARGET(ArgMaxTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise,
+ Type::S16, g.dimension());
+ TEST_WITH_WRONG_TYPE_TARGET(ArgMaxTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise,
+ Type::U8, g.dimension());
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, ArgMax_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ArgMaxTestGraph<Type::S32>, Type::U8, Granularity::LayerWise,
+ g.input_argmax());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ArgMaxTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise,
+ g.input_argmax());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ArgMaxTestGraph<Type::S32>, Type::S16,
+ Granularity::ChannelWise, g.input_argmax());
+
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ArgMaxTestGraph<Type::S64>, Type::U8, Granularity::LayerWise,
+ g.input_argmax());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ArgMaxTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise,
+ g.input_argmax());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ArgMaxTestGraph<Type::S64>, Type::S16,
+ Granularity::ChannelWise, g.input_argmax());
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, BatchToSpaceND)
+{
+ TEST_WITH_GRAPH(BatchToSpaceNDTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(BatchToSpaceNDTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(BatchToSpaceNDTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(BatchToSpaceNDTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(BatchToSpaceNDTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(BatchToSpaceNDTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, BatchToSpaceND_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(BatchToSpaceNDTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(BatchToSpaceNDTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(BatchToSpaceNDTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, BatchToSpaceND_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(BatchToSpaceNDTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(BatchToSpaceNDTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(BatchToSpaceNDTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, DepthToSpace)
+{
+ TEST_WITH_GRAPH(DepthToSpaceTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(DepthToSpaceTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(DepthToSpaceTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(DepthToSpaceTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(DepthToSpaceTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(DepthToSpaceTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, DepthToSpace_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(DepthToSpaceTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(DepthToSpaceTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(DepthToSpaceTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, DepthToSpace_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(DepthToSpaceTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(DepthToSpaceTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(DepthToSpaceTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Concatenation)
+{
+ TEST_WITH_GRAPH(ConcatenationTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(ConcatenationTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(ConcatenationTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(ConcatenationTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(ConcatenationTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(ConcatenationTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Concatenation_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(ConcatenationTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(ConcatenationTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(ConcatenationTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Concatenation_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(ConcatenationTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(ConcatenationTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(ConcatenationTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, LogicalOr)
+{
+ TEST_WITH_GRAPH(BinaryLogicalOpTestGraph<luci::CircleLogicalOr>, Type::U8,
+ Granularity::LayerWise);
+ TEST_WITH_GRAPH(BinaryLogicalOpTestGraph<luci::CircleLogicalOr>, Type::U8,
+ Granularity::ChannelWise);
+ TEST_WITH_GRAPH(BinaryLogicalOpTestGraph<luci::CircleLogicalOr>, Type::S16,
+ Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, LogicalOr_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(BinaryLogicalOpTestGraph<luci::CircleLogicalOr>, Type::U8,
+ Granularity::LayerWise, Type::U8);
+ TEST_WITH_WRONG_TYPE(BinaryLogicalOpTestGraph<luci::CircleLogicalOr>, Type::U8,
+ Granularity::ChannelWise, Type::U8);
+ TEST_WITH_WRONG_TYPE(BinaryLogicalOpTestGraph<luci::CircleLogicalOr>, Type::S16,
+ Granularity::ChannelWise, Type::S16);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Reshape)
+{
+ TEST_WITH_GRAPH(ReshapeTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(ReshapeTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(ReshapeTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(ReshapeTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(ReshapeTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(ReshapeTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Reshape_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(ReshapeTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(ReshapeTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(ReshapeTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Reshape_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(ReshapeTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(ReshapeTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(ReshapeTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Tanh)
+{
+ TEST_WITH_GRAPH(TanhTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(TanhTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(TanhTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(TanhTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(TanhTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(TanhTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Tanh_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(TanhTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(TanhTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(TanhTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Tanh_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(TanhTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(TanhTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(TanhTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Pack)
+{
+ TEST_WITH_GRAPH(PackTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(PackTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(PackTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(PackTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(PackTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(PackTestGraph, Type::S16, Granularity::ChannelWise);
+
+ // Test if Pack's qparam is propagated to the input
+ {
+ PackTestGraph g;
+ g.init();
+ quantize_and_verify(g.g(), Type::U8, Granularity::ChannelWise);
+ auto input = loco::must_cast<luci::CircleNode *>(g.pack()->values(0));
+ auto qp = input->quantparam();
+ EXPECT_FLOAT_EQ(2.0 / 255.0, qp->scale[0]);
+ EXPECT_FLOAT_EQ(128, qp->zerop[0]);
+ }
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Pack_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(PackTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(PackTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(PackTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Pack_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(PackTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(PackTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(PackTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Pad)
+{
+ TEST_WITH_GRAPH(PadTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(PadTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(PadTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(PadTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(PadTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(PadTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Pad_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(PadTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(PadTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(PadTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Pad_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(PadTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(PadTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(PadTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, PadV2)
+{
+ TEST_WITH_GRAPH(PadV2TestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(PadV2TestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(PadV2TestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(PadV2TestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(PadV2TestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(PadV2TestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, PadV2_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(PadV2TestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(PadV2TestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(PadV2TestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, PadV2_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(PadV2TestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(PadV2TestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(PadV2TestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, MirrorPad)
+{
+ TEST_WITH_GRAPH(MirrorPadTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(MirrorPadTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(MirrorPadTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(MirrorPadTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(MirrorPadTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(MirrorPadTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, MirrorPad_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(MirrorPadTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(MirrorPadTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(MirrorPadTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, MirrorPad_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(MirrorPadTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(MirrorPadTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(MirrorPadTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Transpose)
+{
+ TEST_WITH_GRAPH(TransposeTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(TransposeTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(TransposeTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(TransposeTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(TransposeTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(TransposeTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Transpose_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(TransposeTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(TransposeTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(TransposeTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Transpose_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(TransposeTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(TransposeTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(TransposeTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Floor)
+{
+ TEST_WITH_GRAPH(FloorTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(FloorTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(FloorTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(FloorTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(FloorTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(FloorTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Floor_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(FloorTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(FloorTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(FloorTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Floor_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(FloorTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(FloorTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(FloorTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, GreaterEqual)
+{
+ TEST_WITH_GRAPH(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::U8,
+ Granularity::LayerWise);
+ TEST_WITH_GRAPH(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::U8,
+ Granularity::ChannelWise);
+ TEST_WITH_GRAPH(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::S16,
+ Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, GreaterEqual_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::U8,
+ Granularity::LayerWise, Type::U8);
+ TEST_WITH_WRONG_TYPE(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::U8,
+ Granularity::ChannelWise, Type::U8);
+ TEST_WITH_WRONG_TYPE(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::S16,
+ Granularity::ChannelWise, Type::S16);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, GreaterEqual_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::U8,
+ Granularity::LayerWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::U8,
+ Granularity::ChannelWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::S16,
+ Granularity::ChannelWise, g.x());
+
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::U8,
+ Granularity::LayerWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::U8,
+ Granularity::ChannelWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreaterEqual>, Type::S16,
+ Granularity::ChannelWise, g.y());
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Greater)
+{
+ TEST_WITH_GRAPH(ComparisonOpTestGraph<luci::CircleGreater>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(ComparisonOpTestGraph<luci::CircleGreater>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(ComparisonOpTestGraph<luci::CircleGreater>, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Greater_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(ComparisonOpTestGraph<luci::CircleGreater>, Type::U8, Granularity::LayerWise,
+ Type::U8);
+ TEST_WITH_WRONG_TYPE(ComparisonOpTestGraph<luci::CircleGreater>, Type::U8,
+ Granularity::ChannelWise, Type::U8);
+ TEST_WITH_WRONG_TYPE(ComparisonOpTestGraph<luci::CircleGreater>, Type::S16,
+ Granularity::ChannelWise, Type::S16);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Greater_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreater>, Type::U8,
+ Granularity::LayerWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreater>, Type::U8,
+ Granularity::ChannelWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreater>, Type::S16,
+ Granularity::ChannelWise, g.x());
+
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreater>, Type::U8,
+ Granularity::LayerWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreater>, Type::U8,
+ Granularity::ChannelWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleGreater>, Type::S16,
+ Granularity::ChannelWise, g.y());
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, NotEqual)
+{
+ TEST_WITH_GRAPH(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, NotEqual_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::U8,
+ Granularity::LayerWise, Type::U8);
+ TEST_WITH_WRONG_TYPE(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::U8,
+ Granularity::ChannelWise, Type::U8);
+ TEST_WITH_WRONG_TYPE(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::S16,
+ Granularity::ChannelWise, Type::S16);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, NotEqual_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::U8,
+ Granularity::LayerWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::U8,
+ Granularity::ChannelWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::S16,
+ Granularity::ChannelWise, g.x());
+
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::U8,
+ Granularity::LayerWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::U8,
+ Granularity::ChannelWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(ComparisonOpTestGraph<luci::CircleNotEqual>, Type::S16,
+ Granularity::ChannelWise, g.y());
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, OneHot)
+{
+ TEST_WITH_GRAPH(OneHotTestGraph<Type::S32>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(OneHotTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(OneHotTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_GRAPH(OneHotTestGraph<Type::S64>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(OneHotTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(OneHotTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(OneHotTestGraph<Type::S32>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(OneHotTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(OneHotTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(OneHotTestGraph<Type::S64>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(OneHotTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(OneHotTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, OneHot_wrong_input_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(OneHotTestGraph<Type::S32>, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(OneHotTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(OneHotTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise, Type::U8);
+
+ TEST_WITH_WRONG_TYPE(OneHotTestGraph<Type::S64>, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(OneHotTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(OneHotTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, OneHot_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(OneHotTestGraph<Type::S32>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(OneHotTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(OneHotTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_WRONG_GRANULARITY(OneHotTestGraph<Type::S64>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(OneHotTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(OneHotTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Div)
+{
+ TEST_WITH_GRAPH(DivTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(DivTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(DivTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(DivTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(DivTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(DivTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Div_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(DivTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(DivTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(DivTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Div_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY_TARGET(DivTestGraph, Type::U8, Granularity::LayerWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(DivTestGraph, Type::U8, Granularity::ChannelWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(DivTestGraph, Type::S16, Granularity::ChannelWise, g.x());
+
+ TEST_WITH_WRONG_GRANULARITY_TARGET(DivTestGraph, Type::U8, Granularity::LayerWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(DivTestGraph, Type::U8, Granularity::ChannelWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(DivTestGraph, Type::S16, Granularity::ChannelWise, g.y());
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, FloorDiv)
+{
+ TEST_WITH_GRAPH(FloorDivTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(FloorDivTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(FloorDivTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(FloorDivTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(FloorDivTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(FloorDivTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, FloorDiv_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(FloorDivTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(FloorDivTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(FloorDivTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, FloorDiv_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY_TARGET(FloorDivTestGraph, Type::U8, Granularity::LayerWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(FloorDivTestGraph, Type::U8, Granularity::ChannelWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(FloorDivTestGraph, Type::S16, Granularity::ChannelWise, g.x());
+
+ TEST_WITH_WRONG_GRANULARITY_TARGET(FloorDivTestGraph, Type::U8, Granularity::LayerWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(FloorDivTestGraph, Type::U8, Granularity::ChannelWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(FloorDivTestGraph, Type::S16, Granularity::ChannelWise, g.y());
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Rsqrt)
+{
+ TEST_WITH_GRAPH(RsqrtTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(RsqrtTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(RsqrtTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(RsqrtTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(RsqrtTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(RsqrtTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Rsqrt_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(RsqrtTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(RsqrtTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(RsqrtTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Rsqrt_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(RsqrtTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(RsqrtTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(RsqrtTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Sqrt)
+{
+ TEST_WITH_GRAPH(SqrtTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(SqrtTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(SqrtTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(SqrtTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(SqrtTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(SqrtTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Sqrt_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(SqrtTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SqrtTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SqrtTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Sqrt_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(SqrtTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(SqrtTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(SqrtTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Elu)
+{
+ TEST_WITH_GRAPH(EluTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(EluTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(EluTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(EluTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(EluTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(EluTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Elu_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(EluTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(EluTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(EluTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Elu_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(EluTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(EluTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(EluTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Pow)
+{
+ TEST_WITH_GRAPH(PowTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(PowTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(PowTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(PowTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(PowTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(PowTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Pow_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(PowTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(PowTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(PowTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Pow_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY_TARGET(PowTestGraph, Type::U8, Granularity::LayerWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(PowTestGraph, Type::U8, Granularity::ChannelWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(PowTestGraph, Type::S16, Granularity::ChannelWise, g.x());
+
+ TEST_WITH_WRONG_GRANULARITY_TARGET(PowTestGraph, Type::U8, Granularity::LayerWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(PowTestGraph, Type::U8, Granularity::ChannelWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(PowTestGraph, Type::S16, Granularity::ChannelWise, g.y());
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, ReduceMax)
+{
+ TEST_WITH_GRAPH(ReduceMaxTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(ReduceMaxTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(ReduceMaxTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(ReduceMaxTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(ReduceMaxTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(ReduceMaxTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, ReduceMax_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(ReduceMaxTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(ReduceMaxTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(ReduceMaxTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, ReduceMax_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(ReduceMaxTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(ReduceMaxTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(ReduceMaxTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, ResizeBilinear)
+{
+ TEST_WITH_GRAPH(ResizeBilinearTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(ResizeBilinearTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(ResizeBilinearTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(ResizeBilinearTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(ResizeBilinearTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(ResizeBilinearTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, ResizeBilinear_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(ResizeBilinearTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(ResizeBilinearTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(ResizeBilinearTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, ResizeBilinear_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(ResizeBilinearTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(ResizeBilinearTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(ResizeBilinearTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, ResizeNearestNeighbor)
+{
+ TEST_WITH_GRAPH(ResizeNearestNeighborTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(ResizeNearestNeighborTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(ResizeNearestNeighborTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(ResizeNearestNeighborTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(ResizeNearestNeighborTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(ResizeNearestNeighborTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, ResizeNearestNeighbor_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(ResizeNearestNeighborTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(ResizeNearestNeighborTestGraph, Type::U8, Granularity::ChannelWise,
+ Type::S16);
+ TEST_WITH_WRONG_TYPE(ResizeNearestNeighborTestGraph, Type::S16, Granularity::ChannelWise,
+ Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, ResizeNearestNeighbor_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(ResizeNearestNeighborTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(ResizeNearestNeighborTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(ResizeNearestNeighborTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Unpack)
+{
+ TEST_WITH_GRAPH(UnpackTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(UnpackTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(UnpackTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Unpack_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(UnpackTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(UnpackTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(UnpackTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Unpack_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(UnpackTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(UnpackTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(UnpackTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Add)
+{
+ TEST_WITH_GRAPH(AddTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(AddTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(AddTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(AddTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(AddTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(AddTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Add_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(AddTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(AddTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(AddTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Add_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY_TARGET(AddTestGraph, Type::U8, Granularity::LayerWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(AddTestGraph, Type::U8, Granularity::ChannelWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(AddTestGraph, Type::S16, Granularity::ChannelWise, g.x());
+
+ TEST_WITH_WRONG_GRANULARITY_TARGET(AddTestGraph, Type::U8, Granularity::LayerWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(AddTestGraph, Type::U8, Granularity::ChannelWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(AddTestGraph, Type::S16, Granularity::ChannelWise, g.y());
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Add_inttype)
+{
+ // Tests for S32
+ TEST_WITH_GRAPH(IntAddTestGraph<Type::S32>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(IntAddTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(IntAddTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(IntAddTestGraph<Type::S32>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(IntAddTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(IntAddTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise);
+
+ // Tests for S64
+ TEST_WITH_GRAPH(IntAddTestGraph<Type::S64>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(IntAddTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(IntAddTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(IntAddTestGraph<Type::S64>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(IntAddTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(IntAddTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise);
+
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Mul)
+{
+ TEST_WITH_GRAPH(MulTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(MulTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(MulTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(MulTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(MulTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(MulTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Mul_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(MulTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(MulTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(MulTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Mul_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY_TARGET(MulTestGraph, Type::U8, Granularity::LayerWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(MulTestGraph, Type::U8, Granularity::ChannelWise, g.x());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(MulTestGraph, Type::S16, Granularity::ChannelWise, g.x());
+
+ TEST_WITH_WRONG_GRANULARITY_TARGET(MulTestGraph, Type::U8, Granularity::LayerWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(MulTestGraph, Type::U8, Granularity::ChannelWise, g.y());
+ TEST_WITH_WRONG_GRANULARITY_TARGET(MulTestGraph, Type::S16, Granularity::ChannelWise, g.y());
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Mul_inttype)
+{
+ // Tests for S32
+ TEST_WITH_GRAPH(IntMulTestGraph<Type::S32>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(IntMulTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(IntMulTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(IntMulTestGraph<Type::S32>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(IntMulTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(IntMulTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise);
+
+ // Tests for S64
+ TEST_WITH_GRAPH(IntMulTestGraph<Type::S64>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(IntMulTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(IntMulTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(IntMulTestGraph<Type::S64>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(IntMulTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(IntMulTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise);
+
+ SUCCEED();
+}
+
+// TODO Add following testcases
+//
+// CircleConv2D
+//
+// CircleDepthwiseConv2D
+//
+// CirclePRelu
+//
+// CircleTransposeConv
+//
+// CircleFullyConnected
+//
+// CircleAveragePool2D
+//
+// CircleMaxPool2D
+//
+// CircleMean
+//
+// CircleRelu
+//
+// CircleCast
+//
+
+#undef TEST_WITH_GRAPH
+#undef TEST_WITH_LAYER_INFO
+#undef TEST_WITH_WRONG_TYPE
+#undef TEST_WITH_WRONG_GRANULARITY
+#undef TEST_WITH_WRONG_GRANULARITY_TARGET
diff --git a/compiler/luci/pass/src/RemoveDuplicateConstPass.cpp b/compiler/luci/pass/src/RemoveDuplicateConstPass.cpp
new file mode 100644
index 000000000..e50dda9e0
--- /dev/null
+++ b/compiler/luci/pass/src/RemoveDuplicateConstPass.cpp
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveDuplicateConstPass.h"
+
+#include <luci/Log.h>
+
+namespace
+{
+
+bool compare_quant_params(luci::CircleConst *left, luci::CircleConst *right)
+{
+ const auto left_quant_param = left->quantparam();
+ const auto right_quant_param = right->quantparam();
+
+ if (left_quant_param == right_quant_param)
+ return true;
+
+ if (left_quant_param != nullptr and right_quant_param != nullptr)
+ {
+ if (left_quant_param->scale == right_quant_param->scale and
+ left_quant_param->quantized_dimension == right_quant_param->quantized_dimension and
+ left_quant_param->zerop == right_quant_param->zerop and
+ left_quant_param->min == right_quant_param->min and
+ left_quant_param->max == right_quant_param->max)
+ {
+ return true;
+ }
+ }
+ return false;
+}
+
+bool compare_dim_values(luci::CircleConst *left, luci::CircleConst *right)
+{
+ const auto left_rank = left->rank();
+ const auto right_rank = right->rank();
+
+ if (left_rank != right_rank)
+ return false;
+
+ for (uint32_t i = 0; i < left_rank; ++i)
+ {
+ if (left->dim(i).value() != right->dim(i).value())
+ return false;
+ }
+
+ return true;
+}
+
+template <loco::DataType DT> bool is_equal_consts(luci::CircleConst *left, luci::CircleConst *right)
+{
+ if (not compare_quant_params(left, right))
+ return false;
+
+ if (not compare_dim_values(left, right))
+ return false;
+
+ for (uint32_t i = 0; i < left->size<DT>(); ++i)
+ {
+ if (left->at<DT>(i) != right->at<DT>(i))
+ return false;
+ }
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool RemoveDuplicateConstPass::remove_duplicate_const()
+{
+ bool changed = false;
+
+ for (auto &cur_pair : _sum_to_const)
+ {
+ // Skip buckets that hold only a single constant; there is nothing to deduplicate
+ if (cur_pair.second.size() == 1)
+ continue;
+
+ for (auto reference_const : cur_pair.second)
+ {
+ if (reference_const == nullptr)
+ continue;
+
+ for (uint32_t i = 0; i < cur_pair.second.size(); ++i)
+ {
+ auto cur_const = cur_pair.second.at(i);
+ if (cur_const == nullptr or cur_const == reference_const)
+ continue;
+
+ if (cur_const->dtype() != reference_const->dtype())
+ continue;
+
+ bool is_equal = false;
+
+ switch (cur_const->dtype())
+ {
+ case loco::DataType::FLOAT32:
+ is_equal = is_equal_consts<loco::DataType::FLOAT32>(reference_const, cur_const);
+ break;
+ case loco::DataType::S32:
+ is_equal = is_equal_consts<loco::DataType::S32>(reference_const, cur_const);
+ break;
+ case loco::DataType::S16:
+ is_equal = is_equal_consts<loco::DataType::S16>(reference_const, cur_const);
+ break;
+ case loco::DataType::S8:
+ is_equal = is_equal_consts<loco::DataType::S8>(reference_const, cur_const);
+ break;
+ case loco::DataType::U8:
+ is_equal = is_equal_consts<loco::DataType::U8>(reference_const, cur_const);
+ break;
+ default:
+ continue;
+ }
+
+ if (not is_equal)
+ continue;
+
+ loco::replace(cur_const).with(reference_const);
+
+ // Remove from next checking
+ cur_pair.second[i] = nullptr;
+
+ changed = true;
+ }
+ }
+ }
+
+ return changed;
+}
+
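+// The element sum serves as a cheap bucketing key: true duplicates always yield
+// the same sum and thus land in the same bucket, while the exact element-wise
+// comparison is deferred to is_equal_consts() above.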
+template <loco::DataType DT>
+void RemoveDuplicateConstPass::add_to_map(luci::CircleConst *const_node)
+{
+ const auto const_size = const_node->size<DT>();
+ float sum = 0.0;
+
+ for (uint32_t i = 0; i < const_size; ++i)
+ {
+ sum += const_node->at<DT>(i);
+ }
+
+ if (_sum_to_const.find(sum) == _sum_to_const.end())
+ {
+ _sum_to_const[sum] = {const_node};
+ }
+ else
+ {
+ _sum_to_const.at(sum).push_back(const_node);
+ }
+}
+
+/**
+ * Remove duplicate Const nodes.
+ *
+ * BEFORE
+ * [CircleNode] [CircleConst]
+ * | /
+ * | /
+ * [CircleNode] [CircleConst]
+ * | /
+ * | /
+ * [CircleNode]
+ *
+ * AFTER
+ *
+ * [CircleNode] [CircleConst]
+ * | / /
+ * | / /
+ * [CircleNode] /
+ * | /
+ * | /
+ * [CircleNode]
+ *
+ */
+bool RemoveDuplicateConstPass::run(loco::Graph *g)
+{
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto const_node = dynamic_cast<luci::CircleConst *>(node);
+ if (const_node == nullptr)
+ continue;
+
+ switch (const_node->dtype())
+ {
+ case loco::DataType::FLOAT32:
+ add_to_map<loco::DataType::FLOAT32>(const_node);
+ break;
+ case loco::DataType::S32:
+ add_to_map<loco::DataType::S32>(const_node);
+ break;
+ case loco::DataType::S16:
+ add_to_map<loco::DataType::S16>(const_node);
+ break;
+ case loco::DataType::S8:
+ add_to_map<loco::DataType::S8>(const_node);
+ break;
+ case loco::DataType::U8:
+ add_to_map<loco::DataType::U8>(const_node);
+ break;
+ default:
+ continue;
+ }
+ }
+
+ return remove_duplicate_const();
+}
+
+} // namespace luci
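+
+// A minimal usage sketch (assuming a valid loco::Graph *graph): the pass is
+// typically driven to a fixed point, as in the accompanying test:
+//
+//   luci::RemoveDuplicateConstPass pass;
+//   while (pass.run(graph))
+//     ; // repeat until no further duplicates are folded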
diff --git a/compiler/luci/pass/src/RemoveDuplicateConstPass.test.cpp b/compiler/luci/pass/src/RemoveDuplicateConstPass.test.cpp
new file mode 100644
index 000000000..5052a3e01
--- /dev/null
+++ b/compiler/luci/pass/src/RemoveDuplicateConstPass.test.cpp
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveDuplicateConstPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/test/TestIOGraph.h>
+#include <gtest/gtest.h>
+
+namespace
+{
+using namespace luci::test;
+
+class DuplicateConstsGraphlet
+{
+public:
+ DuplicateConstsGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, bool is_duplicate)
+ {
+ _reshape_shape = g->nodes()->create<luci::CircleConst>();
+ _reshape_shape->rank(1);
+ _reshape_shape->dim(0).set(1);
+ _reshape_shape->shape_status(luci::ShapeStatus::VALID);
+ _reshape_shape->dtype(loco::DataType::S32);
+
+ _reshape_shape->size<loco::DataType::S32>(1);
+ _reshape_shape->at<loco::DataType::S32>(0) = 5;
+ _reshape_shape->name("reshape_shape_1");
+
+ _reshape_shape_duplicate = g->nodes()->create<luci::CircleConst>();
+ _reshape_shape_duplicate->rank(1);
+ _reshape_shape_duplicate->dim(0).set(1);
+ _reshape_shape_duplicate->shape_status(luci::ShapeStatus::VALID);
+ _reshape_shape_duplicate->dtype(loco::DataType::S32);
+ if (is_duplicate)
+ {
+ _reshape_shape_duplicate->size<loco::DataType::S32>(1);
+ _reshape_shape_duplicate->at<loco::DataType::S32>(0) = 5;
+ }
+ else
+ {
+ _reshape_shape_duplicate->size<loco::DataType::S32>(2);
+ _reshape_shape_duplicate->at<loco::DataType::S32>(0) = 1;
+ _reshape_shape_duplicate->at<loco::DataType::S32>(1) = 5;
+ }
+ _reshape_shape_duplicate->name("reshape_shape_2");
+
+ _reshape_f = g->nodes()->create<luci::CircleReshape>();
+ _reshape_f->newShape()->rank(1);
+ _reshape_f->newShape()->dim(0) = 5;
+ _reshape_f->name("reshape_f");
+
+ _reshape_s = g->nodes()->create<luci::CircleReshape>();
+ if (is_duplicate)
+ {
+ _reshape_s->newShape()->rank(1);
+ _reshape_s->newShape()->dim(0) = 5;
+ }
+ else
+ {
+ _reshape_s->newShape()->rank(2);
+ _reshape_s->newShape()->dim(0) = 1;
+ _reshape_s->newShape()->dim(1) = 5;
+ }
+ _reshape_s->name("reshape_s");
+ }
+
+protected:
+ luci::CircleReshape *_reshape_f = nullptr;
+ luci::CircleReshape *_reshape_s = nullptr;
+ luci::CircleConst *_reshape_shape = nullptr;
+ luci::CircleConst *_reshape_shape_duplicate = nullptr;
+};
+
+class DuplicateConstsGraph : public TestIOGraph, public DuplicateConstsGraphlet
+{
+public:
+ DuplicateConstsGraph() = default;
+
+public:
+ void init(const ShapeU32 in_shape, const ShapeU32 out_shape, bool is_duplicate)
+ {
+ TestIOGraph::init(in_shape, out_shape);
+
+ DuplicateConstsGraphlet::init(g(), is_duplicate);
+
+ // connect graph
+ _reshape_f->tensor(input());
+ _reshape_f->shape(_reshape_shape);
+
+ _reshape_s->tensor(_reshape_f);
+ _reshape_s->shape(_reshape_shape_duplicate);
+
+ output()->from(_reshape_s);
+ }
+};
+} // namespace
+
+TEST(RemoveDuplicateConstPass, name)
+{
+ luci::RemoveDuplicateConstPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(RemoveDuplicateConstPass, remove_duplicate)
+{
+ DuplicateConstsGraph g;
+ g.init({1, 5}, {5}, true);
+
+ luci::RemoveDuplicateConstPass pass;
+ while (pass.run(g.g()))
+ ;
+
+ uint32_t const_num = 0;
+ for (auto node : loco::active_nodes(loco::output_nodes(g.g())))
+ {
+ auto target_node = dynamic_cast<luci::CircleConst *>(node);
+ if (target_node != nullptr)
+ const_num++;
+ }
+
+ ASSERT_EQ(const_num, 1);
+}
+
+TEST(RemoveDuplicateConstPass, remove_duplicate_NEG)
+{
+ DuplicateConstsGraph g;
+ g.init({1, 5}, {1, 5}, false);
+
+ luci::RemoveDuplicateConstPass pass;
+ while (pass.run(g.g()))
+ ;
+
+ uint32_t const_num = 0;
+ for (auto node : loco::active_nodes(loco::output_nodes(g.g())))
+ {
+ auto target_node = dynamic_cast<luci::CircleConst *>(node);
+ if (target_node != nullptr)
+ const_num++;
+ }
+
+ ASSERT_EQ(const_num, 2);
+}
diff --git a/compiler/luci/pass/src/RemoveFakeQuantPass.cpp b/compiler/luci/pass/src/RemoveFakeQuantPass.cpp
new file mode 100644
index 000000000..adc0aa480
--- /dev/null
+++ b/compiler/luci/pass/src/RemoveFakeQuantPass.cpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveFakeQuantPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
+void remove_fake_quant(luci::CircleFakeQuant *fakequant)
+{
+ assert(fakequant != nullptr);
+
+ auto input_node = loco::must_cast<luci::CircleNode *>(fakequant->inputs());
+
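+ // Rewire every consumer of the FakeQuant directly to its input; the FakeQuant
+ // itself becomes dead and is dropped from the output graph.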
+ replace(fakequant).with(input_node);
+}
+
+} // namespace
+
+namespace luci
+{
+/**
+ * BEFORE
+ *
+ * [CircleNode]
+ * |
+ * [CircleFakeQuant]
+ * |
+ * [CircleNode]
+ *
+ * AFTER
+ *
+ * [CircleNode]
+ * |
+ * [CircleNode] [CircleFakeQuant]
+ *
+ * The CircleFakeQuant Op will be removed from the output graph
+ */
+bool RemoveFakeQuantPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto target_node = dynamic_cast<luci::CircleFakeQuant *>(node);
+ if (target_node != nullptr)
+ {
+ remove_fake_quant(target_node);
+ changed = true;
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/RemoveFakeQuantPass.test.cpp b/compiler/luci/pass/src/RemoveFakeQuantPass.test.cpp
new file mode 100644
index 000000000..5e1d28d2b
--- /dev/null
+++ b/compiler/luci/pass/src/RemoveFakeQuantPass.test.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveFakeQuantPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class FakeQuantGraphlet
+{
+public:
+ FakeQuantGraphlet() = default;
+
+public:
+ void init(loco::Graph *g)
+ {
+ _fq = g->nodes()->create<luci::CircleFakeQuant>();
+ _fq->name("fq");
+ }
+
+protected:
+ luci::CircleFakeQuant *_fq = nullptr;
+};
+
+class FakeQuantGraph : public TestIOGraph, public FakeQuantGraphlet
+{
+public:
+ FakeQuantGraph() = default;
+
+public:
+ void init(void)
+ {
+ TestIOGraph::init({1}, {1});
+ FakeQuantGraphlet::init(g());
+
+ _fq->inputs(input());
+
+ output()->from(_fq);
+ }
+};
+
+} // namespace
+
+TEST(RemoveFakeQuantPass, name)
+{
+ luci::RemoveFakeQuantPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(RemoveFakeQuantPass, remove_fakequant)
+{
+ FakeQuantGraph g;
+ luci::RemoveFakeQuantPass pass;
+
+ g.init();
+
+ EXPECT_TRUE(pass.run(g.g()));
+
+ auto *node1 = loco::must_cast<luci::CircleNode *>(g.output()->from());
+ auto *node2 = loco::must_cast<luci::CircleNode *>(g.input());
+ EXPECT_EQ(node1, node2);
+}
diff --git a/compiler/luci/pass/src/RemoveQuantDequantSeqPass.cpp b/compiler/luci/pass/src/RemoveQuantDequantSeqPass.cpp
new file mode 100644
index 000000000..2a3157c45
--- /dev/null
+++ b/compiler/luci/pass/src/RemoveQuantDequantSeqPass.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveQuantDequantSeqPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
+bool remove_quant_dequant(luci::CircleDequantize *dequant)
+{
+ assert(dequant != nullptr);
+
+ auto quantize = dynamic_cast<luci::CircleQuantize *>(dequant->input());
+ if (quantize == nullptr)
+ return false;
+
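+ // Skip over the Quantize-Dequantize pair and wire consumers of the Dequantize
+ // to the original (pre-quantization) node.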
+ auto input_node = loco::must_cast<luci::CircleNode *>(quantize->input());
+
+ replace(dequant).with(input_node);
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+/**
+ * BEFORE
+ *
+ * [CircleNode]
+ * |
+ * [CircleQuantize]
+ * |
+ * [CircleDequantize]
+ * |
+ * [CircleNode]
+ *
+ * AFTER
+ *
+ * [CircleNode] [CircleQuantize]
+ * | |
+ * [CircleNode] [CircleDequantize]
+ *
+ * The CircleQuantize-CircleDequantize sequence will be removed from the output graph
+ */
+bool RemoveQuantDequantSeqPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto target_node = dynamic_cast<luci::CircleDequantize *>(node);
+ if (target_node != nullptr)
+ {
+ if (remove_quant_dequant(target_node))
+ changed = true;
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/RemoveQuantDequantSeqPass.test.cpp b/compiler/luci/pass/src/RemoveQuantDequantSeqPass.test.cpp
new file mode 100644
index 000000000..9186e56d7
--- /dev/null
+++ b/compiler/luci/pass/src/RemoveQuantDequantSeqPass.test.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveQuantDequantSeqPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class QuantDequantGraphlet
+{
+public:
+ QuantDequantGraphlet() = default;
+
+public:
+ void init(loco::Graph *g)
+ {
+ _qu = g->nodes()->create<luci::CircleQuantize>();
+ _qu->name("qu");
+
+ _de = g->nodes()->create<luci::CircleDequantize>();
+ _de->name("de");
+ }
+
+protected:
+ luci::CircleQuantize *_qu = nullptr;
+ luci::CircleDequantize *_de = nullptr;
+};
+
+class QuantDequantGraph : public TestIOGraph, public QuantDequantGraphlet
+{
+public:
+ QuantDequantGraph() = default;
+
+public:
+ void init(void)
+ {
+ TestIOGraph::init({1}, {1});
+ QuantDequantGraphlet::init(g());
+
+ _qu->input(input());
+ _de->input(_qu);
+
+ output()->from(_de);
+ }
+};
+
+} // namespace
+
+TEST(RemoveQuantDequantSeqPass, name)
+{
+ luci::RemoveQuantDequantSeqPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(RemoveQuantDequantSeqPass, remove_quantdequant)
+{
+ QuantDequantGraph g;
+ luci::RemoveQuantDequantSeqPass pass;
+
+ g.init();
+
+ EXPECT_TRUE(pass.run(g.g()));
+
+ auto *node1 = loco::must_cast<luci::CircleNode *>(g.output()->from());
+ auto *node2 = loco::must_cast<luci::CircleNode *>(g.input());
+ EXPECT_EQ(node1, node2);
+}
diff --git a/compiler/luci/pass/src/RemoveRedundantDequantizePass.cpp b/compiler/luci/pass/src/RemoveRedundantDequantizePass.cpp
new file mode 100644
index 000000000..66cd9d791
--- /dev/null
+++ b/compiler/luci/pass/src/RemoveRedundantDequantizePass.cpp
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveRedundantDequantizePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
+bool remove_redundant_dequant(luci::CircleDequantize *dequant)
+{
+ assert(dequant != nullptr);
+
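+ // Only remove the Dequantize if its input is already fp32, i.e. not quantized;
+ // in that case the op performs no conversion at all.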
+ auto prev = loco::must_cast<luci::CircleNode *>(dequant->input());
+ if (prev->dtype() != loco::DataType::FLOAT32)
+ return false;
+
+ replace(dequant).with(prev);
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+/**
+ * Dequantize Op does the following to the ifm:
+ * 1. Element-wise conversion of quantized values (u8/s16) to fp32 values
+ * 2. Update of dtype to fp32
+ * If the previous node is not quantized, the Dequantize Op is redundant.
+ *
+ * BEFORE
+ *
+ * [CircleNode (A)]
+ * |
+ * [CircleNode (B)] (fp32)
+ * |
+ * [CircleDequantize]
+ * |
+ * [CircleNode]
+ *
+ * AFTER
+ *
+ * [CircleNode (A)]
+ * |
+ * [CircleNode (B)] (fp32)
+ * |
+ * [CircleNode]
+ */
+bool RemoveRedundantDequantizePass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto target_node = dynamic_cast<luci::CircleDequantize *>(node);
+ if (target_node != nullptr)
+ {
+ if (remove_redundant_dequant(target_node))
+ changed = true;
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/RemoveRedundantDequantizePass.test.cpp b/compiler/luci/pass/src/RemoveRedundantDequantizePass.test.cpp
new file mode 100644
index 000000000..adb2f14a4
--- /dev/null
+++ b/compiler/luci/pass/src/RemoveRedundantDequantizePass.test.cpp
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveRedundantDequantizePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class DequantizeGraphlet
+{
+public:
+ DequantizeGraphlet() = default;
+
+public:
+ void init(loco::Graph *g)
+ {
+ _dequantize = g->nodes()->create<luci::CircleDequantize>();
+ _dequantize->dtype(loco::DataType::FLOAT32);
+ _dequantize->name("dequantize");
+ }
+
+protected:
+ luci::CircleDequantize *_dequantize = nullptr;
+};
+
+class RedundantDequantizeGraph : public TestIOGraph, public DequantizeGraphlet
+{
+public:
+ RedundantDequantizeGraph() = default;
+
+public:
+ void init(void)
+ {
+ TestIOGraph::init({1}, {1});
+ DequantizeGraphlet::init(g());
+
+ _dequantize->input(input());
+
+ output()->from(_dequantize);
+ }
+
+ void init_u8_input(void)
+ {
+ TestIOGraph::init({1}, {1});
+ DequantizeGraphlet::init(g());
+
+ // Use u8 input (dequantize is not redundant anymore)
+ input()->dtype(loco::DataType::U8);
+ {
+ auto qparam = std::make_unique<luci::CircleQuantParam>();
+ qparam->scale = {1};
+ qparam->zerop = {1};
+ input()->quantparam(std::move(qparam));
+ }
+
+ _dequantize->input(input());
+
+ output()->from(_dequantize);
+ }
+};
+
+} // namespace
+
+TEST(RemoveRedundantDequantizePass, single_redundant_dequantize)
+{
+ RedundantDequantizeGraph g;
+ luci::RemoveRedundantDequantizePass pass;
+
+ g.init();
+
+ EXPECT_TRUE(pass.run(g.g()));
+
+ int count = 0;
+ for (auto node : loco::active_nodes(loco::output_nodes(g.g())))
+ {
+ if (dynamic_cast<luci::CircleDequantize *>(node))
+ {
+ count++;
+ }
+ }
+
+ ASSERT_EQ(0, count);
+}
+
+TEST(RemoveRedundantDequantizePass, wrong_dtype_NEG)
+{
+ RedundantDequantizeGraph g;
+ luci::RemoveRedundantDequantizePass pass;
+
+ g.init_u8_input();
+
+ EXPECT_FALSE(pass.run(g.g()));
+}
diff --git a/compiler/luci/pass/src/RemoveRedundantQuantizePass.cpp b/compiler/luci/pass/src/RemoveRedundantQuantizePass.cpp
new file mode 100644
index 000000000..8a10ad4a0
--- /dev/null
+++ b/compiler/luci/pass/src/RemoveRedundantQuantizePass.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveRedundantQuantizePass.h"
+
+#include <luci/IR/CircleNode.h>
+
+/**
+ * Remove redundant Quantize operations. For consecutive Quantize Ops,
+ * only the last Quantize Op takes effect, so the preceding ones can be removed.
+ *
+ * BEFORE
+ * [CircleNode_1]
+ * |
+ * [CircleQuantize, dtype_1, scale_1, zero_point_1]
+ * |
+ * [CircleQuantize, dtype_2, scale_2, zero_point_2]
+ * |
+ * [CircleNode_2]
+ *
+ * AFTER
+ * [CircleNode_1]
+ * / \
+ * / \
+ * / \
+ * / \
+ * / \
+ * [CircleQuantize, dtype_2, scale_2, zero_point_2] [CircleQuantize, dtype_1, scale_1, zero_point_1]
+ * |
+ * [CircleNode_2]
+ *
+ */
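+
+// For example, with u8 quantization the sequence
+//   Quantize(scale=0.5, zerop=0) -> Quantize(scale=0.5, zerop=0)
+// collapses to a single Quantize, and a Quantize whose input already has the
+// same dtype, scale and zero point is bypassed entirely.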
+
+namespace
+{
+
+bool remove_redundant_quantize(luci::CircleQuantize *node)
+{
+ auto pred_node = loco::must_cast<luci::CircleNode *>(node->input());
+
+ if (node->quantparam() == nullptr or pred_node->quantparam() == nullptr)
+ return false;
+
+ if (node->quantparam()->scale.size() != 1 or node->quantparam()->zerop.size() != 1 or
+ pred_node->quantparam()->scale.size() != 1 or pred_node->quantparam()->zerop.size() != 1)
+ {
+ return false;
+ }
+
+ if (node->dtype() != pred_node->dtype() or
+ pred_node->quantparam()->scale.at(0) != node->quantparam()->scale.at(0) or
+ pred_node->quantparam()->zerop.at(0) != node->quantparam()->zerop.at(0))
+ {
+ return false;
+ }
+
+ replace(node).with(pred_node);
+
+ return true;
+}
+
+bool remove_redundant_subsequent_quantize(luci::CircleQuantize *node)
+{
+ auto pred_node = dynamic_cast<luci::CircleQuantize *>(node->input());
+ if (pred_node == nullptr)
+ return remove_redundant_quantize(node);
+
+ node->input(pred_node->input());
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool RemoveRedundantQuantizePass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
+ {
+ if (auto quantize_node = dynamic_cast<luci::CircleQuantize *>(node))
+ {
+ if (remove_redundant_subsequent_quantize(quantize_node))
+ changed = true;
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/RemoveRedundantQuantizePass.test.cpp b/compiler/luci/pass/src/RemoveRedundantQuantizePass.test.cpp
new file mode 100644
index 000000000..d0166bd20
--- /dev/null
+++ b/compiler/luci/pass/src/RemoveRedundantQuantizePass.test.cpp
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveRedundantQuantizePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class QuantizeGraphlet
+{
+public:
+ QuantizeGraphlet() = default;
+
+public:
+ void init(loco::Graph *g)
+ {
+ _first_quantize = g->nodes()->create<luci::CircleQuantize>();
+ _first_quantize->dtype(loco::DataType::U8);
+ {
+ auto quantize_param = std::make_unique<luci::CircleQuantParam>();
+ quantize_param->scale = {0.5};
+ quantize_param->zerop = {0};
+ _first_quantize->quantparam(std::move(quantize_param));
+ }
+ _first_quantize->name("first_quantize");
+
+ _second_quantize = g->nodes()->create<luci::CircleQuantize>();
+ _second_quantize->dtype(loco::DataType::U8);
+ {
+ auto quantize_param = std::make_unique<luci::CircleQuantParam>();
+ quantize_param->scale = {0.5};
+ quantize_param->zerop = {0};
+ _second_quantize->quantparam(std::move(quantize_param));
+ }
+ _second_quantize->name("second_quantize");
+ }
+
+protected:
+ luci::CircleQuantize *_first_quantize = nullptr;
+ luci::CircleQuantize *_second_quantize = nullptr;
+};
+
+class RedundantSubsequentQuantizeGraph : public TestIOGraph, public QuantizeGraphlet
+{
+public:
+ RedundantSubsequentQuantizeGraph() = default;
+
+public:
+ void init(void)
+ {
+ TestIOGraph::init({1}, {1});
+ QuantizeGraphlet::init(g());
+
+ input()->dtype(loco::DataType::U8);
+ {
+ auto quantize_param = std::make_unique<luci::CircleQuantParam>();
+ quantize_param->scale = {1};
+ quantize_param->zerop = {1};
+ input()->quantparam(std::move(quantize_param));
+ }
+
+ _first_quantize->input(input());
+ _second_quantize->input(_first_quantize);
+
+ output()->from(_second_quantize);
+ output()->dtype(loco::DataType::U8);
+ }
+};
+
+class RedundantQuantizeGraph : public TestIOGraph, public QuantizeGraphlet
+{
+public:
+ RedundantQuantizeGraph() = default;
+
+public:
+ void init(void)
+ {
+ TestIOGraph::init({1}, {1});
+ QuantizeGraphlet::init(g());
+
+ input()->dtype(loco::DataType::U8);
+ {
+ auto quantize_param = std::make_unique<luci::CircleQuantParam>();
+ quantize_param->scale = {0.5};
+ quantize_param->zerop = {0};
+ input()->quantparam(std::move(quantize_param));
+ }
+
+ _first_quantize->input(input());
+
+ output()->from(_first_quantize);
+ output()->dtype(loco::DataType::U8);
+ }
+};
+
+} // namespace
+
+TEST(RemoveRedundantQuantizePass, name)
+{
+ luci::RemoveRedundantQuantizePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(RemoveRedundantQuantizePass, remove_subsequent_quantize)
+{
+ RedundantSubsequentQuantizeGraph g;
+ luci::RemoveRedundantQuantizePass pass;
+
+ g.init();
+
+ EXPECT_TRUE(pass.run(g.g()));
+
+ int count = 0;
+ for (auto node : loco::active_nodes(loco::output_nodes(g.g())))
+ {
+ if (dynamic_cast<luci::CircleQuantize *>(node))
+ {
+ count++;
+ }
+ }
+
+ ASSERT_EQ(1, count);
+}
+
+TEST(RemoveRedundantQuantizePass, remove_quantize)
+{
+ RedundantQuantizeGraph g;
+ luci::RemoveRedundantQuantizePass pass;
+
+ g.init();
+
+ EXPECT_TRUE(pass.run(g.g()));
+
+ int count = 0;
+ for (auto node : loco::active_nodes(loco::output_nodes(g.g())))
+ {
+ if (dynamic_cast<luci::CircleQuantize *>(node))
+ {
+ count++;
+ }
+ }
+
+ ASSERT_EQ(0, count);
+}
diff --git a/compiler/luci/pass/src/RemoveRedundantReshape.cpp b/compiler/luci/pass/src/RemoveRedundantReshape.cpp
new file mode 100644
index 000000000..2f0b22ae6
--- /dev/null
+++ b/compiler/luci/pass/src/RemoveRedundantReshape.cpp
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveRedundantReshapePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
+bool remove_redundant_reshape(luci::CircleReshape *node)
+{
+ auto pred_node = dynamic_cast<luci::CircleReshape *>(node->tensor());
+ if (pred_node == nullptr)
+ return false;
+
+ node->tensor(pred_node->tensor());
+ return true;
+}
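+
+// For example, Reshape({-1, 4, 6}) followed by Reshape({1, -1, 2, 3}) on a
+// [4, 6] input is rewired so that the second Reshape reads the input
+// directly; only the last Reshape determines the resulting shape.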
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * BEFORE
+ *
+ * [CircleNode]
+ * |
+ * [CircleReshape_1]
+ * |
+ * [CircleReshape_2]
+ * |
+ * [CircleNode]
+ *
+ * AFTER
+ *
+ * [CircleNode]
+ * / \
+ * [CircleReshape_1] [CircleReshape_2]
+ * |
+ * [CircleNode]
+ **/
+bool RemoveRedundantReshapePass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto reshape_node = dynamic_cast<luci::CircleReshape *>(node))
+ {
+ if (remove_redundant_reshape(reshape_node))
+ changed = true;
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/RemoveRedundantReshape.test.cpp b/compiler/luci/pass/src/RemoveRedundantReshape.test.cpp
new file mode 100644
index 000000000..617840f3a
--- /dev/null
+++ b/compiler/luci/pass/src/RemoveRedundantReshape.test.cpp
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "luci/Pass/RemoveRedundantReshapePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+class RemoveRedundantReshape : public ::testing::Test
+{
+public:
+ RemoveRedundantReshape() {}
+
+ void createReshapeConst(luci::CircleReshape *target, const std::vector<int32_t> shape)
+ {
+ auto shape_const = g.nodes()->create<luci::CircleConst>();
+ shape_const->dtype(loco::DataType::S32);
+ shape_const->size<loco::DataType::S32>(shape.size());
+ shape_const->shape_status(luci::ShapeStatus::VALID);
+ shape_const->rank(1);
+ shape_const->dim(0).set(shape.size());
+ for (int32_t i = 0; i < shape.size(); i++)
+ {
+ shape_const->at<loco::DataType::S32>(i) = shape.at(i);
+ }
+ shape_const->name("shape_const");
+ target->shape(shape_const);
+ }
+
+ void buildGraph(const std::initializer_list<uint32_t> base_shape,
+ const std::vector<int32_t> first_shape, const std::vector<int32_t> second_shape)
+ {
+ // Input Create.
+ input = g.nodes()->create<luci::CircleInput>();
+ auto graph_input = g.inputs()->create();
+ input->index(graph_input->index());
+ input->shape_status(luci::ShapeStatus::VALID);
+ input->rank(base_shape.size());
+ input->shape(base_shape);
+ input->name("input");
+
+ // Create first reshape.
+ first_reshape = g.nodes()->create<luci::CircleReshape>();
+ first_reshape->tensor(input);
+ first_reshape->name("Reshape");
+ createReshapeConst(first_reshape, first_shape);
+
+ // Create second reshape.
+ second_reshape = g.nodes()->create<luci::CircleReshape>();
+ second_reshape->tensor(first_reshape);
+ second_reshape->name("second_reshape");
+ createReshapeConst(second_reshape, second_shape);
+
+ // Output Connect.
+ output = g.nodes()->create<luci::CircleOutput>();
+ output->from(second_reshape);
+ output->name("output");
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+ }
+
+public:
+ loco::Graph g;
+ luci::CircleInput *input = nullptr;
+ luci::CircleReshape *first_reshape = nullptr;
+ luci::CircleReshape *second_reshape = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+} // namespace
+
+TEST(RemoveRedundantReshapePassTest, name)
+{
+ luci::RemoveRedundantReshapePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST_F(RemoveRedundantReshape, simple_case)
+{
+ buildGraph({4, 6}, {-1, 4, 6}, {1, -1, 2, 3});
+ luci::RemoveRedundantReshapePass pass;
+ while (pass.run(&g))
+ ;
+ int count = 0;
+ for (auto node : loco::active_nodes(loco::output_nodes(&g)))
+ {
+ if (auto reshape = dynamic_cast<luci::CircleReshape *>(node))
+ {
+ count++;
+ }
+ }
+ ASSERT_EQ(1, count);
+}
diff --git a/compiler/luci/pass/src/RemoveRedundantTransposePass.cpp b/compiler/luci/pass/src/RemoveRedundantTransposePass.cpp
new file mode 100644
index 000000000..75cf72795
--- /dev/null
+++ b/compiler/luci/pass/src/RemoveRedundantTransposePass.cpp
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveRedundantTransposePass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+namespace
+{
+
+/// @brief Return true if first_perm[second_perm[i]] == i
+bool check_perm(const luci::CircleConst *first_perm, const luci::CircleConst *second_perm)
+{
+ assert(first_perm->rank() == 1);
+ assert(second_perm->rank() == 1);
+ assert(second_perm->size<loco::DataType::S32>() == first_perm->size<loco::DataType::S32>());
+ for (int32_t i = 0; i < static_cast<int32_t>(first_perm->size<loco::DataType::S32>()); i++)
+ {
+ if (first_perm->at<loco::DataType::S32>(second_perm->at<loco::DataType::S32>(i)) != i)
+ return false;
+ }
+ return true;
+}
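+
+// e.g., first_perm = {1, 0, 2, 3} and second_perm = {1, 0, 2, 3} satisfy
+// first_perm[second_perm[i]] == i for every i, so the two transposes compose
+// to the identity permutation.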
+
+bool remove_consecutive_transpose_function(luci::CircleTranspose *target_node)
+{
+ auto pred_node = dynamic_cast<luci::CircleTranspose *>(target_node->a());
+ if (pred_node == nullptr)
+ return false;
+
+ auto target_perm = dynamic_cast<luci::CircleConst *>(target_node->perm());
+ if (target_perm == nullptr)
+ return false;
+
+ auto pred_perm = dynamic_cast<luci::CircleConst *>(pred_node->perm());
+ if (pred_perm == nullptr)
+ return false;
+
+ auto main_node = loco::must_cast<luci::CircleNode *>(pred_node->a());
+ if (check_perm(target_perm, pred_perm))
+ {
+ replace(target_node).with(main_node);
+ }
+ else
+ {
+ auto name = target_node->name();
+ assert(name.length() > 0);
+
+ auto g = pred_perm->graph();
+ auto new_const_node = g->nodes()->create<luci::CircleConst>();
+
+ new_const_node->dtype(loco::DataType::S32);
+ new_const_node->rank(1);
+ new_const_node->dim(0) = pred_perm->dim(0);
+ new_const_node->size<loco::DataType::S32>(pred_perm->dim(0).value());
+ new_const_node->shape_status(luci::ShapeStatus::VALID);
+ for (uint32_t i = 0; i < pred_perm->size<loco::DataType::S32>(); i++)
+ {
+ new_const_node->at<loco::DataType::S32>(i) =
+ pred_perm->at<loco::DataType::S32>(target_perm->at<loco::DataType::S32>(i));
+ }
+ new_const_node->name(name + "/Transpose/perm");
+
+ // Create New Transpose Node
+ auto new_transpose_node = g->nodes()->create<luci::CircleTranspose>();
+ new_transpose_node->dtype(target_node->dtype());
+ new_transpose_node->a(main_node);
+ new_transpose_node->perm(new_const_node);
+ new_transpose_node->name(name + "/Transpose");
+ luci::add_origin(new_transpose_node, luci::get_origin(target_node));
+
+ replace(target_node).with(new_transpose_node);
+ }
+ return true;
+}
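+
+// When the two permutations do not cancel out, the code above composes them
+// as new_perm[i] = pred_perm[target_perm[i]]. For example, pred_perm =
+// {0, 1, 3, 2} followed by target_perm = {1, 0, 2, 3} merges into a single
+// transpose with perm {1, 0, 3, 2}.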
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * BEFORE
+ * |
+ * [CircleNode] [CircleConst]
+ * | (pred_perm)
+ * \ /
+ * [CircleTranspose] [CircleConst]
+ * (pred_node) (target_perm)
+ * \ /
+ * [CircleTranspose]
+ * (target_node)
+ * |
+ *
+ * AFTER
+ * | |
+ * [CircleNode] [CircleConst](new) |
+ * \ / or [CircleNode]
+ * [CircleTranspose](new) |
+ * | |
+ */
+bool RemoveRedundantTransposePass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto transpose = dynamic_cast<luci::CircleTranspose *>(node))
+ {
+ if (remove_consecutive_transpose_function(transpose))
+ changed = true;
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/RemoveRedundantTransposePass.test.cpp b/compiler/luci/pass/src/RemoveRedundantTransposePass.test.cpp
new file mode 100644
index 000000000..bb8e292d4
--- /dev/null
+++ b/compiler/luci/pass/src/RemoveRedundantTransposePass.test.cpp
@@ -0,0 +1,346 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "luci/Pass/RemoveRedundantTransposePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <vector>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+void setValue(luci::CircleConst *node, const std::vector<int> &v)
+{
+ node->dtype(loco::DataType::S32);
+ node->size<loco::DataType::S32>(v.size());
+ node->rank(1);
+ node->dim(0).set(v.size());
+ for (int i = 0; i < v.size(); ++i)
+ {
+ node->at<loco::DataType::S32>(i) = v[i];
+ }
+}
+
+/**
+ * Removal of consecutive Transposes
+ *
+ * Type1: Remove both Transposes
+ * BEFORE
+ * |
+ * [CircleNode] [CircleConst]
+ * \ /
+ * [CircleTranspose] [CircleConst]
+ * \ /
+ * [CircleTranspose]
+ * |
+ *
+ * AFTER
+ * |
+ * [CircleNode]
+ * |
+ *
+ * --------------------------------------------
+ *
+ * Type2: Merge into one Transpose
+ * BEFORE
+ * |
+ * [CircleNode] [CircleConst]
+ * \ /
+ * [CircleTranspose] [CircleConst]
+ * \ /
+ * [CircleTranspose]
+ * |
+ *
+ * AFTER
+ * |
+ * [CircleNode] [CircleConst]
+ * \ /
+ * [CircleTranspose]
+ * |
+ *
+ */
+void create_redundant_transpose(loco::Graph *g, const std::vector<int32_t> &perm1,
+ const std::vector<int32_t> &perm2)
+{
+ assert(g);
+
+ auto input = g->nodes()->create<luci::CircleInput>();
+ auto graph_input = g->inputs()->create();
+ input->index(graph_input->index());
+ input->name("input");
+
+ // Create perm1
+ auto perm1_node = g->nodes()->create<luci::CircleConst>();
+ setValue(perm1_node, perm1);
+ perm1_node->name("perm1_node");
+
+ auto transpose1 = g->nodes()->create<luci::CircleTranspose>();
+ transpose1->dtype(loco::DataType::FLOAT32);
+ transpose1->a(input);
+ transpose1->perm(perm1_node);
+ transpose1->name("transpose1");
+
+ // Create perm2
+ auto perm2_node = g->nodes()->create<luci::CircleConst>();
+ setValue(perm2_node, perm2);
+ perm2_node->name("perm2_node");
+
+ auto transpose2 = g->nodes()->create<luci::CircleTranspose>();
+ transpose2->dtype(loco::DataType::FLOAT32);
+ transpose2->a(transpose1);
+ transpose2->perm(perm2_node);
+ transpose2->name("transpose2");
+
+ // Output
+ auto output = g->nodes()->create<luci::CircleOutput>();
+ output->from(transpose2);
+ auto graph_output = g->outputs()->create();
+ output->index(graph_output->index());
+ output->name("output");
+}
+
+/**
+ * Removal of consecutive Transposes with branching
+ *
+ * BEFORE
+ * |
+ * [CircleNode] [CircleConst]
+ * \ /
+ * [CircleConst] [CircleTranspose] [CircleConst]
+ * \ / \ /
+ * [CircleTranspose] [CircleTranspose]
+ * | |
+ * [CircleNode] [CircleNode]
+ * | |
+ *
+ * AFTER
+ * Type 1: Remove all Transpose
+ * |
+ * [CircleNode]
+ * / \
+ * [CircleNode] [CircleNode]
+ * | |
+ *
+ * Type 2: Remove both Transposes on one side and create a new one for the other side
+ * |
+ * [CircleNode] [CircleConst](new)
+ * / \ /
+ * / [CircleTranspose](new)
+ * | |
+ * [CircleNode] [CircleNode]
+ * | |
+ */
+void create_redundant_transpose_with_branch(loco::Graph *g, const std::vector<int32_t> &perm1,
+ const std::vector<int32_t> &perm2,
+ const std::vector<int32_t> &perm3)
+{
+ assert(g);
+
+ auto input = g->nodes()->create<luci::CircleInput>();
+ auto graph_input = g->inputs()->create();
+ input->dtype(loco::DataType::FLOAT32);
+ input->index(graph_input->index());
+ input->name("input");
+ graph_input->dtype(loco::DataType::FLOAT32);
+
+ graph_input->shape({4, 4, 4, 4});
+ input->shape({4, 4, 4, 4});
+
+ // Create perm1
+ auto perm1_node = g->nodes()->create<luci::CircleConst>();
+ setValue(perm1_node, perm1);
+ perm1_node->name("perm1_node");
+
+ auto transpose1 = g->nodes()->create<luci::CircleTranspose>();
+ transpose1->dtype(loco::DataType::FLOAT32);
+ transpose1->a(input);
+ transpose1->perm(perm1_node);
+ transpose1->name("transpose1");
+
+ // Create perm2
+ auto perm2_node = g->nodes()->create<luci::CircleConst>();
+ setValue(perm2_node, perm2);
+ perm2_node->name("perm2_node");
+
+ auto transpose2 = g->nodes()->create<luci::CircleTranspose>();
+ transpose2->dtype(loco::DataType::FLOAT32);
+ transpose2->a(transpose1);
+ transpose2->perm(perm2_node);
+ transpose2->name("transpose2");
+
+ // create perm3
+ auto perm3_node = g->nodes()->create<luci::CircleConst>();
+ setValue(perm3_node, perm3);
+ perm3_node->name("perm3_node");
+
+ auto transpose3 = g->nodes()->create<luci::CircleTranspose>();
+ transpose3->dtype(loco::DataType::FLOAT32);
+ transpose3->a(transpose1);
+ transpose3->perm(perm3_node);
+ transpose3->name("transpose3");
+
+ // Output
+ auto output1 = g->nodes()->create<luci::CircleOutput>();
+ output1->from(transpose2);
+ output1->name("output1");
+ auto output2 = g->nodes()->create<luci::CircleOutput>();
+ output2->from(transpose3);
+ output2->name("output2");
+ auto graph_output1 = g->outputs()->create();
+ output1->index(graph_output1->index());
+ auto graph_output2 = g->outputs()->create();
+ output2->index(graph_output2->index());
+ output1->dtype(loco::DataType::FLOAT32);
+ output2->dtype(loco::DataType::FLOAT32);
+ graph_output1->dtype(loco::DataType::FLOAT32);
+ graph_output2->dtype(loco::DataType::FLOAT32);
+ output1->shape({4, 4, 4, 4});
+ output2->shape({4, 4, 4, 4});
+ graph_output1->shape({4, 4, 4, 4});
+ graph_output2->shape({4, 4, 4, 4});
+}
+
+} // namespace
+
+TEST(RemoveRedundantTransposePassTest, name)
+{
+ luci::RemoveRedundantTransposePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(RemoveRedundantTransposePass, remove_consecutive_transpose_function_type1)
+{
+ auto graph = loco::make_graph();
+  create_redundant_transpose(graph.get(), {1, 0, 2, 3}, {1, 0, 2, 3});
+
+ luci::RemoveRedundantTransposePass pass;
+ while (pass.run(graph.get()))
+ ;
+ luci::CircleTranspose *transpose_node = nullptr;
+ for (auto node : loco::active_nodes(loco::output_nodes(graph.get())))
+ {
+ auto trans = dynamic_cast<luci::CircleTranspose *>(node);
+ if (not trans)
+ continue;
+ transpose_node = trans;
+ break;
+ }
+ // No transpose node is in graph.
+ ASSERT_EQ(nullptr, transpose_node);
+}
+
+TEST(RemoveRedundantTransposePass, remove_consecutive_transpose_function_type2)
+{
+ auto graph = loco::make_graph();
+  create_redundant_transpose(graph.get(), {0, 1, 3, 2}, {1, 0, 2, 3});
+
+ luci::RemoveRedundantTransposePass pass;
+ while (pass.run(graph.get()))
+ ;
+ luci::CircleTranspose *transpose_node = nullptr;
+ for (auto node : loco::active_nodes(loco::output_nodes(graph.get())))
+ {
+ auto trans = dynamic_cast<luci::CircleTranspose *>(node);
+ if (not trans)
+ continue;
+ transpose_node = trans;
+ break;
+ }
+ // Just one transpose node, with updated perm constant.
+ ASSERT_NE(nullptr, transpose_node);
+ auto perm = loco::must_cast<luci::CircleConst *>(transpose_node->perm());
+ ASSERT_EQ(1, perm->at<loco::DataType::S32>(0));
+ ASSERT_EQ(0, perm->at<loco::DataType::S32>(1));
+ ASSERT_EQ(3, perm->at<loco::DataType::S32>(2));
+ ASSERT_EQ(2, perm->at<loco::DataType::S32>(3));
+}
+
+TEST(RemoveRedundantTransposePass, remove_consecutive_transpose_function_type3)
+{
+ auto graph = loco::make_graph();
+  create_redundant_transpose(graph.get(), {0, 3, 2, 1}, {0, 2, 3, 1});
+
+ luci::RemoveRedundantTransposePass pass;
+ while (pass.run(graph.get()))
+ ;
+ luci::CircleTranspose *transpose_node = nullptr;
+ for (auto node : loco::active_nodes(loco::output_nodes(graph.get())))
+ {
+ auto trans = dynamic_cast<luci::CircleTranspose *>(node);
+ if (not trans)
+ continue;
+ transpose_node = trans;
+ break;
+ }
+ ASSERT_NE(nullptr, transpose_node);
+ auto perm = loco::must_cast<luci::CircleConst *>(transpose_node->perm());
+ ASSERT_EQ(0, perm->at<loco::DataType::S32>(0));
+ ASSERT_EQ(2, perm->at<loco::DataType::S32>(1));
+ ASSERT_EQ(1, perm->at<loco::DataType::S32>(2));
+ ASSERT_EQ(3, perm->at<loco::DataType::S32>(3));
+}
+
+/**
+ * @brief Test case where the output of the first transpose becomes the input
+ *        of more than one operation.
+ */
+TEST(RemoveRedundantTransposePass, remove_consecutive_transpose_function_with_branch_remove_case)
+{
+ auto graph = loco::make_graph();
+  create_redundant_transpose_with_branch(graph.get(), {1, 0, 2, 3}, {1, 0, 2, 3}, {1, 0, 2, 3});
+
+ luci::RemoveRedundantTransposePass pass;
+ while (pass.run(graph.get()))
+ ;
+ luci::CircleTranspose *transpose_node = nullptr;
+ for (auto node : loco::active_nodes(loco::output_nodes(graph.get())))
+ {
+ auto trans = dynamic_cast<luci::CircleTranspose *>(node);
+ if (not trans)
+ continue;
+ transpose_node = trans;
+ break;
+ }
+ // No transpose node is in graph.
+ ASSERT_EQ(nullptr, transpose_node);
+}
+
+TEST(RemoveRedundantTransposePass, remove_consecutive_transpose_function_with_branch_leave_one)
+{
+ auto graph = loco::make_graph();
+  create_redundant_transpose_with_branch(graph.get(), {1, 0, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2});
+
+ luci::RemoveRedundantTransposePass pass;
+ while (pass.run(graph.get()))
+ ;
+ luci::CircleTranspose *transpose_node = nullptr;
+ for (auto node : loco::active_nodes(loco::output_nodes(graph.get())))
+ {
+ auto trans = dynamic_cast<luci::CircleTranspose *>(node);
+ if (not trans)
+ continue;
+ transpose_node = trans;
+ break;
+ }
+ ASSERT_NE(nullptr, transpose_node);
+ auto perm = loco::must_cast<luci::CircleConst *>(transpose_node->perm());
+ ASSERT_EQ(1, perm->at<loco::DataType::S32>(0));
+ ASSERT_EQ(0, perm->at<loco::DataType::S32>(1));
+ ASSERT_EQ(3, perm->at<loco::DataType::S32>(2));
+ ASSERT_EQ(2, perm->at<loco::DataType::S32>(3));
+}
diff --git a/compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.cpp b/compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.cpp
new file mode 100644
index 000000000..476ec68bf
--- /dev/null
+++ b/compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.cpp
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveUnnecessaryReshapeNetPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
+bool acceptable_intermediate_op(const loco::Node *node)
+{
+ if (not node)
+ return false;
+
+ const auto opcode = loco::must_cast<const luci::CircleNode *>(node)->opcode();
+
+ switch (opcode)
+ {
+ case luci::CircleOpcode::ADD:
+ case luci::CircleOpcode::MUL:
+ case luci::CircleOpcode::TANH:
+ case luci::CircleOpcode::LOGISTIC:
+ break;
+
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+bool same_shape(const loco::Node *a, const loco::Node *b)
+{
+ auto a_cnode = loco::must_cast<const luci::CircleNode *>(a);
+ auto b_cnode = loco::must_cast<const luci::CircleNode *>(b);
+
+ if (a_cnode->rank() != b_cnode->rank())
+ return false;
+
+ for (uint32_t i = 0; i < a_cnode->rank(); i++)
+ {
+ if (not(a_cnode->dim(i) == b_cnode->dim(i)))
+ return false;
+ }
+ return true;
+}
+
+class PreReshapeFinder
+{
+public:
+ PreReshapeFinder(const luci::CircleReshape *post_reshape) : _post_reshape(post_reshape)
+ {
+ assert(post_reshape != nullptr); // FIX_CALLER_UNLESS
+ }
+
+public:
+ // Return true if pre_reshapes are found
+ bool collect_pre_reshapes(loco::Node *node)
+ {
+ // TODO Support diamond case
+ if (loco::succs(node).size() != 1)
+ return false;
+
+ if (auto pre_reshape = dynamic_cast<luci::CircleReshape *>(node))
+ {
+ // Check ifm of pre-reshape and ofm of post_reshape
+ if (not same_shape(pre_reshape->tensor(), _post_reshape))
+ return false;
+
+ // Check ofm of pre-reshape and ifm of post_reshape
+ if (not same_shape(pre_reshape, _post_reshape->tensor()))
+ return false;
+
+ _pre_reshapes.emplace_back(pre_reshape);
+ return true;
+ }
+
+ if (not acceptable_intermediate_op(node))
+ return false;
+
+ for (uint32_t i = 0; i < node->arity(); i++)
+ {
+ if (not collect_pre_reshapes(node->arg(i)))
+ return false;
+ }
+
+ return true;
+ }
+
+public:
+ std::vector<luci::CircleReshape *> pre_reshapes(void) const { return _pre_reshapes; }
+
+private:
+ const luci::CircleReshape *_post_reshape = nullptr;
+ std::vector<luci::CircleReshape *> _pre_reshapes;
+};
+
+bool remove_unnecessary_reshape_net(luci::CircleReshape *reshape)
+{
+ PreReshapeFinder finder(reshape);
+ if (not finder.collect_pre_reshapes(reshape->tensor()))
+ return false;
+
+ // Remove pre_reshapes
+ for (auto pre_reshape : finder.pre_reshapes())
+ {
+ loco::replace(pre_reshape).with(pre_reshape->tensor());
+ }
+
+ // Remove post_reshape
+ loco::replace(reshape).with(reshape->tensor());
+
+ return true;
+}
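+
+// For example, the net
+//   [1,1,1,32] -> Reshape -> [1,1,32,1] -> Logistic -> Reshape -> [1,1,1,32]
+// is removed entirely: Logistic is element-wise, so the surrounding Reshapes
+// cancel each other out.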
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * BEFORE
+ *
+ * [CircleNode]
+ * |
+ * [CircleReshape_1] (shape: A -> B)
+ * |
+ * [CircleNode] (ex: Add/Mul/Tanh/Logistic ..)
+ * |
+ * [CircleReshape_2] (shape: B -> A)
+ * |
+ * [CircleNode]
+ *
+ * AFTER
+ *
+ * [CircleNode]
+ * | \
+ * | [CircleReshape_1]
+ * [CircleNode]
+ * | \
+ * | [CircleReshape_2]
+ * [CircleNode]
+ **/
+bool RemoveUnnecessaryReshapeNetPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto reshape_node = dynamic_cast<luci::CircleReshape *>(node))
+ {
+ if (remove_unnecessary_reshape_net(reshape_node))
+ changed = true;
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.test.cpp b/compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.test.cpp
new file mode 100644
index 000000000..4ad707ba3
--- /dev/null
+++ b/compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.test.cpp
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "luci/Pass/RemoveUnnecessaryReshapeNetPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+class RemoveUnnecessaryReshapeNet : public ::testing::Test
+{
+public:
+ RemoveUnnecessaryReshapeNet() {}
+
+ void createReshapeConst(luci::CircleReshape *target, const std::vector<uint32_t> shape)
+ {
+ auto shape_const = g.nodes()->create<luci::CircleConst>();
+ shape_const->dtype(loco::DataType::S32);
+ shape_const->size<loco::DataType::S32>(shape.size());
+ shape_const->shape_status(luci::ShapeStatus::VALID);
+ shape_const->rank(1);
+ shape_const->dim(0).set(shape.size());
+ for (int32_t i = 0; i < shape.size(); i++)
+ {
+ shape_const->at<loco::DataType::S32>(i) = static_cast<int32_t>(shape.at(i));
+ }
+ shape_const->name("shape_const");
+ target->shape(shape_const);
+ target->rank(shape.size());
+ for (uint32_t i = 0; i < shape.size(); i++)
+ {
+ target->dim(i) = shape[i];
+ }
+ target->shape_status(luci::ShapeStatus::VALID);
+ }
+
+ void buildGraph(const std::initializer_list<uint32_t> base_shape,
+ const std::initializer_list<uint32_t> first_shape,
+ const std::initializer_list<uint32_t> second_shape)
+ {
+ // Input Create.
+ input = g.nodes()->create<luci::CircleInput>();
+ auto graph_input = g.inputs()->create();
+ input->index(graph_input->index());
+ input->shape_status(luci::ShapeStatus::VALID);
+ input->shape(base_shape);
+ input->name("input");
+
+ // Create first reshape.
+ first_reshape = g.nodes()->create<luci::CircleReshape>();
+ first_reshape->tensor(input);
+ first_reshape->name("Reshape");
+ createReshapeConst(first_reshape, first_shape);
+
+ // Create logistic.
+ logistic = g.nodes()->create<luci::CircleLogistic>();
+ logistic->x(first_reshape);
+ logistic->name("logistic");
+ logistic->shape(first_shape);
+ logistic->shape_status(luci::ShapeStatus::VALID);
+
+ // Create second reshape.
+ second_reshape = g.nodes()->create<luci::CircleReshape>();
+ second_reshape->tensor(logistic);
+ second_reshape->name("second_reshape");
+ createReshapeConst(second_reshape, second_shape);
+
+ // Output Connect.
+ output = g.nodes()->create<luci::CircleOutput>();
+ output->from(second_reshape);
+ output->name("output");
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+ }
+
+public:
+ loco::Graph g;
+ luci::CircleInput *input = nullptr;
+ luci::CircleReshape *first_reshape = nullptr;
+ luci::CircleLogistic *logistic = nullptr;
+ luci::CircleReshape *second_reshape = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+} // namespace
+
+TEST_F(RemoveUnnecessaryReshapeNet, simple_case)
+{
+ buildGraph({1, 1, 1, 32}, {1, 1, 32, 1}, {1, 1, 1, 32});
+ luci::RemoveUnnecessaryReshapeNetPass pass;
+
+ ASSERT_TRUE(pass.run(&g));
+
+ int count = 0;
+ for (auto node : loco::active_nodes(loco::output_nodes(&g)))
+ {
+ if (auto reshape = dynamic_cast<luci::CircleReshape *>(node))
+ count++;
+ }
+ ASSERT_EQ(0, count);
+}
+
+TEST_F(RemoveUnnecessaryReshapeNet, shape_mismatch_NEG)
+{
+ buildGraph({1, 1, 1, 32}, {1, 1, 32, 1}, {1, 1, 2, 16});
+ luci::RemoveUnnecessaryReshapeNetPass pass;
+ ASSERT_FALSE(pass.run(&g));
+}
diff --git a/compiler/luci/pass/src/RemoveUnnecessaryReshapePass.cpp b/compiler/luci/pass/src/RemoveUnnecessaryReshapePass.cpp
new file mode 100644
index 000000000..fb46f490d
--- /dev/null
+++ b/compiler/luci/pass/src/RemoveUnnecessaryReshapePass.cpp
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveUnnecessaryReshapePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
+bool remove_no_effect_reshape(luci::CircleNode *node)
+{
+ auto target_node = dynamic_cast<luci::CircleReshape *>(node);
+ if (target_node == nullptr)
+ return false;
+
+ auto new_shape = dynamic_cast<luci::CircleConst *>(target_node->shape());
+ if (new_shape == nullptr)
+ return false;
+
+ // Compare updated shape and input shape.
+ auto input_node = loco::must_cast<luci::CircleNode *>(target_node->tensor());
+ if (input_node->rank() != new_shape->dim(0).value())
+ return false;
+ for (uint32_t i = 0; i < input_node->rank(); i++)
+ {
+    // If the updated shape is -1 at this position, any dimension matches.
+    // TODO Check that the updated shape has -1 at most once.
+    if (new_shape->at<loco::DataType::S32>(i) == -1)
+      continue;
+    // If the input shape is dynamic, this Reshape cannot be removed.
+    if (!input_node->dim(i).known())
+      return false;
+    // If the input shape and the updated shape differ, it cannot be removed either.
+ if (input_node->dim(i).value() != static_cast<uint32_t>(new_shape->at<loco::DataType::S32>(i)))
+ return false;
+ }
+
+ replace(target_node).with(input_node);
+ return true;
+}
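+
+// For example, reshaping a [1, 2, 3, 4] tensor with new shape {1, 2, 3, 4}
+// (or with -1 in place of a single known dimension) is an identity and is
+// removed.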
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * BEFORE
+ * [CircleNode]
+ * |
+ * [CircleReshape]
+ * |
+ * [CircleNode]
+ *
+ * AFTER
+ * [CircleNode]
+ * | \
+ * | [CircleReshape]
+ * |
+ * [CircleNode]
+ *
+ * NOTE
+ * This pass removes a Reshape whose input and output have the same shape
+ */
+
+bool RemoveUnnecessaryReshapePass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ if (remove_no_effect_reshape(circle_node))
+ {
+ changed = true;
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/RemoveUnnecessaryReshapePass.test.cpp b/compiler/luci/pass/src/RemoveUnnecessaryReshapePass.test.cpp
new file mode 100644
index 000000000..9d2e758b4
--- /dev/null
+++ b/compiler/luci/pass/src/RemoveUnnecessaryReshapePass.test.cpp
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveUnnecessaryReshapePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+#include "test/TestFirstNode.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class ReshapeGraphlet
+{
+public:
+ ReshapeGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 input_shape, bool remove)
+ {
+ std::vector<uint32_t> shape_vector{input_shape};
+
+ auto dim0_val = remove ? shape_vector.size() : 1;
+ _reshape_shape = g->nodes()->create<luci::CircleConst>();
+ _reshape_shape->rank(1);
+ _reshape_shape->dim(0).set(dim0_val);
+ _reshape_shape->shape_status(luci::ShapeStatus::VALID);
+ _reshape_shape->dtype(loco::DataType::S32);
+
+ _reshape_shape->size<loco::DataType::S32>(dim0_val);
+ for (uint32_t i = 0; i < dim0_val; i++)
+ {
+ if (remove)
+ _reshape_shape->at<loco::DataType::S32>(i) = static_cast<int32_t>(shape_vector.at(i));
+ else
+ _reshape_shape->at<loco::DataType::S32>(i) = -1;
+ }
+ _reshape_shape->name("reshape_shape");
+
+ // Reshape create
+ auto newshape_rank = remove ? shape_vector.size() : 1;
+ _reshape = g->nodes()->create<luci::CircleReshape>();
+ _reshape->newShape()->rank(newshape_rank);
+ for (uint32_t i = 0; i < newshape_rank; i++)
+ {
+ if (remove)
+ _reshape->newShape()->dim(i) = static_cast<int32_t>(shape_vector.at(i));
+ else
+ _reshape->newShape()->dim(i) = -1;
+ }
+ _reshape->name("reshape");
+ }
+
+protected:
+ luci::CircleReshape *_reshape = nullptr;
+ luci::CircleConst *_reshape_shape = nullptr;
+};
+
+class ReshapeGraph : public TestIOGraph, public ReshapeGraphlet
+{
+public:
+ ReshapeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape, bool remove)
+ {
+ TestIOGraph::init(shape, shape);
+ ReshapeGraphlet::init(g(), shape, remove);
+
+ // connect graph
+ _reshape->tensor(input());
+ _reshape->shape(_reshape_shape);
+
+ output()->from(_reshape);
+ }
+};
+
+// TODO use ::testing::Test
+
+} // namespace
+
+TEST(RemoveUnnecessaryReshapePassTest, name)
+{
+ luci::RemoveUnnecessaryReshapePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(RemoveUnnecessaryReshapePass, removed)
+{
+ ReshapeGraph g;
+
+ g.init({1, 2, 3, 4}, true);
+
+ // confirm graph has Reshape
+ auto reshape_node = luci::test::first_node<luci::CircleReshape>(g.g());
+ ASSERT_NE(nullptr, reshape_node);
+ luci::RemoveUnnecessaryReshapePass pass;
+ while (pass.run(g.g()))
+ ;
+
+ // check Reshape is removed
+ reshape_node = luci::test::first_node<luci::CircleReshape>(g.g());
+ ASSERT_EQ(nullptr, reshape_node);
+}
+
+TEST(RemoveUnnecessaryReshapePass, not_removed_NEG)
+{
+ ReshapeGraph g;
+
+ g.init({1, 2, 3, 4}, false);
+
+ // confirm graph has Reshape
+ auto reshape_node = luci::test::first_node<luci::CircleReshape>(g.g());
+ ASSERT_NE(nullptr, reshape_node);
+ luci::RemoveUnnecessaryReshapePass pass;
+ while (pass.run(g.g()))
+ ;
+
+ // check Reshape is NOT removed
+ reshape_node = luci::test::first_node<luci::CircleReshape>(g.g());
+ ASSERT_NE(nullptr, reshape_node);
+}
diff --git a/compiler/luci/pass/src/RemoveUnnecessarySlicePass.cpp b/compiler/luci/pass/src/RemoveUnnecessarySlicePass.cpp
new file mode 100644
index 000000000..0720813cd
--- /dev/null
+++ b/compiler/luci/pass/src/RemoveUnnecessarySlicePass.cpp
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveUnnecessarySlicePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
+/**
+ * @brief Return value in CircleConst.
+ * @details Return value in position on CircleConst with int64 format.
+ * Begin must be larger than or equal to 0. Size must be larger
+ * than or equal to -1.
+ */
+int64_t value_from_circle_const(const luci::CircleConst *node, uint32_t idx)
+{
+ assert(node->rank() == 1 && node->dim(0).value() > idx);
+ assert(node->dtype() == loco::DataType::S64 || node->dtype() == loco::DataType::S32);
+
+ if (node->dtype() == loco::DataType::S64)
+ return node->at<loco::DataType::S64>(idx);
+ return static_cast<int64_t>(node->at<loco::DataType::S32>(idx));
+}
+
+bool remove_no_effect_slice(luci::CircleNode *node)
+{
+ auto target_node = dynamic_cast<luci::CircleSlice *>(node);
+ if (target_node == nullptr)
+ return false;
+
+ auto begin_const = dynamic_cast<luci::CircleConst *>(target_node->begin());
+ if (begin_const == nullptr)
+ return false;
+
+ auto size_const = dynamic_cast<luci::CircleConst *>(target_node->size());
+ if (size_const == nullptr)
+ return false;
+
+ // Check input output shape.
+ auto input_node = loco::must_cast<luci::CircleNode *>(target_node->input());
+ for (uint32_t i = 0; i < input_node->rank(); i++)
+ {
+ if (value_from_circle_const(begin_const, i) != 0)
+ return false;
+
+    int64_t size_value = value_from_circle_const(size_const, i);
+    if (size_value == -1)
+      continue;
+
+    // Check known() before reading the dimension value; the value of an
+    // unknown (dynamic) dimension is meaningless.
+    if (!input_node->dim(i).known())
+      return false;
+    if (size_value != static_cast<int64_t>(input_node->dim(i).value()))
+      return false;
+ }
+ replace(target_node).with(input_node);
+ return true;
+}
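+
+// For example, on a [2, 4, 2, 3] input, Slice with begin = {0, 0, 0, 0} and
+// size = {-1, -1, -1, -1} returns the whole tensor and is removed, while
+// begin = {1, 1, 1, 1} selects a real sub-tensor and is kept.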
+
+} // namespace
+
+namespace luci
+{
+/**
+ * BEFORE
+ *
+ * [CircleNode]
+ * |
+ * [CircleSlice]
+ * |
+ * [CircleNode]
+ *
+ * AFTER
+ *
+ * [CircleNode]
+ * |
+ * [CircleNode]
+ *
+ * Slice OP has no effect if,
+ * 1. Static Shape : begin_const[idx] is 0 AND size_const[idx] is (-1 OR input_dimension[idx])
+ * 2. Dynamic Shape : begin_const[idx] is 0 AND size_const[idx] is -1
+ */
+bool RemoveUnnecessarySlicePass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ if (remove_no_effect_slice(circle_node))
+ {
+ changed = true;
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/RemoveUnnecessarySlicePass.test.cpp b/compiler/luci/pass/src/RemoveUnnecessarySlicePass.test.cpp
new file mode 100644
index 000000000..80921a93a
--- /dev/null
+++ b/compiler/luci/pass/src/RemoveUnnecessarySlicePass.test.cpp
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "luci/Pass/RemoveUnnecessarySlicePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+#include "test/TestFirstNode.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class SliceGraphlet
+{
+public:
+ SliceGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 input_shape, bool remove)
+ {
+ // Begin Create.
+ _begin = g->nodes()->create<luci::CircleConst>();
+ _begin->rank(1);
+ _begin->dim(0).set(input_shape.size());
+ _begin->shape_status(luci::ShapeStatus::VALID);
+ _begin->dtype(loco::DataType::S32);
+ _begin->size<loco::DataType::S32>(input_shape.size());
+ for (int i = 0; i < input_shape.size(); ++i)
+ _begin->at<loco::DataType::S32>(i) = remove ? 0 : 1;
+ _begin->name("begin");
+
+ // Size Create.
+ _size = g->nodes()->create<luci::CircleConst>();
+ _size->rank(1);
+ _size->dim(0).set(input_shape.size());
+ _size->shape_status(luci::ShapeStatus::VALID);
+ _size->dtype(loco::DataType::S32);
+ _size->size<loco::DataType::S32>(input_shape.size());
+ for (int i = 0; i < input_shape.size(); ++i)
+ _size->at<loco::DataType::S32>(i) = -1;
+ _size->name("size");
+
+ // Slice Node create.
+ _slice = g->nodes()->create<luci::CircleSlice>();
+ _slice->dtype(loco::DataType::S32);
+ _slice->name("slice");
+ }
+
+protected:
+ luci::CircleSlice *_slice = nullptr;
+ luci::CircleConst *_begin = nullptr;
+ luci::CircleConst *_size = nullptr;
+};
+
+class SliceGraph : public TestIOGraph, public SliceGraphlet
+{
+public:
+ SliceGraph() = default;
+
+public:
+ void init(const ShapeU32 shape, bool remove)
+ {
+ TestIOGraph::init(shape, shape);
+ SliceGraphlet::init(g(), shape, remove);
+
+ _slice->input(input());
+ _slice->begin(_begin);
+ _slice->size(_size);
+
+ output()->from(_slice);
+ }
+};
+
+} // namespace
+
+TEST(RemoveUnnecessarySlicePass, name)
+{
+ luci::RemoveUnnecessarySlicePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(RemoveUnnecessarySlicePass, removed)
+{
+ SliceGraph g;
+
+ g.init({2, 4, 2, 3}, true);
+
+ // confirm graph has Slice
+ auto slice_node = luci::test::first_node<luci::CircleSlice>(g.g());
+ ASSERT_NE(nullptr, slice_node);
+ luci::RemoveUnnecessarySlicePass pass;
+ while (pass.run(g.g()))
+ ;
+
+ // check Slice is removed
+ slice_node = luci::test::first_node<luci::CircleSlice>(g.g());
+ ASSERT_EQ(nullptr, slice_node);
+}
+
+TEST(RemoveUnnecessarySlicePass, not_removed_NEG)
+{
+ SliceGraph g;
+
+ g.init({2, 4, 2, 3}, false);
+
+ // confirm graph has Slice
+ auto slice_node = luci::test::first_node<luci::CircleSlice>(g.g());
+ ASSERT_NE(nullptr, slice_node);
+ luci::RemoveUnnecessarySlicePass pass;
+ while (pass.run(g.g()))
+ ;
+
+ // check Slice is NOT removed
+ slice_node = luci::test::first_node<luci::CircleSlice>(g.g());
+ ASSERT_NE(nullptr, slice_node);
+}
diff --git a/compiler/luci/pass/src/RemoveUnnecessarySplitPass.cpp b/compiler/luci/pass/src/RemoveUnnecessarySplitPass.cpp
new file mode 100644
index 000000000..3243f6213
--- /dev/null
+++ b/compiler/luci/pass/src/RemoveUnnecessarySplitPass.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveUnnecessarySplitPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+bool remove_unnecessary_split(luci::CircleNode *node)
+{
+ auto target_node = dynamic_cast<luci::CircleSplitOut *>(node);
+ if (target_node == nullptr)
+ return false;
+
+ auto split_node = dynamic_cast<luci::CircleSplit *>(target_node->input());
+ if (split_node == nullptr)
+ return false;
+
+ if (loco::succs(split_node).size() != 1)
+ return false;
+
+ if (split_node->num_split() == 1)
+ {
+ auto input_node = loco::must_cast<luci::CircleNode *>(split_node->input());
+ replace(target_node).with(input_node);
+ return true;
+ }
+ return false;
+}
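+
+// For example, a Split with num_split == 1 produces a single CircleSplitOut
+// that is identical to the Split's input, so the SplitOut is rewired to that
+// input; a Split with two or more outputs is left untouched.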
+
+} // namespace
+
+namespace luci
+{
+
+bool RemoveUnnecessarySplitPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ if (remove_unnecessary_split(circle_node))
+ {
+ changed = true;
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/RemoveUnnecessarySplitPass.test.cpp b/compiler/luci/pass/src/RemoveUnnecessarySplitPass.test.cpp
new file mode 100644
index 000000000..f292b5357
--- /dev/null
+++ b/compiler/luci/pass/src/RemoveUnnecessarySplitPass.test.cpp
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveUnnecessarySplitPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+#include "test/TestFirstNode.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class SplitGraphlet
+{
+public:
+ SplitGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, uint32_t nout)
+ {
+ assert(nout == 1 || nout == 2);
+
+ _dim = g->nodes()->create<luci::CircleConst>();
+ set_shape_vector(_dim, {0});
+ _dim->name("dim");
+
+ _split = g->nodes()->create<luci::CircleSplit>();
+ _split->num_split(nout);
+ _split->name("split");
+
+ _split_out_0 = g->nodes()->create<luci::CircleSplitOut>();
+ _split_out_0->index(0);
+ _split_out_0->name("split_out_0");
+
+ if (nout == 2)
+ {
+ _split_out_1 = g->nodes()->create<luci::CircleSplitOut>();
+ _split_out_1->index(1);
+ _split_out_1->name("split_out_1");
+ }
+ }
+
+protected:
+ luci::CircleSplit *_split = nullptr;
+ luci::CircleConst *_dim = nullptr;
+ luci::CircleSplitOut *_split_out_0 = nullptr;
+ luci::CircleSplitOut *_split_out_1 = nullptr;
+};
+
+class SplitOneGraph : public TestIGraphlet, public TestOGraphlet, public SplitGraphlet
+{
+public:
+ SplitOneGraph() = default;
+
+public:
+ void init()
+ {
+ TestIGraphlet::init(g(), {1});
+ TestOGraphlet::init(g(), {1});
+ SplitGraphlet::init(g(), 1);
+
+ _split->input(input());
+ _split->split_dim(_dim);
+ _split_out_0->input(_split);
+
+ output()->from(_split_out_0);
+ }
+};
+
+class SplitTwoGraph : public TestIGraphlet, public TestOsGraphlet<2>, public SplitGraphlet
+{
+public:
+ SplitTwoGraph() = default;
+
+public:
+ void init()
+ {
+ TestIGraphlet::init(g(), {1});
+ TestOsGraphlet<2>::init(g(), {{1}, {1}});
+ SplitGraphlet::init(g(), 2);
+
+ _split->input(input());
+ _split->split_dim(_dim);
+ _split_out_0->input(_split);
+ _split_out_1->input(_split);
+
+ output(0)->from(_split_out_0);
+ output(1)->from(_split_out_1);
+ }
+};
+
+// TODO use ::testing::Test
+
+} // namespace
+
+TEST(RemoveUnnecessarySplitPass, name)
+{
+ luci::RemoveUnnecessarySplitPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(RemoveUnnecessarySplitPass, create_unnecessary_split)
+{
+ SplitOneGraph g;
+
+ g.init();
+
+ luci::RemoveUnnecessarySplitPass pass;
+ while (pass.run(g.g()))
+ ;
+
+ auto split_node = luci::test::first_node<luci::CircleSplit>(g.g());
+ // No Split node is in graph.
+ ASSERT_EQ(nullptr, split_node);
+}
+
+TEST(RemoveUnnecessarySplitPass, create_unnecessary_split_NEG)
+{
+ SplitTwoGraph g;
+
+ g.init();
+
+ luci::RemoveUnnecessarySplitPass pass;
+ while (pass.run(g.g()))
+ ;
+
+ auto split_node = luci::test::first_node<luci::CircleSplit>(g.g());
+ // Split node is in graph.
+ ASSERT_NE(nullptr, split_node);
+}
diff --git a/compiler/luci/pass/src/RemoveUnnecessaryStridedSlicePass.cpp b/compiler/luci/pass/src/RemoveUnnecessaryStridedSlicePass.cpp
new file mode 100644
index 000000000..22b1aa64f
--- /dev/null
+++ b/compiler/luci/pass/src/RemoveUnnecessaryStridedSlicePass.cpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveUnnecessaryStridedSlicePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
+/**
+ * @brief Return value in CircleConst.
+ * @details Return value in position on CircleConst with int64 format.
+ */
+int64_t value_from_circle_const(const luci::CircleConst *node, uint32_t idx)
+{
+ assert(node->rank() == 1 && node->dim(0).value() > idx);
+ assert(node->dtype() == loco::DataType::S64 || node->dtype() == loco::DataType::S32);
+
+ if (node->dtype() == loco::DataType::S64)
+ return node->at<loco::DataType::S64>(idx);
+ return static_cast<int64_t>(node->at<loco::DataType::S32>(idx));
+}
+
+bool remove_no_effect_strided_slice(luci::CircleStridedSlice *target_node)
+{
+ auto begin_const = dynamic_cast<luci::CircleConst *>(target_node->begin());
+ if (begin_const == nullptr)
+ return false;
+
+ auto strides_const = dynamic_cast<luci::CircleConst *>(target_node->strides());
+ if (strides_const == nullptr)
+ return false;
+
+ auto end_const = dynamic_cast<luci::CircleConst *>(target_node->end());
+ if (end_const == nullptr)
+ return false;
+
+ auto input_node = loco::must_cast<luci::CircleNode *>(target_node->input());
+ for (uint32_t i = 0; i < input_node->rank(); i++)
+ {
+ if (value_from_circle_const(begin_const, i) != 0)
+ return false;
+
+ int64_t strides_value = value_from_circle_const(strides_const, i);
+ if (strides_value != 1)
+ return false;
+
+ int64_t end_value = value_from_circle_const(end_const, i);
+ if (end_value == -1)
+ continue;
+
+    // The dimension must be known before its value can be compared
+    if (!input_node->dim(i).known())
+      return false;
+
+    if (end_value != input_node->dim(i).value())
+      return false;
+ }
+
+  /**
+   * Besides the shape checks above, the mask attributes must be zero;
+   * a StridedSlice with new_axis or shrink_axis masks changes the rank
+   * of the tensor and must not be removed.
+   */
+ if (target_node->new_axis_mask() != 0 || target_node->shrink_axis_mask() != 0)
+ return false;
+
+ replace(target_node).with(input_node);
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+/**
+ * BEFORE
+ *
+ * [CircleNode]
+ * |
+ * [CircleStridedSlice]
+ * |
+ * [CircleNode]
+ *
+ * AFTER
+ *
+ * [CircleNode]
+ * |
+ * [CircleNode] [CircleStridedSlice]
+ *
+ * StridedSlice OP has no effect (and is removed) when, for every index idx,
+ * 1. begin_const[idx] is 0, AND
+ * 2. strides_const[idx] is 1, AND
+ * 3. end_const[idx] is -1, OR the input dimension idx is known and equal to
+ *    end_const[idx],
+ * and both new_axis_mask and shrink_axis_mask are 0.
+ */
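+/*
+ * For illustration: with an input of shape [2, 4, 2, 3],
+ * begin = [0, 0, 0, 0], strides = [1, 1, 1, 1], and end = [2, 4, 2, 3] (or all -1)
+ * select every element, so the StridedSlice is an identity and its consumers
+ * can read directly from its input.
+ */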
+bool RemoveUnnecessaryStridedSlicePass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto target_node = dynamic_cast<luci::CircleStridedSlice *>(node);
+ if (target_node != nullptr)
+ if (remove_no_effect_strided_slice(target_node))
+ changed = true;
+ }
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/RemoveUnnecessaryStridedSlicePass.test.cpp b/compiler/luci/pass/src/RemoveUnnecessaryStridedSlicePass.test.cpp
new file mode 100644
index 000000000..7d611c864
--- /dev/null
+++ b/compiler/luci/pass/src/RemoveUnnecessaryStridedSlicePass.test.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "luci/Pass/RemoveUnnecessaryStridedSlicePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+#include "test/TestFirstNode.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class StridedSliceGraphlet
+{
+public:
+ StridedSliceGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 input_shape, bool remove)
+ {
+    // Create begin const
+ _begin = g->nodes()->create<luci::CircleConst>();
+ _begin->rank(1);
+ _begin->dim(0).set(input_shape.size());
+ _begin->shape_status(luci::ShapeStatus::VALID);
+ _begin->dtype(loco::DataType::S32);
+ _begin->size<loco::DataType::S32>(input_shape.size());
+    for (uint32_t i = 0; i < input_shape.size(); ++i)
+ {
+ _begin->at<loco::DataType::S32>(i) = remove ? 0 : 1;
+ }
+
+    // Create strides const
+ _strides = g->nodes()->create<luci::CircleConst>();
+ _strides->rank(1);
+ _strides->dim(0).set(input_shape.size());
+ _strides->shape_status(luci::ShapeStatus::VALID);
+ _strides->dtype(loco::DataType::S32);
+ _strides->size<loco::DataType::S32>(input_shape.size());
+    for (uint32_t i = 0; i < input_shape.size(); ++i)
+ {
+ _strides->at<loco::DataType::S32>(i) = remove ? 1 : -1;
+ }
+
+ std::vector<uint32_t> shape_vector{input_shape};
+
+ _end = g->nodes()->create<luci::CircleConst>();
+ _end->rank(1);
+ _end->dim(0).set(input_shape.size());
+ _end->shape_status(luci::ShapeStatus::VALID);
+ _end->dtype(loco::DataType::S32);
+ _end->size<loco::DataType::S32>(input_shape.size());
+    for (uint32_t i = 0; i < input_shape.size(); ++i)
+ {
+ if (remove)
+ _end->at<loco::DataType::S32>(i) = static_cast<int32_t>(shape_vector.at(i));
+ else
+ _end->at<loco::DataType::S32>(i) = -1;
+ }
+
+    // Create StridedSlice node
+ _strided_slice = g->nodes()->create<luci::CircleStridedSlice>();
+ _strided_slice->dtype(loco::DataType::S32);
+ }
+
+protected:
+ luci::CircleStridedSlice *_strided_slice = nullptr;
+ luci::CircleConst *_begin = nullptr;
+ luci::CircleConst *_strides = nullptr;
+ luci::CircleConst *_end = nullptr;
+};
+
+class StridedSliceGraph : public TestIOGraph, public StridedSliceGraphlet
+{
+public:
+ StridedSliceGraph() = default;
+
+public:
+ void init(const ShapeU32 shape, bool remove)
+ {
+ TestIOGraph::init(shape, shape);
+ StridedSliceGraphlet::init(g(), shape, remove);
+
+ _strided_slice->input(input());
+ _strided_slice->begin(_begin);
+ _strided_slice->strides(_strides);
+ _strided_slice->end(_end);
+
+ output()->from(_strided_slice);
+ }
+};
+
+} // namespace
+
+TEST(RemoveUnnecessaryStridedSlicePass, basic_case)
+{
+ StridedSliceGraph g;
+
+ g.init({2, 4, 2, 3}, true);
+
+ auto strided_slice_node = luci::test::first_node<luci::CircleStridedSlice>(g.g());
+ ASSERT_NE(nullptr, strided_slice_node);
+ luci::RemoveUnnecessaryStridedSlicePass pass;
+ while (pass.run(g.g()))
+ ;
+
+ strided_slice_node = luci::test::first_node<luci::CircleStridedSlice>(g.g());
+ ASSERT_EQ(nullptr, strided_slice_node);
+}
+
+TEST(RemoveUnnecessaryStridedSlicePass, basic_fail_case_NEG)
+{
+ StridedSliceGraph g;
+
+ g.init({2, 4, 2, 3}, false);
+
+ auto strided_slice_node = luci::test::first_node<luci::CircleStridedSlice>(g.g());
+ ASSERT_NE(nullptr, strided_slice_node);
+ luci::RemoveUnnecessaryStridedSlicePass pass;
+ while (pass.run(g.g()))
+ ;
+
+ strided_slice_node = luci::test::first_node<luci::CircleStridedSlice>(g.g());
+ ASSERT_NE(nullptr, strided_slice_node);
+}
diff --git a/compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.cpp b/compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.cpp
new file mode 100644
index 000000000..bca0a9483
--- /dev/null
+++ b/compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.cpp
@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ReplaceMulAddWithDepthwiseConvPass.h"
+
+#include "BatchNormPatternFinder.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+namespace
+{
+
+luci::CircleConst *create_weights_from_gamma(luci::CircleConst *gamma)
+{
+ assert(gamma->rank() == 1 or gamma->rank() == 4);
+
+ uint32_t channel_idx = gamma->rank() - 1;
+ uint32_t channel_size = gamma->dim(channel_idx).value();
+
+  // Gamma must be channel-wise: every dimension except the channel must be 1
+ for (uint32_t i = 0; i < gamma->rank(); i++)
+ {
+ if (i != channel_idx)
+ assert(gamma->dim(i).value() == 1); // FIX is_batchnorm_mul UNLESS
+ }
+
+ auto name = gamma->name();
+ assert(name.length() > 0);
+
+ // Channel-wise MUL is the same as DEPTHWISE_CONV2D with filter shape (1,1,1,channel_size)
+ auto weights = gamma->graph()->nodes()->create<luci::CircleConst>();
+ weights->dtype(loco::DataType::FLOAT32);
+ weights->rank(4);
+ weights->dim(0).set(1);
+ weights->dim(1).set(1);
+ weights->dim(2).set(1);
+ weights->dim(3).set(channel_size);
+ weights->shape_status(luci::ShapeStatus::VALID);
+ weights->size<loco::DataType::FLOAT32>(channel_size);
+ for (uint32_t i = 0; i < channel_size; i++)
+ {
+ weights->at<loco::DataType::FLOAT32>(i) = gamma->at<loco::DataType::FLOAT32>(i);
+ }
+ weights->name(name + "_weights");
+
+ return weights;
+}
+
+luci::CircleConst *create_bias_from_beta(luci::CircleConst *beta)
+{
+ assert(beta->rank() == 1 or beta->rank() == 4);
+
+ uint32_t channel_idx = beta->rank() - 1;
+ uint32_t channel_size = beta->dim(channel_idx).value();
+
+  // Beta must be channel-wise: every dimension except the channel must be 1
+ for (uint32_t i = 0; i < beta->rank(); i++)
+ {
+ if (i != channel_idx)
+ assert(beta->dim(i).value() == 1); // FIX is_batchnorm_add UNLESS
+ }
+
+ auto name = beta->name();
+ assert(name.length() > 0);
+
+ // Channel-wise ADD is the same as bias (shape = (channel_size)) of DEPTHWISE_CONV2D
+ auto bias = beta->graph()->nodes()->create<luci::CircleConst>();
+ bias->dtype(loco::DataType::FLOAT32);
+ bias->rank(1);
+ bias->dim(0).set(channel_size);
+ bias->size<loco::DataType::FLOAT32>(channel_size);
+ bias->shape_status(luci::ShapeStatus::VALID);
+ for (uint32_t i = 0; i < channel_size; i++)
+ {
+ bias->at<loco::DataType::FLOAT32>(i) = beta->at<loco::DataType::FLOAT32>(i);
+ }
+ bias->name(name + "_bias");
+
+ return bias;
+}
+
+/**
+ * Replace channel-wise Mul/Add with DepthwiseConv2D
+ *
+ * BEFORE
+ *
+ * [Node] [gamma]
+ * | /
+ * [Mul] [beta]
+ * | /
+ * [Add]
+ *
+ * AFTER
+ *
+ * [Node] [weights] [bias]
+ * \ / /
+ * [DepthwiseConv2D]
+ */
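+/*
+ * For illustration: with input x of shape (1, H, W, C), the channel-wise
+ * computation x * gamma + beta equals DepthwiseConv2D(x, filter, bias)
+ * with a (1, 1, 1, C) filter where filter[0][0][0][c] = gamma[c] and
+ * bias[c] = beta[c], stride 1 and SAME padding, because a 1x1 depthwise
+ * filter only scales each channel.
+ */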
+bool replace_mul_add_with_dwconv(luci::CircleAdd *add)
+{
+ luci::CircleNode *pred_node = nullptr;
+ luci::CircleMul *mul = nullptr;
+ luci::CircleConst *beta = nullptr;
+ luci::CircleConst *gamma = nullptr;
+
+ if (!is_batchnorm_add(add, mul, beta))
+ return false;
+
+ if (loco::succs(mul).size() != 1)
+ return false;
+
+ if (!is_batchnorm_mul(mul, pred_node, gamma))
+ return false;
+
+ if (pred_node->rank() != 4)
+ return false;
+
+ if (pred_node->dtype() != loco::DataType::FLOAT32 || beta->dtype() != loco::DataType::FLOAT32 ||
+ gamma->dtype() != loco::DataType::FLOAT32)
+ return false;
+
+ auto weights = create_weights_from_gamma(gamma);
+ auto bias = create_bias_from_beta(beta);
+
+ auto name = add->name();
+ assert(name.length() > 0);
+
+ auto dwconv = add->graph()->nodes()->create<luci::CircleDepthwiseConv2D>();
+ dwconv->input(pred_node);
+ dwconv->filter(weights);
+ dwconv->bias(bias);
+ dwconv->padding(luci::Padding::SAME);
+ dwconv->stride()->w(1);
+ dwconv->stride()->h(1);
+ dwconv->depthMultiplier(1);
+ dwconv->dilation()->w(1);
+ dwconv->dilation()->h(1);
+ dwconv->fusedActivationFunction(add->fusedActivationFunction());
+ dwconv->name(name + "/DepthwiseConv2D");
+ luci::add_origin(dwconv, luci::composite_origin({luci::get_origin(mul), luci::get_origin(add)}));
+
+ loco::replace(add).with(dwconv);
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool ReplaceMulAddWithDepthwiseConvPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto add = dynamic_cast<luci::CircleAdd *>(node))
+ {
+ if (replace_mul_add_with_dwconv(add))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.test.cpp b/compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.test.cpp
new file mode 100644
index 000000000..bac033112
--- /dev/null
+++ b/compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.test.cpp
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ReplaceMulAddWithDepthwiseConvPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+/**
+ * Simple graph for test
+ *
+ * BEFORE
+ *
+ * [Node] [gamma]
+ * | /
+ * [Mul] [beta]
+ * | /
+ * [Add]
+ *
+ * AFTER
+ *
+ * [Node] [weights] [bias]
+ * \ / /
+ * [DepthwiseConv2D]
+ */
+class SimpleGraph
+{
+public:
+ SimpleGraph()
+ {
+ input = g.nodes()->create<luci::CircleInput>();
+ mul = g.nodes()->create<luci::CircleMul>();
+ gamma = g.nodes()->create<luci::CircleConst>();
+ add = g.nodes()->create<luci::CircleAdd>();
+ beta = g.nodes()->create<luci::CircleConst>();
+ output = g.nodes()->create<luci::CircleOutput>();
+
+ auto graph_input = g.inputs()->create();
+ input->index(graph_input->index());
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+
+ input->dtype(loco::DataType::FLOAT32);
+ mul->dtype(loco::DataType::FLOAT32);
+ gamma->dtype(loco::DataType::FLOAT32);
+ add->dtype(loco::DataType::FLOAT32);
+ beta->dtype(loco::DataType::FLOAT32);
+ output->dtype(loco::DataType::FLOAT32);
+
+ uint32_t channel_size = 16;
+ input->shape({1, 4, 4, channel_size});
+ mul->shape({1, 4, 4, channel_size});
+ gamma->shape({channel_size});
+ add->shape({1, 4, 4, channel_size});
+ beta->shape({channel_size});
+ output->shape({1, 4, 4, channel_size});
+
+ gamma->size<loco::DataType::FLOAT32>(channel_size);
+ beta->size<loco::DataType::FLOAT32>(channel_size);
+ for (uint32_t i = 0; i < channel_size; i++)
+ {
+ gamma->at<loco::DataType::FLOAT32>(i) = i;
+ beta->at<loco::DataType::FLOAT32>(i) = i;
+ }
+
+ mul->x(input);
+ mul->y(gamma);
+ add->x(mul);
+ add->y(beta);
+ output->from(add);
+
+ input->name("input");
+ mul->name("mul");
+ gamma->name("gamma");
+ add->name("add");
+ beta->name("beta");
+ output->name("output");
+ }
+
+public:
+ loco::Graph g;
+ luci::CircleInput *input = nullptr;
+ luci::CircleMul *mul = nullptr;
+ luci::CircleConst *gamma = nullptr;
+ luci::CircleAdd *add = nullptr;
+ luci::CircleConst *beta = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+} // namespace
+
+TEST(ReplaceMulAddWithDepthwiseConv, name)
+{
+ luci::ReplaceMulAddWithDepthwiseConvPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(ReplaceMulAddWithDepthwiseConv, simple)
+{
+ SimpleGraph g;
+
+ luci::ReplaceMulAddWithDepthwiseConvPass pass;
+ while (pass.run(&g.g))
+ ;
+
+ auto dwconv = dynamic_cast<luci::CircleDepthwiseConv2D *>(g.output->from());
+ EXPECT_NE(nullptr, dwconv);
+
+ uint32_t channel_size = 16;
+ auto weights = dynamic_cast<luci::CircleConst *>(dwconv->filter());
+ auto bias = dynamic_cast<luci::CircleConst *>(dwconv->bias());
+ EXPECT_NE(nullptr, weights);
+ EXPECT_EQ(4, weights->rank());
+ EXPECT_EQ(channel_size, weights->dim(3).value());
+ EXPECT_NE(nullptr, bias);
+ EXPECT_EQ(1, bias->rank());
+ EXPECT_EQ(channel_size, bias->dim(0).value());
+
+  for (uint32_t i = 0; i < channel_size; i++)
+ {
+ EXPECT_FLOAT_EQ(i, weights->at<loco::DataType::FLOAT32>(i));
+ EXPECT_FLOAT_EQ(i, bias->at<loco::DataType::FLOAT32>(i));
+ }
+}
+
+TEST(ReplaceMulAddWithDepthwiseConv, simple_rank4)
+{
+ SimpleGraph g;
+
+ const uint32_t channel_size = 16;
+ g.gamma->shape({1, 1, 1, channel_size});
+ g.beta->shape({1, 1, 1, channel_size});
+
+ luci::ReplaceMulAddWithDepthwiseConvPass pass;
+ while (pass.run(&g.g))
+ ;
+
+ auto dwconv = dynamic_cast<luci::CircleDepthwiseConv2D *>(g.output->from());
+ EXPECT_NE(nullptr, dwconv);
+
+ auto weights = dynamic_cast<luci::CircleConst *>(dwconv->filter());
+ auto bias = dynamic_cast<luci::CircleConst *>(dwconv->bias());
+ EXPECT_NE(nullptr, weights);
+ EXPECT_EQ(4, weights->rank());
+ EXPECT_EQ(channel_size, weights->dim(3).value());
+ EXPECT_NE(nullptr, bias);
+ EXPECT_EQ(1, bias->rank());
+ EXPECT_EQ(channel_size, bias->dim(0).value());
+
+  for (uint32_t i = 0; i < channel_size; i++)
+ {
+ EXPECT_FLOAT_EQ(i, weights->at<loco::DataType::FLOAT32>(i));
+ EXPECT_FLOAT_EQ(i, bias->at<loco::DataType::FLOAT32>(i));
+ }
+}
+
+TEST(ReplaceMulAddWithDepthwiseConv, wrong_op_NEG)
+{
+ SimpleGraph g;
+ // swap mul/add (changed to add->mul)
+ g.add->x(g.input);
+ loco::replace(g.add).with(g.mul);
+ g.mul->x(g.add);
+
+ luci::ReplaceMulAddWithDepthwiseConvPass pass;
+ auto changed = pass.run(&g.g);
+
+ EXPECT_EQ(false, changed);
+}
+
+TEST(ReplaceMulAddWithDepthwiseConv, rank3_NEG)
+{
+ SimpleGraph g;
+
+ g.input->shape({4, 4, 16});
+ g.mul->shape({4, 4, 16});
+ g.add->shape({4, 4, 16});
+ g.output->shape({4, 4, 16});
+
+ luci::ReplaceMulAddWithDepthwiseConvPass pass;
+ auto changed = pass.run(&g.g);
+
+ EXPECT_EQ(false, changed);
+}
diff --git a/compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.cpp b/compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.cpp
new file mode 100644
index 000000000..07457c1e8
--- /dev/null
+++ b/compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.cpp
@@ -0,0 +1,240 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+#include <luci/Pass/ReplaceNonConstFCWithBatchMatMulPass.h>
+
+namespace
+{
+
+// TODO move to global helper list if needed
+/**
+ * @brief Create a node that applies fused activation function `act` to input `inp`
+ */
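+// e.g., fromActivation(node, luci::FusedActFunc::RELU) returns a CircleRelu
+// whose features() input is node; with FusedActFunc::NONE, node itself is returned.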
+luci::CircleNode *fromActivation(luci::CircleNode *inp, luci::FusedActFunc act)
+{
+ switch (act)
+ {
+ case luci::FusedActFunc::NONE:
+ return inp;
+ case luci::FusedActFunc::RELU:
+ {
+ auto n = inp->graph()->nodes()->create<luci::CircleRelu>();
+ n->features(inp);
+ return n;
+ }
+ case luci::FusedActFunc::RELU6:
+ {
+ auto n = inp->graph()->nodes()->create<luci::CircleRelu6>();
+ n->features(inp);
+ return n;
+ }
+ case luci::FusedActFunc::RELU_N1_TO_1:
+ {
+ auto n = inp->graph()->nodes()->create<luci::CircleReluN1To1>();
+ n->features(inp);
+ return n;
+ }
+ case luci::FusedActFunc::TANH:
+ {
+ auto n = inp->graph()->nodes()->create<luci::CircleTanh>();
+ n->x(inp);
+ return n;
+ }
+ case luci::FusedActFunc::SIGN_BIT:
+ {
+ throw std::invalid_argument("no matching node to create from fused activation");
+ }
+ default:
+ throw std::invalid_argument("invalid fused activation");
+ }
+}
+
+// Create a CircleReshape whose
+// - dtype is the same as node's
+// - shape is the same as node's
+// NOTE: User should set input(tensor) of the returned Op.
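+// Typical usage: auto reshape = create_reshape(fc); reshape->tensor(matmul);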
+luci::CircleReshape *create_reshape(luci::CircleFullyConnected *node)
+{
+ assert(node); // FIX_CALLER_UNLESS
+
+ auto g = node->graph();
+
+ auto reshape = g->nodes()->create<luci::CircleReshape>();
+ reshape->name(node->name() + "/reshape");
+ reshape->dtype(node->dtype());
+ luci::add_origin(reshape, luci::get_origin(node));
+
+ auto shape_const = g->nodes()->create<luci::CircleConst>();
+ shape_const->dtype(loco::DataType::S32);
+ shape_const->rank(1);
+ shape_const->dim(0).set(node->rank());
+ shape_const->size<loco::DataType::S32>(node->rank());
+ for (uint32_t i = 0; i < node->rank(); i++)
+ {
+ assert(node->dim(i).known()); // FIX_CALLER_UNLESS
+ shape_const->at<loco::DataType::S32>(i) = node->dim(i).value();
+ }
+ shape_const->shape_status(luci::ShapeStatus::VALID);
+ shape_const->name(node->name() + "/shape");
+ luci::add_origin(shape_const, luci::get_origin(node));
+
+ reshape->shape(shape_const);
+
+ return reshape;
+}
+
+/**
+ * Replace Fully Connected with Batched MatMul
+ *
+ * BEFORE
+ *
+ * [Node1] [Node2]
+ * | |
+ * [transpose]? [transpose]?
+ * \ /
+ * [FullyConnected]
+ *
+ * AFTER
+ *
+ * [Node1] [Node2]
+ * \ /
+ * [BatchMatMul]
+ * |
+ * [Reshape] [BiasValue]?
+ * \ /
+ * [Add]?
+ * |
+ * [Activation]?
+ *
+ * Nodes with "?" denote optional elements
+ * NOTE A Reshape Op is inserted to keep the original output shape of the
+ * FullyConnected Op. The Reshape can be redundant (input shape == output
+ * shape); such cases are removed later by RemoveUnnecessaryReshapePass.
+ */
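+/*
+ * Sketch of the equivalence: FullyConnected computes x * W^T, so with
+ * non-const weights the same result is BatchMatMul(x, W, adj_x=false,
+ * adj_y=true). When an input already comes through a Transpose, the
+ * Transpose is folded into the corresponding adj_x/adj_y flag instead,
+ * as matched below.
+ */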
+bool replace_fc_with_matmul(luci::CircleFullyConnected *fc)
+{
+ luci::CircleNode *x = nullptr;
+ luci::CircleNode *y = nullptr;
+ luci::CircleTranspose *ty = nullptr;
+ luci::CircleTranspose *tx = nullptr;
+ bool adj_x = false;
+ bool adj_y = true;
+
+ if (dynamic_cast<luci::CircleConst *>(fc->weights()))
+ return false; // NonConst
+
+ if ((ty = dynamic_cast<luci::CircleTranspose *>(fc->weights()))) // is y a transpose?
+ {
+ adj_y = false;
+ if (dynamic_cast<luci::CircleConst *>(ty->a()))
+ return false;
+ else
+ y = loco::must_cast<luci::CircleNode *>(ty->a());
+ }
+ else
+  { // y is neither a Transpose nor a Const
+ y = loco::must_cast<luci::CircleNode *>(fc->weights());
+ }
+ if ((tx = dynamic_cast<luci::CircleTranspose *>(fc->input())))
+ {
+ adj_x = true;
+ x = loco::must_cast<luci::CircleNode *>(tx->a());
+ }
+ else
+ {
+ x = loco::must_cast<luci::CircleNode *>(fc->input());
+ }
+
+ if (x->dtype() != loco::DataType::FLOAT32 || y->dtype() != loco::DataType::FLOAT32)
+ return false;
+
+ auto bc = dynamic_cast<luci::CircleConst *>(fc->bias());
+ // NOTE bias can be empty as CircleOutputExclude type
+ // NOTE we can only handle bias as FLOAT32 type as of now
+ if (nullptr != bc && bc->dtype() != loco::DataType::FLOAT32)
+ return false;
+
+ auto name = fc->name();
+ assert(name.length() > 0);
+
+ auto matmul = fc->graph()->nodes()->create<luci::CircleBatchMatMul>();
+ matmul->x(x);
+ matmul->y(y);
+ matmul->adj_x(adj_x);
+ matmul->adj_y(adj_y);
+ matmul->name(name);
+ matmul->dtype(fc->dtype());
+
+ luci::add_origin(matmul, luci::get_origin(fc));
+
+ auto reshape = create_reshape(fc);
+ reshape->tensor(matmul);
+
+ auto all_zero = [](const luci::CircleConst *c) {
+ bool ac = true;
+ for (uint32_t i = 0; i < c->size<loco::DataType::FLOAT32>() && ac; i++)
+ {
+ ac &= c->at<loco::DataType::FLOAT32>(i) == 0.0f;
+ }
+ return ac;
+ };
+
+ if (nullptr != bc && !all_zero(bc))
+ {
+ auto bias_add = fc->graph()->nodes()->create<luci::CircleAdd>();
+ bias_add->x(reshape);
+ bias_add->y(bc);
+ bias_add->name(fc->name() + "/bias_add");
+ bias_add->dtype(fc->dtype());
+ add_origin(bias_add, get_origin(fc));
+ bias_add->fusedActivationFunction(fc->fusedActivationFunction());
+ loco::replace(fc).with(bias_add);
+ }
+ else
+ {
+ // NOTE bias doesn't exist or bias is all zero
+ auto n = fromActivation(reshape, fc->fusedActivationFunction());
+ add_origin(n, luci::get_origin(fc));
+    n->name(fc->name() + "/fusedActivation");
+ n->dtype(fc->dtype());
+ loco::replace(fc).with(n);
+ }
+
+ return true;
+}
+} // namespace
+
+namespace luci
+{
+
+bool ReplaceNonConstFCWithBatchMatMulPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto fc = dynamic_cast<luci::CircleFullyConnected *>(node))
+ {
+ if (replace_fc_with_matmul(fc))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.test.cpp b/compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.test.cpp
new file mode 100644
index 000000000..194893f01
--- /dev/null
+++ b/compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.test.cpp
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ReplaceNonConstFCWithBatchMatMulPass.h"
+
+#include "helpers/CreateCircleConst.h"
+
+#include <luci/test/TestIOGraph.h>
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+/**
+ * Simple graph for test
+ *
+ * BEFORE
+ *
+ * [IFM1] [IFM2] [BIAS]
+ * \ | /
+ * [FC]
+ * |
+ * [Res]
+ *
+ * AFTER
+ * [IFM1] [IFM2]
+ * \ |
+ * [BatchMatMul] [BIAS]
+ * \ /
+ * [Add]
+ * |
+ * [Res]
+ *
+ */
+struct FCGraphlet
+{
+public:
+ FCGraphlet() = default;
+ virtual ~FCGraphlet() = default;
+
+ void init(loco::Graph *g, const ShapeU32 r_shape, const float bv)
+ {
+ _tr_y = g->nodes()->create<luci::CircleTranspose>();
+ _tr_y->a(_y);
+ std::vector<int32_t> tr_val = {1, 0};
+ _tr_y->perm(luci::create_const_node(g, loco::DataType::S32, {2}, tr_val));
+
+ _fc = g->nodes()->create<luci::CircleFullyConnected>();
+ _fc->input(_x);
+ _fc->weights(_tr_y);
+ _fc->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _fc->dtype(loco::DataType::FLOAT32);
+ _fc->shape(r_shape);
+ auto l = _fc->dim(_fc->rank() - 1).value();
+ std::vector<float> bias_val(l, bv);
+ _fc->bias(luci::create_const_node(g, loco::DataType::FLOAT32, {l}, bias_val));
+ _fc->name("fc");
+ }
+
+public:
+ luci::CircleFullyConnected *fc() { return _fc; }
+
+protected:
+ luci::CircleFullyConnected *_fc = nullptr;
+ luci::CircleTranspose *_tr_y = nullptr;
+ luci::CircleInput *_x = nullptr;
+ luci::CircleInput *_y = nullptr;
+};
+
+struct FCGraph : public TestIsGraphlet<2>, public TestOGraphlet, public FCGraphlet
+{
+ FCGraph() = default;
+ virtual ~FCGraph() = default;
+ void init(const ShapeU32 x_shape, const ShapeU32 y_shape, const ShapeU32 r_shape, const float bv)
+ {
+ TestIsGraphlet<2>::init(g(), {x_shape, y_shape});
+ TestOGraphlet::init(g(), r_shape);
+ _x = input(0);
+ _y = input(1);
+ FCGraphlet::init(g(), r_shape, bv);
+ output()->from(_fc);
+ }
+};
+
+class ReplaceNonConstFCWithBatchMatMulPassTest : public ::testing::Test
+{
+public:
+ FCGraph g;
+ luci::ReplaceNonConstFCWithBatchMatMulPass pass;
+};
+
+} // namespace
+
+TEST_F(ReplaceNonConstFCWithBatchMatMulPassTest, simple_test)
+{
+ g.init({2, 3}, {2, 3}, {2, 2}, 0.0f);
+
+ auto ret = pass.run(g.g());
+ EXPECT_EQ(true, ret);
+
+ auto res = dynamic_cast<luci::CircleReshape *>(g.output()->from());
+ EXPECT_NE(nullptr, res);
+}
+
+TEST_F(ReplaceNonConstFCWithBatchMatMulPassTest, nonzero_bias_test)
+{
+ g.init({2, 3}, {2, 3}, {2, 2}, 1.0f);
+
+ auto ret = pass.run(g.g());
+ EXPECT_EQ(true, ret);
+
+ auto mm = dynamic_cast<luci::CircleAdd *>(g.output()->from());
+ EXPECT_NE(nullptr, mm);
+}
+
+TEST_F(ReplaceNonConstFCWithBatchMatMulPassTest, wrong_op_NEG)
+{
+ loco::Graph g;
+
+ auto inp = g.nodes()->create<luci::CircleInput>();
+ auto relu = g.nodes()->create<luci::CircleRelu>();
+ relu->features(inp);
+
+ luci::ReplaceNonConstFCWithBatchMatMulPass pass;
+ auto changed = pass.run(&g);
+
+ EXPECT_EQ(false, changed);
+}
diff --git a/compiler/luci/pass/src/ReplaceSubWithAddPass.cpp b/compiler/luci/pass/src/ReplaceSubWithAddPass.cpp
new file mode 100644
index 000000000..f9102d836
--- /dev/null
+++ b/compiler/luci/pass/src/ReplaceSubWithAddPass.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ReplaceSubWithAddPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+#include <luci/Service/Nodes/CircleConst.h>
+
+namespace
+{
+
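+// Rewrite Sub with a constant rhs into Add with the negated constant,
+// using the identity x - c == x + (-c). Only FLOAT32 constants are handled.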
+bool replace_sub_with_const_rhs(luci::CircleSub *sub)
+{
+ auto const_rhs = dynamic_cast<luci::CircleConst *>(sub->y());
+ if (const_rhs == nullptr)
+ return false;
+
+ auto graph = sub->graph();
+
+ auto neg_const_rhs = luci::clone(const_rhs);
+ if (neg_const_rhs->dtype() == loco::DataType::FLOAT32)
+ {
+ for (uint32_t i = 0; i < neg_const_rhs->size<loco::DataType::FLOAT32>(); ++i)
+ neg_const_rhs->at<loco::DataType::FLOAT32>(i) *= -1.0;
+ }
+ else
+ {
+ // TODO Support more data type
+ return false;
+ }
+
+ auto add = graph->nodes()->create<luci::CircleAdd>();
+ add->x(sub->x());
+ add->y(neg_const_rhs);
+ add->name(sub->name());
+ add->fusedActivationFunction(sub->fusedActivationFunction());
+ luci::add_origin(add, luci::get_origin(sub));
+ loco::replace(sub).with(add);
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool ReplaceSubWithAddPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto sub = dynamic_cast<luci::CircleSub *>(node))
+ {
+ if (replace_sub_with_const_rhs(sub))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/ReplaceSubWithAddPass.test.cpp b/compiler/luci/pass/src/ReplaceSubWithAddPass.test.cpp
new file mode 100644
index 000000000..c1d5752b2
--- /dev/null
+++ b/compiler/luci/pass/src/ReplaceSubWithAddPass.test.cpp
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ReplaceSubWithAddPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+/**
+ * Simple graph for test
+ *
+ * BEFORE
+ *
+ * [lhs] ------------+
+ * +-- [Sub] --
+ * [rhs_const] ------+
+ *
+ * AFTER
+ *
+ * [lhs] ------------+
+ * +-- [Add] --
+ * [neg_rhs_const] --+
+ */
+class SimpleGraph
+{
+public:
+ SimpleGraph()
+ {
+ lhs = g.nodes()->create<luci::CircleInput>();
+ rhs_const = g.nodes()->create<luci::CircleConst>();
+ sub = g.nodes()->create<luci::CircleSub>();
+ output = g.nodes()->create<luci::CircleOutput>();
+
+ auto graph_input = g.inputs()->create();
+ lhs->index(graph_input->index());
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+
+ lhs->dtype(loco::DataType::FLOAT32);
+ rhs_const->dtype(loco::DataType::FLOAT32);
+ sub->dtype(loco::DataType::FLOAT32);
+ output->dtype(loco::DataType::FLOAT32);
+
+ lhs->shape({1, 3, 4, 5});
+ rhs_const->shape({}); // scalar
+ sub->shape({1, 3, 4, 5});
+ output->shape({1, 3, 4, 5});
+
+ rhs_const->size<loco::DataType::FLOAT32>(1);
+ rhs_const->at<loco::DataType::FLOAT32>(0) = 1.1;
+
+ sub->x(lhs);
+ sub->y(rhs_const);
+ output->from(sub);
+
+ lhs->name("lhs");
+ rhs_const->name("rhs_const");
+ sub->name("sub");
+ output->name("output");
+ }
+
+public:
+ loco::Graph g;
+ luci::CircleInput *lhs = nullptr;
+ luci::CircleConst *rhs_const = nullptr;
+ luci::CircleSub *sub = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+} // namespace
+
+TEST(ReplaceSubWithAdd, name)
+{
+ luci::ReplaceSubWithAddPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(ReplaceSubWithAdd, simple)
+{
+ SimpleGraph g;
+
+ luci::ReplaceSubWithAddPass pass;
+ while (pass.run(&g.g))
+ ;
+
+ auto add = dynamic_cast<luci::CircleAdd *>(g.output->from());
+ EXPECT_NE(nullptr, add);
+
+ auto neg_rhs_const = dynamic_cast<luci::CircleConst *>(add->y());
+ EXPECT_NE(nullptr, neg_rhs_const);
+ EXPECT_EQ(0, neg_rhs_const->rank());
+ EXPECT_FLOAT_EQ(-1.1, neg_rhs_const->at<loco::DataType::FLOAT32>(0));
+}
+
+TEST(ReplaceSubWithAdd, wrong_op_NEG)
+{
+ SimpleGraph g;
+
+ auto mul = g.g.nodes()->create<luci::CircleMul>();
+ mul->x(g.sub->x());
+ mul->y(g.sub->y());
+ loco::replace(g.sub).with(mul);
+
+ luci::ReplaceSubWithAddPass pass;
+ auto changed = pass.run(&g.g);
+
+ EXPECT_EQ(false, changed);
+}
diff --git a/compiler/luci/pass/src/RequantizePass.cpp b/compiler/luci/pass/src/RequantizePass.cpp
index 49fbf76ec..77c55324a 100644
--- a/compiler/luci/pass/src/RequantizePass.cpp
+++ b/compiler/luci/pass/src/RequantizePass.cpp
@@ -32,35 +32,9 @@ namespace luci
namespace
{
-// Check if the node is the bias of Conv2D, DepthwiseConv2D, or FullyConnected layer
-bool is_bias(CircleConst *node)
-{
- if (node == nullptr)
- return false;
-
- auto succs = loco::succs(node);
- if (succs.size() != 1) // assume bias is used by only one node
- return false;
-
- for (auto out : succs)
- {
- auto conv = dynamic_cast<CircleConv2D *>(out);
- if (conv != nullptr && conv->bias() == node)
- return true;
-
- auto dw_conv = dynamic_cast<CircleDepthwiseConv2D *>(out);
- if (dw_conv != nullptr && dw_conv->bias() == node)
- return true;
-
- auto fc = dynamic_cast<CircleFullyConnected *>(out);
- if (fc != nullptr && fc->bias() == node)
- return true;
-
- // TODO: add TransposeConv when bias is supported in CircleTransposeConv
- }
- return false;
-}
-
+// Requantize a non-const node from int8 to uint8
+// Original value range: -128 ~ 127
+// After requantization: 0 ~ 255
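+//
+// A sketch of the mapping, assuming the usual affine quantization
+// (real_value = scale * (quantized - zero_point)): quantized values and
+// zero points are shifted by +128 while scales are kept, so the
+// represented real values do not change.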
void requant_nonconst_int8_to_uint8(CircleNode *circle_node)
{
assert(circle_node->dtype() == loco::DataType::S8);
@@ -105,99 +79,48 @@ void requant_const_int8_to_uint8(CircleConst *node)
}
}
+#define RETURN_UNLESS(cond) \
+ if (not(cond)) \
+ return;
+
/**
- * @brief RequantizeNonConst requantizes tensors for activations
+ * @brief Requantize int8 quantized tensors to uint8 tensors
*/
-struct RequantizeNonConst final : public luci::CircleNodeMutableVisitor<bool>
+struct RequantizeS8ToU8 final : public luci::CircleNodeMutableVisitor<void>
{
- RequantizeNonConst(loco::DataType input, loco::DataType output)
- : _input_type(input), _output_type(output)
- {
- }
-
- loco::DataType _input_type;
- loco::DataType _output_type;
-
- // Requantize input tensors of each node
- bool visit(luci::CircleNode *node)
+ // Requantize non-const tensors
+ void visit(luci::CircleNode *node)
{
LOGGER(l);
- INFO(l) << "RequantizeNonConst visit node: " << node->name() << std::endl;
- auto arity = node->arity();
- for (uint32_t i = 0; i < arity; i++)
- {
- auto input_node = node->arg(i);
- auto circle_node = loco::must_cast<luci::CircleNode *>(input_node);
+ INFO(l) << "RequantizeS8ToU8 visit non-const node: " << node->name() << std::endl;
- // Check if this was quantized (only quantized tensors are requantized)
- if (circle_node->quantparam() == nullptr)
- continue;
+ // Ignore non-quantized tensors
+ RETURN_UNLESS(node->quantparam() != nullptr);
- // Check if this is already requantized
- if (circle_node->dtype() == _output_type)
- continue;
+ // Check dtype is int8
+ RETURN_UNLESS(node->dtype() == loco::DataType::S8);
- // Check if this is not const (only non-const is requantized in this function)
- auto circle_const = dynamic_cast<CircleConst *>(circle_node);
- if (circle_const != nullptr)
- continue;
-
- if (_input_type == loco::DataType::S8 && _output_type == loco::DataType::U8)
- requant_nonconst_int8_to_uint8(circle_node);
- }
- return false;
- }
-};
-
-/**
- * @brief RequantizeConst requantizes tensors for weights
- */
-struct RequantizeConst final : public luci::CircleNodeMutableVisitor<bool>
-{
- RequantizeConst(loco::DataType input, loco::DataType output)
- : _input_type(input), _output_type(output)
- {
+ requant_nonconst_int8_to_uint8(node);
}
- loco::DataType _input_type;
- loco::DataType _output_type;
-
- // Requantize input tensors of each node
- bool visit(luci::CircleNode *node)
+ // Requantize const tensors
+ void visit(luci::CircleConst *node)
{
LOGGER(l);
- INFO(l) << "RequantizeConst visit node: " << node->name() << std::endl;
- auto arity = node->arity();
- for (uint32_t i = 0; i < arity; i++)
- {
- auto input_node = node->arg(i);
- auto circle_node = loco::must_cast<luci::CircleNode *>(input_node);
+ INFO(l) << "RequantizeS8ToU8 visit const node: " << node->name() << std::endl;
- // Check if this was quantized (only quantized tensors are requantized)
- if (circle_node->quantparam() == nullptr)
- continue;
+ // Ignore non-quantized tensors
+ RETURN_UNLESS(node->quantparam() != nullptr);
- // Check if this is already requantized
- if (circle_node->dtype() == _output_type)
- continue;
+ // Check dtype is int8
+ RETURN_UNLESS(node->dtype() == loco::DataType::S8);
- // Check if this is const (only const is requantized in this function)
- auto circle_const = dynamic_cast<CircleConst *>(circle_node);
- if (circle_const == nullptr)
- continue;
-
- // Check if this is not bias
- // bias is not requantized when int8 -> uint8
- if (is_bias(circle_const))
- continue;
-
- if (_input_type == loco::DataType::S8 && _output_type == loco::DataType::U8)
- requant_const_int8_to_uint8(circle_const);
- }
- return false;
+ requant_const_int8_to_uint8(node);
}
};
+#undef RETURN_UNLESS
+
} // namespace
bool RequantizePass::run(loco::Graph *g)
@@ -205,20 +128,21 @@ bool RequantizePass::run(loco::Graph *g)
LOGGER(l);
INFO(l) << "RequantizePass Start" << std::endl;
- // Requantize non-const (activations)
- for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ // Input: int8 model
+ // Output: uint8 model
+ if (_input_dtype == loco::DataType::S8 and _output_dtype == loco::DataType::U8)
{
- RequantizeNonConst rqnc(_input_dtype, _output_dtype);
- auto circle_node = loco::must_cast<luci::CircleNode *>(node);
- circle_node->accept(&rqnc);
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ RequantizeS8ToU8 rq;
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ circle_node->accept(&rq);
+ }
}
-
- // Requantize const (including weights, constants)
- for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ else
{
- RequantizeConst rqc(_input_dtype, _output_dtype);
- auto circle_node = loco::must_cast<luci::CircleNode *>(node);
- circle_node->accept(&rqc);
+ // Ignore other cases
+ return false;
}
// Update output dtype
@@ -226,7 +150,8 @@ bool RequantizePass::run(loco::Graph *g)
for (auto node : loco::output_nodes(g))
{
auto circle_node = loco::must_cast<luci::CircleOutput *>(node);
- if (static_cast<luci::CircleNode *>(circle_node->from())->dtype() == _output_dtype)
+ auto from_node = loco::must_cast<luci::CircleNode *>(circle_node->from());
+ if (from_node->dtype() == _output_dtype)
{
circle_node->dtype(_output_dtype);
auto graph_output = graph_outputs->at(circle_node->index());
diff --git a/compiler/luci/pass/src/RequantizePass.test.cpp b/compiler/luci/pass/src/RequantizePass.test.cpp
new file mode 100644
index 000000000..a9293ce27
--- /dev/null
+++ b/compiler/luci/pass/src/RequantizePass.test.cpp
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RequantizePass.h"
+
+#include "helpers/CreateCircleConst.h"
+
+#include <luci/test/TestIOGraph.h>
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleQuantParam.h>
+
+#include <vector>
+
+#include <gtest/gtest.h>
+
+using namespace luci;
+using namespace luci::test;
+
+namespace
+{
+
+/**
+ * Simple graph for test
+ *
+ * BEFORE
+ *
+ * [IFM (S8)] [W (S8)] [B (S32)]
+ * | | |
+ * +-------+--------+
+ * |
+ * V
+ * [FC]
+ * |
+ * V
+ * [OFM(S8)]
+ *
+ * AFTER
+ *
+ * [IFM (U8)] [W (U8)] [B (S32)]
+ * | | |
+ * +-------+--------+
+ * |
+ * V
+ * [FC]
+ * |
+ * V
+ * [OFM(U8)]
+ */
+struct S8FCGraphlet
+{
+public:
+ S8FCGraphlet() = default;
+ virtual ~S8FCGraphlet() = default;
+
+ void init(loco::Graph *g, const ShapeU32 out_shape, const ShapeU32 w_shape,
+ const ShapeU32 bias_shape)
+ {
+ _fc = g->nodes()->create<CircleFullyConnected>();
+ _fc->input(_x);
+ _x->dtype(loco::DataType::S8);
+ {
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->scale.push_back(1.0);
+ quantparam->zerop.push_back(0);
+ quantparam->quantized_dimension = 0;
+ _x->quantparam(std::move(quantparam));
+ }
+
+ _weights = create_const_node<int8_t>(g, loco::DataType::S8, w_shape, 1.0);
+ {
+ auto w_qparam = std::make_unique<CircleQuantParam>();
+ std::vector<float> w_scale(_weights->dim(0).value(), 1.0);
+ std::vector<int64_t> w_zp(_weights->dim(0).value(), 0);
+ w_qparam->scale = w_scale;
+ w_qparam->zerop = w_zp;
+ w_qparam->quantized_dimension = 0;
+ _weights->quantparam(std::move(w_qparam));
+ }
+ _fc->weights(_weights);
+
+ _bias = create_const_node<int32_t>(g, loco::DataType::S32, bias_shape, 1.0);
+ {
+ auto b_qparam = std::make_unique<CircleQuantParam>();
+ const auto bias_size = _bias->size<loco::DataType::S32>();
+ std::vector<float> b_scale(bias_size, 1.0);
+ std::vector<int64_t> b_zp(bias_size, 0);
+ b_qparam->scale = b_scale;
+ b_qparam->zerop = b_zp;
+ b_qparam->quantized_dimension = 0;
+ _bias->quantparam(std::move(b_qparam));
+ }
+
+ _fc->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _fc->dtype(loco::DataType::S8);
+ _fc->shape(out_shape);
+ _fc->bias(_bias);
+ _fc->name("fc");
+ {
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->scale.push_back(1.0);
+ quantparam->zerop.push_back(0);
+ quantparam->quantized_dimension = 0;
+ _fc->quantparam(std::move(quantparam));
+ }
+ }
+
+public:
+ CircleFullyConnected *_fc = nullptr;
+ CircleInput *_x = nullptr;
+ CircleConst *_weights = nullptr;
+ CircleConst *_bias = nullptr;
+};
+
+struct S8FCGraph final : public TestIGraphlet, public TestOGraphlet, public S8FCGraphlet
+{
+ void init(const ShapeU32 in_shape, const ShapeU32 w_shape, const ShapeU32 out_shape,
+ const ShapeU32 bias_shape)
+ {
+ TestIGraphlet::init(g(), in_shape);
+ TestOGraphlet::init(g(), out_shape);
+ _x = input();
+ S8FCGraphlet::init(g(), out_shape, w_shape, bias_shape);
+ output()->from(_fc);
+ }
+};
+
+class RequantizeS8ToU8FCTest : public ::testing::Test
+{
+public:
+ S8FCGraph g;
+};
+
+} // namespace
+
+TEST(RequantizePassTest, name)
+{
+ luci::RequantizePass pass(loco::DataType::FLOAT32, loco::DataType::U8);
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST_F(RequantizeS8ToU8FCTest, FC)
+{
+  g.init({1, 18, 80} /* ifm shape */, {256, 80} /* weights shape */, {18, 256} /* ofm shape */,
+         {1, 256} /* bias shape */);
+
+ luci::RequantizePass rq(loco::DataType::S8, loco::DataType::U8);
+ rq.run(g.g());
+
+ EXPECT_EQ(loco::DataType::U8, g._x->dtype());
+ EXPECT_EQ(loco::DataType::U8, g._fc->dtype());
+ EXPECT_EQ(loco::DataType::U8, g._weights->dtype());
+ EXPECT_EQ(loco::DataType::S32, g._bias->dtype());
+}
+
+TEST_F(RequantizeS8ToU8FCTest, FC_wrong_dtype_NEG)
+{
+  g.init({1, 18, 80} /* ifm shape */, {256, 80} /* weights shape */, {18, 256} /* ofm shape */,
+         {1, 256} /* bias shape */);
+
+ // Wrong dtype
+ luci::RequantizePass rq(loco::DataType::U8, loco::DataType::S8);
+ rq.run(g.g());
+
+ EXPECT_EQ(loco::DataType::S8, g._x->dtype());
+ EXPECT_EQ(loco::DataType::S8, g._fc->dtype());
+ EXPECT_EQ(loco::DataType::S8, g._weights->dtype());
+ EXPECT_EQ(loco::DataType::S32, g._bias->dtype());
+}
diff --git a/compiler/luci/pass/src/ResolveCustomOpAddPass.cpp b/compiler/luci/pass/src/ResolveCustomOpAddPass.cpp
index e52d667d7..9f7e2f17d 100644
--- a/compiler/luci/pass/src/ResolveCustomOpAddPass.cpp
+++ b/compiler/luci/pass/src/ResolveCustomOpAddPass.cpp
@@ -16,10 +16,11 @@
#include "luci/Pass/ResolveCustomOpAddPass.h"
-#include "flatbuffers/flexbuffers.h"
-
#include <luci/IR/CircleNodes.h>
#include <luci/IR/AttrFusedActFunc.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+#include <flatbuffers/flexbuffers.h>
namespace
{
@@ -67,10 +68,17 @@ bool resolve_with_BroadcastTo(luci::CircleCustom *addv2)
auto input = loco::must_cast<const luci::CircleCustomOut *>(addv2->inputs(broadcastTo_idx));
auto broadcastTo = loco::must_cast<luci::CircleCustom *>(input->input());
+ auto name = addv2->name();
+ assert(name.length() > 0);
+
auto add = addv2->graph()->nodes()->create<luci::CircleAdd>();
add->fusedActivationFunction(luci::FusedActFunc::NONE);
add->x(addv2->inputs(1 - broadcastTo_idx));
add->y(broadcastTo->inputs(0));
+ add->name(name + "/Add");
+ luci::add_origin(
+ add, luci::composite_origin({luci::get_origin(broadcastTo), luci::get_origin(addv2)}));
+
auto customOut = loco::succs(addv2);
assert(customOut.size() == 1);
replace(*customOut.begin()).with(add);
@@ -86,13 +94,39 @@ bool resolve_custom_op(luci::CircleCustom *addv2)
if (custom_code != "AddV2")
return false;
+ if (addv2->numInputs() != 2)
+ return false;
+
+  // check if inputs have supported data types
+ for (uint32_t i = 0; i < addv2->numInputs(); i++)
+ {
+ auto input = loco::must_cast<luci::CircleNode *>(addv2->inputs(i));
+ switch (input->dtype())
+ {
+ case loco::DataType::U8:
+ case loco::DataType::S8:
+ case loco::DataType::S16:
+ case loco::DataType::S32:
+ case loco::DataType::FLOAT32:
+ break;
+ default:
+ return false;
+ }
+ }
+
if (resolve_with_BroadcastTo(addv2))
return true;
+ auto name = addv2->name();
+ assert(name.length() > 0);
+
auto add = addv2->graph()->nodes()->create<luci::CircleAdd>();
add->fusedActivationFunction(luci::FusedActFunc::NONE);
add->x(addv2->inputs(0));
add->y(addv2->inputs(1));
+ add->name(name + "/Add");
+ luci::add_origin(add, luci::get_origin(addv2));
+
auto customOut = loco::succs(addv2);
assert(customOut.size() == 1);
replace(*customOut.begin()).with(add);
@@ -115,7 +149,8 @@ bool ResolveCustomOpAddPass::run(loco::Graph *g)
if (not cop)
continue;
- changed |= resolve_custom_op(cop);
+ if (resolve_custom_op(cop))
+ changed = true;
}
return changed;
diff --git a/compiler/luci/pass/src/ResolveCustomOpAddPass.test.cpp b/compiler/luci/pass/src/ResolveCustomOpAddPass.test.cpp
new file mode 100644
index 000000000..31c245b0e
--- /dev/null
+++ b/compiler/luci/pass/src/ResolveCustomOpAddPass.test.cpp
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ResolveCustomOpAddPass.h"
+
+#include <gtest/gtest.h>
+
+TEST(ResolveCustomOpAddPassTest, name)
+{
+ luci::ResolveCustomOpAddPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
diff --git a/compiler/luci/pass/src/ResolveCustomOpBatchMatMulPass.cpp b/compiler/luci/pass/src/ResolveCustomOpBatchMatMulPass.cpp
index 145e9cb62..7ebd7a429 100644
--- a/compiler/luci/pass/src/ResolveCustomOpBatchMatMulPass.cpp
+++ b/compiler/luci/pass/src/ResolveCustomOpBatchMatMulPass.cpp
@@ -16,9 +16,10 @@
#include "luci/Pass/ResolveCustomOpBatchMatMulPass.h"
-#include "flatbuffers/flexbuffers.h"
-
#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+#include <flatbuffers/flexbuffers.h>
namespace
{
@@ -30,6 +31,9 @@ bool resolve_custom_op(luci::CircleCustom *cop)
if (custom_code == "BatchMatMulV2")
{
+ auto name = cop->name();
+ assert(name.length() > 0);
+
auto batch_matmul = cop->graph()->nodes()->create<luci::CircleBatchMatMul>();
// input
batch_matmul->x(cop->inputs(0));
@@ -39,10 +43,16 @@ bool resolve_custom_op(luci::CircleCustom *cop)
auto map = flexbuffers::GetRoot(custom_options).AsMap();
batch_matmul->adj_x(map["adj_x"].AsBool());
batch_matmul->adj_y(map["adj_y"].AsBool());
+ batch_matmul->name(name + "/BatchMatMul");
+ luci::add_origin(batch_matmul, luci::get_origin(cop));
+
+ auto customOut = loco::succs(cop);
+ assert(customOut.size() == 1);
+ replace(*customOut.begin()).with(batch_matmul);
- replace(cop).with(batch_matmul);
return true;
}
+
return false;
}
@@ -51,6 +61,27 @@ bool resolve_custom_op(luci::CircleCustom *cop)
namespace luci
{
+/**
+ * BEFORE
+ * | |
+ * [CircleNode] [CircleNode]
+ * \ /
+ * [CircleCustom]("BatchMatMulV2")
+ * |
+ * [CircleCustomOut]
+ * |
+ * [CircleNode]
+ * |
+ *
+ * AFTER
+ * | |
+ * [CircleNode] [CircleNode]
+ * \ /
+ * [CircleBatchMatMul]
+ * |
+ * [CircleNode]
+ * |
+ */
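+/*
+ * Note that the successor of CircleCustomOut (not CircleCustom itself) is
+ * replaced, because custom ops expose their outputs through CircleCustomOut
+ * proxy nodes.
+ */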
bool ResolveCustomOpBatchMatMulPass::run(loco::Graph *g)
{
bool changed = false;
@@ -60,7 +91,8 @@ bool ResolveCustomOpBatchMatMulPass::run(loco::Graph *g)
if (not cop)
continue;
- changed |= resolve_custom_op(cop);
+ if (resolve_custom_op(cop))
+ changed = true;
}
return changed;
diff --git a/compiler/luci/pass/src/ResolveCustomOpBatchMatMulPass.test.cpp b/compiler/luci/pass/src/ResolveCustomOpBatchMatMulPass.test.cpp
new file mode 100644
index 000000000..7ef61c253
--- /dev/null
+++ b/compiler/luci/pass/src/ResolveCustomOpBatchMatMulPass.test.cpp
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ResolveCustomOpBatchMatMulPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+#include <flatbuffers/flatbuffers.h>
+#include <flatbuffers/flexbuffers.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+const int N = 1;
+const int C = 2;
+const int H_X = 1;
+const int W_X = 4;
+const int H_Y = 4;
+const int W_Y = 4;
+
+/**
+ * graph having Custom operator BatchMatMulV2
+ *
+ * [CircleInput] [CircleInput]
+ * \ /
+ * [CircleCustom]
+ * |
+ * [CircleCustomOut]
+ * |
+ * [CircleOutput]
+ */
+class BatchMatmulV2Graphlet
+{
+public:
+ BatchMatmulV2Graphlet() = default;
+
+public:
+ void init(loco::Graph *g)
+ {
+ // custom option
+ auto flatbuffer_builder =
+ std::unique_ptr<flatbuffers::FlatBufferBuilder>(new flatbuffers::FlatBufferBuilder(1024));
+ auto flex_buffers = std::make_unique<flexbuffers::Builder>();
+ size_t map_start = flex_buffers->StartMap();
+ flex_buffers->Bool("adj_x", false);
+ flex_buffers->Bool("adj_y", false);
+ flex_buffers->Int("T", 0 /* circle::TensorType_FLOAT32 */);
+ flex_buffers->EndMap(map_start);
+ flex_buffers->Finish();
+
+ // CircleCustom(BatchMatMulV2, adj_x=False, adj_y=False)
+ _batchmatmulv2 = g->nodes()->create<luci::CircleCustom>(2, 1);
+ _batchmatmulv2->custom_code("BatchMatMulV2");
+ _batchmatmulv2->custom_options(flex_buffers->GetBuffer());
+ _batchmatmulv2->shape({N, C, H_X, W_Y});
+ _batchmatmulv2->dtype(loco::DataType::FLOAT32);
+ _batchmatmulv2->name("batchmatmulv2");
+
+ // CircleCustomOut
+ _batchmatmulv2_out = g->nodes()->create<luci::CircleCustomOut>();
+ _batchmatmulv2_out->shape({N, C, H_X, W_Y});
+ _batchmatmulv2_out->dtype(loco::DataType::FLOAT32);
+ _batchmatmulv2_out->index(0);
+ }
+
+public:
+ luci::CircleCustom *batchmatmulv2() { return _batchmatmulv2; }
+
+protected:
+ luci::CircleCustom *_batchmatmulv2 = nullptr;
+ luci::CircleCustomOut *_batchmatmulv2_out = nullptr;
+};
+
+class BatchMatmulV2Graph : public TestIsGraphlet<2>,
+ public TestOGraphlet,
+ public BatchMatmulV2Graphlet
+{
+public:
+ BatchMatmulV2Graph() = default;
+
+ void init(void)
+ {
+ TestIsGraphlet<2>::init(g(), {{N, C, H_X, W_X}, {N, C, H_X, W_X}});
+ TestOGraphlet::init(g(), {N, C, H_X, W_Y});
+ BatchMatmulV2Graphlet::init(g());
+
+    // TODO find out how to set multiple shapes through TestIsGraphlet
+ // update shape for second input
+ input(1)->shape({N, C, H_Y, W_Y});
+
+ // connect graph
+ _batchmatmulv2->inputs(0, input(0));
+ _batchmatmulv2->inputs(1, input(1));
+ _batchmatmulv2_out->input(_batchmatmulv2);
+
+ output()->from(_batchmatmulv2_out);
+ }
+};
+
+class BatchMatmulV2GraphTest : public ::testing::Test
+{
+public:
+ BatchMatmulV2Graph g;
+ luci::ResolveCustomOpBatchMatMulPass pass;
+};
+
+} // namespace
+
+TEST(ResolveCustomOpBatchMatMulPassTest, name)
+{
+ luci::ResolveCustomOpBatchMatMulPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+/**
+ * Optimized graph looks like below.
+ *
+ * [CircleInput]
+ * |
+ * [CircleBatchMatMul]
+ * |
+ * [CircleOutput]
+ */
+TEST_F(BatchMatmulV2GraphTest, simple_test)
+{
+ g.init();
+
+ auto ret = pass.run(g.g());
+ EXPECT_EQ(true, ret);
+
+ auto batchmatmul = dynamic_cast<luci::CircleBatchMatMul *>(g.output()->from());
+ EXPECT_NE(nullptr, batchmatmul);
+
+ auto input_0 = dynamic_cast<luci::CircleInput *>(batchmatmul->x());
+ auto input_1 = dynamic_cast<luci::CircleInput *>(batchmatmul->y());
+ EXPECT_NE(nullptr, input_0);
+ EXPECT_NE(nullptr, input_1);
+}
+
+TEST_F(BatchMatmulV2GraphTest, wrong_condition_NEG)
+{
+ g.init();
+
+ // wrong custom code
+ g.batchmatmulv2()->custom_code("BatchMatMulv2"); // v is lower case
+ auto ret = pass.run(g.g());
+
+ EXPECT_EQ(false, ret);
+}
diff --git a/compiler/luci/pass/src/ResolveCustomOpMatMulPass.cpp b/compiler/luci/pass/src/ResolveCustomOpMatMulPass.cpp
index 547fd22fc..add55f66c 100644
--- a/compiler/luci/pass/src/ResolveCustomOpMatMulPass.cpp
+++ b/compiler/luci/pass/src/ResolveCustomOpMatMulPass.cpp
@@ -16,62 +16,20 @@
#include "luci/Pass/ResolveCustomOpMatMulPass.h"
-#include "flatbuffers/flexbuffers.h"
+#include "helpers/CreateCircleConst.h"
+
#include <loco/IR/DataTypeTraits.h>
#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
#include <loco.h>
#include <oops/InternalExn.h>
-#include <loco/Service/ShapeInference.h>
-#include <loco/Service/TypeInference.h>
-namespace
-{
+#include <flatbuffers/flexbuffers.h>
-template <typename T>
-luci::CircleConst *create_const_node(loco::Graph *g, const loco::DataType dtype,
- const std::vector<uint32_t> &shape,
- const std::vector<T> &values)
+namespace
{
- auto node = g->nodes()->create<luci::CircleConst>();
- node->dtype(dtype);
- node->rank(shape.size());
-
- uint32_t size = 1;
- for (uint32_t i = 0; i < shape.size(); ++i)
- {
- node->dim(i) = shape.at(i);
- size *= shape.at(i);
- }
-
-#define INIT_VALUES(DT) \
- { \
- node->size<DT>(size); \
- for (uint32_t i = 0; i < values.size(); ++i) \
- node->at<DT>(i) = values[i]; \
- }
-
- switch (dtype)
- {
- case loco::DataType::U8:
- INIT_VALUES(loco::DataType::U8);
- break;
- case loco::DataType::S16:
- INIT_VALUES(loco::DataType::S16);
- break;
- case loco::DataType::S32:
- INIT_VALUES(loco::DataType::S32);
- break;
- case loco::DataType::FLOAT32:
- INIT_VALUES(loco::DataType::FLOAT32)
- break;
- default:
- INTERNAL_EXN("create_const_node called with unsupported type");
- break;
- }
- return node;
-}
bool resolve_matmul(luci::CircleCustom *cop)
{
@@ -90,6 +48,9 @@ bool resolve_matmul(luci::CircleCustom *cop)
const auto S32 = loco::DataType::S32;
const auto FLOAT32 = loco::DataType::FLOAT32;
+ auto name = cop->name();
+ assert(name.length() > 0);
+
bool transpose_a = map["transpose_a"].AsBool();
bool transpose_b = map["transpose_b"].AsBool();
@@ -97,34 +58,39 @@ bool resolve_matmul(luci::CircleCustom *cop)
loco::Node *rhs = cop->inputs(1);
// Check that the type of the first input is known
- CHECK_OR_FALSE(loco::dtype_known(lhs));
- auto lhs_dtype = loco::dtype_get(cop->inputs(0));
+ auto lhs_dtype = loco::must_cast<luci::CircleNode *>(cop->inputs(0))->dtype();
+ CHECK_OR_FALSE(lhs_dtype != loco::DataType::Unknown);
// If transpose of first input is requested, its shape must be known
- CHECK_OR_FALSE(!transpose_a || loco::shape_known(lhs));
+ auto circle_lhs = loco::must_cast<luci::CircleNode *>(lhs);
+ CHECK_OR_FALSE(!transpose_a || circle_lhs->shape_status() == luci::ShapeStatus::VALID);
// and its rank should be at least 2
- CHECK_OR_FALSE(!transpose_a || loco::shape_get(lhs).as<loco::TensorShape>().rank() >= 2);
+ CHECK_OR_FALSE(!transpose_a || circle_lhs->rank() >= 2);
// Check that the shape of the 2nd input is known
- CHECK_OR_FALSE(loco::shape_known(rhs));
+ auto circle_rhs = loco::must_cast<luci::CircleNode *>(rhs);
+ CHECK_OR_FALSE(circle_rhs->shape_status() == luci::ShapeStatus::VALID);
// TODO as of 06/23/20 TFLite only supports rank 2 for 2nd input. Fix this once that changes!
- CHECK_OR_FALSE(loco::shape_get(rhs).as<loco::TensorShape>().rank() == 2);
+ CHECK_OR_FALSE(circle_rhs->rank() == 2);
// Check that input data type is supported
CHECK_OR_THROW(lhs_dtype == U8 || lhs_dtype == S16 || lhs_dtype == FLOAT32,
"Only UInt8, Int16 and Float32 data types are supported by MatMul");
if (transpose_a)
{
- auto a_shape = loco::shape_get(lhs).as<loco::TensorShape>();
// Create a permutation constant node
- std::vector<uint32_t> perm;
- for (uint32_t i = 0; i < a_shape.rank(); ++i)
+ std::vector<int32_t> perm;
+ const auto lhs_rank = static_cast<int32_t>(circle_lhs->rank());
+ for (int32_t i = 0; i < lhs_rank; ++i)
perm.push_back(i);
- std::swap(perm[a_shape.rank() - 1], perm[a_shape.rank() - 2]);
- auto perm_node = create_const_node(graph, S32, {a_shape.rank()}, perm);
+ std::swap(perm[circle_lhs->rank() - 1], perm[circle_lhs->rank() - 2]);
+ auto perm_node = luci::create_const_node(graph, S32, {circle_lhs->rank()}, perm);
+ perm_node->name(name + "/lhs/Transpose/perm");
// Now make a transpose node
auto transpose_node = graph->nodes()->create<luci::CircleTranspose>();
transpose_node->a(lhs);
transpose_node->perm(perm_node);
+ transpose_node->name(name + "/lhs/Transpose");
+ luci::add_origin(transpose_node, luci::get_origin(cop));
lhs = transpose_node;
}
@@ -133,26 +99,30 @@ bool resolve_matmul(luci::CircleCustom *cop)
// in row-major order, thus we need to convert between them.
if (!transpose_b)
{
- const std::vector<uint32_t> perm{1, 0};
- auto perm_node = create_const_node(graph, S32, {2}, perm);
+ const std::vector<int32_t> perm{1, 0};
+ auto perm_node = luci::create_const_node(graph, S32, {2}, perm);
+ perm_node->name(name + "/rhs/Transpose/perm");
auto transpose_node = graph->nodes()->create<luci::CircleTranspose>();
transpose_node->a(rhs);
transpose_node->perm(perm_node);
+ transpose_node->name(name + "/rhs/Transpose");
+ luci::add_origin(transpose_node, luci::get_origin(cop));
rhs = transpose_node;
}
- // Make a constant zero-filled bias node
- auto b_shape = loco::shape_get(cop->inputs(1)).as<loco::TensorShape>();
- uint32_t bias_size = b_shape.dim(transpose_b ? 1 : 0).value();
- const std::vector<float> val(bias_size, .0f);
- auto bias_node = create_const_node(graph, lhs_dtype, {bias_size}, val);
+ auto empty_bias = graph->nodes()->create<luci::CircleOutputExclude>();
+
auto fc_node = graph->nodes()->create<luci::CircleFullyConnected>();
fc_node->input(lhs);
fc_node->weights(rhs);
- fc_node->bias(bias_node);
+ fc_node->bias(empty_bias);
fc_node->fusedActivationFunction(luci::FusedActFunc::NONE);
+ fc_node->name(name + "/FullyConnected");
+ luci::add_origin(fc_node, luci::get_origin(cop));
- replace(cop).with(fc_node);
+ auto customOut = loco::succs(cop);
+ assert(customOut.size() == 1);
+ replace(*customOut.begin()).with(fc_node);
return true;
}
diff --git a/compiler/luci/pass/src/ResolveCustomOpMatMulPass.test.cpp b/compiler/luci/pass/src/ResolveCustomOpMatMulPass.test.cpp
new file mode 100644
index 000000000..c4ea3ea06
--- /dev/null
+++ b/compiler/luci/pass/src/ResolveCustomOpMatMulPass.test.cpp
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ResolveCustomOpMatMulPass.h"
+
+#include <gtest/gtest.h>
+
+TEST(ResolveCustomOpMatMulPassTest, name)
+{
+ luci::ResolveCustomOpMatMulPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
diff --git a/compiler/luci/pass/src/ResolveCustomOpMaxPoolWithArgmaxPass.cpp b/compiler/luci/pass/src/ResolveCustomOpMaxPoolWithArgmaxPass.cpp
new file mode 100644
index 000000000..7c038d56d
--- /dev/null
+++ b/compiler/luci/pass/src/ResolveCustomOpMaxPoolWithArgmaxPass.cpp
@@ -0,0 +1,905 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ResolveCustomOpMaxPoolWithArgmaxPass.h"
+
+#include <loco/IR/DataTypeTraits.h>
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+#include <loco.h>
+#include <oops/InternalExn.h>
+#include <limits> // std::numeric_limits
+
+#include <flatbuffers/flexbuffers.h>
+
+namespace
+{
+
+template <typename T> std::vector<T> to_vector(const flexbuffers::TypedVector &typed_vec)
+{
+ std::vector<T> answer(typed_vec.size());
+
+ for (uint32_t i = 0; i < answer.size(); ++i)
+ {
+ answer[i] = typed_vec[i].As<T>();
+ }
+
+ return answer;
+}
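+
+// e.g. to_vector<uint32_t>(map["ksize"].AsTypedVector()) converts the flexbuffer
+// "ksize" attribute into a std::vector<uint32_t> (see resolve_max_pool_with_argmax below).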
+
+luci::Padding string_to_padding(const std::string &pad_str)
+{
+ if (pad_str == "VALID")
+ return luci::Padding::VALID;
+ if (pad_str == "SAME")
+ return luci::Padding::SAME;
+
+ return luci::Padding::UNDEFINED;
+}
+
+template <typename NodeT> void set_stride(NodeT *node, const luci::Stride &stride)
+{
+ node->stride()->h(stride.h());
+ node->stride()->w(stride.w());
+}
+
+template <typename NodeT> void set_filter(NodeT *node, const luci::Filter &filter)
+{
+ node->filter()->h(filter.h());
+ node->filter()->w(filter.w());
+}
+
+void init_name_and_origin(luci::CircleNode *node, const std::string &name,
+ const std::shared_ptr<luci::CircleNodeOrigin> &origin)
+{
+ node->name(name);
+ luci::add_origin(node, origin);
+}
+
+template <typename NodeT> NodeT *none_act_func(NodeT *node)
+{
+ node->fusedActivationFunction(luci::FusedActFunc::NONE);
+ return node;
+}
+
+luci::CircleCast *create_cast(luci::CircleNode *input, loco::DataType in_type,
+ loco::DataType out_type)
+{
+ auto cast = input->graph()->nodes()->create<luci::CircleCast>();
+
+ cast->in_data_type(in_type);
+ cast->out_data_type(out_type);
+ cast->dtype(out_type);
+
+ cast->x(input);
+
+ return cast;
+}
+
+template <loco::DataType DT> void fill_conv_weights(luci::CircleConst *weights)
+{
+ assert(weights->rank() == 4);
+
+ auto const kn = weights->dim(0).value();
+ auto const kh = weights->dim(1).value();
+ auto const kw = weights->dim(2).value();
+
+ auto elements_size = kn * kh * kw * 1;
+ weights->size<DT>(elements_size);
+
+ for (uint32_t b = 0; b < kn; ++b)
+ {
+ for (uint32_t y = 0; y < kh; ++y)
+ {
+ for (uint32_t x = 0; x < kw; ++x)
+ {
+ auto const idx = (b * kh + y) * kw + x;
+ weights->at<DT>(idx) = (y * kw + x == b) ? 1 : 0;
+ }
+ }
+ }
+}
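+
+// Illustrative example: for a 2x2 window (kh = kw = 2, kn = 4), filter b holds a
+// single 1 at the window position (y, x) where y * kw + x == b, e.g. filter 3 is
+// {{0, 0}, {0, 1}}. A Conv2D with these weights therefore copies each window
+// element into its own output channel (an im2col-like transform).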
+
+luci::CircleConst *create_conv_filter(loco::Graph *graph, const uint32_t kh, const uint32_t kw,
+ const uint32_t kn)
+{
+ auto weights = graph->nodes()->create<luci::CircleConst>();
+
+ weights->dtype(loco::DataType::FLOAT32);
+
+ weights->rank(4);
+ weights->dim(0).set(kn);
+ weights->dim(1).set(kh);
+ weights->dim(2).set(kw);
+ weights->dim(3).set(1);
+ weights->shape_status(luci::ShapeStatus::VALID);
+
+ fill_conv_weights<loco::DataType::FLOAT32>(weights);
+
+ return weights;
+}
+
+template <loco::DataType DT> void fill_zero_bias(luci::CircleConst *bias)
+{
+ assert(bias->rank() == 1);
+
+ auto const depth = bias->dim(0).value();
+
+ bias->size<DT>(depth);
+
+ for (uint32_t i = 0; i < depth; ++i)
+ {
+ bias->at<DT>(i) = 0;
+ }
+}
+
+luci::CircleConst *create_zero_bias(loco::Graph *graph, uint32_t depth)
+{
+ auto bias = graph->nodes()->create<luci::CircleConst>();
+
+ bias->dtype(loco::DataType::FLOAT32);
+
+ bias->rank(1);
+ bias->dim(0).set(depth);
+
+ fill_zero_bias<loco::DataType::FLOAT32>(bias);
+
+ return bias;
+}
+
+luci::CircleConst *create_padding_const(loco::Graph *graph, int32_t left_pad, int32_t right_pad,
+ int32_t top_pad, int32_t bottom_pad)
+{
+ auto paddings = graph->nodes()->create<luci::CircleConst>();
+
+ paddings->dtype(loco::DataType::S32);
+
+ paddings->rank(2);
+ paddings->dim(0).set(4);
+ paddings->dim(1).set(2);
+ paddings->size<loco::DataType::S32>(8);
+ paddings->shape_status(luci::ShapeStatus::VALID);
+
+ paddings->at<loco::DataType::S32>(0) = 0;
+ paddings->at<loco::DataType::S32>(1) = 0;
+
+ paddings->at<loco::DataType::S32>(2) = left_pad;
+ paddings->at<loco::DataType::S32>(3) = right_pad;
+
+ paddings->at<loco::DataType::S32>(4) = top_pad;
+ paddings->at<loco::DataType::S32>(5) = bottom_pad;
+
+ paddings->at<loco::DataType::S32>(6) = 0;
+ paddings->at<loco::DataType::S32>(7) = 0;
+
+ return paddings;
+}
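+
+// The resulting tensor has shape [4, 2] and holds a {pad_before, pad_after} pair
+// per dimension: {{0, 0}, {left, right}, {top, bottom}, {0, 0}}.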
+
+template <loco::DataType DT, typename Numeric>
+luci::CircleConst *create_scalar(loco::Graph *graph, Numeric value)
+{
+ auto scalar = graph->nodes()->create<luci::CircleConst>();
+
+ scalar->dtype(DT);
+
+ scalar->rank(0);
+ scalar->size<DT>(1);
+ scalar->shape_status(luci::ShapeStatus::VALID);
+
+ scalar->scalar<DT>() = value;
+
+ return scalar;
+}
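+
+// e.g. create_scalar<loco::DataType::S32>(graph, 3) builds a rank-0 S32 constant;
+// this is used below for the ArgMax and Split dimension inputs.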
+
+luci::CircleConst *create_shape_tensor(loco::Graph *graph, const std::vector<uint32_t> &dims_vec)
+{
+ auto shape = graph->nodes()->create<luci::CircleConst>();
+
+ shape->dtype(loco::DataType::S32);
+
+ shape->rank(1);
+ shape->dim(0).set(dims_vec.size());
+ shape->shape_status(luci::ShapeStatus::VALID);
+
+ shape->size<loco::DataType::S32>(dims_vec.size());
+
+ for (uint32_t i = 0; i < dims_vec.size(); ++i)
+ {
+ shape->at<loco::DataType::S32>(i) = dims_vec[i];
+ }
+
+ return shape;
+}
+
+int32_t compute_full_padding(int32_t input_size, int32_t output_size, int32_t stride,
+ int32_t filter_size)
+{
+ int32_t effective_input = (output_size - 1) * stride + filter_size;
+ int32_t full = effective_input - input_size;
+ // handle extreme cases where part of the input is not used in the computation
+ if (full < 0)
+ full = 0;
+ return full;
+}
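+
+// Worked example (values chosen for exposition): input_size = 5, output_size = 3,
+// stride = 2, filter_size = 2 gives effective_input = (3 - 1) * 2 + 2 = 6,
+// so the full padding is 6 - 5 = 1.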
+
+template <loco::DataType DT>
+void fill_coords_addition(luci::Padding padding, const luci::Stride &stride,
+ const luci::Filter &filter, uint32_t input_height, uint32_t input_width,
+ uint32_t depth, luci::CircleConst *cords)
+{
+ assert(cords->rank() == 4);
+
+ auto const output_height = static_cast<int32_t>(cords->dim(1).value());
+ auto const output_width = static_cast<int32_t>(cords->dim(2).value());
+ {
+ auto const element_counts = 1 * output_height * output_width * 1;
+ cords->size<DT>(element_counts);
+ }
+
+ assert(padding != luci::Padding::UNDEFINED);
+
+ // For VALID padding:
+ int32_t start_y = 0;
+ int32_t start_x = 0;
+
+ // For SAME padding:
+ if (padding == luci::Padding::SAME)
+ {
+ start_y = -compute_full_padding(input_height, output_height, stride.h(), filter.h()) / 2;
+ start_x = -compute_full_padding(input_width, output_width, stride.w(), filter.w()) / 2;
+ }
+
+ auto const step_y = static_cast<int32_t>(stride.h());
+ auto const step_x = static_cast<int32_t>(stride.w());
+
+ for (int32_t y_o = 0, y_i = start_y; y_o < output_height; ++y_o, y_i += step_y)
+ {
+ for (int32_t x_o = 0, x_i = start_x; x_o < output_width; ++x_o, x_i += step_x)
+ {
+ auto const output_idx = y_o * output_width + x_o;
+ auto const input_idx = y_i * static_cast<int32_t>(input_width) + x_i;
+
+ // Add a small adjustment value to fix the result of the cast operation that
+ // follows "coord addition" in the generated subgraph.
+ //
+ // The cast operation discards the fractional part of a value, so 1.9996 becomes 1.
+ // This is not a problem when working with float32, which represents integers precisely,
+ // but it leads to wrong results when working with quantized numbers.
+ //
+ // This value is larger than the quantization error,
+ // yet small enough not to affect the following computations
+ // (in particular the multiplication with depth)
+ const float round_adjustment = 1.0f / (depth + 1);
+
+ cords->at<DT>(output_idx) = input_idx + round_adjustment;
+ }
+ }
+}
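+
+// Illustrative values: with SAME padding, input = output = 4, stride = 1 and
+// filter = 3, the full padding is (4 - 1) * 1 + 3 - 4 = 2, so start_y = -1
+// (window corners may fall into the padded region). With depth = 3 the
+// round_adjustment is 1 / (3 + 1) = 0.25, added to every flattened corner index.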
+
+luci::CircleConst *create_coords_addition(loco::Graph *graph, luci::Padding padding,
+ const luci::Stride &stride, const luci::Filter &filter,
+ uint32_t input_height, uint32_t input_width,
+ uint32_t depth, uint32_t output_height,
+ uint32_t output_width)
+{
+ auto cords = graph->nodes()->create<luci::CircleConst>();
+
+ cords->dtype(loco::DataType::FLOAT32);
+
+ cords->rank(4);
+ cords->dim(0).set(1);
+ cords->dim(1).set(output_height);
+ cords->dim(2).set(output_width);
+ cords->dim(3).set(1);
+
+ fill_coords_addition<loco::DataType::FLOAT32>(padding, stride, filter, input_height, input_width,
+ depth, cords);
+
+ return cords;
+}
+
+luci::CircleNode *get_custom_output(const luci::CircleCustom *cop, int32_t idx)
+{
+ auto const outputs = loco::succs(cop);
+ assert(outputs.size() == 2);
+
+ auto output = loco::must_cast<luci::CircleCustomOut *>(*outputs.begin());
+ if (output->index() != idx)
+ {
+ output = loco::must_cast<luci::CircleCustomOut *>(*outputs.rbegin());
+ }
+
+ return output;
+}
+
+luci::CircleNode *max_pool_branch(luci::Padding padding, const luci::Stride &stride,
+ const luci::Filter filter, luci::CircleCustom *cop)
+{
+ auto graph = cop->graph();
+ auto input = cop->inputs(0);
+
+ auto origin = luci::get_origin(cop);
+ auto name = cop->name() + "/Argmax";
+
+ // Create MaxPool
+ auto maxpool = none_act_func(graph->nodes()->create<luci::CircleMaxPool2D>());
+ {
+ init_name_and_origin(maxpool, name + "/MaxPool2D", origin);
+
+ set_stride(maxpool, stride);
+ set_filter(maxpool, filter);
+ maxpool->padding(padding);
+
+ maxpool->value(input);
+ }
+
+ return maxpool;
+}
+
+luci::CircleNode *window_flattened_coord(const std::string &name, luci::Padding padding,
+ const luci::Stride &stride, const luci::Filter filter,
+ int32_t input_height, int32_t input_width,
+ uint32_t output_height, uint32_t output_width,
+ luci::CircleNode *input)
+{
+ auto const graph = input->graph();
+ auto const origin = luci::get_origin(input);
+
+ auto const depth_dimension = 3;
+
+ // Create pad in case of SAME padding
+ luci::CircleNode *conv_input = input;
+ if (padding == luci::Padding::SAME)
+ {
+ // Create a redundant Mul by 1.0 to combine two nodes with special quantization
+ // restrictions: PadV2 and Split in this case
+ // TODO Introduce special requantize node and fix quantizer?
+ auto requantize = none_act_func(graph->nodes()->create<luci::CircleMul>());
+ init_name_and_origin(requantize, name + "/Requantize", origin);
+ auto one_const = create_scalar<loco::DataType::FLOAT32>(graph, 1.0f);
+ init_name_and_origin(one_const, name + "/Requantize_const", origin);
+
+ requantize->x(input);
+ requantize->y(one_const);
+
+ auto pad = graph->nodes()->create<luci::CirclePadV2>();
+ init_name_and_origin(pad, name + "/Pad", origin);
+
+ pad->input(requantize);
+
+ int32_t full_w_pad = compute_full_padding(input_width, output_width, stride.w(), filter.w());
+ int32_t full_h_pad = compute_full_padding(input_height, output_height, stride.h(), filter.h());
+ int32_t left_pad = full_w_pad / 2;
+ int32_t right_pad = full_w_pad - left_pad;
+ int32_t top_pad = full_h_pad / 2;
+ int32_t bottom_pad = full_h_pad - top_pad;
+ auto padding_const = create_padding_const(graph, left_pad, right_pad, top_pad, bottom_pad);
+ init_name_and_origin(padding_const, name + "/Pad_shape", origin);
+ pad->paddings(padding_const);
+
+ auto padding_value =
+ create_scalar<loco::DataType::FLOAT32, float>(graph, std::numeric_limits<float>::lowest());
+ init_name_and_origin(padding_value, name + "/Pad_value", origin);
+ pad->constant_values(padding_value);
+
+ conv_input = pad;
+ }
+ // Create Conv2D to move spatial dimensions to depth
+ auto conv = none_act_func(graph->nodes()->create<luci::CircleConv2D>());
+ {
+ init_name_and_origin(conv, name + "/Conv2D", origin);
+
+ // Padding, Stride and kernel size equal to MaxPool's
+ set_stride(conv, stride);
+ conv->padding(luci::Padding::VALID);
+
+ // the kernel depth equals the number of elements in the pooling window
+ auto const kh = filter.h();
+ auto const kw = filter.w();
+ auto const kd = kh * kw;
+
+ // use zero bias
+ auto bias = create_zero_bias(graph, kd);
+ init_name_and_origin(bias, conv->name() + "/Bias", origin);
+
+ // create filter
+ // TODO make shared
+ auto weights = create_conv_filter(graph, kh, kw, kd);
+ init_name_and_origin(weights, conv->name() + "/Weights", origin);
+
+ conv->bias(bias);
+ conv->filter(weights);
+ conv->input(conv_input);
+ }
+
+ // Create ArgMax
+ auto argmax = graph->nodes()->create<luci::CircleArgMax>();
+ {
+ init_name_and_origin(argmax, name + "/ArgMax", origin);
+
+ argmax->output_type(loco::DataType::S32);
+
+ // Create argmax_dim
+ auto argmax_dim = create_scalar<loco::DataType::S32>(graph, depth_dimension);
+ init_name_and_origin(argmax_dim, argmax->name() + "/Dimension", origin);
+
+ argmax->dimension(argmax_dim);
+ argmax->input(conv);
+ }
+
+ // Create Reshape back to rank 4, because ArgMax decreases the tensor rank by 1
+ auto reshape = graph->nodes()->create<luci::CircleReshape>();
+ {
+ init_name_and_origin(reshape, name + "/Reshape", origin);
+
+ auto shape = create_shape_tensor(graph, {1, output_height, output_width, 1});
+ init_name_and_origin(shape, reshape->name() + "/Shape", origin);
+
+ reshape->tensor(argmax);
+ reshape->shape(shape);
+ }
+
+ // Create Cast to use float32 instead of int32
+ auto argmax_cast = create_cast(reshape, loco::DataType::S32, loco::DataType::FLOAT32);
+ init_name_and_origin(argmax_cast, argmax->name() + "/Cast", origin);
+
+ return argmax_cast;
+}
+
+// Creates "identity operation" after Floor
+// to force circle-quantizer requantize output tensor with scale << 1.
+//
+// Dealing with values of extremely different scales
+// in following binary operations hurts backend precision.
+luci::CircleNode *create_post_floor_requantize_node(luci::CircleFloor *floor)
+{
+ auto graph = floor->graph();
+ auto const origin = luci::get_origin(floor);
+ auto name = floor->name();
+
+ // Use DepthwiseConv2D with an identity filter as an "identity operation".
+ //
+ // This operation does not change values, but forces circle-quantizer to use
+ // statistics to compute the qparam scale instead of the fixed scale == 1.0 after Floor.
+ // DepthwiseConv2D is not eliminated by optimizations,
+ // so the desired scale will reach the backend.
+ auto requantizer = none_act_func(graph->nodes()->create<luci::CircleDepthwiseConv2D>());
+ init_name_and_origin(requantizer, name + "/Requantizer", origin);
+
+ requantizer->input(floor);
+
+ auto requantizer_filter = create_scalar<loco::DataType::FLOAT32>(graph, 1.0f);
+ init_name_and_origin(requantizer_filter, name + "/Requantizer/filter", origin);
+ requantizer_filter->rank(4);
+ for (uint32_t i = 0; i < 4; ++i)
+ {
+ requantizer_filter->dim(i) = 1;
+ }
+ requantizer->filter(requantizer_filter);
+
+ auto requantizer_bias = create_zero_bias(graph, 1);
+ init_name_and_origin(requantizer_bias, name + "/Requantizer/bias", origin);
+ requantizer->bias(requantizer_bias);
+
+ requantizer->padding(luci::Padding::VALID);
+ requantizer->stride()->w(1);
+ requantizer->stride()->h(1);
+ requantizer->depthMultiplier(1);
+ requantizer->dilation()->w(1);
+ requantizer->dilation()->h(1);
+
+ return requantizer;
+}
+
+luci::CircleNode *window_y_coord(const std::string &name, const luci::Filter &filter,
+ luci::CircleNode *flattened)
+{
+ auto const graph = flattened->graph();
+ auto const origin = luci::get_origin(flattened);
+
+ auto div = none_act_func(graph->nodes()->create<luci::CircleMul>());
+ {
+ init_name_and_origin(div, name + "/Div", origin);
+
+ // The adjustment coefficient is needed to fix the computation on quantized tensors.
+ //
+ // For example, the float32 value 2.0 could be quantized to 1.996;
+ // after Floor it would become 1.0, while the desired answer is still close to 2.0.
+ //
+ // rounding_adjustment is chosen small enough not to affect float32 computations,
+ // while the resulting change in "Div" is larger than the potential quantization error.
+ //
+ // This computation exploits the fact that the division result is the y coordinate
+ // in the MaxPool window and lies in the defined range [0, filter.h())
+ const float rounding_adjustment = 1.0f / (filter.w() * filter.h());
+ const float divider_value = filter.w() - rounding_adjustment;
+ auto divider = create_scalar<loco::DataType::FLOAT32>(graph, 1.0f / divider_value);
+ init_name_and_origin(divider, div->name() + "/Divider", origin);
+
+ div->x(flattened);
+ div->y(divider);
+ }
+
+ auto floor = graph->nodes()->create<luci::CircleFloor>();
+ {
+ init_name_and_origin(floor, name + "/Floor", origin);
+ floor->x(div);
+ }
+
+ auto requantizer = create_post_floor_requantize_node(floor);
+
+ return requantizer;
+}
+
+luci::CircleNode *window_x_coord(const std::string &name, float filter_width,
+ luci::CircleNode *flattened, luci::CircleNode *y_coord)
+{
+ auto const graph = flattened->graph();
+ auto const origin = luci::get_origin(flattened);
+
+ auto mod = none_act_func(graph->nodes()->create<luci::CircleAdd>());
+ {
+ init_name_and_origin(mod, name + "/Mod", origin);
+
+ auto neg = graph->nodes()->create<luci::CircleNeg>();
+ {
+ init_name_and_origin(neg, mod->name() + "/Neg", origin);
+
+ auto mul = none_act_func(graph->nodes()->create<luci::CircleMul>());
+ {
+ init_name_and_origin(mul, neg->name() + "/Mul", origin);
+
+ auto multiplier = create_scalar<loco::DataType::FLOAT32>(graph, filter_width);
+ init_name_and_origin(multiplier, mul->name() + "/Multiplier", origin);
+
+ mul->x(y_coord);
+ mul->y(multiplier);
+ }
+
+ neg->x(mul);
+ }
+
+ mod->x(flattened);
+ mod->y(neg);
+ }
+
+ return mod;
+}
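+
+// Together, window_y_coord and window_x_coord recover the 2-D window coordinates
+// from the flattened in-window index: y = floor(flat / filter.w()) and
+// x = flat - y * filter.w(). e.g. for a 3x3 window and flat = 5:
+// y = floor(5 / 3) = 1, x = 5 - 1 * 3 = 2.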
+
+luci::CircleNode *plane_flattened_coord(const std::string &name, uint32_t input_width,
+ luci::CircleNode *y_coord, luci::CircleNode *x_coord,
+ luci::CircleNode *corners)
+{
+ auto const graph = corners->graph();
+ auto const origin = luci::get_origin(corners);
+
+ auto add = none_act_func(graph->nodes()->create<luci::CircleAdd>());
+ {
+ init_name_and_origin(add, name + "/Add", origin);
+
+ auto addition = none_act_func(graph->nodes()->create<luci::CircleAdd>());
+ {
+ init_name_and_origin(addition, add->name() + "/Add", origin);
+
+ auto y_addition = none_act_func(graph->nodes()->create<luci::CircleMul>());
+ {
+ init_name_and_origin(y_addition, addition->name() + "/Mul", origin);
+
+ auto width_scalar = create_scalar<loco::DataType::FLOAT32>(graph, input_width);
+ init_name_and_origin(width_scalar, y_addition->name() + "/Const", origin);
+
+ y_addition->x(y_coord);
+ y_addition->y(width_scalar);
+ }
+
+ addition->x(x_coord);
+ addition->y(y_addition);
+ }
+
+ add->x(addition);
+ add->y(corners);
+ }
+
+ return add;
+}
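+
+// The plane index computed above is corners + y * input_width + x, i.e. the
+// flattened position of the max element within its input plane.
+// e.g. corner = 10, input_width = 8, y = 1, x = 2 gives 10 + 1 * 8 + 2 = 20.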
+
+luci::CircleNode *volume_flattened_coords(const std::string &name, uint32_t channel,
+ uint32_t input_depth, luci::CircleNode *plane)
+{
+ auto const graph = plane->graph();
+ auto const origin = luci::get_origin(plane);
+
+ // Create Mul
+ auto mul = none_act_func(graph->nodes()->create<luci::CircleMul>());
+ {
+ init_name_and_origin(mul, name + "/Mul", origin);
+
+ auto depth_scalar = create_scalar<loco::DataType::FLOAT32>(graph, input_depth);
+ init_name_and_origin(depth_scalar, mul->name() + "/Const", origin);
+
+ mul->x(plane);
+ mul->y(depth_scalar);
+ }
+
+ luci::CircleNode *volume = mul;
+
+ // Add channel number to output
+ if (channel > 0)
+ {
+ // Create Add
+ auto add_ch = none_act_func(graph->nodes()->create<luci::CircleAdd>());
+ init_name_and_origin(add_ch, name + "/Add_Channel", origin);
+
+ auto channel_scalar = create_scalar<loco::DataType::FLOAT32>(graph, channel);
+ init_name_and_origin(channel_scalar, add_ch->name() + "/Const", origin);
+
+ add_ch->x(mul);
+ add_ch->y(channel_scalar);
+
+ volume = add_ch;
+ }
+
+ return volume;
+}
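+
+// The final flattened index is plane * input_depth + channel.
+// e.g. plane = 20, input_depth = 3, channel = 1 gives 20 * 3 + 1 = 61, which
+// matches TF's flattened NHWC argmax index convention for batch size 1.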
+
+luci::CircleNode *argmax_branch(luci::Padding padding, const luci::Stride &stride,
+ const luci::Filter filter, luci::CircleCustom *cop)
+{
+ auto graph = cop->graph();
+ auto input = loco::must_cast<luci::CircleNode *>(cop->inputs(0));
+ auto output = get_custom_output(cop, 1);
+
+ auto const depth_dimension = 3;
+ auto const input_depth = input->dim(depth_dimension).value();
+ auto const input_height = input->dim(1).value();
+ auto const input_width = input->dim(2).value();
+
+ assert(output->rank() == 4);
+ auto const output_height = output->dim(1).value();
+ auto const output_width = output->dim(2).value();
+
+ auto origin = luci::get_origin(cop);
+ auto name = cop->name() + "/Argmax";
+
+ // Create Split
+ auto split = graph->nodes()->create<luci::CircleSplit>();
+ {
+ init_name_and_origin(split, name + "/Split", origin);
+
+ // Create split_dim
+ auto split_dim = create_scalar<loco::DataType::S32>(graph, depth_dimension);
+ init_name_and_origin(split_dim, split->name() + "/Dim", origin);
+
+ split->num_split(int32_t(input_depth));
+
+ split->split_dim(split_dim);
+ split->input(input);
+ }
+
+ /**
+ * Note: we need to find the index (in the input tensor) of the maximum element in each of
+ * MaxPool's sliding windows. To do this we split the input tensor by channels, find the
+ * index within each sliding window, and convert it to an index into the source input tensor
+ * using FloorDiv, Mul and Add operations with constant tensors.
+ */
+ std::vector<luci::CircleNode *> branch_outputs(input_depth);
+
+ for (uint32_t br_n = 0; br_n < input_depth; ++br_n)
+ {
+ auto const branch_name = name + "/depth_" + std::to_string(br_n);
+
+ // Create CircleSplitOut
+ auto split_out = graph->nodes()->create<luci::CircleSplitOut>();
+ init_name_and_origin(split_out, branch_name + "/SplitOut", origin);
+ split_out->index(int32_t(br_n));
+ split_out->input(split);
+
+ // Define idx of max element in Window:
+ auto window_coords =
+ window_flattened_coord(branch_name + "/WindowFlat", padding, stride, filter, input_height,
+ input_width, output_height, output_width, split_out);
+
+ auto const window_y = window_y_coord(branch_name + "/WindowY", filter, window_coords);
+ auto const window_x =
+ window_x_coord(branch_name + "/WindowX", filter.w(), window_coords, window_y);
+
+ // Define idx of max element in Plane
+ // This tensor contains coords of left top corners for each window from input tensor
+ auto corners = create_coords_addition(graph, padding, stride, filter, input_height, input_width,
+ input_depth, output_height, output_width);
+ init_name_and_origin(corners, branch_name + "/Const", origin);
+
+ auto plane_coord =
+ plane_flattened_coord(branch_name + "/PlaneFlat", input_width, window_y, window_x, corners);
+
+ // Define volume coords as final value
+ branch_outputs[br_n] =
+ volume_flattened_coords(branch_name + "/VolumeFlat", br_n, input_depth, plane_coord);
+ }
+
+ // Create Concatenation
+ auto concat = none_act_func(graph->nodes()->create<luci::CircleConcatenation>(input_depth));
+ {
+ init_name_and_origin(concat, name + "/Concatenation", origin);
+ concat->axis(depth_dimension);
+
+ for (uint32_t i = 0; i < input_depth; ++i)
+ {
+ concat->values(i, branch_outputs[i]);
+ }
+ }
+
+ // Output of MaxPoolWithArgmax's argmax branch should be S64 or S32
+ loco::DataType output_dtype = get_custom_output(cop, 1)->dtype();
+ auto output_cast = create_cast(concat, loco::DataType::FLOAT32, output_dtype);
+ init_name_and_origin(output_cast, name + "/Cast", origin);
+
+ return output_cast;
+}
+
+bool resolve_max_pool_with_argmax(luci::CircleCustom *cop)
+{
+#define CHECK_OR_FALSE(condition) \
+ if (not(condition)) \
+ return false;
+
+ const std::vector<uint8_t> custom_options = cop->custom_options();
+ auto map = flexbuffers::GetRoot(custom_options).AsMap();
+
+ // Define params
+ // Note: Only `Targmax` equal to DT_INT64 is supported by tflite converter
+ // Note: Only `data_format` equal to "NHWC" is supported by tflite converter
+ // TODO add support of `include_batch_in_index` param
+ auto ksize_param = to_vector<uint32_t>(map["ksize"].AsTypedVector());
+ auto strides_param = to_vector<uint32_t>(map["strides"].AsTypedVector());
+ auto padding_param = map["padding"].As<std::string>();
+
+ // ksize with a batch or depth dimension larger than 1 is not supported.
+ CHECK_OR_FALSE(ksize_param.size() == 4);
+ CHECK_OR_FALSE(ksize_param[0] == 1 && ksize_param[3] == 1);
+
+ CHECK_OR_FALSE(strides_param.size() == 4);
+ CHECK_OR_FALSE(strides_param[0] == 1 && strides_param[3] == 1);
+
+ // define Padding
+ auto padding = string_to_padding(padding_param);
+
+ // define Filter
+ luci::Filter filter;
+ filter.h(ksize_param[1]);
+ filter.w(ksize_param[2]);
+
+ // define Stride
+ luci::Stride stride;
+ stride.h(strides_param[1]);
+ stride.w(strides_param[2]);
+
+ // input node
+ auto const input = loco::must_cast<luci::CircleNode *>(cop->inputs(0));
+ CHECK_OR_FALSE(input->dtype() == loco::DataType::FLOAT32);
+ CHECK_OR_FALSE(input->rank() == 4);
+
+ // TODO support batch size > 1 and `include_batch_in_index` option
+ CHECK_OR_FALSE(input->dim(0).value() == 1);
+
+ // output nodes
+ auto const outputs = loco::succs(cop);
+ CHECK_OR_FALSE(outputs.size() == 2);
+ assert(outputs.size() == cop->numOutputs());
+
+ auto output0 = get_custom_output(cop, 0);
+ auto output1 = get_custom_output(cop, 1);
+
+ // From TF documentation: the output of maxpool must have the same type as the input
+ assert(output0->dtype() == input->dtype());
+ assert(output1->dtype() == loco::DataType::S64 || output1->dtype() == loco::DataType::S32);
+
+ // Create MaxPool
+ auto maxpool = max_pool_branch(padding, stride, filter, cop);
+ auto argmax = argmax_branch(padding, stride, filter, cop);
+
+ // the last op of the argmax branch is a Cast; it should have its dtype initialized
+ assert(argmax->dtype() == output1->dtype());
+
+ // replace old node with new subgraph
+ cop->inputs(0, nullptr);
+ loco::replace(output0).with(maxpool);
+ loco::replace(output1).with(argmax);
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * BEFORE
+ * |
+ * [CircleNode]
+ * |
+ * [CUSTOM(MaxPoolWithArgmax)]
+ * | |
+ * [MaxPool output] [Argmax output]
+ *
+ * AFTER
+ * |
+ * [CircleNode]
+ * / \
+ * [Split over channels] [MaxPool2D]
+ * / | \ \
+ * [Requantize] ... ... [MaxPool output]
+ * |
+ * [PadV2]
+ * |
+ * [Conv2D]
+ * |
+ * [ArgMax]
+ * |
+ * [Reshape to 4d]
+ * |
+ * [Cast to float32]
+ * / |
+ * | [Mul 1/<window width>]
+ * | \
+ * | [Floor]
+ * | |
+ * | [DepthwiseConv2D for requantize]
+ * | / \
+ * | [Mul window width] |
+ * \ / /
+ * \ [Neg] [Mul input width]
+ * \ / /
+ * [Add] /
+ * \ /
+ * [Add]
+ * |
+ * [Add const]
+ * |
+ * [Mul number of channels]
+ * \
+ * [Optional Add with channels id] ... ...
+ * \ | /
+ * [Concatenation]
+ * |
+ * [Cast to int]
+ * |
+ * [Argmax output]
+ */
+bool ResolveCustomOpMaxPoolWithArgmaxPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto cop = dynamic_cast<luci::CircleCustom *>(node);
+ if (not cop)
+ continue;
+
+ if (cop->custom_code() != "MaxPoolWithArgmax")
+ continue;
+
+ if (!resolve_max_pool_with_argmax(cop))
+ continue;
+
+ changed = true;
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/ResolveCustomOpMaxPoolWithArgmaxPass.test.cpp b/compiler/luci/pass/src/ResolveCustomOpMaxPoolWithArgmaxPass.test.cpp
new file mode 100644
index 000000000..cad6ca214
--- /dev/null
+++ b/compiler/luci/pass/src/ResolveCustomOpMaxPoolWithArgmaxPass.test.cpp
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ResolveCustomOpMaxPoolWithArgmaxPass.h"
+
+#include <gtest/gtest.h>
+
+TEST(ResolveCustomOpMaxPoolWithArgmaxPassTest, name)
+{
+ luci::ResolveCustomOpMaxPoolWithArgmaxPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
diff --git a/compiler/luci/pass/src/ResolveCustomOpSplitVPass.cpp b/compiler/luci/pass/src/ResolveCustomOpSplitVPass.cpp
new file mode 100644
index 000000000..5a09e3930
--- /dev/null
+++ b/compiler/luci/pass/src/ResolveCustomOpSplitVPass.cpp
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ResolveCustomOpSplitVPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+#include <luci/Service/Nodes/CircleConst.h>
+
+#include <limits> // std::numeric_limits
+
+namespace
+{
+
+// Input node is a constant of type S64.
+// Returns an S32 version of the node,
+// or nullptr if any S64 value is out of the S32 range.
+luci::CircleConst *s64_to_s32(luci::CircleConst *node)
+{
+ assert(node);
+ assert(node->dtype() == loco::DataType::S64);
+
+ auto cloned = luci::clone(node);
+ luci::add_origin(cloned, luci::get_origin(node));
+
+ const auto num_elems = node->size<loco::DataType::S64>();
+
+ cloned->dtype(loco::DataType::S32);
+ cloned->size<loco::DataType::S32>(num_elems);
+
+ for (uint32_t i = 0; i < num_elems; i++)
+ {
+ int64_t val = node->at<loco::DataType::S64>(i);
+ if (val < std::numeric_limits<int32_t>::min() or val > std::numeric_limits<int32_t>::max())
+ return nullptr;
+
+ cloned->at<loco::DataType::S32>(i) = static_cast<int32_t>(val);
+ }
+
+ return cloned;
+}
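+
+// e.g. a size_splits element holding 3'000'000'000 exceeds INT32_MAX (2'147'483'647),
+// so s64_to_s32 returns nullptr and the caller rejects the node.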
+
+/** BEFORE
+ *
+ * [CircleNode]
+ * \
+ * \ [size_splits] [split_dim]
+ * \ | /
+ * [CircleCustom(SplitV)]
+ * |
+ * [CircleCustomOut]
+ * |
+ * [CircleNode]
+ *
+ * AFTER
+ *
+ * [CircleNode]
+ * | \
+ * | \ [size_splits] [split_dim]
+ * | \ | /
+ * | \ | /
+ * | \ | /
+ * [CircleCustom(SplitV)] [CircleSplitV]
+ * | |
+ * [CircleCustomOut] [CircleSplitVOut]
+ * |
+ * [CircleNode]
+ */
+bool resolve_splitv(luci::CircleCustom *node)
+{
+ const std::string custom_code = node->custom_code();
+ const std::vector<uint8_t> custom_options = node->custom_options();
+
+ if (custom_code != "SplitV")
+ return false;
+
+ if (node->numInputs() != 3)
+ return false;
+
+ auto size_splits = dynamic_cast<luci::CircleConst *>(node->inputs(1));
+ if (not size_splits)
+ return false;
+
+ // Convert size_splits to S32, because luci-interpreter does not support
+ // S64 size_splits yet
+ // TODO Support S64 size_splits
+ if (size_splits->dtype() == loco::DataType::S64)
+ {
+ size_splits = s64_to_s32(size_splits);
+ if (not size_splits)
+ return false;
+ }
+ if (size_splits->dtype() != loco::DataType::S32)
+ return false;
+
+ auto split_dim = dynamic_cast<luci::CircleConst *>(node->inputs(2));
+ if (not split_dim)
+ return false;
+
+ if (split_dim->dtype() == loco::DataType::S64)
+ {
+ split_dim = s64_to_s32(split_dim);
+ if (not split_dim)
+ return false;
+ }
+ if (split_dim->dtype() != loco::DataType::S32)
+ return false;
+
+ if (size_splits->rank() != 1)
+ return false;
+
+ const auto num_split = size_splits->dim(0).value();
+
+ auto split_v = node->graph()->nodes()->create<luci::CircleSplitV>();
+ split_v->input(node->inputs(0));
+ split_v->size_splits(size_splits);
+ split_v->split_dim(split_dim);
+ split_v->num_split(num_split);
+ split_v->name(node->name());
+ luci::add_origin(split_v, luci::get_origin(node));
+
+ int32_t i = 0;
+ const auto succs = loco::succs(node);
+ for (auto succ : succs)
+ {
+ auto custom_out = loco::must_cast<luci::CircleCustomOut *>(succ); // FIX_CALLER_UNLESS
+
+ auto split_v_out = node->graph()->nodes()->create<luci::CircleSplitVOut>();
+ split_v_out->input(split_v);
+ split_v_out->name(node->name() + "_out_" + std::to_string(i));
+ split_v_out->index(i++);
+ luci::add_origin(split_v_out, luci::get_origin(node));
+ loco::replace(custom_out).with(split_v_out);
+ }
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool ResolveCustomOpSplitVPass::run(loco::Graph *g)
+{
+ bool changed = false;
+
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto cop = dynamic_cast<luci::CircleCustom *>(node);
+ if (not cop)
+ continue;
+
+ if (resolve_splitv(cop))
+ changed = true;
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/ResolveCustomOpSplitVPass.test.cpp b/compiler/luci/pass/src/ResolveCustomOpSplitVPass.test.cpp
new file mode 100644
index 000000000..e7738aadb
--- /dev/null
+++ b/compiler/luci/pass/src/ResolveCustomOpSplitVPass.test.cpp
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ResolveCustomOpSplitVPass.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <luci/IR/CircleNodes.h>
+#include <gtest/gtest.h>
+
+using namespace luci::test;
+
+namespace
+{
+
+/**
+ * graph having Custom operator SplitV
+ *
+ * [Input] [Const] [Const]
+ * \ | /
+ * [Custom(SplitV)]
+ * / | \
+ * [CustomOut] [CustomOut] [CustomOut]
+ * | | |
+ * [Output] [Output] [Output]
+ */
+class SplitVGraphlet
+{
+public:
+ SplitVGraphlet() = default;
+
+public:
+ void init(loco::Graph *g)
+ {
+ // CircleCustom(SplitV)
+ _splitv = g->nodes()->create<luci::CircleCustom>(3, 3);
+ _splitv->custom_code("SplitV");
+ _splitv->shape({1, 2, 2, 192});
+ _splitv->dtype(loco::DataType::FLOAT32);
+ _splitv->name("splitv");
+
+ // CircleConst
+ auto size_splits = g->nodes()->create<luci::CircleConst>();
+ size_splits->dtype(loco::DataType::S64);
+ size_splits->shape({3});
+ size_splits->size<loco::DataType::S64>(3);
+ size_splits->at<loco::DataType::S64>(0) = 32;
+ size_splits->at<loco::DataType::S64>(1) = 32;
+ size_splits->at<loco::DataType::S64>(2) = 128;
+
+ // CircleConst
+ auto split_dim = g->nodes()->create<luci::CircleConst>();
+ split_dim->dtype(loco::DataType::S32);
+ split_dim->rank(0);
+ split_dim->size<loco::DataType::S32>(1);
+ split_dim->scalar<loco::DataType::S32>() = 3;
+
+ _splitv->inputs(1, size_splits);
+ _splitv->inputs(2, split_dim);
+
+ // CircleCustomOut
+ _splitv_out1 = g->nodes()->create<luci::CircleCustomOut>();
+ _splitv_out1->shape({1, 2, 2, 32});
+ _splitv_out1->dtype(loco::DataType::FLOAT32);
+ _splitv_out1->index(0);
+ _splitv_out1->input(_splitv);
+
+ // CircleCustomOut
+ _splitv_out2 = g->nodes()->create<luci::CircleCustomOut>();
+ _splitv_out2->shape({1, 2, 2, 32});
+ _splitv_out2->dtype(loco::DataType::FLOAT32);
+ _splitv_out2->index(1);
+ _splitv_out2->input(_splitv);
+
+ // CircleCustomOut
+ _splitv_out3 = g->nodes()->create<luci::CircleCustomOut>();
+ _splitv_out3->shape({1, 2, 2, 128});
+ _splitv_out3->dtype(loco::DataType::FLOAT32);
+ _splitv_out3->index(2);
+ _splitv_out3->input(_splitv);
+ }
+
+public:
+ luci::CircleCustom *splitv() { return _splitv; }
+
+protected:
+ luci::CircleCustom *_splitv = nullptr;
+ luci::CircleCustomOut *_splitv_out1 = nullptr;
+ luci::CircleCustomOut *_splitv_out2 = nullptr;
+ luci::CircleCustomOut *_splitv_out3 = nullptr;
+};
+
+class SplitVGraph : public TestIGraphlet, public TestOsGraphlet<3>, public SplitVGraphlet
+{
+public:
+ SplitVGraph() = default;
+
+ void init(void)
+ {
+ TestIGraphlet::init(g(), {1, 2, 2, 192});
+ TestOsGraphlet<3>::init(g(), {{1, 2, 2, 32}, {1, 2, 2, 32}, {1, 2, 2, 128}});
+ SplitVGraphlet::init(g());
+
+ // connect graph
+ _splitv->inputs(0, input());
+
+ output(0)->from(_splitv_out1);
+ output(1)->from(_splitv_out2);
+ output(2)->from(_splitv_out3);
+ }
+};
+
+class SplitVGraphTest : public ::testing::Test
+{
+public:
+ SplitVGraph g;
+ luci::ResolveCustomOpSplitVPass pass;
+};
+
+} // namespace
+
+TEST_F(SplitVGraphTest, simple_test)
+{
+ g.init();
+
+ auto ret = pass.run(g.g());
+ EXPECT_EQ(true, ret);
+
+ auto svo_1 = dynamic_cast<luci::CircleSplitVOut *>(g.output(0)->from());
+ EXPECT_NE(nullptr, svo_1);
+ auto svo_2 = dynamic_cast<luci::CircleSplitVOut *>(g.output(1)->from());
+ EXPECT_NE(nullptr, svo_2);
+ auto svo_3 = dynamic_cast<luci::CircleSplitVOut *>(g.output(2)->from());
+ EXPECT_NE(nullptr, svo_3);
+
+ auto sv = dynamic_cast<luci::CircleSplitV *>(svo_1->input());
+ EXPECT_NE(nullptr, sv);
+ sv = dynamic_cast<luci::CircleSplitV *>(svo_2->input());
+ EXPECT_NE(nullptr, sv);
+ sv = dynamic_cast<luci::CircleSplitV *>(svo_3->input());
+ EXPECT_NE(nullptr, sv);
+
+ auto size_splits = loco::must_cast<luci::CircleConst *>(sv->size_splits());
+ EXPECT_EQ(loco::DataType::S32, size_splits->dtype());
+ EXPECT_EQ(32, size_splits->at<loco::DataType::S32>(0));
+ EXPECT_EQ(32, size_splits->at<loco::DataType::S32>(1));
+ EXPECT_EQ(128, size_splits->at<loco::DataType::S32>(2));
+
+ auto split_dim = loco::must_cast<luci::CircleConst *>(sv->split_dim());
+ EXPECT_EQ(loco::DataType::S32, split_dim->dtype());
+ EXPECT_EQ(3, split_dim->scalar<loco::DataType::S32>());
+}
+
+TEST_F(SplitVGraphTest, wrong_op_NEG)
+{
+ g.init();
+
+ g.splitv()->custom_code("AddV2");
+
+ auto ret = pass.run(g.g());
+ EXPECT_EQ(false, ret);
+}
diff --git a/compiler/luci/pass/src/ShapeInferencePass.cpp b/compiler/luci/pass/src/ShapeInferencePass.cpp
deleted file mode 100644
index f681b3d5f..000000000
--- a/compiler/luci/pass/src/ShapeInferencePass.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci/Pass/ShapeInferencePass.h"
-
-#include <luci/IR/CircleDialect.h>
-#include <luci/Service/CircleShapeInferenceRule.h>
-
-#include <loco.h>
-#include <loco/IR/CanonicalDialect.h>
-#include <loco/Service/CanonicalShapeInferenceRule.h>
-#include <loco/Service/ShapeInference.h>
-#include <loco/Service/MultiDialectShapeInferenceRule.h>
-
-namespace luci
-{
-
-bool ShapeInferencePass::run(loco::Graph *g)
-{
- loco::CanonicalShapeInferenceRule canonical_rule;
- luci::CircleShapeInferenceRule circle_rule;
-
- loco::MultiDialectShapeInferenceRule rules;
-
- rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
- .bind(luci::CircleDialect::get(), &circle_rule);
-
- return loco::apply(&rules).to(g);
-}
-
-} // namespace luci
diff --git a/compiler/luci/pass/src/ShuffleWeightTo16x1Float32Pass.cpp b/compiler/luci/pass/src/ShuffleWeightTo16x1Float32Pass.cpp
new file mode 100644
index 000000000..92060f625
--- /dev/null
+++ b/compiler/luci/pass/src/ShuffleWeightTo16x1Float32Pass.cpp
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ShuffleWeightTo16x1Float32Pass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <cassert>
+#include <vector>
+
+namespace
+{
+
+bool satisfy_precondition(luci::CircleFullyConnected *fc)
+{
+ // check if it's already been shuffled
+ if (fc->weights_format() != luci::CircleFullyConnected::WeightsFormat::DEFAULT)
+ return false;
+
+ // check if its data type is FLOAT32
+ if (fc->dtype() != loco::DataType::FLOAT32)
+ return false;
+
+ auto weights = loco::must_cast<luci::CircleConst *>(fc->weights());
+ // rank must be 2
+ if (weights->rank() != 2)
+ return false;
+
+ // check if it has sparsity parameter
+ if (weights->sparsityparam())
+ return false;
+
+ // check if the number of rows of FullyConnected's weights is a multiple of 16
+ const uint32_t MULTIPLE = 16;
+ uint32_t rows = weights->dim(0).value();
+ if (rows % MULTIPLE)
+ return false;
+
+ return true;
+}
+
+// collect the FullyConnected ops that share the same weight tensor
+void get_FCs_having_same_tensor(std::vector<luci::CircleFullyConnected *> &fc_vec, loco::Graph *g,
+ luci::CircleFullyConnected *fc)
+{
+ auto the_tensor = fc->weights();
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto fc = dynamic_cast<luci::CircleFullyConnected *>(node);
+ if (not fc)
+ continue;
+
+ if (fc->weights() == the_tensor)
+ fc_vec.push_back(fc);
+ }
+}
+
+luci::CircleConst *shuffle_weight(luci::CircleFullyConnected *fc)
+{
+ auto the_weights = loco::must_cast<luci::CircleConst *>(fc->weights());
+
+ auto name = fc->name();
+ assert(name.length() > 0);
+
+ // create CircleConst where shuffled data will be stored
+ luci::CircleConst *new_weights = fc->graph()->nodes()->create<luci::CircleConst>();
+ new_weights->dtype(loco::DataType::FLOAT32);
+ new_weights->size<loco::DataType::FLOAT32>(the_weights->size<loco::DataType::FLOAT32>());
+ new_weights->rank(the_weights->rank());
+ new_weights->shape_status(the_weights->shape_status());
+ for (uint32_t r = 0; r < new_weights->rank(); r++)
+ {
+ new_weights->dim(r).set(the_weights->dim(r).value());
+ }
+ new_weights->name(name + "/shuffle_weight");
+
+ // shuffle the weights
+ const uint32_t MULTIPLE = 16;
+ const uint32_t rows = the_weights->dim(0).value();
+ const uint32_t cols = the_weights->dim(1).value();
+ const uint32_t r_step = rows / MULTIPLE;
+ uint32_t index = 0;
+ for (uint32_t r = 0; r < r_step; r++)
+ {
+ for (uint32_t c = 0; c < cols; c++)
+ {
+ for (uint32_t i = 0; i < MULTIPLE; i++)
+ {
+ new_weights->at<loco::DataType::FLOAT32>(index++) =
+ the_weights->at<loco::DataType::FLOAT32>((r * MULTIPLE + i) * cols + c);
+ }
+ }
+ }
+
+ return new_weights;
+}
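+
+// Layout example (mirrors ShuffleWeightTo16x1Float32Pass.test.cpp): for a 16x2
+// row-major weight matrix holding 0..31, the shuffled buffer is column-major
+// within each block of 16 rows: 0, 2, 4, ..., 30 followed by 1, 3, 5, ..., 31.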
+
+} // namespace
+
+namespace luci
+{
+
+bool ShuffleWeightTo16x1Float32Pass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto fc = dynamic_cast<luci::CircleFullyConnected *>(node);
+ if (not fc)
+ continue;
+
+ if (not satisfy_precondition(fc))
+ continue;
+
+ std::vector<luci::CircleFullyConnected *> fc_vec;
+ get_FCs_having_same_tensor(fc_vec, g, fc);
+ auto new_weights = shuffle_weight(fc);
+
+ // replace to new weights
+ for (const auto fc : fc_vec)
+ {
+ fc->weights(new_weights);
+ fc->weights_format(luci::CircleFullyConnected::WeightsFormat::SHUFFLED16x1FLOAT32);
+ }
+
+ changed = true;
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/ShuffleWeightTo16x1Float32Pass.test.cpp b/compiler/luci/pass/src/ShuffleWeightTo16x1Float32Pass.test.cpp
new file mode 100644
index 000000000..077985977
--- /dev/null
+++ b/compiler/luci/pass/src/ShuffleWeightTo16x1Float32Pass.test.cpp
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ShuffleWeightTo16x1Float32Pass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+#include "test/TestFirstNode.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class FCGraphlet
+{
+public:
+ FCGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 wshape)
+ {
+ const uint32_t elements_num = num_elements(wshape);
+
+ // fc weights
+ _weights = g->nodes()->create<luci::CircleConst>();
+ _weights->dtype(loco::DataType::FLOAT32);
+ _weights->shape(wshape);
+ _weights->size<loco::DataType::FLOAT32>(elements_num);
+ for (uint32_t idx = 0; idx < elements_num; idx++)
+ {
+ _weights->at<loco::DataType::FLOAT32>(idx) = idx;
+ }
+ _weights->name("weights");
+
+ // fc
+ _fc = g->nodes()->create<luci::CircleFullyConnected>();
+ _fc->dtype(loco::DataType::FLOAT32);
+ _fc->name("fc");
+ }
+
+protected:
+ luci::CircleFullyConnected *_fc = nullptr;
+ luci::CircleConst *_weights = nullptr;
+};
+
+class FCGraph : public TestIGraphlet, public TestOGraphlet, public FCGraphlet
+{
+public:
+ FCGraph() = default;
+
+ void init(const ShapeU32 shape, const ShapeU32 wshape)
+ {
+ TestIGraphlet::init(g(), shape);
+ TestOGraphlet::init(g(), shape);
+ FCGraphlet::init(g(), wshape);
+
+ // connect graph
+ _fc->input(input());
+ _fc->weights(_weights);
+
+ output()->from(_fc);
+ }
+};
+
+} // namespace
+
+TEST(ShuffleWeightTo16x1Float32PassTest, name)
+{
+ luci::ShuffleWeightTo16x1Float32Pass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+const uint32_t ROW = 16;
+const uint32_t COL = 2;
+
+TEST(ShuffleWeightTo16x1Float32PassTest, SimpleTest1)
+{
+ FCGraph g;
+
+ g.init({ROW, COL}, {ROW, COL});
+
+ auto fc_node = luci::test::first_node<luci::CircleFullyConnected>(g.g());
+ ASSERT_NE(fc_node, nullptr);
+ auto weights = loco::must_cast<luci::CircleConst *>(fc_node->weights());
+ // before
+ ASSERT_EQ(0, weights->at<loco::DataType::FLOAT32>(0));
+ ASSERT_EQ(1, weights->at<loco::DataType::FLOAT32>(1));
+ ASSERT_EQ(2, weights->at<loco::DataType::FLOAT32>(2));
+ ASSERT_EQ(3, weights->at<loco::DataType::FLOAT32>(3));
+ ASSERT_EQ(4, weights->at<loco::DataType::FLOAT32>(4));
+ ASSERT_EQ(5, weights->at<loco::DataType::FLOAT32>(5));
+ ASSERT_EQ(6, weights->at<loco::DataType::FLOAT32>(6));
+ ASSERT_EQ(7, weights->at<loco::DataType::FLOAT32>(7));
+ ASSERT_EQ(8, weights->at<loco::DataType::FLOAT32>(8));
+ ASSERT_EQ(9, weights->at<loco::DataType::FLOAT32>(9));
+ ASSERT_EQ(10, weights->at<loco::DataType::FLOAT32>(10));
+ ASSERT_EQ(11, weights->at<loco::DataType::FLOAT32>(11));
+ ASSERT_EQ(12, weights->at<loco::DataType::FLOAT32>(12));
+ ASSERT_EQ(13, weights->at<loco::DataType::FLOAT32>(13));
+ ASSERT_EQ(14, weights->at<loco::DataType::FLOAT32>(14));
+ ASSERT_EQ(15, weights->at<loco::DataType::FLOAT32>(15));
+
+ luci::ShuffleWeightTo16x1Float32Pass pass;
+ while (pass.run(g.g()))
+ ;
+
+ weights = loco::must_cast<luci::CircleConst *>(fc_node->weights());
+ // after
+ ASSERT_EQ(0, weights->at<loco::DataType::FLOAT32>(0));
+ ASSERT_EQ(2, weights->at<loco::DataType::FLOAT32>(1));
+ ASSERT_EQ(4, weights->at<loco::DataType::FLOAT32>(2));
+ ASSERT_EQ(6, weights->at<loco::DataType::FLOAT32>(3));
+ ASSERT_EQ(8, weights->at<loco::DataType::FLOAT32>(4));
+ ASSERT_EQ(10, weights->at<loco::DataType::FLOAT32>(5));
+ ASSERT_EQ(12, weights->at<loco::DataType::FLOAT32>(6));
+ ASSERT_EQ(14, weights->at<loco::DataType::FLOAT32>(7));
+ ASSERT_EQ(16, weights->at<loco::DataType::FLOAT32>(8));
+ ASSERT_EQ(18, weights->at<loco::DataType::FLOAT32>(9));
+ ASSERT_EQ(20, weights->at<loco::DataType::FLOAT32>(10));
+ ASSERT_EQ(22, weights->at<loco::DataType::FLOAT32>(11));
+ ASSERT_EQ(24, weights->at<loco::DataType::FLOAT32>(12));
+ ASSERT_EQ(26, weights->at<loco::DataType::FLOAT32>(13));
+ ASSERT_EQ(28, weights->at<loco::DataType::FLOAT32>(14));
+ ASSERT_EQ(30, weights->at<loco::DataType::FLOAT32>(15));
+}
+
+TEST(ShuffleWeightTo16x1Float32PassTest, invalid_weight_shape_NEG)
+{
+ FCGraph g;
+
+ g.init({ROW, COL}, {1, ROW, COL, 1});
+
+ auto fc_node = luci::test::first_node<luci::CircleFullyConnected>(g.g());
+ ASSERT_NE(fc_node, nullptr);
+
+ luci::ShuffleWeightTo16x1Float32Pass pass;
+ auto ret = pass.run(g.g());
+
+ ASSERT_FALSE(ret);
+}
+
+TEST(ShuffleWeightTo16x1Float32PassTest, invalid_weight_row16_NEG)
+{
+ FCGraph g;
+
+ g.init({COL, ROW}, {COL, ROW});
+
+ auto fc_node = luci::test::first_node<luci::CircleFullyConnected>(g.g());
+ ASSERT_NE(fc_node, nullptr);
+
+ luci::ShuffleWeightTo16x1Float32Pass pass;
+ auto ret = pass.run(g.g());
+
+ ASSERT_FALSE(ret);
+}
diff --git a/compiler/luci/pass/src/Sparsifier.cpp b/compiler/luci/pass/src/Sparsifier.cpp
new file mode 100644
index 000000000..18ab45f98
--- /dev/null
+++ b/compiler/luci/pass/src/Sparsifier.cpp
@@ -0,0 +1,229 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Sparsifier.h"
+
+namespace luci
+{
+
+template <typename T>
+Sparsifier<T>::Sparsifier(const std::vector<int32_t> &shape,
+ const std::vector<int32_t> &traversal_order,
+ const std::vector<DimensionType> &format,
+ const std::vector<int32_t> &block_size,
+ const std::vector<int32_t> &block_map)
+ : _dense_shape(shape), _traversal_order(traversal_order), _block_size(block_size),
+ _block_map(block_map)
+{
+ _dense_size = 1;
+ int32_t block_dim = 0;
+ _blocked_shape.resize(shape.size());
+ _format.resize(shape.size() + block_map.size());
+ for (int32_t i = 0; i < static_cast<int32_t>(shape.size()); i++)
+ {
+ _format.at(i) = format.at(traversal_order.at(i));
+ _dense_size *= shape.at(i);
+ if (block_dim < static_cast<int32_t>(block_map.size()) && block_map[block_dim] == i)
+ {
+ _blocked_shape.at(i) = shape.at(i) / block_size.at(block_dim);
+ block_dim++;
+ }
+ else
+ {
+ _blocked_shape.at(i) = shape.at(i);
+ }
+ }
+
+ // Only dense blocks are supported.
+ for (uint32_t i = 0; i < block_map.size(); i++)
+ {
+ _format[i + shape.size()] = DimensionType::DENSE;
+ }
+}
+
+template <typename T> void Sparsifier<T>::DenseToSparse(const T *src_data)
+{
+ int num_original_dims = _dense_shape.size();
+ int num_block_dims = _block_map.size();
+ int num_expanded_dims = num_original_dims + num_block_dims;
+ std::vector<int> expanded_shape(num_expanded_dims);
+ for (int i = 0; i < num_expanded_dims; i++)
+ {
+ if (i < num_original_dims)
+ {
+ expanded_shape.at(i) = _blocked_shape.at(i);
+ }
+ else
+ {
+ expanded_shape.at(i) = _block_size.at(i - num_original_dims);
+ }
+ }
+
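+ // Row-major strides of the dense shape: shape_offset[i] is the element
+ // distance between consecutive indices along axis i.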
+ std::vector<int> shape_offset(num_original_dims);
+ shape_offset.at(shape_offset.size() - 1) = 1;
+ for (int i = num_original_dims - 1; i > 0; --i)
+ {
+ shape_offset.at(i - 1) = shape_offset.at(i) * _dense_shape.at(i);
+ }
+
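+ // Extend the strides to the expanded (blocked) space: each block dimension
+ // inherits the stride of the original dimension it maps to, and that
+ // original dimension's stride is scaled up by the block size.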
+ std::vector<int> expanded_shape_offset(num_expanded_dims);
+ for (int i = 0; i < num_original_dims; ++i)
+ {
+ expanded_shape_offset.at(i) = shape_offset.at(i);
+ }
+ for (int i = 0; i < num_block_dims; ++i)
+ {
+ int mapped_dim = _block_map.at(i);
+ expanded_shape_offset.at(num_original_dims + i) = shape_offset.at(mapped_dim);
+ expanded_shape_offset.at(mapped_dim) *= _block_size.at(i);
+ }
+
+ std::vector<int> dst_ordered_offset(num_expanded_dims);
+ for (int i = 0; i < num_expanded_dims; ++i)
+ {
+ dst_ordered_offset.at(i) = expanded_shape_offset.at(_traversal_order.at(i));
+ }
+
+ std::vector<bool> dst_dim_has_nonzeroes(num_expanded_dims);
+ std::fill(dst_dim_has_nonzeroes.begin(), dst_dim_has_nonzeroes.end(), false);
+ std::vector<int> inner_compressed_dim(num_expanded_dims);
+ int most_recent_compressed_dim = -1;
+ std::vector<int> num_segments_of_next_compressed_dim(num_expanded_dims);
+ int segment_count = 1;
+ for (int i = num_expanded_dims - 1; i >= 0; --i)
+ {
+ inner_compressed_dim.at(i) = most_recent_compressed_dim;
+ if (_format.at(i) == DimensionType::SPARSE_CSR)
+ {
+ most_recent_compressed_dim = i;
+ num_segments_of_next_compressed_dim.at(i) = segment_count;
+ segment_count = 1;
+ }
+ else
+ {
+ num_segments_of_next_compressed_dim.at(i) = -1;
+ segment_count *= expanded_shape.at(_traversal_order.at(i));
+ }
+ }
+
+ _dim_metadata.resize(num_expanded_dims * 2);
+ std::vector<int> dst_sparse_dims;
+ dst_sparse_dims.reserve(num_expanded_dims);
+ for (int i = 0; i < num_expanded_dims; ++i)
+ {
+ _dim_metadata.at(i * 2).clear();
+ _dim_metadata.at(i * 2 + 1).clear();
+ if (_format.at(i) == DimensionType::DENSE)
+ {
+ // If dimension is dense, just store the shape.
+ _dim_metadata.at(i * 2).push_back(expanded_shape.at(_traversal_order.at(i)));
+ }
+ else
+ {
+ _dim_metadata.at(i * 2).push_back(0); // Segment array always begins with 0.
+ dst_sparse_dims.push_back(i); // Add dimension to the sparse list.
+ }
+ }
+
+ // This algorithm assumes that the block size is small enough for all the
+ // elements to fit in cache, so the strided accesses from a different
+ // traversal order and the write-first-erase-later strategy shouldn't be
+ // too slow.
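+ // coordinate[] holds the current index along each expanded dimension (in
+ // traversal order); dst_dim_idx == num_expanded_dims means a complete
+ // coordinate has been formed and its element can be emitted.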
+ int dst_dim_idx = num_expanded_dims;
+ std::vector<int> coordinate(num_expanded_dims, 0);
+ int dense_tensor_idx = 0;
+ while (dst_dim_idx >= 0)
+ {
+ if (dst_dim_idx == num_expanded_dims)
+ {
+ // We have a complete coordinate. Add the element to the value array if it
+ // is not zero, or if the last dimension is dense.
+ if (!IsZero(src_data[dense_tensor_idx]))
+ {
+ _data.push_back(src_data[dense_tensor_idx]);
+ // Mark the current index of each sparse dimension as having nonzeroes.
+ for (auto dst_dim : dst_sparse_dims)
+ {
+ if (!dst_dim_has_nonzeroes.at(dst_dim))
+ {
+ // Only add the index to the indices array if the current nonzero
+ // is the first nonzero of the block.
+ _dim_metadata.at(2 * dst_dim + 1).push_back(coordinate.at(dst_dim));
+ dst_dim_has_nonzeroes.at(dst_dim) = true;
+ }
+ }
+ }
+ else if (_format.at(num_expanded_dims - 1) == DimensionType::DENSE)
+ {
+ _data.push_back(src_data[dense_tensor_idx]);
+ }
+ --dst_dim_idx;
+ }
+ else
+ {
+ int original_dim_idx = _traversal_order.at(dst_dim_idx);
+ int dim_size = expanded_shape.at(original_dim_idx);
+ if (dst_dim_has_nonzeroes.at(dst_dim_idx))
+ {
+ // If the previous block has nonzeroes, reset the flag to false since
+ // we have just moved to a new block.
+ dst_dim_has_nonzeroes.at(dst_dim_idx) = false;
+ }
+ else if (_format.at(dst_dim_idx) == DimensionType::SPARSE_CSR)
+ {
+ // This block is empty. Delete unnecessary values if compressed.
+ int next_compressed_dim = inner_compressed_dim.at(dst_dim_idx);
+ int erase_offset = _dim_metadata.at(2 * dst_dim_idx + 1).size() *
+ num_segments_of_next_compressed_dim.at(dst_dim_idx);
+ if (next_compressed_dim >= 0)
+ {
+ auto &segments = _dim_metadata.at(2 * inner_compressed_dim.at(dst_dim_idx));
+ segments.erase(segments.begin() + 1 + erase_offset, segments.end());
+ }
+ else
+ {
+ _data.erase(_data.begin() + erase_offset, _data.end());
+ }
+ }
+ if (++coordinate.at(dst_dim_idx) < dim_size)
+ {
+ // The current dst_dim_idx is valid (not out of bound).
+ dense_tensor_idx += dst_ordered_offset.at(dst_dim_idx);
+ ++dst_dim_idx;
+ }
+ else
+ {
+ // dst_dim_idx has reached its dim size. Update segment array and go
+ // back to incrementing the previous dimension (dst_dim_idx - 1).
+ if (_format.at(dst_dim_idx) == DimensionType::SPARSE_CSR)
+ {
+ _dim_metadata.at(2 * dst_dim_idx).push_back(_dim_metadata.at(2 * dst_dim_idx + 1).size());
+ }
+ coordinate.at(dst_dim_idx) = -1;
+ dense_tensor_idx -= dst_ordered_offset.at(dst_dim_idx) * dim_size;
+ --dst_dim_idx;
+ }
+ }
+ }
+}
+
+template <typename T> bool Sparsifier<T>::IsZero(const T val) { return (val == 0); }
+
+template class Sparsifier<int32_t>;
+template class Sparsifier<int8_t>;
+template class Sparsifier<float>;
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/Sparsifier.h b/compiler/luci/pass/src/Sparsifier.h
new file mode 100644
index 000000000..71ea28da9
--- /dev/null
+++ b/compiler/luci/pass/src/Sparsifier.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_SPARSIFIER_H__
+#define __LUCI_SPARSIFIER_H__
+
+#include <vector>
+
+#include <luci/IR/SparsityParam.h>
+
+namespace luci
+{
+
+template <typename T> class Sparsifier
+{
+public:
+ /**
+ * Creates a dense-to-sparse converter.
+ * @param shape Shape of the dense tensor.
+ * @param traversal_order In what order to traverse all dimensions,
+ * including block dimensions.
+ * @param format Whether each dimension in the converted tensor is
+ * dense or sparse (not in the traversal order).
+ * @param block_size Size of each block dimension.
+ * @param block_map Map from block dimension to original tensor
+ * dimension.
+ */
+ Sparsifier(const std::vector<int> &shape, const std::vector<int> &traversal_order,
+ const std::vector<DimensionType> &format, const std::vector<int> &block_size = {},
+ const std::vector<int> &block_map = {});
+
+ std::vector<T> GetData() { return _data; }
+ std::vector<std::vector<int>> GetDimMetadata() { return _dim_metadata; }
+
+ void DenseToSparse(const T *src_data);
+
+private:
+ // Check if val is equal to zero.
+ bool IsZero(const T val);
+
+ // Shape of the conceptual dense tensor.
+ std::vector<int> _dense_shape;
+ // Shape of the dense tensor with inner blocks reduced. For example, a (4, 4)
+ // tensor with (2, 2) block has blocked_shape (2, 2).
+ std::vector<int> _blocked_shape;
+ // Total number of elements in the dense tensor.
+ uint64_t _dense_size;
+ // Has n (original dimensions) + k (block dimensions) elements.
+ std::vector<int> _traversal_order;
+ // Format of each dimension in the traversal order.
+ std::vector<DimensionType> _format;
+ // Size of each block dimension, in the same order as block map.
+ std::vector<int> _block_size;
+ // Map from block dimension to the original tensor dimension.
+ std::vector<int> _block_map;
+ // Metadata of each dimension in the traversal order.
+ // Each dimension needs two vectors. For dense dimensions, the first vector
+ // stores the size of that dimension, and the second vector is empty. For
+ // sparse dimensions, the first vector stores the segments and the second one
+ // stores the indices.
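+ // For example, a 3x3 tensor {{0, 1, 2}, {3, 0, 5}, {0, 0, 0}} with format
+ // (DENSE, SPARSE_CSR) stores {3} / {} for dimension 0 and
+ // segments {0, 2, 4, 4} / indices {1, 2, 0, 2} for dimension 1.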
+ std::vector<std::vector<int>> _dim_metadata;
+ // Actual buffer holding data after conversion. Could be sparse buffer or
+ // dense buffer.
+ std::vector<T> _data;
+};
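+
+// Usage sketch (mirrors SparsifierTest.NoBlockDenseCSR in Sparsifier.test.cpp;
+// illustrative only):
+//
+//   const std::vector<int32_t> dense = {0, 1, 2, 3, 0, 5, 0, 0, 0}; // 3x3
+//   Sparsifier<int32_t> sparsifier(/*shape=*/{3, 3}, /*traversal_order=*/{0, 1},
+//                                  {DimensionType::DENSE, DimensionType::SPARSE_CSR});
+//   sparsifier.DenseToSparse(dense.data());
+//   auto values = sparsifier.GetData();          // {1, 2, 3, 5}
+//   auto metadata = sparsifier.GetDimMetadata(); // metadata[2] == {0, 2, 4, 4},
+//                                                // metadata[3] == {1, 2, 0, 2}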
+
+extern template class Sparsifier<int32_t>;
+extern template class Sparsifier<int8_t>;
+extern template class Sparsifier<float>;
+
+} // namespace luci
+
+#endif // __LUCI_SPARSIFIER_H__
diff --git a/compiler/luci/pass/src/Sparsifier.test.cpp b/compiler/luci/pass/src/Sparsifier.test.cpp
new file mode 100644
index 000000000..14e24aad7
--- /dev/null
+++ b/compiler/luci/pass/src/Sparsifier.test.cpp
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Sparsifier.h"
+
+#include <vector>
+
+#include <gtest/gtest.h>
+
+TEST(SparsifierTest, NoBlockDenseDense)
+{
+ const std::vector<int32_t> dense_tensor_data = {1, 2, 3, 4, 5, 6};
+ const std::vector<int32_t> dense_shape = {2, 3};
+ const std::vector<int32_t> traversal_order = {0, 1};
+ const std::vector<luci::DimensionType> format = {luci::DimensionType::DENSE,
+ luci::DimensionType::DENSE};
+ luci::Sparsifier<int32_t> sparsifier(dense_shape, traversal_order, format);
+
+ sparsifier.DenseToSparse(dense_tensor_data.data());
+
+ const auto dim_metadata = sparsifier.GetDimMetadata();
+ const std::vector<int32_t> expected_dm0 = {2};
+ const std::vector<int32_t> expected_dm1 = {3};
+
+ EXPECT_EQ(/*dense_shape[0]=*/expected_dm0, dim_metadata[0]);
+ EXPECT_EQ(/*dense_shape[1]=*/expected_dm1, dim_metadata[2]);
+
+ const auto sparsed_data = sparsifier.GetData();
+ const std::vector<int32_t> expected_data = {1, 2, 3, 4, 5, 6};
+ EXPECT_EQ(expected_data, sparsed_data);
+}
+
+TEST(SparsifierTest, NoBlockDenseCSR)
+{
+ const std::vector<int32_t> dense_tensor_data = {0, 1, 2, 3, 0, 5, 0, 0, 0};
+ const std::vector<int32_t> dense_shape = {3, 3};
+ const std::vector<int32_t> traversal_order = {0, 1};
+ const std::vector<luci::DimensionType> format = {luci::DimensionType::DENSE,
+ luci::DimensionType::SPARSE_CSR};
+ luci::Sparsifier<int32_t> sparsifier(dense_shape, traversal_order, format);
+ sparsifier.DenseToSparse(dense_tensor_data.data());
+
+ const auto dim_metadata = sparsifier.GetDimMetadata();
+ const std::vector<int32_t> expected_dm0 = {3};
+ const std::vector<int32_t> expected_dm1 = {};
+ const std::vector<int32_t> expected_dm2 = {0, 2, 4, 4};
+ const std::vector<int32_t> expected_dm3 = {1, 2, 0, 2};
+
+ EXPECT_EQ(expected_dm0, dim_metadata[0]);
+ EXPECT_EQ(expected_dm1, dim_metadata[1]);
+ EXPECT_EQ(expected_dm2, dim_metadata[2]);
+ EXPECT_EQ(expected_dm3, dim_metadata[3]);
+
+ const auto data = sparsifier.GetData();
+ const std::vector<int32_t> expected_data = {1, 2, 3, 5};
+ EXPECT_EQ(expected_data, data);
+}
+
+TEST(SparsifierTest, BlockDenseDense)
+{
+ const std::vector<float> dense_tensor_data = {1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8,
+ 9.9, 10.0, 11.1, 12.2, 13.3, 14.4, 15.5, 16.6};
+ const std::vector<int32_t> dense_shape = {4, 4};
+ const std::vector<int32_t> traversal_order = {0, 1, 2, 3};
+ const std::vector<luci::DimensionType> format = {luci::DimensionType::DENSE,
+ luci::DimensionType::DENSE};
+ const std::vector<int32_t> block_size = {2, 2};
+ const std::vector<int32_t> block_map = {0, 1};
+ luci::Sparsifier<float> sparsifier(dense_shape, traversal_order, format, block_size, block_map);
+ sparsifier.DenseToSparse(dense_tensor_data.data());
+
+ const auto dim_metadata = sparsifier.GetDimMetadata();
+ const std::vector<int32_t> expected_dm0 = {2};
+ const std::vector<int32_t> expected_dm1 = {};
+ EXPECT_EQ(expected_dm0, dim_metadata[0]);
+ EXPECT_EQ(expected_dm1, dim_metadata[1]);
+ EXPECT_EQ(expected_dm0, dim_metadata[2]);
+ EXPECT_EQ(expected_dm1, dim_metadata[3]);
+ EXPECT_EQ(expected_dm0, dim_metadata[4]);
+ EXPECT_EQ(expected_dm1, dim_metadata[5]);
+ EXPECT_EQ(expected_dm0, dim_metadata[6]);
+ EXPECT_EQ(expected_dm1, dim_metadata[7]);
+
+ const auto data = sparsifier.GetData();
+ const std::vector<float> expected_data = {1.1, 2.2, 5.5, 6.6, 3.3, 4.4, 7.7, 8.8,
+ 9.9, 10.0, 13.3, 14.4, 11.1, 12.2, 15.5, 16.6};
+ EXPECT_EQ(expected_data, data);
+}
+
+TEST(SparsifierTest, BlockDenseSparse)
+{
+ const std::vector<int32_t> dense_tensor_data = {1, 2, 0, 0, 3, 4, 0, 0, 5, 6, 0, 0, 7, 8, 0, 0};
+ const std::vector<int32_t> dense_shape = {4, 4};
+ const std::vector<int32_t> traversal_order = {0, 1, 2, 3};
+ const std::vector<luci::DimensionType> format = {luci::DimensionType::DENSE,
+ luci::DimensionType::SPARSE_CSR};
+ const std::vector<int32_t> block_size = {2, 2};
+ const std::vector<int32_t> block_map = {0, 1};
+ luci::Sparsifier<int32_t> sparsifier(dense_shape, traversal_order, format, block_size, block_map);
+ sparsifier.DenseToSparse(dense_tensor_data.data());
+
+ const auto dim_metadata = sparsifier.GetDimMetadata();
+ const std::vector<int32_t> expected_dm0 = {2};
+ const std::vector<int32_t> expected_dm1 = {};
+ const std::vector<int32_t> expected_dm2 = {0, 1, 2};
+ const std::vector<int32_t> expected_dm3 = {0, 0};
+ EXPECT_EQ(expected_dm0, dim_metadata[0]);
+ EXPECT_EQ(expected_dm1, dim_metadata[1]);
+ EXPECT_EQ(expected_dm2, dim_metadata[2]);
+ EXPECT_EQ(expected_dm3, dim_metadata[3]);
+ EXPECT_EQ(expected_dm0, dim_metadata[4]);
+ EXPECT_EQ(expected_dm1, dim_metadata[5]);
+ EXPECT_EQ(expected_dm0, dim_metadata[6]);
+ EXPECT_EQ(expected_dm1, dim_metadata[7]);
+
+ const auto data = sparsifier.GetData();
+ const std::vector<int32_t> expected_data = {1, 2, 3, 4, 5, 6, 7, 8};
+ EXPECT_EQ(expected_data, data);
+}
+
+TEST(SparsifierTest, BlockDenseSparse_2)
+{
+ const std::vector<int32_t> dense_tensor_data = {0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11,
+ 12, 0, 20, 13, 0, 21, 14, 0, 22, 15, 0, 23};
+ const std::vector<int32_t> dense_shape = {8, 3};
+ const std::vector<int32_t> traversal_order = {0, 1, 2, 3};
+ const std::vector<luci::DimensionType> format = {luci::DimensionType::DENSE,
+ luci::DimensionType::SPARSE_CSR};
+ const std::vector<int32_t> block_size = {4, 1};
+ const std::vector<int32_t> block_map = {0, 1};
+ luci::Sparsifier<int32_t> sparsifier(dense_shape, traversal_order, format, block_size, block_map);
+ sparsifier.DenseToSparse(dense_tensor_data.data());
+
+ const auto dim_metadata = sparsifier.GetDimMetadata();
+ const std::vector<int32_t> expected_dm0 = {2};
+ const std::vector<int32_t> expected_dm1 = {};
+ const std::vector<int32_t> expected_dm2 = {0, 3, 5};
+ const std::vector<int32_t> expected_dm3 = {0, 1, 2, 0, 2};
+ const std::vector<int32_t> expected_dm4 = {4};
+ const std::vector<int32_t> expected_dm6 = {1};
+ EXPECT_EQ(expected_dm0, dim_metadata[0]);
+ EXPECT_EQ(expected_dm1, dim_metadata[1]);
+ EXPECT_EQ(expected_dm2, dim_metadata[2]);
+ EXPECT_EQ(expected_dm3, dim_metadata[3]);
+ EXPECT_EQ(expected_dm4, dim_metadata[4]);
+ EXPECT_EQ(expected_dm1, dim_metadata[5]);
+ EXPECT_EQ(expected_dm6, dim_metadata[6]);
+ EXPECT_EQ(expected_dm1, dim_metadata[7]);
+
+ const auto data = sparsifier.GetData();
+ const std::vector<int32_t> expected_data = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+ 10, 11, 12, 13, 14, 15, 20, 21, 22, 23};
+ EXPECT_EQ(expected_data, data);
+}
+
+TEST(SparsifierTest, WrongTraversalOrderRank_NEG)
+{
+ const std::vector<int32_t> dense_tensor_data = {0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11,
+ 12, 0, 20, 13, 0, 21, 14, 0, 22, 15, 0, 23};
+ const std::vector<int32_t> dense_shape = {8, 3};
+ const std::vector<int32_t> traversal_order = {0, 1};
+ const std::vector<luci::DimensionType> format = {luci::DimensionType::DENSE,
+ luci::DimensionType::SPARSE_CSR};
+ const std::vector<int32_t> block_size = {4, 1};
+ const std::vector<int32_t> block_map = {0, 1};
+ luci::Sparsifier<int32_t> sparsifier(dense_shape, traversal_order, format, block_size, block_map);
+ EXPECT_THROW(sparsifier.DenseToSparse(dense_tensor_data.data()), std::out_of_range);
+}
+
+TEST(SparsifierTest, WrongFormatRank_NEG)
+{
+ const std::vector<int32_t> dense_tensor_data = {0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11,
+ 12, 0, 20, 13, 0, 21, 14, 0, 22, 15, 0, 23};
+ const std::vector<int32_t> dense_shape = {8, 3};
+ const std::vector<int32_t> traversal_order = {0, 1, 2, 3};
+ const std::vector<luci::DimensionType> format = {luci::DimensionType::SPARSE_CSR};
+ const std::vector<int32_t> block_size = {4, 1};
+ const std::vector<int32_t> block_map = {0, 1};
+ EXPECT_THROW(
+ luci::Sparsifier<int32_t>(dense_shape, traversal_order, format, block_size, block_map),
+ std::out_of_range);
+}
diff --git a/compiler/luci/pass/src/SparsifyTensorPass.cpp b/compiler/luci/pass/src/SparsifyTensorPass.cpp
new file mode 100644
index 000000000..1a75bfb0c
--- /dev/null
+++ b/compiler/luci/pass/src/SparsifyTensorPass.cpp
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/SparsifyTensorPass.h"
+
+#include "Sparsifier.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace luci
+{
+
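+/**
+ * @brief Convert the constant tensor @p cop from a dense buffer to a sparse
+ *        buffer in place: its data is rewritten in sparse order and a
+ *        SparsityParam holding the traversal order, block map, and
+ *        per-dimension metadata is attached to the node.
+ */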
+template <loco::DataType DT> void SparsifyTensorPass::sparsify_tensor(luci::CircleConst *cop)
+{
+ using PRIMITIVE_DTYPE = typename loco::DataTypeImpl<DT>::Type;
+
+ std::vector<int32_t> dense_tensor_shape(cop->rank());
+ for (uint32_t d = 0; d < cop->rank(); d++)
+ {
+ dense_tensor_shape.at(d) = cop->dim(d).value();
+ }
+
+ Sparsifier<PRIMITIVE_DTYPE> sparsifier(dense_tensor_shape, _traversal_order, _format, _block_size,
+ _block_map);
+ // get dense tensor data
+ uint32_t dense_tensor_data_size = cop->size<DT>();
+ std::vector<PRIMITIVE_DTYPE> dense_tensor_data(dense_tensor_data_size);
+ for (uint32_t i = 0; i < dense_tensor_data_size; i++)
+ {
+ dense_tensor_data.at(i) = cop->at<DT>(i);
+ }
+ // sparsify
+ sparsifier.DenseToSparse(dense_tensor_data.data());
+ // get sparse tensor data
+ std::vector<PRIMITIVE_DTYPE> sparse_tensor_data = sparsifier.GetData();
+ uint32_t sparse_tensor_data_size = sparse_tensor_data.size();
+ cop->size<DT>(sparse_tensor_data_size);
+ for (uint32_t i = 0; i < sparse_tensor_data_size; i++)
+ {
+ cop->at<DT>(i) = sparse_tensor_data.at(i);
+ }
+ // make sparsity parameter
+ auto sparsityparam = std::make_unique<SparsityParam>();
+ sparsityparam->traversal_order = _traversal_order;
+ sparsityparam->block_map = _block_map;
+ // get dimension meta data
+ const auto dim_metadata = sparsifier.GetDimMetadata();
+ for (uint32_t idx = 0; idx < _format.size(); idx++)
+ {
+ if (_format.at(idx) == DimensionType::DENSE)
+ {
+ sparsityparam->dim_metadata.emplace_back(DimensionType::DENSE,
+ dim_metadata.at(idx * 2).at(0));
+ }
+ // TODO Set SparseIndexVectorType according to its data range
+ else if (_format.at(idx) == DimensionType::SPARSE_CSR)
+ {
+ sparsityparam->dim_metadata.emplace_back(
+ DimensionType::SPARSE_CSR, /* dense size */ 0,
+ /* array_segments */
+ SparseIndexVector{SparseIndexVectorType::U16, dim_metadata.at(idx * 2)},
+ /* array_indices */
+ SparseIndexVector{SparseIndexVectorType::U16, dim_metadata.at(idx * 2 + 1)});
+ }
+ }
+ for (uint32_t i = 0; i < _block_size.size(); i++)
+ {
+ assert(_block_size.at(i) == dim_metadata.at((_format.size() + i) * 2).at(0));
+ sparsityparam->dim_metadata.emplace_back(DimensionType::DENSE, _block_size.at(i));
+ }
+ cop->sparsityparam(std::move(sparsityparam));
+}
+
+bool SparsifyTensorPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto cop = dynamic_cast<luci::CircleConst *>(node);
+ if (not cop)
+ continue;
+
+ if (cop->name() != _tensor_name)
+ continue;
+
+ switch (cop->dtype())
+ {
+ case loco::DataType::S32:
+ sparsify_tensor<loco::DataType::S32>(cop);
+ break;
+ case loco::DataType::S8:
+ sparsify_tensor<loco::DataType::S8>(cop);
+ break;
+ case loco::DataType::FLOAT32:
+ sparsify_tensor<loco::DataType::FLOAT32>(cop);
+ break;
+ default:
+ throw std::runtime_error("SparsifyTensorPass: Unsupported dtype.");
+ }
+ changed = true;
+ }
+
+ return changed;
+}
+
+template void SparsifyTensorPass::sparsify_tensor<loco::DataType::S32>(luci::CircleConst *cop);
+template void SparsifyTensorPass::sparsify_tensor<loco::DataType::S8>(luci::CircleConst *cop);
+template void SparsifyTensorPass::sparsify_tensor<loco::DataType::FLOAT32>(luci::CircleConst *cop);
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/SparsifyTensorPass.test.cpp b/compiler/luci/pass/src/SparsifyTensorPass.test.cpp
new file mode 100644
index 000000000..372e8e5ca
--- /dev/null
+++ b/compiler/luci/pass/src/SparsifyTensorPass.test.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/SparsifyTensorPass.h"
+
+#include <gtest/gtest.h>
+
+TEST(SparsifyTensorPassTest, name)
+{
+ std::vector<int32_t> to;
+ std::vector<luci::DimensionType> vdt;
+ std::vector<int32_t> bs;
+ std::vector<int32_t> bm;
+ luci::SparsifyTensorPass pass("", to, vdt, bs, bm);
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
diff --git a/compiler/luci/pass/src/SubstitutePackToReshapePass.cpp b/compiler/luci/pass/src/SubstitutePackToReshapePass.cpp
new file mode 100644
index 000000000..d8676cd62
--- /dev/null
+++ b/compiler/luci/pass/src/SubstitutePackToReshapePass.cpp
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/SubstitutePackToReshapePass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+namespace
+{
+
+int32_t unknown_dim_count(luci::CircleNode *node)
+{
+ int32_t count = 0;
+
+ for (uint32_t i = 0; i < node->rank(); ++i)
+ if (!node->dim(i).known())
+ ++count;
+
+ return count;
+}
+
+bool substitute_pack_to_reshape(luci::CircleNode *node)
+{
+ auto target_node = dynamic_cast<luci::CirclePack *>(node);
+ if (target_node == nullptr)
+ return false;
+ if (target_node->values_count() != 1)
+ return false;
+ auto value_node = loco::must_cast<luci::CircleNode *>(target_node->values(0));
+ if (value_node->shape_status() != luci::ShapeStatus::VALID)
+ return false;
+ int32_t axis = target_node->axis();
+ if (axis < 0)
+ axis = axis + static_cast<int32_t>(value_node->rank()) + 1;
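+ // e.g., packing a rank-3 tensor with axis == -1 gives axis == 3 (a new last dimension)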
+
+ auto name = node->name();
+ assert(name.length() > 0);
+
+ auto graph = target_node->graph();
+ auto reshape_node = graph->nodes()->create<luci::CircleReshape>();
+ reshape_node->tensor(value_node);
+ reshape_node->name(name + "/Reshape");
+ luci::add_origin(reshape_node, luci::get_origin(node));
+
+ auto const_node = graph->nodes()->create<luci::CircleConst>();
+ const_node->dtype(loco::DataType::S32);
+ const_node->size<loco::DataType::S32>(value_node->rank() + 1);
+ const_node->shape_status(luci::ShapeStatus::VALID);
+ const_node->rank(1);
+ const_node->dim(0).set(value_node->rank() + 1);
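+ // Build the target shape: copy the input dimensions and insert 1 at `axis`;
+ // unknown dimensions are encoded as -1 so that Reshape can infer them.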
+ for (int32_t i = 0; i < static_cast<int32_t>(value_node->rank()) + 1; i++)
+ {
+ if (i == axis)
+ {
+ const_node->at<loco::DataType::S32>(i) = 1;
+ }
+ else if (i < axis)
+ {
+ const_node->at<loco::DataType::S32>(i) =
+ value_node->dim(i).known() ? value_node->dim(i).value() : -1;
+ }
+ else
+ {
+ const_node->at<loco::DataType::S32>(i) =
+ value_node->dim(i - 1).known() ? value_node->dim(i - 1).value() : -1;
+ }
+ }
+ const_node->name(name + "/Reshape/shape");
+ reshape_node->shape(const_node);
+ replace(target_node).with(reshape_node);
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * BEFORE
+ * |
+ * [CircleNode]
+ * |
+ * [CirclePack]
+ * |
+ * [CircleNode]
+ * |
+ *
+ * AFTER
+ * |
+ * [CircleNode] [CircleConst]
+ * | \ /
+ * [CirclePack] [CircleReshape]
+ * |
+ * [CircleNode]
+ * |
+ */
+bool SubstitutePackToReshapePass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ if (unknown_dim_count(circle_node) <= 1 && substitute_pack_to_reshape(circle_node))
+ {
+ changed = true;
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/SubstitutePackToReshapePass.test.cpp b/compiler/luci/pass/src/SubstitutePackToReshapePass.test.cpp
new file mode 100644
index 000000000..3b5d4ea2c
--- /dev/null
+++ b/compiler/luci/pass/src/SubstitutePackToReshapePass.test.cpp
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "luci/Pass/SubstitutePackToReshapePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+void create_substitute_pack_to_reshape(loco::Graph *g, const std::initializer_list<uint32_t> shape,
+ int32_t axis)
+{
+ assert(g);
+
+ // Create the input node.
+ auto input = g->nodes()->create<luci::CircleInput>();
+ auto graph_input = g->inputs()->create();
+ input->index(graph_input->index());
+ input->shape_status(luci::ShapeStatus::VALID);
+ input->rank(shape.size());
+ input->shape(shape);
+ input->name("input");
+
+ // Create the Pack node.
+ auto pack = g->nodes()->create<luci::CirclePack>(1);
+ pack->values(0, input);
+ pack->axis(axis);
+ pack->name("pack");
+
+ // Connect the output.
+ auto output = g->nodes()->create<luci::CircleOutput>();
+ output->from(pack);
+ auto graph_output = g->outputs()->create();
+ output->index(graph_output->index());
+ output->name("output");
+
+ return;
+}
+
+} // namespace
+
+TEST(SubstitutePackToReshapePassTest, name)
+{
+ luci::SubstitutePackToReshapePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST(SubstitutePackToReshapePass, simple_case)
+{
+ auto graph = loco::make_graph();
+ create_substitute_pack_to_reshape(graph.get(), {1, 2, 3, 4}, 0);
+ luci::SubstitutePackToReshapePass pass;
+ while (pass.run(graph.get()))
+ ;
+ luci::CircleReshape *reshape_node = nullptr;
+ luci::CirclePack *pack_node = nullptr;
+ for (auto node : loco::active_nodes(loco::output_nodes(graph.get())))
+ {
+ if (auto reshape = dynamic_cast<luci::CircleReshape *>(node))
+ reshape_node = reshape;
+ else if (auto pack = dynamic_cast<luci::CirclePack *>(node))
+ pack_node = pack;
+ }
+ ASSERT_NE(nullptr, reshape_node);
+ ASSERT_EQ(nullptr, pack_node);
+ auto new_shape = loco::must_cast<luci::CircleConst *>(reshape_node->shape());
+ ASSERT_EQ(1, new_shape->at<loco::DataType::S32>(0));
+ ASSERT_EQ(1, new_shape->at<loco::DataType::S32>(1));
+ ASSERT_EQ(2, new_shape->at<loco::DataType::S32>(2));
+ ASSERT_EQ(3, new_shape->at<loco::DataType::S32>(3));
+ ASSERT_EQ(4, new_shape->at<loco::DataType::S32>(4));
+}
+
+TEST(SubstitutePackToReshapePass, simple_case_neg_axis)
+{
+ auto graph = loco::make_graph();
+ create_substitute_pack_to_reshape(graph.get(), {1, 2, 3, 4}, -1);
+ luci::SubstitutePackToReshapePass pass;
+ while (pass.run(graph.get()))
+ ;
+ luci::CircleReshape *reshape_node = nullptr;
+ luci::CirclePack *pack_node = nullptr;
+ for (auto node : loco::active_nodes(loco::output_nodes(graph.get())))
+ {
+ if (auto reshape = dynamic_cast<luci::CircleReshape *>(node))
+ reshape_node = reshape;
+ else if (auto pack = dynamic_cast<luci::CirclePack *>(node))
+ pack_node = pack;
+ }
+ ASSERT_NE(nullptr, reshape_node);
+ ASSERT_EQ(nullptr, pack_node);
+ auto new_shape = loco::must_cast<luci::CircleConst *>(reshape_node->shape());
+ ASSERT_EQ(1, new_shape->at<loco::DataType::S32>(0));
+ ASSERT_EQ(2, new_shape->at<loco::DataType::S32>(1));
+ ASSERT_EQ(3, new_shape->at<loco::DataType::S32>(2));
+ ASSERT_EQ(4, new_shape->at<loco::DataType::S32>(3));
+ ASSERT_EQ(1, new_shape->at<loco::DataType::S32>(4));
+}
diff --git a/compiler/luci/pass/src/SubstitutePadV2ToPadPass.cpp b/compiler/luci/pass/src/SubstitutePadV2ToPadPass.cpp
new file mode 100644
index 000000000..549ed22ec
--- /dev/null
+++ b/compiler/luci/pass/src/SubstitutePadV2ToPadPass.cpp
@@ -0,0 +1,469 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/SubstitutePadV2ToPadPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+#include <vector>
+
+/**
+ * @brief Convert a PadV2 op to Pad under certain conditions
+ * @details The conditions to convert PadV2 to Pad are as follows:
+ *
+ * Basic Condition)
+ *
+ * C1) For all i, PadV2.input[i] >= 0
+ * C2) For all c, PadV2.constant_values[c] <= 0
+ * C3) PadV2 == MaxPool2D.value()
+ * C4) number of padded values at left < MaxPool2D.Filter.W
+ * number of padded values at right < MaxPool2D.Filter.W
+ * number of padded values at top < MaxPool2D.Filter.H
+ * number of padded values at bottom < MaxPool2D.Filter.H
+ *
+ * Example graph is as follows:
+ *
+ * %1 = CircleRelu # relu_output[i] >= 0
+ * %2 = CirclePadV2(%1, constant_values <= 0)
+ * %3 = CircleMaxPool2D(%2, ...) # output will be chosen from relu_output
+ *
+ * In this case, it's OK to replace PadV2 with Pad, which uses 0 as padding constant.
+ *
+ * Optional Condition)
+ *
+ * Terminology)
+ * - 'reshaping op' : an op that does not change tensor values
+ * but changes their positions, e.g., Transpose, Reshape, Slice, etc.
+ *
+ * C5) The input of PadV2 could be a 'reshaping op'. An example is as follows:
+ *
+ * %1 = CircleRelu # output[i] >= 0
+ * %2 = CircleTranspose(%1) # reshaping op
+ * ... # more reshaping ops
+ * %n = CirclePadV2(%n-1, constant_values <= 0)
+ * %n+1 = CircleMaxPool2D(%n, ...)
+ *
+ * C6) PadV2 could be an input of a 'reshaping op'. An example is as follows:
+ *
+ * %1 = CircleRelu
+ * %2 = CirclePadV2(%1, constant_values <= 0)
+ * %3 = CircleTranspose(%2) # reshaping op
+ * ... # more reshaping ops
+ * %n = CircleMaxPool2D(%n-1, ...)
+ *
+ * Why is this pass required?
+ *
+ * When a PyTorch model is converted into a Circle model, PadV2 is sometimes inserted with
+ * the following pattern:
+ *
+ * %1 = Circle.Conv2D(..., activation = Relu)
+ * %2 = Circle.Transpose(%1, perm=[0,3,1,2])
+ * %3 = Circle.PadV2(%2, constant_values = -3.4028234663852886e+38)
+ * %4 = Circle.Transpose(%3, perm=[0,2,3,1])
+ * %5 = Circle.MaxPool2D(%4, filter=[3,3], padding="VALID")
+ *
+ * The large negative padding constant of %3 causes problems when the model is quantized.
+ * So we need to convert the negative number to a number in a reasonable range for
+ * quantization, e.g., zero.
+ */
+namespace
+{
+
+struct Paddings
+{
+ struct Pad
+ {
+ int32_t front;
+ int32_t end;
+ };
+ /**
+ * @brief Store paddings position information.
+ * @details _padding_pos[k] stores the Pad object for axis k
+ *
+ * @note Paddings must be for rank 4 tensor
+ */
+ std::vector<Pad> _padding_pos;
+
+ Paddings(luci::CircleConst *paddings)
+ {
+ assert(paddings->dtype() == loco::DataType::S32);
+ assert(paddings->rank() == 2);
+ assert(paddings->dim(1).value() == 2);
+ assert(paddings->size<loco::DataType::S32>() == paddings->rank() * 4);
+
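+ // paddings is a [4, 2] S32 tensor: row i holds the {front, end} padding
+ // amounts for axis i.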
+ for (uint32_t i = 0; i < paddings->dim(0).value(); i++)
+ {
+ Pad pad{.front = paddings->at<loco::DataType::S32>(i * 2),
+ .end = paddings->at<loco::DataType::S32>(i * 2 + 1)};
+ _padding_pos.emplace_back(pad);
+ }
+
+ assert(_padding_pos.size() == 4);
+ }
+
+ /**
+ * @brief Check if this padding area is covered by filter
+ *
+ * @note This is to check condition C4).
+ * _padding_pos should store values according to NHWC.
+ */
+ bool smaller_than(int32_t filter_h, int32_t filter_w)
+ {
+ auto &pad_H = _padding_pos.at(1);
+ auto &pad_W = _padding_pos.at(2);
+
+ return (pad_H.front < filter_h) && (pad_H.end < filter_h) && (pad_W.front < filter_w) &&
+ (pad_W.end < filter_w);
+ }
+
+ /**
+ * @brief Track how paddings change after CircleTranspose
+ * @details Consider the following graph,
+ *
+ * %1 = Circle.Input
+ * %2 = Circle.PadV2(%1,
+ * paddings=[[0, 0], [0, 0], [2, 3], [4, 5]],
+ * padding_value = -100)
+ * %3 = Circle.Transpose(%2, perm[0, 2, 3, 1])
+ *
+ * Output of %3 has padding constant value(-100) from %2 at position below:
+ *
+ * - axis | front | end
+ * ------|-------|-----
+ * 0 | 0 | 0
+ * 1 | 2 | 3
+ * 2 | 4 | 5
+ * 3 | 0 | 0
+ *
+ * This method keeps track of such change of padding position.
+ */
+ void apply(luci::CircleTranspose *transpose)
+ {
+ assert(transpose);
+ luci::CircleConst *perm = loco::must_cast<luci::CircleConst *>(transpose->perm());
+
+ std::vector<Pad> transposed_pos;
+ transposed_pos.resize(4);
+
+ for (uint32_t to = 0; to < 4; to++)
+ {
+ int32_t from = perm->at<loco::DataType::S32>(to);
+ transposed_pos.at(to) = _padding_pos.at(from);
+ }
+
+ _padding_pos = transposed_pos;
+ }
+};
+
+struct ReshapingNode
+{
+ /// @brief Check if node is 'reshaping op'
+ static bool check(loco::Node *node)
+ {
+ if (dynamic_cast<luci::CircleTranspose *>(node))
+ return true;
+ // add more 'reshaping op'
+
+ return false;
+ }
+
+ /// @brief Return the reshaping op's input
+ static loco::Node *input(loco::Node *node)
+ {
+ if (auto transpose = dynamic_cast<luci::CircleTranspose *>(node))
+ return transpose->a();
+ // add more 'reshaping op'
+
+ throw std::runtime_error("Not yet supported reshaping op");
+ }
+};
+
+/// @brief Return the node's successor if it has exactly one, otherwise nullptr
+loco::Node *get_only_succ(loco::Node *parent)
+{
+ assert(parent);
+
+ auto successors = loco::succs(parent);
+ if (successors.size() != 1)
+ return nullptr;
+
+ return *successors.begin();
+}
+
+// Check conditions C1) and C5)
+bool positive_or_zero(loco::Node *ifm)
+{
+ assert(ifm);
+
+ if (ReshapingNode::check(ifm))
+ return positive_or_zero(ReshapingNode::input(ifm));
+
+ // Since Relu.output[i] >= 0
+ if (dynamic_cast<luci::CircleRelu *>(ifm))
+ return true;
+ if (auto conv = dynamic_cast<luci::CircleConv2D *>(ifm))
+ {
+ if (conv->fusedActivationFunction() == luci::FusedActFunc::RELU)
+ return true;
+ // Add more FusedActFunc
+ }
+ // Add more ops of which output[i] >= 0
+
+ return false;
+}
+
+template <loco::DataType DT> bool has_all_positive_values(luci::CircleConst *node)
+{
+ // Only numeric datatype is allowed
+ static_assert(DT != loco::DataType::Unknown);
+ static_assert(DT != loco::DataType::STRING);
+
+ assert(node);
+
+ auto size = node->size<DT>();
+ for (decltype(size) t = 0; t < size; t++)
+ {
+ typename loco::DataTypeImpl<DT>::Type val = node->at<DT>(t);
+ if (val <= 0)
+ return false;
+ }
+
+ return true;
+}
+
+// To check condition C2)
+bool has_all_positive_values(luci::CircleConst *node)
+{
+ assert(node);
+
+ if (node->dtype() == loco::DataType::FLOAT32)
+ return has_all_positive_values<loco::DataType::FLOAT32>(node);
+ // Add more datatype
+
+ throw std::runtime_error("Not yet supported datatype");
+}
+
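+// Walk down the single-successor chain of 'reshaping op's, updating the padding
+// positions at each CircleTranspose, until a CircleMaxPool2D is reached; then
+// check condition C4) against its filter size.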
+bool used_by_maxpool_only(luci::CircleNode *node, Paddings &paddings)
+{
+ auto successor = get_only_succ(node);
+
+ // when successor is not only-succ
+ if (successor == nullptr)
+ return false;
+
+ if (auto maxpool = dynamic_cast<luci::CircleMaxPool2D *>(successor))
+ {
+ // Let's check condition C4)
+ return paddings.smaller_than(maxpool->filter()->h(), maxpool->filter()->w());
+ }
+
+ // Let's check condition C6)
+ if (auto transpose = dynamic_cast<luci::CircleTranspose *>(successor))
+ {
+ auto appropriate = [](luci::CircleTranspose *transpose) {
+ luci::CircleConst *perm = loco::must_cast<luci::CircleConst *>(transpose->perm());
+
+ // For Transpose to be an input for MaxPool2D
+ return (transpose->rank() == 4) && (perm && perm->dtype() == loco::DataType::S32) &&
+ (perm->size<loco::DataType::S32>() == 4);
+ };
+
+ if (not appropriate(transpose))
+ return false;
+
+ paddings.apply(transpose);
+ return used_by_maxpool_only(transpose, paddings);
+ }
+ // Support more 'reshaping op' later
+
+ return false;
+}
+
+// Check conditions C3), C4) and C6)
+bool used_by_maxpool_only(luci::CirclePadV2 *pad_v2)
+{
+ // For PadV2 to be an input for MaxPool2D
+ if (pad_v2->rank() != 4)
+ return false;
+
+ Paddings paddings(loco::must_cast<luci::CircleConst *>(pad_v2->paddings()));
+
+ return used_by_maxpool_only(pad_v2, paddings);
+}
+
+loco::Node *build_pad_from(luci::CirclePadV2 *pad_v2)
+{
+ auto copy_shape = [](const luci::CircleNode *src, luci::CircleNode *dest) {
+ auto rank = src->rank();
+ dest->rank(rank);
+
+ for (decltype(rank) axis = 0; axis < rank; axis++)
+ dest->dim(axis) = src->dim(axis);
+ };
+
+ auto g = pad_v2->graph();
+
+ auto pad = g->nodes()->create<luci::CirclePad>();
+ {
+ pad->name(pad_v2->name() + "/pad");
+ luci::add_origin(pad, luci::get_origin(pad_v2));
+
+ pad->dtype(pad_v2->dtype());
+ copy_shape(pad_v2, pad);
+
+ pad->input(pad_v2->input());
+ pad->paddings(pad_v2->paddings());
+ }
+
+ return pad;
+}
+
+luci::CirclePadV2 *get_padv2(loco::Node *node)
+{
+ if (auto padv2 = dynamic_cast<luci::CirclePadV2 *>(node))
+ return padv2;
+
+ if (ReshapingNode::check(node))
+ return get_padv2(ReshapingNode::input(node));
+
+ return nullptr;
+}
+
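+// Try to replace the PadV2 feeding this MaxPool2D with a Pad, checking
+// conditions C1) through C6) described above; return true on success.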
+bool substitute_padv2_to_pad(luci::CircleMaxPool2D *maxp)
+{
+ // precondition
+ assert(maxp);
+ assert(maxp->value());
+
+ auto pad_v2 = get_padv2(maxp->value());
+
+ if (pad_v2 == nullptr)
+ return false;
+
+ assert(pad_v2->input());
+
+ auto paddings = loco::must_cast<luci::CircleConst *>(pad_v2->paddings());
+ auto constant_values = loco::must_cast<luci::CircleConst *>(pad_v2->constant_values());
+
+ (void)paddings;
+ assert(paddings);
+ assert(paddings->dtype() == loco::DataType::S32);
+ assert(constant_values);
+ assert(constant_values->dtype() == pad_v2->dtype());
+
+ if (not positive_or_zero(pad_v2->input()))
+ return false;
+
+ if (has_all_positive_values(constant_values))
+ return false;
+
+ if (not used_by_maxpool_only(pad_v2))
+ return false;
+
+ auto pad = build_pad_from(pad_v2);
+
+ replace(pad_v2).with(pad);
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * Case 1) Basic case
+ *
+ * BEFORE
+ * [CircleRelu]
+ * |
+ * | [CircleConst] [CircleConst]
+ * | | |
+ * -------+----------------------
+ * |
+ * [CirclePadV2]
+ * |
+ * [CircleMaxPool2D]
+ * |
+ *
+ * AFTER
+ * [CircleRelu]
+ * |
+ * | [CircleConst] [CircleNode] [CircleConst]
+ * | | | | |
+ * -------+------- -------------+--------------+
+ * | |
+ * [CirclePad] [CirclePadV2]
+ * |
+ * [CircleMaxPool2D]
+ * |
+ *
+ * Case 2) During conversion from a PyTorch model into a Circle model,
+ * it is common that 'reshaping op's, e.g., CircleTranspose,
+ * are inserted between operations to switch layout from NCHW to NHWC and vice versa.
+ * This pass also needs to handle such situations.
+ *
+ * BEFORE
+ * [CircleRelu]
+ * |
+ * | [CircleConst] [CircleConst]
+ * | | |
+ * -------+----------------------
+ * |
+ * [CircleTranspose]
+ * |
+ * [CirclePadV2]
+ * |
+ * [CircleTranspose]
+ * |
+ * [CircleMaxPool2D]
+ * |
+ *
+ * AFTER
+ * [CircleRelu]
+ * |
+ * | [CircleConst] [CircleNode] [CircleConst]
+ * | | | | |
+ * -------+------- -------------+--------------+
+ * | |
+ * [CircleTranspose] [CirclePadV2]
+ * |
+ * [CirclePad]
+ * |
+ * [CircleTranspose]
+ * |
+ * [CircleMaxPool2D]
+ * |
+ */
+bool SubstitutePadV2ToPadPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto circle_node = dynamic_cast<luci::CircleMaxPool2D *>(node))
+ {
+ if (substitute_padv2_to_pad(circle_node))
+ {
+ changed = true;
+ }
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/SubstitutePadV2ToPadPass.test.cpp b/compiler/luci/pass/src/SubstitutePadV2ToPadPass.test.cpp
new file mode 100644
index 000000000..872968250
--- /dev/null
+++ b/compiler/luci/pass/src/SubstitutePadV2ToPadPass.test.cpp
@@ -0,0 +1,555 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "luci/Pass/SubstitutePadV2ToPadPass.h"
+#include "luci/Pass/CircleShapeInferencePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using UIntList = std::initializer_list<uint32_t>;
+using IntList = std::initializer_list<int32_t>;
+
+// convert shape in UIntList to loco::TensorShape
+std::unique_ptr<loco::TensorShape> tensor_shape(const UIntList &values)
+{
+ auto shape = std::make_unique<loco::TensorShape>();
+ {
+ shape->rank(values.size());
+
+ uint32_t r = 0;
+ for (auto v : values)
+ shape->dim(r++).set(v);
+ }
+ return shape;
+}
+
+class TestGraph
+{
+public:
+ void init(const UIntList &input_shape, const UIntList &output_shape)
+ {
+ _input = _g.nodes()->create<luci::CircleInput>();
+ {
+ _input->name("input");
+ _input->dtype(loco::DataType::FLOAT32);
+ _input->shape(input_shape);
+
+ auto graph_input = _g.inputs()->create();
+ {
+ _input->index(graph_input->index());
+ graph_input->shape(std::move(tensor_shape(input_shape)));
+ }
+ }
+
+ _output = _g.nodes()->create<luci::CircleOutput>();
+ {
+ _output->name("output");
+ _output->dtype(loco::DataType::FLOAT32);
+ _output->shape(output_shape);
+
+ auto graph_output = _g.outputs()->create();
+ {
+ _output->index(graph_output->index());
+ graph_output->shape(std::move(tensor_shape(output_shape)));
+ }
+ }
+
+ // subclass should implement build_body()
+ auto graphlet_before_output = build_body(_input);
+
+ _output->from(graphlet_before_output);
+ }
+
+ // build luci::CircleConst for paddings
+ luci::CircleConst *paddings_const(const std::vector<int32_t> &plist)
+ {
+ assert(plist.size() == 8);
+
+ auto node = _g.nodes()->create<luci::CircleConst>();
+ {
+ node->dtype(loco::DataType::S32);
+ node->shape({4, 2});
+ node->size<loco::DataType::S32>(8);
+
+ for (int32_t t = 0; t < 8; t++)
+ node->at<loco::DataType::S32>(t) = plist.at(t);
+ }
+
+ return node;
+ }
+
+ // build luci::CircleConst for paddings value
+ luci::CircleConst *padding_val_const(float val)
+ {
+ auto node = _g.nodes()->create<luci::CircleConst>();
+ {
+ node->dtype(loco::DataType::FLOAT32);
+ node->shape({1});
+ node->size<loco::DataType::FLOAT32>(1);
+
+ node->at<loco::DataType::FLOAT32>(0) = val;
+ }
+
+ return node;
+ }
+
+ // build luci::CirclePadV2
+ luci::CirclePadV2 *padV2(loco::Node *input, const std::vector<int32_t> &paddings,
+ float padding_constant)
+ {
+ auto padv2 = _g.nodes()->create<luci::CirclePadV2>();
+ {
+ padv2->name("PadV2");
+ padv2->dtype(loco::DataType::FLOAT32);
+
+ padv2->input(input);
+ padv2->paddings(paddings_const(paddings));
+ padv2->constant_values(padding_val_const(padding_constant));
+ // No shape setting. ShapeInference should be run later
+ }
+ return padv2;
+ }
+
+ // build luci::CircleMaxPool2D
+ luci::CircleMaxPool2D *maxpool2d(loco::Node *input,
+ const std::pair<uint32_t, uint32_t> &kernel_HW)
+ {
+ auto mp = _g.nodes()->create<luci::CircleMaxPool2D>();
+ {
+ mp->value(input);
+ mp->fusedActivationFunction(luci::FusedActFunc::NONE);
+ mp->padding(luci::Padding::VALID);
+ mp->filter()->h(kernel_HW.first);
+ mp->filter()->w(kernel_HW.second);
+ mp->stride()->h(1);
+ mp->stride()->w(1);
+
+ mp->dtype(loco::DataType::FLOAT32);
+ // No shape setting. ShapeInference should be run later
+ }
+ return mp;
+ }
+
+ // build luci::CircleRelu
+ luci::CircleRelu *relu(loco::Node *input)
+ {
+ auto relu = _g.nodes()->create<luci::CircleRelu>();
+ {
+ relu->features(input);
+ relu->dtype(loco::DataType::FLOAT32);
+ // No shape setting. ShapeInference should be run later
+ }
+ return relu;
+ }
+
+ // build luci::CircleTranspose
+ luci::CircleTranspose *transpose(loco::Node *input, const std::vector<int32_t> &perm_v)
+ {
+ auto perm = _g.nodes()->create<luci::CircleConst>();
+ {
+ auto rank = static_cast<uint32_t>(perm_v.size());
+ perm->dtype(loco::DataType::S32);
+ perm->size<loco::DataType::S32>(rank);
+ perm->shape({rank});
+ for (decltype(rank) d = 0; d < rank; d++)
+ perm->at<loco::DataType::S32>(d) = perm_v.at(d);
+ }
+ auto transpose_node = _g.nodes()->create<luci::CircleTranspose>();
+ {
+ transpose_node->a(input);
+ transpose_node->perm(perm);
+ transpose_node->dtype(loco::DataType::S32);
+ // No shape setting. ShapeInference should be run later
+ }
+ return transpose_node;
+ }
+
+ loco::Graph *g() { return &_g; }
+ luci::CircleOutput *output() { return _output; }
+
+ virtual loco::Node *build_body(loco::Node *input) = 0;
+
+private:
+ loco::Graph _g;
+ luci::CircleInput *_input = nullptr;
+ luci::CircleOutput *_output = nullptr;
+};
+
+class SubstitutePadV2ToPadPassTest : public ::testing::Test
+{
+public:
+ SubstitutePadV2ToPadPassTest() = default;
+
+ bool run_pass(loco::Graph *g)
+ {
+ _shapeinf_pass.run(g);
+
+ return _pad_pass.run(g);
+ }
+
+protected:
+ luci::SubstitutePadV2ToPadPass _pad_pass;
+ luci::CircleShapeInferencePass _shapeinf_pass;
+};
+
+} // namespace
+
+/**
+ * Graph that is changed by SubstitutePadV2ToPadPass
+ *
+ * [CircleInput]
+ * |
+ * [Relu]
+ * |
+ * [CirclePadV2] pad.H.front = 1, pad.H.end = 1, pad.W.front = 1, pad.W.end = 1
+ * |
+ * [MaxPool2D] filter.H = 2, filter.W = 2
+ * |
+ * [CircleOutput]
+ */
+TEST_F(SubstitutePadV2ToPadPassTest, basic_case)
+{
+ struct Graph_basic : public TestGraph
+ {
+ Graph_basic()
+ {
+ UIntList input_shape = {1, 4, 4, 3};
+ UIntList output_shape = {1, 6, 6, 3};
+ init(input_shape, output_shape);
+ }
+
+ loco::Node *build_body(loco::Node *input) final
+ {
+ auto relu_node = relu(input);
+
+ IntList paddings = {0, 0, 1, 1, 1, 1, 0, 0};
+ auto padding_const = -10.0;
+ auto padV2_node = padV2(relu_node, paddings, padding_const);
+
+ return maxpool2d(padV2_node, {2, 2});
+ }
+ } graph;
+
+ auto result = run_pass(graph.g());
+ ASSERT_TRUE(result);
+
+ // Checking CircleMaxPool2D
+ auto maxpool = dynamic_cast<luci::CircleMaxPool2D *>(graph.output()->from());
+ ASSERT_TRUE(maxpool != nullptr);
+
+ // Checking CirclePad
+ auto pad = dynamic_cast<luci::CirclePad *>(maxpool->value());
+ ASSERT_TRUE(pad != nullptr);
+
+ // Checking CircleRelu
+ auto relu = dynamic_cast<luci::CircleRelu *>(pad->input());
+ ASSERT_TRUE(relu != nullptr);
+
+ auto input = dynamic_cast<luci::CircleInput *>(relu->features());
+ ASSERT_TRUE(input != nullptr);
+}
+
+/**
+ * Graph that is changed by SubstitutePadV2ToPadPass
+ *
+ * Transpose ops are inserted, e.g., to switch layout between NHWC and NCHW
+ *
+ * [CircleInput]
+ * |
+ * [Relu]
+ * | 1x4x4x3 (NHWC)
+ * [Transpose] perm=[0,3,1,2]
+ * | 1x3x4x4 (NCHW)
+ * [CirclePadV2] paddings=[0,0,0,0,1,1,1,1]
+ * | 1x3x6x6 (NCHW)
+ * [Transpose] perm=[0,2,3,1]
+ * | 1x6x6x3 (NHWC)
+ * [MaxPool2D] filter.H = 3, filter.W = 3
+ * | 1x4x4x3 (NHWC)
+ * [CircleOutput]
+ */
+TEST_F(SubstitutePadV2ToPadPassTest, reshaping_op_case)
+{
+ struct Graph_Reshaping_Op : public TestGraph
+ {
+ Graph_Reshaping_Op()
+ {
+ UIntList input_shape = {1, 4, 4, 3};
+ UIntList output_shape = {1, 4, 4, 3};
+ init(input_shape, output_shape);
+ }
+
+ loco::Node *build_body(loco::Node *input) final
+ {
+ auto relu_node = relu(input);
+
+ auto transpose1_node = transpose(relu_node, {0, 3, 1, 2});
+
+ IntList paddings = {0, 0, 0, 0, 1, 1, 1, 1};
+ auto padding_const = -10.0;
+ auto padV2_node = padV2(transpose1_node, paddings, padding_const);
+
+ auto transpose2_node = transpose(padV2_node, {0, 2, 3, 1});
+
+ return maxpool2d(transpose2_node, {3, 3});
+ }
+ } graph;
+
+ auto result = run_pass(graph.g());
+ ASSERT_TRUE(result);
+
+ // Checking CircleMaxPool2D
+ auto maxpool = dynamic_cast<luci::CircleMaxPool2D *>(graph.output()->from());
+ ASSERT_TRUE(maxpool != nullptr);
+
+ // Checking Transpose
+ auto transpose1 = dynamic_cast<luci::CircleTranspose *>(maxpool->value());
+ ASSERT_TRUE(transpose1 != nullptr);
+
+ // Checking CirclePad
+ auto pad = dynamic_cast<luci::CirclePad *>(transpose1->a());
+ ASSERT_TRUE(pad != nullptr);
+
+ // Checking Transpose
+ auto transpose2 = dynamic_cast<luci::CircleTranspose *>(pad->input());
+ ASSERT_TRUE(transpose2 != nullptr);
+
+ // Checking CircleRelu
+ auto relu = dynamic_cast<luci::CircleRelu *>(transpose2->a());
+ ASSERT_TRUE(relu != nullptr);
+
+ auto input = dynamic_cast<luci::CircleInput *>(relu->features());
+ ASSERT_TRUE(input != nullptr);
+}
+
+//
+// Negative Tests
+//
+
+/**
+ * Graph that is not changed by SubstitutePadV2ToPadPass
+ *
+ * [CircleInput]
+ * |
+ * [CirclePadV2]
+ * |
+ * [CircleOutput]
+ */
+TEST_F(SubstitutePadV2ToPadPassTest, no_relu_maxpool_NEG)
+{
+ struct Graph_No_MaxPool : public TestGraph
+ {
+ Graph_No_MaxPool()
+ {
+ UIntList input_shape = {1, 4, 4, 3};
+ UIntList output_shape = {1, 6, 8, 3};
+ init(input_shape, output_shape);
+ }
+
+ loco::Node *build_body(loco::Node *input) final
+ {
+ IntList paddings = {0, 0, 1, 1, 2, 2, 0, 0};
+ auto padding_const = -10.0;
+ return padV2(input, paddings, padding_const);
+ }
+ } graph;
+
+ auto result = run_pass(graph.g());
+
+ ASSERT_FALSE(result);
+}
+
+/**
+ * Graph that is not changed by SubstitutePadV2ToPadPass
+ *
+ * There is no CircleMaxPool2D.
+ *
+ * [CircleInput]
+ * |
+ * [CircleRelu]
+ * |
+ * [CirclePadV2]
+ * |
+ * [CircleOutput]
+ */
+TEST_F(SubstitutePadV2ToPadPassTest, no_maxpool_NEG)
+{
+ struct Graph_No_MaxPool : public TestGraph
+ {
+ Graph_No_MaxPool()
+ {
+ UIntList input_shape = {1, 4, 4, 3};
+ UIntList output_shape = {1, 6, 8, 3};
+ init(input_shape, output_shape);
+ }
+
+ loco::Node *build_body(loco::Node *input) final
+ {
+ auto relu_node = relu(input);
+
+ IntList paddings = {0, 0, 1, 1, 2, 2, 0, 0};
+ auto padding_const = -10.0;
+ return padV2(relu_node, paddings, padding_const);
+ }
+ } graph;
+
+ auto result = run_pass(graph.g());
+
+ ASSERT_FALSE(result);
+}
+
+/**
+ * Graph where PadV2 has non-negative constant value
+ *
+ * [CircleInput]
+ * |
+ * [Relu]
+ * |
+ * [CirclePadV2]
+ * |
+ * [MaxPool2D]
+ * |
+ * [CircleOutput]
+ */
+TEST_F(SubstitutePadV2ToPadPassTest, non_negative_NEG)
+{
+ struct NegGraph : public TestGraph
+ {
+ NegGraph()
+ {
+ UIntList input_shape = {1, 4, 4, 3};
+ UIntList output_shape = {1, 6, 6, 3};
+ init(input_shape, output_shape);
+ }
+
+ loco::Node *build_body(loco::Node *input) final
+ {
+ constexpr auto POSITIVE_CONST_VALUE = 0.1f;
+
+ auto relu_node = relu(input);
+
+ IntList paddings = {0, 0, 1, 1, 1, 1, 0, 0};
+ auto padV2_node = padV2(relu_node, paddings, POSITIVE_CONST_VALUE);
+
+ return maxpool2d(padV2_node, {2, 2});
+ }
+ } graph;
+
+ auto result = run_pass(graph.g());
+
+ ASSERT_FALSE(result);
+}
+
+/**
+ * Graph that has PadV2.padding wider than MaxPool2D.Filter
+ *
+ * [CircleInput]
+ * |
+ * [CircleRelu]
+ * |
+ * [CirclePadV2] paddings=[0, 0, 3, 3, 1, 1, 0, 0]
+ * |
+ * [CircleMaxPool2D] Filter_H = 2, Filter_W = 2 (Filter_H < paddings for H)
+ * |
+ * [CircleOutput]
+ */
+TEST_F(SubstitutePadV2ToPadPassTest, wider_paddings_01_NEG)
+{
+ struct NegGraph : public TestGraph
+ {
+ NegGraph()
+ {
+ UIntList input_shape = {1, 4, 4, 3};
+ UIntList output_shape = {1, 9, 5, 3};
+ init(input_shape, output_shape);
+ }
+
+ loco::Node *build_body(loco::Node *input) final
+ {
+ auto relu_node = relu(input);
+
+ constexpr auto TOO_WIDE_H_FRONT = 3;
+ constexpr auto TOO_WIDE_H_END = 3;
+
+ IntList paddings = {0, 0, TOO_WIDE_H_FRONT, TOO_WIDE_H_END, 1, 1, 0, 0};
+ auto padding_const = -10.0;
+ auto padv2 = padV2(relu_node, paddings, padding_const);
+
+ return maxpool2d(padv2, {2, 2});
+ }
+ } graph;
+
+ auto result = run_pass(graph.g());
+
+ ASSERT_FALSE(result);
+}
+
+/**
+ * Graph that has PadV2.paddings wider than MaxPool2D.Filter
+ *
+ * Transpose ops are inserted, e.g., to switch layout between NHWC and NCHW
+ *
+ * [CircleInput]
+ * |
+ * [Relu]
+ * | 1x4x4x3 (NHWC)
+ * [Transpose] perm=[0,3,1,2]
+ * | 1x3x4x4 (NCHW)
+ * [CirclePadV2] paddings=[0,0,0,0,3,3,1,1]
+ * | 1x3x6x6 (NCHW)
+ * [Transpose] perm=[0,2,3,1]
+ * | 1x6x6x3 (NHWC)
+ * [MaxPool2D] filter.H = 2, filter.W = 2
+ * | 1x4x4x3
+ * [CircleOutput]
+ */
+TEST_F(SubstitutePadV2ToPadPassTest, wider_paddings_02_NEG)
+{
+ struct Graph_Reshaping_Op : public TestGraph
+ {
+ Graph_Reshaping_Op()
+ {
+ UIntList input_shape = {1, 4, 4, 3};
+ UIntList output_shape = {1, 9, 5, 3};
+ init(input_shape, output_shape);
+ }
+
+ loco::Node *build_body(loco::Node *input) final
+ {
+ auto relu_node = relu(input);
+
+ auto transpose1_node = transpose(relu_node, {0, 3, 1, 2});
+
+ constexpr auto TOO_WIDE_H_FRONT = 3;
+ constexpr auto TOO_WIDE_H_END = 3;
+
+ IntList paddings = {0, 0, 0, 0, TOO_WIDE_H_FRONT, TOO_WIDE_H_END, 1, 1};
+ auto padding_const = -10.0;
+ auto padV2_node = padV2(transpose1_node, paddings, padding_const);
+
+ auto transpose2_node = transpose(padV2_node, {0, 2, 3, 1});
+
+ return maxpool2d(transpose2_node, {3, 3});
+ }
+ } graph;
+
+ auto result = run_pass(graph.g());
+ ASSERT_FALSE(result);
+}
diff --git a/compiler/luci/pass/src/SubstituteSplitVToSplitPass.cpp b/compiler/luci/pass/src/SubstituteSplitVToSplitPass.cpp
new file mode 100644
index 000000000..57c386d99
--- /dev/null
+++ b/compiler/luci/pass/src/SubstituteSplitVToSplitPass.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/SubstituteSplitVToSplitPass.h"
+
+#include <loco.h>
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+namespace
+{
+
+// SplitV is substituted with Split if the contents of size_splits are all the same
+// For example,
+// size_splits = [32, 32] -> substitute
+// size_splits = [31, 33] -> do not substitute
+bool resolve_splitv(luci::CircleSplitV *sv)
+{
+ auto size_splits = dynamic_cast<luci::CircleConst *>(sv->size_splits());
+ if (not size_splits)
+ return false;
+
+ if (size_splits->dtype() != loco::DataType::S32)
+ return false;
+
+ auto num_split = size_splits->size<loco::DataType::S32>();
+ if (static_cast<int32_t>(num_split) != sv->num_split())
+ return false;
+
+ if (num_split < 1)
+ return false;
+
+  // Check that the contents of size_splits are all the same
+ auto first_size = size_splits->at<loco::DataType::S32>(0);
+ for (uint32_t i = 1; i < num_split; i++)
+ {
+ if (first_size != size_splits->at<loco::DataType::S32>(i))
+ return false;
+ }
+
+ auto graph = sv->graph();
+ auto split_node = graph->nodes()->create<luci::CircleSplit>();
+ split_node->input(sv->input());
+ split_node->split_dim(sv->split_dim());
+ split_node->num_split(sv->num_split());
+ split_node->name(sv->name());
+ copy_quantparam(sv, split_node);
+ luci::add_origin(split_node, luci::get_origin(sv));
+
+ auto succs = loco::succs(sv);
+ for (auto succ : succs)
+ {
+ auto svo = loco::must_cast<luci::CircleSplitVOut *>(succ);
+ auto so_node = graph->nodes()->create<luci::CircleSplitOut>();
+ so_node->input(split_node);
+ so_node->index(svo->index());
+ so_node->name(svo->name());
+ copy_quantparam(svo, so_node);
+ luci::add_origin(so_node, luci::get_origin(svo));
+
+ replace(svo).with(so_node);
+ }
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * EXAMPLE (SplitV with num_split = 2)
+ *
+ * BEFORE
+ * [CircleNode]
+ * |
+ *        [CircleSplitV] (size_splits and split_dim inputs omitted from the diagram)
+ * / \
+ * [CircleSplitVOut] [CircleSplitVOut]
+ * | |
+ * [CircleNode] [CircleNode]
+ *
+ * AFTER
+ * [CircleNode]
+ * / \
+ * [CircleSplit] [CircleSplitV] (dead)
+ * / \ \
+ * [CircleSplitOut] [CircleSplitOut] [CircleSplitVOut] * 2 (dead)
+ * | |
+ * [CircleNode] [CircleNode]
+ */
+bool SubstituteSplitVToSplitPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto sv = dynamic_cast<luci::CircleSplitV *>(node))
+ {
+ if (resolve_splitv(sv))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/SubstituteSplitVToSplitPass.test.cpp b/compiler/luci/pass/src/SubstituteSplitVToSplitPass.test.cpp
new file mode 100644
index 000000000..43f9cc116
--- /dev/null
+++ b/compiler/luci/pass/src/SubstituteSplitVToSplitPass.test.cpp
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/SubstituteSplitVToSplitPass.h"
+
+#include "helpers/CreateCircleConst.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+const int N = 1;
+const int C = 32;
+const int H = 8;
+const int W = 8;
+
+/**
+ * graph having SplitV operator
+ *
+ * [CircleInput]
+ * |
+ * [CircleSplitV]
+ * / \
+ * [CircleSplitVOut] [CircleSplitVOut]
+ * | |
+ * [CircleOutput] [CircleOutput]
+ */
+class SplitVGraphlet
+{
+public:
+ SplitVGraphlet() = default;
+
+public:
+ void init(loco::Graph *g)
+ {
+ const std::vector<int32_t> splits{16, 16};
+ auto size_splits = luci::create_const_node(g, loco::DataType::S32, {2}, splits);
+
+ const std::vector<int32_t> dim{3};
+ auto split_dim = luci::create_const_node(g, loco::DataType::S32, {1}, dim);
+
+ _sv = g->nodes()->create<luci::CircleSplitV>();
+ _sv->size_splits(size_splits);
+ _sv->split_dim(split_dim);
+ _sv->num_split(2);
+ _sv->name("SplitV");
+
+ _svo1 = g->nodes()->create<luci::CircleSplitVOut>();
+ _svo1->input(_sv);
+ _svo1->index(0);
+ _svo1->name("SplitV0");
+
+ _svo2 = g->nodes()->create<luci::CircleSplitVOut>();
+ _svo2->input(_sv);
+ _svo2->index(1);
+ _svo2->name("SplitV1");
+ }
+
+public:
+ luci::CircleSplitV *split_v() { return _sv; }
+ luci::CircleSplitVOut *split_vo1() { return _svo1; }
+ luci::CircleSplitVOut *split_vo2() { return _svo2; }
+
+protected:
+ luci::CircleSplitV *_sv = nullptr;
+ luci::CircleSplitVOut *_svo1 = nullptr;
+ luci::CircleSplitVOut *_svo2 = nullptr;
+};
+
+class SplitVGraph : public TestIsGraphlet<1>, public TestOsGraphlet<2>, public SplitVGraphlet
+{
+public:
+ SplitVGraph() = default;
+
+ void init(void)
+ {
+ TestIsGraphlet<1>::init(g(), {{N, C, H, W}});
+ TestOsGraphlet<2>::init(g(), {{N, C, H / 2, W / 2}, {N, C, H / 2, W / 2}});
+ SplitVGraphlet::init(g());
+
+ split_v()->input(input(0));
+
+ output(0)->from(split_vo1());
+ output(1)->from(split_vo2());
+ }
+};
+
+class SubstituteSplitVToSplitPassTest : public ::testing::Test
+{
+public:
+ SplitVGraph g;
+ luci::SubstituteSplitVToSplitPass pass;
+};
+
+} // namespace
+
+/**
+ * Optimized graph looks like below.
+ *
+ * [CircleInput]
+ * |
+ * [CircleSplit]
+ * / \
+ * [CircleSplitOut] [CircleSplitOut]
+ * | |
+ * [CircleOutput] [CircleOutput]
+ */
+TEST_F(SubstituteSplitVToSplitPassTest, simple_test)
+{
+ g.init();
+
+ auto ret = pass.run(g.g());
+ EXPECT_EQ(true, ret);
+
+ auto so1 = dynamic_cast<luci::CircleSplitOut *>(g.output(0)->from());
+ EXPECT_NE(nullptr, so1);
+
+ auto so2 = dynamic_cast<luci::CircleSplitOut *>(g.output(1)->from());
+ EXPECT_NE(nullptr, so2);
+
+ EXPECT_EQ(so1->input(), so2->input());
+
+ auto s = dynamic_cast<luci::CircleSplit *>(so1->input());
+ EXPECT_NE(nullptr, s);
+
+ auto input = dynamic_cast<luci::CircleInput *>(s->input());
+ EXPECT_NE(nullptr, input);
+}
+
+TEST_F(SubstituteSplitVToSplitPassTest, wrong_condition_NEG)
+{
+ g.init();
+
+ g.split_v()->num_split(3); // Wrong num_split
+ auto ret = pass.run(g.g());
+
+ EXPECT_EQ(false, ret);
+}
diff --git a/compiler/luci/pass/src/SubstituteSqueezeToReshapePass.cpp b/compiler/luci/pass/src/SubstituteSqueezeToReshapePass.cpp
new file mode 100644
index 000000000..9bc764f92
--- /dev/null
+++ b/compiler/luci/pass/src/SubstituteSqueezeToReshapePass.cpp
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/SubstituteSqueezeToReshapePass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+namespace
+{
+
+/**
+ * @brief return TRUE if all dims are known
+ * @note This pass could also be applied when some dimensions are unknown.
+         For now, such cases are not handled; the logic may be updated later.
+ */
+bool can_squeeze_shape(const luci::CircleNode *node)
+{
+ for (uint32_t r = 0; r < node->rank(); ++r)
+ {
+ if (not node->dim(r).known())
+ return false;
+ }
+ return true;
+}
+
+/**
+ * @brief return a valid unsigned dim value in [0, rank-1]
+ * @note dim can range from -rank to (rank-1)
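+ *       e.g., when rank == 4: dim == 2 --> 2, dim == -1 --> 3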
+ */
+uint32_t valid_unsigned_dim(uint32_t rank, int32_t dim)
+{
+ int32_t irank = static_cast<int32_t>(rank);
+ return dim >= 0 ? static_cast<uint32_t>(dim) : static_cast<uint32_t>(irank + dim);
+}
+
+/**
+ * @brief return TRUE if the input dim is 1 for every value in squeeze_dims
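+ *        e.g., for input shape [1, 16, 1, 1]: squeeze_dims {2, 3} --> TRUE, squeeze_dims {1} --> FALSE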
+ */
+bool is_valid_input(const luci::CircleNode *node, const std::vector<int32_t> &squeeze_dims)
+{
+ auto rank = node->rank();
+ for (auto dim : squeeze_dims)
+ {
+ auto udim = valid_unsigned_dim(rank, dim);
+ if (node->dim(udim).value() != 1)
+ return false;
+ }
+ return true;
+}
+
+/**
+ * @brief return shape vector from input
+ */
+std::vector<uint32_t> node_shape(const luci::CircleNode *input)
+{
+ std::vector<uint32_t> shape;
+ uint32_t rank = input->rank();
+ for (uint32_t r = 0; r < rank; ++r)
+ shape.push_back(input->dim(r).value());
+
+ return shape;
+}
+
+/**
+ * @brief return CircleConst ptr with values of new_shape
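+ *        e.g., new_shape {1, 16} --> rank-1 S32 const of shape [2] holding {1, 16}
+ *              new_shape {}      --> rank-0 S32 const with no elements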
+ */
+luci::CircleConst *create_shape_const(loco::Graph *graph, const std::vector<uint32_t> &new_shape)
+{
+ // NOTE dim_size can be 0
+ uint32_t dim_size = static_cast<uint32_t>(new_shape.size());
+
+ auto shape_const = graph->nodes()->create<luci::CircleConst>();
+
+ // const shape/dtype
+ shape_const->dtype(loco::DataType::S32);
+ if (dim_size > 0)
+ {
+ shape_const->rank(1);
+ shape_const->dim(0).set(dim_size);
+ }
+ else
+ shape_const->rank(0);
+ shape_const->shape_status(luci::ShapeStatus::VALID);
+
+ // constant values
+ shape_const->size<loco::DataType::S32>(dim_size);
+ for (uint32_t i = 0; i < dim_size; ++i)
+ shape_const->at<loco::DataType::S32>(i) = new_shape.at(i);
+
+ return shape_const;
+}
+
+bool substitute_squeeze_to_reshape(luci::CircleSqueeze *squeeze)
+{
+ assert(squeeze != nullptr);
+
+ auto input = loco::must_cast<luci::CircleNode *>(squeeze->input());
+ // we need input node shape and all dim should be known
+ if (input->shape_status() != luci::ShapeStatus::VALID)
+ return false;
+ if (not can_squeeze_shape(input))
+ return false;
+
+ // we will use squeeze shape for new shape
+ if (squeeze->shape_status() != luci::ShapeStatus::VALID)
+ return false;
+
+ auto &squeeze_dims = squeeze->squeeze_dims();
+ if (not is_valid_input(input, squeeze_dims))
+ throw std::runtime_error("Invalid values in squeeze_dims: " + squeeze->name());
+
+ auto name = squeeze->name();
+ assert(name.length() > 0);
+
+ auto reshape_shape = node_shape(squeeze);
+ auto graph = squeeze->graph();
+ auto reshape = graph->nodes()->create<luci::CircleReshape>();
+ auto shape_const = create_shape_const(graph, reshape_shape);
+ copy_quantparam(squeeze, reshape);
+ reshape->name(name + "/Reshape");
+ luci::add_origin(reshape, luci::get_origin(squeeze));
+ shape_const->name(name + "/Reshape/shape");
+
+ // graph connection
+ reshape->tensor(input);
+ reshape->shape(shape_const);
+ replace(squeeze).with(reshape);
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * BEFORE
+ * |
+ * [CircleNode]
+ * |
+ * [CircleSqueeze]
+ * |
+ * [CircleNode]
+ * |
+ *
+ * AFTER
+ * |
+ * [CircleNode] [CircleConst]
+ * | \ /
+ * [CircleSqueeze] [CircleReshape]
+ * |
+ * [CircleNode]
+ * |
+ */
+bool SubstituteSqueezeToReshapePass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto squeeze = dynamic_cast<luci::CircleSqueeze *>(node))
+ {
+ if (substitute_squeeze_to_reshape(squeeze))
+ changed = true;
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/SubstituteSqueezeToReshapePass.test.cpp b/compiler/luci/pass/src/SubstituteSqueezeToReshapePass.test.cpp
new file mode 100644
index 000000000..d917af678
--- /dev/null
+++ b/compiler/luci/pass/src/SubstituteSqueezeToReshapePass.test.cpp
@@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "luci/Pass/SubstituteSqueezeToReshapePass.h"
+#include "luci/Pass/CircleShapeInferencePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using uilist = std::initializer_list<uint32_t>;
+using ilist = std::initializer_list<int32_t>;
+
+class PassTestGraph
+{
+public:
+ PassTestGraph() = default;
+
+public:
+ void init(const uilist shape_in, const uilist shape_out)
+ {
+ _graph_input = _g.inputs()->create();
+ _graph_output = _g.outputs()->create();
+
+ _input = _g.nodes()->create<luci::CircleInput>();
+ _input->shape(shape_in);
+ _input->shape_status(luci::ShapeStatus::VALID);
+ _input->name("input");
+
+ _output = _g.nodes()->create<luci::CircleOutput>();
+ _output->shape(shape_out);
+ _output->shape_status(luci::ShapeStatus::VALID);
+ _output->name("output");
+
+ _input->index(_graph_input->index());
+ _output->index(_graph_output->index());
+
+ auto input_shape = std::make_unique<loco::TensorShape>();
+ set(input_shape.get(), shape_in);
+ _graph_input->shape(std::move(input_shape));
+
+ auto output_shape = std::make_unique<loco::TensorShape>();
+ set(output_shape.get(), shape_out);
+ _graph_output->shape(std::move(output_shape));
+ }
+
+protected:
+ void set(loco::TensorShape *shape, const uilist &values)
+ {
+ uint32_t r = 0;
+ shape->rank(values.size());
+ for (auto v : values)
+ shape->dim(r++).set(v);
+ }
+
+public:
+ loco::Graph *g(void) { return &_g; }
+ luci::CircleOutput *output(void) { return _output; }
+
+protected:
+ loco::Graph _g;
+ loco::GraphInput *_graph_input = nullptr;
+ loco::GraphOutput *_graph_output = nullptr;
+ luci::CircleInput *_input = nullptr;
+ luci::CircleOutput *_output = nullptr;
+};
+
+class SubstituteSqueezeToReshapeGraph : public PassTestGraph
+{
+public:
+ SubstituteSqueezeToReshapeGraph() = default;
+
+public:
+ void init(const uilist shape_in, const uilist shape_out, const ilist squeeze_dims)
+ {
+ PassTestGraph::init(shape_in, shape_out);
+
+ _squeeze = _g.nodes()->create<luci::CircleSqueeze>();
+ _squeeze->input(_input);
+ _squeeze->squeeze_dims(squeeze_dims);
+ _squeeze->name("squeeze");
+
+ _output->from(_squeeze);
+ }
+
+protected:
+ luci::CircleSqueeze *_squeeze = nullptr;
+};
+
+class SubstituteSqueezeToReshapeTest : public ::testing::Test
+{
+public:
+ SubstituteSqueezeToReshapeTest() = default;
+
+ void run_pass(void)
+ {
+ while (_shapeinf.run(_graph.g()) || _pass.run(_graph.g()))
+ ;
+ }
+
+protected:
+ SubstituteSqueezeToReshapeGraph _graph;
+ luci::SubstituteSqueezeToReshapePass _pass;
+ luci::CircleShapeInferencePass _shapeinf;
+};
+
+} // namespace
+
+TEST(SubstituteSqueezeToReshapePassTest, name)
+{
+ luci::SubstituteSqueezeToReshapePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST_F(SubstituteSqueezeToReshapeTest, simple_with_squeeze_dims)
+{
+ _graph.init({1, 16, 1, 1}, {1, 16}, {2, 3});
+
+ run_pass();
+
+ auto reshape = dynamic_cast<luci::CircleReshape *>(_graph.output()->from());
+ auto squeeze = dynamic_cast<luci::CircleSqueeze *>(_graph.output()->from());
+ ASSERT_NE(nullptr, reshape);
+ ASSERT_EQ(nullptr, squeeze);
+ auto reshape_shape = loco::must_cast<luci::CircleConst *>(reshape->shape());
+ ASSERT_EQ(2, reshape_shape->size<loco::DataType::S32>());
+ ASSERT_EQ(1, reshape_shape->at<loco::DataType::S32>(0));
+ ASSERT_EQ(16, reshape_shape->at<loco::DataType::S32>(1));
+}
+
+TEST_F(SubstituteSqueezeToReshapeTest, simple_without_squeeze_dims)
+{
+ _graph.init({1, 16, 1, 1}, {16}, {});
+
+ run_pass();
+
+ auto reshape = dynamic_cast<luci::CircleReshape *>(_graph.output()->from());
+ auto squeeze = dynamic_cast<luci::CircleSqueeze *>(_graph.output()->from());
+ ASSERT_NE(nullptr, reshape);
+ ASSERT_EQ(nullptr, squeeze);
+ auto reshape_shape = loco::must_cast<luci::CircleConst *>(reshape->shape());
+ ASSERT_EQ(1, reshape_shape->size<loco::DataType::S32>());
+ ASSERT_EQ(16, reshape_shape->at<loco::DataType::S32>(0));
+}
+
+TEST_F(SubstituteSqueezeToReshapeTest, input_with_0_dims)
+{
+ _graph.init({1, 16, 0, 1}, {16, 0}, {});
+
+ run_pass();
+
+ auto reshape = dynamic_cast<luci::CircleReshape *>(_graph.output()->from());
+ auto squeeze = dynamic_cast<luci::CircleSqueeze *>(_graph.output()->from());
+ ASSERT_NE(nullptr, reshape);
+ ASSERT_EQ(nullptr, squeeze);
+ auto reshape_shape = loco::must_cast<luci::CircleConst *>(reshape->shape());
+ ASSERT_EQ(2, reshape_shape->size<loco::DataType::S32>());
+ ASSERT_EQ(16, reshape_shape->at<loco::DataType::S32>(0));
+ ASSERT_EQ(0, reshape_shape->at<loco::DataType::S32>(1));
+}
+
+TEST_F(SubstituteSqueezeToReshapeTest, nothing_to_squeeze)
+{
+ _graph.init({2, 16, 16, 3}, {2, 16, 16, 3}, {});
+
+ run_pass();
+
+ auto reshape = dynamic_cast<luci::CircleReshape *>(_graph.output()->from());
+ auto squeeze = dynamic_cast<luci::CircleSqueeze *>(_graph.output()->from());
+ ASSERT_NE(nullptr, reshape);
+ ASSERT_EQ(nullptr, squeeze);
+}
+
+TEST_F(SubstituteSqueezeToReshapeTest, all_to_squeeze)
+{
+ _graph.init({1, 1}, {}, {});
+
+ run_pass();
+
+ auto reshape = dynamic_cast<luci::CircleReshape *>(_graph.output()->from());
+ auto squeeze = dynamic_cast<luci::CircleSqueeze *>(_graph.output()->from());
+ ASSERT_NE(nullptr, reshape);
+ ASSERT_EQ(nullptr, squeeze);
+}
+
+TEST_F(SubstituteSqueezeToReshapeTest, wrong_squeeze_dims_NEG)
+{
+ _graph.init({1, 16, 1, 1}, {1, 16, 1, 1}, {1});
+
+ // shape inference will throw for invalid squeeze_dims
+ EXPECT_THROW(run_pass(), std::exception);
+}
diff --git a/compiler/luci/pass/src/SubstituteStridedSliceToReshapePass.cpp b/compiler/luci/pass/src/SubstituteStridedSliceToReshapePass.cpp
new file mode 100644
index 000000000..9e1c5a4a3
--- /dev/null
+++ b/compiler/luci/pass/src/SubstituteStridedSliceToReshapePass.cpp
@@ -0,0 +1,215 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/SubstituteStridedSliceToReshapePass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+#include <bitset>
+#include <vector>
+
+/**
+ * @brief Convert strided_slice op to reshape op under certain conditions
+ * @details Convert strided_slice op if the op meets all of the following conditions:
+ * For all i, 0 <= i < input.rank
+ * - begin[i] == 0
+ * - end[i] >= input.shape.dim[i]
+ * - strides[i] == 1
+ * For all k (0 <= k < input.rank) where kth bit of shrink_axis_mask == 1
+ *          - input.shape.dim[k] == 1
+ *
+ * Example:
+ * input.shape = [1,1,2,3]
+ * strided_slice(input, begin=[0,0,0,0], end=[1,1,2,3], strides=[1,1,1,1],
+ * shrink_axis_mask=0011b) // k = 0, 1
+ *
+ * can be converted to
+ *
+ * reshape(input, [2,3])
+ */
+namespace
+{
+
+/**
+ * @brief Return newly-created CircleConst whose rank is 1
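+ *        e.g., values {1, 5, 9} --> S32 const of shape [3] holding {1, 5, 9}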
+ */
+luci::CircleConst *build_rank1_const(loco::Graph *graph, const std::vector<uint32_t> &values)
+{
+ auto const_node = graph->nodes()->create<luci::CircleConst>();
+ const_node->dtype(loco::DataType::S32);
+ const_node->size<loco::DataType::S32>(values.size());
+ const_node->shape_status(luci::ShapeStatus::VALID);
+ const_node->rank(1);
+ const_node->dim(0) = values.size();
+
+ for (size_t i = 0; i < values.size(); i++)
+ {
+ const_node->at<loco::DataType::S32>(i) = values.at(i);
+ }
+
+ return const_node;
+}
+
+/**
+ * @brief Return newly-created CircleReshape node
+ */
+luci::CircleNode *build_reshape(loco::Graph *graph, const std::string &name,
+ const std::shared_ptr<luci::CircleNodeOrigin> &origin,
+ luci::CircleNode *input, const std::vector<uint32_t> &new_shape)
+{
+ auto reshape_node = graph->nodes()->create<luci::CircleReshape>();
+ reshape_node->tensor(input);
+ reshape_node->name(name);
+ luci::add_origin(reshape_node, origin);
+
+ auto new_shape_const = build_rank1_const(graph, new_shape);
+ {
+ new_shape_const->name(name + "/new_shape");
+ luci::add_origin(new_shape_const, origin);
+ }
+
+ reshape_node->shape(new_shape_const);
+
+ return reshape_node;
+}
+
+/**
+ * @brief Return the value at the given index of a CircleConst as int64
+ */
+int64_t value_from_circle_const(const luci::CircleConst *node, uint32_t idx)
+{
+ assert(node->rank() == 1 && node->dim(0).value() > idx);
+ assert(node->dtype() == loco::DataType::S64 || node->dtype() == loco::DataType::S32);
+
+ if (node->dtype() == loco::DataType::S64)
+ return node->at<loco::DataType::S64>(idx);
+ return static_cast<int64_t>(node->at<loco::DataType::S32>(idx));
+}
+
+bool substitute_strided_slice_to_reshape(luci::CircleStridedSlice *ss_node)
+{
+ if (ss_node->shrink_axis_mask() == 0)
+ return false;
+
+ // TODO Consider cases with ellipsis_mask and new_axis_mask
+ // NOT YET SUPPORTED
+ if (ss_node->ellipsis_mask() != 0 or ss_node->new_axis_mask() != 0)
+ return false;
+
+ auto begin_const = dynamic_cast<luci::CircleConst *>(ss_node->begin());
+ auto strides_const = dynamic_cast<luci::CircleConst *>(ss_node->strides());
+ auto end_const = dynamic_cast<luci::CircleConst *>(ss_node->end());
+
+ if (not(begin_const && strides_const && end_const))
+ return false;
+
+ auto input_node = loco::must_cast<luci::CircleNode *>(ss_node->input());
+
+ // condition check
+ std::bitset<32> begin_mask(ss_node->begin_mask());
+ std::bitset<32> end_mask(ss_node->end_mask());
+ std::bitset<32> shrink_axis_mask(ss_node->shrink_axis_mask());
+
+ uint32_t input_rank = input_node->rank();
+ for (uint32_t i = 0; i < input_rank; i++)
+ {
+ if (!input_node->dim(i).known())
+ return false;
+
+ auto begin_dim = value_from_circle_const(begin_const, i);
+ if (begin_dim != 0 and begin_mask.test(i) == false)
+ return false;
+
+ // NOTE:
+ // In Tensorflow and TFLite, e.g., if input_shape = [2,3],
+ // strided_slice.end = [10,20] (larger value than actual dim)
+ // is treated as strided_slice.end = [2,3]
+ int64_t end_dim = value_from_circle_const(end_const, i);
+ if (end_dim < input_node->dim(i).value() and end_mask.test(i) == false)
+ return false;
+
+ int64_t strides_value = value_from_circle_const(strides_const, i);
+ if (strides_value != 1)
+ return false;
+
+ if (shrink_axis_mask.test(i) && input_node->dim(i).value() != 1)
+ return false;
+ }
+
+ // build shape for Reshape op
+ bool found = false;
+ std::vector<uint32_t> shrunk_shape;
+ for (uint32_t i = 0; i < input_rank; i++)
+ {
+ if (input_node->dim(i) == 1 and shrink_axis_mask.test(i))
+ found = true;
+ else
+ shrunk_shape.emplace_back(input_node->dim(i).value());
+ }
+
+ if (not found)
+ return false;
+
+ auto reshape_node = build_reshape(input_node->graph(), ss_node->name(), luci::get_origin(ss_node),
+ input_node, shrunk_shape);
+
+ replace(ss_node).with(reshape_node);
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * BEFORE
+ * |
+ * [CircleNode] [CircleConst] [CircleConst] [CircleConst]
+ * | | | |
+ * -------+------------------------------------
+ * |
+ * [CircleStridedSlice]
+ * |
+ * [CircleNode]
+ * |
+ * AFTER
+ * |
+ * [CircleConst] [CircleNode] [CircleConst] [CircleConst] [CircleConst]
+ * \ / \ | | |
+ * [CircleReshape] -------------------+----------------------
+ * | |
+ * [CircleNode] [CircleStridedSlice]
+ * |
+ */
+bool SubstituteStridedSliceToReshapePass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto circle_node = dynamic_cast<luci::CircleStridedSlice *>(node))
+ {
+ if (substitute_strided_slice_to_reshape(circle_node))
+ {
+ changed = true;
+ }
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/SubstituteStridedSliceToReshapePass.test.cpp b/compiler/luci/pass/src/SubstituteStridedSliceToReshapePass.test.cpp
new file mode 100644
index 000000000..57e692efa
--- /dev/null
+++ b/compiler/luci/pass/src/SubstituteStridedSliceToReshapePass.test.cpp
@@ -0,0 +1,277 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "luci/Pass/SubstituteStridedSliceToReshapePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+luci::CircleConst *build_rank1_const(loco::Graph *graph, const std::vector<uint32_t> &values)
+{
+ auto const_node = graph->nodes()->create<luci::CircleConst>();
+ const_node->dtype(loco::DataType::S32);
+ const_node->size<loco::DataType::S32>(values.size());
+ const_node->shape_status(luci::ShapeStatus::VALID);
+ const_node->rank(1);
+ const_node->dim(0) = values.size();
+
+  for (size_t i = 0; i < values.size(); i++)
+ {
+ const_node->at<loco::DataType::S32>(i) = values.at(i);
+ }
+
+ return const_node;
+}
+
+class SubstituteStridedSliceToReshapeTest : public ::testing::Test
+{
+public:
+ SubstituteStridedSliceToReshapeTest() {}
+
+ void buildGraph(const std::initializer_list<uint32_t> input_shape,
+ const std::initializer_list<uint32_t> begin_vals,
+ const std::initializer_list<uint32_t> end_vals,
+ const std::initializer_list<uint32_t> strides_vals, int32_t begin_mask,
+ int32_t end_mask, int32_t ellipsis_mask, int32_t new_axis_mask,
+ int32_t shrink_axis_mask)
+ {
+ // Input node
+ input = g.nodes()->create<luci::CircleInput>();
+ {
+ auto graph_input = g.inputs()->create();
+ input->index(graph_input->index());
+ input->shape_status(luci::ShapeStatus::VALID);
+ input->rank(input_shape.size());
+ input->shape(input_shape);
+ input->name("input");
+ }
+
+ // StridedSlice node
+ auto ss_node = g.nodes()->create<luci::CircleStridedSlice>();
+ {
+ auto *graph = &g;
+ auto build_attr = [&graph](const std::string &name,
+ const std::initializer_list<uint32_t> vals) {
+ auto node = build_rank1_const(graph, vals);
+ node->name(name);
+
+ return node;
+ };
+
+ ss_node->input(input);
+ auto begin = build_attr("begin", begin_vals);
+ auto end = build_attr("end", end_vals);
+ auto strides = build_attr("strides", strides_vals);
+
+ ss_node->begin(begin);
+ ss_node->end(end);
+ ss_node->strides(strides);
+
+ ss_node->begin_mask(begin_mask);
+ ss_node->end_mask(end_mask);
+ ss_node->ellipsis_mask(ellipsis_mask);
+ ss_node->new_axis_mask(new_axis_mask);
+ ss_node->shrink_axis_mask(shrink_axis_mask);
+ }
+
+ // Output node
+ output = g.nodes()->create<luci::CircleOutput>();
+ output->from(ss_node);
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+ output->name("output");
+ }
+
+ void assert_not_converted()
+ {
+ luci::SubstituteStridedSliceToReshapePass pass;
+ while (pass.run(&g))
+ ;
+
+ auto reshape_node = dynamic_cast<luci::CircleReshape *>(output->from());
+ ASSERT_TRUE(reshape_node == nullptr);
+
+ auto strided_slice_node = dynamic_cast<luci::CircleStridedSlice *>(output->from());
+ ASSERT_TRUE(strided_slice_node != nullptr);
+ }
+
+public:
+ loco::Graph g;
+ luci::CircleInput *input = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+} // namespace
+
+TEST(SubstituteStridedSliceToReshapePassTest, name)
+{
+ luci::SubstituteStridedSliceToReshapePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST_F(SubstituteStridedSliceToReshapeTest, simple_case)
+{
+ buildGraph({1, 1, 5, 1, 9}, // input shape
+ {0, 0, 0, 0, 0}, // begin
+ {1, 1, 5, 1, 9}, // end
+ {1, 1, 1, 1, 1}, // strides
+ 0, // begin mask
+ 0, // end mask
+ 0, // ellipsis axis mask
+ 0, // new axis mask
+ 0b01001 // shrink axis mask, 0th and 3rd dim will be shrunk
+ );
+
+ luci::SubstituteStridedSliceToReshapePass pass;
+ while (pass.run(&g))
+ ;
+
+ auto reshape_node = dynamic_cast<luci::CircleReshape *>(output->from());
+ ASSERT_TRUE(reshape_node != nullptr);
+
+ auto new_shape = loco::must_cast<luci::CircleConst *>(reshape_node->shape());
+ ASSERT_EQ(new_shape->rank(), 1);
+ ASSERT_EQ(new_shape->dim(0).value(), 3);
+ ASSERT_EQ(new_shape->at<loco::DataType::S32>(0), 1);
+ ASSERT_EQ(new_shape->at<loco::DataType::S32>(1), 5);
+ ASSERT_EQ(new_shape->at<loco::DataType::S32>(2), 9);
+}
+
+TEST_F(SubstituteStridedSliceToReshapeTest, with_begin_end_mask)
+{
+ buildGraph({5, 1, 9}, // input shape
+ {0, 0, 5}, // begin
+ {3, 1, 9}, // end
+ {1, 1, 1}, // strides
+ 0b100, // begin mask
+ 0b001, // end mask
+ 0, // ellipsis axis mask
+ 0, // new axis mask
+             0b010      // shrink axis mask, 1st dim will be shrunk
+ );
+
+ luci::SubstituteStridedSliceToReshapePass pass;
+ while (pass.run(&g))
+ ;
+
+ auto reshape_node = dynamic_cast<luci::CircleReshape *>(output->from());
+ ASSERT_TRUE(reshape_node != nullptr);
+
+ auto new_shape = loco::must_cast<luci::CircleConst *>(reshape_node->shape());
+ ASSERT_EQ(new_shape->rank(), 1);
+ ASSERT_EQ(new_shape->dim(0).value(), 2);
+ ASSERT_EQ(new_shape->at<loco::DataType::S32>(0), 5);
+ ASSERT_EQ(new_shape->at<loco::DataType::S32>(1), 9);
+}
+
+TEST_F(SubstituteStridedSliceToReshapeTest, with_large_end_mask)
+{
+ buildGraph({5, 1, 9}, // input shape
+ {0, 0, 0}, // begin
+ {100, 100, 100}, // large end
+ {1, 1, 1}, // strides
+ 0, // begin mask
+ 0, // end mask
+ 0, // ellipsis axis mask
+ 0, // new axis mask
+             0b010      // shrink axis mask, 1st dim will be shrunk
+ );
+
+ luci::SubstituteStridedSliceToReshapePass pass;
+ while (pass.run(&g))
+ ;
+
+ auto reshape_node = dynamic_cast<luci::CircleReshape *>(output->from());
+ ASSERT_TRUE(reshape_node != nullptr);
+
+ auto new_shape = loco::must_cast<luci::CircleConst *>(reshape_node->shape());
+ ASSERT_EQ(new_shape->rank(), 1);
+ ASSERT_EQ(new_shape->dim(0).value(), 2);
+ ASSERT_EQ(new_shape->at<loco::DataType::S32>(0), 5);
+ ASSERT_EQ(new_shape->at<loco::DataType::S32>(1), 9);
+}
+
+TEST_F(SubstituteStridedSliceToReshapeTest, not_matching_begin_index_NEG)
+{
+ buildGraph({1, 3, 5, 7}, // input shape
+ {0, 0, 2, 0}, // begin[2] does not start from 0
+ {1, 3, 5, 7}, // end
+ {1, 1, 1, 1}, // strides
+ 0, // begin mask
+ 0, // end mask
+ 0, // ellipsis axis mask
+ 0, // new axis mask
+ 0b0001 // shrink axis mask
+ );
+
+ assert_not_converted();
+ SUCCEED();
+}
+
+TEST_F(SubstituteStridedSliceToReshapeTest, not_matching_end_index_NEG)
+{
+ buildGraph({1, 3, 5, 7}, // input shape
+ {0, 0, 0, 0}, // begin
+ {1, 3, 3, 7}, // end[2] does not meet condition
+ {1, 1, 1, 1}, // strides
+ 0, // begin mask
+ 0, // end mask
+ 0, // ellipsis axis mask
+ 0, // new axis mask
+ 0b0001 // shrink axis mask
+ );
+
+ assert_not_converted();
+ SUCCEED();
+}
+
+TEST_F(SubstituteStridedSliceToReshapeTest, not_matching_strides_NEG)
+{
+ buildGraph({1, 3, 5, 7}, // input shape
+ {0, 0, 0, 0}, // begin
+ {1, 3, 5, 7}, // end
+ {1, 1, 2, 1}, // strides[2] does not meet condition
+ 0, // begin mask
+ 0, // end mask
+ 0, // ellipsis axis mask
+ 0, // new axis mask
+ 0b0001 // shrink axis mask
+ );
+
+ assert_not_converted();
+ SUCCEED();
+}
+
+TEST_F(SubstituteStridedSliceToReshapeTest, not_matching_shrink_axis_mask_NEG)
+{
+ buildGraph({1, 3, 5, 7}, // input shape
+ {0, 0, 0, 0}, // begin
+ {1, 3, 5, 7}, // end
+ {1, 1, 1, 1}, // strides
+ 0, // begin mask
+ 0, // end mask
+ 0, // ellipsis axis mask
+ 0, // new axis mask
+             0b0101     // bit 2 of shrink axis mask does not meet condition (dim is 5, not 1)
+ );
+
+ assert_not_converted();
+ SUCCEED();
+}
diff --git a/compiler/luci/pass/src/SubstituteTransposeToReshapePass.cpp b/compiler/luci/pass/src/SubstituteTransposeToReshapePass.cpp
new file mode 100644
index 000000000..dfd5e6cf2
--- /dev/null
+++ b/compiler/luci/pass/src/SubstituteTransposeToReshapePass.cpp
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/SubstituteTransposeToReshapePass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+namespace
+{
+
+/**
+ * @brief Convert transpose op to reshape op under certain conditions
+ * @details Convert transpose op if it meets all of the conditions below
+ *          1. perm is a CircleConst node.
+ *          2. input has at most one unknown dimension
+ *          3. ignoring dims of value 1, the order of the remaining dims is
+ *             the same for input and output
+ * eg) input shape = (126, 201, 1, 1) => (126, 201)
+ * output shape = (1, 126, 1, 201) => (126, 201)
+ */
+bool substitute_transpose_to_reshape(luci::CircleTranspose *node)
+{
+ auto perm_const = dynamic_cast<luci::CircleConst *>(node->perm());
+ if (perm_const == nullptr)
+ return false;
+
+ assert(perm_const->dtype() == loco::DataType::S32);
+
+ auto input_node = loco::must_cast<luci::CircleNode *>(node->a());
+ if (perm_const->dim(0).value() != input_node->rank())
+ return false;
+
+  // If input has more than one unknown dimension, the transpose will not be changed.
+ int count = 0;
+ for (uint32_t i = 0; i < input_node->rank(); i++)
+ if (!input_node->dim(i).known())
+ count++;
+ if (count > 1)
+ return false;
+
+ uint32_t idx = 0;
+ auto size_items = perm_const->size<loco::DataType::S32>();
+ for (uint32_t i = 0; i < size_items; i++)
+ {
+ assert(perm_const->at<loco::DataType::S32>(i) >= 0 &&
+ perm_const->at<loco::DataType::S32>(i) < static_cast<int32_t>(input_node->rank()));
+ const auto perm_value = static_cast<uint32_t>(perm_const->at<loco::DataType::S32>(i));
+ if (input_node->dim(perm_value).known() && input_node->dim(perm_value).value() == 1)
+ continue;
+    // Check that the perm values of the remaining (non-1) dims are increasing
+ if (idx > perm_value)
+ return false;
+ idx = perm_value;
+ }
+
+ auto name = node->name();
+ assert(name.length() > 0);
+
+ auto new_const_node = node->graph()->nodes()->create<luci::CircleConst>();
+ new_const_node->dtype(loco::DataType::S32);
+ new_const_node->size<loco::DataType::S32>(size_items);
+ new_const_node->shape_status(luci::ShapeStatus::VALID);
+ new_const_node->rank(1);
+ new_const_node->dim(0).set(size_items);
+ for (uint32_t i = 0; i < size_items; i++)
+ {
+ if (input_node->dim(static_cast<uint32_t>(perm_const->at<loco::DataType::S32>(i))).known())
+ new_const_node->at<loco::DataType::S32>(i) = static_cast<int32_t>(
+ input_node->dim(static_cast<uint32_t>(perm_const->at<loco::DataType::S32>(i))).value());
+ else
+ new_const_node->at<loco::DataType::S32>(i) = -1;
+ }
+
+ auto new_reshape_node = node->graph()->nodes()->create<luci::CircleReshape>();
+ new_reshape_node->tensor(input_node);
+ new_reshape_node->shape(new_const_node);
+ new_reshape_node->name(name + "/Reshape");
+ luci::add_origin(new_reshape_node, luci::get_origin(node));
+ new_const_node->name(name + "/Reshape/shape");
+
+ replace(node).with(new_reshape_node);
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * BEFORE
+ *
+ * [CircleNode] [CircleConst]
+ * \ /
+ * [CircleTranspose]
+ * |
+ * [CircleNode]
+ *
+ * AFTER
+ *
+ * [CircleNode] [CircleConst]
+ * \ /
+ * [CircleReshape]
+ * |
+ * [CircleNode]
+ *
+ */
+bool SubstituteTransposeToReshapePass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto circle_node = dynamic_cast<luci::CircleTranspose *>(node))
+ {
+ if (substitute_transpose_to_reshape(circle_node))
+ {
+ changed = true;
+ }
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/SubstituteTransposeToReshapePass.test.cpp b/compiler/luci/pass/src/SubstituteTransposeToReshapePass.test.cpp
new file mode 100644
index 000000000..f81f7e615
--- /dev/null
+++ b/compiler/luci/pass/src/SubstituteTransposeToReshapePass.test.cpp
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "luci/Pass/SubstituteTransposeToReshapePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+class SubstituteTransposeToReshapeTest : public ::testing::Test
+{
+public:
+ SubstituteTransposeToReshapeTest() {}
+
+ void buildGraph(const std::initializer_list<uint32_t> shape, const std::vector<int32_t> perm)
+ {
+ // Input Create.
+ input = g.nodes()->create<luci::CircleInput>();
+ auto graph_input = g.inputs()->create();
+ input->index(graph_input->index());
+ input->shape_status(luci::ShapeStatus::VALID);
+ input->rank(shape.size());
+ input->shape(shape);
+ input->name("input");
+
+ // Permutation Create.
+ auto perm_const = g.nodes()->create<luci::CircleConst>();
+ perm_const->dtype(loco::DataType::S32);
+ perm_const->size<loco::DataType::S32>(perm.size());
+ perm_const->shape_status(luci::ShapeStatus::VALID);
+ perm_const->rank(1);
+ perm_const->dim(0).set(perm.size());
+ for (uint32_t i = 0; i < static_cast<uint32_t>(perm.size()); i++)
+ {
+ perm_const->at<loco::DataType::S32>(i) = perm.at(i);
+ }
+ perm_const->name("perm_const");
+
+ // Transpose Create.
+ auto transpose_node = g.nodes()->create<luci::CircleTranspose>();
+ transpose_node->a(input);
+ transpose_node->perm(perm_const);
+ transpose_node->name("transpose_node");
+
+ // Output Connect.
+ output = g.nodes()->create<luci::CircleOutput>();
+ output->from(transpose_node);
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+ output->name("output");
+ }
+
+public:
+ loco::Graph g;
+ luci::CircleInput *input = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+} // namespace
+
+TEST(SubstituteTransposeToReshapePassTest, name)
+{
+ luci::SubstituteTransposeToReshapePass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST_F(SubstituteTransposeToReshapeTest, simple_case)
+{
+  // Create a graph that transposes input {126, 201, 1, 1} with permutation {2, 0, 3, 1}
+ buildGraph({126, 201, 1, 1}, std::vector<int32_t>({2, 0, 3, 1}));
+ // With this input shape and permutation values, output shape will be [1, 126, 1, 201].
+ // The order of non-one values is unchanged (126, 201).
+ // So this Transpose op can be converted to Reshape op.
+ luci::SubstituteTransposeToReshapePass pass;
+ while (pass.run(&g))
+ ;
+
+ auto reshape_node = dynamic_cast<luci::CircleReshape *>(output->from());
+ auto transpose_node = dynamic_cast<luci::CircleTranspose *>(output->from());
+ ASSERT_NE(nullptr, reshape_node);
+ ASSERT_EQ(nullptr, transpose_node);
+ auto new_shape = loco::must_cast<luci::CircleConst *>(reshape_node->shape());
+ ASSERT_EQ(1, new_shape->at<loco::DataType::S32>(0));
+ ASSERT_EQ(126, new_shape->at<loco::DataType::S32>(1));
+ ASSERT_EQ(1, new_shape->at<loco::DataType::S32>(2));
+ ASSERT_EQ(201, new_shape->at<loco::DataType::S32>(3));
+}
+
+TEST_F(SubstituteTransposeToReshapeTest, failed_to_substitute_NEG)
+{
+  // Create a graph that transposes input {126, 201, 1, 1} with permutation {2, 1, 3, 0}
+ buildGraph({126, 201, 1, 1}, std::vector<int32_t>({2, 1, 3, 0}));
+ // With this input shape and permutation values, output shape will be [1, 201, 1, 126].
+ // The order of non-one values is changed (126, 201) -> (201, 126).
+ // So this Transpose op cannot be converted to Reshape op.
+ luci::SubstituteTransposeToReshapePass pass;
+ while (pass.run(&g))
+ ;
+
+ auto reshape_node = dynamic_cast<luci::CircleReshape *>(output->from());
+ auto transpose_node = dynamic_cast<luci::CircleTranspose *>(output->from());
+ ASSERT_EQ(nullptr, reshape_node);
+ ASSERT_NE(nullptr, transpose_node);
+}
diff --git a/compiler/luci/pass/src/TransformMinMaxToRelu6Pass.cpp b/compiler/luci/pass/src/TransformMinMaxToRelu6Pass.cpp
new file mode 100644
index 000000000..9d1dfc1e3
--- /dev/null
+++ b/compiler/luci/pass/src/TransformMinMaxToRelu6Pass.cpp
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/TransformMinMaxToRelu6Pass.h"
+
+#include "helpers/NodeFiller.h"
+#include "helpers/TypeMapper.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+namespace
+{
+
+template <loco::DataType DT>
+bool is_scalar_with_value(luci::CircleConst *node, typename loco::DataTypeImpl<DT>::Type val)
+{
+ if (node->dtype() != DT)
+ return false;
+ if (node->rank() != 0)
+ return false;
+ if (node->size<DT>() != 1)
+ return false;
+ if (node->at<DT>(0) != static_cast<typename loco::DataTypeImpl<DT>::Type>(val))
+ return false;
+
+ return true;
+}
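+
+// e.g., for a rank-0 FLOAT32 const c holding 6.0f:
+//   is_scalar_with_value<loco::DataType::FLOAT32>(c, 6.0f) --> true
+//   is_scalar_with_value<loco::DataType::FLOAT32>(c, 0.0f) --> false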
+
+/**
+ * BEFORE
+ * [CircleNode]
+ * |
+ * [CircleMinimum]
+ * |
+ * [CircleMaximum]
+ * |
+ * [CircleNode]
+ *
+ * AFTER
+ *
+ * [CircleNode]
+ * |
+ * [CircleRelu6]
+ * |
+ * [CircleNode]
+ *
+ * NOTE Only max(min(input, 6), 0) pattern will be transformed.
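+ *
+ *      e.g., max(min(x, 6), 0) --> relu6(x), while min(max(x, 0), 6) is not
+ *      matched since the pattern is anchored on the outer Maximum.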
+ */
+template <loco::DataType DT> bool transform_min_max_pattern(luci::CircleMaximum *maxi)
+{
+ if (not maxi)
+ return false;
+
+ if (maxi->dtype() != DT)
+ return false;
+
+ luci::CircleConst *maxi_const = nullptr;
+ luci::CircleMinimum *mini = nullptr;
+
+ // There are two ways Maximum takes inputs.
+ // 1. Maximum(x = CircleConst, y = CircleMinimum)
+ // 2. Maximum(x = CircleMinimum, y = CircleConst)
+ if (not luci::fill(&maxi_const, &mini).with_commutative_args_of(maxi))
+ return false;
+
+  // Maximum constant should be a scalar whose value is 0.
+ if (not is_scalar_with_value<DT>(maxi_const,
+ static_cast<typename loco::DataTypeImpl<DT>::Type>(0)))
+ return false;
+
+ luci::CircleConst *mini_const = nullptr;
+ loco::Node *mini_input = nullptr;
+
+  // There are two ways Minimum takes inputs.
+  // 1. Minimum(x = CircleNode, y = CircleConst)
+  // 2. Minimum(x = CircleConst, y = CircleNode)
+ if (not luci::fill(&mini_const, &mini_input).with_commutative_args_of(mini))
+ return false;
+
+  // Minimum constant should be a scalar whose value is 6.
+ if (not is_scalar_with_value<DT>(mini_const,
+ static_cast<typename loco::DataTypeImpl<DT>::Type>(6)))
+ return false;
+
+ auto name = maxi->name();
+ assert(name.length() > 0);
+
+ // Create Relu6 op
+ auto relu6 = mini->graph()->nodes()->create<luci::CircleRelu6>();
+ relu6->features(mini_input);
+ relu6->name(name + "/Relu6");
+ luci::add_origin(relu6, luci::composite_origin({luci::get_origin(maxi), luci::get_origin(mini)}));
+
+ replace(maxi).with(relu6);
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool TransformMinMaxToRelu6Pass::run(loco::Graph *g)
+{
+ bool changed = false;
+
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto maxi = dynamic_cast<luci::CircleMaximum *>(node))
+ {
+ if (transform_min_max_pattern<loco::DataType::FLOAT32>(maxi))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/TransformMinMaxToRelu6Pass.test.cpp b/compiler/luci/pass/src/TransformMinMaxToRelu6Pass.test.cpp
new file mode 100644
index 000000000..9755a70cf
--- /dev/null
+++ b/compiler/luci/pass/src/TransformMinMaxToRelu6Pass.test.cpp
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/TransformMinMaxToRelu6Pass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+/**
+ * Minimum-Maximum pattern graph
+ *
+ * [CircleInput] [CircleConst]
+ * \ /
+ * [CircleMinimum] [CircleConst]
+ * | /
+ * [CircleMaximum]
+ * |
+ * [CircleOutput]
+ */
+struct MinMaxGraph
+{
+ loco::Graph _g;
+ luci::CircleInput *_input = nullptr;
+ luci::CircleMinimum *_mini = nullptr;
+ luci::CircleConst *_mini_const = nullptr;
+ luci::CircleMaximum *_maxi = nullptr;
+ luci::CircleConst *_maxi_const = nullptr;
+ luci::CircleOutput *_output = nullptr;
+};
+
+class TransformMinMaxToRelu6PassTest : public ::testing::Test
+{
+protected:
+ virtual void SetUp()
+ {
+ const int N = 1;
+ const int H = 4;
+ const int W = 4;
+ const int C = 3;
+
+ // graph input and output
+ auto graph_input = _min_max_g._g.inputs()->create();
+ auto graph_output = _min_max_g._g.outputs()->create();
+
+ // CircleInput
+ _min_max_g._input = _min_max_g._g.nodes()->create<luci::CircleInput>();
+ _min_max_g._input->index(graph_input->index());
+ _min_max_g._input->shape({N, H, W, C});
+ _min_max_g._input->dtype(loco::DataType::FLOAT32);
+ _min_max_g._input->name("input");
+
+ // CircleConst
+ _min_max_g._mini_const = _min_max_g._g.nodes()->create<luci::CircleConst>();
+ _min_max_g._mini_const->shape({}); // scalar
+ _min_max_g._mini_const->dtype(loco::DataType::FLOAT32);
+ _min_max_g._mini_const->size<loco::DataType::FLOAT32>(1);
+ _min_max_g._mini_const->at<loco::DataType::FLOAT32>(0) = 6.;
+ _min_max_g._mini_const->name("mini_const");
+
+ // CircleMinimum
+ _min_max_g._mini = _min_max_g._g.nodes()->create<luci::CircleMinimum>();
+ _min_max_g._mini->x(_min_max_g._input);
+ _min_max_g._mini->y(_min_max_g._mini_const);
+ _min_max_g._mini->shape({N, H, W, C});
+ _min_max_g._mini->dtype(loco::DataType::FLOAT32);
+ _min_max_g._mini->name("mini");
+
+ // CircleConst
+ _min_max_g._maxi_const = _min_max_g._g.nodes()->create<luci::CircleConst>();
+    _min_max_g._maxi_const->shape({}); // scalar
+ _min_max_g._maxi_const->dtype(loco::DataType::FLOAT32);
+ _min_max_g._maxi_const->size<loco::DataType::FLOAT32>(1);
+ _min_max_g._maxi_const->at<loco::DataType::FLOAT32>(0) = 0.;
+ _min_max_g._maxi_const->name("maxi_const");
+
+ // CircleMaximum
+ _min_max_g._maxi = _min_max_g._g.nodes()->create<luci::CircleMaximum>();
+ _min_max_g._maxi->x(_min_max_g._mini);
+ _min_max_g._maxi->y(_min_max_g._maxi_const);
+ _min_max_g._maxi->shape({N, H, W, C});
+ _min_max_g._maxi->dtype(loco::DataType::FLOAT32);
+ _min_max_g._maxi->name("maxi");
+
+ // CircleOutput
+ _min_max_g._output = _min_max_g._g.nodes()->create<luci::CircleOutput>();
+ _min_max_g._output->index(graph_output->index());
+ _min_max_g._output->from(_min_max_g._maxi);
+ _min_max_g._output->shape({N, H, W, C});
+ _min_max_g._output->dtype(loco::DataType::FLOAT32);
+ _min_max_g._output->name("output");
+ }
+
+protected:
+ luci::TransformMinMaxToRelu6Pass _pass;
+ MinMaxGraph _min_max_g;
+};
+
+} // namespace
+
+TEST_F(TransformMinMaxToRelu6PassTest, name)
+{
+ auto const name = _pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+/**
+ * Optimized graph looks like below.
+ *
+ * [CircleInput]
+ * |
+ * [CircleRelu6]
+ * |
+ * [CircleOutput]
+ */
+TEST_F(TransformMinMaxToRelu6PassTest, simple_test)
+{
+ auto ret = _pass.run(&_min_max_g._g);
+ EXPECT_TRUE(ret);
+
+ auto relu6 = dynamic_cast<luci::CircleRelu6 *>(_min_max_g._output->from());
+ EXPECT_NE(nullptr, relu6);
+
+ auto input = dynamic_cast<luci::CircleInput *>(relu6->features());
+ EXPECT_NE(nullptr, input);
+}
+
+TEST_F(TransformMinMaxToRelu6PassTest, wrong_condition_NEG)
+{
+ _min_max_g._maxi_const->at<loco::DataType::FLOAT32>(0) = 2.;
+
+ auto ret = _pass.run(&_min_max_g._g);
+
+ EXPECT_FALSE(ret);
+}
diff --git a/compiler/luci/pass/src/TransformMinReluToRelu6Pass.cpp b/compiler/luci/pass/src/TransformMinReluToRelu6Pass.cpp
new file mode 100644
index 000000000..cccc0134c
--- /dev/null
+++ b/compiler/luci/pass/src/TransformMinReluToRelu6Pass.cpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/TransformMinReluToRelu6Pass.h"
+
+#include "helpers/NodeFiller.h"
+#include "helpers/TypeMapper.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+namespace
+{
+
+template <loco::DataType DT>
+bool is_scalar_with_value(luci::CircleConst *node, typename loco::DataTypeImpl<DT>::Type val)
+{
+ if (node->dtype() != DT)
+ return false;
+ if (node->rank() != 0)
+ return false;
+ if (node->size<DT>() != 1)
+ return false;
+ if (node->at<DT>(0) != static_cast<typename loco::DataTypeImpl<DT>::Type>(val))
+ return false;
+
+ return true;
+}
+
+/**
+ * BEFORE
+ * [CircleNode]
+ * |
+ * [CircleMinimum]
+ * |
+ * [CircleRelu]
+ * |
+ * [CircleNode]
+ *
+ * AFTER
+ *
+ * [CircleNode]
+ * |
+ * [CircleRelu6]
+ * |
+ * [CircleNode]
+ *
+ * NOTE Only relu(min(input, 6)) pattern will be transformed.
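+ *
+ *      e.g., relu(min(x, 6)) --> relu6(x), while relu(min(x, 5)) is not
+ *      transformed since the Minimum constant must be exactly 6.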
+ */
+template <loco::DataType DT> bool transform_min_relu_pattern(luci::CircleRelu *relu)
+{
+ if (not relu)
+ return false;
+
+ if (relu->dtype() != DT)
+ return false;
+
+ auto *mini = dynamic_cast<luci::CircleMinimum *>(relu->features());
+ if (not mini)
+ return false;
+
+ luci::CircleConst *mini_const = nullptr;
+ loco::Node *mini_input = nullptr;
+
+  // There are two ways Minimum takes inputs.
+  // 1. Minimum(x = CircleNode, y = CircleConst)
+  // 2. Minimum(x = CircleConst, y = CircleNode)
+ if (not luci::fill(&mini_const, &mini_input).with_commutative_args_of(mini))
+ return false;
+
+  // Minimum constant should be a scalar whose value is 6.
+ if (not is_scalar_with_value<DT>(mini_const,
+ static_cast<typename loco::DataTypeImpl<DT>::Type>(6)))
+ return false;
+
+ auto name = relu->name();
+ assert(name.length() > 0);
+
+ // Create Relu6 op
+ auto relu6 = mini->graph()->nodes()->create<luci::CircleRelu6>();
+ relu6->features(mini_input);
+ relu6->name(name + "/Relu6");
+ luci::add_origin(relu6, luci::composite_origin({luci::get_origin(relu), luci::get_origin(mini)}));
+
+ replace(relu).with(relu6);
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool TransformMinReluToRelu6Pass::run(loco::Graph *g)
+{
+ bool changed = false;
+
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto relu = dynamic_cast<luci::CircleRelu *>(node))
+ {
+ if (transform_min_relu_pattern<loco::DataType::FLOAT32>(relu))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/TransformMinReluToRelu6Pass.test.cpp b/compiler/luci/pass/src/TransformMinReluToRelu6Pass.test.cpp
new file mode 100644
index 000000000..01ba722a8
--- /dev/null
+++ b/compiler/luci/pass/src/TransformMinReluToRelu6Pass.test.cpp
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/TransformMinReluToRelu6Pass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+/**
+ * Minimum-Relu pattern graph
+ *
+ * [CircleInput] [CircleConst]
+ * \ /
+ * [CircleMinimum]
+ * |
+ * [CircleRelu]
+ * |
+ * [CircleOutput]
+ */
+struct MinReluGraph
+{
+ loco::Graph _g;
+ luci::CircleInput *_input = nullptr;
+ luci::CircleMinimum *_mini = nullptr;
+ luci::CircleConst *_mini_const = nullptr;
+ luci::CircleRelu *_relu = nullptr;
+ luci::CircleOutput *_output = nullptr;
+};
+
+class TransformMinReluToRelu6PassTest : public ::testing::Test
+{
+protected:
+ virtual void SetUp()
+ {
+ const int N = 1;
+ const int H = 4;
+ const int W = 4;
+ const int C = 3;
+
+ // graph input and output
+ auto graph_input = _min_relu_g._g.inputs()->create();
+ auto graph_output = _min_relu_g._g.outputs()->create();
+
+ // CircleInput
+ _min_relu_g._input = _min_relu_g._g.nodes()->create<luci::CircleInput>();
+ _min_relu_g._input->index(graph_input->index());
+ _min_relu_g._input->shape({N, H, W, C});
+ _min_relu_g._input->dtype(loco::DataType::FLOAT32);
+ _min_relu_g._input->name("input");
+
+ // CircleConst
+ _min_relu_g._mini_const = _min_relu_g._g.nodes()->create<luci::CircleConst>();
+ _min_relu_g._mini_const->shape({}); // scalar
+ _min_relu_g._mini_const->dtype(loco::DataType::FLOAT32);
+ _min_relu_g._mini_const->size<loco::DataType::FLOAT32>(1);
+ _min_relu_g._mini_const->at<loco::DataType::FLOAT32>(0) = 6.;
+ _min_relu_g._mini_const->name("mini_const");
+
+ // CircleMinimum
+ _min_relu_g._mini = _min_relu_g._g.nodes()->create<luci::CircleMinimum>();
+ _min_relu_g._mini->x(_min_relu_g._input);
+ _min_relu_g._mini->y(_min_relu_g._mini_const);
+ _min_relu_g._mini->shape({N, H, W, C});
+ _min_relu_g._mini->dtype(loco::DataType::FLOAT32);
+ _min_relu_g._mini->name("mini");
+
+ // CircleRelu
+ _min_relu_g._relu = _min_relu_g._g.nodes()->create<luci::CircleRelu>();
+ _min_relu_g._relu->features(_min_relu_g._mini);
+ _min_relu_g._relu->shape({N, H, W, C});
+ _min_relu_g._relu->dtype(loco::DataType::FLOAT32);
+ _min_relu_g._relu->name("relu");
+
+ // CircleOutput
+ _min_relu_g._output = _min_relu_g._g.nodes()->create<luci::CircleOutput>();
+ _min_relu_g._output->index(graph_output->index());
+ _min_relu_g._output->from(_min_relu_g._relu);
+ _min_relu_g._output->shape({N, H, W, C});
+ _min_relu_g._output->dtype(loco::DataType::FLOAT32);
+ _min_relu_g._output->name("output");
+ }
+
+protected:
+ luci::TransformMinReluToRelu6Pass _pass;
+ MinReluGraph _min_relu_g;
+};
+
+} // namespace
+
+TEST_F(TransformMinReluToRelu6PassTest, name)
+{
+ auto const name = _pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+/**
+ * Optimized graph looks like below.
+ *
+ * [CircleInput]
+ * |
+ * [CircleRelu6]
+ * |
+ * [CircleOutput]
+ */
+TEST_F(TransformMinReluToRelu6PassTest, simple_test)
+{
+ auto ret = _pass.run(&_min_relu_g._g);
+ EXPECT_TRUE(ret);
+
+ auto relu6 = dynamic_cast<luci::CircleRelu6 *>(_min_relu_g._output->from());
+ EXPECT_NE(nullptr, relu6);
+
+ auto input = dynamic_cast<luci::CircleInput *>(relu6->features());
+ EXPECT_NE(nullptr, input);
+}
+
+TEST_F(TransformMinReluToRelu6PassTest, wrong_condition_NEG)
+{
+ _min_relu_g._mini_const->at<loco::DataType::FLOAT32>(0) = 2.;
+
+ auto ret = _pass.run(&_min_relu_g._g);
+
+ EXPECT_FALSE(ret);
+}
diff --git a/compiler/luci/pass/src/TypeInferencePass.cpp b/compiler/luci/pass/src/TypeInferencePass.cpp
deleted file mode 100644
index 2c7b3a897..000000000
--- a/compiler/luci/pass/src/TypeInferencePass.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci/Pass/TypeInferencePass.h"
-
-#include <luci/IR/CircleDialect.h>
-#include <luci/Service/CircleTypeInferenceRule.h>
-
-#include <loco.h>
-#include <loco/IR/CanonicalDialect.h>
-#include <loco/Service/TypeInference.h>
-
-namespace luci
-{
-
-bool TypeInferencePass::run(loco::Graph *g)
-{
- loco::CanonicalTypeInferenceRule canonical_rule;
- luci::CircleTypeInferenceRule circle_rule;
-
- loco::MultiDialectTypeInferenceRule rules;
-
- rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
- .bind(luci::CircleDialect::get(), &circle_rule);
-
- return loco::apply(&rules).to(g);
-}
-
-} // namespace luci
diff --git a/compiler/luci/pass/src/UnrollUnidirectionalSequenceLSTMPass.cpp b/compiler/luci/pass/src/UnrollUnidirectionalSequenceLSTMPass.cpp
new file mode 100644
index 000000000..b73efafa5
--- /dev/null
+++ b/compiler/luci/pass/src/UnrollUnidirectionalSequenceLSTMPass.cpp
@@ -0,0 +1,672 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/UnrollUnidirectionalSequenceLSTMPass.h"
+
+#include "helpers/NodeFiller.h"
+#include "helpers/TypeMapper.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+#include <string>
+#include <vector>
+
+/**
+ * BEFORE
+ * [CircleNode]
+ * |
+ * [UnidirectionalSequenceLSTM]
+ * |
+ * [CircleNode]
+ *
+ * AFTER
+ *
+ * [CircleNode]
+ * |
+ * [CircleTranspose]
+ * |
+ * [CircleUnpack]
+ * |
+ * [CircleUnpackOut]
+ * |
+ * (Unrolled sub network)
+ * |
+ * [CirclePack]
+ * | |
+ * [CircleTranspose] [UnidirectionalSequenceLSTM]
+ * | |
+ * [CircleNode]
+ *
+ * NOTE for timesteps = 1,
+ * the first [CircleTranspose] is not added and
+ * the last [CirclePack] + [CircleTranspose] is replaced with [CircleReshape]
+ *
+ * The first unrolled sub-network is as follows
+ * - [] and 'Circle' are omitted
+ * - every FC has one or two Const nodes for Weight/Bias
+ *
+ * (input)
+ * |
+ * FC
+ * |
+ * Split
+ * +---------+----------+----------+
+ * | | | |
+ * | Logistic Logistic Tanh
+ * | Const | | |
+ * | | | | |
+ * | +-- Mul +-- Mul ---+
+ * | | |
+ * | +---- Add ------+
+ * | |
+ * | +----+----+
+ * | | |
+ * Logistic Tanh |
+ * | | |
+ * +-- Mul ----+ |
+ * | |
+ * (output) (A)
+ *
+ * and the following unrolled sub-networks are:
+ *
+ * (prev-output) (input)
+ * | |
+ * FC FC
+ * | |
+ * +--- Add --+
+ * Const |
+ * | |
+ * +------ Add
+ * |
+ * Split
+ * |
+ * +---------+----------+----------+
+ * SplitOut SplitOut SplitOut SplitOut
+ * | | | |
+ * | Logistic Logistic Tanh
+ * | (A') | | |
+ * | | | | |
+ * | +--- Mul +-- Mul ---+
+ * | | |
+ * | +---- Add ------+
+ * | |
+ * | +----+----+
+ * | | |
+ * Logistic Tanh |
+ * | | |
+ * +-- Mul ----+ |
+ * | |
+ * (output) (next)
+ *
+ * where (A) and (A') are connected
+ *
+ */
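+
+/*
+ * A concrete walk-through (illustrative): for an LSTM with output shape
+ * [batch=1, timesteps=3, units=2], the pass creates one Transpose, one
+ * Unpack with three [1, feature] outputs, three unrolled gate
+ * sub-networks chained through (A)/(A'), and finally one Pack of the
+ * three [1, 2] Mul outputs followed by a Transpose.
+ */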
+
+namespace
+{
+
+struct UnrollLSTM
+{
+ luci::CircleConst *transpose_perm(void);
+ luci::CircleTranspose *first_transpose(luci::CircleNode *input);
+ std::vector<luci::CircleUnpackOut *> input_unpacks(luci::CircleNode *input);
+ luci::CircleConst *merged_weights(luci::CircleConst *iw, luci::CircleConst *fw,
+ luci::CircleConst *cw, luci::CircleConst *ow);
+ luci::CircleFullyConnected *create_input_matmul(luci::CircleNode *input);
+ luci::CircleAdd *create_input_matmul(luci::CircleNode *input, luci::CircleMul *mul,
+ uint32_t step);
+ std::vector<luci::CircleSplitOut *> matmul_splits(luci::CircleNode *input, uint32_t step);
+ luci::CircleConst *forget_zero(void);
+ luci::CircleMul *forget_gate_cell(std::vector<luci::CircleSplitOut *> &splits,
+ luci::CircleNode *prev, uint32_t step,
+ luci::CircleNode **retadd);
+ luci::CircleReshape *last_reshape(luci::CircleNode *input);
+ luci::CircleTranspose *last_transpose(std::vector<luci::CircleMul *> &output_muls);
+
+ luci::CircleUnidirectionalSequenceLSTM *_lstm{nullptr};
+ loco::Graph::NodeContext *_nctx{nullptr};
+ std::string _name;
+ uint32_t _batch{0};
+ uint32_t _timesteps{0};
+ uint32_t _units{0}; // output space dim
+};
+
+luci::CircleConst *UnrollLSTM::transpose_perm(void)
+{
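+ // permutation {1, 0, 2}: swap the first two axes (batch <-> timesteps)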
+ auto perm = _nctx->create<luci::CircleConst>();
+ perm->dtype(loco::DataType::S32);
+ perm->rank(1);
+ perm->dim(0) = 3;
+ perm->size<loco::DataType::S32>(3);
+ perm->at<loco::DataType::S32>(0) = 1;
+ perm->at<loco::DataType::S32>(1) = 0;
+ perm->at<loco::DataType::S32>(2) = 2;
+ perm->shape_status(luci::ShapeStatus::VALID);
+
+ return perm;
+}
+
+luci::CircleTranspose *UnrollLSTM::first_transpose(luci::CircleNode *input)
+{
+ assert(input != nullptr);
+
+ auto perm = transpose_perm();
+ perm->name(_name + "_perm1");
+ luci::add_origin(perm, luci::get_origin(_lstm));
+
+ auto transpose = _nctx->create<luci::CircleTranspose>();
+ transpose->a(input);
+ transpose->perm(perm);
+ transpose->name(_name + "_trans1");
+ luci::add_origin(transpose, luci::get_origin(_lstm));
+
+ return transpose;
+}
+
+std::vector<luci::CircleUnpackOut *> UnrollLSTM::input_unpacks(luci::CircleNode *input)
+{
+ assert(input != nullptr);
+
+ // NOTE the unpack input can be the LSTM's input or the inserted Transpose
+ auto unpack = _nctx->create<luci::CircleUnpack>();
+ unpack->num(_timesteps);
+ unpack->axis(0);
+ unpack->value(input);
+ unpack->name(_name + "_unpack");
+ luci::add_origin(unpack, luci::get_origin(_lstm));
+
+ std::vector<luci::CircleUnpackOut *> outs;
+ for (uint32_t idx = 0; idx < _timesteps; ++idx)
+ {
+ auto unpackout = _nctx->create<luci::CircleUnpackOut>();
+ unpackout->input(unpack);
+ unpackout->index(idx);
+ unpackout->name(_name + "_unpackout_" + std::to_string(idx));
+ luci::add_origin(unpackout, luci::get_origin(_lstm));
+ outs.push_back(unpackout);
+ }
+
+ return outs;
+}
+
+luci::CircleConst *UnrollLSTM::merged_weights(luci::CircleConst *iw, luci::CircleConst *fw,
+ luci::CircleConst *cw, luci::CircleConst *ow)
+{
+ assert(iw != nullptr);
+ assert(fw != nullptr);
+ assert(cw != nullptr);
+ assert(ow != nullptr);
+
+ auto iw_rank = iw->rank();
+ assert(iw_rank == fw->rank());
+ assert(iw_rank == cw->rank());
+ assert(iw_rank == ow->rank());
+
+ uint32_t ne_w = 1;
+ for (uint32_t i = 0; i < iw_rank; i++)
+ ne_w *= iw->dim(i).value();
+
+ assert(iw->dtype() == loco::DataType::FLOAT32);
+ assert(fw->dtype() == loco::DataType::FLOAT32);
+ assert(cw->dtype() == loco::DataType::FLOAT32);
+ assert(ow->dtype() == loco::DataType::FLOAT32);
+
+ // merged weights
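+ // (the i/f/c/o parts are stacked along dim(0); e.g. four [units, feature]
+ // consts become a single [4 * units, feature] const)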
+ auto mw = _nctx->create<luci::CircleConst>();
+ mw->dtype(iw->dtype());
+ mw->rank(iw_rank);
+ mw->dim(0) = 4u * iw->dim(0).value();
+ for (uint32_t i = 1; i < iw_rank; i++)
+ mw->dim(i) = iw->dim(i);
+ mw->size<loco::DataType::FLOAT32>(4 * ne_w);
+ mw->shape_status(luci::ShapeStatus::VALID);
+ for (uint32_t i = 0; i < ne_w; ++i)
+ {
+ mw->at<loco::DataType::FLOAT32>(i + ne_w * 0) = iw->at<loco::DataType::FLOAT32>(i);
+ mw->at<loco::DataType::FLOAT32>(i + ne_w * 1) = fw->at<loco::DataType::FLOAT32>(i);
+ mw->at<loco::DataType::FLOAT32>(i + ne_w * 2) = cw->at<loco::DataType::FLOAT32>(i);
+ mw->at<loco::DataType::FLOAT32>(i + ne_w * 3) = ow->at<loco::DataType::FLOAT32>(i);
+ }
+ return mw;
+}
+
+luci::CircleFullyConnected *UnrollLSTM::create_input_matmul(luci::CircleNode *input)
+{
+ assert(input != nullptr);
+
+ // weights
+ auto iw = loco::must_cast<luci::CircleConst *>(_lstm->input_to_input_weights());
+ auto fw = loco::must_cast<luci::CircleConst *>(_lstm->input_to_forget_weights());
+ auto cw = loco::must_cast<luci::CircleConst *>(_lstm->input_to_cell_weights());
+ auto ow = loco::must_cast<luci::CircleConst *>(_lstm->input_to_output_weights());
+
+ auto fcw = merged_weights(iw, fw, cw, ow);
+ fcw->name(_name + "_fc_w");
+ luci::add_origin(fcw, luci::get_origin(_lstm));
+
+ // bias
+ auto ib = loco::must_cast<luci::CircleConst *>(_lstm->input_gate_bias());
+ auto fb = loco::must_cast<luci::CircleConst *>(_lstm->forget_gate_bias());
+ auto cb = loco::must_cast<luci::CircleConst *>(_lstm->cell_gate_bias());
+ auto ob = loco::must_cast<luci::CircleConst *>(_lstm->output_gate_bias());
+
+ auto fcb = merged_weights(ib, fb, cb, ob);
+ fcb->name(_name + "_fc_b");
+ luci::add_origin(fcb, luci::get_origin(_lstm));
+
+ auto fc = _nctx->create<luci::CircleFullyConnected>();
+ fc->input(input);
+ fc->weights(fcw);
+ fc->bias(fcb);
+ fc->fusedActivationFunction(luci::FusedActFunc::NONE);
+ fc->name(_name + "_fc");
+ luci::add_origin(fc, luci::get_origin(_lstm));
+
+ return fc;
+}
+
+luci::CircleAdd *UnrollLSTM::create_input_matmul(luci::CircleNode *input, luci::CircleMul *mul,
+ uint32_t step)
+{
+ assert(input != nullptr);
+ assert(mul != nullptr);
+ assert(step < _timesteps);
+
+ auto base_name = _name + "_matmul" + std::to_string(step);
+
+ // input weights
+ auto iw = loco::must_cast<luci::CircleConst *>(_lstm->input_to_input_weights());
+ auto fw = loco::must_cast<luci::CircleConst *>(_lstm->input_to_forget_weights());
+ auto cw = loco::must_cast<luci::CircleConst *>(_lstm->input_to_cell_weights());
+ auto ow = loco::must_cast<luci::CircleConst *>(_lstm->input_to_output_weights());
+
+ auto fcw = merged_weights(iw, fw, cw, ow);
+ fcw->name(base_name + "_fc_w");
+ luci::add_origin(fcw, luci::get_origin(_lstm));
+
+ auto fcb = _nctx->create<luci::CircleOutputExclude>();
+
+ auto fc = _nctx->create<luci::CircleFullyConnected>();
+ fc->input(input);
+ fc->weights(fcw);
+ fc->bias(fcb);
+ fc->fusedActivationFunction(luci::FusedActFunc::NONE);
+ fc->name(base_name + "_fc");
+ luci::add_origin(fc, luci::get_origin(_lstm));
+
+ // recurrent weights
+ auto ri = loco::must_cast<luci::CircleConst *>(_lstm->recurrent_to_input_weights());
+ auto rf = loco::must_cast<luci::CircleConst *>(_lstm->recurrent_to_forget_weights());
+ auto rc = loco::must_cast<luci::CircleConst *>(_lstm->recurrent_to_cell_weights());
+ auto ro = loco::must_cast<luci::CircleConst *>(_lstm->recurrent_to_output_weights());
+
+ auto fcrw = merged_weights(ri, rf, rc, ro);
+ fcrw->name(base_name + "_fcr_w");
+ luci::add_origin(fcrw, luci::get_origin(_lstm));
+
+ auto fcrb = _nctx->create<luci::CircleOutputExclude>();
+
+ auto fcr = _nctx->create<luci::CircleFullyConnected>();
+ fcr->input(mul);
+ fcr->weights(fcrw);
+ fcr->bias(fcrb);
+ fcr->fusedActivationFunction(luci::FusedActFunc::NONE);
+ fcr->name(base_name + "_fcr");
+ luci::add_origin(fcr, luci::get_origin(_lstm));
+
+ auto add_fc = _nctx->create<luci::CircleAdd>();
+ add_fc->x(fcr);
+ add_fc->y(fc);
+ add_fc->fusedActivationFunction(luci::FusedActFunc::NONE);
+ add_fc->name(base_name + "_addfc");
+ luci::add_origin(add_fc, luci::get_origin(_lstm));
+
+ // bias
+ auto ib = loco::must_cast<luci::CircleConst *>(_lstm->input_gate_bias());
+ auto fb = loco::must_cast<luci::CircleConst *>(_lstm->forget_gate_bias());
+ auto cb = loco::must_cast<luci::CircleConst *>(_lstm->cell_gate_bias());
+ auto ob = loco::must_cast<luci::CircleConst *>(_lstm->output_gate_bias());
+
+ auto bias = merged_weights(ib, fb, cb, ob);
+ bias->name(base_name + "_bias");
+
+ auto add_bias = _nctx->create<luci::CircleAdd>();
+ add_bias->x(add_fc);
+ add_bias->y(bias);
+ add_bias->fusedActivationFunction(luci::FusedActFunc::NONE);
+ add_bias->name(base_name + "_addbias");
+ luci::add_origin(add_bias, luci::get_origin(_lstm));
+
+ return add_bias;
+}
+
+std::vector<luci::CircleSplitOut *> UnrollLSTM::matmul_splits(luci::CircleNode *input,
+ uint32_t step)
+{
+ assert(input != nullptr);
+ assert(step < _timesteps);
+
+ std::string split_name = _name + "_sp" + std::to_string(step);
+
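+ // split the merged FC output into the four gate tensors along axis 1,
+ // in the same i/f/c/o order produced by merged_weights()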
+ auto split_dim = _nctx->create<luci::CircleConst>();
+ split_dim->dtype(loco::DataType::S32);
+ split_dim->rank(1);
+ split_dim->dim(0) = 1;
+ split_dim->size<loco::DataType::S32>(1);
+ split_dim->at<loco::DataType::S32>(0) = 1;
+ split_dim->shape_status(luci::ShapeStatus::VALID);
+ split_dim->name(split_name + "_dim");
+ luci::add_origin(split_dim, luci::get_origin(_lstm));
+
+ auto split = _nctx->create<luci::CircleSplit>();
+ split->num_split(4);
+ split->split_dim(split_dim);
+ split->input(input);
+ split->name(split_name);
+ luci::add_origin(split, luci::get_origin(_lstm));
+
+ auto split_o0 = _nctx->create<luci::CircleSplitOut>();
+ split_o0->input(split);
+ split_o0->index(0);
+ split_o0->name(split_name + "_spo0");
+ luci::add_origin(split_o0, luci::get_origin(_lstm));
+
+ auto split_o1 = _nctx->create<luci::CircleSplitOut>();
+ split_o1->input(split);
+ split_o1->index(1);
+ split_o1->name(split_name + "_spo1");
+ luci::add_origin(split_o1, luci::get_origin(_lstm));
+
+ auto split_o2 = _nctx->create<luci::CircleSplitOut>();
+ split_o2->input(split);
+ split_o2->index(2);
+ split_o2->name(split_name + "_spo2");
+ luci::add_origin(split_o2, luci::get_origin(_lstm));
+
+ auto split_o3 = _nctx->create<luci::CircleSplitOut>();
+ split_o3->input(split);
+ split_o3->index(3);
+ split_o3->name(split_name + "_spo3");
+ luci::add_origin(split_o3, luci::get_origin(_lstm));
+
+ std::vector<luci::CircleSplitOut *> outs;
+ outs.push_back(split_o0);
+ outs.push_back(split_o1);
+ outs.push_back(split_o2);
+ outs.push_back(split_o3);
+ return outs;
+}
+
+luci::CircleConst *UnrollLSTM::forget_zero(void)
+{
+ uint32_t amount = _batch * _units;
+
+ auto zero = _nctx->create<luci::CircleConst>();
+ zero->dtype(loco::DataType::FLOAT32);
+ zero->rank(2);
+ zero->dim(0) = _batch;
+ zero->dim(1) = _units;
+ zero->size<loco::DataType::FLOAT32>(amount);
+ for (uint32_t idx = 0; idx < amount; ++idx)
+ zero->at<loco::DataType::FLOAT32>(idx) = 0.0;
+ zero->shape_status(luci::ShapeStatus::VALID);
+ zero->name(_name + "_zero");
+ luci::add_origin(zero, luci::get_origin(_lstm));
+ return zero;
+}
+
+luci::CircleMul *UnrollLSTM::forget_gate_cell(std::vector<luci::CircleSplitOut *> &splits,
+ luci::CircleNode *prev, uint32_t step,
+ luci::CircleNode **retadd)
+{
+ assert(splits.size() > 0);
+ assert(prev != nullptr);
+ assert(step < _timesteps);
+
+ std::string net_name = _name + "_net" + std::to_string(step);
+
+ auto split_0 = splits[0]; // input-input : Logistic - Mul(c) - Add - Tanh - Mul
+ auto split_1 = splits[1]; // input-forget : Logistic - Mul(p) - Add - Tanh - Mul
+ auto split_2 = splits[2]; // input-cell : Tanh - Mul(c) - Add - Tanh - Mul
+ auto split_3 = splits[3]; // input-output : Logistic - Mul
+
+ auto logis_0 = _nctx->create<luci::CircleLogistic>();
+ logis_0->x(split_0);
+ logis_0->name(net_name + "_log0");
+ luci::add_origin(logis_0, luci::get_origin(_lstm));
+
+ auto logis_1 = _nctx->create<luci::CircleLogistic>();
+ logis_1->x(split_1);
+ logis_1->name(net_name + "_log1");
+ luci::add_origin(logis_1, luci::get_origin(_lstm));
+
+ auto tanh_2 = _nctx->create<luci::CircleTanh>();
+ tanh_2->x(split_2);
+ tanh_2->name(net_name + "_tanh2");
+ luci::add_origin(tanh_2, luci::get_origin(_lstm));
+
+ auto logis_3 = _nctx->create<luci::CircleLogistic>();
+ logis_3->x(split_3);
+ logis_3->name(net_name + "_log3");
+ luci::add_origin(logis_3, luci::get_origin(_lstm));
+
+ auto mul_c = _nctx->create<luci::CircleMul>();
+ mul_c->x(logis_0);
+ mul_c->y(tanh_2);
+ mul_c->fusedActivationFunction(luci::FusedActFunc::NONE);
+ mul_c->name(net_name + "_mul1");
+ luci::add_origin(mul_c, luci::get_origin(_lstm));
+
+ auto mul_p = _nctx->create<luci::CircleMul>();
+ mul_p->x(logis_1);
+ mul_p->y(prev);
+ mul_p->fusedActivationFunction(luci::FusedActFunc::NONE);
+ mul_p->name(net_name + "_mul2");
+ luci::add_origin(mul_p, luci::get_origin(_lstm));
+
+ auto add_cp = _nctx->create<luci::CircleAdd>();
+ add_cp->x(mul_c);
+ add_cp->y(mul_p);
+ add_cp->fusedActivationFunction(luci::FusedActFunc::NONE);
+ add_cp->name(net_name + "_add1");
+ luci::add_origin(add_cp, luci::get_origin(_lstm));
+
+ if (retadd != nullptr)
+ *retadd = add_cp;
+
+ auto tanh_cp = _nctx->create<luci::CircleTanh>();
+ tanh_cp->x(add_cp);
+ tanh_cp->name(net_name + "_tanh3");
+ luci::add_origin(tanh_cp, luci::get_origin(_lstm));
+
+ auto mul_out = _nctx->create<luci::CircleMul>();
+ mul_out->x(logis_3);
+ mul_out->y(tanh_cp);
+ mul_out->fusedActivationFunction(luci::FusedActFunc::NONE);
+ mul_out->name(net_name + "_mul3");
+ luci::add_origin(mul_out, luci::get_origin(_lstm));
+
+ return mul_out;
+}
+
+luci::CircleReshape *UnrollLSTM::last_reshape(luci::CircleNode *input)
+{
+ assert(input != nullptr);
+
+ auto reshape_s = _nctx->create<luci::CircleConst>();
+ reshape_s->dtype(loco::DataType::S32);
+ reshape_s->rank(1);
+ reshape_s->dim(0) = 3;
+ reshape_s->size<loco::DataType::S32>(3);
+ reshape_s->at<loco::DataType::S32>(0) = _batch;
+ reshape_s->at<loco::DataType::S32>(1) = _timesteps;
+ reshape_s->at<loco::DataType::S32>(2) = _units;
+ reshape_s->shape_status(luci::ShapeStatus::VALID);
+ reshape_s->name(_name + "_reshape_s");
+ luci::add_origin(reshape_s, luci::get_origin(_lstm));
+
+ auto reshape = _nctx->create<luci::CircleReshape>();
+ reshape->tensor(input);
+ reshape->shape(reshape_s);
+ reshape->newShape()->rank(3);
+ reshape->newShape()->dim(0) = _batch;
+ reshape->newShape()->dim(1) = _timesteps;
+ reshape->newShape()->dim(2) = _units;
+ reshape->name(_name + "_reshape");
+ luci::add_origin(reshape, luci::get_origin(_lstm));
+
+ return reshape;
+}
+
+luci::CircleTranspose *UnrollLSTM::last_transpose(std::vector<luci::CircleMul *> &output_muls)
+{
+ assert(output_muls.size() == _timesteps);
+
+ auto pack = _nctx->create<luci::CirclePack>(_timesteps);
+ pack->axis(0);
+ for (uint32_t idx = 0; idx < _timesteps; ++idx)
+ pack->values(idx, output_muls[idx]);
+ pack->name(_name + "_pack");
+ luci::add_origin(pack, luci::get_origin(_lstm));
+
+ auto perm = transpose_perm();
+ perm->name(_name + "_perm2");
+ luci::add_origin(perm, luci::get_origin(_lstm));
+
+ auto transpose = _nctx->create<luci::CircleTranspose>();
+ transpose->a(pack);
+ transpose->perm(perm);
+ transpose->name(_name + "_trans2");
+ luci::add_origin(transpose, luci::get_origin(_lstm));
+
+ return transpose;
+}
+
+bool unroll_lstm(luci::CircleUnidirectionalSequenceLSTM *lstm)
+{
+ // NOTE the shape of the LSTM input is interpreted as [batch, timesteps, feature]
+ // and the shape of its output as [batch, timesteps, units]
+ // TODO add more conditions to check LSTM
+ assert(lstm != nullptr);
+ assert(lstm->rank() == 3); // use assert to find out when this happens
+ if (lstm->rank() != 3)
+ return false;
+ if (!(lstm->dim(0).known() and lstm->dim(1).known() and lstm->dim(2).known()))
+ return false;
+
+ UnrollLSTM ulstm;
+ ulstm._lstm = lstm;
+ ulstm._nctx = lstm->graph()->nodes();
+ ulstm._name = lstm->name();
+ ulstm._batch = lstm->dim(0).value();
+ ulstm._timesteps = lstm->dim(1).value();
+ ulstm._units = lstm->dim(2).value(); // output space dim
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(lstm->input());
+ assert(input->rank() == 3); // use assert to find out when this happens
+ if (input->rank() != 3)
+ return false;
+ assert(input->dim(0).value() == ulstm._batch);
+ assert(input->dim(1).value() == ulstm._timesteps);
+
+ if (ulstm._timesteps > 1)
+ {
+ // Transpose to switch batch <-> timesteps
+ // NOTE TF uses Reshape when batch is 1, but since there is a
+ // Transpose->Reshape pass, we can just use Transpose for both cases
+ auto transpose = ulstm.first_transpose(input);
+ input = transpose;
+ }
+
+ auto unpacks = ulstm.input_unpacks(input);
+ assert(unpacks.size() == ulstm._timesteps);
+ uint32_t step = 0;
+ auto unpackout = unpacks[step];
+
+ // First FC
+ auto fc_1 = ulstm.create_input_matmul(unpackout);
+ assert(fc_1 != nullptr);
+ auto splits = ulstm.matmul_splits(fc_1, step);
+ assert(splits.size() == 4);
+
+ luci::CircleNode *prev = nullptr; // prev step CircleAdd
+ luci::CircleNode *this_add = nullptr;
+
+ prev = ulstm.forget_zero(); // provide all zero constant for first step
+
+ std::vector<luci::CircleMul *> output_muls;
+ auto mul_gc = ulstm.forget_gate_cell(splits, prev, step, &this_add);
+ assert(mul_gc != nullptr);
+ assert(this_add != nullptr);
+ // gather all Muls for last Pack
+ output_muls.push_back(mul_gc);
+
+ for (step = 1; step < ulstm._timesteps; ++step)
+ {
+ auto unpackout = unpacks[step];
+ auto add_n = ulstm.create_input_matmul(unpackout, mul_gc, step);
+
+ auto splits = ulstm.matmul_splits(add_n, step);
+ assert(splits.size() == 4);
+
+ prev = this_add;
+ mul_gc = ulstm.forget_gate_cell(splits, prev, step, &this_add);
+ assert(mul_gc != nullptr);
+ assert(this_add != nullptr);
+
+ output_muls.push_back(mul_gc);
+ }
+ assert(output_muls.size() == ulstm._timesteps);
+
+ if (ulstm._timesteps == 1)
+ {
+ // Reshape for single step
+ auto reshape = ulstm.last_reshape(mul_gc);
+ loco::replace(lstm).with(reshape);
+ }
+ else
+ {
+ // Pack + Transpose for two or more steps
+ auto transpose = ulstm.last_transpose(output_muls);
+ loco::replace(lstm).with(transpose);
+ }
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool UnrollUnidirectionalSequenceLSTMPass::run(loco::Graph *g)
+{
+ bool changed = false;
+
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto lstm = dynamic_cast<luci::CircleUnidirectionalSequenceLSTM *>(node))
+ {
+ if (unroll_lstm(lstm))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/UnrollUnidirectionalSequenceLSTMPass.test.cpp b/compiler/luci/pass/src/UnrollUnidirectionalSequenceLSTMPass.test.cpp
new file mode 100644
index 000000000..3f273cbd3
--- /dev/null
+++ b/compiler/luci/pass/src/UnrollUnidirectionalSequenceLSTMPass.test.cpp
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/UnrollUnidirectionalSequenceLSTMPass.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <luci/IR/Nodes/CircleUnidirectionalSequenceLSTM.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class UniSeqLSTMGraphlet
+{
+public:
+ UniSeqLSTMGraphlet() = default;
+
+ void init(loco::Graph *g, const ShapeU32 oshape)
+ {
+ _uslstm = g->nodes()->create<luci::CircleUnidirectionalSequenceLSTM>();
+ _uslstm->dtype(loco::DataType::FLOAT32);
+ _uslstm->shape(oshape);
+ _uslstm->name("uslstm");
+
+ _uslstm->fusedActivationFunction(luci::FusedActFunc::TANH);
+ _uslstm->cell_clip(0.0);
+ _uslstm->proj_clip(0.0);
+ _uslstm->time_major(false);
+ _uslstm->asymmetric_quantize_inputs(false);
+
+ _iw = weight_1x1(g);
+ _rw = weight_1x1(g);
+ _gb = weight_1(g);
+ _ex = g->nodes()->create<luci::CircleOutputExclude>();
+ }
+
+protected:
+ luci::CircleConst *weight_1x1(loco::Graph *g)
+ {
+ auto w = g->nodes()->create<luci::CircleConst>();
+ w->dtype(loco::DataType::FLOAT32);
+ w->rank(2);
+ w->dim(0) = 1;
+ w->dim(1) = 1;
+ w->size<loco::DataType::FLOAT32>(1);
+ w->at<loco::DataType::FLOAT32>(0) = 1.0;
+ w->shape_status(luci::ShapeStatus::VALID);
+ return w;
+ }
+
+ luci::CircleConst *weight_1(loco::Graph *g)
+ {
+ auto w = g->nodes()->create<luci::CircleConst>();
+ w->dtype(loco::DataType::FLOAT32);
+ w->rank(1);
+ w->dim(0) = 1;
+ w->size<loco::DataType::FLOAT32>(1);
+ w->at<loco::DataType::FLOAT32>(0) = 1.0;
+ w->shape_status(luci::ShapeStatus::VALID);
+ return w;
+ }
+
+protected:
+ luci::CircleUnidirectionalSequenceLSTM *_uslstm = nullptr;
+ luci::CircleConst *_iw = nullptr;
+ luci::CircleConst *_rw = nullptr;
+ luci::CircleConst *_gb = nullptr;
+ luci::CircleOutputExclude *_ex = nullptr;
+};
+
+class UnrollUniSeqLSTMPassTestGraph : public TestIOGraph, public UniSeqLSTMGraphlet
+{
+public:
+ UnrollUniSeqLSTMPassTestGraph() = default;
+
+ void init(const ShapeU32 ishape, const ShapeU32 oshape)
+ {
+ TestIOGraph::init(ishape, oshape);
+ UniSeqLSTMGraphlet::init(g(), oshape);
+
+ auto inode = input();
+ _uslstm->input(inode);
+
+ _uslstm->input_to_input_weights(_iw);
+ _uslstm->input_to_forget_weights(_iw);
+ _uslstm->input_to_cell_weights(_iw);
+ _uslstm->input_to_output_weights(_iw);
+
+ _uslstm->recurrent_to_input_weights(_rw);
+ _uslstm->recurrent_to_forget_weights(_rw);
+ _uslstm->recurrent_to_cell_weights(_rw);
+ _uslstm->recurrent_to_output_weights(_rw);
+
+ _uslstm->cell_to_input_weights(_ex);
+ _uslstm->cell_to_forget_weights(_ex);
+ _uslstm->cell_to_output_weights(_ex);
+
+ _uslstm->input_gate_bias(_gb);
+ _uslstm->forget_gate_bias(_gb);
+ _uslstm->cell_gate_bias(_gb);
+ _uslstm->output_gate_bias(_gb);
+
+ _uslstm->projection_weights(_ex);
+ _uslstm->projection_bias(_ex);
+
+ _uslstm->output_state(_ex);
+ _uslstm->cell_state(_ex);
+
+ _uslstm->input_layer_norm_coefficients(_ex);
+ _uslstm->forget_layer_norm_coefficients(_ex);
+ _uslstm->cell_layer_norm_coefficients(_ex);
+ _uslstm->output_layer_norm_coefficients(_ex);
+
+ output()->from(_uslstm);
+ }
+};
+
+} // namespace
+
+namespace
+{
+
+using namespace luci::test;
+
+// FakeQuantGraphlet is for a simple negative test
+class FakeQuantGraphlet
+{
+public:
+ FakeQuantGraphlet() = default;
+
+public:
+ void init(loco::Graph *g)
+ {
+ _fq = g->nodes()->create<luci::CircleFakeQuant>();
+ _fq->name("fq");
+ }
+
+protected:
+ luci::CircleFakeQuant *_fq = nullptr;
+};
+
+class FakeQuantGraph : public TestIOGraph, public FakeQuantGraphlet
+{
+public:
+ FakeQuantGraph() = default;
+
+public:
+ void init(void)
+ {
+ TestIOGraph::init({1, 1, 1}, {1, 1, 1});
+ FakeQuantGraphlet::init(g());
+
+ _fq->inputs(input());
+
+ output()->from(_fq);
+ }
+};
+
+} // namespace
+
+TEST(UnrollUnidirectionalSequenceLSTMPassTestName, name)
+{
+ luci::UnrollUnidirectionalSequenceLSTMPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+class UnrollUnidirectionalSequenceLSTMPassTest : public ::testing::Test
+{
+public:
+ UnrollUniSeqLSTMPassTestGraph g;
+ luci::UnrollUnidirectionalSequenceLSTMPass pass;
+};
+
+TEST_F(UnrollUnidirectionalSequenceLSTMPassTest, simple_run)
+{
+ g.init({1, 1, 1}, {1, 1, 1});
+
+ EXPECT_TRUE(pass.run(g.g()));
+}
+
+class UnrollUnidirectionalSequenceLSTMPassTestN : public ::testing::Test
+{
+public:
+ FakeQuantGraph g;
+ luci::UnrollUnidirectionalSequenceLSTMPass pass;
+};
+
+TEST_F(UnrollUnidirectionalSequenceLSTMPassTestN, simple_run_NEG)
+{
+ g.init();
+
+ EXPECT_FALSE(pass.run(g.g()));
+}
diff --git a/compiler/luci/pass/src/VerifyQuantizedBiasScale.cpp b/compiler/luci/pass/src/VerifyQuantizedBiasScale.cpp
new file mode 100644
index 000000000..d40c19b9b
--- /dev/null
+++ b/compiler/luci/pass/src/VerifyQuantizedBiasScale.cpp
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "VerifyQuantizedBiasScale.h"
+
+#include <cmath>
+
+// This macro is undef at the end of the file
+#define RETURN_FALSE_UNLESS(ARG) \
+ if (not(ARG)) \
+ { \
+ return false; \
+ }
+
+namespace
+{
+
+bool same(float a, float b)
+{
+ constexpr float epsilon = 1e-10;
+ return std::abs(a - b) < epsilon;
+}
+
+// Check bias scale = input scale * weight scale
+// This function checks both LWQ and CWQ
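+// e.g. (illustrative) input scale 0.5 and weights scale 0.25 require a
+// bias scale of 0.125; for CWQ, the check runs once per output channel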
+bool check_bias_scale(const loco::Node *input, const loco::Node *weights, const loco::Node *bias)
+{
+ auto input_node = loco::must_cast<const luci::CircleNode *>(input);
+ auto input_qparam = input_node->quantparam();
+ RETURN_FALSE_UNLESS(input_qparam != nullptr);
+
+ auto weights_node = loco::must_cast<const luci::CircleNode *>(weights);
+ auto weights_qparam = weights_node->quantparam();
+ RETURN_FALSE_UNLESS(weights_qparam != nullptr);
+
+ auto bias_node = loco::must_cast<const luci::CircleNode *>(bias);
+ auto bias_qparam = bias_node->quantparam();
+ RETURN_FALSE_UNLESS(bias_qparam != nullptr);
+
+ RETURN_FALSE_UNLESS(input_qparam->scale.size() == 1);
+ RETURN_FALSE_UNLESS(weights_qparam->scale.size() == bias_qparam->scale.size());
+
+ auto input_scale = input_qparam->scale[0];
+ for (uint32_t i = 0; i < weights_qparam->scale.size(); i++)
+ {
+ auto weights_scale = weights_qparam->scale[i];
+ auto bias_scale = bias_qparam->scale[i];
+ RETURN_FALSE_UNLESS(same(bias_scale, input_scale * weights_scale));
+ }
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool VerifyQuantizedBiasScale::visit(const luci::CircleConv2D *node)
+{
+ RETURN_FALSE_UNLESS(check_bias_scale(node->input(), node->filter(), node->bias()));
+ return true;
+}
+
+bool VerifyQuantizedBiasScale::visit(const luci::CircleDepthwiseConv2D *node)
+{
+ RETURN_FALSE_UNLESS(check_bias_scale(node->input(), node->filter(), node->bias()));
+ return true;
+}
+
+bool VerifyQuantizedBiasScale::visit(const luci::CircleFullyConnected *node)
+{
+ luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+ if (bias != nullptr)
+ {
+ RETURN_FALSE_UNLESS(check_bias_scale(node->input(), node->weights(), node->bias()));
+ }
+ return true;
+}
+
+bool VerifyQuantizedBiasScale::visit(const luci::CircleTransposeConv *node)
+{
+ luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+ if (bias != nullptr)
+ {
+ RETURN_FALSE_UNLESS(check_bias_scale(node->outBackprop(), node->filter(), node->bias()));
+ }
+ return true;
+}
+
+} // namespace luci
+
+#undef RETURN_FALSE_UNLESS
diff --git a/compiler/luci/pass/src/VerifyQuantizedBiasScale.h b/compiler/luci/pass/src/VerifyQuantizedBiasScale.h
new file mode 100644
index 000000000..b41f78eca
--- /dev/null
+++ b/compiler/luci/pass/src/VerifyQuantizedBiasScale.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_VERIFY_QUANTIZED_BIAS_SCALE_H__
+#define __LUCI_VERIFY_QUANTIZED_BIAS_SCALE_H__
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+#include <memory>
+
+namespace luci
+{
+
+/**
+ * @brief Verify the scale of a quantized bias node
+ * @details
+ *
+ * Bias of CONV, DCONV, TCONV, FC layers should meet the following condition.
+ *
+ * bias scale = input scale * weights scale
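+ *
+ * A typical use is expected to look like (illustrative; `circle_node`
+ * is any luci::CircleNode *):
+ *
+ *   auto checker = VerifyQuantizedBiasScale::create();
+ *   bool ok = checker->verify(circle_node);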
+ */
+class VerifyQuantizedBiasScale : public luci::CircleNodeVisitor<bool>
+{
+public:
+ static std::shared_ptr<VerifyQuantizedBiasScale> create()
+ {
+ return std::make_shared<VerifyQuantizedBiasScale>();
+ };
+
+public:
+ bool verify(luci::CircleNode *node) { return node->accept(this); }
+
+private:
+ // Operators with bias
+ bool visit(const luci::CircleConv2D *node);
+ bool visit(const luci::CircleDepthwiseConv2D *node);
+ bool visit(const luci::CircleFullyConnected *node);
+ bool visit(const luci::CircleTransposeConv *node);
+
+ bool visit(const luci::CircleNode *) { return true; }
+};
+
+} // namespace luci
+
+#endif // __LUCI_VERIFY_QUANTIZED_BIAS_SCALE_H__
diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.cpp b/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.cpp
new file mode 100644
index 000000000..8697090a7
--- /dev/null
+++ b/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "VerifyQuantizedNodeGranularity.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Pass/QuantizationParameters.h>
+
+#include <memory>
+
+namespace luci
+{
+
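+// Factory: callers pick the verifier by granularity, e.g. (illustrative)
+//   auto v = VerifyQuantizedNodeGranularity::create(Granularity::ChannelWise);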
+std::shared_ptr<VerifyQuantizedNodeGranularity>
+VerifyQuantizedNodeGranularity::create(Granularity granularity)
+{
+ if (granularity == Granularity::ChannelWise)
+ return std::make_shared<VerifyQuantizedNodeChannelWiseGranularity>();
+ else if (granularity == Granularity::LayerWise)
+ return std::make_shared<VerifyQuantizedNodeLayerWiseGranularity>();
+ else
+ throw std::domain_error("Not supported Granularity type");
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.h b/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.h
new file mode 100644
index 000000000..cc618bf0e
--- /dev/null
+++ b/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.h
@@ -0,0 +1,633 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_VERIFY_QUANTIZED_NODE_GRANULARITY_H__
+#define __LUCI_VERIFY_QUANTIZED_NODE_GRANULARITY_H__
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Pass/QuantizationParameters.h>
+
+#include <memory>
+
+using Granularity = luci::QuantizationGranularity;
+
+// This macro is undef at the end of the file
+#define RETURN_FALSE_UNLESS(ARG) \
+ if (not(ARG)) \
+ { \
+ return false; \
+ }
+
+namespace luci
+{
+
+/**
+ * @brief Verify the granularity of a quantized node
+ * @details
+ *
+ * Targets to verify
+ * - node's output (i.e., node itself)
+ * - node's inputs
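+ *
+ * Below, LWQ (layer-wise quantization) means a single scale/zero-point
+ * per tensor, and CWQ (channel-wise quantization) means one scale and
+ * zero-point per channel along quantized_dimension.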
+ */
+class VerifyQuantizedNodeGranularity : public luci::CircleNodeVisitor<bool>
+{
+public:
+ static std::shared_ptr<VerifyQuantizedNodeGranularity> create(Granularity granularity);
+
+protected:
+ bool is_lwq(const loco::Node *node)
+ {
+ auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
+
+ if (circle_node->quantparam() == nullptr)
+ return false;
+
+ if (circle_node->quantparam()->scale.size() != 1)
+ return false;
+
+ if (circle_node->quantparam()->zerop.size() != 1)
+ return false;
+
+ return true;
+ }
+
+private:
+ virtual bool visit(const luci::CircleConv2D *node) = 0;
+
+ bool visit(const luci::CircleConcatenation *node)
+ {
+ // Skip granularity check for concatenation of indices
+ if (node->dtype() == loco::DataType::S32 or node->dtype() == loco::DataType::S64)
+ return true;
+
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ for (uint32_t i = 0; i < node->numValues(); i++)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node->values(i)));
+ }
+ return true;
+ }
+
+ bool visit(const luci::CircleDepthToSpace *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ return true;
+ }
+
+ virtual bool visit(const luci::CircleDepthwiseConv2D *node) = 0;
+
+ virtual bool visit(const luci::CircleInstanceNorm *node) = 0;
+
+ bool visit(const luci::CirclePack *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ for (uint32_t i = 0; i < node->values_count(); i++)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node->values(i)));
+ }
+ return true;
+ }
+
+ bool visit(const luci::CirclePad *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ return true;
+ }
+
+ bool visit(const luci::CirclePadV2 *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_lwq(node->constant_values()))
+ return true;
+ }
+
+ bool visit(const luci::CircleMirrorPad *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ return true;
+ }
+
+ virtual bool visit(const luci::CirclePRelu *node) = 0;
+
+ virtual bool visit(const luci::CircleTransposeConv *node) = 0;
+
+ virtual bool visit(const luci::CircleFullyConnected *node) = 0;
+
+ bool visit(const luci::CircleAdd *node)
+ {
+ // Skip granularity check for indices
+ if (node->dtype() == loco::DataType::S32 or node->dtype() == loco::DataType::S64)
+ return true;
+
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleAveragePool2D *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->value()));
+ return true;
+ }
+
+ bool visit(const luci::CircleLogicalOr *)
+ {
+ // Logical OR has bool-type inputs and output
+ // Nothing to be checked
+ return true;
+ }
+
+ bool visit(const luci::CircleMaxPool2D *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->value()));
+ return true;
+ }
+
+ bool visit(const luci::CircleLocalResponseNormalization *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleMean *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleMul *node)
+ {
+ // Skip granularity check for indices
+ if (node->dtype() == loco::DataType::S32 or node->dtype() == loco::DataType::S64)
+ return true;
+
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleNotEqual *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleOneHot *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->off_value()));
+ RETURN_FALSE_UNLESS(is_lwq(node->on_value()));
+ return true;
+ }
+
+ bool visit(const luci::CircleReduceMax *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleRelu *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->features()));
+ return true;
+ }
+
+ bool visit(const luci::CircleReshape *node)
+ {
+ auto input = loco::must_cast<const luci::CircleNode *>(node->tensor());
+ bool input_quantized = input->quantparam() != nullptr;
+ bool node_quantized = node->quantparam() != nullptr;
+ RETURN_FALSE_UNLESS(input_quantized == node_quantized);
+ RETURN_FALSE_UNLESS(not node_quantized or is_lwq(node))
+ RETURN_FALSE_UNLESS(not input_quantized or is_lwq(input));
+ return true;
+ }
+
+ bool visit(const luci::CircleLogistic *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSoftmax *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->logits()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSpaceToBatchND *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSpaceToDepth *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSlice *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSplit *node)
+ {
+ // node's output is the input of CircleSplitOut, thus not quantized
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSplitOut *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ return true;
+ }
+
+ bool visit(const luci::CircleSplitV *node)
+ {
+ // node's output is the input of CircleSplitVOut, thus not quantized
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSplitVOut *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ return true;
+ }
+
+ bool visit(const luci::CircleStridedSlice *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSum *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleArgMax *node)
+ {
+ // node's output is an index, thus not quantized
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleBatchToSpaceND *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleTanh *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ return true;
+ }
+
+ bool visit(const luci::CircleTranspose *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->a()));
+ return true;
+ }
+
+ bool visit(const luci::CircleFloor *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ return true;
+ }
+
+ bool visit(const luci::CircleGelu *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->features()));
+ return true;
+ }
+
+ bool visit(const luci::CircleGreater *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleGreaterEqual *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleDiv *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleFloorDiv *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleRsqrt *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSqrt *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ return true;
+ }
+
+ bool visit(const luci::CircleElu *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->features()));
+ return true;
+ }
+
+ bool visit(const luci::CirclePow *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->x()));
+ RETURN_FALSE_UNLESS(is_lwq(node->y()));
+ return true;
+ }
+
+ bool visit(const luci::CircleResizeBilinear *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleResizeNearestNeighbor *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleUnpack *node)
+ {
+ // node's output is the input of CircleUnpackOut, thus not quantized
+ RETURN_FALSE_UNLESS(is_lwq(node->value()));
+ return true;
+ }
+
+ bool visit(const luci::CircleUnpackOut *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ return true;
+ }
+
+ bool visit(const luci::CircleCast *node)
+ {
+ auto input = loco::must_cast<const luci::CircleNode *>(node->x());
+ bool input_quantized = input->quantparam() != nullptr;
+ bool node_quantized = node->quantparam() != nullptr;
+ RETURN_FALSE_UNLESS(not input_quantized or is_lwq(input));
+ RETURN_FALSE_UNLESS(not node_quantized or is_lwq(node));
+ return true;
+ }
+
+ // TODO: Implement more Ops
+
+ bool visit(const luci::CircleNode *) { return true; }
+};
+
+class VerifyQuantizedNodeChannelWiseGranularity final : public VerifyQuantizedNodeGranularity
+{
+private:
+ uint32_t rank(const loco::Node *node)
+ {
+ auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
+ return circle_node->rank();
+ }
+
+ bool is_cwq_const(const loco::Node *node, uint32_t channel_dim)
+ {
+ auto circle_node = loco::must_cast<const luci::CircleConst *>(node);
+
+ assert(channel_dim < circle_node->rank()); // FIX_CALLER_UNLESS
+ auto channel_size = circle_node->dim(channel_dim).value();
+
+ if (circle_node->quantparam() == nullptr)
+ return false;
+
+ if (circle_node->quantparam()->quantized_dimension != static_cast<int32_t>(channel_dim))
+ return false;
+
+ if (circle_node->quantparam()->scale.size() != channel_size)
+ return false;
+
+ if (circle_node->quantparam()->zerop.size() != channel_size)
+ return false;
+
+ return true;
+ }
+
+private:
+ bool visit(const luci::CircleConv2D *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_cwq_const(node->filter(), 0))
+ luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+ if (bias != nullptr)
+ RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1))
+ return true;
+ }
+
+ bool visit(const luci::CircleDepthwiseConv2D *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_cwq_const(node->filter(), 3))
+ luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+ if (bias != nullptr)
+ RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1))
+ return true;
+ }
+
+ bool visit(const luci::CircleInstanceNorm *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_cwq_const(node->gamma(), rank(node->gamma()) - 1))
+ RETURN_FALSE_UNLESS(is_cwq_const(node->beta(), rank(node->beta()) - 1))
+ return true;
+ }
+
+ bool visit(const luci::CirclePRelu *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_cwq_const(node->alpha(), rank(node->alpha()) - 1))
+ return true;
+ }
+
+ bool visit(const luci::CircleTransposeConv *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->outBackprop()))
+ RETURN_FALSE_UNLESS(is_cwq_const(node->filter(), 0))
+ luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+ if (bias != nullptr)
+ RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1))
+
+ return true;
+ }
+
+ bool visit(const luci::CircleFullyConnected *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_cwq_const(node->weights(), 0))
+ luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+ // Bias is optional (it can be CircleOutputExclude)
+ if (bias != nullptr)
+ RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1))
+ return true;
+ }
+};
+
+class VerifyQuantizedNodeLayerWiseGranularity final : public VerifyQuantizedNodeGranularity
+{
+private:
+ bool is_lwq_const(const loco::Node *node)
+ {
+ auto circle_node = loco::must_cast<const luci::CircleConst *>(node);
+
+ if (circle_node->quantparam() == nullptr)
+ return false;
+
+ if (circle_node->quantparam()->scale.size() != 1)
+ return false;
+
+ if (circle_node->quantparam()->zerop.size() != 1)
+ return false;
+
+ return true;
+ }
+
+private:
+ bool visit(const luci::CircleConv2D *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_lwq_const(node->filter()))
+ luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+ if (bias != nullptr)
+ RETURN_FALSE_UNLESS(is_lwq_const(node->bias()))
+ return true;
+ }
+
+ bool visit(const luci::CircleDepthwiseConv2D *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_lwq_const(node->filter()))
+ luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+ if (bias != nullptr)
+ RETURN_FALSE_UNLESS(is_lwq_const(node->bias()))
+ return true;
+ }
+
+ bool visit(const luci::CircleInstanceNorm *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_lwq_const(node->gamma()))
+ RETURN_FALSE_UNLESS(is_lwq_const(node->beta()))
+ return true;
+ }
+
+ bool visit(const luci::CirclePRelu *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_lwq_const(node->alpha()))
+ return true;
+ }
+
+ bool visit(const luci::CircleTransposeConv *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->outBackprop()))
+ RETURN_FALSE_UNLESS(is_lwq_const(node->filter()))
+ luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+ if (bias != nullptr)
+ RETURN_FALSE_UNLESS(is_lwq_const(node->bias()))
+ return true;
+ }
+
+ bool visit(const luci::CircleFullyConnected *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node))
+ RETURN_FALSE_UNLESS(is_lwq(node->input()))
+ RETURN_FALSE_UNLESS(is_lwq_const(node->weights()))
+ luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+ if (bias != nullptr)
+ RETURN_FALSE_UNLESS(is_lwq_const(node->bias()))
+ return true;
+ }
+};
+
+} // namespace luci
+
+#undef RETURN_FALSE_UNLESS
+
+#endif // __LUCI_VERIFY_QUANTIZED_NODE_GRANULARITY_H__
diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeType.cpp b/compiler/luci/pass/src/VerifyQuantizedNodeType.cpp
new file mode 100644
index 000000000..4bad9522b
--- /dev/null
+++ b/compiler/luci/pass/src/VerifyQuantizedNodeType.cpp
@@ -0,0 +1,586 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "VerifyQuantizedNodeType.h"
+
+#include <cmath>
+#include <memory>
+
+// This macro is undef at the end of the file
+#define RETURN_FALSE_UNLESS(ARG) \
+ if (not(ARG)) \
+ { \
+ return false; \
+ }
+
+namespace luci
+{
+
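+// Factory: callers pick the verifier by quantized data type, e.g. (illustrative)
+//   auto v = VerifyQuantizedNodeType::create(loco::DataType::U8);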
+std::shared_ptr<VerifyQuantizedNodeType> VerifyQuantizedNodeType::create(loco::DataType dtype)
+{
+ if (dtype == loco::DataType::U8)
+ return std::make_shared<VerifyQuantizedNodeU8Type>();
+ else if (dtype == loco::DataType::S16)
+ return std::make_shared<VerifyQuantizedNodeS16Type>();
+ else
+ throw std::domain_error("Not supported Quantized type");
+}
+
+} // namespace luci
+
+namespace luci
+{
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleAdd *node)
+{
+ // Allow add of indices
+ if (group_has_type(node, loco::DataType::S32) or group_has_type(node, loco::DataType::S64))
+ return true;
+
+ return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleArgMax *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, node->output_type()))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->dimension(), loco::DataType::S32) ||
+ has_type(node->dimension(), loco::DataType::S64))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleAveragePool2D *node)
+{
+ return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleBatchToSpaceND *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleCast *node)
+{
+ auto *input = loco::must_cast<luci::CircleNode *>(node->x());
+ bool input_quantized = input->quantparam() != nullptr;
+ if (input_quantized)
+ {
+ RETURN_FALSE_UNLESS(has_type(input, node->in_data_type()))
+ RETURN_FALSE_UNLESS(has_type(input, Qtype))
+ }
+
+ bool node_quantized = node->quantparam() != nullptr;
+ if (node_quantized)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, node->out_data_type()))
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ }
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleConv2D *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->filter(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->bias(), Btype))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleConcatenation *node)
+{
+ // Allow concatenation of indices
+ if (group_has_type(node, loco::DataType::S32) or group_has_type(node, loco::DataType::S64))
+ return true;
+
+ return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleDepthToSpace *node)
+{
+ return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleDepthwiseConv2D *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->filter(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->bias(), Btype))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleDiv *node)
+{
+ return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleElu *node)
+{
+ return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleFloor *node)
+{
+ RETURN_FALSE_UNLESS(group_has_type(node, Qtype));
+
+ // Check that the scale value is an integer
+ RETURN_FALSE_UNLESS(node->quantparam());
+ RETURN_FALSE_UNLESS(std::roundf(node->quantparam()->scale[0]) == node->quantparam()->scale[0]);
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleFloorDiv *node)
+{
+ RETURN_FALSE_UNLESS(group_has_type(node, Qtype));
+
+ // Check that the scale value is an integer
+ RETURN_FALSE_UNLESS(node->quantparam());
+ RETURN_FALSE_UNLESS(std::roundf(node->quantparam()->scale[0]) == node->quantparam()->scale[0]);
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleFullyConnected *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->weights(), Qtype))
+ luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+ if (bias != nullptr)
+ RETURN_FALSE_UNLESS(has_type(bias, Btype))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleGelu *node)
+{
+ return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleGreater *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, loco::DataType::BOOL))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Qtype))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleGreaterEqual *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, loco::DataType::BOOL))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Qtype))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleInstanceNorm *node)
+{
+ return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(
+ const luci::CircleLocalResponseNormalization *node)
+{
+ return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleLogicalOr *node)
+{
+ return group_has_type(node, loco::DataType::BOOL);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleMaxPool2D *node)
+{
+ return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleMean *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->reduction_indices(), loco::DataType::S32))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleMirrorPad *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->paddings(), loco::DataType::S32))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleMul *node)
+{
+ // Allow mul of indices
+ if (group_has_type(node, loco::DataType::S32) or group_has_type(node, loco::DataType::S64))
+ return true;
+
+ return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleNotEqual *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, loco::DataType::BOOL))
+ RETURN_FALSE_UNLESS(has_type(node->x(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->y(), Qtype))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleOneHot *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype));
+ RETURN_FALSE_UNLESS(has_type(node->indices(), loco::DataType::S32) ||
+ has_type(node->indices(), loco::DataType::S64));
+ RETURN_FALSE_UNLESS(has_type(node->depth(), loco::DataType::S32));
+ RETURN_FALSE_UNLESS(has_type(node->on_value(), Qtype));
+ RETURN_FALSE_UNLESS(has_type(node->off_value(), Qtype));
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CirclePack *node)
+{
+ return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CirclePad *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->paddings(), loco::DataType::S32))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CirclePadV2 *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->paddings(), loco::DataType::S32))
+ RETURN_FALSE_UNLESS(has_type(node->constant_values(), Qtype))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CirclePRelu *node)
+{
+ return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CirclePow *node)
+{
+ return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleReduceMax *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->reduction_indices(), loco::DataType::S32))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleRelu *node)
+{
+ return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleReshape *node)
+{
+ if (node->quantparam())
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->tensor(), Qtype))
+ }
+ else
+ {
+ RETURN_FALSE_UNLESS(has_type(node->tensor(), node->dtype()))
+ }
+ luci::CircleConst *shape = dynamic_cast<luci::CircleConst *>(node->shape());
+ if (shape != nullptr)
+ RETURN_FALSE_UNLESS(has_type(shape, loco::DataType::S32))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleResizeBilinear *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleResizeNearestNeighbor *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleRsqrt *node)
+{
+ return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleSlice *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->begin(), loco::DataType::S32) ||
+ has_type(node->begin(), loco::DataType::S64))
+ RETURN_FALSE_UNLESS(has_type(node->size(), loco::DataType::S32) ||
+ has_type(node->size(), loco::DataType::S64))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleSpaceToBatchND *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleSpaceToDepth *node)
+{
+ return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleSplit *node)
+{
+ // node's output is the input of CircleSplitOut, thus not quantized
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleSplitOut *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+
+ // SplitOut must have the same qparam as the input of Split
+ auto split = loco::must_cast<luci::CircleSplit *>(node->input());
+ auto input = loco::must_cast<luci::CircleNode *>(split->input());
+ RETURN_FALSE_UNLESS(node->quantparam() && input->quantparam());
+ RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]);
+ RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]);
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleSplitV *node)
+{
+ // node's output is the input of CircleSplitVOut, thus not quantized
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleSplitVOut *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+
+ // SplitVOut must have the same qparam as the input of SplitV
+ auto splitv = loco::must_cast<luci::CircleSplitV *>(node->input());
+ auto input = loco::must_cast<luci::CircleNode *>(splitv->input());
+ RETURN_FALSE_UNLESS(node->quantparam() && input->quantparam());
+ RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]);
+ RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]);
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleSqrt *node)
+{
+ return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleStridedSlice *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+
+ auto input = loco::must_cast<luci::CircleNode *>(node->input());
+ RETURN_FALSE_UNLESS(node->quantparam() && input->quantparam());
+ RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]);
+ RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]);
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleSum *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->reduction_indices(), loco::DataType::S32))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleTranspose *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->a(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->perm(), loco::DataType::S32))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleTransposeConv *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->outBackprop(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->filter(), Qtype))
+ luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+ if (bias != nullptr)
+ RETURN_FALSE_UNLESS(has_type(bias, Btype))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleUnpack *node)
+{
+ // node's output is the input of CircleUnpackOut, thus not quantized
+ RETURN_FALSE_UNLESS(has_type(node->value(), Qtype))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleUnpackOut *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+
+ // UnpackOut must have the same qparam as the input of Unpack
+ auto unpack = loco::must_cast<luci::CircleUnpack *>(node->input());
+ auto input = loco::must_cast<luci::CircleNode *>(unpack->value());
+ RETURN_FALSE_UNLESS(node->quantparam() && input->quantparam());
+ RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]);
+ RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]);
+ return true;
+}
+
+} // namespace luci
+
+namespace luci
+{
+
+bool VerifyQuantizedNodeU8Type::visit(const luci::CircleTanh *node)
+{
+ RETURN_FALSE_UNLESS(group_has_type(node, loco::DataType::U8));
+
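+ // tanh output covers [-1, 1]; spreading that range over 256 uint8 levels
+ // fixes the scale to 2/256 and the zero-point to 128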
+ RETURN_FALSE_UNLESS(node->quantparam());
+ RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 2.0f / 256.0f);
+ RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 128);
+ return true;
+}
+
+bool VerifyQuantizedNodeU8Type::visit(const luci::CircleLogistic *node)
+{
+ RETURN_FALSE_UNLESS(group_has_type(node, loco::DataType::U8));
+
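+ // logistic output covers [0, 1]; 256 uint8 levels give scale 1/256 and zero-point 0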
+ RETURN_FALSE_UNLESS(node->quantparam());
+ RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 1.0f / 256.0f);
+ RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 0);
+ return true;
+}
+
+bool VerifyQuantizedNodeU8Type::visit(const luci::CircleSoftmax *node)
+{
+ RETURN_FALSE_UNLESS(group_has_type(node, loco::DataType::U8));
+
+ RETURN_FALSE_UNLESS(node->quantparam());
+ RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 1.0f / 255.0f);
+ RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 0);
+ return true;
+}
+
+} // namespace luci
+
+namespace luci
+{
+
+bool VerifyQuantizedNodeS16Type::visit(const luci::CircleTanh *node)
+{
+ RETURN_FALSE_UNLESS(group_has_type(node, loco::DataType::S16));
+
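+ // S16 quantization is symmetric: mapping [-1, 1] onto 32768 levels per side
+ // fixes the scale to 1/32768 and the zero-point to 0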
+ RETURN_FALSE_UNLESS(node->quantparam());
+ RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 1.0f / 32768.0f);
+ RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 0);
+ return true;
+}
+
+bool VerifyQuantizedNodeS16Type::visit(const luci::CircleLogistic *node)
+{
+ RETURN_FALSE_UNLESS(group_has_type(node, loco::DataType::S16));
+
+ RETURN_FALSE_UNLESS(node->quantparam());
+ RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 1.0f / 32768.0f);
+ RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 0);
+ return true;
+}
+
+bool VerifyQuantizedNodeS16Type::visit(const luci::CircleSoftmax *node)
+{
+ RETURN_FALSE_UNLESS(group_has_type(node, loco::DataType::S16));
+
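+ // softmax output covers [0, 1]; the expected S16 qparam here is scale 1/32767
+ // with zero-point 0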
+ RETURN_FALSE_UNLESS(node->quantparam());
+ RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 1.0f / 32767.0f);
+ RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 0);
+ return true;
+}
+
+} // namespace luci
+
+#undef RETURN_FALSE_UNLESS
diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeType.h b/compiler/luci/pass/src/VerifyQuantizedNodeType.h
new file mode 100644
index 000000000..03f1e1d86
--- /dev/null
+++ b/compiler/luci/pass/src/VerifyQuantizedNodeType.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_VERIFY_QUANTIZED_NODE_TYPE_H__
+#define __LUCI_VERIFY_QUANTIZED_NODE_TYPE_H__
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+namespace luci
+{
+
+/**
+ * @brief Verify the data type of a quantized node
+ * @details Targets to verify
+ *          - node's output (i.e., the node itself)
+ *          - node's inputs
+ */
+class VerifyQuantizedNodeType
+{
+public:
+ static std::shared_ptr<VerifyQuantizedNodeType> create(loco::DataType dtype);
+
+public:
+ virtual bool verify(luci::CircleNode *node) = 0;
+};
+
+/**
+ * @brief Verify a node against its quantization type and bias type
+ *
+ * @tparam Qtype Quantization type of a node (e.g. Q8, Q16, ...)
+ * @tparam Btype Bias quantization type (e.g. S32 is used for Q8)
+ */
+template <loco::DataType Qtype, loco::DataType Btype>
+class VerifyQuantizedNodeTypeBase : public luci::CircleNodeVisitor<bool>,
+ public VerifyQuantizedNodeType
+{
+public:
+ bool verify(luci::CircleNode *node) override { return node->accept(this); }
+
+protected:
+ bool has_type(const loco::Node *node, loco::DataType dtype)
+ {
+ auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
+ return circle_node->dtype() == dtype;
+ }
+
+ // Check whether a node and all of its inputs have the given dtype
+ bool group_has_type(const loco::Node *node, loco::DataType dtype)
+ {
+ if (!has_type(node, dtype))
+ return false;
+
+ for (uint32_t i = 0; i < node->arity(); ++i)
+ if (!has_type(node->arg(i), dtype))
+ return false;
+
+ return true;
+ }
+
+private:
+ bool visit(const luci::CircleAdd *node);
+ bool visit(const luci::CircleArgMax *node);
+ bool visit(const luci::CircleAveragePool2D *node);
+ bool visit(const luci::CircleBatchToSpaceND *node);
+ bool visit(const luci::CircleCast *node);
+ bool visit(const luci::CircleConv2D *node);
+ bool visit(const luci::CircleConcatenation *node);
+ bool visit(const luci::CircleDepthToSpace *node);
+ bool visit(const luci::CircleDepthwiseConv2D *node);
+ bool visit(const luci::CircleDiv *node);
+ bool visit(const luci::CircleElu *node);
+ bool visit(const luci::CircleFloor *node);
+ bool visit(const luci::CircleFloorDiv *node);
+ bool visit(const luci::CircleFullyConnected *node);
+ bool visit(const luci::CircleGelu *node);
+ bool visit(const luci::CircleGreater *node);
+ bool visit(const luci::CircleGreaterEqual *node);
+ bool visit(const luci::CircleInstanceNorm *node);
+ bool visit(const luci::CircleLocalResponseNormalization *node);
+ bool visit(const luci::CircleLogicalOr *node);
+ bool visit(const luci::CircleMaxPool2D *node);
+ bool visit(const luci::CircleMean *node);
+ bool visit(const luci::CircleMirrorPad *node);
+ bool visit(const luci::CircleMul *node);
+ bool visit(const luci::CircleNotEqual *node);
+ bool visit(const luci::CircleOneHot *node);
+ bool visit(const luci::CirclePack *node);
+ bool visit(const luci::CirclePad *node);
+ bool visit(const luci::CirclePadV2 *node);
+ bool visit(const luci::CirclePRelu *node);
+ bool visit(const luci::CirclePow *node);
+ bool visit(const luci::CircleReduceMax *node);
+ bool visit(const luci::CircleRelu *node);
+ bool visit(const luci::CircleReshape *node);
+ bool visit(const luci::CircleResizeBilinear *node);
+ bool visit(const luci::CircleResizeNearestNeighbor *node);
+ bool visit(const luci::CircleRsqrt *node);
+ bool visit(const luci::CircleSlice *node);
+ bool visit(const luci::CircleSpaceToBatchND *node);
+ bool visit(const luci::CircleSpaceToDepth *node);
+ bool visit(const luci::CircleSplit *node);
+ bool visit(const luci::CircleSplitOut *node);
+ bool visit(const luci::CircleSplitV *node);
+ bool visit(const luci::CircleSplitVOut *node);
+ bool visit(const luci::CircleSqrt *node);
+ bool visit(const luci::CircleStridedSlice *node);
+ bool visit(const luci::CircleSum *node);
+ bool visit(const luci::CircleTranspose *node);
+ bool visit(const luci::CircleTransposeConv *node);
+ bool visit(const luci::CircleUnpack *node);
+ bool visit(const luci::CircleUnpackOut *node);
+
+ // NOTE The nodes below have different implementations for Qtype/Btype;
+ // they are implemented in VerifyQuantizedNodeU8Type and VerifyQuantizedNodeS16Type
+ // bool visit(const luci::CircleLogistic *node);
+ // bool visit(const luci::CircleSoftmax *node);
+ // bool visit(const luci::CircleTanh *node);
+
+ // TODO: Implement more Ops
+
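+ // Default case: nodes not handled above pass verification unconditionally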
+ bool visit(const luci::CircleNode *) { return true; }
+};
+
+class VerifyQuantizedNodeU8Type
+ : public VerifyQuantizedNodeTypeBase<loco::DataType::U8, loco::DataType::S32>
+{
+private:
+ bool visit(const luci::CircleLogistic *node);
+ bool visit(const luci::CircleSoftmax *node);
+ bool visit(const luci::CircleTanh *node);
+};
+
+class VerifyQuantizedNodeS16Type
+ : public VerifyQuantizedNodeTypeBase<loco::DataType::S16, loco::DataType::S64>
+{
+private:
+ bool visit(const luci::CircleLogistic *node);
+ bool visit(const luci::CircleSoftmax *node);
+ bool visit(const luci::CircleTanh *node);
+};
+
+} // namespace luci
+
+#endif // __LUCI_VERIFY_QUANTIZED_NODE_TYPE_H__
diff --git a/compiler/luci/pass/src/helpers/CreateCircleConst.cpp b/compiler/luci/pass/src/helpers/CreateCircleConst.cpp
new file mode 100644
index 000000000..bf1b0baf7
--- /dev/null
+++ b/compiler/luci/pass/src/helpers/CreateCircleConst.cpp
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CreateCircleConst.h"
+
+// NOTE Do NOT delete this file; it forces the compiler to check that 'CreateCircleConst.h'
+// is self-contained.
diff --git a/compiler/luci/pass/src/helpers/CreateCircleConst.h b/compiler/luci/pass/src/helpers/CreateCircleConst.h
new file mode 100644
index 000000000..89c1a47be
--- /dev/null
+++ b/compiler/luci/pass/src/helpers/CreateCircleConst.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PASS_HELPERS_CREATE_CIRCLE_CONST_H__
+#define __LUCI_PASS_HELPERS_CREATE_CIRCLE_CONST_H__
+
+#include <luci/IR/CircleNodes.h>
+
+#include "TypeMapper.h"
+
+#include <vector>
+
+namespace luci
+{
+
+// Create a CircleConst filled with a single value
+// Never returns nullptr
+// TODO Remove dtype from the argument
+template <typename T>
+CircleConst *create_const_node(loco::Graph *g, const loco::DataType dtype,
+ const std::vector<uint32_t> &shape, const T value)
+{
+ auto node = g->nodes()->create<CircleConst>();
+ node->dtype(dtype);
+ node->rank(shape.size());
+
+ uint32_t size = 1;
+ for (uint32_t i = 0; i < shape.size(); ++i)
+ {
+ node->dim(i) = shape.at(i);
+ size *= shape.at(i);
+ }
+ node->shape_status(ShapeStatus::VALID);
+
+ node->size<TypeMapper<T>::get()>(size);
+ for (uint32_t i = 0; i < size; i++)
+ {
+ node->at<TypeMapper<T>::get()>(i) = value;
+ }
+
+ return node;
+}
+
+// Create a CircleConst filled with the given values
+// Never returns nullptr
+// TODO Remove dtype from the argument
+template <typename T>
+luci::CircleConst *create_const_node(loco::Graph *g, const loco::DataType dtype,
+ const std::vector<uint32_t> &shape,
+ const std::vector<T> &values)
+{
+ auto node = g->nodes()->create<luci::CircleConst>();
+ node->dtype(dtype);
+ node->rank(shape.size());
+
+ uint32_t size = 1;
+ for (uint32_t i = 0; i < shape.size(); ++i)
+ {
+ node->dim(i) = shape.at(i);
+ size *= shape.at(i);
+ }
+ node->shape_status(luci::ShapeStatus::VALID);
+
+ node->size<TypeMapper<T>::get()>(size);
+ for (uint32_t i = 0; i < size; i++)
+ {
+ node->at<TypeMapper<T>::get()>(i) = values[i];
+ }
+
+ return node;
+}
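+
+// Usage sketch (hypothetical call sites; assumes TypeMapper<T> covers int32_t and float):
+//   // 4x2 paddings tensor filled with zeros
+//   auto pad = create_const_node<int32_t>(g, loco::DataType::S32, {4, 2}, 0);
+//   // 2x2 float constant with explicit values
+//   auto w = create_const_node<float>(g, loco::DataType::FLOAT32, {2, 2},
+//                                     std::vector<float>{1.f, 2.f, 3.f, 4.f});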
+
+} // namespace luci
+
+#endif // __LUCI_PASS_HELPERS_CREATE_CIRCLE_CONST_H__
diff --git a/compiler/luci/pass/src/helpers/InferenceCandidates.cpp b/compiler/luci/pass/src/helpers/InferenceCandidates.cpp
new file mode 100644
index 000000000..2c8565932
--- /dev/null
+++ b/compiler/luci/pass/src/helpers/InferenceCandidates.cpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "InferenceCandidates.h"
+
+#include <luci/IR/DeadNodeQueryService.h>
+
+namespace luci
+{
+
+std::vector<loco::Node *> inference_candidates(loco::Graph *g)
+{
+ auto candidates = loco::postorder_traversal(loco::output_nodes(g));
+
+ for (auto node : loco::all_nodes(g))
+ {
+ // already included as candidate
+ if (std::find(candidates.begin(), candidates.end(), node) != candidates.end())
+ continue;
+
+ // A dead node, which is used neither for a graph output nor by a
+ // multiple-output operation, cannot be a candidate.
+ if (node->dialect()->service<DeadNodeQueryServiceImpl>()->isDeadNode(node))
+ continue;
+
+ candidates.emplace_back(node);
+ }
+
+ return candidates;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/helpers/InferenceCandidates.h b/compiler/luci/pass/src/helpers/InferenceCandidates.h
new file mode 100644
index 000000000..f27e4fe60
--- /dev/null
+++ b/compiler/luci/pass/src/helpers/InferenceCandidates.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_INFERENCE_CANDIDATES_H__
+#define __LUCI_INFERENCE_CANDIDATES_H__
+
+#include <loco.h>
+
+#include <vector>
+
+namespace luci
+{
+
+/**
+ * @brief Enumerate all the nodes whose shape/dtype must be inferred to export the graph.
+ */
+std::vector<loco::Node *> inference_candidates(loco::Graph *g);
+
+} // namespace luci
+
+#endif // __LUCI_INFERENCE_CANDIDATES_H__
diff --git a/compiler/luci/pass/src/helpers/InferenceCandidates.test.cpp b/compiler/luci/pass/src/helpers/InferenceCandidates.test.cpp
new file mode 100644
index 000000000..e34421f5e
--- /dev/null
+++ b/compiler/luci/pass/src/helpers/InferenceCandidates.test.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "InferenceCandidates.h"
+#include "luci/IR/CircleNode.h"
+
+#include <algorithm>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+bool contains(const std::vector<loco::Node *> &vec, loco::Node *val)
+{
+ return std::any_of(vec.begin(), vec.end(), [val](loco::Node *node) { return node == val; });
+}
+
+} // namespace
+
+TEST(LuciPassHelpersInferenceCandidates, inference_candidates)
+{
+ auto g = loco::make_graph();
+
+ // Create nodes
+ auto input = g->nodes()->create<luci::CircleInput>();
+ auto split = g->nodes()->create<luci::CircleSplit>();
+ auto split_out1 = g->nodes()->create<luci::CircleSplitOut>();
+ auto split_out2 = g->nodes()->create<luci::CircleSplitOut>();
+ auto split_dim = g->nodes()->create<luci::CircleConst>();
+ auto output = g->nodes()->create<luci::CircleOutput>();
+
+ // Build up initial graph
+ auto graph_input1 = g->inputs()->create();
+ input->index(graph_input1->index());
+
+ split->split_dim(split_dim);
+ split->input(input);
+ split->num_split(2);
+
+ split_out1->input(split);
+ split_out1->index(0);
+
+ split_out2->input(split);
+ split_out2->index(1);
+
+ auto graph_output = g->outputs()->create();
+ output->from(split_out1);
+ output->index(graph_output->index());
+
+ auto s = luci::inference_candidates(g.get());
+
+ ASSERT_EQ(6, s.size());
+ ASSERT_TRUE(contains(s, input));
+ ASSERT_TRUE(contains(s, split));
+ ASSERT_TRUE(contains(s, split_out1));
+ ASSERT_TRUE(contains(s, split_out2));
+ ASSERT_TRUE(contains(s, split_dim));
+ ASSERT_TRUE(contains(s, output));
+}
+
+TEST(LuciPassHelpersInferenceCandidates, inference_candidates_NEG)
+{
+ auto g = loco::make_graph();
+
+ // Create nodes
+ auto input = g->nodes()->create<luci::CircleInput>();
+ auto split = g->nodes()->create<luci::CircleSplit>();
+ auto split_out1 = g->nodes()->create<luci::CircleSplitOut>();
+ auto split_out2 = g->nodes()->create<luci::CircleSplitOut>();
+ auto split_dim = g->nodes()->create<luci::CircleConst>();
+ auto relu1 = g->nodes()->create<luci::CircleRelu>();
+ auto relu2 = g->nodes()->create<luci::CircleRelu>();
+ auto output = g->nodes()->create<luci::CircleOutput>();
+
+ // Build up initial graph
+ auto graph_input1 = g->inputs()->create();
+ input->index(graph_input1->index());
+
+ split->split_dim(split_dim);
+ split->input(input);
+ split->num_split(2);
+
+ split_out1->input(split);
+ split_out1->index(0);
+
+ split_out2->input(split);
+ split_out2->index(1);
+
+ relu1->features(split_out2);
+
+ relu2->features(input);
+
+ auto graph_output = g->outputs()->create();
+ output->from(split_out1);
+ output->index(graph_output->index());
+
+ auto s = luci::inference_candidates(g.get());
+
+ ASSERT_EQ(6, s.size());
+ ASSERT_TRUE(contains(s, input));
+ ASSERT_TRUE(contains(s, split));
+ ASSERT_TRUE(contains(s, split_out1));
+ ASSERT_TRUE(contains(s, split_out2));
+ ASSERT_TRUE(contains(s, split_dim));
+ ASSERT_TRUE(contains(s, output));
+ ASSERT_FALSE(contains(s, relu1));
+ ASSERT_FALSE(contains(s, relu2));
+}
diff --git a/compiler/luci/pass/src/helpers/LayerInfoMap.cpp b/compiler/luci/pass/src/helpers/LayerInfoMap.cpp
new file mode 100644
index 000000000..37d8e18e9
--- /dev/null
+++ b/compiler/luci/pass/src/helpers/LayerInfoMap.cpp
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "LayerInfoMap.h"
+
+#include <luci/IR/CircleNode.h>
+
+#include <cassert>
+
+namespace luci
+{
+namespace
+{
+
+bool is_multiple_output_node(const luci::CircleNode *node)
+{
+ switch (node->opcode())
+ {
+ // The following nodes have multiple outputs. Output tensors are not produced by themselves but
+ // by the corresponding *Out nodes.
+ case luci::CircleOpcode::SPLIT:
+ case luci::CircleOpcode::SPLIT_V:
+ case luci::CircleOpcode::TOPK_V2:
+ case luci::CircleOpcode::UNIQUE:
+ case luci::CircleOpcode::UNPACK:
+ return true;
+ // TODO: Support these ops
+ case luci::CircleOpcode::BIDIRECTIONAL_SEQUENCE_LSTM:
+ case luci::CircleOpcode::CUSTOM:
+ case luci::CircleOpcode::IF:
+ case luci::CircleOpcode::NON_MAX_SUPPRESSION_V4:
+ case luci::CircleOpcode::NON_MAX_SUPPRESSION_V5:
+ case luci::CircleOpcode::WHILE:
+ throw std::runtime_error("Unsupported op now");
+ default:
+ return false;
+ }
+}
+
+const luci::CircleNode *get_multi_output_node(const luci::CircleNode *node)
+{
+ if (is_multiple_output_node(node))
+ return node;
+
+ switch (node->opcode())
+ {
+ // The following nodes denote outputs of multiple-output nodes.
+ case luci::CircleOpcode::CIRCLESPLITOUT:
+ {
+ const auto split_out = loco::must_cast<const CircleSplitOut *>(node);
+ return loco::must_cast<luci::CircleNode *>(split_out->input());
+ }
+ case luci::CircleOpcode::CIRCLESPLITVOUT:
+ {
+ const auto splitv_out = loco::must_cast<const CircleSplitVOut *>(node);
+ return loco::must_cast<luci::CircleNode *>(splitv_out->input());
+ }
+ case luci::CircleOpcode::CIRCLETOPKV2OUT:
+ {
+ const auto top_kv2_out = loco::must_cast<const CircleTopKV2Out *>(node);
+ return loco::must_cast<luci::CircleNode *>(top_kv2_out->input());
+ }
+ case luci::CircleOpcode::CIRCLEUNIQUEOUT:
+ {
+ const auto unique_out = loco::must_cast<const CircleUniqueOut *>(node);
+ return loco::must_cast<luci::CircleNode *>(unique_out->input());
+ }
+ case luci::CircleOpcode::CIRCLEUNPACKOUT:
+ {
+ const auto unpack_out = loco::must_cast<const CircleUnpackOut *>(node);
+ return loco::must_cast<luci::CircleNode *>(unpack_out->input());
+ }
+ // TODO: Support these ops
+ case luci::CircleOpcode::CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT:
+ case luci::CircleOpcode::CIRCLECUSTOMOUT:
+ case luci::CircleOpcode::CIRCLEIFOUT:
+ case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV4OUT:
+ case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT:
+ case luci::CircleOpcode::CIRCLEWHILEOUT:
+ throw std::runtime_error("Unsupported op now");
+ default:
+ return nullptr;
+ }
+}
+
+bool same_setting(const LayerInfo &left, const LayerInfo &right)
+{
+ return left.dtype == right.dtype and left.granularity == right.granularity;
+}
+
+void add_multi_output_node(LayerInfoMap &info_by_name, LayerInfo &layer_info,
+ const luci::CircleNode *node)
+{
+ assert(is_multiple_output_node(node)); // FIX_CALLER_UNLESS
+
+ const auto succs_nodes = loco::succs(node);
+ const auto name = node->name();
+
+ if (info_by_name.find(name) != info_by_name.end())
+ {
+ // Check that all outputs have equal dtype and granularity
+ for (const auto succs_node : succs_nodes)
+ {
+ const auto succs_circle_node = loco::must_cast<luci::CircleNode *>(succs_node);
+
+ const auto it = info_by_name.find(succs_circle_node->name());
+ if (it != info_by_name.end() and not same_setting(layer_info, (it->second)))
+ throw std::runtime_error("Outputs of multiple-output nodes should have equal dtype and "
+ "granularity. Check the quantization configuration file");
+ }
+ return;
+ }
+
+ // Add multiple output node to info_by_name
+ info_by_name[name] = {name, layer_info.dtype, layer_info.granularity};
+
+ // Add outputs node to info_by_name
+ for (const auto succs_node : succs_nodes)
+ {
+ const auto succs_circle_node = loco::must_cast<luci::CircleNode *>(succs_node);
+ const auto succs_circle_node_name = succs_circle_node->name();
+ info_by_name[succs_circle_node_name] = {succs_circle_node_name, layer_info.dtype,
+ layer_info.granularity};
+ }
+}
+
+} // namespace
+
+LayerInfoMap layer_info_map(loco::Graph *g, std::vector<LayerInfo> &layers_info)
+{
+ LayerInfoMap info_by_name;
+
+ for (auto &&info : layers_info)
+ {
+ auto &name = info.name;
+ bool found = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto cnode = loco::must_cast<luci::CircleNode *>(node);
+ if (cnode->opcode() == luci::CircleOpcode::CIRCLEOUTPUT)
+ continue;
+
+ if (cnode->name() == name)
+ {
+ // Check and add multiple-output node and its outputs to info_by_name
+ if (const auto multi_output = get_multi_output_node(cnode))
+ {
+ add_multi_output_node(info_by_name, info, multi_output);
+ found = true;
+ continue;
+ }
+
+ if (info_by_name.find(name) != info_by_name.end())
+ {
+ throw std::runtime_error("Duplicate layer name " + name +
+ ". Check layer names in the quantization configuration file.");
+ }
+
+ info_by_name[name] = info;
+ found = true;
+ continue;
+ }
+ }
+
+ if (not found)
+ throw std::runtime_error("No such layer named " + name +
+ ". Check layer names in the quantization configuration file.");
+ }
+
+ // TODO Check all names in layers_info exist in the info_by_name
+ // TODO Check names in info_by_name but not in layers_info are from virtual outputs
+
+ return info_by_name;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/helpers/LayerInfoMap.h b/compiler/luci/pass/src/helpers/LayerInfoMap.h
new file mode 100644
index 000000000..bb4724a50
--- /dev/null
+++ b/compiler/luci/pass/src/helpers/LayerInfoMap.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PASS_HELPERS_LAYER_INFO_MAP_H__
+#define __LUCI_PASS_HELPERS_LAYER_INFO_MAP_H__
+
+#include <luci/Pass/QuantizationParameters.h>
+
+#include <unordered_map>
+
+namespace luci
+{
+
+using LayerInfoMap = std::unordered_map<std::string, luci::LayerInfo>;
+
+LayerInfoMap layer_info_map(loco::Graph *g, std::vector<LayerInfo> &layers_info);
+
+} // namespace luci
+
+#endif // __LUCI_PASS_HELPERS_LAYER_INFO_MAP_H__
diff --git a/compiler/luci/pass/src/helpers/LayerInfoMap.test.cpp b/compiler/luci/pass/src/helpers/LayerInfoMap.test.cpp
new file mode 100644
index 000000000..2ed28eda4
--- /dev/null
+++ b/compiler/luci/pass/src/helpers/LayerInfoMap.test.cpp
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "LayerInfoMap.h"
+
+#include <luci/IR/CircleNode.h>
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+class SoftmaxTestGraph : public luci::test::TestIOGraph
+{
+public:
+ void init(void)
+ {
+ TestIOGraph::init({32}, {32});
+ _softmax = g()->nodes()->create<luci::CircleSoftmax>();
+ {
+ _softmax->logits(input());
+ _softmax->beta(0.1);
+ _softmax->name("test");
+ }
+ output()->from(_softmax);
+ }
+
+private:
+ luci::CircleSoftmax *_softmax = nullptr;
+};
+
+class SplitAddTestGraph : public luci::test::TestIOGraph
+{
+public:
+ void init(void)
+ {
+ TestIOGraph::init({6, 1, 2}, {3, 1, 2});
+ _split_dim = g()->nodes()->create<luci::CircleConst>();
+ {
+ _split_dim->rank(1);
+ _split_dim->dtype(loco::DataType::S32);
+ _split_dim->size<loco::DataType::S32>(1);
+ _split_dim->at<loco::DataType::S32>(0) = 0;
+ _split_dim->shape({1});
+ _split_dim->name("split_dim");
+ }
+
+ _split = g()->nodes()->create<luci::CircleSplit>();
+ {
+ _split->input(input());
+ _split->num_split(2);
+ _split->split_dim(_split_dim);
+ _split->name("split0");
+ }
+
+ _split_out_1 = g()->nodes()->create<luci::CircleSplitOut>();
+ {
+ _split_out_1->input(_split);
+ _split_out_1->index(0);
+ _split_out_1->name("split0");
+ }
+
+ _split_out_2 = g()->nodes()->create<luci::CircleSplitOut>();
+ {
+ _split_out_2->input(_split);
+ _split_out_2->index(1);
+ _split_out_2->name("split1");
+ }
+
+ _add = g()->nodes()->create<luci::CircleAdd>();
+ {
+ _add->x(_split_out_1);
+ _add->y(_split_out_2);
+ _add->name("add");
+ }
+ output()->from(_add);
+ }
+
+private:
+ luci::CircleSplit *_split = nullptr;
+ luci::CircleSplitOut *_split_out_1 = nullptr;
+ luci::CircleSplitOut *_split_out_2 = nullptr;
+ luci::CircleConst *_split_dim = nullptr;
+ luci::CircleAdd *_add = nullptr;
+};
+
+} // namespace
+
+TEST(LayerInfoMapTest, simple_test)
+{
+ SoftmaxTestGraph g;
+ g.init();
+
+ luci::LayerInfo info;
+ {
+ info.name = "test";
+ info.dtype = loco::DataType::U8;
+ info.granularity = luci::QuantizationGranularity::ChannelWise;
+ }
+ std::vector<luci::LayerInfo> v;
+ v.emplace_back(info);
+ auto map = luci::layer_info_map(g.g(), v);
+
+ EXPECT_EQ("test", map["test"].name);
+ EXPECT_EQ(loco::DataType::U8, map["test"].dtype);
+ EXPECT_EQ(luci::QuantizationGranularity::ChannelWise, map["test"].granularity);
+}
+
+TEST(LayerInfoMapTest, multiple_output_node_test)
+{
+ SplitAddTestGraph g;
+ g.init();
+
+ luci::LayerInfo info;
+ {
+ info.name = "split0";
+ info.dtype = loco::DataType::U8;
+ info.granularity = luci::QuantizationGranularity::ChannelWise;
+ }
+ std::vector<luci::LayerInfo> v;
+ v.emplace_back(info);
+ auto map = luci::layer_info_map(g.g(), v);
+
+ EXPECT_EQ(2, map.size());
+ EXPECT_EQ("split0", map["split0"].name);
+ EXPECT_EQ("split1", map["split1"].name);
+
+ EXPECT_EQ(loco::DataType::U8, map["split0"].dtype);
+ EXPECT_EQ(luci::QuantizationGranularity::ChannelWise, map["split0"].granularity);
+}
+
+TEST(LayerInfoMapTest, invalid_layer_info_multiple_output_node_NEG)
+{
+ SplitAddTestGraph g;
+ g.init();
+
+ luci::LayerInfo info_0;
+ {
+ info_0.name = "split0";
+ info_0.dtype = loco::DataType::U8;
+ info_0.granularity = luci::QuantizationGranularity::ChannelWise;
+ }
+ luci::LayerInfo info_1;
+ {
+ info_1.name = "split1";
+ info_1.dtype = loco::DataType::S16;
+ info_1.granularity = luci::QuantizationGranularity::ChannelWise;
+ }
+ std::vector<luci::LayerInfo> v;
+ v.emplace_back(info_0);
+ v.emplace_back(info_1);
+
+ EXPECT_ANY_THROW(luci::layer_info_map(g.g(), v));
+}
+
+TEST(LayerInfoMapTest, duplicate_name_NEG)
+{
+ SoftmaxTestGraph g;
+ g.init();
+ g.input()->name("test");
+
+ luci::LayerInfo info;
+ {
+ info.name = "test";
+ info.dtype = loco::DataType::U8;
+ info.granularity = luci::QuantizationGranularity::ChannelWise;
+ }
+ std::vector<luci::LayerInfo> v;
+ v.emplace_back(info);
+ EXPECT_ANY_THROW(luci::layer_info_map(g.g(), v));
+}
+
+TEST(LayerInfoMapTest, no_name_NEG)
+{
+ SoftmaxTestGraph g;
+ g.init();
+
+ luci::LayerInfo info;
+ {
+ info.name = "noname";
+ info.dtype = loco::DataType::U8;
+ info.granularity = luci::QuantizationGranularity::ChannelWise;
+ }
+ std::vector<luci::LayerInfo> v;
+ v.emplace_back(info);
+ EXPECT_ANY_THROW(luci::layer_info_map(g.g(), v));
+}
diff --git a/compiler/luci/pass/src/helpers/NodeFiller.cpp b/compiler/luci/pass/src/helpers/NodeFiller.cpp
new file mode 100644
index 000000000..b1416655d
--- /dev/null
+++ b/compiler/luci/pass/src/helpers/NodeFiller.cpp
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeFiller.h"
+
+// NOTE Do NOT delete this file; it forces the compiler to check that 'NodeFiller.h' is
+// self-contained.
diff --git a/compiler/luci/pass/src/helpers/NodeFiller.h b/compiler/luci/pass/src/helpers/NodeFiller.h
new file mode 100644
index 000000000..10113e8dd
--- /dev/null
+++ b/compiler/luci/pass/src/helpers/NodeFiller.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PASS_HELPERS_NODE_FILLER_H__
+#define __LUCI_PASS_HELPERS_NODE_FILLER_H__
+
+#include <luci/IR/CircleNodes.h>
+
+namespace luci
+{
+
+/**
+ * INTRODUCTION
+ * Binary operation f(x,y) is 'commutative' when
+ * f(x,y) == f(y,x) holds for all x, y.
+ * For examples, ADD, MUL and SQUARED_DIFFERENCE are commutative.
+ * These helpers make it easy to find commutative arguments of commutative node.
+ *
+ * HOW TO USE
+ * COMM_NODE *node;
+ * ARG_TYPE_1 *arg1;
+ * ARG_TYPE_2 *arg2;
+ *
+ * bool ok = fill(&arg1, &arg2).with_commutative_args_of(node);
+ *
+ * Result
+ * If 'node's commutative argument types are actually {ARG_TYPE_1, ARG_TYPE_2}
+ * (as a set), 'arg1' and 'arg2' are set to 'node's actual arguments with the
+ * matching types, and the return value 'ok' is true.
+ * Otherwise, 'arg1' and 'arg2' are left unchanged and 'ok' is false.
+ */
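+
+/*
+ * CONCRETE EXAMPLE (hypothetical graph, for illustration only)
+ *
+ *   Suppose 'add' is a luci::CircleAdd whose operands are a CircleConst and
+ *   a CircleMul, in either order:
+ *
+ *     luci::CircleConst *c = nullptr;
+ *     luci::CircleMul *m = nullptr;
+ *     if (luci::fill(&c, &m).with_commutative_args_of(add))
+ *     {
+ *       // 'c' now points to the constant operand and 'm' to the CircleMul
+ *     }
+ */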
+
+template <class ARG_TYPE_1, class ARG_TYPE_2> class NodeFiller final
+{
+public:
+ NodeFiller(ARG_TYPE_1 **arg_1, ARG_TYPE_2 **arg_2) : _arg_1(arg_1), _arg_2(arg_2)
+ {
+ // DO NOTHING
+ }
+
+ /**
+ * @return true When 'node's argument types are 'ARG_TYPE_1' and 'ARG_TYPE_2'.
+ * In that case, it assigns the actual arguments to '_arg_1' and '_arg_2'.
+ *
+ * @return false When 'node's argument types do NOT match 'ARG_TYPE_*'.
+ * In that case, it leaves '_arg_1' and '_arg_2' untouched.
+ *
+ * @require COMM_NODE must have members x() and y()
+ */
+ template <class COMM_NODE> bool with_commutative_args_of(const COMM_NODE *node);
+
+ /**
+ * @note Similar to with_commutative_args_of, but not commutative:
+ * node->x() must match ARG_TYPE_1 and node->y() must match ARG_TYPE_2, in that order.
+ */
+ template <class COMM_NODE> bool with_args_of(const COMM_NODE *node);
+
+private:
+ ARG_TYPE_1 **_arg_1;
+ ARG_TYPE_2 **_arg_2;
+};
+
+template <class ARG_TYPE_1, class ARG_TYPE_2>
+inline NodeFiller<ARG_TYPE_1, ARG_TYPE_2> fill(ARG_TYPE_1 **arg_1, ARG_TYPE_2 **arg_2)
+{
+ return NodeFiller<ARG_TYPE_1, ARG_TYPE_2>{arg_1, arg_2};
+}
+
+template <class ARG_TYPE_1, class ARG_TYPE_2>
+template <class COMM_NODE>
+bool NodeFiller<ARG_TYPE_1, ARG_TYPE_2>::with_commutative_args_of(const COMM_NODE *node)
+{
+ // Case 1) X == ARG_TYPE_1 / Y == ARG_TYPE_2
+ {
+ auto x = dynamic_cast<ARG_TYPE_1 *>(node->x());
+ auto y = dynamic_cast<ARG_TYPE_2 *>(node->y());
+
+ if (x && y)
+ {
+ *_arg_1 = x;
+ *_arg_2 = y;
+ return true;
+ }
+ }
+
+ // Case 2) X == ARG_TYPE_2 / Y == ARG_TYPE_1
+ {
+ auto x = dynamic_cast<ARG_TYPE_2 *>(node->x());
+ auto y = dynamic_cast<ARG_TYPE_1 *>(node->y());
+
+ if (x && y)
+ {
+ *_arg_1 = y;
+ *_arg_2 = x;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+template <class ARG_TYPE_1, class ARG_TYPE_2>
+template <class COMM_NODE>
+bool NodeFiller<ARG_TYPE_1, ARG_TYPE_2>::with_args_of(const COMM_NODE *node)
+{
+ // X == ARG_TYPE_1 / Y == ARG_TYPE_2
+ {
+ auto x = dynamic_cast<ARG_TYPE_1 *>(node->x());
+ auto y = dynamic_cast<ARG_TYPE_2 *>(node->y());
+
+ if (x && y)
+ {
+ *_arg_1 = x;
+ *_arg_2 = y;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+} // namespace luci
+
+#endif // __LUCI_PASS_HELPERS_NODE_FILLER_H__
diff --git a/compiler/luci/pass/src/helpers/NodeFiller.test.cpp b/compiler/luci/pass/src/helpers/NodeFiller.test.cpp
new file mode 100644
index 000000000..9bbc7f264
--- /dev/null
+++ b/compiler/luci/pass/src/helpers/NodeFiller.test.cpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+#include "NodeFiller.h"
+
+TEST(NodeFillerTest, simple_test)
+{
+ luci::CircleConst maxi_const;
+ luci::CircleMinimum mini;
+ luci::CircleMaximum maxi;
+ maxi.x(&maxi_const);
+ maxi.y(&mini);
+
+ luci::CircleConst *x = nullptr;
+ luci::CircleMinimum *y = nullptr;
+
+ EXPECT_TRUE(luci::fill(&x, &y).with_commutative_args_of(&maxi));
+ EXPECT_TRUE(x == &maxi_const);
+ EXPECT_TRUE(y == &mini);
+
+ x = nullptr;
+ y = nullptr;
+
+ EXPECT_TRUE(luci::fill(&y, &x).with_commutative_args_of(&maxi));
+ EXPECT_TRUE(x == &maxi_const);
+ EXPECT_TRUE(y == &mini);
+}
+
+TEST(NodeFillerTest, wrong_condition_NEG)
+{
+ luci::CircleConst add_const;
+ luci::CircleMinimum mini;
+ luci::CircleAdd add;
+ add.x(&add_const);
+ add.y(&mini);
+
+ luci::CircleMul *x = nullptr;
+ luci::CircleMinimum *y = nullptr;
+
+ EXPECT_FALSE(luci::fill(&x, &y).with_commutative_args_of(&add));
+ EXPECT_FALSE(luci::fill(&y, &x).with_commutative_args_of(&add));
+}
diff --git a/compiler/luci/pass/src/helpers/SparsityFormatConverter.cpp b/compiler/luci/pass/src/helpers/SparsityFormatConverter.cpp
new file mode 100644
index 000000000..c15df2a6c
--- /dev/null
+++ b/compiler/luci/pass/src/helpers/SparsityFormatConverter.cpp
@@ -0,0 +1,314 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// The code under namespace sparsity is adapted from
+// https://github.com/tensorflow/tensorflow/blob/3f878cff5b698b82eea85db2b60d65a2e320850e/
+// tensorflow/lite/kernels/internal/utils/sparsity_format_converter.h
+// tensorflow/lite/kernels/internal/utils/sparsity_format_converter.cc
+
+#include "SparsityFormatConverter.h"
+
+#include <oops/InternalExn.h>
+
+#include <cassert>
+
+namespace sparsity
+{
+
+namespace
+{
+
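+// Row-major flattening: e.g. indices {1, 2} with shape {3, 4} -> 1 * 4 + 2 = 6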
+uint64_t GetFlattenedIndex(const std::vector<int> &indices, const std::vector<int> &shape)
+{
+ uint64_t index = 0;
+ int sub_elements = 1;
+ for (int i = shape.size() - 1; i >= 0; i--)
+ {
+ assert(indices[i] >= 0);
+ assert(sub_elements >= 0);
+ index += static_cast<uint64_t>(indices[i]) * static_cast<uint64_t>(sub_elements);
+ sub_elements *= shape[i];
+ }
+ return index;
+}
+
+std::vector<int> TfLiteIntArrayToVector(const TfLiteIntArray *int_array)
+{
+ std::vector<int> values;
+ if (!int_array)
+ {
+ return values;
+ }
+
+ values.resize(int_array->size);
+ for (int i = 0; i < int_array->size; i++)
+ {
+ values[i] = int_array->data[i];
+ }
+
+ return values;
+}
+
+} // namespace
+
+template <typename T>
+FormatConverter<T>::FormatConverter(const std::vector<int> &shape, const TfLiteSparsity &sparsity)
+{
+ auto traversal_order = TfLiteIntArrayToVector(sparsity.traversal_order);
+ auto block_map = TfLiteIntArrayToVector(sparsity.block_map);
+
+ std::vector<TfLiteDimensionType> format(sparsity.dim_metadata_size);
+ std::vector<int> dense_size(sparsity.dim_metadata_size);
+ std::vector<std::vector<int>> segments(sparsity.dim_metadata_size);
+ std::vector<std::vector<int>> indices(sparsity.dim_metadata_size);
+ for (int i = 0; i < sparsity.dim_metadata_size; i++)
+ {
+ format[i] = sparsity.dim_metadata[i].format;
+ dense_size[i] = sparsity.dim_metadata[i].dense_size;
+ segments[i] = TfLiteIntArrayToVector(sparsity.dim_metadata[i].array_segments);
+ indices[i] = TfLiteIntArrayToVector(sparsity.dim_metadata[i].array_indices);
+ }
+
+ InitSparseToDenseConverter(shape, std::move(traversal_order), std::move(format),
+ std::move(dense_size), std::move(segments), std::move(indices),
+ std::move(block_map));
+}
+
+template <typename T>
+void FormatConverter<T>::InitSparseToDenseConverter(
+ std::vector<int> shape, std::vector<int> traversal_order, std::vector<TfLiteDimensionType> format,
+ std::vector<int> dense_size, std::vector<std::vector<int>> segments,
+ std::vector<std::vector<int>> indices, std::vector<int> block_map)
+{
+ dense_shape_ = std::move(shape);
+ traversal_order_ = std::move(traversal_order);
+ block_map_ = std::move(block_map);
+ format_ = std::move(format);
+
+ dense_size_ = 1;
+ for (size_t i = 0; i < dense_shape_.size(); i++)
+ {
+ dense_size_ *= dense_shape_[i];
+ }
+
+ dim_metadata_.resize(2 * format_.size());
+ for (size_t i = 0; i < format_.size(); i++)
+ {
+ if (format_[i] == kTfLiteDimDense)
+ {
+ dim_metadata_[2 * i] = {dense_size[i]};
+ }
+ else
+ {
+ dim_metadata_[2 * i] = std::move(segments[i]);
+ dim_metadata_[2 * i + 1] = std::move(indices[i]);
+ }
+ }
+
+ int original_rank = dense_shape_.size();
+ int block_dim = 0;
+
+ blocked_shape_.resize(original_rank);
+ block_size_.resize(block_map_.size());
+ for (int i = 0; i < original_rank; i++)
+ {
+ if (block_dim < (int)block_map_.size() && block_map_[block_dim] == i)
+ {
+ if (original_rank + block_dim < (int)traversal_order_.size())
+ {
+ int orig_dim = traversal_order_[original_rank + block_dim];
+ block_size_[block_dim] = dense_size[orig_dim];
+ blocked_shape_[i] = dense_shape_[i] / dense_size[orig_dim];
+ block_dim++;
+ }
+ }
+ else
+ {
+ blocked_shape_[i] = dense_shape_[i];
+ }
+ }
+}
+
+template <typename T>
+void FormatConverter<T>::Populate(const T *src_data, std::vector<int> indices, int level,
+ int prev_idx, int *src_data_ptr, T *dest_data)
+{
+ if (static_cast<size_t>(level) == indices.size())
+ {
+ int orig_rank = dense_shape_.size();
+ std::vector<int> orig_idx;
+ orig_idx.resize(orig_rank);
+ int i = 0;
+ for (; static_cast<size_t>(i) < orig_idx.size(); i++)
+ {
+ int orig_dim = traversal_order_[i];
+ orig_idx[orig_dim] = indices[i];
+ }
+
+ for (; static_cast<size_t>(i) < indices.size(); i++)
+ {
+ const int block_idx = traversal_order_[i] - orig_rank;
+ const int orig_dim = block_map_[block_idx];
+ orig_idx[orig_dim] = orig_idx[orig_dim] * block_size_[block_idx] + indices[i];
+ }
+
+ dest_data[GetFlattenedIndex(orig_idx, dense_shape_)] = src_data[*src_data_ptr];
+
+ *src_data_ptr = *src_data_ptr + 1;
+ return;
+ }
+
+ const int metadata_idx = 2 * level;
+ const int shape_of_level = dim_metadata_[metadata_idx][0];
+ if (format_[level] == kTfLiteDimDense)
+ {
+ for (int i = 0; i < shape_of_level; i++)
+ {
+ indices[level] = i;
+ Populate(src_data, indices, level + 1, prev_idx * shape_of_level + i, src_data_ptr,
+ dest_data);
+ }
+ }
+ else if (static_cast<size_t>(prev_idx + 1) < dim_metadata_[metadata_idx].size())
+ {
+ const auto &array_segments = dim_metadata_[metadata_idx];
+ const auto &array_indices = dim_metadata_[metadata_idx + 1];
+ for (int i = array_segments[prev_idx]; i < array_segments[prev_idx + 1]; i++)
+ {
+ if (static_cast<size_t>(i) < array_indices.size() &&
+ static_cast<size_t>(level) < indices.size())
+ {
+ indices[level] = array_indices[i];
+ Populate(src_data, indices, level + 1, i, src_data_ptr, dest_data);
+ }
+ }
+ }
+}
+
+template <typename T> bool FormatConverter<T>::SparseToDense(const T *src_data)
+{
+ data_.resize(dense_size_);
+ std::fill(data_.begin(), data_.end(), T(0));
+
+ int total_rank = traversal_order_.size();
+ int src_data_ptr = 0;
+ std::vector<int> indices(total_rank);
+ Populate(src_data, indices, 0, 0, &src_data_ptr, data_.data());
+
+ return true;
+}
+
+template class FormatConverter<float>;
+template class FormatConverter<uint16_t>;
+
+} // namespace sparsity
+
+#include <luci/IR/SparsityParam.h>
+
+namespace luci
+{
+
+sparsity::TfLiteDimensionType to_tflite_sparsity(luci::DimensionType dt)
+{
+ switch (dt)
+ {
+ case luci::DimensionType::DENSE:
+ return sparsity::TfLiteDimensionType::kTfLiteDimDense;
+ case luci::DimensionType::SPARSE_CSR:
+ return sparsity::TfLiteDimensionType::kTfLiteDimSparseCSR;
+ }
+ return sparsity::TfLiteDimensionType::kTfLiteDimDense;
+}
+
+sparsity::TfLiteIntArray *to_tflite_sparsity(const luci::SparseIndexVector &data)
+{
+ auto type = data.type();
+ switch (type)
+ {
+ case luci::SparseIndexVectorType::NONE:
+ {
+ std::vector<int32_t> empty;
+ return makeTfLiteArray(empty);
+ }
+ case luci::SparseIndexVectorType::I32:
+ return makeTfLiteArray<int32_t>(*data.as_int32_vector());
+ case luci::SparseIndexVectorType::U16:
+ return makeTfLiteArray<uint16_t>(*data.as_uint16_vector());
+ case luci::SparseIndexVectorType::U8:
+ return makeTfLiteArray<uint8_t>(*data.as_uint8_vector());
+ default:
+ INTERNAL_EXN_V("unsupported SparseIndexVectorType", oops::to_uint32(type));
+ }
+}
+
+sparsity::TfLiteSparsity to_tflite_sparsity(const luci::SparsityParam *sp)
+{
+ sparsity::TfLiteSparsity tflsp;
+ tflsp.traversal_order = makeTfLiteArray(sp->traversal_order);
+ tflsp.block_map = makeTfLiteArray(sp->block_map);
+ tflsp.dim_metadata = makeTfLiteDimensionMetadata(sp->dim_metadata);
+ tflsp.dim_metadata_size = sp->dim_metadata.size();
+ return tflsp;
+}
+
+template <typename T> sparsity::TfLiteIntArray *makeTfLiteArray(const std::vector<T> &data)
+{
+ size_t cn = data.size();
+ size_t sz = 1 + data.size();
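+ // TfLiteIntArray is an int 'size' followed by a flexible int array,
+ // so allocate (1 + size) ints and reinterpret the storage.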
+ sparsity::TfLiteIntArray *sp = (sparsity::TfLiteIntArray *)(new int[sz]);
+ sp->size = cn;
+ for (size_t i = 0; i < cn; ++i)
+ {
+ sp->data[i] = data[i];
+ }
+ return sp;
+}
+
+sparsity::TfLiteDimensionMetadata *
+makeTfLiteDimensionMetadata(const std::vector<luci::DimMetaData> &data)
+{
+ size_t cn = data.size();
+ sparsity::TfLiteDimensionMetadata *tfldm = new sparsity::TfLiteDimensionMetadata[cn];
+
+ for (size_t i = 0; i < cn; ++i)
+ {
+ tfldm[i].format = to_tflite_sparsity(data[i].format());
+ tfldm[i].dense_size = data[i].dense_size();
+ tfldm[i].array_segments = to_tflite_sparsity(data[i].array_segments());
+ tfldm[i].array_indices = to_tflite_sparsity(data[i].array_indices());
+ }
+
+ return tfldm;
+}
+
+void freeTfLiteSparsity(sparsity::TfLiteSparsity &tflsp)
+{
+ assert(tflsp.traversal_order);
+ assert(tflsp.block_map);
+ delete[] tflsp.traversal_order;
+ delete[] tflsp.block_map;
+
+ for (int i = 0; i < tflsp.dim_metadata_size; ++i)
+ {
+ assert(tflsp.dim_metadata[i].array_segments);
+ assert(tflsp.dim_metadata[i].array_indices);
+ delete[] tflsp.dim_metadata[i].array_segments;
+ delete[] tflsp.dim_metadata[i].array_indices;
+ }
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/helpers/SparsityFormatConverter.h b/compiler/luci/pass/src/helpers/SparsityFormatConverter.h
new file mode 100644
index 000000000..e01430489
--- /dev/null
+++ b/compiler/luci/pass/src/helpers/SparsityFormatConverter.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PASS_HELPERS_SPARSITY_FORMAT_CONVERTER_H__
+#define __LUCI_PASS_HELPERS_SPARSITY_FORMAT_CONVERTER_H__
+
+#include <cstddef>
+#include <cstdint>
+#include <vector>
+
+// code under namespace sparsity is referenced from
+// https://github.com/tensorflow/tensorflow/blob/3f878cff5b698b82eea85db2b60d65a2e320850e/
+// tensorflow/lite/kernels/internal/utils/sparsity_format_converter.h
+// tensorflow/lite/kernels/internal/utils/sparsity_format_converter.cc
+
+namespace sparsity
+{
+
+// Storage format of each dimension in a sparse tensor.
+typedef enum TfLiteDimensionType
+{
+ kTfLiteDimDense = 0,
+ kTfLiteDimSparseCSR,
+} TfLiteDimensionType;
+
+// Fixed-size list of integers. Used for dimensions and input/output tensor
+// indices.
+typedef struct TfLiteIntArray
+{
+ int size;
+ int data[];
+} TfLiteIntArray;
+
+// Metadata to encode each dimension in a sparse tensor.
+typedef struct TfLiteDimensionMetadata
+{
+ TfLiteDimensionType format;
+ int dense_size;
+ TfLiteIntArray *array_segments;
+ TfLiteIntArray *array_indices;
+} TfLiteDimensionMetadata;
+
+// Parameters used to encode a sparse tensor. For detailed explanation of each
+// field please refer to lite/schema/schema.fbs.
+typedef struct TfLiteSparsity
+{
+ TfLiteIntArray *traversal_order;
+ TfLiteIntArray *block_map;
+ TfLiteDimensionMetadata *dim_metadata;
+ int dim_metadata_size;
+} TfLiteSparsity;
+
+// A converter that keeps an internal representation of sparse tensor parameters
+// and converts tensors between dense and sparse formats.
+template <typename T> class FormatConverter
+{
+public:
+ /* Creates a sparse to dense converter.
+ * @param shape Shape of the target dense tensor.
+ * @param sparsity Sparsity parameter of the sparse TfLiteTensor.
+ */
+ FormatConverter(const std::vector<int> &shape, const TfLiteSparsity &sparsity);
+
+ const std::vector<T> &GetData() { return data_; }
+ const std::vector<std::vector<int>> &GetDimMetadata() { return dim_metadata_; }
+
+ bool SparseToDense(const T *src_data);
+
+private:
+ // Helper function for initializing this converter for sparse to dense
+ // conversion.
+ void InitSparseToDenseConverter(std::vector<int> shape, std::vector<int> traversal_order,
+ std::vector<TfLiteDimensionType> format,
+ std::vector<int> dense_size,
+ std::vector<std::vector<int>> segments,
+ std::vector<std::vector<int>> indices,
+ std::vector<int> block_map);
+
+ void Populate(const T *src_data, std::vector<int> indices, int level, int prev_idx,
+ int *src_data_ptr, T *dest_data);
+
+private:
+ std::vector<int> dense_shape_;
+ std::vector<int> blocked_shape_;
+ size_t dense_size_;
+ std::vector<int> traversal_order_;
+ std::vector<TfLiteDimensionType> format_;
+ std::vector<int> block_size_;
+ std::vector<int> block_map_;
+ std::vector<std::vector<int>> dim_metadata_;
+ std::vector<T> data_;
+};
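+
+// Usage sketch (illustrative):
+//   FormatConverter<float> converter(dense_shape, tflite_sparsity);
+//   converter.SparseToDense(sparse_values);
+//   const std::vector<float> &dense = converter.GetData();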
+
+extern template class FormatConverter<float>;
+extern template class FormatConverter<uint16_t>;
+
+} // namespace sparsity
+
+#include <luci/IR/SparsityParam.h>
+
+namespace luci
+{
+
+sparsity::TfLiteDimensionType to_tflite_sparsity(luci::DimensionType dt);
+sparsity::TfLiteIntArray *to_tflite_sparsity(const luci::SparseIndexVector &data);
+sparsity::TfLiteSparsity to_tflite_sparsity(const luci::SparsityParam *sp);
+
+template <typename T> sparsity::TfLiteIntArray *makeTfLiteArray(const std::vector<T> &data);
+sparsity::TfLiteDimensionMetadata *
+makeTfLiteDimensionMetadata(const std::vector<luci::DimMetaData> &data);
+
+void freeTfLiteSparsity(sparsity::TfLiteSparsity &tflsp);
+
+} // namespace luci
+
+#endif // __LUCI_PASS_HELPERS_SPARSITY_FORMAT_CONVERTER_H__
diff --git a/compiler/luci/pass/src/helpers/Strings.cpp b/compiler/luci/pass/src/helpers/Strings.cpp
new file mode 100644
index 000000000..2628726c1
--- /dev/null
+++ b/compiler/luci/pass/src/helpers/Strings.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Strings.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cctype>
+#include <stdexcept>
+
+namespace luci
+{
+
+bool in_array(const std::string &str, const std::vector<std::string> &array)
+{
+ return std::find(array.begin(), array.end(), str) != array.end();
+}
+
+std::string to_string(const std::vector<std::string> &strings)
+{
+ assert(!strings.empty());
+
+ std::string res;
+ for (unsigned int i = 0; i < strings.size() - 1; i++)
+ res += strings[i] + ", ";
+
+ res += strings[strings.size() - 1];
+ return res;
+}
+
+std::string to_lower_case(std::string s)
+{
+ std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) { return std::tolower(c); });
+ return s;
+}
+
+loco::DataType str_to_dtype(const std::string &str)
+{
+ if (to_lower_case(str).compare("uint8") == 0)
+ return loco::DataType::U8;
+ if (to_lower_case(str).compare("uint16") == 0)
+ return loco::DataType::U16;
+ if (to_lower_case(str).compare("uint32") == 0)
+ return loco::DataType::U32;
+ if (to_lower_case(str).compare("uint64") == 0)
+ return loco::DataType::U64;
+
+ if (to_lower_case(str).compare("int8") == 0)
+ return loco::DataType::S8;
+ if (to_lower_case(str).compare("int16") == 0)
+ return loco::DataType::S16;
+ if (to_lower_case(str).compare("int32") == 0)
+ return loco::DataType::S32;
+ if (to_lower_case(str).compare("int64") == 0)
+ return loco::DataType::S64;
+
+ if (to_lower_case(str).compare("float16") == 0)
+ return loco::DataType::FLOAT16;
+ if (to_lower_case(str).compare("float32") == 0)
+ return loco::DataType::FLOAT32;
+ if (to_lower_case(str).compare("float64") == 0)
+ return loco::DataType::FLOAT64;
+
+ if (to_lower_case(str).compare("bool") == 0)
+ return loco::DataType::BOOL;
+
+ return loco::DataType::Unknown;
+}
+
+// Convert string to a vector of loco::DataType
+std::vector<loco::DataType> str_vec_to_dtype_vec(std::vector<std::string> &vec)
+{
+ std::vector<loco::DataType> res;
+ std::transform(vec.begin(), vec.end(), std::back_inserter(res),
+ [](std::string s) -> loco::DataType { return str_to_dtype(to_lower_case(s)); });
+ return res;
+}
+
+QuantizationGranularity str_to_granularity(const std::string &str)
+{
+ if (to_lower_case(str).compare("layer") == 0)
+ return QuantizationGranularity::LayerWise;
+
+ if (to_lower_case(str).compare("channel") == 0)
+ return QuantizationGranularity::ChannelWise;
+
+ throw std::runtime_error("Quantization granularity must be either 'layer' or 'channel'");
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/helpers/Strings.h b/compiler/luci/pass/src/helpers/Strings.h
new file mode 100644
index 000000000..485f37948
--- /dev/null
+++ b/compiler/luci/pass/src/helpers/Strings.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PASS_HELPERS_STRINGS_H__
+#define __LUCI_PASS_HELPERS_STRINGS_H__
+
+#include "luci/Pass/QuantizationParameters.h"
+
+#include <loco.h>
+
+#include <vector>
+#include <sstream>
+#include <string>
+
+namespace luci
+{
+
+bool in_array(const std::string &, const std::vector<std::string> &);
+
+std::string to_string(const std::vector<std::string> &);
+
+std::string to_lower_case(std::string);
+
+loco::DataType str_to_dtype(const std::string &);
+
+std::vector<loco::DataType> str_vec_to_dtype_vec(std::vector<std::string> &);
+
+QuantizationGranularity str_to_granularity(const std::string &);
+
+} // namespace luci
+
+#endif // __LUCI_PASS_HELPERS_STRINGS_H__
diff --git a/compiler/luci/pass/src/helpers/Strings.test.cpp b/compiler/luci/pass/src/helpers/Strings.test.cpp
new file mode 100644
index 000000000..6d854ad4f
--- /dev/null
+++ b/compiler/luci/pass/src/helpers/Strings.test.cpp
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Strings.h"
+
+#include "luci/Pass/QuantizationParameters.h"
+
+#include <gtest/gtest.h>
+
+TEST(StringsTest, str_to_dtype)
+{
+ ASSERT_EQ(loco::DataType::U8, luci::str_to_dtype("uint8"));
+ ASSERT_EQ(loco::DataType::U16, luci::str_to_dtype("uint16"));
+ ASSERT_EQ(loco::DataType::U32, luci::str_to_dtype("uint32"));
+ ASSERT_EQ(loco::DataType::U64, luci::str_to_dtype("uint64"));
+
+ ASSERT_EQ(loco::DataType::S8, luci::str_to_dtype("int8"));
+ ASSERT_EQ(loco::DataType::S16, luci::str_to_dtype("int16"));
+ ASSERT_EQ(loco::DataType::S32, luci::str_to_dtype("int32"));
+ ASSERT_EQ(loco::DataType::S64, luci::str_to_dtype("int64"));
+
+ ASSERT_EQ(loco::DataType::FLOAT16, luci::str_to_dtype("float16"));
+ ASSERT_EQ(loco::DataType::FLOAT32, luci::str_to_dtype("float32"));
+ ASSERT_EQ(loco::DataType::FLOAT64, luci::str_to_dtype("float64"));
+
+ ASSERT_EQ(loco::DataType::BOOL, luci::str_to_dtype("bool"));
+
+ ASSERT_EQ(loco::DataType::Unknown, luci::str_to_dtype("foo"));
+}
+
+TEST(StringsTest, str_to_granularity)
+{
+ ASSERT_EQ(luci::QuantizationGranularity::LayerWise, luci::str_to_granularity("layer"));
+ ASSERT_EQ(luci::QuantizationGranularity::ChannelWise, luci::str_to_granularity("channel"));
+
+ EXPECT_THROW(luci::str_to_granularity("foo"), std::runtime_error);
+}
+
+TEST(StringsTest, str_vec_to_dtype_vec)
+{
+ std::vector<std::string> input1 = {"uint8", "int16", "float32"};
+ auto result1 = luci::str_vec_to_dtype_vec(input1);
+ ASSERT_EQ(3, result1.size());
+ ASSERT_EQ(loco::DataType::U8, result1[0]);
+ ASSERT_EQ(loco::DataType::S16, result1[1]);
+ ASSERT_EQ(loco::DataType::FLOAT32, result1[2]);
+
+ std::vector<std::string> input2 = {"uint8", "int16", "float32", ""};
+ auto result2 = luci::str_vec_to_dtype_vec(input2);
+ ASSERT_EQ(4, result2.size());
+ ASSERT_EQ(loco::DataType::U8, result2[0]);
+ ASSERT_EQ(loco::DataType::S16, result2[1]);
+ ASSERT_EQ(loco::DataType::FLOAT32, result2[2]);
+ ASSERT_EQ(loco::DataType::Unknown, result2[3]);
+
+ std::vector<std::string> input3 = {"uint8"};
+ auto result3 = luci::str_vec_to_dtype_vec(input3);
+ ASSERT_EQ(1, result3.size());
+ ASSERT_EQ(loco::DataType::U8, result3[0]);
+}
diff --git a/compiler/luci/pass/src/helpers/TypeMapper.cpp b/compiler/luci/pass/src/helpers/TypeMapper.cpp
new file mode 100644
index 000000000..ffa0159dd
--- /dev/null
+++ b/compiler/luci/pass/src/helpers/TypeMapper.cpp
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TypeMapper.h"
+
+// NOTE Do NOT delete this file; it forces the compiler to check whether
+// 'TypeMapper.h' is complete.
diff --git a/compiler/luci/pass/src/helpers/TypeMapper.h b/compiler/luci/pass/src/helpers/TypeMapper.h
new file mode 100644
index 000000000..a3e27d259
--- /dev/null
+++ b/compiler/luci/pass/src/helpers/TypeMapper.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PASS_HELPERS_TYPE_MAPPER_H__
+#define __LUCI_PASS_HELPERS_TYPE_MAPPER_H__
+
+#include <loco/IR/DataType.h>
+
+#include <cstdint>
+
+namespace luci
+{
+
+/**
+ * @brief TypeMapper maps between C++ primitive data types and loco::DataType.
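+ *
+ * Usage sketch (illustrative):
+ *   static_assert(TypeMapper<int32_t>::get() == loco::DataType::S32, "S32");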
+ */
+template <typename T> struct TypeMapper
+{
+ static constexpr loco::DataType get() { return loco::DataType::Unknown; }
+};
+
+template <> struct TypeMapper<float>
+{
+ static constexpr loco::DataType get() { return loco::DataType::FLOAT32; }
+};
+
+template <> struct TypeMapper<uint8_t>
+{
+ static constexpr loco::DataType get() { return loco::DataType::U8; }
+};
+
+template <> struct TypeMapper<uint16_t>
+{
+ static constexpr loco::DataType get() { return loco::DataType::U16; }
+};
+
+template <> struct TypeMapper<uint32_t>
+{
+ static constexpr loco::DataType get() { return loco::DataType::U32; }
+};
+
+template <> struct TypeMapper<uint64_t>
+{
+ static constexpr loco::DataType get() { return loco::DataType::U64; }
+};
+
+template <> struct TypeMapper<int8_t>
+{
+ static constexpr loco::DataType get() { return loco::DataType::S8; }
+};
+
+template <> struct TypeMapper<int16_t>
+{
+ static constexpr loco::DataType get() { return loco::DataType::S16; }
+};
+
+template <> struct TypeMapper<int32_t>
+{
+ static constexpr loco::DataType get() { return loco::DataType::S32; }
+};
+
+template <> struct TypeMapper<int64_t>
+{
+ static constexpr loco::DataType get() { return loco::DataType::S64; }
+};
+
+} // namespace luci
+
+#endif // __LUCI_PASS_HELPERS_TYPE_MAPPER_H__
diff --git a/compiler/luci/pass/src/helpers/TypeMapper.test.cpp b/compiler/luci/pass/src/helpers/TypeMapper.test.cpp
new file mode 100644
index 000000000..a7ac08a63
--- /dev/null
+++ b/compiler/luci/pass/src/helpers/TypeMapper.test.cpp
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+#include "TypeMapper.h"
+
+#include <vector>
+
+namespace
+{
+
+template <typename T> bool fill_const_node(luci::CircleConst *node, std::vector<T> &data)
+{
+ if (node->dtype() != luci::TypeMapper<T>::get())
+ return false;
+
+ node->size<luci::TypeMapper<T>::get()>(data.size());
+ for (uint32_t i = 0; i < data.size(); i++)
+ {
+ node->at<luci::TypeMapper<T>::get()>(i) = data.at(i);
+ }
+
+ return true;
+}
+
+class STRANGER
+{
+};
+
+} // namespace
+
+TEST(TypeMapperTest, simple_test)
+{
+ EXPECT_EQ(loco::DataType::FLOAT32, luci::TypeMapper<float>::get());
+ EXPECT_EQ(loco::DataType::U8, luci::TypeMapper<uint8_t>::get());
+ EXPECT_EQ(loco::DataType::U16, luci::TypeMapper<uint16_t>::get());
+ EXPECT_EQ(loco::DataType::U32, luci::TypeMapper<uint32_t>::get());
+ EXPECT_EQ(loco::DataType::U64, luci::TypeMapper<uint64_t>::get());
+ EXPECT_EQ(loco::DataType::S8, luci::TypeMapper<int8_t>::get());
+ EXPECT_EQ(loco::DataType::S16, luci::TypeMapper<int16_t>::get());
+ EXPECT_EQ(loco::DataType::S32, luci::TypeMapper<int32_t>::get());
+ EXPECT_EQ(loco::DataType::S64, luci::TypeMapper<int64_t>::get());
+}
+
+TEST(TypeMapperTest, with_template_test)
+{
+ std::vector<int32_t> int32_vec{0, 1, 2, 3, 4, 5, 6, 7};
+ luci::CircleConst const_node;
+ const_node.dtype(loco::DataType::S32);
+ EXPECT_TRUE(fill_const_node(&const_node, int32_vec));
+ EXPECT_EQ(8, const_node.size<loco::DataType::S32>());
+ EXPECT_EQ(0, const_node.at<loco::DataType::S32>(0));
+ EXPECT_EQ(1, const_node.at<loco::DataType::S32>(1));
+ EXPECT_EQ(2, const_node.at<loco::DataType::S32>(2));
+ EXPECT_EQ(3, const_node.at<loco::DataType::S32>(3));
+ EXPECT_EQ(4, const_node.at<loco::DataType::S32>(4));
+ EXPECT_EQ(5, const_node.at<loco::DataType::S32>(5));
+ EXPECT_EQ(6, const_node.at<loco::DataType::S32>(6));
+ EXPECT_EQ(7, const_node.at<loco::DataType::S32>(7));
+
+ std::vector<float> f32_vec{0.0, 1.1, 2.2, 3.3, 4.4, 5.5};
+ const_node.dtype(loco::DataType::FLOAT32);
+ EXPECT_FALSE(fill_const_node(&const_node, int32_vec));
+ EXPECT_TRUE(fill_const_node(&const_node, f32_vec));
+ EXPECT_EQ(6, const_node.size<loco::DataType::FLOAT32>());
+ EXPECT_FLOAT_EQ(0.0, const_node.at<loco::DataType::FLOAT32>(0));
+ EXPECT_FLOAT_EQ(1.1, const_node.at<loco::DataType::FLOAT32>(1));
+ EXPECT_FLOAT_EQ(2.2, const_node.at<loco::DataType::FLOAT32>(2));
+ EXPECT_FLOAT_EQ(3.3, const_node.at<loco::DataType::FLOAT32>(3));
+ EXPECT_FLOAT_EQ(4.4, const_node.at<loco::DataType::FLOAT32>(4));
+ EXPECT_FLOAT_EQ(5.5, const_node.at<loco::DataType::FLOAT32>(5));
+}
+
+TEST(TypeMapperTest, wrong_condition_NEG)
+{
+ EXPECT_EQ(loco::DataType::Unknown, luci::TypeMapper<STRANGER>::get());
+}
diff --git a/compiler/luci/pass/src/test/TestFirstNode.h b/compiler/luci/pass/src/test/TestFirstNode.h
new file mode 100644
index 000000000..21f859fcd
--- /dev/null
+++ b/compiler/luci/pass/src/test/TestFirstNode.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PASS_TEST_FIRST_NODE_H__
+#define __LUCI_PASS_TEST_FIRST_NODE_H__
+
+#include <luci/IR/CircleNodes.h>
+
+#include <loco.h>
+
+namespace luci
+{
+namespace test
+{
+
+template <class T> T *first_node(loco::Graph *g)
+{
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto target_node = dynamic_cast<T *>(node);
+ if (target_node != nullptr)
+ return target_node;
+ }
+ return nullptr;
+}
+
+} // namespace test
+} // namespace luci
+
+#endif // __LUCI_PASS_TEST_FIRST_NODE_H__
diff --git a/compiler/luci/pass/src/test/TestFirstNode.test.cpp b/compiler/luci/pass/src/test/TestFirstNode.test.cpp
new file mode 100644
index 000000000..b07ac6199
--- /dev/null
+++ b/compiler/luci/pass/src/test/TestFirstNode.test.cpp
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TestFirstNode.h"
+
+// This file validates "TestFirstNode.h". Please DO NOT remove this file.
diff --git a/compiler/luci/pass/src/test/TestShape.h b/compiler/luci/pass/src/test/TestShape.h
new file mode 100644
index 000000000..ccc55c9da
--- /dev/null
+++ b/compiler/luci/pass/src/test/TestShape.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PASS_TEST_SHAPE_H__
+#define __LUCI_PASS_TEST_SHAPE_H__
+
+#include <luci/IR/CircleNode.h>
+
+#include <initializer_list>
+
+namespace luci
+{
+namespace test
+{
+
+using ShapeU32 = std::initializer_list<uint32_t>;
+using ShapeI32 = std::initializer_list<int32_t>;
+
+void set_shape_vector(loco::TensorShape *shape, const ShapeU32 &values);
+void set_shape_vector(luci::CircleConst *const_node, const ShapeI32 &values);
+
+uint32_t num_elements(const ShapeU32 shape);
+
+} // namespace test
+} // namespace luci
+
+#endif // __LUCI_PASS_TEST_SHAPE_H__
diff --git a/compiler/luci/pass/src/test/TestShape.test.cpp b/compiler/luci/pass/src/test/TestShape.test.cpp
new file mode 100644
index 000000000..39790c614
--- /dev/null
+++ b/compiler/luci/pass/src/test/TestShape.test.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TestShape.h"
+
+/**
+ * @note This file does not hold any test cases but provides methods for tests
+ */
+
+namespace luci
+{
+namespace test
+{
+
+void set_shape_vector(loco::TensorShape *shape, const ShapeU32 &values)
+{
+ uint32_t r = 0;
+ shape->rank(values.size());
+ for (auto v : values)
+ shape->dim(r++).set(v);
+}
+
+void set_shape_vector(luci::CircleConst *const_node, const ShapeI32 &values)
+{
+ const_node->rank(1);
+ const_node->dim(0).set(values.size());
+ const_node->shape_status(luci::ShapeStatus::VALID);
+ const_node->dtype(loco::DataType::S32);
+ const_node->size<loco::DataType::S32>(values.size());
+ uint32_t idx = 0;
+ for (auto val : values)
+ const_node->at<loco::DataType::S32>(idx++) = val;
+}
+
+uint32_t num_elements(const ShapeU32 shape)
+{
+ uint32_t result = 1;
+ for (auto val : shape)
+ result = result * val;
+ return result;
+}
+
+} // namespace test
+} // namespace luci
diff --git a/compiler/luci/plan/CMakeLists.txt b/compiler/luci/plan/CMakeLists.txt
new file mode 100644
index 000000000..d4c8f6391
--- /dev/null
+++ b/compiler/luci/plan/CMakeLists.txt
@@ -0,0 +1,26 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+if (NOT LUCI_LIBRARY_TYPE)
+ set(LUCI_LIBRARY_TYPE "SHARED")
+endif(NOT LUCI_LIBRARY_TYPE)
+
+add_library(luci_plan ${LUCI_LIBRARY_TYPE} ${SOURCES})
+target_include_directories(luci_plan PRIVATE src)
+target_include_directories(luci_plan PUBLIC include)
+target_link_libraries(luci_plan PUBLIC loco)
+target_link_libraries(luci_plan PUBLIC luci_lang)
+
+install(TARGETS luci_plan DESTINATION lib)
+install(DIRECTORY include/ DESTINATION include
+ FILES_MATCHING PATTERN "*.h")
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(luci_plan_test ${TESTS})
+target_link_libraries(luci_plan_test luci_plan)
diff --git a/compiler/luci/plan/include/luci/Plan/CircleNodeExecutionPlan.h b/compiler/luci/plan/include/luci/Plan/CircleNodeExecutionPlan.h
new file mode 100644
index 000000000..fe966e35e
--- /dev/null
+++ b/compiler/luci/plan/include/luci/Plan/CircleNodeExecutionPlan.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_NODE_EXECUTION_PLAN_H__
+#define __LUCI_CIRCLE_NODE_EXECUTION_PLAN_H__
+
+#include <luci/IR/CircleNode.h>
+
+#include <utility>
+#include <vector>
+
+namespace luci
+{
+
+class CircleNodeExecutionPlan
+{
+public:
+ CircleNodeExecutionPlan() = delete;
+
+ CircleNodeExecutionPlan(uint32_t order_in_plan, std::vector<uint32_t> offsets)
+ {
+ _order_in_plan = order_in_plan;
+ _offsets = std::move(offsets);
+ }
+
+ uint32_t order_in_plan(void) const { return _order_in_plan; }
+ void order_in_plan(const uint32_t &order_in_plan) { _order_in_plan = order_in_plan; }
+
+ std::vector<uint32_t> offsets(void) const { return _offsets; }
+ void offsets(const std::vector<uint32_t> &offsets) { _offsets = offsets; }
+
+private:
+ uint32_t _order_in_plan = 0;
+ std::vector<uint32_t> _offsets;
+};
+
+bool has_execution_plan(const luci::CircleNode *circle_node);
+
+void add_execution_plan(luci::CircleNode *circle_node,
+ const luci::CircleNodeExecutionPlan &execution_plan);
+
+luci::CircleNodeExecutionPlan get_execution_plan(const luci::CircleNode *circle_node);
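+
+// Usage sketch (illustrative):
+//   luci::add_execution_plan(node, luci::CircleNodeExecutionPlan(0, {0, 64}));
+//   assert(luci::has_execution_plan(node));
+//   auto plan = luci::get_execution_plan(node);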
+
+} // namespace luci
+
+#endif // __LUCI_CIRCLE_NODE_EXECUTION_PLAN_H__
diff --git a/compiler/luci/plan/src/CircleNodeExecutionPlan.cpp b/compiler/luci/plan/src/CircleNodeExecutionPlan.cpp
new file mode 100644
index 000000000..a02ebc452
--- /dev/null
+++ b/compiler/luci/plan/src/CircleNodeExecutionPlan.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Plan/CircleNodeExecutionPlan.h"
+
+#include <loco.h>
+
+#include <memory>
+#include <stdexcept>
+#include <utility>
+
+namespace
+{
+
+/**
+ * @brief Set annotation for circle node execution plan
+ * @note Once CircleExecutionPlanAnnotation is annotated, it should not be changed.
+ *       If CircleExecutionPlanAnnotation needs to be changed, create a
+ *       new CircleExecutionPlanAnnotation.
+ */
+class CircleExecutionPlanAnnotation final : public loco::NodeAnnotation
+{
+public:
+ CircleExecutionPlanAnnotation() = delete;
+
+ explicit CircleExecutionPlanAnnotation(luci::CircleNodeExecutionPlan execution_plan)
+ : _execution_plan{std::move(execution_plan)}
+ {
+ // Do nothing
+ }
+
+public:
+ const luci::CircleNodeExecutionPlan &execution_plan(void) const { return _execution_plan; }
+ // No setter
+
+private:
+ luci::CircleNodeExecutionPlan _execution_plan;
+};
+
+} // namespace
+
+namespace luci
+{
+
+bool has_execution_plan(const luci::CircleNode *circle_node)
+{
+ return circle_node->annot<CircleExecutionPlanAnnotation>() != nullptr;
+}
+
+void add_execution_plan(luci::CircleNode *circle_node,
+ const luci::CircleNodeExecutionPlan &execution_plan)
+{
+ circle_node->annot<CircleExecutionPlanAnnotation>(nullptr);
+ circle_node->annot(std::make_unique<CircleExecutionPlanAnnotation>(execution_plan));
+}
+
+luci::CircleNodeExecutionPlan get_execution_plan(const luci::CircleNode *circle_node)
+{
+ if (!has_execution_plan(circle_node))
+ throw std::runtime_error("Cannot find CircleNodeExecutionPlanAnnotation");
+
+ return circle_node->annot<CircleExecutionPlanAnnotation>()->execution_plan();
+}
+
+} // namespace luci
diff --git a/compiler/luci/plan/src/CircleNodeExecutionPlan.test.cpp b/compiler/luci/plan/src/CircleNodeExecutionPlan.test.cpp
new file mode 100644
index 000000000..d7ccf255f
--- /dev/null
+++ b/compiler/luci/plan/src/CircleNodeExecutionPlan.test.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Plan/CircleNodeExecutionPlan.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+#include <gmock/gmock.h>
+
+TEST(CircleNodeExecutionPlan, basic_fields)
+{
+ luci::CircleNodeExecutionPlan plan(123, {4, 5, 6, 7});
+
+ ASSERT_EQ(plan.order_in_plan(), 123);
+ ASSERT_THAT(plan.offsets(), testing::ElementsAre(4, 5, 6, 7));
+
+ plan.order_in_plan(321);
+ plan.offsets({1, 2, 3, 4});
+
+ ASSERT_EQ(plan.order_in_plan(), 321);
+ ASSERT_THAT(plan.offsets(), testing::ElementsAre(1, 2, 3, 4));
+}
+
+TEST(CircleNodeExecutionPlan, add_extract_plan)
+{
+ auto g = loco::make_graph();
+ auto add = g->nodes()->create<luci::CircleAdd>();
+
+ ASSERT_FALSE(luci::has_execution_plan(add));
+
+ luci::CircleNodeExecutionPlan plan(123, {4, 5, 6, 7});
+ luci::add_execution_plan(add, plan);
+
+ ASSERT_TRUE(luci::has_execution_plan(add));
+
+ auto extracted_plan = luci::get_execution_plan(add);
+
+ ASSERT_EQ(extracted_plan.order_in_plan(), 123);
+ ASSERT_THAT(extracted_plan.offsets(), testing::ElementsAre(4, 5, 6, 7));
+}
+
+TEST(CircleNodeExecutionPlan, extract_plan_NEG)
+{
+ auto g = loco::make_graph();
+ auto add = g->nodes()->create<luci::CircleAdd>();
+
+ ASSERT_FALSE(luci::has_execution_plan(add));
+
+ ASSERT_ANY_THROW(luci::get_execution_plan(add));
+}
+
+TEST(CircleNodeExecutionPlan, double_set_plan_NEG)
+{
+ auto g = loco::make_graph();
+ auto add = g->nodes()->create<luci::CircleAdd>();
+
+ ASSERT_FALSE(luci::has_execution_plan(add));
+
+ luci::CircleNodeExecutionPlan plan1(123, {4, 5, 6, 7});
+ luci::add_execution_plan(add, plan1);
+ ASSERT_TRUE(luci::has_execution_plan(add));
+
+ luci::CircleNodeExecutionPlan plan2(321, {1, 2, 3, 4});
+ luci::add_execution_plan(add, plan2);
+ ASSERT_TRUE(luci::has_execution_plan(add));
+
+ auto extracted_plan = luci::get_execution_plan(add);
+ ASSERT_EQ(extracted_plan.order_in_plan(), 321);
+ ASSERT_THAT(extracted_plan.offsets(), testing::ElementsAre(1, 2, 3, 4));
+}
diff --git a/compiler/luci/profile/CMakeLists.txt b/compiler/luci/profile/CMakeLists.txt
new file mode 100644
index 000000000..f8a0cc005
--- /dev/null
+++ b/compiler/luci/profile/CMakeLists.txt
@@ -0,0 +1,28 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+if (NOT LUCI_LIBRARY_TYPE)
+ set(LUCI_LIBRARY_TYPE "SHARED")
+endif(NOT LUCI_LIBRARY_TYPE)
+
+add_library(luci_profile ${LUCI_LIBRARY_TYPE} ${SOURCES})
+target_include_directories(luci_profile PRIVATE src)
+target_include_directories(luci_profile PUBLIC include)
+target_link_libraries(luci_profile PUBLIC loco)
+target_link_libraries(luci_profile PUBLIC luci_lang)
+
+install(TARGETS luci_profile DESTINATION lib)
+install(DIRECTORY include/ DESTINATION include
+ FILES_MATCHING PATTERN "*.h")
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(luci_profile_test ${TESTS})
+target_include_directories(luci_profile_test PRIVATE src)
+target_link_libraries(luci_profile_test luci_lang)
+target_link_libraries(luci_profile_test luci_profile)
diff --git a/compiler/luci/profile/README.md b/compiler/luci/profile/README.md
new file mode 100644
index 000000000..577e60a7c
--- /dev/null
+++ b/compiler/luci/profile/README.md
@@ -0,0 +1,119 @@
+# luci-profile
+
+`luci-profile` provides profiling-related items.
+
+## CircleNodeOrigin
+
+`CircleNodeOrigin` lets us know where a node originated from.
+
+Let's assume the following graph transformations are done.
+
+```
+ | | |
+ [node1] --------+ | |
+(id = 1) | | |
+ | +--------> [node5] ----------------> [node6]
+ | | (origin = [1,2]) (origin = [1,2])
+ [node2] --------+ | |
+(id = 2) | |
+ | | |
+ [node3] -----------------> [node3] --------+-------> [node3]
+(id = 3) (origin = [3]) | (origin = [3,4])
+ | | | |
+ [node4] -----------------> [node4] --------+ |
+(id = 4) (origin = [4]) |
+ | | |
+
+<Circle1> -- optimizer --> <circle2> -- quantizer --> <circle3>
+```
+
+The most important purpose of using `CircleNodeOrigin` is to preserve origin information.
+The following changes show how origin information is preserved even after the graph is transformed.
+
+- `node3`
+  - `node4` is absorbed into the **existing** `node3`.
+  - The origin of `node4` is absorbed into the origin of `node3`.
+- `node5`
+  - `node1` and `node2` are fused into the **newly created** `node5`.
+  - The origins of `node1` and `node2` are inherited by `node5`.
+- `node6`
+  - `node5` is **replaced with the newly created** `node6`.
+  - The origin of `node5` is copied to the origin of `node6`.
+
+**Therefore, when using `CircleNodeOrigin`, please be aware of the most important principle: "Preserve origin information."**
+
+The following sections describe the implementation details for storing the origin information.
+
+### Source Table
+
+The source table contains the id and name of each origin node.
+
+#### Binary format
+
+```
+[ entry_number : uint32_t ]
+[ id : uint32_t ][ length : uint32_t ][ data : char * length ] * entry_number
+```
+- entry_number : The number of entries
+ - Each entry consists of id, length, and data.
+- id : ID of origin node
+- length : Length of data in bytes (including the trailing '\0')
+- data : Name of origin node **(null-terminated string)**
+
+#### In-memory format
+```cpp
+// size = entry_number
+std::map<uint32_t /* id */, std::string /* name */>
+```
+
+#### Example
+
+The following example means "the name of origin 1 is node1".
+
+```
+[Binary Format]
+ 0x01 00 00 00 0x01 00 00 00 0x06 00 00 00 0x6e 0x6f 0x64 0x65 0x31 00
+ ------------- ------------- ------------- ---- ---- ---- ---- ---- ----
+entry_number=1 id=1 length=6 'n' 'o' 'd' 'e' '1' '\0'
+```
+```cpp
+[In-memory Format]
+std::map<uint32_t, std::string>{{1, "node1"}};
+```
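+
+A minimal sketch of how such a table could be decoded (illustrative only;
+`read_u32` and `read_source_table` are hypothetical helpers, not part of luci,
+and a little-endian byte stream is assumed):
+
+```cpp
+#include <cstdint>
+#include <istream>
+#include <map>
+#include <string>
+#include <utility>
+
+// Reads one little-endian uint32_t from the stream (illustrative helper).
+uint32_t read_u32(std::istream &is)
+{
+  uint32_t v = 0;
+  is.read(reinterpret_cast<char *>(&v), sizeof(v));
+  return v;
+}
+
+std::map<uint32_t, std::string> read_source_table(std::istream &is)
+{
+  std::map<uint32_t, std::string> table;
+  const uint32_t entry_number = read_u32(is);
+  for (uint32_t e = 0; e < entry_number; ++e)
+  {
+    const uint32_t id = read_u32(is);
+    const uint32_t length = read_u32(is); // includes the trailing '\0'
+    std::string name(length, '\0');
+    is.read(&name[0], length);
+    if (!name.empty() && name.back() == '\0')
+      name.pop_back(); // store the name without the terminator
+    table.emplace(id, std::move(name));
+  }
+  return table;
+}
+```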
+
+### Op Table
+
+The op table maps the id of each operation to the id(s) of the operation's origin nodes.
+
+#### Binary format
+
+The op table is stored in the circle file as binary data with the following format.
+```
+[ entry_number : uint32_t ]
+[ id : uint32_t ][ node_num : uint32_t ][ node_ids : uint32_t * node_num ] * entry_number
+```
+- entry_number : The number of entries
+ - Each entry consists of id, node_num, and node_ids.
+- id : ID of operation in circle model file
+- node_num : The number of operation's origin nodes
+- node_ids : Set of IDs of origin nodes
+
+#### In-memory format
+```cpp
+std::map<uint32_t /* id */, std::set<uint32_t> /* node_ids */>
+```
+
+#### Example
+
+The following example means "operation 5 originated from origins 1 and 2".
+
+```
+[Binary Format]
+ 0x01 00 00 00 0x05 00 00 00 0x02 00 00 00 0x01 00 00 00 0x02 00 00 00
+ ------------- ------------- ------------- ---------------------------
+entry_number=1 id=5 node_num=2 node_ids : 1, 2
+```
+```cpp
+[In-memory Format]
+std::map<uint32_t, std::set<uint32_t>>{{5, {1, 2}}};
+```
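+
+A similar sketch for decoding the op table (illustrative only; it reuses the
+hypothetical `read_u32` helper from the source table sketch above):
+
+```cpp
+#include <cstdint>
+#include <istream>
+#include <map>
+#include <set>
+#include <utility>
+
+std::map<uint32_t, std::set<uint32_t>> read_op_table(std::istream &is)
+{
+  std::map<uint32_t, std::set<uint32_t>> table;
+  const uint32_t entry_number = read_u32(is);
+  for (uint32_t e = 0; e < entry_number; ++e)
+  {
+    const uint32_t id = read_u32(is);
+    const uint32_t node_num = read_u32(is);
+    std::set<uint32_t> node_ids;
+    for (uint32_t n = 0; n < node_num; ++n)
+      node_ids.insert(read_u32(is));
+    table.emplace(id, std::move(node_ids));
+  }
+  return table;
+}
+```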
diff --git a/compiler/luci/profile/include/luci/Profile/CircleNodeID.h b/compiler/luci/profile/include/luci/Profile/CircleNodeID.h
new file mode 100644
index 000000000..165866bcf
--- /dev/null
+++ b/compiler/luci/profile/include/luci/Profile/CircleNodeID.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PROFILE_CIRCLE_NODE_ID_H__
+#define __LUCI_PROFILE_CIRCLE_NODE_ID_H__
+
+#include <luci/IR/CircleNode.h>
+
+namespace luci
+{
+
+using CircleNodeID = uint32_t;
+
+bool has_node_id(const luci::CircleNode *circle_node);
+
+void set_node_id(luci::CircleNode *circle_node, CircleNodeID id);
+
+CircleNodeID get_node_id(const luci::CircleNode *circle_node);
+
+} // namespace luci
+
+#endif // __LUCI_PROFILE_CIRCLE_NODE_ID_H__
diff --git a/compiler/luci/profile/include/luci/Profile/CircleNodeOrigin.h b/compiler/luci/profile/include/luci/Profile/CircleNodeOrigin.h
new file mode 100644
index 000000000..2d6558c92
--- /dev/null
+++ b/compiler/luci/profile/include/luci/Profile/CircleNodeOrigin.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PROFILE_CIRCLE_NODE_ORIGIN_H__
+#define __LUCI_PROFILE_CIRCLE_NODE_ORIGIN_H__
+
+#include "CircleNodeID.h"
+
+#include <luci/IR/CircleNode.h>
+
+#include <initializer_list>
+#include <memory>
+#include <set>
+#include <string>
+#include <vector>
+
+namespace luci
+{
+
+class CircleNodeOrigin
+{
+protected:
+ struct Source
+ {
+ public:
+ std::string name(void) const { return _name; }
+ void name(const std::string &name) { _name = name; }
+
+ uint32_t id(void) const { return _id; }
+ void id(const uint32_t id) { _id = id; }
+
+ private:
+ std::string _name;
+ uint32_t _id = 0;
+ };
+
+public:
+ virtual std::set<const Source *> sources(void) const = 0;
+};
+
+std::shared_ptr<CircleNodeOrigin> single_origin(uint32_t id, const std::string &name);
+
+std::shared_ptr<CircleNodeOrigin>
+composite_origin(const std::initializer_list<std::shared_ptr<CircleNodeOrigin>> origins);
+
+std::shared_ptr<CircleNodeOrigin>
+composite_origin(const std::vector<std::shared_ptr<CircleNodeOrigin>> &origins);
+
+} // namespace luci
+
+namespace luci
+{
+
+bool has_origin(const luci::CircleNode *circle_node);
+
+void add_origin(luci::CircleNode *circle_node, const std::shared_ptr<CircleNodeOrigin> origin);
+
+// NOTE When circle_node does not have origin, nullptr is returned
+const std::shared_ptr<luci::CircleNodeOrigin> get_origin(const luci::CircleNode *circle_node);
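+
+// Usage sketch (illustrative):
+//   add_origin(node, luci::single_origin(1, "conv1"));
+//   add_origin(node, luci::single_origin(2, "relu1"));
+//   // get_origin(node)->sources() now holds both origins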
+
+} // namespace luci
+
+#endif // __LUCI_PROFILE_CIRCLE_NODE_ORIGIN_H__
diff --git a/compiler/luci/profile/src/CircleNodeID.cpp b/compiler/luci/profile/src/CircleNodeID.cpp
new file mode 100644
index 000000000..750b36cae
--- /dev/null
+++ b/compiler/luci/profile/src/CircleNodeID.cpp
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Profile/CircleNodeID.h"
+
+#include <loco.h>
+
+#include <memory>
+#include <stdexcept>
+
+namespace
+{
+
+/**
+ * @brief Set annotation for circle node id
+ * @note Once CircleNodeID is annotated, it should not be changed.
+ *       If CircleNodeID needs to be changed, create a new CircleNodeID.
+ */
+class CircleNodeIDAnnotation final : public loco::NodeAnnotation
+{
+public:
+ CircleNodeIDAnnotation() = delete;
+
+ CircleNodeIDAnnotation(luci::CircleNodeID node_id) : _node_id{node_id}
+ {
+ // Do nothing
+ }
+
+public:
+ luci::CircleNodeID node_id(void) const { return _node_id; }
+ // No setter
+
+private:
+ luci::CircleNodeID _node_id;
+};
+
+} // namespace
+
+namespace luci
+{
+
+bool has_node_id(const luci::CircleNode *circle_node)
+{
+ return circle_node->annot<CircleNodeIDAnnotation>() != nullptr;
+}
+
+void set_node_id(luci::CircleNode *circle_node, luci::CircleNodeID id)
+{
+ circle_node->annot<CircleNodeIDAnnotation>(nullptr);
+ circle_node->annot(std::make_unique<CircleNodeIDAnnotation>(id));
+}
+
+luci::CircleNodeID get_node_id(const luci::CircleNode *circle_node)
+{
+ if (!has_node_id(circle_node))
+ throw std::runtime_error("Cannot find CircleNodeID");
+
+ return circle_node->annot<CircleNodeIDAnnotation>()->node_id();
+}
+
+} // namespace luci
diff --git a/compiler/luci/profile/src/CircleNodeID.test.cpp b/compiler/luci/profile/src/CircleNodeID.test.cpp
new file mode 100644
index 000000000..d80c09b2c
--- /dev/null
+++ b/compiler/luci/profile/src/CircleNodeID.test.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Profile/CircleNodeID.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+TEST(LuciCircleNodeID, simple_circle_node_id)
+{
+ auto g = loco::make_graph();
+ auto add = g->nodes()->create<luci::CircleAdd>();
+
+ ASSERT_FALSE(has_node_id(add));
+
+ set_node_id(add, 3);
+
+ ASSERT_TRUE(has_node_id(add));
+ ASSERT_EQ(3, get_node_id(add));
+}
+
+TEST(LuciCircleNodeID, simple_circle_node_id_NEG)
+{
+ auto g = loco::make_graph();
+ auto add = g->nodes()->create<luci::CircleAdd>();
+
+ ASSERT_FALSE(has_node_id(add));
+
+ ASSERT_ANY_THROW(get_node_id(add));
+}
diff --git a/compiler/luci/profile/src/CircleNodeOrigin.cpp b/compiler/luci/profile/src/CircleNodeOrigin.cpp
new file mode 100644
index 000000000..c1aeb66ae
--- /dev/null
+++ b/compiler/luci/profile/src/CircleNodeOrigin.cpp
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Profile/CircleNodeOrigin.h"
+
+#include <loco.h>
+
+#include <cassert>
+#include <memory>
+#include <stdexcept>
+#include <vector>
+
+namespace
+{
+
+/**
+ * @brief Set annotation for recording origin information
+ * @note Once CircleNodeOrigin is annotated, it should not be changed.
+ *       If CircleNodeOrigin needs to be changed, create a new CircleNodeOrigin.
+ */
+class CircleNodeOriginAnnotation final : public loco::NodeAnnotation
+{
+public:
+ CircleNodeOriginAnnotation() = delete;
+
+ CircleNodeOriginAnnotation(const std::shared_ptr<luci::CircleNodeOrigin> origin) : _origin(origin)
+ {
+ // Do nothing
+ }
+
+public:
+ const std::shared_ptr<luci::CircleNodeOrigin> origin(void) const { return _origin; }
+ // No setter
+
+private:
+ const std::shared_ptr<luci::CircleNodeOrigin> _origin;
+};
+
+} // namespace
+
+namespace
+{
+
+class SingleOrigin final : public luci::CircleNodeOrigin
+{
+public:
+ SingleOrigin() = delete;
+
+ SingleOrigin(uint32_t id, const std::string &name)
+ {
+ _source.id(id);
+ _source.name(name);
+ }
+
+public:
+ std::set<const Source *> sources(void) const final
+ {
+ std::set<const Source *> res;
+ res.emplace(&_source);
+ return res;
+ }
+
+private:
+ Source _source;
+};
+
+class CompositeOrigin final : public luci::CircleNodeOrigin
+{
+public:
+ CompositeOrigin() = delete;
+
+ template <typename T> CompositeOrigin(T origins)
+ {
+ if (origins.size() == 0)
+ throw std::invalid_argument("No origins provided");
+
+ for (auto &origin : origins)
+ {
+ if (origin != nullptr)
+ _origins.emplace_back(origin);
+ }
+ }
+
+public:
+ std::set<const Source *> sources(void) const final
+ {
+ std::set<const Source *> res;
+
+ for (auto &origin : _origins)
+ {
+ for (auto source : origin->sources())
+ {
+ res.emplace(source);
+ }
+ }
+
+ return res;
+ }
+
+private:
+ std::vector<std::shared_ptr<CircleNodeOrigin>> _origins;
+};
+
+} // namespace
+
+namespace luci
+{
+
+std::shared_ptr<CircleNodeOrigin> single_origin(uint32_t id, const std::string &name)
+{
+ return std::make_shared<SingleOrigin>(id, name);
+}
+
+std::shared_ptr<CircleNodeOrigin>
+composite_origin(const std::initializer_list<std::shared_ptr<CircleNodeOrigin>> origins)
+{
+ auto origin = std::make_shared<CompositeOrigin>(origins);
+
+ // If there are no sources, there is no need to create an origin
+ if (origin->sources().empty())
+ return nullptr;
+
+ return origin;
+}
+
+std::shared_ptr<CircleNodeOrigin>
+composite_origin(const std::vector<std::shared_ptr<CircleNodeOrigin>> &origins)
+{
+ auto origin = std::make_shared<CompositeOrigin>(origins);
+
+ // If there are no sources, there is no need to create an origin
+ if (origin->sources().empty())
+ return nullptr;
+
+ return origin;
+}
+
+} // namespace luci
+
+namespace luci
+{
+
+bool has_origin(const luci::CircleNode *circle_node)
+{
+ if (circle_node->annot<CircleNodeOriginAnnotation>() == nullptr)
+ return false;
+
+ assert(!circle_node->annot<CircleNodeOriginAnnotation>()->origin()->sources().empty());
+
+ return true;
+}
+
+/**
+ * @brief 'origin' is added to the existing origin of circle_node.
+ * @note If 'origin' is nullptr, nothing is changed.
+ * For more detail, please refer to CompositeOrigin constructor.
+ */
+void add_origin(luci::CircleNode *circle_node, const std::shared_ptr<CircleNodeOrigin> origin)
+{
+ // Nothing to add
+ if (origin == nullptr)
+ return;
+
+ auto new_origin = composite_origin({get_origin(circle_node), origin});
+ circle_node->annot<CircleNodeOriginAnnotation>(nullptr);
+ circle_node->annot(std::make_unique<CircleNodeOriginAnnotation>(new_origin));
+}
+
+const std::shared_ptr<luci::CircleNodeOrigin> get_origin(const luci::CircleNode *circle_node)
+{
+ if (!has_origin(circle_node))
+ return nullptr;
+
+ assert(circle_node->annot<CircleNodeOriginAnnotation>()->origin() != nullptr);
+ return circle_node->annot<CircleNodeOriginAnnotation>()->origin();
+}
+
+} // namespace luci
diff --git a/compiler/luci/profile/src/CircleNodeOrigin.test.cpp b/compiler/luci/profile/src/CircleNodeOrigin.test.cpp
new file mode 100644
index 000000000..8748a5ce0
--- /dev/null
+++ b/compiler/luci/profile/src/CircleNodeOrigin.test.cpp
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Profile/CircleNodeID.h"
+#include "luci/Profile/CircleNodeOrigin.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+TEST(LuciCircleNodeOrigin, simple_single_origin)
+{
+ auto g = loco::make_graph();
+ auto add = g->nodes()->create<luci::CircleAdd>();
+
+ ASSERT_FALSE(has_origin(add));
+
+ auto origin = luci::single_origin(3, "add");
+ add_origin(add, origin);
+
+ ASSERT_TRUE(has_origin(add));
+
+ auto sources = get_origin(add)->sources();
+ ASSERT_EQ(1, sources.size());
+ for (auto source : sources)
+ {
+ ASSERT_EQ(3, source->id());
+ ASSERT_EQ(0, source->name().compare("add"));
+ }
+}
+
+TEST(LuciCircleNodeOrigin, simple_composite_origin_with_initializer)
+{
+ auto g = loco::make_graph();
+ auto mul = g->nodes()->create<luci::CircleMul>();
+
+ ASSERT_FALSE(has_origin(mul));
+
+ auto origin =
+ luci::composite_origin({luci::single_origin(3, "add"), luci::single_origin(7, "sub")});
+ add_origin(mul, origin);
+
+ ASSERT_TRUE(has_origin(mul));
+
+ bool add_origin_passed = false;
+ bool sub_origin_passed = false;
+ auto sources = get_origin(mul)->sources();
+ ASSERT_EQ(2, sources.size());
+ for (auto source : sources)
+ {
+ if (source->id() == 3 && source->name().compare("add") == 0)
+ add_origin_passed = true;
+ if (source->id() == 7 && source->name().compare("sub") == 0)
+ sub_origin_passed = true;
+ }
+
+ ASSERT_EQ(true, add_origin_passed);
+ ASSERT_EQ(true, sub_origin_passed);
+}
+
+TEST(LuciCircleNodeOrigin, simple_composite_origin_with_vector)
+{
+ auto g = loco::make_graph();
+ auto mul = g->nodes()->create<luci::CircleMul>();
+
+ ASSERT_FALSE(has_origin(mul));
+
+ std::vector<std::shared_ptr<luci::CircleNodeOrigin>> vec;
+ vec.push_back(luci::single_origin(3, "add"));
+ vec.push_back(luci::single_origin(7, "sub"));
+ auto origin = luci::composite_origin(vec);
+ add_origin(mul, origin);
+
+ ASSERT_TRUE(has_origin(mul));
+
+ bool add_origin_passed = false;
+ bool sub_origin_passed = false;
+ auto sources = get_origin(mul)->sources();
+ ASSERT_EQ(2, sources.size());
+ for (auto source : sources)
+ {
+ if (source->id() == 3 && source->name().compare("add") == 0)
+ add_origin_passed = true;
+ if (source->id() == 7 && source->name().compare("sub") == 0)
+ sub_origin_passed = true;
+ }
+
+ ASSERT_EQ(true, add_origin_passed);
+ ASSERT_EQ(true, sub_origin_passed);
+}
+
+TEST(LuciCircleNodeOrigin, composite_origin_empty_ctor_NEG)
+{
+ ASSERT_ANY_THROW(luci::composite_origin({}));
+}
+
+TEST(LuciCircleNodeOrigin, add_null_origin_NEG)
+{
+ auto g = loco::make_graph();
+ auto add = g->nodes()->create<luci::CircleAdd>();
+
+ ASSERT_FALSE(has_origin(add));
+
+ add_origin(add, nullptr);
+
+ ASSERT_FALSE(has_origin(add));
+}
+
+TEST(LuciCircleNodeOrigin, add_empty_origin_NEG)
+{
+ auto g = loco::make_graph();
+ auto add = g->nodes()->create<luci::CircleAdd>();
+
+ ASSERT_FALSE(has_origin(add));
+
+ add_origin(add, luci::composite_origin({nullptr, nullptr}));
+
+ ASSERT_FALSE(has_origin(add));
+}
diff --git a/compiler/luci/requires.cmake b/compiler/luci/requires.cmake
index e52523d45..a71d4482c 100644
--- a/compiler/luci/requires.cmake
+++ b/compiler/luci/requires.cmake
@@ -1,11 +1,14 @@
require("foder")
+require("pepper-csv2vec")
require("loco")
require("locop")
require("logo")
require("logo-core")
-require("mio-circle")
+require("mio-circle06")
+require("luci-compute")
require("oops")
require("hermes")
require("hermes-std")
require("tflchef")
+require("circlechef")
require("tflite2circle")
diff --git a/compiler/luci/service/CMakeLists.txt b/compiler/luci/service/CMakeLists.txt
index 9f50c9c4f..24bdfc152 100644
--- a/compiler/luci/service/CMakeLists.txt
+++ b/compiler/luci/service/CMakeLists.txt
@@ -2,16 +2,22 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
-add_library(luci_service SHARED ${SOURCES})
+if (NOT LUCI_LIBRARY_TYPE)
+ set(LUCI_LIBRARY_TYPE "SHARED")
+endif(NOT LUCI_LIBRARY_TYPE)
+
+add_library(luci_service ${LUCI_LIBRARY_TYPE} ${SOURCES})
target_include_directories(luci_service PRIVATE src)
target_include_directories(luci_service PUBLIC include)
target_link_libraries(luci_service PUBLIC luci_lang)
-target_link_libraries(luci_service PUBLIC mio_circle)
target_link_libraries(luci_service PUBLIC logo_core)
target_link_libraries(luci_service PRIVATE luci_log)
+target_link_libraries(luci_service PRIVATE luci_logex)
target_link_libraries(luci_service PRIVATE nncc_common)
target_link_libraries(luci_service PRIVATE oops)
install(TARGETS luci_service DESTINATION lib)
+install(DIRECTORY include/ DESTINATION include
+ FILES_MATCHING PATTERN "*.h")
if(NOT ENABLE_TEST)
return()
@@ -22,4 +28,5 @@ nnas_find_package(GTest REQUIRED)
GTest_AddTest(luci_service_test ${TESTS})
target_include_directories(luci_service_test PRIVATE src)
target_link_libraries(luci_service_test luci_service)
+target_link_libraries(luci_service_test luci_testhelper)
target_link_libraries(luci_service_test oops)
diff --git a/compiler/luci/service/include/luci/Service/ChangeOutputs.h b/compiler/luci/service/include/luci/Service/ChangeOutputs.h
new file mode 100644
index 000000000..553a3a3ab
--- /dev/null
+++ b/compiler/luci/service/include/luci/Service/ChangeOutputs.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_SVC_CHANGE_OUTPUTS_H__
+#define __LUCI_SVC_CHANGE_OUTPUTS_H__
+
+#include <loco/IR/Graph.h>
+
+#include <string>
+#include <vector>
+
+namespace luci
+{
+
+/**
+ * @brief Change graph outputs to the nodes with the given names.
+ *
+ * @note The number of names must match the number of graph outputs, and
+ * every name must exist in the graph. Throws an exception on failure.
+ */
+void change_outputs(loco::Graph *, const std::vector<std::string> &);
+
+} // namespace luci
+
+#endif // __LUCI_SVC_CHANGE_OUTPUTS_H__
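A hedged usage sketch of the interface above; the node names and the wrapper function are hypothetical, only `luci::change_outputs` and its failure behavior come from this header.

```cpp
#include <luci/Service/ChangeOutputs.h>

void retarget_outputs(loco::Graph *g)
{
  // One name per graph output; throws (oops::UserExn) if the count differs
  // from g->outputs()->size() or a name is not found in the graph.
  std::vector<std::string> names{"conv1", "relu2"}; // hypothetical node names
  luci::change_outputs(g, names);
}
```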
diff --git a/compiler/luci/service/include/luci/Service/CircleNodeClone.h b/compiler/luci/service/include/luci/Service/CircleNodeClone.h
new file mode 100644
index 000000000..2429997cc
--- /dev/null
+++ b/compiler/luci/service/include/luci/Service/CircleNodeClone.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_NODE_CLONE__
+#define __LUCI_CIRCLE_NODE_CLONE__
+
+#include <luci/IR/CircleNodes.h>
+
+#include <loco/IR/Graph.h>
+
+namespace luci
+{
+
+/**
+ * @brief Copy common attributes of CircleNode from src to dst.
+ */
+void copy_common_attributes(const luci::CircleNode *src, luci::CircleNode *dst);
+
+/**
+ * @brief Return a new CircleNode cloned into 'graph' with the same attribute values as 'node'.
+ * @note Returns nullptr if cloning fails.
+ */
+CircleNode *clone_node(const CircleNode *node, loco::Graph *graph);
+
+} // namespace luci
+
+#endif // __LUCI_CIRCLE_NODE_CLONE__
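A minimal sketch of cloning a node across graphs under the contract above; the wrapper function is illustrative only.

```cpp
#include <luci/Service/CircleNodeClone.h>

luci::CircleNode *copy_into(const luci::CircleNode *node, loco::Graph *dst)
{
  // clone_node() creates the node in 'dst', then copies the common
  // attributes (name, dtype, shape, quantparam, ...) plus the node-specific
  // ones; it returns nullptr when the operator is not supported.
  return luci::clone_node(node, dst);
}
```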
diff --git a/compiler/luci/service/include/luci/Service/CircleShapeInference.h b/compiler/luci/service/include/luci/Service/CircleShapeInference.h
index fb934c2cf..92c5fb04c 100644
--- a/compiler/luci/service/include/luci/Service/CircleShapeInference.h
+++ b/compiler/luci/service/include/luci/Service/CircleShapeInference.h
@@ -17,25 +17,173 @@
#ifndef __LUCI_CIRCLE_SHAPE_INFERENCE_H__
#define __LUCI_CIRCLE_SHAPE_INFERENCE_H__
-#include "ShapeDescription.h"
+#include <luci/Service/CircleShapeInferenceRule.h>
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
-#include <loco/IR/Nodes.h>
+#include <loco/IR/NodeShape.h>
+#include <loco/IR/TensorShape.h>
namespace luci
{
-/**
- * @brief Get the shape of each node as a node annotation
- *
- * HOW TO USE
- *
- * ShapeInference::get(g->nodes()->at(..));
- */
-struct ShapeInference
+namespace sinf // namespace for Shape Inference
+{
+
+struct Rule
+{
+ bool infer(const luci::CircleNode *, loco::TensorShape &) const;
+};
+
+class Algorithm final : public luci::CircleNodeVisitor<loco::TensorShape>
{
- static ShapeDescription get(loco::Node *node);
+public:
+ // TODO Remove this when all visit functions are implemented
+ loco::TensorShape visit(const luci::CircleNode *node) final
+ {
+ loco::NodeShape shape;
+ luci::CircleShapeInferenceRule().infer(node, shape);
+ return shape.as<loco::TensorShape>();
+ }
+
+ // loco::TensorShape visit(const luci::CircleAbs *node) final;
+ // loco::TensorShape visit(const luci::CircleAdd *node) final;
+ // loco::TensorShape visit(const luci::CircleAddN *node) final;
+ // loco::TensorShape visit(const luci::CircleArgMax *node) final;
+ // loco::TensorShape visit(const luci::CircleArgMin *node) final;
+ // loco::TensorShape visit(const luci::CircleAveragePool2D *node) final;
+ // loco::TensorShape visit(const luci::CircleBatchMatMul *node) final;
+ // loco::TensorShape visit(const luci::CircleBatchToSpaceND *node) final;
+ // loco::TensorShape visit(const luci::CircleCast *node) final;
+ // loco::TensorShape visit(const luci::CircleCeil *node) final;
+ // loco::TensorShape visit(const luci::CircleConcatenation *node) final;
+ // loco::TensorShape visit(const luci::CircleConst *node) final;
+ // loco::TensorShape visit(const luci::CircleConv2D *node) final;
+ // loco::TensorShape visit(const luci::CircleCos *node) final;
+ // loco::TensorShape visit(const luci::CircleCustom *node) final;
+ // loco::TensorShape visit(const luci::CircleDepthToSpace *node) final;
+ // loco::TensorShape visit(const luci::CircleDepthwiseConv2D *node) final;
+ // loco::TensorShape visit(const luci::CircleDequantize *node) final;
+ // loco::TensorShape visit(const luci::CircleDiv *node) final;
+ // loco::TensorShape visit(const luci::CircleElu *node) final;
+ // loco::TensorShape visit(const luci::CircleEqual *node) final;
+ // loco::TensorShape visit(const luci::CircleExp *node) final;
+ // loco::TensorShape visit(const luci::CircleExpandDims *node) final;
+ // loco::TensorShape visit(const luci::CircleFakeQuant *node) final;
+ // loco::TensorShape visit(const luci::CircleFill *node) final;
+ // loco::TensorShape visit(const luci::CircleFloor *node) final;
+ // loco::TensorShape visit(const luci::CircleFloorDiv *node) final;
+ // loco::TensorShape visit(const luci::CircleFloorMod *node) final;
+ // loco::TensorShape visit(const luci::CircleFullyConnected *node) final;
+ // loco::TensorShape visit(const luci::CircleGather *node) final;
+ // loco::TensorShape visit(const luci::CircleGatherNd *node) final;
+ // loco::TensorShape visit(const luci::CircleGreater *node) final;
+ // loco::TensorShape visit(const luci::CircleGreaterEqual *node) final;
+ // loco::TensorShape visit(const luci::CircleHardSwish *node) final;
+ // loco::TensorShape visit(const luci::CircleIf *node) final;
+ // loco::TensorShape visit(const luci::CircleL2Normalize *node) final;
+ // loco::TensorShape visit(const luci::CircleL2Pool2D *node) final;
+ // loco::TensorShape visit(const luci::CircleLeakyRelu *node) final;
+ // loco::TensorShape visit(const luci::CircleLess *node) final;
+ // loco::TensorShape visit(const luci::CircleLessEqual *node) final;
+ // loco::TensorShape visit(const luci::CircleLocalResponseNormalization *node) final;
+ // loco::TensorShape visit(const luci::CircleLog *node) final;
+ // loco::TensorShape visit(const luci::CircleLogicalAnd *node) final;
+ // loco::TensorShape visit(const luci::CircleLogicalNot *node) final;
+ // loco::TensorShape visit(const luci::CircleLogicalOr *node) final;
+ // loco::TensorShape visit(const luci::CircleLogistic *node) final;
+ // loco::TensorShape visit(const luci::CircleLogSoftmax *node) final;
+ // loco::TensorShape visit(const luci::CircleMatrixDiag *node) final;
+ // loco::TensorShape visit(const luci::CircleMatrixSetDiag *node) final;
+ // loco::TensorShape visit(const luci::CircleMaximum *node) final;
+ // loco::TensorShape visit(const luci::CircleMaxPool2D *node) final;
+ // loco::TensorShape visit(const luci::CircleMean *node) final;
+ // loco::TensorShape visit(const luci::CircleMinimum *node) final;
+ // loco::TensorShape visit(const luci::CircleMirrorPad *node) final;
+ // loco::TensorShape visit(const luci::CircleMul *node) final;
+ // loco::TensorShape visit(const luci::CircleNeg *node) final;
+ // loco::TensorShape visit(const luci::CircleNonMaxSuppressionV4 *node) final;
+ // loco::TensorShape visit(const luci::CircleNonMaxSuppressionV5 *node) final;
+ // loco::TensorShape visit(const luci::CircleNotEqual *node) final;
+ // loco::TensorShape visit(const luci::CircleOneHot *node) final;
+ // loco::TensorShape visit(const luci::CirclePack *node) final;
+ // loco::TensorShape visit(const luci::CirclePad *node) final;
+ // loco::TensorShape visit(const luci::CirclePadV2 *node) final;
+ // loco::TensorShape visit(const luci::CirclePow *node) final;
+ // loco::TensorShape visit(const luci::CirclePRelu *node) final;
+ // loco::TensorShape visit(const luci::CircleQuantize *node) final;
+ // loco::TensorShape visit(const luci::CircleRange *node) final;
+ // loco::TensorShape visit(const luci::CircleRank *node) final;
+ // loco::TensorShape visit(const luci::CircleReduceAny *node) final;
+ // loco::TensorShape visit(const luci::CircleReduceMax *node) final;
+ // loco::TensorShape visit(const luci::CircleReduceMin *node) final;
+ // loco::TensorShape visit(const luci::CircleReduceProd *node) final;
+ // loco::TensorShape visit(const luci::CircleRelu *node) final;
+ // loco::TensorShape visit(const luci::CircleRelu6 *node) final;
+ // loco::TensorShape visit(const luci::CircleReluN1To1 *node) final;
+ // loco::TensorShape visit(const luci::CircleReshape *node) final;
+ // loco::TensorShape visit(const luci::CircleResizeBilinear *node) final;
+ // loco::TensorShape visit(const luci::CircleResizeNearestNeighbor *node) final;
+ // loco::TensorShape visit(const luci::CircleReverseSequence *node) final;
+ // loco::TensorShape visit(const luci::CircleReverseV2 *node) final;
+ // loco::TensorShape visit(const luci::CircleRound *node) final;
+ // loco::TensorShape visit(const luci::CircleRsqrt *node) final;
+ // loco::TensorShape visit(const luci::CircleScatterNd *node) final;
+ // loco::TensorShape visit(const luci::CircleSegmentSum *node) final;
+ // loco::TensorShape visit(const luci::CircleSelect *node) final;
+ // loco::TensorShape visit(const luci::CircleSelectV2 *node) final;
+ // loco::TensorShape visit(const luci::CircleShape *node) final;
+ // loco::TensorShape visit(const luci::CircleSin *node) final;
+ // loco::TensorShape visit(const luci::CircleSlice *node) final;
+ // loco::TensorShape visit(const luci::CircleSoftmax *node) final;
+ // loco::TensorShape visit(const luci::CircleSpaceToBatchND *node) final;
+ // loco::TensorShape visit(const luci::CircleSpaceToDepth *node) final;
+ // loco::TensorShape visit(const luci::CircleSparseToDense *node) final;
+ // loco::TensorShape visit(const luci::CircleSplit *node) final;
+ // loco::TensorShape visit(const luci::CircleSplitV *node) final;
+ // loco::TensorShape visit(const luci::CircleSqrt *node) final;
+ // loco::TensorShape visit(const luci::CircleSquare *node) final;
+ // loco::TensorShape visit(const luci::CircleSquaredDifference *node) final;
+ // loco::TensorShape visit(const luci::CircleSqueeze *node) final;
+ // loco::TensorShape visit(const luci::CircleStridedSlice *node) final;
+ // loco::TensorShape visit(const luci::CircleSub *node) final;
+ // loco::TensorShape visit(const luci::CircleSum *node) final;
+ // loco::TensorShape visit(const luci::CircleTanh *node) final;
+ // loco::TensorShape visit(const luci::CircleTile *node) final;
+ // loco::TensorShape visit(const luci::CircleTopKV2 *node) final;
+ // loco::TensorShape visit(const luci::CircleTranspose *node) final;
+ // loco::TensorShape visit(const luci::CircleTransposeConv *node) final;
+ // loco::TensorShape visit(const luci::CircleUnidirectionalSequenceLSTM *node) final;
+ // loco::TensorShape visit(const luci::CircleUnique *node) final;
+ // loco::TensorShape visit(const luci::CircleUnpack *node) final;
+ // loco::TensorShape visit(const luci::CircleWhere *node) final;
+ // loco::TensorShape visit(const luci::CircleWhile *node) final;
+ // loco::TensorShape visit(const luci::CircleZerosLike *node) final;
+
+ // Circle Only
+ // loco::TensorShape visit(const luci::CircleBCQFullyConnected *node) final;
+ // loco::TensorShape visit(const luci::CircleBCQGather *node) final;
+ // loco::TensorShape visit(const luci::CircleInstanceNorm *node) final;
+
+ // Virtual
+ // loco::TensorShape visit(const luci::CircleCustomOut *node) final;
+ loco::TensorShape visit(const luci::CircleIfOut *node) final;
+ // loco::TensorShape visit(const luci::CircleInput *node) final;
+ // loco::TensorShape visit(const luci::CircleNonMaxSuppressionV4Out *node) final;
+ // loco::TensorShape visit(const luci::CircleNonMaxSuppressionV5Out *node) final;
+ // loco::TensorShape visit(const luci::CircleOutput *node) final;
+ // loco::TensorShape visit(const luci::CircleOutputDummy *node) final;
+ // loco::TensorShape visit(const luci::CircleOutputExclude *node) final;
+ // loco::TensorShape visit(const luci::CircleSplitOut *node) final;
+ // loco::TensorShape visit(const luci::CircleSplitVOut *node) final;
+ // loco::TensorShape visit(const luci::CircleTopKV2Out *node) final;
+ // loco::TensorShape visit(const luci::CircleUniqueOut *node) final;
+ // loco::TensorShape visit(const luci::CircleUnpackOut *node) final;
+ // loco::TensorShape visit(const luci::CircleWhileOut *node) final;
};
+} // namespace sinf
+
} // namespace luci
#endif // __LUCI_CIRCLE_SHAPE_INFERENCE_H__
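A sketch of how a pass might drive the new `sinf::Rule`; the traversal and the shape write-back are assumptions for illustration, only `Rule::infer` comes from this header.

```cpp
#include <luci/Service/CircleShapeInference.h>

#include <loco.h>

void infer_shapes(loco::Graph *g)
{
  luci::sinf::Rule rule;
  for (auto node : loco::active_nodes(loco::output_nodes(g)))
  {
    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
    loco::TensorShape shape;
    if (rule.infer(circle_node, shape))
    {
      // write the inferred shape back to the node (illustrative)
      circle_node->rank(shape.rank());
      for (uint32_t r = 0; r < shape.rank(); ++r)
        circle_node->dim(r) = shape.dim(r);
      circle_node->shape_status(luci::ShapeStatus::VALID);
    }
  }
}
```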
diff --git a/compiler/luci/service/include/luci/Service/CircleTypeInference.h b/compiler/luci/service/include/luci/Service/CircleTypeInference.h
index ea7a3c5ed..4f4ab0f34 100644
--- a/compiler/luci/service/include/luci/Service/CircleTypeInference.h
+++ b/compiler/luci/service/include/luci/Service/CircleTypeInference.h
@@ -17,26 +17,172 @@
#ifndef __LUCI_CIRCLE_TYPE_INFERENCE_H__
#define __LUCI_CIRCLE_TYPE_INFERENCE_H__
-#include <loco/IR/Nodes.h>
+#include <luci/Service/CircleTypeInferenceRule.h>
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
-#include <mio/circle/schema_generated.h>
+#include <loco/IR/DataType.h>
namespace luci
{
-/**
- * @brief Get the type of each node as NodeAnnotation
- *
- * HOW TO USE
- *
- * TypeInference::get(g->nodes()->at(0));
- * TypeInference::get(g->nodes()->at(...));
- */
-struct TypeInference
+namespace tinf // namespace for Type Inference
+{
+
+struct Rule
+{
+ bool infer(const luci::CircleNode *, loco::DataType &) const;
+};
+
+class Algorithm final : public luci::CircleNodeVisitor<loco::DataType>
{
- static circle::TensorType get(loco::Node *node);
+public:
+ // TODO Remove this when all visit functions are implemented
+ loco::DataType visit(const luci::CircleNode *node) final
+ {
+ loco::DataType dtype;
+ luci::CircleTypeInferenceRule().infer(node, dtype);
+ return dtype;
+ }
+
+ // loco::DataType visit(const luci::CircleAbs *node) final;
+ // loco::DataType visit(const luci::CircleAdd *node) final;
+ // loco::DataType visit(const luci::CircleAddN *node) final;
+ // loco::DataType visit(const luci::CircleArgMax *node) final;
+ // loco::DataType visit(const luci::CircleArgMin *node) final;
+ // loco::DataType visit(const luci::CircleAveragePool2D *node) final;
+ // loco::DataType visit(const luci::CircleBatchMatMul *node) final;
+ // loco::DataType visit(const luci::CircleBatchToSpaceND *node) final;
+ // loco::DataType visit(const luci::CircleCast *node) final;
+ // loco::DataType visit(const luci::CircleCeil *node) final;
+ // loco::DataType visit(const luci::CircleConcatenation *node) final;
+ // loco::DataType visit(const luci::CircleConst *node) final;
+ // loco::DataType visit(const luci::CircleConv2D *node) final;
+ // loco::DataType visit(const luci::CircleCos *node) final;
+ // loco::DataType visit(const luci::CircleCustom *node) final;
+ // loco::DataType visit(const luci::CircleDepthToSpace *node) final;
+ // loco::DataType visit(const luci::CircleDepthwiseConv2D *node) final;
+ // loco::DataType visit(const luci::CircleDequantize *node) final;
+ // loco::DataType visit(const luci::CircleDiv *node) final;
+ // loco::DataType visit(const luci::CircleElu *node) final;
+ // loco::DataType visit(const luci::CircleEqual *node) final;
+ // loco::DataType visit(const luci::CircleExp *node) final;
+ // loco::DataType visit(const luci::CircleExpandDims *node) final;
+ // loco::DataType visit(const luci::CircleFakeQuant *node) final;
+ // loco::DataType visit(const luci::CircleFill *node) final;
+ // loco::DataType visit(const luci::CircleFloor *node) final;
+ // loco::DataType visit(const luci::CircleFloorDiv *node) final;
+ // loco::DataType visit(const luci::CircleFloorMod *node) final;
+ // loco::DataType visit(const luci::CircleFullyConnected *node) final;
+ // loco::DataType visit(const luci::CircleGather *node) final;
+ // loco::DataType visit(const luci::CircleGatherNd *node) final;
+ // loco::DataType visit(const luci::CircleGreater *node) final;
+ // loco::DataType visit(const luci::CircleGreaterEqual *node) final;
+ // loco::DataType visit(const luci::CircleHardSwish *node) final;
+ // loco::DataType visit(const luci::CircleIf *node) final;
+ // loco::DataType visit(const luci::CircleL2Normalize *node) final;
+ // loco::DataType visit(const luci::CircleL2Pool2D *node) final;
+ // loco::DataType visit(const luci::CircleLeakyRelu *node) final;
+ // loco::DataType visit(const luci::CircleLess *node) final;
+ // loco::DataType visit(const luci::CircleLessEqual *node) final;
+ // loco::DataType visit(const luci::CircleLocalResponseNormalization *node) final;
+ // loco::DataType visit(const luci::CircleLog *node) final;
+ // loco::DataType visit(const luci::CircleLogicalAnd *node) final;
+ // loco::DataType visit(const luci::CircleLogicalNot *node) final;
+ // loco::DataType visit(const luci::CircleLogicalOr *node) final;
+ // loco::DataType visit(const luci::CircleLogistic *node) final;
+ // loco::DataType visit(const luci::CircleLogSoftmax *node) final;
+ // loco::DataType visit(const luci::CircleMatrixDiag *node) final;
+ // loco::DataType visit(const luci::CircleMatrixSetDiag *node) final;
+ // loco::DataType visit(const luci::CircleMaximum *node) final;
+ // loco::DataType visit(const luci::CircleMaxPool2D *node) final;
+ // loco::DataType visit(const luci::CircleMean *node) final;
+ // loco::DataType visit(const luci::CircleMinimum *node) final;
+ // loco::DataType visit(const luci::CircleMirrorPad *node) final;
+ // loco::DataType visit(const luci::CircleNeg *node) final;
+ // loco::DataType visit(const luci::CircleNonMaxSuppressionV4 *node) final;
+ // loco::DataType visit(const luci::CircleNonMaxSuppressionV5 *node) final;
+ // loco::DataType visit(const luci::CircleNotEqual *node) final;
+ // loco::DataType visit(const luci::CirclePack *node) final;
+ // loco::DataType visit(const luci::CirclePad *node) final;
+ // loco::DataType visit(const luci::CirclePadV2 *node) final;
+ // loco::DataType visit(const luci::CirclePow *node) final;
+ // loco::DataType visit(const luci::CirclePRelu *node) final;
+ // loco::DataType visit(const luci::CircleRange *node) final;
+ // loco::DataType visit(const luci::CircleRank *node) final;
+ // loco::DataType visit(const luci::CircleMul *node) final;
+ // loco::DataType visit(const luci::CircleOneHot *node) final;
+ // loco::DataType visit(const luci::CircleQuantize *node) final;
+ // loco::DataType visit(const luci::CircleReduceAny *node) final;
+ // loco::DataType visit(const luci::CircleReduceMax *node) final;
+ // loco::DataType visit(const luci::CircleReduceMin *node) final;
+ // loco::DataType visit(const luci::CircleReduceProd *node) final;
+ // loco::DataType visit(const luci::CircleRelu *node) final;
+ // loco::DataType visit(const luci::CircleRelu6 *node) final;
+ // loco::DataType visit(const luci::CircleReluN1To1 *node) final;
+ // loco::DataType visit(const luci::CircleReshape *node) final;
+ // loco::DataType visit(const luci::CircleResizeBilinear *node) final;
+ // loco::DataType visit(const luci::CircleResizeNearestNeighbor *node) final;
+ // loco::DataType visit(const luci::CircleReverseSequence *node) final;
+ // loco::DataType visit(const luci::CircleReverseV2 *node) final;
+ // loco::DataType visit(const luci::CircleRound *node) final;
+ // loco::DataType visit(const luci::CircleRsqrt *node) final;
+ // loco::DataType visit(const luci::CircleScatterNd *node) final;
+ // loco::DataType visit(const luci::CircleSegmentSum *node) final;
+ // loco::DataType visit(const luci::CircleSelect *node) final;
+ // loco::DataType visit(const luci::CircleSelectV2 *node) final;
+ // loco::DataType visit(const luci::CircleShape *node) final;
+ // loco::DataType visit(const luci::CircleSin *node) final;
+ // loco::DataType visit(const luci::CircleSlice *node) final;
+ // loco::DataType visit(const luci::CircleSoftmax *node) final;
+ // loco::DataType visit(const luci::CircleSpaceToBatchND *node) final;
+ // loco::DataType visit(const luci::CircleSpaceToDepth *node) final;
+ // loco::DataType visit(const luci::CircleSparseToDense *node) final;
+ // loco::DataType visit(const luci::CircleSplit *node) final;
+ // loco::DataType visit(const luci::CircleSplitV *node) final;
+ // loco::DataType visit(const luci::CircleSqrt *node) final;
+ // loco::DataType visit(const luci::CircleSquare *node) final;
+ // loco::DataType visit(const luci::CircleSquaredDifference *node) final;
+ // loco::DataType visit(const luci::CircleSqueeze *node) final;
+ // loco::DataType visit(const luci::CircleStridedSlice *node) final;
+ // loco::DataType visit(const luci::CircleSub *node) final;
+ // loco::DataType visit(const luci::CircleSum *node) final;
+ // loco::DataType visit(const luci::CircleTanh *node) final;
+ // loco::DataType visit(const luci::CircleTile *node) final;
+ // loco::DataType visit(const luci::CircleTopKV2 *node) final;
+ // loco::DataType visit(const luci::CircleTranspose *node) final;
+ // loco::DataType visit(const luci::CircleTransposeConv *node) final;
+ // loco::DataType visit(const luci::CircleUnidirectionalSequenceLSTM *node) final;
+ // loco::DataType visit(const luci::CircleUnique *node) final;
+ // loco::DataType visit(const luci::CircleUnpack *node) final;
+ // loco::DataType visit(const luci::CircleWhere *node) final;
+ // loco::DataType visit(const luci::CircleWhile *node) final;
+ // loco::DataType visit(const luci::CircleZerosLike *node) final;
+
+ // Circle Only
+ // loco::DataType visit(const luci::CircleBCQFullyConnected *node) final;
+ // loco::DataType visit(const luci::CircleBCQGather *node) final;
+ // loco::DataType visit(const luci::CircleInstanceNorm *node) final;
+
+ // Virtual
+ // loco::DataType visit(const luci::CircleInput *node) final;
+ // loco::DataType visit(const luci::CircleOutput *node) final;
+ // loco::DataType visit(const luci::CircleOutputDummy *node) final;
+ // loco::DataType visit(const luci::CircleOutputExclude *node) final;
+ // loco::DataType visit(const luci::CircleCustomOut *node) final;
+ loco::DataType visit(const luci::CircleIfOut *node) final;
+ // loco::DataType visit(const luci::CircleNonMaxSuppressionV4Out *node) final;
+ // loco::DataType visit(const luci::CircleNonMaxSuppressionV5Out *node) final;
+ // loco::DataType visit(const luci::CircleSplitOut *node) final;
+ // loco::DataType visit(const luci::CircleSplitVOut *node) final;
+ // loco::DataType visit(const luci::CircleTopKV2Out *node) final;
+ // loco::DataType visit(const luci::CircleUniqueOut *node) final;
+ // loco::DataType visit(const luci::CircleUnpackOut *node) final;
+ // loco::DataType visit(const luci::CircleWhileOut *node) final;
};
+} // namespace tinf
+
} // namespace luci
#endif // __LUCI_CIRCLE_TYPE_INFERENCE_H__
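An analogous sketch for the `tinf::Rule` above; again the loop structure is an assumption, and the dtype write-back mirrors the shape-inference case.

```cpp
#include <luci/Service/CircleTypeInference.h>

#include <loco.h>

void infer_types(loco::Graph *g)
{
  luci::tinf::Rule rule;
  for (auto node : loco::active_nodes(loco::output_nodes(g)))
  {
    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
    loco::DataType dtype = loco::DataType::Unknown;
    if (rule.infer(circle_node, dtype))
      circle_node->dtype(dtype);
  }
}
```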
diff --git a/compiler/luci/service/include/luci/Service/Nodes/CircleConst.h b/compiler/luci/service/include/luci/Service/Nodes/CircleConst.h
new file mode 100644
index 000000000..6049b4297
--- /dev/null
+++ b/compiler/luci/service/include/luci/Service/Nodes/CircleConst.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_SERVICE_CIRCLE_CONST_H__
+#define __LUCI_SERVICE_CIRCLE_CONST_H__
+
+#include <luci/IR/Nodes/CircleConst.h>
+
+namespace luci
+{
+
+/**
+ * @brief Return a cloned copy of a CircleConst node
+ */
+luci::CircleConst *clone(luci::CircleConst *node);
+
+} // namespace luci
+
+#endif // __LUCI_SERVICE_CIRCLE_CONST_H__
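A one-line usage sketch; the caller is hypothetical, and the clone is presumably created in the same graph as the input node.

```cpp
#include <luci/Service/Nodes/CircleConst.h>

luci::CircleConst *duplicate_const(luci::CircleConst *weights)
{
  // Returns a new CircleConst carrying the same attributes and contents.
  return luci::clone(weights);
}
```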
diff --git a/compiler/luci/service/include/luci/Service/ShapeDescription.h b/compiler/luci/service/include/luci/Service/ShapeDescription.h
index 949cce535..4671096fd 100644
--- a/compiler/luci/service/include/luci/Service/ShapeDescription.h
+++ b/compiler/luci/service/include/luci/Service/ShapeDescription.h
@@ -20,6 +20,8 @@
#include <loco/IR/PermutingCodec.h>
#include <loco/IR/NodeShape.h>
+#include <luci/IR/CircleNodes.h>
+
#include <cstdint>
#include <vector>
@@ -33,11 +35,8 @@ struct ShapeDescription
};
// TODO remove these when CircleDialect is fully functional
+ShapeDescription to_shape_description(const luci::CircleNode *node);
ShapeDescription to_shape_description(const loco::TensorShape &shape);
-ShapeDescription to_shape_description(const loco::FeatureShape &shape);
-ShapeDescription to_shape_description(const loco::FilterShape &shape);
-ShapeDescription to_shape_description(const loco::BiasShape &shape);
-ShapeDescription to_shape_description(const loco::MatrixShape &shape);
ShapeDescription to_shape_description(const loco::NodeShape &shape);
template <typename Permutation> inline bool isNHWC(Permutation *perm);
diff --git a/compiler/luci/service/include/luci/Service/Validate.h b/compiler/luci/service/include/luci/Service/Validate.h
index 4b80d1d16..815e5e380 100644
--- a/compiler/luci/service/include/luci/Service/Validate.h
+++ b/compiler/luci/service/include/luci/Service/Validate.h
@@ -17,6 +17,8 @@
#ifndef __LUCI_SERVICE_VALIDATE_H__
#define __LUCI_SERVICE_VALIDATE_H__
+#include <luci/IR/Module.h>
+
#include <loco.h>
namespace luci
@@ -24,6 +26,23 @@ namespace luci
bool validate(loco::Graph *);
+/**
+ * @brief Return true if all nodes in the graph have a non-empty name
+ */
+bool validate_name(loco::Graph *);
+
+/**
+ * @brief Return true if all names in the Module are unique
+ * @note CircleOutput nodes may have duplicate names
+ */
+bool validate_unique_name(luci::Module *);
+
+bool validate(luci::Module *);
+
+bool validate_shape(loco::Graph *);
+
+bool validate_shape(luci::Module *);
+
} // namespace luci
#endif // __LUCI_SERVICE_VALIDATE_H__
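A sketch of how an exporter might gate on the new module-level checks; the call site is an assumption, the `validate*` entry points are the ones declared above.

```cpp
#include <luci/Service/Validate.h>

bool check_module(luci::Module *m)
{
  // validate(Module) covers per-graph checks plus name validity/uniqueness;
  // validate_shape() assumes shape inference has already run.
  return luci::validate(m) && luci::validate_shape(m);
}
```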
diff --git a/compiler/luci/service/src/ChangeOutputs.cpp b/compiler/luci/service/src/ChangeOutputs.cpp
new file mode 100644
index 000000000..65175530c
--- /dev/null
+++ b/compiler/luci/service/src/ChangeOutputs.cpp
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/ChangeOutputs.h"
+
+#include <luci/IR/CircleNode.h>
+
+#include <loco/IR/Graph.h>
+
+#include <oops/UserExn.h>
+
+#include <cassert>
+#include <iostream>
+#include <map>
+
+namespace
+{
+
+luci::CircleNode *find_by_name(loco::Graph *g, const std::string &name)
+{
+ for (auto node : loco::all_nodes(g))
+ {
+ auto cnode = loco::must_cast<luci::CircleNode *>(node);
+ if (cnode->name() == name)
+ return cnode;
+ }
+ return nullptr;
+}
+
+} // namespace
+
+namespace luci
+{
+
+void change_outputs(loco::Graph *graph, const std::vector<std::string> &new_outputs)
+{
+ if (new_outputs.size() != graph->outputs()->size())
+ {
+ throw oops::UserExn("Change outputs failed: number of outputs should be ",
+ graph->outputs()->size());
+ }
+
+ std::map<std::string, luci::CircleNode *> named_nodes;
+
+ for (auto &node_name : new_outputs)
+ {
+ auto node = find_by_name(graph, node_name);
+ if (node == nullptr)
+ {
+ throw oops::UserExn("Change outputs failed: node not found: ", node_name);
+ }
+ named_nodes[node_name] = node;
+ }
+ // just to be sure
+ assert(graph->outputs()->size() == named_nodes.size());
+
+ for (uint32_t out = 0; out < graph->outputs()->size(); ++out)
+ {
+ auto output = luci::output_node(graph, out); // output is CircleOutput
+ assert(output != nullptr);
+
+ auto &node_name = new_outputs.at(out);
+ auto node = named_nodes[node_name];
+ assert(node != nullptr);
+
+ output->from(node);
+
+ // update GraphOutput shape, dtype to node
+ auto graph_out = graph->outputs()->at(out);
+ auto output_shape = std::make_unique<loco::TensorShape>();
+
+ output_shape->rank(node->rank());
+ for (uint32_t r = 0; r < node->rank(); ++r)
+ {
+ if (node->dim(r).known())
+ output_shape->dim(r).set(node->dim(r).value());
+ else
+ output_shape->dim(r).unset();
+ }
+ graph_out->shape(std::move(output_shape));
+ graph_out->dtype(node->dtype());
+ }
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/ChangeOutputs.test.cpp b/compiler/luci/service/src/ChangeOutputs.test.cpp
new file mode 100644
index 000000000..e37860e8a
--- /dev/null
+++ b/compiler/luci/service/src/ChangeOutputs.test.cpp
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/ChangeOutputs.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <luci/IR/Nodes/CircleSqrt.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class Sqrt2xGraphlet
+{
+public:
+ Sqrt2xGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 input_shape)
+ {
+ _sqrt1 = g->nodes()->create<luci::CircleSqrt>();
+ _sqrt1->dtype(loco::DataType::S32);
+ _sqrt1->name("sqrt1");
+
+ _sqrt2 = g->nodes()->create<luci::CircleSqrt>();
+ _sqrt2->dtype(loco::DataType::S32);
+ _sqrt2->name("sqrt2");
+ }
+
+public:
+ luci::CircleSqrt *sqrt1(void) const { return _sqrt1; }
+ luci::CircleSqrt *sqrt2(void) const { return _sqrt2; }
+
+protected:
+ luci::CircleSqrt *_sqrt1 = nullptr;
+ luci::CircleSqrt *_sqrt2 = nullptr;
+};
+
+class Sqrt2xGraph : public TestIOGraph, public Sqrt2xGraphlet
+{
+public:
+ Sqrt2xGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ Sqrt2xGraphlet::init(g(), shape);
+
+ _sqrt1->x(input());
+
+ _sqrt2->x(_sqrt1);
+
+ output()->from(_sqrt2);
+ }
+};
+
+} // namespace
+
+TEST(ChangeOutputsTest, change)
+{
+ Sqrt2xGraph g;
+
+ g.init({3, 3});
+
+ {
+ auto output = luci::output_node(g.g(), 0);
+ ASSERT_EQ(g.sqrt2(), output->from());
+ }
+
+ std::vector<std::string> names{"sqrt1"};
+
+ EXPECT_NO_THROW(luci::change_outputs(g.g(), names));
+
+ {
+ auto output = luci::output_node(g.g(), 0);
+ ASSERT_EQ(g.sqrt1(), output->from());
+ }
+}
+
+TEST(ChangeOutputsTest, name_not_found_NEG)
+{
+ Sqrt2xGraph g;
+
+ g.init({3, 3});
+
+ std::vector<std::string> names{"sqrt33"};
+
+ EXPECT_ANY_THROW(luci::change_outputs(g.g(), names));
+}
+
+TEST(ChangeOutputsTest, number_names_NEG)
+{
+ Sqrt2xGraph g;
+
+ g.init({3, 3});
+
+ std::vector<std::string> names{"sqrt1", "sqrt2"};
+
+ EXPECT_ANY_THROW(luci::change_outputs(g.g(), names));
+}
diff --git a/compiler/luci/service/src/CircleCloneNode.cpp b/compiler/luci/service/src/CircleCloneNode.cpp
new file mode 100644
index 000000000..a8895ea08
--- /dev/null
+++ b/compiler/luci/service/src/CircleCloneNode.cpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleNode *node)
+{
+#define CNVISIT_GRP(GRP) \
+ { \
+ CloneNodeLet<CN::GRP> cn(_graph); \
+ auto cloned = node->accept(&cn); \
+ if (cloned != nullptr) \
+ return cloned; \
+ }
+
+ CNVISIT_GRP(ABC);
+ CNVISIT_GRP(DEF);
+ CNVISIT_GRP(GHIJ);
+ CNVISIT_GRP(KLMN);
+ CNVISIT_GRP(OPQR);
+ CNVISIT_GRP(STUV);
+ CNVISIT_GRP(WXYZ);
+
+#undef CNVISIT_GRP
+
+ return nullptr;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/CircleCloneNode.h b/compiler/luci/service/src/CircleCloneNode.h
new file mode 100644
index 000000000..e0b4dbc41
--- /dev/null
+++ b/compiler/luci/service/src/CircleCloneNode.h
@@ -0,0 +1,290 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_CLONE_NODE_H__
+#define __CIRCLE_CLONE_NODE_H__
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/IR/CircleNodeVisitor.h>
+
+namespace luci
+{
+
+// CloneNode-let type
+enum class CN
+{
+ ABC,
+ DEF,
+ GHIJ,
+ KLMN,
+ OPQR,
+ STUV,
+ WXYZ,
+};
+
+template <CN ct> class CloneNodeLet;
+
+template <> class CloneNodeLet<CN::ABC> final : public luci::CircleNodeVisitor<luci::CircleNode *>
+{
+public:
+ CloneNodeLet(loco::Graph *graph) : _graph(graph){};
+
+public:
+ luci::CircleNode *visit(const luci::CircleAbs *) final;
+ luci::CircleNode *visit(const luci::CircleAdd *) final;
+ luci::CircleNode *visit(const luci::CircleAddN *) final;
+ luci::CircleNode *visit(const luci::CircleArgMax *) final;
+ luci::CircleNode *visit(const luci::CircleArgMin *) final;
+ luci::CircleNode *visit(const luci::CircleAveragePool2D *) final;
+ luci::CircleNode *visit(const luci::CircleBatchMatMul *) final;
+ luci::CircleNode *visit(const luci::CircleBatchToSpaceND *) final;
+ luci::CircleNode *visit(const luci::CircleCast *) final;
+ luci::CircleNode *visit(const luci::CircleCeil *) final;
+ luci::CircleNode *visit(const luci::CircleConcatenation *) final;
+ luci::CircleNode *visit(const luci::CircleConst *) final;
+ luci::CircleNode *visit(const luci::CircleConv2D *) final;
+ luci::CircleNode *visit(const luci::CircleCos *) final;
+ luci::CircleNode *visit(const luci::CircleCustom *) final;
+
+ luci::CircleNode *visit(const luci::CircleNode *) final { return nullptr; }
+
+protected:
+ loco::Graph *_graph = nullptr;
+};
+
+template <> class CloneNodeLet<CN::DEF> final : public luci::CircleNodeVisitor<luci::CircleNode *>
+{
+public:
+ CloneNodeLet(loco::Graph *graph) : _graph(graph){};
+
+public:
+ luci::CircleNode *visit(const luci::CircleDensify *) final;
+ luci::CircleNode *visit(const luci::CircleDepthToSpace *) final;
+ luci::CircleNode *visit(const luci::CircleDepthwiseConv2D *) final;
+ luci::CircleNode *visit(const luci::CircleDequantize *) final;
+ luci::CircleNode *visit(const luci::CircleDiv *) final;
+ luci::CircleNode *visit(const luci::CircleElu *) final;
+ luci::CircleNode *visit(const luci::CircleEqual *) final;
+ luci::CircleNode *visit(const luci::CircleExp *) final;
+ luci::CircleNode *visit(const luci::CircleExpandDims *) final;
+ luci::CircleNode *visit(const luci::CircleFakeQuant *) final;
+ luci::CircleNode *visit(const luci::CircleFill *) final;
+ luci::CircleNode *visit(const luci::CircleFloor *) final;
+ luci::CircleNode *visit(const luci::CircleFloorDiv *) final;
+ luci::CircleNode *visit(const luci::CircleFloorMod *) final;
+ luci::CircleNode *visit(const luci::CircleFullyConnected *) final;
+
+ luci::CircleNode *visit(const luci::CircleNode *) final { return nullptr; }
+
+protected:
+ loco::Graph *_graph = nullptr;
+};
+
+template <> class CloneNodeLet<CN::GHIJ> final : public luci::CircleNodeVisitor<luci::CircleNode *>
+{
+public:
+ CloneNodeLet(loco::Graph *graph) : _graph(graph){};
+
+public:
+ luci::CircleNode *visit(const luci::CircleGather *) final;
+ luci::CircleNode *visit(const luci::CircleGatherNd *) final;
+ luci::CircleNode *visit(const luci::CircleGelu *) final;
+ luci::CircleNode *visit(const luci::CircleGreater *) final;
+ luci::CircleNode *visit(const luci::CircleGreaterEqual *) final;
+ luci::CircleNode *visit(const luci::CircleHardSwish *) final;
+ luci::CircleNode *visit(const luci::CircleIf *) final;
+
+ luci::CircleNode *visit(const luci::CircleNode *) final { return nullptr; }
+
+protected:
+ loco::Graph *_graph = nullptr;
+};
+
+template <> class CloneNodeLet<CN::KLMN> final : public luci::CircleNodeVisitor<luci::CircleNode *>
+{
+public:
+ CloneNodeLet(loco::Graph *graph) : _graph(graph){};
+
+public:
+ luci::CircleNode *visit(const luci::CircleL2Normalize *) final;
+ luci::CircleNode *visit(const luci::CircleL2Pool2D *) final;
+ luci::CircleNode *visit(const luci::CircleLeakyRelu *) final;
+ luci::CircleNode *visit(const luci::CircleLess *) final;
+ luci::CircleNode *visit(const luci::CircleLessEqual *) final;
+ luci::CircleNode *visit(const luci::CircleLocalResponseNormalization *) final;
+ luci::CircleNode *visit(const luci::CircleLog *) final;
+ luci::CircleNode *visit(const luci::CircleLogicalAnd *) final;
+ luci::CircleNode *visit(const luci::CircleLogicalNot *) final;
+ luci::CircleNode *visit(const luci::CircleLogicalOr *) final;
+ luci::CircleNode *visit(const luci::CircleLogistic *) final;
+ luci::CircleNode *visit(const luci::CircleLogSoftmax *) final;
+ luci::CircleNode *visit(const luci::CircleMatrixDiag *) final;
+ luci::CircleNode *visit(const luci::CircleMatrixSetDiag *) final;
+ luci::CircleNode *visit(const luci::CircleMaximum *) final;
+ luci::CircleNode *visit(const luci::CircleMaxPool2D *) final;
+ luci::CircleNode *visit(const luci::CircleMean *) final;
+ luci::CircleNode *visit(const luci::CircleMinimum *) final;
+ luci::CircleNode *visit(const luci::CircleMirrorPad *) final;
+ luci::CircleNode *visit(const luci::CircleMul *) final;
+ luci::CircleNode *visit(const luci::CircleNeg *) final;
+ luci::CircleNode *visit(const luci::CircleNonMaxSuppressionV4 *) final;
+ luci::CircleNode *visit(const luci::CircleNonMaxSuppressionV5 *) final;
+ luci::CircleNode *visit(const luci::CircleNotEqual *) final;
+
+ luci::CircleNode *visit(const luci::CircleNode *) final { return nullptr; }
+
+protected:
+ loco::Graph *_graph = nullptr;
+};
+
+template <> class CloneNodeLet<CN::OPQR> final : public luci::CircleNodeVisitor<luci::CircleNode *>
+{
+public:
+ CloneNodeLet(loco::Graph *graph) : _graph(graph){};
+
+public:
+ luci::CircleNode *visit(const luci::CircleOneHot *) final;
+ luci::CircleNode *visit(const luci::CirclePack *) final;
+ luci::CircleNode *visit(const luci::CirclePad *) final;
+ luci::CircleNode *visit(const luci::CirclePadV2 *) final;
+ luci::CircleNode *visit(const luci::CirclePow *) final;
+ luci::CircleNode *visit(const luci::CirclePRelu *) final;
+ luci::CircleNode *visit(const luci::CircleQuantize *) final;
+ luci::CircleNode *visit(const luci::CircleRange *) final;
+ luci::CircleNode *visit(const luci::CircleRank *) final;
+ luci::CircleNode *visit(const luci::CircleReduceAny *) final;
+ luci::CircleNode *visit(const luci::CircleReduceMax *) final;
+ luci::CircleNode *visit(const luci::CircleReduceMin *) final;
+ luci::CircleNode *visit(const luci::CircleReduceProd *) final;
+ luci::CircleNode *visit(const luci::CircleRelu *) final;
+ luci::CircleNode *visit(const luci::CircleRelu6 *) final;
+ luci::CircleNode *visit(const luci::CircleReluN1To1 *) final;
+ luci::CircleNode *visit(const luci::CircleReshape *) final;
+ luci::CircleNode *visit(const luci::CircleResizeBilinear *) final;
+ luci::CircleNode *visit(const luci::CircleResizeNearestNeighbor *) final;
+ luci::CircleNode *visit(const luci::CircleReverseSequence *) final;
+ luci::CircleNode *visit(const luci::CircleReverseV2 *) final;
+ luci::CircleNode *visit(const luci::CircleRound *) final;
+ luci::CircleNode *visit(const luci::CircleRsqrt *) final;
+
+ luci::CircleNode *visit(const luci::CircleNode *) final { return nullptr; }
+
+protected:
+ loco::Graph *_graph = nullptr;
+};
+
+template <> class CloneNodeLet<CN::STUV> final : public luci::CircleNodeVisitor<luci::CircleNode *>
+{
+public:
+ CloneNodeLet(loco::Graph *graph) : _graph(graph){};
+
+public:
+ luci::CircleNode *visit(const luci::CircleScatterNd *) final;
+ luci::CircleNode *visit(const luci::CircleSegmentSum *) final;
+ luci::CircleNode *visit(const luci::CircleSelect *) final;
+ luci::CircleNode *visit(const luci::CircleSelectV2 *) final;
+ luci::CircleNode *visit(const luci::CircleShape *) final;
+ luci::CircleNode *visit(const luci::CircleSin *) final;
+ luci::CircleNode *visit(const luci::CircleSlice *) final;
+ luci::CircleNode *visit(const luci::CircleSoftmax *) final;
+ luci::CircleNode *visit(const luci::CircleSpaceToBatchND *) final;
+ luci::CircleNode *visit(const luci::CircleSpaceToDepth *) final;
+ luci::CircleNode *visit(const luci::CircleSparseToDense *) final;
+ luci::CircleNode *visit(const luci::CircleSplit *) final;
+ luci::CircleNode *visit(const luci::CircleSplitV *) final;
+ luci::CircleNode *visit(const luci::CircleSqrt *) final;
+ luci::CircleNode *visit(const luci::CircleSquare *) final;
+ luci::CircleNode *visit(const luci::CircleSquaredDifference *) final;
+ luci::CircleNode *visit(const luci::CircleSqueeze *) final;
+ luci::CircleNode *visit(const luci::CircleStridedSlice *) final;
+ luci::CircleNode *visit(const luci::CircleSVDF *) final;
+ luci::CircleNode *visit(const luci::CircleSub *) final;
+ luci::CircleNode *visit(const luci::CircleSum *) final;
+ luci::CircleNode *visit(const luci::CircleTanh *) final;
+ luci::CircleNode *visit(const luci::CircleTile *) final;
+ luci::CircleNode *visit(const luci::CircleTopKV2 *) final;
+ luci::CircleNode *visit(const luci::CircleTranspose *) final;
+ luci::CircleNode *visit(const luci::CircleTransposeConv *) final;
+ luci::CircleNode *visit(const luci::CircleUnidirectionalSequenceLSTM *) final;
+ luci::CircleNode *visit(const luci::CircleUnique *) final;
+ luci::CircleNode *visit(const luci::CircleUnpack *) final;
+
+ luci::CircleNode *visit(const luci::CircleNode *) final { return nullptr; }
+
+protected:
+ loco::Graph *_graph = nullptr;
+};
+
+template <> class CloneNodeLet<CN::WXYZ> final : public luci::CircleNodeVisitor<luci::CircleNode *>
+{
+public:
+ CloneNodeLet(loco::Graph *graph) : _graph(graph){};
+
+public:
+ luci::CircleNode *visit(const luci::CircleWhere *) final;
+ luci::CircleNode *visit(const luci::CircleWhile *) final;
+ luci::CircleNode *visit(const luci::CircleZerosLike *) final;
+
+ luci::CircleNode *visit(const luci::CircleNode *) final { return nullptr; }
+
+protected:
+ loco::Graph *_graph = nullptr;
+};
+
+class CloneNode final : public luci::CircleNodeVisitor<luci::CircleNode *>
+{
+public:
+ CloneNode(loco::Graph *graph) : _graph(graph){};
+
+public:
+ // Circle Only
+ luci::CircleNode *visit(const luci::CircleBCQFullyConnected *) final;
+ luci::CircleNode *visit(const luci::CircleBCQGather *) final;
+ luci::CircleNode *visit(const luci::CircleInstanceNorm *) final;
+
+ // NOTE CircleInput and CircleOutput are not handled here as they need
+ // to be linked with graph I/O
+
+ // Virtual
+ luci::CircleNode *visit(const luci::CircleCustomOut *) final;
+ luci::CircleNode *visit(const luci::CircleIfOut *) final;
+ // luci::CircleNode *visit(const luci::CircleInput *) final;
+ luci::CircleNode *visit(const luci::CircleNonMaxSuppressionV4Out *) final;
+ luci::CircleNode *visit(const luci::CircleNonMaxSuppressionV5Out *) final;
+ // luci::CircleNode *visit(const luci::CircleOutput *) final;
+ luci::CircleNode *visit(const luci::CircleOutputDummy *) final;
+ luci::CircleNode *visit(const luci::CircleOutputExclude *) final;
+ luci::CircleNode *visit(const luci::CircleSplitOut *) final;
+ luci::CircleNode *visit(const luci::CircleSplitVOut *) final;
+ luci::CircleNode *visit(const luci::CircleTopKV2Out *) final;
+ luci::CircleNode *visit(const luci::CircleUniqueOut *) final;
+ luci::CircleNode *visit(const luci::CircleUnpackOut *) final;
+ luci::CircleNode *visit(const luci::CircleVariable *) final;
+ luci::CircleNode *visit(const luci::CircleWhileOut *) final;
+
+ // Handle in CircleNode
+ luci::CircleNode *visit(const luci::CircleNode *) final;
+
+ // NOTE CircleNodeVisitor will throw if not supported here
+
+protected:
+ loco::Graph *_graph = nullptr;
+};
+
+} // namespace luci
+
+#endif // __CIRCLE_CLONE_NODE_H__
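Splitting the visitor into alphabetical `CloneNodeLet` groups keeps each class and its translation unit small, presumably to bound compile cost as the operator set grows; `CloneNode::visit` in CircleCloneNode.cpp chains the groups and falls through whenever a group's catch-all returns nullptr. A direct-use sketch (normally this is reached via `clone_node`, and the wrapper below is hypothetical):

```cpp
#include "CircleCloneNode.h"

luci::CircleNode *clone_via_visitor(const luci::CircleNode *node, loco::Graph *g)
{
  luci::CloneNode cn(g);
  return node->accept(&cn); // nullptr when the operator is not supported
}
```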
diff --git a/compiler/luci/service/src/CircleNodeClone.cpp b/compiler/luci/service/src/CircleNodeClone.cpp
new file mode 100644
index 000000000..220c6096c
--- /dev/null
+++ b/compiler/luci/service/src/CircleNodeClone.cpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/CircleQuantParam.h"
+#include "luci/Service/CircleNodeClone.h"
+
+#include "CircleCloneNode.h"
+
+#include <oops/UserExn.h>
+
+#include <cassert>
+
+namespace luci
+{
+
+/**
+ * @note Node-type-specific attributes, such as keep_dims() of CircleSum,
+ * are not copied.
+ */
+void copy_common_attributes(const luci::CircleNode *src, luci::CircleNode *dst)
+{
+ assert(src != nullptr);
+ assert(dst != nullptr);
+
+ dst->name(src->name());
+ dst->dtype(src->dtype());
+
+ dst->rank(src->rank());
+ for (uint32_t i = 0; i < src->rank(); i++)
+ {
+ dst->dim(i) = src->dim(i);
+ }
+ dst->shape_status(src->shape_status());
+
+ // quantparam
+ copy_quantparam(src, dst);
+
+ // sparsity
+ const auto *sparsity = src->sparsityparam();
+ if (sparsity != nullptr)
+ {
+ auto sparam = std::make_unique<luci::SparsityParam>();
+ sparam->traversal_order = sparsity->traversal_order;
+ sparam->block_map = sparsity->block_map;
+ sparam->dim_metadata = sparsity->dim_metadata;
+
+ dst->sparsityparam(std::move(sparam));
+ }
+
+ // op version
+ dst->op_version(src->op_version());
+}
+
+/**
+ * @note Each visit implementation must copy the node-specific attributes.
+ */
+luci::CircleNode *clone_node(const luci::CircleNode *node, loco::Graph *graph)
+{
+ if (node == nullptr || graph == nullptr)
+ return nullptr;
+
+ CloneNode cn(graph);
+ auto cloned = node->accept(&cn);
+ if (cloned != nullptr)
+ copy_common_attributes(node, cloned);
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/CircleNodeClone.test.cpp b/compiler/luci/service/src/CircleNodeClone.test.cpp
new file mode 100644
index 000000000..5908eeb82
--- /dev/null
+++ b/compiler/luci/service/src/CircleNodeClone.test.cpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+// NOTE any node will do for testing
+#include <luci/IR/Nodes/CircleAdd.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+luci::CircleAdd *build_simple_add_graph(loco::Graph *g)
+{
+ auto node = g->nodes()->create<luci::CircleAdd>();
+
+ node->name("name");
+ node->dtype(loco::DataType::FLOAT32);
+ node->rank(1);
+ node->dim(0).set(3);
+ node->shape_status(luci::ShapeStatus::VALID);
+ node->fusedActivationFunction(luci::FusedActFunc::NONE);
+
+ auto qparam = std::make_unique<luci::CircleQuantParam>();
+ qparam->scale = {1.0};
+ qparam->zerop = {0};
+ qparam->min = {0.0};
+ qparam->max = {1.0};
+ qparam->quantized_dimension = 0;
+ node->quantparam(std::move(qparam));
+
+ auto sparam = std::make_unique<luci::SparsityParam>();
+ sparam->traversal_order = {0};
+ sparam->block_map = {0};
+ sparam->dim_metadata = {luci::DimMetaData(luci::DimensionType::DENSE, 1)};
+ node->sparsityparam(std::move(sparam));
+
+ node->op_version(2);
+
+ return node;
+}
+
+} // namespace
+
+TEST(CircleNodeCloneTest, copy_attributes)
+{
+ auto g = loco::make_graph();
+ auto node = build_simple_add_graph(g.get());
+
+ auto copy = g->nodes()->create<luci::CircleAdd>();
+ luci::copy_common_attributes(node, copy);
+
+ ASSERT_EQ(node->name(), copy->name());
+ ASSERT_EQ(node->dtype(), copy->dtype());
+ ASSERT_EQ(node->rank(), copy->rank());
+ ASSERT_EQ(node->shape_status(), copy->shape_status());
+
+ const auto *qparam_node = node->quantparam();
+ const auto *qparam_copy = copy->quantparam();
+ ASSERT_EQ(qparam_node->scale, qparam_copy->scale);
+
+ const auto *sparsity_node = node->sparsityparam();
+ const auto *sparsity_copy = copy->sparsityparam();
+ ASSERT_EQ(sparsity_node->traversal_order, sparsity_copy->traversal_order);
+
+ ASSERT_EQ(node->op_version(), copy->op_version());
+}
+
+TEST(CircleNodeCloneTest, clone_add_node)
+{
+ auto g = loco::make_graph();
+ auto node = build_simple_add_graph(g.get());
+
+ auto cg = loco::make_graph();
+ auto clone = clone_node(node, cg.get());
+
+ ASSERT_NE(nullptr, clone);
+ ASSERT_EQ(cg.get(), clone->graph());
+ ASSERT_EQ(node->name(), clone->name());
+ ASSERT_EQ(node->dtype(), clone->dtype());
+ ASSERT_EQ(node->rank(), clone->rank());
+ ASSERT_EQ(node->shape_status(), clone->shape_status());
+}
+
+TEST(CircleNodeCloneTest, clone_node_NEG)
+{
+ auto g = loco::make_graph();
+ auto node = build_simple_add_graph(g.get());
+
+ auto cg = loco::make_graph();
+ auto clone = luci::clone_node(nullptr, cg.get());
+ ASSERT_EQ(nullptr, clone);
+ auto clone2 = luci::clone_node(node, nullptr);
+ ASSERT_EQ(nullptr, clone2);
+}
diff --git a/compiler/luci/service/src/CircleShapeInference.cpp b/compiler/luci/service/src/CircleShapeInference.cpp
index 0732849db..73472069b 100644
--- a/compiler/luci/service/src/CircleShapeInference.cpp
+++ b/compiler/luci/service/src/CircleShapeInference.cpp
@@ -15,20 +15,73 @@
*/
#include "luci/Service/CircleShapeInference.h"
-#include "luci/Service/ShapeDescription.h"
+
+#include "CircleShapeInferenceHelper.h"
#include <loco.h>
-#include <loco/Service/ShapeInference.h>
+
+#include <luci/Log.h>
#include <cassert>
+#include <iostream>
+
+namespace
+{
+
+std::ostream &operator<<(std::ostream &os, const loco::TensorShape &tensor_shape)
+{
+ os << "[";
+ for (uint32_t r = 0; r < tensor_shape.rank(); ++r)
+ {
+ if (r)
+ os << ",";
+
+ if (tensor_shape.dim(r).known())
+ os << tensor_shape.dim(r).value();
+ else
+ os << "?";
+ }
+ os << "]";
+ return os;
+}
+
+bool inputs_shape_ready(const luci::CircleNode *node)
+{
+ for (uint32_t arity = 0; arity < node->arity(); ++arity)
+ {
+ auto node_input = loco::must_cast<luci::CircleNode *>(node->arg(arity));
+ if (node_input->shape_status() == luci::ShapeStatus::UNDEFINED)
+ return false;
+ }
+
+ return true;
+}
+
+} // namespace
namespace luci
{
+namespace sinf
+{
-ShapeDescription ShapeInference::get(loco::Node *node)
+bool Rule::infer(const luci::CircleNode *circle_node, loco::TensorShape &shape) const
{
- assert(loco::shape_known(node));
- return to_shape_description(loco::shape_get(node));
+ LOGGER(l);
+ VERBOSE(l, 1) << "[CircleShapeInference] " << circle_node->name();
+ VERBOSE(l, 1) << " before: " << circle_shape(circle_node);
+
+ if (!inputs_shape_ready(circle_node))
+ {
+ VERBOSE(l, 1) << " after: Some inputs are not ready for inference";
+ return false;
+ }
+
+ Algorithm alg;
+ shape = circle_node->accept(&alg);
+ VERBOSE(l, 1) << " after: " << shape;
+
+ return true;
}
+} // namespace sinf
} // namespace luci
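
A sketch of how a shape-inference pass might drive the new rule (the surrounding loop is hypothetical; only sinf::Rule::infer is part of this change):

    luci::sinf::Rule rule;
    loco::TensorShape inferred;
    // infer() returns false while any input is still ShapeStatus::UNDEFINED,
    // so a pass can simply retry the node on a later iteration.
    if (rule.infer(circle_node, inferred))
    {
      circle_node->rank(inferred.rank());
      for (uint32_t r = 0; r < inferred.rank(); ++r)
        circle_node->dim(r) = inferred.dim(r);
      circle_node->shape_status(luci::ShapeStatus::VALID);
    }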
diff --git a/compiler/luci/service/src/CircleShapeInferenceHelper.cpp b/compiler/luci/service/src/CircleShapeInferenceHelper.cpp
new file mode 100644
index 000000000..2009aa59f
--- /dev/null
+++ b/compiler/luci/service/src/CircleShapeInferenceHelper.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleShapeInferenceHelper.h"
+
+namespace luci
+{
+
+loco::NodeShape shape_get(const loco::Node *node)
+{
+ assert(luci::shape_known(node));
+ return loco::NodeShape{sinf::circle_shape(loco::must_cast<const luci::CircleNode *>(node))};
+}
+
+bool shape_known(const loco::Node *node)
+{
+ return loco::must_cast<const luci::CircleNode *>(node)->shape_status() !=
+ luci::ShapeStatus::UNDEFINED;
+}
+
+} // namespace luci
+
+namespace luci
+{
+namespace sinf
+{
+
+loco::TensorShape circle_shape(const luci::CircleNode *node)
+{
+ loco::TensorShape shape;
+ shape.rank(node->rank());
+ for (uint32_t r = 0; r < node->rank(); ++r)
+ shape.dim(r) = node->dim(r);
+ return shape;
+}
+
+} // namespace sinf
+} // namespace luci
diff --git a/compiler/luci/service/src/CircleShapeInferenceHelper.h b/compiler/luci/service/src/CircleShapeInferenceHelper.h
new file mode 100644
index 000000000..7c7ea496c
--- /dev/null
+++ b/compiler/luci/service/src/CircleShapeInferenceHelper.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_SHAPE_INFERENCE_HELPER_H__
+#define __LUCI_CIRCLE_SHAPE_INFERENCE_HELPER_H__
+
+#include <loco/IR/NodeShape.h>
+#include <loco/IR/TensorShape.h>
+
+#include <luci/IR/CircleNodes.h>
+
+namespace luci
+{
+
+// NOTE Functions in this namespace will be removed after new inference
+// algorithms are fully implemented.
+
+// This is a temporary function for deprecating loco::shape_get
+loco::NodeShape shape_get(const loco::Node *node);
+
+// This is a temporary function for deprecating loco::shape_known
+bool shape_known(const loco::Node *node);
+
+} // namespace luci
+
+namespace luci
+{
+namespace sinf // Namespace for Shape Inference
+{
+
+// Return shape of circle node as loco::TensorShape
+loco::TensorShape circle_shape(const luci::CircleNode *node);
+
+} // namespace sinf
+} // namespace luci
+
+#endif // __LUCI_CIRCLE_SHAPE_INFERENCE_HELPER_H__
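
For illustration, sinf::circle_shape mirrors the node's own rank and dimensions into a loco::TensorShape, so a caller may rely on:

    const auto shape = luci::sinf::circle_shape(circle_node);
    assert(shape.rank() == circle_node->rank()); // dims are copied one-to-one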
diff --git a/compiler/luci/service/src/CircleShapeInferenceRule.cpp b/compiler/luci/service/src/CircleShapeInferenceRule.cpp
index db25186b1..d56886c97 100644
--- a/compiler/luci/service/src/CircleShapeInferenceRule.cpp
+++ b/compiler/luci/service/src/CircleShapeInferenceRule.cpp
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -17,6 +18,7 @@
#include "luci/Service/CircleShapeInferenceRule.h"
#include "Check.h"
+#include "CircleShapeInferenceHelper.h"
#include "ShapeInfer_StridedSlice.h"
#include <luci/IR/CircleNodes.h>
@@ -41,7 +43,11 @@ std::ostream &operator<<(std::ostream &os, const loco::TensorShape &tensor_shape
{
if (r)
os << ",";
- os << tensor_shape.dim(r).value();
+
+ if (tensor_shape.dim(r).known())
+ os << tensor_shape.dim(r).value();
+ else
+ os << "?";
}
os << "]";
return os;
@@ -52,7 +58,15 @@ loco::TensorShape own_shape(const luci::CircleNode *node)
loco::TensorShape shape;
shape.rank(node->rank());
for (uint32_t r = 0; r < node->rank(); ++r)
- shape.dim(r) = loco::Dimension(node->dim(r).value());
+ {
+ // Shape inference rules in this file did not consider unknown dimensions.
+ // If a node had an unknown dimension, 0 was inserted and the resulting
+ // shape inference was wrong.
+ // A new shape inference algorithm is being implemented to fix this.
+ // Until it is fully implemented, an unknown dimension is represented
+ // as 1, following the TFLite convention.
+ shape.dim(r) = node->dim(r).known() ? node->dim(r).value() : 1;
+ }
return shape;
}
@@ -102,7 +116,7 @@ private:
};
/**
- * @breif Expand shape x and y to same rank by align right and filling with 1
+ * @brief Expand shape x and y to same rank by align right and filling with 1
*/
void expand_rank(loco::TensorShape &x, loco::TensorShape &y)
{
@@ -122,7 +136,7 @@ void expand_rank(loco::TensorShape &x, loco::TensorShape &y)
}
/**
- * @breif Returns shape of expanded dimension of input x and y having same rank
+ * @brief Returns shape of expanded dimension of input x and y having same rank
*/
loco::TensorShape expand_dimension(const loco::TensorShape &x, const loco::TensorShape &y)
{
@@ -135,10 +149,8 @@ loco::TensorShape expand_dimension(const loco::TensorShape &x, const loco::Tenso
output_shape.rank(rank);
for (uint32_t axis = 0; axis < rank; ++axis)
{
- assert(x.dim(axis).known() && y.dim(axis).known());
-
- auto x_dim = x.dim(axis).value();
- auto y_dim = y.dim(axis).value();
+ auto x_dim = x.dim(axis).known() ? x.dim(axis).value() : 1;
+ auto y_dim = y.dim(axis).known() ? y.dim(axis).value() : 1;
 // each dimension of x and y should be the same, or one of them must be 1
if (!((x_dim == y_dim) || (x_dim == 1 || y_dim == 1)))
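
A worked example of the two helpers (shapes chosen for illustration):

    // x = [4, 3], y = [3]
    // expand_rank      -> x = [4, 3], y = [1, 3]  (right-aligned, filled with 1)
    // expand_dimension -> output = [4, 3]         (per axis: equal, or one side is 1)
    // With the relaxed known() handling, an unknown dimension behaves like 1,
    // so x = [?, 3] against y = [4, 3] also yields [4, 3].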
@@ -177,32 +189,34 @@ template <loco::DataType T> std::vector<int64_t> vector_from_constant(luci::Circ
template <class CIRCLENODE> loco::NodeShape broadcast_xy(const CIRCLENODE *node)
{
- auto x_shape = loco::shape_get(node->x()).template as<loco::TensorShape>();
- auto y_shape = loco::shape_get(node->y()).template as<loco::TensorShape>();
+ auto x_shape = luci::shape_get(node->x()).template as<loco::TensorShape>();
+ auto y_shape = luci::shape_get(node->y()).template as<loco::TensorShape>();
auto output_shape = broadcast_shape(x_shape, y_shape);
return loco::NodeShape{output_shape};
}
-template <class CIRCLENODE> loco::NodeShape use_x(const CIRCLENODE *node)
-{
- auto x_shape = loco::shape_get(node->x()).template as<loco::TensorShape>();
- return loco::NodeShape{x_shape};
-}
+#define DECLARE_USE_SINGLE(NAME) \
+ template <class CIRCLENODE> loco::NodeShape use_##NAME(const CIRCLENODE *node) \
+ { \
+ auto inputs_shape = luci::shape_get(node->NAME()).template as<loco::TensorShape>(); \
+ return loco::NodeShape{inputs_shape}; \
+ }
-template <class CIRCLENODE> loco::NodeShape use_logits(const CIRCLENODE *node)
-{
- auto shape = loco::shape_get(node->logits()).template as<loco::TensorShape>();
- return loco::NodeShape{shape};
-}
+DECLARE_USE_SINGLE(input);
+DECLARE_USE_SINGLE(inputs);
+DECLARE_USE_SINGLE(x);
+DECLARE_USE_SINGLE(logits);
+
+#undef DECLARE_USE_SINGLE
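
For clarity, each DECLARE_USE_SINGLE(NAME) expands to a helper equivalent to the deleted use_x/use_logits; DECLARE_USE_SINGLE(x), for example, produces roughly:

    template <class CIRCLENODE> loco::NodeShape use_x(const CIRCLENODE *node)
    {
      auto inputs_shape = luci::shape_get(node->x()).template as<loco::TensorShape>();
      return loco::NodeShape{inputs_shape};
    }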
template <class CIRCLENODE>
loco::NodeShape use_paddings(const CIRCLENODE *node, const luci::CircleConst *paddings)
{
const loco::DataType S32 = loco::DataType::S32;
- auto input_shape = loco::shape_get(node->input()).template as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).template as<loco::TensorShape>();
 // TODO support other data types
LUCI_ASSERT(paddings->dtype() == S32, "Only support int 32 for now");
@@ -232,11 +246,11 @@ loco::NodeShape use_paddings(const CIRCLENODE *node, const luci::CircleConst *pa
loco::NodeShape infer_add_n(const luci::CircleAddN *node)
{
- auto shape = loco::shape_get(node->inputs(0)).as<loco::TensorShape>();
+ auto shape = luci::shape_get(node->inputs(0)).as<loco::TensorShape>();
for (uint32_t idx = 1; idx < node->arity(); ++idx)
{
- auto shape_idx = loco::shape_get(node->inputs(idx)).as<loco::TensorShape>();
+ auto shape_idx = luci::shape_get(node->inputs(idx)).as<loco::TensorShape>();
if (!(shape == shape_idx))
{
INTERNAL_EXN_V("ADD_N shape not same as the first input: ", idx);
@@ -245,10 +259,10 @@ loco::NodeShape infer_add_n(const luci::CircleAddN *node)
return loco::NodeShape{shape};
}
-loco::NodeShape infer_arg_max(const luci::CircleArgMax *node)
+template <class CIRCLENODE> loco::NodeShape infer_arg_maxmin(const CIRCLENODE *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto dimension_shape = loco::shape_get(node->dimension()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).template as<loco::TensorShape>();
+ auto dimension_shape = luci::shape_get(node->dimension()).template as<loco::TensorShape>();
int64_t select_axis = 0;
{
@@ -258,55 +272,19 @@ loco::NodeShape infer_arg_max(const luci::CircleArgMax *node)
// Support S32 for now.
auto const_shape_node = loco::must_cast<luci::CircleConst *>(node->dimension());
LUCI_ASSERT(const_shape_node->dtype() == loco::DataType::S32,
- "Only support int32 CircleConst for CircleArgMax");
+ "Only support int32 CircleConst for CircleArgMax/CircleArgMin");
if (const_shape_node->rank() > 1)
INTERNAL_EXN_V("Only support rank 0/1 CircleConst",
oops::to_uint32(const_shape_node->rank()));
- select_axis = const_shape_node->scalar<loco::DataType::S32>();
+ select_axis = const_shape_node->template scalar<loco::DataType::S32>();
}
- assert(select_axis < input_shape.rank());
- assert(select_axis >= 0); // TODO support minus of this breaks
-
- // NOTE select_axis is removed
- loco::TensorShape shape_output;
- uint32_t rank = input_shape.rank();
- uint32_t shrink = static_cast<uint32_t>(select_axis);
- assert(rank > 0);
- shape_output.rank(rank - 1);
- for (uint32_t r = 0, d = 0; r < rank; ++r)
- {
- if (r == shrink)
- continue;
- shape_output.dim(d++) = input_shape.dim(r);
- }
- return loco::NodeShape{shape_output};
-}
-
-loco::NodeShape infer_arg_min(const luci::CircleArgMin *node)
-{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto dimension_shape = loco::shape_get(node->dimension()).as<loco::TensorShape>();
-
- int64_t select_axis = 0;
- {
- LUCI_ASSERT(node->dimension(), "2nd input dimension() should not be nullptr");
-
- // Only support node's shape() is CircleConst with S32/S64
- // Support S32 for now.
- auto const_shape_node = loco::must_cast<luci::CircleConst *>(node->dimension());
- LUCI_ASSERT(const_shape_node->dtype() == loco::DataType::S32,
- "Only support int32 CircleConst for CircleArgMin");
- if (const_shape_node->rank() > 1)
- INTERNAL_EXN_V("Only support rank 0/1 CircleConst",
- oops::to_uint32(const_shape_node->rank()));
-
- select_axis = const_shape_node->scalar<loco::DataType::S32>();
- }
assert(select_axis < input_shape.rank());
- assert(select_axis >= 0); // TODO support minus of this breaks
+
+ if (select_axis < 0)
+ select_axis += static_cast<int64_t>(input_shape.rank());
// NOTE select_axis is removed
loco::TensorShape shape_output;
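
For example (illustrative numbers): a rank-3 input with select_axis = -1 is normalized to select_axis = 2 by the code above, so the last dimension is the one removed from the output shape.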
@@ -326,10 +304,10 @@ loco::NodeShape infer_arg_min(const luci::CircleArgMin *node)
// Call this for CircleAvgPool2D and CircleMaxPool2D only
template <class Pool2DType> loco::NodeShape infer_pool_2d_shape(const Pool2DType *node)
{
- LUCI_ASSERT(loco::shape_known(node->value()), "Shape must be known");
-
- auto ifm_shape = loco::shape_get(node->value()).template as<loco::TensorShape>();
+ auto ifm_shape = luci::shape_get(node->value()).template as<loco::TensorShape>();
assert(ifm_shape.rank() == 4);
+ assert(ifm_shape.dim(1).known());
+ assert(ifm_shape.dim(2).known());
uint32_t input_height = ifm_shape.dim(1).value();
uint32_t input_width = ifm_shape.dim(2).value();
@@ -347,6 +325,8 @@ template <class Pool2DType> loco::NodeShape infer_pool_2d_shape(const Pool2DType
if (node->padding() == luci::Padding::VALID)
{
+ LUCI_ASSERT(input_height + stride_height > effective_window_height, "Invalid shape");
+ LUCI_ASSERT(input_width + stride_width > effective_window_width, "Invalid shape");
output_height = (input_height + stride_height - effective_window_height) / stride_height;
output_width = (input_width + stride_width - effective_window_width) / stride_width;
}
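
Worked numbers for the VALID branch (illustrative): input_height = 4, window = 2, stride = 2, dilation = 1 gives effective_window_height = 1 * (2 - 1) + 1 = 2 and output_height = (4 + 2 - 2) / 2 = 2. The new LUCI_ASSERT rejects degenerate inputs such as input_height = 1, window = 3, stride = 1, where 1 + 1 > 3 does not hold.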
@@ -372,7 +352,7 @@ loco::NodeShape infer_batch_to_space_nd(const luci::CircleBatchToSpaceND *node)
{
const loco::DataType S32 = loco::DataType::S32;
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
 // Only input rank 3 or 4 is supported
assert(input_shape.rank() == 3 || input_shape.rank() == 4);
@@ -384,8 +364,8 @@ loco::NodeShape infer_batch_to_space_nd(const luci::CircleBatchToSpaceND *node)
auto const_crops = loco::must_cast<luci::CircleConst *>(node->crops());
LUCI_ASSERT(const_crops->dtype() == loco::DataType::S32, "Only support int32 crops");
- auto const_block_shape_shape = loco::shape_get(const_block_shape).as<loco::TensorShape>();
- auto const_crops_shape = loco::shape_get(const_crops).as<loco::TensorShape>();
+ auto const_block_shape_shape = luci::shape_get(const_block_shape).as<loco::TensorShape>();
+ auto const_crops_shape = luci::shape_get(const_crops).as<loco::TensorShape>();
assert(const_block_shape_shape.rank() == 1);
assert(const_crops_shape.rank() == 2);
@@ -423,10 +403,14 @@ struct OutputSize
template <class Conv2DType> OutputSize infer_conv2d_type(const Conv2DType *node)
{
- auto ifm_shape = loco::shape_get(node->input()).template as<loco::TensorShape>();
- auto ker_shape = loco::shape_get(node->filter()).template as<loco::TensorShape>();
+ auto ifm_shape = luci::shape_get(node->input()).template as<loco::TensorShape>();
+ auto ker_shape = luci::shape_get(node->filter()).template as<loco::TensorShape>();
assert(ifm_shape.rank() == 4);
assert(ker_shape.rank() == 4);
+ assert(ifm_shape.dim(1).known());
+ assert(ifm_shape.dim(2).known());
+ assert(ker_shape.dim(1).known());
+ assert(ker_shape.dim(2).known());
uint32_t input_height = ifm_shape.dim(1).value();
uint32_t input_width = ifm_shape.dim(2).value();
@@ -444,6 +428,8 @@ template <class Conv2DType> OutputSize infer_conv2d_type(const Conv2DType *node)
if (node->padding() == luci::Padding::VALID)
{
+ LUCI_ASSERT(input_height + stride_height > effective_ker_height, "Invalid shape");
+ LUCI_ASSERT(input_width + stride_width > effective_ker_width, "Invalid shape");
output_height = (input_height + stride_height - effective_ker_height) / stride_height;
output_width = (input_width + stride_width - effective_ker_width) / stride_width;
}
@@ -496,7 +482,7 @@ loco::NodeShape infer_batchmatmul_shape(const loco::TensorShape &x_shape,
loco::Dimension y_lhs = adj_y ? y_shape.dim(y_rank - 1) : y_shape.dim(y_rank - 2);
loco::Dimension y_rhs = adj_y ? y_shape.dim(y_rank - 2) : y_shape.dim(y_rank - 1);
- if (not(x_rhs == y_lhs))
+ if (x_rhs.known() && y_lhs.known() && not(x_rhs == y_lhs))
INTERNAL_EXN("x_rhs and y_lhs should be same");
uint32_t out_rank = output_shape.rank();
@@ -511,7 +497,7 @@ loco::NodeShape infer_concatenation(const luci::CircleConcatenation *node)
 // TODO Support CircleConcatenation with 0 inputs
assert(node->numValues() > 0);
- auto first_shape = loco::shape_get(node->values(0)).as<loco::TensorShape>();
+ auto first_shape = luci::shape_get(node->values(0)).as<loco::TensorShape>();
auto axis = node->axis();
if (axis < 0)
axis += first_shape.rank();
@@ -527,14 +513,20 @@ loco::NodeShape infer_concatenation(const luci::CircleConcatenation *node)
for (uint32_t i = 1; i < node->numValues(); ++i)
{
- auto input_shape = loco::shape_get(node->values(i)).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->values(i)).as<loco::TensorShape>();
for (uint32_t j = 0; j < output_shape.rank(); ++j)
{
if (j == static_cast<uint32_t>(axis))
+ {
+ // If a dimension is unknown, value() returns 0.
+ // This is wrong, but the code is kept as-is for compatibility
+ // until the new inference algorithm is implemented.
output_shape.dim(j) = output_shape.dim(j).value() + input_shape.dim(j).value();
+ }
else
- assert(output_shape.dim(j) == input_shape.dim(j));
+ assert(!output_shape.dim(j).known() || !input_shape.dim(j).known() ||
+ output_shape.dim(j) == input_shape.dim(j));
}
}
@@ -545,11 +537,8 @@ loco::NodeShape infer_conv2d(const luci::CircleConv2D *node)
{
LOGGER(l);
- auto ifm_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); // in NHWC
- auto ker_shape = loco::shape_get(node->filter()).as<loco::TensorShape>(); // in OHWI
-
- INFO(l) << "[luci] CircleConv2D ShapeInf ifm(" << ifm_shape.rank() << ") ker(" << ker_shape.rank()
- << ")" << std::endl;
+ auto ifm_shape = luci::shape_get(node->input()).as<loco::TensorShape>(); // in NHWC
+ auto ker_shape = luci::shape_get(node->filter()).as<loco::TensorShape>(); // in OHWI
assert(ifm_shape.rank() == 4);
assert(ker_shape.rank() == 4);
@@ -564,12 +553,17 @@ loco::NodeShape infer_conv2d(const luci::CircleConv2D *node)
ofm_shape.dim(2) = os.width;
ofm_shape.dim(3) = ker_shape.dim(0);
+ INFO(l) << "[luci] CircleConv2D ShapeInf ifm(" << ifm_shape.rank() << ") ker(" << ker_shape.rank()
+ << ") output(" << ofm_shape.dim(0).value() << "," << ofm_shape.dim(1).value() << ","
+ << ofm_shape.dim(2).value() << "," << ofm_shape.dim(3).value() << ") " << node->name()
+ << std::endl;
+
return loco::NodeShape{ofm_shape};
}
loco::NodeShape infer_depth_to_space(const luci::CircleDepthToSpace *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
LUCI_ASSERT(input_shape.rank() == 4, "Only input rank 4 is supported");
// Only data format NHWC is supported
@@ -601,12 +595,13 @@ loco::NodeShape infer_depth_to_space(const luci::CircleDepthToSpace *node)
loco::NodeShape infer_depthwise_conv2d(const luci::CircleDepthwiseConv2D *node)
{
- auto ifm_shape = loco::shape_get(node->input()).as<loco::TensorShape>(); // in NHWC
- auto ker_shape = loco::shape_get(node->filter()).as<loco::TensorShape>(); // in 1 H W CM
+ auto ifm_shape = luci::shape_get(node->input()).as<loco::TensorShape>(); // in NHWC
+ auto ker_shape = luci::shape_get(node->filter()).as<loco::TensorShape>(); // in 1 H W CM
assert(ifm_shape.rank() == 4);
assert(ker_shape.rank() == 4);
assert(ker_shape.dim(0).value() == 1);
+ assert(ifm_shape.dim(3).value() * node->depthMultiplier() == ker_shape.dim(3).value());
auto os = infer_conv2d_type(node);
@@ -623,7 +618,7 @@ loco::NodeShape infer_depthwise_conv2d(const luci::CircleDepthwiseConv2D *node)
loco::NodeShape infer_expand_dims(const luci::CircleExpandDims *node)
{
const loco::DataType S32 = loco::DataType::S32;
- auto x_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto x_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
if (x_shape.rank() == 0)
{
 // This may be an unknown shape, so use the shape from the node itself.
@@ -637,7 +632,7 @@ loco::NodeShape infer_expand_dims(const luci::CircleExpandDims *node)
}
int32_t axis = const_axis->at<S32>(0);
LUCI_ASSERT((axis <= static_cast<int32_t>(x_shape.rank())) &&
- (axis >= -1 - static_cast<int32_t>(x_shape.rank())),
+ (axis >= -1 - static_cast<int32_t>(x_shape.rank())),
"Axis has to be between [-(D+1), D], where D is rank of input.");
size_t positive_axis = axis < 0 ? x_shape.rank() + axis + 1 : axis;
loco::TensorShape output_shape;
@@ -684,29 +679,41 @@ loco::NodeShape infer_fill(const luci::CircleFill *node)
loco::NodeShape infer_fully_connected(const luci::CircleFullyConnected *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto weights_shape = loco::shape_get(node->weights()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
+ auto weights_shape = luci::shape_get(node->weights()).as<loco::TensorShape>();
+
+ loco::TensorShape out_shape;
- // Checking shape capability for fully connected layer
- // Input: a tensor of at least rank 2 [D1, D2, ... Dn]
- // Weight: [# of units, K]
- // Output: [D1 * D2 * ... * Dn / K, # of units]
- if (input_shape.rank() < 2 || weights_shape.rank() != 2)
+ // NOTE Some recipes in some repositories use rank-4 inputs for FullyConnected.
+ // Until they are all fixed, the following assert is disabled.
+ // TODO Enable the following assert after the related fixes are applied
+ // https://github.com/tensorflow/tensorflow/blob/ea33c1e7a25d8025e8ee405ad8ab7be261798d76/tensorflow/lite/kernels/fully_connected.cc#L194
+ // LUCI_ASSERT(input_shape.rank() == 2 || input_shape.rank() == 3,
+ // "Input rank of FullyConnected should be 2 or 3");
+
+ // https://github.com/tensorflow/tensorflow/blob/ea33c1e7a25d8025e8ee405ad8ab7be261798d76/tensorflow/lite/kernels/fully_connected.cc#L225
+ LUCI_ASSERT(weights_shape.rank() == 2, "Weights of FullyConnected should be rank 2");
+
+ // https://github.com/tensorflow/tensorflow/blob/ea33c1e7a25d8025e8ee405ad8ab7be261798d76/tensorflow/lite/kernels/fully_connected.cc#L353-L367
+ if (node->keep_num_dims())
{
- // Return node own shape if shape inference is not possible
- return use_own(node);
+ out_shape.rank(input_shape.rank());
+ for (uint32_t i = 0; i < input_shape.rank(); ++i)
+ out_shape.dim(i) = input_shape.dim(i);
+ out_shape.dim(out_shape.rank() - 1) = weights_shape.dim(0);
}
-
- uint32_t input_size = 1;
- for (uint32_t i = 0; i < input_shape.rank(); i++)
+ else
{
- input_size = input_size * input_shape.dim(i).value();
+ uint32_t input_size = 1;
+ for (uint32_t i = 0; i < input_shape.rank(); i++)
+ {
+ input_size = input_size * input_shape.dim(i).value();
+ }
+ const uint32_t batch_size = input_size / weights_shape.dim(1).value();
+ out_shape.rank(2);
+ out_shape.dim(0) = batch_size;
+ out_shape.dim(1) = weights_shape.dim(0);
}
- const uint32_t batch_size = input_size / weights_shape.dim(1).value();
- loco::TensorShape out_shape;
- out_shape.rank(2);
- out_shape.dim(0) = batch_size;
- out_shape.dim(1) = weights_shape.dim(0);
return loco::NodeShape{out_shape};
}
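
A worked example of the two branches (shapes are illustrative):

    // input [2, 3, 4], weights [10, 4]
    // keep_num_dims == true  -> output [2, 3, 10]  (input dims kept, last = units)
    // keep_num_dims == false -> input_size = 24, batch_size = 24 / 4 = 6
    //                           -> output [6, 10]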
@@ -715,8 +722,8 @@ loco::NodeShape infer_gather(const luci::CircleGather *node)
{
loco::TensorShape output_shape;
- const auto input_shape = loco::shape_get(node->params()).as<loco::TensorShape>();
- const auto positions_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
+ const auto input_shape = luci::shape_get(node->params()).as<loco::TensorShape>();
+ const auto positions_shape = luci::shape_get(node->indices()).as<loco::TensorShape>();
int32_t axis = node->axis();
 // If CircleGather input has a dynamic shape, it can't infer this shape. So, it returns the
@@ -743,8 +750,8 @@ loco::NodeShape infer_gather_nd(const luci::CircleGatherNd *node)
{
loco::TensorShape output_shape;
- const auto params_shape = loco::shape_get(node->params()).as<loco::TensorShape>();
- const auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
+ const auto params_shape = luci::shape_get(node->params()).as<loco::TensorShape>();
+ const auto indices_shape = luci::shape_get(node->indices()).as<loco::TensorShape>();
const auto params_rank = params_shape.rank();
const auto indices_rank = indices_shape.rank();
@@ -791,7 +798,7 @@ loco::NodeShape infer_matrix_diag(const luci::CircleMatrixDiag *node)
{
loco::TensorShape output_shape;
- auto diagonal_shape = loco::shape_get(node->diagonal()).as<loco::TensorShape>();
+ auto diagonal_shape = luci::shape_get(node->diagonal()).as<loco::TensorShape>();
auto rank = diagonal_shape.rank();
output_shape.rank(rank + 1);
@@ -808,8 +815,8 @@ loco::NodeShape infer_matrix_diag(const luci::CircleMatrixDiag *node)
loco::NodeShape infer_matrix_set_diag(const luci::CircleMatrixSetDiag *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto diagonal_shape = loco::shape_get(node->diagonal()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
+ auto diagonal_shape = luci::shape_get(node->diagonal()).as<loco::TensorShape>();
auto rank = diagonal_shape.rank();
@@ -831,7 +838,7 @@ loco::TensorShape infer_reducer(const loco::Node *input, const loco::Node *indic
{
const loco::DataType S32 = loco::DataType::S32;
- auto input_shape = loco::shape_get(input).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(input).as<loco::TensorShape>();
auto reduction_indices = loco::must_cast<const luci::CircleConst *>(indices);
{ // Exceptions
@@ -892,7 +899,7 @@ loco::NodeShape infer_mirror_pad(const luci::CircleMirrorPad *node)
loco::NodeShape infer_one_hot(const luci::CircleOneHot *node)
{
const loco::DataType S32 = loco::DataType::S32;
- auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
+ auto indices_shape = luci::shape_get(node->indices()).as<loco::TensorShape>();
 // Only supports the case where OneHot's depth() is a CircleConst of type S32
// TODO support depth with other types
auto depth = loco::must_cast<luci::CircleConst *>(node->depth());
@@ -925,11 +932,11 @@ loco::NodeShape infer_pack(const luci::CirclePack *node)
{
LUCI_ASSERT(node->values_count() > 0, "Only support one or more inputs");
- auto first_shape = loco::shape_get(node->values(0)).as<loco::TensorShape>();
+ auto first_shape = luci::shape_get(node->values(0)).as<loco::TensorShape>();
// Make sure all inputs have the same shape.
for (uint32_t i = 1; i < node->values_count(); ++i)
{
- auto in_shape = loco::shape_get(node->values(i)).as<loco::TensorShape>();
+ auto in_shape = luci::shape_get(node->values(i)).as<loco::TensorShape>();
LUCI_ASSERT(loco::NodeShape{first_shape} == loco::NodeShape{in_shape},
"All inputs must have the same shape");
}
@@ -985,8 +992,8 @@ loco::NodeShape infer_pad_v2(const luci::CirclePadV2 *node)
loco::NodeShape infer_p_relu(const luci::CirclePRelu *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto alpha_shape = loco::shape_get(node->alpha()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
+ auto alpha_shape = luci::shape_get(node->alpha()).as<loco::TensorShape>();
auto output_shape = broadcast_shape(input_shape, alpha_shape);
@@ -1087,10 +1094,12 @@ loco::NodeShape infer_reshape(const luci::CircleReshape *node)
loco::TensorShape output_shape = shape_by_input;
// One of the dimensions can have special value -1, meaning its actual value should be inferred.
- const auto input_shape = loco::shape_get(node->tensor()).as<loco::TensorShape>();
- const uint32_t input_element_count = loco::element_count(&input_shape);
+ const auto input_shape = luci::shape_get(node->tensor()).as<loco::TensorShape>();
+ uint32_t input_element_count = 1;
uint32_t output_element_count = 1;
uint32_t unknown_dim_index = UINT32_MAX;
+ for (uint32_t i = 0; i < input_shape.rank(); ++i)
+ input_element_count *= (input_shape.dim(i).known() ? input_shape.dim(i).value() : 1);
for (uint32_t dim_index = 0; dim_index < output_shape.rank(); ++dim_index)
{
const uint32_t dim_value = output_shape.dim(dim_index).value();
@@ -1112,17 +1121,17 @@ loco::NodeShape infer_reshape(const luci::CircleReshape *node)
return loco::NodeShape{output_shape};
}
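
Example of the -1 handling (illustrative): an input with 24 elements reshaped to [-1, 6] records unknown_dim_index = 0 and output_element_count = 6, so dim 0 is inferred as 24 / 6 = 4 and the result is [4, 6]. With the change above, unknown input dimensions count as 1 when computing input_element_count.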
-loco::NodeShape infer_resize_bilinear(const luci::CircleResizeBilinear *node)
+template <class CIRCLENODE> loco::NodeShape infer_resize_type(const CIRCLENODE *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).template as<loco::TensorShape>();
if (input_shape.rank() != 4)
- INTERNAL_EXN("Expected ResizeBilinear input to have rank 4");
+ INTERNAL_EXN("Expected input to have rank 4");
auto *const_node = loco::must_cast<luci::CircleConst *>(node->size());
if (const_node->dtype() != loco::DataType::S32)
- INTERNAL_EXN("Only S32 datatype is supported for ResizeBilinear size");
+ INTERNAL_EXN("Only S32 datatype is supported for size");
if (const_node->rank() != 1)
INTERNAL_EXN("Expected size tensor of rank 1");
@@ -1133,36 +1142,8 @@ loco::NodeShape infer_resize_bilinear(const luci::CircleResizeBilinear *node)
loco::TensorShape output_shape;
output_shape.rank(4);
output_shape.dim(0) = input_shape.dim(0);
- output_shape.dim(1) = const_node->at<loco::DataType::S32>(0);
- output_shape.dim(2) = const_node->at<loco::DataType::S32>(1);
- output_shape.dim(3) = input_shape.dim(3);
-
- return loco::NodeShape{output_shape};
-}
-
-loco::NodeShape infer_resize_nearest_neighbor(const luci::CircleResizeNearestNeighbor *node)
-{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-
- if (input_shape.rank() != 4)
- INTERNAL_EXN("Expected ResizeNearesNeighbor input to have rank 4");
-
- auto *const_node = loco::must_cast<luci::CircleConst *>(node->size());
-
- if (const_node->dtype() != loco::DataType::S32)
- INTERNAL_EXN("Only S32 datatype is supported for ResizeNearesNeighbor size");
-
- if (const_node->rank() != 1)
- INTERNAL_EXN("Expected size tensor of rank 1");
-
- if (const_node->dim(0).value() != 2)
- INTERNAL_EXN("Expected size tensor with shape [2]");
-
- loco::TensorShape output_shape;
- output_shape.rank(4);
- output_shape.dim(0) = input_shape.dim(0);
- output_shape.dim(1) = const_node->at<loco::DataType::S32>(0);
- output_shape.dim(2) = const_node->at<loco::DataType::S32>(1);
+ output_shape.dim(1) = const_node->template at<loco::DataType::S32>(0);
+ output_shape.dim(2) = const_node->template at<loco::DataType::S32>(1);
output_shape.dim(3) = input_shape.dim(3);
return loco::NodeShape{output_shape};
@@ -1195,8 +1176,8 @@ loco::NodeShape infer_scatter_nd(const luci::CircleScatterNd *node)
loco::NodeShape infer_segment_sum(const luci::CircleSegmentSum *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
- auto segment_shape = loco::shape_get(node->segment_ids()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
+ auto segment_shape = luci::shape_get(node->segment_ids()).as<loco::TensorShape>();
LUCI_ASSERT(segment_shape.rank() == 1, "segment_ids must be 1-D tensor");
LUCI_ASSERT(segment_shape.dim(0).value() == input_shape.dim(0).value(),
@@ -1226,11 +1207,11 @@ loco::NodeShape infer_segment_sum(const luci::CircleSegmentSum *node)
loco::NodeShape infer_select(const luci::CircleSelect *node)
{
- auto t_shape = loco::shape_get(node->t()).as<loco::TensorShape>();
- assert(t_shape == loco::shape_get(node->e()).as<loco::TensorShape>());
+ auto t_shape = luci::shape_get(node->t()).as<loco::TensorShape>();
+ assert(t_shape == luci::shape_get(node->e()).as<loco::TensorShape>());
// condition shape validation
- auto c_shape = loco::shape_get(node->condition()).as<loco::TensorShape>();
+ auto c_shape = luci::shape_get(node->condition()).as<loco::TensorShape>();
if (c_shape.rank() != t_shape.rank())
{
if (c_shape.rank() != 0 && c_shape.rank() != 1)
@@ -1248,9 +1229,9 @@ loco::NodeShape infer_select(const luci::CircleSelect *node)
loco::NodeShape infer_select_v2(const luci::CircleSelectV2 *node)
{
- auto c_shape = loco::shape_get(node->condition()).as<loco::TensorShape>();
- auto t_shape = loco::shape_get(node->t()).as<loco::TensorShape>();
- auto e_shape = loco::shape_get(node->e()).as<loco::TensorShape>();
+ auto c_shape = luci::shape_get(node->condition()).as<loco::TensorShape>();
+ auto t_shape = luci::shape_get(node->t()).as<loco::TensorShape>();
+ auto e_shape = luci::shape_get(node->e()).as<loco::TensorShape>();
// validate ability to broadcast shapes to each other
auto b_shape = broadcast_shape(broadcast_shape(c_shape, t_shape), e_shape);
@@ -1259,7 +1240,7 @@ loco::NodeShape infer_select_v2(const luci::CircleSelectV2 *node)
loco::NodeShape infer_shape(const luci::CircleShape *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
loco::TensorShape output_shape;
@@ -1274,7 +1255,7 @@ loco::NodeShape infer_slice(const luci::CircleSlice *node)
const loco::DataType S32 = loco::DataType::S32;
const loco::DataType S64 = loco::DataType::S64;
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
auto const_begin = loco::must_cast<luci::CircleConst *>(node->begin());
auto const_size = loco::must_cast<luci::CircleConst *>(node->size());
@@ -1306,7 +1287,7 @@ loco::NodeShape infer_slice(const luci::CircleSlice *node)
auto size = vect_size.at(idx);
if (size == -1)
{
- size = input_shape.dim(idx).value() - vect_begin.at(idx);
+ size = static_cast<int64_t>(input_shape.dim(idx).value()) - vect_begin.at(idx);
}
output_shape.dim(idx) = size;
}
@@ -1318,7 +1299,7 @@ loco::NodeShape infer_space_to_batch_nd(const luci::CircleSpaceToBatchND *node)
{
const loco::DataType S32 = loco::DataType::S32;
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
 // Only input rank 3 or 4 is supported
assert(input_shape.rank() == 3 || input_shape.rank() == 4);
@@ -1330,8 +1311,8 @@ loco::NodeShape infer_space_to_batch_nd(const luci::CircleSpaceToBatchND *node)
auto const_paddings = loco::must_cast<luci::CircleConst *>(node->paddings());
LUCI_ASSERT(const_paddings->dtype() == S32, "Only support int32 paddings");
- auto const_block_shape_shape = loco::shape_get(const_block_shape).as<loco::TensorShape>();
- auto const_paddings_shape = loco::shape_get(const_paddings).as<loco::TensorShape>();
+ auto const_block_shape_shape = luci::shape_get(const_block_shape).as<loco::TensorShape>();
+ auto const_paddings_shape = luci::shape_get(const_paddings).as<loco::TensorShape>();
assert(const_block_shape_shape.rank() == 1);
assert(const_paddings_shape.rank() == 2);
@@ -1374,7 +1355,7 @@ loco::NodeShape infer_space_to_batch_nd(const luci::CircleSpaceToBatchND *node)
loco::NodeShape infer_space_to_depth(const luci::CircleSpaceToDepth *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
LUCI_ASSERT(input_shape.rank() == 4, "Only input rank 4 is supported");
// Only data format NHWC is supported
@@ -1412,19 +1393,33 @@ loco::NodeShape infer_sparse_to_dense(const luci::CircleSparseToDense *node)
auto output_shape_node = dynamic_cast<luci::CircleConst *>(node->output_shape());
if (output_shape_node != nullptr)
{
- // Only support node with S32
- LUCI_ASSERT(output_shape_node->dtype() == loco::DataType::S32,
- "Only support int32 CircleConst");
+ const auto output_shape_type = output_shape_node->dtype();
if (output_shape_node->rank() != 1)
INTERNAL_EXN_V("Only support rank 1 CircleConst",
oops::to_uint32(output_shape_node->rank()));
- shape.rank(output_shape_node->size<loco::DataType::S32>());
+ if (output_shape_type == loco::DataType::S32)
+ {
+ shape.rank(output_shape_node->size<loco::DataType::S32>());
+
+ for (uint32_t axis = 0; axis < shape.rank(); ++axis)
+ {
+ shape.dim(axis) = output_shape_node->at<loco::DataType::S32>(axis);
+ }
+ }
+ else if (output_shape_type == loco::DataType::S64)
+ {
+ shape.rank(output_shape_node->size<loco::DataType::S64>());
- for (uint32_t axis = 0; axis < shape.rank(); ++axis)
+ for (uint32_t axis = 0; axis < shape.rank(); ++axis)
+ {
+ shape.dim(axis) = output_shape_node->at<loco::DataType::S64>(axis);
+ }
+ }
+ else
{
- shape.dim(axis) = output_shape_node->at<loco::DataType::S32>(axis);
+ INTERNAL_EXN("Output shape of SparseToDense must be either int32 or int64");
}
}
else
@@ -1453,7 +1448,7 @@ loco::NodeShape infer_strided_slice(const luci::CircleStridedSlice *node)
loco::NodeShape infer_squeeze(const luci::CircleSqueeze *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
// TODO input shape may be unknown before runtime
std::vector<bool> do_squeeze(input_shape.rank(), false);
@@ -1504,11 +1499,35 @@ loco::NodeShape infer_squeeze(const luci::CircleSqueeze *node)
return loco::NodeShape{output_shape};
}
+loco::NodeShape infer_svdf(const luci::CircleSVDF *node)
+{
+ const auto ifm_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
+ const auto weight_feature_shape = luci::shape_get(node->weight_feature()).as<loco::TensorShape>();
+
+ assert(ifm_shape.rank() == 2);
+ assert(weight_feature_shape.rank() == 2);
+
+ assert(ifm_shape.dim(1) == weight_feature_shape.dim(1));
+ assert(weight_feature_shape.dim(0).known());
+
+ const auto rank = node->svdf_rank();
+ const auto num_filters = weight_feature_shape.dim(0).value();
+ assert(num_filters % rank == 0);
+ const auto num_units = num_filters / rank;
+
+ loco::TensorShape ofm_shape;
+ ofm_shape.rank(2);
+ ofm_shape.dim(0) = ifm_shape.dim(0);
+ ofm_shape.dim(1) = num_units;
+
+ return loco::NodeShape{ofm_shape};
+}
+
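Worked numbers for infer_svdf (illustrative): input [1, 16] and weight_feature [12, 16] with svdf_rank = 3 give num_filters = 12 and num_units = 12 / 3 = 4, so the inferred output shape is [1, 4].
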
loco::NodeShape infer_tile(const luci::CircleTile *node)
{
const loco::DataType S32 = loco::DataType::S32;
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
auto multiples = loco::must_cast<luci::CircleConst *>(node->multiples());
// TODO support non-const case
@@ -1534,7 +1553,7 @@ loco::NodeShape infer_tile(const luci::CircleTile *node)
loco::NodeShape infer_transpose(const luci::CircleTranspose *node)
{
- auto input_shape = loco::shape_get(node->a()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->a()).as<loco::TensorShape>();
auto perm_node = loco::must_cast<luci::CircleConst *>(node->perm());
@@ -1576,7 +1595,7 @@ loco::NodeShape infer_unpack(const luci::CircleUnpack *node)
 // CircleUnpack provides a list(array) of Tensors which have one less dimension than the input
 // We'll set the shape of CircleUnpack to the shape of the actual outputs
 // TODO fix this if any problem arises
- auto value_shape = loco::shape_get(node->value()).as<loco::TensorShape>();
+ auto value_shape = luci::shape_get(node->value()).as<loco::TensorShape>();
auto axis = node->axis();
auto num = node->num();
@@ -1608,9 +1627,25 @@ loco::NodeShape infer_unpack(const luci::CircleUnpack *node)
return loco::NodeShape{output_shape};
}
+loco::NodeShape infer_unidirectionalsequencelstm(const luci::CircleUnidirectionalSequenceLSTM *node)
+{
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
+ auto recurrent_to_output_weights =
+ luci::shape_get(node->recurrent_to_output_weights()).as<loco::TensorShape>();
+ auto rank = input_shape.rank();
+ loco::TensorShape output_shape;
+ output_shape.rank(rank);
+ for (uint32_t i = 0; i < rank - 1; i++)
+ {
+ output_shape.dim(i) = input_shape.dim(i);
+ }
+ output_shape.dim(rank - 1) = recurrent_to_output_weights.dim(1);
+ return loco::NodeShape{output_shape};
+}
+
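Worked example (illustrative): input [1, 10, 8] with recurrent_to_output_weights of shape [N, 16] keeps the leading input dimensions and replaces the last, giving output [1, 10, 16].
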
loco::NodeShape infer_unique(const luci::CircleUnique *node)
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
assert(input_shape.rank() == 1);
@@ -1625,7 +1660,7 @@ loco::NodeShape infer_bcq_fully_connected(const luci::CircleBCQFullyConnected *n
{
loco::TensorShape out_shape;
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
auto weights_clusters = loco::must_cast<luci::CircleConst *>(node->weights_clusters());
LUCI_ASSERT(input_shape.rank() == 2, "Input rank of BCQFullyConnected should be 2");
@@ -1648,8 +1683,8 @@ loco::NodeShape infer_bcq_gather(const luci::CircleBCQGather *node)
loco::TensorShape input_shape;
loco::TensorShape output_shape;
- const auto input_binary_shape = loco::shape_get(node->input_binary()).as<loco::TensorShape>();
- const auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
+ const auto input_binary_shape = luci::shape_get(node->input_binary()).as<loco::TensorShape>();
+ const auto indices_shape = luci::shape_get(node->indices()).as<loco::TensorShape>();
auto axis = node->axis();
auto input_clusters = loco::must_cast<luci::CircleConst *>(node->input_clusters());
@@ -1696,46 +1731,6 @@ loco::NodeShape infer_output(const luci::CircleOutput *node)
return loco::NodeShape{*output_shape};
}
-loco::NodeShape infer_if_out(const luci::CircleIfOut *node)
-{
- /**
- * @note IF operator type and shape are that of the "then" and "else"
- * Graph Outputs.
- */
- auto circle_if = dynamic_cast<const luci::CircleIf *>(node->input());
- if (circle_if == nullptr)
- {
- INTERNAL_EXN("CircleIf IR is not configured correctly");
- }
-
- auto index = node->index();
- auto then_graph = circle_if->then_graph();
- auto else_graph = circle_if->else_graph();
- assert(then_graph != nullptr);
- assert(else_graph != nullptr);
-
- // shape and type are assumed to be same
- // these are checked at post_import_graph() in Import
- auto then_outputs = loco::output_nodes(then_graph);
- auto else_outputs = loco::output_nodes(else_graph);
- assert(then_outputs.size() == else_outputs.size());
- assert(index < static_cast<int32_t>(then_outputs.size()));
-
- auto then_out = loco::must_cast<luci::CircleOutput *>(then_outputs.at(index));
- auto else_out = loco::must_cast<luci::CircleOutput *>(else_outputs.at(index));
-
- auto then_graph_outputs = then_graph->outputs(); // loco::GraphOutput items
- auto else_graph_outputs = else_graph->outputs();
- assert(then_graph_outputs->size() == else_graph_outputs->size());
-
- auto then_graph_output = then_graph_outputs->at(then_out->index());
- auto else_graph_output = else_graph_outputs->at(else_out->index());
- (void)else_graph_output; // make compiler happy for unused variable warnings
- assert(*then_graph_output->shape() == *else_graph_output->shape());
-
- return loco::NodeShape{*then_graph_output->shape()};
-}
-
loco::NodeShape infer_non_max_suppression_v4_out(const luci::CircleNonMaxSuppressionV4Out *node)
{
const loco::DataType S32 = loco::DataType::S32;
@@ -1802,7 +1797,7 @@ loco::NodeShape infer_split_out(const luci::CircleSplitOut *node)
loco::NodeShape unknown;
- auto split_shape = loco::shape_get(split).as<loco::TensorShape>();
+ auto split_shape = luci::shape_get(split).as<loco::TensorShape>();
auto split_dim = dynamic_cast<const luci::CircleConst *>(split->split_dim());
if (split_dim == nullptr)
@@ -1836,7 +1831,7 @@ loco::NodeShape infer_split_v_out(const luci::CircleSplitVOut *node)
loco::NodeShape unknown;
- auto split_shape = loco::shape_get(split).as<loco::TensorShape>();
+ auto split_shape = luci::shape_get(split).as<loco::TensorShape>();
auto size_splits = dynamic_cast<const luci::CircleConst *>(split->size_splits());
if (size_splits == nullptr)
@@ -1879,7 +1874,7 @@ loco::NodeShape infer_split_v_out(const luci::CircleSplitVOut *node)
assert(0 <= index_this && index_this < split->num_split());
auto split_depth = size_splits->at<S32>(index_this);
if (split_depth == -1)
- split_depth = input_size - size_splits_sum;
+ split_depth = static_cast<int32_t>(input_size) - static_cast<int32_t>(size_splits_sum);
loco::TensorShape output_shape = split_shape;
@@ -1897,7 +1892,7 @@ loco::NodeShape infer_top_k_v2_out(const luci::CircleTopKV2Out *node)
INTERNAL_EXN("CircleSplit IR is not configured correctly");
 // shape of topkv2 is the same as topkv2->input()
- auto input_shape = loco::shape_get(topkv2).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(topkv2).as<loco::TensorShape>();
auto node_k = loco::must_cast<const luci::CircleConst *>(topkv2->k());
LUCI_ASSERT(node_k->dtype() == S32, "Only support Int32");
@@ -1924,7 +1919,7 @@ loco::NodeShape infer_unique_out(const luci::CircleUniqueOut *node)
}
assert(node->index() == 1);
auto unique = loco::must_cast<luci::CircleUnique *>(node->input());
- auto unique_shape = loco::shape_get(unique->input()).as<loco::TensorShape>();
+ auto unique_shape = luci::shape_get(unique->input()).as<loco::TensorShape>();
assert(unique_shape.rank() == 1);
@@ -1942,7 +1937,7 @@ loco::NodeShape infer_unpack_out(const luci::CircleUnpackOut *node)
INTERNAL_EXN("CircleUnpack IR is not configured correctly");
}
- auto unpack_shape = loco::shape_get(unpack).as<loco::TensorShape>();
+ auto unpack_shape = luci::shape_get(unpack).as<loco::TensorShape>();
return loco::NodeShape{unpack_shape};
}
@@ -1998,9 +1993,9 @@ public:
loco::NodeShape visit(const luci::CircleAddN *node) final { return infer_add_n(node); }
- loco::NodeShape visit(const luci::CircleArgMax *node) final { return infer_arg_max(node); }
+ loco::NodeShape visit(const luci::CircleArgMax *node) final { return infer_arg_maxmin(node); }
- loco::NodeShape visit(const luci::CircleArgMin *node) final { return infer_arg_min(node); }
+ loco::NodeShape visit(const luci::CircleArgMin *node) final { return infer_arg_maxmin(node); }
loco::NodeShape visit(const luci::CircleAveragePool2D *node) final
{
@@ -2009,8 +2004,8 @@ public:
loco::NodeShape visit(const luci::CircleBatchMatMul *node) final
{
- auto x_shape = loco::shape_get(node->x()).as<loco::TensorShape>();
- auto y_shape = loco::shape_get(node->y()).as<loco::TensorShape>();
+ auto x_shape = luci::shape_get(node->x()).as<loco::TensorShape>();
+ auto y_shape = luci::shape_get(node->y()).as<loco::TensorShape>();
return infer_batchmatmul_shape(x_shape, y_shape, node->adj_x(), node->adj_y());
}
@@ -2037,6 +2032,8 @@ public:
loco::NodeShape visit(const luci::CircleCustom *node) final { return use_own(node); }
+ loco::NodeShape visit(const luci::CircleDensify *node) final { return use_input(node); }
+
loco::NodeShape visit(const luci::CircleDepthToSpace *node) final
{
return infer_depth_to_space(node);
@@ -2047,11 +2044,17 @@ public:
return infer_depthwise_conv2d(node);
}
+ loco::NodeShape visit(const luci::CircleDequantize *node) final
+ {
+ const auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
+ return loco::NodeShape{input_shape};
+ }
+
loco::NodeShape visit(const luci::CircleDiv *node) final { return broadcast_xy(node); }
loco::NodeShape visit(const luci::CircleElu *node) final
{
- auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->features()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
@@ -2065,6 +2068,8 @@ public:
return infer_expand_dims(node);
}
+ loco::NodeShape visit(const luci::CircleFakeQuant *node) final { return use_inputs(node); }
+
loco::NodeShape visit(const luci::CircleFill *node) final { return infer_fill(node); }
loco::NodeShape visit(const luci::CircleFloor *node) final { return use_x(node); }
@@ -2082,15 +2087,29 @@ public:
loco::NodeShape visit(const luci::CircleGatherNd *node) final { return infer_gather_nd(node); }
+ loco::NodeShape visit(const luci::CircleGelu *node) final
+ {
+ auto input_shape = luci::shape_get(node->features()).as<loco::TensorShape>();
+
+ return loco::NodeShape{input_shape};
+ }
+
loco::NodeShape visit(const luci::CircleGreater *node) final { return broadcast_xy(node); }
loco::NodeShape visit(const luci::CircleGreaterEqual *node) final { return broadcast_xy(node); }
+ loco::NodeShape visit(const luci::CircleHardSwish *node) final
+ {
+ auto input_shape = luci::shape_get(node->features()).as<loco::TensorShape>();
+
+ return loco::NodeShape{input_shape};
+ }
+
loco::NodeShape visit(const luci::CircleIf *node) final
{
// Shape of CircleIf is not used. Just use input 0
assert(node->input_count() > 0);
- const auto input_shape = loco::shape_get(node->input(0)).as<loco::TensorShape>();
+ const auto input_shape = luci::shape_get(node->input(0)).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
@@ -2103,7 +2122,7 @@ public:
loco::NodeShape visit(const luci::CircleLeakyRelu *node) final
{
- const auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
+ const auto input_shape = luci::shape_get(node->features()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
@@ -2113,7 +2132,7 @@ public:
loco::NodeShape visit(const luci::CircleLocalResponseNormalization *node) final
{
- const auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ const auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
@@ -2162,13 +2181,13 @@ public:
loco::NodeShape visit(const luci::CircleNonMaxSuppressionV4 *node) final
{
- const auto boxes_shape = loco::shape_get(node->boxes()).as<loco::TensorShape>();
+ const auto boxes_shape = luci::shape_get(node->boxes()).as<loco::TensorShape>();
return loco::NodeShape{boxes_shape};
}
loco::NodeShape visit(const luci::CircleNonMaxSuppressionV5 *node) final
{
- const auto boxes_shape = loco::shape_get(node->boxes()).as<loco::TensorShape>();
+ const auto boxes_shape = luci::shape_get(node->boxes()).as<loco::TensorShape>();
return loco::NodeShape{boxes_shape};
}
@@ -2186,6 +2205,12 @@ public:
loco::NodeShape visit(const luci::CirclePRelu *node) final { return infer_p_relu(node); }
+ loco::NodeShape visit(const luci::CircleQuantize *node) final
+ {
+ const auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
+ return loco::NodeShape{input_shape};
+ }
+
loco::NodeShape visit(const luci::CircleRange *node) final { return infer_range(node); }
loco::NodeShape visit(const luci::CircleRank *) final
@@ -2222,21 +2247,21 @@ public:
loco::NodeShape visit(const luci::CircleRelu *node) final
{
- auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->features()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
loco::NodeShape visit(const luci::CircleRelu6 *node) final
{
- auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->features()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
loco::NodeShape visit(const luci::CircleReluN1To1 *node) final
{
- auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->features()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
@@ -2252,17 +2277,17 @@ public:
loco::NodeShape visit(const luci::CircleResizeBilinear *node) final
{
- return infer_resize_bilinear(node);
+ return infer_resize_type(node);
}
loco::NodeShape visit(const luci::CircleResizeNearestNeighbor *node) final
{
- return infer_resize_nearest_neighbor(node);
+ return infer_resize_type(node);
}
loco::NodeShape visit(const luci::CircleReverseSequence *node) final
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
@@ -2271,9 +2296,9 @@ public:
loco::NodeShape visit(const luci::CircleReverseV2 *node) final
{
- auto input_shape = loco::shape_get(node->tensor()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->tensor()).as<loco::TensorShape>();
- LUCI_ASSERT(loco::shape_get(node->axis()).as<loco::TensorShape>().rank() == 1,
+ LUCI_ASSERT(luci::shape_get(node->axis()).as<loco::TensorShape>().rank() == 1,
"Tensor must be 1-D");
return loco::NodeShape{input_shape};
@@ -2318,14 +2343,14 @@ public:
loco::NodeShape visit(const luci::CircleSplit *node) final
{
 // We'll set Split output the same as input so that SplitOut can handle its own shape
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
loco::NodeShape visit(const luci::CircleSplitV *node) final
{
 // We'll set SplitV output the same as input so that SplitOut can handle its own shape
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
@@ -2353,6 +2378,8 @@ public:
return loco::NodeShape{output_shape};
}
+ loco::NodeShape visit(const luci::CircleSVDF *node) final { return infer_svdf(node); }
+
loco::NodeShape visit(const luci::CircleTanh *node) final { return use_x(node); }
loco::NodeShape visit(const luci::CircleTile *node) final { return infer_tile(node); }
@@ -2360,7 +2387,7 @@ public:
loco::NodeShape visit(const luci::CircleTopKV2 *node) final
{
 // set the shape of this node the same as its input
- const auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ const auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
@@ -2373,6 +2400,11 @@ public:
loco::NodeShape visit(const luci::CircleUnpack *node) final { return infer_unpack(node); }
+ loco::NodeShape visit(const luci::CircleUnidirectionalSequenceLSTM *node) final
+ {
+ return infer_unidirectionalsequencelstm(node);
+ }
+
loco::NodeShape visit(const luci::CircleUnique *node) final { return infer_unique(node); }
loco::NodeShape visit(const luci::CircleWhere *node) final { return use_own(node); }
@@ -2381,13 +2413,13 @@ public:
{
// Shape of CircleWhile is not used. Just use input 0
assert(node->arity() > 0);
- const auto input_shape = loco::shape_get(node->input(0)).as<loco::TensorShape>();
+ const auto input_shape = luci::shape_get(node->input(0)).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
loco::NodeShape visit(const luci::CircleZerosLike *node) final
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
@@ -2402,7 +2434,7 @@ public:
loco::NodeShape visit(const luci::CircleInstanceNorm *node) final
{
- auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
return loco::NodeShape{input_shape};
}
@@ -2418,8 +2450,6 @@ public:
loco::NodeShape visit(const luci::CircleCustomOut *node) final { return use_own(node); }
- loco::NodeShape visit(const luci::CircleIfOut *node) final { return infer_if_out(node); }
-
loco::NodeShape visit(const luci::CircleNonMaxSuppressionV4Out *node) final
{
return infer_non_max_suppression_v4_out(node);
@@ -2443,6 +2473,8 @@ public:
loco::NodeShape visit(const luci::CircleUnpackOut *node) final { return infer_unpack_out(node); }
+ loco::NodeShape visit(const luci::CircleVariable *node) final { return use_own(node); }
+
loco::NodeShape visit(const luci::CircleWhileOut *node) final { return infer_while_out(node); }
};
diff --git a/compiler/luci/service/src/CircleShapeInferenceRule.test.cpp b/compiler/luci/service/src/CircleShapeInferenceRule.test.cpp
deleted file mode 100644
index ac27db3bd..000000000
--- a/compiler/luci/service/src/CircleShapeInferenceRule.test.cpp
+++ /dev/null
@@ -1,626 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TestGraph.h"
-#include "luci/Service/CircleShapeInferenceRule.h"
-
-#include <luci/IR/CircleNodes.h>
-#include <luci/IR/CircleDialect.h>
-
-#include <loco.h>
-#include <loco/IR/CanonicalDialect.h>
-#include <loco/Service/ShapeInference.h>
-#include <loco/Service/CanonicalShapeInferenceRule.h>
-#include <loco/Service/MultiDialectShapeInferenceRule.h>
-
-#include <oops/InternalExn.h>
-
-#include <gtest/gtest.h>
-
-#include <memory>
-
-namespace
-{
-
-bool shape_pass(loco::Graph *g)
-{
- loco::CanonicalShapeInferenceRule canonical_rule;
- luci::CircleShapeInferenceRule circle_rule;
- loco::MultiDialectShapeInferenceRule rules;
-
- rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
- .bind(luci::CircleDialect::get(), &circle_rule);
-
- return loco::apply(&rules).to(g);
-}
-
-} // namespace
-
-TEST(CircleShapeInferenceRuleTest, minimal_with_CircleRelu)
-{
- // Create a simple network
- luci::test::TestGraph graph;
- auto relu_node = graph.append<luci::CircleRelu>(graph.input_node);
- graph.complete(relu_node);
-
- // set shape
- {
- graph.input_node->rank(2);
- graph.input_node->dim(0) = 3;
- graph.input_node->dim(1) = 4;
-
- graph.output_node->rank(2);
- graph.output_node->dim(0) = 3;
- graph.output_node->dim(1) = 4;
-
- luci::test::graph_input_shape(graph.input_node);
- luci::test::graph_output_shape(graph.output_node);
- }
-
- // pre-check
- ASSERT_FALSE(loco::shape_known(relu_node));
-
- // shape inference
- while (shape_pass(graph.graph()) == true)
- ;
-
- // Verify
- {
- ASSERT_TRUE(loco::shape_known(relu_node));
- ASSERT_EQ(loco::Domain::Tensor, loco::shape_get(relu_node).domain());
-
- auto shape = loco::shape_get(relu_node).as<loco::TensorShape>();
- ASSERT_EQ(2, shape.rank());
- ASSERT_EQ(3, shape.dim(0));
- ASSERT_EQ(4, shape.dim(1));
- }
-}
-
-// based on the case shown in
-// https://www.corvil.com/kb/what-is-the-difference-between-same-and-valid-padding-in-tf-nn-max-pool-of-tensorflow
-TEST(CircleShapeInferenceRuleTest, avgpool2d_valid)
-{
- luci::test::TestGraph graph;
- auto avg_node = graph.append<luci::CircleAveragePool2D>(graph.input_node);
- graph.complete();
-
- auto input_node = graph.input_node;
- {
- input_node->shape({1, 4, 3, 1});
- luci::test::graph_input_shape(input_node);
- }
- auto output_node = graph.output_node;
- {
- output_node->shape({1, 2, 1, 1});
- luci::test::graph_output_shape(output_node);
- }
- // setting CircleAveragePool2D
- {
- avg_node->filter()->h(2);
- avg_node->filter()->w(2);
- avg_node->stride()->h(2);
- avg_node->stride()->w(2);
- avg_node->fusedActivationFunction(luci::FusedActFunc::NONE);
- avg_node->padding(luci::Padding::VALID);
- }
- ASSERT_FALSE(loco::shape_known(avg_node));
-
- // shape inference
- while (shape_pass(graph.graph()) == true)
- ;
-
- // Verify
- {
- ASSERT_TRUE(loco::shape_known(avg_node));
- ASSERT_EQ(loco::Domain::Tensor, loco::shape_get(avg_node).domain());
-
- auto shape = loco::shape_get(avg_node).as<loco::TensorShape>();
- ASSERT_EQ(4, shape.rank());
- ASSERT_EQ(1, shape.dim(0).value());
- ASSERT_EQ(2, shape.dim(1).value());
- ASSERT_EQ(1, shape.dim(2).value());
- ASSERT_EQ(1, shape.dim(3).value());
- }
-}
-
-TEST(CircleShapeInferenceRuleTest, avgpool2d_same)
-{
- luci::test::TestGraph graph;
- auto avg_node = graph.append<luci::CircleAveragePool2D>(graph.input_node);
- graph.complete();
-
- auto input_node = graph.input_node;
- {
- input_node->shape({1, 4, 3, 1});
- luci::test::graph_input_shape(input_node);
- }
- auto output_node = graph.output_node;
- {
- output_node->shape({1, 2, 2, 1});
- luci::test::graph_output_shape(output_node);
- }
-
- // setting CircleAveragePool2D
- {
- avg_node->filter()->h(2);
- avg_node->filter()->w(2);
- avg_node->stride()->h(2);
- avg_node->stride()->w(2);
- avg_node->fusedActivationFunction(luci::FusedActFunc::NONE);
- avg_node->padding(luci::Padding::SAME);
- }
-
- ASSERT_FALSE(loco::shape_known(avg_node));
-
- // shape inference
- while (shape_pass(graph.graph()) == true)
- ;
-
- // Verify
- {
- ASSERT_TRUE(loco::shape_known(avg_node));
- ASSERT_EQ(loco::Domain::Tensor, loco::shape_get(avg_node).domain());
-
- auto shape = loco::shape_get(avg_node).as<loco::TensorShape>();
- ASSERT_EQ(4, shape.rank());
- ASSERT_EQ(1, shape.dim(0).value());
- ASSERT_EQ(2, shape.dim(1).value());
- ASSERT_EQ(2, shape.dim(2).value());
- ASSERT_EQ(1, shape.dim(3).value());
- }
-}
-
-/**
- * @note Function to test: Shape inference of two different input shapes
- *
- * Rank expansion to higher input side
- * x(2,1,5) + y(3,5) --> x(2,1,5) + y(1,3,5)
- * Do output shape inference like numpy
- * x(2,1,5) + y(1,3,5) --> output(2,3,5)
- * For each axis, the dim values should be the same OR one of them should be 1
- */
-TEST(CircleShapeInferenceRuleTest, TFAdd_shapeinf_different)
-{
- auto g = loco::make_graph();
-
- auto x_node = g->nodes()->create<luci::CircleInput>();
- {
- x_node->rank(3);
- x_node->dim(0) = 2;
- x_node->dim(1) = 1;
- x_node->dim(2) = 5;
- }
- auto y_node = g->nodes()->create<luci::CircleInput>();
- {
- y_node->rank(2);
- y_node->dim(0) = 3;
- y_node->dim(1) = 5;
- }
- auto add_node = g->nodes()->create<luci::CircleAdd>();
- {
- add_node->x(x_node);
- add_node->y(y_node);
- }
- auto output_node = g->nodes()->create<luci::CircleOutput>();
- {
- output_node->from(add_node);
- }
-
- auto x_input = g->inputs()->create();
- {
- x_input->name("x");
- luci::link(x_input, x_node);
- }
- auto y_input = g->inputs()->create();
- {
- y_input->name("y");
- luci::link(y_input, y_node);
- }
- auto output = g->outputs()->create();
- {
- output->name("output");
- luci::link(output, output_node);
- }
-
- luci::test::graph_input_shape(x_node);
- luci::test::graph_input_shape(y_node);
- luci::test::graph_output_shape(output_node);
-
- // pre-check
- ASSERT_FALSE(loco::shape_known(add_node));
-
- // shape inference
- while (shape_pass(g.get()) == true)
- ;
-
- // Verify
- {
- ASSERT_TRUE(loco::shape_known(add_node));
- ASSERT_EQ(loco::Domain::Tensor, loco::shape_get(add_node).domain());
-
- auto shape = loco::shape_get(add_node).as<loco::TensorShape>();
- ASSERT_EQ(3, shape.rank());
- ASSERT_EQ(2, shape.dim(0));
- ASSERT_EQ(3, shape.dim(1));
- ASSERT_EQ(5, shape.dim(2));
- }
-}
-
-TEST(CircleShapeInferenceRuleTest, CircleTranspose_simple)
-{
- luci::test::ExampleGraph<luci::test::ExampleGraphType::CircleTranspose> g;
-
- g.input_node->rank(3);
- g.input_node->dim(0) = 3;
- g.input_node->dim(1) = 8;
- g.input_node->dim(2) = 1;
-
- g.const_perm->dtype(loco::DataType::S32);
- g.const_perm->rank(1);
- g.const_perm->dim(0) = 3;
- g.const_perm->size<loco::DataType::S32>(3);
- g.const_perm->at<loco::DataType::S32>(0) = 1;
- g.const_perm->at<loco::DataType::S32>(1) = 2;
- g.const_perm->at<loco::DataType::S32>(2) = 0;
-
- luci::test::graph_input_shape(g.input_node);
- luci::test::graph_output_shape(g.output_node);
-
- // pre-check
- ASSERT_FALSE(loco::shape_known(g.transpose_node));
-
- // shape inference
- while (shape_pass(g.graph()) == true)
- ;
-
- // Verify
- {
- ASSERT_TRUE(loco::shape_known(g.transpose_node));
-
- auto shape = loco::shape_get(g.transpose_node).as<loco::TensorShape>();
- ASSERT_EQ(3, shape.rank());
- ASSERT_EQ(8, shape.dim(0));
- ASSERT_EQ(1, shape.dim(1));
- ASSERT_EQ(3, shape.dim(2));
- }
-}
-
-TEST(CircleShapeInferenceRuleTest, CircleSqueeze)
-{
- luci::test::TestGraph graph;
- auto squeeze_node = graph.append<luci::CircleSqueeze>(graph.input_node);
- graph.complete();
-
- auto input_node = graph.input_node;
- {
- input_node->shape({1, 4, 3, 1});
- }
- auto output_node = graph.output_node;
- {
- output_node->shape({4, 3, 1});
- }
-
- luci::test::graph_input_shape(input_node);
- luci::test::graph_output_shape(output_node);
-
- squeeze_node->squeeze_dims({0});
-
- // pre-check
- ASSERT_FALSE(loco::shape_known(squeeze_node));
-
- // shape inference
- while (shape_pass(graph.graph()) == true)
- ;
-
- // Verify
- {
- ASSERT_TRUE(loco::shape_known(squeeze_node));
-
- auto shape = loco::shape_get(squeeze_node).as<loco::TensorShape>();
- ASSERT_EQ(3, shape.rank());
- ASSERT_EQ(4, shape.dim(0));
- ASSERT_EQ(3, shape.dim(1));
- ASSERT_EQ(1, shape.dim(2));
- }
-}
-
-TEST(CircleShapeInferenceRuleTest, CircleExpandDims)
-{
- luci::test::TestGraph graph;
- auto axis = graph.append<luci::CircleConst>();
- axis->dtype(loco::DataType::S32);
- axis->rank(0);
- axis->size<loco::DataType::S32>(1);
- axis->at<loco::DataType::S32>(0) = 1;
-
- auto expand_dims = graph.append<luci::CircleExpandDims>(graph.input_node, axis);
- graph.complete();
-
- auto input_node = graph.input_node;
- {
- input_node->shape({4, 3});
- }
-
- auto output_node = graph.output_node;
- {
- output_node->from(expand_dims);
- }
-
- luci::test::graph_input_shape(input_node);
- luci::test::graph_output_shape(output_node);
-
- // shape inference
- while (shape_pass(graph.graph()))
- ;
-
- // validation
- {
- ASSERT_TRUE(loco::shape_known(expand_dims));
-
- auto shape = loco::shape_get(expand_dims).as<loco::TensorShape>();
-
- ASSERT_EQ(3, shape.rank());
- ASSERT_EQ(4, shape.dim(0));
- ASSERT_EQ(1, shape.dim(1));
- ASSERT_EQ(3, shape.dim(2));
- }
-}
-
-TEST(CircleShapeInferenceRuleTest, CircleSqueezeAll)
-{
- luci::test::TestGraph graph;
- auto squeeze_node = graph.append<luci::CircleSqueeze>(graph.input_node);
- graph.complete();
-
- auto input_node = graph.input_node;
- {
- input_node->shape({1, 4, 3, 1});
- }
- auto output_node = graph.output_node;
- {
- output_node->shape({4, 3});
- }
-
- luci::test::graph_input_shape(input_node);
- luci::test::graph_output_shape(output_node);
-
- squeeze_node->squeeze_dims({});
-
- // pre-check
- ASSERT_FALSE(loco::shape_known(squeeze_node));
-
- // shape inference
- while (shape_pass(graph.graph()) == true)
- ;
-
- // Verify
- {
- ASSERT_TRUE(loco::shape_known(squeeze_node));
-
- auto shape = loco::shape_get(squeeze_node).as<loco::TensorShape>();
- ASSERT_EQ(2, shape.rank());
- ASSERT_EQ(4, shape.dim(0));
- ASSERT_EQ(3, shape.dim(1));
- }
-}
-
-TEST(CircleShapeInferenceRuleTest, CircleGatherNd_simple)
-{
- luci::test::TestGraph graph;
- auto indices_const = graph.append<luci::CircleConst>();
- auto gather_nd_node = graph.append<luci::CircleGatherNd>(graph.input_node, indices_const);
- graph.complete();
-
- {
- auto input_node = graph.input_node;
- input_node->shape({1, 4, 4, 3});
- luci::test::graph_input_shape(input_node);
- }
- {
- auto output_node = graph.output_node;
- output_node->shape({1, 2, 2, 3});
- luci::test::graph_output_shape(output_node);
- }
-
- {
- indices_const->shape({1, 2, 3});
- }
-
- // pre-check
- ASSERT_FALSE(loco::shape_known(gather_nd_node));
-
- // shape inference
- while (shape_pass(graph.graph()) == true)
- ;
-
- // Verify
- {
- ASSERT_TRUE(loco::shape_known(gather_nd_node));
-
- auto shape = loco::shape_get(gather_nd_node).as<loco::TensorShape>();
- ASSERT_EQ(3, shape.rank());
- ASSERT_EQ(1, shape.dim(0));
- ASSERT_EQ(2, shape.dim(1));
- ASSERT_EQ(3, shape.dim(2));
- }
-}
-
-TEST(CircleShapeInferenceRuleTest, CircleGatherNd_slices)
-{
- luci::test::TestGraph graph;
- auto indices_const = graph.append<luci::CircleConst>();
- auto gather_nd_node = graph.append<luci::CircleGatherNd>(graph.input_node, indices_const);
- graph.complete();
-
- {
- auto input_node = graph.input_node;
- input_node->shape({1, 4, 4, 3});
- luci::test::graph_input_shape(input_node);
- }
- {
- auto output_node = graph.output_node;
- output_node->shape({1, 2, 4, 4, 3});
- luci::test::graph_output_shape(output_node);
- }
-
- {
- indices_const->shape({1, 2, 1});
- }
-
- // pre-check
- ASSERT_FALSE(loco::shape_known(gather_nd_node));
-
- // shape inference
- while (shape_pass(graph.graph()) == true)
- ;
-
- // Verify
- {
- ASSERT_TRUE(loco::shape_known(gather_nd_node));
-
- auto shape = loco::shape_get(gather_nd_node).as<loco::TensorShape>();
- ASSERT_EQ(5, shape.rank());
- ASSERT_EQ(1, shape.dim(0));
- ASSERT_EQ(2, shape.dim(1));
- ASSERT_EQ(4, shape.dim(2));
- ASSERT_EQ(4, shape.dim(3));
- ASSERT_EQ(3, shape.dim(4));
- }
-}
-
-TEST(CircleShapeInferenceRuleTest, CircleGatherNd_NEG)
-{
- luci::test::TestGraph graph;
- auto indices_const = graph.append<luci::CircleConst>();
- auto gather_nd_node = graph.append<luci::CircleGatherNd>(graph.input_node, indices_const);
- graph.complete();
-
- {
- auto input_node = graph.input_node;
- input_node->shape({1, 4, 4, 3});
- luci::test::graph_input_shape(input_node);
- }
- {
- // Does not matter, because the test should fail anyway
- auto output_node = graph.output_node;
- output_node->shape({0, 0, 0});
- luci::test::graph_output_shape(output_node);
- }
-
- {
- indices_const->shape({1, 2, 5});
- }
-
- // pre-check
- ASSERT_FALSE(loco::shape_known(gather_nd_node));
-
- // packed into a lambda so ASSERT_THROW can check for the throw
- auto lambda = [&]() {
- // shape inference
- while (shape_pass(graph.graph()) == true)
- ;
- };
-
- ASSERT_THROW(lambda(), oops::InternalExn);
-}
-
-TEST(CircleShapeInferenceRuleTest, CircleResizeNearestNeighbor)
-{
- luci::test::TestGraph graph;
- auto size_const = graph.append<luci::CircleConst>();
- size_const->dtype(loco::DataType::S32);
- size_const->rank(1);
- size_const->dim(0) = 2;
- size_const->size<loco::DataType::S32>(2);
- size_const->at<loco::DataType::S32>(0) = 16;
- size_const->at<loco::DataType::S32>(1) = 16;
- auto resize_node = graph.append<luci::CircleResizeNearestNeighbor>(graph.input_node, size_const);
- graph.complete();
-
- {
- auto input_node = graph.input_node;
- input_node->shape({1, 4, 4, 3});
- luci::test::graph_input_shape(input_node);
- }
- {
- auto output_node = graph.output_node;
- output_node->from(resize_node);
- luci::test::graph_output_shape(output_node);
- }
-
- // pre-check
- ASSERT_FALSE(loco::shape_known(resize_node));
-
- // shape inference
- while (shape_pass(graph.graph()) == true)
- ;
-
- // Verify
- {
- ASSERT_TRUE(loco::shape_known(resize_node));
-
- auto shape = loco::shape_get(resize_node).as<loco::TensorShape>();
- ASSERT_EQ(4, shape.rank());
- ASSERT_EQ(1, shape.dim(0));
- ASSERT_EQ(16, shape.dim(1));
- ASSERT_EQ(16, shape.dim(2));
- ASSERT_EQ(3, shape.dim(3));
- }
-}
-
-TEST(CircleShapeInferenceRuleTest, CircleResizeBilinear)
-{
- luci::test::TestGraph graph;
- auto size_const = graph.append<luci::CircleConst>();
- size_const->dtype(loco::DataType::S32);
- size_const->rank(1);
- size_const->dim(0) = 2;
- size_const->size<loco::DataType::S32>(2);
- size_const->at<loco::DataType::S32>(0) = 16;
- size_const->at<loco::DataType::S32>(1) = 16;
- auto resize_node = graph.append<luci::CircleResizeBilinear>(graph.input_node, size_const);
- graph.complete();
-
- {
- auto input_node = graph.input_node;
- input_node->shape({1, 4, 4, 3});
- luci::test::graph_input_shape(input_node);
- }
- {
- auto output_node = graph.output_node;
- output_node->from(resize_node);
- luci::test::graph_output_shape(output_node);
- }
-
- // pre-check
- ASSERT_FALSE(loco::shape_known(resize_node));
-
- // shape inference
- while (shape_pass(graph.graph()) == true)
- ;
-
- // Verify
- {
- ASSERT_TRUE(loco::shape_known(resize_node));
-
- auto shape = loco::shape_get(resize_node).as<loco::TensorShape>();
- ASSERT_EQ(4, shape.rank());
- ASSERT_EQ(1, shape.dim(0));
- ASSERT_EQ(16, shape.dim(1));
- ASSERT_EQ(16, shape.dim(2));
- ASSERT_EQ(3, shape.dim(3));
- }
-}
diff --git a/compiler/luci/service/src/CircleTypeInference.cpp b/compiler/luci/service/src/CircleTypeInference.cpp
index aa8524a55..db9a37cb0 100644
--- a/compiler/luci/service/src/CircleTypeInference.cpp
+++ b/compiler/luci/service/src/CircleTypeInference.cpp
@@ -15,58 +15,55 @@
*/
#include "luci/Service/CircleTypeInference.h"
+#include "CircleTypeInferenceHelper.h"
-#include <loco.h>
-#include <loco/Service/TypeInference.h>
+#include <luci/Log.h>
-#include <mio/circle/schema_generated.h>
-#include <oops/InternalExn.h>
+#include <loco.h>
#include <type_traits>
namespace
{
-circle::TensorType translateLocoTypeToCircle(loco::DataType dtype)
+bool inputs_dtype_ready(const luci::CircleNode *node)
{
- switch (dtype)
+ for (uint32_t i = 0; i < node->arity(); ++i)
{
- case loco::DataType::U8:
- return circle::TensorType_UINT8;
- // case loco::DataType::U16: unsupported
- // case loco::DataType::U32: unsupported
- // case loco::DataType::U64: unsupported
- case loco::DataType::S8:
- return circle::TensorType_INT8;
- case loco::DataType::S16:
- return circle::TensorType_INT16;
- case loco::DataType::S32:
- return circle::TensorType_INT32;
- case loco::DataType::S64:
- return circle::TensorType_INT64;
- case loco::DataType::FLOAT16:
- return circle::TensorType_FLOAT16;
- case loco::DataType::FLOAT32:
- return circle::TensorType_FLOAT32;
- // case loco::DataType::FLOAT64: unsupported
- case loco::DataType::BOOL:
- return circle::TensorType_BOOL;
- default:
- break;
+ auto input_node = loco::must_cast<luci::CircleNode *>(node->arg(i));
+ if (input_node->dtype() == loco::DataType::Unknown)
+ return false;
}
- INTERNAL_EXN_V("Invalid loco dtype", oops::to_uint32(dtype));
+ return true;
}
} // namespace
namespace luci
{
+namespace tinf
+{
-circle::TensorType TypeInference::get(loco::Node *node)
+bool Rule::infer(const luci::CircleNode *circle_node, loco::DataType &dtype) const
{
- assert(loco::dtype_known(node));
- return translateLocoTypeToCircle(loco::dtype_get(node));
+ LOGGER(l);
+ VERBOSE(l, 1) << "[CircleTypeInference] " << circle_node->name();
+ VERBOSE(l, 1) << " before: " << static_cast<int>(circle_node->dtype());
+
+ if (!inputs_dtype_ready(circle_node))
+ {
+ VERBOSE(l, 1) << " after: Some inputs are not ready for inference";
+ return false;
+ }
+
+ Algorithm alg;
+ dtype = circle_node->accept(&alg);
+
+ VERBOSE(l, 1) << " after: " << static_cast<int>(dtype);
+
+ return true;
}
+} // namespace tinf
} // namespace luci
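With this rework, type inference moves from the one-shot TypeInference::get() to an incremental tinf::Rule::infer() that reports per node whether a dtype could be computed. Below is a minimal driver sketch, assuming the usual loco traversal helpers; the actual caller (a type-inference pass) is not part of this excerpt, so all surrounding names are illustrative.

// Illustrative fixed-point driver; `graph` is an assumed loco::Graph *.
luci::tinf::Rule rule;
bool changed = true;
while (changed)
{
  changed = false;
  for (auto node : loco::active_nodes(loco::output_nodes(graph)))
  {
    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
    loco::DataType dtype = loco::DataType::Unknown;
    // infer() returns false while some input dtypes are still Unknown
    if (rule.infer(circle_node, dtype) && circle_node->dtype() != dtype)
    {
      circle_node->dtype(dtype); // write the result back onto the node
      changed = true;
    }
  }
}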
diff --git a/compiler/luci/service/src/CircleTypeInferenceHelper.cpp b/compiler/luci/service/src/CircleTypeInferenceHelper.cpp
new file mode 100644
index 000000000..06edd70f2
--- /dev/null
+++ b/compiler/luci/service/src/CircleTypeInferenceHelper.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleTypeInferenceHelper.h"
+
+namespace luci
+{
+
+loco::DataType dtype_get(const loco::Node *node)
+{
+ assert(luci::dtype_known(node));
+ return loco::must_cast<const luci::CircleNode *>(node)->dtype();
+}
+
+bool dtype_known(const loco::Node *node)
+{
+ return loco::must_cast<const luci::CircleNode *>(node)->dtype() != loco::DataType::Unknown;
+}
+
+} // namespace luci
+
+namespace luci
+{
+namespace tinf
+{
+
+// Helper functions will be added
+
+} // namespace tinf
+} // namespace luci
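A short usage sketch of the two helpers above: unlike loco::dtype_get, which consults dialect-managed annotations, these read the dtype field stored on the node itself.

// Illustrative only.
auto g = loco::make_graph();
auto add = g->nodes()->create<luci::CircleAdd>();

assert(!luci::dtype_known(add)); // a fresh node's dtype is Unknown

add->dtype(loco::DataType::FLOAT32);
assert(luci::dtype_known(add));
assert(luci::dtype_get(add) == loco::DataType::FLOAT32);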
diff --git a/compiler/luci/service/src/CircleTypeInferenceHelper.h b/compiler/luci/service/src/CircleTypeInferenceHelper.h
new file mode 100644
index 000000000..751340cc7
--- /dev/null
+++ b/compiler/luci/service/src/CircleTypeInferenceHelper.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_TYPE_INFERENCE_HELPER_H__
+#define __LUCI_CIRCLE_TYPE_INFERENCE_HELPER_H__
+
+#include <luci/IR/CircleNodes.h>
+
+#include <loco/IR/DataType.h>
+
+namespace luci
+{
+
+// NOTE Functions in this namespace will be removed after new inference
+// algorithms are fully implemented.
+
+// This is a temporary function to ease deprecating loco::dtype_get
+loco::DataType dtype_get(const loco::Node *node);
+
+// This is a temporary function to ease deprecating loco::dtype_known
+bool dtype_known(const loco::Node *node);
+
+} // namespace luci
+
+namespace luci
+{
+namespace tinf // Namespace for Type Inference
+{
+
+// Helper functions will be added
+
+} // namespace tinf
+} // namespace luci
+
+#endif // __LUCI_CIRCLE_TYPE_INFERENCE_HELPER_H__
diff --git a/compiler/luci/service/src/CircleTypeInferenceRule.cpp b/compiler/luci/service/src/CircleTypeInferenceRule.cpp
index d28d8ac99..bd3feb977 100644
--- a/compiler/luci/service/src/CircleTypeInferenceRule.cpp
+++ b/compiler/luci/service/src/CircleTypeInferenceRule.cpp
@@ -15,6 +15,7 @@
*/
#include "luci/Service/CircleTypeInferenceRule.h"
+#include "CircleTypeInferenceHelper.h"
#include <luci/IR/CircleDialect.h>
#include <luci/IR/CircleNodeVisitor.h>
@@ -29,24 +30,24 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT
{
// TODO Given a tensor x of complex numbers, Abs operation returns a tensor of type float32 or
// float64.
- loco::DataType visit(const luci::CircleAbs *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleAbs *node) final { return luci::dtype_get(node->x()); }
- loco::DataType visit(const luci::CircleAdd *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleAdd *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleAddN *node) final
{
- auto dtype = loco::dtype_get(node->inputs(0));
+ auto dtype = luci::dtype_get(node->inputs(0));
for (uint32_t idx = 1; idx < node->arity(); ++idx)
{
- auto dtype_idx = loco::dtype_get(node->inputs(idx));
+ auto dtype_idx = luci::dtype_get(node->inputs(idx));
if (dtype != dtype_idx)
{
INTERNAL_EXN_V("ADD_N dtype not same as the first input: ", idx);
}
}
- return loco::dtype_get(node->inputs(0));
+ return luci::dtype_get(node->inputs(0));
}
loco::DataType visit(const luci::CircleArgMax *node) final { return node->output_type(); }
@@ -55,22 +56,22 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT
loco::DataType visit(const luci::CircleAveragePool2D *node) final
{
- return loco::dtype_get(node->value());
+ return luci::dtype_get(node->value());
}
loco::DataType visit(const luci::CircleBatchMatMul *node) final
{
- return loco::dtype_get(node->x());
+ return luci::dtype_get(node->x());
}
loco::DataType visit(const luci::CircleBatchToSpaceND *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleCast *node) final { return node->dtype(); }
- loco::DataType visit(const luci::CircleCeil *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleCeil *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleConcatenation *node) final
{
@@ -78,111 +79,133 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT
assert(node->numValues() > 0);
for (uint32_t i = 1; i < node->numValues(); ++i)
- assert(loco::dtype_get(node->values(i - 1)) == loco::dtype_get(node->values(i)));
+ assert(luci::dtype_get(node->values(i - 1)) == luci::dtype_get(node->values(i)));
- return loco::dtype_get(node->values(0));
+ return luci::dtype_get(node->values(0));
}
loco::DataType visit(const luci::CircleConst *node) final { return node->dtype(); }
loco::DataType visit(const luci::CircleConv2D *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
- loco::DataType visit(const luci::CircleCos *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleCos *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleCustom *node) final
{
if (node->custom_code() == "BatchMatMulV2")
{
- return loco::dtype_get(node->inputs(0));
+ return luci::dtype_get(node->inputs(0));
}
return node->dtype();
}
+ loco::DataType visit(const luci::CircleDensify *node) final
+ {
+ return luci::dtype_get(node->input());
+ }
+
loco::DataType visit(const luci::CircleDepthToSpace *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleDepthwiseConv2D *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
- loco::DataType visit(const luci::CircleDiv *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleDequantize *) final { return loco::DataType::FLOAT32; }
+
+ loco::DataType visit(const luci::CircleDiv *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleElu *node) final
{
- return loco::dtype_get(node->features());
+ return luci::dtype_get(node->features());
}
loco::DataType visit(const luci::CircleEqual *) final { return loco::DataType::BOOL; }
- loco::DataType visit(const luci::CircleExp *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleExp *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleExpandDims *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
+ }
+
+ loco::DataType visit(const luci::CircleFakeQuant *node) final
+ {
+ return luci::dtype_get(node->inputs());
}
loco::DataType visit(const luci::CircleFill *node) final
{
- return loco::dtype_get(node->value());
+ return luci::dtype_get(node->value());
}
- loco::DataType visit(const luci::CircleFloor *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleFloor *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleFloorDiv *node) final
{
- return loco::dtype_get(node->x());
+ return luci::dtype_get(node->x());
}
loco::DataType visit(const luci::CircleFloorMod *node) final
{
- return loco::dtype_get(node->x());
+ return luci::dtype_get(node->x());
}
loco::DataType visit(const luci::CircleFullyConnected *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleGather *node) final
{
- return loco::dtype_get(node->params());
+ return luci::dtype_get(node->params());
}
loco::DataType visit(const luci::CircleGatherNd *node) final
{
- return loco::dtype_get(node->params());
+ return luci::dtype_get(node->params());
+ }
+
+ loco::DataType visit(const luci::CircleGelu *node) final
+ {
+ return luci::dtype_get(node->features());
}
loco::DataType visit(const luci::CircleGreater *) final { return loco::DataType::BOOL; }
loco::DataType visit(const luci::CircleGreaterEqual *) final { return loco::DataType::BOOL; }
+ loco::DataType visit(const luci::CircleHardSwish *node) final
+ {
+ return luci::dtype_get(node->features());
+ }
+
loco::DataType visit(const luci::CircleIf *node) final
{
// Type of If is not used. Just use input 0
assert(node->input_count() > 0);
- return loco::dtype_get(node->input(0));
+ return luci::dtype_get(node->input(0));
}
loco::DataType visit(const luci::CircleL2Normalize *node) final
{
- return loco::dtype_get(node->x());
+ return luci::dtype_get(node->x());
}
loco::DataType visit(const luci::CircleL2Pool2D *node) final
{
- return loco::dtype_get(node->value());
+ return luci::dtype_get(node->value());
}
loco::DataType visit(const luci::CircleLeakyRelu *node) final
{
- return loco::dtype_get(node->features());
+ return luci::dtype_get(node->features());
}
loco::DataType visit(const luci::CircleLess *) final { return loco::DataType::BOOL; }
@@ -191,75 +214,75 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT
loco::DataType visit(const luci::CircleLocalResponseNormalization *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
- loco::DataType visit(const luci::CircleLog *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleLog *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleLogicalAnd *node) final
{
- return loco::dtype_get(node->x());
+ return luci::dtype_get(node->x());
}
loco::DataType visit(const luci::CircleLogicalNot *node) final
{
- return loco::dtype_get(node->x());
+ return luci::dtype_get(node->x());
}
loco::DataType visit(const luci::CircleLogicalOr *node) final
{
- return loco::dtype_get(node->x());
+ return luci::dtype_get(node->x());
}
loco::DataType visit(const luci::CircleLogistic *node) final
{
- return loco::dtype_get(node->x());
+ return luci::dtype_get(node->x());
}
loco::DataType visit(const luci::CircleLogSoftmax *node) final
{
- return loco::dtype_get(node->logits());
+ return luci::dtype_get(node->logits());
}
loco::DataType visit(const luci::CircleMatrixDiag *node) final
{
- return loco::dtype_get(node->diagonal());
+ return luci::dtype_get(node->diagonal());
}
loco::DataType visit(const luci::CircleMatrixSetDiag *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
- loco::DataType visit(const luci::CircleMaximum *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleMaximum *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleMaxPool2D *node) final
{
- return loco::dtype_get(node->value());
+ return luci::dtype_get(node->value());
}
loco::DataType visit(const luci::CircleMean *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
- loco::DataType visit(const luci::CircleMinimum *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleMinimum *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleMirrorPad *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
- loco::DataType visit(const luci::CircleNeg *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleNeg *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleNonMaxSuppressionV4 *node) final
{
- return loco::dtype_get(node->boxes());
+ return luci::dtype_get(node->boxes());
}
loco::DataType visit(const luci::CircleNonMaxSuppressionV5 *node) final
{
- return loco::dtype_get(node->boxes());
+ return luci::dtype_get(node->boxes());
}
loco::DataType visit(const luci::CircleNotEqual *) final { return loco::DataType::BOOL; }
@@ -269,25 +292,25 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT
// Only support CirclePack with one or more inputs
assert(node->values_count() > 0);
- auto first_value_type = loco::dtype_get(node->values(0));
+ auto first_value_type = luci::dtype_get(node->values(0));
for (uint32_t i = 1; i < node->values_count(); ++i)
- assert(first_value_type == loco::dtype_get(node->values(i)));
+ assert(first_value_type == luci::dtype_get(node->values(i)));
return first_value_type;
}
- loco::DataType visit(const luci::CirclePad *node) final { return loco::dtype_get(node->input()); }
+ loco::DataType visit(const luci::CirclePad *node) final { return luci::dtype_get(node->input()); }
loco::DataType visit(const luci::CirclePadV2 *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CirclePow *node) final
{
// TODO make sure types cannot differ
- auto x_type = loco::dtype_get(node->x());
- auto y_type = loco::dtype_get(node->y());
+ auto x_type = luci::dtype_get(node->x());
+ auto y_type = luci::dtype_get(node->y());
if (x_type != y_type)
INTERNAL_EXN("Different datatype for x and y are not supported");
@@ -297,8 +320,8 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT
loco::DataType visit(const luci::CirclePRelu *node) final
{
- auto input_type = loco::dtype_get(node->input());
- auto alpha_type = loco::dtype_get(node->alpha());
+ auto input_type = luci::dtype_get(node->input());
+ auto alpha_type = luci::dtype_get(node->alpha());
if (input_type != alpha_type)
INTERNAL_EXN("Different datatype for input and alpha are not supported");
@@ -306,198 +329,210 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT
return input_type;
}
+ loco::DataType visit(const luci::CircleQuantize *node) final { return luci::dtype_get(node); }
+
loco::DataType visit(const luci::CircleRange *node) final
{
- return loco::dtype_get(node->start());
+ return luci::dtype_get(node->start());
}
loco::DataType visit(const luci::CircleRank *) final { return loco::DataType::S32; }
- loco::DataType visit(const luci::CircleMul *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleMul *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleOneHot *node) final
{
- return loco::dtype_get(node->on_value());
+ return luci::dtype_get(node->on_value());
}
loco::DataType visit(const luci::CircleReduceAny *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleReduceMax *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleReduceMin *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleReduceProd *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleRelu *node) final
{
- return loco::dtype_get(node->features());
+ return luci::dtype_get(node->features());
}
loco::DataType visit(const luci::CircleRelu6 *node) final
{
- return loco::dtype_get(node->features());
+ return luci::dtype_get(node->features());
}
loco::DataType visit(const luci::CircleReluN1To1 *node) final
{
- return loco::dtype_get(node->features());
+ return luci::dtype_get(node->features());
}
loco::DataType visit(const luci::CircleReshape *node) final
{
- return loco::dtype_get(node->tensor());
+ return luci::dtype_get(node->tensor());
}
loco::DataType visit(const luci::CircleResizeBilinear *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleResizeNearestNeighbor *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleReverseSequence *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleReverseV2 *node) final
{
- return loco::dtype_get(node->tensor());
+ return luci::dtype_get(node->tensor());
}
- loco::DataType visit(const luci::CircleRound *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleRound *node) final { return luci::dtype_get(node->x()); }
- loco::DataType visit(const luci::CircleRsqrt *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleRsqrt *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleScatterNd *node) final
{
- return loco::dtype_get(node->updates());
+ return luci::dtype_get(node->updates());
}
loco::DataType visit(const luci::CircleSegmentSum *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleSelect *node) final
{
- assert(loco::dtype_get(node->t()) == loco::dtype_get(node->e()));
- return loco::dtype_get(node->t());
+ assert(luci::dtype_get(node->t()) == luci::dtype_get(node->e()));
+ return luci::dtype_get(node->t());
}
loco::DataType visit(const luci::CircleSelectV2 *node) final
{
- assert(loco::dtype_get(node->t()) == loco::dtype_get(node->e()));
- return loco::dtype_get(node->t());
+ assert(luci::dtype_get(node->t()) == luci::dtype_get(node->e()));
+ return luci::dtype_get(node->t());
}
loco::DataType visit(const luci::CircleShape *node) final { return node->out_type(); }
- loco::DataType visit(const luci::CircleSin *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleSin *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleSlice *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleSoftmax *node) final
{
- return loco::dtype_get(node->logits());
+ return luci::dtype_get(node->logits());
}
loco::DataType visit(const luci::CircleSpaceToBatchND *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleSpaceToDepth *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleSparseToDense *node) final
{
- return loco::dtype_get(node->values());
+ return luci::dtype_get(node->values());
}
loco::DataType visit(const luci::CircleSplit *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleSplitV *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
- loco::DataType visit(const luci::CircleSqrt *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleSqrt *node) final { return luci::dtype_get(node->x()); }
- loco::DataType visit(const luci::CircleSquare *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleSquare *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleSquaredDifference *node) final
{
- return loco::dtype_get(node->x());
+ return luci::dtype_get(node->x());
}
loco::DataType visit(const luci::CircleSqueeze *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleStridedSlice *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
- loco::DataType visit(const luci::CircleSub *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleSub *node) final { return luci::dtype_get(node->x()); }
- loco::DataType visit(const luci::CircleSum *node) final { return loco::dtype_get(node->input()); }
+ loco::DataType visit(const luci::CircleSum *node) final { return luci::dtype_get(node->input()); }
- loco::DataType visit(const luci::CircleTanh *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleSVDF *node) final
+ {
+ return luci::dtype_get(node->input());
+ }
+
+ loco::DataType visit(const luci::CircleTanh *node) final { return luci::dtype_get(node->x()); }
loco::DataType visit(const luci::CircleTile *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleTopKV2 *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleTranspose *node) final
{
- return loco::dtype_get(node->a());
+ return luci::dtype_get(node->a());
}
loco::DataType visit(const luci::CircleTransposeConv *node) final
{
- return loco::dtype_get(node->outBackprop());
+ return luci::dtype_get(node->outBackprop());
+ }
+
+ loco::DataType visit(const luci::CircleUnidirectionalSequenceLSTM *node) final
+ {
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleUnique *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleUnpack *node) final
{
- return loco::dtype_get(node->value());
+ return luci::dtype_get(node->value());
}
loco::DataType visit(const luci::CircleWhere *) final { return loco::DataType::S64; }
@@ -506,12 +541,12 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT
{
// Type of While is not used. Just use input 0
assert(node->input_count() > 0);
- return loco::dtype_get(node->input(0));
+ return luci::dtype_get(node->input(0));
}
loco::DataType visit(const luci::CircleZerosLike *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
// Circle Only
@@ -524,7 +559,7 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT
loco::DataType visit(const luci::CircleInstanceNorm *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
// Virtual
@@ -541,57 +576,24 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT
{
// We don't care about the type if from() is CircleOutputDummy or CircleOutputExclude
// otherwise, the from() type should match that of CircleOutput
- assert(output_dtype == loco::dtype_get(node->from()));
+ assert(output_dtype == luci::dtype_get(node->from()));
}
return output_dtype;
}
loco::DataType visit(const luci::CircleOutputDummy *node) final { return node->dtype(); }
- loco::DataType visit(const luci::CircleOutputExclude *node) final { return node->dtype(); }
-
- loco::DataType visit(const luci::CircleCustomOut *node) final { return node->dtype(); }
-
- loco::DataType visit(const luci::CircleIfOut *node) final
+ loco::DataType visit(const luci::CircleOutputExclude *node) final
{
- /**
- * @note IF operator type and shape are that of the "then" and "else"
- * Graph Outputs.
- */
- auto circle_if = dynamic_cast<const luci::CircleIf *>(node->input());
- if (circle_if == nullptr)
- {
- INTERNAL_EXN("CircleIf IR is not configured correctly");
- }
-
- auto index = node->index();
- auto then_graph = circle_if->then_graph();
- auto else_graph = circle_if->else_graph();
- assert(then_graph != nullptr);
- assert(else_graph != nullptr);
-
- // shape and type are assumed to be same
- // these are checked at post_import_graph() in Import
- auto then_outputs = loco::output_nodes(then_graph);
- auto else_outputs = loco::output_nodes(else_graph);
- assert(then_outputs.size() == else_outputs.size());
- assert(index < static_cast<int32_t>(then_outputs.size()));
-
- auto then_out = loco::must_cast<luci::CircleOutput *>(then_outputs.at(index));
- auto else_out = loco::must_cast<luci::CircleOutput *>(else_outputs.at(index));
-
- auto then_graph_outputs = then_graph->outputs(); // loco::GraphOutput items
- auto else_graph_outputs = else_graph->outputs();
- assert(then_graph_outputs->size() == else_graph_outputs->size());
-
- auto then_graph_output = then_graph_outputs->at(then_out->index());
- auto else_graph_output = else_graph_outputs->at(else_out->index());
- (void)else_graph_output; // make compiler happy for unused variable warnings
- assert(then_graph_output->dtype() == else_graph_output->dtype());
-
- return then_graph_output->dtype();
+ // NOTE We don't care about the CircleOutputExclude dtype, but set it to FLOAT32
+ // if it is Unknown, to keep type inference happy.
+ if (node->dtype() == loco::DataType::Unknown)
+ return loco::DataType::FLOAT32;
+ return node->dtype();
}
+ loco::DataType visit(const luci::CircleCustomOut *node) final { return node->dtype(); }
+
loco::DataType visit(const luci::CircleNonMaxSuppressionV4Out *node) final
{
(void)node;
@@ -612,29 +614,31 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT
loco::DataType visit(const luci::CircleSplitOut *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleSplitVOut *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleTopKV2Out *node) final
{
// First output is the same as the input
if (node->index() == 0)
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
// Second output is always S32
assert(node->index() == 1);
return loco::DataType::S32;
}
+ loco::DataType visit(const luci::CircleVariable *node) final { return node->dtype(); }
+
loco::DataType visit(const luci::CircleUniqueOut *node) final
{
if (node->index() == 0)
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
assert(node->index() == 1);
auto unique = loco::must_cast<luci::CircleUnique *>(node->input());
@@ -643,7 +647,7 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT
loco::DataType visit(const luci::CircleUnpackOut *node) final
{
- return loco::dtype_get(node->input());
+ return luci::dtype_get(node->input());
}
loco::DataType visit(const luci::CircleWhileOut *node) final
diff --git a/compiler/luci/service/src/CircleTypeInferenceRule.test.cpp b/compiler/luci/service/src/CircleTypeInferenceRule.test.cpp
deleted file mode 100644
index 711a489af..000000000
--- a/compiler/luci/service/src/CircleTypeInferenceRule.test.cpp
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TestGraph.h"
-#include <luci/Service/CircleTypeInferenceRule.h>
-
-#include <luci/IR/CircleNodes.h>
-#include <luci/IR/CircleDialect.h>
-
-#include <loco.h>
-#include <loco/IR/CanonicalDialect.h>
-#include <loco/Service/TypeInference.h>
-
-#include <gtest/gtest.h>
-
-#include <memory>
-
-TEST(CircleTypeInferenceRuleTest, minimal_with_CircleRelu)
-{
- // Create a simple network
- luci::test::TestGraph graph;
- auto relu_node = graph.append<luci::CircleRelu>(graph.input_node);
- graph.complete(relu_node);
-
- // set dtype for nodes; like setting them in import
- graph.input_node->dtype(loco::DataType::S32);
- relu_node->dtype(loco::DataType::S32);
- graph.output_node->dtype(loco::DataType::S32);
-
- luci::test::graph_input_dtype(graph.input_node);
- luci::test::graph_output_dtype(graph.output_node);
-
- // pre-check
- ASSERT_FALSE(loco::dtype_known(relu_node));
-
- // type inference
- luci::CircleTypeInferenceRule circle_rule;
- loco::CanonicalTypeInferenceRule canon_rule;
- loco::MultiDialectTypeInferenceRule rules;
-
- rules.bind(loco::CanonicalDialect::get(), &canon_rule);
- rules.bind(luci::CircleDialect::get(), &circle_rule);
-
- loco::apply(&rules).to(graph.g.get());
-
- // Verify
- ASSERT_TRUE(loco::dtype_known(relu_node));
- auto type = loco::dtype_get(relu_node);
- ASSERT_EQ(loco::DataType::S32, type);
-}
diff --git a/compiler/luci/service/src/Nodes/CircleAbs.cpp b/compiler/luci/service/src/Nodes/CircleAbs.cpp
new file mode 100644
index 000000000..f7eb71a4b
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleAbs.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::ABC>::visit(const luci::CircleAbs *)
+{
+ return _graph->nodes()->create<luci::CircleAbs>();
+}
+
+} // namespace luci
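The new per-op sources depend on a CloneNodeLet visitor declared in CircleCloneNode.h, which this excerpt does not include. A hedged reconstruction of the relevant declaration, assuming ops are bucketed alphabetically as the CN::ABC tag suggests:

// Assumed shape of CircleCloneNode.h: one visitor class per alphabetical
// bucket, so each Nodes/*.cpp implements only the overloads for its ops.
enum class CN
{
  ABC, // ops whose names start with A..C
  DEF, // D..F, and so on for the remaining buckets
  // ...
};

template <CN ct> class CloneNodeLet;

template <>
class CloneNodeLet<CN::ABC> final : public luci::CircleNodeVisitor<luci::CircleNode *>
{
public:
  CloneNodeLet(loco::Graph *graph) : _graph(graph) {}

  luci::CircleNode *visit(const luci::CircleAbs *) final;
  luci::CircleNode *visit(const luci::CircleAdd *) final;
  // ... remaining A..C ops
  luci::CircleNode *visit(const luci::CircleNode *) final { return nullptr; } // fallback

protected:
  loco::Graph *_graph = nullptr;
};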
diff --git a/compiler/luci/service/src/Nodes/CircleAbs.test.cpp b/compiler/luci/service/src/Nodes/CircleAbs.test.cpp
new file mode 100644
index 000000000..885b395b8
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleAbs.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Abs)
+{
+ auto g = loco::make_graph();
+ auto node_abs = g->nodes()->create<luci::CircleAbs>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_abs, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_abs = dynamic_cast<luci::CircleAbs *>(cloned);
+ ASSERT_NE(nullptr, cloned_abs);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleAdd.cpp b/compiler/luci/service/src/Nodes/CircleAdd.cpp
new file mode 100644
index 000000000..2bdc6deed
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleAdd.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::ABC>::visit(const luci::CircleAdd *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleAdd>();
+ if (cloned != nullptr)
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ return cloned;
+}
+
+} // namespace luci
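As the _NEG test further down exercises, cloning fails softly: an invalid attribute yields nullptr rather than an assertion. A usage sketch of the public entry point luci::clone_node (from luci/Service/CircleNodeClone.h, as used in the tests); node_add and target_graph are illustrative names:

// Illustrative caller; the callee is the visit() overload above.
auto cloned = luci::clone_node(node_add, target_graph.get());
if (cloned == nullptr)
{
  // node_add->fusedActivationFunction() was UNDEFINED; the caller decides
  // whether to report an error or skip this node.
}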
diff --git a/compiler/luci/service/src/Nodes/CircleAdd.test.cpp b/compiler/luci/service/src/Nodes/CircleAdd.test.cpp
new file mode 100644
index 000000000..41a818b0a
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleAdd.test.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Service/CircleShapeInference.h>
+
+#include <loco/IR/TensorShape.h>
+
+#include <gtest/gtest.h>
+
+/**
+ * @note Function to test: Shape inference of two different input shapes
+ *
+ * Rank expansion to higher input side
+ * x(2,1,5) + y(3,5) --> x(2,1,5) + y(1,3,5)
+ * Do output shape inference like numpy
+ * x(2,1,5) + y(1,3,5) --> output(2,3,5)
+ * For each axis, the dim values should be the same OR one of them should be 1
+ */
+TEST(ShapeRuleTest, different_input_shapes_add)
+{
+ luci::CircleInput input1;
+ luci::CircleInput input2;
+ luci::CircleAdd add;
+
+ input1.shape({2, 1, 5});
+ input1.shape_status(luci::ShapeStatus::VALID);
+ input2.shape({3, 5});
+ input2.shape_status(luci::ShapeStatus::VALID);
+
+ add.x(&input1);
+ add.y(&input2);
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
+ ASSERT_TRUE(shape_inf_rule.infer(&add, shape));
+ ASSERT_EQ(3, shape.rank());
+ ASSERT_EQ(2, shape.dim(0).value());
+ ASSERT_EQ(3, shape.dim(1).value());
+ ASSERT_EQ(5, shape.dim(2).value());
+}
+
+TEST(CloneNodeTest, clone_Add)
+{
+ auto g = loco::make_graph();
+ auto node_add = g->nodes()->create<luci::CircleAdd>();
+ node_add->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_add, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_add = dynamic_cast<luci::CircleAdd *>(cloned);
+ ASSERT_NE(nullptr, cloned_add);
+ ASSERT_EQ(node_add->fusedActivationFunction(), cloned_add->fusedActivationFunction());
+}
+
+TEST(CloneNodeTest, clone_Add_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_add = g->nodes()->create<luci::CircleAdd>();
+ node_add->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_add, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleAddN.cpp b/compiler/luci/service/src/Nodes/CircleAddN.cpp
new file mode 100644
index 000000000..a43c455b6
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleAddN.cpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::ABC>::visit(const luci::CircleAddN *node)
+{
+ auto arity = node->arity();
+ return _graph->nodes()->create<luci::CircleAddN>(arity);
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleAddN.test.cpp b/compiler/luci/service/src/Nodes/CircleAddN.test.cpp
new file mode 100644
index 000000000..5d5b82247
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleAddN.test.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_AddN)
+{
+ auto g = loco::make_graph();
+ auto node_addn = g->nodes()->create<luci::CircleAddN>(3);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_addn, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_addn = dynamic_cast<luci::CircleAddN *>(cloned);
+ ASSERT_NE(nullptr, cloned_addn);
+ ASSERT_EQ(node_addn->arity(), cloned_addn->arity());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleArgMax.cpp b/compiler/luci/service/src/Nodes/CircleArgMax.cpp
new file mode 100644
index 000000000..b6efc2feb
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleArgMax.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::ABC>::visit(const luci::CircleArgMax *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleArgMax>();
+ if (cloned != nullptr)
+ cloned->output_type(node->output_type());
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleArgMax.test.cpp b/compiler/luci/service/src/Nodes/CircleArgMax.test.cpp
new file mode 100644
index 000000000..bb7588403
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleArgMax.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_ArgMax)
+{
+ auto g = loco::make_graph();
+ auto node_argmax = g->nodes()->create<luci::CircleArgMax>();
+ node_argmax->output_type(loco::DataType::FLOAT32);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_argmax, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_argmax = dynamic_cast<luci::CircleArgMax *>(cloned);
+ ASSERT_NE(nullptr, cloned_argmax);
+ ASSERT_EQ(node_argmax->output_type(), cloned_argmax->output_type());
+}
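
output_type here is the dtype of the index tensor that ArgMax produces; the FLOAT32 used in this test is simply an arbitrary value for the attribute round-trip check. In a real graph an integer type would be usual, e.g. (caller code, an assumption):

    node_argmax->output_type(loco::DataType::S64); // indices as 64-bit integers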
diff --git a/compiler/luci/service/src/Nodes/CircleArgMin.cpp b/compiler/luci/service/src/Nodes/CircleArgMin.cpp
new file mode 100644
index 000000000..ab079267e
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleArgMin.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::ABC>::visit(const luci::CircleArgMin *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleArgMin>();
+ if (cloned != nullptr)
+ cloned->output_type(node->output_type());
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleArgMin.test.cpp b/compiler/luci/service/src/Nodes/CircleArgMin.test.cpp
new file mode 100644
index 000000000..ca57946f9
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleArgMin.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_ArgMin)
+{
+ auto g = loco::make_graph();
+ auto node_argmin = g->nodes()->create<luci::CircleArgMin>();
+ node_argmin->output_type(loco::DataType::FLOAT32);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_argmin, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_argmin = dynamic_cast<luci::CircleArgMin *>(cloned);
+ ASSERT_NE(nullptr, cloned_argmin);
+ ASSERT_EQ(node_argmin->output_type(), cloned_argmin->output_type());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleAveragePool2D.cpp b/compiler/luci/service/src/Nodes/CircleAveragePool2D.cpp
new file mode 100644
index 000000000..c64b1b864
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleAveragePool2D.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::ABC>::visit(const luci::CircleAveragePool2D *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+ if (node->padding() == luci::Padding::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleAveragePool2D>();
+ if (cloned != nullptr)
+ {
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ cloned->padding(node->padding());
+ cloned->filter()->h(node->filter()->h());
+ cloned->filter()->w(node->filter()->w());
+ cloned->stride()->h(node->stride()->h());
+ cloned->stride()->w(node->stride()->w());
+ }
+ return cloned;
+}
+
+} // namespace luci
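
The two early returns guard against enum values an importer never set; the NEG tests below pin down both. The shape tests in the following file rely on the standard pooling output arithmetic, sketched here for reference (not the luci implementation):

    // VALID: only fully-covered windows count
    uint32_t out_valid(uint32_t in, uint32_t filter, uint32_t stride)
    {
      return (in - filter) / stride + 1; // 4x3 input, 2x2 filter, stride 2 -> 2x1
    }
    // SAME: output covers the whole input, i.e. ceil(in / stride)
    uint32_t out_same(uint32_t in, uint32_t stride)
    {
      return (in + stride - 1) / stride; // 4x3 input, stride 2 -> 2x2
    }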
diff --git a/compiler/luci/service/src/Nodes/CircleAveragePool2D.test.cpp b/compiler/luci/service/src/Nodes/CircleAveragePool2D.test.cpp
new file mode 100644
index 000000000..d048d1426
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleAveragePool2D.test.cpp
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Service/CircleShapeInference.h>
+
+#include <loco/IR/TensorShape.h>
+
+#include <gtest/gtest.h>
+
+TEST(ShapeRuleTest, simple_valid_pad_avgpool2d)
+{
+ luci::CircleInput input;
+ luci::CircleAveragePool2D avgpool_2d;
+
+ input.shape({1, 4, 3, 1});
+ input.shape_status(luci::ShapeStatus::VALID);
+
+ avgpool_2d.value(&input);
+ avgpool_2d.filter()->h(2);
+ avgpool_2d.filter()->w(2);
+ avgpool_2d.stride()->h(2);
+ avgpool_2d.stride()->w(2);
+ avgpool_2d.fusedActivationFunction(luci::FusedActFunc::NONE);
+ avgpool_2d.padding(luci::Padding::VALID);
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
+ ASSERT_TRUE(shape_inf_rule.infer(&avgpool_2d, shape));
+ ASSERT_EQ(4, shape.rank());
+ ASSERT_EQ(1, shape.dim(0).value());
+ ASSERT_EQ(2, shape.dim(1).value());
+ ASSERT_EQ(1, shape.dim(2).value());
+ ASSERT_EQ(1, shape.dim(3).value());
+}
+
+TEST(ShapeRuleTest, simple_same_pad_avgpool2d)
+{
+ luci::CircleInput input;
+ luci::CircleAveragePool2D avgpool_2d;
+
+ input.shape({1, 4, 3, 1});
+ input.shape_status(luci::ShapeStatus::VALID);
+
+ avgpool_2d.value(&input);
+ avgpool_2d.filter()->h(2);
+ avgpool_2d.filter()->w(2);
+ avgpool_2d.stride()->h(2);
+ avgpool_2d.stride()->w(2);
+ avgpool_2d.fusedActivationFunction(luci::FusedActFunc::NONE);
+ avgpool_2d.padding(luci::Padding::SAME);
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
+ ASSERT_TRUE(shape_inf_rule.infer(&avgpool_2d, shape));
+ ASSERT_EQ(4, shape.rank());
+ ASSERT_EQ(1, shape.dim(0).value());
+ ASSERT_EQ(2, shape.dim(1).value());
+ ASSERT_EQ(2, shape.dim(2).value());
+ ASSERT_EQ(1, shape.dim(3).value());
+}
+
+TEST(CloneNodeTest, clone_AveragePool2D)
+{
+ auto g = loco::make_graph();
+ auto node_avgpool2d = g->nodes()->create<luci::CircleAveragePool2D>();
+ node_avgpool2d->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_avgpool2d->padding(luci::Padding::SAME);
+ node_avgpool2d->filter()->h(1);
+ node_avgpool2d->filter()->w(2);
+ node_avgpool2d->stride()->h(3);
+ node_avgpool2d->stride()->w(4);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_avgpool2d, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_avgpool2d = dynamic_cast<luci::CircleAveragePool2D *>(cloned);
+ ASSERT_NE(nullptr, cloned_avgpool2d);
+ ASSERT_EQ(node_avgpool2d->fusedActivationFunction(), cloned_avgpool2d->fusedActivationFunction());
+ ASSERT_EQ(node_avgpool2d->padding(), cloned_avgpool2d->padding());
+ ASSERT_EQ(node_avgpool2d->filter()->h(), cloned_avgpool2d->filter()->h());
+ ASSERT_EQ(node_avgpool2d->filter()->w(), cloned_avgpool2d->filter()->w());
+ ASSERT_EQ(node_avgpool2d->stride()->h(), cloned_avgpool2d->stride()->h());
+ ASSERT_EQ(node_avgpool2d->stride()->w(), cloned_avgpool2d->stride()->w());
+}
+
+TEST(CloneNodeTest, clone_AveragePool2D_fusedact_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_avgpool2d = g->nodes()->create<luci::CircleAveragePool2D>();
+ node_avgpool2d->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ node_avgpool2d->padding(luci::Padding::SAME);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_avgpool2d, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
+
+TEST(CloneNodeTest, clone_AveragePool2D_padding_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_avgpool2d = g->nodes()->create<luci::CircleAveragePool2D>();
+ node_avgpool2d->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_avgpool2d->padding(luci::Padding::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_avgpool2d, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleBCQFullyConnected.cpp b/compiler/luci/service/src/Nodes/CircleBCQFullyConnected.cpp
new file mode 100644
index 000000000..3edc06ab8
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleBCQFullyConnected.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleBCQFullyConnected *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleBCQFullyConnected>();
+ if (cloned != nullptr)
+ {
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ cloned->weights_hidden_size(node->weights_hidden_size());
+ }
+ return cloned;
+}
+
+} // namespace luci
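
Unlike most files in this diff, which implement CloneNodeLet<CN::ABC> or CloneNodeLet<CN::DEF>, the BCQ nodes (and CircleCustomOut below) implement plain CloneNode::visit. The declaration shape this implies is sketched here; this is an inference from the diff, not the actual header:

    enum class CN { ABC, DEF /* , ... */ };  // alphabetic visitor groups
    template <CN group> class CloneNodeLet;  // A..C nodes in ABC, D..F in DEF, ...
    class CloneNode;                         // extension/virtual nodes (BCQ*, CustomOut)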
diff --git a/compiler/luci/service/src/Nodes/CircleBCQFullyConnected.test.cpp b/compiler/luci/service/src/Nodes/CircleBCQFullyConnected.test.cpp
new file mode 100644
index 000000000..90c192e07
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleBCQFullyConnected.test.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_BCQFullyConnected)
+{
+ auto g = loco::make_graph();
+ auto node_fc = g->nodes()->create<luci::CircleBCQFullyConnected>();
+ node_fc->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_fc->weights_hidden_size(3);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_fc, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_fc = dynamic_cast<luci::CircleBCQFullyConnected *>(cloned);
+ ASSERT_NE(nullptr, cloned_fc);
+ ASSERT_EQ(node_fc->fusedActivationFunction(), cloned_fc->fusedActivationFunction());
+ ASSERT_EQ(node_fc->weights_hidden_size(), cloned_fc->weights_hidden_size());
+}
+
+TEST(CloneNodeTest, clone_BCQFullyConnected_fusedact_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_fc = g->nodes()->create<luci::CircleBCQFullyConnected>();
+ node_fc->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_fc, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleBCQGather.cpp b/compiler/luci/service/src/Nodes/CircleBCQGather.cpp
new file mode 100644
index 000000000..35b6be744
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleBCQGather.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleBCQGather *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleBCQGather>();
+ if (cloned != nullptr)
+ {
+ cloned->axis(node->axis());
+ cloned->input_hidden_size(node->input_hidden_size());
+ }
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleBCQGather.test.cpp b/compiler/luci/service/src/Nodes/CircleBCQGather.test.cpp
new file mode 100644
index 000000000..a3f9e8850
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleBCQGather.test.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_BCQGather)
+{
+ auto g = loco::make_graph();
+ auto node_gat = g->nodes()->create<luci::CircleBCQGather>();
+ node_gat->axis(3);
+ node_gat->input_hidden_size(5);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_gat, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_gat = dynamic_cast<luci::CircleBCQGather *>(cloned);
+ ASSERT_NE(nullptr, cloned_gat);
+ ASSERT_EQ(node_gat->axis(), cloned_gat->axis());
+ ASSERT_EQ(node_gat->input_hidden_size(), cloned_gat->input_hidden_size());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleBatchMatMul.cpp b/compiler/luci/service/src/Nodes/CircleBatchMatMul.cpp
new file mode 100644
index 000000000..b21b78dab
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleBatchMatMul.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::ABC>::visit(const luci::CircleBatchMatMul *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleBatchMatMul>();
+ if (cloned != nullptr)
+ {
+ cloned->adj_x(node->adj_x());
+ cloned->adj_y(node->adj_y());
+ }
+ return cloned;
+}
+
+} // namespace luci
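
adj_x / adj_y follow the usual BatchMatMul convention: the flagged operand is treated as transposed on its last two axes before the batched multiply. A shape-level sketch under that assumption:

    struct Dims { uint32_t batch, rows, cols; };
    Dims batch_matmul_shape(Dims x, bool adj_x, Dims y, bool adj_y)
    {
      uint32_t m = adj_x ? x.cols : x.rows; // adj_x: x is stored as [B, K, M]
      uint32_t n = adj_y ? y.rows : y.cols; // adj_y: y is stored as [B, N, K]
      return {x.batch, m, n};               // assumes batch dims already match
    }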
diff --git a/compiler/luci/service/src/Nodes/CircleBatchMatMul.test.cpp b/compiler/luci/service/src/Nodes/CircleBatchMatMul.test.cpp
new file mode 100644
index 000000000..e013feae8
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleBatchMatMul.test.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_BatchMatMul)
+{
+ auto g = loco::make_graph();
+ auto node_bmm = g->nodes()->create<luci::CircleBatchMatMul>();
+ node_bmm->adj_x(true);
+ node_bmm->adj_y(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_bmm, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_bmm = dynamic_cast<luci::CircleBatchMatMul *>(cloned);
+ ASSERT_NE(nullptr, cloned_bmm);
+ ASSERT_EQ(node_bmm->adj_x(), cloned_bmm->adj_x());
+ ASSERT_EQ(node_bmm->adj_y(), cloned_bmm->adj_y());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleBatchToSpaceND.cpp b/compiler/luci/service/src/Nodes/CircleBatchToSpaceND.cpp
new file mode 100644
index 000000000..276f71bf1
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleBatchToSpaceND.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::ABC>::visit(const luci::CircleBatchToSpaceND *)
+{
+ return _graph->nodes()->create<luci::CircleBatchToSpaceND>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleBatchToSpaceND.test.cpp b/compiler/luci/service/src/Nodes/CircleBatchToSpaceND.test.cpp
new file mode 100644
index 000000000..a45039fc7
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleBatchToSpaceND.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_BatchToSpaceND)
+{
+ auto g = loco::make_graph();
+ auto node_b2s = g->nodes()->create<luci::CircleBatchToSpaceND>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_b2s, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_b2s = dynamic_cast<luci::CircleBatchToSpaceND *>(cloned);
+ ASSERT_NE(nullptr, cloned_b2s);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleCast.cpp b/compiler/luci/service/src/Nodes/CircleCast.cpp
new file mode 100644
index 000000000..1c20152cc
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleCast.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::ABC>::visit(const luci::CircleCast *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleCast>();
+ if (cloned != nullptr)
+ {
+ cloned->in_data_type(node->in_data_type());
+ cloned->out_data_type(node->out_data_type());
+ }
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleCast.test.cpp b/compiler/luci/service/src/Nodes/CircleCast.test.cpp
new file mode 100644
index 000000000..1c4bacb73
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleCast.test.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Cast)
+{
+ auto g = loco::make_graph();
+ auto node_cast = g->nodes()->create<luci::CircleCast>();
+ node_cast->in_data_type(loco::DataType::U16);
+ node_cast->out_data_type(loco::DataType::S32);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_cast, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_cast = dynamic_cast<luci::CircleCast *>(cloned);
+ ASSERT_NE(nullptr, cloned_cast);
+ ASSERT_EQ(node_cast->in_data_type(), cloned_cast->in_data_type());
+ ASSERT_EQ(node_cast->out_data_type(), cloned_cast->out_data_type());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleCeil.cpp b/compiler/luci/service/src/Nodes/CircleCeil.cpp
new file mode 100644
index 000000000..3359afa57
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleCeil.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::ABC>::visit(const luci::CircleCeil *)
+{
+ return _graph->nodes()->create<luci::CircleCeil>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleCeil.test.cpp b/compiler/luci/service/src/Nodes/CircleCeil.test.cpp
new file mode 100644
index 000000000..b182127d9
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleCeil.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Ceil)
+{
+ auto g = loco::make_graph();
+ auto node_ceil = g->nodes()->create<luci::CircleCeil>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_ceil, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_ceil = dynamic_cast<luci::CircleCeil *>(cloned);
+ ASSERT_NE(nullptr, cloned_ceil);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleConcatenation.cpp b/compiler/luci/service/src/Nodes/CircleConcatenation.cpp
new file mode 100644
index 000000000..051c35c5b
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleConcatenation.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::ABC>::visit(const luci::CircleConcatenation *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleConcatenation>(node->numValues());
+ if (cloned != nullptr)
+ {
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ cloned->axis(node->axis());
+ }
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleConcatenation.test.cpp b/compiler/luci/service/src/Nodes/CircleConcatenation.test.cpp
new file mode 100644
index 000000000..270068cf0
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleConcatenation.test.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Concatenation)
+{
+ auto g = loco::make_graph();
+ auto node_concat = g->nodes()->create<luci::CircleConcatenation>(3);
+ node_concat->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_concat->axis(7);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_concat, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_concat = dynamic_cast<luci::CircleConcatenation *>(cloned);
+ ASSERT_NE(nullptr, cloned_concat);
+ ASSERT_EQ(node_concat->numValues(), cloned_concat->numValues());
+ ASSERT_EQ(node_concat->fusedActivationFunction(), cloned_concat->fusedActivationFunction());
+ ASSERT_EQ(node_concat->axis(), cloned_concat->axis());
+}
+
+TEST(CloneNodeTest, clone_Concatenation_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_concat = g->nodes()->create<luci::CircleConcatenation>(3);
+ node_concat->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_concat, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleConst.cpp b/compiler/luci/service/src/Nodes/CircleConst.cpp
new file mode 100644
index 000000000..017dcc8ad
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleConst.cpp
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <luci/IR/Nodes/CircleConst.h>
+
+#include <loco.h>
+#include <loco/IR/Graph.h>
+
+#include <oops/UserExn.h>
+
+#include <cassert>
+
+namespace
+{
+
+template <loco::DataType T>
+void copy_values(const luci::CircleConst *node, luci::CircleConst *cloned)
+{
+ assert(T == node->dtype());
+ assert(T == cloned->dtype());
+
+ const auto size = node->size<T>();
+ cloned->size<T>(size);
+ for (uint32_t i = 0; i < size; i++)
+ cloned->at<T>(i) = node->at<T>(i);
+}
+
+luci::CircleConst *clone_circleconst(const luci::CircleConst *node, loco::Graph *graph)
+{
+ auto cloned = graph->nodes()->create<luci::CircleConst>();
+
+ if (cloned != nullptr)
+ {
+ // dtype/shape
+ cloned->dtype(node->dtype());
+ cloned->rank(node->rank());
+
+ // values
+ switch (node->dtype())
+ {
+ case loco::DataType::FLOAT32:
+ copy_values<loco::DataType::FLOAT32>(node, cloned);
+ break;
+
+ case loco::DataType::U8:
+ copy_values<loco::DataType::U8>(node, cloned);
+ break;
+
+ case loco::DataType::S8:
+ copy_values<loco::DataType::S8>(node, cloned);
+ break;
+
+ case loco::DataType::S16:
+ copy_values<loco::DataType::S16>(node, cloned);
+ break;
+
+ case loco::DataType::S32:
+ copy_values<loco::DataType::S32>(node, cloned);
+ break;
+
+ case loco::DataType::S64:
+ copy_values<loco::DataType::S64>(node, cloned);
+ break;
+
+ case loco::DataType::BOOL:
+ copy_values<loco::DataType::BOOL>(node, cloned);
+ break;
+
+ default:
+ throw oops::UserExn("Unsupported tensor dtype");
+ }
+ }
+
+ return cloned;
+}
+
+} // namespace
+
+namespace luci
+{
+
+luci::CircleConst *clone(luci::CircleConst *node)
+{
+ auto *cloned = clone_circleconst(node, node->graph());
+
+ copy_common_attributes(node, cloned);
+
+ return cloned;
+}
+
+} // namespace luci
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::ABC>::visit(const luci::CircleConst *node)
+{
+ return clone_circleconst(node, _graph);
+}
+
+} // namespace luci
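
copy_values<T> maps the runtime dtype to the compile-time accessor at<T>(); the switch is the runtime-to-template bridge, and unsupported dtypes throw rather than silently copying bytes. The file also gives CircleConst two entry points, and the test below checks that quantparam/sparsityparam survive both paths; sketched usage:

    // same-graph duplicate; common attributes copied by luci::clone itself
    auto *dup = luci::clone(original_const);
    // cross-graph duplicate via the generic visitor path
    auto *moved = luci::clone_node(original_const, other_graph.get());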
diff --git a/compiler/luci/service/src/Nodes/CircleConst.test.cpp b/compiler/luci/service/src/Nodes/CircleConst.test.cpp
new file mode 100644
index 000000000..5d94798f4
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleConst.test.cpp
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/Nodes/CircleConst.h"
+#include "luci/Service/CircleNodeClone.h"
+
+#include <loco.h>
+#include <loco/IR/Graph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+luci::CircleConst *new_const_s32(loco::Graph *g)
+{
+ // prepare source CircleConst
+ auto circle_const = g->nodes()->create<luci::CircleConst>();
+
+ const auto size = 2;
+
+ circle_const->dtype(loco::DataType::S32);
+ circle_const->rank(1);
+ circle_const->dim(0).set(size);
+ circle_const->shape_status(luci::ShapeStatus::VALID);
+
+ circle_const->size<loco::DataType::S32>(size);
+ for (uint32_t i = 0; i < size; i++)
+ circle_const->at<loco::DataType::S32>(i) = i;
+
+ // quantparam
+ auto quantparam = std::make_unique<luci::CircleQuantParam>();
+ quantparam->scale = {1.0};
+ quantparam->zerop = {0};
+ quantparam->min = {-127.0};
+ quantparam->max = {127.0};
+ quantparam->quantized_dimension = 1;
+ circle_const->quantparam(std::move(quantparam));
+
+ // sparsityparam
+ auto sparam = std::make_unique<luci::SparsityParam>();
+ sparam->traversal_order = {1};
+ sparam->block_map = {1};
+ sparam->dim_metadata = {};
+ circle_const->sparsityparam(std::move(sparam));
+
+ return circle_const;
+}
+
+template <loco::DataType DT> luci::CircleConst *new_empty_const(loco::Graph *g)
+{
+ auto circle_const = g->nodes()->create<luci::CircleConst>();
+
+ const auto size = 0;
+
+ circle_const->dtype(DT);
+ circle_const->rank(1);
+ circle_const->dim(0).set(size);
+ circle_const->shape_status(luci::ShapeStatus::VALID);
+ circle_const->size<DT>(size);
+
+ return circle_const;
+}
+
+} // namespace
+
+TEST(CircleConstTest, clone)
+{
+ auto g = loco::make_graph();
+
+ // prepare source CircleConst
+ auto circle_const = new_const_s32(g.get());
+
+ // make a clone
+ auto const_cloned = luci::clone(circle_const);
+
+ // check attributes
+ ASSERT_EQ(loco::DataType::S32, const_cloned->dtype());
+ ASSERT_EQ(1, const_cloned->rank());
+ ASSERT_EQ(2, const_cloned->dim(0).value());
+ ASSERT_EQ(2, const_cloned->size<loco::DataType::S32>());
+ ASSERT_EQ(0, const_cloned->at<loco::DataType::S32>(0));
+ ASSERT_EQ(1, const_cloned->at<loco::DataType::S32>(1));
+ ASSERT_NE(nullptr, const_cloned->quantparam());
+ ASSERT_NE(nullptr, const_cloned->sparsityparam());
+}
+
+TEST(CircleConstTest, clone_U8)
+{
+ auto g = loco::make_graph();
+
+ // prepare source CircleConst
+ auto circle_const = new_empty_const<loco::DataType::U8>(g.get());
+
+ // make a clone
+ auto const_cloned = luci::clone(circle_const);
+
+ // check attributes
+ ASSERT_EQ(loco::DataType::U8, const_cloned->dtype());
+}
+
+TEST(CircleConstTest, clone_S8)
+{
+ auto g = loco::make_graph();
+
+ // prepare source CircleConst
+ auto circle_const = new_empty_const<loco::DataType::S8>(g.get());
+
+ // make a clone
+ auto const_cloned = luci::clone(circle_const);
+
+ // check attributes
+ ASSERT_EQ(loco::DataType::S8, const_cloned->dtype());
+}
+
+TEST(CircleConstTest, clone_S64)
+{
+ auto g = loco::make_graph();
+
+ // prepare source CircleConst
+ auto circle_const = new_empty_const<loco::DataType::S64>(g.get());
+
+ // make a clone
+ auto const_cloned = luci::clone(circle_const);
+
+ // check attributes
+ ASSERT_EQ(loco::DataType::S64, const_cloned->dtype());
+}
+
+TEST(CircleConstTest, clone_BOOL)
+{
+ auto g = loco::make_graph();
+
+ // prepare source CircleConst
+ auto circle_const = new_empty_const<loco::DataType::BOOL>(g.get());
+
+ // make a clone
+ auto const_cloned = luci::clone(circle_const);
+
+ // check attributes
+ ASSERT_EQ(loco::DataType::BOOL, const_cloned->dtype());
+}
+
+TEST(CloneNodeTest, clone_Const)
+{
+ auto g = loco::make_graph();
+ auto node_const = new_const_s32(g.get());
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_const, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_const = dynamic_cast<luci::CircleConst *>(cloned);
+ ASSERT_NE(nullptr, cloned_const);
+ ASSERT_EQ(loco::DataType::S32, cloned_const->dtype());
+ ASSERT_EQ(1, cloned_const->rank());
+ ASSERT_EQ(2, cloned_const->dim(0).value());
+ ASSERT_EQ(2, cloned_const->size<loco::DataType::S32>());
+ ASSERT_EQ(0, cloned_const->at<loco::DataType::S32>(0));
+ ASSERT_EQ(1, cloned_const->at<loco::DataType::S32>(1));
+ ASSERT_NE(nullptr, cloned_const->quantparam());
+ ASSERT_NE(nullptr, cloned_const->sparsityparam());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleConv2D.cpp b/compiler/luci/service/src/Nodes/CircleConv2D.cpp
new file mode 100644
index 000000000..bd2a28988
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleConv2D.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::ABC>::visit(const luci::CircleConv2D *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+ if (node->padding() == luci::Padding::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleConv2D>();
+ if (cloned != nullptr)
+ {
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ cloned->padding(node->padding());
+ cloned->stride()->h(node->stride()->h());
+ cloned->stride()->w(node->stride()->w());
+ cloned->dilation()->h(node->dilation()->h());
+ cloned->dilation()->w(node->dilation()->w());
+ }
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleConv2D.test.cpp b/compiler/luci/service/src/Nodes/CircleConv2D.test.cpp
new file mode 100644
index 000000000..c265d6cd1
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleConv2D.test.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Conv2D)
+{
+ auto g = loco::make_graph();
+ auto node_conv2d = g->nodes()->create<luci::CircleConv2D>();
+ node_conv2d->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_conv2d->padding(luci::Padding::SAME);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_conv2d, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_conv2d = dynamic_cast<luci::CircleConv2D *>(cloned);
+ ASSERT_NE(nullptr, cloned_conv2d);
+ ASSERT_EQ(node_conv2d->fusedActivationFunction(), cloned_conv2d->fusedActivationFunction());
+ ASSERT_EQ(node_conv2d->padding(), cloned_conv2d->padding());
+}
+
+TEST(CloneNodeTest, clone_Conv2D_fusedact_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_conv2d = g->nodes()->create<luci::CircleConv2D>();
+ node_conv2d->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ node_conv2d->padding(luci::Padding::SAME);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_conv2d, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
+
+TEST(CloneNodeTest, clone_Conv2D_padding_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_conv2d = g->nodes()->create<luci::CircleConv2D>();
+ node_conv2d->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_conv2d->padding(luci::Padding::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_conv2d, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleCos.cpp b/compiler/luci/service/src/Nodes/CircleCos.cpp
new file mode 100644
index 000000000..d3377a23e
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleCos.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::ABC>::visit(const luci::CircleCos *)
+{
+ return _graph->nodes()->create<luci::CircleCos>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleCos.test.cpp b/compiler/luci/service/src/Nodes/CircleCos.test.cpp
new file mode 100644
index 000000000..a25943b98
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleCos.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Cos)
+{
+ auto g = loco::make_graph();
+ auto node_cos = g->nodes()->create<luci::CircleCos>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_cos, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_cos = dynamic_cast<luci::CircleCos *>(cloned);
+ ASSERT_NE(nullptr, cloned_cos);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleCustom.cpp b/compiler/luci/service/src/Nodes/CircleCustom.cpp
new file mode 100644
index 000000000..9a99b192a
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleCustom.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::ABC>::visit(const luci::CircleCustom *node)
+{
+ uint32_t num_in = node->numInputs();
+ uint32_t num_out = node->numOutputs();
+ auto *cloned = _graph->nodes()->create<luci::CircleCustom>(num_in, num_out);
+ if (cloned != nullptr)
+ {
+ cloned->custom_options(node->custom_options());
+ cloned->custom_code(node->custom_code());
+ }
+ return cloned;
+}
+
+} // namespace luci
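
custom_options is an opaque byte blob (FlexBuffer-encoded in Circle files) and custom_code is the operator name a runtime resolves, so cloning is a plain copy of both. A usage sketch with hypothetical values:

    std::vector<uint8_t> opts{0x01, 0x02};  // opaque; interpreted only by the op's kernel
    node_custom->custom_options(opts);
    node_custom->custom_code("MyCustomOp"); // hypothetical operator name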
diff --git a/compiler/luci/service/src/Nodes/CircleCustom.test.cpp b/compiler/luci/service/src/Nodes/CircleCustom.test.cpp
new file mode 100644
index 000000000..6fee68e71
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleCustom.test.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+#include <string>
+#include <vector>
+
+TEST(CloneNodeTest, clone_Custom)
+{
+ auto g = loco::make_graph();
+ auto node_custom = g->nodes()->create<luci::CircleCustom>(2, 3);
+ std::vector<uint8_t> options({0x55, 0x56, 0x57});
+ std::string code = "hello";
+ node_custom->custom_options(options);
+ node_custom->custom_code(code);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_custom, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_custom = dynamic_cast<luci::CircleCustom *>(cloned);
+ ASSERT_NE(nullptr, cloned_custom);
+ auto cloned_options = cloned_custom->custom_options();
+ ASSERT_EQ(options.size(), cloned_options.size());
+ auto size = options.size();
+ for (size_t s = 0; s < size; ++s)
+ ASSERT_EQ(options.at(s), cloned_options.at(s));
+ ASSERT_TRUE(node_custom->custom_code() == cloned_custom->custom_code());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleCustomOut.cpp b/compiler/luci/service/src/Nodes/CircleCustomOut.cpp
new file mode 100644
index 000000000..84577f529
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleCustomOut.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleCustomOut *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleCustomOut>();
+ if (cloned != nullptr)
+ cloned->index(node->index());
+ return cloned;
+}
+
+} // namespace luci
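
A multi-output Custom node is materialized as one CircleCustomOut per output slot, with index() selecting the slot. A hypothetical wiring sketch (assumes CircleCustomOut::input is the usual single-input setter of luci virtual out-nodes):

    auto *custom = g->nodes()->create<luci::CircleCustom>(2, 3); // 2 inputs, 3 outputs
    for (uint32_t i = 0; i < 3; ++i)
    {
      auto *out = g->nodes()->create<luci::CircleCustomOut>();
      out->input(custom);
      out->index(i); // which of the 3 outputs this node represents
    }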
diff --git a/compiler/luci/service/src/Nodes/CircleCustomOut.test.cpp b/compiler/luci/service/src/Nodes/CircleCustomOut.test.cpp
new file mode 100644
index 000000000..15121bab6
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleCustomOut.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_CustomOut)
+{
+ auto g = loco::make_graph();
+ auto node_cout = g->nodes()->create<luci::CircleCustomOut>();
+ node_cout->index(1);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_cout, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_cout = dynamic_cast<luci::CircleCustomOut *>(cloned);
+ ASSERT_NE(nullptr, cloned_cout);
+ ASSERT_EQ(node_cout->index(), cloned_cout->index());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleDensify.cpp b/compiler/luci/service/src/Nodes/CircleDensify.cpp
new file mode 100644
index 000000000..a0d15b6c7
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleDensify.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::DEF>::visit(const luci::CircleDensify *)
+{
+ return _graph->nodes()->create<luci::CircleDensify>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleDensify.test.cpp b/compiler/luci/service/src/Nodes/CircleDensify.test.cpp
new file mode 100644
index 000000000..d0f32c1a2
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleDensify.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Densify)
+{
+ auto g = loco::make_graph();
+ auto node_densify = g->nodes()->create<luci::CircleDensify>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_densify, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_densify = dynamic_cast<luci::CircleDensify *>(cloned);
+ ASSERT_NE(nullptr, cloned_densify);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleDepthToSpace.cpp b/compiler/luci/service/src/Nodes/CircleDepthToSpace.cpp
new file mode 100644
index 000000000..6b5705a0e
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleDepthToSpace.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::DEF>::visit(const luci::CircleDepthToSpace *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleDepthToSpace>();
+ if (cloned != nullptr)
+ cloned->block_size(node->block_size());
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleDepthToSpace.test.cpp b/compiler/luci/service/src/Nodes/CircleDepthToSpace.test.cpp
new file mode 100644
index 000000000..192b10b90
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleDepthToSpace.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_DepthToSpace)
+{
+ auto g = loco::make_graph();
+ auto node_d2s = g->nodes()->create<luci::CircleDepthToSpace>();
+ node_d2s->block_size(32);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_d2s, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_d2s = dynamic_cast<luci::CircleDepthToSpace *>(cloned);
+ ASSERT_NE(nullptr, cloned_d2s);
+ ASSERT_EQ(node_d2s->block_size(), cloned_d2s->block_size());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleDepthwiseConv2D.cpp b/compiler/luci/service/src/Nodes/CircleDepthwiseConv2D.cpp
new file mode 100644
index 000000000..ba34a221c
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleDepthwiseConv2D.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::DEF>::visit(const luci::CircleDepthwiseConv2D *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+ if (node->padding() == luci::Padding::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleDepthwiseConv2D>();
+ if (cloned != nullptr)
+ {
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ cloned->padding(node->padding());
+ cloned->stride()->h(node->stride()->h());
+ cloned->stride()->w(node->stride()->w());
+ cloned->depthMultiplier(node->depthMultiplier());
+ cloned->dilation()->h(node->dilation()->h());
+ cloned->dilation()->w(node->dilation()->w());
+ }
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleDepthwiseConv2D.test.cpp b/compiler/luci/service/src/Nodes/CircleDepthwiseConv2D.test.cpp
new file mode 100644
index 000000000..8657464bc
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleDepthwiseConv2D.test.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_DepthwiseConv2D)
+{
+ auto g = loco::make_graph();
+ auto node_dwconv2d = g->nodes()->create<luci::CircleDepthwiseConv2D>();
+ node_dwconv2d->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_dwconv2d->padding(luci::Padding::SAME);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_dwconv2d, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_dwconv2d = dynamic_cast<luci::CircleDepthwiseConv2D *>(cloned);
+ ASSERT_NE(nullptr, cloned_dwconv2d);
+ ASSERT_EQ(node_dwconv2d->fusedActivationFunction(), cloned_dwconv2d->fusedActivationFunction());
+ ASSERT_EQ(node_dwconv2d->padding(), cloned_dwconv2d->padding());
+}
+
+TEST(CloneNodeTest, clone_DepthwiseConv2D_fusedact_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_dwconv2d = g->nodes()->create<luci::CircleDepthwiseConv2D>();
+ node_dwconv2d->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ node_dwconv2d->padding(luci::Padding::SAME);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_dwconv2d, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
+
+TEST(CloneNodeTest, clone_DepthwiseConv2D_padding_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_dwconv2d = g->nodes()->create<luci::CircleDepthwiseConv2D>();
+ node_dwconv2d->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_dwconv2d->padding(luci::Padding::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_dwconv2d, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
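
Operators that carry required enum attributes get a stricter visitor: an UNDEFINED fusedActivationFunction or padding makes the visit return nullptr instead of a half-valid clone, and each rejection path is pinned down by a _NEG test. A caller-side sketch (not repository code) of how that contract is meant to be consumed:

    #include "luci/Service/CircleNodeClone.h"

    #include <stdexcept>

    luci::CircleNode *checked_clone(const luci::CircleNode *src, loco::Graph *dst)
    {
      auto *cloned = luci::clone_node(src, dst);
      if (cloned == nullptr)
      {
        // e.g. fusedActivationFunction() or padding() was UNDEFINED
        throw std::runtime_error("source node is not cloneable");
      }
      return cloned;
    }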
diff --git a/compiler/luci/service/src/Nodes/CircleDequantize.cpp b/compiler/luci/service/src/Nodes/CircleDequantize.cpp
new file mode 100644
index 000000000..427617cb0
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleDequantize.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::DEF>::visit(const luci::CircleDequantize *)
+{
+ return _graph->nodes()->create<luci::CircleDequantize>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleDequantize.test.cpp b/compiler/luci/service/src/Nodes/CircleDequantize.test.cpp
new file mode 100644
index 000000000..e1c563acf
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleDequantize.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Dequantize)
+{
+ auto g = loco::make_graph();
+ auto node_dq = g->nodes()->create<luci::CircleDequantize>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_dq, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_dq = dynamic_cast<luci::CircleDequantize *>(cloned);
+ ASSERT_NE(nullptr, cloned_dq);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleDiv.cpp b/compiler/luci/service/src/Nodes/CircleDiv.cpp
new file mode 100644
index 000000000..6d40c5ee5
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleDiv.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::DEF>::visit(const luci::CircleDiv *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleDiv>();
+ if (cloned != nullptr)
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleDiv.test.cpp b/compiler/luci/service/src/Nodes/CircleDiv.test.cpp
new file mode 100644
index 000000000..5182ac908
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleDiv.test.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Div)
+{
+ auto g = loco::make_graph();
+ auto node_div = g->nodes()->create<luci::CircleDiv>();
+ node_div->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_div, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_div = dynamic_cast<luci::CircleDiv *>(cloned);
+ ASSERT_NE(nullptr, cloned_div);
+ ASSERT_EQ(node_div->fusedActivationFunction(), cloned_div->fusedActivationFunction());
+}
+
+TEST(CloneNodeTest, clone_Div_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_div = g->nodes()->create<luci::CircleDiv>();
+ node_div->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_div, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleElu.cpp b/compiler/luci/service/src/Nodes/CircleElu.cpp
new file mode 100644
index 000000000..885ecd6ad
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleElu.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::DEF>::visit(const luci::CircleElu *)
+{
+ return _graph->nodes()->create<luci::CircleElu>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleElu.test.cpp b/compiler/luci/service/src/Nodes/CircleElu.test.cpp
new file mode 100644
index 000000000..e75b3bcb1
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleElu.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Elu)
+{
+ auto g = loco::make_graph();
+ auto node_elu = g->nodes()->create<luci::CircleElu>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_elu, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_elu = dynamic_cast<luci::CircleElu *>(cloned);
+ ASSERT_NE(nullptr, cloned_elu);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleEqual.cpp b/compiler/luci/service/src/Nodes/CircleEqual.cpp
new file mode 100644
index 000000000..ee49e92cf
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleEqual.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::DEF>::visit(const luci::CircleEqual *)
+{
+ return _graph->nodes()->create<luci::CircleEqual>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleEqual.test.cpp b/compiler/luci/service/src/Nodes/CircleEqual.test.cpp
new file mode 100644
index 000000000..99a5535fc
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleEqual.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Equal)
+{
+ auto g = loco::make_graph();
+ auto node_eq = g->nodes()->create<luci::CircleEqual>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_eq, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_eq = dynamic_cast<luci::CircleEqual *>(cloned);
+ ASSERT_NE(nullptr, cloned_eq);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleExp.cpp b/compiler/luci/service/src/Nodes/CircleExp.cpp
new file mode 100644
index 000000000..896dc0279
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleExp.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::DEF>::visit(const luci::CircleExp *)
+{
+ return _graph->nodes()->create<luci::CircleExp>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleExp.test.cpp b/compiler/luci/service/src/Nodes/CircleExp.test.cpp
new file mode 100644
index 000000000..ff2bb65db
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleExp.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Exp)
+{
+ auto g = loco::make_graph();
+ auto node_exp = g->nodes()->create<luci::CircleExp>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_exp, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_exp = dynamic_cast<luci::CircleExp *>(cloned);
+ ASSERT_NE(nullptr, cloned_exp);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleExpandDims.cpp b/compiler/luci/service/src/Nodes/CircleExpandDims.cpp
new file mode 100644
index 000000000..a7fe5372b
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleExpandDims.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::DEF>::visit(const luci::CircleExpandDims *)
+{
+ return _graph->nodes()->create<luci::CircleExpandDims>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleExpandDims.test.cpp b/compiler/luci/service/src/Nodes/CircleExpandDims.test.cpp
new file mode 100644
index 000000000..e3481bccd
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleExpandDims.test.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Service/CircleShapeInference.h>
+
+#include <loco/IR/TensorShape.h>
+
+#include <gtest/gtest.h>
+
+TEST(ShapeRuleTest, simple_expand_dims)
+{
+ luci::CircleInput input;
+ luci::CircleConst axis;
+ luci::CircleExpandDims expand_dims;
+
+ input.shape({4, 3});
+ input.shape_status(luci::ShapeStatus::VALID);
+
+ axis.dtype(loco::DataType::S32);
+ axis.rank(0);
+ axis.size<loco::DataType::S32>(1);
+ axis.at<loco::DataType::S32>(0) = 1;
+ axis.shape_status(luci::ShapeStatus::VALID);
+
+ expand_dims.input(&input);
+ expand_dims.axis(&axis);
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
+ ASSERT_TRUE(shape_inf_rule.infer(&expand_dims, shape));
+ ASSERT_EQ(3, shape.rank());
+ ASSERT_EQ(4, shape.dim(0).value());
+ ASSERT_EQ(1, shape.dim(1).value());
+ ASSERT_EQ(3, shape.dim(2).value());
+}
+
+TEST(CloneNodeTest, clone_ExpandDims)
+{
+ auto g = loco::make_graph();
+ auto node_ed = g->nodes()->create<luci::CircleExpandDims>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_ed, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_ed = dynamic_cast<luci::CircleExpandDims *>(cloned);
+ ASSERT_NE(nullptr, cloned_ed);
+}
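
The ExpandDims test is the first in this batch to exercise a shape rule rather than clone behavior: with inputs marked ShapeStatus::VALID, inserting a length-1 axis at position 1 turns {4, 3} into {4, 1, 3}. The driver pattern is the same for every ShapeRuleTest that follows; a minimal sketch, assuming the node's inputs already have valid shapes:

    #include <luci/Service/CircleShapeInference.h>
    #include <loco/IR/TensorShape.h>

    bool infer_shape(luci::CircleNode *node, loco::TensorShape &out)
    {
      luci::sinf::Rule rule;
      return rule.infer(node, out); // false when input shapes are not ready
    }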
diff --git a/compiler/luci/service/src/Nodes/CircleFakeQuant.cpp b/compiler/luci/service/src/Nodes/CircleFakeQuant.cpp
new file mode 100644
index 000000000..8e0150b07
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleFakeQuant.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::DEF>::visit(const luci::CircleFakeQuant *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleFakeQuant>();
+ if (cloned != nullptr)
+ {
+ cloned->min(node->min());
+ cloned->max(node->max());
+ cloned->num_bits(node->num_bits());
+ cloned->narrow_range(node->narrow_range());
+ }
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleFakeQuant.test.cpp b/compiler/luci/service/src/Nodes/CircleFakeQuant.test.cpp
new file mode 100644
index 000000000..2c4e3b836
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleFakeQuant.test.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_FakeQuant)
+{
+ auto g = loco::make_graph();
+ auto node_fq = g->nodes()->create<luci::CircleFakeQuant>();
+ node_fq->min(1.0f);
+ node_fq->max(2.0f);
+ node_fq->num_bits(8);
+ node_fq->narrow_range(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_fq, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_fq = dynamic_cast<luci::CircleFakeQuant *>(cloned);
+ ASSERT_NE(nullptr, cloned_fq);
+ ASSERT_EQ(node_fq->min(), cloned_fq->min());
+ ASSERT_EQ(node_fq->max(), cloned_fq->max());
+ ASSERT_EQ(node_fq->num_bits(), cloned_fq->num_bits());
+ ASSERT_EQ(node_fq->narrow_range(), cloned_fq->narrow_range());
+}
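
The FakeQuant case shows the clone/downcast/compare boilerplate at its longest: four attributes, four assertions. A test-side helper sketch (hypothetical, not part of this change) that collapses the repetition shared by all of these tests:

    #include "luci/Service/CircleNodeClone.h"

    // Clone src into dst and downcast to its concrete type; returns nullptr
    // when clone_node rejects the node or produces a different type.
    template <typename NodeT> NodeT *clone_as(NodeT *src, loco::Graph *dst)
    {
      auto *cloned = luci::clone_node(src, dst);
      return dynamic_cast<NodeT *>(cloned);
    }

Used as clone_as(node_fq, gc.get()), the remaining assertions reduce to the attribute comparisons themselves.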
diff --git a/compiler/luci/service/src/Nodes/CircleFill.cpp b/compiler/luci/service/src/Nodes/CircleFill.cpp
new file mode 100644
index 000000000..9377c0c9e
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleFill.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::DEF>::visit(const luci::CircleFill *)
+{
+ return _graph->nodes()->create<luci::CircleFill>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleFill.test.cpp b/compiler/luci/service/src/Nodes/CircleFill.test.cpp
new file mode 100644
index 000000000..56c807585
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleFill.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Fill)
+{
+ auto g = loco::make_graph();
+ auto node_fill = g->nodes()->create<luci::CircleFill>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_fill, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_fill = dynamic_cast<luci::CircleFill *>(cloned);
+ ASSERT_NE(nullptr, cloned_fill);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleFloor.cpp b/compiler/luci/service/src/Nodes/CircleFloor.cpp
new file mode 100644
index 000000000..e7d42d57e
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleFloor.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::DEF>::visit(const luci::CircleFloor *)
+{
+ return _graph->nodes()->create<luci::CircleFloor>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleFloor.test.cpp b/compiler/luci/service/src/Nodes/CircleFloor.test.cpp
new file mode 100644
index 000000000..3d53fd2c3
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleFloor.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Floor)
+{
+ auto g = loco::make_graph();
+ auto node_floor = g->nodes()->create<luci::CircleFloor>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_floor, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_floor = dynamic_cast<luci::CircleFloor *>(cloned);
+ ASSERT_NE(nullptr, cloned_floor);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleFloorDiv.cpp b/compiler/luci/service/src/Nodes/CircleFloorDiv.cpp
new file mode 100644
index 000000000..456eddc2e
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleFloorDiv.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::DEF>::visit(const luci::CircleFloorDiv *)
+{
+ return _graph->nodes()->create<luci::CircleFloorDiv>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleFloorDiv.test.cpp b/compiler/luci/service/src/Nodes/CircleFloorDiv.test.cpp
new file mode 100644
index 000000000..6365ccd3b
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleFloorDiv.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_FloorDiv)
+{
+ auto g = loco::make_graph();
+ auto node_floordiv = g->nodes()->create<luci::CircleFloorDiv>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_floordiv, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_floordiv = dynamic_cast<luci::CircleFloorDiv *>(cloned);
+ ASSERT_NE(nullptr, cloned_floordiv);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleFloorMod.cpp b/compiler/luci/service/src/Nodes/CircleFloorMod.cpp
new file mode 100644
index 000000000..759abcda9
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleFloorMod.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::DEF>::visit(const luci::CircleFloorMod *)
+{
+ return _graph->nodes()->create<luci::CircleFloorMod>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleFloorMod.test.cpp b/compiler/luci/service/src/Nodes/CircleFloorMod.test.cpp
new file mode 100644
index 000000000..ce91d5881
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleFloorMod.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_FloorMod)
+{
+ auto g = loco::make_graph();
+ auto node_floormod = g->nodes()->create<luci::CircleFloorMod>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_floormod, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_floormod = dynamic_cast<luci::CircleFloorMod *>(cloned);
+ ASSERT_NE(nullptr, cloned_floormod);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleFullyConnected.cpp b/compiler/luci/service/src/Nodes/CircleFullyConnected.cpp
new file mode 100644
index 000000000..7c37060c4
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleFullyConnected.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::DEF>::visit(const luci::CircleFullyConnected *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+ if (node->weights_format() == luci::CircleFullyConnected::WeightsFormat::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleFullyConnected>();
+ if (cloned != nullptr)
+ {
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ cloned->weights_format(node->weights_format());
+ cloned->keep_num_dims(node->keep_num_dims());
+ }
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleFullyConnected.test.cpp b/compiler/luci/service/src/Nodes/CircleFullyConnected.test.cpp
new file mode 100644
index 000000000..965b59130
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleFullyConnected.test.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_FullyConnected)
+{
+ auto g = loco::make_graph();
+ auto node_fc = g->nodes()->create<luci::CircleFullyConnected>();
+ node_fc->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_fc->weights_format(luci::CircleFullyConnected::WeightsFormat::DEFAULT);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_fc, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_fc = dynamic_cast<luci::CircleFullyConnected *>(cloned);
+ ASSERT_NE(nullptr, cloned_fc);
+ ASSERT_EQ(node_fc->fusedActivationFunction(), cloned_fc->fusedActivationFunction());
+ ASSERT_EQ(node_fc->weights_format(), cloned_fc->weights_format());
+}
+
+TEST(CloneNodeTest, clone_FullyConnected_fusedact_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_fc = g->nodes()->create<luci::CircleFullyConnected>();
+ node_fc->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ node_fc->weights_format(luci::CircleFullyConnected::WeightsFormat::DEFAULT);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_fc, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
+
+TEST(CloneNodeTest, clone_FullyConnected_wf_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_fc = g->nodes()->create<luci::CircleFullyConnected>();
+ node_fc->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_fc->weights_format(luci::CircleFullyConnected::WeightsFormat::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_fc, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleGather.cpp b/compiler/luci/service/src/Nodes/CircleGather.cpp
new file mode 100644
index 000000000..66fc90dd7
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleGather.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::GHIJ>::visit(const luci::CircleGather *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleGather>();
+ if (cloned != nullptr)
+ cloned->axis(node->axis());
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleGather.test.cpp b/compiler/luci/service/src/Nodes/CircleGather.test.cpp
new file mode 100644
index 000000000..f48dbdb67
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleGather.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Gather)
+{
+ auto g = loco::make_graph();
+ auto node_gat = g->nodes()->create<luci::CircleGather>();
+ node_gat->axis(3);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_gat, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_gat = dynamic_cast<luci::CircleGather *>(cloned);
+ ASSERT_NE(nullptr, cloned_gat);
+ ASSERT_EQ(node_gat->axis(), cloned_gat->axis());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleGatherNd.cpp b/compiler/luci/service/src/Nodes/CircleGatherNd.cpp
new file mode 100644
index 000000000..4c017448d
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleGatherNd.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::GHIJ>::visit(const luci::CircleGatherNd *)
+{
+ return _graph->nodes()->create<luci::CircleGatherNd>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleGatherNd.test.cpp b/compiler/luci/service/src/Nodes/CircleGatherNd.test.cpp
new file mode 100644
index 000000000..3a705710c
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleGatherNd.test.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Service/CircleShapeInference.h>
+
+#include <loco/IR/TensorShape.h>
+
+#include <oops/InternalExn.h>
+
+#include <gtest/gtest.h>
+
+TEST(ShapeRuleTest, gather_nd_simple)
+{
+ luci::CircleInput input;
+ luci::CircleConst indices_const;
+ luci::CircleGatherNd gather_nd;
+
+ input.shape({1, 4, 4, 3});
+ indices_const.shape({1, 2, 3});
+
+ input.shape_status(luci::ShapeStatus::VALID);
+ indices_const.shape_status(luci::ShapeStatus::VALID);
+
+ gather_nd.params(&input);
+ gather_nd.indices(&indices_const);
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
+ ASSERT_TRUE(shape_inf_rule.infer(&gather_nd, shape));
+ ASSERT_EQ(3, shape.rank());
+ ASSERT_EQ(1, shape.dim(0).value());
+ ASSERT_EQ(2, shape.dim(1).value());
+ ASSERT_EQ(3, shape.dim(2).value());
+}
+
+TEST(ShapeRuleTest, gather_nd_slices)
+{
+ luci::CircleInput input;
+ luci::CircleConst indices_const;
+ luci::CircleGatherNd gather_nd;
+
+ input.shape({1, 4, 4, 3});
+ indices_const.shape({1, 2, 1});
+
+ input.shape_status(luci::ShapeStatus::VALID);
+ indices_const.shape_status(luci::ShapeStatus::VALID);
+
+ gather_nd.params(&input);
+ gather_nd.indices(&indices_const);
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
+ ASSERT_TRUE(shape_inf_rule.infer(&gather_nd, shape));
+ ASSERT_EQ(5, shape.rank());
+ ASSERT_EQ(1, shape.dim(0).value());
+ ASSERT_EQ(2, shape.dim(1).value());
+ ASSERT_EQ(4, shape.dim(2).value());
+ ASSERT_EQ(4, shape.dim(3).value());
+ ASSERT_EQ(3, shape.dim(4).value());
+}
+
+TEST(ShapeRuleTest, gather_nd_NEG)
+{
+ luci::CircleInput input;
+ luci::CircleConst indices_const;
+ luci::CircleGatherNd gather_nd;
+
+ input.shape({1, 4, 4, 3});
+ indices_const.shape({1, 2, 5});
+
+ input.shape_status(luci::ShapeStatus::VALID);
+ indices_const.shape_status(luci::ShapeStatus::VALID);
+
+ gather_nd.params(&input);
+ gather_nd.indices(&indices_const);
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
+ ASSERT_THROW(shape_inf_rule.infer(&gather_nd, shape), oops::InternalExn);
+}
+
+TEST(CloneNodeTest, clone_GatherNd)
+{
+ auto g = loco::make_graph();
+ auto node_gtnd = g->nodes()->create<luci::CircleGatherNd>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_gtnd, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_gtnd = dynamic_cast<luci::CircleGatherNd *>(cloned);
+ ASSERT_NE(nullptr, cloned_gtnd);
+}
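
The three GatherNd shape tests encode the usual rank rule: the last dimension of indices says how many leading params dimensions are consumed, the other indices dimensions are batch shape, and whatever is left of params is appended. So {1, 2, 3} indices over {1, 4, 4, 3} params yield rank (3 - 1) + (4 - 3) = 3, while {1, 2, 1} yields (3 - 1) + (4 - 1) = 5, and a last dimension of 5 over rank-4 params is rejected. A sketch of that arithmetic (the real rule throws oops::InternalExn rather than std::runtime_error):

    #include <cstdint>
    #include <stdexcept>

    uint32_t gather_nd_out_rank(uint32_t params_rank, uint32_t indices_rank,
                                uint32_t indices_last_dim)
    {
      if (indices_last_dim > params_rank)
        throw std::runtime_error("indices.dim(-1) exceeds params rank"); // NEG case
      // indices batch dims + unconsumed trailing params dims
      return (indices_rank - 1) + (params_rank - indices_last_dim);
    }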
diff --git a/compiler/luci/service/src/Nodes/CircleGelu.cpp b/compiler/luci/service/src/Nodes/CircleGelu.cpp
new file mode 100644
index 000000000..62a0d4094
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleGelu.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::GHIJ>::visit(const luci::CircleGelu *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleGelu>();
+ if (cloned != nullptr)
+ cloned->approximate(node->approximate());
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleGelu.test.cpp b/compiler/luci/service/src/Nodes/CircleGelu.test.cpp
new file mode 100644
index 000000000..a043b2a5d
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleGelu.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Gelu)
+{
+ auto g = loco::make_graph();
+ auto node_gelu = g->nodes()->create<luci::CircleGelu>();
+ node_gelu->approximate(false);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_gelu, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_gelu = dynamic_cast<luci::CircleGelu *>(cloned);
+ ASSERT_NE(nullptr, cloned_gelu);
+ ASSERT_EQ(node_gelu->approximate(), cloned_gelu->approximate());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleGreater.cpp b/compiler/luci/service/src/Nodes/CircleGreater.cpp
new file mode 100644
index 000000000..95fdebae7
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleGreater.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::GHIJ>::visit(const luci::CircleGreater *)
+{
+ return _graph->nodes()->create<luci::CircleGreater>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleGreater.test.cpp b/compiler/luci/service/src/Nodes/CircleGreater.test.cpp
new file mode 100644
index 000000000..6d2df61f0
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleGreater.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Greater)
+{
+ auto g = loco::make_graph();
+ auto node_gt = g->nodes()->create<luci::CircleGreater>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_gt, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_gt = dynamic_cast<luci::CircleGreater *>(cloned);
+ ASSERT_NE(nullptr, cloned_gt);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleGreaterEqual.cpp b/compiler/luci/service/src/Nodes/CircleGreaterEqual.cpp
new file mode 100644
index 000000000..1fdd00196
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleGreaterEqual.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::GHIJ>::visit(const luci::CircleGreaterEqual *)
+{
+ return _graph->nodes()->create<luci::CircleGreaterEqual>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleGreaterEqual.test.cpp b/compiler/luci/service/src/Nodes/CircleGreaterEqual.test.cpp
new file mode 100644
index 000000000..10387df3a
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleGreaterEqual.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_GreaterEqual)
+{
+ auto g = loco::make_graph();
+ auto node_ge = g->nodes()->create<luci::CircleGreaterEqual>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_ge, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_ge = dynamic_cast<luci::CircleGreaterEqual *>(cloned);
+ ASSERT_NE(nullptr, cloned_ge);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleHardSwish.cpp b/compiler/luci/service/src/Nodes/CircleHardSwish.cpp
new file mode 100644
index 000000000..bbc466e3f
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleHardSwish.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::GHIJ>::visit(const luci::CircleHardSwish *)
+{
+ return _graph->nodes()->create<luci::CircleHardSwish>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleHardSwish.test.cpp b/compiler/luci/service/src/Nodes/CircleHardSwish.test.cpp
new file mode 100644
index 000000000..b79386bea
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleHardSwish.test.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Service/CircleShapeInference.h>
+#include <luci/Service/CircleTypeInference.h>
+
+#include <loco/IR/TensorShape.h>
+
+#include <gtest/gtest.h>
+
+TEST(ShapeRuleTest, simple_hardswish)
+{
+ luci::CircleInput input;
+ luci::CircleHardSwish hard_swish;
+
+ input.shape({3, 4});
+ input.shape_status(luci::ShapeStatus::VALID);
+
+ hard_swish.features(&input);
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
+ ASSERT_TRUE(shape_inf_rule.infer(&hard_swish, shape));
+ ASSERT_EQ(2, shape.rank());
+ ASSERT_EQ(3, shape.dim(0).value());
+ ASSERT_EQ(4, shape.dim(1).value());
+}
+
+TEST(DataTypeRuleTest, simple_hardswish)
+{
+ luci::CircleInput input;
+ luci::CircleHardSwish hard_swish;
+
+ input.dtype(loco::DataType::S32);
+
+ hard_swish.features(&input);
+
+ loco::DataType dtype;
+ luci::tinf::Rule type_inf_rule;
+
+ ASSERT_TRUE(type_inf_rule.infer(&hard_swish, dtype));
+ ASSERT_EQ(loco::DataType::S32, dtype);
+}
+
+TEST(CloneNodeTest, clone_HardSwish)
+{
+ auto g = loco::make_graph();
+ auto node_hardswish = g->nodes()->create<luci::CircleHardSwish>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_hardswish, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_hardswish = dynamic_cast<luci::CircleHardSwish *>(cloned);
+ ASSERT_NE(nullptr, cloned_hardswish);
+}
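
The ShapeRuleTest and DataTypeRuleTest cases above drive luci::sinf::Rule and luci::tinf::Rule directly on stack-allocated nodes instead of importing a full model: HardSwish is element-wise, so the inferred output shape and dtype must equal the input's, and inference only succeeds once the input shape is marked VALID. A minimal sketch of that element-wise contract, with placeholder types rather than luci's:

    // Sketch of the element-wise inference contract exercised by the
    // HardSwish tests above. Tensor and the helper are placeholders,
    // not luci types.
    #include <cassert>
    #include <vector>

    struct Tensor
    {
      std::vector<unsigned> shape;
      bool shape_valid = false; // stand-in for luci::ShapeStatus::VALID
    };

    // For an element-wise op, shape inference succeeds only when the input
    // shape is known, and simply forwards it.
    bool infer_elementwise_shape(const Tensor &input, std::vector<unsigned> &out)
    {
      if (!input.shape_valid)
        return false;
      out = input.shape;
      return true;
    }

    int main()
    {
      Tensor input{{3, 4}, true};
      std::vector<unsigned> shape;
      assert(infer_elementwise_shape(input, shape));
      assert(shape.size() == 2 && shape[0] == 3 && shape[1] == 4);
      return 0;
    }
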
diff --git a/compiler/luci/service/src/Nodes/CircleIf.cpp b/compiler/luci/service/src/Nodes/CircleIf.cpp
new file mode 100644
index 000000000..6d45c9583
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleIf.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::GHIJ>::visit(const luci::CircleIf *node)
+{
+ auto ic = node->input_count();
+ auto oc = node->output_count();
+
+ return _graph->nodes()->create<luci::CircleIf>(ic, oc);
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleIf.test.cpp b/compiler/luci/service/src/Nodes/CircleIf.test.cpp
new file mode 100644
index 000000000..0dece100a
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleIf.test.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_If)
+{
+ auto g = loco::make_graph();
+ auto node_if = g->nodes()->create<luci::CircleIf>(1, 1);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_if, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_if = dynamic_cast<luci::CircleIf *>(cloned);
+ ASSERT_NE(nullptr, cloned_if);
+ ASSERT_EQ(-1, cloned_if->then_branch());
+ ASSERT_EQ(-1, cloned_if->else_branch());
+ ASSERT_EQ(nullptr, cloned_if->then_graph());
+ ASSERT_EQ(nullptr, cloned_if->else_graph());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleIfOut.cpp b/compiler/luci/service/src/Nodes/CircleIfOut.cpp
new file mode 100644
index 000000000..b555a9538
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleIfOut.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <luci/Service/CircleShapeInference.h>
+#include <luci/Service/CircleTypeInference.h>
+
+#include "CircleCloneNode.h"
+
+namespace
+{
+
+struct CircleIfOutGraphs
+{
+ loco::GraphOutput *then_graph_output;
+ loco::GraphOutput *else_graph_output;
+};
+
+} // namespace
+
+namespace
+{
+
+CircleIfOutGraphs get_out_graphs(const luci::CircleIfOut *node)
+{
+ CircleIfOutGraphs ret_out;
+
+ /**
+ * @note The type and shape of the IF operator are those of the "then"
+ * and "else" Graph Outputs.
+ */
+ auto circle_if = loco::must_cast<const luci::CircleIf *>(node->input());
+
+ auto index = node->index();
+ auto then_graph = circle_if->then_graph();
+ auto else_graph = circle_if->else_graph();
+ assert(then_graph != nullptr);
+ assert(else_graph != nullptr);
+
+ // the shape and type of both branches' outputs are assumed to be the same;
+ // this is checked at post_import_graph() in Import
+ auto then_outputs = loco::output_nodes(then_graph);
+ auto else_outputs = loco::output_nodes(else_graph);
+ assert(then_outputs.size() == else_outputs.size());
+ assert(index < static_cast<int32_t>(then_outputs.size()));
+
+ auto then_out = loco::must_cast<luci::CircleOutput *>(then_outputs.at(index));
+ auto else_out = loco::must_cast<luci::CircleOutput *>(else_outputs.at(index));
+
+ auto then_graph_outputs = then_graph->outputs(); // loco::GraphOutput items
+ auto else_graph_outputs = else_graph->outputs();
+ assert(then_graph_outputs->size() == else_graph_outputs->size());
+
+ ret_out.then_graph_output = then_graph_outputs->at(then_out->index());
+ ret_out.else_graph_output = else_graph_outputs->at(else_out->index());
+
+ return ret_out;
+}
+
+} // namespace
+
+namespace luci
+{
+
+loco::TensorShape sinf::Algorithm::visit(const luci::CircleIfOut *node)
+{
+ auto graphs = get_out_graphs(node);
+ assert(*graphs.then_graph_output->shape() == *graphs.else_graph_output->shape());
+ return *graphs.then_graph_output->shape();
+}
+
+loco::DataType tinf::Algorithm::visit(const luci::CircleIfOut *node)
+{
+ auto graphs = get_out_graphs(node);
+ assert(graphs.then_graph_output->dtype() == graphs.else_graph_output->dtype());
+ return graphs.then_graph_output->dtype();
+}
+
+} // namespace luci
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleIfOut *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleIfOut>();
+ if (cloned != nullptr)
+ cloned->index(node->index());
+ return cloned;
+}
+
+} // namespace luci
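
get_out_graphs() above encodes the invariant that makes CircleIfOut inference possible: the k-th outputs of the "then" and "else" subgraphs must agree in shape and dtype, so inference may return either side. A self-contained sketch of that invariant, with placeholder types standing in for the loco/luci ones:

    // Sketch of the invariant behind CircleIfOut shape/type inference: the
    // k-th outputs of the "then" and "else" graphs must match, so inference
    // may take either side. GraphOutput is a placeholder, not the loco type.
    #include <cassert>
    #include <cstdint>
    #include <vector>

    struct GraphOutput
    {
      std::vector<uint32_t> shape;
      int32_t dtype; // stand-in for loco::DataType
    };

    GraphOutput infer_if_out(const GraphOutput &then_out, const GraphOutput &else_out)
    {
      // Guaranteed at import time (post_import_graph()); asserted again here.
      assert(then_out.shape == else_out.shape);
      assert(then_out.dtype == else_out.dtype);
      return then_out; // the "then" side is returned, but either would do
    }

    int main()
    {
      GraphOutput then_out{{3, 4}, 1};
      GraphOutput else_out{{3, 4}, 1};
      auto out = infer_if_out(then_out, else_out);
      assert(out.shape == (std::vector<uint32_t>{3, 4}));
      return 0;
    }
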
diff --git a/compiler/luci/service/src/Nodes/CircleIfOut.test.cpp b/compiler/luci/service/src/Nodes/CircleIfOut.test.cpp
new file mode 100644
index 000000000..666a15105
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleIfOut.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_IfOut)
+{
+ auto g = loco::make_graph();
+ auto node_iout = g->nodes()->create<luci::CircleIfOut>();
+ node_iout->index(1);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_iout, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_iout = dynamic_cast<luci::CircleIfOut *>(cloned);
+ ASSERT_NE(nullptr, cloned_iout);
+ ASSERT_EQ(node_iout->index(), cloned_iout->index());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleInstanceNorm.cpp b/compiler/luci/service/src/Nodes/CircleInstanceNorm.cpp
new file mode 100644
index 000000000..d9e49d8ed
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleInstanceNorm.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleInstanceNorm *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleInstanceNorm>();
+ if (cloned != nullptr)
+ {
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ cloned->epsilon(node->epsilon());
+ }
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleInstanceNorm.test.cpp b/compiler/luci/service/src/Nodes/CircleInstanceNorm.test.cpp
new file mode 100644
index 000000000..bae92b1ae
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleInstanceNorm.test.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_InstanceNorm)
+{
+ auto g = loco::make_graph();
+ auto node_inorm = g->nodes()->create<luci::CircleInstanceNorm>();
+ node_inorm->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_inorm->epsilon(3);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_inorm, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_inorm = dynamic_cast<luci::CircleInstanceNorm *>(cloned);
+ ASSERT_NE(nullptr, cloned_inorm);
+ ASSERT_EQ(node_inorm->fusedActivationFunction(), cloned_inorm->fusedActivationFunction());
+ ASSERT_EQ(node_inorm->epsilon(), cloned_inorm->epsilon());
+}
+
+TEST(CloneNodeTest, clone_InstanceNorm_fusedact_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_inorm = g->nodes()->create<luci::CircleInstanceNorm>();
+ node_inorm->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_inorm, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
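
The _NEG test above pins down a convention shared by several visitors in this diff (InstanceNorm, L2Normalize, L2Pool2D, MaxPool2D): cloning refuses nodes whose enum attributes are still UNDEFINED and signals this by returning nullptr. A self-contained sketch of that guard, with placeholder enums standing in for luci::FusedActFunc and luci::Padding:

    // Sketch of the UNDEFINED-attribute guard used by the clone visitors in
    // this diff. The enums and PoolNode are placeholders, not luci types.
    #include <cassert>

    enum class FusedActFunc { UNDEFINED, NONE, RELU };
    enum class Padding { UNDEFINED, SAME, VALID };

    struct PoolNode
    {
      FusedActFunc act = FusedActFunc::UNDEFINED;
      Padding pad = Padding::UNDEFINED;
    };

    // Refuse to clone half-initialized nodes; callers treat nullptr as failure.
    PoolNode *clone_pool(const PoolNode *node)
    {
      if (node->act == FusedActFunc::UNDEFINED)
        return nullptr;
      if (node->pad == Padding::UNDEFINED)
        return nullptr;
      return new PoolNode{node->act, node->pad};
    }

    int main()
    {
      PoolNode bad; // both attributes left UNDEFINED
      assert(clone_pool(&bad) == nullptr);

      PoolNode good{FusedActFunc::RELU, Padding::SAME};
      PoolNode *cloned = clone_pool(&good);
      assert(cloned != nullptr && cloned->act == FusedActFunc::RELU);
      delete cloned;
      return 0;
    }
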
diff --git a/compiler/luci/service/src/Nodes/CircleL2Normalize.cpp b/compiler/luci/service/src/Nodes/CircleL2Normalize.cpp
new file mode 100644
index 000000000..6a9f61f82
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleL2Normalize.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::KLMN>::visit(const luci::CircleL2Normalize *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleL2Normalize>();
+ if (cloned != nullptr)
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleL2Normalize.test.cpp b/compiler/luci/service/src/Nodes/CircleL2Normalize.test.cpp
new file mode 100644
index 000000000..0f148797e
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleL2Normalize.test.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_L2Normalize)
+{
+ auto g = loco::make_graph();
+ auto node_l2n = g->nodes()->create<luci::CircleL2Normalize>();
+ node_l2n->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_l2n, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_l2n = dynamic_cast<luci::CircleL2Normalize *>(cloned);
+ ASSERT_NE(nullptr, cloned_l2n);
+ ASSERT_EQ(node_l2n->fusedActivationFunction(), cloned_l2n->fusedActivationFunction());
+}
+
+TEST(CloneNodeTest, clone_L2Normalize_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_l2n = g->nodes()->create<luci::CircleL2Normalize>();
+ node_l2n->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_l2n, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleL2Pool2D.cpp b/compiler/luci/service/src/Nodes/CircleL2Pool2D.cpp
new file mode 100644
index 000000000..532b36237
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleL2Pool2D.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::KLMN>::visit(const luci::CircleL2Pool2D *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+ if (node->padding() == luci::Padding::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleL2Pool2D>();
+ if (cloned != nullptr)
+ {
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ cloned->padding(node->padding());
+ cloned->filter()->h(node->filter()->h());
+ cloned->filter()->w(node->filter()->w());
+ cloned->stride()->h(node->stride()->h());
+ cloned->stride()->w(node->stride()->w());
+ }
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleL2Pool2D.test.cpp b/compiler/luci/service/src/Nodes/CircleL2Pool2D.test.cpp
new file mode 100644
index 000000000..37344fd9a
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleL2Pool2D.test.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_L2Pool2D)
+{
+ auto g = loco::make_graph();
+ auto node_l2p = g->nodes()->create<luci::CircleL2Pool2D>();
+ node_l2p->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_l2p->padding(luci::Padding::SAME);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_l2p, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_l2p = dynamic_cast<luci::CircleL2Pool2D *>(cloned);
+ ASSERT_NE(nullptr, cloned_l2p);
+ ASSERT_EQ(node_l2p->fusedActivationFunction(), cloned_l2p->fusedActivationFunction());
+ ASSERT_EQ(node_l2p->padding(), cloned_l2p->padding());
+}
+
+TEST(CloneNodeTest, clone_L2Pool2D_fusedact_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_l2p = g->nodes()->create<luci::CircleL2Pool2D>();
+ node_l2p->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ node_l2p->padding(luci::Padding::SAME);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_l2p, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
+
+TEST(CloneNodeTest, clone_L2Pool2D_padding_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_l2p = g->nodes()->create<luci::CircleL2Pool2D>();
+ node_l2p->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_l2p->padding(luci::Padding::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_l2p, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleLeakyRelu.cpp b/compiler/luci/service/src/Nodes/CircleLeakyRelu.cpp
new file mode 100644
index 000000000..0432bbcfc
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleLeakyRelu.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::KLMN>::visit(const luci::CircleLeakyRelu *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleLeakyRelu>();
+ if (cloned != nullptr)
+ cloned->alpha(node->alpha());
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleLeakyRelu.test.cpp b/compiler/luci/service/src/Nodes/CircleLeakyRelu.test.cpp
new file mode 100644
index 000000000..17fc1442a
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleLeakyRelu.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_LeakyRelu)
+{
+ auto g = loco::make_graph();
+ auto node_lr = g->nodes()->create<luci::CircleLeakyRelu>();
+ node_lr->alpha(1.2f);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_lr, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_lr = dynamic_cast<luci::CircleLeakyRelu *>(cloned);
+ ASSERT_NE(nullptr, cloned_lr);
+ ASSERT_EQ(node_lr->alpha(), cloned_lr->alpha());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleLess.cpp b/compiler/luci/service/src/Nodes/CircleLess.cpp
new file mode 100644
index 000000000..d3e72d03a
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleLess.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::KLMN>::visit(const luci::CircleLess *)
+{
+ return _graph->nodes()->create<luci::CircleLess>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleLess.test.cpp b/compiler/luci/service/src/Nodes/CircleLess.test.cpp
new file mode 100644
index 000000000..43248948d
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleLess.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Less)
+{
+ auto g = loco::make_graph();
+ auto node_less = g->nodes()->create<luci::CircleLess>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_less, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_less = dynamic_cast<luci::CircleLess *>(cloned);
+ ASSERT_NE(nullptr, cloned_less);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleLessEqual.cpp b/compiler/luci/service/src/Nodes/CircleLessEqual.cpp
new file mode 100644
index 000000000..ba3c80233
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleLessEqual.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::KLMN>::visit(const luci::CircleLessEqual *)
+{
+ return _graph->nodes()->create<luci::CircleLessEqual>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleLessEqual.test.cpp b/compiler/luci/service/src/Nodes/CircleLessEqual.test.cpp
new file mode 100644
index 000000000..0a87daf5d
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleLessEqual.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_LessEqual)
+{
+ auto g = loco::make_graph();
+ auto node_le = g->nodes()->create<luci::CircleLessEqual>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_le, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_le = dynamic_cast<luci::CircleLessEqual *>(cloned);
+ ASSERT_NE(nullptr, cloned_le);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleLocalResponseNormalization.cpp b/compiler/luci/service/src/Nodes/CircleLocalResponseNormalization.cpp
new file mode 100644
index 000000000..7e5f7a993
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleLocalResponseNormalization.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::KLMN>::visit(const luci::CircleLocalResponseNormalization *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleLocalResponseNormalization>();
+ if (cloned != nullptr)
+ {
+ cloned->radius(node->radius());
+ cloned->bias(node->bias());
+ cloned->alpha(node->alpha());
+ cloned->beta(node->beta());
+ }
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleLocalResponseNormalization.test.cpp b/compiler/luci/service/src/Nodes/CircleLocalResponseNormalization.test.cpp
new file mode 100644
index 000000000..262b119bb
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleLocalResponseNormalization.test.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_LocalResponseNormalization)
+{
+ auto g = loco::make_graph();
+ auto node_lrn = g->nodes()->create<luci::CircleLocalResponseNormalization>();
+ node_lrn->radius(32);
+ node_lrn->bias(1.2f);
+ node_lrn->alpha(3.4f);
+ node_lrn->beta(5.7f);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_lrn, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_lrn = dynamic_cast<luci::CircleLocalResponseNormalization *>(cloned);
+ ASSERT_NE(nullptr, cloned_lrn);
+ ASSERT_EQ(node_lrn->radius(), cloned_lrn->radius());
+ ASSERT_EQ(node_lrn->bias(), cloned_lrn->bias());
+ ASSERT_EQ(node_lrn->alpha(), cloned_lrn->alpha());
+ ASSERT_EQ(node_lrn->beta(), cloned_lrn->beta());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleLog.cpp b/compiler/luci/service/src/Nodes/CircleLog.cpp
new file mode 100644
index 000000000..7c8562af3
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleLog.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::KLMN>::visit(const luci::CircleLog *)
+{
+ return _graph->nodes()->create<luci::CircleLog>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleLog.test.cpp b/compiler/luci/service/src/Nodes/CircleLog.test.cpp
new file mode 100644
index 000000000..d1ee1428e
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleLog.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Log)
+{
+ auto g = loco::make_graph();
+ auto node_log = g->nodes()->create<luci::CircleLog>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_log, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_log = dynamic_cast<luci::CircleLog *>(cloned);
+ ASSERT_NE(nullptr, cloned_log);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleLogSoftmax.cpp b/compiler/luci/service/src/Nodes/CircleLogSoftmax.cpp
new file mode 100644
index 000000000..5d843e13a
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleLogSoftmax.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::KLMN>::visit(const luci::CircleLogSoftmax *)
+{
+ return _graph->nodes()->create<luci::CircleLogSoftmax>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleLogSoftmax.test.cpp b/compiler/luci/service/src/Nodes/CircleLogSoftmax.test.cpp
new file mode 100644
index 000000000..feebb79cb
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleLogSoftmax.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_LogSoftmax)
+{
+ auto g = loco::make_graph();
+ auto node_logs = g->nodes()->create<luci::CircleLogSoftmax>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_logs, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_logs = dynamic_cast<luci::CircleLogSoftmax *>(cloned);
+ ASSERT_NE(nullptr, cloned_logs);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleLogicalAnd.cpp b/compiler/luci/service/src/Nodes/CircleLogicalAnd.cpp
new file mode 100644
index 000000000..07e09d1d1
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleLogicalAnd.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::KLMN>::visit(const luci::CircleLogicalAnd *)
+{
+ return _graph->nodes()->create<luci::CircleLogicalAnd>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleLogicalAnd.test.cpp b/compiler/luci/service/src/Nodes/CircleLogicalAnd.test.cpp
new file mode 100644
index 000000000..aa811edfa
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleLogicalAnd.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_LogicalAnd)
+{
+ auto g = loco::make_graph();
+ auto node_logand = g->nodes()->create<luci::CircleLogicalAnd>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_logand, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_logand = dynamic_cast<luci::CircleLogicalAnd *>(cloned);
+ ASSERT_NE(nullptr, cloned_logand);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleLogicalNot.cpp b/compiler/luci/service/src/Nodes/CircleLogicalNot.cpp
new file mode 100644
index 000000000..13abad56d
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleLogicalNot.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::KLMN>::visit(const luci::CircleLogicalNot *)
+{
+ return _graph->nodes()->create<luci::CircleLogicalNot>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleLogicalNot.test.cpp b/compiler/luci/service/src/Nodes/CircleLogicalNot.test.cpp
new file mode 100644
index 000000000..9e55be944
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleLogicalNot.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_LogicalNot)
+{
+ auto g = loco::make_graph();
+ auto node_lognot = g->nodes()->create<luci::CircleLogicalNot>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_lognot, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_lognot = dynamic_cast<luci::CircleLogicalNot *>(cloned);
+ ASSERT_NE(nullptr, cloned_lognot);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleLogicalOr.cpp b/compiler/luci/service/src/Nodes/CircleLogicalOr.cpp
new file mode 100644
index 000000000..7bd574527
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleLogicalOr.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::KLMN>::visit(const luci::CircleLogicalOr *)
+{
+ return _graph->nodes()->create<luci::CircleLogicalOr>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleLogicalOr.test.cpp b/compiler/luci/service/src/Nodes/CircleLogicalOr.test.cpp
new file mode 100644
index 000000000..19b706dcd
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleLogicalOr.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_LogicalOr)
+{
+ auto g = loco::make_graph();
+ auto node_logor = g->nodes()->create<luci::CircleLogicalOr>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_logor, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_logor = dynamic_cast<luci::CircleLogicalOr *>(cloned);
+ ASSERT_NE(nullptr, cloned_logor);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleLogistic.cpp b/compiler/luci/service/src/Nodes/CircleLogistic.cpp
new file mode 100644
index 000000000..41cd99b92
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleLogistic.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::KLMN>::visit(const luci::CircleLogistic *)
+{
+ return _graph->nodes()->create<luci::CircleLogistic>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleLogistic.test.cpp b/compiler/luci/service/src/Nodes/CircleLogistic.test.cpp
new file mode 100644
index 000000000..05dbe46e4
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleLogistic.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Logistic)
+{
+ auto g = loco::make_graph();
+ auto node_log = g->nodes()->create<luci::CircleLogistic>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_log, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_log = dynamic_cast<luci::CircleLogistic *>(cloned);
+ ASSERT_NE(nullptr, cloned_log);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleMatrixDiag.cpp b/compiler/luci/service/src/Nodes/CircleMatrixDiag.cpp
new file mode 100644
index 000000000..f1dd059eb
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleMatrixDiag.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::KLMN>::visit(const luci::CircleMatrixDiag *)
+{
+ return _graph->nodes()->create<luci::CircleMatrixDiag>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleMatrixDiag.test.cpp b/compiler/luci/service/src/Nodes/CircleMatrixDiag.test.cpp
new file mode 100644
index 000000000..c08c4cb94
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleMatrixDiag.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_MatrixDiag)
+{
+ auto g = loco::make_graph();
+ auto node_md = g->nodes()->create<luci::CircleMatrixDiag>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_md, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_md = dynamic_cast<luci::CircleMatrixDiag *>(cloned);
+ ASSERT_NE(nullptr, cloned_md);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleMatrixSetDiag.cpp b/compiler/luci/service/src/Nodes/CircleMatrixSetDiag.cpp
new file mode 100644
index 000000000..de5a85bcd
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleMatrixSetDiag.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::KLMN>::visit(const luci::CircleMatrixSetDiag *)
+{
+ return _graph->nodes()->create<luci::CircleMatrixSetDiag>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleMatrixSetDiag.test.cpp b/compiler/luci/service/src/Nodes/CircleMatrixSetDiag.test.cpp
new file mode 100644
index 000000000..5ea77ba75
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleMatrixSetDiag.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_MatrixSetDiag)
+{
+ auto g = loco::make_graph();
+ auto node_msd = g->nodes()->create<luci::CircleMatrixSetDiag>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_msd, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_msd = dynamic_cast<luci::CircleMatrixSetDiag *>(cloned);
+ ASSERT_NE(nullptr, cloned_msd);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleMaxPool2D.cpp b/compiler/luci/service/src/Nodes/CircleMaxPool2D.cpp
new file mode 100644
index 000000000..8b186094d
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleMaxPool2D.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::KLMN>::visit(const luci::CircleMaxPool2D *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+ if (node->padding() == luci::Padding::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleMaxPool2D>();
+ if (cloned != nullptr)
+ {
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ cloned->padding(node->padding());
+ cloned->filter()->h(node->filter()->h());
+ cloned->filter()->w(node->filter()->w());
+ cloned->stride()->h(node->stride()->h());
+ cloned->stride()->w(node->stride()->w());
+ }
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleMaxPool2D.test.cpp b/compiler/luci/service/src/Nodes/CircleMaxPool2D.test.cpp
new file mode 100644
index 000000000..415cf7c44
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleMaxPool2D.test.cpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_MaxPool2D)
+{
+ auto g = loco::make_graph();
+ auto node_mp = g->nodes()->create<luci::CircleMaxPool2D>();
+ node_mp->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_mp->padding(luci::Padding::SAME);
+ node_mp->filter()->h(1);
+ node_mp->filter()->w(2);
+ node_mp->stride()->h(3);
+ node_mp->stride()->w(4);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_mp, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_mp = dynamic_cast<luci::CircleMaxPool2D *>(cloned);
+ ASSERT_NE(nullptr, cloned_mp);
+ ASSERT_EQ(node_mp->fusedActivationFunction(), cloned_mp->fusedActivationFunction());
+ ASSERT_EQ(node_mp->padding(), cloned_mp->padding());
+ ASSERT_EQ(node_mp->filter()->h(), cloned_mp->filter()->h());
+ ASSERT_EQ(node_mp->filter()->w(), cloned_mp->filter()->w());
+ ASSERT_EQ(node_mp->stride()->h(), cloned_mp->stride()->h());
+ ASSERT_EQ(node_mp->stride()->w(), cloned_mp->stride()->w());
+}
+
+TEST(CloneNodeTest, clone_MaxPool2D_fusedact_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_mp = g->nodes()->create<luci::CircleMaxPool2D>();
+ node_mp->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+ node_mp->padding(luci::Padding::SAME);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_mp, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
+
+TEST(CloneNodeTest, clone_MaxPool2D_padding_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_mp = g->nodes()->create<luci::CircleMaxPool2D>();
+ node_mp->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_mp->padding(luci::Padding::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_mp, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleMaximum.cpp b/compiler/luci/service/src/Nodes/CircleMaximum.cpp
new file mode 100644
index 000000000..72c00af8c
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleMaximum.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::KLMN>::visit(const luci::CircleMaximum *)
+{
+ return _graph->nodes()->create<luci::CircleMaximum>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleMaximum.test.cpp b/compiler/luci/service/src/Nodes/CircleMaximum.test.cpp
new file mode 100644
index 000000000..6f1ada060
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleMaximum.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Maximum)
+{
+ auto g = loco::make_graph();
+ auto node_max = g->nodes()->create<luci::CircleMaximum>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_max, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_max = dynamic_cast<luci::CircleMaximum *>(cloned);
+ ASSERT_NE(nullptr, cloned_max);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleMean.cpp b/compiler/luci/service/src/Nodes/CircleMean.cpp
new file mode 100644
index 000000000..217735693
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleMean.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::KLMN>::visit(const luci::CircleMean *node)
+{
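+  // keep_dims is the only CircleMean attribute that must survive the clone.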
+ auto *cloned = _graph->nodes()->create<luci::CircleMean>();
+ if (cloned != nullptr)
+ cloned->keep_dims(node->keep_dims());
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleMean.test.cpp b/compiler/luci/service/src/Nodes/CircleMean.test.cpp
new file mode 100644
index 000000000..aa1b88f13
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleMean.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Mean)
+{
+ auto g = loco::make_graph();
+ auto node_mean = g->nodes()->create<luci::CircleMean>();
+ node_mean->keep_dims(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_mean, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_mean = dynamic_cast<luci::CircleMean *>(cloned);
+ ASSERT_NE(nullptr, cloned_mean);
+ ASSERT_EQ(node_mean->keep_dims(), cloned_mean->keep_dims());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleMinimum.cpp b/compiler/luci/service/src/Nodes/CircleMinimum.cpp
new file mode 100644
index 000000000..7a206e929
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleMinimum.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::KLMN>::visit(const luci::CircleMinimum *)
+{
+ return _graph->nodes()->create<luci::CircleMinimum>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleMinimum.test.cpp b/compiler/luci/service/src/Nodes/CircleMinimum.test.cpp
new file mode 100644
index 000000000..0a54be71c
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleMinimum.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Minimum)
+{
+ auto g = loco::make_graph();
+ auto node_min = g->nodes()->create<luci::CircleMinimum>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_min, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_min = dynamic_cast<luci::CircleMinimum *>(cloned);
+ ASSERT_NE(nullptr, cloned_min);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleMirrorPad.cpp b/compiler/luci/service/src/Nodes/CircleMirrorPad.cpp
new file mode 100644
index 000000000..8bc186a6b
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleMirrorPad.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::KLMN>::visit(const luci::CircleMirrorPad *node)
+{
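+  // An UNDEFINED pad mode cannot be represented in a valid clone, so reject it.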
+ if (node->mode() == luci::MirrorPadMode::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleMirrorPad>();
+ if (cloned != nullptr)
+ cloned->mode(node->mode());
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleMirrorPad.test.cpp b/compiler/luci/service/src/Nodes/CircleMirrorPad.test.cpp
new file mode 100644
index 000000000..911cf6d3b
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleMirrorPad.test.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_MirrorPad)
+{
+ auto g = loco::make_graph();
+ auto node_mp = g->nodes()->create<luci::CircleMirrorPad>();
+ node_mp->mode(luci::MirrorPadMode::REFLECT);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_mp, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_mp = dynamic_cast<luci::CircleMirrorPad *>(cloned);
+ ASSERT_NE(nullptr, cloned_mp);
+ ASSERT_EQ(node_mp->mode(), cloned_mp->mode());
+}
+
+TEST(CloneNodeTest, clone_MirrorPad_mode_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_mp = g->nodes()->create<luci::CircleMirrorPad>();
+ node_mp->mode(luci::MirrorPadMode::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_mp, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleMul.cpp b/compiler/luci/service/src/Nodes/CircleMul.cpp
new file mode 100644
index 000000000..f2ccfb899
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleMul.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::KLMN>::visit(const luci::CircleMul *node)
+{
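+  // Refuse to clone a Mul whose fused activation function was never set.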
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleMul>();
+ if (cloned != nullptr)
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleMul.test.cpp b/compiler/luci/service/src/Nodes/CircleMul.test.cpp
new file mode 100644
index 000000000..dc5565f11
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleMul.test.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Mul)
+{
+ auto g = loco::make_graph();
+ auto node_mul = g->nodes()->create<luci::CircleMul>();
+ node_mul->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_mul, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_mul = dynamic_cast<luci::CircleMul *>(cloned);
+ ASSERT_NE(nullptr, cloned_mul);
+ ASSERT_EQ(node_mul->fusedActivationFunction(), cloned_mul->fusedActivationFunction());
+}
+
+TEST(CloneNodeTest, clone_Mul_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_mul = g->nodes()->create<luci::CircleMul>();
+ node_mul->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_mul, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleNeg.cpp b/compiler/luci/service/src/Nodes/CircleNeg.cpp
new file mode 100644
index 000000000..20190fd89
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleNeg.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::KLMN>::visit(const luci::CircleNeg *)
+{
+ return _graph->nodes()->create<luci::CircleNeg>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleNeg.test.cpp b/compiler/luci/service/src/Nodes/CircleNeg.test.cpp
new file mode 100644
index 000000000..8c2880324
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleNeg.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Neg)
+{
+ auto g = loco::make_graph();
+ auto node_neg = g->nodes()->create<luci::CircleNeg>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_neg, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_neg = dynamic_cast<luci::CircleNeg *>(cloned);
+ ASSERT_NE(nullptr, cloned_neg);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleNonMaxSuppressionV4.cpp b/compiler/luci/service/src/Nodes/CircleNonMaxSuppressionV4.cpp
new file mode 100644
index 000000000..1024a2dfd
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleNonMaxSuppressionV4.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::KLMN>::visit(const luci::CircleNonMaxSuppressionV4 *)
+{
+ return _graph->nodes()->create<luci::CircleNonMaxSuppressionV4>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleNonMaxSuppressionV4.test.cpp b/compiler/luci/service/src/Nodes/CircleNonMaxSuppressionV4.test.cpp
new file mode 100644
index 000000000..34f5b0325
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleNonMaxSuppressionV4.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_NonMaxSuppressionV4)
+{
+ auto g = loco::make_graph();
+ auto node_nms = g->nodes()->create<luci::CircleNonMaxSuppressionV4>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_nms, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_nms = dynamic_cast<luci::CircleNonMaxSuppressionV4 *>(cloned);
+ ASSERT_NE(nullptr, cloned_nms);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleNonMaxSuppressionV4Out.cpp b/compiler/luci/service/src/Nodes/CircleNonMaxSuppressionV4Out.cpp
new file mode 100644
index 000000000..2a12f2a45
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleNonMaxSuppressionV4Out.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleNonMaxSuppressionV4Out *node)
+{
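+  // The index selects which NonMaxSuppressionV4 result this virtual output exposes.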
+ auto *cloned = _graph->nodes()->create<luci::CircleNonMaxSuppressionV4Out>();
+ if (cloned != nullptr)
+ cloned->index(node->index());
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleNonMaxSuppressionV4Out.test.cpp b/compiler/luci/service/src/Nodes/CircleNonMaxSuppressionV4Out.test.cpp
new file mode 100644
index 000000000..ed9e0e019
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleNonMaxSuppressionV4Out.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_NonMaxSuppressionV4Out)
+{
+ auto g = loco::make_graph();
+ auto node_nout = g->nodes()->create<luci::CircleNonMaxSuppressionV4Out>();
+ node_nout->index(1);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_nout, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_nout = dynamic_cast<luci::CircleNonMaxSuppressionV4Out *>(cloned);
+ ASSERT_NE(nullptr, cloned_nout);
+ ASSERT_EQ(node_nout->index(), cloned_nout->index());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleNonMaxSuppressionV5.cpp b/compiler/luci/service/src/Nodes/CircleNonMaxSuppressionV5.cpp
new file mode 100644
index 000000000..364c92749
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleNonMaxSuppressionV5.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::KLMN>::visit(const luci::CircleNonMaxSuppressionV5 *)
+{
+ return _graph->nodes()->create<luci::CircleNonMaxSuppressionV5>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleNonMaxSuppressionV5.test.cpp b/compiler/luci/service/src/Nodes/CircleNonMaxSuppressionV5.test.cpp
new file mode 100644
index 000000000..faaee969e
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleNonMaxSuppressionV5.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_NonMaxSuppressionV5)
+{
+ auto g = loco::make_graph();
+ auto node_nms = g->nodes()->create<luci::CircleNonMaxSuppressionV5>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_nms, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_nms = dynamic_cast<luci::CircleNonMaxSuppressionV5 *>(cloned);
+ ASSERT_NE(nullptr, cloned_nms);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleNonMaxSuppressionV5Out.cpp b/compiler/luci/service/src/Nodes/CircleNonMaxSuppressionV5Out.cpp
new file mode 100644
index 000000000..e1d7875e7
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleNonMaxSuppressionV5Out.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleNonMaxSuppressionV5Out *node)
+{
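+  // As with the V4 variant, the index selects the exposed result tensor.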
+ auto *cloned = _graph->nodes()->create<luci::CircleNonMaxSuppressionV5Out>();
+ if (cloned != nullptr)
+ cloned->index(node->index());
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp b/compiler/luci/service/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp
new file mode 100644
index 000000000..ef0f766b9
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_NonMaxSuppressionV5Out)
+{
+ auto g = loco::make_graph();
+ auto node_nout = g->nodes()->create<luci::CircleNonMaxSuppressionV5Out>();
+ node_nout->index(1);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_nout, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_nout = dynamic_cast<luci::CircleNonMaxSuppressionV5Out *>(cloned);
+ ASSERT_NE(nullptr, cloned_nout);
+ ASSERT_EQ(node_nout->index(), cloned_nout->index());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleNotEqual.cpp b/compiler/luci/service/src/Nodes/CircleNotEqual.cpp
new file mode 100644
index 000000000..53ebe888d
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleNotEqual.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::KLMN>::visit(const luci::CircleNotEqual *)
+{
+ return _graph->nodes()->create<luci::CircleNotEqual>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleNotEqual.test.cpp b/compiler/luci/service/src/Nodes/CircleNotEqual.test.cpp
new file mode 100644
index 000000000..20f7dbc4b
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleNotEqual.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_NotEqual)
+{
+ auto g = loco::make_graph();
+ auto node_ne = g->nodes()->create<luci::CircleNotEqual>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_ne, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_ne = dynamic_cast<luci::CircleNotEqual *>(cloned);
+ ASSERT_NE(nullptr, cloned_ne);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleOneHot.cpp b/compiler/luci/service/src/Nodes/CircleOneHot.cpp
new file mode 100644
index 000000000..c68ab9242
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleOneHot.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::OPQR>::visit(const luci::CircleOneHot *node)
+{
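+  // Beyond its inputs, axis is the only attribute CircleOneHot carries.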
+ auto *cloned = _graph->nodes()->create<luci::CircleOneHot>();
+ if (cloned != nullptr)
+ cloned->axis(node->axis());
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleOneHot.test.cpp b/compiler/luci/service/src/Nodes/CircleOneHot.test.cpp
new file mode 100644
index 000000000..dea927d1b
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleOneHot.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_OneHot)
+{
+ auto g = loco::make_graph();
+ auto node_oh = g->nodes()->create<luci::CircleOneHot>();
+ node_oh->axis(3);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_oh, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_oh = dynamic_cast<luci::CircleOneHot *>(cloned);
+ ASSERT_NE(nullptr, cloned_oh);
+ ASSERT_EQ(node_oh->axis(), cloned_oh->axis());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleOutputDummy.cpp b/compiler/luci/service/src/Nodes/CircleOutputDummy.cpp
new file mode 100644
index 000000000..ce94dff94
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleOutputDummy.cpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
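+// Virtual nodes such as CircleOutputDummy are cloned by the plain CloneNode visitor.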
+luci::CircleNode *CloneNode::visit(const luci::CircleOutputDummy *)
+{
+ return _graph->nodes()->create<luci::CircleOutputDummy>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleOutputDummy.test.cpp b/compiler/luci/service/src/Nodes/CircleOutputDummy.test.cpp
new file mode 100644
index 000000000..6170c7c41
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleOutputDummy.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_OutputDummy)
+{
+ auto g = loco::make_graph();
+ auto node_dummy = g->nodes()->create<luci::CircleOutputDummy>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_dummy, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_dummy = dynamic_cast<luci::CircleOutputDummy *>(cloned);
+ ASSERT_NE(nullptr, cloned_dummy);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleOutputExclude.cpp b/compiler/luci/service/src/Nodes/CircleOutputExclude.cpp
new file mode 100644
index 000000000..1b0f919c3
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleOutputExclude.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleOutputExclude *)
+{
+ return _graph->nodes()->create<luci::CircleOutputExclude>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleOutputExclude.test.cpp b/compiler/luci/service/src/Nodes/CircleOutputExclude.test.cpp
new file mode 100644
index 000000000..120ffe86b
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleOutputExclude.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_OutputExclude)
+{
+ auto g = loco::make_graph();
+ auto node_outex = g->nodes()->create<luci::CircleOutputExclude>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_outex, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_outex = dynamic_cast<luci::CircleOutputExclude *>(cloned);
+ ASSERT_NE(nullptr, cloned_outex);
+}
diff --git a/compiler/luci/service/src/Nodes/CirclePRelu.cpp b/compiler/luci/service/src/Nodes/CirclePRelu.cpp
new file mode 100644
index 000000000..b367e1968
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CirclePRelu.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::OPQR>::visit(const luci::CirclePRelu *)
+{
+ return _graph->nodes()->create<luci::CirclePRelu>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CirclePRelu.test.cpp b/compiler/luci/service/src/Nodes/CirclePRelu.test.cpp
new file mode 100644
index 000000000..1150e3fa4
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CirclePRelu.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_PRelu)
+{
+ auto g = loco::make_graph();
+ auto node_pr = g->nodes()->create<luci::CirclePRelu>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_pr, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_pr = dynamic_cast<luci::CirclePRelu *>(cloned);
+ ASSERT_NE(nullptr, cloned_pr);
+}
diff --git a/compiler/luci/service/src/Nodes/CirclePack.cpp b/compiler/luci/service/src/Nodes/CirclePack.cpp
new file mode 100644
index 000000000..fccbd0f8c
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CirclePack.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::OPQR>::visit(const luci::CirclePack *node)
+{
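+  // CirclePack fixes its input arity at construction, so values_count() is forwarded to create().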
+ auto *cloned = _graph->nodes()->create<luci::CirclePack>(node->values_count());
+ if (cloned != nullptr)
+ cloned->axis(node->axis());
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CirclePack.test.cpp b/compiler/luci/service/src/Nodes/CirclePack.test.cpp
new file mode 100644
index 000000000..b808956dc
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CirclePack.test.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Pack)
+{
+ auto g = loco::make_graph();
+ auto node_pack = g->nodes()->create<luci::CirclePack>(3);
+ node_pack->axis(7);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_pack, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_pack = dynamic_cast<luci::CirclePack *>(cloned);
+ ASSERT_NE(nullptr, cloned_pack);
+ ASSERT_EQ(node_pack->values_count(), cloned_pack->values_count());
+ ASSERT_EQ(node_pack->axis(), cloned_pack->axis());
+}
diff --git a/compiler/luci/service/src/Nodes/CirclePad.cpp b/compiler/luci/service/src/Nodes/CirclePad.cpp
new file mode 100644
index 000000000..a8029e4e2
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CirclePad.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::OPQR>::visit(const luci::CirclePad *)
+{
+ return _graph->nodes()->create<luci::CirclePad>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CirclePad.test.cpp b/compiler/luci/service/src/Nodes/CirclePad.test.cpp
new file mode 100644
index 000000000..1d5f8375e
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CirclePad.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Pad)
+{
+ auto g = loco::make_graph();
+ auto node_pad = g->nodes()->create<luci::CirclePad>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_pad, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_pad = dynamic_cast<luci::CirclePad *>(cloned);
+ ASSERT_NE(nullptr, cloned_pad);
+}
diff --git a/compiler/luci/service/src/Nodes/CirclePadV2.cpp b/compiler/luci/service/src/Nodes/CirclePadV2.cpp
new file mode 100644
index 000000000..ad516a742
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CirclePadV2.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::OPQR>::visit(const luci::CirclePadV2 *)
+{
+ return _graph->nodes()->create<luci::CirclePadV2>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CirclePadV2.test.cpp b/compiler/luci/service/src/Nodes/CirclePadV2.test.cpp
new file mode 100644
index 000000000..d011f69f8
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CirclePadV2.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_PadV2)
+{
+ auto g = loco::make_graph();
+ auto node_pad = g->nodes()->create<luci::CirclePadV2>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_pad, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_pad = dynamic_cast<luci::CirclePadV2 *>(cloned);
+ ASSERT_NE(nullptr, cloned_pad);
+}
diff --git a/compiler/luci/service/src/Nodes/CirclePow.cpp b/compiler/luci/service/src/Nodes/CirclePow.cpp
new file mode 100644
index 000000000..2cff58757
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CirclePow.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::OPQR>::visit(const luci::CirclePow *)
+{
+ return _graph->nodes()->create<luci::CirclePow>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CirclePow.test.cpp b/compiler/luci/service/src/Nodes/CirclePow.test.cpp
new file mode 100644
index 000000000..946298932
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CirclePow.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Pow)
+{
+ auto g = loco::make_graph();
+ auto node_pow = g->nodes()->create<luci::CirclePow>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_pow, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_pow = dynamic_cast<luci::CirclePow *>(cloned);
+ ASSERT_NE(nullptr, cloned_pow);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleQuantize.cpp b/compiler/luci/service/src/Nodes/CircleQuantize.cpp
new file mode 100644
index 000000000..a78eb3f02
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleQuantize.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::OPQR>::visit(const luci::CircleQuantize *)
+{
+ return _graph->nodes()->create<luci::CircleQuantize>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleQuantize.test.cpp b/compiler/luci/service/src/Nodes/CircleQuantize.test.cpp
new file mode 100644
index 000000000..628dfa1e6
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleQuantize.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Quantize)
+{
+ auto g = loco::make_graph();
+ auto node_q = g->nodes()->create<luci::CircleQuantize>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_q, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_q = dynamic_cast<luci::CircleQuantize *>(cloned);
+ ASSERT_NE(nullptr, cloned_q);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleRange.cpp b/compiler/luci/service/src/Nodes/CircleRange.cpp
new file mode 100644
index 000000000..ccb975d4e
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleRange.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::OPQR>::visit(const luci::CircleRange *)
+{
+ return _graph->nodes()->create<luci::CircleRange>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleRange.test.cpp b/compiler/luci/service/src/Nodes/CircleRange.test.cpp
new file mode 100644
index 000000000..b2fb29617
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleRange.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Range)
+{
+ auto g = loco::make_graph();
+ auto node_range = g->nodes()->create<luci::CircleRange>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_range, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_range = dynamic_cast<luci::CircleRange *>(cloned);
+ ASSERT_NE(nullptr, cloned_range);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleRank.cpp b/compiler/luci/service/src/Nodes/CircleRank.cpp
new file mode 100644
index 000000000..168e76c70
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleRank.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::OPQR>::visit(const luci::CircleRank *)
+{
+ return _graph->nodes()->create<luci::CircleRank>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleRank.test.cpp b/compiler/luci/service/src/Nodes/CircleRank.test.cpp
new file mode 100644
index 000000000..0e81fb254
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleRank.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Rank)
+{
+ auto g = loco::make_graph();
+ auto node_rank = g->nodes()->create<luci::CircleRank>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_rank, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_rank = dynamic_cast<luci::CircleRank *>(cloned);
+ ASSERT_NE(nullptr, cloned_rank);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleReduceAny.cpp b/compiler/luci/service/src/Nodes/CircleReduceAny.cpp
new file mode 100644
index 000000000..c29755ec5
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleReduceAny.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::OPQR>::visit(const luci::CircleReduceAny *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleReduceAny>();
+ if (cloned != nullptr)
+ cloned->keep_dims(node->keep_dims());
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleReduceAny.test.cpp b/compiler/luci/service/src/Nodes/CircleReduceAny.test.cpp
new file mode 100644
index 000000000..904b5a139
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleReduceAny.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_ReduceAny)
+{
+ auto g = loco::make_graph();
+ auto node_ra = g->nodes()->create<luci::CircleReduceAny>();
+ node_ra->keep_dims(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_ra, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_ra = dynamic_cast<luci::CircleReduceAny *>(cloned);
+ ASSERT_NE(nullptr, cloned_ra);
+ ASSERT_EQ(node_ra->keep_dims(), cloned_ra->keep_dims());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleReduceMax.cpp b/compiler/luci/service/src/Nodes/CircleReduceMax.cpp
new file mode 100644
index 000000000..efe30491e
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleReduceMax.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::OPQR>::visit(const luci::CircleReduceMax *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleReduceMax>();
+ if (cloned != nullptr)
+ cloned->keep_dims(node->keep_dims());
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleReduceMax.test.cpp b/compiler/luci/service/src/Nodes/CircleReduceMax.test.cpp
new file mode 100644
index 000000000..b3f3c881e
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleReduceMax.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_ReduceMax)
+{
+ auto g = loco::make_graph();
+ auto node_rmax = g->nodes()->create<luci::CircleReduceMax>();
+ node_rmax->keep_dims(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_rmax, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_rmax = dynamic_cast<luci::CircleReduceMax *>(cloned);
+ ASSERT_NE(nullptr, cloned_rmax);
+ ASSERT_EQ(node_rmax->keep_dims(), cloned_rmax->keep_dims());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleReduceMin.cpp b/compiler/luci/service/src/Nodes/CircleReduceMin.cpp
new file mode 100644
index 000000000..e1e38e38e
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleReduceMin.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::OPQR>::visit(const luci::CircleReduceMin *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleReduceMin>();
+ if (cloned != nullptr)
+ cloned->keep_dims(node->keep_dims());
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleReduceMin.test.cpp b/compiler/luci/service/src/Nodes/CircleReduceMin.test.cpp
new file mode 100644
index 000000000..b3faa68da
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleReduceMin.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_ReduceMin)
+{
+ auto g = loco::make_graph();
+ auto node_rmin = g->nodes()->create<luci::CircleReduceMin>();
+ node_rmin->keep_dims(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_rmin, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_rmin = dynamic_cast<luci::CircleReduceMin *>(cloned);
+ ASSERT_NE(nullptr, cloned_rmin);
+ ASSERT_EQ(node_rmin->keep_dims(), cloned_rmin->keep_dims());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleReduceProd.cpp b/compiler/luci/service/src/Nodes/CircleReduceProd.cpp
new file mode 100644
index 000000000..3d51c6929
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleReduceProd.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::OPQR>::visit(const luci::CircleReduceProd *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleReduceProd>();
+ if (cloned != nullptr)
+ cloned->keep_dims(node->keep_dims());
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleReduceProd.test.cpp b/compiler/luci/service/src/Nodes/CircleReduceProd.test.cpp
new file mode 100644
index 000000000..8caf8e91f
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleReduceProd.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_ReduceProd)
+{
+ auto g = loco::make_graph();
+ auto node_rp = g->nodes()->create<luci::CircleReduceProd>();
+ node_rp->keep_dims(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_rp, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_rp = dynamic_cast<luci::CircleReduceProd *>(cloned);
+ ASSERT_NE(nullptr, cloned_rp);
+ ASSERT_EQ(node_rp->keep_dims(), cloned_rp->keep_dims());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleRelu.cpp b/compiler/luci/service/src/Nodes/CircleRelu.cpp
new file mode 100644
index 000000000..952f1de4b
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleRelu.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::OPQR>::visit(const luci::CircleRelu *)
+{
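+ // Relu carries no attributes of its own, so a bare create is enough; the
+ // features() input is reconnected separately by the caller.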
+ return _graph->nodes()->create<luci::CircleRelu>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleRelu.test.cpp b/compiler/luci/service/src/Nodes/CircleRelu.test.cpp
new file mode 100644
index 000000000..6154376ba
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleRelu.test.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Service/CircleShapeInference.h>
+#include <luci/Service/CircleTypeInference.h>
+
+#include <loco/IR/TensorShape.h>
+
+#include <gtest/gtest.h>
+
+TEST(ShapeRuleTest, simple_relu)
+{
+ luci::CircleInput input;
+ luci::CircleRelu relu;
+
+ input.shape({3, 4});
+ input.shape_status(luci::ShapeStatus::VALID);
+
+ relu.features(&input);
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
+ ASSERT_TRUE(shape_inf_rule.infer(&relu, shape));
+ ASSERT_EQ(2, shape.rank());
+ ASSERT_EQ(3, shape.dim(0).value());
+ ASSERT_EQ(4, shape.dim(1).value());
+}
+
+TEST(DataTypeRuleTest, simple_relu)
+{
+ luci::CircleInput input;
+ luci::CircleRelu relu;
+
+ input.dtype(loco::DataType::S32);
+
+ relu.features(&input);
+
+ loco::DataType dtype;
+ luci::tinf::Rule type_inf_rule;
+
+ ASSERT_TRUE(type_inf_rule.infer(&relu, dtype));
+ ASSERT_EQ(loco::DataType::S32, dtype);
+}
+
+TEST(CloneNodeTest, clone_Relu)
+{
+ auto g = loco::make_graph();
+ auto node_relu = g->nodes()->create<luci::CircleRelu>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_relu, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_relu = dynamic_cast<luci::CircleRelu *>(cloned);
+ ASSERT_NE(nullptr, cloned_relu);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleRelu6.cpp b/compiler/luci/service/src/Nodes/CircleRelu6.cpp
new file mode 100644
index 000000000..1cef90cb5
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleRelu6.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::OPQR>::visit(const luci::CircleRelu6 *)
+{
+ return _graph->nodes()->create<luci::CircleRelu6>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleRelu6.test.cpp b/compiler/luci/service/src/Nodes/CircleRelu6.test.cpp
new file mode 100644
index 000000000..213dbcb09
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleRelu6.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <loco/IR/TensorShape.h>
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Relu6)
+{
+ auto g = loco::make_graph();
+ auto node_relu6 = g->nodes()->create<luci::CircleRelu6>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_relu6, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_relu6 = dynamic_cast<luci::CircleRelu6 *>(cloned);
+ ASSERT_NE(nullptr, cloned_relu6);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleReluN1To1.cpp b/compiler/luci/service/src/Nodes/CircleReluN1To1.cpp
new file mode 100644
index 000000000..6e978566f
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleReluN1To1.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::OPQR>::visit(const luci::CircleReluN1To1 *)
+{
+ return _graph->nodes()->create<luci::CircleReluN1To1>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleReluN1To1.test.cpp b/compiler/luci/service/src/Nodes/CircleReluN1To1.test.cpp
new file mode 100644
index 000000000..b828e795c
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleReluN1To1.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <loco/IR/TensorShape.h>
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_ReluN1To1)
+{
+ auto g = loco::make_graph();
+ auto node_relun1 = g->nodes()->create<luci::CircleReluN1To1>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_relun1, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_relun1 = dynamic_cast<luci::CircleReluN1To1 *>(cloned);
+ ASSERT_NE(nullptr, cloned_relun1);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleReshape.cpp b/compiler/luci/service/src/Nodes/CircleReshape.cpp
new file mode 100644
index 000000000..8533f0873
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleReshape.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::OPQR>::visit(const luci::CircleReshape *node)
+{
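+ // newShape() is Reshape's own static shape attribute (distinct from the
+ // shape() tensor input), so its dims are copied over one by one.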
+ auto *cloned = _graph->nodes()->create<luci::CircleReshape>();
+ if (cloned != nullptr)
+ {
+ uint32_t rank = node->newShape()->rank();
+ cloned->newShape()->rank(rank);
+ for (uint32_t r = 0; r < rank; ++r)
+ {
+ cloned->newShape()->dim(r) = node->newShape()->dim(r);
+ }
+ }
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleReshape.test.cpp b/compiler/luci/service/src/Nodes/CircleReshape.test.cpp
new file mode 100644
index 000000000..ca92b717d
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleReshape.test.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Reshape)
+{
+ auto g = loco::make_graph();
+ auto node_reshape = g->nodes()->create<luci::CircleReshape>();
+ node_reshape->newShape()->rank(2);
+ node_reshape->newShape()->dim(0) = 3;
+ node_reshape->newShape()->dim(1) = 4;
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_reshape, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_reshape = dynamic_cast<luci::CircleReshape *>(cloned);
+ ASSERT_NE(nullptr, cloned_reshape);
+ ASSERT_EQ(node_reshape->newShape()->rank(), cloned_reshape->newShape()->rank());
+ ASSERT_EQ(node_reshape->newShape()->dim(0), cloned_reshape->newShape()->dim(0));
+ ASSERT_EQ(node_reshape->newShape()->dim(1), cloned_reshape->newShape()->dim(1));
+}
diff --git a/compiler/luci/service/src/Nodes/CircleResizeBilinear.cpp b/compiler/luci/service/src/Nodes/CircleResizeBilinear.cpp
new file mode 100644
index 000000000..f77d3e216
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleResizeBilinear.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::OPQR>::visit(const luci::CircleResizeBilinear *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleResizeBilinear>();
+ if (cloned != nullptr)
+ {
+ cloned->align_corners(node->align_corners());
+ cloned->half_pixel_centers(node->half_pixel_centers());
+ }
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleResizeBilinear.test.cpp b/compiler/luci/service/src/Nodes/CircleResizeBilinear.test.cpp
new file mode 100644
index 000000000..bff71261d
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleResizeBilinear.test.cpp
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Service/CircleShapeInference.h>
+
+#include <loco/IR/TensorShape.h>
+
+#include <gtest/gtest.h>
+
+TEST(ShapeRuleTest, resize_bilinear_simple)
+{
+ luci::CircleInput input;
+ luci::CircleConst rb_size;
+ luci::CircleResizeBilinear rb;
+
+ input.shape({1, 4, 4, 3});
+ input.shape_status(luci::ShapeStatus::VALID);
+
+ rb_size.dtype(loco::DataType::S32);
+ rb_size.rank(1);
+ rb_size.dim(0).set(2);
+ rb_size.size<loco::DataType::S32>(2);
+ rb_size.at<loco::DataType::S32>(0) = 16;
+ rb_size.at<loco::DataType::S32>(1) = 16;
+ rb_size.shape_status(luci::ShapeStatus::VALID);
+
+ rb.input(&input);
+ rb.size(&rb_size);
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
+ ASSERT_TRUE(shape_inf_rule.infer(&rb, shape));
+ ASSERT_EQ(4, shape.rank());
+ ASSERT_EQ(1, shape.dim(0).value());
+ ASSERT_EQ(16, shape.dim(1).value());
+ ASSERT_EQ(16, shape.dim(2).value());
+ ASSERT_EQ(3, shape.dim(3).value());
+}
+
+TEST(CloneNodeTest, clone_ResizeBilinear)
+{
+ auto g = loco::make_graph();
+ auto node_rb = g->nodes()->create<luci::CircleResizeBilinear>();
+ node_rb->align_corners(true);
+ node_rb->half_pixel_centers(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_rb, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_rb = dynamic_cast<luci::CircleResizeBilinear *>(cloned);
+ ASSERT_NE(nullptr, cloned_rb);
+ ASSERT_EQ(node_rb->align_corners(), cloned_rb->align_corners());
+ ASSERT_EQ(node_rb->half_pixel_centers(), cloned_rb->half_pixel_centers());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleResizeNearestNeighbor.cpp b/compiler/luci/service/src/Nodes/CircleResizeNearestNeighbor.cpp
new file mode 100644
index 000000000..cfb396392
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleResizeNearestNeighbor.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::OPQR>::visit(const luci::CircleResizeNearestNeighbor *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleResizeNearestNeighbor>();
+ if (cloned != nullptr)
+ cloned->align_corners(node->align_corners());
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleResizeNearestNeighbor.test.cpp b/compiler/luci/service/src/Nodes/CircleResizeNearestNeighbor.test.cpp
new file mode 100644
index 000000000..a1d781c65
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleResizeNearestNeighbor.test.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Service/CircleShapeInference.h>
+
+#include <loco/IR/TensorShape.h>
+
+#include <gtest/gtest.h>
+
+TEST(ShapeRuleTest, resize_nearest_neighbor_simple)
+{
+ luci::CircleInput input;
+ luci::CircleConst rnn_size;
+ luci::CircleResizeNearestNeighbor rnn;
+
+ input.shape({1, 4, 4, 3});
+ input.shape_status(luci::ShapeStatus::VALID);
+
+ rnn_size.dtype(loco::DataType::S32);
+ rnn_size.rank(1);
+ rnn_size.dim(0).set(2);
+ rnn_size.size<loco::DataType::S32>(2);
+ rnn_size.at<loco::DataType::S32>(0) = 16;
+ rnn_size.at<loco::DataType::S32>(1) = 16;
+ rnn_size.shape_status(luci::ShapeStatus::VALID);
+
+ rnn.input(&input);
+ rnn.size(&rnn_size);
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
+ ASSERT_TRUE(shape_inf_rule.infer(&rnn, shape));
+ ASSERT_EQ(4, shape.rank());
+ ASSERT_EQ(1, shape.dim(0).value());
+ ASSERT_EQ(16, shape.dim(1).value());
+ ASSERT_EQ(16, shape.dim(2).value());
+ ASSERT_EQ(3, shape.dim(3).value());
+}
+
+TEST(CloneNodeTest, clone_ResizeNearestNeighbor)
+{
+ auto g = loco::make_graph();
+ auto node_rnn = g->nodes()->create<luci::CircleResizeNearestNeighbor>();
+ node_rnn->align_corners(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_rnn, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_rnn = dynamic_cast<luci::CircleResizeNearestNeighbor *>(cloned);
+ ASSERT_NE(nullptr, cloned_rnn);
+ ASSERT_EQ(node_rnn->align_corners(), cloned_rnn->align_corners());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleReverseSequence.cpp b/compiler/luci/service/src/Nodes/CircleReverseSequence.cpp
new file mode 100644
index 000000000..682ad1105
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleReverseSequence.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::OPQR>::visit(const luci::CircleReverseSequence *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleReverseSequence>();
+ if (cloned != nullptr)
+ {
+ cloned->seq_axis(node->seq_axis());
+ cloned->batch_axis(node->batch_axis());
+ }
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleReverseSequence.test.cpp b/compiler/luci/service/src/Nodes/CircleReverseSequence.test.cpp
new file mode 100644
index 000000000..a7a8e3949
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleReverseSequence.test.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_ReverseSequence)
+{
+ auto g = loco::make_graph();
+ auto node_rs = g->nodes()->create<luci::CircleReverseSequence>();
+ node_rs->seq_axis(1);
+ node_rs->batch_axis(2);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_rs, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_rs = dynamic_cast<luci::CircleReverseSequence *>(cloned);
+ ASSERT_NE(nullptr, cloned_rs);
+ ASSERT_EQ(node_rs->seq_axis(), cloned_rs->seq_axis());
+ ASSERT_EQ(node_rs->batch_axis(), cloned_rs->batch_axis());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleReverseV2.cpp b/compiler/luci/service/src/Nodes/CircleReverseV2.cpp
new file mode 100644
index 000000000..f61e14e9d
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleReverseV2.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::OPQR>::visit(const luci::CircleReverseV2 *)
+{
+ return _graph->nodes()->create<luci::CircleReverseV2>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleReverseV2.test.cpp b/compiler/luci/service/src/Nodes/CircleReverseV2.test.cpp
new file mode 100644
index 000000000..0e5ff933c
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleReverseV2.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_ReverseV2)
+{
+ auto g = loco::make_graph();
+ auto node_rev = g->nodes()->create<luci::CircleReverseV2>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_rev, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_rev = dynamic_cast<luci::CircleReverseV2 *>(cloned);
+ ASSERT_NE(nullptr, cloned_rev);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleRound.cpp b/compiler/luci/service/src/Nodes/CircleRound.cpp
new file mode 100644
index 000000000..6e9544c56
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleRound.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::OPQR>::visit(const luci::CircleRound *)
+{
+ return _graph->nodes()->create<luci::CircleRound>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleRound.test.cpp b/compiler/luci/service/src/Nodes/CircleRound.test.cpp
new file mode 100644
index 000000000..2c2c3a9d0
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleRound.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Round)
+{
+ auto g = loco::make_graph();
+ auto node_rnd = g->nodes()->create<luci::CircleRound>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_rnd, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_rnd = dynamic_cast<luci::CircleRound *>(cloned);
+ ASSERT_NE(nullptr, cloned_rnd);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleRsqrt.cpp b/compiler/luci/service/src/Nodes/CircleRsqrt.cpp
new file mode 100644
index 000000000..1c50f5780
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleRsqrt.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::OPQR>::visit(const luci::CircleRsqrt *)
+{
+ return _graph->nodes()->create<luci::CircleRsqrt>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleRsqrt.test.cpp b/compiler/luci/service/src/Nodes/CircleRsqrt.test.cpp
new file mode 100644
index 000000000..3e4ced562
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleRsqrt.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Rsqrt)
+{
+ auto g = loco::make_graph();
+ auto node_rsqrt = g->nodes()->create<luci::CircleRsqrt>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_rsqrt, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_rsqrt = dynamic_cast<luci::CircleRsqrt *>(cloned);
+ ASSERT_NE(nullptr, cloned_rsqrt);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleSVDF.cpp b/compiler/luci/service/src/Nodes/CircleSVDF.cpp
new file mode 100644
index 000000000..d4c3ce88f
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSVDF.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::STUV>::visit(const luci::CircleSVDF *node)
+{
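+ // An UNDEFINED fused activation marks a malformed SVDF, so the clone is
+ // refused here; the clone_SVDF_NEG test exercises this path.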
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleSVDF>();
+ if (cloned != nullptr)
+ {
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ cloned->asymmetric_quantize_inputs(node->asymmetric_quantize_inputs());
+ cloned->svdf_rank(node->svdf_rank());
+ }
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleSVDF.test.cpp b/compiler/luci/service/src/Nodes/CircleSVDF.test.cpp
new file mode 100644
index 000000000..d6edaf1cc
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSVDF.test.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_SVDF)
+{
+ auto g = loco::make_graph();
+ auto node_svdf = g->nodes()->create<luci::CircleSVDF>();
+ node_svdf->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_svdf, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_svdf = dynamic_cast<luci::CircleSVDF *>(cloned);
+ ASSERT_NE(nullptr, cloned_svdf);
+ ASSERT_EQ(node_svdf->asymmetric_quantize_inputs(), cloned_svdf->asymmetric_quantize_inputs());
+ ASSERT_EQ(node_svdf->svdf_rank(), cloned_svdf->svdf_rank());
+}
+
+TEST(CloneNodeTest, clone_SVDF_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_svdf = g->nodes()->create<luci::CircleSVDF>();
+ node_svdf->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_svdf, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
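
CircleSVDF is the only visitor in this span whose clone can fail: clone_node() returns nullptr when the fused activation function is UNDEFINED, as the clone_SVDF_NEG test above exercises. A minimal caller-side sketch of guarding for that failure mode; the safe_clone() helper is hypothetical and not part of this commit:

#include <luci/Service/CircleNodeClone.h>

#include <stdexcept>

// Hypothetical convenience wrapper (an assumption, not upstream API): forwards
// to luci::clone_node() and turns its nullptr failure mode into an exception.
luci::CircleNode *safe_clone(const luci::CircleNode *node, loco::Graph *dst)
{
  auto *cloned = luci::clone_node(node, dst);
  if (cloned == nullptr)
    throw std::runtime_error("clone_node() refused to clone this node");
  return cloned;
}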
diff --git a/compiler/luci/service/src/Nodes/CircleScatterNd.cpp b/compiler/luci/service/src/Nodes/CircleScatterNd.cpp
new file mode 100644
index 000000000..d16524db2
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleScatterNd.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::STUV>::visit(const luci::CircleScatterNd *)
+{
+ return _graph->nodes()->create<luci::CircleScatterNd>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleScatterNd.test.cpp b/compiler/luci/service/src/Nodes/CircleScatterNd.test.cpp
new file mode 100644
index 000000000..ce63603cc
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleScatterNd.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_ScatterNd)
+{
+ auto g = loco::make_graph();
+ auto node_snd = g->nodes()->create<luci::CircleScatterNd>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_snd, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_snd = dynamic_cast<luci::CircleScatterNd *>(cloned);
+ ASSERT_NE(nullptr, cloned_snd);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleSegmentSum.cpp b/compiler/luci/service/src/Nodes/CircleSegmentSum.cpp
new file mode 100644
index 000000000..ea9eee27d
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSegmentSum.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::STUV>::visit(const luci::CircleSegmentSum *)
+{
+ return _graph->nodes()->create<luci::CircleSegmentSum>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleSegmentSum.test.cpp b/compiler/luci/service/src/Nodes/CircleSegmentSum.test.cpp
new file mode 100644
index 000000000..ff17b0745
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSegmentSum.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_SegmentSum)
+{
+ auto g = loco::make_graph();
+ auto node_ss = g->nodes()->create<luci::CircleSegmentSum>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_ss, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_ss = dynamic_cast<luci::CircleSegmentSum *>(cloned);
+ ASSERT_NE(nullptr, cloned_ss);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleSelect.cpp b/compiler/luci/service/src/Nodes/CircleSelect.cpp
new file mode 100644
index 000000000..609a67905
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSelect.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::STUV>::visit(const luci::CircleSelect *)
+{
+ return _graph->nodes()->create<luci::CircleSelect>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleSelect.test.cpp b/compiler/luci/service/src/Nodes/CircleSelect.test.cpp
new file mode 100644
index 000000000..e8d631618
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSelect.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Select)
+{
+ auto g = loco::make_graph();
+ auto node_sel = g->nodes()->create<luci::CircleSelect>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_sel, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_sel = dynamic_cast<luci::CircleSelect *>(cloned);
+ ASSERT_NE(nullptr, cloned_sel);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleSelectV2.cpp b/compiler/luci/service/src/Nodes/CircleSelectV2.cpp
new file mode 100644
index 000000000..027fbd9ee
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSelectV2.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::STUV>::visit(const luci::CircleSelectV2 *)
+{
+ return _graph->nodes()->create<luci::CircleSelectV2>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleSelectV2.test.cpp b/compiler/luci/service/src/Nodes/CircleSelectV2.test.cpp
new file mode 100644
index 000000000..253dba555
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSelectV2.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_SelectV2)
+{
+ auto g = loco::make_graph();
+ auto node_sel = g->nodes()->create<luci::CircleSelectV2>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_sel, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_sel = dynamic_cast<luci::CircleSelectV2 *>(cloned);
+ ASSERT_NE(nullptr, cloned_sel);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleShape.cpp b/compiler/luci/service/src/Nodes/CircleShape.cpp
new file mode 100644
index 000000000..9ae742091
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleShape.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::STUV>::visit(const luci::CircleShape *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleShape>();
+ if (cloned != nullptr)
+ cloned->out_type(node->out_type());
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleShape.test.cpp b/compiler/luci/service/src/Nodes/CircleShape.test.cpp
new file mode 100644
index 000000000..ec057bd05
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleShape.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Shape)
+{
+ auto g = loco::make_graph();
+ auto node_shape = g->nodes()->create<luci::CircleShape>();
+ node_shape->out_type(loco::DataType::S32);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_shape, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_shape = dynamic_cast<luci::CircleShape *>(cloned);
+ ASSERT_NE(nullptr, cloned_shape);
+ ASSERT_EQ(node_shape->out_type(), cloned_shape->out_type());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleSin.cpp b/compiler/luci/service/src/Nodes/CircleSin.cpp
new file mode 100644
index 000000000..9cb35b0f2
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSin.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::STUV>::visit(const luci::CircleSin *)
+{
+ return _graph->nodes()->create<luci::CircleSin>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleSin.test.cpp b/compiler/luci/service/src/Nodes/CircleSin.test.cpp
new file mode 100644
index 000000000..b072e7e2c
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSin.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Sin)
+{
+ auto g = loco::make_graph();
+ auto node_sin = g->nodes()->create<luci::CircleSin>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_sin, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_sin = dynamic_cast<luci::CircleSin *>(cloned);
+ ASSERT_NE(nullptr, cloned_sin);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleSlice.cpp b/compiler/luci/service/src/Nodes/CircleSlice.cpp
new file mode 100644
index 000000000..341d37d75
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSlice.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::STUV>::visit(const luci::CircleSlice *)
+{
+ return _graph->nodes()->create<luci::CircleSlice>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleSlice.test.cpp b/compiler/luci/service/src/Nodes/CircleSlice.test.cpp
new file mode 100644
index 000000000..48ec20304
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSlice.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Slice)
+{
+ auto g = loco::make_graph();
+ auto node_slice = g->nodes()->create<luci::CircleSlice>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_slice, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_slice = dynamic_cast<luci::CircleSlice *>(cloned);
+ ASSERT_NE(nullptr, cloned_slice);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleSoftmax.cpp b/compiler/luci/service/src/Nodes/CircleSoftmax.cpp
new file mode 100644
index 000000000..551bcbc90
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSoftmax.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::STUV>::visit(const luci::CircleSoftmax *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleSoftmax>();
+ if (cloned != nullptr)
+ cloned->beta(node->beta());
+ return cloned;
+}
+
+} // namespace luci
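
[editor note] Attribute-carrying operators follow a create-then-copy pattern: only node-local state (`beta` here) is duplicated, and input edges are left unconnected, apparently to be rewired by whatever drives the clone. A usage sketch under that assumption; the manual `logits` reconnect is ours, not something `clone_node` does:

    #include "luci/Service/CircleNodeClone.h"

    #include <luci/IR/CircleNodes.h>

    // Clone a Softmax into dst, then rewire its single input by hand.
    luci::CircleSoftmax *clone_softmax(luci::CircleSoftmax *src, loco::Graph *dst,
                                       luci::CircleNode *new_input)
    {
      auto cloned = dynamic_cast<luci::CircleSoftmax *>(luci::clone_node(src, dst));
      if (cloned == nullptr)
        return nullptr;
      cloned->logits(new_input); // inputs are not copied by the visitor above
      return cloned;
    }
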
diff --git a/compiler/luci/service/src/Nodes/CircleSoftmax.test.cpp b/compiler/luci/service/src/Nodes/CircleSoftmax.test.cpp
new file mode 100644
index 000000000..c80b44d69
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSoftmax.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Softmax)
+{
+ auto g = loco::make_graph();
+ auto node_sm = g->nodes()->create<luci::CircleSoftmax>();
+ node_sm->beta(2.3f);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_sm, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_sm = dynamic_cast<luci::CircleSoftmax *>(cloned);
+ ASSERT_NE(nullptr, cloned_sm);
+ ASSERT_EQ(node_sm->beta(), cloned_sm->beta());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleSpaceToBatchND.cpp b/compiler/luci/service/src/Nodes/CircleSpaceToBatchND.cpp
new file mode 100644
index 000000000..00bb1d42b
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSpaceToBatchND.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::STUV>::visit(const luci::CircleSpaceToBatchND *)
+{
+ return _graph->nodes()->create<luci::CircleSpaceToBatchND>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleSpaceToBatchND.test.cpp b/compiler/luci/service/src/Nodes/CircleSpaceToBatchND.test.cpp
new file mode 100644
index 000000000..eb743795d
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSpaceToBatchND.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_SpaceToBatchND)
+{
+ auto g = loco::make_graph();
+ auto node_s2bnd = g->nodes()->create<luci::CircleSpaceToBatchND>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_s2bnd, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_s2bnd = dynamic_cast<luci::CircleSpaceToBatchND *>(cloned);
+ ASSERT_NE(nullptr, cloned_s2bnd);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleSpaceToDepth.cpp b/compiler/luci/service/src/Nodes/CircleSpaceToDepth.cpp
new file mode 100644
index 000000000..7916acf6b
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSpaceToDepth.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::STUV>::visit(const luci::CircleSpaceToDepth *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleSpaceToDepth>();
+ if (cloned != nullptr)
+ cloned->block_size(node->block_size());
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleSpaceToDepth.test.cpp b/compiler/luci/service/src/Nodes/CircleSpaceToDepth.test.cpp
new file mode 100644
index 000000000..fb544e6d7
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSpaceToDepth.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_SpaceToDepth)
+{
+ auto g = loco::make_graph();
+ auto node_s2d = g->nodes()->create<luci::CircleSpaceToDepth>();
+ node_s2d->block_size(32);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_s2d, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_s2d = dynamic_cast<luci::CircleSpaceToDepth *>(cloned);
+ ASSERT_NE(nullptr, cloned_s2d);
+ ASSERT_EQ(node_s2d->block_size(), cloned_s2d->block_size());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleSparseToDense.cpp b/compiler/luci/service/src/Nodes/CircleSparseToDense.cpp
new file mode 100644
index 000000000..8218e9244
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSparseToDense.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::STUV>::visit(const luci::CircleSparseToDense *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleSparseToDense>();
+ if (cloned != nullptr)
+ cloned->validate_indices(node->validate_indices());
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleSparseToDense.test.cpp b/compiler/luci/service/src/Nodes/CircleSparseToDense.test.cpp
new file mode 100644
index 000000000..177a469cd
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSparseToDense.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_SparseToDense)
+{
+ auto g = loco::make_graph();
+ auto node_s2d = g->nodes()->create<luci::CircleSparseToDense>();
+ node_s2d->validate_indices(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_s2d, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_s2d = dynamic_cast<luci::CircleSparseToDense *>(cloned);
+ ASSERT_NE(nullptr, cloned_s2d);
+ ASSERT_EQ(node_s2d->validate_indices(), cloned_s2d->validate_indices());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleSplit.cpp b/compiler/luci/service/src/Nodes/CircleSplit.cpp
new file mode 100644
index 000000000..91aaa5547
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSplit.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::STUV>::visit(const luci::CircleSplit *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleSplit>();
+ if (cloned != nullptr)
+ cloned->num_split(node->num_split());
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleSplit.test.cpp b/compiler/luci/service/src/Nodes/CircleSplit.test.cpp
new file mode 100644
index 000000000..9ee26b425
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSplit.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Split)
+{
+ auto g = loco::make_graph();
+ auto node_split = g->nodes()->create<luci::CircleSplit>();
+ node_split->num_split(5);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_split, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_split = dynamic_cast<luci::CircleSplit *>(cloned);
+ ASSERT_NE(nullptr, cloned_split);
+ ASSERT_EQ(node_split->num_split(), cloned_split->num_split());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleSplitOut.cpp b/compiler/luci/service/src/Nodes/CircleSplitOut.cpp
new file mode 100644
index 000000000..024598892
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSplitOut.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSplitOut *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleSplitOut>();
+ if (cloned != nullptr)
+ cloned->index(node->index());
+ return cloned;
+}
+
+} // namespace luci
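
[editor note] Note the receiver: `CircleSplitOut` is cloned by the base `CloneNode`, not by the `CloneNodeLet<CN::STUV>` group — the virtual out nodes that unpack multi-output operators appear to be kept together in the base visitor. For context, a sketch of how Split and its out nodes relate in a graph (wiring follows luci's usual import pattern; simplified, not repo code):

    #include <luci/IR/CircleNodes.h>

    #include <cstdint>

    // Each CircleSplitOut consumes the multi-output Split and selects one
    // slice via index(); cloning it therefore only needs to copy that index.
    void build_split(loco::Graph *g, luci::CircleNode *input, luci::CircleNode *split_dim)
    {
      auto split = g->nodes()->create<luci::CircleSplit>();
      split->split_dim(split_dim);
      split->input(input);
      split->num_split(2);

      for (int32_t i = 0; i < 2; ++i)
      {
        auto out = g->nodes()->create<luci::CircleSplitOut>();
        out->input(split);
        out->index(i);
      }
    }
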
diff --git a/compiler/luci/service/src/Nodes/CircleSplitOut.test.cpp b/compiler/luci/service/src/Nodes/CircleSplitOut.test.cpp
new file mode 100644
index 000000000..deec08804
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSplitOut.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_SplitOut)
+{
+ auto g = loco::make_graph();
+ auto node_sout = g->nodes()->create<luci::CircleSplitOut>();
+ node_sout->index(1);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_sout, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_sout = dynamic_cast<luci::CircleSplitOut *>(cloned);
+ ASSERT_NE(nullptr, cloned_sout);
+ ASSERT_EQ(node_sout->index(), cloned_sout->index());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleSplitV.cpp b/compiler/luci/service/src/Nodes/CircleSplitV.cpp
new file mode 100644
index 000000000..18095b049
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSplitV.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::STUV>::visit(const luci::CircleSplitV *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleSplitV>();
+ if (cloned != nullptr)
+ cloned->num_split(node->num_split());
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleSplitV.test.cpp b/compiler/luci/service/src/Nodes/CircleSplitV.test.cpp
new file mode 100644
index 000000000..d109a64aa
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSplitV.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_SplitV)
+{
+ auto g = loco::make_graph();
+ auto node_split = g->nodes()->create<luci::CircleSplitV>();
+ node_split->num_split(5);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_split, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_split = dynamic_cast<luci::CircleSplitV *>(cloned);
+ ASSERT_NE(nullptr, cloned_split);
+ ASSERT_EQ(node_split->num_split(), cloned_split->num_split());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleSplitVOut.cpp b/compiler/luci/service/src/Nodes/CircleSplitVOut.cpp
new file mode 100644
index 000000000..f40eb0a47
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSplitVOut.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleSplitVOut *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleSplitVOut>();
+ if (cloned != nullptr)
+ cloned->index(node->index());
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleSplitVOut.test.cpp b/compiler/luci/service/src/Nodes/CircleSplitVOut.test.cpp
new file mode 100644
index 000000000..ab5e9d6be
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSplitVOut.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_SplitVOut)
+{
+ auto g = loco::make_graph();
+ auto node_sout = g->nodes()->create<luci::CircleSplitVOut>();
+ node_sout->index(1);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_sout, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_sout = dynamic_cast<luci::CircleSplitVOut *>(cloned);
+ ASSERT_NE(nullptr, cloned_sout);
+ ASSERT_EQ(node_sout->index(), cloned_sout->index());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleSqrt.cpp b/compiler/luci/service/src/Nodes/CircleSqrt.cpp
new file mode 100644
index 000000000..712ca457f
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSqrt.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::STUV>::visit(const luci::CircleSqrt *)
+{
+ return _graph->nodes()->create<luci::CircleSqrt>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleSqrt.test.cpp b/compiler/luci/service/src/Nodes/CircleSqrt.test.cpp
new file mode 100644
index 000000000..dbef839d6
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSqrt.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Sqrt)
+{
+ auto g = loco::make_graph();
+ auto node_sqrt = g->nodes()->create<luci::CircleSqrt>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_sqrt, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_sqrt = dynamic_cast<luci::CircleSqrt *>(cloned);
+ ASSERT_NE(nullptr, cloned_sqrt);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleSquare.cpp b/compiler/luci/service/src/Nodes/CircleSquare.cpp
new file mode 100644
index 000000000..a86f14a35
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSquare.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::STUV>::visit(const luci::CircleSquare *)
+{
+ return _graph->nodes()->create<luci::CircleSquare>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleSquare.test.cpp b/compiler/luci/service/src/Nodes/CircleSquare.test.cpp
new file mode 100644
index 000000000..67ac21210
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSquare.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Square)
+{
+ auto g = loco::make_graph();
+ auto node_squ = g->nodes()->create<luci::CircleSquare>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_squ, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_squ = dynamic_cast<luci::CircleSquare *>(cloned);
+ ASSERT_NE(nullptr, cloned_squ);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleSquaredDifference.cpp b/compiler/luci/service/src/Nodes/CircleSquaredDifference.cpp
new file mode 100644
index 000000000..c3dff271b
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSquaredDifference.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::STUV>::visit(const luci::CircleSquaredDifference *)
+{
+ return _graph->nodes()->create<luci::CircleSquaredDifference>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleSquaredDifference.test.cpp b/compiler/luci/service/src/Nodes/CircleSquaredDifference.test.cpp
new file mode 100644
index 000000000..26099612b
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSquaredDifference.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_SquaredDifference)
+{
+ auto g = loco::make_graph();
+ auto node_sd = g->nodes()->create<luci::CircleSquaredDifference>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_sd, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_sd = dynamic_cast<luci::CircleSquaredDifference *>(cloned);
+ ASSERT_NE(nullptr, cloned_sd);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleSqueeze.cpp b/compiler/luci/service/src/Nodes/CircleSqueeze.cpp
new file mode 100644
index 000000000..384ee52c5
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSqueeze.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::STUV>::visit(const luci::CircleSqueeze *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleSqueeze>();
+ if (cloned != nullptr)
+ cloned->squeeze_dims(node->squeeze_dims());
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleSqueeze.test.cpp b/compiler/luci/service/src/Nodes/CircleSqueeze.test.cpp
new file mode 100644
index 000000000..bc73eafa7
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSqueeze.test.cpp
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Service/CircleShapeInference.h>
+
+#include <loco/IR/TensorShape.h>
+
+#include <gtest/gtest.h>
+
+TEST(ShapeRuleTest, squeeze_simple)
+{
+ luci::CircleInput input;
+ luci::CircleSqueeze squeeze;
+
+ input.shape({1, 4, 3, 1});
+ input.shape_status(luci::ShapeStatus::VALID);
+
+ squeeze.input(&input);
+ squeeze.squeeze_dims({0});
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
+ ASSERT_TRUE(shape_inf_rule.infer(&squeeze, shape));
+ ASSERT_EQ(3, shape.rank());
+ ASSERT_EQ(4, shape.dim(0).value());
+ ASSERT_EQ(3, shape.dim(1).value());
+ ASSERT_EQ(1, shape.dim(2).value());
+}
+
+TEST(ShapeRuleTest, squeeze_all)
+{
+ luci::CircleInput input;
+ luci::CircleSqueeze squeeze;
+
+ input.shape({1, 4, 3, 1});
+ input.shape_status(luci::ShapeStatus::VALID);
+
+ squeeze.input(&input);
+ squeeze.squeeze_dims({});
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
+ ASSERT_TRUE(shape_inf_rule.infer(&squeeze, shape));
+ ASSERT_EQ(2, shape.rank());
+ ASSERT_EQ(4, shape.dim(0).value());
+ ASSERT_EQ(3, shape.dim(1).value());
+}
+
+TEST(CloneNodeTest, clone_Squeeze)
+{
+ auto g = loco::make_graph();
+ auto node_squ = g->nodes()->create<luci::CircleSqueeze>();
+ node_squ->squeeze_dims({2, 3});
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_squ, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_squ = dynamic_cast<luci::CircleSqueeze *>(cloned);
+ ASSERT_NE(nullptr, cloned_squ);
+ ASSERT_EQ(node_squ->squeeze_dims().size(), cloned_squ->squeeze_dims().size());
+ for (size_t s = 0; s < node_squ->squeeze_dims().size(); ++s)
+ ASSERT_EQ(node_squ->squeeze_dims().at(s), cloned_squ->squeeze_dims().at(s));
+}
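
[editor note] The two shape tests pin down TFLite squeeze semantics: an explicit `squeeze_dims` removes only the listed size-1 axes (so `{1,4,3,1}` with `{0}` becomes `{4,3,1}`), while an empty list removes every size-1 axis (`{1,4,3,1}` becomes `{4,3}`). The same rule restated as a standalone check, independent of the luci classes:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Drop axis i when it is listed in dims, or when dims is empty and the
    // axis has size 1 — the rule the two ShapeRuleTest cases exercise.
    std::vector<uint32_t> squeeze(const std::vector<uint32_t> &shape,
                                  const std::vector<int32_t> &dims)
    {
      std::vector<uint32_t> out;
      for (size_t i = 0; i < shape.size(); ++i)
      {
        bool listed = std::find(dims.begin(), dims.end(), static_cast<int32_t>(i)) != dims.end();
        bool drop = dims.empty() ? (shape[i] == 1) : listed;
        if (!drop)
          out.push_back(shape[i]);
      }
      return out;
    }

    int main()
    {
      assert((squeeze({1, 4, 3, 1}, {0}) == std::vector<uint32_t>{4, 3, 1}));
      assert((squeeze({1, 4, 3, 1}, {}) == std::vector<uint32_t>{4, 3}));
    }
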
diff --git a/compiler/luci/service/src/Nodes/CircleStridedSlice.cpp b/compiler/luci/service/src/Nodes/CircleStridedSlice.cpp
new file mode 100644
index 000000000..3298c92b5
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleStridedSlice.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::STUV>::visit(const luci::CircleStridedSlice *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleStridedSlice>();
+ if (cloned != nullptr)
+ {
+ cloned->begin_mask(node->begin_mask());
+ cloned->end_mask(node->end_mask());
+ cloned->ellipsis_mask(node->ellipsis_mask());
+ cloned->new_axis_mask(node->new_axis_mask());
+ cloned->shrink_axis_mask(node->shrink_axis_mask());
+ }
+ return cloned;
+}
+
+} // namespace luci
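
[editor note] The five masks are per-axis bitfields with TensorFlow's strided-slice meaning (ignore the given begin/end for an axis, expand an ellipsis, insert or shrink an axis), so the clone only has to copy five integers. How a consumer would read one bit; the helper is ours, for illustration:

    #include <cassert>
    #include <cstdint>

    // Bit i of a mask toggles the behaviour for axis i.
    inline bool mask_bit(int32_t mask, int axis) { return (mask >> axis) & 1; }

    int main()
    {
      int32_t begin_mask = 0b101; // axes 0 and 2 ignore the provided begin index
      assert(mask_bit(begin_mask, 0));
      assert(!mask_bit(begin_mask, 1));
      assert(mask_bit(begin_mask, 2));
    }
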
diff --git a/compiler/luci/service/src/Nodes/CircleStridedSlice.test.cpp b/compiler/luci/service/src/Nodes/CircleStridedSlice.test.cpp
new file mode 100644
index 000000000..d633f3022
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleStridedSlice.test.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_StridedSlice)
+{
+ auto g = loco::make_graph();
+ auto node_ss = g->nodes()->create<luci::CircleStridedSlice>();
+ node_ss->begin_mask(1);
+ node_ss->end_mask(2);
+ node_ss->ellipsis_mask(3);
+ node_ss->new_axis_mask(4);
+ node_ss->shrink_axis_mask(5);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_ss, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_ss = dynamic_cast<luci::CircleStridedSlice *>(cloned);
+ ASSERT_NE(nullptr, cloned_ss);
+ ASSERT_EQ(node_ss->begin_mask(), cloned_ss->begin_mask());
+ ASSERT_EQ(node_ss->end_mask(), cloned_ss->end_mask());
+ ASSERT_EQ(node_ss->ellipsis_mask(), cloned_ss->ellipsis_mask());
+ ASSERT_EQ(node_ss->new_axis_mask(), cloned_ss->new_axis_mask());
+ ASSERT_EQ(node_ss->shrink_axis_mask(), cloned_ss->shrink_axis_mask());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleSub.cpp b/compiler/luci/service/src/Nodes/CircleSub.cpp
new file mode 100644
index 000000000..3084740cc
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSub.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::STUV>::visit(const luci::CircleSub *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleSub>();
+ if (cloned != nullptr)
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ return cloned;
+}
+
+} // namespace luci
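
[editor note] Unlike the unconditional clones above, Sub fails fast: an `UNDEFINED` fused activation makes `visit` return nullptr instead of producing a half-valid node, and the `clone_Sub_NEG` test below asserts exactly that. A compact standalone check of the contract, mirroring that test:

    #include "luci/Service/CircleNodeClone.h"

    #include <luci/IR/CircleNodes.h>

    #include <cassert>

    int main()
    {
      auto g = loco::make_graph();
      auto sub = g->nodes()->create<luci::CircleSub>();
      sub->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);

      auto gc = loco::make_graph();
      assert(luci::clone_node(sub, gc.get()) == nullptr); // refused, not cloned
    }
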
diff --git a/compiler/luci/service/src/Nodes/CircleSub.test.cpp b/compiler/luci/service/src/Nodes/CircleSub.test.cpp
new file mode 100644
index 000000000..e6bd7b8ff
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSub.test.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Sub)
+{
+ auto g = loco::make_graph();
+ auto node_sub = g->nodes()->create<luci::CircleSub>();
+ node_sub->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_sub, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_sub = dynamic_cast<luci::CircleSub *>(cloned);
+ ASSERT_NE(nullptr, cloned_sub);
+ ASSERT_EQ(node_sub->fusedActivationFunction(), cloned_sub->fusedActivationFunction());
+}
+
+TEST(CloneNodeTest, clone_Sub_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_sub = g->nodes()->create<luci::CircleSub>();
+ node_sub->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_sub, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleSum.cpp b/compiler/luci/service/src/Nodes/CircleSum.cpp
new file mode 100644
index 000000000..1d5882afb
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSum.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::STUV>::visit(const luci::CircleSum *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleSum>();
+ if (cloned != nullptr)
+ cloned->keep_dims(node->keep_dims());
+ return cloned;
+}
+
+} // namespace luci
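
[editor note] `keep_dims` is the only attribute Sum carries; it decides whether reduced axes survive as size-1 dims or disappear from the output shape. The semantics in isolation (reference sketch, not luci code):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Reduction over `axes`: reduced axes stay as size-1 dims when
    // keep_dims is true, and are removed otherwise.
    std::vector<uint32_t> reduced_shape(const std::vector<uint32_t> &shape,
                                        const std::vector<int> &axes, bool keep_dims)
    {
      std::vector<uint32_t> out;
      for (size_t i = 0; i < shape.size(); ++i)
      {
        bool reduced = std::find(axes.begin(), axes.end(), static_cast<int>(i)) != axes.end();
        if (!reduced)
          out.push_back(shape[i]);
        else if (keep_dims)
          out.push_back(1);
      }
      return out; // e.g. {2,3,4} over axis 1: keep_dims -> {2,1,4}, else {2,4}
    }
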
diff --git a/compiler/luci/service/src/Nodes/CircleSum.test.cpp b/compiler/luci/service/src/Nodes/CircleSum.test.cpp
new file mode 100644
index 000000000..aa1b0d128
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleSum.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Sum)
+{
+ auto g = loco::make_graph();
+ auto node_sum = g->nodes()->create<luci::CircleSum>();
+ node_sum->keep_dims(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_sum, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_sum = dynamic_cast<luci::CircleSum *>(cloned);
+ ASSERT_NE(nullptr, cloned_sum);
+ ASSERT_EQ(node_sum->keep_dims(), cloned_sum->keep_dims());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleTanh.cpp b/compiler/luci/service/src/Nodes/CircleTanh.cpp
new file mode 100644
index 000000000..56515314a
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleTanh.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::STUV>::visit(const luci::CircleTanh *)
+{
+ return _graph->nodes()->create<luci::CircleTanh>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleTanh.test.cpp b/compiler/luci/service/src/Nodes/CircleTanh.test.cpp
new file mode 100644
index 000000000..0215b42ca
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleTanh.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Tanh)
+{
+ auto g = loco::make_graph();
+ auto node_tanh = g->nodes()->create<luci::CircleTanh>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_tanh, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_tanh = dynamic_cast<luci::CircleTanh *>(cloned);
+ ASSERT_NE(nullptr, cloned_tanh);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleTile.cpp b/compiler/luci/service/src/Nodes/CircleTile.cpp
new file mode 100644
index 000000000..2cabd7818
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleTile.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::STUV>::visit(const luci::CircleTile *)
+{
+ return _graph->nodes()->create<luci::CircleTile>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleTile.test.cpp b/compiler/luci/service/src/Nodes/CircleTile.test.cpp
new file mode 100644
index 000000000..089c86ccb
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleTile.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Tile)
+{
+ auto g = loco::make_graph();
+ auto node_tile = g->nodes()->create<luci::CircleTile>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_tile, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_tile = dynamic_cast<luci::CircleTile *>(cloned);
+ ASSERT_NE(nullptr, cloned_tile);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleTopKV2.cpp b/compiler/luci/service/src/Nodes/CircleTopKV2.cpp
new file mode 100644
index 000000000..71dd5afbe
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleTopKV2.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::STUV>::visit(const luci::CircleTopKV2 *)
+{
+ return _graph->nodes()->create<luci::CircleTopKV2>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleTopKV2.test.cpp b/compiler/luci/service/src/Nodes/CircleTopKV2.test.cpp
new file mode 100644
index 000000000..7f68a408d
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleTopKV2.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_TopKV2)
+{
+ auto g = loco::make_graph();
+ auto node_top = g->nodes()->create<luci::CircleTopKV2>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_top, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_top = dynamic_cast<luci::CircleTopKV2 *>(cloned);
+ ASSERT_NE(nullptr, cloned_top);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleTopKV2Out.cpp b/compiler/luci/service/src/Nodes/CircleTopKV2Out.cpp
new file mode 100644
index 000000000..5c13f2be1
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleTopKV2Out.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleTopKV2Out *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleTopKV2Out>();
+ if (cloned != nullptr)
+ cloned->index(node->index());
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleTopKV2Out.test.cpp b/compiler/luci/service/src/Nodes/CircleTopKV2Out.test.cpp
new file mode 100644
index 000000000..cfba61f10
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleTopKV2Out.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_TopKV2Out)
+{
+ auto g = loco::make_graph();
+ auto node_tout = g->nodes()->create<luci::CircleTopKV2Out>();
+ node_tout->index(1);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_tout, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_tout = dynamic_cast<luci::CircleTopKV2Out *>(cloned);
+ ASSERT_NE(nullptr, cloned_tout);
+ ASSERT_EQ(node_tout->index(), cloned_tout->index());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleTranspose.cpp b/compiler/luci/service/src/Nodes/CircleTranspose.cpp
new file mode 100644
index 000000000..bfbe116b4
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleTranspose.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::STUV>::visit(const luci::CircleTranspose *)
+{
+ return _graph->nodes()->create<luci::CircleTranspose>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleTranspose.test.cpp b/compiler/luci/service/src/Nodes/CircleTranspose.test.cpp
new file mode 100644
index 000000000..9447d1a5b
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleTranspose.test.cpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Service/CircleShapeInference.h>
+
+#include <loco/IR/TensorShape.h>
+
+#include <gtest/gtest.h>
+
+TEST(ShapeRuleTest, transpose_simple)
+{
+ luci::CircleInput input;
+ luci::CircleConst perm;
+ luci::CircleTranspose transpose;
+
+ input.shape({3, 8, 1});
+ input.shape_status(luci::ShapeStatus::VALID);
+
+ perm.dtype(loco::DataType::S32);
+ perm.rank(1);
+ perm.dim(0).set(3);
+ perm.size<loco::DataType::S32>(3);
+ perm.at<loco::DataType::S32>(0) = 1;
+ perm.at<loco::DataType::S32>(1) = 2;
+ perm.at<loco::DataType::S32>(2) = 0;
+ perm.shape_status(luci::ShapeStatus::VALID);
+
+ transpose.a(&input);
+ transpose.perm(&perm);
+
+ loco::TensorShape shape;
+ luci::sinf::Rule shape_inf_rule;
+
+ ASSERT_TRUE(shape_inf_rule.infer(&transpose, shape));
+ ASSERT_EQ(3, shape.rank());
+ ASSERT_EQ(8, shape.dim(0).value());
+ ASSERT_EQ(1, shape.dim(1).value());
+ ASSERT_EQ(3, shape.dim(2).value());
+}
+
+TEST(CloneNodeTest, clone_Transpose)
+{
+ auto g = loco::make_graph();
+ auto node_tr = g->nodes()->create<luci::CircleTranspose>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_tr, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_tr = dynamic_cast<luci::CircleTranspose *>(cloned);
+ ASSERT_NE(nullptr, cloned_tr);
+}
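
[Editor's note] The ShapeRuleTest above checks the transpose shape rule output.dim(i) = input.dim(perm[i]): with input (3, 8, 1) and perm (1, 2, 0) the result is (8, 1, 3). A standalone sketch of that rule (hypothetical helper, not luci's actual implementation, which also validates perm):

#include <cstdint>
#include <vector>

// out[i] = in[perm[i]]; perm must be a permutation of 0..rank-1.
std::vector<uint32_t> transpose_shape(const std::vector<uint32_t> &in,
                                      const std::vector<int32_t> &perm)
{
  std::vector<uint32_t> out(in.size());
  for (size_t i = 0; i < in.size(); ++i)
    out[i] = in[static_cast<size_t>(perm[i])];
  return out;
}

// transpose_shape({3, 8, 1}, {1, 2, 0}) == {8, 1, 3}, matching the ASSERTs above.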
diff --git a/compiler/luci/service/src/Nodes/CircleTransposeConv.cpp b/compiler/luci/service/src/Nodes/CircleTransposeConv.cpp
new file mode 100644
index 000000000..73aad2eb6
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleTransposeConv.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::STUV>::visit(const luci::CircleTransposeConv *node)
+{
+ if (node->padding() == luci::Padding::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleTransposeConv>();
+ if (cloned != nullptr)
+ {
+ cloned->padding(node->padding());
+ cloned->stride()->h(node->stride()->h());
+ cloned->stride()->w(node->stride()->w());
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ }
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleTransposeConv.test.cpp b/compiler/luci/service/src/Nodes/CircleTransposeConv.test.cpp
new file mode 100644
index 000000000..e9ac6e6ff
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleTransposeConv.test.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_TransposeConv)
+{
+ auto g = loco::make_graph();
+ auto node_trconv = g->nodes()->create<luci::CircleTransposeConv>();
+ node_trconv->padding(luci::Padding::SAME);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_trconv, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_trconv = dynamic_cast<luci::CircleTransposeConv *>(cloned);
+ ASSERT_NE(nullptr, cloned_trconv);
+ ASSERT_EQ(node_trconv->padding(), cloned_trconv->padding());
+ ASSERT_EQ(node_trconv->fusedActivationFunction(), cloned_trconv->fusedActivationFunction());
+}
+
+TEST(CloneNodeTest, clone_TransposeConv_padding_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_trconv = g->nodes()->create<luci::CircleTransposeConv>();
+ node_trconv->padding(luci::Padding::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_trconv, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
+
+TEST(CloneNodeTest, clone_TransposeConv_fAF_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_trconv = g->nodes()->create<luci::CircleTransposeConv>();
+ node_trconv->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_trconv, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
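
[Editor's note] The two _NEG tests pin down the caller-visible contract: clone_node() reports an invalid attribute by returning nullptr instead of asserting. A sketch of how a caller can exploit that for all-or-nothing multi-node cloning (hypothetical helper, assuming only clone_node() as above):

#include "luci/Service/CircleNodeClone.h"

#include <luci/IR/CircleNodes.h>

#include <vector>

// Clone a list of nodes; fail as a whole if any single clone is rejected.
bool clone_all(const std::vector<luci::CircleNode *> &nodes, loco::Graph *dst,
               std::vector<luci::CircleNode *> &out)
{
  out.clear();
  for (auto *n : nodes)
  {
    auto *c = luci::clone_node(n, dst);
    if (c == nullptr)
      return false; // caller discards dst (or the partial clones)
    out.push_back(c);
  }
  return true;
}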
diff --git a/compiler/luci/service/src/Nodes/CircleUnidirectionalSequenceLSTM.cpp b/compiler/luci/service/src/Nodes/CircleUnidirectionalSequenceLSTM.cpp
new file mode 100644
index 000000000..7e03d9a1b
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleUnidirectionalSequenceLSTM.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::STUV>::visit(const luci::CircleUnidirectionalSequenceLSTM *node)
+{
+ if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+ return nullptr;
+
+ auto *cloned = _graph->nodes()->create<luci::CircleUnidirectionalSequenceLSTM>();
+ if (cloned != nullptr)
+ {
+ cloned->fusedActivationFunction(node->fusedActivationFunction());
+ cloned->cell_clip(node->cell_clip());
+ cloned->proj_clip(node->proj_clip());
+ cloned->time_major(node->time_major());
+ cloned->asymmetric_quantize_inputs(node->asymmetric_quantize_inputs());
+ }
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleUnidirectionalSequenceLSTM.test.cpp b/compiler/luci/service/src/Nodes/CircleUnidirectionalSequenceLSTM.test.cpp
new file mode 100644
index 000000000..c3816ab27
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleUnidirectionalSequenceLSTM.test.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_UnidirectionalSequenceLSTM)
+{
+ auto g = loco::make_graph();
+ auto node_uslstm = g->nodes()->create<luci::CircleUnidirectionalSequenceLSTM>();
+ node_uslstm->fusedActivationFunction(luci::FusedActFunc::RELU);
+ node_uslstm->cell_clip(1.1f);
+ node_uslstm->proj_clip(2.2f);
+ node_uslstm->time_major(true);
+ node_uslstm->asymmetric_quantize_inputs(true);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_uslstm, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_uslstm = dynamic_cast<luci::CircleUnidirectionalSequenceLSTM *>(cloned);
+ ASSERT_NE(nullptr, cloned_uslstm);
+ ASSERT_EQ(node_uslstm->fusedActivationFunction(), cloned_uslstm->fusedActivationFunction());
+ ASSERT_EQ(node_uslstm->cell_clip(), cloned_uslstm->cell_clip());
+ ASSERT_EQ(node_uslstm->proj_clip(), cloned_uslstm->proj_clip());
+ ASSERT_EQ(node_uslstm->time_major(), cloned_uslstm->time_major());
+ ASSERT_EQ(node_uslstm->asymmetric_quantize_inputs(), cloned_uslstm->asymmetric_quantize_inputs());
+}
+
+TEST(CloneNodeTest, clone_UnidirectionalSequenceLSTM_NEG)
+{
+ auto g = loco::make_graph();
+ auto node_uslstm = g->nodes()->create<luci::CircleUnidirectionalSequenceLSTM>();
+ node_uslstm->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_uslstm, gc.get());
+ ASSERT_EQ(nullptr, cloned);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleUnique.cpp b/compiler/luci/service/src/Nodes/CircleUnique.cpp
new file mode 100644
index 000000000..fb191e1c6
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleUnique.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::STUV>::visit(const luci::CircleUnique *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleUnique>();
+ if (cloned != nullptr)
+ cloned->idx_out_type(node->idx_out_type());
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleUnique.test.cpp b/compiler/luci/service/src/Nodes/CircleUnique.test.cpp
new file mode 100644
index 000000000..a8ff9eade
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleUnique.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Unique)
+{
+ auto g = loco::make_graph();
+ auto node_uniq = g->nodes()->create<luci::CircleUnique>();
+ node_uniq->idx_out_type(loco::DataType::S32);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_uniq, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_uniq = dynamic_cast<luci::CircleUnique *>(cloned);
+ ASSERT_NE(nullptr, cloned_uniq);
+ ASSERT_EQ(node_uniq->idx_out_type(), cloned_uniq->idx_out_type());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleUniqueOut.cpp b/compiler/luci/service/src/Nodes/CircleUniqueOut.cpp
new file mode 100644
index 000000000..30093f9db
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleUniqueOut.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleUniqueOut *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleUniqueOut>();
+ if (cloned != nullptr)
+ cloned->index(node->index());
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleUniqueOut.test.cpp b/compiler/luci/service/src/Nodes/CircleUniqueOut.test.cpp
new file mode 100644
index 000000000..780ad4b78
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleUniqueOut.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_UniqueOut)
+{
+ auto g = loco::make_graph();
+ auto node_uout = g->nodes()->create<luci::CircleUniqueOut>();
+ node_uout->index(1);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_uout, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_uout = dynamic_cast<luci::CircleUniqueOut *>(cloned);
+ ASSERT_NE(nullptr, cloned_uout);
+ ASSERT_EQ(node_uout->index(), cloned_uout->index());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleUnpack.cpp b/compiler/luci/service/src/Nodes/CircleUnpack.cpp
new file mode 100644
index 000000000..4c90640c8
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleUnpack.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::STUV>::visit(const luci::CircleUnpack *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleUnpack>();
+ if (cloned != nullptr)
+ {
+ cloned->num(node->num());
+ cloned->axis(node->axis());
+ }
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleUnpack.test.cpp b/compiler/luci/service/src/Nodes/CircleUnpack.test.cpp
new file mode 100644
index 000000000..6559a9276
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleUnpack.test.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Unpack)
+{
+ auto g = loco::make_graph();
+ auto node_unp = g->nodes()->create<luci::CircleUnpack>();
+ node_unp->num(1);
+ node_unp->axis(2);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_unp, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_unp = dynamic_cast<luci::CircleUnpack *>(cloned);
+ ASSERT_NE(nullptr, cloned_unp);
+ ASSERT_EQ(node_unp->num(), cloned_unp->num());
+ ASSERT_EQ(node_unp->axis(), cloned_unp->axis());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleUnpackOut.cpp b/compiler/luci/service/src/Nodes/CircleUnpackOut.cpp
new file mode 100644
index 000000000..342d5daca
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleUnpackOut.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleUnpackOut *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleUnpackOut>();
+ if (cloned != nullptr)
+ cloned->index(node->index());
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleUnpackOut.test.cpp b/compiler/luci/service/src/Nodes/CircleUnpackOut.test.cpp
new file mode 100644
index 000000000..ec9bb974e
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleUnpackOut.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_UnpackOut)
+{
+ auto g = loco::make_graph();
+ auto node_uout = g->nodes()->create<luci::CircleUnpackOut>();
+ node_uout->index(1);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_uout, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_uout = dynamic_cast<luci::CircleUnpackOut *>(cloned);
+ ASSERT_NE(nullptr, cloned_uout);
+ ASSERT_EQ(node_uout->index(), cloned_uout->index());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleVariable.cpp b/compiler/luci/service/src/Nodes/CircleVariable.cpp
new file mode 100644
index 000000000..c1430bd3a
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleVariable.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleVariable *)
+{
+ return _graph->nodes()->create<luci::CircleVariable>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleVariable.test.cpp b/compiler/luci/service/src/Nodes/CircleVariable.test.cpp
new file mode 100644
index 000000000..7d29438be
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleVariable.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Variable)
+{
+ auto g = loco::make_graph();
+ auto node_dummy = g->nodes()->create<luci::CircleVariable>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_dummy, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_variable = dynamic_cast<luci::CircleVariable *>(cloned);
+ ASSERT_NE(nullptr, cloned_variable);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleWhere.cpp b/compiler/luci/service/src/Nodes/CircleWhere.cpp
new file mode 100644
index 000000000..7da48ed95
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleWhere.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::WXYZ>::visit(const luci::CircleWhere *)
+{
+ return _graph->nodes()->create<luci::CircleWhere>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleWhere.test.cpp b/compiler/luci/service/src/Nodes/CircleWhere.test.cpp
new file mode 100644
index 000000000..352719d85
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleWhere.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Where)
+{
+ auto g = loco::make_graph();
+ auto node_wh = g->nodes()->create<luci::CircleWhere>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_wh, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_wh = dynamic_cast<luci::CircleWhere *>(cloned);
+ ASSERT_NE(nullptr, cloned_wh);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleWhile.cpp b/compiler/luci/service/src/Nodes/CircleWhile.cpp
new file mode 100644
index 000000000..bdb6a4d16
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleWhile.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::WXYZ>::visit(const luci::CircleWhile *node)
+{
+ auto ic = node->input_count();
+ auto oc = node->output_count();
+
+ return _graph->nodes()->create<luci::CircleWhile>(ic, oc);
+}
+
+} // namespace luci
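
[Editor's note] CircleWhile is recreated with the same input/output arity, but its cond_branch/body_branch indices (and the resolved cond_graph/body_graph pointers) are deliberately not copied; the test below pins them at -1 and nullptr. Subgraph indices are only meaningful relative to a module, so a caller remaps them after cloning. A sketch, assuming the destination module keeps the same subgraph ordering (an assumption of this example, not something clone_node guarantees):

#include "luci/Service/CircleNodeClone.h"

#include <luci/IR/CircleNodes.h>

// Hypothetical helper: restore branch indices after cloning a While.
luci::CircleWhile *clone_while(const luci::CircleWhile *src, loco::Graph *dst)
{
  auto *cloned = loco::must_cast<luci::CircleWhile *>(luci::clone_node(src, dst));
  cloned->cond_branch(src->cond_branch()); // valid only if subgraph order is preserved
  cloned->body_branch(src->body_branch());
  return cloned;
}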
diff --git a/compiler/luci/service/src/Nodes/CircleWhile.test.cpp b/compiler/luci/service/src/Nodes/CircleWhile.test.cpp
new file mode 100644
index 000000000..53b7c82f7
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleWhile.test.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_While)
+{
+ auto g = loco::make_graph();
+ auto node_while = g->nodes()->create<luci::CircleWhile>(1, 1);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_while, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_while = dynamic_cast<luci::CircleWhile *>(cloned);
+ ASSERT_NE(nullptr, cloned_while);
+ ASSERT_EQ(-1, cloned_while->cond_branch());
+ ASSERT_EQ(-1, cloned_while->body_branch());
+ ASSERT_EQ(nullptr, cloned_while->cond_graph());
+ ASSERT_EQ(nullptr, cloned_while->body_graph());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleWhileOut.cpp b/compiler/luci/service/src/Nodes/CircleWhileOut.cpp
new file mode 100644
index 000000000..52075a1b8
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleWhileOut.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleWhileOut *node)
+{
+ auto *cloned = _graph->nodes()->create<luci::CircleWhileOut>();
+ if (cloned != nullptr)
+ cloned->index(node->index());
+ return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleWhileOut.test.cpp b/compiler/luci/service/src/Nodes/CircleWhileOut.test.cpp
new file mode 100644
index 000000000..b16a99c51
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleWhileOut.test.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_WhileOut)
+{
+ auto g = loco::make_graph();
+ auto node_iout = g->nodes()->create<luci::CircleWhileOut>();
+ node_iout->index(1);
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_iout, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_iout = dynamic_cast<luci::CircleWhileOut *>(cloned);
+ ASSERT_NE(nullptr, cloned_iout);
+ ASSERT_EQ(node_iout->index(), cloned_iout->index());
+}
diff --git a/compiler/luci/service/src/Nodes/CircleZerosLike.cpp b/compiler/luci/service/src/Nodes/CircleZerosLike.cpp
new file mode 100644
index 000000000..29db09eaf
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleZerosLike.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::WXYZ>::visit(const luci::CircleZerosLike *)
+{
+ return _graph->nodes()->create<luci::CircleZerosLike>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleZerosLike.test.cpp b/compiler/luci/service/src/Nodes/CircleZerosLike.test.cpp
new file mode 100644
index 000000000..6e0a4b3be
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleZerosLike.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_ZerosLike)
+{
+ auto g = loco::make_graph();
+ auto node_zl = g->nodes()->create<luci::CircleZerosLike>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_zl, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_zl = dynamic_cast<luci::CircleZerosLike *>(cloned);
+ ASSERT_NE(nullptr, cloned_zl);
+}
diff --git a/compiler/luci/service/src/ShapeDescription.cpp b/compiler/luci/service/src/ShapeDescription.cpp
index cbc302f70..adfb7e342 100644
--- a/compiler/luci/service/src/ShapeDescription.cpp
+++ b/compiler/luci/service/src/ShapeDescription.cpp
@@ -23,92 +23,32 @@
namespace luci
{
-ShapeDescription to_shape_description(const loco::TensorShape &shape)
-{
- ShapeDescription res;
-
- res._rank_known = true;
-
- res._dims.resize(shape.rank());
- for (uint32_t axis = 0; axis < shape.rank(); ++axis)
- {
- // All the dimensions SHOULD be known
- assert(shape.dim(axis).known());
- res._dims.at(axis) = shape.dim(axis).value();
- }
-
- return res;
-}
-
-ShapeDescription to_shape_description(const loco::FeatureShape &shape)
-{
- ShapeDescription res;
-
- res._rank_known = true;
-
- // T/F Lite encodes a feature map as a NHWC tensor
- res._dims.resize(4);
- res._dims.at(0) = shape.count().value();
- res._dims.at(1) = shape.height().value();
- res._dims.at(2) = shape.width().value();
- res._dims.at(3) = shape.depth().value();
-
- return res;
-}
-
-ShapeDescription to_shape_description(const loco::FilterShape &shape)
+ShapeDescription to_shape_description(const luci::CircleNode *circle_node)
{
ShapeDescription res;
res._rank_known = true;
- // T/F Lite encodes a convolution filter as a NHWC tensor
- res._dims.resize(4);
- res._dims.at(0) = shape.count().value();
- res._dims.at(1) = shape.height().value();
- res._dims.at(2) = shape.width().value();
- res._dims.at(3) = shape.depth().value();
+ res._dims.resize(circle_node->rank());
+ for (uint32_t i = 0; i < circle_node->rank(); ++i)
+ res._dims.at(i) = circle_node->dim(i).known() ? circle_node->dim(i).value() : -1;
return res;
}
-ShapeDescription to_shape_description(const loco::DepthwiseFilterShape &shape)
-{
- ShapeDescription res;
-
- res._rank_known = true;
-
- // T/F Lite encodes a depthwise convolution filter as a [1, H, W, C*M] tensor
- res._dims.resize(4);
- res._dims.at(0) = 1;
- res._dims.at(1) = shape.height().value();
- res._dims.at(2) = shape.width().value();
- res._dims.at(3) = shape.depth().value() * shape.multiplier().value();
-
- return res;
-}
-
-ShapeDescription to_shape_description(const loco::BiasShape &shape)
-{
- ShapeDescription res;
-
- res._rank_known = true;
-
- res._dims.resize(1);
- res._dims.at(0) = shape.length().value();
-
- return res;
-}
-
-ShapeDescription to_shape_description(const loco::MatrixShape &shape)
+ShapeDescription to_shape_description(const loco::TensorShape &shape)
{
ShapeDescription res;
res._rank_known = true;
- res._dims.resize(2);
- res._dims.at(0) = shape.height().value();
- res._dims.at(1) = shape.width().value();
+ res._dims.resize(shape.rank());
+ for (uint32_t axis = 0; axis < shape.rank(); ++axis)
+ {
+ // All the dimensions SHOULD be known
+ assert(shape.dim(axis).known());
+ res._dims.at(axis) = shape.dim(axis).value();
+ }
return res;
}
@@ -119,16 +59,6 @@ ShapeDescription to_shape_description(const loco::NodeShape &shape)
{
case loco::Domain::Tensor:
return to_shape_description(shape.as<loco::TensorShape>());
- case loco::Domain::Feature:
- return to_shape_description(shape.as<loco::FeatureShape>());
- case loco::Domain::Filter:
- return to_shape_description(shape.as<loco::FilterShape>());
- case loco::Domain::DepthwiseFilter:
- return to_shape_description(shape.as<loco::DepthwiseFilterShape>());
- case loco::Domain::Bias:
- return to_shape_description(shape.as<loco::BiasShape>());
- case loco::Domain::Matrix:
- return to_shape_description(shape.as<loco::MatrixShape>());
default:
break;
}
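
[Editor's note] The net effect of this rewrite: the new CircleNode overload reads the node's own dimensions and encodes unknown ones as -1, the loco::TensorShape overload keeps asserting that every dimension is known, and the non-Tensor loco domains now fall through to the default case and throw (as the BiasShape_NEG test below checks). A usage sketch for the new overload, assuming the luci IR accessors shown elsewhere in this diff:

#include "luci/Service/ShapeDescription.h"

#include <luci/IR/Nodes/CircleConst.h>

void describe_partial_shape()
{
  luci::CircleConst node;
  node.rank(2);
  node.dim(0).set(4);  // known dimension: exported as 4
  node.dim(1).unset(); // unknown dimension (also the default): exported as -1

  auto sd = luci::to_shape_description(&node);
  // sd._rank_known == true, sd._dims == {4, -1}
}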
diff --git a/compiler/luci/service/src/ShapeDescription.test.cpp b/compiler/luci/service/src/ShapeDescription.test.cpp
new file mode 100644
index 000000000..6e53aac75
--- /dev/null
+++ b/compiler/luci/service/src/ShapeDescription.test.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/ShapeDescription.h"
+
+#include <luci/IR/CircleNode.h>
+#include <luci/IR/Nodes/CircleConst.h>
+
+#include <gtest/gtest.h>
+
+TEST(ShapeDescriptionTest, CircleNode)
+{
+ // Use CircleConst as CircleNode
+ luci::CircleConst circle_const;
+ circle_const.shape({1, 2, 3, 4});
+
+ auto sd = luci::to_shape_description(&circle_const);
+
+ ASSERT_EQ(4, sd._dims.size());
+ ASSERT_EQ(1, sd._dims.at(0));
+ ASSERT_TRUE(sd._rank_known);
+}
+
+TEST(ShapeDescriptionTest, TensorShape)
+{
+ loco::TensorShape tensor_shape{1, 2, 3, 4};
+ loco::NodeShape node_shape(tensor_shape);
+
+ auto sd = luci::to_shape_description(node_shape);
+
+ ASSERT_EQ(4, sd._dims.size());
+ ASSERT_EQ(1, sd._dims.at(0));
+ ASSERT_TRUE(sd._rank_known);
+}
+
+TEST(ShapeDescriptionTest, BiasShape_NEG)
+{
+ loco::BiasShape bias_shape;
+ bias_shape.length() = 1;
+ loco::NodeShape node_shape(bias_shape);
+
+ EXPECT_THROW(luci::to_shape_description(node_shape), std::exception);
+}
diff --git a/compiler/luci/service/src/ShapeInfer_StridedSlice.cpp b/compiler/luci/service/src/ShapeInfer_StridedSlice.cpp
index 341201148..5a22da319 100644
--- a/compiler/luci/service/src/ShapeInfer_StridedSlice.cpp
+++ b/compiler/luci/service/src/ShapeInfer_StridedSlice.cpp
@@ -17,43 +17,82 @@
#include "ShapeInfer_StridedSlice.h"
#include "Check.h"
+#include "CircleShapeInferenceHelper.h"
#include <luci/IR/CircleNode.h>
#include <loco/IR/DataType.h>
#include <loco/IR/NodeShape.h>
#include <oops/InternalExn.h>
-#include <loco/Service/ShapeInference.h>
+#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
+// code referenced from
+// https://github.com/tensorflow/tensorflow/blob/3f878cff5b698b82eea85db2b60d65a2e320850e/
+// tensorflow/lite/kernels/strided_slice.cc
+// tensorflow/lite/kernels/internal/strided_slice_logic.h
+
namespace
{
-// This Op only supports 1-4D cases and since we use the reference 4D
-// implementation, the 1-3D tensors are mapped to 4D.
+// This Op only supports 1-5D cases and since we use the reference 5D
+// implementation, the 1-4D tensors are mapped to 5D.
-const int kMaxDim = 4;
+const int kMaxDim = 5;
const loco::DataType S32 = loco::DataType::S32;
-using int8 = int8_t;
-using int16 = int16_t;
-
struct StridedSliceParams
{
- int8 start_indices_count;
- int16 start_indices[kMaxDim];
- int8 stop_indices_count;
- int16 stop_indices[kMaxDim];
- int8 strides_count;
- int16 strides[kMaxDim];
-
- int16 begin_mask;
- int16 ellipsis_mask;
- int16 end_mask;
- int16 new_axis_mask;
- int16 shrink_axis_mask;
+ int8_t start_indices_count = 0;
+ int32_t start_indices[kMaxDim];
+ int8_t stop_indices_count = 0;
+ int32_t stop_indices[kMaxDim];
+ int8_t strides_count = 0;
+ int32_t strides[kMaxDim];
+
+ int16_t begin_mask = 0;
+ int16_t ellipsis_mask = 0;
+ int16_t end_mask = 0;
+ int16_t new_axis_mask = 0;
+ int16_t shrink_axis_mask = 0;
+};
+
+struct StridedSliceContext
+{
+ StridedSliceContext(const luci::CircleStridedSlice *node)
+ {
+ // check overflow issues
+ assert(static_cast<int16_t>(node->begin_mask()) == node->begin_mask());
+ assert(static_cast<int16_t>(node->ellipsis_mask()) == node->ellipsis_mask());
+ assert(static_cast<int16_t>(node->end_mask()) == node->end_mask());
+ assert(static_cast<int16_t>(node->new_axis_mask()) == node->new_axis_mask());
+ assert(static_cast<int16_t>(node->shrink_axis_mask()) == node->shrink_axis_mask());
+
+ params.begin_mask = node->begin_mask();
+ params.ellipsis_mask = node->ellipsis_mask();
+ params.end_mask = node->end_mask();
+ params.new_axis_mask = node->new_axis_mask();
+ params.shrink_axis_mask = node->shrink_axis_mask();
+
+ input = loco::must_cast<luci::CircleNode *>(node->input());
+ begin = loco::must_cast<luci::CircleConst *>(node->begin());
+ end = loco::must_cast<luci::CircleConst *>(node->end());
+ strides = loco::must_cast<luci::CircleConst *>(node->strides());
+
+ loco::TensorShape input_shape = luci::shape_get(input).as<loco::TensorShape>();
+ input_dims = input_shape.rank();
+ }
+ StridedSliceParams params;
+ luci::CircleNode *input = nullptr;
+ luci::CircleConst *begin = nullptr;
+ luci::CircleConst *end = nullptr;
+ luci::CircleConst *strides = nullptr;
+
+  // Equivalent input shape after adding axes according to new_axis_mask.
+ loco::TensorShape effective_input_shape;
+ int64_t input_dims = 0;
};
// Use until std::clamp() is available from C++17.
@@ -70,22 +109,22 @@ inline int Clamp(const int32_t v, const int32_t lo, const int32_t hi)
// Return the index for the first element along that axis. This index will be a
// positive integer between [0, axis_size - 1] that can be used to index
// directly into the data.
-inline int StartForAxis(const StridedSliceParams &params, const loco::TensorShape &input_shape,
- uint32_t axis)
+inline int64_t StartForAxis(const StridedSliceParams &params, const loco::TensorShape &input_shape,
+ int64_t axis)
{
const auto begin_mask = params.begin_mask;
const auto *start_indices = params.start_indices;
const auto *strides = params.strides;
- const int32_t axis_size = static_cast<int>(input_shape.dim(axis).value());
+ const int64_t axis_size = static_cast<int64_t>(input_shape.dim(axis).value());
if (axis_size == 0)
{
return 0;
}
// Begin with the specified index.
- int32_t start = start_indices[axis];
+ int64_t start = start_indices[axis];
// begin_mask override
- if (begin_mask & (1 << axis))
+ if (begin_mask & (1LL << axis))
{
if (strides[axis] > 0)
{
@@ -108,7 +147,16 @@ inline int StartForAxis(const StridedSliceParams &params, const loco::TensorShap
}
// Clamping
- start = Clamp(start, 0, axis_size - 1);
+ if (strides[axis] > 0)
+ {
+ // Forward iteration
+ start = Clamp(start, 0, axis_size);
+ }
+ else
+ {
+ // Backward iteration
+ start = Clamp(start, -1, axis_size - 1);
+ }
return start;
}
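
[Editor's note] The clamp range now depends on the iteration direction: a forward slice may legitimately start at axis_size (an empty slice) and a backward slice may start at -1 (already exhausted), where the old code pinned both to [0, axis_size - 1]. A standalone illustration of just that clamp; it assumes negative indices were already folded in, as StartForAxis does before this point:

#include <algorithm>
#include <cstdint>

int64_t clamp_start(int64_t start, int64_t axis_size, int32_t stride)
{
  if (stride > 0)
    return std::max<int64_t>(0, std::min(start, axis_size));    // may be one past the end
  return std::max<int64_t>(-1, std::min(start, axis_size - 1)); // may be one before the begin
}

// clamp_start(10, 4, 1) == 4  -> empty forward slice (the old clamp returned 3)
// clamp_start(-6, 4, -1) == -1 -> exhausted backward slice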
@@ -118,22 +166,22 @@ inline int StartForAxis(const StridedSliceParams &params, const loco::TensorShap
// element. ie. So if you were iterating through all elements of a 1D array of
// size 4, this function would return 4 as the stop, because it is one past the
// "real" indices of 0, 1, 2 & 3.
-inline int StopForAxis(const StridedSliceParams &params, const loco::TensorShape &input_shape,
- int axis, int start_for_axis)
+inline int64_t StopForAxis(const StridedSliceParams &params, const loco::TensorShape &input_shape,
+ int64_t axis, int64_t start_for_axis)
{
const auto end_mask = params.end_mask;
const auto shrink_axis_mask = params.shrink_axis_mask;
const auto *stop_indices = params.stop_indices;
const auto *strides = params.strides;
- const int axis_size = static_cast<int32_t>(input_shape.dim(axis).value());
+ const int64_t axis_size = static_cast<int64_t>(input_shape.dim(axis).value());
if (axis_size == 0)
{
return 0;
}
// Begin with the specified index
- const bool shrink_axis = shrink_axis_mask & (1 << axis);
- int32_t stop = stop_indices[axis];
+ const bool shrink_axis = shrink_axis_mask & (1LL << axis);
+ int64_t stop = stop_indices[axis];
// When shrinking an axis, the end position does not matter (and can be
// incorrect when negative indexing is used, see Issue #19260). Always use
@@ -141,11 +189,11 @@ inline int StopForAxis(const StridedSliceParams &params, const loco::TensorShape
// already been adjusted for negative indices.
if (shrink_axis)
{
- stop = start_for_axis + 1;
+ return start_for_axis + 1;
}
// end_mask override
- if (end_mask & (1 << axis))
+ if (end_mask & (1LL << axis))
{
if (strides[axis] > 0)
{
@@ -183,37 +231,134 @@ inline int StopForAxis(const StridedSliceParams &params, const loco::TensorShape
return stop;
}
-StridedSliceParams BuildStridedSliceParams(const luci::CircleStridedSlice *node)
+StridedSliceParams BuildStridedSliceParams(StridedSliceContext *op_context)
{
StridedSliceParams op_params;
- if (kMaxDim < node->rank())
+ // The ellipsis_mask and new_axis_mask in op_params are not used. Those masks
+ // are processed here to update begin_mask, end_mask and the index range.
+ op_params.begin_mask = 0;
+ op_params.ellipsis_mask = 0;
+ op_params.end_mask = 0;
+ op_params.new_axis_mask = 0;
+ op_params.shrink_axis_mask = 0;
+
+ // Count indexes where the new_axis_mask is set but the ellipsis_mask is not.
+ loco::TensorShape begin_shape = luci::shape_get(op_context->begin).as<loco::TensorShape>();
+ const int64_t begin_count = static_cast<int64_t>(begin_shape.dim(0).value());
+ int64_t num_add_axis = 0;
+ for (int64_t i = 0; i < begin_count; ++i)
{
- INTERNAL_EXN_V("Cannot support StridedSlice rank > ", kMaxDim);
+ if (!((1LL << i) & op_context->params.ellipsis_mask) &&
+ ((1LL << i) & op_context->params.new_axis_mask))
+ {
+ num_add_axis++;
+ }
}
- auto begin_node = loco::must_cast<luci::CircleConst *>(node->begin());
- auto end_node = loco::must_cast<luci::CircleConst *>(node->end());
- auto strides_node = loco::must_cast<luci::CircleConst *>(node->strides());
+  // Calculate the dims of input after adding new axes.
+ const int64_t effective_dims = op_context->input_dims + num_add_axis;
+
+  // If begin, end and strides are not fully provided, the Ellipsis should be
+  // expanded to multiple dimensions (e.g. for spec [Ellipsis, 2] on a 3D
+  // input, the Ellipsis applies to the first 2 dimensions). Besides, if the
+  // new_axis_mask and the ellipsis_mask are set at the same index, the
+  // new_axis_mask has no effect.
+ int64_t effective_ellipsis_mask = 0, effective_new_axis_mask = 0;
+ int64_t ellipsis_start_idx = effective_dims, expanded_ellipsis = 0;
+ for (int64_t i = 0; i < effective_dims;)
+ {
+ if ((1LL << i) & op_context->params.ellipsis_mask)
+ {
+ ellipsis_start_idx = i;
+ int64_t ellipsis_end_idx =
+ std::max(i + 1, std::min(i + 1 + num_add_axis + op_context->input_dims - begin_count,
+ effective_dims));
+ expanded_ellipsis = ellipsis_end_idx - ellipsis_start_idx - 1;
+
+ // Set bit for effective_ellipsis_mask.
+ for (; i < ellipsis_end_idx; ++i)
+ {
+ effective_ellipsis_mask |= (1LL << i);
+ }
+ continue;
+ }
- uint32_t dims_count = begin_node->size<S32>();
+ if ((1LL << (i - expanded_ellipsis)) & op_context->params.new_axis_mask)
+ {
+ effective_new_axis_mask |= (1LL << i);
+ }
+ ++i;
+ }
- op_params.start_indices_count = dims_count;
- op_params.stop_indices_count = dims_count;
- op_params.strides_count = dims_count;
+ // Calculate effective_input_shape and its corresponding begin, end, strides.
+ loco::TensorShape input_shape = luci::shape_get(op_context->input).as<loco::TensorShape>();
+ int64_t added_ellipsis = 0, added_axises = 0;
+ op_context->effective_input_shape.rank(effective_dims);
- for (uint32_t i = 0; i < dims_count; ++i)
+ for (int64_t i = 0; i < effective_dims; ++i)
{
- op_params.start_indices[i] = begin_node->at<S32>(i);
- op_params.stop_indices[i] = end_node->at<S32>(i);
- op_params.strides[i] = strides_node->at<S32>(i);
+ if ((1LL << i) & effective_ellipsis_mask)
+ {
+ // If ellipsis_mask, set the begin_mask and end_mask at that index.
+ added_ellipsis = std::max(int64_t(0), i - ellipsis_start_idx);
+ assert(i < 16);
+ op_params.begin_mask |= (1LL << i);
+ op_params.end_mask |= (1LL << i);
+ op_params.strides[i] = 1;
+ op_context->effective_input_shape.dim(i) = input_shape.dim(i - added_axises);
+ }
+ else if ((1LL << i) & effective_new_axis_mask)
+ {
+ // If new_axis_mask is set, it is equivalent to adding a new dim of 1 to
+ // input tensor. Store added shape to effective_input_shape.
+ op_params.start_indices[i] = 0;
+ op_params.stop_indices[i] = 1;
+ op_params.strides[i] = 1;
+ op_context->effective_input_shape.dim(i) = loco::Dimension(1);
+ added_axises++;
+ }
+ else if (i >= begin_count + expanded_ellipsis)
+ {
+ op_params.start_indices[i] = 0;
+ op_params.stop_indices[i] = 0;
+ op_params.strides[i] = 1;
+ assert(i < 16);
+ op_params.begin_mask |= (1LL << i);
+ op_params.end_mask |= (1LL << i);
+ op_context->effective_input_shape.dim(i) = input_shape.dim(i - added_axises);
+ }
+ else
+ {
+ const int64_t orig_idx = i - added_ellipsis;
+ op_params.start_indices[i] = op_context->begin->at<S32>(orig_idx);
+ op_params.stop_indices[i] = op_context->end->at<S32>(orig_idx);
+ op_params.strides[i] = op_context->strides->at<S32>(orig_idx);
+ if (op_context->params.begin_mask & (1LL << orig_idx))
+ {
+ assert(i < 16);
+ op_params.begin_mask |= (1LL << i);
+ }
+ if (op_context->params.end_mask & (1LL << orig_idx))
+ {
+ assert(i < 16);
+ op_params.end_mask |= (1LL << i);
+ }
+ if (op_context->params.shrink_axis_mask & (1LL << orig_idx))
+ {
+ assert(i < 16);
+ op_params.shrink_axis_mask |= (1LL << i);
+ }
+ op_context->effective_input_shape.dim(i) = input_shape.dim(i - added_axises);
+ }
}
- op_params.begin_mask = node->begin_mask();
- op_params.ellipsis_mask = 0;
- op_params.end_mask = node->end_mask();
- op_params.new_axis_mask = 0;
- op_params.shrink_axis_mask = node->shrink_axis_mask();
+ // make sure no overflow
+ assert(static_cast<int8_t>(effective_dims) == static_cast<int32_t>(effective_dims));
+
+ op_params.start_indices_count = effective_dims;
+ op_params.stop_indices_count = effective_dims;
+ op_params.strides_count = effective_dims;
return op_params;
}
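
[Editor's note] A worked example of the mask expansion above, derived by hand from this code (so treat it as illustrative): take a rank-3 input and the spec [Ellipsis, 2], i.e. begin_count = 2, ellipsis_mask = 0b01, new_axis_mask = 0. Then num_add_axis = 0 and effective_dims = 3. At i = 0 the ellipsis expands to ellipsis_end_idx = max(1, min(0 + 1 + 0 + 3 - 2, 3)) = 2, so bits 0 and 1 form effective_ellipsis_mask = 0b011 and expanded_ellipsis = 1. In the second loop those two axes get begin_mask/end_mask bits and stride 1 (take the whole axis), while index 2 falls into the final branch with orig_idx = 2 - added_ellipsis = 1, i.e. it reads its begin/end/strides from the literal `2` of the spec.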
@@ -241,55 +386,56 @@ loco::TensorShape infer_output_shape(const CircleStridedSlice *node)
LUCI_ASSERT(end_node->dtype() == S32, "Only support S32 for end_node");
LUCI_ASSERT(strides_node->dtype() == S32, "Only support S32 for strides_node");
- assert(node->ellipsis_mask() == 0);
- assert(node->new_axis_mask() == 0);
+ LUCI_ASSERT(begin_node->rank() == 1, "Only support rank 1 for begin_node");
+ LUCI_ASSERT(end_node->rank() == 1, "Only support rank 1 for end_node");
+ LUCI_ASSERT(strides_node->rank() == 1, "Only support rank 1 for strides_node");
- auto op_params = BuildStridedSliceParams(node);
- loco::TensorShape input_shape = loco::shape_get(input_node).as<loco::TensorShape>();
+ loco::TensorShape input_shape = luci::shape_get(input_node).as<loco::TensorShape>();
- uint32_t num_input_axes = input_shape.rank();
- assert(begin_node->size<S32>() <= num_input_axes);
- assert(end_node->size<S32>() <= num_input_axes);
- assert(strides_node->size<S32>() <= num_input_axes);
- for (uint32_t i = 0; i < strides_node->size<S32>(); i++)
- {
- LUCI_ASSERT(strides_node->at<S32>(i) != 0, "Stride value has to be non-zero");
- }
+ assert(begin_node->size<S32>() <= input_shape.rank());
+ assert(end_node->size<S32>() <= input_shape.rank());
+ assert(strides_node->size<S32>() <= input_shape.rank());
- uint32_t shape_size = 0;
- std::array<int32_t, 16> output_shape_data;
+ StridedSliceContext op_context(node);
+ auto op_params = BuildStridedSliceParams(&op_context);
+ auto &effective_input_shape = op_context.effective_input_shape;
+ std::vector<int64_t> output_shape_vector;
- for (uint32_t idx = 0; idx < num_input_axes; ++idx)
+ for (int32_t idx = effective_input_shape.rank() - 1; idx >= 0; --idx)
{
- int32_t begin = StartForAxis(op_params, input_shape, idx);
- int32_t end = StopForAxis(op_params, input_shape, idx, begin);
- if (end < 0)
- end = input_shape.dim(idx).value() + end + 1;
+ int32_t stride = op_params.strides[idx];
+ LUCI_ASSERT(stride != 0, "stride value has to be non-zero");
- // This is valid for both positive and negative strides
- int32_t stride = strides_node->at<S32>(idx);
- int32_t dim_shape = std::ceil(static_cast<float>(end - begin) / stride);
- assert(dim_shape > 0);
+ int64_t begin = StartForAxis(op_params, effective_input_shape, idx);
+ int64_t end = StopForAxis(op_params, effective_input_shape, idx, begin);
// When shrinking an axis, the end position does not matter (and can be
// incorrect when negative indexing is used, see Issue #19260). Always use
// begin + 1 to generate a length 1 slice, since begin has
   // already been adjusted for negative indices by StartForAxis.
-  const bool shrink_axis = node->shrink_axis_mask() & (1 << idx);
+  const bool shrink_axis = op_params.shrink_axis_mask & (1 << idx);
if (shrink_axis)
{
- assert(dim_shape == 1);
+ end = begin + 1;
}
- else
+
+ // This is valid for both positive and negative strides
+ int64_t dim_shape = std::ceil((end - begin) / static_cast<float>(stride));
+ dim_shape = dim_shape < 0 ? 0 : dim_shape;
+ if (!shrink_axis)
{
- output_shape_data[shape_size++] = dim_shape;
+ output_shape_vector.push_back(dim_shape);
}
}
+ auto shape_size = output_shape_vector.size();
output_shape.rank(shape_size);
for (uint32_t idx = 0; idx < shape_size; ++idx)
{
- output_shape.dim(idx) = output_shape_data[idx];
+ int64_t dim = output_shape_vector.at(shape_size - 1u - idx);
+ LUCI_ASSERT(0 <= dim && dim < 0xfffffffL, "Dimension size exceeds limit");
+ // reverse copy
+ output_shape.dim(idx) = static_cast<uint32_t>(dim);
}
return output_shape;
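
[Editor's note] A gtest-style sketch of the reworked inference, written in the conventions of the tests in this diff (a hypothetical test, assuming infer_output_shape is visible via ShapeInfer_StridedSlice.h in namespace luci): slicing a (4, 5) input with begin (1, 0), end (2, 5), strides (1, 1) and shrink_axis_mask bit 0 drops axis 0 and yields shape (5).

#include "ShapeInfer_StridedSlice.h"

#include <luci/IR/CircleNodes.h>

#include <gtest/gtest.h>

TEST(ShapeInferStridedSliceTest, shrink_axis_sketch)
{
  luci::CircleInput input;
  input.shape({4, 5});
  input.shape_status(luci::ShapeStatus::VALID);

  // Fill a rank-1 S32 constant, as the tests in this diff do by hand.
  auto make_s32 = [](luci::CircleConst &c, std::initializer_list<int32_t> v) {
    c.dtype(loco::DataType::S32);
    c.rank(1);
    c.dim(0).set(static_cast<uint32_t>(v.size()));
    c.size<loco::DataType::S32>(static_cast<uint32_t>(v.size()));
    uint32_t i = 0;
    for (auto e : v)
      c.at<loco::DataType::S32>(i++) = e;
    c.shape_status(luci::ShapeStatus::VALID);
  };

  luci::CircleConst begin, end, strides;
  make_s32(begin, {1, 0});
  make_s32(end, {2, 5});
  make_s32(strides, {1, 1});

  luci::CircleStridedSlice ss;
  ss.input(&input);
  ss.begin(&begin);
  ss.end(&end);
  ss.strides(&strides);
  ss.shrink_axis_mask(1); // drop axis 0 -> output shape [5]

  auto shape = luci::infer_output_shape(&ss);
  ASSERT_EQ(1, shape.rank());
  ASSERT_EQ(5, shape.dim(0).value());
}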
diff --git a/compiler/luci/service/src/Validate.cpp b/compiler/luci/service/src/Validate.cpp
index 282a068e0..3f6f9c01e 100644
--- a/compiler/luci/service/src/Validate.cpp
+++ b/compiler/luci/service/src/Validate.cpp
@@ -17,14 +17,16 @@
#include "luci/Service/Validate.h"
#include <luci/IR/Nodes/CircleOutput.h>
+#include <luci/IR/CircleNodeVisitor.h>
#include <luci/Log.h>
+#include <luci/LogHelper.h>
#include <loco/IR/NodeShape.h>
-#include <loco/Service/ShapeInference.h>
-#include <loco/Service/TypeInference.h>
#include <cassert>
+#include <unordered_map>
#include <vector>
+#include <iostream>
namespace
{
@@ -36,7 +38,28 @@ std::ostream &operator<<(std::ostream &os, const loco::TensorShape &tensor_shape
{
if (r)
os << ",";
- os << tensor_shape.dim(r).value();
+
+ if (tensor_shape.dim(r).known())
+ os << tensor_shape.dim(r).value();
+ else
+ os << "?";
+ }
+ os << "]";
+ return os;
+}
+
+std::ostream &operator<<(std::ostream &os, const luci::CircleNode *circle_node)
+{
+ os << "[";
+ for (uint32_t r = 0; r < circle_node->rank(); ++r)
+ {
+ if (r)
+ os << ",";
+
+ if (circle_node->dim(r).known())
+ os << circle_node->dim(r).value();
+ else
+ os << "?";
}
os << "]";
return os;
@@ -59,6 +82,65 @@ luci::CircleOutput *find_node(std::vector<loco::Node *> nodes, loco::GraphOutput
return nullptr;
}
+// TODO Reduce duplication with validate_shape_dtype
+bool validate_shape(loco::Graph *g)
+{
+ LOGGER(l);
+
+ auto output_nodes = loco::output_nodes(g);
+
+ auto count = g->outputs()->size();
+ for (uint32_t out = 0; out < count; ++out)
+ {
+ auto graph_out = g->outputs()->at(out);
+ auto out_index = graph_out->index();
+
+ auto circle_output = find_node(output_nodes, out_index);
+ assert(circle_output != nullptr);
+ assert(circle_output->from() != nullptr);
+ auto circle_node = loco::must_cast<luci::CircleNode *>(circle_output->from());
+
+ // Shape validation for CircleOutputExclude is not needed
+ if (dynamic_cast<luci::CircleOutputExclude *>(circle_node))
+ continue;
+
+ assert(circle_node->shape_status() != luci::ShapeStatus::UNDEFINED);
+
+ // check if output node shape is same as graph output shape
+ auto go_tensor_shape = graph_out->shape();
+ assert(go_tensor_shape);
+
+    // NOTE Even if the shape of the graph output is [] (which means "shape
+    //      inference was impossible") while the shape of the CircleNode is not,
+    //      it can be a valid case because the shape inference algorithm of
+    //      CircleNode may have been upgraded since then. The opposite is also
+    //      possible. If such cases appear, the validation code below should be
+    //      fixed.
+ bool is_shape_valid = (circle_node->rank() == go_tensor_shape->rank());
+ for (uint32_t i = 0; is_shape_valid && i < circle_node->rank(); ++i)
+ {
+ if (!circle_node->dim(i).known() || !go_tensor_shape->dim(i).known())
+ {
+ // If at least one of two dimensions is unknown,
+ // the unknown dimension can accept any value.
+ INFO(l) << "Unknown dimension is matched with known dimension" << std::endl;
+ }
+ else if (circle_node->dim(i).value() != go_tensor_shape->dim(i).value())
+ {
+ is_shape_valid = false;
+ }
+ }
+
+ if (is_shape_valid == false)
+ {
+ INFO(l) << "[luci] Shape for output #" << out_index << " not same " << std::endl;
+ INFO(l) << "[luci] " << circle_node->name() << " " << circle_node << " vs "
+ << *go_tensor_shape << std::endl;
+ return false;
+ }
+ }
+
+ return true;
+}
+
bool validate_shape_dtype(loco::Graph *g)
{
LOGGER(l);
@@ -75,23 +157,47 @@ bool validate_shape_dtype(loco::Graph *g)
assert(circle_output != nullptr);
assert(circle_output->from() != nullptr);
auto circle_node = loco::must_cast<luci::CircleNode *>(circle_output->from());
- assert(loco::shape_known(circle_node));
+
+ // Shape and dtype validation for CircleOutputExclude is not needed
+ if (dynamic_cast<luci::CircleOutputExclude *>(circle_node))
+ continue;
+
+ assert(circle_node->shape_status() != luci::ShapeStatus::UNDEFINED);
// check if output node shape is same as graph output shape
- auto co_tensor_shape = loco::shape_get(circle_node).as<loco::TensorShape>();
auto go_tensor_shape = graph_out->shape();
assert(go_tensor_shape);
- if (!(co_tensor_shape == *go_tensor_shape))
+
+ // NOTE Even if the shape of the graph output is [] (which means "shape inference was
+ // impossible") while the shape of the CircleNode is not, this can be a valid case:
+ // the shape inference algorithm for CircleNode may have been upgraded since the model
+ // was created. The opposite is also possible. If such cases appear, the validation
+ // code below should be fixed.
+ bool is_shape_valid = (circle_node->rank() == go_tensor_shape->rank());
+ for (uint32_t i = 0; is_shape_valid && i < circle_node->rank(); ++i)
+ {
+ if (!circle_node->dim(i).known() || !go_tensor_shape->dim(i).known())
+ {
+ // If at least one of two dimensions is unknown,
+ // the unknown dimension can accept any value.
+ INFO(l) << "Unknown dimension is matched with known dimension" << std::endl;
+ }
+ else if (circle_node->dim(i).value() != go_tensor_shape->dim(i).value())
+ {
+ is_shape_valid = false;
+ }
+ }
+
+ if (is_shape_valid == false)
{
INFO(l) << "[luci] Shape for output #" << out_index << " not same " << std::endl;
- INFO(l) << "[luci] " << circle_node->name() << " " << co_tensor_shape << " vs "
+ INFO(l) << "[luci] " << circle_node->name() << " " << circle_node << " vs "
<< *go_tensor_shape << std::endl;
return false;
}
// check if data type match
- assert(loco::dtype_known(circle_node));
- if (graph_out->dtype() != loco::dtype_get(circle_node))
+ assert(circle_node->dtype() != loco::DataType::Unknown);
+ if (graph_out->dtype() != circle_node->dtype())
{
INFO(l) << "[luci] Type for output #" << out_index << " not same " << std::endl;
return false;
@@ -101,11 +207,118 @@ bool validate_shape_dtype(loco::Graph *g)
return true;
}
+class MultiOutNodeValidate final : public luci::CircleNodeVisitor<bool>
+{
+public:
+ MultiOutNodeValidate() {}
+
+private:
+ template <class T> bool check(const luci::CircleNode *node)
+ {
+ auto succs = loco::succs(node);
+ if (succs.size() < 1)
+ return false;
+ for (const auto &cnode : succs)
+ {
+ auto const child = dynamic_cast<const T *>(cnode);
+ if (child == nullptr)
+ return false;
+ }
+ return true;
+ }
+
+public:
+ bool visit(const luci::CircleBidirectionalSequenceLSTM *node) final
+ {
+ return check<luci::CircleBidirectionalSequenceLSTMOut>(node);
+ }
+ bool visit(const luci::CircleCustom *node) final { return check<luci::CircleCustomOut>(node); }
+ bool visit(const luci::CircleIf *node) final { return check<luci::CircleIfOut>(node); }
+ bool visit(const luci::CircleNonMaxSuppressionV4 *node) final
+ {
+ return check<luci::CircleNonMaxSuppressionV4Out>(node);
+ }
+ bool visit(const luci::CircleNonMaxSuppressionV5 *node) final
+ {
+ return check<luci::CircleNonMaxSuppressionV5Out>(node);
+ }
+ bool visit(const luci::CircleSplit *node) final { return check<luci::CircleSplitOut>(node); }
+ bool visit(const luci::CircleSplitV *node) final { return check<luci::CircleSplitVOut>(node); }
+ bool visit(const luci::CircleTopKV2 *node) final { return check<luci::CircleTopKV2Out>(node); }
+ bool visit(const luci::CircleUnique *node) final { return check<luci::CircleUniqueOut>(node); }
+ bool visit(const luci::CircleUnpack *node) final { return check<luci::CircleUnpackOut>(node); }
+ bool visit(const luci::CircleWhile *node) final { return check<luci::CircleWhileOut>(node); }
+
+ // default true for other nodes
+ bool visit(const luci::CircleNode *) final { return true; }
+};
+
+/**
+ * @brief Validate that each multi-output node is followed only by its dedicated
+ *        output IR, e.g. CircleIf by CircleIfOut.
+ */
+bool validate_multi_outs(loco::Graph *g)
+{
+ LOGGER(l);
+
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto const cnode = loco::must_cast<luci::CircleNode *>(node);
+
+ MultiOutNodeValidate d;
+ if (cnode->accept(&d))
+ continue;
+
+ auto const name = cnode->name();
+ INFO(l) << "Node: " << name << ", " << (uint32_t)(cnode->opcode()) << " has invalid successor."
+ << std::endl;
+
+ return false;
+ }
+
+ return true;
+}
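+
+// For example (illustrative), a well-formed graph feeds each result of a
+// CircleSplit through a dedicated CircleSplitOut:
+//
+//   [CircleSplit] --> [CircleSplitOut] --> consumer
+//                 \-> [CircleSplitOut] --> consumer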
+
+class VirtualNodeDetector final : public luci::CircleNodeVisitor<bool>
+{
+public:
+ VirtualNodeDetector() {}
+
+public:
+ bool visit(const luci::CircleBidirectionalSequenceLSTMOut *) final { return true; }
+ bool visit(const luci::CircleCustomOut *) final { return true; }
+ bool visit(const luci::CircleIfOut *) final { return true; }
+ bool visit(const luci::CircleNonMaxSuppressionV4Out *) final { return true; }
+ bool visit(const luci::CircleNonMaxSuppressionV5Out *) final { return true; }
+ bool visit(const luci::CircleSplitOut *) final { return true; }
+ bool visit(const luci::CircleSplitVOut *) final { return true; }
+ bool visit(const luci::CircleTopKV2Out *) final { return true; }
+ bool visit(const luci::CircleUnpackOut *) final { return true; }
+ bool visit(const luci::CircleUniqueOut *) final { return true; }
+ bool visit(const luci::CircleWhileOut *) final { return true; }
+ bool visit(const luci::CircleOutputDummy *) final { return true; }
+ bool visit(const luci::CircleOutputExclude *) final { return true; }
+
+ // Return false by default
+ bool visit(const luci::CircleNode *) final { return false; }
+};
+
} // namespace
namespace luci
{
+bool validate_shape(loco::Graph *g)
+{
+ if (!loco::valid(g))
+ return false;
+
+ if (!::validate_shape(g))
+ return false;
+
+ return true;
+}
+
bool validate(loco::Graph *g)
{
if (!loco::valid(g))
@@ -114,9 +327,127 @@ bool validate(loco::Graph *g)
if (!validate_shape_dtype(g))
return false;
+ if (!validate_multi_outs(g))
+ return false;
+
// TODO add more validation
return true;
}
+bool validate_name(loco::Graph *g)
+{
+ auto nodes = g->nodes();
+ for (uint32_t n = 0; n < nodes->size(); ++n)
+ {
+ auto node = loco::must_cast<luci::CircleNode *>(nodes->at(n));
+ // skip virtual nodes
+ VirtualNodeDetector d;
+ if (node->accept(&d))
+ continue;
+
+ auto name = node->name();
+ if (name.empty())
+ return false;
+ }
+
+ return true;
+}
+
+bool validate_unique_name(luci::Module *m)
+{
+ LOGGER(l);
+
+ std::unordered_map<std::string, bool> names_col;
+
+ for (size_t g = 0; g < m->size(); ++g)
+ {
+ auto graph = m->graph(g);
+ auto nodes = graph->nodes();
+ for (uint32_t n = 0; n < nodes->size(); ++n)
+ {
+ auto node = loco::must_cast<luci::CircleNode *>(nodes->at(n));
+ // skip CircleOutput as it may have the same name as its from() node
+ auto output = dynamic_cast<luci::CircleOutput *>(node);
+ if (output != nullptr)
+ continue;
+ // skip virtual nodes
+ VirtualNodeDetector d;
+ if (node->accept(&d))
+ continue;
+
+ auto name = node->name();
+ INFO(l) << "Node: " << name << ", " << (uint32_t)(node->opcode()) << std::endl;
+ auto it = names_col.find(name);
+ if (it != names_col.end())
+ {
+ INFO(l) << "validate_unique_name: found duplicate " << name << ", " << graph->name()
+ << std::endl;
+ return false;
+ }
+
+ names_col[name] = true;
+ }
+ // The same tensor name may exist across different subgraphs.
+ names_col.clear();
+ }
+
+ return true;
+}
+
+bool validate(luci::Module *module)
+{
+ LOGGER(l);
+
+ INFO(l) << "--- validate Module -----------------------------------";
+
+ for (size_t g = 0; g < module->size(); ++g)
+ {
+ auto graph = module->graph(g);
+
+ INFO(l) << luci::fmt(graph) << std::endl;
+
+ if (!validate(graph))
+ {
+ std::cerr << "ERROR: Invalid circle model" << std::endl;
+ return false;
+ }
+ if (!validate_name(graph))
+ {
+ std::cerr << "ERROR: circle model has empty name" << std::endl;
+ return false;
+ }
+ }
+
+ if (!validate_unique_name(module))
+ {
+ std::cerr << "ERROR: circle model has duplicate names" << std::endl;
+ return false;
+ }
+
+ return true;
+}
+
+bool validate_shape(luci::Module *module)
+{
+ LOGGER(l);
+
+ INFO(l) << "--- validate shape of Module -----------------------------------";
+
+ for (size_t g = 0; g < module->size(); ++g)
+ {
+ auto graph = module->graph(g);
+
+ INFO(l) << luci::fmt(graph) << std::endl;
+
+ if (!validate_shape(graph))
+ {
+ std::cerr << "ERROR: Invalid circle model" << std::endl;
+ return false;
+ }
+ }
+
+ return true;
+}
+
} // namespace luci
diff --git a/compiler/luci/service/src/Validate.test.cpp b/compiler/luci/service/src/Validate.test.cpp
new file mode 100644
index 000000000..8ce6d895b
--- /dev/null
+++ b/compiler/luci/service/src/Validate.test.cpp
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/Validate.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <luci/IR/Nodes/CircleAdd.h>
+#include <luci/IR/Nodes/CircleSqrt.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class SqrtGraphlet
+{
+public:
+ SqrtGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 input_shape)
+ {
+ _sqrt = g->nodes()->create<luci::CircleSqrt>();
+ _sqrt->dtype(loco::DataType::S32);
+ _sqrt->name("sqrt");
+ }
+
+protected:
+ luci::CircleSqrt *_sqrt = nullptr;
+};
+
+class SqrtGraph : public TestIOGraph, public SqrtGraphlet
+{
+public:
+ SqrtGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ SqrtGraphlet::init(g(), shape);
+
+ _sqrt->x(input());
+
+ output()->from(_sqrt);
+
+ // set output name to _sqrt's: a CircleOutput may duplicate its from() node's name
+ output()->name(_sqrt->name());
+ }
+};
+
+class Sqrt2xGraphlet
+{
+public:
+ Sqrt2xGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 input_shape)
+ {
+ _sqrt1 = g->nodes()->create<luci::CircleSqrt>();
+ _sqrt1->dtype(loco::DataType::S32);
+ _sqrt1->name("sqrt");
+
+ _sqrt2 = g->nodes()->create<luci::CircleSqrt>();
+ _sqrt2->dtype(loco::DataType::S32);
+ _sqrt2->name("sqrt");
+ }
+
+protected:
+ luci::CircleSqrt *_sqrt1 = nullptr;
+ luci::CircleSqrt *_sqrt2 = nullptr;
+};
+
+class Sqrt2xGraph : public TestIOGraph, public Sqrt2xGraphlet
+{
+public:
+ Sqrt2xGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ Sqrt2xGraphlet::init(g(), shape);
+
+ _sqrt1->x(input());
+
+ _sqrt2->x(_sqrt1);
+
+ output()->from(_sqrt2);
+ }
+};
+
+} // namespace
+
+TEST(ValidateTest, non_empty_name)
+{
+ SqrtGraph g;
+ g.init({3, 3});
+
+ ASSERT_TRUE(luci::validate_name(g.g()));
+}
+
+TEST(ValidateTest, unique_name)
+{
+ luci::Module module;
+
+ SqrtGraph g;
+ g.init({3, 3});
+ g.transfer_to(&module);
+
+ ASSERT_TRUE(luci::validate_unique_name(&module));
+}
+
+TEST(ValidateTest, unique_name_NEG)
+{
+ luci::Module module;
+
+ Sqrt2xGraph g;
+ g.init({3, 3});
+ g.transfer_to(&module);
+
+ ASSERT_FALSE(luci::validate_unique_name(&module));
+}
diff --git a/compiler/luci/tester/CMakeLists.txt b/compiler/luci/tester/CMakeLists.txt
index 3ac06ef3a..13aab11e7 100644
--- a/compiler/luci/tester/CMakeLists.txt
+++ b/compiler/luci/tester/CMakeLists.txt
@@ -6,6 +6,7 @@ TargetRequire_Return(${REQUIRED_TARGETS})
set(SRCS_READ_TESTER
src/ReadTester.cpp
+ src/ReadModule.cpp
)
add_executable(luci_readtester "${SRCS_READ_TESTER}")
@@ -18,6 +19,7 @@ target_link_libraries(luci_readtester PRIVATE safemain)
set(SRCS_WRITE_TESTER
src/WriteTester.cpp
+ src/ReadModule.cpp
)
add_executable(luci_writetester "${SRCS_WRITE_TESTER}")
@@ -28,3 +30,22 @@ target_link_libraries(luci_writetester PRIVATE luci_export)
target_link_libraries(luci_writetester PRIVATE foder)
target_link_libraries(luci_writetester PRIVATE oops)
target_link_libraries(luci_writetester PRIVATE safemain)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(luci_readtester_test src/ReadTester.test.cpp ${SRCS_READ_TESTER})
+target_link_libraries(luci_readtester_test luci_import)
+target_link_libraries(luci_readtester_test luci_service)
+target_link_libraries(luci_readtester_test luci_pass)
+target_link_libraries(luci_readtester_test foder)
+
+GTest_AddTest(luci_writetester_test src/WriteTester.test.cpp ${SRCS_WRITE_TESTER})
+target_link_libraries(luci_writetester_test luci_import)
+target_link_libraries(luci_writetester_test luci_service)
+target_link_libraries(luci_writetester_test luci_pass)
+target_link_libraries(luci_writetester_test luci_export)
+target_link_libraries(luci_writetester_test foder)
diff --git a/compiler/luci/tester/src/ReadModule.cpp b/compiler/luci/tester/src/ReadModule.cpp
new file mode 100644
index 000000000..87c1233f0
--- /dev/null
+++ b/compiler/luci/tester/src/ReadModule.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ReadModule.h"
+
+#include <luci/Pass/CircleShapeInferencePass.h>
+#include <luci/Pass/CircleTypeInferencePass.h>
+#include <luci/Service/Validate.h>
+
+#include <logo/Phase.h>
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+std::unique_ptr<luci::Module> ReadModule(std::string &input_path)
+{
+ // Load model from the file
+ foder::FileLoader file_loader{input_path};
+ std::vector<char> model_data = file_loader.load();
+ const circle::Model *circle_model = circle::GetModel(model_data.data());
+ if (circle_model == nullptr)
+ {
+ std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl;
+ return nullptr;
+ }
+
+ luci::Importer importer;
+ auto module = importer.importModule(circle_model);
+ assert(module->size() > 0);
+
+ for (size_t g = 0; g < module->size(); ++g)
+ {
+ auto graph = module->graph(g);
+ if (graph == nullptr)
+ return nullptr;
+
+ {
+ logo::Phase phase;
+
+ phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+ phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
+
+ logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{graph};
+ phase_runner.run(phase);
+ }
+
+ if (!luci::validate(graph))
+ return nullptr;
+ }
+ return module;
+}
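+
+// Usage sketch (illustrative):
+//
+//   std::string path = "model.circle";
+//   auto module = ReadModule(path);
+//   if (module == nullptr)
+//     return EXIT_FAILURE;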
diff --git a/compiler/luci/tester/src/ReadModule.h b/compiler/luci/tester/src/ReadModule.h
new file mode 100644
index 000000000..dfa9bad6b
--- /dev/null
+++ b/compiler/luci/tester/src/ReadModule.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_TESTER_READ_MODULE_H__
+#define __LUCI_TESTER_READ_MODULE_H__
+
+#include <luci/Importer.h>
+#include <foder/FileLoader.h>
+
+#include <memory>
+#include <string>
+
+std::unique_ptr<luci::Module> ReadModule(std::string &input_path);
+
+#endif // __LUCI_TESTER_READ_MODULE_H__
diff --git a/compiler/luci/tester/src/ReadTester.cpp b/compiler/luci/tester/src/ReadTester.cpp
index a1aead1bd..864343e43 100644
--- a/compiler/luci/tester/src/ReadTester.cpp
+++ b/compiler/luci/tester/src/ReadTester.cpp
@@ -14,15 +14,9 @@
* limitations under the License.
*/
-#include <foder/FileLoader.h>
-
-#include <luci/Importer.h>
-#include <luci/Service/Validate.h>
-#include <luci/Pass/ShapeInferencePass.h>
-#include <luci/Pass/TypeInferencePass.h>
+#include "ReadModule.h"
#include <iostream>
-#include <map>
#include <string>
namespace
@@ -65,39 +59,9 @@ int entry(int argc, char **argv)
std::cout << "[INFO] Circle is '" << input_path << "'" << std::endl;
- // Load model from the file
- foder::FileLoader file_loader{input_path};
- std::vector<char> model_data = file_loader.load();
- const circle::Model *circle_model = circle::GetModel(model_data.data());
- if (circle_model == nullptr)
- {
- std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl;
+ auto module = ReadModule(input_path);
+ if (module == nullptr)
return EXIT_FAILURE;
- }
-
- luci::Importer importer;
- auto module = importer.importModule(circle_model);
- assert(module->size() > 0);
- for (size_t g = 0; g < module->size(); ++g)
- {
- auto graph = module->graph(g);
- if (graph == nullptr)
- return 255;
-
- {
- luci::ShapeInferencePass pass;
- while (pass.run(graph) == true)
- ;
- }
- {
- luci::TypeInferencePass pass;
- while (pass.run(graph) == true)
- ;
- }
-
- if (!luci::validate(graph))
- return 255;
- }
return 0;
}
diff --git a/compiler/luci/tester/src/ReadTester.test.cpp b/compiler/luci/tester/src/ReadTester.test.cpp
new file mode 100644
index 000000000..f3850d517
--- /dev/null
+++ b/compiler/luci/tester/src/ReadTester.test.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <cstring> // for strcpy
+
+// From ReadTester.cpp
+int entry(int argc, char **argv);
+
+TEST(ReadTesterTest, invalid_argc_NEG)
+{
+ char argv_1[20];
+ strcpy(argv_1, "ReadTesterTest");
+
+ int argc = 1;
+ char *argv[] = {argv_1};
+
+ ASSERT_NE(0, entry(argc, argv));
+}
+
+TEST(ReadTesterTest, invalid_file_NEG)
+{
+ char argv_1[20], argv_2[20];
+ strcpy(argv_1, "ReadTesterTest");
+ strcpy(argv_2, "not_a_file");
+
+ int argc = 2;
+ char *argv[] = {argv_1, argv_2};
+
+ EXPECT_THROW(entry(argc, argv), std::runtime_error);
+}
diff --git a/compiler/luci/tester/src/WriteTester.cpp b/compiler/luci/tester/src/WriteTester.cpp
index aa7085c77..0d3a1efa2 100644
--- a/compiler/luci/tester/src/WriteTester.cpp
+++ b/compiler/luci/tester/src/WriteTester.cpp
@@ -14,18 +14,13 @@
* limitations under the License.
*/
-#include <foder/FileLoader.h>
+#include "ReadModule.h"
-#include <luci/Importer.h>
-#include <luci/Pass/ShapeInferencePass.h>
-#include <luci/Pass/TypeInferencePass.h>
-#include <luci/Service/Validate.h>
#include <luci/CircleExporter.h>
#include <oops/InternalExn.h>
#include <fstream>
#include <iostream>
-#include <map>
#include <string>
namespace
@@ -48,12 +43,12 @@ struct CircleExpContract : public luci::CircleExporter::Contract
{
public:
CircleExpContract(loco::Graph *graph, const std::string &filename)
- : _graph(graph), _filepath(filename)
+ : _graph(graph), _filepath(filename)
{
// NOTHING TO DO
}
CircleExpContract(luci::Module *module, const std::string &filename)
- : _module(module), _filepath(filename)
+ : _module(module), _filepath(filename)
{
// NOTHING TO DO
}
@@ -108,41 +103,9 @@ int entry(int argc, char **argv)
std::cout << "[INFO] Circle from '" << input_path << "' to '" << output_path << "'" << std::endl;
- // Load model from the file
- foder::FileLoader file_loader{input_path};
- std::vector<char> model_data = file_loader.load();
- const circle::Model *circle_model = circle::GetModel(model_data.data());
- if (circle_model == nullptr)
- {
- std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl;
+ auto module = ReadModule(input_path);
+ if (module == nullptr)
return EXIT_FAILURE;
- }
-
- // Import from input Circle file
- luci::Importer importer;
- auto module = importer.importModule(circle_model);
- assert(module->size() > 0);
-
- for (size_t g = 0; g < module->size(); ++g)
- {
- auto graph = module->graph(g);
- if (graph == nullptr)
- return 255;
-
- {
- luci::ShapeInferencePass pass;
- while (pass.run(graph) == true)
- ;
- }
- {
- luci::TypeInferencePass pass;
- while (pass.run(graph) == true)
- ;
- }
-
- if (!luci::validate(graph))
- return 255;
- }
// Export to output Circle file
luci::CircleExporter exporter;
diff --git a/compiler/luci/tester/src/WriteTester.test.cpp b/compiler/luci/tester/src/WriteTester.test.cpp
new file mode 100644
index 000000000..9d34c5f98
--- /dev/null
+++ b/compiler/luci/tester/src/WriteTester.test.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <cstring> // for strcpy
+
+// From WriteTester.cpp
+int entry(int argc, char **argv);
+
+TEST(WriteTesterTest, invalid_argc_NEG)
+{
+ char argv_1[20];
+ strcpy(argv_1, "WriteTesterTest");
+
+ int argc = 1;
+ char *argv[] = {argv_1};
+
+ ASSERT_NE(0, entry(argc, argv));
+}
+
+TEST(WriteTesterTest, invalid_file_NEG)
+{
+ char argv_1[20], argv_2[20], argv_3[20];
+ strcpy(argv_1, "WriteTesterTest");
+ strcpy(argv_2, "not_a_file");
+ strcpy(argv_3, "not_a_file");
+
+ int argc = 3;
+ char *argv[] = {argv_1, argv_2, argv_3};
+
+ EXPECT_THROW(entry(argc, argv), std::runtime_error);
+}
diff --git a/compiler/luci/testhelper/CMakeLists.txt b/compiler/luci/testhelper/CMakeLists.txt
new file mode 100644
index 000000000..86aa66225
--- /dev/null
+++ b/compiler/luci/testhelper/CMakeLists.txt
@@ -0,0 +1,25 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+# NOTE sources are named "*.test.cpp" so they are NOT picked up by static analyzer tools
+
+# testhelper library itself
+set(HELPER_SOURCE
+ src/TestShape.test.cpp
+ )
+
+add_library(luci_testhelper STATIC ${HELPER_SOURCE})
+target_include_directories(luci_testhelper PRIVATE src)
+target_include_directories(luci_testhelper PUBLIC include)
+target_link_libraries(luci_testhelper luci_lang)
+
+# test for testhelper library
+set(TESTER_SOURCE
+ src/TestIOGraph.test.cpp
+ )
+
+GTest_AddTest(luci_testhelper_test ${TESTER_SOURCE})
+target_link_libraries(luci_testhelper_test luci_testhelper)
diff --git a/compiler/luci/testhelper/README.md b/compiler/luci/testhelper/README.md
new file mode 100644
index 000000000..6bdb92aa4
--- /dev/null
+++ b/compiler/luci/testhelper/README.md
@@ -0,0 +1,3 @@
+# luci-testhelper
+
+_luci-testhelper_ provides helper classes for unit testing
diff --git a/compiler/luci/testhelper/include/luci/test/TestIOGraph.h b/compiler/luci/testhelper/include/luci/test/TestIOGraph.h
new file mode 100644
index 000000000..68e834821
--- /dev/null
+++ b/compiler/luci/testhelper/include/luci/test/TestIOGraph.h
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_TESTHELPER_TEST_IO_GRAPH_H__
+#define __LUCI_TESTHELPER_TEST_IO_GRAPH_H__
+
+#include "TestShape.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/Module.h>
+
+#include <memory>
+#include <stdexcept>
+
+namespace luci
+{
+namespace test
+{
+
+/**
+ * @brief Graphlet with Inputs and a loco::Graph, for multiple inputs
+ * @note  Every Graph will have Input(s) and Output(s).
+ *        We keep the loco::Graph in TestIsGraphlet only, to avoid declaring
+ *        a separate class for it.
+ */
+template <unsigned N> class TestIsGraphlet
+{
+public:
+ TestIsGraphlet()
+ {
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ _graph_inputs[n] = nullptr;
+ _inputs[n] = nullptr;
+ }
+ _g = loco::make_graph();
+ }
+
+public:
+ virtual void init(loco::Graph *g, const std::initializer_list<ShapeU32> shape_in)
+ {
+ if (shape_in.size() != N)
+ throw std::runtime_error("Failed to init TestIsGraphlet");
+
+ auto shpin = shape_in.begin();
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ _graph_inputs[n] = g->inputs()->create();
+
+ _inputs[n] = g->nodes()->create<luci::CircleInput>();
+ _inputs[n]->shape(*shpin);
+ _inputs[n]->shape_status(luci::ShapeStatus::VALID);
+ _inputs[n]->dtype(loco::DataType::FLOAT32);
+ _inputs[n]->name("input_" + std::to_string(n));
+
+ _inputs[n]->index(_graph_inputs[n]->index());
+
+ auto input_shape = std::make_unique<loco::TensorShape>();
+ set_shape_vector(input_shape.get(), *shpin);
+ _graph_inputs[n]->shape(std::move(input_shape));
+ _graph_inputs[n]->dtype(loco::DataType::FLOAT32);
+
+ shpin++;
+ }
+ }
+
+public:
+ loco::Graph *g(void) { return _g.get(); }
+ luci::CircleInput *input(int idx) { return _inputs[idx]; }
+ uint32_t num_inputs(void) { return N; }
+
+public:
+ void transfer_to(luci::Module *module)
+ {
+ // WARNING: after g is transferred, _graph_inputs, _inputs
+ // and _graph_outputs, _outputs in TestOsGraphlet will be invalid.
+ // The arrays are not cleared, as these are just helpers for unit tests.
+ module->add(std::move(_g));
+ }
+
+protected:
+ std::unique_ptr<loco::Graph> _g;
+ std::array<loco::GraphInput *, N> _graph_inputs;
+ std::array<luci::CircleInput *, N> _inputs;
+};
+
+/**
+ * @brief Graphlet with one Input
+ */
+class TestIGraphlet : public TestIsGraphlet<1>
+{
+public:
+ virtual void init(loco::Graph *g, const ShapeU32 shape_in)
+ {
+ TestIsGraphlet<1>::init(g, {shape_in});
+ }
+
+ luci::CircleInput *input() { return _inputs[0]; }
+};
+
+/**
+ * @brief Graphlet with Outputs for multiple outputs
+ */
+template <unsigned N> class TestOsGraphlet
+{
+public:
+ TestOsGraphlet()
+ {
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ _graph_outputs[n] = nullptr;
+ _outputs[n] = nullptr;
+ }
+ }
+
+public:
+ virtual void init(loco::Graph *g, const std::initializer_list<ShapeU32> shape_out)
+ {
+ if (shape_out.size() != N)
+ throw std::runtime_error("Failed to init TestOsGraphlet");
+
+ auto shpout = shape_out.begin();
+ for (uint32_t n = 0; n < N; ++n)
+ {
+ _graph_outputs[n] = g->outputs()->create();
+
+ _outputs[n] = g->nodes()->create<luci::CircleOutput>();
+ _outputs[n]->shape(*shpout);
+ _outputs[n]->shape_status(luci::ShapeStatus::VALID);
+ _outputs[n]->dtype(loco::DataType::FLOAT32);
+ _outputs[n]->name("output_" + std::to_string(n));
+
+ _outputs[n]->index(_graph_outputs[n]->index());
+
+ auto output_shape = std::make_unique<loco::TensorShape>();
+ set_shape_vector(output_shape.get(), *shpout);
+ _graph_outputs[n]->shape(std::move(output_shape));
+ _graph_outputs[n]->dtype(loco::DataType::FLOAT32);
+
+ shpout++;
+ }
+ }
+
+public:
+ luci::CircleOutput *output(int idx) { return _outputs[idx]; }
+ uint32_t num_outputs(void) { return N; }
+
+protected:
+ std::array<loco::GraphOutput *, N> _graph_outputs;
+ std::array<luci::CircleOutput *, N> _outputs;
+};
+
+/**
+ * @brief Graphlet with one Output
+ */
+class TestOGraphlet : public TestOsGraphlet<1>
+{
+public:
+ virtual void init(loco::Graph *g, const ShapeU32 shape_out)
+ {
+ TestOsGraphlet<1>::init(g, {shape_out});
+ }
+
+ luci::CircleOutput *output() { return _outputs[0]; }
+};
+
+/**
+ * @brief Graph with Input and Output
+ */
+class TestIOGraph : public TestIGraphlet, public TestOGraphlet
+{
+public:
+ TestIOGraph() = default;
+
+public:
+ virtual void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+ {
+ TestIGraphlet::init(g(), shape_in);
+ TestOGraphlet::init(g(), shape_out);
+ }
+};
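+
+// Usage sketch (illustrative): derive from TestIOGraph and wire an operator
+// between input() and output() in init():
+//
+//   class SqrtGraph : public TestIOGraph
+//   {
+//   public:
+//     void init(const ShapeU32 shape)
+//     {
+//       TestIOGraph::init(shape, shape);
+//       _sqrt = g()->nodes()->create<luci::CircleSqrt>();
+//       _sqrt->name("sqrt");
+//       _sqrt->x(input());
+//       output()->from(_sqrt);
+//     }
+//   private:
+//     luci::CircleSqrt *_sqrt = nullptr;
+//   };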
+
+} // namespace test
+} // namespace luci
+
+#endif // __LUCI_TESTHELPER_TEST_IO_GRAPH_H__
diff --git a/compiler/luci/testhelper/include/luci/test/TestShape.h b/compiler/luci/testhelper/include/luci/test/TestShape.h
new file mode 100644
index 000000000..1a5adf7d6
--- /dev/null
+++ b/compiler/luci/testhelper/include/luci/test/TestShape.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_TESTHELPER_TEST_SHAPE_H__
+#define __LUCI_TESTHELPER_TEST_SHAPE_H__
+
+#include <luci/IR/CircleNode.h>
+
+#include <initializer_list>
+
+namespace luci
+{
+namespace test
+{
+
+using ShapeU32 = std::initializer_list<uint32_t>;
+using ShapeI32 = std::initializer_list<int32_t>;
+
+void set_shape_vector(loco::TensorShape *shape, const ShapeU32 &values);
+void set_shape_vector(luci::CircleConst *const_node, const ShapeI32 &values);
+
+uint32_t num_elements(const ShapeU32 shape);
+
+} // namespace test
+} // namespace luci
+
+#endif // __LUCI_TESTHELPER_TEST_SHAPE_H__
diff --git a/compiler/luci/testhelper/src/TestIOGraph.test.cpp b/compiler/luci/testhelper/src/TestIOGraph.test.cpp
new file mode 100644
index 000000000..8a7d1e060
--- /dev/null
+++ b/compiler/luci/testhelper/src/TestIOGraph.test.cpp
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/test/TestIOGraph.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class SqrtGraphlet
+{
+public:
+ SqrtGraphlet() = default;
+
+ void init(loco::Graph *g)
+ {
+ _sqrt = g->nodes()->create<luci::CircleSqrt>();
+ _sqrt->name("sqrt");
+ }
+
+protected:
+ luci::CircleSqrt *_sqrt = nullptr;
+};
+
+class AddGraphlet
+{
+public:
+ AddGraphlet() = default;
+
+ void init(loco::Graph *g)
+ {
+ _add = g->nodes()->create<luci::CircleAdd>();
+ _add->name("add");
+ }
+
+protected:
+ luci::CircleAdd *_add = nullptr;
+};
+
+class ConvGraphlet
+{
+public:
+ ConvGraphlet() = default;
+
+ void init(loco::Graph *g)
+ {
+ _conv = g->nodes()->create<luci::CircleConv2D>();
+ _conv->name("conv");
+ }
+
+protected:
+ luci::CircleConv2D *_conv = nullptr;
+};
+
+} // namespace
+
+namespace
+{
+
+class TestOfTestIOGraph : public TestIOGraph, public SqrtGraphlet
+{
+public:
+ TestOfTestIOGraph() = default;
+
+public:
+ void init(void)
+ {
+ TestIOGraph::init({1}, {1});
+ SqrtGraphlet::init(g());
+
+ _sqrt->x(input());
+
+ output()->from(_sqrt);
+ }
+};
+
+class TestOfTestI2OGraph : public TestIsGraphlet<2>, public TestOGraphlet, public AddGraphlet
+{
+public:
+ TestOfTestI2OGraph() = default;
+
+public:
+ void init(void)
+ {
+ TestIsGraphlet<2>::init(g(), {{2, 3}, {2, 3}});
+ TestOsGraphlet<1>::init(g(), {{2, 3}});
+ AddGraphlet::init(g());
+
+ _add->x(input(0));
+ _add->y(input(1));
+
+ output()->from(_add);
+ }
+};
+
+class TestOfTestI3OGraph : public TestIsGraphlet<3>, public TestOGraphlet, public ConvGraphlet
+{
+public:
+ TestOfTestI3OGraph() = default;
+
+public:
+ void init(void)
+ {
+ TestIsGraphlet<3>::init(g(), {{2, 3, 3, 4}, {1, 1}, {4}});
+ TestOsGraphlet<1>::init(g(), {{2, 3, 3, 4}});
+ ConvGraphlet::init(g());
+
+ _conv->input(input(0));
+ _conv->filter(input(1));
+ _conv->bias(input(2));
+
+ output()->from(_conv);
+ }
+};
+
+class FailOfTestI3OGraph : public TestIsGraphlet<3>, public TestOGraphlet, public ConvGraphlet
+{
+public:
+ FailOfTestI3OGraph() = default;
+
+public:
+ void init(void)
+ {
+ TestIsGraphlet<3>::init(g(), {{2, 3, 3, 4}, {1, 1}});
+ TestOsGraphlet<1>::init(g(), {{2, 3, 3, 4}});
+ ConvGraphlet::init(g());
+
+ _conv->input(input(0));
+ _conv->filter(input(1));
+ _conv->bias(input(2));
+
+ output()->from(_conv);
+ }
+};
+
+} // namespace
+
+TEST(TestIOGraphTest, IOGraph_init)
+{
+ TestOfTestIOGraph tg;
+ tg.init();
+
+ SUCCEED();
+}
+
+TEST(TestIOGraphTest, I2OGraph_init)
+{
+ TestOfTestI2OGraph tg;
+ tg.init();
+
+ SUCCEED();
+}
+
+TEST(TestIOGraphTest, I3OGraph_init)
+{
+ TestOfTestI3OGraph tg;
+ tg.init();
+
+ SUCCEED();
+}
+
+TEST(TestIOGraphTest, I3OGraph_input_number_mismatch_NEG)
+{
+ FailOfTestI3OGraph fg;
+ EXPECT_THROW(fg.init(), std::runtime_error);
+}
diff --git a/compiler/luci/testhelper/src/TestShape.test.cpp b/compiler/luci/testhelper/src/TestShape.test.cpp
new file mode 100644
index 000000000..9838c6182
--- /dev/null
+++ b/compiler/luci/testhelper/src/TestShape.test.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/test/TestShape.h"
+
+/**
+ * @note This file does not hold any test cases but provides methods for tests
+ */
+
+namespace luci
+{
+namespace test
+{
+
+void set_shape_vector(loco::TensorShape *shape, const ShapeU32 &values)
+{
+ uint32_t r = 0;
+ shape->rank(values.size());
+ for (auto v : values)
+ shape->dim(r++).set(v);
+}
+
+void set_shape_vector(luci::CircleConst *const_node, const ShapeI32 &values)
+{
+ const_node->rank(1);
+ const_node->dim(0).set(values.size());
+ const_node->shape_status(luci::ShapeStatus::VALID);
+ const_node->dtype(loco::DataType::S32);
+ const_node->size<loco::DataType::S32>(values.size());
+ uint32_t idx = 0;
+ for (auto val : values)
+ const_node->at<loco::DataType::S32>(idx++) = val;
+}
+
+uint32_t num_elements(const ShapeU32 shape)
+{
+ uint32_t result = 1;
+ for (auto val : shape)
+ result = result * val;
+ return result;
+}
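+
+// Usage sketch (illustrative):
+//
+//   loco::TensorShape shape;
+//   set_shape_vector(&shape, {1, 2, 3}); // rank 3, dims 1x2x3
+//   assert(num_elements({1, 2, 3}) == 6);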
+
+} // namespace test
+} // namespace luci
diff --git a/compiler/luci/tests/CMakeLists.txt b/compiler/luci/tests/CMakeLists.txt
index c03835823..1333efb7d 100644
--- a/compiler/luci/tests/CMakeLists.txt
+++ b/compiler/luci/tests/CMakeLists.txt
@@ -1,3 +1,14 @@
+set(CIRCLECHEF_FILE_PATH $<TARGET_FILE:circlechef-file>)
+set(TFLCHEF_FILE_PATH $<TARGET_FILE:tflchef-file>)
+set(TFLITE2CIRCLE_PATH $<TARGET_FILE:tflite2circle>)
+if(DEFINED ENV{BUILD_HOST_EXEC})
+ # TODO use a better way to represent the host executable path
+ set(CIRCLECHEF_FILE_PATH $ENV{BUILD_HOST_EXEC}/compiler/circlechef/tools/file/circlechef-file)
+ set(TFLCHEF_FILE_PATH $ENV{BUILD_HOST_EXEC}/compiler/tflchef/tools/file/tflchef-file)
+ set(TFLITE2CIRCLE_PATH $ENV{BUILD_HOST_EXEC}/compiler/tflite2circle/tflite2circle)
+ message(STATUS "TFLITE2CIRCLE_PATH = ${TFLITE2CIRCLE_PATH}")
+endif(DEFINED ENV{BUILD_HOST_EXEC})
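+
+# Example (illustrative): for a cross build, BUILD_HOST_EXEC can point at a
+# host build tree so that generator tools run on the host:
+#
+#   BUILD_HOST_EXEC=/path/to/host/build cmake --build .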
+
# TODO use local test.recipe files for small networks
file(GLOB RECIPES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*/test.recipe")
@@ -17,14 +28,14 @@ foreach(RECIPE IN ITEMS ${RECIPES})
# Generate .tflite
add_custom_command(OUTPUT "${RECIPE_OUTPUT_FILE}"
- COMMAND tflchef-file "${RECIPE_SOURCE_FILE}" "${RECIPE_OUTPUT_FILE}"
- DEPENDS tflchef-file "${RECIPE_SOURCE_FILE}"
+ COMMAND ${TFLCHEF_FILE_PATH} "${RECIPE_SOURCE_FILE}" "${RECIPE_OUTPUT_FILE}"
+ DEPENDS ${TFLCHEF_FILE_PATH} "${RECIPE_SOURCE_FILE}"
COMMENT "Generating ${RECIPE_OUTPUT_FILE}")
# Generate .circle
add_custom_command(OUTPUT "${CIRCLE_OUTPUT_FILE}"
- COMMAND tflite2circle "${RECIPE_OUTPUT_FILE}" "${CIRCLE_OUTPUT_FILE}"
- DEPENDS tflite2circle "${RECIPE_OUTPUT_FILE}"
+ COMMAND ${TFLITE2CIRCLE_PATH} "${RECIPE_OUTPUT_FILE}" "${CIRCLE_OUTPUT_FILE}"
+ DEPENDS ${TFLITE2CIRCLE_PATH} "${RECIPE_OUTPUT_FILE}"
COMMENT "Generating ${CIRCLE_OUTPUT_FILE}")
list(APPEND TESTFILES "${CIRCLE_OUTPUT_FILE}")
@@ -52,14 +63,14 @@ foreach(RECIPE IN ITEMS ${RECIPES})
# Generate .tflite
add_custom_command(OUTPUT "${RECIPE_OUTPUT_FILE}"
- COMMAND tflchef-file "${RECIPE_SOURCE_FILE}" "${RECIPE_OUTPUT_FILE}"
- DEPENDS tflchef-file "${RECIPE_SOURCE_FILE}"
+ COMMAND ${TFLCHEF_FILE_PATH} "${RECIPE_SOURCE_FILE}" "${RECIPE_OUTPUT_FILE}"
+ DEPENDS ${TFLCHEF_FILE_PATH} "${RECIPE_SOURCE_FILE}"
COMMENT "Generating ${RECIPE_OUTPUT_FILE}")
# Generate .circle
add_custom_command(OUTPUT "${CIRCLE_OUTPUT_FILE}"
- COMMAND tflite2circle "${RECIPE_OUTPUT_FILE}" "${CIRCLE_OUTPUT_FILE}"
- DEPENDS tflite2circle "${RECIPE_OUTPUT_FILE}"
+ COMMAND ${TFLITE2CIRCLE_PATH} "${RECIPE_OUTPUT_FILE}" "${CIRCLE_OUTPUT_FILE}"
+ DEPENDS ${TFLITE2CIRCLE_PATH} "${RECIPE_OUTPUT_FILE}"
COMMENT "Generating ${CIRCLE_OUTPUT_FILE}")
list(APPEND TESTFILES "${CIRCLE_OUTPUT_FILE}")
@@ -87,8 +98,8 @@ foreach(RECIPE IN ITEMS ${RECIPES2})
# Generate .circle
add_custom_command(OUTPUT "${CIRCLE_OUTPUT_FILE}"
- COMMAND circlechef-file "${RECIPE_SOURCE_FILE}" "${CIRCLE_OUTPUT_FILE}"
- DEPENDS circlechef-file "${RECIPE_SOURCE_FILE}"
+ COMMAND ${CIRCLECHEF_FILE_PATH} "${RECIPE_SOURCE_FILE}" "${CIRCLE_OUTPUT_FILE}"
+ DEPENDS ${CIRCLECHEF_FILE_PATH} "${RECIPE_SOURCE_FILE}"
COMMENT "Generating ${CIRCLE_OUTPUT_FILE}")
list(APPEND TESTFILES "${CIRCLE_OUTPUT_FILE}")
@@ -111,6 +122,8 @@ include("test.lst")
# Read "test.local.lst" if exists
include("test.local.lst" OPTIONAL)
+# NOTE $<TARGET_FILE:luci_readtester> is used as-is because the test itself
+#      should run on the target device, for cross builds as well
add_test(NAME luci_unit_readtest
COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/readverify.sh"
"${CMAKE_CURRENT_BINARY_DIR}"
diff --git a/compiler/luci/tests/test.lst b/compiler/luci/tests/test.lst
index 12dd7ff5b..a88661db3 100644
--- a/compiler/luci/tests/test.lst
+++ b/compiler/luci/tests/test.lst
@@ -2,6 +2,8 @@ addread(Abs_000)
addread(Add_000)
addread(Add_001)
addread(Add_U8_000)
+addread(Add_STR_000)
+addread(Add_STR_001)
addread(AddN_000)
addread(ArgMax_000)
addread(ArgMax_001)
@@ -37,11 +39,13 @@ addread(Conv2D_003)
addread(Conv2D_U8_000)
addread(Conv2D_U8_001)
addread(Cos_000)
+addread(Densify_000)
addread(DepthToSpace_000)
addread(DepthwiseConv2D_000)
addread(DepthwiseConv2D_U8_000)
addread(DepthwiseConv2D_U8_001)
addread(DepthwiseConv2D_001)
+addread(Dequantize_000)
addread(Div_000)
addread(ELU_000)
addread(Equal_000)
@@ -50,6 +54,8 @@ addread(ExpandDims_000)
addread(ExpandDims_001)
addread(ExpandDims_002)
addread(ExpandDims_003)
+addread(ExpandDims_004)
+addread(FakeQuant_000)
addread(Fill_000)
addread(Fill_001)
addread(Floor_000)
@@ -63,8 +69,10 @@ addread(FullyConnected_002)
addread(FullyConnected_U8_000)
addread(Gather_000)
addread(GatherNd_000)
+addread(Gelu_000)
addread(Greater_000)
addread(GreaterEqual_000)
+addread(HardSwish_000)
addread(If_000)
addread(If_001)
addread(L2Normalize_000)
@@ -112,6 +120,7 @@ addread(Pad_U8_000)
addread(PadV2_000)
addread(Pow_000)
addread(PRelu_000)
+addread(Quantize_000)
addread(Range_000)
addread(Rank_000)
addread(ReduceAny_000)
@@ -150,6 +159,7 @@ addread(SelectV2_002)
addread(Shape_000)
addread(Sin_000)
addread(Slice_000)
+addread(Slice_001)
addread(Softmax_000)
addread(Softmax_U8_000)
addread(SpaceToBatchND_000)
@@ -165,6 +175,7 @@ addread(Sqrt_000)
addread(Square_000)
addread(SquaredDifference_000)
addread(Squeeze_000)
+addread(Squeeze_001)
addread(StridedSlice_000)
addread(StridedSlice_001)
addread(StridedSlice_002)
@@ -172,6 +183,8 @@ addread(Sub_000)
addread(Sub_U8_000)
addread(Sum_000)
addread(Sum_001)
+addread(SVDF_000)
+addread(SVDF_001)
addread(Tanh_000)
addread(Tanh_U8_000)
addread(Tile_000)
@@ -180,6 +193,9 @@ addread(TopKV2_000)
addread(TopKV2_001)
addread(Transpose_000)
addread(TransposeConv_000)
+addread(UnidirectionalSequenceLSTM_000)
+addread(UnidirectionalSequenceLSTM_001)
+addread(UnidirectionalSequenceLSTM_002)
addread(Unique_000)
addread(Unique_001)
addread(Unique_002)
@@ -216,6 +232,8 @@ addwrite(Abs_000)
addwrite(Add_000)
addwrite(Add_001)
addwrite(Add_U8_000)
+addwrite(Add_STR_000)
+addwrite(Add_STR_001)
addwrite(AddN_000)
addwrite(ArgMax_000)
addwrite(ArgMax_001)
@@ -251,11 +269,13 @@ addwrite(Conv2D_003)
addwrite(Conv2D_U8_000)
addwrite(Conv2D_U8_001)
addwrite(Cos_000)
+addwrite(Densify_000)
addwrite(DepthToSpace_000)
addwrite(DepthwiseConv2D_000)
addwrite(DepthwiseConv2D_U8_000)
addwrite(DepthwiseConv2D_U8_001)
addwrite(DepthwiseConv2D_001)
+addwrite(Dequantize_000)
addwrite(Div_000)
addwrite(ELU_000)
addwrite(Equal_000)
@@ -264,6 +284,8 @@ addwrite(ExpandDims_000)
addwrite(ExpandDims_001)
addwrite(ExpandDims_002)
addwrite(ExpandDims_003)
+addwrite(ExpandDims_004)
+addwrite(FakeQuant_000)
addwrite(Fill_000)
addwrite(Fill_001)
addwrite(Floor_000)
@@ -277,8 +299,10 @@ addwrite(FullyConnected_002)
addwrite(FullyConnected_U8_000)
addwrite(Gather_000)
addwrite(GatherNd_000)
+addwrite(Gelu_000)
addwrite(Greater_000)
addwrite(GreaterEqual_000)
+addwrite(HardSwish_000)
addwrite(If_000)
addwrite(If_001)
addwrite(L2Normalize_000)
@@ -325,6 +349,7 @@ addwrite(Pad_000)
addwrite(PadV2_000)
addwrite(Pow_000)
addwrite(PRelu_000)
+addwrite(Quantize_000)
addwrite(Range_000)
addwrite(Rank_000)
addwrite(ReduceAny_000)
@@ -363,6 +388,7 @@ addwrite(SelectV2_002)
addwrite(Shape_000)
addwrite(Sin_000)
addwrite(Slice_000)
+addwrite(Slice_001)
addwrite(Softmax_000)
addwrite(Softmax_U8_000)
addwrite(SpaceToBatchND_000)
@@ -378,6 +404,7 @@ addwrite(Sqrt_000)
addwrite(Square_000)
addwrite(SquaredDifference_000)
addwrite(Squeeze_000)
+addwrite(Squeeze_001)
addwrite(StridedSlice_000)
addwrite(StridedSlice_001)
addwrite(StridedSlice_002)
@@ -385,6 +412,8 @@ addwrite(Sub_000)
addwrite(Sub_U8_000)
addwrite(Sum_000)
addwrite(Sum_001)
+addwrite(SVDF_000)
+addwrite(SVDF_001)
addwrite(Tanh_000)
addwrite(Tanh_U8_000)
addwrite(Tile_000)
@@ -393,6 +422,9 @@ addwrite(TopKV2_000)
addwrite(TopKV2_001)
addwrite(Transpose_000)
addwrite(TransposeConv_000)
+addwrite(UnidirectionalSequenceLSTM_000)
+addwrite(UnidirectionalSequenceLSTM_001)
+addwrite(UnidirectionalSequenceLSTM_002)
addwrite(Unique_000)
addwrite(Unique_001)
addwrite(Unique_002)
diff --git a/compiler/mio-circle/CMakeLists.txt b/compiler/mio-circle/CMakeLists.txt
index 9c1126d6f..d24717343 100644
--- a/compiler/mio-circle/CMakeLists.txt
+++ b/compiler/mio-circle/CMakeLists.txt
@@ -1,13 +1,14 @@
-nnas_find_package(FlatBuffers QUIET)
+nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
if(NOT FlatBuffers_FOUND)
+ message(STATUS "mio-circle skip: FlatBuffers 2.0 NOT FOUND")
return()
endif(NOT FlatBuffers_FOUND)
message(STATUS "Build mio-circle: TRUE")
# TODO Find a better way
-set(SCHEMA_FILE "${NNAS_PROJECT_SOURCE_DIR}/nnpackage/schema/circle_schema.fbs")
+set(SCHEMA_FILE "${NNAS_PROJECT_SOURCE_DIR}/res/CircleSchema/0.3/circle_schema.fbs")
# NOTE Copy circle_schema.fbs as schema.fbs to generate "schema_generated.fbs" instead of "circle_schema_generated.fbs"
add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/schema.fbs"
@@ -26,3 +27,10 @@ FlatBuffers_Target(mio_circle
# This example shows how to use "mio-circle" library
add_executable(mio_circle_example example.cpp)
target_link_libraries(mio_circle_example mio_circle)
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_library(mio_circle_helper STATIC ${SOURCES})
+target_include_directories(mio_circle_helper PRIVATE src)
+target_include_directories(mio_circle_helper PUBLIC include)
+target_link_libraries(mio_circle_helper mio_circle)
diff --git a/compiler/mio-circle/include/mio_circle/Helper.h b/compiler/mio-circle/include/mio_circle/Helper.h
new file mode 100644
index 000000000..c0f8115fe
--- /dev/null
+++ b/compiler/mio-circle/include/mio_circle/Helper.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MIO_CIRCLE_HELPER_H__
+#define __MIO_CIRCLE_HELPER_H__
+
+#include <mio/circle/schema_generated.h>
+
+namespace mio
+{
+namespace circle
+{
+
+bool is_valid(const ::circle::OperatorCode *opcode);
+bool is_custom(const ::circle::OperatorCode *opcode);
+std::string opcode_name(const ::circle::OperatorCode *opcode);
+const char *tensor_type(const ::circle::Tensor *tensor);
+const char *tensor_name(const ::circle::Tensor *tensor);
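+
+// Usage sketch (illustrative, given an OperatorCode from a loaded model):
+//
+//   if (mio::circle::is_valid(opcode))
+//     std::cout << mio::circle::opcode_name(opcode) << std::endl;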
+
+} // namespace circle
+} // namespace mio
+
+#endif // __MIO_CIRCLE_HELPER_H__
diff --git a/compiler/mio-circle/src/Helper.cpp b/compiler/mio-circle/src/Helper.cpp
new file mode 100644
index 000000000..6f30c8c10
--- /dev/null
+++ b/compiler/mio-circle/src/Helper.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_circle/Helper.h"
+
+#include <sstream>
+
+namespace mio
+{
+namespace circle
+{
+
+bool is_valid(const ::circle::OperatorCode *opcode)
+{
+ ::circle::BuiltinOperator code = opcode->builtin_code();
+ return (::circle::BuiltinOperator_MIN <= code && code <= ::circle::BuiltinOperator_MAX);
+}
+
+bool is_custom(const ::circle::OperatorCode *opcode)
+{
+ ::circle::BuiltinOperator code = opcode->builtin_code();
+ return (code == ::circle::BuiltinOperator_CUSTOM);
+}
+
+std::string opcode_name(const ::circle::OperatorCode *opcode)
+{
+ assert(opcode);
+
+ if (!is_valid(opcode))
+ {
+ std::ostringstream oss;
+ oss << "(invalid)";
+ return oss.str();
+ }
+
+ if (is_custom(opcode))
+ {
+ if (!opcode->custom_code())
+ return "(invalid custom)";
+
+ std::string custom_op = "CUSTOM(";
+ custom_op += opcode->custom_code()->c_str();
+ custom_op += ")";
+ return custom_op;
+ }
+
+ ::circle::BuiltinOperator code = opcode->builtin_code();
+ return ::circle::EnumNameBuiltinOperator(code);
+}
+
+const char *tensor_type(const ::circle::Tensor *tensor)
+{
+ return ::circle::EnumNameTensorType(tensor->type());
+}
+
+const char *tensor_name(const ::circle::Tensor *tensor)
+{
+ static const char *kEmptyTensorName = "(noname)";
+
+ auto name = tensor->name();
+ if (name)
+ return name->c_str();
+
+ return kEmptyTensorName;
+}
+
+} // namespace circle
+} // namespace mio
diff --git a/compiler/mio-circle04/CMakeLists.txt b/compiler/mio-circle04/CMakeLists.txt
new file mode 100644
index 000000000..8ee6da44c
--- /dev/null
+++ b/compiler/mio-circle04/CMakeLists.txt
@@ -0,0 +1,52 @@
+nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
+
+if(NOT FlatBuffers_FOUND)
+ message(STATUS "mio-circle04 skip: FlatBuffers 2.0 NOT FOUND")
+ return()
+endif(NOT FlatBuffers_FOUND)
+
+message(STATUS "Build mio-circle04: TRUE")
+
+# TODO Find a better way
+# TODO use nnpackage
+# set(SCHEMA_FILE "${NNAS_PROJECT_SOURCE_DIR}/nnpackage/schema/circle_schema.fbs")
+set(SCHEMA_FILE "${NNAS_PROJECT_SOURCE_DIR}/res/CircleSchema/0.4/circle_schema.fbs")
+
+# NOTE Copy circle_schema.fbs as schema.fbs to generate "schema_generated.fbs" instead of "circle_schema_generated.fbs"
+add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/schema.fbs"
+ COMMAND ${CMAKE_COMMAND} -E copy "${SCHEMA_FILE}" schema.fbs
+ WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
+ DEPENDS "${SCHEMA_FILE}"
+)
+
+FlatBuffers_Target(mio_circle04
+ OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen/mio/circle"
+ INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen"
+ SCHEMA_DIR "${CMAKE_CURRENT_BINARY_DIR}"
+ SCHEMA_FILES "schema.fbs"
+)
+
+# This example shows how to use "mio-circle04" library
+add_executable(mio_circle04_example example.cpp)
+target_link_libraries(mio_circle04_example mio_circle04)
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(mio_circle04_helper STATIC ${SOURCES})
+set_target_properties(mio_circle04_helper PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(mio_circle04_helper PRIVATE src)
+target_include_directories(mio_circle04_helper PUBLIC include)
+target_link_libraries(mio_circle04_helper mio_circle04)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(mio_circle04_helper_test ${TESTS})
+target_include_directories(mio_circle04_helper_test PRIVATE src)
+target_link_libraries(mio_circle04_helper_test mio_circle04)
+target_link_libraries(mio_circle04_helper_test mio_circle04_helper)
diff --git a/compiler/mio-circle04/README.md b/compiler/mio-circle04/README.md
new file mode 100644
index 000000000..d12dd78ff
--- /dev/null
+++ b/compiler/mio-circle04/README.md
@@ -0,0 +1,3 @@
+# mio-circle04
+
+Let's make it easy to read and write Circle models.
diff --git a/compiler/mio-circle04/example.cpp b/compiler/mio-circle04/example.cpp
new file mode 100644
index 000000000..1970f4066
--- /dev/null
+++ b/compiler/mio-circle04/example.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// This example shows how to include and use "mio-circle04"
+//
+#include <mio/circle/schema_generated.h>
+
+#include <fstream>
+#include <iostream>
+#include <vector>
+
+int main(int argc, char **argv)
+{
+ std::ifstream ifs(argv[1], std::ios_base::binary);
+ std::vector<char> buf(std::istreambuf_iterator<char>{ifs}, std::istreambuf_iterator<char>{});
+
+ flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(buf.data()), buf.size()};
+
+ if (!circle::VerifyModelBuffer(verifier))
+ {
+ std::cout << "Fail" << std::endl;
+ return 255;
+ }
+
+ std::cout << "Pass" << std::endl;
+ return 0;
+}
diff --git a/compiler/mio-circle04/include/mio_circle/Helper.h b/compiler/mio-circle04/include/mio_circle/Helper.h
new file mode 100644
index 000000000..7a1ba2b2f
--- /dev/null
+++ b/compiler/mio-circle04/include/mio_circle/Helper.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MIO_CIRCLE04_HELPER_H__
+#define __MIO_CIRCLE04_HELPER_H__
+
+#include <mio/circle/schema_generated.h>
+
+#include <vector>
+
+namespace mio
+{
+namespace circle
+{
+
+::circle::BuiltinOperator builtin_code_neutral(const ::circle::OperatorCode *opcode);
+bool is_valid(const ::circle::OperatorCode *opcode);
+bool is_custom(const ::circle::OperatorCode *opcode);
+std::string opcode_name(const ::circle::OperatorCode *opcode);
+const char *tensor_type(const ::circle::Tensor *tensor);
+const char *tensor_name(const ::circle::Tensor *tensor);
+
+template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T> *flat_array)
+{
+ if (flat_array == nullptr)
+ {
+ throw std::runtime_error("flat array is nullptr");
+ }
+
+ std::vector<T> ret(flat_array->Length());
+ for (uint32_t i = 0; i < flat_array->Length(); i++)
+ {
+ ret[i] = flat_array->Get(i);
+ }
+ return ret;
+}
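+
+// Usage sketch (illustrative): copy an operator's input tensor indices out of
+// the flatbuffer:
+//
+//   std::vector<int32_t> inputs = as_index_vector(op->inputs());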
+
+} // namespace circle
+} // namespace mio
+
+#endif // __MIO_CIRCLE04_HELPER_H__
diff --git a/compiler/mio-circle04/include/mio_circle/Reader.h b/compiler/mio-circle04/include/mio_circle/Reader.h
new file mode 100644
index 000000000..630646732
--- /dev/null
+++ b/compiler/mio-circle04/include/mio_circle/Reader.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MIO_CIRCLE04_READER_H__
+#define __MIO_CIRCLE04_READER_H__
+
+#include <mio/circle/schema_generated.h>
+
+#include <map>
+#include <string>
+#include <vector>
+
+// NOTE The Reader class originated from circledump and circle-tensordump;
+//      it still needs more work for stability, as those tools are for
+//      developers, not customers.
+
+namespace mio
+{
+namespace circle
+{
+
+/**
+ * @brief Loads a Circle file and provides helpers to access its attributes
+ */
+class Reader
+{
+private:
+ using CircleSubGraphs_t = flatbuffers::Vector<flatbuffers::Offset<::circle::SubGraph>>;
+ using CircleBuffers_t = flatbuffers::Vector<flatbuffers::Offset<::circle::Buffer>>;
+ using CircleTensors_t = flatbuffers::Vector<flatbuffers::Offset<::circle::Tensor>>;
+ using CircleOperators_t = flatbuffers::Vector<flatbuffers::Offset<::circle::Operator>>;
+ using CircleMetadata_t = flatbuffers::Vector<flatbuffers::Offset<::circle::Metadata>>;
+ using CircleSignatureDef_t = flatbuffers::Vector<flatbuffers::Offset<::circle::SignatureDef>>;
+
+public:
+ Reader(const ::circle::Model *model);
+
+ Reader() = delete;
+
+public:
+ uint32_t version() const { return _version; }
+
+ const std::vector<const ::circle::OperatorCode *> &opcodes() { return _op_codes; }
+ const CircleBuffers_t *buffers() { return _buffers; }
+ const CircleTensors_t *tensors() { return _tensors; }
+ const CircleOperators_t *operators() { return _operators; }
+ const std::vector<int32_t> &inputs() const { return _inputs; }
+ const std::vector<int32_t> &outputs() const { return _outputs; }
+ const ::circle::DataFormat &data_format() const { return _data_format; }
+ const CircleMetadata_t *metadata() const { return _metadata; }
+ const CircleSignatureDef_t *signature_defs() const { return _signature_defs; }
+
+ uint32_t num_subgraph() const { return _subgraphs->Length(); }
+
+ size_t buffer_info(uint32_t buf_idx, const uint8_t **buff_data);
+ ::circle::BuiltinOperator builtin_code(const ::circle::Operator *op) const;
+ std::string opcode_name(const ::circle::Operator *op) const;
+ std::vector<int32_t> outputs(const ::circle::Operator *op) const;
+ std::string tensor_name(const ::circle::Tensor *tensor) const;
+ std::string tensor_dtype(const ::circle::Tensor *tensor) const;
+
+public:
+ bool select_subgraph(uint32_t subgraph);
+ const std::string &subgraph_name(void) const { return _subgraph_name; }
+ uint32_t subgraph_index(void) const { return _subgraph_index; }
+
+private:
+ uint32_t _version;
+
+ const CircleSubGraphs_t *_subgraphs{nullptr};
+ const CircleBuffers_t *_buffers{nullptr};
+ const CircleTensors_t *_tensors{nullptr};
+ const CircleOperators_t *_operators{nullptr};
+ const CircleMetadata_t *_metadata{nullptr};
+ const CircleSignatureDef_t *_signature_defs{nullptr};
+
+ uint32_t _subgraph_index = 0;
+ std::string _subgraph_name;
+ std::vector<const ::circle::OperatorCode *> _op_codes;
+ std::vector<int32_t> _inputs;
+ std::vector<int32_t> _outputs;
+ ::circle::DataFormat _data_format = ::circle::DataFormat::DataFormat_CHANNELS_FIRST;
+};
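+
+// Usage sketch (illustrative; `model` is a placeholder for a verified ::circle::Model*):
+//   mio::circle::Reader reader(model);
+//   for (uint32_t g = 0; g < reader.num_subgraph(); ++g)
+//   {
+//     reader.select_subgraph(g);
+//     // inspect reader.operators(), reader.tensors(), reader.inputs(), ...
+//   }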
+
+} // namespace circle
+} // namespace mio
+
+#endif // __MIO_CIRCLE04_READER_H__
diff --git a/compiler/mio-circle04/src/Helper.cpp b/compiler/mio-circle04/src/Helper.cpp
new file mode 100644
index 000000000..8b8737a2d
--- /dev/null
+++ b/compiler/mio-circle04/src/Helper.cpp
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_circle/Helper.h"
+
+#include <algorithm>
+#include <sstream>
+
+namespace mio
+{
+namespace circle
+{
+
+/**
+ * This provides a v3/v3a/v3b format-neutral BuiltinOperator.
+ * NOTE circle has negative opcode values (252~254 as uint8_t), so we cannot
+ * use std::max() as tflite does: deprecated_builtin_code can be negative
+ * while builtin_code is 0 for v0.3 files.
+ */
+::circle::BuiltinOperator builtin_code_neutral(const ::circle::OperatorCode *opcode)
+{
+ assert(opcode != nullptr);
+ if (opcode->deprecated_builtin_code() == 127)
+ {
+ assert(opcode->builtin_code() >= 127);
+ return opcode->builtin_code();
+ }
+ // There was no 255(-1) value in v0.3
+ assert(opcode->deprecated_builtin_code() != -1);
+ return static_cast<::circle::BuiltinOperator>(opcode->deprecated_builtin_code());
+}
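+
+// e.g. deprecated_builtin_code 3 (CONV_2D) is returned as-is, while the
+// placeholder value 127 defers to builtin_code (e.g. CUMSUM = 128).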
+
+bool is_valid(const ::circle::OperatorCode *opcode)
+{
+ // Valid Range : BuiltinOperator_MIN <= deprecated_builtin_code <= 127
+ const int8_t deprecated_builtin_code = opcode->deprecated_builtin_code();
+ if (deprecated_builtin_code < ::circle::BuiltinOperator_MIN)
+ return false;
+ // There was no 255(-1) value in v0.3
+ if (deprecated_builtin_code == -1)
+ return false;
+
+ const ::circle::BuiltinOperator builtin_code = opcode->builtin_code();
+ if (!(::circle::BuiltinOperator_MIN <= builtin_code &&
+ builtin_code <= ::circle::BuiltinOperator_MAX))
+ return false;
+
+ return true;
+}
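+
+// e.g. deprecated_builtin_code 128 wraps to -128 in int8_t and is rejected here.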
+
+bool is_custom(const ::circle::OperatorCode *opcode)
+{
+ ::circle::BuiltinOperator code = builtin_code_neutral(opcode);
+ return (code == ::circle::BuiltinOperator_CUSTOM);
+}
+
+std::string opcode_name(const ::circle::OperatorCode *opcode)
+{
+ assert(opcode);
+
+ if (!is_valid(opcode))
+ {
+ std::ostringstream oss;
+ oss << "(invalid)";
+ return oss.str();
+ }
+
+ if (is_custom(opcode))
+ {
+ if (!opcode->custom_code())
+ return "(invalid custom)";
+
+ std::string custom_op = "CUSTOM(";
+ custom_op += opcode->custom_code()->c_str();
+ custom_op += ")";
+ return custom_op;
+ }
+
+ ::circle::BuiltinOperator code = builtin_code_neutral(opcode);
+ return ::circle::EnumNameBuiltinOperator(code);
+}
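+
+// e.g. a custom opcode whose custom_code is "MyOp" (illustrative) yields "CUSTOM(MyOp)".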
+
+const char *tensor_type(const ::circle::Tensor *tensor)
+{
+ return ::circle::EnumNameTensorType(tensor->type());
+}
+
+const char *tensor_name(const ::circle::Tensor *tensor)
+{
+ if (tensor->name() == nullptr || std::string(tensor->name()->c_str()).empty())
+ return "(noname)";
+
+ return tensor->name()->c_str();
+}
+
+} // namespace circle
+} // namespace mio
diff --git a/compiler/mio-circle04/src/Helper.test.cpp b/compiler/mio-circle04/src/Helper.test.cpp
new file mode 100644
index 000000000..20fce0843
--- /dev/null
+++ b/compiler/mio-circle04/src/Helper.test.cpp
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_circle/Helper.h"
+
+#include <flatbuffers/flatbuffers.h>
+#include <gtest/gtest.h>
+
+#include <vector>
+
+class mio_circle04_helper_test : public ::testing::Test
+{
+protected:
+ void initialization_finish(void)
+ {
+ _fbb.Finish(circle::CreateModelDirect(_fbb, 0, &_opcodes_vec));
+ }
+
+protected:
+ void add_operator_code(int8_t deprecated_builtin_code, const char *custom_code,
+ circle::BuiltinOperator builtin_code)
+ {
+ _opcodes_vec.push_back(circle::CreateOperatorCodeDirect(
+ _fbb, deprecated_builtin_code, custom_code, 1 /* version */, builtin_code));
+ }
+
+ const circle::OperatorCode *get_operator_code(uint8_t idx)
+ {
+ return circle::GetModel(_fbb.GetBufferPointer())->operator_codes()->Get(idx);
+ }
+
+private:
+ flatbuffers::FlatBufferBuilder _fbb;
+ std::vector<flatbuffers::Offset<circle::OperatorCode>> _opcodes_vec;
+};
+
+TEST_F(mio_circle04_helper_test, v04)
+{
+ // BuiltinOperator_ADD = 0
+ // BuiltinOperator_CONV_2D = 3
+ add_operator_code(3, "", circle::BuiltinOperator_ADD);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)),
+ circle::BuiltinOperator_CONV_2D);
+ ASSERT_FALSE(mio::circle::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_circle04_helper_test, v04_custom_old)
+{
+ // BuiltinOperator_ADD = 0
+ // BuiltinOperator_CUSTOM = 32
+ add_operator_code(32, "custom", circle::BuiltinOperator_ADD);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)),
+ circle::BuiltinOperator_CUSTOM);
+ ASSERT_TRUE(mio::circle::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_circle04_helper_test, v04_NEG)
+{
+ // BuiltinOperator_ADD = 0
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be a negative value (128 wraps to -128 in int8_t)
+ add_operator_code(128, "", circle::BuiltinOperator_ADD);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::circle::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_circle04_helper_test, v04_under127)
+{
+ // BuiltinOperator_CONV_2D = 3
+ add_operator_code(3, "", circle::BuiltinOperator_CONV_2D);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)),
+ circle::BuiltinOperator_CONV_2D);
+ ASSERT_FALSE(mio::circle::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_circle04_helper_test, v04_under127_NEG)
+{
+ // BuiltinOperator_CONV_2D = 3
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be a negative value (128 wraps to -128 in int8_t)
+ add_operator_code(128, "", circle::BuiltinOperator_CONV_2D);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::circle::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_circle04_helper_test, v04_custom)
+{
+ // BuiltinOperator_CUSTOM = 32
+ add_operator_code(32, "custom", circle::BuiltinOperator_CUSTOM);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)),
+ circle::BuiltinOperator_CUSTOM);
+ ASSERT_TRUE(mio::circle::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_circle04_helper_test, v04_custom_NEG)
+{
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be a negative value (128 wraps to -128 in int8_t)
+ add_operator_code(128, "custom", circle::BuiltinOperator_CUSTOM);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::circle::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_circle04_helper_test, v04_over127)
+{
+ // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES = 127
+ // BuiltinOperator_CUMSUM = 128
+ add_operator_code(127, "", circle::BuiltinOperator_CUMSUM);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)),
+ circle::BuiltinOperator_CUMSUM);
+ ASSERT_FALSE(mio::circle::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_circle04_helper_test, v04_over127_NEG)
+{
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be a negative value (128 wraps to -128 in int8_t)
+ add_operator_code(128, "", circle::BuiltinOperator_CUMSUM);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::circle::is_valid(get_operator_code(0)));
+}
diff --git a/compiler/mio-circle04/src/Reader.cpp b/compiler/mio-circle04/src/Reader.cpp
new file mode 100644
index 000000000..880ffaec8
--- /dev/null
+++ b/compiler/mio-circle04/src/Reader.cpp
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_circle/Reader.h"
+#include "mio_circle/Helper.h"
+
+#include <sstream>
+#include <string>
+
+namespace mio
+{
+namespace circle
+{
+
+Reader::Reader(const ::circle::Model *model)
+{
+ if (model == nullptr)
+ {
+ throw std::runtime_error("Invalid model");
+ }
+
+ _version = model->version();
+ _subgraphs = model->subgraphs();
+ _buffers = model->buffers();
+ _metadata = model->metadata();
+ _signature_defs = model->signature_defs();
+
+ auto opcodes = model->operator_codes();
+ for (const ::circle::OperatorCode *opcode : *opcodes)
+ {
+ _op_codes.push_back(opcode);
+ }
+}
+
+size_t Reader::buffer_info(uint32_t buf_idx, const uint8_t **buff_data)
+{
+ if (buff_data != nullptr)
+ {
+ *buff_data = nullptr;
+ }
+
+ if (buf_idx == 0)
+ return 0;
+
+ if (auto *buffer = (*_buffers)[buf_idx])
+ {
+ if (auto *array = buffer->data())
+ {
+ if (size_t size = array->size())
+ {
+ if (buff_data != nullptr)
+ {
+ *buff_data = reinterpret_cast<const uint8_t *>(array->data());
+ }
+ return size;
+ }
+ }
+ }
+
+ return 0;
+}
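+
+// Usage sketch (illustrative; `reader` and `tensor` are placeholders):
+//   const uint8_t *data = nullptr;
+//   size_t size = reader.buffer_info(tensor->buffer(), &data);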
+
+::circle::BuiltinOperator Reader::builtin_code(const ::circle::Operator *op) const
+{
+ uint32_t index = op->opcode_index();
+ assert(index < _op_codes.size());
+ const ::circle::OperatorCode *opcode = _op_codes.at(index);
+
+ return mio::circle::builtin_code_neutral(opcode);
+}
+
+std::string Reader::opcode_name(const ::circle::Operator *op) const
+{
+ uint32_t index = op->opcode_index();
+ assert(index < _op_codes.size());
+ const ::circle::OperatorCode *opcode = _op_codes.at(index);
+
+ if (!mio::circle::is_valid(opcode))
+ {
+ std::ostringstream oss;
+ oss << "(invalid: " << index << ")";
+ return oss.str();
+ }
+
+ return mio::circle::opcode_name(opcode);
+}
+
+std::vector<int32_t> Reader::outputs(const ::circle::Operator *op) const
+{
+ return as_index_vector(op->outputs());
+}
+
+std::string Reader::tensor_name(const ::circle::Tensor *tensor) const
+{
+ return mio::circle::tensor_name(tensor);
+}
+
+std::string Reader::tensor_dtype(const ::circle::Tensor *tensor) const
+{
+ return mio::circle::tensor_type(tensor);
+}
+
+bool Reader::select_subgraph(uint32_t sgindex)
+{
+ _subgraph_index = sgindex;
+ _tensors = nullptr;
+ _operators = nullptr;
+
+ _inputs.clear();
+ _outputs.clear();
+
+ if (_subgraphs->Length() <= sgindex)
+ {
+ assert(false);
+ return false;
+ }
+
+ const ::circle::SubGraph *subgraph = (*_subgraphs)[sgindex];
+
+ auto name = subgraph->name();
+ _subgraph_name = name ? name->c_str() : "(noname)";
+
+ _tensors = subgraph->tensors();
+ _operators = subgraph->operators();
+ _data_format = subgraph->data_format();
+
+ _inputs = as_index_vector(subgraph->inputs());
+ _outputs = as_index_vector(subgraph->outputs());
+
+ return true;
+}
+
+} // namespace circle
+} // namespace mio
diff --git a/compiler/mio-circle04/src/Reader.test.cpp b/compiler/mio-circle04/src/Reader.test.cpp
new file mode 100644
index 000000000..104454a62
--- /dev/null
+++ b/compiler/mio-circle04/src/Reader.test.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_circle/Reader.h"
+
+#include <flatbuffers/flatbuffers.h>
+#include <gtest/gtest.h>
+
+class mio_circle04_reader_test : public ::testing::Test
+{
+protected:
+ void initialization_empty(void)
+ {
+ _model = circle::CreateModelDirect(_fbb, 0, &_opcodes_vec);
+ circle::FinishModelBuffer(_fbb, _model);
+ }
+
+ const circle::Model *circleModel(void)
+ {
+ auto ptr = _fbb.GetBufferPointer();
+ return circle::GetModel(ptr);
+ }
+
+private:
+ flatbuffers::FlatBufferBuilder _fbb;
+ flatbuffers::Offset<circle::Model> _model;
+ std::vector<flatbuffers::Offset<circle::OperatorCode>> _opcodes_vec;
+};
+
+TEST_F(mio_circle04_reader_test, null_Model_NEG)
+{
+ EXPECT_THROW(mio::circle::Reader reader(nullptr), std::runtime_error);
+}
+
+TEST_F(mio_circle04_reader_test, empty_Model)
+{
+ initialization_empty();
+
+ const circle::Model *model = circleModel();
+ EXPECT_NE(nullptr, model);
+
+ mio::circle::Reader reader(model);
+
+ SUCCEED();
+}
+
+// TODO add more tests
diff --git a/compiler/mio-circle05/CMakeLists.txt b/compiler/mio-circle05/CMakeLists.txt
new file mode 100644
index 000000000..dfd359eaa
--- /dev/null
+++ b/compiler/mio-circle05/CMakeLists.txt
@@ -0,0 +1,54 @@
+nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
+
+if(NOT FlatBuffers_FOUND)
+ message(STATUS "mio-circle05 skip: FlatBuffers 2.0 NOT FOUND")
+ return()
+endif(NOT FlatBuffers_FOUND)
+
+message(STATUS "Build mio-circle05: TRUE")
+
+# TODO Find a better way
+# TODO use nnpackage
+# set(SCHEMA_FILE "${NNAS_PROJECT_SOURCE_DIR}/nnpackage/schema/circle_schema.fbs")
+set(SCHEMA_FILE "${NNAS_PROJECT_SOURCE_DIR}/res/CircleSchema/0.5/circle_schema.fbs")
+
+# NOTE Copy circle_schema.fbs as schema.fbs to generate "schema_generated.h" instead of "circle_schema_generated.h"
+add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/schema.fbs"
+ COMMAND ${CMAKE_COMMAND} -E copy "${SCHEMA_FILE}" schema.fbs
+ WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
+ DEPENDS "${SCHEMA_FILE}"
+)
+
+FlatBuffers_Target(mio_circle05
+ OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen/mio/circle"
+ INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen"
+ SCHEMA_DIR "${CMAKE_CURRENT_BINARY_DIR}"
+ SCHEMA_FILES "schema.fbs"
+)
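+
+# NOTE Consumers include the generated header as <mio/circle/schema_generated.h>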
+
+# This example shows how to use "mio-circle05" library
+add_executable(mio_circle05_example example.cpp)
+target_link_libraries(mio_circle05_example mio_circle05)
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(mio_circle05_helper STATIC ${SOURCES})
+set_target_properties(mio_circle05_helper PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(mio_circle05_helper PRIVATE src)
+target_include_directories(mio_circle05_helper PUBLIC include)
+target_link_libraries(mio_circle05_helper mio_circle05)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(mio_circle05_helper_test ${TESTS})
+target_include_directories(mio_circle05_helper_test PRIVATE src)
+target_link_libraries(mio_circle05_helper_test mio_circle05)
+target_link_libraries(mio_circle05_helper_test mio_circle05_helper)
diff --git a/compiler/mio-circle05/README.md b/compiler/mio-circle05/README.md
new file mode 100644
index 000000000..929643658
--- /dev/null
+++ b/compiler/mio-circle05/README.md
@@ -0,0 +1,3 @@
+# mio-circle05
+
+Let's make it easy to read and write Circle models.
diff --git a/compiler/mio-circle05/example.cpp b/compiler/mio-circle05/example.cpp
new file mode 100644
index 000000000..31cd3fbe5
--- /dev/null
+++ b/compiler/mio-circle05/example.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// This example shows how to include and use "mio-circle05"
+//
+#include <mio/circle/schema_generated.h>
+
+#include <fstream>
+#include <iostream>
+#include <vector>
+
+int main(int argc, char **argv)
+{
+ if (argc != 2)
+ {
+ std::cerr << "Usage: " << argv[0] << " <circle_model_file>" << std::endl;
+ return 255;
+ }
+
+ std::ifstream ifs(argv[1], std::ios_base::binary);
+ std::vector<char> buf(std::istreambuf_iterator<char>{ifs}, std::istreambuf_iterator<char>{});
+
+ flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(buf.data()), buf.size()};
+
+ if (!circle::VerifyModelBuffer(verifier))
+ {
+ std::cout << "Fail" << std::endl;
+ return 255;
+ }
+
+ std::cout << "Pass" << std::endl;
+ return 0;
+}
diff --git a/compiler/mio-circle05/include/mio_circle/Helper.h b/compiler/mio-circle05/include/mio_circle/Helper.h
new file mode 100644
index 000000000..933f38587
--- /dev/null
+++ b/compiler/mio-circle05/include/mio_circle/Helper.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MIO_CIRCLE05_HELPER_H__
+#define __MIO_CIRCLE05_HELPER_H__
+
+#include <mio/circle/schema_generated.h>
+
+#include <vector>
+
+namespace mio
+{
+namespace circle
+{
+
+::circle::BuiltinOperator builtin_code_neutral(const ::circle::OperatorCode *opcode);
+bool is_valid(const ::circle::OperatorCode *opcode);
+bool is_custom(const ::circle::OperatorCode *opcode);
+std::string opcode_name(const ::circle::OperatorCode *opcode);
+const char *tensor_type(const ::circle::Tensor *tensor);
+const char *tensor_name(const ::circle::Tensor *tensor);
+
+template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T> *flat_array)
+{
+ if (flat_array == nullptr)
+ {
+ throw std::runtime_error("flat array is nullptr");
+ }
+
+ std::vector<T> ret(flat_array->Length());
+ for (uint32_t i = 0; i < flat_array->Length(); i++)
+ {
+ ret[i] = flat_array->Get(i);
+ }
+ return ret;
+}
+
+} // namespace circle
+} // namespace mio
+
+#endif // __MIO_CIRCLE05_HELPER_H__
diff --git a/compiler/mio-circle05/include/mio_circle/Reader.h b/compiler/mio-circle05/include/mio_circle/Reader.h
new file mode 100644
index 000000000..d751a30c6
--- /dev/null
+++ b/compiler/mio-circle05/include/mio_circle/Reader.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MIO_CIRCLE05_READER_H__
+#define __MIO_CIRCLE05_READER_H__
+
+#include <mio/circle/schema_generated.h>
+
+#include <map>
+#include <string>
+#include <vector>
+
+// NOTE The Reader class originated from circledump and circle-tensordump;
+//      it still needs more work for stability, as those tools are for
+//      developers, not customers.
+
+namespace mio
+{
+namespace circle
+{
+
+/**
+ * @brief Loads a Circle file and provides helpers to access its attributes
+ */
+class Reader
+{
+private:
+ using CircleSubGraphs_t = flatbuffers::Vector<flatbuffers::Offset<::circle::SubGraph>>;
+ using CircleBuffers_t = flatbuffers::Vector<flatbuffers::Offset<::circle::Buffer>>;
+ using CircleTensors_t = flatbuffers::Vector<flatbuffers::Offset<::circle::Tensor>>;
+ using CircleOperators_t = flatbuffers::Vector<flatbuffers::Offset<::circle::Operator>>;
+ using CircleMetadata_t = flatbuffers::Vector<flatbuffers::Offset<::circle::Metadata>>;
+ using CircleSignatureDef_t = flatbuffers::Vector<flatbuffers::Offset<::circle::SignatureDef>>;
+
+public:
+ Reader(const ::circle::Model *model);
+
+ Reader() = delete;
+
+public:
+ uint32_t version() const { return _version; }
+
+ const std::vector<const ::circle::OperatorCode *> &opcodes() { return _op_codes; }
+ const CircleBuffers_t *buffers() { return _buffers; }
+ const CircleTensors_t *tensors() { return _tensors; }
+ const CircleOperators_t *operators() { return _operators; }
+ const std::vector<int32_t> &inputs() const { return _inputs; }
+ const std::vector<int32_t> &outputs() const { return _outputs; }
+ const ::circle::DataFormat &data_format() const { return _data_format; }
+ const CircleMetadata_t *metadata() const { return _metadata; }
+ const CircleSignatureDef_t *signature_defs() const { return _signature_defs; }
+
+ uint32_t num_subgraph() const { return _subgraphs->Length(); }
+
+ size_t buffer_info(uint32_t buf_idx, const uint8_t **buff_data);
+ ::circle::BuiltinOperator builtin_code(const ::circle::Operator *op) const;
+ std::string opcode_name(const ::circle::Operator *op) const;
+ std::vector<int32_t> outputs(const ::circle::Operator *op) const;
+ std::string tensor_name(const ::circle::Tensor *tensor) const;
+ std::string tensor_dtype(const ::circle::Tensor *tensor) const;
+
+public:
+ bool select_subgraph(uint32_t subgraph);
+ const std::string &subgraph_name(void) const { return _subgraph_name; }
+ uint32_t subgraph_index(void) const { return _subgraph_index; }
+
+private:
+ uint32_t _version;
+
+ const CircleSubGraphs_t *_subgraphs{nullptr};
+ const CircleBuffers_t *_buffers{nullptr};
+ const CircleTensors_t *_tensors{nullptr};
+ const CircleOperators_t *_operators{nullptr};
+ const CircleMetadata_t *_metadata{nullptr};
+ const CircleSignatureDef_t *_signature_defs{nullptr};
+
+ uint32_t _subgraph_index = 0;
+ std::string _subgraph_name;
+ std::vector<const ::circle::OperatorCode *> _op_codes;
+ std::vector<int32_t> _inputs;
+ std::vector<int32_t> _outputs;
+ ::circle::DataFormat _data_format = ::circle::DataFormat::DataFormat_CHANNELS_FIRST;
+};
+
+} // namespace circle
+} // namespace mio
+
+#endif // __MIO_CIRCLE05_READER_H__
diff --git a/compiler/mio-circle05/src/Helper.cpp b/compiler/mio-circle05/src/Helper.cpp
new file mode 100644
index 000000000..bbfa2041a
--- /dev/null
+++ b/compiler/mio-circle05/src/Helper.cpp
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_circle/Helper.h"
+
+#include <algorithm>
+#include <sstream>
+
+namespace mio
+{
+namespace circle
+{
+
+/**
+ * This provides a v3/v3a/v3b format-neutral BuiltinOperator.
+ * NOTE circle has negative opcode values (252~254 as uint8_t), so we cannot
+ * use std::max() as tflite does: deprecated_builtin_code can be negative
+ * while builtin_code is 0 for v0.3 files.
+ */
+::circle::BuiltinOperator builtin_code_neutral(const ::circle::OperatorCode *opcode)
+{
+ assert(opcode != nullptr);
+ if (opcode->deprecated_builtin_code() == 127)
+ {
+ assert(opcode->builtin_code() >= 127);
+ return opcode->builtin_code();
+ }
+ // There was no 255(-1) value in v0.3
+ assert(opcode->deprecated_builtin_code() != -1);
+ return static_cast<::circle::BuiltinOperator>(opcode->deprecated_builtin_code());
+}
+
+bool is_valid(const ::circle::OperatorCode *opcode)
+{
+ // Valid Range : BuiltinOperator_MIN <= deprecated_builtin_code <= 127
+ const int8_t deprecated_builtin_code = opcode->deprecated_builtin_code();
+ if (deprecated_builtin_code < ::circle::BuiltinOperator_MIN)
+ return false;
+ // There was no 255(-1) value in v0.3
+ if (deprecated_builtin_code == -1)
+ return false;
+
+ const ::circle::BuiltinOperator builtin_code = opcode->builtin_code();
+ if (!(::circle::BuiltinOperator_MIN <= builtin_code &&
+ builtin_code <= ::circle::BuiltinOperator_MAX))
+ return false;
+
+ return true;
+}
+
+bool is_custom(const ::circle::OperatorCode *opcode)
+{
+ ::circle::BuiltinOperator code = builtin_code_neutral(opcode);
+ return (code == ::circle::BuiltinOperator_CUSTOM);
+}
+
+std::string opcode_name(const ::circle::OperatorCode *opcode)
+{
+ assert(opcode);
+
+ if (!is_valid(opcode))
+ {
+ std::ostringstream oss;
+ oss << "(invalid)";
+ return oss.str();
+ }
+
+ if (is_custom(opcode))
+ {
+ if (!opcode->custom_code())
+ return "(invalid custom)";
+
+ std::string custom_op = "CUSTOM(";
+ custom_op += opcode->custom_code()->c_str();
+ custom_op += ")";
+ return custom_op;
+ }
+
+ ::circle::BuiltinOperator code = builtin_code_neutral(opcode);
+ return ::circle::EnumNameBuiltinOperator(code);
+}
+
+const char *tensor_type(const ::circle::Tensor *tensor)
+{
+ return ::circle::EnumNameTensorType(tensor->type());
+}
+
+const char *tensor_name(const ::circle::Tensor *tensor)
+{
+ if (tensor->name() == nullptr || std::string(tensor->name()->c_str()).empty())
+ return "(noname)";
+
+ return tensor->name()->c_str();
+}
+
+} // namespace circle
+} // namespace mio
diff --git a/compiler/mio-circle05/src/Helper.test.cpp b/compiler/mio-circle05/src/Helper.test.cpp
new file mode 100644
index 000000000..be63688df
--- /dev/null
+++ b/compiler/mio-circle05/src/Helper.test.cpp
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_circle/Helper.h"
+
+#include <flatbuffers/flatbuffers.h>
+#include <gtest/gtest.h>
+
+#include <vector>
+
+class mio_circle05_helper_test : public ::testing::Test
+{
+protected:
+ void initialization_finish(void)
+ {
+ _fbb.Finish(circle::CreateModelDirect(_fbb, 0, &_opcodes_vec));
+ }
+
+protected:
+ void add_operator_code(int8_t deprecated_builtin_code, const char *custom_code,
+ circle::BuiltinOperator builtin_code)
+ {
+ _opcodes_vec.push_back(circle::CreateOperatorCodeDirect(
+ _fbb, deprecated_builtin_code, custom_code, 1 /* version */, builtin_code));
+ }
+
+ const circle::OperatorCode *get_operator_code(uint8_t idx)
+ {
+ return circle::GetModel(_fbb.GetBufferPointer())->operator_codes()->Get(idx);
+ }
+
+private:
+ flatbuffers::FlatBufferBuilder _fbb;
+ std::vector<flatbuffers::Offset<circle::OperatorCode>> _opcodes_vec;
+};
+
+TEST_F(mio_circle05_helper_test, v05)
+{
+ // BuiltinOperator_ADD = 0
+ // BuiltinOperator_CONV_2D = 3
+ add_operator_code(3, "", circle::BuiltinOperator_ADD);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)),
+ circle::BuiltinOperator_CONV_2D);
+ ASSERT_FALSE(mio::circle::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_circle05_helper_test, v05_custom_old)
+{
+ // BuiltinOperator_ADD = 0
+ // BuiltinOperator_CUSTOM = 32
+ add_operator_code(32, "custom", circle::BuiltinOperator_ADD);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)),
+ circle::BuiltinOperator_CUSTOM);
+ ASSERT_TRUE(mio::circle::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_circle05_helper_test, v05_NEG)
+{
+ // BuiltinOperator_ADD = 0
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be a negative value (128 wraps to -128 in int8_t)
+ add_operator_code(128, "", circle::BuiltinOperator_ADD);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::circle::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_circle05_helper_test, v05_under127)
+{
+ // BuiltinOperator_CONV_2D = 3
+ add_operator_code(3, "", circle::BuiltinOperator_CONV_2D);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)),
+ circle::BuiltinOperator_CONV_2D);
+ ASSERT_FALSE(mio::circle::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_circle05_helper_test, v05_under127_NEG)
+{
+ // BuiltinOperator_CONV_2D = 3
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be a negative value (128 wraps to -128 in int8_t)
+ add_operator_code(128, "", circle::BuiltinOperator_CONV_2D);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::circle::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_circle05_helper_test, v05_custom)
+{
+ // BuiltinOperator_CUSTOM = 32
+ add_operator_code(32, "custom", circle::BuiltinOperator_CUSTOM);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)),
+ circle::BuiltinOperator_CUSTOM);
+ ASSERT_TRUE(mio::circle::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_circle05_helper_test, v05_custom_NEG)
+{
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be a negative value (128 wraps to -128 in int8_t)
+ add_operator_code(128, "custom", circle::BuiltinOperator_CUSTOM);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::circle::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_circle05_helper_test, v05_over127)
+{
+ // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES = 127
+ // BuiltinOperator_CUMSUM = 128
+ add_operator_code(127, "", circle::BuiltinOperator_CUMSUM);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)),
+ circle::BuiltinOperator_CUMSUM);
+ ASSERT_FALSE(mio::circle::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_circle05_helper_test, v05_over127_NEG)
+{
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be a negative value (128 wraps to -128 in int8_t)
+ add_operator_code(128, "", circle::BuiltinOperator_CUMSUM);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::circle::is_valid(get_operator_code(0)));
+}
diff --git a/compiler/mio-circle05/src/Reader.cpp b/compiler/mio-circle05/src/Reader.cpp
new file mode 100644
index 000000000..0ee22db14
--- /dev/null
+++ b/compiler/mio-circle05/src/Reader.cpp
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_circle/Reader.h"
+#include "mio_circle/Helper.h"
+
+#include <sstream>
+#include <string>
+
+namespace mio
+{
+namespace circle
+{
+
+Reader::Reader(const ::circle::Model *model)
+{
+ if (model == nullptr)
+ {
+ throw std::runtime_error("Invalid model");
+ }
+
+ _version = model->version();
+ _subgraphs = model->subgraphs();
+ _buffers = model->buffers();
+ _metadata = model->metadata();
+ _signature_defs = model->signature_defs();
+
+ auto opcodes = model->operator_codes();
+ for (const ::circle::OperatorCode *opcode : *opcodes)
+ {
+ _op_codes.push_back(opcode);
+ }
+}
+
+size_t Reader::buffer_info(uint32_t buf_idx, const uint8_t **buff_data)
+{
+ if (buff_data != nullptr)
+ {
+ *buff_data = nullptr;
+ }
+
+ if (buf_idx == 0)
+ return 0;
+
+ if (auto *buffer = (*_buffers)[buf_idx])
+ {
+ if (auto *array = buffer->data())
+ {
+ if (size_t size = array->size())
+ {
+ if (buff_data != nullptr)
+ {
+ *buff_data = reinterpret_cast<const uint8_t *>(array->data());
+ }
+ return size;
+ }
+ }
+ }
+
+ return 0;
+}
+
+::circle::BuiltinOperator Reader::builtin_code(const ::circle::Operator *op) const
+{
+ uint32_t index = op->opcode_index();
+ assert(index < _op_codes.size());
+ const ::circle::OperatorCode *opcode = _op_codes.at(index);
+
+ return mio::circle::builtin_code_neutral(opcode);
+}
+
+std::string Reader::opcode_name(const ::circle::Operator *op) const
+{
+ uint32_t index = op->opcode_index();
+ assert(index < _op_codes.size());
+ const ::circle::OperatorCode *opcode = _op_codes.at(index);
+
+ if (!mio::circle::is_valid(opcode))
+ {
+ std::ostringstream oss;
+ oss << "(invalid: " << index << ")";
+ return oss.str();
+ }
+
+ return mio::circle::opcode_name(opcode);
+}
+
+std::vector<int32_t> Reader::outputs(const ::circle::Operator *op) const
+{
+ return as_index_vector(op->outputs());
+}
+
+std::string Reader::tensor_name(const ::circle::Tensor *tensor) const
+{
+ return mio::circle::tensor_name(tensor);
+}
+
+std::string Reader::tensor_dtype(const ::circle::Tensor *tensor) const
+{
+ return mio::circle::tensor_type(tensor);
+}
+
+bool Reader::select_subgraph(uint32_t sgindex)
+{
+ _subgraph_index = sgindex;
+ _tensors = nullptr;
+ _operators = nullptr;
+
+ _inputs.clear();
+ _outputs.clear();
+
+ if (_subgraphs->Length() <= sgindex)
+ {
+ assert(false);
+ return false;
+ }
+
+ const ::circle::SubGraph *subgraph = (*_subgraphs)[sgindex];
+
+ auto name = subgraph->name();
+ _subgraph_name = name ? name->c_str() : "(noname)";
+
+ _tensors = subgraph->tensors();
+ _operators = subgraph->operators();
+ _data_format = subgraph->data_format();
+
+ _inputs = as_index_vector(subgraph->inputs());
+ _outputs = as_index_vector(subgraph->outputs());
+
+ return true;
+}
+
+} // namespace circle
+} // namespace mio
diff --git a/compiler/mio-circle05/src/Reader.test.cpp b/compiler/mio-circle05/src/Reader.test.cpp
new file mode 100644
index 000000000..0c60999f4
--- /dev/null
+++ b/compiler/mio-circle05/src/Reader.test.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_circle/Reader.h"
+
+#include <flatbuffers/flatbuffers.h>
+#include <gtest/gtest.h>
+
+class mio_circle05_reader_test : public ::testing::Test
+{
+protected:
+ void initialization_empty(void)
+ {
+ _model = circle::CreateModelDirect(_fbb, 0, &_opcodes_vec);
+ circle::FinishModelBuffer(_fbb, _model);
+ }
+
+ const circle::Model *circleModel(void)
+ {
+ auto ptr = _fbb.GetBufferPointer();
+ return circle::GetModel(ptr);
+ }
+
+private:
+ flatbuffers::FlatBufferBuilder _fbb;
+ flatbuffers::Offset<circle::Model> _model;
+ std::vector<flatbuffers::Offset<circle::OperatorCode>> _opcodes_vec;
+};
+
+TEST_F(mio_circle05_reader_test, null_Model_NEG)
+{
+ EXPECT_THROW(mio::circle::Reader reader(nullptr), std::runtime_error);
+}
+
+TEST_F(mio_circle05_reader_test, empty_Model)
+{
+ initialization_empty();
+
+ const circle::Model *model = circleModel();
+ EXPECT_NE(nullptr, model);
+
+ mio::circle::Reader reader(model);
+
+ SUCCEED();
+}
+
+// TODO add more tests
diff --git a/compiler/mio-circle06/CMakeLists.txt b/compiler/mio-circle06/CMakeLists.txt
new file mode 100644
index 000000000..2ccd8059c
--- /dev/null
+++ b/compiler/mio-circle06/CMakeLists.txt
@@ -0,0 +1,52 @@
+nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
+
+if(NOT FlatBuffers_FOUND)
+ message(STATUS "mio-circle06 skip: FlatBuffers 2.0 NOT FOUND")
+ return()
+endif(NOT FlatBuffers_FOUND)
+
+message(STATUS "Build mio-circle06: TRUE")
+
+# TODO Find a better way
+# TODO use nnpackage
+# set(SCHEMA_FILE "${NNAS_PROJECT_SOURCE_DIR}/nnpackage/schema/circle_schema.fbs")
+set(SCHEMA_FILE "${NNAS_PROJECT_SOURCE_DIR}/res/CircleSchema/0.6/circle_schema.fbs")
+
+# NOTE Copy circle_schema.fbs as schema.fbs to generate "schema_generated.h" instead of "circle_schema_generated.h"
+add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/schema.fbs"
+ COMMAND ${CMAKE_COMMAND} -E copy "${SCHEMA_FILE}" schema.fbs
+ WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
+ DEPENDS "${SCHEMA_FILE}"
+)
+
+FlatBuffers_Target(mio_circle06
+ OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen/mio/circle"
+ INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen"
+ SCHEMA_DIR "${CMAKE_CURRENT_BINARY_DIR}"
+ SCHEMA_FILES "schema.fbs"
+)
+
+# This example shows how to use "mio-circle06" library
+add_executable(mio_circle06_example example.cpp)
+target_link_libraries(mio_circle06_example mio_circle06)
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(mio_circle06_helper STATIC ${SOURCES})
+set_target_properties(mio_circle06_helper PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(mio_circle06_helper PRIVATE src)
+target_include_directories(mio_circle06_helper PUBLIC include)
+target_link_libraries(mio_circle06_helper mio_circle06)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(mio_circle06_helper_test ${TESTS})
+target_include_directories(mio_circle06_helper_test PRIVATE src)
+target_link_libraries(mio_circle06_helper_test mio_circle06)
+target_link_libraries(mio_circle06_helper_test mio_circle06_helper)
diff --git a/compiler/mio-circle06/README.md b/compiler/mio-circle06/README.md
new file mode 100644
index 000000000..c84296416
--- /dev/null
+++ b/compiler/mio-circle06/README.md
@@ -0,0 +1,3 @@
+# mio-circle06
+
+Let's make it easy to read and write Circle models.
diff --git a/compiler/mio-circle06/example.cpp b/compiler/mio-circle06/example.cpp
new file mode 100644
index 000000000..e99e45429
--- /dev/null
+++ b/compiler/mio-circle06/example.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// This example shows how to include and use "mio-circle06"
+//
+#include <mio/circle/schema_generated.h>
+
+#include <fstream>
+#include <iostream>
+#include <vector>
+
+int main(int argc, char **argv)
+{
+ if (argc != 2)
+ {
+ std::cerr << "Usage: " << argv[0] << " <circle_model_file>" << std::endl;
+ return 255;
+ }
+
+ std::ifstream ifs(argv[1], std::ios_base::binary);
+ std::vector<char> buf(std::istreambuf_iterator<char>{ifs}, std::istreambuf_iterator<char>{});
+
+ flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(buf.data()), buf.size()};
+
+ if (!circle::VerifyModelBuffer(verifier))
+ {
+ std::cout << "Fail" << std::endl;
+ return 255;
+ }
+
+ std::cout << "Pass" << std::endl;
+ return 0;
+}
diff --git a/compiler/mio-circle06/include/mio_circle/Helper.h b/compiler/mio-circle06/include/mio_circle/Helper.h
new file mode 100644
index 000000000..55cab5872
--- /dev/null
+++ b/compiler/mio-circle06/include/mio_circle/Helper.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MIO_CIRCLE06_HELPER_H__
+#define __MIO_CIRCLE06_HELPER_H__
+
+#include <mio/circle/schema_generated.h>
+
+#include <vector>
+
+namespace mio
+{
+namespace circle
+{
+
+::circle::BuiltinOperator builtin_code_neutral(const ::circle::OperatorCode *opcode);
+bool is_valid(const ::circle::OperatorCode *opcode);
+bool is_custom(const ::circle::OperatorCode *opcode);
+std::string opcode_name(const ::circle::OperatorCode *opcode);
+const char *tensor_type(const ::circle::Tensor *tensor);
+const char *tensor_name(const ::circle::Tensor *tensor);
+
+template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T> *flat_array)
+{
+ if (flat_array == nullptr)
+ {
+ throw std::runtime_error("flat array is nullptr");
+ }
+
+ std::vector<T> ret(flat_array->Length());
+ for (uint32_t i = 0; i < flat_array->Length(); i++)
+ {
+ ret[i] = flat_array->Get(i);
+ }
+ return ret;
+}
+
+} // namespace circle
+} // namespace mio
+
+#endif // __MIO_CIRCLE06_HELPER_H__
diff --git a/compiler/mio-circle06/include/mio_circle/Reader.h b/compiler/mio-circle06/include/mio_circle/Reader.h
new file mode 100644
index 000000000..357061480
--- /dev/null
+++ b/compiler/mio-circle06/include/mio_circle/Reader.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MIO_CIRCLE06_READER_H__
+#define __MIO_CIRCLE06_READER_H__
+
+#include <mio/circle/schema_generated.h>
+
+#include <map>
+#include <string>
+#include <vector>
+
+// NOTE The Reader class originated from circledump and circle-tensordump;
+//      it still needs more work for stability, as those tools are for
+//      developers, not customers.
+
+namespace mio
+{
+namespace circle
+{
+
+/**
+ * @brief Loads a Circle file and provides helpers to access its attributes
+ */
+class Reader
+{
+private:
+ using CircleSubGraphs_t = flatbuffers::Vector<flatbuffers::Offset<::circle::SubGraph>>;
+ using CircleBuffers_t = flatbuffers::Vector<flatbuffers::Offset<::circle::Buffer>>;
+ using CircleTensors_t = flatbuffers::Vector<flatbuffers::Offset<::circle::Tensor>>;
+ using CircleOperators_t = flatbuffers::Vector<flatbuffers::Offset<::circle::Operator>>;
+ using CircleMetadata_t = flatbuffers::Vector<flatbuffers::Offset<::circle::Metadata>>;
+ using CircleSignatureDef_t = flatbuffers::Vector<flatbuffers::Offset<::circle::SignatureDef>>;
+
+public:
+ Reader(const ::circle::Model *model);
+
+ Reader() = delete;
+
+public:
+ uint32_t version() const { return _version; }
+
+ const std::vector<const ::circle::OperatorCode *> &opcodes() { return _op_codes; }
+ const CircleBuffers_t *buffers() { return _buffers; }
+ const CircleTensors_t *tensors() { return _tensors; }
+ const CircleOperators_t *operators() { return _operators; }
+ const std::vector<int32_t> &inputs() const { return _inputs; }
+ const std::vector<int32_t> &outputs() const { return _outputs; }
+ const ::circle::DataFormat &data_format() const { return _data_format; }
+ const CircleMetadata_t *metadata() const { return _metadata; }
+ const CircleSignatureDef_t *signature_defs() const { return _signature_defs; }
+
+ uint32_t num_subgraph() const { return _subgraphs->Length(); }
+
+ size_t buffer_info(uint32_t buf_idx, const uint8_t **buff_data);
+ ::circle::BuiltinOperator builtin_code(const ::circle::Operator *op) const;
+ std::string opcode_name(const ::circle::Operator *op) const;
+ std::vector<int32_t> outputs(const ::circle::Operator *op) const;
+ std::string tensor_name(const ::circle::Tensor *tensor) const;
+ std::string tensor_dtype(const ::circle::Tensor *tensor) const;
+
+public:
+ bool select_subgraph(uint32_t subgraph);
+ const std::string &subgraph_name(void) const { return _subgraph_name; }
+ uint32_t subgraph_index(void) const { return _subgraph_index; }
+
+private:
+ uint32_t _version;
+
+ const CircleSubGraphs_t *_subgraphs{nullptr};
+ const CircleBuffers_t *_buffers{nullptr};
+ const CircleTensors_t *_tensors{nullptr};
+ const CircleOperators_t *_operators{nullptr};
+ const CircleMetadata_t *_metadata{nullptr};
+ const CircleSignatureDef_t *_signature_defs{nullptr};
+
+ uint32_t _subgraph_index = 0;
+ std::string _subgraph_name;
+ std::vector<const ::circle::OperatorCode *> _op_codes;
+ std::vector<int32_t> _inputs;
+ std::vector<int32_t> _outputs;
+ ::circle::DataFormat _data_format = ::circle::DataFormat::DataFormat_CHANNELS_FIRST;
+};
+
+} // namespace circle
+} // namespace mio
+
+#endif // __MIO_CIRCLE06_READER_H__
diff --git a/compiler/mio-circle06/src/Helper.cpp b/compiler/mio-circle06/src/Helper.cpp
new file mode 100644
index 000000000..bbfa2041a
--- /dev/null
+++ b/compiler/mio-circle06/src/Helper.cpp
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_circle/Helper.h"
+
+#include <algorithm>
+#include <sstream>
+
+namespace mio
+{
+namespace circle
+{
+
+/**
+ * This provides a v3/v3a/v3b format-neutral BuiltinOperator.
+ * NOTE circle has negative opcode values (252~254 as uint8_t), so we cannot
+ * use std::max() as tflite does: deprecated_builtin_code can be negative
+ * while builtin_code is 0 for v0.3 files.
+ */
+::circle::BuiltinOperator builtin_code_neutral(const ::circle::OperatorCode *opcode)
+{
+ assert(opcode != nullptr);
+ if (opcode->deprecated_builtin_code() == 127)
+ {
+ assert(opcode->builtin_code() >= 127);
+ return opcode->builtin_code();
+ }
+ // There was no 255(-1) value in v0.3
+ assert(opcode->deprecated_builtin_code() != -1);
+ return static_cast<::circle::BuiltinOperator>(opcode->deprecated_builtin_code());
+}
+
+bool is_valid(const ::circle::OperatorCode *opcode)
+{
+ // Valid Range : BuiltinOperator_MIN <= deprecated_builtin_code <= 127
+ const int8_t deprecated_builtin_code = opcode->deprecated_builtin_code();
+ if (deprecated_builtin_code < ::circle::BuiltinOperator_MIN)
+ return false;
+ // There was no 255(-1) value in v0.3
+ if (deprecated_builtin_code == -1)
+ return false;
+
+ const ::circle::BuiltinOperator builtin_code = opcode->builtin_code();
+ if (!(::circle::BuiltinOperator_MIN <= builtin_code &&
+ builtin_code <= ::circle::BuiltinOperator_MAX))
+ return false;
+
+ return true;
+}
+
+bool is_custom(const ::circle::OperatorCode *opcode)
+{
+ ::circle::BuiltinOperator code = builtin_code_neutral(opcode);
+ return (code == ::circle::BuiltinOperator_CUSTOM);
+}
+
+std::string opcode_name(const ::circle::OperatorCode *opcode)
+{
+ assert(opcode);
+
+ if (!is_valid(opcode))
+ {
+ std::ostringstream oss;
+ oss << "(invalid)";
+ return oss.str();
+ }
+
+ if (is_custom(opcode))
+ {
+ if (!opcode->custom_code())
+ return "(invalid custom)";
+
+ std::string custom_op = "CUSTOM(";
+ custom_op += opcode->custom_code()->c_str();
+ custom_op += ")";
+ return custom_op;
+ }
+
+ ::circle::BuiltinOperator code = builtin_code_neutral(opcode);
+ return ::circle::EnumNameBuiltinOperator(code);
+}
+
+const char *tensor_type(const ::circle::Tensor *tensor)
+{
+ return ::circle::EnumNameTensorType(tensor->type());
+}
+
+const char *tensor_name(const ::circle::Tensor *tensor)
+{
+ if (tensor->name() == nullptr || std::string(tensor->name()->c_str()).empty())
+ return "(noname)";
+
+ return tensor->name()->c_str();
+}
+
+} // namespace circle
+} // namespace mio
diff --git a/compiler/mio-circle06/src/Helper.test.cpp b/compiler/mio-circle06/src/Helper.test.cpp
new file mode 100644
index 000000000..9b158d198
--- /dev/null
+++ b/compiler/mio-circle06/src/Helper.test.cpp
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_circle/Helper.h"
+
+#include <flatbuffers/flatbuffers.h>
+#include <gtest/gtest.h>
+
+#include <vector>
+
+class mio_circle06_helper_test : public ::testing::Test
+{
+protected:
+ void initialization_finish(void)
+ {
+ _fbb.Finish(circle::CreateModelDirect(_fbb, 0, &_opcodes_vec));
+ }
+
+protected:
+ void add_operator_code(int8_t deprecated_builtin_code, const char *custom_code,
+ circle::BuiltinOperator builtin_code)
+ {
+ _opcodes_vec.push_back(circle::CreateOperatorCodeDirect(
+ _fbb, deprecated_builtin_code, custom_code, 1 /* version */, builtin_code));
+ }
+
+ const circle::OperatorCode *get_operator_code(uint8_t idx)
+ {
+ return circle::GetModel(_fbb.GetBufferPointer())->operator_codes()->Get(idx);
+ }
+
+private:
+ flatbuffers::FlatBufferBuilder _fbb;
+ std::vector<flatbuffers::Offset<circle::OperatorCode>> _opcodes_vec;
+};
+
+TEST_F(mio_circle06_helper_test, v06)
+{
+ // BuiltinOperator_ADD = 0
+ // BuiltinOperator_CONV_2D = 3
+ add_operator_code(3, "", circle::BuiltinOperator_ADD);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)),
+ circle::BuiltinOperator_CONV_2D);
+ ASSERT_FALSE(mio::circle::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_circle06_helper_test, v06_custom_old)
+{
+ // BuiltinOperator_ADD = 0
+ // BuiltinOperator_CUSTOM = 32
+ add_operator_code(32, "custom", circle::BuiltinOperator_ADD);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)),
+ circle::BuiltinOperator_CUSTOM);
+ ASSERT_TRUE(mio::circle::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_circle06_helper_test, v06_NEG)
+{
+ // BuiltinOperator_ADD = 0
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be a negative value (128 wraps to -128 in int8_t)
+ add_operator_code(128, "", circle::BuiltinOperator_ADD);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::circle::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_circle06_helper_test, v06_under127)
+{
+ // BuiltinOperator_CONV_2D = 3
+ add_operator_code(3, "", circle::BuiltinOperator_CONV_2D);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)),
+ circle::BuiltinOperator_CONV_2D);
+ ASSERT_FALSE(mio::circle::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_circle06_helper_test, v06_under127_NEG)
+{
+ // BuiltinOperator_CONV_2D = 3
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be a negative value
+ add_operator_code(128, "", circle::BuiltinOperator_CONV_2D);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::circle::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_circle06_helper_test, v06_custom)
+{
+ // BuiltinOperator_CUSTOM = 32
+ add_operator_code(32, "custom", circle::BuiltinOperator_CUSTOM);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)),
+ circle::BuiltinOperator_CUSTOM);
+ ASSERT_TRUE(mio::circle::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_circle06_helper_test, v06_custom_NEG)
+{
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be a negative value
+ add_operator_code(128, "custom", circle::BuiltinOperator_CUSTOM);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::circle::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_circle06_helper_test, v06_over127)
+{
+ // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES = 127
+ // BuiltinOperator_CUMSUM = 128
+ add_operator_code(127, "", circle::BuiltinOperator_CUMSUM);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)),
+ circle::BuiltinOperator_CUMSUM);
+ ASSERT_FALSE(mio::circle::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_circle06_helper_test, v06_over127_NEG)
+{
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be a negative value
+ add_operator_code(128, "", circle::BuiltinOperator_CUMSUM);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::circle::is_valid(get_operator_code(0)));
+}
diff --git a/compiler/mio-circle06/src/Reader.cpp b/compiler/mio-circle06/src/Reader.cpp
new file mode 100644
index 000000000..0ee22db14
--- /dev/null
+++ b/compiler/mio-circle06/src/Reader.cpp
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_circle/Reader.h"
+#include "mio_circle/Helper.h"
+
+#include <sstream>
+#include <string>
+
+namespace mio
+{
+namespace circle
+{
+
+Reader::Reader(const ::circle::Model *model)
+{
+ if (model == nullptr)
+ {
+ throw std::runtime_error("Invalid model");
+ }
+
+ _version = model->version();
+ _subgraphs = model->subgraphs();
+ _buffers = model->buffers();
+ _metadata = model->metadata();
+ _signature_defs = model->signature_defs();
+
+ auto opcodes = model->operator_codes();
+ for (const ::circle::OperatorCode *opcode : *opcodes)
+ {
+ _op_codes.push_back(opcode);
+ }
+}
+
+size_t Reader::buffer_info(uint32_t buf_idx, const uint8_t **buff_data)
+{
+ if (buff_data != nullptr)
+ {
+ *buff_data = nullptr;
+ }
+
+ if (buf_idx == 0)
+ return 0;
+
+ if (auto *buffer = (*_buffers)[buf_idx])
+ {
+ if (auto *array = buffer->data())
+ {
+ if (size_t size = array->size())
+ {
+ if (buff_data != nullptr)
+ {
+ *buff_data = reinterpret_cast<const uint8_t *>(array->data());
+ }
+ return size;
+ }
+ }
+ }
+
+ return 0;
+}
+
+::circle::BuiltinOperator Reader::builtin_code(const ::circle::Operator *op) const
+{
+ uint32_t index = op->opcode_index();
+ assert(index < _op_codes.size());
+ const ::circle::OperatorCode *opcode = _op_codes.at(index);
+
+ return mio::circle::builtin_code_neutral(opcode);
+}
+
+std::string Reader::opcode_name(const ::circle::Operator *op) const
+{
+ uint32_t index = op->opcode_index();
+ assert(index < _op_codes.size());
+ const ::circle::OperatorCode *opcode = _op_codes.at(index);
+
+ if (!mio::circle::is_valid(opcode))
+ {
+ std::ostringstream oss;
+ oss << "(invalid: " << index << ")";
+ return oss.str();
+ }
+
+ return mio::circle::opcode_name(opcode);
+}
+
+std::vector<int32_t> Reader::outputs(const ::circle::Operator *op) const
+{
+ return as_index_vector(op->outputs());
+}
+
+std::string Reader::tensor_name(const ::circle::Tensor *tensor) const
+{
+ return mio::circle::tensor_name(tensor);
+}
+
+std::string Reader::tensor_dtype(const ::circle::Tensor *tensor) const
+{
+ return mio::circle::tensor_type(tensor);
+}
+
+bool Reader::select_subgraph(uint32_t sgindex)
+{
+ _subgraph_index = sgindex;
+ _tensors = nullptr;
+ _operators = nullptr;
+
+ _inputs.clear();
+ _outputs.clear();
+
+ if (_subgraphs->Length() <= sgindex)
+ {
+ assert(false);
+ return false;
+ }
+
+ const ::circle::SubGraph *subgraph = (*_subgraphs)[sgindex];
+
+ auto name = subgraph->name();
+ _subgraph_name = name ? name->c_str() : "(noname)";
+
+ _tensors = subgraph->tensors();
+ _operators = subgraph->operators();
+ _data_format = subgraph->data_format();
+
+ _inputs = as_index_vector(subgraph->inputs());
+ _outputs = as_index_vector(subgraph->outputs());
+
+ return true;
+}
+
+} // namespace circle
+} // namespace mio
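
A minimal usage sketch of the Reader above (not part of the patch); it assumes the accessors conventionally declared in mio_circle/Reader.h, such as num_subgraph() and operators(), which sit outside this hunk:

#include "mio_circle/Reader.h"

#include <iostream>

void dump(const ::circle::Model *model)
{
  mio::circle::Reader reader(model); // throws std::runtime_error on nullptr

  for (uint32_t g = 0; g < reader.num_subgraph(); ++g)
  {
    reader.select_subgraph(g); // rebinds tensors/operators to subgraph g
    for (const auto *op : *reader.operators())
      std::cout << reader.opcode_name(op) << std::endl;
  }

  // buffer_info() reports the byte size of a buffer and, through the
  // out-parameter, a pointer to its raw data (0/nullptr for empty buffers).
  const uint8_t *data = nullptr;
  size_t size = reader.buffer_info(1, &data);
  (void)size;
}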
diff --git a/compiler/mio-circle06/src/Reader.test.cpp b/compiler/mio-circle06/src/Reader.test.cpp
new file mode 100644
index 000000000..668a8b1d9
--- /dev/null
+++ b/compiler/mio-circle06/src/Reader.test.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_circle/Reader.h"
+
+#include <flatbuffers/flatbuffers.h>
+#include <gtest/gtest.h>
+
+class mio_circle06_reader_test : public ::testing::Test
+{
+protected:
+ void initialization_empty(void)
+ {
+ _model = circle::CreateModelDirect(_fbb, 0, &_opcodes_vec);
+ circle::FinishModelBuffer(_fbb, _model);
+ }
+
+ const circle::Model *circleModel(void)
+ {
+ auto ptr = _fbb.GetBufferPointer();
+ return circle::GetModel(ptr);
+ }
+
+private:
+ flatbuffers::FlatBufferBuilder _fbb;
+ flatbuffers::Offset<circle::Model> _model;
+ std::vector<flatbuffers::Offset<circle::OperatorCode>> _opcodes_vec;
+};
+
+TEST_F(mio_circle06_reader_test, null_Model_NEG)
+{
+ EXPECT_THROW(mio::circle::Reader reader(nullptr), std::runtime_error);
+}
+
+TEST_F(mio_circle06_reader_test, empty_Model)
+{
+ initialization_empty();
+
+ const circle::Model *model = circleModel();
+ EXPECT_NE(nullptr, model);
+
+ mio::circle::Reader reader(model);
+
+ SUCCEED();
+}
+
+// TODO add more tests
diff --git a/compiler/mio-tf/CMakeLists.txt b/compiler/mio-tf/CMakeLists.txt
index d670f6bab..133d4684a 100644
--- a/compiler/mio-tf/CMakeLists.txt
+++ b/compiler/mio-tf/CMakeLists.txt
@@ -1,6 +1,6 @@
nnas_find_package(Protobuf QUIET)
# TensorFlowSource package is used to use ~.proto files
-nnas_find_package(TensorFlowSource EXACT 1.12 QUIET)
+nnas_find_package(TensorFlowSource EXACT 2.3 QUIET)
if(NOT Protobuf_FOUND)
return()
diff --git a/compiler/mio-tflite/CMakeLists.txt b/compiler/mio-tflite/CMakeLists.txt
index 9ef2859b9..90187b037 100644
--- a/compiler/mio-tflite/CMakeLists.txt
+++ b/compiler/mio-tflite/CMakeLists.txt
@@ -1,4 +1,4 @@
-nnas_find_package(FlatBuffers QUIET)
+nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
if(NOT FlatBuffers_FOUND)
message(STATUS "Build mio-tflite: FAILED (missing Flatbuffers)")
@@ -36,3 +36,13 @@ target_link_libraries(mio_tflite_example mio_tflite)
# TODO provide full tflite validation with runtime/interpreter
add_executable(mio_tflite_validate example.cpp)
target_link_libraries(mio_tflite_validate mio_tflite)
+
+nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.3.0 QUIET)
+
+if(NOT TensorFlowGEMMLowpSource_FOUND)
+ return()
+endif(NOT TensorFlowGEMMLowpSource_FOUND)
+
+add_library(mio_tflite_inc INTERFACE)
+target_include_directories(mio_tflite_inc SYSTEM INTERFACE "${TensorFlowSource_DIR}")
+target_include_directories(mio_tflite_inc SYSTEM INTERFACE "${TensorFlowGEMMLowpSource_DIR}")
diff --git a/compiler/mio-tflite/README.md b/compiler/mio-tflite/README.md
index 187b1a5c6..c717ab877 100644
--- a/compiler/mio-tflite/README.md
+++ b/compiler/mio-tflite/README.md
@@ -1,3 +1,5 @@
# mio-tflite
_mio-tflite_ provides a library to access TensorFlow lite model files
+
+NOTE: _mio-tflite_ is currently obsolete
diff --git a/compiler/mio-tflite2121/CMakeLists.txt b/compiler/mio-tflite2121/CMakeLists.txt
new file mode 100644
index 000000000..1ca8e7581
--- /dev/null
+++ b/compiler/mio-tflite2121/CMakeLists.txt
@@ -0,0 +1,60 @@
+nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
+
+if(NOT FlatBuffers_FOUND)
+ message(STATUS "Build mio-tflite2121: FAILED (missing Flatbuffers 2.0)")
+ return()
+endif(NOT FlatBuffers_FOUND)
+
+nnas_find_package(TensorFlowSource EXACT 2.12.1 QUIET)
+
+if(NOT TensorFlowSource_FOUND)
+ message(STATUS "Build mio-tflite2121: FAILED (missing TensorFlowSource 2.12.1)")
+ return()
+endif(NOT TensorFlowSource_FOUND)
+
+message(STATUS "Build mio-tflite2121: TRUE")
+message(STATUS "Build mio-tflite2121: with ${TensorFlowSource_DIR}")
+
+set(SCHEMA_FILE "${TensorFlowSource_DIR}/tensorflow/lite/schema/schema.fbs")
+
+# NOTE Use a copy of schema.fbs to provide a unified way for circle as well
+add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/schema.fbs"
+ COMMAND ${CMAKE_COMMAND} -E copy "${SCHEMA_FILE}" schema.fbs
+ WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
+ DEPENDS "${SCHEMA_FILE}"
+)
+
+FlatBuffers_Target(mio_tflite2121
+ OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen/mio/tflite"
+ INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen"
+ SCHEMA_DIR "${CMAKE_CURRENT_BINARY_DIR}"
+ SCHEMA_FILES "schema.fbs"
+)
+
+add_executable(mio_tflite2121_example example.cpp)
+target_link_libraries(mio_tflite2121_example mio_tflite2121)
+
+# Temporary tflite validation tool to replace nnkit-tflite
+# TODO provide full tflite validation with runtime/interpreter
+add_executable(mio_tflite2121_validate example.cpp)
+target_link_libraries(mio_tflite2121_validate mio_tflite2121)
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(mio_tflite2121_helper STATIC ${SOURCES})
+target_include_directories(mio_tflite2121_helper PRIVATE src)
+target_include_directories(mio_tflite2121_helper PUBLIC include)
+target_link_libraries(mio_tflite2121_helper mio_tflite2121)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(mio_tflite2121_helper_test ${TESTS})
+target_include_directories(mio_tflite2121_helper_test PRIVATE src)
+target_link_libraries(mio_tflite2121_helper_test mio_tflite2121)
+target_link_libraries(mio_tflite2121_helper_test mio_tflite2121_helper)
diff --git a/compiler/mio-tflite2121/README.md b/compiler/mio-tflite2121/README.md
new file mode 100644
index 000000000..a922f304b
--- /dev/null
+++ b/compiler/mio-tflite2121/README.md
@@ -0,0 +1,3 @@
+# mio-tflite2121
+
+_mio-tflite2121_ provides a library to access TensorFlow Lite model files using the TensorFlow v2.12.1 schema.
diff --git a/compiler/mio-tflite2121/example.cpp b/compiler/mio-tflite2121/example.cpp
new file mode 100644
index 000000000..54fe9e799
--- /dev/null
+++ b/compiler/mio-tflite2121/example.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// This example shows how to include and use "mio-tflite2121"
+//
+#include <mio/tflite/schema_generated.h>
+
+#include <fstream>
+#include <iostream>
+#include <vector>
+
+int main(int argc, char **argv)
+{
+ if (argc != 2)
+ {
+ std::cerr << "Usage: " << argv[0] << " <tflite model file>" << std::endl;
+ return 255;
+ }
+
+ std::ifstream ifs(argv[1], std::ios_base::binary);
+ std::vector<char> buf(std::istreambuf_iterator<char>{ifs}, std::istreambuf_iterator<char>{});
+
+ flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(buf.data()), buf.size()};
+
+ if (!tflite::VerifyModelBuffer(verifier))
+ {
+ std::cout << "Fail" << std::endl;
+ return 255;
+ }
+
+ std::cout << "Pass" << std::endl;
+ return 0;
+}
diff --git a/compiler/mio-tflite2121/include/mio_tflite2121/Helper.h b/compiler/mio-tflite2121/include/mio_tflite2121/Helper.h
new file mode 100644
index 000000000..f2062600a
--- /dev/null
+++ b/compiler/mio-tflite2121/include/mio_tflite2121/Helper.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MIO_TFLITE2121_HELPER_H__
+#define __MIO_TFLITE2121_HELPER_H__
+
+#include <mio/tflite/schema_generated.h>
+
+namespace mio
+{
+namespace tflite
+{
+
+::tflite::BuiltinOperator builtin_code_neutral(const ::tflite::OperatorCode *opcode);
+bool is_valid(const ::tflite::OperatorCode *opcode);
+bool is_custom(const ::tflite::OperatorCode *opcode);
+std::string opcode_name(const ::tflite::OperatorCode *opcode);
+const char *tensor_type(const ::tflite::Tensor *tensor);
+const char *tensor_name(const ::tflite::Tensor *tensor);
+
+} // namespace tflite
+} // namespace mio
+
+#endif // __MIO_TFLITE2121_HELPER_H__
diff --git a/compiler/mio-tflite2121/src/Helper.cpp b/compiler/mio-tflite2121/src/Helper.cpp
new file mode 100644
index 000000000..b0d1ba107
--- /dev/null
+++ b/compiler/mio-tflite2121/src/Helper.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_tflite2121/Helper.h"
+
+#include <sstream>
+
+namespace mio
+{
+namespace tflite
+{
+
+/**
+ * This will provide v3/v3a format neutral BuiltinOperator
+ *
+ * This function references
+ * https://github.com/tensorflow/tensorflow/blob/7d12007d7800d3714a02e05059f3ea602d1aec78/tensorflow/lite/schema/schema_utils.cc
+ */
+::tflite::BuiltinOperator builtin_code_neutral(const ::tflite::OperatorCode *opcode)
+{
+ assert(opcode != nullptr);
+ return std::max(opcode->builtin_code(),
+ static_cast<::tflite::BuiltinOperator>(opcode->deprecated_builtin_code()));
+}
+
+bool is_valid(const ::tflite::OperatorCode *opcode)
+{
+ // Valid Range : 0 <= deprecated_builtin_code <= 127
+ const int8_t deprecated_builtin_code = opcode->deprecated_builtin_code();
+ if (deprecated_builtin_code < 0)
+ return false;
+
+ const ::tflite::BuiltinOperator builtin_code = opcode->builtin_code();
+ if (!(::tflite::BuiltinOperator_MIN <= builtin_code &&
+ builtin_code <= ::tflite::BuiltinOperator_MAX))
+ return false;
+
+ return true;
+}
+
+bool is_custom(const ::tflite::OperatorCode *opcode)
+{
+ ::tflite::BuiltinOperator code = builtin_code_neutral(opcode);
+ return (code == ::tflite::BuiltinOperator_CUSTOM);
+}
+
+std::string opcode_name(const ::tflite::OperatorCode *opcode)
+{
+ assert(opcode);
+
+ if (!is_valid(opcode))
+ {
+ std::ostringstream oss;
+ oss << "(invalid)";
+ return oss.str();
+ }
+
+ if (is_custom(opcode))
+ {
+ if (!opcode->custom_code())
+ return "(invalid custom)";
+
+ std::string custom_op = "CUSTOM(";
+ custom_op += opcode->custom_code()->c_str();
+ custom_op += ")";
+ return custom_op;
+ }
+
+ ::tflite::BuiltinOperator code = builtin_code_neutral(opcode);
+ return ::tflite::EnumNameBuiltinOperator(code);
+}
+
+const char *tensor_type(const ::tflite::Tensor *tensor)
+{
+ return ::tflite::EnumNameTensorType(tensor->type());
+}
+
+const char *tensor_name(const ::tflite::Tensor *tensor)
+{
+ static const char *kEmptyTensorName = "(noname)";
+
+ auto name = tensor->name();
+ if (name)
+ return name->c_str();
+
+ return kEmptyTensorName;
+}
+
+} // namespace tflite
+} // namespace mio
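
The std::max() trick above deserves a note: in schema v3 the real operator code (always <= 127) lives in the int8 deprecated_builtin_code field while builtin_code stays at its default 0 (ADD); in schema v3a the real code lives in the int32 builtin_code field and deprecated_builtin_code is capped at 127 (PLACEHOLDER_FOR_GREATER_OP_CODES). Taking the maximum therefore recovers the real code in both layouts. A self-contained sketch using the raw enum values:

#include <algorithm>
#include <cassert>

int main()
{
  // v3 opcode: builtin_code defaults to 0 (ADD), the real code (<= 127)
  // sits in deprecated_builtin_code
  assert(std::max(0 /* ADD */, 3 /* CONV_2D */) == 3);

  // v3a opcode: deprecated_builtin_code is capped at the 127 placeholder,
  // the real code sits above it in builtin_code
  assert(std::max(128 /* CUMSUM */, 127 /* placeholder */) == 128);

  return 0;
}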
diff --git a/compiler/mio-tflite2121/src/Helper.test.cpp b/compiler/mio-tflite2121/src/Helper.test.cpp
new file mode 100644
index 000000000..1527a4956
--- /dev/null
+++ b/compiler/mio-tflite2121/src/Helper.test.cpp
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_tflite2121/Helper.h"
+
+#include <flatbuffers/flatbuffers.h>
+#include <gtest/gtest.h>
+
+#include <vector>
+
+class mio_tflite2121_helper_test : public ::testing::Test
+{
+protected:
+ void initialization_finish(void)
+ {
+ _fbb.Finish(tflite::CreateModelDirect(_fbb, 0, &_opcodes_vec));
+ }
+
+protected:
+ void add_operator_code(int8_t deprecated_builtin_code, const char *custom_code,
+ tflite::BuiltinOperator builtin_code)
+ {
+ _opcodes_vec.push_back(tflite::CreateOperatorCodeDirect(
+ _fbb, deprecated_builtin_code, custom_code, 1 /* version */, builtin_code));
+ }
+
+ const tflite::OperatorCode *get_operator_code(uint8_t idx)
+ {
+ return tflite::GetModel(_fbb.GetBufferPointer())->operator_codes()->Get(idx);
+ }
+
+private:
+ flatbuffers::FlatBufferBuilder _fbb;
+ std::vector<flatbuffers::Offset<tflite::OperatorCode>> _opcodes_vec;
+};
+
+/**
+ * Extended 'builtin_code' is not in TFLite schema v3.
+ *
+ * Thus it is filled with 0 (BuiltinOperator_ADD) in schema v3. Please refer to
+ * https://github.com/tensorflow/tensorflow/blob/1ab788fa8d08430be239ab970980b891ad7af494/tensorflow/lite/schema/schema_utils.cc#L28-L31
+ */
+TEST_F(mio_tflite2121_helper_test, v3)
+{
+ // BuiltinOperator_ADD = 0
+ // BuiltinOperator_CONV_2D = 3
+ add_operator_code(3, "", tflite::BuiltinOperator_ADD);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)),
+ tflite::BuiltinOperator_CONV_2D);
+ ASSERT_FALSE(mio::tflite::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite2121_helper_test, v3_custom)
+{
+ // BuiltinOperator_ADD = 0
+ // BuiltinOperator_CUSTOM = 32
+ add_operator_code(32, "custom", tflite::BuiltinOperator_ADD);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)),
+ tflite::BuiltinOperator_CUSTOM);
+ ASSERT_TRUE(mio::tflite::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite2121_helper_test, v3_NEG)
+{
+ // BuiltinOperator_ADD = 0
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be a negative value
+ add_operator_code(128, "", tflite::BuiltinOperator_ADD);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite2121_helper_test, v3a_under127)
+{
+ // BuiltinOperator_CONV_2D = 3
+ add_operator_code(3, "", tflite::BuiltinOperator_CONV_2D);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)),
+ tflite::BuiltinOperator_CONV_2D);
+ ASSERT_FALSE(mio::tflite::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite2121_helper_test, v3a_under127_NEG)
+{
+ // BuiltinOperator_CONV_2D = 3
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be a negative value
+ add_operator_code(128, "", tflite::BuiltinOperator_CONV_2D);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite2121_helper_test, v3a_custom)
+{
+ // BuiltinOperator_CUSTOM = 32
+ add_operator_code(32, "custom", tflite::BuiltinOperator_CUSTOM);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)),
+ tflite::BuiltinOperator_CUSTOM);
+ ASSERT_TRUE(mio::tflite::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite2121_helper_test, v3a_custom_NEG)
+{
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be a negative value
+ add_operator_code(128, "custom", tflite::BuiltinOperator_CUSTOM);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite2121_helper_test, v3a_over127)
+{
+ // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES = 127
+ // BuiltinOperator_CUMSUM = 128
+ add_operator_code(127, "", tflite::BuiltinOperator_CUMSUM);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)),
+ tflite::BuiltinOperator_CUMSUM);
+ ASSERT_FALSE(mio::tflite::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite2121_helper_test, v3a_over127_NEG)
+{
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be a negative value
+ add_operator_code(128, "", tflite::BuiltinOperator_CUMSUM);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0)));
+}
diff --git a/compiler/mio-tflite260/CMakeLists.txt b/compiler/mio-tflite260/CMakeLists.txt
new file mode 100644
index 000000000..f2cfeafcc
--- /dev/null
+++ b/compiler/mio-tflite260/CMakeLists.txt
@@ -0,0 +1,69 @@
+nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
+
+if(NOT FlatBuffers_FOUND)
+ message(STATUS "Build mio-tflite260: FAILED (missing Flatbuffers 2.0)")
+ return()
+endif(NOT FlatBuffers_FOUND)
+
+nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET)
+
+if(NOT TensorFlowSource_FOUND)
+ message(STATUS "Build mio-tflite260: FAILED (missing TensorFlowSource 2.6.0)")
+ return()
+endif(NOT TensorFlowSource_FOUND)
+
+message(STATUS "Build mio-tflite260: TRUE")
+
+set(SCHEMA_FILE "${TensorFlowSource_DIR}/tensorflow/lite/schema/schema.fbs")
+
+# NOTE Use a copy of schema.fbs to provide a unified way for circle as well
+add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/schema.fbs"
+ COMMAND ${CMAKE_COMMAND} -E copy "${SCHEMA_FILE}" schema.fbs
+ WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
+ DEPENDS "${SCHEMA_FILE}"
+)
+
+FlatBuffers_Target(mio_tflite260
+ OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen/mio/tflite"
+ INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen"
+ SCHEMA_DIR "${CMAKE_CURRENT_BINARY_DIR}"
+ SCHEMA_FILES "schema.fbs"
+)
+
+add_executable(mio_tflite260_example example.cpp)
+target_link_libraries(mio_tflite260_example mio_tflite260)
+
+# Temporary tflite validation tool to replace nnkit-tflite
+# TODO provide full tflite validation with runtime/interpreter
+add_executable(mio_tflite260_validate example.cpp)
+target_link_libraries(mio_tflite260_validate mio_tflite260)
+
+nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET)
+
+if(NOT TensorFlowGEMMLowpSource_FOUND)
+ return()
+endif(NOT TensorFlowGEMMLowpSource_FOUND)
+
+add_library(mio_tflite260_inc INTERFACE)
+target_include_directories(mio_tflite260_inc SYSTEM INTERFACE "${TensorFlowSource_DIR}")
+target_include_directories(mio_tflite260_inc SYSTEM INTERFACE "${TensorFlowGEMMLowpSource_DIR}")
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(mio_tflite260_helper STATIC ${SOURCES})
+target_include_directories(mio_tflite260_helper PRIVATE src)
+target_include_directories(mio_tflite260_helper PUBLIC include)
+target_link_libraries(mio_tflite260_helper mio_tflite260)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(mio_tflite260_helper_test ${TESTS})
+target_include_directories(mio_tflite260_helper_test PRIVATE src)
+target_link_libraries(mio_tflite260_helper_test mio_tflite260)
+target_link_libraries(mio_tflite260_helper_test mio_tflite260_helper)
diff --git a/compiler/mio-tflite260/README.md b/compiler/mio-tflite260/README.md
new file mode 100644
index 000000000..86d2998ed
--- /dev/null
+++ b/compiler/mio-tflite260/README.md
@@ -0,0 +1,5 @@
+# mio-tflite260
+
+_mio-tflite260_ provides a library to access TensorFlow Lite model files using the TensorFlow v2.6.0 schema.
+
+NOTE: _mio-tflite260_ is currently obsolete
diff --git a/compiler/mio-tflite260/example.cpp b/compiler/mio-tflite260/example.cpp
new file mode 100644
index 000000000..2787a3c2d
--- /dev/null
+++ b/compiler/mio-tflite260/example.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// This example shows how to include and use "mio-tflite260"
+//
+#include <mio/tflite/schema_generated.h>
+
+#include <fstream>
+#include <iostream>
+#include <vector>
+
+int main(int argc, char **argv)
+{
+ if (argc != 2)
+ {
+ std::cerr << "Usage: " << argv[0] << " <tflite model file>" << std::endl;
+ return 255;
+ }
+
+ std::ifstream ifs(argv[1], std::ios_base::binary);
+ std::vector<char> buf(std::istreambuf_iterator<char>{ifs}, std::istreambuf_iterator<char>{});
+
+ flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(buf.data()), buf.size()};
+
+ if (!tflite::VerifyModelBuffer(verifier))
+ {
+ std::cout << "Fail" << std::endl;
+ return 255;
+ }
+
+ std::cout << "Pass" << std::endl;
+ return 0;
+}
diff --git a/compiler/mio-tflite260/include/mio_tflite260/Helper.h b/compiler/mio-tflite260/include/mio_tflite260/Helper.h
new file mode 100644
index 000000000..cb027e604
--- /dev/null
+++ b/compiler/mio-tflite260/include/mio_tflite260/Helper.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MIO_TFLITE260_HELPER_H__
+#define __MIO_TFLITE260_HELPER_H__
+
+#include <mio/tflite/schema_generated.h>
+
+namespace mio
+{
+namespace tflite
+{
+
+::tflite::BuiltinOperator builtin_code_neutral(const ::tflite::OperatorCode *opcode);
+bool is_valid(const ::tflite::OperatorCode *opcode);
+bool is_custom(const ::tflite::OperatorCode *opcode);
+std::string opcode_name(const ::tflite::OperatorCode *opcode);
+const char *tensor_type(const ::tflite::Tensor *tensor);
+const char *tensor_name(const ::tflite::Tensor *tensor);
+
+} // namespace tflite
+} // namespace mio
+
+#endif // __MIO_TFLITE260_HELPER_H__
diff --git a/compiler/mio-tflite260/src/Helper.cpp b/compiler/mio-tflite260/src/Helper.cpp
new file mode 100644
index 000000000..9669058ea
--- /dev/null
+++ b/compiler/mio-tflite260/src/Helper.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_tflite260/Helper.h"
+
+#include <sstream>
+
+namespace mio
+{
+namespace tflite
+{
+
+/**
+ * This will provide v3/v3a format neutral BuiltinOperator
+ *
+ * This function references
+ * https://github.com/tensorflow/tensorflow/blob/7d12007d7800d3714a02e05059f3ea602d1aec78/tensorflow/lite/schema/schema_utils.cc
+ */
+::tflite::BuiltinOperator builtin_code_neutral(const ::tflite::OperatorCode *opcode)
+{
+ assert(opcode != nullptr);
+ return std::max(opcode->builtin_code(),
+ static_cast<::tflite::BuiltinOperator>(opcode->deprecated_builtin_code()));
+}
+
+bool is_valid(const ::tflite::OperatorCode *opcode)
+{
+ // Valid Range : 0 <= deprecated_builtin_code <= 127
+ const int8_t deprecated_builtin_code = opcode->deprecated_builtin_code();
+ if (deprecated_builtin_code < 0)
+ return false;
+
+ const ::tflite::BuiltinOperator builtin_code = opcode->builtin_code();
+ if (!(::tflite::BuiltinOperator_MIN <= builtin_code &&
+ builtin_code <= ::tflite::BuiltinOperator_MAX))
+ return false;
+
+ return true;
+}
+
+bool is_custom(const ::tflite::OperatorCode *opcode)
+{
+ ::tflite::BuiltinOperator code = builtin_code_neutral(opcode);
+ return (code == ::tflite::BuiltinOperator_CUSTOM);
+}
+
+std::string opcode_name(const ::tflite::OperatorCode *opcode)
+{
+ assert(opcode);
+
+ if (!is_valid(opcode))
+ {
+ std::ostringstream oss;
+ oss << "(invalid)";
+ return oss.str();
+ }
+
+ if (is_custom(opcode))
+ {
+ if (!opcode->custom_code())
+ return "(invalid custom)";
+
+ std::string custom_op = "CUSTOM(";
+ custom_op += opcode->custom_code()->c_str();
+ custom_op += ")";
+ return custom_op;
+ }
+
+ ::tflite::BuiltinOperator code = builtin_code_neutral(opcode);
+ return ::tflite::EnumNameBuiltinOperator(code);
+}
+
+const char *tensor_type(const ::tflite::Tensor *tensor)
+{
+ return ::tflite::EnumNameTensorType(tensor->type());
+}
+
+const char *tensor_name(const ::tflite::Tensor *tensor)
+{
+ static const char *kEmptyTensorName = "(noname)";
+
+ auto name = tensor->name();
+ if (name)
+ return name->c_str();
+
+ return kEmptyTensorName;
+}
+
+} // namespace tflite
+} // namespace mio
diff --git a/compiler/mio-tflite260/src/Helper.test.cpp b/compiler/mio-tflite260/src/Helper.test.cpp
new file mode 100644
index 000000000..e1ef04ca7
--- /dev/null
+++ b/compiler/mio-tflite260/src/Helper.test.cpp
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_tflite260/Helper.h"
+
+#include <flatbuffers/flatbuffers.h>
+#include <gtest/gtest.h>
+
+#include <vector>
+
+class mio_tflite260_helper_test : public ::testing::Test
+{
+protected:
+ void initialization_finish(void)
+ {
+ _fbb.Finish(tflite::CreateModelDirect(_fbb, 0, &_opcodes_vec));
+ }
+
+protected:
+ void add_operator_code(int8_t deprecated_builtin_code, const char *custom_code,
+ tflite::BuiltinOperator builtin_code)
+ {
+ _opcodes_vec.push_back(tflite::CreateOperatorCodeDirect(
+ _fbb, deprecated_builtin_code, custom_code, 1 /* version */, builtin_code));
+ }
+
+ const tflite::OperatorCode *get_operator_code(uint8_t idx)
+ {
+ return tflite::GetModel(_fbb.GetBufferPointer())->operator_codes()->Get(idx);
+ }
+
+private:
+ flatbuffers::FlatBufferBuilder _fbb;
+ std::vector<flatbuffers::Offset<tflite::OperatorCode>> _opcodes_vec;
+};
+
+/**
+ * Extended 'builtin_code' is not in TFLite schema v3.
+ *
+ * Thus it is filled with 0 (BuiltinOperator_ADD) in schema v3. Please refer to
+ * https://github.com/tensorflow/tensorflow/blob/1ab788fa8d08430be239ab970980b891ad7af494/tensorflow/lite/schema/schema_utils.cc#L28-L31
+ */
+TEST_F(mio_tflite260_helper_test, v3)
+{
+ // BuiltinOperator_ADD = 0
+ // BuiltinOperator_CONV_2D = 3
+ add_operator_code(3, "", tflite::BuiltinOperator_ADD);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)),
+ tflite::BuiltinOperator_CONV_2D);
+ ASSERT_FALSE(mio::tflite::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite260_helper_test, v3_custom)
+{
+ // BuiltinOperator_ADD = 0
+ // BuiltinOperator_CUSTOM = 32
+ add_operator_code(32, "custom", tflite::BuiltinOperator_ADD);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)),
+ tflite::BuiltinOperator_CUSTOM);
+ ASSERT_TRUE(mio::tflite::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite260_helper_test, v3_NEG)
+{
+ // BuiltinOperator_ADD = 0
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be a negative value
+ add_operator_code(128, "", tflite::BuiltinOperator_ADD);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite260_helper_test, v3a_under127)
+{
+ // BuiltinOperator_CONV_2D = 3
+ add_operator_code(3, "", tflite::BuiltinOperator_CONV_2D);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)),
+ tflite::BuiltinOperator_CONV_2D);
+ ASSERT_FALSE(mio::tflite::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite260_helper_test, v3a_under127_NEG)
+{
+ // BuiltinOperator_CONV_2D = 3
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be a negative value
+ add_operator_code(128, "", tflite::BuiltinOperator_CONV_2D);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite260_helper_test, v3a_custom)
+{
+ // BuiltinOperator_CUSTOM = 32
+ add_operator_code(32, "custom", tflite::BuiltinOperator_CUSTOM);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)),
+ tflite::BuiltinOperator_CUSTOM);
+ ASSERT_TRUE(mio::tflite::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite260_helper_test, v3a_custom_NEG)
+{
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be a negative value
+ add_operator_code(128, "custom", tflite::BuiltinOperator_CUSTOM);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite260_helper_test, v3a_over127)
+{
+ // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES = 127
+ // BuiltinOperator_CUMSUM = 128
+ add_operator_code(127, "", tflite::BuiltinOperator_CUMSUM);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)),
+ tflite::BuiltinOperator_CUMSUM);
+ ASSERT_FALSE(mio::tflite::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite260_helper_test, v3a_over127_NEG)
+{
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be a negative value
+ add_operator_code(128, "", tflite::BuiltinOperator_CUMSUM);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0)));
+}
diff --git a/compiler/mio-tflite280/CMakeLists.txt b/compiler/mio-tflite280/CMakeLists.txt
new file mode 100644
index 000000000..edf75f479
--- /dev/null
+++ b/compiler/mio-tflite280/CMakeLists.txt
@@ -0,0 +1,59 @@
+nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
+
+if(NOT FlatBuffers_FOUND)
+ message(STATUS "Build mio-tflite280: FAILED (missing Flatbuffers 2.0)")
+ return()
+endif(NOT FlatBuffers_FOUND)
+
+nnas_find_package(TensorFlowSource EXACT 2.8.0 QUIET)
+
+if(NOT TensorFlowSource_FOUND)
+ message(STATUS "Build mio-tflite280: FAILED (missing TensorFlowSource 2.8.0)")
+ return()
+endif(NOT TensorFlowSource_FOUND)
+
+message(STATUS "Build mio-tflite280: TRUE")
+
+set(SCHEMA_FILE "${TensorFlowSource_DIR}/tensorflow/lite/schema/schema.fbs")
+
+# NOTE Use a copy of schema.fbs to provide a unified way for circle as well
+add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/schema.fbs"
+ COMMAND ${CMAKE_COMMAND} -E copy "${SCHEMA_FILE}" schema.fbs
+ WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
+ DEPENDS "${SCHEMA_FILE}"
+)
+
+FlatBuffers_Target(mio_tflite280
+ OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen/mio/tflite"
+ INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen"
+ SCHEMA_DIR "${CMAKE_CURRENT_BINARY_DIR}"
+ SCHEMA_FILES "schema.fbs"
+)
+
+add_executable(mio_tflite280_example example.cpp)
+target_link_libraries(mio_tflite280_example mio_tflite280)
+
+# Temporary tflite validation tool to replace nnkit-tflite
+# TODO provide full tflite validation with runtime/interpreter
+add_executable(mio_tflite280_validate example.cpp)
+target_link_libraries(mio_tflite280_validate mio_tflite280)
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(mio_tflite280_helper STATIC ${SOURCES})
+target_include_directories(mio_tflite280_helper PRIVATE src)
+target_include_directories(mio_tflite280_helper PUBLIC include)
+target_link_libraries(mio_tflite280_helper mio_tflite280)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(mio_tflite280_helper_test ${TESTS})
+target_include_directories(mio_tflite280_helper_test PRIVATE src)
+target_link_libraries(mio_tflite280_helper_test mio_tflite280)
+target_link_libraries(mio_tflite280_helper_test mio_tflite280_helper)
diff --git a/compiler/mio-tflite280/README.md b/compiler/mio-tflite280/README.md
new file mode 100644
index 000000000..73219a7df
--- /dev/null
+++ b/compiler/mio-tflite280/README.md
@@ -0,0 +1,3 @@
+# mio-tflite280
+
+_mio-tflite280_ provides a library to access TensorFlow Lite model files using the TensorFlow v2.8.0 schema.
diff --git a/compiler/mio-tflite280/example.cpp b/compiler/mio-tflite280/example.cpp
new file mode 100644
index 000000000..83356b943
--- /dev/null
+++ b/compiler/mio-tflite280/example.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// This example shows how to include and use "mio-tflite280"
+//
+#include <mio/tflite/schema_generated.h>
+
+#include <fstream>
+#include <iostream>
+#include <vector>
+
+int main(int argc, char **argv)
+{
+ if (argc != 2)
+ {
+ std::cerr << "Usage: " << argv[0] << " <tflite model file>" << std::endl;
+ return 255;
+ }
+
+ std::ifstream ifs(argv[1], std::ios_base::binary);
+ std::vector<char> buf(std::istreambuf_iterator<char>{ifs}, std::istreambuf_iterator<char>{});
+
+ flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(buf.data()), buf.size()};
+
+ if (!tflite::VerifyModelBuffer(verifier))
+ {
+ std::cout << "Fail" << std::endl;
+ return 255;
+ }
+
+ std::cout << "Pass" << std::endl;
+ return 0;
+}
diff --git a/compiler/mio-tflite280/include/mio_tflite280/Helper.h b/compiler/mio-tflite280/include/mio_tflite280/Helper.h
new file mode 100644
index 000000000..b0fb0ace7
--- /dev/null
+++ b/compiler/mio-tflite280/include/mio_tflite280/Helper.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MIO_TFLITE280_HELPER_H__
+#define __MIO_TFLITE280_HELPER_H__
+
+#include <mio/tflite/schema_generated.h>
+
+namespace mio
+{
+namespace tflite
+{
+
+::tflite::BuiltinOperator builtin_code_neutral(const ::tflite::OperatorCode *opcode);
+bool is_valid(const ::tflite::OperatorCode *opcode);
+bool is_custom(const ::tflite::OperatorCode *opcode);
+std::string opcode_name(const ::tflite::OperatorCode *opcode);
+const char *tensor_type(const ::tflite::Tensor *tensor);
+const char *tensor_name(const ::tflite::Tensor *tensor);
+
+} // namespace tflite
+} // namespace mio
+
+#endif // __MIO_TFLITE280_HELPER_H__
diff --git a/compiler/mio-tflite280/src/Helper.cpp b/compiler/mio-tflite280/src/Helper.cpp
new file mode 100644
index 000000000..ebf0bd140
--- /dev/null
+++ b/compiler/mio-tflite280/src/Helper.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_tflite280/Helper.h"
+
+#include <sstream>
+
+namespace mio
+{
+namespace tflite
+{
+
+/**
+ * This will provide v3/v3a format neutral BuiltinOperator
+ *
+ * This function references
+ * https://github.com/tensorflow/tensorflow/blob/7d12007d7800d3714a02e05059f3ea602d1aec78/tensorflow/lite/schema/schema_utils.cc
+ */
+::tflite::BuiltinOperator builtin_code_neutral(const ::tflite::OperatorCode *opcode)
+{
+ assert(opcode != nullptr);
+ return std::max(opcode->builtin_code(),
+ static_cast<::tflite::BuiltinOperator>(opcode->deprecated_builtin_code()));
+}
+
+bool is_valid(const ::tflite::OperatorCode *opcode)
+{
+ // Valid Range : 0 <= deprecated_builtin_code <= 127
+ const int8_t deprecated_builtin_code = opcode->deprecated_builtin_code();
+ if (deprecated_builtin_code < 0)
+ return false;
+
+ const ::tflite::BuiltinOperator builtin_code = opcode->builtin_code();
+ if (!(::tflite::BuiltinOperator_MIN <= builtin_code &&
+ builtin_code <= ::tflite::BuiltinOperator_MAX))
+ return false;
+
+ return true;
+}
+
+bool is_custom(const ::tflite::OperatorCode *opcode)
+{
+ ::tflite::BuiltinOperator code = builtin_code_neutral(opcode);
+ return (code == ::tflite::BuiltinOperator_CUSTOM);
+}
+
+std::string opcode_name(const ::tflite::OperatorCode *opcode)
+{
+ assert(opcode);
+
+ if (!is_valid(opcode))
+ {
+ std::ostringstream oss;
+ oss << "(invalid)";
+ return oss.str();
+ }
+
+ if (is_custom(opcode))
+ {
+ if (!opcode->custom_code())
+ return "(invalid custom)";
+
+ std::string custom_op = "CUSTOM(";
+ custom_op += opcode->custom_code()->c_str();
+ custom_op += ")";
+ return custom_op;
+ }
+
+ ::tflite::BuiltinOperator code = builtin_code_neutral(opcode);
+ return ::tflite::EnumNameBuiltinOperator(code);
+}
+
+const char *tensor_type(const ::tflite::Tensor *tensor)
+{
+ return ::tflite::EnumNameTensorType(tensor->type());
+}
+
+const char *tensor_name(const ::tflite::Tensor *tensor)
+{
+ static const char *kEmptyTensorName = "(noname)";
+
+ auto name = tensor->name();
+ if (name)
+ return name->c_str();
+
+ return kEmptyTensorName;
+}
+
+} // namespace tflite
+} // namespace mio
diff --git a/compiler/mio-tflite280/src/Helper.test.cpp b/compiler/mio-tflite280/src/Helper.test.cpp
new file mode 100644
index 000000000..df573bf44
--- /dev/null
+++ b/compiler/mio-tflite280/src/Helper.test.cpp
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_tflite280/Helper.h"
+
+#include <flatbuffers/flatbuffers.h>
+#include <gtest/gtest.h>
+
+#include <vector>
+
+class mio_tflite280_helper_test : public ::testing::Test
+{
+protected:
+ void initialization_finish(void)
+ {
+ _fbb.Finish(tflite::CreateModelDirect(_fbb, 0, &_opcodes_vec));
+ }
+
+protected:
+ void add_operator_code(int8_t deprecated_builtin_code, const char *custom_code,
+ tflite::BuiltinOperator builtin_code)
+ {
+ _opcodes_vec.push_back(tflite::CreateOperatorCodeDirect(
+ _fbb, deprecated_builtin_code, custom_code, 1 /* version */, builtin_code));
+ }
+
+ const tflite::OperatorCode *get_operator_code(uint8_t idx)
+ {
+ return tflite::GetModel(_fbb.GetBufferPointer())->operator_codes()->Get(idx);
+ }
+
+private:
+ flatbuffers::FlatBufferBuilder _fbb;
+ std::vector<flatbuffers::Offset<tflite::OperatorCode>> _opcodes_vec;
+};
+
+/**
+ * Extended 'builtin_code' is not in TFLite schema v3.
+ *
+ * Thus it is filled with 0 (BuiltinOperator_ADD) in schema v3. Please refer to
+ * https://github.com/tensorflow/tensorflow/blob/1ab788fa8d08430be239ab970980b891ad7af494/tensorflow/lite/schema/schema_utils.cc#L28-L31
+ */
+TEST_F(mio_tflite280_helper_test, v3)
+{
+ // BuiltinOperator_ADD = 0
+ // BuiltinOperator_CONV_2D = 3
+ add_operator_code(3, "", tflite::BuiltinOperator_ADD);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)),
+ tflite::BuiltinOperator_CONV_2D);
+ ASSERT_FALSE(mio::tflite::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite280_helper_test, v3_custom)
+{
+ // BuiltinOperator_ADD = 0
+ // BuiltinOperator_CUSTOM = 32
+ add_operator_code(32, "custom", tflite::BuiltinOperator_ADD);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)),
+ tflite::BuiltinOperator_CUSTOM);
+ ASSERT_TRUE(mio::tflite::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite280_helper_test, v3_NEG)
+{
+ // BuiltinOperator_ADD = 0
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be a negative value
+ add_operator_code(128, "", tflite::BuiltinOperator_ADD);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite280_helper_test, v3a_under127)
+{
+ // BuiltinOperator_CONV_2D = 3
+ add_operator_code(3, "", tflite::BuiltinOperator_CONV_2D);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)),
+ tflite::BuiltinOperator_CONV_2D);
+ ASSERT_FALSE(mio::tflite::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite280_helper_test, v3a_under127_NEG)
+{
+ // BuiltinOperator_CONV_2D = 3
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be a negative value
+ add_operator_code(128, "", tflite::BuiltinOperator_CONV_2D);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite280_helper_test, v3a_custom)
+{
+ // BuiltinOperator_CUSTOM = 32
+ add_operator_code(32, "custom", tflite::BuiltinOperator_CUSTOM);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)),
+ tflite::BuiltinOperator_CUSTOM);
+ ASSERT_TRUE(mio::tflite::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite280_helper_test, v3a_custom_NEG)
+{
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be a negative value
+ add_operator_code(128, "custom", tflite::BuiltinOperator_CUSTOM);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite280_helper_test, v3a_over127)
+{
+ // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES = 127
+ // BuiltinOperator_CUMSUM = 128
+ add_operator_code(127, "", tflite::BuiltinOperator_CUMSUM);
+ initialization_finish();
+
+ ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0)));
+ ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)),
+ tflite::BuiltinOperator_CUMSUM);
+ ASSERT_FALSE(mio::tflite::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite280_helper_test, v3a_over127_NEG)
+{
+ // BuiltinOperator_CUMSUM = 128
+ // deprecated_builtin_code cannot be a negative value
+ add_operator_code(128, "", tflite::BuiltinOperator_CUMSUM);
+ initialization_finish();
+
+ ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0)));
+}
diff --git a/compiler/mir-interpreter/src/ops/Add.cpp b/compiler/mir-interpreter/src/ops/Add.cpp
index 631b854b7..f80c63c15 100644
--- a/compiler/mir-interpreter/src/ops/Add.cpp
+++ b/compiler/mir-interpreter/src/ops/Add.cpp
@@ -106,13 +106,13 @@ void AddImpl<uint8_t>::run(const TensorVariant &lhs, const TensorVariant &rhs, T
const int32_t shifted_lhs_val = lhs_val * (1 << left_shift);
const int32_t shifted_rhs_val = rhs_val * (1 << left_shift);
const int32_t scaled_lhs_val =
- MultiplyByQuantizedMultiplierSmallerThanOneExp(shifted_lhs_val, lhs_multiplier, lhs_shift);
+ MultiplyByQuantizedMultiplierSmallerThanOneExp(shifted_lhs_val, lhs_multiplier, lhs_shift);
const int32_t scaled_rhs_val =
- MultiplyByQuantizedMultiplierSmallerThanOneExp(shifted_rhs_val, rhs_multiplier, rhs_shift);
+ MultiplyByQuantizedMultiplierSmallerThanOneExp(shifted_rhs_val, rhs_multiplier, rhs_shift);
const int32_t raw_sum = scaled_lhs_val + scaled_rhs_val;
const int32_t raw_output =
- MultiplyByQuantizedMultiplierSmallerThanOneExp(raw_sum, output_multiplier, output_shift) +
- output_offset;
+ MultiplyByQuantizedMultiplierSmallerThanOneExp(raw_sum, output_multiplier, output_shift) +
+ output_offset;
const int32_t clamped_output = std::min(output_max, std::max(output_min, raw_output));
res_accessor.at(index) = static_cast<uint8_t>(clamped_output);
}
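
For readers skimming this whitespace-only hunk, the arithmetic being re-indented is the usual TFLite-style quantized add: both operands get left_shift bits of headroom, are rescaled toward the output scale through fixed-point (multiplier, shift) pairs, summed, rescaled once more, offset, and clamped. In real-valued terms (a paraphrase of the reference kernel, not code from this patch), with (s, z) the scale and zero point of each tensor:

$$ q_o = \operatorname{clamp}\!\Big(\operatorname{round}\!\Big(\frac{s_l\,(q_l - z_l) + s_r\,(q_r - z_r)}{s_o}\Big) + z_o,\ q_{\min},\ q_{\max}\Big) $$

The multiplier/shift constants are, in essence, precomputed fixed-point encodings of the scale ratios, so no floating point is touched at run time.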
diff --git a/compiler/mir-interpreter/src/ops/AvgPool2D.cpp b/compiler/mir-interpreter/src/ops/AvgPool2D.cpp
index 3f1d65100..3f74cd1e8 100644
--- a/compiler/mir-interpreter/src/ops/AvgPool2D.cpp
+++ b/compiler/mir-interpreter/src/ops/AvgPool2D.cpp
@@ -72,7 +72,7 @@ void AvgPool2DImpl<T>::run(const ops::AvgPool2DOp &op, const TensorVariant &inpu
// Assuming NHWC format.
for (int i = 0; i < num_spatial_dims; ++i)
in_index.at(1 + i) =
- out_index.at(1 + i) * strides[i] + window_index.at(i) - padding_before[i];
+ out_index.at(1 + i) * strides[i] + window_index.at(i) - padding_before[i];
if (in_range.contains(in_index))
{
@@ -145,7 +145,7 @@ void AvgPool2DImpl<uint8_t>::run(const ops::AvgPool2DOp &op, const TensorVariant
// Assuming NHWC format.
for (int i = 0; i < num_spatial_dims; ++i)
in_index.at(1 + i) =
- out_index.at(1 + i) * strides[i] + window_index.at(i) - padding_before[i];
+ out_index.at(1 + i) * strides[i] + window_index.at(i) - padding_before[i];
if (in_range.contains(in_index))
{
diff --git a/compiler/mir-interpreter/src/ops/CappedReLU.cpp b/compiler/mir-interpreter/src/ops/CappedReLU.cpp
index 1ac95ac16..5b348d463 100644
--- a/compiler/mir-interpreter/src/ops/CappedReLU.cpp
+++ b/compiler/mir-interpreter/src/ops/CappedReLU.cpp
@@ -68,7 +68,7 @@ template <> struct CappedReLUImpl<uint8_t>
{
auto value = dequantize(arg_accessor.at(index), quant_info);
auto out_value =
- quantize(std::min(std::max(value, 0.0f), cap), result.getType().getQuantization());
+ quantize(std::min(std::max(value, 0.0f), cap), result.getType().getQuantization());
res_accessor.at(index) = out_value;
}
}
diff --git a/compiler/mir-interpreter/src/ops/Concat.cpp b/compiler/mir-interpreter/src/ops/Concat.cpp
index 99fe00c31..3c71709e6 100644
--- a/compiler/mir-interpreter/src/ops/Concat.cpp
+++ b/compiler/mir-interpreter/src/ops/Concat.cpp
@@ -90,8 +90,8 @@ template <> struct ConcatImpl<uint8_t>
};
void ConcatImpl<uint8_t>::run(
- const std::vector<std::reference_wrapper<const mir::TensorVariant>> &inputs, int axis,
- mir::TensorVariant &output)
+ const std::vector<std::reference_wrapper<const mir::TensorVariant>> &inputs, int axis,
+ mir::TensorVariant &output)
{
const size_t inputs_count = inputs.size();
std::vector<int32_t> input_zeropoints(inputs_count);
@@ -154,7 +154,7 @@ void ConcatImpl<uint8_t>::run(
for (int j = 0; j < copy_size; ++j)
{
const int32_t value =
- static_cast<int32_t>(std::round(input_ptr[j] * scale + bias)) + output_zeropoint;
+ static_cast<int32_t>(std::round(input_ptr[j] * scale + bias)) + output_zeropoint;
output_ptr[j] = static_cast<uint8_t>(std::max(std::min(255, value), 0));
}
}
diff --git a/compiler/mir-interpreter/src/ops/Conv2D.cpp b/compiler/mir-interpreter/src/ops/Conv2D.cpp
index c9b98a56f..9f4339bda 100644
--- a/compiler/mir-interpreter/src/ops/Conv2D.cpp
+++ b/compiler/mir-interpreter/src/ops/Conv2D.cpp
@@ -109,9 +109,9 @@ void Conv2DImpl<T>::run(const TensorVariant &input, const TensorVariant &kernel,
if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
{
const std::int32_t in_offset =
- calcOffset(input_shape, batch, in_y, in_x, in_group_offset + in_c);
- const std::int32_t kernel_offset = calcOffset(
- kernel_shape, out_group_offset + out_c, kernel_y, kernel_x, in_c);
+ calcOffset(input_shape, batch, in_y, in_x, in_group_offset + in_c);
+ const std::int32_t kernel_offset =
+ calcOffset(kernel_shape, out_group_offset + out_c, kernel_y, kernel_x, in_c);
const T input_val = input_data[in_offset];
const T kernel_val = kernel_data[kernel_offset];
sum += kernel_val * input_val;
@@ -121,7 +121,7 @@ void Conv2DImpl<T>::run(const TensorVariant &input, const TensorVariant &kernel,
}
const std::int32_t out_offset =
- calcOffset(output_shape, batch, out_y, out_x, out_group_offset + out_c);
+ calcOffset(output_shape, batch, out_y, out_x, out_group_offset + out_c);
result_data[out_offset] = sum;
}
}
diff --git a/compiler/mir-interpreter/src/ops/DeConv2D.cpp b/compiler/mir-interpreter/src/ops/DeConv2D.cpp
index 746d8c87c..f9e837ddb 100644
--- a/compiler/mir-interpreter/src/ops/DeConv2D.cpp
+++ b/compiler/mir-interpreter/src/ops/DeConv2D.cpp
@@ -98,9 +98,9 @@ void DeConv2DImpl<T>::run(const TensorVariant &input, const TensorVariant &kerne
for (int32_t out_c = 0; out_c < num_out_channels; ++out_c)
{
const int32_t kernel_offset =
- calcOffset(kernel_shape, in_c, kernel_y, kernel_x, out_c);
+ calcOffset(kernel_shape, in_c, kernel_y, kernel_x, out_c);
const int32_t output_offset =
- calcOffset(output_shape, batch, out_y, out_x, out_c);
+ calcOffset(output_shape, batch, out_y, out_x, out_c);
const T kernel_val = kernel_data[kernel_offset];
output_data[output_offset] += input_val * kernel_val;
}
diff --git a/compiler/mir-interpreter/src/ops/Gather.cpp b/compiler/mir-interpreter/src/ops/Gather.cpp
index 4328c26b2..11bffd411 100644
--- a/compiler/mir-interpreter/src/ops/Gather.cpp
+++ b/compiler/mir-interpreter/src/ops/Gather.cpp
@@ -64,7 +64,7 @@ void GatherImpl<T, IndicesT>::run(const TensorVariant &datav, const TensorVarian
for (int32_t inner = 0; inner < inner_size; inner++)
{
output.atOffset((outer * num_indices + i) * inner_size + inner) =
- data.atOffset((outer * axis_size + index) * inner_size + inner);
+ data.atOffset((outer * axis_size + index) * inner_size + inner);
}
}
}
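// --- Editor's sketch (not part of the diff): the flattened offsets reindented in
// the Gather hunk above. The data tensor is viewed as [outer, axis, inner]; each
// gathered element copies data[outer][index][inner] to output[outer][i][inner].
#include <cstdint>

inline int32_t GatherSrcOffset(int32_t outer, int32_t axis_size, int32_t index,
                               int32_t inner_size, int32_t inner)
{
  return (outer * axis_size + index) * inner_size + inner;
}

inline int32_t GatherDstOffset(int32_t outer, int32_t num_indices, int32_t i,
                               int32_t inner_size, int32_t inner)
{
  return (outer * num_indices + i) * inner_size + inner;
}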
diff --git a/compiler/mir-interpreter/src/ops/MaxPool2D.cpp b/compiler/mir-interpreter/src/ops/MaxPool2D.cpp
index cec2f5984..6be1ccf08 100644
--- a/compiler/mir-interpreter/src/ops/MaxPool2D.cpp
+++ b/compiler/mir-interpreter/src/ops/MaxPool2D.cpp
@@ -72,7 +72,7 @@ void MaxPool2DImpl<T>::run(const TensorVariant &inputv, const ops::MaxPool2DOp &
// Assuming NHWC format.
for (int i = 0; i < num_spatial_dims; ++i)
in_index.at(1 + i) =
- out_index.at(1 + i) * strides[i] + window_index.at(i) - padding_before[i];
+ out_index.at(1 + i) * strides[i] + window_index.at(i) - padding_before[i];
if (in_range.contains(in_index))
{
@@ -137,7 +137,7 @@ void MaxPool2DImpl<uint8_t>::run(const TensorVariant &input, const ops::MaxPool2
// Assuming NHWC format.
for (int i = 0; i < num_spatial_dims; ++i)
in_index.at(1 + i) =
- out_index.at(1 + i) * strides[i] + window_index.at(i) - padding_before[i];
+ out_index.at(1 + i) * strides[i] + window_index.at(i) - padding_before[i];
if (in_range.contains(in_index))
{
diff --git a/compiler/mir-interpreter/src/ops/QuantizationHelpers.h b/compiler/mir-interpreter/src/ops/QuantizationHelpers.h
index 8faeffbd3..3ab6f1edc 100644
--- a/compiler/mir-interpreter/src/ops/QuantizationHelpers.h
+++ b/compiler/mir-interpreter/src/ops/QuantizationHelpers.h
@@ -110,7 +110,7 @@ inline int32_t MultiplyByQuantizedMultiplier(int32_t x, int32_t quantized_multip
int left_shift = shift > 0 ? shift : 0;
int right_shift = shift > 0 ? 0 : -shift;
return RoundingDivideByPOT(
- SaturatingRoundingDoublingHighMul(x * (1 << left_shift), quantized_multiplier), right_shift);
+ SaturatingRoundingDoublingHighMul(x * (1 << left_shift), quantized_multiplier), right_shift);
}
inline int32_t MultiplyByQuantizedMultiplierSmallerThanOneExp(int32_t x,
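// --- Editor's sketch (not part of the diff): how a real-valued scale is usually
// decomposed into the (quantized_multiplier, shift) pair consumed by the helper
// above. This mirrors the common gemmlowp/TFLite scheme; treat it as an
// assumption, not the exact routine used by mir.
#include <cmath>
#include <cstdint>

inline void QuantizeMultiplier(double real_multiplier, int32_t &quantized_multiplier, int &shift)
{
  if (real_multiplier == 0.0)
  {
    quantized_multiplier = 0;
    shift = 0;
    return;
  }
  const double q = std::frexp(real_multiplier, &shift);  // q in [0.5, 1)
  int64_t q_fixed = static_cast<int64_t>(std::llround(q * (1ll << 31)));
  if (q_fixed == (1ll << 31))  // rounding pushed q to 1.0: renormalize
  {
    q_fixed /= 2;
    ++shift;
  }
  quantized_multiplier = static_cast<int32_t>(q_fixed);
}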
diff --git a/compiler/mir-interpreter/src/ops/Softmax.cpp b/compiler/mir-interpreter/src/ops/Softmax.cpp
index f263f967d..554f8c371 100644
--- a/compiler/mir-interpreter/src/ops/Softmax.cpp
+++ b/compiler/mir-interpreter/src/ops/Softmax.cpp
@@ -70,7 +70,7 @@ void SoftmaxImpl<T>::run(const mir::TensorVariant &arg, int axis, mir::TensorVar
mir::Index expsum_index = res_index;
expsum_index.at(axis) = 0;
res_accessor.at(res_index) =
- std::exp(arg_accessor.at(res_index)) / expsum_accessor.at(expsum_index);
+ std::exp(arg_accessor.at(res_index)) / expsum_accessor.at(expsum_index);
}
}
@@ -140,7 +140,7 @@ void SoftmaxImpl<uint8_t>::run(const mir::TensorVariant &input, int axis,
const float prob_rescaled = table_offset[input_data[j]] * inv_sum_exp;
const int32_t prob_quantized = static_cast<int32_t>(prob_rescaled + 0.5);
output_data[j] =
- static_cast<uint8_t>(std::max(std::min(clamp_max, prob_quantized), clamp_min));
+ static_cast<uint8_t>(std::max(std::min(clamp_max, prob_quantized), clamp_min));
}
input_data += last_dim;
output_data += last_dim;
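// --- Editor's sketch (not part of the diff): the float Softmax path above as one
// free function, normalizing exp(x) by the per-row sum of exponentials. Note
// that production kernels often subtract the row maximum first for numerical
// stability; that refinement is omitted here to match the hunk's structure.
#include <cmath>
#include <cstddef>

void SoftmaxRow(const float *in, float *out, std::size_t n)
{
  float sum = 0.0f;
  for (std::size_t i = 0; i < n; ++i)
    sum += std::exp(in[i]);
  for (std::size_t i = 0; i < n; ++i)
    out[i] = std::exp(in[i]) / sum;
}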
diff --git a/compiler/mir/include/mir/Graph.h b/compiler/mir/include/mir/Graph.h
index bf94cfb14..37bfdb361 100644
--- a/compiler/mir/include/mir/Graph.h
+++ b/compiler/mir/include/mir/Graph.h
@@ -103,6 +103,10 @@ private:
/**
* @brief Returns nodes of the graph sorted topologically.
+ * @note Sorting order priority:
+ * 1) Graph input nodes (in input-index order)
+ * 2) Constant nodes (unordered; order cannot be predicted)
+ * 3) Ready nodes (unordered; order cannot be predicted)
*/
std::vector<Operation *> getSortedNodes(Graph *graph);
diff --git a/compiler/mir/include/mir/Quantization.h b/compiler/mir/include/mir/Quantization.h
index d266ee00d..901915a74 100644
--- a/compiler/mir/include/mir/Quantization.h
+++ b/compiler/mir/include/mir/Quantization.h
@@ -26,7 +26,7 @@ public:
AffineQuantization() = default;
AffineQuantization(float scale, int zero_point)
- : _scale(scale), _zero_point(zero_point), _empty(false)
+ : _scale(scale), _zero_point(zero_point), _empty(false)
{
}
diff --git a/compiler/mir/include/mir/ShapeRange.h b/compiler/mir/include/mir/ShapeRange.h
index a450bf090..70b29715f 100644
--- a/compiler/mir/include/mir/ShapeRange.h
+++ b/compiler/mir/include/mir/ShapeRange.h
@@ -26,7 +26,7 @@ namespace mir
{
class ShapeIter
- : public std::iterator<std::forward_iterator_tag, Index, std::size_t, Index *, Index &>
+ : public std::iterator<std::forward_iterator_tag, Index, std::size_t, Index *, Index &>
{
public:
ShapeIter &operator++()
diff --git a/compiler/mir/include/mir/TensorType.h b/compiler/mir/include/mir/TensorType.h
index 98797d687..b94a26eeb 100644
--- a/compiler/mir/include/mir/TensorType.h
+++ b/compiler/mir/include/mir/TensorType.h
@@ -34,7 +34,7 @@ public:
}
TensorType(DataType element_type, const Shape &shape, const AffineQuantization &quant)
- : _element_type(element_type), _shape(shape), _quantization(quant)
+ : _element_type(element_type), _shape(shape), _quantization(quant)
{
}
diff --git a/compiler/mir/include/mir/ops/AvgPool2DOp.h b/compiler/mir/include/mir/ops/AvgPool2DOp.h
index 47fe058ee..37fb66437 100644
--- a/compiler/mir/include/mir/ops/AvgPool2DOp.h
+++ b/compiler/mir/include/mir/ops/AvgPool2DOp.h
@@ -32,7 +32,7 @@ class AvgPool2DOp : public Operation
{
public:
AvgPool2DOp(Output *arg, const AvgPool2DOpAttributes &attributes)
- : Operation(Type::avgPool2D, {arg}), _attributes(attributes)
+ : Operation(Type::avgPool2D, {arg}), _attributes(attributes)
{
inferOutputTypes();
}
diff --git a/compiler/mir/include/mir/ops/ConcatOp.h b/compiler/mir/include/mir/ops/ConcatOp.h
index 4f46d4449..d1f9142fa 100644
--- a/compiler/mir/include/mir/ops/ConcatOp.h
+++ b/compiler/mir/include/mir/ops/ConcatOp.h
@@ -31,7 +31,7 @@ class ConcatOp : public Operation
{
public:
ConcatOp(const std::vector<Output *> &args, int32_t axis)
- : Operation(Type::concat, args), _axis(axis)
+ : Operation(Type::concat, args), _axis(axis)
{
inferOutputTypes();
}
diff --git a/compiler/mir/include/mir/ops/Conv2DOp.h b/compiler/mir/include/mir/ops/Conv2DOp.h
index ec818dae5..f8590a947 100644
--- a/compiler/mir/include/mir/ops/Conv2DOp.h
+++ b/compiler/mir/include/mir/ops/Conv2DOp.h
@@ -30,13 +30,13 @@ class Conv2DOp : public Operation
{
public:
Conv2DOp(Output *input, Output *kernel, const Conv2DOpAttributes &attributes)
- : Operation(Type::conv2D, {input, kernel}), _attributes(attributes)
+ : Operation(Type::conv2D, {input, kernel}), _attributes(attributes)
{
inferOutputTypes();
}
Conv2DOp(Output *input, Output *kernel, Output *bias, const Conv2DOpAttributes &attributes)
- : Operation(Type::conv2D, {input, kernel, bias}), _attributes(attributes)
+ : Operation(Type::conv2D, {input, kernel, bias}), _attributes(attributes)
{
inferOutputTypes();
}
diff --git a/compiler/mir/include/mir/ops/Deconv2DOp.h b/compiler/mir/include/mir/ops/Deconv2DOp.h
index a7b548028..9565eeb37 100644
--- a/compiler/mir/include/mir/ops/Deconv2DOp.h
+++ b/compiler/mir/include/mir/ops/Deconv2DOp.h
@@ -33,14 +33,14 @@ class DeConv2DOp : public Operation
{
public:
DeConv2DOp(Output *input, Output *kernel, const Deconv2DOpAttributes &attributes)
- : Operation(Type::deConv2D, {input, kernel}), _attributes(attributes)
+ : Operation(Type::deConv2D, {input, kernel}), _attributes(attributes)
{
inferOutputTypes();
}
DeConv2DOp(Output *input, Output *kernel, const Deconv2DOpAttributes &attributes,
const Shape &output_shape)
- : Operation(Type::deConv2D, {input, kernel}), _attributes(attributes)
+ : Operation(Type::deConv2D, {input, kernel}), _attributes(attributes)
{
assert(input->getElementType() == kernel->getElementType());
setOutputType(0, {input->getElementType(), output_shape});
diff --git a/compiler/mir/include/mir/ops/DepthwiseConv2DOp.h b/compiler/mir/include/mir/ops/DepthwiseConv2DOp.h
index 347b8e94f..558d60a4a 100644
--- a/compiler/mir/include/mir/ops/DepthwiseConv2DOp.h
+++ b/compiler/mir/include/mir/ops/DepthwiseConv2DOp.h
@@ -30,14 +30,14 @@ class DepthwiseConv2DOp : public Operation
{
public:
DepthwiseConv2DOp(Output *input, Output *kernel, const Conv2DOpAttributes &attributes)
- : Operation(Type::depthwiseConv, {input, kernel}), _attributes(attributes)
+ : Operation(Type::depthwiseConv, {input, kernel}), _attributes(attributes)
{
inferOutputTypes();
}
DepthwiseConv2DOp(Output *input, Output *kernel, Output *bias,
const Conv2DOpAttributes &attributes)
- : Operation(Type::depthwiseConv, {input, kernel, bias}), _attributes(attributes)
+ : Operation(Type::depthwiseConv, {input, kernel, bias}), _attributes(attributes)
{
inferOutputTypes();
}
diff --git a/compiler/mir/include/mir/ops/FullyConnectedOp.h b/compiler/mir/include/mir/ops/FullyConnectedOp.h
index 589c42df9..f937df539 100644
--- a/compiler/mir/include/mir/ops/FullyConnectedOp.h
+++ b/compiler/mir/include/mir/ops/FullyConnectedOp.h
@@ -29,13 +29,13 @@ class FullyConnectedOp : public Operation
{
public:
FullyConnectedOp(Output *input, Output *weights)
- : Operation(Type::fullyConnected, {input, weights})
+ : Operation(Type::fullyConnected, {input, weights})
{
inferOutputTypes();
}
FullyConnectedOp(Output *input, Output *weights, Output *bias)
- : Operation(Type::fullyConnected, {input, weights, bias})
+ : Operation(Type::fullyConnected, {input, weights, bias})
{
inferOutputTypes();
}
diff --git a/compiler/mir/include/mir/ops/GatherOp.h b/compiler/mir/include/mir/ops/GatherOp.h
index 899c9f169..58ea04074 100644
--- a/compiler/mir/include/mir/ops/GatherOp.h
+++ b/compiler/mir/include/mir/ops/GatherOp.h
@@ -33,7 +33,7 @@ class GatherOp : public Operation
{
public:
GatherOp(Output *data, Output *indices, int32_t axis)
- : Operation(Type::gather, {data, indices}), _axis(axis)
+ : Operation(Type::gather, {data, indices}), _axis(axis)
{
inferOutputTypes();
}
diff --git a/compiler/mir/include/mir/ops/MaxPool2DOp.h b/compiler/mir/include/mir/ops/MaxPool2DOp.h
index 7c5df4a53..4345cfc18 100644
--- a/compiler/mir/include/mir/ops/MaxPool2DOp.h
+++ b/compiler/mir/include/mir/ops/MaxPool2DOp.h
@@ -32,7 +32,7 @@ class MaxPool2DOp : public Operation
{
public:
MaxPool2DOp(Output *arg, const MaxPool2DOpAttributes &attributes)
- : Operation(Type::maxPool2D, {arg}), _attributes(attributes)
+ : Operation(Type::maxPool2D, {arg}), _attributes(attributes)
{
inferOutputTypes();
}
diff --git a/compiler/mir/include/mir/ops/PadOp.h b/compiler/mir/include/mir/ops/PadOp.h
index 76453acec..d229a97bd 100644
--- a/compiler/mir/include/mir/ops/PadOp.h
+++ b/compiler/mir/include/mir/ops/PadOp.h
@@ -29,7 +29,7 @@ class PadOp : public Operation
{
public:
PadOp(Output *arg, const PadOpAttributes &attributes)
- : Operation(Type::pad, {arg}), _attributes(attributes)
+ : Operation(Type::pad, {arg}), _attributes(attributes)
{
assert(_attributes.padding_before.size() == _attributes.padding_after.size());
inferOutputTypes();
diff --git a/compiler/mir/include/mir/ops/ReduceMeanOp.h b/compiler/mir/include/mir/ops/ReduceMeanOp.h
index add47ac75..5759b845e 100644
--- a/compiler/mir/include/mir/ops/ReduceMeanOp.h
+++ b/compiler/mir/include/mir/ops/ReduceMeanOp.h
@@ -29,7 +29,7 @@ class ReduceMeanOp : public ReduceOp
{
public:
ReduceMeanOp(Output *arg, const std::vector<int> &reduction_dims, bool keep_dims)
- : ReduceOp(Type::reduceMean, arg, reduction_dims, keep_dims)
+ : ReduceOp(Type::reduceMean, arg, reduction_dims, keep_dims)
{
}
diff --git a/compiler/mir/include/mir/ops/ReduceOp.h b/compiler/mir/include/mir/ops/ReduceOp.h
index 0f46a4596..5204a0903 100644
--- a/compiler/mir/include/mir/ops/ReduceOp.h
+++ b/compiler/mir/include/mir/ops/ReduceOp.h
@@ -29,7 +29,7 @@ class ReduceOp : public Operation
{
protected:
ReduceOp(Type type, Output *arg, const std::vector<int> &reduction_dims, bool keep_dims)
- : Operation(type, {arg}), _reduction_dims(reduction_dims), _keep_dims(keep_dims)
+ : Operation(type, {arg}), _reduction_dims(reduction_dims), _keep_dims(keep_dims)
{
inferOutputTypes();
}
diff --git a/compiler/mir/include/mir/ops/ResizeOp.h b/compiler/mir/include/mir/ops/ResizeOp.h
index 51e1b0b76..62743e396 100644
--- a/compiler/mir/include/mir/ops/ResizeOp.h
+++ b/compiler/mir/include/mir/ops/ResizeOp.h
@@ -40,7 +40,7 @@ public:
};
ResizeOp(Output *arg, ResizeMethod mode, const std::vector<float> &scales)
- : Operation(Type::resizeIm, {arg}), _mode(mode), _scales(scales)
+ : Operation(Type::resizeIm, {arg}), _mode(mode), _scales(scales)
{
// Infer output shape based on given scales.
auto &input_shape = getInputShape(0);
@@ -61,7 +61,7 @@ public:
}
ResizeOp(Output *arg, ResizeMethod mode, const Shape &output_shape)
- : Operation(Type::resizeIm, {arg}), _mode(mode)
+ : Operation(Type::resizeIm, {arg}), _mode(mode)
{
// Calculate scales based on given shape.
auto &input_shape = getInputShape(0);
diff --git a/compiler/mir/include/mir/ops/SliceOp.h b/compiler/mir/include/mir/ops/SliceOp.h
index 6370de4fa..1627d4b82 100644
--- a/compiler/mir/include/mir/ops/SliceOp.h
+++ b/compiler/mir/include/mir/ops/SliceOp.h
@@ -28,7 +28,7 @@ class SliceOp : public Operation
{
public:
SliceOp(Output *arg, const Shape &starts, const Shape &sizes)
- : Operation(Type::slice, {arg}), _starts(starts), _sizes(sizes)
+ : Operation(Type::slice, {arg}), _starts(starts), _sizes(sizes)
{
inferOutputTypes();
}
diff --git a/compiler/mir/include/mir/ops/SqueezeOp.h b/compiler/mir/include/mir/ops/SqueezeOp.h
index 8ef2a78bb..735b7d86d 100644
--- a/compiler/mir/include/mir/ops/SqueezeOp.h
+++ b/compiler/mir/include/mir/ops/SqueezeOp.h
@@ -29,7 +29,7 @@ class SqueezeOp : public Operation
{
public:
SqueezeOp(Output *arg, const std::vector<std::int32_t> &dims_to_squeeze)
- : Operation(Type::squeeze, {arg}), _dims_to_squeeze(dims_to_squeeze)
+ : Operation(Type::squeeze, {arg}), _dims_to_squeeze(dims_to_squeeze)
{
// Infer output shape.
inferOutputTypes();
diff --git a/compiler/mir/src/Graph.cpp b/compiler/mir/src/Graph.cpp
index 0eccdac2b..05d6dc9bd 100644
--- a/compiler/mir/src/Graph.cpp
+++ b/compiler/mir/src/Graph.cpp
@@ -44,9 +44,16 @@ std::vector<Operation *> getSortedNodes(Graph *graph)
std::deque<Operation *> ready_nodes;
std::unordered_map<Operation *, std::size_t> num_visited_input_edges;
+ // Push graph inputs first to preserve their input-index order
+ for (Operation *op : graph->getInputs())
+ {
+ ready_nodes.push_back(op);
+ }
+
for (Operation *op : graph->getNodes())
{
- if (op->getNumInputs() == 0)
+ // Skip input nodes; they were already pushed above
+ if ((op->getNumInputs() == 0) && (op->getType() != Operation::Type::input))
{
ready_nodes.push_back(op);
}
@@ -123,11 +130,11 @@ void Graph::removeNode(Operation *op)
if (op->getType() == Operation::Type::input)
_inputs.erase(
- std::remove(_inputs.begin(), _inputs.end(), op)); // NOLINT(bugprone-inaccurate-erase)
+ std::remove(_inputs.begin(), _inputs.end(), op)); // NOLINT(bugprone-inaccurate-erase)
if (op->getType() == Operation::Type::output)
_outputs.erase(
- std::remove(_outputs.begin(), _outputs.end(), op)); // NOLINT(bugprone-inaccurate-erase)
+ std::remove(_outputs.begin(), _outputs.end(), op)); // NOLINT(bugprone-inaccurate-erase)
_ops.erase(op);
delete op;
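// --- Editor's sketch (not part of the diff): the seeding order that the Graph.h
// note and the getSortedNodes() change above establish. Inputs are enqueued in
// index order first; remaining zero-input nodes (e.g. constants) follow in
// whatever order the node container yields. Types are simplified stand-ins.
#include <deque>
#include <vector>

struct Node
{
  bool is_input = false;
  int num_inputs = 0;
};

std::deque<Node *> seedReadyQueue(const std::vector<Node *> &inputs,
                                  const std::vector<Node *> &all_nodes)
{
  std::deque<Node *> ready;
  for (Node *n : inputs) // 1) graph inputs, in input-index order
    ready.push_back(n);
  for (Node *n : all_nodes) // 2) other source nodes, e.g. constants
    if (n->num_inputs == 0 && !n->is_input)
      ready.push_back(n);
  return ready; // 3) downstream nodes become ready as their inputs are visited
}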
diff --git a/compiler/mir/src/Operation.cpp b/compiler/mir/src/Operation.cpp
index 6f72acbf6..9ba395f94 100644
--- a/compiler/mir/src/Operation.cpp
+++ b/compiler/mir/src/Operation.cpp
@@ -40,7 +40,7 @@ void Operation::Output::replaceAllUsesWith(mir::Operation::Output *new_def)
}
Operation::Operation(Type type, const std::vector<Output *> &inputs, std::size_t num_outputs)
- : _type(type)
+ : _type(type)
{
for (std::size_t i = 0; i < inputs.size(); ++i)
{
diff --git a/compiler/mir/src/Shape.cpp b/compiler/mir/src/Shape.cpp
index 825420cd6..06dae0c54 100644
--- a/compiler/mir/src/Shape.cpp
+++ b/compiler/mir/src/Shape.cpp
@@ -48,9 +48,9 @@ Shape broadcastShapes(const Shape &lhs_shape, const Shape &rhs_shape)
for (int i = 0; i < num_dims; ++i)
{
const std::int32_t lhs_dim =
- (i >= num_dims - lhs_shape.rank()) ? lhs_shape.dim(i - (num_dims - lhs_shape.rank())) : 1;
+ (i >= num_dims - lhs_shape.rank()) ? lhs_shape.dim(i - (num_dims - lhs_shape.rank())) : 1;
const std::int32_t rhs_dim =
- (i >= num_dims - rhs_shape.rank()) ? rhs_shape.dim(i - (num_dims - rhs_shape.rank())) : 1;
+ (i >= num_dims - rhs_shape.rank()) ? rhs_shape.dim(i - (num_dims - rhs_shape.rank())) : 1;
if (lhs_dim == 1)
{
result_shape.dim(i) = rhs_dim;
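// --- Editor's sketch (not part of the diff): the right-aligned dimension lookup
// reindented in the broadcastShapes hunk above. Shapes are aligned at their
// trailing axes; missing leading axes behave as size 1, which is what makes the
// two ternaries in the hunk symmetric.
#include <cstdint>
#include <vector>

inline int32_t AlignedDim(const std::vector<int32_t> &shape, int num_dims, int i)
{
  const int rank = static_cast<int>(shape.size());
  return (i >= num_dims - rank) ? shape[i - (num_dims - rank)] : 1;
}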
diff --git a/compiler/mir/src/TensorVariant.cpp b/compiler/mir/src/TensorVariant.cpp
index 9e57dbaf0..516c0df73 100644
--- a/compiler/mir/src/TensorVariant.cpp
+++ b/compiler/mir/src/TensorVariant.cpp
@@ -35,7 +35,7 @@ TensorVariant::TensorVariant(const TensorType &type) : _type(type), _strides(typ
}
TensorVariant::TensorVariant(DataType element_type, const Shape &shape)
- : TensorVariant(TensorType(element_type, shape))
+ : TensorVariant(TensorType(element_type, shape))
{
}
@@ -46,7 +46,7 @@ TensorVariant::TensorVariant(const TensorType &type, const void *data) : TensorV
}
TensorVariant::TensorVariant(DataType element_type, const Shape &shape, const void *data)
- : TensorVariant(TensorType(element_type, shape), data)
+ : TensorVariant(TensorType(element_type, shape), data)
{
}
@@ -57,8 +57,8 @@ TensorVariant::TensorVariant(DataType element_type, const Shape &shape, const vo
* @param shape shape to broadcast to
*/
TensorVariant::TensorVariant(const TensorVariant &t_old, const Shape &shape)
- : _type(t_old.getType().getElementType(), shape), _data(t_old._data),
- _strides(static_cast<size_t>(shape.rank())), _element_size(t_old._element_size)
+ : _type(t_old.getType().getElementType(), shape), _data(t_old._data),
+ _strides(static_cast<size_t>(shape.rank())), _element_size(t_old._element_size)
{
int axis_old = t_old.getShape().rank() - 1;
for (int d = shape.rank() - 1; d >= 0; d--)
diff --git a/compiler/mir/src/mir_caffe2_importer/caffe2_importer.cpp b/compiler/mir/src/mir_caffe2_importer/caffe2_importer.cpp
index 812fcc5cc..abecfc88a 100644
--- a/compiler/mir/src/mir_caffe2_importer/caffe2_importer.cpp
+++ b/compiler/mir/src/mir_caffe2_importer/caffe2_importer.cpp
@@ -99,7 +99,7 @@ using mir::Shape;
Caffe2Importer::Caffe2Importer(std::string predict_net, std::string init_net,
const std::vector<std::vector<int>> &input_shapes)
- : _predictNet(std::move(predict_net)), _initNet(std::move(init_net))
+ : _predictNet(std::move(predict_net)), _initNet(std::move(init_net))
{
for (auto &shape : input_shapes)
_inputShapes.emplace_back(shape);
@@ -308,27 +308,27 @@ void Caffe2Importer::setGraphOutputs()
}
const std::map<std::string, SupportedCaffe2OpType> Caffe2Importer::_operatorTypes = {
- {"Add", SupportedCaffe2OpType::add},
- {"AveragePool", SupportedCaffe2OpType::averagePool},
- {"Conv", SupportedCaffe2OpType::conv},
- {"Concat", SupportedCaffe2OpType::concat},
- {"ConstantFill", SupportedCaffe2OpType::constantFill},
- {"Dropout", SupportedCaffe2OpType::dropout},
- {"FC", SupportedCaffe2OpType::FC},
- {"GivenTensorFill", SupportedCaffe2OpType::givenTensorFill},
- {"MaxPool", SupportedCaffe2OpType::maxPool},
- {"Mul", SupportedCaffe2OpType::mul},
- {"Relu", SupportedCaffe2OpType::relu},
- {"ResizeNearest", SupportedCaffe2OpType::resizeNearest},
- {"Sigmoid", SupportedCaffe2OpType::sigmoid},
- {"Softmax", SupportedCaffe2OpType::softmax},
- {"SpatialBN", SupportedCaffe2OpType::spatialBN},
- {"Sum", SupportedCaffe2OpType::sum},
- {"Clip", SupportedCaffe2OpType::clip},
- {"Reshape", SupportedCaffe2OpType::reshape},
- {"GivenTensorInt64Fill", SupportedCaffe2OpType::givenTensorInt64Fill},
+ {"Add", SupportedCaffe2OpType::add},
+ {"AveragePool", SupportedCaffe2OpType::averagePool},
+ {"Conv", SupportedCaffe2OpType::conv},
+ {"Concat", SupportedCaffe2OpType::concat},
+ {"ConstantFill", SupportedCaffe2OpType::constantFill},
+ {"Dropout", SupportedCaffe2OpType::dropout},
+ {"FC", SupportedCaffe2OpType::FC},
+ {"GivenTensorFill", SupportedCaffe2OpType::givenTensorFill},
+ {"MaxPool", SupportedCaffe2OpType::maxPool},
+ {"Mul", SupportedCaffe2OpType::mul},
+ {"Relu", SupportedCaffe2OpType::relu},
+ {"ResizeNearest", SupportedCaffe2OpType::resizeNearest},
+ {"Sigmoid", SupportedCaffe2OpType::sigmoid},
+ {"Softmax", SupportedCaffe2OpType::softmax},
+ {"SpatialBN", SupportedCaffe2OpType::spatialBN},
+ {"Sum", SupportedCaffe2OpType::sum},
+ {"Clip", SupportedCaffe2OpType::clip},
+ {"Reshape", SupportedCaffe2OpType::reshape},
+ {"GivenTensorInt64Fill", SupportedCaffe2OpType::givenTensorInt64Fill},
};
-}
+} // namespace
namespace mir_caffe2
{
diff --git a/compiler/mir/src/mir_caffe2_importer/caffe2_op_creator.cpp b/compiler/mir/src/mir_caffe2_importer/caffe2_op_creator.cpp
index 3390f4482..de0762dfa 100644
--- a/compiler/mir/src/mir_caffe2_importer/caffe2_op_creator.cpp
+++ b/compiler/mir/src/mir_caffe2_importer/caffe2_op_creator.cpp
@@ -125,7 +125,7 @@ static std::vector<std::int32_t> getWindowSize(const ::caffe2::OperatorDef &op,
{
int is_global_pooling = getSingleArgument(op, "global_pooling", 0);
bool has_custom_kernel_size =
- hasArgument(op.arg(), "kernel_h") || hasArgument(op.arg(), "kernel_w");
+ hasArgument(op.arg(), "kernel_h") || hasArgument(op.arg(), "kernel_w");
bool has_custom_kernels_size = hasArgument(op.arg(), "kernels");
int kernel_h(0), kernel_w(0);
@@ -186,14 +186,13 @@ static void checkConvLikeOp(const ::caffe2::OperatorDef &op)
if (has_custom_pad && hasArgument(op.arg(), "pad"))
throw std::runtime_error("Custom pad can't be combined with overall pad");
- if (has_custom_pad &&
- !(hasArgument(op.arg(), "pad_l") && hasArgument(op.arg(), "pad_r") &&
- hasArgument(op.arg(), "pad_t") && hasArgument(op.arg(), "pad_b")))
+ if (has_custom_pad && !(hasArgument(op.arg(), "pad_l") && hasArgument(op.arg(), "pad_r") &&
+ hasArgument(op.arg(), "pad_t") && hasArgument(op.arg(), "pad_b")))
throw std::runtime_error("If one custom pad specified - all custom pads must be specified");
// Kernel size
bool has_custom_kernel_size =
- hasArgument(op.arg(), "kernel_h") || hasArgument(op.arg(), "kernel_w");
+ hasArgument(op.arg(), "kernel_h") || hasArgument(op.arg(), "kernel_w");
if (has_custom_kernel_size && hasArgument(op.arg(), "kernel"))
throw std::runtime_error("Custom kernel size can't be combined with overall kernel size");
@@ -201,7 +200,7 @@ static void checkConvLikeOp(const ::caffe2::OperatorDef &op)
if (has_custom_kernel_size &&
!(hasArgument(op.arg(), "kernel_h") && hasArgument(op.arg(), "kernel_w")))
throw std::runtime_error(
- "If one custom kernel size specified - all custom kernel sizes must be specified");
+ "If one custom kernel size specified - all custom kernel sizes must be specified");
}
static mir::TensorVariant createTensor(const OperatorDef &op)
@@ -356,7 +355,7 @@ Caffe2OpCreator::convertFC(const std::vector<mir::Operation::Output *> &inputs,
auto reshape = createOp<ops::ReshapeOp>(inputs[0], shape)->getOutput(0);
auto weights =
- createOp<ops::TransposeOp>(inputs[1], std::vector<std::size_t>{1, 0})->getOutput(0);
+ createOp<ops::TransposeOp>(inputs[1], std::vector<std::size_t>{1, 0})->getOutput(0);
auto result = createOp<ops::FullyConnectedOp>(reshape, weights)->getOutput(0);
result = createOp<ops::AddOp>(result, inputs[2])->getOutput(0);
@@ -420,8 +419,8 @@ Caffe2OpCreator::convertResizeNearest(const std::vector<mir::Operation::Output *
scales[2] = getSingleArgument(op, "height_scale", 1.0f);
scales[3] = getSingleArgument(op, "width_scale", 1.0f);
auto result =
- createOp<ops::ResizeOp>(inputs[0], ops::ResizeOp::ResizeMethod::nearestNeighbor, scales)
- ->getOutput(0);
+ createOp<ops::ResizeOp>(inputs[0], ops::ResizeOp::ResizeMethod::nearestNeighbor, scales)
+ ->getOutput(0);
return {result};
}
@@ -450,7 +449,7 @@ Caffe2OpCreator::convertSpatialBN(const std::vector<mir::Operation::Output *> &i
// Sanity checks
if (op.input_size() != 5)
throw std::runtime_error(
- "SpatialBN must have exactly 5 inputs ('sums' and 'sumsq' are not supported yet)");
+ "SpatialBN must have exactly 5 inputs ('sums' and 'sumsq' are not supported yet)");
if (getSingleArgument(op, "is_test", 1) != 1)
throw std::runtime_error("SpatialBN: only test mode supported");
@@ -462,7 +461,7 @@ Caffe2OpCreator::convertSpatialBN(const std::vector<mir::Operation::Output *> &i
auto var_op = dynamic_cast<mir::ops::ConstantOp *>(inputs[4]->getNode());
if (scale_op == nullptr || bias_op == nullptr || mean_op == nullptr || var_op == nullptr)
throw std::runtime_error(
- "SpatialBN: non-constant 'scale', 'bias', 'mean' and 'var' inputs are not supported yet.");
+ "SpatialBN: non-constant 'scale', 'bias', 'mean' and 'var' inputs are not supported yet.");
const auto &scale_tensor = scale_op->getValue();
const auto &bias_tensor = bias_op->getValue();
diff --git a/compiler/mir/src/mir_caffe_importer/caffe_importer.cpp b/compiler/mir/src/mir_caffe_importer/caffe_importer.cpp
index 49f13fbd8..c74658299 100644
--- a/compiler/mir/src/mir_caffe_importer/caffe_importer.cpp
+++ b/compiler/mir/src/mir_caffe_importer/caffe_importer.cpp
@@ -357,66 +357,66 @@ void CaffeImporter::setGraphOutputs(mir::Graph *graph)
}
const std::map<std::string, CaffeOpType> CaffeImporter::_operatorTypes = {
- {"AbsVal", CaffeOpType::absVal},
- {"Accuracy", CaffeOpType::accuracy},
- {"ArgMax", CaffeOpType::argMax},
- {"BatchNorm", CaffeOpType::batchNorm},
- {"BatchReindex", CaffeOpType::batchReindex},
- {"Bias", CaffeOpType::bias},
- {"BNLL", CaffeOpType::BNLL},
- {"Clip", CaffeOpType::clip},
- {"Concat", CaffeOpType::concat},
- {"ContrastiveLoss", CaffeOpType::contrastiveLoss},
- {"Convolution", CaffeOpType::convolution},
- {"Crop", CaffeOpType::crop},
- {"Data", CaffeOpType::data},
- {"Deconvolution", CaffeOpType::deconvolution},
- {"Dropout", CaffeOpType::dropout},
- {"DummyData", CaffeOpType::dummyData},
- {"Eltwise", CaffeOpType::eltwise},
- {"ELU", CaffeOpType::ELU},
- {"Embed", CaffeOpType::embed},
- {"EuclidianLoss", CaffeOpType::euclidianLoss},
- {"Exp", CaffeOpType::exp},
- {"Filter", CaffeOpType::filter},
- {"Flatten", CaffeOpType::flatten},
- {"HDF5Data", CaffeOpType::HDF5Data},
- {"HDF5Output", CaffeOpType::HDF5Output},
- {"HingeLoss", CaffeOpType::hingeLoss},
- {"Im2Col", CaffeOpType::im2Col},
- {"ImageData", CaffeOpType::imageData},
- {"InfogainLoss", CaffeOpType::infogainLoss},
- {"InnerProduct", CaffeOpType::innerProduct},
- {"Input", CaffeOpType::input},
- {"Log", CaffeOpType::log},
- {"LRN", CaffeOpType::LRN},
- {"LSTM", CaffeOpType::LSTM},
- {"MemoryData", CaffeOpType::memoryData},
- {"MultinomialLogisticLoss", CaffeOpType::multinomialLogisticLoss},
- {"MVN", CaffeOpType::MVN},
- {"Parameter", CaffeOpType::parameter},
- {"Pooling", CaffeOpType::pooling},
- {"Power", CaffeOpType::power},
- {"PReLU", CaffeOpType::PReLU},
- {"Python", CaffeOpType::python},
- {"Recurrent", CaffeOpType::recurrent},
- {"Reduction", CaffeOpType::reduction},
- {"ReLU", CaffeOpType::ReLU},
- {"Reshape", CaffeOpType::reshape},
- {"RNN", CaffeOpType::RNN},
- {"Scale", CaffeOpType::scale},
- {"SigmoidCrossEntropyLoss", CaffeOpType::sigmoidCrossEntropyLoss},
- {"Sigmoid", CaffeOpType::sigmoid},
- {"Silence", CaffeOpType::silence},
- {"Softmax", CaffeOpType::softmax},
- {"SoftmaxWithLoss", CaffeOpType::softmaxWithLoss},
- {"SPP", CaffeOpType::SPP},
- {"Split", CaffeOpType::split},
- {"Slice", CaffeOpType::slice},
- {"TanH", CaffeOpType::tanh},
- {"Threshold", CaffeOpType::threshold},
- {"Tile", CaffeOpType::tile},
- {"WindowData", CaffeOpType::windowData}};
+ {"AbsVal", CaffeOpType::absVal},
+ {"Accuracy", CaffeOpType::accuracy},
+ {"ArgMax", CaffeOpType::argMax},
+ {"BatchNorm", CaffeOpType::batchNorm},
+ {"BatchReindex", CaffeOpType::batchReindex},
+ {"Bias", CaffeOpType::bias},
+ {"BNLL", CaffeOpType::BNLL},
+ {"Clip", CaffeOpType::clip},
+ {"Concat", CaffeOpType::concat},
+ {"ContrastiveLoss", CaffeOpType::contrastiveLoss},
+ {"Convolution", CaffeOpType::convolution},
+ {"Crop", CaffeOpType::crop},
+ {"Data", CaffeOpType::data},
+ {"Deconvolution", CaffeOpType::deconvolution},
+ {"Dropout", CaffeOpType::dropout},
+ {"DummyData", CaffeOpType::dummyData},
+ {"Eltwise", CaffeOpType::eltwise},
+ {"ELU", CaffeOpType::ELU},
+ {"Embed", CaffeOpType::embed},
+ {"EuclidianLoss", CaffeOpType::euclidianLoss},
+ {"Exp", CaffeOpType::exp},
+ {"Filter", CaffeOpType::filter},
+ {"Flatten", CaffeOpType::flatten},
+ {"HDF5Data", CaffeOpType::HDF5Data},
+ {"HDF5Output", CaffeOpType::HDF5Output},
+ {"HingeLoss", CaffeOpType::hingeLoss},
+ {"Im2Col", CaffeOpType::im2Col},
+ {"ImageData", CaffeOpType::imageData},
+ {"InfogainLoss", CaffeOpType::infogainLoss},
+ {"InnerProduct", CaffeOpType::innerProduct},
+ {"Input", CaffeOpType::input},
+ {"Log", CaffeOpType::log},
+ {"LRN", CaffeOpType::LRN},
+ {"LSTM", CaffeOpType::LSTM},
+ {"MemoryData", CaffeOpType::memoryData},
+ {"MultinomialLogisticLoss", CaffeOpType::multinomialLogisticLoss},
+ {"MVN", CaffeOpType::MVN},
+ {"Parameter", CaffeOpType::parameter},
+ {"Pooling", CaffeOpType::pooling},
+ {"Power", CaffeOpType::power},
+ {"PReLU", CaffeOpType::PReLU},
+ {"Python", CaffeOpType::python},
+ {"Recurrent", CaffeOpType::recurrent},
+ {"Reduction", CaffeOpType::reduction},
+ {"ReLU", CaffeOpType::ReLU},
+ {"Reshape", CaffeOpType::reshape},
+ {"RNN", CaffeOpType::RNN},
+ {"Scale", CaffeOpType::scale},
+ {"SigmoidCrossEntropyLoss", CaffeOpType::sigmoidCrossEntropyLoss},
+ {"Sigmoid", CaffeOpType::sigmoid},
+ {"Silence", CaffeOpType::silence},
+ {"Softmax", CaffeOpType::softmax},
+ {"SoftmaxWithLoss", CaffeOpType::softmaxWithLoss},
+ {"SPP", CaffeOpType::SPP},
+ {"Split", CaffeOpType::split},
+ {"Slice", CaffeOpType::slice},
+ {"TanH", CaffeOpType::tanh},
+ {"Threshold", CaffeOpType::threshold},
+ {"Tile", CaffeOpType::tile},
+ {"WindowData", CaffeOpType::windowData}};
} // namespace
std::unique_ptr<mir::Graph> importModelFromBinaryFile(const std::string &filename)
diff --git a/compiler/mir/src/mir_caffe_importer/caffe_op_creator.cpp b/compiler/mir/src/mir_caffe_importer/caffe_op_creator.cpp
index 37edc69c4..a2c881b82 100644
--- a/compiler/mir/src/mir_caffe_importer/caffe_op_creator.cpp
+++ b/compiler/mir/src/mir_caffe_importer/caffe_op_creator.cpp
@@ -374,7 +374,7 @@ static void convertPoolingParam(const caffe::PoolingParameter &params,
{
// Assuming NCHW format.
const std::int32_t padded_input =
- input_shape.dim(2 + i) + attributes.padding_before[i] + attributes.padding_after[i];
+ input_shape.dim(2 + i) + attributes.padding_before[i] + attributes.padding_after[i];
if ((padded_input - attributes.window[i]) % attributes.strides[i] != 0)
++attributes.padding_after[i];
}
@@ -449,7 +449,7 @@ CaffeOpCreator::convertSoftmax(const caffe::LayerParameter &layer,
auto input = createOp<ops::TransposeOp>(inputs[0], std::vector<std::size_t>{0, 2, 3, 1});
auto softmax = createOp<ops::SoftmaxOp>(input->getOutput(0), axis);
auto result =
- createOp<ops::TransposeOp>(softmax->getOutput(0), std::vector<std::size_t>{0, 3, 1, 2});
+ createOp<ops::TransposeOp>(softmax->getOutput(0), std::vector<std::size_t>{0, 3, 1, 2});
return {result->getOutput(0)};
}
@@ -823,7 +823,7 @@ CaffeOpCreator::convertLSTM(const caffe::LayerParameter &layer,
c_t = createOp<ops::AddOp>(createOp<ops::MulOp>(c_cont_t, f_t)->getOutput(0),
createOp<ops::MulOp>(i_t, g_t)->getOutput(0))
- ->getOutput(0);
+ ->getOutput(0);
h_t = createOp<ops::MulOp>(createOp<ops::TanhOp>(c_t)->getOutput(0), o_t)->getOutput(0);
h_slices[t] = h_t;
diff --git a/compiler/mir/src/mir_onnx_importer/AttributeHelpers.h b/compiler/mir/src/mir_onnx_importer/AttributeHelpers.h
index 9a93b5b7d..ac1c3cfad 100644
--- a/compiler/mir/src/mir_onnx_importer/AttributeHelpers.h
+++ b/compiler/mir/src/mir_onnx_importer/AttributeHelpers.h
@@ -76,8 +76,8 @@ inline const onnx::AttributeProto *findAttribute(const onnx::NodeProto &node,
{
const auto &attributes = node.attribute();
const auto it = std::find_if(
- attributes.cbegin(), attributes.cend(),
- [&name](const onnx::AttributeProto &attribute) { return attribute.name() == name; });
+ attributes.cbegin(), attributes.cend(),
+ [&name](const onnx::AttributeProto &attribute) { return attribute.name() == name; });
if (it == attributes.cend())
return nullptr;
return &*it;
diff --git a/compiler/mir/src/mir_onnx_importer/CMakeLists.txt b/compiler/mir/src/mir_onnx_importer/CMakeLists.txt
index e6eb13b93..04c22055e 100644
--- a/compiler/mir/src/mir_onnx_importer/CMakeLists.txt
+++ b/compiler/mir/src/mir_onnx_importer/CMakeLists.txt
@@ -112,6 +112,10 @@ target_include_directories(mir_onnx_importer PUBLIC ../../include/mir_onnx_impor
target_include_directories(mir_onnx_importer PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(mir_onnx_importer PUBLIC mir mir_onnx_proto PRIVATE mir_interpreter nncc_common)
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
nnas_find_package(GTest REQUIRED)
file(GLOB_RECURSE TEST_SOURCES "*.test.cpp")
diff --git a/compiler/mir/src/mir_onnx_importer/ConvPoolHelpers.cpp b/compiler/mir/src/mir_onnx_importer/ConvPoolHelpers.cpp
index d98e6deae..2091968d8 100644
--- a/compiler/mir/src/mir_onnx_importer/ConvPoolHelpers.cpp
+++ b/compiler/mir/src/mir_onnx_importer/ConvPoolHelpers.cpp
@@ -55,7 +55,7 @@ void inferAutoPadding(const std::string &pad_type, const mir::Shape &input_shape
// Assuming input has NCHW format.
const std::int32_t residual = input_shape.dim(2 + i) % strides[i];
const std::int32_t total_pad = std::max(
- INT32_C(0), residual == 0 ? eff_window_size - strides[i] : eff_window_size - residual);
+ INT32_C(0), residual == 0 ? eff_window_size - strides[i] : eff_window_size - residual);
if (pad_type == "SAME_UPPER")
{
padding_before[i] = total_pad / 2;
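// --- Editor's sketch (not part of the diff): the SAME auto-padding arithmetic
// above. total_pad makes the effective window tile the input exactly;
// SAME_UPPER puts the odd leftover pixel after the data, SAME_LOWER before it.
#include <algorithm>
#include <cstdint>

inline void SamePadding(int32_t input_size, int32_t eff_window_size, int32_t stride,
                        bool upper, int32_t &before, int32_t &after)
{
  const int32_t residual = input_size % stride;
  const int32_t total_pad = std::max(
    INT32_C(0), residual == 0 ? eff_window_size - stride : eff_window_size - residual);
  before = upper ? total_pad / 2 : total_pad - total_pad / 2;
  after = total_pad - before;
}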
diff --git a/compiler/mir/src/mir_onnx_importer/ONNXHelpers.cpp b/compiler/mir/src/mir_onnx_importer/ONNXHelpers.cpp
index f3a9d182d..77656cf48 100644
--- a/compiler/mir/src/mir_onnx_importer/ONNXHelpers.cpp
+++ b/compiler/mir/src/mir_onnx_importer/ONNXHelpers.cpp
@@ -166,9 +166,9 @@ mir::Operation *foldConstants(mir::Graph *graph, mir::Operation *op)
}
bool is_foldable =
- std::all_of(op->getInputs().begin(), op->getInputs().end(), [](mir::Operation::Output *out) {
- return out->getNode()->getType() == mir::Operation::Type::constant;
- });
+ std::all_of(op->getInputs().begin(), op->getInputs().end(), [](mir::Operation::Output *out) {
+ return out->getNode()->getType() == mir::Operation::Type::constant;
+ });
if (!is_foldable)
return op;
diff --git a/compiler/mir/src/mir_onnx_importer/ONNXImporterImpl.cpp b/compiler/mir/src/mir_onnx_importer/ONNXImporterImpl.cpp
index 8b996244f..6379b6c87 100644
--- a/compiler/mir/src/mir_onnx_importer/ONNXImporterImpl.cpp
+++ b/compiler/mir/src/mir_onnx_importer/ONNXImporterImpl.cpp
@@ -134,7 +134,7 @@ void ONNXImporterImpl::collectUnsupportedOps()
auto opset = _modelCtx->getDomainOpsetVersion(onnx_node.domain());
NodeConverterRegistry::ConverterFunc converter =
- NodeConverterRegistry::getInstance().lookup(op_type, opset);
+ NodeConverterRegistry::getInstance().lookup(op_type, opset);
if (converter == nullptr)
problems_op_set.emplace(op_type, opset);
@@ -176,7 +176,7 @@ void ONNXImporterImpl::createGraphInputs()
}
auto elem_type = onnxDataTypeToMirDataType(
- (onnx::TensorProto_DataType)input.type().tensor_type().elem_type());
+ (onnx::TensorProto_DataType)input.type().tensor_type().elem_type());
mir::TensorType type{elem_type, shape};
auto *op = _graph->create<mir::ops::InputOp>(type);
_converterCtx->setOutput(input.name(), op->getOutput(0));
@@ -199,7 +199,7 @@ std::unique_ptr<mir::Graph> ONNXImporterImpl::createIR()
auto opset = _modelCtx->getDomainOpsetVersion(onnx_node.domain());
// Get converter
NodeConverterRegistry::ConverterFunc converter =
- NodeConverterRegistry::getInstance().lookup(op_type, opset);
+ NodeConverterRegistry::getInstance().lookup(op_type, opset);
assert(converter != nullptr);
converter(onnx_node, _converterCtx.get());
}
diff --git a/compiler/mir/src/mir_onnx_importer/ONNXNodeConverterRegistry.cpp b/compiler/mir/src/mir_onnx_importer/ONNXNodeConverterRegistry.cpp
index a11b18e89..573b41468 100644
--- a/compiler/mir/src/mir_onnx_importer/ONNXNodeConverterRegistry.cpp
+++ b/compiler/mir/src/mir_onnx_importer/ONNXNodeConverterRegistry.cpp
@@ -117,8 +117,8 @@ NodeConverterRegistry::ConverterFunc NodeConverterRegistry::lookup(const std::st
const VersionMap &conv_map = it->second;
auto res = std::lower_bound(
- conv_map.crbegin(), conv_map.crend(), opset,
- [](const VersionMap::value_type &pair, int64_t opset) { return pair.first > opset; });
+ conv_map.crbegin(), conv_map.crend(), opset,
+ [](const VersionMap::value_type &pair, int64_t opset) { return pair.first > opset; });
if (res == conv_map.crend())
{
diff --git a/compiler/mir/src/mir_onnx_importer/Op/AveragePool.cpp b/compiler/mir/src/mir_onnx_importer/Op/AveragePool.cpp
index 503feffc8..1ee136ea6 100644
--- a/compiler/mir/src/mir_onnx_importer/Op/AveragePool.cpp
+++ b/compiler/mir/src/mir_onnx_importer/Op/AveragePool.cpp
@@ -40,7 +40,7 @@ void convertAveragePoolV1(const onnx::NodeProto &onnx_node, ConverterContext *co
constexpr int num_spatial_dims = 2;
const auto strides =
- getAttributeValue(onnx_node, "strides", std::vector<std::int32_t>(num_spatial_dims, 1));
+ getAttributeValue(onnx_node, "strides", std::vector<std::int32_t>(num_spatial_dims, 1));
if (strides.size() != num_spatial_dims)
throw std::runtime_error("AveragePool: attribute 'strides' has incorrect size.");
diff --git a/compiler/mir/src/mir_onnx_importer/Op/BatchNormalization.cpp b/compiler/mir/src/mir_onnx_importer/Op/BatchNormalization.cpp
index 8a6d8cc51..c743ee9e0 100644
--- a/compiler/mir/src/mir_onnx_importer/Op/BatchNormalization.cpp
+++ b/compiler/mir/src/mir_onnx_importer/Op/BatchNormalization.cpp
@@ -81,7 +81,7 @@ void convertBatchNormalizationV9(const onnx::NodeProto &onnx_node, ConverterCont
if (scale_op == nullptr || mean_op == nullptr || var_op == nullptr)
throw std::runtime_error(
- "BatchNormalization: only constant 'scale', 'mean' and 'variance' inputs are supported.");
+ "BatchNormalization: only constant 'scale', 'mean' and 'variance' inputs are supported.");
mir::Tensor<float> scale_accessor(scale_op->getValue());
mir::Tensor<float> mean_accessor(mean_op->getValue());
diff --git a/compiler/mir/src/mir_onnx_importer/Op/Conv.cpp b/compiler/mir/src/mir_onnx_importer/Op/Conv.cpp
index 7dc6ce818..7d78826a6 100644
--- a/compiler/mir/src/mir_onnx_importer/Op/Conv.cpp
+++ b/compiler/mir/src/mir_onnx_importer/Op/Conv.cpp
@@ -139,7 +139,7 @@ void convertConvV1(const onnx::NodeProto &onnx_node, ConverterContext *context)
{
auto bias = inputs[2];
bias = createOp<mir::ops::ReshapeOp>(graph, bias, mir::Shape{1, bias->getShape().dim(0), 1, 1})
- ->getOutput(0);
+ ->getOutput(0);
result = createOp<mir::ops::AddOp>(graph, result, bias)->getOutput(0);
}
diff --git a/compiler/mir/src/mir_onnx_importer/Op/ConvTranspose.cpp b/compiler/mir/src/mir_onnx_importer/Op/ConvTranspose.cpp
index 3078a1959..ea0b6fa5e 100644
--- a/compiler/mir/src/mir_onnx_importer/Op/ConvTranspose.cpp
+++ b/compiler/mir/src/mir_onnx_importer/Op/ConvTranspose.cpp
@@ -49,19 +49,19 @@ void convertConvTransposeV1(const onnx::NodeProto &onnx_node, ConverterContext *
constexpr int num_spatial_dims = 2;
const auto dilations =
- getAttributeValue(onnx_node, "dilations", std::vector<std::int32_t>(num_spatial_dims, 1));
+ getAttributeValue(onnx_node, "dilations", std::vector<std::int32_t>(num_spatial_dims, 1));
if (dilations.size() != num_spatial_dims)
throw std::runtime_error("ConvTranspose: attribute 'dilations' has incorrect size.");
if (!std::all_of(dilations.cbegin(), dilations.cend(), [](std::int32_t x) { return x == 1; }))
throw std::runtime_error("ConvTranspose: attribute 'dilations' has unsupported value.");
const auto strides =
- getAttributeValue(onnx_node, "strides", std::vector<std::int32_t>(num_spatial_dims, 1));
+ getAttributeValue(onnx_node, "strides", std::vector<std::int32_t>(num_spatial_dims, 1));
if (strides.size() != num_spatial_dims)
throw std::runtime_error("ConvTranspose: attribute 'strides' has incorrect size.");
- const auto output_padding = getAttributeValue(onnx_node, "output_padding",
- std::vector<std::int32_t>(num_spatial_dims, 0));
+ const auto output_padding =
+ getAttributeValue(onnx_node, "output_padding", std::vector<std::int32_t>(num_spatial_dims, 0));
if (output_padding.size() != num_spatial_dims)
throw std::runtime_error("ConvTranspose: attribute 'output_padding' has incorrect size.");
if (!std::all_of(output_padding.cbegin(), output_padding.cend(),
@@ -71,8 +71,8 @@ void convertConvTransposeV1(const onnx::NodeProto &onnx_node, ConverterContext *
// Assuming kernel has IOHW format.
assert(kernel->getShape().rank() == 4);
const auto kernel_size = getAttributeValue(
- onnx_node, "kernel_shape",
- std::vector<std::int32_t>{kernel->getShape().dim(2), kernel->getShape().dim(3)});
+ onnx_node, "kernel_shape",
+ std::vector<std::int32_t>{kernel->getShape().dim(2), kernel->getShape().dim(3)});
if (kernel_size.size() != num_spatial_dims)
throw std::runtime_error("ConvTranspose: attribute 'kernel_shape' has incorrect size.");
@@ -92,14 +92,14 @@ void convertConvTransposeV1(const onnx::NodeProto &onnx_node, ConverterContext *
attributes.strides = strides;
attributes.data_format = mir::DataFormat::NCHW;
attributes.padding_type = mir::ops::PaddingType::SameUpper;
- result = createOp<mir::ops::DeConv2DOp>(graph, input, kernel, attributes, output_shape)
- ->getOutput(0);
+ result =
+ createOp<mir::ops::DeConv2DOp>(graph, input, kernel, attributes, output_shape)->getOutput(0);
}
else
{
// TODO This code was not tested.
throw std::runtime_error(
- "ConvTranspose: absence of attribute 'output_shape' is not supported.");
+ "ConvTranspose: absence of attribute 'output_shape' is not supported.");
std::vector<std::int32_t> padding_before(num_spatial_dims, 0);
std::vector<std::int32_t> padding_after(num_spatial_dims, 0);
if (const auto *pads_attr = findAttribute(onnx_node, "pads"))
@@ -128,7 +128,7 @@ void convertConvTransposeV1(const onnx::NodeProto &onnx_node, ConverterContext *
{
auto bias = inputs[2];
bias = createOp<mir::ops::ReshapeOp>(graph, bias, mir::Shape{1, bias->getShape().dim(0), 1, 1})
- ->getOutput(0);
+ ->getOutput(0);
result = createOp<mir::ops::AddOp>(graph, result, bias)->getOutput(0);
}
diff --git a/compiler/mir/src/mir_onnx_importer/Op/MaxPool.cpp b/compiler/mir/src/mir_onnx_importer/Op/MaxPool.cpp
index 53e6e1556..6c9ef6621 100644
--- a/compiler/mir/src/mir_onnx_importer/Op/MaxPool.cpp
+++ b/compiler/mir/src/mir_onnx_importer/Op/MaxPool.cpp
@@ -40,7 +40,7 @@ void convertMaxPoolV1(const onnx::NodeProto &onnx_node, ConverterContext *contex
constexpr int num_spatial_dims = 2;
const auto strides =
- getAttributeValue(onnx_node, "strides", std::vector<std::int32_t>(num_spatial_dims, 1));
+ getAttributeValue(onnx_node, "strides", std::vector<std::int32_t>(num_spatial_dims, 1));
if (strides.size() != num_spatial_dims)
throw std::runtime_error("MaxPool: attribute 'strides' has incorrect size.");
diff --git a/compiler/mir/src/mir_onnx_importer/Op/ReduceMean.cpp b/compiler/mir/src/mir_onnx_importer/Op/ReduceMean.cpp
index ec43bffb4..9bfe16282 100644
--- a/compiler/mir/src/mir_onnx_importer/Op/ReduceMean.cpp
+++ b/compiler/mir/src/mir_onnx_importer/Op/ReduceMean.cpp
@@ -52,7 +52,7 @@ void convertReduceMeanV1(const onnx::NodeProto &onnx_node, ConverterContext *con
mir::Graph *graph = context->getGraph();
auto result =
- createOp<mir::ops::ReduceMeanOp>(graph, inputs[0], reduce_dims, keep_dims)->getOutput(0);
+ createOp<mir::ops::ReduceMeanOp>(graph, inputs[0], reduce_dims, keep_dims)->getOutput(0);
context->setNodeOutputs(onnx_node, {result});
}
diff --git a/compiler/mir/src/mir_onnx_importer/Op/Upsample.cpp b/compiler/mir/src/mir_onnx_importer/Op/Upsample.cpp
index 346e22cc2..881ec89d3 100644
--- a/compiler/mir/src/mir_onnx_importer/Op/Upsample.cpp
+++ b/compiler/mir/src/mir_onnx_importer/Op/Upsample.cpp
@@ -52,9 +52,9 @@ void convertUpsampleV1(const onnx::NodeProto &onnx_node, ConverterContext *conte
scales_vector.at(3) = w_scale;
auto result =
- createOp<mir::ops::ResizeOp>(graph, inputs[0],
- mir::ops::ResizeOp::ResizeMethod::nearestNeighbor, scales_vector)
- ->getOutput(0);
+ createOp<mir::ops::ResizeOp>(graph, inputs[0],
+ mir::ops::ResizeOp::ResizeMethod::nearestNeighbor, scales_vector)
+ ->getOutput(0);
context->setNodeOutputs(onnx_node, {result});
}
@@ -74,7 +74,7 @@ void convertUpsampleV7(const onnx::NodeProto &onnx_node, ConverterContext *conte
if (scales_attr->floats_size() != inputs[0]->getShape().rank())
throw std::runtime_error(
- "Number of elements of scales should be the same as the rank of input");
+ "Number of elements of scales should be the same as the rank of input");
assert(inputs[0]->getShape().rank() == 4 && "Only rank 4 is supported");
std::vector<float> scales_vector(4);
@@ -85,9 +85,9 @@ void convertUpsampleV7(const onnx::NodeProto &onnx_node, ConverterContext *conte
scales_vector.at(3) = scales_attr->floats(3);
auto result =
- createOp<mir::ops::ResizeOp>(graph, inputs[0],
- mir::ops::ResizeOp::ResizeMethod::nearestNeighbor, scales_vector)
- ->getOutput(0);
+ createOp<mir::ops::ResizeOp>(graph, inputs[0],
+ mir::ops::ResizeOp::ResizeMethod::nearestNeighbor, scales_vector)
+ ->getOutput(0);
context->setNodeOutputs(onnx_node, {result});
}
@@ -117,9 +117,9 @@ void convertUpsampleV9(const onnx::NodeProto &onnx_node, ConverterContext *conte
scales_vector[i] = scales_tensor.atOffset(i);
auto result =
- createOp<mir::ops::ResizeOp>(graph, inputs[0],
- mir::ops::ResizeOp::ResizeMethod::nearestNeighbor, scales_vector)
- ->getOutput(0);
+ createOp<mir::ops::ResizeOp>(graph, inputs[0],
+ mir::ops::ResizeOp::ResizeMethod::nearestNeighbor, scales_vector)
+ ->getOutput(0);
context->setNodeOutputs(onnx_node, {result});
}
diff --git a/compiler/mir/src/mir_tflite_importer/CMakeLists.txt b/compiler/mir/src/mir_tflite_importer/CMakeLists.txt
index 952857c86..6c6c28a32 100644
--- a/compiler/mir/src/mir_tflite_importer/CMakeLists.txt
+++ b/compiler/mir/src/mir_tflite_importer/CMakeLists.txt
@@ -1,4 +1,4 @@
-nnas_find_package(FlatBuffers REQUIRED)
+nnas_find_package(FlatBuffers EXACT 2.0 REQUIRED)
if (NOT FlatBuffers_FOUND)
return()
diff --git a/compiler/mir/src/mir_tflite_importer/tflite_importer.cpp b/compiler/mir/src/mir_tflite_importer/tflite_importer.cpp
index 3f245d2d4..7b91bf0ba 100644
--- a/compiler/mir/src/mir_tflite_importer/tflite_importer.cpp
+++ b/compiler/mir/src/mir_tflite_importer/tflite_importer.cpp
@@ -105,37 +105,37 @@ void TfliteImporter::import()
}
static const std::set<tflite::BuiltinOperator> supportedOperators = {
- tflite::BuiltinOperator_ADD,
- tflite::BuiltinOperator_AVERAGE_POOL_2D,
- tflite::BuiltinOperator_CONCATENATION,
- tflite::BuiltinOperator_CONV_2D,
- tflite::BuiltinOperator_DEPTHWISE_CONV_2D,
- tflite::BuiltinOperator_DIV,
- tflite::BuiltinOperator_FULLY_CONNECTED,
- tflite::BuiltinOperator_HARD_SWISH,
- tflite::BuiltinOperator_LEAKY_RELU,
- tflite::BuiltinOperator_LOGISTIC,
- tflite::BuiltinOperator_MAX_POOL_2D,
- tflite::BuiltinOperator_MAXIMUM,
- tflite::BuiltinOperator_MEAN,
- tflite::BuiltinOperator_MUL,
- tflite::BuiltinOperator_PAD,
- tflite::BuiltinOperator_RELU,
- tflite::BuiltinOperator_RELU6,
- tflite::BuiltinOperator_RESHAPE,
- tflite::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
- tflite::BuiltinOperator_RSQRT,
- tflite::BuiltinOperator_SHAPE,
- tflite::BuiltinOperator_SLICE,
- tflite::BuiltinOperator_SOFTMAX,
- tflite::BuiltinOperator_SQRT,
- tflite::BuiltinOperator_SQUARED_DIFFERENCE,
- tflite::BuiltinOperator_SQUEEZE,
- tflite::BuiltinOperator_STRIDED_SLICE,
- tflite::BuiltinOperator_SUB,
- tflite::BuiltinOperator_TANH,
- tflite::BuiltinOperator_TRANSPOSE,
- tflite::BuiltinOperator_TRANSPOSE_CONV,
+ tflite::BuiltinOperator_ADD,
+ tflite::BuiltinOperator_AVERAGE_POOL_2D,
+ tflite::BuiltinOperator_CONCATENATION,
+ tflite::BuiltinOperator_CONV_2D,
+ tflite::BuiltinOperator_DEPTHWISE_CONV_2D,
+ tflite::BuiltinOperator_DIV,
+ tflite::BuiltinOperator_FULLY_CONNECTED,
+ tflite::BuiltinOperator_HARD_SWISH,
+ tflite::BuiltinOperator_LEAKY_RELU,
+ tflite::BuiltinOperator_LOGISTIC,
+ tflite::BuiltinOperator_MAX_POOL_2D,
+ tflite::BuiltinOperator_MAXIMUM,
+ tflite::BuiltinOperator_MEAN,
+ tflite::BuiltinOperator_MUL,
+ tflite::BuiltinOperator_PAD,
+ tflite::BuiltinOperator_RELU,
+ tflite::BuiltinOperator_RELU6,
+ tflite::BuiltinOperator_RESHAPE,
+ tflite::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
+ tflite::BuiltinOperator_RSQRT,
+ tflite::BuiltinOperator_SHAPE,
+ tflite::BuiltinOperator_SLICE,
+ tflite::BuiltinOperator_SOFTMAX,
+ tflite::BuiltinOperator_SQRT,
+ tflite::BuiltinOperator_SQUARED_DIFFERENCE,
+ tflite::BuiltinOperator_SQUEEZE,
+ tflite::BuiltinOperator_STRIDED_SLICE,
+ tflite::BuiltinOperator_SUB,
+ tflite::BuiltinOperator_TANH,
+ tflite::BuiltinOperator_TRANSPOSE,
+ tflite::BuiltinOperator_TRANSPOSE_CONV,
};
void TfliteImporter::collectUnsupportedOps()
@@ -268,8 +268,8 @@ void TfliteImporter::walkOperator(const tflite::SubGraphT *subgraph, const tflit
outputs = _opCreator->convertConv2D(op->builtin_options.AsConv2DOptions(), inputs);
break;
case tflite::BuiltinOperator_DEPTHWISE_CONV_2D:
- outputs = _opCreator->convertDepthwiseConv2D(op->builtin_options.AsDepthwiseConv2DOptions(),
- inputs);
+ outputs =
+ _opCreator->convertDepthwiseConv2D(op->builtin_options.AsDepthwiseConv2DOptions(), inputs);
break;
case tflite::BuiltinOperator_MAX_POOL_2D:
outputs = _opCreator->convertMaxPool2D(op->builtin_options.AsPool2DOptions(), inputs);
@@ -279,21 +279,21 @@ void TfliteImporter::walkOperator(const tflite::SubGraphT *subgraph, const tflit
break;
case tflite::BuiltinOperator_CONCATENATION:
outputs =
- _opCreator->convertConcatenation(op->builtin_options.AsConcatenationOptions(), inputs);
+ _opCreator->convertConcatenation(op->builtin_options.AsConcatenationOptions(), inputs);
break;
case tflite::BuiltinOperator_RESHAPE:
outputs = _opCreator->convertReshape(op->builtin_options.AsReshapeOptions(), inputs);
break;
case tflite::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR:
outputs = _opCreator->convertResizeNearestNeighbor(
- op->builtin_options.AsResizeNearestNeighborOptions(), inputs);
+ op->builtin_options.AsResizeNearestNeighborOptions(), inputs);
break;
case tflite::BuiltinOperator_MEAN:
outputs = _opCreator->convertMean(op->builtin_options.AsReducerOptions(), inputs);
break;
case tflite::BuiltinOperator_FULLY_CONNECTED:
outputs =
- _opCreator->convertFullyConnected(op->builtin_options.AsFullyConnectedOptions(), inputs);
+ _opCreator->convertFullyConnected(op->builtin_options.AsFullyConnectedOptions(), inputs);
break;
case tflite::BuiltinOperator_SOFTMAX:
outputs = _opCreator->convertSoftmax(op->builtin_options.AsSoftmaxOptions(), inputs);
@@ -333,7 +333,7 @@ void TfliteImporter::walkOperator(const tflite::SubGraphT *subgraph, const tflit
break;
case tflite::BuiltinOperator_TRANSPOSE_CONV:
outputs =
- _opCreator->convertTransposeConv(op->builtin_options.AsTransposeConvOptions(), inputs);
+ _opCreator->convertTransposeConv(op->builtin_options.AsTransposeConvOptions(), inputs);
break;
case tflite::BuiltinOperator_PAD:
outputs = _opCreator->convertPad(op->builtin_options.AsPadOptions(), inputs);
@@ -352,7 +352,7 @@ void TfliteImporter::walkOperator(const tflite::SubGraphT *subgraph, const tflit
break;
case tflite::BuiltinOperator_STRIDED_SLICE:
outputs =
- _opCreator->convertStridedSlice(op->builtin_options.AsStridedSliceOptions(), inputs);
+ _opCreator->convertStridedSlice(op->builtin_options.AsStridedSliceOptions(), inputs);
break;
case tflite::BuiltinOperator_LEAKY_RELU:
outputs = _opCreator->convertLeakyReLU(op->builtin_options.AsLeakyReluOptions(), inputs);
diff --git a/compiler/mir/src/mir_tflite_importer/tflite_op_creator.cpp b/compiler/mir/src/mir_tflite_importer/tflite_op_creator.cpp
index d9f98da55..58425e9a9 100644
--- a/compiler/mir/src/mir_tflite_importer/tflite_op_creator.cpp
+++ b/compiler/mir/src/mir_tflite_importer/tflite_op_creator.cpp
@@ -92,9 +92,9 @@ static void calculatePadding(mir::ops::PaddingType padding_type, const mir::Shap
{
// Assuming NHWC format.
const std::int32_t total_padding =
- (input_shape.dim(1 + i) % strides[i] == 0)
- ? std::max(0, window_size[i] - strides[i])
- : std::max(0, window_size[i] - input_shape.dim(1 + i) % strides[i]);
+ (input_shape.dim(1 + i) % strides[i] == 0)
+ ? std::max(0, window_size[i] - strides[i])
+ : std::max(0, window_size[i] - input_shape.dim(1 + i) % strides[i]);
padding_before[i] = total_padding / 2;
padding_after[i] = total_padding - padding_before[i];
}
@@ -332,7 +332,7 @@ TFLiteOpCreator::convertResizeNearestNeighbor(const tflite::ResizeNearestNeighbo
Shape res_shape{input_shape.dim(0), size_tensor.at(mir::Index{0}), size_tensor.at(mir::Index{1}),
input_shape.dim(3)};
auto result =
- createOp<ops::ResizeOp>(input, ops::ResizeOp::ResizeMethod::nearestNeighbor, res_shape);
+ createOp<ops::ResizeOp>(input, ops::ResizeOp::ResizeMethod::nearestNeighbor, res_shape);
return {result->getOutput(0)};
}
diff --git a/compiler/mir/src/ops/AvgPool2DOp.cpp b/compiler/mir/src/ops/AvgPool2DOp.cpp
index 52b67303f..945917208 100644
--- a/compiler/mir/src/ops/AvgPool2DOp.cpp
+++ b/compiler/mir/src/ops/AvgPool2DOp.cpp
@@ -50,7 +50,7 @@ void AvgPool2DOp::inferOutputTypes()
// (in_size - window_size + 1 + stride - 1) / stride =
// (in_size - window_size) / stride + 1
output_shape.dim(spatial_dim_index) =
- (padded_input - _attributes.window[i]) / _attributes.strides[i] + 1;
+ (padded_input - _attributes.window[i]) / _attributes.strides[i] + 1;
}
setOutputType(0, {getInput(0)->getElementType(), output_shape});
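// --- Editor's sketch (not part of the diff): the output-extent formula shared by
// the pooling and convolution hunks above and below. It is the ceiling of
// (padded_input - window + 1) / stride, rewritten in integer arithmetic exactly
// as the in-source derivation comment shows.
#include <cstdint>

inline int32_t OutputExtent(int32_t padded_input, int32_t window, int32_t stride)
{
  return (padded_input - window) / stride + 1;
}
// e.g. padded_input = 7, window = 3, stride = 2  ->  (7 - 3) / 2 + 1 = 3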
diff --git a/compiler/mir/src/ops/Conv2DOp.cpp b/compiler/mir/src/ops/Conv2DOp.cpp
index 1addc5734..1de73b62d 100644
--- a/compiler/mir/src/ops/Conv2DOp.cpp
+++ b/compiler/mir/src/ops/Conv2DOp.cpp
@@ -54,7 +54,7 @@ void Conv2DOp::inferOutputTypes()
// (in_size - kernel_size + 1 + stride - 1) / stride =
// (in_size - kernel_size) / stride + 1
output_shape.dim(spatial_dim_index) =
- (padded_input - kernel_shape.dim(1 + i)) / _attributes.strides[i] + 1;
+ (padded_input - kernel_shape.dim(1 + i)) / _attributes.strides[i] + 1;
}
auto dt = getInput(0)->getElementType();
diff --git a/compiler/mir/src/ops/DeConv2DOp.cpp b/compiler/mir/src/ops/DeConv2DOp.cpp
index 35b111bc0..08829d327 100644
--- a/compiler/mir/src/ops/DeConv2DOp.cpp
+++ b/compiler/mir/src/ops/DeConv2DOp.cpp
@@ -36,8 +36,8 @@ void DeConv2DOp::inferPaddings()
{
const int spatial_dim_index = getDataSpatialDimIndex(_attributes.data_format, i);
const std::int32_t total_padding =
- (input_shape.dim(spatial_dim_index) - 1) * _attributes.strides[i] + kernel_shape.dim(i) -
- output_shape.dim(spatial_dim_index);
+ (input_shape.dim(spatial_dim_index) - 1) * _attributes.strides[i] + kernel_shape.dim(i) -
+ output_shape.dim(spatial_dim_index);
switch (_attributes.padding_type)
{
@@ -85,8 +85,8 @@ void DeConv2DOp::inferOutputTypes()
{
const int spatial_dim_index = getDataSpatialDimIndex(_attributes.data_format, i);
output_shape.dim(spatial_dim_index) =
- (input_shape.dim(spatial_dim_index) - 1) * _attributes.strides[i] + kernel_shape.dim(i) -
- (_attributes.padding_before.at(i) + _attributes.padding_after.at(i));
+ (input_shape.dim(spatial_dim_index) - 1) * _attributes.strides[i] + kernel_shape.dim(i) -
+ (_attributes.padding_before.at(i) + _attributes.padding_after.at(i));
}
setOutputType(0, {getInput(0)->getElementType(), output_shape});
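Note: DeConv2DOp::inferOutputTypes applies the transposed-convolution size formula, the inverse of the convolution formula seen above. A small round-trip check (parameters hypothetical; 2 * pad stands in for padding_before + padding_after):

    #include <cassert>

    int main()
    {
      const int in = 3, stride = 2, kernel = 3, pad = 0;
      const int deconv_out = (in - 1) * stride + kernel - 2 * pad; // 7
      const int conv_back = (deconv_out - kernel) / stride + 1;    // 3
      assert(conv_back == in);
      return 0;
    }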
diff --git a/compiler/mir/src/ops/DepthwiseConv2DOp.cpp b/compiler/mir/src/ops/DepthwiseConv2DOp.cpp
index 0154bcd09..521d2eb49 100644
--- a/compiler/mir/src/ops/DepthwiseConv2DOp.cpp
+++ b/compiler/mir/src/ops/DepthwiseConv2DOp.cpp
@@ -50,7 +50,7 @@ void DepthwiseConv2DOp::inferOutputTypes()
// (in_size - kernel_size + 1 + stride - 1) / stride =
// (in_size - kernel_size) / stride + 1
output_shape.dim(spatial_dim_index) =
- (padded_input - kernel_shape.dim(i)) / _attributes.strides[i] + 1;
+ (padded_input - kernel_shape.dim(i)) / _attributes.strides[i] + 1;
}
setOutputType(0, {getInput(0)->getElementType(), output_shape});
diff --git a/compiler/mir/src/ops/MaxPool2DOp.cpp b/compiler/mir/src/ops/MaxPool2DOp.cpp
index 38e72424e..0cb3aa93c 100644
--- a/compiler/mir/src/ops/MaxPool2DOp.cpp
+++ b/compiler/mir/src/ops/MaxPool2DOp.cpp
@@ -50,7 +50,7 @@ void MaxPool2DOp::inferOutputTypes()
// (in_size - window_size + 1 + stride - 1) / stride =
// (in_size - window_size) / stride + 1
output_shape.dim(spatial_dim_index) =
- (padded_input - _attributes.window[i]) / _attributes.strides[i] + 1;
+ (padded_input - _attributes.window[i]) / _attributes.strides[i] + 1;
}
setOutputType(0, {getInput(0)->getElementType(), output_shape});
diff --git a/compiler/mir/src/ops/PadOp.cpp b/compiler/mir/src/ops/PadOp.cpp
index 465856d92..38feaccdc 100644
--- a/compiler/mir/src/ops/PadOp.cpp
+++ b/compiler/mir/src/ops/PadOp.cpp
@@ -30,7 +30,7 @@ void PadOp::inferOutputTypes()
for (int32_t dim = 0; dim < num_dims; ++dim)
{
out_shape.dim(dim) =
- _attributes.padding_before[dim] + input_shape.dim(dim) + _attributes.padding_after[dim];
+ _attributes.padding_before[dim] + input_shape.dim(dim) + _attributes.padding_after[dim];
}
setOutputType(0, {getInput(0)->getElementType(), out_shape});
diff --git a/compiler/mir/src/ops/TransposeOp.cpp b/compiler/mir/src/ops/TransposeOp.cpp
index 92282e17d..d04cdb4f2 100644
--- a/compiler/mir/src/ops/TransposeOp.cpp
+++ b/compiler/mir/src/ops/TransposeOp.cpp
@@ -22,7 +22,7 @@ namespace ops
{
TransposeOp::TransposeOp(Output *arg, const std::vector<std::size_t> &axis_order)
- : Operation(Type::transpose, {arg}), _axis_order(axis_order)
+ : Operation(Type::transpose, {arg}), _axis_order(axis_order)
{
assert(_axis_order.size() == static_cast<std::size_t>(getInputShape(0).rank()));
inferOutputTypes();
@@ -34,7 +34,7 @@ void TransposeOp::inferOutputTypes()
Shape output_shape(input_shape.rank());
for (std::size_t i = 0; i < _axis_order.size(); ++i)
output_shape.dim(static_cast<std::int64_t>(i)) =
- input_shape.dim(static_cast<int32_t>(_axis_order.at(i)));
+ input_shape.dim(static_cast<int32_t>(_axis_order.at(i)));
setOutputType(0, {getInput(0)->getElementType(), output_shape});
}
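Note: TransposeOp::inferOutputTypes assigns output dim i from input dim _axis_order[i]. With the {2, 7, 9, 5} input and {3, 0, 1, 2} order used by Transpose_Test later in this patch, that yields {5, 2, 7, 9}; a sketch of the same loop:

    #include <array>
    #include <cassert>
    #include <cstddef>

    int main()
    {
      const std::array<int, 4> in{2, 7, 9, 5};
      const std::array<std::size_t, 4> order{3, 0, 1, 2};
      std::array<int, 4> out{};
      for (std::size_t i = 0; i < order.size(); ++i)
        out[i] = in[order[i]]; // same indexing as inferOutputTypes
      assert((out == std::array<int, 4>{5, 2, 7, 9}));
      return 0;
    }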
diff --git a/compiler/mir/unittests/ShapeInference.cpp b/compiler/mir/unittests/ShapeInference.cpp
index bae4ec5e2..c902b1e12 100644
--- a/compiler/mir/unittests/ShapeInference.cpp
+++ b/compiler/mir/unittests/ShapeInference.cpp
@@ -80,8 +80,8 @@ TEST(ShapeInferenceTest, ResizeWithScale)
auto input = g.create<ops::InputOp>(input_type);
auto op =
- g.create<ops::ResizeOp>(input->getOutput(0), ops::ResizeOp::ResizeMethod::nearestNeighbor,
- std::vector<float>{1, 6, 2, 1});
+ g.create<ops::ResizeOp>(input->getOutput(0), ops::ResizeOp::ResizeMethod::nearestNeighbor,
+ std::vector<float>{1, 6, 2, 1});
ASSERT_EQ(result_shape, op->getOutputShape(0));
}
diff --git a/compiler/mir/unittests/ShapeRange.cpp b/compiler/mir/unittests/ShapeRange.cpp
index 3b32d0c61..91b1be744 100644
--- a/compiler/mir/unittests/ShapeRange.cpp
+++ b/compiler/mir/unittests/ShapeRange.cpp
@@ -29,7 +29,7 @@ struct ParamType
template <typename... Args>
explicit ParamType(int32_t actual_len, Args &&... args)
- : actual_length(actual_len), shape({static_cast<int32_t>(args)...})
+ : actual_length(actual_len), shape({static_cast<int32_t>(args)...})
{
}
};
@@ -56,7 +56,7 @@ TEST_P(ShapeIteratorTest, ElementCount)
std::vector<ParamType> test_data{ParamType{6, 1, 2, 3}, ParamType{16, 2, 2, 4},
ParamType{1, 1, 1, 1, 1, 1}, ParamType{5, 5, 1, 1, 1, 1, 1}};
-INSTANTIATE_TEST_CASE_P(SimpleInput, ShapeIteratorTest, ::testing::ValuesIn(test_data));
+INSTANTIATE_TEST_SUITE_P(SimpleInput, ShapeIteratorTest, ::testing::ValuesIn(test_data));
TEST(ShapeRange, Contains)
{
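Note: INSTANTIATE_TEST_CASE_P was deprecated in GoogleTest 1.10 in favor of INSTANTIATE_TEST_SUITE_P; only the macro name changes, as the hunk shows. Minimal usage of the new spelling (fixture and values hypothetical):

    #include <gtest/gtest.h>

    class SampleTest : public ::testing::TestWithParam<int>
    {
    };

    TEST_P(SampleTest, IsNonNegative) { EXPECT_GE(GetParam(), 0); }

    // Takes the same arguments as the old INSTANTIATE_TEST_CASE_P.
    INSTANTIATE_TEST_SUITE_P(SimpleInput, SampleTest, ::testing::Values(0, 1, 2));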
diff --git a/compiler/mir2loco/CMakeLists.txt b/compiler/mir2loco/CMakeLists.txt
index a8a096ef4..217f1bd15 100644
--- a/compiler/mir2loco/CMakeLists.txt
+++ b/compiler/mir2loco/CMakeLists.txt
@@ -8,11 +8,11 @@ target_include_directories(mir2loco PUBLIC include)
target_link_libraries(mir2loco PUBLIC mir)
target_link_libraries(mir2loco PUBLIC loco)
-nnas_find_package(GTest QUIET)
-
-if(NOT GTest_FOUND)
+if(NOT ENABLE_TEST)
return()
-endif(NOT GTest_FOUND)
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest QUIET)
GTest_AddTest(mir2loco_test ${TESTS})
target_link_libraries(mir2loco_test mir2loco)
diff --git a/compiler/mir2loco/src/mir2loco.test.cpp b/compiler/mir2loco/src/mir2loco.test.cpp
index 3870caeb5..244c92aa8 100644
--- a/compiler/mir2loco/src/mir2loco.test.cpp
+++ b/compiler/mir2loco/src/mir2loco.test.cpp
@@ -140,10 +140,10 @@ TEST_F(TestTransformer_mir2loco, Avg_Pool_Test)
loco::Pull *pull_node = dynamic_cast<loco::Pull *>(loco_graph->nodes()->at(0));
loco::FeatureEncode *encode_node =
- dynamic_cast<loco::FeatureEncode *>(loco_graph->nodes()->at(1));
+ dynamic_cast<loco::FeatureEncode *>(loco_graph->nodes()->at(1));
loco::AvgPool2D *pool_node = dynamic_cast<loco::AvgPool2D *>(loco_graph->nodes()->at(2));
loco::FeatureDecode *decode_node =
- dynamic_cast<loco::FeatureDecode *>(loco_graph->nodes()->at(3));
+ dynamic_cast<loco::FeatureDecode *>(loco_graph->nodes()->at(3));
loco::Push *push_node = dynamic_cast<loco::Push *>(loco_graph->nodes()->at(4));
ASSERT_NE(pull_node, nullptr);
@@ -188,10 +188,10 @@ TEST_F(TestTransformer_mir2loco, Max_Pool_Test)
loco::Pull *pull_node = dynamic_cast<loco::Pull *>(loco_graph->nodes()->at(0));
loco::FeatureEncode *encode_node =
- dynamic_cast<loco::FeatureEncode *>(loco_graph->nodes()->at(1));
+ dynamic_cast<loco::FeatureEncode *>(loco_graph->nodes()->at(1));
loco::MaxPool2D *pool_node = dynamic_cast<loco::MaxPool2D *>(loco_graph->nodes()->at(2));
loco::FeatureDecode *decode_node =
- dynamic_cast<loco::FeatureDecode *>(loco_graph->nodes()->at(3));
+ dynamic_cast<loco::FeatureDecode *>(loco_graph->nodes()->at(3));
loco::Push *push_node = dynamic_cast<loco::Push *>(loco_graph->nodes()->at(4));
ASSERT_NE(pull_node, nullptr);
@@ -273,7 +273,7 @@ TEST_F(TestTransformer_mir2loco, Reshape_Test)
loco::Pull *pull_node = dynamic_cast<loco::Pull *>(loco_graph->nodes()->at(0));
loco::Reshape<loco::ReshapeType::Fixed> *reshape_node =
- dynamic_cast<loco::Reshape<loco::ReshapeType::Fixed> *>(loco_graph->nodes()->at(1));
+ dynamic_cast<loco::Reshape<loco::ReshapeType::Fixed> *>(loco_graph->nodes()->at(1));
loco::Push *push_node = dynamic_cast<loco::Push *>(loco_graph->nodes()->at(2));
ASSERT_NE(pull_node, nullptr);
@@ -383,28 +383,49 @@ TEST_F(TestTransformer_mir2loco, Conv2D_Test)
auto loco_graph = transformer.transform(&mir_graph);
loco::Pull *pull_node = dynamic_cast<loco::Pull *>(loco_graph->nodes()->at(0));
- loco::ConstGen *const_node = dynamic_cast<loco::ConstGen *>(loco_graph->nodes()->at(1));
- loco::FeatureEncode *encode_node =
- dynamic_cast<loco::FeatureEncode *>(loco_graph->nodes()->at(2));
- loco::FilterEncode *filter_node = dynamic_cast<loco::FilterEncode *>(loco_graph->nodes()->at(3));
- loco::Conv2D *conv_node = dynamic_cast<loco::Conv2D *>(loco_graph->nodes()->at(4));
- loco::FeatureDecode *decode_node =
- dynamic_cast<loco::FeatureDecode *>(loco_graph->nodes()->at(5));
- loco::Push *push_node = dynamic_cast<loco::Push *>(loco_graph->nodes()->at(6));
-
ASSERT_NE(pull_node, nullptr);
+
+ // ConstGen: Only one ConstGen node
+ // We can be sure this node is the input of FilterEncode because it is the only ConstGen node
+ loco::ConstGen *const_node = dynamic_cast<loco::ConstGen *>(loco_graph->nodes()->at(1));
ASSERT_NE(const_node, nullptr);
- ASSERT_NE(filter_node, nullptr);
+
+ // FeatureEncode
+ auto pull_uses = loco::succs(pull_node);
+ ASSERT_EQ(pull_uses.size(), 1);
+ loco::FeatureEncode *encode_node = dynamic_cast<loco::FeatureEncode *>(*pull_uses.begin());
ASSERT_NE(encode_node, nullptr);
- ASSERT_NE(conv_node, nullptr);
- ASSERT_NE(decode_node, nullptr);
- ASSERT_NE(push_node, nullptr);
ASSERT_EQ(encode_node->input(), pull_node);
- ASSERT_EQ(filter_node->input(), const_node);
+
+ // Conv2D
+ auto encode_uses = loco::succs(encode_node);
+ ASSERT_EQ(encode_uses.size(), 1);
+ loco::Conv2D *conv_node = dynamic_cast<loco::Conv2D *>(*encode_uses.begin());
+ ASSERT_NE(conv_node, nullptr);
ASSERT_EQ(conv_node->ifm(), encode_node);
+
+ // FilterEncode
+ auto const_uses = loco::succs(const_node);
+ ASSERT_EQ(const_uses.size(), 1);
+ loco::FilterEncode *filter_node = dynamic_cast<loco::FilterEncode *>(*const_uses.begin());
+ ASSERT_NE(filter_node, nullptr);
+ ASSERT_EQ(filter_node->input(), const_node);
ASSERT_EQ(conv_node->ker(), filter_node);
+
+ // FeatureDecode
+ auto conv_uses = loco::succs(conv_node);
+ ASSERT_EQ(conv_uses.size(), 1);
+ loco::FeatureDecode *decode_node = dynamic_cast<loco::FeatureDecode *>(*conv_uses.begin());
+ ASSERT_NE(decode_node, nullptr);
ASSERT_EQ(decode_node->input(), conv_node);
+
+ // Push
+ auto decode_uses = loco::succs(decode_node);
+ ASSERT_EQ(decode_uses.size(), 1);
+ loco::Push *push_node = dynamic_cast<loco::Push *>(*decode_uses.begin());
+ ASSERT_NE(push_node, nullptr);
ASSERT_EQ(push_node->from(), decode_node);
+
// Check params
ASSERT_EQ(conv_node->pad()->top(), 5);
ASSERT_EQ(conv_node->pad()->left(), 9);
@@ -430,7 +451,7 @@ TEST_F(TestTransformer_mir2loco, Softmax_Test)
loco::Pull *pull_node = dynamic_cast<loco::Pull *>(loco_graph->nodes()->at(0));
loco::TensorSoftmax *softmax_node =
- dynamic_cast<loco::TensorSoftmax *>(loco_graph->nodes()->at(1));
+ dynamic_cast<loco::TensorSoftmax *>(loco_graph->nodes()->at(1));
loco::Push *push_node = dynamic_cast<loco::Push *>(loco_graph->nodes()->at(2));
ASSERT_NE(pull_node, nullptr);
@@ -520,7 +541,7 @@ TEST_F(TestTransformer_mir2loco, DepthwiseConv2D_Test)
attributes.padding_after = {7, 4};
auto *conv =
- mir_graph.create<mir::ops::DepthwiseConv2DOp>(input, filter, attributes)->getOutput(0);
+ mir_graph.create<mir::ops::DepthwiseConv2DOp>(input, filter, attributes)->getOutput(0);
mir_graph.create<mir::ops::OutputOp>(conv);
input->setName("x");
@@ -545,7 +566,7 @@ TEST_F(TestTransformer_mir2loco, DepthwiseConv2D_Test)
loco::DepthwiseConv2D *dw_conv_node = dynamic_cast<loco::DepthwiseConv2D *>(*encode_uses.begin());
ASSERT_NE(dw_conv_node, nullptr);
loco::DepthwiseFilterEncode *filter_node =
- dynamic_cast<loco::DepthwiseFilterEncode *>(dw_conv_node->ker());
+ dynamic_cast<loco::DepthwiseFilterEncode *>(dw_conv_node->ker());
ASSERT_NE(filter_node, nullptr);
ASSERT_EQ(dw_conv_node->ifm(), encode_node);
// Check params
@@ -611,7 +632,7 @@ TEST_F(TestTransformer_mir2loco, DeConv2D_Test)
auto encode_uses = loco::succs(encode_node);
ASSERT_EQ(encode_uses.size(), 1);
loco::TransposedConv2D *tr_conv_node =
- dynamic_cast<loco::TransposedConv2D *>(*encode_uses.begin());
+ dynamic_cast<loco::TransposedConv2D *>(*encode_uses.begin());
ASSERT_NE(tr_conv_node, nullptr);
loco::FilterEncode *filter_node = dynamic_cast<loco::FilterEncode *>(tr_conv_node->ker());
ASSERT_NE(filter_node, nullptr);
@@ -703,8 +724,8 @@ TEST_F(TestTransformer_mir2loco, Transpose_Test)
mir::TensorType input_type{mir::DataType::FLOAT32, {2, 7, 9, 5}};
auto *input = mir_graph.create<mir::ops::InputOp>(input_type)->getOutput(0);
auto *transpose =
- mir_graph.create<mir::ops::TransposeOp>(input, std::vector<std::size_t>{3, 0, 1, 2})
- ->getOutput(0);
+ mir_graph.create<mir::ops::TransposeOp>(input, std::vector<std::size_t>{3, 0, 1, 2})
+ ->getOutput(0);
mir_graph.create<mir::ops::OutputOp>(transpose);
input->setName("x");
transpose->setName("y");
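Note: the Conv2D_Test rewrite above stops indexing loco_graph->nodes()->at(k), which is coupled to node creation order, and instead follows graph edges through loco::succs. The step it repeats could be factored into a helper like this (a sketch, not part of the patch):

    #include <loco.h>

    #include <cassert>

    // Returns the single user of `node`, downcast to NodeT; the rewritten
    // tests assert that each intermediate node has exactly one successor.
    template <typename NodeT> NodeT *sole_user(loco::Node *node)
    {
      auto uses = loco::succs(node);
      assert(uses.size() == 1);
      return dynamic_cast<NodeT *>(*uses.begin());
    }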
diff --git a/compiler/moco-log/CMakeLists.txt b/compiler/moco-log/CMakeLists.txt
index 036b4e74b..af6052d0c 100644
--- a/compiler/moco-log/CMakeLists.txt
+++ b/compiler/moco-log/CMakeLists.txt
@@ -5,5 +5,4 @@ add_library(moco_log SHARED ${SOURCES})
target_include_directories(moco_log PUBLIC include)
target_link_libraries(moco_log PUBLIC hermes)
target_link_libraries(moco_log PRIVATE hermes_std)
-target_link_libraries(moco_log PRIVATE stdex)
install(TARGETS moco_log DESTINATION lib)
diff --git a/compiler/moco-log/src/LoggingContext.cpp b/compiler/moco-log/src/LoggingContext.cpp
index a004e1d3d..c75e5e21f 100644
--- a/compiler/moco-log/src/LoggingContext.cpp
+++ b/compiler/moco-log/src/LoggingContext.cpp
@@ -18,7 +18,8 @@
#include "moco/Log.h"
#include <hermes/ConsoleReporter.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace moco
{
@@ -30,8 +31,8 @@ hermes::Context *LoggingContext::get(void)
if (ctx == nullptr)
{
ctx = new hermes::Context;
- ctx->sinks()->append(stdex::make_unique<hermes::ConsoleReporter>());
- ctx->config(stdex::make_unique<LoggerConfig>());
+ ctx->sinks()->append(std::make_unique<hermes::ConsoleReporter>());
+ ctx->config(std::make_unique<LoggerConfig>());
}
return ctx;
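Note: this file is part of a repo-wide migration from the project-local stdex::make_unique to C++14's std::make_unique, which is why the nearby CMake hunks can also drop stdex from link lines. The replacement is mechanical (sketch):

    #include <memory>

    struct Widget
    {
      int value = 0;
    };

    int main()
    {
      // Before: auto w = stdex::make_unique<Widget>(); // needed <stdex/Memory.h>
      auto w = std::make_unique<Widget>(); // only <memory> required
      return w->value;
    }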
diff --git a/compiler/moco-tf/CMakeLists.txt b/compiler/moco-tf/CMakeLists.txt
index 5516388a4..95669264f 100644
--- a/compiler/moco-tf/CMakeLists.txt
+++ b/compiler/moco-tf/CMakeLists.txt
@@ -19,7 +19,6 @@ target_link_libraries(moco_tf_frontend PRIVATE moco_support)
target_link_libraries(moco_tf_frontend PRIVATE bino)
target_link_libraries(moco_tf_frontend PRIVATE fipe)
target_link_libraries(moco_tf_frontend PRIVATE locop)
-target_link_libraries(moco_tf_frontend PRIVATE stdex)
target_link_libraries(moco_tf_frontend PRIVATE moco_log)
target_link_libraries(moco_tf_frontend PRIVATE pepper_str)
target_link_libraries(moco_tf_frontend PRIVATE pepper_strcast)
@@ -27,6 +26,7 @@ target_link_libraries(moco_tf_frontend PRIVATE locomotiv)
target_link_libraries(moco_tf_frontend PRIVATE plier_tf)
target_link_libraries(moco_tf_frontend PRIVATE locoex_customop)
target_link_libraries(moco_tf_frontend PRIVATE logo)
+target_link_libraries(moco_tf_frontend PRIVATE logo_ex)
target_link_libraries(moco_tf_frontend PRIVATE oops)
install(TARGETS moco_tf_frontend DESTINATION lib)
@@ -44,8 +44,8 @@ target_link_libraries(moco_tf_frontend_test fipe)
target_link_libraries(moco_tf_frontend_test locop)
target_link_libraries(moco_tf_frontend_test moco_log)
target_link_libraries(moco_tf_frontend_test moco_tf_frontend)
-target_link_libraries(moco_tf_frontend_test stdex)
target_link_libraries(moco_tf_frontend_test plier_tf)
target_link_libraries(moco_tf_frontend_test locoex_customop)
target_link_libraries(moco_tf_frontend_test logo)
+target_link_libraries(moco_tf_frontend_test logo_ex)
add_test(moco_tf_frontend_test moco_tf_frontend_test)
diff --git a/compiler/moco-tf/requires.cmake b/compiler/moco-tf/requires.cmake
index 3e0fabee9..71755556c 100644
--- a/compiler/moco-tf/requires.cmake
+++ b/compiler/moco-tf/requires.cmake
@@ -2,7 +2,6 @@ require("fipe")
require("loco")
require("moco")
require("locop")
-require("stdex")
require("moco-log")
require("pepper-strcast")
require("locomotiv")
@@ -10,5 +9,6 @@ require("mio-tf")
require("plier-tf")
require("locoex-customop")
require("logo")
+require("logo-ex")
require("oops")
require("bino")
diff --git a/compiler/moco-tf/src/BroadcastHelper.h b/compiler/moco-tf/src/BroadcastHelper.h
index 6238ad269..d4e1bba55 100644
--- a/compiler/moco-tf/src/BroadcastHelper.h
+++ b/compiler/moco-tf/src/BroadcastHelper.h
@@ -65,7 +65,7 @@ private:
* This mimics "tf.broadcast_to" API in TensorFlow.
*/
static inline auto broadcast_to(const loco::TensorShape &shape)
- -> decltype(bino::transform_both(std::declval<BroadcastFunctor>()))
+ -> decltype(bino::transform_both(std::declval<BroadcastFunctor>()))
{
return bino::transform_both(BroadcastFunctor{shape});
}
diff --git a/compiler/moco-tf/src/Canonicalization/ConcatV2Canonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/ConcatV2Canonicalizer.cpp
index b59a3f3d7..71f6230b7 100644
--- a/compiler/moco-tf/src/Canonicalization/ConcatV2Canonicalizer.cpp
+++ b/compiler/moco-tf/src/Canonicalization/ConcatV2Canonicalizer.cpp
@@ -24,7 +24,6 @@
#include <loco/Service/ShapeInference.h>
-#include <stdex/Memory.h>
#include <oops/UserExn.h>
namespace
diff --git a/compiler/moco-tf/src/Canonicalization/Conv2DBackpropInputCanonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/Conv2DBackpropInputCanonicalizer.cpp
index d3cbd4ab3..1d3343933 100644
--- a/compiler/moco-tf/src/Canonicalization/Conv2DBackpropInputCanonicalizer.cpp
+++ b/compiler/moco-tf/src/Canonicalization/Conv2DBackpropInputCanonicalizer.cpp
@@ -32,7 +32,7 @@ using plier::tf::DataLayout;
void set_filter_enc(loco::FilterEncode *filter_enc)
{
- auto enc = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
+ auto enc = std::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
// In TensorFlow, Conv2dBackpropInput's filter is a 4-D tensor of following shape:
// [filter_height, filter_width, out_channels, in_channels] or HWOI or HWNC (in/out in loco sense)
@@ -163,9 +163,9 @@ loco::Padding2D Padding2DInference::operator()(void)
// 'tight fit' output. When output size (set by 'input sizes' node input) is
// larger than tight fit, extra spaces filled with zero.
auto tight_output_vertical = tight_output_for_valid_padding(
- input().vertical.value(), stride().vertical(), window().vertical());
+ input().vertical.value(), stride().vertical(), window().vertical());
auto tight_output_horizontal = tight_output_for_valid_padding(
- input().horizontal.value(), stride().horizontal(), window().horizontal());
+ input().horizontal.value(), stride().horizontal(), window().horizontal());
if (output().vertical.value() < tight_output_vertical or
output().horizontal.value() < tight_output_horizontal)
@@ -191,8 +191,8 @@ loco::Padding2D Padding2DInference::operator()(void)
auto whole_pad_vertical = padding_needed(input().vertical.value(), output().vertical.value(),
stride().vertical(), window().vertical());
auto whole_pad_horizontal =
- padding_needed(input().horizontal.value(), output().horizontal.value(),
- stride().horizontal(), window().horizontal());
+ padding_needed(input().horizontal.value(), output().horizontal.value(), stride().horizontal(),
+ window().horizontal());
loco::Padding2D res;
diff --git a/compiler/moco-tf/src/Canonicalization/Conv2DCanonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/Conv2DCanonicalizer.cpp
index a955793a8..30f01cdd3 100644
--- a/compiler/moco-tf/src/Canonicalization/Conv2DCanonicalizer.cpp
+++ b/compiler/moco-tf/src/Canonicalization/Conv2DCanonicalizer.cpp
@@ -29,7 +29,7 @@ using plier::tf::DataLayout;
void set_filter_enc(loco::FilterEncode *filter_enc)
{
- auto enc = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
+ auto enc = std::make_unique<loco::PermutingEncoder<loco::Domain::Filter>>();
// In TensorFlow, conv2d filter is a 4-D tensor of following shape:
// [filter_height, filter_width, in_channels, out_channels] -> HWIO (HWCN)
diff --git a/compiler/moco-tf/src/Canonicalization/DepthwiseConv2dNativeCanonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/DepthwiseConv2dNativeCanonicalizer.cpp
index 50dddf637..dd04c2427 100644
--- a/compiler/moco-tf/src/Canonicalization/DepthwiseConv2dNativeCanonicalizer.cpp
+++ b/compiler/moco-tf/src/Canonicalization/DepthwiseConv2dNativeCanonicalizer.cpp
@@ -30,7 +30,7 @@ using plier::tf::DataLayout;
void set_filter_enc(loco::DepthwiseFilterEncode *filter_enc)
{
- auto enc = stdex::make_unique<loco::PermutingEncoder<loco::Domain::DepthwiseFilter>>();
+ auto enc = std::make_unique<loco::PermutingEncoder<loco::Domain::DepthwiseFilter>>();
// In TensorFlow, depthwiseconv2dnative filter is a 4-D tensor of following shape:
// [filter_height, filter_width, in_channels, channel_multiplier] -> HWCM
@@ -47,28 +47,28 @@ bool canonicalize_depthwiseconv2dnative(loco::Graph *graph, moco::TFDepthwiseCon
LOGGER(l);
/**
- * @note This will replace TFDepthwiseConv2dNative node with Canonical FeatureEncode +
- * DepthwiseFilterEncode + DepthwiseConv2D + FeatureDecode
- *
- * Before
- * A -+- TFDepthwiseConv2dNative - C
- * |
- * B -+
- *
- * After
- *
- * A -+ FeatureEncode ----------------+- DepthwiseConv2D - FeatureDecode - C
- * | |
- * +-(TFDepthwiseConv2dNative) |
- * | |
- * B -+ DepthwiseFilterEncode --------+
- *
- * Where
- * A : ifm of TFDepthwiseConv2dNative
- * B : ker of TFDepthwiseConv2dNative
- * C : a node that uses TFDepthwiseConv2dNative as an input
- * TFDepthwiseConv2dNative is disconnected from other nodes
- */
+ * @note This will replace TFDepthwiseConv2dNative node with Canonical FeatureEncode +
+ * DepthwiseFilterEncode + DepthwiseConv2D + FeatureDecode
+ *
+ * Before
+ * A -+- TFDepthwiseConv2dNative - C
+ * |
+ * B -+
+ *
+ * After
+ *
+ * A -+ FeatureEncode ----------------+- DepthwiseConv2D - FeatureDecode - C
+ * | |
+ * +-(TFDepthwiseConv2dNative) |
+ * | |
+ * B -+ DepthwiseFilterEncode --------+
+ *
+ * Where
+ * A : ifm of TFDepthwiseConv2dNative
+ * B : ker of TFDepthwiseConv2dNative
+ * C : a node that uses TFDepthwiseConv2dNative as an input
+ * TFDepthwiseConv2dNative is disconnected from other nodes
+ */
INFO(l) << "TFNodeCanonicalize TFDepthwiseConv2dNative begin";
diff --git a/compiler/moco-tf/src/Canonicalization/PadCanonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/PadCanonicalizer.cpp
index 36136aed4..28ecc3fc0 100644
--- a/compiler/moco-tf/src/Canonicalization/PadCanonicalizer.cpp
+++ b/compiler/moco-tf/src/Canonicalization/PadCanonicalizer.cpp
@@ -20,8 +20,6 @@
#include "loco/Service/TypeInference.h"
-#include <stdex/Memory.h>
-
namespace
{
diff --git a/compiler/moco-tf/src/Canonicalization/Relu6Canonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/Relu6Canonicalizer.cpp
index c53a880a8..1179ef7f6 100644
--- a/compiler/moco-tf/src/Canonicalization/Relu6Canonicalizer.cpp
+++ b/compiler/moco-tf/src/Canonicalization/Relu6Canonicalizer.cpp
@@ -18,8 +18,6 @@
#include <moco/IR/TFDialect.h>
-#include <stdex/Memory.h>
-
namespace
{
diff --git a/compiler/moco-tf/src/Canonicalization/ReluCanonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/ReluCanonicalizer.cpp
index 7965dc931..bb2a71bc0 100644
--- a/compiler/moco-tf/src/Canonicalization/ReluCanonicalizer.cpp
+++ b/compiler/moco-tf/src/Canonicalization/ReluCanonicalizer.cpp
@@ -18,8 +18,6 @@
#include <moco/IR/TFDialect.h>
-#include <stdex/Memory.h>
-
namespace
{
diff --git a/compiler/moco-tf/src/Canonicalization/RsqrtCanonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/RsqrtCanonicalizer.cpp
index c31dbf6d6..25eae6288 100644
--- a/compiler/moco-tf/src/Canonicalization/RsqrtCanonicalizer.cpp
+++ b/compiler/moco-tf/src/Canonicalization/RsqrtCanonicalizer.cpp
@@ -23,7 +23,6 @@
#include <loco/Service/TypeInference.h>
-#include <stdex/Memory.h>
#include <oops/UserExn.h>
namespace
diff --git a/compiler/moco-tf/src/Canonicalization/SoftmaxCanonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/SoftmaxCanonicalizer.cpp
index 98af7b693..9fcb76c2a 100644
--- a/compiler/moco-tf/src/Canonicalization/SoftmaxCanonicalizer.cpp
+++ b/compiler/moco-tf/src/Canonicalization/SoftmaxCanonicalizer.cpp
@@ -31,16 +31,16 @@ bool canonicalize_softmax(loco::Graph *graph, moco::TFSoftmax *node)
INFO(l) << "TFNodeCanonicalize TFSoftmax begin";
/**
- * This will replace shape inferred TFSoftmax node into canonical TensorSoftmax
- *
- * Before
- * In ---- TFSoftmax ---- Out(s)
- *
- * After
- * ------ TFSoftmax
- * /
- * In ---- TensorSoftmax ----- Out(s)
- */
+ * This will replace the shape-inferred TFSoftmax node with a canonical TensorSoftmax
+ *
+ * Before
+ * In ---- TFSoftmax ---- Out(s)
+ *
+ * After
+ * ------ TFSoftmax
+ * /
+ * In ---- TensorSoftmax ----- Out(s)
+ */
auto nodeshape = moco::node_shape(node);
// Canonicalization into TensorSoftmax is valid when softmax has shape info
diff --git a/compiler/moco-tf/src/Canonicalization/SoftmaxCanonicalizer.h b/compiler/moco-tf/src/Canonicalization/SoftmaxCanonicalizer.h
index ebaf04cfe..33fc14a6d 100644
--- a/compiler/moco-tf/src/Canonicalization/SoftmaxCanonicalizer.h
+++ b/compiler/moco-tf/src/Canonicalization/SoftmaxCanonicalizer.h
@@ -15,7 +15,7 @@
*/
#ifndef __MOCO_TF_SOFTMAX_CANONICALIZER_H__
-#define __MOCO_TF_SOFTMAx_CANONICALIZER_H__
+#define __MOCO_TF_SOFTMAX_CANONICALIZER_H__
#include "Transform.h"
#include "SimpleNodeTransform.h"
@@ -30,8 +30,8 @@ namespace tf
{
/**
-* @brief Canonicalize TF-dialect TFSoftmax into canonical Softmax node
-*/
+ * @brief Canonicalize TF-dialect TFSoftmax into canonical Softmax node
+ */
class SoftmaxCanonicalizer : public SimpleNodeTransform<moco::TFSoftmax>
{
public:
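Note: the guard fix in this hunk matters because a mismatched #ifndef/#define pair never defines the macro being tested, so the header silently loses its multiple-inclusion protection:

    // Broken: the tested macro is never defined, so every #include
    // re-expands the header body.
    #ifndef __EXAMPLE_H__
    #define __EXAMPLe_H__ // typo, as in the old SOFTMAx guard
    #endif

    // Fixed: test and definition use the same spelling.
    #ifndef __EXAMPLE_H__
    #define __EXAMPLE_H__
    #endif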
diff --git a/compiler/moco-tf/src/Canonicalization/StopGradientCanonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/StopGradientCanonicalizer.cpp
index 574fa3993..47ac40ea8 100644
--- a/compiler/moco-tf/src/Canonicalization/StopGradientCanonicalizer.cpp
+++ b/compiler/moco-tf/src/Canonicalization/StopGradientCanonicalizer.cpp
@@ -30,16 +30,16 @@ bool canonicalize_stopgradient(loco::Graph *graph, moco::TFStopGradient *node)
INFO(l) << "TFNodeCanonicalize TFStopGradient begin";
/**
- * This will replace shape inferred TFStopGradient node into canonical Forward
- *
- * Before
- * In --- TFStopGradient --- Out(s)
- *
- * After
- * -- TFStopGradient
- * /
- * In --- Forward --- Out(s)
- */
+ * This will replace the shape-inferred TFStopGradient node with a canonical Forward
+ *
+ * Before
+ * In --- TFStopGradient --- Out(s)
+ *
+ * After
+ * -- TFStopGradient
+ * /
+ * In --- Forward --- Out(s)
+ */
// Create loco node to replace
auto forward_node = graph->nodes()->create<loco::Forward>();
diff --git a/compiler/moco-tf/src/Canonicalization/StopGradientCanonicalizer.h b/compiler/moco-tf/src/Canonicalization/StopGradientCanonicalizer.h
index 6a17728a6..8346914c0 100644
--- a/compiler/moco-tf/src/Canonicalization/StopGradientCanonicalizer.h
+++ b/compiler/moco-tf/src/Canonicalization/StopGradientCanonicalizer.h
@@ -30,8 +30,8 @@ namespace tf
{
/**
-* @brief Canonicalize TF-dialect TFStopGradient into canonical Forward node
-*/
+ * @brief Canonicalize TF-dialect TFStopGradient into canonical Forward node
+ */
class StopGradientCanonicalizer : public SimpleNodeTransform<moco::TFStopGradient>
{
public:
diff --git a/compiler/moco-tf/src/Canonicalization/TFPushCanonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/TFPushCanonicalizer.cpp
index 081e0e5f9..3adf1733c 100644
--- a/compiler/moco-tf/src/Canonicalization/TFPushCanonicalizer.cpp
+++ b/compiler/moco-tf/src/Canonicalization/TFPushCanonicalizer.cpp
@@ -18,8 +18,6 @@
#include <moco/IR/TFDialect.h>
-#include <stdex/Memory.h>
-
namespace
{
diff --git a/compiler/moco-tf/src/Canonicalization/TanhCanonicalizer.cpp b/compiler/moco-tf/src/Canonicalization/TanhCanonicalizer.cpp
index 3f48a50fc..3b6e3c90c 100644
--- a/compiler/moco-tf/src/Canonicalization/TanhCanonicalizer.cpp
+++ b/compiler/moco-tf/src/Canonicalization/TanhCanonicalizer.cpp
@@ -18,8 +18,6 @@
#include <moco/IR/TFDialect.h>
-#include <stdex/Memory.h>
-
namespace
{
diff --git a/compiler/moco-tf/src/Canonicalizer.cpp b/compiler/moco-tf/src/Canonicalizer.cpp
index 04bc7c57a..8e23d91df 100644
--- a/compiler/moco-tf/src/Canonicalizer.cpp
+++ b/compiler/moco-tf/src/Canonicalizer.cpp
@@ -56,8 +56,7 @@
#include <logo/Phase.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
namespace
@@ -92,41 +91,41 @@ void Canonicalizer::canonicalize(loco::Graph *g) const
/* TRANSFORM DECLARATION BEGIN */
// Run shape and type inference at the top
- phase.emplace_back(stdex::make_unique<ShapeInferencePass>());
- phase.emplace_back(stdex::make_unique<TypeInferencePass>());
+ phase.emplace_back(std::make_unique<ShapeInferencePass>());
+ phase.emplace_back(std::make_unique<TypeInferencePass>());
- phase.emplace_back(stdex::make_unique<AddCanonicalizer>());
- phase.emplace_back(stdex::make_unique<AvgPoolCanonicalizer>());
+ phase.emplace_back(std::make_unique<AddCanonicalizer>());
+ phase.emplace_back(std::make_unique<AvgPoolCanonicalizer>());
if (moco::tf::get<moco::tf::Knob::CanonicalizeBiasAdd>())
- phase.emplace_back(stdex::make_unique<BiasAddCanonicalizer>());
- phase.emplace_back(stdex::make_unique<ConcatV2Canonicalizer>());
+ phase.emplace_back(std::make_unique<BiasAddCanonicalizer>());
+ phase.emplace_back(std::make_unique<ConcatV2Canonicalizer>());
if (moco::tf::get<moco::tf::Knob::CanonicalizeConst>())
- phase.emplace_back(stdex::make_unique<ConstCanonicalizer>());
- phase.emplace_back(stdex::make_unique<Conv2DBackpropInputCanonicalizer>());
+ phase.emplace_back(std::make_unique<ConstCanonicalizer>());
+ phase.emplace_back(std::make_unique<Conv2DBackpropInputCanonicalizer>());
if (moco::tf::get<moco::tf::Knob::CanonicalizeConv2D>())
- phase.emplace_back(stdex::make_unique<Conv2DCanonicalizer>());
- phase.emplace_back(stdex::make_unique<DepthwiseConv2dNativeCanonicalizer>());
- phase.emplace_back(stdex::make_unique<IdentityCanonicalizer>());
- phase.emplace_back(stdex::make_unique<MaximumCanonicalizer>());
- phase.emplace_back(stdex::make_unique<MaxPoolCanonicalizer>());
- phase.emplace_back(stdex::make_unique<MeanCanonicalizer>());
- phase.emplace_back(stdex::make_unique<MulCanonicalizer>());
- phase.emplace_back(stdex::make_unique<PadCanonicalizer>());
- phase.emplace_back(stdex::make_unique<PlaceholderCanonicalizer>());
- phase.emplace_back(stdex::make_unique<RealDivCanonicalizer>());
- phase.emplace_back(stdex::make_unique<ReluCanonicalizer>());
- phase.emplace_back(stdex::make_unique<Relu6Canonicalizer>());
- phase.emplace_back(stdex::make_unique<ReshapeCanonicalizer>());
- phase.emplace_back(stdex::make_unique<RsqrtCanonicalizer>());
- phase.emplace_back(stdex::make_unique<SoftmaxCanonicalizer>());
- phase.emplace_back(stdex::make_unique<SqrtCanonicalizer>());
+ phase.emplace_back(std::make_unique<Conv2DCanonicalizer>());
+ phase.emplace_back(std::make_unique<DepthwiseConv2dNativeCanonicalizer>());
+ phase.emplace_back(std::make_unique<IdentityCanonicalizer>());
+ phase.emplace_back(std::make_unique<MaximumCanonicalizer>());
+ phase.emplace_back(std::make_unique<MaxPoolCanonicalizer>());
+ phase.emplace_back(std::make_unique<MeanCanonicalizer>());
+ phase.emplace_back(std::make_unique<MulCanonicalizer>());
+ phase.emplace_back(std::make_unique<PadCanonicalizer>());
+ phase.emplace_back(std::make_unique<PlaceholderCanonicalizer>());
+ phase.emplace_back(std::make_unique<RealDivCanonicalizer>());
+ phase.emplace_back(std::make_unique<ReluCanonicalizer>());
+ phase.emplace_back(std::make_unique<Relu6Canonicalizer>());
+ phase.emplace_back(std::make_unique<ReshapeCanonicalizer>());
+ phase.emplace_back(std::make_unique<RsqrtCanonicalizer>());
+ phase.emplace_back(std::make_unique<SoftmaxCanonicalizer>());
+ phase.emplace_back(std::make_unique<SqrtCanonicalizer>());
// NOTE SquaredDifference is handled in ResolveSquaredDifference
- phase.emplace_back(stdex::make_unique<SqueezeCanonicalizer>());
- phase.emplace_back(stdex::make_unique<StopGradientCanonicalizer>());
- phase.emplace_back(stdex::make_unique<SubCanonicalizer>());
- phase.emplace_back(stdex::make_unique<TanhCanonicalizer>());
+ phase.emplace_back(std::make_unique<SqueezeCanonicalizer>());
+ phase.emplace_back(std::make_unique<StopGradientCanonicalizer>());
+ phase.emplace_back(std::make_unique<SubCanonicalizer>());
+ phase.emplace_back(std::make_unique<TanhCanonicalizer>());
// For virtual nodes
- phase.emplace_back(stdex::make_unique<TFPushCanonicalizer>());
+ phase.emplace_back(std::make_unique<TFPushCanonicalizer>());
/* TRANSFORM DECLARATION END */
ProgressReporter prog(g, logo::PhaseStrategy::Restart);
diff --git a/compiler/moco-tf/src/CodecHelper.h b/compiler/moco-tf/src/CodecHelper.h
index 85e4e2164..a4ca8d5ca 100644
--- a/compiler/moco-tf/src/CodecHelper.h
+++ b/compiler/moco-tf/src/CodecHelper.h
@@ -18,7 +18,8 @@
#define __CODEC_HELPER_H__
#include <plier/tf/Convert.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
@@ -27,7 +28,7 @@ using plier::tf::DataLayout;
void set_feature_enc(loco::FeatureEncode *feature_enc, DataLayout data_layout)
{
- auto enc = stdex::make_unique<loco::PermutingEncoder<loco::Domain::Feature>>();
+ auto enc = std::make_unique<loco::PermutingEncoder<loco::Domain::Feature>>();
if (data_layout == DataLayout::NHWC)
{
@@ -49,7 +50,7 @@ void set_feature_enc(loco::FeatureEncode *feature_enc, DataLayout data_layout)
void set_feature_dec(loco::FeatureDecode *feature_dec, DataLayout data_layout)
{
- auto dec = stdex::make_unique<loco::PermutingDecoder<loco::Domain::Feature>>();
+ auto dec = std::make_unique<loco::PermutingDecoder<loco::Domain::Feature>>();
if (data_layout == DataLayout::NHWC)
{
diff --git a/compiler/moco-tf/src/Frontend.cpp b/compiler/moco-tf/src/Frontend.cpp
index a17d5dd0e..0d5250b17 100644
--- a/compiler/moco-tf/src/Frontend.cpp
+++ b/compiler/moco-tf/src/Frontend.cpp
@@ -31,13 +31,13 @@
#include <loco/Service/ShapeInference.h>
-#include <stdex/Memory.h>
#include <oops/UserExn.h>
#include <google/protobuf/io/coded_stream.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <google/protobuf/text_format.h>
+#include <memory>
#include <iostream>
#include <sstream>
#include <fstream>
@@ -157,7 +157,7 @@ moco::GraphBuilderRegistry make_graph_builder_registry(const moco::ModelSignatur
for (const auto &custom_op : sig.customops())
{
std::unique_ptr<moco::tf::COpCallGraphBuilder> builder =
- stdex::make_unique<moco::tf::COpCallGraphBuilder>(&sig);
+ std::make_unique<moco::tf::COpCallGraphBuilder>(&sig);
registry.add(custom_op, std::move(builder));
}
@@ -243,7 +243,7 @@ std::unique_ptr<loco::Graph> Frontend::import(const ModelSignature &signature,
auto input = graph->inputs()->at(n);
auto input_node = moco::placeholder_node(graph.get(), n);
assert(input_node != nullptr);
- input->shape(stdex::make_unique<loco::TensorShape>(tensor_shape(input_node)));
+ input->shape(std::make_unique<loco::TensorShape>(tensor_shape(input_node)));
}
for (uint32_t n = 0; n < graph->outputs()->size(); ++n)
@@ -251,7 +251,7 @@ std::unique_ptr<loco::Graph> Frontend::import(const ModelSignature &signature,
auto output = graph->outputs()->at(n);
auto output_node = moco::push_node(graph.get(), n);
assert(output_node != nullptr);
- output->shape(stdex::make_unique<loco::TensorShape>(::tensor_shape(output_node)));
+ output->shape(std::make_unique<loco::TensorShape>(::tensor_shape(output_node)));
}
// Convert graph to hold only Canonical dialect
diff --git a/compiler/moco-tf/src/Knob.cpp b/compiler/moco-tf/src/Knob.cpp
index 0e1c7e0ea..a13895f68 100644
--- a/compiler/moco-tf/src/Knob.cpp
+++ b/compiler/moco-tf/src/Knob.cpp
@@ -109,12 +109,12 @@ namespace moco
namespace tf
{
-#define KNOB_BOOL(NAME, DEFAULT, DESC) \
- template <> typename KnobTrait<Knob::NAME>::ValueType get<Knob::NAME>(void) \
- { \
- static typename KnobTrait<Knob::NAME>::ValueType value = \
- ::knob_load<typename KnobTrait<Knob::NAME>::ValueType>(::knob_loader(), #NAME, DEFAULT); \
- return value; \
+#define KNOB_BOOL(NAME, DEFAULT, DESC) \
+ template <> typename KnobTrait<Knob::NAME>::ValueType get<Knob::NAME>(void) \
+ { \
+ static typename KnobTrait<Knob::NAME>::ValueType value = \
+ ::knob_load<typename KnobTrait<Knob::NAME>::ValueType>(::knob_loader(), #NAME, DEFAULT); \
+ return value; \
}
#include "Knob.lst"
#undef KNOB_BOOL
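Note: the KNOB_BOOL hunk is only a re-alignment of the continuation backslashes, but the idiom around it is worth noting: Knob.lst is an X-macro list that gets expanded under a local KNOB_BOOL definition and #undef'd afterwards. The general shape, with a hypothetical entry:

    // Knob.lst holds one entry per knob, e.g.
    //   KNOB_BOOL(ConstantFolding, true, "Enable constant folding")
    #define KNOB_BOOL(NAME, DEFAULT, DESC) bool knob_##NAME = DEFAULT;
    KNOB_BOOL(ConstantFolding, true, "Enable constant folding")
    #undef KNOB_BOOL
    // expands to: bool knob_ConstantFolding = true;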
diff --git a/compiler/moco-tf/src/LogHelper.cpp b/compiler/moco-tf/src/LogHelper.cpp
index 92ff75569..6b127020a 100644
--- a/compiler/moco-tf/src/LogHelper.cpp
+++ b/compiler/moco-tf/src/LogHelper.cpp
@@ -74,7 +74,7 @@ namespace tf
FormattedGraph fmt(loco::Graph *g)
{
- auto node_summary_builder = stdex::make_unique<TFNodeSummaryBuilderFactory>();
+ auto node_summary_builder = std::make_unique<TFNodeSummaryBuilderFactory>();
return std::move(locop::fmt<locop::LinearV1>(g).with(std::move(node_summary_builder)));
}
diff --git a/compiler/moco-tf/src/Op/COpCall.cpp b/compiler/moco-tf/src/Op/COpCall.cpp
index 801196f0f..af4bc9dc4 100644
--- a/compiler/moco-tf/src/Op/COpCall.cpp
+++ b/compiler/moco-tf/src/Op/COpCall.cpp
@@ -23,9 +23,9 @@
#include <moco/Names.h>
#include <moco/tf/Frontend.h>
#include <loco.h>
-#include <stdex/Memory.h>
#include <oops/UserExn.h>
+#include <memory>
#include <vector>
#include <cassert>
#include <stdexcept>
@@ -37,7 +37,7 @@ class COpCallGraphUpdate final : public moco::GraphUpdate
{
public:
COpCallGraphUpdate(locoex::COpCall *node, const std::vector<moco::TensorName> &input_names)
- : _node(node), _input_names(input_names)
+ : _node(node), _input_names(input_names)
{
}
@@ -94,11 +94,11 @@ void COpCallGraphBuilder::build(const tensorflow::NodeDef &tf_node,
if (val.value_case() == tensorflow::AttrValue::kF)
{
- call_node->attr(name, stdex::make_unique<locoex::COpAttrFloat>(val.f()));
+ call_node->attr(name, std::make_unique<locoex::COpAttrFloat>(val.f()));
}
else if (val.value_case() == tensorflow::AttrValue::kI)
{
- call_node->attr(name, stdex::make_unique<locoex::COpAttrInt>(val.i()));
+ call_node->attr(name, std::make_unique<locoex::COpAttrInt>(val.i()));
}
// TODO define more types
else
@@ -118,7 +118,7 @@ void COpCallGraphBuilder::build(const tensorflow::NodeDef &tf_node,
{
input_names.emplace_back(TensorName(tf_node.input(i)));
}
- auto update = stdex::make_unique<COpCallGraphUpdate>(call_node, input_names);
+ auto update = std::make_unique<COpCallGraphUpdate>(call_node, input_names);
updates->enroll(std::move(update));
}
diff --git a/compiler/moco-tf/src/Op/COpCall.h b/compiler/moco-tf/src/Op/COpCall.h
index 0bb8a93c9..2f0ee1e36 100644
--- a/compiler/moco-tf/src/Op/COpCall.h
+++ b/compiler/moco-tf/src/Op/COpCall.h
@@ -32,7 +32,9 @@ namespace tf
class COpCallGraphBuilder final : public GraphBuilder
{
public:
- COpCallGraphBuilder(const ModelSignature *signature) : _signature(signature) { /* empty */}
+ COpCallGraphBuilder(const ModelSignature *signature) : _signature(signature)
+ { /* empty */
+ }
bool validate(const tensorflow::NodeDef &) const override;
void build(const tensorflow::NodeDef &, GraphBuilderContext *) const override;
diff --git a/compiler/moco-tf/src/Op/COpCall.test.cpp b/compiler/moco-tf/src/Op/COpCall.test.cpp
index f13118292..7e1ffa954 100644
--- a/compiler/moco-tf/src/Op/COpCall.test.cpp
+++ b/compiler/moco-tf/src/Op/COpCall.test.cpp
@@ -27,10 +27,11 @@
#include <loco.h>
#include <plier/tf/TestHelper.h>
-#include <stdex/Memory.h>
#include <gtest/gtest.h>
+#include <memory>
+
using namespace moco::tf::test;
namespace
@@ -91,7 +92,7 @@ TEST(Call_Test, Call_01)
// import
moco::GraphBuilderRegistry registry{&moco::GraphBuilderRegistry::get()};
- registry.add("new_custom_op", stdex::make_unique<moco::tf::COpCallGraphBuilder>(&signature));
+ registry.add("new_custom_op", std::make_unique<moco::tf::COpCallGraphBuilder>(&signature));
moco::Importer importer(&registry);
std::unique_ptr<loco::Graph> graph = importer.import(signature, graph_def);
diff --git a/compiler/moco-tf/src/Optimizer.cpp b/compiler/moco-tf/src/Optimizer.cpp
index f33b4109b..51e1e1c4f 100644
--- a/compiler/moco-tf/src/Optimizer.cpp
+++ b/compiler/moco-tf/src/Optimizer.cpp
@@ -22,7 +22,7 @@
#include <logo/Phase.h>
-#include <stdex/Memory.h>
+#include <memory>
namespace moco
{
@@ -35,48 +35,48 @@ void Optimizer::optimize(loco::Graph *g) const
/* TRANSFORM DECLARATION BEGIN */
// Shape inference is required for ResolveRedundantReshape
- phase.emplace_back(stdex::make_unique<ShapeInferencePass>());
+ phase.emplace_back(std::make_unique<ShapeInferencePass>());
if (moco::tf::get<moco::tf::Knob::ConstantFolding>())
{
- phase.emplace_back(stdex::make_unique<logo::ConstantFoldingPass>());
+ phase.emplace_back(std::make_unique<logo::ConstantFoldingPass>());
}
if (moco::tf::get<moco::tf::Knob::RemoveDeadNode>())
{
- phase.emplace_back(stdex::make_unique<logo::RemoveDeadNodePass>());
+ phase.emplace_back(std::make_unique<logo::RemoveDeadNodePass>());
}
if (moco::tf::get<moco::tf::Knob::ReorderDecode>() &&
moco::tf::get<moco::tf::Knob::ReorderDecodeTensorBiasAdd>())
{
- phase.emplace_back(stdex::make_unique<logo::ReorderDecodePass<loco::TensorBiasAdd>>());
+ phase.emplace_back(std::make_unique<logo::ReorderDecodePass<loco::TensorBiasAdd>>());
}
if (moco::tf::get<moco::tf::Knob::ReorderDecode>() &&
moco::tf::get<moco::tf::Knob::ReorderDecodeReLU>())
{
- phase.emplace_back(stdex::make_unique<logo::ReorderDecodePass<loco::ReLU>>());
+ phase.emplace_back(std::make_unique<logo::ReorderDecodePass<loco::ReLU>>());
}
if (moco::tf::get<moco::tf::Knob::SimplifyDomainConversion>())
{
- phase.emplace_back(stdex::make_unique<logo::SimplifyDomainConversionPass>());
+ phase.emplace_back(std::make_unique<logo::SimplifyDomainConversionPass>());
}
if (moco::tf::get<moco::tf::Knob::RemoveForwardNode>())
{
- phase.emplace_back(stdex::make_unique<logo::RemoveForwardNodePass>());
+ phase.emplace_back(std::make_unique<logo::RemoveForwardNodePass>());
}
if (moco::tf::get<moco::tf::Knob::ResolveDuplicateReshape>())
{
- phase.emplace_back(stdex::make_unique<logo::ResolveDuplicateReshapePass>());
+ phase.emplace_back(std::make_unique<logo::ResolveDuplicateReshapePass>());
}
if (moco::tf::get<moco::tf::Knob::ResolveRedundantReshape>())
{
- phase.emplace_back(stdex::make_unique<logo::ResolveRedundantReshapePass>());
+ phase.emplace_back(std::make_unique<logo::ResolveRedundantReshapePass>());
}
/* TRANSFORM DECLARATION END */
diff --git a/compiler/moco-tf/src/ProgressReporter.h b/compiler/moco-tf/src/ProgressReporter.h
index 190d972c5..440d29221 100644
--- a/compiler/moco-tf/src/ProgressReporter.h
+++ b/compiler/moco-tf/src/ProgressReporter.h
@@ -30,7 +30,7 @@ class ProgressReporter : public logo::PhaseEventListener
{
public:
ProgressReporter(loco::Graph *graph, logo::PhaseStrategy strategy)
- : _graph{graph}, _strategy{strategy}
+ : _graph{graph}, _strategy{strategy}
{
// DO NOTHING
}
diff --git a/compiler/moco-tf/src/TFFormattedGraph.h b/compiler/moco-tf/src/TFFormattedGraph.h
index f79208536..81978954f 100644
--- a/compiler/moco-tf/src/TFFormattedGraph.h
+++ b/compiler/moco-tf/src/TFFormattedGraph.h
@@ -19,7 +19,7 @@
#include <locop/FormattedGraph.h>
-#include <stdex/Memory.h>
+#include <memory>
namespace moco
{
@@ -49,7 +49,7 @@ public:
public:
std::unique_ptr<locop::NodeSummaryBuilder> create(const locop::SymbolTable *tlb) const final
{
- return stdex::make_unique<MocoNodeSummaryBuilder>(tlb);
+ return std::make_unique<MocoNodeSummaryBuilder>(tlb);
}
};
diff --git a/compiler/moco-tf/src/TFOptimizer.cpp b/compiler/moco-tf/src/TFOptimizer.cpp
index 2256b99b8..720cd9d9a 100644
--- a/compiler/moco-tf/src/TFOptimizer.cpp
+++ b/compiler/moco-tf/src/TFOptimizer.cpp
@@ -22,7 +22,7 @@
#include <logo/Phase.h>
-#include <stdex/Memory.h>
+#include <memory>
namespace moco
{
@@ -36,39 +36,39 @@ void TFOptimizer::optimize(loco::Graph *g) const
/* TRANSFORM DECLARATION BEGIN */
if (moco::tf::get<moco::tf::Knob::ResolveFusedBatchNorm>())
{
- phase.emplace_back(stdex::make_unique<moco::ResolveFusedBatchNorm>());
+ phase.emplace_back(std::make_unique<moco::ResolveFusedBatchNorm>());
}
if (moco::tf::get<moco::tf::Knob::FuseBinaryIntoPreceding>())
{
- phase.emplace_back(stdex::make_unique<moco::FuseBinaryIntoPreceding>());
+ phase.emplace_back(std::make_unique<moco::FuseBinaryIntoPreceding>());
}
if (moco::tf::get<moco::tf::Knob::ResolveConstantShape>())
{
- phase.emplace_back(stdex::make_unique<moco::ResolveConstantShape>());
+ phase.emplace_back(std::make_unique<moco::ResolveConstantShape>());
}
if (moco::tf::get<moco::tf::Knob::ResolveReshapeWildcardDim>())
{
- phase.emplace_back(stdex::make_unique<moco::ResolveReshapeWildcardDim>());
+ phase.emplace_back(std::make_unique<moco::ResolveReshapeWildcardDim>());
}
if (moco::tf::get<moco::tf::Knob::ResolveSquaredDifference>())
{
- phase.emplace_back(stdex::make_unique<moco::ResolveSquaredDifference>());
+ phase.emplace_back(std::make_unique<moco::ResolveSquaredDifference>());
}
if (moco::tf::get<moco::tf::Knob::RemoveTFIdentityNode>())
{
- phase.emplace_back(stdex::make_unique<moco::RemoveTFIdentityNode>());
+ phase.emplace_back(std::make_unique<moco::RemoveTFIdentityNode>());
}
if (moco::tf::get<moco::tf::Knob::RemoveDeadNode>())
{
- phase.emplace_back(stdex::make_unique<logo::RemoveDeadNodePass>());
+ phase.emplace_back(std::make_unique<logo::RemoveDeadNodePass>());
}
if (moco::tf::get<moco::tf::Knob::SqueezeReduceNode>())
{
- phase.emplace_back(stdex::make_unique<moco::SqueezeReduceNode>());
+ phase.emplace_back(std::make_unique<moco::SqueezeReduceNode>());
}
// Shape inference is needed for added nodes doing above transformations
- phase.emplace_back(stdex::make_unique<moco::tf::ShapeInferencePass>());
- phase.emplace_back(stdex::make_unique<moco::tf::TypeInferencePass>());
+ phase.emplace_back(std::make_unique<moco::tf::ShapeInferencePass>());
+ phase.emplace_back(std::make_unique<moco::tf::TypeInferencePass>());
/* TRANSFORM DECLARATION END */
ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
diff --git a/compiler/moco-tf/src/TestHelper.test.cpp b/compiler/moco-tf/src/TestHelper.test.cpp
index 1e8c38e36..36ce1114a 100644
--- a/compiler/moco-tf/src/TestHelper.test.cpp
+++ b/compiler/moco-tf/src/TestHelper.test.cpp
@@ -48,7 +48,7 @@ void setup_output_node(loco::Graph *graph, loco::Node *last_node)
#include <moco/IR/Nodes/TFConst.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <gtest/gtest.h>
@@ -62,7 +62,7 @@ namespace test
TFNodeBuildTester::TFNodeBuildTester()
{
_graph = loco::make_graph();
- _tensor_names = stdex::make_unique<moco::SymbolTable>();
+ _tensor_names = std::make_unique<moco::SymbolTable>();
}
void TFNodeBuildTester::inputs(const std::vector<std::string> &names)
@@ -91,8 +91,8 @@ void TFNodeBuildTester::run(tensorflow::NodeDef &nodedef, moco::GraphBuilder &gr
{
assert(_output != nullptr);
- auto node_defs = stdex::make_unique<moco::NodeDefTable>();
- auto updates = stdex::make_unique<moco::UpdateQueue>();
+ auto node_defs = std::make_unique<moco::NodeDefTable>();
+ auto updates = std::make_unique<moco::UpdateQueue>();
moco::GraphBuilderContext gb_context(_graph.get(), node_defs.get(), _tensor_names.get(),
updates.get());
diff --git a/compiler/moco-tf/src/Transforms.h b/compiler/moco-tf/src/Transforms.h
index f14b81675..a197a796e 100644
--- a/compiler/moco-tf/src/Transforms.h
+++ b/compiler/moco-tf/src/Transforms.h
@@ -21,6 +21,7 @@
#include "Transforms/TypeInferencePass.h"
#include <logo/Passes.h>
+#include <logo/PassesEx.h>
#include <moco/Pass/Passes.h>
#endif // __MOCO_TF_TRANSFORMS_H__
diff --git a/compiler/moco-tf/src/Transforms/ShapeInferencePass.cpp b/compiler/moco-tf/src/Transforms/ShapeInferencePass.cpp
index 64ba9dfb1..8f46cfbbc 100644
--- a/compiler/moco-tf/src/Transforms/ShapeInferencePass.cpp
+++ b/compiler/moco-tf/src/Transforms/ShapeInferencePass.cpp
@@ -46,8 +46,8 @@ bool ShapeInferencePass::run(loco::Graph *graph)
loco::MultiDialectShapeInferenceRule rules;
rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
- .bind(TFDialect::get(), &tf_rule)
- .bind(locoex::COpDialect::get(), &cop_rule);
+ .bind(TFDialect::get(), &tf_rule)
+ .bind(locoex::COpDialect::get(), &cop_rule);
return loco::apply(&rules).to(graph);
}
diff --git a/compiler/moco-tf/src/Transforms/TypeInferencePass.cpp b/compiler/moco-tf/src/Transforms/TypeInferencePass.cpp
index db6cf7521..2e2d4a9c1 100644
--- a/compiler/moco-tf/src/Transforms/TypeInferencePass.cpp
+++ b/compiler/moco-tf/src/Transforms/TypeInferencePass.cpp
@@ -42,8 +42,8 @@ bool TypeInferencePass::run(loco::Graph *graph)
loco::MultiDialectTypeInferenceRule rules;
rules.bind(loco::CanonicalDialect::get(), &canonical_rule)
- .bind(TFDialect::get(), &tf_rule)
- .bind(locoex::COpDialect::get(), &cop_rule);
+ .bind(TFDialect::get(), &tf_rule)
+ .bind(locoex::COpDialect::get(), &cop_rule);
loco::apply(&rules).to(graph);
diff --git a/compiler/moco-value-pbtxt-test/CMakeLists.txt b/compiler/moco-value-pbtxt-test/CMakeLists.txt
index a469c20dc..50513f0a1 100644
--- a/compiler/moco-value-pbtxt-test/CMakeLists.txt
+++ b/compiler/moco-value-pbtxt-test/CMakeLists.txt
@@ -77,11 +77,10 @@ foreach(PREFIX IN ITEMS ${TESTCASES})
endforeach(PREFIX)
-nnas_find_package(TensorFlow QUIET)
-if(NOT TensorFlow_FOUND)
- message(STATUS "moco: Skip adding test as TensorFlow is not found")
+if(NOT TARGET nnkit_tf_backend)
+ message(STATUS "moco: Skip adding test as nnkit_tf_backend is not defined")
return()
-endif(NOT TensorFlow_FOUND)
+endif(NOT TARGET nnkit_tf_backend)
##
## Copy runall.sh
diff --git a/compiler/moco/import/CMakeLists.txt b/compiler/moco/import/CMakeLists.txt
index 43107776e..460c2c98b 100644
--- a/compiler/moco/import/CMakeLists.txt
+++ b/compiler/moco/import/CMakeLists.txt
@@ -7,7 +7,6 @@ target_include_directories(moco_import PRIVATE src)
target_include_directories(moco_import PUBLIC include)
target_link_libraries(moco_import PUBLIC moco_lang)
target_link_libraries(moco_import PUBLIC mio_tf)
-target_link_libraries(moco_import PUBLIC stdex)
target_link_libraries(moco_import PRIVATE nncc_common)
target_link_libraries(moco_import PRIVATE plier_tf)
target_link_libraries(moco_import PRIVATE oops)
diff --git a/compiler/moco/import/include/moco/Import/GraphBuilderContext.h b/compiler/moco/import/include/moco/Import/GraphBuilderContext.h
index ae4f02c2a..76a9644b5 100644
--- a/compiler/moco/import/include/moco/Import/GraphBuilderContext.h
+++ b/compiler/moco/import/include/moco/Import/GraphBuilderContext.h
@@ -118,7 +118,7 @@ class GraphBuilderContext
public:
GraphBuilderContext(loco::Graph *g, NodeDefTable *nodedef, SymbolTable *tensor_names,
UpdateQueue *updates)
- : _g(g), _nodedef(nodedef), _tensor_names(tensor_names), _updates(updates)
+ : _g(g), _nodedef(nodedef), _tensor_names(tensor_names), _updates(updates)
{
// DO NOTHING
}
diff --git a/compiler/moco/import/include/moco/Import/GraphBuilderRegistry.h b/compiler/moco/import/include/moco/Import/GraphBuilderRegistry.h
index da65cffb8..c99dca1cf 100644
--- a/compiler/moco/import/include/moco/Import/GraphBuilderRegistry.h
+++ b/compiler/moco/import/include/moco/Import/GraphBuilderRegistry.h
@@ -82,6 +82,6 @@ private:
std::map<const std::string, std::unique_ptr<GraphBuilder>> _builder_map;
};
-} // namespace mono
+} // namespace moco
#endif // __MOCO_IMPORT_GRAPH_BUILDER_REGISTRY_H__
diff --git a/compiler/moco/import/include/moco/Import/Nodes/Softmax.h b/compiler/moco/import/include/moco/Import/Nodes/Softmax.h
index 43fbb8852..290818958 100644
--- a/compiler/moco/import/include/moco/Import/Nodes/Softmax.h
+++ b/compiler/moco/import/include/moco/Import/Nodes/Softmax.h
@@ -23,8 +23,8 @@ namespace moco
{
/**
-* @brief GraphBuilder for Softmax node
-*/
+ * @brief GraphBuilder for Softmax node
+ */
class SoftmaxGraphBuilder final : public GraphBuilder
{
public:
diff --git a/compiler/moco/import/src/GraphBuilderRegistry.cpp b/compiler/moco/import/src/GraphBuilderRegistry.cpp
index 3a028513f..7e91ca9d0 100644
--- a/compiler/moco/import/src/GraphBuilderRegistry.cpp
+++ b/compiler/moco/import/src/GraphBuilderRegistry.cpp
@@ -17,45 +17,45 @@
#include "moco/Import/GraphBuilderRegistry.h"
#include "moco/Import/Nodes.h"
-#include <stdex/Memory.h>
+#include <memory>
namespace moco
{
GraphBuilderRegistry::GraphBuilderRegistry()
{
- add("Add", stdex::make_unique<AddGraphBuilder>());
- add("AvgPool", stdex::make_unique<AvgPoolGraphBuilder>());
- add("BiasAdd", stdex::make_unique<BiasAddGraphBuilder>());
- add("ConcatV2", stdex::make_unique<ConcatV2GraphBuilder>());
- add("Const", stdex::make_unique<ConstGraphBuilder>());
- add("Conv2D", stdex::make_unique<Conv2DGraphBuilder>());
- add("Conv2DBackpropInput", stdex::make_unique<Conv2DBackpropInputGraphBuilder>());
- add("DepthwiseConv2dNative", stdex::make_unique<DepthwiseConv2dNativeGraphBuilder>());
- add("FakeQuantWithMinMaxVars", stdex::make_unique<FakeQuantWithMinMaxVarsGraphBuilder>());
- add("FusedBatchNorm", stdex::make_unique<FusedBatchNormGraphBuilder>());
- add("Identity", stdex::make_unique<IdentityGraphBuilder>());
- add("Maximum", stdex::make_unique<MaximumGraphBuilder>());
- add("MaxPool", stdex::make_unique<MaxPoolGraphBuilder>());
- add("Mean", stdex::make_unique<MeanGraphBuilder>());
- add("Mul", stdex::make_unique<MulGraphBuilder>());
- add("Pack", stdex::make_unique<PackGraphBuilder>());
- add("Pad", stdex::make_unique<PadGraphBuilder>());
- add("Placeholder", stdex::make_unique<PlaceholderGraphBuilder>());
- add("RealDiv", stdex::make_unique<RealDivGraphBuilder>());
- add("Relu", stdex::make_unique<ReluGraphBuilder>());
- add("Relu6", stdex::make_unique<Relu6GraphBuilder>());
- add("Reshape", stdex::make_unique<ReshapeGraphBuilder>());
- add("Rsqrt", stdex::make_unique<RsqrtGraphBuilder>());
- add("Shape", stdex::make_unique<ShapeGraphBuilder>());
- add("Softmax", stdex::make_unique<SoftmaxGraphBuilder>());
- add("Sqrt", stdex::make_unique<SqrtGraphBuilder>());
- add("SquaredDifference", stdex::make_unique<SquaredDifferenceGraphBuilder>());
- add("Squeeze", stdex::make_unique<SqueezeGraphBuilder>());
- add("StopGradient", stdex::make_unique<StopGradientGraphBuilder>());
- add("StridedSlice", stdex::make_unique<StridedSliceGraphBuilder>());
- add("Sub", stdex::make_unique<SubGraphBuilder>());
- add("Tanh", stdex::make_unique<TanhGraphBuilder>());
+ add("Add", std::make_unique<AddGraphBuilder>());
+ add("AvgPool", std::make_unique<AvgPoolGraphBuilder>());
+ add("BiasAdd", std::make_unique<BiasAddGraphBuilder>());
+ add("ConcatV2", std::make_unique<ConcatV2GraphBuilder>());
+ add("Const", std::make_unique<ConstGraphBuilder>());
+ add("Conv2D", std::make_unique<Conv2DGraphBuilder>());
+ add("Conv2DBackpropInput", std::make_unique<Conv2DBackpropInputGraphBuilder>());
+ add("DepthwiseConv2dNative", std::make_unique<DepthwiseConv2dNativeGraphBuilder>());
+ add("FakeQuantWithMinMaxVars", std::make_unique<FakeQuantWithMinMaxVarsGraphBuilder>());
+ add("FusedBatchNorm", std::make_unique<FusedBatchNormGraphBuilder>());
+ add("Identity", std::make_unique<IdentityGraphBuilder>());
+ add("Maximum", std::make_unique<MaximumGraphBuilder>());
+ add("MaxPool", std::make_unique<MaxPoolGraphBuilder>());
+ add("Mean", std::make_unique<MeanGraphBuilder>());
+ add("Mul", std::make_unique<MulGraphBuilder>());
+ add("Pack", std::make_unique<PackGraphBuilder>());
+ add("Pad", std::make_unique<PadGraphBuilder>());
+ add("Placeholder", std::make_unique<PlaceholderGraphBuilder>());
+ add("RealDiv", std::make_unique<RealDivGraphBuilder>());
+ add("Relu", std::make_unique<ReluGraphBuilder>());
+ add("Relu6", std::make_unique<Relu6GraphBuilder>());
+ add("Reshape", std::make_unique<ReshapeGraphBuilder>());
+ add("Rsqrt", std::make_unique<RsqrtGraphBuilder>());
+ add("Shape", std::make_unique<ShapeGraphBuilder>());
+ add("Softmax", std::make_unique<SoftmaxGraphBuilder>());
+ add("Sqrt", std::make_unique<SqrtGraphBuilder>());
+ add("SquaredDifference", std::make_unique<SquaredDifferenceGraphBuilder>());
+ add("Squeeze", std::make_unique<SqueezeGraphBuilder>());
+ add("StopGradient", std::make_unique<StopGradientGraphBuilder>());
+ add("StridedSlice", std::make_unique<StridedSliceGraphBuilder>());
+ add("Sub", std::make_unique<SubGraphBuilder>());
+ add("Tanh", std::make_unique<TanhGraphBuilder>());
// Virtual node like `TFPush` need not to be added here
}
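Note: GraphBuilderRegistry's constructor is an op-name to builder map populated, after the migration, with std::make_unique. The underlying registry idiom reduced to essentials (types simplified; only the add() shape follows the source):

    #include <map>
    #include <memory>
    #include <string>

    struct GraphBuilder
    {
      virtual ~GraphBuilder() = default;
    };

    struct AddGraphBuilder final : GraphBuilder
    {
    };

    class Registry
    {
    public:
      void add(const std::string &op, std::unique_ptr<GraphBuilder> builder)
      {
        _map.emplace(op, std::move(builder));
      }

    private:
      std::map<std::string, std::unique_ptr<GraphBuilder>> _map;
    };

    int main()
    {
      Registry registry;
      registry.add("Add", std::make_unique<AddGraphBuilder>());
      return 0;
    }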
diff --git a/compiler/moco/import/src/Importer.cpp b/compiler/moco/import/src/Importer.cpp
index 3813affce..0659fd165 100644
--- a/compiler/moco/import/src/Importer.cpp
+++ b/compiler/moco/import/src/Importer.cpp
@@ -23,9 +23,9 @@
#include <moco/IR/Nodes/TFPlaceholder.h>
#include <moco/IR/TFNode.h>
-#include <stdex/Memory.h>
#include <oops/UserExn.h>
+#include <memory>
#include <cassert>
#include <sstream>
#include <stdexcept>
@@ -36,9 +36,9 @@ namespace
void convert_graph(const moco::GraphBuilderSource &source, const moco::ModelSignature &signature,
tensorflow::GraphDef &tf_graph_def, loco::Graph *graph)
{
- auto nodedef = stdex::make_unique<moco::NodeDefTable>();
- auto tensor_names = stdex::make_unique<moco::SymbolTable>();
- auto updates = stdex::make_unique<moco::UpdateQueue>();
+ auto nodedef = std::make_unique<moco::NodeDefTable>();
+ auto tensor_names = std::make_unique<moco::SymbolTable>();
+ auto updates = std::make_unique<moco::UpdateQueue>();
moco::GraphBuilderContext gb_context(graph, nodedef.get(), tensor_names.get(), updates.get());
@@ -190,7 +190,7 @@ std::unique_ptr<loco::Graph> Importer::import(const ModelSignature &signature,
convert_graph(*source_ptr, signature, tf_graph_def, graph.get());
- return std::move(graph);
+ return graph;
}
} // namespace moco
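Note: the Importer.cpp hunk also replaces `return std::move(graph);` with `return graph;`. A local std::unique_ptr returned by value is already treated as an rvalue, so the explicit std::move only blocks copy elision; GCC flags it under -Wredundant-move. Sketch:

    #include <memory>

    std::unique_ptr<int> make_value()
    {
      auto p = std::make_unique<int>(42);
      // return std::move(p); // redundant: disables NRVO
      return p; // implicit move (or elision) applies
    }

    int main() { return *make_value() == 42 ? 0 : 1; }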
diff --git a/compiler/moco/import/src/Nodes/Add.cpp b/compiler/moco/import/src/Nodes/Add.cpp
index 6981a55e1..af743316b 100644
--- a/compiler/moco/import/src/Nodes/Add.cpp
+++ b/compiler/moco/import/src/Nodes/Add.cpp
@@ -19,7 +19,8 @@
#include <moco/IR/Nodes/TFAdd.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
@@ -78,7 +79,7 @@ void AddGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderContext
add_input_names.push_back(TensorName(node.input(0))); // x
add_input_names.push_back(TensorName(node.input(1))); // y
- auto tf_add_update = stdex::make_unique<TFAddGraphUpdate>(tf_add, add_input_names);
+ auto tf_add_update = std::make_unique<TFAddGraphUpdate>(tf_add, add_input_names);
updates->enroll(std::move(tf_add_update));
}
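
The recurring substitution throughout this patch swaps the project-local `stdex::make_unique` shim for `std::make_unique`, which has been standard in `<memory>` since C++14; once every caller migrates, the `stdex` header and link dependency can be dropped entirely, as the CMake hunks further down do. A minimal sketch of the call shape, with a hypothetical payload type standing in for the update classes:

    #include <memory>

    struct Payload // hypothetical stand-in for a GraphUpdate subclass
    {
      explicit Payload(int v) : value(v) {}
      int value;
    };

    int main()
    {
      auto p = std::make_unique<Payload>(7); // C++14, no shim required
      return p->value == 7 ? 0 : 1;
    }
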
diff --git a/compiler/moco/import/src/Nodes/AvgPool.cpp b/compiler/moco/import/src/Nodes/AvgPool.cpp
index 6d7fd36bb..95232b977 100644
--- a/compiler/moco/import/src/Nodes/AvgPool.cpp
+++ b/compiler/moco/import/src/Nodes/AvgPool.cpp
@@ -22,10 +22,10 @@
#include "Convert.h"
#include <loco/IR/PermutingCodec.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
#include <oops/UserExn.h>
+#include <memory>
#include <cassert>
#include <stdexcept>
@@ -40,7 +40,7 @@ class TFAvgPoolGraphUpdate final : public GraphUpdate
{
public:
TFAvgPoolGraphUpdate(TFAvgPool *node, const TensorName &name)
- : _avgpool_node(node), _value_name(name)
+ : _avgpool_node(node), _value_name(name)
{
}
@@ -127,7 +127,7 @@ void AvgPoolGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderCon
tensor_names->enroll(output_name, avgPool_node);
// Record ifm inputs to featureEncode_node
- auto update = stdex::make_unique<TFAvgPoolGraphUpdate>(avgPool_node, TensorName(node.input(0)));
+ auto update = std::make_unique<TFAvgPoolGraphUpdate>(avgPool_node, TensorName(node.input(0)));
updates->enroll(std::move(update));
}
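
The builders visible in these hunks follow a two-phase import: phase one creates the node and enrolls a GraphUpdate that remembers the input tensor names; phase two replays the queue once every node exists, so inputs referring to nodes defined later in the GraphDef still resolve. A hedged sketch of that queue, using stand-in types rather than moco's actual GraphUpdate/UpdateQueue:

    #include <memory>
    #include <utility>
    #include <vector>

    struct Update
    {
      virtual ~Update() = default;
      virtual void input(void) const = 0; // resolve names to nodes in phase two
    };

    class UpdateQueue
    {
    public:
      void enroll(std::unique_ptr<Update> u) { _queue.push_back(std::move(u)); }

      void run(void) const
      {
        for (const auto &u : _queue)
          u->input(); // every node already exists, so forward refs resolve
      }

    private:
      std::vector<std::unique_ptr<Update>> _queue;
    };
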
diff --git a/compiler/moco/import/src/Nodes/BiasAdd.cpp b/compiler/moco/import/src/Nodes/BiasAdd.cpp
index a3eb91116..d4bc161d5 100644
--- a/compiler/moco/import/src/Nodes/BiasAdd.cpp
+++ b/compiler/moco/import/src/Nodes/BiasAdd.cpp
@@ -22,10 +22,10 @@
#include <loco.h>
#include <loco/IR/PermutingCodec.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
#include <oops/UserExn.h>
+#include <memory>
#include <cassert>
#include <vector>
@@ -37,7 +37,7 @@ class TFBiasAddGraphUpdate final : public GraphUpdate
{
public:
TFBiasAddGraphUpdate(TFBiasAdd *biasadd, std::vector<TensorName> &names)
- : _biasadd(biasadd), _names(names)
+ : _biasadd(biasadd), _names(names)
{
}
@@ -115,7 +115,7 @@ void BiasAddGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderCon
input_names.push_back(TensorName(node.input(0)));
input_names.push_back(TensorName(node.input(1)));
- auto update = stdex::make_unique<TFBiasAddGraphUpdate>(tf_bias_add, input_names);
+ auto update = std::make_unique<TFBiasAddGraphUpdate>(tf_bias_add, input_names);
updates->enroll(std::move(update));
}
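
A large share of the remaining hunks are whitespace-only: constructor initializer lists drop from a four-space to a two-space continuation indent. That matches clang-format output with `ConstructorInitializerIndentWidth: 2`, which is an assumption about the project's .clang-format since this patch does not include it. The target style:

    class Example
    {
    public:
      Example(int a, int b)
        : _a(a), _b(b) // two-space continuation indent, as in the hunks
      {
      }

    private:
      int _a;
      int _b;
    };
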
diff --git a/compiler/moco/import/src/Nodes/Concat.cpp b/compiler/moco/import/src/Nodes/Concat.cpp
index 8bf8a84b5..dea60a737 100644
--- a/compiler/moco/import/src/Nodes/Concat.cpp
+++ b/compiler/moco/import/src/Nodes/Concat.cpp
@@ -21,9 +21,9 @@
#include <moco/Names.h>
#include <loco.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
+#include <memory>
#include <cassert>
namespace
@@ -35,7 +35,7 @@ class TFConcatV2GraphUpdate final : public GraphUpdate
{
public:
TFConcatV2GraphUpdate(TFConcatV2 *node, std::vector<TensorName> names)
- : _node(node), _names(names)
+ : _node(node), _names(names)
{
}
@@ -102,7 +102,7 @@ void ConcatV2GraphBuilder::build(const tensorflow::NodeDef &node,
TensorName output_name(node.name(), 0);
tensor_names->enroll(output_name, concat_node);
- auto update = stdex::make_unique<TFConcatV2GraphUpdate>(concat_node, input_names);
+ auto update = std::make_unique<TFConcatV2GraphUpdate>(concat_node, input_names);
updates->enroll(std::move(update));
}
diff --git a/compiler/moco/import/src/Nodes/Const.cpp b/compiler/moco/import/src/Nodes/Const.cpp
index 15ea717db..7744cf889 100644
--- a/compiler/moco/import/src/Nodes/Const.cpp
+++ b/compiler/moco/import/src/Nodes/Const.cpp
@@ -228,7 +228,7 @@ void ConstGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderConte
read_value_float32(const_node, num_elements, input_tensor);
break;
- // TODO support other types
+ // TODO support other types
default:
assert(false);
diff --git a/compiler/moco/import/src/Nodes/Conv2D.cpp b/compiler/moco/import/src/Nodes/Conv2D.cpp
index e6b98dcd1..acb9f76c6 100644
--- a/compiler/moco/import/src/Nodes/Conv2D.cpp
+++ b/compiler/moco/import/src/Nodes/Conv2D.cpp
@@ -24,10 +24,10 @@
#include <loco.h>
#include <loco/IR/PermutingCodec.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
#include <oops/UserExn.h>
+#include <memory>
#include <cassert>
#include <stdexcept>
#include <algorithm>
@@ -131,7 +131,7 @@ void Conv2DGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderCont
input_names.push_back(TensorName(node.input(1))); // kernel
// Record ifm inputs to featureEncode_node
- auto tfconv2d_update = stdex::make_unique<TFConv2DGraphUpdate>(conv2d, input_names);
+ auto tfconv2d_update = std::make_unique<TFConv2DGraphUpdate>(conv2d, input_names);
updates->enroll(std::move(tfconv2d_update));
}
diff --git a/compiler/moco/import/src/Nodes/Conv2DBackpropInput.cpp b/compiler/moco/import/src/Nodes/Conv2DBackpropInput.cpp
index 74c6605ab..10fee9a8e 100644
--- a/compiler/moco/import/src/Nodes/Conv2DBackpropInput.cpp
+++ b/compiler/moco/import/src/Nodes/Conv2DBackpropInput.cpp
@@ -21,10 +21,11 @@
#include "Convert.h"
#include <loco.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
#include <oops/UserExn.h>
+#include <memory>
+
namespace
{
using namespace moco;
@@ -34,7 +35,7 @@ class Conv2DBackpropInputGraphUpdate final : public GraphUpdate
{
public:
Conv2DBackpropInputGraphUpdate(TFConv2DBackpropInput *node, std::vector<TensorName> names)
- : _node(node), _input_names(names)
+ : _node(node), _input_names(names)
{
// DO NOTHING
}
@@ -132,7 +133,7 @@ void Conv2DBackpropInputGraphBuilder::build(const tensorflow::NodeDef &node,
// update
auto conv2d_backprop_update =
- stdex::make_unique<Conv2DBackpropInputGraphUpdate>(conv2d_backprop, input_names);
+ std::make_unique<Conv2DBackpropInputGraphUpdate>(conv2d_backprop, input_names);
updates->enroll(std::move(conv2d_backprop_update));
}
diff --git a/compiler/moco/import/src/Nodes/DepthwiseConv2dNative.cpp b/compiler/moco/import/src/Nodes/DepthwiseConv2dNative.cpp
index 3991a4d51..62e57207d 100644
--- a/compiler/moco/import/src/Nodes/DepthwiseConv2dNative.cpp
+++ b/compiler/moco/import/src/Nodes/DepthwiseConv2dNative.cpp
@@ -24,9 +24,9 @@
#include <plier/tf/Convert.h>
#include <loco/IR/PermutingCodec.h>
-#include <stdex/Memory.h>
#include <oops/UserExn.h>
+#include <memory>
#include <cassert>
using namespace plier::tf;
@@ -39,7 +39,7 @@ class TFDepthwiseConv2dNativeGraphUpdate final : public GraphUpdate
{
public:
TFDepthwiseConv2dNativeGraphUpdate(TFDepthwiseConv2dNative *node, std::vector<TensorName> names)
- : _node(node), _names(names)
+ : _node(node), _names(names)
{
}
@@ -139,8 +139,8 @@ void DepthwiseConv2dNativeGraphBuilder::build(const tensorflow::NodeDef &node,
input_names.push_back(TensorName(node.input(1))); // kernel
// Record ifm inputs to featureEncode_node
- auto tfdepthwiseconv2dnative_update = stdex::make_unique<TFDepthwiseConv2dNativeGraphUpdate>(
- depthwiseconv2d_native_node, input_names);
+ auto tfdepthwiseconv2dnative_update =
+ std::make_unique<TFDepthwiseConv2dNativeGraphUpdate>(depthwiseconv2d_native_node, input_names);
updates->enroll(std::move(tfdepthwiseconv2dnative_update));
}
diff --git a/compiler/moco/import/src/Nodes/FakeQuantWithMinMaxVars.cpp b/compiler/moco/import/src/Nodes/FakeQuantWithMinMaxVars.cpp
index d2fa3d1eb..0bd354dc5 100644
--- a/compiler/moco/import/src/Nodes/FakeQuantWithMinMaxVars.cpp
+++ b/compiler/moco/import/src/Nodes/FakeQuantWithMinMaxVars.cpp
@@ -24,8 +24,8 @@
#include <plier/tf/Convert.h>
#include <loco/IR/PermutingCodec.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <cassert>
using namespace plier::tf;
@@ -39,7 +39,7 @@ class TFFakeQuantWithMinMaxVarsGraphUpdate final : public GraphUpdate
public:
TFFakeQuantWithMinMaxVarsGraphUpdate(TFFakeQuantWithMinMaxVars *node,
std::vector<TensorName> names)
- : _node(node), _names(names)
+ : _node(node), _names(names)
{
}
@@ -115,7 +115,7 @@ void FakeQuantWithMinMaxVarsGraphBuilder::build(const tensorflow::NodeDef &node,
// Record ifm inputs to featureEncode_node
auto tffakequant_update =
- stdex::make_unique<TFFakeQuantWithMinMaxVarsGraphUpdate>(fakequant_node, input_names);
+ std::make_unique<TFFakeQuantWithMinMaxVarsGraphUpdate>(fakequant_node, input_names);
updates->enroll(std::move(tffakequant_update));
}
diff --git a/compiler/moco/import/src/Nodes/FusedBatchNorm.cpp b/compiler/moco/import/src/Nodes/FusedBatchNorm.cpp
index 59f98017c..8fc439ae3 100644
--- a/compiler/moco/import/src/Nodes/FusedBatchNorm.cpp
+++ b/compiler/moco/import/src/Nodes/FusedBatchNorm.cpp
@@ -19,9 +19,10 @@
#include <moco/IR/Nodes/TFFusedBatchNorm.h>
#include <loco.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
+#include <memory>
+
namespace
{
@@ -34,7 +35,7 @@ class FusedBatchNormGraphUpdate final : public GraphUpdate
{
public:
FusedBatchNormGraphUpdate(TFFusedBatchNorm *node, std::vector<TensorName> names)
- : _node(node), _names(names)
+ : _node(node), _names(names)
{
}
@@ -95,7 +96,7 @@ void FusedBatchNormGraphBuilder::build(const tensorflow::NodeDef &node,
fbn_input_names.push_back(TensorName(node.input(3))); // mean
fbn_input_names.push_back(TensorName(node.input(4))); // variance
- auto tf_fbn_update = stdex::make_unique<FusedBatchNormGraphUpdate>(tf_fbn, fbn_input_names);
+ auto tf_fbn_update = std::make_unique<FusedBatchNormGraphUpdate>(tf_fbn, fbn_input_names);
updates->enroll(std::move(tf_fbn_update));
}
diff --git a/compiler/moco/import/src/Nodes/Identity.cpp b/compiler/moco/import/src/Nodes/Identity.cpp
index 8ca0e2d01..c3b912b48 100644
--- a/compiler/moco/import/src/Nodes/Identity.cpp
+++ b/compiler/moco/import/src/Nodes/Identity.cpp
@@ -20,8 +20,8 @@
#include <moco/Names.h>
#include <loco.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <vector>
namespace
@@ -33,7 +33,7 @@ class TFIdentityGraphUpdate final : public GraphUpdate
{
public:
TFIdentityGraphUpdate(TFIdentity *node, const std::vector<TensorName> &names)
- : _node(node), _names(names)
+ : _node(node), _names(names)
{
}
@@ -88,7 +88,7 @@ void IdentityGraphBuilder::build(const tensorflow::NodeDef &node,
{
names.emplace_back(TensorName(node.input(i)));
}
- auto update = stdex::make_unique<TFIdentityGraphUpdate>(identity_node, names);
+ auto update = std::make_unique<TFIdentityGraphUpdate>(identity_node, names);
updates->enroll(std::move(update));
}
diff --git a/compiler/moco/import/src/Nodes/MaxPool.cpp b/compiler/moco/import/src/Nodes/MaxPool.cpp
index 63275a3b8..cf4b21224 100644
--- a/compiler/moco/import/src/Nodes/MaxPool.cpp
+++ b/compiler/moco/import/src/Nodes/MaxPool.cpp
@@ -24,10 +24,10 @@
#include <loco.h>
#include <loco/IR/PermutingCodec.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
#include <oops/UserExn.h>
+#include <memory>
#include <cassert>
#include <stdexcept>
@@ -40,7 +40,7 @@ class TFMaxPoolGraphUpdate final : public GraphUpdate
{
public:
TFMaxPoolGraphUpdate(TFMaxPool *node, const TensorName &name)
- : _maxpool_node(node), _input_name(name)
+ : _maxpool_node(node), _input_name(name)
{
}
@@ -132,7 +132,7 @@ void MaxPoolGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderCon
tensor_names->enroll(output_name, maxPool_node);
// Record ifm inputs to featureEncode_node
- auto update = stdex::make_unique<TFMaxPoolGraphUpdate>(maxPool_node, TensorName(node.input(0)));
+ auto update = std::make_unique<TFMaxPoolGraphUpdate>(maxPool_node, TensorName(node.input(0)));
updates->enroll(std::move(update));
}
diff --git a/compiler/moco/import/src/Nodes/Maximum.cpp b/compiler/moco/import/src/Nodes/Maximum.cpp
index 43bbbabe6..d2d039f27 100644
--- a/compiler/moco/import/src/Nodes/Maximum.cpp
+++ b/compiler/moco/import/src/Nodes/Maximum.cpp
@@ -19,7 +19,8 @@
#include <moco/IR/Nodes/TFMaximum.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
@@ -80,7 +81,7 @@ void MaximumGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderCon
add_input_names.push_back(TensorName(node.input(0))); // x
add_input_names.push_back(TensorName(node.input(1))); // y
- auto tf_maximum_update = stdex::make_unique<TFMaximumGraphUpdate>(tf_maximum, add_input_names);
+ auto tf_maximum_update = std::make_unique<TFMaximumGraphUpdate>(tf_maximum, add_input_names);
updates->enroll(std::move(tf_maximum_update));
}
diff --git a/compiler/moco/import/src/Nodes/Mean.cpp b/compiler/moco/import/src/Nodes/Mean.cpp
index 30fb0f1f7..3f559bc41 100644
--- a/compiler/moco/import/src/Nodes/Mean.cpp
+++ b/compiler/moco/import/src/Nodes/Mean.cpp
@@ -19,9 +19,10 @@
#include <moco/IR/Nodes/TFMean.h>
#include <loco.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
+#include <memory>
+
namespace
{
using namespace moco;
@@ -34,7 +35,7 @@ class MeanGraphUpdate final : public GraphUpdate
public:
MeanGraphUpdate(TFMean *node, const TensorName &&input_name,
const TensorName &&reduction_indices_name)
- : _node(node), _input_name(input_name), _reduction_indices_name(reduction_indices_name)
+ : _node(node), _input_name(input_name), _reduction_indices_name(reduction_indices_name)
{
// DO NOTHING
}
@@ -91,8 +92,8 @@ void MeanGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderContex
TensorName output_name(node.name(), 0);
tensor_names->enroll(output_name, tf_mean);
- auto update = stdex::make_unique<MeanGraphUpdate>(tf_mean, TensorName(node.input(0)),
- TensorName(node.input(1)));
+ auto update = std::make_unique<MeanGraphUpdate>(tf_mean, TensorName(node.input(0)),
+ TensorName(node.input(1)));
updates->enroll(std::move(update));
}
diff --git a/compiler/moco/import/src/Nodes/Mul.cpp b/compiler/moco/import/src/Nodes/Mul.cpp
index ab926b59e..91c5a60e5 100644
--- a/compiler/moco/import/src/Nodes/Mul.cpp
+++ b/compiler/moco/import/src/Nodes/Mul.cpp
@@ -19,7 +19,8 @@
#include <moco/IR/Nodes/TFMul.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
@@ -78,7 +79,7 @@ void MulGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderContext
add_input_names.push_back(TensorName(node.input(0))); // x
add_input_names.push_back(TensorName(node.input(1))); // y
- auto tf_mul_update = stdex::make_unique<TFMulGraphUpdate>(tf_mul, add_input_names);
+ auto tf_mul_update = std::make_unique<TFMulGraphUpdate>(tf_mul, add_input_names);
updates->enroll(std::move(tf_mul_update));
}
diff --git a/compiler/moco/import/src/Nodes/Pack.cpp b/compiler/moco/import/src/Nodes/Pack.cpp
index 45815a30e..153ee44ef 100644
--- a/compiler/moco/import/src/Nodes/Pack.cpp
+++ b/compiler/moco/import/src/Nodes/Pack.cpp
@@ -23,9 +23,9 @@
#include <loco.h>
#include <loco/IR/NodeShape.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
+#include <memory>
#include <cassert>
namespace
@@ -95,7 +95,7 @@ void PackGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderContex
TensorName output_name(node.name(), 0);
tensor_names->enroll(output_name, pack_node);
- auto update = stdex::make_unique<TFPackGraphUpdate>(pack_node, input_names);
+ auto update = std::make_unique<TFPackGraphUpdate>(pack_node, input_names);
updates->enroll(std::move(update));
}
diff --git a/compiler/moco/import/src/Nodes/Pad.cpp b/compiler/moco/import/src/Nodes/Pad.cpp
index 262a68fa0..c1f466b44 100644
--- a/compiler/moco/import/src/Nodes/Pad.cpp
+++ b/compiler/moco/import/src/Nodes/Pad.cpp
@@ -19,9 +19,10 @@
#include <moco/IR/Nodes/TFPad.h>
#include <loco.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
+#include <memory>
+
namespace
{
@@ -84,7 +85,7 @@ void PadGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderContext
add_input_names.push_back(TensorName(node.input(1))); // paddings
// Queue node input update
- auto tf_pad_update = stdex::make_unique<TFPadGraphUpdate>(tf_pad, add_input_names);
+ auto tf_pad_update = std::make_unique<TFPadGraphUpdate>(tf_pad, add_input_names);
updates->enroll(std::move(tf_pad_update));
}
diff --git a/compiler/moco/import/src/Nodes/RealDiv.cpp b/compiler/moco/import/src/Nodes/RealDiv.cpp
index de3d57673..c747a2fb3 100644
--- a/compiler/moco/import/src/Nodes/RealDiv.cpp
+++ b/compiler/moco/import/src/Nodes/RealDiv.cpp
@@ -19,7 +19,8 @@
#include <moco/IR/Nodes/TFRealDiv.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
@@ -79,7 +80,7 @@ void RealDivGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderCon
div_input_names.push_back(TensorName(node.input(0))); // x
div_input_names.push_back(TensorName(node.input(1))); // y
- auto tf_div_update = stdex::make_unique<TFRealDivGraphUpdate>(tf_div, div_input_names);
+ auto tf_div_update = std::make_unique<TFRealDivGraphUpdate>(tf_div, div_input_names);
updates->enroll(std::move(tf_div_update));
}
diff --git a/compiler/moco/import/src/Nodes/Relu.cpp b/compiler/moco/import/src/Nodes/Relu.cpp
index eedc8155d..c99e484e2 100644
--- a/compiler/moco/import/src/Nodes/Relu.cpp
+++ b/compiler/moco/import/src/Nodes/Relu.cpp
@@ -20,8 +20,8 @@
#include <moco/Names.h>
#include <loco.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <cassert>
#include <stdexcept>
@@ -79,7 +79,7 @@ void ReluGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderContex
tensor_names->enroll(output_name, relu_node);
// Queue node input update
- auto update = stdex::make_unique<TFReluGraphUpdate>(relu_node, TensorName(node.input(0)));
+ auto update = std::make_unique<TFReluGraphUpdate>(relu_node, TensorName(node.input(0)));
updates->enroll(std::move(update));
}
diff --git a/compiler/moco/import/src/Nodes/Relu6.cpp b/compiler/moco/import/src/Nodes/Relu6.cpp
index 4700ba408..b7bbac5ce 100644
--- a/compiler/moco/import/src/Nodes/Relu6.cpp
+++ b/compiler/moco/import/src/Nodes/Relu6.cpp
@@ -18,7 +18,7 @@
#include <moco/IR/Nodes/TFRelu6.h>
-#include <stdex/Memory.h>
+#include <memory>
namespace
{
@@ -73,7 +73,7 @@ void Relu6GraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderConte
tensor_names->enroll(output_name, relu_node);
// Queue node input update
- auto update = stdex::make_unique<TFRelu6GraphUpdate>(relu_node, TensorName(node.input(0)));
+ auto update = std::make_unique<TFRelu6GraphUpdate>(relu_node, TensorName(node.input(0)));
updates->enroll(std::move(update));
}
diff --git a/compiler/moco/import/src/Nodes/Reshape.cpp b/compiler/moco/import/src/Nodes/Reshape.cpp
index 26e22513f..bdcafbf70 100644
--- a/compiler/moco/import/src/Nodes/Reshape.cpp
+++ b/compiler/moco/import/src/Nodes/Reshape.cpp
@@ -21,8 +21,8 @@
#include <moco/Names.h>
#include <plier/tf/Convert.h>
#include <loco.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <cassert>
#include <stdexcept>
@@ -94,7 +94,7 @@ void ReshapeGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderCon
input_names.push_back(TensorName(node.input(1))); // shape
// Queue node input update
- auto update = stdex::make_unique<ReshapeGraphUpdate>(reshape, input_names);
+ auto update = std::make_unique<ReshapeGraphUpdate>(reshape, input_names);
updates->enroll(std::move(update));
}
diff --git a/compiler/moco/import/src/Nodes/Rsqrt.cpp b/compiler/moco/import/src/Nodes/Rsqrt.cpp
index 979ac90c9..f96d99b68 100644
--- a/compiler/moco/import/src/Nodes/Rsqrt.cpp
+++ b/compiler/moco/import/src/Nodes/Rsqrt.cpp
@@ -19,7 +19,8 @@
#include <moco/IR/Nodes/TFRsqrt.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
@@ -74,8 +75,7 @@ void RsqrtGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderConte
tensor_names->enroll(output_name, tf_rsqrt);
// Queue node input update
- auto tf_rsqrt_update =
- stdex::make_unique<TFRsqrtGraphUpdate>(tf_rsqrt, TensorName(node.input(0)));
+ auto tf_rsqrt_update = std::make_unique<TFRsqrtGraphUpdate>(tf_rsqrt, TensorName(node.input(0)));
updates->enroll(std::move(tf_rsqrt_update));
}
diff --git a/compiler/moco/import/src/Nodes/Shape.cpp b/compiler/moco/import/src/Nodes/Shape.cpp
index 1e112ebb0..b7eb339ef 100644
--- a/compiler/moco/import/src/Nodes/Shape.cpp
+++ b/compiler/moco/import/src/Nodes/Shape.cpp
@@ -19,9 +19,10 @@
#include <moco/IR/Nodes/TFShape.h>
#include <loco.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
+#include <memory>
+
namespace
{
using namespace moco;
@@ -33,7 +34,7 @@ class ShapeGraphUpdate final : public GraphUpdate
{
public:
ShapeGraphUpdate(TFShape *node, const TensorName &&input_name)
- : _node(node), _input_name(input_name)
+ : _node(node), _input_name(input_name)
{
// DO NOTHING
}
@@ -93,7 +94,7 @@ void ShapeGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderConte
TensorName output_name(node.name(), 0);
tensor_names->enroll(output_name, tf_shape);
- auto update = stdex::make_unique<ShapeGraphUpdate>(tf_shape, TensorName(node.input(0)));
+ auto update = std::make_unique<ShapeGraphUpdate>(tf_shape, TensorName(node.input(0)));
updates->enroll(std::move(update));
}
diff --git a/compiler/moco/import/src/Nodes/Softmax.cpp b/compiler/moco/import/src/Nodes/Softmax.cpp
index 6f2c609ff..4fa962750 100644
--- a/compiler/moco/import/src/Nodes/Softmax.cpp
+++ b/compiler/moco/import/src/Nodes/Softmax.cpp
@@ -19,21 +19,22 @@
#include <moco/IR/Nodes/TFSoftmax.h>
#include <loco.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
+#include <memory>
+
namespace
{
using namespace moco;
/**
-* @brief GraphUpdate for Softmax node
-*/
+ * @brief GraphUpdate for Softmax node
+ */
class SoftmaxGraphUpdate final : public GraphUpdate
{
public:
SoftmaxGraphUpdate(TFSoftmax *node, const TensorName &&input_name)
- : _node(node), _input_name(input_name)
+ : _node(node), _input_name(input_name)
{
// DO NOTHING
}
@@ -79,7 +80,7 @@ void SoftmaxGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderCon
TensorName output_name(node.name(), 0);
tensor_names->enroll(output_name, tf_softmax);
- auto update = stdex::make_unique<SoftmaxGraphUpdate>(tf_softmax, TensorName(node.input(0)));
+ auto update = std::make_unique<SoftmaxGraphUpdate>(tf_softmax, TensorName(node.input(0)));
updates->enroll(std::move(update));
}
diff --git a/compiler/moco/import/src/Nodes/Sqrt.cpp b/compiler/moco/import/src/Nodes/Sqrt.cpp
index f891e48f6..0dbe15ede 100644
--- a/compiler/moco/import/src/Nodes/Sqrt.cpp
+++ b/compiler/moco/import/src/Nodes/Sqrt.cpp
@@ -19,7 +19,8 @@
#include <moco/IR/Nodes/TFSqrt.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
@@ -74,7 +75,7 @@ void SqrtGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderContex
tensor_names->enroll(output_name, tf_sqrt);
// Queue node input update
- auto tf_sqrt_update = stdex::make_unique<TFSqrtGraphUpdate>(tf_sqrt, TensorName(node.input(0)));
+ auto tf_sqrt_update = std::make_unique<TFSqrtGraphUpdate>(tf_sqrt, TensorName(node.input(0)));
updates->enroll(std::move(tf_sqrt_update));
}
diff --git a/compiler/moco/import/src/Nodes/SquaredDifference.cpp b/compiler/moco/import/src/Nodes/SquaredDifference.cpp
index 17a1fe93d..441f02a19 100644
--- a/compiler/moco/import/src/Nodes/SquaredDifference.cpp
+++ b/compiler/moco/import/src/Nodes/SquaredDifference.cpp
@@ -19,7 +19,8 @@
#include <moco/IR/Nodes/TFSquaredDifference.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
@@ -33,7 +34,7 @@ class TFSquaredDifferenceGraphUpdate final : public GraphUpdate
{
public:
TFSquaredDifferenceGraphUpdate(TFSquaredDifference *node, std::vector<TensorName> names)
- : _node(node), _names(names)
+ : _node(node), _names(names)
{
}
@@ -85,7 +86,7 @@ void SquaredDifferenceGraphBuilder::build(const tensorflow::NodeDef &node,
// Queue node input update
auto tf_sqrt_update =
- stdex::make_unique<TFSquaredDifferenceGraphUpdate>(tf_sqdiff, add_input_names);
+ std::make_unique<TFSquaredDifferenceGraphUpdate>(tf_sqdiff, add_input_names);
updates->enroll(std::move(tf_sqrt_update));
}
diff --git a/compiler/moco/import/src/Nodes/Squeeze.cpp b/compiler/moco/import/src/Nodes/Squeeze.cpp
index 1b4ebae6f..b013b840f 100644
--- a/compiler/moco/import/src/Nodes/Squeeze.cpp
+++ b/compiler/moco/import/src/Nodes/Squeeze.cpp
@@ -21,10 +21,11 @@
#include <moco/Names.h>
#include <loco.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
#include <oops/UserExn.h>
+#include <memory>
+
namespace
{
using namespace moco;
@@ -36,7 +37,7 @@ class SqueezeGraphUpdate final : public GraphUpdate
{
public:
SqueezeGraphUpdate(TFSqueeze *node, const TensorName &&input_name)
- : _node(node), _input_name(input_name)
+ : _node(node), _input_name(input_name)
{
// DO NOTHING
}
@@ -105,7 +106,7 @@ void SqueezeGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderCon
TensorName output_name(node.name(), 0);
tensor_names->enroll(output_name, tf_squeeze);
- auto update = stdex::make_unique<SqueezeGraphUpdate>(tf_squeeze, TensorName(node.input(0)));
+ auto update = std::make_unique<SqueezeGraphUpdate>(tf_squeeze, TensorName(node.input(0)));
updates->enroll(std::move(update));
}
diff --git a/compiler/moco/import/src/Nodes/StopGradient.cpp b/compiler/moco/import/src/Nodes/StopGradient.cpp
index 9caec6943..82f49dc4a 100644
--- a/compiler/moco/import/src/Nodes/StopGradient.cpp
+++ b/compiler/moco/import/src/Nodes/StopGradient.cpp
@@ -20,7 +20,8 @@
#include <loco.h>
#include <plier/tf/Convert.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
@@ -80,7 +81,7 @@ void StopGradientGraphBuilder::build(const tensorflow::NodeDef &node,
// Queue node input update
auto tf_stopgradient_update =
- stdex::make_unique<TFStopGradientGraphUpdate>(tf_stopgradient, TensorName(node.input(0)));
+ std::make_unique<TFStopGradientGraphUpdate>(tf_stopgradient, TensorName(node.input(0)));
updates->enroll(std::move(tf_stopgradient_update));
}
diff --git a/compiler/moco/import/src/Nodes/StridedSlice.cpp b/compiler/moco/import/src/Nodes/StridedSlice.cpp
index 06d388be0..b0744a7e2 100644
--- a/compiler/moco/import/src/Nodes/StridedSlice.cpp
+++ b/compiler/moco/import/src/Nodes/StridedSlice.cpp
@@ -24,10 +24,11 @@
#include "Convert.h"
#include <loco.h>
-#include <stdex/Memory.h>
#include <plier/tf/Convert.h>
#include <oops/UserExn.h>
+#include <memory>
+
namespace
{
using namespace moco;
@@ -36,7 +37,7 @@ class TFStridedSliceGraphUpdate final : public GraphUpdate
{
public:
TFStridedSliceGraphUpdate(TFStridedSlice *node, std::vector<TensorName> names)
- : _node(node), _names(names)
+ : _node(node), _names(names)
{
}
@@ -179,7 +180,7 @@ void StridedSliceGraphBuilder::build(const tensorflow::NodeDef &node,
input_names.push_back(TensorName(node.input(2))); // end
input_names.push_back(TensorName(node.input(3))); // strides
- auto tfconv2d_update = stdex::make_unique<TFStridedSliceGraphUpdate>(stridedslice, input_names);
+ auto tfconv2d_update = std::make_unique<TFStridedSliceGraphUpdate>(stridedslice, input_names);
updates->enroll(std::move(tfconv2d_update));
}
diff --git a/compiler/moco/import/src/Nodes/Sub.cpp b/compiler/moco/import/src/Nodes/Sub.cpp
index bdad81d67..4a657663e 100644
--- a/compiler/moco/import/src/Nodes/Sub.cpp
+++ b/compiler/moco/import/src/Nodes/Sub.cpp
@@ -19,7 +19,8 @@
#include <moco/IR/Nodes/TFSub.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
@@ -78,7 +79,7 @@ void SubGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderContext
sub_input_names.push_back(TensorName(node.input(0))); // x
sub_input_names.push_back(TensorName(node.input(1))); // y
- auto tf_sub_update = stdex::make_unique<TFSubGraphUpdate>(tf_sub, sub_input_names);
+ auto tf_sub_update = std::make_unique<TFSubGraphUpdate>(tf_sub, sub_input_names);
updates->enroll(std::move(tf_sub_update));
}
diff --git a/compiler/moco/import/src/Nodes/Tanh.cpp b/compiler/moco/import/src/Nodes/Tanh.cpp
index c89fa862a..3a0b0a334 100644
--- a/compiler/moco/import/src/Nodes/Tanh.cpp
+++ b/compiler/moco/import/src/Nodes/Tanh.cpp
@@ -19,7 +19,8 @@
#include <moco/IR/Nodes/TFTanh.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
namespace
{
@@ -74,7 +75,7 @@ void TanhGraphBuilder::build(const tensorflow::NodeDef &node, GraphBuilderContex
tensor_names->enroll(output_name, tf_tanh);
// Queue node input update
- auto tf_tanh_update = stdex::make_unique<TFTanhGraphUpdate>(tf_tanh, TensorName(node.input(0)));
+ auto tf_tanh_update = std::make_unique<TFTanhGraphUpdate>(tf_tanh, TensorName(node.input(0)));
updates->enroll(std::move(tf_tanh_update));
}
diff --git a/compiler/moco/import/src/TestHelper.test.cpp b/compiler/moco/import/src/TestHelper.test.cpp
index 06c3dd372..d0390ad32 100644
--- a/compiler/moco/import/src/TestHelper.test.cpp
+++ b/compiler/moco/import/src/TestHelper.test.cpp
@@ -17,7 +17,8 @@
#include "TestHelper.h"
#include <moco/IR/Nodes/TFConst.h>
-#include <stdex/Memory.h>
+
+#include <memory>
#include <gtest/gtest.h>
@@ -29,7 +30,7 @@ namespace test
TFNodeBuildTester::TFNodeBuildTester()
{
_graph = loco::make_graph();
- _tensor_names = stdex::make_unique<moco::SymbolTable>();
+ _tensor_names = std::make_unique<moco::SymbolTable>();
}
void TFNodeBuildTester::inputs(const std::vector<std::string> &names)
@@ -71,8 +72,8 @@ void TFNodeBuildTester::run(tensorflow::NodeDef &nodedef, moco::GraphBuilder &gr
{
assert(_output != nullptr);
- auto node_defs = stdex::make_unique<moco::NodeDefTable>();
- auto updates = stdex::make_unique<moco::UpdateQueue>();
+ auto node_defs = std::make_unique<moco::NodeDefTable>();
+ auto updates = std::make_unique<moco::UpdateQueue>();
moco::GraphBuilderContext gb_context(_graph.get(), node_defs.get(), _tensor_names.get(),
updates.get());
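
TestHelper.test.cpp keeps ownership of the tables in local `std::unique_ptr`s and hands only raw `.get()` pointers to `GraphBuilderContext`; the context borrows, and the locals guarantee the tables outlive it. A minimal sketch of that owning/borrowing split (Context and SymbolTable are stand-ins for the moco types):

    #include <memory>

    struct SymbolTable
    {
    };

    struct Context
    {
      explicit Context(SymbolTable *tensors) : _tensors(tensors) {}
      SymbolTable *_tensors; // non-owning; caller keeps the unique_ptr
    };

    int main()
    {
      auto tensors = std::make_unique<SymbolTable>();
      Context ctx(tensors.get()); // borrows; 'tensors' outlives 'ctx'
      (void)ctx;
      return 0;
    }
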
diff --git a/compiler/moco/lang/CMakeLists.txt b/compiler/moco/lang/CMakeLists.txt
index a64fdf92a..2543f2563 100644
--- a/compiler/moco/lang/CMakeLists.txt
+++ b/compiler/moco/lang/CMakeLists.txt
@@ -7,7 +7,6 @@ target_include_directories(moco_lang PRIVATE src)
target_include_directories(moco_lang PUBLIC include)
target_link_libraries(moco_lang PUBLIC loco)
target_link_libraries(moco_lang PRIVATE nncc_common)
-target_link_libraries(moco_lang PRIVATE stdex)
install(TARGETS moco_lang DESTINATION lib) # moco_tf_frontend requires moco_lang
if(NOT ENABLE_TEST)
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFConv2DBackpropInput.h b/compiler/moco/lang/include/moco/IR/Nodes/TFConv2DBackpropInput.h
index 43e620d24..69d867436 100644
--- a/compiler/moco/lang/include/moco/IR/Nodes/TFConv2DBackpropInput.h
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFConv2DBackpropInput.h
@@ -68,7 +68,7 @@ node {
 * Note that this convention differs from loco's canonical convention.
*/
class TFConv2DBackpropInput final
- : public FixedArityNode<3, TFNodeImpl<TFOpcode::Conv2DBackpropInput>>
+ : public FixedArityNode<3, TFNodeImpl<TFOpcode::Conv2DBackpropInput>>
{
public:
loco::Node *input_sizes(void) const { return at(0)->node(); }
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFDepthwiseConv2dNative.h b/compiler/moco/lang/include/moco/IR/Nodes/TFDepthwiseConv2dNative.h
index aefc0b5d9..2d7fa0c10 100644
--- a/compiler/moco/lang/include/moco/IR/Nodes/TFDepthwiseConv2dNative.h
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFDepthwiseConv2dNative.h
@@ -25,7 +25,7 @@ namespace moco
{
class TFDepthwiseConv2dNative final
- : public FixedArityNode<2, TFNodeImpl<TFOpcode::DepthwiseConv2dNative>>
+ : public FixedArityNode<2, TFNodeImpl<TFOpcode::DepthwiseConv2dNative>>
{
public:
loco::Node *input(void) const { return at(0)->node(); }
diff --git a/compiler/moco/lang/include/moco/IR/Nodes/TFFakeQuantWithMinMaxVars.h b/compiler/moco/lang/include/moco/IR/Nodes/TFFakeQuantWithMinMaxVars.h
index ec54da596..55baac7de 100644
--- a/compiler/moco/lang/include/moco/IR/Nodes/TFFakeQuantWithMinMaxVars.h
+++ b/compiler/moco/lang/include/moco/IR/Nodes/TFFakeQuantWithMinMaxVars.h
@@ -25,7 +25,7 @@ namespace moco
{
class TFFakeQuantWithMinMaxVars final
- : public FixedArityNode<3, TFNodeImpl<TFOpcode::FakeQuantWithMinMaxVars>>
+ : public FixedArityNode<3, TFNodeImpl<TFOpcode::FakeQuantWithMinMaxVars>>
{
public:
loco::Node *inputs(void) const { return at(0)->node(); }
diff --git a/compiler/moco/lang/src/IR/TFDialect.cpp b/compiler/moco/lang/src/IR/TFDialect.cpp
index 35bbcc2c9..959ef98f5 100644
--- a/compiler/moco/lang/src/IR/TFDialect.cpp
+++ b/compiler/moco/lang/src/IR/TFDialect.cpp
@@ -21,8 +21,7 @@
#include <loco/IR/GraphInputIndex.h>
#include <loco/IR/GraphOutputIndex.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
#include <stdexcept>
@@ -78,8 +77,8 @@ namespace moco
TFDialect::TFDialect()
{
- service<loco::GraphInputIndexQueryService>(stdex::make_unique<GiiQueryServiceImpl>());
- service<loco::GraphOutputIndexQueryService>(stdex::make_unique<GoiQueryServiceImpl>());
+ service<loco::GraphInputIndexQueryService>(std::make_unique<GiiQueryServiceImpl>());
+ service<loco::GraphOutputIndexQueryService>(std::make_unique<GoiQueryServiceImpl>());
}
loco::Dialect *TFDialect::get(void)
diff --git a/compiler/moco/lang/src/IR/TFNode.cpp b/compiler/moco/lang/src/IR/TFNode.cpp
index ab9356196..b59a505b5 100644
--- a/compiler/moco/lang/src/IR/TFNode.cpp
+++ b/compiler/moco/lang/src/IR/TFNode.cpp
@@ -17,6 +17,8 @@
#include "moco/IR/TFNode.h"
#include "moco/IR/TFDialect.h"
+#include <limits>
+#include <memory>
#include <cassert>
namespace moco
@@ -26,9 +28,6 @@ const loco::Dialect *TFNode::dialect(void) const { return TFDialect::get(); }
} // namespace moco
-// TODO move this to appropriate place
-#include <stdex/Memory.h>
-
namespace moco
{
@@ -60,7 +59,7 @@ loco::GraphInputIndex index(const TFPlaceholder *node)
void index(TFPlaceholder *node, const loco::GraphInputIndex index)
{
- node->annot(stdex::make_unique<GraphInputIndexAnnotation>(index));
+ node->annot(std::make_unique<GraphInputIndexAnnotation>(index));
}
loco::TensorShape tensor_shape(const TFPlaceholder *node)
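
The TFNode.cpp hunk also removes a stray mid-file `#include <stdex/Memory.h>` (already flagged by a TODO) in favor of `<memory>` and `<limits>` at the top. The call it served, `node->annot(std::make_unique<GraphInputIndexAnnotation>(index))`, hands ownership of a heap-allocated annotation to the node. A sketch of that hand-off with stand-in types, not loco's actual annotation API:

    #include <memory>
    #include <utility>

    struct Annotation
    {
      virtual ~Annotation() = default;
    };

    struct IndexAnnotation final : Annotation
    {
      explicit IndexAnnotation(unsigned idx) : index(idx) {}
      unsigned index;
    };

    struct Node
    {
      void annot(std::unique_ptr<Annotation> a) { _annotation = std::move(a); }
      std::unique_ptr<Annotation> _annotation; // node owns its annotation
    };

    int main()
    {
      Node node;
      node.annot(std::make_unique<IndexAnnotation>(0)); // ownership moves in
      return 0;
    }
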
diff --git a/compiler/moco/pass/CMakeLists.txt b/compiler/moco/pass/CMakeLists.txt
index 1eba86283..40c3d5a49 100644
--- a/compiler/moco/pass/CMakeLists.txt
+++ b/compiler/moco/pass/CMakeLists.txt
@@ -9,7 +9,6 @@ target_link_libraries(moco_pass PUBLIC loco)
target_link_libraries(moco_pass PUBLIC logo_core)
target_link_libraries(moco_pass PUBLIC moco_lang)
target_link_libraries(moco_pass PRIVATE moco_support)
-target_link_libraries(moco_pass PRIVATE stdex)
target_link_libraries(moco_pass PRIVATE oops)
install(TARGETS moco_pass DESTINATION lib)
@@ -23,4 +22,3 @@ GTest_AddTest(moco_pass_test ${TESTS})
target_include_directories(moco_pass_test PRIVATE src)
target_link_libraries(moco_pass_test moco_pass)
target_link_libraries(moco_pass_test moco_support)
-target_link_libraries(moco_pass_test stdex)
diff --git a/compiler/moco/pass/include/moco/Pass/Passes/ConstantFoldMul.h b/compiler/moco/pass/include/moco/Pass/Passes/ConstantFoldMul.h
index 5528b8612..a5e25a0ce 100644
--- a/compiler/moco/pass/include/moco/Pass/Passes/ConstantFoldMul.h
+++ b/compiler/moco/pass/include/moco/Pass/Passes/ConstantFoldMul.h
@@ -26,7 +26,7 @@ namespace moco
/**
* @brief Constant folder for Const + Mul -> Const
-*/
+ */
class ConstantFoldMul : public logo::Pass
{
public:
diff --git a/compiler/moco/pass/include/moco/Pass/Passes/ConstantFoldPack.h b/compiler/moco/pass/include/moco/Pass/Passes/ConstantFoldPack.h
index fc6bc0ace..f99c633ac 100644
--- a/compiler/moco/pass/include/moco/Pass/Passes/ConstantFoldPack.h
+++ b/compiler/moco/pass/include/moco/Pass/Passes/ConstantFoldPack.h
@@ -28,7 +28,7 @@ namespace moco
/**
* @brief Constant folder for Const + Pack -> Const
-*/
+ */
class ConstantFoldPack : public logo::Pass
{
public:
diff --git a/compiler/moco/pass/include/moco/Pass/Passes/ConstantFoldStridedSlice.h b/compiler/moco/pass/include/moco/Pass/Passes/ConstantFoldStridedSlice.h
index 1e3492c2c..f57bdc05e 100644
--- a/compiler/moco/pass/include/moco/Pass/Passes/ConstantFoldStridedSlice.h
+++ b/compiler/moco/pass/include/moco/Pass/Passes/ConstantFoldStridedSlice.h
@@ -26,7 +26,7 @@ namespace moco
/**
* @brief Constant folder for Const + StridedSlice -> Const
-*/
+ */
class ConstantFoldStridedSlice : public logo::Pass
{
public:
diff --git a/compiler/moco/pass/include/moco/Pass/Passes/FuseBinaryIntoPreceding.h b/compiler/moco/pass/include/moco/Pass/Passes/FuseBinaryIntoPreceding.h
index 24e3567c0..4d5318c35 100644
--- a/compiler/moco/pass/include/moco/Pass/Passes/FuseBinaryIntoPreceding.h
+++ b/compiler/moco/pass/include/moco/Pass/Passes/FuseBinaryIntoPreceding.h
@@ -26,7 +26,7 @@ namespace moco
/**
* @brief Fuse TFAdd, TFMul to preceding TFConv2D or TFDepthWiseConv2D
-*/
+ */
class FuseBinaryIntoPreceding : public logo::Pass
{
public:
diff --git a/compiler/moco/pass/include/moco/Pass/Passes/ResolveFusedBatchNorm.h b/compiler/moco/pass/include/moco/Pass/Passes/ResolveFusedBatchNorm.h
index ce5ea0bb0..1910a9ac7 100644
--- a/compiler/moco/pass/include/moco/Pass/Passes/ResolveFusedBatchNorm.h
+++ b/compiler/moco/pass/include/moco/Pass/Passes/ResolveFusedBatchNorm.h
@@ -26,7 +26,7 @@ namespace moco
/**
 * @brief Transform TFFusedBatchNorm into TFAdd + TFRsqrt + TFMul + TFBatchNorm

-*/
+ */
class ResolveFusedBatchNorm : public logo::Pass
{
public:
diff --git a/compiler/moco/pass/src/ConstantFoldAdd.test.cpp b/compiler/moco/pass/src/ConstantFoldAdd.test.cpp
index bc9489fbd..fdfbfb8d3 100644
--- a/compiler/moco/pass/src/ConstantFoldAdd.test.cpp
+++ b/compiler/moco/pass/src/ConstantFoldAdd.test.cpp
@@ -19,7 +19,8 @@
#include <moco/IR/TFNodes.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
#include <gtest/gtest.h>
@@ -60,7 +61,7 @@ TEST(ConstantFoldAdd, basic_vector)
}
setup_output_node(&graph, add_node);
- auto pass = stdex::make_unique<moco::ConstantFoldAdd>();
+ auto pass = std::make_unique<moco::ConstantFoldAdd>();
bool cont = true;
while (cont)
{
@@ -92,7 +93,7 @@ TEST(ConstantFoldAdd, basic_refinedet_1)
}
setup_output_node(&graph, add_node);
- auto pass = stdex::make_unique<moco::ConstantFoldAdd>();
+ auto pass = std::make_unique<moco::ConstantFoldAdd>();
bool cont = true;
while (cont)
{
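
The constant-folding tests drive each pass to a fixpoint: `run` returns true while it still changed the graph, and the loop stops on the first false. A self-contained sketch of that driver (Graph and Pass are toy stand-ins for loco::Graph and logo::Pass):

    #include <memory>

    struct Graph
    {
      int foldable = 3; // pretend three nodes can still be folded
    };

    struct Pass
    {
      // Fold one node per run; report whether anything changed.
      bool run(Graph *g) { return g->foldable > 0 ? (--g->foldable, true) : false; }
    };

    int main()
    {
      Graph graph;
      auto pass = std::make_unique<Pass>();
      bool cont = true;
      while (cont)
        cont = pass->run(&graph); // false once nothing is left to fold
      return graph.foldable; // 0 after convergence
    }
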
diff --git a/compiler/moco/pass/src/ConstantFoldHelper.cpp b/compiler/moco/pass/src/ConstantFoldHelper.cpp
index 79b04863c..9dd5e00cd 100644
--- a/compiler/moco/pass/src/ConstantFoldHelper.cpp
+++ b/compiler/moco/pass/src/ConstantFoldHelper.cpp
@@ -164,7 +164,7 @@ void apply_binary_s32(const moco::TFConst *lhs, const moco::TFConst *rhs, moco::
for (uint32_t e = 0; e < nume; e++)
{
output->at<loco::DataType::S32>(e) =
- f.apply(lhs->at<loco::DataType::S32>(e), rhs->at<loco::DataType::S32>(e));
+ f.apply(lhs->at<loco::DataType::S32>(e), rhs->at<loco::DataType::S32>(e));
}
}
@@ -180,7 +180,7 @@ void apply_binary_f32(const moco::TFConst *lhs, const moco::TFConst *rhs, moco::
for (uint32_t e = 0; e < nume; e++)
{
output->at<loco::DataType::FLOAT32>(e) =
- f.apply(lhs->at<loco::DataType::FLOAT32>(e), rhs->at<loco::DataType::FLOAT32>(e));
+ f.apply(lhs->at<loco::DataType::FLOAT32>(e), rhs->at<loco::DataType::FLOAT32>(e));
}
}
diff --git a/compiler/moco/pass/src/ConstantFoldMul.test.cpp b/compiler/moco/pass/src/ConstantFoldMul.test.cpp
index 4e9b78fd4..c7e7d9e65 100644
--- a/compiler/moco/pass/src/ConstantFoldMul.test.cpp
+++ b/compiler/moco/pass/src/ConstantFoldMul.test.cpp
@@ -19,7 +19,8 @@
#include <moco/IR/TFNodes.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
#include <gtest/gtest.h>
@@ -60,7 +61,7 @@ TEST(ConstantFoldMul, basic_vector)
}
setup_output_node(&graph, mul_node);
- auto pass = stdex::make_unique<moco::ConstantFoldMul>();
+ auto pass = std::make_unique<moco::ConstantFoldMul>();
bool cont = true;
while (cont)
{
@@ -92,7 +93,7 @@ TEST(ConstantFoldMul, basic_refinedet_1)
}
setup_output_node(&graph, mul_node);
- auto pass = stdex::make_unique<moco::ConstantFoldMul>();
+ auto pass = std::make_unique<moco::ConstantFoldMul>();
bool cont = true;
while (cont)
{
diff --git a/compiler/moco/pass/src/ConstantFoldPack.test.cpp b/compiler/moco/pass/src/ConstantFoldPack.test.cpp
index cb6eff0c8..c0fa48c7b 100644
--- a/compiler/moco/pass/src/ConstantFoldPack.test.cpp
+++ b/compiler/moco/pass/src/ConstantFoldPack.test.cpp
@@ -19,7 +19,8 @@
#include <moco/IR/TFNodes.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
#include <gtest/gtest.h>
@@ -69,7 +70,7 @@ TEST(ConstantFoldPack, basic_scalar4_vector)
identity->input(pack_node);
setup_output_node(&graph, identity);
- auto pass = stdex::make_unique<moco::ConstantFoldPack>();
+ auto pass = std::make_unique<moco::ConstantFoldPack>();
bool cont = true;
while (cont)
{
diff --git a/compiler/moco/pass/src/ConstantFoldStridedSlice.test.cpp b/compiler/moco/pass/src/ConstantFoldStridedSlice.test.cpp
index b5bada221..3e8449977 100644
--- a/compiler/moco/pass/src/ConstantFoldStridedSlice.test.cpp
+++ b/compiler/moco/pass/src/ConstantFoldStridedSlice.test.cpp
@@ -19,7 +19,8 @@
#include <moco/IR/TFNodes.h>
#include <loco.h>
-#include <stdex/Memory.h>
+
+#include <memory>
#include <gtest/gtest.h>
@@ -83,7 +84,7 @@ TEST(ConstantFoldStridedSlice, basic_matrix55_11)
}
setup_output_node(&graph, sslice_node);
- auto pass = stdex::make_unique<moco::ConstantFoldStridedSlice>();
+ auto pass = std::make_unique<moco::ConstantFoldStridedSlice>();
bool cont = true;
while (cont)
{
@@ -121,7 +122,7 @@ TEST(ConstantFoldStridedSlice, basic_vector4_0)
}
setup_output_node(&graph, sslice_node);
- auto pass = stdex::make_unique<moco::ConstantFoldStridedSlice>();
+ auto pass = std::make_unique<moco::ConstantFoldStridedSlice>();
bool cont = true;
while (cont)
{
@@ -157,7 +158,7 @@ TEST(ConstantFoldStridedSlice, basic_vector4_1)
}
setup_output_node(&graph, sslice_node);
- auto pass = stdex::make_unique<moco::ConstantFoldStridedSlice>();
+ auto pass = std::make_unique<moco::ConstantFoldStridedSlice>();
bool cont = true;
while (cont)
{
@@ -193,7 +194,7 @@ TEST(ConstantFoldStridedSlice, basic_vector4_2)
}
setup_output_node(&graph, sslice_node);
- auto pass = stdex::make_unique<moco::ConstantFoldStridedSlice>();
+ auto pass = std::make_unique<moco::ConstantFoldStridedSlice>();
bool cont = true;
while (cont)
{
diff --git a/compiler/moco/pass/src/Passes/FuseBinaryIntoPreceding.cpp b/compiler/moco/pass/src/Passes/FuseBinaryIntoPreceding.cpp
index f97546a80..9374dd5f9 100644
--- a/compiler/moco/pass/src/Passes/FuseBinaryIntoPreceding.cpp
+++ b/compiler/moco/pass/src/Passes/FuseBinaryIntoPreceding.cpp
@@ -318,7 +318,7 @@ bool fuse_to_preceding(loco::Graph *graph, moco::TFMul *node)
fused_node = fused_conv_node<FuseType::Conv2D, moco::TFConv2D>(graph, mulparam, conv2d);
else if (auto dw_conv2d = dynamic_cast<moco::TFDepthwiseConv2dNative *>(precedingOp))
fused_node = fused_conv_node<FuseType::DepthwiseConv2D, moco::TFDepthwiseConv2dNative>(
- graph, mulparam, dw_conv2d);
+ graph, mulparam, dw_conv2d);
// Not ready yet
if (fused_node == nullptr)
@@ -515,7 +515,7 @@ bool FuseBinaryIntoPreceding::run(loco::Graph *graph)
}
}
{
- // TODO support Div
+ // TODO support Div
}
{
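
FuseBinaryIntoPreceding folds an elementwise TFMul that follows a (depthwise) convolution into the convolution's kernel, since scaling an output channel by m is the same as scaling that channel's weights by m. A scalar sketch of the weight rewrite, assuming the output channel is the outermost kernel dimension (moco's actual kernel layout may differ):

    #include <cstddef>
    #include <vector>

    // conv(x, W)[c] * mul[c] == conv(x, W')[c] with W'[c] = W[c] * mul[c]
    void fuse_mul_into_weights(std::vector<float> &weights,
                               const std::vector<float> &mul,
                               std::size_t out_channels)
    {
      const std::size_t per_channel = weights.size() / out_channels;
      for (std::size_t c = 0; c < out_channels; ++c)
        for (std::size_t e = 0; e < per_channel; ++e)
          weights[c * per_channel + e] *= mul[c];
    }
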
diff --git a/compiler/moco/pass/src/Passes/ResolveSquaredDifference.cpp b/compiler/moco/pass/src/Passes/ResolveSquaredDifference.cpp
index b66add1ae..44e92e9a7 100644
--- a/compiler/moco/pass/src/Passes/ResolveSquaredDifference.cpp
+++ b/compiler/moco/pass/src/Passes/ResolveSquaredDifference.cpp
@@ -24,8 +24,6 @@
#include <loco/IR/NodeShape.h>
#include <loco/Service/ShapeInference.h>
-#include <stdex/Memory.h>
-
namespace
{
diff --git a/compiler/moco/requires.cmake b/compiler/moco/requires.cmake
index 1a7d36454..18b3a76aa 100644
--- a/compiler/moco/requires.cmake
+++ b/compiler/moco/requires.cmake
@@ -1,6 +1,5 @@
require("loco")
require("locop")
-require("stdex")
require("moco-log")
require("plier-tf")
require("mio-tf")
diff --git a/compiler/moco/service/CMakeLists.txt b/compiler/moco/service/CMakeLists.txt
index dff0233b1..5213f718e 100644
--- a/compiler/moco/service/CMakeLists.txt
+++ b/compiler/moco/service/CMakeLists.txt
@@ -9,7 +9,6 @@ target_link_libraries(moco_service PUBLIC loco)
target_link_libraries(moco_service PUBLIC moco_lang)
target_link_libraries(moco_service PRIVATE moco_support)
target_link_libraries(moco_service PRIVATE nncc_common)
-target_link_libraries(moco_service PRIVATE stdex)
target_link_libraries(moco_service PRIVATE oops)
install(TARGETS moco_service DESTINATION lib)
diff --git a/compiler/moco/service/src/Service/TFShapeInferenceRule.cpp b/compiler/moco/service/src/Service/TFShapeInferenceRule.cpp
index 98434155e..6a9864dc5 100644
--- a/compiler/moco/service/src/Service/TFShapeInferenceRule.cpp
+++ b/compiler/moco/service/src/Service/TFShapeInferenceRule.cpp
@@ -302,7 +302,7 @@ public:
// output count is from input count, depth is from kernel 'CM' which is dim(2) * dim(3)
auto output_feature_shape = input_feature_shape;
output_feature_shape.depth() =
- loco::Dimension(ker_tensor_shape.dim(2).value() * ker_tensor_shape.dim(3).value());
+ loco::Dimension(ker_tensor_shape.dim(2).value() * ker_tensor_shape.dim(3).value());
auto output_plane_shape = infer_plane_shape(input_plane_shape);
diff --git a/compiler/moco/support/include/moco/Support/TFShapeInferenceHelper.h b/compiler/moco/support/include/moco/Support/TFShapeInferenceHelper.h
index 52324700a..c8a547681 100644
--- a/compiler/moco/support/include/moco/Support/TFShapeInferenceHelper.h
+++ b/compiler/moco/support/include/moco/Support/TFShapeInferenceHelper.h
@@ -136,11 +136,11 @@ protected:
if (_padding == "VALID")
{
res.height =
- (p.input.height.value() + p.stride.height.value() - p.effective_window.height.value()) /
- p.stride.height.value();
+ (p.input.height.value() + p.stride.height.value() - p.effective_window.height.value()) /
+ p.stride.height.value();
res.width =
- (p.input.width.value() + p.stride.width.value() - p.effective_window.width.value()) /
- p.stride.width.value();
+ (p.input.width.value() + p.stride.width.value() - p.effective_window.width.value()) /
+ p.stride.width.value();
}
else if (_padding == "SAME")
{
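
For reference, the reindented VALID branch above computes the output plane size as (integer division supplies the floor):

    H_{\text{out}} = \left\lfloor \frac{H_{\text{in}} + s_h - w_h}{s_h} \right\rfloor,
    \qquad
    W_{\text{out}} = \left\lfloor \frac{W_{\text{in}} + s_w - w_w}{s_w} \right\rfloor

where w_h, w_w are the effective window extents, s_h, s_w the strides; this is algebraically the familiar ceil((H_in - w_h + 1) / s_h) form usually quoted for VALID padding.
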
diff --git a/compiler/moco/support/src/TFShapeInferenceHelper.cpp b/compiler/moco/support/src/TFShapeInferenceHelper.cpp
index 13e514a78..605fb9c37 100644
--- a/compiler/moco/support/src/TFShapeInferenceHelper.cpp
+++ b/compiler/moco/support/src/TFShapeInferenceHelper.cpp
@@ -66,7 +66,7 @@ private:
};
/**
- * @breif Expand shape x and y to same rank by align right and filling with 1
+ * @brief Expand shape x and y to same rank by align right and filling with 1
*/
void expand_rank(loco::TensorShape &x, loco::TensorShape &y)
{
@@ -86,7 +86,7 @@ void expand_rank(loco::TensorShape &x, loco::TensorShape &y)
}
/**
- * @breif Returns shape of expanded dimension of input x and y having same rank
+ * @brief Returns shape of expanded dimension of input x and y having same rank
*/
loco::TensorShape expand_dimension(const loco::TensorShape &x, const loco::TensorShape &y)
{
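
The corrected doxygen above documents expand_rank, which right-aligns two shapes and pads the shorter one with 1s, the standard broadcasting prelude. A minimal sketch with plain vectors instead of loco::TensorShape:

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    using Shape = std::vector<uint32_t>;

    // Right-align both shapes to the same rank by prepending 1s.
    void expand_rank(Shape &x, Shape &y)
    {
      const std::size_t rank = std::max(x.size(), y.size());
      x.insert(x.begin(), rank - x.size(), 1u);
      y.insert(y.begin(), rank - y.size(), 1u);
    }
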
diff --git a/compiler/morph/CMakeLists.txt b/compiler/morph/CMakeLists.txt
index ec7da8d30..5a5ae2623 100644
--- a/compiler/morph/CMakeLists.txt
+++ b/compiler/morph/CMakeLists.txt
@@ -8,11 +8,11 @@ target_include_directories(morph PUBLIC include)
target_link_libraries(morph PRIVATE nncc_common)
target_link_libraries(morph PUBLIC angkor)
-nnas_find_package(GTest QUIET)
-
-if(NOT GTest_FOUND)
+if(NOT ENABLE_TEST)
return()
-endif(NOT GTest_FOUND)
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest QUIET)
add_executable(morph_test ${TESTS})
target_link_libraries(morph_test morph)
diff --git a/compiler/nest/core/CMakeLists.txt b/compiler/nest/core/CMakeLists.txt
index b603f9ae9..4f17db3b4 100644
--- a/compiler/nest/core/CMakeLists.txt
+++ b/compiler/nest/core/CMakeLists.txt
@@ -15,11 +15,11 @@ foreach(EXAMPLE_FILE IN ITEMS ${EXAMPLE_FILES})
target_link_libraries(${TARGET_NAME} nest_core)
endforeach(EXAMPLE_FILE)
-nnas_find_package(GTest QUIET)
-
-if(NOT GTest_FOUND)
+if(NOT ENABLE_TEST)
return()
-endif(NOT GTest_FOUND)
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest QUIET)
add_executable(nest_core_test ${TESTS})
target_link_libraries(nest_core_test gtest_main)
diff --git a/compiler/nest/core/include/nest/expr/AddNode.h b/compiler/nest/core/include/nest/expr/AddNode.h
index b9b5afb22..bb95692b6 100644
--- a/compiler/nest/core/include/nest/expr/AddNode.h
+++ b/compiler/nest/core/include/nest/expr/AddNode.h
@@ -30,7 +30,7 @@ class AddNode final : public Node
{
public:
AddNode(const std::shared_ptr<expr::Node> &lhs, const std::shared_ptr<expr::Node> &rhs)
- : _lhs{lhs}, _rhs{rhs}
+ : _lhs{lhs}, _rhs{rhs}
{
// DO NOTHING
}
diff --git a/compiler/nest/core/include/nest/expr/DerefNode.h b/compiler/nest/core/include/nest/expr/DerefNode.h
index 19adfe3b3..8e3cc5690 100644
--- a/compiler/nest/core/include/nest/expr/DerefNode.h
+++ b/compiler/nest/core/include/nest/expr/DerefNode.h
@@ -31,7 +31,7 @@ class DerefNode final : public Node
public:
template <typename... Args>
DerefNode(const DomainID &id, Args &&... indicies)
- : _id{id}, _sub{std::forward<Args>(indicies)...}
+ : _id{id}, _sub{std::forward<Args>(indicies)...}
{
// DO NOTHING
}
diff --git a/compiler/nest/core/include/nest/expr/MulNode.h b/compiler/nest/core/include/nest/expr/MulNode.h
index f388b33a3..bbf64d9bc 100644
--- a/compiler/nest/core/include/nest/expr/MulNode.h
+++ b/compiler/nest/core/include/nest/expr/MulNode.h
@@ -30,7 +30,7 @@ class MulNode final : public Node
{
public:
MulNode(const std::shared_ptr<expr::Node> &lhs, const std::shared_ptr<expr::Node> &rhs)
- : _lhs{lhs}, _rhs{rhs}
+ : _lhs{lhs}, _rhs{rhs}
{
// DO NOTHING
}
diff --git a/compiler/nest/core/src/Block.test.cpp b/compiler/nest/core/src/Block.test.cpp
index d8faa0bdb..c48fcfa35 100644
--- a/compiler/nest/core/src/Block.test.cpp
+++ b/compiler/nest/core/src/Block.test.cpp
@@ -24,7 +24,7 @@ struct DummyNode final : public nest::stmt::Node
{
// Dummy Node for testing
};
-}
+} // namespace
TEST(BLOCK, use_case_1)
{
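
The nest test hunks only annotate the closing braces of anonymous namespaces; the `} // namespace` comment is the convention clang-format enforces when `FixNamespaceComments` is enabled (an assumption about the project's configuration) and makes long files easier to scan:

    namespace
    {

    struct DummyNode
    {
      // test-only stand-in, as in the hunks
    };

    } // namespace
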
diff --git a/compiler/nest/core/src/Closure.test.cpp b/compiler/nest/core/src/Closure.test.cpp
index 495e2186a..458179fb8 100644
--- a/compiler/nest/core/src/Closure.test.cpp
+++ b/compiler/nest/core/src/Closure.test.cpp
@@ -23,7 +23,7 @@ namespace
struct DummyNode final : public nest::expr::Node
{
};
-}
+} // namespace
TEST(Closure, ctor)
{
diff --git a/compiler/nest/core/src/Expr.test.cpp b/compiler/nest/core/src/Expr.test.cpp
index 2e26c234a..1b2e7135a 100644
--- a/compiler/nest/core/src/Expr.test.cpp
+++ b/compiler/nest/core/src/Expr.test.cpp
@@ -25,7 +25,7 @@ namespace
struct DummyNode final : public nest::expr::Node
{
};
-}
+} // namespace
TEST(EXPR, operator_sum)
{
diff --git a/compiler/nest/core/src/Ret.test.cpp b/compiler/nest/core/src/Ret.test.cpp
index a85223578..98f47d897 100644
--- a/compiler/nest/core/src/Ret.test.cpp
+++ b/compiler/nest/core/src/Ret.test.cpp
@@ -23,7 +23,7 @@ namespace
struct DummyNode final : public nest::expr::Node
{
};
-}
+} // namespace
TEST(RET, ctor)
{
diff --git a/compiler/nest/core/src/expr/AddNode.test.cpp b/compiler/nest/core/src/expr/AddNode.test.cpp
index dba6cc826..d8ef1d08b 100644
--- a/compiler/nest/core/src/expr/AddNode.test.cpp
+++ b/compiler/nest/core/src/expr/AddNode.test.cpp
@@ -25,7 +25,7 @@ namespace
struct DummyNode final : public nest::expr::Node
{
};
-}
+} // namespace
TEST(ADD_NODE, cast)
{
diff --git a/compiler/nest/core/src/expr/DerefNode.test.cpp b/compiler/nest/core/src/expr/DerefNode.test.cpp
index 125d8bf1e..d0badd509 100644
--- a/compiler/nest/core/src/expr/DerefNode.test.cpp
+++ b/compiler/nest/core/src/expr/DerefNode.test.cpp
@@ -25,7 +25,7 @@ namespace
struct DummyNode final : public nest::expr::Node
{
};
-}
+} // namespace
TEST(DEREF_NODE, cast)
{
diff --git a/compiler/nest/core/src/expr/MulNode.test.cpp b/compiler/nest/core/src/expr/MulNode.test.cpp
index 85cb5a56e..bccbcb3b5 100644
--- a/compiler/nest/core/src/expr/MulNode.test.cpp
+++ b/compiler/nest/core/src/expr/MulNode.test.cpp
@@ -25,7 +25,7 @@ namespace
struct DummyNode final : public nest::expr::Node
{
};
-}
+} // namespace
TEST(MUL_NODE, cast)
{
diff --git a/compiler/nest/core/src/stmt/PushNode.test.cpp b/compiler/nest/core/src/stmt/PushNode.test.cpp
index c02c69220..fb58a125e 100644
--- a/compiler/nest/core/src/stmt/PushNode.test.cpp
+++ b/compiler/nest/core/src/stmt/PushNode.test.cpp
@@ -25,7 +25,7 @@ namespace
struct DummyExprNode final : public nest::expr::Node
{
};
-}
+} // namespace
TEST(STMT_PUSH_NODE, cast)
{
diff --git a/compiler/nike/CMakeLists.txt b/compiler/nike/CMakeLists.txt
index 737c73b8f..6bd3199e3 100644
--- a/compiler/nike/CMakeLists.txt
+++ b/compiler/nike/CMakeLists.txt
@@ -5,11 +5,11 @@ list(REMOVE_ITEM SOURCES ${TESTS})
add_library(nike STATIC ${SOURCES})
target_include_directories(nike PUBLIC include)
-nnas_find_package(GTest QUIET)
-
-if(NOT GTest_FOUND)
+if(NOT ENABLE_TEST)
return()
-endif(NOT GTest_FOUND)
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest QUIET)
GTest_AddTest(nike_test ${TESTS})
target_link_libraries(nike_test nike)
diff --git a/compiler/nnc/backends/acl_soft_backend/AclCppGenerator.cpp b/compiler/nnc/backends/acl_soft_backend/AclCppGenerator.cpp
index 3a5b9ecaf..cad05cc1d 100644
--- a/compiler/nnc/backends/acl_soft_backend/AclCppGenerator.cpp
+++ b/compiler/nnc/backends/acl_soft_backend/AclCppGenerator.cpp
@@ -30,7 +30,7 @@ using namespace std;
namespace fs = boost::filesystem;
AclCppCodeGenerator::AclCppCodeGenerator(string output_dir, string artifact_name)
- : _output_dir(std::move(output_dir)), _artifact_name(std::move(artifact_name))
+ : _output_dir(std::move(output_dir)), _artifact_name(std::move(artifact_name))
{
}
diff --git a/compiler/nnc/backends/acl_soft_backend/AclCppOpGenerator.cpp b/compiler/nnc/backends/acl_soft_backend/AclCppOpGenerator.cpp
index b5e3734ae..0abe3ec72 100644
--- a/compiler/nnc/backends/acl_soft_backend/AclCppOpGenerator.cpp
+++ b/compiler/nnc/backends/acl_soft_backend/AclCppOpGenerator.cpp
@@ -33,8 +33,8 @@ using namespace std;
using namespace mir;
AclCppOpGenerator::AclCppOpGenerator(const string &name, ostream &par_out)
- : _parOut(par_out), _module(name), _constrBlock(nullptr), _infBlock(nullptr),
- _clScheduler(AF::id("arm_compute::CLScheduler"))
+ : _parOut(par_out), _module(name), _constrBlock(nullptr), _infBlock(nullptr),
+ _clScheduler(AF::id("arm_compute::CLScheduler"))
{
}
@@ -60,13 +60,14 @@ const ArtifactModule &AclCppOpGenerator::generate(mir::Graph *g)
_parInVar = _artifactClass->var(false, "std::ifstream", "_parIn");
_parIn = _parInVar->use();
string par_file_name = _module.name() + ".par";
- _constrBlock->call("open", {AF::lit("\"" + par_file_name + "\""),
- AF::lit("std::ios_base::in | std::ios_base::binary")},
- _parIn);
+ _constrBlock->call(
+ "open",
+ {AF::lit("\"" + par_file_name + "\""), AF::lit("std::ios_base::in | std::ios_base::binary")},
+ _parIn);
auto file_fail = _constrBlock->ifCond(AF::call("fail", {}, _parIn));
auto file_fail_block = file_fail->getBlock();
file_fail_block->addStatement(
- AF::lit("throw std::string(\"Failed to open file: " + par_file_name + " for reading\")"));
+ AF::lit("throw std::string(\"Failed to open file: " + par_file_name + " for reading\")"));
// Traverse the computational graph.
g->accept(this);
@@ -89,8 +90,8 @@ void AclCppOpGenerator::visit(ops::ConcatOp &op)
const auto *ir_output = op.getOutput(0);
static const char *axis_names[] = {
- "arm_compute::DataLayoutDimension::BATCHES", "arm_compute::DataLayoutDimension::CHANNEL",
- "arm_compute::DataLayoutDimension::HEIGHT", "arm_compute::DataLayoutDimension::WIDTH"};
+ "arm_compute::DataLayoutDimension::BATCHES", "arm_compute::DataLayoutDimension::CHANNEL",
+ "arm_compute::DataLayoutDimension::HEIGHT", "arm_compute::DataLayoutDimension::WIDTH"};
int axis = op.getAxis();
assert(axis >= 0 && axis < static_cast<int>(sizeof(axis_names) / sizeof(axis_names[0])) &&
@@ -105,8 +106,8 @@ void AclCppOpGenerator::visit(ops::ConcatOp &op)
for (const Operation::Output *ir_input : ir_inputs)
_constrBlock->call("push_back", {AF::ref(AF::id(tensorName(ir_input)))}, inputs);
- auto layer = genLayer("arm_compute::CLConcatenateLayer", prefix,
- {inputs, AF::ref(out), AF::lit(axis_name)});
+ auto layer =
+ genLayer("arm_compute::CLConcatenateLayer", prefix, {inputs, AF::ref(out), AF::lit(axis_name)});
addToPersistentTensors(out);
genLayerExecution(layer);
@@ -214,13 +215,13 @@ shared_ptr<ArtifactVariable> AclCppOpGenerator::genPadStrideInfo(const Op &op, c
string var_name = prefix + "_pad_stride_info";
list<std::shared_ptr<ArtifactExpr>> var_init_params = {
- AF::lit(to_string(strides.dim(1))),
- AF::lit(to_string(strides.dim(0))),
- AF::lit(to_string(padding_before.at(1))),
- AF::lit(to_string(padding_after.at(1))),
- AF::lit(to_string(padding_before.at(0))),
- AF::lit(to_string(padding_after.at(0))),
- AF::lit("arm_compute::DimensionRoundingType::FLOOR")};
+ AF::lit(to_string(strides.dim(1))),
+ AF::lit(to_string(strides.dim(0))),
+ AF::lit(to_string(padding_before.at(1))),
+ AF::lit(to_string(padding_after.at(1))),
+ AF::lit(to_string(padding_before.at(0))),
+ AF::lit(to_string(padding_after.at(0))),
+ AF::lit("arm_compute::DimensionRoundingType::FLOOR")};
auto pad_stride_info_var = block->var(type_name, var_name, {}, var_init_params);
@@ -316,7 +317,7 @@ static bool shouldSerializeConstant(const ops::ConstantOp &op)
// themselves,
// so we don't serialize them here; we also don't serialize tensors from dangling ConstantOp
static std::map<Operation::Type, std::size_t> self_serializing_ops_to_inputs{
- {Operation::Type::conv2D, 1}, {Operation::Type::fullyConnected, 1}};
+ {Operation::Type::conv2D, 1}, {Operation::Type::fullyConnected, 1}};
for (Operation::Use use : op.getOutput(0)->getUses())
{
@@ -420,8 +421,8 @@ void AclCppOpGenerator::visit(ops::PadOp &op)
for (int i = 0; i < ir_input->getShape().rank(); ++i)
{
auto pad_var = _constrBlock->var(
- "arm_compute::PaddingInfo", prefix + "_pad_" + to_string(i), {},
- {AF::lit(to_string(padding_before[i])), AF::lit(to_string(padding_after[i]))});
+ "arm_compute::PaddingInfo", prefix + "_pad_" + to_string(i), {},
+ {AF::lit(to_string(padding_before[i])), AF::lit(to_string(padding_after[i]))});
auto pad = pad_var->use();
_constrBlock->call("push_back", {pad}, pad_list);
}
@@ -430,7 +431,7 @@ void AclCppOpGenerator::visit(ops::PadOp &op)
// FIXME Set up the `constant_value` parameter.
assert(op.getPaddingValue() == 0.0f);
auto layer =
- genLayer("arm_compute::CLPadLayer", prefix, {AF::ref(input), AF::ref(out), pad_list});
+ genLayer("arm_compute::CLPadLayer", prefix, {AF::ref(input), AF::ref(out), pad_list});
genLayerExecution(layer);
}
@@ -449,7 +450,7 @@ void AclCppOpGenerator::genPooling(Op &op, const std::string &pooling_type, bool
// Transpose data from MIR format to format compatible with ACL
const string transposed_input_name = output_tensor_name + "transposed_input";
shared_ptr<ArtifactId> transposed_input =
- genTransposeMIRtoACL(transposed_input_name, ir_input->getShape(), in_id);
+ genTransposeMIRtoACL(transposed_input_name, ir_input->getShape(), in_id);
const string layer_name = output_tensor_name + "_pooling_layer";
@@ -459,31 +460,31 @@ void AclCppOpGenerator::genPooling(Op &op, const std::string &pooling_type, bool
// Create kernel window info
shared_ptr<ArtifactVariable> kernel_window_var = _constrBlock->var(
- "arm_compute::Size2D", layer_name + "_kernel_window", {},
- {AF::lit(to_string(op.getWindowSize()[1])), AF::lit(to_string(op.getWindowSize()[0]))});
+ "arm_compute::Size2D", layer_name + "_kernel_window", {},
+ {AF::lit(to_string(op.getWindowSize()[1])), AF::lit(to_string(op.getWindowSize()[0]))});
shared_ptr<ArtifactId> kernel_window = kernel_window_var->use();
// Create pooling info: pooling type, kernel info, strides, etc
shared_ptr<ArtifactVariable> pooling_info_var =
- _constrBlock->var("arm_compute::PoolingLayerInfo", layer_name + "_pooling_info", {},
- {AF::lit(pooling_type), kernel_window, pad_stride_info,
- AF::lit(exclude_padding ? "true" : "false")});
+ _constrBlock->var("arm_compute::PoolingLayerInfo", layer_name + "_pooling_info", {},
+ {AF::lit(pooling_type), kernel_window, pad_stride_info,
+ AF::lit(exclude_padding ? "true" : "false")});
shared_ptr<ArtifactId> pooling_info = pooling_info_var->use();
// Generate auxiliary tensor to hold transposed output of pool in NCHW format
Shape transposed_output_shape = transposeShape<0, 3, 1, 2>(ir_output->getShape());
shared_ptr<ArtifactId> transposed_output =
- genTensor(layer_name + "_out_transpose", transposed_output_shape);
+ genTensor(layer_name + "_out_transpose", transposed_output_shape);
// Actual layer creation
shared_ptr<ArtifactId> layer =
- genLayer("arm_compute::CLPoolingLayer", layer_name,
- {AF::ref(transposed_input), AF::ref(transposed_output), pooling_info});
+ genLayer("arm_compute::CLPoolingLayer", layer_name,
+ {AF::ref(transposed_input), AF::ref(transposed_output), pooling_info});
genTensorAllocation(_infBlock, transposed_output);
genLayerExecution(layer);
shared_ptr<ArtifactId> output =
- genTransposeACLtoMIR(output_tensor_name, transposed_output_shape, transposed_output);
+ genTransposeACLtoMIR(output_tensor_name, transposed_output_shape, transposed_output);
genTensorDeallocation(_infBlock, transposed_input);
genTensorDeallocation(_infBlock, transposed_output);
@@ -521,13 +522,13 @@ void AclCppOpGenerator::genConvolution(Op &op, const string &acl_func_name, cons
// Generate auxiliary tensor to hold transposed input of convolution in NCHW format
shared_ptr<ArtifactId> transposed_input =
- genTransposeMIRtoACL(output_tensor_name + "_transposed_input", ir_input->getShape(), input);
+ genTransposeMIRtoACL(output_tensor_name + "_transposed_input", ir_input->getShape(), input);
// Create the transposed output tensor in the DOM.
const string transposed_output_name = output_tensor_name + "_transposed_output";
Shape transposed_output_shape = transposeShape<0, 3, 1, 2>(ir_output->getShape());
shared_ptr<ArtifactId> transposed_output =
- genTensor(transposed_output_name, transposed_output_shape);
+ genTensor(transposed_output_name, transposed_output_shape);
string operation_name = output_tensor_name + suffix;
@@ -564,7 +565,7 @@ void AclCppOpGenerator::genConvolution(Op &op, const string &acl_func_name, cons
// Generate auxiliary tensor to hold transposed output of convolution in NHWC format
shared_ptr<ArtifactId> output =
- genTransposeACLtoMIR(output_tensor_name, transposed_output_shape, transposed_output);
+ genTransposeACLtoMIR(output_tensor_name, transposed_output_shape, transposed_output);
genTensorDeallocation(_infBlock, transposed_input);
genTensorDeallocation(_infBlock, transposed_output);
@@ -589,9 +590,9 @@ void AclCppOpGenerator::genActivation(const Operation &op, const std::string &ac
// constructor. This instance provides information about the concrete activation function,
// like ReLU, Tanh etc. and two optional parameters (alpha and beta) needed by some activations.
auto activation_info_var = _constrBlock->var(
- "arm_compute::ActivationLayerInfo", prefix + "_activation_info", {},
- {AF::lit("arm_compute::ActivationLayerInfo::ActivationFunction::" + activation_name),
- AF::lit(to_string(a)), AF::lit(to_string(b))});
+ "arm_compute::ActivationLayerInfo", prefix + "_activation_info", {},
+ {AF::lit("arm_compute::ActivationLayerInfo::ActivationFunction::" + activation_name),
+ AF::lit(to_string(a)), AF::lit(to_string(b))});
auto activation_info = activation_info_var->use();
// Create an instance of the CLActivationLayer class as a member of the artifact class.
@@ -619,9 +620,10 @@ shared_ptr<ArtifactId> AclCppOpGenerator::genAddition(const string &prefix, size
auto arithmetic_add_layer = arithmetic_add_layer_var->use();
// Generate the call: arithmetic_add_layer.configure(&in1, &in2, &out);
- _constrBlock->call("configure", {AF::ref(in1), AF::ref(in2), AF::ref(out),
- AF::lit("arm_compute::ConvertPolicy::WRAP")},
- arithmetic_add_layer);
+ _constrBlock->call(
+ "configure",
+ {AF::ref(in1), AF::ref(in2), AF::ref(out), AF::lit("arm_compute::ConvertPolicy::WRAP")},
+ arithmetic_add_layer);
// Generate the call: arithmetic_add_layer.run();
_infBlock->call("run", {}, arithmetic_add_layer);
@@ -696,8 +698,8 @@ string AclCppOpGenerator::tensorName(const Operation::Output *ir_tensor) const
if (!tensor_name.empty())
{
tensor_name = "_" + tensor_name;
- replace_if(tensor_name.begin(), tensor_name.end(), [](char c) { return std::isalnum(c) == 0; },
- '_');
+ replace_if(
+ tensor_name.begin(), tensor_name.end(), [](char c) { return std::isalnum(c) == 0; }, '_');
}
else
{
@@ -740,7 +742,7 @@ shared_ptr<ArtifactId> AclCppOpGenerator::genTensor(const string &name, const Sh
const char *type_name = "arm_compute::TensorShape";
shared_ptr<ArtifactId> shape =
- genVectorInitializedVar(_constrBlock, type_name, name + "_shape", shape_vectorized);
+ genVectorInitializedVar(_constrBlock, type_name, name + "_shape", shape_vectorized);
_constrBlock->call("initializeTensor", {id, shape});
if (gen_accessor)
@@ -903,7 +905,7 @@ void AclCppOpGenerator::genTranspose(const std::shared_ptr<nnc::ArtifactId> &inp
// Create operation parameter containing permutation vector
shared_ptr<ArtifactId> perm_vector = genVectorInitializedVar(
- _constrBlock, "arm_compute::PermutationVector", out_name + "_perm_param", acl_perm);
+ _constrBlock, "arm_compute::PermutationVector", out_name + "_perm_param", acl_perm);
// Instantiate the CLPermute object.
string layer_name = out_name + "_transpose_layer";
diff --git a/compiler/nnc/backends/acl_soft_backend/ArtifactModel.cpp b/compiler/nnc/backends/acl_soft_backend/ArtifactModel.cpp
index 8888697e7..bbaa1f523 100644
--- a/compiler/nnc/backends/acl_soft_backend/ArtifactModel.cpp
+++ b/compiler/nnc/backends/acl_soft_backend/ArtifactModel.cpp
@@ -25,8 +25,8 @@ using namespace std;
ArtifactFunctionCall::ArtifactFunctionCall(string func_name,
list<shared_ptr<ArtifactExpr>> param_list,
shared_ptr<ArtifactExpr> on, ArtifactCallType call_type)
- : _funcName(std::move(func_name)), _callType(call_type), _on(std::move(on)),
- _paramList(std::move(param_list))
+ : _funcName(std::move(func_name)), _callType(call_type), _on(std::move(on)),
+ _paramList(std::move(param_list))
{
}
diff --git a/compiler/nnc/backends/acl_soft_backend/ArtifactModel.h b/compiler/nnc/backends/acl_soft_backend/ArtifactModel.h
index 106c9bec3..89d803021 100644
--- a/compiler/nnc/backends/acl_soft_backend/ArtifactModel.h
+++ b/compiler/nnc/backends/acl_soft_backend/ArtifactModel.h
@@ -204,7 +204,7 @@ class ArtifactUnaryExpr : public ArtifactExpr
{
public:
ArtifactUnaryExpr(ArtifactUnOp op, std::shared_ptr<ArtifactExpr> expr)
- : _op(op), _expr(std::move(expr))
+ : _op(op), _expr(std::move(expr))
{
}
@@ -248,7 +248,7 @@ class ArtifactBinaryExpr : public ArtifactExpr
public:
ArtifactBinaryExpr(ArtifactBinOp op, std::shared_ptr<ArtifactExpr> left,
std::shared_ptr<ArtifactExpr> right)
- : _op(op), _left(std::move(left)), _right(std::move(right))
+ : _op(op), _left(std::move(left)), _right(std::move(right))
{
}
@@ -271,7 +271,7 @@ class ArtifactIndex : public ArtifactExpr
{
public:
ArtifactIndex(std::shared_ptr<ArtifactExpr> expr, std::shared_ptr<ArtifactExpr> ind)
- : _expr(std::move(expr)), _ind(std::move(ind))
+ : _expr(std::move(expr)), _ind(std::move(ind))
{
}
@@ -328,8 +328,8 @@ public:
ArtifactVariable(std::string type_name, std::string var_name,
std::list<std::shared_ptr<ArtifactExpr>> dimensions = {},
std::list<std::shared_ptr<ArtifactExpr>> initializers = {})
- : _typeName(std::move(type_name)), _dimensions(std::move(dimensions)),
- _initializers(std::move(initializers)), ArtifactNamed(std::move(var_name))
+ : _typeName(std::move(type_name)), _dimensions(std::move(dimensions)),
+ _initializers(std::move(initializers)), ArtifactNamed(std::move(var_name))
{
}
@@ -469,7 +469,7 @@ public:
explicit ArtifactForLoop(std::shared_ptr<ArtifactVariable> init = nullptr,
std::shared_ptr<ArtifactExpr> cond = nullptr,
std::shared_ptr<ArtifactExpr> iter = nullptr)
- : _init(std::move(init)), _cond(std::move(cond)), _iter(std::move(iter))
+ : _init(std::move(init)), _cond(std::move(cond)), _iter(std::move(iter))
{
}
@@ -527,7 +527,7 @@ public:
*/
ArtifactFunction(std::string ret_type_name, const std::string &func_name,
std::list<std::shared_ptr<ArtifactVariable>> params = {})
- : ArtifactNamed(func_name), _params(std::move(params)), _retTypeName(std::move(ret_type_name))
+ : ArtifactNamed(func_name), _params(std::move(params)), _retTypeName(std::move(ret_type_name))
{
}
@@ -568,7 +568,7 @@ public:
const std::string &var_name,
const std::list<std::shared_ptr<ArtifactExpr>> &dimensions = {},
const std::list<std::shared_ptr<ArtifactExpr>> &initializers = {})
- : ArtifactClassMember(owner), ArtifactVariable(type_name, var_name, dimensions, initializers)
+ : ArtifactClassMember(owner), ArtifactVariable(type_name, var_name, dimensions, initializers)
{
}
@@ -584,7 +584,7 @@ public:
ArtifactClassFunction(const ArtifactClass *owner, const std::string &ret_type_name,
const std::string &func_name,
const std::list<std::shared_ptr<ArtifactVariable>> &params = {})
- : ArtifactClassMember(owner), ArtifactFunction(ret_type_name, func_name, params)
+ : ArtifactClassMember(owner), ArtifactFunction(ret_type_name, func_name, params)
{
}
diff --git a/compiler/nnc/backends/interpreter/InterpreterBackend.cpp b/compiler/nnc/backends/interpreter/InterpreterBackend.cpp
index 923a7cfc7..895daa115 100644
--- a/compiler/nnc/backends/interpreter/InterpreterBackend.cpp
+++ b/compiler/nnc/backends/interpreter/InterpreterBackend.cpp
@@ -104,7 +104,7 @@ static void writeTensorToHDF5File(const TensorVariant &tensor, std::string tenso
static TensorVariant readTensorFromFile(const std::string &filename, const TensorType &type)
{
const std::size_t input_data_size =
- type.getShape().numElements() * getDataTypeSize(type.getElementType());
+ type.getShape().numElements() * getDataTypeSize(type.getElementType());
std::ifstream stream(filename, std::ios::in | std::ios::binary);
if (stream.fail())
@@ -117,9 +117,9 @@ static TensorVariant readTensorFromFile(const std::string &filename, const Tenso
int64_t file_size = end - begin;
if (static_cast<std::size_t>(file_size) != input_data_size)
- throw std::runtime_error("File \"" + filename + "\" has incorrect size: " +
- std::to_string(file_size) + "(expected: " +
- std::to_string(input_data_size) + ").");
+ throw std::runtime_error("File \"" + filename +
+ "\" has incorrect size: " + std::to_string(file_size) +
+ "(expected: " + std::to_string(input_data_size) + ").");
std::unique_ptr<char[]> data(new char[input_data_size]);
stream.read(data.get(), input_data_size);
@@ -130,7 +130,7 @@ static TensorVariant readTensorFromFile(const std::string &filename, const Tenso
}
InterpreterBackend::InterpreterBackend(std::string input_dir, std::string output_dir)
- : _input_dir(std::move(input_dir)), _output_dir(std::move(output_dir))
+ : _input_dir(std::move(input_dir)), _output_dir(std::move(output_dir))
{
}
diff --git a/compiler/nnc/backends/soft_backend/CPPGenerator.cpp b/compiler/nnc/backends/soft_backend/CPPGenerator.cpp
index 236881b80..097122882 100644
--- a/compiler/nnc/backends/soft_backend/CPPGenerator.cpp
+++ b/compiler/nnc/backends/soft_backend/CPPGenerator.cpp
@@ -80,7 +80,7 @@ static unique_ptr<ofstream> getStream(const string &path)
}
CPPCodeGenerator::CPPCodeGenerator(std::string output_dir, std::string artifact_name)
- : _output_dir(std::move(output_dir)), _artifact_name(std::move(artifact_name))
+ : _output_dir(std::move(output_dir)), _artifact_name(std::move(artifact_name))
{
}
@@ -187,12 +187,14 @@ void CPPCodeGenerator::materializeHeader(ostream &out, const ModelAnalyzer &ma)
string class_name = ma.getModelName() + "Model";
out.write(cpp_header_types, sizeof(cpp_header_types));
- out << "class " << class_name << "\n"
- "{\n"
- "public:\n"
- " "
- << class_name << "(const std::string& parametersPath);\n"
- " ~"
+ out << "class " << class_name
+ << "\n"
+ "{\n"
+ "public:\n"
+ " "
+ << class_name
+ << "(const std::string& parametersPath);\n"
+ " ~"
<< class_name << "();\n";
// generate input setters
if (ma.getInputs().size() == 1)
@@ -215,10 +217,12 @@ void CPPCodeGenerator::materializeHeader(ostream &out, const ModelAnalyzer &ma)
out << " void doInference();\n\n"
"private:\n"
" "
- << class_name << "() = delete;\n"
- " "
- << class_name << "(const " << class_name << "& orig) = delete;\n"
- " "
+ << class_name
+ << "() = delete;\n"
+ " "
+ << class_name << "(const " << class_name
+ << "& orig) = delete;\n"
+ " "
<< class_name << "& operator=(const " << class_name << "& orig) = delete;\n";
// generate input/output tensors
for (const size_t in_tensor_id : ma.getInputs())
@@ -273,8 +277,9 @@ void CPPCodeGenerator::printSetter(ostream &out, const string &class_name,
{
const string &var_name = _formattedTensors[td.id];
- out << "bool " << class_name << "::set" << setter_name << "(const Tensor& t)\n"
- "{\n";
+ out << "bool " << class_name << "::set" << setter_name
+ << "(const Tensor& t)\n"
+ "{\n";
// need to insert input correctness check
const mir::Shape expected = td.shape;
int rank = expected.rank();
@@ -286,9 +291,10 @@ void CPPCodeGenerator::printSetter(ostream &out, const string &class_name,
out << " "
<< "if (t.getShape()[" << i << "] != " << expected.dim(i) << ") return false;\n";
}
- out << " " << var_name << " = t;\n"
- " return true;\n"
- "}\n\n";
+ out << " " << var_name
+ << " = t;\n"
+ " return true;\n"
+ "}\n\n";
}
void CPPCodeGenerator::printGetter(ostream &out, const string &class_name,
@@ -296,11 +302,13 @@ void CPPCodeGenerator::printGetter(ostream &out, const string &class_name,
{
const string &var_name = _formattedTensors[td.id];
- out << "shared_ptr<Tensor> " << class_name << "::get" << getter_name << "()\n"
- "{\n"
- " return "
- << var_name << ";\n"
- "}\n\n";
+ out << "shared_ptr<Tensor> " << class_name << "::get" << getter_name
+ << "()\n"
+ "{\n"
+ " return "
+ << var_name
+ << ";\n"
+ "}\n\n";
}
void CPPCodeGenerator::materializeCall(ostream &out, const ModelAnalyzer &ma,
@@ -435,13 +443,15 @@ void CPPCodeGenerator::materializeCode(ostream &out, const ModelAnalyzer &ma, co
<< "(const string& parametersPath)\n"
"{\n"
" readParameters(_parameters, _paramSize, parametersPath, "
- << s.getFormatVersion() << ", " << s.getModelHash() << ");\n"
- "}\n\n";
+ << s.getFormatVersion() << ", " << s.getModelHash()
+ << ");\n"
+ "}\n\n";
// gen NN destructor
- out << class_name << "::~" << class_name << "()\n"
- "{\n"
- " releaseParameters(_parameters, _paramSize);\n"
- "}\n\n";
+ out << class_name << "::~" << class_name
+ << "()\n"
+ "{\n"
+ " releaseParameters(_parameters, _paramSize);\n"
+ "}\n\n";
// generate input setters
// generate main setter if network has only one
const auto &inputs = ma.getInputs();
@@ -473,8 +483,9 @@ void CPPCodeGenerator::materializeCode(ostream &out, const ModelAnalyzer &ma, co
const TensorDescriptor &td = tensors[output_tensor_id];
printGetter(out, class_name, output_tensor_name, td);
}
- out << "void " << class_name << "::doInference()\n"
- "{\n";
+ out << "void " << class_name
+ << "::doInference()\n"
+ "{\n";
for (size_t output_tensor_id : ma.getPersistentTensors())
{
const string &output_tensor_name = _formattedTensors[output_tensor_id];
diff --git a/compiler/nnc/backends/soft_backend/ModelAnalyzer.cpp b/compiler/nnc/backends/soft_backend/ModelAnalyzer.cpp
index 82e62b531..2d555d0a9 100644
--- a/compiler/nnc/backends/soft_backend/ModelAnalyzer.cpp
+++ b/compiler/nnc/backends/soft_backend/ModelAnalyzer.cpp
@@ -62,7 +62,7 @@ void ModelAnalyzer::appendOperationToInference(Operation *op, const string &func
{
const auto &tensor_name = output.getName();
const auto tensor_id =
- tensor_name.empty() ? declareTemporaryTensor() : declarePersistentTensor(tensor_name);
+ tensor_name.empty() ? declareTemporaryTensor() : declarePersistentTensor(tensor_name);
node_output_tensors.push_back(tensor_id);
}
}
@@ -82,7 +82,7 @@ void ModelAnalyzer::appendOperationToInference(Operation *op, const string &func
std::copy(aux_args.begin(), aux_args.end(), std::back_inserter(node_input_tensors));
unique_ptr<Action> operation_call(new CallFunction(
- op, function_name, std::move(node_input_tensors), std::move(node_output_tensors)));
+ op, function_name, std::move(node_input_tensors), std::move(node_output_tensors)));
_inferenceSequence.push_back(std::move(operation_call));
_opToDescr[op] = _inferenceSequence.back().get();
}
diff --git a/compiler/nnc/backends/soft_backend/ModelAnalyzer.h b/compiler/nnc/backends/soft_backend/ModelAnalyzer.h
index 471c31011..6522bc655 100644
--- a/compiler/nnc/backends/soft_backend/ModelAnalyzer.h
+++ b/compiler/nnc/backends/soft_backend/ModelAnalyzer.h
@@ -42,9 +42,9 @@ class ModelAnalyzer : public mir::Visitor
{
public:
/**
- * @brief constructs inference sequence
- * @param g pointer to graph to linearize
- */
+ * @brief constructs inference sequence
+ * @param g pointer to graph to linearize
+ */
void analyze(const mir::Graph *g);
void visit(mir::ops::AbsOp &) override;
diff --git a/compiler/nnc/backends/soft_backend/SequencedIR.h b/compiler/nnc/backends/soft_backend/SequencedIR.h
index 9a761243e..ff062e043 100644
--- a/compiler/nnc/backends/soft_backend/SequencedIR.h
+++ b/compiler/nnc/backends/soft_backend/SequencedIR.h
@@ -91,7 +91,7 @@ struct TransposeTensor : public Action
{
TransposeTensor(size_t input, size_t output, std::vector<int32_t> &&perm)
- : Action(Type::transposeTensor), perm(std::move(perm)), input(input), output(output)
+ : Action(Type::transposeTensor), perm(std::move(perm)), input(input), output(output)
{
}
@@ -121,8 +121,8 @@ struct CallFunction : public Action
CallFunction(mir::Operation *op, std::string func_name, std::vector<size_t> &&inputs,
std::vector<size_t> &&outputs)
- : Action(Type::callFunction), mirOp(op), funcName(std::move(func_name)), inputs(inputs),
- outputs(outputs), paramStartOffset(0)
+ : Action(Type::callFunction), mirOp(op), funcName(std::move(func_name)), inputs(inputs),
+ outputs(outputs), paramStartOffset(0)
{
}
diff --git a/compiler/nnc/backends/soft_backend/code_snippets/cpp_header_types.def b/compiler/nnc/backends/soft_backend/code_snippets/cpp_header_types.def
index 771329cdd..db46a1e97 100644
--- a/compiler/nnc/backends/soft_backend/code_snippets/cpp_header_types.def
+++ b/compiler/nnc/backends/soft_backend/code_snippets/cpp_header_types.def
@@ -147,7 +147,7 @@ public:
if (!t._managed) {
if (_managed)
- delete _data;
+ delete [] _data;
_managed = false;
_data = t._data;
diff --git a/compiler/nnc/backends/soft_backend/code_snippets/eigen.def b/compiler/nnc/backends/soft_backend/code_snippets/eigen.def
index b02f84bed..b6547d5a6 100644
--- a/compiler/nnc/backends/soft_backend/code_snippets/eigen.def
+++ b/compiler/nnc/backends/soft_backend/code_snippets/eigen.def
@@ -13381,7 +13381,7 @@ struct palign_impl<Offset,Type>\
if (Offset!=0)\
first = Command(first, second, Offset);\
}\
-};\
+};
PALIGN_NEON(0,Packet2d,vextq_f64)
PALIGN_NEON(1,Packet2d,vextq_f64)
#undef PALIGN_NEON
diff --git a/compiler/nnc/driver/Options.cpp b/compiler/nnc/driver/Options.cpp
index e22d01847..c1997fe6a 100644
--- a/compiler/nnc/driver/Options.cpp
+++ b/compiler/nnc/driver/Options.cpp
@@ -35,7 +35,7 @@ Option<bool> caffeFrontend(optname("--caffe"), overview("treat input file as Caf
#else
showopt(false)
#endif // NNC_FRONTEND_CAFFE_ENABLED
- );
+);
Option<bool> onnxFrontend(optname("--onnx"), overview("treat input file as ONNX model"), false,
optional(true), optvalues(""), nullptr, separators(""),
#ifdef NNC_FRONTEND_ONNX_ENABLED
@@ -43,7 +43,7 @@ Option<bool> onnxFrontend(optname("--onnx"), overview("treat input file as ONNX
#else
showopt(false)
#endif // NNC_FRONTEND_ONNX_ENABLED
- );
+);
Option<bool> caffe2Frontend(optname("--caffe2"),
overview("treat input file as Caffe2 model (predict_net.pb)"), false,
@@ -83,16 +83,16 @@ Option<bool> tflFrontend(optname("--tflite"),
#else
showopt(false)
#endif // NNC_FRONTEND_TFLITE_ENABLED
- );
+);
Option<std::string>
- target(optname("--target"),
- overview("select target language to emit for given architecture."
- "Valid values are '" NNC_TARGET_ARM_CPP "', '" NNC_TARGET_X86_CPP
- "', '" NNC_TARGET_ARM_GPU_CPP "', '" NNC_TARGET_INTERPRETER "'"),
- std::string(), optional(false),
- optvalues(NNC_TARGET_ARM_CPP "," NNC_TARGET_X86_CPP "," NNC_TARGET_ARM_GPU_CPP
- "," NNC_TARGET_INTERPRETER),
- nullptr, separators("="));
+ target(optname("--target"),
+ overview("select target language to emit for given architecture."
+ "Valid values are '" NNC_TARGET_ARM_CPP "', '" NNC_TARGET_X86_CPP
+ "', '" NNC_TARGET_ARM_GPU_CPP "', '" NNC_TARGET_INTERPRETER "'"),
+ std::string(), optional(false),
+ optvalues(NNC_TARGET_ARM_CPP "," NNC_TARGET_X86_CPP "," NNC_TARGET_ARM_GPU_CPP
+ "," NNC_TARGET_INTERPRETER),
+ nullptr, separators("="));
/**
* Options for *frontend*
diff --git a/compiler/nnc/include/Definitions.h.in b/compiler/nnc/include/Definitions.h.in
index 070cdd201..bd8642956 100644
--- a/compiler/nnc/include/Definitions.h.in
+++ b/compiler/nnc/include/Definitions.h.in
@@ -7,12 +7,12 @@
*/
/**
- * @breif absolute path to installation directory of *nnc* project
+ * @brief absolute path to installation directory of *nnc* project
*/
#define NNC_ROOT_PATH "@NNC_INSTALL_PATH@"
/**
- * @breif absolute path to directory contains libraries
+ * @brief absolute path to directory contains libraries
*/
#define NNC_LIB_PATH "@NNC_INSTALL_LIB_PATH@"
diff --git a/compiler/nnc/include/pass/PassData.h b/compiler/nnc/include/pass/PassData.h
index e2c0b8129..1ff8af927 100644
--- a/compiler/nnc/include/pass/PassData.h
+++ b/compiler/nnc/include/pass/PassData.h
@@ -30,9 +30,8 @@ class PassData
{
public:
/* implicit */ PassData(std::nullptr_t data)
- : // NOLINT(google-explicit-constructor, hicpp-explicit-conversions)
- _dataContainer{.unknown = data},
- _dataType(PDT::UNKNOWN)
+ : // NOLINT(google-explicit-constructor, hicpp-explicit-conversions)
+ _dataContainer{.unknown = data}, _dataType(PDT::UNKNOWN)
{
}
@@ -40,9 +39,8 @@ public:
* @brief Implicit conversion from Graph* to PassData
*/
/* implicit */ PassData(mir::Graph *graph)
- : // NOLINT(google-explicit-constructor, hicpp-explicit-conversions)
- _dataContainer{.graph = graph},
- _dataType(PDT::GRAPH)
+ : // NOLINT(google-explicit-constructor, hicpp-explicit-conversions)
+ _dataContainer{.graph = graph}, _dataType(PDT::GRAPH)
{
}
@@ -60,9 +58,8 @@ public:
* @brief Implicit conversion from Graph* to PassData
*/
/* implicit */ PassData(mir::TensorVariant *tv)
- : // NOLINT(google-explicit-constructor, hicpp-explicit-conversions)
- _dataContainer{.tensorVariant = tv},
- _dataType(PDT::TENSOR_VARIANT)
+ : // NOLINT(google-explicit-constructor, hicpp-explicit-conversions)
+ _dataContainer{.tensorVariant = tv}, _dataType(PDT::TENSOR_VARIANT)
{
}
diff --git a/compiler/nnc/include/passes/optimizations/CombineTransposes.h b/compiler/nnc/include/passes/optimizations/CombineTransposes.h
index 7d227cd5d..a08676e47 100644
--- a/compiler/nnc/include/passes/optimizations/CombineTransposes.h
+++ b/compiler/nnc/include/passes/optimizations/CombineTransposes.h
@@ -33,6 +33,7 @@ public:
PassData run(PassData data) override;
std::string getName() override { return "opt_combine_transposes"; };
+
private:
};
diff --git a/compiler/nnc/include/passes/optimizations/OptimizationUtils.h b/compiler/nnc/include/passes/optimizations/OptimizationUtils.h
index 9a9212c12..83f455b2d 100644
--- a/compiler/nnc/include/passes/optimizations/OptimizationUtils.h
+++ b/compiler/nnc/include/passes/optimizations/OptimizationUtils.h
@@ -25,11 +25,11 @@ namespace nnc
namespace opt_util
{
/**
-* @brief Swap adjacent nodes in Graph. Creates new nodes and replaces the old ones with new.
-* @param g MIR Graph
-* @param top Node
-* @param bottom Node
-*/
+ * @brief Swap adjacent nodes in Graph. Creates new nodes and replaces the old ones with new.
+ * @param g MIR Graph
+ * @param top Node
+ * @param bottom Node
+ */
void swapAdjacent(mir::Graph *g, mir::Operation *top, mir::Operation *bottom);
// TODO: this function and its usages should be removed after DCE optimization is implemented
diff --git a/compiler/nnc/include/support/CommandLine.h b/compiler/nnc/include/support/CommandLine.h
index 40777ff46..66466276d 100644
--- a/compiler/nnc/include/support/CommandLine.h
+++ b/compiler/nnc/include/support/CommandLine.h
@@ -38,7 +38,7 @@ class BadOption : public std::logic_error
{
public:
explicit BadOption(const std::string &msg, std::string optname = "", std::string value = "")
- : std::logic_error(msg), _option_name(std::move(optname)), _option_value(std::move(value))
+ : std::logic_error(msg), _option_name(std::move(optname)), _option_value(std::move(value))
{
}
@@ -387,7 +387,7 @@ private:
std::map<std::string, IOption *> _options_name; // map of name -> option
std::vector<IOption *> _options; // options
std::map<IOption::Group, std::vector<IOption *>>
- _grouped_options; // map of groups: group -> vector of options
+ _grouped_options; // map of groups: group -> vector of options
std::string _prog_name; // name of program
int _args_num = 0; // number of command line arguments
};
@@ -530,7 +530,7 @@ Option<T>::Option(const std::vector<std::string> &optnames, const std::string &d
_group = group;
_can_have_several_vals =
- std::is_same<T, std::vector<std::string>>::value || std::is_same<T, std::vector<int>>::value;
+ std::is_same<T, std::vector<std::string>>::value || std::is_same<T, std::vector<int>>::value;
assert(!(_can_have_several_vals && !_seps.empty()) &&
"option with several values can't have separators");
diff --git a/compiler/nnc/passes/optimizations/CombineTransposes.cpp b/compiler/nnc/passes/optimizations/CombineTransposes.cpp
index e381a9cae..8a584d2d5 100644
--- a/compiler/nnc/passes/optimizations/CombineTransposes.cpp
+++ b/compiler/nnc/passes/optimizations/CombineTransposes.cpp
@@ -72,12 +72,12 @@ nnc::PassData nnc::CombineTransposes::run(nnc::PassData data)
};
auto *bottom_transpose = dynamic_cast<mir::ops::TransposeOp *>(match.second);
auto combined_axis_order =
- combineAxisOrders(top_transpose->getAxisOrder(), bottom_transpose->getAxisOrder());
+ combineAxisOrders(top_transpose->getAxisOrder(), bottom_transpose->getAxisOrder());
if (!isIdentityTranspose(combined_axis_order))
{
auto new_tr_op =
- g->create<mir::ops::TransposeOp>(top_transpose->getInput(0), combined_axis_order);
+ g->create<mir::ops::TransposeOp>(top_transpose->getInput(0), combined_axis_order);
g->replaceNode(bottom_transpose, new_tr_op);
}
diff --git a/compiler/nnc/passes/optimizations/ConstantFoldTranspose.cpp b/compiler/nnc/passes/optimizations/ConstantFoldTranspose.cpp
index 47a3147a5..ce99cdb2c 100644
--- a/compiler/nnc/passes/optimizations/ConstantFoldTranspose.cpp
+++ b/compiler/nnc/passes/optimizations/ConstantFoldTranspose.cpp
@@ -62,7 +62,7 @@ PassData ConstantFoldTranspose::run(PassData data)
auto matches = matcher.matchEdge(is_constant, is_transpose);
while (!matches.empty())
{
- for (const auto match : matches)
+ for (const auto &match : matches)
{
auto constant_op = dynamic_cast<ops::ConstantOp *>(match.first);
auto transpose_op = dynamic_cast<ops::TransposeOp *>(match.second);
diff --git a/compiler/nnc/passes/optimizations/DeadCodeElimination.cpp b/compiler/nnc/passes/optimizations/DeadCodeElimination.cpp
index b89dca1b7..371d9703f 100644
--- a/compiler/nnc/passes/optimizations/DeadCodeElimination.cpp
+++ b/compiler/nnc/passes/optimizations/DeadCodeElimination.cpp
@@ -33,8 +33,8 @@ nnc::PassData nnc::DeadCodeElimination::run(PassData data)
return;
bool has_no_uses =
- std::all_of(op->getOutputs().cbegin(), op->getOutputs().cend(),
- [](const Operation::Output &output) { return output.getUses().empty(); });
+ std::all_of(op->getOutputs().cbegin(), op->getOutputs().cend(),
+ [](const Operation::Output &output) { return output.getUses().empty(); });
if (has_no_uses)
{
diff --git a/compiler/nnc/passes/optimizations/FuseArithmeticOps.cpp b/compiler/nnc/passes/optimizations/FuseArithmeticOps.cpp
index 91686ef74..d69439fc3 100644
--- a/compiler/nnc/passes/optimizations/FuseArithmeticOps.cpp
+++ b/compiler/nnc/passes/optimizations/FuseArithmeticOps.cpp
@@ -215,10 +215,10 @@ bool sinkAddThroughMul(Graph *g)
// Create new operations
auto old_add_input = old_add_op->getInput(0);
auto new_mul_op =
- g->copyOpWithInputs(old_mul_op, {old_add_input, ols_mul_const_op->getOutput(0)});
+ g->copyOpWithInputs(old_mul_op, {old_add_input, ols_mul_const_op->getOutput(0)});
auto new_add_const_op = mergeConstantOps(g, old_add_const_op, ols_mul_const_op, OpType::mul);
auto new_add_op =
- g->copyOpWithInputs(old_add_op, {new_mul_op->getOutput(0), new_add_const_op->getOutput(0)});
+ g->copyOpWithInputs(old_add_op, {new_mul_op->getOutput(0), new_add_const_op->getOutput(0)});
// Replace old mul with new add and remove old nodes
g->replaceNode(old_mul_op, new_add_op);
diff --git a/compiler/nnc/passes/transformations/DataFormatSwitcher.cpp b/compiler/nnc/passes/transformations/DataFormatSwitcher.cpp
index 8ff842660..fcdbba878 100644
--- a/compiler/nnc/passes/transformations/DataFormatSwitcher.cpp
+++ b/compiler/nnc/passes/transformations/DataFormatSwitcher.cpp
@@ -27,7 +27,7 @@
namespace nnc
{
DataFormatSwitcher::DataFormatSwitcher(const mir::DataFormat target_format)
- : _target_format(target_format)
+ : _target_format(target_format)
{
}
@@ -89,10 +89,10 @@ mir::Operation::Output *DataFormatSwitcher::insertTransposeBefore(mir::Operation
mir::Operation::Output *new_out;
if (_target_format == mir::DataFormat::NHWC)
new_out = _graph->create<mir::ops::TransposeOp>(out, std::vector<std::size_t>{0, 2, 3, 1})
- ->getOutput(0); // NCHW -> NHWC
+ ->getOutput(0); // NCHW -> NHWC
else
new_out = _graph->create<mir::ops::TransposeOp>(out, std::vector<std::size_t>{0, 3, 1, 2})
- ->getOutput(0); // NHWC -> NCHW
+ ->getOutput(0); // NHWC -> NCHW
if (out->getType().isQuantized())
new_out->setQuantization(out->getType().getQuantization());
return new_out;
@@ -103,10 +103,10 @@ mir::Operation::Output *DataFormatSwitcher::insertTransposeAfter(mir::Operation:
mir::Operation::Output *new_out;
if (_target_format == mir::DataFormat::NHWC)
new_out = _graph->create<mir::ops::TransposeOp>(out, std::vector<std::size_t>{0, 3, 1, 2})
- ->getOutput(0); // NHWC -> NCHW
+ ->getOutput(0); // NHWC -> NCHW
else
new_out = _graph->create<mir::ops::TransposeOp>(out, std::vector<std::size_t>{0, 2, 3, 1})
- ->getOutput(0); // NCHW -> NHWC
+ ->getOutput(0); // NCHW -> NHWC
if (out->getType().isQuantized())
new_out->setQuantization(out->getType().getQuantization());
return new_out;
diff --git a/compiler/nnc/passes/transformations/LowerConv2D.cpp b/compiler/nnc/passes/transformations/LowerConv2D.cpp
index 9e32978bc..9ae20527d 100644
--- a/compiler/nnc/passes/transformations/LowerConv2D.cpp
+++ b/compiler/nnc/passes/transformations/LowerConv2D.cpp
@@ -36,11 +36,11 @@ static void lowerConv2D(mir::Graph *graph, mir::ops::Conv2DOp *op)
// [O, H, W, I / M] == [M, H, W, 1] -> [H, W, M, 1]
std::vector<std::size_t> perm{1, 2, 0, 3};
mir::Operation::Output *new_kernel =
- graph->create<mir::ops::TransposeOp>(kernel, perm)->getOutput(0);
+ graph->create<mir::ops::TransposeOp>(kernel, perm)->getOutput(0);
mir::Conv2DOpAttributes attributes = op->getAttributes();
attributes.num_groups = 1;
mir::Operation::Output *new_result =
- graph->create<mir::ops::DepthwiseConv2DOp>(input, new_kernel, attributes)->getOutput(0);
+ graph->create<mir::ops::DepthwiseConv2DOp>(input, new_kernel, attributes)->getOutput(0);
graph->replaceNode(op, new_result->getNode());
}
}
diff --git a/compiler/nnc/tests/acl_soft_backend/AclCppOperations.cpp b/compiler/nnc/tests/acl_soft_backend/AclCppOperations.cpp
index 4ae020355..d39c9dcb5 100644
--- a/compiler/nnc/tests/acl_soft_backend/AclCppOperations.cpp
+++ b/compiler/nnc/tests/acl_soft_backend/AclCppOperations.cpp
@@ -157,7 +157,7 @@ static void runAclSystemTest(const string &name)
// Copy the model input HDF5 file to the remote device.
ASSERT_TRUE(
- copyToOdroid(binDir + "/" + name + "/in_" + name + "_caffe.hdf5", dir_name + "/in.hdf5"));
+ copyToOdroid(binDir + "/" + name + "/in_" + name + "_caffe.hdf5", dir_name + "/in.hdf5"));
// Switch to the artifact directory on the remote device and run the artifact.
ASSERT_TRUE(runOnOdroid("cd " + dir_name + "; ./nnc_test"));
diff --git a/compiler/nnc/tests/acl_soft_backend/artifact_cmake/main.cpp b/compiler/nnc/tests/acl_soft_backend/artifact_cmake/main.cpp
index c326b390b..ea4bddac8 100644
--- a/compiler/nnc/tests/acl_soft_backend/artifact_cmake/main.cpp
+++ b/compiler/nnc/tests/acl_soft_backend/artifact_cmake/main.cpp
@@ -31,12 +31,13 @@ static unique_ptr<char[]> getTensorData(CLTensor &tensor)
Iterator i(&tensor, window);
char *ptr = &buf[0];
- execute_window_loop(window,
- [&i, &ptr](const Coordinates &) {
- memcpy(ptr, i.ptr(), sizeof(float));
- ptr += sizeof(float);
- },
- i);
+ execute_window_loop(
+ window,
+ [&i, &ptr](const Coordinates &) {
+ memcpy(ptr, i.ptr(), sizeof(float));
+ ptr += sizeof(float);
+ },
+ i);
tensor.unmap();
return buf;
@@ -52,12 +53,13 @@ static void readTensor(CLTensor &tensor, H5::DataSet &dataset)
Iterator i(&tensor, window);
char *ptr = &buf[0];
- execute_window_loop(window,
- [&i, &ptr](const Coordinates &) {
- memcpy(i.ptr(), ptr, sizeof(float));
- ptr += sizeof(float);
- },
- i);
+ execute_window_loop(
+ window,
+ [&i, &ptr](const Coordinates &) {
+ memcpy(i.ptr(), ptr, sizeof(float));
+ ptr += sizeof(float);
+ },
+ i);
tensor.unmap();
}
diff --git a/compiler/nnc/tests/soft_backend/CompileCPP.cpp b/compiler/nnc/tests/soft_backend/CompileCPP.cpp
index 63aeb4a1b..4ede0cf05 100644
--- a/compiler/nnc/tests/soft_backend/CompileCPP.cpp
+++ b/compiler/nnc/tests/soft_backend/CompileCPP.cpp
@@ -101,7 +101,7 @@ int main()
string target_compiler = "g++ -Wall --std=c++11";
string compiler_command =
- target_compiler + " -I" + output_dir + " " + main_path + " " + code_path;
+ target_compiler + " -I" + output_dir + " " + main_path + " " + code_path;
// call compiler
int res = system(compiler_command.c_str());
diff --git a/compiler/nnc/tests/soft_backend/test_main.def b/compiler/nnc/tests/soft_backend/test_main.def
index 6a464f862..c508cca26 100644
--- a/compiler/nnc/tests/soft_backend/test_main.def
+++ b/compiler/nnc/tests/soft_backend/test_main.def
@@ -1,8 +1,9 @@
+#include <string>
int main()
{
Shape s{1, 2, 3};
Tensor in_t(s);
- NNModel model("nnmodel.params");
+ NNModel model(std::string("nnmodel.params"));
model.set_in(in_t);
model.doInference();
std::shared_ptr<Tensor> out_t = model.get_out();
diff --git a/compiler/nnc/unittests/acl_backend/DOMToText.cpp b/compiler/nnc/unittests/acl_backend/DOMToText.cpp
index be0e6713c..aaf0c2055 100644
--- a/compiler/nnc/unittests/acl_backend/DOMToText.cpp
+++ b/compiler/nnc/unittests/acl_backend/DOMToText.cpp
@@ -148,9 +148,9 @@ TEST(acl_backend_dom_to_text, ArtifactUnaryExpr)
const char *var_name = "id";
shared_ptr<ArtifactId> var = AF::id(var_name);
pair<ArtifactUnOp, const char *> test_cases[] = {
- {ArtifactUnOp::preIncr, "++id"}, {ArtifactUnOp::preDecr, "--id"},
- {ArtifactUnOp::heapNew, "new id"}, {ArtifactUnOp::heapFree, "delete id"},
- {ArtifactUnOp::postIncr, "id++"}, {ArtifactUnOp::postDecr, "id--"}};
+ {ArtifactUnOp::preIncr, "++id"}, {ArtifactUnOp::preDecr, "--id"},
+ {ArtifactUnOp::heapNew, "new id"}, {ArtifactUnOp::heapFree, "delete id"},
+ {ArtifactUnOp::postIncr, "id++"}, {ArtifactUnOp::postDecr, "id--"}};
for (auto test : test_cases)
{
@@ -181,14 +181,14 @@ TEST(acl_backend_dom_to_text, ArtifactBinaryExpr)
shared_ptr<ArtifactId> op2 = AF::id(op2_name);
pair<ArtifactBinOp, const char *> test_cases[] = {
- {ArtifactBinOp::eq, "a == b"}, {ArtifactBinOp::notEq, "a != b"},
- {ArtifactBinOp::less, "a < b"}, {ArtifactBinOp::lessOrEq, "a <= b"},
- {ArtifactBinOp::great, "a > b"}, {ArtifactBinOp::greatOrEq, "a >= b"},
- {ArtifactBinOp::assign, "a = b"}, {ArtifactBinOp::plus, "a + b"},
- {ArtifactBinOp::minus, "a - b"}, {ArtifactBinOp::mult, "a * b"},
- {ArtifactBinOp::div, "a / b"}, {ArtifactBinOp::plusAssign, "a += b"},
- {ArtifactBinOp::minusAssign, "a -= b"}, {ArtifactBinOp::multAssign, "a *= b"},
- {ArtifactBinOp::divAssign, "a /= b"}};
+ {ArtifactBinOp::eq, "a == b"}, {ArtifactBinOp::notEq, "a != b"},
+ {ArtifactBinOp::less, "a < b"}, {ArtifactBinOp::lessOrEq, "a <= b"},
+ {ArtifactBinOp::great, "a > b"}, {ArtifactBinOp::greatOrEq, "a >= b"},
+ {ArtifactBinOp::assign, "a = b"}, {ArtifactBinOp::plus, "a + b"},
+ {ArtifactBinOp::minus, "a - b"}, {ArtifactBinOp::mult, "a * b"},
+ {ArtifactBinOp::div, "a / b"}, {ArtifactBinOp::plusAssign, "a += b"},
+ {ArtifactBinOp::minusAssign, "a -= b"}, {ArtifactBinOp::multAssign, "a *= b"},
+ {ArtifactBinOp::divAssign, "a /= b"}};
for (auto test : test_cases)
{
@@ -286,12 +286,12 @@ TEST(acl_backend_dom_to_text, ArtifactForLoop)
shared_ptr<ArtifactVariable> iter = AF::var(var_type, var_name, {}, {AF::lit("0")});
shared_ptr<ArtifactExpr> step =
- AF::bin(ArtifactBinOp::plusAssign, AF::id(var_name), AF::lit("1"));
+ AF::bin(ArtifactBinOp::plusAssign, AF::id(var_name), AF::lit("1"));
shared_ptr<ArtifactExpr> cond =
- AF::bin(ArtifactBinOp::lessOrEq, AF::id(var_name), AF::lit("123"));
+ AF::bin(ArtifactBinOp::lessOrEq, AF::id(var_name), AF::lit("123"));
shared_ptr<ArtifactBinaryExpr> expr =
- AF::bin(ArtifactBinOp::plusAssign, AF::id("hello"), AF::id("world"));
+ AF::bin(ArtifactBinOp::plusAssign, AF::id("hello"), AF::id("world"));
ArtifactForLoop loop(iter, cond, step);
@@ -308,10 +308,10 @@ TEST(acl_backend_dom_to_text, ArtifactIf)
const char *var_name = "i";
shared_ptr<ArtifactExpr> cond =
- AF::bin(ArtifactBinOp::lessOrEq, AF::id(var_name), AF::lit("123"));
+ AF::bin(ArtifactBinOp::lessOrEq, AF::id(var_name), AF::lit("123"));
shared_ptr<ArtifactBinaryExpr> expr =
- AF::bin(ArtifactBinOp::plusAssign, AF::id("hello"), AF::id("world"));
+ AF::bin(ArtifactBinOp::plusAssign, AF::id("hello"), AF::id("world"));
ArtifactIf if_stmt(cond);
@@ -415,7 +415,7 @@ static shared_ptr<ArtifactClassVariable> createClsVariable(ArtifactClass &cls, c
list<shared_ptr<ArtifactExpr>> dims{dim1, dim2};
list<shared_ptr<ArtifactExpr>> initializers{AF::lit("123")};
shared_ptr<ArtifactClassVariable> var_decl =
- cls.var(is_public, var_type, var_name, dims, initializers);
+ cls.var(is_public, var_type, var_name, dims, initializers);
return var_decl;
}
@@ -483,8 +483,8 @@ TEST(acl_backend_dom_to_text, ArtifactModule)
const char *code_prefix = "#include \"module.h\"\n\n#include <list>\n\n#include \"bar.h\"\n\n";
const char *code_suffix = "\nClass::Class() {\n}\n\n";
- string ref_data = string(code_prefix) +
- string(AclArtifactUtilities, sizeof(AclArtifactUtilities)) + code_suffix;
+ string ref_data =
+ string(code_prefix) + string(AclArtifactUtilities, sizeof(AclArtifactUtilities)) + code_suffix;
m.accept(&code_gen);
ASSERT_EQ(code_out.str(), ref_data);
diff --git a/compiler/nnc/unittests/acl_backend/MIRToDOM.cpp b/compiler/nnc/unittests/acl_backend/MIRToDOM.cpp
index a9b36a145..f411fde42 100644
--- a/compiler/nnc/unittests/acl_backend/MIRToDOM.cpp
+++ b/compiler/nnc/unittests/acl_backend/MIRToDOM.cpp
@@ -117,12 +117,12 @@ void checkDomIncludes(const ArtifactModule &m)
// check ordinary includes, like '#include "artifact_data.h"'
checkHeadersSetsEqual(
- m.headerIncludes(),
- {"arm_compute/core/Types.h", "arm_compute/runtime/BlobLifetimeManager.h",
- "arm_compute/runtime/CL/CLBufferAllocator.h", "arm_compute/runtime/CL/CLFunctions.h",
- "arm_compute/runtime/CL/CLScheduler.h", "arm_compute/runtime/MemoryManagerOnDemand.h",
- "arm_compute/runtime/PoolManager.h"},
- "system header includes diverged");
+ m.headerIncludes(),
+ {"arm_compute/core/Types.h", "arm_compute/runtime/BlobLifetimeManager.h",
+ "arm_compute/runtime/CL/CLBufferAllocator.h", "arm_compute/runtime/CL/CLFunctions.h",
+ "arm_compute/runtime/CL/CLScheduler.h", "arm_compute/runtime/MemoryManagerOnDemand.h",
+ "arm_compute/runtime/PoolManager.h"},
+ "system header includes diverged");
checkHeadersSetsEqual(m.sourceSysIncludes(), {}, "system source includes diverged");
}
@@ -287,10 +287,10 @@ TEST(acl_backend_mir_to_dom, conv2d)
Graph g;
OpConstructor op_generator =
- [kernel_tensor](mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
- auto kernel = g.create<mir::ops::ConstantOp>(kernel_tensor)->getOutput(0);
- return g.create<mir::ops::Conv2DOp>(inputs[0], kernel, mir::Conv2DOpAttributes());
- };
+ [kernel_tensor](mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ auto kernel = g.create<mir::ops::ConstantOp>(kernel_tensor)->getOutput(0);
+ return g.create<mir::ops::Conv2DOp>(inputs[0], kernel, mir::Conv2DOpAttributes());
+ };
vector<Shape> input_shapes{{1, 10, 10, channels}};
@@ -312,11 +312,11 @@ TEST(acl_backend_mir_to_dom, depthwise_conv)
Graph g;
OpConstructor op_generator =
- [kernel_tensor](mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
- Conv2DOpAttributes attributes;
- auto kernel = g.create<mir::ops::ConstantOp>(kernel_tensor)->getOutput(0);
- return g.create<mir::ops::DepthwiseConv2DOp>(inputs[0], kernel, attributes);
- };
+ [kernel_tensor](mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ Conv2DOpAttributes attributes;
+ auto kernel = g.create<mir::ops::ConstantOp>(kernel_tensor)->getOutput(0);
+ return g.create<mir::ops::DepthwiseConv2DOp>(inputs[0], kernel, attributes);
+ };
vector<Shape> input_shapes{{1, 10, 10, channels}};
diff --git a/compiler/nnc/unittests/optimizations/SinkTest.cpp b/compiler/nnc/unittests/optimizations/SinkTest.cpp
index 8c5b2767e..be171d1cb 100644
--- a/compiler/nnc/unittests/optimizations/SinkTest.cpp
+++ b/compiler/nnc/unittests/optimizations/SinkTest.cpp
@@ -103,7 +103,7 @@ TEST(OptPass, sinkTrConcat)
Operation *tr1 = g.create<ops::TransposeOp>(in1->getOutput(0), vector<size_t>{0, 3, 1, 2});
Operation *tr2 = g.create<ops::TransposeOp>(in2->getOutput(0), vector<size_t>{0, 3, 1, 2});
Operation *conc =
- g.create<ops::ConcatOp>(vector<Operation::Output *>{tr1->getOutput(0), tr2->getOutput(0)}, 1);
+ g.create<ops::ConcatOp>(vector<Operation::Output *>{tr1->getOutput(0), tr2->getOutput(0)}, 1);
Operation *tanh = g.create<ops::TanhOp>(conc->getOutput(0));
Operation *out = g.create<ops::OutputOp>(tanh->getOutput(0));
(void)out;
@@ -141,7 +141,7 @@ TEST(OptPass, sinkReluConcat)
Operation *relu1 = g.create<ops::ReluOp>(in1->getOutput(0));
Operation *relu2 = g.create<ops::ReluOp>(in2->getOutput(0));
Operation *conc = g.create<ops::ConcatOp>(
- vector<Operation::Output *>{relu1->getOutput(0), relu2->getOutput(0)}, 1);
+ vector<Operation::Output *>{relu1->getOutput(0), relu2->getOutput(0)}, 1);
Operation *tanh = g.create<ops::TanhOp>(conc->getOutput(0));
Operation *out = g.create<ops::OutputOp>(tanh->getOutput(0));
(void)out;
diff --git a/compiler/nnc/unittests/soft_backend/CPPOperations.cpp b/compiler/nnc/unittests/soft_backend/CPPOperations.cpp
index 508ee954d..e593333fa 100644
--- a/compiler/nnc/unittests/soft_backend/CPPOperations.cpp
+++ b/compiler/nnc/unittests/soft_backend/CPPOperations.cpp
@@ -120,11 +120,10 @@ namespace
* @brief Creates graph with one operation generated by opGen function and returns this operation
* node
*/
-mir::Operation *
-fillGraph(mir::Graph &g,
- const function<mir::Operation *(mir::Graph &g, vector<mir::Operation::Output *> &inputs)>
- &op_gen,
- const vector<unique_ptr<mir::TensorVariant>> &input_ntensors)
+mir::Operation *fillGraph(
+ mir::Graph &g,
+ const function<mir::Operation *(mir::Graph &g, vector<mir::Operation::Output *> &inputs)> &op_gen,
+ const vector<unique_ptr<mir::TensorVariant>> &input_ntensors)
{
// Create operation inputs.
vector<mir::Operation::Output *> inputs;
@@ -295,8 +294,8 @@ void compareResults(const mir::TensorVariant &ref_nnc_tensor, const Tensor &test
float ref_data = mir::Tensor<float>(ref_nnc_tensor).at(nnc_idx);
float test_data = test_art_tensor.at(artifact_idx);
ASSERT_TRUE(areFloatsNear(ref_data, test_data, 32, 1e-5))
- << "Tensor element " << nnc_idx << " diverged, reference: " << ref_data
- << " test result: " << test_data;
+ << "Tensor element " << nnc_idx << " diverged, reference: " << ref_data
+ << " test result: " << test_data;
}
}
@@ -306,10 +305,10 @@ void compareResults(const mir::TensorVariant &ref_nnc_tensor, const Tensor &test
*/
template <typename TestFunc, typename... Args>
void createAndRunTestGraph(
- function<mir::Operation *(mir::Graph &, const std::vector<mir::Operation::Output *> &inputs)>
- op_generator,
- TestFunc artifactOperation, const vector<unique_ptr<mir::TensorVariant>> &input_ntensors,
- Args &... input_atensors)
+ function<mir::Operation *(mir::Graph &, const std::vector<mir::Operation::Output *> &inputs)>
+ op_generator,
+ TestFunc artifactOperation, const vector<unique_ptr<mir::TensorVariant>> &input_ntensors,
+ Args &... input_atensors)
{
mir::Graph g;
mir::Operation *actual_operation = fillGraph(g, op_generator, input_ntensors);
@@ -657,7 +656,7 @@ TEST(cpp_operations_test, resize_NN_test)
auto op_generator = [&res_shape](mir::Graph &g,
const std::vector<mir::Operation::Output *> &inputs) {
return g.create<mir::ops::ResizeOp>(
- inputs[0], mir::ops::ResizeOp::ResizeMethod::nearestNeighbor, res_shape);
+ inputs[0], mir::ops::ResizeOp::ResizeMethod::nearestNeighbor, res_shape);
};
createAndRunTestGraph(op_generator, resize, input_ntensors, input_atensor);
@@ -668,7 +667,7 @@ TEST(cpp_operations_test, resize_NN_test_scales)
{
cout << "\n";
std::vector<float> test_scales[] = {
- {1, 2, 2, 1}, {1, 2, 3, 1}, {1, 3, 2, 1}, {1, 2.5, 2, 1}, {1, 3, 9, 1}};
+ {1, 2, 2, 1}, {1, 2, 3, 1}, {1, 3, 2, 1}, {1, 2.5, 2, 1}, {1, 3, 9, 1}};
for (const std::vector<float> &scales : test_scales)
{
vector<int> input_shape_data{1, 4, 4, 1};
@@ -678,7 +677,7 @@ TEST(cpp_operations_test, resize_NN_test_scales)
auto op_generator = [&scales](mir::Graph &g,
const std::vector<mir::Operation::Output *> &inputs) {
return g.create<mir::ops::ResizeOp>(
- inputs[0], mir::ops::ResizeOp::ResizeMethod::nearestNeighbor, scales);
+ inputs[0], mir::ops::ResizeOp::ResizeMethod::nearestNeighbor, scales);
};
createAndRunTestGraph(op_generator, resize, input_ntensors, input_atensor);
}
@@ -711,10 +710,10 @@ TEST(cpp_operations_test, avgpool)
for (const auto include_pad : {false, true})
{
attributes.include_pad = include_pad;
- auto op_generator = [&attributes](
- mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
- return g.create<mir::ops::AvgPool2DOp>(inputs[0], attributes);
- };
+ auto op_generator =
+ [&attributes](mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ return g.create<mir::ops::AvgPool2DOp>(inputs[0], attributes);
+ };
createAndRunTestGraph(op_generator, avgPool, input_ntensors, input_atensor);
}
@@ -742,8 +741,9 @@ TEST(cpp_operations_test, maxpool)
vector<unique_ptr<mir::TensorVariant>> input_ntensors(1);
fillTensors(input_ntensors[0], input_atensor, shape_data, 1.0f);
- auto op_generator = [&window_size, &strides](
- mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ auto op_generator = [&window_size,
+ &strides](mir::Graph &g,
+ const std::vector<mir::Operation::Output *> &inputs) {
mir::MaxPool2DOpAttributes attributes;
attributes.window = window_size;
attributes.strides = strides;
@@ -838,7 +838,7 @@ TEST(cpp_operations_test, reduceMeanTst)
vector<unique_ptr<mir::TensorVariant>> input_ntensors(1);
fillTensors(input_ntensors[0], input_atensor, input_shape_data, 1.0f);
auto op_generator = [&axis_list, keep_dims](
- mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
+ mir::Graph &g, const std::vector<mir::Operation::Output *> &inputs) {
auto op = g.create<mir::ops::ReduceMeanOp>(inputs[0], axis_list, keep_dims);
return op;
};
@@ -873,7 +873,8 @@ TEST(cpp_operations_test, slice4d)
vector<int> shape_data{5, 30, 40, 12};
vector<int> starts[] = {{0, 0, 0, 0}, {1, 1, 1, 1}, {1, 0, 1, 0}, {0, 1, 1, 0}};
vector<int> sizes[] = {
- {-1, -1, -1, -1}, {4, -1, 10, -1},
+ {-1, -1, -1, -1},
+ {4, -1, 10, -1},
};
for (auto st : starts)
{
diff --git a/compiler/nnc/unittests/soft_backend/ModelAnalyzer.cpp b/compiler/nnc/unittests/soft_backend/ModelAnalyzer.cpp
index d38385e91..c2135c4be 100644
--- a/compiler/nnc/unittests/soft_backend/ModelAnalyzer.cpp
+++ b/compiler/nnc/unittests/soft_backend/ModelAnalyzer.cpp
@@ -22,6 +22,8 @@
#include <gtest/gtest.h>
+#include <algorithm>
+
using namespace std;
using namespace nnc;
using namespace mir;
diff --git a/compiler/nnc/unittests/support/CommandLineTest.cpp b/compiler/nnc/unittests/support/CommandLineTest.cpp
index 73f77aa20..993c4086f 100644
--- a/compiler/nnc/unittests/support/CommandLineTest.cpp
+++ b/compiler/nnc/unittests/support/CommandLineTest.cpp
@@ -69,8 +69,8 @@ Option<int32_t> NNegOpt(optname("-neg_val"),
// test option with default negative value
Option<int32_t>
- NDefaultNegOpt(optname("-default_neg_val"),
- overview("description of integer option with default negative value"), -33);
+ NDefaultNegOpt(optname("-default_neg_val"),
+ overview("description of integer option with default negative value"), -33);
// test option with positive values
Option<uint32_t> NPosOpt(optname("-pos_val"),
overview("description of integer option with positive value"), 1,
@@ -124,28 +124,28 @@ TEST(SUPPORT_NNC, verify_cl_options)
{
// create command line
const char *argv[] = {
- "CLTest", // program name
- // string options
- "-m", "multiopt_value", // second name for option with several names
- "--single", "single_value", // option with single name
- "-several_separators:SOME_VALUE1,SOME_VALUE2", // test option with several separators
- "--one_separarot=AAA_VALUE", // test option whit one separator
- "-default_val_opt", // test option with default value
- "--optional_opt", "/home/guest/tmp", // test optional option
- "-valid_opt", "value2", // test options with defined values
- // integer options
- "-neg_val", "-42", // test negative value for integer option
- "-default_neg_val", // test integer option with default value
- "-pos_val", "33", // test positive value for integer option
- // char options
- "-char-opt", "b", "-dash_opt", "-",
- // bool options
- "-bool_opt=false", "-bool-opt2",
- // vector of strings options
- "-vec_opt1", "1", "c", "222", "ABC", "857", "-vec_opt2", "--vec_opt_with_vals", "abc", "123",
- "xxx", "abc", "xxx",
- // grouped options
- "-group_opt1", "-group_opt2", "abc", "-group_opt3", "11", nullptr};
+ "CLTest", // program name
+ // string options
+ "-m", "multiopt_value", // second name for option with several names
+ "--single", "single_value", // option with single name
+ "-several_separators:SOME_VALUE1,SOME_VALUE2", // test option with several separators
+ "--one_separarot=AAA_VALUE", // test option whit one separator
+ "-default_val_opt", // test option with default value
+ "--optional_opt", "/home/guest/tmp", // test optional option
+ "-valid_opt", "value2", // test options with defined values
+ // integer options
+ "-neg_val", "-42", // test negative value for integer option
+ "-default_neg_val", // test integer option with default value
+ "-pos_val", "33", // test positive value for integer option
+ // char options
+ "-char-opt", "b", "-dash_opt", "-",
+ // bool options
+ "-bool_opt=false", "-bool-opt2",
+ // vector of strings options
+ "-vec_opt1", "1", "c", "222", "ABC", "857", "-vec_opt2", "--vec_opt_with_vals", "abc", "123",
+ "xxx", "abc", "xxx",
+ // grouped options
+ "-group_opt1", "-group_opt2", "abc", "-group_opt3", "11", nullptr};
int argc = (sizeof(argv) / sizeof(argv[0])) - 1;
// It must be failed if option is not passed and other options are in the same group
diff --git a/compiler/nnc/unittests/transformations/Switcher.cpp b/compiler/nnc/unittests/transformations/Switcher.cpp
index 049ac44cd..2f4793369 100644
--- a/compiler/nnc/unittests/transformations/Switcher.cpp
+++ b/compiler/nnc/unittests/transformations/Switcher.cpp
@@ -88,7 +88,7 @@ TEST(TRANSFORMATIONS, Switcher_DWConv2D_NHWC2NCHW)
attributes.padding_before = {67, 123};
attributes.padding_after = {32, 356};
auto *dw_conv =
- g.create<mir::ops::DepthwiseConv2DOp>(input->getOutput(0), kernel->getOutput(0), attributes);
+ g.create<mir::ops::DepthwiseConv2DOp>(input->getOutput(0), kernel->getOutput(0), attributes);
auto *output = g.create<mir::ops::OutputOp>(dw_conv->getOutput(0));
@@ -138,7 +138,7 @@ TEST(TRANSFORMATIONS, Switcher_DeConv2D_NHWC2NCHW)
attributes.padding_before = {31, 72};
attributes.padding_after = {32, 71};
auto *deconv =
- g.create<mir::ops::DeConv2DOp>(input->getOutput(0), kernel->getOutput(0), attributes);
+ g.create<mir::ops::DeConv2DOp>(input->getOutput(0), kernel->getOutput(0), attributes);
auto *output = g.create<mir::ops::OutputOp>(deconv->getOutput(0));
diff --git a/compiler/nnkit-caffe/backend/CMakeLists.txt b/compiler/nnkit-caffe/backend/CMakeLists.txt
index b18aa4f11..567d95438 100644
--- a/compiler/nnkit-caffe/backend/CMakeLists.txt
+++ b/compiler/nnkit-caffe/backend/CMakeLists.txt
@@ -1,3 +1,2 @@
add_library(nnkit_caffe_backend SHARED Module.cpp)
target_link_libraries(nnkit_caffe_backend nnkit_support_caffe)
-target_link_libraries(nnkit_caffe_backend stdex)
diff --git a/compiler/nnkit-caffe/backend/Module.cpp b/compiler/nnkit-caffe/backend/Module.cpp
index cb24a4e60..0bd39125f 100644
--- a/compiler/nnkit-caffe/backend/Module.cpp
+++ b/compiler/nnkit-caffe/backend/Module.cpp
@@ -17,11 +17,12 @@
#include "nnkit/support/caffe/Backend.h"
#include <nnkit/CmdlineArguments.h>
-#include <stdex/Memory.h>
+
+#include <memory>
extern "C" std::unique_ptr<nnkit::Backend> make_backend(const nnkit::CmdlineArguments &args)
{
- using stdex::make_unique;
+ using std::make_unique;
auto net = make_unique<::caffe::Net<float>>(args.at(0), caffe::TEST);
diff --git a/compiler/nnkit-intf/tensor/include/nnkit/TensorContext.h b/compiler/nnkit-intf/tensor/include/nnkit/TensorContext.h
index 07d8d154c..87056dd64 100644
--- a/compiler/nnkit-intf/tensor/include/nnkit/TensorContext.h
+++ b/compiler/nnkit-intf/tensor/include/nnkit/TensorContext.h
@@ -37,8 +37,8 @@ struct TensorContext
const nncc::core::ADT::tensor::Reader<T> &)>;
template <typename T>
- using TypedAccessor = std::function<void(const TensorContext &, uint32_t n,
- nncc::core::ADT::tensor::Accessor<T> &)>;
+ using TypedAccessor =
+ std::function<void(const TensorContext &, uint32_t n, nncc::core::ADT::tensor::Accessor<T> &)>;
virtual ~TensorContext() = default;
diff --git a/compiler/nnkit-misc/backend/CMakeLists.txt b/compiler/nnkit-misc/backend/CMakeLists.txt
index d351d5ce5..327fbab3c 100644
--- a/compiler/nnkit-misc/backend/CMakeLists.txt
+++ b/compiler/nnkit-misc/backend/CMakeLists.txt
@@ -4,7 +4,6 @@ add_library(nnkit_support_backend STATIC ${SOURCES})
target_include_directories(nnkit_support_backend PUBLIC include)
target_link_libraries(nnkit_support_backend PUBLIC nnkit_intf_backend)
target_link_libraries(nnkit_support_backend PUBLIC dl)
-target_link_libraries(nnkit_support_backend PUBLIC stdex)
find_package(Threads QUIET)
diff --git a/compiler/nnkit-misc/backend/src/BackendPlugin.cpp b/compiler/nnkit-misc/backend/src/BackendPlugin.cpp
index 54b1fdc83..75e0763c4 100644
--- a/compiler/nnkit-misc/backend/src/BackendPlugin.cpp
+++ b/compiler/nnkit-misc/backend/src/BackendPlugin.cpp
@@ -17,7 +17,7 @@
#include "nnkit/BackendPlugin.h"
#include <cassert>
-#include <stdex/Memory.h>
+#include <memory>
#include <iostream>
// NOTE dlfcn.h is not a standard library
@@ -82,7 +82,7 @@ std::unique_ptr<BackendPlugin> make_backend_plugin(const std::string &path)
exit(1);
}
- return stdex::make_unique<BackendPlugin>(handle, entry);
+ return std::make_unique<BackendPlugin>(handle, entry);
}
} // namespace nnkit
diff --git a/compiler/nnkit-mocotf/backend/Backend.cpp b/compiler/nnkit-mocotf/backend/Backend.cpp
index 4900684eb..598370635 100644
--- a/compiler/nnkit-mocotf/backend/Backend.cpp
+++ b/compiler/nnkit-mocotf/backend/Backend.cpp
@@ -17,13 +17,13 @@
#include "nnkit/support/moco/tf/Backend.h"
#include <nnkit/CmdlineArguments.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <cassert>
extern "C" std::unique_ptr<nnkit::Backend> make_backend(const nnkit::CmdlineArguments &args)
{
- using stdex::make_unique;
+ using std::make_unique;
assert(args.size() == 2); // args.at[0] : *.pb path, args.at[1]: *.info path
diff --git a/compiler/nnkit-mocotf/backend/CMakeLists.txt b/compiler/nnkit-mocotf/backend/CMakeLists.txt
index 72e16c75a..3dcd7e564 100644
--- a/compiler/nnkit-mocotf/backend/CMakeLists.txt
+++ b/compiler/nnkit-mocotf/backend/CMakeLists.txt
@@ -1,3 +1,2 @@
add_library(nnkit_moco_tf_backend SHARED Backend.cpp)
target_link_libraries(nnkit_moco_tf_backend nnkit_support_moco_tf)
-target_link_libraries(nnkit_moco_tf_backend stdex)
diff --git a/compiler/nnkit-mocotf/requires.cmake b/compiler/nnkit-mocotf/requires.cmake
index 6949ec808..1461e8443 100644
--- a/compiler/nnkit-mocotf/requires.cmake
+++ b/compiler/nnkit-mocotf/requires.cmake
@@ -1,4 +1,3 @@
-require("stdex")
# To use "nnkit_support_tftestinfo"
require("tfinfo")
require("loco")
diff --git a/compiler/nnkit-mocotf/support/CMakeLists.txt b/compiler/nnkit-mocotf/support/CMakeLists.txt
index 76c7c04b1..1b20d946b 100644
--- a/compiler/nnkit-mocotf/support/CMakeLists.txt
+++ b/compiler/nnkit-mocotf/support/CMakeLists.txt
@@ -10,4 +10,3 @@ target_link_libraries(nnkit_support_moco_tf nnkit_support_tftestinfo)
target_link_libraries(nnkit_support_moco_tf locomotiv)
target_link_libraries(nnkit_support_moco_tf moco_tf_frontend)
target_link_libraries(nnkit_support_moco_tf loco)
-target_link_libraries(nnkit_support_moco_tf stdex)
diff --git a/compiler/nnkit-mocotf/support/src/Backend.cpp b/compiler/nnkit-mocotf/support/src/Backend.cpp
index 2d9e21fd7..89dd73271 100644
--- a/compiler/nnkit-mocotf/support/src/Backend.cpp
+++ b/compiler/nnkit-mocotf/support/src/Backend.cpp
@@ -25,11 +25,11 @@
#include <moco/tf/Frontend.h>
#include <moco/Names.h>
-#include <stdex/Memory.h>
#include <nncc/core/ADT/tensor/Buffer.h>
#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <memory>
#include <utility> // std::move
#include <stdexcept>
@@ -116,7 +116,7 @@ Backend::Backend(const char *pb_path, const char *info_path)
// set member vars
_loco_graph = std::move(loco_graph);
- _sess = stdex::make_unique<locomotiv::Session>(_loco_graph.get());
+ _sess = std::make_unique<locomotiv::Session>(_loco_graph.get());
}
void Backend::prepare(const std::function<void(nnkit::TensorContext &)> &f)
@@ -131,7 +131,7 @@ void Backend::prepare(const std::function<void(nnkit::TensorContext &)> &f)
for (int n = 0; n < _inputs.size(); n++)
{
auto buf = make_buffer<float, LexicalLayout>(_inputs.at(n)->shape());
- buf_list.emplace_back(stdex::make_unique<nncc::core::ADT::tensor::Buffer<float>>(buf));
+ buf_list.emplace_back(std::make_unique<nncc::core::ADT::tensor::Buffer<float>>(buf));
}
// fill test input values
diff --git a/compiler/nnkit-mocotf/support/src/InputTensorContext.cpp b/compiler/nnkit-mocotf/support/src/InputTensorContext.cpp
index 98f500730..25ddc0982 100644
--- a/compiler/nnkit-mocotf/support/src/InputTensorContext.cpp
+++ b/compiler/nnkit-mocotf/support/src/InputTensorContext.cpp
@@ -37,7 +37,7 @@ void InputTensorContext::getMutableFloatTensor(uint32_t n,
}
void InputTensorContext::getConstFloatTensor(
- uint32_t n, const nnkit::TensorContext::TypedReader<float> &f) const
+ uint32_t n, const nnkit::TensorContext::TypedReader<float> &f) const
{
auto buf = _buffers.at(n).get();
f(*this, n, *buf);
diff --git a/compiler/nnkit-mocotf/support/src/InputTensorContext.h b/compiler/nnkit-mocotf/support/src/InputTensorContext.h
index bbb25adea..4100d229a 100644
--- a/compiler/nnkit-mocotf/support/src/InputTensorContext.h
+++ b/compiler/nnkit-mocotf/support/src/InputTensorContext.h
@@ -45,7 +45,7 @@ class InputTensorContext final : public TensorContext
public:
InputTensorContext(const ParsedTensors &parsed_tensors, const Buffers &buffers)
- : TensorContext(parsed_tensors), _buffers(buffers)
+ : TensorContext(parsed_tensors), _buffers(buffers)
{ /* empty */
}
diff --git a/compiler/nnkit-mocotf/support/src/OutputTensorContext.cpp b/compiler/nnkit-mocotf/support/src/OutputTensorContext.cpp
index 2b36fc67a..6ef1e4598 100644
--- a/compiler/nnkit-mocotf/support/src/OutputTensorContext.cpp
+++ b/compiler/nnkit-mocotf/support/src/OutputTensorContext.cpp
@@ -30,7 +30,7 @@ namespace tf
{
void OutputTensorContext::getConstFloatTensor(
- uint32_t n, const nnkit::TensorContext::TypedReader<float> &f) const
+ uint32_t n, const nnkit::TensorContext::TypedReader<float> &f) const
{ // for output
using nncc::core::ADT::tensor::LexicalLayout;
using nncc::core::ADT::tensor::make_overlay;
diff --git a/compiler/nnkit-mocotf/support/src/OutputTensorContext.h b/compiler/nnkit-mocotf/support/src/OutputTensorContext.h
index 8cb8d8bf0..f825729e9 100644
--- a/compiler/nnkit-mocotf/support/src/OutputTensorContext.h
+++ b/compiler/nnkit-mocotf/support/src/OutputTensorContext.h
@@ -43,7 +43,7 @@ class OutputTensorContext final : public TensorContext
{
public:
OutputTensorContext(const ParsedTensors &parsed_tensors, locomotiv::Session *sess)
- : TensorContext(parsed_tensors), _sess(sess)
+ : TensorContext(parsed_tensors), _sess(sess)
{ /* empty */
}
diff --git a/compiler/nnkit-onnxrt/backend/Backend.cpp b/compiler/nnkit-onnxrt/backend/Backend.cpp
index 9247fbf34..a6c62b7b3 100644
--- a/compiler/nnkit-onnxrt/backend/Backend.cpp
+++ b/compiler/nnkit-onnxrt/backend/Backend.cpp
@@ -17,13 +17,13 @@
#include "nnkit/support/onnx/Backend.h"
#include <nnkit/CmdlineArguments.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <cassert>
extern "C" std::unique_ptr<nnkit::Backend> make_backend(const nnkit::CmdlineArguments &args)
{
assert(args.size() == 1); // args.at[0] : onnx file
- return stdex::make_unique<::nnkit::support::onnx::Backend>(args.at(0));
+ return std::make_unique<::nnkit::support::onnx::Backend>(args.at(0));
}
diff --git a/compiler/nnkit-onnxrt/backend/CMakeLists.txt b/compiler/nnkit-onnxrt/backend/CMakeLists.txt
index b00e5593d..ae462de8d 100644
--- a/compiler/nnkit-onnxrt/backend/CMakeLists.txt
+++ b/compiler/nnkit-onnxrt/backend/CMakeLists.txt
@@ -1,3 +1,2 @@
add_library(nnkit_onnx_backend SHARED Backend.cpp)
target_link_libraries(nnkit_onnx_backend nnkit_support_onnx)
-target_link_libraries(nnkit_onnx_backend stdex)
diff --git a/compiler/nnkit-onnxrt/requires.cmake b/compiler/nnkit-onnxrt/requires.cmake
index d370fc17c..be53ae74f 100644
--- a/compiler/nnkit-onnxrt/requires.cmake
+++ b/compiler/nnkit-onnxrt/requires.cmake
@@ -1,2 +1 @@
-require("stdex")
require("nnkit-intf")
diff --git a/compiler/nnkit-onnxrt/support/CMakeLists.txt b/compiler/nnkit-onnxrt/support/CMakeLists.txt
index 1b51d4ed8..3d3bb2671 100644
--- a/compiler/nnkit-onnxrt/support/CMakeLists.txt
+++ b/compiler/nnkit-onnxrt/support/CMakeLists.txt
@@ -5,6 +5,5 @@ set_target_properties(nnkit_support_onnx-1.4 PROPERTIES POSITION_INDEPENDENT_COD
target_include_directories(nnkit_support_onnx-1.4 PUBLIC include)
target_link_libraries(nnkit_support_onnx-1.4 nnkit_intf_backend)
target_link_libraries(nnkit_support_onnx-1.4 onnxruntime)
-target_link_libraries(nnkit_support_onnx-1.4 stdex)
add_library(nnkit_support_onnx ALIAS nnkit_support_onnx-1.4)
diff --git a/compiler/nnkit-onnxrt/support/include/nnkit/support/onnx/TensorSet.h b/compiler/nnkit-onnxrt/support/include/nnkit/support/onnx/TensorSet.h
index b38fc9bb0..26753fed7 100644
--- a/compiler/nnkit-onnxrt/support/include/nnkit/support/onnx/TensorSet.h
+++ b/compiler/nnkit-onnxrt/support/include/nnkit/support/onnx/TensorSet.h
@@ -37,7 +37,7 @@ class TensorSet final
{
public:
TensorSet(Allocator *allocator, size_t nums)
- : _allocator(allocator), _names(nums), _types(nums), _dims(nums), _tensors(nums, nullptr)
+ : _allocator(allocator), _names(nums), _types(nums), _dims(nums), _tensors(nums, nullptr)
{
// DO NOTHING
}
@@ -60,7 +60,7 @@ public:
Status status;
status =
- OrtCreateTensorAsOrtValue(_allocator, dims.data(), dims.size(), type, &_tensors[index]);
+ OrtCreateTensorAsOrtValue(_allocator, dims.data(), dims.size(), type, &_tensors[index]);
status.throwOnError();
assert(OrtIsTensor(_tensors[index]));
diff --git a/compiler/nnkit-onnxrt/support/src/Runner.cpp b/compiler/nnkit-onnxrt/support/src/Runner.cpp
index bc6a81a5c..8159ed7c2 100644
--- a/compiler/nnkit-onnxrt/support/src/Runner.cpp
+++ b/compiler/nnkit-onnxrt/support/src/Runner.cpp
@@ -17,7 +17,7 @@
#include "nnkit/support/onnx/Runner.h"
#include "nnkit/support/onnx/Status.h"
-#include <stdex/Memory.h>
+#include <memory>
#include <cassert>
namespace nnkit
@@ -27,7 +27,7 @@ namespace support
namespace onnx
{
-Runner::Runner(const std::string &path) : _allocator(stdex::make_unique<Allocator>())
+Runner::Runner(const std::string &path) : _allocator(std::make_unique<Allocator>())
{
Status status;
@@ -61,7 +61,7 @@ void Runner::prepareInputs(void)
status = OrtSessionGetInputCount(_session, &num_input_nodes);
status.throwOnError();
- _inputs = stdex::make_unique<TensorSet>(_allocator.get(), num_input_nodes);
+ _inputs = std::make_unique<TensorSet>(_allocator.get(), num_input_nodes);
for (size_t i = 0; i < num_input_nodes; ++i)
{
@@ -113,7 +113,7 @@ void Runner::prepareOutputs(void)
status = OrtSessionGetOutputCount(_session, &num_output_nodes);
status.throwOnError();
- _outputs = stdex::make_unique<TensorSet>(_allocator.get(), num_output_nodes);
+ _outputs = std::make_unique<TensorSet>(_allocator.get(), num_output_nodes);
for (size_t i = 0; i < num_output_nodes; ++i)
{
diff --git a/compiler/nnkit-tf/CMakeLists.txt b/compiler/nnkit-tf/CMakeLists.txt
index ea6131fc2..ef2d42183 100644
--- a/compiler/nnkit-tf/CMakeLists.txt
+++ b/compiler/nnkit-tf/CMakeLists.txt
@@ -1,4 +1,4 @@
-nnas_find_package(TensorFlow QUIET)
+nnas_find_package(TensorFlow EXACT 1.13 QUIET)
if(NOT TensorFlow_FOUND)
return()
diff --git a/compiler/nnkit-tf/backend/Backend.cpp b/compiler/nnkit-tf/backend/Backend.cpp
index ee0476469..99c857e46 100644
--- a/compiler/nnkit-tf/backend/Backend.cpp
+++ b/compiler/nnkit-tf/backend/Backend.cpp
@@ -17,13 +17,13 @@
#include "nnkit/support/tf/Backend.h"
#include <nnkit/CmdlineArguments.h>
-#include <stdex/Memory.h>
+#include <memory>
#include <cassert>
extern "C" std::unique_ptr<nnkit::Backend> make_backend(const nnkit::CmdlineArguments &args)
{
- using stdex::make_unique;
+ using std::make_unique;
  assert(args.size() == 2); // args.at[0] : test.pb path, args.at[1]: test.info path
diff --git a/compiler/nnkit-tf/backend/CMakeLists.txt b/compiler/nnkit-tf/backend/CMakeLists.txt
index dd2e469e8..d0078453e 100644
--- a/compiler/nnkit-tf/backend/CMakeLists.txt
+++ b/compiler/nnkit-tf/backend/CMakeLists.txt
@@ -1,3 +1,2 @@
add_library(nnkit_tf_backend SHARED Backend.cpp)
target_link_libraries(nnkit_tf_backend nnkit_support_tf)
-target_link_libraries(nnkit_tf_backend stdex)
diff --git a/compiler/nnkit-tf/requires.cmake b/compiler/nnkit-tf/requires.cmake
index 4b9fd68b2..a757bdda4 100644
--- a/compiler/nnkit-tf/requires.cmake
+++ b/compiler/nnkit-tf/requires.cmake
@@ -1,3 +1,2 @@
-require("stdex")
require("tfinfo")
require("nnkit-intf")
diff --git a/compiler/nnkit-tf/support/CMakeLists.txt b/compiler/nnkit-tf/support/CMakeLists.txt
index 471a1c70f..d064131ea 100644
--- a/compiler/nnkit-tf/support/CMakeLists.txt
+++ b/compiler/nnkit-tf/support/CMakeLists.txt
@@ -1,9 +1,9 @@
file(GLOB_RECURSE SOURCES "src/*.cpp")
-add_library(nnkit_support_tf-1.12 STATIC ${SOURCES})
-set_target_properties(nnkit_support_tf-1.12 PROPERTIES POSITION_INDEPENDENT_CODE ON)
-target_include_directories(nnkit_support_tf-1.12 PUBLIC include)
-target_link_libraries(nnkit_support_tf-1.12 nnkit_intf_backend stdex nnkit_support_tftestinfo)
-target_link_libraries(nnkit_support_tf-1.12 tensorflow)
+add_library(nnkit_support_tf-1.13 STATIC ${SOURCES})
+set_target_properties(nnkit_support_tf-1.13 PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(nnkit_support_tf-1.13 PUBLIC include)
+target_link_libraries(nnkit_support_tf-1.13 nnkit_intf_backend nnkit_support_tftestinfo)
+target_link_libraries(nnkit_support_tf-1.13 tensorflow-1.13)
-add_library(nnkit_support_tf ALIAS nnkit_support_tf-1.12)
+add_library(nnkit_support_tf ALIAS nnkit_support_tf-1.13)
diff --git a/compiler/nnkit-tf/support/include/nnkit/support/tf/TensorContext.h b/compiler/nnkit-tf/support/include/nnkit/support/tf/TensorContext.h
index f1ecd6c9c..fec614733 100644
--- a/compiler/nnkit-tf/support/include/nnkit/support/tf/TensorContext.h
+++ b/compiler/nnkit-tf/support/include/nnkit/support/tf/TensorContext.h
@@ -36,7 +36,7 @@ class TensorContext final : public nnkit::TensorContext
{
public:
TensorContext(const std::vector<std::unique_ptr<ParsedTensor>> &tensors, TensorDataMap &data_map)
- : _tensors(tensors), _data_map(data_map)
+ : _tensors(tensors), _data_map(data_map)
{
// empty
}
diff --git a/compiler/nnkit-tf/support/include/nnkit/support/tf/TensorDataMap.h b/compiler/nnkit-tf/support/include/nnkit/support/tf/TensorDataMap.h
index daa1a95b3..5b12aa9a7 100644
--- a/compiler/nnkit-tf/support/include/nnkit/support/tf/TensorDataMap.h
+++ b/compiler/nnkit-tf/support/include/nnkit/support/tf/TensorDataMap.h
@@ -41,7 +41,9 @@ using nnkit::support::tftestinfo::ParsedTensor;
class TensorDataMap
{
public:
- TensorDataMap() { /* empty */}
+ TensorDataMap()
+ { /* empty */
+ }
uint8_t *allocate(const ParsedTensor *parsed_tensor)
{
diff --git a/compiler/nnkit-tf/support/src/Backend.cpp b/compiler/nnkit-tf/support/src/Backend.cpp
index f28e05f74..54bc4984d 100644
--- a/compiler/nnkit-tf/support/src/Backend.cpp
+++ b/compiler/nnkit-tf/support/src/Backend.cpp
@@ -50,7 +50,7 @@ Backend::Backend(const char *pb_path, const char *info_path) : _tf_runner(pb_pat
angkor::TensorShape shape;
if (!_tf_runner.getTensorShapeFromGraphDef(parsed_tensor, shape))
throw oops::UserExn(
- "Info you provided may be wrong or not enough. Please check the info file.");
+ "Info you provided may be wrong or not enough. Please check the info file.");
parsed_tensor->mutable_shape().resize(shape.rank());
for (int r = 0; r < shape.rank(); r++)
diff --git a/compiler/nnkit-tf/support/src/Runner.cpp b/compiler/nnkit-tf/support/src/Runner.cpp
index 0d36ee2f4..d2c37cd29 100644
--- a/compiler/nnkit-tf/support/src/Runner.cpp
+++ b/compiler/nnkit-tf/support/src/Runner.cpp
@@ -263,8 +263,8 @@ void Runner::prepareInputs(const std::vector<std::unique_ptr<ParsedTensor>> &inp
throw std::runtime_error("Not supported tensor type");
TF_Tensor *input_tensor =
- create_tensor(TF_FLOAT, shape.data(), shape.size(), data_map.data(tensor.get()),
- num_elements(tensor->shape()) * size);
+ create_tensor(TF_FLOAT, shape.data(), shape.size(), data_map.data(tensor.get()),
+ num_elements(tensor->shape()) * size);
_input_ops.emplace_back(input_op);
_input_tensors.emplace_back(input_tensor);
@@ -308,7 +308,7 @@ void Runner::run()
0, // Target operations, number of targets.
nullptr, // Run metadata.
_status // Output status.
- );
+ );
if (TF_GetCode(_status) != TF_OK)
throw std::runtime_error(TF_Message(_status));
diff --git a/compiler/nnkit-tflite/backend/Backend.cpp b/compiler/nnkit-tflite/backend/Backend.cpp
index 08ba338e8..b84c5076e 100644
--- a/compiler/nnkit-tflite/backend/Backend.cpp
+++ b/compiler/nnkit-tflite/backend/Backend.cpp
@@ -51,12 +51,13 @@ private:
std::unique_ptr<::tflite::FlatBufferModel> _model;
std::unique_ptr<::tflite::Interpreter> _interp;
};
-}
+} // namespace
#include <nnkit/CmdlineArguments.h>
-#include <stdex/Memory.h>
+
+#include <memory>
extern "C" std::unique_ptr<nnkit::Backend> make_backend(const nnkit::CmdlineArguments &args)
{
- return stdex::make_unique<GenericBackend>(args.at(0));
+ return std::make_unique<GenericBackend>(args.at(0));
}
diff --git a/compiler/nnkit-tflite/backend/CMakeLists.txt b/compiler/nnkit-tflite/backend/CMakeLists.txt
index 3f4a8ca53..31606b15e 100644
--- a/compiler/nnkit-tflite/backend/CMakeLists.txt
+++ b/compiler/nnkit-tflite/backend/CMakeLists.txt
@@ -4,4 +4,3 @@ endif(NOT TARGET nnkit_support_tflite)
add_library(nnkit_tflite_backend SHARED Backend.cpp)
target_link_libraries(nnkit_tflite_backend nnkit_support_tflite)
-target_link_libraries(nnkit_tflite_backend stdex)
diff --git a/compiler/nnkit-tflite/requires.cmake b/compiler/nnkit-tflite/requires.cmake
index d370fc17c..be53ae74f 100644
--- a/compiler/nnkit-tflite/requires.cmake
+++ b/compiler/nnkit-tflite/requires.cmake
@@ -1,2 +1 @@
-require("stdex")
require("nnkit-intf")
diff --git a/compiler/nnkit/actions/HDF5/CMakeLists.txt b/compiler/nnkit/actions/HDF5/CMakeLists.txt
index 63d3320c5..0b1e2e516 100644
--- a/compiler/nnkit/actions/HDF5/CMakeLists.txt
+++ b/compiler/nnkit/actions/HDF5/CMakeLists.txt
@@ -12,10 +12,8 @@ add_library(nnkit_HDF5_export_action SHARED Export.cpp)
target_include_directories(nnkit_HDF5_export_action PRIVATE ${HDF5_INCLUDE_DIRS})
target_link_libraries(nnkit_HDF5_export_action nnkit_intf_action)
target_link_libraries(nnkit_HDF5_export_action nnkit_HDF5_common)
-target_link_libraries(nnkit_HDF5_export_action stdex)
add_library(nnkit_HDF5_import_action SHARED Import.cpp)
target_include_directories(nnkit_HDF5_import_action PRIVATE ${HDF5_INCLUDE_DIRS})
target_link_libraries(nnkit_HDF5_import_action nnkit_intf_action)
target_link_libraries(nnkit_HDF5_import_action nnkit_HDF5_common)
-target_link_libraries(nnkit_HDF5_import_action stdex)
diff --git a/compiler/nnkit/actions/HDF5/Export.cpp b/compiler/nnkit/actions/HDF5/Export.cpp
index 389f5c050..f21a7ff4e 100644
--- a/compiler/nnkit/actions/HDF5/Export.cpp
+++ b/compiler/nnkit/actions/HDF5/Export.cpp
@@ -58,7 +58,7 @@ public:
H5::DataSpace dataspace(rank, dims);
auto dataset =
- _value_grp.createDataSet(value_filename(n), H5::PredType::IEEE_F32BE, dataspace);
+ _value_grp.createDataSet(value_filename(n), H5::PredType::IEEE_F32BE, dataspace);
float *data = new float[nncc::core::ADT::tensor::num_elements(shape)];
@@ -84,7 +84,7 @@ public:
H5::StrType name_datatype(H5::PredType::C_S1, name.size());
auto name_attr =
- _name_grp.createAttribute(value_filename(n), name_datatype, name_dataspace);
+ _name_grp.createAttribute(value_filename(n), name_datatype, name_dataspace);
name_attr.write(name_datatype, name);
}
@@ -101,9 +101,10 @@ private:
};
#include <nnkit/CmdlineArguments.h>
-#include <stdex/Memory.h>
+
+#include <memory>
extern "C" std::unique_ptr<nnkit::Action> make_action(const nnkit::CmdlineArguments &args)
{
- return stdex::make_unique<HD5ExportAction>(args.at(0));
+ return std::make_unique<HD5ExportAction>(args.at(0));
}
diff --git a/compiler/nnkit/actions/HDF5/Import.cpp b/compiler/nnkit/actions/HDF5/Import.cpp
index bba5ab701..069f42f56 100644
--- a/compiler/nnkit/actions/HDF5/Import.cpp
+++ b/compiler/nnkit/actions/HDF5/Import.cpp
@@ -92,9 +92,10 @@ private:
};
#include <nnkit/CmdlineArguments.h>
-#include <stdex/Memory.h>
+
+#include <memory>
extern "C" std::unique_ptr<nnkit::Action> make_action(const nnkit::CmdlineArguments &args)
{
- return stdex::make_unique<HD5ImportAction>(args.at(0));
+ return std::make_unique<HD5ImportAction>(args.at(0));
}
diff --git a/compiler/nnkit/actions/builtin/CMakeLists.txt b/compiler/nnkit/actions/builtin/CMakeLists.txt
index 910e12ea9..4de70dfc3 100644
--- a/compiler/nnkit/actions/builtin/CMakeLists.txt
+++ b/compiler/nnkit/actions/builtin/CMakeLists.txt
@@ -1,7 +1,5 @@
add_library(nnkit_show_action SHARED Show.cpp)
target_link_libraries(nnkit_show_action nnkit_intf_action)
-target_link_libraries(nnkit_show_action stdex)
add_library(nnkit_randomize_action SHARED Randomize.cpp)
target_link_libraries(nnkit_randomize_action nnkit_intf_action)
-target_link_libraries(nnkit_randomize_action stdex)
diff --git a/compiler/nnkit/actions/builtin/Randomize.cpp b/compiler/nnkit/actions/builtin/Randomize.cpp
index 9b023ef3b..b6e17c7c3 100644
--- a/compiler/nnkit/actions/builtin/Randomize.cpp
+++ b/compiler/nnkit/actions/builtin/Randomize.cpp
@@ -52,9 +52,10 @@ struct RandomizeAction final : public nnkit::Action
};
#include <nnkit/CmdlineArguments.h>
-#include <stdex/Memory.h>
+
+#include <memory>
extern "C" std::unique_ptr<nnkit::Action> make_action(const nnkit::CmdlineArguments &args)
{
- return stdex::make_unique<RandomizeAction>();
+ return std::make_unique<RandomizeAction>();
}
diff --git a/compiler/nnkit/actions/builtin/Show.cpp b/compiler/nnkit/actions/builtin/Show.cpp
index 2630177ef..0be15a8cd 100644
--- a/compiler/nnkit/actions/builtin/Show.cpp
+++ b/compiler/nnkit/actions/builtin/Show.cpp
@@ -63,9 +63,10 @@ void ShowAction::run(nnkit::TensorContext &ctx)
}
#include <nnkit/CmdlineArguments.h>
-#include <stdex/Memory.h>
+
+#include <memory>
extern "C" std::unique_ptr<nnkit::Action> make_action(const nnkit::CmdlineArguments &args)
{
- return stdex::make_unique<ShowAction>();
+ return std::make_unique<ShowAction>();
}
diff --git a/compiler/nnkit/tools/benchmark/CMakeLists.txt b/compiler/nnkit/tools/benchmark/CMakeLists.txt
index c2cde00f4..7f01f8bd1 100644
--- a/compiler/nnkit/tools/benchmark/CMakeLists.txt
+++ b/compiler/nnkit/tools/benchmark/CMakeLists.txt
@@ -11,4 +11,3 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
add_executable(nnkit-benchmark ${SOURCES})
target_link_libraries(nnkit-benchmark nnkit_support_cmdline)
target_link_libraries(nnkit-benchmark nnkit_support_backend)
-target_link_libraries(nnkit-benchmark stdex)
diff --git a/compiler/nnkit/tools/benchmark/src/Benchmark.cpp b/compiler/nnkit/tools/benchmark/src/Benchmark.cpp
index 6c3ebc90b..632c989bd 100644
--- a/compiler/nnkit/tools/benchmark/src/Benchmark.cpp
+++ b/compiler/nnkit/tools/benchmark/src/Benchmark.cpp
@@ -18,8 +18,7 @@
#include <nnkit/VectorArguments.h>
#include <nnkit/BackendPlugin.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <map>
#include <string>
@@ -28,7 +27,7 @@
#include <iostream>
#include <iomanip>
-using stdex::make_unique;
+using std::make_unique;
using std::chrono::milliseconds;
using std::chrono::microseconds;
diff --git a/compiler/nnkit/tools/run/CMakeLists.txt b/compiler/nnkit/tools/run/CMakeLists.txt
index 5f42ed941..d1b716090 100644
--- a/compiler/nnkit/tools/run/CMakeLists.txt
+++ b/compiler/nnkit/tools/run/CMakeLists.txt
@@ -19,4 +19,3 @@ target_link_libraries(nnkit-run nnkit_intf_action)
target_link_libraries(nnkit-run nnkit_intf_backend)
target_link_libraries(nnkit-run nnkit_support_cmdline)
target_link_libraries(nnkit-run nnkit_support_backend)
-target_link_libraries(nnkit-run stdex)
diff --git a/compiler/nnkit/tools/run/nnkit-run.cpp b/compiler/nnkit/tools/run/nnkit-run.cpp
index e60e5797a..cc5a337bd 100644
--- a/compiler/nnkit/tools/run/nnkit-run.cpp
+++ b/compiler/nnkit/tools/run/nnkit-run.cpp
@@ -35,7 +35,7 @@ public:
private:
nnkit::VectorArguments _args;
};
-}
+} // namespace
namespace
{
@@ -59,7 +59,7 @@ private:
std::string _path;
std::unique_ptr<nnkit::BackendPlugin> _plugin;
};
-}
+} // namespace
// TODO Extract Action-related helpers
#include <nnkit/Action.h>
@@ -120,7 +120,7 @@ private:
void *_handle;
Entry _entry;
};
-}
+} // namespace
namespace
{
@@ -139,10 +139,9 @@ public:
private:
ActionBinder _binder;
};
-}
-
-#include <stdex/Memory.h>
+} // namespace
+#include <memory>
#include <map>
#include <iostream>
@@ -170,7 +169,7 @@ int main(int argc, char **argv)
std::map<std::string, std::function<void(const std::string &arg)>> argparse;
argparse["--backend"] = [&sections](const std::string &tag) {
- sections.backend = stdex::make_unique<BackendSection>(tag);
+ sections.backend = std::make_unique<BackendSection>(tag);
};
argparse["--backend-arg"] = [&sections](const std::string &arg) {
diff --git a/compiler/nnop/CMakeLists.txt b/compiler/nnop/CMakeLists.txt
index 82c0e3a86..d2c8af26d 100644
--- a/compiler/nnop/CMakeLists.txt
+++ b/compiler/nnop/CMakeLists.txt
@@ -2,11 +2,11 @@ add_library(nnop INTERFACE)
target_include_directories(nnop INTERFACE include)
target_link_libraries(nnop INTERFACE angkor)
-nnas_find_package(GTest QUIET)
-
-if(NOT GTest_FOUND)
+if(NOT ENABLE_TEST)
return()
-endif(NOT GTest_FOUND)
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest QUIET)
file(GLOB_RECURSE TESTS "src/*.test.cpp")
diff --git a/compiler/nnop/include/nnop/PadInfo.h b/compiler/nnop/include/nnop/PadInfo.h
index 228f08514..d17a33abf 100644
--- a/compiler/nnop/include/nnop/PadInfo.h
+++ b/compiler/nnop/include/nnop/PadInfo.h
@@ -26,7 +26,7 @@ class PadInfo
{
public:
PadInfo(uint32_t top, uint32_t bottom, uint32_t left, uint32_t right)
- : _top{top}, _bottom{bottom}, _left{left}, _right{right}
+ : _top{top}, _bottom{bottom}, _left{left}, _right{right}
{
// DO NOTHING
}
diff --git a/compiler/nnop/include/nnop/StrideInfo.h b/compiler/nnop/include/nnop/StrideInfo.h
index e47489fa7..653603d6c 100644
--- a/compiler/nnop/include/nnop/StrideInfo.h
+++ b/compiler/nnop/include/nnop/StrideInfo.h
@@ -39,6 +39,6 @@ private:
uint32_t _vertical;
};
-} // namespace nncc
+} // namespace nnop
#endif // __NNOP_STRIDE_INFO_H__
diff --git a/compiler/nnsuite/conv/model/src/RandomModel.cpp b/compiler/nnsuite/conv/model/src/RandomModel.cpp
index 7b15d4c96..6d4a6147d 100644
--- a/compiler/nnsuite/conv/model/src/RandomModel.cpp
+++ b/compiler/nnsuite/conv/model/src/RandomModel.cpp
@@ -28,8 +28,8 @@ namespace conv
{
RandomModel::RandomModel(int32_t seed)
- : _ifm_shape{1, 8, 8}, _ifm_name{"ifm"}, _ofm_name{"ofm"}, _ofm_shape{2, 6, 6},
- _ker_buffer{kernel::Shape{2, 1, 3, 3}, kernel::NCHWLayout{}}
+ : _ifm_shape{1, 8, 8}, _ifm_name{"ifm"}, _ofm_name{"ofm"}, _ofm_shape{2, 6, 6},
+ _ker_buffer{kernel::Shape{2, 1, 3, 3}, kernel::NCHWLayout{}}
{
std::default_random_engine gen{static_cast<uint32_t>(seed)};
std::normal_distribution<float> dist{0.0f, 1.0f};
diff --git a/compiler/nnsuite/conv/nnkit-caffe/CMakeLists.txt b/compiler/nnsuite/conv/nnkit-caffe/CMakeLists.txt
index 6445cc6fb..7e860f874 100644
--- a/compiler/nnsuite/conv/nnkit-caffe/CMakeLists.txt
+++ b/compiler/nnsuite/conv/nnkit-caffe/CMakeLists.txt
@@ -9,7 +9,6 @@ list(REMOVE_ITEM SOURCES ${TESTS})
add_library(nnsuite_conv_caffe SHARED ${SOURCES})
target_link_libraries(nnsuite_conv_caffe nnsuite_conv)
target_link_libraries(nnsuite_conv_caffe nnkit_support_caffe)
-target_link_libraries(nnsuite_conv_caffe stdex)
nnas_find_package(GTest QUIET)
diff --git a/compiler/nnsuite/conv/nnkit-caffe/ConvBackend.cpp b/compiler/nnsuite/conv/nnkit-caffe/ConvBackend.cpp
index 31d2b33fc..664ca94f3 100644
--- a/compiler/nnsuite/conv/nnkit-caffe/ConvBackend.cpp
+++ b/compiler/nnsuite/conv/nnkit-caffe/ConvBackend.cpp
@@ -23,9 +23,9 @@
#include <nncc/core/ADT/kernel/Overlay.h>
#include <nncc/core/ADT/kernel/NCHWLayout.h>
-#include <stdex/Memory.h>
+#include <memory>
-using stdex::make_unique;
+using std::make_unique;
std::unique_ptr<nnkit::Backend> ConvBackend::create(const nnsuite::conv::Model &model)
{
diff --git a/compiler/nnsuite/conv/nnkit-caffe/ConvBackend.test.cpp b/compiler/nnsuite/conv/nnkit-caffe/ConvBackend.test.cpp
index 776bf186b..20c42385a 100644
--- a/compiler/nnsuite/conv/nnkit-caffe/ConvBackend.test.cpp
+++ b/compiler/nnsuite/conv/nnkit-caffe/ConvBackend.test.cpp
@@ -35,8 +35,8 @@ public:
TestModel(const std::string &ifm_name, const feature::Shape &ifm_shape,
const std::string &ofm_name, const feature::Shape &ofm_shape,
const kernel::Shape &ker_shape, const kernel::Layout &ker_layout, float *ker_data)
- : _ifm_name(ifm_name), _ifm_shape(ifm_shape), _ofm_name(ofm_name), _ofm_shape(ofm_shape),
- _ker{ker_shape, ker_layout, ker_data}
+ : _ifm_name(ifm_name), _ifm_shape(ifm_shape), _ofm_name(ofm_name),
+ _ofm_shape(ofm_shape), _ker{ker_shape, ker_layout, ker_data}
{
// DO NOTHING
}
diff --git a/compiler/nnsuite/conv/nnkit-tflite/CMakeLists.txt b/compiler/nnsuite/conv/nnkit-tflite/CMakeLists.txt
index c1cf88812..8e870490e 100644
--- a/compiler/nnsuite/conv/nnkit-tflite/CMakeLists.txt
+++ b/compiler/nnsuite/conv/nnkit-tflite/CMakeLists.txt
@@ -9,7 +9,6 @@ list(REMOVE_ITEM SOURCES ${TESTS})
add_library(nnsuite_conv_tflite SHARED ${SOURCES})
target_link_libraries(nnsuite_conv_tflite nnsuite_conv)
target_link_libraries(nnsuite_conv_tflite nnkit_support_tflite-1.7)
-target_link_libraries(nnsuite_conv_tflite stdex)
nnas_find_package(GTest QUIET)
diff --git a/compiler/nnsuite/conv/nnkit-tflite/ConvBackend.cpp b/compiler/nnsuite/conv/nnkit-tflite/ConvBackend.cpp
index 8ec9ce491..ea189ff6e 100644
--- a/compiler/nnsuite/conv/nnkit-tflite/ConvBackend.cpp
+++ b/compiler/nnsuite/conv/nnkit-tflite/ConvBackend.cpp
@@ -74,7 +74,7 @@ static inline std::vector<int> as_dims(const nncc::core::ADT::kernel::Shape &sha
}
ConvBackend::ConvBackend(const nnsuite::conv::Model &model)
- : _ifm_name{model.ifm_name()}, _ofm_name{model.ofm_name()}
+ : _ifm_name{model.ifm_name()}, _ofm_name{model.ofm_name()}
{
using nncc::core::ADT::kernel::Overlay;
using nncc::core::ADT::kernel::NHWCLayout;
@@ -123,12 +123,12 @@ ConvBackend::ConvBackend(const nnsuite::conv::Model &model)
as_dims(model.ifm_shape()), quantization);
_interp.SetTensorParametersReadOnly(
- 2, kTfLiteFloat32 /* type */, "kernel" /* name */, as_dims(model.ker_shape()), quantization,
- reinterpret_cast<const char *>(_kernel.data()), _kernel.size() * sizeof(float));
+ 2, kTfLiteFloat32 /* type */, "kernel" /* name */, as_dims(model.ker_shape()), quantization,
+ reinterpret_cast<const char *>(_kernel.data()), _kernel.size() * sizeof(float));
_interp.SetTensorParametersReadOnly(
- 3, kTfLiteFloat32 /* type */, "bias" /* name */, {static_cast<int>(_bias.size())},
- quantization, reinterpret_cast<const char *>(_bias.data()), _bias.size() * sizeof(float));
+ 3, kTfLiteFloat32 /* type */, "bias" /* name */, {static_cast<int>(_bias.size())}, quantization,
+ reinterpret_cast<const char *>(_bias.data()), _bias.size() * sizeof(float));
auto param = typed_malloc<TfLiteConvParams>();
diff --git a/compiler/nnsuite/conv/nnkit-tflite/ConvBackend.test.cpp b/compiler/nnsuite/conv/nnkit-tflite/ConvBackend.test.cpp
index db82f0cf9..98ac78fc2 100644
--- a/compiler/nnsuite/conv/nnkit-tflite/ConvBackend.test.cpp
+++ b/compiler/nnsuite/conv/nnkit-tflite/ConvBackend.test.cpp
@@ -38,8 +38,8 @@ public:
TestModel(const std::string &ifm_name, const feature::Shape &ifm_shape,
const std::string &ofm_name, const feature::Shape &ofm_shape,
const kernel::Shape &ker_shape, const kernel::Layout &ker_layout, float *ker_data)
- : _ifm_name(ifm_name), _ifm_shape(ifm_shape), _ofm_name(ofm_name), _ofm_shape(ofm_shape),
- _ker{ker_shape, ker_layout, ker_data}
+ : _ifm_name(ifm_name), _ifm_shape(ifm_shape), _ofm_name(ofm_name),
+ _ofm_shape(ofm_shape), _ker{ker_shape, ker_layout, ker_data}
{
// DO NOTHING
}
diff --git a/compiler/nnsuite/conv/nnkit-tflite/Entry.cpp b/compiler/nnsuite/conv/nnkit-tflite/Entry.cpp
index 2c84f72e6..c1e013767 100644
--- a/compiler/nnsuite/conv/nnkit-tflite/Entry.cpp
+++ b/compiler/nnsuite/conv/nnkit-tflite/Entry.cpp
@@ -21,8 +21,7 @@
#include <nnkit/Backend.h>
#include <nnkit/CmdlineArguments.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <chrono>
#include <iostream>
@@ -40,5 +39,5 @@ extern "C" std::unique_ptr<nnkit::Backend> make_backend(const nnkit::CmdlineArgu
const nnsuite::conv::RandomModel model{seed};
- return stdex::make_unique<ConvBackend>(model);
+ return std::make_unique<ConvBackend>(model);
}
diff --git a/compiler/one-cmds/CMakeLists.txt b/compiler/one-cmds/CMakeLists.txt
index 173b8b476..917bbdaf6 100644
--- a/compiler/one-cmds/CMakeLists.txt
+++ b/compiler/one-cmds/CMakeLists.txt
@@ -1,20 +1,67 @@
+# NOTE find_package tries to use at least python3.8 as follows, depending on platform version
+#   Ubuntu18.04; explicitly installed python3.8 (default is python3.6)
+# Ubuntu20.04; default python3.8
+# Ubuntu22.04; default python3.10
+# refer https://github.com/Samsung/ONE/issues/9962
+find_package(PythonInterp 3.8 QUIET)
+find_package(PythonLibs 3.8 QUIET)
+
+if(NOT ${PYTHONINTERP_FOUND})
+ message(STATUS "Build one-cmds: FALSE (Python3 is missing)")
+ return()
+endif()
+
+if(${PYTHON_VERSION_MINOR} LESS 8)
+ message(STATUS "Build one-cmds: FALSE (You need to install Python version higher than 3.8)")
+ return()
+endif()
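+
+# NOTE if needed, a specific interpreter can be hinted via the standard
+# FindPythonInterp variable, e.g.:
+#   cmake -DPYTHON_EXECUTABLE=/usr/bin/python3.8 ..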
+
+# NOTE these files should not have extensions;
+# the code below removes the extension on copy and install.
set(ONE_COMMAND_FILES
+ one-build
one-import
one-import-bcq
one-import-tf
one-import-tflite
+ one-import-onnx
one-optimize
one-quantize
one-pack
+ one-partition
+ one-profile
+ one-infer
one-codegen
- one-prepare-venv
+ onecc
)
+# TODO find better way for per-platform files
+if(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "aarch64")
+ # NOTE copy one-prepare-venv.aarch64 as build/../one-prepare-venv
+ # and install build/../one-prepare-venv file
+ list(APPEND ONE_COMMAND_FILES one-prepare-venv.aarch64)
+else(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "aarch64")
+ if(ONE_UBUNTU_CODENAME_BIONIC)
+ # NOTE copy one-prepare-venv.u1804 as build/../one-prepare-venv
+ # and install build/../one-prepare-venv file
+ list(APPEND ONE_COMMAND_FILES one-prepare-venv.u1804)
+ else(ONE_UBUNTU_CODENAME_BIONIC)
+ list(APPEND ONE_COMMAND_FILES one-prepare-venv)
+ endif(ONE_UBUNTU_CODENAME_BIONIC)
+endif(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "aarch64")
+
+# pytorch importer is an experimental feature, it is not used in default configuration
+if(ENABLE_ONE_IMPORT_PYTORCH)
+ list(APPEND ONE_COMMAND_FILES one-import-pytorch)
+endif(ENABLE_ONE_IMPORT_PYTORCH)
+
foreach(ONE_COMMAND IN ITEMS ${ONE_COMMAND_FILES})
set(ONE_COMMAND_FILE ${ONE_COMMAND})
set(ONE_COMMAND_SRC "${CMAKE_CURRENT_SOURCE_DIR}/${ONE_COMMAND_FILE}")
- set(ONE_COMMAND_BIN "${CMAKE_CURRENT_BINARY_DIR}/${ONE_COMMAND_FILE}")
+ # strip extension from the name
+  get_filename_component(ONE_COMMAND_FILE_NAME ${ONE_COMMAND} NAME_WE)
+  set(ONE_COMMAND_BIN "${CMAKE_CURRENT_BINARY_DIR}/${ONE_COMMAND_FILE_NAME}")
set(ONE_COMMAND_TARGET "${ONE_COMMAND}_target")
add_custom_command(OUTPUT ${ONE_COMMAND_BIN}
@@ -25,7 +72,7 @@ foreach(ONE_COMMAND IN ITEMS ${ONE_COMMAND_FILES})
add_custom_target(${ONE_COMMAND_TARGET} ALL DEPENDS ${ONE_COMMAND_BIN})
- install(FILES ${ONE_COMMAND}
+ install(FILES ${ONE_COMMAND_BIN}
PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
GROUP_READ GROUP_EXECUTE
WORLD_READ WORLD_EXECUTE
@@ -33,9 +80,110 @@ foreach(ONE_COMMAND IN ITEMS ${ONE_COMMAND_FILES})
endforeach(ONE_COMMAND)
+set(ONE_UTILITY_FILES
+ one-build.template.cfg
+ onecc.template.cfg
+ onnx_legalizer.py
+)
+
+foreach(ONE_UTILITY IN ITEMS ${ONE_UTILITY_FILES})
+
+ set(ONE_UTILITY_FILE ${ONE_UTILITY})
+ set(ONE_UTILITY_SRC "${CMAKE_CURRENT_SOURCE_DIR}/${ONE_UTILITY_FILE}")
+ set(ONE_UTILITY_BIN "${CMAKE_CURRENT_BINARY_DIR}/${ONE_UTILITY_FILE}")
+ set(ONE_UTILITY_TARGET "${ONE_UTILITY}_target")
+
+ add_custom_command(OUTPUT ${ONE_UTILITY_BIN}
+ COMMAND ${CMAKE_COMMAND} -E copy "${ONE_UTILITY_SRC}" "${ONE_UTILITY_BIN}"
+ DEPENDS ${ONE_UTILITY_SRC}
+ COMMENT "Generate ${ONE_UTILITY_BIN}"
+ )
+
+ add_custom_target(${ONE_UTILITY_TARGET} ALL DEPENDS ${ONE_UTILITY_BIN})
+
+ install(FILES ${ONE_UTILITY}
+ PERMISSIONS OWNER_WRITE OWNER_READ
+ GROUP_READ
+ WORLD_READ
+ DESTINATION bin)
+
+endforeach(ONE_UTILITY)
+
+# one-pack internally uses model2nnpkg tool
+set(MODEL2NNPKG "${NNAS_PROJECT_SOURCE_DIR}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.py")
+install(FILES ${MODEL2NNPKG}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION bin
+ RENAME "model2nnpkg")
+
+# make python directory
+set(ONE_PYTHON_FILES backends.py
+ constant.py
+ export_constant.py
+ make_cmd.py
+ CfgRunner.py
+ OptionBuilder.py
+ TopologicalSortHelper.py
+ WorkflowRunner.py
+ utils.py)
+
+foreach(ONE_PYTHON_FILE IN ITEMS ${ONE_PYTHON_FILES})
+
+ set(ONE_PYTHON_DIR "onelib")
+ set(ONE_PYTHON_DIR_BIN "${CMAKE_CURRENT_BINARY_DIR}/${ONE_PYTHON_DIR}")
+ set(ONE_PYTHON_FILE_SRC "${CMAKE_CURRENT_SOURCE_DIR}/${ONE_PYTHON_DIR}/${ONE_PYTHON_FILE}")
+ set(ONE_PYTHON_FILE_BIN "${CMAKE_CURRENT_BINARY_DIR}/${ONE_PYTHON_DIR}/${ONE_PYTHON_FILE}")
+ set(ONE_PYTHON_TARGET "${ONE_PYTHON_FILE}_target")
+
+ add_custom_command(OUTPUT ${ONE_PYTHON_DIR_BIN}
+ COMMAND ${CMAKE_COMMAND} -E make_directory "${ONE_PYTHON_DIR_BIN}"
+ COMMENT "Generate ${ONE_PYTHON_DIR_BIN}"
+ )
+
+ add_custom_command(OUTPUT ${ONE_PYTHON_FILE_BIN}
+ COMMAND ${CMAKE_COMMAND} -E copy "${ONE_PYTHON_FILE_SRC}" "${ONE_PYTHON_FILE_BIN}"
+    DEPENDS ${ONE_PYTHON_FILE_SRC}
+ COMMENT "Generate ${ONE_PYTHON_FILE_BIN}"
+ )
+
+ add_custom_target(${ONE_PYTHON_TARGET} ALL DEPENDS ${ONE_PYTHON_DIR_BIN} ${ONE_PYTHON_FILE_BIN})
+
+ install(DIRECTORY ${ONE_PYTHON_DIR}
+ FILE_PERMISSIONS OWNER_WRITE OWNER_READ
+ GROUP_READ
+ WORLD_READ
+ DESTINATION bin)
+
+endforeach(ONE_PYTHON_FILE)
+
+set(CONSTANT_EXPORTING_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/${ONE_PYTHON_DIR}/export_constant.py")
+set(O1_OPTION "O1")
+set(O1_CFG_FILE "${O1_OPTION}.cfg")
+set(O1_CFG_FILE_BIN "${CMAKE_CURRENT_BINARY_DIR}/${ONE_PYTHON_DIR}/${O1_CFG_FILE}")
+
+add_custom_command(OUTPUT ${O1_CFG_FILE_BIN}
+ COMMAND ${PYTHON_EXECUTABLE} ${CONSTANT_EXPORTING_SCRIPT}
+ --constant ${O1_OPTION}
+ --format cfg
+ --output_path ${O1_CFG_FILE_BIN}
+ DEPENDS ${CONSTANT_EXPORTING_SCRIPT}
+ COMMENT "Generate ${O1_CFG_FILE}"
+)
+
+add_custom_target("O1_cfg_target" ALL DEPENDS ${O1_CFG_FILE_BIN})
+
+install(FILES ${O1_CFG_FILE_BIN}
+ PERMISSIONS OWNER_WRITE OWNER_READ
+ GROUP_READ
+ WORLD_READ
+ DESTINATION optimization)
+
set(ONE_DOCUMENT_FILES
how-to-use-one-commands.txt
how-to-prepare-virtualenv.txt
+ how-to-create-hdf5-dataset.txt
)
foreach(ONE_DOCUMENT IN ITEMS ${ONE_DOCUMENT_FILES})
@@ -43,3 +191,11 @@ foreach(ONE_DOCUMENT IN ITEMS ${ONE_DOCUMENT_FILES})
install(FILES ${ONE_DOCUMENT} DESTINATION doc)
endforeach(ONE_DOCUMENT)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+add_subdirectory(dummy-driver)
+add_subdirectory(tests)
+add_subdirectory(validate-onnx2circle)
diff --git a/compiler/one-cmds/dummy-driver/CMakeLists.txt b/compiler/one-cmds/dummy-driver/CMakeLists.txt
new file mode 100644
index 000000000..55ef96c06
--- /dev/null
+++ b/compiler/one-cmds/dummy-driver/CMakeLists.txt
@@ -0,0 +1,111 @@
+# dummy driver for interface test
+set(DUMMY_DRIVER_SRC src/dummy-compile.cpp)
+set(DUMMY_V2_DRIVER_SRC src/dummyV2-compile.cpp)
+set(HELP_DRIVER_SRC src/help-compile.cpp)
+set(DUMMY_INFER_SRC src/dummy-infer.cpp)
+set(DUMMY_INFER_V2_SRC src/dummy-inferV2.cpp)
+set(HELP_INFER_SRC src/help-infer.cpp)
+set(DUMMY_PROFILE_SRC src/dummy-profile.cpp)
+set(DUMMY_V2_PROFILE_SRC src/dummyV2-profile.cpp)
+set(DUMMY_V3_PROFILE_SRC src/dummyV3-profile.cpp)
+set(HELP_PROFILE_SRC src/help-profile.cpp)
+set(DUMMY_ENV_SRC src/dummyEnv-compile.cpp)
+set(DUMMY_ONNX_EXT src/dummy-onnx-ext.cpp)
+
+add_executable(dummy-compile ${DUMMY_DRIVER_SRC})
+add_executable(dummyV2-compile ${DUMMY_V2_DRIVER_SRC})
+add_executable(help-compile ${HELP_DRIVER_SRC})
+add_executable(dummy-infer ${DUMMY_INFER_SRC})
+add_executable(dummy-inferV2 ${DUMMY_INFER_V2_SRC})
+add_executable(help-infer ${HELP_INFER_SRC})
+add_executable(dummy-profile ${DUMMY_PROFILE_SRC})
+add_executable(dummyV2-profile ${DUMMY_V2_PROFILE_SRC})
+add_executable(dummyV3-profile ${DUMMY_V3_PROFILE_SRC})
+add_executable(help-profile ${HELP_PROFILE_SRC})
+add_executable(dummyEnv-compile ${DUMMY_ENV_SRC})
+add_executable(dummy-onnx-ext ${DUMMY_ONNX_EXT})
+
+set(DUMMY_DRIVER "${CMAKE_CURRENT_BINARY_DIR}/dummy-compile")
+set(DUMMY_V2_DRIVER "${CMAKE_CURRENT_BINARY_DIR}/dummyV2-compile")
+set(HELP_DRIVER "${CMAKE_CURRENT_BINARY_DIR}/help-compile")
+set(DUMMY_INFER "${CMAKE_CURRENT_BINARY_DIR}/dummy-infer")
+set(DUMMY_INFER_V2 "${CMAKE_CURRENT_BINARY_DIR}/dummy-inferV2")
+set(HELP_INFER "${CMAKE_CURRENT_BINARY_DIR}/help-infer")
+set(DUMMY_PROFILE "${CMAKE_CURRENT_BINARY_DIR}/dummy-profile")
+set(DUMMY_V2_PROFILE "${CMAKE_CURRENT_BINARY_DIR}/dummyV2-profile")
+set(DUMMY_V3_PROFILE "${CMAKE_CURRENT_BINARY_DIR}/dummyV3-profile")
+set(HELP_PROFILE "${CMAKE_CURRENT_BINARY_DIR}/help-profile")
+set(DUMMY_ENV "${CMAKE_CURRENT_BINARY_DIR}/dummyEnv-compile")
+set(DUMMY_ONNX_EXT "${CMAKE_CURRENT_BINARY_DIR}/dummy-onnx-ext")
+
+install(FILES ${DUMMY_DRIVER}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION test)
+
+install(FILES ${DUMMY_V2_DRIVER}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION test)
+
+install(FILES ${HELP_DRIVER}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION test)
+
+install(FILES ${DUMMY_INFER}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION test)
+
+install(FILES ${DUMMY_INFER_V2}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION test)
+
+install(FILES ${HELP_INFER}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION test)
+
+install(FILES ${DUMMY_PROFILE}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION test)
+
+install(FILES ${DUMMY_V2_PROFILE}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION test)
+
+install(FILES ${DUMMY_V3_PROFILE}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION test)
+
+install(FILES ${HELP_PROFILE}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION test)
+
+install(FILES ${DUMMY_ENV}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION test)
+
+install(FILES ${DUMMY_ONNX_EXT}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION test)
diff --git a/compiler/one-cmds/dummy-driver/src/dummy-compile.cpp b/compiler/one-cmds/dummy-driver/src/dummy-compile.cpp
new file mode 100644
index 000000000..2ad09a3dd
--- /dev/null
+++ b/compiler/one-cmds/dummy-driver/src/dummy-compile.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * dummy-compile only tests its interface rather than its functionality.
+ *
+ * ./dummy-compile -o ${OUTPUT_NAME} ${INPUT_NAME}
+ *
+ * NOTE argv[3](INPUT_NAME) is not used here.
+ */
+
+#include <iostream>
+#include <fstream>
+#include <string>
+
+int main(int argc, char **argv)
+{
+ if (argc != 4)
+ return EXIT_FAILURE;
+
+ std::string opt_o{"-o"};
+ std::string argv_1{argv[1]};
+
+ if (opt_o != argv_1)
+ return EXIT_FAILURE;
+
+ std::string output_name{argv[2]};
+ std::ofstream outfile(output_name);
+
+ outfile << "dummy-compile dummy output!!" << std::endl;
+
+ outfile.close();
+
+ return EXIT_SUCCESS;
+}
diff --git a/compiler/one-cmds/dummy-driver/src/dummy-infer.cpp b/compiler/one-cmds/dummy-driver/src/dummy-infer.cpp
new file mode 100644
index 000000000..60f5faefa
--- /dev/null
+++ b/compiler/one-cmds/dummy-driver/src/dummy-infer.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * dummy-infer only tests its interface rather than its functionality.
+ *
+ * ./dummy-infer ${INPUT_NAME}
+ * dummy-infer dummy output!!!
+ */
+
+#include <iostream>
+
+int main(int argc, char **argv)
+{
+ if (argc != 2)
+ return EXIT_FAILURE;
+
+ std::cout << "dummy-infer dummy output!!!" << std::endl;
+
+ return EXIT_SUCCESS;
+}
diff --git a/compiler/one-cmds/dummy-driver/src/dummy-inferV2.cpp b/compiler/one-cmds/dummy-driver/src/dummy-inferV2.cpp
new file mode 100644
index 000000000..4b93c70a3
--- /dev/null
+++ b/compiler/one-cmds/dummy-driver/src/dummy-inferV2.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * dummy-inferV2 only tests its interface rather than its functionality.
+ *
+ * ./dummy-inferV2 ${INPUT_NAME}
+ * Do inference of ${INPUT_NAME}
+ */
+
+#include <iostream>
+
+int main(int argc, char **argv)
+{
+ if (argc != 2)
+ return EXIT_FAILURE;
+
+ std::cout << "Do inference of " + std::string(argv[1]) << std::endl;
+
+ return EXIT_SUCCESS;
+}
diff --git a/compiler/one-cmds/dummy-driver/src/dummy-onnx-ext.cpp b/compiler/one-cmds/dummy-driver/src/dummy-onnx-ext.cpp
new file mode 100644
index 000000000..845445bf5
--- /dev/null
+++ b/compiler/one-cmds/dummy-driver/src/dummy-onnx-ext.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * dummy-onnx-ext only tests its interface rather than its functionality.
+ *
+ * ./dummy-onnx-ext [options]
+ * one-import-onnx-ext dummy output!!!
+ */
+
+#include <iostream>
+#include <fstream>
+#include <string>
+
+int main(int argc, char **argv)
+{
+ (void)argc;
+ (void)argv;
+
+ std::cout << "one-import-onnx-ext dummy output!!!" << std::endl;
+
+ return EXIT_SUCCESS;
+}
diff --git a/compiler/one-cmds/dummy-driver/src/dummy-profile.cpp b/compiler/one-cmds/dummy-driver/src/dummy-profile.cpp
new file mode 100644
index 000000000..b997e0514
--- /dev/null
+++ b/compiler/one-cmds/dummy-driver/src/dummy-profile.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * dummy-profile only tests its interface rather than its functionality.
+ *
+ * ./dummy-profile ${INPUT_NAME}
+ * dummy-profile dummy output!!!
+ */
+
+#include <iostream>
+#include <fstream>
+#include <string>
+
+int main(int argc, char **argv)
+{
+ if (argc != 2)
+ return EXIT_FAILURE;
+
+ std::cout << "dummy-profile dummy output!!!" << std::endl;
+
+ return EXIT_SUCCESS;
+}
diff --git a/compiler/one-cmds/dummy-driver/src/dummyEnv-compile.cpp b/compiler/one-cmds/dummy-driver/src/dummyEnv-compile.cpp
new file mode 100644
index 000000000..b16ea3000
--- /dev/null
+++ b/compiler/one-cmds/dummy-driver/src/dummyEnv-compile.cpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * dummyEnv-compile only tests its interface rather than its functionality.
+ *
+ * ./dummyEnv-compile ${DUMMY_OUTPUT}
+ */
+
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <string>
+
+int main(int argc, char **argv)
+{
+ if (argc != 2)
+ return EXIT_FAILURE;
+
+ std::string spm_size;
+
+ if (const char *env_p = std::getenv("SPM_SIZE"))
+ spm_size = std::string(env_p);
+
+ std::ofstream outfile(argv[1]);
+
+ outfile << "SPM_SIZE=" << spm_size;
+
+ outfile.close();
+
+ return EXIT_SUCCESS;
+}
diff --git a/compiler/one-cmds/dummy-driver/src/dummyV2-compile.cpp b/compiler/one-cmds/dummy-driver/src/dummyV2-compile.cpp
new file mode 100644
index 000000000..bc7372e2e
--- /dev/null
+++ b/compiler/one-cmds/dummy-driver/src/dummyV2-compile.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * dummyV2-compile only tests its interface rather than its functionality.
+ *
+ * ./dummyV2-compile -O ${OUTPUT_NAME} ${INPUT_NAME}
+ *
+ * NOTE argv[3](INPUT_NAME) is not used here.
+ */
+
+#include <iostream>
+#include <fstream>
+#include <string>
+
+int main(int argc, char **argv)
+{
+ if (argc != 4)
+ return EXIT_FAILURE;
+
+ std::string opt_o{"-O"};
+ std::string argv_1{argv[1]};
+
+ if (opt_o != argv_1)
+ {
+ std::cout << "dummyV2-compile: Invalid option" << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ std::string output_name{argv[2]};
+ std::ofstream outfile(output_name);
+
+ outfile << "dummyV2-compile dummy output!!" << std::endl;
+
+ outfile.close();
+
+ return EXIT_SUCCESS;
+}
diff --git a/compiler/one-cmds/dummy-driver/src/dummyV2-profile.cpp b/compiler/one-cmds/dummy-driver/src/dummyV2-profile.cpp
new file mode 100644
index 000000000..020f320d9
--- /dev/null
+++ b/compiler/one-cmds/dummy-driver/src/dummyV2-profile.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * dummyV2-profile only tests its interface rather than its functionality.
+ *
+ * ./dummyV2-profile ${INPUT_NAME}
+ * dummyV2-profile dummy output!!!
+ */
+
+#include <iostream>
+#include <fstream>
+#include <string>
+
+int main(int argc, char **argv)
+{
+ if (argc != 2)
+ return EXIT_FAILURE;
+
+ std::cout << "dummyV2-profile dummy output!!!" << std::endl;
+
+ return EXIT_SUCCESS;
+}
diff --git a/compiler/one-cmds/dummy-driver/src/dummyV3-profile.cpp b/compiler/one-cmds/dummy-driver/src/dummyV3-profile.cpp
new file mode 100644
index 000000000..d10644696
--- /dev/null
+++ b/compiler/one-cmds/dummy-driver/src/dummyV3-profile.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * dummyV3-profile only tests its interface rather than its functionality.
+ *
+ * ./dummyV3-profile ${INPUT_TO_PRINT}
+ * dummyV3-profile with ${INPUT_TO_PRINT}
+ */
+
+#include <iostream>
+#include <fstream>
+#include <string>
+
+int main(int argc, char **argv)
+{
+ if (argc != 2)
+ return EXIT_FAILURE;
+
+ std::string input_to_print{argv[1]};
+
+ std::cout << "dummyV3-profile with " << input_to_print << std::endl;
+
+ return EXIT_SUCCESS;
+}
diff --git a/compiler/one-cmds/dummy-driver/src/help-compile.cpp b/compiler/one-cmds/dummy-driver/src/help-compile.cpp
new file mode 100644
index 000000000..9be9018f3
--- /dev/null
+++ b/compiler/one-cmds/dummy-driver/src/help-compile.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * help-compile prints dummy help message.
+ *
+ * $ ./help-compile -h
+ * HELP MESSAGE!!
+ */
+
+#include <iostream>
+#include <fstream>
+#include <string>
+
+int main(int argc, char **argv)
+{
+ if (argc != 2)
+ return EXIT_FAILURE;
+
+ std::string opt_h{"-h"};
+ std::string argv_1{argv[1]};
+
+ if (opt_h != argv_1)
+ return EXIT_FAILURE;
+
+ std::cout << "HELP MESSAGE!!" << std::endl;
+
+ return EXIT_SUCCESS;
+}
diff --git a/compiler/one-cmds/dummy-driver/src/help-infer.cpp b/compiler/one-cmds/dummy-driver/src/help-infer.cpp
new file mode 100644
index 000000000..821d368d4
--- /dev/null
+++ b/compiler/one-cmds/dummy-driver/src/help-infer.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * help-infer prints dummy help message.
+ *
+ * $ ./help-infer -h
+ * HELP MESSAGE!!
+ */
+
+#include <iostream>
+#include <fstream>
+#include <string>
+
+int main(int argc, char **argv)
+{
+ if (argc != 2)
+ return EXIT_FAILURE;
+
+ std::string opt_h{"-h"};
+ std::string argv_1{argv[1]};
+
+ if (opt_h != argv_1)
+ return EXIT_FAILURE;
+
+ std::cout << "HELP MESSAGE!!" << std::endl;
+
+ return EXIT_SUCCESS;
+}
diff --git a/compiler/one-cmds/dummy-driver/src/help-profile.cpp b/compiler/one-cmds/dummy-driver/src/help-profile.cpp
new file mode 100644
index 000000000..51425360c
--- /dev/null
+++ b/compiler/one-cmds/dummy-driver/src/help-profile.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * help-profile prints dummy help message.
+ *
+ * $ ./help-profile -h
+ * HELP MESSAGE!!
+ */
+
+#include <iostream>
+#include <fstream>
+#include <string>
+
+int main(int argc, char **argv)
+{
+ if (argc != 2)
+ return EXIT_FAILURE;
+
+ std::string opt_h{"-h"};
+ std::string argv_1{argv[1]};
+
+ if (opt_h != argv_1)
+ return EXIT_FAILURE;
+
+ std::cout << "HELP MESSAGE!!" << std::endl;
+
+ return EXIT_SUCCESS;
+}
diff --git a/compiler/one-cmds/how-to-create-hdf5-dataset.txt b/compiler/one-cmds/how-to-create-hdf5-dataset.txt
new file mode 100644
index 000000000..5dc26b2cb
--- /dev/null
+++ b/compiler/one-cmds/how-to-create-hdf5-dataset.txt
@@ -0,0 +1,97 @@
+About
+-----
+
+Last update: 2020-11-06
+
+This document briefly explains how to create an input dataset for one-quantize.
+
+The input dataset of one-quantize is an hdf5 file.
+For users who are not familiar with hdf5, we provide a tool to convert raw data to hdf5.
+
+Workflow to generate input dataset (hdf5 file)
+1. Pre-process input data for the target model and save them as raw data files
+2. Package the raw data files into the hdf5 file using rawdata2hdf5
+
+Note: Users should prepare raw data which can be fed to the target model.
+This is because we don't know which pre-processing logic was used for the target model.
+
+rawdata2hdf5
+---------------
+
+rawdata2hdf5 is a tool to package raw data files into an hdf5 file,
+which is the input dataset format for one-quantize.
+
+Usage: rawdata2hdf5 --data_list <path/to/text/file> --output_path <path/to/output/file>
+
+Example
+---------------
+
+Let's make an input dataset for InceptionV3 model.
+
+1. Download sample images (You can use your own dataset)
+
+$ wget https://github.com/Samsung/ONE/files/5499172/img_files.zip
+$ unzip img_files.zip
+$ tree img_files
+img_files
+├── bald-eagle.jpg
+├── cow.jpg
+├── deer-in-wild.jpg
+├── fox.jpg
+├── ladybird.jpg
+├── orange-portocaliu.jpg
+├── pink-lotus.jpg
+├── red-church.jpg
+├── tomatoes.jpg
+└── young-dachshund.jpg
+
+2. Pre-process the images and save them as raw data files
+
+In this example, we use Pillow and numpy for simple pre-processing.
+
+$ pip install Pillow numpy
+
+Run the pre-processing logic for the target model.
+We provide a short Python script that scales the image data to the range [-1, 1].
+(This is different from the original pre-processing of InceptionV3.
+Visit the link below for the exact algorithm.)
+https://github.com/tensorflow/models/blob/v2.3.0/research/slim/preprocessing/inception_preprocessing.py
+
+$ cat > preprocess.py << EOF
+import os, shutil, PIL.Image, numpy as np
+
+input_dir = 'img_files'
+output_dir = 'raw_files'
+list_file = 'datalist.txt'
+
+if os.path.exists(output_dir):
+ shutil.rmtree(output_dir, ignore_errors=True)
+os.makedirs(output_dir)
+
+# open the list file once, so walking multiple directories does not truncate it
+datalist = open(list_file, 'w')
+for (root, _, files) in os.walk(input_dir):
+ for f in files:
+ with PIL.Image.open(root + '/' + f) as image:
+ img = np.array(image.resize((299, 299),
+ PIL.Image.ANTIALIAS)).astype(np.float32)
+ img = ((img / 255) - 0.5) * 2.0
+ output_file = output_dir + '/' + f.replace('.jpg', '.data')
+ img.tofile(output_file)
+ datalist.writelines(os.path.abspath(output_file) + '\n')
+datalist.close()
+EOF
+
+$ python preprocess.py
+
+After running preprocess.py, 'raw_files' and 'datalist.txt' will be created.
+raw_files: a directory where raw data files are saved
+datalist.txt: a text file that contains the list of raw data files.
+
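+Each line of datalist.txt is the absolute path of one raw data file, for example
+(the leading directories will differ on your machine):
+
+/home/user/raw_files/bald-eagle.data
+/home/user/raw_files/cow.data
+...
+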
+3. Run rawdata2hdf5 with datalist.txt
+
+$ rawdata2hdf5 --data_list datalist.txt --output_path dataset.h5
+
+The contents of the hdf5 file can be printed in the console using h5dump
+$ h5dump dataset.h5
+
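+If you want to inspect the file from Python instead, here is a minimal sketch using
+the h5py package (an extra dependency for inspection only, not required by one-quantize):
+
+$ pip install h5py
+$ python -c "
+import h5py
+# walk the file and print every group/dataset name with its shape and dtype
+with h5py.File('dataset.h5', 'r') as f:
+    f.visititems(lambda name, obj:
+                 print(name, getattr(obj, 'shape', ''), getattr(obj, 'dtype', '')))
+"
+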
+Now you can call one-quantize with dataset.h5.
diff --git a/compiler/one-cmds/how-to-prepare-virtualenv.txt b/compiler/one-cmds/how-to-prepare-virtualenv.txt
index 62a94968b..c36650a47 100644
--- a/compiler/one-cmds/how-to-prepare-virtualenv.txt
+++ b/compiler/one-cmds/how-to-prepare-virtualenv.txt
@@ -1,14 +1,17 @@
About
-----
-Last update: 2020-08-03
+Last update: 2023-01-30
This document explains the 'one-prepare-venv' command.
'one-prepare-venv' will prepare python3 virtual environment with tensorflow-cpu
-version 2.3.0, recommanded 2.x version as of now, so that 'one-import-tf'
+version 2.8.0, the recommended 2.x version as of now, so that the 'one-import-tf'
command can execute properly.
+'one-prepare-venv' will also prepare onnx and onnx-tensorflow version 1.10.0 so
+that the 'one-import-onnx' command can execute properly.
+
Prerequisite
------------
@@ -17,11 +20,11 @@ Please install these required packages before venv preparation.
$ sudo apt-get update
$ sudo apt-get upgrade
-$ sudo apt-get install python3-pip python3-venv
+$ sudo apt-get install python3.8 python3-pip python3.8-venv
-How to run
-----------
+How to run for Ubuntu
+---------------------
Just run 'one-prepare-venv' command
@@ -30,6 +33,14 @@ $ one-prepare-venv
There will be a venv folder as a result.
+How to run for Windows
+----------------------
+
+Support for Windows is not maintained for now.
+If you need to run on Windows, please file an issue.
+Or you can use Docker for Windows.
+
+
Trouble shooting
----------------
diff --git a/compiler/one-cmds/how-to-use-one-commands.txt b/compiler/one-cmds/how-to-use-one-commands.txt
index 0ee69e077..028cde47a 100644
--- a/compiler/one-cmds/how-to-use-one-commands.txt
+++ b/compiler/one-cmds/how-to-use-one-commands.txt
@@ -1,7 +1,7 @@
About
-----
-Last update: 2020-07-31
+Last update: 2020-10-29
This document briefly explains how to use one-* commands.
Detailed options are not explained here. Run the command to see options.
@@ -20,8 +20,75 @@ Compilation flow for NPU
4) one-codegen will compile to binary codes.
+common features
+---------------
+
+[configuration file]
+
+You can run one-commands with a configuration file as well as with command line parameters. The
+configuration file should contain the options that the one-commands need to run.
+
+```
+# configuration_file.cfg
+
+[The_driver_you_want_to_run]
+input_path=/input/path/to/convert
+output_path=...
+option_0=...
+option_1=...
+...
+
+```
+
+You can see a template file for how to write a configuration file in `one-build.template.cfg`.
+
+[options to write]
+
+Sometimes you want to change certain options without touching the configuration file. If you
+pass an option directly on the command line, it takes precedence over the configuration
+file. A list of options can be found in each driver's help message via the `-h` option.
+
+e.g.
+```
+$ ./one-import tf -C my-conf.cfg -i path/to/overwrite.pb
+```
+
+
+one-build
+---------
+
+one-build is an integrated driver that can execute one-commands at once. It's nice to run each
+driver individually, but sometimes you'll want to put together the most frequently used commands
+and run them all at once. You can do this with one-build and its configuration file.
+
+For one-build, the configuration file needs a 'one-build' section that consists of a list of drivers.
+
+```
+# one-build.template.cfg
+[one-build]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=True
+one-quantize=False
+one-pack=True
+one-codegen=False
+
+[one-import-tf]
+...
+
+[one-optimize]
+...
+
+[one-pack]
+...
+
+```
+See 'one-build.template.cfg' for more details.
+
+
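+As a rough sketch of how this section is consumed, the enabled drivers can be read
+with Python's standard configparser, mirroring what the one-build driver itself does
+(simplified; the real driver performs additional validation):
+
+```
+import configparser
+
+config = configparser.ConfigParser()
+config.optionxform = str  # keep option names case-sensitive
+config.read('one-build.template.cfg')
+
+# collect the drivers that are switched on in the [one-build] section
+enabled = [d for d in config['one-build'] if config.getboolean('one-build', d)]
+print(enabled)  # e.g. ['one-import-tf', 'one-optimize', 'one-pack']
+```
+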
one-import
------------
+----------
one-import invokes one-import-* commands.
@@ -30,14 +97,15 @@ Syntax: one-import [framework] [options]
Currently supported frameworks are 'tf', 'tflite' for TensorFlow and TensorFlow
lite.
+
one-import-bcq
--------------
+--------------
This will convert a TensorFlow model file (.pb) to our circle model file, applying BCQ.
To execute this command, the original TensorFlow model file must include BCQ information.
This command invokes the following scripts internally.
-- preserve_bcq_info : Prevent BCQ information vanishing problem
+- generate_bcq_metadata : Generate BCQ metadata in the model
- generate_bcq_info : Designate BCQ information nodes as model output automatically
- tf2tfliteV2 : Convert Tensorflow model to tflite model
- tflite2circle : Convert Tensorflow Lite model to circle model
@@ -58,7 +126,7 @@ one-import-tf
This will convert TensorFlow model (.pb) file to our circle model. You can also
directly call this command. one-import-tf invokes tf2tfliteV2.py script that
will internally use TensorFlow lite converter and then invoke tflite2circle
-converter to convert tflite model to circle model.
+converter to convert tflite model to circle model.
As tf2tfliteV2.py runs TensorFlow lite converter, you need to have TensorFlow
installed in your system. We recommend using 2.3.0 for now.
@@ -81,16 +149,63 @@ one-optimize
one-optimize provides the network and operator transformations shown below (a usage example follows the list).
Current transformation options are:
+- disable_validation : This will turn off operator validations.
+- expand_broadcast_const : This will expand broadcastable constant node inputs
+- fold_add_v2 : This removes AddV2 operation which can be folded
+- fold_cast : This removes Cast operation which can be folded
+- fold_densify: This removes Densify operator which can be folded
+- fold_dequantize : This removes Dequantize operation which can be folded
+- fold_dwconv : This folds Depthwise Convolution operation which can be folded
+- fold_gather : This removes Gather operation which can be folded
+- fold_sparse_to_dense : This removes SparseToDense operation which can be folded
+- forward_reshape_to_unaryop: This will move Reshape after UnaryOp for certain conditions
+- fuse_add_with_fully_connected: This fuses Add operator with the preceding FullyConnected operator if possible
+- fuse_add_with_tconv: This fuses Add operator with the preceding TConv operator if possible
+- fuse_batchnorm_with_conv : This fuses BatchNorm operator to convolution operator
+- fuse_batchnorm_with_dwconv : This fuses BatchNorm operator to depthwise convolution operator
+- fuse_batchnorm_with_tconv : This fuses BatchNorm operator to transpose convolution operator
- fuse_bcq: This enables Binary-Coding-based Quantized DNNs
- read https://arxiv.org/abs/2005.09904 for detailed information
- fuse_instnorm: This will convert instance normalization related operators to
one InstanceNormalization operator that our onert provides for faster
execution.
+- fuse_prelu: This will fuse operators to PReLU operator
+- fuse_preactivation_batchnorm: This fuses batch normalization operators of pre-activations to Conv operators.
+- fuse_activation_function: This fuses Activation function to a preceding operator.
+- fuse_mean_with_mean: This fuses two consecutive ReduceMean operations into one.
+- fuse_transpose_with_mean: This fuses ReduceMean with a preceding Transpose under certain conditions.
+- make_batchnorm_gamma_positive: This makes negative gamma of batch normalization into a small positive value (1e-10).
+ Note that this pass can change the execution result of the model.
+ So, use it only when the impact is known to be acceptable.
+- mute_warnings : This will turn off warning messages.
+- generate_profile_data : This will turn on profiling data generation.
+- remove_fakequant : This will remove all fakequant operators.
+- remove_quantdequant : This will remove all Quantize-Dequantize sequence.
+- remove_redundant_quantize : This removes redundant quantize operators.
+- remove_redundant_reshape : This fuses or removes redundant reshape operators.
+- remove_redundant_transpose : This fuses or removes redundant transpose operators.
+- remove_unnecessary_reshape : This removes unnecessary reshape operators.
+- remove_unnecessary_slice : This removes unnecessary slice operators.
+- remove_unnecessary_strided_slice : This removes unnecessary strided slice operators.
+- remove_unnecessary_split : This removes unnecessary split operators.
+- replace_cw_mul_add_with_depthwise_conv: This will replace channel-wise Mul/Add with DepthwiseConv2D.
- resolve_customop_add: This will convert Custom(Add) to normal Add operator
- resolve_customop_batchmatmul: This will convert Custom(BatchMatMul) to
normal BatchMatMul operator
- resolve_customop_matmul: This will convert Custom(MatMul) to normal MatMul
operator
+- resolve_customop_max_pool_with_argmax: This will convert Custom(MaxPoolWithArgmax)
+ to net of builtin operators.
+- shuffle_weight_to_16x1float32 : This will convert weight format of FullyConnected to SHUFFLED16x1FLOAT32.
+ Note that it only converts weights whose row is a multiple of 16.
+- substitute_pack_to_reshape : This will convert single input Pack to Reshape.
+- substitute_padv2_to_pad : This will convert certain condition PadV2 to Pad.
+- substitute_splitv_to_split : This will convert certain condition SplitV to Split.
+- substitute_squeeze_to_reshape : This will convert certain condition Squeeze to Reshape.
+- substitute_strided_slice_to_reshape : This will convert certain condition StridedSlice to Reshape.
+- substitute_transpose_to_reshape : This will convert certain condition Transpose to Reshape.
+- transform_min_max_to_relu6: This will transform Minimum-Maximum pattern to Relu6 operator.
+- transform_min_relu_to_relu6: This will transform Minimum(6)-Relu pattern to Relu6 operator.
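+
+Each option above can also be passed as a command line flag. A sketch of a typical
+invocation (flag spellings can be confirmed with `one-optimize -h`) might be:
+
+$ one-optimize --fold_dequantize --remove_redundant_transpose \
+--input_path inception_v3.circle --output_path inception_v3.opt.circle
+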
one-quantize
diff --git a/compiler/one-cmds/one-build b/compiler/one-cmds/one-build
new file mode 100644
index 000000000..b21a1624b
--- /dev/null
+++ b/compiler/one-cmds/one-build
@@ -0,0 +1,190 @@
+#!/usr/bin/env bash
+''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # '''
+''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # '''
+''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # '''
+''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
+''''exit 255 # '''
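+
+# NOTE The quad-quoted lines above are a bash/python polyglot header: bash executes
+# them as commands (re-running this file with the venv python, or printing the error
+# and exiting with 255), while python parses each line as a harmless string literal.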
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import configparser
+import os
+import sys
+
+import onelib.utils as oneutils
+
+# TODO Find better way to suppress traceback on error
+# This suppression is applied only to `one-build`
+sys.tracebacklimit = 0
+
+
+def _get_parser():
+ parser = argparse.ArgumentParser(
+ description='command line tool to run ONE drivers in customized order')
+
+ oneutils.add_default_arg(parser)
+
+ opt_name_list = oneutils.get_optimization_list(get_name=True)
+ opt_name_list = ['-' + s for s in opt_name_list]
+ if not opt_name_list:
+ opt_help_message = '(No available optimization options)'
+ else:
+ opt_help_message = '(Available optimization options: ' + ', '.join(
+ opt_name_list) + ')'
+ opt_help_message = 'optimization name to use ' + opt_help_message
+ parser.add_argument('-O', type=str, metavar='OPTIMIZATION', help=opt_help_message)
+
+ return parser
+
+
+def _parse_arg(parser):
+ args = parser.parse_args()
+ # print version
+ if args.version:
+ oneutils.print_version_and_exit(__file__)
+
+ return args
+
+
+def _verify_arg(parser, args):
+ """verify given arguments"""
+ # check if required arguments is given
+ if not oneutils.is_valid_attr(args, 'config'):
+ parser.error('-C/--config argument is required')
+ # check if given optimization option exists
+ opt_name_list = oneutils.get_optimization_list(get_name=True)
+ opt_name_list = [oneutils.remove_prefix(s, 'O') for s in opt_name_list]
+ if oneutils.is_valid_attr(args, 'O'):
+ if ' ' in getattr(args, 'O'):
+ parser.error('Not allowed to have space in the optimization name')
+ if not getattr(args, 'O') in opt_name_list:
+ parser.error('Invalid optimization option')
+
+
+def _get_driver_name(driver_name):
+ return {
+ 'one-import-bcq': 'one-import-bcq',
+ 'one-import-tf': 'one-import-tf',
+ 'one-import-tflite': 'one-import-tflite',
+ 'one-import-onnx': 'one-import-onnx',
+ 'one-optimize': 'one-optimize',
+ 'one-quantize': 'one-quantize',
+ 'one-partition': 'one-partition',
+ 'one-pack': 'one-pack',
+ 'one-codegen': 'one-codegen'
+ }[driver_name]
+
+
+def parse_cfg(args):
+ config = configparser.ConfigParser()
+ config.optionxform = str
+ parsed = config.read(os.path.expanduser(getattr(args, 'config')))
+ if not parsed:
+ raise FileNotFoundError('Given configuration file is not found')
+ return config
+
+
+def _is_available_driver(config, driver_name):
+ return config.has_option('one-build', driver_name) and config.getboolean(
+ 'one-build', driver_name)
+
+
+def _verify_cfg(driver_list, config):
+ if not config.has_section('one-build'):
+ raise ImportError('[one-build] section is required in configuration file')
+
+ import_driver_cnt = 0
+ if _is_available_driver(config, 'one-import-tf'):
+ import_driver_cnt += 1
+ if _is_available_driver(config, 'one-import-tflite'):
+ import_driver_cnt += 1
+ if _is_available_driver(config, 'one-import-bcq'):
+ import_driver_cnt += 1
+ if _is_available_driver(config, 'one-import-onnx'):
+ import_driver_cnt += 1
+ if import_driver_cnt > 1:
+ raise AssertionError('Only one import-* driver can be executed')
+
+
+# verify given optimization option file
+def _verify_opt(args):
+ if oneutils.is_valid_attr(args, 'O'):
+ config = configparser.ConfigParser()
+ config.optionxform = str
+ opt_name_path_dic = dict(
+ zip(oneutils.get_optimization_list(get_name=True),
+ oneutils.get_optimization_list()))
+ parsed = config.read(opt_name_path_dic['O' + getattr(args, 'O')])
+ # check if given optimization option file exists
+ if not parsed:
+ raise FileNotFoundError('Given optimization configuration file is not found')
+ # check if given optimization option file only has `one-optimize` section
+ if len(config.sections()) != 1 or config.sections()[0] != 'one-optimize':
+ raise AssertionError(
+ 'Optimization configuration file is only allowed to have a \'one-optimize\' section'
+ )
+
+
+def main():
+ # parse arguments
+ # since the configuration file path is required first,
+ # parsing of the configuration file proceeds after this.
+ parser = _get_parser()
+ args = _parse_arg(parser)
+
+ # verify arguments
+ _verify_arg(parser, args)
+
+ # parse configuration file
+ config = parse_cfg(args)
+
+ # verify configuration file
+ bin_dir = os.path.dirname(os.path.realpath(__file__))
+ import_drivers_dict = oneutils.detect_one_import_drivers(bin_dir)
+ transform_drivers = [
+ 'one-optimize', 'one-quantize', 'one-pack', 'one-codegen', 'one-profile',
+ 'one-partition'
+ ]
+ _verify_cfg(import_drivers_dict, config)
+
+ # verify optimization option file
+ _verify_opt(args)
+
+ # get sections to run
+ section_to_run = []
+ for d in list(import_drivers_dict) + transform_drivers:
+ if _is_available_driver(config, d):
+ section_to_run.append(d)
+
+ # run
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+ for section in section_to_run:
+ if section in import_drivers_dict:
+ # we already have the driver name in the dict
+ driver_name = import_drivers_dict[section]
+ else:
+ driver_name = _get_driver_name(section)
+ driver_path = os.path.join(dir_path, driver_name)
+ cmd = [driver_path, '--config', getattr(args, 'config'), '--section', section]
+ if section == 'one-optimize' and oneutils.is_valid_attr(args, 'O'):
+ cmd += ['-O', getattr(args, 'O')]
+ oneutils.run(cmd)
+
+
+if __name__ == '__main__':
+ oneutils.safemain(main, __file__)
diff --git a/compiler/one-cmds/one-build.template.cfg b/compiler/one-cmds/one-build.template.cfg
new file mode 100644
index 000000000..42960811e
--- /dev/null
+++ b/compiler/one-cmds/one-build.template.cfg
@@ -0,0 +1,28 @@
+[one-build]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=False
+one-optimize=True
+one-quantize=False
+one-partition=False
+one-pack=True
+one-codegen=False
+
+[one-import-tf]
+input_path=/path/to/inception_v3.pb
+output_path=inception_v3.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v1
+model_format=graph_def
+
+[one-optimize]
+input_path=inception_v3.circle
+output_path=inception_v3.opt.circle
+generate_profile_data=False
+
+[one-pack]
+input_path=inception_v3.opt.circle
+output_path=inception_v3_pack
diff --git a/compiler/one-cmds/one-codegen b/compiler/one-cmds/one-codegen
index 820b6d8a3..a956a63e2 100644
--- a/compiler/one-cmds/one-codegen
+++ b/compiler/one-cmds/one-codegen
@@ -1,4 +1,9 @@
-#!/bin/bash
+#!/usr/bin/env bash
+''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # '''
+''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # '''
+''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # '''
+''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
+''''exit 255 # '''
# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
#
@@ -14,53 +19,199 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-DRIVER_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-
-function Usage()
-{
- echo "Usage: one-codegen [BACKEND] ..."
- echo "Available BACKEND drivers:"
- backend_exist=0
- for file in `find $DRIVER_PATH -name *-compile -type f`;
- do
- backend_driver=$(basename $file)
- sub_length=8
- driver_length=$(expr ${#backend_driver} - ${sub_length})
- backend=${backend_driver:0:${driver_length}} # 8 is length of "-compile"
- echo " $backend"
- backend_exist=1
- done
- if [ $backend_exist == 0 ]; then
- echo " (There is no available backend drivers)"
- fi
-
- exit 255
-}
-
-function version()
-{
- $DRIVER_PATH/one-version one-codegen
- exit 255
-}
-
-# Get command from command-line
-BACKEND=$1
-if [[ -z ${BACKEND} ]]; then
- Usage
-fi
-shift
-
-if [[ "${BACKEND}" == "--version" ]]; then
- version
-fi
-
-BACKEND_DRIVER="${BACKEND}-compile"
-
-BACKEND_DRIVER_CMD="${DRIVER_PATH}/${BACKEND_DRIVER}"
-
-if [[ ! -f "${BACKEND_DRIVER_CMD}" ]]; then
- echo "ERROR: '${BACKEND_DRIVER}' is not supported"
- Usage
-fi
-
-"${BACKEND_DRIVER_CMD}" "$@"
+import argparse
+import copy
+import glob
+import itertools
+import ntpath
+import os
+import sys
+import shutil
+from types import SimpleNamespace
+
+import onelib.backends as backends
+import onelib.utils as oneutils
+
+# TODO Find better way to suppress traceback on error
+sys.tracebacklimit = 0
+
+
+def _get_parser(backends_list):
+ codegen_usage = 'one-codegen [-h] [-v] [-C CONFIG] [-b BACKEND] [--] [COMMANDS FOR BACKEND]'
+ parser = argparse.ArgumentParser(
+ description='command line tool for code generation', usage=codegen_usage)
+
+ oneutils.add_default_arg(parser)
+
+ # get backend list in the directory
+ backends_name = [ntpath.basename(f) for f in backends_list]
+ if not backends_name:
+ backends_name_message = '(There is no available backend drivers)'
+ else:
+ backends_name_message = '(available backend drivers: ' + ', '.join(
+ backends_name) + ')'
+ backend_help_message = 'backend name to use ' + backends_name_message
+ parser.add_argument('-b', '--backend', type=str, help=backend_help_message)
+
+ return parser
+
+
+def _verify_arg(parser, args, cfg_args, backend_args, unknown_args):
+ """verify given arguments"""
+ cmd_backend_exist = oneutils.is_valid_attr(args, 'backend')
+ cfg_backend_exist = oneutils.is_valid_attr(cfg_args, 'backend')
+ cfg_backends_exist = oneutils.is_valid_attr(cfg_args, 'backends')
+
+ # check if required arguments is given
+ missing = []
+ if not cmd_backend_exist and not cfg_backend_exist and not cfg_backends_exist:
+ missing.append('-b/--backend')
+ if len(missing):
+ parser.error('the following arguments are required: ' + ' '.join(missing))
+
+ if not oneutils.is_valid_attr(args, 'config'):
+ if not backend_args and not unknown_args:
+ parser.error('commands for the backend are missing.')
+
+ if cfg_backend_exist and cfg_backends_exist:
+ parser.error(
+ '\'backend\' option and \'backends\' option cannot be used simultaneously.')
+
+ # Check if given backend from command line exists in the configuration file
+ if cmd_backend_exist and cfg_backend_exist:
+ if args.backend != cfg_args.backend:
+ parser.error('No command found for the given backend')
+
+ if cfg_backend_exist and not oneutils.is_valid_attr(cfg_args, 'command'):
+ parser.error('\'command\' key is missing in the configuration file.')
+
+ if cfg_backends_exist:
+ cfg_backends = getattr(cfg_args, 'backends').split(',')
+ # check if commands of given backends exist
+ for b in cfg_backends:
+ if not oneutils.is_valid_attr(cfg_args, b):
+ parser.error('No command found for ' + b)
+
+ # Check if given backend from command line exists in the configuration file
+ if cmd_backend_exist:
+ if args.backend not in cfg_backends:
+ parser.error('No command found for the given backend')
+
+
+def _parse_arg(parser):
+ codegen_args = []
+ backend_args = []
+ unknown_args = []
+ argv = copy.deepcopy(sys.argv)
+ # delete file name
+ del argv[0]
+ # split by '--'
+ args = [list(y) for x, y in itertools.groupby(argv, lambda z: z == '--') if not x]
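+ # e.g. ['-b', 'dummy', '--', 'cmd', '--opt'] is split into
+ # [['-b', 'dummy'], ['cmd', '--opt']]; without '--' there is a single chunk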
+ if len(args) == 0:
+ codegen_args = parser.parse_args(codegen_args)
+ # one-codegen has two interfaces
+ # 1. one-codegen [-h] [-v] [-C CONFIG] [-b BACKEND] [COMMANDS FOR BACKEND]
+ if len(args) == 1:
+ codegen_args = args[0]
+ codegen_args, unknown_args = parser.parse_known_args(codegen_args)
+ # 2. one-codegen [-h] [-v] [-C CONFIG] [-b BACKEND] -- [COMMANDS FOR BACKEND]
+ if len(args) == 2:
+ codegen_args = args[0]
+ backend_args = args[1]
+ codegen_args = parser.parse_args(codegen_args)
+ # print version
+ if len(args) and codegen_args.version:
+ oneutils.print_version_and_exit(__file__)
+
+ return codegen_args, backend_args, unknown_args
+
+
+def main():
+ # get backend list
+ backends_list = backends.get_list('compile')
+
+ # parse arguments
+ parser = _get_parser(backends_list)
+ args, backend_args, unknown_args = _parse_arg(parser)
+
+ # parse configuration file
+ cfg_args = SimpleNamespace()
+ oneutils.parse_cfg(args.config, 'one-codegen', cfg_args)
+
+ # parse configuration file (args has arguments parsed from command line + cfg)
+ # oneutils.parse_cfg(args.config, 'one-codegen', args)
+
+ # verify arguments
+ _verify_arg(parser, args, cfg_args, backend_args, unknown_args)
+ '''
+ one-codegen defines its behavior for below cases.
+
+ [1] one-codegen -h
+ [2] one-codegen -v
+ [3] one-codegen -C ${cfg} (backend, command key in cfg)
+ [4] one-codegen -C ${cfg} (backends key in cfg)
+ [5] one-codegen -b ${backend} ${command}
+ [6] one-codegen -b ${backend} -- ${command}
+ [7] one-codegen -b ${backend} -C {cfg} (backend, command key in cfg)
+ [8] one-codegen -b ${backend} -C {cfg} (backends key in cfg) (Only 'backend' is invoked,
+ even though cfg file has multiple backends)
+ [9] one-codegen -b ${backend} -C ${cfg} -- ${command} (backend, command key in cfg)
+ [10] one-codegen -b ${backend} -C ${cfg} -- ${command} (backends key in cfg) (Only 'backend' is invoked,
+ even though cfg file has multiple backends)
+
+ All other cases are not allowed or are undefined behavior.
+ '''
+ cmd_overwrite = False
+ if oneutils.is_valid_attr(args, 'config'):
+ # [9], [10]
+ if backend_args and not unknown_args:
+ given_backends = [args.backend]
+ cmd_overwrite = True
+ else:
+ # [7], [8]
+ if oneutils.is_valid_attr(args, 'backend'):
+ given_backends = [args.backend]
+ if oneutils.is_valid_attr(cfg_args, 'backend'):
+ assert (oneutils.is_valid_attr(cfg_args, 'command'))
+ setattr(cfg_args, args.backend, cfg_args.command)
+ else:
+ # [3]
+ if oneutils.is_valid_attr(cfg_args, 'backend'):
+ assert (oneutils.is_valid_attr(cfg_args, 'command'))
+ given_backends = [cfg_args.backend]
+ setattr(cfg_args, cfg_args.backend, cfg_args.command)
+ # [4]
+ if oneutils.is_valid_attr(cfg_args, 'backends'):
+ given_backends = cfg_args.backends.split(',')
+ # [5], [6]
+ else:
+ assert (backend_args or unknown_args)
+ given_backends = [args.backend]
+
+ for given_backend in given_backends:
+ # make a command to run given backend driver
+ codegen_path = None
+ backend_base = given_backend + '-compile'
+ for cand in backends_list:
+ if ntpath.basename(cand) == backend_base:
+ codegen_path = cand
+ if not codegen_path:
+ # Find backend from system path
+ codegen_path = shutil.which(backend_base)
+
+ if not codegen_path:
+ raise FileNotFoundError(backend_base + ' not found')
+
+ codegen_cmd = [codegen_path]
+ if not cmd_overwrite and oneutils.is_valid_attr(cfg_args, given_backend):
+ codegen_cmd += getattr(cfg_args, given_backend).split()
+ else:
+ codegen_cmd += backend_args
+ codegen_cmd += unknown_args
+
+ # run backend driver
+ oneutils.run(codegen_cmd, err_prefix=backend_base)
+
+
+if __name__ == '__main__':
+ oneutils.safemain(main, __file__)
diff --git a/compiler/one-cmds/one-import b/compiler/one-cmds/one-import
index b1dd8f4c3..880b8311d 100644
--- a/compiler/one-cmds/one-import
+++ b/compiler/one-cmds/one-import
@@ -1,4 +1,9 @@
-#!/bin/bash
+#!/usr/bin/env bash
+''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # '''
+''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # '''
+''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # '''
+''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
+''''exit 255 # '''
# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
#
@@ -14,51 +19,85 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-DRIVER_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-
-function Usage()
-{
- echo "Usage: one-import [FRAMEWORK] ..."
- echo "Available FRAMEWORK drivers:"
- framework_exist=0
- for file in "$DRIVER_PATH"/one-import-*;
- do
- framework_driver=$(basename $file)
- framework=${framework_driver:11} # 11 is length of "one-import-"
- echo " $framework"
- framework_exist=1
- done
- if [ $framework_exist == 0 ]; then
- echo " (There is no available import drivers)"
- fi
-
- exit 255
-}
-
-function version()
-{
- $DRIVER_PATH/one-version one-import-tf
- exit 255
-}
-
-# Get command from command-line
-FRAMEWORK=$1
-if [[ -z ${FRAMEWORK} ]]; then
- Usage
-fi
-shift
-
-if [ ${FRAMEWORK} = "--version" ]; then
- version
-fi
-
-FRAMEWORK_DRIVER="one-import-$FRAMEWORK"
-
-FRAMEWORK_DRIVER_CMD="${DRIVER_PATH}/${FRAMEWORK_DRIVER}"
-
-if [[ ! -f "${FRAMEWORK_DRIVER_CMD}" ]]; then
- echo "ERROR: '${FRAMEWORK_DRIVER}' is not supported"
- Usage
-fi
-
-"${FRAMEWORK_DRIVER_CMD}" "$@"
+import argparse
+import os
+import subprocess
+import sys
+
+import onelib.utils as oneutils
+
+
+def _get_parser():
+ parser = argparse.ArgumentParser(
+ description='command line tool to convert various format to circle',
+ formatter_class=argparse.RawTextHelpFormatter)
+
+ # configuration file
+ parser.add_argument('-C', '--config', type=str, help='run with configuration file')
+
+ # driver
+ parser.add_argument(
+ 'driver', type=str, help='driver name to run (supported: tf, tflite,' \
+ ' bcq, onnx)')
+
+ # version
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+ one_version_path = os.path.join(dir_path, 'one-version')
+ version = subprocess.check_output([one_version_path]).decode('utf-8')
+ version_str = '\n'.join(['one-import version {}'.format(version), \
+ 'Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved', \
+ 'Licensed under the Apache License, Version 2.0', \
+ 'https://github.com/Samsung/ONE'])
+ parser.add_argument('-v', '--version', action='version', version=version_str)
+
+ return parser
+
+
+def _verify_arg(parser, args):
+ """verify given arguments"""
+ pass
+ # TODO verify arguments
+
+
+def _parse_arg(parser):
+ args, unknown_args = parser.parse_known_args()
+
+ return args, unknown_args
+
+
+def _get_driver_name(driver_name):
+ return {
+ 'bcq': 'one-import-bcq',
+ 'tf': 'one-import-tf',
+ 'tflite': 'one-import-tflite',
+ 'onnx': 'one-import-onnx',
+ }[driver_name]
+
+
+def _convert(args, unknown_args):
+ # get directory path
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+ # make cmd
+ cmd = [sys.executable, os.path.join(dir_path, _get_driver_name(args.driver))]
+ if oneutils.is_valid_attr(args, 'config'):
+ cmd.append('--config')
+ cmd.append(os.path.expanduser(args.config))
+ return_code = subprocess.call(cmd + unknown_args)
+ if return_code != 0:
+ sys.exit(return_code)
+
+
+def main():
+ # parse arguments
+ parser = _get_parser()
+ args, unknown_args = _parse_arg(parser)
+
+ # verify arguments
+ _verify_arg(parser, args)
+
+ # convert
+ _convert(args, unknown_args)
+
+
+if __name__ == '__main__':
+ oneutils.safemain(main, __file__)
diff --git a/compiler/one-cmds/one-import-bcq b/compiler/one-cmds/one-import-bcq
index 98dd1efed..fc0f75cc8 100644
--- a/compiler/one-cmds/one-import-bcq
+++ b/compiler/one-cmds/one-import-bcq
@@ -1,4 +1,9 @@
-#!/bin/bash
+#!/usr/bin/env bash
+''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # '''
+''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # '''
+''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # '''
+''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
+''''exit 255 # '''
# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
#
@@ -14,137 +19,186 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-set -e
-
-DRIVER_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-
-usage()
-{
- echo "Convert TensorFlow model with BCQ to circle."
- echo "Usage: one-import-bcq"
- echo " --version Show version information and exit"
- echo " --input_path <path/to/tfmodel/with/BCQ>"
- echo " --output_path <path/to/circle>"
- echo " --input_arrays <names of the input arrays, comma-separated>"
- echo " --input_shapes <input shapes, colon-separated>"
- echo " --output_arrays <names of the output arrays, comma-separated>"
- echo " --v2 Use TensorFlow 2.x interface (default is 1.x interface)"
- exit 255
-}
-
-version()
-{
- $DRIVER_PATH/one-version one-import-bcq
- exit 255
-}
-
-TF_INTERFACE="--v1"
-
-# Parse command-line arguments
-#
-while [ "$#" -ne 0 ]; do
- CUR="$1"
-
- case $CUR in
- '--help')
- usage
- ;;
- '--version')
- version
- ;;
- '--input_path')
- export INPUT_PATH="$2"
- shift 2
- ;;
- '--output_path')
- export OUTPUT_PATH="$2"
- shift 2
- ;;
- '--input_arrays')
- export INPUT_ARRAYS="$2"
- shift 2
- ;;
- '--input_shapes')
- export INPUT_SHAPES="$2"
- shift 2
- ;;
- '--output_arrays')
- export OUTPUT_ARRAYS="$2"
- shift 2
- ;;
- '--v2')
- TF_INTERFACE="--v2"
- shift
- ;;
- *)
- echo "Unknown parameter: ${CUR}"
- shift
- ;;
- esac
-done
-
-if [ -z ${INPUT_PATH} ] || [ ! -e ${INPUT_PATH} ]; then
- echo "Error: input model not found"
- echo ""
- usage
- exit 2
-fi
-
-FILE_BASE=$(basename ${OUTPUT_PATH})
-MODEL_NAME="${FILE_BASE%.*}"
-
-TMPDIR=$(mktemp -d)
-trap "{ rm -rf $TMPDIR; }" EXIT
-
-# activate python virtual environment
-VIRTUALENV_LINUX="${DRIVER_PATH}/venv/bin/activate"
-VIRTUALENV_WINDOWS="${DRIVER_PATH}/venv/Scripts/activate"
-
-if [ -e ${VIRTUALENV_LINUX} ]; then
- source ${VIRTUALENV_LINUX}
-elif [ -e ${VIRTUALENV_WINDOWS} ]; then
- source ${VIRTUALENV_WINDOWS}
-fi
-
-# remove previous log
-rm -rf "${OUTPUT_PATH}.log"
-
-# generate temporary preserved pb file
-echo "${DRIVER_PATH}/preserve_bcq_info" --input_path ${INPUT_PATH} \
---output_path "${TMPDIR}/${MODEL_NAME}_preserved.pb" > "${OUTPUT_PATH}.log"
-echo " " >> "${OUTPUT_PATH}.log"
-
-"${DRIVER_PATH}/preserve_bcq_info" --input_path ${INPUT_PATH} \
---output_path "${TMPDIR}/${MODEL_NAME}_preserved.pb" >> "${OUTPUT_PATH}.log" 2>&1
-
-# generate output_arrays automatically
-echo "${DRIVER_PATH}/generate_bcq_output_arrays" \
---input_path "${TMPDIR}/${MODEL_NAME}_preserved.pb" \
---output_path "${TMPDIR}/${MODEL_NAME}_output_arrays.txt" > "${OUTPUT_PATH}.log"
-echo " " >> "${OUTPUT_PATH}.log"
-
-"${DRIVER_PATH}/generate_bcq_output_arrays" \
---input_path "${TMPDIR}/${MODEL_NAME}_preserved.pb" \
---output_path "${TMPDIR}/${MODEL_NAME}_output_arrays.txt" >> "${OUTPUT_PATH}.log" 2>&1
-
-# generate temporary tflite file
-CONVERT_SCRIPT="python ${DRIVER_PATH}/tf2tfliteV2.py ${TF_INTERFACE} "
-CONVERT_SCRIPT+="--input_path ${TMPDIR}/${MODEL_NAME}_preserved.pb "
-CONVERT_SCRIPT+="--input_arrays ${INPUT_ARRAYS} "
-CONVERT_SCRIPT+="--output_path ${TMPDIR}/${MODEL_NAME}.tflite "
-CONVERT_SCRIPT+="--output_arrays ${OUTPUT_ARRAYS}$(cat ${TMPDIR}/${MODEL_NAME}_output_arrays.txt) "
-if [ ! -z ${INPUT_SHAPES} ]; then
- CONVERT_SCRIPT+="--input_shapes ${INPUT_SHAPES} "
-fi
-
-echo ${CONVERT_SCRIPT} > "${OUTPUT_PATH}.log"
-$CONVERT_SCRIPT >> "${OUTPUT_PATH}.log" 2>&1
-
-# convert .tflite to .circle
-echo " " >> "${OUTPUT_PATH}.log"
-echo "${DRIVER_PATH}/tflite2circle" "${TMPDIR}/${MODEL_NAME}.tflite" \
-"${OUTPUT_PATH}" >> "${OUTPUT_PATH}.log"
-echo " " >> "${OUTPUT_PATH}.log"
-
-"${DRIVER_PATH}/tflite2circle" "${TMPDIR}/${MODEL_NAME}.tflite" \
-"${OUTPUT_PATH}" >> "${OUTPUT_PATH}.log" 2>&1
+import argparse
+import os
+import sys
+import tempfile
+
+import onelib.make_cmd as _make_cmd
+import onelib.utils as oneutils
+import generate_bcq_output_arrays as _bcq_info_gen
+
+# TODO Find better way to suppress traceback on error
+sys.tracebacklimit = 0
+
+
+def get_driver_cfg_section():
+ return "one-import-bcq"
+
+
+def _get_parser():
+ parser = argparse.ArgumentParser(
+ description='command line tool to convert TensorFlow with BCQ to circle')
+
+ oneutils.add_default_arg(parser)
+
+ ## tf2tfliteV2 arguments
+ tf2tfliteV2_group = parser.add_argument_group('converter arguments')
+
+ # converter version
+ converter_version = tf2tfliteV2_group.add_mutually_exclusive_group()
+ converter_version.add_argument(
+ '--v1',
+ action='store_const',
+ dest='converter_version_cmd',
+ const='--v1',
+ help='use TensorFlow Lite Converter 1.x')
+ converter_version.add_argument(
+ '--v2',
+ action='store_const',
+ dest='converter_version_cmd',
+ const='--v2',
+ help='use TensorFlow Lite Converter 2.x')
+
+ parser.add_argument('--converter_version', type=str, help=argparse.SUPPRESS)
+
+ # input and output path.
+ tf2tfliteV2_group.add_argument(
+ '-i', '--input_path', type=str, help='full filepath of the input file')
+ tf2tfliteV2_group.add_argument(
+ '-o', '--output_path', type=str, help='full filepath of the output file')
+
+ # input and output arrays.
+ tf2tfliteV2_group.add_argument(
+ '-I',
+ '--input_arrays',
+ type=str,
+ help='names of the input arrays, comma-separated')
+ tf2tfliteV2_group.add_argument(
+ '-s',
+ '--input_shapes',
+ type=str,
+ help=
+ 'shapes corresponding to --input_arrays, colon-separated (ex:"1,4,4,3:1,20,20,3")'
+ )
+ tf2tfliteV2_group.add_argument(
+ '-O',
+ '--output_arrays',
+ type=str,
+ help='names of the output arrays, comma-separated')
+
+ return parser
+
+
+def _verify_arg(parser, args):
+ """verify given arguments"""
+ # check if required arguments is given
+ missing = []
+ if not oneutils.is_valid_attr(args, 'input_path'):
+ missing.append('-i/--input_path')
+ if not oneutils.is_valid_attr(args, 'output_path'):
+ missing.append('-o/--output_path')
+ if len(missing):
+ parser.error('the following arguments are required: ' + ' '.join(missing))
+
+
+def _parse_arg(parser):
+ args = parser.parse_args()
+ # print version
+ if args.version:
+ oneutils.print_version_and_exit(__file__)
+
+ return args
+
+
+def _make_generate_bcq_metadata_cmd(args, driver_path, output_path):
+ """make a command for running generate_bcq_metadata"""
+ cmd = [sys.executable, driver_path]
+ # input_path
+ if oneutils.is_valid_attr(args, 'input_path'):
+ cmd.append('--input_path')
+ cmd.append(os.path.expanduser(getattr(args, 'input_path')))
+ # output_path
+ if oneutils.is_valid_attr(args, 'output_path'):
+ cmd.append('--output_path')
+ cmd.append(os.path.expanduser(output_path))
+ # output_arrays
+ if oneutils.is_valid_attr(args, 'output_arrays'):
+ cmd.append('--output_arrays')
+ cmd.append(getattr(args, 'output_arrays'))
+
+ return cmd
+
+
+def _convert(args):
+ # get file path to log
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+ logfile_path = os.path.realpath(args.output_path) + '.log'
+
+ with open(logfile_path, 'wb') as f, tempfile.TemporaryDirectory() as tmpdir:
+ # make a command to generate BCQ information metadata
+ generate_bcq_metadata_path = os.path.join(dir_path, 'generate_bcq_metadata.py')
+ generate_bcq_metadata_output_path = os.path.join(
+ tmpdir,
+ os.path.splitext(os.path.basename(args.input_path))[0] + '_withmeta.pb')
+ generate_bcq_metadata_cmd = _make_generate_bcq_metadata_cmd(
+ args, generate_bcq_metadata_path, generate_bcq_metadata_output_path)
+
+ f.write((' '.join(generate_bcq_metadata_cmd) + '\n').encode())
+
+ # generate BCQ information metadata
+ oneutils.run(generate_bcq_metadata_cmd, logfile=f)
+
+ # get output_arrays with BCQ
+ bcq_output_arrays = _bcq_info_gen.get_bcq_output_arrays(
+ generate_bcq_metadata_output_path, getattr(args, 'output_arrays'))
+
+ # make a command to convert from tf with BCQ to tflite
+ tf2tfliteV2_path = os.path.join(dir_path, 'tf2tfliteV2.py')
+ tf2tfliteV2_output_path = os.path.join(
+ tmpdir,
+ os.path.splitext(
+ os.path.basename(generate_bcq_metadata_output_path))[0]) + '.tflite'
+ tf2tfliteV2_cmd = _make_cmd.make_tf2tfliteV2_cmd(
+ args, tf2tfliteV2_path, generate_bcq_metadata_output_path,
+ tf2tfliteV2_output_path)
+ try:
+ output_arrays_idx = tf2tfliteV2_cmd.index('--output_arrays')
+ tf2tfliteV2_cmd[output_arrays_idx + 1] = ','.join(bcq_output_arrays)
+ except ValueError:
+ pass
+
+ f.write((' '.join(tf2tfliteV2_cmd) + '\n').encode())
+
+ # convert tf to tflite
+ oneutils.run(tf2tfliteV2_cmd, logfile=f)
+
+ # make a command to convert from tflite to circle
+ tflite2circle_path = os.path.join(dir_path, 'tflite2circle')
+ tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path,
+ tf2tfliteV2_output_path,
+ getattr(args, 'output_path'))
+
+ f.write((' '.join(tflite2circle_cmd) + '\n').encode())
+
+ # convert tflite to circle
+ oneutils.run(tflite2circle_cmd, logfile=f)
+
+
+def main():
+ # parse arguments
+ parser = _get_parser()
+ args = _parse_arg(parser)
+
+ # parse configuration file
+ oneutils.parse_cfg(args.config, 'one-import-bcq', args)
+
+ # verify arguments
+ _verify_arg(parser, args)
+
+ # _convert
+ _convert(args)
+
+
+if __name__ == '__main__':
+ oneutils.safemain(main, __file__)
diff --git a/compiler/one-cmds/one-import-onnx b/compiler/one-cmds/one-import-onnx
new file mode 100644
index 000000000..b9c773b06
--- /dev/null
+++ b/compiler/one-cmds/one-import-onnx
@@ -0,0 +1,349 @@
+#!/usr/bin/env bash
+''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # '''
+''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # '''
+''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # '''
+''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
+''''exit 255 # '''
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os
+import sys
+import tempfile
+import onnx
+import onnx_tf
+
+# ONNX legalizer is an optional feature
+# It enables conversion of some operations, but is in an experimental phase for now
+try:
+ import onnx_legalizer
+ _onnx_legalizer_enabled = True
+except ImportError:
+ _onnx_legalizer_enabled = False
+
+import onelib.make_cmd as _make_cmd
+import onelib.utils as oneutils
+
+# TODO Find better way to suppress traceback on error
+sys.tracebacklimit = 0
+
+
+# Class to rename inputs/outputs to prevent issues while importing ONNX models
+class TidyIONames:
+ def __init__(self, onnx_model):
+ self.input_nodes = []
+ self.output_nodes = []
+ self.remap_inputs = []
+ self.remap_outputs = []
+ self.initializers = []
+ self.onnx_model = onnx_model
+ # some models may have initializers as inputs. ignore them.
+ for initializer in onnx_model.graph.initializer:
+ self.initializers.append(initializer.name)
+
+ def order(self):
+ for idx in range(0, len(self.onnx_model.graph.input)):
+ name = self.onnx_model.graph.input[idx].name
+ if not name in self.initializers:
+ self.input_nodes.append(name)
+ self.remap_inputs.append('i_' + format(idx + 1, '04d') + '_' + name)
+ for idx in range(0, len(self.onnx_model.graph.output)):
+ name = self.onnx_model.graph.output[idx].name
+ self.output_nodes.append(name)
+ self.remap_outputs.append('o_' + format(idx + 1, '04d') + '_' + name)
+
+ # exclude special characters in names
+ def sanitize(self):
+ for idx in range(0, len(self.onnx_model.graph.input)):
+ name = self.onnx_model.graph.input[idx].name
+ if not name in self.initializers:
+ if '.' in name or ':' in name or name[:1].isdigit():
+ self.input_nodes.append(name)
+ name_alt = name.replace('.', '_')
+ name_alt = name_alt.replace(':', '_')
+ if name_alt[:1].isdigit():
+ name_alt = 'a_' + name_alt
+ self.remap_inputs.append(name_alt)
+ for idx in range(0, len(self.onnx_model.graph.output)):
+ name = self.onnx_model.graph.output[idx].name
+ if '.' in name or ':' in name or name[:1].isdigit():
+ self.output_nodes.append(name)
+ name_alt = name.replace('.', '_')
+ name_alt = name_alt.replace(':', '_')
+ if name_alt[:1].isdigit():
+ name_alt = 'a_' + name_alt
+ self.remap_outputs.append(name_alt)
+
+ def update(self):
+ # change names for graph input
+ for i in range(len(self.onnx_model.graph.input)):
+ if self.onnx_model.graph.input[i].name in self.input_nodes:
+ to_rename = self.onnx_model.graph.input[i].name
+ idx = self.input_nodes.index(to_rename)
+ self.onnx_model.graph.input[i].name = self.remap_inputs[idx]
+ # change names of all nodes in the graph
+ for i in range(len(self.onnx_model.graph.node)):
+ # check whether node.input should be renamed via remap_inputs or remap_outputs
+ for j in range(len(self.onnx_model.graph.node[i].input)):
+ if self.onnx_model.graph.node[i].input[j] in self.input_nodes:
+ to_rename = self.onnx_model.graph.node[i].input[j]
+ idx = self.input_nodes.index(to_rename)
+ self.onnx_model.graph.node[i].input[j] = self.remap_inputs[idx]
+ if self.onnx_model.graph.node[i].input[j] in self.output_nodes:
+ to_rename = self.onnx_model.graph.node[i].input[j]
+ idx = self.output_nodes.index(to_rename)
+ self.onnx_model.graph.node[i].input[j] = self.remap_outputs[idx]
+ # check whether node.output should be renamed via remap_inputs or remap_outputs
+ for j in range(len(self.onnx_model.graph.node[i].output)):
+ if self.onnx_model.graph.node[i].output[j] in self.output_nodes:
+ to_rename = self.onnx_model.graph.node[i].output[j]
+ idx = self.output_nodes.index(to_rename)
+ self.onnx_model.graph.node[i].output[j] = self.remap_outputs[idx]
+ if self.onnx_model.graph.node[i].output[j] in self.input_nodes:
+ to_rename = self.onnx_model.graph.node[i].output[j]
+ idx = self.input_nodes.index(to_rename)
+ self.onnx_model.graph.node[i].output[j] = self.remap_inputs[idx]
+ # change names for graph output
+ for i in range(len(self.onnx_model.graph.output)):
+ if self.onnx_model.graph.output[i].name in self.output_nodes:
+ to_rename = self.onnx_model.graph.output[i].name
+ idx = self.output_nodes.index(to_rename)
+ self.onnx_model.graph.output[i].name = self.remap_outputs[idx]
+
+
+def get_driver_cfg_section():
+ return "one-import-onnx"
+
+
+def _get_parser():
+ parser = argparse.ArgumentParser(
+ description='command line tool to convert ONNX to circle')
+
+ oneutils.add_default_arg(parser)
+
+ ## tf2tfliteV2 arguments
+ tf2tfliteV2_group = parser.add_argument_group('converter arguments')
+
+ # input and output path.
+ tf2tfliteV2_group.add_argument(
+ '-i', '--input_path', type=str, help='full filepath of the input file')
+ tf2tfliteV2_group.add_argument(
+ '-o', '--output_path', type=str, help='full filepath of the output file')
+
+ # input and output arrays.
+ tf2tfliteV2_group.add_argument(
+ '-I',
+ '--input_arrays',
+ type=str,
+ help='names of the input arrays, comma-separated')
+ tf2tfliteV2_group.add_argument(
+ '-O',
+ '--output_arrays',
+ type=str,
+ help='names of the output arrays, comma-separated')
+
+ # fixed options
+ tf2tfliteV2_group.add_argument('--model_format', default='saved_model')
+ tf2tfliteV2_group.add_argument('--converter_version', default='v2')
+
+ parser.add_argument('--unroll_rnn', action='store_true', help='Unroll RNN operators')
+ parser.add_argument(
+ '--unroll_lstm', action='store_true', help='Unroll LSTM operators')
+ parser.add_argument(
+ '--keep_io_order',
+ action='store_true',
+ help=
+ 'Ensure generated circle model preserves the I/O order of the original onnx model.'
+ )
+
+ # save intermediate file(s)
+ parser.add_argument(
+ '--save_intermediate',
+ action='store_true',
+ help='Save intermediate files to output folder')
+
+ # experimental options
+ parser.add_argument(
+ '--experimental_disable_batchmatmul_unfold',
+ action='store_true',
+ help='Experimental disable BatchMatMul unfold')
+
+ return parser
+
+
+def _verify_arg(parser, args):
+ """verify given arguments"""
+ # check if required arguments is given
+ missing = []
+ if not oneutils.is_valid_attr(args, 'input_path'):
+ missing.append('-i/--input_path')
+ if not oneutils.is_valid_attr(args, 'output_path'):
+ missing.append('-o/--output_path')
+ if len(missing):
+ parser.error('the following arguments are required: ' + ' '.join(missing))
+
+
+def _parse_arg(parser):
+ args = parser.parse_args()
+ # print version
+ if args.version:
+ oneutils.print_version_and_exit(__file__)
+
+ return args
+
+
+def _apply_verbosity(verbosity):
+ # NOTE
+ # TF_CPP_MIN_LOG_LEVEL
+ # 0 : INFO + WARNING + ERROR + FATAL
+ # 1 : WARNING + ERROR + FATAL
+ # 2 : ERROR + FATAL
+ # 3 : FATAL
+ if verbosity:
+ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
+ else:
+ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
+
+
+# TF 2.12.1 tries to sanitize special characters ('.', ':' and maybe others) and then
+# fails with an 'IndexError: tuple index out of range' from somewhere else.
+# This method prevents that IndexError.
+def _sanitize_io_names(onnx_model):
+ sanitizer = TidyIONames(onnx_model)
+ sanitizer.sanitize()
+ sanitizer.update()
+
+
+# The index of input/output is added in front of the name. For example,
+# Original input names: 'a', 'c', 'b'
+# Renamed: 'i_0001_a', 'i_0002_c', 'i_0003_b'
+# This will preserve I/O order after import.
+def _remap_io_names(onnx_model):
+ # gather existing name of I/O and generate new name of I/O in sort order
+ remapper = TidyIONames(onnx_model)
+ remapper.order()
+ remapper.update()
+
+
+def _check_ext():
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+ ext_path = os.path.join(dir_path, 'one-import-onnx-ext')
+ if (os.path.isfile(ext_path)):
+ return ext_path
+ return None
+
+
+def _convert(args):
+ _apply_verbosity(args.verbose)
+
+ # get file path to log
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+ logfile_path = os.path.realpath(args.output_path) + '.log'
+ ext_path = _check_ext()
+
+ with open(logfile_path, 'wb') as f, tempfile.TemporaryDirectory() as tmpdir:
+ # save intermediate
+ if oneutils.is_valid_attr(args, 'save_intermediate'):
+ tmpdir = os.path.dirname(logfile_path)
+ # convert onnx to tf saved model
+ onnx_model = onnx.load(getattr(args, 'input_path'))
+ _sanitize_io_names(onnx_model)
+ if _onnx_legalizer_enabled:
+ options = onnx_legalizer.LegalizeOptions
+ options.unroll_rnn = oneutils.is_valid_attr(args, 'unroll_rnn')
+ options.unroll_lstm = oneutils.is_valid_attr(args, 'unroll_lstm')
+ onnx_legalizer.legalize(onnx_model, options)
+ if oneutils.is_valid_attr(args, 'keep_io_order'):
+ _remap_io_names(onnx_model)
+ if oneutils.is_valid_attr(args, 'save_intermediate'):
+ basename = os.path.basename(getattr(args, 'input_path'))
+ fixed_path = os.path.join(tmpdir,
+ os.path.splitext(basename)[0] + '~.onnx')
+ onnx.save(onnx_model, fixed_path)
+
+ if ext_path:
+ # save onnx_model to temporary alt file
+ basename = os.path.basename(getattr(args, 'input_path'))
+ alt_path = os.path.join(tmpdir, os.path.splitext(basename)[0] + '-alt.onnx')
+ onnx.save(onnx_model, alt_path)
+
+ # call extension with options
+ ext_cmd = [ext_path]
+ if oneutils.is_valid_attr(args, 'unroll_rnn'):
+ ext_cmd.append('--unroll_rnn')
+ if oneutils.is_valid_attr(args, 'unroll_lstm'):
+ ext_cmd.append('--unroll_lstm')
+ if oneutils.is_valid_attr(args, 'experimental_disable_batchmatmul_unfold'):
+ ext_cmd.append('--experimental_disable_batchmatmul_unfold')
+ if oneutils.is_valid_attr(args, 'save_intermediate'):
+ ext_cmd.append('--save_intermediate')
+ if oneutils.is_valid_attr(args, 'keep_io_order'):
+ ext_cmd.append('--keep_io_order')
+ ext_cmd.append(alt_path)
+ ext_cmd.append(getattr(args, 'output_path'))
+ oneutils.run(ext_cmd, logfile=f)
+ return
+
+ tf_savedmodel = onnx_tf.backend.prepare(onnx_model)
+
+ savedmodel_name = os.path.splitext(os.path.basename(
+ args.output_path))[0] + '.savedmodel'
+ savedmodel_output_path = os.path.join(tmpdir, savedmodel_name)
+ tf_savedmodel.export_graph(savedmodel_output_path)
+
+ # make a command to convert from tf to tflite
+ tf2tfliteV2_path = os.path.join(dir_path, 'tf2tfliteV2.py')
+ tf2tfliteV2_output_name = os.path.splitext(os.path.basename(
+ args.output_path))[0] + '.tflite'
+ tf2tfliteV2_output_path = os.path.join(tmpdir, tf2tfliteV2_output_name)
+
+ tf2tfliteV2_cmd = _make_cmd.make_tf2tfliteV2_cmd(
+ args, tf2tfliteV2_path, savedmodel_output_path, tf2tfliteV2_output_path)
+
+ f.write((' '.join(tf2tfliteV2_cmd) + '\n').encode())
+
+ # convert tf to tflite
+ oneutils.run(tf2tfliteV2_cmd, logfile=f)
+
+ # make a command to convert from tflite to circle
+ tflite2circle_path = os.path.join(dir_path, 'tflite2circle')
+ tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path,
+ tf2tfliteV2_output_path,
+ getattr(args, 'output_path'))
+
+ f.write((' '.join(tflite2circle_cmd) + '\n').encode())
+
+ # convert tflite to circle
+ oneutils.run(tflite2circle_cmd, err_prefix="tflite2circle", logfile=f)
+
+
+def main():
+ # parse arguments
+ parser = _get_parser()
+ args = _parse_arg(parser)
+
+ # parse configuration file
+ oneutils.parse_cfg(args.config, 'one-import-onnx', args)
+
+ # verify arguments
+ _verify_arg(parser, args)
+
+ # convert
+ _convert(args)
+
+
+if __name__ == '__main__':
+ oneutils.safemain(main, __file__)
diff --git a/compiler/one-cmds/one-import-pytorch b/compiler/one-cmds/one-import-pytorch
new file mode 100644
index 000000000..f0dbf4146
--- /dev/null
+++ b/compiler/one-cmds/one-import-pytorch
@@ -0,0 +1,368 @@
+#!/usr/bin/env bash
+''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # '''
+''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # '''
+''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # '''
+''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
+''''exit 255 # '''
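+
+# NOTE The quadruple-quote lines above are a bash/python polyglot: bash reads
+# '''' as two empty strings and executes the rest of each line, re-running
+# this file with the venv python when it exists, while python reads each line
+# as a no-op triple-quoted string literal closed by the trailing # '''.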
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import importlib.machinery
+import importlib.util
+import inspect
+import os
+import sys
+import tempfile
+import torch
+import onnx
+import onnx_tf
+import json
+import zipfile
+
+import onnx_legalizer
+import onelib.make_cmd as _make_cmd
+import onelib.utils as oneutils
+
+# TODO Find better way to suppress traceback on error
+sys.tracebacklimit = 0
+
+
+def get_driver_spec():
+ return ("one-import-pytorch", oneutils.DriverType.IMPORTER)
+
+
+def _get_parser():
+ parser = argparse.ArgumentParser(
+ description='command line tool to convert PyTorch to Circle')
+
+ oneutils.add_default_arg(parser)
+
+ ## converter arguments
+ converter_group = parser.add_argument_group('converter arguments')
+
+ # input and output path.
+ converter_group.add_argument(
+ '-i', '--input_path', type=str, help='full filepath of the input file')
+ converter_group.add_argument(
+ '-p', '--python_path', type=str, help='full filepath of the python model file')
+ converter_group.add_argument(
+ '-o', '--output_path', type=str, help='full filepath of the output file')
+
+ # input arrays.
+ converter_group.add_argument(
+ '-s',
+ '--input_shapes',
+ type=str,
+ help=
+ 'shapes of the input tensors, colon-separated (ex: "1,4,4,3:1,20,20,3")'
+ )
+ converter_group.add_argument(
+ '-t',
+ '--input_types',
+ type=str,
+ help='data types of input tensors, comma-separated (ex: float32,uint8,int32)')
+
+ # fixed options
+ tf2tflite_group = parser.add_argument_group('tf2tfliteV2 arguments')
+ tf2tflite_group.add_argument('--model_format', default='saved_model')
+ tf2tflite_group.add_argument('--converter_version', default='v2')
+
+ parser.add_argument('--unroll_rnn', action='store_true', help='Unroll RNN operators')
+ parser.add_argument(
+ '--unroll_lstm', action='store_true', help='Unroll LSTM operators')
+
+ # save intermediate file(s)
+ parser.add_argument(
+ '--save_intermediate',
+ action='store_true',
+ help='Save intermediate files to output folder')
+
+ return parser
+
+
+def _verify_arg(parser, args):
+ """verify given arguments"""
+ # check if required arguments are given
+ missing = []
+ if not oneutils.is_valid_attr(args, 'input_path'):
+ missing.append('-i/--input_path')
+ if not oneutils.is_valid_attr(args, 'output_path'):
+ missing.append('-o/--output_path')
+ if not oneutils.is_valid_attr(args, 'input_shapes'):
+ missing.append('-s/--input_shapes')
+ if not oneutils.is_valid_attr(args, 'input_types'):
+ missing.append('-t/--input_types')
+
+ if len(missing):
+ parser.error('the following arguments are required: ' + ' '.join(missing))
+
+
+def _parse_arg(parser):
+ args = parser.parse_args()
+ # print version
+ if args.version:
+ oneutils.print_version_and_exit(__file__)
+
+ return args
+
+
+def _apply_verbosity(verbosity):
+ # NOTE
+ # TF_CPP_MIN_LOG_LEVEL
+ # 0 : INFO + WARNING + ERROR + FATAL
+ # 1 : WARNING + ERROR + FATAL
+ # 2 : ERROR + FATAL
+ # 3 : FATAL
+ if verbosity:
+ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
+ else:
+ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
+
+
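+# A sketch of the expected behavior:
+# _parse_shapes("1,4,4,3:1,20,20,3") -> [[1, 4, 4, 3], [1, 20, 20, 3]]
+# _parse_shapes("1,3:") -> [[1, 3], []] (an empty segment means a scalar shape)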
+def _parse_shapes(shapes_str):
+ shapes = []
+ for shape_str in shapes_str.split(":"):
+ if shape_str != "":
+ shapes += [list(map(int, shape_str.split(",")))]
+ else:
+ shapes += [[]]
+ return shapes
+
+
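+# e.g. _parse_types("float32, int32") -> [torch.float32, torch.int32]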
+def _parse_types(types_str):
+ # There is no convenient way to create a torch dtype from a string or numpy dtype, so use this lookup table as a workaround
+ dtype_dict = {
+ "bool": torch.bool,
+ "uint8": torch.uint8,
+ "int8": torch.int8,
+ "int16": torch.int16,
+ "int32": torch.int32,
+ "int64": torch.int64,
+ "float16": torch.float16,
+ "float32": torch.float32,
+ "float64": torch.float64,
+ "complex64": torch.complex64,
+ "complex128": torch.complex128
+ }
+ array = types_str.split(",")
+ types = [dtype_dict[type_str.strip()] for type_str in array]
+ return types
+
+
+# merge contents of module into global namespace
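+# e.g. a module defining __all__ = ['Net'] contributes only 'Net'; without
+# __all__, every name not starting with '_' is merged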
+def _merge_module(module):
+ # is there an __all__? if so respect it
+ if "__all__" in module.__dict__:
+ names = module.__dict__["__all__"]
+ else:
+ # otherwise we import all names that don't begin with _
+ names = [x for x in module.__dict__ if not x.startswith("_")]
+ globals().update({k: getattr(module, k) for k in names})
+
+
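+# e.g. for a model file defining 'class Net(torch.nn.Module)' this returns
+# [Net]; classes merely imported into the module are filtered out by the
+# member.__module__ check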
+def _list_classes_from_module(module):
+ # Parsing the module to get all defined classes
+ is_member = lambda member: inspect.isclass(member) and member.__module__ == module.__name__
+ classes = [cls[1] for cls in inspect.getmembers(module, is_member)]
+ return classes
+
+
+def _extract_pytorch_model(log_file, parameters_path, python_path):
+ log_file.write(('Trying to load saved model\n').encode())
+ python_model_path = os.path.abspath(python_path)
+ module_name = os.path.basename(python_model_path)
+ module_dir = os.path.dirname(python_model_path)
+ sys.path.append(module_dir)
+ log_file.write(('Trying to load given python module\n').encode())
+ module_loader = importlib.machinery.SourceFileLoader(module_name, python_model_path)
+ module_spec = importlib.util.spec_from_loader(module_name, module_loader)
+ python_model_module = importlib.util.module_from_spec(module_spec)
+
+ try:
+ module_loader.exec_module(python_model_module)
+ except Exception:
+ raise ValueError('Failed to execute given python model file')
+
+ log_file.write(('Model python module is loaded\n').encode())
+ try:
+ # this branch assumes parameters_path contains a state_dict
+ state_dict = torch.load(parameters_path)
+ log_file.write(('Trying to find model class and fill its state dict\n').encode())
+ model_class_definitions = _list_classes_from_module(python_model_module)
+ if len(model_class_definitions) != 1:
+ raise ValueError("Expected only one class as model definition. {}".format(
+ model_class_definitions))
+ pytorch_model_class = model_class_definitions[0]
+ model = pytorch_model_class()
+ model.load_state_dict(state_dict)
+ return model
+ except Exception:
+ # this branch assumes parameters_path contains an "entire" model
+ _merge_module(python_model_module)
+ log_file.write(('Model python module is merged into main environment\n').encode())
+ model = torch.load(parameters_path)
+ log_file.write(('Pytorch model loaded\n').encode())
+ return model
+
+
+def _extract_torchscript_model(log_file, input_path):
+ # assuming this is a pytorch script
+ log_file.write(('Trying to load TorchScript model\n').encode())
+ try:
+ pytorch_model = torch.jit.load(input_path)
+ except RuntimeError as e:
+ log_file.write((str(e) + '\n').encode())
+ log_file.write(
+ 'Failed to import input file. Maybe it contains only weights? Try passing the "python_path" argument\n'.
+ encode())
+ raise
+ log_file.write(('TorchScript model is loaded\n').encode())
+ return pytorch_model
+
+
+def _extract_mar_model(log_file, tmpdir, input_path):
+ mar_dir_path = os.path.join(tmpdir, 'mar')
+ with zipfile.ZipFile(input_path) as zip_input:
+ zip_input.extractall(path=mar_dir_path)
+ manifest_path = os.path.join(mar_dir_path, 'MAR-INF/MANIFEST.json')
+ with open(manifest_path) as manifest_file:
+ manifest = json.load(manifest_file)
+ serialized_file = os.path.join(mar_dir_path, manifest['model']['serializedFile'])
+ if 'modelFile' in manifest['model']:
+ model_file = os.path.join(mar_dir_path, manifest['model']['modelFile'])
+ return _extract_pytorch_model(log_file, serialized_file, model_file)
+ else:
+ return _extract_torchscript_model(log_file, serialized_file)
+
+
+def _convert(args):
+ _apply_verbosity(args.verbose)
+
+ # get file path to log
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+ logfile_path = os.path.realpath(args.output_path) + '.log'
+ with open(logfile_path, 'wb') as f, tempfile.TemporaryDirectory() as tmpdir:
+ # save intermediate
+ if oneutils.is_valid_attr(args, 'save_intermediate'):
+ tmpdir = os.path.dirname(logfile_path)
+ # convert pytorch to onnx model
+ input_path = getattr(args, 'input_path')
+ model_file = getattr(args, 'python_path')
+
+ if input_path[-4:] == '.mar':
+ pytorch_model = _extract_mar_model(f, tmpdir, input_path)
+ elif model_file is None:
+ pytorch_model = _extract_torchscript_model(f, input_path)
+ else:
+ pytorch_model = _extract_pytorch_model(f, input_path, model_file)
+
+ input_shapes = _parse_shapes(getattr(args, 'input_shapes'))
+ input_types = _parse_types(getattr(args, 'input_types'))
+
+ if len(input_shapes) != len(input_types):
+ raise ValueError('number of input shapes and input types must be equal')
+
+ sample_inputs = []
+ for input_spec in zip(input_shapes, input_types):
+ sample_inputs += [torch.ones(input_spec[0], dtype=input_spec[1])]
+
+ f.write(('Trying to run inference on loaded model\n').encode())
+ sample_outputs = pytorch_model(*sample_inputs)
+ f.write(('Acquired sample outputs\n').encode())
+
+ onnx_output_name = os.path.splitext(os.path.basename(
+ args.output_path))[0] + '.onnx'
+ onnx_output_path = os.path.join(tmpdir, onnx_output_name)
+
+ onnx_saved = False
+ # some operations are not supported in early opset versions, try several
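+ # e.g. if opset 9 lacks an operator used by the model, torch.onnx.export
+ # raises and the loop retries with opset 10, 11, ... up to 14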
+ for onnx_opset_version in range(9, 15):
+ f.write(('Trying to save onnx model using opset version ' +
+ str(onnx_opset_version) + '\n').encode())
+ try:
+ torch.onnx.export(
+ pytorch_model,
+ tuple(sample_inputs),
+ onnx_output_path,
+ example_outputs=sample_outputs,
+ opset_version=onnx_opset_version)
+ onnx_saved = True
+ break
+ except Exception:
+ f.write(('attempt failed\n').encode())
+
+ if not onnx_saved:
+ raise ValueError('Failed to save temporary onnx model')
+
+ # convert onnx to tf saved model
+ onnx_model = onnx.load(onnx_output_path)
+
+ options = onnx_legalizer.LegalizeOptions()
+ options.unroll_rnn = oneutils.is_valid_attr(args, 'unroll_rnn')
+ options.unroll_lstm = oneutils.is_valid_attr(args, 'unroll_lstm')
+ onnx_legalizer.legalize(onnx_model, options)
+
+ tf_savedmodel = onnx_tf.backend.prepare(onnx_model)
+
+ savedmodel_name = os.path.splitext(os.path.basename(
+ args.output_path))[0] + '.savedmodel'
+ savedmodel_output_path = os.path.join(tmpdir, savedmodel_name)
+ tf_savedmodel.export_graph(savedmodel_output_path)
+
+ # make a command to convert from tf to tflite
+ tf2tfliteV2_path = os.path.join(dir_path, 'tf2tfliteV2.py')
+ tf2tfliteV2_output_name = os.path.splitext(os.path.basename(
+ args.output_path))[0] + '.tflite'
+ tf2tfliteV2_output_path = os.path.join(tmpdir, tf2tfliteV2_output_name)
+
+ del args.input_shapes
+ tf2tfliteV2_cmd = _make_cmd.make_tf2tfliteV2_cmd(
+ args, tf2tfliteV2_path, savedmodel_output_path, tf2tfliteV2_output_path)
+
+ f.write((' '.join(tf2tfliteV2_cmd) + '\n').encode())
+
+ # convert tf to tflite
+ oneutils.run(tf2tfliteV2_cmd, logfile=f)
+
+ # make a command to convert from tflite to circle
+ tflite2circle_path = os.path.join(dir_path, 'tflite2circle')
+ tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path,
+ tf2tfliteV2_output_path,
+ getattr(args, 'output_path'))
+
+ f.write((' '.join(tflite2circle_cmd) + '\n').encode())
+
+ # convert tflite to circle
+ oneutils.run(tflite2circle_cmd, err_prefix="tflite2circle", logfile=f)
+
+
+def main():
+ # parse arguments
+ parser = _get_parser()
+ args = _parse_arg(parser)
+
+ # parse configuration file
+ oneutils.parse_cfg(args.config, 'one-import-pytorch', args)
+
+ # verify arguments
+ _verify_arg(parser, args)
+
+ # convert
+ _convert(args)
+
+
+if __name__ == '__main__':
+ oneutils.safemain(main, __file__)
diff --git a/compiler/one-cmds/one-import-tf b/compiler/one-cmds/one-import-tf
index 58c686882..75d19680d 100644
--- a/compiler/one-cmds/one-import-tf
+++ b/compiler/one-cmds/one-import-tf
@@ -1,4 +1,9 @@
-#!/bin/bash
+#!/usr/bin/env bash
+''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # '''
+''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # '''
+''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # '''
+''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
+''''exit 255 # '''
# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
#
@@ -14,131 +19,177 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-set -e
-
-DRIVER_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-
-usage()
-{
- echo "Convert TensorFlow model to circle."
- echo "Usage: one-import-tf"
- echo " --version Show version information and exit"
- echo " --input_path <path/to/tfmodel>"
- echo " --output_path <path/to/circle>"
- echo " --input_arrays <names of the input arrays, comma-separated>"
- echo " --input_shapes <input shapes, colon-separated>"
- echo " --output_arrays <names of the output arrays, comma-separated>"
- echo " --v2 Use TensorFlow 2.x interface (default is 1.x interface)"
- exit 255
-}
-
-version()
-{
- $DRIVER_PATH/one-version one-import-tf
- exit 255
-}
-
-TF_INTERFACE="--v1"
-
-# Parse command-line arguments
-#
-while [ "$#" -ne 0 ]; do
- CUR="$1"
-
- case $CUR in
- '--help')
- usage
- ;;
- '--version')
- version
- ;;
- '--input_path')
- export INPUT_PATH="$2"
- shift 2
- ;;
- '--output_path')
- export OUTPUT_PATH="$2"
- shift 2
- ;;
- '--input_arrays')
- export INPUT_ARRAYS="$2"
- shift 2
- ;;
- '--input_shapes')
- export INPUT_SHAPES="$2"
- shift 2
- ;;
- '--output_arrays')
- export OUTPUT_ARRAYS="$2"
- shift 2
- ;;
- '--v2')
- TF_INTERFACE="--v2"
- shift
- ;;
- *)
- echo "Unknown parameter: ${CUR}"
- shift
- ;;
- esac
-done
-
-if [ -n ${INPUT_SHAPES} ] && [ ${TF_INTERFACE} = "--v2" ]; then
- echo "Warning: if --v2 option is used, shape will be ignored"
-fi
-
-if [ -z ${INPUT_PATH} ] || [ ! -e ${INPUT_PATH} ]; then
- echo "Error: input model not found"
- echo ""
- usage
- exit 2
-fi
-
-FILE_BASE=$(basename ${OUTPUT_PATH})
-MODEL_NAME="${FILE_BASE%.*}"
-
-TMPDIR=$(mktemp -d)
-trap "{ rm -rf $TMPDIR; }" EXIT
-
-# activate python virtual environment
-VIRTUALENV_LINUX="${DRIVER_PATH}/venv/bin/activate"
-VIRTUALENV_WINDOWS="${DRIVER_PATH}/venv/Scripts/activate"
-
-if [ -e ${VIRTUALENV_LINUX} ]; then
- source ${VIRTUALENV_LINUX}
-elif [ -e ${VIRTUALENV_WINDOWS} ]; then
- source ${VIRTUALENV_WINDOWS}
-fi
-
-# remove previous log
-rm -rf "${OUTPUT_PATH}.log"
-
-show_err_onexit()
-{
- cat "${OUTPUT_PATH}.log"
-}
-
-trap show_err_onexit ERR
-
-# generate temporary tflite file
-CONVERT_SCRIPT="python ${DRIVER_PATH}/tf2tfliteV2.py ${TF_INTERFACE} "
-CONVERT_SCRIPT+="--input_path ${INPUT_PATH} "
-CONVERT_SCRIPT+="--input_arrays ${INPUT_ARRAYS} "
-CONVERT_SCRIPT+="--output_path ${TMPDIR}/${MODEL_NAME}.tflite "
-CONVERT_SCRIPT+="--output_arrays ${OUTPUT_ARRAYS} "
-if [ ! -z ${INPUT_SHAPES} ]; then
- CONVERT_SCRIPT+="--input_shapes ${INPUT_SHAPES} "
-fi
-
-echo ${CONVERT_SCRIPT} > "${OUTPUT_PATH}.log"
-echo "" >> "${OUTPUT_PATH}.log"
-$CONVERT_SCRIPT >> "${OUTPUT_PATH}.log" 2>&1
-
-# convert .tflite to .circle
-echo " " >> "${OUTPUT_PATH}.log"
-echo "${DRIVER_PATH}/tflite2circle" "${TMPDIR}/${MODEL_NAME}.tflite" \
-"${OUTPUT_PATH}" >> "${OUTPUT_PATH}.log"
-echo " " >> "${OUTPUT_PATH}.log"
-
-"${DRIVER_PATH}/tflite2circle" "${TMPDIR}/${MODEL_NAME}.tflite" \
-"${OUTPUT_PATH}" >> "${OUTPUT_PATH}.log" 2>&1
+import argparse
+import os
+import tempfile
+
+import onelib.make_cmd as _make_cmd
+import onelib.utils as oneutils
+
+
+def get_driver_cfg_section():
+ return "one-import-tf"
+
+
+def _get_parser():
+ parser = argparse.ArgumentParser(
+ description='command line tool to convert TensorFlow to circle')
+
+ oneutils.add_default_arg(parser)
+
+ ## tf2tfliteV2 arguments
+ tf2tfliteV2_group = parser.add_argument_group('converter arguments')
+
+ # converter version
+ converter_version = tf2tfliteV2_group.add_mutually_exclusive_group()
+ converter_version.add_argument(
+ '--v1',
+ action='store_const',
+ dest='converter_version_cmd',
+ const='--v1',
+ help='use TensorFlow Lite Converter 1.x')
+ converter_version.add_argument(
+ '--v2',
+ action='store_const',
+ dest='converter_version_cmd',
+ const='--v2',
+ help='use TensorFlow Lite Converter 2.x')
+
+ parser.add_argument('--converter_version', type=str, help=argparse.SUPPRESS)
+
+ # input model format
+ model_format_arg = tf2tfliteV2_group.add_mutually_exclusive_group()
+ model_format_arg.add_argument(
+ '--graph_def',
+ action='store_const',
+ dest='model_format_cmd',
+ const='--graph_def',
+ help='use graph def file (default)')
+ model_format_arg.add_argument(
+ '--saved_model',
+ action='store_const',
+ dest='model_format_cmd',
+ const='--saved_model',
+ help='use saved model')
+ model_format_arg.add_argument(
+ '--keras_model',
+ action='store_const',
+ dest='model_format_cmd',
+ const='--keras_model',
+ help='use keras model')
+
+ parser.add_argument('--model_format', type=str, help=argparse.SUPPRESS)
+
+ # input and output path.
+ tf2tfliteV2_group.add_argument(
+ '-i', '--input_path', type=str, help='full filepath of the input file')
+ tf2tfliteV2_group.add_argument(
+ '-o', '--output_path', type=str, help='full filepath of the output file')
+
+ # input and output arrays.
+ tf2tfliteV2_group.add_argument(
+ '-I',
+ '--input_arrays',
+ type=str,
+ help='names of the input arrays, comma-separated')
+ tf2tfliteV2_group.add_argument(
+ '-s',
+ '--input_shapes',
+ type=str,
+ help=
+ 'shapes corresponding to --input_arrays, colon-separated (ex:"1,4,4,3:1,20,20,3")'
+ )
+ tf2tfliteV2_group.add_argument(
+ '-O',
+ '--output_arrays',
+ type=str,
+ help='names of the output arrays, comma-separated')
+
+ # save intermediate file(s)
+ parser.add_argument(
+ '--save_intermediate',
+ action='store_true',
+ help='Save intermediate files to output folder')
+
+ # experimental options
+ parser.add_argument(
+ '--experimental_disable_batchmatmul_unfold',
+ action='store_true',
+ help='Experimental disable BatchMatMul unfold')
+
+ return parser
+
+
+def _verify_arg(parser, args):
+ """verify given arguments"""
+ # check if required arguments are given
+ missing = []
+ if not oneutils.is_valid_attr(args, 'input_path'):
+ missing.append('-i/--input_path')
+ if not oneutils.is_valid_attr(args, 'output_path'):
+ missing.append('-o/--output_path')
+ if len(missing):
+ parser.error('the following arguments are required: ' + ' '.join(missing))
+
+
+def _parse_arg(parser):
+ args = parser.parse_args()
+ # print version
+ if args.version:
+ oneutils.print_version_and_exit(__file__)
+
+ return args
+
+
+def _convert(args):
+ # get file path to log
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+ logfile_path = os.path.realpath(args.output_path) + '.log'
+
+ with open(logfile_path, 'wb') as f, tempfile.TemporaryDirectory() as tmpdir:
+ # save intermediate
+ if oneutils.is_valid_attr(args, 'save_intermediate'):
+ tmpdir = os.path.dirname(logfile_path)
+ # make a command to convert from tf to tflite
+ tf2tfliteV2_path = os.path.join(dir_path, 'tf2tfliteV2.py')
+ tf2tfliteV2_output_path = os.path.join(
+ tmpdir,
+ os.path.splitext(os.path.basename(args.output_path))[0]) + '.tflite'
+ tf2tfliteV2_cmd = _make_cmd.make_tf2tfliteV2_cmd(args, tf2tfliteV2_path,
+ getattr(args, 'input_path'),
+ tf2tfliteV2_output_path)
+
+ f.write((' '.join(tf2tfliteV2_cmd) + '\n').encode())
+
+ # convert tf to tflite
+ oneutils.run(tf2tfliteV2_cmd, logfile=f)
+
+ # make a command to convert from tflite to circle
+ tflite2circle_path = os.path.join(dir_path, 'tflite2circle')
+ tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path,
+ tf2tfliteV2_output_path,
+ getattr(args, 'output_path'))
+
+ f.write((' '.join(tflite2circle_cmd) + '\n').encode())
+
+ # convert tflite to circle
+ oneutils.run(tflite2circle_cmd, err_prefix="tflite2circle", logfile=f)
+
+
+def main():
+ # parse arguments
+ parser = _get_parser()
+ args = _parse_arg(parser)
+
+ # parse configuration file
+ oneutils.parse_cfg(args.config, 'one-import-tf', args)
+
+ # verify arguments
+ _verify_arg(parser, args)
+
+ # convert
+ _convert(args)
+
+
+if __name__ == '__main__':
+ oneutils.safemain(main, __file__)
diff --git a/compiler/one-cmds/one-import-tflite b/compiler/one-cmds/one-import-tflite
index 053489c92..8eba46dc5 100644
--- a/compiler/one-cmds/one-import-tflite
+++ b/compiler/one-cmds/one-import-tflite
@@ -1,4 +1,9 @@
-#!/bin/bash
+#!/usr/bin/env bash
+''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # '''
+''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # '''
+''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # '''
+''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
+''''exit 255 # '''
# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
#
@@ -14,70 +19,92 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-set -e
+import argparse
+import os
+import sys
-DRIVER_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+import onelib.make_cmd as _make_cmd
+import onelib.utils as oneutils
-usage()
-{
- echo "Convert TensorFlow lite model to circle."
- echo "Usage: one-import-tflite"
- echo " --version Show version information and exit"
- echo " --input_path <path/to/tflitemodel>"
- echo " --output_path <path/to/circle>"
- exit 255
-}
+# TODO Find better way to suppress traceback on error
+sys.tracebacklimit = 0
-version()
-{
- $DRIVER_PATH/one-version one-import-tflite
- exit 255
-}
-# Parse command-line arguments
-#
-while [ "$#" -ne 0 ]; do
- CUR="$1"
-
- case $CUR in
- '--help')
- usage
- ;;
- '--version')
- version
- ;;
- '--input_path')
- export INPUT_PATH="$2"
- shift 2
- ;;
- '--output_path')
- export OUTPUT_PATH="$2"
- shift 2
- ;;
- *)
- echo "Unknown parameter: ${CUR}"
- shift
- ;;
- esac
-done
-
-if [ -z ${INPUT_PATH} ] || [ ! -e ${INPUT_PATH} ]; then
- echo "Error: input model not found"
- echo ""
- usage
-fi
-
-# remove previous log
-rm -rf "${OUTPUT_PATH}.log"
-
-show_err_onexit()
-{
- cat "${OUTPUT_PATH}.log"
-}
-
-trap show_err_onexit ERR
-
-# convert .tflite to .circle
-echo "${DRIVER_PATH}/tflite2circle" "${INPUT_PATH}" "${OUTPUT_PATH}" > "${OUTPUT_PATH}.log"
-
-"${DRIVER_PATH}/tflite2circle" "${INPUT_PATH}" "${OUTPUT_PATH}" >> "${OUTPUT_PATH}.log" 2>&1
+def get_driver_cfg_section():
+ return "one-import-tflite"
+
+
+def _get_parser():
+ parser = argparse.ArgumentParser(
+ description='command line tool to convert TensorFlow lite to circle')
+
+ oneutils.add_default_arg(parser)
+
+ ## tflite2circle arguments
+ tflite2circle_group = parser.add_argument_group('converter arguments')
+
+ # input and output path.
+ tflite2circle_group.add_argument(
+ '-i', '--input_path', type=str, help='full filepath of the input file')
+ tflite2circle_group.add_argument(
+ '-o', '--output_path', type=str, help='full filepath of the output file')
+
+ return parser
+
+
+def _verify_arg(parser, args):
+ """verify given arguments"""
+ # check if required arguments are given
+ missing = []
+ if not oneutils.is_valid_attr(args, 'input_path'):
+ missing.append('-i/--input_path')
+ if not oneutils.is_valid_attr(args, 'output_path'):
+ missing.append('-o/--output_path')
+ if len(missing):
+ parser.error('the following arguments are required: ' + ' '.join(missing))
+
+
+def _parse_arg(parser):
+ args = parser.parse_args()
+ # print version
+ if args.version:
+ oneutils.print_version_and_exit(__file__)
+
+ return args
+
+
+def _convert(args):
+ # get file path to log
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+ logfile_path = os.path.realpath(args.output_path) + '.log'
+
+ with open(logfile_path, 'wb') as f:
+ # make a command to convert from tflite to circle
+ tflite2circle_path = os.path.join(dir_path, 'tflite2circle')
+ tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path,
+ getattr(args, 'input_path'),
+ getattr(args, 'output_path'))
+
+ f.write((' '.join(tflite2circle_cmd) + '\n').encode())
+
+ # convert tflite to circle
+ oneutils.run(tflite2circle_cmd, err_prefix="tflite2circle", logfile=f)
+
+
+def main():
+ # parse arguments
+ parser = _get_parser()
+ args = _parse_arg(parser)
+
+ # parse configuration file
+ oneutils.parse_cfg(args.config, 'one-import-tflite', args)
+
+ # verify arguments
+ _verify_arg(parser, args)
+
+ # convert
+ _convert(args)
+
+
+if __name__ == '__main__':
+ oneutils.safemain(main, __file__)
diff --git a/compiler/one-cmds/one-infer b/compiler/one-cmds/one-infer
new file mode 100644
index 000000000..075e2bfa2
--- /dev/null
+++ b/compiler/one-cmds/one-infer
@@ -0,0 +1,133 @@
+#!/usr/bin/env bash
+''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # '''
+''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # '''
+''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # '''
+''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
+''''exit 255 # '''
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import copy
+import glob
+import itertools
+import ntpath
+import os
+import sys
+
+import onelib.backends as backends
+import onelib.utils as oneutils
+
+# TODO Find better way to suppress traceback on error
+sys.tracebacklimit = 0
+
+
+def _get_parser():
+ infer_usage = 'one-infer [-h] [-v] [-C CONFIG] [-d DRIVER] [--post-process POST_PROCESS] [--] [COMMANDS FOR BACKEND DRIVER]'
+ infer_detail = """
+one-infer provides post-processing after invoking backend inference driver
+use python script and its arguments to '--post-process' argument as below
+one-infer -d dummy-infer --post-process "script.py arg1 arg2" -- [arguments for dummy-infer]
+"""
+ parser = argparse.ArgumentParser(
+ description='command line tool to infer model',
+ usage=infer_usage,
+ epilog=infer_detail,
+ formatter_class=argparse.RawTextHelpFormatter)
+
+ oneutils.add_default_arg(parser)
+
+ driver_help_message = 'backend inference driver name to execute'
+ parser.add_argument('-d', '--driver', type=str, help=driver_help_message)
+
+ post_process_help_message = 'post processing python script and arguments which can be used to convert I/O data to standard format'
+ parser.add_argument('--post-process', type=str, help=post_process_help_message)
+
+ return parser
+
+
+def _verify_arg(parser, args):
+ """verify given arguments"""
+ missing = []
+ if not oneutils.is_valid_attr(args, 'driver'):
+ missing.append('-d/--driver')
+ if len(missing):
+ parser.error('the following arguments are required: ' + ' '.join(missing))
+
+
+def _parse_arg(parser):
+ infer_args = []
+ backend_args = []
+ argv = copy.deepcopy(sys.argv)
+ # delete file name
+ del argv[0]
+ # split by '--'
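+ # e.g. ['-d', 'dummy-infer', '--', '--foo', 'bar'] -> [['-d', 'dummy-infer'], ['--foo', 'bar']]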
+ args = [list(y) for x, y in itertools.groupby(argv, lambda z: z == '--') if not x]
+
+ # one-infer [-h] [-v] [-C CONFIG] [-d DRIVER] [--post-process POST_PROCESS] -- [COMMANDS FOR BACKEND DRIVER]
+ if len(args):
+ infer_args = args[0]
+ infer_args = parser.parse_args(infer_args)
+ backend_args = backend_args if len(args) < 2 else args[1]
+ else:
+ infer_args = parser.parse_args(infer_args)
+ # print version
+ if len(args) and infer_args.version:
+ oneutils.print_version_and_exit(__file__)
+
+ return infer_args, backend_args
+
+
+def _get_executable(args):
+ driver = oneutils.is_valid_attr(args, 'driver')
+
+ executable = backends.search_driver(driver)
+ if executable:
+ return executable
+ else:
+ raise FileNotFoundError(driver + ' not found')
+
+
+def main():
+ # parse arguments
+ parser = _get_parser()
+ args, backend_args = _parse_arg(parser)
+
+ # parse configuration file
+ oneutils.parse_cfg(args.config, 'one-infer', args)
+
+ # verify arguments
+ _verify_arg(parser, args)
+
+ # make a command to run given backend driver
+ driver_path = _get_executable(args)
+ infer_cmd = [driver_path] + backend_args
+ if oneutils.is_valid_attr(args, 'command'):
+ infer_cmd += getattr(args, 'command').split()
+
+ # run backend driver
+ oneutils.run(infer_cmd, err_prefix=ntpath.basename(driver_path))
+
+ # run post process script if it's given
+ if oneutils.is_valid_attr(args, 'post_process'):
+ # NOTE: the given python script will be executed by venv of ONE
+ python_path = sys.executable
+ post_process_command = [python_path] + getattr(args,
+ 'post_process').strip().split(' ')
+ oneutils.run(post_process_command, err_prefix='one-infer')
+
+
+if __name__ == '__main__':
+ oneutils.safemain(main, __file__)
diff --git a/compiler/one-cmds/one-init b/compiler/one-cmds/one-init
new file mode 100644
index 000000000..f2ddc78fa
--- /dev/null
+++ b/compiler/one-cmds/one-init
@@ -0,0 +1,337 @@
+#!/usr/bin/env bash
+''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # '''
+''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # '''
+''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # '''
+''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
+''''exit 255 # '''
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import copy
+import glob
+import itertools
+import ntpath
+import os
+import sys
+
+import configparser
+import onelib.backends as backends
+import onelib.utils as oneutils
+
+# TODO Find better way to suppress traceback on error
+sys.tracebacklimit = 0
+
+
+class InputOutputPath:
+ '''
+ Class that remembers the input and output circle files of section k.
+
+ After calling enter_new_section(),
+ the output path of section k is used as the input path of section k+1.
+ '''
+
+ def __init__(self, initial_input_path: str):
+ self._first_step = True
+ self._input_path = initial_input_path
+ self._output_path = ''
+
+ def enter_new_section(self, section_output_path: str):
+ '''
+ Call this when starting a section
+ '''
+ if self._first_step:
+ self._output_path = section_output_path
+ else:
+ self._input_path = self._output_path
+ self._output_path = section_output_path
+
+ self._first_step = False
+
+ def input_path(self):
+ return self._input_path
+
+ def output_path(self):
+ return self._output_path
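+
+# A usage sketch with hypothetical paths:
+# iop = InputOutputPath('net.onnx')
+# iop.enter_new_section('net.circle') # input: net.onnx -> output: net.circle
+# iop.enter_new_section('net.opt.circle') # input: net.circle -> output: net.opt.circle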
+
+
+class CommentableConfigParser(configparser.ConfigParser):
+ """
+ ConfigParser where comments can be stored.
+ In Python ConfigParser, a comment in an ini file (starting with ';') is treated as a key
+ whose value is None.
+ Ref: https://stackoverflow.com/questions/6620637/writing-comments-to-files-with-configparser
+ """
+
+ def __init__(self):
+ # allow_no_value=True to add comment
+ # ref: https://stackoverflow.com/a/19432072
+ configparser.ConfigParser.__init__(self, allow_no_value=True)
+ self.optionxform = str
+
+ def add_comment(self, section, comment):
+ comment_sign = ';'
+ self[section][f'{comment_sign} {comment}'] = None
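+ # e.g. config.add_comment('one-optimize', 'generated by one-init') makes
+ # config.write() emit the line "; generated by one-init" under [one-optimize]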
+
+
+# TODO Add support for TF graphdef and bcq
+def _get_parser(backends_list):
+ init_usage = (
+ 'one-init [-h] [-v] [-V] '
+ '[-i INPUT_PATH] '
+ '[-o OUTPUT_PATH] '
+ '[-m MODEL_TYPE] '
+ '[-b BACKEND] '
+ # args for onnx model
+ '[--convert_nchw_to_nhwc] '
+ '[--nchw_to_nhwc_input_shape] '
+ '[--nchw_to_nhwc_output_shape] '
+ # args for backend driver
+ '[--] [COMMANDS FOR BACKEND DRIVER]')
+ """
+ NOTE
+ layout options for onnx model could be difficult for users.
+ In one-init, we could consider easier args for the above three:
+ For example, we could have another option, e.g., --input_img_layout LAYOUT
+ - When LAYOUT is NHWC, apply 'nchw_to_nhwc_input_shape=True' into cfg
+ - When LAYOUT is NCHW, apply 'nchw_to_nhwc_input_shape=False' into cfg
+ """
+
+ parser = argparse.ArgumentParser(
+ description='Command line tool to generate initial cfg file. '
+ 'Currently tflite and onnx models are supported',
+ usage=init_usage)
+
+ oneutils.add_default_arg_no_CS(parser)
+
+ parser.add_argument(
+ '-i', '--input_path', type=str, help='full filepath of the input model file')
+ parser.add_argument(
+ '-o', '--output_path', type=str, help='full filepath of the output cfg file')
+ parser.add_argument(
+ '-m',
+ '--model_type',
+ type=str,
+ help=('type of input model: "onnx", "tflite". '
+ 'If the file extension passed to --input_path is '
+ '".tflite" or ".onnx", this arg can be omitted.'))
+
+ onnx_group = parser.add_argument_group('arguments when model type is onnx')
+ onnx_group.add_argument(
+ '--convert_nchw_to_nhwc',
+ action='store_true',
+ help=
+ 'Convert NCHW operators to NHWC under the assumption that input model is NCHW.')
+ onnx_group.add_argument(
+ '--nchw_to_nhwc_input_shape',
+ action='store_true',
+ help='Convert the input shape of the model (argument for convert_nchw_to_nhwc)')
+ onnx_group.add_argument(
+ '--nchw_to_nhwc_output_shape',
+ action='store_true',
+ help='Convert the output shape of the model (argument for convert_nchw_to_nhwc)')
+
+ # get backend list in the directory
+ backends_name = [ntpath.basename(f) for f in backends_list]
+ if not backends_name:
+ backends_name_message = '(There are no available backend drivers)'
+ else:
+ backends_name_message = '(available backend drivers: ' + ', '.join(
+ backends_name) + ')'
+ backend_help_message = 'backend name to use ' + backends_name_message
+ parser.add_argument('-b', '--backend', type=str, help=backend_help_message)
+
+ return parser
+
+
+def _verify_arg(parser, args):
+ # check if required arguments are given
+ missing = []
+ if not oneutils.is_valid_attr(args, 'input_path'):
+ missing.append('-i/--input_path')
+ if not oneutils.is_valid_attr(args, 'output_path'):
+ missing.append('-o/--output_path')
+ if not oneutils.is_valid_attr(args, 'backend'):
+ missing.append('-b/--backend')
+
+ if oneutils.is_valid_attr(args, 'model_type'):
+ # TODO Support model types other than onnx and tflite (e.g., TF)
+ if getattr(args, 'model_type') not in ['onnx', 'tflite']:
+ parser.error('Allowed value for --model_type: "onnx" or "tflite"')
+
+ if oneutils.is_valid_attr(args, 'nchw_to_nhwc_input_shape'):
+ if not oneutils.is_valid_attr(args, 'convert_nchw_to_nhwc'):
+ missing.append('--convert_nchw_to_nhwc')
+ if oneutils.is_valid_attr(args, 'nchw_to_nhwc_output_shape'):
+ if not oneutils.is_valid_attr(args, 'convert_nchw_to_nhwc'):
+ missing.append('--convert_nchw_to_nhwc')
+
+ if len(missing):
+ parser.error('the following arguments are required: ' + ' '.join(missing))
+
+
+def _parse_arg(parser):
+ init_args = []
+ backend_args = []
+ argv = copy.deepcopy(sys.argv)
+ # delete file name
+ del argv[0]
+ # split by '--'
+ args = [list(y) for x, y in itertools.groupby(argv, lambda z: z == '--') if not x]
+
+ # one-init [-h] [-v] ...
+ if len(args):
+ init_args = args[0]
+ init_args = parser.parse_args(init_args)
+ backend_args = backend_args if len(args) < 2 else args[1]
+ # print version
+ if len(args) and init_args.version:
+ oneutils.print_version_and_exit(__file__)
+
+ return init_args, backend_args
+
+
+def _get_executable(args, backends_list):
+ if oneutils.is_valid_attr(args, 'backend'):
+ backend_base = getattr(args, 'backend') + '-init'
+ for cand in backends_list:
+ if ntpath.basename(cand) == backend_base:
+ return cand
+ raise FileNotFoundError(backend_base + ' not found')
+
+
+# TODO Support workflow format (https://github.com/Samsung/ONE/pull/9354)
+def _generate(args, model_type: str, inout_path: InputOutputPath):
+ # generate cfg file
+ config = CommentableConfigParser()
+ model_dir = os.path.dirname(args.input_path)
+ model_name = os.path.basename(args.input_path).split('.')[0]
+
+ def _assert_section(section: str):
+ if not config.has_section(section):
+ raise RuntimeError(f'Cannot find section: {section}')
+
+ def _add_onecc_sections():
+ '''
+ This adds the [onecc] section and an empty section for each listed driver
+ '''
+ config.add_section('onecc')
+ sections = [
+ f'one-import-{model_type}', 'one-optimize', 'one-quantize', 'one-codegen'
+ ]
+
+ for section in sections:
+ config['onecc'][section] = 'True'
+ # add empty section in preparation for the next procedure
+ config.add_section(section)
+
+ def _gen_import():
+ section = f'one-import-{model_type}'
+ _assert_section(section)
+
+ output_path = os.path.join(model_dir, f'{model_name}.circle')
+ inout_path.enter_new_section(section_output_path=output_path)
+ config[section]['input_path'] = inout_path.input_path()
+ config[section]['output_path'] = inout_path.output_path()
+
+ def _gen_optimize():
+ section = 'one-optimize'
+ _assert_section(section)
+
+ output_path = os.path.join(model_dir, f'{model_name}.opt.circle')
+ inout_path.enter_new_section(section_output_path=output_path)
+ config[section]['input_path'] = inout_path.input_path()
+ config[section]['output_path'] = inout_path.output_path()
+
+ # TODO Add optimization options
+
+ def _gen_quantize():
+ section = 'one-quantize'
+ _assert_section(section)
+
+ output_path = os.path.join(model_dir, f'{model_name}.q.circle')
+ inout_path.enter_new_section(section_output_path=output_path)
+ config[section]['input_path'] = inout_path.input_path()
+ config[section]['output_path'] = inout_path.output_path()
+
+ def _gen_codegen():
+ section = 'one-codegen'
+ _assert_section(section)
+
+ # [backend]-init must provide default value for 'command'
+ config[section]['backend'] = args.backend
+
+ #
+ # NYI: one-profile, one-partition, one-pack, one-infer
+ #
+
+ _add_onecc_sections()
+
+ _gen_import()
+ _gen_optimize()
+ _gen_quantize()
+ _gen_codegen()
+
+ with open(args.output_path, 'w') as f:
+ config.write(f)
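+
+# For a hypothetical input 'net.tflite', _generate() writes a cfg roughly like:
+# [onecc]
+# one-import-tflite = True
+# one-optimize = True
+# one-quantize = True
+# one-codegen = True
+# [one-import-tflite]
+# input_path = net.tflite
+# output_path = net.circle
+# ...followed by one-optimize (net.opt.circle), one-quantize (net.q.circle)
+# and one-codegen with the chosen backend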
+
+
+def _get_model_type(parser, args):
+ if oneutils.is_valid_attr(args, 'model_type'):
+ return args.model_type
+
+ if oneutils.is_valid_attr(args, 'input_path'):
+ _, ext = os.path.splitext(args.input_path)
+
+ # ext would be, e.g., '.tflite' or '.onnx'.
+ # Note: when args.input_path does not have an extension, e.g., '/home/foo'
+ # ext after os.path.splitext() is '' and ''[1:] is still ''.
+ # TODO support tensorflow model
+ ext = ext[1:]
+ if ext in ["tflite", "onnx"]:
+ return ext
+ else:
+ parser.error('the following file extensions are supported: ".onnx", ".tflite"')
+
+ parser.error('the following argument is required: -i/--input_path')
+
+
+def main():
+ # get backend list
+ backends_list = backends.get_list('init')
+
+ # parse arguments
+ parser = _get_parser(backends_list)
+ args, backend_args = _parse_arg(parser)
+
+ # verify arguments
+ _verify_arg(parser, args)
+
+ model_type = _get_model_type(parser, args)
+ inout_path = InputOutputPath(args.input_path)
+ _generate(args, model_type, inout_path)
+
+ # make a command to run given backend driver
+ driver_path = _get_executable(args, backends_list)
+ init_cmd = [driver_path] + backend_args
+
+ # run backend driver
+ oneutils.run(init_cmd, err_prefix=ntpath.basename(driver_path))
+
+ raise NotImplementedError("NYI")
+
+
+if __name__ == '__main__':
+ oneutils.safemain(main, __file__)
diff --git a/compiler/one-cmds/one-optimize b/compiler/one-cmds/one-optimize
index 17b6b980e..51668a816 100644
--- a/compiler/one-cmds/one-optimize
+++ b/compiler/one-cmds/one-optimize
@@ -1,4 +1,9 @@
-#!/bin/bash
+#!/usr/bin/env bash
+''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # '''
+''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # '''
+''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # '''
+''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
+''''exit 255 # '''
# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
#
@@ -14,135 +19,160 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-set -e
-
-DRIVER_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-
-usage()
-{
- echo "Optimize circle model."
- echo "Usage: one-optimize"
- echo " --version Show version information and exit"
- echo " --all Enable all optimization algorithms"
- echo " --fuse_bcq Enable FuseBCQ Pass"
- echo " --fuse_instnorm Enable FuseInstanceNormalization Pass"
- echo " --resolve_customop_add"
- echo " Enable ResolveCustomOpAddPass Pass"
- echo " --resolve_customop_batchmatmul"
- echo " Enable ResolveCustomOpBatchMatMulPass Pass"
- echo " --resolve_customop_matmul"
- echo " Enable ResolveCustomOpMatMulPass Pass"
- echo " --input_path <path/to/input/circle>"
- echo " --output_path <path/to/output/circle>"
- exit 255
-}
-
-version()
-{
- $DRIVER_PATH/one-version one-optimize
- exit 255
-}
-
-OPTIMIZE_all=0
-OPTIMIZE_fuse_bcq=0
-OPTIMIZE_fuse_instnorm=0
-OPTIMIZE_resolve_customop_add=0
-OPTIMIZE_resolve_customop_batchmatmul=0
-OPTIMIZE_resolve_customop_matmul=0
-
-# Parse command-line arguments
+import argparse
+import os
+import sys
+
+import onelib.constant as _constant
+import onelib.make_cmd as _make_cmd
+import onelib.utils as oneutils
+
+# TODO Find better way to suppress traceback on error
+sys.tracebacklimit = 0
+
+
+def _get_parser():
+ parser = argparse.ArgumentParser(
+ description='command line tool to optimize circle model')
+
+ oneutils.add_default_arg(parser)
+
+ ## utility arguments
+ utility_group = parser.add_argument_group('arguments for utility')
+
+ utility_group.add_argument(
+ '-p',
+ '--generate_profile_data',
+ action='store_true',
+ help='generate profiling data')
+
+ utility_group.add_argument(
+ '--change_outputs',
+ type=str,
+ help='Experimental: Change first subgraph output nodes to CSV names')
+
+ ## circle2circle arguments
+ circle2circle_group = parser.add_argument_group('arguments for optimization')
+
+ # input and output path.
+ circle2circle_group.add_argument(
+ '-i', '--input_path', type=str, help='full filepath of the input file')
+ circle2circle_group.add_argument(
+ '-o', '--output_path', type=str, help='full filepath of the output file')
+
+ # optimization pass
+ for opt in _constant.CONSTANT.OPTIMIZATION_OPTS:
+ # opt = (option_name, help_message)
+ circle2circle_group.add_argument('--' + opt[0], action='store_true', help=opt[1])
+
+ # optimization option from one-build
+ parser.add_argument('-O', type=str, help=argparse.SUPPRESS)
+
+ return parser
+
+
+def _verify_arg(parser, args):
+ """verify given arguments"""
+ # check if required arguments are given
+ missing = []
+ if not oneutils.is_valid_attr(args, 'input_path'):
+ missing.append('-i/--input_path')
+ if not oneutils.is_valid_attr(args, 'output_path'):
+ missing.append('-o/--output_path')
+ if len(missing):
+ parser.error('the following arguments are required: ' + ' '.join(missing))
+
+ # default has pre-defined optimization options
+ default = _get_parser().parse_args()
+
+ # check if unrecognized arguments are given
+ diff = set(dir(args)) - set(dir(default))
+ if len(diff):
+ parser.error('the following arguments are unrecognized: ' + ' '.join(diff))
+
+
+def _parse_arg(parser):
+ args = parser.parse_args()
+ # print version
+ if args.version:
+ oneutils.print_version_and_exit(__file__)
+
+ return args
+
+
+def _optimize(args):
+ # get file path to log
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+ logfile_path = os.path.realpath(args.output_path) + '.log'
+
+ with open(logfile_path, 'wb') as f:
+ # make a command to optimize circle model
+ circle2circle_path = os.path.join(dir_path, 'circle2circle')
+ circle2circle_cmd = _make_cmd.make_circle2circle_cmd(args, circle2circle_path,
+ getattr(args, 'input_path'),
+ getattr(args, 'output_path'))
+
+ # verbose
+ if oneutils.is_valid_attr(args, 'verbose'):
+ circle2circle_cmd.append('--verbose')
+ if oneutils.is_valid_attr(args, 'change_outputs'):
+ circle2circle_cmd.append('--change_outputs')
+ circle2circle_cmd.append(getattr(args, 'change_outputs'))
+
+ f.write((' '.join(circle2circle_cmd) + '\n').encode())
+
+ # optimize
+ oneutils.run(circle2circle_cmd, err_prefix="circle2circle", logfile=f)
+
+
+def _parse_opt(args):
+ if oneutils.is_valid_attr(args, 'O'):
+ opt_name_path_dic = dict(
+ zip(oneutils.get_optimization_list(get_name=True),
+ oneutils.get_optimization_list()))
+ config_path = opt_name_path_dic['O' + getattr(args, 'O')]
+ # group option do not overwrite existing args
+ oneutils.parse_cfg(config_path, 'one-optimize', args)
+
+
+# There are several ways to receive the optimization options:
+# - Individual option
+# 1. From command line
+# 2. From cfg file
+# - Group option
+# 3. From command line
#
-while [ "$#" -ne 0 ]; do
- CUR="$1"
-
- case $CUR in
- '--help')
- usage
- ;;
- '--version')
- version
- ;;
- '--all')
- OPTIMIZE_all=1
- shift
- ;;
- '--fuse_bcq')
- OPTIMIZE_fuse_bcq=1
- shift
- ;;
- '--fuse_instnorm')
- OPTIMIZE_fuse_instnorm=1
- shift
- ;;
- '--resolve_customop_add')
- OPTIMIZE_resolve_customop_add=1
- shift
- ;;
- '--resolve_customop_batchmatmul')
- OPTIMIZE_resolve_customop_batchmatmul=1
- shift
- ;;
- '--resolve_customop_matmul')
- OPTIMIZE_resolve_customop_matmul=1
- shift
- ;;
-
- '--input_path')
- export INPUT_PATH="$2"
- shift 2
- ;;
- '--output_path')
- export OUTPUT_PATH="$2"
- shift 2
- ;;
- *)
- echo "Unknown parameter: ${CUR}"
- shift
- ;;
- esac
-done
-
-if [ -z ${INPUT_PATH} ] || [ ! -e ${INPUT_PATH} ]; then
- echo "Error: input model not found"
- echo ""
- usage
-fi
-
-OPTIMIZE_OPTIONS=""
-
-if [ $OPTIMIZE_all == 1 ]; then
- OPTIMIZE_OPTIONS+="--all "
-fi
-if [ $OPTIMIZE_fuse_bcq == 1 ]; then
- OPTIMIZE_OPTIONS+="--fuse_bcq "
-fi
-if [ $OPTIMIZE_fuse_instnorm == 1 ]; then
- OPTIMIZE_OPTIONS+="--fuse_instnorm "
-fi
-if [ $OPTIMIZE_resolve_customop_add == 1 ]; then
- OPTIMIZE_OPTIONS+="--resolve_customop_add "
-fi
-if [ $OPTIMIZE_resolve_customop_batchmatmul == 1 ]; then
- OPTIMIZE_OPTIONS+="--resolve_customop_batchmatmul "
-fi
-if [ $OPTIMIZE_resolve_customop_matmul == 1 ]; then
- OPTIMIZE_OPTIONS+="--resolve_customop_matmul "
-fi
-
-# remove previous log
-rm -rf "${OUTPUT_PATH}.log"
-
-show_err_onexit()
-{
- cat "${OUTPUT_PATH}.log"
-}
-
-trap show_err_onexit ERR
-
-# NOTE do not wrap ${OPTIMIZE_OPTIONS} with ""
-# optimize circle
-echo "${DRIVER_PATH}/circle2circle" ${OPTIMIZE_OPTIONS} \
-"${INPUT_PATH}" "${OUTPUT_PATH}" > "${OUTPUT_PATH}.log"
-
-"${DRIVER_PATH}/circle2circle" ${OPTIMIZE_OPTIONS} \
-"${INPUT_PATH}" "${OUTPUT_PATH}" >> "${OUTPUT_PATH}.log" 2>&1
+# Their priority is as follows, since each option can be given simultaneously:
+# 1. Individual option from command line
+# 2. Individual option from cfg file
+# 3. Group option from command line
+#
+# To follow their priority, options with higher priority should be parsed first.
+#
+# DO NOT MODIFY the order of below function calls.
+#
+# NOTE. Assume all optimization options are 'store_true' flags.
+# NOTE. Group option from cfg file (`include` in `[onecc]` section) is passed
+# as a command line argument.
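+#
+# e.g. (hypothetical invocation) with `--fuse_bcq -O1` on the command line and
+# `fuse_instnorm=True` in the cfg file: fuse_bcq is set first from the command
+# line, fuse_instnorm next from the cfg file, and the O1 group option only
+# fills in options that are still unset.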
+def main():
+ # parse arguments
+ parser = _get_parser()
+ args = _parse_arg(parser)
+
+ # parse configuration file
+ oneutils.parse_cfg(args.config, 'one-optimize', args)
+
+ # parse optimization file
+ # NOTE if there is a `one-optimize` section in above configuration file as well,
+ # it will be overwritten
+ _parse_opt(args)
+
+ # verify arguments
+ _verify_arg(parser, args)
+
+ # optimize
+ _optimize(args)
+
+
+if __name__ == '__main__':
+ oneutils.safemain(main, __file__)
diff --git a/compiler/one-cmds/one-pack b/compiler/one-cmds/one-pack
index 023b0a85f..db4246620 100644
--- a/compiler/one-cmds/one-pack
+++ b/compiler/one-cmds/one-pack
@@ -1,4 +1,9 @@
-#!/bin/bash
+#!/usr/bin/env bash
+''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # '''
+''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # '''
+''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # '''
+''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
+''''exit 255 # '''
# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
#
@@ -14,76 +19,97 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-set -e
+import argparse
+import os
+import sys
-DRIVER_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+import onelib.utils as oneutils
-usage()
-{
- echo "Package circle to nnpkg"
- echo "Usage: one-pack"
- echo " -v, --version Show version information and exit"
- echo " -i <path/to/circle>"
- echo " -o <path/to/nnpackage/folder>"
- exit 255
-}
+# TODO Find better way to suppress traceback on error
+sys.tracebacklimit = 0
-version()
-{
- $DRIVER_PATH/one-version one-pack
- exit 255
-}
-# Parse command-line arguments
-#
-while [ "$#" -ne 0 ]; do
- CUR="$1"
-
- case $CUR in
- '--help')
- usage
- ;;
- '-v')
- version
- ;;
- '--version')
- version
- ;;
- '-i')
- export INPUT_PATH="$2"
- shift 2
- ;;
- '-o')
- export OUTPUT_PATH="$2"
- shift 2
- ;;
- *)
- echo "Unknown parameter: ${CUR}"
- shift
- ;;
- esac
-done
-
-if [ -z ${INPUT_PATH} ] || [ ! -e ${INPUT_PATH} ]; then
- echo "Error: input model not found"
- echo ""
- usage
-fi
-
-INPUT_FILE=$(basename "${INPUT_PATH}")
-LOG_FILE="${INPUT_FILE%.*}.pack.log"
-
-# remove previous log
-rm -rf "${LOG_FILE}"
-
-show_err_onexit()
-{
- cat "${LOG_FILE}"
-}
-
-trap show_err_onexit ERR
-
-# Package circle model file to nnpkg
-echo "${DRIVER_PATH}/model2nnpkg.sh" -o "${OUTPUT_PATH}" "${INPUT_PATH}" > "${LOG_FILE}"
-
-"${DRIVER_PATH}/model2nnpkg.sh" -o "${OUTPUT_PATH}" "${INPUT_PATH}" >> "${LOG_FILE}" 2>&1
+def _get_parser():
+ parser = argparse.ArgumentParser(
+ description='command line tool to package circle and metadata into nnpackage')
+
+ oneutils.add_default_arg(parser)
+
+ ## model2nnpkg arguments
+ model2nnpkg_group = parser.add_argument_group('arguments for packaging')
+
+ # input and output path.
+ model2nnpkg_group.add_argument(
+ '-i', '--input_path', type=str, help='full filepath of the input file')
+ model2nnpkg_group.add_argument(
+ '-o', '--output_path', type=str, help='full filepath of the output file')
+
+ return parser
+
+
+def _verify_arg(parser, args):
+ """verify given arguments"""
+ # check if required arguments are given
+ missing = []
+ if not oneutils.is_valid_attr(args, 'input_path'):
+ missing.append('-i/--input_path')
+ if not oneutils.is_valid_attr(args, 'output_path'):
+ missing.append('-o/--output_path')
+ if len(missing):
+ parser.error('the following arguments are required: ' + ' '.join(missing))
+
+
+def _parse_arg(parser):
+ args = parser.parse_args()
+ # print version
+ if args.version:
+ oneutils.print_version_and_exit(__file__)
+
+ return args
+
+
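+# e.g. with hypothetical paths:
+# _make_model2nnpkg_cmd('./model2nnpkg', 'net.circle', 'pkg')
+# -> ['./model2nnpkg', '-o', 'pkg', '-m', 'net.circle']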
+def _make_model2nnpkg_cmd(driver_path, input_path, output_path):
+ """make a command for running model2nnpkg"""
+ cmd = [os.path.expanduser(driver_path)]
+ cmd.append('-o')
+ cmd.append(os.path.expanduser(output_path))
+ cmd.append('-m')
+ cmd.append(os.path.expanduser(input_path))
+ return cmd
+
+
+def _pack(args):
+ # get file path to log
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+ logfile_path = os.path.realpath(args.output_path) + '.log'
+
+ with open(logfile_path, 'wb') as f:
+ # make a command to package circle model and metadata into nnpackage
+ model2nnpkg_path = os.path.join(dir_path, 'model2nnpkg')
+ model2nnpkg_cmd = _make_model2nnpkg_cmd(model2nnpkg_path,
+ getattr(args, 'input_path'),
+ getattr(args, 'output_path'))
+
+ f.write((' '.join(model2nnpkg_cmd) + '\n').encode())
+
+ # convert tflite to circle
+ oneutils.run(model2nnpkg_cmd, err_prefix="model2nnpkg", logfile=f)
+
+
+def main():
+ # parse arguments
+ parser = _get_parser()
+ args = _parse_arg(parser)
+
+ # parse configuration file
+ oneutils.parse_cfg(args.config, 'one-pack', args)
+
+ # verify arguments
+ _verify_arg(parser, args)
+
+ # package
+ _pack(args)
+
+
+if __name__ == '__main__':
+ oneutils.safemain(main, __file__)
diff --git a/compiler/one-cmds/one-partition b/compiler/one-cmds/one-partition
new file mode 100644
index 000000000..62ab13d39
--- /dev/null
+++ b/compiler/one-cmds/one-partition
@@ -0,0 +1,126 @@
+#!/usr/bin/env bash
+''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # '''
+''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # '''
+''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # '''
+''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
+''''exit 255 # '''
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import configparser
+import os
+import sys
+
+import onelib.utils as oneutils
+
+# TODO Find better way to suppress traceback on error
+sys.tracebacklimit = 0
+
+
+def _get_parser():
+ parser = argparse.ArgumentParser(
+ description='command line tool to partition circle model by multiple backends')
+
+ oneutils.add_default_arg(parser)
+
+ parser.add_argument(
+ '--backends', type=str, help='backends in CSV to use for partitioning')
+ parser.add_argument('--default', type=str, help='default backend to assign')
+
+ parser.add_argument(
+ '--part_file', type=str, help='partition file which provides backend to assign')
+ parser.add_argument('--input_file', type=str, help='input circle model filename')
+ parser.add_argument(
+ '--work_path',
+ type=str,
+ help='work path of partition where input files exist and output files are produced')
+
+ return parser
+
+
+def _parse_arg(parser):
+ args = parser.parse_args()
+ # print version
+ if args.version:
+ oneutils.print_version_and_exit(__file__)
+
+ return args
+
+
+def _verify_arg(parser, args):
+ """verify given arguments"""
+ # check if required arguments are given
+ missing = []
+ if not oneutils.is_valid_attr(args, 'part_file'):
+ missing.append('part_file')
+ if not oneutils.is_valid_attr(args, 'input_file'):
+ missing.append('input_file')
+ if len(missing):
+ parser.error('the following arguments are required: ' + ' '.join(missing))
+ return
+
+
+def _partition(args):
+ # get file path to log
+ bin_path = os.path.dirname(os.path.realpath(__file__))
+ cur_path = os.getcwd()
+ partition_path = os.path.join(cur_path, args.part_file)
+ logfile_path = partition_path + '.log'
+
+ with open(logfile_path, 'wb', buffering=0) as f:
+ # make a command to run circle-partitioner
+ circle_partitioner_path = os.path.join(bin_path, 'circle-partitioner')
+
+ cmd = [os.path.expanduser(circle_partitioner_path)]
+
+ if oneutils.is_valid_attr(args, 'backends'):
+ cmd.append('--backends')
+ cmd.append(getattr(args, 'backends'))
+ if oneutils.is_valid_attr(args, 'default'):
+ cmd.append('--default')
+ cmd.append(getattr(args, 'default'))
+ if oneutils.is_valid_attr(args, 'work_path'):
+ cmd.append('--work_path')
+ cmd.append(getattr(args, 'work_path'))
+
+ cmd.append('--part_file')
+ cmd.append(args.part_file)
+ cmd.append('--input_file')
+ cmd.append(args.input_file)
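+
+ # e.g. (illustrative values) cmd == ['/path/to/circle-partitioner',
+ # '--backends', 'cpu,npu', '--part_file', 'net.part', '--input_file', 'net.circle']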
+
+ f.write((' '.join(cmd) + '\n').encode())
+
+ # run circle-partitioner
+ oneutils.run(cmd, err_prefix='circle-partitioner', logfile=f)
+
+
+def main():
+ # parse arguments
+ parser = _get_parser()
+ args = _parse_arg(parser)
+
+ # parse configuration file
+ oneutils.parse_cfg(args.config, 'one-partition', args)
+
+ # verify arguments
+ _verify_arg(parser, args)
+
+ # do partition
+ _partition(args)
+
+
+if __name__ == '__main__':
+ oneutils.safemain(main, __file__)
diff --git a/compiler/one-cmds/one-prepare-venv b/compiler/one-cmds/one-prepare-venv
index 0b11e7f0b..a456a6b9c 100644
--- a/compiler/one-cmds/one-prepare-venv
+++ b/compiler/one-cmds/one-prepare-venv
@@ -19,36 +19,80 @@ set -e
DRIVER_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
VENV_ACTIVATE=${DRIVER_PATH}/venv/bin/activate
+# NOTE Use the venv's python binary directly instead of `python` after `source activate`.
+# This script is called by a debian maintainer script, i.e. `postinst`.
+# Since the debian maintainer script is called with sudo, `source activate` is ignored.
+VENV_PYTHON=${DRIVER_PATH}/venv/bin/python
-if [ -f ${VENV_ACTIVATE} ]; then
- echo "Virtual environment is already prepared."
- exit 0
+if [ ! -f ${VENV_ACTIVATE} ]; then
+ # Create python virtual environment
+ python3 -m venv "${DRIVER_PATH}/venv"
fi
-# Install prerequisites
-python3 -m pip install --user -U virtualenv
+# NOTE version
+# - https://github.com/onnx/onnx/blob/master/docs/Versioning.md
+# - https://github.com/onnx/onnx-tensorflow/blob/master/Versioning.md
-function error_no_ensurepip ()
-{
- echo "ERROR: python3 'ensurepip' module is not found."
- echo " On ubuntu, try following command:"
- echo
- echo " apt install python$(python3 --version | awk '{print $2}' | awk -F. '{print $1"."$2}')-venv"
- echo
- echo " You may need root privilege for this."
- exit 1
-}
-python3 -m ensurepip --version > /dev/null 2>&1 || error_no_ensurepip
-
-# Create python virtual enviornment
-python3 -m venv "${DRIVER_PATH}/venv"
+VER_TENSORFLOW=2.12.1
+VER_ONNX=1.14.0
+VER_ONNXRUNTIME=1.15.0
+VER_ONNX_TF=1.10.0
+VER_PYDOT=1.4.2
# Install tensorflow
-source "${VENV_ACTIVATE}"
-
-# TODO remove version number of 'pip==20.2.1 setuptools==49.3.0'
-# NOTE adding version is for temporary hotfix of setuptools 50.x.y version
-python -m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhost.org \
- install -U pip==20.2.1 setuptools==49.3.0
-python -m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhost.org \
- install tensorflow-cpu==2.3.0
+
+PIP_TRUSTED_HOST="--trusted-host pypi.org "
+PIP_TRUSTED_HOST+="--trusted-host pypi.python.org "
+PIP_TRUSTED_HOST+="--trusted-host files.pythonhosted.org "
+PIP_TRUSTED_HOST+="--trusted-host download.pytorch.org "
+
+PIP_TIMEOUT="--default-timeout=1000 "
+
+PIP_OPTIONS="${PIP_TIMEOUT} ${PIP_TRUSTED_HOST}"
+
+# NOTE $ONE_PREPVENV_PIP_OPTION is to provide additional PIP options
+# such as a certificate file behind a firewall
+# ex) ONE_PREPVENV_PIP_OPTION="--cert SomePrivateCertificate.crt" ./one-prepare-venv
+if [[ ! -z "$ONE_PREPVENV_PIP_OPTION" ]]; then
+ PIP_OPTIONS+=" ${ONE_PREPVENV_PIP_OPTION} "
+fi
+
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install --upgrade pip setuptools
+if [ -n "${EXT_TENSORFLOW_WHL}" ]; then
+ ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install ${EXT_TENSORFLOW_WHL}
+else
+ ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow-cpu==${VER_TENSORFLOW}
+fi
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install Pillow
+# Pin versions to those available as of the TF release date
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow_probability==0.20.1
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow_addons==0.20.0
+
+# Install PyTorch and ONNX related
+# NOTE set ONE_PREPVENV_TORCH_STABLE to override the 'torch_stable.html' URL.
+# torch_stable.html points to the download URLs of torch wheel file(s),
+# but the server sometimes becomes unstable, especially from in-house CI.
+TORCH_STABLE_URL="https://download.pytorch.org/whl/torch_stable.html"
+if [[ ! -z "$ONE_PREPVENV_TORCH_STABLE" ]]; then
+ TORCH_STABLE_URL="${ONE_PREPVENV_TORCH_STABLE}"
+fi
+# TODO remove torch message
+echo "Torch from '${ONE_PREPVENV_TORCH_STABLE}' -> '${TORCH_STABLE_URL}'"
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install torch==1.13.1+cpu -f ${TORCH_STABLE_URL}
+
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnx==${VER_ONNX}
+
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnxruntime==${VER_ONNXRUNTIME}
+
+# Install a custom onnx-tf wheel if provided
+if [ -n "${EXT_ONNX_TF_WHL}" ]; then
+ ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install ${EXT_ONNX_TF_WHL}
+else
+ ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnx-tf==${VER_ONNX_TF}
+fi
+
+# Pin versions to those available as of the TF release date
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install --upgrade protobuf==4.23.3
+
+# Install pydot for visq
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install pydot==${VER_PYDOT}
diff --git a/compiler/one-cmds/one-prepare-venv.aarch64 b/compiler/one-cmds/one-prepare-venv.aarch64
new file mode 100644
index 000000000..c8850df82
--- /dev/null
+++ b/compiler/one-cmds/one-prepare-venv.aarch64
@@ -0,0 +1,139 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -e
+
+DRIVER_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+VENV_ACTIVATE=${DRIVER_PATH}/venv/bin/activate
+# NOTE Use the venv's python binary directly instead of `python` after `source activate`.
+# This script is called by a debian maintainer script, i.e. `postinst`.
+# Since the debian maintainer script is called with sudo, `source activate` is ignored.
+VENV_PYTHON=${DRIVER_PATH}/venv/bin/python
+
+if [ ! -f ${VENV_ACTIVATE} ]; then
+ # Create python virtual environment
+ python3 -m venv "${DRIVER_PATH}/venv"
+fi
+
+# NOTE version
+# - https://github.com/onnx/onnx/blob/master/docs/Versioning.md
+# - https://github.com/onnx/onnx-tensorflow/blob/master/Versioning.md
+
+VER_TENSORFLOW=2.12.1
+VER_ONNX=1.14.0
+VER_ONNXRUNTIME=1.15.0
+VER_ONNX_TF=1.10.0
+VER_PYDOT=1.4.2
+
+# Install tensorflow
+
+PIP_TRUSTED_HOST="--trusted-host pypi.org "
+PIP_TRUSTED_HOST+="--trusted-host pypi.python.org "
+PIP_TRUSTED_HOST+="--trusted-host files.pythonhosted.org "
+PIP_TRUSTED_HOST+="--trusted-host download.pytorch.org "
+
+PIP_TIMEOUT="--default-timeout=1000 "
+
+PIP_OPTIONS="${PIP_TIMEOUT} ${PIP_TRUSTED_HOST}"
+
+# NOTE $ONE_PREPVENV_PIP_OPTION is to provide additional PIP options
+# such as a certificate file behind a firewall
+# ex) ONE_PREPVENV_PIP_OPTION="--cert SomePrivateCertificate.crt" ./one-prepare-venv
+if [[ ! -z "$ONE_PREPVENV_PIP_OPTION" ]]; then
+ PIP_OPTIONS+=" ${ONE_PREPVENV_PIP_OPTION} "
+fi
+
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install --upgrade pip setuptools
+if [ -n "${EXT_TENSORFLOW_WHL}" ]; then
+ ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install ${EXT_TENSORFLOW_WHL}
+else
+ ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow==${VER_TENSORFLOW}
+fi
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install Pillow
+# Pin versions to those available as of the TF release date
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow_probability==0.20.1
+#${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow_addons==0.20.0
+
+# NOTE
+#
+# - Since no tensorflow_addons 0.20.0 package distribution exists for this
+#   platform, it is built from source at the time of one-prepare-venv.
+#   This is not a perfect solution as it requires a build environment at
+#   install time.
+#
+# - Later, a pre-built package should be uploaded and downloaded at
+#   install time instead. (Or expect an appropriate official
+#   tensorflow_addons package to be distributed.)
+
+# Make temporary workspace for the build
+BAZEL_BUILD_PATH=$(mktemp -d)
+pushd $BAZEL_BUILD_PATH
+source $VENV_ACTIVATE
+
+# Download tensorflow_addons source
+git clone https://github.com/tensorflow/addons.git
+cd addons
+git checkout -b r0.20 origin/r0.20
+
+# Install bazel
+wget https://github.com/bazelbuild/bazelisk/releases/download/v1.17.0/bazelisk-linux-arm64
+chmod 755 bazelisk-linux-arm64
+ln -s bazelisk-linux-arm64 bazel
+
+# This script links project with TensorFlow dependency
+python3 ./configure.py
+
+# Build
+./bazel build build_pip_pkg
+bazel-bin/build_pip_pkg artifacts
+
+# Install tensorflow_addons
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install artifacts/tensorflow_addons-*.whl
+
+# Remove temporary workspace
+deactivate
+popd
+rm -rf $BAZEL_BUILD_PATH
+
+# Install PyTorch and ONNX related
+# NOTE set ONE_PREPVENV_TORCH_STABLE to override the 'torch_stable.html' URL.
+# torch_stable.html points to the download URLs of torch wheel file(s),
+# but the server sometimes becomes unstable, especially from in-house CI.
+TORCH_STABLE_URL="https://download.pytorch.org/whl/torch_stable.html"
+if [[ ! -z "$ONE_PREPVENV_TORCH_STABLE" ]]; then
+ TORCH_STABLE_URL="${ONE_PREPVENV_TORCH_STABLE}"
+fi
+# TODO remove torch message
+echo "Torch from '${ONE_PREPVENV_TORCH_STABLE}' -> '${TORCH_STABLE_URL}'"
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install torch==1.13.1 -f ${TORCH_STABLE_URL}
+
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnx==${VER_ONNX}
+
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnxruntime==${VER_ONNXRUNTIME}
+
+# Install a custom onnx-tf wheel if provided
+if [ -n "${EXT_ONNX_TF_WHL}" ]; then
+ ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install ${EXT_ONNX_TF_WHL}
+else
+ ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnx-tf==${VER_ONNX_TF}
+fi
+
+# Pin versions to those available as of the TF release date
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install --upgrade protobuf==4.23.3
+
+# Install pydot for visq
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install pydot==${VER_PYDOT}
diff --git a/compiler/one-cmds/one-prepare-venv.u1804 b/compiler/one-cmds/one-prepare-venv.u1804
new file mode 100644
index 000000000..55c79c6e0
--- /dev/null
+++ b/compiler/one-cmds/one-prepare-venv.u1804
@@ -0,0 +1,98 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -e
+
+DRIVER_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+VENV_ACTIVATE=${DRIVER_PATH}/venv/bin/activate
+# NOTE Use the venv's python binary directly instead of `python` after `source activate`.
+# This script is called by a debian maintainer script, i.e. `postinst`.
+# Since the debian maintainer script is called with sudo, `source activate` is ignored.
+VENV_PYTHON=${DRIVER_PATH}/venv/bin/python
+
+if [ ! -f ${VENV_ACTIVATE} ]; then
+ # Create python virtual environment
+ python3.8 -m venv "${DRIVER_PATH}/venv"
+fi
+
+# NOTE version
+# - https://github.com/onnx/onnx/blob/master/docs/Versioning.md
+# - https://github.com/onnx/onnx-tensorflow/blob/master/Versioning.md
+
+VER_TENSORFLOW=2.12.1
+VER_ONNX=1.14.0
+VER_ONNXRUNTIME=1.15.0
+VER_ONNX_TF=1.10.0
+VER_PYDOT=1.4.2
+
+# Install tensorflow
+
+PIP_TRUSTED_HOST="--trusted-host pypi.org "
+PIP_TRUSTED_HOST+="--trusted-host pypi.python.org "
+PIP_TRUSTED_HOST+="--trusted-host files.pythonhosted.org "
+PIP_TRUSTED_HOST+="--trusted-host download.pytorch.org "
+
+PIP_TIMEOUT="--default-timeout=1000 "
+
+PIP_OPTIONS="${PIP_TIMEOUT} ${PIP_TRUSTED_HOST}"
+
+# NOTE $ONE_PREPVENV_PIP_OPTION is to provide additional PIP options
+# such as a certificate file behind a firewall
+# ex) ONE_PREPVENV_PIP_OPTION="--cert SomePrivateCertificate.crt" ./one-prepare-venv
+if [[ ! -z "$ONE_PREPVENV_PIP_OPTION" ]]; then
+ PIP_OPTIONS+=" ${ONE_PREPVENV_PIP_OPTION} "
+fi
+
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install --upgrade pip setuptools
+if [ -n "${EXT_TENSORFLOW_WHL}" ]; then
+ ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install ${EXT_TENSORFLOW_WHL}
+else
+ ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow-cpu==${VER_TENSORFLOW}
+fi
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install Pillow
+# Pin versions to those available as of the TF release date
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow_probability==0.20.1
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow_addons==0.20.0
+
+# Install PyTorch and ONNX related
+# NOTE set ONE_PREPVENV_TORCH_STABLE to override the 'torch_stable.html' URL.
+# torch_stable.html points to the download URLs of torch wheel file(s),
+# but the server sometimes becomes unstable, especially from in-house CI.
+TORCH_STABLE_URL="https://download.pytorch.org/whl/torch_stable.html"
+if [[ ! -z "$ONE_PREPVENV_TORCH_STABLE" ]]; then
+ TORCH_STABLE_URL="${ONE_PREPVENV_TORCH_STABLE}"
+fi
+# TODO remove torch message
+echo "Torch from '${ONE_PREPVENV_TORCH_STABLE}' -> '${TORCH_STABLE_URL}'"
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install torch==1.13.1+cpu -f ${TORCH_STABLE_URL}
+
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnx==${VER_ONNX}
+
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnxruntime==${VER_ONNXRUNTIME}
+
+# Install a custom onnx-tf wheel if provided
+if [ -n "${EXT_ONNX_TF_WHL}" ]; then
+ ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install ${EXT_ONNX_TF_WHL}
+else
+ ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnx-tf==${VER_ONNX_TF}
+fi
+
+# Pin versions to those available as of the TF release date
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install --upgrade protobuf==4.23.3
+
+# Install pydot for visq
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install pydot==${VER_PYDOT}
diff --git a/compiler/one-cmds/one-profile b/compiler/one-cmds/one-profile
new file mode 100644
index 000000000..bc5338dcc
--- /dev/null
+++ b/compiler/one-cmds/one-profile
@@ -0,0 +1,246 @@
+#!/usr/bin/env bash
+''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # '''
+''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # '''
+''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # '''
+''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
+''''exit 255 # '''
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import copy
+import glob
+import itertools
+import ntpath
+import os
+import sys
+from types import SimpleNamespace
+
+import onelib.utils as oneutils
+
+# TODO Find better way to suppress traceback on error
+sys.tracebacklimit = 0
+
+
+def _get_backends_list():
+ """
+ [one hierarchy]
+ one
+ ├── backends
+ ├── bin
+ ├── doc
+ ├── include
+ ├── lib
+ └── test
+
+ The list where `one-profile` finds its backends
+ - `bin` folder where `one-profile` exists
+ - `backends` folder
+
+ NOTE If there are backends of the same name in different places,
+ the closer a path is to the top of the list, the higher its priority.
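+
+ For example (hypothetical backend name `foo`): if both `bin/foo-profile`
+ and `backends/foo/foo-profile` exist, `bin/foo-profile` is used, because
+ the `bin` folder is scanned first.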
+ """
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+ backend_set = set()
+
+ # bin folder
+ files = [f for f in glob.glob(dir_path + '/*-profile')]
+ # backends folder
+ files += [
+ f for f in glob.glob(dir_path + '/../backends/**/*-profile', recursive=True)
+ ]
+ # TODO find backends in `$PATH`
+
+ backends_list = []
+ for cand in files:
+ base = ntpath.basename(cand)
+ if base not in backend_set and os.path.isfile(cand) and os.access(cand, os.X_OK):
+ backend_set.add(base)
+ backends_list.append(cand)
+
+ return backends_list
+
+
+def _get_parser(backends_list):
+ profile_usage = 'one-profile [-h] [-v] [-C CONFIG] [-b BACKEND] [--] [COMMANDS FOR BACKEND]'
+ parser = argparse.ArgumentParser(
+ description='command line tool for profiling backend model', usage=profile_usage)
+
+ oneutils.add_default_arg(parser)
+
+ # get backend list in the directory
+ backends_name = [ntpath.basename(f) for f in backends_list]
+ if not backends_name:
+ backends_name_message = '(There are no available backend drivers)'
+ else:
+ backends_name_message = '(available backend drivers: ' + ', '.join(
+ backends_name) + ')'
+ backend_help_message = 'backend name to use ' + backends_name_message
+ parser.add_argument('-b', '--backend', type=str, help=backend_help_message)
+
+ return parser
+
+
+def _verify_arg(parser, args, cfg_args, backend_args, unknown_args):
+ """verify given arguments"""
+ cmd_backend_exist = oneutils.is_valid_attr(args, 'backend')
+ cfg_backend_exist = oneutils.is_valid_attr(cfg_args, 'backend')
+ cfg_backends_exist = oneutils.is_valid_attr(cfg_args, 'backends')
+
+ # check if required arguments are given
+ missing = []
+ if not cmd_backend_exist and not cfg_backend_exist and not cfg_backends_exist:
+ missing.append('-b/--backend')
+ if len(missing):
+ parser.error('the following arguments are required: ' + ' '.join(missing))
+
+ if not oneutils.is_valid_attr(args, 'config'):
+ if not backend_args and not unknown_args:
+ parser.error('commands for the backend are missing.')
+
+ if cfg_backend_exist and cfg_backends_exist:
+ parser.error(
+ '\'backend\' option and \'backends\' option cannot be used simultaneously.')
+
+ # Check if given backend from command line exists in the configuration file
+ if cmd_backend_exist and cfg_backend_exist:
+ if args.backend != cfg_args.backend:
+ parser.error('No command found for the given backend')
+
+ if cfg_backend_exist and not oneutils.is_valid_attr(cfg_args, 'command'):
+ parser.error('\'command\' key is missing in the configuration file.')
+
+ if cfg_backends_exist:
+ cfg_backends = getattr(cfg_args, 'backends').split(',')
+ # check if commands of given backends exist
+ for b in cfg_backends:
+ if not oneutils.is_valid_attr(cfg_args, b):
+ parser.error('No command found for ' + b)
+ # Check if given backend from command line exists in the configuration file
+ if cmd_backend_exist:
+ if args.backend not in cfg_backends:
+ parser.error('No command found for the given backend')
+
+
+def _parse_arg(parser):
+ profile_args = []
+ backend_args = []
+ unknown_args = []
+ argv = copy.deepcopy(sys.argv)
+ # delete file name
+ del argv[0]
+ # split by '--'
+ args = [list(y) for x, y in itertools.groupby(argv, lambda z: z == '--') if not x]
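+ # e.g. (illustrative) ['-b', 'dummy', '--', 'run', '--opt'] -> [['-b', 'dummy'], ['run', '--opt']]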
+ if len(args) == 0:
+ profile_args = parser.parse_args(profile_args)
+ # one-profile has two interfaces
+ # 1. one-profile [-h] [-v] [-C CONFIG] [-b BACKEND] [COMMANDS FOR BACKEND]
+ if len(args) == 1:
+ profile_args = args[0]
+ profile_args, unknown_args = parser.parse_known_args(profile_args)
+ # 2. one-profile [-h] [-v] [-C CONFIG] [-b BACKEND] -- [COMMANDS FOR BACKEND]
+ if len(args) == 2:
+ profile_args = args[0]
+ backend_args = args[1]
+ profile_args = parser.parse_args(profile_args)
+ # print version
+ if len(args) and profile_args.version:
+ oneutils.print_version_and_exit(__file__)
+
+ return profile_args, backend_args, unknown_args
+
+
+def main():
+ # get backend list
+ backends_list = _get_backends_list()
+
+ # parse arguments
+ parser = _get_parser(backends_list)
+ args, backend_args, unknown_args = _parse_arg(parser)
+
+ # parse configuration file
+ cfg_args = SimpleNamespace()
+ oneutils.parse_cfg(args.config, 'one-profile', cfg_args)
+
+ # verify arguments
+ _verify_arg(parser, args, cfg_args, backend_args, unknown_args)
+ '''
+ one-profile defines its behavior for below cases.
+
+ [1] one-profile -h
+ [2] one-profile -v
+ [3] one-profile -C ${cfg} (backend, command key in cfg)
+ [4] one-profile -C ${cfg} (backends key in cfg)
+ [5] one-profile -b ${backend} ${command}
+ [6] one-profile -b ${backend} -- ${command}
+ [7] one-profile -b ${backend} -C {cfg} (backend, command key in cfg)
+ [8] one-profile -b ${backend} -C {cfg} (backends key in cfg) (Only 'backend' is invoked,
+ even though cfg file has multiple backends)
+ [9] one-profile -b ${backend} -C ${cfg} -- ${command} (backend, command key in cfg)
+ [10] one-profile -b ${backend} -C ${cfg} -- ${command} (backends key in cfg) (Only 'backend' is invoked,
+ even though cfg file has multiple backends)
+
+ All other cases are not allowed or are undefined behavior.
+ '''
+ cmd_overwrite = False
+ if oneutils.is_valid_attr(args, 'config'):
+ # [9], [10]
+ if backend_args and not unknown_args:
+ given_backends = [args.backend]
+ cmd_overwrite = True
+ else:
+ # [7], [8]
+ if oneutils.is_valid_attr(args, 'backend'):
+ given_backends = [args.backend]
+ if oneutils.is_valid_attr(cfg_args, 'backend'):
+ assert (oneutils.is_valid_attr(cfg_args, 'command'))
+ setattr(cfg_args, args.backend, cfg_args.command)
+ else:
+ # [3]
+ if oneutils.is_valid_attr(cfg_args, 'backend'):
+ assert (oneutils.is_valid_attr(cfg_args, 'command'))
+ given_backends = [cfg_args.backend]
+ setattr(cfg_args, cfg_args.backend, cfg_args.command)
+ # [4]
+ if oneutils.is_valid_attr(cfg_args, 'backends'):
+ given_backends = cfg_args.backends.split(',')
+ # [5], [6]
+ else:
+ assert (backend_args or unknown_args)
+ given_backends = [args.backend]
+
+ for given_backend in given_backends:
+ # make a command to run given backend driver
+ profile_path = None
+ backend_base = given_backend + '-profile'
+ for cand in backends_list:
+ if ntpath.basename(cand) == backend_base:
+ profile_path = cand
+ if not profile_path:
+ raise FileNotFoundError(backend_base + ' not found')
+
+ profile_cmd = [profile_path]
+ if not cmd_overwrite and oneutils.is_valid_attr(cfg_args, given_backend):
+ profile_cmd += getattr(cfg_args, given_backend).split()
+ else:
+ profile_cmd += backend_args
+ profile_cmd += unknown_args
+
+ # run backend driver
+ oneutils.run(profile_cmd, err_prefix=backend_base)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/compiler/one-cmds/one-quantize b/compiler/one-cmds/one-quantize
index c74b2c2d2..f686aad29 100644
--- a/compiler/one-cmds/one-quantize
+++ b/compiler/one-cmds/one-quantize
@@ -1,4 +1,9 @@
-#!/bin/bash
+#!/usr/bin/env bash
+''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # '''
+''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # '''
+''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # '''
+''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
+''''exit 255 # '''
# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
#
@@ -14,157 +19,859 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-set -e
-
-DRIVER_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-
-usage()
-{
- echo "Quantize circle model."
- echo "Usage: one-quantize"
- echo " --version Show version information and exit"
- echo " --input_dtype Input data type (supported: float32, default=float32)"
- echo " --quantized_dtype Output quantized data type (supported: uint8, default=uint8)"
- echo " --granularity Quantize granularity (supported: layer, channel, default=layer)"
- echo " --min_percentile Minimum percentile (0.0~100.0, default=1.0)"
- echo " --max_percentile Maximum percentile (0.0~100.0, default=99.0)"
- echo " --mode Record mode (supported: percentile/moving_average, default=percentile)"
- echo " --input_path <path/to/input/circle>"
- echo " --input_data <path/to/input/data>"
- echo " --output_path <path/to/output/circle>"
- exit 255
-}
-
-version()
-{
- $DRIVER_PATH/one-version one-quantize
- exit 255
-}
-
-INPUT_DTYPE=float32
-QUANTIZED_DTYPE=uint8
-GRANULARITY=layer
-MIN_PERCENTILE=1
-MAX_PERCENTILE=99
-MODE=percentile
-
-# Parse command-line arguments
-#
-while [ "$#" -ne 0 ]; do
- CUR="$1"
-
- case $CUR in
- '--help')
- usage
- ;;
- '--version')
- version
- ;;
-
- '--input_dtype')
- INPUT_DTYPE="$2"
- shift 2
- ;;
- '--quantized_dtype')
- QUANTIZED_DTYPE="$2"
- shift 2
- ;;
- '--granularity')
- GRANULARITY="$2"
- shift 2
- ;;
- '--min_percentile')
- MIN_PERCENTILE="$2"
- shift 2
- ;;
- '--max_percentile')
- MAX_PERCENTILE="$2"
- shift 2
- ;;
- '--mode')
- MODE="$2"
- shift 2
- ;;
-
- '--input_path')
- INPUT_PATH="$2"
- shift 2
- ;;
- '--input_data')
- INPUT_DATA="$2"
- shift 2
- ;;
- '--output_path')
- OUTPUT_PATH="$2"
- shift 2
- ;;
-
- *)
- echo "Unknown parameter: ${CUR}"
- shift
- ;;
- esac
-done
-
-if [ -z ${INPUT_PATH} ] || [ ! -e ${INPUT_PATH} ]; then
- echo "Error: input model not found"
- echo ""
- usage
-fi
-if [ -z ${INPUT_DATA} ] || [ ! -e ${INPUT_DATA} ]; then
- echo "Error: input data not found"
- echo ""
- usage
-fi
-
-FILE_BASE=$(basename ${OUTPUT_PATH})
-MODEL_NAME="${FILE_BASE%.*}"
-
-TMPDIR=$(mktemp -d)
-trap "{ rm -rf $TMPDIR; }" EXIT
-
-# remove previous log
-rm -rf "${OUTPUT_PATH}.log"
-
-show_err_onexit()
-{
- cat "${OUTPUT_PATH}.log"
-}
-
-trap show_err_onexit ERR
-
-# quantize circle
-echo "${DRIVER_PATH}/circle-quantizer" \
---quantize_dequantize_weights ${INPUT_DTYPE} ${QUANTIZED_DTYPE} ${GRANULARITY} \
-"${INPUT_PATH}" "${TMPDIR}/${MODEL_NAME}.1.circle" > "${OUTPUT_PATH}.log"
-echo " " >> "${OUTPUT_PATH}.log"
-
-"${DRIVER_PATH}/circle-quantizer" \
---quantize_dequantize_weights ${INPUT_DTYPE} ${QUANTIZED_DTYPE} ${GRANULARITY} \
-"${INPUT_PATH}" "${TMPDIR}/${MODEL_NAME}.1.circle" >> "${OUTPUT_PATH}.log" 2>&1
-
-echo " " >> "${OUTPUT_PATH}.log"
-echo "${DRIVER_PATH}/record-minmax" \
---input_model "${TMPDIR}/${MODEL_NAME}.1.circle" \
---input_data "${INPUT_DATA}" \
---min_percentile ${MIN_PERCENTILE} --max_percentile ${MAX_PERCENTILE} \
---mode "${MODE}" \
---output_model "${TMPDIR}/${MODEL_NAME}.2.circle" >> "${OUTPUT_PATH}.log" 2>&1
-echo " " >> "${OUTPUT_PATH}.log"
-
-"${DRIVER_PATH}/record-minmax" \
---input_model "${TMPDIR}/${MODEL_NAME}.1.circle" \
---input_data "${INPUT_DATA}" \
---min_percentile ${MIN_PERCENTILE} --max_percentile ${MAX_PERCENTILE} \
---mode "${MODE}" \
---output_model "${TMPDIR}/${MODEL_NAME}.2.circle" >> "${OUTPUT_PATH}.log" 2>&1
-
-echo " " >> "${OUTPUT_PATH}.log"
-echo "${DRIVER_PATH}/circle-quantizer" \
---quantize_with_minmax ${INPUT_DTYPE} ${QUANTIZED_DTYPE} ${GRANULARITY} \
-"${TMPDIR}/${MODEL_NAME}.2.circle" "${OUTPUT_PATH}" >> "${OUTPUT_PATH}.log" 2>&1
-echo " " >> "${OUTPUT_PATH}.log"
-
-"${DRIVER_PATH}/circle-quantizer" \
---quantize_with_minmax ${INPUT_DTYPE} ${QUANTIZED_DTYPE} ${GRANULARITY} \
-"${TMPDIR}/${MODEL_NAME}.2.circle" "${OUTPUT_PATH}" >> "${OUTPUT_PATH}.log" 2>&1
+import argparse
+import os
+import sys
+import tempfile
+import json
+
+import onelib.utils as oneutils
+from onelib.Command import Command
+
+# TODO Find better way to suppress traceback on error
+sys.tracebacklimit = 0
+
+
+def _get_parser():
+ parser = argparse.ArgumentParser(
+ description='command line tool to quantize circle model')
+
+ oneutils.add_default_arg(parser)
+
+ # input and output path.
+ parser.add_argument(
+ '-i', '--input_path', type=str, help='full filepath of the input circle model')
+ parser.add_argument(
+ '-d',
+ '--input_data',
+ type=str,
+ help=
+ 'full filepath of the input data used for post-training quantization. If not specified, it runs with random input data.'
+ )
+ parser.add_argument(
+ '-f',
+ '--input_data_format',
+ type=str,
+ help=
+ 'file format of input data. h5/hdf5 (default), list/filelist (a text file where a file path of input data is written in each line), or dir/directory (a directory where input data are saved)'
+ )
+ parser.add_argument(
+ '-o',
+ '--output_path',
+ type=str,
+ help='full filepath of the output quantized model')
+
+ # argument for profiling
+ parser.add_argument(
+ '-p',
+ '--generate_profile_data',
+ action='store_true',
+ help='generate profiling data')
+
+ # save intermediate file(s)
+ parser.add_argument(
+ '--save_intermediate',
+ action='store_true',
+ help='Save intermediate files to output folder')
+
+ ## arguments for quantization
+ quantization_group = parser.add_argument_group('arguments for quantization')
+
+ quantization_group.add_argument(
+ '--input_dtype',
+ type=str,
+ help=
+ 'input model data type (supported: float32, default=float32). Deprecated (Use input_model_dtype)'
+ )
+ quantization_group.add_argument(
+ '--input_model_dtype',
+ type=str,
+ help='input model data type (supported: float32, default=float32)')
+ quantization_group.add_argument(
+ '--quantized_dtype',
+ type=str,
+ help='data type of output quantized model (supported: uint8, int16, default=uint8)'
+ )
+ quantization_group.add_argument(
+ '--granularity',
+ type=str,
+ help='quantization granularity (supported: layer, channel, default=layer)')
+ quantization_group.add_argument(
+ '--input_type',
+ type=str,
+ help=
+ 'data type of inputs of quantized model (supported: uint8, int16, float32, default=quantized_dtype). QUANTIZE Op will be inserted at the beginning of the quantized model if input_type is different from quantized_dtype.'
+ )
+ quantization_group.add_argument(
+ '--output_type',
+ type=str,
+ help=
+ 'data type of outputs of quantized model (supported: uint8, int16, float32, default=quantized_dtype). QUANTIZE Op will be inserted at the end of the quantized model if output_type is different from quantized_dtype.'
+ )
+ quantization_group.add_argument(
+ '--min_percentile',
+ type=str,
+ help=
+ 'minimum percentile (0.0~100.0, default=1.0). Algorithm parameter for calibration. This is valid when calibration algorithm is percentile.'
+ )
+ quantization_group.add_argument(
+ '--max_percentile',
+ type=str,
+ help=
+ 'maximum percentile (0.0~100.0, default=99.0). Algorithm parameter for calibration. This is valid when calibration algorithm is percentile.'
+ )
+ quantization_group.add_argument(
+ '--moving_avg_batch',
+ type=str,
+ help=
+ 'batch size of moving average (default=16). This is valid when calibration algorithm is moving_average.'
+ )
+ quantization_group.add_argument(
+ '--moving_avg_const',
+ type=str,
+ help=
+ 'hyperparameter (C) to compute moving average (default=0.1). Update equation: avg <- avg + C * (curr_batch_avg - avg). This is valid when calibration algorithm is moving_average.'
+ )
+ quantization_group.add_argument(
+ '--mode',
+ type=str,
+ help=
+ "calibration algorithm for post-training quantization (supported: percentile/moving_average, default=percentile). 'percentile' mode uses the n-th percentiles as min/max values. 'moving_average' mode records the moving average of min/max."
+ )
+ quantization_group.add_argument(
+ '--TF-style_maxpool',
+ action='store_true',
+ help=
+ "Force MaxPool Op to have the same input/output quantparams. NOTE: This option can degrade accuracy of some models.)"
+ )
+ quantization_group.add_argument(
+ '--quant_config', type=str, help="Path to the quantization configuration file.")
+ quantization_group.add_argument(
+ '--evaluate_result',
+ action='store_true',
+ help=
+ "Evaluate accuracy of quantized model. Run inference for both fp32 model and the quantized model, and compare the inference results."
+ )
+ quantization_group.add_argument(
+ '--test_data', type=str, help="Path to the test data used for evaluation.")
+ quantization_group.add_argument(
+ '--print_mae',
+ action='store_true',
+ help=
+ "Print MAE (Mean Absolute Error) of inference results between quantized model and fp32 model."
+ )
+ quantization_group.add_argument(
+ '--print_mape',
+ action='store_true',
+ help=
+ "Print MAPE (Mean Absolute Percentage Error) of inference results between quantized model and fp32 model."
+ )
+ quantization_group.add_argument(
+ '--print_mpeir',
+ action='store_true',
+ help=
+ "Print MPEIR (Mean Peak Error to Interval Ratio) of inference results between quantized model and fp32 model."
+ )
+ quantization_group.add_argument(
+ '--print_top1_match',
+ action='store_true',
+ help=
+ "Print Top-1 match ratio of inference results between quantized model and fp32 model."
+ )
+ quantization_group.add_argument(
+ '--print_top5_match',
+ action='store_true',
+ help=
+ "Print Top-5 match ratio of inference results between quantized model and fp32 model."
+ )
+ quantization_group.add_argument(
+ '--print_mse',
+ action='store_true',
+ help=
+ "Print MSE (Mean Squared Error) of inference results between quantized model and fp32 model."
+ )
+
+ # arguments for force_quantparam option
+ force_quantparam_group = parser.add_argument_group(
+ 'arguments for force_quantparam option')
+
+ force_quantparam_group.add_argument(
+ '--force_quantparam',
+ action='store_true',
+ help=
+ 'overwrite quantparam (scale, zero_point) to the specified tensor in the quantized model.'
+ )
+ force_quantparam_group.add_argument(
+ '--tensor_name', type=str, action='append', help='tensor name (string)')
+ force_quantparam_group.add_argument(
+ '--scale', type=float, action='append', help='scale (float)')
+ force_quantparam_group.add_argument(
+ '--zero_point', type=int, action='append', help='zero point (int)')
+
+ # arguments for copy_quantparam option
+ copy_quantparam_group = parser.add_argument_group(
+ 'arguments for copy_quantparam option')
+
+ copy_quantparam_group.add_argument(
+ '--copy_quantparam',
+ action='store_true',
+ help='copy quantparam (scale, zero_point) of a tensor to another tensor.')
+ copy_quantparam_group.add_argument(
+ '--src_tensor_name', type=str, action='append', help='tensor name (string)')
+ copy_quantparam_group.add_argument(
+ '--dst_tensor_name', type=str, action='append', help='tensor name (string)')
+
+ # arguments for fake_quant option
+ fake_quant_group = parser.add_argument_group('arguments for fake_quantize option')
+
+ fake_quant_group.add_argument(
+ '--fake_quantize',
+ action='store_true',
+ help='convert quantized model to fake-quantized fp32 model.')
+
+ # arguments for requantize option
+ requantize_group = parser.add_argument_group('arguments for requantize option')
+
+ requantize_group.add_argument(
+ '--requantize',
+ action='store_true',
+ help='convert quantized model to another-typed quantized model (ex: int8 -> uint8).'
+ )
+
+ # arguments for ampq option
+ ampq_quant_group = parser.add_argument_group('arguments for ampq option')
+ # ampq
+ ampq_quant_group.add_argument(
+ '--ampq', action='store_true', help='quantize model using ampq solver.')
+
+ # ampq_qerror_ratio
+ ampq_quant_group.add_argument(
+ '--ampq_qerror_ratio', type=str, help='quantization error ratio ([0, 1])')
+
+ # ampq_algorithm
+ ampq_quant_group.add_argument(
+ '--ampq_algorithm', type=str, help='type of algorithm (bisection)')
+
+ ampq_quant_group.add_argument(
+ '--bisection_type', type=str, help="one of 'auto', 'i16_front', 'i16_back'")
+
+ # ampq_bisection_visq
+ ampq_quant_group.add_argument(
+ '--ampq_bisection_visq',
+ type=str,
+ help='.visq.json file path with quantization errors')
+
+ return parser
+
+
+def _set_default_values(args):
+ if not oneutils.is_valid_attr(args,
+ 'input_model_dtype') and not oneutils.is_valid_attr(
+ args, 'input_dtype'):
+ setattr(args, 'input_model_dtype', 'float32')
+ if not oneutils.is_valid_attr(args, 'quantized_dtype'):
+ setattr(args, 'quantized_dtype', 'uint8')
+ if oneutils.is_valid_attr(args, 'quant_config'):
+ # Get quantized_dtype from qconfig file
+ try:
+ with open(getattr(args, 'quant_config')) as f:
+ qconf = json.load(f)
+ if 'default_quantization_dtype' in qconf:
+ setattr(args, 'quantized_dtype',
+ qconf['default_quantization_dtype'])
+ except json.decoder.JSONDecodeError:
+ print('Failed to decode ' + getattr(args, 'quant_config') +
+ '. Please check it is a json file.')
+ if not oneutils.is_valid_attr(args, 'granularity'):
+ setattr(args, 'granularity', 'layer')
+ if oneutils.is_valid_attr(args, 'quant_config'):
+ # Get granularity from qconfig file
+ try:
+ with open(getattr(args, 'quant_config')) as f:
+ qconf = json.load(f)
+ if 'default_granularity' in qconf:
+ setattr(args, 'granularity', qconf['default_granularity'])
+ except json.decoder.JSONDecodeError:
+ print('Failed to decode ' + getattr(args, 'quant_config') +
+ '. Please check it is a json file.')
+ if not oneutils.is_valid_attr(args, 'mode'):
+ setattr(args, 'mode', 'percentile')
+ if not oneutils.is_valid_attr(args, 'min_percentile'):
+ setattr(args, 'min_percentile', '1.0')
+ if not oneutils.is_valid_attr(args, 'max_percentile'):
+ setattr(args, 'max_percentile', '99.0')
+ if not oneutils.is_valid_attr(args, 'moving_avg_batch'):
+ setattr(args, 'moving_avg_batch', '16')
+ if not oneutils.is_valid_attr(args, 'moving_avg_const'):
+ setattr(args, 'moving_avg_const', '0.1')
+ if not oneutils.is_valid_attr(args, 'ampq_algorithm'):
+ setattr(args, 'ampq_algorithm', 'bisection')
+ if not oneutils.is_valid_attr(args, 'bisection_type'):
+ setattr(args, 'bisection_type', 'auto')
+
+
+def _verify_arg_pre(parser, args):
+ """verify given arguments before default values are set"""
+ # check if required arguments are given
+ missing = []
+ if oneutils.is_valid_attr(args, 'requantize'):
+ if not oneutils.is_valid_attr(args,
+ 'input_model_dtype') and not oneutils.is_valid_attr(
+ args, 'input_dtype'):
+ missing.append('--input_model_dtype')
+ if not oneutils.is_valid_attr(args, 'quantized_dtype'):
+ missing.append('--quantized_dtype')
+ if len(missing):
+ parser.error('the following arguments are required: ' + ' '.join(missing))
+
+
+def _verify_arg(parser, args):
+ """verify given arguments"""
+ # check if required arguments are given
+ missing = []
+ if not oneutils.is_valid_attr(args, 'input_path'):
+ missing.append('-i/--input_path')
+ if not oneutils.is_valid_attr(args, 'output_path'):
+ missing.append('-o/--output_path')
+ if oneutils.is_valid_attr(args, 'force_quantparam'):
+ if not oneutils.is_valid_attr(args, 'tensor_name'):
+ missing.append('--tensor_name')
+ if not oneutils.is_valid_attr(args, 'scale'):
+ missing.append('--scale')
+ if not oneutils.is_valid_attr(args, 'zero_point'):
+ missing.append('--zero_point')
+ if oneutils.is_valid_attr(args, 'copy_quantparam'):
+ if not oneutils.is_valid_attr(args, 'src_tensor_name'):
+ missing.append('--src_tensor_name')
+ if not oneutils.is_valid_attr(args, 'dst_tensor_name'):
+ missing.append('--dst_tensor_name')
+ if len(missing):
+ parser.error('the following arguments are required: ' + ' '.join(missing))
+ if oneutils.is_valid_attr(args, 'force_quantparam'):
+ tensors = getattr(args, 'tensor_name')
+ scales = getattr(args, 'scale')
+ zerops = getattr(args, 'zero_point')
+ if len(tensors) != len(scales) or len(tensors) != len(zerops):
+ parser.error(
+ 'The same number of tensor_name, scale, and zero_point should be given.')
+ if oneutils.is_valid_attr(args, 'copy_quantparam'):
+ src_tensors = getattr(args, 'src_tensor_name')
+ dst_tensors = getattr(args, 'dst_tensor_name')
+ if len(src_tensors) != len(dst_tensors):
+ parser.error(
+ 'The same number of src_tensor_name and dst_tensor_name should be given.')
+
+ # Check calibration parameters
+ if oneutils.is_valid_attr(args, 'mode'):
+ if getattr(args, 'mode') == 'percentile':
+ # Parse only to validate the type; parsed values are unused below
+ try:
+ min_percentile = float(getattr(args, 'min_percentile'))
+ except ValueError:
+ parser.error('min_percentile must be float')
+ try:
+ max_percentile = float(getattr(args, 'max_percentile'))
+ except ValueError:
+ parser.error('max_percentile must be float')
+ elif getattr(args, 'mode') == 'moving_average':
+ # Parse only to validate the type; parsed values are unused below
+ try:
+ moving_avg_batch = int(getattr(args, 'moving_avg_batch'))
+ except ValueError:
+ parser.error('moving_avg_batch must be integer')
+ try:
+ moving_avg_const = float(getattr(args, 'moving_avg_const'))
+ except ValueError:
+ parser.error('moving_avg_const must be float')
+ else:
+ parser.error('Unsupported mode')
+
+
+def _parse_arg(parser):
+ args = parser.parse_args()
+ # print version
+ if args.version:
+ oneutils.print_version_and_exit(__file__)
+
+ return args
+
+
+def _quantize(args):
+ if oneutils.is_valid_attr(args, 'ampq'):
+ _ampq_solve(args)
+ return
+
+ if oneutils.is_valid_attr(args, 'force_quantparam'):
+ # write quantization parameters
+ _write_qparam(args)
+ return
+
+ if oneutils.is_valid_attr(args, 'copy_quantparam'):
+ # copy quantization parameters
+ _copy_qparam(args)
+ return
+
+ if oneutils.is_valid_attr(args, 'fake_quantize'):
+ # fake-quantize model
+ _fake_quantize(args)
+ return
+
+ if oneutils.is_valid_attr(args, 'requantize'):
+ # requantize model
+ _requantize(args)
+ return
+
+ # get file path to log
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+ logfile_path = os.path.realpath(args.output_path) + '.log'
+
+ with open(logfile_path, 'wb') as f, tempfile.TemporaryDirectory() as tmpdir:
+ if oneutils.is_valid_attr(args, 'save_intermediate'):
+ tmpdir = os.path.dirname(logfile_path)
+ # get driver path
+ circle_quantizer_path = os.path.join(dir_path, 'circle-quantizer')
+ record_minmax_path = os.path.join(dir_path, 'record-minmax')
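+
+ # The full pipeline below is:
+ #   1. circle-quantizer --quantize_dequantize_weights : fake-quantize weights
+ #   2. record-minmax : record activation min/max over the representative dataset
+ #   3. circle-quantizer --quantize_with_minmax : emit the quantized model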
+
+ ## make a command to quantize and dequantize the weights of the model
+ circle_quantizer_cmd = [circle_quantizer_path]
+ # verbose
+ if oneutils.is_valid_attr(args, 'verbose'):
+ circle_quantizer_cmd.append('--verbose')
+ # quantize_dequantize_weights
+ circle_quantizer_cmd.append('--quantize_dequantize_weights')
+ # Use input_model_dtype if it exists. Use input_dtype otherwise.
+ if oneutils.is_valid_attr(args, 'input_model_dtype'):
+ circle_quantizer_cmd.append(getattr(args, 'input_model_dtype'))
+ elif oneutils.is_valid_attr(args, 'input_dtype'):
+ circle_quantizer_cmd.append(getattr(args, 'input_dtype'))
+ if oneutils.is_valid_attr(args, 'quantized_dtype'):
+ circle_quantizer_cmd.append(getattr(args, 'quantized_dtype'))
+ if oneutils.is_valid_attr(args, 'granularity'):
+ circle_quantizer_cmd.append(getattr(args, 'granularity'))
+ if oneutils.is_valid_attr(args, 'quant_config'):
+ # NOTE --config conflicts with --config option in onecc, so
+ # we use quant_config for one-quantize
+ circle_quantizer_cmd.append('--config')
+ circle_quantizer_cmd.append(getattr(args, 'quant_config'))
+ # input and output path
+ if oneutils.is_valid_attr(args, 'input_path'):
+ circle_quantizer_cmd.append(getattr(args, 'input_path'))
+ tmp_weights_fake_quant_path = os.path.join(
+ tmpdir,
+ os.path.splitext(os.path.basename(
+ args.input_path))[0]) + '.weights_fake_quant.circle'
+ circle_quantizer_cmd.append(tmp_weights_fake_quant_path)
+ # profiling
+ if oneutils.is_valid_attr(args, 'generate_profile_data'):
+ circle_quantizer_cmd.append('--generate_profile_data')
+
+ f.write((' '.join(circle_quantizer_cmd) + '\n').encode())
+
+ # run circle-quantizer
+ oneutils.run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
+
+ tmp_minmax_recorded_path = os.path.join(
+ tmpdir,
+ os.path.splitext(os.path.basename(
+ args.input_path))[0]) + '.minmax_recorded.circle'
+
+ ## make a command to record min-max value of each tensor while running the representative dataset
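+ # Command (onelib.Command) builds the argv fluently: each add_* call appends
+ # its option only when the corresponding attribute is valid, then run()
+ # executes the command against the logfile handle passed in.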
+ record_minmax_cmd = Command(record_minmax_path, args, f)
+ record_minmax_cmd.add_noarg_option_if_valid_arg('--verbose', 'verbose') \
+ .add_option_with_values('--input_model', [tmp_weights_fake_quant_path]) \
+ .add_option_with_values('--output_model', [tmp_minmax_recorded_path]) \
+ .add_option_with_valid_args('--input_data', ['input_data']) \
+ .add_option_with_valid_args('--input_data_format', ['input_data_format']) \
+ .add_option_with_valid_args('--min_percentile', ['min_percentile']) \
+ .add_option_with_valid_args('--max_percentile', ['max_percentile']) \
+ .add_option_with_valid_args('--moving_avg_batch', ['moving_avg_batch']) \
+ .add_option_with_valid_args('--moving_avg_const', ['moving_avg_const']) \
+ .add_option_with_valid_args('--mode', ['mode']) \
+ .add_noarg_option_if_valid_arg('--generate_profile_data', 'generate_profile_data') \
+ .run()
+
+ ## make a second command to quantize the model using the embedded information
+ circle_quantizer_cmd = [circle_quantizer_path]
+ # verbose
+ if oneutils.is_valid_attr(args, 'verbose'):
+ circle_quantizer_cmd.append('--verbose')
+ # quantize_with_minmax
+ circle_quantizer_cmd.append('--quantize_with_minmax')
+ # Use input_model_dtype if it exists. Use input_dtype otherwise.
+ if oneutils.is_valid_attr(args, 'input_model_dtype'):
+ circle_quantizer_cmd.append(getattr(args, 'input_model_dtype'))
+ elif oneutils.is_valid_attr(args, 'input_dtype'):
+ circle_quantizer_cmd.append(getattr(args, 'input_dtype'))
+ if oneutils.is_valid_attr(args, 'quantized_dtype'):
+ circle_quantizer_cmd.append(getattr(args, 'quantized_dtype'))
+ if oneutils.is_valid_attr(args, 'granularity'):
+ circle_quantizer_cmd.append(getattr(args, 'granularity'))
+ if oneutils.is_valid_attr(args, 'TF-style_maxpool'):
+ circle_quantizer_cmd.append('--TF-style_maxpool')
+ if oneutils.is_valid_attr(args, 'input_type'):
+ circle_quantizer_cmd.append('--input_type')
+ circle_quantizer_cmd.append(getattr(args, 'input_type'))
+ if oneutils.is_valid_attr(args, 'output_type'):
+ circle_quantizer_cmd.append('--output_type')
+ circle_quantizer_cmd.append(getattr(args, 'output_type'))
+ if oneutils.is_valid_attr(args, 'quant_config'):
+ # NOTE --config conflicts with --config option in onecc, so
+ # we use quant_config for one-quantize
+ circle_quantizer_cmd.append('--config')
+ circle_quantizer_cmd.append(getattr(args, 'quant_config'))
+ # input and output path
+ circle_quantizer_cmd.append(tmp_minmax_recorded_path)
+ if oneutils.is_valid_attr(args, 'output_path'):
+ circle_quantizer_cmd.append(getattr(args, 'output_path'))
+ # profiling
+ if oneutils.is_valid_attr(args, 'generate_profile_data'):
+ circle_quantizer_cmd.append('--generate_profile_data')
+
+ f.write((' '.join(circle_quantizer_cmd) + '\n').encode())
+
+ # run circle-quantizer
+ oneutils.run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
+
+ # evaluate
+ if oneutils.is_valid_attr(args, 'evaluate_result'):
+ circle_eval_diff_path = os.path.join(dir_path, 'circle-eval-diff')
+ quant_model = ""
+ if oneutils.is_valid_attr(args, 'output_path'):
+ quant_model = getattr(args, 'output_path')
+ tmp_fake_quant_model = os.path.join(
+ tmpdir,
+ os.path.splitext(os.path.basename(
+ args.input_path))[0]) + '.fake_quant.circle'
+
+ # do fake quantization
+ fake_quantize_cmd = Command(circle_quantizer_path, args, f)
+ fake_quantize_cmd.add_noarg_option_if_valid_arg('--verbose', 'verbose') \
+ .add_option_with_values('--fake_quantize', [quant_model, tmp_fake_quant_model]) \
+ .run()
+
+ # compare fake-quant model and fp32 model
+ circle_eval_diff_cmd = Command(circle_eval_diff_path, args, f)
+ circle_eval_diff_cmd.add_option_with_valid_args('--first_model', ['input_path']) \
+ .add_option_with_values('--second_model', [tmp_fake_quant_model]) \
+ .add_option_with_valid_args('--first_input_data', ['test_data']) \
+ .add_option_with_valid_args('--second_input_data', ['test_data']) \
+ .add_option_with_valid_args('--input_data_format', ['input_data_format']) \
+ .add_noarg_option_if_valid_arg('--print_mae', 'print_mae') \
+ .add_noarg_option_if_valid_arg('--print_mape', 'print_mape') \
+ .add_noarg_option_if_valid_arg('--print_mpeir', 'print_mpeir') \
+ .add_noarg_option_if_valid_arg('--print_top1_match', 'print_top1_match') \
+ .add_noarg_option_if_valid_arg('--print_top5_match', 'print_top5_match') \
+ .add_noarg_option_if_valid_arg('--print_mse', 'print_mse') \
+ .run()
+
+
+def _write_qparam(args):
+ # get file path to log
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+ logfile_path = os.path.realpath(args.output_path) + '.log'
+
+ with open(logfile_path, 'wb') as f:
+ # get driver path
+ circle_quantizer_path = os.path.join(dir_path, 'circle-quantizer')
+
+ # make a command to write qparams to the tensors
+ circle_quantizer_cmd = [circle_quantizer_path]
+ # verbose
+ if oneutils.is_valid_attr(args, 'verbose'):
+ circle_quantizer_cmd.append('--verbose')
+ if oneutils.is_valid_attr(args, 'tensor_name'):
+ tensor_name = getattr(args, 'tensor_name')
+ if oneutils.is_valid_attr(args, 'scale'):
+ scale = getattr(args, 'scale')
+ if oneutils.is_valid_attr(args, 'zero_point'):
+ zero_point = getattr(args, 'zero_point')
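+ # each (tensor_name, scale, zero_point) triple becomes
+ # --force_quantparam <name> <scale> <zero_point> on the command line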
+ for (t, s, zp) in zip(tensor_name, scale, zero_point):
+ circle_quantizer_cmd.append('--force_quantparam')
+ circle_quantizer_cmd.append(t)
+ circle_quantizer_cmd.append(str(s))
+ circle_quantizer_cmd.append(str(zp))
+ # input and output path
+ if oneutils.is_valid_attr(args, 'input_path'):
+ circle_quantizer_cmd.append(getattr(args, 'input_path'))
+ if oneutils.is_valid_attr(args, 'output_path'):
+ circle_quantizer_cmd.append(getattr(args, 'output_path'))
+
+ f.write((' '.join(circle_quantizer_cmd) + '\n').encode())
+
+ # run circle-quantizer
+ oneutils.run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
+
+
+def _copy_qparam(args):
+ # get file path to log
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+ logfile_path = os.path.realpath(args.output_path) + '.log'
+
+ with open(logfile_path, 'wb') as f:
+ # get driver path
+ circle_quantizer_path = os.path.join(dir_path, 'circle-quantizer')
+
+ # make a command to write qparams to the tensors
+ circle_quantizer_cmd = [circle_quantizer_path]
+ # verbose
+ if oneutils.is_valid_attr(args, 'verbose'):
+ circle_quantizer_cmd.append('--verbose')
+ if oneutils.is_valid_attr(args, 'src_tensor_name'):
+ src_tensor_name = getattr(args, 'src_tensor_name')
+ if oneutils.is_valid_attr(args, 'dst_tensor_name'):
+ dst_tensor_name = getattr(args, 'dst_tensor_name')
+ for (src, dst) in zip(src_tensor_name, dst_tensor_name):
+ circle_quantizer_cmd.append('--copy_quantparam')
+ circle_quantizer_cmd.append(src)
+ circle_quantizer_cmd.append(dst)
+ # input and output path
+ if oneutils.is_valid_attr(args, 'input_path'):
+ circle_quantizer_cmd.append(getattr(args, 'input_path'))
+ if oneutils.is_valid_attr(args, 'output_path'):
+ circle_quantizer_cmd.append(getattr(args, 'output_path'))
+
+ f.write((' '.join(circle_quantizer_cmd) + '\n').encode())
+
+ # run circle-quantizer
+ oneutils.run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
+
+
+def _fake_quantize(args):
+ # get file path to log
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+ logfile_path = os.path.realpath(args.output_path) + '.log'
+
+ with open(logfile_path, 'wb') as f:
+ # get driver path
+ circle_quantizer_path = os.path.join(dir_path, 'circle-quantizer')
+ q_model = getattr(args, 'input_path')
+ fq_model = getattr(args, 'output_path')
+
+ # do fake quantization
+ fake_quantize_cmd = Command(circle_quantizer_path, args, f)
+ fake_quantize_cmd.add_noarg_option_if_valid_arg('--verbose', 'verbose') \
+ .add_option_with_values('--fake_quantize', [q_model, fq_model]) \
+ .run()
+
+
+def _ampq_solve(args):
+ # get file path to log
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+ logfile_path = os.path.realpath(args.output_path) + '.log'
+
+ with open(logfile_path, 'wb') as f, tempfile.TemporaryDirectory() as tmpdir:
+ if oneutils.is_valid_attr(args, 'save_intermediate'):
+ tmpdir = os.path.dirname(logfile_path)
+
+ # get driver path
+ record_minmax_path = os.path.join(dir_path, 'record-minmax')
+
+ tmp_minmax_recorded_path = os.path.join(
+ tmpdir,
+ os.path.splitext(os.path.basename(
+ args.input_path))[0]) + '.minmax_recorded.circle'
+
+ ## make a command to record min-max value of each tensor while running the representative dataset
+ record_minmax_cmd = Command(record_minmax_path, args, f)
+ record_minmax_cmd.add_noarg_option_if_valid_arg('--verbose', 'verbose') \
+ .add_option_with_valid_args('--input_model', ['input_path']) \
+ .add_option_with_values('--output_model', [tmp_minmax_recorded_path]) \
+ .add_option_with_valid_args('--input_data', ['input_data']) \
+ .add_option_with_valid_args('--input_data_format', ['input_data_format']) \
+ .add_option_with_valid_args('--min_percentile', ['min_percentile']) \
+ .add_option_with_valid_args('--max_percentile', ['max_percentile']) \
+ .add_option_with_valid_args('--moving_avg_batch', ['moving_avg_batch']) \
+ .add_option_with_valid_args('--moving_avg_const', ['moving_avg_const']) \
+ .add_option_with_valid_args('--mode', ['mode']) \
+ .add_noarg_option_if_valid_arg('--generate_profile_data', 'generate_profile_data') \
+ .run()
+
+ # process visq if needed
+ visq_file = None
+ if oneutils.is_valid_attr(args, 'ampq_bisection_visq'):
+ visq_file = getattr(args, 'ampq_bisection_visq')
+
+ if (oneutils.is_valid_attr(args, 'ampq_algorithm')
+ and oneutils.is_valid_attr(args, 'bisection_type')):
+ algorithm = getattr(args, 'ampq_algorithm')
+ bisection_type = getattr(args, 'bisection_type')
+ if algorithm == 'bisection' and bisection_type == 'auto' and visq_file is None:
+ # the algorithm needs bisection, but no visq file was given in the input configuration
+
+ # to compute the visq file we need a q8-quantized model
+ q8_file = os.path.join(
+ tmpdir,
+ os.path.splitext(os.path.basename(
+ args.input_path))[0]) + '.visq.q8.circle'
+
+ # get driver path
+ circle_quantizer_path = os.path.join(dir_path, 'circle-quantizer')
+ circle_quantizer_cmd = [circle_quantizer_path]
+ # verbose
+ if oneutils.is_valid_attr(args, 'verbose'):
+ circle_quantizer_cmd.append('--verbose')
+ circle_quantizer_cmd.append('--quantize_with_minmax')
+ circle_quantizer_cmd.append('float32')
+ circle_quantizer_cmd.append('uint8')
+ circle_quantizer_cmd.append('channel')
+
+ if oneutils.is_valid_attr(args, 'TF-style_maxpool'):
+ circle_quantizer_cmd.append('--TF-style_maxpool')
+
+ circle_quantizer_cmd.extend(['--input_type', 'uint8'])
+ circle_quantizer_cmd.extend(['--output_type', 'uint8'])
+
+ # input and output paths
+ circle_quantizer_cmd.append(tmp_minmax_recorded_path)
+ circle_quantizer_cmd.append(q8_file)
+
+ f.write((' '.join(circle_quantizer_cmd) + '\n').encode())
+
+ # run circle-quantizer
+ oneutils.run(
+ circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
+
+ # compute visq file
+ visq_path = os.path.join(dir_path, 'visq')
+
+ visq_file = os.path.join(
+ tmpdir,
+ os.path.splitext(os.path.basename(
+ args.input_path))[0]) + '.tae.visq.json'
+
+ visq_cmd = [visq_path]
+ visq_cmd.extend(['--fp32_circle', getattr(args, 'input_path')])
+ visq_cmd.extend(['--data', getattr(args, 'input_data')])
+ visq_cmd.extend(['--q_circle', q8_file])
+ visq_cmd.extend(['--tae_output', visq_file])
+ visq_cmd.extend(['--batch_size', "1"])
+ visq_cmd.append('--dump_dot_graph')
+ f.write((' '.join(visq_cmd) + '\n').encode())
+
+ # run visq
+ oneutils.run(visq_cmd, err_prefix="visq", logfile=f)
+
+ # get driver path
+ circle_mpqsolver_path = os.path.join(dir_path, 'circle-mpqsolver')
+
+ # solve for Mixed Precision Quantization configuration
+ ampq_quantize_cmd = [circle_mpqsolver_path]
+
+ # data
+ if oneutils.is_valid_attr(args, 'input_data'):
+ ampq_quantize_cmd.extend(['--data', getattr(args, 'input_data')])
+
+ # data format
+ if oneutils.is_valid_attr(args, 'input_data_format'):
+ ampq_quantize_cmd.extend(
+ ['--data_format', getattr(args, 'input_data_format')])
+
+ # qerror_ratio
+ if oneutils.is_valid_attr(args, 'ampq_qerror_ratio'):
+ ampq_quantize_cmd.extend(
+ ['--qerror_ratio', getattr(args, 'ampq_qerror_ratio')])
+
+ # algorithm
+ if oneutils.is_valid_attr(args, 'ampq_algorithm'):
+ algorithm = getattr(args, 'ampq_algorithm')
+ if algorithm == 'bisection':
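+ # map bisection_type to circle-mpqsolver's --bisection flag:
+ # auto -> 'auto', i16_front -> 'true', i16_back -> 'false'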
+ if oneutils.is_valid_attr(args, 'bisection_type'):
+ bisection_type = getattr(args, 'bisection_type')
+ if bisection_type == 'auto':
+ ampq_quantize_cmd.extend(['--bisection', 'auto'])
+ elif bisection_type == 'i16_front':
+ ampq_quantize_cmd.extend(['--bisection', 'true'])
+ elif bisection_type == 'i16_back':
+ ampq_quantize_cmd.extend(['--bisection', 'false'])
+
+ # recorded model as input
+ ampq_quantize_cmd.extend(['--input_model', tmp_minmax_recorded_path])
+
+ # input_dtype
+ if oneutils.is_valid_attr(args, 'input_type'):
+ ampq_quantize_cmd.extend(['--input_dtype', getattr(args, 'input_type')])
+
+ # output dtype
+ if oneutils.is_valid_attr(args, 'output_type'):
+ ampq_quantize_cmd.extend(['--output_dtype', getattr(args, 'output_type')])
+
+ # output model
+ if oneutils.is_valid_attr(args, 'output_path'):
+ ampq_quantize_cmd.extend(['--output_model', getattr(args, 'output_path')])
+
+ # visq_file
+ if visq_file is not None:
+ ampq_quantize_cmd.extend(['--visq_file', visq_file])
+
+ # save_intermediate
+ if oneutils.is_valid_attr(args, 'save_intermediate'):
+ intermediate_dir = os.path.dirname(logfile_path)
+ ampq_quantize_cmd.extend(['--save_intermediate', intermediate_dir])
+
+ if oneutils.is_valid_attr(args, 'verbose'):
+ ampq_quantize_cmd.append('--verbose')
+
+ f.write((' '.join(ampq_quantize_cmd) + '\n').encode())
+
+ # run ampq
+ oneutils.run(ampq_quantize_cmd, err_prefix="circle_mpqsolver", logfile=f)
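+
+ # Illustrative summary of the ampq path above (file names hypothetical):
+ # 1. circle-quantizer quantizes the minmax-recorded model to model.visq.q8.circle
+ # (only when the bisection type is 'auto' and no visq file was given)
+ # 2. visq computes model.tae.visq.json from the fp32 and q8 models
+ # 3. circle-mpqsolver searches a mixed-precision configuration using the
+ # recorded model and, if present, the visq file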
+
+
+def _requantize(args):
+ # get file path to log
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+ logfile_path = os.path.realpath(args.output_path) + '.log'
+
+ with open(logfile_path, 'wb') as f:
+ # get driver path
+ circle_quantizer_path = os.path.join(dir_path, 'circle-quantizer')
+
+ # make a command to quantize and dequantize the weights of the model
+ circle_quantizer_cmd = [circle_quantizer_path]
+ # verbose
+ if oneutils.is_valid_attr(args, 'verbose'):
+ circle_quantizer_cmd.append('--verbose')
+ # requantize
+ circle_quantizer_cmd.append('--requantize')
+ # Use input_model_dtype if it exists. Use input_dtype otherwise.
+ if oneutils.is_valid_attr(args, 'input_model_dtype'):
+ circle_quantizer_cmd.append(getattr(args, 'input_model_dtype'))
+ elif oneutils.is_valid_attr(args, 'input_dtype'):
+ circle_quantizer_cmd.append(getattr(args, 'input_dtype'))
+ if oneutils.is_valid_attr(args, 'quantized_dtype'):
+ circle_quantizer_cmd.append(getattr(args, 'quantized_dtype'))
+ # input and output path
+ if oneutils.is_valid_attr(args, 'input_path'):
+ circle_quantizer_cmd.append(getattr(args, 'input_path'))
+ if oneutils.is_valid_attr(args, 'output_path'):
+ circle_quantizer_cmd.append(getattr(args, 'output_path'))
+
+ f.write((' '.join(circle_quantizer_cmd) + '\n').encode())
+
+ # run circle-quantizer
+ oneutils.run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
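+
+ # e.g. with input_model_dtype=int8 and quantized_dtype=uint8 the assembled
+ # command resembles (dtypes and paths hypothetical):
+ # circle-quantizer --requantize int8 uint8 in.circle out.circle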
+
+
+def main():
+ # parse arguments
+ parser = _get_parser()
+ args = _parse_arg(parser)
+
+ # parse configuration file
+ oneutils.parse_cfg(args.config, 'one-quantize', args)
+
+ # verify arguments before default value setting
+ _verify_arg_pre(parser, args)
+
+ # set default values
+ _set_default_values(args)
+
+ # verify arguments
+ _verify_arg(parser, args)
+
+ # quantize
+ _quantize(args)
+
+
+if __name__ == '__main__':
+ oneutils.safemain(main, __file__)
diff --git a/compiler/one-cmds/onecc b/compiler/one-cmds/onecc
new file mode 100644
index 000000000..c7a76c535
--- /dev/null
+++ b/compiler/one-cmds/onecc
@@ -0,0 +1,217 @@
+#!/usr/bin/env bash
+''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # '''
+''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # '''
+''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # '''
+''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
+''''exit 255 # '''
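+# NOTE The quoted lines above form a bash/python polyglot: bash parses each
+# leading '''' as two empty strings glued to a command and executes it,
+# re-launching this script with the virtualenv python, while python parses
+# each whole line as a triple-quoted string literal and ignores it.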
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import configparser
+import os
+import subprocess
+import sys
+from types import SimpleNamespace
+
+from onelib.CfgRunner import CfgRunner
+from onelib.WorkflowRunner import WorkflowRunner
+import onelib.utils as oneutils
+
+# TODO Find a better way to suppress traceback on error
+sys.tracebacklimit = 0
+
+subtool_list = {
+ 'compile': {
+ 'import': 'Convert given model to circle',
+ 'optimize': 'Optimize circle model',
+ 'quantize': 'Quantize circle model',
+ },
+ 'package': {
+ 'pack': 'Package circle and metadata into nnpackage',
+ },
+ 'backend': {
+ 'codegen': 'Code generation tool',
+ 'profile': 'Profile backend model file',
+ 'infer': 'Infer backend model file'
+ },
+}
+
+
+def _call_driver(driver_name, options):
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+ driver_path = os.path.join(dir_path, driver_name)
+ cmd = [driver_path] + options
+ oneutils.run(cmd)
+
+
+def _check_subtool_exists():
+ """verify given arguments"""
+ subtool_keys = [n for k, v in subtool_list.items() for n in v.keys()]
+ if len(sys.argv) > 1 and sys.argv[1] in subtool_keys:
+ driver_name = 'one-' + sys.argv[1]
+ options = sys.argv[2:]
+ _call_driver(driver_name, options)
+ sys.exit(0)
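+
+# For illustration: `onecc optimize -C my.cfg` (arguments hypothetical) resolves
+# to running the `one-optimize` driver from this directory with `-C my.cfg`
+# passed through verbatim.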
+
+
+def _get_parser():
+ onecc_usage = 'onecc [-h] [-v] [-C CONFIG] [-b BACKEND] [-W WORKFLOW] [-O OPTIMIZATION] [COMMAND <args>]'
+ onecc_desc = 'Run ONE driver via several commands or configuration file'
+ parser = argparse.ArgumentParser(description=onecc_desc, usage=onecc_usage)
+
+ oneutils.add_default_arg(parser)
+
+ opt_name_list = oneutils.get_optimization_list(get_name=True)
+ opt_name_list = ['-' + s for s in opt_name_list]
+ if not opt_name_list:
+ opt_help_message = '(No available optimization options)'
+ else:
+ opt_help_message = '(Available optimization options: ' + ', '.join(
+ opt_name_list) + ')'
+ opt_help_message = 'optimization name to use ' + opt_help_message
+ parser.add_argument('-O', type=str, metavar='OPTIMIZATION', help=opt_help_message)
+
+ parser.add_argument(
+ '-W', '--workflow', type=str, metavar='WORKFLOW', help='run with workflow file')
+
+ parser.add_argument(
+ '-b', '--backend', type=str, help='generate code for given backend')
+
+ # just for help message
+ compile_group = parser.add_argument_group('compile to circle model')
+ for tool, desc in subtool_list['compile'].items():
+ compile_group.add_argument(tool, action='store_true', help=desc)
+
+ package_group = parser.add_argument_group('package circle model')
+ for tool, desc in subtool_list['package'].items():
+ package_group.add_argument(tool, action='store_true', help=desc)
+
+ backend_group = parser.add_argument_group('run backend tools')
+ for tool, desc in subtool_list['backend'].items():
+ backend_group.add_argument(tool, action='store_true', help=desc)
+
+ return parser
+
+
+def _parse_arg(parser):
+ args = parser.parse_args()
+ # print version
+ if args.version:
+ oneutils.print_version_and_exit(__file__)
+
+ return args
+
+
+def _verify_backend_args(parser, args):
+ """
+ verify one-profile, one-codegen arguments
+
+ This verification logic comes from each driver's code.
+ """
+ cfgparser = configparser.ConfigParser()
+ cfgparser.optionxform = str
+ cfgparser.read(args.config)
+
+ for driver in ['one-profile', 'one-codegen']:
+ if not driver in cfgparser:
+ continue
+
+ cfg_args = SimpleNamespace()
+ oneutils.parse_cfg(args.config, driver, cfg_args)
+ cmd_backend_exist = oneutils.is_valid_attr(args, 'backend')
+ cfg_backend_exist = oneutils.is_valid_attr(cfg_args, 'backend')
+ cfg_backends_exist = oneutils.is_valid_attr(cfg_args, 'backends')
+
+ if cfg_backend_exist and cfg_backends_exist:
+ parser.error(
+ "'backend' option and 'backends' option cannot be used simultaneously.")
+
+ # Check if given backend from command line exists in the configuration file
+ if cmd_backend_exist and cfg_backend_exist:
+ if args.backend != cfg_args.backend:
+ parser.error('Not found the command of given backend')
+
+ if cfg_backends_exist:
+ cfg_backends = getattr(cfg_args, 'backends').split(',')
+ # check if commands of given backends exist
+ for b in cfg_backends:
+ if not oneutils.is_valid_attr(cfg_args, b):
+ parser.error('Not found the command for ' + b)
+
+ # Check if given backend from command line exists in the configuration file
+ if cmd_backend_exist:
+ if args.backend not in cfg_backends:
+ parser.error('Not found the command of given backend')
+
+
+def _verify_arg(parser, args):
+ """verify given arguments"""
+ # check if required arguments are given
+ if not oneutils.is_valid_attr(args, 'config') and not oneutils.is_valid_attr(
+ args, 'workflow'):
+ parser.error('-C/--config or -W/--workflow argument is required')
+ # check if given optimization option exists
+ opt_name_list = oneutils.get_optimization_list(get_name=True)
+ opt_name_list = [oneutils.remove_prefix(s, 'O') for s in opt_name_list]
+ if oneutils.is_valid_attr(args, 'O'):
+ if ' ' in getattr(args, 'O'):
+ parser.error('Not allowed to have space in the optimization name')
+ if not getattr(args, 'O') in opt_name_list:
+ parser.error('Invalid optimization option')
+
+ if oneutils.is_valid_attr(args, 'backend') and oneutils.is_valid_attr(
+ args, 'workflow'):
+ parser.error('\'backend\' option can be used only with \'config\' option')
+
+ if oneutils.is_valid_attr(args, 'backend'):
+ _verify_backend_args(parser, args)
+
+
+def main():
+ # check if there is a subtool argument
+ # if so, execute the subtool with argv
+ # NOTE:
+ # Why call the subtool directly without using argparse?
+ # Because if argparse were used, options that onecc itself understands,
+ # including '--help' and '-C', would be processed by onecc directly,
+ # so they could not be delivered to the subtool.
+ _check_subtool_exists()
+
+ # parse arguments
+ # since the configuration file path is required first,
+ # parsing of the configuration file proceeds after this.
+ parser = _get_parser()
+ args = _parse_arg(parser)
+
+ # verify arguments
+ _verify_arg(parser, args)
+
+ bin_dir = os.path.dirname(os.path.realpath(__file__))
+ if oneutils.is_valid_attr(args, 'config'):
+ runner = CfgRunner(args.config)
+ runner.detect_import_drivers(bin_dir)
+ if oneutils.is_valid_attr(args, 'O'):
+ runner.add_opt(getattr(args, 'O'))
+ if oneutils.is_valid_attr(args, 'backend'):
+ runner.set_backend(args.backend)
+ runner.run(bin_dir)
+ elif oneutils.is_valid_attr(args, 'workflow'):
+ runner = WorkflowRunner(args.workflow)
+ runner.run(bin_dir)
+
+
+if __name__ == '__main__':
+ oneutils.safemain(main, __file__)
diff --git a/compiler/one-cmds/onecc.template.cfg b/compiler/one-cmds/onecc.template.cfg
new file mode 100644
index 000000000..c9968b4ae
--- /dev/null
+++ b/compiler/one-cmds/onecc.template.cfg
@@ -0,0 +1,152 @@
+; set environment variables
+[Environment]
+ONECC_ENV="ONECC"
+
+; To activate a step (or task),
+; set True for the step in [onecc] section and fill options in the corresponding section
+[onecc]
+; neural network model to circle
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=False
+; circle to circle with optimization
+one-optimize=False
+; circle to circle with quantization
+one-quantize=False
+; partition circle
+one-partition=False
+; package circle and metadata into nnpackage
+one-pack=False
+; generate code for backend
+one-codegen=False
+; profile
+one-profile=False
+; infer
+one-infer=False
+; group option
+; multiple group options are allowed
+include=O1
+# include=O1 O2 OMY_OPT
+
+[one-import-tf]
+# mandatory
+; pb file
+input_path=
+; circle file
+output_path=
+# optional
+; v1 or v2
+converter_version=v2
+; graph_def(default), saved_model or keras_model
+model_format=graph_def
+# optional but mandatory for model_format=graph_def
+; tensor names of the input arrays, comma-separated
+input_arrays=
+; tensor names of the output arrays, comma-separated
+output_arrays=
+; input shapes corresponding to --input_arrays, colon-separated (e.g. 1,4,4,3:1,20,20,3)
+input_shapes=
+
+[one-import-tflite]
+# mandatory
+; tflite file
+input_path=
+; circle file
+output_path=
+
+[one-import-bcq]
+# mandatory
+; bcq file
+input_path=
+; circle file
+output_path=
+# optional
+; v1 or v2
+converter_version=v2
+; graph_def(default), saved_model or keras_model
+model_format=graph_def
+# optional but mandatory for model_format=graph_def
+; tensor names of the input arrays, comma-separated
+input_arrays=
+; tensor names of the output arrays, comma-separated
+output_arrays=
+; input shapes corresponding to --input_arrays, colon-separated (e.g. 1,4,4,3:1,20,20,3)
+input_shapes=
+
+[one-import-onnx]
+# mandatory
+; onnx file
+input_path=
+; circle file
+output_path=
+# optional
+; True or False
+unroll_rnn=
+; True or False
+unroll_lstm=
+
+[one-optimize]
+# mandatory
+; circle file
+input_path=
+; circle file
+output_path=
+# //TODO: Add available options
+
+[one-quantize]
+# mandatory
+; circle file
+input_path=
+; circle file
+output_path=
+# optional arguments for quantization
+; input data file (if not given, random data will be used for calibration)
+input_data=
+; h5/hdf5(default), list/filelist, or dir/directory
+input_data_format=
+; dtype of quantized model (uint8(default), int16)
+quantized_dtype=
+; granularity of quantization (layer(default), channel)
+granularity=
+; dtype of model's input (uint8, int16, float32). Same as quantized_dtype by default.
+input_type=
+; dtype of model's output (uint8, int16, float32). Same as quantized_dtype by default.
+output_type=
+
+[one-partition]
+# mandatory
+; partition file which provides backend to assign
+part_file=
+; circle file
+input_file=
+# //TODO: Add available options
+
+[one-pack]
+# mandatory
+; input path
+input_path=
+; output path
+output_path=
+# //TODO: Add available options
+
+[one-codegen]
+# mandatory
+; backend name
+backend=
+; commands for each backend
+command=
+
+[one-profile]
+# mandatory
+; backend name
+backend=
+# //TODO: Add available options
+
+[one-infer]
+# mandatory (mutually exclusive)
+; backend name
+backend=
+; driver name
+driver=
+# //TODO: Add available options
diff --git a/compiler/one-cmds/onelib/CfgRunner.py b/compiler/one-cmds/onelib/CfgRunner.py
new file mode 100644
index 000000000..3a7b5fcbd
--- /dev/null
+++ b/compiler/one-cmds/onelib/CfgRunner.py
@@ -0,0 +1,121 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import configparser
+import os
+import warnings
+
+import onelib.utils as oneutils
+
+
+def _simple_warning(message, category, filename, lineno, file=None, line=None):
+ return f'{category.__name__}: {message}\n'
+
+
+class CfgRunner:
+ driver_sequence = [
+ 'one-optimize', 'one-quantize', 'one-pack', 'one-codegen', 'one-profile',
+ 'one-partition', 'one-infer'
+ ]
+
+ def __init__(self, path):
+ self.path = path
+ self.optparser = None
+ self.cfgparser = configparser.ConfigParser()
+ # make option names case sensitive
+ self.cfgparser.optionxform = str
+ parsed = self.cfgparser.read(os.path.expanduser(path))
+ if not parsed:
+ raise FileNotFoundError('Not found given configuration file')
+
+ self._verify_cfg(self.cfgparser)
+ # default import drivers
+ self.import_drivers = [
+ 'one-import-bcq', 'one-import-onnx', 'one-import-tf', 'one-import-tflite'
+ ]
+ # parse group option
+ GROUP_OPTION_KEY = 'include'
+ if self.cfgparser.has_option('onecc', GROUP_OPTION_KEY):
+ groups = self.cfgparser['onecc'][GROUP_OPTION_KEY].split()
+ for o in groups:
+ if o == 'O' or not o.startswith('O'):
+ raise ValueError('Invalid group option')
+ # add_opt receives group name except first 'O'
+ self.add_opt(o[1:])
+
+ self.backend = None
+
+ def _verify_cfg(self, cfgparser):
+ if not cfgparser.has_section('onecc'):
+ if cfgparser.has_section('one-build'):
+ warnings.formatwarning = _simple_warning
+ warnings.warn(
+ "[one-build] section will be deprecated. Please use [onecc] section.")
+ else:
+ raise ImportError('[onecc] section is required in configuration file')
+
+ def _is_available(self, driver):
+ # if there's no `onecc` section, it will find `one-build` section because of backward compatibility
+ return (self.cfgparser.has_option('onecc', driver) and self.cfgparser.getboolean(
+ 'onecc', driver)) or (self.cfgparser.has_option('one-build', driver)
+ and self.cfgparser.getboolean('one-build', driver))
+
+ def add_opt(self, opt):
+ self.optparser = configparser.ConfigParser()
+ # make option names case sensitive
+ self.optparser.optionxform = str
+ opt_book = dict(
+ zip(oneutils.get_optimization_list(get_name=True),
+ oneutils.get_optimization_list()))
+ parsed = self.optparser.read(opt_book['O' + opt])
+ if not parsed:
+ raise FileNotFoundError('Not found given optimization configuration file')
+ if len(self.optparser.sections()) != 1 or self.optparser.sections(
+ )[0] != 'one-optimize':
+ raise AssertionError(
+ 'Optimization configuration file only allowed to have a \'one-optimize\' section'
+ )
+ self.opt = opt
+
+ def set_backend(self, backend: str):
+ self.backend = backend
+
+ def detect_import_drivers(self, dir):
+ self.import_drivers = list(oneutils.detect_one_import_drivers(dir).keys())
+
+ def run(self, working_dir, verbose=False):
+ # set environment
+ CFG_ENV_SECTION = 'Environment'
+ if self.cfgparser.has_section(CFG_ENV_SECTION):
+ for key in self.cfgparser[CFG_ENV_SECTION]:
+ os.environ[key] = self.cfgparser[CFG_ENV_SECTION][key]
+
+ section_to_run = []
+ for d in self.import_drivers + self.driver_sequence:
+ if self._is_available(d):
+ section_to_run.append(d)
+
+ for section in section_to_run:
+ options = ['--config', self.path, '--section', section]
+ if section == 'one-optimize' and self.optparser:
+ options += ['-O', self.opt]
+ if verbose:
+ options.append('--verbose')
+ if (section == 'one-codegen' or section == 'one-profile') and self.backend:
+ options += ['-b', self.backend]
+ driver_path = os.path.join(working_dir, section)
+ cmd = [driver_path] + options
+ oneutils.run(cmd)
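+
+
+# Usage sketch (paths hypothetical):
+# runner = CfgRunner('model.cfg')
+# runner.detect_import_drivers('/usr/share/one/bin')
+# runner.run('/usr/share/one/bin')
+# runs every driver enabled in the [onecc] section, import drivers first,
+# then the drivers in CfgRunner.driver_sequence order.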
diff --git a/compiler/one-cmds/onelib/Command.py b/compiler/one-cmds/onelib/Command.py
new file mode 100644
index 000000000..35a9567b7
--- /dev/null
+++ b/compiler/one-cmds/onelib/Command.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import onelib.utils as oneutils
+
+
+class Command:
+ def __init__(self, driver, args, log_file):
+ self.cmd = [driver]
+ self.driver = driver
+ self.args = args
+ self.log_file = log_file
+
+ # Add option if attrs are valid
+ # Option values are collected from self.args
+ def add_option_with_valid_args(self, option, attrs):
+ for attr in attrs:
+ if not oneutils.is_valid_attr(self.args, attr):
+ return self
+ self.cmd.append(option)
+ for attr in attrs:
+ self.cmd.append(getattr(self.args, attr))
+ return self
+
+ # Add option and values without any condition
+ def add_option_with_values(self, option, values):
+ self.cmd.append(option)
+ for value in values:
+ self.cmd.append(value)
+ return self
+
+ # Add option with no argument (ex: --verbose) if attr is valid
+ def add_noarg_option_if_valid_arg(self, option, attr):
+ if oneutils.is_valid_attr(self.args, attr):
+ self.cmd.append(option)
+ return self
+
+ # Run cmd and save logs
+ def run(self):
+ self.log_file.write((' '.join(self.cmd) + '\n').encode())
+ oneutils.run(self.cmd, err_prefix=self.driver, logfile=self.log_file)
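+
+
+# Usage sketch (driver and attribute names hypothetical):
+# Command('circle2circle', args, log_file) \
+# .add_noarg_option_if_valid_arg('--verbose', 'verbose') \
+# .add_option_with_valid_args('--input_path', ['input_path']) \
+# .run()
+# each add_* method returns self, so options can be chained before run().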
diff --git a/compiler/one-cmds/onelib/OptionBuilder.py b/compiler/one-cmds/onelib/OptionBuilder.py
new file mode 100644
index 000000000..6a75783ad
--- /dev/null
+++ b/compiler/one-cmds/onelib/OptionBuilder.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from onelib.constant import CONSTANT
+
+
+class OptionBuilder:
+ def __init__(self, one_cmd_type):
+ self.type = one_cmd_type
+
+ def _build_default(self, commands):
+ options = []
+ for k, v in commands.items():
+ options.extend(['--' + k, v])
+ return options
+
+ def _build_with_unknown_command(self, commands):
+ COMMAND_K = 'command'
+ options = []
+ for k, v in commands.items():
+ if k == COMMAND_K:
+ continue
+ options.extend(['--' + k, v])
+ options.extend(['--'])
+ options.extend(commands[COMMAND_K].split())
+ return options
+
+ def _build_import(self, commands):
+ options = []
+ arg_0 = ['save_intermediate']
+ for k, v in commands.items():
+ if k in arg_0 and v == "True":
+ options.extend(['--' + k])
+ continue
+ options.extend(['--' + k, v])
+ return options
+
+ def _build_optimize(self, commands):
+ options = []
+ arg_0 = ['generate_profile_data']
+ arg_1 = ['input_path', 'output_path', 'change_outputs']
+ for k, v in commands.items():
+ if k in arg_1:
+ options.extend(['--' + k, v])
+ continue
+ if k in arg_0 and v == 'True':
+ options.extend(['--' + k])
+ continue
+ for opt in CONSTANT.OPTIMIZATION_OPTS:
+ if k == opt[0] and v == "True":
+ options.extend(['--' + k])
+ break
+ return options
+
+ def _build_quantize(self, commands):
+ options = []
+ arg_0 = [
+ 'generate_profile_data', 'save_intermediate', 'TF-style_maxpool',
+ 'evaluate_result', 'print_mae', 'print_mape', 'print_mpeir',
+ 'print_top1_match', 'print_top5_match', 'force_quantparam', 'copy_quantparam'
+ ]
+ for k, v in commands.items():
+ if k in arg_0 and v == "True":
+ options.extend(['--' + k])
+ continue
+ options.extend(['--' + k, v])
+ return options
+
+ def build(self, commands):
+ cmd_book = dict.fromkeys(
+ ['one-import-bcq', 'one-import-tflite', 'one-pack', 'one-partition'],
+ self._build_default)
+ cmd_book['one-codegen'] = self._build_with_unknown_command
+ cmd_book['one-import-onnx'] = self._build_import
+ cmd_book['one-import-pytorch'] = self._build_import
+ cmd_book['one-import-tf'] = self._build_import
+ cmd_book['one-infer'] = self._build_with_unknown_command
+ cmd_book['one-optimize'] = self._build_optimize
+ cmd_book['one-profile'] = self._build_with_unknown_command
+ cmd_book['one-quantize'] = self._build_quantize
+
+ return cmd_book[self.type](commands)
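+
+
+# e.g. OptionBuilder('one-pack').build({'input_path': 'in.circle', 'output_path': 'out'})
+# returns ['--input_path', 'in.circle', '--output_path', 'out'] via _build_default
+# (values here are hypothetical).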
diff --git a/compiler/one-cmds/onelib/TopologicalSortHelper.py b/compiler/one-cmds/onelib/TopologicalSortHelper.py
new file mode 100644
index 000000000..d05adea8d
--- /dev/null
+++ b/compiler/one-cmds/onelib/TopologicalSortHelper.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from collections import defaultdict
+
+
+class TopologicalSortHelper:
+ def __init__(self, vertices):
+ self.graph = defaultdict(list)
+ self.vertices = vertices
+
+ def add_edge(self, u, v):
+ self.graph[u].append(v)
+
+ def sort_util(self, v, visited, stack):
+ visited[v] = True
+
+ for i in self.graph[v]:
+ if not visited[i]:
+ self.sort_util(i, visited, stack)
+
+ stack.insert(0, v)
+
+ def sort(self):
+ visited = dict.fromkeys(self.vertices, False)
+ stack = []
+
+ for v in self.vertices:
+ if not visited[v]:
+ self.sort_util(v, visited, stack)
+
+ return stack
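+
+
+# Minimal usage sketch (vertex names hypothetical):
+# helper = TopologicalSortHelper(['a', 'b', 'c'])
+# helper.add_edge('a', 'b') # b runs after a
+# helper.add_edge('b', 'c') # c runs after b
+# helper.sort() # -> ['a', 'b', 'c']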
diff --git a/compiler/one-cmds/onelib/WorkflowRunner.py b/compiler/one-cmds/onelib/WorkflowRunner.py
new file mode 100644
index 000000000..52bd253ff
--- /dev/null
+++ b/compiler/one-cmds/onelib/WorkflowRunner.py
@@ -0,0 +1,131 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import os
+
+from onelib.OptionBuilder import OptionBuilder
+from onelib.TopologicalSortHelper import TopologicalSortHelper
+from onelib.CfgRunner import CfgRunner
+import onelib.utils as oneutils
+
+
+class WorkflowRunner:
+ WORKFLOWS_K = 'workflows'
+ DEPENDENCIES_K = 'run-after'
+ CFG_REFERENCE_K = 'cfg-reference'
+ WORKFLOW_STEPS_K = 'steps'
+ ONE_CMD_TOOL_K = 'one-cmd'
+ COMMANDS_K = 'commands'
+
+ def __init__(self, path):
+ try:
+ with open(path) as f:
+ self.json_contents = json.load(f)
+ except FileNotFoundError:
+ raise FileNotFoundError("Not found given workflow file")
+ except json.decoder.JSONDecodeError:
+ raise ImportError("Invalid workflow file")
+
+ self._verify_workflow(self.json_contents)
+
+ workflows = self.json_contents[self.WORKFLOWS_K]
+ # NOTE dict.fromkeys(workflows, []) would share one list object across all
+ # keys, so a dict comprehension is used to give each workflow its own list
+ self.adj = {workflow_k: [] for workflow_k in workflows}
+ # decide the order according to the dependencies of each workflow.
+ helper = TopologicalSortHelper(workflows)
+ for workflow_k in workflows:
+ workflow = self.json_contents[workflow_k]
+ if self.DEPENDENCIES_K in workflow:
+ for previous_workflow in workflow[self.DEPENDENCIES_K]:
+ helper.add_edge(previous_workflow, workflow_k)
+ self.adj[previous_workflow].append(workflow_k)
+ self.workflow_sequence = helper.sort()
+
+ self._check_cycle()
+
+ def _check_cycle(self):
+ pos = dict()
+ index = 0
+ workflow_num = len(self.workflow_sequence)
+ # number the order
+ for seq_idx in range(workflow_num):
+ pos[self.workflow_sequence[seq_idx]] = index
+ index += 1
+
+ for seq_idx in range(workflow_num):
+ first_wf = self.workflow_sequence[seq_idx]
+ for adj_wf in self.adj[first_wf]:
+ first_pos = 0 if first_wf not in pos else pos[first_wf]
+ second_pos = 0 if adj_wf not in pos else pos[adj_wf]
+ if (first_pos > second_pos):
+ raise RuntimeError("Workflows should not have a cycle")
+
+ def _verify_workflow(self, json_contents):
+ # workflow file should have WORKFLOWS_K
+ if not self.WORKFLOWS_K in json_contents:
+ raise ValueError("Not found \"" + self.WORKFLOWS_K +
+ "\" key in workflow file")
+
+ workflows = json_contents[self.WORKFLOWS_K]
+ # workflow file should have keys listed in WORKFLOWS_K
+ for workflow_k in workflows:
+ if not workflow_k in json_contents:
+ raise ValueError("Not found " + workflow_k + " key listed in \"" +
+ self.WORKFLOWS_K + "\"")
+
+ # each workflow should have either WORKFLOW_STEPS_K or CFG_REFERENCE_K
+ for workflow_k in workflows:
+ if not self.WORKFLOW_STEPS_K in json_contents[workflow_k] and not self.CFG_REFERENCE_K in json_contents[workflow_k]:
+ raise ValueError("Each workflow should have either \"" +
+ self.WORKFLOW_STEPS_K + "\" or \"" +
+ self.CFG_REFERENCE_K + "\"")
+ for workflow_k in workflows:
+ if self.WORKFLOW_STEPS_K in json_contents[workflow_k] and self.CFG_REFERENCE_K in json_contents[workflow_k]:
+ raise ValueError("\"" + self.WORKFLOW_STEPS_K + "\" and \"" +
+ self.CFG_REFERENCE_K + "\" are exclusive keys")
+
+ # each step should have ONE_CMD_TOOL_K and COMMANDS_K
+ for workflow_k in workflows:
+ workflow = json_contents[workflow_k]
+ if self.WORKFLOW_STEPS_K in workflow:
+ step_keys = workflow[self.WORKFLOW_STEPS_K]
+ for step_k in step_keys:
+ step = workflow[step_k]
+ if not self.ONE_CMD_TOOL_K in step or not self.COMMANDS_K in step:
+ raise ValueError("Each step should have \"" +
+ self.ONE_CMD_TOOL_K + "\"" + " and \"" +
+ self.COMMANDS_K + "\"")
+
+ def run(self, working_dir, verbose=False):
+ # run workflows in sequence
+ for workflow_k in self.workflow_sequence:
+ workflow = self.json_contents[workflow_k]
+ if self.WORKFLOW_STEPS_K in workflow:
+ steps = workflow[self.WORKFLOW_STEPS_K]
+ for step_k in steps:
+ step = workflow[step_k]
+ commands = step[self.COMMANDS_K]
+ driver_name = step[self.ONE_CMD_TOOL_K]
+ option_builder = OptionBuilder(driver_name)
+ options = option_builder.build(commands)
+ # get the absolute path of the caller
+ driver_path = os.path.join(working_dir, driver_name)
+ cmd = [driver_path] + options
+ oneutils.run(cmd)
+ elif self.CFG_REFERENCE_K in workflow:
+ cfg_path = workflow[self.CFG_REFERENCE_K]['path']
+ runner = CfgRunner(cfg_path)
+ runner.run(working_dir, verbose)
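+
+
+# Illustrative shape of a workflow file accepted by this runner (all values hypothetical):
+# {
+# "workflows": ["MY_WORKFLOW"],
+# "MY_WORKFLOW": {
+# "steps": ["STEP_1"],
+# "STEP_1": {
+# "one-cmd": "one-optimize",
+# "commands": {"input_path": "in.circle", "output_path": "out.circle"}
+# }
+# }
+# }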
diff --git a/compiler/one-cmds/onelib/backends.py b/compiler/one-cmds/onelib/backends.py
new file mode 100644
index 000000000..9d7dad17e
--- /dev/null
+++ b/compiler/one-cmds/onelib/backends.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import glob
+import ntpath
+import os
+"""
+[one hierarchy]
+one
+├── backends
+├── bin
+├── doc
+├── include
+├── lib
+├── optimization
+└── test
+
+The list where `one-XXXX` finds its backends
+- `bin` folder where `one-XXXX` exists
+- `backends` folder
+
+NOTE If there are backends of the same name in different places,
+ the closer to the top in the list, the higher the priority.
+"""
+
+
+def get_list(cmdname):
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+ backend_set = set()
+
+ # bin folder
+ files = [f for f in glob.glob(dir_path + '/../*-' + cmdname)]
+ # backends folder
+ files += [
+ f
+ for f in glob.glob(dir_path + '/../../backends/**/*-' + cmdname, recursive=True)
+ ]
+ # TODO find backends in `$PATH`
+
+ backends_list = []
+ for cand in files:
+ base = ntpath.basename(cand)
+ if (not base in backend_set) and os.path.isfile(cand) and os.access(
+ cand, os.X_OK):
+ backend_set.add(base)
+ backends_list.append(cand)
+
+ return backends_list
+
+
+def search_driver(driver):
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+
+ # CASE 1: one/bin/{driver} is found
+ driver_path = dir_path + '/../' + driver
+ if os.path.isfile(driver_path) and os.access(driver_path, os.X_OK):
+ return driver_path
+
+ # CASE 2: one/backends/**/bin/{driver} is found
+ for driver_path in glob.glob(
+ dir_path + '/../../backends/**/bin/' + driver, recursive=True):
+ if os.path.isfile(driver_path) and os.access(driver_path, os.X_OK):
+ return driver_path
+
+ # CASE 3: {driver} is found in nowhere
+ return None
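+
+
+# e.g. search_driver('dummy-infer') (hypothetical name) returns
+# one/bin/dummy-infer if it exists and is executable, otherwise the first
+# executable match under one/backends/**/bin/, otherwise None.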
diff --git a/compiler/one-cmds/onelib/constant.py b/compiler/one-cmds/onelib/constant.py
new file mode 100644
index 000000000..90109ef40
--- /dev/null
+++ b/compiler/one-cmds/onelib/constant.py
@@ -0,0 +1,154 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+class CONSTANT:
+ __slots__ = () # This prevents access via __dict__.
+
+ # Basic optimization passes
+ # These passes do not change the execution result of the model
+ O1 = (
+ # Constant folding
+ 'fold_add_v2',
+ 'fold_cast',
+ 'fold_densify',
+ 'fold_dequantize',
+ 'fold_dwconv',
+ 'fold_fully_connected',
+ 'fold_gather',
+ 'fold_sparse_to_dense',
+
+ # Operator fusion
+ 'fuse_add_with_tconv',
+ 'fuse_add_with_fully_connected',
+ 'fuse_batchnorm_with_conv',
+ 'fuse_batchnorm_with_dwconv',
+ 'fuse_batchnorm_with_tconv',
+ 'fuse_activation_function',
+ 'fuse_instnorm',
+ 'fuse_prelu',
+ 'fuse_gelu',
+ 'fuse_mean_with_mean',
+ 'fuse_transpose_with_mean',
+ 'transform_min_max_to_relu6',
+ 'transform_min_relu_to_relu6',
+
+ # Remove redundant operators
+ 'remove_redundant_reshape',
+ 'remove_redundant_transpose',
+ 'remove_unnecessary_reshape',
+ 'remove_unnecessary_slice',
+ 'remove_unnecessary_strided_slice',
+ 'remove_unnecessary_split',
+
+ # Canonicalization
+ # (passes to help further optimization)
+ 'resolve_customop_add',
+ 'resolve_customop_batchmatmul',
+ 'resolve_customop_matmul',
+ 'resolve_customop_max_pool_with_argmax',
+ 'resolve_customop_splitv',
+ 'substitute_pack_to_reshape',
+ 'substitute_padv2_to_pad',
+ 'substitute_splitv_to_split',
+ 'substitute_squeeze_to_reshape',
+ 'substitute_strided_slice_to_reshape',
+ 'substitute_transpose_to_reshape',
+ 'forward_reshape_to_unaryop',
+ 'forward_transpose_op',
+ 'replace_non_const_fc_with_batch_matmul', # For quantization
+ )
+
+ OPTIMIZATION_OPTS = (
+ # (OPTION_NAME, HELP_MESSAGE)
+ ('convert_nchw_to_nhwc',
+ 'Experimental: This will convert NCHW operators to NHWC under the assumption that input model is NCHW.'
+ ),
+ ('expand_broadcast_const', 'expand broadcastable constant node inputs'),
+ ('nchw_to_nhwc_input_shape',
+ 'convert the input shape of the model (argument for convert_nchw_to_nhwc)'),
+ ('nchw_to_nhwc_output_shape',
+ 'convert the output shape of the model (argument for convert_nchw_to_nhwc)'),
+ ('fold_add_v2', 'fold AddV2 op with constant inputs'),
+ ('fold_cast', 'fold Cast op with constant input'),
+ ('fold_densify', 'fold Densify op with sparse constant input'),
+ ('fold_dequantize', 'fold Dequantize op'),
+ ('fold_dwconv', 'fold Depthwise Convolution op with constant inputs'),
+ ('fold_fully_connected', 'fold FullyConnected op with constant inputs'),
+ ('fold_gather', 'fold Gather op'),
+ ('fold_sparse_to_dense', 'fold SparseToDense op'),
+ ('forward_reshape_to_unaryop', 'Forward Reshape op'),
+ ('forward_transpose_op', 'Forward Transpose op'),
+ ('fuse_add_with_tconv', 'fuse Add op to Transposed Convolution op'),
+ ('fuse_add_with_fully_connected', 'fuse Add op to FullyConnected op'),
+ ('fuse_batchnorm_with_conv', 'fuse BatchNorm op to Convolution op'),
+ ('fuse_batchnorm_with_dwconv', 'fuse BatchNorm op to Depthwise Convolution op'),
+ ('fuse_batchnorm_with_tconv', 'fuse BatchNorm op to Transposed Convolution op'),
+ ('fuse_bcq', 'apply Binary Coded Quantization'),
+ ('fuse_preactivation_batchnorm',
+ 'fuse BatchNorm operators of pre-activations to Convolution op'),
+ ('fuse_mean_with_mean', 'fuse two consecutive Mean ops'),
+ ('fuse_transpose_with_mean',
+ 'fuse Mean with a preceding Transpose under certain conditions'),
+ ('make_batchnorm_gamma_positive',
+ 'make negative gamma of BatchNorm to a small positive value (1e-10).'
+ ' Note that this pass can change the execution result of the model.'
+ ' So, use it only when the impact is known to be acceptable.'),
+ ('fuse_activation_function', 'fuse Activation function to a preceding operator'),
+ ('fuse_instnorm', 'fuse ops to InstanceNorm operator'),
+ ('fuse_prelu', 'fuse ops to PReLU operator'),
+ ('fuse_gelu', 'fuse ops to GeLU operator'),
+ ('replace_cw_mul_add_with_depthwise_conv',
+ 'replace channel-wise Mul/Add with DepthwiseConv2D'),
+ ('remove_fakequant', 'remove FakeQuant ops'),
+ ('remove_quantdequant', 'remove Quantize-Dequantize sequence'),
+ ('remove_redundant_quantize', 'remove redundant Quantize ops'),
+ ('remove_redundant_reshape', 'fuse or remove subsequent Reshape ops'),
+ ('remove_redundant_transpose', 'fuse or remove subsequent Transpose ops'),
+ ('remove_unnecessary_reshape', 'remove unnecessary reshape ops'),
+ ('remove_unnecessary_slice', 'remove unnecessary slice ops'),
+ ('remove_unnecessary_strided_slice', 'remove unnecessary strided slice ops'),
+ ('remove_unnecessary_split', 'remove unnecessary split ops'),
+ ('replace_non_const_fc_with_batch_matmul',
+ 'replace FullyConnected op with non-const weights to BatchMatMul op'),
+ ('replace_sub_with_add', 'replace Sub op with Add op'),
+ ('resolve_customop_add', 'convert Custom(Add) op to Add op'),
+ ('resolve_customop_batchmatmul',
+ 'convert Custom(BatchMatmul) op to BatchMatmul op'),
+ ('resolve_customop_matmul', 'convert Custom(Matmul) op to Matmul op'),
+ ('resolve_customop_max_pool_with_argmax',
+ 'convert Custom(MaxPoolWithArgmax) to net of builtin operators'),
+ ('resolve_customop_splitv', 'convert Custom(SplitV) op to SplitV op'),
+ ('shuffle_weight_to_16x1float32',
+ 'convert weight format of FullyConnected op to SHUFFLED16x1FLOAT32.'
+ ' Note that it only converts weights whose row is a multiple of 16'),
+ ('substitute_pack_to_reshape', 'convert single input Pack op to Reshape op'),
+ ('substitute_padv2_to_pad', 'convert certain condition PadV2 to Pad'),
+ ('substitute_splitv_to_split', 'convert certain condition SplitV to Split'),
+ ('substitute_squeeze_to_reshape', 'convert certain condition Squeeze to Reshape'),
+ ('substitute_strided_slice_to_reshape',
+ 'convert certain condition StridedSlice to Reshape'),
+ ('substitute_transpose_to_reshape',
+ 'convert certain condition Transpose to Reshape'),
+ ('transform_min_max_to_relu6', 'transform Minimum-Maximum pattern to Relu6 op'),
+ ('transform_min_relu_to_relu6', 'transform Minimum(6)-Relu pattern to Relu6 op'),
+ ('decompose_hardswish', 'decompose the HardSwish op to Add, Mul and Relu6 ops'),
+ ('unroll_unidirseqlstm', 'unroll UnidirectionalSequenceLSTM op'),
+ ('dynamic_batch_to_single_batch',
+ 'convert dynamic batch size (first dimension) of inputs to 1'))
+
+
+CONSTANT = CONSTANT()
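+
+# The class is instantiated once and used as a read-only namespace, e.g.
+# `'fold_cast' in CONSTANT.O1` or iterating over CONSTANT.OPTIMIZATION_OPTS.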
diff --git a/compiler/one-cmds/onelib/export_constant.py b/compiler/one-cmds/onelib/export_constant.py
new file mode 100644
index 000000000..7a2de1e8a
--- /dev/null
+++ b/compiler/one-cmds/onelib/export_constant.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from constant import CONSTANT
+
+import argparse
+import configparser
+
+
+def main():
+ parser = argparse.ArgumentParser(
+ description='Export CONSTANT value with given file format.')
+ parser.add_argument(
+ '-c', '--constant', type=str, required=True, help='Constant name to export')
+ parser.add_argument(
+ '-f',
+ '--format',
+ type=str,
+ required=True,
+ choices=['cfg', 'txt'],
+ help=
+ 'File format to export. The created cfg file contains CONSTANT under the one-optimize section.'
+ )
+ parser.add_argument(
+ '--exclusive',
+ action='store_true',
+ help='Exports the rest of the options except for the given constant')
+ parser.add_argument(
+ '-o', '--output_path', type=str, required=True, help='Path to output')
+
+ args = parser.parse_args()
+
+ if not hasattr(CONSTANT, args.constant):
+ raise NameError('Not found given constant name')
+
+ if args.exclusive:
+ constant_to_exclude = getattr(CONSTANT, args.constant)
+ constant_to_export = []
+ for opt in CONSTANT.OPTIMIZATION_OPTS:
+ if opt[0] in constant_to_exclude:
+ continue
+ constant_to_export.append(opt[0])
+ else:
+ constant_to_export = getattr(CONSTANT, args.constant)
+
+ if args.format == 'cfg':
+ SECTION_TO_EXPORT = 'one-optimize'
+ config = configparser.ConfigParser()
+ config[SECTION_TO_EXPORT] = dict()
+ for constant in constant_to_export:
+ config[SECTION_TO_EXPORT][constant] = 'True'
+
+ with open(args.output_path, 'w') as f:
+ config.write(f)
+
+ if args.format == 'txt':
+ with open(args.output_path, 'w') as f:
+ for constant in constant_to_export:
+ f.write(f"{constant}\n")
+
+
+if __name__ == '__main__':
+ main()
diff --git a/compiler/one-cmds/onelib/make_cmd.py b/compiler/one-cmds/onelib/make_cmd.py
new file mode 100644
index 000000000..068b54959
--- /dev/null
+++ b/compiler/one-cmds/onelib/make_cmd.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+
+import onelib.constant as _constant
+
+
+def is_valid_attr(args, attr):
+ return hasattr(args, attr) and getattr(args, attr)
+
+
+def make_tf2tfliteV2_cmd(args, driver_path, input_path, output_path):
+ """make a command for running tf2tfliteV2.py"""
+ cmd = [sys.executable, os.path.expanduser(driver_path)]
+ # verbose
+ if is_valid_attr(args, 'verbose'):
+ cmd.append('--verbose')
+ # model_format
+ if is_valid_attr(args, 'model_format_cmd'):
+ cmd.append(getattr(args, 'model_format_cmd'))
+ elif is_valid_attr(args, 'model_format'):
+ cmd.append('--' + getattr(args, 'model_format'))
+ else:
+ cmd.append('--graph_def') # default value
+ # converter version
+ if is_valid_attr(args, 'converter_version_cmd'):
+ cmd.append(getattr(args, 'converter_version_cmd'))
+ elif is_valid_attr(args, 'converter_version'):
+ cmd.append('--' + getattr(args, 'converter_version'))
+ else:
+ cmd.append('--v1') # default value
+ # input_path
+ if is_valid_attr(args, 'input_path'):
+ cmd.append('--input_path')
+ cmd.append(os.path.expanduser(input_path))
+ # output_path
+ if is_valid_attr(args, 'output_path'):
+ cmd.append('--output_path')
+ cmd.append(os.path.expanduser(output_path))
+ # input_arrays
+ if is_valid_attr(args, 'input_arrays'):
+ cmd.append('--input_arrays')
+ cmd.append(getattr(args, 'input_arrays'))
+ # input_shapes
+ if is_valid_attr(args, 'input_shapes'):
+ cmd.append('--input_shapes')
+ cmd.append(getattr(args, 'input_shapes'))
+ # output_arrays
+ if is_valid_attr(args, 'output_arrays'):
+ cmd.append('--output_arrays')
+ cmd.append(getattr(args, 'output_arrays'))
+
+ # experimental options
+ if is_valid_attr(args, 'experimental_disable_batchmatmul_unfold'):
+ cmd.append('--experimental_disable_batchmatmul_unfold')
+
+ return cmd
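+
+
+# For illustration, with model_format='saved_model', converter_version='v2',
+# and input/output paths set, the resulting command resembles (paths hypothetical):
+# python tf2tfliteV2.py --saved_model --v2 --input_path m --output_path m.tflite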
+
+
+def make_tflite2circle_cmd(driver_path, input_path, output_path):
+ """make a command for running tflite2circle"""
+ cmd = [driver_path, input_path, output_path]
+ return [os.path.expanduser(c) for c in cmd]
+
+
+def make_circle2circle_cmd(args, driver_path, input_path, output_path):
+ """make a command for running circle2circle"""
+ cmd = [os.path.expanduser(c) for c in [driver_path, input_path, output_path]]
+ # profiling
+ if is_valid_attr(args, 'generate_profile_data'):
+ cmd.append('--generate_profile_data')
+ # optimization pass(only true/false options)
+ # TODO support options whose number of arguments is more than zero
+ for opt in _constant.CONSTANT.OPTIMIZATION_OPTS:
+ if is_valid_attr(args, opt[0]):
+ # ./driver --opt[0]
+ if type(getattr(args, opt[0])) is bool:
+ cmd.append('--' + opt[0])
+ """
+ This condition check is for config file interface, usually would be
+ SomeOption=True
+ but user can write as follows while development
+ SomeOption=False
+ instead of removing SomeOption option
+ """
+ if type(getattr(args, opt[0])) is str and getattr(
+ args, opt[0]).lower() not in ['false', '0', 'n']:
+ cmd.append('--' + opt[0])
+
+ return cmd
diff --git a/compiler/one-cmds/onelib/utils.py b/compiler/one-cmds/onelib/utils.py
new file mode 100644
index 000000000..f7a1a963a
--- /dev/null
+++ b/compiler/one-cmds/onelib/utils.py
@@ -0,0 +1,251 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import configparser
+import glob
+import importlib.machinery
+import importlib.util
+import ntpath
+import os
+import subprocess
+import sys
+
+from typing import Union
+
+import onelib.constant as _constant
+
+
+def add_default_arg(parser):
+ # version
+ parser.add_argument(
+ '-v',
+ '--version',
+ action='store_true',
+ help='show program\'s version number and exit')
+
+ # verbose
+ parser.add_argument(
+ '-V',
+ '--verbose',
+ action='store_true',
+ help='output additional information to stdout or stderr')
+
+ # configuration file
+ parser.add_argument('-C', '--config', type=str, help='run with configuration file')
+ # section name that you want to run in configuration file
+ parser.add_argument('-S', '--section', type=str, help=argparse.SUPPRESS)
+
+
+def add_default_arg_no_CS(parser):
+ """
+ This adds -v -V args only (no -C nor -S)
+ """
+ # version
+ parser.add_argument(
+ '-v',
+ '--version',
+ action='store_true',
+ help='show program\'s version number and exit')
+
+ # verbose
+ parser.add_argument(
+ '-V',
+ '--verbose',
+ action='store_true',
+ help='output additional information to stdout or stderr')
+
+
+def is_accumulated_arg(arg, driver):
+ if driver == "one-quantize":
+ accumulables = [
+ "tensor_name", "scale", "zero_point", "src_tensor_name", "dst_tensor_name"
+ ]
+ if arg in accumulables:
+ return True
+
+ return False
+
+
+def is_valid_attr(args, attr):
+ return hasattr(args, attr) and getattr(args, attr)
+
+
+def parse_cfg(config_path: Union[str, None], section_to_parse: str, args):
+ """
+ parse configuration file and store the information to args
+
+ :param config_path: path to configuration file
+ :param section_to_parse: section name to parse
+ :param args: object to store the parsed information
+ """
+ if config_path is None:
+ return
+
+ parser = configparser.ConfigParser()
+ parser.optionxform = str
+ parser.read(config_path)
+
+ if not parser.has_section(section_to_parse):
+ raise AssertionError('configuration file must have \'' + section_to_parse +
+ '\' section')
+
+ for key in parser[section_to_parse]:
+ if is_accumulated_arg(key, section_to_parse):
+ if not is_valid_attr(args, key):
+ setattr(args, key, [parser[section_to_parse][key]])
+ else:
+ getattr(args, key).append(parser[section_to_parse][key])
+ continue
+ if hasattr(args, key) and getattr(args, key):
+ continue
+ setattr(args, key, parser[section_to_parse][key])
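+
+
+# NOTE parse_cfg gives precedence to values already set on `args` (e.g. from
+# the command line): existing attributes are skipped, while accumulated keys
+# (see is_accumulated_arg) are appended to a list instead of overwritten.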
+
+
+def print_version_and_exit(file_path):
+ """print version of the file located in the file_path"""
+ script_path = os.path.realpath(file_path)
+ dir_path = os.path.dirname(script_path)
+ script_name = os.path.splitext(os.path.basename(script_path))[0]
+ # run one-version
+ subprocess.call([os.path.join(dir_path, 'one-version'), script_name])
+ sys.exit()
+
+
+def safemain(main, mainpath):
+ """execute given method and print with program name for all uncaught exceptions"""
+ try:
+ main()
+ except Exception as e:
+ prog_name = os.path.basename(mainpath)
+ print(f"{prog_name}: {type(e).__name__}: " + str(e), file=sys.stderr)
+ sys.exit(255)
+
+
+def run(cmd, err_prefix=None, logfile=None):
+ """Execute command in subprocess
+
+ Args:
+ cmd: command to be executed in subprocess
+ err_prefix: prefix to be put before every stderr lines
+ logfile: file stream to which both of stdout and stderr lines will be written
+ """
+ with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) as p:
+ import select
+ inputs = set([p.stdout, p.stderr])
+ while inputs:
+ readable, _, _ = select.select(inputs, [], [])
+ for x in readable:
+ line = x.readline()
+ if len(line) == 0:
+ inputs.discard(x)
+ continue
+ if x == p.stdout:
+ out = sys.stdout
+ if x == p.stderr:
+ out = sys.stderr
+ if err_prefix:
+ line = f"{err_prefix}: ".encode() + line
+ out.buffer.write(line)
+ out.buffer.flush()
+ if logfile is not None:
+ logfile.write(line)
+ if p.returncode != 0:
+ sys.exit(p.returncode)
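+
+
+# Usage sketch (command and file names hypothetical):
+# with open('driver.log', 'wb') as f:
+# run(['one-version'], err_prefix='one-version', logfile=f)
+# note the logfile must be opened in binary mode, since raw bytes are written.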
+
+
+def remove_prefix(s, prefix):
+ if s.startswith(prefix):
+ return s[len(prefix):]
+ return s
+
+
+def remove_suffix(s, suffix):
+ if s.endswith(suffix):
+ return s[:-len(suffix)]
+ return s
+
+
+def get_optimization_list(get_name=False):
+ """
+ returns a list of optimization option files. If `get_name` is True,
+ only basenames without extension are returned rather than full file paths.
+
+ [one hierarchy]
+ one
+ ├── backends
+ ├── bin
+ ├── doc
+ ├── include
+ ├── lib
+ ├── optimization
+ └── test
+
+ Optimization options must be placed in `optimization` folder
+ """
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+
+ # optimization folder
+ files = [
+ f for f in glob.glob(dir_path + '/../../optimization/O*.cfg', recursive=True)
+ ]
+ # exclude if the name has space
+ files = [s for s in files if ' ' not in s]
+
+ opt_list = []
+ for cand in files:
+ base = ntpath.basename(cand)
+ if os.path.isfile(cand) and os.access(cand, os.R_OK):
+ opt_list.append(cand)
+
+ if get_name:
+ # NOTE the name includes prefix 'O'
+ # e.g. O1, O2, ONCHW not just 1, 2, NCHW
+ opt_list = [ntpath.basename(f) for f in opt_list]
+ opt_list = [remove_suffix(s, '.cfg') for s in opt_list]
+
+ return opt_list
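+
+
+# e.g. with one/optimization/O1.cfg and OMY_OPT.cfg present (names hypothetical),
+# get_optimization_list(get_name=True) returns ['O1', 'OMY_OPT']
+# (glob order is not guaranteed).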
+
+
+def detect_one_import_drivers(search_path):
+ """Looks for import drivers in given directory
+
+ Args:
+ search_path: path to the directory where to search import drivers
+
+ Returns:
+ dict: each entry is related to single detected driver,
+ key is a config section name, value is a driver name
+
+ """
+ import_drivers_dict = {}
+ for module_name in os.listdir(search_path):
+ full_path = os.path.join(search_path, module_name)
+ if not os.path.isfile(full_path):
+ continue
+ if module_name.find("one-import-") != 0:
+ continue
+ module_loader = importlib.machinery.SourceFileLoader(module_name, full_path)
+ module_spec = importlib.util.spec_from_loader(module_name, module_loader)
+ module = importlib.util.module_from_spec(module_spec)
+ try:
+ module_loader.exec_module(module)
+ if hasattr(module, "get_driver_cfg_section"):
+ section = module.get_driver_cfg_section()
+ import_drivers_dict[section] = module_name
+ except Exception:
+ # ignore files that fail to load as python modules
+ pass
+ return import_drivers_dict
diff --git a/compiler/one-cmds/onnx_legalizer.py b/compiler/one-cmds/onnx_legalizer.py
new file mode 100755
index 000000000..0141514b6
--- /dev/null
+++ b/compiler/one-cmds/onnx_legalizer.py
@@ -0,0 +1,1062 @@
+#!/usr/bin/python3
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import onnx
+import onnx.numpy_helper
+import sys
+import numpy as np
+import re
+
+# Transform onnx model to make it compilable with our toolchain
+#
+# This code works with onnx model in proto format. See proto buffers format in
+# https://github.com/onnx/onnx/blob/96516aecd4c110b0ac57eba08ac236ebf7205728/onnx/onnx.proto3
+#
+# More examples of handling onnx models could be found here:
+# https://github.com/onnx/onnx/tree/96516aecd4c110b0ac57eba08ac236ebf7205728/onnx/examples
+#
+# List of transformations:
+# - Replace RNN operation with unrolled subgraph
+# - Replace LSTM operation with unrolled subgraph
+
+
+class LegalizeOptions:
+ """Controls transformations that legalizer apply
+
+ Attributes:
+ unroll_rnn (bool): default is False. If True - unrolls RNN operations
+ unroll_lstm (bool): default is False. If True - unrolls LSTM operations
+ """
+
+ unroll_rnn = False
+ unroll_lstm = False
+
+
+def _reverse_str(s):
+ return ''.join(reversed(s))
+
+
+def _parse_tensor_name(name):
+ """Splits tensor name to base part and serial number
+
+ Most of tensor names have following format: "tensor_123".
+ This function breaks name into two values: "tensor_" and 123.
+ Tensor names like this: "321" are broken into "" and 321.
+
+ Serial number is used to create unique tensor names using given base name.
+
+ Args:
+ name (str): tensor name
+
+ Returns:
+ tuple of str, int: base name and serial number of tensor
+ """
+ rev = _reverse_str(name)
+ m = re.match(r'(\d*)(.*)', rev)
+ if m.groups()[0] != '':
+ return (_reverse_str(m.groups()[1]), int(_reverse_str(m.groups()[0])))
+ else:
+ return (_reverse_str(m.groups()[1]), 0)
+
+
+class _ModelTransformerHelper:
+ """Helper for onnx model transformation
+
+ This helper is used for convenient operation replacement in onnx model
+
+ Attributes:
+ _model (onnx.onnx_ml_pb2.ModelProto): target model that should be changed
+ _nodes_to_delete (list of onnx.onnx_ml_pb2.NodeProto): list of replaced operations
+ _insert_id (int): position at which created operations are inserted (insertions must keep the node list topologically sorted)
+ _base_name_idx (dict from str to int): maps tensor "base" name to
+ largest existing serial num. For example model has tensors "t_1", "t_2", "t_4",
+ in that case _base_name_idx["t_"] == 4.
+ This attribute is used for unique tensor name generation.
+ """
+
+ def __init__(self, model):
+ self._model = model
+ self._nodes_to_delete = []
+ self._insert_id = 0
+ # each tensor has name containing base name and unique number. for example:
+ # "abc_123": "abs_" - base name, "123" - unique number
+ # if no number in name, consider it is equal to "0"
+
+ # mapping from base names to largest given number
+ self._base_name_idx = {}
+ # gather name information for existing tensors
+ for node in model.graph.node:
+ for t in list(node.input) + list(node.output):
+ base_name, number = _parse_tensor_name(t)
+ if base_name in self._base_name_idx:
+ self._base_name_idx[base_name] = max(self._base_name_idx[base_name],
+ number)
+ else:
+ self._base_name_idx[base_name] = number
+
+ def make_tensor_with_base_name(self, base_name):
+ """ Create unique name for given base_name
+
+ Args:
+ base_name (str): base tensor name
+
+ Returns:
+ str : unique tensor name that starts with base_name
+ """
+ if base_name in self._base_name_idx:
+ self._base_name_idx[base_name] += 1
+ return base_name + str(self._base_name_idx[base_name])
+ else:
+ self._base_name_idx[base_name] = 0
+ return base_name + '0'
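+
+        # Example (hedged; assumes the model already contains "t_1", "t_2", "t_4"):
+        #   make_tensor_with_base_name('t_') -> "t_5", then "t_6" on the next call,
+        #   while an unseen base name such as 'fresh_' -> "fresh_0".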
+
+ def make_node(self, opcode, inputs, outputs, *p_args, **k_args):
+ """Create arbitrary node and insert it in graph.
+
+ Args:
+ opcode (str): opcode name of desired operation
+ inputs (list of str): names of input tensors
+            outputs (list of str or int): names of existing tensors to use as operation
+              outputs, or the number of output tensors to create
+ p_args: additional arguments for onnx make_node helper
+ k_args: attributes for onnx node
+
+ Returns:
+ list of str: list of output tensor names
+ """
+        if isinstance(outputs, int):
+            outputs = [self.make_tensor_with_base_name('') for i in range(outputs)]
+        assert isinstance(outputs, list)
+ node = onnx.helper.make_node(opcode, inputs, outputs, *p_args, **k_args)
+ self._model.graph.node.insert(self._insert_id, node)
+ self._insert_id += 1
+ return outputs
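+
+        # Usage sketch (hypothetical tensor names, for illustration only):
+        #   self.make_node('Relu', ['x'], 1)      # allocates one fresh output name
+        #   self.make_node('Relu', ['x'], ['y'])  # writes to the existing tensor 'y'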
+
+ def make_split(self, input, split_sizes, axis):
+ """Create Split operation and insert it in graph.
+
+ Args:
+ input (str): name of input tensor
+ split_sizes (list of int): list of split sizes
+            axis (int): axis along which to split
+
+ Returns:
+ list: list of output tensor names
+ """
+ return self.make_node(
+ 'Split', [input], len(split_sizes), axis=axis, split=split_sizes)
+
+ def make_concat(self, inputs, axis):
+ """Create Concat operation and insert it in graph.
+
+ Args:
+            inputs (list of str): list of tensor names to concatenate
+            axis (int): axis along which to concatenate
+
+ Returns:
+ str: output tensor name
+ """
+ return self.make_node('Concat', inputs, 1, axis=axis)[0]
+
+ def make_squeeze(self, input, axes):
+ """Create Squeeze operation and insert it in graph.
+
+ Args:
+ input (str): name of input tensor
+            axes (list of int): list of size-1 dimensions to remove
+
+ Returns:
+ str: output tensor name
+ """
+ return self.make_node('Squeeze', [input], 1, axes=axes)[0]
+
+ def make_unsqueeze(self, input, axes):
+ """Create Unsqueeze operation and insert it in graph.
+
+ Args:
+ input (str): name of input tensor
+            axes (list of int): list of positions at which to insert dimensions of size 1
+
+ Returns:
+ str: output tensor name
+ """
+ return self.make_node('Unsqueeze', [input], 1, axes=axes)[0]
+
+ def make_gemm(self, A, B, C, trans_a=False, trans_b=False):
+ """Create Gemm operation and insert it in graph.
+
+ Result tensor contains A*B + C
+
+ Args:
+ A (str): name of tensor A
+ B (str): name of tensor B
+ C (str): name of tensor C
+            trans_a (bool): if True, transpose tensor A before multiplication
+            trans_b (bool): if True, transpose tensor B before multiplication
+
+ Returns:
+ str: output tensor name
+ """
+ return self.make_node(
+ 'Gemm', [A, B, C], 1, transA=bool(trans_a), transB=bool(trans_b))[0]
+
+ def make_add(self, a, b):
+ """Creates Add operation and insert it in graph.
+
+ Args:
+ a (str): name of left operand tensor
+ b (str): name of right operand tensor
+
+ Returns:
+ str: output tensor name
+ """
+ return self.make_node('Add', [a, b], 1)[0]
+
+ def make_mul(self, a, b):
+ """Creates Mul operation and insert it in graph.
+
+ Args:
+ a (str): name of left operand tensor
+ b (str): name of right operand tensor
+
+ Returns:
+ str: output tensor name
+ """
+ return self.make_node('Mul', [a, b], 1)[0]
+
+ def make_clip(self, input, min, max):
+ """Create Clip operation and insert it in graph.
+
+ Args:
+ input (str): input tensor name
+ min (int/float): lower clip bound
+            max (int/float): upper clip bound
+
+ Returns:
+ str: output tensor name
+ """
+ return self.make_node('Clip', [input], 1, min=min, max=max)[0]
+
+ def make_act(self, input, act_name):
+ """Create activation function operation and insert it in graph.
+
+ Args:
+ input (str): input tensor name
+ act_name (str): name of activation function, one of ['Relu', 'Tanh', 'Sigmoid']
+
+ Returns:
+ str: output tensor name
+ """
+ assert (act_name in ['Relu', 'Tanh', 'Sigmoid'])
+ return self.make_node(act_name, [input], 1)[0]
+
+ def make_constant_tensor(self, tensor_data, base_name):
+ """Creates onnx constant tensor
+
+ Args:
+ tensor_data (numpy.ndarray): tensor data
+ base_name (str): prefix of constant tensor name
+
+ Returns:
+ str: name of created constant tensor
+ """
+ tensor = onnx.numpy_helper.from_array(tensor_data)
+ tensor.name = self.make_tensor_with_base_name(base_name)
+ self._model.graph.initializer.append(tensor)
+ return tensor.name
+
+ def mark_for_deletion(self, node):
+ self._nodes_to_delete += [node]
+
+ def get_insert_id(self):
+ return self._insert_id
+
+ def set_insert_id(self, insert_id):
+ self._insert_id = insert_id
+
+ def delete_marked_nodes(self):
+ for node in self._nodes_to_delete:
+ self._model.graph.node.remove(node)
+
+
+class _TensorInfo:
+ def __init__(self, dtype, shape):
+ self.dtype = dtype
+ self.shape = shape
+
+
+def _get_tensor_infos(model):
+ """Infer tensor shapes and dtypes
+ Args:
+ model (onnx.onnx_ml_pb2.ModelProto): model to process
+
+ Returns:
+ dict from str to _TensorInfo: maps tensor name to shape and dtype information
+ """
+
+ inferred_shape_model = onnx.shape_inference.infer_shapes(model)
+
+ infos = {}
+ for tensor in list(inferred_shape_model.graph.value_info) + list(
+ inferred_shape_model.graph.input):
+ info = _TensorInfo(tensor.type.tensor_type.elem_type, [])
+ for dim in tensor.type.tensor_type.shape.dim:
+ info.shape += [dim.dim_value]
+ infos[tensor.name] = info
+
+ for tensor in list(model.graph.initializer):
+ infos[tensor.name] = _TensorInfo(tensor.data_type, tensor.dims)
+ return infos
+
+
+def _dtype_to_np(dtype):
+ """Convert onnx dtype value to numpy dtype class
+
+ For more types see:
+ https://github.com/onnx/onnx/blob/96516aecd4c110b0ac57eba08ac236ebf7205728/onnx/onnx.proto3#L484
+
+ Args:
+ dtype (int): onnx dtype
+
+ Returns:
+ numpy data type: numpy dtype, like np.float32
+ """
+
+ if dtype == 1:
+ return np.float32
+ else:
+ raise NotImplementedError('unsupported data type')
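+
+# For broader dtype coverage one could hedge on the mapping shipped with the pinned
+# onnx revision, e.g. onnx.mapping.TENSOR_TYPE_TO_NP_TYPE[dtype]; only FLOAT
+# (enum value 1) is needed by this legalizer, so the explicit branch above suffices.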
+
+
+def _generate_one_direction_RNN(transformer, X, W, R, B, initial_h, clip,
+ activation_name):
+ """Generate subgraph of one direction of unrolled RNN layer
+
+ Args:
+ transformer (_ModelTransformerHelper): helper for model generation
+ X (list of str): names of input tensors in sequence. Tensor shapes: [batch_size, input_size].
+ W (str): name of weight tensor
+ R (str): name of recurrence weight tensor
+ B (str): name of bias tensor
+ initial_h (str or None): name of tensor containing initial hidden state. Shape [batch_size, hidden_size]
+      clip (float or None): clipping range applied to activation inputs
+      activation_name (str): name of the activation function
+ """
+ # one direction RNN:
+ #
+ # For details see:
+ # https://github.com/onnx/onnx/blob/5cf5feef5ec3fd5527b2fdb6c29780e3b705059f/docs/Changelog.md#RNN-7
+ #
+ # H = f(X*(W^T) + h*(R^T) + B)
+ #
+ # H - new hidden state
+ # h - previous hidden state
+ # X - current input
+ # W - input weights matrix
+    #  R - recurrent weights matrix
+    #  Wb - input weights matmul bias
+    #  Rb - recurrent weights matmul bias
+ # f - activation function
+
+ seq_length = len(X)
+ first_iter = 0
+ state_tensors = []
+ if initial_h is not None:
+ previous_state_tensor = initial_h
+ else:
+ first_iter = 1
+ state_tensor = transformer.make_gemm(X[0], W, B, trans_b=True)
+        if clip is not None:
+ state_tensor = transformer.make_clip(state_tensor, min=-clip, max=clip)
+ previous_state_tensor = transformer.make_act(state_tensor, activation_name)
+ state_tensors += [previous_state_tensor]
+
+ for i in range(first_iter, seq_length):
+ state_tensor = transformer.make_gemm(X[i], W, B, trans_b=True)
+ state_tensor = transformer.make_gemm(
+ previous_state_tensor, R, state_tensor, trans_b=True)
+        if clip is not None:
+ state_tensor = transformer.make_clip(state_tensor, min=-clip, max=clip)
+ previous_state_tensor = transformer.make_act(state_tensor, activation_name)
+ state_tensors += [previous_state_tensor]
+ return state_tensors
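+
+
+# Hedged illustration (not part of the original tool, never called by the legalizer):
+# a numpy reference for a single unrolled RNN step, mirroring the Gemm/activation
+# chain emitted above.
+def _rnn_step_reference(x, h, W, R, B, f=np.tanh):
+    """Compute H = f(x*(W^T) + h*(R^T) + B) with numpy arrays (illustration only)."""
+    return f(np.dot(x, W.T) + np.dot(h, R.T) + B)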
+
+
+def _transform_unidirectional_RNN(transformer, original_node, x, tensor_infos, activation,
+ clip, direction, hidden_size, layout):
+ """Generate Simple (forward or reverse) unrolled RNN
+
+ Args:
+ transformer (_ModelTransformerHelper): transformation helper
+ original_node (onnx.onnx_ml_pb2.NodeProto): unidirectional RNN operation to unroll
+ x (list of str): list of input tensors (input tensor split along "time" dimension)
+        tensor_infos (dict from str to _TensorInfo): maps tensor name to its shape and dtype info
+        activation (str): name of activation function
+        clip (float or None): clipping range applied to activation inputs
+ direction (str): "forward" or "reverse"
+ hidden_size (int): size of hidden state
+ layout (int): See attribute description:
+ https://github.com/onnx/onnx/blob/5cf5feef5ec3fd5527b2fdb6c29780e3b705059f/docs/Operators.md#attributes-56
+ """
+
+ inputs = original_node.input
+ outputs = original_node.output
+ if direction == 'reverse':
+ x.reverse()
+ w = transformer.make_squeeze(inputs[1], axes=[0])
+ r = transformer.make_squeeze(inputs[2], axes=[0])
+ if len(inputs) > 3 and inputs[3] != '':
+ raw_bias_tensor = transformer.make_squeeze(inputs[3], axes=[0])
+ splitted_bias_tensors = transformer.make_split(
+ raw_bias_tensor, split_sizes=[hidden_size] * 2, axis=0)
+ b = transformer.make_add(splitted_bias_tensors[0], splitted_bias_tensors[1])
+ else:
+ data_type = _dtype_to_np(tensor_infos[inputs[2]].dtype)
+ b = transformer.make_constant_tensor(
+ np.zeros(hidden_size, dtype=data_type), "zero_bias")
+ if len(inputs) > 5 and inputs[5] != '':
+ direction_dim = layout
+ initial_h = transformer.make_squeeze(inputs[5], axes=[direction_dim])
+ else:
+ initial_h = None
+ state_tensors = _generate_one_direction_RNN(transformer, x, w, r, b, initial_h, clip,
+ activation)
+ y_direction_dim = layout + 1
+ y_h_direction_dim = layout
+ state_layout_tensors = []
+ seq_length_dim = layout
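+    # Descriptive note (per the ONNX RNN spec, not from the original source): with
+    # layout == 0, Y has shape [seq_length, num_directions, batch_size, hidden_size]
+    # and Y_h has shape [num_directions, batch_size, hidden_size], which is why each
+    # per-step state below gains both a sequence axis and a direction axis.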
+ for state in state_tensors:
+ state_layout_tensors += [
+ transformer.make_unsqueeze(state, axes=[seq_length_dim, y_direction_dim])
+ ]
+
+ # use low-level interface to attach to existing tensors
+ Y_h = outputs[1]
+ transformer.make_node(
+ 'Unsqueeze', [state_tensors[-1]], [Y_h], axes=[y_h_direction_dim])
+ Y = outputs[0]
+ transformer.make_node('Concat', state_layout_tensors, [Y], axis=seq_length_dim)
+
+
+def _transform_bidirectional_RNN(transformer, original_node, x, tensor_infos, activations,
+ clip, hidden_size, layout):
+ """Generate Bidirectional unrolled RNN
+
+ Args:
+ transformer (_ModelTransformerHelper): transformation helper
+ original_node (onnx.onnx_ml_pb2.NodeProto): bidirectional RNN operation to unroll
+ x (list of str): list of input tensors (input tensor split along "time" dimension)
+        tensor_infos (dict from str to _TensorInfo): maps tensor name to its shape and dtype info
+        activations (list of str): list of length 2 containing names of forward and reverse activations
+        clip (float or None): clipping range applied to activation inputs
+ hidden_size (int): size of hidden state
+ layout (int): See attribute description:
+ https://github.com/onnx/onnx/blob/5cf5feef5ec3fd5527b2fdb6c29780e3b705059f/docs/Operators.md#attributes-56
+ """
+
+ inputs = original_node.input
+ outputs = original_node.output
+ w_bi = transformer.make_split(inputs[1], split_sizes=[1, 1], axis=0)
+ r_bi = transformer.make_split(inputs[2], split_sizes=[1, 1], axis=0)
+ w = []
+ r = []
+ for d in range(2):
+ w += [transformer.make_squeeze(w_bi[d], axes=[0])]
+ r += [transformer.make_squeeze(r_bi[d], axes=[0])]
+
+ b = []
+ if len(inputs) > 3 and inputs[3] != '':
+ raw_bias_tensors = transformer.make_split(inputs[3], split_sizes=[1, 1], axis=0)
+ for d in range(2):
+ raw_bias_tensors_squeezed = transformer.make_squeeze(
+ raw_bias_tensors[d], axes=[0])
+ splitted_bias_tensors = transformer.make_split(
+ raw_bias_tensors_squeezed, split_sizes=[hidden_size] * 2, axis=0)
+ b += [
+ transformer.make_add(splitted_bias_tensors[0], splitted_bias_tensors[1])
+ ]
+ else:
+ data_type = _dtype_to_np(tensor_infos[inputs[2]].dtype)
+ b = [
+ transformer.make_constant_tensor(
+ np.zeros(hidden_size, dtype=data_type), "zero_bias")
+ ] * 2
+ initial_h = [None, None]
+ if len(inputs) > 5 and inputs[5] != '':
+ direction_dim = layout
+ initial_h = transformer.make_split(
+ inputs[5], split_sizes=[1, 1], axis=direction_dim)
+ for d in range(2):
+ initial_h[d] = transformer.make_squeeze(initial_h[d], axes=[direction_dim])
+
+ state_f_tensors = _generate_one_direction_RNN(transformer, x, w[0], r[0], b[0],
+ initial_h[0], clip, activations[0])
+ x.reverse()
+ state_b_tensors = _generate_one_direction_RNN(transformer, x, w[1], r[1], b[1],
+ initial_h[1], clip, activations[1])
+ state_b_tensors.reverse()
+
+ y_direction_dim = layout + 1
+ y_h_direction_dim = layout
+ state_layout_tensors = []
+ seq_length_dim = layout
+ seq_length = len(x)
+ for t in range(seq_length):
+ state_f = state_f_tensors[t]
+ state_b = state_b_tensors[t]
+ state_layout_tensors_f = transformer.make_unsqueeze(
+ state_f, axes=[seq_length_dim, y_direction_dim])
+ state_layout_tensors_b = transformer.make_unsqueeze(
+ state_b, axes=[seq_length_dim, y_direction_dim])
+ state_layout_tensors += [
+ transformer.make_concat(
+ [state_layout_tensors_f, state_layout_tensors_b], axis=y_direction_dim)
+ ]
+
+ last_f_state_layout_tensor = transformer.make_unsqueeze(
+ state_f_tensors[-1], axes=[y_h_direction_dim])
+ last_b_state_layout_tensor = transformer.make_unsqueeze(
+ state_b_tensors[0], axes=[y_h_direction_dim])
+
+ # use low-level interface to attach to existing tensors
+ Y_h = outputs[1]
+ transformer.make_node(
+ 'Concat', [last_f_state_layout_tensor, last_b_state_layout_tensor], [Y_h],
+ axis=y_h_direction_dim)
+
+ Y = outputs[0]
+ transformer.make_node('Concat', state_layout_tensors, [Y], axis=seq_length_dim)
+
+
+def _legalize_RNN(transformer, tensor_infos, node):
+ """Unroll RNN operation
+
+ Args:
+ transformer (_ModelTransformerHelper): transformation helper
+      tensor_infos (dict from str to _TensorInfo): maps tensor name to its shape and dtype info
+ node (onnx.onnx_ml_pb2.NodeProto): RNN operation to unroll
+ """
+ inputs = node.input
+ if len(inputs) > 4 and inputs[4] != '':
+ raise NotImplementedError('Variadic length of output is not supported')
+ # attributes
+ activation_alpha = []
+ activation_beta = []
+ activations = ['Tanh', 'Tanh']
+ clip = None
+ direction = 'forward'
+ hidden_size = 0
+ layout = 0
+
+ for attr in node.attribute:
+ if attr.name == 'activation_alpha':
+ activation_alpha = attr.floats
+ if attr.name == 'activation_beta':
+ activation_beta = attr.floats
+ if attr.name == 'activations':
+            activations = [item.decode('UTF-8') for item in attr.strings]
+ if attr.name == 'clip':
+ clip = attr.f
+ if attr.name == 'direction':
+ direction = attr.s.decode('UTF-8')
+ if attr.name == 'hidden_size':
+ hidden_size = attr.i
+ if attr.name == 'layout':
+ layout = attr.i
+
+ if len(activation_alpha) > 0 or len(activation_beta) > 0:
+        raise NotImplementedError('Unsupported parameters for RNN activations')
+
+ for act in activations:
+ if act not in ['Relu', 'Tanh', 'Sigmoid']:
+ raise NotImplementedError('Unsupported activation function')
+
+ seq_length_dim = layout
+ seq_length = tensor_infos[inputs[0]].shape[seq_length_dim]
+ if hidden_size == 0:
+ hidden_size = tensor_infos[inputs[2]].shape[2]
+
+ input_split_tensor = transformer.make_split(
+ inputs[0], split_sizes=[1] * seq_length, axis=seq_length_dim)
+ x = []
+ for i in range(len(input_split_tensor)):
+ input_frame_tensor = input_split_tensor[i]
+ squeezed_frame_tensor = transformer.make_squeeze(input_frame_tensor, axes=[0])
+ x += [squeezed_frame_tensor]
+
+ if direction in ['forward', 'reverse']:
+ _transform_unidirectional_RNN(transformer, node, x, tensor_infos, activations[0],
+ clip, direction, hidden_size, layout)
+ elif direction == 'bidirectional':
+ _transform_bidirectional_RNN(transformer, node, x, tensor_infos, activations,
+ clip, hidden_size, layout)
+ else:
+        raise RuntimeError('Unknown RNN direction')
+
+ transformer.mark_for_deletion(node)
+
+
+def _generate_one_direction_LSTM(transformer, X, W, R, B, initial_h, initial_c, P, clip,
+ act, dtype, hidden_size, batch_size):
+ """Generate subgraph for one direction of unrolled LSTM layer
+
+ Args:
+ transformer (_ModelTransformerHelper): helper for model generation
+ X (list of str): names of tensors in input sequence. Each tensor shape: [batch_size, input_size]
+ W (str): name of concatenated weight tensor: [input, output, forget, cell]
+ R (str): name of concatenated recurrence weights tensor: [input, output, forget, cell]
+ B (str): name of concatenated bias tensor: [input, output, forget, cell]
+ initial_h (str or None): name of tensor containing initial hidden state. Shape [batch_size, hidden_size]
+ initial_c (str or None): name of tensor containing initial cell state. Shape [batch_size, hidden_size]
+ P (str or None): name of concatenated peephole tensor: [input, output, forget]
+      clip (float or None): clipping range applied to activation inputs
+ act (dict of str): activation functions {'f': 'Sigmoid', 'g': 'Tanh', 'h': 'Tanh'}
+ dtype (numpy dtype): data type used in created LSTM operation
+ hidden_size (int): hidden dimension
+ batch_size (int): batch dimension
+ """
+ # one direction LSTM:
+ #
+ # For details see:
+ # https://github.com/onnx/onnx/blob/5cf5feef5ec3fd5527b2fdb6c29780e3b705059f/docs/Changelog.md#LSTM-7
+ #
+ # it = f(Xt*(Wi^T) + Ht-1*(Ri^T) + Pi (.) Ct-1 + Wbi + Rbi)
+ # ft = f(Xt*(Wf^T) + Ht-1*(Rf^T) + Pf (.) Ct-1 + Wbf + Rbf)
+ # ct = g(Xt*(Wc^T) + Ht-1*(Rc^T) + Wbc + Rbc)
+ # Ct = ft (.) Ct-1 + it (.) ct
+ # ot = f(Xt*(Wo^T) + Ht-1*(Ro^T) + Po (.) Ct + Wbo + Rbo)
+ # Ht = ot (.) h(Ct)
+ #
+ # X - input tensor
+ # i - input gate
+ # o - output gate
+ # f - forget gate
+ # c - cell gate
+ # t - time step (t-1 means previous time step)
+ # W[iofc] - W parameter weight matrix for input, output, forget, and cell gates
+ # R[iofc] - R recurrence weight matrix for input, output, forget, and cell gates
+ # Wb[iofc] - W bias vectors for input, output, forget, and cell gates
+ # Rb[iofc] - R bias vectors for input, output, forget, and cell gates
+ # P[iof] - P peephole weight vector for input, output, and forget gates
+ # WB[iofc] - W parameter weight matrix for backward input, output, forget, and cell gates
+ # RB[iofc] - R recurrence weight matrix for backward input, output, forget, and cell gates
+ # WBb[iofc] - W bias vectors for backward input, output, forget, and cell gates
+ # RBb[iofc] - R bias vectors for backward input, output, forget, and cell gates
+ # PB[iof] - P peephole weight vector for backward input, output, and forget gates
+ # H - Hidden state
+
+ seq_length = len(X)
+ state_h_tensors = []
+
+ w_tensors = transformer.make_split(W, split_sizes=[hidden_size] * 4, axis=0)
+ W = {'i': w_tensors[0], 'o': w_tensors[1], 'f': w_tensors[2], 'c': w_tensors[3]}
+
+ r_tensors = transformer.make_split(R, split_sizes=[hidden_size] * 4, axis=0)
+ R = {'i': r_tensors[0], 'o': r_tensors[1], 'f': r_tensors[2], 'c': r_tensors[3]}
+
+ if B is not None:
+ separate_b_tensors = transformer.make_split(
+ B, split_sizes=[hidden_size] * 8, axis=0)
+ b_tensors = []
+ for i in range(4):
+ b_tensors += [
+ transformer.make_add(separate_b_tensors[i], separate_b_tensors[i + 4])
+ ]
+ else:
+ b_tensors = [
+ transformer.make_constant_tensor(
+ np.zeros((hidden_size), dtype=dtype), 'zero_b')
+ ] * 4
+ B = {'i': b_tensors[0], 'o': b_tensors[1], 'f': b_tensors[2], 'c': b_tensors[3]}
+
+ if initial_h is not None:
+ previous_h_state_tensor = initial_h
+ else:
+ previous_h_state_tensor = transformer.make_constant_tensor(
+ np.zeros((batch_size, hidden_size), dtype=dtype), 'initial_h')
+
+ if initial_c is not None:
+ previous_c_state_tensor = initial_c
+ else:
+ previous_c_state_tensor = transformer.make_constant_tensor(
+ np.zeros((batch_size, hidden_size), dtype=dtype), 'initial_c')
+
+ if P is not None:
+ p_tensors = transformer.make_split(P, split_sizes=[hidden_size] * 3, axis=0)
+ P = {'i': p_tensors[0], 'o': p_tensors[1], 'f': p_tensors[2]}
+ else:
+ zero = transformer.make_constant_tensor(
+ np.zeros((hidden_size), dtype=dtype), 'zero_peephole')
+ P = {'i': zero, 'o': zero, 'f': zero}
+
+ for i in range(seq_length):
+ # it = f(Xt*(Wi^T) + Ht-1*(Ri^T) + Pi (.) Ct-1 + Wbi + Rbi)
+ it = transformer.make_gemm(X[i], W['i'], B['i'], trans_b=True)
+ it = transformer.make_gemm(previous_h_state_tensor, R['i'], it, trans_b=True)
+ peephole_it = transformer.make_mul(P['i'], previous_c_state_tensor)
+ it = transformer.make_add(it, peephole_it)
+ if clip is not None:
+ it = transformer.make_clip(it, min=-clip, max=clip)
+ it = transformer.make_act(it, act['f'])
+
+ # ft = f(Xt*(Wf^T) + Ht-1*(Rf^T) + Pf (.) Ct-1 + Wbf + Rbf)
+ ft = transformer.make_gemm(X[i], W['f'], B['f'], trans_b=True)
+ ft = transformer.make_gemm(previous_h_state_tensor, R['f'], ft, trans_b=True)
+ peephole_ft = transformer.make_mul(P['f'], previous_c_state_tensor)
+ ft = transformer.make_add(ft, peephole_ft)
+ if clip is not None:
+ ft = transformer.make_clip(ft, min=-clip, max=clip)
+ ft = transformer.make_act(ft, act['f'])
+
+ # ct = g(Xt*(Wc^T) + Ht-1*(Rc^T) + Wbc + Rbc)
+ ct = transformer.make_gemm(X[i], W['c'], B['c'], trans_b=True)
+ ct = transformer.make_gemm(previous_h_state_tensor, R['c'], ct, trans_b=True)
+ if clip is not None:
+ ct = transformer.make_clip(ct, min=-clip, max=clip)
+ ct = transformer.make_act(ct, act['g'])
+
+ # Ct = ft (.) Ct-1 + it (.) ct
+ ft_Ct = transformer.make_mul(ft, previous_c_state_tensor)
+ it_ct = transformer.make_mul(it, ct)
+ Ct = transformer.make_add(ft_Ct, it_ct)
+ previous_c_state_tensor = Ct
+
+ # ot = f(Xt*(Wo^T) + Ht-1*(Ro^T) + Po (.) Ct + Wbo + Rbo)
+ ot = transformer.make_gemm(X[i], W['o'], B['o'], trans_b=True)
+ ot = transformer.make_gemm(previous_h_state_tensor, R['o'], ot, trans_b=True)
+ peephole_ot = transformer.make_mul(P['o'], Ct)
+ ot = transformer.make_add(ot, peephole_ot)
+ if clip is not None:
+ ot = transformer.make_clip(ot, min=-clip, max=clip)
+ ot = transformer.make_act(ot, act['f'])
+
+ # Ht = ot (.) h(Ct)
+ Ht = transformer.make_act(Ct, act['h'])
+ Ht = transformer.make_mul(ot, Ht)
+ previous_h_state_tensor = Ht
+ state_h_tensors += [Ht]
+
+ return (state_h_tensors, previous_c_state_tensor)
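+
+
+# Hedged illustration (not part of the original tool, never called by the legalizer):
+# a numpy reference for one LSTM step as unrolled above. W, R, B are dicts of per-gate
+# numpy arrays keyed by 'i'/'o'/'f'/'c', P by 'i'/'o'/'f', and act maps 'f'/'g'/'h'
+# to numpy-compatible callables such as a sigmoid and np.tanh.
+def _lstm_step_reference(x, h, c, W, R, B, P, act):
+    """Compute one LSTM step (illustration only); returns (h_new, c_new)."""
+    i = act['f'](np.dot(x, W['i'].T) + np.dot(h, R['i'].T) + P['i'] * c + B['i'])
+    f = act['f'](np.dot(x, W['f'].T) + np.dot(h, R['f'].T) + P['f'] * c + B['f'])
+    g = act['g'](np.dot(x, W['c'].T) + np.dot(h, R['c'].T) + B['c'])
+    c_new = f * c + i * g
+    o = act['f'](np.dot(x, W['o'].T) + np.dot(h, R['o'].T) + P['o'] * c_new + B['o'])
+    h_new = o * act['h'](c_new)
+    return h_new, c_new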
+
+
+def _transform_unidirectional_LSTM(transformer, original_node, x, tensor_infos,
+ activations, clip, direction, hidden_size, layout):
+ """Generate Simple (forward or reverse) unrolled LSTM
+
+ Args:
+ transformer (_ModelTransformerHelper): transformation helper
+ original_node (onnx.onnx_ml_pb2.NodeProto): unidirectional LSTM operation to unroll
+ x (list of str): list of input tensors (input tensor split along "time" dimension)
+        tensor_infos (dict from str to _TensorInfo): maps tensor name to its shape and dtype info
+        activations (list of str): list of length 3 containing names of activation functions
+        clip (float or None): clipping range applied to activation inputs
+ direction (str): "forward" or "reverse"
+ hidden_size (int): size of hidden state
+ layout (int): See attribute description:
+ https://github.com/onnx/onnx/blob/5cf5feef5ec3fd5527b2fdb6c29780e3b705059f/docs/Operators.md#attributes-37
+ """
+
+ inputs = original_node.input
+ outputs = original_node.output
+ if direction == 'reverse':
+ x.reverse()
+ w = transformer.make_squeeze(inputs[1], axes=[0])
+ r = transformer.make_squeeze(inputs[2], axes=[0])
+
+ b = None
+ if len(inputs) > 3 and inputs[3] != '':
+ b = transformer.make_squeeze(inputs[3], axes=[0])
+
+ initial_h = None
+ if len(inputs) > 5 and inputs[5] != '':
+ direction_dim = layout
+ initial_h = transformer.make_squeeze(inputs[5], axes=[direction_dim])
+
+ initial_c = None
+ if len(inputs) > 6 and inputs[6] != '':
+ direction_dim = layout
+ initial_c = transformer.make_squeeze(inputs[6], axes=[direction_dim])
+
+ p = None
+ if len(inputs) > 7 and inputs[7] != '':
+ p = transformer.make_squeeze(inputs[7], axes=[0])
+
+ dtype = _dtype_to_np(tensor_infos[inputs[0]].dtype)
+ batch_size = tensor_infos[inputs[0]].shape[1 - layout]
+
+ act = {'f': activations[0], 'g': activations[1], 'h': activations[2]}
+
+ state_h_tensors, state_c_tensor = _generate_one_direction_LSTM(
+ transformer, x, w, r, b, initial_h, initial_c, p, clip, act, dtype, hidden_size,
+ batch_size)
+
+ y_direction_dim = layout + 1
+ y_h_direction_dim = layout
+ state_layout_tensors = []
+ seq_length_dim = layout
+ for h_state in state_h_tensors:
+ state_layout_tensors += [
+ transformer.make_unsqueeze(h_state, axes=[seq_length_dim, y_direction_dim])
+ ]
+
+ # use low-level interface to attach to existing tensors
+ Y_h = outputs[1]
+ transformer.make_node(
+ 'Unsqueeze', [state_h_tensors[-1]], [Y_h], axes=[y_h_direction_dim])
+ Y_c = outputs[2]
+ transformer.make_node('Unsqueeze', [state_c_tensor], [Y_c], axes=[y_h_direction_dim])
+ if direction == 'reverse':
+ state_layout_tensors.reverse()
+ Y = outputs[0]
+ transformer.make_node('Concat', state_layout_tensors, [Y], axis=seq_length_dim)
+
+
+def _transform_bidirectional_LSTM(transformer, original_node, x, tensor_infos,
+ activations, clip, hidden_size, layout):
+ """Generate Bidirectional unrolled LSTM
+
+ Args:
+ transformer (_ModelTransformerHelper): transformation helper
+ original_node (onnx.onnx_ml_pb2.NodeProto): bidirectional LSTM operation to unroll
+ x (list of str): list of input tensors (input tensor split along "time" dimension)
+        tensor_infos (dict from str to _TensorInfo): maps tensor name to its shape and dtype info
+        activations (list of str): list of length 6 containing names of forward and reverse activations
+        clip (float or None): clipping range applied to activation inputs
+ hidden_size (int): size of hidden state
+ layout (int): See attribute description:
+ https://github.com/onnx/onnx/blob/5cf5feef5ec3fd5527b2fdb6c29780e3b705059f/docs/Operators.md#attributes-37
+ """
+
+ inputs = original_node.input
+ outputs = original_node.output
+
+ w = transformer.make_split(inputs[1], split_sizes=[1, 1], axis=0)
+ r = transformer.make_split(inputs[2], split_sizes=[1, 1], axis=0)
+ for d in range(2):
+ w[d] = transformer.make_squeeze(w[d], axes=[0])
+ r[d] = transformer.make_squeeze(r[d], axes=[0])
+
+ b = [None, None]
+ if len(inputs) > 3 and inputs[3] != '':
+ b = transformer.make_split(inputs[3], split_sizes=[1, 1], axis=0)
+ for d in range(2):
+ b[d] = transformer.make_squeeze(b[d], axes=[0])
+
+ initial_h = [None, None]
+ if len(inputs) > 5 and inputs[5] != '':
+ direction_dim = layout
+ initial_h = transformer.make_split(
+ inputs[5], split_sizes=[1, 1], axis=direction_dim)
+ for d in range(2):
+ initial_h[d] = transformer.make_squeeze(initial_h[d], axes=[direction_dim])
+
+ initial_c = [None, None]
+ if len(inputs) > 6 and inputs[6] != '':
+ direction_dim = layout
+ initial_c = transformer.make_split(
+ inputs[6], split_sizes=[1, 1], axis=direction_dim)
+ for d in range(2):
+ initial_c[d] = transformer.make_squeeze(initial_c[d], axes=[direction_dim])
+
+ p = [None, None]
+ if len(inputs) > 7 and inputs[7] != '':
+ p = transformer.make_split(inputs[7], split_sizes=[1, 1], axis=0)
+ for d in range(2):
+ p[d] = transformer.make_squeeze(p[d], axes=[0])
+
+ dtype = _dtype_to_np(tensor_infos[inputs[0]].dtype)
+ batch_size = tensor_infos[inputs[0]].shape[1 - layout]
+
+ act = [{
+ 'f': activations[0],
+ 'g': activations[1],
+ 'h': activations[2]
+ }, {
+ 'f': activations[3],
+ 'g': activations[4],
+ 'h': activations[5]
+ }]
+
+ state_f_h_tensors, state_f_c_tensor = _generate_one_direction_LSTM(
+ transformer, x, w[0], r[0], b[0], initial_h[0], initial_c[0], p[0], clip, act[0],
+ dtype, hidden_size, batch_size)
+ x.reverse()
+ state_b_h_tensors, state_b_c_tensor = _generate_one_direction_LSTM(
+ transformer, x, w[1], r[1], b[1], initial_h[1], initial_c[1], p[1], clip, act[1],
+ dtype, hidden_size, batch_size)
+ state_b_h_tensors.reverse()
+
+ y_direction_dim = layout + 1
+ y_c_direction_dim = layout
+ state_layout_tensors = []
+ seq_length_dim = layout
+ for f_h_state, b_h_state in zip(state_f_h_tensors, state_b_h_tensors):
+ state_f_layout_tensors = transformer.make_unsqueeze(
+ f_h_state, axes=[seq_length_dim, y_direction_dim])
+ state_b_layout_tensors = transformer.make_unsqueeze(
+ b_h_state, axes=[seq_length_dim, y_direction_dim])
+ state_layout_tensors += [
+ transformer.make_concat(
+ [state_f_layout_tensors, state_b_layout_tensors], axis=y_direction_dim)
+ ]
+
+ last_f_state_layout_tensor = transformer.make_unsqueeze(
+ state_f_h_tensors[-1], axes=[y_c_direction_dim])
+ last_b_state_layout_tensor = transformer.make_unsqueeze(
+ state_b_h_tensors[0], axes=[y_c_direction_dim])
+
+ Y_h = outputs[1]
+ transformer.make_node(
+ 'Concat', [last_f_state_layout_tensor, last_b_state_layout_tensor], [Y_h],
+ axis=y_c_direction_dim)
+
+ Y_f_c = transformer.make_unsqueeze(state_f_c_tensor, axes=[y_c_direction_dim])
+ Y_b_c = transformer.make_unsqueeze(state_b_c_tensor, axes=[y_c_direction_dim])
+ Y_c = outputs[2]
+ transformer.make_node('Concat', [Y_f_c, Y_b_c], [Y_c], axis=y_c_direction_dim)
+
+ Y = outputs[0]
+ transformer.make_node('Concat', state_layout_tensors, [Y], axis=seq_length_dim)
+
+
+def _legalize_LSTM(transformer, tensor_infos, node):
+ """Unroll LSTM operation
+
+ Args:
+ transformer (_ModelTransformerHelper): transformation helper
+      tensor_infos (dict from str to _TensorInfo): maps tensor name to its shape and dtype info
+ node (onnx.onnx_ml_pb2.NodeProto): LSTM operation to unroll
+ """
+ inputs = node.input
+ if len(inputs) > 4 and inputs[4] != '':
+ raise NotImplementedError('Variadic length of output is not supported')
+ # attributes
+ activation_alpha = []
+ activation_beta = []
+ activations = ['Sigmoid', 'Tanh', 'Tanh'] * 2
+ clip = None
+ direction = 'forward'
+ hidden_size = 0
+ input_forget = 0
+ layout = 0
+
+ for attr in node.attribute:
+ if attr.name == 'activation_alpha':
+ activation_alpha = attr.floats
+ if attr.name == 'activation_beta':
+ activation_beta = attr.floats
+ if attr.name == 'activations':
+            activations = [item.decode('UTF-8') for item in attr.strings]
+ if attr.name == 'clip':
+ clip = attr.f
+ if attr.name == 'direction':
+ direction = attr.s.decode('UTF-8')
+ if attr.name == 'hidden_size':
+ hidden_size = attr.i
+ if attr.name == 'input_forget':
+ input_forget = attr.i
+ if attr.name == 'layout':
+ layout = attr.i
+
+ if len(activation_alpha) > 0 or len(activation_beta) > 0:
+ raise NotImplementedError('Unsupported parameters for LSTM activations')
+
+ for act in activations:
+ if act not in ['Relu', 'Tanh', 'Sigmoid']:
+ raise NotImplementedError('Unsupported activation function')
+
+ if input_forget != 0:
+ raise NotImplementedError('Unsupported input_forget attribute value')
+
+ seq_length_dim = layout
+ seq_length = tensor_infos[inputs[0]].shape[seq_length_dim]
+ if hidden_size == 0:
+ hidden_size = tensor_infos[inputs[2]].shape[2]
+
+ input_split_tensor = transformer.make_split(
+ inputs[0], split_sizes=[1] * seq_length, axis=seq_length_dim)
+ x = []
+ for i in range(len(input_split_tensor)):
+ input_frame_tensor = input_split_tensor[i]
+ squeezed_frame_tensor = transformer.make_squeeze(input_frame_tensor, axes=[0])
+ x += [squeezed_frame_tensor]
+
+ if direction in ['forward', 'reverse']:
+ _transform_unidirectional_LSTM(transformer, node, x, tensor_infos, activations,
+ clip, direction, hidden_size, layout)
+ elif direction == 'bidirectional':
+ _transform_bidirectional_LSTM(transformer, node, x, tensor_infos, activations,
+ clip, hidden_size, layout)
+ else:
+        raise RuntimeError('Unknown LSTM direction')
+
+ transformer.mark_for_deletion(node)
+
+
+def legalize(model, options):
+ """Replace selected operations in onnx model
+
+    Replaces operations selected by the given options with different operation sequences.
+    For example, unsupported parts of the graph are replaced with sequences of supported operations.
+
+    Note that the graph is changed in place.
+
+ Args:
+ model (onnx.onnx_ml_pb2.ModelProto): target model
+        options (LegalizeOptions): controls which transformations are applied
+ """
+ tensor_infos = _get_tensor_infos(model)
+
+ transformer = _ModelTransformerHelper(model)
+
+ node_id = 0
+ while node_id < len(model.graph.node):
+ node = model.graph.node[node_id]
+ if node.op_type == 'RNN' and options.unroll_rnn:
+            # the opset version check is required by the Split operation
+            # ('split' moved from an attribute to an input in opset 13)
+ if model.opset_import[0].version >= 13:
+ raise NotImplementedError(
+                    'Cannot generate code with opset version 13 or greater')
+ transformer.set_insert_id(node_id)
+ _legalize_RNN(transformer, tensor_infos, node)
+ node_id = transformer.get_insert_id()
+ elif node.op_type == 'LSTM' and options.unroll_lstm:
+ if model.opset_import[0].version >= 13:
+ raise NotImplementedError(
+                    'Cannot generate code with opset version 13 or greater')
+ transformer.set_insert_id(node_id)
+ _legalize_LSTM(transformer, tensor_infos, node)
+ node_id = transformer.get_insert_id()
+ node_id += 1
+
+ transformer.delete_marked_nodes()
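+
+
+# Library-mode usage sketch (hypothetical file names, mirroring the CLI entry below):
+#   options = LegalizeOptions()
+#   options.unroll_rnn = True  # unroll_lstm stays at its default (False)
+#   model = onnx.load('model.onnx')
+#   legalize(model, options)
+#   onnx.save(model, 'model_legalized.onnx')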
+
+
+if __name__ == '__main__':
+ if len(sys.argv) < 3:
+ print(
+ 'usage: ./legalize_onnx.py <path to input model> <path to output model>\n'
+ '\n'
+            '  In stand-alone utility mode this tool provides basic functionality.\n'
+            '  If you want more control over the applied transformations, use this legalizer as a library.'
+ )
+        sys.exit(1)
+ options = LegalizeOptions()
+ options.unroll_lstm = True
+ options.unroll_rnn = True
+ model = onnx.load(sys.argv[1])
+ legalize(model, options)
+ onnx.save(model, sys.argv[2])
diff --git a/compiler/one-cmds/requires.cmake b/compiler/one-cmds/requires.cmake
index 50c24579f..a25a7d70c 100644
--- a/compiler/one-cmds/requires.cmake
+++ b/compiler/one-cmds/requires.cmake
@@ -1,7 +1,10 @@
require("tf2tfliteV2")
require("tflite2circle")
require("circle2circle")
+require("circle-eval-diff")
require("circle-quantizer")
+require("circle-mpqsolver")
require("record-minmax")
require("vconone")
require("bcq-tools")
+require("rawdata2hdf5")
diff --git a/compiler/one-cmds/tests/CMakeLists.txt b/compiler/one-cmds/tests/CMakeLists.txt
new file mode 100644
index 000000000..8c006c10b
--- /dev/null
+++ b/compiler/one-cmds/tests/CMakeLists.txt
@@ -0,0 +1,152 @@
+# Install one-cmds test scripts
+
+# Gather test scripts
+file(GLOB TESTITEMS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.test")
+file(GLOB CONFIGITEMS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.cfg")
+file(GLOB QCONFIGITEMS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.qconf.json")
+file(GLOB PYSCRIPTS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.py")
+file(GLOB WORKFLOWITEMS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.workflow.json")
+
+# Create a script to run the tests in the installation folder
+set(DRIVER_SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/runtestall.sh")
+
+file(WRITE "${DRIVER_SCRIPT}" "#!/bin/bash\n\n")
+file(APPEND "${DRIVER_SCRIPT}" "SCRIPT_PATH=$(cd $(dirname $\{BASH_SOURCE\[0\]\}) && pwd)\n")
+file(APPEND "${DRIVER_SCRIPT}" "pushd $SCRIPT_PATH > /dev/null\n")
+file(APPEND "${DRIVER_SCRIPT}" "rm -rf runtestall.log\n")
+file(APPEND "${DRIVER_SCRIPT}" "export PATH=$SCRIPT_PATH/../bin:$PATH\n")
+file(APPEND "${DRIVER_SCRIPT}" "if [[ $# -ge 1 ]]; then\n")
+file(APPEND "${DRIVER_SCRIPT}" " USER_PATH=$1\n")
+file(APPEND "${DRIVER_SCRIPT}" " export PATH=$USER_PATH:$PATH\n")
+file(APPEND "${DRIVER_SCRIPT}" "fi\n")
+file(APPEND "${DRIVER_SCRIPT}" "\n")
+file(APPEND "${DRIVER_SCRIPT}" "# refer to https://github.com/Samsung/ONE/issues/6286\n")
+file(APPEND "${DRIVER_SCRIPT}" "set -o pipefail\n\n")
+file(APPEND "${DRIVER_SCRIPT}" "fail_count=0\n")
+file(APPEND "${DRIVER_SCRIPT}" "trap \"(( fail_count++ ))\" ERR\n\n")
+
+foreach(TESTITEM IN ITEMS ${TESTITEMS})
+ get_filename_component(ITEM_PREFIX ${TESTITEM} NAME_WE)
+
+ set(TESTITEM_SCRIPT_FILE "${ITEM_PREFIX}.test")
+ set(TESTITEM_SCRIPT_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/${TESTITEM_SCRIPT_FILE}")
+
+ file(APPEND "${DRIVER_SCRIPT}" "/bin/bash ${TESTITEM_SCRIPT_FILE} | tee -a runtestall.log\n")
+
+ install(FILES ${TESTITEM} DESTINATION test)
+
+endforeach(TESTITEM)
+
+foreach(CONFIGITEM IN ITEMS ${CONFIGITEMS})
+ get_filename_component(ITEM_PREFIX ${CONFIGITEM} NAME_WE)
+ install(FILES ${CONFIGITEM} DESTINATION test)
+endforeach(CONFIGITEM)
+
+foreach(QCONFIGITEM IN ITEMS ${QCONFIGITEMS})
+ get_filename_component(ITEM_PREFIX ${QCONFIGITEM} NAME_WE)
+ install(FILES ${QCONFIGITEM} DESTINATION test)
+endforeach(QCONFIGITEM)
+
+foreach(PYSCRIPT IN ITEMS ${PYSCRIPTS})
+ get_filename_component(ITEM_PREFIX ${PYSCRIPT} NAME_WE)
+ install(FILES ${PYSCRIPT} DESTINATION test)
+endforeach(PYSCRIPT)
+
+foreach(WORKFLOWITEM IN ITEMS ${WORKFLOWITEMS})
+ get_filename_component(ITEM_PREFIX ${WORKFLOWITEM} NAME_WE)
+ install(FILES ${WORKFLOWITEM} DESTINATION test)
+endforeach(WORKFLOWITEM)
+
+file(APPEND "${DRIVER_SCRIPT}" "popd > /dev/null\n\n")
+
+file(APPEND "${DRIVER_SCRIPT}"
+"if [[ $fail_count != 0 ]]; then
+ echo \"$fail_count TESTS FAILED\"
+ exit 255
+else
+ echo \"ALL TESTS PASSED!\"
+fi\n
+")
+
+set(PREPARE_TEST_MATERIALS_SH "${CMAKE_CURRENT_SOURCE_DIR}/prepare_test_materials.sh")
+set(PREPROCESS_IMAGES_PY "${CMAKE_CURRENT_SOURCE_DIR}/preprocess_images.py")
+set(ONNX_LEGALIZE_RUN_COMPARE "${CMAKE_CURRENT_SOURCE_DIR}/onnx_legalize_run_compare.py")
+set(PRINT_ONNX_MODEL "${CMAKE_CURRENT_SOURCE_DIR}/print_onnx_model.py")
+
+install(FILES ${DRIVER_SCRIPT}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION test)
+
+install(FILES ${PREPARE_TEST_MATERIALS_SH}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION test)
+
+install(FILES ${PREPROCESS_IMAGES_PY}
+ PERMISSIONS OWNER_WRITE OWNER_READ
+ GROUP_READ
+ WORLD_READ
+ DESTINATION test)
+
+install(FILES ${ONNX_LEGALIZE_RUN_COMPARE}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION test)
+
+install(FILES ${PRINT_ONNX_MODEL}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION test)
+
+install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/README.txt
+ DESTINATION test)
+
+add_subdirectory(onnx-operations)
+
+if(ENABLE_ONE_IMPORT_PYTORCH)
+ add_subdirectory(pytorch-operations)
+endif(ENABLE_ONE_IMPORT_PYTORCH)
+
+# Generate group option list for tests
+get_filename_component(ONE_CMDS_DIR ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY)
+set(ONE_PYTHON_DIR "onelib")
+set(CONSTANT_EXPORTING_SCRIPT "${ONE_CMDS_DIR}/${ONE_PYTHON_DIR}/export_constant.py")
+set(O1_OPTION "O1")
+set(O1_TXT_FILE "${O1_OPTION}.list")
+set(O1_TXT_FILE_BIN "${CMAKE_CURRENT_BINARY_DIR}/${O1_TXT_FILE}")
+set(NON_O1_TXT_FILE "non-${O1_OPTION}.list")
+set(NON_O1_TXT_FILE_BIN "${CMAKE_CURRENT_BINARY_DIR}/${NON_O1_TXT_FILE}")
+
+add_custom_command(OUTPUT ${O1_TXT_FILE_BIN}
+ COMMAND ${PYTHON_EXECUTABLE} ${CONSTANT_EXPORTING_SCRIPT} --constant ${O1_OPTION}
+ --format txt --output_path ${O1_TXT_FILE_BIN}
+ DEPENDS ${CONSTANT_EXPORTING_SCRIPT}
+ COMMENT "Generate ${O1_TXT_FILE}"
+)
+
+add_custom_command(OUTPUT ${NON_O1_TXT_FILE_BIN}
+ COMMAND ${PYTHON_EXECUTABLE} ${CONSTANT_EXPORTING_SCRIPT} --constant ${O1_OPTION}
+ --format txt --output_path ${NON_O1_TXT_FILE_BIN}
+ --exclusive
+ DEPENDS ${CONSTANT_EXPORTING_SCRIPT}
+ COMMENT "Generate ${NON_O1_TXT_FILE}"
+)
+
+add_custom_target("O1_txt_target" ALL DEPENDS ${O1_TXT_FILE_BIN} ${NON_O1_TXT_FILE_BIN})
+
+install(FILES ${O1_TXT_FILE_BIN}
+ PERMISSIONS OWNER_WRITE OWNER_READ
+ GROUP_READ
+ WORLD_READ
+ DESTINATION test)
+
+install(FILES ${NON_O1_TXT_FILE_BIN}
+ PERMISSIONS OWNER_WRITE OWNER_READ
+ GROUP_READ
+ WORLD_READ
+ DESTINATION test)
diff --git a/compiler/one-cmds/tests/OONE-BUILD_014.cfg b/compiler/one-cmds/tests/OONE-BUILD_014.cfg
new file mode 100644
index 000000000..a39aae071
--- /dev/null
+++ b/compiler/one-cmds/tests/OONE-BUILD_014.cfg
@@ -0,0 +1,2 @@
+[one-optimize]
+make_batchnorm_gamma_positive=True
diff --git a/compiler/one-cmds/tests/OONECC_024.cfg b/compiler/one-cmds/tests/OONECC_024.cfg
new file mode 100644
index 000000000..a39aae071
--- /dev/null
+++ b/compiler/one-cmds/tests/OONECC_024.cfg
@@ -0,0 +1,2 @@
+[one-optimize]
+make_batchnorm_gamma_positive=True
diff --git a/compiler/one-cmds/tests/README.txt b/compiler/one-cmds/tests/README.txt
new file mode 100644
index 000000000..0d4d0ecbe
--- /dev/null
+++ b/compiler/one-cmds/tests/README.txt
@@ -0,0 +1,27 @@
+one-cmds testing
+================
+
+Run the 'runtestall.sh' script to test the ONE command line programs, all at once.
+
+Steps:
+1) run 'one-prepare-venv' in the bin folder to prepare a Python virtual-env with TensorFlow
+ - you need to run this only once
+ - read 'doc/how-to-prepare-virtualenv.txt' for more information
+----------------------------------------------
+bin/one-prepare-venv
+----------------------------------------------
+
+2) run 'test/prepare_test_materials.sh' to download test material models
+ - you need to run this only once
+ - you need internet connection to download files
+ - you may need to install 'wget' and 'unzip' packages
+----------------------------------------------
+test/prepare_test_materials.sh
+----------------------------------------------
+
+3) run 'test/runtestall.sh' to run the test
+----------------------------------------------
+test/runtestall.sh
+----------------------------------------------
+
+End.
diff --git a/compiler/one-cmds/tests/one-build_001.cfg b/compiler/one-cmds/tests/one-build_001.cfg
new file mode 100644
index 000000000..b022ba74b
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_001.cfg
@@ -0,0 +1,20 @@
+[one-build]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v2
+
+[one-optimize]
+input_path=inception_v3.circle
+output_path=inception_v3.opt.circle
diff --git a/compiler/one-cmds/tests/one-build_001.test b/compiler/one-cmds/tests/one-build_001.test
new file mode 100644
index 000000000..e6c6ee77b
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_001.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-tf -> one-optimize
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_001.cfg"
+outputfile="inception_v3.opt.circle"
+
+rm -f ${filename}.log
+rm -f ${outputfile}
+
+# run test
+one-build -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-build_002.cfg b/compiler/one-cmds/tests/one-build_002.cfg
new file mode 100644
index 000000000..bbf09159b
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_002.cfg
@@ -0,0 +1,24 @@
+[one-build]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=True
+one-quantize=False
+one-pack=True
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v2
+
+[one-optimize]
+input_path=inception_v3.circle
+output_path=inception_v3.opt.circle
+
+[one-pack]
+input_path=inception_v3.opt.circle
+output_path=inception_v3_pkg
diff --git a/compiler/one-cmds/tests/one-build_002.test b/compiler/one-cmds/tests/one-build_002.test
new file mode 100644
index 000000000..c29d422d6
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_002.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-tf -> one-optimize -> one-pack
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_002.cfg"
+outputfile="inception_v3_pkg"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test
+one-build -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-build_003.cfg b/compiler/one-cmds/tests/one-build_003.cfg
new file mode 100644
index 000000000..6aec3cab6
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_003.cfg
@@ -0,0 +1,21 @@
+[one-build]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=True
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v1
+
+[one-quantize]
+input_path=inception_v3.circle
+output_path=inception_v3.quantized.circle
+input_data=inception_v3_test_data.h5
diff --git a/compiler/one-cmds/tests/one-build_003.test b/compiler/one-cmds/tests/one-build_003.test
new file mode 100644
index 000000000..6337b5095
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_003.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-tf -> one-quantize
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_003.cfg"
+outputfile="inception_v3.quantized.circle"
+
+rm -f ${filename}.log
+rm -f ${outputfile}
+
+# run test
+one-build -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-build_004.cfg b/compiler/one-cmds/tests/one-build_004.cfg
new file mode 100644
index 000000000..c23405bea
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_004.cfg
@@ -0,0 +1,20 @@
+[one-build]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=False
+one-pack=False
+one-codegen=True
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v1
+
+[one-codegen]
+backend=dummy
+command=-o sample.tvn inception_v3.circle
diff --git a/compiler/one-cmds/tests/one-build_004.test b/compiler/one-cmds/tests/one-build_004.test
new file mode 100644
index 000000000..5406355e2
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_004.test
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-tf -> one-codegen
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_004.cfg"
+outputfile="sample.tvn"
+
+rm -f ${filename}.log
+rm -f ${outputfile}
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+one-build -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-build_005.cfg b/compiler/one-cmds/tests/one-build_005.cfg
new file mode 100644
index 000000000..841b37234
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_005.cfg
@@ -0,0 +1,20 @@
+[one-build]
+one-import-tf=False
+one-import-tflite=True
+one-import-bcq=False
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=True
+
+[one-import-tflite]
+input_path=inception_v3.tflite
+output_path=inception_v3.circle
+
+[one-optimize]
+input_path=inception_v3.circle
+output_path=inception_v3.opt.circle
+
+[one-codegen]
+backend=dummy
+command=-o sample.tvn inception_v3.opt.circle
diff --git a/compiler/one-cmds/tests/one-build_005.test b/compiler/one-cmds/tests/one-build_005.test
new file mode 100644
index 000000000..f003be536
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_005.test
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-tflite -> one-optimize -> one-codegen
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_005.cfg"
+outputfile="sample.tvn"
+
+rm -f ${filename}.log
+rm -f ${outputfile}
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+one-build -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-build_006.cfg b/compiler/one-cmds/tests/one-build_006.cfg
new file mode 100644
index 000000000..e754bdeca
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_006.cfg
@@ -0,0 +1,29 @@
+[one-build]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=True
+one-quantize=True
+one-pack=False
+one-codegen=True
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v1
+
+[one-optimize]
+input_path=inception_v3.circle
+output_path=inception_v3.opt.circle
+
+[one-quantize]
+input_path=inception_v3.opt.circle
+output_path=inception_v3.quantized.circle
+input_data=inception_v3_test_data.h5
+
+[one-codegen]
+backend=dummy
+command=-o sample.tvn inception_v3.quantized.circle
diff --git a/compiler/one-cmds/tests/one-build_006.test b/compiler/one-cmds/tests/one-build_006.test
new file mode 100644
index 000000000..07770151a
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_006.test
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-tf -> one-optimize -> one-quantize -> one-codegen
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_006.cfg"
+outputfile="sample.tvn"
+
+rm -f ${filename}.log
+rm -f ${outputfile}
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+one-build -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-build_007.cfg b/compiler/one-cmds/tests/one-build_007.cfg
new file mode 100644
index 000000000..52610750d
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_007.cfg
@@ -0,0 +1,29 @@
+[one-build]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=True
+one-pack=True
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v1
+
+[one-optimize]
+input_path=inception_v3.circle
+output_path=inception_v3.opt.circle
+
+[one-quantize]
+input_path=inception_v3.opt.circle
+output_path=inception_v3.quantized.circle
+input_data=inception_v3_test_data.h5
+
+[one-pack]
+input_path=inception_v3.quantized.circle
+output_path=inception_v3_pkg
diff --git a/compiler/one-cmds/tests/one-build_007.test b/compiler/one-cmds/tests/one-build_007.test
new file mode 100644
index 000000000..4fd6a2a67
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_007.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-tf -> one-quantize -> one-pack
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_007.cfg"
+outputfile="inception_v3_pkg"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test
+one-build -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-build_008.cfg b/compiler/one-cmds/tests/one-build_008.cfg
new file mode 100644
index 000000000..8c777f64f
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_008.cfg
@@ -0,0 +1,22 @@
+[one-build]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=True
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=True
+
+[one-import-onnx]
+input_path=test_onnx_model.onnx
+output_path=test_onnx_model.circle
+
+[one-optimize]
+input_path=test_onnx_model.circle
+output_path=test_onnx_model.opt.circle
+remove_redundant_transpose=True
+
+[one-codegen]
+backend=dummy
+command=-o test_onnx_model.bin test_onnx_model.opt.circle
diff --git a/compiler/one-cmds/tests/one-build_008.test b/compiler/one-cmds/tests/one-build_008.test
new file mode 100644
index 000000000..d19b95fc7
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_008.test
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-onnx -> one-optimize -> one-codegen
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_008.cfg"
+outputfile="test_onnx_model.bin"
+
+rm -f ${filename}.log
+rm -f ${outputfile}
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+one-build -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-build_009.cfg b/compiler/one-cmds/tests/one-build_009.cfg
new file mode 100644
index 000000000..b5a35dd97
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_009.cfg
@@ -0,0 +1,23 @@
+[one-build]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=True
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=True
+
+[one-import-onnx]
+input_path=onnx_conv2d_conv2d.onnx
+output_path=onnx_conv2d_conv2d.circle
+
+[one-optimize]
+input_path=onnx_conv2d_conv2d.circle
+output_path=onnx_conv2d_conv2d.opt.circle
+remove_redundant_transpose=True
+convert_nchw_to_nhwc=True
+
+[one-codegen]
+backend=dummy
+command=-o onnx_conv2d_conv2d.bin onnx_conv2d_conv2d.opt.circle
diff --git a/compiler/one-cmds/tests/one-build_009.test b/compiler/one-cmds/tests/one-build_009.test
new file mode 100644
index 000000000..ae5351957
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_009.test
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-onnx -> one-optimize -> one-codegen
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_009.cfg"
+outputfile="onnx_conv2d_conv2d.bin"
+
+rm -f ${filename}.log
+rm -f ${outputfile}
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+one-build -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-build_010.cfg b/compiler/one-cmds/tests/one-build_010.cfg
new file mode 100644
index 000000000..3be9ed3f9
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_010.cfg
@@ -0,0 +1,17 @@
+[one-build]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.alt.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v1
+save_intermediate=True
diff --git a/compiler/one-cmds/tests/one-build_010.test b/compiler/one-cmds/tests/one-build_010.test
new file mode 100644
index 000000000..b76e81cf9
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_010.test
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-tf: intermediate file should exist
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_010.cfg"
+outputfile="inception_v3.alt.circle"
+intermfile="inception_v3.alt.tflite"
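+# the intermediate tflite should be kept since the cfg sets save_intermediate=True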
+
+rm -f ${filename}.log
+rm -f ${outputfile}
+rm -f ${intermfile}
+
+# run test
+one-build -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+if [[ ! -s "${intermfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-build_011.cfg b/compiler/one-cmds/tests/one-build_011.cfg
new file mode 100644
index 000000000..15d2c103c
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_011.cfg
@@ -0,0 +1,14 @@
+[one-build]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=True
+one-optimize=False
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-onnx]
+input_path=test_onnx_model.onnx
+output_path=test_onnx_model.circle
+save_intermediate=True
diff --git a/compiler/one-cmds/tests/one-build_011.test b/compiler/one-cmds/tests/one-build_011.test
new file mode 100644
index 000000000..efd714325
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_011.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-onnx
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_011.cfg"
+outputfile="test_onnx_model.circle"
+intermfile="test_onnx_model.tflite"
+
+rm -f ${filename}.log
+rm -f ${outputfile}
+rm -f ${intermfile}
+
+# run test
+one-build -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+if [[ ! -s "${intermfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-build_012.cfg b/compiler/one-cmds/tests/one-build_012.cfg
new file mode 100644
index 000000000..74b0a0ca0
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_012.cfg
@@ -0,0 +1,22 @@
+[one-build]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=True
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v1
+
+[one-quantize]
+input_path=inception_v3.circle
+output_path=inception_v3.list.quantized.circle
+input_data=datalist.txt
+input_data_format=list
diff --git a/compiler/one-cmds/tests/one-build_012.test b/compiler/one-cmds/tests/one-build_012.test
new file mode 100644
index 000000000..5f123cd57
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_012.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-tf -> one-quantize
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_012.cfg"
+outputfile="inception_v3.list.quantized.circle"
+
+rm -f ${filename}.log
+rm -f ${outputfile}
+
+# run test
+one-build -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-build_013.cfg b/compiler/one-cmds/tests/one-build_013.cfg
new file mode 100644
index 000000000..8e9e40e66
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_013.cfg
@@ -0,0 +1,22 @@
+[one-build]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=True
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v1
+
+[one-quantize]
+input_path=inception_v3.circle
+output_path=inception_v3.dir.quantized.circle
+input_data=raw_files
+input_data_format=directory
diff --git a/compiler/one-cmds/tests/one-build_013.test b/compiler/one-cmds/tests/one-build_013.test
new file mode 100644
index 000000000..9a71f02e5
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_013.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-tf -> one-quantize
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_013.cfg"
+outputfile="inception_v3.dir.quantized.circle"
+
+rm -f ${filename}.log
+rm -f ${outputfile}
+
+# run test
+one-build -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-build_014.cfg b/compiler/one-cmds/tests/one-build_014.cfg
new file mode 100644
index 000000000..f09145ec3
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_014.cfg
@@ -0,0 +1,22 @@
+[one-build]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=False
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v1
+
+[one-optimize]
+input_path=inception_v3.circle
+output_path=inception_v3.opt.circle
+make_batchnorm_gamma_positive=False
diff --git a/compiler/one-cmds/tests/one-build_014.test b/compiler/one-cmds/tests/one-build_014.test
new file mode 100644
index 000000000..10e6cc65b
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_014.test
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Use the `ONE-BUILD_014` optimization option (passed as -OONE-BUILD_014)
+
+: '
+This test assumes the directory layout below.
+
+[one hierarchy]
+ one
+ ├── backends
+ ├── bin
+ ├── doc
+ ├── include
+ ├── lib
+ ├── optimization
+ └── test # pwd
+'
+
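+# remember whether ../optimization already existed so cleanup removes only what this test created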
+OPT_ALREADY_EXIST=true
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+clean_envir()
+{
+ rm -rf ../optimization/OONE-BUILD_014.cfg
+ if [ "$OPT_ALREADY_EXIST" = false ]; then
+ rm -rf ../optimization
+ fi
+}
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ clean_envir
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_014.cfg"
+outputfile="inception_v3.opt.circle"
+
+rm -f ${filename}.log
+rm -f ${outputfile}
+
+if [ ! -d "../optimization" ]; then
+ mkdir -p ../optimization
+ OPT_ALREADY_EXIST=false
+fi
+
+cp OONE-BUILD_014.cfg ../optimization
+
+# run test
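+# LUCI_LOG=5 enables verbose luci pass logging so the grep below can check whether the pass ran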
+LUCI_LOG=5 one-build -C ${configfile} -OONE-BUILD_014 > ${filename}.log 2>&1
+
+clean_envir
+
+if ! grep -q "MakeBatchNormGammaPositivePass" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+fi
+
+trap_err_onexit
diff --git a/compiler/one-cmds/tests/one-build_neg_001.test b/compiler/one-cmds/tests/one-build_neg_001.test
new file mode 100644
index 000000000..2ea26d23f
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_neg_001.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with missing configuration file
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Not found given configuration file" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_neg_001.cfg"
+
+# run test
+one-build -C ${configfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-build_neg_002.cfg b/compiler/one-cmds/tests/one-build_neg_002.cfg
new file mode 100644
index 000000000..99db96651
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_neg_002.cfg
@@ -0,0 +1,20 @@
+[one-build]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=True
+one-quantize=False
+one-pack=True
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v2
+
+[one-optimize]
+input_path=inception_v3.circle
+output_path=inception_v3.opt.circle
diff --git a/compiler/one-cmds/tests/one-build_neg_002.test b/compiler/one-cmds/tests/one-build_neg_002.test
new file mode 100644
index 000000000..dece9c0bc
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_neg_002.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with missing one-pack section in configuration file
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "configuration file must have 'one-pack' section" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_neg_002.cfg"
+
+rm -f ${filename}.log
+
+# run test
+one-build -C ${configfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-build_neg_003.cfg b/compiler/one-cmds/tests/one-build_neg_003.cfg
new file mode 100644
index 000000000..fa027cb95
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_neg_003.cfg
@@ -0,0 +1,15 @@
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v2
+
+[one-optimize]
+input_path=inception_v3.circle
+output_path=inception_v3.opt.circle
+
+[one-pack]
+input_path=inception_v3.opt.circle
+output_path=inception_v3_pkg
diff --git a/compiler/one-cmds/tests/one-build_neg_003.test b/compiler/one-cmds/tests/one-build_neg_003.test
new file mode 100644
index 000000000..c793d13e0
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_neg_003.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with missing one-build section in configuration file
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "\[one-build\] section is required in configuraion file" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_neg_003.cfg"
+
+rm -f ${filename}.log
+
+# run test
+one-build -C ${configfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-build_neg_004.cfg b/compiler/one-cmds/tests/one-build_neg_004.cfg
new file mode 100644
index 000000000..571077b42
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_neg_004.cfg
@@ -0,0 +1,24 @@
+[one-build]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v2
+
+[one-optimize]
+input_path=inception_v3.circle
+output_path=inception_v3.opt.circle
+
+[one-optimize]
+input_path=inception_v4.circle
+output_path=inception_v4.opt.circle
diff --git a/compiler/one-cmds/tests/one-build_neg_004.test b/compiler/one-cmds/tests/one-build_neg_004.test
new file mode 100644
index 000000000..b0d4e616d
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_neg_004.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with duplicate section
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "section 'one-optimize' already exists" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_neg_004.cfg"
+
+rm -f ${filename}.log
+
+# run test
+one-build -C ${configfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-build_neg_005.cfg b/compiler/one-cmds/tests/one-build_neg_005.cfg
new file mode 100644
index 000000000..ad9efbca1
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_neg_005.cfg
@@ -0,0 +1,16 @@
+[one-build]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.alt.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v2
diff --git a/compiler/one-cmds/tests/one-build_neg_005.test b/compiler/one-cmds/tests/one-build_neg_005.test
new file mode 100644
index 000000000..9b2511103
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_neg_005.test
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative: one-import-tf intermediate file should not exist
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_neg_005.cfg"
+outputfile="inception_v3.alt.circle"
+intermfile="inception_v3.alt.tflite"
+
+rm -rf ${outputfile}
+rm -rf ${intermfile}
+rm -f ${filename}.log
+
+# run test
+one-build -C ${configfile} > ${filename}.log 2>&1
+
+# output should exist
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+# intermediate file should not exist
+if [[ -f "${intermfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-build_neg_006.cfg b/compiler/one-cmds/tests/one-build_neg_006.cfg
new file mode 100644
index 000000000..abe4c7d77
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_neg_006.cfg
@@ -0,0 +1,13 @@
+[one-build]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=True
+one-optimize=False
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-onnx]
+input_path=test_onnx_model.onnx
+output_path=test_onnx_model.circle
diff --git a/compiler/one-cmds/tests/one-build_neg_006.test b/compiler/one-cmds/tests/one-build_neg_006.test
new file mode 100644
index 000000000..d228f8f1f
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_neg_006.test
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative: one-import-onnx intermediate file should not exist
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_neg_006.cfg"
+outputfile="test_onnx_model.circle"
+intermfile="test_onnx_model.tflite"
+
+rm -rf ${outputfile}
+rm -rf ${intermfile}
+rm -f ${filename}.log
+
+# run test
+one-build -C ${configfile} > ${filename}.log 2>&1
+
+# output should exist
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+# intermediate file should not exist
+if [[ -f "${intermfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-build_neg_007.test b/compiler/one-cmds/tests/one-build_neg_007.test
new file mode 100644
index 000000000..59a5d25c1
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_neg_007.test
@@ -0,0 +1,71 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Valid optimization option but invalid configuration file path
+
+: '
+This test assumes the directory layout below.
+
+[one hierarchy]
+ one
+ ├── backends
+ ├── bin
+ ├── doc
+ ├── include
+ ├── lib
+ ├── optimization
+ └── test # pwd
+'
+
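+# track whether ../optimization pre-existed so it is removed only if this test created it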
+OPT_ALREADY_EXIST=true
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ rm -rf ../optimization/OONE_BUILD_NEG_007.cfg
+ if [ "$OPT_ALREADY_EXIST" = false ]; then
+ rm -rf ../optimization
+ fi
+ if grep -q "Not found given configuration file" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+if [ ! -d "../optimization" ]; then
+ mkdir -p ../optimization
+ OPT_ALREADY_EXIST=false
+fi
+
+touch ../optimization/OONE_BUILD_NEG_007.cfg
+
+configfile=".."
+
+rm -f ${filename}.log
+
+# run test
+one-build -C ${configfile} -OONE_BUILD_NEG_007 > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-build_neg_008.test b/compiler/one-cmds/tests/one-build_neg_008.test
new file mode 100644
index 000000000..2b41222e9
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_neg_008.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Invalid optimization option
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Invalid optimization option" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile=".."
+
+rm -f ${filename}.log
+
+# run test
+one-build -C ${configfile} -OONE_BUILD_NEG_008 > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-build_neg_009.test b/compiler/one-cmds/tests/one-build_neg_009.test
new file mode 100644
index 000000000..5e3698d23
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_neg_009.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with space in the optimization name
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Not allowed to have space in the optimization name" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile=".."
+
+rm -f ${filename}.log
+
+# run test
+one-build -C ${configfile} "-O SPACE OPTION" > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-codegen_001.test b/compiler/one-cmds/tests/one-codegen_001.test
new file mode 100644
index 000000000..dbc4ad7f7
--- /dev/null
+++ b/compiler/one-cmds/tests/one-codegen_001.test
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
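+# run one-codegen with help-compile driver
+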
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+rm -f ${filename}.log
+
+# copy help-compile to bin folder
+cp help-compile ../bin/help-compile
+
+# run test
+one-codegen -b help -- -h > ${filename}.log 2>&1
+
+rm -rf ../bin/help-compile
+
+if grep -q "HELP MESSAGE!!" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+fi
+
+trap_err_onexit
diff --git a/compiler/one-cmds/tests/one-codegen_002.test b/compiler/one-cmds/tests/one-codegen_002.test
new file mode 100644
index 000000000..95e7b7b68
--- /dev/null
+++ b/compiler/one-cmds/tests/one-codegen_002.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run one-codegen with dummy-compile driver
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+outputfile="sample.tvn"
+
+rm -rf ${outputfile}
+rm -f ${filename}.log
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+one-codegen -b dummy -o ${outputfile} "dummy.circle" > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-codegen_003.test b/compiler/one-cmds/tests/one-codegen_003.test
new file mode 100644
index 000000000..f283ec38b
--- /dev/null
+++ b/compiler/one-cmds/tests/one-codegen_003.test
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run one-codegen with dummy-compile driver
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+outputfile="sample.tvn"
+
+rm -rf ${outputfile}
+rm -f ${filename}.log
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
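+# arguments after '--' are forwarded to the backend driver as its command line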
+one-codegen -b dummy -- -o ${outputfile} "dummy.circle" > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-codegen_004.test b/compiler/one-cmds/tests/one-codegen_004.test
new file mode 100644
index 000000000..485f591a5
--- /dev/null
+++ b/compiler/one-cmds/tests/one-codegen_004.test
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# print one-codegen's help message
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+rm -f ${filename}.log
+
+# run test
+one-codegen -h > ${filename}.log 2>&1
+
+if grep -q "command line tool for code generation" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+fi
+
+trap_err_onexit
diff --git a/compiler/one-cmds/tests/one-codegen_neg_001.test b/compiler/one-cmds/tests/one-codegen_neg_001.test
new file mode 100644
index 000000000..137a3f90d
--- /dev/null
+++ b/compiler/one-cmds/tests/one-codegen_neg_001.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with no arguments
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "error: the following arguments are required: -b/--backend" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+rm -f ${filename}.log
+
+# run test
+one-codegen > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-codegen_neg_002.test b/compiler/one-cmds/tests/one-codegen_neg_002.test
new file mode 100644
index 000000000..28dfa920c
--- /dev/null
+++ b/compiler/one-cmds/tests/one-codegen_neg_002.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-codegen ${command} without backend option
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "error: the following arguments are required: -b/--backend" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+rm -f ${filename}.log
+
+# run test
+one-codegen -o test.tvn test.circle > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-codegen_neg_003.test b/compiler/one-cmds/tests/one-codegen_neg_003.test
new file mode 100644
index 000000000..0622b6a07
--- /dev/null
+++ b/compiler/one-cmds/tests/one-codegen_neg_003.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-codegen -- ${command} without backend option
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "error: the following arguments are required: -b/--backend" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+rm -f ${filename}.log
+
+# run test
+one-codegen -- -o test.tvn test.circle > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-codegen_neg_004.cfg b/compiler/one-cmds/tests/one-codegen_neg_004.cfg
new file mode 100644
index 000000000..b104fbb27
--- /dev/null
+++ b/compiler/one-cmds/tests/one-codegen_neg_004.cfg
@@ -0,0 +1,6 @@
+[onecc]
+one-codegen=True
+
+[one-codegen]
+backend=dummy
+# command=..
diff --git a/compiler/one-cmds/tests/one-codegen_neg_004.test b/compiler/one-cmds/tests/one-codegen_neg_004.test
new file mode 100644
index 000000000..c35549bdc
--- /dev/null
+++ b/compiler/one-cmds/tests/one-codegen_neg_004.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# command key is missing
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "error: 'command' key is missing in the configuration file" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-codegen_neg_004.cfg"
+
+rm -f ${filename}.log
+
+# run test
+one-codegen -b dummy -C ${configfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-codegen_neg_005.test b/compiler/one-cmds/tests/one-codegen_neg_005.test
new file mode 100644
index 000000000..4c52dcd89
--- /dev/null
+++ b/compiler/one-cmds/tests/one-codegen_neg_005.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# commands for the backend are missing
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "error: commands for the backend is missing" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+rm -f ${filename}.log
+
+# run test
+one-codegen -b dummy > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-import-bcq_001.test b/compiler/one-cmds/tests/one-import-bcq_001.test
new file mode 100644
index 000000000..a414eed66
--- /dev/null
+++ b/compiler/one-cmds/tests/one-import-bcq_001.test
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
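+# import a TensorFlow model with BCQ information
+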
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./bcq.pb"
+outputfile="./bcq.circle"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test
+one-import-bcq \
+--input_path ${inputfile} \
+--output_path ${outputfile} \
+--input_arrays Placeholder \
+--output_arrays MatMul > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-import-bcq_neg_001.test b/compiler/one-cmds/tests/one-import-bcq_neg_001.test
new file mode 100644
index 000000000..cb8fe9ed3
--- /dev/null
+++ b/compiler/one-cmds/tests/one-import-bcq_neg_001.test
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with invalid input array
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "ValueError: Invalid tensors" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./bcq.pb"
+outputfile="./bcq.circle"
+
+rm -rf ${outputfile}
+rm -f ${filename}.log
+
+# run test
+one-import-bcq \
+--input_path ${inputfile} \
+--output_path ${outputfile} \
+--input_arrays Placeholder_null \
+--output_arrays MatMul > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-import-bcq_neg_002.test b/compiler/one-cmds/tests/one-import-bcq_neg_002.test
new file mode 100644
index 000000000..25c772ff9
--- /dev/null
+++ b/compiler/one-cmds/tests/one-import-bcq_neg_002.test
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with invalid output array
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "ValueError: Invalid tensors" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./bcq.pb"
+outputfile="./bcq.circle"
+
+rm -rf ${outputfile}
+rm -f ${filename}.log
+
+# run test
+one-import-bcq \
+--input_path ${inputfile} \
+--output_path ${outputfile} \
+--input_arrays Placeholder \
+--output_arrays MatMul_null > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-import-bcq_neg_003.test b/compiler/one-cmds/tests/one-import-bcq_neg_003.test
new file mode 100644
index 000000000..20ef67a6d
--- /dev/null
+++ b/compiler/one-cmds/tests/one-import-bcq_neg_003.test
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with invalid input path
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "No such file or directory" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./bcq_null.pb"
+outputfile="./bcq.circle"
+
+rm -rf ${outputfile}
+rm -f ${filename}.log
+
+# run test
+one-import-bcq \
+--input_path ${inputfile} \
+--output_path ${outputfile} \
+--input_arrays Placeholder \
+--output_arrays MatMul > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-import-bcq_neg_004.test b/compiler/one-cmds/tests/one-import-bcq_neg_004.test
new file mode 100644
index 000000000..44a8ae19b
--- /dev/null
+++ b/compiler/one-cmds/tests/one-import-bcq_neg_004.test
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with a malformed input file (pbtxt instead of binary pb)
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Error parsing message" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./while_3.pbtxt"
+outputfile="./bcq.circle"
+
+rm -rf ${outputfile}
+rm -f ${filename}.log
+
+# run test
+one-import-bcq \
+--input_path ${inputfile} \
+--output_path ${outputfile} \
+--input_arrays Placeholder \
+--output_arrays MatMul > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-import-bcq_neg_005.test b/compiler/one-cmds/tests/one-import-bcq_neg_005.test
new file mode 100644
index 000000000..550804fea
--- /dev/null
+++ b/compiler/one-cmds/tests/one-import-bcq_neg_005.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with invalid output path
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Failed to write circle" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./bcq.pb"
+outputfile="."
+
+rm -f ${filename}.log
+
+# run test
+one-import-bcq \
+--input_path ${inputfile} \
+--output_path ${outputfile} \
+--input_arrays Placeholder \
+--output_arrays MatMul > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-import-bcq_neg_006.test b/compiler/one-cmds/tests/one-import-bcq_neg_006.test
new file mode 100644
index 000000000..7b872eb42
--- /dev/null
+++ b/compiler/one-cmds/tests/one-import-bcq_neg_006.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with invalid input shapes
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "ValueError: The shape of tensor" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./bcq.pb"
+outputfile="./bcq.circle"
+
+rm -f ${filename}.log
+
+# run test
+one-import-bcq \
+--input_path ${inputfile} \
+--output_path ${outputfile} \
+--input_arrays Placeholder --input_shapes "1,32,32" \
+--output_arrays MatMul > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-import-bcq_neg_007.test b/compiler/one-cmds/tests/one-import-bcq_neg_007.test
new file mode 100644
index 000000000..50d47c0a0
--- /dev/null
+++ b/compiler/one-cmds/tests/one-import-bcq_neg_007.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with invalid input shapes
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "ValueError: The shape of tensor" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./bcq.pb"
+outputfile="./bcq.circle"
+
+rm -f ${filename}.log
+
+# run test
+one-import-bcq \
+--input_path ${inputfile} \
+--output_path ${outputfile} \
+--input_arrays Placeholder --input_shapes "30,30" \
+--output_arrays MatMul > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-import-bcq_neg_008.test b/compiler/one-cmds/tests/one-import-bcq_neg_008.test
new file mode 100644
index 000000000..2edd33d38
--- /dev/null
+++ b/compiler/one-cmds/tests/one-import-bcq_neg_008.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with a non-numeric input shape
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "ValueError: invalid literal for" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./bcq.pb"
+outputfile="./bcq.circle"
+
+rm -f ${filename}.log
+
+# run test
+one-import-bcq \
+--input_path ${inputfile} \
+--output_path ${outputfile} \
+--input_arrays Placeholder --input_shapes "32,O" \
+--output_arrays MatMul > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-import-bcq_neg_009.test b/compiler/one-cmds/tests/one-import-bcq_neg_009.test
new file mode 100644
index 000000000..72c1a877e
--- /dev/null
+++ b/compiler/one-cmds/tests/one-import-bcq_neg_009.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with mismatched input_arrays and input_shapes counts
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "must have the same number of items" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./bcq.pb"
+outputfile="./bcq.circle"
+
+rm -f ${filename}.log
+
+# run test
+one-import-bcq \
+--input_path ${inputfile} \
+--output_path ${outputfile} \
+--input_arrays Placeholder --input_shapes "32,32:1" \
+--output_arrays MatMul > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
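The four one-import-bcq negative tests above (006-009) share a single skeleton: the command's output is captured in a log, an ERR trap greps that log for the expected error message and converts the tool's failure into a test pass, and falling through to the end of the script (meaning the tool unexpectedly succeeded) is reported as a failure. A minimal sketch of that skeleton, with some-tool and EXPECTED_MSG as placeholders rather than names from the suite:

    #!/bin/bash
    filename_ext="$(basename -- "$0")"
    filename="${filename_ext%.*}"

    trap_err_onexit()
    {
      # the test passes only if the tool failed with the expected message
      if grep -q "EXPECTED_MSG" "${filename}.log"; then
        echo "${filename_ext} SUCCESS"; exit 0
      fi
      echo "${filename_ext} FAILED"; exit 255
    }
    trap trap_err_onexit ERR

    some-tool --bad-args > "${filename}.log" 2>&1   # a failure jumps into the trap

    # reaching this point means the tool succeeded, which is wrong for a negative test
    echo "${filename_ext} FAILED"; exit 255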
diff --git a/compiler/one-cmds/tests/one-import-onnx_001.test b/compiler/one-cmds/tests/one-import-onnx_001.test
new file mode 100644
index 000000000..39a96074d
--- /dev/null
+++ b/compiler/one-cmds/tests/one-import-onnx_001.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./test_onnx_model.onnx"
+outputfile="./test_onnx_model.circle"
+
+rm -rf ${outputfile}
+rm -f ${filename}.log
+
+# run test
+one-import-onnx \
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-import-onnx_002.test b/compiler/one-cmds/tests/one-import-onnx_002.test
new file mode 100644
index 000000000..0430a8e32
--- /dev/null
+++ b/compiler/one-cmds/tests/one-import-onnx_002.test
@@ -0,0 +1,71 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# test for experimental_disable_batchmatmul_unfold option
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./reshape_matmul.onnx"
+outputfile="./reshape_matmul.one-import-onnx_002.circle"
+
+rm -rf ${outputfile}
+rm -rf ${outputfile}.log
+
+# run test without the option: BatchMatMul should be unfolded to FULLY_CONNECTED
+one-import-onnx \
+--input_path ${inputfile} \
+--output_path ${outputfile} > /dev/null 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+circle-operator --code ${outputfile} > ${outputfile}.log 2>&1
+
+if ! grep -q "FULLY_CONNECTED" "${outputfile}.log"; then
+ trap_err_onexit
+fi
+
+rm -rf ${outputfile}
+rm -rf ${outputfile}.log
+
+# run test with the option: BATCH_MATMUL should be kept as-is
+one-import-onnx \
+--experimental_disable_batchmatmul_unfold \
+--input_path ${inputfile} \
+--output_path ${outputfile} > /dev/null 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+circle-operator --code ${outputfile} > ${outputfile}.log 2>&1
+
+if ! grep -q "BATCH_MATMUL" "${outputfile}.log"; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
+exit 0
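The assertions above work because circle-operator --code prints the operator codes of the given circle model, so presence or absence of an opcode reduces to a grep over its output. The temporary log file is kept for diagnostics; logically the first check is just the pipeline sketched here (an equivalent reading, not a proposed change):

    # pass condition: the unfolded model must contain FULLY_CONNECTED
    if ! circle-operator --code "${outputfile}" | grep -q "FULLY_CONNECTED"; then
      trap_err_onexit
    fi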
diff --git a/compiler/one-cmds/tests/one-import-onnx_ext_001.test b/compiler/one-cmds/tests/one-import-onnx_ext_001.test
new file mode 100644
index 000000000..be8dadcf1
--- /dev/null
+++ b/compiler/one-cmds/tests/one-import-onnx_ext_001.test
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# test for one-import-onnx to invoke extension
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -f ../bin/one-import-onnx-ext
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./onnx_conv2d_conv2d.onnx"
+outputfile="./onnx_conv2d_conv2d.onnx_ext_001.circle"
+logfile=${filename}.log
+
+rm -f ${outputfile}
+rm -f ${logfile}
+
+# copy dummy-onnx-ext to bin folder
+cp dummy-onnx-ext ../bin/one-import-onnx-ext
+
+# run test
+one-import-onnx \
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${logfile} 2>&1
+
+if ! grep -q "one-import-onnx-ext dummy output!!!" "${logfile}"; then
+ trap_err_onexit
+fi
+
+rm -f ../bin/one-import-onnx-ext
+
+echo "${filename_ext} SUCCESS"
+exit 0
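This test works by staging a stub in ../bin: one-import-onnx appears to look for an optional one-import-onnx-ext driver alongside the other one-cmds binaries and to hand the conversion over to it when present, so finding the stub's marker line in the log proves the extension hook fired. Since the stub must be removed on every exit path (the trap does it on failure, the tail of the script on success), a hedged alternative is a single EXIT trap:

    cp dummy-onnx-ext ../bin/one-import-onnx-ext
    trap 'rm -f ../bin/one-import-onnx-ext' EXIT   # cleanup on success and failure alike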
diff --git a/compiler/one-cmds/tests/one-import_001.test b/compiler/one-cmds/tests/one-import_001.test
new file mode 100644
index 000000000..2647337d2
--- /dev/null
+++ b/compiler/one-cmds/tests/one-import_001.test
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.pb"
+outputfile="./inception_v3.circle"
+
+# Note: Do not remove output circle file as it's used for quantize tests
+rm -f ${filename}.log
+
+# run test
+one-import tf \
+--input_path ${inputfile} \
+--output_path ${outputfile} \
+--input_arrays input --input_shapes "1,299,299,3" \
+--output_arrays InceptionV3/Predictions/Reshape_1 > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-import_002.cfg b/compiler/one-cmds/tests/one-import_002.cfg
new file mode 100644
index 000000000..e7ede7bc2
--- /dev/null
+++ b/compiler/one-cmds/tests/one-import_002.cfg
@@ -0,0 +1,16 @@
+[one-build]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3_cfg.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v2
diff --git a/compiler/one-cmds/tests/one-import_002.test b/compiler/one-cmds/tests/one-import_002.test
new file mode 100644
index 000000000..24f673c43
--- /dev/null
+++ b/compiler/one-cmds/tests/one-import_002.test
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# positive usage with a command-line option overriding the cfg value
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-import_002.cfg"
+outputfile_cmd="inception_v3_cmd.circle"
+outputfile_cfg="inception_v3_cfg.circle"
+
+rm -f ${filename}.log
+rm -f ${outputfile_cmd}
+rm -f ${outputfile_cfg}
+
+# run test
+one-import tf -C ${configfile} \
+--output_path=${outputfile_cmd} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile_cmd}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
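The pass condition above only checks that the command-line --output_path won, i.e. that inception_v3_cmd.circle exists. Under the assumption that an overridden cfg output_path produces no file at all, the test could additionally assert the absence of the cfg-side artifact; a sketch, not part of the suite:

    # assumption: the overridden cfg output_path leaves nothing behind
    if [[ -s "${outputfile_cfg}" ]]; then
      trap_err_onexit
    fi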
diff --git a/compiler/one-cmds/tests/one-import_003.cfg b/compiler/one-cmds/tests/one-import_003.cfg
new file mode 100644
index 000000000..b679ebdb3
--- /dev/null
+++ b/compiler/one-cmds/tests/one-import_003.cfg
@@ -0,0 +1,13 @@
+[one-build]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+model_format=saved_model
+input_path=test_saved_model
+output_path=test_saved_model.circle
diff --git a/compiler/one-cmds/tests/one-import_003.test b/compiler/one-cmds/tests/one-import_003.test
new file mode 100644
index 000000000..92e4b151b
--- /dev/null
+++ b/compiler/one-cmds/tests/one-import_003.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# import of TF 2.x saved model
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-import_003.cfg"
+outputfile="test_saved_model.circle"
+
+rm -f ${filename}.log
+rm -f ${outputfile}
+
+# run test
+one-import tf -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-import_004.cfg b/compiler/one-cmds/tests/one-import_004.cfg
new file mode 100644
index 000000000..d28c8dff6
--- /dev/null
+++ b/compiler/one-cmds/tests/one-import_004.cfg
@@ -0,0 +1,13 @@
+[one-build]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+model_format=keras_model
+input_path=test_keras_model.h5
+output_path=test_keras_model.circle
diff --git a/compiler/one-cmds/tests/one-import_004.test b/compiler/one-cmds/tests/one-import_004.test
new file mode 100644
index 000000000..42c1692a3
--- /dev/null
+++ b/compiler/one-cmds/tests/one-import_004.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# import of TF 2.x keras model
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-import_004.cfg"
+outputfile="test_keras_model.circle"
+
+rm -f ${filename}.log
+rm -f ${outputfile}
+
+# run test
+one-import tf -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-import_005.cfg b/compiler/one-cmds/tests/one-import_005.cfg
new file mode 100644
index 000000000..abe4c7d77
--- /dev/null
+++ b/compiler/one-cmds/tests/one-import_005.cfg
@@ -0,0 +1,13 @@
+[one-build]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=True
+one-optimize=False
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-onnx]
+input_path=test_onnx_model.onnx
+output_path=test_onnx_model.circle
diff --git a/compiler/one-cmds/tests/one-import_005.test b/compiler/one-cmds/tests/one-import_005.test
new file mode 100644
index 000000000..75122d6f2
--- /dev/null
+++ b/compiler/one-cmds/tests/one-import_005.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# import onnx model with cfg file
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-import_005.cfg"
+outputfile="test_onnx_model.circle"
+
+rm -f ${outputfile}
+rm -f ${filename}.log
+
+# run test
+one-import onnx -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
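For reference, the [one-import-onnx] section of one-import_005.cfg drives the same conversion as the direct invocation below (same paths, no extra options), which is what one-import-onnx_001.test runs without a cfg file:

    one-import-onnx \
    --input_path test_onnx_model.onnx \
    --output_path test_onnx_model.circle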
diff --git a/compiler/one-cmds/tests/one-import_006.test b/compiler/one-cmds/tests/one-import_006.test
new file mode 100644
index 000000000..a9be173e5
--- /dev/null
+++ b/compiler/one-cmds/tests/one-import_006.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# import onnx model
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="test_onnx_model.onnx"
+outputfile="test_onnx_model.circle"
+
+rm -f ${outputfile}
+rm -f ${filename}.log
+
+# run test
+one-import onnx -i ${inputfile} -o ${outputfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-import_neg_001.test b/compiler/one-cmds/tests/one-import_neg_001.test
new file mode 100644
index 000000000..20a69641a
--- /dev/null
+++ b/compiler/one-cmds/tests/one-import_neg_001.test
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with an improper input model
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "ValueError: Invalid tensors 'input' were found" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+ # TF 2.12.x: error report has changed
+ if grep -q "invalid start byte" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.tflite"
+outputfile="./inception_v3.circle"
+
+# do not remove output file
+# rm -rf ${outputfile}
+rm -rf ${filename}.log
+
+# run test
+one-import tf \
+--input_path ${inputfile} \
+--output_path ${outputfile} \
+--input_arrays input --input_shapes "1,299,299,3" \
+--output_arrays InceptionV3/Predictions/Reshape_1 > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
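Because the expected error text changed across TF releases, the trap greps for each known variant in turn. If further variants accumulate, the checks collapse into one extended-regexp grep; a minimal sketch over the two messages already listed:

    if grep -Eq "Invalid tensors 'input' were found|invalid start byte" "${filename}.log"; then
      echo "${filename_ext} SUCCESS"
      exit 0
    fi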
diff --git a/compiler/one-cmds/tests/one-import_neg_002.test b/compiler/one-cmds/tests/one-import_neg_002.test
new file mode 100644
index 000000000..9cf0b1401
--- /dev/null
+++ b/compiler/one-cmds/tests/one-import_neg_002.test
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with unsupported dynamic tensor
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ # TF2.3.0
+ if grep -q "is incompatible with result type" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+ # TF2.6.0
+ if grep -q "is incompatible with body result type" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./while_3.pbtxt"
+outputfile="./while_3.circle"
+
+rm -rf ${outputfile}
+rm -rf ${filename}.log
+
+# run test
+one-import tf \
+--input_path ${inputfile} \
+--output_path ${outputfile} \
+--input_arrays Hole,Hole_2 --input_shapes "1,1:1,1" \
+--output_arrays Output > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-import_neg_003.test b/compiler/one-cmds/tests/one-import_neg_003.test
new file mode 100644
index 000000000..9561ceafd
--- /dev/null
+++ b/compiler/one-cmds/tests/one-import_neg_003.test
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with invalid output array
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ # TODO Error message depends on TF version. Find better way.
+ # TF 2.3.0
+ if grep -q "ValueError: Invalid tensors" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ # TF 2.5.0
+ if grep -q "ConverterError: <unknown>:0: error:" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.pb"
+outputfile="./inception_v3.circle"
+
+# do not remove output file
+# rm -rf ${outputfile}
+rm -rf ${filename}.log
+
+# run test
+one-import tf \
+--input_path ${inputfile} \
+--output_path ${outputfile} \
+--input_arrays input --input_shapes "1,299,299,3" \
+--output_arrays InceptionV3/Predictions/Reshape_2 > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-import_neg_004.test b/compiler/one-cmds/tests/one-import_neg_004.test
new file mode 100644
index 000000000..8626eb9ab
--- /dev/null
+++ b/compiler/one-cmds/tests/one-import_neg_004.test
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with invalid input shape
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "ValueError: The shape of tensor" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.pb"
+outputfile="./inception_v3.circle"
+
+# do not remove output file
+# rm -rf ${outputfile}
+rm -rf ${filename}.log
+
+# run test
+one-import tf \
+--input_path ${inputfile} \
+--output_path ${outputfile} \
+--input_arrays input --input_shapes "1,299,299,1" \
+--output_arrays InceptionV3/Predictions/Reshape_1 > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-import_neg_005.test b/compiler/one-cmds/tests/one-import_neg_005.test
new file mode 100644
index 000000000..f73826f00
--- /dev/null
+++ b/compiler/one-cmds/tests/one-import_neg_005.test
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with invalid input shape
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "ValueError: The shape of tensor" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.pb"
+outputfile="./inception_v3.imp_neg_005.circle"
+
+rm -rf ${outputfile}
+rm -rf ${filename}.log
+
+# run test
+one-import tf \
+--input_path ${inputfile} \
+--output_path ${outputfile} \
+--input_arrays input --input_shapes "1,299,299" \
+--output_arrays InceptionV3/Predictions/Reshape_1 > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-import_neg_006.test b/compiler/one-cmds/tests/one-import_neg_006.test
new file mode 100644
index 000000000..985bd1c1e
--- /dev/null
+++ b/compiler/one-cmds/tests/one-import_neg_006.test
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with a zero-sized input dimension
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "ConverterError: <unknown>:0: error:" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.pb"
+outputfile="./inception_v3.imp_neg_006.circle"
+
+rm -rf ${outputfile}
+rm -rf ${filename}.log
+
+# run test
+one-import tf \
+--input_path ${inputfile} \
+--output_path ${outputfile} \
+--input_arrays input --input_shapes "0,299,299,3" \
+--output_arrays InceptionV3/Predictions/Reshape_1 > ${filename}.log 2>&1
+
+# NOTE TF 2.3.0 fails (as expected) but TF 2.5 (possibly 2.4) and above does not;
+# see https://github.com/tensorflow/tensorflow/issues/51756 for details
+# TODO restore "exit 255" once the failure is expected again
+echo "${filename_ext} SKIPPED"
+exit 0
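The SKIPPED verdict reflects a TF behavior change: TF 2.3 rejected the zero-sized dimension, while TF 2.5 and above convert the model without complaint, so the strict expectation cannot be restored unconditionally. One hedged option, sketched here and not part of the suite, is to gate the verdict on the installed TF version:

    # hypothetical version gate
    tf_ver="$(python3 -c 'import tensorflow as tf; print(tf.__version__)' 2>/dev/null)"
    case "${tf_ver}" in
      2.3.*) echo "${filename_ext} FAILED"; exit 255 ;;  # old TF: conversion must fail
      *)     echo "${filename_ext} SKIPPED"; exit 0 ;;   # newer TF accepts the shape
    esac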
diff --git a/compiler/one-cmds/tests/one-import_neg_007.test b/compiler/one-cmds/tests/one-import_neg_007.test
new file mode 100644
index 000000000..2552157da
--- /dev/null
+++ b/compiler/one-cmds/tests/one-import_neg_007.test
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with a non-numeric input shape
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "ValueError: invalid literal" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.pb"
+outputfile="./inception_v3.imp_neg_007.circle"
+
+rm -rf ${outputfile}
+rm -rf ${filename}.log
+
+# run test
+one-import tf \
+--input_path ${inputfile} \
+--output_path ${outputfile} \
+--input_arrays input --input_shapes "None,299,299,3" \
+--output_arrays InceptionV3/Predictions/Reshape_1 > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-import_neg_008.test b/compiler/one-cmds/tests/one-import_neg_008.test
new file mode 100644
index 000000000..d62899d2e
--- /dev/null
+++ b/compiler/one-cmds/tests/one-import_neg_008.test
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with mismatched input_arrays and input_shapes counts
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "must have the same number of items" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.pb"
+outputfile="./inception_v3.imp_neg_008.circle"
+
+rm -rf ${outputfile}
+rm -rf ${filename}.log
+
+# run test
+one-import tf \
+--input_path ${inputfile} \
+--output_path ${outputfile} \
+--input_arrays input,InceptionV3/Predictions/Shape --input_shapes "1,299,299,3" \
+--output_arrays InceptionV3/Predictions/Reshape_1 > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-import_neg_009.test b/compiler/one-cmds/tests/one-import_neg_009.test
new file mode 100644
index 000000000..0b5deb32f
--- /dev/null
+++ b/compiler/one-cmds/tests/one-import_neg_009.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with invalid output path
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Failed to write circle" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.pb"
+outputfile="."
+
+rm -rf ${filename}.log
+
+# run test
+one-import tf \
+--input_path ${inputfile} \
+--output_path ${outputfile} \
+--input_arrays input --input_shapes "1,299,299,3" \
+--output_arrays InceptionV3/Predictions/Reshape_1 > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-import_neg_010.test b/compiler/one-cmds/tests/one-import_neg_010.test
new file mode 100644
index 000000000..11512fd18
--- /dev/null
+++ b/compiler/one-cmds/tests/one-import_neg_010.test
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with invalid input_arrays
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "ValueError: Invalid tensors" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.pb"
+outputfile="./inception_v3.imp_neg_010.circle"
+
+rm -rf ${outputfile}
+rm -rf ${filename}.log
+
+# run test
+one-import tf \
+--input_path ${inputfile} \
+--output_path ${outputfile} \
+--input_arrays input2 --input_shapes "1,299,299,3" \
+--output_arrays InceptionV3/Predictions/Reshape_1 > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-infer-test-post-process.py b/compiler/one-cmds/tests/one-infer-test-post-process.py
new file mode 100644
index 000000000..0f0e0d701
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer-test-post-process.py
@@ -0,0 +1,16 @@
+# This script takes one argument and prints it
+
+import sys
+from pathlib import Path
+
+
+def main():
+ if len(sys.argv) < 2:
+ filepath = Path(sys.argv[0])
+ sys.exit("Usage: " + filepath.name + " [Word to print]")
+ word = sys.argv[1]
+ print(word)
+
+
+if __name__ == '__main__':
+ main()
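This helper is invoked by the one-infer --post-process tests below; it echoes its first argument and exits with a usage message when none is given. Expected behavior:

    $ python3 one-infer-test-post-process.py TOKEN
    TOKEN
    $ python3 one-infer-test-post-process.py
    Usage: one-infer-test-post-process.py [Word to print]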
diff --git a/compiler/one-cmds/tests/one-infer_001.test b/compiler/one-cmds/tests/one-infer_001.test
new file mode 100644
index 000000000..e8f1bc7be
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer_001.test
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/help-infer
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+# copy help-infer to bin folder
+cp help-infer ../bin/help-infer
+
+rm -f ${filename}.log
+
+# run test
+one-infer -d help-infer -- -h > ${filename}.log
+
+rm -rf ../bin/help-infer
+
+if grep -q "HELP MESSAGE!!" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+fi
+
+trap_err_onexit
diff --git a/compiler/one-cmds/tests/one-infer_002.test b/compiler/one-cmds/tests/one-infer_002.test
new file mode 100644
index 000000000..6d22fb303
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer_002.test
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-infer
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="sample.tvn"
+
+if [[ ! -s "${inputfile}" ]]; then
+ touch ${inputfile}
+fi
+
+# copy dummy-infer to bin folder
+cp dummy-infer ../bin/dummy-infer
+
+rm -f ${filename}.log
+
+# run test
+one-infer -d dummy-infer -- ${inputfile} > ${filename}.log
+
+rm -rf ../bin/dummy-infer
+
+if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+fi
+
+trap_err_onexit
diff --git a/compiler/one-cmds/tests/one-infer_003.test b/compiler/one-cmds/tests/one-infer_003.test
new file mode 100644
index 000000000..b8fbe93ae
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer_003.test
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# print one-infer's help message
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+rm -f ${filename}.log
+
+# run test
+one-infer -h > ${filename}.log
+
+if grep -q "command line tool to infer model" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+fi
+
+trap_err_onexit
diff --git a/compiler/one-cmds/tests/one-infer_004.cfg b/compiler/one-cmds/tests/one-infer_004.cfg
new file mode 100644
index 000000000..fd5353b0c
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer_004.cfg
@@ -0,0 +1,3 @@
+[one-infer]
+driver=dummy-infer
+command=sample.tvn
diff --git a/compiler/one-cmds/tests/one-infer_004.test b/compiler/one-cmds/tests/one-infer_004.test
new file mode 100644
index 000000000..249728c1e
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer_004.test
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-infer with configuration input
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-infer
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-infer_004.cfg"
+inputfile="sample.tvn"
+
+if [[ ! -s "${inputfile}" ]]; then
+ touch ${inputfile}
+fi
+
+# copy dummy-infer to bin folder
+cp dummy-infer ../bin/dummy-infer
+
+rm -f ${filename}.log
+
+# run test
+one-infer -C ${configfile} > ${filename}.log
+
+rm -rf ../bin/dummy-infer
+
+if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+fi
+
+trap_err_onexit
diff --git a/compiler/one-cmds/tests/one-infer_005.test b/compiler/one-cmds/tests/one-infer_005.test
new file mode 100644
index 000000000..7a921fb60
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer_005.test
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-infer with post process script
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-infer
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="sample.tvn"
+
+if [[ ! -s "${inputfile}" ]]; then
+ touch ${inputfile}
+fi
+
+# copy dummy-infer to bin folder
+cp dummy-infer ../bin/dummy-infer
+
+rm -f ${filename}.log
+
+# run test
+one-infer -d dummy-infer --post-process "./one-infer-test-post-process.py TOKEN" -- ${inputfile} > ${filename}.log 2>&1
+return_code=$?
+
+rm -rf ../bin/dummy-infer
+
+if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then
+ if [ "$return_code" -eq "0" ]; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+fi
+
+trap_err_onexit
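The pass condition here is two-fold: the driver's marker must appear in the log and the overall exit status must be zero, because the post-process script runs after the driver and can fail on its own. The nested ifs amount to the single conjunction below (an equivalent reading, not a proposed change):

    if [ "${return_code}" -eq 0 ] && grep -q "dummy-infer dummy output!!!" "${filename}.log"; then
      echo "${filename_ext} SUCCESS"
      exit 0
    fi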
diff --git a/compiler/one-cmds/tests/one-infer_neg_001.test b/compiler/one-cmds/tests/one-infer_neg_001.test
new file mode 100644
index 000000000..15df58ad9
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer_neg_001.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage without any arguments
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "error: the following arguments are required: -d/--driver" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+rm -f ${filename}.log
+
+# run test
+one-infer > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-infer_neg_002.test b/compiler/one-cmds/tests/one-infer_neg_002.test
new file mode 100644
index 000000000..1fd16f2f8
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer_neg_002.test
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with a driver that cannot be found
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+driver_name="neg-infer"
+
+trap_err_onexit()
+{
+ if grep -q "FileNotFoundError: ${driver_name} not found" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+rm -f ${filename}.log
+
+# run test
+one-infer -d ${driver_name} -- -h > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-infer_neg_003.test b/compiler/one-cmds/tests/one-infer_neg_003.test
new file mode 100644
index 000000000..1de4d68d5
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer_neg_003.test
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-infer with invalid post process script
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ return_code=$?
+ if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then
+ # Case where the inference driver succeeded but the post process failed
+ if [ "$return_code" -ne "0" ]; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+ fi
+
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-infer
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="sample.tvn"
+
+if [[ ! -s "${inputfile}" ]]; then
+ touch ${inputfile}
+fi
+
+# copy dummy-infer to bin folder
+cp dummy-infer ../bin/dummy-infer
+
+rm -f ${filename}.log
+
+# run test
+one-infer -d dummy-infer --post-process "./one-infer-test-post-process.py" -- ${inputfile} > ${filename}.log 2>&1
+
+rm -rf ../bin/dummy-infer
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-optimize_001.test b/compiler/one-cmds/tests/one-optimize_001.test
new file mode 100644
index 000000000..94c906297
--- /dev/null
+++ b/compiler/one-cmds/tests/one-optimize_001.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3-opt.circle"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test
+one-optimize --resolve_customop_add \
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-optimize_002.test b/compiler/one-cmds/tests/one-optimize_002.test
new file mode 100644
index 000000000..f0a83361d
--- /dev/null
+++ b/compiler/one-cmds/tests/one-optimize_002.test
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3-opt.circle"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test
+one-optimize --resolve_customop_add \
+--change_outputs InceptionV3/Logits/SpatialSqueeze1 \
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-optimize_003.test b/compiler/one-cmds/tests/one-optimize_003.test
new file mode 100644
index 000000000..11a9fb4f9
--- /dev/null
+++ b/compiler/one-cmds/tests/one-optimize_003.test
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./UnidirSeqLSTM.tflite"
+intermfile="./UnidirSeqLSTM.circle"
+outputfile="./UnidirSeqLSTM-opt.circle"
+
+rm -f ${intermfile}
+rm -f ${outputfile}
+rm -f ${intermfile}.log
+rm -f ${outputfile}.log
+
+# run test
+one-import-tflite \
+--input_path ${inputfile} \
+--output_path ${intermfile} > /dev/null 2>&1
+
+one-optimize --unroll_unidirseqlstm \
+--input_path ${intermfile} \
+--output_path ${outputfile} > /dev/null 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+# check UNIDIRECTIONAL_SEQUENCE_LSTM exist
+circle-operator --code ${intermfile} > ${intermfile}.log 2>&1
+if ! grep -q "UNIDIRECTIONAL_SEQUENCE_LSTM" "${intermfile}.log"; then
+ trap_err_onexit
+fi
+
+# check UNIDIRECTIONAL_SEQUENCE_LSTM absent
+circle-operator --code ${outputfile} > ${outputfile}.log 2>&1
+if grep -q "UNIDIRECTIONAL_SEQUENCE_LSTM" "${outputfile}.log"; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
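one-optimize_003 asserts the unroll pass in both directions: UNIDIRECTIONAL_SEQUENCE_LSTM must be present before the pass and absent after it, which guards against a test input that never contained the op in the first place. If more passes adopt this pattern, the pair of checks could be factored into a helper; a sketch with a hypothetical expect_op_count, not suite code:

    # fail unless the op count in the model matches the expectation ("some" or "none")
    expect_op_count()
    {
      local model="$1" op="$2" want="$3"
      circle-operator --code "${model}" > "${model}.log" 2>&1
      local n; n="$(grep -c "${op}" "${model}.log" || true)"
      if [ "${want}" = "some" ] && [ "${n}" -eq 0 ]; then trap_err_onexit; fi
      if [ "${want}" = "none" ] && [ "${n}" -ne 0 ]; then trap_err_onexit; fi
    }

    expect_op_count "${intermfile}" UNIDIRECTIONAL_SEQUENCE_LSTM some
    expect_op_count "${outputfile}" UNIDIRECTIONAL_SEQUENCE_LSTM none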
diff --git a/compiler/one-cmds/tests/one-optimize_neg_001.test b/compiler/one-cmds/tests/one-optimize_neg_001.test
new file mode 100644
index 000000000..f88b4f861
--- /dev/null
+++ b/compiler/one-cmds/tests/one-optimize_neg_001.test
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with a non-circle input file
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Invalid input file" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.pb"
+outputfile="./inception_v3-opt.circle"
+
+rm -rf ${outputfile}
+rm -f ${filename}.log
+
+# run test
+one-optimize --resolve_customop_add \
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-optimize_neg_002.test b/compiler/one-cmds/tests/one-optimize_neg_002.test
new file mode 100644
index 000000000..3f37e6240
--- /dev/null
+++ b/compiler/one-cmds/tests/one-optimize_neg_002.test
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with invalid input path
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Failed to open file" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circletxt"
+outputfile="./inception_v3-opt.circle"
+
+rm -rf ${outputfile}
+rm -f ${filename}.log
+
+# run test
+one-optimize --resolve_customop_add \
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-optimize_neg_003.test b/compiler/one-cmds/tests/one-optimize_neg_003.test
new file mode 100644
index 000000000..9d6483c2b
--- /dev/null
+++ b/compiler/one-cmds/tests/one-optimize_neg_003.test
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage without an output file path
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "the following arguments are required: -o/--output_path" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+
+rm -f ${filename}.log
+
+# run test
+one-optimize --resolve_customop_add \
+--input_path "${inputfile}" > "${filename}.log" 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-optimize_neg_004.test b/compiler/one-cmds/tests/one-optimize_neg_004.test
new file mode 100644
index 000000000..5abd4c553
--- /dev/null
+++ b/compiler/one-cmds/tests/one-optimize_neg_004.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with non-existing node name given to --change_outputs
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Change outputs failed" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3-opt.circle"
+
+rm -rf ${outputfile}
+rm -rf ${filename}.log
+
+# run test
+one-optimize --resolve_customop_add \
+--change_outputs non_existing_node_name \
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-pack_001.test b/compiler/one-cmds/tests/one-pack_001.test
new file mode 100644
index 000000000..b05d0fe11
--- /dev/null
+++ b/compiler/one-cmds/tests/one-pack_001.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfolder="nnpack"
+
+rm -f ${filename}.log
+rm -rf ${outputfolder}
+
+# run test
+one-pack \
+-i ${inputfile} \
+-o ${outputfolder} > ${filename}.log 2>&1
+
+if [[ ! -d "${outputfolder}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-pack_neg_001.test b/compiler/one-cmds/tests/one-pack_neg_001.test
new file mode 100644
index 000000000..9cf388004
--- /dev/null
+++ b/compiler/one-cmds/tests/one-pack_neg_001.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with invalid input path
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "inception_v2.circle does not exist" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+rm -rf ${filename}.log
+
+# run test
+one-pack \
+-i ./inception_v2.circle \
+-o nnpack > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-pack_neg_002.test b/compiler/one-cmds/tests/one-pack_neg_002.test
new file mode 100644
index 000000000..9faa8d9d7
--- /dev/null
+++ b/compiler/one-cmds/tests/one-pack_neg_002.test
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with filename without extension
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "modelfile does not have extension" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+rm -rf ${filename}.log
+rm -rf nnpack
+
+# prepare dummy file
+touch ./sample
+
+# run test
+one-pack \
+-i ./sample \
+-o nnpack > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-pack_neg_003.test b/compiler/one-cmds/tests/one-pack_neg_003.test
new file mode 100644
index 000000000..5db4a3805
--- /dev/null
+++ b/compiler/one-cmds/tests/one-pack_neg_003.test
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage without output folder path
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "the following arguments are required: -o/--output_path" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+rm -rf ${filename}.log
+
+# prepare dummy file
+touch ./sample.circle
+
+# run test
+one-pack \
+-i ./sample.circle > "${filename}.log" 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-partition_001.test b/compiler/one-cmds/tests/one-partition_001.test
new file mode 100644
index 000000000..ddd2ae098
--- /dev/null
+++ b/compiler/one-cmds/tests/one-partition_001.test
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+testmodel="Net_InstanceNorm_003"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="${testmodel}.circle"
+partfile="${testmodel}.part"
+outputfile="${testmodel}.conn.json"
+
+rm -f ${filename}.log
+rm -rf ${testmodel}.000*
+rm -rf ${testmodel}.conn.*
+rm -rf ${testmodel}.*.log
+
+# run test
+one-partition \
+--input_file ${inputfile} \
+--part_file ${partfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-partition_neg_001.test b/compiler/one-cmds/tests/one-partition_neg_001.test
new file mode 100644
index 000000000..b594eba0d
--- /dev/null
+++ b/compiler/one-cmds/tests/one-partition_neg_001.test
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with invalid .part file (wrong comply value)
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+testmodel="Net_InstanceNorm_003"
+
+trap_err_onexit()
+{
+ if grep -q "ERROR" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+ # for debug build test
+ if grep -1 "std::runtime_error" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="${testmodel}.circle"
+partfile="${testmodel}.neg.part"
+outputfile="${testmodel}.conn.json"
+
+rm -rf ${testmodel}.000*
+rm -rf ${testmodel}.conn.*
+rm -rf ${testmodel}.*.log
+rm -rf ${filename}.log
+
+# run test
+one-partition \
+--input_file ${inputfile} \
+--part_file ${partfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-partition_neg_002.test b/compiler/one-cmds/tests/one-partition_neg_002.test
new file mode 100644
index 000000000..23fe84c05
--- /dev/null
+++ b/compiler/one-cmds/tests/one-partition_neg_002.test
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with invalid .cfg file (no one-partition section)
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+testmodel="Net_InstanceNorm_003"
+
+trap_err_onexit()
+{
+ if grep -q "'one-partition' section" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+cfgfile="${testmodel}.neg.cfg"
+
+rm -rf ${testmodel}.000*
+rm -rf ${testmodel}.conn.*
+rm -rf ${testmodel}.*.log
+rm -rf ${filename}.log
+
+# run test
+one-partition -C ${cfgfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-profile_001.test b/compiler/one-cmds/tests/one-profile_001.test
new file mode 100644
index 000000000..b4bdc72b0
--- /dev/null
+++ b/compiler/one-cmds/tests/one-profile_001.test
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/help-profile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+# copy help-profile to bin folder
+cp help-profile ../bin/help-profile
+
+rm -f ${filename}.log
+
+# run test
+one-profile -b help -- -h > ${filename}.log
+
+rm -rf ../bin/help-profile
+
+if grep -q "HELP MESSAGE!!" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+fi
+
+trap_err_onexit
diff --git a/compiler/one-cmds/tests/one-profile_002.test b/compiler/one-cmds/tests/one-profile_002.test
new file mode 100644
index 000000000..3b85dee54
--- /dev/null
+++ b/compiler/one-cmds/tests/one-profile_002.test
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run one-profile with dummy-profile driver
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-profile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="sample.tvn"
+
+if [[ ! -s "${inputfile}" ]]; then
+ touch ${inputfile}
+fi
+
+# copy dummy-profile to bin folder
+cp dummy-profile ../bin/dummy-profile
+
+rm -f ${filename}.log
+
+# run test
+one-profile -b dummy ${inputfile} > ${filename}.log
+
+rm -rf ../bin/dummy-profile
+
+if grep -q "dummy-profile dummy output!!!" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+fi
+
+trap_err_onexit
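
The one-profile tests stub out real backends by dropping an executable named <backend>-profile into ../bin before the run and removing it afterwards. The driver itself is not shown in this diff; a minimal stand-in consistent with the string the test greps for would be:

  cat > ../bin/dummy-profile <<'EOF'
  #!/bin/bash
  # Hypothetical stand-in; the real dummy-profile may also echo its arguments.
  echo "dummy-profile dummy output!!!"
  EOF
  chmod +x ../bin/dummy-profile
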
diff --git a/compiler/one-cmds/tests/one-profile_003.test b/compiler/one-cmds/tests/one-profile_003.test
new file mode 100644
index 000000000..ad98bb455
--- /dev/null
+++ b/compiler/one-cmds/tests/one-profile_003.test
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# print one-profile's help message
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+rm -f ${filename}.log
+
+# run test
+one-profile -h > ${filename}.log
+
+if grep -q "command line tool for profiling backend model" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+fi
+
+trap_err_onexit
diff --git a/compiler/one-cmds/tests/one-profile_004.cfg b/compiler/one-cmds/tests/one-profile_004.cfg
new file mode 100644
index 000000000..6d4387b0a
--- /dev/null
+++ b/compiler/one-cmds/tests/one-profile_004.cfg
@@ -0,0 +1,3 @@
+[one-profile]
+backend=dummy
+command=sample.tvn
diff --git a/compiler/one-cmds/tests/one-profile_004.test b/compiler/one-cmds/tests/one-profile_004.test
new file mode 100644
index 000000000..7b77caa4d
--- /dev/null
+++ b/compiler/one-cmds/tests/one-profile_004.test
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-profile with configuration input
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-profile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-profile_004.cfg"
+inputfile="sample.tvn"
+
+if [[ ! -s "${inputfile}" ]]; then
+ touch ${inputfile}
+fi
+
+# copy dummy-profile to bin folder
+cp dummy-profile ../bin/dummy-profile
+
+rm -f ${filename}.log
+
+# run test
+one-profile -C ${configfile} > ${filename}.log
+
+rm -rf ../bin/dummy-profile
+
+if grep -q "dummy-profile dummy output!!!" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+fi
+
+trap_err_onexit
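
Assuming the [one-profile] section keys map one-to-one onto command-line options (backend to -b, command to the trailing arguments), running with one-profile_004.cfg above should be roughly equivalent to the direct invocation used in one-profile_002:

  one-profile -b dummy sample.tvn
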
diff --git a/compiler/one-cmds/tests/one-profile_neg_001.test b/compiler/one-cmds/tests/one-profile_neg_001.test
new file mode 100644
index 000000000..f5b2dce02
--- /dev/null
+++ b/compiler/one-cmds/tests/one-profile_neg_001.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with no arguments (-b/--backend is required)
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "error: the following arguments are required: -b/--backend" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+rm -f ${filename}.log
+
+# run test
+one-profile > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-profile_neg_002.test b/compiler/one-cmds/tests/one-profile_neg_002.test
new file mode 100644
index 000000000..6964312a3
--- /dev/null
+++ b/compiler/one-cmds/tests/one-profile_neg_002.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-profile ${command} without backend option
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "error: the following arguments are required: -b/--backend" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+rm -f ${filename}.log
+
+# run test
+one-profile test.tvn > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-profile_neg_003.test b/compiler/one-cmds/tests/one-profile_neg_003.test
new file mode 100644
index 000000000..a00215452
--- /dev/null
+++ b/compiler/one-cmds/tests/one-profile_neg_003.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-profile -- ${command} without backend option
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "error: the following arguments are required: -b/--backend" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+rm -f ${filename}.log
+
+# run test
+one-profile -- test.tvn > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-profile_neg_004.cfg b/compiler/one-cmds/tests/one-profile_neg_004.cfg
new file mode 100644
index 000000000..93afcb4df
--- /dev/null
+++ b/compiler/one-cmds/tests/one-profile_neg_004.cfg
@@ -0,0 +1,6 @@
+[onecc]
+one-profile=True
+
+[one-profile]
+backend=dummy
+# command=..
diff --git a/compiler/one-cmds/tests/one-profile_neg_004.test b/compiler/one-cmds/tests/one-profile_neg_004.test
new file mode 100644
index 000000000..404ac1071
--- /dev/null
+++ b/compiler/one-cmds/tests/one-profile_neg_004.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# command key is missing
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "error: 'command' key is missing in the configuration file" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-profile_neg_004.cfg"
+
+rm -f ${filename}.log
+
+# run test
+one-profile -b dummy -C ${configfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-profile_neg_005.test b/compiler/one-cmds/tests/one-profile_neg_005.test
new file mode 100644
index 000000000..9a915497b
--- /dev/null
+++ b/compiler/one-cmds/tests/one-profile_neg_005.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# commands for the backend is missing
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "error: commands for the backend is missing" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+rm -f ${filename}.log
+
+# run test
+one-profile -b dummy > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-quantize_001.test b/compiler/one-cmds/tests/one-quantize_001.test
new file mode 100644
index 000000000..96804bf9d
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_001.test
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.quantized.circle"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--input_path ${inputfile} \
+--input_data ./inception_v3_test_data.h5 \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-quantize_002.test b/compiler/one-cmds/tests/one-quantize_002.test
new file mode 100644
index 000000000..0ddaad170
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_002.test
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.random.quantized.circle"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test without input data
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-quantize_003.test b/compiler/one-cmds/tests/one-quantize_003.test
new file mode 100644
index 000000000..b96263ea6
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_003.test
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.list.quantized.circle"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test with list-format input data (datalist.txt)
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--input_path ${inputfile} \
+--input_data ./datalist.txt \
+--input_data_format list \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-quantize_004.test b/compiler/one-cmds/tests/one-quantize_004.test
new file mode 100644
index 000000000..afb40805f
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_004.test
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.directory.quantized.circle"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test with directory-format input data (raw_files)
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--input_path ${inputfile} \
+--input_data ./raw_files \
+--input_data_format directory \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-quantize_005.test b/compiler/one-cmds/tests/one-quantize_005.test
new file mode 100644
index 000000000..e5403e270
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_005.test
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.mat.q8.circle"
+outputfile="./inception_v3.one-quantize_005.q8.circle"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test with force_quantparam option
+one-quantize \
+--force_quantparam \
+--tensor_name input \
+--scale 2.3 \
+--zero_point 33 \
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-quantize_006.test b/compiler/one-cmds/tests/one-quantize_006.test
new file mode 100644
index 000000000..2521a96a8
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_006.test
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.mat.q8.circle"
+outputfile="./inception_v3.one-quantize_006.q8.circle"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test with force_quantparam option (multi tensors)
+one-quantize \
+--force_quantparam \
+--tensor_name input \
+--scale 2.3 \
+--zero_point 33 \
+--tensor_name InceptionV3/Predictions/Reshape_1 \
+--scale 2.3 \
+--zero_point 33 \
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-quantize_007.test b/compiler/one-cmds/tests/one-quantize_007.test
new file mode 100644
index 000000000..771b85757
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_007.test
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.random.quantized.q16.iq8.circle"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test without input data
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype int16 \
+--granularity channel \
+--input_type uint8 \
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-quantize_008.test b/compiler/one-cmds/tests/one-quantize_008.test
new file mode 100644
index 000000000..fcc4141d4
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_008.test
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.random.quantized.q16.oq8.circle"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test without input data
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype int16 \
+--granularity channel \
+--output_type uint8 \
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-quantize_009.qconf.json b/compiler/one-cmds/tests/one-quantize_009.qconf.json
new file mode 100644
index 000000000..ac274e83a
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_009.qconf.json
@@ -0,0 +1,36 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "InceptionV3/InceptionV3/Conv2d_2b_3x3/Relu;InceptionV3/InceptionV3/Conv2d_2b_3x3/BatchNorm/FusedBatchNorm;InceptionV3/InceptionV3/Mixed_6a/Branch_1/Conv2d_0a_1x1/Conv2D;InceptionV3/InceptionV3/Conv2d_2b_3x3/Conv2D",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ },
+ {
+ "name" : "InceptionV3/InceptionV3/MaxPool_5a_3x3/MaxPool",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ },
+ {
+ "name" : "InceptionV3/InceptionV3/Mixed_5b/concat",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ },
+ {
+ "name" : "InceptionV3/InceptionV3/Mixed_5b/Branch_3/AvgPool_0a_3x3/AvgPool",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ },
+ {
+ "name" : "InceptionV3/InceptionV3/Mixed_7c/concat",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ },
+ {
+ "name" : "InceptionV3/Predictions/Reshape_1",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/one-cmds/tests/one-quantize_009.test b/compiler/one-cmds/tests/one-quantize_009.test
new file mode 100644
index 000000000..0c13292e9
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_009.test
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.random.quantized.mixed.circle"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test without input data
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--granularity channel \
+--quant_config one-quantize_009.qconf.json \
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-quantize_010.test b/compiler/one-cmds/tests/one-quantize_010.test
new file mode 100644
index 000000000..83d5f3745
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_010.test
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+check_message()
+{
+ if grep -q "MPEIR for InceptionV3/Predictions/Reshape_1 is" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ trap_err_onexit
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.one-quantize_010.q.circle"
+datafile="./inception_v3_test_data.h5"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--granularity channel \
+--input_path ${inputfile} \
+--input_data ${datafile} \
+--output_path ${outputfile} \
+--evaluate_result \
+--test_data ${datafile} \
+--print_mpeir > ${filename}.log 2>&1
+
+check_message
diff --git a/compiler/one-cmds/tests/one-quantize_011.test b/compiler/one-cmds/tests/one-quantize_011.test
new file mode 100644
index 000000000..88abe4e5a
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_011.test
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+check_message()
+{
+ if grep -q "Mean Top-5 match ratio for InceptionV3/Predictions/Reshape_1 is" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ trap_err_onexit
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.one-quantize_011.q.circle"
+datafile="./inception_v3_test_data.h5"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--granularity channel \
+--input_path ${inputfile} \
+--input_data ${datafile} \
+--output_path ${outputfile} \
+--evaluate_result \
+--test_data ${datafile} \
+--print_top5_match > ${filename}.log 2>&1
+
+check_message
diff --git a/compiler/one-cmds/tests/one-quantize_012.qconf.json b/compiler/one-cmds/tests/one-quantize_012.qconf.json
new file mode 100644
index 000000000..4a15b04f5
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_012.qconf.json
@@ -0,0 +1,16 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "names" : ["InceptionV3/InceptionV3/Conv2d_2b_3x3/Relu;InceptionV3/InceptionV3/Conv2d_2b_3x3/BatchNorm/FusedBatchNorm;InceptionV3/InceptionV3/Mixed_6a/Branch_1/Conv2d_0a_1x1/Conv2D;InceptionV3/InceptionV3/Conv2d_2b_3x3/Conv2D",
+ "InceptionV3/InceptionV3/MaxPool_5a_3x3/MaxPool",
+ "InceptionV3/InceptionV3/Mixed_5b/concat",
+ "InceptionV3/InceptionV3/Mixed_5b/Branch_3/AvgPool_0a_3x3/AvgPool",
+ "InceptionV3/InceptionV3/Mixed_7c/concat",
+ "InceptionV3/Predictions/Reshape_1"],
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
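
Note the shape difference between the two quantization configs: one-quantize_009.qconf.json lists one layer per entry under a singular "name" key, while this file groups all six tensors into a single entry under a plural "names" array sharing one dtype/granularity pair. A minimal skeleton showing both entry forms side by side — whether a single file may mix them is an assumption, and the tensor names are placeholders:

  cat > sample.qconf.json <<'EOF'
  {
    "default_quantization_dtype" : "uint8",
    "default_granularity" : "channel",
    "layers" : [
      { "name" : "tensor_a", "dtype" : "int16", "granularity" : "channel" },
      { "names" : ["tensor_b", "tensor_c"], "dtype" : "int16", "granularity" : "channel" }
    ]
  }
  EOF
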
diff --git a/compiler/one-cmds/tests/one-quantize_012.test b/compiler/one-cmds/tests/one-quantize_012.test
new file mode 100644
index 000000000..db3bb1745
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_012.test
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.one-quantize_012.q.circle"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test without input data
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--granularity channel \
+--quant_config one-quantize_012.qconf.json \
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-quantize_013.qconf.json b/compiler/one-cmds/tests/one-quantize_013.qconf.json
new file mode 100644
index 000000000..4a15b04f5
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_013.qconf.json
@@ -0,0 +1,16 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "names" : ["InceptionV3/InceptionV3/Conv2d_2b_3x3/Relu;InceptionV3/InceptionV3/Conv2d_2b_3x3/BatchNorm/FusedBatchNorm;InceptionV3/InceptionV3/Mixed_6a/Branch_1/Conv2d_0a_1x1/Conv2D;InceptionV3/InceptionV3/Conv2d_2b_3x3/Conv2D",
+ "InceptionV3/InceptionV3/MaxPool_5a_3x3/MaxPool",
+ "InceptionV3/InceptionV3/Mixed_5b/concat",
+ "InceptionV3/InceptionV3/Mixed_5b/Branch_3/AvgPool_0a_3x3/AvgPool",
+ "InceptionV3/InceptionV3/Mixed_7c/concat",
+ "InceptionV3/Predictions/Reshape_1"],
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/one-cmds/tests/one-quantize_013.test b/compiler/one-cmds/tests/one-quantize_013.test
new file mode 100644
index 000000000..0d985ff19
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_013.test
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# quantized_dtype and granularity are given by qconfig file
+# (not by command line interface)
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.one-quantize_013.q.circle"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test without input data
+# quantized_dtype and granularity are not given here
+one-quantize \
+--input_dtype float32 \
+--quant_config one-quantize_013.qconf.json \
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-quantize_014.test b/compiler/one-cmds/tests/one-quantize_014.test
new file mode 100644
index 000000000..40b79fafc
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_014.test
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test if `circle-eval-diff` supports directory input.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+check_message()
+{
+ if grep -q "Mean Top-5 match ratio for InceptionV3/Predictions/Reshape_1 is" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ trap_err_onexit
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.one-quantize_014.q.circle"
+datadir="./raw_files/"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--granularity channel \
+--input_path ${inputfile} \
+--input_data ${datadir} \
+--input_data_format dir \
+--output_path ${outputfile} \
+--evaluate_result \
+--test_data ${datadir} \
+--print_top5_match > ${filename}.log 2>&1
+
+check_message
diff --git a/compiler/one-cmds/tests/one-quantize_015.test b/compiler/one-cmds/tests/one-quantize_015.test
new file mode 100644
index 000000000..a069601a3
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_015.test
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test if --fake_quantize option works well
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.mat.q8.circle"
+outputfile="./inception_v3.one-quantize_015.fq.circle"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test
+one-quantize \
+--fake_quantize \
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-quantize_016.test b/compiler/one-cmds/tests/one-quantize_016.test
new file mode 100644
index 000000000..cfebc2f1e
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_016.test
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+# TODO Resolve circledump not found
+# https://github.com/Samsung/ONE/issues/10550
+if ! command -v circledump &> /dev/null
+then
+ echo "${filename_ext} SKIPPED"
+ exit 0
+fi
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./reshape_matmul.circle"
+outputfile="./reshape_matmul.one-quantize_016.circle"
+
+rm -f ${filename}.log
+rm -f ${filename}.first.cdump
+rm -f ${filename}.second.cdump
+rm -f ${outputfile}
+
+# run test with different input_type
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--granularity channel \
+--input_type uint8,int16 \
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+circledump ${outputfile} | grep "T(0:0)" > ${filename}.first.cdump
+circledump ${outputfile} | grep "T(0:1)" > ${filename}.second.cdump
+
+# check dtype of the first input (uint8)
+if ! grep -q "UINT8" "${filename}.first.cdump"; then
+ trap_err_onexit
+fi
+
+# check dtype of the second input (int16)
+if ! grep -q "INT16" "${filename}.second.cdump"; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-quantize_017.test b/compiler/one-cmds/tests/one-quantize_017.test
new file mode 100644
index 000000000..f4d3ee834
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_017.test
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./mobilenet_edgetpu_224_1.0_int8.circle"
+outputfile="./mobilenet_edgetpu_224_1.0_int8.one-quantize_017.circle"
+
+rm -f ${filename}.log
+rm -f ${outputfile}
+
+# run test
+one-quantize \
+--requantize \
+--input_model_dtype int8 \
+--quantized_dtype uint8 \
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-quantize_018.test b/compiler/one-cmds/tests/one-quantize_018.test
new file mode 100644
index 000000000..dd609dda5
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_018.test
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# ampq test for bisection_type = 'i16_front' (front nodes will be quantized to int16)
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.q_opt.one-quantize_018.circle"
+datafile="./inception_v3_test_data.h5"
+bisection_type="i16_front"
+
+rm -f ${filename}.log
+rm -f ${outputfile}
+
+# run test
+one-quantize \
+--input_data ${datafile} \
+--input_path ${inputfile} \
+--ampq \
+--ampq_qerror_ratio "0.5" \
+--ampq_algorithm "bisection" \
+--bisection_type ${bisection_type} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-quantize_019.test b/compiler/one-cmds/tests/one-quantize_019.test
new file mode 100644
index 000000000..3001fad4c
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_019.test
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# ampq test for bisection_type = 'i16_back' (output nodes will be quantized to int16)
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.q_opt.one-quantize_019.circle"
+datafile="./inception_v3_test_data.h5"
+bisection_type="i16_back"
+
+rm -f ${filename}.log
+rm -f ${outputfile}
+
+# run test
+one-quantize \
+--input_data ${datafile} \
+--input_path ${inputfile} \
+--ampq \
+--ampq_qerror_ratio "0.5" \
+--ampq_algorithm "bisection" \
+--bisection_type ${bisection_type} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-quantize_020.test b/compiler/one-cmds/tests/one-quantize_020.test
new file mode 100644
index 000000000..5167295ee
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_020.test
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# ampq test with bisection_type set to auto
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.q_opt.one-quantize_020.circle"
+datafile="./inception_v3_test_data.h5"
+bisection_type="auto"
+
+rm -f ${filename}.log
+rm -f ${outputfile}
+
+# run test
+one-quantize \
+--input_data ${datafile} \
+--input_path ${inputfile} \
+--ampq \
+--ampq_qerror_ratio "0.5" \
+--ampq_algorithm "bisection" \
+--bisection_type ${bisection_type} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-quantize_021.test b/compiler/one-cmds/tests/one-quantize_021.test
new file mode 100644
index 000000000..b6abc6682
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_021.test
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test moving average parameters (moving average per image)
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.one-quantize_021.circle"
+
+rm -f ${filename}.log
+rm -f ${outputfile}
+
+# run test
+one-quantize \
+--input_path ${inputfile} \
+--mode moving_average \
+--moving_avg_batch 1 \
+--moving_avg_const 0.01 \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-quantize_neg_001.test b/compiler/one-cmds/tests/one-quantize_neg_001.test
new file mode 100644
index 000000000..dd2617f65
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_neg_001.test
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with unsupported input dtype
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Unsupported input type" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+inputdata="./inception_v3_test_data.h5"
+outputfile="./inception_v3.quantized.circle"
+
+rm -rf ${outputfile}
+rm -f ${filename}.log
+
+# run test
+
+one-quantize \
+--input_dtype float64 \
+--quantized_dtype uint8 \
+--input_path ${inputfile} \
+--input_data ${inputdata} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
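
The *_neg_* scripts that follow all share one idiom: the failing command trips the ERR trap, and the trap declares success only if the expected diagnostic landed in the log; if the tool unexpectedly succeeds, control falls through to the final FAILED/exit 255. A hedged distillation as a reusable helper (hypothetical, not something the suite defines):

#!/bin/bash
expect_failure_with() {   # usage: expect_failure_with <pattern> <cmd> [args...]
  local pattern="$1"; shift
  if "$@" > cmd.log 2>&1; then
    echo "unexpected success"; return 1
  fi
  grep -q "${pattern}" cmd.log   # pass only with the expected diagnostic
}
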
diff --git a/compiler/one-cmds/tests/one-quantize_neg_002.test b/compiler/one-cmds/tests/one-quantize_neg_002.test
new file mode 100644
index 000000000..c5ad693f7
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_neg_002.test
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with unsupported output dtype
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Unsupported output type" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+inputdata="./inception_v3_test_data.h5"
+outputfile="./inception_v3.quantized.circle"
+
+rm -rf ${outputfile}
+rm -f ${filename}.log
+
+# run test
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint16 \
+--input_path ${inputfile} \
+--input_data ${inputdata} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-quantize_neg_003.test b/compiler/one-cmds/tests/one-quantize_neg_003.test
new file mode 100644
index 000000000..af2db72c5
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_neg_003.test
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with wrong representative dataset
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Buffer size does not match" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.quantized.circle"
+
+rm -rf ${outputfile}
+rm -f ${filename}.log
+
+# run test
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--input_path ${inputfile} \
+--input_data ./mobilenet_test_data.h5 \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-quantize_neg_004.test b/compiler/one-cmds/tests/one-quantize_neg_004.test
new file mode 100644
index 000000000..eb50564e7
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_neg_004.test
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with invalid output path
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Failed to export" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+inputdata="./inception_v3_test_data.h5"
+outputfile="."
+
+rm -f ${filename}.log
+
+# run test
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--input_path ${inputfile} \
+--input_data ${inputdata} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-quantize_neg_005.test b/compiler/one-cmds/tests/one-quantize_neg_005.test
new file mode 100644
index 000000000..6f5f489dc
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_neg_005.test
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with input that is not a circle file
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Invalid input file" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./while_3.pbtxt"
+inputdata="./inception_v3_test_data.h5"
+outputfile="./inception_v3.quantized.circle"
+
+rm -f ${filename}.log
+
+# run test
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--input_path ${inputfile} \
+--input_data ${inputdata} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-quantize_neg_006.test b/compiler/one-cmds/tests/one-quantize_neg_006.test
new file mode 100644
index 000000000..f5c965f85
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_neg_006.test
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with non-existent input path
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Failed to open file: ./inception_v2.circle" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v2.circle"
+inputdata="./inception_v3_test_data.h5"
+outputfile="./inception_v3.quantized.circle"
+
+rm -f ${filename}.log
+
+# run test
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--input_path ${inputfile} \
+--input_data ${inputdata} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-quantize_neg_007.test b/compiler/one-cmds/tests/one-quantize_neg_007.test
new file mode 100644
index 000000000..50b549524
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_neg_007.test
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with invalid input_data
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Given data file is not HDF5" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+inputdata="./inception_v3.circle"
+outputfile="./inception_v3.quantized.circle"
+
+rm -f ${filename}.log
+
+# run test
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--input_path ${inputfile} \
+--input_data ${inputdata} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-quantize_neg_008.test b/compiler/one-cmds/tests/one-quantize_neg_008.test
new file mode 100644
index 000000000..f2675f421
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_neg_008.test
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with invalid mode
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Unsupported mode" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+inputdata="./inception_v3_test_data.h5"
+outputfile="./inception_v3.quantized.circle"
+
+rm -f ${filename}.log
+
+# run test
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--input_path ${inputfile} \
+--input_data ${inputdata} \
+--mode average \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-quantize_neg_009.test b/compiler/one-cmds/tests/one-quantize_neg_009.test
new file mode 100644
index 000000000..21901609e
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_neg_009.test
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with invalid max_percentile
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Percentile must be ranged from" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+inputdata="./inception_v3_test_data.h5"
+outputfile="./inception_v3.quantized.circle"
+
+rm -f ${filename}.log
+
+# run test
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--input_path ${inputfile} \
+--input_data ${inputdata} \
+--max_percentile 101 \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-quantize_neg_010.test b/compiler/one-cmds/tests/one-quantize_neg_010.test
new file mode 100644
index 000000000..bd7c0dd6e
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_neg_010.test
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with invalid max_percentile
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Percentile must be ranged from" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+inputdata="./inception_v3_test_data.h5"
+outputfile="./inception_v3.quantized.circle"
+
+rm -f ${filename}.log
+
+# run test
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--input_path ${inputfile} \
+--input_data ${inputdata} \
+--max_percentile -1 \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-quantize_neg_011.test b/compiler/one-cmds/tests/one-quantize_neg_011.test
new file mode 100644
index 000000000..14552f09a
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_neg_011.test
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with invalid min_percentile
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Percentile must be ranged from" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+inputdata="./inception_v3_test_data.h5"
+outputfile="./inception_v3.quantized.circle"
+
+rm -f ${filename}.log
+
+# run test
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--input_path ${inputfile} \
+--input_data ${inputdata} \
+--min_percentile 101 \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-quantize_neg_012.test b/compiler/one-cmds/tests/one-quantize_neg_012.test
new file mode 100644
index 000000000..963783c82
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_neg_012.test
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with invalid min_percentile
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Percentile must be ranged from" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+inputdata="./inception_v3_test_data.h5"
+outputfile="./inception_v3.quantized.circle"
+
+rm -f ${filename}.log
+
+# run test
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--input_path ${inputfile} \
+--input_data ${inputdata} \
+--min_percentile -1 \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-quantize_neg_013.test b/compiler/one-cmds/tests/one-quantize_neg_013.test
new file mode 100644
index 000000000..4b81c8706
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_neg_013.test
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with invalid granularity
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Unsupported granularity" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+inputdata="./inception_v3_test_data.h5"
+outputfile="./inception_v3.quantized.circle"
+
+rm -f ${filename}.log
+
+# run test
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--input_path ${inputfile} \
+--input_data ${inputdata} \
+--granularity layered \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-quantize_neg_014.test b/compiler/one-cmds/tests/one-quantize_neg_014.test
new file mode 100644
index 000000000..911537097
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_neg_014.test
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with input_data that is not HDF5
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Given data file is not HDF5" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+inputdata="./datalist.txt"
+outputfile="./inception_v3.quantized.circle"
+
+rm -f ${filename}.log
+
+# run test
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--input_path ${inputfile} \
+--input_data ${inputdata} \
+--input_data_format h5 \
+--granularity channel \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-quantize_neg_015.test b/compiler/one-cmds/tests/one-quantize_neg_015.test
new file mode 100644
index 000000000..1acb20fae
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_neg_015.test
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with HDF5 input_data passed as 'list' format
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Cannot open file" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+inputdata="./inception_v3_test_data.h5"
+outputfile="./inception_v3.quantized.circle"
+
+rm -f ${filename}.log
+
+# run test
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--input_path ${inputfile} \
+--input_data ${inputdata} \
+--input_data_format list \
+--granularity channel \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-quantize_neg_016.test b/compiler/one-cmds/tests/one-quantize_neg_016.test
new file mode 100644
index 000000000..b419ff528
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_neg_016.test
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with unsupported input_data_format
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Unsupported input data format" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+inputdata="./datalist.txt"
+outputfile="./inception_v3.quantized.circle"
+
+rm -f ${filename}.log
+
+# run test
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--input_path ${inputfile} \
+--input_data ${inputdata} \
+--input_data_format h5list \
+--granularity channel \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-quantize_neg_017.test b/compiler/one-cmds/tests/one-quantize_neg_017.test
new file mode 100644
index 000000000..5326f18d5
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_neg_017.test
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with input_data that is not a directory
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Cannot open directory" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+inputdata="./inception_v3_test_data.h5"
+outputfile="./inception_v3.quantized.circle"
+
+rm -f ${filename}.log
+
+# run test
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--input_path ${inputfile} \
+--input_data ${inputdata} \
+--input_data_format directory \
+--granularity channel \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
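
neg_014 through neg_017 above probe the pairing between --input_data_format and the --input_data payload. A hedged summary of the expected dispatch (it mirrors the tests, not the one-quantize source):

#!/bin/bash
input_data_format="${1:-h5list}"
case "${input_data_format}" in
  h5)        echo "expects an HDF5 file      (neg_014 feeds a text list)" ;;
  list)      echo "expects a file of paths   (neg_015 feeds an HDF5 file)" ;;
  directory) echo "expects a directory       (neg_017 feeds an HDF5 file)" ;;
  *)         echo "Unsupported input data format" ;;   # neg_016's 'h5list'
esac
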
diff --git a/compiler/one-cmds/tests/one-quantize_neg_018.test b/compiler/one-cmds/tests/one-quantize_neg_018.test
new file mode 100644
index 000000000..6470efcc7
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_neg_018.test
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage of --force_quantparam without required --zero_point
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "following arguments are required: --zero_point" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.mat.q8.circle"
+outputfile="./inception_v3.neg_018.q8.circle"
+
+rm -f ${filename}.log
+
+# run test
+one-quantize \
+--force_quantparam \
+--tensor_name input \
+--scale 2.3 \
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
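
For contrast with neg_018 above: --force_quantparam consumes matching --tensor_name/--scale/--zero_point triples, and the test omits --zero_point on purpose to trigger the argument error. A presumably well-formed invocation (the --zero_point value is illustrative):

one-quantize \
--force_quantparam \
--tensor_name input \
--scale 2.3 \
--zero_point 0 \
--input_path ./inception_v3.mat.q8.circle \
--output_path ./inception_v3.force_quantparam.circle
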
diff --git a/compiler/one-cmds/tests/one-quantize_neg_019.test b/compiler/one-cmds/tests/one-quantize_neg_019.test
new file mode 100644
index 000000000..9f6e35f4a
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_neg_019.test
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with unsupported input_type
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Unsupported input type" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.quantized.neg_019.circle"
+
+rm -f ${filename}.log
+
+# run test
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype int16 \
+--granularity channel \
+--input_type float64 \
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-quantize_neg_020.test b/compiler/one-cmds/tests/one-quantize_neg_020.test
new file mode 100644
index 000000000..46a4f9d19
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_neg_020.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# check error message is printed when qconfig file is not json
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Failed to decode" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.quantized.neg_020.circle"
+
+rm -f ${filename}.log
+
+# run test
+one-quantize \
+--input_dtype float32 \
+--quant_config one-quantize_neg_020.test \
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
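
neg_020 deliberately feeds a bash script where --quant_config expects JSON, so decoding fails. A hedged pre-flight check that catches that class of mistake before invoking one-quantize:

#!/bin/bash
check_json() {   # usage: check_json <path>
  if python3 -c 'import json, sys; json.load(open(sys.argv[1]))' "$1" 2>/dev/null; then
    echo "valid JSON: $1"
  else
    echo "Failed to decode: $1"
  fi
}
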
diff --git a/compiler/one-cmds/tests/one-quantize_neg_021.test b/compiler/one-cmds/tests/one-quantize_neg_021.test
new file mode 100644
index 000000000..31a3182ba
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_neg_021.test
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Wrong number of input_type in one-quantize
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Invalid number of input dtype" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./reshape_matmul.circle"
+outputfile="./reshape_matmul.quantized.neg_021.circle"
+
+rm -f ${filename}.log
+
+# run test with wrong number of input_type
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--granularity channel \
+--input_type uint8,int16,uint8 \
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
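
One --input_type entry is expected per model input; reshape_matmul evidently has two (one-quantize_016 earlier checks that the first is uint8 and the second int16), so neg_021's three-entry list trips "Invalid number of input dtype". The presumably well-formed counterpart:

one-quantize \
--input_dtype float32 \
--quantized_dtype uint8 \
--granularity channel \
--input_type uint8,int16 \
--input_path ./reshape_matmul.circle \
--output_path ./reshape_matmul.quantized.circle
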
diff --git a/compiler/one-cmds/tests/one-quantize_neg_022.test b/compiler/one-cmds/tests/one-quantize_neg_022.test
new file mode 100644
index 000000000..519394175
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_neg_022.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Wrong number of input_type in one-quantize
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "following arguments are required: --input_model_dtype" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./mobilenet_edgetpu_224_1.0_int8.circle"
+outputfile="./mobilenet_edgetpu_224_1.0_int8.one-quantize_neg_022.circle"
+
+rm -f ${filename}.log
+
+# run test with wrong input model dtype
+one-quantize \
+--requantize \
+--quantized_dtype uint8 \
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-quantize_neg_023.test b/compiler/one-cmds/tests/one-quantize_neg_023.test
new file mode 100644
index 000000000..7ed97c18c
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_neg_023.test
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Wrong type of calibration parameter
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "moving_avg_batch must be integer" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.one-quantize_neg_023.circle"
+
+rm -f ${filename}.log
+
+# run test with wrong parameter dtype
+# moving_avg_batch must be integer
+one-quantize \
+--input_path ${inputfile} \
+--mode moving_average \
+--moving_avg_batch 0.1 \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_001.cfg b/compiler/one-cmds/tests/onecc_001.cfg
new file mode 100644
index 000000000..f33101074
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_001.cfg
@@ -0,0 +1,20 @@
+[onecc]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.onecc_001.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v2
+
+[one-optimize]
+input_path=inception_v3.onecc_001.circle
+output_path=inception_v3.opt.circle
diff --git a/compiler/one-cmds/tests/onecc_001.test b/compiler/one-cmds/tests/onecc_001.test
new file mode 100644
index 000000000..73fbdd6f7
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_001.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-tf -> one-optimize
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_001.cfg"
+outputfile="inception_v3.opt.circle"
+
+rm -f ${filename}.log
+rm -f ${outputfile}
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_002.cfg b/compiler/one-cmds/tests/onecc_002.cfg
new file mode 100644
index 000000000..0338ccb33
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_002.cfg
@@ -0,0 +1,24 @@
+[onecc]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=True
+one-quantize=False
+one-pack=True
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.onecc_002.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v2
+
+[one-optimize]
+input_path=inception_v3.onecc_002.circle
+output_path=inception_v3.opt.circle
+
+[one-pack]
+input_path=inception_v3.opt.circle
+output_path=inception_v3_pkg
diff --git a/compiler/one-cmds/tests/onecc_002.test b/compiler/one-cmds/tests/onecc_002.test
new file mode 100644
index 000000000..f154f66d0
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_002.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-tf -> one-optimize -> one-pack
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_002.cfg"
+outputfile="inception_v3_pkg"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_003.cfg b/compiler/one-cmds/tests/onecc_003.cfg
new file mode 100644
index 000000000..da5c73e11
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_003.cfg
@@ -0,0 +1,21 @@
+[onecc]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=True
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.onecc_003.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v1
+
+[one-quantize]
+input_path=inception_v3.onecc_003.circle
+output_path=inception_v3.quantized.circle
+input_data=inception_v3_test_data.h5
diff --git a/compiler/one-cmds/tests/onecc_003.test b/compiler/one-cmds/tests/onecc_003.test
new file mode 100644
index 000000000..140a99f6b
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_003.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-tf -> one-quantize
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_003.cfg"
+outputfile="inception_v3.quantized.circle"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_004.cfg b/compiler/one-cmds/tests/onecc_004.cfg
new file mode 100644
index 000000000..e155430f2
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_004.cfg
@@ -0,0 +1,20 @@
+[onecc]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=False
+one-pack=False
+one-codegen=True
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.onecc_004.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v1
+
+[one-codegen]
+backend=dummy
+command=-o sample.tvn inception_v3.onecc_004.circle
diff --git a/compiler/one-cmds/tests/onecc_004.test b/compiler/one-cmds/tests/onecc_004.test
new file mode 100644
index 000000000..b532d190b
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_004.test
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-tf -> one-codegen
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_004.cfg"
+outputfile="sample.tvn"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_005.cfg b/compiler/one-cmds/tests/onecc_005.cfg
new file mode 100644
index 000000000..ff4ed84f1
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_005.cfg
@@ -0,0 +1,20 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=True
+one-import-bcq=False
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=True
+
+[one-import-tflite]
+input_path=inception_v3.tflite
+output_path=inception_v3.onecc_005.circle
+
+[one-optimize]
+input_path=inception_v3.onecc_005.circle
+output_path=inception_v3.opt.circle
+
+[one-codegen]
+backend=dummy
+command=-o sample.tvn inception_v3.opt.circle
diff --git a/compiler/one-cmds/tests/onecc_005.test b/compiler/one-cmds/tests/onecc_005.test
new file mode 100644
index 000000000..48c81e518
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_005.test
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-tflite -> one-optimize -> one-codegen
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_005.cfg"
+outputfile="sample.tvn"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_006.cfg b/compiler/one-cmds/tests/onecc_006.cfg
new file mode 100644
index 000000000..dd58e6bca
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_006.cfg
@@ -0,0 +1,29 @@
+[onecc]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=True
+one-quantize=True
+one-pack=False
+one-codegen=True
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.onecc_006.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v1
+
+[one-optimize]
+input_path=inception_v3.onecc_006.circle
+output_path=inception_v3.opt.circle
+
+[one-quantize]
+input_path=inception_v3.opt.circle
+output_path=inception_v3.quantized.circle
+input_data=inception_v3_test_data.h5
+
+[one-codegen]
+backend=dummy
+command=-o sample.tvn inception_v3.quantized.circle
diff --git a/compiler/one-cmds/tests/onecc_006.test b/compiler/one-cmds/tests/onecc_006.test
new file mode 100644
index 000000000..451d34f8e
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_006.test
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-tf -> one-optimize -> one-quantize -> one-codegen
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_006.cfg"
+outputfile="sample.tvn"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_007.cfg b/compiler/one-cmds/tests/onecc_007.cfg
new file mode 100644
index 000000000..2d3ecac09
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_007.cfg
@@ -0,0 +1,29 @@
+[onecc]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=True
+one-pack=True
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.onecc_007.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v1
+
+[one-optimize]
+input_path=inception_v3.onecc_007.circle
+output_path=inception_v3.opt.circle
+
+[one-quantize]
+input_path=inception_v3.opt.circle
+output_path=inception_v3.quantized.circle
+input_data=inception_v3_test_data.h5
+
+[one-pack]
+input_path=inception_v3.quantized.circle
+output_path=inception_v3_pkg
diff --git a/compiler/one-cmds/tests/onecc_007.test b/compiler/one-cmds/tests/onecc_007.test
new file mode 100644
index 000000000..ee3c262ba
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_007.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-tf -> one-quantize -> one-pack (one-optimize disabled in cfg)
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_007.cfg"
+outputfile="inception_v3_pkg"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_008.cfg b/compiler/one-cmds/tests/onecc_008.cfg
new file mode 100644
index 000000000..020e274e1
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_008.cfg
@@ -0,0 +1,22 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=True
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=True
+
+[one-import-onnx]
+input_path=test_onnx_model.onnx
+output_path=test_onnx_model.circle
+
+[one-optimize]
+input_path=test_onnx_model.circle
+output_path=test_onnx_model.opt.circle
+remove_redundant_transpose=True
+
+[one-codegen]
+backend=dummy
+command=-o test_onnx_model.bin test_onnx_model.opt.circle
diff --git a/compiler/one-cmds/tests/onecc_008.test b/compiler/one-cmds/tests/onecc_008.test
new file mode 100644
index 000000000..f2d25f1d4
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_008.test
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-onnx -> one-optimize -> one-codegen
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_008.cfg"
+outputfile="test_onnx_model.bin"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_009.cfg b/compiler/one-cmds/tests/onecc_009.cfg
new file mode 100644
index 000000000..86121c557
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_009.cfg
@@ -0,0 +1,23 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=True
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=True
+
+[one-import-onnx]
+input_path=onnx_conv2d_conv2d.onnx
+output_path=onnx_conv2d_conv2d.circle
+
+[one-optimize]
+input_path=onnx_conv2d_conv2d.circle
+output_path=onnx_conv2d_conv2d.opt.circle
+remove_redundant_transpose=True
+convert_nchw_to_nhwc=True
+
+[one-codegen]
+backend=dummy
+command=-o onnx_conv2d_conv2d.bin onnx_conv2d_conv2d.opt.circle
diff --git a/compiler/one-cmds/tests/onecc_009.test b/compiler/one-cmds/tests/onecc_009.test
new file mode 100644
index 000000000..0b4537e02
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_009.test
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-onnx -> one-optimize -> one-codegen
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_009.cfg"
+outputfile="onnx_conv2d_conv2d.bin"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_010.cfg b/compiler/one-cmds/tests/onecc_010.cfg
new file mode 100644
index 000000000..d9f2a3d83
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_010.cfg
@@ -0,0 +1,17 @@
+[onecc]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.alt.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v1
+save_intermediate=True
diff --git a/compiler/one-cmds/tests/onecc_010.test b/compiler/one-cmds/tests/onecc_010.test
new file mode 100644
index 000000000..85192ed26
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_010.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-tf: intermediate file should exist
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_010.cfg"
+outputfile="inception_v3.alt.circle"
+intermfile="inception_v3.alt.tflite"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+rm -rf ${intermfile}
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+if [[ ! -s "${intermfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_011.cfg b/compiler/one-cmds/tests/onecc_011.cfg
new file mode 100644
index 000000000..aa9457f09
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_011.cfg
@@ -0,0 +1,14 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=True
+one-optimize=False
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-onnx]
+input_path=test_onnx_model.onnx
+output_path=test_onnx_model.circle
+save_intermediate=True
diff --git a/compiler/one-cmds/tests/onecc_011.test b/compiler/one-cmds/tests/onecc_011.test
new file mode 100644
index 000000000..eeb59b4b9
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_011.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-onnx
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_011.cfg"
+outputfile="test_onnx_model.circle"
+intermfile="test_onnx_model.tflite"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+rm -rf ${intermfile}
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+if [[ ! -s "${intermfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_012.cfg b/compiler/one-cmds/tests/onecc_012.cfg
new file mode 100644
index 000000000..92f61a14c
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_012.cfg
@@ -0,0 +1,22 @@
+[onecc]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=True
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.onecc_012.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v1
+
+[one-quantize]
+input_path=inception_v3.onecc_012.circle
+output_path=inception_v3.list.quantized.circle
+input_data=datalist.txt
+input_data_format=list
diff --git a/compiler/one-cmds/tests/onecc_012.test b/compiler/one-cmds/tests/onecc_012.test
new file mode 100644
index 000000000..f5abe94ca
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_012.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-tf -> one-quantize
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_012.cfg"
+outputfile="inception_v3.list.quantized.circle"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_013.cfg b/compiler/one-cmds/tests/onecc_013.cfg
new file mode 100644
index 000000000..4ed687168
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_013.cfg
@@ -0,0 +1,7 @@
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.onecc_013.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v1
diff --git a/compiler/one-cmds/tests/onecc_013.test b/compiler/one-cmds/tests/onecc_013.test
new file mode 100644
index 000000000..bca0cbefc
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_013.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import {tf} with config file
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_013.cfg"
+outputfile="inception_v3.onecc_13.circle"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test
+onecc import tf -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_014.cfg b/compiler/one-cmds/tests/onecc_014.cfg
new file mode 100644
index 000000000..8478be756
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_014.cfg
@@ -0,0 +1,7 @@
+[one-import-tflite]
+input_path=inception_v3.tflite
+output_path=inception_v3.onecc_014.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v1
diff --git a/compiler/one-cmds/tests/onecc_014.test b/compiler/one-cmds/tests/onecc_014.test
new file mode 100644
index 000000000..3e93a69ab
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_014.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import {tflite} with config file
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_014.cfg"
+outputfile="inception_v3.onecc_014.circle"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test
+onecc import tflite -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_015.cfg b/compiler/one-cmds/tests/onecc_015.cfg
new file mode 100644
index 000000000..7abfa7d86
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_015.cfg
@@ -0,0 +1,5 @@
+[one-import-bcq]
+input_path=bcq.pb
+output_path=bcq.circle
+input_arrays=Placeholder
+output_arrays=MatMul
diff --git a/compiler/one-cmds/tests/onecc_015.test b/compiler/one-cmds/tests/onecc_015.test
new file mode 100644
index 000000000..d92bf78a4
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_015.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import {bcq} with config file
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_015.cfg"
+outputfile="bcq.circle"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test
+onecc import bcq -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_016.cfg b/compiler/one-cmds/tests/onecc_016.cfg
new file mode 100644
index 000000000..ea0110bf9
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_016.cfg
@@ -0,0 +1,3 @@
+[one-import-onnx]
+input_path=test_onnx_model.onnx
+output_path=test_onnx_model.circle
diff --git a/compiler/one-cmds/tests/onecc_016.test b/compiler/one-cmds/tests/onecc_016.test
new file mode 100644
index 000000000..09e514375
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_016.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import {onnx} with config file
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_016.cfg"
+outputfile="test_onnx_model.circle"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test
+onecc import onnx -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_017.test b/compiler/one-cmds/tests/onecc_017.test
new file mode 100644
index 000000000..359a6e7ce
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_017.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-optimize
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="inception_v3.circle"
+outputfile="inception_v3.opt.circle"
+
+if [[ ! -s "${inputfile}" ]]; then
+ echo "${filename_ext} ERROR: Missing inputfile"
+ trap_err_onexit
+fi
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test
+onecc optimize -i ${inputfile} -o ${outputfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_018.test b/compiler/one-cmds/tests/onecc_018.test
new file mode 100644
index 000000000..cd2ac599a
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_018.test
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-quantize
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="inception_v3.opt.circle"
+inputdata="inception_v3_test_data.h5"
+outputfile="inception_v3.quantized.circle"
+
+if [[ ! -s "${inputfile}" || ! -s "${inputdata}" ]]; then
+ echo "${filename_ext} ERROR: Missing inputfile or inputdata"
+ trap_err_onexit
+fi
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test
+onecc quantize -i ${inputfile} -o ${outputfile} -d ${inputdata} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_019.test b/compiler/one-cmds/tests/onecc_019.test
new file mode 100644
index 000000000..23e0e3fa5
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_019.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-pack
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="inception_v3.quantized.circle"
+outputfile="inception_v3_pkg"
+
+if [[ ! -s "${inputfile}" ]]; then
+ echo "${filename_ext} ERROR: Missing inputfile"
+ trap_err_onexit
+fi
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test
+onecc pack -i ${inputfile} -o ${outputfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_020.test b/compiler/one-cmds/tests/onecc_020.test
new file mode 100644
index 000000000..d914e9b28
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_020.test
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-codegen
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="sample.circle"
+outputfile="sample.dummy"
+
+# prepare dummy file
+touch ${inputfile}
+
+if [[ ! -f "${inputfile}" ]]; then
+ echo "${filename_ext} ERROR: Missing inputfile"
+ trap_err_onexit
+fi
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+onecc codegen -b dummy -o ${outputfile} ${inputfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_021.cfg b/compiler/one-cmds/tests/onecc_021.cfg
new file mode 100644
index 000000000..e2d3e6bd8
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_021.cfg
@@ -0,0 +1,14 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=False
+one-optimize=False
+one-quantize=False
+one-pack=False
+one-codegen=False
+one-profile=True
+
+[one-profile]
+backend=dummy
+command=test_onnx_model.bin
diff --git a/compiler/one-cmds/tests/onecc_021.test b/compiler/one-cmds/tests/onecc_021.test
new file mode 100644
index 000000000..85e9aa70d
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_021.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-profile
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-profile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_021.cfg"
+
+# copy dummy-profile to bin folder
+cp dummy-profile ../bin/dummy-profile
+
+rm -f ${filename}.log
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+rm -rf ../bin/dummy-profile
+
+if grep -q "dummy-profile dummy output!!!" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+fi
+
+trap_err_onexit
diff --git a/compiler/one-cmds/tests/onecc_022.cfg b/compiler/one-cmds/tests/onecc_022.cfg
new file mode 100644
index 000000000..9741d5173
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_022.cfg
@@ -0,0 +1,18 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=False
+one-optimize=False
+one-quantize=True
+one-pack=False
+one-codegen=False
+one-profile=False
+
+[one-quantize]
+input_path=inception_v3.mat.q8.circle
+output_path=inception_v3.onecc_022.q8.circle
+force_quantparam=True
+tensor_name=input
+scale=2.1
+zero_point=45
diff --git a/compiler/one-cmds/tests/onecc_022.test b/compiler/one-cmds/tests/onecc_022.test
new file mode 100644
index 000000000..6f5d565be
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_022.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-quantize
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_022.cfg"
+outputfile="inception_v3.onecc_022.q8.circle"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_023.cfg b/compiler/one-cmds/tests/onecc_023.cfg
new file mode 100644
index 000000000..edbcc6f78
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_023.cfg
@@ -0,0 +1,15 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=True
+one-pack=False
+one-codegen=False
+
+[one-quantize]
+input_path=inception_v3.circle
+output_path=inception_v3.onecc_023.q16.iq8.circle
+quantized_dtype=int16
+granularity=channel
+input_type=uint8
diff --git a/compiler/one-cmds/tests/onecc_023.test b/compiler/one-cmds/tests/onecc_023.test
new file mode 100644
index 000000000..9448f4094
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_023.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-quantize
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_023.cfg"
+outputfile="inception_v3.onecc_023.q16.iq8.circle"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_024.cfg b/compiler/one-cmds/tests/onecc_024.cfg
new file mode 100644
index 000000000..053758fef
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_024.cfg
@@ -0,0 +1,22 @@
+[onecc]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=False
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.onecc_024.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v1
+
+[one-optimize]
+input_path=inception_v3.onecc_024.circle
+output_path=inception_v3.opt.circle
+make_batchnorm_gamma_positive=False
diff --git a/compiler/one-cmds/tests/onecc_024.test b/compiler/one-cmds/tests/onecc_024.test
new file mode 100644
index 000000000..d1e1d92c8
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_024.test
@@ -0,0 +1,78 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Use the `OONECC_024` optimization option
+
+: '
+This test assumes below directories.
+
+[one hierarchy]
+ one
+ ├── backends
+ ├── bin
+ ├── doc
+ ├── include
+ ├── lib
+ ├── optimization
+ └── test # pwd
+'
+
+OPT_ALREADY_EXIST=true
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+clean_envir()
+{
+ rm -rf ../optimization/OONECC_024.cfg
+ if [ "$OPT_ALREADY_EXIST" = false ]; then
+ rm -rf ../optimization
+ fi
+}
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ clean_envir
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_024.cfg"
+outputfile="inception_v3.opt.circle"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+if [ ! -d "../optimization" ]; then
+ mkdir -p ../optimization
+ OPT_ALREADY_EXIST=false
+fi
+
+cp OONECC_024.cfg ../optimization
+
+# run test
+LUCI_LOG=5 onecc -C ${configfile} -OONECC_024 > ${filename}.log 2>&1
+
+clean_envir
+
+if ! grep -q "MakeBatchNormGammaPositivePass" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+fi
+
+trap_err_onexit
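onecc_024 covers user-registered optimization presets. As the script implies, a config dropped into the sibling optimization/ directory under the name O&lt;OPTION&gt;.cfg becomes selectable as -O&lt;OPTION&gt;, and LUCI_LOG=5 makes the pass trace visible in the log, which the test greps to confirm that MakeBatchNormGammaPositivePass stayed off. The mechanism, reduced to a sketch:

    # sketch: register a preset named ONECC_024, then select it by flag
    mkdir -p ../optimization
    cp OONECC_024.cfg ../optimization/   # file name is "O" + option name + ".cfg"
    LUCI_LOG=5 onecc -C onecc_024.cfg -OONECC_024 > onecc_024.log 2>&1
    grep -q "MakeBatchNormGammaPositivePass" onecc_024.log \
      && echo "pass ran" || echo "pass skipped"
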
diff --git a/compiler/one-cmds/tests/onecc_025.cfg b/compiler/one-cmds/tests/onecc_025.cfg
new file mode 100644
index 000000000..02e54b0d7
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_025.cfg
@@ -0,0 +1,20 @@
+[onecc]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.onecc_025.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v2
+
+[one-optimize]
+input_path=inception_v3.onecc_025.circle
+output_path=inception_v3.opt.circle
diff --git a/compiler/one-cmds/tests/onecc_025.test b/compiler/one-cmds/tests/onecc_025.test
new file mode 100644
index 000000000..fff944088
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_025.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-tf -> one-optimize with the configuration file that includes `onecc` section
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_001.cfg"
+outputfile="inception_v3.opt.circle"
+
+rm -f ${filename}.log
+rm -f ${outputfile}
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_026.cfg b/compiler/one-cmds/tests/onecc_026.cfg
new file mode 100644
index 000000000..c27a13654
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_026.cfg
@@ -0,0 +1,16 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=True
+one-pack=False
+one-codegen=False
+
+[one-quantize]
+input_path=inception_v3.circle
+output_path=inception_v3.onecc_026.q.circle
+input_data=inception_v3_test_data.h5
+evaluate_result=True
+test_data=inception_v3_test_data.h5
+print_mpeir=True
diff --git a/compiler/one-cmds/tests/onecc_026.test b/compiler/one-cmds/tests/onecc_026.test
new file mode 100644
index 000000000..cd09e1102
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_026.test
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+check_message()
+{
+ if grep -q "MPEIR for InceptionV3/Predictions/Reshape_1 is" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ trap_err_onexit
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_026.cfg"
+outputfile="inception_v3.onecc_026.q.circle"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+check_message
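onecc_026 validates quantization quality reporting rather than the artifact itself: with evaluate_result=True and print_mpeir=True, one-quantize is expected to log an MPEIR (Mean Peak Error to Interval Ratio) value per model output, so the test greps the log instead of checking the circle file. Reduced to a sketch:

    # sketch: success is decided by a log line, not by the output file
    onecc -C onecc_026.cfg > onecc_026.log 2>&1
    grep -q "MPEIR for InceptionV3/Predictions/Reshape_1 is" onecc_026.log
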
diff --git a/compiler/one-cmds/tests/onecc_027.cfg b/compiler/one-cmds/tests/onecc_027.cfg
new file mode 100644
index 000000000..1b9a2b90b
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_027.cfg
@@ -0,0 +1,15 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=False
+one-optimize=False
+one-quantize=False
+one-pack=False
+one-codegen=False
+one-profile=False
+one-infer=True
+
+[one-infer]
+driver=dummy-infer
+command=test_onnx_model.bin
diff --git a/compiler/one-cmds/tests/onecc_027.test b/compiler/one-cmds/tests/onecc_027.test
new file mode 100644
index 000000000..54f0d2535
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_027.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-infer
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-infer
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_027.cfg"
+
+# copy dummy-infer to bin folder
+cp dummy-infer ../bin/dummy-infer
+
+rm -f ${filename}.log
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+rm -rf ../bin/dummy-infer
+
+if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+fi
+
+trap_err_onexit
diff --git a/compiler/one-cmds/tests/onecc_028.test b/compiler/one-cmds/tests/onecc_028.test
new file mode 100644
index 000000000..ea2357941
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_028.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow where one-import-tf -> one-optimize -> one-pack
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_028.workflow.json"
+outputfile="inception_v3_pkg"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_028.workflow.json b/compiler/one-cmds/tests/onecc_028.workflow.json
new file mode 100644
index 000000000..983648273
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_028.workflow.json
@@ -0,0 +1,37 @@
+{
+ "workflows": [
+ "MY_WORKFLOW"
+ ],
+ "MY_WORKFLOW": {
+ "steps": [
+ "IMPORT_TF",
+ "OPTIMIZE",
+ "PACK"
+ ],
+ "IMPORT_TF": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.onecc_028.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ },
+ "OPTIMIZE": {
+ "one-cmd": "one-optimize",
+ "commands": {
+ "input_path": "inception_v3.onecc_028.circle",
+ "output_path": "inception_v3.opt.circle"
+ }
+ },
+ "PACK": {
+ "one-cmd": "one-pack",
+ "commands": {
+ "input_path": "inception_v3.opt.circle",
+ "output_path": "inception_v3_pkg"
+ }
+ }
+ }
+}
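From onecc_028 on, the tests switch from .cfg files to workflow JSONs. The schema visible above: a top-level "workflows" array names the workflows to run, each workflow carries an ordered "steps" list, and each step binds a "one-cmd" driver to a "commands" map whose keys mirror the corresponding cfg options. Running one is a single call:

    # sketch: execute MY_WORKFLOW as defined in the JSON above; steps run in
    # listed order, each "commands" map becoming that tool's options
    onecc -W onecc_028.workflow.json > onecc_028.log 2>&1
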
diff --git a/compiler/one-cmds/tests/onecc_029.test b/compiler/one-cmds/tests/onecc_029.test
new file mode 100644
index 000000000..9fb0ec34e
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_029.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow where one-import-tf -> one-quantize
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_029.workflow.json"
+outputfile="inception_v3.quantized.circle"
+
+rm -rf ${outputfile}
+rm -f ${filename}.log
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_029.workflow.json b/compiler/one-cmds/tests/onecc_029.workflow.json
new file mode 100644
index 000000000..826888d19
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_029.workflow.json
@@ -0,0 +1,30 @@
+{
+ "workflows": [
+ "QUANTIZE_WORKFLOW"
+ ],
+ "QUANTIZE_WORKFLOW": {
+ "steps": [
+ "IMPORT_TF",
+ "QUANTIZE"
+ ],
+ "IMPORT_TF": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.onecc_029.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ },
+ "QUANTIZE": {
+ "one-cmd": "one-quantize",
+ "commands": {
+ "input_path": "inception_v3.onecc_029.circle",
+ "output_path": "inception_v3.quantized.circle",
+ "input_data": "inception_v3_test_data.h5"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_030.test b/compiler/one-cmds/tests/onecc_030.test
new file mode 100644
index 000000000..88f0ab884
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_030.test
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow where one-import-tf -> one-codegen
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_030.workflow.json"
+outputfile="sample.tvn"
+
+rm -rf ${outputfile}
+rm -f ${filename}.log
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_030.workflow.json b/compiler/one-cmds/tests/onecc_030.workflow.json
new file mode 100644
index 000000000..e4a2467a2
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_030.workflow.json
@@ -0,0 +1,29 @@
+{
+ "workflows": [
+ "codegen_wf"
+ ],
+ "codegen_wf": {
+ "steps": [
+ "import_tf",
+ "codegen"
+ ],
+ "import_tf": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.onecc_030.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ },
+ "codegen": {
+ "one-cmd": "one-codegen",
+ "commands": {
+ "backend": "dummy",
+ "command": "-o sample.tvn inception_v3.onecc_030.circle"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_031.test b/compiler/one-cmds/tests/onecc_031.test
new file mode 100644
index 000000000..33c9664ad
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_031.test
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow where one-import-tflite -> one-optimize -> one-codegen
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_031.workflow.json"
+outputfile="sample.tvn"
+
+rm -rf ${outputfile}
+rm -f ${filename}.log
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_031.workflow.json b/compiler/one-cmds/tests/onecc_031.workflow.json
new file mode 100644
index 000000000..7018ca6eb
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_031.workflow.json
@@ -0,0 +1,33 @@
+{
+ "workflows": [
+ "wf"
+ ],
+ "wf": {
+ "steps": [
+ "import",
+ "optimize",
+ "codegen"
+ ],
+ "import": {
+ "one-cmd": "one-import-tflite",
+ "commands": {
+ "input_path": "inception_v3.tflite",
+ "output_path": "inception_v3.onecc_031.circle"
+ }
+ },
+ "optimize": {
+ "one-cmd": "one-optimize",
+ "commands": {
+ "input_path": "inception_v3.onecc_031.circle",
+ "output_path": "inception_v3.opt.circle"
+ }
+ },
+ "codegen": {
+ "one-cmd": "one-codegen",
+ "commands": {
+ "backend": "dummy",
+ "command": "-o sample.tvn inception_v3.opt.circle"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_032.test b/compiler/one-cmds/tests/onecc_032.test
new file mode 100644
index 000000000..5884f578e
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_032.test
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow where one-import-tflite -> one-optimize -> one-quantize -> one-codegen
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_032.workflow.json"
+outputfile="sample.tvn"
+
+rm -rf ${outputfile}
+rm -f ${filename}.log
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_032.workflow.json b/compiler/one-cmds/tests/onecc_032.workflow.json
new file mode 100644
index 000000000..7a794c8f3
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_032.workflow.json
@@ -0,0 +1,42 @@
+{
+ "workflows": [
+ "wf"
+ ],
+ "wf": {
+ "steps": [
+ "import",
+ "optimize",
+ "quantize",
+ "codegen"
+ ],
+ "import": {
+ "one-cmd": "one-import-tflite",
+ "commands": {
+ "input_path": "inception_v3.tflite",
+ "output_path": "inception_v3.onecc_032.circle"
+ }
+ },
+ "optimize": {
+ "one-cmd": "one-optimize",
+ "commands": {
+ "input_path": "inception_v3.onecc_032.circle",
+ "output_path": "inception_v3.opt.circle"
+ }
+ },
+ "quantize": {
+ "one-cmd": "one-quantize",
+ "commands": {
+ "input_path": "inception_v3.onecc_032.circle",
+ "output_path": "inception_v3.quantized.circle",
+ "input_data": "inception_v3_test_data.h5"
+ }
+ },
+ "codegen": {
+ "one-cmd": "one-codegen",
+ "commands": {
+ "backend": "dummy",
+ "command": "-o sample.tvn inception_v3.quantized.circle"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_033.test b/compiler/one-cmds/tests/onecc_033.test
new file mode 100644
index 000000000..b2655febc
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_033.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow where one-import-tflite -> one-optimize -> one-quantize -> one-pack
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_033.workflow.json"
+outputfile="inception_v3_pkg"
+
+rm -rf ${outputfile}
+rm -f ${filename}.log
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_033.workflow.json b/compiler/one-cmds/tests/onecc_033.workflow.json
new file mode 100644
index 000000000..2edd5a896
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_033.workflow.json
@@ -0,0 +1,42 @@
+{
+ "workflows": [
+ "wf"
+ ],
+ "wf": {
+ "steps": [
+ "import",
+ "optimize",
+ "quantize",
+ "pack"
+ ],
+ "import": {
+ "one-cmd": "one-import-tflite",
+ "commands": {
+ "input_path": "inception_v3.tflite",
+ "output_path": "inception_v3.onecc_033.circle"
+ }
+ },
+ "optimize": {
+ "one-cmd": "one-optimize",
+ "commands": {
+ "input_path": "inception_v3.onecc_033.circle",
+ "output_path": "inception_v3.opt.circle"
+ }
+ },
+ "quantize": {
+ "one-cmd": "one-quantize",
+ "commands": {
+ "input_path": "inception_v3.onecc_033.circle",
+ "output_path": "inception_v3.quantized.circle",
+ "input_data": "inception_v3_test_data.h5"
+ }
+ },
+ "pack": {
+ "one-cmd": "one-pack",
+ "commands": {
+ "input_path": "inception_v3.quantized.circle",
+ "output_path": "inception_v3_pkg"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_034.test b/compiler/one-cmds/tests/onecc_034.test
new file mode 100644
index 000000000..9c88dff1c
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_034.test
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow where one-import-onnx -> one-optimize -> one-codegen
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_034.workflow.json"
+outputfile="onnx_conv2d_conv2d.bin"
+
+rm -rf ${outputfile}
+rm -f ${filename}.log
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_034.workflow.json b/compiler/one-cmds/tests/onecc_034.workflow.json
new file mode 100644
index 000000000..bc3cbbf58
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_034.workflow.json
@@ -0,0 +1,35 @@
+{
+ "workflows": [
+ "wf"
+ ],
+ "wf": {
+ "steps": [
+ "import",
+ "optimize",
+ "codegen"
+ ],
+ "import": {
+ "one-cmd": "one-import-onnx",
+ "commands": {
+ "input_path": "onnx_conv2d_conv2d.onnx",
+ "output_path": "onnx_conv2d_conv2d.circle"
+ }
+ },
+ "optimize": {
+ "one-cmd": "one-optimize",
+ "commands": {
+ "input_path": "onnx_conv2d_conv2d.circle",
+ "output_path": "onnx_conv2d_conv2d.opt.circle",
+ "remove_redundant_transpose": "True",
+ "convert_nchw_to_nhwc": "True"
+ }
+ },
+ "codegen": {
+ "one-cmd": "one-codegen",
+ "commands": {
+ "backend": "dummy",
+ "command": "-o onnx_conv2d_conv2d.bin onnx_conv2d_conv2d.opt.circle"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_035.test b/compiler/one-cmds/tests/onecc_035.test
new file mode 100644
index 000000000..851da6544
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_035.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow where one-import-tf generates intermediate files
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_035.workflow.json"
+outputfile="inception_v3.alt.circle"
+intermfile="inception_v3.alt.tflite"
+
+rm -rf ${outputfile}
+rm -rf ${intermfile}
+rm -f ${filename}.log
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+if [[ ! -s "${intermfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_035.workflow.json b/compiler/one-cmds/tests/onecc_035.workflow.json
new file mode 100644
index 000000000..6abf1f32b
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_035.workflow.json
@@ -0,0 +1,22 @@
+{
+ "workflows": [
+ "wf"
+ ],
+ "wf": {
+ "steps": [
+ "import"
+ ],
+ "import": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.alt.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v1",
+ "save_intermediate": "True"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_036.test b/compiler/one-cmds/tests/onecc_036.test
new file mode 100644
index 000000000..00f4641b7
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_036.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow where one-import-onnx generates intermediate files
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_036.workflow.json"
+outputfile="test_onnx_model.circle"
+intermfile="test_onnx_model.tflite"
+
+rm -rf ${outputfile}
+rm -rf ${intermfile}
+rm -f ${filename}.log
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+if [[ ! -s "${intermfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_036.workflow.json b/compiler/one-cmds/tests/onecc_036.workflow.json
new file mode 100644
index 000000000..5fa29edb5
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_036.workflow.json
@@ -0,0 +1,18 @@
+{
+ "workflows": [
+ "wf"
+ ],
+ "wf": {
+ "steps": [
+ "import"
+ ],
+ "import": {
+ "one-cmd": "one-import-onnx",
+ "commands": {
+ "input_path": "test_onnx_model.onnx",
+ "output_path": "test_onnx_model.circle",
+ "save_intermediate": "True"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_037.test b/compiler/one-cmds/tests/onecc_037.test
new file mode 100644
index 000000000..596bbfdf9
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_037.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow that chains one-import-tf -> one-optimize
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_037.workflow.json"
+outputfile="inception_v3.opt.circle"
+
+rm -rf ${outputfile}
+rm -f ${filename}.log
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_037.workflow.json b/compiler/one-cmds/tests/onecc_037.workflow.json
new file mode 100644
index 000000000..ebd6b34d6
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_037.workflow.json
@@ -0,0 +1,29 @@
+{
+ "workflows": [
+ "SIMPLE_WORKFLOW"
+ ],
+ "SIMPLE_WORKFLOW": {
+ "steps": [
+ "IMPORT",
+ "OPTIMIZE"
+ ],
+ "IMPORT": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.onecc_037.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ },
+ "OPTIMIZE": {
+ "one-cmd": "one-optimize",
+ "commands": {
+ "input_path": "inception_v3.onecc_037.circle",
+ "output_path": "inception_v3.opt.circle"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_038.test b/compiler/one-cmds/tests/onecc_038.test
new file mode 100644
index 000000000..41f633335
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_038.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow that chains one-import-tf -> one-quantize
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_038.workflow.json"
+outputfile="inception_v3.list.quantized.circle"
+
+rm -rf ${outputfile}
+rm -f ${filename}.log
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_038.workflow.json b/compiler/one-cmds/tests/onecc_038.workflow.json
new file mode 100644
index 000000000..d31045efe
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_038.workflow.json
@@ -0,0 +1,31 @@
+{
+ "workflows": [
+ "SIMPLE_WORKFLOW"
+ ],
+ "SIMPLE_WORKFLOW": {
+ "steps": [
+ "IMPORT",
+ "QUANTIZE"
+ ],
+ "IMPORT": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.onecc_038.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ },
+ "QUANTIZE": {
+ "one-cmd": "one-quantize",
+ "commands": {
+ "input_path": "inception_v3.onecc_038.circle",
+ "output_path": "inception_v3.list.quantized.circle",
+ "input_data": "datalist.txt",
+ "input_data_format": "list"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_039.test b/compiler/one-cmds/tests/onecc_039.test
new file mode 100644
index 000000000..b922636ca
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_039.test
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow where one-quantize quantizes the model and evaluates the result
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+check_message()
+{
+ if grep -q "MPEIR for InceptionV3/Predictions/Reshape_1 is" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ trap_err_onexit
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_039.workflow.json"
+outputfile="inception_v3.onecc_039.q.circle"
+
+rm -rf ${outputfile}
+rm -f ${filename}.log
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+check_message
diff --git a/compiler/one-cmds/tests/onecc_039.workflow.json b/compiler/one-cmds/tests/onecc_039.workflow.json
new file mode 100644
index 000000000..55ef56988
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_039.workflow.json
@@ -0,0 +1,21 @@
+{
+ "workflows": [
+ "SIMPLE_WORKFLOW"
+ ],
+ "SIMPLE_WORKFLOW": {
+ "steps": [
+ "QUANTIZE"
+ ],
+ "QUANTIZE": {
+ "one-cmd": "one-quantize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+ "output_path": "inception_v3.onecc_026.q.circle",
+ "input_data": "inception_v3_test_data.h5",
+ "evaluate_result": "True",
+ "test_data": "inception_v3_test_data.h5",
+ "print_mpeir": "True"
+ }
+ }
+ }
+}
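onecc_039 passes on log content rather than on an artifact: evaluate_result=True together with print_mpeir=True makes one-quantize compare the quantized model against the float model on test_data and print one MPEIR line per output, which check_message greps for. MPEIR is read here as the Mean Peak Error to Interval Ratio over the N test samples; roughly (the exact normalization is an assumption):

    MPEIR = (1/N) * sum_n [ max_i |y_i(n) - y'_i(n)| / (max(y) - min(y)) ]

where y is the float model's output and y' the quantized model's.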
diff --git a/compiler/one-cmds/tests/onecc_040.cfg b/compiler/one-cmds/tests/onecc_040.cfg
new file mode 100644
index 000000000..b9f39fd51
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_040.cfg
@@ -0,0 +1,20 @@
+[onecc]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.onecc_040.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v2
+
+[one-optimize]
+input_path=inception_v3.onecc_040.circle
+output_path=inception_v3.opt.circle
diff --git a/compiler/one-cmds/tests/onecc_040.test b/compiler/one-cmds/tests/onecc_040.test
new file mode 100644
index 000000000..4d23025bf
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_040.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow with cfg reference
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_040.workflow.json"
+outputfile="inception_v3.opt.circle"
+
+rm -rf ${outputfile}
+rm -f ${filename}.log
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_040.workflow.json b/compiler/one-cmds/tests/onecc_040.workflow.json
new file mode 100644
index 000000000..2d4119b21
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_040.workflow.json
@@ -0,0 +1,10 @@
+{
+ "workflows": [
+ "MY_WORKFLOW"
+ ],
+ "MY_WORKFLOW": {
+ "cfg-reference": {
+ "path": "onecc_040.cfg"
+ }
+ }
+}
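A workflow entry may carry a "cfg-reference" instead of "steps", delegating the whole run to an existing configuration file. Assuming that delegation is exact, the workflow above is equivalent to the plain cfg invocation:

    onecc -C onecc_040.cfg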
diff --git a/compiler/one-cmds/tests/onecc_041.cfg b/compiler/one-cmds/tests/onecc_041.cfg
new file mode 100644
index 000000000..16135f074
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_041.cfg
@@ -0,0 +1,16 @@
+[onecc]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3_without_opt.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v2
diff --git a/compiler/one-cmds/tests/onecc_041.test b/compiler/one-cmds/tests/onecc_041.test
new file mode 100644
index 000000000..c504e6953
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_041.test
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run multiple workflows with run-after dependencies
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+check_message()
+{
+ if grep -q "Do inference of inception_v3_without_opt\.circle" "${filename}.log" &&
+ grep -q "Do inference of inception_v3\.opt\.circle" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ trap_err_onexit
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_041.workflow.json"
+outputfile1="inception_v3_without_opt.circle"
+outputfile2="inception_v3.opt.circle"
+
+cp dummy-inferV2 ../bin/dummy-inferV2
+
+rm -rf ${outputfile1} ${outputfile2}
+rm -f ${filename}.log
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+rm -rf ../bin/dummy-inferV2
+
+if [[ ! -s "${outputfile1}" ]] && [[ ! -s "${outputfile2}" ]]; then
+ trap_err_onexit
+fi
+
+check_message
diff --git a/compiler/one-cmds/tests/onecc_041.workflow.json b/compiler/one-cmds/tests/onecc_041.workflow.json
new file mode 100644
index 000000000..e19494cd2
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_041.workflow.json
@@ -0,0 +1,61 @@
+{
+ "workflows": [
+ "WITHOUT_OPT",
+ "WITH_OPT",
+ "INFER"
+ ],
+ "INFER": {
+ "run-after": [
+ "WITHOUT_OPT",
+ "WITH_OPT"
+ ],
+ "steps": [
+ "INFER1",
+ "INFER2"
+ ],
+ "INFER1": {
+ "one-cmd": "one-infer",
+ "commands" : {
+ "driver": "dummy-inferV2",
+ "command": "inception_v3_without_opt.circle"
+ }
+ },
+ "INFER2": {
+ "one-cmd": "one-infer",
+ "commands": {
+ "driver": "dummy-inferV2",
+ "command": "inception_v3.opt.circle"
+ }
+ }
+ },
+ "WITHOUT_OPT": {
+ "cfg-reference": {
+ "path": "onecc_041.cfg"
+ }
+ },
+ "WITH_OPT": {
+ "steps": [
+ "IMPORT_TF",
+ "OPTIMIZE"
+ ],
+ "IMPORT_TF": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.onecc_041.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ },
+ "OPTIMIZE": {
+ "one-cmd": "one-optimize",
+ "commands": {
+ "input_path": "inception_v3.onecc_041.circle",
+ "output_path": "inception_v3.opt.circle"
+ }
+ }
+ }
+}
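"run-after" adds an ordering edge between workflows: INFER is deferred until both WITHOUT_OPT and WITH_OPT have finished, while the two producer workflows themselves carry no mutual ordering. Assuming one-infer spells the "driver" key as --driver and passes everything after "--" to that driver (a guess from the JSON, not checked against one-infer's help), the INFER workflow amounts to:

    one-infer --driver dummy-inferV2 -- inception_v3_without_opt.circle
    one-infer --driver dummy-inferV2 -- inception_v3.opt.circle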
diff --git a/compiler/one-cmds/tests/onecc_042.cfg b/compiler/one-cmds/tests/onecc_042.cfg
new file mode 100644
index 000000000..988c76860
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_042.cfg
@@ -0,0 +1,9 @@
+[Environment]
+SPM_SIZE=256KB
+
+[onecc]
+one-codegen=True
+
+[one-codegen]
+backend=dummyEnv
+command=dummy_env.bin
diff --git a/compiler/one-cmds/tests/onecc_042.test b/compiler/one-cmds/tests/onecc_042.test
new file mode 100644
index 000000000..c8a15fa4f
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_042.test
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-codegen with Environment section
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+outputfile="dummy_env.bin"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummyEnv-compile
+ rm -rf ${outputfile}
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_042.cfg"
+
+rm -rf ${outputfile}
+rm -rf ${filename}.log
+
+# copy dummyEnv-compile to bin folder
+cp dummyEnv-compile ../bin/dummyEnv-compile
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+if grep -q "SPM_SIZE=256KB" "${outputfile}"; then
+ echo "${filename_ext} SUCCESS"
+ rm -rf ../bin/dummyEnv-compile
+ rm -rf ${outputfile}
+ exit 0
+fi
+
+trap_err_onexit
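The [Environment] section exports its keys into the environment of the spawned backend tool, which is how dummyEnv-compile can see SPM_SIZE at all. A sketch of a stub consistent with the grep above (the real dummyEnv-compile ships elsewhere in the test tree):

    #!/bin/bash
    # record what the driver sees of its environment into the file named by $1
    echo "SPM_SIZE=${SPM_SIZE}" > "$1"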
diff --git a/compiler/one-cmds/tests/onecc_043.cfg b/compiler/one-cmds/tests/onecc_043.cfg
new file mode 100644
index 000000000..7d5ee87a8
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_043.cfg
@@ -0,0 +1,14 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=False
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=False
+include=O1
+
+[one-optimize]
+input_path=inception_v3.circle
+output_path=inception_v3.onecc_043.opt.circle
diff --git a/compiler/one-cmds/tests/onecc_043.test b/compiler/one-cmds/tests/onecc_043.test
new file mode 100644
index 000000000..f99039ef6
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_043.test
@@ -0,0 +1,61 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test for "O1=True" option in onecc config file
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_043.cfg"
+outputfile="inception_v3.onecc_043.opt.circle"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+# O1.list is dynamically created from onelib/export_constant.py
+readarray -t O1_OPTS < "O1.list"
+readarray -t NO_O1_OPTS < "non-O1.list"
+
+for opt in "${O1_OPTS[@]}"
+do
+ if ! grep -q ${opt} ${outputfile}.log; then
+ trap_err_onexit
+ fi
+done
+
+for no_opt in "${NO_O1_OPTS[@]}"
+do
+ if grep -q ${no_opt} ${outputfile}.log; then
+ trap_err_onexit
+ fi
+done
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_044.cfg b/compiler/one-cmds/tests/onecc_044.cfg
new file mode 100644
index 000000000..f7cdde863
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_044.cfg
@@ -0,0 +1,20 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=True
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=False
+include=O1
+
+[one-import-onnx]
+input_path=test_onnx_model.onnx
+output_path=test_onnx_model.circle
+
+[one-optimize]
+input_path=test_onnx_model.circle
+output_path=test_onnx_model.onecc_044.opt.circle
+convert_nchw_to_nhwc=True
+fold_add_v2=False
diff --git a/compiler/one-cmds/tests/onecc_044.test b/compiler/one-cmds/tests/onecc_044.test
new file mode 100644
index 000000000..1706cf9ec
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_044.test
@@ -0,0 +1,74 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test for "O1=True" option with other options
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_044.cfg"
+outputfile="test_onnx_model.onecc_044.opt.circle"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+readarray -t OPTS < "O1.list"
+readarray -t NO_OPTS < "non-O1.list"
+
+OPTS+=("convert_nchw_to_nhwc")
+for i in "${!NO_OPTS[@]}"; do
+ if [[ ${NO_OPTS[i]} = "convert_nchw_to_nhwc" ]]; then
+ unset 'NO_OPTS[i]'
+ fi
+done
+
+NO_OPTS+=("fold_add_v2")
+for i in "${!OPTS[@]}"; do
+ if [[ ${OPTS[i]} = "fold_add_v2" ]]; then
+ unset 'OPTS[i]'
+ fi
+done
+
+for opt in "${OPTS[@]}"
+do
+ if ! grep -q ${opt} ${outputfile}.log; then
+ trap_err_onexit
+ fi
+done
+
+for no_opt in "${NO_OPTS[@]}"
+do
+ if grep -q ${no_opt} ${outputfile}.log; then
+ trap_err_onexit
+ fi
+done
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_045.cfg b/compiler/one-cmds/tests/onecc_045.cfg
new file mode 100644
index 000000000..d0ee39f38
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_045.cfg
@@ -0,0 +1,13 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=True
+one-pack=False
+one-codegen=False
+
+[one-quantize]
+input_path=reshape_matmul.circle
+output_path=reshape_matmul.onecc_045.q.circle
+input_type=uint8,int16
diff --git a/compiler/one-cmds/tests/onecc_045.test b/compiler/one-cmds/tests/onecc_045.test
new file mode 100644
index 000000000..2b5c0a21a
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_045.test
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+# TODO Resolve circledump not found
+# https://github.com/Samsung/ONE/issues/10550
+if ! command -v circledump &> /dev/null
+then
+ echo "${filename_ext} SKIPPED"
+ exit 0
+fi
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./reshape_matmul.circle"
+configfile="onecc_045.cfg"
+outputfile="reshape_matmul.onecc_045.q.circle"
+
+rm -f ${filename}.log
+rm -f ${filename}.first.cdump
+rm -f ${filename}.second.cdump
+rm -f ${outputfile}
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+circledump ${outputfile} | grep "T(0:0)" > ${filename}.first.cdump
+circledump ${outputfile} | grep "T(0:1)" > ${filename}.second.cdump
+
+# check dtype of the first input (uint8)
+if ! grep -q "UINT8" "${filename}.first.cdump"; then
+ trap_err_onexit
+fi
+
+# check dtype of the second input (int16)
+if ! grep -q "INT16" "${filename}.second.cdump"; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_046.cfg b/compiler/one-cmds/tests/onecc_046.cfg
new file mode 100644
index 000000000..28927355b
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_046.cfg
@@ -0,0 +1,7 @@
+[onecc]
+one-codegen=True
+
+[one-codegen]
+backends=dummy,dummyV2
+dummy=-o onecc_046.tvn inception_v3.onecc_046.circle
+dummyV2=-O onecc_046.2.tvn inception_v3.onecc_046.2.circle
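With the plural "backends" key, one-codegen runs once per listed backend and takes each backend's full command line from the cfg key of the same name. Assuming each backend name resolves to a <name>-compile driver (which is why these tests copy dummy-compile and dummyV2-compile into ../bin), the section above expands to:

    dummy-compile   -o onecc_046.tvn   inception_v3.onecc_046.circle
    dummyV2-compile -O onecc_046.2.tvn inception_v3.onecc_046.2.circle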
diff --git a/compiler/one-cmds/tests/onecc_046.test b/compiler/one-cmds/tests/onecc_046.test
new file mode 100644
index 000000000..a11b181d5
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_046.test
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# 'backends' key in configuration file
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ rm -rf ../bin/dummyV2-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_046.cfg"
+outputfile="onecc_046.tvn"
+outputfile2="onecc_046.2.tvn"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+rm -rf ${outputfile2}
+
+# copy dummy tools to bin folder
+cp dummy-compile ../bin/dummy-compile
+cp dummyV2-compile ../bin/dummyV2-compile
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+if [[ ! -s "${outputfile2}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+rm -rf ../bin/dummyV2-compile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_047.cfg b/compiler/one-cmds/tests/onecc_047.cfg
new file mode 100644
index 000000000..df2eef5f8
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_047.cfg
@@ -0,0 +1,7 @@
+[onecc]
+one-codegen=True
+
+[one-codegen]
+backends=dummy,dummyV2
+dummy=-o onecc_047.tvn inception_v3.onecc_047.circle
+dummyV2=-O onecc_047.2.tvn inception_v3.onecc_047.2.circle
diff --git a/compiler/one-cmds/tests/onecc_047.test b/compiler/one-cmds/tests/onecc_047.test
new file mode 100644
index 000000000..345042091
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_047.test
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# 'backends' key in configuration file but run codegen for only one backend
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ rm -rf ../bin/dummyV2-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_047.cfg"
+outputfile="onecc_047.tvn"
+outputfile2="onecc_047.2.tvn"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+rm -rf ${outputfile2}
+
+# copy dummy tools to bin folder
+cp dummy-compile ../bin/dummy-compile
+cp dummyV2-compile ../bin/dummyV2-compile
+
+# run test
+onecc -C ${configfile} -b dummyV2 > ${filename}.log 2>&1
+
+# shouldn't be generated
+if [[ -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+if [[ ! -s "${outputfile2}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+rm -rf ../bin/dummyV2-compile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_048.cfg b/compiler/one-cmds/tests/onecc_048.cfg
new file mode 100644
index 000000000..c4c0acc40
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_048.cfg
@@ -0,0 +1,6 @@
+[onecc]
+one-codegen=True
+
+[one-codegen]
+backend=dummyV2
+command=-O onecc_048.tvn inception_v3.onecc_048.circle
diff --git a/compiler/one-cmds/tests/onecc_048.test b/compiler/one-cmds/tests/onecc_048.test
new file mode 100644
index 000000000..d4eff54cf
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_048.test
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# 'backend' option from command line with 'backend' key in configuration file
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummyV2-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_048.cfg"
+outputfile="onecc_048.tvn"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# copy dummy tools to bin folder
+cp dummyV2-compile ../bin/dummyV2-compile
+
+# run test
+onecc -C ${configfile} -b dummyV2 > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummyV2-compile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_049.cfg b/compiler/one-cmds/tests/onecc_049.cfg
new file mode 100644
index 000000000..021d9a45e
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_049.cfg
@@ -0,0 +1,7 @@
+[onecc]
+one-profile=True
+
+[one-profile]
+backends=dummy,dummyV2
+dummy=dummy.bin
+dummyV2=dummyV2.bin
diff --git a/compiler/one-cmds/tests/onecc_049.test b/compiler/one-cmds/tests/onecc_049.test
new file mode 100644
index 000000000..45fd8a74b
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_049.test
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# 'backends' key in one-profile section
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-profile
+ rm -rf ../bin/dummyV2-profile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_049.cfg"
+
+rm -f ${filename}.log
+
+# copy dummy tools to bin folder
+cp dummy-profile ../bin/dummy-profile
+cp dummyV2-profile ../bin/dummyV2-profile
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+if ! grep -q "dummy-profile dummy output!!!" "${filename}.log"; then
+ trap_err_onexit
+fi
+
+if ! grep -q "dummyV2-profile dummy output!!!" "${filename}.log"; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-profile
+rm -rf ../bin/dummyV2-profile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_050.cfg b/compiler/one-cmds/tests/onecc_050.cfg
new file mode 100644
index 000000000..021d9a45e
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_050.cfg
@@ -0,0 +1,7 @@
+[onecc]
+one-profile=True
+
+[one-profile]
+backends=dummy,dummyV2
+dummy=dummy.bin
+dummyV2=dummyV2.bin
diff --git a/compiler/one-cmds/tests/onecc_050.test b/compiler/one-cmds/tests/onecc_050.test
new file mode 100644
index 000000000..8ebfd1e81
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_050.test
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# 'backends' key in configuration file but run one-profile for only one backend
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-profile
+ rm -rf ../bin/dummyV2-profile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_050.cfg"
+
+rm -f ${filename}.log
+
+# copy dummy tools to bin folder
+cp dummy-profile ../bin/dummy-profile
+cp dummyV2-profile ../bin/dummyV2-profile
+
+# run test
+onecc -C ${configfile} -b dummyV2 > ${filename}.log 2>&1
+
+if grep -q "dummy-profile dummy output!!!" "${filename}.log"; then
+ trap_err_onexit
+fi
+
+if ! grep -q "dummyV2-profile dummy output!!!" "${filename}.log"; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-profile
+rm -rf ../bin/dummyV2-profile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_051.cfg b/compiler/one-cmds/tests/onecc_051.cfg
new file mode 100644
index 000000000..ecf983f71
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_051.cfg
@@ -0,0 +1,6 @@
+[onecc]
+one-profile=True
+
+[one-profile]
+backend=dummyV2
+command=dummyV2.bin
diff --git a/compiler/one-cmds/tests/onecc_051.test b/compiler/one-cmds/tests/onecc_051.test
new file mode 100644
index 000000000..97967096c
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_051.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# 'backend' option from command line with 'backend' key in configuration file
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-profile
+ rm -rf ../bin/dummyV2-profile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_051.cfg"
+
+rm -f ${filename}.log
+
+# copy dummy tools to bin folder
+cp dummyV2-profile ../bin/dummyV2-profile
+
+# run test
+onecc -C ${configfile} -b dummyV2 > ${filename}.log 2>&1
+
+if ! grep -q "dummyV2-profile dummy output!!!" "${filename}.log"; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummyV2-profile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_052.cfg b/compiler/one-cmds/tests/onecc_052.cfg
new file mode 100644
index 000000000..e884ea9a8
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_052.cfg
@@ -0,0 +1,13 @@
+[onecc]
+one-codegen=True
+one-profile=True
+
+[one-codegen]
+backends=dummy,dummyV2
+dummy=-o onecc_052.tvn inception_v3.onecc_052.circle
+dummyV2=-O onecc_052.2.tvn inception_v3.onecc_052.2.circle
+
+[one-profile]
+backends=dummy,dummyV2
+dummy=dummy.bin
+dummyV2=dummyV2.bin
diff --git a/compiler/one-cmds/tests/onecc_052.test b/compiler/one-cmds/tests/onecc_052.test
new file mode 100644
index 000000000..39bc6ed68
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_052.test
@@ -0,0 +1,74 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# 'backends' key with one-profile and one-codegen sections
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-profile
+ rm -rf ../bin/dummyV2-profile
+ rm -rf ../bin/dummy-compile
+ rm -rf ../bin/dummyV2-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_052.cfg"
+outputfile="onecc_052.tvn"
+outputfile2="onecc_052.2.tvn"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+rm -rf ${outputfile2}
+
+# copy dummy tools to bin folder
+cp dummy-profile ../bin/dummy-profile
+cp dummyV2-profile ../bin/dummyV2-profile
+cp dummy-compile ../bin/dummy-compile
+cp dummyV2-compile ../bin/dummyV2-compile
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+if ! grep -q "dummy-profile dummy output!!!" "${filename}.log"; then
+ trap_err_onexit
+fi
+
+if ! grep -q "dummyV2-profile dummy output!!!" "${filename}.log"; then
+ trap_err_onexit
+fi
+
+if [[ ! -s "${outputfile}" ]]; then
+ echo "ERROR: Not found ${outputfile}" >> ${filename}.log
+ trap_err_onexit
+fi
+
+if [[ ! -s "${outputfile2}" ]]; then
+echo "ERROR: Not found ${outputfile2}" >> ${filename}.log
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-profile
+rm -rf ../bin/dummyV2-profile
+rm -rf ../bin/dummy-compile
+rm -rf ../bin/dummyV2-compile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_053.cfg b/compiler/one-cmds/tests/onecc_053.cfg
new file mode 100644
index 000000000..8cbe799b1
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_053.cfg
@@ -0,0 +1,13 @@
+[onecc]
+one-codegen=True
+one-profile=True
+
+[one-codegen]
+backends=dummy,dummyV2
+dummy=-o onecc_053.tvn inception_v3.onecc_053.circle
+dummyV2=-O onecc_053.2.tvn inception_v3.onecc_053.2.circle
+
+[one-profile]
+backends=dummy,dummyV2
+dummy=dummy.bin
+dummyV2=dummyV2.bin
diff --git a/compiler/one-cmds/tests/onecc_053.test b/compiler/one-cmds/tests/onecc_053.test
new file mode 100644
index 000000000..20f0209ad
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_053.test
@@ -0,0 +1,74 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# 'backend' option with one-profile and one-codegen sections
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-profile
+ rm -rf ../bin/dummyV2-profile
+ rm -rf ../bin/dummy-compile
+ rm -rf ../bin/dummyV2-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_053.cfg"
+outputfile="onecc_053.tvn"
+outputfile2="onecc_053.2.tvn"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+rm -rf ${outputfile2}
+
+# copy dummy tools to bin folder
+cp dummy-profile ../bin/dummy-profile
+cp dummyV2-profile ../bin/dummyV2-profile
+cp dummy-compile ../bin/dummy-compile
+cp dummyV2-compile ../bin/dummyV2-compile
+
+# run test
+onecc -C ${configfile} -b dummyV2 > ${filename}.log 2>&1
+
+if grep -q "dummy-profile dummy output!!!" "${filename}.log"; then
+ trap_err_onexit
+fi
+
+if ! grep -q "dummyV2-profile dummy output!!!" "${filename}.log"; then
+ trap_err_onexit
+fi
+
+if [[ -s "${outputfile}" ]]; then
+ echo "ERROR: Found ${outputfile}" >> ${filename}.log
+ trap_err_onexit
+fi
+
+if [[ ! -s "${outputfile2}" ]]; then
+echo "ERROR: Not found ${outputfile2}" >> ${filename}.log
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-profile
+rm -rf ../bin/dummyV2-profile
+rm -rf ../bin/dummy-compile
+rm -rf ../bin/dummyV2-compile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_054.cfg b/compiler/one-cmds/tests/onecc_054.cfg
new file mode 100644
index 000000000..63ad06ccf
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_054.cfg
@@ -0,0 +1,7 @@
+[onecc]
+one-codegen=True
+
+[one-codegen]
+backends=dummy,dummyV2
+dummy=-o onecc_054.tvn inception_v3.onecc_054.circle
+dummyV2=-O onecc_054.2.tvn inception_v3.onecc_054.2.circle
diff --git a/compiler/one-cmds/tests/onecc_054.test b/compiler/one-cmds/tests/onecc_054.test
new file mode 100644
index 000000000..14853a8a3
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_054.test
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# overwrite one-codegen command with `backends` key
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ rm -rf ../bin/dummyV2-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_054.cfg"
+outputfile0="onecc_054_overwrite.tvn"
+outputfile1="onecc_054.tvn"
+outputfile2="onecc_054.2.tvn"
+
+rm -f ${filename}.log
+rm -rf ${outputfile0}
+rm -rf ${outputfile1}
+rm -rf ${outputfile2}
+
+# copy dummy tools to bin folder
+cp dummy-compile ../bin/dummy-compile
+cp dummyV2-compile ../bin/dummyV2-compile
+
+# run test
+onecc codegen -C ${configfile} -b dummyV2 -- \
+ -O onecc_054_overwrite.tvn onecc.circle > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile0}" ]]; then
+ trap_err_onexit
+fi
+
+# shouldn't be generated
+if [[ -s "${outputfile1}" ]]; then
+ trap_err_onexit
+fi
+if [[ -s "${outputfile2}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+rm -rf ../bin/dummyV2-compile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_055.cfg b/compiler/one-cmds/tests/onecc_055.cfg
new file mode 100644
index 000000000..c4c0acc40
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_055.cfg
@@ -0,0 +1,6 @@
+[onecc]
+one-codegen=True
+
+[one-codegen]
+backend=dummyV2
+command=-O onecc_055.tvn inception_v3.onecc_055.circle
diff --git a/compiler/one-cmds/tests/onecc_055.test b/compiler/one-cmds/tests/onecc_055.test
new file mode 100644
index 000000000..c13aa4b08
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_055.test
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# overwrite one-codegen command with `backend` and `command` keys
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummyV2-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_055.cfg"
+outputfile0="onecc_055.tvn"
+outputfile1="onecc_055_overwrite.tvn"
+
+rm -f ${filename}.log
+rm -rf ${outputfile0}
+rm -rf ${outputfile1}
+
+# copy dummy tools to bin folder
+cp dummyV2-compile ../bin/dummyV2-compile
+
+# run test
+onecc codegen -C ${configfile} -b dummyV2 -- \
+ -O onecc_055_overwrite.tvn onecc.circle > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile1}" ]]; then
+ trap_err_onexit
+fi
+
+# shouldn't be generated
+if [[ -s "${outputfile0}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummyV2-compile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_056.cfg b/compiler/one-cmds/tests/onecc_056.cfg
new file mode 100644
index 000000000..1d30fd78b
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_056.cfg
@@ -0,0 +1,6 @@
+[onecc]
+one-profile=True
+
+[one-profile]
+backend=dummyV3
+command=onecc_056
diff --git a/compiler/one-cmds/tests/onecc_056.test b/compiler/one-cmds/tests/onecc_056.test
new file mode 100644
index 000000000..159cafbb4
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_056.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# overwrite one-profile command with 'backend' key
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummyV3-profile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_056.cfg"
+
+rm -f ${filename}.log
+
+# copy dummy tools to bin folder
+cp dummyV3-profile ../bin/dummyV3-profile
+
+# run test
+onecc profile -C ${configfile} -b dummyV3 -- \
+ onecc_056_overwrite > ${filename}.log 2>&1
+
+if ! grep -q "dummyV3-profile with onecc_056_overwrite" "${filename}.log"; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummyV3-profile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_057.cfg b/compiler/one-cmds/tests/onecc_057.cfg
new file mode 100644
index 000000000..dbe4f531a
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_057.cfg
@@ -0,0 +1,7 @@
+[onecc]
+one-profile=True
+
+[one-profile]
+backends=dummyV2,dummyV3
+dummyV2=dummyV2.bin
+dummyV3=onecc_057
diff --git a/compiler/one-cmds/tests/onecc_057.test b/compiler/one-cmds/tests/onecc_057.test
new file mode 100644
index 000000000..f0076093b
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_057.test
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# overwrite one-profile command with `backends` key
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummyV2-profile
+ rm -rf ../bin/dummyV3-profile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_057.cfg"
+
+rm -f ${filename}.log
+
+# copy dummy tools to bin folder
+cp dummyV2-profile ../bin/dummyV2-profile
+cp dummyV3-profile ../bin/dummyV3-profile
+
+# run test
+onecc profile -C ${configfile} -b dummyV3 -- \
+ onecc_057_overwrite > ${filename}.log 2>&1
+
+if ! grep -q "dummyV3-profile with onecc_057_overwrite" "${filename}.log"; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummyV2-profile
+rm -rf ../bin/dummyV3-profile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_058.cfg b/compiler/one-cmds/tests/onecc_058.cfg
new file mode 100644
index 000000000..1d0b6d1f2
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_058.cfg
@@ -0,0 +1,9 @@
+[onecc]
+one-quantize=True
+
+[one-quantize]
+requantize=True
+input_path=mobilenet_edgetpu_224_1.0_int8.circle
+output_path=mobilenet_edgetpu_224_1.0_int8.onecc_058.circle
+input_model_dtype=int8
+quantized_dtype=uint8
diff --git a/compiler/one-cmds/tests/onecc_058.test b/compiler/one-cmds/tests/onecc_058.test
new file mode 100644
index 000000000..6f60eed8a
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_058.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_058.cfg"
+outputfile="mobilenet_edgetpu_224_1.0_int8.onecc_058.circle"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_neg_001.test b/compiler/one-cmds/tests/onecc_neg_001.test
new file mode 100644
index 000000000..fe83e35b5
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_001.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with missing configuration file
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Not found given configuration file" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_neg_001.cfg"
+
+rm -f ${filename}.log
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_002.cfg b/compiler/one-cmds/tests/onecc_neg_002.cfg
new file mode 100644
index 000000000..e597a39a5
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_002.cfg
@@ -0,0 +1,20 @@
+[onecc]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=True
+one-quantize=False
+one-pack=True
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.onecc_neg_002.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v2
+
+[one-optimize]
+input_path=inception_v3.onecc_neg_002.circle
+output_path=inception_v3.opt.circle
diff --git a/compiler/one-cmds/tests/onecc_neg_002.test b/compiler/one-cmds/tests/onecc_neg_002.test
new file mode 100644
index 000000000..04918ab74
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_002.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with missing one-pack section in configuration file
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "configuration file must have 'one-pack' section" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_neg_002.cfg"
+
+rm -f ${filename}.log
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_003.cfg b/compiler/one-cmds/tests/onecc_neg_003.cfg
new file mode 100644
index 000000000..e4718efe3
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_003.cfg
@@ -0,0 +1,15 @@
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.onecc_neg_003.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v2
+
+[one-optimize]
+input_path=inception_v3.onecc_neg_003.circle
+output_path=inception_v3.opt.circle
+
+[one-pack]
+input_path=inception_v3.opt.circle
+output_path=inception_v3_pkg
diff --git a/compiler/one-cmds/tests/onecc_neg_003.test b/compiler/one-cmds/tests/onecc_neg_003.test
new file mode 100644
index 000000000..4c64c5ab8
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_003.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with missing onecc section in configuration file
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "\[onecc\] section is required in configuration file" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_neg_003.cfg"
+
+rm -f ${filename}.log
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_004.cfg b/compiler/one-cmds/tests/onecc_neg_004.cfg
new file mode 100644
index 000000000..53e819109
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_004.cfg
@@ -0,0 +1,24 @@
+[onecc]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.onecc_neg_004.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v2
+
+[one-optimize]
+input_path=inception_v3.onecc_neg_004.circle
+output_path=inception_v3.opt.circle
+
+[one-optimize]
+input_path=inception_v4.circle
+output_path=inception_v4.opt.circle
diff --git a/compiler/one-cmds/tests/onecc_neg_004.test b/compiler/one-cmds/tests/onecc_neg_004.test
new file mode 100644
index 000000000..30fb34593
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_004.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with duplicate section
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "section 'one-optimize' already exists" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_neg_004.cfg"
+
+rm -f ${filename}.log
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
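
onecc reads these cfg files with an INI parser that rejects a repeated section header, which is what the grepped "already exists" message reports. The condition can be approximated from the shell by counting header occurrences — a sketch, not onecc's actual parsing code:

# A section header appearing more than once is a duplicate.
count=$(grep -c '^\[one-optimize\]' onecc_neg_004.cfg)
if [ "${count}" -gt 1 ]; then
  echo "section 'one-optimize' already exists"
fi
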
diff --git a/compiler/one-cmds/tests/onecc_neg_005.cfg b/compiler/one-cmds/tests/onecc_neg_005.cfg
new file mode 100644
index 000000000..1b449c750
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_005.cfg
@@ -0,0 +1,16 @@
+[onecc]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.alt.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v2
diff --git a/compiler/one-cmds/tests/onecc_neg_005.test b/compiler/one-cmds/tests/onecc_neg_005.test
new file mode 100644
index 000000000..1d2248593
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_005.test
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative: one-import-tf intermediate file should not exist
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_neg_005.cfg"
+outputfile="inception_v3.alt.circle"
+intermfile="inception_v3.alt.tflite"
+
+rm -rf ${outputfile}
+rm -rf ${intermfile}
+rm -f ${filename}.log
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+# output should exist
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+# intermediate file should not exist
+if [[ -f "${intermfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_neg_006.cfg b/compiler/one-cmds/tests/onecc_neg_006.cfg
new file mode 100644
index 000000000..63ef42bfb
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_006.cfg
@@ -0,0 +1,13 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=True
+one-optimize=False
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-onnx]
+input_path=test_onnx_model.onnx
+output_path=test_onnx_model.circle
diff --git a/compiler/one-cmds/tests/onecc_neg_006.test b/compiler/one-cmds/tests/onecc_neg_006.test
new file mode 100644
index 000000000..194a7ea43
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_006.test
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative: one-import-onnx intermediate file should not exist
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_neg_006.cfg"
+outputfile="test_onnx_model.circle"
+intermfile="test_onnx_model.tflite"
+
+rm -rf ${outputfile}
+rm -rf ${intermfile}
+rm -f ${filename}.log
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+# output should exist
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+# intermediate file should not exist
+if [[ -f "${intermfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
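
The two bracket tests above deliberately differ: -s asserts the output exists and is non-empty, while -f asserts mere existence, which is the right probe for an intermediate file that must be absent:

[[ -s "${outputfile}" ]]   # true only if the file exists AND has size > 0
[[ -f "${intermfile}" ]]   # true if the file exists at all, even when empty
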
diff --git a/compiler/one-cmds/tests/onecc_neg_007.test b/compiler/one-cmds/tests/onecc_neg_007.test
new file mode 100644
index 000000000..264a74e48
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_007.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative subcommand
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "unrecognized arguments" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+rm -f ${filename}.log
+
+# run test
+onecc wronginput > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_008.test b/compiler/one-cmds/tests/onecc_neg_008.test
new file mode 100644
index 000000000..67b2c6ff3
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_008.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with no arguments
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "error" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+rm -f ${filename}.log
+
+# run test
+onecc > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_009.test b/compiler/one-cmds/tests/onecc_neg_009.test
new file mode 100644
index 000000000..26ad7da60
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_009.test
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Valid optimization option but invalid configuration file path
+
+: '
+This test assumes below directories.
+
+[one hierarchy]
+ one
+ ├── backends
+ ├── bin
+ ├── doc
+ ├── include
+ ├── lib
+ ├── optimization
+ └── test # pwd
+'
+
+OPT_ALREADY_EXIST=true
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ rm -rf ../optimization/OONECC_NEG_009.cfg
+ if [ "$OPT_ALREADY_EXIST" = false ]; then
+ rm -rf ../optimization
+ fi
+ if grep -q "Not found given configuration file" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+if [ ! -d "../optimization" ]; then
+ mkdir -p ../optimization
+ OPT_ALREADY_EXIST=false
+fi
+
+touch ../optimization/OONECC_NEG_009.cfg
+
+configfile=".."
+
+rm -f ${filename}.log
+
+# run test
+onecc -C ${configfile} -OONECC_NEG_009 > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
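
A -O<NAME> option is expected to resolve to a file O<NAME>.cfg under the install's optimization directory (the hierarchy sketched in the test's comment). Here the file is planted but -C points at a directory, so the configuration lookup fails first; onecc_neg_010 below then covers the missing-option case. A sketch of the assumed option-to-file mapping:

opt_name="ONECC_NEG_009"                 # the NAME part of -O<NAME>
opt_cfg="../optimization/O${opt_name}.cfg"
if [ ! -f "${opt_cfg}" ]; then
  echo "Invalid optimization option"     # the condition onecc_neg_010 exercises
fi
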
diff --git a/compiler/one-cmds/tests/onecc_neg_010.test b/compiler/one-cmds/tests/onecc_neg_010.test
new file mode 100644
index 000000000..98605902c
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_010.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Invalid optimization option
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Invalid optimization option" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile=".."
+
+rm -f ${filename}.log
+
+# run test
+onecc -C ${configfile} -OONECC_NEG_010 > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_011.cfg b/compiler/one-cmds/tests/onecc_neg_011.cfg
new file mode 100644
index 000000000..b5873245b
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_011.cfg
@@ -0,0 +1,13 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-optimize]
+input_path=inception_v3.circle
+output_path=inception_v3.opt.circle
+wrong_opt=True
diff --git a/compiler/one-cmds/tests/onecc_neg_011.test b/compiler/one-cmds/tests/onecc_neg_011.test
new file mode 100644
index 000000000..5df6fc830
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_011.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# generate error for unrecognized optimization option
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "following arguments are unrecognized" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_neg_011.cfg"
+
+rm -f ${filename}.log
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_012.cfg b/compiler/one-cmds/tests/onecc_neg_012.cfg
new file mode 100644
index 000000000..732409139
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_012.cfg
@@ -0,0 +1,14 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=False
+one-pack=False
+one-codegen=False
+one-profile=False
+one-infer=True
+
+[one-infer]
+driver=dummy-infer
+command="dummy arguments"
diff --git a/compiler/one-cmds/tests/onecc_neg_012.test b/compiler/one-cmds/tests/onecc_neg_012.test
new file mode 100644
index 000000000..45ed13d32
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_012.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Check the case when driver does not exist
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "dummy-infer not found" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_neg_012.cfg"
+
+rm -f ${filename}.log
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
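
one-infer has to find the configured driver as an executable before it can run it; a missing binary yields the "not found" message grepped above. A shell approximation of that lookup, not one-infer's actual code:

driver="dummy-infer"
if ! command -v "${driver}" > /dev/null 2>&1; then
  echo "${driver} not found"
fi
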
diff --git a/compiler/one-cmds/tests/onecc_neg_013.test b/compiler/one-cmds/tests/onecc_neg_013.test
new file mode 100644
index 000000000..95ac3c95c
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_013.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with missing workflow file
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Not found given workflow file" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_neg_013.workflow.json"
+
+rm -f ${filename}.log
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_014.test b/compiler/one-cmds/tests/onecc_neg_014.test
new file mode 100644
index 000000000..704acfae9
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_014.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# invalid workflow file
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Invalid workflow file" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_neg_014.workflow.json"
+
+rm -f ${filename}.log
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_014.workflow.json b/compiler/one-cmds/tests/onecc_neg_014.workflow.json
new file mode 100644
index 000000000..8d4fd431e
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_014.workflow.json
@@ -0,0 +1,3 @@
+{
+ INVALID JSON FILE
+}
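
A workflow file must be parseable JSON before any schema validation can run, and the file above fails at that first step. The parse check can be reproduced from the shell with the standard-library JSON validator — an approximation, since onecc parses the file itself:

workflowfile="onecc_neg_014.workflow.json"
if ! python3 -m json.tool "${workflowfile}" > /dev/null 2>&1; then
  echo "Invalid workflow file"
fi
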
diff --git a/compiler/one-cmds/tests/onecc_neg_015.test b/compiler/one-cmds/tests/onecc_neg_015.test
new file mode 100644
index 000000000..d15a2f367
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_015.test
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# workflow file is missing the 'workflows' key
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Not found" "${filename}.log" &&
+ grep -q "key in workflow file" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_neg_015.workflow.json"
+
+rm -f ${filename}.log
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_015.workflow.json b/compiler/one-cmds/tests/onecc_neg_015.workflow.json
new file mode 100644
index 000000000..4cb752e4e
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_015.workflow.json
@@ -0,0 +1,21 @@
+{
+ "workflowsssssss": [
+ "SIMPLE_WORKFLOW"
+ ],
+ "SIMPLE_WORKFLOW": {
+ "steps": [
+ "QUANTIZE"
+ ],
+ "QUANTIZE": {
+ "one-cmd": "one-quantize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+ "output_path": "inception_v3.onecc_026.q.circle",
+ "input_data": "inception_v3_test_data.h5",
+ "evaluate_result": "True",
+ "test_data": "inception_v3_test_data.h5",
+ "print_mpeir": "True"
+ }
+ }
+ }
+}
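
Once the JSON parses, the first schema checks are plain key lookups: a top-level 'workflows' array must exist, and every name it lists must itself be a top-level key. A jq-based sketch of both checks, assuming jq is available (onecc implements them natively):

f="onecc_neg_015.workflow.json"

# top-level 'workflows' key must exist (what onecc_neg_015 violates)
jq -e 'has("workflows")' "${f}" > /dev/null \
  || echo "Not found 'workflows' key in workflow file"

# every listed workflow must be a top-level key (what onecc_neg_016 violates)
for wf in $(jq -r '.workflows[]?' "${f}"); do
  jq -e --arg wf "${wf}" 'has($wf)' "${f}" > /dev/null \
    || echo "Not found '${wf}' key listed in 'workflows'"
done
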
diff --git a/compiler/one-cmds/tests/onecc_neg_016.test b/compiler/one-cmds/tests/onecc_neg_016.test
new file mode 100644
index 000000000..23964e928
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_016.test
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# workflow name listed in 'workflows' is not found in the file
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Not found" "${filename}.log" &&
+ grep -q "key listed in" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_neg_016.workflow.json"
+
+rm -f ${filename}.log
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_016.workflow.json b/compiler/one-cmds/tests/onecc_neg_016.workflow.json
new file mode 100644
index 000000000..c929cf38c
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_016.workflow.json
@@ -0,0 +1,21 @@
+{
+ "workflows": [
+ "SIMPLE_WORKFLOW"
+ ],
+ "SIMPLE_WORKFLOWWWWW": {
+ "steps": [
+ "QUANTIZE"
+ ],
+ "QUANTIZE": {
+ "one-cmd": "one-quantize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+ "output_path": "inception_v3.onecc_026.q.circle",
+ "input_data": "inception_v3_test_data.h5",
+ "evaluate_result": "True",
+ "test_data": "inception_v3_test_data.h5",
+ "print_mpeir": "True"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_neg_017.test b/compiler/one-cmds/tests/onecc_neg_017.test
new file mode 100644
index 000000000..a345d6259
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_017.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# workflow has neither 'steps' nor 'cfg-reference' key
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Each workflow should have either" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_neg_017.workflow.json"
+
+rm -f ${filename}.log
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_017.workflow.json b/compiler/one-cmds/tests/onecc_neg_017.workflow.json
new file mode 100644
index 000000000..22f1415e9
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_017.workflow.json
@@ -0,0 +1,18 @@
+{
+ "workflows": [
+ "SIMPLE_WORKFLOW"
+ ],
+ "SIMPLE_WORKFLOW": {
+ "QUANTIZE": {
+ "one-cmd": "one-quantize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+ "output_path": "inception_v3.onecc_026.q.circle",
+ "input_data": "inception_v3_test_data.h5",
+ "evaluate_result": "True",
+ "test_data": "inception_v3_test_data.h5",
+ "print_mpeir": "True"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_neg_018.test b/compiler/one-cmds/tests/onecc_neg_018.test
new file mode 100644
index 000000000..b70fae4e8
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_018.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# workflow has both 'steps' and 'cfg-reference', which are exclusive keys
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "are exclusive key" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_neg_018.workflow.json"
+
+rm -f ${filename}.log
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_018.workflow.json b/compiler/one-cmds/tests/onecc_neg_018.workflow.json
new file mode 100644
index 000000000..e0754d392
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_018.workflow.json
@@ -0,0 +1,24 @@
+{
+ "workflows": [
+ "MY_WORKFLOW"
+ ],
+ "MY_WORKFLOW": {
+ "steps": [
+ "IMPORT_TF"
+ ],
+ "cfg-reference": {
+ "path": "/path/to/ini/format/file"
+ },
+ "IMPORT_TF": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.onecc_neg_018.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ }
+ }
+}
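
A workflow body must carry exactly one of 'steps' or 'cfg-reference': neither (onecc_neg_017) and both (onecc_neg_018) are rejected. A jq sketch of that rule for a single workflow:

f="onecc_neg_018.workflow.json"
wf="MY_WORKFLOW"
has_steps=$(jq --arg wf "${wf}" '.[$wf] | has("steps")' "${f}")
has_cfg=$(jq --arg wf "${wf}" '.[$wf] | has("cfg-reference")' "${f}")

if [ "${has_steps}" = "true" ] && [ "${has_cfg}" = "true" ]; then
  echo "'steps' and 'cfg-reference' are exclusive keys"
elif [ "${has_steps}" = "false" ] && [ "${has_cfg}" = "false" ]; then
  echo "Each workflow should have either 'steps' or 'cfg-reference'"
fi
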
diff --git a/compiler/one-cmds/tests/onecc_neg_019.test b/compiler/one-cmds/tests/onecc_neg_019.test
new file mode 100644
index 000000000..430438d61
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_019.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# workflow step is missing the 'one-cmd' key
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Each step should have" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_neg_019.workflow.json"
+
+rm -f ${filename}.log
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_019.workflow.json b/compiler/one-cmds/tests/onecc_neg_019.workflow.json
new file mode 100644
index 000000000..995c6bf4a
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_019.workflow.json
@@ -0,0 +1,21 @@
+{
+ "workflows": [
+ "MY_WORKFLOW"
+ ],
+ "MY_WORKFLOW": {
+ "steps": [
+ "IMPORT_TF"
+ ],
+ "IMPORT_TF": {
+ "one-cmddddddddd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.onecc_neg_019.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_neg_020.test b/compiler/one-cmds/tests/onecc_neg_020.test
new file mode 100644
index 000000000..b86a23116
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_020.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# workflow step is missing the 'commands' key
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Each step should have" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_neg_020.workflow.json"
+
+rm -f ${filename}.log
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_020.workflow.json b/compiler/one-cmds/tests/onecc_neg_020.workflow.json
new file mode 100644
index 000000000..89f0d59af
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_020.workflow.json
@@ -0,0 +1,21 @@
+{
+ "workflows": [
+ "MY_WORKFLOW"
+ ],
+ "MY_WORKFLOW": {
+ "steps": [
+ "IMPORT_TF"
+ ],
+ "IMPORT_TF": {
+ "one-cmd": "one-import-tf",
+ "commandssssssssss": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.onecc_neg_020.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_neg_021.test b/compiler/one-cmds/tests/onecc_neg_021.test
new file mode 100644
index 000000000..ef023b133
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_021.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# workflows have a cycle
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Workflows should not have a cycle" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_neg_021.workflow.json"
+
+rm -f ${filename}.log
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_021.workflow.json b/compiler/one-cmds/tests/onecc_neg_021.workflow.json
new file mode 100644
index 000000000..c326d41a6
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_021.workflow.json
@@ -0,0 +1,44 @@
+{
+ "workflows": [
+ "CYCLE_WF1",
+ "CYCLE_WF2"
+ ],
+ "CYCLE_WF1": {
+ "run-after": [
+ "CYCLE_WF2"
+ ],
+ "steps": [
+ "IMPORT_TF"
+ ],
+ "IMPORT_TF": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.onecc_neg_021.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ }
+ },
+ "CYCLE_WF2": {
+ "run-after": [
+ "CYCLE_WF1"
+ ],
+ "steps": [
+ "IMPORT_TF"
+ ],
+ "IMPORT_TF": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.onecc_neg_021.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ }
+ }
+}
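
The 'run-after' lists define a dependency graph between workflows, and scheduling refuses a graph with a cycle. The same condition can be reproduced with coreutils tsort, which fails on cyclic input — a sketch assuming jq and tsort are available:

f="onecc_neg_021.workflow.json"

# Emit one "dependency workflow" pair per run-after edge, then try to
# topologically sort them; tsort exits nonzero when the edges form a loop.
if ! jq -r '.workflows[] as $wf
            | .[$wf]["run-after"] // []
            | .[] | "\(.) \($wf)"' "${f}" | tsort > /dev/null 2>&1; then
  echo "Workflows should not have a cycle"
fi
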
diff --git a/compiler/one-cmds/tests/onecc_neg_022.cfg b/compiler/one-cmds/tests/onecc_neg_022.cfg
new file mode 100644
index 000000000..16135f074
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_022.cfg
@@ -0,0 +1,16 @@
+[onecc]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3_without_opt.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v2
diff --git a/compiler/one-cmds/tests/onecc_neg_022.test b/compiler/one-cmds/tests/onecc_neg_022.test
new file mode 100644
index 000000000..002908a7a
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_022.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# workflows have a cycle
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Workflows should not have a cycle" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_neg_022.workflow.json"
+
+rm -f ${filename}.log
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_022.workflow.json b/compiler/one-cmds/tests/onecc_neg_022.workflow.json
new file mode 100644
index 000000000..1f4508134
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_022.workflow.json
@@ -0,0 +1,63 @@
+{
+ "workflows": [
+ "WITHOUT_OPT",
+ "WITH_OPT",
+ "INFER"
+ ],
+ "INFER": {
+ "run-after": [
+ "WITHOUT_OPT",
+ "WITH_OPT"
+ ],
+ "steps": [
+ "INFER1",
+ "INFER2"
+ ],
+ "INFER1": {
+ "one-cmd": "one-infer",
+ "commands" : {
+ "driver": "dummy-inferV2",
+ "command": "inception_v3_without_opt.circle"
+ }
+ },
+ "INFER2": {
+ "one-cmd": "one-infer",
+ "commands": {
+ "driver": "dummy-inferV2",
+ "command": "inception_v3.opt.circle"
+ }
+ }
+ },
+ "WITHOUT_OPT": {
+ "cfg-reference": {
+ "path": "onecc_041.cfg"
+ }
+ },
+ "WITH_OPT": {
+ "run-after": [
+ "WITHOUT_OPT"
+ ],
+ "steps": [
+ "IMPORT_TF",
+ "OPTIMIZE"
+ ],
+ "IMPORT_TF": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.onecc_neg_022.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ },
+ "OPTIMIZE": {
+ "one-cmd": "one-optimize",
+ "commands": {
+ "input_path": "inception_v3.onecc_neg_022.circle",
+ "output_path": "inception_v3.opt.circle"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_neg_023.test b/compiler/one-cmds/tests/onecc_neg_023.test
new file mode 100644
index 000000000..436c7c3b3
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_023.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-optimize's change_outputs names a non-existing node
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Change outputs failed" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_neg_023.workflow.json"
+
+rm -f ${filename}.log
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_023.workflow.json b/compiler/one-cmds/tests/onecc_neg_023.workflow.json
new file mode 100644
index 000000000..2d763f098
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_023.workflow.json
@@ -0,0 +1,30 @@
+{
+ "workflows": [
+ "WITH_OPT"
+ ],
+ "WITH_OPT": {
+ "steps": [
+ "IMPORT_TF",
+ "OPTIMIZE"
+ ],
+ "IMPORT_TF": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.onecc_neg_023.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ },
+ "OPTIMIZE": {
+ "one-cmd": "one-optimize",
+ "commands": {
+ "input_path": "inception_v3.onecc_neg_023.circle",
+ "output_path": "inception_v3.opt.circle",
+ "change_outputs": "non_existing_node_name"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_neg_024.cfg b/compiler/one-cmds/tests/onecc_neg_024.cfg
new file mode 100644
index 000000000..0e9ebc63c
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_024.cfg
@@ -0,0 +1,20 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=True
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=False
+include=O # invalid (too short group option)
+
+[one-import-onnx]
+input_path=test_onnx_model.onnx
+output_path=test_onnx_model.circle
+
+[one-optimize]
+input_path=test_onnx_model.circle
+output_path=test_onnx_model.onecc_neg_024.opt.circle
+convert_nchw_to_nhwc=True
+fold_add_v2=False
diff --git a/compiler/one-cmds/tests/onecc_neg_024.test b/compiler/one-cmds/tests/onecc_neg_024.test
new file mode 100644
index 000000000..16952baba
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_024.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# invalid group option
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Invalid group option" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_neg_024.cfg"
+
+rm -f ${filename}.log
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_025.cfg b/compiler/one-cmds/tests/onecc_neg_025.cfg
new file mode 100644
index 000000000..e41c6e63c
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_025.cfg
@@ -0,0 +1,20 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=True
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=False
+include=A1 # invalid (must start with 'O')
+
+[one-import-onnx]
+input_path=test_onnx_model.onnx
+output_path=test_onnx_model.circle
+
+[one-optimize]
+input_path=test_onnx_model.circle
+output_path=test_onnx_model.onecc_neg_025.opt.circle
+convert_nchw_to_nhwc=True
+fold_add_v2=False
diff --git a/compiler/one-cmds/tests/onecc_neg_025.test b/compiler/one-cmds/tests/onecc_neg_025.test
new file mode 100644
index 000000000..4ddc31002
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_025.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# invalid group option
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Invalid group option" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_neg_025.cfg"
+
+rm -f ${filename}.log
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
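
The include key takes group options whose names must be at least two characters long and start with 'O'; a bare 'O' (onecc_neg_024) and 'A1' (onecc_neg_025) each break one half of the rule. A sketch of that validation:

validate_group_option()
{
  local opt="$1"
  if [ "${#opt}" -lt 2 ] || [ "${opt:0:1}" != "O" ]; then
    echo "Invalid group option: ${opt}"
    return 1
  fi
  return 0
}

validate_group_option "O"    # fails: too short
validate_group_option "A1"   # fails: does not start with 'O'
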
diff --git a/compiler/one-cmds/tests/onecc_neg_026.cfg b/compiler/one-cmds/tests/onecc_neg_026.cfg
new file mode 100644
index 000000000..2efddb1d2
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_026.cfg
@@ -0,0 +1,13 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=True
+one-pack=False
+one-codegen=False
+
+[one-quantize]
+input_path=reshape_matmul.circle
+output_path=reshape_matmul.onecc_neg_026.q.circle
+input_type=uint8,int16,uint8
diff --git a/compiler/one-cmds/tests/onecc_neg_026.test b/compiler/one-cmds/tests/onecc_neg_026.test
new file mode 100644
index 000000000..f90c1ec47
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_026.test
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Wrong number of input_type in one-quantize
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Invalid number of input dtype" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./reshape_matmul.circle"
+configfile="onecc_neg_026.cfg"
+
+rm -f ${filename}.log
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
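
one-quantize takes input_type as a comma-separated list with one dtype per model input, so three dtypes against the model's input count trips the arity error. A sketch of the count check; the input count of reshape_matmul.circle is an assumption here, used only for illustration:

input_type="uint8,int16,uint8"   # from onecc_neg_026.cfg
num_model_inputs=2               # assumed input count of reshape_matmul.circle

IFS=',' read -r -a dtypes <<< "${input_type}"
if [ "${#dtypes[@]}" -ne "${num_model_inputs}" ]; then
  echo "Invalid number of input dtype"
fi
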
diff --git a/compiler/one-cmds/tests/onecc_neg_027.cfg b/compiler/one-cmds/tests/onecc_neg_027.cfg
new file mode 100644
index 000000000..27f3f67cf
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_027.cfg
@@ -0,0 +1,7 @@
+[onecc]
+one-codegen=True
+
+[one-codegen]
+backends=dummy,dummyV2
+dummy=-o sample.tvn inception_v3.onecc_neg_027.circle
+# dummyV2=-O sample2.tvn inception_v3.onecc_neg_027.circle
diff --git a/compiler/one-cmds/tests/onecc_neg_027.test b/compiler/one-cmds/tests/onecc_neg_027.test
new file mode 100644
index 000000000..f96825623
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_027.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# command for the given backend is not found
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Not found the command for dummyV2" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_neg_027.cfg"
+
+rm -f ${filename}.log
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_028.cfg b/compiler/one-cmds/tests/onecc_neg_028.cfg
new file mode 100644
index 000000000..ca19c53ba
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_028.cfg
@@ -0,0 +1,9 @@
+[onecc]
+one-codegen=True
+
+[one-codegen]
+backend=dummy3
+command=-o sample.tvn inception_v3.onecc_neg_028.circle
+backends=dummy,dummy2
+dummy=-o sample.tvn inception_v3.onecc_neg_028.circle
+dummyV2=-O sample2.tvn inception_v3.onecc_neg_028.circle
diff --git a/compiler/one-cmds/tests/onecc_neg_028.test b/compiler/one-cmds/tests/onecc_neg_028.test
new file mode 100644
index 000000000..beba24ab6
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_028.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# 'backend' and 'backends' options cannot be used simultaneously
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "option cannot be used simultaneously" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_neg_028.cfg"
+
+rm -f ${filename}.log
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_029.cfg b/compiler/one-cmds/tests/onecc_neg_029.cfg
new file mode 100644
index 000000000..02f527b87
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_029.cfg
@@ -0,0 +1,6 @@
+[onecc]
+one-codegen=True
+
+[one-codegen]
+backend=dummy3
+command=-o sample.tvn inception_v3.onecc_neg_029.circle
diff --git a/compiler/one-cmds/tests/onecc_neg_029.test b/compiler/one-cmds/tests/onecc_neg_029.test
new file mode 100644
index 000000000..5ced5fe67
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_029.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# command of the given backend is not found
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Not found the command of given backend" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_neg_029.cfg"
+
+rm -f ${filename}.log
+
+# run test
+onecc -C ${configfile} --backend dummy2 > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_030.cfg b/compiler/one-cmds/tests/onecc_neg_030.cfg
new file mode 100644
index 000000000..0d573c1ad
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_030.cfg
@@ -0,0 +1,6 @@
+[onecc]
+one-codegen=True
+
+[one-codegen]
+backends=dummy3
+dummy3=-o sample.tvn inception_v3.onecc_neg_030.circle
diff --git a/compiler/one-cmds/tests/onecc_neg_030.test b/compiler/one-cmds/tests/onecc_neg_030.test
new file mode 100644
index 000000000..404a9b885
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_030.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# command of the given backend is not found
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Not found the command of given backend" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_neg_030.cfg"
+
+rm -f ${filename}.log
+
+# run test
+onecc -C ${configfile} --backend dummy2 > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_031.test b/compiler/one-cmds/tests/onecc_neg_031.test
new file mode 100644
index 000000000..c7d857562
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_031.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# 'backend' option cannot be used with 'workflow' option
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "'backend' option can be used only with 'config' option" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_neg_031.workflow.json"
+
+rm -f ${filename}.log
+
+# run test
+onecc -W ${workflowfile} --backend dummy > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_031.workflow.json b/compiler/one-cmds/tests/onecc_neg_031.workflow.json
new file mode 100644
index 000000000..d323cc94f
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_031.workflow.json
@@ -0,0 +1,29 @@
+{
+ "workflows": [
+ "codegen_wf"
+ ],
+ "codegen_wf": {
+ "steps": [
+ "import_tf",
+ "codegen"
+ ],
+ "import_tf": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.onecc_neg_031.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ },
+ "codegen": {
+ "one-cmd": "one-codegen",
+ "commands": {
+ "backend": "dummy",
+ "command": "-o sample.tvn inception_v3.onecc_neg_031.circle"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_neg_032.cfg b/compiler/one-cmds/tests/onecc_neg_032.cfg
new file mode 100644
index 000000000..949194806
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_032.cfg
@@ -0,0 +1,7 @@
+[onecc]
+one-profile=True
+
+[one-profile]
+backends=dummy,dummyV2
+dummy=dummy.bin
+# dummyV2=dummyV2.bin
diff --git a/compiler/one-cmds/tests/onecc_neg_032.test b/compiler/one-cmds/tests/onecc_neg_032.test
new file mode 100644
index 000000000..12b70cbc4
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_032.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# command for the given backend is not found
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Not found the command for dummyV2" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_neg_032.cfg"
+
+rm -f ${filename}.log
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_033.cfg b/compiler/one-cmds/tests/onecc_neg_033.cfg
new file mode 100644
index 000000000..6c77870b1
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_033.cfg
@@ -0,0 +1,9 @@
+[onecc]
+one-profile=True
+
+[one-profile]
+backend=dummyV3
+command=dummyV3.bin
+backends=dummy,dummyV2
+dummy=dummy.bin
+dummyV2=dummyV2.bin
diff --git a/compiler/one-cmds/tests/onecc_neg_033.test b/compiler/one-cmds/tests/onecc_neg_033.test
new file mode 100644
index 000000000..51b7ca073
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_033.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# 'backend' and 'backends' options cannot be used simultaneously
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "option cannot be used simultaneously" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_neg_033.cfg"
+
+rm -f ${filename}.log
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_034.cfg b/compiler/one-cmds/tests/onecc_neg_034.cfg
new file mode 100644
index 000000000..8c64628b4
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_034.cfg
@@ -0,0 +1,6 @@
+[onecc]
+one-profile=True
+
+[one-profile]
+backend=dummy
+command=dummy.bin
diff --git a/compiler/one-cmds/tests/onecc_neg_034.test b/compiler/one-cmds/tests/onecc_neg_034.test
new file mode 100644
index 000000000..58cc8d3d1
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_034.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative test: the command of the given backend is not found
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Not found the command of given backend" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_neg_034.cfg"
+
+rm -f ${filename}.log
+
+# run test
+onecc -C ${configfile} --backend dummyV2 > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_035.cfg b/compiler/one-cmds/tests/onecc_neg_035.cfg
new file mode 100644
index 000000000..6468d73bc
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_035.cfg
@@ -0,0 +1,6 @@
+[onecc]
+one-profile=True
+
+[one-profile]
+backends=dummyV2
+dummyV2=dummyV2.bin
diff --git a/compiler/one-cmds/tests/onecc_neg_035.test b/compiler/one-cmds/tests/onecc_neg_035.test
new file mode 100644
index 000000000..cae5ba1c8
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_035.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative test: the command of the given backend is not found
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Not found the command of given backend" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_neg_035.cfg"
+
+rm -f ${filename}.log
+
+# run test
+onecc -C ${configfile} --backend dummy > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_036.test b/compiler/one-cmds/tests/onecc_neg_036.test
new file mode 100644
index 000000000..929457575
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_036.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative test: the 'backend' option is used with the 'workflow' option
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "'backend' option can be used only with 'config' option" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_neg_036.workflow.json"
+
+rm -f ${filename}.log
+
+# run test
+onecc -W ${workflowfile} --backend dummy > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_036.workflow.json b/compiler/one-cmds/tests/onecc_neg_036.workflow.json
new file mode 100644
index 000000000..9acd4d26e
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_036.workflow.json
@@ -0,0 +1,29 @@
+{
+ "workflows": [
+ "profile_wf"
+ ],
+ "profile_wf": {
+ "steps": [
+ "import_tf",
+ "profile"
+ ],
+ "import_tf": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.onecc_neg_036.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ },
+ "profile": {
+ "one-cmd": "one-profile",
+ "commands": {
+ "backend": "dummy",
+ "command": "dummy.bin"
+ }
+ }
+ }
+}
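
For reference, the workflow JSON above is a plain dictionary: a `workflows` list names each workflow, and each workflow holds an ordered `steps` list whose entries map a `one-cmd` driver to its `commands` options. A hedged sketch that walks this structure and prints the driver invocation each step roughly corresponds to (how onecc actually dispatches steps is not shown in this diff):

```python
# Sketch: walk the workflow JSON above and print an approximate driver
# invocation per step. Purely illustrative of the file structure.
import json

with open("onecc_neg_036.workflow.json") as f:
    doc = json.load(f)

for wf_name in doc["workflows"]:
    wf = doc[wf_name]
    for step in wf["steps"]:
        spec = wf[step]
        args = [f"--{k}={v}" for k, v in spec["commands"].items()]
        print(spec["one-cmd"], " ".join(args))
# The 'profile' step above prints: one-profile --backend=dummy --command=dummy.bin
```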
diff --git a/compiler/one-cmds/tests/onnx-operations/CMakeLists.txt b/compiler/one-cmds/tests/onnx-operations/CMakeLists.txt
new file mode 100644
index 000000000..e6b2b354a
--- /dev/null
+++ b/compiler/one-cmds/tests/onnx-operations/CMakeLists.txt
@@ -0,0 +1,86 @@
+# Install one-cmds test scripts for onnx models
+
+# Gather test scripts
+set(EXAMPLES_DIR "${NNAS_PROJECT_SOURCE_DIR}/res/PyTorchExamples/examples")
+file(GLOB TEST_EXAMPLES RELATIVE "${EXAMPLES_DIR}" "${EXAMPLES_DIR}/*")
+
+set(TEST_DST test/onnx-operations)
+
+install(DIRECTORY "${NNAS_PROJECT_SOURCE_DIR}/res/PyTorchExamples/" DESTINATION "${TEST_DST}")
+
+set(ONNX_IMPORT_OPTIONS "--unroll_rnn --unroll_lstm")
+
+foreach(TEST_ITEM IN ITEMS ${TEST_EXAMPLES})
+ set(TEST_SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/${TEST_ITEM}.test")
+
+ # generate test script
+ file(WRITE "${TEST_SCRIPT}" "#!/bin/bash\n\n")
+ file(APPEND "${TEST_SCRIPT}" "filename_ext=\"\$(basename -- $0)\"\n")
+ file(APPEND "${TEST_SCRIPT}" "filename=\"\${filename_ext%.*}\"\n")
+ file(APPEND "${TEST_SCRIPT}" "trap_err_onexit()\n")
+ file(APPEND "${TEST_SCRIPT}" "{\n")
+ file(APPEND "${TEST_SCRIPT}" "echo \"\${filename_ext} FAILED\"\n")
+ file(APPEND "${TEST_SCRIPT}" "exit 255\n")
+ file(APPEND "${TEST_SCRIPT}" "}\n")
+ file(APPEND "${TEST_SCRIPT}" "trap trap_err_onexit ERR\n")
+ file(APPEND "${TEST_SCRIPT}" "outputfile=\"${TEST_ITEM}.circle\"\n")
+ file(APPEND "${TEST_SCRIPT}" "one-import-onnx --input_path=${TEST_ITEM}.onnx --output_path=${TEST_ITEM}.circle\
+ ${ONNX_IMPORT_OPTIONS} &> /dev/null\n")
+ file(APPEND "${TEST_SCRIPT}" "if [[ ! -s \"\${outputfile}\" ]]; then\n")
+ file(APPEND "${TEST_SCRIPT}" "trap_err_onexit\n")
+ file(APPEND "${TEST_SCRIPT}" "fi\n")
+ file(APPEND "${TEST_SCRIPT}" "echo \"\${filename_ext} SUCCESS\"\n")
+
+ install(FILES "${TEST_SCRIPT}" DESTINATION "${TEST_DST}")
+endforeach(TEST_ITEM)
+
+
+# Create a script to run the tests at installation folder
+set(DRIVER_SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/runtestall.sh")
+
+file(WRITE "${DRIVER_SCRIPT}" "#!/bin/bash\n\n")
+file(APPEND "${DRIVER_SCRIPT}" "SCRIPT_PATH=$(cd $(dirname \${BASH_SOURCE[0]}) && pwd)\n")
+file(APPEND "${DRIVER_SCRIPT}" "pushd $SCRIPT_PATH > /dev/null\n")
+file(APPEND "${DRIVER_SCRIPT}" "rm -rf runtestall.log\n")
+file(APPEND "${DRIVER_SCRIPT}" "export PATH=$SCRIPT_PATH/../bin:$PATH\n")
+file(APPEND "${DRIVER_SCRIPT}" "if [[ $# -ge 1 ]]; then\n")
+file(APPEND "${DRIVER_SCRIPT}" " USER_PATH=$1\n")
+file(APPEND "${DRIVER_SCRIPT}" " export PATH=$USER_PATH:$PATH\n")
+file(APPEND "${DRIVER_SCRIPT}" "fi\n")
+file(APPEND "${DRIVER_SCRIPT}" "\n")
+file(APPEND "${DRIVER_SCRIPT}" "# refer https://github.com/Samsung/ONE/issues/6286\n")
+file(APPEND "${DRIVER_SCRIPT}" "set -o pipefail\n\n")
+file(APPEND "${DRIVER_SCRIPT}" "fail_count=0\n")
+file(APPEND "${DRIVER_SCRIPT}" "trap \"(( fail_count++ ))\" ERR\n\n")
+
+foreach(TEST_ITEM IN ITEMS ${TEST_EXAMPLES})
+ file(APPEND "${DRIVER_SCRIPT}" "/bin/bash \"${TEST_ITEM}.test\" | tee -a runtestall.log\n")
+endforeach(TEST_ITEM)
+
+file(APPEND "${DRIVER_SCRIPT}" "popd > /dev/null\n\n")
+
+file(APPEND "${DRIVER_SCRIPT}"
+"if [[ $fail_count != 0 ]]; then
+ echo \"$fail_count TESTS FAILED\"
+ exit 255
+else
+ echo \"ALL TESTS PASSED!\"
+fi\n
+")
+
+set(PREPARE_TEST_MATERIALS_SH "${CMAKE_CURRENT_SOURCE_DIR}/prepare_test_materials.sh")
+
+install(FILES "${DRIVER_SCRIPT}"
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION "${TEST_DST}")
+
+install(FILES "${PREPARE_TEST_MATERIALS_SH}"
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION "${TEST_DST}")
+
+install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/README.md"
+ DESTINATION "${TEST_DST}")
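
The `file(WRITE/APPEND)` calls above emit one self-checking bash script per example. To make the template easier to read, here is a hedged Python rendering of the script it generates (`net_abs` is a hypothetical example name; the real scripts are produced at configure time by CMake):

```python
# Sketch: render the bash test script the CMakeLists above generates for a
# given example name. The template mirrors the file(WRITE/APPEND) sequence.
TEMPLATE = """#!/bin/bash

filename_ext="$(basename -- $0)"
filename="${{filename_ext%.*}}"
trap_err_onexit()
{{
echo "${{filename_ext}} FAILED"
exit 255
}}
trap trap_err_onexit ERR
outputfile="{item}.circle"
one-import-onnx --input_path={item}.onnx --output_path={item}.circle --unroll_rnn --unroll_lstm &> /dev/null
if [[ ! -s "${{outputfile}}" ]]; then
trap_err_onexit
fi
echo "${{filename_ext}} SUCCESS"
"""

with open("net_abs.test", "w") as f:
    f.write(TEMPLATE.format(item="net_abs"))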
diff --git a/compiler/one-cmds/tests/onnx-operations/README.md b/compiler/one-cmds/tests/onnx-operations/README.md
new file mode 100644
index 000000000..928fb84dd
--- /dev/null
+++ b/compiler/one-cmds/tests/onnx-operations/README.md
@@ -0,0 +1,28 @@
+## Overview
+
+This directory contains auxiliary tests for small ONNX target models.
+
+Most of the models contain a single operation, but some contain multiple operations that together represent one operation with complex semantics.
+
+Models for these tests are taken from res/PyTorchExamples.
+
+## To run all tests
+
+Steps:
+1) run 'one-prepare-venv' in the bin folder to prepare a python virtual-env with TensorFlow
+ - you need to run this only once
+ - read 'doc/how-to-prepare-virtualenv.txt' for more information
+ ```
+ bin/one-prepare-venv
+ ```
+2) run 'test/onnx-operations/prepare_test_materials.sh' to download test material models
+ - you need to run this only once
+ - you need an internet connection to download files
+ - you may need to install 'wget' and 'unzip' packages
+ ```
+ test/onnx-operations/prepare_test_materials.sh
+ ```
+3) run 'test/onnx-operations/runtestall.sh' to run the test
+ ```
+ test/onnx-operations/runtestall.sh
+ ```
diff --git a/compiler/one-cmds/tests/onnx-operations/prepare_test_materials.sh b/compiler/one-cmds/tests/onnx-operations/prepare_test_materials.sh
new file mode 100644
index 000000000..274a60f0a
--- /dev/null
+++ b/compiler/one-cmds/tests/onnx-operations/prepare_test_materials.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+SCRIPT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+pushd $SCRIPT_PATH > /dev/null
+
+for test_case in examples/*; do
+ python3 ptem.py $(basename ${test_case})
+done
+
+cp output/*.onnx .
+
+popd > /dev/null
diff --git a/compiler/one-cmds/tests/onnx_legalize_run_compare.py b/compiler/one-cmds/tests/onnx_legalize_run_compare.py
new file mode 100644
index 000000000..9b02b74af
--- /dev/null
+++ b/compiler/one-cmds/tests/onnx_legalize_run_compare.py
@@ -0,0 +1,129 @@
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import onnxruntime as rt
+import onnx
+import sys
+import numpy as np
+import importlib.util
+
+
+def _generate_inputs(model):
+ """Generate random inputs for given model
+
+ Args:
+ model (onnx.onnx_ml_pb2.ModelProto): target model
+
+ Returns:
+ dict from str to numpy.ndarray: generated inputs
+ """
+ inputs = {}
+ for input in model.graph.input:
+ # check if elem type is float32
+ # list of types could be extended, this is a property of current testsuite
+ assert (
+ input.type.tensor_type.elem_type == onnx.TensorProto.DataType.Value("FLOAT"))
+ input_shape = []
+ for dim in input.type.tensor_type.shape.dim:
+ input_shape += [dim.dim_value]
+ inputs[input.name] = np.random.random(input_shape).astype(np.float32)
+ return inputs
+
+
+def _run_model(model, inputs):
+ """Run given model
+
+ Args:
+ model (onnx.onnx_ml_pb2.ModelProto): target model
+ inputs (dict from str to numpy.ndarray): sample inputs
+
+ Returns:
+ list of numpy.ndarray: inference outputs
+ """
+ output_names = list(map(lambda output: output.name, model.graph.output))
+ session = rt.InferenceSession(model.SerializeToString())
+ outputs = session.run(output_names, inputs)
+ return outputs
+
+
+def _compare_results(ref_outputs, test_outputs, tolerance):
+ """Generate random inputs for given model
+
+ Args:
+ ref_outputs (list of numpy.ndarray): reference values (original model results)
+ test_outputs (list of numpy.ndarray): tested values (modified model results)
+ tolerance (float): maximum acceptable relative difference
+
+ Returns:
+ bool: True if outputs considered equal, False otherwise
+ """
+ num_outputs = len(ref_outputs)
+ assert (len(test_outputs) == num_outputs)
+ for i in range(num_outputs):
+ if ref_outputs[i].shape != test_outputs[i].shape:
+ print("output {} shape mismatch: ref({}) vs test({})".format(
+ i, ref_outputs[i].shape, test_outputs[i].shape))
+ return False
+
+ abs_difference = np.abs(ref_outputs[i] - test_outputs[i])
+ abs_ref_maximum = np.abs(ref_outputs[i]).max()
+ peak_error = abs_difference.max() / abs_ref_maximum
+
+ if peak_error > tolerance:
+ print("output {} peak error to value ratio {} is too big".format(
+ i, peak_error))
+ return False
+ return True
+
+
+if __name__ == '__main__':
+ if len(sys.argv) < 6:
+ exit('expecting 5 arguments:\n'
+ ' - path to input model\n'
+ ' - path to "legalized" model\n'
+ ' - path to onnx_legalizer.py\n'
+ ' - base name for generated test inputs\n'
+ ' - output tolerance')
+ input_model_path = sys.argv[1]
+ output_model_path = sys.argv[2]
+ onnx_legalizer_path = sys.argv[3]
+ input_dump_path = sys.argv[4]
+ tolerance = float(sys.argv[5])
+
+ onnx_legalizer_spec = importlib.util.spec_from_file_location(
+ "onnx_legalizer", onnx_legalizer_path)
+ onnx_legalizer = importlib.util.module_from_spec(onnx_legalizer_spec)
+ onnx_legalizer_spec.loader.exec_module(onnx_legalizer)
+
+ model = onnx.load(input_model_path)
+
+ inputs = _generate_inputs(model)
+
+ for i in inputs:
+ np.save('{}_{}.npy'.format(input_dump_path, i), inputs[i])
+
+ ref_outputs = _run_model(model, inputs)
+
+ options = onnx_legalizer.LegalizeOptions()
+ options.unroll_rnn = True
+ options.unroll_lstm = True
+ onnx_legalizer.legalize(model, options)
+
+ with open(output_model_path, 'wb') as f:
+ f.write(model.SerializeToString())
+
+ test_outputs = _run_model(model, inputs)
+
+ if not _compare_results(ref_outputs, test_outputs, tolerance):
+ exit('comparison failed')
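
The pass/fail criterion in `_compare_results` above is a peak relative error: the largest absolute difference divided by the largest absolute reference value. A small worked example of that metric:

```python
# Worked example of the peak-error metric used by _compare_results above.
import numpy as np

ref = np.array([2.0, -4.0, 1.0], dtype=np.float32)
test = np.array([2.1, -4.0, 1.0], dtype=np.float32)

# largest absolute difference, normalized by the largest absolute reference value
peak_error = np.abs(ref - test).max() / np.abs(ref).max()
print(peak_error)  # 0.1 / 4.0 = 0.025 -> passes a tolerance of 0.05, fails 0.01
```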
diff --git a/compiler/one-cmds/tests/prepare_test_materials.sh b/compiler/one-cmds/tests/prepare_test_materials.sh
new file mode 100644
index 000000000..915beff43
--- /dev/null
+++ b/compiler/one-cmds/tests/prepare_test_materials.sh
@@ -0,0 +1,192 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# See https://github.com/Samsung/ONE/issues/4155 for information
+
+SCRIPT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+pushd $SCRIPT_PATH > /dev/null
+
+if [[ ! -s "inception_v3.pb" ]]; then
+ rm -rf inception_v3_2018_04_27.tgz
+ wget -nv https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/inception_v3_2018_04_27.tgz
+ tar zxvf inception_v3_2018_04_27.tgz
+fi
+
+if [[ ! -s "mobilenet_edgetpu_224_1.0_int8.tflite" ]]; then
+ wget -nv https://github.com/mlcommons/mobile_models/raw/main/v0_7/tflite/mobilenet_edgetpu_224_1.0_int8.tflite
+fi
+
+if [[ ! -s "while_3.pbtxt" ]]; then
+ rm -rf while_3.zip
+ wget -nv https://github.com/Samsung/ONE/files/5095630/while_3.zip
+ unzip while_3.zip
+ # https://github.com/Samsung/ONE/issues/4155#issuecomment-689320297
+fi
+
+if [[ ! -s "mobilenet_test_data.h5" ]]; then
+ rm -rf mobilenet_test_data.zip
+ wget -nv https://github.com/Samsung/ONE/files/5139460/mobilenet_test_data.zip
+ unzip mobilenet_test_data.zip
+ # https://github.com/Samsung/ONE/issues/4155#issuecomment-689321538
+fi
+
+if [[ ! -s "bcq.pb" ]]; then
+ rm -rf bcq.pb.zip
+ wget -nv https://github.com/Samsung/ONE/files/5153842/bcq.pb.zip
+ unzip bcq.pb.zip
+ # https://github.com/Samsung/ONE/issues/4155#issuecomment-689324597
+fi
+
+if [[ ! -s "img_files" ]]; then
+ rm -rf img_files.zip
+ wget -nv https://github.com/Samsung/ONE/files/5499172/img_files.zip
+ unzip img_files.zip
+ # https://github.com/Samsung/ONE/issues/3213#issuecomment-722757499
+fi
+
+if [ ! -d "raw_files" ] || [ ! -s "datalist.txt" ]; then
+ ../bin/venv/bin/python preprocess_images.py
+fi
+
+if [[ ! -s "inception_v3_test_data.h5" ]]; then
+ ../bin/venv/bin/python ../bin/rawdata2hdf5 \
+ --data_list datalist.txt \
+ --output_path inception_v3_test_data.h5
+fi
+
+if [[ ! -d "test_saved_model" ]]; then
+ rm -rf test_saved_model.zip
+ wget -nv https://github.com/Samsung/ONE/files/5516226/test_saved_model.zip
+ unzip test_saved_model.zip
+ # https://github.com/Samsung/ONE/issues/4268#issuecomment-724578237
+fi
+
+if [[ ! -s "test_keras_model.h5" ]]; then
+ rm -rf test_keras_model.zip
+ wget -nv https://github.com/Samsung/ONE/files/5520777/test_keras_model.zip
+ unzip test_keras_model.zip
+ # https://github.com/Samsung/ONE/issues/4268#issuecomment-725025805
+fi
+
+if [[ ! -s "test_onnx_model.onnx" ]]; then
+ rm -rf test_onnx_model.zip
+ wget -nv https://github.com/Samsung/ONE/files/5768243/test_onnx_model.zip
+ unzip test_onnx_model.zip
+ # https://github.com/Samsung/ONE/issues/5548#issuecomment-754373360
+fi
+
+if [[ ! -s "onnx_conv2d_conv2d.onnx" ]]; then
+ rm -rf onnx_conv2d_conv2d.zip
+ wget -nv https://github.com/Samsung/ONE/files/5774648/onnx_conv2d_conv2d.zip
+ unzip onnx_conv2d_conv2d.zip
+ # https://github.com/Samsung/ONE/issues/5577#issuecomment-755078444
+fi
+
+if [[ ! -s "reshape_matmul.onnx" ]]; then
+ rm -rf reshape_matmul.zip
+ wget -nv https://github.com/Samsung/ONE/files/12358217/reshape_matmul.zip
+ unzip reshape_matmul.zip
+ # https://github.com/Samsung/ONE/issues/9405#issuecomment-1680322410
+fi
+
+# prepare 'reshape_matmul.circle' file used for tests
+if [[ ! -s "reshape_matmul.circle" ]]; then
+ ../bin/one-import onnx \
+ --experimental_disable_batchmatmul_unfold \
+ -i reshape_matmul.onnx \
+ -o reshape_matmul.circle
+fi
+
+if [[ ! -s "Net_InstanceNorm_003.part" ]]; then
+ rm -rf Net_InstanceNorm_003.zip
+ wget -nv https://github.com/Samsung/ONE/files/8608844/Net_InstanceNorm_003.zip
+ unzip Net_InstanceNorm_003.zip
+ # https://github.com/Samsung/ONE/issues/8570#issuecomment-1115804257
+fi
+
+if [[ ! -s "UnidirSeqLSTM.tflite" ]]; then
+ rm -rf UnidirSeqLSTM.zip
+ wget -nv https://github.com/Samsung/ONE/files/10055255/UnidirSeqLSTM.zip
+ unzip UnidirSeqLSTM.zip
+ # https://github.com/Samsung/ONE/issues/9940#issuecomment-1293282484
+fi
+
+function files_missing() {
+ condition="test "
+
+ for f in "${@}"; do
+ condition="${condition} ! -s ${f} -o"
+ done
+
+ # last condition is always false to properly close last "or"
+ condition="${condition} -z non_zero_string "
+ ${condition}
+}
+
+declare -a TEST_RECURRENT_MODELS=(\
+ "RNN.onnx" "RNN-nobias.onnx" "RNN-relu.onnx" "RNN-bi.onnx" "RNN-noinit.onnx"\
+ "LSTM.onnx" "LSTM-bi.onnx" "LSTM-noinit.onnx" "LSTM-nobias.onnx"
+)
+
+if files_missing "${TEST_RECCURENT_MODELS[@]}"; then
+ rm -rf test_onnx_recurrent_models.zip
+ wget -nv https://github.com/Samsung/ONE/files/8067909/test_onnx_recurrent_models.zip
+ unzip test_onnx_recurrent_models.zip
+ # https://github.com/Samsung/ONE/issues/8395#issuecomment-1040072097
+fi
+
+declare -a NEG_TEST_RECURRENT_MODELS=("rnn_variable.onnx" "lstm_variable.onnx")
+
+if files_missing "${NEG_TEST_RECCURENT_MODELS[@]}"; then
+ rm -rf neg_test_onnx_recurrent_models.zip
+ wget -nv https://github.com/Samsung/ONE/files/8137183/neg_test_onnx_recurrent_models.zip
+ unzip neg_test_onnx_recurrent_models.zip
+ # https://github.com/Samsung/ONE/issues/8395#issuecomment-1050364375
+fi
+
+# prepare 'inception_v3.circle' file used for quantization test
+inputfile="./inception_v3.pb"
+outputfile="./inception_v3.circle"
+
+if [[ ! -s ${outputfile} ]]; then
+ ../bin/one-import-tf \
+ --input_path ${inputfile} \
+ --output_path ${outputfile} \
+ --input_arrays input --input_shapes "1,299,299,3" \
+ --output_arrays InceptionV3/Predictions/Reshape_1
+fi
+
+# prepare 'inception_v3.mat.q8.circle' file used for quantization test
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.mat.q8.circle"
+
+if [[ ! -s ${outputfile} ]]; then
+ ../bin/one-quantize \
+ --input_path ${inputfile} \
+ --output_path ${outputfile}
+fi
+
+# prepare 'mobilenet_edgetpu_224_1.0_int8.circle' file used for requantization test
+inputfile="./mobilenet_edgetpu_224_1.0_int8.tflite"
+outputfile="./mobilenet_edgetpu_224_1.0_int8.circle"
+
+if [[ ! -s ${outputfile} ]]; then
+ ../bin/one-import-tflite \
+ --input_path ${inputfile} \
+ --output_path ${outputfile}
+fi
+
+popd > /dev/null
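
`files_missing` above builds a single `test` expression of the form `test ! -s f1 -o ! -s f2 -o -z non_zero_string`, which is true when any listed file is missing or empty (the final `-z` clause is a constant false that closes the last `-o`). An equivalent, illustrative Python helper:

```python
# Python equivalent of the files_missing() helper above: true when any of
# the given files is absent or empty.
import os

def files_missing(*paths):
    return any(not os.path.isfile(p) or os.path.getsize(p) == 0 for p in paths)

# Usage mirroring the script: re-download the archive only if needed.
if files_missing("RNN.onnx", "LSTM.onnx"):
    print("some recurrent test models are missing; re-download the zip")
```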
diff --git a/compiler/one-cmds/tests/preprocess_images.py b/compiler/one-cmds/tests/preprocess_images.py
new file mode 100644
index 000000000..ced6e3a08
--- /dev/null
+++ b/compiler/one-cmds/tests/preprocess_images.py
@@ -0,0 +1,38 @@
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os, shutil, PIL.Image, numpy as np
+
+input_dir = 'img_files'
+output_dir = 'raw_files'
+list_file = 'datalist.txt'
+
+if os.path.exists(output_dir):
+ shutil.rmtree(output_dir, ignore_errors=True)
+os.makedirs(output_dir)
+
+for (root, _, files) in os.walk(input_dir):
+ datalist = open(list_file, 'w')
+ for f in files:
+ with PIL.Image.open(root + '/' + f) as image:
+ # To handle ANTIALIAS deprecation
+ ANTIALIAS = PIL.Image.Resampling.LANCZOS if hasattr(
+ PIL.Image, "Resampling") else PIL.Image.ANTIALIAS
+
+ img = np.array(image.resize((299, 299), ANTIALIAS)).astype(np.float32)
+ img = ((img / 255) - 0.5) * 2.0
+ output_file = output_dir + '/' + f.replace('jpg', 'data')
+ img.tofile(output_file)
+ datalist.writelines(os.path.abspath(output_file) + '\n')
+ datalist.close()
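
The preprocessing above resizes each image to 299x299 and rescales pixels with `((img / 255) - 0.5) * 2.0`, which maps [0, 255] into the [-1, 1] input range Inception v3 expects. A quick check of that mapping:

```python
# Quick check of the normalization used above: (x / 255 - 0.5) * 2 maps
# pixel values from [0, 255] into [-1.0, 1.0].
import numpy as np

px = np.array([0, 127.5, 255], dtype=np.float32)
print((px / 255 - 0.5) * 2.0)  # [-1.  0.  1.]
```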
diff --git a/compiler/one-cmds/tests/print_onnx_model.py b/compiler/one-cmds/tests/print_onnx_model.py
new file mode 100644
index 000000000..ecab0f6da
--- /dev/null
+++ b/compiler/one-cmds/tests/print_onnx_model.py
@@ -0,0 +1,20 @@
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import onnx
+import sys
+
+if __name__ == '__main__':
+ model = onnx.load(sys.argv[1])
+ print(model)
diff --git a/compiler/one-cmds/tests/pytorch-operations/CMakeLists.txt b/compiler/one-cmds/tests/pytorch-operations/CMakeLists.txt
new file mode 100644
index 000000000..10f30a5c9
--- /dev/null
+++ b/compiler/one-cmds/tests/pytorch-operations/CMakeLists.txt
@@ -0,0 +1,109 @@
+# Install one-cmds test scripts for pytorch models
+
+# Gather test scripts
+set(EXAMPLES_DIR "${NNAS_PROJECT_SOURCE_DIR}/res/PyTorchExamples/examples")
+file(GLOB TEST_EXAMPLES RELATIVE "${EXAMPLES_DIR}" "${EXAMPLES_DIR}/*")
+file(GLOB SPECIAL_TEST_ITEMS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.test")
+
+set(TEST_DST test/pytorch-operations)
+
+install(DIRECTORY "${NNAS_PROJECT_SOURCE_DIR}/res/PyTorchExamples/" DESTINATION "${TEST_DST}")
+
+set(PYTORCH_IMPORT_OPTIONS "--unroll_rnn --unroll_lstm")
+
+foreach(TEST_ITEM IN ITEMS ${TEST_EXAMPLES})
+ set(TEST_SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/${TEST_ITEM}.test")
+
+ # generate test script
+ file(WRITE "${TEST_SCRIPT}" "#!/bin/bash\n\n")
+ file(APPEND "${TEST_SCRIPT}" "filename_ext=\"\$(basename -- $0)\"\n")
+ file(APPEND "${TEST_SCRIPT}" "filename=\"\${filename_ext%.*}\"\n")
+ file(APPEND "${TEST_SCRIPT}" "trap_err_onexit()\n")
+ file(APPEND "${TEST_SCRIPT}" "{\n")
+ file(APPEND "${TEST_SCRIPT}" " echo \"\${filename_ext} FAILED\"\n")
+ file(APPEND "${TEST_SCRIPT}" " exit 255\n")
+ file(APPEND "${TEST_SCRIPT}" "}\n")
+ file(APPEND "${TEST_SCRIPT}" "trap trap_err_onexit ERR\n")
+ file(APPEND "${TEST_SCRIPT}" "outputfile=\"${TEST_ITEM}.circle\"\n")
+ file(APPEND "${TEST_SCRIPT}" "input_shapes=\$(head -n 1 ${TEST_ITEM}.spec)\n")
+ file(APPEND "${TEST_SCRIPT}" "input_types=\$(tail -n 1 ${TEST_ITEM}.spec)\n")
+ file(APPEND "${TEST_SCRIPT}" "one-import-pytorch --input_path=${TEST_ITEM}.pth --output_path=${TEST_ITEM}.circle\
+ ${PYTORCH_IMPORT_OPTIONS} --input_shapes=\${input_shapes} --input_types=\${input_types} &> /dev/null\n")
+ file(APPEND "${TEST_SCRIPT}" "if [[ ! -s \"\${outputfile}\" ]]; then\n")
+ file(APPEND "${TEST_SCRIPT}" " trap_err_onexit\n")
+ file(APPEND "${TEST_SCRIPT}" "fi\n")
+ file(APPEND "${TEST_SCRIPT}" "echo \"\${filename_ext} SUCCESS\"\n")
+
+ install(FILES "${TEST_SCRIPT}" DESTINATION "${TEST_DST}")
+endforeach(TEST_ITEM)
+
+
+# Create a script to run the tests at installation folder
+set(DRIVER_SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/runtestall.sh")
+
+file(WRITE "${DRIVER_SCRIPT}" "#!/bin/bash\n\n")
+file(APPEND "${DRIVER_SCRIPT}" "SCRIPT_PATH=$(cd $(dirname \${BASH_SOURCE[0]}) && pwd)\n")
+file(APPEND "${DRIVER_SCRIPT}" "pushd $SCRIPT_PATH > /dev/null\n")
+file(APPEND "${DRIVER_SCRIPT}" "rm -rf runtestall.log\n")
+file(APPEND "${DRIVER_SCRIPT}" "export PATH=$SCRIPT_PATH/../bin:$PATH\n")
+file(APPEND "${DRIVER_SCRIPT}" "if [[ $# -ge 1 ]]; then\n")
+file(APPEND "${DRIVER_SCRIPT}" " USER_PATH=$1\n")
+file(APPEND "${DRIVER_SCRIPT}" " export PATH=$USER_PATH:$PATH\n")
+file(APPEND "${DRIVER_SCRIPT}" "fi\n")
+file(APPEND "${DRIVER_SCRIPT}" "\n")
+file(APPEND "${DRIVER_SCRIPT}" "# refer https://github.com/Samsung/ONE/issues/6286\n")
+file(APPEND "${DRIVER_SCRIPT}" "set -o pipefail\n\n")
+file(APPEND "${DRIVER_SCRIPT}" "fail_count=0\n")
+file(APPEND "${DRIVER_SCRIPT}" "trap \"(( fail_count++ ))\" ERR\n\n")
+
+foreach(TEST_ITEM IN ITEMS ${TEST_EXAMPLES})
+ file(APPEND "${DRIVER_SCRIPT}" "/bin/bash \"${TEST_ITEM}.test\" | tee -a runtestall.log\n")
+endforeach(TEST_ITEM)
+
+file(APPEND "${DRIVER_SCRIPT}" "\necho \"special test items\" | tee -a runtestall.log\n\n")
+
+foreach(TEST_ITEM IN ITEMS ${SPECIAL_TEST_ITEMS})
+ file(APPEND "${DRIVER_SCRIPT}" "/bin/bash \"${TEST_ITEM}\" | tee -a runtestall.log\n")
+endforeach(TEST_ITEM)
+
+file(APPEND "${DRIVER_SCRIPT}" "popd > /dev/null\n\n")
+
+file(APPEND "${DRIVER_SCRIPT}"
+"if [[ $fail_count != 0 ]]; then
+ echo \"$fail_count TESTS FAILED\"
+ exit 255
+else
+ echo \"ALL TESTS PASSED!\"
+fi\n
+")
+
+set(PREPARE_TEST_MATERIALS_SH "${CMAKE_CURRENT_SOURCE_DIR}/prepare_test_materials.sh")
+set(EXAMPLE_GENERATOR "${CMAKE_CURRENT_SOURCE_DIR}/example_generator.py")
+set(AUX_GENERATOR "${CMAKE_CURRENT_SOURCE_DIR}/aux_generator.py")
+
+install(FILES "${DRIVER_SCRIPT}"
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION "${TEST_DST}")
+
+install(FILES "${PREPARE_TEST_MATERIALS_SH}"
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION "${TEST_DST}")
+
+install(FILES "${EXAMPLE_GENERATOR}" "${AUX_GENERATOR}"
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION "${TEST_DST}")
+
+install(FILES ${SPECIAL_TEST_ITEMS}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION "${TEST_DST}")
+
+install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/README.md"
+ DESTINATION "${TEST_DST}")
diff --git a/compiler/one-cmds/tests/pytorch-operations/README.md b/compiler/one-cmds/tests/pytorch-operations/README.md
new file mode 100644
index 000000000..231a10eb4
--- /dev/null
+++ b/compiler/one-cmds/tests/pytorch-operations/README.md
@@ -0,0 +1,28 @@
+## Overview
+
+This directory contains auxiliary tests for small PyTorch target models.
+
+Most of the models contain a single operation, but some contain multiple operations that together represent one operation with complex semantics.
+
+Models for these tests are taken from res/PyTorchExamples.
+
+## To run all tests
+
+Steps:
+1) run 'one-prepare-venv' in the bin folder to prepare a python virtual-env with TensorFlow
+ - you need to run this only once
+ - read 'doc/how-to-prepare-virtualenv.txt' for more information
+ ```
+ bin/one-prepare-venv
+ ```
+2) run 'test/pytorch-operations/prepare_test_materials.sh' to download test material models
+ - you need to run this only once
+ - you need an internet connection to download files
+ - you may need to install 'wget' and 'unzip' packages
+ ```
+ test/pytorch-operations/prepare_test_materials.sh
+ ```
+3) run 'test/pytorch-operations/runtestall.sh' to run the test
+ ```
+ test/pytorch-operations/runtestall.sh
+ ```
diff --git a/compiler/one-cmds/tests/pytorch-operations/aux_generator.py b/compiler/one-cmds/tests/pytorch-operations/aux_generator.py
new file mode 100644
index 000000000..6c9afcded
--- /dev/null
+++ b/compiler/one-cmds/tests/pytorch-operations/aux_generator.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# PyTorch aux tests generator
+
+import torch
+import torch.nn as nn
+import json
+import zipfile
+import os
+
+
+# model
+class net_abs(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, input):
+ return torch.abs(input)
+
+
+if __name__ == '__main__':
+ model = net_abs()
+ # save "entire" model for entire_model.test
+ torch.save(model, 'entire_model.pth')
+
+ # save state_dict file for state_dict_model.test
+ state_dict_path = 'state_dict_model.pth'
+ torch.save(model.state_dict(), state_dict_path)
+
+ # create files for mar_torchscript_model.test
+ torchscript_path = 'torchscript_model.pth'
+ inp = torch.randn(1, 2, 3, 3)
+ traced_model = torch.jit.trace(model, inp)
+ torch.jit.save(traced_model, torchscript_path)
+ # create manifest
+ manifest = {}
+ manifest['createdOn'] = '11/11/1111 11:11:11'
+ manifest['runtime'] = 'python'
+ manifest['model'] = {}
+ manifest['model']['modelName'] = 'torchscript_model'
+ manifest['model']['serializedFile'] = torchscript_path
+ manifest['model']['handler'] = 'image_classifier'
+ manifest['model']['modelVersion'] = '1.0'
+ manifest['archiverVersion'] = '0.4.2'
+
+ with zipfile.ZipFile('mar_torchscript_model.mar', 'w') as mar_file:
+ with mar_file.open('MAR-INF/MANIFEST.json', 'w') as manifest_file:
+ manifest_file.write(json.dumps(manifest).encode())
+ mar_file.write(torchscript_path)
+
+ # create files for mar_state_dict_model.test
+ model_file_path = os.path.basename(__file__)
+ # create manifest
+ manifest = {}
+ manifest['createdOn'] = '11/11/1111 11:11:11'
+ manifest['runtime'] = 'python'
+ manifest['model'] = {}
+ manifest['model']['modelName'] = 'state_dict_model'
+ manifest['model']['serializedFile'] = state_dict_path
+ manifest['model']['handler'] = 'image_classifier'
+ manifest['model']['modelFile'] = model_file_path
+ manifest['model']['modelVersion'] = '1.0'
+ manifest['archiverVersion'] = '0.4.2'
+
+ with zipfile.ZipFile('mar_state_dict_model.mar', 'w') as mar_file:
+ with mar_file.open('MAR-INF/MANIFEST.json', 'w') as manifest_file:
+ manifest_file.write(json.dumps(manifest).encode())
+ mar_file.write(state_dict_path)
+ mar_file.write(model_file_path)
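
The generator above shows that a `.mar` archive is just a zip containing `MAR-INF/MANIFEST.json` plus the serialized model files it references. A short sketch that reads one back (file names follow aux_generator.py):

```python
# Sketch: read back the .mar archive written above. A .mar is a plain zip
# whose MAR-INF/MANIFEST.json describes the serialized model files.
import json, zipfile

with zipfile.ZipFile("mar_torchscript_model.mar") as mar:
    manifest = json.loads(mar.read("MAR-INF/MANIFEST.json"))
    print(manifest["model"]["serializedFile"])  # torchscript_model.pth
    print(mar.namelist())                       # manifest plus model files
```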
diff --git a/compiler/one-cmds/tests/pytorch-operations/entire_model.test b/compiler/one-cmds/tests/pytorch-operations/entire_model.test
new file mode 100644
index 000000000..a72a56ffd
--- /dev/null
+++ b/compiler/one-cmds/tests/pytorch-operations/entire_model.test
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test one-import-pytorch's ability to import an NN model stored in a python file together with a serialized "entire" model.
+# An "entire" model is serialized with the `torch.save(model)` method.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+outputfile="entire_model.circle"
+
+# run test
+one-import-pytorch --input_path=entire_model.pth --python_path=aux_generator.py --output_path=${outputfile} --input_shapes=1,2,3,3 --input_types=float32 &> /dev/null
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/pytorch-operations/example_generator.py b/compiler/one-cmds/tests/pytorch-operations/example_generator.py
new file mode 100644
index 000000000..2e3cdcf5b
--- /dev/null
+++ b/compiler/one-cmds/tests/pytorch-operations/example_generator.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# PyTorch Example manager
+
+import torch
+import importlib.machinery
+import importlib.util
+import argparse
+import os
+
+from pathlib import Path
+
+print("PyTorch version=", torch.__version__)
+
+parser = argparse.ArgumentParser(description='Process PyTorch python examples')
+
+parser.add_argument('examples', metavar='EXAMPLES', nargs='+')
+
+args = parser.parse_args()
+
+output_folder = "./"
+
+Path(output_folder).mkdir(parents=True, exist_ok=True)
+
+
+class JitWrapper(torch.nn.Module):
+ def __init__(self, model):
+ super().__init__()
+ self.model = model
+
+ def forward(self, *args):
+ if len(args) == 1:
+ return self.model.forward(args[0])
+ else:
+ return self.model.forward(args)
+
+
+for example in args.examples:
+ print("Generate '" + example + ".pth'", end='')
+ # load example code
+ # replace - with _ in name, otherwise pytorch generates invalid torchscript
+ module_name = "examples." + example.replace('-', '_')
+ module_loader = importlib.machinery.SourceFileLoader(
+ module_name, os.path.join("examples", example, "__init__.py"))
+ module_spec = importlib.util.spec_from_loader(module_name, module_loader)
+ module = importlib.util.module_from_spec(module_spec)
+ module_loader.exec_module(module)
+
+ jittable_model = JitWrapper(module._model_)
+
+ traced_model = torch.jit.trace(jittable_model, module._dummy_)
+ # save .pth
+ torch.jit.save(traced_model, output_folder + example + ".pth")
+
+ input_shapes = ""
+ input_types = ""
+
+ input_samples = module._dummy_
+ if isinstance(input_samples, torch.Tensor):
+ input_samples = [input_samples]
+ for inp_idx in range(len(input_samples)):
+ input_data = input_samples[inp_idx]
+
+ shape = input_data.shape
+ for dim in range(len(shape)):
+ input_shapes += str(shape[dim])
+ if dim != len(shape) - 1:
+ input_shapes += ","
+
+ if input_data.dtype == torch.bool:
+ input_types += "bool"
+ elif input_data.dtype == torch.uint8:
+ input_types += "uint8"
+ elif input_data.dtype == torch.int8:
+ input_types += "int8"
+ elif input_data.dtype == torch.int16:
+ input_types += "int16"
+ elif input_data.dtype == torch.int32:
+ input_types += "int32"
+ elif input_data.dtype == torch.int64:
+ input_types += "int16"
+ elif input_data.dtype == torch.float16:
+ input_types += "float32"
+ elif input_data.dtype == torch.float32:
+ input_types += "float32"
+ elif input_data.dtype == torch.float64:
+ input_types += "float64"
+ elif input_data.dtype == torch.complex64:
+ input_types += "complex64"
+ elif input_data.dtype == torch.complex128:
+ input_types += "complex128"
+ else:
+ raise ValueError('unsupported dtype')
+
+ if inp_idx != len(input_samples) - 1:
+ input_shapes += ":"
+ input_types += ","
+
+ with open(example + ".spec", "w") as spec_file:
+ print(input_shapes, file=spec_file)
+ print(input_types, file=spec_file)
+
+ print(" - Done")
diff --git a/compiler/one-cmds/tests/pytorch-operations/mar_state_dict_model.test b/compiler/one-cmds/tests/pytorch-operations/mar_state_dict_model.test
new file mode 100644
index 000000000..9892dbbed
--- /dev/null
+++ b/compiler/one-cmds/tests/pytorch-operations/mar_state_dict_model.test
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test one-import-pytorch's ability to import a .mar file.
+# The .mar file contains the python source of the model and a serialized state_dict.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+outputfile="mar_state_dict_model.circle"
+
+# run test
+one-import-pytorch --input_path=mar_state_dict_model.mar --output_path=${outputfile} --input_shapes=1,2,3,3 --input_types=float32 &> /dev/null
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/pytorch-operations/mar_torchscript_model.test b/compiler/one-cmds/tests/pytorch-operations/mar_torchscript_model.test
new file mode 100644
index 000000000..3ac38a42e
--- /dev/null
+++ b/compiler/one-cmds/tests/pytorch-operations/mar_torchscript_model.test
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test one-import-pytorch's ability to import a .mar file.
+# The .mar file contains a TorchScript model.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+outputfile="mar_torchscript_model.circle"
+
+# run test
+one-import-pytorch --input_path=mar_torchscript_model.mar --output_path=${outputfile} --input_shapes=1,2,3,3 --input_types=float32 &> /dev/null
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/pytorch-operations/prepare_test_materials.sh b/compiler/one-cmds/tests/pytorch-operations/prepare_test_materials.sh
new file mode 100644
index 000000000..5f38610d7
--- /dev/null
+++ b/compiler/one-cmds/tests/pytorch-operations/prepare_test_materials.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+SCRIPT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+pushd $SCRIPT_PATH > /dev/null
+
+for test_case in examples/*; do
+ python3 example_generator.py $(basename ${test_case})
+done
+
+python3 aux_generator.py
+
+popd > /dev/null
diff --git a/compiler/one-cmds/tests/pytorch-operations/state_dict_model.test b/compiler/one-cmds/tests/pytorch-operations/state_dict_model.test
new file mode 100644
index 000000000..ecd2a8112
--- /dev/null
+++ b/compiler/one-cmds/tests/pytorch-operations/state_dict_model.test
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test one-import-pytorch's ability to import an NN model from a .py file and a serialized state_dict file.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+outputfile="state_dict_model.circle"
+
+# run test
+one-import-pytorch --input_path=state_dict_model.pth --python_path=aux_generator.py --output_path=${outputfile} --input_shapes=1,2,3,3 --input_types=float32 &> /dev/null
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/pytorch-operations/torchscript_model.test b/compiler/one-cmds/tests/pytorch-operations/torchscript_model.test
new file mode 100644
index 000000000..590e5b369
--- /dev/null
+++ b/compiler/one-cmds/tests/pytorch-operations/torchscript_model.test
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test one-import-pytorch's ability to import a TorchScript file.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+outputfile="torchscript_model.circle"
+
+# run test
+one-import-pytorch --input_path=torchscript_model.pth --output_path=${outputfile} --input_shapes=1,2,3,3 --input_types=float32 &> /dev/null
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/rawdata2hdf5_001.test b/compiler/one-cmds/tests/rawdata2hdf5_001.test
new file mode 100644
index 000000000..ceefcf725
--- /dev/null
+++ b/compiler/one-cmds/tests/rawdata2hdf5_001.test
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+outputfile="./output_testdata.h5"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test
+rawdata2hdf5 \
+--data_list datalist.txt \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/rawdata2hdf5_neg_001.test b/compiler/one-cmds/tests/rawdata2hdf5_neg_001.test
new file mode 100644
index 000000000..fb7803760
--- /dev/null
+++ b/compiler/one-cmds/tests/rawdata2hdf5_neg_001.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "No such file" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./wronglist.txt"
+outputfile="./output_testdata.h5"
+
+rm -rf ${inputfile}
+rm -f ${filename}.log
+
+touch ${inputfile}
+echo "non-existing-file.data" >> ${inputfile}
+
+# run test
+rawdata2hdf5 \
+--data_list ${inputfile} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/rawdata2hdf5_neg_002.test b/compiler/one-cmds/tests/rawdata2hdf5_neg_002.test
new file mode 100644
index 000000000..7a9e231e3
--- /dev/null
+++ b/compiler/one-cmds/tests/rawdata2hdf5_neg_002.test
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "UnicodeDecodeError" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./output_testdata.h5"
+
+rm -f ${filename}.log
+
+# run test
+rawdata2hdf5 \
+--data_list ${inputfile} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/rawdata2hdf5_neg_003.test b/compiler/one-cmds/tests/rawdata2hdf5_neg_003.test
new file mode 100644
index 000000000..c69f935a4
--- /dev/null
+++ b/compiler/one-cmds/tests/rawdata2hdf5_neg_003.test
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "the following arguments are required: -l/--data_list" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+outputfile="./output_testdata.h5"
+
+rm -f ${filename}.log
+
+# run test
+rawdata2hdf5 \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/rawdata2hdf5_neg_004.test b/compiler/one-cmds/tests/rawdata2hdf5_neg_004.test
new file mode 100644
index 000000000..df0620122
--- /dev/null
+++ b/compiler/one-cmds/tests/rawdata2hdf5_neg_004.test
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Unable to create file" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+outputfile="./non_existing_dir/output_testdata.h5"
+
+rm -f ${filename}.log
+
+# run test
+rawdata2hdf5 \
+--data_list datalist.txt \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/validate-onnx2circle/CMakeLists.txt b/compiler/one-cmds/validate-onnx2circle/CMakeLists.txt
new file mode 100644
index 000000000..6727359c9
--- /dev/null
+++ b/compiler/one-cmds/validate-onnx2circle/CMakeLists.txt
@@ -0,0 +1,5 @@
+install(FILES validate_onnx2circle.py
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION test)
diff --git a/compiler/one-cmds/validate-onnx2circle/README.md b/compiler/one-cmds/validate-onnx2circle/README.md
new file mode 100644
index 000000000..341df3d87
--- /dev/null
+++ b/compiler/one-cmds/validate-onnx2circle/README.md
@@ -0,0 +1,36 @@
+# validate-onnx2circle
+
+_validate-onnx2circle_ validates the conversion of an onnx model to an optimized circle model
+by comparing the execution results of the original onnx model and the optimized circle model.
+
+This is currently in an experimental state.
+
+## How to run the script
+
+Install `onnxruntime` inside the virtual environment
+```
+source install_path/bin/venv/bin/activate
+
+python -m pip --default-timeout=1000 --trusted-host pypi.org \
+ --trusted-host files.pythonhosted.org install onnxruntime==1.6.0
+
+deactivate
+```
+
+Run the script
+```bash
+cd install_path/test
+
+driver='one/build/debug/compiler/luci-eval-driver/luci_eval_driver'
+onnx_filepath='path_to_onnx_model.onnx'
+circle_filepath='path_to_optimized_circle.circle'
+
+./validate_onnx2circle.py --driver ${driver} --onnx ${onnx_filepath} --circle ${circle_filepath}
+```
+
+The output will show something like this:
+```
+Run ONNX...
+Run luci-interpreter...
+Compare 0 True
+```
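
The comparison the script performs (visible in the source below) relies on `np.isclose`, which checks `|a - b| <= atol + rtol * |b|` elementwise; the script uses `rtol=1e-03` and `atol=1e-04`. A tiny demonstration:

```python
# Demonstration of the elementwise tolerance check used by the script below.
import numpy as np

a = np.array([1.0000, 100.00])
b = np.array([1.0002, 100.15])
# |1.0000-1.0002| <= 1e-4 + 1e-3*1.0002 -> True
# |100.00-100.15| <= 1e-4 + 1e-3*100.15 -> False
print(np.isclose(a, b, rtol=1e-3, atol=1e-4))  # [ True False]
```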
diff --git a/compiler/one-cmds/validate-onnx2circle/validate_onnx2circle.py b/compiler/one-cmds/validate-onnx2circle/validate_onnx2circle.py
new file mode 100644
index 000000000..eac2f6d35
--- /dev/null
+++ b/compiler/one-cmds/validate-onnx2circle/validate_onnx2circle.py
@@ -0,0 +1,156 @@
+#!/usr/bin/env bash
+''''export SCRIPT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" # '''
+''''export PY_PATH=${SCRIPT_PATH}/../bin/venv/bin/python # '''
+''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # '''
+''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
+''''exit 255 # '''
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# NOTE This is an experimental script to evaluate onnx-circle conversion
+# by running onnxruntime and luci-interpreter.
+# Plan is to run this regularly in CI
+
+import subprocess
+import argparse
+import numpy as np
+import torch
+import onnx
+import onnxruntime as ort
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--driver', type=str, required=True)
+parser.add_argument('--onnx', type=str, required=True)
+parser.add_argument('--circle', type=str, required=True)
+args = parser.parse_args()
+
+driver = args.driver
+onnx_filepath = args.onnx
+circle_filepath = args.circle
+
+
+def to_numpy(tensor):
+ return tensor.cpu().numpy()
+
+
+def to_nhwc(tensor):
+ if (tensor.ndim == 4):
+ return np.transpose(tensor, (0, 2, 3, 1))
+ return tensor
+
+
+class OnnxRunner:
+ def __init__(self, filepath):
+ self.filepath = filepath
+ self.session = None
+ self.inputs = None
+ self.inputs_size = None
+ self.inputs_data = None
+        self.outs = None
+        self.outputs = None
+        self.outputs_size = None
+
+ def load(self):
+ model = onnx.load(self.filepath)
+ onnx.checker.check_model(model)
+ self.session = ort.InferenceSession(self.filepath)
+
+ def feed_random_inputs(self):
+ self.inputs = self.session.get_inputs()
+ self.inputs_size = len(self.inputs)
+ # reset input dictionary
+ self.inputs_data = {}
+ for in_idx in range(self.inputs_size):
+ input_shape = self.inputs[in_idx].shape
+ input_type = self.inputs[in_idx].type
+ if input_type == 'tensor(float)':
+ torch_type = torch.float32
+ else:
+ # TODO support other dtype
+ raise SystemExit("Unsupported input dtype")
+
+ x = torch.randn(input_shape, dtype=torch_type)
+ input_npa = to_numpy(x)
+ self.inputs_data.update({self.inputs[in_idx].name: input_npa})
+
+ # save NHWC form of input for luci-interpreter
+ input_npa_nhwc = to_nhwc(input_npa)
+ input_npa_nhwc.tofile(circle_filepath + ".input" + str(in_idx))
+
+ def run(self):
+ self.outs = self.session.run(None, self.inputs_data)
+
+ def get_outputs(self):
+ self.outputs = self.session.get_outputs()
+ self.outputs_size = len(self.outputs)
+
+
+# Run ONNX model
+print("Run ONNX...")
+onnx_runner = OnnxRunner(onnx_filepath)
+onnx_runner.load()
+onnx_runner.feed_random_inputs()
+onnx_runner.run()
+onnx_runner.get_outputs()
+
+# Execute luci interpreter
+print("Run luci-interpreter...")
+process = subprocess.run(
+ [
+ driver, circle_filepath,
+ str(onnx_runner.inputs_size), circle_filepath + ".input",
+ circle_filepath + ".output"
+ ],
+ check=True)
+
+# Compare results
+rtolerance = 1e-03
+atolerance = 1e-04
+result_compare = True
+for idx in range(onnx_runner.outputs_size):
+ output_shape = onnx_runner.outputs[idx].shape
+ output_type = onnx_runner.outputs[idx].type
+ if output_type == 'tensor(float)':
+ output_np_type = np.float32
+ else:
+ # TODO support other dtype
+ raise SystemExit("Unsupported output dtype")
+
+ # output of luci-interpreter
+ output_data = np.fromfile(circle_filepath + ".output" + str(idx), output_np_type)
+ shape_file = open(circle_filepath + ".output" + str(idx) + ".shape", 'r')
+ output_shape = [int(i) for i in shape_file.read().split(',')]
+ luci_output_data = np.reshape(output_data, output_shape)
+
+ # output of onnx runtime
+ output_nchw = onnx_runner.outs[idx]
+ output_nhwc = to_nhwc(output_nchw)
+
+ # diff has tensor of boolean for each values within tolerance or not
+ diff = np.isclose(output_nhwc, luci_output_data, rtol=rtolerance, atol=atolerance)
+ # get one boolean if all are True then True
+ result_compare_one = np.all(diff)
+ print("Compare", idx, result_compare_one)
+ if (not result_compare_one):
+ diff_val = np.subtract(output_nhwc, luci_output_data)
+ print("ONNX Result", output_nhwc)
+ print("Diff", diff_val)
+ print("Diff Max", np.ndarray.max(diff_val))
+
+ result_compare = result_compare and result_compare_one
+
+if (not result_compare):
+ exit(-1)
+
+exit(0)
diff --git a/compiler/onecc-docker/README.md b/compiler/onecc-docker/README.md
new file mode 100644
index 000000000..3d7aa89f0
--- /dev/null
+++ b/compiler/onecc-docker/README.md
@@ -0,0 +1,36 @@
+# onecc-docker
+
+_onecc-docker_ extends ONE tools so that they can be used on other platforms.
+
+## Description
+
+For now, ONE tools support only Ubuntu 18.04 and, unofficially, 20.04.
+So it is difficult for people in other environments to use our tools.
+
+To overcome this limitation, we provide _onecc-docker_, which runs ONE tools inside a Docker container so that they can be used on a wider range of systems.
+
+This tool aims at the following objectives:
+
+- Make ONE tools usable on otherwise unsupported Linux distributions
+- Install and use ONE tools quickly and with little overhead via Docker
+
+## Requirements
+
+- Any Linux distribution
+- Docker
+  - The Docker daemon requires root privileges.
+  - _onecc-docker_ requires the current user to be in the `docker` group so that Docker commands can be executed without `sudo`.
+ - See "[Post-installation steps for Linux](https://docs.docker.com/engine/install/linux-postinstall/)"
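+  - For example, `sudo usermod -aG docker $USER` (then log out and back in) adds the current user to the group, as the linked guide describes.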
+- Python 3.8
+ - requests
+
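+## Usage
+
+The synopsis below is taken from the script's own help text; any `COMMAND <args>` following the optional `-t/--token` is passed through to `onecc` running inside the container.
+
+```
+onecc-docker [-h] [-t TOKEN] [COMMAND <args>]
+```
+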
+## Note
+
+_onecc-docker_ is currently in the incubation stage.
+
+The onecc-docker debian package should be created together with the one-compiler debian
+package when the ONE compiler project is built. To this end, the onecc-docker debian files
+properly belong in the ./infra/debian/compiler directory. However, the code is still a work
+in progress, so it is temporarily implemented in this location.
+
+TODO: Merge this debian directory into ./infra/debian/compiler code.
diff --git a/compiler/onecc-docker/debian/changelog b/compiler/onecc-docker/debian/changelog
new file mode 100644
index 000000000..501d0ec33
--- /dev/null
+++ b/compiler/onecc-docker/debian/changelog
@@ -0,0 +1,6 @@
+onecc-docker (0.1.0) bionic; urgency=medium
+
+ * Introduce onecc-docker
+
+ -- Seunghui Lee <dltmdgml456654@gmail.com>  Fri, 23 Sep 2022 12:00:00 +0900
+
diff --git a/compiler/onecc-docker/debian/compat b/compiler/onecc-docker/debian/compat
new file mode 100644
index 000000000..ec635144f
--- /dev/null
+++ b/compiler/onecc-docker/debian/compat
@@ -0,0 +1 @@
+9
diff --git a/compiler/onecc-docker/debian/control b/compiler/onecc-docker/debian/control
new file mode 100644
index 000000000..4687d105e
--- /dev/null
+++ b/compiler/onecc-docker/debian/control
@@ -0,0 +1,13 @@
+Source: onecc-docker
+Section: devel
+Priority: extra
+Maintainer: Neural Network Acceleration Solution Developers <nnfw@samsung.com>
+Build-Depends: debhelper (>=9)
+Standards-Version: 4.5.1
+Homepage: https://github.com/Samsung/ONE
+
+Package: onecc-docker
+Architecture: amd64
+Multi-Arch: foreign
+Depends: ${misc:Depends}, ${shlibs:Depends}, python3.8
+Description: On-device Neural Engine docker package
diff --git a/compiler/onecc-docker/debian/copyright b/compiler/onecc-docker/debian/copyright
new file mode 100644
index 000000000..837bb7d28
--- /dev/null
+++ b/compiler/onecc-docker/debian/copyright
@@ -0,0 +1,3 @@
+Files: *
+License: Proprietary
+Copyright (c) <2022> <Samsung Electronics Co.,Ltd.>
diff --git a/compiler/onecc-docker/debian/onecc-docker.install b/compiler/onecc-docker/debian/onecc-docker.install
new file mode 100644
index 000000000..403625357
--- /dev/null
+++ b/compiler/onecc-docker/debian/onecc-docker.install
@@ -0,0 +1,2 @@
+compiler/onecc-docker/onecc-docker /usr/share/one/bin/
+compiler/onecc-docker/docker/Dockerfile /usr/share/one/bin/docker/
diff --git a/compiler/onecc-docker/debian/onecc-docker.links b/compiler/onecc-docker/debian/onecc-docker.links
new file mode 100644
index 000000000..2374663e9
--- /dev/null
+++ b/compiler/onecc-docker/debian/onecc-docker.links
@@ -0,0 +1 @@
+/usr/share/one/bin/onecc-docker /usr/bin/onecc-docker
diff --git a/compiler/onecc-docker/debian/rules b/compiler/onecc-docker/debian/rules
new file mode 100644
index 000000000..cfd26cfd7
--- /dev/null
+++ b/compiler/onecc-docker/debian/rules
@@ -0,0 +1,8 @@
+#!/usr/bin/make -f
+
+%:
+ dh $@
+
+override_dh_fixperms:
+ dh_fixperms
+ chmod +x debian/onecc-docker/usr/share/one/bin/onecc-docker
diff --git a/compiler/onecc-docker/docker/Dockerfile b/compiler/onecc-docker/docker/Dockerfile
new file mode 100644
index 000000000..ae912795a
--- /dev/null
+++ b/compiler/onecc-docker/docker/Dockerfile
@@ -0,0 +1,26 @@
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FROM ubuntu:18.04
+
+ARG VERSION
+
+RUN apt-get update && apt-get install -qqy --no-install-recommends \
+ wget \
+ ca-certificates \
+ && wget --no-check-certificate https://github.com/Samsung/ONE/releases/download/${VERSION}/one-compiler-bionic_${VERSION}_amd64.deb \
+ && apt-get install -y ./one-compiler-bionic_${VERSION}_amd64.deb \
+ && rm -rf /var/lib/apt/lists/*
+
+ENTRYPOINT ["onecc"]
diff --git a/compiler/onecc-docker/onecc-docker b/compiler/onecc-docker/onecc-docker
new file mode 100644
index 000000000..c68c7f1f3
--- /dev/null
+++ b/compiler/onecc-docker/onecc-docker
@@ -0,0 +1,131 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import subprocess
+import requests
+import os
+import argparse
+import re
+
+
+class RequestHandler:
+ def __init__(self, token=None, timeout=None):
+ if token:
+ self.headers = {"Authorization": "Bearer {}".format(token)}
+ else:
+ self.headers = {}
+ self.timeout = timeout or 5
+
+ def make_request(self, url):
+ try:
+ response = requests.get(url, headers=self.headers, timeout=self.timeout)
+ response.raise_for_status()
+ return response
+ except requests.RequestException as e:
+ raise SystemExit('[onecc-docker] error: {}'.format(e))
+
+
+# 5 sec timeout is set based on github.com/Samsung/ONE/issues/11134
+def _request_recent_version(token=None):
+ response = RequestHandler(
+ token,
+ timeout=5).make_request(url="https://api.github.com/repos/Samsung/ONE/releases")
+ versions = [release_item["tag_name"] for release_item in response.json()]
+
+ for version in versions:
+ # Return the latest version with the given format
+ # to filter out such as 'onert-micro-0.1.0' release
+ # which doesn't contain onecc package.
+ if bool(re.match(r'^\d+\.\d+\.\d+$', version)):
+ return version
+
+ raise SystemExit('[onecc-docker] Failed to get latest onecc version')
+
+
+# 10 sec timeout is set based on github.com/Samsung/ONE/issues/11134
+def _run(cmd, is_shell=False, timeout=10):
+ result = subprocess.Popen(
+ cmd, shell=is_shell, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+ stdout, stderr = result.communicate(timeout=timeout)
+ stdout = stdout.decode('utf-8')
+ stderr = stderr.decode('utf-8')
+
+ if result.returncode:
+ print(stderr, end='')
+ exit(result.returncode)
+ else:
+ return stdout
+
+
+def _image_exists(name):
+ cmd = ['docker', 'images', '-q', name]
+ lines = _run(cmd).splitlines()
+ return lines
+
+
+def main():
+ script_path = os.path.dirname(os.path.realpath(__file__))
+ dockerfile_path = os.path.join(script_path, 'docker')
+
+ onecc_docker_usage = 'onecc-docker [-h] [-t TOKEN] [COMMAND <args>]'
+ onecc_docker_desc = 'Run onecc via docker'
+ parser = argparse.ArgumentParser(
+ usage=onecc_docker_usage, description=onecc_docker_desc)
+ parser.add_argument(
+ "-t",
+ "--token",
+ help=
+        "Token for authentication to GitHub. This is a workaround for the 'rate limit exceeded' error."
+ )
+
+ args, onecc_arguments = parser.parse_known_args()
+ authorization_token = args.token
+
+ recent_version = _request_recent_version(authorization_token)
+ image_name = f"onecc:{recent_version}"
+ build_arg = f"VERSION={recent_version}"
+
+ if not _image_exists(image_name):
+ build_cmd = [
+ "docker", "build", "-t", image_name, "--build-arg", build_arg, dockerfile_path
+ ]
+ print('[onecc-docker] Build docker image ...')
+ _run(build_cmd, timeout=30)
+ print('[onecc-docker] Docker image is built successfully.')
+
+    container_name = f"onecc_{recent_version.replace('.','_')}"
+ user_cmd = ' '.join(onecc_arguments)
+
+ run_cmd = [
+        "docker", "run", "--rm", "-u", "$(id -u):$(id -g)", "--name", container_name,
+ "-v", "${HOME}:${HOME}", "-e", "HOME=${HOME}", "-w", "${PWD}", image_name,
+ user_cmd
+ ]
+
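+    # Join into one string and run through the shell so that $(id -u), ${HOME}
+    # and ${PWD} are expanded on the host.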
+ cmd = ' '.join(run_cmd)
+ output = _run(cmd, is_shell=True)
+ print(output, end='')
+
+
+if __name__ == "__main__":
+ try:
+ main()
+ except Exception as e:
+ prog_name = os.path.basename(__file__)
+ print(f"{prog_name}: {type(e).__name__}: " + str(e), file=sys.stderr)
+ sys.exit(255)
diff --git a/compiler/oneco/CMakeLists.txt b/compiler/oneco/CMakeLists.txt
index 73bc57d43..951194d9d 100644
--- a/compiler/oneco/CMakeLists.txt
+++ b/compiler/oneco/CMakeLists.txt
@@ -1,5 +1,5 @@
nnas_find_package(Protobuf QUIET)
-nnas_find_package(ONNXSource EXACT 1.4.1 QUIET)
+nnas_find_package(ONNXSource EXACT 1.6.0 QUIET)
if(NOT Protobuf_FOUND)
return()
@@ -20,14 +20,13 @@ target_include_directories(moco_onnx_frontend PRIVATE src)
target_include_directories(moco_onnx_frontend PUBLIC include)
target_link_libraries(moco_onnx_frontend PUBLIC moco_onnx_proto)
target_link_libraries(moco_onnx_frontend PUBLIC loco)
-target_link_libraries(moco_onnx_frontend PRIVATE stdex)
target_link_libraries(moco_onnx_frontend PRIVATE cwrap)
-nnas_find_package(GTest QUIET)
-
-if(NOT GTest_FOUND)
+if(NOT ENABLE_TEST)
return()
-endif(NOT GTest_FOUND)
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest QUIET)
add_executable(moco_onnx_frontend_test ${TESTS})
target_include_directories(moco_onnx_frontend_test PRIVATE src)
diff --git a/compiler/oneco/requires.cmake b/compiler/oneco/requires.cmake
index 4e99b0eac..c11a84d9c 100644
--- a/compiler/oneco/requires.cmake
+++ b/compiler/oneco/requires.cmake
@@ -1,3 +1,2 @@
-require("stdex")
require("loco")
require("cwrap")
diff --git a/compiler/oneco/src/Frontend.cpp b/compiler/oneco/src/Frontend.cpp
index d633c1c2e..4b1554ee8 100644
--- a/compiler/oneco/src/Frontend.cpp
+++ b/compiler/oneco/src/Frontend.cpp
@@ -76,8 +76,8 @@ void load_onnx(const std::string &path, moco::onnx::Frontend::FileType type,
// TODO Make comments clear
void convert_graph(::onnx::ModelProto &onnx_model_proto, loco::Graph *graph)
{
- auto nodes = stdex::make_unique<moco::onnx::SymbolTable>();
- auto input_names = stdex::make_unique<moco::onnx::SymbolTable>();
+ auto nodes = std::make_unique<moco::onnx::SymbolTable>();
+ auto input_names = std::make_unique<moco::onnx::SymbolTable>();
moco::onnx::GraphBuilderContext gb_context(graph, nodes.get(), input_names.get());
diff --git a/compiler/oneco/src/GraphBuilder.h b/compiler/oneco/src/GraphBuilder.h
index 7271eb81a..7e463ce9a 100644
--- a/compiler/oneco/src/GraphBuilder.h
+++ b/compiler/oneco/src/GraphBuilder.h
@@ -27,9 +27,9 @@ namespace onnx
{
/**
-* @brief Parent class of onnx operation graph builders
-* @note GraphBuilder call proper build and validate function according to opset version
-*/
+ * @brief Parent class of onnx operation graph builders
+ * @note GraphBuilder call proper build and validate function according to opset version
+ */
class GraphBuilder
{
public:
diff --git a/compiler/oneco/src/GraphBuilderContext.h b/compiler/oneco/src/GraphBuilderContext.h
index f1f394b50..dd368e335 100644
--- a/compiler/oneco/src/GraphBuilderContext.h
+++ b/compiler/oneco/src/GraphBuilderContext.h
@@ -69,13 +69,13 @@ private:
};
/**
-* @brief Class to store context to build IR from onnx
-*/
+ * @brief Class to store context to build IR from onnx
+ */
class GraphBuilderContext
{
public:
GraphBuilderContext(loco::Graph *g, SymbolTable *nodes, SymbolTable *input_names)
- : _g(g), _nodes(nodes), _input_names(input_names)
+ : _g(g), _nodes(nodes), _input_names(input_names)
{
// DO NOTHING
}
diff --git a/compiler/oneco/src/GraphBuilderRegistry.h b/compiler/oneco/src/GraphBuilderRegistry.h
index 1bf4d9514..863a6ee3a 100644
--- a/compiler/oneco/src/GraphBuilderRegistry.h
+++ b/compiler/oneco/src/GraphBuilderRegistry.h
@@ -27,15 +27,15 @@ namespace onnx
{
/**
-* @brief Class to return graph builder for passed onnx Operator
-*/
+ * @brief Class to return graph builder for passed onnx Operator
+ */
class GraphBuilderRegistry
{
public:
/**
- * @brief Returns registered GraphBuilder pointer for operator or
- * nullptr if not registered
- */
+ * @brief Returns registered GraphBuilder pointer for operator or
+ * nullptr if not registered
+ */
const GraphBuilder *lookup(const std::string &op) const
{
if (_builder_map.find(op) == _builder_map.end())
@@ -63,16 +63,16 @@ private:
} // namespace onnx
} // namespace moco
-#include <stdex/Memory.h>
+#include <memory>
-#define REGISTER_OP_BUILDER(NAME, BUILDER) \
- namespace \
- { \
- __attribute__((constructor)) void reg_op(void) \
- { \
- std::unique_ptr<moco::onnx::BUILDER> builder = stdex::make_unique<moco::onnx::BUILDER>(); \
- moco::onnx::GraphBuilderRegistry::get().add(#NAME, std::move(builder)); \
- } \
+#define REGISTER_OP_BUILDER(NAME, BUILDER) \
+ namespace \
+ { \
+ __attribute__((constructor)) void reg_op(void) \
+ { \
+ std::unique_ptr<moco::onnx::BUILDER> builder = std::make_unique<moco::onnx::BUILDER>(); \
+ moco::onnx::GraphBuilderRegistry::get().add(#NAME, std::move(builder)); \
+ } \
}
#endif // __MOCO_FRONTEND_ONNX_GRAPH_BUILDER_REGISTRY_H__
diff --git a/compiler/oneco/src/Op/Constant.h b/compiler/oneco/src/Op/Constant.h
index e25441d58..be74cfcdd 100644
--- a/compiler/oneco/src/Op/Constant.h
+++ b/compiler/oneco/src/Op/Constant.h
@@ -24,8 +24,8 @@ namespace onnx
{
/**
- * @brief GraphBuilder for Constant(since version 1) node
- */
+ * @brief GraphBuilder for Constant(since version 1) node
+ */
class Constant_V1
{
public:
@@ -34,10 +34,10 @@ public:
};
/**
- * @brief GraphBuilder for Constant(since version 9) node
- * @note Until version 1, only FLOAT16, FLOAT, DOUBLE was supported
- * Since version 9, all types are supported
- */
+ * @brief GraphBuilder for Constant(since version 9) node
+ * @note Until version 1, only FLOAT16, FLOAT, DOUBLE was supported
+ * Since version 9, all types are supported
+ */
class Constant_V9
{
public:
@@ -46,8 +46,8 @@ public:
};
/**
- * @brief GraphBuilder for Constant node
- */
+ * @brief GraphBuilder for Constant node
+ */
class ConstantGraphBuilder : public GraphBuilder
{
public:
diff --git a/compiler/oneco/src/Op/Identity.h b/compiler/oneco/src/Op/Identity.h
index 41367bea0..dde614592 100644
--- a/compiler/oneco/src/Op/Identity.h
+++ b/compiler/oneco/src/Op/Identity.h
@@ -24,8 +24,8 @@ namespace onnx
{
/**
- * @brief GraphBuilder for Identity(since version 1) node
- */
+ * @brief GraphBuilder for Identity(since version 1) node
+ */
class Identity_V1
{
public:
@@ -34,8 +34,8 @@ public:
};
/**
- * @brief GraphBuilder for Identity node
- */
+ * @brief GraphBuilder for Identity node
+ */
class IdentityGraphBuilder : public GraphBuilder
{
public:
diff --git a/compiler/onnx-tools/CMakeLists.txt b/compiler/onnx-tools/CMakeLists.txt
new file mode 100644
index 000000000..5935cdfbe
--- /dev/null
+++ b/compiler/onnx-tools/CMakeLists.txt
@@ -0,0 +1,27 @@
+set(ONNX_TOOL_FILES
+ onnx-dump.py
+ onnx-ops.py
+)
+
+foreach(ONNX_TOOL IN ITEMS ${ONNX_TOOL_FILES})
+
+ set(ONNX_TOOL_FILE ${ONNX_TOOL})
+ set(ONNX_TOOL_SRC "${CMAKE_CURRENT_SOURCE_DIR}/${ONNX_TOOL_FILE}")
+ set(ONNX_TOOL_BIN "${CMAKE_CURRENT_BINARY_DIR}/${ONNX_TOOL_FILE}")
+ set(ONNX_TOOL_TARGET "${ONNX_TOOL}_target")
+
+ add_custom_command(OUTPUT ${ONNX_TOOL_BIN}
+ COMMAND ${CMAKE_COMMAND} -E copy "${ONNX_TOOL_SRC}" "${ONNX_TOOL_BIN}"
+ DEPENDS ${ONNX_TOOL_SRC}
+ COMMENT "Generate ${ONNX_TOOL_BIN}"
+ )
+
+ add_custom_target(${ONNX_TOOL_TARGET} ALL DEPENDS ${ONNX_TOOL_BIN})
+
+ install(FILES ${ONNX_TOOL_BIN}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION bin)
+
+endforeach(ONNX_TOOL)
diff --git a/compiler/onnx-tools/README.md b/compiler/onnx-tools/README.md
new file mode 100644
index 000000000..f1b886132
--- /dev/null
+++ b/compiler/onnx-tools/README.md
@@ -0,0 +1,65 @@
+# onnx-tools
+
+_onnx-tools_ provides developer tools to support the ONNX format in the compiler frontend.
+
+## onnx-dump.py
+
+Use `onnx-dump.py` to dump an ONNX model graph in a human-readable text format.
+
+For example, running `python onnx-dump.py mymodel.onnx` prints something like:
+
+```
+[General] -----------------------------
+IR version = 6
+Producer = pytorch 1.6
+
+[Operators] ---------------------------
+ 3 Conv
+ 3 Relu
+...
+
+[Initializers] ------------------------
+"0.bias" FLOAT [16]
+"0.weight" FLOAT [16, 1, 3, 3]
+...
+
+[Nodes] -------------------------------
+Conv("Conv_0")
+ A dilations: [1, 1], group: 1, kernel_shape: [3, 3], pads: [1, 1, 1, 1], strides: [2, 2]
+ I "input.1"
+ I "0.weight"
+ I "0.bias"
+ O "7"
+Relu("Relu_1")
+ I "7"
+ O "8"
+...
+
+[Graph Input/Output]-------------------
+ I: "input.1" FLOAT [1, 1, 28, 28]
+ O: "21" FLOAT [1, 10]
+```
+
+In the `[Nodes]` section, `A` denotes the node's attributes, `I` an input name, and `O` an output name.
+
+`I` and `O` are also used in the `[Graph Input/Output]` section.
+
+## onnx-ops.py
+
+Use `onnx-ops.py` to dump the operators in an ONNX model.
+
+You can combine it with other command-line tools to analyze the operators in the model file.
+
+For example,
+```bash
+$ python onnx-ops.py mymodel.onnx | sort | uniq -c
+ 1 Concat
+ 1 Constant
+ 3 Conv
+ 1 Gather
+ 1 GlobalAveragePool
+ 3 Relu
+ 1 Reshape
+ 1 Shape
+ 1 Unsqueeze
+```
diff --git a/compiler/onnx-tools/onnx-dump.py b/compiler/onnx-tools/onnx-dump.py
new file mode 100644
index 000000000..4f169cbe9
--- /dev/null
+++ b/compiler/onnx-tools/onnx-dump.py
@@ -0,0 +1,146 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import onnx
+import os
+import sys
+
+from onnx import AttributeProto, TensorProto
+from onnx import numpy_helper
+from onnx import helper
+
+
+def _data_type_str(data_type):
+ return TensorProto.DataType.Name(data_type)
+
+
+def _get_attribute_value(attr):
+ if attr.type == AttributeProto.TENSOR:
+ return "{}, {}".format(
+ _data_type_str(attr.t.data_type), numpy_helper.to_array(attr.t))
+ if attr.type == AttributeProto.GRAPH:
+ # TODO revise when graph node is available
+ return "<graph>"
+ if attr.type == AttributeProto.TENSORS:
+ # TODO revise to see contents
+ return "<tensors>..."
+ if attr.type == AttributeProto.GRAPHS:
+ # TODO revise when graph node is available
+ return "<graphs>..."
+ return helper.get_attribute_value(attr)
+
+
+def _dump_header(onnx_model):
+ print("[General] -----------------------------")
+ print("IR version =", onnx_model.ir_version)
+ print("Producer =", onnx_model.producer_name, onnx_model.producer_version)
+ print("")
+
+
+def _dump_operators(onnx_model):
+ opcodes_dict = dict()
+ for node in onnx_model.graph.node:
+ if node.op_type in opcodes_dict:
+ opcodes_dict[node.op_type] = opcodes_dict[node.op_type] + 1
+ else:
+ opcodes_dict[node.op_type] = 1
+
+ print("[Operators] ---------------------------")
+ for opcode_key in opcodes_dict:
+ print("{:>5} {}".format(opcodes_dict[opcode_key], opcode_key))
+
+ print("")
+
+
+def _dump_initializers(onnx_model):
+ print("[Initializers] ------------------------")
+ for initializer in onnx_model.graph.initializer:
+ init_name = '"{}"'.format(initializer.name)
+ dtstr = _data_type_str(initializer.data_type)
+ print('{:<15} {} {}'.format(init_name, dtstr, initializer.dims))
+
+ print("")
+
+
+def _dump_nodes(onnx_model):
+ print("[Nodes] -------------------------------")
+
+ for node in onnx_model.graph.node:
+ print('{0}("{1}")'.format(node.op_type, node.name))
+
+ attribute = ''
+ for attr in node.attribute:
+ if attribute != '':
+ attribute += ', '
+ attribute += "{}: {}".format(attr.name, _get_attribute_value(attr))
+
+ if attribute != '':
+ print(' A {0}'.format(attribute))
+
+ for inp in node.input:
+ print(' I "{0}"'.format(inp))
+ for out in node.output:
+ print(' O "{0}"'.format(out))
+
+ print("")
+
+
+def _dump_inputoutputs(onnx_model):
+ print("[Graph Input/Output]-------------------")
+ for mod_input in onnx_model.graph.input:
+ io_name = '"{}"'.format(mod_input.name)
+ dtstr = _data_type_str(mod_input.type.tensor_type.elem_type)
+ shape = mod_input.type.tensor_type.shape
+ input_shape = [dim.dim_value for dim in shape.dim]
+ print(' I: {:<15} {} {}'.format(io_name, dtstr, input_shape))
+
+ for mod_output in onnx_model.graph.output:
+ io_name = '"{}"'.format(mod_output.name)
+ dtstr = _data_type_str(mod_output.type.tensor_type.elem_type)
+ shape = mod_output.type.tensor_type.shape
+ output_shape = [dim.dim_value for dim in shape.dim]
+ print(' O: {:<15} {} {}'.format(io_name, dtstr, output_shape))
+
+ print("")
+
+
+def _dump_graph(onnx_model):
+ _dump_header(onnx_model)
+ _dump_operators(onnx_model)
+ _dump_initializers(onnx_model)
+ _dump_nodes(onnx_model)
+ _dump_inputoutputs(onnx_model)
+
+
+def _help_exit(cmd_name):
+    print('Dump an ONNX model graph')
+ print('Usage: {0} [onnx_path]'.format(cmd_name))
+ print('')
+ exit()
+
+
+def main():
+ if len(sys.argv) < 2:
+ _help_exit(os.path.basename(sys.argv[0]))
+
+ onnx_model = onnx.load(sys.argv[1])
+ onnx.checker.check_model(onnx_model)
+
+ _dump_graph(onnx_model)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/compiler/onnx-tools/onnx-ops.py b/compiler/onnx-tools/onnx-ops.py
new file mode 100644
index 000000000..5292dc70e
--- /dev/null
+++ b/compiler/onnx-tools/onnx-ops.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import onnx
+import os
+import sys
+
+
+def _dump_operators(onnx_model):
+ for node in onnx_model.graph.node:
+ print(node.op_type)
+
+
+def _help_exit(cmd_name):
+    print('Dump the operators of an ONNX model')
+ print('Usage: {0} [onnx_path]'.format(cmd_name))
+ print('')
+ exit()
+
+
+def main():
+ if len(sys.argv) < 2:
+ _help_exit(os.path.basename(sys.argv[0]))
+
+ onnx_model = onnx.load(sys.argv[1])
+ onnx.checker.check_model(onnx_model)
+
+ _dump_operators(onnx_model)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/compiler/onnx2circle/CMakeLists.txt b/compiler/onnx2circle/CMakeLists.txt
index a0d393bd9..1a5a7e093 100644
--- a/compiler/onnx2circle/CMakeLists.txt
+++ b/compiler/onnx2circle/CMakeLists.txt
@@ -20,7 +20,6 @@ target_link_libraries(onnx2circle PRIVATE moco_log)
target_link_libraries(onnx2circle PRIVATE exo)
target_link_libraries(onnx2circle PRIVATE locop)
target_link_libraries(onnx2circle PRIVATE hermes_std)
-target_link_libraries(onnx2circle PRIVATE stdex)
target_link_libraries(onnx2circle PRIVATE angkor cwrap)
target_link_libraries(onnx2circle PRIVATE mir2loco)
target_link_libraries(onnx2circle PRIVATE mir_onnx_importer)
diff --git a/compiler/onnx2circle/requires.cmake b/compiler/onnx2circle/requires.cmake
index f52e40416..b2268ec8b 100644
--- a/compiler/onnx2circle/requires.cmake
+++ b/compiler/onnx2circle/requires.cmake
@@ -1,4 +1,3 @@
-require("stdex")
require("hermes-std")
require("mir2loco")
require("mir")
diff --git a/compiler/onnx2circle/src/onnx2circle.cpp b/compiler/onnx2circle/src/onnx2circle.cpp
index c329ed3d5..1c03fa1fe 100644
--- a/compiler/onnx2circle/src/onnx2circle.cpp
+++ b/compiler/onnx2circle/src/onnx2circle.cpp
@@ -25,10 +25,8 @@
#include "hermes/ConsoleReporter.h"
#include "hermes/EnvConfig.h"
-#include "stdex/Memory.h"
-
#include <cassert>
-
+#include <memory>
#include <iostream>
#include <stdexcept>
#include <string>
@@ -56,8 +54,8 @@ struct LoggingContext
if (ctx == nullptr)
{
ctx = new hermes::Context;
- ctx->sinks()->append(stdex::make_unique<hermes::ConsoleReporter>());
- ctx->config(stdex::make_unique<EnvConfig>("ONNX2CIRCLE_Log"));
+ ctx->sinks()->append(std::make_unique<hermes::ConsoleReporter>());
+ ctx->config(std::make_unique<EnvConfig>("ONNX2CIRCLE_Log"));
}
return ctx;
@@ -81,7 +79,7 @@ int main(int argc, char **argv)
using EnvConfig = hermes::EnvConfig<hermes::EnvFormat::BooleanNumber>;
// This line allows users to control all the exo-circle loggers via ONNX2CIRCLE_Log_Backend
- exo::LoggingContext::get()->config(stdex::make_unique<EnvConfig>("ONNX2CIRCLE_Log_Backend"));
+ exo::LoggingContext::get()->config(std::make_unique<EnvConfig>("ONNX2CIRCLE_Log_Backend"));
LOGGER(l);
diff --git a/compiler/onnxkit/CMakeLists.txt b/compiler/onnxkit/CMakeLists.txt
index 18f1ed423..9ccc779a8 100644
--- a/compiler/onnxkit/CMakeLists.txt
+++ b/compiler/onnxkit/CMakeLists.txt
@@ -1,5 +1,5 @@
nnas_find_package(Protobuf QUIET)
-nnas_find_package(ONNXSource EXACT 1.4.1 QUIET)
+nnas_find_package(ONNXSource EXACT 1.6.0 QUIET)
if(NOT Protobuf_FOUND)
return()
@@ -24,7 +24,6 @@ target_include_directories(onnxkitproto PUBLIC ${ONNX_PROTO_INCLUDE_DIRS})
target_link_libraries(onnxkitproto PUBLIC libprotobuf)
add_executable(onnxkit ${SOURCES})
-target_link_libraries(onnxkit PRIVATE stdex)
target_link_libraries(onnxkit PRIVATE cli)
target_link_libraries(onnxkit PRIVATE onnxkitproto)
target_link_libraries(onnxkit PRIVATE nncc_common)
diff --git a/compiler/onnxkit/README.md b/compiler/onnxkit/README.md
index d2066cf65..0a863950e 100644
--- a/compiler/onnxkit/README.md
+++ b/compiler/onnxkit/README.md
@@ -58,4 +58,3 @@ nncc$ cat decoded.pbtxt | path_to_onnxkit/onnxkit encode > encoded.pb
- onnx
- Protobuf
- cli
-- stdex
diff --git a/compiler/onnxkit/src/Main.cpp b/compiler/onnxkit/src/Main.cpp
index 3dfd580ec..f97590f7d 100644
--- a/compiler/onnxkit/src/Main.cpp
+++ b/compiler/onnxkit/src/Main.cpp
@@ -18,14 +18,15 @@
#include "DecodeCommand.hpp"
#include <cli/App.h>
-#include <stdex/Memory.h>
+
+#include <memory>
int main(int argc, char **argv)
{
cli::App app{argv[0]};
- app.insert("encode", stdex::make_unique<EncodeCommand>());
- app.insert("decode", stdex::make_unique<DecodeCommand>());
+ app.insert("encode", std::make_unique<EncodeCommand>());
+ app.insert("decode", std::make_unique<DecodeCommand>());
return app.run(argc - 1, argv + 1);
}
diff --git a/compiler/onnxkit/src/Support.cpp b/compiler/onnxkit/src/Support.cpp
index 8c0774175..7740fac6c 100644
--- a/compiler/onnxkit/src/Support.cpp
+++ b/compiler/onnxkit/src/Support.cpp
@@ -16,8 +16,7 @@
#include "Support.hpp"
-#include <stdex/Memory.h>
-
+#include <memory>
#include <cassert>
#include <fstream>
#include <stdexcept>
@@ -33,10 +32,10 @@ std::unique_ptr<T> open_fstream(const std::string &path, std::ios_base::openmode
return nullptr;
}
- auto stream = stdex::make_unique<T>(path.c_str(), mode);
+ auto stream = std::make_unique<T>(path.c_str(), mode);
if (!stream->is_open())
{
- throw std::runtime_error{"ERROR: Failed to open " + path};
+ throw std::runtime_error{"Failed to open " + path};
}
return stream;
}
@@ -61,7 +60,7 @@ std::string Cmdline::get_or(unsigned int index, const std::string &s) const
std::unique_ptr<UI> make_ui(const Cmdline &cmdargs)
{
- auto iocfg = stdex::make_unique<UI>();
+ auto iocfg = std::make_unique<UI>();
auto in = open_fstream<std::ifstream>(cmdargs.get_or(0, "-"), std::ios::in | std::ios::binary);
iocfg->in(std::move(in));
diff --git a/compiler/oops/CMakeLists.txt b/compiler/oops/CMakeLists.txt
index f12572d54..5cc115598 100644
--- a/compiler/oops/CMakeLists.txt
+++ b/compiler/oops/CMakeLists.txt
@@ -1,6 +1,7 @@
add_library(oops INTERFACE)
target_include_directories(oops INTERFACE include)
target_link_libraries(oops INTERFACE pepper_str)
+target_link_libraries(oops INTERFACE nncc_coverage)
if(NOT ENABLE_TEST)
return()
@@ -8,5 +9,5 @@ endif(NOT ENABLE_TEST)
nnas_find_package(GTest REQUIRED)
-GTest_AddTest(oops_test test.cpp)
+GTest_AddTest(oops_test src/oops.test.cpp)
target_link_libraries(oops_test oops)
diff --git a/compiler/oops/include/oops/InternalExn.h b/compiler/oops/include/oops/InternalExn.h
index 0e11085c0..5da3277b7 100644
--- a/compiler/oops/include/oops/InternalExn.h
+++ b/compiler/oops/include/oops/InternalExn.h
@@ -40,20 +40,20 @@ class InternalExn : public std::exception
{
public:
InternalExn(const char *filename, const int line, const std::string &msg)
- : _filename(filename), _line(line), _msg(msg)
+ : _filename(filename), _line(to_uint32(line)), _msg(msg)
{
construct_full_msg();
}
explicit InternalExn(const char *filename, const int line, const std::string &msg, uint32_t val)
- : _filename(filename), _line(line), _msg(msg + ": " + std::to_string(val))
+ : _filename(filename), _line(to_uint32(line)), _msg(msg + ": " + std::to_string(val))
{
construct_full_msg();
}
explicit InternalExn(const char *filename, const int line, const std::string &msg,
const std::string &val)
- : _filename(filename), _line(line), _msg(msg + ": " + val)
+ : _filename(filename), _line(to_uint32(line)), _msg(msg + ": " + val)
{
construct_full_msg();
}
@@ -69,7 +69,7 @@ private:
void construct_full_msg()
{
_full_msg =
- "Internal Exception. " + _msg + " [" + _filename + ":" + std::to_string(_line) + "]";
+ "Internal Exception. " + _msg + " [" + _filename + ":" + std::to_string(_line) + "]";
}
std::string _full_msg;
diff --git a/compiler/oops/include/oops/UserExn.h b/compiler/oops/include/oops/UserExn.h
index d0138322d..84a6b81eb 100644
--- a/compiler/oops/include/oops/UserExn.h
+++ b/compiler/oops/include/oops/UserExn.h
@@ -72,7 +72,9 @@ private:
out << pepper::str(attr, " = ", val);
}
- void build_info(std::stringstream &) { /* empty */}
+ void build_info(std::stringstream &)
+ { /* empty */
+ }
// when only one info of string is provided
void build_info(std::stringstream &out, const std::string &val) { out << val; }
diff --git a/compiler/oops/requires.cmake b/compiler/oops/requires.cmake
new file mode 100644
index 000000000..f68ab1638
--- /dev/null
+++ b/compiler/oops/requires.cmake
@@ -0,0 +1 @@
+require("pepper-str")
diff --git a/compiler/oops/test.cpp b/compiler/oops/src/oops.test.cpp
index 666f62f54..666f62f54 100644
--- a/compiler/oops/test.cpp
+++ b/compiler/oops/src/oops.test.cpp
diff --git a/compiler/pepper-csv2vec/CMakeLists.txt b/compiler/pepper-csv2vec/CMakeLists.txt
new file mode 100644
index 000000000..8c739aad9
--- /dev/null
+++ b/compiler/pepper-csv2vec/CMakeLists.txt
@@ -0,0 +1,19 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(pepper_csv2vec STATIC ${SOURCES})
+set_target_properties(pepper_csv2vec PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(pepper_csv2vec PUBLIC include)
+target_link_libraries(pepper_csv2vec PRIVATE nncc_common)
+target_link_libraries(pepper_csv2vec PUBLIC nncc_coverage)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+# Google Test is mandatory for test
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(pepper_csv2vec_test ${TESTS})
+target_link_libraries(pepper_csv2vec_test pepper_csv2vec)
diff --git a/compiler/pepper-csv2vec/README.md b/compiler/pepper-csv2vec/README.md
new file mode 100644
index 000000000..a42cb0b0e
--- /dev/null
+++ b/compiler/pepper-csv2vec/README.md
@@ -0,0 +1,3 @@
+# pepper-csv2vec
+
+Returns a `std::vector<T>` parsed from a CSV-format string input.
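+
+For example, `pepper::csv_to_vector<int32_t>("1,2,3")` yields `{1, 2, 3}` and `pepper::csv_to_vector<std::string>("hello,world")` yields `{"hello", "world"}`, as exercised by the tests in `src/pepper-csv2vec.test.cpp`.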
diff --git a/compiler/pepper-csv2vec/include/pepper/csv2vec.h b/compiler/pepper-csv2vec/include/pepper/csv2vec.h
new file mode 100644
index 000000000..7027b25b2
--- /dev/null
+++ b/compiler/pepper-csv2vec/include/pepper/csv2vec.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PEPPER_CSV2VEC_H__
+#define __PEPPER_CSV2VEC_H__
+
+#include <string>
+#include <vector>
+
+namespace pepper
+{
+
+template <typename T> std::vector<T> csv_to_vector(const std::string &str);
+
+template <typename T> bool is_one_of(const T &item, const std::vector<T> &items);
+
+} // namespace pepper
+
+#endif // __PEPPER_CSV2VEC_H__
diff --git a/compiler/pepper-csv2vec/src/pepper-csv2vec.cpp b/compiler/pepper-csv2vec/src/pepper-csv2vec.cpp
new file mode 100644
index 000000000..8b56ec883
--- /dev/null
+++ b/compiler/pepper-csv2vec/src/pepper-csv2vec.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "pepper/csv2vec.h"
+
+#include <algorithm>
+#include <sstream>
+#include <cassert>
+
+namespace pepper
+{
+
+template <> std::vector<std::string> csv_to_vector(const std::string &str)
+{
+ std::vector<std::string> ret;
+ std::istringstream is(str);
+ for (std::string item; std::getline(is, item, ',');)
+ {
+ ret.push_back(item);
+ }
+ return ret;
+}
+
+// TODO merge std::string and int32_t type
+
+template <> std::vector<int32_t> csv_to_vector(const std::string &str)
+{
+ std::vector<int32_t> ret;
+ std::istringstream is(str);
+ for (int32_t i; is >> i;)
+ {
+ ret.push_back(i);
+ if (is.peek() == ',')
+ is.ignore();
+ }
+ return ret;
+}
+
+template <> bool is_one_of(const std::string &item, const std::vector<std::string> &items)
+{
+ return std::find(items.begin(), items.end(), item) != items.end();
+}
+
+} // namespace pepper
diff --git a/compiler/pepper-csv2vec/src/pepper-csv2vec.test.cpp b/compiler/pepper-csv2vec/src/pepper-csv2vec.test.cpp
new file mode 100644
index 000000000..0067f86ec
--- /dev/null
+++ b/compiler/pepper-csv2vec/src/pepper-csv2vec.test.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "pepper/csv2vec.h"
+
+#include <gtest/gtest.h>
+
+TEST(csv2vec, simple_string)
+{
+ auto ret = pepper::csv_to_vector<std::string>("hello,world");
+
+ ASSERT_EQ(2, ret.size());
+ ASSERT_TRUE("hello" == ret.at(0));
+ ASSERT_TRUE("world" == ret.at(1));
+}
+
+TEST(csv2vec, simple_int32)
+{
+ auto ret = pepper::csv_to_vector<int32_t>("1,2,3");
+
+ ASSERT_EQ(3, ret.size());
+ ASSERT_EQ(1, ret.at(0));
+ ASSERT_EQ(3, ret.at(2));
+}
+
+TEST(csv2vec, is_one_of)
+{
+ auto ret = pepper::csv_to_vector<std::string>("hello,world");
+
+ ASSERT_TRUE(pepper::is_one_of<std::string>("hello", ret));
+ ASSERT_FALSE(pepper::is_one_of<std::string>("good", ret));
+}
+
+TEST(csv2vec, empty_string_NEG)
+{
+ // should not abort
+ EXPECT_NO_THROW(pepper::csv_to_vector<std::string>(""));
+}
+
+TEST(csv2vec, invalid_int32_NEG)
+{
+ auto ret = pepper::csv_to_vector<int32_t>("hello,world");
+
+ ASSERT_EQ(0, ret.size());
+}
diff --git a/compiler/pepper-str/CMakeLists.txt b/compiler/pepper-str/CMakeLists.txt
index cbe01b86a..481073af7 100644
--- a/compiler/pepper-str/CMakeLists.txt
+++ b/compiler/pepper-str/CMakeLists.txt
@@ -1,5 +1,6 @@
add_library(pepper_str INTERFACE)
target_include_directories(pepper_str INTERFACE include)
+target_link_libraries(pepper_str INTERFACE nncc_coverage)
if(NOT ENABLE_TEST)
return()
@@ -8,5 +9,5 @@ endif(NOT ENABLE_TEST)
# Google Test is mandatory for test
nnas_find_package(GTest REQUIRED)
-GTest_AddTest(pepper_str_test test.cpp)
+GTest_AddTest(pepper_str_test src/pepper-str.test.cpp)
target_link_libraries(pepper_str_test pepper_str)
diff --git a/compiler/pepper-str/include/pepper/str.h b/compiler/pepper-str/include/pepper/str.h
index efbc3a9c8..0c74aa85a 100644
--- a/compiler/pepper-str/include/pepper/str.h
+++ b/compiler/pepper-str/include/pepper/str.h
@@ -47,7 +47,7 @@ inline void str_impl(std::ostream &os, Arg &&arg, Args &&... args)
str_impl(os, std::forward<Args>(args)...);
}
-} // namesapce details
+} // namespace details
} // namespace pepper
namespace pepper
diff --git a/compiler/pepper-str/test.cpp b/compiler/pepper-str/src/pepper-str.test.cpp
index 222c371c8..222c371c8 100644
--- a/compiler/pepper-str/test.cpp
+++ b/compiler/pepper-str/src/pepper-str.test.cpp
diff --git a/compiler/pepper-strcast/CMakeLists.txt b/compiler/pepper-strcast/CMakeLists.txt
index 5f87e9488..bcc07f482 100644
--- a/compiler/pepper-strcast/CMakeLists.txt
+++ b/compiler/pepper-strcast/CMakeLists.txt
@@ -3,7 +3,9 @@ file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
add_library(pepper_strcast STATIC ${SOURCES})
-set_target_properties(pepper_strcast PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(pepper_strcast PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
target_include_directories(pepper_strcast PUBLIC include)
target_link_libraries(pepper_strcast PRIVATE nncc_common)
target_link_libraries(pepper_strcast PUBLIC nncc_coverage)
diff --git a/compiler/pics/CMakeLists.txt b/compiler/pics/CMakeLists.txt
new file mode 100644
index 000000000..053d6a053
--- /dev/null
+++ b/compiler/pics/CMakeLists.txt
@@ -0,0 +1,33 @@
+nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
+if(NOT FlatBuffers_FOUND)
+ message(STATUS "Configure pics: FAILED (missing FlatBuffers)")
+ return()
+endif(NOT FlatBuffers_FOUND)
+
+unset(PICS_DEPS)
+
+###
+### Generate python interface for circle schema
+###
+set(CIRCLE_SCHEMA_PYTHON_DIR "${CMAKE_CURRENT_BINARY_DIR}/circle")
+
+get_target_property(SCHEMA_BIN_PATH mio_circle06 BINARY_DIR)
+
+add_custom_command(
+ OUTPUT ${CIRCLE_SCHEMA_PYTHON_DIR}
+ COMMAND "$<TARGET_FILE:flatbuffers::flatc>" --python
+ -o "${CMAKE_CURRENT_BINARY_DIR}" "${SCHEMA_BIN_PATH}/schema.fbs"
+ DEPENDS flatbuffers::flatc
+ COMMENT "Generate python interface for circle schema"
+)
+
+list(APPEND PICS_DEPS "${CIRCLE_SCHEMA_PYTHON_DIR}")
+
+# This enforces CMake to generate all the dependencies during "build" phase
+add_custom_target(pics ALL DEPENDS ${PICS_DEPS})
+
+install(DIRECTORY ${CIRCLE_SCHEMA_PYTHON_DIR}
+ FILE_PERMISSIONS OWNER_WRITE OWNER_READ
+ GROUP_READ
+ WORLD_READ
+ DESTINATION bin)
diff --git a/compiler/pics/README.md b/compiler/pics/README.md
new file mode 100644
index 000000000..248d1b854
--- /dev/null
+++ b/compiler/pics/README.md
@@ -0,0 +1,16 @@
+# pics
+
+_pics_ is a FlatBuffers Python interface for the circle schema.
+
+## How to use pics in your module?
+
+Add the lines below to your module's `CMakeLists.txt`. They create a symbolic link to the `circle` directory under your module's binary directory.
+
+```
+get_target_property(PICS_BIN_PATH pics BINARY_DIR)
+add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/circle
+ COMMAND ${CMAKE_COMMAND} -E create_symlink
+ ${PICS_BIN_PATH}/circle ${CMAKE_CURRENT_BINARY_DIR}/circle)
+
+# Add dependency to ${CMAKE_CURRENT_BINARY_DIR}/circle
+```
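+
+Once the generated package is importable, it can be read like any FlatBuffers-generated
+Python module. Below is a minimal sketch, assuming a `model.circle` file exists and using
+the usual `GetRootAs` accessor that flatc generates for the schema's root type `Model`:
+
+```python
+from circle.Model import Model
+
+with open('model.circle', 'rb') as f:
+    buf = f.read()
+
+model = Model.GetRootAsModel(buf, 0)
+print(model.Version())          # schema version of the model
+print(model.SubgraphsLength())  # number of subgraphs
+```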
diff --git a/compiler/pics/requires.cmake b/compiler/pics/requires.cmake
new file mode 100644
index 000000000..1b7745795
--- /dev/null
+++ b/compiler/pics/requires.cmake
@@ -0,0 +1 @@
+require("mio-circle06")
diff --git a/compiler/plier-tf/src/TestHelper.cpp b/compiler/plier-tf/src/TestHelper.cpp
index a551e89f9..c1565b5cc 100644
--- a/compiler/plier-tf/src/TestHelper.cpp
+++ b/compiler/plier-tf/src/TestHelper.cpp
@@ -40,7 +40,7 @@ struct membuf : std::streambuf
struct imemstream : virtual membuf, std::istream
{
imemstream(char const *base, size_t size)
- : membuf(base, size), std::istream(static_cast<std::streambuf *>(this))
+ : membuf(base, size), std::istream(static_cast<std::streambuf *>(this))
{
}
};
diff --git a/compiler/pota-quantization-value-test/CMakeLists.txt b/compiler/pota-quantization-value-test/CMakeLists.txt
index 73b9ead73..ec86fd907 100644
--- a/compiler/pota-quantization-value-test/CMakeLists.txt
+++ b/compiler/pota-quantization-value-test/CMakeLists.txt
@@ -1,11 +1,27 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
unset(QUANTIZATION_VALUE_TEST)
unset(QUANTIZATION_VALUE_TEST_WITH_PARAM)
+unset(QUANTIZATION_CONFIG_VALUE_TEST)
+unset(QUANTIZATION_CONFIG_VALUE_TEST_WITH_PARAM)
+unset(QUANTIZATION_WO_VALUE_TEST_WITH_PARAM)
macro(addTest NAME GRANULARITY DTYPE)
list(APPEND QUANTIZATION_VALUE_TEST ${NAME})
list(APPEND QUANTIZATION_VALUE_TEST_WITH_PARAM ${NAME} ${GRANULARITY} ${DTYPE})
endmacro(addTest)
+macro(addQConfTest NAME GRANULARITY DTYPE)
+ list(APPEND QUANTIZATION_CONFIG_VALUE_TEST ${NAME})
+ list(APPEND QUANTIZATION_CONFIG_VALUE_TEST_WITH_PARAM ${NAME} ${GRANULARITY} ${DTYPE})
+endmacro(addQConfTest)
+
+macro(addWeightsOnlyTest NAME GRANULARITY DTYPE)
+ list(APPEND QUANTIZATION_WO_VALUE_TEST_WITH_PARAM ${NAME} ${GRANULARITY} ${DTYPE})
+endmacro(addWeightsOnlyTest)
+
# Read "test.lst"
include("test.lst")
# Read "test.local.lst" if exists
@@ -15,7 +31,10 @@ unset(TEST_DEPS)
get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
-set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_1_13_2")
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/gen_h5_explicit_inputs.py"
+ "${CMAKE_CURRENT_BINARY_DIR}/gen_h5_explicit_inputs.py" COPYONLY)
+
+set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_2_12_1")
###
### Generate test.config
@@ -35,7 +54,13 @@ add_custom_command(
COMMENT "Generate test configuration"
)
-list(APPEND TEST_DEPS "${TEST_CONFIG}")
+# Import pics module
+get_target_property(PICS_BIN_PATH pics BINARY_DIR)
+add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/circle
+ COMMAND ${CMAKE_COMMAND} -E create_symlink
+ ${PICS_BIN_PATH}/circle ${CMAKE_CURRENT_BINARY_DIR}/circle)
+
+list(APPEND TEST_DEPS "${TEST_CONFIG}" "${CMAKE_CURRENT_BINARY_DIR}/circle")
# This enforces CMake to generate all the dependencies during "build" phase
add_custom_target(pota_quantization_value_test_deps ALL DEPENDS ${TEST_DEPS})
@@ -65,5 +90,39 @@ add_test(
${QUANTIZATION_VALUE_TEST_WITH_PARAM}
)
+add_test(
+ NAME pota_fake_wquant_test_with_config
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/test_fake_wquant_with_config.sh"
+ "${TEST_CONFIG}"
+ "${ARTIFACTS_BIN_PATH}"
+ ${QUANTIZATION_CONFIG_VALUE_TEST_WITH_PARAM}
+)
+
+add_test(
+ NAME pota_parallel_record_minmax_test
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/test_parallel_record_minmax.sh"
+ "${TEST_CONFIG}"
+ "${ARTIFACTS_BIN_PATH}"
+ ${QUANTIZATION_VALUE_TEST_WITH_PARAM}
+)
+
+add_test(
+ NAME pota_quantization_test_with_config
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/test_quantization_with_config.sh"
+ "${TEST_CONFIG}"
+ "${ARTIFACTS_BIN_PATH}"
+ ${QUANTIZATION_CONFIG_VALUE_TEST_WITH_PARAM}
+)
+
+add_test(
+ NAME pota_wo_quantization_test
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/test_wo_quantization.sh"
+ "${TEST_CONFIG}"
+ "${ARTIFACTS_BIN_PATH}"
+ ${QUANTIZATION_WO_VALUE_TEST_WITH_PARAM}
+)
+
set_tests_properties(pota_record_minmax_test PROPERTIES DEPENDS pota_fake_wquant_test)
set_tests_properties(pota_quantization_test PROPERTIES DEPENDS pota_record_minmax_test)
+set_tests_properties(pota_parallel_record_minmax_test PROPERTIES DEPENDS pota_record_minmax_test)
+set_tests_properties(pota_quantization_test_with_config PROPERTIES DEPENDS pota_fake_wquant_test_with_config)
diff --git a/compiler/pota-quantization-value-test/README.md b/compiler/pota-quantization-value-test/README.md
index e3359ae4f..d6d003b4b 100644
--- a/compiler/pota-quantization-value-test/README.md
+++ b/compiler/pota-quantization-value-test/README.md
@@ -39,3 +39,17 @@ The expected output should include
(1) scale, zero point of activations
(2) scale, zero point, values of weights
(3) scale, values (weights) of bias
+
+### Golden data
+
+Golden data was generated as follows.
+
+(1) Generate random h5 input for a target model (using gen_h5_random_inputs.py in `record-minmax-conversion-test`)
+
+(2) Run `dalgona` with the target model, input data, and analysis code named GenGoldenWeights.py for uint8 (GenGoldenWeightsSym.py for int16) (https://github.com/Samsung/ONE/pull/3501)
+
+(3) Do fake quantization using circle-quantizer
+
+(4) Run `dalgona` with the fake-quantized model, input data, and analysis code named GenGoldenActBias.py for uint8 (GenGoldenActBiasSym.py for int16) (https://github.com/Samsung/ONE/pull/3501)
+
+(5) Edit generated data for some operators (concat: scale propagation, mean: axis data)
diff --git a/compiler/pota-quantization-value-test/compare_tensors.py b/compiler/pota-quantization-value-test/compare_tensors.py
index 7d95d182d..fceeba547 100755
--- a/compiler/pota-quantization-value-test/compare_tensors.py
+++ b/compiler/pota-quantization-value-test/compare_tensors.py
@@ -19,7 +19,9 @@ parser.add_argument('--expect_dir', type=str, required=True)
parser.add_argument('--mode', type=str, required=True)
args = parser.parse_args()
-supported_modes = ["fake_quantization", "record_minmax", "quantization"]
+supported_modes = [
+ "fake_quantization", "record_minmax", "quantization", "weights_only_quantization"
+]
model = args.input_h5
expect_dir = args.expect_dir
@@ -68,8 +70,14 @@ def compare_quantization(tensor, tensor_name, expect_dir):
for key in json_load:
if key == "weights":
expected_weights = np.array(json_load["weights"])
- input_weights = tensor["weights"][:]
- if np.allclose(input_weights, expected_weights, rtol=0, atol=1) == False:
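+            # tensor["weights"][()] reads the entire HDF5 dataset into a numpy array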
+ input_weights = tensor["weights"][()]
+ abs_tolerance = 1
+ # We use higher tolerance for int64 data (bias of int16-quantized model)
+ if tensor["weights"].dtype == 'int64':
+ abs_tolerance = 5
+
+ if np.allclose(
+ input_weights, expected_weights, rtol=0, atol=abs_tolerance) == False:
print("Quantized weights of " + tensor_name + " (" + str(input_weights) +
") do not match with expected value (" + str(expected_weights) +
").")
@@ -105,6 +113,10 @@ with h5.File(model, "r") as input:
compare_record_minmax(input[tensor_name], tensor_name, expect_dir)
elif mode == "quantization":
compare_quantization(input[tensor_name], tensor_name, expect_dir)
+ elif mode == "weights_only_quantization":
+ # Assume weights have name "ker"
+ if tensor_name == "ker":
+ compare_quantization(input[tensor_name], tensor_name, expect_dir)
else:
raise SystemExit("Unsupproted mode.")
diff --git a/compiler/pota-quantization-value-test/config_files/Add_002/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/Add_002/channel/int16/qconf.json
new file mode 100644
index 000000000..838b331fd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/Add_002/channel/int16/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "layer"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/Add_002/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/Add_002/layer/uint8/qconf.json
new file mode 100644
index 000000000..7cd6ce713
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/Add_002/layer/uint8/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/AveragePool2D_000/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/AveragePool2D_000/channel/int16/qconf.json
new file mode 100644
index 000000000..838b331fd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/AveragePool2D_000/channel/int16/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "layer"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/AveragePool2D_000/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/AveragePool2D_000/layer/uint8/qconf.json
new file mode 100644
index 000000000..7cd6ce713
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/AveragePool2D_000/layer/uint8/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/Concatenation_001/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/Concatenation_001/channel/int16/qconf.json
new file mode 100644
index 000000000..838b331fd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/Concatenation_001/channel/int16/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "layer"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/Concatenation_001/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/Concatenation_001/layer/uint8/qconf.json
new file mode 100644
index 000000000..7cd6ce713
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/Concatenation_001/layer/uint8/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/Conv2D_004/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/Conv2D_004/channel/int16/qconf.json
new file mode 100644
index 000000000..838b331fd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/Conv2D_004/channel/int16/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "layer"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/Conv2D_004/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/Conv2D_004/layer/uint8/qconf.json
new file mode 100644
index 000000000..7cd6ce713
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/Conv2D_004/layer/uint8/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/DepthwiseConv2D_002/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/DepthwiseConv2D_002/channel/int16/qconf.json
new file mode 100644
index 000000000..838b331fd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/DepthwiseConv2D_002/channel/int16/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "layer"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/DepthwiseConv2D_002/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/DepthwiseConv2D_002/layer/uint8/qconf.json
new file mode 100644
index 000000000..7cd6ce713
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/DepthwiseConv2D_002/layer/uint8/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/FullyConnected_003/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/FullyConnected_003/channel/int16/qconf.json
new file mode 100644
index 000000000..174d6e9b0
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/FullyConnected_003/channel/int16/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "out",
+ "dtype" : "uint8",
+ "granularity" : "layer"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/FullyConnected_003/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/FullyConnected_003/layer/uint8/qconf.json
new file mode 100644
index 000000000..733f46e60
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/FullyConnected_003/layer/uint8/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "out",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/InstanceNorm_001/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/InstanceNorm_001/channel/int16/qconf.json
new file mode 100644
index 000000000..838b331fd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/InstanceNorm_001/channel/int16/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "layer"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/InstanceNorm_001/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/InstanceNorm_001/layer/uint8/qconf.json
new file mode 100644
index 000000000..7cd6ce713
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/InstanceNorm_001/layer/uint8/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/MaxPool2D_000/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/MaxPool2D_000/channel/int16/qconf.json
new file mode 100644
index 000000000..838b331fd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/MaxPool2D_000/channel/int16/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "layer"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/MaxPool2D_000/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/MaxPool2D_000/layer/uint8/qconf.json
new file mode 100644
index 000000000..7cd6ce713
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/MaxPool2D_000/layer/uint8/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/Mean_000/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/Mean_000/channel/int16/qconf.json
new file mode 100644
index 000000000..838b331fd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/Mean_000/channel/int16/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "layer"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/Mean_000/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/Mean_000/layer/uint8/qconf.json
new file mode 100644
index 000000000..7cd6ce713
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/Mean_000/layer/uint8/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/Mul_001/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/Mul_001/channel/int16/qconf.json
new file mode 100644
index 000000000..838b331fd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/Mul_001/channel/int16/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "layer"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/Mul_001/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/Mul_001/layer/uint8/qconf.json
new file mode 100644
index 000000000..7cd6ce713
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/Mul_001/layer/uint8/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/PRelu_001/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/PRelu_001/channel/int16/qconf.json
new file mode 100644
index 000000000..838b331fd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/PRelu_001/channel/int16/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "layer"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/PRelu_001/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/PRelu_001/layer/uint8/qconf.json
new file mode 100644
index 000000000..7cd6ce713
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/PRelu_001/layer/uint8/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/ReLU_000/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/ReLU_000/channel/int16/qconf.json
new file mode 100644
index 000000000..838b331fd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/ReLU_000/channel/int16/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "layer"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/ReLU_000/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/ReLU_000/layer/uint8/qconf.json
new file mode 100644
index 000000000..7cd6ce713
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/ReLU_000/layer/uint8/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/Split_000/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/Split_000/channel/int16/qconf.json
new file mode 100644
index 000000000..630c3e420
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/Split_000/channel/int16/qconf.json
@@ -0,0 +1,14 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm1",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ },
+ {
+ "name" : "ofm2",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/Split_000/channel/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/Split_000/channel/uint8/qconf.json
new file mode 100644
index 000000000..cc98d7c62
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/Split_000/channel/uint8/qconf.json
@@ -0,0 +1,14 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm1",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ },
+ {
+ "name" : "ofm2",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/TransposeConv_001/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/TransposeConv_001/channel/int16/qconf.json
new file mode 100644
index 000000000..838b331fd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/TransposeConv_001/channel/int16/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "layer"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/TransposeConv_001/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/TransposeConv_001/layer/uint8/qconf.json
new file mode 100644
index 000000000..7cd6ce713
--- /dev/null
+++ b/compiler/pota-quantization-value-test/config_files/TransposeConv_001/layer/uint8/qconf.json
@@ -0,0 +1,9 @@
+{
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Add_002/channel/int16/quantization/ifm1.json b/compiler/pota-quantization-value-test/expected_outputs/Add_002/channel/int16/quantization/ifm1.json
new file mode 100644
index 000000000..a7298cb58
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Add_002/channel/int16/quantization/ifm1.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00014653272228315473,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Add_002/channel/int16/quantization/ifm2.json b/compiler/pota-quantization-value-test/expected_outputs/Add_002/channel/int16/quantization/ifm2.json
new file mode 100644
index 000000000..ab968c9fc
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Add_002/channel/int16/quantization/ifm2.json
@@ -0,0 +1,32 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 4096,
+ 8192,
+ -12288
+ ],
+ [
+ -16384,
+ -20479,
+ 24575
+ ]
+ ],
+ [
+ [
+ -28671,
+ 32767,
+ 16384
+ ],
+ [
+ -8192,
+ 12288,
+ -4096
+ ]
+ ]
+ ]
+ ],
+ "scale": 0.0002441480755805969,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Add_002/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Add_002/channel/int16/quantization/ofm.json
new file mode 100644
index 000000000..3cb0552e9
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Add_002/channel/int16/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00037035736022517085,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Add_002/channel/int16/record_minmax/ifm1.json b/compiler/pota-quantization-value-test/expected_outputs/Add_002/channel/int16/record_minmax/ifm1.json
new file mode 100644
index 000000000..097ef6a03
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Add_002/channel/int16/record_minmax/ifm1.json
@@ -0,0 +1,4 @@
+{
+ "min": -4.801437664031982,
+ "max": 4.600067481994629
+}
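
Note: the int16 values above are consistent with symmetric activation quantization, scale = max(|min|, |max|) / 32767 with zero_point = 0, applied to the recorded min/max. A quick check in Python (the stored scale is the float32 rounding of this double-precision result):

    # ifm1: min/max recorded above, scale from ifm1.json
    min_v, max_v = -4.801437664031982, 4.600067481994629
    print(max(abs(min_v), abs(max_v)) / 32767.0)
    # ~0.000146532717; stored as 0.00014653272228315473 (float32)
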
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Add_002/channel/int16/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Add_002/channel/int16/record_minmax/ofm.json
new file mode 100644
index 000000000..5ebbba10e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Add_002/channel/int16/record_minmax/ofm.json
@@ -0,0 +1,4 @@
+{
+ "min": -11.26651382446289,
+ "max": 12.135499725341797
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Add_002/layer/uint8/quantization/ifm1.json b/compiler/pota-quantization-value-test/expected_outputs/Add_002/layer/uint8/quantization/ifm1.json
new file mode 100644
index 000000000..a223fa4aa
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Add_002/layer/uint8/quantization/ifm1.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.038489170372486115,
+ "zero_point": 129.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Add_002/layer/uint8/quantization/ifm2.json b/compiler/pota-quantization-value-test/expected_outputs/Add_002/layer/uint8/quantization/ifm2.json
new file mode 100644
index 000000000..ec6082d55
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Add_002/layer/uint8/quantization/ifm2.json
@@ -0,0 +1,32 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 136,
+ 153,
+ 68
+ ],
+ [
+ 51,
+ 34,
+ 221
+ ]
+ ],
+ [
+ [
+ 0,
+ 255,
+ 187
+ ],
+ [
+ 85,
+ 170,
+ 102
+ ]
+ ]
+ ]
+ ],
+ "scale": 0.05882352963089943,
+ "zero_point": 119.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Add_002/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Add_002/layer/uint8/quantization/ofm.json
new file mode 100644
index 000000000..afa9b1a8e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Add_002/layer/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.0892433300614357,
+ "zero_point": 134.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Add_002/layer/uint8/record_minmax/ifm1.json b/compiler/pota-quantization-value-test/expected_outputs/Add_002/layer/uint8/record_minmax/ifm1.json
new file mode 100644
index 000000000..0138d54cf
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Add_002/layer/uint8/record_minmax/ifm1.json
@@ -0,0 +1,4 @@
+{
+ "min": -4.9815891456604,
+ "max": 4.833149127960205
+}
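
Note: the uint8 values appear to follow the asymmetric affine scheme, scale = (max - min) / 255 and zero_point = round(-min / scale). Checking against the recorded min/max above:

    min_v, max_v = -4.9815891456604, 4.833149127960205  # recorded above
    scale = (max_v - min_v) / 255.0                     # ~0.0384891697
    print(scale, round(-min_v / scale))
    # stored: scale 0.038489170372486115 (float32 of this), zero_point 129
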
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Add_002/layer/uint8/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Add_002/layer/uint8/record_minmax/ofm.json
new file mode 100644
index 000000000..8edbed5b6
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Add_002/layer/uint8/record_minmax/ofm.json
@@ -0,0 +1,4 @@
+{
+ "min": -11.962269973754882,
+ "max": 10.79477970123291
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ifm1_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ifm1_Quantize.json
new file mode 100644
index 000000000..a223fa4aa
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ifm1_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.038489170372486115,
+ "zero_point": 129.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ifm2.json b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ifm2.json
new file mode 100644
index 000000000..ec6082d55
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ifm2.json
@@ -0,0 +1,32 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 136,
+ 153,
+ 68
+ ],
+ [
+ 51,
+ 34,
+ 221
+ ]
+ ],
+ [
+ [
+ 0,
+ 255,
+ 187
+ ],
+ [
+ 85,
+ 170,
+ 102
+ ]
+ ]
+ ]
+ ],
+ "scale": 0.05882352963089943,
+ "zero_point": 119.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ofm.json
new file mode 100644
index 000000000..afa9b1a8e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.0892433300614357,
+ "zero_point": 134.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ifm1_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ifm1_Quantize.json
new file mode 100644
index 000000000..a7298cb58
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ifm1_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00014653272228315473,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ifm2.json b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ifm2.json
new file mode 100644
index 000000000..ab968c9fc
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ifm2.json
@@ -0,0 +1,32 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 4096,
+ 8192,
+ -12288
+ ],
+ [
+ -16384,
+ -20479,
+ 24575
+ ]
+ ],
+ [
+ [
+ -28671,
+ 32767,
+ 16384
+ ],
+ [
+ -8192,
+ 12288,
+ -4096
+ ]
+ ]
+ ]
+ ],
+ "scale": 0.0002441480755805969,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ofm.json
new file mode 100644
index 000000000..3cb0552e9
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00037035736022517085,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000/channel/int16/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000/channel/int16/quantization/ifm.json
new file mode 100644
index 000000000..353f15a6b
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000/channel/int16/quantization/ifm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.0001523942337371409,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000/channel/int16/quantization/ofm.json
new file mode 100644
index 000000000..c4ace78d4
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000/channel/int16/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00012122748012188822,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000/channel/int16/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000/channel/int16/record_minmax/ifm.json
new file mode 100644
index 000000000..2918a2323
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000/channel/int16/record_minmax/ifm.json
@@ -0,0 +1,4 @@
+{
+ "min": -4.9582903289794915,
+ "max": 4.9935017013549805
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000/channel/int16/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000/channel/int16/record_minmax/ofm.json
new file mode 100644
index 000000000..4d78b2007
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000/channel/int16/record_minmax/ofm.json
@@ -0,0 +1,4 @@
+{
+ "min": -3.9722607898712154,
+ "max": 3.720821704864502
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000/layer/uint8/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000/layer/uint8/quantization/ifm.json
new file mode 100644
index 000000000..0528cc9cc
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000/layer/uint8/quantization/ifm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.03911808878183365,
+ "zero_point": 127.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000/layer/uint8/quantization/ofm.json
new file mode 100644
index 000000000..ac5da0bda
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000/layer/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.027372928336262703,
+ "zero_point": 141.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000/layer/uint8/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000/layer/uint8/record_minmax/ifm.json
new file mode 100644
index 000000000..8701c51ff
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000/layer/uint8/record_minmax/ifm.json
@@ -0,0 +1,4 @@
+{
+ "min": -4.9830295753479,
+ "max": 4.992084045410156
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000/layer/uint8/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000/layer/uint8/record_minmax/ofm.json
new file mode 100644
index 000000000..b2bb2d227
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000/layer/uint8/record_minmax/ofm.json
@@ -0,0 +1,4 @@
+{
+ "min": -3.863597021102905,
+ "max": 3.1164999485015867
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/channel/int16/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/channel/int16/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..0528cc9cc
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/channel/int16/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.03911808878183365,
+ "zero_point": 127.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/channel/int16/quantization/ofm.json
new file mode 100644
index 000000000..ac5da0bda
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/channel/int16/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.027372928336262703,
+ "zero_point": 141.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/layer/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/layer/uint8/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..353f15a6b
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/layer/uint8/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.0001523942337371409,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/layer/uint8/quantization/ofm.json
new file mode 100644
index 000000000..c4ace78d4
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/layer/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00012122748012188822,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/channel/int16/quantization/ifm1.json b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/channel/int16/quantization/ifm1.json
new file mode 100644
index 000000000..71265a270
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/channel/int16/quantization/ifm1.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.0002441480755805969,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/channel/int16/quantization/ifm2.json b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/channel/int16/quantization/ifm2.json
new file mode 100644
index 000000000..53d7cdba3
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/channel/int16/quantization/ifm2.json
@@ -0,0 +1,28 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 4096,
+ 8192
+ ],
+ [
+ -12288,
+ -16384
+ ]
+ ],
+ [
+ [
+ -20479,
+ 24575
+ ],
+ [
+ -28671,
+ 32767
+ ]
+ ]
+ ]
+ ],
+ "scale": 0.0002441480755805969,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/channel/int16/quantization/ofm.json
new file mode 100644
index 000000000..71265a270
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/channel/int16/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.0002441480755805969,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/channel/int16/record_minmax/ifm1.json b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/channel/int16/record_minmax/ifm1.json
new file mode 100644
index 000000000..5f5c917d3
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/channel/int16/record_minmax/ifm1.json
@@ -0,0 +1,4 @@
+{
+ "min": -4.591858749389648,
+ "max": 3.884731464385986
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/channel/int16/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/channel/int16/record_minmax/ofm.json
new file mode 100644
index 000000000..700674c7c
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/channel/int16/record_minmax/ofm.json
@@ -0,0 +1,4 @@
+{
+ "min": -7.0,
+ "max": 8.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/layer/uint8/quantization/ifm1.json b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/layer/uint8/quantization/ifm1.json
new file mode 100644
index 000000000..522880618
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/layer/uint8/quantization/ifm1.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.05882352963089943,
+ "zero_point": 119.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/layer/uint8/quantization/ifm2.json b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/layer/uint8/quantization/ifm2.json
new file mode 100644
index 000000000..17ba25363
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/layer/uint8/quantization/ifm2.json
@@ -0,0 +1,28 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 136,
+ 153
+ ],
+ [
+ 68,
+ 51
+ ]
+ ],
+ [
+ [
+ 34,
+ 221
+ ],
+ [
+ 0,
+ 255
+ ]
+ ]
+ ]
+ ],
+ "scale": 0.05882352963089943,
+ "zero_point": 119.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/layer/uint8/quantization/ofm.json
new file mode 100644
index 000000000..522880618
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/layer/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.05882352963089943,
+ "zero_point": 119.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/layer/uint8/record_minmax/ifm1.json b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/layer/uint8/record_minmax/ifm1.json
new file mode 100644
index 000000000..dc8d1db1e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/layer/uint8/record_minmax/ifm1.json
@@ -0,0 +1,4 @@
+{
+ "min": -2.8125765800476072,
+ "max": 4.720572299957276
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/layer/uint8/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/layer/uint8/record_minmax/ofm.json
new file mode 100644
index 000000000..700674c7c
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001/layer/uint8/record_minmax/ofm.json
@@ -0,0 +1,4 @@
+{
+ "min": -7.0,
+ "max": 8.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ifm1_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ifm1_Quantize.json
new file mode 100644
index 000000000..522880618
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ifm1_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.05882352963089943,
+ "zero_point": 119.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ifm2.json b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ifm2.json
new file mode 100644
index 000000000..17ba25363
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ifm2.json
@@ -0,0 +1,28 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 136,
+ 153
+ ],
+ [
+ 68,
+ 51
+ ]
+ ],
+ [
+ [
+ 34,
+ 221
+ ],
+ [
+ 0,
+ 255
+ ]
+ ]
+ ]
+ ],
+ "scale": 0.05882352963089943,
+ "zero_point": 119.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ofm.json
new file mode 100644
index 000000000..522880618
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.05882352963089943,
+ "zero_point": 119.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ifm1_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ifm1_Quantize.json
new file mode 100644
index 000000000..71265a270
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ifm1_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.0002441480755805969,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ifm2.json b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ifm2.json
new file mode 100644
index 000000000..53d7cdba3
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ifm2.json
@@ -0,0 +1,28 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 4096,
+ 8192
+ ],
+ [
+ -12288,
+ -16384
+ ]
+ ],
+ [
+ [
+ -20479,
+ 24575
+ ],
+ [
+ -28671,
+ 32767
+ ]
+ ]
+ ]
+ ],
+ "scale": 0.0002441480755805969,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ofm.json
new file mode 100644
index 000000000..71265a270
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.0002441480755805969,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int16/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int16/fake_quantization/ker.json
new file mode 100644
index 000000000..8817cbef7
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int16/fake_quantization/ker.json
@@ -0,0 +1,48 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 1.000030517578125,
+ 2.00006103515625
+ ],
+ [
+ -3.000091552734375,
+ -4.0001220703125
+ ]
+ ],
+ [
+ [
+ -4.999908447265625,
+ 5.99993896484375
+ ],
+ [
+ -6.999969482421875,
+ 8.0
+ ]
+ ]
+ ],
+ [
+ [
+ [
+ 4.0001220703125,
+ -2.00006103515625
+ ],
+ [
+ 3.000091552734375,
+ -1.000030517578125
+ ]
+ ],
+ [
+ [
+ -8.0,
+ -5.99993896484375
+ ],
+ [
+ 6.999969482421875,
+ 4.999908447265625
+ ]
+ ]
+ ]
+ ]
+}
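
Note: the fake_quantization files hold weights after a quantize-dequantize round trip, i.e. round(w / scale) * scale, which pins each float to the quantization grid. A sketch, assuming the per-channel scale recorded in ker.json below (8.0 / 32767):

    def fake_quant(w, max_abs, qmax=32767):
        scale = max_abs / qmax           # symmetric per-channel scale
        return round(w / scale) * scale  # back to float, on the grid

    print(fake_quant(1.0, 8.0))
    # ~1.0000305180; the stored 1.000030517578125 is its float32 rounding
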
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int16/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int16/quantization/bias.json
new file mode 100644
index 000000000..b00d8d211
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int16/quantization/bias.json
@@ -0,0 +1,10 @@
+{
+ "weights": [
+ 26925029,
+ 53850057
+ ],
+ "scale": [
+ 3.714016479907864e-08,
+ 3.714016479907864e-08
+ ]
+}
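
Note: bias quantization here appears to use bias_scale = ifm_scale * ker_scale with q = round(bias / bias_scale); the stored weights check out against float bias values of 1.0 and 2.0 (an assumption read back from the numbers, not stated in the diff):

    ifm_scale = 0.00015212147263810039     # ifm.json below
    ker_scale = 0.00024414807580797754     # ker.json below
    print(ifm_scale * ker_scale)           # ~3.7140165e-08, stored as float32
    s = 3.714016479907864e-08              # stored bias scale
    print(round(1.0 / s), round(2.0 / s))  # -> 26925029 53850057
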
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int16/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int16/quantization/ifm.json
new file mode 100644
index 000000000..df5d06c09
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int16/quantization/ifm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00015212147263810039,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int16/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int16/quantization/ker.json
new file mode 100644
index 000000000..94c794fbb
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int16/quantization/ker.json
@@ -0,0 +1,61 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 4096,
+ 8192
+ ],
+ [
+ -12288,
+ -16384
+ ]
+ ],
+ [
+ [
+ -20479,
+ 24575
+ ],
+ [
+ -28671,
+ 32767
+ ]
+ ]
+ ],
+ [
+ [
+ [
+ 16384,
+ -8192
+ ],
+ [
+ 12288,
+ -4096
+ ]
+ ],
+ [
+ [
+ -32767,
+ -24575
+ ],
+ [
+ 28671,
+ 20479
+ ]
+ ]
+ ]
+ ],
+ "scale": [
+ 0.00024414807580797754,
+ 0.00024414807580797754
+ ],
+ "zero_point": 0.0,
+ "min": [
+ -8.0,
+ -8.0
+ ],
+ "max": [
+ 8.0,
+ 8.0
+ ]
+}
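
Note: the kernel above is quantized per output channel with the symmetric int16 scheme: scale_c = max(|min_c|, |max_c|) / 32767 (8.0 / 32767 for both channels here) and q = clamp(round(w / scale_c), -32767, 32767). For example:

    def quant_int16(w, max_abs, qmax=32767):
        scale = max_abs / qmax
        return max(-qmax, min(qmax, round(w / scale)))

    print(quant_int16(-7.0, 8.0))  # -> -28671, as in the first channel
    print(quant_int16(8.0, 8.0))   # ->  32767 (saturates at qmax)
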
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int16/quantization/ofm.json
new file mode 100644
index 000000000..e02eeb9dc
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int16/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.002048635622486472,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int16/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int16/record_minmax/ifm.json
new file mode 100644
index 000000000..263de8644
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int16/record_minmax/ifm.json
@@ -0,0 +1,4 @@
+{
+ "min": -4.964057750701905,
+ "max": 4.984564266204834
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int16/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int16/record_minmax/ofm.json
new file mode 100644
index 000000000..4d969f6ef
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int16/record_minmax/ofm.json
@@ -0,0 +1,4 @@
+{
+ "min": 0.0,
+ "max": 67.1276399230957
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int16/wo_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int16/wo_quantization/ker.json
new file mode 100644
index 000000000..94c794fbb
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int16/wo_quantization/ker.json
@@ -0,0 +1,61 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 4096,
+ 8192
+ ],
+ [
+ -12288,
+ -16384
+ ]
+ ],
+ [
+ [
+ -20479,
+ 24575
+ ],
+ [
+ -28671,
+ 32767
+ ]
+ ]
+ ],
+ [
+ [
+ [
+ 16384,
+ -8192
+ ],
+ [
+ 12288,
+ -4096
+ ]
+ ],
+ [
+ [
+ -32767,
+ -24575
+ ],
+ [
+ 28671,
+ 20479
+ ]
+ ]
+ ]
+ ],
+ "scale": [
+ 0.00024414807580797754,
+ 0.00024414807580797754
+ ],
+ "zero_point": 0.0,
+ "min": [
+ -8.0,
+ -8.0
+ ],
+ "max": [
+ 8.0,
+ 8.0
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int8/wo_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int8/wo_quantization/ker.json
new file mode 100644
index 000000000..6df116e65
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/int8/wo_quantization/ker.json
@@ -0,0 +1,61 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 16,
+ 32
+ ],
+ [
+ -48,
+ -64
+ ]
+ ],
+ [
+ [
+ -79,
+ 95
+ ],
+ [
+ -111,
+ 127
+ ]
+ ]
+ ],
+ [
+ [
+ [
+ 64,
+ -32
+ ],
+ [
+ 48,
+ -16
+ ]
+ ],
+ [
+ [
+ -127,
+ -95
+ ],
+ [
+ 111,
+ 79
+ ]
+ ]
+ ]
+ ],
+ "scale": [
+ 0.06299212574958801,
+ 0.06299212574958801
+ ],
+ "zero_point": 0.0,
+ "min": [
+ -8.0,
+ -8.0
+ ],
+ "max": [
+ 8.0,
+ 8.0
+ ]
+}
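
Note: the int8 file above appears to use the same per-channel scheme with qmax = 127:

    scale = 8.0 / 127.0                       # ~0.0629921, stored as float32
    print(round(-4.999908447265625 / scale))  # -> -79, matching the file
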
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/fake_quantization/ker.json
new file mode 100644
index 000000000..2558bb2be
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/fake_quantization/ker.json
@@ -0,0 +1,48 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 1.0039215087890625,
+ 2.007843017578125
+ ],
+ [
+ -3.0117650032043457,
+ -4.015686511993408
+ ]
+ ],
+ [
+ [
+ -5.019608497619629,
+ 6.023530006408691
+ ],
+ [
+ -7.027451515197754,
+ 7.9686279296875
+ ]
+ ]
+ ],
+ [
+ [
+ [
+ 4.01568603515625,
+ -2.007843494415283
+ ],
+ [
+ 3.0117645263671875,
+ -1.0039215087890625
+ ]
+ ],
+ [
+ [
+ -7.9686279296875,
+ -6.023530006408691
+ ],
+ [
+ 7.027451515197754,
+ 5.019608497619629
+ ]
+ ]
+ ]
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/bias.json
new file mode 100644
index 000000000..50d44ece7
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/bias.json
@@ -0,0 +1,7 @@
+{
+ "weights": [
+ 4069,
+ 8138
+ ],
+ "scale": 0.0002457468386200985
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..24508860d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.003916590008884668,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ker.json
new file mode 100644
index 000000000..b249a0ce5
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ker.json
@@ -0,0 +1,52 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 143,
+ 159
+ ],
+ [
+ 79,
+ 63
+ ]
+ ],
+ [
+ [
+ 47,
+ 223
+ ],
+ [
+ 15,
+ 254
+ ]
+ ]
+ ],
+ [
+ [
+ [
+ 191,
+ 95
+ ],
+ [
+ 175,
+ 111
+ ]
+ ],
+ [
+ [
+ 0,
+ 31
+ ],
+ [
+ 239,
+ 207
+ ]
+ ]
+ ]
+ ],
+ "scale": 0.062745101749897,
+ "zero_point": 127.0,
+ "min": -7.9686279296875,
+ "max": 8.031373023986816
+}
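
Note: with layer granularity and uint8, the weights above are quantized asymmetrically: scale = (max - min) / 255, zero_point = round(-min / scale), q = round(w / scale) + zero_point. Checking against the stored min/max:

    w_min, w_max = -7.9686279296875, 8.031373023986816
    scale = (w_max - w_min) / 255.0  # ~0.06274510, as stored
    zp = round(-w_min / scale)       # -> 127
    print(round(1.0 / scale) + zp)   # -> 143, the first weight above
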
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ofm.json
new file mode 100644
index 000000000..a2dd6681f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.037479765713214874,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/fake_quantization/ker.json
new file mode 100644
index 000000000..8817cbef7
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/fake_quantization/ker.json
@@ -0,0 +1,48 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 1.000030517578125,
+ 2.00006103515625
+ ],
+ [
+ -3.000091552734375,
+ -4.0001220703125
+ ]
+ ],
+ [
+ [
+ -4.999908447265625,
+ 5.99993896484375
+ ],
+ [
+ -6.999969482421875,
+ 8.0
+ ]
+ ]
+ ],
+ [
+ [
+ [
+ 4.0001220703125,
+ -2.00006103515625
+ ],
+ [
+ 3.000091552734375,
+ -1.000030517578125
+ ]
+ ],
+ [
+ [
+ -8.0,
+ -5.99993896484375
+ ],
+ [
+ 6.999969482421875,
+ 4.999908447265625
+ ]
+ ]
+ ]
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/bias.json
new file mode 100644
index 000000000..b00d8d211
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/bias.json
@@ -0,0 +1,10 @@
+{
+ "weights": [
+ 26925029,
+ 53850057
+ ],
+ "scale": [
+ 3.714016479907864e-08,
+ 3.714016479907864e-08
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..df5d06c09
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00015212147263810039,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ker.json
new file mode 100644
index 000000000..94c794fbb
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ker.json
@@ -0,0 +1,61 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 4096,
+ 8192
+ ],
+ [
+ -12288,
+ -16384
+ ]
+ ],
+ [
+ [
+ -20479,
+ 24575
+ ],
+ [
+ -28671,
+ 32767
+ ]
+ ]
+ ],
+ [
+ [
+ [
+ 16384,
+ -8192
+ ],
+ [
+ 12288,
+ -4096
+ ]
+ ],
+ [
+ [
+ -32767,
+ -24575
+ ],
+ [
+ 28671,
+ 20479
+ ]
+ ]
+ ]
+ ],
+ "scale": [
+ 0.00024414807580797754,
+ 0.00024414807580797754
+ ],
+ "zero_point": 0.0,
+ "min": [
+ -8.0,
+ -8.0
+ ],
+ "max": [
+ 8.0,
+ 8.0
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ofm.json
new file mode 100644
index 000000000..e02eeb9dc
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.002048635622486472,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int16/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int16/fake_quantization/ker.json
new file mode 100644
index 000000000..20c1f6759
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int16/fake_quantization/ker.json
@@ -0,0 +1,34 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 1.00018310546875,
+ 2.0,
+ 2.99981689453125,
+ 4.0001220703125
+ ],
+ [
+ -9.00006103515625,
+ 10.0,
+ -10.99993896484375,
+ 11.9998779296875
+ ]
+ ],
+ [
+ [
+ 5.0001220703125,
+ 6.0,
+ 6.9998779296875,
+ 8.000244140625
+ ],
+ [
+ 13.0,
+ -14.0,
+ 15.0,
+ -16.0
+ ]
+ ]
+ ]
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int16/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int16/quantization/bias.json
new file mode 100644
index 000000000..632333144
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int16/quantization/bias.json
@@ -0,0 +1,14 @@
+{
+ "weights": [
+ 17503969,
+ 32507370,
+ 45510319,
+ 56887898
+ ],
+ "scale": [
+ 5.7129901172951205e-08,
+ 6.152450895548591e-08,
+ 6.591911673802062e-08,
+ 7.031372452055533e-08
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int16/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int16/quantization/ifm.json
new file mode 100644
index 000000000..7105a686d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int16/quantization/ifm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00014399811334442347,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int16/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int16/quantization/ker.json
new file mode 100644
index 000000000..d465a7c17
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int16/quantization/ker.json
@@ -0,0 +1,53 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 2521,
+ 4681,
+ 6553,
+ 8192
+ ],
+ [
+ -22685,
+ 23405,
+ -24029,
+ 24575
+ ]
+ ],
+ [
+ [
+ 12603,
+ 14043,
+ 15291,
+ 16384
+ ],
+ [
+ 32767,
+ -32767,
+ 32767,
+ -32767
+ ]
+ ]
+ ]
+ ],
+ "scale": [
+ 0.0003967406231879635,
+ 0.0004272591326639607,
+ 0.0004577776421399579,
+ 0.0004882961516159551
+ ],
+ "zero_point": 0.0,
+ "min": [
+ -13.0,
+ -14.0,
+ -15.0,
+ -16.0
+ ],
+ "max": [
+ 13.0,
+ 14.0,
+ 15.0,
+ 16.0
+ ]
+}
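
Note: for depthwise kernels the quantized axis is the last one, so the four scales above come from per-channel maxima of 13, 14, 15 and 16:

    for m in (13.0, 14.0, 15.0, 16.0):
        print(m / 32767.0)
    # ~0.00039674, 0.00042726, 0.00045778, 0.00048830 -- the float32
    # roundings of these appear in the "scale" list above
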
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int16/quantization/ofm.json
new file mode 100644
index 000000000..2d84cd7d8
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int16/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.0031168656423687935,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int16/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int16/record_minmax/ifm.json
new file mode 100644
index 000000000..2ef9a69b9
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int16/record_minmax/ifm.json
@@ -0,0 +1,4 @@
+{
+ "min": -4.7183862495422355,
+ "max": 4.684358768463135
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int16/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int16/record_minmax/ofm.json
new file mode 100644
index 000000000..ff55057b2
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int16/record_minmax/ofm.json
@@ -0,0 +1,4 @@
+{
+ "min": 0.0,
+ "max": 102.13033935546875
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int16/wo_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int16/wo_quantization/ker.json
new file mode 100644
index 000000000..d465a7c17
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int16/wo_quantization/ker.json
@@ -0,0 +1,53 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 2521,
+ 4681,
+ 6553,
+ 8192
+ ],
+ [
+ -22685,
+ 23405,
+ -24029,
+ 24575
+ ]
+ ],
+ [
+ [
+ 12603,
+ 14043,
+ 15291,
+ 16384
+ ],
+ [
+ 32767,
+ -32767,
+ 32767,
+ -32767
+ ]
+ ]
+ ]
+ ],
+ "scale": [
+ 0.0003967406231879635,
+ 0.0004272591326639607,
+ 0.0004577776421399579,
+ 0.0004882961516159551
+ ],
+ "zero_point": 0.0,
+ "min": [
+ -13.0,
+ -14.0,
+ -15.0,
+ -16.0
+ ],
+ "max": [
+ 13.0,
+ 14.0,
+ 15.0,
+ 16.0
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int8/wo_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int8/wo_quantization/ker.json
new file mode 100644
index 000000000..13c929306
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/int8/wo_quantization/ker.json
@@ -0,0 +1,53 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 10,
+ 18,
+ 25,
+ 32
+ ],
+ [
+ -88,
+ 91,
+ -93,
+ 95
+ ]
+ ],
+ [
+ [
+ 49,
+ 54,
+ 59,
+ 64
+ ],
+ [
+ 127,
+ -127,
+ 127,
+ -127
+ ]
+ ]
+ ]
+ ],
+ "scale": [
+ 0.10236220806837082,
+ 0.11023622006177902,
+ 0.11811023950576782,
+ 0.12598425149917603
+ ],
+ "zero_point": 0.0,
+ "min": [
+ -13.0,
+ -14.0,
+ -15.0,
+ -16.0
+ ],
+ "max": [
+ 13.0,
+ 14.0,
+ 15.0,
+ 16.0
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/fake_quantization/ker.json
new file mode 100644
index 000000000..cd3479781
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/fake_quantization/ker.json
@@ -0,0 +1,34 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 0.9725494384765625,
+ 1.945098876953125,
+ 3.039216995239258,
+ 4.0117645263671875
+ ],
+ [
+ -8.996077537536621,
+ 9.9686279296875,
+ -10.94117546081543,
+ 12.035295486450195
+ ]
+ ],
+ [
+ [
+ 4.98431396484375,
+ 5.9568634033203125,
+ 7.050981521606445,
+ 8.023530960083008
+ ],
+ [
+ 13.007843017578125,
+ -13.980391502380371,
+ 14.95294189453125,
+ -16.04705810546875
+ ]
+ ]
+ ]
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/bias.json
new file mode 100644
index 000000000..e60ff312e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/bias.json
@@ -0,0 +1,9 @@
+{
+ "weights": [
+ 2156,
+ 4312,
+ 6468,
+ 8624
+ ],
+ "scale": 0.0004638272181067826
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..4ec4ef2d7
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.0038153529167175293,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ker.json
new file mode 100644
index 000000000..01835fbde
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ker.json
@@ -0,0 +1,38 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 140,
+ 148,
+ 157,
+ 165
+ ],
+ [
+ 58,
+ 214,
+ 42,
+ 231
+ ]
+ ],
+ [
+ [
+ 173,
+ 181,
+ 190,
+ 198
+ ],
+ [
+ 239,
+ 17,
+ 255,
+ 0
+ ]
+ ]
+ ]
+ ],
+ "scale": 0.12156862765550613,
+ "zero_point": 132.0,
+ "min": -16.04705810546875,
+ "max": 14.952940940856934
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ofm.json
new file mode 100644
index 000000000..39c64f3ef
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.07362665981054306,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/fake_quantization/ker.json
new file mode 100644
index 000000000..20c1f6759
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/fake_quantization/ker.json
@@ -0,0 +1,34 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 1.00018310546875,
+ 2.0,
+ 2.99981689453125,
+ 4.0001220703125
+ ],
+ [
+ -9.00006103515625,
+ 10.0,
+ -10.99993896484375,
+ 11.9998779296875
+ ]
+ ],
+ [
+ [
+ 5.0001220703125,
+ 6.0,
+ 6.9998779296875,
+ 8.000244140625
+ ],
+ [
+ 13.0,
+ -14.0,
+ 15.0,
+ -16.0
+ ]
+ ]
+ ]
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/bias.json
new file mode 100644
index 000000000..632333144
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/bias.json
@@ -0,0 +1,14 @@
+{
+ "weights": [
+ 17503969,
+ 32507370,
+ 45510319,
+ 56887898
+ ],
+ "scale": [
+ 5.7129901172951205e-08,
+ 6.152450895548591e-08,
+ 6.591911673802062e-08,
+ 7.031372452055533e-08
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..7105a686d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00014399811334442347,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ker.json
new file mode 100644
index 000000000..d465a7c17
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ker.json
@@ -0,0 +1,53 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 2521,
+ 4681,
+ 6553,
+ 8192
+ ],
+ [
+ -22685,
+ 23405,
+ -24029,
+ 24575
+ ]
+ ],
+ [
+ [
+ 12603,
+ 14043,
+ 15291,
+ 16384
+ ],
+ [
+ 32767,
+ -32767,
+ 32767,
+ -32767
+ ]
+ ]
+ ]
+ ],
+ "scale": [
+ 0.0003967406231879635,
+ 0.0004272591326639607,
+ 0.0004577776421399579,
+ 0.0004882961516159551
+ ],
+ "zero_point": 0.0,
+ "min": [
+ -13.0,
+ -14.0,
+ -15.0,
+ -16.0
+ ],
+ "max": [
+ 13.0,
+ 14.0,
+ 15.0,
+ 16.0
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ofm.json
new file mode 100644
index 000000000..2d84cd7d8
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.0031168656423687935,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/int16/fake_quantization/weight.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/int16/fake_quantization/weight.json
new file mode 100644
index 000000000..559e537fc
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/int16/fake_quantization/weight.json
@@ -0,0 +1,76 @@
+{
+ "weights": [
+ [
+ 1.000030517578125,
+ 2.00006103515625,
+ -3.000091552734375,
+ -4.0001220703125,
+ -4.999908447265625,
+ 5.99993896484375,
+ -6.999969482421875,
+ 8.0,
+ 4.0001220703125,
+ -2.00006103515625,
+ 3.000091552734375,
+ -1.000030517578125,
+ -8.0,
+ -5.99993896484375,
+ 6.999969482421875,
+ 4.999908447265625
+ ],
+ [
+ 1.000030517578125,
+ 2.00006103515625,
+ -3.000091552734375,
+ -4.0001220703125,
+ -4.999908447265625,
+ 5.99993896484375,
+ -6.999969482421875,
+ 8.0,
+ 4.0001220703125,
+ -2.00006103515625,
+ 3.000091552734375,
+ -1.000030517578125,
+ -8.0,
+ -5.99993896484375,
+ 6.999969482421875,
+ 4.999908447265625
+ ],
+ [
+ 1.000030517578125,
+ 2.00006103515625,
+ -3.000091552734375,
+ -4.0001220703125,
+ -4.999908447265625,
+ 5.99993896484375,
+ -6.999969482421875,
+ 8.0,
+ 4.0001220703125,
+ -2.00006103515625,
+ 3.000091552734375,
+ -1.000030517578125,
+ -8.0,
+ -5.99993896484375,
+ 6.999969482421875,
+ 4.999908447265625
+ ],
+ [
+ 1.000030517578125,
+ 2.00006103515625,
+ -3.000091552734375,
+ -4.0001220703125,
+ -4.999908447265625,
+ 5.99993896484375,
+ -6.999969482421875,
+ 8.0,
+ 4.0001220703125,
+ -2.00006103515625,
+ 3.000091552734375,
+ -1.000030517578125,
+ -8.0,
+ -5.99993896484375,
+ 6.999969482421875,
+ 4.999908447265625
+ ]
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/int16/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/int16/quantization/bias.json
new file mode 100644
index 000000000..0186c03f4
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/int16/quantization/bias.json
@@ -0,0 +1,14 @@
+{
+ "weights": [
+ 27619368,
+ -55238737,
+ -82858105,
+ 110477474
+ ],
+ "scale": [
+ 3.620647604581258e-08,
+ 3.620647604581258e-08,
+ 3.620647604581258e-08,
+ 3.620647604581258e-08
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/int16/quantization/in.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/int16/quantization/in.json
new file mode 100644
index 000000000..1fd68cabe
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/int16/quantization/in.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00014829720021225512,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/int16/quantization/out.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/int16/quantization/out.json
new file mode 100644
index 000000000..b2950218c
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/int16/quantization/out.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.003870659740641713,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/int16/quantization/weight.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/int16/quantization/weight.json
new file mode 100644
index 000000000..69254d12b
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/int16/quantization/weight.json
@@ -0,0 +1,95 @@
+{
+ "weights": [
+ [
+ 4096,
+ 8192,
+ -12288,
+ -16384,
+ -20479,
+ 24575,
+ -28671,
+ 32767,
+ 16384,
+ -8192,
+ 12288,
+ -4096,
+ -32767,
+ -24575,
+ 28671,
+ 20479
+ ],
+ [
+ 4096,
+ 8192,
+ -12288,
+ -16384,
+ -20479,
+ 24575,
+ -28671,
+ 32767,
+ 16384,
+ -8192,
+ 12288,
+ -4096,
+ -32767,
+ -24575,
+ 28671,
+ 20479
+ ],
+ [
+ 4096,
+ 8192,
+ -12288,
+ -16384,
+ -20479,
+ 24575,
+ -28671,
+ 32767,
+ 16384,
+ -8192,
+ 12288,
+ -4096,
+ -32767,
+ -24575,
+ 28671,
+ 20479
+ ],
+ [
+ 4096,
+ 8192,
+ -12288,
+ -16384,
+ -20479,
+ 24575,
+ -28671,
+ 32767,
+ 16384,
+ -8192,
+ 12288,
+ -4096,
+ -32767,
+ -24575,
+ 28671,
+ 20479
+ ]
+ ],
+ "scale": [
+ 0.00024414807580797754,
+ 0.00024414807580797754,
+ 0.00024414807580797754,
+ 0.00024414807580797754
+ ],
+ "zero_point": 0.0,
+ "min": [
+ -8.0,
+ -8.0,
+ -8.0,
+ -8.0
+ ],
+ "max": [
+ 8.0,
+ 8.0,
+ 8.0,
+ 8.0
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/int16/record_minmax/in.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/int16/record_minmax/in.json
new file mode 100644
index 000000000..68ff7d8df
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/int16/record_minmax/in.json
@@ -0,0 +1,4 @@
+{
+ "min": -4.8592542457580565,
+ "max": 4.7664618492126465
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/int16/record_minmax/out.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/int16/record_minmax/out.json
new file mode 100644
index 000000000..c453af298
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/int16/record_minmax/out.json
@@ -0,0 +1,4 @@
+{
+ "min": -15.112948303222655,
+ "max": 126.82991027832031
+}
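The activation parameters above are likewise consistent with symmetric int16 quantization driven by record_minmax: scale = max(|min|, |max|) / 32767 and zero_point = 0. A quick sketch (illustrative only, using the min/max recorded above):

import math

# Symmetric int16 activations: scale = max(|min|, |max|) / 32767, zero_point = 0
def int16_scale(lo, hi):
    return max(abs(lo), abs(hi)) / 32767

# in:  record_minmax/in.json  -> quantization/in.json
assert math.isclose(int16_scale(-4.8592542457580565, 4.7664618492126465),
                    0.00014829720021225512, rel_tol=1e-6)
# out: record_minmax/out.json -> quantization/out.json
assert math.isclose(int16_scale(-15.112948303222655, 126.82991027832031),
                    0.003870659740641713, rel_tol=1e-6)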
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/fake_quantization/weight.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/fake_quantization/weight.json
new file mode 100644
index 000000000..e1da53ab0
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/fake_quantization/weight.json
@@ -0,0 +1,76 @@
+{
+ "weights": [
+ [
+ 1.0039215087890625,
+ 2.007843017578125,
+ -3.0117650032043457,
+ -4.015686511993408,
+ -5.019608497619629,
+ 6.023530006408691,
+ -7.027451515197754,
+ 7.9686279296875,
+ 4.01568603515625,
+ -2.007843494415283,
+ 3.0117645263671875,
+ -1.0039215087890625,
+ -7.9686279296875,
+ -6.023530006408691,
+ 7.027451515197754,
+ 5.019608497619629
+ ],
+ [
+ 1.0039215087890625,
+ 2.007843017578125,
+ -3.0117650032043457,
+ -4.015686511993408,
+ -5.019608497619629,
+ 6.023530006408691,
+ -7.027451515197754,
+ 7.9686279296875,
+ 4.01568603515625,
+ -2.007843494415283,
+ 3.0117645263671875,
+ -1.0039215087890625,
+ -7.9686279296875,
+ -6.023530006408691,
+ 7.027451515197754,
+ 5.019608497619629
+ ],
+ [
+ 1.0039215087890625,
+ 2.007843017578125,
+ -3.0117650032043457,
+ -4.015686511993408,
+ -5.019608497619629,
+ 6.023530006408691,
+ -7.027451515197754,
+ 7.9686279296875,
+ 4.01568603515625,
+ -2.007843494415283,
+ 3.0117645263671875,
+ -1.0039215087890625,
+ -7.9686279296875,
+ -6.023530006408691,
+ 7.027451515197754,
+ 5.019608497619629
+ ],
+ [
+ 1.0039215087890625,
+ 2.007843017578125,
+ -3.0117650032043457,
+ -4.015686511993408,
+ -5.019608497619629,
+ 6.023530006408691,
+ -7.027451515197754,
+ 7.9686279296875,
+ 4.01568603515625,
+ -2.007843494415283,
+ 3.0117645263671875,
+ -1.0039215087890625,
+ -7.9686279296875,
+ -6.023530006408691,
+ 7.027451515197754,
+ 5.019608497619629
+ ]
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/bias.json
new file mode 100644
index 000000000..ecb49bb64
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/bias.json
@@ -0,0 +1,9 @@
+{
+ "weights": [
+ 415,
+ -829,
+ -1244,
+ 1658
+ ],
+ "scale": 0.00241205753304663
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/in_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/in_Quantize.json
new file mode 100644
index 000000000..654824b5d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/in_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.03844216465950012,
+ "zero_point": 126.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/out.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/out.json
new file mode 100644
index 000000000..3baa42155
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/out.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.741962730884552,
+ "zero_point": 156.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/weight.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/weight.json
new file mode 100644
index 000000000..940224049
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/weight.json
@@ -0,0 +1,80 @@
+{
+ "weights": [
+ [
+ 143,
+ 159,
+ 79,
+ 63,
+ 47,
+ 223,
+ 15,
+ 254,
+ 191,
+ 95,
+ 175,
+ 111,
+ 0,
+ 31,
+ 239,
+ 207
+ ],
+ [
+ 143,
+ 159,
+ 79,
+ 63,
+ 47,
+ 223,
+ 15,
+ 254,
+ 191,
+ 95,
+ 175,
+ 111,
+ 0,
+ 31,
+ 239,
+ 207
+ ],
+ [
+ 143,
+ 159,
+ 79,
+ 63,
+ 47,
+ 223,
+ 15,
+ 254,
+ 191,
+ 95,
+ 175,
+ 111,
+ 0,
+ 31,
+ 239,
+ 207
+ ],
+ [
+ 143,
+ 159,
+ 79,
+ 63,
+ 47,
+ 223,
+ 15,
+ 254,
+ 191,
+ 95,
+ 175,
+ 111,
+ 0,
+ 31,
+ 239,
+ 207
+ ]
+ ],
+ "scale": 0.062745101749897,
+ "zero_point": 127.0,
+ "min": -7.9686279296875,
+ "max": 8.031373023986816
+}
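Although this file sits under channel/int16, its values are clearly layer-wise uint8 (a single scale, zero_point 127, weights in 0..254), consistent with the _config variants overriding the default scheme for this layer via the quantization config. The parameters follow the asymmetric uint8 rule scale = (max - min) / 255, zero_point = round(-min / scale); an illustrative check:

import math

# Asymmetric uint8: scale = (max - min) / 255, zero_point = round(-min / scale)
w_min, w_max = -7.9686279296875, 8.031373023986816   # weight.json "min"/"max"
scale = (w_max - w_min) / 255
zero_point = round(-w_min / scale)

assert math.isclose(scale, 0.062745101749897, rel_tol=1e-6)
assert zero_point == 127

# First weight entry: fake-quantized 1.0039215087890625 maps to 143 above
assert round(1.0039215087890625 / scale) + zero_point == 143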
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/fake_quantization/weight.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/fake_quantization/weight.json
new file mode 100644
index 000000000..559e537fc
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/fake_quantization/weight.json
@@ -0,0 +1,76 @@
+{
+ "weights": [
+ [
+ 1.000030517578125,
+ 2.00006103515625,
+ -3.000091552734375,
+ -4.0001220703125,
+ -4.999908447265625,
+ 5.99993896484375,
+ -6.999969482421875,
+ 8.0,
+ 4.0001220703125,
+ -2.00006103515625,
+ 3.000091552734375,
+ -1.000030517578125,
+ -8.0,
+ -5.99993896484375,
+ 6.999969482421875,
+ 4.999908447265625
+ ],
+ [
+ 1.000030517578125,
+ 2.00006103515625,
+ -3.000091552734375,
+ -4.0001220703125,
+ -4.999908447265625,
+ 5.99993896484375,
+ -6.999969482421875,
+ 8.0,
+ 4.0001220703125,
+ -2.00006103515625,
+ 3.000091552734375,
+ -1.000030517578125,
+ -8.0,
+ -5.99993896484375,
+ 6.999969482421875,
+ 4.999908447265625
+ ],
+ [
+ 1.000030517578125,
+ 2.00006103515625,
+ -3.000091552734375,
+ -4.0001220703125,
+ -4.999908447265625,
+ 5.99993896484375,
+ -6.999969482421875,
+ 8.0,
+ 4.0001220703125,
+ -2.00006103515625,
+ 3.000091552734375,
+ -1.000030517578125,
+ -8.0,
+ -5.99993896484375,
+ 6.999969482421875,
+ 4.999908447265625
+ ],
+ [
+ 1.000030517578125,
+ 2.00006103515625,
+ -3.000091552734375,
+ -4.0001220703125,
+ -4.999908447265625,
+ 5.99993896484375,
+ -6.999969482421875,
+ 8.0,
+ 4.0001220703125,
+ -2.00006103515625,
+ 3.000091552734375,
+ -1.000030517578125,
+ -8.0,
+ -5.99993896484375,
+ 6.999969482421875,
+ 4.999908447265625
+ ]
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/bias.json
new file mode 100644
index 000000000..0186c03f4
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/bias.json
@@ -0,0 +1,14 @@
+{
+ "weights": [
+ 27619368,
+ -55238737,
+ -82858105,
+ 110477474
+ ],
+ "scale": [
+ 3.620647604581258e-08,
+ 3.620647604581258e-08,
+ 3.620647604581258e-08,
+ 3.620647604581258e-08
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/in_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/in_Quantize.json
new file mode 100644
index 000000000..1fd68cabe
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/in_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00014829720021225512,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/out.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/out.json
new file mode 100644
index 000000000..b2950218c
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/out.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.003870659740641713,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/weight.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/weight.json
new file mode 100644
index 000000000..69254d12b
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/weight.json
@@ -0,0 +1,95 @@
+{
+ "weights": [
+ [
+ 4096,
+ 8192,
+ -12288,
+ -16384,
+ -20479,
+ 24575,
+ -28671,
+ 32767,
+ 16384,
+ -8192,
+ 12288,
+ -4096,
+ -32767,
+ -24575,
+ 28671,
+ 20479
+ ],
+ [
+ 4096,
+ 8192,
+ -12288,
+ -16384,
+ -20479,
+ 24575,
+ -28671,
+ 32767,
+ 16384,
+ -8192,
+ 12288,
+ -4096,
+ -32767,
+ -24575,
+ 28671,
+ 20479
+ ],
+ [
+ 4096,
+ 8192,
+ -12288,
+ -16384,
+ -20479,
+ 24575,
+ -28671,
+ 32767,
+ 16384,
+ -8192,
+ 12288,
+ -4096,
+ -32767,
+ -24575,
+ 28671,
+ 20479
+ ],
+ [
+ 4096,
+ 8192,
+ -12288,
+ -16384,
+ -20479,
+ 24575,
+ -28671,
+ 32767,
+ 16384,
+ -8192,
+ 12288,
+ -4096,
+ -32767,
+ -24575,
+ 28671,
+ 20479
+ ]
+ ],
+ "scale": [
+ 0.00024414807580797754,
+ 0.00024414807580797754,
+ 0.00024414807580797754,
+ 0.00024414807580797754
+ ],
+ "zero_point": 0.0,
+ "min": [
+ -8.0,
+ -8.0,
+ -8.0,
+ -8.0
+ ],
+ "max": [
+ 8.0,
+ 8.0,
+ 8.0,
+ 8.0
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/beta.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/beta.json
new file mode 100644
index 000000000..fa2cdae3d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/beta.json
@@ -0,0 +1,20 @@
+{
+ "weights": [
+ 1,
+ 0,
+ 1,
+ 1
+ ],
+ "scale": [
+ 0.7023000121116638,
+ 0.3091999888420105,
+ 0.7552000284194946,
+ 0.2728999853134155
+ ],
+ "zero_point": [
+ 0,
+ 1,
+ 0,
+ 0
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/gamma.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/gamma.json
new file mode 100644
index 000000000..393a44ab0
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/gamma.json
@@ -0,0 +1,20 @@
+{
+ "weights": [
+ 1,
+ 0,
+ 1,
+ 0
+ ],
+ "scale": [
+ 0.012299999594688416,
+ 0.33239999413490295,
+ 0.23240000009536743,
+ 3.3359999656677246
+ ],
+ "zero_point": [
+ 0,
+ 1,
+ 0,
+ 1
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/ifm.json
new file mode 100644
index 000000000..94c4e0f06
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/ifm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.003919127397239208,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/ofm.json
new file mode 100644
index 000000000..27a1c8547
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.051219820976257324,
+ "zero_point": 104.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/record_minmax/ifm.json
new file mode 100644
index 000000000..910e855c3
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/record_minmax/ifm.json
@@ -0,0 +1,4 @@
+{
+ "min": 0.006417479291558266,
+ "max": 0.9993774032592774
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/record_minmax/ofm.json
new file mode 100644
index 000000000..190da3048
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/record_minmax/ofm.json
@@ -0,0 +1,4 @@
+{
+ "min": -5.316554107666015,
+ "max": 7.744499607086182
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/beta.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/beta.json
new file mode 100644
index 000000000..9dcefd552
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/beta.json
@@ -0,0 +1,10 @@
+{
+ "weights": [
+ 242,
+ 0,
+ 255,
+ 139
+ ],
+ "scale": 0.004174117464572191,
+ "zero_point": 74.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/gamma.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/gamma.json
new file mode 100644
index 000000000..6d85a1ebb
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/gamma.json
@@ -0,0 +1,10 @@
+{
+ "weights": [
+ 239,
+ 214,
+ 255,
+ 0
+ ],
+ "scale": 0.013993725180625916,
+ "zero_point": 238.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/ifm.json
new file mode 100644
index 000000000..df3df56cc
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/ifm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.003914226312190294,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/ofm.json
new file mode 100644
index 000000000..098816af9
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.04870154336094856,
+ "zero_point": 122.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/record_minmax/ifm.json
new file mode 100644
index 000000000..d2e7923b5
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/record_minmax/ifm.json
@@ -0,0 +1,4 @@
+{
+ "min": 0.011221568882465362,
+ "max": 0.9981276893615723
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/record_minmax/ofm.json
new file mode 100644
index 000000000..b4ea58647
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/record_minmax/ofm.json
@@ -0,0 +1,4 @@
+{
+ "min": -5.94246238708496,
+ "max": 6.4764308166503906
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000/channel/int16/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000/channel/int16/quantization/ifm.json
new file mode 100644
index 000000000..5d9052815
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000/channel/int16/quantization/ifm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00015059474390000105,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000/channel/int16/quantization/ofm.json
new file mode 100644
index 000000000..25491f05d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000/channel/int16/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00014986195310484618,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000/channel/int16/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000/channel/int16/record_minmax/ifm.json
new file mode 100644
index 000000000..54dd14d76
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000/channel/int16/record_minmax/ifm.json
@@ -0,0 +1,4 @@
+{
+ "min": -4.9345380973815915,
+ "max": 4.910526599884033
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000/channel/int16/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000/channel/int16/record_minmax/ofm.json
new file mode 100644
index 000000000..635018467
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000/channel/int16/record_minmax/ofm.json
@@ -0,0 +1,4 @@
+{
+ "min": -2.4704078197479244,
+ "max": 4.910526599884033
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000/layer/uint8/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000/layer/uint8/quantization/ifm.json
new file mode 100644
index 000000000..9bf6c9bff
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000/layer/uint8/quantization/ifm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.03876218944787979,
+ "zero_point": 126.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000/layer/uint8/quantization/ofm.json
new file mode 100644
index 000000000..87de1116e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000/layer/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.029836513102054596,
+ "zero_point": 88.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000/layer/uint8/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000/layer/uint8/record_minmax/ifm.json
new file mode 100644
index 000000000..bb42bdf8e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000/layer/uint8/record_minmax/ifm.json
@@ -0,0 +1,4 @@
+{
+ "min": -4.901860733032226,
+ "max": 4.982497882843018
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000/layer/uint8/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000/layer/uint8/record_minmax/ofm.json
new file mode 100644
index 000000000..bb3a52516
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000/layer/uint8/record_minmax/ofm.json
@@ -0,0 +1,4 @@
+{
+ "min": -2.6258130359649656,
+ "max": 4.982497882843018
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/channel/int16/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/channel/int16/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..9bf6c9bff
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/channel/int16/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.03876218944787979,
+ "zero_point": 126.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/channel/int16/quantization/ofm.json
new file mode 100644
index 000000000..87de1116e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/channel/int16/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.029836513102054596,
+ "zero_point": 88.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/layer/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/layer/uint8/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..5d9052815
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/layer/uint8/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00015059474390000105,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/layer/uint8/quantization/ofm.json
new file mode 100644
index 000000000..25491f05d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/layer/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00014986195310484618,
+ "zero_point": 0.0
+}
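Two things are worth noting here. First, the uint8 activation parameters are derived from the recorded ranges by scale = (max - min) / 255, zero_point = round(-min / scale). Second, the _config expected outputs reuse the very same blobs under swapped directory names (e.g. index 9bf6c9bff appears both as MaxPool2D_000/layer/uint8/quantization/ifm.json and as MaxPool2D_000_config/channel/int16/quantization/ifm_Quantize.json), since the config flips the scheme the directory name would otherwise imply. An illustrative check of the first point:

import math

# Asymmetric uint8 activations from record_minmax ranges
def uint8_params(lo, hi):
    scale = (hi - lo) / 255
    return scale, round(-lo / scale)

s, zp = uint8_params(-4.901860733032226, 4.982497882843018)   # ifm
assert math.isclose(s, 0.03876218944787979, rel_tol=1e-6) and zp == 126

s, zp = uint8_params(-2.6258130359649656, 4.982497882843018)  # ofm
assert math.isclose(s, 0.029836513102054596, rel_tol=1e-6) and zp == 88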
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mean_000/channel/int16/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/Mean_000/channel/int16/quantization/ifm.json
new file mode 100644
index 000000000..18c3b0421
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mean_000/channel/int16/quantization/ifm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00015251495642587543,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mean_000/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Mean_000/channel/int16/quantization/ofm.json
new file mode 100644
index 000000000..145ee8fda
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mean_000/channel/int16/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00013844699424225837,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mean_000/channel/int16/quantization/reduction_indices.json b/compiler/pota-quantization-value-test/expected_outputs/Mean_000/channel/int16/quantization/reduction_indices.json
new file mode 100644
index 000000000..394cfb322
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mean_000/channel/int16/quantization/reduction_indices.json
@@ -0,0 +1,5 @@
+{
+ "weights": [
+ -1
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mean_000/channel/int16/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/Mean_000/channel/int16/record_minmax/ifm.json
new file mode 100644
index 000000000..8e49c0eb2
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mean_000/channel/int16/record_minmax/ifm.json
@@ -0,0 +1,4 @@
+{
+ "min": -4.9974578094482425,
+ "max": 4.991122436523438
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mean_000/channel/int16/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Mean_000/channel/int16/record_minmax/ofm.json
new file mode 100644
index 000000000..740c3076a
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mean_000/channel/int16/record_minmax/ofm.json
@@ -0,0 +1,4 @@
+{
+ "min": -3.351332187652588,
+ "max": 4.536492500305176
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mean_000/layer/uint8/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/Mean_000/layer/uint8/quantization/ifm.json
new file mode 100644
index 000000000..ede36c6ad
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mean_000/layer/uint8/quantization/ifm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.039086975157260895,
+ "zero_point": 128.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mean_000/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Mean_000/layer/uint8/quantization/ofm.json
new file mode 100644
index 000000000..bd2fc7f62
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mean_000/layer/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.028692100197076797,
+ "zero_point": 131.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mean_000/layer/uint8/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/Mean_000/layer/uint8/record_minmax/ifm.json
new file mode 100644
index 000000000..ae1dc5e90
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mean_000/layer/uint8/record_minmax/ifm.json
@@ -0,0 +1,4 @@
+{
+ "min": -4.993542575836181,
+ "max": 4.97363561630249
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mean_000/layer/uint8/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Mean_000/layer/uint8/record_minmax/ofm.json
new file mode 100644
index 000000000..527ed8d46
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mean_000/layer/uint8/record_minmax/ofm.json
@@ -0,0 +1,4 @@
+{
+ "min": -3.766610870361328,
+ "max": 3.5498746299743655
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/channel/int16/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/channel/int16/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..ede36c6ad
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/channel/int16/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.039086975157260895,
+ "zero_point": 128.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/channel/int16/quantization/ofm.json
new file mode 100644
index 000000000..bd2fc7f62
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/channel/int16/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.028692100197076797,
+ "zero_point": 131.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..18c3b0421
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00015251495642587543,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/ofm.json
new file mode 100644
index 000000000..145ee8fda
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00013844699424225837,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/reduction_indices.json b/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/reduction_indices.json
new file mode 100644
index 000000000..394cfb322
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/reduction_indices.json
@@ -0,0 +1,5 @@
+{
+ "weights": [
+ -1
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mul_001/channel/int16/quantization/ifm1.json b/compiler/pota-quantization-value-test/expected_outputs/Mul_001/channel/int16/quantization/ifm1.json
new file mode 100644
index 000000000..f329b43be
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mul_001/channel/int16/quantization/ifm1.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.0001513722527306527,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mul_001/channel/int16/quantization/ifm2.json b/compiler/pota-quantization-value-test/expected_outputs/Mul_001/channel/int16/quantization/ifm2.json
new file mode 100644
index 000000000..ab968c9fc
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mul_001/channel/int16/quantization/ifm2.json
@@ -0,0 +1,32 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 4096,
+ 8192,
+ -12288
+ ],
+ [
+ -16384,
+ -20479,
+ 24575
+ ]
+ ],
+ [
+ [
+ -28671,
+ 32767,
+ 16384
+ ],
+ [
+ -8192,
+ 12288,
+ -4096
+ ]
+ ]
+ ]
+ ],
+ "scale": 0.0002441480755805969,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mul_001/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Mul_001/channel/int16/quantization/ofm.json
new file mode 100644
index 000000000..4b5118c3e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mul_001/channel/int16/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.000991688808426261,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mul_001/channel/int16/record_minmax/ifm1.json b/compiler/pota-quantization-value-test/expected_outputs/Mul_001/channel/int16/record_minmax/ifm1.json
new file mode 100644
index 000000000..d333b93a8
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mul_001/channel/int16/record_minmax/ifm1.json
@@ -0,0 +1,4 @@
+{
+ "min": -4.790120906829833,
+ "max": 4.960014820098877
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mul_001/channel/int16/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Mul_001/channel/int16/record_minmax/ofm.json
new file mode 100644
index 000000000..d0cb3786d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mul_001/channel/int16/record_minmax/ofm.json
@@ -0,0 +1,4 @@
+{
+ "min": -30.124285202026368,
+ "max": 32.49466659545899
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mul_001/layer/uint8/quantization/ifm1.json b/compiler/pota-quantization-value-test/expected_outputs/Mul_001/layer/uint8/quantization/ifm1.json
new file mode 100644
index 000000000..bbff8952d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mul_001/layer/uint8/quantization/ifm1.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.03780897706747055,
+ "zero_point": 131.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mul_001/layer/uint8/quantization/ifm2.json b/compiler/pota-quantization-value-test/expected_outputs/Mul_001/layer/uint8/quantization/ifm2.json
new file mode 100644
index 000000000..ec6082d55
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mul_001/layer/uint8/quantization/ifm2.json
@@ -0,0 +1,32 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 136,
+ 153,
+ 68
+ ],
+ [
+ 51,
+ 34,
+ 221
+ ]
+ ],
+ [
+ [
+ 0,
+ 255,
+ 187
+ ],
+ [
+ 85,
+ 170,
+ 102
+ ]
+ ]
+ ]
+ ],
+ "scale": 0.05882352963089943,
+ "zero_point": 119.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mul_001/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Mul_001/layer/uint8/quantization/ofm.json
new file mode 100644
index 000000000..cec0bdf9a
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mul_001/layer/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.232084259390831,
+ "zero_point": 111.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mul_001/layer/uint8/record_minmax/ifm1.json b/compiler/pota-quantization-value-test/expected_outputs/Mul_001/layer/uint8/record_minmax/ifm1.json
new file mode 100644
index 000000000..7cdb53424
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mul_001/layer/uint8/record_minmax/ifm1.json
@@ -0,0 +1,4 @@
+{
+ "min": -4.954726142883301,
+ "max": 4.686561832427978
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mul_001/layer/uint8/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Mul_001/layer/uint8/record_minmax/ofm.json
new file mode 100644
index 000000000..5f63577ea
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mul_001/layer/uint8/record_minmax/ofm.json
@@ -0,0 +1,4 @@
+{
+ "min": -25.874579315185546,
+ "max": 33.30691329956055
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ifm1_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ifm1_Quantize.json
new file mode 100644
index 000000000..bbff8952d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ifm1_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.03780897706747055,
+ "zero_point": 131.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ifm2.json b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ifm2.json
new file mode 100644
index 000000000..ec6082d55
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ifm2.json
@@ -0,0 +1,32 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 136,
+ 153,
+ 68
+ ],
+ [
+ 51,
+ 34,
+ 221
+ ]
+ ],
+ [
+ [
+ 0,
+ 255,
+ 187
+ ],
+ [
+ 85,
+ 170,
+ 102
+ ]
+ ]
+ ]
+ ],
+ "scale": 0.05882352963089943,
+ "zero_point": 119.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ofm.json
new file mode 100644
index 000000000..cec0bdf9a
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.232084259390831,
+ "zero_point": 111.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ifm1_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ifm1_Quantize.json
new file mode 100644
index 000000000..f329b43be
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ifm1_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.0001513722527306527,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ifm2.json b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ifm2.json
new file mode 100644
index 000000000..ab968c9fc
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ifm2.json
@@ -0,0 +1,32 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 4096,
+ 8192,
+ -12288
+ ],
+ [
+ -16384,
+ -20479,
+ 24575
+ ]
+ ],
+ [
+ [
+ -28671,
+ 32767,
+ 16384
+ ],
+ [
+ -8192,
+ 12288,
+ -4096
+ ]
+ ]
+ ]
+ ],
+ "scale": 0.0002441480755805969,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ofm.json
new file mode 100644
index 000000000..4b5118c3e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.000991688808426261,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/alpha.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/alpha.json
new file mode 100644
index 000000000..6f99899d5
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/alpha.json
@@ -0,0 +1,21 @@
+{
+ "weights": [
+ [
+ [
+ 1,
+ 1,
+ 1
+ ]
+ ]
+ ],
+ "scale": [
+ 0.10000000149011612,
+ 0.30000001192092896,
+ 0.5
+ ],
+ "zero_point": [
+ 0,
+ 0,
+ 0
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/ifm.json
new file mode 100644
index 000000000..7d1f4c795
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/ifm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00015214986342471093,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/ofm.json
new file mode 100644
index 000000000..533c1e3e0
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00015159364556893706,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/record_minmax/ifm.json
new file mode 100644
index 000000000..edbbff9cb
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/record_minmax/ifm.json
@@ -0,0 +1,4 @@
+{
+ "min": -4.985494499206543,
+ "max": 4.967269058227539
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/record_minmax/ofm.json
new file mode 100644
index 000000000..954d5eff1
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/record_minmax/ofm.json
@@ -0,0 +1,4 @@
+{
+ "min": -2.4895002365112306,
+ "max": 4.967269058227539
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/quantization/alpha.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/quantization/alpha.json
new file mode 100644
index 000000000..6f99899d5
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/quantization/alpha.json
@@ -0,0 +1,21 @@
+{
+ "weights": [
+ [
+ [
+ 1,
+ 1,
+ 1
+ ]
+ ]
+ ],
+ "scale": [
+ 0.10000000149011612,
+ 0.30000001192092896,
+ 0.5
+ ],
+ "zero_point": [
+ 0,
+ 0,
+ 0
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/quantization/ifm.json
new file mode 100644
index 000000000..d661df363
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/quantization/ifm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.03893596678972244,
+ "zero_point": 128.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/quantization/ofm.json
new file mode 100644
index 000000000..6dfffd563
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.029139429330825806,
+ "zero_point": 85.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/record_minmax/ifm.json
new file mode 100644
index 000000000..8de6b3dc2
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/record_minmax/ifm.json
@@ -0,0 +1,4 @@
+{
+ "min": -4.977406520843505,
+ "max": 4.951265411376953
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/record_minmax/ofm.json
new file mode 100644
index 000000000..c88f6ca92
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/record_minmax/ofm.json
@@ -0,0 +1,4 @@
+{
+ "min": -2.4792890548706055,
+ "max": 4.951265411376953
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/layer/uint8/quantization/alpha.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/layer/uint8/quantization/alpha.json
new file mode 100644
index 000000000..7c001602f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/layer/uint8/quantization/alpha.json
@@ -0,0 +1,13 @@
+{
+ "weights": [
+ [
+ [
+ 51,
+ 153,
+ 255
+ ]
+ ]
+ ],
+ "scale": 0.0019607844296842813,
+ "zero_point": 0.0
+}
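The alpha tensor is all-positive, so the layer-wise uint8 range is widened to include zero: scale = max / 255 with zero_point = 0, and the alpha values 0.1, 0.3, 0.5 land on 51, 153, 255. (The float alphas are inferred from the per-channel variant above, where each single-element channel quantizes to 1 with scale equal to the alpha value itself.) A small sketch:

import math

# Layer-wise uint8 for a non-negative tensor: range widened to include 0,
# so scale = max / 255 and zero_point = 0
alpha = [0.1, 0.3, 0.5]        # per-channel variant: weight 1 * per-channel scale
scale = max(alpha) / 255

assert math.isclose(scale, 0.0019607844296842813, rel_tol=1e-6)
assert [round(a / scale) for a in alpha] == [51, 153, 255]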
diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/layer/uint8/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/layer/uint8/quantization/ifm.json
new file mode 100644
index 000000000..05ce9dd2c
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/layer/uint8/quantization/ifm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.03849203139543533,
+ "zero_point": 127.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/layer/uint8/quantization/ofm.json
new file mode 100644
index 000000000..8f883094a
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/layer/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.02848827838897705,
+ "zero_point": 82.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/layer/uint8/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/layer/uint8/record_minmax/ifm.json
new file mode 100644
index 000000000..76e719001
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/layer/uint8/record_minmax/ifm.json
@@ -0,0 +1,4 @@
+{
+ "min": -4.899785652160644,
+ "max": 4.915681838989258
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/layer/uint8/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/layer/uint8/record_minmax/ofm.json
new file mode 100644
index 000000000..2aa27ca64
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/layer/uint8/record_minmax/ofm.json
@@ -0,0 +1,4 @@
+{
+ "min": -2.348829574584961,
+ "max": 4.915681838989258
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/alpha.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/alpha.json
new file mode 100644
index 000000000..7c001602f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/alpha.json
@@ -0,0 +1,13 @@
+{
+ "weights": [
+ [
+ [
+ 51,
+ 153,
+ 255
+ ]
+ ]
+ ],
+ "scale": 0.0019607844296842813,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..05ce9dd2c
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.03849203139543533,
+ "zero_point": 127.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/ofm.json
new file mode 100644
index 000000000..8f883094a
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.02848827838897705,
+ "zero_point": 82.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/alpha.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/alpha.json
new file mode 100644
index 000000000..6f99899d5
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/alpha.json
@@ -0,0 +1,21 @@
+{
+ "weights": [
+ [
+ [
+ 1,
+ 1,
+ 1
+ ]
+ ]
+ ],
+ "scale": [
+ 0.10000000149011612,
+ 0.30000001192092896,
+ 0.5
+ ],
+ "zero_point": [
+ 0,
+ 0,
+ 0
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..7d1f4c795
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00015214986342471093,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/ofm.json
new file mode 100644
index 000000000..533c1e3e0
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00015159364556893706,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/ReLU_000/channel/int16/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000/channel/int16/quantization/ifm.json
new file mode 100644
index 000000000..5a52a1b7b
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000/channel/int16/quantization/ifm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.0001474507007515058,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/ReLU_000/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000/channel/int16/quantization/ofm.json
new file mode 100644
index 000000000..ff9e41ec8
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000/channel/int16/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.0001422425702912733,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/ReLU_000/channel/int16/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000/channel/int16/record_minmax/ifm.json
new file mode 100644
index 000000000..c26d04075
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000/channel/int16/record_minmax/ifm.json
@@ -0,0 +1,4 @@
+{
+ "min": -4.831517105102539,
+ "max": 4.660862083435059
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/ReLU_000/channel/int16/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000/channel/int16/record_minmax/ofm.json
new file mode 100644
index 000000000..7108b4601
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000/channel/int16/record_minmax/ofm.json
@@ -0,0 +1,4 @@
+{
+ "min": 0.0,
+ "max": 4.660862083435059
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/ReLU_000/layer/uint8/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000/layer/uint8/quantization/ifm.json
new file mode 100644
index 000000000..3b97773ce
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000/layer/uint8/quantization/ifm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.03907399624586105,
+ "zero_point": 127.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/ReLU_000/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000/layer/uint8/quantization/ofm.json
new file mode 100644
index 000000000..698a8a7ee
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000/layer/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.01955186203122139,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/ReLU_000/layer/uint8/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000/layer/uint8/record_minmax/ifm.json
new file mode 100644
index 000000000..fee2d92c0
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000/layer/uint8/record_minmax/ifm.json
@@ -0,0 +1,4 @@
+{
+ "min": -4.978144645690918,
+ "max": 4.985724964141846
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/ReLU_000/layer/uint8/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000/layer/uint8/record_minmax/ofm.json
new file mode 100644
index 000000000..bd6199fc0
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000/layer/uint8/record_minmax/ofm.json
@@ -0,0 +1,4 @@
+{
+ "min": 0.0,
+ "max": 4.985724964141846
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/channel/int16/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/channel/int16/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..3b97773ce
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/channel/int16/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.03907399624586105,
+ "zero_point": 127.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/channel/int16/quantization/ofm.json
new file mode 100644
index 000000000..698a8a7ee
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/channel/int16/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.01955186203122139,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/layer/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/layer/uint8/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..5a52a1b7b
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/layer/uint8/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.0001474507007515058,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/layer/uint8/quantization/ofm.json
new file mode 100644
index 000000000..ff9e41ec8
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/layer/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.0001422425702912733,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/int16/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/int16/quantization/ifm.json
new file mode 100644
index 000000000..2fb0c68d8
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/int16/quantization/ifm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00014983004075475037,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/int16/quantization/ofm1.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/int16/quantization/ofm1.json
new file mode 100644
index 000000000..2fb0c68d8
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/int16/quantization/ofm1.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00014983004075475037,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/int16/quantization/ofm2.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/int16/quantization/ofm2.json
new file mode 100644
index 000000000..2fb0c68d8
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/int16/quantization/ofm2.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00014983004075475037,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/int16/quantization/split_dim.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/int16/quantization/split_dim.json
new file mode 100644
index 000000000..ac7cde187
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/int16/quantization/split_dim.json
@@ -0,0 +1,5 @@
+{
+ "weights": [
+ 0
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/int16/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/int16/record_minmax/ifm.json
new file mode 100644
index 000000000..5e333acde
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/int16/record_minmax/ifm.json
@@ -0,0 +1,4 @@
+{
+ "min": -4.909480743408203,
+ "max": 4.779518718719482
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/int16/record_minmax/ofm1.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/int16/record_minmax/ofm1.json
new file mode 100644
index 000000000..1d23f8d9a
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/int16/record_minmax/ofm1.json
@@ -0,0 +1,4 @@
+{
+ "min": -4.073143873214722,
+ "max": 4.779518718719482
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/int16/record_minmax/ofm2.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/int16/record_minmax/ofm2.json
new file mode 100644
index 000000000..ffd7d841d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/int16/record_minmax/ofm2.json
@@ -0,0 +1,4 @@
+{
+ "min": -4.9008944129943846,
+ "max": 4.620573101043701
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/uint8/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/uint8/quantization/ifm.json
new file mode 100644
index 000000000..aaba6131c
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/uint8/quantization/ifm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.038689617067575455,
+ "zero_point": 128.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/uint8/quantization/ofm1.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/uint8/quantization/ofm1.json
new file mode 100644
index 000000000..aaba6131c
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/uint8/quantization/ofm1.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.038689617067575455,
+ "zero_point": 128.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/uint8/quantization/ofm2.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/uint8/quantization/ofm2.json
new file mode 100644
index 000000000..aaba6131c
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/uint8/quantization/ofm2.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.038689617067575455,
+ "zero_point": 128.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/uint8/quantization/split_dim.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/uint8/quantization/split_dim.json
new file mode 100644
index 000000000..ac7cde187
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/uint8/quantization/split_dim.json
@@ -0,0 +1,5 @@
+{
+ "weights": [
+ 0
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/uint8/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/uint8/record_minmax/ifm.json
new file mode 100644
index 000000000..c6dd19469
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/uint8/record_minmax/ifm.json
@@ -0,0 +1,4 @@
+{
+ "min": -4.959668273925781,
+ "max": 4.906183891296386
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/uint8/record_minmax/ofm1.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/uint8/record_minmax/ofm1.json
new file mode 100644
index 000000000..4f890dddb
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/uint8/record_minmax/ofm1.json
@@ -0,0 +1,4 @@
+{
+ "min": -4.3535110282897955,
+ "max": 4.636985759735107
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/uint8/record_minmax/ofm2.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/uint8/record_minmax/ofm2.json
new file mode 100644
index 000000000..78f9a648f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000/channel/uint8/record_minmax/ofm2.json
@@ -0,0 +1,4 @@
+{
+ "min": -4.959668273925781,
+ "max": 4.8736056804656975
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..aaba6131c
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.038689617067575455,
+ "zero_point": 128.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ofm1.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ofm1.json
new file mode 100644
index 000000000..aaba6131c
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ofm1.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.038689617067575455,
+ "zero_point": 128.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ofm2.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ofm2.json
new file mode 100644
index 000000000..aaba6131c
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ofm2.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.038689617067575455,
+ "zero_point": 128.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/split_dim.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/split_dim.json
new file mode 100644
index 000000000..ac7cde187
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/split_dim.json
@@ -0,0 +1,5 @@
+{
+ "weights": [
+ 0
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..2fb0c68d8
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00014983004075475037,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ofm1.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ofm1.json
new file mode 100644
index 000000000..2fb0c68d8
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ofm1.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00014983004075475037,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ofm2.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ofm2.json
new file mode 100644
index 000000000..2fb0c68d8
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ofm2.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00014983004075475037,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/split_dim.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/split_dim.json
new file mode 100644
index 000000000..ac7cde187
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/split_dim.json
@@ -0,0 +1,5 @@
+{
+ "weights": [
+ 0
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/int16/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/int16/fake_quantization/ker.json
new file mode 100644
index 000000000..6df24eb42
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/int16/fake_quantization/ker.json
@@ -0,0 +1,48 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 0.999786376953125,
+ 2.0001220703125
+ ],
+ [
+ -2.999908447265625,
+ -4.000244140625
+ ],
+ [
+ 5.000030517578125,
+ -5.99981689453125
+ ]
+ ],
+ [
+ [
+ 7.000152587890625,
+ 7.99993896484375
+ ],
+ [
+ -9.000274658203125,
+ -10.00006103515625
+ ],
+ [
+ 10.999847412109375,
+ -12.00018310546875
+ ]
+ ],
+ [
+ [
+ 12.999969482421875,
+ 13.999755859375
+ ],
+ [
+ -15.000091552734375,
+ -15.9998779296875
+ ],
+ [
+ 17.000213623046875,
+ -18.0
+ ]
+ ]
+ ]
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/int16/quantization/.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/int16/quantization/.json
new file mode 100644
index 000000000..a9a5c4735
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/int16/quantization/.json
@@ -0,0 +1,5 @@
+{
+ "weights": [
+ 0
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/int16/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/int16/quantization/ifm.json
new file mode 100644
index 000000000..82f7fa2b6
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/int16/quantization/ifm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00015178922330960631,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/int16/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/int16/quantization/ker.json
new file mode 100644
index 000000000..8d0ceb1c6
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/int16/quantization/ker.json
@@ -0,0 +1,58 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 1820,
+ 3641
+ ],
+ [
+ -5461,
+ -7282
+ ],
+ [
+ 9102,
+ -10922
+ ]
+ ],
+ [
+ [
+ 12743,
+ 14563
+ ],
+ [
+ -16384,
+ -18204
+ ],
+ [
+ 20024,
+ -21845
+ ]
+ ],
+ [
+ [
+ 23665,
+ 25485
+ ],
+ [
+ -27306,
+ -29126
+ ],
+ [
+ 30947,
+ -32767
+ ]
+ ]
+ ]
+ ],
+ "scale": [
+ 0.0005493331705679495
+ ],
+ "zero_point": 0.0,
+ "min": [
+ -18.0
+ ],
+ "max": [
+ 18.0
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/int16/quantization/ofm.json
new file mode 100644
index 000000000..f370bf44d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/int16/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.0122029148042202,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/int16/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/int16/record_minmax/ifm.json
new file mode 100644
index 000000000..8cd48cedd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/int16/record_minmax/ifm.json
@@ -0,0 +1,4 @@
+{
+ "min": -4.942056541442871,
+ "max": 4.973677654266357
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/int16/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/int16/record_minmax/ofm.json
new file mode 100644
index 000000000..0ad23742d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/int16/record_minmax/ofm.json
@@ -0,0 +1,4 @@
+{
+ "min": -269.66596435546876,
+ "max": 399.85290710449215
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/fake_quantization/ker.json
new file mode 100644
index 000000000..76a0440a0
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/fake_quantization/ker.json
@@ -0,0 +1,48 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 0.960784912109375,
+ 2.0588245391845703
+ ],
+ [
+ -3.0196075439453125,
+ -3.980391502380371
+ ],
+ [
+ 4.9411773681640625,
+ -6.039215087890625
+ ]
+ ],
+ [
+ [
+ 7.0,
+ 7.960784912109375
+ ],
+ [
+ -9.058823585510254,
+ -10.019607543945312
+ ],
+ [
+ 10.980392456054688,
+ -11.941176414489746
+ ]
+ ],
+ [
+ [
+ 13.039216995239258,
+ 14.000001907348633
+ ],
+ [
+ -14.960784912109375,
+ -16.05882453918457
+ ],
+ [
+ 17.019607543945312,
+ -17.980392456054688
+ ]
+ ]
+ ]
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..dc5ca8dd5
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.03869570419192314,
+ "zero_point": 126.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ker.json
new file mode 100644
index 000000000..bc150bbb0
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ker.json
@@ -0,0 +1,52 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 138,
+ 146
+ ],
+ [
+ 109,
+ 102
+ ],
+ [
+ 167,
+ 87
+ ]
+ ],
+ [
+ [
+ 182,
+ 189
+ ],
+ [
+ 65,
+ 58
+ ],
+ [
+ 211,
+ 44
+ ]
+ ],
+ [
+ [
+ 226,
+ 233
+ ],
+ [
+ 22,
+ 14
+ ],
+ [
+ 255,
+ 0
+ ]
+ ]
+ ]
+ ],
+ "scale": 0.13725490868091583,
+ "zero_point": 131.0,
+ "min": -17.980392456054688,
+ "max": 17.019609451293945
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ofm.json
new file mode 100644
index 000000000..bfd862189
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 1.6333034038543701,
+ "zero_point": 127.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/fake_quantization/ker.json
new file mode 100644
index 000000000..6df24eb42
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/fake_quantization/ker.json
@@ -0,0 +1,48 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 0.999786376953125,
+ 2.0001220703125
+ ],
+ [
+ -2.999908447265625,
+ -4.000244140625
+ ],
+ [
+ 5.000030517578125,
+ -5.99981689453125
+ ]
+ ],
+ [
+ [
+ 7.000152587890625,
+ 7.99993896484375
+ ],
+ [
+ -9.000274658203125,
+ -10.00006103515625
+ ],
+ [
+ 10.999847412109375,
+ -12.00018310546875
+ ]
+ ],
+ [
+ [
+ 12.999969482421875,
+ 13.999755859375
+ ],
+ [
+ -15.000091552734375,
+ -15.9998779296875
+ ],
+ [
+ 17.000213623046875,
+ -18.0
+ ]
+ ]
+ ]
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ifm_Quantize.json
new file mode 100644
index 000000000..82f7fa2b6
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ifm_Quantize.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.00015178922330960631,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ker.json
new file mode 100644
index 000000000..8d0ceb1c6
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ker.json
@@ -0,0 +1,58 @@
+{
+ "weights": [
+ [
+ [
+ [
+ 1820,
+ 3641
+ ],
+ [
+ -5461,
+ -7282
+ ],
+ [
+ 9102,
+ -10922
+ ]
+ ],
+ [
+ [
+ 12743,
+ 14563
+ ],
+ [
+ -16384,
+ -18204
+ ],
+ [
+ 20024,
+ -21845
+ ]
+ ],
+ [
+ [
+ 23665,
+ 25485
+ ],
+ [
+ -27306,
+ -29126
+ ],
+ [
+ 30947,
+ -32767
+ ]
+ ]
+ ]
+ ],
+ "scale": [
+ 0.0005493331705679495
+ ],
+ "zero_point": 0.0,
+ "min": [
+ -18.0
+ ],
+ "max": [
+ 18.0
+ ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ofm.json
new file mode 100644
index 000000000..f370bf44d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+ "scale": 0.0122029148042202,
+ "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/gen_h5_explicit_inputs.py b/compiler/pota-quantization-value-test/gen_h5_explicit_inputs.py
index 9863c807a..a00cbeba3 100755
--- a/compiler/pota-quantization-value-test/gen_h5_explicit_inputs.py
+++ b/compiler/pota-quantization-value-test/gen_h5_explicit_inputs.py
@@ -1,16 +1,17 @@
#!/usr/bin/env python3
import h5py as h5
import numpy as np
-import tensorflow as tf
+from circle.Model import Model
+from circle.TensorType import TensorType
import argparse
import glob
#
-# This script generates a pack of random input data (.h5) expected by the input tflite model
+# This script generates a pack of random input data (.h5) expected by the input circle model
#
# Basic usage:
# gen_h5_explicit_inputs.py --model <path/to/model/file> --input <path/to/input/directory> --output <path/to/output/file>
-# ex: gen_h5_explicit_inputs.py --model Add_000.tflite --input Add_000 --output Add_000.input.h5
+# ex: gen_h5_explicit_inputs.py --model Add_000.circle --input Add_000 --output Add_000.input.h5
# (This will create Add_000.input.h5)
#
# The input directory should be organized as follows
@@ -33,15 +34,30 @@ model = args.model
input = args.input
output = args.output
-# Build TFLite interpreter. (to get the information of model input)
-interpreter = tf.lite.Interpreter(model)
-input_details = interpreter.get_input_details()
+with open(model, 'rb') as f:
+    buf = f.read()
+    circle_model = Model.GetRootAsModel(buf, 0)
+
+# Assume one subgraph
+assert (circle_model.SubgraphsLength() == 1)
+graph = circle_model.Subgraphs(0)
+inputs = graph.InputsAsNumpy()
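+# 'inputs' holds the tensor indices of the subgraph inputs; each index is
+# resolved to its tensor (shape and dtype) in the per-record loop below.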
# Create h5 file
h5_file = h5.File(output, 'w')
group = h5_file.create_group("value")
group.attrs['desc'] = "Input data for " + model
+
+def toNumpyType(circle_type):
+    if circle_type == TensorType.UINT8:
+        return np.uint8
+    if circle_type == TensorType.FLOAT32:
+        return np.float32
+    if circle_type == TensorType.INT16:
+        return np.int16
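+    # Note: only UINT8/FLOAT32/INT16 appear in these test models; any other
+    # type falls through here and yields None.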
+
+
# Input files
records = sorted(glob.glob(input + "/*.txt"))
for i, record in enumerate(records):
@@ -51,9 +67,10 @@ for i, record in enumerate(records):
         lines = f.readlines()
         for j, line in enumerate(lines):
             data = np.array(line.split(','))
-            input_detail = input_details[j]
-            input_data = np.array(
-                data.reshape(input_detail["shape"]), input_detail["dtype"])
+            input_index = inputs[j]
+            tensor = graph.Tensors(input_index)
+            np_type = toNumpyType(tensor.Type())
+            input_data = np.array(data.reshape(tensor.ShapeAsNumpy()), np_type)
             sample.create_dataset(str(j), data=input_data)
 
 h5_file.close()
diff --git a/compiler/pota-quantization-value-test/requires.cmake b/compiler/pota-quantization-value-test/requires.cmake
index 883a925df..e832dbaf7 100644
--- a/compiler/pota-quantization-value-test/requires.cmake
+++ b/compiler/pota-quantization-value-test/requires.cmake
@@ -2,3 +2,4 @@ require("record-minmax")
require("circle-quantizer")
require("circle-tensordump")
require("common-artifacts")
+require("pics")
diff --git a/compiler/pota-quantization-value-test/test.lst b/compiler/pota-quantization-value-test/test.lst
index d9fd91761..e510e6b3b 100644
--- a/compiler/pota-quantization-value-test/test.lst
+++ b/compiler/pota-quantization-value-test/test.lst
@@ -1,8 +1,67 @@
+addTest(Add_002 layer uint8)
+addTest(Add_002 channel int16)
+addTest(AveragePool2D_000 layer uint8)
+addTest(AveragePool2D_000 channel int16)
+addTest(Concatenation_001 layer uint8)
+addTest(Concatenation_001 channel int16)
addTest(Conv2D_004 channel uint8)
+addTest(Conv2D_004 channel int16)
addTest(Conv2D_004 layer uint8)
addTest(DepthwiseConv2D_002 channel uint8)
+addTest(DepthwiseConv2D_002 channel int16)
addTest(DepthwiseConv2D_002 layer uint8)
addTest(FullyConnected_003 channel uint8)
+addTest(FullyConnected_003 channel int16)
addTest(FullyConnected_003 layer uint8)
+addTest(InstanceNorm_001 layer uint8)
+addTest(InstanceNorm_001 channel uint8)
+addTest(Mean_000 layer uint8)
+addTest(Mean_000 channel int16)
+addTest(MaxPool2D_000 layer uint8)
+addTest(MaxPool2D_000 channel int16)
+addTest(Mul_001 layer uint8)
+addTest(Mul_001 channel int16)
+addTest(PRelu_001 layer uint8)
+addTest(PRelu_001 channel uint8)
+addTest(PRelu_001 channel int16)
+addTest(ReLU_000 layer uint8)
+addTest(ReLU_000 channel int16)
+addTest(Split_000 channel uint8)
+addTest(Split_000 channel int16)
addTest(TransposeConv_001 channel uint8)
+addTest(TransposeConv_001 channel int16)
addTest(TransposeConv_001 layer uint8)
+
+addQConfTest(Add_002 layer uint8)
+addQConfTest(Add_002 channel int16)
+addQConfTest(AveragePool2D_000 layer uint8)
+addQConfTest(AveragePool2D_000 channel int16)
+addQConfTest(Concatenation_001 layer uint8)
+addQConfTest(Concatenation_001 channel int16)
+addQConfTest(Conv2D_004 channel int16)
+addQConfTest(Conv2D_004 layer uint8)
+addQConfTest(DepthwiseConv2D_002 channel int16)
+addQConfTest(DepthwiseConv2D_002 layer uint8)
+addQConfTest(FullyConnected_003 channel int16)
+addQConfTest(FullyConnected_003 layer uint8)
+#addQConfTest(InstanceNorm_001 layer uint8) Enable this when int16 CWQ data is ready.
+#addQConfTest(InstanceNorm_001 channel int16) Enable this when int16 CWQ data is ready.
+addQConfTest(Mean_000 layer uint8)
+addQConfTest(Mean_000 channel int16)
+addQConfTest(MaxPool2D_000 layer uint8)
+addQConfTest(MaxPool2D_000 channel int16)
+addQConfTest(Mul_001 layer uint8)
+addQConfTest(Mul_001 channel int16)
+addQConfTest(PRelu_001 layer uint8)
+addQConfTest(PRelu_001 channel int16)
+addQConfTest(ReLU_000 layer uint8)
+addQConfTest(ReLU_000 channel int16)
+addQConfTest(Split_000 channel uint8)
+addQConfTest(Split_000 channel int16)
+addQConfTest(TransposeConv_001 channel int16)
+addQConfTest(TransposeConv_001 layer uint8)
+
+addWeightsOnlyTest(Conv2D_004 channel int8)
+addWeightsOnlyTest(Conv2D_004 channel int16)
+addWeightsOnlyTest(DepthwiseConv2D_002 channel int8)
+addWeightsOnlyTest(DepthwiseConv2D_002 channel int16)
diff --git a/compiler/pota-quantization-value-test/test_fake_wquant_with_config.sh b/compiler/pota-quantization-value-test/test_fake_wquant_with_config.sh
new file mode 100755
index 000000000..070b2738e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_fake_wquant_with_config.sh
@@ -0,0 +1,87 @@
+#!/bin/bash
+
+# This script tests fake quantization with config file
+#
+# HOW TO USE
+#
+# ./test_fake_wquant_with_config.sh <path/to/test.config> <path/to/work_dir> <TEST 1> <TEST 2> ...
+# test.config : set ${CIRCLE_QUANTIZER_PATH} and ${CIRCLE_TENSORDUMP_PATH}
+# work_dir : build directory of pota-quantization-value-test (ex: build/compiler/pota-quantization-value-test)
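+#
+# Example (illustrative paths; each test is a <MODEL> <GRANULARITY> <DTYPE> triple):
+#   ./test_fake_wquant_with_config.sh test.config \
+#     build/compiler/pota-quantization-value-test Add_002 layer uint8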
+
+SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+COMPARE_SCRIPT_PATH="${SOURCE_PATH}/compare_tensors.py"
+CONFIG_PATH="$1"; shift
+BIN_PATH=$(dirname "${CONFIG_PATH}")
+WORKDIR="$1"; shift
+
+source "${CONFIG_PATH}"
+
+echo "-- Found CIRCLE_QUANTIZER: ${CIRCLE_QUANTIZER_PATH}"
+echo "-- Found CIRCLE_TENSORDUMP: ${CIRCLE_TENSORDUMP_PATH}"
+echo "-- Found workdir: ${WORKDIR}"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+pushd "${WORKDIR}"
+while [ "$1" != "" ]; do
+  MODELNAME=$1; shift
+  GRANULARITY=$1; shift
+  DTYPE=$1; shift
+  TESTCASE="${MODELNAME}.${GRANULARITY}.${DTYPE}"
+
+  TESTED+=("${TESTCASE}")
+
+  TESTCASE_FILE="${WORKDIR}/${TESTCASE}"
+  TEST_RESULT_FILE="${BIN_PATH}/${TESTCASE}"
+
+  PASSED_TAG="${TEST_RESULT_FILE}.fake_quantized.mixed.passed"
+  rm -f "${PASSED_TAG}"
+
+  cat > "${TEST_RESULT_FILE}_fake_quantization_with_config.log" <(
+    exec 2>&1
+    set -ex
+
+    # Run circle-quantizer with --quantize_dequantize_weights
+    "${CIRCLE_QUANTIZER_PATH}" \
+      --quantize_dequantize_weights float32 "${DTYPE}" "${GRANULARITY}" \
+      --config "${SOURCE_PATH}/config_files/${MODELNAME}/${GRANULARITY}/${DTYPE}/qconf.json" \
+      "${WORKDIR}/${MODELNAME}.circle" \
+      "${TEST_RESULT_FILE}.fake_quantized.mixed.circle"
+
+    # Dump weights values (circle-tensordump)
+    "${CIRCLE_TENSORDUMP_PATH}" \
+      "${TEST_RESULT_FILE}.fake_quantized.mixed.circle" \
+      --tensors_to_hdf5 "${TEST_RESULT_FILE}.fake_quantized.mixed.circle.h5"
+
+    # Compare result
+    "${VIRTUALENV}/bin/python" "${COMPARE_SCRIPT_PATH}" \
+      --input_h5 "${TEST_RESULT_FILE}.fake_quantized.mixed.circle.h5" \
+      --expect_dir "${SOURCE_PATH}/expected_outputs/${MODELNAME}_config/${GRANULARITY}/${DTYPE}/fake_quantization" \
+      --mode fake_quantization
+
+    if [[ $? -eq 0 ]]; then
+      touch "${PASSED_TAG}"
+    fi
+  )
+
+  if [[ -f "${PASSED_TAG}" ]]; then
+    PASSED+=("$TESTCASE")
+  else
+    FAILED+=("$TESTCASE")
+  fi
+done
+popd
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002/channel/int16/0.txt
new file mode 100644
index 000000000..a219546a1
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Add_002/channel/int16/0.txt
@@ -0,0 +1 @@
+-0.48516417,-4.5555663 ,-2.9907737 , 2.422857 , 1.010034 , 3.6436582 , 0.29334423,-4.0628953 , 1.0116768 , 3.0871766 , 3.3341465 , 4.3921704
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002/channel/int16/1.txt
new file mode 100644
index 000000000..70d3139a0
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Add_002/channel/int16/1.txt
@@ -0,0 +1 @@
+-0.7787985 , 4.101575 ,-0.4839729 , 0.35971674,-4.3452406 ,-4.811665 ,-3.8693128 , 4.239326 , 0.44103175, 3.5549765 , 2.5334291 , 1.4546562
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002/channel/int16/2.txt
new file mode 100644
index 000000000..3c38f8d5d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Add_002/channel/int16/2.txt
@@ -0,0 +1 @@
+ 3.5943313,-1.4843192, 1.956341 ,-1.3242344, 1.5901331,-3.641623 , 4.6022506,-0.307265 ,-0.6359913,-4.0109854,-1.2064985, 1.1137954
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002/channel/int16/3.txt
new file mode 100644
index 000000000..e89a022f5
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Add_002/channel/int16/3.txt
@@ -0,0 +1 @@
+ 3.1036437 ,-0.39538398,-0.07278133, 4.547673 , 3.9132211 , 2.6468625 ,-4.2830634 ,-2.0573084 , 2.1074655 ,-4.0634165 ,-4.55598 ,-0.7942089
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002/channel/int16/4.txt
new file mode 100644
index 000000000..2b00832cd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Add_002/channel/int16/4.txt
@@ -0,0 +1 @@
+-2.7745228, 1.4813256, 4.4699864, 3.7466738,-2.9847758,-4.453416 , 3.2515864,-1.2459193,-4.44965 ,-1.8452735, 4.423347 , 4.2998137
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002/layer/uint8/0.txt
new file mode 100644
index 000000000..b6e2efa3d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Add_002/layer/uint8/0.txt
@@ -0,0 +1 @@
+-0.8596993, 4.8127713,-3.4127183, 4.2323627,-2.2201376,-1.5362649,-4.9921966, 0.9565166, 3.2879171,-1.3590081,-3.771852 ,-4.1042285
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002/layer/uint8/1.txt
new file mode 100644
index 000000000..bcf2807ba
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Add_002/layer/uint8/1.txt
@@ -0,0 +1 @@
+ 0.14624089, 4.7304125 , 4.833998 , 4.2321773 ,-2.0582533 ,-2.3694758 , 1.4213978 , 2.2444596 , 3.3630798 ,-0.70257574, 3.586656 ,-2.513805
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002/layer/uint8/2.txt
new file mode 100644
index 000000000..c3e32d2c5
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Add_002/layer/uint8/2.txt
@@ -0,0 +1 @@
+ 2.175218 , 0.02776978,-2.6291077 , 3.5350094 ,-1.2364857 ,-3.3151364 ,-0.92507887, 2.8038094 ,-1.8781518 , 3.6221995 , 2.4015775 ,-2.9217577
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002/layer/uint8/3.txt
new file mode 100644
index 000000000..a92abd4f6
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Add_002/layer/uint8/3.txt
@@ -0,0 +1 @@
+-1.0345451,-1.5055941,-4.144375 ,-4.727011 , 1.5841546, 4.5780725,-4.24402 ,-2.3966947,-3.0370803,-1.0234503,-0.2750057, 3.2965126
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002/layer/uint8/4.txt
new file mode 100644
index 000000000..2f2937fcb
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Add_002/layer/uint8/4.txt
@@ -0,0 +1 @@
+-2.4460397 , 2.6090143 , 4.1773095 , 0.11204174,-3.3053472 , 2.5160108 ,-3.0612547 , 1.0667087 , 2.8952355 , 3.842513 , 0.6790793 ,-0.33375
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/0.txt
new file mode 100644
index 000000000..b6e2efa3d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/0.txt
@@ -0,0 +1 @@
+-0.8596993, 4.8127713,-3.4127183, 4.2323627,-2.2201376,-1.5362649,-4.9921966, 0.9565166, 3.2879171,-1.3590081,-3.771852 ,-4.1042285
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/1.txt
new file mode 100644
index 000000000..bcf2807ba
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/1.txt
@@ -0,0 +1 @@
+ 0.14624089, 4.7304125 , 4.833998 , 4.2321773 ,-2.0582533 ,-2.3694758 , 1.4213978 , 2.2444596 , 3.3630798 ,-0.70257574, 3.586656 ,-2.513805
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/2.txt
new file mode 100644
index 000000000..c3e32d2c5
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/2.txt
@@ -0,0 +1 @@
+ 2.175218 , 0.02776978,-2.6291077 , 3.5350094 ,-1.2364857 ,-3.3151364 ,-0.92507887, 2.8038094 ,-1.8781518 , 3.6221995 , 2.4015775 ,-2.9217577
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/3.txt
new file mode 100644
index 000000000..a92abd4f6
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/3.txt
@@ -0,0 +1 @@
+-1.0345451,-1.5055941,-4.144375 ,-4.727011 , 1.5841546, 4.5780725,-4.24402 ,-2.3966947,-3.0370803,-1.0234503,-0.2750057, 3.2965126
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/4.txt
new file mode 100644
index 000000000..2f2937fcb
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/4.txt
@@ -0,0 +1 @@
+-2.4460397 , 2.6090143 , 4.1773095 , 0.11204174,-3.3053472 , 2.5160108 ,-3.0612547 , 1.0667087 , 2.8952355 , 3.842513 , 0.6790793 ,-0.33375
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/0.txt
new file mode 100644
index 000000000..a219546a1
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/0.txt
@@ -0,0 +1 @@
+-0.48516417,-4.5555663 ,-2.9907737 , 2.422857 , 1.010034 , 3.6436582 , 0.29334423,-4.0628953 , 1.0116768 , 3.0871766 , 3.3341465 , 4.3921704
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/1.txt
new file mode 100644
index 000000000..70d3139a0
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/1.txt
@@ -0,0 +1 @@
+-0.7787985 , 4.101575 ,-0.4839729 , 0.35971674,-4.3452406 ,-4.811665 ,-3.8693128 , 4.239326 , 0.44103175, 3.5549765 , 2.5334291 , 1.4546562
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/2.txt
new file mode 100644
index 000000000..3c38f8d5d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/2.txt
@@ -0,0 +1 @@
+ 3.5943313,-1.4843192, 1.956341 ,-1.3242344, 1.5901331,-3.641623 , 4.6022506,-0.307265 ,-0.6359913,-4.0109854,-1.2064985, 1.1137954
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/3.txt
new file mode 100644
index 000000000..e89a022f5
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/3.txt
@@ -0,0 +1 @@
+ 3.1036437 ,-0.39538398,-0.07278133, 4.547673 , 3.9132211 , 2.6468625 ,-4.2830634 ,-2.0573084 , 2.1074655 ,-4.0634165 ,-4.55598 ,-0.7942089
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/4.txt
new file mode 100644
index 000000000..2b00832cd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/4.txt
@@ -0,0 +1 @@
+-2.7745228, 1.4813256, 4.4699864, 3.7466738,-2.9847758,-4.453416 , 3.2515864,-1.2459193,-4.44965 ,-1.8452735, 4.423347 , 4.2998137
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/channel/int16/0.txt
new file mode 100644
index 000000000..2a6b09b27
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/channel/int16/0.txt
@@ -0,0 +1 @@
+-4.629505 , 1.0121975 ,-0.13417433,-2.329806 ,-3.4927373 ,-0.7574039 ,-2.2674313 , 3.1983519 , 2.4298382 ,-0.23268977, 2.0218065 ,-1.5087285 ,-1.3953347 ,-3.8100643 ,-1.7438283 , 3.9852605 , 2.9817178 ,-4.0460877 , 0.09402129, 4.3802586 ,-1.0991771 , 0.4134776 , 2.8136911 ,-3.6254618 ,-3.925183 , 4.691824 , 4.381538 ,-3.235543 ,-2.6764185 , 2.659456 ,-3.2127233 , 0.0206281 , 3.4056723 ,-1.693684 , 1.1005328 ,-3.1486542 , 0.77198106, 1.4526777 ,-2.3614178 , 4.8214664 ,-3.1486242 , 0.58941853,-4.1100698 , 4.1982718 , 1.7219902 ,-2.4375956 ,-1.7505955 , 1.7465224 ,-2.7494361 , 4.0679016 , 1.8936038 ,-4.523818 ,-3.4124248 ,-4.809946 ,-1.939553 , 4.9411273 , 1.6261404 ,-2.6846552 , 2.1339247 , 0.61396503,-1.6662381 , 2.4282491 , 2.662007 ,-0.40868336
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/channel/int16/1.txt
new file mode 100644
index 000000000..470da6c74
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/channel/int16/1.txt
@@ -0,0 +1 @@
+ 0.70593804, 3.253847 , 1.1094694 , 0.5295975 , 0.5944647 ,-2.4391694 , 4.7912955 , 4.4374456 ,-2.942428 ,-3.5038033 ,-3.180417 , 2.1914082 ,-4.5295396 ,-3.0037553 ,-2.265191 , 0.20113531, 2.3805366 ,-0.9111223 ,-4.3170924 , 4.08436 , 1.1006241 ,-1.286977 , 4.811279 , 0.9131829 , 3.2051497 ,-2.8660698 ,-3.188871 , 1.4163305 , 4.061829 , 2.7783196 ,-3.4975152 , 3.4888391 , 2.5789826 ,-1.5264264 ,-0.13952135,-1.280177 , 2.4716458 , 2.6200528 ,-2.515086 , 3.441416 , 2.4515297 ,-0.9845471 , 0.9481396 , 1.1518412 , 1.6088997 , 1.445077 , 2.2620194 ,-2.0843177 ,-0.7263964 , 1.8159748 ,-3.3673623 , 0.2554476 ,-4.3550563 ,-1.4280493 ,-2.2702312 ,-4.7424164 ,-0.57241255,-2.813357 , 2.9161859 ,-0.9036504 , 0.00511268, 0.60724795, 4.8010454 , 1.6000834
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/channel/int16/2.txt
new file mode 100644
index 000000000..d9e048b61
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/channel/int16/2.txt
@@ -0,0 +1 @@
+ 7.07888961e-01, 4.75798702e+00,-1.47843570e-01,-1.95845592e+00, 4.26537895e+00,-3.03711486e+00,-1.35137546e+00,-1.10638596e-01,-1.02415502e+00,-2.65345359e+00, 5.48920631e-01,-4.38003826e+00, 3.61377740e+00,-2.91408587e+00,-3.22874010e-01,-4.74363208e-01, 3.45294738e+00, 1.02204478e+00,-1.44102740e+00, 6.80687547e-01,-2.44050741e+00, 3.71395111e+00,-2.14443612e+00, 3.70928717e+00, 1.35871637e+00, 9.73374963e-01, 1.57826161e+00,-2.91381836e-01, 1.46376801e+00, 2.96391749e+00, 1.08418810e+00,-3.50718546e+00, 4.68637037e+00, 1.04839933e+00, 2.24482760e-01, 2.38816309e+00, 3.18772525e-01,-3.90284014e+00,-3.32757282e+00,-1.61143410e+00,-1.26013708e+00, 2.24948835e+00, 7.63151050e-01, 4.18296242e+00,-8.69123042e-01, 3.19850564e-01, 3.52391124e-01, 3.30018830e+00,-4.64861393e+00,-4.64479780e+00,-2.68103647e+00,-1.13277221e+00, 2.02201343e+00,-4.05572534e-01, 3.06759548e+00,-3.55881310e+00,-1.14900565e+00,-3.00835490e+00, 1.31509733e+00, 2.50206441e-01, 2.47731134e-01, 4.98673916e+00,-1.74064383e-01,-4.43180744e-03
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/channel/int16/3.txt
new file mode 100644
index 000000000..cdbf98e8a
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/channel/int16/3.txt
@@ -0,0 +1 @@
+ 3.5591762 , 4.8821726 , 0.44271094, 4.786732 ,-2.4497197 , 2.4973536 , 2.034311 , 4.8329844 ,-3.9451184 , 4.9937835 , 2.0246332 ,-2.8319602 , 3.9617133 , 4.10946 ,-4.3191586 ,-2.8492777 ,-2.648121 ,-4.199404 ,-0.05163948,-4.7944984 , 2.8989205 , 1.4747709 ,-3.1194637 ,-2.877846 ,-0.39301065, 2.616311 , 2.6305614 , 1.7303206 , 3.6059175 ,-2.745988 , 2.5924454 , 3.0149276 , 4.0359216 ,-0.6135884 ,-2.5023808 ,-2.3395267 ,-3.0633461 ,-2.3836162 ,-4.4779797 ,-1.30866 , 1.9110863 , 0.654628 ,-4.559368 , 0.34231895,-0.8196542 , 4.7275734 , 3.2823656 ,-4.9644713 , 2.9191613 ,-3.4621727 ,-4.276584 ,-1.7153062 , 1.8820064 , 1.2659297 , 3.4141889 ,-4.905296 , 4.619848 ,-3.9501083 ,-1.5550466 , 3.6841137 , 1.7121594 , 1.9466268 , 1.5684807 , 4.5554323
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/channel/int16/4.txt
new file mode 100644
index 000000000..065d77df6
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/channel/int16/4.txt
@@ -0,0 +1 @@
+-2.2269225 ,-1.2782103 ,-3.381931 ,-1.5229299 , 2.0681949 , 1.7630705 ,-0.81455594,-2.6558595 ,-3.4870632 ,-4.647749 , 2.4453654 ,-2.242679 ,-1.0272806 , 0.5656208 , 0.69442594,-4.4343104 ,-3.9649677 ,-3.8908577 ,-1.642287 , 3.0714357 , 1.0880747 ,-2.1665683 ,-4.0994506 , 2.004911 , 3.5922902 , 3.775 , 1.1580672 ,-1.4154137 ,-4.4964633 ,-1.696588 , 4.0220857 ,-1.2785947 ,-4.2075186 ,-4.515838 , 0.99715126, 3.0928102 ,-2.295537 ,-4.772882 ,-1.2936146 ,-2.6903791 , 0.10453273,-1.8041211 , 3.787591 , 0.9493053 ,-4.41586 , 3.4252715 ,-0.25001565, 4.655357 ,-1.8767506 , 0.00600041, 4.660605 , 2.550518 ,-3.830558 , 1.7777463 ,-0.7170577 ,-0.26554853,-3.5770113 ,-1.1354474 , 4.663121 , 3.100427 , 0.03313563,-1.7419808 ,-1.4426676 ,-3.912533
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/layer/uint8/0.txt
new file mode 100644
index 000000000..e42cbf88b
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/layer/uint8/0.txt
@@ -0,0 +1 @@
+-4.1358833e+00, 1.7854472e+00, 4.1751757e+00, 5.5915713e-01,-2.6459083e-01,-1.7176826e+00,-1.8155930e+00, 2.8710868e+00,-2.7043006e+00, 1.0959731e+00,-2.0176995e+00,-6.5950048e-01,-3.6413522e+00,-4.1966043e+00,-2.6820884e+00,-3.6055098e+00, 3.6852844e+00, 8.9128174e-02, 1.3107824e+00,-3.6425626e+00,-3.2318896e-01, 3.6238370e+00,-4.9837337e+00,-4.0550299e+00,-1.4882606e+00, 1.5547658e+00,-1.1696080e+00, 2.1651111e+00, 4.9318314e+00,-3.5928023e+00,-1.2348548e+00,-1.7002642e+00, 1.7365140e+00,-8.8151926e-01,-4.1655774e+00,-1.0166957e+00,-3.7440193e+00, 2.8588972e+00, 4.1286149e+00,-4.9504828e+00, 4.8477168e+00,-2.2587967e+00, 2.8542519e+00,-7.9565448e-01, 6.8252671e-01, 2.5875571e-01,-6.3935977e-01,-4.8547015e+00, 4.1373856e-03,-1.3893708e+00, 8.8775367e-01, 2.1222150e-01, 3.1871333e+00, 1.3869151e+00,-3.8274391e+00, 3.2623324e+00, 7.2669631e-01, 1.0303619e+00, 8.1438148e-01, 8.1272924e-01,-2.7527118e+00, 1.8215455e+00,-1.6416427e-01, 4.9103169e+00
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/layer/uint8/1.txt
new file mode 100644
index 000000000..7caf8ce9e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/layer/uint8/1.txt
@@ -0,0 +1 @@
+-4.250757 , 1.4186406 , 0.63726735,-0.35924944, 1.9436699 , 3.2695885 , 3.6638293 , 4.5166173 , 1.3807241 ,-1.9112543 ,-1.9026492 ,-0.4800549 , 2.818216 ,-4.6390033 ,-3.8570547 , 3.6634028 ,-1.2112037 ,-1.3335027 , 1.3524677 , 2.7240725 ,-3.8335826 , 1.1397903 ,-3.1570992 ,-4.802078 , 3.8334577 , 0.23457901, 0.7132307 , 2.9887354 , 2.9702394 ,-1.4113717 ,-0.66712093, 0.77366674, 1.9308351 ,-0.45465755, 4.925366 , 2.4214447 , 2.8401468 , 0.49789894, 0.53141665,-2.7466767 , 0.2059374 ,-4.9661317 ,-4.1334467 , 1.6928389 ,-0.42529574, 1.1033608 , 4.275776 , 1.5063075 , 2.3528252 , 0.79505247, 3.9829993 ,-4.8472476 ,-1.2752185 , 3.7365675 , 1.976164 ,-4.742636 ,-2.7199092 ,-2.9191706 ,-3.181069 ,-4.489485 , 4.0847454 , 2.2164 , 0.9725334 ,-0.72566307
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/layer/uint8/2.txt
new file mode 100644
index 000000000..7facffa57
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/layer/uint8/2.txt
@@ -0,0 +1 @@
+-3.8293874 ,-0.13678598,-2.5444264 , 1.654611 ,-4.3037786 ,-3.4240584 ,-4.5642533 , 4.1250315 , 1.0469195 , 4.2802887 , 3.1617825 ,-3.1706758 ,-0.99622065, 2.7707603 , 3.7494645 ,-1.4548893 , 2.328633 , 1.7976477 ,-1.2107176 ,-2.0178459 ,-0.6488357 ,-2.9393644 , 2.8918762 , 3.6192262 ,-4.1777225 , 1.3264071 , 0.32620123, 0.7890992 ,-3.304334 , 3.4893208 , 2.5354576 ,-4.7718143 , 3.8602633 , 0.4927564 , 2.2971296 ,-0.3296792 , 2.8115997 ,-0.75152504, 0.558675 ,-2.343631 , 4.650826 ,-3.0893488 , 0.8726873 , 0.24922371, 2.7634025 , 1.0358421 ,-3.862506 ,-3.169402 ,-2.5373347 , 0.9484093 , 4.1409917 ,-4.0408096 ,-2.7231216 ,-2.548547 ,-2.6315095 , 0.8164778 ,-3.017436 , 1.1860138 ,-1.8634807 , 1.8684052 , 1.8657844 , 1.7747321 ,-3.1472425 ,-1.3989028
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/layer/uint8/3.txt
new file mode 100644
index 000000000..0be8fdd19
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/layer/uint8/3.txt
@@ -0,0 +1 @@
+-2.0492268 ,-2.2555764 ,-1.3543441 ,-3.7278662 ,-4.8601675 , 3.1095552 , 4.6319957 , 3.0211062 , 1.7870535 , 4.8839574 ,-1.3494394 , 2.635408 ,-0.24201432, 1.312397 , 0.16790341, 2.42507 ,-3.101355 , 3.1760497 ,-4.500736 ,-2.53691 , 1.064206 , 0.62096214, 2.803344 ,-4.6166744 ,-4.624786 , 3.667064 ,-1.484021 , 4.9401817 ,-3.763283 , 3.4351027 ,-2.906393 , 4.9945946 ,-3.2997096 , 3.6325612 ,-0.47211674, 0.28783202, 1.8703817 ,-4.042374 ,-3.3353784 , 4.9085765 ,-1.6753131 ,-3.4926984 ,-4.8663344 ,-4.495712 , 2.3402312 ,-1.0722051 , 0.28559962, 2.1208072 , 1.3024254 , 3.4810693 , 0.09860361, 1.695624 , 1.3901931 , 1.6858819 , 3.8231227 , 4.5972557 ,-4.6835494 , 0.5753765 ,-2.2377403 , 0.13013013,-2.1165738 ,-0.26044115,-0.653468 , 1.1010929
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/layer/uint8/4.txt
new file mode 100644
index 000000000..7e2d618f9
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000/layer/uint8/4.txt
@@ -0,0 +1 @@
+ 4.397323 ,-0.51448834, 2.5729322 ,-4.3229046 , 1.149113 ,-3.8652143 ,-1.7352968 ,-0.7575065 ,-0.41720778, 4.327346 ,-4.2363043 , 0.8653738 ,-1.7511971 ,-0.7874244 ,-4.0734816 , 2.5622475 ,-3.1229742 ,-1.1783633 , 0.4017013 ,-0.76175183,-1.058416 , 1.128772 ,-3.0143378 ,-2.6688366 ,-2.575279 ,-4.326955 , 4.175434 , 4.791393 ,-1.10654 ,-4.4417224 , 3.5057635 , 1.5339037 ,-4.0297494 ,-3.7187057 ,-0.6645762 , 4.215642 , 1.6742749 , 2.5468905 , 1.73195 ,-3.3100636 ,-4.4818826 ,-2.5627983 ,-1.4624406 , 1.2433167 ,-4.005364 ,-4.3450556 ,-1.0652863 ,-1.0240986 , 3.989825 ,-4.1690702 ,-4.595108 ,-1.1154945 , 0.65749156, 2.5127344 , 2.509761 ,-4.3936505 , 3.6513395 ,-2.3340352 ,-4.3615093 , 3.5973237 , 0.9316653 , 1.9391845 , 3.6356397 , 0.8133118
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/0.txt
new file mode 100644
index 000000000..e42cbf88b
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/0.txt
@@ -0,0 +1 @@
+-4.1358833e+00, 1.7854472e+00, 4.1751757e+00, 5.5915713e-01,-2.6459083e-01,-1.7176826e+00,-1.8155930e+00, 2.8710868e+00,-2.7043006e+00, 1.0959731e+00,-2.0176995e+00,-6.5950048e-01,-3.6413522e+00,-4.1966043e+00,-2.6820884e+00,-3.6055098e+00, 3.6852844e+00, 8.9128174e-02, 1.3107824e+00,-3.6425626e+00,-3.2318896e-01, 3.6238370e+00,-4.9837337e+00,-4.0550299e+00,-1.4882606e+00, 1.5547658e+00,-1.1696080e+00, 2.1651111e+00, 4.9318314e+00,-3.5928023e+00,-1.2348548e+00,-1.7002642e+00, 1.7365140e+00,-8.8151926e-01,-4.1655774e+00,-1.0166957e+00,-3.7440193e+00, 2.8588972e+00, 4.1286149e+00,-4.9504828e+00, 4.8477168e+00,-2.2587967e+00, 2.8542519e+00,-7.9565448e-01, 6.8252671e-01, 2.5875571e-01,-6.3935977e-01,-4.8547015e+00, 4.1373856e-03,-1.3893708e+00, 8.8775367e-01, 2.1222150e-01, 3.1871333e+00, 1.3869151e+00,-3.8274391e+00, 3.2623324e+00, 7.2669631e-01, 1.0303619e+00, 8.1438148e-01, 8.1272924e-01,-2.7527118e+00, 1.8215455e+00,-1.6416427e-01, 4.9103169e+00
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/1.txt
new file mode 100644
index 000000000..7caf8ce9e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/1.txt
@@ -0,0 +1 @@
+-4.250757 , 1.4186406 , 0.63726735,-0.35924944, 1.9436699 , 3.2695885 , 3.6638293 , 4.5166173 , 1.3807241 ,-1.9112543 ,-1.9026492 ,-0.4800549 , 2.818216 ,-4.6390033 ,-3.8570547 , 3.6634028 ,-1.2112037 ,-1.3335027 , 1.3524677 , 2.7240725 ,-3.8335826 , 1.1397903 ,-3.1570992 ,-4.802078 , 3.8334577 , 0.23457901, 0.7132307 , 2.9887354 , 2.9702394 ,-1.4113717 ,-0.66712093, 0.77366674, 1.9308351 ,-0.45465755, 4.925366 , 2.4214447 , 2.8401468 , 0.49789894, 0.53141665,-2.7466767 , 0.2059374 ,-4.9661317 ,-4.1334467 , 1.6928389 ,-0.42529574, 1.1033608 , 4.275776 , 1.5063075 , 2.3528252 , 0.79505247, 3.9829993 ,-4.8472476 ,-1.2752185 , 3.7365675 , 1.976164 ,-4.742636 ,-2.7199092 ,-2.9191706 ,-3.181069 ,-4.489485 , 4.0847454 , 2.2164 , 0.9725334 ,-0.72566307
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/2.txt
new file mode 100644
index 000000000..7facffa57
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/2.txt
@@ -0,0 +1 @@
+-3.8293874 ,-0.13678598,-2.5444264 , 1.654611 ,-4.3037786 ,-3.4240584 ,-4.5642533 , 4.1250315 , 1.0469195 , 4.2802887 , 3.1617825 ,-3.1706758 ,-0.99622065, 2.7707603 , 3.7494645 ,-1.4548893 , 2.328633 , 1.7976477 ,-1.2107176 ,-2.0178459 ,-0.6488357 ,-2.9393644 , 2.8918762 , 3.6192262 ,-4.1777225 , 1.3264071 , 0.32620123, 0.7890992 ,-3.304334 , 3.4893208 , 2.5354576 ,-4.7718143 , 3.8602633 , 0.4927564 , 2.2971296 ,-0.3296792 , 2.8115997 ,-0.75152504, 0.558675 ,-2.343631 , 4.650826 ,-3.0893488 , 0.8726873 , 0.24922371, 2.7634025 , 1.0358421 ,-3.862506 ,-3.169402 ,-2.5373347 , 0.9484093 , 4.1409917 ,-4.0408096 ,-2.7231216 ,-2.548547 ,-2.6315095 , 0.8164778 ,-3.017436 , 1.1860138 ,-1.8634807 , 1.8684052 , 1.8657844 , 1.7747321 ,-3.1472425 ,-1.3989028
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/3.txt
new file mode 100644
index 000000000..0be8fdd19
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/3.txt
@@ -0,0 +1 @@
+-2.0492268 ,-2.2555764 ,-1.3543441 ,-3.7278662 ,-4.8601675 , 3.1095552 , 4.6319957 , 3.0211062 , 1.7870535 , 4.8839574 ,-1.3494394 , 2.635408 ,-0.24201432, 1.312397 , 0.16790341, 2.42507 ,-3.101355 , 3.1760497 ,-4.500736 ,-2.53691 , 1.064206 , 0.62096214, 2.803344 ,-4.6166744 ,-4.624786 , 3.667064 ,-1.484021 , 4.9401817 ,-3.763283 , 3.4351027 ,-2.906393 , 4.9945946 ,-3.2997096 , 3.6325612 ,-0.47211674, 0.28783202, 1.8703817 ,-4.042374 ,-3.3353784 , 4.9085765 ,-1.6753131 ,-3.4926984 ,-4.8663344 ,-4.495712 , 2.3402312 ,-1.0722051 , 0.28559962, 2.1208072 , 1.3024254 , 3.4810693 , 0.09860361, 1.695624 , 1.3901931 , 1.6858819 , 3.8231227 , 4.5972557 ,-4.6835494 , 0.5753765 ,-2.2377403 , 0.13013013,-2.1165738 ,-0.26044115,-0.653468 , 1.1010929
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/4.txt
new file mode 100644
index 000000000..7e2d618f9
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/4.txt
@@ -0,0 +1 @@
+ 4.397323 ,-0.51448834, 2.5729322 ,-4.3229046 , 1.149113 ,-3.8652143 ,-1.7352968 ,-0.7575065 ,-0.41720778, 4.327346 ,-4.2363043 , 0.8653738 ,-1.7511971 ,-0.7874244 ,-4.0734816 , 2.5622475 ,-3.1229742 ,-1.1783633 , 0.4017013 ,-0.76175183,-1.058416 , 1.128772 ,-3.0143378 ,-2.6688366 ,-2.575279 ,-4.326955 , 4.175434 , 4.791393 ,-1.10654 ,-4.4417224 , 3.5057635 , 1.5339037 ,-4.0297494 ,-3.7187057 ,-0.6645762 , 4.215642 , 1.6742749 , 2.5468905 , 1.73195 ,-3.3100636 ,-4.4818826 ,-2.5627983 ,-1.4624406 , 1.2433167 ,-4.005364 ,-4.3450556 ,-1.0652863 ,-1.0240986 , 3.989825 ,-4.1690702 ,-4.595108 ,-1.1154945 , 0.65749156, 2.5127344 , 2.509761 ,-4.3936505 , 3.6513395 ,-2.3340352 ,-4.3615093 , 3.5973237 , 0.9316653 , 1.9391845 , 3.6356397 , 0.8133118
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/0.txt
new file mode 100644
index 000000000..2a6b09b27
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/0.txt
@@ -0,0 +1 @@
+-4.629505 , 1.0121975 ,-0.13417433,-2.329806 ,-3.4927373 ,-0.7574039 ,-2.2674313 , 3.1983519 , 2.4298382 ,-0.23268977, 2.0218065 ,-1.5087285 ,-1.3953347 ,-3.8100643 ,-1.7438283 , 3.9852605 , 2.9817178 ,-4.0460877 , 0.09402129, 4.3802586 ,-1.0991771 , 0.4134776 , 2.8136911 ,-3.6254618 ,-3.925183 , 4.691824 , 4.381538 ,-3.235543 ,-2.6764185 , 2.659456 ,-3.2127233 , 0.0206281 , 3.4056723 ,-1.693684 , 1.1005328 ,-3.1486542 , 0.77198106, 1.4526777 ,-2.3614178 , 4.8214664 ,-3.1486242 , 0.58941853,-4.1100698 , 4.1982718 , 1.7219902 ,-2.4375956 ,-1.7505955 , 1.7465224 ,-2.7494361 , 4.0679016 , 1.8936038 ,-4.523818 ,-3.4124248 ,-4.809946 ,-1.939553 , 4.9411273 , 1.6261404 ,-2.6846552 , 2.1339247 , 0.61396503,-1.6662381 , 2.4282491 , 2.662007 ,-0.40868336
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/1.txt
new file mode 100644
index 000000000..470da6c74
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/1.txt
@@ -0,0 +1 @@
+ 0.70593804, 3.253847 , 1.1094694 , 0.5295975 , 0.5944647 ,-2.4391694 , 4.7912955 , 4.4374456 ,-2.942428 ,-3.5038033 ,-3.180417 , 2.1914082 ,-4.5295396 ,-3.0037553 ,-2.265191 , 0.20113531, 2.3805366 ,-0.9111223 ,-4.3170924 , 4.08436 , 1.1006241 ,-1.286977 , 4.811279 , 0.9131829 , 3.2051497 ,-2.8660698 ,-3.188871 , 1.4163305 , 4.061829 , 2.7783196 ,-3.4975152 , 3.4888391 , 2.5789826 ,-1.5264264 ,-0.13952135,-1.280177 , 2.4716458 , 2.6200528 ,-2.515086 , 3.441416 , 2.4515297 ,-0.9845471 , 0.9481396 , 1.1518412 , 1.6088997 , 1.445077 , 2.2620194 ,-2.0843177 ,-0.7263964 , 1.8159748 ,-3.3673623 , 0.2554476 ,-4.3550563 ,-1.4280493 ,-2.2702312 ,-4.7424164 ,-0.57241255,-2.813357 , 2.9161859 ,-0.9036504 , 0.00511268, 0.60724795, 4.8010454 , 1.6000834
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/2.txt
new file mode 100644
index 000000000..d9e048b61
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/2.txt
@@ -0,0 +1 @@
+ 7.07888961e-01, 4.75798702e+00,-1.47843570e-01,-1.95845592e+00, 4.26537895e+00,-3.03711486e+00,-1.35137546e+00,-1.10638596e-01,-1.02415502e+00,-2.65345359e+00, 5.48920631e-01,-4.38003826e+00, 3.61377740e+00,-2.91408587e+00,-3.22874010e-01,-4.74363208e-01, 3.45294738e+00, 1.02204478e+00,-1.44102740e+00, 6.80687547e-01,-2.44050741e+00, 3.71395111e+00,-2.14443612e+00, 3.70928717e+00, 1.35871637e+00, 9.73374963e-01, 1.57826161e+00,-2.91381836e-01, 1.46376801e+00, 2.96391749e+00, 1.08418810e+00,-3.50718546e+00, 4.68637037e+00, 1.04839933e+00, 2.24482760e-01, 2.38816309e+00, 3.18772525e-01,-3.90284014e+00,-3.32757282e+00,-1.61143410e+00,-1.26013708e+00, 2.24948835e+00, 7.63151050e-01, 4.18296242e+00,-8.69123042e-01, 3.19850564e-01, 3.52391124e-01, 3.30018830e+00,-4.64861393e+00,-4.64479780e+00,-2.68103647e+00,-1.13277221e+00, 2.02201343e+00,-4.05572534e-01, 3.06759548e+00,-3.55881310e+00,-1.14900565e+00,-3.00835490e+00, 1.31509733e+00, 2.50206441e-01, 2.47731134e-01, 4.98673916e+00,-1.74064383e-01,-4.43180744e-03
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/3.txt
new file mode 100644
index 000000000..cdbf98e8a
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/3.txt
@@ -0,0 +1 @@
+ 3.5591762 , 4.8821726 , 0.44271094, 4.786732 ,-2.4497197 , 2.4973536 , 2.034311 , 4.8329844 ,-3.9451184 , 4.9937835 , 2.0246332 ,-2.8319602 , 3.9617133 , 4.10946 ,-4.3191586 ,-2.8492777 ,-2.648121 ,-4.199404 ,-0.05163948,-4.7944984 , 2.8989205 , 1.4747709 ,-3.1194637 ,-2.877846 ,-0.39301065, 2.616311 , 2.6305614 , 1.7303206 , 3.6059175 ,-2.745988 , 2.5924454 , 3.0149276 , 4.0359216 ,-0.6135884 ,-2.5023808 ,-2.3395267 ,-3.0633461 ,-2.3836162 ,-4.4779797 ,-1.30866 , 1.9110863 , 0.654628 ,-4.559368 , 0.34231895,-0.8196542 , 4.7275734 , 3.2823656 ,-4.9644713 , 2.9191613 ,-3.4621727 ,-4.276584 ,-1.7153062 , 1.8820064 , 1.2659297 , 3.4141889 ,-4.905296 , 4.619848 ,-3.9501083 ,-1.5550466 , 3.6841137 , 1.7121594 , 1.9466268 , 1.5684807 , 4.5554323
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/4.txt
new file mode 100644
index 000000000..065d77df6
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/4.txt
@@ -0,0 +1 @@
+-2.2269225 ,-1.2782103 ,-3.381931 ,-1.5229299 , 2.0681949 , 1.7630705 ,-0.81455594,-2.6558595 ,-3.4870632 ,-4.647749 , 2.4453654 ,-2.242679 ,-1.0272806 , 0.5656208 , 0.69442594,-4.4343104 ,-3.9649677 ,-3.8908577 ,-1.642287 , 3.0714357 , 1.0880747 ,-2.1665683 ,-4.0994506 , 2.004911 , 3.5922902 , 3.775 , 1.1580672 ,-1.4154137 ,-4.4964633 ,-1.696588 , 4.0220857 ,-1.2785947 ,-4.2075186 ,-4.515838 , 0.99715126, 3.0928102 ,-2.295537 ,-4.772882 ,-1.2936146 ,-2.6903791 , 0.10453273,-1.8041211 , 3.787591 , 0.9493053 ,-4.41586 , 3.4252715 ,-0.25001565, 4.655357 ,-1.8767506 , 0.00600041, 4.660605 , 2.550518 ,-3.830558 , 1.7777463 ,-0.7170577 ,-0.26554853,-3.5770113 ,-1.1354474 , 4.663121 , 3.100427 , 0.03313563,-1.7419808 ,-1.4426676 ,-3.912533
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001/channel/int16/0.txt
new file mode 100644
index 000000000..af1c2dff8
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001/channel/int16/0.txt
@@ -0,0 +1 @@
+-4.0575085 , 2.5941508 ,-2.550309 ,-0.03760919
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001/channel/int16/1.txt
new file mode 100644
index 000000000..0ede613ac
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001/channel/int16/1.txt
@@ -0,0 +1 @@
+ 0.4857123,-4.032874 ,-3.687589 ,-1.235227
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001/channel/int16/2.txt
new file mode 100644
index 000000000..b0b0392ba
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001/channel/int16/2.txt
@@ -0,0 +1 @@
+ 0.21878362, 3.9175916 ,-4.6141233 , 3.709655
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001/channel/int16/3.txt
new file mode 100644
index 000000000..d8a8cad12
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001/channel/int16/3.txt
@@ -0,0 +1 @@
+-1.9645791,-1.4466153, 1.2543651,-1.0288917
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001/channel/int16/4.txt
new file mode 100644
index 000000000..ca2a1c3b4
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001/channel/int16/4.txt
@@ -0,0 +1 @@
+-2.1611342, 2.4875243, 3.096089 ,-1.1327268
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001/layer/uint8/0.txt
new file mode 100644
index 000000000..9def1c2eb
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001/layer/uint8/0.txt
@@ -0,0 +1 @@
+0.24671102,3.271825 ,3.979895 ,1.3334678
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001/layer/uint8/1.txt
new file mode 100644
index 000000000..eaec2409f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001/layer/uint8/1.txt
@@ -0,0 +1 @@
+ 1.9181111, 2.2396102,-2.8641696,-1.9045062
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001/layer/uint8/2.txt
new file mode 100644
index 000000000..3e05181cc
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001/layer/uint8/2.txt
@@ -0,0 +1 @@
+4.751434 ,2.8798263 ,0.15149078,2.9485583
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001/layer/uint8/3.txt
new file mode 100644
index 000000000..19d95b267
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001/layer/uint8/3.txt
@@ -0,0 +1 @@
+-1.5743442 , 0.6716824 , 0.75737774,-0.27396253
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001/layer/uint8/4.txt
new file mode 100644
index 000000000..d302e07a9
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001/layer/uint8/4.txt
@@ -0,0 +1 @@
+-1.0539489 , 1.9595883 , 0.19975437, 2.526178
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/0.txt
new file mode 100644
index 000000000..9def1c2eb
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/0.txt
@@ -0,0 +1 @@
+0.24671102,3.271825 ,3.979895 ,1.3334678
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/1.txt
new file mode 100644
index 000000000..eaec2409f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/1.txt
@@ -0,0 +1 @@
+ 1.9181111, 2.2396102,-2.8641696,-1.9045062
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/2.txt
new file mode 100644
index 000000000..3e05181cc
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/2.txt
@@ -0,0 +1 @@
+4.751434 ,2.8798263 ,0.15149078,2.9485583
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/3.txt
new file mode 100644
index 000000000..19d95b267
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/3.txt
@@ -0,0 +1 @@
+-1.5743442 , 0.6716824 , 0.75737774,-0.27396253
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/4.txt
new file mode 100644
index 000000000..d302e07a9
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/4.txt
@@ -0,0 +1 @@
+-1.0539489 , 1.9595883 , 0.19975437, 2.526178
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/0.txt
new file mode 100644
index 000000000..af1c2dff8
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/0.txt
@@ -0,0 +1 @@
+-4.0575085 , 2.5941508 ,-2.550309 ,-0.03760919
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/1.txt
new file mode 100644
index 000000000..0ede613ac
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/1.txt
@@ -0,0 +1 @@
+ 0.4857123,-4.032874 ,-3.687589 ,-1.235227
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/2.txt
new file mode 100644
index 000000000..b0b0392ba
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/2.txt
@@ -0,0 +1 @@
+ 0.21878362, 3.9175916 ,-4.6141233 , 3.709655
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/3.txt
new file mode 100644
index 000000000..d8a8cad12
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/3.txt
@@ -0,0 +1 @@
+-1.9645791,-1.4466153, 1.2543651,-1.0288917
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/4.txt
new file mode 100644
index 000000000..ca2a1c3b4
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/4.txt
@@ -0,0 +1 @@
+-2.1611342, 2.4875243, 3.096089 ,-1.1327268
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/int16/0.txt
new file mode 100644
index 000000000..f82ad6704
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/int16/0.txt
@@ -0,0 +1 @@
+ 1.4040831 , 4.8621206 , 0.22880335,-0.3116556 , 0.260938 ,-0.61554366, 3.779648 ,-4.650609 , 3.886638 ,-0.25574106,-0.45002133, 4.9870906 ,-2.3277295 ,-4.9648423 ,-3.7695415 , 3.2857463 ,-4.5514555 ,-3.7705963 , 3.8458307 ,-4.797776 ,-3.4295716 ,-4.6026535 ,-1.4011091 , 2.8851774
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/int16/1.txt
new file mode 100644
index 000000000..722337286
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/int16/1.txt
@@ -0,0 +1 @@
+-4.171929 ,-2.2911541 , 2.8965824 , 0.27504483,-1.6088463 ,-0.6509234 ,-3.262618 , 0.9633116 , 2.4504175 , 0.97706884, 0.4212074 , 1.4083375 ,-2.9757218 ,-3.1010823 ,-1.7146534 , 4.105306 , 0.07195274, 3.0232217 ,-2.7568955 ,-4.8887763 ,-3.4171093 ,-0.91494775, 2.5260248 , 4.74184
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/int16/2.txt
new file mode 100644
index 000000000..1283a8ad1
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/int16/2.txt
@@ -0,0 +1 @@
+ 0.14139967, 1.9541235 ,-4.945228 ,-0.48999134, 3.7479703 , 0.29318067, 0.21036309, 4.357736 ,-4.3354783 ,-1.9236348 , 0.49615476,-1.8418436 ,-2.425741 , 4.817022 , 1.5093465 , 2.417444 ,-4.69463 , 0.3433745 ,-4.5979595 ,-3.9027495 ,-0.29977685, 4.9239326 ,-0.39175773, 1.277211
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/int16/3.txt
new file mode 100644
index 000000000..c931e1752
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/int16/3.txt
@@ -0,0 +1 @@
+-3.692852 ,-1.0075341 ,-2.4409268 , 0.92995465,-3.1325107 , 4.028981 , 0.8446181 ,-2.2990613 , 4.0820794 , 3.1633005 , 4.1527267 ,-3.9514909 , 2.6104712 , 4.660645 ,-1.7398617 , 0.15663597,-3.6861904 ,-2.9019265 , 3.8828175 ,-2.712909 , 4.3699546 ,-3.5953352 ,-3.0655813 , 0.59767616
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/int16/4.txt
new file mode 100644
index 000000000..d33c2dbec
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/int16/4.txt
@@ -0,0 +1 @@
+-2.8695228 , 2.865197 , 0.6635586 , 0.22709726, 2.85572 ,-4.2051144 , 1.5833759 ,-4.4277377 , 4.0004573 , 2.4766827 , 3.0412688 ,-4.8891425 ,-4.489896 , 3.0812325 , 2.1947708 , 1.6387184 , 0.31932488,-0.41092923,-0.0730476 , 0.7265327 , 4.1333 , 3.157228 , 4.7395325 , 3.4576747
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/0.txt
new file mode 100644
index 000000000..0614b5e83
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/0.txt
@@ -0,0 +1 @@
+0.01090685,0.0581577 ,0.637094 ,0.64067715,0.26264507,0.13692169,0.9649414 ,0.5117181 ,0.18012471,0.07855253,0.6358017 ,0.62257963,0.41469443,0.93169045,0.20763828,0.7634293 ,0.75929826,0.72708374,0.23463063,0.58222896,0.6351517 ,0.68781173,0.5558012 ,0.7652179
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/1.txt
new file mode 100644
index 000000000..b1c39382f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/1.txt
@@ -0,0 +1 @@
+0.57017624,0.08235867,0.03672464,0.40372616,0.7353964 ,0.59611887,0.7675548 ,0.21004233,0.09803218,0.20009473,0.8821493 ,0.17015271,0.14840214,0.99910176,0.37003204,0.22893582,0.43173164,0.3105084 ,0.41997132,0.43714985,0.08115962,0.71896386,0.7810953 ,0.00524598
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/2.txt
new file mode 100644
index 000000000..7e562de75
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/2.txt
@@ -0,0 +1 @@
+0.65292275,0.79842275,0.97853714,0.6711518 ,0.607567 ,0.40971732,0.74838483,0.95853555,0.32158023,0.911524 ,0.66938365,0.8573132 ,0.3047727 ,0.5561248 ,0.914098 ,0.07650814,0.37868017,0.29269257,0.19652605,0.63025194,0.61496884,0.32011527,0.8204132 ,0.21866946
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/3.txt
new file mode 100644
index 000000000..2958a7f54
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/3.txt
@@ -0,0 +1 @@
+0.4548901 ,0.56957537,0.0252368 ,0.4884317 ,0.7516498 ,0.02631272,0.22107519,0.95249426,0.34902394,0.11520014,0.808911 ,0.4148615 ,0.63615656,0.84020686,0.3633697 ,0.23993976,0.54176176,0.86938345,0.81628686,0.6380988 ,0.91891205,0.0406627 ,0.90289026,0.9429013
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/4.txt
new file mode 100644
index 000000000..fc969308e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/4.txt
@@ -0,0 +1 @@
+0.9309136 ,0.02123719,0.64467335,0.6910113 ,0.47402772,0.54622203,0.31527275,0.81530565,0.98981965,0.36102158,0.03114039,0.1902339 ,0.45183742,0.60178596,0.4683102 ,0.59810966,0.40558222,0.5420302 ,0.72699505,0.9575108 ,0.46746576,0.08518691,0.40302262,0.69213694
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/0.txt
new file mode 100644
index 000000000..f82ad6704
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/0.txt
@@ -0,0 +1 @@
+ 1.4040831 , 4.8621206 , 0.22880335,-0.3116556 , 0.260938 ,-0.61554366, 3.779648 ,-4.650609 , 3.886638 ,-0.25574106,-0.45002133, 4.9870906 ,-2.3277295 ,-4.9648423 ,-3.7695415 , 3.2857463 ,-4.5514555 ,-3.7705963 , 3.8458307 ,-4.797776 ,-3.4295716 ,-4.6026535 ,-1.4011091 , 2.8851774
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/1.txt
new file mode 100644
index 000000000..722337286
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/1.txt
@@ -0,0 +1 @@
+-4.171929 ,-2.2911541 , 2.8965824 , 0.27504483,-1.6088463 ,-0.6509234 ,-3.262618 , 0.9633116 , 2.4504175 , 0.97706884, 0.4212074 , 1.4083375 ,-2.9757218 ,-3.1010823 ,-1.7146534 , 4.105306 , 0.07195274, 3.0232217 ,-2.7568955 ,-4.8887763 ,-3.4171093 ,-0.91494775, 2.5260248 , 4.74184
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/2.txt
new file mode 100644
index 000000000..1283a8ad1
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/2.txt
@@ -0,0 +1 @@
+ 0.14139967, 1.9541235 ,-4.945228 ,-0.48999134, 3.7479703 , 0.29318067, 0.21036309, 4.357736 ,-4.3354783 ,-1.9236348 , 0.49615476,-1.8418436 ,-2.425741 , 4.817022 , 1.5093465 , 2.417444 ,-4.69463 , 0.3433745 ,-4.5979595 ,-3.9027495 ,-0.29977685, 4.9239326 ,-0.39175773, 1.277211
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/3.txt
new file mode 100644
index 000000000..c931e1752
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/3.txt
@@ -0,0 +1 @@
+-3.692852 ,-1.0075341 ,-2.4409268 , 0.92995465,-3.1325107 , 4.028981 , 0.8446181 ,-2.2990613 , 4.0820794 , 3.1633005 , 4.1527267 ,-3.9514909 , 2.6104712 , 4.660645 ,-1.7398617 , 0.15663597,-3.6861904 ,-2.9019265 , 3.8828175 ,-2.712909 , 4.3699546 ,-3.5953352 ,-3.0655813 , 0.59767616
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/4.txt
new file mode 100644
index 000000000..d33c2dbec
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/4.txt
@@ -0,0 +1 @@
+-2.8695228 , 2.865197 , 0.6635586 , 0.22709726, 2.85572 ,-4.2051144 , 1.5833759 ,-4.4277377 , 4.0004573 , 2.4766827 , 3.0412688 ,-4.8891425 ,-4.489896 , 3.0812325 , 2.1947708 , 1.6387184 , 0.31932488,-0.41092923,-0.0730476 , 0.7265327 , 4.1333 , 3.157228 , 4.7395325 , 3.4576747
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/int16/0.txt
new file mode 100644
index 000000000..cc434b0a8
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/int16/0.txt
@@ -0,0 +1 @@
+-4.0618963 ,-0.56899416,-2.6450877 , 2.4534085 , 1.98115 , 1.906561 ,-3.9617727 ,-0.6071247 , 3.1096997 , 4.4270124 ,-2.8755112 ,-1.8822336 ,-2.3567479 , 1.9797888 ,-3.5018713 , 3.429169
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/int16/1.txt
new file mode 100644
index 000000000..2c637a1d2
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/int16/1.txt
@@ -0,0 +1 @@
+-1.6089132 , 1.4328785 ,-3.2579598 ,-2.1328773 ,-2.6566415 , 2.541386 ,-4.3314023 , 0.48684084, 3.3134763 ,-2.69083 ,-0.45710313,-3.6763198 , 0.22075526,-3.159208 ,-2.1573126 , 4.1621423
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/int16/2.txt
new file mode 100644
index 000000000..4b57fe8e0
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/int16/2.txt
@@ -0,0 +1 @@
+-4.061572 , 3.0518744 , 2.694435 ,-4.720131 , 1.3782452 , 4.083631 , 4.1221976 ,-1.2299284 , 3.096133 , 3.8382158 ,-1.9518853 , 4.350529 , 0.09219506, 2.6483617 , 0.74373996, 2.7447948
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/int16/3.txt
new file mode 100644
index 000000000..49c3022c2
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/int16/3.txt
@@ -0,0 +1 @@
+ 4.68769 ,-3.2768764 , 3.1849844 , 4.497627 ,-1.2611016 ,-3.1152303 ,-0.8408633 , 0.4938034 , 4.0921655 ,-2.3150117 , 0.10100875,-3.8374226 , 4.08059 ,-0.74594986,-3.1000822 , 4.3654246
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/int16/4.txt
new file mode 100644
index 000000000..e02c8ca16
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/int16/4.txt
@@ -0,0 +1 @@
+-3.6168842 , 4.1935644 , 0.73750836, 4.6044145 , 2.8967912 ,-1.8085694 , 4.539956 ,-0.37032878, 1.9738418 , 1.5388782 ,-2.945171 ,-3.3875864 ,-4.516983 ,-3.4998245 ,-4.676514 ,-2.2738194
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/0.txt
new file mode 100644
index 000000000..f4fb503ea
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/0.txt
@@ -0,0 +1 @@
+0.4383064 ,0.8700848 ,0.86010957,0.08396256,0.7963264 ,0.4156023 ,0.28146362,0.82196397,0.9921972 ,0.09969576,0.23987265,0.6734369 ,0.5469574 ,0.20805728,0.32639247,0.76773816
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/1.txt
new file mode 100644
index 000000000..af4b01576
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/1.txt
@@ -0,0 +1 @@
+0.4565062 ,0.92036587,0.47286046,0.18118097,0.5347498 ,0.91550153,0.300375 ,0.00581101,0.38686675,0.91085213,0.07278002,0.35556316,0.13014294,0.7274307 ,0.13867259,0.27517235
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/2.txt
new file mode 100644
index 000000000..57716034e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/2.txt
@@ -0,0 +1 @@
+0.6900174 ,0.28745306,0.30255774,0.5095008 ,0.6689176 ,0.4914624 ,0.92629427,0.504829 ,0.33514255,0.49005315,0.08569656,0.60965323,0.82193315,0.12380831,0.06971261,0.8822662
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/3.txt
new file mode 100644
index 000000000..1e03d83b0
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/3.txt
@@ -0,0 +1 @@
+0.4240734 ,0.5430392 ,0.7536325 ,0.46065134,0.00315792,0.02719985,0.7080977 ,0.24389206,0.8114604 ,0.13292362,0.346597 ,0.70247084,0.55753845,0.01969242,0.82950485,0.66249627
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/4.txt
new file mode 100644
index 000000000..89ee30a6b
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/4.txt
@@ -0,0 +1 @@
+0.31586212,0.19079527,0.9161567 ,0.8614566 ,0.9018915 ,0.34651542,0.62554437,0.05542602,0.8268219 ,0.38112178,0.9396123 ,0.49426383,0.8034765 ,0.72456217,0.5404088 ,0.8512237
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/0.txt
new file mode 100644
index 000000000..cc434b0a8
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/0.txt
@@ -0,0 +1 @@
+-4.0618963 ,-0.56899416,-2.6450877 , 2.4534085 , 1.98115 , 1.906561 ,-3.9617727 ,-0.6071247 , 3.1096997 , 4.4270124 ,-2.8755112 ,-1.8822336 ,-2.3567479 , 1.9797888 ,-3.5018713 , 3.429169
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/1.txt
new file mode 100644
index 000000000..2c637a1d2
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/1.txt
@@ -0,0 +1 @@
+-1.6089132 , 1.4328785 ,-3.2579598 ,-2.1328773 ,-2.6566415 , 2.541386 ,-4.3314023 , 0.48684084, 3.3134763 ,-2.69083 ,-0.45710313,-3.6763198 , 0.22075526,-3.159208 ,-2.1573126 , 4.1621423
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/2.txt
new file mode 100644
index 000000000..4b57fe8e0
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/2.txt
@@ -0,0 +1 @@
+-4.061572 , 3.0518744 , 2.694435 ,-4.720131 , 1.3782452 , 4.083631 , 4.1221976 ,-1.2299284 , 3.096133 , 3.8382158 ,-1.9518853 , 4.350529 , 0.09219506, 2.6483617 , 0.74373996, 2.7447948
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/3.txt
new file mode 100644
index 000000000..49c3022c2
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/3.txt
@@ -0,0 +1 @@
+ 4.68769 ,-3.2768764 , 3.1849844 , 4.497627 ,-1.2611016 ,-3.1152303 ,-0.8408633 , 0.4938034 , 4.0921655 ,-2.3150117 , 0.10100875,-3.8374226 , 4.08059 ,-0.74594986,-3.1000822 , 4.3654246
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/4.txt
new file mode 100644
index 000000000..e02c8ca16
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/4.txt
@@ -0,0 +1 @@
+-3.6168842 , 4.1935644 , 0.73750836, 4.6044145 , 2.8967912 ,-1.8085694 , 4.539956 ,-0.37032878, 1.9738418 , 1.5388782 ,-2.945171 ,-3.3875864 ,-4.516983 ,-3.4998245 ,-4.676514 ,-2.2738194
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/int16/0.txt
new file mode 100644
index 000000000..18b34c8b1
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/int16/0.txt
@@ -0,0 +1 @@
+ 1.5887886e+00,-4.7446389e+00,-8.6568648e-01,-2.9789083e+00, 4.4470620e+00,-4.6563668e+00,-3.8466794e+00, 1.8815753e-03,-2.7699089e+00, 5.2776605e-01, 3.6518128e+00,-3.0939088e+00,-3.6008542e+00, 7.2454107e-01, 2.2568390e+00,-4.4835806e+00
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/int16/1.txt
new file mode 100644
index 000000000..d652da699
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/int16/1.txt
@@ -0,0 +1 @@
+ 4.770412 ,-1.7520845 , 2.4057522 ,-0.74166125,-0.10780027, 4.5796657 ,-3.513094 ,-3.0285823 , 1.2001143 , 2.806742 ,-2.0503895 , 2.8160958 ,-1.5392824 ,-3.7772799 , 2.9158401 ,-1.0586692
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/int16/2.txt
new file mode 100644
index 000000000..e6d6e004f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/int16/2.txt
@@ -0,0 +1 @@
+ 3.937408 ,-0.11191579, 2.2054992 , 2.847275 , 3.4895647 , 4.2361116 ,-3.2401278 ,-1.5813186 ,-4.558396 ,-0.89455926, 4.204445 , 3.5968838 , 2.773891 ,-2.9562843 ,-0.62606305,-0.03814701
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/int16/3.txt
new file mode 100644
index 000000000..8b472058e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/int16/3.txt
@@ -0,0 +1 @@
+ 3.5032003 , 4.6036057 , 0.28915945, 4.671659 ,-1.978598 , 2.1773603 ,-0.54175234,-3.0131943 ,-2.7422159 ,-3.4361897 , 0.2850049 , 4.1412387 ,-4.86403 ,-0.67577606,-1.4206086 ,-2.357092
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/int16/4.txt
new file mode 100644
index 000000000..bba80be5f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/int16/4.txt
@@ -0,0 +1 @@
+ 2.5063417 , 0.22874236, 2.2677753 ,-4.4159026 , 1.7464 , 4.6051064 ,-4.2867146 , 2.730521 , 1.6372519 , 0.70292765, 3.459053 ,-4.162376 , 0.36788836, 2.213299 , 4.110952 , 1.6797827
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/0.txt
new file mode 100644
index 000000000..233e5eae3
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/0.txt
@@ -0,0 +1 @@
+ 2.7731526 , 2.451602 , 3.7535272 ,-1.2774152 , 1.5482912 , 1.3402948 , 4.4792123 ,-4.4954367 , 3.354679 ,-3.3615496 ,-4.619757 ,-3.3659618 , 4.7626247 ,-1.3596478 ,-4.835548 , 0.78964525
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/1.txt
new file mode 100644
index 000000000..6a126081d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/1.txt
@@ -0,0 +1 @@
+ 0.5400839 ,-3.2621996 ,-3.4817135 , 3.8183312 , 0.48498327, 2.9812584 , 4.111276 , 0.11223658, 4.7201405 , 2.4256718 , 1.4895477 , 4.7596602 ,-0.32709372, 1.3507305 ,-0.30043927,-1.8077502
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/2.txt
new file mode 100644
index 000000000..eccd2c625
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/2.txt
@@ -0,0 +1 @@
+ 3.8758078 , 4.978636 ,-0.22925885,-2.6760504 ,-1.9160627 ,-4.609644 ,-0.9515802 , 3.558274 , 2.9096057 , 0.3340422 , 0.38608226,-0.32168412, 4.688853 ,-4.583811 ,-2.5113506 ,-4.6688786
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/3.txt
new file mode 100644
index 000000000..0da05277c
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/3.txt
@@ -0,0 +1 @@
+-2.9868221 , 2.4237797 , 1.0833962 ,-0.9231426 ,-2.1091506 ,-2.6163697 ,-0.23101932,-1.9252896 , 4.7034135 , 3.1088963 ,-2.345823 ,-2.7866168 ,-3.186763 ,-4.431844 , 3.3113294 , 0.9501982
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/4.txt
new file mode 100644
index 000000000..ace24f7c1
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/4.txt
@@ -0,0 +1 @@
+ 3.9716747 ,-2.254871 , 1.1943274 ,-2.212602 , 3.4311683 , 1.114989 , 4.0739036 , 0.47244295,-3.5793104 ,-3.359908 ,-4.7657595 , 2.0369127 ,-2.5619278 ,-3.4452975 ,-4.5852203 ,-1.137643
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/0.txt
new file mode 100644
index 000000000..18b34c8b1
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/0.txt
@@ -0,0 +1 @@
+ 1.5887886e+00,-4.7446389e+00,-8.6568648e-01,-2.9789083e+00, 4.4470620e+00,-4.6563668e+00,-3.8466794e+00, 1.8815753e-03,-2.7699089e+00, 5.2776605e-01, 3.6518128e+00,-3.0939088e+00,-3.6008542e+00, 7.2454107e-01, 2.2568390e+00,-4.4835806e+00
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/1.txt
new file mode 100644
index 000000000..d652da699
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/1.txt
@@ -0,0 +1 @@
+ 4.770412 ,-1.7520845 , 2.4057522 ,-0.74166125,-0.10780027, 4.5796657 ,-3.513094 ,-3.0285823 , 1.2001143 , 2.806742 ,-2.0503895 , 2.8160958 ,-1.5392824 ,-3.7772799 , 2.9158401 ,-1.0586692
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/2.txt
new file mode 100644
index 000000000..e6d6e004f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/2.txt
@@ -0,0 +1 @@
+ 3.937408 ,-0.11191579, 2.2054992 , 2.847275 , 3.4895647 , 4.2361116 ,-3.2401278 ,-1.5813186 ,-4.558396 ,-0.89455926, 4.204445 , 3.5968838 , 2.773891 ,-2.9562843 ,-0.62606305,-0.03814701
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/3.txt
new file mode 100644
index 000000000..8b472058e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/3.txt
@@ -0,0 +1 @@
+ 3.5032003 , 4.6036057 , 0.28915945, 4.671659 ,-1.978598 , 2.1773603 ,-0.54175234,-3.0131943 ,-2.7422159 ,-3.4361897 , 0.2850049 , 4.1412387 ,-4.86403 ,-0.67577606,-1.4206086 ,-2.357092
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/4.txt
new file mode 100644
index 000000000..bba80be5f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/4.txt
@@ -0,0 +1 @@
+ 2.5063417 , 0.22874236, 2.2677753 ,-4.4159026 , 1.7464 , 4.6051064 ,-4.2867146 , 2.730521 , 1.6372519 , 0.70292765, 3.459053 ,-4.162376 , 0.36788836, 2.213299 , 4.110952 , 1.6797827
diff --git a/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/0.txt
new file mode 100644
index 000000000..5e926a2d9
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/0.txt
@@ -0,0 +1 @@
+0.15500909,0.32379007,0.12717001,0.60674316,0.07691418,0.437071 ,0.3737046 ,0.798342 ,0.65901846,0.40579247,0.15460491,0.80063623,0.591834 ,0.6617658 ,0.5617774 ,0.44884747,0.7996519 ,0.75895494,0.6239346 ,0.56500244,0.8955974 ,0.32503998,0.05756519,0.11889575,0.19635268,0.33958906,0.916527 ,0.16366032,0.51954055,0.2615102 ,0.07677322,0.6970092 ,0.27848312,0.97694606,0.73990864,0.96292055
diff --git a/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/1.txt
new file mode 100644
index 000000000..eb5de0c0e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/1.txt
@@ -0,0 +1 @@
+0.85332185,0.03102963,0.54344934,0.6300742 ,0.3323267 ,0.1701224 ,0.36199054,0.23949413,0.11960976,0.668403 ,0.7907452 ,0.4377144 ,0.87145853,0.75605077,0.37314144,0.3622036 ,0.4321453 ,0.8770253 ,0.10936793,0.0734281 ,0.2922192 ,0.5829591 ,0.5422962 ,0.84274834,0.48475483,0.23154257,0.20037153,0.27911612,0.30018023,0.23753181,0.98804647,0.61455756,0.90376633,0.8255312 ,0.21020697,0.6272272
diff --git a/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/2.txt
new file mode 100644
index 000000000..16561ef0d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/2.txt
@@ -0,0 +1 @@
+0.29736656,0.5712386 ,0.55447775,0.9014779 ,0.6208391 ,0.3413809 ,0.043885 ,0.5474101 ,0.8642339 ,0.05225753,0.36101478,0.15561381,0.776422 ,0.9997885 ,0.35188794,0.23418508,0.0882741 ,0.5797471 ,0.99945694,0.22190607,0.12337059,0.3701574 ,0.65161157,0.9830193 ,0.46270686,0.10077237,0.23681253,0.8734158 ,0.8358533 ,0.08817147,0.3845248 ,0.12799203,0.66830546,0.14838815,0.90201443,0.21123447
diff --git a/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/3.txt
new file mode 100644
index 000000000..deba38b2d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/3.txt
@@ -0,0 +1 @@
+0.92424273,0.35776526,0.0776509 ,0.93697083,0.6559925 ,0.78421926,0.7511033 ,0.71389145,0.52217877,0.41876563,0.3560251 ,0.5862293 ,0.53027606,0.32203177,0.24654935,0.55851364,0.35312092,0.38102064,0.21245371,0.87299466,0.94972914,0.54950166,0.3445233 ,0.98951054,0.37458083,0.3778964 ,0.64035404,0.10410193,0.18511558,0.1942945 ,0.07018933,0.6113747 ,0.38076922,0.08337755,0.98258 ,0.91440874
diff --git a/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/4.txt
new file mode 100644
index 000000000..78b783a74
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/4.txt
@@ -0,0 +1 @@
+0.3790198 ,0.6347678 ,0.42544237,0.37033263,0.08057033,0.49041638,0.61705315,0.15411597,0.6455052 ,0.6857795 ,0.9613043 ,0.60357374,0.57679754,0.22550431,0.05105425,0.8641173 ,0.65559083,0.18274343,0.8963692 ,0.22369736,0.3133119 ,0.27507883,0.00539197,0.6846556 ,0.5969273 ,0.78488904,0.87746257,0.15459861,0.23133573,0.59048635,0.07172906,0.28935516,0.02084327,0.09926946,0.02687503,0.7306079
diff --git a/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/0.txt
new file mode 100644
index 000000000..25b600c5f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/0.txt
@@ -0,0 +1 @@
+0.641226 ,0.68639857,0.87044334,0.9448475 ,0.21544299,0.5202749 ,0.5077167 ,0.23931624,0.5712026 ,0.4167988 ,0.56711906,0.52392703,0.42762014,0.5277072 ,0.03028643,0.18017273,0.8823869 ,0.5752544 ,0.09368648,0.50277 ,0.784248 ,0.04220072,0.55217946,0.75145644,0.7957966 ,0.6563401 ,0.54975605,0.17231019,0.4219812 ,0.27839735,0.5850074 ,0.24070603,0.00957893,0.3669335 ,0.03722228,0.8705231
diff --git a/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/1.txt
new file mode 100644
index 000000000..caadfed22
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/1.txt
@@ -0,0 +1 @@
+0.76871806,0.65729177,0.946514 ,0.4308198 ,0.65200335,0.5745432 ,0.2990488 ,0.3156028 ,0.3218111 ,0.44709972,0.9411461 ,0.4828708 ,0.5707792 ,0.10645963,0.74497086,0.3563156 ,0.07986172,0.64869064,0.73329425,0.8848129 ,0.3027897 ,0.8753744 ,0.8884493 ,0.3606782 ,0.88617206,0.20232914,0.10251648,0.6366529 ,0.20422891,0.24426484,0.6952833 ,0.21889713,0.11477511,0.40650114,0.9637219 ,0.9751801
diff --git a/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/2.txt
new file mode 100644
index 000000000..bc4a49454
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/2.txt
@@ -0,0 +1 @@
+0.5773043 ,0.6733178 ,0.22994593,0.32895002,0.74122405,0.6671442 ,0.1899878 ,0.35264668,0.31084946,0.3864719 ,0.7035006 ,0.46563607,0.44263086,0.2414678 ,0.7430625 ,0.72898006,0.9982008 ,0.8989132 ,0.45622516,0.17876478,0.9356994 ,0.85493064,0.73729265,0.9804242 ,0.8735895 ,0.14825071,0.33990774,0.76397645,0.14657325,0.2492199 ,0.43957144,0.20367876,0.43692476,0.28123745,0.24346785,0.21133597
diff --git a/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/3.txt
new file mode 100644
index 000000000..18f8666a0
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/3.txt
@@ -0,0 +1 @@
+0.74837255,0.7530814 ,0.05257462,0.06676125,0.26824346,0.05064487,0.23974492,0.5355457 ,0.97374374,0.38518724,0.3781766 ,0.7047476 ,0.95856845,0.09918232,0.36570287,0.5659468 ,0.8793284 ,0.7967468 ,0.99486005,0.11670698,0.42955273,0.25254622,0.06959745,0.5107888 ,0.88106513,0.3649466 ,0.7039582 ,0.8535825 ,0.3979168 ,0.9560912 ,0.17733434,0.69954944,0.35459924,0.28516313,0.75249106,0.7197228
diff --git a/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/4.txt
new file mode 100644
index 000000000..b51c5ebd0
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/4.txt
@@ -0,0 +1 @@
+0.73320377,0.33635676,0.05811058,0.7032399 ,0.26380542,0.99637365,0.36622 ,0.47471517,0.5940316 ,0.39782768,0.46486765,0.5167471 ,0.61612487,0.93076104,0.8955697 ,0.5320168 ,0.41166067,0.29174343,0.07476811,0.60023075,0.0961028 ,0.77073896,0.17360727,0.48763612,0.31430086,0.37943754,0.7456216 ,0.16767363,0.9368368 ,0.09397154,0.68992966,0.5829225 ,0.7521187 ,0.06086114,0.13137193,0.22886442
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/channel/int16/0.txt
new file mode 100644
index 000000000..1a4fc3ed0
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/channel/int16/0.txt
@@ -0,0 +1 @@
+ 2.2145607 , 0.88045335, 0.45151594, 2.852104 , 3.191637 ,-0.4578638 , 1.4858874 ,-2.1207588 ,-0.77495986,-4.1637363 , 0.83028954,-3.9974387 ,-3.3348315 , 3.7137656 ,-2.9883633 , 3.4332464 , 3.7178712 , 3.5850213 , 0.9240786 ,-0.07091421,-4.516931 , 3.965739 ,-4.828566 , 3.860382 , 0.3243482 , 1.6835089 ,-1.4710085 ,-2.6625636 , 1.942659 , 0.12808529, 1.3640044 ,-3.0124736 ,-3.646485 , 1.6046281 , 1.1087954 ,-2.4648561 ,-2.3274968 , 1.2196178 , 3.0752547 , 1.8316921 ,-2.926682 ,-2.247648 , 4.1264873 , 4.700915 ,-0.6861696 , 3.5246365 ,-2.5577545 , 1.832533 ,-4.3125343 ,-2.8579648 , 3.5299218 ,-0.67911506, 0.86782926,-2.918562 ,-3.3644724 ,-2.0097935 , 0.3721956 ,-1.3528451 , 3.8267515 , 4.916677 , 3.2055025 ,-0.64435905, 3.877367 ,-1.830818
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/channel/int16/1.txt
new file mode 100644
index 000000000..09c06c74c
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/channel/int16/1.txt
@@ -0,0 +1 @@
+ 4.5410523 , 4.4007382 , 3.3252192 , 0.40420002,-4.7642856 , 2.0282986 , 2.32176 , 3.160375 ,-4.3348713 ,-2.324847 , 4.327631 , 3.253995 , 0.53624976,-4.4896946 , 4.0600896 , 2.697662 ,-3.0693228 ,-4.7954664 , 2.010163 , 4.5790668 , 0.00921074,-4.638007 ,-2.612561 , 4.338762 ,-1.3632652 ,-0.55081725, 4.273717 , 3.1074166 , 3.1386747 ,-4.033469 ,-0.7298752 ,-3.4973295 , 4.454913 ,-0.5148646 ,-2.4100194 , 2.7154703 , 4.1507893 , 2.3424785 ,-1.7028755 ,-2.6013496 ,-1.831555 ,-4.07971 ,-1.039077 ,-1.8733021 ,-3.885844 , 3.5691998 ,-3.8779395 ,-4.7566814 ,-3.570575 ,-3.0510366 ,-4.6841617 ,-4.751285 ,-2.9700782 , 3.4774506 ,-1.3150035 ,-3.6287053 , 2.2280993 , 4.502896 , 3.9448938 , 3.3926914 , 1.560589 , 3.3307595 , 2.6545596 , 2.0503757
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/channel/int16/2.txt
new file mode 100644
index 000000000..24b7a248f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/channel/int16/2.txt
@@ -0,0 +1 @@
+ 4.5630627e+00,-4.5077333e+00, 6.8117022e-03,-1.1568142e-02, 2.3568916e+00,-2.9918964e+00,-4.8542055e-01, 4.7381549e+00, 3.1183126e+00,-2.6462586e+00, 3.0083582e+00, 1.4518642e-01,-2.4764729e+00,-4.8520207e+00,-4.8022575e+00,-1.8167463e-01,-3.1106927e+00,-2.4183941e+00,-4.1466684e+00,-3.6997426e+00,-3.9788694e+00,-3.0889416e+00,-2.2332447e+00, 1.8608164e+00, 2.8619974e+00,-3.6986623e+00,-1.3749057e+00,-9.2409855e-01, 2.7646086e+00,-3.3385031e+00, 7.6255083e-01, 1.0236104e+00,-1.7077237e+00,-4.4339476e+00,-1.1930060e+00,-1.7226344e+00,-3.1680160e+00,-1.8338548e+00,-2.6412952e+00,-8.2973856e-01, 4.2303777e+00, 3.4531716e-03,-3.3162324e+00, 8.4682000e-01, 2.5807633e+00, 2.7543969e+00, 6.8153429e-01, 4.7182851e+00, 4.2617507e+00,-1.4446728e+00,-4.3752551e+00, 3.5699592e+00, 9.6946698e-01,-2.0700858e+00, 2.0899124e+00, 1.6371955e+00,-9.5873147e-01, 3.1151581e+00, 2.9369416e+00, 4.4568644e+00,-9.4711387e-01,-4.1349549e+00, 3.3031983e+00, 4.1091359e-01
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/channel/int16/3.txt
new file mode 100644
index 000000000..088eb62cd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/channel/int16/3.txt
@@ -0,0 +1 @@
+ 2.5168443 , 3.7492614 ,-3.7076504 , 0.49709523,-4.642194 , 1.8201847 ,-1.396746 ,-1.0660223 , 3.3333528 ,-1.7719259 ,-2.3515563 ,-2.0570705 ,-4.7125244 ,-1.593302 ,-2.1072757 ,-4.4396334 , 4.3185077 ,-2.7568438 ,-0.59535027,-3.9871383 ,-2.6216223 , 0.39957425,-1.3687986 ,-3.1157744 , 1.2557942 , 2.3428473 ,-4.906711 , 3.5663006 ,-0.46128616,-4.7818427 ,-0.8876555 , 2.5066485 ,-1.3254607 ,-3.6097736 , 1.2686944 ,-1.37061 , 4.762917 ,-3.489012 ,-2.7905307 ,-0.2612837 ,-3.3236315 , 0.8347171 , 2.5582032 , 0.42744452, 1.7428764 , 2.4122005 ,-3.6781132 , 2.8811646 ,-2.7060914 ,-0.4752588 , 0.44432116, 0.5011615 , 3.2550313 , 0.02670379, 2.6197197 ,-4.319786 ,-1.4056181 ,-3.3794782 , 0.66822946,-1.4262298 ,-0.2465175 ,-4.6432767 ,-3.580772 , 2.960096
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/channel/int16/4.txt
new file mode 100644
index 000000000..bb8129473
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/channel/int16/4.txt
@@ -0,0 +1 @@
+-4.9356976 , 3.9426446 ,-4.746647 , 2.3674695 , 0.54803735, 3.1911538 , 0.28858757, 0.4800329 , 2.0652595 ,-4.5046906 , 0.21695825,-0.17217463, 2.4329293 ,-1.2274694 ,-0.11534467,-2.096684 , 2.6882868 ,-2.5291932 , 0.56199783,-2.0743406 , 0.95846254, 4.004705 , 0.89853394, 2.9610496 , 2.9799032 , 1.5339601 ,-1.7136513 , 2.1797504 ,-4.2055335 , 1.5059681 , 3.0828342 ,-1.7946475 ,-2.7096524 , 3.1037905 , 0.75922704,-1.1446673 ,-2.084073 ,-1.2888353 ,-1.6958839 ,-0.8388285 ,-1.0279479 , 1.1291095 , 4.080411 , 3.6791847 , 0.9237894 ,-4.70821 , 0.5730598 ,-1.3565379 ,-2.7533107 ,-0.4583869 ,-1.4416862 ,-3.6039822 ,-1.1611387 ,-2.6919081 ,-0.6557734 ,-2.9248757 , 1.4998456 , 3.2239568 , 0.23668556,-3.4410136 ,-2.3170567 , 3.66808 , 1.9004405 , 4.3537745
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/layer/uint8/0.txt
new file mode 100644
index 000000000..31a2db03e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/layer/uint8/0.txt
@@ -0,0 +1 @@
+-4.1984134 , 3.7565446 , 1.3521377 ,-4.0263743 ,-1.929471 ,-3.7523155 , 1.3858393 , 4.1565247 ,-2.4681342 , 0.3598748 ,-2.0044599 , 3.7168603 , 3.6330557 , 3.0176272 ,-4.4643235 ,-0.1893698 , 3.8839848 ,-4.5703125 , 3.365731 , 4.5556674 , 4.954971 , 1.7591819 ,-0.9497736 ,-0.8527185 ,-1.1863561 ,-4.522639 ,-4.3187394 ,-3.702939 , 0.15341021, 0.8564923 , 1.9076811 , 4.2765 ,-3.7695112 ,-1.6033245 , 2.3159432 ,-1.6656336 , 1.4186145 , 4.334284 , 4.0654674 ,-4.518256 , 0.72815216, 2.5133176 ,-4.238172 , 1.0198449 ,-0.9638457 , 2.5847483 , 4.0381308 , 4.472872 , 0.11794223, 1.3358012 , 1.7975981 , 2.168553 ,-3.5131238 , 3.8412008 , 3.851232 ,-2.130775 , 3.556102 , 0.69062364,-4.668594 ,-4.619906 ,-2.87768 ,-1.0679495 ,-4.523185 , 4.184176
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/layer/uint8/1.txt
new file mode 100644
index 000000000..2bdd62b24
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/layer/uint8/1.txt
@@ -0,0 +1 @@
+ 2.9193265 , 4.315574 ,-3.7834768 , 3.4352486 , 4.1452866 ,-4.0322523 , 1.8039155 ,-4.080042 ,-1.1999705 , 4.9018297 ,-0.27180746, 1.709373 , 4.3322196 , 4.9179945 ,-3.977508 , 2.3486571 ,-0.11026379,-0.24730131, 2.3269305 , 2.1862001 , 0.92486495, 3.5822759 , 2.8370361 , 3.915398 ,-0.6385275 ,-0.02720119,-1.408676 ,-4.4472733 , 1.2901759 ,-4.60209 ,-2.9502335 ,-2.650517 ,-1.4038593 ,-2.967456 ,-2.0060933 ,-1.9603083 ,-0.4727794 ,-1.7877682 ,-3.9565926 , 1.4452418 , 2.5925353 ,-4.5134907 ,-4.195412 , 2.4681656 , 0.7140492 , 3.0753498 , 0.269442 ,-4.768041 ,-3.5370746 , 1.0272335 ,-0.7654047 ,-1.977087 , 3.1920779 , 0.37378865, 4.016262 ,-3.3201067 ,-4.7767315 ,-3.5074112 ,-4.094166 , 1.6035818 , 1.6506963 ,-3.2142932 , 4.7714067 ,-1.7164946
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/layer/uint8/2.txt
new file mode 100644
index 000000000..8c770f61d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/layer/uint8/2.txt
@@ -0,0 +1 @@
+-1.8028042 , 1.7280815 ,-3.0464594 ,-2.810487 , 0.582805 ,-1.786865 ,-1.7263526 ,-0.36871073, 3.3955328 ,-3.9523299 ,-1.880003 , 4.9068613 , 4.6292953 , 3.9778202 ,-1.859954 , 2.8149757 , 4.5020967 ,-4.160163 , 1.9295161 ,-1.2508658 , 0.5669804 , 0.99246883,-2.4829247 , 0.88920474,-3.7942843 , 2.4626305 , 4.3087935 , 3.0680852 , 3.0893688 , 3.1640174 ,-0.41890725, 0.5377459 ,-4.0344224 ,-4.5812287 , 0.5720303 , 1.802316 ,-0.31413126, 2.9586952 , 1.1723012 ,-4.696369 ,-3.7047153 ,-1.8109767 ,-3.6122723 , 1.2727392 , 4.4057164 , 3.8347735 ,-4.739083 , 2.4655118 , 0.45258832, 4.0693913 ,-3.3486447 ,-0.64714307, 1.4990507 , 2.771129 ,-0.6109979 ,-1.0617865 , 2.0837703 ,-1.633663 , 1.8431798 ,-4.3942385 , 4.8523426 , 1.1941985 , 3.0366988 , 4.7991366
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/layer/uint8/3.txt
new file mode 100644
index 000000000..8a4c9ebb5
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/layer/uint8/3.txt
@@ -0,0 +1 @@
+-2.2375767 ,-1.1274278 , 0.18025301,-4.598087 , 1.1042122 , 3.1241179 , 1.9084688 ,-1.214722 , 4.596646 , 4.1969523 , 4.658112 , 3.143779 ,-2.6940444 ,-1.5482163 , 1.542811 ,-1.1338089 , 3.721594 , 0.24673286, 4.71102 , 2.7811737 , 1.171089 , 4.145586 ,-2.6335135 , 1.1190183 ,-3.7932637 ,-4.6548123 ,-3.10302 ,-3.392706 ,-3.856141 , 0.6618614 , 0.9668614 , 4.4293485 , 1.3193 , 4.983464 , 1.659716 ,-3.185926 , 4.8983006 , 1.6323217 , 0.18800464,-1.9328839 , 4.6031475 , 3.459718 , 4.128766 ,-3.4701612 ,-2.3796144 , 1.6752707 ,-3.6569223 , 2.922704 , 3.642789 ,-1.6817225 , 3.151759 ,-1.5401909 ,-3.8259532 , 2.4556105 ,-4.4989905 , 1.2779988 ,-0.62634754, 3.5827441 ,-0.82541114, 2.1539748 , 4.583461 , 1.2231985 ,-1.4457659 ,-2.9194565
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/layer/uint8/4.txt
new file mode 100644
index 000000000..5110f86aa
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000/layer/uint8/4.txt
@@ -0,0 +1 @@
+-4.011289 , 0.9077414 ,-2.8109396 ,-4.33598 ,-2.6516347 ,-3.917852 , 3.2461808 , 1.7588768 ,-1.9439132 , 2.190185 , 1.5180751 , 0.3587409 ,-4.3434815 ,-4.1376143 , 3.750847 , 1.5820616 , 0.03843357, 4.71235 , 1.0592757 ,-1.7640393 , 0.44547582, 2.8698466 , 4.5816092 , 4.6638517 , 1.4207541 , 1.863644 , 3.6007912 , 0.6800818 ,-2.4884489 , 3.0707197 , 3.3961668 ,-4.331953 , 2.7828538 ,-0.16146964,-4.9070745 ,-2.9787786 , 0.3337284 ,-3.935533 ,-3.303555 , 2.376896 ,-4.7058997 ,-2.2409894 , 0.07352693,-2.6024988 , 4.9593167 ,-4.7717366 , 1.6590588 , 4.063875 ,-3.8855767 , 2.6274624 , 4.901856 , 4.157007 ,-3.292969 , 3.579326 , 3.9860668 ,-3.0936542 ,-4.7793274 , 0.71697485,-2.0354068 ,-2.1414943 , 3.6339438 , 0.10732502,-0.86129206, 4.4152017
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/0.txt
new file mode 100644
index 000000000..31a2db03e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/0.txt
@@ -0,0 +1 @@
+-4.1984134 , 3.7565446 , 1.3521377 ,-4.0263743 ,-1.929471 ,-3.7523155 , 1.3858393 , 4.1565247 ,-2.4681342 , 0.3598748 ,-2.0044599 , 3.7168603 , 3.6330557 , 3.0176272 ,-4.4643235 ,-0.1893698 , 3.8839848 ,-4.5703125 , 3.365731 , 4.5556674 , 4.954971 , 1.7591819 ,-0.9497736 ,-0.8527185 ,-1.1863561 ,-4.522639 ,-4.3187394 ,-3.702939 , 0.15341021, 0.8564923 , 1.9076811 , 4.2765 ,-3.7695112 ,-1.6033245 , 2.3159432 ,-1.6656336 , 1.4186145 , 4.334284 , 4.0654674 ,-4.518256 , 0.72815216, 2.5133176 ,-4.238172 , 1.0198449 ,-0.9638457 , 2.5847483 , 4.0381308 , 4.472872 , 0.11794223, 1.3358012 , 1.7975981 , 2.168553 ,-3.5131238 , 3.8412008 , 3.851232 ,-2.130775 , 3.556102 , 0.69062364,-4.668594 ,-4.619906 ,-2.87768 ,-1.0679495 ,-4.523185 , 4.184176
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/1.txt
new file mode 100644
index 000000000..2bdd62b24
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/1.txt
@@ -0,0 +1 @@
+ 2.9193265 , 4.315574 ,-3.7834768 , 3.4352486 , 4.1452866 ,-4.0322523 , 1.8039155 ,-4.080042 ,-1.1999705 , 4.9018297 ,-0.27180746, 1.709373 , 4.3322196 , 4.9179945 ,-3.977508 , 2.3486571 ,-0.11026379,-0.24730131, 2.3269305 , 2.1862001 , 0.92486495, 3.5822759 , 2.8370361 , 3.915398 ,-0.6385275 ,-0.02720119,-1.408676 ,-4.4472733 , 1.2901759 ,-4.60209 ,-2.9502335 ,-2.650517 ,-1.4038593 ,-2.967456 ,-2.0060933 ,-1.9603083 ,-0.4727794 ,-1.7877682 ,-3.9565926 , 1.4452418 , 2.5925353 ,-4.5134907 ,-4.195412 , 2.4681656 , 0.7140492 , 3.0753498 , 0.269442 ,-4.768041 ,-3.5370746 , 1.0272335 ,-0.7654047 ,-1.977087 , 3.1920779 , 0.37378865, 4.016262 ,-3.3201067 ,-4.7767315 ,-3.5074112 ,-4.094166 , 1.6035818 , 1.6506963 ,-3.2142932 , 4.7714067 ,-1.7164946
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/2.txt
new file mode 100644
index 000000000..8c770f61d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/2.txt
@@ -0,0 +1 @@
+-1.8028042 , 1.7280815 ,-3.0464594 ,-2.810487 , 0.582805 ,-1.786865 ,-1.7263526 ,-0.36871073, 3.3955328 ,-3.9523299 ,-1.880003 , 4.9068613 , 4.6292953 , 3.9778202 ,-1.859954 , 2.8149757 , 4.5020967 ,-4.160163 , 1.9295161 ,-1.2508658 , 0.5669804 , 0.99246883,-2.4829247 , 0.88920474,-3.7942843 , 2.4626305 , 4.3087935 , 3.0680852 , 3.0893688 , 3.1640174 ,-0.41890725, 0.5377459 ,-4.0344224 ,-4.5812287 , 0.5720303 , 1.802316 ,-0.31413126, 2.9586952 , 1.1723012 ,-4.696369 ,-3.7047153 ,-1.8109767 ,-3.6122723 , 1.2727392 , 4.4057164 , 3.8347735 ,-4.739083 , 2.4655118 , 0.45258832, 4.0693913 ,-3.3486447 ,-0.64714307, 1.4990507 , 2.771129 ,-0.6109979 ,-1.0617865 , 2.0837703 ,-1.633663 , 1.8431798 ,-4.3942385 , 4.8523426 , 1.1941985 , 3.0366988 , 4.7991366
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/3.txt
new file mode 100644
index 000000000..8a4c9ebb5
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/3.txt
@@ -0,0 +1 @@
+-2.2375767 ,-1.1274278 , 0.18025301,-4.598087 , 1.1042122 , 3.1241179 , 1.9084688 ,-1.214722 , 4.596646 , 4.1969523 , 4.658112 , 3.143779 ,-2.6940444 ,-1.5482163 , 1.542811 ,-1.1338089 , 3.721594 , 0.24673286, 4.71102 , 2.7811737 , 1.171089 , 4.145586 ,-2.6335135 , 1.1190183 ,-3.7932637 ,-4.6548123 ,-3.10302 ,-3.392706 ,-3.856141 , 0.6618614 , 0.9668614 , 4.4293485 , 1.3193 , 4.983464 , 1.659716 ,-3.185926 , 4.8983006 , 1.6323217 , 0.18800464,-1.9328839 , 4.6031475 , 3.459718 , 4.128766 ,-3.4701612 ,-2.3796144 , 1.6752707 ,-3.6569223 , 2.922704 , 3.642789 ,-1.6817225 , 3.151759 ,-1.5401909 ,-3.8259532 , 2.4556105 ,-4.4989905 , 1.2779988 ,-0.62634754, 3.5827441 ,-0.82541114, 2.1539748 , 4.583461 , 1.2231985 ,-1.4457659 ,-2.9194565
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/4.txt
new file mode 100644
index 000000000..5110f86aa
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/4.txt
@@ -0,0 +1 @@
+-4.011289 , 0.9077414 ,-2.8109396 ,-4.33598 ,-2.6516347 ,-3.917852 , 3.2461808 , 1.7588768 ,-1.9439132 , 2.190185 , 1.5180751 , 0.3587409 ,-4.3434815 ,-4.1376143 , 3.750847 , 1.5820616 , 0.03843357, 4.71235 , 1.0592757 ,-1.7640393 , 0.44547582, 2.8698466 , 4.5816092 , 4.6638517 , 1.4207541 , 1.863644 , 3.6007912 , 0.6800818 ,-2.4884489 , 3.0707197 , 3.3961668 ,-4.331953 , 2.7828538 ,-0.16146964,-4.9070745 ,-2.9787786 , 0.3337284 ,-3.935533 ,-3.303555 , 2.376896 ,-4.7058997 ,-2.2409894 , 0.07352693,-2.6024988 , 4.9593167 ,-4.7717366 , 1.6590588 , 4.063875 ,-3.8855767 , 2.6274624 , 4.901856 , 4.157007 ,-3.292969 , 3.579326 , 3.9860668 ,-3.0936542 ,-4.7793274 , 0.71697485,-2.0354068 ,-2.1414943 , 3.6339438 , 0.10732502,-0.86129206, 4.4152017
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/0.txt
new file mode 100644
index 000000000..1a4fc3ed0
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/0.txt
@@ -0,0 +1 @@
+ 2.2145607 , 0.88045335, 0.45151594, 2.852104 , 3.191637 ,-0.4578638 , 1.4858874 ,-2.1207588 ,-0.77495986,-4.1637363 , 0.83028954,-3.9974387 ,-3.3348315 , 3.7137656 ,-2.9883633 , 3.4332464 , 3.7178712 , 3.5850213 , 0.9240786 ,-0.07091421,-4.516931 , 3.965739 ,-4.828566 , 3.860382 , 0.3243482 , 1.6835089 ,-1.4710085 ,-2.6625636 , 1.942659 , 0.12808529, 1.3640044 ,-3.0124736 ,-3.646485 , 1.6046281 , 1.1087954 ,-2.4648561 ,-2.3274968 , 1.2196178 , 3.0752547 , 1.8316921 ,-2.926682 ,-2.247648 , 4.1264873 , 4.700915 ,-0.6861696 , 3.5246365 ,-2.5577545 , 1.832533 ,-4.3125343 ,-2.8579648 , 3.5299218 ,-0.67911506, 0.86782926,-2.918562 ,-3.3644724 ,-2.0097935 , 0.3721956 ,-1.3528451 , 3.8267515 , 4.916677 , 3.2055025 ,-0.64435905, 3.877367 ,-1.830818
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/1.txt
new file mode 100644
index 000000000..09c06c74c
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/1.txt
@@ -0,0 +1 @@
+ 4.5410523 , 4.4007382 , 3.3252192 , 0.40420002,-4.7642856 , 2.0282986 , 2.32176 , 3.160375 ,-4.3348713 ,-2.324847 , 4.327631 , 3.253995 , 0.53624976,-4.4896946 , 4.0600896 , 2.697662 ,-3.0693228 ,-4.7954664 , 2.010163 , 4.5790668 , 0.00921074,-4.638007 ,-2.612561 , 4.338762 ,-1.3632652 ,-0.55081725, 4.273717 , 3.1074166 , 3.1386747 ,-4.033469 ,-0.7298752 ,-3.4973295 , 4.454913 ,-0.5148646 ,-2.4100194 , 2.7154703 , 4.1507893 , 2.3424785 ,-1.7028755 ,-2.6013496 ,-1.831555 ,-4.07971 ,-1.039077 ,-1.8733021 ,-3.885844 , 3.5691998 ,-3.8779395 ,-4.7566814 ,-3.570575 ,-3.0510366 ,-4.6841617 ,-4.751285 ,-2.9700782 , 3.4774506 ,-1.3150035 ,-3.6287053 , 2.2280993 , 4.502896 , 3.9448938 , 3.3926914 , 1.560589 , 3.3307595 , 2.6545596 , 2.0503757
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/2.txt
new file mode 100644
index 000000000..24b7a248f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/2.txt
@@ -0,0 +1 @@
+ 4.5630627e+00,-4.5077333e+00, 6.8117022e-03,-1.1568142e-02, 2.3568916e+00,-2.9918964e+00,-4.8542055e-01, 4.7381549e+00, 3.1183126e+00,-2.6462586e+00, 3.0083582e+00, 1.4518642e-01,-2.4764729e+00,-4.8520207e+00,-4.8022575e+00,-1.8167463e-01,-3.1106927e+00,-2.4183941e+00,-4.1466684e+00,-3.6997426e+00,-3.9788694e+00,-3.0889416e+00,-2.2332447e+00, 1.8608164e+00, 2.8619974e+00,-3.6986623e+00,-1.3749057e+00,-9.2409855e-01, 2.7646086e+00,-3.3385031e+00, 7.6255083e-01, 1.0236104e+00,-1.7077237e+00,-4.4339476e+00,-1.1930060e+00,-1.7226344e+00,-3.1680160e+00,-1.8338548e+00,-2.6412952e+00,-8.2973856e-01, 4.2303777e+00, 3.4531716e-03,-3.3162324e+00, 8.4682000e-01, 2.5807633e+00, 2.7543969e+00, 6.8153429e-01, 4.7182851e+00, 4.2617507e+00,-1.4446728e+00,-4.3752551e+00, 3.5699592e+00, 9.6946698e-01,-2.0700858e+00, 2.0899124e+00, 1.6371955e+00,-9.5873147e-01, 3.1151581e+00, 2.9369416e+00, 4.4568644e+00,-9.4711387e-01,-4.1349549e+00, 3.3031983e+00, 4.1091359e-01
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/3.txt
new file mode 100644
index 000000000..088eb62cd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/3.txt
@@ -0,0 +1 @@
+ 2.5168443 , 3.7492614 ,-3.7076504 , 0.49709523,-4.642194 , 1.8201847 ,-1.396746 ,-1.0660223 , 3.3333528 ,-1.7719259 ,-2.3515563 ,-2.0570705 ,-4.7125244 ,-1.593302 ,-2.1072757 ,-4.4396334 , 4.3185077 ,-2.7568438 ,-0.59535027,-3.9871383 ,-2.6216223 , 0.39957425,-1.3687986 ,-3.1157744 , 1.2557942 , 2.3428473 ,-4.906711 , 3.5663006 ,-0.46128616,-4.7818427 ,-0.8876555 , 2.5066485 ,-1.3254607 ,-3.6097736 , 1.2686944 ,-1.37061 , 4.762917 ,-3.489012 ,-2.7905307 ,-0.2612837 ,-3.3236315 , 0.8347171 , 2.5582032 , 0.42744452, 1.7428764 , 2.4122005 ,-3.6781132 , 2.8811646 ,-2.7060914 ,-0.4752588 , 0.44432116, 0.5011615 , 3.2550313 , 0.02670379, 2.6197197 ,-4.319786 ,-1.4056181 ,-3.3794782 , 0.66822946,-1.4262298 ,-0.2465175 ,-4.6432767 ,-3.580772 , 2.960096
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/4.txt
new file mode 100644
index 000000000..bb8129473
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/4.txt
@@ -0,0 +1 @@
+-4.9356976 , 3.9426446 ,-4.746647 , 2.3674695 , 0.54803735, 3.1911538 , 0.28858757, 0.4800329 , 2.0652595 ,-4.5046906 , 0.21695825,-0.17217463, 2.4329293 ,-1.2274694 ,-0.11534467,-2.096684 , 2.6882868 ,-2.5291932 , 0.56199783,-2.0743406 , 0.95846254, 4.004705 , 0.89853394, 2.9610496 , 2.9799032 , 1.5339601 ,-1.7136513 , 2.1797504 ,-4.2055335 , 1.5059681 , 3.0828342 ,-1.7946475 ,-2.7096524 , 3.1037905 , 0.75922704,-1.1446673 ,-2.084073 ,-1.2888353 ,-1.6958839 ,-0.8388285 ,-1.0279479 , 1.1291095 , 4.080411 , 3.6791847 , 0.9237894 ,-4.70821 , 0.5730598 ,-1.3565379 ,-2.7533107 ,-0.4583869 ,-1.4416862 ,-3.6039822 ,-1.1611387 ,-2.6919081 ,-0.6557734 ,-2.9248757 , 1.4998456 , 3.2239568 , 0.23668556,-3.4410136 ,-2.3170567 , 3.66808 , 1.9004405 , 4.3537745
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000/channel/int16/0.txt
new file mode 100644
index 000000000..e0e52c398
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000/channel/int16/0.txt
@@ -0,0 +1 @@
+ 4.5734663 , 3.96675 ,-2.7826853 , 4.377681 , 1.8424977 ,-2.8312624 , 0.65628445,-3.7023883 ,-1.8941027 , 0.53154576,-3.9718776 ,-3.3961854 ,-2.7500536 , 2.6793208 , 3.3515985 , 2.0939343 ,-4.3965416 ,-1.7462187 , 0.5660886 , 4.497879 ,-2.2529721 ,-4.8996797 ,-0.00740948,-2.941367 , 1.9482567 ,-2.462802 ,-0.7897884 , 3.1501546 , 3.1216884 ,-3.506249 , 2.871302 ,-3.964653 ,-0.40679944, 2.8930066 ,-4.783338 ,-1.8733944 , 2.2654383 ,-0.41361305,-3.7790897 ,-1.9458629 ,-2.274427 ,-2.9192872 ,-0.73215395, 2.8135974 , 2.1402152 , 4.516366 , 1.58816 ,-4.607831 ,-3.5409598 , 1.9784997 , 3.11111 , 1.0872442 ,-3.6907403 ,-4.774325 ,-4.9267297 , 1.2962086 , 2.4646177 , 2.2726526 , 4.8766675 ,-2.9272413 ,-0.06221364,-0.80498594,-2.319938 ,-3.8261194 ,-2.3452706 , 2.5408983 ,-0.80628425,-1.4547366 ,-4.4171157 , 3.1584027 , 4.2213454 , 3.0342784 , 2.0285478 , 3.4517126 , 1.870827 , 2.812075 , 1.0776864 ,-4.524331 , 3.1467574 ,-2.366355 ,-4.7368546 , 1.940347 , 4.282059 , 1.2666475 ,-4.9559174 , 2.8177614 , 1.1941892 ,-0.25412267,-2.833778 , 1.1770393 , 4.9503546 , 4.582686 ,-1.0778978 ,-2.9030416 , 3.2517505 , 1.556093 ,-3.7605543 , 0.5915735 ,-2.6323159 , 4.596147 ,-0.90292877, 2.8230112 , 4.9295835 , 3.523853 , 1.7742149 ,-2.6014073 , 2.162894 , 1.9364033 , 4.0920115 , 0.81613404, 2.4198878 ,-0.907447 ,-4.79113 ,-3.4193892 ,-0.3334577 ,-1.0439668 , 4.2233415 , 1.4482704 , 1.3646252 ,-0.9206041 , 4.4994802 ,-4.2411633 , 0.6763335 ,-1.3827848 , 1.8579848 , 1.6426222 , 0.904467 , 3.876264 ,-4.6476808 , 4.576801 ,-1.4680524 , 2.441134 , 3.2343059 , 0.23119794, 2.5640545 ,-0.7293438 , 3.7184558 ,-1.6056752 , 3.1490617 , 4.6837263 , 4.7100887 ,-2.785927 ,-0.1520597 ,-1.9914767 ,-4.00598 ,-2.7502792 , 3.7857378 , 2.8444788 , 4.9911737 , 0.29277426,-4.779576 , 3.223367 , 1.3517398 , 4.8757277 , 3.8083189 , 1.7660266 ,-2.1543872 , 4.822371 , 2.089687 ,-4.7373757 ,-2.4061642 , 2.0387447 ,-4.067881 ,-3.1757388 , 0.24974413,-0.24441184,-0.1168329 ,-0.35149318, 2.0035832 ,-4.248678 ,-1.4723817 , 3.8218668 ,-2.8085105 , 4.6995482 ,-3.0093114 ,-3.648268 ,-1.0374364 , 0.04459473, 2.3945484 ,-0.63439727, 3.3920286 , 2.403765 , 1.303556 , 3.232244 ,-0.44932058, 0.9601637 ,-3.3821623 ,-4.257736 ,-4.095783 , 0.42818338,-4.925627 ,-1.8419602 , 4.9393196 , 0.8049334 , 4.431875 , 2.8487725 , 2.1205912 , 1.7367444 ,-4.337498 ,-3.574642 ,-3.8927085 ,-0.35219863, 2.8415039 ,-0.2887568 ,-0.89806557, 2.669602 , 4.8017626 , 4.278042 ,-1.2604581 , 3.152027 , 2.1625066 , 1.5039738 ,-3.7209976 ,-0.72354925, 4.006067 ,-3.7651584 , 0.7198826 , 3.9594896 , 0.6228397 , 2.8464649 ,-0.18740664,-2.0530953 , 3.5185826 , 2.5037062 , 0.3990585 ,-4.423475 , 4.6931167 ,-1.0078553 , 0.74727917,-4.289701 , 1.697721 , 3.4963684 , 1.5796075 , 2.296678 ,-2.9379995 , 4.4748416 , 0.25155628, 4.1183267 , 0.9506131 , 1.2903908 ,-4.6828184 ,-2.309908 ,-4.2793307 ,-2.2069294 ,-4.038367 , 4.641971 ,-2.3178709 ,-2.2683682 ,-0.96986157, 2.6649144 , 2.3106637 ,-1.8052462 ,-4.9433284 , 1.7941002 , 4.80127 ,-0.06690114
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000/channel/int16/1.txt
new file mode 100644
index 000000000..9a8f222e7
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000/channel/int16/1.txt
@@ -0,0 +1 @@
+ 2.2282960e+00, 1.0135865e+00,-4.1930809e+00, 5.3674412e-01,-3.2516165e+00, 1.2745492e+00, 4.2867136e+00, 1.9524460e+00,-3.6757104e+00,-3.6086998e+00,-9.4525421e-01,-3.4005399e+00, 3.3607626e+00, 4.2363039e-01,-2.5177178e+00,-3.0130227e+00,-4.1442380e+00, 4.4951862e-01,-6.4387190e-01, 4.3701029e+00,-3.6790867e+00, 3.2749624e+00,-2.2554400e+00, 1.8269253e+00, 1.8358005e+00,-6.0994375e-01, 3.5964453e+00, 4.8953295e+00,-2.6134133e+00,-3.9301482e-01, 4.0286818e+00,-8.9392501e-01, 2.6430035e+00,-1.0339550e+00,-4.2311502e+00, 5.1657695e-01,-3.0095081e+00,-3.2156844e+00, 3.0075660e+00,-2.4905038e+00, 2.2380588e+00, 4.6933036e+00,-2.7880669e+00,-3.3672907e+00, 2.5187421e+00, 2.1843061e+00,-3.9957666e+00,-4.5409918e+00,-1.7282218e+00,-4.6849327e+00, 3.1863580e+00, 2.4342964e+00,-4.5180349e+00,-2.4310455e+00,-2.6789901e+00,-1.6438740e+00, 4.9613748e+00,-3.7800386e+00,-4.4277740e+00, 1.0571244e+00,-3.3765689e-02,-6.2219787e-01, 2.1075857e+00,-2.0555353e+00, 2.6996508e+00,-3.0303302e+00,-3.8262250e+00,-4.5048919e-01, 2.6760142e+00, 3.2696848e+00, 2.8136756e+00,-2.7064829e+00, 8.5861349e-01,-1.8871003e+00,-9.5355767e-01, 2.3704410e+00, 4.8897211e-02,-4.6371531e+00, 1.5693765e+00, 3.7866819e+00,-2.9738419e+00, 1.2106347e+00,-5.8760280e-03,-6.4124316e-01, 4.2396611e-01, 4.8550687e+00,-3.0650468e+00,-1.2087260e+00,-2.4833875e+00, 2.1272743e+00,-1.8991195e-01,-3.5372739e+00,-2.3402226e+00,-1.0234243e+00, 2.8981063e+00, 8.7964945e-02, 3.2136328e+00,-3.4051507e+00,-4.5538807e+00,-4.0228786e+00,-1.8993270e-01,-4.5704255e+00, 1.8850164e+00, 9.9910229e-01,-4.8424377e+00,-3.1492932e+00, 2.3922281e+00, 4.8503261e+00,-2.1037047e+00, 3.3602579e+00, 1.3546667e+00, 1.3481154e+00,-2.3604252e+00,-1.3253393e+00,-3.5330158e-01,-2.1313765e+00, 3.1442962e+00,-1.1570807e+00,-4.5890884e+00,-4.1608801e+00, 1.8554245e+00, 2.4646142e+00,-1.8453486e+00, 3.3489871e+00,-1.1248070e+00, 3.1451607e+00,-1.4458319e+00,-2.2727523e+00,-2.0378258e+00, 2.4566815e+00, 3.8839689e-01, 4.2570353e+00, 2.3613093e+00, 1.2956337e+00,-7.5734973e-01,-1.4549307e+00, 9.3240172e-01, 4.3444591e+00,-6.4935732e-01, 2.5328317e+00,-2.3545196e+00,-4.7553263e+00, 2.6134777e+00,-2.5526178e+00,-1.7996631e+00,-2.0215256e+00,-4.6141486e+00,-1.7283168e+00, 2.5297335e-01, 3.7009020e+00,-1.9858284e+00,-3.4631619e+00,-1.5858738e+00,-2.5620985e+00, 3.2822473e+00,-3.2632313e+00,-9.0714562e-01,-2.3562717e+00, 4.4088845e+00,-3.6630182e+00, 5.5761892e-01, 1.6045070e+00,-3.6806375e-01, 4.3184443e+00,-1.3219705e+00, 1.5496376e+00,-1.5801797e+00, 2.1545045e+00,-4.0106788e+00, 3.4172714e+00,-4.2495294e+00,-6.1115064e-03,-7.2607052e-01,-7.3130745e-01,-4.4462271e+00, 4.8119636e+00,-4.7460346e+00,-3.0464313e+00,-2.8801811e+00,-1.4347218e-03, 4.4133449e+00,-3.3173063e-01, 4.3802023e+00, 2.6040417e-01,-2.5531218e+00, 3.7436140e+00,-4.1636271e+00,-3.3907690e+00,-1.4418361e+00,-3.6933661e+00,-2.6342602e+00,-3.1492887e+00,-5.5590755e-01,-1.6814464e-01,-1.0868104e+00, 4.9451909e+00, 3.4104226e+00, 1.0342516e+00, 4.7993002e+00, 1.2480364e-01, 1.6109833e-01, 2.6366503e+00, 1.6535910e+00, 4.3810592e+00, 4.4755011e+00, 4.3265424e+00,-3.1934264e-01, 9.8549920e-01, 1.9962710e-01, 2.8525822e+00,-3.7352023e+00,-1.3402178e+00, 2.5931063e+00,-2.6708813e+00,-7.6831090e-01, 3.0769660e+00, 1.4107993e+00,-1.8936746e+00,-4.7568636e+00,-1.9222193e+00, 4.7693071e+00, 2.8644614e+00, 4.1877995e+00,-3.6974251e+00, 4.5314616e-01,-7.1986055e-01, 4.8653622e+00, 1.4722897e+00,-8.6220115e-01,-4.1846976e+00, 3.7767217e+00, 3.7630556e+00,-4.5851058e-01,-4.9183292e+00,-1.8750135e+00, 1.0773923e+00,-5.2709883e-01,-9.2767686e-01,-1.3984675e+00,-2.0892789e+00,-4.3801632e+00, 4.0080590e+00, 4.2269025e+00,-1.2195336e+00,-2.2649438e+00, 4.6874623e+00,-3.8354571e+00, 5.9588730e-01,-2.8315885e+00, 3.0605823e-01, 2.1416895e+00, 1.6045133e+00,-3.3075256e+00, 4.9898911e+00, 1.7708080e-02, 3.5305614e+00
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000/channel/int16/2.txt
new file mode 100644
index 000000000..1b2e33401
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000/channel/int16/2.txt
@@ -0,0 +1 @@
+ 1.9229428 , 2.1045275 , 2.0514195 , 1.7149676 ,-4.1647053 , 4.3958654 , 2.1192055 ,-2.4357705 , 2.249189 , 4.7986865 ,-1.0146881 , 2.5108647 , 0.7262246 ,-2.3110187 ,-0.434008 , 2.6220334 , 1.3261455 ,-2.0402927 , 0.6362597 , 0.12827367, 0.94167644, 1.6396433 , 2.802215 , 0.92637545,-2.8669958 , 2.1684341 , 4.7197456 ,-3.0393784 ,-1.5588902 ,-1.5589788 ,-1.2792847 ,-4.301159 , 3.6853306 , 3.5522077 ,-3.5120559 , 3.6523628 , 0.52381915,-4.3210206 , 3.1021209 ,-4.4059095 , 4.574733 ,-3.708168 ,-3.4609973 , 0.04494883, 4.6041393 , 4.6209555 ,-2.184693 , 3.3114836 , 4.0440845 ,-4.362543 ,-3.0185041 ,-3.4911432 ,-1.0443465 ,-3.1546419 ,-3.0831194 ,-1.8959469 ,-3.7653599 ,-1.8753844 , 3.969308 , 4.0960746 , 0.256032 ,-0.11065102, 4.753394 , 4.8433857 , 0.17249103, 0.44612473, 3.5996687 ,-3.7071083 , 4.15448 , 2.7609568 , 0.7979912 , 2.6985793 , 0.24981445,-0.7343978 ,-3.8946455 ,-3.4738345 ,-2.0124238 , 4.6603985 , 0.9002829 ,-2.2128618 ,-0.8752893 ,-3.0990481 , 2.770291 ,-1.4642559 , 0.4561498 , 0.5808671 , 2.4227936 ,-2.400878 , 0.6494001 , 1.0195295 ,-3.2693145 , 1.9889433 , 3.5208216 , 3.6280289 , 4.322899 ,-2.805155 , 3.7704606 , 0.6797415 , 4.442675 ,-0.5069875 , 1.3373847 , 4.6953626 ,-0.7946793 ,-2.7352958 ,-1.9969261 , 0.43059692, 2.50853 , 1.9314603 , 1.3780333 , 2.0536468 ,-1.572231 ,-4.5323825 ,-1.3175989 ,-1.5515776 ,-0.05870355, 0.32408538,-4.2935586 ,-1.561555 ,-1.7551405 ,-0.93950266, 3.2540953 ,-4.623753 ,-3.4944966 ,-0.7603045 , 0.76591074,-4.9114766 ,-2.679303 , 0.12950227, 4.094419 , 4.781908 ,-3.6946337 , 2.766349 ,-0.45678583,-2.275264 , 2.0858452 , 3.1182098 ,-1.2942638 , 4.4418044 , 2.2264028 ,-3.3838644 , 1.4427853 , 3.7365992 ,-1.1815038 , 1.4555137 , 0.22728541,-0.18817298, 3.454521 , 3.1835914 , 4.0786743 ,-1.5111316 , 1.1560454 ,-0.04693017, 0.44183066,-0.7420173 ,-1.2243766 , 3.4453049 ,-2.969513 ,-0.82397145, 4.870895 , 3.0178127 , 1.7217305 , 4.482936 , 1.9468685 , 3.9970267 , 4.7294793 , 2.9921744 , 4.470473 , 4.7626653 , 0.13104612,-4.651569 , 2.7991815 ,-4.734433 ,-2.4499187 , 1.0739365 ,-1.5583646 , 3.6531756 , 2.7731194 ,-4.72427 ,-4.5801177 ,-4.035709 , 2.5767221 ,-2.8133557 ,-1.8342617 , 3.5808434 ,-2.1022995 ,-3.5421894 ,-3.0776916 , 3.168665 ,-0.07246887,-1.2413273 , 4.7964606 ,-1.0624843 , 0.75939703, 2.5336463 ,-4.8622346 ,-4.9744167 , 2.1007512 , 1.5271608 , 0.37077245, 1.7765028 , 2.2724373 , 2.1864665 ,-0.37378153, 1.3559381 ,-1.4220421 ,-1.4756224 , 3.6143627 , 2.7846546 ,-2.5194893 , 3.005039 ,-3.6451447 ,-1.9118739 , 0.04718782,-3.0775185 ,-1.4801219 ,-2.35909 ,-0.4728799 , 4.610093 ,-4.472677 ,-4.530808 , 0.12514372, 0.05973044, 4.457302 , 3.1129916 , 3.6036162 , 4.5086145 ,-3.548999 , 0.4976606 ,-3.6525648 ,-2.1937015 ,-1.3205789 ,-2.6594079 , 4.415343 , 3.219482 ,-3.7286756 , 3.4116418 , 0.82889384,-3.0168123 , 4.382766 , 2.7633846 , 3.6949344 , 3.9806223 ,-0.6415279 ,-0.3193684 ,-1.3176754 ,-1.4990829 , 4.694691 ,-1.0581211 , 1.2103747 ,-0.26690048,-1.157015 ,-1.8951306 ,-0.8580171 ,-4.3080263 , 4.0737123 ,-1.2607352
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000/channel/int16/3.txt
new file mode 100644
index 000000000..50ed09011
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000/channel/int16/3.txt
@@ -0,0 +1 @@
+ 4.9386005 , 3.7248888 , 3.3261378 , 4.8302746 ,-3.9337704 ,-4.2943096 , 0.16059242, 0.17785172,-2.4971933 ,-2.933359 ,-4.598231 , 4.7816315 ,-0.6563864 , 4.452592 , 1.8066075 , 3.1572745 , 4.500678 ,-1.1609873 ,-1.6962403 , 1.567031 ,-3.3120036 , 1.8150452 ,-2.7486987 ,-1.6800771 , 1.4895486 , 1.120401 , 1.4983965 , 4.7132416 , 0.39645562,-3.12486 ,-0.5966056 , 4.618641 , 1.225812 , 0.99017185, 3.9918585 , 1.299415 ,-1.2995726 , 4.202907 , 3.8657827 ,-4.0268126 ,-0.90370494, 0.5030568 ,-2.9651542 ,-4.1249614 ,-2.8990393 ,-4.1228724 ,-1.2640246 ,-0.72640723,-1.7128279 , 2.7710931 , 2.8189523 ,-0.8384207 , 0.71266395, 3.8393862 ,-1.7801509 ,-3.1485069 , 3.2076547 , 2.267659 ,-3.745656 ,-4.373508 , 0.86005193,-4.9145784 , 0.9253047 , 1.1243923 , 0.46507052, 1.9978004 ,-4.642887 ,-2.1898057 , 0.88199854,-2.1837327 , 1.1112527 ,-1.4548608 ,-3.5766103 ,-1.5607064 ,-3.630397 ,-1.9193211 ,-0.8931484 ,-0.2812017 ,-1.2881653 ,-2.5051243 ,-3.5648384 ,-0.5431733 ,-0.47036746,-2.8132265 ,-0.4302025 ,-4.003176 , 0.31743896,-3.074693 ,-3.3994603 , 0.62276137, 0.12920536,-2.5154057 ,-0.22098878,-2.711012 ,-0.303956 , 4.6025276 , 3.1887815 ,-0.50345755,-2.6543994 ,-0.8452558 ,-1.4075644 , 3.6716504 , 2.7388885 ,-4.9426928 , 3.5494354 , 4.777085 ,-3.3904083 ,-2.4746811 ,-2.943489 , 1.3607427 , 1.313449 ,-2.7959676 , 4.5932074 , 0.2460288 ,-1.1802251 , 0.6807028 ,-3.7335384 ,-0.30950046, 0.0558207 ,-4.7604976 ,-4.5745177 ,-3.3872643 ,-1.102581 ,-1.5612804 ,-1.2933319 , 4.5290637 ,-2.5096595 , 0.8673844 , 0.6069363 , 0.8294639 ,-0.05487671,-2.5923786 , 3.2974155 , 2.252853 ,-2.4157743 , 1.6614583 , 1.975577 ,-2.7390766 ,-0.26459846, 0.8946814 ,-3.257953 , 4.0526175 ,-1.5219783 , 4.6063023 ,-0.09599628, 3.2825923 , 2.0063279 ,-3.597641 ,-0.41604096,-2.5593333 , 1.8169669 ,-3.6998532 ,-2.3723404 , 0.4008657 , 2.1002467 , 4.9284163 , 4.6011457 ,-4.8977246 , 4.7852945 , 1.2170111 ,-1.055987 , 2.27575 , 1.0601226 ,-4.176826 , 0.08197393, 4.0421042 , 3.6263971 , 2.6941037 ,-2.644993 , 0.10439859,-4.512112 , 3.7939842 ,-4.8532767 , 0.391317 , 3.6432517 ,-3.9992728 , 0.29700363, 1.2722415 ,-2.3793647 ,-3.377246 , 2.0930648 , 2.574604 ,-1.2509564 , 0.4457573 ,-0.46469867, 2.6793416 , 0.02566718,-0.11948132,-3.1046712 ,-0.6204446 ,-4.615342 , 4.057695 , 1.1312845 ,-3.0446556 ,-1.9381613 ,-0.92255247,-3.5459394 ,-1.1972907 , 0.5879403 ,-1.2265042 ,-2.6279037 , 3.7533212 ,-0.2950134 ,-1.6104454 , 4.7811155 , 3.9216835 ,-2.2905827 ,-3.9489107 ,-4.078132 , 4.878544 ,-2.1483154 ,-3.1480436 ,-1.8742744 , 0.38310575,-4.0457416 ,-1.5423136 , 4.9426446 , 2.80434 ,-2.758338 , 1.6596367 ,-4.559686 ,-1.2686385 ,-1.2173673 , 0.49475643,-2.4956207 ,-1.5008336 ,-1.7967415 ,-1.1574938 , 2.2852411 , 1.7171949 ,-3.328038 ,-3.1454384 ,-0.41883984, 3.822312 , 1.1161699 ,-1.5137968 , 3.1651397 , 3.2411747 , 1.2685378 , 2.7408757 ,-3.078621 , 3.3460293 ,-0.34918678,-1.0433053 , 0.9397743 ,-3.9071774 , 0.68924445, 4.896269 ,-4.234603 ,-4.8659916 , 1.472339 , 4.5464644 , 0.35857418, 3.4065645 ,-1.514736 , 4.2301235
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000/channel/int16/4.txt
new file mode 100644
index 000000000..163c037cf
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000/channel/int16/4.txt
@@ -0,0 +1 @@
+-0.91463715,-2.9258113 , 4.4465976 ,-0.84762925,-3.3510911 ,-0.15094744, 2.2284694 , 3.9705405 ,-1.6315348 , 4.698665 , 2.8595035 ,-2.4719086 , 4.2091336 ,-3.7003224 , 0.06198901, 4.24617 ,-3.7041452 , 1.4280707 , 0.61925036, 3.873551 , 0.3554166 , 3.0535998 ,-1.403015 , 2.5769274 , 4.0060935 ,-2.134697 , 0.61366636,-2.2069314 , 3.5629356 ,-4.94381 , 3.3054771 ,-0.42945656, 4.4868546 , 4.124087 ,-4.039486 , 0.75716823,-4.530404 ,-0.8464823 , 2.7817092 ,-4.954212 , 4.790015 , 2.5307322 , 0.635834 ,-3.393037 ,-3.7000508 ,-1.1439751 ,-2.4422479 , 3.9414582 ,-4.0586324 ,-3.5872777 , 2.2529798 , 0.50453144,-2.9947112 ,-0.76174486, 0.8427806 ,-0.90798455,-0.5518859 ,-1.1810572 , 1.2787138 ,-1.7791113 ,-4.661412 ,-3.7413049 , 0.03910514, 3.970302 ,-3.0697417 ,-4.107844 ,-1.985001 ,-2.434408 ,-3.0120797 , 0.34467867, 0.09826441, 3.1933572 , 0.09855966, 1.7976784 ,-3.3814316 ,-2.8423817 ,-4.787137 , 0.21746217,-1.8560363 ,-0.7145455 , 3.911294 , 4.6970305 ,-4.0105987 , 3.3843613 , 2.3087065 , 1.8619018 , 1.6607213 ,-4.1276345 ,-0.15251912, 3.1198032 , 1.8143575 , 2.178214 ,-4.6250186 , 4.4006424 ,-3.378407 , 3.6481302 , 4.4439235 , 4.5322957 , 2.7754776 , 1.9026359 ,-2.9371052 , 0.32501587, 4.980984 ,-3.2300677 , 4.190388 , 4.441369 , 0.8116277 ,-4.7056756 , 1.1501676 ,-0.9759702 ,-0.1920487 ,-3.2009268 , 4.654679 , 4.043145 , 4.579935 , 4.917842 ,-3.2166183 , 2.381046 , 2.3470554 , 0.04456256,-2.6785278 ,-2.1683002 ,-0.2686819 , 0.6097173 , 1.5071467 , 3.9692068 ,-3.4313831 ,-0.87708473, 3.9917011 , 0.7843428 ,-4.6622047 , 0.774621 ,-4.6538844 , 3.6392822 , 4.962988 , 1.4132729 ,-0.40482154,-1.8656421 ,-1.6113061 ,-1.3454957 , 0.40846685,-4.5410986 , 2.7158992 ,-1.8403106 ,-3.803351 , 4.406537 ,-1.5868717 , 2.7034876 ,-3.3383765 , 4.6084027 ,-1.691095 ,-0.52188784, 2.9010768 , 0.08786624, 2.7466853 ,-1.7457972 , 0.59371734,-0.1716976 ,-2.6220891 , 4.9432936 , 2.3500183 , 1.6905144 ,-2.7329378 , 4.003541 ,-1.1137847 , 3.9017355 , 0.9116626 , 4.233729 ,-2.6706429 , 3.4342804 ,-0.42729262, 1.174779 ,-4.944099 , 1.2316282 , 4.9237943 ,-2.2999635 ,-4.9210916 ,-1.9033331 , 0.43241265, 3.2149148 , 4.1269703 , 0.8590868 , 2.734273 , 1.658618 ,-2.1702065 ,-2.0058317 , 4.0706363 , 4.003833 ,-0.35835287, 2.5514262 , 1.2571276 ,-4.655018 , 3.6468434 , 0.06320113,-4.662375 , 1.0745742 ,-1.117399 , 4.167245 , 4.59434 ,-1.686359 ,-0.17328739, 0.3083307 , 3.3926466 , 2.2254786 ,-0.45468137, 2.4956248 ,-3.492782 ,-2.9805465 ,-1.0610795 ,-0.2784433 , 0.7163735 ,-3.0048254 ,-1.8024784 ,-3.3139167 ,-1.8410577 , 4.5702477 ,-3.4454951 ,-1.4504164 ,-1.7432297 ,-4.998418 ,-2.5524495 , 3.028534 , 4.075326 ,-2.2187853 ,-0.6484594 , 3.00815 ,-2.8010397 ,-4.5529976 , 1.7830837 , 0.3373458 , 0.19151935,-1.0437245 ,-3.6349878 , 1.1947471 ,-1.9664146 , 0.27316815,-0.20781417, 2.419226 , 0.02246885, 4.5222287 , 3.1069999 , 3.940458 , 4.2710595 , 3.4216619 , 2.8447206 , 2.7136886 ,-0.60954016, 2.9277234 , 3.995615 ,-0.30593097, 1.7800944 , 1.0608315 , 3.8786283 ,-2.7564247 , 1.8526665 ,-3.8638606
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000/layer/uint8/0.txt
new file mode 100644
index 000000000..182eb5290
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000/layer/uint8/0.txt
@@ -0,0 +1 @@
+ 3.4251418 , 1.8884782 ,-4.061519 ,-2.1329548 , 3.851976 , 3.668601 ,-0.7418167 , 2.379966 , 0.87259316,-3.96981 ,-4.627804 ,-3.3958297 , 3.025158 ,-1.299777 ,-4.322816 , 3.9173064 ,-0.55214256, 1.9224825 ,-4.8571157 ,-4.778045 , 3.3015614 , 0.56785774, 4.7985554 ,-0.4355816 , 4.9478025 , 1.7909397 ,-0.7620663 ,-0.09947702,-3.0230513 , 1.3817457 ,-4.5706887 ,-3.4097836 ,-4.7086477 ,-3.4651487 , 1.4401027 , 4.7513933 ,-1.0788624 ,-3.4946275 , 4.607974 ,-3.1215246 ,-1.4637078 ,-3.5266285 , 2.1268125 , 0.19458893, 4.058288 , 2.2452407 , 0.7575343 , 0.12213306, 4.885321 ,-1.2482406 ,-1.1034219 ,-4.054173 ,-3.6471267 , 4.774012 , 0.9450243 ,-2.5827825 ,-2.3991685 ,-2.8482654 , 0.9294943 ,-3.1165063 ,-1.6113516 , 0.04260086, 2.0987031 , 2.1601508 , 4.9740996 , 3.7719023 , 2.6817482 , 0.42131838,-1.4525859 ,-0.5124655 , 2.6313434 , 4.5606523 ,-4.6180778 , 4.788594 ,-0.8446551 ,-1.5460813 , 1.4288356 ,-1.9648911 ,-4.9766145 ,-2.405665 ,-0.30327383, 3.5204673 ,-3.848158 ,-2.6913974 ,-2.76141 , 4.336643 , 1.4205143 , 4.5898 ,-0.93183124, 4.2199287 ,-4.216924 ,-1.0979122 ,-2.3032405 ,-3.4457245 , 2.944412 , 2.137278 , 1.0326933 , 2.3116126 , 4.2138443 , 1.8283377 , 0.28901085,-1.8877143 , 0.50673705, 1.4360197 ,-2.924691 , 0.9819095 , 3.4656513 ,-2.541582 ,-1.9102442 , 3.3629627 ,-0.9675056 , 0.5937253 ,-2.4236617 ,-1.4193813 ,-0.7552614 ,-1.7121441 , 4.39647 ,-2.2712908 ,-4.3387337 , 1.5912663 , 0.8397044 , 0.17277755, 1.5272428 , 3.571715 ,-1.4471695 , 1.8623346 ,-4.3603377 , 1.2116091 , 4.960487 , 2.3681397 , 1.2925869 ,-4.3249073 , 2.4402251 ,-1.4506928 , 3.023616 ,-3.232099 ,-4.0106025 , 3.5774167 ,-0.6024932 , 1.0183483 ,-2.8215308 , 3.7395437 , 1.9100485 , 3.892712 , 4.6569633 ,-3.251774 ,-3.6923678 ,-4.8891983 ,-3.8605282 ,-4.0293036 ,-2.8199108 , 4.1668954 , 2.1569817 ,-2.9700332 ,-0.7035824 ,-0.5176811 ,-3.1826456 ,-3.334556 , 4.9103675 , 3.8513231 , 2.8609774 , 1.1845547 ,-1.4094447 ,-2.0445833 , 0.9833705 , 4.481276 , 3.83006 , 4.6240997 ,-4.268881 ,-0.85518706,-2.2650888 , 4.032545 , 0.9495817 , 1.1353155 ,-4.6551876 ,-2.2839146 , 2.6291692 ,-3.0398533 , 0.52652216,-1.8323399 ,-0.12300313, 0.46178594, 1.120684 , 1.4657134 ,-1.9794375 , 0.08941289,-4.4573083 , 2.7112565 , 4.9227715 , 2.4938288 ,-0.37153494,-4.1604757 , 4.7694197 ,-1.3021677 , 2.454714 ,-2.4902875 ,-2.760436 , 0.05183195,-2.6723208 ,-1.1471758 ,-2.2565122 , 0.20876396,-0.7288584 , 0.4386669 , 0.7846054 , 2.7294593 ,-3.836883 , 2.7501638 ,-4.775067 ,-3.2403855 ,-2.0307286 ,-1.6403166 , 4.9471517 , 1.0428456 , 2.5126355 , 3.0090203 ,-2.3476288 ,-2.9215205 , 3.8079188 , 0.83959275, 4.2670302 , 1.2338712 , 2.7329903 , 2.2549257 , 4.882931 , 0.12783106,-2.4392028 ,-2.4590807 , 4.2874207 ,-0.08333418,-3.4244132 ,-0.2235516 ,-4.23632 ,-1.3970895 , 2.1245553 ,-2.513883 ,-2.8092728 ,-1.9194845 ,-4.1932216 ,-3.7431748 ,-1.1063433 ,-3.714845 , 1.7230242 ,-0.19162221, 1.1123114 , 3.937181 , 2.6165597 ,-0.61531806, 0.44309503,-2.9260228 ,-3.1617007 , 0.0663496 , 2.4541974 ,-2.714474 , 4.2564497 , 1.2300675
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000/layer/uint8/1.txt
new file mode 100644
index 000000000..dd8037244
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000/layer/uint8/1.txt
@@ -0,0 +1 @@
+-4.8834 ,-4.6238756 , 2.020674 ,-2.3068821 , 3.7487323 ,-0.36079448, 0.08661745, 3.423143 , 3.3073757 ,-2.709357 , 4.4810205 , 3.4159606 , 4.1597505 ,-4.249789 , 2.3782206 ,-2.02848 , 0.90137833,-0.6249625 ,-3.5300052 ,-4.1113796 ,-3.768913 ,-3.59854 , 2.0896666 , 1.7677166 ,-2.3101497 ,-1.0116942 ,-3.7846713 , 2.4777756 , 3.413987 ,-2.1964507 , 0.08637846, 0.02552292,-1.9918599 , 0.7785565 ,-4.065995 , 0.8808776 ,-2.0446506 ,-1.8421272 , 0.42566776, 3.8834689 , 4.900111 ,-3.0617309 , 4.0613194 ,-3.3601153 , 3.678536 ,-4.1136184 ,-4.2903633 ,-2.6918027 , 3.4335177 ,-3.9272869 ,-1.6882807 ,-1.9629028 , 4.2125826 , 1.6536059 ,-1.1801353 , 4.8443203 , 2.9393198 , 0.4306524 , 4.390743 ,-4.6322317 , 2.932263 , 4.140538 , 2.7385068 , 2.620753 , 2.0725663 ,-1.3642436 ,-0.48539641,-4.2409816 ,-1.5950899 ,-1.688442 , 4.4769464 ,-1.25038 , 3.462903 , 0.5011836 , 0.981037 , 0.63532305,-3.4727957 , 4.6721544 ,-3.481392 , 2.8904114 ,-1.7057139 , 1.0501702 , 3.0799537 , 1.6698593 ,-1.3895478 , 4.487443 , 2.5352533 ,-0.19357985, 0.78166926, 3.5892236 ,-4.3259463 , 2.8381345 , 1.3652785 ,-0.40142608,-0.62102544,-3.088937 ,-4.0266094 , 4.7095647 , 2.0513067 ,-1.8115149 , 0.11062156,-4.5980725 , 2.809295 , 4.2042894 ,-3.4689455 ,-1.3418434 , 2.9026117 ,-1.6125411 , 2.153075 ,-3.4445221 , 3.4869678 , 1.8746428 , 0.8482056 , 3.0525062 , 1.715966 , 1.7684505 ,-2.0022326 ,-4.3427444 ,-3.1659825 , 1.6855526 , 3.1612136 , 2.0646648 ,-3.972224 ,-2.91726 ,-3.5450957 ,-2.7226381 ,-0.3273488 ,-2.5905557 , 3.6621993 ,-4.3285728 ,-0.6200474 , 0.08522832,-2.1981175 ,-3.4179437 , 2.5989106 ,-0.8503352 ,-3.3723786 , 3.9595454 ,-0.5431398 ,-2.6962373 , 1.9689399 ,-2.8925 ,-1.2064192 , 1.606632 , 2.2728612 ,-0.1403075 ,-4.8031726 , 0.1549256 ,-1.3698703 , 0.78889227,-2.286554 , 0.96417916,-0.10438658,-3.8131578 , 2.9322996 , 2.4103441 , 4.4864798 , 0.02176606,-1.1966147 ,-3.6921146 , 4.943659 ,-1.0050472 ,-1.2238564 ,-4.5758605 ,-2.6865735 , 1.7294792 , 4.180183 , 3.157911 ,-3.581904 ,-2.9112866 , 4.1674094 , 3.2326035 ,-2.7883985 ,-0.09154221, 0.8667318 ,-4.532571 , 0.816668 , 3.1307516 ,-4.1993947 ,-1.0503744 , 0.123965 , 0.17691068,-3.1465137 ,-1.4964765 , 3.4077635 ,-0.35415363, 1.9092371 ,-4.709203 , 1.148622 , 4.4766874 ,-2.193539 ,-3.7959206 , 1.4420112 ,-2.5300896 , 4.107192 , 3.4666913 ,-2.1158516 ,-3.182484 ,-2.8406513 ,-1.9396024 ,-2.3695247 , 3.8301885 ,-1.5032169 ,-0.48879272, 0.41695955,-1.1829228 , 4.822825 ,-2.9244933 ,-3.8178608 , 2.7742817 , 2.6998327 ,-3.1187122 , 2.508593 , 1.2989064 , 2.3436947 ,-0.39074868,-3.034766 ,-1.8690065 , 4.850296 ,-2.4549792 , 4.839528 , 2.2758777 , 2.6689568 , 3.2014422 , 3.6975234 ,-3.2566156 , 3.546554 , 1.9570364 ,-2.753807 , 2.3366053 ,-4.357898 , 4.9184504 ,-1.0057111 ,-3.8582199 , 1.2416974 , 4.355522 ,-2.7863925 , 0.4679685 , 2.6850772 , 2.9984746 , 2.434312 , 2.9931593 , 2.2637212 ,-0.18371914,-4.07688 ,-2.0402577 , 0.5173147 , 0.19596666, 4.71653 , 4.291663 ,-3.3575501 ,-1.0857964 ,-0.16504912, 3.6683955 , 2.9581416 ,-1.354989
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000/layer/uint8/2.txt
new file mode 100644
index 000000000..1295bfdba
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000/layer/uint8/2.txt
@@ -0,0 +1 @@
+ 1.2340723 ,-1.7371651 , 4.271641 ,-2.3332376 , 0.82301813,-3.4199295 ,-0.75806665,-2.2647665 , 2.613749 , 2.2658496 ,-2.1277714 ,-0.465433 ,-0.1323059 ,-1.9658507 ,-4.7780223 ,-4.392719 ,-0.81063855,-3.639001 ,-3.6398284 , 4.6309023 ,-0.17483327, 1.7921627 ,-1.1493484 ,-3.8145075 , 2.2367268 ,-0.40209827,-1.4159911 , 2.3032134 ,-4.154446 , 1.6760192 , 2.3430173 ,-1.386683 , 3.3363335 ,-2.976934 , 3.3983 ,-0.0069695 , 3.7025425 ,-1.8683758 , 0.72029626, 2.7558882 ,-4.4060984 , 2.553126 ,-3.5888321 , 1.8549582 ,-0.52258795, 4.6549897 , 0.8886988 ,-3.0400214 ,-3.6890693 , 3.6663766 ,-4.8026586 , 1.0636287 ,-2.9774907 , 0.39021772,-4.2414255 , 2.914968 ,-0.24334456,-4.0344954 ,-1.1011956 ,-3.8205252 , 0.05693521,-4.1379023 , 1.0584197 ,-4.0404034 , 4.841462 ,-1.2727845 , 2.6974225 ,-4.2507453 ,-2.7101111 ,-2.9800036 , 0.3082796 , 3.6763537 , 2.3277721 ,-4.9667864 ,-2.4498677 , 0.2704629 , 3.006634 ,-1.1129389 , 4.373073 ,-1.2066779 ,-3.1575904 ,-2.721046 ,-0.861226 , 1.7315729 , 2.255666 , 2.5448847 , 3.1268334 , 1.5189171 ,-3.1992466 , 0.607633 , 4.0749955 , 1.2546133 ,-1.5335796 ,-1.6200712 ,-3.9392874 , 1.053699 ,-0.87970537,-3.9218261 ,-2.2724128 , 0.82235074,-2.3400521 , 3.6467028 , 1.6891364 ,-1.6333519 , 2.2639709 ,-0.08272895,-3.076964 , 3.731091 , 3.7932968 , 2.496441 ,-4.12142 ,-2.0908666 ,-4.994248 ,-0.0429902 ,-4.6083336 ,-4.522535 , 4.717733 , 1.6715643 ,-4.779822 , 1.2919815 ,-4.6121325 ,-0.6206874 ,-2.6633883 ,-1.9632595 ,-3.2203329 ,-0.6556523 , 1.3083993 , 0.13287744, 4.599294 ,-1.1777852 ,-2.9159715 ,-0.25669238, 0.48217958,-3.9736347 ,-0.774503 ,-0.7264863 ,-3.0058725 ,-2.1682055 , 2.6579158 ,-4.4020653 , 3.0450368 , 1.3798735 ,-4.9858127 ,-4.5812607 ,-3.7349749 ,-4.4158583 , 1.631093 ,-3.0769646 ,-3.8406906 , 1.6544044 , 0.36895755,-1.8196682 ,-2.0880237 ,-3.708266 ,-2.0277069 , 1.0536597 ,-3.6726243 , 1.1704421 , 2.3201573 , 1.4994124 , 4.0197086 , 2.1001272 ,-0.39845964, 4.879206 ,-4.6042013 , 4.367211 , 2.2712052 , 2.7754369 ,-3.156667 , 4.349216 ,-4.111492 , 1.0267047 ,-2.3381946 , 4.8876834 , 4.876814 ,-0.28538027, 4.8861 ,-0.95963717, 0.46279734,-4.5789995 , 0.26168647,-0.8879058 , 2.4468584 , 1.3030591 , 3.7261188 , 3.9933589 , 2.4964094 ,-1.3851117 , 0.7147012 ,-3.8367457 , 0.79737735,-0.5907085 , 4.317288 , 0.7659837 ,-4.821792 ,-1.466433 ,-1.147227 ,-1.8638811 , 2.5115767 , 1.9449657 ,-2.4122007 ,-2.4968379 , 0.7738737 ,-1.4761454 , 4.131583 , 0.4211128 ,-2.4312468 ,-1.9722428 , 2.2810268 , 4.950381 ,-0.0406047 , 4.67312 , 0.66613483,-0.28880936, 3.2917845 , 1.6225572 , 4.809879 , 0.48241946,-3.654634 , 0.68542016, 1.3973923 , 3.479005 ,-1.4296091 , 0.64391786,-4.0887494 ,-2.186845 ,-4.5834355 ,-0.67726034, 2.4158256 ,-2.4787726 , 0.4353257 , 2.9205139 , 0.10488439, 2.0790074 ,-4.5518365 ,-3.3856661 , 3.940736 ,-1.7141095 ,-4.8946457 , 1.1085542 , 3.785141 ,-2.4175835 , 3.7720537 , 4.623048 , 2.2239215 , 0.11616404, 0.09229392,-3.637964 ,-2.334849 ,-0.95000714,-2.1338253 , 3.2281857 ,-4.0220475 , 4.7304025 ,-1.8075961 , 0.2428817
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000/layer/uint8/3.txt
new file mode 100644
index 000000000..378b5fea5
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000/layer/uint8/3.txt
@@ -0,0 +1 @@
+ 2.4605505 ,-2.7001262 ,-4.3874917 ,-2.9867616 ,-3.4332 , 0.76675916, 3.4377892 ,-0.6712793 , 1.8018581 , 1.8148962 , 2.0353577 ,-4.766427 , 3.2487285 , 3.886249 ,-2.8867183 ,-0.7906634 ,-4.376028 ,-4.2085958 ,-0.36025277, 0.6360799 ,-4.687723 , 4.8313313 , 3.3582768 , 2.1117954 , 0.9821817 , 3.3697798 ,-1.1784939 ,-3.1590316 ,-0.24019621, 0.20640443, 1.2808957 , 2.3346424 , 2.13951 , 0.61864626, 2.4020443 ,-1.9671458 ,-1.6852348 , 0.32225233,-2.3928862 ,-4.173372 ,-2.282281 ,-1.271318 , 3.0839682 ,-4.4726086 ,-0.635177 , 3.2710915 , 3.08071 ,-0.7311931 , 2.1444874 , 0.4102332 ,-3.332888 ,-4.8965516 , 3.903695 , 1.4920163 ,-4.041926 ,-0.3941788 , 3.6352818 ,-2.098405 ,-0.9248165 , 2.6277795 , 3.225142 ,-1.4461963 ,-4.2050753 ,-0.2213572 , 1.9704323 , 3.298732 ,-4.710403 , 3.6876736 , 2.0771818 , 1.3559113 , 1.328373 ,-4.4079022 ,-3.28067 , 3.8852313 , 2.322237 , 2.3243637 ,-1.9126451 , 4.6277676 , 1.7031307 , 0.74861574,-4.688967 , 3.9351206 ,-1.8054084 , 1.5824287 , 3.5381088 , 2.4798677 ,-3.3099444 ,-3.8518245 , 1.5562242 ,-1.9466928 , 0.08375791,-0.16754703, 2.9265418 ,-1.6599798 , 2.766202 ,-2.8269696 ,-0.19389874, 2.0869334 ,-1.5073173 ,-3.2024453 ,-3.6522708 ,-4.588111 ,-2.3425827 , 4.8709297 ,-1.4231887 , 1.0590451 ,-1.6406479 , 0.37192422, 0.7313186 , 0.3865313 ,-4.2832613 , 3.9712496 , 0.07653506, 0.2593589 ,-2.6036396 ,-0.45185068, 3.6537335 ,-0.6341783 ,-0.6381408 ,-1.0992868 , 2.766365 , 4.666631 , 4.416099 ,-3.6654727 ,-4.0626607 ,-3.4928396 ,-0.6944366 , 4.869798 , 4.2240977 , 0.9655519 ,-2.5654511 , 1.3396966 ,-3.7639391 ,-1.2369057 ,-3.7242758 ,-0.5189227 , 1.6548159 ,-2.6197302 , 4.2732763 , 2.239486 ,-4.316255 , 3.2419755 ,-1.9283817 , 0.22489135, 2.6034477 , 0.15818155, 2.0811818 , 0.836994 , 2.7832468 ,-0.68581384, 0.89475006,-3.1455147 ,-4.818614 ,-4.1738377 , 0.4281551 ,-2.935886 ,-3.7582467 , 0.58168256, 0.2854076 , 1.0492616 , 2.2415884 ,-4.4923434 ,-3.2479804 , 3.8439462 , 3.9802108 ,-0.9027783 , 1.7783072 ,-2.2782066 , 4.4638705 , 4.28735 , 4.291463 , 1.1685107 , 1.2765578 ,-3.7954235 ,-3.494621 , 4.4340134 ,-3.5995178 ,-4.3025713 , 3.3037348 ,-3.6675146 ,-1.7871013 ,-1.2922373 , 0.72924066,-4.7065907 , 2.1388702 , 2.3570008 , 3.9203117 , 0.07483537,-2.8389792 ,-1.795164 ,-4.380931 , 1.3189598 , 2.4404252 , 4.4774084 ,-1.2798066 ,-4.95842 , 1.8095461 , 4.2692375 ,-2.0918155 , 0.33083543,-3.794544 , 1.4940621 ,-3.9446015 ,-0.38208306, 0.30863285,-0.6832849 ,-2.5675633 ,-4.948772 , 1.5904989 , 3.0415509 ,-4.899339 , 0.9415345 ,-0.91124976, 4.4849253 ,-3.4605968 , 1.6737833 , 1.9091597 , 1.3111106 , 2.0829957 ,-2.1308084 ,-2.912219 , 1.1306196 , 2.231948 , 4.7522073 ,-2.1438766 ,-2.1000512 ,-0.2984778 ,-1.2093959 , 2.6259391 , 1.8113437 ,-4.137133 , 2.716111 , 3.4318748 ,-0.89123845,-3.70718 , 2.453927 ,-0.22418758,-3.098459 ,-4.4986243 , 0.85048616, 2.8023102 , 3.743153 , 0.9931644 , 3.8588202 , 1.7585737 ,-4.2855363 ,-2.5475764 ,-0.83141845,-1.9358089 , 3.1711586 , 2.4221613 ,-1.881327 ,-3.7230873 ,-4.55259 ,-0.42294836, 4.64625
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000/layer/uint8/4.txt
new file mode 100644
index 000000000..339435425
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000/layer/uint8/4.txt
@@ -0,0 +1 @@
+-3.37344313e+00, 2.78325319e+00,-7.30300546e-01, 1.33456266e+00, 3.96648932e+00, 4.33421373e+00,-3.11558557e+00,-3.64659280e-02,-1.73589993e+00, 4.81018400e+00,-8.32905114e-01, 2.33330703e+00, 1.85830116e+00,-4.60395622e+00, 5.26070774e-01,-4.71355534e+00,-2.97202754e+00, 3.57638383e+00, 4.50985909e+00, 2.08423686e+00,-1.85349309e+00,-2.18306184e+00,-4.65403509e+00, 4.31280661e+00, 1.16069472e+00,-4.85344124e+00, 8.40563923e-02,-1.98723459e+00,-4.29561710e+00,-2.57372570e+00,-4.22641230e+00,-4.00811911e+00,-9.61861551e-01,-2.14665198e+00, 4.18120289e+00,-3.87826174e-01,-2.86187083e-01,-4.84979200e+00,-1.34733701e+00, 1.27489030e+00, 1.98844969e+00,-4.11230135e+00,-1.61191213e+00, 2.63515592e+00, 4.35539484e+00,-1.56582773e+00,-2.45283508e+00, 1.44556177e+00,-8.56053472e-01, 3.25111747e+00, 3.58699083e+00,-2.47732449e+00, 3.64130282e+00,-4.91288567e+00, 8.97059917e-01,-2.26010180e+00, 4.91831064e+00, 4.45047706e-01, 1.88655663e+00, 3.20642543e+00, 1.38243341e+00, 9.06112790e-01, 1.15262544e+00,-2.39862514e+00,-2.87477684e+00, 7.36831248e-01, 3.18799114e+00, 1.22698748e+00, 5.63625395e-01, 1.29130912e+00,-4.89572334e+00, 2.11258578e+00,-4.55420208e+00, 4.94569272e-01,-7.08617330e-01,-1.84863120e-01,-4.81965256e+00,-1.06512284e+00, 4.79633398e-02, 2.70429182e+00, 4.78289175e+00,-2.11806059e+00, 4.23046875e+00, 3.18022132e+00,-8.39496255e-01, 3.13150501e+00,-3.24103773e-01,-7.48505890e-01,-2.45754886e+00, 4.16639376e+00, 3.25864077e+00, 3.40006447e+00,-3.77217412e+00, 2.93266010e+00, 3.33685803e+00, 1.02347994e+00,-2.22839618e+00,-1.90375733e+00, 3.24283957e+00,-4.01684284e-01,-4.45417643e+00, 3.74440104e-01, 3.33520865e+00, 6.64106190e-01, 3.84395885e+00, 2.38586918e-01,-1.51634857e-01,-2.64977455e+00,-3.45786500e+00, 4.89002228e+00,-1.07323432e+00,-2.92749858e+00,-1.76510501e+00,-3.44604325e+00,-1.89681911e+00, 4.20239258e+00,-1.75864971e+00, 2.13181686e+00, 3.90355319e-01,-4.11911535e+00, 6.61891177e-02,-4.32988214e+00,-1.42876351e+00, 3.12163901e+00,-4.56227779e+00, 4.17938662e+00, 9.63881195e-01, 4.35952139e+00, 1.61931109e+00, 4.11196423e+00, 2.25612569e+00,-4.77538586e+00,-1.72600198e+00,-4.39411783e+00,-8.98730099e-01,-1.04562032e+00,-2.81517529e+00, 3.57167959e+00, 1.90318239e+00, 2.17302442e+00,-3.79942179e+00, 2.19838643e+00,-4.16209459e+00, 4.45025682e+00, 1.68786839e-01,-2.56879544e+00, 3.60925221e+00, 1.06542781e-01,-3.48755455e+00,-6.77028894e-01,-3.51582170e+00, 3.90697241e+00, 4.49116230e+00,-1.56180394e+00, 4.96249914e+00, 9.63374436e-01, 2.72304177e+00, 8.38046610e-01,-2.91993833e+00,-9.41783428e-01, 8.00800502e-01, 3.89176035e+00, 6.70560122e-01, 2.76782703e+00,-1.37075472e+00,-3.25303817e+00,-4.41226482e+00,-8.38777184e-01, 1.73568249e+00,-1.09438455e+00,-1.08815920e+00, 1.06787062e+00, 2.04415274e+00,-2.93027782e+00,-6.86941504e-01, 3.83109421e-01,-3.49270535e+00,-2.13225913e+00,-3.61786675e+00, 1.32213378e+00,-2.89654016e+00, 4.23944092e+00, 4.53665400e+00, 4.26081800e+00,-1.95718706e+00, 4.72295076e-01,-3.08592963e+00, 2.53354859e+00, 3.80069661e+00,-1.14408419e-01, 2.39438844e+00,-4.73618507e+00, 2.35079074e+00,-1.43686843e+00, 1.32946157e+00, 1.10381134e-01,-3.49878430e+00, 2.83181930e+00, 4.57872486e+00, 2.29953095e-01, 7.19881415e-01,-2.97208834e+00, 4.11286211e+00,-3.89149117e+00, 3.83631349e+00, 4.14627981e+00,-1.14082299e-01,-6.89825296e-01,-2.55468488e+00,-4.04466152e+00, 9.95541453e-01,-2.59181118e+00,-4.60567427e+00,-4.77339029e+00,-7.36041367e-02, 1.85957468e+00,-3.42530179e+00, 4.55782986e+00,-3.29603004e+00, 3.55632234e+00, 2.40858841e+00,-2.07399082e+00,-3.96705031e+00, 4.41718817e+00, 3.19581985e+00,-3.72379017e+00,-3.76826024e+00, 6.79764748e-01,-4.43838930e+00, 2.29627752e+00, 2.34923697e+00,-4.23308420e+00, 3.80186272e+00, 8.65862250e-01, 8.44927967e-01,-1.05974531e+00, 4.70531940e+00, 1.25060010e+00, 4.82314730e+00,-4.53093815e+00, 4.51410580e+00, 4.95166332e-01,-3.45584202e+00, 1.82002666e-03,-3.27616286e+00,-2.68104935e+00, 2.39554620e+00, 2.99364328e+00,-2.57998848e+00,-4.35891914e+00, 4.64737415e+00,-5.74958742e-01, 6.47293210e-01, 1.85961032e+00, 4.49567413e+00,-4.36166048e+00
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/0.txt
new file mode 100644
index 000000000..182eb5290
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/0.txt
@@ -0,0 +1 @@
+ 3.4251418 , 1.8884782 ,-4.061519 ,-2.1329548 , 3.851976 , 3.668601 ,-0.7418167 , 2.379966 , 0.87259316,-3.96981 ,-4.627804 ,-3.3958297 , 3.025158 ,-1.299777 ,-4.322816 , 3.9173064 ,-0.55214256, 1.9224825 ,-4.8571157 ,-4.778045 , 3.3015614 , 0.56785774, 4.7985554 ,-0.4355816 , 4.9478025 , 1.7909397 ,-0.7620663 ,-0.09947702,-3.0230513 , 1.3817457 ,-4.5706887 ,-3.4097836 ,-4.7086477 ,-3.4651487 , 1.4401027 , 4.7513933 ,-1.0788624 ,-3.4946275 , 4.607974 ,-3.1215246 ,-1.4637078 ,-3.5266285 , 2.1268125 , 0.19458893, 4.058288 , 2.2452407 , 0.7575343 , 0.12213306, 4.885321 ,-1.2482406 ,-1.1034219 ,-4.054173 ,-3.6471267 , 4.774012 , 0.9450243 ,-2.5827825 ,-2.3991685 ,-2.8482654 , 0.9294943 ,-3.1165063 ,-1.6113516 , 0.04260086, 2.0987031 , 2.1601508 , 4.9740996 , 3.7719023 , 2.6817482 , 0.42131838,-1.4525859 ,-0.5124655 , 2.6313434 , 4.5606523 ,-4.6180778 , 4.788594 ,-0.8446551 ,-1.5460813 , 1.4288356 ,-1.9648911 ,-4.9766145 ,-2.405665 ,-0.30327383, 3.5204673 ,-3.848158 ,-2.6913974 ,-2.76141 , 4.336643 , 1.4205143 , 4.5898 ,-0.93183124, 4.2199287 ,-4.216924 ,-1.0979122 ,-2.3032405 ,-3.4457245 , 2.944412 , 2.137278 , 1.0326933 , 2.3116126 , 4.2138443 , 1.8283377 , 0.28901085,-1.8877143 , 0.50673705, 1.4360197 ,-2.924691 , 0.9819095 , 3.4656513 ,-2.541582 ,-1.9102442 , 3.3629627 ,-0.9675056 , 0.5937253 ,-2.4236617 ,-1.4193813 ,-0.7552614 ,-1.7121441 , 4.39647 ,-2.2712908 ,-4.3387337 , 1.5912663 , 0.8397044 , 0.17277755, 1.5272428 , 3.571715 ,-1.4471695 , 1.8623346 ,-4.3603377 , 1.2116091 , 4.960487 , 2.3681397 , 1.2925869 ,-4.3249073 , 2.4402251 ,-1.4506928 , 3.023616 ,-3.232099 ,-4.0106025 , 3.5774167 ,-0.6024932 , 1.0183483 ,-2.8215308 , 3.7395437 , 1.9100485 , 3.892712 , 4.6569633 ,-3.251774 ,-3.6923678 ,-4.8891983 ,-3.8605282 ,-4.0293036 ,-2.8199108 , 4.1668954 , 2.1569817 ,-2.9700332 ,-0.7035824 ,-0.5176811 ,-3.1826456 ,-3.334556 , 4.9103675 , 3.8513231 , 2.8609774 , 1.1845547 ,-1.4094447 ,-2.0445833 , 0.9833705 , 4.481276 , 3.83006 , 4.6240997 ,-4.268881 ,-0.85518706,-2.2650888 , 4.032545 , 0.9495817 , 1.1353155 ,-4.6551876 ,-2.2839146 , 2.6291692 ,-3.0398533 , 0.52652216,-1.8323399 ,-0.12300313, 0.46178594, 1.120684 , 1.4657134 ,-1.9794375 , 0.08941289,-4.4573083 , 2.7112565 , 4.9227715 , 2.4938288 ,-0.37153494,-4.1604757 , 4.7694197 ,-1.3021677 , 2.454714 ,-2.4902875 ,-2.760436 , 0.05183195,-2.6723208 ,-1.1471758 ,-2.2565122 , 0.20876396,-0.7288584 , 0.4386669 , 0.7846054 , 2.7294593 ,-3.836883 , 2.7501638 ,-4.775067 ,-3.2403855 ,-2.0307286 ,-1.6403166 , 4.9471517 , 1.0428456 , 2.5126355 , 3.0090203 ,-2.3476288 ,-2.9215205 , 3.8079188 , 0.83959275, 4.2670302 , 1.2338712 , 2.7329903 , 2.2549257 , 4.882931 , 0.12783106,-2.4392028 ,-2.4590807 , 4.2874207 ,-0.08333418,-3.4244132 ,-0.2235516 ,-4.23632 ,-1.3970895 , 2.1245553 ,-2.513883 ,-2.8092728 ,-1.9194845 ,-4.1932216 ,-3.7431748 ,-1.1063433 ,-3.714845 , 1.7230242 ,-0.19162221, 1.1123114 , 3.937181 , 2.6165597 ,-0.61531806, 0.44309503,-2.9260228 ,-3.1617007 , 0.0663496 , 2.4541974 ,-2.714474 , 4.2564497 , 1.2300675
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/1.txt
new file mode 100644
index 000000000..dd8037244
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/1.txt
@@ -0,0 +1 @@
+-4.8834 ,-4.6238756 , 2.020674 ,-2.3068821 , 3.7487323 ,-0.36079448, 0.08661745, 3.423143 , 3.3073757 ,-2.709357 , 4.4810205 , 3.4159606 , 4.1597505 ,-4.249789 , 2.3782206 ,-2.02848 , 0.90137833,-0.6249625 ,-3.5300052 ,-4.1113796 ,-3.768913 ,-3.59854 , 2.0896666 , 1.7677166 ,-2.3101497 ,-1.0116942 ,-3.7846713 , 2.4777756 , 3.413987 ,-2.1964507 , 0.08637846, 0.02552292,-1.9918599 , 0.7785565 ,-4.065995 , 0.8808776 ,-2.0446506 ,-1.8421272 , 0.42566776, 3.8834689 , 4.900111 ,-3.0617309 , 4.0613194 ,-3.3601153 , 3.678536 ,-4.1136184 ,-4.2903633 ,-2.6918027 , 3.4335177 ,-3.9272869 ,-1.6882807 ,-1.9629028 , 4.2125826 , 1.6536059 ,-1.1801353 , 4.8443203 , 2.9393198 , 0.4306524 , 4.390743 ,-4.6322317 , 2.932263 , 4.140538 , 2.7385068 , 2.620753 , 2.0725663 ,-1.3642436 ,-0.48539641,-4.2409816 ,-1.5950899 ,-1.688442 , 4.4769464 ,-1.25038 , 3.462903 , 0.5011836 , 0.981037 , 0.63532305,-3.4727957 , 4.6721544 ,-3.481392 , 2.8904114 ,-1.7057139 , 1.0501702 , 3.0799537 , 1.6698593 ,-1.3895478 , 4.487443 , 2.5352533 ,-0.19357985, 0.78166926, 3.5892236 ,-4.3259463 , 2.8381345 , 1.3652785 ,-0.40142608,-0.62102544,-3.088937 ,-4.0266094 , 4.7095647 , 2.0513067 ,-1.8115149 , 0.11062156,-4.5980725 , 2.809295 , 4.2042894 ,-3.4689455 ,-1.3418434 , 2.9026117 ,-1.6125411 , 2.153075 ,-3.4445221 , 3.4869678 , 1.8746428 , 0.8482056 , 3.0525062 , 1.715966 , 1.7684505 ,-2.0022326 ,-4.3427444 ,-3.1659825 , 1.6855526 , 3.1612136 , 2.0646648 ,-3.972224 ,-2.91726 ,-3.5450957 ,-2.7226381 ,-0.3273488 ,-2.5905557 , 3.6621993 ,-4.3285728 ,-0.6200474 , 0.08522832,-2.1981175 ,-3.4179437 , 2.5989106 ,-0.8503352 ,-3.3723786 , 3.9595454 ,-0.5431398 ,-2.6962373 , 1.9689399 ,-2.8925 ,-1.2064192 , 1.606632 , 2.2728612 ,-0.1403075 ,-4.8031726 , 0.1549256 ,-1.3698703 , 0.78889227,-2.286554 , 0.96417916,-0.10438658,-3.8131578 , 2.9322996 , 2.4103441 , 4.4864798 , 0.02176606,-1.1966147 ,-3.6921146 , 4.943659 ,-1.0050472 ,-1.2238564 ,-4.5758605 ,-2.6865735 , 1.7294792 , 4.180183 , 3.157911 ,-3.581904 ,-2.9112866 , 4.1674094 , 3.2326035 ,-2.7883985 ,-0.09154221, 0.8667318 ,-4.532571 , 0.816668 , 3.1307516 ,-4.1993947 ,-1.0503744 , 0.123965 , 0.17691068,-3.1465137 ,-1.4964765 , 3.4077635 ,-0.35415363, 1.9092371 ,-4.709203 , 1.148622 , 4.4766874 ,-2.193539 ,-3.7959206 , 1.4420112 ,-2.5300896 , 4.107192 , 3.4666913 ,-2.1158516 ,-3.182484 ,-2.8406513 ,-1.9396024 ,-2.3695247 , 3.8301885 ,-1.5032169 ,-0.48879272, 0.41695955,-1.1829228 , 4.822825 ,-2.9244933 ,-3.8178608 , 2.7742817 , 2.6998327 ,-3.1187122 , 2.508593 , 1.2989064 , 2.3436947 ,-0.39074868,-3.034766 ,-1.8690065 , 4.850296 ,-2.4549792 , 4.839528 , 2.2758777 , 2.6689568 , 3.2014422 , 3.6975234 ,-3.2566156 , 3.546554 , 1.9570364 ,-2.753807 , 2.3366053 ,-4.357898 , 4.9184504 ,-1.0057111 ,-3.8582199 , 1.2416974 , 4.355522 ,-2.7863925 , 0.4679685 , 2.6850772 , 2.9984746 , 2.434312 , 2.9931593 , 2.2637212 ,-0.18371914,-4.07688 ,-2.0402577 , 0.5173147 , 0.19596666, 4.71653 , 4.291663 ,-3.3575501 ,-1.0857964 ,-0.16504912, 3.6683955 , 2.9581416 ,-1.354989
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/2.txt
new file mode 100644
index 000000000..1295bfdba
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/2.txt
@@ -0,0 +1 @@
+ 1.2340723 ,-1.7371651 , 4.271641 ,-2.3332376 , 0.82301813,-3.4199295 ,-0.75806665,-2.2647665 , 2.613749 , 2.2658496 ,-2.1277714 ,-0.465433 ,-0.1323059 ,-1.9658507 ,-4.7780223 ,-4.392719 ,-0.81063855,-3.639001 ,-3.6398284 , 4.6309023 ,-0.17483327, 1.7921627 ,-1.1493484 ,-3.8145075 , 2.2367268 ,-0.40209827,-1.4159911 , 2.3032134 ,-4.154446 , 1.6760192 , 2.3430173 ,-1.386683 , 3.3363335 ,-2.976934 , 3.3983 ,-0.0069695 , 3.7025425 ,-1.8683758 , 0.72029626, 2.7558882 ,-4.4060984 , 2.553126 ,-3.5888321 , 1.8549582 ,-0.52258795, 4.6549897 , 0.8886988 ,-3.0400214 ,-3.6890693 , 3.6663766 ,-4.8026586 , 1.0636287 ,-2.9774907 , 0.39021772,-4.2414255 , 2.914968 ,-0.24334456,-4.0344954 ,-1.1011956 ,-3.8205252 , 0.05693521,-4.1379023 , 1.0584197 ,-4.0404034 , 4.841462 ,-1.2727845 , 2.6974225 ,-4.2507453 ,-2.7101111 ,-2.9800036 , 0.3082796 , 3.6763537 , 2.3277721 ,-4.9667864 ,-2.4498677 , 0.2704629 , 3.006634 ,-1.1129389 , 4.373073 ,-1.2066779 ,-3.1575904 ,-2.721046 ,-0.861226 , 1.7315729 , 2.255666 , 2.5448847 , 3.1268334 , 1.5189171 ,-3.1992466 , 0.607633 , 4.0749955 , 1.2546133 ,-1.5335796 ,-1.6200712 ,-3.9392874 , 1.053699 ,-0.87970537,-3.9218261 ,-2.2724128 , 0.82235074,-2.3400521 , 3.6467028 , 1.6891364 ,-1.6333519 , 2.2639709 ,-0.08272895,-3.076964 , 3.731091 , 3.7932968 , 2.496441 ,-4.12142 ,-2.0908666 ,-4.994248 ,-0.0429902 ,-4.6083336 ,-4.522535 , 4.717733 , 1.6715643 ,-4.779822 , 1.2919815 ,-4.6121325 ,-0.6206874 ,-2.6633883 ,-1.9632595 ,-3.2203329 ,-0.6556523 , 1.3083993 , 0.13287744, 4.599294 ,-1.1777852 ,-2.9159715 ,-0.25669238, 0.48217958,-3.9736347 ,-0.774503 ,-0.7264863 ,-3.0058725 ,-2.1682055 , 2.6579158 ,-4.4020653 , 3.0450368 , 1.3798735 ,-4.9858127 ,-4.5812607 ,-3.7349749 ,-4.4158583 , 1.631093 ,-3.0769646 ,-3.8406906 , 1.6544044 , 0.36895755,-1.8196682 ,-2.0880237 ,-3.708266 ,-2.0277069 , 1.0536597 ,-3.6726243 , 1.1704421 , 2.3201573 , 1.4994124 , 4.0197086 , 2.1001272 ,-0.39845964, 4.879206 ,-4.6042013 , 4.367211 , 2.2712052 , 2.7754369 ,-3.156667 , 4.349216 ,-4.111492 , 1.0267047 ,-2.3381946 , 4.8876834 , 4.876814 ,-0.28538027, 4.8861 ,-0.95963717, 0.46279734,-4.5789995 , 0.26168647,-0.8879058 , 2.4468584 , 1.3030591 , 3.7261188 , 3.9933589 , 2.4964094 ,-1.3851117 , 0.7147012 ,-3.8367457 , 0.79737735,-0.5907085 , 4.317288 , 0.7659837 ,-4.821792 ,-1.466433 ,-1.147227 ,-1.8638811 , 2.5115767 , 1.9449657 ,-2.4122007 ,-2.4968379 , 0.7738737 ,-1.4761454 , 4.131583 , 0.4211128 ,-2.4312468 ,-1.9722428 , 2.2810268 , 4.950381 ,-0.0406047 , 4.67312 , 0.66613483,-0.28880936, 3.2917845 , 1.6225572 , 4.809879 , 0.48241946,-3.654634 , 0.68542016, 1.3973923 , 3.479005 ,-1.4296091 , 0.64391786,-4.0887494 ,-2.186845 ,-4.5834355 ,-0.67726034, 2.4158256 ,-2.4787726 , 0.4353257 , 2.9205139 , 0.10488439, 2.0790074 ,-4.5518365 ,-3.3856661 , 3.940736 ,-1.7141095 ,-4.8946457 , 1.1085542 , 3.785141 ,-2.4175835 , 3.7720537 , 4.623048 , 2.2239215 , 0.11616404, 0.09229392,-3.637964 ,-2.334849 ,-0.95000714,-2.1338253 , 3.2281857 ,-4.0220475 , 4.7304025 ,-1.8075961 , 0.2428817
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/3.txt
new file mode 100644
index 000000000..378b5fea5
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/3.txt
@@ -0,0 +1 @@
+ 2.4605505 ,-2.7001262 ,-4.3874917 ,-2.9867616 ,-3.4332 , 0.76675916, 3.4377892 ,-0.6712793 , 1.8018581 , 1.8148962 , 2.0353577 ,-4.766427 , 3.2487285 , 3.886249 ,-2.8867183 ,-0.7906634 ,-4.376028 ,-4.2085958 ,-0.36025277, 0.6360799 ,-4.687723 , 4.8313313 , 3.3582768 , 2.1117954 , 0.9821817 , 3.3697798 ,-1.1784939 ,-3.1590316 ,-0.24019621, 0.20640443, 1.2808957 , 2.3346424 , 2.13951 , 0.61864626, 2.4020443 ,-1.9671458 ,-1.6852348 , 0.32225233,-2.3928862 ,-4.173372 ,-2.282281 ,-1.271318 , 3.0839682 ,-4.4726086 ,-0.635177 , 3.2710915 , 3.08071 ,-0.7311931 , 2.1444874 , 0.4102332 ,-3.332888 ,-4.8965516 , 3.903695 , 1.4920163 ,-4.041926 ,-0.3941788 , 3.6352818 ,-2.098405 ,-0.9248165 , 2.6277795 , 3.225142 ,-1.4461963 ,-4.2050753 ,-0.2213572 , 1.9704323 , 3.298732 ,-4.710403 , 3.6876736 , 2.0771818 , 1.3559113 , 1.328373 ,-4.4079022 ,-3.28067 , 3.8852313 , 2.322237 , 2.3243637 ,-1.9126451 , 4.6277676 , 1.7031307 , 0.74861574,-4.688967 , 3.9351206 ,-1.8054084 , 1.5824287 , 3.5381088 , 2.4798677 ,-3.3099444 ,-3.8518245 , 1.5562242 ,-1.9466928 , 0.08375791,-0.16754703, 2.9265418 ,-1.6599798 , 2.766202 ,-2.8269696 ,-0.19389874, 2.0869334 ,-1.5073173 ,-3.2024453 ,-3.6522708 ,-4.588111 ,-2.3425827 , 4.8709297 ,-1.4231887 , 1.0590451 ,-1.6406479 , 0.37192422, 0.7313186 , 0.3865313 ,-4.2832613 , 3.9712496 , 0.07653506, 0.2593589 ,-2.6036396 ,-0.45185068, 3.6537335 ,-0.6341783 ,-0.6381408 ,-1.0992868 , 2.766365 , 4.666631 , 4.416099 ,-3.6654727 ,-4.0626607 ,-3.4928396 ,-0.6944366 , 4.869798 , 4.2240977 , 0.9655519 ,-2.5654511 , 1.3396966 ,-3.7639391 ,-1.2369057 ,-3.7242758 ,-0.5189227 , 1.6548159 ,-2.6197302 , 4.2732763 , 2.239486 ,-4.316255 , 3.2419755 ,-1.9283817 , 0.22489135, 2.6034477 , 0.15818155, 2.0811818 , 0.836994 , 2.7832468 ,-0.68581384, 0.89475006,-3.1455147 ,-4.818614 ,-4.1738377 , 0.4281551 ,-2.935886 ,-3.7582467 , 0.58168256, 0.2854076 , 1.0492616 , 2.2415884 ,-4.4923434 ,-3.2479804 , 3.8439462 , 3.9802108 ,-0.9027783 , 1.7783072 ,-2.2782066 , 4.4638705 , 4.28735 , 4.291463 , 1.1685107 , 1.2765578 ,-3.7954235 ,-3.494621 , 4.4340134 ,-3.5995178 ,-4.3025713 , 3.3037348 ,-3.6675146 ,-1.7871013 ,-1.2922373 , 0.72924066,-4.7065907 , 2.1388702 , 2.3570008 , 3.9203117 , 0.07483537,-2.8389792 ,-1.795164 ,-4.380931 , 1.3189598 , 2.4404252 , 4.4774084 ,-1.2798066 ,-4.95842 , 1.8095461 , 4.2692375 ,-2.0918155 , 0.33083543,-3.794544 , 1.4940621 ,-3.9446015 ,-0.38208306, 0.30863285,-0.6832849 ,-2.5675633 ,-4.948772 , 1.5904989 , 3.0415509 ,-4.899339 , 0.9415345 ,-0.91124976, 4.4849253 ,-3.4605968 , 1.6737833 , 1.9091597 , 1.3111106 , 2.0829957 ,-2.1308084 ,-2.912219 , 1.1306196 , 2.231948 , 4.7522073 ,-2.1438766 ,-2.1000512 ,-0.2984778 ,-1.2093959 , 2.6259391 , 1.8113437 ,-4.137133 , 2.716111 , 3.4318748 ,-0.89123845,-3.70718 , 2.453927 ,-0.22418758,-3.098459 ,-4.4986243 , 0.85048616, 2.8023102 , 3.743153 , 0.9931644 , 3.8588202 , 1.7585737 ,-4.2855363 ,-2.5475764 ,-0.83141845,-1.9358089 , 3.1711586 , 2.4221613 ,-1.881327 ,-3.7230873 ,-4.55259 ,-0.42294836, 4.64625
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/4.txt
new file mode 100644
index 000000000..339435425
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/4.txt
@@ -0,0 +1 @@
+-3.37344313e+00, 2.78325319e+00,-7.30300546e-01, 1.33456266e+00, 3.96648932e+00, 4.33421373e+00,-3.11558557e+00,-3.64659280e-02,-1.73589993e+00, 4.81018400e+00,-8.32905114e-01, 2.33330703e+00, 1.85830116e+00,-4.60395622e+00, 5.26070774e-01,-4.71355534e+00,-2.97202754e+00, 3.57638383e+00, 4.50985909e+00, 2.08423686e+00,-1.85349309e+00,-2.18306184e+00,-4.65403509e+00, 4.31280661e+00, 1.16069472e+00,-4.85344124e+00, 8.40563923e-02,-1.98723459e+00,-4.29561710e+00,-2.57372570e+00,-4.22641230e+00,-4.00811911e+00,-9.61861551e-01,-2.14665198e+00, 4.18120289e+00,-3.87826174e-01,-2.86187083e-01,-4.84979200e+00,-1.34733701e+00, 1.27489030e+00, 1.98844969e+00,-4.11230135e+00,-1.61191213e+00, 2.63515592e+00, 4.35539484e+00,-1.56582773e+00,-2.45283508e+00, 1.44556177e+00,-8.56053472e-01, 3.25111747e+00, 3.58699083e+00,-2.47732449e+00, 3.64130282e+00,-4.91288567e+00, 8.97059917e-01,-2.26010180e+00, 4.91831064e+00, 4.45047706e-01, 1.88655663e+00, 3.20642543e+00, 1.38243341e+00, 9.06112790e-01, 1.15262544e+00,-2.39862514e+00,-2.87477684e+00, 7.36831248e-01, 3.18799114e+00, 1.22698748e+00, 5.63625395e-01, 1.29130912e+00,-4.89572334e+00, 2.11258578e+00,-4.55420208e+00, 4.94569272e-01,-7.08617330e-01,-1.84863120e-01,-4.81965256e+00,-1.06512284e+00, 4.79633398e-02, 2.70429182e+00, 4.78289175e+00,-2.11806059e+00, 4.23046875e+00, 3.18022132e+00,-8.39496255e-01, 3.13150501e+00,-3.24103773e-01,-7.48505890e-01,-2.45754886e+00, 4.16639376e+00, 3.25864077e+00, 3.40006447e+00,-3.77217412e+00, 2.93266010e+00, 3.33685803e+00, 1.02347994e+00,-2.22839618e+00,-1.90375733e+00, 3.24283957e+00,-4.01684284e-01,-4.45417643e+00, 3.74440104e-01, 3.33520865e+00, 6.64106190e-01, 3.84395885e+00, 2.38586918e-01,-1.51634857e-01,-2.64977455e+00,-3.45786500e+00, 4.89002228e+00,-1.07323432e+00,-2.92749858e+00,-1.76510501e+00,-3.44604325e+00,-1.89681911e+00, 4.20239258e+00,-1.75864971e+00, 2.13181686e+00, 3.90355319e-01,-4.11911535e+00, 6.61891177e-02,-4.32988214e+00,-1.42876351e+00, 3.12163901e+00,-4.56227779e+00, 4.17938662e+00, 9.63881195e-01, 4.35952139e+00, 1.61931109e+00, 4.11196423e+00, 2.25612569e+00,-4.77538586e+00,-1.72600198e+00,-4.39411783e+00,-8.98730099e-01,-1.04562032e+00,-2.81517529e+00, 3.57167959e+00, 1.90318239e+00, 2.17302442e+00,-3.79942179e+00, 2.19838643e+00,-4.16209459e+00, 4.45025682e+00, 1.68786839e-01,-2.56879544e+00, 3.60925221e+00, 1.06542781e-01,-3.48755455e+00,-6.77028894e-01,-3.51582170e+00, 3.90697241e+00, 4.49116230e+00,-1.56180394e+00, 4.96249914e+00, 9.63374436e-01, 2.72304177e+00, 8.38046610e-01,-2.91993833e+00,-9.41783428e-01, 8.00800502e-01, 3.89176035e+00, 6.70560122e-01, 2.76782703e+00,-1.37075472e+00,-3.25303817e+00,-4.41226482e+00,-8.38777184e-01, 1.73568249e+00,-1.09438455e+00,-1.08815920e+00, 1.06787062e+00, 2.04415274e+00,-2.93027782e+00,-6.86941504e-01, 3.83109421e-01,-3.49270535e+00,-2.13225913e+00,-3.61786675e+00, 1.32213378e+00,-2.89654016e+00, 4.23944092e+00, 4.53665400e+00, 4.26081800e+00,-1.95718706e+00, 4.72295076e-01,-3.08592963e+00, 2.53354859e+00, 3.80069661e+00,-1.14408419e-01, 2.39438844e+00,-4.73618507e+00, 2.35079074e+00,-1.43686843e+00, 1.32946157e+00, 1.10381134e-01,-3.49878430e+00, 2.83181930e+00, 4.57872486e+00, 2.29953095e-01, 7.19881415e-01,-2.97208834e+00, 4.11286211e+00,-3.89149117e+00, 3.83631349e+00, 4.14627981e+00,-1.14082299e-01,-6.89825296e-01,-2.55468488e+00,-4.04466152e+00, 9.95541453e-01,-2.59181118e+00,-4.60567427e+00,-4.77339029e+00,-7.36041367e-02, 1.85957468e+00,-3.42530179e+00, 4.55782986e+00,-3.29603004e+00, 3.55632234e+00, 2.40858841e+00,-2.07399082e+00,-3.96705031e+00, 4.41718817e+00, 3.19581985e+00,-3.72379017e+00,-3.76826024e+00, 6.79764748e-01,-4.43838930e+00, 2.29627752e+00, 2.34923697e+00,-4.23308420e+00, 3.80186272e+00, 8.65862250e-01, 8.44927967e-01,-1.05974531e+00, 4.70531940e+00, 1.25060010e+00, 4.82314730e+00,-4.53093815e+00, 4.51410580e+00, 4.95166332e-01,-3.45584202e+00, 1.82002666e-03,-3.27616286e+00,-2.68104935e+00, 2.39554620e+00, 2.99364328e+00,-2.57998848e+00,-4.35891914e+00, 4.64737415e+00,-5.74958742e-01, 6.47293210e-01, 1.85961032e+00, 4.49567413e+00,-4.36166048e+00
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/0.txt
new file mode 100644
index 000000000..e0e52c398
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/0.txt
@@ -0,0 +1 @@
+ 4.5734663 , 3.96675 ,-2.7826853 , 4.377681 , 1.8424977 ,-2.8312624 , 0.65628445,-3.7023883 ,-1.8941027 , 0.53154576,-3.9718776 ,-3.3961854 ,-2.7500536 , 2.6793208 , 3.3515985 , 2.0939343 ,-4.3965416 ,-1.7462187 , 0.5660886 , 4.497879 ,-2.2529721 ,-4.8996797 ,-0.00740948,-2.941367 , 1.9482567 ,-2.462802 ,-0.7897884 , 3.1501546 , 3.1216884 ,-3.506249 , 2.871302 ,-3.964653 ,-0.40679944, 2.8930066 ,-4.783338 ,-1.8733944 , 2.2654383 ,-0.41361305,-3.7790897 ,-1.9458629 ,-2.274427 ,-2.9192872 ,-0.73215395, 2.8135974 , 2.1402152 , 4.516366 , 1.58816 ,-4.607831 ,-3.5409598 , 1.9784997 , 3.11111 , 1.0872442 ,-3.6907403 ,-4.774325 ,-4.9267297 , 1.2962086 , 2.4646177 , 2.2726526 , 4.8766675 ,-2.9272413 ,-0.06221364,-0.80498594,-2.319938 ,-3.8261194 ,-2.3452706 , 2.5408983 ,-0.80628425,-1.4547366 ,-4.4171157 , 3.1584027 , 4.2213454 , 3.0342784 , 2.0285478 , 3.4517126 , 1.870827 , 2.812075 , 1.0776864 ,-4.524331 , 3.1467574 ,-2.366355 ,-4.7368546 , 1.940347 , 4.282059 , 1.2666475 ,-4.9559174 , 2.8177614 , 1.1941892 ,-0.25412267,-2.833778 , 1.1770393 , 4.9503546 , 4.582686 ,-1.0778978 ,-2.9030416 , 3.2517505 , 1.556093 ,-3.7605543 , 0.5915735 ,-2.6323159 , 4.596147 ,-0.90292877, 2.8230112 , 4.9295835 , 3.523853 , 1.7742149 ,-2.6014073 , 2.162894 , 1.9364033 , 4.0920115 , 0.81613404, 2.4198878 ,-0.907447 ,-4.79113 ,-3.4193892 ,-0.3334577 ,-1.0439668 , 4.2233415 , 1.4482704 , 1.3646252 ,-0.9206041 , 4.4994802 ,-4.2411633 , 0.6763335 ,-1.3827848 , 1.8579848 , 1.6426222 , 0.904467 , 3.876264 ,-4.6476808 , 4.576801 ,-1.4680524 , 2.441134 , 3.2343059 , 0.23119794, 2.5640545 ,-0.7293438 , 3.7184558 ,-1.6056752 , 3.1490617 , 4.6837263 , 4.7100887 ,-2.785927 ,-0.1520597 ,-1.9914767 ,-4.00598 ,-2.7502792 , 3.7857378 , 2.8444788 , 4.9911737 , 0.29277426,-4.779576 , 3.223367 , 1.3517398 , 4.8757277 , 3.8083189 , 1.7660266 ,-2.1543872 , 4.822371 , 2.089687 ,-4.7373757 ,-2.4061642 , 2.0387447 ,-4.067881 ,-3.1757388 , 0.24974413,-0.24441184,-0.1168329 ,-0.35149318, 2.0035832 ,-4.248678 ,-1.4723817 , 3.8218668 ,-2.8085105 , 4.6995482 ,-3.0093114 ,-3.648268 ,-1.0374364 , 0.04459473, 2.3945484 ,-0.63439727, 3.3920286 , 2.403765 , 1.303556 , 3.232244 ,-0.44932058, 0.9601637 ,-3.3821623 ,-4.257736 ,-4.095783 , 0.42818338,-4.925627 ,-1.8419602 , 4.9393196 , 0.8049334 , 4.431875 , 2.8487725 , 2.1205912 , 1.7367444 ,-4.337498 ,-3.574642 ,-3.8927085 ,-0.35219863, 2.8415039 ,-0.2887568 ,-0.89806557, 2.669602 , 4.8017626 , 4.278042 ,-1.2604581 , 3.152027 , 2.1625066 , 1.5039738 ,-3.7209976 ,-0.72354925, 4.006067 ,-3.7651584 , 0.7198826 , 3.9594896 , 0.6228397 , 2.8464649 ,-0.18740664,-2.0530953 , 3.5185826 , 2.5037062 , 0.3990585 ,-4.423475 , 4.6931167 ,-1.0078553 , 0.74727917,-4.289701 , 1.697721 , 3.4963684 , 1.5796075 , 2.296678 ,-2.9379995 , 4.4748416 , 0.25155628, 4.1183267 , 0.9506131 , 1.2903908 ,-4.6828184 ,-2.309908 ,-4.2793307 ,-2.2069294 ,-4.038367 , 4.641971 ,-2.3178709 ,-2.2683682 ,-0.96986157, 2.6649144 , 2.3106637 ,-1.8052462 ,-4.9433284 , 1.7941002 , 4.80127 ,-0.06690114
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/1.txt
new file mode 100644
index 000000000..9a8f222e7
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/1.txt
@@ -0,0 +1 @@
+ 2.2282960e+00, 1.0135865e+00,-4.1930809e+00, 5.3674412e-01,-3.2516165e+00, 1.2745492e+00, 4.2867136e+00, 1.9524460e+00,-3.6757104e+00,-3.6086998e+00,-9.4525421e-01,-3.4005399e+00, 3.3607626e+00, 4.2363039e-01,-2.5177178e+00,-3.0130227e+00,-4.1442380e+00, 4.4951862e-01,-6.4387190e-01, 4.3701029e+00,-3.6790867e+00, 3.2749624e+00,-2.2554400e+00, 1.8269253e+00, 1.8358005e+00,-6.0994375e-01, 3.5964453e+00, 4.8953295e+00,-2.6134133e+00,-3.9301482e-01, 4.0286818e+00,-8.9392501e-01, 2.6430035e+00,-1.0339550e+00,-4.2311502e+00, 5.1657695e-01,-3.0095081e+00,-3.2156844e+00, 3.0075660e+00,-2.4905038e+00, 2.2380588e+00, 4.6933036e+00,-2.7880669e+00,-3.3672907e+00, 2.5187421e+00, 2.1843061e+00,-3.9957666e+00,-4.5409918e+00,-1.7282218e+00,-4.6849327e+00, 3.1863580e+00, 2.4342964e+00,-4.5180349e+00,-2.4310455e+00,-2.6789901e+00,-1.6438740e+00, 4.9613748e+00,-3.7800386e+00,-4.4277740e+00, 1.0571244e+00,-3.3765689e-02,-6.2219787e-01, 2.1075857e+00,-2.0555353e+00, 2.6996508e+00,-3.0303302e+00,-3.8262250e+00,-4.5048919e-01, 2.6760142e+00, 3.2696848e+00, 2.8136756e+00,-2.7064829e+00, 8.5861349e-01,-1.8871003e+00,-9.5355767e-01, 2.3704410e+00, 4.8897211e-02,-4.6371531e+00, 1.5693765e+00, 3.7866819e+00,-2.9738419e+00, 1.2106347e+00,-5.8760280e-03,-6.4124316e-01, 4.2396611e-01, 4.8550687e+00,-3.0650468e+00,-1.2087260e+00,-2.4833875e+00, 2.1272743e+00,-1.8991195e-01,-3.5372739e+00,-2.3402226e+00,-1.0234243e+00, 2.8981063e+00, 8.7964945e-02, 3.2136328e+00,-3.4051507e+00,-4.5538807e+00,-4.0228786e+00,-1.8993270e-01,-4.5704255e+00, 1.8850164e+00, 9.9910229e-01,-4.8424377e+00,-3.1492932e+00, 2.3922281e+00, 4.8503261e+00,-2.1037047e+00, 3.3602579e+00, 1.3546667e+00, 1.3481154e+00,-2.3604252e+00,-1.3253393e+00,-3.5330158e-01,-2.1313765e+00, 3.1442962e+00,-1.1570807e+00,-4.5890884e+00,-4.1608801e+00, 1.8554245e+00, 2.4646142e+00,-1.8453486e+00, 3.3489871e+00,-1.1248070e+00, 3.1451607e+00,-1.4458319e+00,-2.2727523e+00,-2.0378258e+00, 2.4566815e+00, 3.8839689e-01, 4.2570353e+00, 2.3613093e+00, 1.2956337e+00,-7.5734973e-01,-1.4549307e+00, 9.3240172e-01, 4.3444591e+00,-6.4935732e-01, 2.5328317e+00,-2.3545196e+00,-4.7553263e+00, 2.6134777e+00,-2.5526178e+00,-1.7996631e+00,-2.0215256e+00,-4.6141486e+00,-1.7283168e+00, 2.5297335e-01, 3.7009020e+00,-1.9858284e+00,-3.4631619e+00,-1.5858738e+00,-2.5620985e+00, 3.2822473e+00,-3.2632313e+00,-9.0714562e-01,-2.3562717e+00, 4.4088845e+00,-3.6630182e+00, 5.5761892e-01, 1.6045070e+00,-3.6806375e-01, 4.3184443e+00,-1.3219705e+00, 1.5496376e+00,-1.5801797e+00, 2.1545045e+00,-4.0106788e+00, 3.4172714e+00,-4.2495294e+00,-6.1115064e-03,-7.2607052e-01,-7.3130745e-01,-4.4462271e+00, 4.8119636e+00,-4.7460346e+00,-3.0464313e+00,-2.8801811e+00,-1.4347218e-03, 4.4133449e+00,-3.3173063e-01, 4.3802023e+00, 2.6040417e-01,-2.5531218e+00, 3.7436140e+00,-4.1636271e+00,-3.3907690e+00,-1.4418361e+00,-3.6933661e+00,-2.6342602e+00,-3.1492887e+00,-5.5590755e-01,-1.6814464e-01,-1.0868104e+00, 4.9451909e+00, 3.4104226e+00, 1.0342516e+00, 4.7993002e+00, 1.2480364e-01, 1.6109833e-01, 2.6366503e+00, 1.6535910e+00, 4.3810592e+00, 4.4755011e+00, 4.3265424e+00,-3.1934264e-01, 9.8549920e-01, 1.9962710e-01, 2.8525822e+00,-3.7352023e+00,-1.3402178e+00, 2.5931063e+00,-2.6708813e+00,-7.6831090e-01, 3.0769660e+00, 1.4107993e+00,-1.8936746e+00,-4.7568636e+00,-1.9222193e+00, 4.7693071e+00, 2.8644614e+00, 4.1877995e+00,-3.6974251e+00, 4.5314616e-01,-7.1986055e-01, 4.8653622e+00, 1.4722897e+00,-8.6220115e-01,-4.1846976e+00, 3.7767217e+00, 3.7630556e+00,-4.5851058e-01,-4.9183292e+00,-1.8750135e+00, 1.0773923e+00,-5.2709883e-01,-9.2767686e-01,-1.3984675e+00,-2.0892789e+00,-4.3801632e+00, 4.0080590e+00, 4.2269025e+00,-1.2195336e+00,-2.2649438e+00, 4.6874623e+00,-3.8354571e+00, 5.9588730e-01,-2.8315885e+00, 3.0605823e-01, 2.1416895e+00, 1.6045133e+00,-3.3075256e+00, 4.9898911e+00, 1.7708080e-02, 3.5305614e+00
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/2.txt
new file mode 100644
index 000000000..1b2e33401
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/2.txt
@@ -0,0 +1 @@
+ 1.9229428 , 2.1045275 , 2.0514195 , 1.7149676 ,-4.1647053 , 4.3958654 , 2.1192055 ,-2.4357705 , 2.249189 , 4.7986865 ,-1.0146881 , 2.5108647 , 0.7262246 ,-2.3110187 ,-0.434008 , 2.6220334 , 1.3261455 ,-2.0402927 , 0.6362597 , 0.12827367, 0.94167644, 1.6396433 , 2.802215 , 0.92637545,-2.8669958 , 2.1684341 , 4.7197456 ,-3.0393784 ,-1.5588902 ,-1.5589788 ,-1.2792847 ,-4.301159 , 3.6853306 , 3.5522077 ,-3.5120559 , 3.6523628 , 0.52381915,-4.3210206 , 3.1021209 ,-4.4059095 , 4.574733 ,-3.708168 ,-3.4609973 , 0.04494883, 4.6041393 , 4.6209555 ,-2.184693 , 3.3114836 , 4.0440845 ,-4.362543 ,-3.0185041 ,-3.4911432 ,-1.0443465 ,-3.1546419 ,-3.0831194 ,-1.8959469 ,-3.7653599 ,-1.8753844 , 3.969308 , 4.0960746 , 0.256032 ,-0.11065102, 4.753394 , 4.8433857 , 0.17249103, 0.44612473, 3.5996687 ,-3.7071083 , 4.15448 , 2.7609568 , 0.7979912 , 2.6985793 , 0.24981445,-0.7343978 ,-3.8946455 ,-3.4738345 ,-2.0124238 , 4.6603985 , 0.9002829 ,-2.2128618 ,-0.8752893 ,-3.0990481 , 2.770291 ,-1.4642559 , 0.4561498 , 0.5808671 , 2.4227936 ,-2.400878 , 0.6494001 , 1.0195295 ,-3.2693145 , 1.9889433 , 3.5208216 , 3.6280289 , 4.322899 ,-2.805155 , 3.7704606 , 0.6797415 , 4.442675 ,-0.5069875 , 1.3373847 , 4.6953626 ,-0.7946793 ,-2.7352958 ,-1.9969261 , 0.43059692, 2.50853 , 1.9314603 , 1.3780333 , 2.0536468 ,-1.572231 ,-4.5323825 ,-1.3175989 ,-1.5515776 ,-0.05870355, 0.32408538,-4.2935586 ,-1.561555 ,-1.7551405 ,-0.93950266, 3.2540953 ,-4.623753 ,-3.4944966 ,-0.7603045 , 0.76591074,-4.9114766 ,-2.679303 , 0.12950227, 4.094419 , 4.781908 ,-3.6946337 , 2.766349 ,-0.45678583,-2.275264 , 2.0858452 , 3.1182098 ,-1.2942638 , 4.4418044 , 2.2264028 ,-3.3838644 , 1.4427853 , 3.7365992 ,-1.1815038 , 1.4555137 , 0.22728541,-0.18817298, 3.454521 , 3.1835914 , 4.0786743 ,-1.5111316 , 1.1560454 ,-0.04693017, 0.44183066,-0.7420173 ,-1.2243766 , 3.4453049 ,-2.969513 ,-0.82397145, 4.870895 , 3.0178127 , 1.7217305 , 4.482936 , 1.9468685 , 3.9970267 , 4.7294793 , 2.9921744 , 4.470473 , 4.7626653 , 0.13104612,-4.651569 , 2.7991815 ,-4.734433 ,-2.4499187 , 1.0739365 ,-1.5583646 , 3.6531756 , 2.7731194 ,-4.72427 ,-4.5801177 ,-4.035709 , 2.5767221 ,-2.8133557 ,-1.8342617 , 3.5808434 ,-2.1022995 ,-3.5421894 ,-3.0776916 , 3.168665 ,-0.07246887,-1.2413273 , 4.7964606 ,-1.0624843 , 0.75939703, 2.5336463 ,-4.8622346 ,-4.9744167 , 2.1007512 , 1.5271608 , 0.37077245, 1.7765028 , 2.2724373 , 2.1864665 ,-0.37378153, 1.3559381 ,-1.4220421 ,-1.4756224 , 3.6143627 , 2.7846546 ,-2.5194893 , 3.005039 ,-3.6451447 ,-1.9118739 , 0.04718782,-3.0775185 ,-1.4801219 ,-2.35909 ,-0.4728799 , 4.610093 ,-4.472677 ,-4.530808 , 0.12514372, 0.05973044, 4.457302 , 3.1129916 , 3.6036162 , 4.5086145 ,-3.548999 , 0.4976606 ,-3.6525648 ,-2.1937015 ,-1.3205789 ,-2.6594079 , 4.415343 , 3.219482 ,-3.7286756 , 3.4116418 , 0.82889384,-3.0168123 , 4.382766 , 2.7633846 , 3.6949344 , 3.9806223 ,-0.6415279 ,-0.3193684 ,-1.3176754 ,-1.4990829 , 4.694691 ,-1.0581211 , 1.2103747 ,-0.26690048,-1.157015 ,-1.8951306 ,-0.8580171 ,-4.3080263 , 4.0737123 ,-1.2607352
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/3.txt
new file mode 100644
index 000000000..50ed09011
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/3.txt
@@ -0,0 +1 @@
+ 4.9386005 , 3.7248888 , 3.3261378 , 4.8302746 ,-3.9337704 ,-4.2943096 , 0.16059242, 0.17785172,-2.4971933 ,-2.933359 ,-4.598231 , 4.7816315 ,-0.6563864 , 4.452592 , 1.8066075 , 3.1572745 , 4.500678 ,-1.1609873 ,-1.6962403 , 1.567031 ,-3.3120036 , 1.8150452 ,-2.7486987 ,-1.6800771 , 1.4895486 , 1.120401 , 1.4983965 , 4.7132416 , 0.39645562,-3.12486 ,-0.5966056 , 4.618641 , 1.225812 , 0.99017185, 3.9918585 , 1.299415 ,-1.2995726 , 4.202907 , 3.8657827 ,-4.0268126 ,-0.90370494, 0.5030568 ,-2.9651542 ,-4.1249614 ,-2.8990393 ,-4.1228724 ,-1.2640246 ,-0.72640723,-1.7128279 , 2.7710931 , 2.8189523 ,-0.8384207 , 0.71266395, 3.8393862 ,-1.7801509 ,-3.1485069 , 3.2076547 , 2.267659 ,-3.745656 ,-4.373508 , 0.86005193,-4.9145784 , 0.9253047 , 1.1243923 , 0.46507052, 1.9978004 ,-4.642887 ,-2.1898057 , 0.88199854,-2.1837327 , 1.1112527 ,-1.4548608 ,-3.5766103 ,-1.5607064 ,-3.630397 ,-1.9193211 ,-0.8931484 ,-0.2812017 ,-1.2881653 ,-2.5051243 ,-3.5648384 ,-0.5431733 ,-0.47036746,-2.8132265 ,-0.4302025 ,-4.003176 , 0.31743896,-3.074693 ,-3.3994603 , 0.62276137, 0.12920536,-2.5154057 ,-0.22098878,-2.711012 ,-0.303956 , 4.6025276 , 3.1887815 ,-0.50345755,-2.6543994 ,-0.8452558 ,-1.4075644 , 3.6716504 , 2.7388885 ,-4.9426928 , 3.5494354 , 4.777085 ,-3.3904083 ,-2.4746811 ,-2.943489 , 1.3607427 , 1.313449 ,-2.7959676 , 4.5932074 , 0.2460288 ,-1.1802251 , 0.6807028 ,-3.7335384 ,-0.30950046, 0.0558207 ,-4.7604976 ,-4.5745177 ,-3.3872643 ,-1.102581 ,-1.5612804 ,-1.2933319 , 4.5290637 ,-2.5096595 , 0.8673844 , 0.6069363 , 0.8294639 ,-0.05487671,-2.5923786 , 3.2974155 , 2.252853 ,-2.4157743 , 1.6614583 , 1.975577 ,-2.7390766 ,-0.26459846, 0.8946814 ,-3.257953 , 4.0526175 ,-1.5219783 , 4.6063023 ,-0.09599628, 3.2825923 , 2.0063279 ,-3.597641 ,-0.41604096,-2.5593333 , 1.8169669 ,-3.6998532 ,-2.3723404 , 0.4008657 , 2.1002467 , 4.9284163 , 4.6011457 ,-4.8977246 , 4.7852945 , 1.2170111 ,-1.055987 , 2.27575 , 1.0601226 ,-4.176826 , 0.08197393, 4.0421042 , 3.6263971 , 2.6941037 ,-2.644993 , 0.10439859,-4.512112 , 3.7939842 ,-4.8532767 , 0.391317 , 3.6432517 ,-3.9992728 , 0.29700363, 1.2722415 ,-2.3793647 ,-3.377246 , 2.0930648 , 2.574604 ,-1.2509564 , 0.4457573 ,-0.46469867, 2.6793416 , 0.02566718,-0.11948132,-3.1046712 ,-0.6204446 ,-4.615342 , 4.057695 , 1.1312845 ,-3.0446556 ,-1.9381613 ,-0.92255247,-3.5459394 ,-1.1972907 , 0.5879403 ,-1.2265042 ,-2.6279037 , 3.7533212 ,-0.2950134 ,-1.6104454 , 4.7811155 , 3.9216835 ,-2.2905827 ,-3.9489107 ,-4.078132 , 4.878544 ,-2.1483154 ,-3.1480436 ,-1.8742744 , 0.38310575,-4.0457416 ,-1.5423136 , 4.9426446 , 2.80434 ,-2.758338 , 1.6596367 ,-4.559686 ,-1.2686385 ,-1.2173673 , 0.49475643,-2.4956207 ,-1.5008336 ,-1.7967415 ,-1.1574938 , 2.2852411 , 1.7171949 ,-3.328038 ,-3.1454384 ,-0.41883984, 3.822312 , 1.1161699 ,-1.5137968 , 3.1651397 , 3.2411747 , 1.2685378 , 2.7408757 ,-3.078621 , 3.3460293 ,-0.34918678,-1.0433053 , 0.9397743 ,-3.9071774 , 0.68924445, 4.896269 ,-4.234603 ,-4.8659916 , 1.472339 , 4.5464644 , 0.35857418, 3.4065645 ,-1.514736 , 4.2301235
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/4.txt
new file mode 100644
index 000000000..163c037cf
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/4.txt
@@ -0,0 +1 @@
+-0.91463715,-2.9258113 , 4.4465976 ,-0.84762925,-3.3510911 ,-0.15094744, 2.2284694 , 3.9705405 ,-1.6315348 , 4.698665 , 2.8595035 ,-2.4719086 , 4.2091336 ,-3.7003224 , 0.06198901, 4.24617 ,-3.7041452 , 1.4280707 , 0.61925036, 3.873551 , 0.3554166 , 3.0535998 ,-1.403015 , 2.5769274 , 4.0060935 ,-2.134697 , 0.61366636,-2.2069314 , 3.5629356 ,-4.94381 , 3.3054771 ,-0.42945656, 4.4868546 , 4.124087 ,-4.039486 , 0.75716823,-4.530404 ,-0.8464823 , 2.7817092 ,-4.954212 , 4.790015 , 2.5307322 , 0.635834 ,-3.393037 ,-3.7000508 ,-1.1439751 ,-2.4422479 , 3.9414582 ,-4.0586324 ,-3.5872777 , 2.2529798 , 0.50453144,-2.9947112 ,-0.76174486, 0.8427806 ,-0.90798455,-0.5518859 ,-1.1810572 , 1.2787138 ,-1.7791113 ,-4.661412 ,-3.7413049 , 0.03910514, 3.970302 ,-3.0697417 ,-4.107844 ,-1.985001 ,-2.434408 ,-3.0120797 , 0.34467867, 0.09826441, 3.1933572 , 0.09855966, 1.7976784 ,-3.3814316 ,-2.8423817 ,-4.787137 , 0.21746217,-1.8560363 ,-0.7145455 , 3.911294 , 4.6970305 ,-4.0105987 , 3.3843613 , 2.3087065 , 1.8619018 , 1.6607213 ,-4.1276345 ,-0.15251912, 3.1198032 , 1.8143575 , 2.178214 ,-4.6250186 , 4.4006424 ,-3.378407 , 3.6481302 , 4.4439235 , 4.5322957 , 2.7754776 , 1.9026359 ,-2.9371052 , 0.32501587, 4.980984 ,-3.2300677 , 4.190388 , 4.441369 , 0.8116277 ,-4.7056756 , 1.1501676 ,-0.9759702 ,-0.1920487 ,-3.2009268 , 4.654679 , 4.043145 , 4.579935 , 4.917842 ,-3.2166183 , 2.381046 , 2.3470554 , 0.04456256,-2.6785278 ,-2.1683002 ,-0.2686819 , 0.6097173 , 1.5071467 , 3.9692068 ,-3.4313831 ,-0.87708473, 3.9917011 , 0.7843428 ,-4.6622047 , 0.774621 ,-4.6538844 , 3.6392822 , 4.962988 , 1.4132729 ,-0.40482154,-1.8656421 ,-1.6113061 ,-1.3454957 , 0.40846685,-4.5410986 , 2.7158992 ,-1.8403106 ,-3.803351 , 4.406537 ,-1.5868717 , 2.7034876 ,-3.3383765 , 4.6084027 ,-1.691095 ,-0.52188784, 2.9010768 , 0.08786624, 2.7466853 ,-1.7457972 , 0.59371734,-0.1716976 ,-2.6220891 , 4.9432936 , 2.3500183 , 1.6905144 ,-2.7329378 , 4.003541 ,-1.1137847 , 3.9017355 , 0.9116626 , 4.233729 ,-2.6706429 , 3.4342804 ,-0.42729262, 1.174779 ,-4.944099 , 1.2316282 , 4.9237943 ,-2.2999635 ,-4.9210916 ,-1.9033331 , 0.43241265, 3.2149148 , 4.1269703 , 0.8590868 , 2.734273 , 1.658618 ,-2.1702065 ,-2.0058317 , 4.0706363 , 4.003833 ,-0.35835287, 2.5514262 , 1.2571276 ,-4.655018 , 3.6468434 , 0.06320113,-4.662375 , 1.0745742 ,-1.117399 , 4.167245 , 4.59434 ,-1.686359 ,-0.17328739, 0.3083307 , 3.3926466 , 2.2254786 ,-0.45468137, 2.4956248 ,-3.492782 ,-2.9805465 ,-1.0610795 ,-0.2784433 , 0.7163735 ,-3.0048254 ,-1.8024784 ,-3.3139167 ,-1.8410577 , 4.5702477 ,-3.4454951 ,-1.4504164 ,-1.7432297 ,-4.998418 ,-2.5524495 , 3.028534 , 4.075326 ,-2.2187853 ,-0.6484594 , 3.00815 ,-2.8010397 ,-4.5529976 , 1.7830837 , 0.3373458 , 0.19151935,-1.0437245 ,-3.6349878 , 1.1947471 ,-1.9664146 , 0.27316815,-0.20781417, 2.419226 , 0.02246885, 4.5222287 , 3.1069999 , 3.940458 , 4.2710595 , 3.4216619 , 2.8447206 , 2.7136886 ,-0.60954016, 2.9277234 , 3.995615 ,-0.30593097, 1.7800944 , 1.0608315 , 3.8786283 ,-2.7564247 , 1.8526665 ,-3.8638606
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001/channel/int16/0.txt
new file mode 100644
index 000000000..3b2a3c258
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001/channel/int16/0.txt
@@ -0,0 +1 @@
+ 4.9167333 , 0.9170983 ,-2.4031715 , 0.4819133 , 0.21536288,-2.0262568 , 4.364642 , 1.7851653 , 2.0982797 , 0.5736603 , 2.5769486 , 3.68285
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001/channel/int16/1.txt
new file mode 100644
index 000000000..dff8a3b09
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001/channel/int16/1.txt
@@ -0,0 +1 @@
+ 3.8708763 , 3.263454 ,-4.796817 , 0.6411522 ,-3.0385532 , 0.49334133,-0.20283684,-0.88814104, 4.826072 ,-4.8037696 , 4.757636 ,-3.036691
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001/channel/int16/2.txt
new file mode 100644
index 000000000..93e747284
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001/channel/int16/2.txt
@@ -0,0 +1 @@
+-3.8694625 ,-3.5254061 ,-0.23680535, 4.1042504 , 3.2534697 ,-1.8511593 ,-1.9182487 , 2.6457057 , 0.12923336, 2.618141 , 1.2465005 ,-4.4625525
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001/channel/int16/3.txt
new file mode 100644
index 000000000..c924e03d9
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001/channel/int16/3.txt
@@ -0,0 +1 @@
+-2.5559328 , 1.768443 ,-1.4850446 ,-1.2771453 ,-2.7216687 , 2.80077 , 0.21637216,-0.6145739 ,-0.37175298, 3.8750615 ,-1.9910356 ,-1.657059
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001/channel/int16/4.txt
new file mode 100644
index 000000000..1153c85ed
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001/channel/int16/4.txt
@@ -0,0 +1 @@
+-1.6168976 ,-3.816399 ,-0.55625045, 4.961818 , 0.19316113,-2.6601286 ,-1.6928803 , 4.1208386 ,-1.4012221 , 2.7742999 , 0.75798005,-2.5877
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001/layer/uint8/0.txt
new file mode 100644
index 000000000..e580d6f85
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001/layer/uint8/0.txt
@@ -0,0 +1 @@
+-4.024665 , 3.0544488,-4.5645285,-3.2134292,-2.1543078, 4.039755 ,-4.613908 , 4.2014904, 3.8222141,-4.4992657,-4.02681 ,-3.2933445
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001/layer/uint8/1.txt
new file mode 100644
index 000000000..c593dfbb6
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001/layer/uint8/1.txt
@@ -0,0 +1 @@
+-2.669042 , 2.479217 , 4.691815 , 1.8187722 ,-3.7656548 ,-2.0555806 ,-2.4494352 ,-3.2394514 ,-0.38215363,-1.543695 ,-0.6927158 , 2.3534324
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001/layer/uint8/2.txt
new file mode 100644
index 000000000..14520a177
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001/layer/uint8/2.txt
@@ -0,0 +1 @@
+ 4.036224 ,-1.2903051 , 1.2116423 , 3.92255 ,-0.48049024,-1.0290806 ,-0.9644837 , 1.3379688 ,-1.0027533 ,-1.9611529 , 3.7190473 , 0.45794436
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001/layer/uint8/3.txt
new file mode 100644
index 000000000..2238d5e9e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001/layer/uint8/3.txt
@@ -0,0 +1 @@
+ 4.560488 ,-1.2475324, 1.8892838,-2.0155866,-4.968927 , 0.3717404,-0.6095849, 3.2483344,-1.2499679, 1.4237018,-3.1225715, 3.0611598
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001/layer/uint8/4.txt
new file mode 100644
index 000000000..14a91ccc9
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001/layer/uint8/4.txt
@@ -0,0 +1 @@
+-1.7167594, 2.116633 ,-1.3816848,-1.7106141,-3.273076 ,-4.148302 ,-2.1654181, 0.4368236, 3.4279666, 1.2954224, 1.3004405,-4.3022
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/0.txt
new file mode 100644
index 000000000..e580d6f85
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/0.txt
@@ -0,0 +1 @@
+-4.024665 , 3.0544488,-4.5645285,-3.2134292,-2.1543078, 4.039755 ,-4.613908 , 4.2014904, 3.8222141,-4.4992657,-4.02681 ,-3.2933445
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/1.txt
new file mode 100644
index 000000000..c593dfbb6
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/1.txt
@@ -0,0 +1 @@
+-2.669042 , 2.479217 , 4.691815 , 1.8187722 ,-3.7656548 ,-2.0555806 ,-2.4494352 ,-3.2394514 ,-0.38215363,-1.543695 ,-0.6927158 , 2.3534324
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/2.txt
new file mode 100644
index 000000000..14520a177
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/2.txt
@@ -0,0 +1 @@
+ 4.036224 ,-1.2903051 , 1.2116423 , 3.92255 ,-0.48049024,-1.0290806 ,-0.9644837 , 1.3379688 ,-1.0027533 ,-1.9611529 , 3.7190473 , 0.45794436
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/3.txt
new file mode 100644
index 000000000..2238d5e9e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/3.txt
@@ -0,0 +1 @@
+ 4.560488 ,-1.2475324, 1.8892838,-2.0155866,-4.968927 , 0.3717404,-0.6095849, 3.2483344,-1.2499679, 1.4237018,-3.1225715, 3.0611598
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/4.txt
new file mode 100644
index 000000000..14a91ccc9
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/4.txt
@@ -0,0 +1 @@
+-1.7167594, 2.116633 ,-1.3816848,-1.7106141,-3.273076 ,-4.148302 ,-2.1654181, 0.4368236, 3.4279666, 1.2954224, 1.3004405,-4.3022
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/0.txt
new file mode 100644
index 000000000..3b2a3c258
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/0.txt
@@ -0,0 +1 @@
+ 4.9167333 , 0.9170983 ,-2.4031715 , 0.4819133 , 0.21536288,-2.0262568 , 4.364642 , 1.7851653 , 2.0982797 , 0.5736603 , 2.5769486 , 3.68285
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/1.txt
new file mode 100644
index 000000000..dff8a3b09
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/1.txt
@@ -0,0 +1 @@
+ 3.8708763 , 3.263454 ,-4.796817 , 0.6411522 ,-3.0385532 , 0.49334133,-0.20283684,-0.88814104, 4.826072 ,-4.8037696 , 4.757636 ,-3.036691
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/2.txt
new file mode 100644
index 000000000..93e747284
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/2.txt
@@ -0,0 +1 @@
+-3.8694625 ,-3.5254061 ,-0.23680535, 4.1042504 , 3.2534697 ,-1.8511593 ,-1.9182487 , 2.6457057 , 0.12923336, 2.618141 , 1.2465005 ,-4.4625525
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/3.txt
new file mode 100644
index 000000000..c924e03d9
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/3.txt
@@ -0,0 +1 @@
+-2.5559328 , 1.768443 ,-1.4850446 ,-1.2771453 ,-2.7216687 , 2.80077 , 0.21637216,-0.6145739 ,-0.37175298, 3.8750615 ,-1.9910356 ,-1.657059
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/4.txt
new file mode 100644
index 000000000..1153c85ed
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/4.txt
@@ -0,0 +1 @@
+-1.6168976 ,-3.816399 ,-0.55625045, 4.961818 , 0.19316113,-2.6601286 ,-1.6928803 , 4.1208386 ,-1.4012221 , 2.7742999 , 0.75798005,-2.5877
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/0.txt
new file mode 100644
index 000000000..081a1e6ee
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/0.txt
@@ -0,0 +1 @@
+-1.9927613e+00,-1.7386111e+00, 4.0895696e+00, 3.7818990e+00, 1.9420158e+00, 2.8482721e+00, 1.9165717e+00, 3.0059583e+00, 1.8346788e+00,-1.9055414e-03, 4.9277787e+00,-2.2794118e+00, 4.4005270e+00, 4.9703922e+00,-4.5275192e+00,-4.0446317e-01,-4.9363256e+00, 4.9506269e+00, 5.5874938e-01, 3.9949589e+00,-3.8152415e-01,-4.1024357e-01,-3.8472393e+00, 4.2956004e+00, 4.8097472e+00, 1.7960385e+00, 1.6767026e+00,-2.2773645e+00, 2.6808765e+00,-3.7214172e+00, 4.0978761e+00, 3.6202488e+00,-3.3211513e+00, 3.6200387e+00,-3.6106458e+00,-3.9778764e+00, 3.8779631e+00,-4.8502750e+00,-2.1901150e+00, 3.1800017e+00, 4.6261444e+00, 3.5151103e+00, 2.8659137e-02, 4.5340648e+00, 1.9836371e+00,-2.1751235e+00,-4.6762753e+00,-3.6951694e+00
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/1.txt
new file mode 100644
index 000000000..f6b31db38
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/1.txt
@@ -0,0 +1 @@
+-4.7488093 , 4.805902 ,-0.29828382, 0.57486725,-4.864297 , 1.1832287 ,-1.7611881 ,-2.7058024 , 2.707353 ,-3.9832466 , 3.1243927 ,-4.795229 , 1.9835415 , 3.2291937 , 2.4303932 ,-3.556881 , 4.316894 ,-0.6444627 ,-3.8289468 , 4.012964 , 0.7878584 ,-1.8921386 , 2.779619 ,-3.762597 , 3.4239094 ,-0.9103423 ,-3.9791772 ,-2.5613685 ,-4.4910364 , 0.19411987, 4.6296096 ,-0.6827259 , 3.7645729 , 1.5309091 , 3.5163064 , 3.4726381 , 3.5372822 , 1.7671971 , 1.4374614 , 3.5783768 ,-2.4927518 , 3.9427729 , 2.431568 , 2.6959393 , 3.8100271 ,-2.099064 , 3.3663592 ,-2.0818436
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/2.txt
new file mode 100644
index 000000000..acc01cb55
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/2.txt
@@ -0,0 +1 @@
+ 4.279912 ,-2.2746763 , 4.0609813 , 4.5353827 , 3.624241 ,-3.9593613 , 4.189409 ,-3.9370356 ,-2.7063863 ,-1.9987059 , 4.172294 ,-4.5454354 , 4.362368 , 2.2204642 ,-4.9866576 , 3.31571 , 0.12623785, 4.7834573 ,-1.3521448 ,-1.5408021 ,-4.6578984 ,-2.93307 ,-1.5684534 ,-1.6875995 ,-0.4278419 , 1.1314197 ,-2.9655704 ,-0.48032767,-1.9200082 , 1.3321692 , 0.87586147,-0.1761448 , 3.939337 ,-1.0270193 ,-4.807054 , 2.8373904 ,-1.1184337 ,-0.8979197 , 2.1442132 ,-2.8509672 ,-3.3741531 , 3.6592414 , 0.7632272 ,-4.11465 , 4.892313 , 4.715815 ,-4.6481915 , 0.24676175
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/3.txt
new file mode 100644
index 000000000..0f0b7a939
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/3.txt
@@ -0,0 +1 @@
+-2.0949495 ,-1.1370499 , 4.6457314 ,-2.243915 ,-1.7996464 , 1.2268789 ,-4.938172 ,-3.2802615 , 1.8788282 , 4.4162655 ,-4.8805113 , 3.1269526 , 3.2644348 , 0.89842725,-1.4484432 ,-0.28381723, 3.046261 ,-1.0718596 ,-3.996107 ,-4.9575796 ,-2.2279077 , 1.5326967 , 4.4588428 ,-2.042381 , 4.6604958 , 4.6422915 ,-1.097833 , 3.666126 , 0.4735639 ,-4.480704 ,-4.831033 ,-0.27288163, 4.588138 , 4.5297036 , 4.3675694 ,-1.6098841 ,-3.4147859 , 2.1168516 ,-1.9529305 ,-0.12548867, 3.4388335 ,-1.4071734 , 0.9507897 , 4.8206787 , 1.676873 ,-1.7102181 , 1.7746873 , 0.02711739
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/4.txt
new file mode 100644
index 000000000..d23450db6
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/4.txt
@@ -0,0 +1 @@
+-4.707647 ,-4.0921726 , 3.5813692 ,-4.71081 , 3.157816 ,-3.0034213 ,-0.21858999,-1.1736552 ,-1.6042249 ,-3.93102 ,-4.0407577 , 3.7350774 ,-4.9545655 ,-1.5413756 , 0.34996858, 2.0339615 , 0.99290746,-3.9916334 ,-4.149016 ,-3.2332835 , 3.6728513 , 2.4537466 ,-3.103485 ,-0.4829316 , 4.8046784 ,-1.753812 , 4.878712 ,-1.4039769 , 1.6640003 ,-1.2041731 , 0.8046477 , 0.9196048 ,-0.6475092 , 1.1409346 , 2.0324717 ,-0.04227797,-0.5379897 , 3.205104 , 3.3556423 , 4.8447986 ,-1.9695646 ,-2.6304977 ,-3.7261262 ,-4.725599 , 2.1162436 ,-0.5631174 ,-0.5820323 , 0.8398242
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/0.txt
new file mode 100644
index 000000000..bcda22cb6
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/0.txt
@@ -0,0 +1 @@
+ 0.29413325,-0.5246354 , 2.5049045 , 4.9534087 , 0.9885207 ,-4.9603324 ,-2.534284 ,-1.2587626 ,-4.6054525 ,-4.0071754 , 3.204513 , 1.9254771 ,-3.0781755 ,-2.225973 , 3.3524523 , 3.817767 , 3.4921055 , 4.3435416 , 3.0849605 ,-1.4030998 ,-1.0506575 ,-0.42979953,-2.2500112 , 3.4057455 , 4.5414543 , 2.9366746 , 4.8639297 ,-0.1028097 , 2.3421814 , 0.6463296 ,-4.906506 ,-0.7544193 ,-4.0089574 , 2.3837643 ,-0.62171113,-3.349577 , 0.63758767,-3.6872568 ,-2.4398334 ,-1.1556609 ,-3.116043 ,-1.9698795 , 0.7246678 , 2.1801088 ,-2.5762403 , 2.5748649 ,-2.8637013 , 2.8755338
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/1.txt
new file mode 100644
index 000000000..937e08f69
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/1.txt
@@ -0,0 +1 @@
+-3.5664022e+00, 3.7696166e+00,-2.0404069e+00,-3.2197843e+00, 2.0149478e-01, 4.1116104e+00, 1.9678035e+00,-7.5975507e-01,-2.1460054e+00, 4.6308274e+00,-1.8927828e+00, 3.0689645e+00,-7.0773923e-01,-6.7477709e-01,-1.6248076e+00, 2.7095401e+00, 2.9545853e+00, 8.5142839e-01,-2.7683893e-01,-2.0586762e+00,-3.5001924e+00,-1.7622359e+00, 2.2262762e+00,-4.0617161e+00,-2.4704919e+00,-3.6333869e+00, 2.3401244e+00,-4.6641917e+00,-4.0812837e-03, 1.1013873e+00, 1.4518824e-01, 2.4135842e+00, 4.1183419e+00, 3.0343807e+00,-3.7195799e-01,-9.7189492e-01,-3.0425618e+00, 4.6822820e+00,-1.7649661e+00, 3.9648254e+00,-3.1084957e+00,-7.3071235e-01,-5.1578474e-01,-3.5188673e+00,-4.7018051e+00,-4.1592669e+00,-3.5443991e-01, 1.3961188e+00
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/2.txt
new file mode 100644
index 000000000..fb30491cd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/2.txt
@@ -0,0 +1 @@
+ 4.2618856 , 0.4364266 , 0.5258691 , 3.5147502 ,-4.025428 , 3.143039 , 1.3707066 , 4.7792606 , 1.1539228 , 3.785161 ,-1.9495047 , 2.7047534 , 0.5673139 ,-0.5191105 ,-2.5284607 , 4.076998 , 2.9433093 ,-2.1924984 , 1.1020935 ,-2.126009 , 0.7586875 , 1.1708144 ,-4.594603 ,-3.252912 ,-3.057344 , 3.8008513 ,-4.9164753 ,-4.560891 , 1.724639 ,-3.0877826 , 0.55354726,-3.969067 , 4.17461 ,-1.901139 ,-4.8903475 , 4.7866077 ,-1.3506653 ,-4.2624874 , 0.8842832 , 4.672003 ,-2.5649548 ,-3.6606123 ,-1.6794366 ,-2.0534387 ,-2.9902222 , 3.078469 , 2.846819 , 1.2788221
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/3.txt
new file mode 100644
index 000000000..fb9d40ae0
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/3.txt
@@ -0,0 +1 @@
+-2.6751792 ,-2.5436802 , 0.30533552, 1.0443643 ,-4.4327927 , 2.813772 ,-4.27514 , 2.5894637 , 2.8684394 ,-2.2010357 , 1.5827026 , 0.01609957, 0.38605672,-4.978118 ,-0.30794173, 0.7372266 ,-1.2931277 , 2.8435483 , 2.8204155 , 1.5801594 , 0.853025 , 1.0665054 ,-2.3281817 ,-4.2512784 , 2.379218 , 2.6335719 , 0.17575608,-2.7761426 ,-2.8164017 , 1.8392245 , 2.6495574 , 0.82702005, 3.8548648 ,-3.179834 , 0.25908127, 2.4930098 , 0.71019745,-3.193962 ,-1.1381371 ,-3.5847874 ,-1.3353258 , 2.942422 , 0.11944559,-3.0676606 , 3.534187 , 0.86664987,-1.4781127 , 4.8873277
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/4.txt
new file mode 100644
index 000000000..aeecd56c3
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/4.txt
@@ -0,0 +1 @@
+ 4.2327642 , 4.644095 ,-2.8978996 , 4.39419 , 2.897952 ,-3.330613 ,-3.9131684 ,-1.4672462 ,-3.9219787 , 2.1286428 ,-4.313653 , 2.65426 ,-4.201722 , 2.5390174 ,-3.821772 ,-1.9420135 , 3.3508427 ,-1.2804624 , 4.899826 ,-4.165279 ,-0.38920662, 3.594253 ,-2.367396 , 3.8604352 , 0.40077925, 3.7654843 ,-2.7208197 , 3.4325044 ,-2.921729 , 2.0519714 ,-0.6181836 ,-0.12342291,-4.1059036 ,-3.653849 ,-3.5340316 ,-0.2782715 , 0.32330513, 3.360021 , 2.5673623 , 2.1614027 ,-4.438277 , 3.3010736 , 0.3992392 , 0.82871836,-2.8720777 , 0.29633927, 0.25286415,-4.191315
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/layer/uint8/0.txt
new file mode 100644
index 000000000..1f2993269
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/layer/uint8/0.txt
@@ -0,0 +1 @@
+-3.3436873 ,-0.79453826, 2.2211137 , 2.6420908 ,-1.3191302 , 1.2973647 ,-4.506594 , 4.867371 ,-4.318404 , 1.6957753 ,-4.3091793 ,-3.2230556 , 4.9175825 ,-3.1527104 ,-2.6669753 ,-2.1135337 ,-3.7701926 ,-3.358504 ,-4.419803 , 3.2045574 ,-0.5828494 ,-3.5796826 ,-4.0088696 ,-4.7178082 , 2.2726505 , 2.1860175 , 3.7198956 ,-0.5788681 ,-3.7766652 ,-0.65016747, 3.707159 ,-2.240267 , 4.5772953 ,-0.54754776, 4.7143884 ,-3.196982 ,-3.6356654 , 3.7157805 , 3.1312432 , 0.58816016, 2.1710336 ,-1.600533 ,-3.689763 , 4.322089 , 0.4816874 , 2.2769346 ,-3.9072733 ,-0.58615017
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/layer/uint8/1.txt
new file mode 100644
index 000000000..a19ea6696
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/layer/uint8/1.txt
@@ -0,0 +1 @@
+-1.275483 ,-3.6622071 ,-0.87433696, 0.60946655, 1.4415421 , 3.3705983 , 2.2635043 , 3.3926573 ,-0.2936643 ,-0.5169573 , 3.2535644 , 2.1269164 ,-3.4180303 , 1.0427854 ,-1.3514856 , 3.6084783 , 4.569944 ,-0.79272085, 2.9771423 ,-1.6668562 , 4.8700657 , 0.3355385 , 0.76509756, 3.5142152 ,-1.6743544 , 4.794434 ,-2.958765 ,-0.23857778, 2.4555902 , 2.459867 , 3.3922994 ,-4.350212 , 0.6286153 , 0.8139546 , 4.1676807 ,-3.3461437 , 0.69633776,-4.6548877 , 0.98267466,-4.508397 ,-1.4581255 ,-1.2289628 , 3.8701873 , 3.334336 ,-3.5611253 , 2.6133575 ,-1.0554558 ,-3.3291767
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/layer/uint8/2.txt
new file mode 100644
index 000000000..7113eb52e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/layer/uint8/2.txt
@@ -0,0 +1 @@
+-0.6250365 ,-4.798417 ,-4.214081 ,-3.625409 , 2.4391694 , 4.1856265 , 3.2472587 ,-3.20996 ,-2.3537548 , 1.3749354 , 2.5947835 ,-1.8891864 ,-3.612735 , 2.246563 , 1.2701501 ,-2.8927476 ,-0.71078295,-3.6037376 ,-4.5916877 , 2.0044398 , 3.4437728 ,-1.0695096 , 4.3483944 ,-3.3387017 ,-0.9384242 , 1.4229002 ,-0.6568144 , 1.1164346 , 1.7145283 ,-2.596518 , 4.6728883 , 3.4737296 , 1.7935314 , 3.1263895 , 1.3614839 ,-3.824968 ,-3.0405738 , 3.1729462 ,-4.1985774 ,-2.9489865 ,-4.2080064 , 2.0368521 ,-2.858539 ,-0.03206728,-1.1123812 , 0.2994737 , 1.6906137 ,-0.8665008
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/layer/uint8/3.txt
new file mode 100644
index 000000000..afeb2c0e6
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/layer/uint8/3.txt
@@ -0,0 +1 @@
+-4.5279946 ,-3.4497826 ,-2.058617 ,-0.39549035,-0.26672208, 3.0173857 , 3.2430282 , 1.9996022 , 1.3895315 , 1.7620904 ,-4.9040093 ,-3.2858686 ,-2.2823575 ,-1.4176623 ,-0.537347 , 0.68219584,-3.193989 ,-3.1675165 , 0.47214374,-4.390378 ,-1.8730192 , 1.4416525 ,-3.0460286 ,-0.73547626, 1.8686327 ,-0.8146671 ,-2.0906649 , 0.01226121,-0.06992937, 0.9302521 ,-2.1858516 , 4.8370657 ,-4.1847024 , 4.4963436 ,-1.3834711 ,-1.1244944 , 0.4290957 ,-4.2681174 , 1.2978764 , 3.4149706 ,-2.7011304 ,-3.1285405 ,-3.8857136 ,-0.18625297,-0.13618916, 2.427405 ,-1.7979074 ,-1.4174187
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/layer/uint8/4.txt
new file mode 100644
index 000000000..99c6284d6
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/layer/uint8/4.txt
@@ -0,0 +1 @@
+-0.40635094,-2.485209 ,-2.9641154 , 4.09174 ,-1.9137962 ,-2.0860991 , 1.6594787 , 0.53744185, 1.7737653 ,-1.7054961 , 2.5611186 ,-1.1456238 , 2.741241 ,-2.283051 ,-4.2111306 ,-0.8722772 , 1.6465468 ,-0.61518955, 0.08495517, 3.6847656 , 3.7826371 , 2.0023444 ,-3.5326133 , 2.3723035 , 3.7383325 ,-3.3514297 , 2.031452 ,-0.7364658 ,-4.3347225 ,-2.8146286 ,-1.37377 ,-3.518721 ,-0.19657679,-1.6831368 , 1.2457223 , 0.25099897,-4.4722757 ,-4.135197 ,-0.6378818 , 3.8833187 , 1.9291897 , 2.5969315 , 2.146067 ,-2.846719 ,-2.2562532 ,-2.6856182 , 2.824374 , 2.3662992
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/0.txt
new file mode 100644
index 000000000..1f2993269
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/0.txt
@@ -0,0 +1 @@
+-3.3436873 ,-0.79453826, 2.2211137 , 2.6420908 ,-1.3191302 , 1.2973647 ,-4.506594 , 4.867371 ,-4.318404 , 1.6957753 ,-4.3091793 ,-3.2230556 , 4.9175825 ,-3.1527104 ,-2.6669753 ,-2.1135337 ,-3.7701926 ,-3.358504 ,-4.419803 , 3.2045574 ,-0.5828494 ,-3.5796826 ,-4.0088696 ,-4.7178082 , 2.2726505 , 2.1860175 , 3.7198956 ,-0.5788681 ,-3.7766652 ,-0.65016747, 3.707159 ,-2.240267 , 4.5772953 ,-0.54754776, 4.7143884 ,-3.196982 ,-3.6356654 , 3.7157805 , 3.1312432 , 0.58816016, 2.1710336 ,-1.600533 ,-3.689763 , 4.322089 , 0.4816874 , 2.2769346 ,-3.9072733 ,-0.58615017
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/1.txt
new file mode 100644
index 000000000..a19ea6696
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/1.txt
@@ -0,0 +1 @@
+-1.275483 ,-3.6622071 ,-0.87433696, 0.60946655, 1.4415421 , 3.3705983 , 2.2635043 , 3.3926573 ,-0.2936643 ,-0.5169573 , 3.2535644 , 2.1269164 ,-3.4180303 , 1.0427854 ,-1.3514856 , 3.6084783 , 4.569944 ,-0.79272085, 2.9771423 ,-1.6668562 , 4.8700657 , 0.3355385 , 0.76509756, 3.5142152 ,-1.6743544 , 4.794434 ,-2.958765 ,-0.23857778, 2.4555902 , 2.459867 , 3.3922994 ,-4.350212 , 0.6286153 , 0.8139546 , 4.1676807 ,-3.3461437 , 0.69633776,-4.6548877 , 0.98267466,-4.508397 ,-1.4581255 ,-1.2289628 , 3.8701873 , 3.334336 ,-3.5611253 , 2.6133575 ,-1.0554558 ,-3.3291767
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/2.txt
new file mode 100644
index 000000000..7113eb52e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/2.txt
@@ -0,0 +1 @@
+-0.6250365 ,-4.798417 ,-4.214081 ,-3.625409 , 2.4391694 , 4.1856265 , 3.2472587 ,-3.20996 ,-2.3537548 , 1.3749354 , 2.5947835 ,-1.8891864 ,-3.612735 , 2.246563 , 1.2701501 ,-2.8927476 ,-0.71078295,-3.6037376 ,-4.5916877 , 2.0044398 , 3.4437728 ,-1.0695096 , 4.3483944 ,-3.3387017 ,-0.9384242 , 1.4229002 ,-0.6568144 , 1.1164346 , 1.7145283 ,-2.596518 , 4.6728883 , 3.4737296 , 1.7935314 , 3.1263895 , 1.3614839 ,-3.824968 ,-3.0405738 , 3.1729462 ,-4.1985774 ,-2.9489865 ,-4.2080064 , 2.0368521 ,-2.858539 ,-0.03206728,-1.1123812 , 0.2994737 , 1.6906137 ,-0.8665008
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/3.txt
new file mode 100644
index 000000000..afeb2c0e6
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/3.txt
@@ -0,0 +1 @@
+-4.5279946 ,-3.4497826 ,-2.058617 ,-0.39549035,-0.26672208, 3.0173857 , 3.2430282 , 1.9996022 , 1.3895315 , 1.7620904 ,-4.9040093 ,-3.2858686 ,-2.2823575 ,-1.4176623 ,-0.537347 , 0.68219584,-3.193989 ,-3.1675165 , 0.47214374,-4.390378 ,-1.8730192 , 1.4416525 ,-3.0460286 ,-0.73547626, 1.8686327 ,-0.8146671 ,-2.0906649 , 0.01226121,-0.06992937, 0.9302521 ,-2.1858516 , 4.8370657 ,-4.1847024 , 4.4963436 ,-1.3834711 ,-1.1244944 , 0.4290957 ,-4.2681174 , 1.2978764 , 3.4149706 ,-2.7011304 ,-3.1285405 ,-3.8857136 ,-0.18625297,-0.13618916, 2.427405 ,-1.7979074 ,-1.4174187
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/4.txt
new file mode 100644
index 000000000..99c6284d6
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/4.txt
@@ -0,0 +1 @@
+-0.40635094,-2.485209 ,-2.9641154 , 4.09174 ,-1.9137962 ,-2.0860991 , 1.6594787 , 0.53744185, 1.7737653 ,-1.7054961 , 2.5611186 ,-1.1456238 , 2.741241 ,-2.283051 ,-4.2111306 ,-0.8722772 , 1.6465468 ,-0.61518955, 0.08495517, 3.6847656 , 3.7826371 , 2.0023444 ,-3.5326133 , 2.3723035 , 3.7383325 ,-3.3514297 , 2.031452 ,-0.7364658 ,-4.3347225 ,-2.8146286 ,-1.37377 ,-3.518721 ,-0.19657679,-1.6831368 , 1.2457223 , 0.25099897,-4.4722757 ,-4.135197 ,-0.6378818 , 3.8833187 , 1.9291897 , 2.5969315 , 2.146067 ,-2.846719 ,-2.2562532 ,-2.6856182 , 2.824374 , 2.3662992
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/0.txt
new file mode 100644
index 000000000..081a1e6ee
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/0.txt
@@ -0,0 +1 @@
+-1.9927613e+00,-1.7386111e+00, 4.0895696e+00, 3.7818990e+00, 1.9420158e+00, 2.8482721e+00, 1.9165717e+00, 3.0059583e+00, 1.8346788e+00,-1.9055414e-03, 4.9277787e+00,-2.2794118e+00, 4.4005270e+00, 4.9703922e+00,-4.5275192e+00,-4.0446317e-01,-4.9363256e+00, 4.9506269e+00, 5.5874938e-01, 3.9949589e+00,-3.8152415e-01,-4.1024357e-01,-3.8472393e+00, 4.2956004e+00, 4.8097472e+00, 1.7960385e+00, 1.6767026e+00,-2.2773645e+00, 2.6808765e+00,-3.7214172e+00, 4.0978761e+00, 3.6202488e+00,-3.3211513e+00, 3.6200387e+00,-3.6106458e+00,-3.9778764e+00, 3.8779631e+00,-4.8502750e+00,-2.1901150e+00, 3.1800017e+00, 4.6261444e+00, 3.5151103e+00, 2.8659137e-02, 4.5340648e+00, 1.9836371e+00,-2.1751235e+00,-4.6762753e+00,-3.6951694e+00
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/1.txt
new file mode 100644
index 000000000..f6b31db38
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/1.txt
@@ -0,0 +1 @@
+-4.7488093 , 4.805902 ,-0.29828382, 0.57486725,-4.864297 , 1.1832287 ,-1.7611881 ,-2.7058024 , 2.707353 ,-3.9832466 , 3.1243927 ,-4.795229 , 1.9835415 , 3.2291937 , 2.4303932 ,-3.556881 , 4.316894 ,-0.6444627 ,-3.8289468 , 4.012964 , 0.7878584 ,-1.8921386 , 2.779619 ,-3.762597 , 3.4239094 ,-0.9103423 ,-3.9791772 ,-2.5613685 ,-4.4910364 , 0.19411987, 4.6296096 ,-0.6827259 , 3.7645729 , 1.5309091 , 3.5163064 , 3.4726381 , 3.5372822 , 1.7671971 , 1.4374614 , 3.5783768 ,-2.4927518 , 3.9427729 , 2.431568 , 2.6959393 , 3.8100271 ,-2.099064 , 3.3663592 ,-2.0818436
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/2.txt
new file mode 100644
index 000000000..acc01cb55
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/2.txt
@@ -0,0 +1 @@
+ 4.279912 ,-2.2746763 , 4.0609813 , 4.5353827 , 3.624241 ,-3.9593613 , 4.189409 ,-3.9370356 ,-2.7063863 ,-1.9987059 , 4.172294 ,-4.5454354 , 4.362368 , 2.2204642 ,-4.9866576 , 3.31571 , 0.12623785, 4.7834573 ,-1.3521448 ,-1.5408021 ,-4.6578984 ,-2.93307 ,-1.5684534 ,-1.6875995 ,-0.4278419 , 1.1314197 ,-2.9655704 ,-0.48032767,-1.9200082 , 1.3321692 , 0.87586147,-0.1761448 , 3.939337 ,-1.0270193 ,-4.807054 , 2.8373904 ,-1.1184337 ,-0.8979197 , 2.1442132 ,-2.8509672 ,-3.3741531 , 3.6592414 , 0.7632272 ,-4.11465 , 4.892313 , 4.715815 ,-4.6481915 , 0.24676175
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/3.txt
new file mode 100644
index 000000000..0f0b7a939
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/3.txt
@@ -0,0 +1 @@
+-2.0949495 ,-1.1370499 , 4.6457314 ,-2.243915 ,-1.7996464 , 1.2268789 ,-4.938172 ,-3.2802615 , 1.8788282 , 4.4162655 ,-4.8805113 , 3.1269526 , 3.2644348 , 0.89842725,-1.4484432 ,-0.28381723, 3.046261 ,-1.0718596 ,-3.996107 ,-4.9575796 ,-2.2279077 , 1.5326967 , 4.4588428 ,-2.042381 , 4.6604958 , 4.6422915 ,-1.097833 , 3.666126 , 0.4735639 ,-4.480704 ,-4.831033 ,-0.27288163, 4.588138 , 4.5297036 , 4.3675694 ,-1.6098841 ,-3.4147859 , 2.1168516 ,-1.9529305 ,-0.12548867, 3.4388335 ,-1.4071734 , 0.9507897 , 4.8206787 , 1.676873 ,-1.7102181 , 1.7746873 , 0.02711739
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/4.txt
new file mode 100644
index 000000000..d23450db6
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/4.txt
@@ -0,0 +1 @@
+-4.707647 ,-4.0921726 , 3.5813692 ,-4.71081 , 3.157816 ,-3.0034213 ,-0.21858999,-1.1736552 ,-1.6042249 ,-3.93102 ,-4.0407577 , 3.7350774 ,-4.9545655 ,-1.5413756 , 0.34996858, 2.0339615 , 0.99290746,-3.9916334 ,-4.149016 ,-3.2332835 , 3.6728513 , 2.4537466 ,-3.103485 ,-0.4829316 , 4.8046784 ,-1.753812 , 4.878712 ,-1.4039769 , 1.6640003 ,-1.2041731 , 0.8046477 , 0.9196048 ,-0.6475092 , 1.1409346 , 2.0324717 ,-0.04227797,-0.5379897 , 3.205104 , 3.3556423 , 4.8447986 ,-1.9695646 ,-2.6304977 ,-3.7261262 ,-4.725599 , 2.1162436 ,-0.5631174 ,-0.5820323 , 0.8398242
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000/channel/int16/0.txt
new file mode 100644
index 000000000..42ce6be36
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000/channel/int16/0.txt
@@ -0,0 +1 @@
+ 1.1826919 , 0.07451724, 3.48515 , 3.4905832 , 1.8009655 , 4.155749 , 3.3155255 , 2.6834202 ,-1.7111781 ,-2.2254407 ,-4.578932 ,-2.1239302 ,-0.1269101 ,-2.6022012 ,-4.8320093 , 0.2983099 ,-0.43314072,-0.66332716
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000/channel/int16/1.txt
new file mode 100644
index 000000000..f677cc836
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000/channel/int16/1.txt
@@ -0,0 +1 @@
+-1.2971772 ,-3.6082 ,-2.2253058 ,-4.4367466 ,-1.7221912 , 0.02547262,-3.641017 , 0.2953748 , 0.7217547 , 4.663728 , 4.262444 ,-3.196005 ,-1.6792587 ,-1.7463406 , 2.030074 , 0.67998594,-0.92862725,-1.7960806
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000/channel/int16/2.txt
new file mode 100644
index 000000000..841ea9f8f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000/channel/int16/2.txt
@@ -0,0 +1 @@
+ 2.2390285 ,-1.9557759 ,-1.2331479 ,-2.4810686 ,-0.5112022 , 1.741153 , 0.13645513,-2.3543327 ,-3.2610211 , 2.5739572 ,-0.50510126, 2.3544457 , 1.884411 ,-3.7153857 ,-1.7037194 ,-0.36849263,-4.819704 , 3.047652
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000/channel/int16/3.txt
new file mode 100644
index 000000000..08ec9fe8f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000/channel/int16/3.txt
@@ -0,0 +1 @@
+-0.9080747 ,-1.5609599 ,-0.40923035,-2.0569193 , 4.5904484 ,-0.02348744, 0.35939455, 2.2017193 , 2.2766497 ,-2.2080436 ,-2.6453862 ,-3.6456985 , 4.160244 , 1.7283534 , 4.5547447 ,-1.8674839 , 3.019465 , 1.1584582
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000/channel/int16/4.txt
new file mode 100644
index 000000000..a4f2d97d1
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000/channel/int16/4.txt
@@ -0,0 +1 @@
+ 4.5920744 , 3.827386 ,-2.1228654 , 3.7227573 ,-3.4464717 , 0.31313375, 0.5531476 ,-0.30391756,-0.21601346, 3.8968146 , 0.23224053,-0.6208954 ,-0.76323295,-1.1700501 ,-1.6203161 , 2.1780837 , 2.3581395 , 2.6519518
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000/layer/uint8/0.txt
new file mode 100644
index 000000000..eb058a1c3
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000/layer/uint8/0.txt
@@ -0,0 +1 @@
+-0.55411166,-4.1992335 , 1.4317423 ,-3.7261302 , 1.151971 ,-2.117022 ,-0.7386241 , 4.654951 , 1.4869142 ,-4.6252975 ,-3.305923 , 3.632628 ,-2.6403873 ,-4.862389 , 3.477561 ,-4.9842925 ,-3.6267536 , 4.9950438
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000/layer/uint8/1.txt
new file mode 100644
index 000000000..ff15f032d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000/layer/uint8/1.txt
@@ -0,0 +1 @@
+ 0.18094282,-0.58095986, 1.2765085 ,-0.534363 , 4.5564513 ,-0.28305855, 0.80606604,-3.3217795 ,-0.08041744,-3.7558215 ,-0.5370528 , 1.8984528 ,-0.09462419,-0.28595117, 4.6817894 ,-4.6653147 ,-4.127137 ,-2.3407753
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000/layer/uint8/2.txt
new file mode 100644
index 000000000..e564168bf
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000/layer/uint8/2.txt
@@ -0,0 +1 @@
+-0.62747055, 1.4133646 ,-0.9954612 ,-4.687624 ,-2.5390003 ,-4.534569 ,-1.1943612 ,-4.830596 , 4.3214984 ,-2.4795794 , 4.166298 ,-1.4772589 ,-4.074577 , 3.2332711 ,-1.5221404 ,-1.7308865 , 0.06814837, 2.944668
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000/layer/uint8/3.txt
new file mode 100644
index 000000000..c763b6311
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000/layer/uint8/3.txt
@@ -0,0 +1 @@
+-3.2136867 , 0.6229863 , 0.02772082,-0.00820862,-2.4893622 ,-0.6757174 ,-2.2024722 ,-2.0893583 , 0.33953062,-3.5438979 , 0.7000838 , 1.3219849 ,-0.02302017, 2.3125873 ,-1.5376673 ,-4.0330076 , 4.755884 , 2.729685
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000/layer/uint8/4.txt
new file mode 100644
index 000000000..12e13272d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000/layer/uint8/4.txt
@@ -0,0 +1 @@
+ 0.82922786, 4.762074 ,-3.5043278 , 2.4521468 , 2.6450796 ,-2.8606322 , 0.8321993 ,-1.4020495 ,-0.25749585, 1.0287803 ,-3.911455 ,-1.8311876 , 2.763438 , 3.8604703 ,-3.5478592 ,-4.2335987 ,-3.6402035 ,-1.8485361
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/0.txt
new file mode 100644
index 000000000..eb058a1c3
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/0.txt
@@ -0,0 +1 @@
+-0.55411166,-4.1992335 , 1.4317423 ,-3.7261302 , 1.151971 ,-2.117022 ,-0.7386241 , 4.654951 , 1.4869142 ,-4.6252975 ,-3.305923 , 3.632628 ,-2.6403873 ,-4.862389 , 3.477561 ,-4.9842925 ,-3.6267536 , 4.9950438
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/1.txt
new file mode 100644
index 000000000..ff15f032d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/1.txt
@@ -0,0 +1 @@
+ 0.18094282,-0.58095986, 1.2765085 ,-0.534363 , 4.5564513 ,-0.28305855, 0.80606604,-3.3217795 ,-0.08041744,-3.7558215 ,-0.5370528 , 1.8984528 ,-0.09462419,-0.28595117, 4.6817894 ,-4.6653147 ,-4.127137 ,-2.3407753
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/2.txt
new file mode 100644
index 000000000..e564168bf
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/2.txt
@@ -0,0 +1 @@
+-0.62747055, 1.4133646 ,-0.9954612 ,-4.687624 ,-2.5390003 ,-4.534569 ,-1.1943612 ,-4.830596 , 4.3214984 ,-2.4795794 , 4.166298 ,-1.4772589 ,-4.074577 , 3.2332711 ,-1.5221404 ,-1.7308865 , 0.06814837, 2.944668
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/3.txt
new file mode 100644
index 000000000..c763b6311
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/3.txt
@@ -0,0 +1 @@
+-3.2136867 , 0.6229863 , 0.02772082,-0.00820862,-2.4893622 ,-0.6757174 ,-2.2024722 ,-2.0893583 , 0.33953062,-3.5438979 , 0.7000838 , 1.3219849 ,-0.02302017, 2.3125873 ,-1.5376673 ,-4.0330076 , 4.755884 , 2.729685
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/4.txt
new file mode 100644
index 000000000..12e13272d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/4.txt
@@ -0,0 +1 @@
+ 0.82922786, 4.762074 ,-3.5043278 , 2.4521468 , 2.6450796 ,-2.8606322 , 0.8321993 ,-1.4020495 ,-0.25749585, 1.0287803 ,-3.911455 ,-1.8311876 , 2.763438 , 3.8604703 ,-3.5478592 ,-4.2335987 ,-3.6402035 ,-1.8485361
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/0.txt
new file mode 100644
index 000000000..42ce6be36
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/0.txt
@@ -0,0 +1 @@
+ 1.1826919 , 0.07451724, 3.48515 , 3.4905832 , 1.8009655 , 4.155749 , 3.3155255 , 2.6834202 ,-1.7111781 ,-2.2254407 ,-4.578932 ,-2.1239302 ,-0.1269101 ,-2.6022012 ,-4.8320093 , 0.2983099 ,-0.43314072,-0.66332716
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/1.txt
new file mode 100644
index 000000000..f677cc836
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/1.txt
@@ -0,0 +1 @@
+-1.2971772 ,-3.6082 ,-2.2253058 ,-4.4367466 ,-1.7221912 , 0.02547262,-3.641017 , 0.2953748 , 0.7217547 , 4.663728 , 4.262444 ,-3.196005 ,-1.6792587 ,-1.7463406 , 2.030074 , 0.67998594,-0.92862725,-1.7960806
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/2.txt
new file mode 100644
index 000000000..841ea9f8f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/2.txt
@@ -0,0 +1 @@
+ 2.2390285 ,-1.9557759 ,-1.2331479 ,-2.4810686 ,-0.5112022 , 1.741153 , 0.13645513,-2.3543327 ,-3.2610211 , 2.5739572 ,-0.50510126, 2.3544457 , 1.884411 ,-3.7153857 ,-1.7037194 ,-0.36849263,-4.819704 , 3.047652
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/3.txt
new file mode 100644
index 000000000..08ec9fe8f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/3.txt
@@ -0,0 +1 @@
+-0.9080747 ,-1.5609599 ,-0.40923035,-2.0569193 , 4.5904484 ,-0.02348744, 0.35939455, 2.2017193 , 2.2766497 ,-2.2080436 ,-2.6453862 ,-3.6456985 , 4.160244 , 1.7283534 , 4.5547447 ,-1.8674839 , 3.019465 , 1.1584582
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/4.txt
new file mode 100644
index 000000000..a4f2d97d1
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/4.txt
@@ -0,0 +1 @@
+ 4.5920744 , 3.827386 ,-2.1228654 , 3.7227573 ,-3.4464717 , 0.31313375, 0.5531476 ,-0.30391756,-0.21601346, 3.8968146 , 0.23224053,-0.6208954 ,-0.76323295,-1.1700501 ,-1.6203161 , 2.1780837 , 2.3581395 , 2.6519518
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000/channel/int16/0.txt
new file mode 100644
index 000000000..4b999a028
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Split_000/channel/int16/0.txt
@@ -0,0 +1 @@
+ 3.241328 , 2.7033713 ,-2.5329788 ,-4.078369 ,-3.6711028 , 2.8912613 , 0.6188993 , 3.3729403 , 2.9906578 , 0.69040877, 0.6443222 , 1.1676162
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000/channel/int16/1.txt
new file mode 100644
index 000000000..7061063b9
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Split_000/channel/int16/1.txt
@@ -0,0 +1 @@
+ 1.572614 , 3.6147017 , 1.4378501 ,-0.81497866, 1.5987366 , 3.7698908 ,-3.8637109 , 4.5728784 ,-0.8706349 , 0.7389268 , 4.64117 ,-0.96047217
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000/channel/int16/2.txt
new file mode 100644
index 000000000..c048a8a9f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Split_000/channel/int16/2.txt
@@ -0,0 +1 @@
+ 0.00864919,-3.1653113 ,-2.125551 , 2.9225516 ,-1.1439148 , 4.6509814 ,-2.097259 , 2.5843353 ,-2.067207 ,-2.5034845 ,-4.9441104 ,-3.9062042
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000/channel/int16/3.txt
new file mode 100644
index 000000000..55be3b464
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Split_000/channel/int16/3.txt
@@ -0,0 +1 @@
+ 1.0920542 , 0.5510192 , 1.3465579 ,-2.3510268 , 4.016736 , 4.7848744 ,-0.42403316, 0.00571597, 1.6412207 , 1.7787368 , 2.4728034 ,-3.5900247
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000/channel/int16/4.txt
new file mode 100644
index 000000000..04c7a1a8a
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Split_000/channel/int16/4.txt
@@ -0,0 +1 @@
+-2.9799085,-3.9477375, 0.6402844, 3.304766 , 3.8880465,-3.5069442,-2.3702915, 4.126247 ,-3.1614416, 2.9909244,-2.8755414, 0.2627986
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000/channel/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000/channel/uint8/0.txt
new file mode 100644
index 000000000..0e8d687b1
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Split_000/channel/uint8/0.txt
@@ -0,0 +1 @@
+-2.327701 , 1.9312059 ,-2.0069487 ,-1.2584914 ,-0.08435626, 0.47685367,-2.7456024 , 2.1275337 ,-4.9685698 , 1.8143541 , 0.52829266,-2.770121
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000/channel/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000/channel/uint8/1.txt
new file mode 100644
index 000000000..67732e8f5
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Split_000/channel/uint8/1.txt
@@ -0,0 +1 @@
+ 0.01133719,-3.3741624 , 3.556686 ,-4.21059 , 0.49977505, 1.768375 , 3.867543 , 2.270572 ,-3.9507272 ,-4.595618 ,-4.7460327 , 0.5856542
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000/channel/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000/channel/uint8/2.txt
new file mode 100644
index 000000000..7bc7124d6
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Split_000/channel/uint8/2.txt
@@ -0,0 +1 @@
+-2.7181 , 4.6819983 , 2.9022477 ,-0.10716935, 3.6687856 ,-2.5403244 ,-4.477037 , 2.5499978 ,-3.9294813 , 0.08725335,-2.243345 ,-1.4018577
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000/channel/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000/channel/uint8/3.txt
new file mode 100644
index 000000000..0fac9fb70
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Split_000/channel/uint8/3.txt
@@ -0,0 +1 @@
+-3.920553 , 0.87464577,-1.0319884 , 2.1885726 , 2.755115 ,-1.6436632 ,-4.4507327 , 4.915525 , 2.9331517 , 4.7712016 , 4.676084 ,-1.7715888
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000/channel/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000/channel/uint8/4.txt
new file mode 100644
index 000000000..df79104c2
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Split_000/channel/uint8/4.txt
@@ -0,0 +1 @@
+-2.181168 ,-1.6011912 ,-4.359466 ,-1.3662407 ,-0.06876431,-2.9213328 ,-0.5463467 ,-3.7916536 ,-3.751455 ,-2.822578 , 0.8914152 ,-3.0267959
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/0.txt
new file mode 100644
index 000000000..0e8d687b1
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/0.txt
@@ -0,0 +1 @@
+-2.327701 , 1.9312059 ,-2.0069487 ,-1.2584914 ,-0.08435626, 0.47685367,-2.7456024 , 2.1275337 ,-4.9685698 , 1.8143541 , 0.52829266,-2.770121
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/1.txt
new file mode 100644
index 000000000..67732e8f5
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/1.txt
@@ -0,0 +1 @@
+ 0.01133719,-3.3741624 , 3.556686 ,-4.21059 , 0.49977505, 1.768375 , 3.867543 , 2.270572 ,-3.9507272 ,-4.595618 ,-4.7460327 , 0.5856542
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/2.txt
new file mode 100644
index 000000000..7bc7124d6
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/2.txt
@@ -0,0 +1 @@
+-2.7181 , 4.6819983 , 2.9022477 ,-0.10716935, 3.6687856 ,-2.5403244 ,-4.477037 , 2.5499978 ,-3.9294813 , 0.08725335,-2.243345 ,-1.4018577
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/3.txt
new file mode 100644
index 000000000..0fac9fb70
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/3.txt
@@ -0,0 +1 @@
+-3.920553 , 0.87464577,-1.0319884 , 2.1885726 , 2.755115 ,-1.6436632 ,-4.4507327 , 4.915525 , 2.9331517 , 4.7712016 , 4.676084 ,-1.7715888
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/4.txt
new file mode 100644
index 000000000..df79104c2
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/4.txt
@@ -0,0 +1 @@
+-2.181168 ,-1.6011912 ,-4.359466 ,-1.3662407 ,-0.06876431,-2.9213328 ,-0.5463467 ,-3.7916536 ,-3.751455 ,-2.822578 , 0.8914152 ,-3.0267959
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/0.txt
new file mode 100644
index 000000000..4b999a028
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/0.txt
@@ -0,0 +1 @@
+ 3.241328 , 2.7033713 ,-2.5329788 ,-4.078369 ,-3.6711028 , 2.8912613 , 0.6188993 , 3.3729403 , 2.9906578 , 0.69040877, 0.6443222 , 1.1676162
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/1.txt
new file mode 100644
index 000000000..7061063b9
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/1.txt
@@ -0,0 +1 @@
+ 1.572614 , 3.6147017 , 1.4378501 ,-0.81497866, 1.5987366 , 3.7698908 ,-3.8637109 , 4.5728784 ,-0.8706349 , 0.7389268 , 4.64117 ,-0.96047217
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/2.txt
new file mode 100644
index 000000000..c048a8a9f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/2.txt
@@ -0,0 +1 @@
+ 0.00864919,-3.1653113 ,-2.125551 , 2.9225516 ,-1.1439148 , 4.6509814 ,-2.097259 , 2.5843353 ,-2.067207 ,-2.5034845 ,-4.9441104 ,-3.9062042
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/3.txt
new file mode 100644
index 000000000..55be3b464
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/3.txt
@@ -0,0 +1 @@
+ 1.0920542 , 0.5510192 , 1.3465579 ,-2.3510268 , 4.016736 , 4.7848744 ,-0.42403316, 0.00571597, 1.6412207 , 1.7787368 , 2.4728034 ,-3.5900247
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/4.txt
new file mode 100644
index 000000000..04c7a1a8a
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/4.txt
@@ -0,0 +1 @@
+-2.9799085,-3.9477375, 0.6402844, 3.304766 , 3.8880465,-3.5069442,-2.3702915, 4.126247 ,-3.1614416, 2.9909244,-2.8755414, 0.2627986
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/int16/0.txt
new file mode 100644
index 000000000..a8874bc5f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/int16/0.txt
@@ -0,0 +1 @@
+ 3.9384239 ,-3.7377489 , 0.97284186, 3.8309984 , 2.4125865 , 1.7141674 , 3.9459977 ,-0.304659 ,-3.4623327 , 4.4569106 , 4.209985 ,-0.6677348 , 3.4578135 , 1.6779743 , 2.502791 ,-1.324285 , 1.3139176 , 3.4334664 ,-2.2695086 ,-4.001059 ,-0.91164917, 4.4447775 ,-3.0275404 ,-2.0852396 , 3.6677403 ,-2.9595146 , 2.0921555 , 1.7570637 , 3.717391 ,-0.3216191 ,-0.8410847 , 2.662336
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/int16/1.txt
new file mode 100644
index 000000000..715e680be
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/int16/1.txt
@@ -0,0 +1 @@
+ 0.6663157 ,-0.04146723,-0.8193995 , 4.804576 ,-2.1357434 , 4.0829 ,-1.6380692 , 1.8043218 , 2.3431025 , 0.30111 , 1.2928191 ,-1.8559257 ,-0.68305963,-1.1502715 , 1.9492546 ,-2.7240746 , 2.9279857 ,-3.3329778 ,-4.8343406 ,-0.02708206, 1.1840513 , 3.6476028 , 4.75276 ,-4.9085226 ,-1.1922491 , 0.54225117, 3.17247 ,-2.7856457 ,-3.0866194 ,-2.2077718 , 1.6263398 , 3.7066603
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/int16/2.txt
new file mode 100644
index 000000000..3ca893e61
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/int16/2.txt
@@ -0,0 +1 @@
+-4.8507566 ,-1.267258 , 0.5099198 , 1.650726 , 3.4329638 ,-2.2652836 , 1.2157568 , 0.18305123, 3.6754217 ,-4.6185255 ,-1.0646905 ,-0.46092424, 2.046326 ,-2.8830478 , 4.156068 ,-2.0503244 , 0.0755459 ,-4.6472006 ,-0.50128895, 3.1129324 ,-4.4048553 , 0.47983927, 1.4510479 , 3.9226127 ,-4.767221 ,-2.795826 ,-4.816457 ,-3.6127663 ,-2.2712553 , 4.586938 , 1.1028811 , 1.5028698
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/int16/3.txt
new file mode 100644
index 000000000..3fba8ecec
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/int16/3.txt
@@ -0,0 +1 @@
+ 4.9431224 ,-3.4878132 ,-2.4831018 , 2.2395666 ,-2.3317611 ,-1.6786547 ,-2.4702384 , 3.2167027 , 1.7300137 , 2.8848834 ,-4.6395254 , 0.5527259 ,-2.915835 ,-1.0066313 ,-0.278253 , 4.6136203 ,-3.4183645 ,-1.5189631 ,-4.599058 , 3.3198457 ,-3.9464161 ,-0.6357558 , 0.32550323, 3.2147424 , 4.921844 ,-0.30067012, 3.9456701 , 0.5943688 ,-4.7229166 ,-3.6803844 ,-3.3813965 , 3.283583
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/int16/4.txt
new file mode 100644
index 000000000..16cc23b79
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/int16/4.txt
@@ -0,0 +1 @@
+ 2.232644 , 4.465217 , 1.926956 ,-4.007337 ,-2.7392106 ,-2.4579394 , 2.913538 ,-1.7261469 , 3.8706868 , 0.06259949,-2.018361 , 1.2728635 ,-3.133289 ,-4.943454 ,-1.5415367 ,-4.8183494 , 4.348317 ,-2.4929109 ,-0.9018388 ,-4.776565 , 4.634248 , 3.0753953 , 2.3412373 ,-2.7086196 , 3.4485948 , 0.3561932 , 0.03650501,-2.8704169 , 1.0514414 , 3.3964615 , 1.2783849 , 4.974951
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/0.txt
new file mode 100644
index 000000000..e9db48f9e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/0.txt
@@ -0,0 +1 @@
+-1.4124781 , 0.42694193, 1.1734594 ,-3.5111153 ,-2.9756174 , 1.3682148 ,-2.318465 , 2.198896 ,-4.5043235 , 3.1775594 ,-0.42802384,-1.4872279 , 1.3821319 ,-4.771963 ,-0.12837897, 4.132799 , 3.697655 , 2.0807178 ,-3.621293 , 2.121878 ,-0.25654107, 0.42100102,-1.4009671 ,-2.9733627 ,-0.7058871 ,-2.831215 , 3.5669627 , 2.1420689 ,-1.8789555 , 0.8104939 ,-2.0503597 , 1.7788508
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/1.txt
new file mode 100644
index 000000000..479d062f1
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/1.txt
@@ -0,0 +1 @@
+ 3.4726453 , 3.0497985 ,-4.234619 ,-1.0526706 , 1.7278554 ,-3.341614 , 4.54768 , 3.0954597 ,-3.735109 , 2.8810751 ,-2.5381427 ,-3.2360535 ,-1.5378917 , 2.3052745 ,-3.170938 ,-3.327242 , 2.0654576 ,-2.2294598 ,-1.881382 , 0.13216451,-4.2825613 , 0.26616526, 4.6196365 ,-0.88623226, 1.7103885 ,-1.5865034 ,-3.9114466 ,-3.2227128 , 4.909618 , 2.3318915 , 0.84300846, 0.760918
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/2.txt
new file mode 100644
index 000000000..ae28234bd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/2.txt
@@ -0,0 +1 @@
+-4.6097918,-4.21991 ,-3.9955974, 3.6492047, 2.9191775, 2.8082933, 1.6189331, 0.2730309,-1.5029653,-1.9471445, 4.8758197, 3.3177438, 3.1338058,-2.1281245,-1.7526287,-2.5518703,-1.7746793, 4.0455256,-0.5839861,-4.408046 ,-4.0034447, 1.5858272,-4.5896654, 4.7211285,-4.677515 ,-2.6027086,-4.7896166,-3.5512326,-1.9068764,-2.9705904,-4.854087 ,-4.892111
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/3.txt
new file mode 100644
index 000000000..fd40f84f4
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/3.txt
@@ -0,0 +1 @@
+ 2.1514777e-02, 2.6526773e+00,-3.0477784e+00, 1.3287724e+00,-4.1414630e-01,-1.7295350e-01, 7.6649576e-01,-1.8028022e+00,-7.0781744e-01,-2.5262204e-01,-3.0970418e+00,-1.3165286e+00,-4.6649928e+00, 2.0809033e+00,-1.5739973e+00,-4.0531826e-01,-2.1718202e+00, 2.0146034e+00, 2.5044403e+00,-1.1256610e+00, 1.3536702e+00, 1.0283234e-03,-1.8823910e+00, 4.7122188e+00, 9.4781297e-01, 3.2012525e+00,-5.5164534e-01,-2.6158772e+00,-1.8771547e+00,-3.1689723e+00, 4.9054880e+00,-3.4560370e+00
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/4.txt
new file mode 100644
index 000000000..e81c3b8e5
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/4.txt
@@ -0,0 +1 @@
+-2.0927553 ,-2.107511 ,-1.6963564 , 1.7006218 , 1.4575784 , 0.06095728, 1.2659966 , 4.1905265 , 1.3035946 , 4.9793477 ,-4.3388166 ,-0.23496658, 1.9831208 , 2.6154642 ,-0.2790228 ,-3.1774354 ,-3.178935 ,-1.1564373 ,-0.8199472 ,-2.245698 ,-4.8605046 ,-3.569018 ,-1.4226891 ,-4.1067843 , 2.6078918 ,-3.5830674 , 1.9065963 , 2.435578 ,-3.3216476 , 4.5930347 , 2.9191844 , 1.7885648
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/0.txt
new file mode 100644
index 000000000..a8874bc5f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/0.txt
@@ -0,0 +1 @@
+ 3.9384239 ,-3.7377489 , 0.97284186, 3.8309984 , 2.4125865 , 1.7141674 , 3.9459977 ,-0.304659 ,-3.4623327 , 4.4569106 , 4.209985 ,-0.6677348 , 3.4578135 , 1.6779743 , 2.502791 ,-1.324285 , 1.3139176 , 3.4334664 ,-2.2695086 ,-4.001059 ,-0.91164917, 4.4447775 ,-3.0275404 ,-2.0852396 , 3.6677403 ,-2.9595146 , 2.0921555 , 1.7570637 , 3.717391 ,-0.3216191 ,-0.8410847 , 2.662336
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/1.txt
new file mode 100644
index 000000000..715e680be
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/1.txt
@@ -0,0 +1 @@
+ 0.6663157 ,-0.04146723,-0.8193995 , 4.804576 ,-2.1357434 , 4.0829 ,-1.6380692 , 1.8043218 , 2.3431025 , 0.30111 , 1.2928191 ,-1.8559257 ,-0.68305963,-1.1502715 , 1.9492546 ,-2.7240746 , 2.9279857 ,-3.3329778 ,-4.8343406 ,-0.02708206, 1.1840513 , 3.6476028 , 4.75276 ,-4.9085226 ,-1.1922491 , 0.54225117, 3.17247 ,-2.7856457 ,-3.0866194 ,-2.2077718 , 1.6263398 , 3.7066603
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/2.txt
new file mode 100644
index 000000000..3ca893e61
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/2.txt
@@ -0,0 +1 @@
+-4.8507566 ,-1.267258 , 0.5099198 , 1.650726 , 3.4329638 ,-2.2652836 , 1.2157568 , 0.18305123, 3.6754217 ,-4.6185255 ,-1.0646905 ,-0.46092424, 2.046326 ,-2.8830478 , 4.156068 ,-2.0503244 , 0.0755459 ,-4.6472006 ,-0.50128895, 3.1129324 ,-4.4048553 , 0.47983927, 1.4510479 , 3.9226127 ,-4.767221 ,-2.795826 ,-4.816457 ,-3.6127663 ,-2.2712553 , 4.586938 , 1.1028811 , 1.5028698
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/3.txt
new file mode 100644
index 000000000..3fba8ecec
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/3.txt
@@ -0,0 +1 @@
+ 4.9431224 ,-3.4878132 ,-2.4831018 , 2.2395666 ,-2.3317611 ,-1.6786547 ,-2.4702384 , 3.2167027 , 1.7300137 , 2.8848834 ,-4.6395254 , 0.5527259 ,-2.915835 ,-1.0066313 ,-0.278253 , 4.6136203 ,-3.4183645 ,-1.5189631 ,-4.599058 , 3.3198457 ,-3.9464161 ,-0.6357558 , 0.32550323, 3.2147424 , 4.921844 ,-0.30067012, 3.9456701 , 0.5943688 ,-4.7229166 ,-3.6803844 ,-3.3813965 , 3.283583
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/4.txt
new file mode 100644
index 000000000..16cc23b79
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/4.txt
@@ -0,0 +1 @@
+ 2.232644 , 4.465217 , 1.926956 ,-4.007337 ,-2.7392106 ,-2.4579394 , 2.913538 ,-1.7261469 , 3.8706868 , 0.06259949,-2.018361 , 1.2728635 ,-3.133289 ,-4.943454 ,-1.5415367 ,-4.8183494 , 4.348317 ,-2.4929109 ,-0.9018388 ,-4.776565 , 4.634248 , 3.0753953 , 2.3412373 ,-2.7086196 , 3.4485948 , 0.3561932 , 0.03650501,-2.8704169 , 1.0514414 , 3.3964615 , 1.2783849 , 4.974951
diff --git a/compiler/pota-quantization-value-test/test_parallel_record_minmax.sh b/compiler/pota-quantization-value-test/test_parallel_record_minmax.sh
new file mode 100755
index 000000000..0af2c01da
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_parallel_record_minmax.sh
@@ -0,0 +1,95 @@
+#!/bin/bash
+
+# This script tests the parallel behavior of record-minmax
+#
+# HOW TO USE
+#
+# ./test_parallel_record_minmax.sh <path/to/test.config> <path/to/work_dir> <TEST 1> <TEST 2> ...
+# test.config : set ${RECORD_MINMAX_PATH} and ${CIRCLE_TENSORDUMP_PATH}
+# work_dir : build directory of pota-quantization-value-test (ex: build/compiler/pota-quantization-value-test)
+
+SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+COMPARE_SCRIPT_PATH="${SOURCE_PATH}/compare_tensors.py"
+CONFIG_PATH="$1"; shift
+BIN_PATH=$(dirname "${CONFIG_PATH}")
+TEST_INPUT_PATH="${SOURCE_PATH}/test_inputs"
+GEN_SCRIPT_PATH="${BIN_PATH}/gen_h5_explicit_inputs.py"
+WORKDIR="$1"; shift
+
+source "${CONFIG_PATH}"
+
+echo "-- Found RECORD-MINMAX: ${RECORD_MINMAX_PATH}"
+echo "-- Found CIRCLE_TENSORDUMP: ${CIRCLE_TENSORDUMP_PATH}"
+echo "-- Found workdir: ${WORKDIR}"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+pushd "${WORKDIR}"
+while [ "$1" != "" ]; do
+ MODELNAME=$1; shift
+ GRANULARITY=$1; shift
+ DTYPE=$1; shift
+ TESTCASE="${MODELNAME}.${GRANULARITY}.${DTYPE}"
+
+ TESTED+=("${TESTCASE}")
+
+ TESTCASE_FILE="${WORKDIR}/${TESTCASE}"
+ TEST_RESULT_FILE="${BIN_PATH}/${TESTCASE}"
+
+ PASSED_TAG="${TEST_RESULT_FILE}.parallel_record_minmax.passed"
+ rm -f "${PASSED_TAG}"
+
+ cat > "${TEST_RESULT_FILE}_parallel_record_minmax.log" <(
+ exec 2>&1
+ set -ex
+ # Generate h5 input data
+ source "${VIRTUALENV}/bin/activate"
+ "${VIRTUALENV}/bin/python" "${GEN_SCRIPT_PATH}" \
+ --model "${WORKDIR}/${MODELNAME}.circle" \
+ --input "${TEST_INPUT_PATH}/${MODELNAME}/${GRANULARITY}/${DTYPE}" \
+ --output "${TESTCASE_FILE}.input.h5"
+ if [[ $? -ne 0 ]]; then
+ echo "FAILED TO GENERATE INPUT"
+ continue
+ fi
+ # Run parallel record-minmax
+ "${RECORD_MINMAX_PATH}" \
+ --input_model "${TEST_RESULT_FILE}.fake_quantized.circle" \
+ --input_data "${TESTCASE_FILE}.input.h5" \
+ --output_model "${TEST_RESULT_FILE}.parallel_minmax_recorded.circle" \
+ --num_threads 4
+ # Dump min/max values (circle-tensordump)
+ "${CIRCLE_TENSORDUMP_PATH}" \
+ "${TEST_RESULT_FILE}.parallel_minmax_recorded.circle" \
+ --tensors_to_hdf5 "${TEST_RESULT_FILE}.parallel_minmax_recorded.circle.h5"
+ # Compare result
+ "${VIRTUALENV}/bin/python" "${COMPARE_SCRIPT_PATH}" \
+ --input_h5 "${TEST_RESULT_FILE}.parallel_minmax_recorded.circle.h5" \
+ --expect_dir "${SOURCE_PATH}/expected_outputs/${MODELNAME}/${GRANULARITY}/${DTYPE}/record_minmax" \
+ --mode record_minmax
+ if [[ $? -eq 0 ]]; then
+ touch "${PASSED_TAG}"
+ fi
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("$TESTCASE")
+ else
+ FAILED+=("$TESTCASE")
+ fi
+done
+popd
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
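For reference, a typical invocation of this harness would look like the sketch below. The paths and the model triple are illustrative placeholders; arguments after the work_dir are consumed in (MODELNAME GRANULARITY DTYPE) triples, as the while loop above shows.
```
./test_parallel_record_minmax.sh \
  build/compiler/pota-quantization-value-test/test.config \
  build/compiler/pota-quantization-value-test \
  TransposeConv_001 channel int16
```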
diff --git a/compiler/pota-quantization-value-test/test_quantization_with_config.sh b/compiler/pota-quantization-value-test/test_quantization_with_config.sh
new file mode 100755
index 000000000..1364dfb90
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_quantization_with_config.sh
@@ -0,0 +1,109 @@
+#!/bin/bash
+
+# This script tests quantize_with_minmax option of circle-quantizer with config file
+#
+# HOW TO USE
+#
+# ./test_quantization_with_config.sh <path/to/test.config> <path/to/work_dir> <TEST 1> <TEST 2> ...
+# test.config : set ${RECORD_MINMAX_PATH}, ${CIRCLE_QUANTIZER_PATH} and ${CIRCLE_TENSORDUMP_PATH}
+# work_dir : build directory of pota-quantization-value-test (ex: build/compiler/pota-quantization-value-test)
+
+SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+COMPARE_SCRIPT_PATH="${SOURCE_PATH}/compare_tensors.py"
+CONFIG_PATH="$1"; shift
+BIN_PATH=$(dirname "${CONFIG_PATH}")
+TEST_INPUT_PATH="${SOURCE_PATH}/test_inputs"
+GEN_SCRIPT_PATH="${BIN_PATH}/gen_h5_explicit_inputs.py"
+WORKDIR="$1"; shift
+
+source "${CONFIG_PATH}"
+
+echo "-- Found CIRCLE_QUANTIZER: ${CIRCLE_QUANTIZER_PATH}"
+echo "-- Found CIRCLE_TENSORDUMP: ${CIRCLE_TENSORDUMP_PATH}"
+echo "-- Found workdir: ${WORKDIR}"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+pushd "${WORKDIR}"
+while [ "$1" != "" ]; do
+ MODELNAME=$1; shift
+ GRANULARITY=$1; shift
+ DTYPE=$1; shift
+ TESTCASE="${MODELNAME}.${GRANULARITY}.${DTYPE}"
+
+ TESTED+=("${TESTCASE}")
+
+ TESTCASE_FILE="${WORKDIR}/${TESTCASE}"
+ TEST_RESULT_FILE="${BIN_PATH}/${TESTCASE}"
+
+ PASSED_TAG="${TEST_RESULT_FILE}.quantization.mixed.passed"
+ rm -f "${PASSED_TAG}"
+
+ cat > "${TEST_RESULT_FILE}_quantization_with_config.log" <(
+ exec 2>&1
+ set -ex
+
+ # Generate h5 input data
+ source "${VIRTUALENV}/bin/activate"
+ "${VIRTUALENV}/bin/python" "${GEN_SCRIPT_PATH}" \
+ --model "${WORKDIR}/${MODELNAME}.circle" \
+ --input "${TEST_INPUT_PATH}/${MODELNAME}_config/${GRANULARITY}/${DTYPE}" \
+ --output "${TESTCASE_FILE}.mixed.input.h5"
+
+ if [[ $? -ne 0 ]]; then
+ echo "FAILED TO GENERATE INPUT"
+ continue
+ fi
+
+ # Run record-minmax
+ # NOTE There is no '_with_config' test for record-minmax, because it does not
+  # use a quantization config file.
+ "${RECORD_MINMAX_PATH}" \
+ --input_model "${TEST_RESULT_FILE}.fake_quantized.mixed.circle" \
+ --input_data "${TESTCASE_FILE}.mixed.input.h5" \
+ --output_model "${TEST_RESULT_FILE}.minmax_recorded.mixed.circle"
+
+ # Run circle-quantizer with --quantize_with_minmax
+ "${CIRCLE_QUANTIZER_PATH}" \
+ --quantize_with_minmax float32 "${DTYPE}" "${GRANULARITY}" \
+ --config "${SOURCE_PATH}/config_files/${MODELNAME}/${GRANULARITY}/${DTYPE}/qconf.json" \
+ "${TEST_RESULT_FILE}.minmax_recorded.mixed.circle" \
+ "${TEST_RESULT_FILE}.quantized.mixed.circle"
+
+ # Dump scale, zp, weights values (circle-tensordump)
+ "${CIRCLE_TENSORDUMP_PATH}" \
+ "${TEST_RESULT_FILE}.quantized.mixed.circle" \
+ --tensors_to_hdf5 "${TEST_RESULT_FILE}.quantized.mixed.circle.h5"
+
+ # Compare result
+ "${VIRTUALENV}/bin/python" "${COMPARE_SCRIPT_PATH}" \
+ --input_h5 "${TEST_RESULT_FILE}.quantized.mixed.circle.h5" \
+ --expect_dir "${SOURCE_PATH}/expected_outputs/${MODELNAME}_config/${GRANULARITY}/${DTYPE}/quantization" \
+ --mode quantization
+
+ if [[ $? -eq 0 ]]; then
+ touch "${PASSED_TAG}"
+ fi
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("$TESTCASE")
+ else
+ FAILED+=("$TESTCASE")
+ fi
+done
+popd
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
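The `--config` argument above points at a per-test `qconf.json` under `config_files/`. As a minimal sketch of such a mixed-precision config (assuming the usual circle-quantizer `layers`/`dtype`/`granularity` keys; the layer name here is hypothetical), it overrides the default quantization for selected layers:
```
{
  "default_quantization_dtype": "uint8",
  "default_granularity": "channel",
  "layers": [
    { "name": "ofm_conv", "dtype": "int16", "granularity": "channel" }
  ]
}
```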
diff --git a/compiler/pota-quantization-value-test/test_record_minmax.sh b/compiler/pota-quantization-value-test/test_record_minmax.sh
index acb7574c0..fa8f506d4 100755
--- a/compiler/pota-quantization-value-test/test_record_minmax.sh
+++ b/compiler/pota-quantization-value-test/test_record_minmax.sh
@@ -9,11 +9,11 @@
# work_dir : build directory of quantization-value-test (ex: build/compiler/quantization-value-test)
SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-GEN_SCRIPT_PATH="${SOURCE_PATH}/gen_h5_explicit_inputs.py"
COMPARE_SCRIPT_PATH="${SOURCE_PATH}/compare_tensors.py"
CONFIG_PATH="$1"; shift
BIN_PATH=$(dirname "${CONFIG_PATH}")
TEST_INPUT_PATH="${SOURCE_PATH}/test_inputs"
+GEN_SCRIPT_PATH="${BIN_PATH}/gen_h5_explicit_inputs.py"
WORKDIR="$1"; shift
source "${CONFIG_PATH}"
@@ -48,7 +48,7 @@ while [ "$1" != "" ]; do
# Generate h5 input data
source "${VIRTUALENV}/bin/activate"
"${VIRTUALENV}/bin/python" "${GEN_SCRIPT_PATH}" \
- --model "${WORKDIR}/${MODELNAME}.tflite" \
+ --model "${WORKDIR}/${MODELNAME}.circle" \
--input "${TEST_INPUT_PATH}/${MODELNAME}/${GRANULARITY}/${DTYPE}" \
--output "${TESTCASE_FILE}.input.h5"
diff --git a/compiler/pota-quantization-value-test/test_wo_quantization.sh b/compiler/pota-quantization-value-test/test_wo_quantization.sh
new file mode 100755
index 000000000..18c85fe75
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_wo_quantization.sh
@@ -0,0 +1,87 @@
+#!/bin/bash
+
+# This script tests the basic behavior of weights-only quantization
+#
+# HOW TO USE
+#
+# ./test_wo_quantization.sh <path/to/test.config> <path/to/work_dir> <TEST 1> <TEST 2> ...
+# test.config : set ${CIRCLE_QUANTIZER_PATH} and ${CIRCLE_TENSORDUMP_PATH}
+# work_dir : build directory of pota-quantization-value-test (ex: build/compiler/pota-quantization-value-test)
+
+SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+COMPARE_SCRIPT_PATH="${SOURCE_PATH}/compare_tensors.py"
+CONFIG_PATH="$1"; shift
+BIN_PATH=$(dirname "${CONFIG_PATH}")
+TEST_INPUT_PATH="${SOURCE_PATH}/test_inputs"
+WORKDIR="$1"; shift
+
+source "${CONFIG_PATH}"
+
+echo "-- Found CIRCLE_QUANTIZER: ${CIRCLE_QUANTIZER_PATH}"
+echo "-- Found CIRCLE_TENSORDUMP: ${CIRCLE_TENSORDUMP_PATH}"
+echo "-- Found workdir: ${WORKDIR}"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+pushd "${WORKDIR}"
+while [ "$1" != "" ]; do
+ MODELNAME=$1; shift
+ GRANULARITY=$1; shift
+ DTYPE=$1; shift
+ TESTCASE="${MODELNAME}.${GRANULARITY}.${DTYPE}"
+
+ TESTED+=("${TESTCASE}")
+
+ TESTCASE_FILE="${WORKDIR}/${TESTCASE}"
+ TEST_RESULT_FILE="${BIN_PATH}/${TESTCASE}"
+
+ PASSED_TAG="${TEST_RESULT_FILE}.wo_quantization.passed"
+ rm -f "${PASSED_TAG}"
+
+ cat > "${TEST_RESULT_FILE}_wo_quantization.log" <(
+ exec 2>&1
+ set -ex
+
+ # Run circle-quantizer with --quantize_weights
+ "${CIRCLE_QUANTIZER_PATH}" \
+ --quantize_weights float32 "${DTYPE}" "${GRANULARITY}" \
+ "${WORKDIR}/${MODELNAME}.circle" \
+ "${TEST_RESULT_FILE}.wo_quantized.circle"
+
+ # Dump scale, zp, weights values (circle-tensordump)
+ "${CIRCLE_TENSORDUMP_PATH}" \
+ "${TEST_RESULT_FILE}.wo_quantized.circle" \
+ --tensors_to_hdf5 "${TEST_RESULT_FILE}.wo_quantized.circle.h5"
+
+ # Compare result
+ "${VIRTUALENV}/bin/python" "${COMPARE_SCRIPT_PATH}" \
+ --input_h5 "${TEST_RESULT_FILE}.wo_quantized.circle.h5" \
+ --expect_dir "${SOURCE_PATH}/expected_outputs/${MODELNAME}/${GRANULARITY}/${DTYPE}/wo_quantization" \
+ --mode weights_only_quantization
+
+ if [[ $? -eq 0 ]]; then
+ touch "${PASSED_TAG}"
+ fi
+ )
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("$TESTCASE")
+ else
+ FAILED+=("$TESTCASE")
+ fi
+done
+popd
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
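An assumed example run, quantizing only the weights of one model from this test set and comparing the dumped scale/zero-point/weight values (paths are placeholders, as above):
```
./test_wo_quantization.sh \
  build/compiler/pota-quantization-value-test/test.config \
  build/compiler/pota-quantization-value-test \
  TransposeConv_001 channel int16
```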
diff --git a/compiler/pp/CMakeLists.txt b/compiler/pp/CMakeLists.txt
index 2c25c6406..1db09cb88 100644
--- a/compiler/pp/CMakeLists.txt
+++ b/compiler/pp/CMakeLists.txt
@@ -3,9 +3,12 @@ file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
add_library(pp STATIC ${SOURCES})
-set_target_properties(pp PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(pp PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
target_include_directories(pp PUBLIC include)
target_link_libraries(pp PRIVATE nncc_common)
+target_link_libraries(pp PUBLIC nncc_coverage)
if(NOT ENABLE_TEST)
return()
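Since `NNCC_LIBRARY_NO_PIC` is read as an ordinary CMake variable, a caller could opt out of position-independent code at configure time; a hedged example (source and build paths are placeholders):
```
cmake -DNNCC_LIBRARY_NO_PIC=ON -S infra/nncc -B build
```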
diff --git a/compiler/pp/include/pp/IndentedStringBuilder.h b/compiler/pp/include/pp/IndentedStringBuilder.h
index 2655aff05..fabefe988 100644
--- a/compiler/pp/include/pp/IndentedStringBuilder.h
+++ b/compiler/pp/include/pp/IndentedStringBuilder.h
@@ -19,6 +19,8 @@
#include "pp/Format.h"
+#include <cstdint>
+
namespace pp
{
diff --git a/compiler/rawdata2hdf5/CMakeLists.txt b/compiler/rawdata2hdf5/CMakeLists.txt
new file mode 100644
index 000000000..9118772b6
--- /dev/null
+++ b/compiler/rawdata2hdf5/CMakeLists.txt
@@ -0,0 +1,17 @@
+set(rawdata2hdf5_FILE "rawdata2hdf5")
+set(rawdata2hdf5_SRC "${CMAKE_CURRENT_SOURCE_DIR}/${rawdata2hdf5_FILE}")
+set(rawdata2hdf5_BIN "${CMAKE_CURRENT_BINARY_DIR}/${rawdata2hdf5_FILE}")
+
+add_custom_command(OUTPUT ${rawdata2hdf5_BIN}
+ COMMAND ${CMAKE_COMMAND} -E copy "${rawdata2hdf5_SRC}" "${rawdata2hdf5_BIN}"
+ DEPENDS ${rawdata2hdf5_SRC}
+ COMMENT "Generate ${rawdata2hdf5_BIN}"
+ )
+
+add_custom_target(rawdata2hdf5 ALL DEPENDS ${rawdata2hdf5_BIN})
+
+install(FILES ${rawdata2hdf5_BIN}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION bin)
diff --git a/compiler/rawdata2hdf5/README.md b/compiler/rawdata2hdf5/README.md
new file mode 100644
index 000000000..cad92f34c
--- /dev/null
+++ b/compiler/rawdata2hdf5/README.md
@@ -0,0 +1,24 @@
+# rawdata2hdf5
+
+_rawdata2hdf5_ is a tool to convert raw data (assumed to be pre-processed) to an hdf5 file.
+
+## Prerequisite
+- Raw data pre-processed for the corresponding DNN model
+- List of data to convert (saved in a text file)
+- Python installed with _numpy_ and _h5py_ (See docs/how-to-prepare-virtualenv.txt)
+
+## Example
+```
+./rawdata2hdf5 \
+> --data_list=tmp/data/datalist.txt \
+> --output_path=tmp/data/data.h5
+```
+
+## Arguments
+```
+ -h, --help Show this help message and exit
+ -l DATA_LIST, --data_list DATA_LIST
+ Path to the text file which lists the absolute paths of the raw data files to be converted.
+ -o OUTPUT_PATH, --output_path OUTPUT_PATH
+ Path to the output hdf5 file.
+```
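+
+Each line of the `data_list` file is the absolute path of one raw data file, e.g. (paths below are placeholders):
+```
+/home/user/data/sample0.raw
+/home/user/data/sample1.raw
+```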
diff --git a/compiler/rawdata2hdf5/rawdata2hdf5 b/compiler/rawdata2hdf5/rawdata2hdf5
new file mode 100644
index 000000000..bb46d3a39
--- /dev/null
+++ b/compiler/rawdata2hdf5/rawdata2hdf5
@@ -0,0 +1,116 @@
+#!/usr/bin/env bash
+''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # '''
+''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # '''
+''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # '''
+''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
+''''exit 255 # '''
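+# NOTE The quadruple-quote lines above are a bash/python polyglot header:
+# bash drops the empty '' '' quotes and runs the export/test/exec commands,
+# re-executing this file with the venv python, while python parses each of
+# those lines as a bare triple-quoted string literal, i.e. a no-op.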
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import h5py as h5
+import numpy as np
+import argparse
+import glob
+import os
+
+
+def get_parser():
+ """Create and return given the argument parser"""
+ parser = argparse.ArgumentParser(
+        description='command line tool to convert raw data files to an hdf5 file')
+ parser.add_argument(
+ "-l",
+ "--data_list",
+ type=str,
+ help=
+ "Path to the text file which lists the absolute paths of the raw data files to be converted.",
+ required=True)
+ parser.add_argument(
+ "-o",
+ "--output_path",
+ type=str,
+ help="Path to the output hdf5 file.",
+ required=True)
+ return parser
+
+
+def verify_args(parser, args):
+ """Verify the given arguments"""
+
+ def is_valid_attr(args, attr):
+ return hasattr(args, attr) and getattr(args, attr)
+
+    # check if the required arguments are given
+ missing = []
+ if not is_valid_attr(args, 'data_list'):
+ missing.append('-l/--data_list')
+ if not is_valid_attr(args, 'output_path'):
+ missing.append('-o/--output_path')
+ if len(missing):
+ parser.error('the following arguments are required: ' + ' '.join(missing))
+
+
+def create_hdf5(data_list, output_path):
+ """Create the hdf5 file using raw data files listed in data_list"""
+ h5_file = h5.File(output_path, 'w')
+ group = h5_file.create_group("value")
+ # We assume the raw input data have the correct type/shape for the corresponding model
+ # If this flag is set in the hdf5 file, record-minmax will skip type/shape check
+ group.attrs['rawData'] = '1'
+
+    if not os.path.isfile(data_list):
+ raise FileNotFoundError("No such file. " + data_list)
+
+ # Data list
+ datalist = []
+ with open(data_list, 'r') as f:
+ lines = f.readlines()
+ for line in lines:
+ if line.strip():
+ filename = line.rstrip()
+ if os.path.isfile(filename):
+ datalist.append(filename)
+ else:
+ raise FileNotFoundError("No such file. " + filename)
+
+ # Input files
+ num_converted = 0
+ for rawdata in datalist:
+ with open(rawdata, 'rb') as f:
+ sample = group.create_group(str(num_converted))
+ num_converted += 1
+ filename = os.path.basename(rawdata)
+ sample.attrs['desc'] = filename
+ raw_data = bytearray(f.read())
+            # The target model is a DNN with a single input
+ sample.create_dataset('0', data=raw_data)
+
+ h5_file.close()
+
+ print("Raw data have been packaged to " + output_path)
+ print("Number of packaged data: " + str(num_converted))
+
+
+def main():
+ parser = get_parser()
+
+ args = parser.parse_args()
+
+ verify_args(parser, args)
+
+ create_hdf5(args.data_list, args.output_path)
+
+if __name__ == '__main__':
+ main()
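To make the produced layout concrete, here is a minimal read-back sketch (the file name reuses the example output path from the README): a `value` group carrying the `rawData` attribute, one numbered subgroup per converted raw file, each with a single dataset `'0'` holding the raw bytes.
```
import h5py as h5

with h5.File('tmp/data/data.h5', 'r') as f:
    value = f['value']
    print(value.attrs['rawData'])  # '1' -> record-minmax skips type/shape checks
    for idx in value:  # subgroups '0', '1', ... one per converted raw file
        sample = value[idx]
        print(idx, sample.attrs['desc'], len(sample['0'][()]))  # filename, byte count
```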
diff --git a/compiler/record-minmax-conversion-test/CMakeLists.txt b/compiler/record-minmax-conversion-test/CMakeLists.txt
index 2221e1702..35a7f9908 100644
--- a/compiler/record-minmax-conversion-test/CMakeLists.txt
+++ b/compiler/record-minmax-conversion-test/CMakeLists.txt
@@ -1,3 +1,7 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
unset(RECORD_MINMAX_CONVERSION_TEST)
macro(addTest NAME)
@@ -37,6 +41,6 @@ add_test(
COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/testall.sh"
"${TEST_CONFIG}"
"${ARTIFACTS_BIN_PATH}"
- "${NNCC_OVERLAY_DIR}/venv_1_13_2"
+ "${NNCC_OVERLAY_DIR}/venv_2_12_1"
${RECORD_MINMAX_CONVERSION_TEST}
)
diff --git a/compiler/record-minmax-conversion-test/gen_h5_random_inputs.py b/compiler/record-minmax-conversion-test/gen_h5_random_inputs.py
index b7709812c..d57289abf 100755
--- a/compiler/record-minmax-conversion-test/gen_h5_random_inputs.py
+++ b/compiler/record-minmax-conversion-test/gen_h5_random_inputs.py
@@ -39,8 +39,16 @@ for i in range(num_data):
for j in range(len(input_details)):
input_detail = input_details[j]
- input_data = np.array(
- np.random.random_sample(input_detail["shape"]), input_detail["dtype"])
+ print(input_detail["dtype"])
+ if input_detail["dtype"] == np.bool_:
+ # Generate random bool [0, 1]
+ input_data = np.array(
+ np.random.random_integers(0, 1, input_detail["shape"]),
+ input_detail["dtype"])
+ elif input_detail["dtype"] == np.float32:
+ # Generate random input [-5, 5)
+ input_data = np.array(10 * np.random.random_sample(input_detail["shape"]) - 5,
+ input_detail["dtype"])
sample.create_dataset(str(j), data=input_data)
h5_file.close()
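Note that `np.random.random_integers`, used in the new bool branch above, has long been deprecated in NumPy and is removed in recent releases; an equivalent sketch with the supported API would be:
```
import numpy as np

shape = (1, 3)  # illustrative input shape
# randint's upper bound is exclusive, so [0, 2) reproduces random_integers(0, 1)
input_data = np.random.randint(0, 2, shape).astype(np.bool_)
```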
diff --git a/compiler/record-minmax-conversion-test/testall.sh b/compiler/record-minmax-conversion-test/testall.sh
index 29c9ed3d1..d7fc1de53 100755
--- a/compiler/record-minmax-conversion-test/testall.sh
+++ b/compiler/record-minmax-conversion-test/testall.sh
@@ -55,6 +55,16 @@ for TESTCASE in "$@"; do
--input_data "${BIN_PATH}/${TESTCASE}.tflite.input.h5" \
--output_model "${BIN_PATH}/${TESTCASE}.out.circle"
+ if [[ $? -ne 0 ]]; then
+ echo "FAILED TO GENERATE CIRCLE OUTPUT"
+ continue
+ fi
+
+ # Run record-minmax with auto generated random input
+ "${RECORD_MINMAX_PATH}" \
+ --input_model "${TESTCASE_FILE}.circle" \
+ --output_model "${BIN_PATH}/${TESTCASE}.outr.circle"
+
if [[ $? -eq 0 ]]; then
touch "${PASSED_TAG}"
fi
diff --git a/compiler/record-minmax-thread-safety-test/CMakeLists.txt b/compiler/record-minmax-thread-safety-test/CMakeLists.txt
new file mode 100644
index 000000000..9d25ac37e
--- /dev/null
+++ b/compiler/record-minmax-thread-safety-test/CMakeLists.txt
@@ -0,0 +1,68 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+# Disable the test if record-minmax-for-thread-test does not exist
+if (NOT TARGET record-minmax-for-thread-test)
+ message(STATUS "record-minmax-thread-safety-test is disabled as record-minmax-for-thread-test was not built.")
+ return()
+endif(NOT TARGET record-minmax-for-thread-test)
+
+# Run the test only if the target arch is 64bit, as ThreadSanitizer is
+# only available on 64bit machines
+# (https://github.com/google/sanitizers/wiki/ThreadSanitizerCppManual#supported-platforms)
+if(NOT "${CMAKE_SIZEOF_VOID_P}" STREQUAL "8")
+ return()
+endif(NOT "${CMAKE_SIZEOF_VOID_P}" STREQUAL "8")
+
+unset(RECORD_MINMAX_THREAD_SAFETY_TEST)
+
+macro(addTest NAME)
+ list(APPEND RECORD_MINMAX_THREAD_SAFETY_TEST ${NAME})
+endmacro(addTest)
+
+# Read "test.lst"
+include("test.lst")
+# Read "test.local.lst" if exists
+include("test.local.lst" OPTIONAL)
+
+unset(TEST_DEPS)
+
+get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
+
+###
+### Generate test.config
+###
+set(TEST_CONFIG "${CMAKE_CURRENT_BINARY_DIR}/test.config")
+
+add_custom_command(
+ OUTPUT "${MICRO_ARM_BUILD_DEPENDENCY}"
+ COMMAND "${CMAKE_COMMAND}" "${CMAKE_CURRENT_SOURCE_DIR}/standalone" ${CMAKE_ARM_OPTIONS}
+ WORKING_DIRECTORY "${MICRO_ARM_BUILD_DIR}"
+ DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/standalone/CMakeLists.txt"
+ VERBATIM
+)
+set(RECORD_MINMAX_PATH "$<TARGET_FILE:record-minmax-for-thread-test>")
+
+add_custom_command(
+ OUTPUT ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E remove -f ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'RECORD_MINMAX_PATH=\"$<TARGET_FILE:record-minmax-for-thread-test>\"' >> ${TEST_CONFIG}
+ DEPENDS record-minmax-for-thread-test
+ COMMENT "Generate test configuration"
+)
+
+list(APPEND TEST_DEPS "${TEST_CONFIG}")
+
+# This enforces CMake to generate all the dependencies during "build" phase
+add_custom_target(record_minmax_thread_safety_test_deps ALL DEPENDS ${TEST_DEPS})
+
+# Run tests
+add_test(
+ NAME record_minmax_thread_safety_test
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/testall.sh"
+ "${TEST_CONFIG}"
+ "${ARTIFACTS_BIN_PATH}"
+ "${NNCC_OVERLAY_DIR}/venv_2_12_1"
+ ${RECORD_MINMAX_THREAD_SAFETY_TEST}
+)
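Given the echo command above, the generated test.config is a one-line shell fragment that testall.sh sources; it will look roughly like this (the binary path depends on the build tree):
```
RECORD_MINMAX_PATH="/path/to/build/compiler/record-minmax/record-minmax-for-thread-test"
```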
diff --git a/compiler/record-minmax-thread-safety-test/gen_h5_random_inputs.py b/compiler/record-minmax-thread-safety-test/gen_h5_random_inputs.py
new file mode 100644
index 000000000..d57289abf
--- /dev/null
+++ b/compiler/record-minmax-thread-safety-test/gen_h5_random_inputs.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+import h5py as h5
+import numpy as np
+import tensorflow as tf
+import argparse
+
+#
+# This script generates a pack of random input data (.h5) expected by the input tflite model
+#
+# Basic usage:
+# gen_h5_random_inputs.py --model <path/to/tflite/model> --num_data <number/of/data> --output <path/to/output/data>
+# ex: gen_h5_random_inputs.py --model add.tflite --num_data 3 --output add.tflite.input.h5
+# (This will create add.tflite.input.h5 composed of three random inputs in the same directory as the model)
+parser = argparse.ArgumentParser()
+parser.add_argument('--model', type=str, required=True)
+parser.add_argument('--num_data', type=int, required=True)
+parser.add_argument('--output', type=str, required=True)
+args = parser.parse_args()
+
+model = args.model
+
+num_data = args.num_data
+
+output_path = args.output
+
+# Build TFLite interpreter (to get information about the model inputs)
+interpreter = tf.lite.Interpreter(model)
+input_details = interpreter.get_input_details()
+
+# Create h5 file
+h5_file = h5.File(output_path, 'w')
+group = h5_file.create_group("value")
+group.attrs['desc'] = "Input data for " + model
+
+# Generate random data
+for i in range(num_data):
+ sample = group.create_group(str(i))
+ sample.attrs['desc'] = "Input data " + str(i)
+
+ for j in range(len(input_details)):
+ input_detail = input_details[j]
+ print(input_detail["dtype"])
+ if input_detail["dtype"] == np.bool_:
+ # Generate random bool [0, 1]
+ input_data = np.array(
+ np.random.random_integers(0, 1, input_detail["shape"]),
+ input_detail["dtype"])
+ elif input_detail["dtype"] == np.float32:
+ # Generate random input [-5, 5)
+ input_data = np.array(10 * np.random.random_sample(input_detail["shape"]) - 5,
+ input_detail["dtype"])
+ sample.create_dataset(str(j), data=input_data)
+
+h5_file.close()
diff --git a/compiler/record-minmax-thread-safety-test/requires.cmake b/compiler/record-minmax-thread-safety-test/requires.cmake
new file mode 100644
index 000000000..9105c3e2e
--- /dev/null
+++ b/compiler/record-minmax-thread-safety-test/requires.cmake
@@ -0,0 +1,2 @@
+require("common-artifacts")
+require("record-minmax")
diff --git a/compiler/record-minmax-thread-safety-test/test.lst b/compiler/record-minmax-thread-safety-test/test.lst
new file mode 100644
index 000000000..771c3bd66
--- /dev/null
+++ b/compiler/record-minmax-thread-safety-test/test.lst
@@ -0,0 +1,16 @@
+addTest(Add_000)
+addTest(AveragePool2D_000)
+addTest(Concatenation_000)
+addTest(Conv2D_000)
+addTest(Conv2D_001)
+addTest(Conv2D_002)
+addTest(DepthwiseConv2D_000)
+addTest(FullyConnected_000)
+addTest(FullyConnected_001)
+addTest(MaxPool2D_000)
+addTest(Mul_000)
+addTest(Pad_000)
+addTest(Reshape_000)
+addTest(Reshape_001)
+addTest(Reshape_002)
+addTest(Softmax_000)
diff --git a/compiler/record-minmax-thread-safety-test/testall.sh b/compiler/record-minmax-thread-safety-test/testall.sh
new file mode 100755
index 000000000..4b47b3ebb
--- /dev/null
+++ b/compiler/record-minmax-thread-safety-test/testall.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+
+# This script tests the parallel behavior of record-minmax
+#
+# HOW TO USE
+#
+# ./testall.sh <path/to/test.config> <path/to/work_dir> <TEST 1> <TEST 2> ...
+# test.config : set ${RECORD_MINMAX_PATH}
+# work_dir : build directory of record-minmax-thread-safety-test (ex: build/compiler/record-minmax-thread-safety-test)
+
+GEN_SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+GEN_SCRIPT_PATH="${GEN_SOURCE_PATH}/gen_h5_random_inputs.py"
+CONFIG_PATH="$1"; shift
+BIN_PATH=$(dirname "$CONFIG_PATH")
+WORKDIR="$1"; shift
+VIRTUALENV="$1"; shift
+
+source "${CONFIG_PATH}"
+
+echo "-- Found RECORD-MINMAX: ${RECORD_MINMAX_PATH}"
+echo "-- Found workdir: ${WORKDIR}"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+pushd "${WORKDIR}"
+for TESTCASE in "$@"; do
+ TESTED+=("${TESTCASE}")
+
+ TESTCASE_FILE="${WORKDIR}/${TESTCASE}"
+
+ PASSED_TAG="${BIN_PATH}/${TESTCASE}.passed"
+ rm -f "${PASSED_TAG}"
+
+ cat > "${BIN_PATH}/${TESTCASE}.log" <(
+ exec 2>&1
+ set -ex
+ # Generate h5 input data
+ source "${VIRTUALENV}/bin/activate"
+ "${VIRTUALENV}/bin/python" "${GEN_SCRIPT_PATH}" \
+ --model "${TESTCASE_FILE}.tflite" \
+ --num_data 8 \
+ --output "${BIN_PATH}/${TESTCASE}.tflite.input.h5"
+ if [[ $? -ne 0 ]]; then
+ echo "FAILED TO GENERATE INPUT"
+ continue
+ fi
+ # Run record-minmax in parallel mode
+ "${RECORD_MINMAX_PATH}" \
+ --input_model "${TESTCASE_FILE}.circle" \
+ --input_data "${BIN_PATH}/${TESTCASE}.tflite.input.h5" \
+ --output_model "${BIN_PATH}/${TESTCASE}.out.circle" \
+ --num_threads 4
+ if [[ $? -ne 0 ]]; then
+ echo "FAILED TO GENERATE CIRCLE OUTPUT"
+ continue
+ fi
+ )
+
+ if ! grep -q "ThreadSanitizer: data race" "${BIN_PATH}/${TESTCASE}.log"; then
+ touch "${PASSED_TAG}"
+ fi
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("$TESTCASE")
+ else
+ FAILED+=("$TESTCASE")
+ fi
+done
+popd
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/record-minmax/CMakeLists.txt b/compiler/record-minmax/CMakeLists.txt
index f8a165bd3..3feca330a 100644
--- a/compiler/record-minmax/CMakeLists.txt
+++ b/compiler/record-minmax/CMakeLists.txt
@@ -1,25 +1,21 @@
-nnas_find_package(HDF5 COMPONENTS STATIC QUIET)
-
-if(NOT HDF5_FOUND)
- message(STATUS "Build record-minmax: FAILED (missing HDF5)")
- return()
-endif(NOT HDF5_FOUND)
-
set(DRIVER "driver/Driver.cpp")
file(GLOB_RECURSE SOURCES "src/*.cpp")
add_executable(record-minmax ${DRIVER} ${SOURCES})
target_include_directories(record-minmax PRIVATE include)
-target_include_directories(record-minmax PRIVATE ${HDF5_INCLUDE_DIRS})
-target_link_libraries(record-minmax ${HDF5_CXX_LIBRARIES})
target_link_libraries(record-minmax arser)
target_link_libraries(record-minmax safemain)
target_link_libraries(record-minmax luci_import)
+target_link_libraries(record-minmax luci_env)
target_link_libraries(record-minmax luci_export)
target_link_libraries(record-minmax luci_interpreter)
+target_link_libraries(record-minmax luci_log)
+target_link_libraries(record-minmax dio_hdf5)
target_link_libraries(record-minmax vconone)
+target_link_libraries(record-minmax nncc_coverage)
+target_link_libraries(record-minmax nncc_common)
install(TARGETS record-minmax DESTINATION bin)
@@ -27,6 +23,47 @@ if(NOT ENABLE_TEST)
return()
endif(NOT ENABLE_TEST)
+###
+### record-minmax-for-thread-test is temporarily disabled because
+### the gcc package has a bug.
+### (https://bugs.launchpad.net/ubuntu/+source/gcc-10/+bug/2029910)
+### Let's enable the target after the bug is fixed.
+###
+# Build record-minmax-for-thread-test if target arch is 64bit
+# Thread sanitizer is only available on 64bit machine
+# (https://github.com/google/sanitizers/wiki/ThreadSanitizerCppManual#supported-platforms)
+if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "8" AND FALSE)
+# create record-minmax-for-thread-test target
+ # Note: record-minmax-for-thread-test is built with -fsanitize=thread so that the thread
+ # sanitizer can detect data races; record-minmax is built without the option for performance.
+ add_executable(record-minmax-for-thread-test ${DRIVER} ${SOURCES})
+ target_include_directories(record-minmax-for-thread-test PRIVATE include)
+
+ target_link_libraries(record-minmax-for-thread-test arser)
+ target_link_libraries(record-minmax-for-thread-test safemain)
+ target_link_libraries(record-minmax-for-thread-test luci_import)
+ target_link_libraries(record-minmax-for-thread-test luci_env)
+ target_link_libraries(record-minmax-for-thread-test luci_export)
+ target_link_libraries(record-minmax-for-thread-test luci_interpreter)
+ target_link_libraries(record-minmax-for-thread-test dio_hdf5)
+ target_link_libraries(record-minmax-for-thread-test vconone)
+ target_link_libraries(record-minmax-for-thread-test nncc_coverage)
+ target_link_libraries(record-minmax-for-thread-test luci_log)
+
+ target_compile_options(record-minmax-for-thread-test PUBLIC -fsanitize=thread)
+ target_link_libraries(record-minmax-for-thread-test -fsanitize=thread)
+endif("${CMAKE_SIZEOF_VOID_P}" STREQUAL "8" AND FALSE)
+
+# record-minmax is an executable, so we do not link it to the test.
+# Instead, we use TEST_SOURCES to specify the sources used for tests.
+set(TEST_SOURCES
+ "src/RecordFunction.cpp"
+ "src/MinMaxComputer.cpp")
+
+file(GLOB_RECURSE TESTS "tests/*.test.cpp")
+
nnas_find_package(GTest REQUIRED)
-GTest_AddTest(record_minmax_function_test "${CMAKE_CURRENT_SOURCE_DIR}/tests/RecordFunction.test.cpp")
+GTest_AddTest(record_minmax_function_test ${TESTS} ${TEST_SOURCES})
target_include_directories(record_minmax_function_test PRIVATE include)
+target_link_libraries(record_minmax_function_test luci_lang)
+target_link_libraries(record_minmax_function_test nncc_coverage)
diff --git a/compiler/record-minmax/driver/Driver.cpp b/compiler/record-minmax/driver/Driver.cpp
index 8b09498c3..24a4ff80f 100644
--- a/compiler/record-minmax/driver/Driver.cpp
+++ b/compiler/record-minmax/driver/Driver.cpp
@@ -19,6 +19,11 @@
#include <arser/arser.h>
#include <vconone/vconone.h>
+#include <luci/UserSettings.h>
+
+// TODO declare own log signature of record-minmax
+#include <luci/Log.h>
+
void print_version(void)
{
std::cout << "record-minmax version " << vconone::get_string() << std::endl;
@@ -29,48 +34,53 @@ int entry(const int argc, char **argv)
{
using namespace record_minmax;
+ LOGGER(l);
+
arser::Arser arser(
- "Embedding min/max values of activations to the circle model for post-training quantization");
+ "Embedding min/max values of activations to the circle model for post-training quantization");
- arser.add_argument("--version")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("Show version information and exit")
- .exit_with(print_version);
+ arser::Helper::add_version(arser, print_version);
+ arser::Helper::add_verbose(arser);
- arser.add_argument("--input_model")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(true)
- .help("Input model filepath");
+ arser.add_argument("--input_model").required(true).help("Input model filepath");
arser.add_argument("--input_data")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(true)
- .help("Input data filepath");
+ .help("Input data filepath. If not given, record-minmax will run with randomly generated data. "
+ "Note that the random dataset does not represent inference workload, leading to poor "
+ "model accuracy.");
- arser.add_argument("--output_model")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(true)
- .help("Output model filepath");
+ arser.add_argument("--output_model").required(true).help("Output model filepath");
arser.add_argument("--min_percentile")
- .nargs(1)
- .type(arser::DataType::FLOAT)
- .help("Record n'th percentile of min");
+ .type(arser::DataType::FLOAT)
+ .help("Record n'th percentile of min");
+
+ arser.add_argument("--num_threads")
+ .type(arser::DataType::INT32)
+ .help("Number of threads (default: 1)");
arser.add_argument("--max_percentile")
- .nargs(1)
- .type(arser::DataType::FLOAT)
- .help("Record n'th percentile of max");
+ .type(arser::DataType::FLOAT)
+ .help("Record n'th percentile of max");
+
+ arser.add_argument("--moving_avg_batch")
+ .type(arser::DataType::INT32)
+ .help("Batch size of moving average algorithm (default: 16)");
+
+ arser.add_argument("--moving_avg_const")
+ .type(arser::DataType::FLOAT)
+ .help("Hyperparameter (C) to compute moving average (default: 0.1). Update equation: avg <- "
+ "avg + C * (curr_batch_avg - avg)");
+
+ arser.add_argument("--mode").help("Record mode. percentile (default) or moving_average");
- arser.add_argument("--mode")
- .nargs(1)
- .type(arser::DataType::STR)
- .help("Record mode. percentile (default) or moving_average");
+ arser.add_argument("--input_data_format")
+ .help("Input data format. h5/hdf5 (default) or list/filelist");
+
+ arser.add_argument("--generate_profile_data")
+ .nargs(0)
+ .default_value(false)
+ .help("This will turn on profiling data generation.");
try
{
@@ -83,34 +93,135 @@ int entry(const int argc, char **argv)
return 255;
}
+ if (arser.get<bool>("--verbose"))
+ {
+ // The third parameter of setenv means REPLACE.
+ // If REPLACE is zero, it does not overwrite an existing value.
+ setenv("LUCI_LOG", "100", 0);
+ }
+
+ auto settings = luci::UserSettings::settings();
+
auto input_model_path = arser.get<std::string>("--input_model");
- auto input_data_path = arser.get<std::string>("--input_data");
auto output_model_path = arser.get<std::string>("--output_model");
// Default values
std::string mode("percentile");
float min_percentile = 1.0;
float max_percentile = 99.0;
+ uint32_t moving_avg_batch = 16;
+ float moving_avg_const = 0.1;
+ std::string input_data_format("h5");
+ uint32_t num_threads = 1;
if (arser["--min_percentile"])
min_percentile = arser.get<float>("--min_percentile");
+ if (arser["--num_threads"])
+ num_threads = arser.get<int>("--num_threads");
+
+ if (num_threads < 1)
+ throw std::runtime_error("The number of threads must be greater than zero");
+
if (arser["--max_percentile"])
max_percentile = arser.get<float>("--max_percentile");
if (arser["--mode"])
mode = arser.get<std::string>("--mode");
+ if (arser["--moving_avg_batch"])
+ moving_avg_batch = arser.get<int>("--moving_avg_batch");
+
+ if (arser["--moving_avg_const"])
+ moving_avg_const = arser.get<float>("--moving_avg_const");
+
if (mode != "percentile" && mode != "moving_average")
throw std::runtime_error("Unsupported mode");
- RecordMinMax rmm;
+ if (arser["--generate_profile_data"])
+ settings->set(luci::UserSettings::Key::ProfilingDataGen, true);
+
+ if (arser["--input_data_format"])
+ input_data_format = arser.get<std::string>("--input_data_format");
+
+ std::unique_ptr<MinMaxComputer> computer;
+ {
+ if (mode == "percentile")
+ {
+ computer = make_percentile_computer(min_percentile, max_percentile);
+ }
+ else if (mode == "moving_average")
+ {
+ computer = make_moving_avg_computer(moving_avg_batch, moving_avg_const);
+ }
+ else
+ {
+ assert(false);
+ }
+ }
+
+ RecordMinMax rmm(num_threads, std::move(computer));
+
+ // TODO Support parallel recording with randomly generated input data
+ if (num_threads > 1 and not arser["--input_data"])
+ {
+ throw std::runtime_error("Input data must be given for parallel recording");
+ }
// Initialize interpreter and observer
rmm.initialize(input_model_path);
- // Profile min/max while executing the given input data
- rmm.profileData(mode, input_data_path, min_percentile, max_percentile);
+ if (arser["--input_data"])
+ {
+ auto input_data_path = arser.get<std::string>("--input_data");
+
+ // TODO Support parallel recording for the list and directory input data formats
+ if (num_threads > 1 and not(input_data_format == "h5") and not(input_data_format == "hdf5"))
+ {
+ throw std::runtime_error("Parallel recording currently supports only the h5 format");
+ }
+
+ if (input_data_format == "h5" || input_data_format == "hdf5")
+ {
+ // Profile min/max while executing the H5 data
+ if (num_threads == 1)
+ rmm.profileData(input_data_path);
+ else
+ {
+ INFO(l) << "Using parallel recording" << std::endl;
+ rmm.profileDataInParallel(input_data_path);
+ }
+ }
+ // input_data is a text file with one file path per line.
+ // Each data file is composed of the inputs of a model, concatenated in
+ // the same order as the input indices of the model.
+ //
+ // For example, for a model with n inputs, the contents of each data
+ // file can be visualized as below
+ // [input 1][input 2]...[input n]
+ // |start............end of file|
+ else if (input_data_format == "list" || input_data_format == "filelist")
+ {
+ // Profile min/max while executing the list of Raw data
+ rmm.profileRawData(input_data_path);
+ }
+ else if (input_data_format == "directory" || input_data_format == "dir")
+ {
+ // Profile min/max while executing all files under the given directory
+ // The contents of each file is same as the raw data in the 'list' type
+ rmm.profileRawDataDirectory(input_data_path);
+ }
+ else
+ {
+ throw std::runtime_error(
+ "Unsupported input data format (supported formats: h5/hdf5 (default), list/filelist)");
+ }
+ }
+ else
+ {
+ // Profile min/max while executing random input data
+ rmm.profileDataWithRandomInputs();
+ }
// Save profiled values to the model
rmm.saveModel(output_model_path);
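Condensed, the rewritten driver flow is: build a MinMaxComputer from --mode, hand it together with --num_threads to RecordMinMax, then pick a profiling path based on the input data format. A minimal sketch of that wiring, with argument parsing elided and the default hyperparameters hardcoded:

#include "RecordMinMax.h"

#include <string>
#include <utility>

using namespace record_minmax;

// mode, num_threads and the paths come from arser in the real driver
void run(const std::string &mode, uint32_t num_threads, const std::string &input_model,
         const std::string &input_data, const std::string &output_model)
{
  auto computer = (mode == "percentile")
                    ? make_percentile_computer(1.0f, 99.0f) // default percentiles
                    : make_moving_avg_computer(16, 0.1f);   // default batch size / constant

  RecordMinMax rmm(num_threads, std::move(computer));
  rmm.initialize(input_model);

  if (num_threads > 1)
    rmm.profileDataInParallel(input_data); // h5 input only
  else
    rmm.profileData(input_data); // h5 path; list/filelist and directory variants also exist

  rmm.saveModel(output_model);
}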
diff --git a/compiler/record-minmax/include/MinMaxComputer.h b/compiler/record-minmax/include/MinMaxComputer.h
new file mode 100644
index 000000000..0e9c8881b
--- /dev/null
+++ b/compiler/record-minmax/include/MinMaxComputer.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __RECORD_MINMAX_MINMAXCOMPUTER_H__
+#define __RECORD_MINMAX_MINMAXCOMPUTER_H__
+
+#include "MinMaxVectors.h"
+
+#include <luci/IR/CircleNode.h>
+
+#include <unordered_map>
+#include <memory>
+
+namespace record_minmax
+{
+
+class MinMaxComputer
+{
+public:
+ MinMaxComputer()
+ {
+ // Do nothing
+ }
+
+ virtual ~MinMaxComputer() = default;
+
+ // Child class must implement this
+ virtual void
+ update_qparam(const std::unordered_map<const luci::CircleNode *, MinMaxVectors> *minmax_map) = 0;
+};
+
+class PercentileComputer : public MinMaxComputer
+{
+public:
+ PercentileComputer(float min_percentile, float max_percentile)
+ : _min_percentile(min_percentile), _max_percentile(max_percentile)
+ {
+ }
+
+ virtual void
+ update_qparam(const std::unordered_map<const luci::CircleNode *, MinMaxVectors> *minmax_map);
+
+private:
+ float _min_percentile = 0.0;
+ float _max_percentile = 0.0;
+};
+
+class MovingAvgComputer : public MinMaxComputer
+{
+public:
+ MovingAvgComputer(uint32_t batch_size, float update_const)
+ : _batch_size(batch_size), _update_const(update_const)
+ {
+ }
+
+ virtual void
+ update_qparam(const std::unordered_map<const luci::CircleNode *, MinMaxVectors> *minmax_map);
+
+private:
+ uint32_t _batch_size = 0;
+ float _update_const = 0.0;
+};
+
+std::unique_ptr<MinMaxComputer> make_percentile_computer(float min_percentile,
+ float max_percentile);
+
+std::unique_ptr<MinMaxComputer> make_moving_avg_computer(uint32_t batch_size,
+ float moving_avg_const);
+
+} // namespace record_minmax
+
+#endif // __RECORD_MINMAX_MINMAXCOMPUTER_H__
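Since update_qparam is the only virtual member, plugging another statistic into the recording pipeline is a matter of one subclass plus a factory. A hypothetical sketch following the same pattern as PercentileComputer (AbsMaxComputer is not part of this patch; it records the largest observed magnitude symmetrically):

#include "MinMaxComputer.h"

#include <luci/IR/CircleQuantParam.h>

#include <algorithm>
#include <cmath>

namespace record_minmax
{

class AbsMaxComputer : public MinMaxComputer
{
public:
  void update_qparam(
    const std::unordered_map<const luci::CircleNode *, MinMaxVectors> *minmax_map) override
  {
    for (const auto &pair : *minmax_map)
    {
      const auto &mm = pair.second;
      float abs_max = 0.0f;
      for (float v : mm.min_vector)
        abs_max = std::max(abs_max, std::abs(v));
      for (float v : mm.max_vector)
        abs_max = std::max(abs_max, std::abs(v));

      auto qparam = std::make_unique<luci::CircleQuantParam>();
      qparam->min.push_back(-abs_max);
      qparam->max.push_back(abs_max);
      const_cast<luci::CircleNode *>(pair.first)->quantparam(std::move(qparam));
    }
  }
};

} // namespace record_minmax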
diff --git a/compiler/record-minmax/include/MinMaxObserver.h b/compiler/record-minmax/include/MinMaxObserver.h
index ce63438ac..f0167971f 100644
--- a/compiler/record-minmax/include/MinMaxObserver.h
+++ b/compiler/record-minmax/include/MinMaxObserver.h
@@ -20,18 +20,14 @@
#include <luci_interpreter/Interpreter.h>
#include <luci_interpreter/core/Tensor.h>
+#include "MinMaxVectors.h"
+
#include <vector>
#include <unordered_map>
namespace record_minmax
{
-struct MinMaxVectors
-{
- std::vector<float> min_vector;
- std::vector<float> max_vector;
-};
-
class MinMaxMap
{
public:
@@ -43,6 +39,15 @@ public:
vectors.max_vector.push_back(max);
}
+ void appendMinMaxVector(const luci::CircleNode *node, const MinMaxVectors &minmax_vector)
+ {
+ MinMaxVectors &vectors = _minmax_map[node];
+ vectors.min_vector.insert(vectors.min_vector.end(), minmax_vector.min_vector.begin(),
+ minmax_vector.min_vector.end());
+ vectors.max_vector.insert(vectors.max_vector.end(), minmax_vector.max_vector.begin(),
+ minmax_vector.max_vector.end());
+ }
+
const std::unordered_map<const luci::CircleNode *, MinMaxVectors> *getMap() const
{
return &_minmax_map;
@@ -63,6 +68,7 @@ public:
void postTensorWrite(const luci::CircleNode *node,
const luci_interpreter::Tensor *tensor) override;
+ // Never return nullptr
const MinMaxMap *minMaxData() { return &_minmax_data; }
private:
diff --git a/compiler/record-minmax/include/MinMaxVectors.h b/compiler/record-minmax/include/MinMaxVectors.h
new file mode 100644
index 000000000..5b2b03a34
--- /dev/null
+++ b/compiler/record-minmax/include/MinMaxVectors.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __RECORD_MINMAX_MINMAXVECTORS_H__
+#define __RECORD_MINMAX_MINMAXVECTORS_H__
+
+#include <vector>
+
+namespace record_minmax
+{
+
+struct MinMaxVectors
+{
+ std::vector<float> min_vector;
+ std::vector<float> max_vector;
+};
+
+} // namespace record_minmax
+
+#endif // __RECORD_MINMAX_MINMAXVECTORS_H__
diff --git a/compiler/record-minmax/include/RecordFunction.h b/compiler/record-minmax/include/RecordFunction.h
index b570c6a0a..745ee2282 100644
--- a/compiler/record-minmax/include/RecordFunction.h
+++ b/compiler/record-minmax/include/RecordFunction.h
@@ -14,12 +14,11 @@
* limitations under the License.
*/
+#ifndef __RECORD_MINMAX_RECORD_FUNCTION_H__
+#define __RECORD_MINMAX_RECORD_FUNCTION_H__
+
#include <vector>
-#include <cassert>
-#include <algorithm>
-#include <cmath>
-#include <numeric>
-#include <stdexcept>
+#include <cstdint>
namespace record_minmax
{
@@ -28,75 +27,15 @@ namespace record_minmax
 * @brief getNthPercentile calculates the n-th percentile of the input vector (0.0 <= n <= 100.0).
 * Linear interpolation is used when the desired percentile lies between two data points.
*/
-float getNthPercentile(std::vector<float> &vector, float percentile)
-{
- if (percentile < 0 || percentile > 100)
- throw std::runtime_error("Percentile must be ranged from 0 to 100");
-
- if (percentile == 0.0)
- return vector.front();
-
- if (percentile == 100.0)
- return vector.back();
-
- if (vector.empty())
- throw std::runtime_error("Percentile must take a non-empty vector as an argument");
-
- if (vector.size() == 1)
- return vector[0];
-
- std::vector<float> copy;
- copy.assign(vector.begin(), vector.end());
- std::sort(copy.begin(), copy.end());
-
- int index = static_cast<int>(std::floor((copy.size() - 1) * percentile / 100.0));
-
- float percent_i = static_cast<float>(index) / static_cast<float>(copy.size() - 1);
- float fraction =
- (percentile / 100.0 - percent_i) / ((index + 1.0) / (copy.size() - 1.0) - percent_i);
- float res = copy[index] + fraction * (copy[index + 1] - copy[index]);
- return res;
-}
+float getNthPercentile(std::vector<float> &vector, float percentile);
/**
 * @brief getMovingAverage calculates the weighted moving average of the input vector.
 * The initial value is the minimum (or maximum) value of the first batch of the vector.
*/
float getMovingAverage(const std::vector<float> &vector, const float alpha,
- const uint8_t batch_size, bool is_min)
-{
- assert(!vector.empty());
- assert(alpha >= 0.0 && alpha <= 1.0);
- assert(batch_size > 0);
-
- auto getBatchMinOrMax = [&](int start_index) {
- assert(start_index >= 0 && start_index < vector.size());
-
- float res = is_min ? std::numeric_limits<float>::max() : std::numeric_limits<float>::lowest();
- for (int offset = 0; offset < batch_size; offset++)
- {
- int index = start_index + offset;
- if (index >= vector.size())
- break;
-
- if (is_min)
- {
- res = vector[index] < res ? vector[index] : res;
- }
- else
- {
- res = vector[index] > res ? vector[index] : res;
- }
- }
- return res;
- };
-
- float curr_avg = getBatchMinOrMax(0);
- for (size_t i = batch_size; i < vector.size(); i += batch_size)
- {
- curr_avg = curr_avg * alpha + getBatchMinOrMax(i) * (1.0 - alpha);
- }
- return curr_avg;
-}
+ const uint8_t batch_size, bool is_min);
} // namespace record_minmax
+
+#endif // __RECORD_MINMAX_RECORD_FUNCTION_H__
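To make the interpolation concrete: for sorted data {1, 2, 3, 4, 5} and percentile 90, index = floor(4 * 0.9) = 3, percent_i = 3/4 = 0.75, fraction = (0.9 - 0.75) / (1.0 - 0.75) = 0.6, so the result is 4 + 0.6 * (5 - 4) = 4.6. A small self-check against the declared function, assuming it is linked with RecordFunction.cpp:

#include "RecordFunction.h"

#include <cassert>
#include <cmath>

int main()
{
  // Unsorted on purpose; getNthPercentile sorts a copy internally
  std::vector<float> v{5, 1, 4, 2, 3};
  const float p90 = record_minmax::getNthPercentile(v, 90.0f);
  assert(std::fabs(p90 - 4.6f) < 1e-5f);
  return 0;
}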
diff --git a/compiler/record-minmax/include/RecordMinMax.h b/compiler/record-minmax/include/RecordMinMax.h
index ffdb17aec..758e8a924 100644
--- a/compiler/record-minmax/include/RecordMinMax.h
+++ b/compiler/record-minmax/include/RecordMinMax.h
@@ -21,30 +21,66 @@
#include <luci_interpreter/Interpreter.h>
#include "MinMaxObserver.h"
+#include "MinMaxComputer.h"
#include <memory>
+#include <thread>
namespace record_minmax
{
+using Buffer = std::vector<char>;
+using Output = std::vector<Buffer>;
+using WholeOutput = std::vector<Output>;
+
class RecordMinMax
{
public:
- explicit RecordMinMax() = default;
+ explicit RecordMinMax(uint32_t num_threads, std::unique_ptr<MinMaxComputer> &&minmax_computer)
+ : _threads_size(num_threads), _minmax_computer(std::move(minmax_computer))
+ {
+ assert(_threads_size > 0);
+ assert(_minmax_computer != nullptr);
+ }
~RecordMinMax() = default;
void initialize(const std::string &input_model_path);
- void profileData(const std::string &mode, const std::string &input_data_path,
- float min_percentile, float max_percentile);
+ // TODO Refactor profile functions
+ void profileData(const std::string &input_data_path);
+
+ void profileDataInParallel(const std::string &input_data_path);
+
+ void profileRawData(const std::string &input_data_path);
+
+ void profileRawDataDirectory(const std::string &input_data_path);
+
+ void profileDataWithRandomInputs(void);
void saveModel(const std::string &output_model_path);
private:
+ luci_interpreter::Interpreter *getInterpreter() const { return _interpreters[0].get(); }
+
+ // Never return nullptr
+ MinMaxObserver *getObserver() const
+ {
+ assert(_observers.size() > 0); // FIX CALLER UNLESS
+ assert(_observers[0].get()); // FIX CALLER UNLESS
+ return _observers[0].get();
+ }
+
+ WholeOutput importH5Data(const std::string &input_data_path);
+
std::unique_ptr<luci::Module> _module;
- std::unique_ptr<luci_interpreter::Interpreter> _interpreter;
- std::unique_ptr<MinMaxObserver> _observer;
+
+ // Multiple interpreters are used for parallel execution
+ std::vector<std::unique_ptr<luci_interpreter::Interpreter>> _interpreters;
+ std::vector<std::unique_ptr<MinMaxObserver>> _observers;
+
+ uint32_t _threads_size = 0;
+ std::unique_ptr<MinMaxComputer> _minmax_computer;
};
} // namespace record_minmax
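The parallel design, in short: one Interpreter/MinMaxObserver pair per thread, each thread replays a disjoint slice of the records, and the per-thread min/max vectors are merged afterwards via MinMaxMap::appendMinMaxVector before a single update_qparam call. A sketch of just the merge step, assuming observers is the per-thread vector after all threads have joined:

#include "MinMaxObserver.h"

#include <memory>
#include <vector>

using namespace record_minmax;

// Collect every thread's observations into one map before computing qparams
MinMaxMap merge(const std::vector<std::unique_ptr<MinMaxObserver>> &observers)
{
  MinMaxMap merged;
  for (const auto &obs : observers)
  {
    const auto *map = obs->minMaxData()->getMap();
    for (const auto &pair : *map)
      merged.appendMinMaxVector(pair.first, pair.second);
  }
  return merged;
}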
diff --git a/compiler/record-minmax/requires.cmake b/compiler/record-minmax/requires.cmake
index f6804cef1..69373e76f 100644
--- a/compiler/record-minmax/requires.cmake
+++ b/compiler/record-minmax/requires.cmake
@@ -1,4 +1,6 @@
require("luci")
+require("luci-interpreter")
require("safemain")
require("arser")
+require("dio-hdf5")
require("vconone")
diff --git a/compiler/record-minmax/src/HDF5Importer.cpp b/compiler/record-minmax/src/HDF5Importer.cpp
deleted file mode 100644
index a0e65eeb7..000000000
--- a/compiler/record-minmax/src/HDF5Importer.cpp
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "HDF5Importer.h"
-
-#include <H5Cpp.h>
-
-#include <string>
-#include <cassert>
-#include <stdexcept>
-
-using Shape = luci_interpreter::Shape;
-using DataType = luci_interpreter::DataType;
-
-namespace
-{
-
-Shape toInternalShape(const H5::DataSpace &dataspace)
-{
- int rank = dataspace.getSimpleExtentNdims();
-
- std::vector<hsize_t> dims;
- dims.resize(rank, 0);
- dataspace.getSimpleExtentDims(dims.data());
-
- Shape res(rank);
- for (int axis = 0; axis < rank; ++axis)
- {
- res.dim(axis) = dims[axis];
- }
-
- return res;
-}
-
-DataType toInternalDtype(const H5::DataType &h5_type)
-{
- if (h5_type == H5::PredType::IEEE_F32BE || h5_type == H5::PredType::IEEE_F32LE)
- {
- return DataType::FLOAT32;
- }
- if (h5_type == H5::PredType::STD_I32BE || h5_type == H5::PredType::STD_I32LE)
- {
- return DataType::S32;
- }
- if (h5_type == H5::PredType::STD_I64BE || h5_type == H5::PredType::STD_I64LE)
- {
- return DataType::S64;
- }
- // Only support three datatypes for now
- return DataType::Unknown;
-}
-
-void readTensorData(H5::DataSet &tensor, uint8_t *buffer)
-{
- tensor.read(buffer, H5::PredType::NATIVE_UINT8);
-}
-
-void readTensorData(H5::DataSet &tensor, float *buffer)
-{
- tensor.read(buffer, H5::PredType::NATIVE_FLOAT);
-}
-
-void readTensorData(H5::DataSet &tensor, int32_t *buffer)
-{
- tensor.read(buffer, H5::PredType::NATIVE_INT);
-}
-
-void readTensorData(H5::DataSet &tensor, int64_t *buffer)
-{
- tensor.read(buffer, H5::PredType::NATIVE_LONG);
-}
-
-} // namespace
-
-namespace record_minmax
-{
-
-int32_t HDF5Importer::numInputs(int32_t record_idx)
-{
- auto records = _value_grp.openGroup(std::to_string(record_idx));
- return records.getNumObjs();
-}
-
-void HDF5Importer::readTensor(int32_t record_idx, int32_t input_idx, void *buffer)
-{
- auto record = _value_grp.openGroup(std::to_string(record_idx));
- auto tensor = record.openDataSet(std::to_string(input_idx));
-
- readTensorData(tensor, static_cast<uint8_t *>(buffer));
-}
-
-void HDF5Importer::readTensor(int32_t record_idx, int32_t input_idx, DataType *dtype, Shape *shape,
- void *buffer)
-{
- auto record = _value_grp.openGroup(std::to_string(record_idx));
- auto tensor = record.openDataSet(std::to_string(input_idx));
-
- auto tensor_dtype = tensor.getDataType();
- *dtype = toInternalDtype(tensor_dtype);
-
- auto tensor_shape = tensor.getSpace();
- *shape = toInternalShape(tensor_shape);
-
- switch (*dtype)
- {
- case DataType::FLOAT32:
- readTensorData(tensor, static_cast<float *>(buffer));
- break;
- case DataType::S32:
- readTensorData(tensor, static_cast<int32_t *>(buffer));
- break;
- case DataType::S64:
- readTensorData(tensor, static_cast<int64_t *>(buffer));
- break;
- default:
- throw std::runtime_error{"Unsupported data type for input data (.h5)"};
- }
-}
-
-} // namespace record_minmax
diff --git a/compiler/record-minmax/src/HDF5Importer.h b/compiler/record-minmax/src/HDF5Importer.h
deleted file mode 100644
index cf6526685..000000000
--- a/compiler/record-minmax/src/HDF5Importer.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __RECORD_MINMAX_HDF5IMPORTER_H__
-#define __RECORD_MINMAX_HDF5IMPORTER_H__
-
-#include <luci_interpreter/core/Tensor.h>
-
-#include <H5Cpp.h>
-
-using Shape = luci_interpreter::Shape;
-using DataType = luci_interpreter::DataType;
-
-namespace record_minmax
-{
-
-// HDF5Importer reads an input data saved in the hdf5 file in the given path
-// The hierarchy of the hdf5 file is as follows.
-// Group "/"
-// > Group "value"
-// > Group <record_idx>
-// > Dataset <input_idx>
-// record_idx : index of the record (dataset file can contain multiple records)
-// input_idx : index of the input (DNN model can have multiple inputs)
-// Ex: the j'th input of the i'th record can be accessed by "/value/i/j"
-class HDF5Importer
-{
-public:
- explicit HDF5Importer(const std::string &path) : _file{path, H5F_ACC_RDONLY}
- {
- // Do nothing
- }
-
-public:
- /**
- * @brief importGroup has to be called before readTensor is called
- * Otherwise, readTensor will throw an exception
- */
- void importGroup() { _value_grp = _file.openGroup("value"); }
-
- /**
- * @brief Read tensor data from file and store it into buffer
- * @details A tensor in the file can be retrieved with (record_idx, input_idx)
- * @param record_idx : index of the record
- * @param input_idx : index of the input
- * @param dtype : pointer to write the tensor's data type
- * @param shape : pointer to write the tensor's shape
- * @param buffer : pointer to write the tensor's data
- */
- void readTensor(int32_t record_idx, int32_t input_idx, DataType *dtype, Shape *shape,
- void *buffer);
-
- // Read a raw tensor (no type/shape is specified)
- void readTensor(int32_t record_idx, int32_t input_idx, void *buffer);
-
- bool isRawData() { return _value_grp.attrExists("rawData"); }
-
- int32_t numRecords() { return _value_grp.getNumObjs(); }
-
- int32_t numInputs(int32_t record_idx);
-
-private:
- H5::H5File _file;
- H5::Group _value_grp;
-};
-
-} // namespace record_minmax
-
-#endif // __RECORD_MINMAX_HDF5IMPORTER_H__
diff --git a/compiler/record-minmax/src/MinMaxComputer.cpp b/compiler/record-minmax/src/MinMaxComputer.cpp
new file mode 100644
index 000000000..5b8893a50
--- /dev/null
+++ b/compiler/record-minmax/src/MinMaxComputer.cpp
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MinMaxComputer.h"
+#include "RecordFunction.h"
+
+#include <luci/IR/CircleQuantParam.h>
+
+namespace record_minmax
+{
+
+void PercentileComputer::update_qparam(
+ const std::unordered_map<const luci::CircleNode *, MinMaxVectors> *minmax_map)
+{
+ if (minmax_map == nullptr)
+ throw std::invalid_argument("minmax_map is nullptr");
+
+ for (auto iter = minmax_map->begin(); iter != minmax_map->end(); ++iter)
+ {
+ auto node = iter->first;
+ auto minmax = iter->second;
+
+ auto min = getNthPercentile(minmax.min_vector, _min_percentile);
+ auto max = getNthPercentile(minmax.max_vector, _max_percentile);
+
+ auto quantparam = std::make_unique<luci::CircleQuantParam>();
+ quantparam->min.push_back(min);
+ quantparam->max.push_back(max);
+
+ assert(node->quantparam() == nullptr);
+
+ auto mutable_node = const_cast<luci::CircleNode *>(node);
+ mutable_node->quantparam(std::move(quantparam));
+ }
+}
+
+void MovingAvgComputer::update_qparam(
+ const std::unordered_map<const luci::CircleNode *, MinMaxVectors> *minmax_map)
+{
+ if (minmax_map == nullptr)
+ throw std::invalid_argument("minmax_map is nullptr");
+
+ for (auto iter = minmax_map->begin(); iter != minmax_map->end(); ++iter)
+ {
+ auto node = iter->first;
+ auto minmax = iter->second;
+
+ auto min = getMovingAverage(minmax.min_vector, 1 - _update_const, _batch_size, true);
+ auto max = getMovingAverage(minmax.max_vector, 1 - _update_const, _batch_size, false);
+
+ auto quantparam = std::make_unique<luci::CircleQuantParam>();
+ quantparam->min.push_back(min);
+ quantparam->max.push_back(max);
+
+ assert(node->quantparam() == nullptr);
+
+ auto mutable_node = const_cast<luci::CircleNode *>(node);
+ mutable_node->quantparam(std::move(quantparam));
+ }
+}
+
+std::unique_ptr<MinMaxComputer> make_percentile_computer(float min_percentile, float max_percentile)
+{
+ return std::make_unique<PercentileComputer>(min_percentile, max_percentile);
+}
+
+std::unique_ptr<MinMaxComputer> make_moving_avg_computer(uint32_t batch_size,
+ float moving_avg_const)
+{
+ return std::make_unique<MovingAvgComputer>(batch_size, moving_avg_const);
+}
+
+} // namespace record_minmax
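Note the alpha flip here: the driver's help text states the update as avg <- avg + C * (curr_batch_avg - avg), while getMovingAverage computes avg <- alpha * avg + (1 - alpha) * curr, so both computers pass alpha = 1 - _update_const. A small numeric check of the equivalence, assuming batch minima of 1.0, 3.0 and 2.0 with C = 0.1:

#include <cassert>
#include <cmath>

int main()
{
  const float C = 0.1f;
  const float alpha = 1.0f - C;

  float avg = 1.0f;                          // initial value: min of the first batch
  avg = avg * alpha + 3.0f * (1.0f - alpha); // 1.0 + 0.1 * (3.0 - 1.0) = 1.2
  avg = avg * alpha + 2.0f * (1.0f - alpha); // 1.2 + 0.1 * (2.0 - 1.2) = 1.28
  assert(std::fabs(avg - 1.28f) < 1e-6f);
  return 0;
}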
diff --git a/compiler/record-minmax/src/MinMaxObserver.cpp b/compiler/record-minmax/src/MinMaxObserver.cpp
index c22cb4132..e6edbdca9 100644
--- a/compiler/record-minmax/src/MinMaxObserver.cpp
+++ b/compiler/record-minmax/src/MinMaxObserver.cpp
@@ -18,6 +18,9 @@
#include <luci/IR/CircleOpcode.h>
+#include <limits>
+#include <cmath>
+
using DataType = luci_interpreter::DataType;
namespace record_minmax
@@ -44,24 +47,68 @@ void MinMaxObserver::postTensorWrite(const luci::CircleNode *node,
return;
}
- if (node->opcode() == luci::CircleOpcode::ARG_MAX)
+ if (node->dtype() == DataType::BOOL)
{
- // Output of arg_max is the index of the largest value across axes of a tensor
- // this should not be quantized
+ // Bool type tensor is not quantized
+ return;
+ }
+ if (node->dtype() == DataType::S32)
+ {
+ // Integer type tensor is not quantized
+ return;
+ }
+ if (node->dtype() == DataType::S64)
+ {
+ // Integer type tensor is not quantized
return;
}
// Only support recording of float32 values
if (tensor->element_type() != DataType::FLOAT32)
- throw std::runtime_error("Tensor's data type is not float");
+ {
+ // Exceptions that should be processed in backends
+ switch (node->opcode())
+ {
+ case luci::CircleOpcode::CAST:
+ // Cast is quantized only if it converts <type> -> float.
+ // Other cases should be processed in backends.
+ case luci::CircleOpcode::RESHAPE:
+ // Reshape changes only the shape of the input tensor; effectively it is a no-op.
+ return;
+ default:
+ throw std::runtime_error("Tensor's data type is not float. " + node->name());
+ }
+ }
const auto data = tensor->data<float>();
const auto num_elements = tensor->shape().num_elements();
std::vector<float> buf(data, data + num_elements);
- auto minmax = std::minmax_element(buf.begin(), buf.end());
- float min = *minmax.first;
- float max = *minmax.second;
+
+ float max = std::numeric_limits<float>::lowest();
+ float min = std::numeric_limits<float>::max();
+
+ bool all_nan = true;
+ for (auto number : buf)
+ {
+ if (std::isnan(number))
+ continue;
+
+ // TODO use metadata hints to detect such cases
+ if (number == std::numeric_limits<float>::lowest())
+ continue;
+
+ all_nan = false;
+
+ if (number > max)
+ max = number;
+
+ if (number < min)
+ min = number;
+ }
+
+ if (all_nan)
+ throw std::runtime_error("All values are NaN(Not a Number)");
_minmax_data.recordMinMax(node, min, max);
}
diff --git a/compiler/record-minmax/src/RecordFunction.cpp b/compiler/record-minmax/src/RecordFunction.cpp
new file mode 100644
index 000000000..e812f82f3
--- /dev/null
+++ b/compiler/record-minmax/src/RecordFunction.cpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "RecordFunction.h"
+
+#include <luci/IR/CircleQuantParam.h>
+
+#include <cassert>
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <stdexcept>
+
+namespace record_minmax
+{
+
+float getNthPercentile(std::vector<float> &vector, float percentile)
+{
+ if (percentile < 0 || percentile > 100)
+ throw std::runtime_error("Percentile must be ranged from 0 to 100");
+
+ if (vector.empty())
+ throw std::runtime_error("Percentile must take a non-empty vector as an argument");
+
+ if (vector.size() == 1)
+ return vector[0];
+
+ std::vector<float> copy;
+ copy.assign(vector.begin(), vector.end());
+ std::sort(copy.begin(), copy.end());
+
+ if (percentile == 0.0)
+ return copy.front();
+
+ if (percentile == 100.0)
+ return copy.back();
+
+ int index = static_cast<int>(std::floor((copy.size() - 1) * percentile / 100.0));
+
+ float percent_i = static_cast<float>(index) / static_cast<float>(copy.size() - 1);
+ float fraction =
+ (percentile / 100.0 - percent_i) / ((index + 1.0) / (copy.size() - 1.0) - percent_i);
+ float res = copy[index] + fraction * (copy[index + 1] - copy[index]);
+ return res;
+}
+
+float getMovingAverage(const std::vector<float> &vector, const float alpha,
+ const uint8_t batch_size, bool is_min)
+{
+ assert(!vector.empty());
+ assert(alpha >= 0.0 && alpha <= 1.0);
+ assert(batch_size > 0);
+
+ auto getBatchMinOrMax = [&](uint32_t start_index) {
+ assert(start_index < vector.size());
+
+ float res = is_min ? std::numeric_limits<float>::max() : std::numeric_limits<float>::lowest();
+ for (uint32_t offset = 0; offset < batch_size; offset++)
+ {
+ uint32_t index = start_index + offset;
+ if (index >= vector.size())
+ break;
+
+ if (is_min)
+ {
+ res = vector[index] < res ? vector[index] : res;
+ }
+ else
+ {
+ res = vector[index] > res ? vector[index] : res;
+ }
+ }
+ return res;
+ };
+
+ float curr_avg = getBatchMinOrMax(0);
+ for (uint32_t i = batch_size; i < vector.size(); i += batch_size)
+ {
+ curr_avg = curr_avg * alpha + getBatchMinOrMax(i) * (1.0 - alpha);
+ }
+ return curr_avg;
+}
+
+} // namespace record_minmax
diff --git a/compiler/record-minmax/src/RecordMinMax.cpp b/compiler/record-minmax/src/RecordMinMax.cpp
index 0ef7cccd1..d96b79c34 100644
--- a/compiler/record-minmax/src/RecordMinMax.cpp
+++ b/compiler/record-minmax/src/RecordMinMax.cpp
@@ -15,28 +15,103 @@
*/
#include "RecordMinMax.h"
-#include "RecordFunction.h"
#include "MinMaxObserver.h"
-#include "HDF5Importer.h"
#include <luci/Importer.h>
#include <luci/CircleExporter.h>
#include <luci/CircleFileExpContract.h>
#include <luci/IR/CircleQuantParam.h>
+#include <luci/Log.h>
+#include <dio_hdf5/HDF5Importer.h>
+#include <dirent.h>
#include <algorithm>
#include <cmath>
#include <fstream>
#include <numeric>
#include <stdexcept>
#include <iostream>
+#include <random>
-using Shape = luci_interpreter::Shape;
-using DataType = luci_interpreter::DataType;
+using Shape = std::vector<loco::Dimension>;
+using DataType = loco::DataType;
namespace
{
+// Max h5 file size for parallel recording in bytes = 1 GB
+const long h5_max_size_bytes = 1000000000;
+
+long getH5FileSize(const std::string &input_data_path)
+{
+ std::ifstream in_file(input_data_path, std::ios::binary);
+ in_file.seekg(0, std::ios::end);
+
+ return in_file.tellg();
+}
+
+uint32_t numElements(const luci::CircleNode *node)
+{
+ uint32_t num_elements = 1;
+ for (uint32_t i = 0; i < node->rank(); i++)
+ num_elements *= node->dim(i).value();
+
+ return num_elements;
+}
+
+// Throw an exception if the input satisfies one of the following conditions:
+// 1. It has an unknown dimension
+// 2. Its number of elements is 0
+void checkInputDimension(const luci::CircleInput *input)
+{
+ for (uint32_t i = 0; i < input->rank(); i++)
+ if (!input->dim(i).known())
+ throw std::runtime_error(input->name() + " has unknown dimension");
+
+ if (numElements(input) == 0)
+ throw std::runtime_error(input->name() + " is a zero-sized input");
+}
+
+void readDataFromFile(const std::string &filename, std::vector<char> &data, size_t data_size)
+{
+ assert(data.size() == data_size); // FIX_CALLER_UNLESS
+
+ std::ifstream fs(filename, std::ifstream::binary);
+ if (fs.fail())
+ throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
+ if (fs.read(data.data(), data_size).fail())
+ throw std::runtime_error("Failed to read data from file \"" + filename + "\".\n");
+ if (fs.peek() != EOF)
+ throw std::runtime_error("Input tensor size mismatches with \"" + filename + "\".\n");
+}
+
+std::vector<uint8_t> genRandomBoolData(std::mt19937 &gen, uint32_t num_elements)
+{
+ std::uniform_int_distribution<> dist(0, 1);
+ std::vector<uint8_t> input_data(num_elements);
+
+ // Write random data
+ for (auto &iter : input_data)
+ iter = static_cast<uint8_t>(dist(gen));
+
+ return input_data;
+}
+
+template <typename T>
+std::vector<T> genRandomIntData(std::mt19937 &gen, uint32_t num_elements, T min, T max)
+{
+ std::uniform_int_distribution<T> dist(min, max);
+ std::vector<T> input_data(num_elements);
+
+ // Write random data
+ {
+ auto const generator = [&gen, &dist]() { return dist(gen); };
+ std::generate(begin(input_data), end(input_data), generator);
+ }
+
+ return input_data;
+}
+
/**
* @brief getTensorSize will return size in bytes
*/
@@ -58,12 +133,12 @@ void verifyTypeShape(const luci::CircleInput *input_node, const DataType &dtype,
if (dtype != input_node->dtype())
throw std::runtime_error("Wrong input type.");
- if (shape.num_dims() != input_node->rank())
+ if (shape.size() != input_node->rank())
throw std::runtime_error("Input rank mismatch.");
- for (uint32_t i = 0; i < shape.num_dims(); i++)
+ for (uint32_t i = 0; i < shape.size(); i++)
{
- if (shape.dim(i) != input_node->dim(i).value())
+ if (not(shape.at(i) == input_node->dim(i)))
throw std::runtime_error("Input shape mismatch.");
}
}
@@ -75,6 +150,8 @@ namespace record_minmax
void RecordMinMax::initialize(const std::string &input_model_path)
{
+ assert(_threads_size > 0);
+
// Load model from the file
std::ifstream fs(input_model_path, std::ifstream::binary);
if (fs.fail())
@@ -89,105 +166,442 @@ void RecordMinMax::initialize(const std::string &input_model_path)
model_data.size()};
if (!circle::VerifyModelBuffer(verifier))
{
- throw std::runtime_error("ERROR: Failed to verify circle '" + input_model_path + "'");
+ throw std::runtime_error("Failed to verify circle '" + input_model_path + "'");
}
- _module = luci::Importer().importModule(circle::GetModel(model_data.data()));
+ const circle::Model *circle_model = circle::GetModel(model_data.data());
+ if (circle_model == nullptr)
+ {
+ throw std::runtime_error("Failed to load '" + input_model_path + "'");
+ }
+
+ _module = luci::Importer().importModule(circle_model);
if (_module == nullptr)
{
- throw std::runtime_error("ERROR: Failed to load '" + input_model_path + "'");
+ throw std::runtime_error("Failed to load '" + input_model_path + "'");
}
- // Initialize interpreter
- _interpreter = std::make_unique<luci_interpreter::Interpreter>(_module.get());
+ // Create and initialize interpreters and observers
+ _interpreters.resize(_threads_size);
+ _observers.resize(_threads_size);
+
+ for (uint32_t thread_idx = 0; thread_idx < _threads_size; ++thread_idx)
+ {
+ auto interpreter = std::make_unique<luci_interpreter::Interpreter>(_module.get());
+ auto observer = std::make_unique<MinMaxObserver>();
- _observer = std::make_unique<MinMaxObserver>();
+ interpreter->attachObserver(observer.get());
- _interpreter->attachObserver(_observer.get());
+ _observers[thread_idx] = std::move(observer);
+ _interpreters[thread_idx] = std::move(interpreter);
+ }
}
-void RecordMinMax::profileData(const std::string &mode, const std::string &input_data_path,
- float min_percentile, float max_percentile)
+// input_data_path is a path to a directory.
+// The directory should contain binary files, each of which holds raw data
+// ready to be consumed by the input circle model without any modification.
+// TODO Reduce code duplication with profileRawData
+void RecordMinMax::profileRawDataDirectory(const std::string &input_data_path)
{
- HDF5Importer importer(input_data_path);
- importer.importGroup();
+ struct dirent *entry = nullptr;
+ DIR *dp = nullptr;
+
+ dp = opendir(input_data_path.c_str());
+ if (not dp)
+ throw std::runtime_error("Cannot open directory. Please check \"" + input_data_path +
+ "\" is a directory.\n");
- bool is_raw_data = importer.isRawData();
+ uint32_t num_records = 0;
+ const auto input_nodes = loco::input_nodes(_module->graph());
+
+ // Get total input size
+ uint32_t total_input_size = 0;
+ for (auto input : input_nodes)
+ {
+ const auto *input_node = loco::must_cast<const luci::CircleInput *>(input);
+ checkInputDimension(input_node);
+ total_input_size += getTensorSize(input_node);
+ }
+
+ while ((entry = readdir(dp)))
+ {
+ // Skip if the entry is not a regular file
+ if (entry->d_type != DT_REG)
+ continue;
+
+ const std::string filename = entry->d_name;
+ std::cout << "Recording " << num_records << "'th data" << std::endl;
+
+ // Read data from file to buffer
+ // Assumption: For a multi-input model, the binary file has the inputs concatenated in the
+ // same order as the input indices.
+ std::vector<char> input_data(total_input_size);
+ readDataFromFile(input_data_path + "/" + filename, input_data, total_input_size);
+
+ // Write data from buffer to interpreter
+ uint32_t offset = 0;
+ for (auto input : input_nodes)
+ {
+ const auto *input_node = loco::must_cast<const luci::CircleInput *>(input);
+ const auto input_size = getTensorSize(input_node);
+ getInterpreter()->writeInputTensor(input_node, input_data.data() + offset, input_size);
+
+ offset += input_size;
+ }
+
+ getInterpreter()->interpret();
+
+ num_records++;
+ }
+
+ closedir(dp);
- const auto num_records = importer.numRecords();
if (num_records == 0)
throw std::runtime_error("The input data file does not contain any record.");
+ std::cout << "Recording finished. Number of recorded data: " << num_records << std::endl;
+
+ _minmax_computer->update_qparam(getObserver()->minMaxData()->getMap());
+}
+
+// input_data_path is a text file which specifies the representative data.
+// The text file should contain one absolute file path per line.
+// Each referenced file should be a binary file containing one record of representative data,
+// ready to be consumed by the input circle model without any modification.
+// NOTE If a model has multiple inputs, the binary file should have the inputs concatenated in
+// the same order as the input indices of the circle model.
+void RecordMinMax::profileRawData(const std::string &input_data_path)
+{
+ std::ifstream input_file(input_data_path);
+ if (input_file.fail())
+ throw std::runtime_error("Cannot open file \"" + input_data_path + "\".\n");
+
+ std::string record;
+ uint32_t num_records = 0;
const auto input_nodes = loco::input_nodes(_module->graph());
- const auto num_inputs = input_nodes.size();
- for (int32_t record_idx = 0; record_idx < num_records; record_idx++)
+ // Get total input size
+ uint32_t total_input_size = 0;
+ for (auto input : input_nodes)
+ {
+ const auto *input_node = loco::must_cast<const luci::CircleInput *>(input);
+ checkInputDimension(input_node);
+ total_input_size += getTensorSize(input_node);
+ }
+
+ while (getline(input_file, record))
{
- if (num_inputs != importer.numInputs(record_idx))
- throw std::runtime_error("Wrong number of inputs.");
+ std::cout << "Recording " << num_records << "'th data" << std::endl;
- if (record_idx % 100 == 0)
- std::cout << "Recording " << record_idx << "'th data" << std::endl;
+ // Read data from file to buffer
+ // Assumption: For a multi-input model, the binary file has the inputs concatenated in the
+ // same order as the input indices.
+ std::vector<char> input_data(total_input_size);
+ readDataFromFile(record, input_data, total_input_size);
- for (int32_t input_idx = 0; input_idx < num_inputs; input_idx++)
+ // Write data from buffer to interpreter
+ uint32_t offset = 0;
+ for (auto input : input_nodes)
{
- const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
- assert(input_node->index() == input_idx);
- std::vector<char> input_data(getTensorSize(input_node));
+ const auto *input_node = loco::must_cast<const luci::CircleInput *>(input);
+ const auto input_size = getTensorSize(input_node);
+ getInterpreter()->writeInputTensor(input_node, input_data.data() + offset, input_size);
- if (!is_raw_data)
- {
- DataType dtype;
- Shape shape(input_node->rank());
- importer.readTensor(record_idx, input_idx, &dtype, &shape, input_data.data());
+ offset += input_size;
+ }
+
+ getInterpreter()->interpret();
+
+ num_records++;
+ }
+
+ if (num_records == 0)
+ throw std::runtime_error("The input data file does not contain any record.");
+
+ std::cout << "Recording finished. Number of recorded data: " << num_records << std::endl;
+
+ _minmax_computer->update_qparam(getObserver()->minMaxData()->getMap());
+}
+
+WholeOutput RecordMinMax::importH5Data(const std::string &input_data_path)
+{
+ try
+ {
+ dio::hdf5::HDF5Importer importer(input_data_path);
+ importer.importGroup("value");
+
+ bool is_raw_data = importer.isRawData();
+
+ const auto num_records = importer.numData();
+ if (num_records == 0)
+ throw std::runtime_error("The input data file does not contain any record.");
+
+ const auto input_nodes = loco::input_nodes(_module->graph());
+ const auto num_inputs = input_nodes.size();
+
+ WholeOutput whole_output(num_records);
+
+ // Read inputs to whole_output
+ for (int i = 0; i < num_records; ++i)
+ {
+ if (num_inputs != static_cast<uint32_t>(importer.numInputs(i)))
+ throw std::runtime_error("Wrong number of inputs.");
- // Check the type and the shape of the input data is valid
- verifyTypeShape(input_node, dtype, shape);
+ for (uint32_t input_idx = 0; input_idx < num_inputs; input_idx++)
+ {
+ const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
+ assert(input_node->index() == input_idx);
+ checkInputDimension(input_node);
+ Buffer input_data(getTensorSize(input_node));
+
+ if (!is_raw_data)
+ {
+ DataType dtype;
+ Shape shape;
+ importer.readTensor(i, input_idx, &dtype, &shape, input_data.data(), input_data.size());
+
+ // Check the type and the shape of the input data is valid
+ verifyTypeShape(input_node, dtype, shape);
+ }
+ else
+ {
+ // Skip type/shape check for raw data
+ importer.readTensor(i, input_idx, input_data.data(), input_data.size());
+ }
+ whole_output[i].emplace_back(std::move(input_data));
}
- else
+ }
+
+ return whole_output;
+ }
+ catch (const H5::Exception &e)
+ {
+ H5::Exception::printErrorStack();
+ throw std::runtime_error("HDF5 error occurred.");
+ }
+}
+
+void RecordMinMax::profileData(const std::string &input_data_path)
+{
+ try
+ {
+ dio::hdf5::HDF5Importer importer(input_data_path);
+ importer.importGroup("value");
+
+ bool is_raw_data = importer.isRawData();
+
+ const auto num_records = importer.numData();
+ if (num_records == 0)
+ throw std::runtime_error("The input data file does not contain any record.");
+
+ const auto input_nodes = loco::input_nodes(_module->graph());
+ const auto num_inputs = input_nodes.size();
+
+ for (int32_t record_idx = 0; record_idx < num_records; record_idx++)
+ {
+ if (num_inputs != static_cast<uint32_t>(importer.numInputs(record_idx)))
+ throw std::runtime_error("Wrong number of inputs.");
+
+ std::cout << "Recording " << record_idx << "'th data" << std::endl;
+
+ for (uint32_t input_idx = 0; input_idx < num_inputs; input_idx++)
{
- // Skip type/shape check for raw data
- importer.readTensor(record_idx, input_idx, input_data.data());
+ const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
+ assert(input_node->index() == input_idx);
+ checkInputDimension(input_node);
+ std::vector<char> input_data(getTensorSize(input_node));
+
+ if (!is_raw_data)
+ {
+ DataType dtype;
+ Shape shape;
+ importer.readTensor(record_idx, input_idx, &dtype, &shape, input_data.data(),
+ input_data.size());
+
+ // Check the type and the shape of the input data is valid
+ verifyTypeShape(input_node, dtype, shape);
+ }
+ else
+ {
+ // Skip type/shape check for raw data
+ importer.readTensor(record_idx, input_idx, input_data.data(), input_data.size());
+ }
+
+ // TODO: Input data is copied twice (file -> buffer (input_data) -> interpreter inputs)
+ // We can reduce the copy by writing data from the file directly to interpreter inputs
+ getInterpreter()->writeInputTensor(input_node, input_data.data(), input_data.size());
}
- // TODO: Input data is copied twice (file -> buffer (input_data) -> interpreter inputs)
- // We can redcue the copy by directly writing data from file to interpreter inputs
- _interpreter->writeInputTensor(input_node, input_data.data(), input_data.size());
+ getInterpreter()->interpret();
}
- _interpreter->interpret();
+ std::cout << "Recording finished. Number of recorded data: " << num_records << std::endl;
+ }
+ catch (const H5::Exception &e)
+ {
+ H5::Exception::printErrorStack();
+ throw std::runtime_error("HDF5 error occurred.");
}
- std::cout << "Recording finished. Number of recorded data: " << num_records << std::endl;
+ _minmax_computer->update_qparam(getObserver()->minMaxData()->getMap());
+}
+
+void RecordMinMax::profileDataInParallel(const std::string &input_data_path)
+{
+ LOGGER(l);
+
+ assert(_interpreters.size() == _threads_size);
+ assert(_observers.size() == _threads_size);
+
+ const long h5_file_size = getH5FileSize(input_data_path);
+
+ if (h5_file_size > h5_max_size_bytes)
+ throw std::runtime_error("H5 file size is too large for parallel recording");
- auto minmax_map = _observer->minMaxData()->getMap();
- for (auto iter = minmax_map->begin(); iter != minmax_map->end(); ++iter)
+ WholeOutput whole_output;
+ try
{
- auto node = iter->first;
- auto minmax = iter->second;
+ whole_output = importH5Data(input_data_path);
+ }
+ catch (const std::bad_alloc &e)
+ {
+ throw std::runtime_error("Out of memory during h5 data load.");
+ }
+
+ const auto num_records = whole_output.size();
+ const auto input_nodes = loco::input_nodes(_module->graph());
- float min{0.0f}, max{0.0f};
- if (mode == "percentile")
+ // Start parallel part
+ INFO(l) << _threads_size << " concurrent threads are supported." << std::endl;
+
+ const auto run_threads = num_records < _threads_size ? num_records : _threads_size;
+
+ const auto records_batch = static_cast<uint32_t>(num_records / run_threads);
+
+ auto interpret_batch = [&whole_output, &input_nodes](int first_record, int last_record,
+ luci_interpreter::Interpreter *interpreter) {
+ for (int record_index = first_record; record_index < last_record; ++record_index)
+ {
+ for (uint32_t input_idx = 0; input_idx < input_nodes.size(); input_idx++)
+ {
+ const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
+
+ const auto &cur_input_data = whole_output[record_index][input_idx];
+ interpreter->writeInputTensor(input_node, cur_input_data.data(), cur_input_data.size());
+ }
+ interpreter->interpret();
+ }
+ };
+
+ std::vector<std::thread> threads;
+ for (uint32_t t = 0; t < run_threads; ++t)
+ {
+ if (t < run_threads - 1)
{
- min = getNthPercentile(minmax.min_vector, min_percentile);
- max = getNthPercentile(minmax.max_vector, max_percentile);
+ threads.emplace_back(interpret_batch, records_batch * t, records_batch * (t + 1),
+ _interpreters[t].get());
}
- else if (mode == "moving_average")
+ else
{
- min = getMovingAverage(minmax.min_vector, 0.9, 16, true);
- max = getMovingAverage(minmax.max_vector, 0.9, 16, false);
+ threads.emplace_back(interpret_batch, records_batch * t, num_records, _interpreters[t].get());
}
- assert(mode == "percentile" || mode == "moving_average");
- auto quantparam = std::make_unique<luci::CircleQuantParam>();
- quantparam->min.push_back(min);
- quantparam->max.push_back(max);
+ }
+
+ for (uint32_t i = 0; i < run_threads; ++i)
+ threads.at(i).join();
+
+ // End parallel part
+
+ // Copy all min, max values to one min/max map
+ MinMaxMap main_min_max_map;
+
+ for (const auto &obs : _observers)
+ {
+ const auto cur_minmax_map = obs->minMaxData()->getMap();
+ for (auto &iter : *cur_minmax_map)
+ {
+ const auto node = iter.first;
+ const auto &minmax = iter.second;
+
+ main_min_max_map.appendMinMaxVector(node, minmax);
+ }
+ }
+
+ std::cout << "Recording finished. Number of recorded data: " << num_records << std::endl;
+
+ _minmax_computer->update_qparam(main_min_max_map.getMap());
+}
+
+void RecordMinMax::profileDataWithRandomInputs(void)
+{
+ // We use three randomly-generated records
+ const uint32_t num_records = 3;
+
+ const auto input_nodes = loco::input_nodes(_module->graph());
+ const auto num_inputs = input_nodes.size();
+
+ std::random_device rd;
+ std::mt19937 gen(rd());
+ std::uniform_real_distribution<> dist(-5, 5);
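+ // NOTE Float inputs are drawn uniformly from [-5, 5]; the integer inputs
+ // below use [0, 100] instead.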
+
+ for (uint32_t record_idx = 0; record_idx < num_records; record_idx++)
+ {
+ std::cout << "Recording " << record_idx << "'th data" << std::endl;
+
+ for (uint32_t input_idx = 0; input_idx < num_inputs; input_idx++)
+ {
+ const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
+ assert(input_node->index() == input_idx);
+ checkInputDimension(input_node);
+
+ const auto num_elements = numElements(input_node);
- assert(node->quantparam() == nullptr);
+ // TODO Support more input data types
+ assert(input_node->dtype() == loco::DataType::FLOAT32 ||
+ input_node->dtype() == loco::DataType::BOOL ||
+ input_node->dtype() == loco::DataType::S32 ||
+ input_node->dtype() == loco::DataType::S64);
- auto mutable_node = const_cast<luci::CircleNode *>(node);
- mutable_node->quantparam(std::move(quantparam));
+ if (input_node->dtype() == DataType::FLOAT32)
+ {
+ std::vector<float> input_data(num_elements);
+
+ // Write random data
+ for (auto &iter : input_data)
+ iter = static_cast<float>(dist(gen));
+
+ // TODO: Random data is copied twice (generator -> buffer (input_data) -> interpreter inputs)
+ // We can reduce the copy by writing random data directly to the interpreter inputs
+ getInterpreter()->writeInputTensor(input_node, input_data.data(),
+ input_data.size() * sizeof(float));
+ }
+ else if (input_node->dtype() == DataType::BOOL)
+ {
+ auto input_data = genRandomBoolData(gen, num_elements);
+ getInterpreter()->writeInputTensor(input_node, input_data.data(),
+ input_data.size() * sizeof(uint8_t));
+ }
+ else if (input_node->dtype() == DataType::S32)
+ {
+ auto input_data = genRandomIntData<int32_t>(gen, num_elements, 0, 100);
+ getInterpreter()->writeInputTensor(input_node, input_data.data(),
+ input_data.size() * sizeof(int32_t));
+ }
+ else if (input_node->dtype() == DataType::S64)
+ {
+ auto input_data = genRandomIntData<int64_t>(gen, num_elements, 0, 100);
+ getInterpreter()->writeInputTensor(input_node, input_data.data(),
+ input_data.size() * sizeof(int64_t));
+ }
+ }
+
+ getInterpreter()->interpret();
}
+
+ std::cout << "Recording finished. Number of recorded data: " << num_records << std::endl;
+
+ _minmax_computer->update_qparam(getObserver()->minMaxData()->getMap());
}
void RecordMinMax::saveModel(const std::string &output_model_path)
@@ -199,7 +613,7 @@ void RecordMinMax::saveModel(const std::string &output_model_path)
if (!exporter.invoke(&contract))
{
- throw std::runtime_error("ERROR: Failed to export '" + output_model_path + "'");
+ throw std::runtime_error("Failed to export '" + output_model_path + "'");
}
}
diff --git a/compiler/record-minmax/tests/MinMaxComputer.test.cpp b/compiler/record-minmax/tests/MinMaxComputer.test.cpp
new file mode 100644
index 000000000..41babc27d
--- /dev/null
+++ b/compiler/record-minmax/tests/MinMaxComputer.test.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MinMaxComputer.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <unordered_map>
+
+#include <gtest/gtest.h>
+
+using namespace record_minmax;
+
+TEST(MinMaxComputerTest, percentile)
+{
+ auto computer = make_percentile_computer(0.0, 100.0);
+
+ luci::CircleAdd node;
+ MinMaxVectors minmax;
+ {
+ minmax.min_vector = {1.0, 2.0, 3.0};
+ minmax.max_vector = {4.0, 5.0, 6.0};
+ }
+ std::unordered_map<const luci::CircleNode *, MinMaxVectors> min_max_map;
+ min_max_map.insert({&node, minmax});
+
+ computer->update_qparam(&min_max_map);
+
+ EXPECT_TRUE(node.quantparam() != nullptr);
+}
+
+TEST(MinMaxComputerTest, percentile_nullptr_NEG)
+{
+ auto computer = make_percentile_computer(0.0, 100.0);
+
+ EXPECT_ANY_THROW(computer->update_qparam(nullptr));
+}
+
+TEST(MinMaxComputerTest, moving_avg)
+{
+ auto computer = make_moving_avg_computer(1, 0.99);
+
+ luci::CircleAdd node;
+ MinMaxVectors minmax;
+ {
+ minmax.min_vector = {1.0, 2.0, 3.0};
+ minmax.max_vector = {4.0, 5.0, 6.0};
+ }
+ std::unordered_map<const luci::CircleNode *, MinMaxVectors> min_max_map;
+ min_max_map.insert({&node, minmax});
+
+ computer->update_qparam(&min_max_map);
+
+ EXPECT_TRUE(node.quantparam() != nullptr);
+}
+
+TEST(MinMaxComputerTest, moving_avg_nullptr_NEG)
+{
+ auto computer = make_moving_avg_computer(1, 0.99);
+
+ EXPECT_ANY_THROW(computer->update_qparam(nullptr));
+}
diff --git a/compiler/record-minmax/tests/RecordFunction.test.cpp b/compiler/record-minmax/tests/RecordFunction.test.cpp
index e2f135a4e..0d8632254 100644
--- a/compiler/record-minmax/tests/RecordFunction.test.cpp
+++ b/compiler/record-minmax/tests/RecordFunction.test.cpp
@@ -115,4 +115,12 @@ TEST(GetNthPercentileTest, EmptyVector_NEG)
SUCCEED();
}
+TEST(GetMovingAverageTest, Simple)
+{
+ std::vector<float> input{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+
+ EXPECT_NE(0, getMovingAverage(input, 0.5, 4, true));
+ EXPECT_NE(0, getMovingAverage(input, 0.5, 4, false));
+}
+
} // namespace record_minmax
diff --git a/compiler/souschef/CMakeLists.txt b/compiler/souschef/CMakeLists.txt
index ca7eddc6f..8dcf4c2b8 100644
--- a/compiler/souschef/CMakeLists.txt
+++ b/compiler/souschef/CMakeLists.txt
@@ -1,13 +1,20 @@
nnas_find_package(Protobuf QUIET)
+nnas_find_package(Fp16Source QUIET)
if(NOT Protobuf_FOUND)
- message(STATUS "Build souschef: FAILED (missing Protobuf")
+ message(STATUS "Build souschef: FAILED (missing Protobuf)")
return()
endif(NOT Protobuf_FOUND)
+if(NOT Fp16Source_FOUND)
+ message(STATUS "Build souschef: FAILED (missing Fp16Source)")
+ return()
+endif(NOT Fp16Source_FOUND)
+
file(GLOB_RECURSE SOURCES "src/*.cpp")
add_library(souschef STATIC ${SOURCES})
set_target_properties(souschef PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(souschef PRIVATE ${Fp16Source_DIR}/include)
target_include_directories(souschef PUBLIC include)
target_link_libraries(souschef PUBLIC libprotobuf)
diff --git a/compiler/souschef/include/souschef/Data/Explicit.h b/compiler/souschef/include/souschef/Data/Explicit.h
index 6e5ee819e..434d0ec2c 100644
--- a/compiler/souschef/include/souschef/Data/Explicit.h
+++ b/compiler/souschef/include/souschef/Data/Explicit.h
@@ -59,6 +59,27 @@ private:
std::vector<T> _values;
};
+template <> class ExplicitDataChef<std::string> final : public DataChef
+{
+public:
+ ExplicitDataChef()
+ {
+ // DO NOTHING
+ }
+
+public:
+ std::vector<uint8_t> generate(int32_t count) const override;
+
+public:
+ void insert(const std::string &value) { _values.emplace_back(value); }
+
+private:
+ void write_value(std::vector<uint8_t> &res, int32_t value) const;
+
+private:
+ std::vector<std::string> _values;
+};
+
template <typename T> struct ExplicitDataChefFactory : public DataChefFactory
{
std::unique_ptr<DataChef> create(const Arguments &args) const
@@ -75,6 +96,41 @@ template <typename T> struct ExplicitDataChefFactory : public DataChefFactory
}
};
+class ExplicitFloat16DataChef final : public DataChef
+{
+public:
+ ExplicitFloat16DataChef()
+ {
+ // DO NOTHING
+ }
+
+public:
+ std::vector<uint8_t> generate(int32_t count) const override;
+
+public:
+ void insert(const float &value) { _values.emplace_back(value); }
+
+private:
+ // NOTE values are stored as float here but converted to uint16_t in generate()
+ std::vector<float> _values;
+};
+
+struct ExplicitFloat16DataChefFactory : public DataChefFactory
+{
+ std::unique_ptr<DataChef> create(const Arguments &args) const
+ {
+ std::unique_ptr<ExplicitFloat16DataChef> res{new ExplicitFloat16DataChef};
+
+ for (uint32_t n = 0; n < args.count(); ++n)
+ {
+ auto const value = to_number<float>(args.value(n));
+ res->insert(value);
+ }
+
+ return std::move(res);
+ }
+};
+
} // namespace souschef
#endif // __SOUSCHEF_DATA_EXPLICIT_H__
diff --git a/compiler/souschef/include/souschef/Data/Gaussian.h b/compiler/souschef/include/souschef/Data/Gaussian.h
index 75570e0b8..801bff8e9 100644
--- a/compiler/souschef/include/souschef/Data/Gaussian.h
+++ b/compiler/souschef/include/souschef/Data/Gaussian.h
@@ -41,6 +41,22 @@ private:
float _stddev;
};
+class GaussianFloat16DataChef final : public DataChef
+{
+public:
+ GaussianFloat16DataChef(float mean, float stddev) : _mean{mean}, _stddev{stddev}
+ {
+ // DO NOTHING
+ }
+
+public:
+ std::vector<uint8_t> generate(int32_t count) const override;
+
+private:
+ float _mean;
+ float _stddev;
+};
+
class GaussianInt32DataChef final : public DataChef
{
public:
@@ -57,6 +73,22 @@ private:
float _stddev;
};
+class GaussianInt16DataChef final : public DataChef
+{
+public:
+ GaussianInt16DataChef(float mean, float stddev) : _mean{mean}, _stddev{stddev}
+ {
+ // DO NOTHING
+ }
+
+public:
+ std::vector<uint8_t> generate(int32_t count) const override;
+
+private:
+ float _mean;
+ float _stddev;
+};
+
class GaussianUint8DataChef final : public DataChef
{
public:
@@ -73,6 +105,22 @@ private:
float _stddev;
};
+class GaussianInt8DataChef final : public DataChef
+{
+public:
+ GaussianInt8DataChef(float mean, float stddev) : _mean{mean}, _stddev{stddev}
+ {
+ // DO NOTHING
+ }
+
+public:
+ std::vector<uint8_t> generate(int32_t count) const override;
+
+private:
+ float _mean;
+ float _stddev;
+};
+
struct GaussianFloat32DataChefFactory : public DataChefFactory
{
std::unique_ptr<DataChef> create(const Arguments &args) const;
@@ -83,11 +131,26 @@ struct GaussianInt32DataChefFactory : public DataChefFactory
std::unique_ptr<DataChef> create(const Arguments &args) const;
};
+struct GaussianInt16DataChefFactory : public DataChefFactory
+{
+ std::unique_ptr<DataChef> create(const Arguments &args) const;
+};
+
struct GaussianUint8DataChefFactory : public DataChefFactory
{
std::unique_ptr<DataChef> create(const Arguments &args) const;
};
+struct GaussianFloat16DataChefFactory : public DataChefFactory
+{
+ std::unique_ptr<DataChef> create(const Arguments &args) const;
+};
+
+struct GaussianInt8DataChefFactory : public DataChefFactory
+{
+ std::unique_ptr<DataChef> create(const Arguments &args) const;
+};
+
} // namespace souschef
#endif // __SOUSCHEF_DATA_GAUSSIAN_H__
diff --git a/compiler/souschef/include/souschef/DataChef.def b/compiler/souschef/include/souschef/DataChef.def
deleted file mode 100644
index 28901db18..000000000
--- a/compiler/souschef/include/souschef/DataChef.def
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef DATA_CHEF
-#error "Define DATA_CHEF first"
-#endif // DATA_CHEF
-
-// DATA_CHEF(TYPE, NAME, FACTORY_CLASS)
-// "TYPE" SHOULD BE an enum tag of tflchef::TensorType
-DATA_CHEF(FLOAT32, constant, ConstantDataChefFactory<float>)
-DATA_CHEF(BOOL, constant, ConstantDataChefFactory<bool>)
-DATA_CHEF(UINT8, constant, ConstantDataChefFactory<uint8_t>)
-DATA_CHEF(INT32, constant, ConstantDataChefFactory<int32_t>)
-DATA_CHEF(INT64, constant, ConstantDataChefFactory<int64_t>)
-DATA_CHEF(INT64, explicit, ExplicitDataChefFactory<int64_t>)
-DATA_CHEF(INT32, explicit, ExplicitDataChefFactory<int32_t>)
-DATA_CHEF(UINT8, explicit, ExplicitDataChefFactory<uint8_t>)
-DATA_CHEF(BOOL, explicit, ExplicitDataChefFactory<bool>)
-DATA_CHEF(FLOAT32, explicit, ExplicitDataChefFactory<float>)
-DATA_CHEF(FLOAT32, gaussian, GaussianFloat32DataChefFactory)
-DATA_CHEF(INT32, gaussian, GaussianInt32DataChefFactory)
-DATA_CHEF(UINT8, gaussian, GaussianUint8DataChefFactory)
diff --git a/compiler/souschef/include/souschef/Dims.h b/compiler/souschef/include/souschef/Dims.h
index 52c64dd47..fabbf3f95 100644
--- a/compiler/souschef/include/souschef/Dims.h
+++ b/compiler/souschef/include/souschef/Dims.h
@@ -17,6 +17,7 @@
#ifndef __SOUSCHEF_DIMS_H__
#define __SOUSCHEF_DIMS_H__
+#include <cstdint>
#include <functional>
#include <numeric>
#include <vector>
diff --git a/compiler/souschef/src/Explicit.cpp b/compiler/souschef/src/Explicit.cpp
new file mode 100644
index 000000000..3278ae3c3
--- /dev/null
+++ b/compiler/souschef/src/Explicit.cpp
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "souschef/Data/Explicit.h"
+
+#include <string>
+#include <vector>
+
+#include <fp16.h>
+
+namespace souschef
+{
+
+/**
+ * @note This emulates TensorFlow's int DynamicBuffer::WriteToBuffer(char** buffer) method
+ * Memory structure:
+ * int32_t count
+ * int32_t offsets[count + 1]
+ * string values[count]
+ * where each string is stored like std::string without a terminating null byte
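+ * e.g. count = 2 with values {"ab", "c"} produces
+ * 2 | 16 18 19 | 'a' 'b' 'c' (four int32 words followed by 3 string bytes)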
+ */
+std::vector<uint8_t> ExplicitDataChef<std::string>::generate(int32_t count) const
+{
+ std::vector<uint8_t> res;
+
+ // write count
+ write_value(res, count);
+
+ // write first item offset
+ int32_t start = sizeof(int32_t) * (count + 2);
+ write_value(res, start);
+
+ // write each following item's offset (the last one is the end offset)
+ int32_t offset = start;
+ for (uint32_t n = 0; n < count; ++n)
+ {
+ std::string const value = (n < _values.size()) ? _values.at(n) : std::string{};
+ offset += value.length();
+ write_value(res, offset);
+ }
+
+ for (uint32_t n = 0; n < count; ++n)
+ {
+ std::string const value = (n < _values.size()) ? _values.at(n) : std::string{};
+ const uint8_t *arr = reinterpret_cast<const uint8_t *>(value.c_str());
+
+ for (uint32_t b = 0; b < value.length(); ++b)
+ {
+ res.emplace_back(arr[b]);
+ }
+ }
+
+ return res;
+}
+
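+// NOTE write_value below emits the int32 bytes in host byte order; the TFLite
+// string buffer layout is little-endian, so a little-endian host is assumed.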
+void ExplicitDataChef<std::string>::write_value(std::vector<uint8_t> &res, int32_t value) const
+{
+ const uint8_t *arr = reinterpret_cast<const uint8_t *>(&value);
+
+ for (uint32_t b = 0; b < sizeof(int32_t); ++b)
+ {
+ res.emplace_back(arr[b]);
+ }
+}
+
+std::vector<uint8_t> ExplicitFloat16DataChef::generate(int32_t count) const
+{
+ std::vector<uint8_t> res;
+
+ for (uint32_t n = 0; n < count; ++n)
+ {
+ float const fvalue = (n < _values.size()) ? _values.at(n) : 0.0;
+ uint16_t const value = fp16_ieee_from_fp32_value(fvalue);
+ auto const arr = reinterpret_cast<const uint8_t *>(&value);
+
+ for (uint32_t b = 0; b < sizeof(uint16_t); ++b)
+ {
+ res.emplace_back(arr[b]);
+ }
+ }
+
+ return res;
+}
+
+} // namespace souschef
diff --git a/compiler/souschef/src/Gaussian.cpp b/compiler/souschef/src/Gaussian.cpp
index 4a5083d8e..71ab56792 100644
--- a/compiler/souschef/src/Gaussian.cpp
+++ b/compiler/souschef/src/Gaussian.cpp
@@ -22,26 +22,32 @@
#include <cassert>
#include <stdexcept>
+#include <limits> // std::numeric_limits
+
+#include <fp16.h>
namespace souschef
{
-std::vector<uint8_t> GaussianFloat32DataChef::generate(int32_t count) const
+template <typename T>
+static std::vector<uint8_t> generate_gaussian(int32_t count, float mean, float stddev,
+ std::minstd_rand::result_type seed)
{
- // TODO Support seed value override
- auto seed = std::chrono::system_clock::now().time_since_epoch().count();
-
std::minstd_rand rand{static_cast<std::minstd_rand::result_type>(seed)};
- std::normal_distribution<float> dist{_mean, _stddev};
+ std::normal_distribution<float> dist{mean, stddev};
std::vector<uint8_t> res;
+ constexpr float max_cap = std::numeric_limits<T>::max();
+ constexpr float min_cap = std::numeric_limits<T>::lowest();
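+ // Clamp before the cast below: converting a float outside T's range to an
+ // integral T is undefined behavior.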
for (uint32_t n = 0; n < count; ++n)
{
- auto const value = dist(rand);
+ float raw_value = dist(rand);
+ const float capped_value = std::max(min_cap, std::min(max_cap, raw_value));
+ auto const value = static_cast<T>(capped_value);
auto const arr = reinterpret_cast<const uint8_t *>(&value);
- for (uint32_t b = 0; b < sizeof(float); ++b)
+ for (uint32_t b = 0; b < sizeof(T); ++b)
{
res.emplace_back(arr[b]);
}
@@ -50,22 +56,42 @@ std::vector<uint8_t> GaussianFloat32DataChef::generate(int32_t count) const
return res;
}
-std::vector<uint8_t> GaussianInt32DataChef::generate(int32_t count) const
+template <typename T>
+static std::vector<uint8_t> generate_gaussian(int32_t count, float mean, float stddev)
{
- // TODO Support seed value override
- auto seed = std::chrono::system_clock::now().time_since_epoch().count();
+ auto time_stamp = std::chrono::system_clock::now().time_since_epoch().count();
+
+ // NOTE This conversion is implementation-defined; change if needed.
+ auto seed = static_cast<std::minstd_rand::result_type>(time_stamp);
+
+ return generate_gaussian<T>(count, mean, stddev, seed);
+}
+
+std::vector<uint8_t> GaussianFloat32DataChef::generate(int32_t count) const
+{
+ return generate_gaussian<float>(count, _mean, _stddev);
+}
+
+std::vector<uint8_t> GaussianFloat16DataChef::generate(int32_t count) const
+{
+ auto time_stamp = std::chrono::system_clock::now().time_since_epoch().count();
+ auto seed = static_cast<std::minstd_rand::result_type>(time_stamp);
std::minstd_rand rand{static_cast<std::minstd_rand::result_type>(seed)};
std::normal_distribution<float> dist{_mean, _stddev};
std::vector<uint8_t> res;
+ constexpr float max_cap = 1e9;
+ constexpr float min_cap = -1e9;
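+ // NOTE The largest finite fp16 value is 65504, so capped values above that
+ // still become +/-inf after conversion.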
for (uint32_t n = 0; n < count; ++n)
{
- auto const value = static_cast<int32_t>(dist(rand));
+ float raw_value = dist(rand);
+ const float capped_value = std::max(min_cap, std::min(max_cap, raw_value));
+ const uint16_t value = fp16_ieee_from_fp32_value(capped_value);
auto const arr = reinterpret_cast<const uint8_t *>(&value);
- for (uint32_t b = 0; b < sizeof(int32_t); ++b)
+ for (uint32_t b = 0; b < sizeof(uint16_t); ++b)
{
res.emplace_back(arr[b]);
}
@@ -74,28 +100,24 @@ std::vector<uint8_t> GaussianInt32DataChef::generate(int32_t count) const
return res;
}
-std::vector<uint8_t> GaussianUint8DataChef::generate(int32_t count) const
+std::vector<uint8_t> GaussianInt32DataChef::generate(int32_t count) const
{
- // TODO Support seed value override
- auto seed = std::chrono::system_clock::now().time_since_epoch().count();
-
- std::minstd_rand rand{static_cast<std::minstd_rand::result_type>(seed)};
- std::normal_distribution<float> dist{_mean, _stddev};
-
- std::vector<uint8_t> res;
+ return generate_gaussian<int32_t>(count, _mean, _stddev);
+}
- for (uint32_t n = 0; n < count; ++n)
- {
- auto const value = static_cast<uint8_t>(dist(rand)); // uint8_t for data type
- auto const arr = reinterpret_cast<const uint8_t *>(&value); // uint8_t for byte streaming
+std::vector<uint8_t> GaussianInt16DataChef::generate(int32_t count) const
+{
+ return generate_gaussian<int16_t>(count, _mean, _stddev);
+}
- for (uint32_t b = 0; b < sizeof(uint8_t); ++b)
- {
- res.emplace_back(arr[b]);
- }
- }
+std::vector<uint8_t> GaussianUint8DataChef::generate(int32_t count) const
+{
+ return generate_gaussian<uint8_t>(count, _mean, _stddev);
+}
- return res;
+std::vector<uint8_t> GaussianInt8DataChef::generate(int32_t count) const
+{
+ return generate_gaussian<int8_t>(count, _mean, _stddev);
}
std::unique_ptr<DataChef> GaussianFloat32DataChefFactory::create(const Arguments &args) const
@@ -124,6 +146,19 @@ std::unique_ptr<DataChef> GaussianInt32DataChefFactory::create(const Arguments &
return std::unique_ptr<DataChef>{new GaussianInt32DataChef{mean, stddev}};
}
+std::unique_ptr<DataChef> GaussianInt16DataChefFactory::create(const Arguments &args) const
+{
+ if (args.count() != 2)
+ {
+ throw std::runtime_error{"invalid argument count: two arguments (mean/stddev) are expected"};
+ }
+
+ auto const mean = to_number<float>(args.value(0));
+ auto const stddev = to_number<float>(args.value(1));
+
+ return std::unique_ptr<DataChef>{new GaussianInt16DataChef{mean, stddev}};
+}
+
std::unique_ptr<DataChef> GaussianUint8DataChefFactory::create(const Arguments &args) const
{
if (args.count() != 2)
@@ -137,4 +172,30 @@ std::unique_ptr<DataChef> GaussianUint8DataChefFactory::create(const Arguments &
return std::unique_ptr<DataChef>{new GaussianUint8DataChef{mean, stddev}};
}
+std::unique_ptr<DataChef> GaussianInt8DataChefFactory::create(const Arguments &args) const
+{
+ if (args.count() != 2)
+ {
+ throw std::runtime_error{"invalid argument count: two arguments (mean/stddev) are expected"};
+ }
+
+ auto const mean = to_number<float>(args.value(0));
+ auto const stddev = to_number<float>(args.value(1));
+
+ return std::unique_ptr<DataChef>{new GaussianInt8DataChef{mean, stddev}};
+}
+
+std::unique_ptr<DataChef> GaussianFloat16DataChefFactory::create(const Arguments &args) const
+{
+ if (args.count() != 2)
+ {
+ throw std::runtime_error{"invalid argument count: two arguments (mean/stddev) are expected"};
+ }
+
+ auto const mean = to_number<float>(args.value(0));
+ auto const stddev = to_number<float>(args.value(1));
+
+ return std::unique_ptr<DataChef>{new GaussianFloat16DataChef{mean, stddev}};
+}
+
} // namespace souschef
diff --git a/compiler/souschef/src/LexicalCast.cpp b/compiler/souschef/src/LexicalCast.cpp
index 8e3d4cbbb..4468f1ec1 100644
--- a/compiler/souschef/src/LexicalCast.cpp
+++ b/compiler/souschef/src/LexicalCast.cpp
@@ -18,12 +18,25 @@
#include <cassert>
#include <limits>
+#include <stdexcept>
namespace souschef
{
template <> float to_number(const std::string &s) { return std::stof(s); }
template <> int to_number(const std::string &s) { return std::stoi(s); }
+template <> int16_t to_number(const std::string &s)
+{
+ // There is no standard function to parse int16_t or short int
+ // This function simulates behavior similar to stoi, stol and stoll
+ long res = std::stol(s);
+ // The standard does not specify the exception message string; this one is arbitrary
+ if (res < std::numeric_limits<int16_t>::min() || res > std::numeric_limits<int16_t>::max())
+ {
+ throw std::out_of_range("to_number<int16_t>");
+ }
+ return res;
+}
template <> int64_t to_number(const std::string &s) { return std::stoll(s); }
template <> uint8_t to_number(const std::string &s)
{
@@ -32,11 +45,21 @@ template <> uint8_t to_number(const std::string &s)
assert(temp <= std::numeric_limits<uint8_t>::max());
return static_cast<uint8_t>(temp);
}
+template <> int8_t to_number(const std::string &s)
+{
+ int temp = std::stoi(s);
+ assert(temp >= std::numeric_limits<int8_t>::min());
+ assert(temp <= std::numeric_limits<int8_t>::max());
+ return static_cast<int8_t>(temp);
+}
template <> bool to_number(const std::string &s)
{
- if (std::stoi(s) || s == "T" || s == "t" || s == "TRUE" || s == "true")
+ if (s == "T" || s == "t" || s == "TRUE" || s == "true" || s == "1")
return true;
- return false;
+ if (s == "F" || s == "f" || s == "FALSE" || s == "false" || s == "0")
+ return false;
+ throw std::invalid_argument("Unsupported boolean argument");
}
+template <> std::string to_number(const std::string &s) { return s; }
} // namespace souschef
diff --git a/compiler/stdex/CMakeLists.txt b/compiler/stdex/CMakeLists.txt
deleted file mode 100644
index 91f07e69f..000000000
--- a/compiler/stdex/CMakeLists.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-file(GLOB_RECURSE TESTS "src/*.test.cpp")
-
-add_library(stdex INTERFACE)
-target_include_directories(stdex INTERFACE include)
-
-if(NOT ENABLE_TEST)
- return()
-endif(NOT ENABLE_TEST)
-
-# Google Test is mandatory for test
-nnas_find_package(GTest REQUIRED)
-
-add_executable(stdex_test ${TESTS})
-target_link_libraries(stdex_test stdex)
-target_link_libraries(stdex_test gtest_main)
-add_test(stdex_test stdex_test)
diff --git a/compiler/stdex/README.md b/compiler/stdex/README.md
deleted file mode 100644
index 054d08569..000000000
--- a/compiler/stdex/README.md
+++ /dev/null
@@ -1,22 +0,0 @@
-# stdex
-
-`stdex` is an extension over standard C++ libraries.
-
-# How to use
-
-Please read each header files.
-
-One example of `stdex::make_unique(..)` in `compiler/stdex/Memory.h` is as follows:
-
-```cpp
-#include <stdex/Memory.h>
-
-using stdex::make_unique;
-
-class A { ... };
-
-...
-
-std::unique_ptr<A> a = make_unique<A>(); // Note: std::make_unique is not supported in C++ 11
-
-```
diff --git a/compiler/stdex/include/stdex/Memory.h b/compiler/stdex/include/stdex/Memory.h
deleted file mode 100644
index 86751f073..000000000
--- a/compiler/stdex/include/stdex/Memory.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __STDEX_MEMORY_H__
-#define __STDEX_MEMORY_H__
-
-#include <memory>
-
-namespace stdex
-{
-
-using std::make_unique;
-
-} // namespace stdex
-
-#endif // __STDEX_MEMORY_H__
diff --git a/compiler/stdex/include/stdex/Queue.h b/compiler/stdex/include/stdex/Queue.h
deleted file mode 100644
index c72297bc8..000000000
--- a/compiler/stdex/include/stdex/Queue.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __STDEX_QUEUE_H__
-#define __STDEX_QUEUE_H__
-
-#include <queue>
-
-namespace stdex
-{
-
-/**
- * @brief Take the front (= first) element from the queue
- * @note The queue SHOULD have at least one element
- */
-template <typename T> T take(std::queue<T> &q)
-{
- auto res = q.front();
- q.pop();
- return res;
-}
-
-} // namespace stdex
-
-#endif // __STDEX_QUEUE_H__
diff --git a/compiler/stdex/include/stdex/Set.h b/compiler/stdex/include/stdex/Set.h
deleted file mode 100644
index 2c61e0d01..000000000
--- a/compiler/stdex/include/stdex/Set.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __STDEX_SET_H__
-#define __STDEX_SET_H__
-
-#include <set>
-
-template <typename T> bool operator==(const std::set<T> &lhs, const std::set<T> &rhs)
-{
- if (rhs.size() != lhs.size())
- {
- return false;
- }
-
- for (const auto &element : lhs)
- {
- if (rhs.find(element) == rhs.end())
- {
- return false;
- }
- }
-
- return true;
-}
-
-template <typename T> std::set<T> operator-(const std::set<T> &lhs, const std::set<T> &rhs)
-{
- std::set<T> res;
-
- for (const auto &element : lhs)
- {
- if (rhs.find(element) == rhs.end())
- {
- res.insert(element);
- }
- }
-
- return res;
-}
-
-#endif // __STDEX_SET_H__
diff --git a/compiler/stdex/src/Memory.test.cpp b/compiler/stdex/src/Memory.test.cpp
deleted file mode 100644
index 433af4534..000000000
--- a/compiler/stdex/src/Memory.test.cpp
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "stdex/Memory.h"
-
-#include <gtest/gtest.h>
-
-namespace
-{
-
-struct Stat
-{
- unsigned allocated = 0;
- unsigned freed = 0;
-};
-
-struct Counter
-{
-public:
- Counter(Stat *stat) : _stat{stat} { _stat->allocated += 1; }
-
-public:
- ~Counter() { _stat->freed += 1; }
-
-private:
- Stat *_stat;
-};
-
-} // namespace
-
-TEST(MemoryTest, make_unique)
-{
- Stat stat;
-
- ASSERT_EQ(stat.allocated, 0);
- ASSERT_EQ(stat.freed, 0);
-
- auto o = stdex::make_unique<::Counter>(&stat);
-
- ASSERT_EQ(stat.allocated, 1);
- ASSERT_EQ(stat.freed, 0);
-
- o.reset();
-
- ASSERT_EQ(stat.allocated, 1);
- ASSERT_EQ(stat.freed, 1);
-}
diff --git a/compiler/stdex/src/Queue.test.cpp b/compiler/stdex/src/Queue.test.cpp
deleted file mode 100644
index d76cd3ee6..000000000
--- a/compiler/stdex/src/Queue.test.cpp
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "stdex/Queue.h"
-
-#include <gtest/gtest.h>
-
-TEST(QueueTest, take)
-{
- std::queue<int> q;
-
- q.emplace(3);
- q.emplace(4);
- q.emplace(5);
-
- ASSERT_EQ(stdex::take(q), 3);
- ASSERT_EQ(stdex::take(q), 4);
- ASSERT_EQ(stdex::take(q), 5);
-}
diff --git a/compiler/stdex/src/Set.test.cpp b/compiler/stdex/src/Set.test.cpp
deleted file mode 100644
index 90361936f..000000000
--- a/compiler/stdex/src/Set.test.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "stdex/Set.h"
-
-#include <gtest/gtest.h>
-
-TEST(SET, operator_eq)
-{
- ASSERT_TRUE(std::set<int>({1, 2, 3}) == std::set<int>({1, 2, 3}));
- ASSERT_FALSE(std::set<int>({1, 3}) == std::set<int>({1, 2, 3}));
-}
-
-TEST(SET, operator_diff)
-{
- const std::set<int> lhs{1, 2, 3};
- const std::set<int> rhs{2, 4};
-
- auto res = lhs - rhs;
-
- ASSERT_EQ(res.size(), 2);
- ASSERT_NE(res.find(1), res.end());
- ASSERT_NE(res.find(3), res.end());
-}
diff --git a/compiler/tf2circle-conversion-test/CMakeLists.txt b/compiler/tf2circle-conversion-test/CMakeLists.txt
index 27f2463f3..79a39873b 100644
--- a/compiler/tf2circle-conversion-test/CMakeLists.txt
+++ b/compiler/tf2circle-conversion-test/CMakeLists.txt
@@ -128,6 +128,10 @@ list(APPEND TEST_DEPS "${TEST_CONFIG}")
# This "tf2circle_conversion_test_deps" target enforces CMake to generate all the dependencies during "build" phase
add_custom_target(tf2circle_conversion_test_deps ALL DEPENDS ${TEST_DEPS})
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
# Run tests
add_test(
NAME tf2circle_conversion_test
diff --git a/compiler/tf2circle-dredd-pb-test/CMakeLists.txt b/compiler/tf2circle-dredd-pb-test/CMakeLists.txt
index 48b098e24..83596fade 100644
--- a/compiler/tf2circle-dredd-pb-test/CMakeLists.txt
+++ b/compiler/tf2circle-dredd-pb-test/CMakeLists.txt
@@ -132,6 +132,10 @@ list(APPEND DEPS "${TARGET_RULE_LIB}")
# Generate dependencies
add_custom_target(tf2circle_dredd_pb_deps ALL DEPENDS ${DEPS})
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
add_test(
NAME tf2circle_dredd_pb_test
COMMAND
diff --git a/compiler/tf2circle-dredd-pbtxt-test/CMakeLists.txt b/compiler/tf2circle-dredd-pbtxt-test/CMakeLists.txt
index 789e58535..427e57502 100644
--- a/compiler/tf2circle-dredd-pbtxt-test/CMakeLists.txt
+++ b/compiler/tf2circle-dredd-pbtxt-test/CMakeLists.txt
@@ -175,6 +175,10 @@ list(APPEND DEPS "${TARGET_RULE_LIB}")
# Generate dependencies
add_custom_target(tf2circle_dredd_pbtxt_deps ALL DEPENDS ${DEPS})
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
add_test(
NAME tf2circle_dredd_pbtxt_test
COMMAND
diff --git a/compiler/tf2circle-model-test/CMakeLists.txt b/compiler/tf2circle-model-test/CMakeLists.txt
index 2fb82236a..ad776a62b 100644
--- a/compiler/tf2circle-model-test/CMakeLists.txt
+++ b/compiler/tf2circle-model-test/CMakeLists.txt
@@ -100,6 +100,10 @@ list(APPEND DEPS "${TEST_RUNNER_SCRIPT}")
### Generate dependencies
add_custom_target(tf2circle_model_test_deps ALL DEPENDS ${DEPS})
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
# NOTE This target is not built by default
add_test(
NAME tf2circle_model_test
diff --git a/compiler/tf2circle-value-pbtxt-remote-test/CMakeLists.txt b/compiler/tf2circle-value-pbtxt-remote-test/CMakeLists.txt
index 852018e64..f0ba92177 100644
--- a/compiler/tf2circle-value-pbtxt-remote-test/CMakeLists.txt
+++ b/compiler/tf2circle-value-pbtxt-remote-test/CMakeLists.txt
@@ -140,7 +140,7 @@ add_custom_command(
COMMAND ${CMAKE_COMMAND} -E echo 'RANDOMIZE_ACTION_PATH=\"$<TARGET_FILE:nnkit_randomize_action>\"' >> ${TEST_CONFIG}
COMMAND ${CMAKE_COMMAND} -E echo 'HDF5_EXPORT_ACTION_PATH=\"$<TARGET_FILE:nnkit_HDF5_export_action>\"' >> ${TEST_CONFIG}
COMMAND ${CMAKE_COMMAND} -E echo 'HDF5_IMPORT_ACTION_PATH=\"$<TARGET_FILE:nnkit_HDF5_import_action>\"' >> ${TEST_CONFIG}
- COMMAND ${CMAKE_COMMAND} -E echo 'MODEL2NNPKG_PATH=\"${NNAS_PROJECT_SOURCE_DIR}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh\"' >> ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'MODEL2NNPKG_PATH=\"${NNAS_PROJECT_SOURCE_DIR}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.py\"' >> ${TEST_CONFIG}
COMMAND ${CMAKE_COMMAND} -E echo 'RUNTIME_LIBRARY_PATH=\"${NNAS_PROJECT_SOURCE_DIR}/Product/out/\"' >> ${TEST_CONFIG}
DEPENDS
nnkit-run
diff --git a/compiler/tf2circle-value-pbtxt-remote-test/testall.sh b/compiler/tf2circle-value-pbtxt-remote-test/testall.sh
index c80b00a14..56ef0708e 100755
--- a/compiler/tf2circle-value-pbtxt-remote-test/testall.sh
+++ b/compiler/tf2circle-value-pbtxt-remote-test/testall.sh
@@ -102,7 +102,7 @@ while [[ $# -ne 0 ]]; do
--post-arg "${WORKDIR}/${PREFIX}.expected.h5"
# Generate nnpackage model
- "${MODEL2NNPKG_PATH}" -o "${WORKDIR}" "${WORKDIR}/${PREFIX}.circle"
+ "${MODEL2NNPKG_PATH}" -o "${WORKDIR}" -m "${WORKDIR}/${PREFIX}.circle"
# Copy h5 files into nnpackage
mkdir -p "${WORKDIR}/${PREFIX}/metadata/tc"
diff --git a/compiler/tf2circle/CMakeLists.txt b/compiler/tf2circle/CMakeLists.txt
index 549f731a4..8678e90b4 100644
--- a/compiler/tf2circle/CMakeLists.txt
+++ b/compiler/tf2circle/CMakeLists.txt
@@ -40,7 +40,6 @@ target_link_libraries(tf2circle PRIVATE tfinfo)
target_link_libraries(tf2circle PRIVATE exo)
target_link_libraries(tf2circle PRIVATE locop)
target_link_libraries(tf2circle PRIVATE hermes_std)
-target_link_libraries(tf2circle PRIVATE stdex)
target_link_libraries(tf2circle PRIVATE angkor cwrap)
target_link_libraries(tf2circle PRIVATE tf2circle_customop_info_proto)
diff --git a/compiler/tf2circle/requires.cmake b/compiler/tf2circle/requires.cmake
index 68d45bf3a..87ea50bf7 100644
--- a/compiler/tf2circle/requires.cmake
+++ b/compiler/tf2circle/requires.cmake
@@ -1,4 +1,3 @@
-require("stdex")
require("hermes-std")
require("moco-tf")
require("exo")
diff --git a/compiler/tf2circle/src/CustomopConfLoader.cpp b/compiler/tf2circle/src/CustomopConfLoader.cpp
index 412405893..0520ad0d3 100644
--- a/compiler/tf2circle/src/CustomopConfLoader.cpp
+++ b/compiler/tf2circle/src/CustomopConfLoader.cpp
@@ -27,6 +27,8 @@
#include <fcntl.h>
+#include <limits> // std::numeric_limits
+
namespace
{
bool load_text(const cwrap::Fildes &fildes, tf2circle::CustomOpInfoDef &def)
diff --git a/compiler/tf2circle/src/tf2circle.cpp b/compiler/tf2circle/src/tf2circle.cpp
index a1160e968..b4d21133d 100644
--- a/compiler/tf2circle/src/tf2circle.cpp
+++ b/compiler/tf2circle/src/tf2circle.cpp
@@ -28,10 +28,8 @@
#include <hermes/ConsoleReporter.h>
#include <hermes/EnvConfig.h>
-#include <stdex/Memory.h>
-
#include <cassert>
-
+#include <memory>
#include <iostream>
#include <stdexcept>
#include <string>
@@ -70,8 +68,8 @@ struct LoggingContext
if (ctx == nullptr)
{
ctx = new hermes::Context;
- ctx->sinks()->append(stdex::make_unique<hermes::ConsoleReporter>());
- ctx->config(stdex::make_unique<EnvConfig>("TF2CIRCLE_Log"));
+ ctx->sinks()->append(std::make_unique<hermes::ConsoleReporter>());
+ ctx->config(std::make_unique<EnvConfig>("TF2CIRCLE_Log"));
}
return ctx;
@@ -133,9 +131,9 @@ int EntryFunctor::operator()(int argc, char **argv) const
using EnvConfig = hermes::EnvConfig<hermes::EnvFormat::BooleanNumber>;
// This line allows users to control all the moco-tf loggers via TF2CIRCLE_Log_Frontend
- moco::LoggingContext::get()->config(stdex::make_unique<EnvConfig>("TF2CIRCLE_Log_Frontend"));
+ moco::LoggingContext::get()->config(std::make_unique<EnvConfig>("TF2CIRCLE_Log_Frontend"));
// This line allows users to control all the exo-circle loggers via TF2CIRCLE_Log_Backend
- exo::LoggingContext::get()->config(stdex::make_unique<EnvConfig>("TF2CIRCLE_Log_Backend"));
+ exo::LoggingContext::get()->config(std::make_unique<EnvConfig>("TF2CIRCLE_Log_Backend"));
LOGGER(l);
diff --git a/compiler/tf2nnpkg/CMakeLists.txt b/compiler/tf2nnpkg/CMakeLists.txt
index 8e1edf858..b81f40646 100644
--- a/compiler/tf2nnpkg/CMakeLists.txt
+++ b/compiler/tf2nnpkg/CMakeLists.txt
@@ -30,6 +30,5 @@ target_link_libraries(tf2nnpkg PRIVATE tfinfo)
target_link_libraries(tf2nnpkg PRIVATE exo)
target_link_libraries(tf2nnpkg PRIVATE locop)
target_link_libraries(tf2nnpkg PRIVATE hermes_std)
-target_link_libraries(tf2nnpkg PRIVATE stdex)
target_link_libraries(tf2nnpkg PRIVATE angkor cwrap)
install(TARGETS tf2nnpkg DESTINATION bin)
diff --git a/compiler/tf2nnpkg/requires.cmake b/compiler/tf2nnpkg/requires.cmake
index 68d45bf3a..87ea50bf7 100644
--- a/compiler/tf2nnpkg/requires.cmake
+++ b/compiler/tf2nnpkg/requires.cmake
@@ -1,4 +1,3 @@
-require("stdex")
require("hermes-std")
require("moco-tf")
require("exo")
diff --git a/compiler/tf2nnpkg/src/tf2nnpkg.cpp b/compiler/tf2nnpkg/src/tf2nnpkg.cpp
index d9a0d9d2f..548cee61f 100644
--- a/compiler/tf2nnpkg/src/tf2nnpkg.cpp
+++ b/compiler/tf2nnpkg/src/tf2nnpkg.cpp
@@ -28,8 +28,7 @@
#include <hermes/ConsoleReporter.h>
#include <hermes/EnvConfig.h>
-#include <stdex/Memory.h>
-
+#include <memory>
#include <iostream>
#include <fstream>
#include <functional>
@@ -71,8 +70,8 @@ struct LoggingContext
if (ctx == nullptr)
{
ctx = new hermes::Context;
- ctx->sinks()->append(stdex::make_unique<hermes::ConsoleReporter>());
- ctx->config(stdex::make_unique<EnvConfig>("TF2NNPKG_Log"));
+ ctx->sinks()->append(std::make_unique<hermes::ConsoleReporter>());
+ ctx->config(std::make_unique<EnvConfig>("TF2NNPKG_Log"));
}
return ctx;
@@ -148,9 +147,9 @@ int EntryFunctor::operator()(int argc, char **argv) const
using EnvConfig = hermes::EnvConfig<hermes::EnvFormat::BooleanNumber>;
// This line allows users to control all the moco-tf loggers via TF2NNPKG_Log_Frontend
- moco::LoggingContext::get()->config(stdex::make_unique<EnvConfig>("TF2NNPKG_Log_Frontend"));
+ moco::LoggingContext::get()->config(std::make_unique<EnvConfig>("TF2NNPKG_Log_Frontend"));
// This line allows users to control all the exo-circle loggers via TF2NNPKG_Log_Backend
- exo::LoggingContext::get()->config(stdex::make_unique<EnvConfig>("TF2NNPKG_Log_Backend"));
+ exo::LoggingContext::get()->config(std::make_unique<EnvConfig>("TF2NNPKG_Log_Backend"));
LOGGER(l);
diff --git a/compiler/tf2tflite-dredd-pb-test/CMakeLists.txt b/compiler/tf2tflite-dredd-pb-test/CMakeLists.txt
index b75c50772..ac9f14d70 100644
--- a/compiler/tf2tflite-dredd-pb-test/CMakeLists.txt
+++ b/compiler/tf2tflite-dredd-pb-test/CMakeLists.txt
@@ -1,3 +1,7 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
nnas_include(TargetRequire)
unset(REQUIRED_TARGETS)
diff --git a/compiler/tf2tflite-dredd-pbtxt-test/CMakeLists.txt b/compiler/tf2tflite-dredd-pbtxt-test/CMakeLists.txt
index 87cf7836f..95a296ef8 100644
--- a/compiler/tf2tflite-dredd-pbtxt-test/CMakeLists.txt
+++ b/compiler/tf2tflite-dredd-pbtxt-test/CMakeLists.txt
@@ -1,3 +1,7 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
nnas_include(TargetRequire)
unset(REQUIRED_TARGETS)
diff --git a/compiler/tf2tflite-value-pb-test/CMakeLists.txt b/compiler/tf2tflite-value-pb-test/CMakeLists.txt
index 41974f72c..a6c451e0b 100644
--- a/compiler/tf2tflite-value-pb-test/CMakeLists.txt
+++ b/compiler/tf2tflite-value-pb-test/CMakeLists.txt
@@ -1,3 +1,7 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
nnas_include(TargetRequire)
unset(REQUIRED_TARGETS)
diff --git a/compiler/tf2tflite-value-pbtxt-test/CMakeLists.txt b/compiler/tf2tflite-value-pbtxt-test/CMakeLists.txt
index 2e76e21d3..fde3e60b4 100644
--- a/compiler/tf2tflite-value-pbtxt-test/CMakeLists.txt
+++ b/compiler/tf2tflite-value-pbtxt-test/CMakeLists.txt
@@ -1,3 +1,7 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
nnas_include(TargetRequire)
unset(REQUIRED_TARGETS)
diff --git a/compiler/tf2tflite/CMakeLists.txt b/compiler/tf2tflite/CMakeLists.txt
index 663563e00..e4a723305 100644
--- a/compiler/tf2tflite/CMakeLists.txt
+++ b/compiler/tf2tflite/CMakeLists.txt
@@ -38,7 +38,6 @@ target_link_libraries(tf2tflite PRIVATE tfinfo)
target_link_libraries(tf2tflite PRIVATE exo)
target_link_libraries(tf2tflite PRIVATE locop)
target_link_libraries(tf2tflite PRIVATE hermes_std)
-target_link_libraries(tf2tflite PRIVATE stdex)
target_link_libraries(tf2tflite PRIVATE angkor cwrap)
target_link_libraries(tf2tflite PRIVATE tf2tflite_customop_info_proto)
install(TARGETS tf2tflite DESTINATION bin)
diff --git a/compiler/tf2tflite/requires.cmake b/compiler/tf2tflite/requires.cmake
index 68d45bf3a..87ea50bf7 100644
--- a/compiler/tf2tflite/requires.cmake
+++ b/compiler/tf2tflite/requires.cmake
@@ -1,4 +1,3 @@
-require("stdex")
require("hermes-std")
require("moco-tf")
require("exo")
diff --git a/compiler/tf2tflite/src/CustomopConfLoader.cpp b/compiler/tf2tflite/src/CustomopConfLoader.cpp
index 7399a432a..c50c17f69 100644
--- a/compiler/tf2tflite/src/CustomopConfLoader.cpp
+++ b/compiler/tf2tflite/src/CustomopConfLoader.cpp
@@ -27,6 +27,8 @@
#include <fcntl.h>
+#include <limits> // std::numeric_limits
+
namespace
{
bool load_text(const cwrap::Fildes &fildes, tf2tflite::CustomOpInfoDef &def)
diff --git a/compiler/tf2tflite/src/Driver.cpp b/compiler/tf2tflite/src/Driver.cpp
index e43d30bb2..12fcbd005 100644
--- a/compiler/tf2tflite/src/Driver.cpp
+++ b/compiler/tf2tflite/src/Driver.cpp
@@ -28,10 +28,8 @@
#include <hermes/ConsoleReporter.h>
#include <hermes/EnvConfig.h>
-#include <stdex/Memory.h>
-
#include <cassert>
-
+#include <memory>
#include <iostream>
#include <stdexcept>
#include <string>
@@ -70,8 +68,8 @@ struct LoggingContext
if (ctx == nullptr)
{
ctx = new hermes::Context;
- ctx->sinks()->append(stdex::make_unique<hermes::ConsoleReporter>());
- ctx->config(stdex::make_unique<EnvConfig>("TF2TFLITE_Log"));
+ ctx->sinks()->append(std::make_unique<hermes::ConsoleReporter>());
+ ctx->config(std::make_unique<EnvConfig>("TF2TFLITE_Log"));
}
return ctx;
@@ -96,9 +94,9 @@ int main(int argc, char **argv)
using EnvConfig = hermes::EnvConfig<hermes::EnvFormat::BooleanNumber>;
// This line allows users to control all the moco-tf loggers via TF2TFLITE_Log_Frontend
- moco::LoggingContext::get()->config(stdex::make_unique<EnvConfig>("TF2TFLITE_Log_Frontend"));
+ moco::LoggingContext::get()->config(std::make_unique<EnvConfig>("TF2TFLITE_Log_Frontend"));
// This line allows users to control all the exo-tflite loggers via TF2TFLITE_Log_Backend
- exo::LoggingContext::get()->config(stdex::make_unique<EnvConfig>("TF2TFLITE_Log_Backend"));
+ exo::LoggingContext::get()->config(std::make_unique<EnvConfig>("TF2TFLITE_Log_Backend"));
LOGGER(l);
diff --git a/compiler/tf2tfliteV2-conversion-test/CMakeLists.txt b/compiler/tf2tfliteV2-conversion-test/CMakeLists.txt
index 3e7e57747..017fd6a73 100644
--- a/compiler/tf2tfliteV2-conversion-test/CMakeLists.txt
+++ b/compiler/tf2tfliteV2-conversion-test/CMakeLists.txt
@@ -1,3 +1,7 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
nncc_find_resource(TensorFlowTests)
#
@@ -72,7 +76,7 @@ list(APPEND TEST_DEPS "${TEST_RUNNER}")
get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
-set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_1_13_2")
+set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_2_12_1")
###
### Generate test.config
diff --git a/compiler/tf2tfliteV2/tf2tfliteV2.py b/compiler/tf2tfliteV2/tf2tfliteV2.py
index c51dabde0..2bcf55328 100755
--- a/compiler/tf2tfliteV2/tf2tfliteV2.py
+++ b/compiler/tf2tfliteV2/tf2tfliteV2.py
@@ -15,6 +15,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import os
import tensorflow as tf
import argparse
import sys
@@ -41,6 +42,13 @@ def _get_parser():
parser = argparse.ArgumentParser(
description=("Command line tool to run TensorFlow Lite Converter."))
+ # Verbose
+ parser.add_argument(
+ "-V",
+ "--verbose",
+ action="store_true",
+ help="output additional information to stdout or stderr")
+
# Converter version.
converter_version = parser.add_mutually_exclusive_group(required=True)
converter_version.add_argument(
@@ -88,8 +96,7 @@ def _get_parser():
"-I",
"--input_arrays",
type=str,
- help="Names of the input arrays, comma-separated.",
- required=True)
+ help="Names of the input arrays, comma-separated.")
parser.add_argument(
"-s",
"--input_shapes",
@@ -101,8 +108,13 @@ def _get_parser():
"-O",
"--output_arrays",
type=str,
- help="Names of the output arrays, comma-separated.",
- required=True)
+ help="Names of the output arrays, comma-separated.")
+
+ # experimental options
+ parser.add_argument(
+ "--experimental_disable_batchmatmul_unfold",
+ action="store_true",
+ help="Experimental disable BatchMatMul unfold")
# Set default value
parser.set_defaults(model_format="graph_def")
@@ -146,6 +158,10 @@ def _parse_array(arrays, type_fn=str):
def _v1_convert(flags):
if flags.model_format == "graph_def":
+ if not flags.input_arrays:
+ raise ValueError("--input_arrays must be provided")
+ if not flags.output_arrays:
+ raise ValueError("--output_arrays must be provided")
input_shapes = None
if flags.input_shapes:
input_arrays = _parse_array(flags.input_arrays)
@@ -174,6 +190,19 @@ def _v1_convert(flags):
def _v2_convert(flags):
if flags.model_format == "graph_def":
+ if not flags.input_arrays:
+ raise ValueError("--input_arrays must be provided")
+ if not flags.output_arrays:
+ raise ValueError("--output_arrays must be provided")
+ input_shapes = []
+ if flags.input_shapes:
+ input_shapes = [
+ _parse_array(shape, type_fn=int)
+ for shape in flags.input_shapes.split(":")
+ ]
+ if len(input_shapes) != len(_parse_array(flags.input_arrays)):
+ raise ValueError(
+ "--input_shapes and --input_arrays must have the same length")
file_content = open(flags.input_path, 'rb').read()
try:
graph_def = tf.compat.v1.GraphDef()
@@ -194,6 +223,8 @@ def _v2_convert(flags):
_str + ":0" if len(_str.split(":")) == 1 else _str
for _str in _parse_array(flags.output_arrays)
])
+ for i in range(len(input_shapes)):
+ wrap_func.inputs[i].set_shape(input_shapes[i])
converter = tf.lite.TFLiteConverter.from_concrete_functions([wrap_func])
if flags.model_format == "saved_model":
@@ -203,6 +234,9 @@ def _v2_convert(flags):
keras_model = tf.keras.models.load_model(flags.input_path)
converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
+ if flags.experimental_disable_batchmatmul_unfold:
+ converter._experimental_disable_batchmatmul_unfold = True
+
converter.allow_custom_ops = True
converter.experimental_new_converter = True
@@ -212,7 +246,29 @@ def _v2_convert(flags):
open(flags.output_path, "wb").write(tflite_model)
+def _apply_verbosity(verbosity):
+ # NOTE
+ # TF_CPP_MIN_LOG_LEVEL
+ # 0 : INFO + WARNING + ERROR + FATAL
+ # 1 : WARNING + ERROR + FATAL
+ # 2 : ERROR + FATAL
+ # 3 : FATAL
+ #
+ # TODO Find a better way to suppress the traceback on error
+ # tracebacklimit
+ # The default is 1000.
+ # When set to 0 or less, all traceback information is suppressed
+ if verbosity:
+ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
+ sys.tracebacklimit = 1000
+ else:
+ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
+ sys.tracebacklimit = 0
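+ # NOTE TF_CPP_MIN_LOG_LEVEL is consumed by TensorFlow's C++ backend; messages
+ # already printed while importing tensorflow are unaffected.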
+
+
def _convert(flags):
+ _apply_verbosity(flags.verbose)
+
if (flags.v1):
_v1_convert(flags)
else:
@@ -237,4 +293,9 @@ def main():
if __name__ == "__main__":
- main()
+ try:
+ main()
+ except Exception as e:
+ prog_name = os.path.basename(__file__)
+ print(f"{prog_name}: {type(e).__name__}: " + str(e), file=sys.stderr)
+ sys.exit(255)
diff --git a/compiler/tfgraph-xform/CMakeLists.txt b/compiler/tfgraph-xform/CMakeLists.txt
deleted file mode 100644
index d6e0a4cde..000000000
--- a/compiler/tfgraph-xform/CMakeLists.txt
+++ /dev/null
@@ -1,328 +0,0 @@
-macro(require_package PKGNAME)
- nnas_find_package(${PKGNAME} ${ARGN} QUIET)
- if(NOT ${PKGNAME}_FOUND)
- message(STATUS "Build tfgraph-xform: FALSE (${PKGNAME} is missing)")
- return()
- endif(NOT ${PKGNAME}_FOUND)
-endmacro(require_package)
-
-require_package(Abseil)
-require_package(Protobuf)
-require_package(EigenSource-fd6845384b86)
-require_package(GoogleDoubleConversion)
-require_package(GoogleNSync)
-require_package(TensorFlowSource EXACT 1.12)
-require_package(TensorFlowProtoText EXACT 1.12)
-
-message(STATUS "Build tfgraph-xform: TRUE")
-
-#
-# Set "SOURCE_FILES"
-#
-unset(SOURCE_FILES)
-
-macro(Source_Add RPATH)
- list(APPEND SOURCE_FILES "${TensorFlowSource_DIR}/tensorflow/${RPATH}")
-endmacro(Source_Add)
-
-# TensorFlow "core"
-Source_Add(core/lib/core/status.cc)
-Source_Add(core/lib/core/coding.cc)
-Source_Add(core/lib/core/arena.cc)
-Source_Add(core/lib/core/threadpool.cc)
-
-Source_Add(core/lib/strings/scanner.cc)
-Source_Add(core/lib/strings/str_util.cc)
-Source_Add(core/lib/strings/numbers.cc)
-Source_Add(core/lib/strings/stringprintf.cc)
-Source_Add(core/lib/strings/strcat.cc)
-Source_Add(core/lib/strings/proto_text_util.cc)
-Source_Add(core/lib/strings/proto_serialization.cc)
-Source_Add(core/lib/strings/ordered_code.cc)
-Source_Add(core/lib/hash/hash.cc)
-Source_Add(core/lib/hash/crc32c.cc)
-Source_Add(core/lib/hash/crc32c_accelerate.cc)
-Source_Add(core/lib/io/iterator.cc)
-Source_Add(core/lib/io/two_level_iterator.cc)
-Source_Add(core/lib/io/format.cc)
-Source_Add(core/lib/io/block.cc)
-Source_Add(core/lib/io/table.cc)
-Source_Add(core/lib/random/random.cc)
-Source_Add(core/lib/io/path.cc)
-
-Source_Add(core/platform/cpu_info.cc)
-Source_Add(core/platform/abi.cc)
-Source_Add(core/platform/env.cc)
-Source_Add(core/platform/env_time.cc)
-Source_Add(core/platform/file_system.cc)
-Source_Add(core/platform/file_system_helper.cc)
-Source_Add(core/platform/tensor_coding.cc)
-Source_Add(core/platform/tracing.cc)
-Source_Add(core/platform/setround.cc)
-Source_Add(core/platform/denormal.cc)
-Source_Add(core/platform/protobuf_util.cc)
-
-Source_Add(core/platform/default/mutex.cc)
-Source_Add(core/platform/default/logging.cc)
-Source_Add(core/platform/default/string_coding.cc)
-
-Source_Add(core/platform/posix/error.cc)
-Source_Add(core/platform/posix/env.cc)
-Source_Add(core/platform/posix/env_time.cc)
-Source_Add(core/platform/posix/port.cc)
-Source_Add(core/platform/posix/load_library.cc)
-Source_Add(core/platform/posix/posix_file_system.cc)
-
-Source_Add(core/util/env_var.cc)
-Source_Add(core/util/padding.cc)
-Source_Add(core/util/mirror_pad_mode.cc)
-Source_Add(core/util/command_line_flags.cc)
-Source_Add(core/util/tensor_format.cc)
-Source_Add(core/util/tensor_slice_set.cc)
-Source_Add(core/util/tensor_slice_reader.cc)
-Source_Add(core/util/tensor_slice_reader_cache.cc)
-Source_Add(core/util/saved_tensor_slice_util.cc)
-Source_Add(core/util/equal_graph_def.cc)
-Source_Add(core/util/device_name_utils.cc)
-Source_Add(core/util/work_sharder.cc)
-Source_Add(core/util/use_cudnn.cc)
-Source_Add(core/util/strided_slice_op.cc)
-Source_Add(core/util/bcast.cc)
-
-Source_Add(core/graph/tensor_id.cc)
-Source_Add(core/graph/algorithm.cc)
-Source_Add(core/graph/node_builder.cc)
-Source_Add(core/graph/subgraph.cc)
-Source_Add(core/graph/graph.cc)
-Source_Add(core/graph/graph_constructor.cc)
-Source_Add(core/graph/edgeset.cc)
-Source_Add(core/graph/while_context.cc)
-Source_Add(core/graph/control_flow.cc)
-Source_Add(core/graph/gradients.cc)
-Source_Add(core/graph/optimizer_cse.cc)
-
-Source_Add(core/framework/versions.cc)
-Source_Add(core/framework/types.cc)
-Source_Add(core/framework/function.cc)
-Source_Add(core/framework/op.cc)
-Source_Add(core/framework/op_def_builder.cc)
-Source_Add(core/framework/op_kernel.cc)
-Source_Add(core/framework/op_segment.cc)
-Source_Add(core/framework/resource_handle.cc)
-Source_Add(core/framework/tensor.cc)
-Source_Add(core/framework/tensor_shape.cc)
-Source_Add(core/framework/tensor_reference.cc)
-Source_Add(core/framework/tensor_slice.cc)
-Source_Add(core/framework/tensor_util.cc)
-Source_Add(core/framework/unique_tensor_references.cc)
-Source_Add(core/framework/allocator.cc)
-Source_Add(core/framework/allocator_registry.cc)
-Source_Add(core/framework/tracking_allocator.cc)
-Source_Add(core/framework/variant.cc)
-Source_Add(core/framework/variant_op_registry.cc)
-Source_Add(core/framework/variant_tensor_data.cc)
-Source_Add(core/framework/memory_types.cc)
-Source_Add(core/framework/log_memory.cc)
-Source_Add(core/framework/node_def_builder.cc)
-
-Source_Add(core/framework/common_shape_fns.cc)
-Source_Add(core/framework/shape_inference.cc)
-
-Source_Add(core/framework/resource_mgr.cc)
-Source_Add(core/framework/device_base.cc)
-Source_Add(core/framework/rendezvous.cc)
-Source_Add(core/framework/cancellation.cc)
-
-Source_Add(core/framework/attr_value_util.cc)
-Source_Add(core/framework/op_def_util.cc)
-Source_Add(core/framework/node_def_util.cc)
-Source_Add(core/framework/kernel_def_builder.cc)
-Source_Add(core/framework/kernel_def_util.cc)
-
-Source_Add(core/common_runtime/device.cc)
-Source_Add(core/common_runtime/device_mgr.cc)
-Source_Add(core/common_runtime/function.cc)
-Source_Add(core/common_runtime/memory_types.cc)
-Source_Add(core/common_runtime/copy_tensor.cc)
-Source_Add(core/common_runtime/shape_refiner.cc)
-Source_Add(core/common_runtime/constant_folding.cc)
-Source_Add(core/common_runtime/eval_const_tensor.cc)
-Source_Add(core/common_runtime/graph_optimizer.cc)
-Source_Add(core/common_runtime/graph_runner.cc)
-Source_Add(core/common_runtime/rendezvous_mgr.cc)
-Source_Add(core/common_runtime/rendezvous_util.cc)
-Source_Add(core/common_runtime/process_function_library_runtime.cc)
-Source_Add(core/common_runtime/executor.cc)
-Source_Add(core/common_runtime/executor_factory.cc)
-
-# TensorFlow - Operations
-Source_Add(core/ops/no_op.cc)
-Source_Add(core/ops/sendrecv_ops.cc)
-Source_Add(core/ops/array_ops.cc)
-Source_Add(core/ops/math_ops.cc)
-Source_Add(core/ops/image_ops.cc)
-Source_Add(core/ops/nn_ops.cc)
-
-# TensorFlow - OpKernel Implementations
-Source_Add(core/kernels/ops_util.cc)
-Source_Add(core/kernels/cwise_ops_common.cc)
-Source_Add(core/kernels/cwise_op_add_1.cc)
-Source_Add(core/kernels/cwise_op_sub.cc)
-Source_Add(core/kernels/cwise_op_mul_1.cc)
-Source_Add(core/kernels/strided_slice_op.cc)
-Source_Add(core/kernels/strided_slice_op_inst_0.cc)
-Source_Add(core/kernels/strided_slice_op_inst_1.cc)
-Source_Add(core/kernels/strided_slice_op_inst_2.cc)
-Source_Add(core/kernels/strided_slice_op_inst_3.cc)
-Source_Add(core/kernels/strided_slice_op_inst_4.cc)
-Source_Add(core/kernels/strided_slice_op_inst_5.cc)
-Source_Add(core/kernels/strided_slice_op_inst_6.cc)
-Source_Add(core/kernels/strided_slice_op_inst_7.cc)
-Source_Add(core/kernels/relu_op.cc)
-Source_Add(core/kernels/conv_ops.cc)
-Source_Add(core/kernels/conv_grad_ops.cc)
-Source_Add(core/kernels/conv_grad_input_ops.cc)
-Source_Add(core/kernels/bias_op.cc)
-Source_Add(core/kernels/pad_op.cc)
-Source_Add(core/kernels/cast_op_impl_bool.cc)
-Source_Add(core/kernels/cast_op_impl_int8.cc)
-Source_Add(core/kernels/cast_op_impl_uint8.cc)
-Source_Add(core/kernels/cast_op_impl_int16.cc)
-Source_Add(core/kernels/cast_op_impl_uint16.cc)
-Source_Add(core/kernels/cast_op_impl_int32.cc)
-Source_Add(core/kernels/cast_op_impl_uint32.cc)
-Source_Add(core/kernels/cast_op_impl_int64.cc)
-Source_Add(core/kernels/cast_op_impl_uint64.cc)
-Source_Add(core/kernels/cast_op_impl_half.cc)
-Source_Add(core/kernels/cast_op_impl_bfloat.cc)
-Source_Add(core/kernels/cast_op_impl_float.cc)
-Source_Add(core/kernels/cast_op_impl_double.cc)
-Source_Add(core/kernels/cast_op_impl_complex64.cc)
-Source_Add(core/kernels/cast_op_impl_complex128.cc)
-Source_Add(core/kernels/cast_op.cc)
-Source_Add(core/kernels/split_op.cc)
-Source_Add(core/kernels/concat_lib_cpu.cc)
-Source_Add(core/kernels/concat_op.cc)
-Source_Add(core/kernels/resize_bilinear_op.cc)
-Source_Add(core/kernels/constant_op.cc)
-Source_Add(core/kernels/pack_op.cc)
-Source_Add(core/kernels/reshape_op.cc)
-Source_Add(core/kernels/shape_ops.cc)
-Source_Add(core/kernels/fill_functor.cc)
-Source_Add(core/kernels/fused_batch_norm_op.cc)
-Source_Add(core/kernels/identity_op.cc)
-Source_Add(core/kernels/split_lib_cpu.cc)
-Source_Add(core/kernels/unpack_op.cc)
-Source_Add(core/kernels/pooling_ops_common.cc)
-Source_Add(core/kernels/maxpooling_op.cc)
-Source_Add(core/kernels/deep_conv2d.cc)
-Source_Add(core/kernels/no_op.cc)
-Source_Add(core/kernels/sendrecv_ops.cc)
-
-# TensorFlow "transform_graph" - Basic Infrastructure
-Source_Add(tools/graph_transforms/file_utils.cc)
-Source_Add(tools/graph_transforms/transform_utils.cc)
-Source_Add(tools/graph_transforms/transform_graph.cc)
-Source_Add(tools/graph_transforms/transform_graph_main.cc)
-
-# TensorFlow "trasnform_graph" - Transfrom Implementations
-Source_Add(tools/graph_transforms/fold_constants_lib.cc)
-Source_Add(tools/graph_transforms/fold_old_batch_norms.cc)
-Source_Add(tools/graph_transforms/strip_unused_nodes.cc)
-
-#
-# Set "PROTO_FILES"
-#
-unset(PROTO_FILES)
-
-macro(Proto_Add RPATH)
- list(APPEND PROTO_FILES "${RPATH}")
-endmacro(Proto_Add)
-
-Proto_Add(tensorflow/core/lib/core/error_codes.proto)
-
-# Minimal Protocol Buffer Specification to read GraphDef
-Proto_Add(tensorflow/core/framework/versions.proto)
-Proto_Add(tensorflow/core/framework/resource_handle.proto)
-Proto_Add(tensorflow/core/framework/types.proto)
-Proto_Add(tensorflow/core/framework/tensor.proto)
-Proto_Add(tensorflow/core/framework/tensor_shape.proto)
-Proto_Add(tensorflow/core/framework/tensor_slice.proto)
-Proto_Add(tensorflow/core/framework/attr_value.proto)
-Proto_Add(tensorflow/core/framework/op_def.proto)
-Proto_Add(tensorflow/core/framework/node_def.proto)
-Proto_Add(tensorflow/core/framework/function.proto)
-Proto_Add(tensorflow/core/framework/graph.proto)
-
-Proto_Add(tensorflow/core/framework/api_def.proto)
-# "tensorflow/core/framework/tensor.cc" requires these headers
-Proto_Add(tensorflow/core/framework/allocation_description.proto)
-Proto_Add(tensorflow/core/framework/tensor_description.proto)
-Proto_Add(tensorflow/core/framework/log_memory.proto)
-Proto_Add(tensorflow/core/framework/kernel_def.proto)
-Proto_Add(tensorflow/core/framework/device_attributes.proto)
-Proto_Add(tensorflow/core/framework/cost_graph.proto)
-Proto_Add(tensorflow/core/framework/step_stats.proto)
-
-Proto_Add(tensorflow/core/protobuf/cluster.proto)
-Proto_Add(tensorflow/core/protobuf/config.proto)
-Proto_Add(tensorflow/core/protobuf/debug.proto)
-Proto_Add(tensorflow/core/protobuf/rewriter_config.proto)
-
-Proto_Add(tensorflow/core/util/saved_tensor_slice.proto)
-
-#
-# Set "PROTO_TEXT_FILES"
-#
-unset(PROTO_TEXT_FILES)
-
-macro(ProtoText_Add RPATH)
- list(APPEND PROTO_TEXT_FILES "${RPATH}")
-endmacro(ProtoText_Add)
-
-ProtoText_Add(tensorflow/core/framework/versions.proto)
-ProtoText_Add(tensorflow/core/framework/attr_value.proto)
-ProtoText_Add(tensorflow/core/framework/resource_handle.proto)
-ProtoText_Add(tensorflow/core/framework/types.proto)
-ProtoText_Add(tensorflow/core/framework/tensor_shape.proto)
-ProtoText_Add(tensorflow/core/framework/tensor_description.proto)
-ProtoText_Add(tensorflow/core/framework/allocation_description.proto)
-ProtoText_Add(tensorflow/core/framework/tensor.proto)
-ProtoText_Add(tensorflow/core/framework/op_def.proto)
-ProtoText_Add(tensorflow/core/framework/node_def.proto)
-ProtoText_Add(tensorflow/core/framework/function.proto)
-ProtoText_Add(tensorflow/core/framework/graph.proto)
-ProtoText_Add(tensorflow/core/framework/kernel_def.proto)
-ProtoText_Add(tensorflow/core/framework/log_memory.proto)
-ProtoText_Add(tensorflow/core/framework/device_attributes.proto)
-
-#
-# Build "tfgraph-xform" executable
-#
-Protobuf_Generate(TF_PROTO
- "${CMAKE_CURRENT_BINARY_DIR}/gen/tensorflow-proto" # OUTPUT ROOT
- "${TensorFlowSource_DIR}" # BASE DIRECTORY
- ${PROTO_FILES} # .proto path (relative to the BASE)
-)
-
-ProtoText_Generate(TF_PROTO_TEXT
- "${CMAKE_CURRENT_BINARY_DIR}/gen/tensorflow-prototext" # OUTPUT ROOT
- ${PROTO_TEXT_FILES}
-)
-
-add_executable(tfgraph-xform
- ${SOURCE_FILES} # TensorFlow Source Files
- ${TF_PROTO_SOURCES} # Source Files generated by Protocol Buffer
- ${TF_PROTO_TEXT_SOURCES} # Source Files generated by Proto Text
-)
-
-target_include_directories(tfgraph-xform PRIVATE ${TensorFlowSource_DIR})
-target_include_directories(tfgraph-xform PRIVATE ${TF_PROTO_INCLUDE_DIRS})
-target_include_directories(tfgraph-xform PRIVATE ${TF_PROTO_TEXT_INCLUDE_DIRS})
-target_include_directories(tfgraph-xform PRIVATE ${EigenSource_DIR})
-target_link_libraries(tfgraph-xform PRIVATE ${TF_PROTO_LIBRARIES})
-target_link_libraries(tfgraph-xform PRIVATE abseil)
-target_link_libraries(tfgraph-xform PRIVATE dl)
-target_link_libraries(tfgraph-xform PRIVATE Google::DoubleConversion)
-target_link_libraries(tfgraph-xform PRIVATE Google::NSync)
diff --git a/compiler/tfgraph-xform/README.md b/compiler/tfgraph-xform/README.md
deleted file mode 100644
index 41fb88530..000000000
--- a/compiler/tfgraph-xform/README.md
+++ /dev/null
@@ -1,5 +0,0 @@
-# tfgraph-xform
-
-Let's build the TensorFlow "transform_graph" tool without Bazel.
-
-**DISCLAIMER** Not every transformation is supported.
diff --git a/compiler/tfinfo-v2/CMakeLists.txt b/compiler/tfinfo-v2/CMakeLists.txt
index cf438ea29..40df521b9 100644
--- a/compiler/tfinfo-v2/CMakeLists.txt
+++ b/compiler/tfinfo-v2/CMakeLists.txt
@@ -24,7 +24,6 @@ set_target_properties(tfinfo_v2 PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(tfinfo_v2 PUBLIC include)
target_link_libraries(tfinfo_v2 PRIVATE tfinfo_v2_proto)
target_link_libraries(tfinfo_v2 PRIVATE oops)
-target_link_libraries(tfinfo_v2 PRIVATE stdex)
if(NOT ENABLE_TEST)
return()
diff --git a/compiler/tfinfo-v2/include/tfinfo-v2/TensorSignature.h b/compiler/tfinfo-v2/include/tfinfo-v2/TensorSignature.h
index f26d0354a..8c014f1fa 100644
--- a/compiler/tfinfo-v2/include/tfinfo-v2/TensorSignature.h
+++ b/compiler/tfinfo-v2/include/tfinfo-v2/TensorSignature.h
@@ -98,7 +98,7 @@ public:
}
TensorSignature(const Kind kind, const std::string &name, const ShapeHint &shape_hint)
- : TensorSignature(kind, name)
+ : TensorSignature(kind, name)
{
_shape_hint = shape_hint;
}
diff --git a/compiler/tfinfo-v2/requires.cmake b/compiler/tfinfo-v2/requires.cmake
index e7efab4fb..a1b974421 100644
--- a/compiler/tfinfo-v2/requires.cmake
+++ b/compiler/tfinfo-v2/requires.cmake
@@ -1,2 +1 @@
require("oops")
-require("stdex")
diff --git a/compiler/tfinfo-v2/src/TFInfo_v2.test.cpp b/compiler/tfinfo-v2/src/TFInfo_v2.test.cpp
index 02a2d9199..bcab4ac7f 100644
--- a/compiler/tfinfo-v2/src/TFInfo_v2.test.cpp
+++ b/compiler/tfinfo-v2/src/TFInfo_v2.test.cpp
@@ -54,7 +54,7 @@ const std::vector<std::string> success_cases =
name : "relu:0"
}
),
- // clang-format on
+ // clang-format on
};
} // namespace
@@ -221,7 +221,7 @@ const std::vector<std::string> fail_cases =
input, a:0, TF_FLOAT, [2, 3 ,4]
output, b:0, TF_FLOAT, [2, 3 ,4]
)",
- // clang-format on
+ // clang-format on
};
} // namespace
diff --git a/compiler/tfinfo-v2/src/TensorInfoLoader.cpp b/compiler/tfinfo-v2/src/TensorInfoLoader.cpp
index 0bf828773..249bf384a 100644
--- a/compiler/tfinfo-v2/src/TensorInfoLoader.cpp
+++ b/compiler/tfinfo-v2/src/TensorInfoLoader.cpp
@@ -19,13 +19,13 @@
#include "tfinfo-v2/TensorSignature.h"
#include <oops/UserExn.h>
-#include <stdex/Memory.h>
#include <tfinfo-v2.pb.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <google/protobuf/text_format.h>
+#include <memory>
#include <fstream>
#include <fcntl.h>
@@ -107,8 +107,8 @@ void convert(tfinfo_v2_proto::InfoDef &info_def, tfinfo::v2::TensorSignatures &t
auto name = input_def.name();
validate_tensor_name(name, path);
- auto tensor = stdex::make_unique<tfinfo::v2::TensorSignature>(
- tfinfo::v2::TensorSignature::Kind::Input, name);
+ auto tensor = std::make_unique<tfinfo::v2::TensorSignature>(
+ tfinfo::v2::TensorSignature::Kind::Input, name);
// when there is dim attribute for unknown shape
if (input_def.dim_size() > 0)
@@ -136,8 +136,8 @@ void convert(tfinfo_v2_proto::InfoDef &info_def, tfinfo::v2::TensorSignatures &t
auto name = info_def.output().Get(i).name();
validate_tensor_name(name, path);
- auto tensor = stdex::make_unique<tfinfo::v2::TensorSignature>(
- tfinfo::v2::TensorSignature::Kind::Output, name);
+ auto tensor = std::make_unique<tfinfo::v2::TensorSignature>(
+ tfinfo::v2::TensorSignature::Kind::Output, name);
tensors.emplace_back(std::move(tensor));
}
}
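Across tfinfo-v2, tfinfo, and tfkit this patch retires the project-local `stdex` shim: `stdex::make_unique` was a pre-C++14 stand-in, and once the toolchain baseline includes C++14 the standard `<memory>` header provides the identical facility, so the extra dependency (and its `requires.cmake` entry) can go. A minimal sketch of the equivalence, using a hypothetical `Widget` type:

```cpp
#include <memory>
#include <string>

// Hypothetical payload type, for illustration only.
struct Widget
{
  explicit Widget(std::string name) : _name{std::move(name)} {}
  std::string _name;
};

int main()
{
  // Before: auto w = stdex::make_unique<Widget>("w0");
  // After (C++14 and later): identical semantics, no extra library.
  auto w = std::make_unique<Widget>("w0");
  return w->_name == "w0" ? 0 : 1;
}
```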
diff --git a/compiler/tfinfo/CMakeLists.txt b/compiler/tfinfo/CMakeLists.txt
index 678912e6f..359699e13 100644
--- a/compiler/tfinfo/CMakeLists.txt
+++ b/compiler/tfinfo/CMakeLists.txt
@@ -5,7 +5,7 @@ list(REMOVE_ITEM SOURCES ${TESTS})
add_library(tfinfo STATIC ${SOURCES})
set_target_properties(tfinfo PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(tfinfo PUBLIC include)
-target_link_libraries(tfinfo stdex angkor oops)
+target_link_libraries(tfinfo angkor oops)
# TODO Remove "nnkit_support_tftestinfo" later
add_library(nnkit_support_tftestinfo ALIAS tfinfo)
diff --git a/compiler/tfinfo/include/nnkit/support/tftestinfo/ParsedTensor.h b/compiler/tfinfo/include/nnkit/support/tftestinfo/ParsedTensor.h
index aec8c5e40..eef206207 100644
--- a/compiler/tfinfo/include/nnkit/support/tftestinfo/ParsedTensor.h
+++ b/compiler/tfinfo/include/nnkit/support/tftestinfo/ParsedTensor.h
@@ -57,7 +57,7 @@ public:
ParsedTensor(const Kind kind, const std::string &name, const DataType &dtype,
const std::vector<int32_t> &shape)
- : _kind(kind), _dtype(dtype)
+ : _kind(kind), _dtype(dtype)
{
_tensor_name.assign(name);
@@ -66,7 +66,9 @@ public:
_shape.dim(rank) = shape.at(rank);
}
- ~ParsedTensor() { /* empty */}
+ ~ParsedTensor()
+ { /* empty */
+ }
public:
Kind kind() const { return _kind; }
diff --git a/compiler/tfinfo/requires.cmake b/compiler/tfinfo/requires.cmake
index 3b45c6458..d7ecb2382 100644
--- a/compiler/tfinfo/requires.cmake
+++ b/compiler/tfinfo/requires.cmake
@@ -1,3 +1,2 @@
-require("stdex")
require("angkor")
require("oops")
diff --git a/compiler/tfinfo/src/TensorInfoParser.cpp b/compiler/tfinfo/src/TensorInfoParser.cpp
index 9eb3da296..050da40de 100644
--- a/compiler/tfinfo/src/TensorInfoParser.cpp
+++ b/compiler/tfinfo/src/TensorInfoParser.cpp
@@ -21,7 +21,6 @@
#include "Compat.h"
#include <oops/UserExn.h>
-#include <stdex/Memory.h>
#include <nncc/core/ADT/tensor/Shape.h>
#include <cctype>
@@ -197,7 +196,7 @@ std::unique_ptr<ParsedTensor> parse_line(std::string &line)
shape.emplace_back(std::stoi(dim));
}
- return stdex::make_unique<ParsedTensor>(kind, name, dtype, shape);
+ return std::make_unique<ParsedTensor>(kind, name, dtype, shape);
}
#undef CHECK_NOT_NULL
diff --git a/compiler/tfkit/CMakeLists.txt b/compiler/tfkit/CMakeLists.txt
index b809658b1..2058fbc02 100644
--- a/compiler/tfkit/CMakeLists.txt
+++ b/compiler/tfkit/CMakeLists.txt
@@ -7,7 +7,6 @@ message(STATUS "Build tfkit: TRUE")
file(GLOB_RECURSE SOURCES "src/*.cpp")
add_executable(tfkit ${SOURCES})
-target_link_libraries(tfkit PRIVATE stdex)
target_link_libraries(tfkit PRIVATE cli)
target_link_libraries(tfkit PRIVATE mio_tf)
target_link_libraries(tfkit PRIVATE nncc_common)
diff --git a/compiler/tfkit/src/ConvertCommand.cpp b/compiler/tfkit/src/ConvertCommand.cpp
index 3e417cc78..2b5d077c9 100644
--- a/compiler/tfkit/src/ConvertCommand.cpp
+++ b/compiler/tfkit/src/ConvertCommand.cpp
@@ -17,8 +17,6 @@
#include "ConvertCommand.hpp"
#include "Support.hpp"
-#include <stdex/Memory.h>
-
#include <tensorflow/core/framework/graph.pb.h>
#include <google/protobuf/io/coded_stream.h>
@@ -26,6 +24,7 @@
#include <google/protobuf/text_format.h>
#include <google/protobuf/util/json_util.h>
+#include <memory>
#include <cassert>
#include <map>
#include <string>
@@ -114,12 +113,12 @@ int ConvertCommand::run(int argc, const char *const *argv) const
std::map<std::string, std::unique_ptr<Importer>> importers;
- importers["pb"] = stdex::make_unique<ImporterImpl<DataFormat::PBBIN>>();
- importers["pbtxt"] = stdex::make_unique<ImporterImpl<DataFormat::PBTXT>>();
+ importers["pb"] = std::make_unique<ImporterImpl<DataFormat::PBBIN>>();
+ importers["pbtxt"] = std::make_unique<ImporterImpl<DataFormat::PBTXT>>();
std::map<std::string, std::unique_ptr<Exporter>> exporters;
- exporters["json"] = stdex::make_unique<ExporterImpl<DataFormat::JSON>>();
+ exporters["json"] = std::make_unique<ExporterImpl<DataFormat::JSON>>();
auto importer = importers.at(input_format).get();
auto exporter = exporters.at(output_format).get();
diff --git a/compiler/tfkit/src/Main.cpp b/compiler/tfkit/src/Main.cpp
index 60bd6abfa..a695741dd 100644
--- a/compiler/tfkit/src/Main.cpp
+++ b/compiler/tfkit/src/Main.cpp
@@ -21,17 +21,18 @@
#include "ConvertCommand.hpp"
#include <cli/App.h>
-#include <stdex/Memory.h>
+
+#include <memory>
int main(int argc, char **argv)
{
cli::App app{argv[0]};
- app.insert("encode", stdex::make_unique<tfkit::EncodeCommand>());
- app.insert("decode", stdex::make_unique<tfkit::DecodeCommand>());
- app.insert("unpack", stdex::make_unique<tfkit::UnpackCommand>());
- app.insert("pack", stdex::make_unique<tfkit::PackCommand>());
- app.insert("convert", stdex::make_unique<tfkit::ConvertCommand>());
+ app.insert("encode", std::make_unique<tfkit::EncodeCommand>());
+ app.insert("decode", std::make_unique<tfkit::DecodeCommand>());
+ app.insert("unpack", std::make_unique<tfkit::UnpackCommand>());
+ app.insert("pack", std::make_unique<tfkit::PackCommand>());
+ app.insert("convert", std::make_unique<tfkit::ConvertCommand>());
return app.run(argc - 1, argv + 1);
}
diff --git a/compiler/tfkit/src/PackCommand.cpp b/compiler/tfkit/src/PackCommand.cpp
index a1c4a6fc8..d854e30db 100644
--- a/compiler/tfkit/src/PackCommand.cpp
+++ b/compiler/tfkit/src/PackCommand.cpp
@@ -60,7 +60,7 @@ template <> void pack<float>(tensorflow::TensorProto *input_tensor)
}
input_tensor->set_tensor_content(std::string(
- reinterpret_cast<const char *>(tensor_content.data()), sizeof(float) * input_flat_size));
+ reinterpret_cast<const char *>(tensor_content.data()), sizeof(float) * input_flat_size));
input_tensor->clear_float_val();
}
@@ -99,7 +99,7 @@ template <> void pack<int32_t>(tensorflow::TensorProto *input_tensor)
}
input_tensor->set_tensor_content(std::string(
- reinterpret_cast<const char *>(tensor_content.data()), sizeof(int32_t) * input_flat_size));
+ reinterpret_cast<const char *>(tensor_content.data()), sizeof(int32_t) * input_flat_size));
input_tensor->clear_int_val();
}
diff --git a/compiler/tfkit/src/Support.cpp b/compiler/tfkit/src/Support.cpp
index 40d8705a7..1a713b58e 100644
--- a/compiler/tfkit/src/Support.cpp
+++ b/compiler/tfkit/src/Support.cpp
@@ -17,10 +17,9 @@
#include "Support.hpp"
-#include <stdex/Memory.h>
-
#include <tensorflow/core/framework/graph.pb.h>
+#include <memory>
#include <cassert>
#include <fstream>
#include <stdexcept>
@@ -36,10 +35,10 @@ std::unique_ptr<T> open_fstream(const std::string &path, std::ios_base::openmode
return nullptr;
}
- auto stream = stdex::make_unique<T>(path.c_str(), mode);
+ auto stream = std::make_unique<T>(path.c_str(), mode);
if (!stream->is_open())
{
- throw std::runtime_error{"ERROR: Failed to open " + path};
+ throw std::runtime_error{"Failed to open " + path};
}
return stream;
}
@@ -111,7 +110,7 @@ std::string CmdArguments::get_or(unsigned int index, const std::string &s) const
std::unique_ptr<IOConfiguration> make_ioconfig(const CmdArguments &cmdargs)
{
- auto iocfg = stdex::make_unique<IOConfiguration>();
+ auto iocfg = std::make_unique<IOConfiguration>();
auto in = open_fstream<std::ifstream>(cmdargs.get_or(0, "-"), std::ios::in | std::ios::binary);
iocfg->in(std::move(in));
diff --git a/compiler/tfkit/src/Support.hpp b/compiler/tfkit/src/Support.hpp
index a5b954d5e..21726ea57 100644
--- a/compiler/tfkit/src/Support.hpp
+++ b/compiler/tfkit/src/Support.hpp
@@ -41,7 +41,7 @@ class CmdArguments
public:
CmdArguments() = delete;
CmdArguments(int argc, const char *const *argv)
- : _argc(static_cast<unsigned int>(argc)), _argv{argv}
+ : _argc(static_cast<unsigned int>(argc)), _argv{argv}
{
}
diff --git a/compiler/tfkit/src/UnpackCommand.cpp b/compiler/tfkit/src/UnpackCommand.cpp
index a6711f131..b5dd78cbb 100644
--- a/compiler/tfkit/src/UnpackCommand.cpp
+++ b/compiler/tfkit/src/UnpackCommand.cpp
@@ -52,7 +52,7 @@ template <> void unpack<float>(tensorflow::TensorProto *input_tensor)
input_tensor->clear_float_val();
const float *tensor_content =
- reinterpret_cast<const float *>(input_tensor->tensor_content().data());
+ reinterpret_cast<const float *>(input_tensor->tensor_content().data());
for (int i = 0; i < input_flat_size; i++)
{
input_tensor->add_float_val(tensor_content[i]);
@@ -87,7 +87,7 @@ template <> void unpack<int32_t>(tensorflow::TensorProto *input_tensor)
input_tensor->clear_int_val();
const int32_t *tensor_content =
- reinterpret_cast<const int32_t *>(input_tensor->tensor_content().data());
+ reinterpret_cast<const int32_t *>(input_tensor->tensor_content().data());
for (int i = 0; i < input_flat_size; i++)
{
input_tensor->add_int_val(tensor_content[i]);
@@ -122,7 +122,7 @@ template <> void unpack<int8_t>(tensorflow::TensorProto *input_tensor)
input_tensor->clear_int_val();
const int8_t *tensor_content =
- reinterpret_cast<const int8_t *>(input_tensor->tensor_content().data());
+ reinterpret_cast<const int8_t *>(input_tensor->tensor_content().data());
for (int i = 0; i < input_flat_size; i++)
{
input_tensor->add_int_val(tensor_content[i]);
@@ -157,7 +157,7 @@ template <> void unpack<bool>(tensorflow::TensorProto *input_tensor)
input_tensor->clear_bool_val();
const bool *tensor_content =
- reinterpret_cast<const bool *>(input_tensor->tensor_content().data());
+ reinterpret_cast<const bool *>(input_tensor->tensor_content().data());
for (int i = 0; i < input_flat_size; i++)
{
input_tensor->add_bool_val(tensor_content[i]);
diff --git a/compiler/tfl-inspect/CMakeLists.txt b/compiler/tfl-inspect/CMakeLists.txt
index ba019865f..c91ec96e6 100644
--- a/compiler/tfl-inspect/CMakeLists.txt
+++ b/compiler/tfl-inspect/CMakeLists.txt
@@ -1,6 +1,6 @@
-if(NOT TARGET mio_tflite)
+if(NOT TARGET mio_tflite2121)
return()
-endif(NOT TARGET mio_tflite)
+endif(NOT TARGET mio_tflite2121)
set(DRIVER "driver/Driver.cpp")
@@ -10,5 +10,6 @@ add_executable(tfl-inspect ${DRIVER} ${SOURCES})
target_include_directories(tfl-inspect PRIVATE src)
target_link_libraries(tfl-inspect arser)
target_link_libraries(tfl-inspect foder)
-target_link_libraries(tfl-inspect mio_tflite)
+target_link_libraries(tfl-inspect mio_tflite2121)
+target_link_libraries(tfl-inspect mio_tflite2121_helper)
target_link_libraries(tfl-inspect safemain)
diff --git a/compiler/tfl-inspect/driver/Driver.cpp b/compiler/tfl-inspect/driver/Driver.cpp
index a48001169..8505ff4aa 100644
--- a/compiler/tfl-inspect/driver/Driver.cpp
+++ b/compiler/tfl-inspect/driver/Driver.cpp
@@ -32,10 +32,10 @@ int entry(int argc, char **argv)
"Lite model files"};
arser.add_argument("--operators").nargs(0).help("Dump operators in tflite file");
arser.add_argument("--conv2d_weight")
- .nargs(0)
- .help("Dump Conv2D series weight operators in tflite file");
+ .nargs(0)
+ .help("Dump Conv2D series weight operators in tflite file");
arser.add_argument("--op_version").nargs(0).help("Dump versions of the operators in tflite file");
- arser.add_argument("tflite").type(arser::DataType::STR).help("TFLite file to inspect");
+ arser.add_argument("tflite").help("TFLite file to inspect");
try
{
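The dropped `.type(arser::DataType::STR)` calls suggest an arser cleanup in which positional arguments now default to the string type, so spelling it out is redundant. A minimal usage sketch under that assumption (the `--operators` flag is taken from the driver above):

```cpp
#include <arser/arser.h>

#include <iostream>
#include <string>

int main(int argc, char **argv)
{
  arser::Arser arser;
  // Assumption: positional arguments default to STR, so no .type(...) call.
  arser.add_argument("--operators").nargs(0).help("Dump operators in tflite file");
  arser.add_argument("tflite").help("TFLite file to inspect");

  try
  {
    arser.parse(argc, argv);
  }
  catch (const std::runtime_error &err)
  {
    std::cerr << err.what() << std::endl;
    return 255;
  }

  std::cout << "inspecting " << arser.get<std::string>("tflite") << std::endl;
  return 0;
}
```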
diff --git a/compiler/tfl-inspect/requires.cmake b/compiler/tfl-inspect/requires.cmake
index 25857ad2b..80dc592fd 100644
--- a/compiler/tfl-inspect/requires.cmake
+++ b/compiler/tfl-inspect/requires.cmake
@@ -1,4 +1,4 @@
require("arser")
require("foder")
-require("mio-tflite")
+require("mio-tflite2121")
require("safemain")
diff --git a/compiler/tfl-inspect/src/Reader.cpp b/compiler/tfl-inspect/src/Reader.cpp
index 5be289446..1ae2e4702 100644
--- a/compiler/tfl-inspect/src/Reader.cpp
+++ b/compiler/tfl-inspect/src/Reader.cpp
@@ -16,66 +16,15 @@
#include "Reader.h"
+#include <mio_tflite2121/Helper.h>
+
+#include <cassert>
#include <sstream>
#include <string>
namespace tflinspect
{
-bool is_valid(const tflite::OperatorCode *opcode)
-{
- tflite::BuiltinOperator code = opcode->builtin_code();
- return (tflite::BuiltinOperator_MIN <= code && code <= tflite::BuiltinOperator_MAX);
-}
-
-bool is_custom(const tflite::OperatorCode *opcode)
-{
- tflite::BuiltinOperator code = opcode->builtin_code();
- return (code == tflite::BuiltinOperator_CUSTOM);
-}
-
-std::string opcode_name(const tflite::OperatorCode *opcode)
-{
- assert(opcode);
-
- if (!is_valid(opcode))
- {
- std::ostringstream oss;
- oss << "(invalid)";
- return oss.str();
- }
-
- if (is_custom(opcode))
- {
- if (!opcode->custom_code())
- return "(invalid custom)";
-
- std::string custom_op = "CUSTOM(";
- custom_op += opcode->custom_code()->c_str();
- custom_op += ")";
- return custom_op;
- }
-
- tflite::BuiltinOperator code = opcode->builtin_code();
- return tflite::EnumNameBuiltinOperator(code);
-}
-
-const char *tensor_type(const tflite::Tensor *tensor)
-{
- return tflite::EnumNameTensorType(tensor->type());
-}
-
-const char *tensor_name(const tflite::Tensor *tensor)
-{
- static const char *kEmptyTensorName = "(noname)";
-
- auto name = tensor->name();
- if (name)
- return name->c_str();
-
- return kEmptyTensorName;
-}
-
Reader::Reader(const tflite::Model *model)
{
_subgraphs = model->subgraphs();
@@ -122,7 +71,7 @@ tflite::BuiltinOperator Reader::builtin_code(const tflite::Operator *op) const
assert(index < _op_codes.size());
const tflite::OperatorCode *opcode = _op_codes.at(index);
- return opcode->builtin_code();
+ return mio::tflite::builtin_code_neutral(opcode);
}
std::string Reader::opcode_name(const tflite::Operator *op) const
@@ -131,14 +80,14 @@ std::string Reader::opcode_name(const tflite::Operator *op) const
assert(index < _op_codes.size());
const tflite::OperatorCode *opcode = _op_codes.at(index);
- if (!is_valid(opcode))
+ if (!mio::tflite::is_valid(opcode))
{
std::ostringstream oss;
oss << "(invalid: " << index << ")";
return oss.str();
}
- return tflinspect::opcode_name(opcode);
+ return mio::tflite::opcode_name(opcode);
}
bool Reader::select_subgraph(uint32_t sgindex)
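Switching from `opcode->builtin_code()` to `mio::tflite::builtin_code_neutral(opcode)` matters because newer TFLite schemas carry the operator code in two fields: the legacy 8-bit `deprecated_builtin_code` (capped at 127) and an extended 32-bit `builtin_code`. A resolver that is neutral between old and new models would look roughly like the sketch below; this is an illustration of the idea, not the actual mio_tflite2121 helper:

```cpp
#include <cassert>

// Sketch only: tflite::OperatorCode stands for the flatbuffers type
// generated from the TFLite schema (as provided by mio_tflite2121).
namespace sketch
{

tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode)
{
  assert(opcode != nullptr);
  // 127 is the schema's placeholder meaning "see the extended 32-bit field".
  if (opcode->deprecated_builtin_code() == 127)
    return opcode->builtin_code();
  return static_cast<tflite::BuiltinOperator>(opcode->deprecated_builtin_code());
}

} // namespace sketch
```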
diff --git a/compiler/tfl-inspect/src/Reader.h b/compiler/tfl-inspect/src/Reader.h
index e9e182a4b..98554cf85 100644
--- a/compiler/tfl-inspect/src/Reader.h
+++ b/compiler/tfl-inspect/src/Reader.h
@@ -36,12 +36,6 @@ template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T
return ret;
}
-bool is_valid(const tflite::OperatorCode *opcode);
-bool is_custom(const tflite::OperatorCode *opcode);
-std::string opcode_name(const tflite::OperatorCode *opcode);
-const char *tensor_type(const tflite::Tensor *tensor);
-const char *tensor_name(const tflite::Tensor *tensor);
-
/**
* @brief Loads TF lite file and provides helpers to access attributes
*/
diff --git a/compiler/tfl-verify/CMakeLists.txt b/compiler/tfl-verify/CMakeLists.txt
index 4421a4660..4e03722d2 100644
--- a/compiler/tfl-verify/CMakeLists.txt
+++ b/compiler/tfl-verify/CMakeLists.txt
@@ -1,6 +1,6 @@
-if(NOT TARGET mio_tflite)
+if(NOT TARGET mio_tflite2121)
return()
-endif(NOT TARGET mio_tflite)
+endif(NOT TARGET mio_tflite2121)
file(GLOB_RECURSE SOURCES "src/*.cpp")
@@ -8,6 +8,6 @@ add_executable(tfl-verify ${SOURCES})
target_include_directories(tfl-verify PRIVATE src)
target_link_libraries(tfl-verify arser)
target_link_libraries(tfl-verify foder)
-target_link_libraries(tfl-verify mio_tflite)
+target_link_libraries(tfl-verify mio_tflite2121)
target_link_libraries(tfl-verify safemain)
target_link_libraries(tfl-verify cwrap)
diff --git a/compiler/tfl-verify/requires.cmake b/compiler/tfl-verify/requires.cmake
index 79503f325..9579c2792 100644
--- a/compiler/tfl-verify/requires.cmake
+++ b/compiler/tfl-verify/requires.cmake
@@ -1,5 +1,5 @@
require("arser")
require("foder")
-require("mio-tflite")
+require("mio-tflite2121")
require("safemain")
require("cwrap")
diff --git a/compiler/tfl-verify/src/Driver.cpp b/compiler/tfl-verify/src/Driver.cpp
index 6d1897607..62345494b 100644
--- a/compiler/tfl-verify/src/Driver.cpp
+++ b/compiler/tfl-verify/src/Driver.cpp
@@ -25,7 +25,7 @@
int entry(int argc, char **argv)
{
arser::Arser arser;
- arser.add_argument("tflite").type(arser::DataType::STR).help("TFLite file path to verify");
+ arser.add_argument("tflite").help("TFLite file path to verify");
try
{
diff --git a/compiler/tflchef/CMakeLists.txt b/compiler/tflchef/CMakeLists.txt
index ebc873342..73a8a0d6e 100644
--- a/compiler/tflchef/CMakeLists.txt
+++ b/compiler/tflchef/CMakeLists.txt
@@ -5,10 +5,10 @@ if(NOT Protobuf_FOUND)
return()
endif(NOT Protobuf_FOUND)
-if(NOT TARGET mio_tflite)
- message(STATUS "Build tflchef: FAILED (missing mio_tflite)")
+if(NOT TARGET mio_tflite2121)
+ message(STATUS "Build tflchef: FAILED (missing mio_tflite2121)")
return()
-endif(NOT TARGET mio_tflite)
+endif(NOT TARGET mio_tflite2121)
# Recipe Parser
add_subdirectory(proto)
@@ -20,4 +20,9 @@ add_subdirectory(core)
add_subdirectory(tflite)
# Tools
add_subdirectory(tools)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
add_subdirectory(tests)
diff --git a/compiler/tflchef/core/CMakeLists.txt b/compiler/tflchef/core/CMakeLists.txt
index 43f6b8b03..b9f735278 100644
--- a/compiler/tflchef/core/CMakeLists.txt
+++ b/compiler/tflchef/core/CMakeLists.txt
@@ -5,5 +5,5 @@ target_include_directories(tflchef_core PUBLIC include)
target_include_directories(tflchef_core PRIVATE src)
target_link_libraries(tflchef_core tflchef_proto)
target_link_libraries(tflchef_core tflchef_log)
-target_link_libraries(tflchef_core mio_tflite)
+target_link_libraries(tflchef_core mio_tflite2121)
target_link_libraries(tflchef_core souschef)
diff --git a/compiler/tflchef/core/src/Convert.cpp b/compiler/tflchef/core/src/Convert.cpp
index dc8e31db0..d1babf09a 100644
--- a/compiler/tflchef/core/src/Convert.cpp
+++ b/compiler/tflchef/core/src/Convert.cpp
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -45,6 +46,10 @@ tflite::ActivationFunctionType as_tflite_activation(const tflchef::Activation &v
return tflite::ActivationFunctionType_RELU_N1_TO_1;
case tflchef::RELU6:
return tflite::ActivationFunctionType_RELU6;
+ case tflchef::TANH:
+ return tflite::ActivationFunctionType_TANH;
+ case tflchef::SIGN_BIT:
+ return tflite::ActivationFunctionType_SIGN_BIT;
default:
break;
}
@@ -58,14 +63,22 @@ tflite::TensorType as_tflite_tensortype(const tflchef::TensorType &value)
{
case tflchef::FLOAT32:
return tflite::TensorType_FLOAT32;
+ case tflchef::FLOAT16:
+ return tflite::TensorType_FLOAT16;
case tflchef::INT32:
return tflite::TensorType_INT32;
case tflchef::UINT8:
return tflite::TensorType_UINT8;
case tflchef::INT64:
return tflite::TensorType_INT64;
+ case tflchef::STRING:
+ return tflite::TensorType_STRING;
case tflchef::BOOL:
return tflite::TensorType_BOOL;
+ case tflchef::INT16:
+ return tflite::TensorType_INT16;
+ case tflchef::INT8:
+ return tflite::TensorType_INT8;
default:
break;
}
@@ -87,3 +100,291 @@ tflite::MirrorPadMode as_tflite_mirrorpadmode(const tflchef::MirrorPadMode &valu
throw std::runtime_error{"Unknown mirrorpad mode"};
}
+
+tflite::DimensionType as_tflite_dimensiontype(const tflchef::DimensionType &value)
+{
+ switch (value)
+ {
+ case tflchef::DimensionType::DENSE:
+ return tflite::DimensionType_DENSE;
+ case tflchef::DimensionType::SPARSE_CSR:
+ return tflite::DimensionType_SPARSE_CSR;
+ default:
+ break;
+ }
+
+ throw std::runtime_error("Unknown dimension type");
+}
+
+tflite::SparseIndexVector as_tflite_sparse_idx_vec_type(const tflchef::SparseIndexVecType &value)
+{
+ switch (value)
+ {
+ case tflchef::SparseIndexVecType::SparseIdxVecType_NONE:
+ return tflite::SparseIndexVector_NONE;
+ case tflchef::SparseIndexVecType::INT32VEC:
+ return tflite::SparseIndexVector_Int32Vector;
+ case tflchef::SparseIndexVecType::UINT16VEC:
+ return tflite::SparseIndexVector_Uint16Vector;
+ case tflchef::SparseIndexVecType::UINT8VEC:
+ return tflite::SparseIndexVector_Uint8Vector;
+ default:
+ break;
+ }
+
+ throw std::runtime_error("Unknown SparseIndexVector type");
+}
+
+flatbuffers::Offset<void>
+as_tflite_sparse_index_vec(flatbuffers::FlatBufferBuilder &fb,
+ const ::tflchef::TensorSparsity_IndexVec &value)
+{
+ auto sparse_idx_type = value.type();
+
+ switch (sparse_idx_type)
+ {
+ case tflchef::SparseIndexVecType::SparseIdxVecType_NONE:
+ return flatbuffers::Offset<void>();
+ case tflchef::SparseIndexVecType::INT32VEC:
+ {
+ auto values_vec_int32 = std::vector<int32_t>{value.dim().begin(), value.dim().end()};
+ auto values_int32 = fb.CreateVector(values_vec_int32);
+ return tflite::CreateInt32Vector(fb, values_int32).Union();
+ }
+ case tflchef::SparseIndexVecType::UINT16VEC:
+ {
+ auto values_vec_uint16 = std::vector<uint16_t>{value.dim().begin(), value.dim().end()};
+ auto values_uint16 = fb.CreateVector(values_vec_uint16);
+ return tflite::CreateUint16Vector(fb, values_uint16).Union();
+ }
+ case tflchef::SparseIndexVecType::UINT8VEC:
+ {
+ auto values_vec_uint8 = std::vector<uint8_t>{value.dim().begin(), value.dim().end()};
+ auto values_uint8 = fb.CreateVector(values_vec_uint8);
+ return tflite::CreateUint8Vector(fb, values_uint8).Union();
+ }
+ default:
+ break;
+ }
+
+ throw std::runtime_error("Unknown SparseIndexVector type");
+}
+
+// The code in namespace sparsity is referenced from
+// https://github.com/tensorflow/tensorflow/blob/3f878cff5b698b82eea85db2b60d65a2e320850e/
+// tensorflow/lite/kernels/internal/utils/sparsity_format_converter.cc
+
+namespace sparsity
+{
+
+template <typename T>
+FormatConverter<T>::FormatConverter(const std::vector<int> &shape,
+ const std::vector<int> &traversal_order,
+ const std::vector<TfLiteDimensionType> &format,
+ const std::vector<int> &block_size,
+ const std::vector<int> &block_map)
+ : dense_shape_(shape), traversal_order_(traversal_order), block_size_(block_size),
+ block_map_(block_map)
+{
+ dense_size_ = 1;
+ int block_dim = 0;
+ blocked_shape_.resize(shape.size());
+ format_.resize(shape.size() + block_map.size());
+ for (int i = 0; i < shape.size(); i++)
+ {
+ format_[i] = format[traversal_order[i]];
+ dense_size_ *= shape[i];
+ if (block_dim < block_map.size() && block_map[block_dim] == i)
+ {
+ blocked_shape_[i] = shape[i] / block_size[block_dim];
+ block_dim++;
+ }
+ else
+ {
+ blocked_shape_[i] = shape[i];
+ }
+ }
+
+ // Only dense blocks are supported.
+ for (int i = 0; i < block_map.size(); i++)
+ {
+ format_[i + shape.size()] = kTfLiteDimDense;
+ }
+}
+
+template <typename T> bool FormatConverter<T>::DenseToSparse(const T *src_data)
+{
+ int num_original_dims = dense_shape_.size();
+ int num_block_dims = block_map_.size();
+ int num_expanded_dims = num_original_dims + num_block_dims;
+ std::vector<int> expanded_shape(num_expanded_dims);
+ for (int i = 0; i < num_expanded_dims; i++)
+ {
+ if (i < num_original_dims)
+ {
+ expanded_shape[i] = blocked_shape_[i];
+ }
+ else
+ {
+ expanded_shape[i] = block_size_[i - num_original_dims];
+ }
+ }
+
+ std::vector<int> shape_offset(num_original_dims);
+ shape_offset[shape_offset.size() - 1] = 1;
+ for (int i = num_original_dims - 1; i > 0; --i)
+ {
+ shape_offset[i - 1] = shape_offset[i] * dense_shape_[i];
+ }
+
+ std::vector<int> expanded_shape_offset(num_expanded_dims);
+ for (int i = 0; i < num_original_dims; ++i)
+ {
+ expanded_shape_offset[i] = shape_offset[i];
+ }
+ for (int i = 0; i < num_block_dims; ++i)
+ {
+ int mapped_dim = block_map_[i];
+ expanded_shape_offset[num_original_dims + i] = shape_offset[mapped_dim];
+ expanded_shape_offset[mapped_dim] *= block_size_[i];
+ }
+
+ std::vector<int> dst_ordered_offset(num_expanded_dims);
+ for (int i = 0; i < num_expanded_dims; ++i)
+ {
+ dst_ordered_offset[i] = expanded_shape_offset[traversal_order_[i]];
+ }
+
+ std::vector<bool> dst_dim_has_nonzeroes(num_expanded_dims);
+ std::fill(dst_dim_has_nonzeroes.begin(), dst_dim_has_nonzeroes.end(), false);
+ std::vector<int> inner_compressed_dim(num_expanded_dims);
+ int most_recent_compressed_dim = -1;
+ std::vector<int> num_segments_of_next_compressed_dim(num_expanded_dims);
+ int segment_count = 1;
+ for (int i = num_expanded_dims - 1; i >= 0; --i)
+ {
+ inner_compressed_dim[i] = most_recent_compressed_dim;
+ if (format_[i] == kTfLiteDimSparseCSR)
+ {
+ most_recent_compressed_dim = i;
+ num_segments_of_next_compressed_dim[i] = segment_count;
+ segment_count = 1;
+ }
+ else
+ {
+ num_segments_of_next_compressed_dim[i] = -1;
+ segment_count *= expanded_shape[traversal_order_[i]];
+ }
+ }
+
+ dim_metadata_.resize(num_expanded_dims * 2);
+ std::vector<int> dst_sparse_dims;
+ dst_sparse_dims.reserve(num_expanded_dims);
+ for (int i = 0; i < num_expanded_dims; ++i)
+ {
+ dim_metadata_[i * 2].clear();
+ dim_metadata_[i * 2 + 1].clear();
+ if (format_[i] == kTfLiteDimDense)
+ {
+ // If dimension is dense, just store the shape.
+ dim_metadata_[i * 2].push_back(expanded_shape[traversal_order_[i]]);
+ }
+ else
+ {
+ dim_metadata_[i * 2].push_back(0); // Segment array always begins with 0.
+ dst_sparse_dims.push_back(i); // Add dimension to the sparse list.
+ }
+ }
+
+ // This algorithm assumes that the block size is small enough for all the
+ // elements to fit in cache, so the strided accesses from different traversal
+ // order and the write-first-erase-later strategy shouldn't be too slow
+ int dst_dim_idx = num_expanded_dims;
+ std::vector<int> coordinate(num_expanded_dims, 0);
+ int dense_tensor_idx = 0;
+ while (dst_dim_idx >= 0)
+ {
+ if (dst_dim_idx == num_expanded_dims)
+ {
+ // We have a complete coordinate. Add the element to the value array if it
+ // is not zero, or if the last dimension is dense.
+ if (!IsZero(src_data[dense_tensor_idx]))
+ {
+ data_.push_back(src_data[dense_tensor_idx]);
+ // Mark all sparse dimensions that their current indices have nonzeroes.
+ for (auto dst_dim : dst_sparse_dims)
+ {
+ if (!dst_dim_has_nonzeroes[dst_dim])
+ {
+ // Only add the index to the indices array if the current nonzero
+ // is the first nonzero of the block.
+ dim_metadata_[2 * dst_dim + 1].push_back(coordinate[dst_dim]);
+ dst_dim_has_nonzeroes[dst_dim] = true;
+ }
+ }
+ }
+ else if (format_[num_expanded_dims - 1] == kTfLiteDimDense)
+ {
+ data_.push_back(src_data[dense_tensor_idx]);
+ }
+ --dst_dim_idx;
+ }
+ else
+ {
+ int original_dim_idx = traversal_order_[dst_dim_idx];
+ int dim_size = expanded_shape[original_dim_idx];
+ if (dst_dim_has_nonzeroes[dst_dim_idx])
+ {
+ // If the previous block has nonzeroes, reset the flag to false since
+ // we have just moved to a new block.
+ dst_dim_has_nonzeroes[dst_dim_idx] = false;
+ }
+ else if (format_[dst_dim_idx] == kTfLiteDimSparseCSR)
+ {
+ // This block is empty. Delete unnecessary values if compressed.
+ int next_compressed_dim = inner_compressed_dim[dst_dim_idx];
+ int erase_offset = dim_metadata_[2 * dst_dim_idx + 1].size() *
+ num_segments_of_next_compressed_dim[dst_dim_idx];
+ if (next_compressed_dim >= 0)
+ {
+ auto &segments = dim_metadata_[2 * inner_compressed_dim[dst_dim_idx]];
+ segments.erase(segments.begin() + 1 + erase_offset, segments.end());
+ }
+ else
+ {
+ data_.erase(data_.begin() + erase_offset, data_.end());
+ }
+ }
+ if (++coordinate[dst_dim_idx] < dim_size)
+ {
+ // The current dst_dim_idx is valid (not out of bound).
+ dense_tensor_idx += dst_ordered_offset[dst_dim_idx];
+ ++dst_dim_idx;
+ }
+ else
+ {
+ // dst_dim_idx has reached its dim size. Update segment array and go
+ // back to incrementing the previous dimension (dst_dim_idx - 1).
+ if (format_[dst_dim_idx] == kTfLiteDimSparseCSR)
+ {
+ dim_metadata_[2 * dst_dim_idx].push_back(dim_metadata_[2 * dst_dim_idx + 1].size());
+ }
+ coordinate[dst_dim_idx] = -1;
+ dense_tensor_idx -= dst_ordered_offset[dst_dim_idx] * dim_size;
+ --dst_dim_idx;
+ }
+ }
+ }
+
+ return true;
+}
+
+template <typename T> bool FormatConverter<T>::IsZero(const T val)
+{
+ return (val == static_cast<T>(0));
+}
+
+template class FormatConverter<float>;
+template class FormatConverter<uint16_t>; // float16
+
+} // namespace sparsity
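The converter added above walks the dense buffer in `traversal_order`, records values and per-dimension metadata, and erases all-zero blocks for compressed dimensions. A small illustrative driver, assuming it is compiled together with Convert.cpp and the Convert.h declarations in the next hunk: a 4x4 row-major matrix with a dense outer dimension and a CSR inner dimension yields data `{5, 7}`, a dense-dim size of `{4}`, segments `{0, 0, 1, 2, 2}`, and indices `{0, 2}`:

```cpp
#include "Convert.h" // tflchef/core/src, declares sparsity::FormatConverter

#include <vector>

int main()
{
  using sparsity::FormatConverter;
  using sparsity::kTfLiteDimDense;
  using sparsity::kTfLiteDimSparseCSR;

  // 4x4 row-major matrix with two nonzeroes (rows 1 and 2).
  const std::vector<float> dense = {0, 0, 0, 0, //
                                    5, 0, 0, 0, //
                                    0, 0, 7, 0, //
                                    0, 0, 0, 0};

  FormatConverter<float> converter(/*shape=*/{4, 4},
                                   /*traversal_order=*/{0, 1},
                                   /*format=*/{kTfLiteDimDense, kTfLiteDimSparseCSR});
  converter.DenseToSparse(dense.data());

  // GetData() -> {5, 7}; GetDimMetadata() -> {4}, {}, {0,0,1,2,2}, {0,2}
  return converter.GetData().size() == 2 ? 0 : 1;
}
```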
diff --git a/compiler/tflchef/core/src/Convert.h b/compiler/tflchef/core/src/Convert.h
index b56e6ef69..6e910ea2c 100644
--- a/compiler/tflchef/core/src/Convert.h
+++ b/compiler/tflchef/core/src/Convert.h
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -28,5 +29,58 @@ tflite::Padding as_tflite_padding(const tflchef::Padding &value);
tflite::ActivationFunctionType as_tflite_activation(const tflchef::Activation &value);
tflite::TensorType as_tflite_tensortype(const tflchef::TensorType &value);
tflite::MirrorPadMode as_tflite_mirrorpadmode(const tflchef::MirrorPadMode &value);
+tflite::DimensionType as_tflite_dimensiontype(const tflchef::DimensionType &value);
+tflite::SparseIndexVector as_tflite_sparse_idx_vec_type(const tflchef::SparseIndexVecType &value);
+flatbuffers::Offset<void>
+as_tflite_sparse_index_vec(flatbuffers::FlatBufferBuilder &fb,
+ const ::tflchef::TensorSparsity_IndexVec &value);
+
+// Code under namespace sparsity is referenced from
+// https://github.com/tensorflow/tensorflow/blob/3f878cff5b698b82eea85db2b60d65a2e320850e/
+// tensorflow/lite/kernels/internal/utils/sparsity_format_converter.h
+// tensorflow/lite/kernels/internal/utils/sparsity_format_converter.cc
+
+namespace sparsity
+{
+
+// Storage format of each dimension in a sparse tensor.
+typedef enum TfLiteDimensionType
+{
+ kTfLiteDimDense = 0,
+ kTfLiteDimSparseCSR,
+} TfLiteDimensionType;
+
+template <typename T> class FormatConverter
+{
+public:
+ FormatConverter(const std::vector<int32_t> &shape, const std::vector<int32_t> &traversal_order,
+ const std::vector<TfLiteDimensionType> &format,
+ const std::vector<int32_t> &block_size = {},
+ const std::vector<int32_t> &block_map = {});
+
+ bool DenseToSparse(const T *src_data);
+
+ const std::vector<T> &GetData() { return data_; }
+ const std::vector<std::vector<int32_t>> &GetDimMetadata() { return dim_metadata_; }
+
+private:
+ bool IsZero(const T val);
+
+private:
+ std::vector<int32_t> dense_shape_;
+ std::vector<int32_t> blocked_shape_;
+ size_t dense_size_;
+ std::vector<int32_t> traversal_order_;
+ std::vector<TfLiteDimensionType> format_;
+ std::vector<int32_t> block_size_;
+ std::vector<int32_t> block_map_;
+ std::vector<std::vector<int32_t>> dim_metadata_;
+ std::vector<T> data_;
+};
+
+extern template class FormatConverter<float>;
+extern template class FormatConverter<uint16_t>; // float16
+
+} // namespace sparsity
#endif // __CONVERT_H__
diff --git a/compiler/tflchef/core/src/CustomOp/AddV2.cpp b/compiler/tflchef/core/src/CustomOp/AddV2.cpp
index dffd336cd..557c20bce 100644
--- a/compiler/tflchef/core/src/CustomOp/AddV2.cpp
+++ b/compiler/tflchef/core/src/CustomOp/AddV2.cpp
@@ -17,7 +17,7 @@
#include "AddV2.h"
-#include "flatbuffers/flexbuffers.h"
+#include <flatbuffers/flexbuffers.h>
flatbuffers::Offset<void> AddV2Chef::value(flatbuffers::FlatBufferBuilder &fbb) const
{
diff --git a/compiler/tflchef/core/src/CustomOp/All.cpp b/compiler/tflchef/core/src/CustomOp/All.cpp
index b3ae821a4..bbef5ecaa 100644
--- a/compiler/tflchef/core/src/CustomOp/All.cpp
+++ b/compiler/tflchef/core/src/CustomOp/All.cpp
@@ -17,7 +17,7 @@
#include "All.h"
-#include "flatbuffers/flexbuffers.h"
+#include <flatbuffers/flexbuffers.h>
flatbuffers::Offset<void> AllChef::value(flatbuffers::FlatBufferBuilder &fbb) const
{
diff --git a/compiler/tflchef/core/src/CustomOp/BatchMatMulV2.cpp b/compiler/tflchef/core/src/CustomOp/BatchMatMulV2.cpp
index 595f3b9bb..6d2c5b13b 100644
--- a/compiler/tflchef/core/src/CustomOp/BatchMatMulV2.cpp
+++ b/compiler/tflchef/core/src/CustomOp/BatchMatMulV2.cpp
@@ -17,7 +17,7 @@
#include "BatchMatMulV2.h"
-#include "flatbuffers/flexbuffers.h"
+#include <flatbuffers/flexbuffers.h>
flatbuffers::Offset<void> BatchMatMulV2Chef::value(flatbuffers::FlatBufferBuilder &fbb) const
{
diff --git a/compiler/tflchef/core/src/CustomOp/BroadcastTo.cpp b/compiler/tflchef/core/src/CustomOp/BroadcastTo.cpp
new file mode 100644
index 000000000..dd458b376
--- /dev/null
+++ b/compiler/tflchef/core/src/CustomOp/BroadcastTo.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BroadcastTo.h"
+
+#include <flatbuffers/flexbuffers.h>
+
+flatbuffers::Offset<void> BroadcastToChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ return flatbuffers::Offset<void>();
+}
+
+flatbuffers::Offset<flatbuffers::Vector<uint8_t>>
+BroadcastToChef::custom_value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ auto &operation = (*_operation);
+
+ assert(operation.type() == "BroadcastTo");
+
+ /**
+ * REGISTER_OP("BroadcastTo")
+ .Input("input: T")
+ .Input("shape: Tidx")
+ .Output("output: T")
+ .Attr("T: type")
+ .Attr("Tidx: {int32, int64} = DT_INT32")
+ .SetShapeFn([](InferenceContext* c)
+ */
+
+ auto flex_buffers = std::make_unique<flexbuffers::Builder>();
+ size_t map_start = flex_buffers->StartMap();
+
+ // TODO Support more data types
+ flex_buffers->Int("T", tflite::TensorType_FLOAT32);
+ flex_buffers->Int("Tidx", tflite::TensorType_INT32);
+
+ flex_buffers->EndMap(map_start);
+ flex_buffers->Finish();
+
+ auto circle_custom_options = fbb.CreateVector(flex_buffers->GetBuffer());
+ return circle_custom_options;
+}
+
+std::unique_ptr<OpChef> BroadcastToChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new BroadcastToChef{operation}};
+}
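Custom-op options are serialized as a FlexBuffer map inside the flatbuffer, so any consumer reads them back by treating the raw byte vector as a FlexBuffer root. A minimal round-trip sketch with the same flexbuffers API; the keys and values mirror the BroadcastTo writer above:

```cpp
#include <flatbuffers/flexbuffers.h>

#include <iostream>
#include <vector>

int main()
{
  // Writer side, as in BroadcastToChef::custom_value.
  flexbuffers::Builder fbb;
  size_t map_start = fbb.StartMap();
  fbb.Int("T", 0 /* tflite::TensorType_FLOAT32 */);
  fbb.Int("Tidx", 2 /* tflite::TensorType_INT32 */);
  fbb.EndMap(map_start);
  fbb.Finish();

  // Reader side: interpret the byte vector as a FlexBuffer map.
  const std::vector<uint8_t> &buf = fbb.GetBuffer();
  auto map = flexbuffers::GetRoot(buf.data(), buf.size()).AsMap();
  std::cout << "T=" << map["T"].AsInt32() << ", Tidx=" << map["Tidx"].AsInt32() << std::endl;
  return 0;
}
```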
diff --git a/compiler/tflchef/core/src/CustomOp/BroadcastTo.h b/compiler/tflchef/core/src/CustomOp/BroadcastTo.h
new file mode 100644
index 000000000..3ed71c511
--- /dev/null
+++ b/compiler/tflchef/core/src/CustomOp/BroadcastTo.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_BROADCASTTO_H__
+#define __OP_BROADCASTTO_H__
+
+#include "OpChef.h"
+
+class BroadcastToChef final : public OpChef
+{
+public:
+ explicit BroadcastToChef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_CUSTOM; }
+
+ tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_NONE; }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>>
+ custom_value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct BroadcastToChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_BROADCASTTO_H__
diff --git a/compiler/tflchef/core/src/CustomOp/Erf.cpp b/compiler/tflchef/core/src/CustomOp/Erf.cpp
new file mode 100644
index 000000000..f611b68e1
--- /dev/null
+++ b/compiler/tflchef/core/src/CustomOp/Erf.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Erf.h"
+
+#include <flatbuffers/flexbuffers.h>
+
+flatbuffers::Offset<void> ErfChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ return flatbuffers::Offset<void>();
+}
+
+flatbuffers::Offset<flatbuffers::Vector<uint8_t>>
+ErfChef::custom_value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ auto &operation = (*_operation);
+
+ assert(operation.type() == "Erf");
+
+ /**
+ * REGISTER_OP("Erf")
+ .Input("x: T")
+ .Output("y: T")
+ .Attr("T: {bfloat16, half, float, double}")
+ .SetShapeFn(shape_inference::UnchangedShape)
+ */
+
+ auto flex_buffers = std::make_unique<flexbuffers::Builder>();
+ size_t map_start = flex_buffers->StartMap();
+
+ // TODO Support more data types
+ flex_buffers->Int("T", tflite::TensorType_FLOAT32);
+
+ flex_buffers->EndMap(map_start);
+ flex_buffers->Finish();
+
+ auto circle_custom_options = fbb.CreateVector(flex_buffers->GetBuffer());
+ return circle_custom_options;
+}
+
+std::unique_ptr<OpChef> ErfChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new ErfChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/CustomOp/Erf.h b/compiler/tflchef/core/src/CustomOp/Erf.h
new file mode 100644
index 000000000..192c5f334
--- /dev/null
+++ b/compiler/tflchef/core/src/CustomOp/Erf.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_ERF_H__
+#define __OP_ERF_H__
+
+#include "OpChef.h"
+
+class ErfChef final : public OpChef
+{
+public:
+ explicit ErfChef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_CUSTOM; }
+
+ tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_NONE; }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>>
+ custom_value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct ErfChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_ERF_H__
diff --git a/compiler/tflchef/core/src/CustomOp/MatMul.cpp b/compiler/tflchef/core/src/CustomOp/MatMul.cpp
index ba34aa8db..e7c707d37 100644
--- a/compiler/tflchef/core/src/CustomOp/MatMul.cpp
+++ b/compiler/tflchef/core/src/CustomOp/MatMul.cpp
@@ -17,7 +17,7 @@
#include "MatMul.h"
-#include "flatbuffers/flexbuffers.h"
+#include <flatbuffers/flexbuffers.h>
flatbuffers::Offset<void> MatMulChef::value(flatbuffers::FlatBufferBuilder &fbb) const
{
diff --git a/compiler/tflchef/core/src/CustomOp/MatrixBandPart.cpp b/compiler/tflchef/core/src/CustomOp/MatrixBandPart.cpp
index d12597edb..b25003227 100644
--- a/compiler/tflchef/core/src/CustomOp/MatrixBandPart.cpp
+++ b/compiler/tflchef/core/src/CustomOp/MatrixBandPart.cpp
@@ -17,7 +17,7 @@
#include "MatrixBandPart.h"
-#include "flatbuffers/flexbuffers.h"
+#include <flatbuffers/flexbuffers.h>
flatbuffers::Offset<void> MatrixBandPartChef::value(flatbuffers::FlatBufferBuilder &fbb) const
{
diff --git a/compiler/tflchef/core/src/CustomOp/MaxPoolWithArgmax.cpp b/compiler/tflchef/core/src/CustomOp/MaxPoolWithArgmax.cpp
new file mode 100644
index 000000000..290d3c2ca
--- /dev/null
+++ b/compiler/tflchef/core/src/CustomOp/MaxPoolWithArgmax.cpp
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MaxPoolWithArgmax.h"
+
+#include <flatbuffers/flexbuffers.h>
+
+flatbuffers::Offset<void> MaxPoolWithArgmaxChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ return flatbuffers::Offset<void>();
+}
+
+flatbuffers::Offset<flatbuffers::Vector<uint8_t>>
+MaxPoolWithArgmaxChef::custom_value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ auto &operation = (*_operation);
+
+ assert(operation.type() == "MaxPoolWithArgmax");
+
+ /**
+ * REGISTER_OP("MaxPoolWithArgmax")
+ .Attr("ksize: list(int) >= 4")
+ .Attr("strides: list(int) >= 4")
+ .Attr("Targmax: {int32, int64} = DT_INT64")
+ .Attr(GetPaddingAttrString())
+ .Attr("include_batch_in_index: bool = false")
+ .Input("input: T")
+ .Output("output: T")
+ .Output("argmax: Targmax")
+ .Attr("T: realnumbertype")
+ .SetShapeFn([](InferenceContext* c) {
+ TF_RETURN_IF_ERROR(shape_inference::MaxPoolShape(c));
+ c->set_output(1, c->output(0));
+ return Status::OK();
+ });
+ */
+
+ auto flex_buffers = std::make_unique<flexbuffers::Builder>();
+ size_t map_start = flex_buffers->StartMap();
+
+ auto start = flex_buffers->StartVector("ksize");
+ flex_buffers->Add(1);
+ flex_buffers->Add(operation.max_pool_with_argmax_options().filter_width());
+ flex_buffers->Add(operation.max_pool_with_argmax_options().filter_height());
+ flex_buffers->Add(1);
+ flex_buffers->EndVector(start, /*typed=*/true, /*fixed=*/false);
+ start = flex_buffers->StartVector("strides");
+ flex_buffers->Add(1);
+ flex_buffers->Add(operation.max_pool_with_argmax_options().stride_w());
+ flex_buffers->Add(operation.max_pool_with_argmax_options().stride_h());
+ flex_buffers->Add(1);
+ flex_buffers->EndVector(start, /*typed=*/true, /*fixed=*/false);
+ auto output_type = operation.max_pool_with_argmax_options().output_type();
+ assert(output_type == tflchef::INT64 || output_type == tflchef::INT32);
+ flex_buffers->Int("Targmax", output_type);
+ std::string padding = operation.max_pool_with_argmax_options().padding() ? "VALID" : "SAME";
+ flex_buffers->String("padding", padding);
+ flex_buffers->Bool("include_batch_in_index",
+ operation.max_pool_with_argmax_options().include_batch_in_index());
+ flex_buffers->Int("T", tflchef::FLOAT32);
+ flex_buffers->EndMap(map_start);
+ flex_buffers->Finish();
+
+ auto circle_custom_options = fbb.CreateVector(flex_buffers->GetBuffer());
+ return circle_custom_options;
+}
+
+std::unique_ptr<OpChef>
+MaxPoolWithArgmaxChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new MaxPoolWithArgmaxChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/CustomOp/MaxPoolWithArgmax.h b/compiler/tflchef/core/src/CustomOp/MaxPoolWithArgmax.h
new file mode 100644
index 000000000..338ee8026
--- /dev/null
+++ b/compiler/tflchef/core/src/CustomOp/MaxPoolWithArgmax.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_MAXPOOLWITHARGMAX_H__
+#define __OP_MAXPOOLWITHARGMAX_H__
+
+#include "OpChef.h"
+
+class MaxPoolWithArgmaxChef final : public OpChef
+{
+public:
+ explicit MaxPoolWithArgmaxChef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_CUSTOM; }
+
+ tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_NONE; }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>>
+ custom_value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct MaxPoolWithArgmaxChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_MAXPOOLWITHARGMAX_H__
diff --git a/compiler/tflchef/core/src/DataChef.def b/compiler/tflchef/core/src/DataChef.def
new file mode 100644
index 000000000..abe642645
--- /dev/null
+++ b/compiler/tflchef/core/src/DataChef.def
@@ -0,0 +1,30 @@
+#ifndef DATA_CHEF
+#error "Define DATA_CHEF first"
+#endif // DATA_CHEF
+
+// DATA_CHEF(TYPE, NAME, FACTORY_CLASS)
+// "TYPE" SHOULD BE an enum tag of tflchef::TensorType
+DATA_CHEF(FLOAT32, constant, ConstantDataChefFactory<float>)
+DATA_CHEF(BOOL, constant, ConstantDataChefFactory<bool>)
+DATA_CHEF(UINT8, constant, ConstantDataChefFactory<uint8_t>)
+DATA_CHEF(INT8, constant, ConstantDataChefFactory<int8_t>)
+DATA_CHEF(INT16, constant, ConstantDataChefFactory<int16_t>)
+DATA_CHEF(INT32, constant, ConstantDataChefFactory<int32_t>)
+DATA_CHEF(INT64, constant, ConstantDataChefFactory<int64_t>)
+DATA_CHEF(INT64, explicit, ExplicitDataChefFactory<int64_t>)
+DATA_CHEF(INT32, explicit, ExplicitDataChefFactory<int32_t>)
+DATA_CHEF(INT16, explicit, ExplicitDataChefFactory<int16_t>)
+DATA_CHEF(INT8, explicit, ExplicitDataChefFactory<int8_t>)
+DATA_CHEF(UINT8, explicit, ExplicitDataChefFactory<uint8_t>)
+DATA_CHEF(BOOL, explicit, ExplicitDataChefFactory<bool>)
+DATA_CHEF(FLOAT32, explicit, ExplicitDataChefFactory<float>)
+DATA_CHEF(STRING, explicit, ExplicitDataChefFactory<std::string>)
+DATA_CHEF(FLOAT32, gaussian, GaussianFloat32DataChefFactory)
+DATA_CHEF(INT32, gaussian, GaussianInt32DataChefFactory)
+DATA_CHEF(INT16, gaussian, GaussianInt16DataChefFactory)
+DATA_CHEF(INT8, gaussian, GaussianInt8DataChefFactory)
+DATA_CHEF(UINT8, gaussian, GaussianUint8DataChefFactory)
+
+// FLOAT16 is supported only for the gaussian and explicit fillers, for now
+DATA_CHEF(FLOAT16, explicit, ExplicitFloat16DataChefFactory)
+DATA_CHEF(FLOAT16, gaussian, GaussianFloat16DataChefFactory)
diff --git a/compiler/tflchef/core/src/ModelChef.cpp b/compiler/tflchef/core/src/ModelChef.cpp
index a4b435dfa..3afcd232d 100644
--- a/compiler/tflchef/core/src/ModelChef.cpp
+++ b/compiler/tflchef/core/src/ModelChef.cpp
@@ -51,7 +51,7 @@ class GeneratedModelImpl final : public tflchef::GeneratedModel::Impl
{
public:
GeneratedModelImpl(std::unique_ptr<flatbuffers::FlatBufferBuilder> &&builder)
- : _builder{std::move(builder)}
+ : _builder{std::move(builder)}
{
// DO NOTHING
}
@@ -89,7 +89,11 @@ DataChefRegistry &data_chef_registry(const tflchef::TensorType &type)
static DataChefRegistry s64;
static DataChefRegistry fp32;
static DataChefRegistry u8;
+ static DataChefRegistry string;
static DataChefRegistry boolean;
+ static DataChefRegistry s16;
+ static DataChefRegistry fp16;
+ static DataChefRegistry s8;
switch (type)
{
@@ -99,10 +103,18 @@ DataChefRegistry &data_chef_registry(const tflchef::TensorType &type)
return s64;
case tflchef::FLOAT32:
return fp32;
+ case tflchef::FLOAT16:
+ return fp16;
case tflchef::UINT8:
return u8;
+ case tflchef::STRING:
+ return string;
case tflchef::BOOL:
return boolean;
+ case tflchef::INT16:
+ return s16;
+ case tflchef::INT8:
+ return s8;
default:
break;
}
@@ -197,10 +209,46 @@ struct CookParams
std::vector<flatbuffers::Offset<::tflite::SubGraph>> &subgraph_vec;
std::unique_ptr<flatbuffers::FlatBufferBuilder> &flatbuffer_builder;
std::map<tflite::BuiltinOperator, int32_t> &builtin_code_map;
+ std::vector<std::string> &custom_code_vec;
std::string noname;
};
-template <typename T> void cook_graph(const T &graph, CookParams &cp)
+std::vector<flatbuffers::Offset<tflite::DimensionMetadata>>
+make_dim_metadata_vec(flatbuffers::FlatBufferBuilder *flatbuffer_builder, int32_t dims_count,
+ const std::vector<int> &traversal_order_vec,
+ const std::vector<sparsity::TfLiteDimensionType> &format_vec,
+ const std::vector<std::vector<int32_t>> &dim_metadata_src)
+{
+ // Build sparsity parameter.
+ std::vector<flatbuffers::Offset<tflite::DimensionMetadata>> dim_metadata_vec(dims_count);
+ for (int32_t i = 0; i < dims_count; i++)
+ {
+ const int32_t metadata_idx = 2 * i;
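+ // dim_metadata_src packs two entries per dimension: entry 2*i holds the
+ // dense size (DENSE case) or the array segments (SPARSE_CSR case), and
+ // entry 2*i+1 holds the array indices used only for SPARSE_CSR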
+ if (format_vec[traversal_order_vec[i]] == sparsity::kTfLiteDimSparseCSR)
+ {
+ auto array_segments =
+ tflite::CreateInt32Vector(*flatbuffer_builder,
+ flatbuffer_builder->CreateVector(dim_metadata_src[metadata_idx]))
+ .Union();
+ auto array_indices =
+ tflite::CreateInt32Vector(
+ *flatbuffer_builder, flatbuffer_builder->CreateVector(dim_metadata_src[metadata_idx + 1]))
+ .Union();
+ dim_metadata_vec[i] =
+ tflite::CreateDimensionMetadata(*flatbuffer_builder, tflite::DimensionType_SPARSE_CSR, 0,
+ tflite::SparseIndexVector_Int32Vector, array_segments,
+ tflite::SparseIndexVector_Int32Vector, array_indices);
+ }
+ else
+ {
+ dim_metadata_vec[i] = tflite::CreateDimensionMetadata(
+ *flatbuffer_builder, tflite::DimensionType_DENSE, dim_metadata_src[metadata_idx][0]);
+ }
+ }
+ return dim_metadata_vec;
+}
+
+template <typename T> std::map<std::string, int32_t> cook_graph(const T &graph, CookParams &cp)
{
LOGGER(l);
@@ -209,6 +257,7 @@ template <typename T> void cook_graph(const T &graph, CookParams &cp)
std::vector<flatbuffers::Offset<::tflite::SubGraph>> &subgraph_vec = cp.subgraph_vec;
std::unique_ptr<flatbuffers::FlatBufferBuilder> &flatbuffer_builder = cp.flatbuffer_builder;
std::map<tflite::BuiltinOperator, int32_t> &builtin_code_map = cp.builtin_code_map;
+ std::vector<std::string> &custom_code_vec = cp.custom_code_vec;
// Operand-related
std::vector<flatbuffers::Offset<::tflite::Tensor>> tensor_vec;
@@ -263,6 +312,8 @@ template <typename T> void cook_graph(const T &graph, CookParams &cp)
assert(operand.has_type());
+ flatbuffers::Offset<tflite::SparsityParameters> sparsity_index;
+
flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape;
std::vector<int32_t> dims;
if (operand.has_shape())
@@ -290,16 +341,125 @@ template <typename T> void cook_graph(const T &graph, CookParams &cp)
// Create Data
int32_t count = (element_count(dims) > 0) ? element_count(dims) : filler.arg_size();
auto data_vec = chef->generate(count);
- auto data = flatbuffer_builder->CreateVector(data_vec);
- // Create Buffer
- tflite::BufferBuilder buffer_builder{*flatbuffer_builder};
- buffer_builder.add_data(data);
- auto buffer = buffer_builder.Finish();
+ if (operand.has_make_sparse() && operand.make_sparse())
+ {
+ assert(not operand.has_sparsity());
+ assert(operand.has_shape());
+
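+ // Convert the filler-generated dense data into TFLite's sparse layout:
+ // every dimension except the innermost stays dense, and the innermost is
+ // encoded as SPARSE_CSR.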
+ const int32_t dims_count = dims.size();
+ std::vector<int> traversal_order_vec;
+ std::vector<sparsity::TfLiteDimensionType> format_vec;
+ for (int32_t o = 0; o < dims_count; ++o)
+ traversal_order_vec.push_back(o);
+ for (int32_t o = 0; o < dims_count - 1; ++o)
+ format_vec.push_back(sparsity::kTfLiteDimDense);
+ format_vec.push_back(sparsity::kTfLiteDimSparseCSR);
+
+ if (operand.type() == tflchef::FLOAT32)
+ {
+ ::sparsity::FormatConverter<float> converter(dims, traversal_order_vec, format_vec);
+ converter.DenseToSparse(reinterpret_cast<const float *>(data_vec.data()));
+ const auto &sparse_data = converter.GetData();
+
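+ // flatten the sparse values into raw bytes (host byte order) for the
+ // Buffer payload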
+ std::vector<uint8_t> sparse_uint8;
+ for (int c = 0; c < sparse_data.size(); ++c)
+ {
+ const float value = sparse_data.at(c);
+ const uint8_t *arr = reinterpret_cast<const uint8_t *>(&value);
+ for (uint32_t b = 0; b < sizeof(float); ++b)
+ {
+ sparse_uint8.emplace_back(arr[b]);
+ }
+ }
+ auto data = flatbuffer_builder->CreateVector(sparse_uint8);
+
+ // Create Buffer
+ tflite::BufferBuilder buffer_builder{*flatbuffer_builder};
+ buffer_builder.add_data(data);
+ auto buffer = buffer_builder.Finish();
+
+ // Update Buffer Index & Vector
+ buffer_index = buffer_vec.size();
+ buffer_vec.emplace_back(buffer);
+
+ // save SparsityParameters
+ auto traversal_order = flatbuffer_builder->CreateVector(traversal_order_vec);
+
+ // Create block map
+ std::vector<int> block_map_vec{};
+ auto block_map = flatbuffer_builder->CreateVector(block_map_vec);
+
+ // Create dimension metadata
+ const auto &dim_metadata_src = converter.GetDimMetadata();
+ auto dim_metadata_vec =
+ make_dim_metadata_vec(flatbuffer_builder.get(), dims_count, traversal_order_vec,
+ format_vec, dim_metadata_src);
+ auto dim_metadata = flatbuffer_builder->CreateVector(dim_metadata_vec);
+ sparsity_index = tflite::CreateSparsityParameters(*flatbuffer_builder, traversal_order,
+ block_map, dim_metadata);
+ }
+ else if (operand.type() == tflchef::FLOAT16)
+ {
+ ::sparsity::FormatConverter<uint16_t> converter(dims, traversal_order_vec, format_vec);
+ converter.DenseToSparse(reinterpret_cast<const uint16_t *>(data_vec.data()));
+ const auto &sparse_data = converter.GetData();
- // Update Buffer Index & Vector
- buffer_index = buffer_vec.size();
- buffer_vec.emplace_back(buffer);
+ std::vector<uint8_t> sparse_uint8;
+ for (int c = 0; c < sparse_data.size(); ++c)
+ {
+ const uint16_t value = sparse_data.at(c);
+ const uint8_t *arr = reinterpret_cast<const uint8_t *>(&value);
+ for (uint32_t b = 0; b < sizeof(uint16_t); ++b)
+ {
+ sparse_uint8.emplace_back(arr[b]);
+ }
+ }
+ auto data = flatbuffer_builder->CreateVector(sparse_uint8);
+
+ // Create Buffer
+ tflite::BufferBuilder buffer_builder{*flatbuffer_builder};
+ buffer_builder.add_data(data);
+ auto buffer = buffer_builder.Finish();
+
+ // Update Buffer Index & Vector
+ buffer_index = buffer_vec.size();
+ buffer_vec.emplace_back(buffer);
+
+ // save SparsityParameters
+ auto traversal_order = flatbuffer_builder->CreateVector(traversal_order_vec);
+
+ // Create block map
+ std::vector<int> block_map_vec{};
+ auto block_map = flatbuffer_builder->CreateVector(block_map_vec);
+
+ // Create dimension metadata
+ const auto &dim_metadata_src = converter.GetDimMetadata();
+ auto dim_metadata_vec =
+ make_dim_metadata_vec(flatbuffer_builder.get(), dims_count, traversal_order_vec,
+ format_vec, dim_metadata_src);
+ auto dim_metadata = flatbuffer_builder->CreateVector(dim_metadata_vec);
+ sparsity_index = tflite::CreateSparsityParameters(*flatbuffer_builder, traversal_order,
+ block_map, dim_metadata);
+ }
+ else
+ {
+ throw std::runtime_error{"NYI: unsupported operand type"};
+ }
+ }
+ else
+ {
+ auto data = flatbuffer_builder->CreateVector(data_vec);
+
+ // Create Buffer
+ tflite::BufferBuilder buffer_builder{*flatbuffer_builder};
+ buffer_builder.add_data(data);
+ auto buffer = buffer_builder.Finish();
+
+ // Update Buffer Index & Vector
+ buffer_index = buffer_vec.size();
+ buffer_vec.emplace_back(buffer);
+ }
}
else
{
@@ -376,6 +536,58 @@ template <typename T> void cook_graph(const T &graph, CookParams &cp)
quant_index = quant_builder.Finish();
}
+ if (operand.has_sparsity())
+ {
+ const auto &sparsity = operand.sparsity();
+
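+ // The recipe spells out sparsity explicitly, so translate each field
+ // one-to-one into the tflite schema types.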
+ // Create traversal order
+ std::vector<int> traversal_order_vec{sparsity.traversal_order().dim().begin(),
+ sparsity.traversal_order().dim().end()};
+ auto traversal_order = flatbuffer_builder->CreateVector(traversal_order_vec);
+
+ // Create block map
+ std::vector<int> block_map_vec{sparsity.block_map().dim().begin(),
+ sparsity.block_map().dim().end()};
+ auto block_map = flatbuffer_builder->CreateVector(block_map_vec);
+
+ // Create dimension metadata
+ std::vector<flatbuffers::Offset<tflite::DimensionMetadata>> dim_metadata_vec;
+ auto recipe_dim_metadata = sparsity.dim_metadata();
+ for (const auto &dm : recipe_dim_metadata)
+ {
+ // Create array segments
+ auto tflite_array_segments =
+ as_tflite_sparse_index_vec(*flatbuffer_builder, dm.array_segments());
+
+ // Create array indices
+ auto tflite_array_indices =
+ as_tflite_sparse_index_vec(*flatbuffer_builder, dm.array_indices());
+
+ auto tflite_dim_metadata_builder = tflite::DimensionMetadataBuilder{*flatbuffer_builder};
+ tflite_dim_metadata_builder.add_format(as_tflite_dimensiontype(dm.format()));
+ tflite_dim_metadata_builder.add_dense_size(dm.dense_size());
+ tflite_dim_metadata_builder.add_array_segments(tflite_array_segments);
+ tflite_dim_metadata_builder.add_array_segments_type(
+ as_tflite_sparse_idx_vec_type(dm.array_segments().type()));
+ tflite_dim_metadata_builder.add_array_indices(tflite_array_indices);
+ tflite_dim_metadata_builder.add_array_indices_type(
+ as_tflite_sparse_idx_vec_type(dm.array_indices().type()));
+ auto tflite_dim_metadata = tflite_dim_metadata_builder.Finish();
+ dim_metadata_vec.emplace_back(tflite_dim_metadata);
+ }
+ auto dim_metadata = flatbuffer_builder->CreateVector(dim_metadata_vec);
+
+ sparsity_index = tflite::CreateSparsityParameters(*flatbuffer_builder, traversal_order,
+ block_map, dim_metadata);
+ }
+
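+ // shape_signature, when present, is emitted alongside shape; tflite uses
+ // -1 entries in shape_signature to mark dynamic dimensions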
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape_signature;
+ if (operand.has_shape_signature())
+ {
+ auto signature = as_dims(operand.shape_signature());
+ shape_signature = flatbuffer_builder->CreateVector(signature);
+ }
+
// Create Tensor
tflite::TensorBuilder tensor_builder{*flatbuffer_builder};
@@ -383,8 +595,12 @@ template <typename T> void cook_graph(const T &graph, CookParams &cp)
tensor_builder.add_type(as_tflite_tensortype(operand.type()));
tensor_builder.add_buffer(buffer_index);
tensor_builder.add_name(name);
+ tensor_builder.add_is_variable(operand.is_variable());
if (operand.has_quant())
tensor_builder.add_quantization(quant_index);
+ tensor_builder.add_sparsity(sparsity_index);
+ if (operand.has_shape_signature())
+ tensor_builder.add_shape_signature(shape_signature);
// Append!
tensor_vec.emplace_back(tensor_builder.Finish());
@@ -422,11 +638,23 @@ template <typename T> void cook_graph(const T &graph, CookParams &cp)
// Create Operator
tflite::OperatorBuilder op_builder{*flatbuffer_builder};
- // Get operator code index from builtin_code_set with assumption, order of
- // builtin_code_set is same as that of code_vec
+ // Note that opcode_index is an index into the operator_codes vector.
+ // operator_codes consists of builtin codes followed by custom codes, inserted sequentially.
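+ // For example, with builtin codes {ADD, MUL} and custom codes {"Erf"},
+ // operator_codes is [ADD, MUL, "Erf"] and "Erf" gets opcode_index 2.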
+ uint32_t opcode_index = 0;
auto op_it = builtin_code_map.find(op_chef->code());
- assert(op_it != builtin_code_map.end());
- uint32_t opcode_index = std::distance(builtin_code_map.begin(), op_it);
+ // builtin operator
+ if (op_it != builtin_code_map.end())
+ {
+ opcode_index = std::distance(builtin_code_map.begin(), op_it);
+ }
+ // custom operator
+ else
+ {
+ auto op_it = std::find(custom_code_vec.begin(), custom_code_vec.end(), operation.type());
+ assert(op_it != custom_code_vec.end());
+ opcode_index = builtin_code_map.size();
+ opcode_index += std::distance(custom_code_vec.begin(), op_it);
+ }
op_builder.add_opcode_index(opcode_index);
op_builder.add_inputs(inputs);
@@ -459,6 +687,8 @@ template <typename T> void cook_graph(const T &graph, CookParams &cp)
subgraph_builder.add_name(name);
subgraph_vec.emplace_back(subgraph_builder.Finish());
+
+ return symbol_table;
}
} // namespace
@@ -480,15 +710,15 @@ GeneratedModel cook(const ::tflchef::ModelRecipe &model_recipe)
// Initialize Data Chef Registry
#define DATA_CHEF(TYPE, NAME, FACTORY_CLASS) \
data_chef_registry(::tflchef::TYPE) \
- .add(#NAME, std::unique_ptr<FACTORY_CLASS>(new FACTORY_CLASS()));
-#include <souschef/DataChef.def>
+ .add(#NAME, std::unique_ptr<FACTORY_CLASS>(new FACTORY_CLASS()));
+#include "DataChef.def"
#undef DATA_CHEF
//
// Create FlatBufferBuilder
//
auto flatbuffer_builder =
- std::unique_ptr<flatbuffers::FlatBufferBuilder>(new flatbuffers::FlatBufferBuilder(1024));
+ std::unique_ptr<flatbuffers::FlatBufferBuilder>(new flatbuffers::FlatBufferBuilder(1024));
// Operand-related
std::vector<flatbuffers::Offset<::tflite::Buffer>> buffer_vec;
@@ -496,6 +726,9 @@ GeneratedModel cook(const ::tflchef::ModelRecipe &model_recipe)
// Operation-related
std::vector<flatbuffers::Offset<::tflite::OperatorCode>> code_vec;
+ // SignatureDef-related
+ std::vector<flatbuffers::Offset<::tflite::SignatureDef>> signdef_vec;
+
// Graphs-related
std::vector<flatbuffers::Offset<::tflite::SubGraph>> subgraph_vec;
@@ -504,8 +737,21 @@ GeneratedModel cook(const ::tflchef::ModelRecipe &model_recipe)
for (auto const &opcode : builtin_code_map)
{
tflite::OperatorCodeBuilder code_builder{*flatbuffer_builder};
- code_builder.add_builtin_code(opcode.first);
+ // 127 is BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES
+ // This is the way to handle deprecated builtin code
+ // See
+ // https://github.com/tensorflow/tensorflow/blob/a0afe8f9218be5eb9ed5dffc2dff652996da8c28/tensorflow/lite/schema/schema.fbs#L1061-L1077
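+ // The deprecated field is a single byte, so builtin codes >= 127 cannot be
+ // stored there; old readers see the placeholder, and newer readers use the
+ // 32-bit builtin_code written below.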
+ if (opcode.first < 127)
+ {
+ code_builder.add_deprecated_builtin_code(opcode.first);
+ }
+ else
+ {
+ code_builder.add_deprecated_builtin_code(
+ ::tflite::BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES);
+ }
code_builder.add_version(opcode.second);
+ code_builder.add_builtin_code(opcode.first);
auto code = code_builder.Finish();
// Update OperatorCode vector
code_vec.emplace_back(code);
@@ -513,16 +759,15 @@ GeneratedModel cook(const ::tflchef::ModelRecipe &model_recipe)
// Create OperatorCode with Custom Operator
std::set<std::string> custom_code_set = gather_customcode_set(model_recipe);
- if (custom_code_set.size() &&
- builtin_code_map.find(tflite::BuiltinOperator_CUSTOM) == builtin_code_map.end())
- builtin_code_map[tflite::BuiltinOperator_CUSTOM] = 1;
+ std::vector<std::string> custom_code_vec{custom_code_set.begin(), custom_code_set.end()};
- for (auto opcode : custom_code_set)
+ for (auto opcode : custom_code_vec)
{
auto custom_code = flatbuffer_builder->CreateString(opcode);
tflite::OperatorCodeBuilder code_builder{*flatbuffer_builder};
- code_builder.add_builtin_code(tflite::BuiltinOperator_CUSTOM);
+ code_builder.add_deprecated_builtin_code(tflite::BuiltinOperator_CUSTOM);
code_builder.add_custom_code(custom_code);
+ code_builder.add_builtin_code(tflite::BuiltinOperator_CUSTOM);
auto code = code_builder.Finish();
// Update OperatorCode vector
code_vec.emplace_back(code);
@@ -537,12 +782,18 @@ GeneratedModel cook(const ::tflchef::ModelRecipe &model_recipe)
buffer_vec.emplace_back(buffer_builder.Finish());
}
+ // symbol_tables stores the symbol_table of each subgraph;
+ // it is used to find a tensor ID (index) from a tensor name
+ std::vector<std::map<std::string, int32_t>> symbol_tables;
+
//
// Create Main graph
//
- CookParams cp{buffer_vec, code_vec, subgraph_vec, flatbuffer_builder, builtin_code_map, "main"};
+ CookParams cp{buffer_vec, code_vec, subgraph_vec, flatbuffer_builder,
+ builtin_code_map, custom_code_vec, "main"};
- cook_graph<::tflchef::ModelRecipe>(model_recipe, cp);
+ auto table = cook_graph<::tflchef::ModelRecipe>(model_recipe, cp);
+ symbol_tables.push_back(table);
//
// Create subgraphs if exist
@@ -554,14 +805,98 @@ GeneratedModel cook(const ::tflchef::ModelRecipe &model_recipe)
std::ostringstream stringStream;
stringStream << "sub_" << (g + 1);
- CookParams cp{buffer_vec, code_vec, subgraph_vec,
- flatbuffer_builder, builtin_code_map, stringStream.str()};
+ CookParams cp{buffer_vec, code_vec, subgraph_vec, flatbuffer_builder,
+ builtin_code_map, custom_code_vec, stringStream.str()};
+
+ auto table = cook_graph<::tflchef::Graph>(graph, cp);
+ symbol_tables.push_back(table);
+ }
+
+ //
+ // Create Signature-Def
+ //
+ for (int s = 0; s < model_recipe.signature_def_size(); ++s)
+ {
+ // load from recipe
+ const auto &rec_signature_def = model_recipe.signature_def(s);
+
+ std::vector<flatbuffers::Offset<::tflite::TensorMap>> tensormap_inputs;
+ std::vector<flatbuffers::Offset<::tflite::TensorMap>> tensormap_outputs;
+
+ // which subgraph index to cook
+ auto subgraph_index = 0;
+ if (rec_signature_def.has_subgraph_index())
+ {
+ subgraph_index = rec_signature_def.subgraph_index();
+ }
+ assert(subgraph_index < symbol_tables.size());
+ auto &symbol_table = symbol_tables[subgraph_index];
+
+ // cook for inputs
+ for (int si = 0; si < rec_signature_def.inputs_size(); ++si)
+ {
+ // recipe for input TensorMap
+ auto rec_tm_input = rec_signature_def.inputs(si);
+ auto name = flatbuffer_builder->CreateString(rec_tm_input.name());
+ uint32_t tensor_index = 0;
+ // either tensor or tensor_index should exist
+ assert(rec_tm_input.has_tensor() || rec_tm_input.has_tensor_index());
+ if (rec_tm_input.has_tensor())
+ {
+ // we can get tensor_index from symbol_table
+ auto tensor = rec_tm_input.tensor();
+ tensor_index = symbol_table[tensor];
+ }
+ else
+ {
+ // or we can use tensor_index itself
+ tensor_index = rec_tm_input.tensor_index();
+ }
+
+ ::tflite::TensorMapBuilder tensormap_builder{*flatbuffer_builder};
+ tensormap_builder.add_name(name);
+ tensormap_builder.add_tensor_index(tensor_index);
+ tensormap_inputs.push_back(tensormap_builder.Finish());
+ }
+ // cook for outputs, same as inputs
+ for (int so = 0; so < rec_signature_def.outputs_size(); ++so)
+ {
+ auto rec_tm_output = rec_signature_def.outputs(so);
+ auto name = flatbuffer_builder->CreateString(rec_tm_output.name());
+ uint32_t tensor_index = 0;
+ assert(rec_tm_output.has_tensor() || rec_tm_output.has_tensor_index());
+ if (rec_tm_output.has_tensor())
+ {
+ auto tensor = rec_tm_output.tensor();
+ tensor_index = symbol_table[tensor];
+ }
+ else
+ {
+ tensor_index = rec_tm_output.tensor_index();
+ }
+
+ ::tflite::TensorMapBuilder tensormap_builder{*flatbuffer_builder};
+ tensormap_builder.add_name(name);
+ tensormap_builder.add_tensor_index(tensor_index);
+ tensormap_outputs.push_back(tensormap_builder.Finish());
+ }
+
+ auto inputs = flatbuffer_builder->CreateVector(tensormap_inputs);
+ auto outputs = flatbuffer_builder->CreateVector(tensormap_outputs);
+ auto signature_key = flatbuffer_builder->CreateString(rec_signature_def.signature_key());
+ // TODO add validation for signature_key
+
+ ::tflite::SignatureDefBuilder signature_def_builder{*flatbuffer_builder};
+ signature_def_builder.add_inputs(inputs);
+ signature_def_builder.add_outputs(outputs);
+ signature_def_builder.add_signature_key(signature_key);
+ signature_def_builder.add_subgraph_index(rec_signature_def.subgraph_index());
- cook_graph<::tflchef::Graph>(graph, cp);
+ signdef_vec.emplace_back(signature_def_builder.Finish());
}
// Create "Model" arguments
auto buffers = flatbuffer_builder->CreateVector(buffer_vec);
+ auto signdefs = flatbuffer_builder->CreateVector(signdef_vec);
auto operator_codes = flatbuffer_builder->CreateVector(code_vec);
auto subgraphs = flatbuffer_builder->CreateVector(subgraph_vec);
auto description = flatbuffer_builder->CreateString("Generated by tflchef");
@@ -571,6 +906,7 @@ GeneratedModel cook(const ::tflchef::ModelRecipe &model_recipe)
model_builder.add_version(3);
model_builder.add_operator_codes(operator_codes);
+ model_builder.add_signature_defs(signdefs);
model_builder.add_subgraphs(subgraphs);
model_builder.add_description(description);
model_builder.add_buffers(buffers);
@@ -582,7 +918,7 @@ GeneratedModel cook(const ::tflchef::ModelRecipe &model_recipe)
// Return "GenerateModel"
return GeneratedModel{
- std::unique_ptr<GeneratedModelImpl>(new GeneratedModelImpl(std::move(flatbuffer_builder)))};
+ std::unique_ptr<GeneratedModelImpl>(new GeneratedModelImpl(std::move(flatbuffer_builder)))};
}
} // namespace tflchef
diff --git a/compiler/tflchef/core/src/Op/BidirectionalSequenceLSTM.cpp b/compiler/tflchef/core/src/Op/BidirectionalSequenceLSTM.cpp
new file mode 100644
index 000000000..1bf2264ab
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/BidirectionalSequenceLSTM.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BidirectionalSequenceLSTM.h"
+#include "Convert.h"
+
+#include <cassert>
+
+flatbuffers::Offset<void>
+BidirectionalSequenceLSTMChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ auto &operation = (*_operation);
+
+ assert(operation.has_bidirectional_sequence_lstm_options());
+
+ tflite::BidirectionalSequenceLSTMOptionsBuilder options_builder(fbb);
+ options_builder.add_fused_activation_function(
+ as_tflite_activation(operation.bidirectional_sequence_lstm_options().activation()));
+ options_builder.add_cell_clip(operation.bidirectional_sequence_lstm_options().cell_clip());
+ options_builder.add_proj_clip(operation.bidirectional_sequence_lstm_options().proj_clip());
+ options_builder.add_time_major(operation.bidirectional_sequence_lstm_options().time_major());
+ options_builder.add_asymmetric_quantize_inputs(
+ operation.bidirectional_sequence_lstm_options().asymmetric_quantize_inputs());
+ options_builder.add_merge_outputs(
+ operation.bidirectional_sequence_lstm_options().merge_outputs());
+
+ return options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef>
+BidirectionalSequenceLSTMChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new BidirectionalSequenceLSTMChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/BidirectionalSequenceLSTM.h b/compiler/tflchef/core/src/Op/BidirectionalSequenceLSTM.h
new file mode 100644
index 000000000..e66917b97
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/BidirectionalSequenceLSTM.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_BIDIRECTIONALSEQUENCE_LSTM_H__
+#define __OP_BIDIRECTIONALSEQUENCE_LSTM_H__
+
+#include "OpChef.h"
+
+class BidirectionalSequenceLSTMChef final : public OpChef
+{
+public:
+ explicit BidirectionalSequenceLSTMChef(const tflchef::Operation *operation)
+ : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override
+ {
+ return tflite::BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM;
+ }
+
+ tflite::BuiltinOptions type(void) const override
+ {
+ return tflite::BuiltinOptions_BidirectionalSequenceLSTMOptions;
+ }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct BidirectionalSequenceLSTMChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_BIDIRECTIONALSEQUENCE_LSTM_H__
diff --git a/compiler/tflchef/core/src/Op/Densify.cpp b/compiler/tflchef/core/src/Op/Densify.cpp
new file mode 100644
index 000000000..63c4e207a
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Densify.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Densify.h"
+
+flatbuffers::Offset<void> DensifyChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ tflite::DensifyOptionsBuilder options_builder{fbb};
+
+ return options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef> DensifyChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new DensifyChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/Densify.h b/compiler/tflchef/core/src/Op/Densify.h
new file mode 100644
index 000000000..f6af693d9
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Densify.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_DENSIFY_H__
+#define __OP_DENSIFY_H__
+
+#include "OpChef.h"
+
+class DensifyChef final : public OpChef
+{
+public:
+ explicit DensifyChef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_DENSIFY; }
+
+ tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_DensifyOptions; }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct DensifyChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_DENSIFY_H__
diff --git a/compiler/tflchef/core/src/Op/Dequantize.cpp b/compiler/tflchef/core/src/Op/Dequantize.cpp
new file mode 100644
index 000000000..761d7f99e
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Dequantize.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Dequantize.h"
+
+flatbuffers::Offset<void> DequantizeChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ return flatbuffers::Offset<void>();
+}
+
+std::unique_ptr<OpChef> DequantizeChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new DequantizeChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/Dequantize.h b/compiler/tflchef/core/src/Op/Dequantize.h
new file mode 100644
index 000000000..82580560d
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Dequantize.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_DEQUANTIZE_H__
+#define __OP_DEQUANTIZE_H__
+
+#include "OpChef.h"
+
+class DequantizeChef final : public OpChef
+{
+public:
+ explicit DequantizeChef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_DEQUANTIZE; }
+
+ tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_NONE; }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct DequantizeChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_DEQUANTIZE_H__
diff --git a/compiler/tflchef/core/src/Op/FakeQuant.cpp b/compiler/tflchef/core/src/Op/FakeQuant.cpp
new file mode 100644
index 000000000..e4cbbfe44
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/FakeQuant.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FakeQuant.h"
+#include "Convert.h"
+
+#include <cassert>
+
+flatbuffers::Offset<void> FakeQuantChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ auto &operation = (*_operation);
+ assert(operation.has_fakequant_options());
+
+ auto options = operation.fakequant_options();
+
+ tflite::FakeQuantOptionsBuilder fq_options_builder{fbb};
+ fq_options_builder.add_min(options.min());
+ fq_options_builder.add_max(options.max());
+ fq_options_builder.add_num_bits(options.num_bits());
+ fq_options_builder.add_narrow_range(options.narrow_range());
+
+ return fq_options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef> FakeQuantChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new FakeQuantChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/FakeQuant.h b/compiler/tflchef/core/src/Op/FakeQuant.h
new file mode 100644
index 000000000..0fbfea315
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/FakeQuant.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_FAKE_QUANT_H__
+#define __OP_FAKE_QUANT_H__
+
+#include "OpChef.h"
+
+class FakeQuantChef final : public OpChef
+{
+public:
+ explicit FakeQuantChef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_FAKE_QUANT; }
+
+ tflite::BuiltinOptions type(void) const override
+ {
+ return tflite::BuiltinOptions_FakeQuantOptions;
+ }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct FakeQuantChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_FAKE_QUANT_H__
diff --git a/compiler/tflchef/core/src/Op/FullyConnected.cpp b/compiler/tflchef/core/src/Op/FullyConnected.cpp
index 45269916c..7173a67ba 100644
--- a/compiler/tflchef/core/src/Op/FullyConnected.cpp
+++ b/compiler/tflchef/core/src/Op/FullyConnected.cpp
@@ -29,6 +29,7 @@ flatbuffers::Offset<void> FullyConnectedChef::value(flatbuffers::FlatBufferBuild
tflite::FullyConnectedOptionsBuilder fc_options_builder{fbb};
fc_options_builder.add_fused_activation_function(tflite_activation);
+ fc_options_builder.add_keep_num_dims(operation.fullyconnected_options().keep_num_dims());
return fc_options_builder.Finish().Union();
}
diff --git a/compiler/tflchef/core/src/Op/Gelu.cpp b/compiler/tflchef/core/src/Op/Gelu.cpp
new file mode 100644
index 000000000..91d2bb36c
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Gelu.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Gelu.h"
+
+flatbuffers::Offset<void> GeluChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ assert(_operation->has_gelu_options());
+
+ const auto &options = _operation->gelu_options();
+
+ tflite::GeluOptionsBuilder options_builder{fbb};
+ options_builder.add_approximate(options.approximate());
+
+ return options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef> GeluChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new GeluChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/Gelu.h b/compiler/tflchef/core/src/Op/Gelu.h
new file mode 100644
index 000000000..64d9361e6
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Gelu.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_GELU_H__
+#define __OP_GELU_H__
+
+#include "OpChef.h"
+
+class GeluChef final : public OpChef
+{
+public:
+ explicit GeluChef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_GELU; }
+
+ tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_GeluOptions; }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct GeluChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_GELU_H__
diff --git a/compiler/tflchef/core/src/Op/HardSwish.cpp b/compiler/tflchef/core/src/Op/HardSwish.cpp
new file mode 100644
index 000000000..27ab8b5ab
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/HardSwish.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "HardSwish.h"
+
+flatbuffers::Offset<void> HardSwishChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ return flatbuffers::Offset<void>();
+}
+
+std::unique_ptr<OpChef> HardSwishChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new HardSwishChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/HardSwish.h b/compiler/tflchef/core/src/Op/HardSwish.h
new file mode 100644
index 000000000..10ed51e61
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/HardSwish.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_HARDSWISH_H__
+#define __OP_HARDSWISH_H__
+
+#include "OpChef.h"
+
+class HardSwishChef final : public OpChef
+{
+public:
+ explicit HardSwishChef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_HARD_SWISH; }
+
+ tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_NONE; }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct HardSwishChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_HARDSWISH_H__
diff --git a/compiler/tflchef/core/src/Op/LocalResponseNormalization.h b/compiler/tflchef/core/src/Op/LocalResponseNormalization.h
index 62a2355f2..afc37e6ec 100644
--- a/compiler/tflchef/core/src/Op/LocalResponseNormalization.h
+++ b/compiler/tflchef/core/src/Op/LocalResponseNormalization.h
@@ -23,7 +23,7 @@ class LocalResponseNormalizationChef final : public OpChef
{
public:
explicit LocalResponseNormalizationChef(const tflchef::Operation *operation)
- : _operation{operation}
+ : _operation{operation}
{
// DO NOTHING
}
diff --git a/compiler/tflchef/core/src/Op/Quantize.cpp b/compiler/tflchef/core/src/Op/Quantize.cpp
new file mode 100644
index 000000000..39b902805
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Quantize.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Quantize.h"
+
+flatbuffers::Offset<void> QuantizeChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ return flatbuffers::Offset<void>();
+}
+
+std::unique_ptr<OpChef> QuantizeChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new QuantizeChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/Quantize.h b/compiler/tflchef/core/src/Op/Quantize.h
new file mode 100644
index 000000000..fe7a029bf
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Quantize.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_QUANTIZE_H__
+#define __OP_QUANTIZE_H__
+
+#include "OpChef.h"
+
+class QuantizeChef final : public OpChef
+{
+public:
+ explicit QuantizeChef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_QUANTIZE; }
+
+ tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_NONE; }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct QuantizeChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_QUANTIZE_H__
diff --git a/compiler/tflchef/core/src/Op/SVDF.cpp b/compiler/tflchef/core/src/Op/SVDF.cpp
new file mode 100644
index 000000000..690896cf1
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/SVDF.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SVDF.h"
+#include "Convert.h"
+
+#include <cassert>
+
+flatbuffers::Offset<void> SVDFChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ assert(_operation->has_svdf_options());
+
+ const auto &svdf_options = _operation->svdf_options();
+
+ const auto tflite_activation = as_tflite_activation(svdf_options.activation());
+
+ tflite::SVDFOptionsBuilder svdf_options_builder{fbb};
+ svdf_options_builder.add_fused_activation_function(tflite_activation);
+ svdf_options_builder.add_asymmetric_quantize_inputs(svdf_options.asymmetric_quantize_inputs());
+ svdf_options_builder.add_rank(svdf_options.rank());
+
+ return svdf_options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef> SVDFChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new SVDFChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/SVDF.h b/compiler/tflchef/core/src/Op/SVDF.h
new file mode 100644
index 000000000..9bf0b6efb
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/SVDF.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_SVDF_H__
+#define __OP_SVDF_H__
+
+#include "OpChef.h"
+
+class SVDFChef final : public OpChef
+{
+public:
+ explicit SVDFChef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_SVDF; }
+
+ tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_SVDFOptions; }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct SVDFChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_SVDF_H__
diff --git a/compiler/tflchef/core/src/Op/Squeeze.cpp b/compiler/tflchef/core/src/Op/Squeeze.cpp
index 8d6ef42d6..1c1d99a01 100644
--- a/compiler/tflchef/core/src/Op/Squeeze.cpp
+++ b/compiler/tflchef/core/src/Op/Squeeze.cpp
@@ -30,7 +30,7 @@ flatbuffers::Offset<void> SqueezeChef::value(flatbuffers::FlatBufferBuilder &fbb
// Note: 'CreateVector' should be placed before 'CreateOptions'
// Read flatbuffers.h 'void NotNested()' for more information
auto fb_squeeze_dims =
- fbb.CreateVector(options.squeeze_dim().data(), options.squeeze_dim().size());
+ fbb.CreateVector(options.squeeze_dim().data(), options.squeeze_dim().size());
return tflite::CreateSqueezeOptions(fbb, fb_squeeze_dims).Union();
}
diff --git a/compiler/tflchef/core/src/Op/StridedSlice.cpp b/compiler/tflchef/core/src/Op/StridedSlice.cpp
index 587a95c66..67fd03140 100644
--- a/compiler/tflchef/core/src/Op/StridedSlice.cpp
+++ b/compiler/tflchef/core/src/Op/StridedSlice.cpp
@@ -29,11 +29,11 @@ flatbuffers::Offset<void> StridedSliceChef::value(flatbuffers::FlatBufferBuilder
strided_slice_options_builder.add_begin_mask(operation.strided_slice_options().begin_mask());
strided_slice_options_builder.add_end_mask(operation.strided_slice_options().end_mask());
strided_slice_options_builder.add_ellipsis_mask(
- operation.strided_slice_options().ellipsis_mask());
+ operation.strided_slice_options().ellipsis_mask());
strided_slice_options_builder.add_new_axis_mask(
- operation.strided_slice_options().new_axis_mask());
+ operation.strided_slice_options().new_axis_mask());
strided_slice_options_builder.add_shrink_axis_mask(
- operation.strided_slice_options().shrink_axis_mask());
+ operation.strided_slice_options().shrink_axis_mask());
return strided_slice_options_builder.Finish().Union();
}
diff --git a/compiler/tflchef/core/src/Op/TransposeConv.cpp b/compiler/tflchef/core/src/Op/TransposeConv.cpp
index c9e452714..530ebae78 100644
--- a/compiler/tflchef/core/src/Op/TransposeConv.cpp
+++ b/compiler/tflchef/core/src/Op/TransposeConv.cpp
@@ -34,6 +34,13 @@ flatbuffers::Offset<void> TransposeConvChef::value(flatbuffers::FlatBufferBuilde
options_builder.add_stride_h(operation.transpose_conv_options().stride_h());
options_builder.add_stride_w(operation.transpose_conv_options().stride_w());
+ // TODO remove calling has_activation
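+ // (recipes may omit the activation field; fall back to NONE in that case)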
+ auto chef_activation = operation.transpose_conv_options().has_activation()
+ ? operation.transpose_conv_options().activation()
+ : tflchef::NONE;
+ auto tflite_activation = as_tflite_activation(chef_activation);
+ options_builder.add_fused_activation_function(tflite_activation);
+
return options_builder.Finish().Union();
}
diff --git a/compiler/tflchef/core/src/Op/UnidirectionalSequenceLSTM.cpp b/compiler/tflchef/core/src/Op/UnidirectionalSequenceLSTM.cpp
new file mode 100644
index 000000000..2d6becdff
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/UnidirectionalSequenceLSTM.cpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "UnidirectionalSequenceLSTM.h"
+#include "Convert.h"
+
+#include <cassert>
+
+flatbuffers::Offset<void>
+UnidirectionalSequenceLSTMChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ auto &operation = (*_operation);
+
+ assert(operation.has_unidirectional_sequence_lstm_options());
+
+ tflite::UnidirectionalSequenceLSTMOptionsBuilder options_builder(fbb);
+ options_builder.add_fused_activation_function(
+ as_tflite_activation(operation.unidirectional_sequence_lstm_options().activation()));
+ options_builder.add_cell_clip(operation.unidirectional_sequence_lstm_options().cell_clip());
+ options_builder.add_proj_clip(operation.unidirectional_sequence_lstm_options().proj_clip());
+ options_builder.add_time_major(operation.unidirectional_sequence_lstm_options().time_major());
+ options_builder.add_asymmetric_quantize_inputs(
+ operation.unidirectional_sequence_lstm_options().asymmetric_quantize_inputs());
+
+ return options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef>
+UnidirectionalSequenceLSTMChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new UnidirectionalSequenceLSTMChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/UnidirectionalSequenceLSTM.h b/compiler/tflchef/core/src/Op/UnidirectionalSequenceLSTM.h
new file mode 100644
index 000000000..b8a6d8103
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/UnidirectionalSequenceLSTM.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_UNIDIRECTIONALSEQUENCELSTM_H__
+#define __OP_UNIDIRECTIONALSEQUENCELSTM_H__
+
+#include "OpChef.h"
+
+class UnidirectionalSequenceLSTMChef final : public OpChef
+{
+public:
+ explicit UnidirectionalSequenceLSTMChef(const tflchef::Operation *operation)
+ : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override
+ {
+ return tflite::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM;
+ }
+
+ tflite::BuiltinOptions type(void) const override
+ {
+ return tflite::BuiltinOptions_UnidirectionalSequenceLSTMOptions;
+ }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct UnidirectionalSequenceLSTMChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_UNIDIRECTIONALSEQUENCELSTM_H__
diff --git a/compiler/tflchef/core/src/OpChef.def b/compiler/tflchef/core/src/OpChef.def
index 6b242e811..9a2164640 100644
--- a/compiler/tflchef/core/src/OpChef.def
+++ b/compiler/tflchef/core/src/OpChef.def
@@ -12,18 +12,22 @@ OP_CHEF(ArgMin, ArgMinChefFactory)
OP_CHEF(AveragePool2D, AveragePool2DChefFactory)
OP_CHEF(BatchMatMul, BatchMatMulChefFactory)
OP_CHEF(BatchToSpaceND, BatchToSpaceNDChefFactory)
+OP_CHEF(BidirectionalSequenceLSTM, BidirectionalSequenceLSTMChefFactory)
OP_CHEF(Cast, CastChefFactory)
OP_CHEF(Ceil, CeilChefFactory)
OP_CHEF(Concatenation, ConcatenationChefFactory)
OP_CHEF(Conv2D, Conv2DChefFactory)
OP_CHEF(Cos, CosChefFactory)
+OP_CHEF(Densify, DensifyChefFactory)
OP_CHEF(DepthToSpace, DepthToSpaceChefFactory)
OP_CHEF(DepthwiseConv2D, DepthwiseConv2DChefFactory)
+OP_CHEF(Dequantize, DequantizeChefFactory)
OP_CHEF(Div, DivChefFactory)
OP_CHEF(ELU, ELUChefFactory)
OP_CHEF(Equal, EqualChefFactory)
OP_CHEF(Exp, ExpChefFactory)
OP_CHEF(ExpandDims, ExpandDimsChefFactory)
+OP_CHEF(FakeQuant, FakeQuantChefFactory)
OP_CHEF(Fill, FillChefFactory)
OP_CHEF(Floor, FloorChefFactory)
OP_CHEF(FloorDiv, FloorDivChefFactory)
@@ -31,8 +35,10 @@ OP_CHEF(FloorMod, FloorModChefFactory)
OP_CHEF(FullyConnected, FullyConnectedChefFactory)
OP_CHEF(Gather, GatherChefFactory)
OP_CHEF(GatherNd, GatherNdChefFactory)
+OP_CHEF(Gelu, GeluChefFactory)
OP_CHEF(Greater, GreaterChefFactory)
OP_CHEF(GreaterEqual, GreaterEqualChefFactory)
+OP_CHEF(HardSwish, HardSwishChefFactory)
OP_CHEF(If, IfChefFactory)
OP_CHEF(L2Normalize, L2NormalizeChefFactory)
OP_CHEF(L2Pool2D, L2Pool2DChefFactory)
@@ -64,6 +70,7 @@ OP_CHEF(Pad, PadChefFactory)
OP_CHEF(PadV2, PadV2ChefFactory)
OP_CHEF(Pow, PowChefFactory)
OP_CHEF(PRelu, PReluChefFactory)
+OP_CHEF(Quantize, QuantizeChefFactory)
OP_CHEF(Range, RangeChefFactory)
OP_CHEF(Rank, RankChefFactory)
OP_CHEF(ReduceAny, ReduceAnyChefFactory)
@@ -100,11 +107,13 @@ OP_CHEF(Squeeze, SqueezeChefFactory)
OP_CHEF(StridedSlice, StridedSliceChefFactory)
OP_CHEF(Sub, SubChefFactory)
OP_CHEF(Sum, SumChefFactory)
+OP_CHEF(SVDF, SVDFChefFactory)
OP_CHEF(Tanh, TanhChefFactory)
OP_CHEF(Tile, TileChefFactory)
OP_CHEF(TopKV2, TopKV2ChefFactory)
OP_CHEF(Transpose, TransposeChefFactory)
OP_CHEF(TransposeConv, TransposeConvChefFactory)
+OP_CHEF(UnidirectionalSequenceLSTM, UnidirectionalSequenceLSTMChefFactory)
OP_CHEF(Unique, UniqueChefFactory)
OP_CHEF(Unpack, UnpackChefFactory)
OP_CHEF(Where, WhereChefFactory)
@@ -115,5 +124,8 @@ OP_CHEF(ZerosLike, ZerosLikeChefFactory)
OP_CHEF(AddV2, AddV2ChefFactory)
OP_CHEF(All, AllChefFactory)
OP_CHEF(BatchMatMulV2, BatchMatMulV2ChefFactory)
+OP_CHEF(BroadcastTo, BroadcastToChefFactory)
+OP_CHEF(Erf, ErfChefFactory)
OP_CHEF(MatMul, MatMulChefFactory)
OP_CHEF(MatrixBandPart, MatrixBandPartChefFactory)
+OP_CHEF(MaxPoolWithArgmax, MaxPoolWithArgmaxChefFactory)
diff --git a/compiler/tflchef/core/src/OpChefs.h b/compiler/tflchef/core/src/OpChefs.h
index 7637b1c69..ba2b17571 100644
--- a/compiler/tflchef/core/src/OpChefs.h
+++ b/compiler/tflchef/core/src/OpChefs.h
@@ -25,18 +25,22 @@
#include "Op/AveragePool2D.h"
#include "Op/BatchMatMul.h"
#include "Op/BatchToSpaceND.h"
+#include "Op/BidirectionalSequenceLSTM.h"
#include "Op/Cast.h"
#include "Op/Ceil.h"
#include "Op/Concatenation.h"
#include "Op/Conv2D.h"
#include "Op/Cos.h"
+#include "Op/Densify.h"
#include "Op/DepthToSpace.h"
#include "Op/DepthwiseConv2D.h"
+#include "Op/Dequantize.h"
#include "Op/Div.h"
#include "Op/ELU.h"
#include "Op/Equal.h"
#include "Op/Exp.h"
#include "Op/ExpandDims.h"
+#include "Op/FakeQuant.h"
#include "Op/Fill.h"
#include "Op/Floor.h"
#include "Op/FloorDiv.h"
@@ -44,8 +48,10 @@
#include "Op/FullyConnected.h"
#include "Op/Gather.h"
#include "Op/GatherNd.h"
+#include "Op/Gelu.h"
#include "Op/Greater.h"
#include "Op/GreaterEqual.h"
+#include "Op/HardSwish.h"
#include "Op/If.h"
#include "Op/L2Normalize.h"
#include "Op/L2Pool2D.h"
@@ -93,6 +99,7 @@
#include "Op/ReverseV2.h"
#include "Op/Round.h"
#include "Op/Rsqrt.h"
+#include "Op/Quantize.h"
#include "Op/ScatterNd.h"
#include "Op/SegmentSum.h"
#include "Op/Select.h"
@@ -113,11 +120,13 @@
#include "Op/StridedSlice.h"
#include "Op/Sub.h"
#include "Op/Sum.h"
+#include "Op/SVDF.h"
#include "Op/Tanh.h"
#include "Op/Tile.h"
#include "Op/TopKV2.h"
#include "Op/Transpose.h"
#include "Op/TransposeConv.h"
+#include "Op/UnidirectionalSequenceLSTM.h"
#include "Op/Unique.h"
#include "Op/Unpack.h"
#include "Op/Where.h"
@@ -127,7 +136,10 @@
#include "CustomOp/AddV2.h"
#include "CustomOp/All.h"
#include "CustomOp/BatchMatMulV2.h"
+#include "CustomOp/BroadcastTo.h"
+#include "CustomOp/Erf.h"
#include "CustomOp/MatMul.h"
#include "CustomOp/MatrixBandPart.h"
+#include "CustomOp/MaxPoolWithArgmax.h"
#endif // __OP_CHEFS_H__
diff --git a/compiler/tflchef/proto/tflchef.proto b/compiler/tflchef/proto/tflchef.proto
index 9909d517a..98ae2b23f 100644
--- a/compiler/tflchef/proto/tflchef.proto
+++ b/compiler/tflchef/proto/tflchef.proto
@@ -15,16 +15,36 @@ package tflchef;
// This enum value corresponds to TensorType in TensorFlow Lite schema
enum TensorType {
FLOAT32 = 0;
+ FLOAT16 = 1;
INT32 = 2;
UINT8 = 3;
INT64 = 4;
+ STRING = 5;
BOOL = 6;
+ INT16 = 7;
+ INT8 = 9;
+}
+
+enum DimensionType {
+ DENSE = 0;
+ SPARSE_CSR = 1;
+}
+
+enum SparseIndexVecType {
+ SparseIdxVecType_NONE = 0;
+ INT32VEC = 1;
+ UINT16VEC = 2;
+ UINT8VEC = 3;
}
message TensorShape {
repeated uint32 dim = 3;
}
+message ShapeSignature {
+ repeated int32 dim = 1;
+}
+
message TensorFiller {
optional string tag = 1;
repeated string arg = 2;
@@ -38,12 +58,44 @@ message TensorQuantization {
optional int32 quantized_dimension = 5 [default = 0];
}
+message TensorSparsity {
+ message TraversalOrder {
+ repeated int32 dim = 1;
+ }
+ message BlockMap {
+ repeated int32 dim = 1;
+ }
+ message IndexVec {
+ repeated int32 dim = 1;
+ optional SparseIndexVecType type = 2;
+ }
+ message DimMetaData {
+ optional DimensionType format = 1;
+ optional int32 dense_size = 2;
+ optional IndexVec array_segments = 3;
+ optional IndexVec array_indices = 4;
+ }
+
+ optional TraversalOrder traversal_order = 1;
+ optional BlockMap block_map = 2;
+ repeated DimMetaData dim_metadata = 3;
+}
+
message Operand {
optional string name = 1;
optional TensorType type = 2;
optional TensorShape shape = 3;
optional TensorFiller filler = 4;
optional TensorQuantization quant = 5;
+ optional TensorSparsity sparsity = 6;
+ optional bool is_variable = 7 [default = false];
+ optional ShapeSignature shape_signature = 8;
+ // 'make_sparse' tells tflchef to make a sparse tensor, since filling
+ // 'TensorSparsity' by hand can be difficult.
+ // For now, the last dimension will be SPARSE_CSR.
+ // e.g., shape [2, 3, 4] will have
+ // TraversalOrder [0, 1, 2] with [DENSE, DENSE, SPARSE_CSR]
+ // (see the sketch just below this message)
+ optional bool make_sparse = 9 [default = false];
}
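+
+// A minimal sketch of 'make_sparse' in an operand (hypothetical operand name;
+// values are illustrative -- see tests/make_sparse/test.recipe for a working case):
+//
+//   operand {
+//     name: "sparse_weight"
+//     type: FLOAT32
+//     shape { dim: 4 dim: 4 }
+//     make_sparse: true
+//   }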
// This enum value corresponds to Padding in TensorFlow Lite schema
@@ -58,6 +110,8 @@ enum Activation {
RELU = 1;
RELU_N1_TO_1 = 2;
RELU6 = 3;
+ TANH = 4;
+ SIGN_BIT = 5;
}
// This enum value corresponds to MirrorPadMode in TensorFlow Lite schema
@@ -66,6 +120,15 @@ enum MirrorPadMode {
SYMMETRIC = 1;
}
+message BidirectionalSequenceLSTMOptions {
+ optional Activation activation = 1 [default = NONE];
+ optional float cell_clip = 2 [default = 0.0];
+ optional float proj_clip = 3 [default = 0.0];
+ optional bool merge_outputs = 6 [default = false];
+ optional bool time_major = 4 [default = true];
+ optional bool asymmetric_quantize_inputs = 5 [default = false];
+}
+
message Conv2DOptions
{
optional Padding padding = 1 [default = VALID];
@@ -127,6 +190,7 @@ message FloorModOptions {
message FullyConnectedOptions {
optional Activation activation = 1 [default = NONE];
+ optional bool keep_num_dims = 2 [default = false];
}
message AddOptions {
@@ -311,6 +375,12 @@ message SquaredDifferenceOptions {
// None
}
+message SVDFOptions {
+ optional int32 rank = 1 [default = 0];
+ optional Activation activation = 2 [default = NONE];
+ optional bool asymmetric_quantize_inputs = 3 [default = false];
+}
+
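+// A minimal sketch (hypothetical values) of how SVDFOptions could appear in a
+// recipe's operation block:
+//
+//   operation {
+//     type: "SVDF"
+//     svdf_options {
+//       rank: 1
+//       activation: RELU
+//     }
+//   }
+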
message FillOptions {
// None
}
@@ -367,6 +437,10 @@ message GatherNdOptions {
// None
}
+message GeluOptions {
+ optional bool approximate = 1 [default = false];
+}
+
message NonMaxSuppressionV4Options {
// None
}
@@ -404,6 +478,7 @@ message TransposeConvOptions {
optional Padding padding = 1 [default = VALID];
optional int32 stride_w = 2 [default = 1];
optional int32 stride_h = 3 [default = 1];
+ optional Activation activation = 4 [default = NONE];
}
message ReverseSequenceOptions {
@@ -419,6 +494,14 @@ message SegmentSumOptions {
// NONE
}
+message UnidirectionalSequenceLSTMOptions {
+ optional Activation activation = 1 [default = NONE];
+ optional float cell_clip = 2 [default = 0.0];
+ optional float proj_clip = 3 [default = 0.0];
+ optional bool time_major = 4 [default = false];
+ optional bool asymmetric_quantize_inputs = 5 [default = false];
+}
+
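+// A minimal sketch (tensor wiring omitted) of an operation using
+// UnidirectionalSequenceLSTMOptions; TANH is one of the newly added
+// activation values:
+//
+//   operation {
+//     type: "UnidirectionalSequenceLSTM"
+//     unidirectional_sequence_lstm_options {
+//       activation: TANH
+//       time_major: false
+//     }
+//   }
+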
message UniqueOptions {
optional TensorType idx_out_type = 1 [default = INT32];
}
@@ -443,6 +526,31 @@ message MatrixSetDiagOptions {
// NONE
}
+message DequantizeOptions {
+ // NONE
+}
+
+message MaxPoolWithArgmaxOptions {
+ optional Padding padding = 1 [default = VALID];
+ optional int32 stride_w = 2 [default = 1];
+ optional int32 stride_h = 3 [default = 1];
+ optional int32 filter_width = 4 [default = 1];
+ optional int32 filter_height = 5 [default = 1];
+ optional TensorType output_type = 6 [default = INT64];
+ optional bool include_batch_in_index = 7 [default = false];
+}
+
+message FakeQuantOptions {
+ optional float min = 1 [default = 0.0];
+ optional float max = 2 [default = 0.0];
+ optional int32 num_bits = 3 [default = 0];
+ optional bool narrow_range = 4 [default = false];
+}
+
+message DensifyOptions {
+ // NONE
+}
+
message Operation {
optional string type = 1;
repeated string input = 2;
@@ -505,7 +613,7 @@ message Operation {
optional ZerosLikeOptions zeros_like_options = 153;
// ConcatEmbeddingsOptions 154
// LSHProjectionOptions 155
- // SVDFOptions 156
+ optional SVDFOptions svdf_options = 156;
// RNNOptions 157
optional L2NormOptions l2norm_options = 158;
optional LocalResponseNormalizationOptions local_response_normalization_options = 159;
@@ -518,7 +626,7 @@ message Operation {
// SequenceRNNOptions 166
optional TopKV2Options topk_v2_options = 167;
optional LogSoftmaxOptions log_softmax_options = 168;
- // DequantizeOptions 169
+ optional DequantizeOptions dequantize_options = 169;
optional NegOptions neg_options = 170;
optional PadV2Options padv2_options = 171;
optional LessEqualOptions lessequal_options = 172;
@@ -527,10 +635,10 @@ message Operation {
optional SparseToDenseOptions sparse_to_dense_options = 175;
optional PowOptions pow_options = 176;
optional ArgMinOptions argmin_options = 177;
- // FakeQuantOptions 178
- // BidirectionalSequenceLSTMOptions 179
+ optional FakeQuantOptions fakequant_options = 178;
+ optional BidirectionalSequenceLSTMOptions bidirectional_sequence_lstm_options = 179;
// BidirectionalSequenceRNNOptions 180
- // UnidirectionalSequenceLSTMOptions 181
+ optional UnidirectionalSequenceLSTMOptions unidirectional_sequence_lstm_options = 181;
optional RangeOptions range_options = 182;
optional ResizeNearestNeighborOptions resize_nearest_neighbor_options = 183;
optional LeakyReluOptions leaky_relu_options = 184;
@@ -558,11 +666,29 @@ message Operation {
optional SegmentSumOptions segment_sum_options = 206;
optional AddNOptions add_n_options = 207;
optional MatMulOptions matmul_options = 208;
-
+ optional MaxPoolWithArgmaxOptions max_pool_with_argmax_options = 209;
+ optional DensifyOptions densify_options = 210;
+ optional GeluOptions gelu_options = 211;
// NOTE if more than one option shares the same type of Options,
// use a number not listed in the reserved list above
}
+message TensorMap {
+ optional string name = 4;
+ // Use 'tensor' to refer to the Operand by name, or 'tensor_index' to refer
+ // to it by order number; exactly one of the two should exist.
+ optional string tensor = 5;
+ optional uint32 tensor_index = 6;
+}
+
+message SignatureDef {
+ repeated TensorMap inputs = 4;
+ repeated TensorMap outputs = 5;
+ optional string signature_key = 6;
+ // optional string key = 10; obsolete in TF2.8.0
+ optional uint32 subgraph_index = 12;
+}
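+
+// A minimal sketch of a signature_def entry, assuming the recipe declares
+// operands named "ifm" and "ofm" (see tests/signature_def_name/test.recipe):
+//
+//   signature_def {
+//     inputs { name: "ifm" tensor: "ifm" }
+//     outputs { name: "ofm" tensor: "ofm" }
+//     signature_key: "serving_default"
+//     subgraph_index: 0
+//   }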
+
// For additional subgraphs
message Graph {
repeated Operand operand = 1;
@@ -580,4 +706,5 @@ message ModelRecipe {
optional string name = 5;
optional uint32 version = 6 [default = 1];
repeated Graph graph = 7;
+ repeated SignatureDef signature_def = 8;
}
diff --git a/compiler/tflchef/requires.cmake b/compiler/tflchef/requires.cmake
index 4c02174b5..8d86632fb 100644
--- a/compiler/tflchef/requires.cmake
+++ b/compiler/tflchef/requires.cmake
@@ -1,7 +1,7 @@
require("arser")
require("nnkit")
require("cwrap")
-require("mio-tflite")
+require("mio-tflite2121")
require("safemain")
require("hermes")
require("hermes-std")
diff --git a/compiler/tflchef/tests/CMakeLists.txt b/compiler/tflchef/tests/CMakeLists.txt
index 5c4dff012..79b00845b 100644
--- a/compiler/tflchef/tests/CMakeLists.txt
+++ b/compiler/tflchef/tests/CMakeLists.txt
@@ -1,10 +1,11 @@
-if(NOT TARGET nnkit-run)
- return()
-endif(NOT TARGET nnkit-run)
-
-if(NOT TARGET nnkit_tflite_backend)
- return()
-endif(NOT TARGET nnkit_tflite_backend)
+set(TFLCHEF_FILE_PATH $<TARGET_FILE:tflchef-file>)
+set(TFLCHEF_REVERSE_PATH $<TARGET_FILE:tflchef-reverse>)
+if(DEFINED ENV{BUILD_HOST_EXEC})
+ # TODO use a better way to represent the host executable path
+ set(TFLCHEF_FILE_PATH $ENV{BUILD_HOST_EXEC}/compiler/tflchef/tools/file/tflchef-file)
+ set(TFLCHEF_REVERSE_PATH $ENV{BUILD_HOST_EXEC}/compiler/tflchef/tools/reverse/tflchef-reverse)
+ message(STATUS "TFLCHEF_FILE_PATH = ${TFLCHEF_FILE_PATH}")
+endif(DEFINED ENV{BUILD_HOST_EXEC})
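+# A hypothetical cross-build invocation (path is illustrative): point
+# BUILD_HOST_EXEC at a finished host build so the prebuilt tflchef tools are
+# used instead of the target (e.g. ARM32) binaries:
+#   BUILD_HOST_EXEC=/path/to/host/build cmake --build . --target tflchef_testfiles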
nncc_find_resource(TensorFlowLiteRecipes)
set(TENSORFLOWLITERECIPES_DIR "${TensorFlowLiteRecipes_DIR}")
@@ -26,8 +27,8 @@ foreach(RECIPE IN ITEMS ${RECIPES})
# Generate .tflite
add_custom_command(OUTPUT ${RECIPE_OUTPUT_FILE}
- COMMAND tflchef-file ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE}
- DEPENDS tflchef-file ${RECIPE_SOURCE_FILE}
+ COMMAND ${TFLCHEF_FILE_PATH} ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE}
+ DEPENDS ${TFLCHEF_FILE_PATH} ${RECIPE_SOURCE_FILE}
COMMENT "Generating ${RECIPE_OUTPUT_FILE}")
list(APPEND TESTS ${RECIPE_PREFIX})
@@ -52,8 +53,8 @@ foreach(RECIPE IN ITEMS ${RECIPES})
# Generate .tflite
add_custom_command(OUTPUT ${RECIPE_OUTPUT_FILE}
- COMMAND tflchef-file ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE}
- DEPENDS tflchef-file ${RECIPE_SOURCE_FILE}
+ COMMAND ${TFLCHEF_FILE_PATH} ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE}
+ DEPENDS ${TFLCHEF_FILE_PATH} ${RECIPE_SOURCE_FILE}
COMMENT "Generating ${RECIPE_OUTPUT_FILE}")
list(APPEND TESTS ${RECIPE_PREFIX})
@@ -76,16 +77,16 @@ foreach(TFLITEFILE IN ITEMS ${GEN_TFLITEFILES})
# Generate .gen.recipe from generated .tflite
add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE}
- COMMAND tflchef-reverse ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE}
- DEPENDS tflchef-reverse ${RECIPE_OUTPUT_FILE}
+ COMMAND ${TFLCHEF_REVERSE_PATH} ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE}
+ DEPENDS ${TFLCHEF_REVERSE_PATH} ${RECIPE_OUTPUT_FILE}
COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE}")
# Now generate .gen.tflite from the .gen.recipe to check that the generated
# .gen.recipe file is correct by using it.
# As weight values may differ, binary comparison is not acceptable.
add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE2}
- COMMAND tflchef-file ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2}
- DEPENDS tflchef-file ${RECIPE_GEN_OUTPUT_FILE}
+ COMMAND ${TFLCHEF_FILE_PATH} ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2}
+ DEPENDS ${TFLCHEF_FILE_PATH} ${RECIPE_GEN_OUTPUT_FILE}
COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE2}")
list(APPEND TESTS ${TFLITE_PREFIX}.gen)
@@ -104,13 +105,13 @@ foreach(TFLITEFILE IN ITEMS ${GEN_TFLITEFILES})
# Generate .gen.recipe from generated .tflite
add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE}
- COMMAND tflchef-reverse ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE}
- DEPENDS tflchef-reverse ${RECIPE_OUTPUT_FILE}
+ COMMAND ${TFLCHEF_REVERSE_PATH} ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE}
+ DEPENDS ${TFLCHEF_REVERSE_PATH} ${RECIPE_OUTPUT_FILE}
COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE}")
add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE2}
- COMMAND tflchef-file ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2}
- DEPENDS tflchef-file ${RECIPE_GEN_OUTPUT_FILE}
+ COMMAND ${TFLCHEF_FILE_PATH} ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2}
+ DEPENDS ${TFLCHEF_FILE_PATH} ${RECIPE_GEN_OUTPUT_FILE}
COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE2}")
list(APPEND TESTS ${TFLITE_PREFIX}.gen)
@@ -123,7 +124,9 @@ add_custom_target(tflchef_testfiles ALL DEPENDS ${TESTFILES})
# Using mio_tflite_validate temporarily, as it only calls flatbuffer validate
# TODO test by running the model with the runtime/interpreter
+# NOTE for the ARM32 cross build, $<TARGET_FILE:mio_tflite2121_validate> is used as-is,
+# as the test should run on the ARM32 device
add_test(NAME tflchef_test
COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/runvalidate.sh"
- $<TARGET_FILE:mio_tflite_validate>
+ $<TARGET_FILE:mio_tflite2121_validate>
${TESTS})
diff --git a/compiler/tflchef/tests/custom_erf/test.recipe b/compiler/tflchef/tests/custom_erf/test.recipe
new file mode 100644
index 000000000..ab093a30e
--- /dev/null
+++ b/compiler/tflchef/tests/custom_erf/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Erf"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/compiler/tflchef/tests/explicit_bool/test.recipe b/compiler/tflchef/tests/explicit_bool/test.recipe
new file mode 100644
index 000000000..8f09edd13
--- /dev/null
+++ b/compiler/tflchef/tests/explicit_bool/test.recipe
@@ -0,0 +1,31 @@
+operand {
+ name: "ifm1"
+ type: BOOL
+ shape { dim: 6 }
+}
+operand {
+ name: "ifm2"
+ type: BOOL
+ shape { dim: 6 }
+ filler {
+ tag: "explicit"
+ arg: "T"
+ arg: "f"
+ arg: "0"
+ arg: "1"
+ arg: "true"
+ arg: "FALSE"
+ }
+}
+operand {
+ name: "ofm"
+ type: BOOL
+ shape { dim: 6 }
+}
+operation {
+ type: "LogicalAnd"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+}
+output: "ofm"
diff --git a/compiler/tflchef/tests/make_sparse/test.recipe b/compiler/tflchef/tests/make_sparse/test.recipe
new file mode 100644
index 000000000..15cc93a5d
--- /dev/null
+++ b/compiler/tflchef/tests/make_sparse/test.recipe
@@ -0,0 +1,44 @@
+operand {
+ name: "in"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "sparse"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "2" arg: "0" arg: "0" arg: "0"
+ arg: "0" arg: "0" arg: "0" arg: "0"
+ arg: "0" arg: "0" arg: "0" arg: "0"
+ arg: "0" arg: "0" arg: "0" arg: "3"
+ }
+ make_sparse: true
+}
+operand {
+ name: "dense"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "out"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operation {
+ type: "Densify"
+ input: "sparse"
+ output: "dense"
+}
+operation {
+ type: "Add"
+ input: "in"
+ input: "dense"
+ output: "out"
+ add_options {
+ activation: NONE
+ }
+}
+input: "in"
+output: "out"
diff --git a/compiler/tflchef/tests/make_sparse_f16/test.recipe b/compiler/tflchef/tests/make_sparse_f16/test.recipe
new file mode 100644
index 000000000..5977a1d32
--- /dev/null
+++ b/compiler/tflchef/tests/make_sparse_f16/test.recipe
@@ -0,0 +1,54 @@
+operand {
+ name: "in"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "sparse16"
+ type: FLOAT16
+ shape { dim: 4 dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "2" arg: "0" arg: "0" arg: "0"
+ arg: "0" arg: "0" arg: "0" arg: "0"
+ arg: "0" arg: "0" arg: "0" arg: "0"
+ arg: "0" arg: "0" arg: "0" arg: "3"
+ }
+ make_sparse: true
+}
+operand {
+ name: "dense16"
+ type: FLOAT16
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "dense32"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "out"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operation {
+ type: "Densify"
+ input: "sparse16"
+ output: "dense16"
+}
+operation {
+ type: "Dequantize"
+ input: "dense16"
+ output: "dense32"
+}
+operation {
+ type: "Add"
+ input: "in"
+ input: "dense32"
+ output: "out"
+ add_options {
+ activation: NONE
+ }
+}
+input: "in"
+output: "out"
diff --git a/compiler/tflchef/tests/shape_signature/test.recipe b/compiler/tflchef/tests/shape_signature/test.recipe
new file mode 100644
index 000000000..fa4293e35
--- /dev/null
+++ b/compiler/tflchef/tests/shape_signature/test.recipe
@@ -0,0 +1,19 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ shape_signature { dim: -1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ shape_signature { dim: -1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "ReLU"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/compiler/tflchef/tests/shape_signature/test.reverse b/compiler/tflchef/tests/shape_signature/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/compiler/tflchef/tests/shape_signature/test.reverse
diff --git a/compiler/tflchef/tests/short_int_datatype/test.recipe b/compiler/tflchef/tests/short_int_datatype/test.recipe
new file mode 100644
index 000000000..1e135d912
--- /dev/null
+++ b/compiler/tflchef/tests/short_int_datatype/test.recipe
@@ -0,0 +1,44 @@
+operand {
+ name: "ifm"
+ type: INT16
+ shape { dim: 1 dim: 5 dim: 5 dim: 2 }
+}
+operand {
+ name: "ker"
+ type: INT16
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ filler {
+ tag: "gaussian"
+ arg: "1.0"
+ arg: "6.0"
+ }
+}
+operand {
+ name: "bias"
+ type: INT16
+ shape { dim: 1 }
+ filler {
+ tag: "constant"
+ arg: "12345"
+ }
+}
+operand {
+ name: "ofm"
+ type: INT16
+ shape { dim: 1 dim: 3 dim: 3 dim: 1 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+input: "ker"
+output: "ofm"
diff --git a/compiler/tflchef/tests/short_int_datatype/test.reverse b/compiler/tflchef/tests/short_int_datatype/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/compiler/tflchef/tests/short_int_datatype/test.reverse
diff --git a/compiler/tflchef/tests/signature_def_index/test.recipe b/compiler/tflchef/tests/signature_def_index/test.recipe
new file mode 100644
index 000000000..9e95edf00
--- /dev/null
+++ b/compiler/tflchef/tests/signature_def_index/test.recipe
@@ -0,0 +1,59 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm3"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "ReLU"
+ input: "ifm"
+ output: "ofm1"
+}
+operation {
+ type: "ReLU"
+ input: "ifm"
+ output: "ofm2"
+}
+operation {
+ type: "ReLU"
+ input: "ifm"
+ output: "ofm3"
+}
+signature_def {
+ inputs: {
+ name: "ifm"
+ tensor_index: 0
+ }
+ outputs {
+ name: "ofm2"
+ tensor_index: 2
+ }
+ outputs {
+ name: "ofm3"
+ tensor_index: 3
+ }
+ outputs {
+ name: "ofm1"
+ tensor_index: 1
+ }
+ signature_key: "serving_default"
+ subgraph_index: 0
+}
+input: "ifm"
+output: "ofm3"
+output: "ofm1"
+output: "ofm2"
diff --git a/compiler/tflchef/tests/signature_def_name/test.recipe b/compiler/tflchef/tests/signature_def_name/test.recipe
new file mode 100644
index 000000000..4847f7dd8
--- /dev/null
+++ b/compiler/tflchef/tests/signature_def_name/test.recipe
@@ -0,0 +1,59 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm3"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "ReLU"
+ input: "ifm"
+ output: "ofm1"
+}
+operation {
+ type: "ReLU"
+ input: "ifm"
+ output: "ofm2"
+}
+operation {
+ type: "ReLU"
+ input: "ifm"
+ output: "ofm3"
+}
+signature_def {
+ inputs: {
+ name: "ifm"
+ tensor_index: 0
+ }
+ outputs {
+ name: "out2"
+ tensor: "ofm2"
+ }
+ outputs {
+ name: "out3"
+ tensor: "ofm3"
+ }
+ outputs {
+ name: "out1"
+ tensor: "ofm1"
+ }
+ signature_key: "serving_default"
+ subgraph_index: 0
+}
+input: "ifm"
+output: "ofm3"
+output: "ofm1"
+output: "ofm2"
diff --git a/compiler/tflchef/tests/string_tensor/test.recipe b/compiler/tflchef/tests/string_tensor/test.recipe
new file mode 100644
index 000000000..eecfbc6f7
--- /dev/null
+++ b/compiler/tflchef/tests/string_tensor/test.recipe
@@ -0,0 +1,30 @@
+operand {
+ name: "ifm"
+ type: STRING
+ shape { }
+}
+operand {
+ name: "suffix"
+ type: STRING
+ shape { }
+ filler {
+ tag: "explicit"
+ arg: "Hello"
+ }
+}
+operand {
+ name: "ofm"
+ type: STRING
+ shape { }
+}
+operation {
+ type: "Add"
+ input: "ifm"
+ input: "suffix"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "ofm"
diff --git a/compiler/tflchef/tflite/CMakeLists.txt b/compiler/tflchef/tflite/CMakeLists.txt
index 83127cb3e..bf20f31c2 100644
--- a/compiler/tflchef/tflite/CMakeLists.txt
+++ b/compiler/tflchef/tflite/CMakeLists.txt
@@ -3,8 +3,9 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
add_library(tflchef_tflite STATIC ${SOURCES})
target_include_directories(tflchef_tflite PUBLIC include)
target_include_directories(tflchef_tflite PRIVATE src)
+target_include_directories(tflchef_tflite PRIVATE src/Op/include)
target_link_libraries(tflchef_tflite tflchef_proto)
-target_link_libraries(tflchef_tflite mio_tflite)
-target_link_libraries(tflchef_tflite stdex)
+target_link_libraries(tflchef_tflite mio_tflite2121)
+target_link_libraries(tflchef_tflite mio_tflite2121_helper)
target_link_libraries(tflchef_tflite cwrap)
target_link_libraries(tflchef_tflite souschef)
diff --git a/compiler/tflchef/tflite/src/Convert.cpp b/compiler/tflchef/tflite/src/Convert.cpp
index 3cc1c9238..46812f4d6 100644
--- a/compiler/tflchef/tflite/src/Convert.cpp
+++ b/compiler/tflchef/tflite/src/Convert.cpp
@@ -33,10 +33,14 @@ tflchef::TensorType as_tflchef_type(const tflite::TensorType type)
return tflchef::UINT8;
case tflite::TensorType_BOOL:
return tflchef::BOOL;
+ case tflite::TensorType_INT8:
+ return tflchef::INT8;
+ case tflite::TensorType_INT16:
+ return tflchef::INT16;
+ case tflite::TensorType_FLOAT16:
+ return tflchef::FLOAT16;
// TODO handle other types
- // TensorType_FLOAT16
// TensorType_STRING
- // TensorType_INT16
// TensorType_COMPLEX64
default:
throw std::runtime_error{"unsupported tensor type"};
@@ -55,9 +59,10 @@ tflchef::Activation as_tflchef_activation(const tflite::ActivationFunctionType t
return tflchef::RELU_N1_TO_1;
case tflite::ActivationFunctionType_RELU6:
return tflchef::RELU6;
- // TODO handle other types
- // ActivationFunctionType_TANH
- // ActivationFunctionType_SIGN_BIT
+ case tflite::ActivationFunctionType_TANH:
+ return tflchef::TANH;
+ case tflite::ActivationFunctionType_SIGN_BIT:
+ return tflchef::SIGN_BIT;
default:
throw std::runtime_error{"unsupported activation type"};
}
@@ -89,4 +94,34 @@ tflchef::MirrorPadMode as_tflchef_mirrorpadmode(const tflite::MirrorPadMode mode
}
}
+tflchef::DimensionType as_tflchef_sparse_dim_type(const tflite::DimensionType type)
+{
+ switch (type)
+ {
+ case tflite::DimensionType_DENSE:
+ return tflchef::DimensionType::DENSE;
+ case tflite::DimensionType_SPARSE_CSR:
+ return tflchef::DimensionType::SPARSE_CSR;
+ default:
+ throw std::runtime_error("unsupported sparse dimension type");
+ }
+}
+
+tflchef::SparseIndexVecType as_tflchef_sparse_idx_vec_type(const tflite::SparseIndexVector type)
+{
+ switch (type)
+ {
+ case tflite::SparseIndexVector_NONE:
+ return tflchef::SparseIndexVecType::SparseIdxVecType_NONE;
+ case tflite::SparseIndexVector_Int32Vector:
+ return tflchef::SparseIndexVecType::INT32VEC;
+ case tflite::SparseIndexVector_Uint16Vector:
+ return tflchef::SparseIndexVecType::UINT16VEC;
+ case tflite::SparseIndexVector_Uint8Vector:
+ return tflchef::SparseIndexVecType::UINT8VEC;
+ default:
+ throw std::runtime_error("unsupported sparse index vector type");
+ }
+}
+
} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Convert.h b/compiler/tflchef/tflite/src/Convert.h
index 770bffa4d..cf0c61550 100644
--- a/compiler/tflchef/tflite/src/Convert.h
+++ b/compiler/tflchef/tflite/src/Convert.h
@@ -28,6 +28,8 @@ tflchef::TensorType as_tflchef_type(const tflite::TensorType type);
tflchef::Activation as_tflchef_activation(const tflite::ActivationFunctionType type);
tflchef::Padding as_tflchef_padding(const tflite::Padding padding);
tflchef::MirrorPadMode as_tflchef_mirrorpadmode(const tflite::MirrorPadMode mode);
+tflchef::DimensionType as_tflchef_sparse_dim_type(const tflite::DimensionType type);
+tflchef::SparseIndexVecType as_tflchef_sparse_idx_vec_type(const tflite::SparseIndexVector type);
/**
* @brief extract buffer data to std::vector<DT>
diff --git a/compiler/tflchef/tflite/src/FillerHelper.cpp b/compiler/tflchef/tflite/src/FillerHelper.cpp
index cf96d2e8c..1ac99ad40 100644
--- a/compiler/tflchef/tflite/src/FillerHelper.cpp
+++ b/compiler/tflchef/tflite/src/FillerHelper.cpp
@@ -48,3 +48,18 @@ void fill_tensor_to_import(int32_t idx, TFliteImport *import)
}
} // namespace tflchef
+
+// Helper for common code that fills inputs
+namespace tflchef
+{
+
+void fill_two_inputs(const tflite::Operator *op, TFliteImport *import)
+{
+ const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
+ assert(inputs.size() == 2);
+
+ fill_tensor_to_import(inputs[0], import);
+ fill_tensor_to_import(inputs[1], import);
+}
+
+} // namespace tflchef
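+
+// Usage sketch: an op filler whose operator has exactly two (possibly
+// constant) inputs can delegate to fill_two_inputs, as Add/Mul/Sub and
+// Maximum/Minimum do below:
+//
+//   void TFliteOpAdd::filler(const tflite::Operator *op, TFliteImport *import,
+//                            tflchef::ModelRecipe *) const
+//   {
+//     // Add may have constant input
+//     fill_two_inputs(op, import);
+//   }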
diff --git a/compiler/tflchef/tflite/src/FillerHelper.h b/compiler/tflchef/tflite/src/FillerHelper.h
index 053a5c18a..e96ae73d0 100644
--- a/compiler/tflchef/tflite/src/FillerHelper.h
+++ b/compiler/tflchef/tflite/src/FillerHelper.h
@@ -28,4 +28,12 @@ void fill_tensor_to_import(int32_t idx, TFliteImport *import);
} // namespace tflchef
+// Helper for common code that fills inputs
+namespace tflchef
+{
+
+void fill_two_inputs(const tflite::Operator *op, TFliteImport *import);
+
+} // namespace tflchef
+
#endif // __FILLER_HELPER_H__
diff --git a/compiler/tflchef/tflite/src/Op/Add.cpp b/compiler/tflchef/tflite/src/Op/Add.cpp
index 3e880a63b..23d360616 100644
--- a/compiler/tflchef/tflite/src/Op/Add.cpp
+++ b/compiler/tflchef/tflite/src/Op/Add.cpp
@@ -27,11 +27,7 @@ void TFliteOpAdd::filler(const tflite::Operator *op, TFliteImport *import,
{
// Add may have constant input
- const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
- assert(inputs.size() == 2);
-
- fill_tensor_to_import(inputs[0], import);
- fill_tensor_to_import(inputs[1], import);
+ fill_two_inputs(op, import);
}
tflchef::Operation *TFliteOpAdd::build(const tflite::Operator *op, TFliteImport *import,
diff --git a/compiler/tflchef/tflite/src/Op/BidirectionalSequenceLSTM.cpp b/compiler/tflchef/tflite/src/Op/BidirectionalSequenceLSTM.cpp
new file mode 100644
index 000000000..32548247e
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/BidirectionalSequenceLSTM.cpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BidirectionalSequenceLSTM.h"
+
+#include "Convert.h"
+#include "FillerHelper.h"
+
+namespace tflchef
+{
+
+void TFliteOpBidirectionalSequenceLSTM::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
+ assert(inputs.size() == 48);
+
+ for (size_t i = 0; i < inputs.size(); i++)
+ {
+ // Skip inputs 0, 35, 36, 37 and 38: the Input tensor, the ActivationState
+ // tensors (forward and backward) and the CellState tensors (forward and backward).
+ // These may be updated from the previous step or supplied by the user,
+ // so they cannot be treated as constants.
+ if (i == 0 || i == 35 || i == 36 || i == 37 || i == 38)
+ continue;
+ if (inputs[i] != -1)
+ fill_tensor_to_import(inputs[i], import);
+ }
+}
+
+tflchef::Operation *
+TFliteOpBidirectionalSequenceLSTM::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto op_params = op->builtin_options_as_BidirectionalSequenceLSTMOptions();
+ assert(op_params != nullptr);
+
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("BidirectionalSequenceLSTM");
+
+ auto op_options = operation->mutable_bidirectional_sequence_lstm_options();
+
+ op_options->set_activation(as_tflchef_activation(op_params->fused_activation_function()));
+ op_options->set_cell_clip(op_params->cell_clip());
+ op_options->set_proj_clip(op_params->proj_clip());
+ op_options->set_time_major(op_params->time_major());
+ op_options->set_asymmetric_quantize_inputs(op_params->asymmetric_quantize_inputs());
+ op_options->set_merge_outputs(op_params->merge_outputs());
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/Dequantize.cpp b/compiler/tflchef/tflite/src/Op/Dequantize.cpp
new file mode 100644
index 000000000..436a0db19
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Dequantize.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Dequantize.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpDequantize::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ // Nothing to do with filler
+}
+
+tflchef::Operation *TFliteOpDequantize::build(const tflite::Operator *, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("Dequantize");
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/FakeQuant.cpp b/compiler/tflchef/tflite/src/Op/FakeQuant.cpp
new file mode 100644
index 000000000..f44b85465
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/FakeQuant.cpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FakeQuant.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpFakeQuant::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ // Nothing to do with filler
+}
+
+tflchef::Operation *TFliteOpFakeQuant::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto op_params = op->builtin_options_as_FakeQuantOptions();
+ assert(op_params != nullptr);
+
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("FakeQuant");
+
+ auto op_options = operation->mutable_fakequant_options();
+
+ op_options->set_min(op_params->min());
+ op_options->set_max(op_params->max());
+ op_options->set_num_bits(op_params->num_bits());
+ op_options->set_narrow_range(op_params->narrow_range());
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/FullyConnected.cpp b/compiler/tflchef/tflite/src/Op/FullyConnected.cpp
index 4291c844b..bbc749fe4 100644
--- a/compiler/tflchef/tflite/src/Op/FullyConnected.cpp
+++ b/compiler/tflchef/tflite/src/Op/FullyConnected.cpp
@@ -17,6 +17,7 @@
#include "FullyConnected.h"
#include "Convert.h"
+#include "FillerHelper.h"
namespace tflchef
{
@@ -24,7 +25,14 @@ namespace tflchef
void TFliteOpFullyConnected::filler(const tflite::Operator *op, TFliteImport *import,
tflchef::ModelRecipe *model_recipe) const
{
- // Nothing to do with filler
+ const auto &inputs = *op->inputs();
+
+ for (uint32_t idx = 1; idx < inputs.size(); idx++)
+ {
+ // An optional input tensor index has a negative value.
+ if (inputs[idx] >= 0)
+ fill_tensor_to_import(inputs[idx], import);
+ }
}
tflchef::Operation *TFliteOpFullyConnected::build(const tflite::Operator *op, TFliteImport *import,
@@ -40,6 +48,7 @@ tflchef::Operation *TFliteOpFullyConnected::build(const tflite::Operator *op, TF
auto op_options = operation->mutable_fullyconnected_options();
op_options->set_activation(as_tflchef_activation(op_params->fused_activation_function()));
+ op_options->set_keep_num_dims(op_params->keep_num_dims());
return operation;
}
diff --git a/compiler/tflchef/tflite/src/Op/Gelu.cpp b/compiler/tflchef/tflite/src/Op/Gelu.cpp
new file mode 100644
index 000000000..23cee07b0
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Gelu.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Gelu.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpGelu::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ // Nothing to do with filler
+}
+
+tflchef::Operation *TFliteOpGelu::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto op_params = op->builtin_options_as_GeluOptions();
+ assert(op_params != nullptr);
+
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("Gelu");
+
+ auto *op_options = operation->mutable_gelu_options();
+
+ op_options->set_approximate(op_params->approximate());
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/HardSwish.cpp b/compiler/tflchef/tflite/src/Op/HardSwish.cpp
new file mode 100644
index 000000000..2282ff97d
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/HardSwish.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "HardSwish.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpHardSwish::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ // Nothing to do with filler
+}
+
+tflchef::Operation *TFliteOpHardSwish::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("HardSwish");
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/Maximum.cpp b/compiler/tflchef/tflite/src/Op/Maximum.cpp
index fb977b6ed..65e4c2c99 100644
--- a/compiler/tflchef/tflite/src/Op/Maximum.cpp
+++ b/compiler/tflchef/tflite/src/Op/Maximum.cpp
@@ -16,13 +16,16 @@
#include "Maximum.h"
+#include "Convert.h"
+#include "FillerHelper.h"
+
namespace tflchef
{
void TFliteOpMaximum::filler(const tflite::Operator *op, TFliteImport *import,
tflchef::ModelRecipe *model_recipe) const
{
- // Nothing to do with filler
+ fill_two_inputs(op, import);
}
tflchef::Operation *TFliteOpMaximum::build(const tflite::Operator *op, TFliteImport *import,
diff --git a/compiler/tflchef/tflite/src/Op/Minimum.cpp b/compiler/tflchef/tflite/src/Op/Minimum.cpp
index 2bb50cb89..b4d255ce3 100644
--- a/compiler/tflchef/tflite/src/Op/Minimum.cpp
+++ b/compiler/tflchef/tflite/src/Op/Minimum.cpp
@@ -17,6 +17,7 @@
#include "Minimum.h"
#include "Convert.h"
+#include "FillerHelper.h"
namespace tflchef
{
@@ -24,7 +25,7 @@ namespace tflchef
void TFliteOpMinimum::filler(const tflite::Operator *op, TFliteImport *import,
tflchef::ModelRecipe *model_recipe) const
{
- // Nothing to do with filler
+ fill_two_inputs(op, import);
}
tflchef::Operation *TFliteOpMinimum::build(const tflite::Operator *op, TFliteImport *import,
diff --git a/compiler/tflchef/tflite/src/Op/Mul.cpp b/compiler/tflchef/tflite/src/Op/Mul.cpp
index 9faa4acaf..1145ff7e6 100644
--- a/compiler/tflchef/tflite/src/Op/Mul.cpp
+++ b/compiler/tflchef/tflite/src/Op/Mul.cpp
@@ -27,11 +27,7 @@ void TFliteOpMul::filler(const tflite::Operator *op, TFliteImport *import,
{
// Mul may have constant input
- const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
- assert(inputs.size() == 2);
-
- fill_tensor_to_import(inputs[0], import);
- fill_tensor_to_import(inputs[1], import);
+ fill_two_inputs(op, import);
}
tflchef::Operation *TFliteOpMul::build(const tflite::Operator *op, TFliteImport *import,
diff --git a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.cpp b/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.cpp
index ad9921970..4f096ced4 100644
--- a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.cpp
+++ b/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.cpp
@@ -38,7 +38,7 @@ void TFliteOpNonMaxSuppressionV4::filler(const tflite::Operator *op, TFliteImpor
for (int32_t index = 2; index < 5; ++index)
{
- fill_tensor_to_import(index, import);
+ fill_tensor_to_import(inputs[index], import);
}
}
diff --git a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp b/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp
index db7f4c932..332cba0ff 100644
--- a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp
+++ b/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp
@@ -41,7 +41,7 @@ void TFliteOpNonMaxSuppressionV5::filler(const tflite::Operator *op, TFliteImpor
for (int32_t index = 2; index < 6; ++index)
{
- fill_tensor_to_import(index, import);
+ fill_tensor_to_import(inputs[index], import);
}
}
diff --git a/compiler/tflchef/tflite/src/Op/PRelu.cpp b/compiler/tflchef/tflite/src/Op/PRelu.cpp
index 8a5e83a84..1a1a84bce 100644
--- a/compiler/tflchef/tflite/src/Op/PRelu.cpp
+++ b/compiler/tflchef/tflite/src/Op/PRelu.cpp
@@ -24,6 +24,11 @@ namespace tflchef
void TFliteOpPRelu::filler(const tflite::Operator *op, TFliteImport *import,
tflchef::ModelRecipe *model_recipe) const
{
+ const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
+
+ assert(inputs.size() == 2);
+
+ import->set_tensor_filler(inputs.at(1)); // alpha
}
tflchef::Operation *TFliteOpPRelu::build(const tflite::Operator *op, TFliteImport *import,
diff --git a/compiler/tflchef/tflite/src/Op/PadV2.cpp b/compiler/tflchef/tflite/src/Op/PadV2.cpp
index 0b1c9f3b2..a6b657f59 100644
--- a/compiler/tflchef/tflite/src/Op/PadV2.cpp
+++ b/compiler/tflchef/tflite/src/Op/PadV2.cpp
@@ -16,6 +16,7 @@
#include "PadV2.h"
+#include "Convert.h"
#include "FillerHelper.h"
namespace tflchef
@@ -24,9 +25,11 @@ namespace tflchef
void TFliteOpPadV2::filler(const tflite::Operator *op, TFliteImport *import,
tflchef::ModelRecipe *model_recipe) const
{
+ const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
+
// Filler for paddings and constant_values
- fill_tensor_to_import(1, import);
- fill_tensor_to_import(2, import);
+ fill_tensor_to_import(inputs[1], import);
+ fill_tensor_to_import(inputs[2], import);
}
tflchef::Operation *TFliteOpPadV2::build(const tflite::Operator *op, TFliteImport *import,
diff --git a/compiler/tflchef/tflite/src/Op/Quantize.cpp b/compiler/tflchef/tflite/src/Op/Quantize.cpp
new file mode 100644
index 000000000..0808b9c3f
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/Quantize.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Quantize.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpQuantize::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ // Nothing to do with filler
+}
+
+tflchef::Operation *TFliteOpQuantize::build(const tflite::Operator *, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("Quantize");
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/SVDF.cpp b/compiler/tflchef/tflite/src/Op/SVDF.cpp
new file mode 100644
index 000000000..015f968a8
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/SVDF.cpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SVDF.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpSVDF::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
+ assert(inputs.size() == 5);
+
+ // An optional input tensor index has a negative value.
+ const bool hasBias = (inputs.at(3) >= 0);
+
+ // Note: the last input is a variable tensor without data
+ import->set_tensor_filler(inputs.at(1));
+ import->set_tensor_filler(inputs.at(2));
+ if (hasBias)
+ import->set_tensor_filler(inputs.at(3));
+}
+
+tflchef::Operation *TFliteOpSVDF::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ const auto op_params = op->builtin_options_as_SVDFOptions();
+ assert(op_params != nullptr);
+
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("SVDF");
+
+ auto op_options = operation->mutable_svdf_options();
+
+ op_options->set_activation(as_tflchef_activation(op_params->fused_activation_function()));
+ op_options->set_asymmetric_quantize_inputs(op_params->asymmetric_quantize_inputs());
+ op_options->set_rank(op_params->rank());
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/ScatterNd.cpp b/compiler/tflchef/tflite/src/Op/ScatterNd.cpp
index 548a09a67..ec09a69a4 100644
--- a/compiler/tflchef/tflite/src/Op/ScatterNd.cpp
+++ b/compiler/tflchef/tflite/src/Op/ScatterNd.cpp
@@ -25,9 +25,11 @@ namespace tflchef
void TFliteOpScatterNd::filler(const tflite::Operator *op, TFliteImport *import,
tflchef::ModelRecipe *model_recipe) const
{
+ const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
+
// Filler for indices and shape
- fill_tensor_to_import(0, import);
- fill_tensor_to_import(2, import);
+ fill_tensor_to_import(inputs[0], import);
+ fill_tensor_to_import(inputs[2], import);
}
tflchef::Operation *TFliteOpScatterNd::build(const tflite::Operator *, TFliteImport *,
diff --git a/compiler/tflchef/tflite/src/Op/SegmentSum.cpp b/compiler/tflchef/tflite/src/Op/SegmentSum.cpp
index a975ca4b3..bc45a94e0 100644
--- a/compiler/tflchef/tflite/src/Op/SegmentSum.cpp
+++ b/compiler/tflchef/tflite/src/Op/SegmentSum.cpp
@@ -16,6 +16,7 @@
#include "SegmentSum.h"
+#include "Convert.h"
#include "FillerHelper.h"
namespace tflchef
@@ -24,8 +25,10 @@ namespace tflchef
void TFliteOpSegmentSum::filler(const tflite::Operator *op, TFliteImport *import,
tflchef::ModelRecipe *model_recipe) const
{
- // Filler for indices and shape
- fill_tensor_to_import(1, import);
+ const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
+
+ // Filler for segment_ids
+ fill_tensor_to_import(inputs[1], import);
}
tflchef::Operation *TFliteOpSegmentSum::build(const tflite::Operator *op, TFliteImport *import,
diff --git a/compiler/tflchef/tflite/src/Op/Sub.cpp b/compiler/tflchef/tflite/src/Op/Sub.cpp
index 0a08bbfdf..584be0ab9 100644
--- a/compiler/tflchef/tflite/src/Op/Sub.cpp
+++ b/compiler/tflchef/tflite/src/Op/Sub.cpp
@@ -27,11 +27,7 @@ void TFliteOpSub::filler(const tflite::Operator *op, TFliteImport *import,
{
// Sub may have constant input
- const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
- assert(inputs.size() == 2);
-
- fill_tensor_to_import(inputs[0], import);
- fill_tensor_to_import(inputs[1], import);
+ fill_two_inputs(op, import);
}
tflchef::Operation *TFliteOpSub::build(const tflite::Operator *op, TFliteImport *import,
diff --git a/compiler/tflchef/tflite/src/Op/TransposeConv.cpp b/compiler/tflchef/tflite/src/Op/TransposeConv.cpp
index 4e7adf6c6..875ccb51b 100644
--- a/compiler/tflchef/tflite/src/Op/TransposeConv.cpp
+++ b/compiler/tflchef/tflite/src/Op/TransposeConv.cpp
@@ -53,10 +53,12 @@ tflchef::Operation *TFliteOpTransposeConv::build(const tflite::Operator *op, TFl
operation->set_type("TransposeConv");
auto op_options = operation->mutable_transpose_conv_options();
+ auto tflchef_activation = as_tflchef_activation(op_params->fused_activation_function());
op_options->set_stride_h(op_params->stride_h());
op_options->set_stride_w(op_params->stride_w());
op_options->set_padding(as_tflchef_padding(op_params->padding()));
+ op_options->set_activation(tflchef_activation);
return operation;
}
diff --git a/compiler/tflchef/tflite/src/Op/UnidirectionalSequenceLSTM.cpp b/compiler/tflchef/tflite/src/Op/UnidirectionalSequenceLSTM.cpp
new file mode 100644
index 000000000..b2bc1acbd
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/UnidirectionalSequenceLSTM.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "UnidirectionalSequenceLSTM.h"
+
+#include "Convert.h"
+#include "FillerHelper.h"
+
+namespace tflchef
+{
+
+void TFliteOpUnidirectionalSequenceLSTM::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
+ assert(inputs.size() == 24);
+
+ for (size_t i = 0; i < inputs.size(); i++)
+ {
+ // Skip inputs 0, 18 and 19: Input[0] is the Input tensor, Input[18] the
+ // OutputState tensor and Input[19] the CellState tensor.
+ // These may be updated from the previous step or supplied by the user,
+ // so they cannot be treated as constants.
+ if (i == 0 || i == 18 || i == 19)
+ continue;
+ if (inputs[i] != -1)
+ fill_tensor_to_import(inputs[i], import);
+ }
+}
+
+tflchef::Operation *
+TFliteOpUnidirectionalSequenceLSTM::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto op_params = op->builtin_options_as_UnidirectionalSequenceLSTMOptions();
+ assert(op_params != nullptr);
+
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("UnidirectionalSequenceLSTM");
+
+ auto op_options = operation->mutable_unidirectional_sequence_lstm_options();
+
+ op_options->set_activation(as_tflchef_activation(op_params->fused_activation_function()));
+ op_options->set_cell_clip(op_params->cell_clip());
+ op_options->set_proj_clip(op_params->proj_clip());
+ op_options->set_time_major(op_params->time_major());
+ op_options->set_asymmetric_quantize_inputs(op_params->asymmetric_quantize_inputs());
+
+ return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/Abs.h b/compiler/tflchef/tflite/src/Op/include/Abs.h
index d99b0d593..d99b0d593 100644
--- a/compiler/tflchef/tflite/src/Op/Abs.h
+++ b/compiler/tflchef/tflite/src/Op/include/Abs.h
diff --git a/compiler/tflchef/tflite/src/Op/Add.h b/compiler/tflchef/tflite/src/Op/include/Add.h
index 49d945f8b..49d945f8b 100644
--- a/compiler/tflchef/tflite/src/Op/Add.h
+++ b/compiler/tflchef/tflite/src/Op/include/Add.h
diff --git a/compiler/tflchef/tflite/src/Op/AddN.h b/compiler/tflchef/tflite/src/Op/include/AddN.h
index 4387aa06a..4387aa06a 100644
--- a/compiler/tflchef/tflite/src/Op/AddN.h
+++ b/compiler/tflchef/tflite/src/Op/include/AddN.h
diff --git a/compiler/tflchef/tflite/src/Op/ArgMax.h b/compiler/tflchef/tflite/src/Op/include/ArgMax.h
index 30068ecf2..30068ecf2 100644
--- a/compiler/tflchef/tflite/src/Op/ArgMax.h
+++ b/compiler/tflchef/tflite/src/Op/include/ArgMax.h
diff --git a/compiler/tflchef/tflite/src/Op/ArgMin.h b/compiler/tflchef/tflite/src/Op/include/ArgMin.h
index 83c643c1a..83c643c1a 100644
--- a/compiler/tflchef/tflite/src/Op/ArgMin.h
+++ b/compiler/tflchef/tflite/src/Op/include/ArgMin.h
diff --git a/compiler/tflchef/tflite/src/Op/AveragePool2D.h b/compiler/tflchef/tflite/src/Op/include/AveragePool2D.h
index f9e9fb254..f9e9fb254 100644
--- a/compiler/tflchef/tflite/src/Op/AveragePool2D.h
+++ b/compiler/tflchef/tflite/src/Op/include/AveragePool2D.h
diff --git a/compiler/tflchef/tflite/src/Op/BatchMatMul.h b/compiler/tflchef/tflite/src/Op/include/BatchMatMul.h
index 6eb4c6e68..6eb4c6e68 100644
--- a/compiler/tflchef/tflite/src/Op/BatchMatMul.h
+++ b/compiler/tflchef/tflite/src/Op/include/BatchMatMul.h
diff --git a/compiler/tflchef/tflite/src/Op/BatchToSpaceND.h b/compiler/tflchef/tflite/src/Op/include/BatchToSpaceND.h
index ae2114c97..ae2114c97 100644
--- a/compiler/tflchef/tflite/src/Op/BatchToSpaceND.h
+++ b/compiler/tflchef/tflite/src/Op/include/BatchToSpaceND.h
diff --git a/compiler/tflchef/tflite/src/Op/include/BidirectionalSequenceLSTM.h b/compiler/tflchef/tflite/src/Op/include/BidirectionalSequenceLSTM.h
new file mode 100644
index 000000000..333f542ac
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/include/BidirectionalSequenceLSTM.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_BIDIRECTIONALSEQUENCE_LSTM_H__
+#define __TFLITE_OP_BIDIRECTIONALSEQUENCE_LSTM_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for BidirectionalSequenceLSTM
+ */
+class TFliteOpBidirectionalSequenceLSTM : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_BIDIRECTIONALSEQUENCE_LSTM_H__
diff --git a/compiler/tflchef/tflite/src/Op/Cast.h b/compiler/tflchef/tflite/src/Op/include/Cast.h
index 29c126c93..29c126c93 100644
--- a/compiler/tflchef/tflite/src/Op/Cast.h
+++ b/compiler/tflchef/tflite/src/Op/include/Cast.h
diff --git a/compiler/tflchef/tflite/src/Op/Ceil.h b/compiler/tflchef/tflite/src/Op/include/Ceil.h
index 44df20778..44df20778 100644
--- a/compiler/tflchef/tflite/src/Op/Ceil.h
+++ b/compiler/tflchef/tflite/src/Op/include/Ceil.h
diff --git a/compiler/tflchef/tflite/src/Op/Concatenation.h b/compiler/tflchef/tflite/src/Op/include/Concatenation.h
index 4a7ea5791..4a7ea5791 100644
--- a/compiler/tflchef/tflite/src/Op/Concatenation.h
+++ b/compiler/tflchef/tflite/src/Op/include/Concatenation.h
diff --git a/compiler/tflchef/tflite/src/Op/Conv2D.h b/compiler/tflchef/tflite/src/Op/include/Conv2D.h
index 0216e9ce9..0216e9ce9 100644
--- a/compiler/tflchef/tflite/src/Op/Conv2D.h
+++ b/compiler/tflchef/tflite/src/Op/include/Conv2D.h
diff --git a/compiler/tflchef/tflite/src/Op/Cos.h b/compiler/tflchef/tflite/src/Op/include/Cos.h
index 8f3dbe3a6..8f3dbe3a6 100644
--- a/compiler/tflchef/tflite/src/Op/Cos.h
+++ b/compiler/tflchef/tflite/src/Op/include/Cos.h
diff --git a/compiler/tflchef/tflite/src/Op/DepthToSpace.h b/compiler/tflchef/tflite/src/Op/include/DepthToSpace.h
index b5852ac89..b5852ac89 100644
--- a/compiler/tflchef/tflite/src/Op/DepthToSpace.h
+++ b/compiler/tflchef/tflite/src/Op/include/DepthToSpace.h
diff --git a/compiler/tflchef/tflite/src/Op/DepthwiseConv2D.h b/compiler/tflchef/tflite/src/Op/include/DepthwiseConv2D.h
index c172536b4..c172536b4 100644
--- a/compiler/tflchef/tflite/src/Op/DepthwiseConv2D.h
+++ b/compiler/tflchef/tflite/src/Op/include/DepthwiseConv2D.h
diff --git a/compiler/tflchef/tflite/src/Op/include/Dequantize.h b/compiler/tflchef/tflite/src/Op/include/Dequantize.h
new file mode 100644
index 000000000..df1c7bbdb
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/include/Dequantize.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_DEQUANTIZE_H__
+#define __TFLITE_OP_DEQUANTIZE_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for Dequantize
+ */
+class TFliteOpDequantize : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_DEQUANTIZE_H__
diff --git a/compiler/tflchef/tflite/src/Op/Div.h b/compiler/tflchef/tflite/src/Op/include/Div.h
index 254a4cd99..254a4cd99 100644
--- a/compiler/tflchef/tflite/src/Op/Div.h
+++ b/compiler/tflchef/tflite/src/Op/include/Div.h
diff --git a/compiler/tflchef/tflite/src/Op/ELU.h b/compiler/tflchef/tflite/src/Op/include/ELU.h
index 490c9fde4..490c9fde4 100644
--- a/compiler/tflchef/tflite/src/Op/ELU.h
+++ b/compiler/tflchef/tflite/src/Op/include/ELU.h
diff --git a/compiler/tflchef/tflite/src/Op/Equal.h b/compiler/tflchef/tflite/src/Op/include/Equal.h
index fd4b40001..fd4b40001 100644
--- a/compiler/tflchef/tflite/src/Op/Equal.h
+++ b/compiler/tflchef/tflite/src/Op/include/Equal.h
diff --git a/compiler/tflchef/tflite/src/Op/Exp.h b/compiler/tflchef/tflite/src/Op/include/Exp.h
index 5ff3ddc8b..5ff3ddc8b 100644
--- a/compiler/tflchef/tflite/src/Op/Exp.h
+++ b/compiler/tflchef/tflite/src/Op/include/Exp.h
diff --git a/compiler/tflchef/tflite/src/Op/ExpandDims.h b/compiler/tflchef/tflite/src/Op/include/ExpandDims.h
index e2f3e4e50..e2f3e4e50 100644
--- a/compiler/tflchef/tflite/src/Op/ExpandDims.h
+++ b/compiler/tflchef/tflite/src/Op/include/ExpandDims.h
diff --git a/compiler/tflchef/tflite/src/Op/include/FakeQuant.h b/compiler/tflchef/tflite/src/Op/include/FakeQuant.h
new file mode 100644
index 000000000..f36e615df
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/include/FakeQuant.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_FAKE_QUANT_H__
+#define __TFLITE_OP_FAKE_QUANT_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for FakeQuant
+ */
+class TFliteOpFakeQuant : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_FAKE_QUANT_H__
diff --git a/compiler/tflchef/tflite/src/Op/Fill.h b/compiler/tflchef/tflite/src/Op/include/Fill.h
index 4f46f628a..4f46f628a 100644
--- a/compiler/tflchef/tflite/src/Op/Fill.h
+++ b/compiler/tflchef/tflite/src/Op/include/Fill.h
diff --git a/compiler/tflchef/tflite/src/Op/Floor.h b/compiler/tflchef/tflite/src/Op/include/Floor.h
index f0f8ef38a..f0f8ef38a 100644
--- a/compiler/tflchef/tflite/src/Op/Floor.h
+++ b/compiler/tflchef/tflite/src/Op/include/Floor.h
diff --git a/compiler/tflchef/tflite/src/Op/FloorDiv.h b/compiler/tflchef/tflite/src/Op/include/FloorDiv.h
index 5d049a668..5d049a668 100644
--- a/compiler/tflchef/tflite/src/Op/FloorDiv.h
+++ b/compiler/tflchef/tflite/src/Op/include/FloorDiv.h
diff --git a/compiler/tflchef/tflite/src/Op/FloorMod.h b/compiler/tflchef/tflite/src/Op/include/FloorMod.h
index f36dfe813..f36dfe813 100644
--- a/compiler/tflchef/tflite/src/Op/FloorMod.h
+++ b/compiler/tflchef/tflite/src/Op/include/FloorMod.h
diff --git a/compiler/tflchef/tflite/src/Op/FullyConnected.h b/compiler/tflchef/tflite/src/Op/include/FullyConnected.h
index 8fbe1f3ed..8fbe1f3ed 100644
--- a/compiler/tflchef/tflite/src/Op/FullyConnected.h
+++ b/compiler/tflchef/tflite/src/Op/include/FullyConnected.h
diff --git a/compiler/tflchef/tflite/src/Op/Gather.h b/compiler/tflchef/tflite/src/Op/include/Gather.h
index e01276b76..e01276b76 100644
--- a/compiler/tflchef/tflite/src/Op/Gather.h
+++ b/compiler/tflchef/tflite/src/Op/include/Gather.h
diff --git a/compiler/tflchef/tflite/src/Op/GatherNd.h b/compiler/tflchef/tflite/src/Op/include/GatherNd.h
index 112f23d33..112f23d33 100644
--- a/compiler/tflchef/tflite/src/Op/GatherNd.h
+++ b/compiler/tflchef/tflite/src/Op/include/GatherNd.h
diff --git a/compiler/tflchef/tflite/src/Op/include/Gelu.h b/compiler/tflchef/tflite/src/Op/include/Gelu.h
new file mode 100644
index 000000000..0c51a51be
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/include/Gelu.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_GELU_H__
+#define __TFLITE_OP_GELU_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for Gelu
+ */
+class TFliteOpGelu : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_GELU_H__
diff --git a/compiler/tflchef/tflite/src/Op/Greater.h b/compiler/tflchef/tflite/src/Op/include/Greater.h
index 3ab2d1a4e..3ab2d1a4e 100644
--- a/compiler/tflchef/tflite/src/Op/Greater.h
+++ b/compiler/tflchef/tflite/src/Op/include/Greater.h
diff --git a/compiler/tflchef/tflite/src/Op/GreaterEqual.h b/compiler/tflchef/tflite/src/Op/include/GreaterEqual.h
index 96b0af78a..96b0af78a 100644
--- a/compiler/tflchef/tflite/src/Op/GreaterEqual.h
+++ b/compiler/tflchef/tflite/src/Op/include/GreaterEqual.h
diff --git a/compiler/tflchef/tflite/src/Op/include/HardSwish.h b/compiler/tflchef/tflite/src/Op/include/HardSwish.h
new file mode 100644
index 000000000..d9b5a5382
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/include/HardSwish.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_HARDSWISH_H__
+#define __TFLITE_OP_HARDSWISH_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for Hard Swish
+ */
+class TFliteOpHardSwish : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_HARDSWISH_H__
diff --git a/compiler/tflchef/tflite/src/Op/L2Normalize.h b/compiler/tflchef/tflite/src/Op/include/L2Normalize.h
index a73eae6c8..a73eae6c8 100644
--- a/compiler/tflchef/tflite/src/Op/L2Normalize.h
+++ b/compiler/tflchef/tflite/src/Op/include/L2Normalize.h
diff --git a/compiler/tflchef/tflite/src/Op/L2Pool2D.h b/compiler/tflchef/tflite/src/Op/include/L2Pool2D.h
index 046353440..046353440 100644
--- a/compiler/tflchef/tflite/src/Op/L2Pool2D.h
+++ b/compiler/tflchef/tflite/src/Op/include/L2Pool2D.h
diff --git a/compiler/tflchef/tflite/src/Op/LeakyRelu.h b/compiler/tflchef/tflite/src/Op/include/LeakyRelu.h
index 28e63e0ca..28e63e0ca 100644
--- a/compiler/tflchef/tflite/src/Op/LeakyRelu.h
+++ b/compiler/tflchef/tflite/src/Op/include/LeakyRelu.h
diff --git a/compiler/tflchef/tflite/src/Op/Less.h b/compiler/tflchef/tflite/src/Op/include/Less.h
index 1316cb613..1316cb613 100644
--- a/compiler/tflchef/tflite/src/Op/Less.h
+++ b/compiler/tflchef/tflite/src/Op/include/Less.h
diff --git a/compiler/tflchef/tflite/src/Op/LessEqual.h b/compiler/tflchef/tflite/src/Op/include/LessEqual.h
index 81c710fbc..81c710fbc 100644
--- a/compiler/tflchef/tflite/src/Op/LessEqual.h
+++ b/compiler/tflchef/tflite/src/Op/include/LessEqual.h
diff --git a/compiler/tflchef/tflite/src/Op/LocalResponseNormalization.h b/compiler/tflchef/tflite/src/Op/include/LocalResponseNormalization.h
index c0eb3f2b1..c0eb3f2b1 100644
--- a/compiler/tflchef/tflite/src/Op/LocalResponseNormalization.h
+++ b/compiler/tflchef/tflite/src/Op/include/LocalResponseNormalization.h
diff --git a/compiler/tflchef/tflite/src/Op/Log.h b/compiler/tflchef/tflite/src/Op/include/Log.h
index 9d17e2f81..9d17e2f81 100644
--- a/compiler/tflchef/tflite/src/Op/Log.h
+++ b/compiler/tflchef/tflite/src/Op/include/Log.h
diff --git a/compiler/tflchef/tflite/src/Op/LogSoftmax.h b/compiler/tflchef/tflite/src/Op/include/LogSoftmax.h
index efd81f3e9..efd81f3e9 100644
--- a/compiler/tflchef/tflite/src/Op/LogSoftmax.h
+++ b/compiler/tflchef/tflite/src/Op/include/LogSoftmax.h
diff --git a/compiler/tflchef/tflite/src/Op/LogicalAnd.h b/compiler/tflchef/tflite/src/Op/include/LogicalAnd.h
index 1f7a964b9..1f7a964b9 100644
--- a/compiler/tflchef/tflite/src/Op/LogicalAnd.h
+++ b/compiler/tflchef/tflite/src/Op/include/LogicalAnd.h
diff --git a/compiler/tflchef/tflite/src/Op/LogicalNot.h b/compiler/tflchef/tflite/src/Op/include/LogicalNot.h
index b75d33554..b75d33554 100644
--- a/compiler/tflchef/tflite/src/Op/LogicalNot.h
+++ b/compiler/tflchef/tflite/src/Op/include/LogicalNot.h
diff --git a/compiler/tflchef/tflite/src/Op/LogicalOr.h b/compiler/tflchef/tflite/src/Op/include/LogicalOr.h
index 5331a0d65..5331a0d65 100644
--- a/compiler/tflchef/tflite/src/Op/LogicalOr.h
+++ b/compiler/tflchef/tflite/src/Op/include/LogicalOr.h
diff --git a/compiler/tflchef/tflite/src/Op/Logistic.h b/compiler/tflchef/tflite/src/Op/include/Logistic.h
index a75bf490e..a75bf490e 100644
--- a/compiler/tflchef/tflite/src/Op/Logistic.h
+++ b/compiler/tflchef/tflite/src/Op/include/Logistic.h
diff --git a/compiler/tflchef/tflite/src/Op/MatrixDiag.h b/compiler/tflchef/tflite/src/Op/include/MatrixDiag.h
index 4074f2c36..4074f2c36 100644
--- a/compiler/tflchef/tflite/src/Op/MatrixDiag.h
+++ b/compiler/tflchef/tflite/src/Op/include/MatrixDiag.h
diff --git a/compiler/tflchef/tflite/src/Op/MatrixSetDiag.h b/compiler/tflchef/tflite/src/Op/include/MatrixSetDiag.h
index 0e7ec7f32..0e7ec7f32 100644
--- a/compiler/tflchef/tflite/src/Op/MatrixSetDiag.h
+++ b/compiler/tflchef/tflite/src/Op/include/MatrixSetDiag.h
diff --git a/compiler/tflchef/tflite/src/Op/MaxPool2D.h b/compiler/tflchef/tflite/src/Op/include/MaxPool2D.h
index 36533f80c..36533f80c 100644
--- a/compiler/tflchef/tflite/src/Op/MaxPool2D.h
+++ b/compiler/tflchef/tflite/src/Op/include/MaxPool2D.h
diff --git a/compiler/tflchef/tflite/src/Op/Maximum.h b/compiler/tflchef/tflite/src/Op/include/Maximum.h
index acafec343..acafec343 100644
--- a/compiler/tflchef/tflite/src/Op/Maximum.h
+++ b/compiler/tflchef/tflite/src/Op/include/Maximum.h
diff --git a/compiler/tflchef/tflite/src/Op/Mean.h b/compiler/tflchef/tflite/src/Op/include/Mean.h
index 532c40c66..532c40c66 100644
--- a/compiler/tflchef/tflite/src/Op/Mean.h
+++ b/compiler/tflchef/tflite/src/Op/include/Mean.h
diff --git a/compiler/tflchef/tflite/src/Op/Minimum.h b/compiler/tflchef/tflite/src/Op/include/Minimum.h
index 5db5b7940..5db5b7940 100644
--- a/compiler/tflchef/tflite/src/Op/Minimum.h
+++ b/compiler/tflchef/tflite/src/Op/include/Minimum.h
diff --git a/compiler/tflchef/tflite/src/Op/MirrorPad.h b/compiler/tflchef/tflite/src/Op/include/MirrorPad.h
index c9acdd498..c9acdd498 100644
--- a/compiler/tflchef/tflite/src/Op/MirrorPad.h
+++ b/compiler/tflchef/tflite/src/Op/include/MirrorPad.h
diff --git a/compiler/tflchef/tflite/src/Op/Mul.h b/compiler/tflchef/tflite/src/Op/include/Mul.h
index fd009d2fd..fd009d2fd 100644
--- a/compiler/tflchef/tflite/src/Op/Mul.h
+++ b/compiler/tflchef/tflite/src/Op/include/Mul.h
diff --git a/compiler/tflchef/tflite/src/Op/Neg.h b/compiler/tflchef/tflite/src/Op/include/Neg.h
index c77ab7e84..c77ab7e84 100644
--- a/compiler/tflchef/tflite/src/Op/Neg.h
+++ b/compiler/tflchef/tflite/src/Op/include/Neg.h
diff --git a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.h b/compiler/tflchef/tflite/src/Op/include/NonMaxSuppressionV4.h
index 114a2ad2f..114a2ad2f 100644
--- a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.h
+++ b/compiler/tflchef/tflite/src/Op/include/NonMaxSuppressionV4.h
diff --git a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.h b/compiler/tflchef/tflite/src/Op/include/NonMaxSuppressionV5.h
index c948043f4..c948043f4 100644
--- a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.h
+++ b/compiler/tflchef/tflite/src/Op/include/NonMaxSuppressionV5.h
diff --git a/compiler/tflchef/tflite/src/Op/NotEqual.h b/compiler/tflchef/tflite/src/Op/include/NotEqual.h
index b1febdcc5..b1febdcc5 100644
--- a/compiler/tflchef/tflite/src/Op/NotEqual.h
+++ b/compiler/tflchef/tflite/src/Op/include/NotEqual.h
diff --git a/compiler/tflchef/tflite/src/Op/OneHot.h b/compiler/tflchef/tflite/src/Op/include/OneHot.h
index 50bbed095..50bbed095 100644
--- a/compiler/tflchef/tflite/src/Op/OneHot.h
+++ b/compiler/tflchef/tflite/src/Op/include/OneHot.h
diff --git a/compiler/tflchef/tflite/src/Op/PRelu.h b/compiler/tflchef/tflite/src/Op/include/PRelu.h
index b35c6e7ce..b35c6e7ce 100644
--- a/compiler/tflchef/tflite/src/Op/PRelu.h
+++ b/compiler/tflchef/tflite/src/Op/include/PRelu.h
diff --git a/compiler/tflchef/tflite/src/Op/Pack.h b/compiler/tflchef/tflite/src/Op/include/Pack.h
index 7779f64ed..7779f64ed 100644
--- a/compiler/tflchef/tflite/src/Op/Pack.h
+++ b/compiler/tflchef/tflite/src/Op/include/Pack.h
diff --git a/compiler/tflchef/tflite/src/Op/Pad.h b/compiler/tflchef/tflite/src/Op/include/Pad.h
index 99998d418..99998d418 100644
--- a/compiler/tflchef/tflite/src/Op/Pad.h
+++ b/compiler/tflchef/tflite/src/Op/include/Pad.h
diff --git a/compiler/tflchef/tflite/src/Op/PadV2.h b/compiler/tflchef/tflite/src/Op/include/PadV2.h
index 3aa474b92..3aa474b92 100644
--- a/compiler/tflchef/tflite/src/Op/PadV2.h
+++ b/compiler/tflchef/tflite/src/Op/include/PadV2.h
diff --git a/compiler/tflchef/tflite/src/Op/Pow.h b/compiler/tflchef/tflite/src/Op/include/Pow.h
index 20e847377..20e847377 100644
--- a/compiler/tflchef/tflite/src/Op/Pow.h
+++ b/compiler/tflchef/tflite/src/Op/include/Pow.h
diff --git a/compiler/tflchef/tflite/src/Op/include/Quantize.h b/compiler/tflchef/tflite/src/Op/include/Quantize.h
new file mode 100644
index 000000000..256ed5a5c
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/include/Quantize.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_QUANTIZE_H__
+#define __TFLITE_OP_QUANTIZE_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for Quantize
+ */
+class TFliteOpQuantize : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_QUANTIZE_H__
diff --git a/compiler/tflchef/tflite/src/Op/Range.h b/compiler/tflchef/tflite/src/Op/include/Range.h
index ad10dc58b..ad10dc58b 100644
--- a/compiler/tflchef/tflite/src/Op/Range.h
+++ b/compiler/tflchef/tflite/src/Op/include/Range.h
diff --git a/compiler/tflchef/tflite/src/Op/Rank.h b/compiler/tflchef/tflite/src/Op/include/Rank.h
index 003d9d310..003d9d310 100644
--- a/compiler/tflchef/tflite/src/Op/Rank.h
+++ b/compiler/tflchef/tflite/src/Op/include/Rank.h
diff --git a/compiler/tflchef/tflite/src/Op/ReLU.h b/compiler/tflchef/tflite/src/Op/include/ReLU.h
index be1090270..be1090270 100644
--- a/compiler/tflchef/tflite/src/Op/ReLU.h
+++ b/compiler/tflchef/tflite/src/Op/include/ReLU.h
diff --git a/compiler/tflchef/tflite/src/Op/ReLU6.h b/compiler/tflchef/tflite/src/Op/include/ReLU6.h
index 64ddb6a2e..64ddb6a2e 100644
--- a/compiler/tflchef/tflite/src/Op/ReLU6.h
+++ b/compiler/tflchef/tflite/src/Op/include/ReLU6.h
diff --git a/compiler/tflchef/tflite/src/Op/ReLUN1To1.h b/compiler/tflchef/tflite/src/Op/include/ReLUN1To1.h
index 0767006af..0767006af 100644
--- a/compiler/tflchef/tflite/src/Op/ReLUN1To1.h
+++ b/compiler/tflchef/tflite/src/Op/include/ReLUN1To1.h
diff --git a/compiler/tflchef/tflite/src/Op/ReduceAny.h b/compiler/tflchef/tflite/src/Op/include/ReduceAny.h
index dd5e361d5..dd5e361d5 100644
--- a/compiler/tflchef/tflite/src/Op/ReduceAny.h
+++ b/compiler/tflchef/tflite/src/Op/include/ReduceAny.h
diff --git a/compiler/tflchef/tflite/src/Op/ReduceMax.h b/compiler/tflchef/tflite/src/Op/include/ReduceMax.h
index 8e65cf47c..8e65cf47c 100644
--- a/compiler/tflchef/tflite/src/Op/ReduceMax.h
+++ b/compiler/tflchef/tflite/src/Op/include/ReduceMax.h
diff --git a/compiler/tflchef/tflite/src/Op/ReduceMin.h b/compiler/tflchef/tflite/src/Op/include/ReduceMin.h
index 88cba6fe7..88cba6fe7 100644
--- a/compiler/tflchef/tflite/src/Op/ReduceMin.h
+++ b/compiler/tflchef/tflite/src/Op/include/ReduceMin.h
diff --git a/compiler/tflchef/tflite/src/Op/ReduceProd.h b/compiler/tflchef/tflite/src/Op/include/ReduceProd.h
index e7766840a..e7766840a 100644
--- a/compiler/tflchef/tflite/src/Op/ReduceProd.h
+++ b/compiler/tflchef/tflite/src/Op/include/ReduceProd.h
diff --git a/compiler/tflchef/tflite/src/Op/Reshape.h b/compiler/tflchef/tflite/src/Op/include/Reshape.h
index be9fdac08..be9fdac08 100644
--- a/compiler/tflchef/tflite/src/Op/Reshape.h
+++ b/compiler/tflchef/tflite/src/Op/include/Reshape.h
diff --git a/compiler/tflchef/tflite/src/Op/ResizeBilinear.h b/compiler/tflchef/tflite/src/Op/include/ResizeBilinear.h
index 98c49c534..98c49c534 100644
--- a/compiler/tflchef/tflite/src/Op/ResizeBilinear.h
+++ b/compiler/tflchef/tflite/src/Op/include/ResizeBilinear.h
diff --git a/compiler/tflchef/tflite/src/Op/ResizeNearestNeighbor.h b/compiler/tflchef/tflite/src/Op/include/ResizeNearestNeighbor.h
index 5090bb938..5090bb938 100644
--- a/compiler/tflchef/tflite/src/Op/ResizeNearestNeighbor.h
+++ b/compiler/tflchef/tflite/src/Op/include/ResizeNearestNeighbor.h
diff --git a/compiler/tflchef/tflite/src/Op/ReverseSequence.h b/compiler/tflchef/tflite/src/Op/include/ReverseSequence.h
index 8c8c811e4..8c8c811e4 100644
--- a/compiler/tflchef/tflite/src/Op/ReverseSequence.h
+++ b/compiler/tflchef/tflite/src/Op/include/ReverseSequence.h
diff --git a/compiler/tflchef/tflite/src/Op/ReverseV2.h b/compiler/tflchef/tflite/src/Op/include/ReverseV2.h
index 6a8a75e6b..6a8a75e6b 100644
--- a/compiler/tflchef/tflite/src/Op/ReverseV2.h
+++ b/compiler/tflchef/tflite/src/Op/include/ReverseV2.h
diff --git a/compiler/tflchef/tflite/src/Op/Round.h b/compiler/tflchef/tflite/src/Op/include/Round.h
index df0da3fa1..df0da3fa1 100644
--- a/compiler/tflchef/tflite/src/Op/Round.h
+++ b/compiler/tflchef/tflite/src/Op/include/Round.h
diff --git a/compiler/tflchef/tflite/src/Op/Rsqrt.h b/compiler/tflchef/tflite/src/Op/include/Rsqrt.h
index 5d68344c2..5d68344c2 100644
--- a/compiler/tflchef/tflite/src/Op/Rsqrt.h
+++ b/compiler/tflchef/tflite/src/Op/include/Rsqrt.h
diff --git a/compiler/tflchef/tflite/src/Op/include/SVDF.h b/compiler/tflchef/tflite/src/Op/include/SVDF.h
new file mode 100644
index 000000000..a59ca54a2
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/include/SVDF.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_SVDF_H__
+#define __TFLITE_OP_SVDF_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for SVDF
+ */
+class TFliteOpSVDF : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_SVDF_H__
diff --git a/compiler/tflchef/tflite/src/Op/ScatterNd.h b/compiler/tflchef/tflite/src/Op/include/ScatterNd.h
index 76362d775..76362d775 100644
--- a/compiler/tflchef/tflite/src/Op/ScatterNd.h
+++ b/compiler/tflchef/tflite/src/Op/include/ScatterNd.h
diff --git a/compiler/tflchef/tflite/src/Op/SegmentSum.h b/compiler/tflchef/tflite/src/Op/include/SegmentSum.h
index d20e63bd7..d20e63bd7 100644
--- a/compiler/tflchef/tflite/src/Op/SegmentSum.h
+++ b/compiler/tflchef/tflite/src/Op/include/SegmentSum.h
diff --git a/compiler/tflchef/tflite/src/Op/Select.h b/compiler/tflchef/tflite/src/Op/include/Select.h
index bf8e57d78..bf8e57d78 100644
--- a/compiler/tflchef/tflite/src/Op/Select.h
+++ b/compiler/tflchef/tflite/src/Op/include/Select.h
diff --git a/compiler/tflchef/tflite/src/Op/SelectV2.h b/compiler/tflchef/tflite/src/Op/include/SelectV2.h
index ff03341d7..ff03341d7 100644
--- a/compiler/tflchef/tflite/src/Op/SelectV2.h
+++ b/compiler/tflchef/tflite/src/Op/include/SelectV2.h
diff --git a/compiler/tflchef/tflite/src/Op/Shape.h b/compiler/tflchef/tflite/src/Op/include/Shape.h
index ebe1befb3..ebe1befb3 100644
--- a/compiler/tflchef/tflite/src/Op/Shape.h
+++ b/compiler/tflchef/tflite/src/Op/include/Shape.h
diff --git a/compiler/tflchef/tflite/src/Op/Sin.h b/compiler/tflchef/tflite/src/Op/include/Sin.h
index 51eabceb5..51eabceb5 100644
--- a/compiler/tflchef/tflite/src/Op/Sin.h
+++ b/compiler/tflchef/tflite/src/Op/include/Sin.h
diff --git a/compiler/tflchef/tflite/src/Op/Slice.h b/compiler/tflchef/tflite/src/Op/include/Slice.h
index 6ca6724d3..6ca6724d3 100644
--- a/compiler/tflchef/tflite/src/Op/Slice.h
+++ b/compiler/tflchef/tflite/src/Op/include/Slice.h
diff --git a/compiler/tflchef/tflite/src/Op/Softmax.h b/compiler/tflchef/tflite/src/Op/include/Softmax.h
index cf168bdd9..cf168bdd9 100644
--- a/compiler/tflchef/tflite/src/Op/Softmax.h
+++ b/compiler/tflchef/tflite/src/Op/include/Softmax.h
diff --git a/compiler/tflchef/tflite/src/Op/SpaceToBatchND.h b/compiler/tflchef/tflite/src/Op/include/SpaceToBatchND.h
index 9d7bc44e8..9d7bc44e8 100644
--- a/compiler/tflchef/tflite/src/Op/SpaceToBatchND.h
+++ b/compiler/tflchef/tflite/src/Op/include/SpaceToBatchND.h
diff --git a/compiler/tflchef/tflite/src/Op/SpaceToDepth.h b/compiler/tflchef/tflite/src/Op/include/SpaceToDepth.h
index 784ad940a..784ad940a 100644
--- a/compiler/tflchef/tflite/src/Op/SpaceToDepth.h
+++ b/compiler/tflchef/tflite/src/Op/include/SpaceToDepth.h
diff --git a/compiler/tflchef/tflite/src/Op/SparseToDense.h b/compiler/tflchef/tflite/src/Op/include/SparseToDense.h
index 5ffe4789d..5ffe4789d 100644
--- a/compiler/tflchef/tflite/src/Op/SparseToDense.h
+++ b/compiler/tflchef/tflite/src/Op/include/SparseToDense.h
diff --git a/compiler/tflchef/tflite/src/Op/Split.h b/compiler/tflchef/tflite/src/Op/include/Split.h
index af247a1b9..af247a1b9 100644
--- a/compiler/tflchef/tflite/src/Op/Split.h
+++ b/compiler/tflchef/tflite/src/Op/include/Split.h
diff --git a/compiler/tflchef/tflite/src/Op/SplitV.h b/compiler/tflchef/tflite/src/Op/include/SplitV.h
index 3f715b5f9..3f715b5f9 100644
--- a/compiler/tflchef/tflite/src/Op/SplitV.h
+++ b/compiler/tflchef/tflite/src/Op/include/SplitV.h
diff --git a/compiler/tflchef/tflite/src/Op/Sqrt.h b/compiler/tflchef/tflite/src/Op/include/Sqrt.h
index 9f0ad04ae..9f0ad04ae 100644
--- a/compiler/tflchef/tflite/src/Op/Sqrt.h
+++ b/compiler/tflchef/tflite/src/Op/include/Sqrt.h
diff --git a/compiler/tflchef/tflite/src/Op/Square.h b/compiler/tflchef/tflite/src/Op/include/Square.h
index 9c008fe52..9c008fe52 100644
--- a/compiler/tflchef/tflite/src/Op/Square.h
+++ b/compiler/tflchef/tflite/src/Op/include/Square.h
diff --git a/compiler/tflchef/tflite/src/Op/SquaredDifference.h b/compiler/tflchef/tflite/src/Op/include/SquaredDifference.h
index 58c2ed460..58c2ed460 100644
--- a/compiler/tflchef/tflite/src/Op/SquaredDifference.h
+++ b/compiler/tflchef/tflite/src/Op/include/SquaredDifference.h
diff --git a/compiler/tflchef/tflite/src/Op/Squeeze.h b/compiler/tflchef/tflite/src/Op/include/Squeeze.h
index b6c89f73d..b6c89f73d 100644
--- a/compiler/tflchef/tflite/src/Op/Squeeze.h
+++ b/compiler/tflchef/tflite/src/Op/include/Squeeze.h
diff --git a/compiler/tflchef/tflite/src/Op/StridedSlice.h b/compiler/tflchef/tflite/src/Op/include/StridedSlice.h
index 98054b9b9..98054b9b9 100644
--- a/compiler/tflchef/tflite/src/Op/StridedSlice.h
+++ b/compiler/tflchef/tflite/src/Op/include/StridedSlice.h
diff --git a/compiler/tflchef/tflite/src/Op/Sub.h b/compiler/tflchef/tflite/src/Op/include/Sub.h
index 2168e5e0d..2168e5e0d 100644
--- a/compiler/tflchef/tflite/src/Op/Sub.h
+++ b/compiler/tflchef/tflite/src/Op/include/Sub.h
diff --git a/compiler/tflchef/tflite/src/Op/Sum.h b/compiler/tflchef/tflite/src/Op/include/Sum.h
index 38eeb080d..38eeb080d 100644
--- a/compiler/tflchef/tflite/src/Op/Sum.h
+++ b/compiler/tflchef/tflite/src/Op/include/Sum.h
diff --git a/compiler/tflchef/tflite/src/Op/Tanh.h b/compiler/tflchef/tflite/src/Op/include/Tanh.h
index 7339e4103..7339e4103 100644
--- a/compiler/tflchef/tflite/src/Op/Tanh.h
+++ b/compiler/tflchef/tflite/src/Op/include/Tanh.h
diff --git a/compiler/tflchef/tflite/src/Op/Tile.h b/compiler/tflchef/tflite/src/Op/include/Tile.h
index 640f52a1f..640f52a1f 100644
--- a/compiler/tflchef/tflite/src/Op/Tile.h
+++ b/compiler/tflchef/tflite/src/Op/include/Tile.h
diff --git a/compiler/tflchef/tflite/src/Op/TopKV2.h b/compiler/tflchef/tflite/src/Op/include/TopKV2.h
index b2b74cc75..b2b74cc75 100644
--- a/compiler/tflchef/tflite/src/Op/TopKV2.h
+++ b/compiler/tflchef/tflite/src/Op/include/TopKV2.h
diff --git a/compiler/tflchef/tflite/src/Op/Transpose.h b/compiler/tflchef/tflite/src/Op/include/Transpose.h
index f0d944b6b..f0d944b6b 100644
--- a/compiler/tflchef/tflite/src/Op/Transpose.h
+++ b/compiler/tflchef/tflite/src/Op/include/Transpose.h
diff --git a/compiler/tflchef/tflite/src/Op/TransposeConv.h b/compiler/tflchef/tflite/src/Op/include/TransposeConv.h
index c79cdabd2..c79cdabd2 100644
--- a/compiler/tflchef/tflite/src/Op/TransposeConv.h
+++ b/compiler/tflchef/tflite/src/Op/include/TransposeConv.h
diff --git a/compiler/tflchef/tflite/src/Op/include/UnidirectionalSequenceLSTM.h b/compiler/tflchef/tflite/src/Op/include/UnidirectionalSequenceLSTM.h
new file mode 100644
index 000000000..cc4e5fb0f
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/include/UnidirectionalSequenceLSTM.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_UNIDIRECTIONALSEQUENCELSTM_H__
+#define __TFLITE_OP_UNIDIRECTIONALSEQUENCELSTM_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for UnidirectionalSequenceLSTM
+ */
+class TFliteOpUnidirectionalSequenceLSTM : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_UNIDIRECTIONALSEQUENCELSTM_H__
diff --git a/compiler/tflchef/tflite/src/Op/Unique.h b/compiler/tflchef/tflite/src/Op/include/Unique.h
index fae037c9f..fae037c9f 100644
--- a/compiler/tflchef/tflite/src/Op/Unique.h
+++ b/compiler/tflchef/tflite/src/Op/include/Unique.h
diff --git a/compiler/tflchef/tflite/src/Op/Unpack.h b/compiler/tflchef/tflite/src/Op/include/Unpack.h
index 1036bdc14..1036bdc14 100644
--- a/compiler/tflchef/tflite/src/Op/Unpack.h
+++ b/compiler/tflchef/tflite/src/Op/include/Unpack.h
diff --git a/compiler/tflchef/tflite/src/Op/Where.h b/compiler/tflchef/tflite/src/Op/include/Where.h
index 00cdc4b00..00cdc4b00 100644
--- a/compiler/tflchef/tflite/src/Op/Where.h
+++ b/compiler/tflchef/tflite/src/Op/include/Where.h
diff --git a/compiler/tflchef/tflite/src/Op/ZerosLike.h b/compiler/tflchef/tflite/src/Op/include/ZerosLike.h
index 163c1fa21..163c1fa21 100644
--- a/compiler/tflchef/tflite/src/Op/ZerosLike.h
+++ b/compiler/tflchef/tflite/src/Op/include/ZerosLike.h
diff --git a/compiler/tflchef/tflite/src/RecipeChef.cpp b/compiler/tflchef/tflite/src/RecipeChef.cpp
index 088961c1c..2203f5906 100644
--- a/compiler/tflchef/tflite/src/RecipeChef.cpp
+++ b/compiler/tflchef/tflite/src/RecipeChef.cpp
@@ -15,6 +15,7 @@
*/
#include <tflchef/RecipeChef.h>
+#include <mio_tflite2121/Helper.h>
#include "Convert.h"
#include "TFliteImport.h"
@@ -42,7 +43,7 @@ void set_inputs(TFliteImport *import, tflchef::Operation *operation, const tflit
else
{
auto tensor = tensors->Get(input);
- std::string name = tensor_name(tensor);
+ std::string name = mio::tflite::tensor_name(tensor);
operation->add_input(name);
}
}
@@ -56,7 +57,7 @@ void set_outputs(TFliteImport *import, tflchef::Operation *operation, const tfli
for (auto output : outputs)
{
auto tensor = tensors->Get(output);
- std::string name = tensor_name(tensor);
+ std::string name = mio::tflite::tensor_name(tensor);
operation->add_output(name);
}
}
@@ -108,8 +109,9 @@ std::unique_ptr<ModelRecipe> generate_recipe(const tflite::Model *model)
::tflchef::Operand *operand = model_recipe->add_operand();
- operand->set_name(tensor_name(tensor));
+ operand->set_name(mio::tflite::tensor_name(tensor));
operand->set_type(as_tflchef_type(tensor->type()));
+ operand->set_is_variable(tensor->is_variable());
if (tensor->shape())
{
@@ -187,6 +189,99 @@ std::unique_ptr<ModelRecipe> generate_recipe(const tflite::Model *model)
tflchef::TensorQuantization *chef_quant = operand->mutable_quant();
chef_quant->set_quantized_dimension(quant->quantized_dimension());
}
+
+ auto sparsity = tensor->sparsity();
+ if (sparsity != nullptr)
+ {
+ tflchef::TensorSparsity *chef_sparsity = operand->mutable_sparsity();
+ // traversal_order
+ auto chef_traversal_order = chef_sparsity->mutable_traversal_order();
+ for (const auto &to : *(sparsity->traversal_order()))
+ {
+ chef_traversal_order->add_dim(to);
+ }
+ // block_map
+ auto chef_block_map = chef_sparsity->mutable_block_map();
+ for (const auto &bm : *(sparsity->block_map()))
+ {
+ chef_block_map->add_dim(bm);
+ }
+ // dim_metadata
+ for (const auto &dm : *(sparsity->dim_metadata()))
+ {
+ auto chef_dm = chef_sparsity->add_dim_metadata();
+ // format
+ chef_dm->set_format(as_tflchef_sparse_dim_type(dm->format()));
+ // dense_size
+ chef_dm->set_dense_size(dm->dense_size());
+ // array_segments
+ auto chef_array_segments = chef_dm->mutable_array_segments();
+ switch (dm->array_segments_type())
+ {
+ case tflite::SparseIndexVector_NONE:
+ // DO NOTHING
+ break;
+ case tflite::SparseIndexVector_Int32Vector:
+ for (const auto &as : *(dm->array_segments_as_Int32Vector()->values()))
+ {
+ chef_array_segments->add_dim(as);
+ }
+ break;
+ case tflite::SparseIndexVector_Uint16Vector:
+ for (const auto &as : *(dm->array_segments_as_Uint16Vector()->values()))
+ {
+ chef_array_segments->add_dim(as);
+ }
+ break;
+ case tflite::SparseIndexVector_Uint8Vector:
+ for (const auto &as : *(dm->array_segments_as_Uint8Vector()->values()))
+ {
+ chef_array_segments->add_dim(as);
+ }
+ break;
+ default:
+ throw std::runtime_error("unsupported sparse index vector type");
+ }
+ // array_indices
+ auto chef_array_indices = chef_dm->mutable_array_indices();
+ switch (dm->array_indices_type())
+ {
+ case tflite::SparseIndexVector_NONE:
+ // DO NOTHING
+ break;
+ case tflite::SparseIndexVector_Int32Vector:
+ for (const auto &as : *(dm->array_indices_as_Int32Vector()->values()))
+ {
+ chef_array_indices->add_dim(as);
+ }
+ break;
+ case tflite::SparseIndexVector_Uint16Vector:
+ for (const auto &as : *(dm->array_indices_as_Uint16Vector()->values()))
+ {
+ chef_array_indices->add_dim(as);
+ }
+ break;
+ case tflite::SparseIndexVector_Uint8Vector:
+ for (const auto &as : *(dm->array_indices_as_Uint8Vector()->values()))
+ {
+ chef_array_indices->add_dim(as);
+ }
+ break;
+ default:
+ throw std::runtime_error("unsupported sparse index vector type");
+ }
+ }
+ }
+
+ auto shape_signature = tensor->shape_signature();
+ if (shape_signature != nullptr)
+ {
+ tflchef::ShapeSignature *chef_shape_signature = operand->mutable_shape_signature();
+ for (uint32_t i = 0; i < shape_signature->size(); ++i)
+ {
+ chef_shape_signature->add_dim(shape_signature->Get(i));
+ }
+ }
}
// add all operators
@@ -217,14 +312,14 @@ std::unique_ptr<ModelRecipe> generate_recipe(const tflite::Model *model)
for (const auto input : inputs)
{
auto tensor = tensors->Get(input);
- std::string name = tensor_name(tensor);
+ std::string name = mio::tflite::tensor_name(tensor);
model_recipe->add_input(name);
}
for (const auto output : outputs)
{
auto tensor = tensors->Get(output);
- std::string name = tensor_name(tensor);
+ std::string name = mio::tflite::tensor_name(tensor);
model_recipe->add_output(name);
}
diff --git a/compiler/tflchef/tflite/src/TFliteImport.cpp b/compiler/tflchef/tflite/src/TFliteImport.cpp
index 51d9b5ffa..9abec9a08 100644
--- a/compiler/tflchef/tflite/src/TFliteImport.cpp
+++ b/compiler/tflchef/tflite/src/TFliteImport.cpp
@@ -18,38 +18,13 @@
#include "Convert.h"
+#include <mio_tflite2121/Helper.h>
+
#include <sstream>
namespace tflchef
{
-const char *kEmptyTensorName = "(noname)";
-
-const char *tensor_type(const tflite::Tensor *tensor)
-{
- return tflite::EnumNameTensorType(tensor->type());
-}
-
-const char *tensor_name(const tflite::Tensor *tensor)
-{
- auto name = tensor->name();
- if (name)
- return name->c_str();
- return kEmptyTensorName;
-}
-
-bool is_valid(const tflite::OperatorCode *opcode)
-{
- tflite::BuiltinOperator code = opcode->builtin_code();
- return (tflite::BuiltinOperator_MIN <= code && code <= tflite::BuiltinOperator_MAX);
-}
-
-bool is_custom(const tflite::OperatorCode *opcode)
-{
- tflite::BuiltinOperator code = opcode->builtin_code();
- return (code == tflite::BuiltinOperator_CUSTOM);
-}
-
TFliteImport::TFliteImport(const tflite::Model *model)
{
_subgraphs = model->subgraphs();
@@ -92,7 +67,7 @@ tflite::BuiltinOperator TFliteImport::builtin_code(const tflite::Operator *op) c
assert(index < _op_codes.size());
const tflite::OperatorCode *opcode = _op_codes.at(index);
- return opcode->builtin_code();
+ return mio::tflite::builtin_code_neutral(opcode);
}
std::string TFliteImport::opcode_name(const tflite::Operator *op) const
@@ -101,14 +76,14 @@ std::string TFliteImport::opcode_name(const tflite::Operator *op) const
assert(index < _op_codes.size());
const tflite::OperatorCode *opcode = _op_codes.at(index);
- if (!is_valid(opcode))
+ if (!mio::tflite::is_valid(opcode))
{
std::ostringstream oss;
oss << "(invalid: " << index << ")";
return oss.str();
}
- if (is_custom(opcode))
+ if (mio::tflite::is_custom(opcode))
{
if (!opcode->custom_code())
return "(invalid custom)";
@@ -116,7 +91,7 @@ std::string TFliteImport::opcode_name(const tflite::Operator *op) const
return opcode->custom_code()->c_str();
}
- tflite::BuiltinOperator code = opcode->builtin_code();
+ tflite::BuiltinOperator code = mio::tflite::builtin_code_neutral(opcode);
return EnumNameBuiltinOperator(code);
}
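The switch to mio::tflite::builtin_code_neutral matters because newer TFLite schemas split the opcode across two fields: the legacy int8 deprecated_builtin_code, which saturates at 127, and a 32-bit builtin_code for newer operators. A sketch of what the helper does (an assumption based on that schema split; the real definition ships with mio_tflite2121):

    // sketch of mio::tflite::builtin_code_neutral
    tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode)
    {
      return std::max(opcode->builtin_code(),
                      static_cast<tflite::BuiltinOperator>(opcode->deprecated_builtin_code()));
    }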
diff --git a/compiler/tflchef/tflite/src/TFliteImport.h b/compiler/tflchef/tflite/src/TFliteImport.h
index 9d0a642ab..e6722e455 100644
--- a/compiler/tflchef/tflite/src/TFliteImport.h
+++ b/compiler/tflchef/tflite/src/TFliteImport.h
@@ -34,11 +34,6 @@ using TFliteTensors_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Tensor>>
using TFliteBuffers_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Buffer>>;
using TFliteOperators_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Operator>>;
-const char *tensor_type(const tflite::Tensor *tensor);
-const char *tensor_name(const tflite::Tensor *tensor);
-bool is_valid(const tflite::OperatorCode *opcode);
-bool is_custom(const tflite::OperatorCode *opcode);
-
/**
* @brief Loads TF lite file and provides helpers to access attributes
*/
diff --git a/compiler/tflchef/tflite/src/TFliteOpChefs.h b/compiler/tflchef/tflite/src/TFliteOpChefs.h
index 36a010957..34cb1bae2 100644
--- a/compiler/tflchef/tflite/src/TFliteOpChefs.h
+++ b/compiler/tflchef/tflite/src/TFliteOpChefs.h
@@ -18,109 +18,117 @@
#define __TFLITE_OP_CHEFS_H__
// In alphabet order
-#include "Op/Abs.h"
-#include "Op/Add.h"
-#include "Op/AddN.h"
-#include "Op/ArgMax.h"
-#include "Op/ArgMin.h"
-#include "Op/AveragePool2D.h"
-#include "Op/BatchMatMul.h"
-#include "Op/BatchToSpaceND.h"
-#include "Op/Cast.h"
-#include "Op/Ceil.h"
-#include "Op/Concatenation.h"
-#include "Op/Conv2D.h"
-#include "Op/Cos.h"
-#include "Op/DepthToSpace.h"
-#include "Op/DepthwiseConv2D.h"
-#include "Op/Div.h"
-#include "Op/ELU.h"
-#include "Op/Equal.h"
-#include "Op/Exp.h"
-#include "Op/ExpandDims.h"
-#include "Op/Fill.h"
-#include "Op/Floor.h"
-#include "Op/FloorDiv.h"
-#include "Op/FloorMod.h"
-#include "Op/FullyConnected.h"
-#include "Op/Gather.h"
-#include "Op/GatherNd.h"
-#include "Op/Greater.h"
-#include "Op/GreaterEqual.h"
-#include "Op/L2Normalize.h"
-#include "Op/L2Pool2D.h"
-#include "Op/LeakyRelu.h"
-#include "Op/Less.h"
-#include "Op/LessEqual.h"
-#include "Op/LocalResponseNormalization.h"
-#include "Op/Log.h"
-#include "Op/LogicalAnd.h"
-#include "Op/LogicalNot.h"
-#include "Op/LogicalOr.h"
-#include "Op/Logistic.h"
-#include "Op/LogSoftmax.h"
-#include "Op/MatrixDiag.h"
-#include "Op/MatrixSetDiag.h"
-#include "Op/Maximum.h"
-#include "Op/MaxPool2D.h"
-#include "Op/Mean.h"
-#include "Op/Minimum.h"
-#include "Op/MirrorPad.h"
-#include "Op/Mul.h"
-#include "Op/Neg.h"
-#include "Op/NonMaxSuppressionV4.h"
-#include "Op/NonMaxSuppressionV5.h"
-#include "Op/NotEqual.h"
-#include "Op/OneHot.h"
-#include "Op/Pack.h"
-#include "Op/Pad.h"
-#include "Op/PadV2.h"
-#include "Op/Pow.h"
-#include "Op/PRelu.h"
-#include "Op/Range.h"
-#include "Op/Rank.h"
-#include "Op/ReduceAny.h"
-#include "Op/ReduceMax.h"
-#include "Op/ReduceMin.h"
-#include "Op/ReduceProd.h"
-#include "Op/ReLU.h"
-#include "Op/ReLU6.h"
-#include "Op/ReLUN1To1.h"
-#include "Op/Reshape.h"
-#include "Op/ResizeBilinear.h"
-#include "Op/ResizeNearestNeighbor.h"
-#include "Op/ReverseSequence.h"
-#include "Op/ReverseV2.h"
-#include "Op/Round.h"
-#include "Op/Rsqrt.h"
-#include "Op/ScatterNd.h"
-#include "Op/SegmentSum.h"
-#include "Op/Select.h"
-#include "Op/SelectV2.h"
-#include "Op/Shape.h"
-#include "Op/Sin.h"
-#include "Op/Slice.h"
-#include "Op/Softmax.h"
-#include "Op/SpaceToBatchND.h"
-#include "Op/SpaceToDepth.h"
-#include "Op/SparseToDense.h"
-#include "Op/Split.h"
-#include "Op/SplitV.h"
-#include "Op/Sqrt.h"
-#include "Op/Square.h"
-#include "Op/SquaredDifference.h"
-#include "Op/Squeeze.h"
-#include "Op/StridedSlice.h"
-#include "Op/Sub.h"
-#include "Op/Sum.h"
-#include "Op/Tanh.h"
-#include "Op/Tile.h"
-#include "Op/TopKV2.h"
-#include "Op/Transpose.h"
-#include "Op/TransposeConv.h"
-#include "Op/Unique.h"
-#include "Op/Unpack.h"
-#include "Op/Where.h"
-#include "Op/ZerosLike.h"
+#include "Op/include/Abs.h"
+#include "Op/include/Add.h"
+#include "Op/include/AddN.h"
+#include "Op/include/ArgMax.h"
+#include "Op/include/ArgMin.h"
+#include "Op/include/AveragePool2D.h"
+#include "Op/include/BatchMatMul.h"
+#include "Op/include/BatchToSpaceND.h"
+#include "Op/include/BidirectionalSequenceLSTM.h"
+#include "Op/include/Cast.h"
+#include "Op/include/Ceil.h"
+#include "Op/include/Concatenation.h"
+#include "Op/include/Conv2D.h"
+#include "Op/include/Cos.h"
+#include "Op/include/DepthToSpace.h"
+#include "Op/include/DepthwiseConv2D.h"
+#include "Op/include/Dequantize.h"
+#include "Op/include/Div.h"
+#include "Op/include/ELU.h"
+#include "Op/include/Equal.h"
+#include "Op/include/Exp.h"
+#include "Op/include/ExpandDims.h"
+#include "Op/include/FakeQuant.h"
+#include "Op/include/Fill.h"
+#include "Op/include/Floor.h"
+#include "Op/include/FloorDiv.h"
+#include "Op/include/FloorMod.h"
+#include "Op/include/FullyConnected.h"
+#include "Op/include/Gather.h"
+#include "Op/include/GatherNd.h"
+#include "Op/include/Gelu.h"
+#include "Op/include/Greater.h"
+#include "Op/include/GreaterEqual.h"
+#include "Op/include/HardSwish.h"
+#include "Op/include/L2Normalize.h"
+#include "Op/include/L2Pool2D.h"
+#include "Op/include/LeakyRelu.h"
+#include "Op/include/Less.h"
+#include "Op/include/LessEqual.h"
+#include "Op/include/LocalResponseNormalization.h"
+#include "Op/include/Log.h"
+#include "Op/include/LogicalAnd.h"
+#include "Op/include/LogicalNot.h"
+#include "Op/include/LogicalOr.h"
+#include "Op/include/Logistic.h"
+#include "Op/include/LogSoftmax.h"
+#include "Op/include/MatrixDiag.h"
+#include "Op/include/MatrixSetDiag.h"
+#include "Op/include/Maximum.h"
+#include "Op/include/MaxPool2D.h"
+#include "Op/include/Mean.h"
+#include "Op/include/Minimum.h"
+#include "Op/include/MirrorPad.h"
+#include "Op/include/Mul.h"
+#include "Op/include/Neg.h"
+#include "Op/include/NonMaxSuppressionV4.h"
+#include "Op/include/NonMaxSuppressionV5.h"
+#include "Op/include/NotEqual.h"
+#include "Op/include/OneHot.h"
+#include "Op/include/Pack.h"
+#include "Op/include/Pad.h"
+#include "Op/include/PadV2.h"
+#include "Op/include/Pow.h"
+#include "Op/include/PRelu.h"
+#include "Op/include/Quantize.h"
+#include "Op/include/Range.h"
+#include "Op/include/Rank.h"
+#include "Op/include/ReduceAny.h"
+#include "Op/include/ReduceMax.h"
+#include "Op/include/ReduceMin.h"
+#include "Op/include/ReduceProd.h"
+#include "Op/include/ReLU.h"
+#include "Op/include/ReLU6.h"
+#include "Op/include/ReLUN1To1.h"
+#include "Op/include/Reshape.h"
+#include "Op/include/ResizeBilinear.h"
+#include "Op/include/ResizeNearestNeighbor.h"
+#include "Op/include/ReverseSequence.h"
+#include "Op/include/ReverseV2.h"
+#include "Op/include/Round.h"
+#include "Op/include/Rsqrt.h"
+#include "Op/include/ScatterNd.h"
+#include "Op/include/SegmentSum.h"
+#include "Op/include/Select.h"
+#include "Op/include/SelectV2.h"
+#include "Op/include/Shape.h"
+#include "Op/include/Sin.h"
+#include "Op/include/Slice.h"
+#include "Op/include/Softmax.h"
+#include "Op/include/SpaceToBatchND.h"
+#include "Op/include/SpaceToDepth.h"
+#include "Op/include/SparseToDense.h"
+#include "Op/include/Split.h"
+#include "Op/include/SplitV.h"
+#include "Op/include/Sqrt.h"
+#include "Op/include/Square.h"
+#include "Op/include/SquaredDifference.h"
+#include "Op/include/Squeeze.h"
+#include "Op/include/StridedSlice.h"
+#include "Op/include/Sub.h"
+#include "Op/include/Sum.h"
+#include "Op/include/SVDF.h"
+#include "Op/include/Tanh.h"
+#include "Op/include/Tile.h"
+#include "Op/include/TopKV2.h"
+#include "Op/include/Transpose.h"
+#include "Op/include/TransposeConv.h"
+#include "Op/include/UnidirectionalSequenceLSTM.h"
+#include "Op/include/Unique.h"
+#include "Op/include/Unpack.h"
+#include "Op/include/Where.h"
+#include "Op/include/ZerosLike.h"
#endif // __TFLITE_OP_CHEFS_H__
diff --git a/compiler/tflchef/tflite/src/TFliteOpRegistry.h b/compiler/tflchef/tflite/src/TFliteOpRegistry.h
index a454e98b6..a37f15c0c 100644
--- a/compiler/tflchef/tflite/src/TFliteOpRegistry.h
+++ b/compiler/tflchef/tflite/src/TFliteOpRegistry.h
@@ -63,6 +63,7 @@ private:
REG_TFL_OP(AVERAGE_POOL_2D, TFliteOpAveragePool2D);
REG_TFL_OP(BATCH_MATMUL, TFliteOpBatchMatMul);
REG_TFL_OP(BATCH_TO_SPACE_ND, TFliteOpBatchToSpaceND);
+ REG_TFL_OP(BIDIRECTIONAL_SEQUENCE_LSTM, TFliteOpBidirectionalSequenceLSTM);
REG_TFL_OP(CAST, TFliteOpCast);
REG_TFL_OP(CEIL, TFliteOpCeil);
REG_TFL_OP(CONCATENATION, TFliteOpConcatenation);
@@ -70,11 +71,13 @@ private:
REG_TFL_OP(COS, TFliteOpCos);
REG_TFL_OP(DEPTH_TO_SPACE, TFliteOpDepthToSpace);
REG_TFL_OP(DEPTHWISE_CONV_2D, TFliteOpDepthwiseConv2D);
+ REG_TFL_OP(DEQUANTIZE, TFliteOpDequantize);
REG_TFL_OP(DIV, TFliteOpDiv);
REG_TFL_OP(ELU, TFliteOpELU);
REG_TFL_OP(EQUAL, TFliteOpEqual);
REG_TFL_OP(EXP, TFliteOpExp);
REG_TFL_OP(EXPAND_DIMS, TFliteOpExpandDims);
+ REG_TFL_OP(FAKE_QUANT, TFliteOpFakeQuant);
REG_TFL_OP(FILL, TFliteOpFill);
REG_TFL_OP(FLOOR, TFliteOpFloor);
REG_TFL_OP(FLOOR_DIV, TFliteOpFloorDiv);
@@ -82,8 +85,10 @@ private:
REG_TFL_OP(FULLY_CONNECTED, TFliteOpFullyConnected);
REG_TFL_OP(GATHER, TFliteOpGather);
REG_TFL_OP(GATHER_ND, TFliteOpGatherNd);
+ REG_TFL_OP(GELU, TFliteOpGelu);
REG_TFL_OP(GREATER, TFliteOpGreater);
REG_TFL_OP(GREATER_EQUAL, TFliteOpGreaterEqual);
+ REG_TFL_OP(HARD_SWISH, TFliteOpHardSwish);
REG_TFL_OP(L2_NORMALIZATION, TFliteOpL2Normalize);
REG_TFL_OP(L2_POOL_2D, TFliteOpL2Pool2D);
REG_TFL_OP(LEAKY_RELU, TFliteOpLeakyRelu);
@@ -114,6 +119,7 @@ private:
REG_TFL_OP(PADV2, TFliteOpPadV2);
REG_TFL_OP(POW, TFliteOpPow);
REG_TFL_OP(PRELU, TFliteOpPRelu);
+ REG_TFL_OP(QUANTIZE, TFliteOpQuantize);
REG_TFL_OP(RANGE, TFliteOpRange);
REG_TFL_OP(RANK, TFliteOpRank);
REG_TFL_OP(REDUCE_ANY, TFliteOpReduceAny);
@@ -150,11 +156,13 @@ private:
REG_TFL_OP(STRIDED_SLICE, TFliteOpStridedSlice);
REG_TFL_OP(SUB, TFliteOpSub);
REG_TFL_OP(SUM, TFliteOpSum);
+ REG_TFL_OP(SVDF, TFliteOpSVDF);
REG_TFL_OP(TANH, TFliteOpTanh);
REG_TFL_OP(TILE, TFliteOpTile);
REG_TFL_OP(TOPK_V2, TFliteOpTopKV2);
REG_TFL_OP(TRANSPOSE, TFliteOpTranspose);
REG_TFL_OP(TRANSPOSE_CONV, TFliteOpTransposeConv);
+ REG_TFL_OP(UNIDIRECTIONAL_SEQUENCE_LSTM, TFliteOpUnidirectionalSequenceLSTM);
REG_TFL_OP(UNIQUE, TFliteOpUnique);
REG_TFL_OP(UNPACK, TFliteOpUnpack);
REG_TFL_OP(WHERE, TFliteOpWhere);
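Each REG_TFL_OP line binds a TFLite builtin opcode to the chef class that imports it. The macro is defined earlier in TFliteOpRegistry.h and expands roughly to (a sketch; the map member name is an assumption):

    // sketch of the registration macro
    #define REG_TFL_OP(OPCODE, CLASS) \
      _tfliteop_map[tflite::BuiltinOperator_##OPCODE] = std::make_unique<CLASS>()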
diff --git a/compiler/tflchef/tools/console/CMakeLists.txt b/compiler/tflchef/tools/console/CMakeLists.txt
index d9160c3a2..c57e3fdcb 100644
--- a/compiler/tflchef/tools/console/CMakeLists.txt
+++ b/compiler/tflchef/tools/console/CMakeLists.txt
@@ -1,3 +1,14 @@
add_executable(tflchef Driver.cpp)
target_link_libraries(tflchef tflchef_core)
target_link_libraries(tflchef safemain)
+
+install(TARGETS tflchef DESTINATION bin)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(tflchef_test Driver.test.cpp Driver.cpp)
+target_link_libraries(tflchef_test tflchef_core)
diff --git a/compiler/tflchef/tools/console/Driver.cpp b/compiler/tflchef/tools/console/Driver.cpp
index d6f7ba1ae..23f2fff3f 100644
--- a/compiler/tflchef/tools/console/Driver.cpp
+++ b/compiler/tflchef/tools/console/Driver.cpp
@@ -22,7 +22,7 @@
#include <iostream>
-int entry(int argc, char **argv)
+int entry_stream(std::istream &is)
{
int32_t model_version = 1;
@@ -30,7 +30,7 @@ int entry(int argc, char **argv)
// Read a model recipe from standard input
{
- google::protobuf::io::IstreamInputStream iis{&std::cin};
+ google::protobuf::io::IstreamInputStream iis{&is};
if (!google::protobuf::TextFormat::Parse(&iis, &model_recipe))
{
std::cerr << "ERROR: Failed to parse recipe" << std::endl;
@@ -56,3 +56,9 @@ int entry(int argc, char **argv)
return 0;
}
+
+int entry(int, char **)
+{
+ // forward to entry_stream
+ return entry_stream(std::cin);
+}
diff --git a/compiler/tflchef/tools/console/Driver.test.cpp b/compiler/tflchef/tools/console/Driver.test.cpp
new file mode 100644
index 000000000..b3cf2134d
--- /dev/null
+++ b/compiler/tflchef/tools/console/Driver.test.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+// entry function under test, defined in Driver.cpp
+int entry_stream(std::istream &is);
+
+// NOTE an empty stream parses as an empty (yet valid) recipe, so entry_stream succeeds here
+TEST(TFlChefDriverTest, entry_empty_NEG)
+{
+  std::istringstream empty_input("");
+
+  ASSERT_EQ(0, entry_stream(empty_input));
+}
+
+TEST(TFlChefDriverTest, entry_invalid_NEG)
+{
+  std::istringstream invalid_input("invalid: input");
+
+  ASSERT_NE(0, entry_stream(invalid_input));
+}
+
+TEST(TFlChefDriverTest, entry_invalid_version_NEG)
+{
+  std::istringstream invalid_version_input("version: 9999");
+
+  ASSERT_NE(0, entry_stream(invalid_version_input));
+}
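The three tests above cover the empty recipe and two failure paths; a positive case with an explicit version would round out the set. A sketch of such a test (hypothetical, not part of this patch; entry_stream rejects only versions greater than 1):

    // sketch: positive-path test for an explicitly versioned recipe
    TEST(TFlChefDriverTest, entry_version_one)
    {
      std::istringstream valid_input("version: 1");

      ASSERT_EQ(0, entry_stream(valid_input));
    }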
diff --git a/compiler/tflchef/tools/file/CMakeLists.txt b/compiler/tflchef/tools/file/CMakeLists.txt
index f411d60f1..e3b7b2f48 100644
--- a/compiler/tflchef/tools/file/CMakeLists.txt
+++ b/compiler/tflchef/tools/file/CMakeLists.txt
@@ -2,3 +2,5 @@ add_executable(tflchef-file Driver.cpp)
target_link_libraries(tflchef-file arser)
target_link_libraries(tflchef-file tflchef_core)
target_link_libraries(tflchef-file safemain)
+
+install(TARGETS tflchef-file DESTINATION bin)
diff --git a/compiler/tflchef/tools/file/Driver.cpp b/compiler/tflchef/tools/file/Driver.cpp
index 46e5b5583..f6c6789bd 100644
--- a/compiler/tflchef/tools/file/Driver.cpp
+++ b/compiler/tflchef/tools/file/Driver.cpp
@@ -28,10 +28,8 @@
int entry(int argc, char **argv)
{
arser::Arser arser;
- arser.add_argument("recipe")
- .type(arser::DataType::STR)
- .help("Source recipe file path to convert");
- arser.add_argument("tflite").type(arser::DataType::STR).help("Target tflite file path");
+ arser.add_argument("recipe").help("Source recipe file path to convert");
+ arser.add_argument("tflite").help("Target tflite file path");
try
{
@@ -67,8 +65,8 @@ int entry(int argc, char **argv)
if (model_version > 1)
{
- std::cerr << "ERROR: Unsupported recipe version: " << model_version << ", '" << argv[1] << "'"
- << std::endl;
+ std::cerr << "ERROR: Unsupported recipe version: " << model_version << ", '" << recipe_path
+ << "'" << std::endl;
return 255;
}
diff --git a/compiler/tflchef/tools/reverse/CMakeLists.txt b/compiler/tflchef/tools/reverse/CMakeLists.txt
index a5c0f5bca..21700faca 100644
--- a/compiler/tflchef/tools/reverse/CMakeLists.txt
+++ b/compiler/tflchef/tools/reverse/CMakeLists.txt
@@ -3,3 +3,5 @@ target_link_libraries(tflchef-reverse arser)
target_link_libraries(tflchef-reverse tflchef_tflite)
target_link_libraries(tflchef-reverse safemain)
target_link_libraries(tflchef-reverse foder)
+
+install(TARGETS tflchef-reverse DESTINATION bin)
diff --git a/compiler/tflchef/tools/reverse/Driver.cpp b/compiler/tflchef/tools/reverse/Driver.cpp
index 4d795a3d0..119bee6be 100644
--- a/compiler/tflchef/tools/reverse/Driver.cpp
+++ b/compiler/tflchef/tools/reverse/Driver.cpp
@@ -25,10 +25,8 @@
int entry(int argc, char **argv)
{
arser::Arser arser;
- arser.add_argument("tflite")
- .type(arser::DataType::STR)
- .help("Source tflite file path to convert");
- arser.add_argument("recipe").type(arser::DataType::STR).help("Target recipe file path");
+ arser.add_argument("tflite").help("Source tflite file path to convert");
+ arser.add_argument("recipe").help("Target recipe file path");
try
{
diff --git a/compiler/tfldump/CMakeLists.txt b/compiler/tfldump/CMakeLists.txt
index e6afcb6d2..3fe1ea9ac 100644
--- a/compiler/tfldump/CMakeLists.txt
+++ b/compiler/tfldump/CMakeLists.txt
@@ -1,7 +1,7 @@
-if(NOT TARGET mio_tflite)
- message(STATUS "Build tfldump: FAILED (missing mio_tflite)")
+if(NOT TARGET mio_tflite2121)
+ message(STATUS "Build tfldump: FAILED (missing mio_tflite2121)")
return()
-endif(NOT TARGET mio_tflite)
+endif(NOT TARGET mio_tflite2121)
set(DRIVER "driver/Driver.cpp")
@@ -10,6 +10,7 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
add_executable(tfldump ${DRIVER} ${SOURCES})
target_include_directories(tfldump PRIVATE include)
target_link_libraries(tfldump arser)
-target_link_libraries(tfldump mio_tflite)
+target_link_libraries(tfldump foder)
+target_link_libraries(tfldump mio_tflite2121)
+target_link_libraries(tfldump mio_tflite2121_helper)
target_link_libraries(tfldump safemain)
-target_link_libraries(tfldump flatbuffers)
diff --git a/compiler/tfldump/README.md b/compiler/tfldump/README.md
index 50d003f12..65ad105c2 100644
--- a/compiler/tfldump/README.md
+++ b/compiler/tfldump/README.md
@@ -63,5 +63,4 @@ O T(3) ofm
### Dependency
- safemain
-- stdex
- FlatBuffers
diff --git a/compiler/tfldump/driver/Driver.cpp b/compiler/tfldump/driver/Driver.cpp
index 38c9c062f..a3e748be1 100644
--- a/compiler/tfldump/driver/Driver.cpp
+++ b/compiler/tfldump/driver/Driver.cpp
@@ -15,7 +15,7 @@
*/
#include <arser/arser.h>
-#include <tflread/Model.h>
+#include <foder/FileLoader.h>
#include <tfldump/Dump.h>
#include <iostream>
@@ -23,7 +23,7 @@
int entry(int argc, char **argv)
{
arser::Arser arser;
- arser.add_argument("tflite").type(arser::DataType::STR).help("TFLite file to dump");
+ arser.add_argument("tflite").help("TFLite file to dump");
try
{
@@ -38,14 +38,9 @@ int entry(int argc, char **argv)
std::string tflite_path = arser.get<std::string>("tflite");
// Load TF lite model from a tflite file
- std::unique_ptr<tflread::Model> model = tflread::load_tflite(tflite_path);
- if (model == nullptr)
- {
- std::cerr << "ERROR: Failed to load tflite '" << tflite_path << "'" << std::endl;
- return 255;
- }
-
- const tflite::Model *tflmodel = model->model();
+ foder::FileLoader fileLoader{tflite_path};
+ std::vector<char> modelData = fileLoader.load();
+ const tflite::Model *tflmodel = tflite::GetModel(modelData.data());
if (tflmodel == nullptr)
{
std::cerr << "ERROR: Failed to load tflite '" << tflite_path << "'" << std::endl;
diff --git a/compiler/tfldump/include/tflread/Model.h b/compiler/tfldump/include/tflread/Model.h
deleted file mode 100644
index c6e4a94ac..000000000
--- a/compiler/tfldump/include/tflread/Model.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __TFLREAD_MODEL_H__
-#define __TFLREAD_MODEL_H__
-
-#include <mio/tflite/schema_generated.h>
-
-#include <memory>
-
-namespace tflread
-{
-
-struct Model
-{
- virtual ~Model() = default;
-
- virtual const ::tflite::Model *model(void) const = 0;
-};
-
-/**
- * @brief Load TensorFlow Lite model (as a raw Model) from a given path
- *
- * @note May return a nullptr
- */
-std::unique_ptr<Model> load_tflite(const std::string &path);
-
-} // namespace tflread
-
-#endif // __TFLREAD_MODEL_H__
diff --git a/compiler/tfldump/requires.cmake b/compiler/tfldump/requires.cmake
index 2cdd3a391..80dc592fd 100644
--- a/compiler/tfldump/requires.cmake
+++ b/compiler/tfldump/requires.cmake
@@ -1,3 +1,4 @@
require("arser")
-require("mio-tflite")
+require("foder")
+require("mio-tflite2121")
require("safemain")
diff --git a/compiler/tfldump/src/Dump.cpp b/compiler/tfldump/src/Dump.cpp
index e1562d42f..a3450396b 100644
--- a/compiler/tfldump/src/Dump.cpp
+++ b/compiler/tfldump/src/Dump.cpp
@@ -15,6 +15,7 @@
*/
#include <tfldump/Dump.h>
+#include <mio_tflite2121/Helper.h>
#include "Read.h"
#include "OpPrinter.h"
@@ -32,7 +33,7 @@ void dump_buffer(std::ostream &os, const uint8_t *buffer, size_t size, size_t am
std::ios_base::fmtflags saveflags(os.flags());
bool second = false;
- bool ellipsis = amount > 0 && size > 4;
+ bool ellipsis = amount > 0 && size > 8;
size_t count = ellipsis ? std::min(size, amount) : size;
for (size_t i = 0; i < count; i++)
@@ -73,34 +74,50 @@ std::ostream &operator<<(std::ostream &os, const std::vector<int32_t> &vect)
return os;
}
-template <typename T> void dump_fbvect(std::ostream &os, const flatbuffers::Vector<T> *fbvect)
+template <typename T>
+void dump_fbvect(std::ostream &os, const flatbuffers::Vector<T> *fbvect, uint32_t size)
{
- if (fbvect == nullptr)
- return;
-
- bool ellipsis = (fbvect->size() > 4);
- auto limit_size = ellipsis ? 4 : fbvect->size();
-
- if (ellipsis)
- {
- os << "(" << fbvect->size() << ") ";
- }
- for (uint32_t q = 0; q < limit_size; q++)
+ for (uint32_t q = 0; q < size; q++)
{
if (q)
os << ", ";
os << fbvect->Get(q);
}
- if (ellipsis)
+}
+
+template <>
+void dump_fbvect(std::ostream &os, const flatbuffers::Vector<uint8_t> *fbvect, uint32_t size)
+{
+ assert(fbvect);
+ for (uint32_t q = 0; q < size; q++)
{
- os << " ... ";
+ if (q)
+ os << ", ";
+ os << static_cast<uint32_t>(fbvect->Get(q));
}
}
template <typename T>
std::ostream &operator<<(std::ostream &os, const flatbuffers::Vector<T> *fbvect)
{
- dump_fbvect(os, fbvect);
+ if (fbvect == nullptr)
+ return os;
+
+ bool ellipsis = (fbvect->size() > 8);
+ auto limit_size = ellipsis ? 8 : fbvect->size();
+
+ if (ellipsis)
+ {
+ os << "(" << fbvect->size() << ") ";
+ }
+
+ dump_fbvect(os, fbvect, limit_size);
+
+ if (ellipsis)
+ {
+ os << " ... ";
+ }
+
return os;
}
@@ -111,7 +128,7 @@ void dump_sub_graph(std::ostream &os, tflread::Reader &reader)
// dump operands(tensors)
os << "Operands: T(subgraph index : tensor index) TYPE (shape) (shape_signature) "
- << "B(buffer index) OperandName" << std::endl;
+ << "B(buffer index) (variable) OperandName" << std::endl;
for (uint32_t i = 0; i < tensors->Length(); ++i)
{
// TODO refactor to some better structure
@@ -121,7 +138,7 @@ void dump_sub_graph(std::ostream &os, tflread::Reader &reader)
if (tensor->shape())
dims = tflread::as_index_vector(tensor->shape());
- os << "T(" << reader.subgraph_index() << ":" << i << ") " << tflread::tensor_type(tensor)
+ os << "T(" << reader.subgraph_index() << ":" << i << ") " << mio::tflite::tensor_type(tensor)
<< " ";
os << "(" << dims << ") ";
if (tensor->shape_signature())
@@ -130,7 +147,11 @@ void dump_sub_graph(std::ostream &os, tflread::Reader &reader)
os << "(" << dims_sig << ") ";
}
os << "B(" << tensor->buffer() << ") ";
- os << tflread::tensor_name(tensor) << std::endl;
+ if (tensor->is_variable())
+ {
+ os << "(variable) ";
+ }
+ os << mio::tflite::tensor_name(tensor) << std::endl;
if (auto q_params = tensor->quantization())
{
@@ -169,8 +190,90 @@ void dump_sub_graph(std::ostream &os, tflread::Reader &reader)
os << std::endl;
}
}
+
+ if (const auto &s_params = tensor->sparsity())
+ {
+ std::string strsparsity = " Sparsity: ";
+ std::string strsindent(strsparsity.size(), ' ');
+ os << strsparsity;
+
+ if (s_params->traversal_order())
+ {
+ os << "traversal_order(" << s_params->traversal_order() << ") ";
+ os << std::endl << strsindent;
+ }
+ if (s_params->block_map())
+ {
+ os << "block_map(" << s_params->block_map() << ") ";
+ os << std::endl << strsindent;
+ }
+ if (const auto &dim_metadata = s_params->dim_metadata())
+ {
+ uint32_t idx = 0;
+ for (const auto &dm : *dim_metadata)
+ {
+ std::string strdm = "dim_metadata[" + std::to_string(idx++) + "]: ";
+ std::string strdm_indent = strsindent + std::string(strdm.size(), ' ');
+ os << strdm;
+
+ os << "format(" << tflite::EnumNameDimensionType(dm->format()) << ") ";
+ os << std::endl << strdm_indent;
+
+ os << "dense_size(" << dm->dense_size() << ") ";
+ os << std::endl << strdm_indent;
+
+ os << "array_segments_type("
+ << tflite::EnumNameSparseIndexVector(dm->array_segments_type()) << ") ";
+ os << std::endl << strdm_indent;
+
+ os << "array_segments(";
+ switch (dm->array_segments_type())
+ {
+ case tflite::SparseIndexVector_NONE:
+ // DO NOTHING
+ break;
+ case tflite::SparseIndexVector_Int32Vector:
+ os << dm->array_segments_as_Int32Vector()->values();
+ break;
+ case tflite::SparseIndexVector_Uint16Vector:
+ os << dm->array_segments_as_Uint16Vector()->values();
+ break;
+ case tflite::SparseIndexVector_Uint8Vector:
+ os << dm->array_segments_as_Uint8Vector()->values();
+ break;
+ default:
+ throw std::runtime_error("Invalid SparseIndexVector type of array_segments");
+ }
+ os << ")" << std::endl << strdm_indent;
+
+ os << "array_indices_type(" << tflite::EnumNameSparseIndexVector(dm->array_indices_type())
+ << ") ";
+ os << std::endl << strdm_indent;
+
+ os << "array_indices(";
+ switch (dm->array_indices_type())
+ {
+ case tflite::SparseIndexVector_NONE:
+ // DO NOTHING
+ break;
+ case tflite::SparseIndexVector_Int32Vector:
+ os << dm->array_indices_as_Int32Vector()->values();
+ break;
+ case tflite::SparseIndexVector_Uint16Vector:
+ os << dm->array_indices_as_Uint16Vector()->values();
+ break;
+ case tflite::SparseIndexVector_Uint8Vector:
+ os << dm->array_indices_as_Uint8Vector()->values();
+ break;
+ default:
+ throw std::runtime_error("Invalid SparseIndexVector type of array_indices");
+ }
+ os << ")" << std::endl << strsindent;
+ }
+ }
+ }
+ os << std::endl;
}
- os << std::endl;
// dump operators
os << "Operators: O(subgraph index : operator index) OpCodeName " << std::endl;
@@ -200,7 +303,7 @@ void dump_sub_graph(std::ostream &os, tflread::Reader &reader)
if (input >= 0)
{
auto tensor = tensors->Get(input);
- os << tflread::tensor_name(tensor);
+ os << mio::tflite::tensor_name(tensor);
}
os << std::endl;
}
@@ -210,7 +313,7 @@ void dump_sub_graph(std::ostream &os, tflread::Reader &reader)
if (output >= 0)
{
auto tensor = tensors->Get(output);
- os << tflread::tensor_name(tensor);
+ os << mio::tflite::tensor_name(tensor);
}
os << std::endl;
}
@@ -223,14 +326,14 @@ void dump_sub_graph(std::ostream &os, tflread::Reader &reader)
for (const auto input : reader.inputs())
{
auto tensor = tensors->Get(input);
- std::string name = tflread::tensor_name(tensor);
+ std::string name = mio::tflite::tensor_name(tensor);
os << "I T(" << reader.subgraph_index() << ":" << input << ") " << name << std::endl;
}
for (const auto output : reader.outputs())
{
auto tensor = tensors->Get(output);
- std::string name = tflread::tensor_name(tensor);
+ std::string name = mio::tflite::tensor_name(tensor);
os << "O T(" << reader.subgraph_index() << ":" << output << ") " << name << std::endl;
}
@@ -251,6 +354,8 @@ void dump_model(std::ostream &os, const tflite::Model *model)
auto opcodes = reader.opcodes();
auto buffers = reader.buffers();
+ auto metadata = reader.metadata();
+ auto signaturedefs = reader.signaturedefs();
// dump operator_codes
os << "Operator Codes: [order] OpCodeName (OpCode Enum)" << std::endl;
@@ -258,11 +363,13 @@ void dump_model(std::ostream &os, const tflite::Model *model)
for (auto opcode : opcodes)
{
tflite::BuiltinOperator op_code = opcode->builtin_code();
- auto op_name = tflread::opcode_name(opcode);
+ tflite::BuiltinOperator dp_code = tflite::BuiltinOperator(opcode->deprecated_builtin_code());
+
+ auto op_name = mio::tflite::opcode_name(opcode);
auto op_version = opcode->version();
os << "[" << opcode_index << "] " << op_name << " (code: " << op_code
- << ", version: " << op_version << ")" << std::endl;
+ << ", dep_code: " << dp_code << ", version: " << op_version << ")" << std::endl;
opcode_index++;
}
@@ -284,6 +391,49 @@ void dump_model(std::ostream &os, const tflite::Model *model)
}
os << std::endl;
+ // dump metadata
+ if (metadata != nullptr)
+ {
+ os << "metadata : B(index) name" << std::endl;
+ for (uint32_t i = 0; i < metadata->Length(); ++i)
+ {
+ os << "B(" << metadata->Get(i)->buffer() << ") " << metadata->Get(i)->name()->c_str()
+ << std::endl;
+ }
+ os << std::endl;
+ }
+
+ // dump signaturedef
+ if (signaturedefs != nullptr)
+ {
+ os << "SignatureDef" << std::endl;
+ for (uint32_t i = 0; i < signaturedefs->Length(); ++i)
+ {
+ auto sign_i = signaturedefs->Get(i);
+ os << "S(" << i << ") signature_key(" << sign_i->signature_key()->c_str() << "), sub_graph("
+ << sign_i->subgraph_index() << ")" << std::endl;
+
+ auto inputs_i = sign_i->inputs();
+ for (uint32_t t = 0; t < inputs_i->Length(); ++t)
+ {
+ auto inputs_i_t = inputs_i->Get(t);
+ os << " I(" << t << ")"
+ << " T(" << sign_i->subgraph_index() << ":" << inputs_i_t->tensor_index() << ") "
+ << inputs_i_t->name()->c_str() << std::endl;
+ }
+
+ auto outputs_i = sign_i->outputs();
+ for (uint32_t t = 0; t < outputs_i->Length(); ++t)
+ {
+ auto outputs_i_t = outputs_i->Get(t);
+ os << " O(" << t << ")"
+ << " T(" << sign_i->subgraph_index() << ":" << outputs_i_t->tensor_index() << ") "
+ << outputs_i_t->name()->c_str() << std::endl;
+ }
+ }
+ os << std::endl;
+ }
+
for (uint32_t sg = 0; sg < num_subgraph; ++sg)
{
reader.select_subgraph(sg);
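
The dump_fbvect() refactor above separates two concerns: the generic operator<< now owns the size/ellipsis logic (with the cut-off raised from 4 to 8 elements), while an element-printing helper is specialized for uint8_t so bytes print as integers rather than characters. The same shape, self-contained over std::vector (an overload stands in for the template specialization):

    #include <cstddef>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    template <typename T>
    void dump_vect(std::ostream &os, const std::vector<T> &v, std::size_t n)
    {
      for (std::size_t q = 0; q < n; ++q)
        os << (q ? ", " : "") << v[q];
    }

    // uint8_t variant: widen bytes so they stream as numbers, not characters.
    void dump_vect(std::ostream &os, const std::vector<uint8_t> &v, std::size_t n)
    {
      for (std::size_t q = 0; q < n; ++q)
        os << (q ? ", " : "") << static_cast<uint32_t>(v[q]);
    }

    template <typename T>
    std::ostream &operator<<(std::ostream &os, const std::vector<T> &v)
    {
      bool ellipsis = v.size() > 8; // the cut-off this patch raises from 4
      std::size_t limit = ellipsis ? 8 : v.size();
      if (ellipsis)
        os << "(" << v.size() << ") ";
      dump_vect(os, v, limit);
      if (ellipsis)
        os << " ... ";
      return os;
    }

    int main()
    {
      std::vector<uint8_t> bytes{65, 66, 67, 68, 69, 70, 71, 72, 73, 74};
      std::cout << bytes << std::endl; // "(10) 65, 66, 67, 68, 69, 70, 71, 72 ... "
      return 0;
    }
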
diff --git a/compiler/tfldump/src/Load.cpp b/compiler/tfldump/src/Load.cpp
deleted file mode 100644
index fe04a5dd6..000000000
--- a/compiler/tfldump/src/Load.cpp
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <tflread/Model.h>
-
-#include <fcntl.h>
-#include <unistd.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-
-namespace
-{
-
-class MemoryMappedModel final : public tflread::Model
-{
-public:
- /**
- * @require fd and data SHOULD be valid
- */
- explicit MemoryMappedModel(int fd, void *data, size_t size) : _fd{fd}, _data{data}, _size{size}
- {
- // DO NOTHING
- }
-
-public:
- ~MemoryMappedModel()
- {
- munmap(_data, _size);
- close(_fd);
- }
-
-public:
- MemoryMappedModel(const MemoryMappedModel &) = delete;
- MemoryMappedModel(MemoryMappedModel &&) = delete;
-
-public:
- const ::tflite::Model *model(void) const override { return ::tflite::GetModel(_data); }
-
-private:
- int _fd = -1;
- void *_data = nullptr;
- size_t _size = 0;
-};
-
-class FileDescriptor final
-{
-public:
- FileDescriptor(int value) : _value{value}
- {
- // DO NOTHING
- }
-
-public:
- // NOTE Copy is not allowed
- FileDescriptor(const FileDescriptor &) = delete;
-
-public:
- // NOTE Move is allowed
- FileDescriptor(FileDescriptor &&fd) { _value = fd.release(); }
-
-public:
- ~FileDescriptor()
- {
- if (_value != -1)
- {
- // Close on descturction
- close(_value);
- }
- }
-
-public:
- int value(void) const { return _value; }
-
-public:
- int release(void)
- {
- auto res = _value;
- _value = -1;
- return res;
- }
-
-private:
- int _value = -1;
-};
-
-} // namespace
-
-namespace tflread
-{
-
-std::unique_ptr<Model> load_tflite(const std::string &path)
-{
- FileDescriptor fd = open(path.c_str(), O_RDONLY);
-
- if (fd.value() == -1)
- {
- // Return nullptr on open failure
- return nullptr;
- }
-
- struct stat st;
- if (fstat(fd.value(), &st) == -1)
- {
- // Return nullptr on fstat failure
- return nullptr;
- }
-
- auto size = st.st_size;
- auto data = mmap(nullptr, size, PROT_READ, MAP_SHARED, fd.value(), 0);
-
- if (data == MAP_FAILED)
- {
- // Return nullptr on mmap failure
- return nullptr;
- }
-
- return std::unique_ptr<tflread::Model>{new MemoryMappedModel(fd.release(), data, size)};
-}
-
-} // namespace tflread
diff --git a/compiler/tfldump/src/OpPrinter.cpp b/compiler/tfldump/src/OpPrinter.cpp
index 24b9264ff..f8777fad9 100644
--- a/compiler/tfldump/src/OpPrinter.cpp
+++ b/compiler/tfldump/src/OpPrinter.cpp
@@ -74,6 +74,26 @@ public:
}
};
+class BidirectionalSequenceLSTMPrinter : public OpPrinter
+{
+public:
+ void options(const tflite::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_BidirectionalSequenceLSTMOptions())
+ {
+ os << " ";
+ os << "Activation(" << EnumNameActivationFunctionType(params->fused_activation_function())
+ << ") ";
+ os << "cell_clip(" << params->cell_clip() << ") ";
+ os << "proj_clip(" << params->proj_clip() << ") ";
+ os << "time_major(" << params->time_major() << ") ";
+ os << "asymmetric_quantize_inputs(" << params->asymmetric_quantize_inputs() << ") ";
+ os << "merge_outputs(" << params->merge_outputs() << ") ";
+ os << std::endl;
+ }
+ }
+};
+
class CastPrinter : public OpPrinter
{
public:
@@ -201,6 +221,7 @@ public:
os << std::boolalpha;
os << "align_corners(" << resize_params->align_corners() << ")";
os << "half_pixel_centers(" << resize_params->half_pixel_centers() << ")";
+ os << std::noboolalpha;
os << std::endl;
}
}
@@ -216,6 +237,7 @@ public:
os << " ";
os << std::boolalpha;
os << "align_corners(" << resize_params->align_corners() << ")";
+ os << std::noboolalpha;
os << std::endl;
}
}
@@ -277,7 +299,7 @@ public:
os << "Stride.H(" << conv_params->stride_h() << ") ";
os << "DepthMultiplier(" << conv_params->depth_multiplier() << ") ";
os << "Dilation.W(" << conv_params->dilation_w_factor() << ") ";
- os << "Dilation.H(" << conv_params->dilation_h_factor() << ")";
+ os << "Dilation.H(" << conv_params->dilation_h_factor() << ") ";
os << "Activation("
<< EnumNameActivationFunctionType(conv_params->fused_activation_function()) << ") ";
os << std::endl;
@@ -285,6 +307,25 @@ public:
}
};
+class FakeQuantPrinter : public OpPrinter
+{
+public:
+ void options(const tflite::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_FakeQuantOptions())
+ {
+ os << " ";
+ os << "Min(" << params->min() << ") ";
+ os << "Max(" << params->max() << ") ";
+ os << "NumBits(" << params->num_bits() << ") ";
+ os << std::boolalpha;
+ os << "NarrowRange(" << params->narrow_range() << ") ";
+ os << std::noboolalpha;
+ os << std::endl;
+ }
+ }
+};
+
class FullyConnectedPrinter : public OpPrinter
{
public:
@@ -318,6 +359,19 @@ public:
}
};
+class GeluPrinter : public OpPrinter
+{
+public:
+ void options(const tflite::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_GeluOptions())
+ {
+ os << " ";
+ os << "approximate(" << params->approximate() << ") ";
+ }
+ }
+};
+
class IfPrinter : public OpPrinter
{
public:
@@ -561,6 +615,23 @@ public:
}
};
+class SVDFPrinter : public OpPrinter
+{
+public:
+ void options(const tflite::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_SVDFOptions())
+ {
+ os << " ";
+ os << "rank(" << params->rank() << ") ";
+ os << "activation(" << EnumNameActivationFunctionType(params->fused_activation_function())
+ << ") ";
+ os << "asymmetric_quantize_inputs(" << params->asymmetric_quantize_inputs() << ") ";
+ os << std::endl;
+ }
+ }
+};
+
class TransposeConvPrinter : public OpPrinter
{
public:
@@ -572,6 +643,8 @@ public:
os << "Padding(" << params->padding() << ") ";
os << "Stride.W(" << params->stride_w() << ") ";
os << "Stride.H(" << params->stride_h() << ") ";
+ os << "Activation(" << EnumNameActivationFunctionType(params->fused_activation_function())
+ << ") ";
os << std::endl;
}
}
@@ -592,6 +665,25 @@ public:
}
};
+class UnidirectionalSequenceLSTMPrinter : public OpPrinter
+{
+public:
+ void options(const tflite::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_UnidirectionalSequenceLSTMOptions())
+ {
+ os << " ";
+ os << "Activation(" << EnumNameActivationFunctionType(params->fused_activation_function())
+ << ") ";
+ os << "cell_clip(" << params->cell_clip() << ") ";
+ os << "proj_clip(" << params->proj_clip() << ") ";
+ os << "time_major(" << params->time_major() << ") ";
+ os << "asymmetric_quantize_inputs(" << params->asymmetric_quantize_inputs() << ") ";
+ os << std::endl;
+ }
+ }
+};
+
class UniquePrinter : public OpPrinter
{
public:
@@ -653,27 +745,34 @@ OpPrinterRegistry::OpPrinterRegistry()
_op_map[tflite::BuiltinOperator_ARG_MAX] = make_unique<ArgMaxPrinter>();
_op_map[tflite::BuiltinOperator_ARG_MIN] = make_unique<ArgMinPrinter>();
_op_map[tflite::BuiltinOperator_AVERAGE_POOL_2D] = make_unique<Pool2DPrinter>();
+ _op_map[tflite::BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM] =
+ make_unique<BidirectionalSequenceLSTMPrinter>();
_op_map[tflite::BuiltinOperator_CAST] = make_unique<CastPrinter>();
// There is no Option for CEIL
_op_map[tflite::BuiltinOperator_CONCATENATION] = make_unique<ConcatenationPrinter>();
_op_map[tflite::BuiltinOperator_CONV_2D] = make_unique<Conv2DPrinter>();
+ // There is no Option for DENSIFY
_op_map[tflite::BuiltinOperator_DEPTH_TO_SPACE] = make_unique<DepthToSpacePrinter>();
_op_map[tflite::BuiltinOperator_DEPTHWISE_CONV_2D] = make_unique<DepthwiseConv2DPrinter>();
+ // There is no Option for DEQUANTIZE
_op_map[tflite::BuiltinOperator_DIV] = make_unique<DivPrinter>();
+ _op_map[tflite::BuiltinOperator_FAKE_QUANT] = make_unique<FakeQuantPrinter>();
// There is no Option for FLOOR
// There is no Option for FLOOR_MOD
_op_map[tflite::BuiltinOperator_FULLY_CONNECTED] = make_unique<FullyConnectedPrinter>();
_op_map[tflite::BuiltinOperator_GATHER] = make_unique<GatherPrinter>();
+ _op_map[tflite::BuiltinOperator_GELU] = make_unique<GeluPrinter>();
_op_map[tflite::BuiltinOperator_IF] = make_unique<IfPrinter>();
_op_map[tflite::BuiltinOperator_L2_POOL_2D] = make_unique<Pool2DPrinter>();
_op_map[tflite::BuiltinOperator_L2_NORMALIZATION] = make_unique<L2NormPrinter>();
_op_map[tflite::BuiltinOperator_LEAKY_RELU] = make_unique<LeakyReluPrinter>();
_op_map[tflite::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION] =
- make_unique<LocalResponseNormalizationPrinter>();
+ make_unique<LocalResponseNormalizationPrinter>();
// There is no Option for LOG
// There is no Option for LOGISTIC
// There is no Option for LOG_SOFTMAX
_op_map[tflite::BuiltinOperator_MAX_POOL_2D] = make_unique<Pool2DPrinter>();
+ _op_map[tflite::BuiltinOperator_MEAN] = make_unique<ReducerPrinter>();
_op_map[tflite::BuiltinOperator_MIRROR_PAD] = make_unique<MirrorPadPrinter>();
_op_map[tflite::BuiltinOperator_MUL] = make_unique<MulPrinter>();
// There is no Option for NON_MAX_SUPPRESSION_V4
@@ -693,7 +792,7 @@ OpPrinterRegistry::OpPrinterRegistry()
_op_map[tflite::BuiltinOperator_RESHAPE] = make_unique<ReshapePrinter>();
_op_map[tflite::BuiltinOperator_RESIZE_BILINEAR] = make_unique<ResizeBilinearPrinter>();
_op_map[tflite::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR] =
- make_unique<ResizeNearestNeighborPrinter>();
+ make_unique<ResizeNearestNeighborPrinter>();
_op_map[tflite::BuiltinOperator_REVERSE_SEQUENCE] = make_unique<ReverseSequencePrinter>();
// There is no Option for ROUND
// There is no Option for SELECT
@@ -711,8 +810,11 @@ OpPrinterRegistry::OpPrinterRegistry()
_op_map[tflite::BuiltinOperator_STRIDED_SLICE] = make_unique<StridedSlicePrinter>();
_op_map[tflite::BuiltinOperator_SUB] = make_unique<SubPrinter>();
_op_map[tflite::BuiltinOperator_SUM] = make_unique<ReducerPrinter>();
+ _op_map[tflite::BuiltinOperator_SVDF] = make_unique<SVDFPrinter>();
_op_map[tflite::BuiltinOperator_TRANSPOSE_CONV] = make_unique<TransposeConvPrinter>();
// There is no Option for TOPK_V2
+ _op_map[tflite::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM] =
+ make_unique<UnidirectionalSequenceLSTMPrinter>();
_op_map[tflite::BuiltinOperator_UNIQUE] = make_unique<UniquePrinter>();
_op_map[tflite::BuiltinOperator_WHILE] = make_unique<WhilePrinter>();
_op_map[tflite::BuiltinOperator_CUSTOM] = make_unique<CustomOpPrinter>();
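
OpPrinterRegistry above is a plain lookup table: one stateless printer object per opcode, and opcodes without options simply have no entry. A self-contained sketch of the pattern (illustrative names and opcode value, not tfldump's real types):

    #include <iostream>
    #include <map>
    #include <memory>

    struct OpPrinter
    {
      virtual ~OpPrinter() = default;
      virtual void options(std::ostream &) const {} // default: nothing to print
    };

    struct SVDFPrinter final : OpPrinter
    {
      void options(std::ostream &os) const override { os << "rank(1)" << std::endl; }
    };

    int main()
    {
      std::map<int, std::unique_ptr<OpPrinter>> op_map;
      op_map[27] = std::make_unique<SVDFPrinter>(); // 27 is an illustrative code

      auto it = op_map.find(27); // unknown opcodes: no entry, no output
      if (it != op_map.end())
        it->second->options(std::cout);
      return 0;
    }
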
diff --git a/compiler/tfldump/src/Read.cpp b/compiler/tfldump/src/Read.cpp
index f9782d9ef..7fa3d8181 100644
--- a/compiler/tfldump/src/Read.cpp
+++ b/compiler/tfldump/src/Read.cpp
@@ -16,71 +16,21 @@
#include "Read.h"
+#include <mio_tflite2121/Helper.h>
+
#include <sstream>
#include <string>
namespace tflread
{
-bool is_valid(const tflite::OperatorCode *opcode)
-{
- tflite::BuiltinOperator code = opcode->builtin_code();
- return (tflite::BuiltinOperator_MIN <= code && code <= tflite::BuiltinOperator_MAX);
-}
-
-bool is_custom(const tflite::OperatorCode *opcode)
-{
- tflite::BuiltinOperator code = opcode->builtin_code();
- return (code == tflite::BuiltinOperator_CUSTOM);
-}
-
-std::string opcode_name(const tflite::OperatorCode *opcode)
-{
- assert(opcode);
-
- if (!is_valid(opcode))
- {
- std::ostringstream oss;
- oss << "(invalid)";
- return oss.str();
- }
-
- if (is_custom(opcode))
- {
- if (!opcode->custom_code())
- return "(invalid custom)";
-
- std::string custom_op = "CUSTOM(";
- custom_op += opcode->custom_code()->c_str();
- custom_op += ")";
- return custom_op;
- }
-
- tflite::BuiltinOperator code = opcode->builtin_code();
- return tflite::EnumNameBuiltinOperator(code);
-}
-
-const char *tensor_type(const tflite::Tensor *tensor)
-{
- return tflite::EnumNameTensorType(tensor->type());
-}
-
-const char *tensor_name(const tflite::Tensor *tensor)
-{
- static const char *kEmptyTensorName = "(noname)";
-
- auto name = tensor->name();
- if (name)
- return name->c_str();
-
- return kEmptyTensorName;
-}
-
Reader::Reader(const tflite::Model *model)
{
_version = model->version();
_subgraphs = model->subgraphs();
_buffers = model->buffers();
+ _metadata = model->metadata();
+ _signaturedefs = model->signature_defs();
auto opcodes = model->operator_codes();
for (const ::tflite::OperatorCode *opcode : *opcodes)
@@ -117,7 +67,7 @@ tflite::BuiltinOperator Reader::builtin_code(const tflite::Operator *op) const
assert(index < _op_codes.size());
const tflite::OperatorCode *opcode = _op_codes.at(index);
- return opcode->builtin_code();
+ return mio::tflite::builtin_code_neutral(opcode);
}
std::string Reader::opcode_name(const tflite::Operator *op) const
@@ -126,14 +76,14 @@ std::string Reader::opcode_name(const tflite::Operator *op) const
assert(index < _op_codes.size());
const tflite::OperatorCode *opcode = _op_codes.at(index);
- if (!is_valid(opcode))
+ if (!mio::tflite::is_valid(opcode))
{
std::ostringstream oss;
oss << "(invalid: " << index << ")";
return oss.str();
}
- return tflread::opcode_name(opcode);
+ return mio::tflite::opcode_name(opcode);
}
bool Reader::select_subgraph(uint32_t sgindex)
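
builtin_code_neutral() exists because the TFLite schema's original opcode field is an int8 that saturates at 127; newer operators are representable only in the 32-bit builtin_code field added later, which is also why the model dump now prints both code and dep_code. A hedged sketch of the reconciliation (the real helper lives in mio_tflite2121/Helper.h and may differ in detail):

    #include <cassert>
    #include <cstdint>

    int32_t builtin_code_neutral(int8_t deprecated_code, int32_t builtin_code)
    {
      if (deprecated_code == 127) // sentinel: the real code did not fit in int8
        return builtin_code;      // e.g. GELU-era additions
      return deprecated_code;     // legacy opcode, both fields agree
    }

    int main()
    {
      assert(builtin_code_neutral(3, 3) == 3);       // legacy CONV_2D-style code
      assert(builtin_code_neutral(127, 150) == 150); // hypothetical new opcode
      return 0;
    }
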
diff --git a/compiler/tfldump/src/Read.h b/compiler/tfldump/src/Read.h
index 7af2fa59b..1ae63877f 100644
--- a/compiler/tfldump/src/Read.h
+++ b/compiler/tfldump/src/Read.h
@@ -36,12 +36,6 @@ template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T
return ret;
}
-bool is_valid(const tflite::OperatorCode *opcode);
-bool is_custom(const tflite::OperatorCode *opcode);
-std::string opcode_name(const tflite::OperatorCode *opcode);
-const char *tensor_type(const tflite::Tensor *tensor);
-const char *tensor_name(const tflite::Tensor *tensor);
-
/**
* @brief Loads TF lite file and provides helpers to access attributes
*/
@@ -52,6 +46,8 @@ private:
using TFliteBuffers_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Buffer>>;
using TFliteTensors_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Tensor>>;
using TFliteOperators_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Operator>>;
+ using TFliteMetadata_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Metadata>>;
+ using TFliteSignatureDef_t = flatbuffers::Vector<flatbuffers::Offset<tflite::SignatureDef>>;
public:
Reader(const tflite::Model *model);
@@ -67,6 +63,8 @@ public:
const TFliteOperators_t *operators() { return _operators; }
const std::vector<int32_t> &inputs() const { return _inputs; }
const std::vector<int32_t> &outputs() const { return _outputs; }
+ const TFliteMetadata_t *metadata() const { return _metadata; }
+ const TFliteSignatureDef_t *signaturedefs() const { return _signaturedefs; }
uint32_t num_subgraph() const { return _subgraphs->Length(); }
@@ -86,6 +84,8 @@ private:
const TFliteBuffers_t *_buffers{nullptr};
const TFliteTensors_t *_tensors{nullptr};
const TFliteOperators_t *_operators{nullptr};
+ const TFliteMetadata_t *_metadata{nullptr};
+ const TFliteSignatureDef_t *_signaturedefs{nullptr};
uint32_t _subgraph_index;
std::string _subgraph_name;
diff --git a/compiler/tflite2circle-conversion-test/CMakeLists.txt b/compiler/tflite2circle-conversion-test/CMakeLists.txt
index 83fe23a8f..2e67d48bd 100644
--- a/compiler/tflite2circle-conversion-test/CMakeLists.txt
+++ b/compiler/tflite2circle-conversion-test/CMakeLists.txt
@@ -1,3 +1,7 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
nnas_include(TargetRequire)
unset(REQUIRED_TARGETS)
diff --git a/compiler/tflite2circle/CMakeLists.txt b/compiler/tflite2circle/CMakeLists.txt
index b1d1f6149..73639f390 100644
--- a/compiler/tflite2circle/CMakeLists.txt
+++ b/compiler/tflite2circle/CMakeLists.txt
@@ -1,8 +1,8 @@
nnas_include(TargetRequire)
unset(REQUIRED_TARGETS)
-list(APPEND REQUIRED_TARGETS mio_tflite)
-list(APPEND REQUIRED_TARGETS mio_circle)
+list(APPEND REQUIRED_TARGETS mio_tflite2121)
+list(APPEND REQUIRED_TARGETS mio_circle06)
TargetRequire_Return(${REQUIRED_TARGETS})
set(DRIVER "driver/Driver.cpp")
@@ -11,9 +11,12 @@ add_executable(tflite2circle ${DRIVER} ${SOURCES})
target_include_directories(tflite2circle PRIVATE include)
target_include_directories(tflite2circle PRIVATE src)
target_link_libraries(tflite2circle arser)
+target_link_libraries(tflite2circle foder)
target_link_libraries(tflite2circle safemain)
-target_link_libraries(tflite2circle mio_tflite)
-target_link_libraries(tflite2circle mio_circle)
+target_link_libraries(tflite2circle mio_tflite2121)
+target_link_libraries(tflite2circle mio_tflite2121_helper)
+target_link_libraries(tflite2circle mio_circle06)
target_link_libraries(tflite2circle vconone)
+target_link_libraries(tflite2circle nncc_coverage)
install(TARGETS tflite2circle DESTINATION bin)
diff --git a/compiler/tflite2circle/driver/Driver.cpp b/compiler/tflite2circle/driver/Driver.cpp
index 2f11e0a13..6afe1b0f2 100644
--- a/compiler/tflite2circle/driver/Driver.cpp
+++ b/compiler/tflite2circle/driver/Driver.cpp
@@ -36,18 +36,11 @@ int entry(int argc, char **argv)
{
arser::Arser arser{"tflite2circle is a Tensorflow lite to circle model converter"};
- arser.add_argument("--version")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("Show version information and exit")
- .exit_with(print_version);
-
- arser.add_argument("tflite")
- .nargs(1)
- .type(arser::DataType::STR)
- .help("Source tflite file path to convert");
- arser.add_argument("circle").nargs(1).type(arser::DataType::STR).help("Target circle file path");
+ arser::Helper::add_version(arser, print_version);
+ arser::Helper::add_verbose(arser);
+
+ arser.add_argument("tflite").help("Source tflite file path to convert");
+ arser.add_argument("circle").help("Target circle file path");
try
{
@@ -55,7 +48,7 @@ int entry(int argc, char **argv)
}
catch (const std::runtime_error &err)
{
- std::cout << err.what() << std::endl;
+ std::cerr << err.what() << std::endl;
std::cout << arser;
return 255;
}
@@ -64,9 +57,9 @@ int entry(int argc, char **argv)
std::string circle_path = arser.get<std::string>("circle");
// read tflite file
tflite2circle::TFLModel tfl_model(tfl_path);
- if (!tfl_model.is_valid())
+ if (not tfl_model.verify_data())
{
- std::cerr << "ERROR: Failed to load tflite '" << tfl_path << "'" << std::endl;
+ std::cerr << "ERROR: Failed to verify tflite '" << tfl_path << "'" << std::endl;
return 255;
}
@@ -74,7 +67,10 @@ int entry(int argc, char **argv)
auto flatbuffer_builder = std::make_unique<flatbuffers::FlatBufferBuilder>(1024);
// convert tflite to circle
- tflite2circle::CircleModel circle_model{flatbuffer_builder, tfl_model};
+ tflite2circle::CircleModel circle_model{flatbuffer_builder};
+
+ circle_model.load_offsets(tfl_model.get_model());
+ circle_model.model_build();
std::ofstream outfile{circle_path, std::ios::binary};
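
The driver change above reflects a two-phase CircleModel API: the constructor only binds the FlatBufferBuilder, and conversion happens in explicit load_offsets()/model_build() steps, so a failed verify_data() is reported before any conversion work begins. The shape of that protocol, with a stand-in type rather than the real tflite2circle classes:

    #include <iostream>
    #include <stdexcept>
    #include <string>

    struct Converter // stand-in for tflite2circle::CircleModel
    {
      bool _loaded = false;

      void load_offsets(const std::string &src) { _loaded = !src.empty(); }

      void model_build() const
      {
        if (!_loaded)
          throw std::runtime_error("load_offsets() must run first");
        std::cout << "built" << std::endl;
      }
    };

    int main()
    {
      Converter c;                   // cheap: nothing parsed at construction
      c.load_offsets("model bytes"); // phase 1: bind source offsets
      c.model_build();               // phase 2: emit the circle model
      return 0;
    }
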
diff --git a/compiler/tflite2circle/include/CircleModel.h b/compiler/tflite2circle/include/CircleModel.h
index e1e35d8ff..189cfaff2 100644
--- a/compiler/tflite2circle/include/CircleModel.h
+++ b/compiler/tflite2circle/include/CircleModel.h
@@ -60,16 +60,26 @@ template <typename T> class Offset
private:
using TFLFlatBufVec = flatbuffers::Vector<typename T::TFL>;
using CIRFlatBufVecOffset = flatbuffers::Offset<flatbuffers::Vector<typename T::CIR>>;
+ using SignatureDefs = flatbuffers::Vector<flatbuffers::Offset<::tflite::SignatureDef>>;
public:
Offset(void) = delete;
- Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec);
+ Offset(FlatBufBuilder &fb) : _fb{fb} {}
+
+public:
+ void set_signature_defs(const SignatureDefs *offset) { _tfl_signature_def_offsets = offset; }
+
+public:
+ void build(const TFLFlatBufVec *tflite_flatbuffer_vec);
public:
CIRFlatBufVecOffset offset(void) const { return _circle_flatbuffer_vec_offset; }
private:
+ FlatBufBuilder &_fb;
CIRFlatBufVecOffset _circle_flatbuffer_vec_offset;
+ // TODO revise this when Circle supports SignatureDef
+ const SignatureDefs *_tfl_signature_def_offsets = nullptr;
};
class CircleModel
@@ -79,9 +89,10 @@ private:
public:
CircleModel(void) = delete;
- CircleModel(FlatBufBuilder &fb, TFLModel &tfl_model);
+ CircleModel(FlatBufBuilder &fb);
public:
+ void load_offsets(const tflite::Model *tfl_model);
void model_build(void) const;
const char *base(void) const;
size_t size(void) const;
diff --git a/compiler/tflite2circle/include/TFLModel.h b/compiler/tflite2circle/include/TFLModel.h
index e53d62749..507667bb9 100644
--- a/compiler/tflite2circle/include/TFLModel.h
+++ b/compiler/tflite2circle/include/TFLModel.h
@@ -37,15 +37,14 @@ public:
TFLModel(const std::string &path);
public:
- bool is_valid(void) { return _valid; }
+ const tflite::Model *get_model(void);
-private:
- const tflite::Model *load_model(void);
+public:
+ bool verify_data(void);
private:
std::ifstream _infile;
DataBuffer _data;
- bool _valid;
friend class CircleModel;
};
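
verify_data() replacing is_valid() suggests the model bytes are now checked with the generated FlatBuffers verifier before use. Whether TFLModel calls it exactly this way is an assumption, but VerifyModelBuffer is the entry point FlatBuffers generates for the schema's root type, so a plausible sketch looks like this:

    #include <cstdint>
    #include <vector>

    #include <mio/tflite/schema_generated.h>

    // Assumed shape of the check: bounds-verify the buffer before GetModel().
    bool verify_tflite_bytes(const std::vector<char> &data)
    {
      flatbuffers::Verifier verifier{reinterpret_cast<const uint8_t *>(data.data()),
                                     data.size()};
      return tflite::VerifyModelBuffer(verifier);
    }
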
diff --git a/compiler/tflite2circle/requires.cmake b/compiler/tflite2circle/requires.cmake
index 837c287b6..c5528c28a 100644
--- a/compiler/tflite2circle/requires.cmake
+++ b/compiler/tflite2circle/requires.cmake
@@ -1,5 +1,6 @@
require("arser")
-require("mio-tflite")
-require("mio-circle")
+require("foder")
+require("mio-tflite2121")
+require("mio-circle06")
require("safemain")
require("vconone")
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions.h
index 680118618..5ed88ce02 100644
--- a/compiler/tflite2circle/src/BuildBuiltinOptions.h
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions.h
@@ -26,22 +26,27 @@
#include "BuildBuiltinOptions/ArgMinOptions.h"
#include "BuildBuiltinOptions/BatchMatMulOptions.h"
#include "BuildBuiltinOptions/BatchToSpaceNDOptions.h"
+#include "BuildBuiltinOptions/BidirectionalSequenceLSTMOptions.h"
#include "BuildBuiltinOptions/CastOptions.h"
#include "BuildBuiltinOptions/ConcatenationOptions.h"
#include "BuildBuiltinOptions/Conv2DOptions.h"
#include "BuildBuiltinOptions/CosOptions.h"
+#include "BuildBuiltinOptions/DensifyOptions.h"
#include "BuildBuiltinOptions/DepthToSpaceOptions.h"
#include "BuildBuiltinOptions/DepthwiseConv2DOptions.h"
+#include "BuildBuiltinOptions/DequantizeOptions.h"
#include "BuildBuiltinOptions/DivOptions.h"
#include "BuildBuiltinOptions/EqualOptions.h"
#include "BuildBuiltinOptions/ExpandDimsOptions.h"
#include "BuildBuiltinOptions/ExpOptions.h"
+#include "BuildBuiltinOptions/FakeQuantOptions.h"
#include "BuildBuiltinOptions/FillOptions.h"
#include "BuildBuiltinOptions/FloorDivOptions.h"
#include "BuildBuiltinOptions/FloorModOptions.h"
#include "BuildBuiltinOptions/FullyConnectedOptions.h"
#include "BuildBuiltinOptions/GatherOptions.h"
#include "BuildBuiltinOptions/GatherNdOptions.h"
+#include "BuildBuiltinOptions/GeluOptions.h"
#include "BuildBuiltinOptions/GreaterOptions.h"
#include "BuildBuiltinOptions/GreaterEqualOptions.h"
#include "BuildBuiltinOptions/IfOptions.h"
@@ -100,10 +105,12 @@
#include "BuildBuiltinOptions/SqueezeOptions.h"
#include "BuildBuiltinOptions/StridedSliceOptions.h"
#include "BuildBuiltinOptions/SubOptions.h"
+#include "BuildBuiltinOptions/SVDFOptions.h"
#include "BuildBuiltinOptions/TileOptions.h"
#include "BuildBuiltinOptions/TopKV2Options.h"
#include "BuildBuiltinOptions/TransposeOptions.h"
#include "BuildBuiltinOptions/TransposeConvOptions.h"
+#include "BuildBuiltinOptions/UnidirectionalSequenceLSTMOptions.h"
#include "BuildBuiltinOptions/UniqueOptions.h"
#include "BuildBuiltinOptions/UnpackOptions.h"
#include "BuildBuiltinOptions/WhereOptions.h"
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/AddOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/AddOptions.cpp
index f93a0f21f..5bdb1020a 100644
--- a/compiler/tflite2circle/src/BuildBuiltinOptions/AddOptions.cpp
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/AddOptions.cpp
@@ -29,7 +29,7 @@ flatbuffers::Offset<circle::AddOptions> build_circle_AddOptions(flatbuffers::Fla
assert(tflite_builtin_options);
circle::AddOptionsBuilder builtin_options_builder{fb};
builtin_options_builder.add_fused_activation_function(
- get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
return builtin_options_builder.Finish();
}
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/ArgMaxOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/ArgMaxOptions.cpp
index 0ccdde4cb..ac0044a8f 100644
--- a/compiler/tflite2circle/src/BuildBuiltinOptions/ArgMaxOptions.cpp
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/ArgMaxOptions.cpp
@@ -29,7 +29,7 @@ build_circle_ArgMaxOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Ope
assert(tflite_builtin_options);
circle::ArgMaxOptionsBuilder builtin_options_builder{fb};
builtin_options_builder.add_output_type(
- get_circle_tensortype(tflite_builtin_options->output_type()));
+ get_circle_tensortype(tflite_builtin_options->output_type()));
return builtin_options_builder.Finish();
}
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/ArgMinOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/ArgMinOptions.cpp
index 204558df8..3011c8b65 100644
--- a/compiler/tflite2circle/src/BuildBuiltinOptions/ArgMinOptions.cpp
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/ArgMinOptions.cpp
@@ -29,7 +29,7 @@ build_circle_ArgMinOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Ope
assert(tflite_builtin_options);
circle::ArgMinOptionsBuilder builtin_options_builder{fb};
builtin_options_builder.add_output_type(
- get_circle_tensortype(tflite_builtin_options->output_type()));
+ get_circle_tensortype(tflite_builtin_options->output_type()));
return builtin_options_builder.Finish();
}
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/BidirectionalSequenceLSTMOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/BidirectionalSequenceLSTMOptions.cpp
new file mode 100644
index 000000000..2a6cf171b
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/BidirectionalSequenceLSTMOptions.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BidirectionalSequenceLSTMOptions.h"
+#include "DataLookup.h"
+
+#include <cassert>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::BidirectionalSequenceLSTMOptions>
+build_circle_BidirectionalSequenceLSTMOptions(flatbuffers::FlatBufferBuilder &fb,
+ const tflite::Operator *op)
+{
+ auto tflite_builtin_options = op->builtin_options_as_BidirectionalSequenceLSTMOptions();
+ assert(tflite_builtin_options);
+ circle::BidirectionalSequenceLSTMOptionsBuilder builtin_options_builder{fb};
+ builtin_options_builder.add_fused_activation_function(
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ builtin_options_builder.add_cell_clip(tflite_builtin_options->cell_clip());
+ builtin_options_builder.add_proj_clip(tflite_builtin_options->proj_clip());
+ builtin_options_builder.add_time_major(tflite_builtin_options->time_major());
+ builtin_options_builder.add_merge_outputs(tflite_builtin_options->merge_outputs());
+ builtin_options_builder.add_asymmetric_quantize_inputs(
+ tflite_builtin_options->asymmetric_quantize_inputs());
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/BidirectionalSequenceLSTMOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/BidirectionalSequenceLSTMOptions.h
new file mode 100644
index 000000000..7b77b1cea
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/BidirectionalSequenceLSTMOptions.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_BIDIRECTIONALSEQUENCE_LSTM_OPTIONS_H__
+#define __BBO_BIDIRECTIONALSEQUENCE_LSTM_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::BidirectionalSequenceLSTMOptions>
+build_circle_BidirectionalSequenceLSTMOptions(flatbuffers::FlatBufferBuilder &fb,
+ const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_BIDIRECTIONALSEQUENCE_LSTM_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/CastOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/CastOptions.cpp
index bc1445248..0f2422c05 100644
--- a/compiler/tflite2circle/src/BuildBuiltinOptions/CastOptions.cpp
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/CastOptions.cpp
@@ -31,9 +31,9 @@ build_circle_CastOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Opera
circle::CastOptionsBuilder builtin_options_builder{fb};
builtin_options_builder.add_in_data_type(
- get_circle_tensortype(tflite_builtin_options->in_data_type()));
+ get_circle_tensortype(tflite_builtin_options->in_data_type()));
builtin_options_builder.add_out_data_type(
- get_circle_tensortype(tflite_builtin_options->out_data_type()));
+ get_circle_tensortype(tflite_builtin_options->out_data_type()));
return builtin_options_builder.Finish();
}
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/ConcatenationOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/ConcatenationOptions.cpp
index 933e7cf66..becc63bf6 100644
--- a/compiler/tflite2circle/src/BuildBuiltinOptions/ConcatenationOptions.cpp
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/ConcatenationOptions.cpp
@@ -30,7 +30,7 @@ build_circle_ConcatenationOptions(flatbuffers::FlatBufferBuilder &fb, const tfli
circle::ConcatenationOptionsBuilder builtin_options_builder{fb};
builtin_options_builder.add_axis(tflite_builtin_options->axis());
builtin_options_builder.add_fused_activation_function(
- get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
return builtin_options_builder.Finish();
}
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/Conv2DOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/Conv2DOptions.cpp
index ace63dd26..ec0cffeda 100644
--- a/compiler/tflite2circle/src/BuildBuiltinOptions/Conv2DOptions.cpp
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/Conv2DOptions.cpp
@@ -32,7 +32,7 @@ build_circle_Conv2DOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Ope
builtin_options_builder.add_stride_w(tflite_builtin_options->stride_w());
builtin_options_builder.add_stride_h(tflite_builtin_options->stride_h());
builtin_options_builder.add_fused_activation_function(
- get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
builtin_options_builder.add_dilation_w_factor(tflite_builtin_options->dilation_w_factor());
builtin_options_builder.add_dilation_h_factor(tflite_builtin_options->dilation_h_factor());
return builtin_options_builder.Finish();
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/DensifyOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/DensifyOptions.cpp
new file mode 100644
index 000000000..4e5863576
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/DensifyOptions.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DensifyOptions.h"
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::DensifyOptions>
+build_circle_DensifyOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *)
+{
+ circle::DensifyOptionsBuilder builtin_options_builder{fb};
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/DensifyOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/DensifyOptions.h
new file mode 100644
index 000000000..b6126c4e2
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/DensifyOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_DENSIFY_OPTIONS_H__
+#define __BBO_DENSIFY_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::DensifyOptions>
+build_circle_DensifyOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_DENSIFY_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/DepthwiseConv2DOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/DepthwiseConv2DOptions.cpp
index 2aa35abc6..910a6ead9 100644
--- a/compiler/tflite2circle/src/BuildBuiltinOptions/DepthwiseConv2DOptions.cpp
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/DepthwiseConv2DOptions.cpp
@@ -33,7 +33,7 @@ build_circle_DepthwiseConv2DOptions(flatbuffers::FlatBufferBuilder &fb, const tf
builtin_options_builder.add_stride_h(tflite_builtin_options->stride_h());
builtin_options_builder.add_depth_multiplier(tflite_builtin_options->depth_multiplier());
builtin_options_builder.add_fused_activation_function(
- get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
builtin_options_builder.add_dilation_w_factor(tflite_builtin_options->dilation_w_factor());
builtin_options_builder.add_dilation_h_factor(tflite_builtin_options->dilation_h_factor());
return builtin_options_builder.Finish();
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.cpp
new file mode 100644
index 000000000..eeacece6a
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DequantizeOptions.h"
+#include "DataLookup.h"
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::DequantizeOptions>
+build_circle_DequantizeOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *)
+{
+ circle::DequantizeOptionsBuilder builtin_options_builder{fb};
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.h
new file mode 100644
index 000000000..1cb9f9c1a
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_DEQUANTIZE_OPTIONS_H__
+#define __BBO_DEQUANTIZE_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::DequantizeOptions>
+build_circle_DequantizeOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_DEQUANTIZE_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/DivOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/DivOptions.cpp
index 4272fe144..3678928a5 100644
--- a/compiler/tflite2circle/src/BuildBuiltinOptions/DivOptions.cpp
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/DivOptions.cpp
@@ -29,7 +29,7 @@ flatbuffers::Offset<circle::DivOptions> build_circle_DivOptions(flatbuffers::Fla
assert(tflite_builtin_options);
circle::DivOptionsBuilder builtin_options_builder{fb};
builtin_options_builder.add_fused_activation_function(
- get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
return builtin_options_builder.Finish();
}
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/FakeQuantOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/FakeQuantOptions.cpp
new file mode 100644
index 000000000..e38600f82
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/FakeQuantOptions.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FillOptions.h"
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::FakeQuantOptions>
+build_circle_FakeQuantOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
+{
+ auto tflite_builtin_options = op->builtin_options_as_FakeQuantOptions();
+ assert(tflite_builtin_options);
+ circle::FakeQuantOptionsBuilder builtin_options_builder{fb};
+ builtin_options_builder.add_min(tflite_builtin_options->min());
+ builtin_options_builder.add_max(tflite_builtin_options->max());
+ builtin_options_builder.add_num_bits(tflite_builtin_options->num_bits());
+ builtin_options_builder.add_narrow_range(tflite_builtin_options->narrow_range());
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/FakeQuantOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/FakeQuantOptions.h
new file mode 100644
index 000000000..1f5f12b86
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/FakeQuantOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_FAKEQUANT_OPTIONS_H__
+#define __BBO_FAKEQUANT_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::FakeQuantOptions>
+build_circle_FakeQuantOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_FAKEQUANT_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/FullyConnectedOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/FullyConnectedOptions.cpp
index 098a96a40..27410012d 100644
--- a/compiler/tflite2circle/src/BuildBuiltinOptions/FullyConnectedOptions.cpp
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/FullyConnectedOptions.cpp
@@ -29,14 +29,15 @@ build_circle_FullyConnectedOptions(flatbuffers::FlatBufferBuilder &fb, const tfl
assert(tflite_builtin_options);
circle::FullyConnectedOptionsBuilder builtin_options_builder{fb};
builtin_options_builder.add_fused_activation_function(
- get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
// Get FullyConnectedOptionsWeightsFormat
auto tflite_weight_format = tflite_builtin_options->weights_format();
if (tflite_weight_format == tflite::FullyConnectedOptionsWeightsFormat_DEFAULT)
builtin_options_builder.add_weights_format(circle::FullyConnectedOptionsWeightsFormat_DEFAULT);
else if (tflite_weight_format == tflite::FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8)
builtin_options_builder.add_weights_format(
- circle::FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8);
+ circle::FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8);
+ builtin_options_builder.add_keep_num_dims(tflite_builtin_options->keep_num_dims());
return builtin_options_builder.Finish();
}
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/GeluOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/GeluOptions.cpp
new file mode 100644
index 000000000..92fcd6083
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/GeluOptions.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GeluOptions.h"
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::GeluOptions>
+build_circle_GeluOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
+{
+ auto *tflite_builtin_options = op->builtin_options_as_GeluOptions();
+ assert(tflite_builtin_options);
+ circle::GeluOptionsBuilder builtin_options_builder{fb};
+ builtin_options_builder.add_approximate(tflite_builtin_options->approximate());
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/GeluOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/GeluOptions.h
new file mode 100644
index 000000000..76955d795
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/GeluOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_GELU_OPTIONS_H__
+#define __BBO_GELU_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::GeluOptions>
+build_circle_GeluOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_GELU_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/L2NormalizeOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/L2NormalizeOptions.cpp
index d58aed83d..f5121a811 100644
--- a/compiler/tflite2circle/src/BuildBuiltinOptions/L2NormalizeOptions.cpp
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/L2NormalizeOptions.cpp
@@ -29,7 +29,7 @@ build_circle_L2NormOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Ope
assert(tflite_builtin_options);
circle::L2NormOptionsBuilder builtin_options_builder{fb};
builtin_options_builder.add_fused_activation_function(
- get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
return builtin_options_builder.Finish();
}
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/MaximumMinimumOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/MaximumMinimumOptions.cpp
index d2d2888f2..db88d3e82 100644
--- a/compiler/tflite2circle/src/BuildBuiltinOptions/MaximumMinimumOptions.cpp
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/MaximumMinimumOptions.cpp
@@ -25,8 +25,6 @@ namespace tflite2circle
flatbuffers::Offset<circle::MaximumMinimumOptions>
build_circle_MaximumMinimumOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
{
- auto tflite_builtin_options = op->builtin_options_as_MaximumMinimumOptions();
- assert(tflite_builtin_options);
circle::MaximumMinimumOptionsBuilder builtin_options_builder{fb};
return builtin_options_builder.Finish();
}
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/MulOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/MulOptions.cpp
index 009daea8b..3d4b9deb5 100644
--- a/compiler/tflite2circle/src/BuildBuiltinOptions/MulOptions.cpp
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/MulOptions.cpp
@@ -29,7 +29,7 @@ flatbuffers::Offset<circle::MulOptions> build_circle_MulOptions(flatbuffers::Fla
assert(tflite_builtin_options);
circle::MulOptionsBuilder builtin_options_builder{fb};
builtin_options_builder.add_fused_activation_function(
- get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
return builtin_options_builder.Finish();
}
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/Pool2DOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/Pool2DOptions.cpp
index 6b0bd1288..d796eadfa 100644
--- a/compiler/tflite2circle/src/BuildBuiltinOptions/Pool2DOptions.cpp
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/Pool2DOptions.cpp
@@ -34,7 +34,7 @@ build_circle_Pool2DOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Ope
builtin_options_builder.add_filter_width(tflite_builtin_options->filter_width());
builtin_options_builder.add_filter_height(tflite_builtin_options->filter_height());
builtin_options_builder.add_fused_activation_function(
- get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
return builtin_options_builder.Finish();
}
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/SVDFOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/SVDFOptions.cpp
new file mode 100644
index 000000000..e23738a69
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/SVDFOptions.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SVDFOptions.h"
+#include "DataLookup.h"
+
+#include <cassert>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::SVDFOptions>
+build_circle_SVDFOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
+{
+ auto *tflite_builtin_options = op->builtin_options_as_SVDFOptions();
+ assert(tflite_builtin_options);
+
+ circle::SVDFOptionsBuilder builtin_options_builder{fb};
+ builtin_options_builder.add_rank(tflite_builtin_options->rank());
+ builtin_options_builder.add_asymmetric_quantize_inputs(
+ tflite_builtin_options->asymmetric_quantize_inputs());
+ builtin_options_builder.add_fused_activation_function(
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/SVDFOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/SVDFOptions.h
new file mode 100644
index 000000000..2ddbd3911
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/SVDFOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_SVDF_OPTIONS_H__
+#define __BBO_SVDF_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::SVDFOptions>
+build_circle_SVDFOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_SVDF_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/SubOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/SubOptions.cpp
index 2e55f4dab..982f3fd68 100644
--- a/compiler/tflite2circle/src/BuildBuiltinOptions/SubOptions.cpp
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/SubOptions.cpp
@@ -29,7 +29,7 @@ flatbuffers::Offset<circle::SubOptions> build_circle_SubOptions(flatbuffers::Fla
assert(tflite_builtin_options);
circle::SubOptionsBuilder builtin_options_builder{fb};
builtin_options_builder.add_fused_activation_function(
- get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
return builtin_options_builder.Finish();
}
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/TransposeConvOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/TransposeConvOptions.cpp
index 301f2c421..0873170f7 100644
--- a/compiler/tflite2circle/src/BuildBuiltinOptions/TransposeConvOptions.cpp
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/TransposeConvOptions.cpp
@@ -31,6 +31,8 @@ build_circle_TransposeConvOptions(flatbuffers::FlatBufferBuilder &fb, const tfli
builtin_options_builder.add_padding(get_circle_padding(tflite_builtin_options->padding()));
builtin_options_builder.add_stride_w(tflite_builtin_options->stride_w());
builtin_options_builder.add_stride_h(tflite_builtin_options->stride_h());
+ builtin_options_builder.add_fused_activation_function(
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
return builtin_options_builder.Finish();
}
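Every BuildBuiltinOptions translation unit touched above follows the same shape: fetch the tflite builtin-options table, copy each field into the matching circle builder, and return the finished offset. A minimal sketch of that pattern, using AddOptions as the representative case (the real file of the same name in this directory is equivalent):

```cpp
#include "AddOptions.h"
#include "DataLookup.h"

#include <cassert>

namespace tflite2circle
{

// Sketch of the shared conversion pattern: read the tflite-side options,
// translate enum-typed fields through DataLookup helpers, and emit the
// circle-side table with the generated builder.
flatbuffers::Offset<circle::AddOptions>
build_circle_AddOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
{
  auto tflite_builtin_options = op->builtin_options_as_AddOptions();
  assert(tflite_builtin_options); // operator must actually carry AddOptions
  circle::AddOptionsBuilder builtin_options_builder{fb};
  builtin_options_builder.add_fused_activation_function(
    get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
  return builtin_options_builder.Finish();
}

} // namespace tflite2circle
```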
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/UnidirectionalSequenceLSTMOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/UnidirectionalSequenceLSTMOptions.cpp
new file mode 100644
index 000000000..6e8143be9
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/UnidirectionalSequenceLSTMOptions.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "UnidirectionalSequenceLSTMOptions.h"
+#include "DataLookup.h"
+
+#include <cassert>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::UnidirectionalSequenceLSTMOptions>
+build_circle_UnidirectionalSequenceLSTMOptions(flatbuffers::FlatBufferBuilder &fb,
+ const tflite::Operator *op)
+{
+ auto tflite_builtin_options = op->builtin_options_as_UnidirectionalSequenceLSTMOptions();
+ circle::UnidirectionalSequenceLSTMOptionsBuilder builtin_options_builder{fb};
+ builtin_options_builder.add_fused_activation_function(
+ get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+ builtin_options_builder.add_cell_clip(tflite_builtin_options->cell_clip());
+ builtin_options_builder.add_proj_clip(tflite_builtin_options->proj_clip());
+ builtin_options_builder.add_time_major(tflite_builtin_options->time_major());
+ builtin_options_builder.add_asymmetric_quantize_inputs(
+ tflite_builtin_options->asymmetric_quantize_inputs());
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/UnidirectionalSequenceLSTMOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/UnidirectionalSequenceLSTMOptions.h
new file mode 100644
index 000000000..2be0efbc2
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/UnidirectionalSequenceLSTMOptions.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_UNIDIRECTIONALSEQUENCELSTM_OPTIONS_H__
+#define __BBO_UNIDIRECTIONALSEQUENCELSTM_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::UnidirectionalSequenceLSTMOptions>
+build_circle_UnidirectionalSequenceLSTMOptions(flatbuffers::FlatBufferBuilder &fb,
+ const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_UNIDIRECTIONALSEQUENCELSTM_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/UniqueOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/UniqueOptions.cpp
index 96ddc15ad..f7ddeffcb 100644
--- a/compiler/tflite2circle/src/BuildBuiltinOptions/UniqueOptions.cpp
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/UniqueOptions.cpp
@@ -29,7 +29,7 @@ build_circle_UniqueOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Ope
assert(tflite_builtin_options);
circle::UniqueOptionsBuilder builtin_options_builder{fb};
builtin_options_builder.add_idx_out_type(
- get_circle_tensortype(tflite_builtin_options->idx_out_type()));
+ get_circle_tensortype(tflite_builtin_options->idx_out_type()));
return builtin_options_builder.Finish();
}
diff --git a/compiler/tflite2circle/src/CircleModel.cpp b/compiler/tflite2circle/src/CircleModel.cpp
index 14c44cb36..899bc84f7 100644
--- a/compiler/tflite2circle/src/CircleModel.cpp
+++ b/compiler/tflite2circle/src/CircleModel.cpp
@@ -14,27 +14,29 @@
* limitations under the License.
*/
+#include <cassert>
#include <iostream>
+#include <map>
#include <memory>
#include "CircleModel.h"
#include "DataLookup.h"
+#include <mio_tflite2121/Helper.h>
+
namespace tflite2circle
{
-template <>
-Offset<MetaDataBufferLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec)
+template <> void Offset<MetaDataBufferLink>::build(const TFLFlatBufVec *tflite_flatbuffer_vec)
{
if (tflite_flatbuffer_vec == nullptr)
return;
std::vector<int32_t> metadata_buffer_vec{tflite_flatbuffer_vec->begin(),
tflite_flatbuffer_vec->end()};
- _circle_flatbuffer_vec_offset = fb->CreateVector(metadata_buffer_vec);
+ _circle_flatbuffer_vec_offset = _fb->CreateVector(metadata_buffer_vec);
}
-template <>
-Offset<BufferLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec)
+template <> void Offset<BufferLink>::build(const TFLFlatBufVec *tflite_flatbuffer_vec)
{
std::vector<flatbuffers::Offset<circle::Buffer>> buffers_vec;
@@ -44,21 +46,22 @@ Offset<BufferLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatb
if (it->data())
{
std::vector<uint8_t> data_vec{it->data()->begin(), it->data()->end()};
- buffer_data = fb->CreateVector(data_vec);
+ buffer_data = _fb->CreateVector(data_vec);
}
- circle::BufferBuilder circle_buffer_builder{*fb};
+ circle::BufferBuilder circle_buffer_builder{*_fb};
circle_buffer_builder.add_data(buffer_data);
auto circle_buffers = circle_buffer_builder.Finish();
buffers_vec.emplace_back(circle_buffers);
}
- _circle_flatbuffer_vec_offset = fb->CreateVector(buffers_vec);
+ _circle_flatbuffer_vec_offset = _fb->CreateVector(buffers_vec);
}
-template <>
-Offset<SubGraphLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec)
+template <> void Offset<SubGraphLink>::build(const TFLFlatBufVec *tflite_flatbuffer_vec)
{
std::vector<flatbuffers::Offset<circle::SubGraph>> subgprahs_vec;
+ int32_t subgraph_index = 0;
+
for (auto it_sg : *tflite_flatbuffer_vec)
{
// tensors of subgraph
@@ -72,12 +75,12 @@ Offset<SubGraphLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_fla
if (it->shape())
{
auto shape_vec = std::vector<int32_t>({it->shape()->begin(), it->shape()->end()});
- shape = fb->CreateVector(shape_vec);
+ shape = _fb->CreateVector(shape_vec);
}
// name
flatbuffers::Offset<flatbuffers::String> name;
if (it->name())
- name = fb->CreateString(it->name()->str());
+ name = _fb->CreateString(it->name()->str());
// quantization
flatbuffers::Offset<circle::QuantizationParameters> quantization;
if (it->quantization())
@@ -98,8 +101,8 @@ Offset<SubGraphLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_fla
auto rmax = it->quantization()->max();
tfmin = std::vector<float>{rmin->begin(), rmin->end()};
tfmax = std::vector<float>{rmax->begin(), rmax->end()};
- min = fb->CreateVector(tfmin);
- max = fb->CreateVector(tfmax);
+ min = _fb->CreateVector(tfmin);
+ max = _fb->CreateVector(tfmax);
}
if (it->quantization()->scale() && it->quantization()->zero_point())
@@ -108,11 +111,11 @@ Offset<SubGraphLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_fla
auto rz = it->quantization()->zero_point();
tfscale = std::vector<float>{rs->begin(), rs->end()};
tfzerop = std::vector<int64_t>{rz->begin(), rz->end()};
- scale = fb->CreateVector(tfscale);
- zero_point = fb->CreateVector(tfzerop);
+ scale = _fb->CreateVector(tfscale);
+ zero_point = _fb->CreateVector(tfzerop);
}
- quantization = circle::CreateQuantizationParameters(*fb, min, max, scale, zero_point,
+ quantization = circle::CreateQuantizationParameters(*_fb, min, max, scale, zero_point,
circle::QuantizationDetails_NONE, 0,
quantized_dimension);
}
@@ -126,14 +129,14 @@ Offset<SubGraphLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_fla
flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order;
flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map;
flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>>>
- dim_metadata;
+ dim_metadata;
// traversal_order
if (it->sparsity()->traversal_order())
{
auto traversal_order_vec = std::vector<int32_t>{
- it->sparsity()->traversal_order()->begin(), it->sparsity()->traversal_order()->end()};
- traversal_order = fb->CreateVector(traversal_order_vec);
+ it->sparsity()->traversal_order()->begin(), it->sparsity()->traversal_order()->end()};
+ traversal_order = _fb->CreateVector(traversal_order_vec);
}
// block_map
@@ -141,7 +144,7 @@ Offset<SubGraphLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_fla
{
auto block_map_vec = std::vector<int32_t>{it->sparsity()->block_map()->begin(),
it->sparsity()->block_map()->end()};
- block_map = fb->CreateVector(block_map_vec);
+ block_map = _fb->CreateVector(block_map_vec);
}
// dim_metadata
@@ -152,18 +155,18 @@ Offset<SubGraphLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_fla
// array_segments
auto tflite_array_segments_type = it->array_segments_type();
auto circle_array_segments =
- get_circle_sparse_index_vector(*fb, it, tflite_array_segments_type);
+ get_circle_sparse_index_vector(*_fb, it->array_segments(), tflite_array_segments_type);
auto circle_array_segments_type =
- get_circle_sparse_index_vector_type(tflite_array_segments_type);
+ get_circle_sparse_index_vector_type(tflite_array_segments_type);
// array_indices
auto tflite_array_indices_type = it->array_indices_type();
auto circle_array_indices =
- get_circle_sparse_index_vector(*fb, it, tflite_array_indices_type);
+ get_circle_sparse_index_vector(*_fb, it->array_indices(), tflite_array_indices_type);
auto circle_array_indices_type =
- get_circle_sparse_index_vector_type(tflite_array_indices_type);
+ get_circle_sparse_index_vector_type(tflite_array_indices_type);
- auto circle_dim_metadata_builder = circle::DimensionMetadataBuilder{*fb};
+ auto circle_dim_metadata_builder = circle::DimensionMetadataBuilder{*_fb};
circle_dim_metadata_builder.add_format(get_circle_dimension_type(it->format()));
circle_dim_metadata_builder.add_dense_size(it->dense_size());
@@ -174,9 +177,9 @@ Offset<SubGraphLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_fla
auto dim_metadata = circle_dim_metadata_builder.Finish();
dim_metadata_vec.emplace_back(dim_metadata);
}
- dim_metadata = fb->CreateVector(dim_metadata_vec);
+ dim_metadata = _fb->CreateVector(dim_metadata_vec);
- sparsity = circle::CreateSparsityParameters(*fb, traversal_order, block_map, dim_metadata);
+ sparsity = circle::CreateSparsityParameters(*_fb, traversal_order, block_map, dim_metadata);
}
// shape signature
@@ -184,11 +187,11 @@ Offset<SubGraphLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_fla
if (it->shape_signature())
{
auto shape_signature_vec =
- std::vector<int32_t>({it->shape_signature()->begin(), it->shape_signature()->end()});
- shape_signature = fb->CreateVector(shape_signature_vec);
+ std::vector<int32_t>({it->shape_signature()->begin(), it->shape_signature()->end()});
+ shape_signature = _fb->CreateVector(shape_signature_vec);
}
- circle::TensorBuilder tensor_builder{*fb};
+ circle::TensorBuilder tensor_builder{*_fb};
tensor_builder.add_shape(shape);
tensor_builder.add_type(get_circle_tensortype(it->type()));
tensor_builder.add_buffer(it->buffer());
@@ -200,67 +203,120 @@ Offset<SubGraphLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_fla
auto tensor = tensor_builder.Finish();
tensor_vec.emplace_back(tensor);
}
- auto circle_tensors = fb->CreateVector(tensor_vec);
+ auto circle_tensors = _fb->CreateVector(tensor_vec);
// inputs of subgraph
auto tflite_inputs = it_sg->inputs();
std::vector<int32_t> input_vec{tflite_inputs->begin(), tflite_inputs->end()};
- auto circle_inputs = fb->CreateVector(input_vec);
+ // apply signature_def to the input tensor indices so that the input order matches what
+ // TensorFlow Lite's interpreter._get_full_signature_list() returns, i.e. sorted by name
+ // NOTE this remapping becomes unnecessary once the circle format supports signature_def
+ if (_tfl_signature_def_offsets != nullptr)
+ {
+ for (auto it_signdef : *_tfl_signature_def_offsets)
+ {
+ if (it_signdef->subgraph_index() == subgraph_index)
+ {
+ auto inputs = it_signdef->inputs();
+ assert(inputs->size() == input_vec.size());
+
+ std::map<std::string, uint32_t> map_name_index;
+ for (auto it_tm : *inputs)
+ {
+ map_name_index[it_tm->name()->str()] = it_tm->tensor_index();
+ }
+ uint32_t input_vec_idx = 0;
+ for (auto &item : map_name_index)
+ {
+ input_vec[input_vec_idx++] = item.second;
+ }
+ }
+ }
+ }
+
+ auto circle_inputs = _fb->CreateVector(input_vec);
// outputs of subgraph
auto tflite_outputs = it_sg->outputs();
std::vector<int32_t> output_vec{tflite_outputs->begin(), tflite_outputs->end()};
- auto circle_outputs = fb->CreateVector(output_vec);
+ if (_tfl_signature_def_offsets != nullptr)
+ {
+ // apply SignatureDef
+ for (auto it_signdef : *_tfl_signature_def_offsets)
+ {
+ if (it_signdef->subgraph_index() == subgraph_index)
+ {
+ auto outputs = it_signdef->outputs();
+ assert(outputs->size() == output_vec.size());
+
+ std::map<std::string, uint32_t> map_name_index;
+ for (auto it_tm : *outputs)
+ {
+ map_name_index[it_tm->name()->str()] = it_tm->tensor_index();
+ }
+ uint32_t output_vec_idx = 0;
+ for (auto &item : map_name_index)
+ {
+ output_vec[output_vec_idx++] = item.second;
+ }
+ }
+ }
+ }
+
+ auto circle_outputs = _fb->CreateVector(output_vec);
// operators of subgraph
std::vector<flatbuffers::Offset<circle::Operator>> operator_vec;
auto tflite_operators = it_sg->operators();
- for (auto it : *tflite_operators)
+ if (tflite_operators != nullptr)
{
- // inputs
- std::vector<int32_t> input_vec{it->inputs()->begin(), it->inputs()->end()};
- auto circle_inputs = fb->CreateVector(input_vec);
- // outputs
- std::vector<int32_t> output_vec{it->outputs()->begin(), it->outputs()->end()};
- auto circle_outputs = fb->CreateVector(output_vec);
- // builtin options
- auto circle_builtin_options = get_circle_builtin_options(*fb, it);
- auto circle_builtin_options_type = get_circle_builtin_options_type(it);
- // custom options
- flatbuffers::Offset<flatbuffers::Vector<uint8_t>> circle_custom_options;
- if (it->custom_options())
+ for (auto it : *tflite_operators)
{
- std::vector<uint8_t> custom_options_vec{it->custom_options()->begin(),
- it->custom_options()->end()};
- circle_custom_options = fb->CreateVector(custom_options_vec);
+ // inputs
+ std::vector<int32_t> input_vec{it->inputs()->begin(), it->inputs()->end()};
+ auto circle_inputs = _fb->CreateVector(input_vec);
+ // outputs
+ std::vector<int32_t> output_vec{it->outputs()->begin(), it->outputs()->end()};
+ auto circle_outputs = _fb->CreateVector(output_vec);
+ // builtin options
+ auto circle_builtin_options = get_circle_builtin_options(*_fb, it);
+ auto circle_builtin_options_type = get_circle_builtin_options_type(it);
+ // custom options
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> circle_custom_options;
+ if (it->custom_options())
+ {
+ std::vector<uint8_t> custom_options_vec{it->custom_options()->begin(),
+ it->custom_options()->end()};
+ circle_custom_options = _fb->CreateVector(custom_options_vec);
+ }
+ // custom options format
+ // TODO Make get_circle_custom_options_format
+ assert(it->custom_options_format() == tflite::CustomOptionsFormat_FLEXBUFFERS);
+ auto circle_custom_options_format = circle::CustomOptionsFormat_FLEXBUFFERS;
+
+ circle::OperatorBuilder operator_builder{*_fb};
+ operator_builder.add_opcode_index(it->opcode_index());
+ operator_builder.add_inputs(circle_inputs);
+ operator_builder.add_outputs(circle_outputs);
+ operator_builder.add_builtin_options(circle_builtin_options);
+ operator_builder.add_builtin_options_type(circle_builtin_options_type);
+ operator_builder.add_custom_options(circle_custom_options);
+ operator_builder.add_custom_options_format(circle_custom_options_format);
+ // TODO mutating_variable_inputs
+ auto opeartor = operator_builder.Finish();
+ operator_vec.emplace_back(opeartor);
}
- // custom options format
- // TODO Make get_circle_custom_options_format
- assert(it->custom_options_format() == tflite::CustomOptionsFormat_FLEXBUFFERS);
- auto circle_custom_options_format = circle::CustomOptionsFormat_FLEXBUFFERS;
-
- circle::OperatorBuilder operator_builder{*fb};
- operator_builder.add_opcode_index(it->opcode_index());
- operator_builder.add_inputs(circle_inputs);
- operator_builder.add_outputs(circle_outputs);
- operator_builder.add_builtin_options(circle_builtin_options);
- operator_builder.add_builtin_options_type(circle_builtin_options_type);
- operator_builder.add_custom_options(circle_custom_options);
- operator_builder.add_custom_options_format(circle_custom_options_format);
- // TODO mutating_variable_inputs
- auto opeartor = operator_builder.Finish();
- operator_vec.emplace_back(opeartor);
}
- auto circle_operators = fb->CreateVector(operator_vec);
+ auto circle_operators = _fb->CreateVector(operator_vec);
// name of subgraph
- auto subgraphs_name = fb->CreateString(it_sg->name());
+ auto subgraphs_name = _fb->CreateString(it_sg->name());
// subgraphs
- auto circle_subgraph_builder = circle::SubGraphBuilder{*fb};
+ auto circle_subgraph_builder = circle::SubGraphBuilder{*_fb};
circle_subgraph_builder.add_tensors(circle_tensors);
circle_subgraph_builder.add_inputs(circle_inputs);
@@ -271,47 +327,75 @@ Offset<SubGraphLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_fla
auto circle_subgraph = circle_subgraph_builder.Finish();
subgprahs_vec.emplace_back(circle_subgraph);
+
+ // next subgraph
+ subgraph_index = subgraph_index + 1;
}
- _circle_flatbuffer_vec_offset = fb->CreateVector(subgprahs_vec);
+ _circle_flatbuffer_vec_offset = _fb->CreateVector(subgprahs_vec);
}
-template <>
-Offset<OperatorCodeLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec)
+template <> void Offset<OperatorCodeLink>::build(const TFLFlatBufVec *tflite_flatbuffer_vec)
{
std::vector<flatbuffers::Offset<circle::OperatorCode>> operator_code_vec;
for (auto it : *tflite_flatbuffer_vec)
{
- auto custom_code = fb->CreateString(it->custom_code());
- circle::OperatorCodeBuilder operator_code_builder{*fb};
- operator_code_builder.add_builtin_code(get_circle_builtin_code(it->builtin_code()));
+ auto custom_code = _fb->CreateString(it->custom_code());
+ circle::OperatorCodeBuilder operator_code_builder{*_fb};
+ auto de_code = it->deprecated_builtin_code();
+ auto bt_code = it->builtin_code();
+
+ // There are two builtin code fields: deprecated_builtin_code and the (extended) builtin_code
+ // the deprecated builtin code covers opcodes 0~126
+ // the extended builtin code covers opcodes 127 and above
+ // NOTE 127 = BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES
+ if (de_code >= 0 and de_code < 127)
+ {
+ // Use deprecated builtin opcode.
+ auto cir_de_code = get_circle_builtin_code(de_code);
+ auto cir_bt_code = get_circle_builtin_code(bt_code);
+ // correct bt_code where bt_code == 0 for old tflite format
+ if (cir_bt_code == 0)
+ cir_bt_code = static_cast<circle::BuiltinOperator>(cir_de_code);
+ operator_code_builder.add_deprecated_builtin_code(cir_de_code);
+ operator_code_builder.add_builtin_code(cir_bt_code);
+ }
+ else
+ {
+ // Use extended builtin opcode
+ // Set 127 (PLACEHOLDER_FOR_GREATER_OP_CODES) for deprecated builtin code
+ auto cir_bt_code = get_circle_builtin_code(bt_code);
+ operator_code_builder.add_deprecated_builtin_code(
+ tflite::BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES);
+ operator_code_builder.add_builtin_code(cir_bt_code);
+ }
operator_code_builder.add_custom_code(custom_code);
operator_code_builder.add_version(it->version());
auto code = operator_code_builder.Finish();
operator_code_vec.emplace_back(code);
}
- _circle_flatbuffer_vec_offset = fb->CreateVector(operator_code_vec);
+ _circle_flatbuffer_vec_offset = _fb->CreateVector(operator_code_vec);
}
-CircleModel::CircleModel(FlatBufBuilder &fb, TFLModel &model)
- : _version{0}, _description{fb->CreateString("nnpackage")}, _fb{fb}
+CircleModel::CircleModel(FlatBufBuilder &fb)
+ : _version{0}, _description{fb->CreateString("ONE-tflite2circle")}, _fb{fb}
{
- const tflite::Model *tfl_model = model.load_model();
- // verify flatbuffers
- flatbuffers::Verifier verifier{reinterpret_cast<const uint8_t *>(model._data.data()),
- model._data.size()};
- if (!tflite::VerifyModelBuffer(verifier))
- {
- throw std::runtime_error("ERROR: Failed to verify tflite");
- }
+ // nothing to do
+}
+
+void CircleModel::load_offsets(const tflite::Model *tfl_model)
+{
+ _operator_codes_offset = std::make_unique<Offset<OperatorCodeLink>>(_fb);
+ _subGraphs_offset = std::make_unique<Offset<SubGraphLink>>(_fb);
+ _buffers_offset = std::make_unique<Offset<BufferLink>>(_fb);
+ _metadata_buffer_offset = std::make_unique<Offset<MetaDataBufferLink>>(_fb);
+
+ _subGraphs_offset->set_signature_defs(tfl_model->signature_defs());
- _operator_codes_offset =
- std::make_unique<Offset<OperatorCodeLink>>(fb, tfl_model->operator_codes());
- _subGraphs_offset = std::make_unique<Offset<SubGraphLink>>(fb, tfl_model->subgraphs());
- _buffers_offset = std::make_unique<Offset<BufferLink>>(fb, tfl_model->buffers());
- _metadata_buffer_offset =
- std::make_unique<Offset<MetaDataBufferLink>>(fb, tfl_model->metadata_buffer());
- model_build();
+ _operator_codes_offset->build(tfl_model->operator_codes());
+ _subGraphs_offset->build(tfl_model->subgraphs());
+ _buffers_offset->build(tfl_model->buffers());
+ _metadata_buffer_offset->build(tfl_model->metadata_buffer());
}
void CircleModel::model_build(void) const
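The SignatureDef remapping added above leans on `std::map` iterating its keys in sorted order. A standalone sketch of just that step, with hypothetical tensor names and indices (the real data comes from the model's `signature_defs()`):

```cpp
#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <vector>

// A std::map keyed by tensor name iterates in sorted key order, so writing its
// values back into input_vec yields the name-sorted order that TensorFlow
// Lite's interpreter._get_full_signature_list() produces.
int main()
{
  std::vector<int32_t> input_vec{7, 3, 5}; // subgraph input tensor indices (hypothetical)
  std::map<std::string, uint32_t> map_name_index{
    {"serving_default_x", 3}, {"serving_default_y", 7}, {"serving_default_z", 5}};

  uint32_t input_vec_idx = 0;
  for (auto &item : map_name_index)
    input_vec[input_vec_idx++] = item.second;

  for (auto idx : input_vec)
    std::cout << idx << ' '; // prints "3 7 5": indices ordered by input name
  std::cout << '\n';
  return 0;
}
```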
diff --git a/compiler/tflite2circle/src/DataLookup.cpp b/compiler/tflite2circle/src/DataLookup.cpp
index 75504b062..c16e60196 100644
--- a/compiler/tflite2circle/src/DataLookup.cpp
+++ b/compiler/tflite2circle/src/DataLookup.cpp
@@ -34,6 +34,29 @@ circle::BuiltinOperator get_circle_builtin_code(tflite::BuiltinOperator tfl_bop)
}
}
+int8_t get_circle_builtin_code(int8_t tfl_bop_i8)
+{
+ return get_circle_builtin_code(static_cast<int32_t>(tfl_bop_i8));
+}
+
+int32_t get_circle_builtin_code(int32_t tfl_bop_i32)
+{
+ tflite::BuiltinOperator tfl_bop = static_cast<tflite::BuiltinOperator>(tfl_bop_i32);
+
+ switch (tfl_bop)
+ {
+#define TFL_OPERATOR(OP) \
+ case tflite::BuiltinOperator_##OP: \
+ return static_cast<int32_t>(circle::BuiltinOperator_##OP);
+#include "TFLOperator.lst"
+#undef TFL_OPERATOR
+ case tflite::BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES:
+ return static_cast<int32_t>(circle::BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES);
+ default:
+ throw std::runtime_error("tflite2circle: wrong op");
+ }
+}
+
circle::TensorType get_circle_tensortype(tflite::TensorType tfl_tt)
{
switch (tfl_tt)
@@ -137,8 +160,7 @@ circle::DimensionType get_circle_dimension_type(tflite::DimensionType tfl_dim_ty
}
flatbuffers::Offset<void>
-get_circle_sparse_index_vector(flatbuffers::FlatBufferBuilder &fb,
- const tflite::DimensionMetadata *dm,
+get_circle_sparse_index_vector(flatbuffers::FlatBufferBuilder &fb, const void *v_array,
const tflite::SparseIndexVector &tfl_sparse_index_vector_type)
{
switch (tfl_sparse_index_vector_type)
@@ -147,9 +169,9 @@ get_circle_sparse_index_vector(flatbuffers::FlatBufferBuilder &fb,
return flatbuffers::Offset<void>();
case tflite::SparseIndexVector_Int32Vector:
{
+ const tflite::Int32Vector *i32_array = static_cast<const tflite::Int32Vector *>(v_array);
auto values_vec_int32 =
- std::vector<int32_t>{dm->array_segments_as_Int32Vector()->values()->begin(),
- dm->array_segments_as_Int32Vector()->values()->end()};
+ std::vector<int32_t>{i32_array->values()->begin(), i32_array->values()->end()};
auto values_int32 = fb.CreateVector(values_vec_int32);
circle::Int32VectorBuilder int32_vector_builder{fb};
int32_vector_builder.add_values(values_int32);
@@ -157,9 +179,9 @@ get_circle_sparse_index_vector(flatbuffers::FlatBufferBuilder &fb,
}
case tflite::SparseIndexVector_Uint16Vector:
{
+ const tflite::Uint16Vector *u16_array = static_cast<const tflite::Uint16Vector *>(v_array);
auto values_vec_uint16 =
- std::vector<uint16_t>{dm->array_segments_as_Uint16Vector()->values()->begin(),
- dm->array_segments_as_Uint16Vector()->values()->end()};
+ std::vector<uint16_t>{u16_array->values()->begin(), u16_array->values()->end()};
auto values_uint16 = fb.CreateVector(values_vec_uint16);
circle::Uint16VectorBuilder uint16_vector_builder{fb};
uint16_vector_builder.add_values(values_uint16);
@@ -167,9 +189,9 @@ get_circle_sparse_index_vector(flatbuffers::FlatBufferBuilder &fb,
}
case tflite::SparseIndexVector_Uint8Vector:
{
+ const tflite::Uint8Vector *u8_array = static_cast<const tflite::Uint8Vector *>(v_array);
auto values_vec_uint8 =
- std::vector<uint8_t>{dm->array_segments_as_Uint8Vector()->values()->begin(),
- dm->array_segments_as_Uint8Vector()->values()->end()};
+ std::vector<uint8_t>{u8_array->values()->begin(), u8_array->values()->end()};
auto values_uint8 = fb.CreateVector(values_vec_uint8);
circle::Uint8VectorBuilder uint8_vector_builder{fb};
uint8_vector_builder.add_values(values_uint8);
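The new `int32_t get_circle_builtin_code` overload reuses the TFLOperator.lst X-macro, the same trick the existing enum overload uses. A minimal self-contained sketch of that technique, with a hypothetical two-entry macro list standing in for the real `.lst` include and toy enums standing in for the generated schemas:

```cpp
#include <stdexcept>

// Hypothetical stand-ins for the generated tflite/circle BuiltinOperator enums.
enum class TflOp { ADD = 0, MUL = 18 };
enum class CirOp { ADD = 0, MUL = 18 };

// In the real code this list lives in TFLOperator.lst and is pulled in with
// #include; each entry expands to one switch case below.
#define SUPPORTED_OPS \
  X_OPERATOR(ADD)     \
  X_OPERATOR(MUL)

CirOp to_circle(TflOp op)
{
  switch (op)
  {
#define X_OPERATOR(OP) \
  case TflOp::OP:      \
    return CirOp::OP;
    SUPPORTED_OPS
#undef X_OPERATOR
    default:
      throw std::runtime_error("unsupported op");
  }
}

int main() { return to_circle(TflOp::MUL) == CirOp::MUL ? 0 : 1; }
```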
diff --git a/compiler/tflite2circle/src/DataLookup.h b/compiler/tflite2circle/src/DataLookup.h
index 26ad74666..f346b01f4 100644
--- a/compiler/tflite2circle/src/DataLookup.h
+++ b/compiler/tflite2circle/src/DataLookup.h
@@ -27,19 +27,22 @@ namespace tflite2circle
* @brief Returns circle builtin_code according to tflite.
*
* @note You can see a list of currently supported BuiltinOperator in TFLOperator.lst file.
-*/
+ */
circle::BuiltinOperator get_circle_builtin_code(tflite::BuiltinOperator tfl_bop);
+int8_t get_circle_builtin_code(int8_t tfl_bop_i8);
+int32_t get_circle_builtin_code(int32_t tfl_bop_i32);
+
/**
* @brief Returns circle TensorType according to tflite.
*
* @note You can see a list of currently supported TensorType in TFLTensorType.lst file.
-*/
+ */
circle::TensorType get_circle_tensortype(tflite::TensorType tfl_tt);
/**
* @brief Returns circle Padding enum according to tflite.
-*/
+ */
circle::Padding get_circle_padding(tflite::Padding tfl_p);
/**
@@ -47,7 +50,7 @@ circle::Padding get_circle_padding(tflite::Padding tfl_p);
*
* @note You can see a list of currently supported ActivationFunctionType in
* TFLActivationFunctionType.lst file.
-*/
+ */
circle::ActivationFunctionType
get_circle_activation_function_type(tflite::ActivationFunctionType tfl_aft);
@@ -60,7 +63,7 @@ get_circle_activation_function_type(tflite::ActivationFunctionType tfl_aft);
* This function calls the build_circle_##BuiltinOptions internally(e.g.
* build_circle_AbsOptions, build_circle_AddOptions, etc.), so refer to it for a more
* detailed implementation.
-*/
+ */
flatbuffers::Offset<void> get_circle_builtin_options(flatbuffers::FlatBufferBuilder &fb,
const tflite::Operator *op);
@@ -68,30 +71,29 @@ flatbuffers::Offset<void> get_circle_builtin_options(flatbuffers::FlatBufferBuil
* @brief Returns circle builtin_options_type according to tflite.
*
* @note You can see a list of currently supported BuiltinOptions in TFLBuiltinOptions.lst file.
-*/
+ */
circle::BuiltinOptions get_circle_builtin_options_type(const tflite::Operator *op);
/**
* @brief Returns circle MirrorPadMode according to tflite.
-*/
+ */
circle::MirrorPadMode get_circle_mirrorpad_mode(tflite::MirrorPadMode tfl_mode);
/**
* @brief Returns circle DimensionType according to tflite.
-*/
+ */
circle::DimensionType get_circle_dimension_type(tflite::DimensionType tfl_dim_type);
/**
* @brief Returns circle SparseIndexVector according to tflite.
-*/
+ */
flatbuffers::Offset<void>
-get_circle_sparse_index_vector(flatbuffers::FlatBufferBuilder &fb,
- const tflite::DimensionMetadata *dm,
+get_circle_sparse_index_vector(flatbuffers::FlatBufferBuilder &fb, const void *values,
const tflite::SparseIndexVector &tfl_sparse_index_vector_type);
/**
* @brief Returns circle SparseIndexVector type according to tflite.
-*/
+ */
circle::SparseIndexVector
get_circle_sparse_index_vector_type(const tflite::SparseIndexVector &tfl_sparse_index_vector_type);
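The signature change above (from `const tflite::DimensionMetadata *` to `const void *` plus a type tag) lets one helper serve both the `array_segments` and `array_indices` union members instead of hard-coding `array_segments_as_*()`. A minimal sketch of that void-pointer-plus-tag dispatch, with hypothetical structs standing in for the flatbuffers-generated vector tables:

```cpp
#include <cstdint>
#include <iostream>

enum class IndexVectorType { None, Int32, Uint16 };

// Hypothetical stand-ins for tflite::Int32Vector / tflite::Uint16Vector.
struct Int32Vector { int32_t value; };
struct Uint16Vector { uint16_t value; };

// The caller passes whichever union member it holds plus the matching tag;
// the callee casts back, exactly as get_circle_sparse_index_vector now does.
int64_t first_value(const void *v_array, IndexVectorType type)
{
  switch (type)
  {
    case IndexVectorType::Int32:
      return static_cast<const Int32Vector *>(v_array)->value;
    case IndexVectorType::Uint16:
      return static_cast<const Uint16Vector *>(v_array)->value;
    default:
      return -1;
  }
}

int main()
{
  Int32Vector segments{42};
  Uint16Vector indices{7};
  // Two different union members flow through one helper, as with
  // array_segments() and array_indices() in DataLookup.cpp.
  std::cout << first_value(&segments, IndexVectorType::Int32) << ' '
            << first_value(&indices, IndexVectorType::Uint16) << '\n';
  return 0;
}
```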
diff --git a/compiler/tflite2circle/src/TFLBuiltinOptions.lst b/compiler/tflite2circle/src/TFLBuiltinOptions.lst
index 22b59863b..ac77c9abf 100644
--- a/compiler/tflite2circle/src/TFLBuiltinOptions.lst
+++ b/compiler/tflite2circle/src/TFLBuiltinOptions.lst
@@ -9,7 +9,7 @@ TFL_BUILTIN_OPTIONS(DepthwiseConv2DOptions)
//TFL_BUILTIN_OPTIONS(ConcatEmbeddingsOptions)
//TFL_BUILTIN_OPTIONS(LSHProjectionOptions)
TFL_BUILTIN_OPTIONS(Pool2DOptions)
-//TFL_BUILTIN_OPTIONS(SVDFOptions)
+TFL_BUILTIN_OPTIONS(SVDFOptions)
//TFL_BUILTIN_OPTIONS(RNNOptions)
TFL_BUILTIN_OPTIONS(FullyConnectedOptions)
TFL_BUILTIN_OPTIONS(SoftmaxOptions)
@@ -28,6 +28,7 @@ TFL_BUILTIN_OPTIONS(MulOptions)
TFL_BUILTIN_OPTIONS(PadOptions)
TFL_BUILTIN_OPTIONS(PadV2Options)
TFL_BUILTIN_OPTIONS(GatherOptions)
+TFL_BUILTIN_OPTIONS(GeluOptions)
TFL_BUILTIN_OPTIONS(BatchToSpaceNDOptions)
TFL_BUILTIN_OPTIONS(SpaceToBatchNDOptions)
TFL_BUILTIN_OPTIONS(TransposeOptions)
@@ -42,7 +43,7 @@ TFL_BUILTIN_OPTIONS(TopKV2Options)
TFL_BUILTIN_OPTIONS(SplitOptions)
TFL_BUILTIN_OPTIONS(LogSoftmaxOptions)
TFL_BUILTIN_OPTIONS(CastOptions)
-//TFL_BUILTIN_OPTIONS(DequantizeOptions)
+TFL_BUILTIN_OPTIONS(DequantizeOptions)
TFL_BUILTIN_OPTIONS(MaximumMinimumOptions)
TFL_BUILTIN_OPTIONS(ArgMaxOptions)
TFL_BUILTIN_OPTIONS(LessOptions)
@@ -63,7 +64,7 @@ TFL_BUILTIN_OPTIONS(NotEqualOptions)
TFL_BUILTIN_OPTIONS(ShapeOptions)
TFL_BUILTIN_OPTIONS(PowOptions)
TFL_BUILTIN_OPTIONS(ArgMinOptions)
-//TFL_BUILTIN_OPTIONS(FakeQuantOptions)
+TFL_BUILTIN_OPTIONS(FakeQuantOptions)
TFL_BUILTIN_OPTIONS(PackOptions)
TFL_BUILTIN_OPTIONS(LogicalOrOptions)
TFL_BUILTIN_OPTIONS(OneHotOptions)
@@ -74,9 +75,9 @@ TFL_BUILTIN_OPTIONS(FloorDivOptions)
TFL_BUILTIN_OPTIONS(SquareOptions)
TFL_BUILTIN_OPTIONS(ZerosLikeOptions)
TFL_BUILTIN_OPTIONS(FillOptions)
-//TFL_BUILTIN_OPTIONS(BidirectionalSequenceLSTMOptions)
+TFL_BUILTIN_OPTIONS(BidirectionalSequenceLSTMOptions)
//TFL_BUILTIN_OPTIONS(BidirectionalSequenceRNNOptions)
-//TFL_BUILTIN_OPTIONS(UnidirectionalSequenceLSTMOptions)
+TFL_BUILTIN_OPTIONS(UnidirectionalSequenceLSTMOptions)
TFL_BUILTIN_OPTIONS(FloorModOptions)
TFL_BUILTIN_OPTIONS(RangeOptions)
TFL_BUILTIN_OPTIONS(ResizeNearestNeighborOptions)
@@ -106,3 +107,4 @@ TFL_BUILTIN_OPTIONS(RankOptions)
TFL_BUILTIN_OPTIONS(ScatterNdOptions)
TFL_BUILTIN_OPTIONS(SegmentSumOptions)
TFL_BUILTIN_OPTIONS(BatchMatMulOptions)
+TFL_BUILTIN_OPTIONS(DensifyOptions)
diff --git a/compiler/tflite2circle/src/TFLModel.cpp b/compiler/tflite2circle/src/TFLModel.cpp
index 33f11fb83..470b1aec7 100644
--- a/compiler/tflite2circle/src/TFLModel.cpp
+++ b/compiler/tflite2circle/src/TFLModel.cpp
@@ -16,6 +16,8 @@
#include <iostream>
+#include <foder/FileLoader.h>
+
#include "TFLModel.h"
namespace tflite2circle
@@ -23,21 +25,21 @@ namespace tflite2circle
TFLModel::TFLModel(const std::string &path)
{
- _infile.open(path, std::ios::binary | std::ios::in);
- _valid = _infile.good();
+ foder::FileLoader file_loader{path};
+ _data = file_loader.load();
}
-const tflite::Model *TFLModel::load_model(void)
+bool TFLModel::verify_data(void)
{
- assert(_valid == true);
- _infile.seekg(0, std::ios::end);
- auto fileSize = _infile.tellg();
- _infile.seekg(0, std::ios::beg);
- _data.resize(fileSize);
- _infile.read(_data.data(), fileSize);
- _infile.close();
-
- return tflite::GetModel(_data.data());
+ // verify flatbuffers
+ flatbuffers::Verifier verifier{reinterpret_cast<const uint8_t *>(_data.data()), _data.size()};
+ if (not tflite::VerifyModelBuffer(verifier))
+ {
+ return false;
+ }
+ return true;
}
+const tflite::Model *TFLModel::get_model(void) { return tflite::GetModel(_data.data()); }
+
} // namespace tflite2circle
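With loading, verification, and conversion now split into separate steps, a malformed `.tflite` can be rejected before any conversion work starts. A hedged sketch of how the reworked API fits together (the real driver lives in tflite2circle/driver/Driver.cpp and its error handling may differ; `FlatBufBuilder` is assumed to be the builder alias used by CircleModel.h):

```cpp
#include <stdexcept>
#include <string>

#include "CircleModel.h"
#include "TFLModel.h"

void convert(const std::string &input_path, tflite2circle::FlatBufBuilder &fb)
{
  tflite2circle::TFLModel tfl_model(input_path); // foder::FileLoader reads the file
  if (not tfl_model.verify_data())               // flatbuffers verifier, new in this change
    throw std::runtime_error("ERROR: Failed to verify tflite");

  tflite2circle::CircleModel circle_model(fb);
  circle_model.load_offsets(tfl_model.get_model()); // builds all Offset<> links
  circle_model.model_build();                       // finishes the circle flatbuffer
}
```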
diff --git a/compiler/tflite2circle/src/TFLOperator.lst b/compiler/tflite2circle/src/TFLOperator.lst
index 942c846c7..b7db49b0a 100644
--- a/compiler/tflite2circle/src/TFLOperator.lst
+++ b/compiler/tflite2circle/src/TFLOperator.lst
@@ -131,3 +131,24 @@ TFL_OPERATOR(SELECT_V2)
TFL_OPERATOR(DENSIFY)
TFL_OPERATOR(SEGMENT_SUM)
TFL_OPERATOR(BATCH_MATMUL)
+// PLACEHOLDER_FOR_GREATER_OP_CODES = 127
+TFL_OPERATOR(CUMSUM)
+TFL_OPERATOR(CALL_ONCE)
+TFL_OPERATOR(BROADCAST_TO)
+TFL_OPERATOR(RFFT2D)
+TFL_OPERATOR(CONV_3D)
+TFL_OPERATOR(IMAG)
+TFL_OPERATOR(REAL)
+TFL_OPERATOR(COMPLEX_ABS)
+TFL_OPERATOR(HASHTABLE)
+TFL_OPERATOR(HASHTABLE_FIND)
+TFL_OPERATOR(HASHTABLE_IMPORT)
+TFL_OPERATOR(HASHTABLE_SIZE)
+TFL_OPERATOR(REDUCE_ALL)
+TFL_OPERATOR(CONV_3D_TRANSPOSE)
+TFL_OPERATOR(VAR_HANDLE)
+TFL_OPERATOR(READ_VARIABLE)
+TFL_OPERATOR(ASSIGN_VARIABLE)
+TFL_OPERATOR(BROADCAST_ARGS)
+TFL_OPERATOR(RANDOM_STANDARD_NORMAL)
+TFL_OPERATOR(GELU)
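The operators added after the `PLACEHOLDER_FOR_GREATER_OP_CODES` comment are exactly the ones that cannot fit the old int8 `deprecated_builtin_code` field, which is why `Offset<OperatorCodeLink>::build` now branches on 127. A sketch of that compatibility rule, with a hypothetical struct standing in for `circle::OperatorCode`:

```cpp
#include <cstdint>
#include <iostream>

constexpr int32_t kPlaceholder = 127; // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES

struct OpCodePair
{
  int8_t deprecated_builtin_code;
  int32_t builtin_code;
};

OpCodePair make_op_code(int8_t de_code, int32_t bt_code)
{
  if (de_code >= 0 && de_code < kPlaceholder)
  {
    // Old-style opcode: keep both fields; patch bt_code == 0 written by old
    // tflite exporters that never filled the extended field.
    if (bt_code == 0)
      bt_code = de_code;
    return {de_code, bt_code};
  }
  // Extended opcode (e.g. GELU above): the int8 field can only hold the marker.
  return {static_cast<int8_t>(kPlaceholder), bt_code};
}

int main()
{
  auto old_style = make_op_code(18, 0);   // hypothetical pre-127 opcode
  auto extended = make_op_code(127, 150); // hypothetical post-127 opcode
  std::cout << int(old_style.deprecated_builtin_code) << ' ' << old_style.builtin_code << '\n'
            << int(extended.deprecated_builtin_code) << ' ' << extended.builtin_code << '\n';
  return 0;
}
```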
diff --git a/compiler/tfts/CMakeLists.txt b/compiler/tfts/CMakeLists.txt
index eda39b6ef..2dac9445b 100644
--- a/compiler/tfts/CMakeLists.txt
+++ b/compiler/tfts/CMakeLists.txt
@@ -1,9 +1,8 @@
nncc_find_resource(TensorFlowTests)
-nnas_find_package(TensorFlow QUIET)
-if(NOT TensorFlow_FOUND)
+if(NOT TARGET nnkit_tf_backend)
return()
-endif(NOT TensorFlow_FOUND)
+endif(NOT TARGET nnkit_tf_backend)
if(NOT TARGET tfkit)
return()
diff --git a/compiler/vconone/CMakeLists.txt b/compiler/vconone/CMakeLists.txt
index be4398996..c5c9be25f 100644
--- a/compiler/vconone/CMakeLists.txt
+++ b/compiler/vconone/CMakeLists.txt
@@ -1,5 +1,5 @@
if (NOT VCONONE_VERSION)
- set(VCONONE_VERSION 0x0000000000090001)
+ set(VCONONE_VERSION 0x0000000000190001)
# NOTE order is [build patch minor major]
# if VCONONE_VERSION is set with -D option, it will be cached
# you may have to remove cache file if you remove -D option
diff --git a/compiler/vconone/src/version.cpp b/compiler/vconone/src/version.cpp
index 9b693c621..fd34e9204 100644
--- a/compiler/vconone/src/version.cpp
+++ b/compiler/vconone/src/version.cpp
@@ -54,7 +54,7 @@ std::string get_string(void)
std::string get_copyright(void)
{
std::string str;
- str = "Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved\r\n";
+ str = "Copyright (c) 2020-2023 Samsung Electronics Co., Ltd. All Rights Reserved\r\n";
str += "Licensed under the Apache License, Version 2.0\r\n";
str += "https://github.com/Samsung/ONE";
return str;
diff --git a/compiler/visq-unittest/CMakeLists.txt b/compiler/visq-unittest/CMakeLists.txt
new file mode 100644
index 000000000..7cc0bc040
--- /dev/null
+++ b/compiler/visq-unittest/CMakeLists.txt
@@ -0,0 +1,59 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+unset(VISQ_TEST_DEPS)
+
+###
+### Copy test files
+###
+add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/test
+ COMMAND ${CMAKE_COMMAND} -E copy_directory
+ ${CMAKE_CURRENT_SOURCE_DIR}/test ${CMAKE_CURRENT_BINARY_DIR}/test)
+
+list(APPEND VISQ_TEST_DEPS ${CMAKE_CURRENT_BINARY_DIR}/test)
+
+###
+### Import visqlib module
+###
+get_target_property(VISQ_BIN_PATH visq BINARY_DIR)
+add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/visqlib
+ COMMAND ${CMAKE_COMMAND} -E create_symlink
+ ${VISQ_BIN_PATH}/visqlib ${CMAKE_CURRENT_BINARY_DIR}/visqlib)
+
+list(APPEND VISQ_TEST_DEPS ${CMAKE_CURRENT_BINARY_DIR}/visqlib)
+
+###
+### Import pics module
+###
+get_target_property(PICS_BIN_PATH pics BINARY_DIR)
+add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/circle
+ COMMAND ${CMAKE_COMMAND} -E create_symlink
+ ${PICS_BIN_PATH}/circle ${CMAKE_CURRENT_BINARY_DIR}/circle)
+
+list(APPEND VISQ_TEST_DEPS ${CMAKE_CURRENT_BINARY_DIR}/circle)
+
+###
+### Generate Resources.py
+###
+set(RESOURCE_FILE "${CMAKE_CURRENT_BINARY_DIR}/test/Resources.py")
+
+get_target_property(FP32_MODEL_DIR testDataGenerator BINARY_DIR)
+
+add_custom_command(
+ OUTPUT ${RESOURCE_FILE}
+ COMMAND ${CMAKE_COMMAND} -E echo 'fp32_model_dir=\"${FP32_MODEL_DIR}\"' >> ${RESOURCE_FILE}
+ COMMENT "Generate file to specify resource location"
+)
+
+list(APPEND VISQ_TEST_DEPS ${RESOURCE_FILE})
+
+add_custom_target(visq_unittest ALL DEPENDS ${VISQ_TEST_DEPS})
+
+# Use Python in venv to run unittest with pydot module
+add_test(
+ NAME visq_unittest
+ COMMAND ${NNCC_OVERLAY_DIR}/venv_2_12_1/bin/python -m unittest
+ WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+)
diff --git a/compiler/visq-unittest/README.md b/compiler/visq-unittest/README.md
new file mode 100644
index 000000000..e90837b4e
--- /dev/null
+++ b/compiler/visq-unittest/README.md
@@ -0,0 +1,3 @@
+# visq-unittest
+
+_visq-unittest_ is a module for testing _visq_
diff --git a/compiler/visq-unittest/requires.cmake b/compiler/visq-unittest/requires.cmake
new file mode 100644
index 000000000..bf7a41fcd
--- /dev/null
+++ b/compiler/visq-unittest/requires.cmake
@@ -0,0 +1,3 @@
+require("pics")
+require("common-artifacts")
+require("visq")
diff --git a/compiler/visq-unittest/test/__init__.py b/compiler/visq-unittest/test/__init__.py
new file mode 100644
index 000000000..0c29109f0
--- /dev/null
+++ b/compiler/visq-unittest/test/__init__.py
@@ -0,0 +1 @@
+# DO NOT REMOVE THIS FILE
diff --git a/compiler/visq-unittest/test/testDotBuilder.py b/compiler/visq-unittest/test/testDotBuilder.py
new file mode 100644
index 000000000..d0e6adaae
--- /dev/null
+++ b/compiler/visq-unittest/test/testDotBuilder.py
@@ -0,0 +1,43 @@
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Test visqlib.DotBuilder module"""
+
+import unittest
+import pydot
+
+from visqlib.DotBuilder import DotBuilder
+from test.Resources import fp32_model_dir
+
+
+class VisqDotBuilderTest(unittest.TestCase):
+ def test_dot_builder_wrong_input_file(self):
+ self.assertRaises(FileNotFoundError, DotBuilder, "wrong", "wrong", "wrong",
+ "wrong")
+
+ def test_dot_builder(self):
+ test_colors = [{"b": 0, "e": 0.5, "c": "green"}, {"b": 0.5, "e": 1, "c": "red"}]
+ test_qerror_map = dict()
+ test_qerror_map["ofm"] = 0.1
+ builder = DotBuilder(fp32_model_dir + "/Add_000.circle", "Add_000.dot", "MPEIR",
+ test_colors)
+ builder.save(test_qerror_map)
+
+ graph = pydot.graph_from_dot_file("Add_000.dot")[0]
+ # get_node returns two "ofm" nodes; index 0 is the graph output, so check index 1
+ ofm_node = graph.get_node("\"ofm\"")[1]
+ self.assertEqual("green", ofm_node.get_fillcolor())
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/compiler/visq-unittest/test/testPalette.py b/compiler/visq-unittest/test/testPalette.py
new file mode 100644
index 000000000..bf5fbb42e
--- /dev/null
+++ b/compiler/visq-unittest/test/testPalette.py
@@ -0,0 +1,42 @@
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+'''Test visqlib.Palette module'''
+
+import unittest
+
+from visqlib.Palette import YLORRD9Palette
+
+
+class VisqPaletteTest(unittest.TestCase):
+ def test_ylorrd9(self):
+ min_test = [0.0, 0, -100, -100]
+ max_test = [1.0, 500, 100, -10]
+
+ for min_val, max_val in zip(min_test, max_test):
+ palette = YLORRD9Palette(qerror_min=min_val, qerror_max=max_val)
+ cs = palette.colorscheme()
+ self.assertEqual(9, len(cs))
+
+ def test_ylorrd9_wrong_minmax(self):
+ min_test = [0.0, 10]
+ max_test = [0.0, 0]
+
+ for min_val, max_val in zip(min_test, max_test):
+ # min must be less than max
+ self.assertRaises(
+ RuntimeError, YLORRD9Palette, qerror_min=min_val, qerror_max=max_val)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/compiler/visq-unittest/test/testQErrorComputer.py b/compiler/visq-unittest/test/testQErrorComputer.py
new file mode 100644
index 000000000..3065b7171
--- /dev/null
+++ b/compiler/visq-unittest/test/testQErrorComputer.py
@@ -0,0 +1,199 @@
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+'''Test visqlib.QErrorComputer module'''
+
+import unittest
+import tempfile
+import numpy as np
+import os
+import json
+
+from visqlib.QErrorComputer import MPEIRComputer
+from visqlib.QErrorComputer import MSEComputer
+from visqlib.QErrorComputer import TAEComputer
+from visqlib.QErrorComputer import SRMSEComputer
+
+
+class VisqQErrorComputerTest(unittest.TestCase):
+ def setUp(self):
+ "Called before running each test"
+ self.fp32_dir = tempfile.TemporaryDirectory()
+ self.fq_dir = tempfile.TemporaryDirectory()
+
+ def tearDown(self):
+ "Called after running each test"
+ self.fp32_dir.cleanup()
+ self.fq_dir.cleanup()
+
+ def _setUpSingleTensorData(self):
+ tensor_id = {}
+ tensor_id['test'] = 0
+ with open(self.fp32_dir.name + '/tensors.json', 'w') as f:
+ json.dump(tensor_id, f)
+ with open(self.fq_dir.name + '/tensors.json', 'w') as f:
+ json.dump(tensor_id, f)
+ scales = {}
+ scales['test'] = 2.0
+ with open(self.fq_dir.name + '/scales.txt', 'w') as f:
+ json.dump(scales, f)
+ os.mkdir(self.fp32_dir.name + '/0')
+ os.mkdir(self.fq_dir.name + '/0')
+ test_data = np.zeros(16)
+ np.save(self.fp32_dir.name + '/0/0.npy', test_data)
+ np.save(self.fq_dir.name + '/0/0.npy', test_data)
+
+ def _setUpTwoTensorData(self):
+ tensor_id = {}
+ tensor_id['test'] = 0
+ with open(self.fp32_dir.name + '/tensors.json', 'w') as f:
+ json.dump(tensor_id, f)
+ with open(self.fq_dir.name + '/tensors.json', 'w') as f:
+ json.dump(tensor_id, f)
+ scales = {}
+ scales['test'] = 2.0
+ with open(self.fq_dir.name + '/scales.txt', 'w') as f:
+ json.dump(scales, f)
+ os.mkdir(self.fp32_dir.name + '/0')
+ os.mkdir(self.fp32_dir.name + '/1')
+ os.mkdir(self.fq_dir.name + '/0')
+ os.mkdir(self.fq_dir.name + '/1')
+ test_data_one = np.ones(16)
+ test_data_zero = np.zeros(16)
+ np.save(self.fp32_dir.name + '/0/0.npy', test_data_one)
+ np.save(self.fp32_dir.name + '/1/0.npy', test_data_zero)
+ np.save(self.fq_dir.name + '/0/0.npy', test_data_zero)
+ np.save(self.fq_dir.name + '/1/0.npy', test_data_zero)
+ # Golden: (1 + 0) / 2 = 0.5 for MSE
+
+ def _setUpDifferentTensorData(self):
+ # Two fp32 data (test, test2)
+ # One fq data (test)
+ # NOTE When does this happen?
+ # This case can happen because visq ignores nodes that do not affect qerrors.
+ # For example, RESHAPE Op does not affect qerrors, so its fq data is not dumped,
+ # although it is listed in 'tensors.json'.
+ tensor_id = {}
+ tensor_id['test'] = 0
+ tensor_id['test2'] = 1
+ with open(self.fp32_dir.name + '/tensors.json', 'w') as f:
+ json.dump(tensor_id, f)
+ with open(self.fq_dir.name + '/tensors.json', 'w') as f:
+ json.dump(tensor_id, f)
+ scales = {}
+ scales['test'] = 2.0
+ scales['test2'] = 1.0
+ with open(self.fq_dir.name + '/scales.txt', 'w') as f:
+ json.dump(scales, f)
+ os.mkdir(self.fp32_dir.name + '/0')
+ os.mkdir(self.fq_dir.name + '/0')
+ test_data = np.zeros(16)
+ np.save(self.fp32_dir.name + '/0/0.npy', test_data)
+ np.save(self.fp32_dir.name + '/0/1.npy', test_data)
+ np.save(self.fq_dir.name + '/0/0.npy', test_data)
+
+ def test_MPEIR(self):
+ self._setUpSingleTensorData()
+
+ computer = MPEIRComputer(self.fp32_dir.name, self.fq_dir.name)
+ qmap, _, _ = computer.run()
+ self.assertAlmostEqual(0.0, qmap['test'])
+
+ def test_MPEIR_different_tensors(self):
+ self._setUpDifferentTensorData()
+
+ computer = MPEIRComputer(self.fp32_dir.name, self.fq_dir.name)
+ qmap, _, _ = computer.run()
+ self.assertAlmostEqual(0.0, qmap['test'])
+
+ def test_MSE(self):
+ self._setUpSingleTensorData()
+
+ computer = MSEComputer(self.fp32_dir.name, self.fq_dir.name)
+ qmap, qmin, qmax = computer.run()
+ self.assertAlmostEqual(0.0, qmap['test'])
+ self.assertAlmostEqual(0.0, qmin)
+ self.assertAlmostEqual(0.0, qmax)
+
+ def test_MSE_two(self):
+ self._setUpTwoTensorData()
+
+ computer = MSEComputer(self.fp32_dir.name, self.fq_dir.name)
+ qmap, qmin, qmax = computer.run()
+ self.assertAlmostEqual(0.5, qmap['test'])
+ self.assertAlmostEqual(0.0, qmin)
+ self.assertAlmostEqual(1.0, qmax)
+
+ def test_MSE_different_tensors(self):
+ self._setUpDifferentTensorData()
+
+ computer = MSEComputer(self.fp32_dir.name, self.fq_dir.name)
+ qmap, qmin, qmax = computer.run()
+ self.assertAlmostEqual(0.0, qmap['test'])
+ self.assertAlmostEqual(0.0, qmin)
+ self.assertAlmostEqual(0.0, qmax)
+
+ def test_TAE(self):
+ self._setUpSingleTensorData()
+
+ computer = TAEComputer(self.fp32_dir.name, self.fq_dir.name)
+ qmap, qmin, qmax = computer.run()
+ self.assertAlmostEqual(0.0, qmap['test'])
+
+ def test_TAE_different_tensors(self):
+ self._setUpDifferentTensorData()
+
+ computer = TAEComputer(self.fp32_dir.name, self.fq_dir.name)
+ qmap, qmin, qmax = computer.run()
+ self.assertAlmostEqual(0.0, qmap['test'])
+ self.assertAlmostEqual(0.0, qmin)
+ self.assertAlmostEqual(0.0, qmax)
+
+ def test_TAE_two(self):
+ self._setUpTwoTensorData()
+ computer = TAEComputer(self.fp32_dir.name, self.fq_dir.name)
+ qmap, qmin, qmax = computer.run()
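+ # Golden: per-data TAE = sum(|1 - 0|) = 16 and 0, so average = 8, max = 16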
+ self.assertAlmostEqual(0.0, qmin)
+ self.assertAlmostEqual(8.0, qmap['test'])
+ self.assertAlmostEqual(16.0, qmax)
+
+ def test_SRMSE(self):
+ self._setUpSingleTensorData()
+
+ computer = SRMSEComputer(self.fp32_dir.name, self.fq_dir.name)
+ qmap, qmin, qmax = computer.run()
+ self.assertAlmostEqual(0.0, qmap['test'])
+ self.assertAlmostEqual(0.0, qmin)
+ self.assertAlmostEqual(0.0, qmax)
+
+ def test_SRMSE_different_tensors(self):
+ self._setUpDifferentTensorData()
+
+ computer = SRMSEComputer(self.fp32_dir.name, self.fq_dir.name)
+ qmap, qmin, qmax = computer.run()
+ self.assertAlmostEqual(0.0, qmap['test'])
+ self.assertAlmostEqual(0.0, qmin)
+ self.assertAlmostEqual(0.0, qmax)
+
+ def test_SRMSE_two(self):
+ self._setUpTwoTensorData()
+ computer = SRMSEComputer(self.fp32_dir.name, self.fq_dir.name)
+ qmap, qmin, qmax = computer.run()
+ # Golden: sqrt(Golden of MSE) / scale = sqrt(0.5) / 2
+ self.assertAlmostEqual(np.sqrt(0.5) / 2, qmap['test'])
+ self.assertAlmostEqual(0.0, qmin)
+ self.assertAlmostEqual(np.sqrt(0.5) / 2, qmax)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/compiler/visq-unittest/test/testUtil.py b/compiler/visq-unittest/test/testUtil.py
new file mode 100644
index 000000000..51f6eb98c
--- /dev/null
+++ b/compiler/visq-unittest/test/testUtil.py
@@ -0,0 +1,55 @@
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+'''Test visqlib.Util module'''
+
+import unittest
+
+from visqlib.Util import to_filename
+from visqlib.Util import valid_attr
+from visqlib.Util import pretty_float
+
+
+class VisqUtilTest(unittest.TestCase):
+ def test_to_filename(self):
+ data = 'abc/d/e'
+ self.assertEqual('abc_d_e', to_filename(data))
+
+ long_data = 'x' * 300
+ self.assertEqual('x' * 255, to_filename(long_data))
+
+ def test_valid_attr(self):
+ class Test:
+ def __init__(self):
+ self.a = 'a'
+
+ test = Test()
+ self.assertTrue(valid_attr(test, 'a'))
+ self.assertFalse(valid_attr(test, 'b'))
+
+ def test_pretty_float(self):
+ test_configs = [0.123456, 12.3456, [0.123456], {'test': [0.123456]}]
+ three_digits_ans = [0.123, 12.346, [0.123], {'test': [0.123]}]
+ for test_data, ans in zip(test_configs, three_digits_ans):
+ res = pretty_float(test_data, ndigits=3)
+ self.assertEqual(res, ans)
+
+ test_configs = [0.123456, 12.3456, [0.123456], {'test': [0.123456]}]
+ four_digits_ans = [0.1235, 12.3456, [0.1235], {'test': [0.1235]}]
+ for test_data, ans in zip(test_configs, four_digits_ans):
+ res = pretty_float(test_data, ndigits=4)
+ self.assertEqual(res, ans)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/compiler/visq/CMakeLists.txt b/compiler/visq/CMakeLists.txt
new file mode 100644
index 000000000..6d13a7eca
--- /dev/null
+++ b/compiler/visq/CMakeLists.txt
@@ -0,0 +1,67 @@
+unset(VISQ_DEPS)
+
+###
+### Set up visq executable
+###
+set(VISQ_FILE "visq")
+set(VISQ_SRC "${CMAKE_CURRENT_SOURCE_DIR}/${VISQ_FILE}")
+set(VISQ_BIN "${CMAKE_CURRENT_BINARY_DIR}/${VISQ_FILE}")
+
+add_custom_command(OUTPUT ${VISQ_BIN}
+ COMMAND ${CMAKE_COMMAND} -E copy "${VISQ_SRC}" "${VISQ_BIN}"
+ DEPENDS ${VISQ_SRC}
+ COMMENT "Generate ${VISQ_BIN}"
+)
+
+list(APPEND VISQ_DEPS ${VISQ_BIN})
+
+###
+### Set up visqlib directory
+###
+set(VISQ_PYTHON_DIR "visqlib")
+set(VISQ_PYTHON_DIR_BIN "${CMAKE_CURRENT_BINARY_DIR}/${VISQ_PYTHON_DIR}")
+
+add_custom_command(OUTPUT ${VISQ_PYTHON_DIR_BIN}
+ COMMAND ${CMAKE_COMMAND} -E make_directory "${VISQ_PYTHON_DIR_BIN}"
+ COMMENT "Generate ${VISQ_PYTHON_DIR_BIN}"
+)
+
+list(APPEND VISQ_DEPS ${VISQ_PYTHON_DIR_BIN})
+
+###
+### Set up Python files
+###
+set(VISQ_PYTHON_FILES DumpFakeQuantFM.py
+ DumpFP32FM.py
+ Palette.py
+ QErrorComputer.py
+ DotBuilder.py
+ Util.py)
+
+foreach(VISQ_PYTHON_FILE IN ITEMS ${VISQ_PYTHON_FILES})
+ set(VISQ_PYTHON_FILE_SRC "${CMAKE_CURRENT_SOURCE_DIR}/${VISQ_PYTHON_DIR}/${VISQ_PYTHON_FILE}")
+ set(VISQ_PYTHON_FILE_BIN "${CMAKE_CURRENT_BINARY_DIR}/${VISQ_PYTHON_DIR}/${VISQ_PYTHON_FILE}")
+
+ add_custom_command(OUTPUT ${VISQ_PYTHON_FILE_BIN}
+ COMMAND ${CMAKE_COMMAND} -E copy "${VISQ_PYTHON_FILE_SRC}" "${VISQ_PYTHON_FILE_BIN}"
+ DEPENDS ${VISQ_PYTHON_FILE_SRC}
+ COMMENT "Generate ${VISQ_PYTHON_FILE_BIN}"
+ )
+
+ list(APPEND VISQ_DEPS ${VISQ_PYTHON_FILE_BIN})
+
+endforeach(VISQ_PYTHON_FILE)
+
+add_custom_target(visq ALL DEPENDS ${VISQ_DEPS})
+
+install(FILES ${VISQ_FILE}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION bin)
+
+install(DIRECTORY ${VISQ_PYTHON_DIR}
+ FILE_PERMISSIONS OWNER_WRITE OWNER_READ
+ GROUP_READ
+ WORLD_READ
+ DESTINATION bin)
diff --git a/compiler/visq/README.md b/compiler/visq/README.md
new file mode 100644
index 000000000..0d0a83881
--- /dev/null
+++ b/compiler/visq/README.md
@@ -0,0 +1,32 @@
+# visq
+
+_visq_ is a module to generate JSON files used to visualize layer-wise quantization errors
+(https://github.com/Samsung/ONE/issues/9694).
+
+## Example
+```bash
+$ ./visq --fp32_circle sample.circle \
+ --q_circle sample.q.circle \
+ --data test.h5 \
+ --mpeir_output sample.mpeir.visq.json \
+ --mse_output sample.mse.visq.json \
+ --tae_output sample.tae.visq.json \
+ --dump_dot_graph
+```
+
+The above command will generate
+- `sample.mpeir.visq.json`: JSON file that contains layer-wise mpeir.
+- `sample.mse.visq.json`: JSON file that contains layer-wise mse.
+- `sample.tae.visq.json`: JSON file that contains layer-wise tae.
+- `sample.mpeir.visq.json.dot`: Dot graph for layer-wise mpeir.
+- `sample.mse.visq.json.dot`: Dot graph for layer-wise mse.
+- `sample.tae.visq.json.dot`: Dot graph for layer-wise tae.
+
+## Quantization error metrics
+
+f: Result of the fp32 model
+q: Result of the quantized model
+
+- MPEIR: Mean Peak Error to Interval Ratio = Average(max(|f - q|) / (max(f) - min(f) + epsilon)), where epsilon = 1e-6
+- MSE: Mean Squared Error = Average(square(f - q))
+- TAE: Total Absolute Error = Sum(|f - q|)
+- SRMSE: Scaled Root Mean Square Error = sqrt(MSE) / scale
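+
+As a rough sketch (not the actual implementation, which lives in
+`visqlib/QErrorComputer.py`), the per-data computations can be written with
+numpy as follows; `f` and `q` below are hypothetical arrays of fp32 and
+dequantized results, and `2.0` is a hypothetical quantization scale:
+
+```python
+import numpy as np
+
+f = np.array([0.0, 1.0, 2.0, 3.0])  # fp32 results (illustrative values)
+q = np.array([0.1, 0.9, 2.2, 2.8])  # dequantized results (illustrative values)
+
+diff = np.abs(f - q)
+peir = np.max(diff) / (np.max(f) - np.min(f) + 1e-6)  # averaged over data -> MPEIR
+mse = np.square(f - q).mean()  # averaged over data -> MSE
+tae = np.sum(diff)  # accumulated over data -> TAE
+srmse = np.sqrt(mse) / 2.0  # scaled root mean square error -> SRMSE
+```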
diff --git a/compiler/visq/requires.cmake b/compiler/visq/requires.cmake
new file mode 100644
index 000000000..fdf32c605
--- /dev/null
+++ b/compiler/visq/requires.cmake
@@ -0,0 +1,2 @@
+require("dalgona")
+require("circle-quantizer")
diff --git a/compiler/visq/visq b/compiler/visq/visq
new file mode 100644
index 000000000..6c3b94d11
--- /dev/null
+++ b/compiler/visq/visq
@@ -0,0 +1,382 @@
+#!/usr/bin/env bash
+''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # '''
+''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # '''
+''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # '''
+''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
+''''exit 255 # '''
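+
+# NOTE The quoted lines above form a bash/python polyglot header: bash sees
+# pairs of empty strings followed by a command, so it locates the venv python
+# and re-executes this script with it, while python parses the same lines as
+# plain string literals and skips over them.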
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import subprocess
+import tempfile
+import json
+import os
+import math
+import sys
+
+import h5py as h5
+import numpy as np
+
+from shutil import copyfile
+from pathlib import Path
+
+from visqlib.Palette import YLORRD9Palette
+from visqlib.QErrorComputer import MPEIRComputer, MSEComputer, TAEComputer, SRMSEComputer
+from visqlib.Util import valid_attr, pretty_float
+from visqlib.DotBuilder import DotBuilder
+
+
+def _get_parser():
+ parser = argparse.ArgumentParser(
+ description='Command line tool to visualize layer-wise quantization errors')
+ parser.add_argument(
+ "-f",
+ "--fp32_circle",
+ type=str,
+ help="Path to the fp32 circle model.",
+ required=True)
+ parser.add_argument(
+ "-q",
+ "--q_circle",
+ type=str,
+ help="Path to the quantized circle model.",
+ required=True)
+ parser.add_argument(
+ "-d",
+ "--data",
+ type=str,
+ help=
+ "Path to the data used for inference. Random data will be used if this option is not given.",
+ required=False)
+ parser.add_argument(
+ "--mpeir_output",
+ type=str,
+ help="Path to the output json file (qerror metric = MPEIR).",
+ required=False)
+ parser.add_argument(
+ "--mse_output",
+ type=str,
+ help="Path to the output json file (qerror metric = MSE).",
+ required=False)
+ parser.add_argument(
+ "--tae_output",
+ type=str,
+ help="Path to the output json file (qerror metric = TAE).",
+ required=False)
+ parser.add_argument(
+ "--srmse_output",
+ type=str,
+ help="Path to the output json file (qerror metric = SRMSE).",
+ required=False)
+ parser.add_argument(
+ "--dump_dot_graph", action="store_true", help="Dump dot graph.", required=False)
+ parser.add_argument(
+ "-b",
+ "--batch_size",
+ type=int,
+ help="Batch size to process large datasets.",
+ required=False)
+
+ return parser
+
+
+def _verify_args(args):
+ """Verify the given arguments"""
+
+ valid_outputs = ['mpeir_output', 'mse_output', 'tae_output', 'srmse_output']
+
+ # Check if at least one output option is given
+ num_outputs = 0
+ for output_name in valid_outputs:
+ if valid_attr(args, output_name):
+ num_outputs += 1
+
+ if num_outputs == 0:
+ raise RuntimeError("At least one output should be given.")
+
+
+def _run_dalgona(model, data, analysis, save_dir):
+ dir_path = Path(__file__).parent.resolve()
+ dalgona_path = os.path.join(dir_path, 'dalgona')
+ cmd = [dalgona_path]
+ cmd += ['--input_model', str(model)]
+ cmd += ['--analysis', str(analysis)]
+ if data is not None:
+ cmd += ['--input_data', str(data)]
+ cmd += ['--analysis_args', str(save_dir)]
+
+ try:
+ subprocess.run(cmd, capture_output=True, check=True, universal_newlines=True)
+ except subprocess.CalledProcessError as e:
+ print('Error raised while running the below command')
+ print(' '.join(cmd))
+ print(e.stderr)
+ raise
+
+
+# Generate h5 file that contains a dataset of a single batch
+# This is for batch execution of visq
+def gen_batch_h5(inputs_data, inputs_path, rawData):
+ # Create h5 file
+ output_path = inputs_path + "/inputs.h5"
+ h5_file = h5.File(output_path, 'w')
+ group = h5_file.create_group("value")
+ group.attrs['desc'] = "Input data"
+ if rawData:
+ group.attrs['rawData'] = '1'
+
+ for i in range(len(inputs_data)):
+ sample = group.create_group(str(i))
+ for j in range(len(inputs_data[i])):
+ sample.create_dataset(str(j), data=inputs_data[i][j])
+
+ h5_file.close()
+ return output_path
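+
+# NOTE The generated h5 file has the layout value/<data_index>/<input_index>,
+# mirroring the layout of the original dataset that _run_batch reads below.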
+
+
+# Aggregate intermediate results for the given data
+def advance_on_data(fp32_model, fq_model, data, computers):
+
+ curr_dir = Path(__file__).parent.resolve()
+ dump_fp32_py = curr_dir / 'visqlib' / 'DumpFP32FM.py'
+ dump_fq_py = curr_dir / 'visqlib' / 'DumpFakeQuantFM.py'
+
+ with tempfile.TemporaryDirectory() as fp32_dir, \
+ tempfile.TemporaryDirectory() as fq_dir:
+
+ _run_dalgona(fp32_model, data, dump_fp32_py, fp32_dir)
+ copyfile(fp32_dir + '/tensors.json', fq_dir + '/tensors.json')
+ _run_dalgona(fq_model, data, dump_fq_py, fq_dir)
+
+ for metric_key in computers:
+ computers[metric_key][0].advance_on(fp32_dir, fq_dir)
+
+
+def _run_batch(fp32_model, fq_model, data, computers, batch_size):
+ with tempfile.TemporaryDirectory() as inputs_dir:
+ with h5.File(data, 'r') as f:
+ dataset = f['value']
+ rawData = 'rawData' in dataset.attrs and dataset.attrs['rawData'] == '1'
+ inputs = []
+ for data_index in dataset:
+ cur_inputs = []
+ for input_index in dataset[data_index]:
+ d = dataset[data_index][input_index][:]
+ cur_inputs.append(d)
+
+ inputs.append(cur_inputs)
+ if len(inputs) >= batch_size:
+ input_path = gen_batch_h5(inputs, inputs_dir, rawData)
+ advance_on_data(fp32_model, fq_model, input_path, computers)
+ inputs = []
+
+ if len(inputs) > 0:
+ input_path = gen_batch_h5(inputs, inputs_dir, rawData)
+ advance_on_data(fp32_model, fq_model, input_path, computers)
+
+
+def _fake_quantize(input_model, output_model):
+ dir_path = Path(__file__).parent.resolve()
+ circle_quantizer_path = os.path.join(dir_path, 'circle-quantizer')
+ cmd = [circle_quantizer_path]
+ cmd += ['--fake_quantize']
+ cmd += [str(input_model)]
+ cmd += [str(output_model)]
+
+ try:
+ subprocess.run(cmd, check=True, universal_newlines=True)
+ except subprocess.CalledProcessError as e:
+ print('Error raised while running the below command')
+ print(' '.join(cmd))
+ print(e.stderr)
+ raise
+
+
+# Recursively visit items and check if there is Infinity or NaN
+def _check_float(item):
+ if isinstance(item, dict):
+ for v in item.values():
+ _check_float(v)
+ if isinstance(item, list):
+ for v in item:
+ _check_float(v)
+ if isinstance(item, float):
+ if math.isinf(item):
+ raise RuntimeError('Infinite value detected. Value must be finite')
+ if math.isnan(item):
+ raise RuntimeError('NaN value detected. Value must not be NaN')
+
+
+def _build_json(model, metric, colorscheme, error):
+ # model: string
+ # metric: string
+ # colorscheme: list of dicts [{'b': begin, 'e': end, 'c': color}, ..]
+ # error: dict {tensor_name:error}
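+ #
+ # The resulting object looks roughly like this (values illustrative only):
+ # {"meta": {"model": "sample.circle", "metric": "MPEIR",
+ # "colorscheme": [{"b": 0.0, "e": 0.1, "c": "#ffffcc"}, ...]},
+ # "error": [{"conv1": 0.123, "conv2": 0.456}]}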
+
+ meta = {}
+ meta["model"] = model
+ meta["metric"] = metric
+ meta["colorscheme"] = pretty_float(colorscheme)
+ result = {}
+ result["meta"] = meta
+ # Why list? To support multiple subgraphs
+ result["error"] = [pretty_float(error)]
+
+ # Invariants
+ _check_float(meta["colorscheme"])
+ _check_float(result["error"])
+ return result
+
+
+def _save_dot(circle_path: str, dot_path: str, metric: str, colors: list, qerror: dict):
+ # circle_path: Path to the circle model (required to build graph)
+ # dot_path: Path to the output dot file
+ # metric: Metric name (ex: MPEIR, MSE)
+ # colors: list [{'b': begin, 'e': end, 'c':color}, ..]
+ # qerror: dict {tensor_name (str) -> qerror (float)}
+ builder = DotBuilder(
+ circle_path=circle_path, dot_path=dot_path, metric=metric, colors=colors)
+
+ builder.save(qerror)
+
+
+def run_on_data_batchwise(fp32_model, q_model, data, dump_dot_graph, computers,
+ batch_size):
+
+ with tempfile.TemporaryDirectory() as model_dir:
+ fq_model = model_dir + '/fq_model.circle'
+
+ # Step 1. Fake quantize quantized circle model
+ _fake_quantize(q_model, fq_model)
+
+ # Process the whole dataset batch by batch
+ _run_batch(fp32_model, fq_model, data, computers, batch_size)
+
+ # Compute the final results
+ for metric_key in computers:
+ cur_computer = computers[metric_key][0]
+ output = computers[metric_key][1]
+ qerror_map, q_min, q_max = cur_computer.get_final_result()
+
+ palette = YLORRD9Palette(qerror_min=q_min, qerror_max=q_max)
+ result = _build_json(
+ metric=metric_key,
+ model=Path(fp32_model).name,
+ colorscheme=palette.colorscheme(),
+ error=qerror_map)
+ with open(output, "w") as f:
+ json.dump(result, f)
+
+ if dump_dot_graph:
+ _save_dot(
+ circle_path=fp32_model,
+ dot_path=output + '.dot',
+ metric=metric_key,
+ colors=palette.colorscheme(),
+ qerror=qerror_map)
+
+
+def run_on_data(fp32_model, q_model, data, dump_dot_graph, computers):
+ curr_dir = Path(__file__).parent.resolve()
+ dump_fp32_py = curr_dir / 'visqlib' / 'DumpFP32FM.py'
+ dump_fq_py = curr_dir / 'visqlib' / 'DumpFakeQuantFM.py'
+
+ with tempfile.TemporaryDirectory() as model_dir, \
+ tempfile.TemporaryDirectory() as fp32_dir, \
+ tempfile.TemporaryDirectory() as fq_dir:
+ fq_model = model_dir + '/fq_model.circle'
+
+ # Step 1. Fake quantize quantized circle model
+ _fake_quantize(q_model, fq_model)
+
+ # Step 2. Run dalgona to dump intermediate FMs in FP32 model
+ _run_dalgona(fp32_model, data, dump_fp32_py, fp32_dir)
+
+ # Copy list of dumped tensors
+ copyfile(fp32_dir + '/tensors.json', fq_dir + '/tensors.json')
+
+ # Step 3. Run dalgona to dump intermediate FMs in fq model
+ _run_dalgona(fq_model, data, dump_fq_py, fq_dir)
+
+ # Step 4. Read results and compute qerror
+ for metric_key in computers:
+ cur_computer = computers[metric_key][0]
+ output = computers[metric_key][1]
+ cur_computer.advance_on(fp32_dir, fq_dir)
+ qerror_map, q_min, q_max = cur_computer.get_final_result()
+
+ palette = YLORRD9Palette(qerror_min=q_min, qerror_max=q_max)
+ result = _build_json(
+ metric=metric_key,
+ model=Path(fp32_model).name,
+ colorscheme=palette.colorscheme(),
+ error=qerror_map)
+ with open(output, "w") as f:
+ json.dump(result, f)
+
+ if dump_dot_graph:
+ _save_dot(
+ circle_path=fp32_model,
+ dot_path=output + '.dot',
+ metric=metric_key,
+ colors=palette.colorscheme(),
+ qerror=qerror_map)
+
+
+def main():
+ # parse arguments
+ parser = _get_parser()
+ args = parser.parse_args()
+ _verify_args(args)
+
+ fp32_model = args.fp32_circle
+ q_model = args.q_circle
+ data = None
+ if valid_attr(args, 'data'):
+ data = args.data
+ dump_dot_graph = args.dump_dot_graph
+ batch_size = None
+ if valid_attr(args, 'batch_size'):
+ batch_size = args.batch_size
+
+ computers = {}
+ if args.mpeir_output:
+ computers['MPEIR'] = (MPEIRComputer(None, None), args.mpeir_output)
+
+ if args.mse_output:
+ computers['MSE'] = (MSEComputer(None, None), args.mse_output)
+
+ if args.tae_output:
+ computers['TAE'] = (TAEComputer(None, None), args.tae_output)
+
+ if args.srmse_output:
+ computers['SRMSE'] = (SRMSEComputer(None, None), args.srmse_output)
+
+ if batch_size is None:
+ run_on_data(fp32_model, q_model, data, dump_dot_graph, computers)
+ else:
+ run_on_data_batchwise(fp32_model, q_model, data, dump_dot_graph, computers,
+ batch_size)
+
+
+if __name__ == '__main__':
+ try:
+ main()
+ except Exception as e:
+ prog_name = os.path.basename(__file__)
+ print(f"{prog_name}: {type(e).__name__}: " + str(e), file=sys.stderr)
+ sys.exit(255)
diff --git a/compiler/visq/visqlib/DotBuilder.py b/compiler/visq/visqlib/DotBuilder.py
new file mode 100644
index 000000000..a6afb966c
--- /dev/null
+++ b/compiler/visq/visqlib/DotBuilder.py
@@ -0,0 +1,165 @@
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pydot
+import math
+
+from circle import Model
+
+from pathlib import Path
+
+
+# Return the name of the tensor
+def _tensor_name(graph, tid):
+ return graph.Tensors(tid).Name().decode('utf-8')
+
+
+# Return double-quoted string
+def _quote(string: str):
+ return '"' + string + '"'
+
+
+# Class to build dot graph from qerror_map
+class DotBuilder:
+ def __init__(self, circle_path: str, dot_path: str, metric: str, colors: list):
+ '''
+ circle_path: Path to the fp32 circle model (required to build graph)
+ dot_path: Path to the saved dot file
+ metric: Metric name (ex: MPEIR, MSE)
+ colors: List of color slots [{'b': begin, 'e': end, 'c':color}, ..]
+ '''
+ with open(circle_path, 'rb') as f:
+ self._model = Model.Model.GetRootAsModel(f.read())
+
+ if self._model.SubgraphsLength() != 1:
+ raise RuntimeError("Only one subgraph is supported")
+
+ self._name = Path(circle_path).name
+ self._dot_path = dot_path
+ self._metric = metric
+ self._colors = colors
+
+ # Return color (RGB) for the given qerror
+ def _get_color(self, qerror: float):
+ # Find a slot where qerror is in the range of [begin, end]
+ for slot in self._colors:
+ begin = slot['b']
+ end = slot['e']
+ if (qerror > begin or math.isclose(
+ qerror, begin)) and (qerror < end or math.isclose(qerror, end)):
+ return slot['c']
+
+ # Use the first color if qerror is smaller than the first begin
+ if qerror < self._colors[0]['b']:
+ return self._colors[0]['c']
+
+ # Use the last color if qerror is larger than the last end
+ if qerror > self._colors[-1]['e']:
+ return self._colors[-1]['c']
+
+ raise RuntimeError("Color ID not found. QError: " + str(qerror))
+
+ # Generate a pydot.Node object which represents the color table
+ def _gen_color_table(self):
+ color_table = "< <table>"
+ for slot in self._colors:
+ begin = slot['b']
+ end = slot['e']
+ color = slot['c']
+ color_table += "<tr> <td bgcolor=\""
+ color_table += color
+ color_table += "\">"
+ color_table += self._metric + ": {:.4f}".format(
+ begin) + " ~ " + "{:.4f}".format(end)
+ color_table += "</td> </tr>"
+ color_table += "</table> >"
+ return pydot.Node("color_table", shape='none', label=color_table)
+
+ # Save dot graph to self._dot_path
+ def save(self, qerror_map: dict):
+ '''
+ qerror_map: Dictionary of {op_name (str) -> qerror (float)}
+ '''
+ # Build graph
+ DOT = pydot.Dot(self._name, graph_type="digraph")
+
+ # Add color table
+ DOT.add_node(self._gen_color_table())
+
+ # Dictionary from output tensor name to Op name {str -> str}
+ # This dict is for handling Ops with multiple output tensors.
+ # We use the first output tensor's name as the Op name, following
+ # the implementation of luci IR
+ output_to_op = dict()
+
+ graph = self._model.Subgraphs(0)
+
+ # Add Input nodes
+ for i in range(graph.InputsLength()):
+ name = _tensor_name(graph, graph.Inputs(i))
+ output_to_op[name] = name
+ DOT.add_node(pydot.Node(_quote(name)))
+
+ # Add Output nodes
+ for i in range(graph.OutputsLength()):
+ name = _tensor_name(graph, graph.Outputs(i))
+ output_to_op[name] = name
+ DOT.add_node(pydot.Node(_quote(name)))
+
+ # Add Edges
+ for i in range(graph.OperatorsLength()):
+ op = graph.Operators(i)
+ # Skip Ops without any output tensor
+ if op.OutputsLength() == 0:
+ continue
+
+ # Name of the first output tensor
+ op_name = _tensor_name(graph, op.Outputs(0))
+
+ if op_name in qerror_map:
+ qerror = qerror_map[op_name]
+ node = pydot.Node(
+ _quote(op_name),
+ style="filled",
+ fillcolor=self._get_color(qerror),
+ xlabel=self._metric + ": {:.4f}".format(qerror))
+ else:
+ # qerror_map does not have qerror info for the op, so color it gray.
+ # When does this happen? visq does not collect qerror info of some Ops.
+ # For example, Reshape Op does not change values, so its qerror
+ # info is not collected.
+ node = pydot.Node(_quote(op_name), style="filled", fillcolor='gray')
+
+ DOT.add_node(node)
+
+ for output_idx in range(op.OutputsLength()):
+ output_name = _tensor_name(graph, op.Outputs(output_idx))
+ # Set Op name as the first output tensor name (op_name)
+ output_to_op[output_name] = op_name
+
+ for j in range(op.InputsLength()):
+ op_input = op.Inputs(j)
+
+ # Optional input case (ex: For TConv with no bias, bias is -1)
+ if op_input == -1:
+ continue
+
+ op_input_name = _tensor_name(graph, op_input)
+ if op_input_name not in output_to_op:
+ continue
+
+ # Use the saved name to handle multiple outputs
+ op_input_name = output_to_op[op_input_name]
+ DOT.add_edge(pydot.Edge(_quote(op_input_name), _quote(op_name)))
+
+ DOT.write(self._dot_path)
diff --git a/compiler/visq/visqlib/DumpFP32FM.py b/compiler/visq/visqlib/DumpFP32FM.py
new file mode 100644
index 000000000..14dc900c5
--- /dev/null
+++ b/compiler/visq/visqlib/DumpFP32FM.py
@@ -0,0 +1,64 @@
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Script that dumps FM of FP32 model
+# NOTE This script runs on dalgona
+
+import numpy as np
+import json
+
+from pathlib import Path
+
+
+# Dump FP32 model's intermediate FM data and their names
+#
+# Before
+# self._dir/
+#
+# After
+# self._dir/
+# tensors.json
+# <TENSOR_ID>.npy
+# NOTE tensors.json has a dictionary {TENSOR_NAME -> TENSOR_ID}
+class DumpFP32FM:
+ def StartAnalysis(self, args):
+ self._dir = Path(args)
+ self._num_data = 0
+ # Dict {tensor_name -> tid}
+ self._tname_to_tid = dict()
+ self._tensor_count = 0
+
+ def EndNetworkExecution(self, outputs):
+ self._num_data += 1
+
+ def DefaultOpPost(self, name, opcode, inputs, outputs):
+ # Save intermediate FM into <tid>.npy
+ data_path = self._dir / str(self._num_data)
+ data_path.mkdir(parents=False, exist_ok=True)
+ for output in outputs:
+ name = output['name']
+ data = output['data']
+ if name in self._tname_to_tid:
+ tid = self._tname_to_tid[name]
+ else:
+ tid = self._tensor_count
+ self._tname_to_tid[name] = tid
+ self._tensor_count += 1
+
+ np.save(str(data_path / str(tid)), data)
+
+ def EndAnalysis(self):
+ # Save tensor name : tensor id pairs
+ with open(self._dir / 'tensors.json', 'w') as f:
+ json.dump(self._tname_to_tid, f, indent=2)
diff --git a/compiler/visq/visqlib/DumpFakeQuantFM.py b/compiler/visq/visqlib/DumpFakeQuantFM.py
new file mode 100644
index 000000000..b8dde3831
--- /dev/null
+++ b/compiler/visq/visqlib/DumpFakeQuantFM.py
@@ -0,0 +1,84 @@
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Script that dumps dequantized FM
+# NOTE This script runs on dalgona
+
+import numpy as np
+import json
+
+from pathlib import Path
+
+# Fake-quantized Op has the postfix of fq_postfix
+# TODO Remove coupling with fake quantization codes
+fq_postfix = '_FQ_Quantize_FQ_Dequantize'
+
+
+# Return the original name before fake quantization
+# Return None if name is not from fake quantization (Dequantize Op in original model)
+# TODO Handle the case when the original node's name contains fq_postfix
+def _name_before_fq(name):
+ if not name.endswith(fq_postfix):
+ return None
+
+ return name[0:name.find(fq_postfix)]
+
+
+# Dump fake-quantized model's intermediate FM data according to tensors.json
+#
+# Before
+# self._dir/
+# tensors.json
+#
+# After
+# self._dir/
+# tensors.json
+# <TENSOR_ID>.npy
+# NOTE tensors.json has a dictionary {TENSOR_NAME -> TENSOR_ID}
+class DumpFakeQuantFM:
+ def StartAnalysis(self, args):
+ self._dir = Path(args)
+ self._num_data = 0
+ with open(self._dir / 'tensors.json') as f:
+ self._tname_to_tid = json.load(f)
+ self._scale_map = {}
+
+ def EndNetworkExecution(self, outputs: list):
+ self._num_data += 1
+
+ # TODO Use DequantizePost when dalgona supports it
+ def DefaultOpPost(self, name, opcode, inputs, outputs):
+ if opcode == 'Dequantize':
+ for output in outputs:
+ name = output['name']
+ data = output['data']
+ orig_name = _name_before_fq(name)
+ if orig_name in self._tname_to_tid:
+ tid = self._tname_to_tid[orig_name]
+ data_path = self._dir / str(self._num_data)
+ data_path.mkdir(parents=False, exist_ok=True)
+ np.save(str(data_path / str(tid)), data)
+ # Save scales (scale is fixed, so saving once)
+ if orig_name not in self._scale_map:
+ assert len(inputs) == 1
+ assert 'quantparam' in inputs[0]
+ assert 'scale' in inputs[0]['quantparam']
+ assert len(inputs[0]['quantparam']['scale']) == 1
+ scale = inputs[0]['quantparam']['scale'][0]
+ self._scale_map[orig_name] = scale
+
+ def EndAnalysis(self):
+ # Dump saved scales into scales.txt
+ with open(self._dir / 'scales.txt', 'w') as f:
+ json.dump(self._scale_map, f)
diff --git a/compiler/visq/visqlib/Palette.py b/compiler/visq/visqlib/Palette.py
new file mode 100644
index 000000000..9df72232d
--- /dev/null
+++ b/compiler/visq/visqlib/Palette.py
@@ -0,0 +1,70 @@
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Class to save colorscheme
+class Palette:
+ # Child class must implement __init__ to fill the below members
+ def __init__(self):
+ # Element of self._slots has [lower bound, upper bound] of qerrors to decide a color
+ self._slots = []
+ # Element of self._colors has rgb values in string format
+ self._colors = []
+ raise NotImplementedError('Child class must implement __init__')
+
+ # Return color scheme as a list of objects
+ # Each object has the following attributes
+ # b: begin qerror
+ # e: end qerror
+ # c: color (in RGB string)
+ def colorscheme(self):
+ cs = []
+ for slot, color in zip(self._slots, self._colors):
+ cs.append({"b": slot[0], "e": slot[1], "c": color})
+ return cs
+
+
+# Ranges of slots are defined by qerror_min/qerror_max
+# Each slot has a uniform range
+# For example, if qerror_min = 0.0, qerror_max = 1.0, number of colors = 10
+# Ranges of slots will be as follows.
+# [0.0, 0.1], [0.1, 0.2], [0.2, 0.3] ... [0.8, 0.9], [0.9, 1.0]
+class UniformPalette(Palette):
+ def __init__(self, qerror_min, qerror_max, colors):
+ self._colors = colors
+ self._slots = []
+ qerror_range = qerror_max - qerror_min
+ num_colors = len(self._colors)
+ for i in range(num_colors):
+ lower_bound = qerror_min + i * (qerror_range / num_colors)
+ upper_bound = qerror_min + (i + 1) * (qerror_range / num_colors)
+
+ self._slots.append([lower_bound, upper_bound])
+
+ # Invariant
+ assert len(self._slots) == num_colors
+
+
+# Palette for ylorrd9 colorscheme
+class YLORRD9Palette(UniformPalette):
+ def __init__(self, qerror_min, qerror_max):
+ if qerror_min >= qerror_max:
+ raise RuntimeError('min must be less than max')
+
+ # From https://colorbrewer2.org/#type=sequential&scheme=YlOrRd&n=9
+ colors = [
+ "#ffffcc", "#ffeda0", "#fed976", "#feb24c", "#fd8d3c", "#fc4e2a", "#e31a1c",
+ "#bd0026", "#800026"
+ ]
+ super().__init__(qerror_min, qerror_max, colors)
diff --git a/compiler/visq/visqlib/QErrorComputer.py b/compiler/visq/visqlib/QErrorComputer.py
new file mode 100644
index 000000000..46ba3e703
--- /dev/null
+++ b/compiler/visq/visqlib/QErrorComputer.py
@@ -0,0 +1,254 @@
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import glob
+import numpy as np
+import json
+
+from pathlib import Path
+from collections import defaultdict
+
+
+class QErrorComputer:
+ def __init__(self, fp32_dir, fq_dir):
+ self._fp32_dir = fp32_dir
+ self._fq_dir = fq_dir
+ self.qerror_map = defaultdict(float)
+ self._num_processed_data = 0
+
+ def collect_data_path(self, fp32_dir, fq_dir):
+ # Assumption: FM data are saved as follows
+ #
+ # fp32_dir/
+ # tensors.json
+ # <DATA_INDEX>/
+ # <TENSOR_ID>.npy
+ #
+ # fq_dir/
+ # tensors.json
+ # <DATA_INDEX>/
+ # <TENSOR_ID>.npy
+ #
+ # NOTE tensors.json has a dictionary {TENSOR_NAME -> TENSOR_ID}
+ self._num_data = len(list(filter(os.path.isdir, glob.glob(fp32_dir + '/*'))))
+ if self._num_data != len(list(filter(os.path.isdir, glob.glob(fq_dir + '/*')))):
+ raise RuntimeError("Number of data mistmatches")
+
+ self._num_processed_data += self._num_data
+
+ self._tid_to_tname = dict() # {tensor id -> tensor name}
+ with open(Path(fp32_dir) / 'tensors.json') as f:
+ tname_to_tid = json.load(f)
+
+ for tname, tid in tname_to_tid.items():
+ self._tid_to_tname[tid] = tname
+
+ # Save paths to fp32 data and fq data for each tensor
+ # dict
+ # {
+ # <tensor_name>: (fp32_path, fq_path),
+ # <tensor_name>: (fp32_path, fq_path),
+ # ...
+ # }
+ data_paths = dict()
+ for data_idx in range(self._num_data):
+ fp32_results = glob.glob(fp32_dir + '/' + str(data_idx) + '/*.npy')
+ for fp32_data_path in fp32_results:
+ fp32_path = Path(fp32_data_path)
+ fq_data_path = fq_dir + '/' + str(data_idx) + '/' + fp32_path.with_suffix(
+ '.npy').name
+ fq_path = Path(fq_data_path)
+ tid = int(fp32_path.stem)
+ tensor_name = self._tid_to_tname[tid]
+
+ # Only save the tensors which have both fp32 data and fq data
+ if fq_path.is_file() and fp32_path.is_file():
+ if tensor_name in data_paths:
+ data_paths[tensor_name].append((fp32_data_path, fq_data_path))
+ else:
+ data_paths[tensor_name] = [(fp32_data_path, fq_data_path)]
+
+ return data_paths
+
+ def run(self):
+ '''Return qerror map (dict: tensor_name(string) -> qerror(float)).'''
+ raise NotImplementedError # Child must implement this
+
+
+class MPEIRComputer(QErrorComputer):
+ def __init__(self, fp32_dir, fq_dir):
+ super().__init__(fp32_dir, fq_dir)
+
+ # Incrementally compute Qerror while traversing all data in fp32_dir and fq_dir
+ def advance_on(self, fp32_dir, fq_dir):
+ data_paths = self.collect_data_path(fp32_dir, fq_dir)
+ for tensor_name, data_path in data_paths.items():
+ for (fp32_data_path, fq_data_path) in data_path:
+ fp32_data = np.load(fp32_data_path)
+ fq_data = np.load(fq_data_path)
+
+ diff = np.absolute(fp32_data - fq_data).reshape(-1)
+
+ fp32_min = np.min(fp32_data.reshape(-1))
+ fp32_max = np.max(fp32_data.reshape(-1))
+
+ # Peak Error-to-Interval Ratio (PEIR)
+ # NOTE: PEIR is an analogue of PSNR (Peak Signal to Noise Ratio)
+ PEAK_ERROR = np.max(diff)
+ INTERVAL = fp32_max - fp32_min
+
+ # If INTERVAL is 0, PEIR becomes NaN.
+ # To prevent this, a relaxed PEIR with epsilon (1e-6) is used.
+ rPEIR = PEAK_ERROR / (INTERVAL + 1e-6)
+
+ self.qerror_map[tensor_name] += rPEIR
+
+ # Return
+ # qerror_map (dict: tensor_name(string) -> qerror(float))
+ # qerror_min (float)
+ # qerror_max (float)
+ def get_final_result(self):
+ qerror_map = dict()
+ for tensor_name, acc in self.qerror_map.items():
+ qerror_map[tensor_name] = acc / self._num_processed_data
+
+ # Fixed qerror_min (0), qerror_max (1)
+ return qerror_map, 0.0, 1.0
+
+ def run(self):
+ self.advance_on(self._fp32_dir, self._fq_dir)
+ return self.get_final_result()
+
+
+class MSEComputer(QErrorComputer):
+ def __init__(self, fp32_dir, fq_dir):
+ super().__init__(fp32_dir, fq_dir)
+ self.qerror_min = float('inf')
+ self.qerror_max = -self.qerror_min
+
+ # Incrementally compute Qerror while traversing all data in fp32_dir and fq_dir
+ def advance_on(self, fp32_dir, fq_dir):
+ data_paths = self.collect_data_path(fp32_dir, fq_dir)
+ for tensor_name, data_path in data_paths.items():
+ for (fp32_data_path, fq_data_path) in data_path:
+ fp32_data = np.load(fp32_data_path)
+ fq_data = np.load(fq_data_path)
+
+ MSE = np.square(fp32_data - fq_data).mean()
+
+ self.qerror_map[tensor_name] += MSE
+
+ self.qerror_min = min(MSE, self.qerror_min)
+ self.qerror_max = max(MSE, self.qerror_max)
+
+ # Return
+ # qerror_map (dict: tensor_name(string) -> qerror(float))
+ # qerror_min (float)
+ # qerror_max (float)
+ def get_final_result(self):
+ qerror_map = dict()
+ for tensor_name, acc in self.qerror_map.items():
+ qerror_map[tensor_name] = acc / self._num_processed_data
+
+ return qerror_map, self.qerror_min, self.qerror_max
+
+ def run(self):
+ self.advance_on(self._fp32_dir, self._fq_dir)
+ return self.get_final_result()
+
+
+class TAEComputer(QErrorComputer):  # Total Absolute Error
+ def __init__(self, fp32_dir, fq_dir):
+ super().__init__(fp32_dir, fq_dir)
+ self.total_error = 0
+ self.qerror_min = float('inf')
+ self.qerror_max = -self.qerror_min
+
+ def advance_on(self, fp32_dir, fq_dir):
+ data_paths = self.collect_data_path(fp32_dir, fq_dir)
+ for tensor_name, data_path in data_paths.items():
+ for (fp32_data_path, fq_data_path) in data_path:
+ fp32_data = np.load(fp32_data_path)
+ fq_data = np.load(fq_data_path)
+
+ total_error = np.sum(np.abs(fp32_data - fq_data))
+
+ self.qerror_map[tensor_name] += total_error
+
+ self.qerror_min = min(total_error, self.qerror_min)
+ self.qerror_max = max(total_error, self.qerror_max)
+
+ # Return
+ # qerror_map (dict: tensor_name(string) -> qerror(float))
+ # qerror_min (float)
+ # qerror_max (float)
+ def get_final_result(self):
+ qerror_map = dict()
+ for tensor_name, acc in self.qerror_map.items():
+ qerror_map[tensor_name] = acc / self._num_processed_data
+ return qerror_map, self.qerror_min, self.qerror_max
+
+ def run(self):
+ self.advance_on(self._fp32_dir, self._fq_dir)
+ return self.get_final_result()
+
+
+# Scaled Root Mean Square Error (SRMSE)
+# SRMSE = sqrt(MSE) / scale
+class SRMSEComputer(QErrorComputer):
+ def __init__(self, fp32_dir, fq_dir):
+ super().__init__(fp32_dir, fq_dir)
+ if fq_dir is not None:
+ self.scale_file = Path(fq_dir) / 'scales.txt'
+
+ # Incrementally compute Qerror while traversing all data in fp32_dir and fq_dir
+ def advance_on(self, fp32_dir, fq_dir):
+ if fq_dir is not None:
+ self.scale_file = Path(fq_dir) / 'scales.txt'
+ self._fq_dir = fq_dir
+
+ data_paths = self.collect_data_path(fp32_dir, fq_dir)
+
+ for tensor_name, data_path in data_paths.items():
+ for (fp32_data_path, fq_data_path) in data_path:
+ fp32_data = np.load(fp32_data_path)
+ fq_data = np.load(fq_data_path)
+
+ MSE = np.square(fp32_data - fq_data).mean()
+
+ self.qerror_map[tensor_name] += MSE
+
+ # Return
+ # qerror_map (dict: tensor_name(string) -> qerror(float))
+ # qerror_min (float)
+ # qerror_max (float)
+ def get_final_result(self):
+ with open(self.scale_file) as f:
+ # scale_map: {tensor_name(str) -> scale(float)}
+ scale_map = json.load(f)
+
+ qerror_max = 0.0
+ qerror_map = dict()
+ for tensor_name, acc in self.qerror_map.items():
+ MSE = acc / self._num_processed_data
+ SRMSE = np.sqrt(MSE) / scale_map[tensor_name]
+ qerror_map[tensor_name] = SRMSE
+ qerror_max = max(SRMSE, qerror_max)
+
+ return qerror_map, 0.0, qerror_max
+
+ def run(self):
+ self.advance_on(self._fp32_dir, self._fq_dir)
+ return self.get_final_result()
diff --git a/compiler/visq/visqlib/Util.py b/compiler/visq/visqlib/Util.py
new file mode 100644
index 000000000..f5b6c9a1d
--- /dev/null
+++ b/compiler/visq/visqlib/Util.py
@@ -0,0 +1,37 @@
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Change a tensor name into one compatible with the Linux file system
+# '/' is replaced with '_'
+# Names longer than 255 characters are truncated to their last 255 characters
+def to_filename(tensor_name):
+ assert isinstance(tensor_name, str)
+ return tensor_name.replace('/', '_')[-255:]
+
+
+# Check if attr is valid
+def valid_attr(args, attr):
+ return hasattr(args, attr) and getattr(args, attr)
+
+
+# Recursively visit items and round floats with ndigits
+def pretty_float(item, ndigits=4):
+ if isinstance(item, dict):
+ return {k: pretty_float(v, ndigits) for k, v in item.items()}
+ if isinstance(item, list):
+ return [pretty_float(x, ndigits) for x in item]
+ if isinstance(item, float):
+ return round(item, ndigits)
+ return item
diff --git a/compute/ARMComputeEx/CMakeLists.txt b/compute/ARMComputeEx/CMakeLists.txt
index 58f558db2..c8d12c249 100644
--- a/compute/ARMComputeEx/CMakeLists.txt
+++ b/compute/ARMComputeEx/CMakeLists.txt
@@ -14,7 +14,7 @@ file(GLOB_RECURSE ACL_EX_SRCS "${ACL_EX_BASE}/*.cpp")
# generate embeded cl_kernel
execute_process (
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
- COMMAND bash -c "python resolve_includes.py"
+ COMMAND bash -c "python3 resolve_includes.py"
)
add_library(arm_compute_ex SHARED ${ACL_EX_SRCS})
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h b/compute/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h
index d29886a9d..d3e116381 100644
--- a/compute/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h
+++ b/compute/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h
@@ -255,14 +255,14 @@ private:
cl::Device _device; /**< Underlying CL device. */
std::string _kernel_path; /**< Path to the kernels folder. */
mutable std::map<std::string, const Program>
- _programs_map; /**< Map with all already loaded program data. */
+ _programs_map; /**< Map with all already loaded program data. */
mutable std::map<std::string, cl::Program>
- _built_programs_map; /**< Map with all already built program data. */
+ _built_programs_map; /**< Map with all already built program data. */
static const std::map<std::string, std::string>
- _kernel_program_map; /**< Map that associates kernel names with programs. */
+ _kernel_program_map; /**< Map that associates kernel names with programs. */
static const std::map<std::string, std::string>
- _program_source_map; /**< Contains sources for all programs.
- Used for compile-time kernel inclusion. >*/
+ _program_source_map; /**< Contains sources for all programs.
+ Used for compile-time kernel inclusion. >*/
};
-}
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_CLKERNELLIBRARY_EX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLArgMinMaxLayerKernelEx.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLArgMinMaxLayerKernelEx.h
new file mode 100644
index 000000000..46d4ae858
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLArgMinMaxLayerKernelEx.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLARGMINMAXLAYERKERNELEX_H
+#define ARM_COMPUTE_CLARGMINMAXLAYERKERNELEX_H
+
+#include "src/core/CL/ICLKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the reduction operation kernel
+ *
+ * @note The default data type for an uninitialized output tensor is
+ * signed 32-bit integer (S32). It is the user's responsibility to check
+ * that the results do not overflow because the indices are computed
+ * in unsigned 32-bit (U32).
+ */
+class CLArgMinMaxLayerKernelEx : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLArgMinMaxLayerKernelEx();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLArgMinMaxLayerKernelEx(const CLArgMinMaxLayerKernelEx &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLArgMinMaxLayerKernelEx &operator=(const CLArgMinMaxLayerKernelEx &) = delete;
+ /** Allow instances of this class to be moved */
+ CLArgMinMaxLayerKernelEx(CLArgMinMaxLayerKernelEx &&) = default;
+ /** Allow instances of this class to be moved */
+ CLArgMinMaxLayerKernelEx &operator=(CLArgMinMaxLayerKernelEx &&) = default;
+ /** Default destructor */
+ ~CLArgMinMaxLayerKernelEx() = default;
+
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. Data types supported: S32/F16/F32.
+ * @param[in] prev_output Destination tensor of the previous iterations of @ref
+ * CLArgMinMaxLayerKernelEx. Data types supported: U32/S32
+ * Has to be nullptr for the first iteration
+ * @param[out] output Destination tensor. Data types supported: U32/S32
+ * Output will have the same number of dimensions as input.
+ * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3
+ * @param[in] op Reduction operation to perform. Only ArgMin and ArgMax are supported.
+ */
+ void configure(const ICLTensor *input, const ICLTensor *prev_output, ICLTensor *output,
+ unsigned int axis, ReductionOperation op);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLArgMinMaxLayerKernelEx.
+ *
+ * @param[in] input Source tensor info. Data types supported: S32/F16/F32.
+ * @param[in] prev_output Destination tensor info of the previous iterations. Data types
+ * supported: U32/S32
+ * Has to be nullptr for the first iteration
+ * @param[in] output Destination tensor info. Data types supported: U32/S32
+ * Output will have the same number of dimensions as input.
+ * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3
+ * @param[in] op Reduction operation to perform. Only ArgMin and ArgMax are supported.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *prev_output,
+ const ITensorInfo *output, unsigned int axis, ReductionOperation op);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ const ICLTensor *_prev_output;
+ ICLTensor *_output;
+ unsigned int _reduction_axis;
+ ReductionOperation _op;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLARGMINMAXLAYERKERNELEX_H */
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h
index bb6fcb8f5..eac866b67 100644
--- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h
@@ -41,8 +41,8 @@
#ifndef __ARM_COMPUTE_CLBINARYLOGICALOPKERNEL_H__
#define __ARM_COMPUTE_CLBINARYLOGICALOPKERNEL_H__
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/TypesEx.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLCastBoolKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLCastBoolKernel.h
new file mode 100644
index 000000000..cf671102e
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLCastBoolKernel.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2018-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/**
+ * @file CLCastBoolKernel.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines CLCastBoolKernel class
+ */
+
+#ifndef __ARM_COMPUTE_CLCASTBOOLKERNEL_H__
+#define __ARM_COMPUTE_CLCASTBOOLKERNEL_H__
+
+#include "src/core/CL/ICLSimple3DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+ * @brief Class for the kernel converting boolean type
+ */
+class CLCastBoolKernel : public ICLSimple3DKernel
+{
+public:
+ /**
+ * @brief Initialise the kernel's input and output.
+ * @param[in] input Input tensor. Data types supported: U8
+ * @param[in] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
+ * @return N/A
+ */
+ void configure(const ICLTensor *input, ICLTensor *output);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLCastBoolKernel
+ *
+ * @param[in] input Source tensor info. Data types supported: U8.
+ * @param[in] output Destination tensor info. Data type supported: U8/S8/U16/S16/U32/S32/F16/F32.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLCASTBOOLKERNEL_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h
index a614d5259..6729fb0f1 100644
--- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h
@@ -47,15 +47,15 @@
#ifndef __ARM_COMPUTE_CLEMBEDDINGLOOKUPKERNEL_H__
#define __ARM_COMPUTE_CLEMBEDDINGLOOKUPKERNEL_H__
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
class ICLTensor;
/**
-* @brief Class to perform EmbeddingLookup operation with opencl kernel
-*/
+ * @brief Class to perform EmbeddingLookup operation with opencl kernel
+ */
class CLEmbeddingLookupKernel : public ICLKernel
{
public:
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h
new file mode 100644
index 000000000..64908ab59
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ARM_COMPUTE_CLGEMMMatrixAccumulateBiasesKernel_H
+#define ARM_COMPUTE_CLGEMMMatrixAccumulateBiasesKernel_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+/** Interface to add a bias to each row of the input tensor
+ *
+ */
+class CLGEMMMatrixAccumulateBiasesKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLGEMMMatrixAccumulateBiasesKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMMatrixAccumulateBiasesKernel(const CLGEMMMatrixAccumulateBiasesKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMMatrixAccumulateBiasesKernel &
+ operator=(const CLGEMMMatrixAccumulateBiasesKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLGEMMMatrixAccumulateBiasesKernel(CLGEMMMatrixAccumulateBiasesKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLGEMMMatrixAccumulateBiasesKernel &operator=(CLGEMMMatrixAccumulateBiasesKernel &&) = default;
+ /** Set the accumulate buffer and the biases of the kernel.
+ *
+ * @param[in, out] accum The accumulate tensor to convert. Data types supported: F16/F32
+ * @param[in] biases The shared biases tensor to append. It must be a 1D tensor. Data types
+ * supported: Same as @p input
+ */
+ void configure(ICLTensor *accum, const ICLTensor *biases);
+ /** Set the accumulate buffer and the biases of the kernel.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in, out] accum The accumulate tensor to convert. Data types supported: F16/F32
+ * @param[in] biases The shared biases tensor to append. It must be a 1D tensor. Data
+ * types supported: Same as @p input
+ */
+ void configure(const CLCompileContext &compile_context, ICLTensor *accum,
+ const ICLTensor *biases);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLGEMMMatrixAccumulateBiasesKernel
+ *
+ * @param[in] accum The accumulate tensor to convert. Data types supported: F16/F32
+ * @param[in] biases The shared biases tensor to append. It must be a 1D tensor. Data types
+ * supported: Same as @p input
+ * @param[in] gpu_target GPU target
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *accum, const ITensorInfo *biases, GPUTarget gpu_target);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ ICLTensor *_accum;
+ const ICLTensor *_biases;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLGEMMMatrixAccumulateBiasesKernel_H */
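
For orientation, here is a minimal sketch of driving the kernel declared above. It is illustrative only: the function name and tensor shapes are made up, and the standard CLScheduler runtime setup is assumed.

    #include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    void accumulate_biases_sketch()
    {
      using namespace arm_compute;
      CLScheduler::get().default_init(); // one-time CL context/queue setup

      CLTensor accum, biases;
      accum.allocator()->init(TensorInfo(TensorShape(16U, 4U), 1, DataType::F32));
      biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32)); // 1D, same width as a row
      accum.allocator()->allocate();
      biases.allocator()->allocate();

      CLGEMMMatrixAccumulateBiasesKernel kernel;
      kernel.configure(&accum, &biases);  // adds biases to every row of accum in-place
      CLScheduler::get().enqueue(kernel); // submit to the configured command queue
    }
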
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherExKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherExKernel.h
index 6630c7be7..a55f2401d 100644
--- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherExKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherExKernel.h
@@ -47,7 +47,7 @@
#ifndef __ARM_COMPUTE_CLGATHEREXKERNEL_H__
#define __ARM_COMPUTE_CLGATHEREXKERNEL_H__
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h
index 99cfa61ec..f9d6f7cc5 100644
--- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h
@@ -47,7 +47,7 @@
#ifndef __ARM_COMPUTE_CLHASHTABLELOOKUPKERNEL_H__
#define __ARM_COMPUTE_CLHASHTABLELOOKUPKERNEL_H__
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
namespace arm_compute
@@ -55,8 +55,8 @@ namespace arm_compute
class ICLTensor;
/**
-* @brief Class to perform HashtableLookup operation with opencl kernel
-*/
+ * @brief Class to perform HashtableLookup operation with opencl kernel
+ */
class CLHashtableLookupKernel : public ICLKernel
{
public:
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.h
index f57e799ad..7da9e9a4c 100644
--- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.h
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.h
@@ -41,7 +41,7 @@
#ifndef __ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNELEX_H__
#define __ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNELEX_H__
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLMemsetKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLMemsetKernel.h
new file mode 100644
index 000000000..4befdd05c
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLMemsetKernel.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2018-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLMEMSETKERNEL_H
+#define ARM_COMPUTE_CLMEMSETKERNEL_H
+
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for filling the planes of a tensor */
+class CLMemsetKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLMemsetKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLMemsetKernel(const CLMemsetKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLMemsetKernel &operator=(const CLMemsetKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLMemsetKernel(CLMemsetKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLMemsetKernel &operator=(CLMemsetKernel &&) = default;
+ /** Default destructor */
+ ~CLMemsetKernel() = default;
+
+ /** Initialise the kernel's tensor and filling value
+ *
+ * @param[in,out] tensor Input tensor to fill. Supported data types: All.
+ * @param[in] constant_value The value used to fill the planes of the tensor
+ * @param[in] window Window to be used in case setting only part of a tensor. Default
+ * is nullptr.
+ */
+ void configure(ICLTensor *tensor, const PixelValue &constant_value, Window *window = nullptr);
+ /** Initialise the kernel's tensor and filling value
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in,out] tensor Input tensor to fill. Supported data types: All.
+ * @param[in] constant_value The value used to fill the planes of the tensor
+ * @param[in] window Window to be used in case setting only part of a tensor. Default
+ * is nullptr.
+ */
+ void configure(const CLCompileContext &compile_context, ICLTensor *tensor,
+ const PixelValue &constant_value, Window *window = nullptr);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLMemsetKernel
+ *
+ * @param[in] tensor Source tensor info. Data types supported: All.
+ * @param[in] constant_value The value used to fill the planes of the tensor
+ * @param[in] window Window to be used in case setting only part of a tensor. Default is
+ * nullptr.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *tensor, const PixelValue &constant_value,
+ Window *window = nullptr);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ ICLTensor *_tensor;
+ Window _full_window;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLMEMSETKERNEL_H */
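
A minimal sketch of filling a whole tensor with the kernel above; the shape is illustrative, and PixelValue's float constructor is assumed to match the tensor's F32 type.

    #include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
    #include "arm_compute/core/PixelValue.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    void memset_sketch()
    {
      using namespace arm_compute;
      CLTensor t;
      t.allocator()->init(TensorInfo(TensorShape(8U, 8U), 1, DataType::F32));
      t.allocator()->allocate();

      CLMemsetKernel kernel;
      kernel.configure(&t, PixelValue(0.0f)); // no window -> fill the whole tensor
      CLScheduler::get().enqueue(kernel);
    }
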
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLMultiplyScaleFactorKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLMultiplyScaleFactorKernel.h
index 90e8b5705..5394a062c 100644
--- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLMultiplyScaleFactorKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLMultiplyScaleFactorKernel.h
@@ -41,7 +41,7 @@
#ifndef __ARM_COMPUTE_CLMULTIPLYSCALEFACTORKERNEL_H__
#define __ARM_COMPUTE_CLMULTIPLYSCALEFACTORKERNEL_H__
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLNegKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLNegKernel.h
index fa383c0d0..384050aff 100644
--- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLNegKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLNegKernel.h
@@ -41,7 +41,7 @@
#ifndef __ARM_COMPUTE_CLNEGKERNEL_H__
#define __ARM_COMPUTE_CLNEGKERNEL_H__
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLOneHotKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLOneHotKernel.h
new file mode 100644
index 000000000..1d64f9f7d
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLOneHotKernel.h
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLONEHOTKERNEL_H__
+#define __ARM_COMPUTE_CLONEHOTKERNEL_H__
+#include "src/core/CL/ICLKernel.h"
+#include "arm_compute/core/Types.h"
+namespace arm_compute
+{
+class ICLTensor;
+/** Interface for the kernel to perform one-hot encoding */
+class CLOneHotKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLOneHotKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLOneHotKernel(const CLOneHotKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLOneHotKernel &operator=(const CLOneHotKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLOneHotKernel(CLOneHotKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLOneHotKernel &operator=(CLOneHotKernel &&) = default;
+ /** Default destructor */
+ ~CLOneHotKernel() = default;
+ /** Initialise the kernel's inputs and output
+ *
+ * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the
+ * following types: U32/S32
+ * @param[in] on_value On value tensor. Supported tensor rank: only 1. Data type supported:
+ * U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[in] off_value Off value tensor. Supported tensor rank: only 1. Data type supported:
+ * Same as @p on_value
+ * @param[out] output Destination tensor. Data type supported: Same as @p on_value
+ * @param[in] depth The depth of the one hot dimension.
+ * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1.
+ * The value must be in range [-indices.rank, indices.rank).
+ */
+ void configure(const ICLTensor *indices, const ICLTensor *on_value, const ICLTensor *off_value,
+ ICLTensor *output, int depth, int axis = -1);
+ /** Initialise the kernel's inputs and output already initialized to off_value
+ *
+ * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the
+ * following types: U32/S32
+ * @param[in] on_value On value tensor. Supported tensor rank: only 1. Data type supported:
+ * U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[out] output Destination tensor. Data type supported: Same as @p on_value
+ * @param[in] depth The depth of the one hot dimension.
+ * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1.
+ * The value must be in range [-indices.rank, indices.rank).
+ */
+ void configure(const ICLTensor *indices, const ICLTensor *on_value, ICLTensor *output, int depth,
+ int axis = -1);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLOneHotKernel
+ *
+ * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the
+ * following types: U32/S32
+ * @param[in] on_value On value tensor. Supported tensor rank: only 1. Data type supported:
+ * U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[in] off_value Off value tensor. Supported tensor rank: only 1. Data type supported:
+ * Same as @p on_value
+ * @param[in] output Destination tensor. Data type supported: Same as @p on_value
+ * @param[in] depth The depth of the one hot dimension.
+ * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1.
+ * The value must be in range [-indices.rank, indices.rank).
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *indices, const ITensorInfo *on_value,
+ const ITensorInfo *off_value, const ITensorInfo *output, int depth,
+ int axis = -1);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLOneHotKernel without off_value
+ *
+ * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the
+ * following types: U32/S32
+ * @param[in] on_value On value tensor. Supported tensor rank: only 1. Data type supported:
+ * U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[in] output Destination tensor. Data type supported: Same as @p on_value
+ * @param[in] depth The depth of the one hot dimension.
+ * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1.
+ * The value must be in range [-indices.rank, indices.rank).
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *indices, const ITensorInfo *on_value,
+ const ITensorInfo *output, int depth, int axis = -1);
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ /** Initialise the kernel's inputs and outputs internally
+ *
+ * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the
+ * following types: U32/S32
+ * @param[in] on_value On value tensor. Supported tensor rank: only 1. Data type supported:
+ * U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[out] output Destination tensor. Data type supported: Same as @p on_value
+ * @param[in] depth The depth of the one hot dimension.
+ * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1.
+ * The value must be in range [-indices.rank, indices.rank).
+ */
+ void configure_common(const ICLTensor *indices, const ICLTensor *on_value, ICLTensor *output,
+ int depth, int axis);
+
+private:
+ const ICLTensor *_indices; /**< Indices tensor */
+ const ICLTensor *_on_value; /**< On value tensor */
+ const ICLTensor *_off_value; /**< Off value tensor */
+ ICLTensor *_output; /**< Destination tensor */
+ bool _is_off_value_memset; /**< Whether off_value is zero */
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLONEHOTKERNEL_H__ */
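
A minimal sketch of a one-hot encode with the kernel above. Shapes are illustrative: the output shape assumes the new depth dimension becomes the innermost axis for the default axis = -1, and the on/off values still have to be written into their tensors before the kernel runs.

    #include "arm_compute/core/CL/kernels/CLOneHotKernel.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    void one_hot_sketch()
    {
      using namespace arm_compute;
      CLTensor indices, on_value, off_value, output;
      indices.allocator()->init(TensorInfo(TensorShape(4U), 1, DataType::S32));
      on_value.allocator()->init(TensorInfo(TensorShape(1U), 1, DataType::F32));
      off_value.allocator()->init(TensorInfo(TensorShape(1U), 1, DataType::F32));
      output.allocator()->init(TensorInfo(TensorShape(10U, 4U), 1, DataType::F32));
      for (CLTensor *t : {&indices, &on_value, &off_value, &output})
        t->allocator()->allocate();

      CLOneHotKernel kernel;
      kernel.configure(&indices, &on_value, &off_value, &output, /*depth*/ 10); // axis defaults to -1
      CLScheduler::get().enqueue(kernel);
    }
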
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLPadLayerKernelEx.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLPadLayerKernelEx.h
new file mode 100644
index 000000000..d4230aaf3
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLPadLayerKernelEx.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLPADLAYERKERNELEX_H
+#define ARM_COMPUTE_CLPADLAYERKERNELEX_H
+
+#include "src/core/CL/ICLKernel.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the PadLayer function. */
+class CLPadLayerKernelEx : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLPadLayerKernelEx();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLPadLayerKernelEx(const CLPadLayerKernelEx &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLPadLayerKernelEx &operator=(const CLPadLayerKernelEx &) = delete;
+ /** Allow instances of this class to be moved */
+ CLPadLayerKernelEx(CLPadLayerKernelEx &&) = default;
+ /** Allow instances of this class to be moved */
+ CLPadLayerKernelEx &operator=(CLPadLayerKernelEx &&) = default;
+ /** Default destructor */
+ ~CLPadLayerKernelEx() = default;
+ /** Set the input and output tensor.
+ *
+ * @param[in] input Source tensor. Data types supported: U8, S8, QASYMM8,
+ * QASYMM8_SIGNED, U16, S16, U32, S32, F16, F32.
+ * @param[out] output Output tensor. Data type supported: same as @p input
+ * @param[in] padding The padding for each spatial dimension of the input tensor. The pair
+ * padding[i] specifies the front and the end padding in the i-th dimension.
+ * @param[in] constant_value (Optional) Constant value to be used for the padding.
+ * @param[in] mode (Optional) Controls whether the padding should be filled with @p
+ * constant_value using CONSTANT, or reflect the input, either including the border values
+ * (SYMMETRIC) or not (REFLECT).
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const PaddingList &padding,
+ PixelValue constant_value = PixelValue(),
+ PaddingMode mode = PaddingMode::CONSTANT);
+ /** Set the input and output tensor.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: All.
+ * @param[out] output Output tensor. Data type supported: same as @p input
+ * @param[in] padding The padding for each spatial dimension of the input tensor. The
+ * pair padding[i] specifies the front and the end padding in the i-th dimension.
+ * @param[in] constant_value (Optional) Constant value to be used for the padding.
+ * @param[in] mode (Optional) Controls whether the padding should be filled with @p
+ * constant_value using CONSTANT, or reflect the input, either including the border values
+ * (SYMMETRIC) or not (REFLECT).
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output,
+ const PaddingList &padding, PixelValue constant_value = PixelValue(),
+ PaddingMode mode = PaddingMode::CONSTANT);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLPadLayerKernelEx
+ *
+ * @param[in] input Source tensor info. Data types supported: U8, S8, QASYMM8,
+ * QASYMM8_SIGNED, U16, S16, U32, S32, F16, F32.
+ * @param[in] output Output tensor info. Data type supported: same as @p input
+ * @param[in] padding The padding for each spatial dimension of the input tensor. The pair
+ * padding[i] specifies the front and the end padding in the i-th dimension.
+ * @param[in] constant_value (Optional) Constant value to be used for the padding.
+ * @param[in] mode (Optional) Controls whether the padding should be filled with @p
+ * constant_value using CONSTANT, or reflect the input, either including the border values
+ * (SYMMETRIC) or not (REFLECT).
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ const PaddingList &padding, PixelValue constant_value = PixelValue(),
+ PaddingMode mode = PaddingMode::CONSTANT);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ int _input_start_x;
+ int _input_start_y;
+ bool _4d_enabled;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLPADLAYERKERNELEX_H */
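
A minimal sketch of a constant pad with the kernel above; padding one element on both ends of x and two on both ends of y is illustrative, so a 4x4 input becomes a 6x8 output.

    #include "arm_compute/core/CL/kernels/CLPadLayerKernelEx.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    void pad_sketch()
    {
      using namespace arm_compute;
      CLTensor in, out;
      in.allocator()->init(TensorInfo(TensorShape(4U, 4U), 1, DataType::F32));
      out.allocator()->init(TensorInfo(TensorShape(6U, 8U), 1, DataType::F32));
      in.allocator()->allocate();
      out.allocator()->allocate();

      const PaddingList padding = {{1, 1}, {2, 2}}; // {front, back} per dimension
      CLPadLayerKernelEx kernel;
      kernel.configure(&in, &out, padding); // default PixelValue() and CONSTANT mode
      CLScheduler::get().enqueue(kernel);
    }
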
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLQuantizationSymmetricKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLQuantizationSymmetricKernel.h
index 4e1b56cba..3f60db7bb 100644
--- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLQuantizationSymmetricKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLQuantizationSymmetricKernel.h
@@ -41,7 +41,7 @@
#ifndef __ARM_COMPUTE_CLQUANTIZATIONSYMMETRICKERNEL_H__
#define __ARM_COMPUTE_CLQUANTIZATIONSYMMETRICKERNEL_H__
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceOperationKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceOperationKernel.h
index 9b8a239d3..548f29a27 100644
--- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceOperationKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceOperationKernel.h
@@ -47,8 +47,8 @@
#ifndef __ARM_COMPUTE_CLREDUCEOPERATIONKERNEL_H__
#define __ARM_COMPUTE_CLREDUCEOPERATIONKERNEL_H__
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/TypesEx.h"
+#include "src/core/CL/ICLKernel.h"
+#include "arm_compute/core/Types.h"
namespace arm_compute
{
@@ -95,7 +95,7 @@ public:
* @return N/A
*/
void configure(const ICLTensor *input, ICLTensor *output, const uint32_t axis,
- ReduceOperation op);
+ ReductionOperation op);
/**
* @brief Static function to check if given info will lead to a valid configuration of @ref
@@ -108,7 +108,7 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const uint32_t axis,
- ReduceOperation op);
+ ReductionOperation op);
/*
* @brief Run CLReduceOperationKernel op
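
These hunks swap the EX-local ReduceOperation enum for the stock arm_compute ReductionOperation from core/Types.h, so call sites change only in the enum they pass. A hedged sketch of the validate() call after the rename (names and shapes are illustrative):

    #include "arm_compute/core/CL/kernels/CLReduceOperationKernel.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    bool reduce_sum_is_valid()
    {
      using namespace arm_compute;
      CLTensor input, output;
      input.allocator()->init(TensorInfo(TensorShape(8U, 4U), 1, DataType::F32));
      output.allocator()->init(TensorInfo(TensorShape(1U, 4U), 1, DataType::F32));
      const Status s = CLReduceOperationKernel::validate(input.info(), output.info(),
                                                         0U /*axis*/, ReductionOperation::SUM);
      return s.error_code() == ErrorCode::OK;
    }
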
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLScaleFactorSymm8Kernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLScaleFactorSymm8Kernel.h
index 4d4478ece..5f5b7f9b8 100644
--- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLScaleFactorSymm8Kernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLScaleFactorSymm8Kernel.h
@@ -41,7 +41,7 @@
#ifndef __ARM_COMPUTE_CLSCALEFACTORSYMM8KERNEL_H__
#define __ARM_COMPUTE_CLSCALEFACTORSYMM8KERNEL_H__
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h
index aa4a14812..09073af7c 100644
--- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h
@@ -47,7 +47,7 @@
#ifndef __ARM_COMPUTE_CLTOPKV2KERNEL_H__
#define __ARM_COMPUTE_CLTOPKV2KERNEL_H__
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
// these parameters can be changed
#define _ITEMS 16 // number of items in a group
diff --git a/compute/ARMComputeEx/arm_compute/core/CPP/kernels/CPPOneHotKernelEx.h b/compute/ARMComputeEx/arm_compute/core/CPP/kernels/CPPOneHotKernelEx.h
deleted file mode 100644
index 6e8bdc1c2..000000000
--- a/compute/ARMComputeEx/arm_compute/core/CPP/kernels/CPPOneHotKernelEx.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CPPONEHOTERNEL_H__
-#define __ARM_COMPUTE_CPPONEHOTERNEL_H__
-
-#include "arm_compute/core/CPP/ICPPKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** CPP kernel to perform tensor OneHot operation. */
-class CPPOneHotKernelEx : public ICPPKernel
-{
-public:
- const char *name() const override { return "CPPOneHotKernelEx"; }
- /** Default constructor */
- CPPOneHotKernelEx();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CPPOneHotKernelEx(const CPPOneHotKernelEx &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CPPOneHotKernelEx &operator=(const CPPOneHotKernelEx &) = delete;
- /** Allow instances of this class to be moved */
- CPPOneHotKernelEx(CPPOneHotKernelEx &&) = default;
- /** Allow instances of this class to be moved */
- CPPOneHotKernelEx &operator=(CPPOneHotKernelEx &&) = default;
- /** Default destructor */
- ~CPPOneHotKernelEx() = default;
-
- /** Set the input and output of the kernel.
- *
- * @param[in] indices A tensor for indices. Data types supported: S32
- * @param[in] depth A tensor for depth. Data types supported: S32
- * @param[in] on_value A tensor for on_value. Data types supported: F32
- * @param[in] off_value A tensor for off_value. Data types supported: F32*
- * @param[out] output A tensor for computed value of one hot operator
- * @param[in] axis An int value for axis
- */
- void configure(const ITensor *indices, const ITensor *depth, const ITensor *on_value,
- const ITensor *off_value, ITensor *output, const int axis);
-
- /** Static function to check if given info will lead to a valid configuration of @ref
- * CPPOneHotKernelEx
- *
- * @param[in] indices A tensor for indices. Data types supported: S32
- * @param[in] depth A tensor for depth. Data types supported: S32
- * @param[in] on_value A tensor for on_value. Data types supported: F32
- * @param[in] off_value A tensor for off_value. Data types supported: F32*
- * @param[in] axis An int value for axis
- *
- * @return a status
- */
- static Status validate(const ITensor *indices, const ITensor *depth, const ITensor *on_value,
- const ITensor *off_value, const int axis);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- bool is_parallelisable() const override;
-
-private:
- /** Template function to run the topKV operation. */
- template <typename T> void run_one_hot();
-
- const ITensor *_indices;
- const ITensor *_depth;
- const ITensor *_on_value;
- const ITensor *_off_value;
- ITensor *_output;
- int _axis;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CPPONEHOTKERNEL_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/NEElementwiseOperationFuncs.h b/compute/ARMComputeEx/arm_compute/core/NEON/NEElementwiseOperationFuncs.h
index 28114f8b5..933d8760d 100644
--- a/compute/ARMComputeEx/arm_compute/core/NEON/NEElementwiseOperationFuncs.h
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/NEElementwiseOperationFuncs.h
@@ -53,22 +53,6 @@ class QuantizationInfo;
namespace arm_compute
{
-float32x4x4_t load_quantized(const uint8_t *input1_ptr, const int32x4_t &offset,
- const float32x4_t &scale);
-
-void store_quantized(uint8_t *output_ptr, const float32x4x4_t &rf, const float32x4_t &offset,
- const float32x4_t &invscale);
-
-float32x4x4_t dup_quantized(uint8_t broadcast_value, int offset, float scale);
-
-void elementwise_op_quantized(
- const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window,
- uint8_t (*scalar_func)(const float &, const float &, QuantizationInfo),
- int (*broadcast_func)(int, int, int, const uint8_t *, float32x4x4_t, uint8_t *, int32x4_t,
- float32x4_t, float32x4_t, float32x4_t, const bool),
- int (*neon_func)(int, int, int, const uint8_t *, const uint8_t *, uint8_t *, int32x4_t,
- int32x4_t, float32x4_t, float32x4_t, float32x4_t, float32x4_t));
-
void elementwise_op(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window,
float (*scalar_func)(const float &, const float &),
int (*broadcast_func)(int, int, int, const float *, const float &, float *,
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEActivationLayerKernelEx.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEActivationLayerKernelEx.h
deleted file mode 100644
index a827f48f8..000000000
--- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEActivationLayerKernelEx.h
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NEACTIVATIONLAYERKERNELEX_H__
-#define __ARM_COMPUTE_NEACTIVATIONLAYERKERNELEX_H__
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/utils/misc/Traits.h"
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-#include <arm_fp16.h>
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the activation layer kernel. */
-class NEActivationLayerKernelEx : public INEKernel
-{
-public:
- const char *name() const override { return "NEActivationLayerKernelEx"; }
- /** Constructor */
- NEActivationLayerKernelEx();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEActivationLayerKernelEx(const NEActivationLayerKernelEx &) = delete;
- /** Default move constructor */
- NEActivationLayerKernelEx(NEActivationLayerKernelEx &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEActivationLayerKernelEx &operator=(const NEActivationLayerKernelEx &) = delete;
- /** Default move assignment operator */
- NEActivationLayerKernelEx &operator=(NEActivationLayerKernelEx &&) = default;
- /** Set the input and output tensor.
- *
- * @note If the output tensor is a nullptr, the activation function will be performed in-place
- *
- * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this
- * tensor will store the result
- * of the activation function. Data types supported:
- * QASYMM8/QSYMM16/F16/F32.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] activation_info Activation layer information.
- */
- void configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info);
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NEActivationLayerKernelEx
- *
- * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor
- * will store the result
- * of the activation function. Data types supported: QASYMM8/QSYMM16/F16/F32.
- * @param[in] output Destination tensor info. Data type supported: same as @p input
- * @param[in] act_info Activation layer information.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const ActivationLayerInfo &act_info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- using ActivationFunction = ActivationLayerInfo::ActivationFunction;
- /** Common signature for all the specialised @ref NEActivationLayerKernelEx functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using ActivationFunctionExecutorPtr = void (NEActivationLayerKernelEx::*)(const Window &window);
- /** Function to apply an activation function on a tensor.
- *
- * @param[in] window Region on which to execute the kernel
- */
- template <ActivationLayerInfo::ActivationFunction F, typename T>
- typename std::enable_if<arm_compute::utils::traits::is_floating_point<T>::value, void>::type
- activation(const Window &window);
- /** Function to apply an activation function on a tensor.
- *
- * @param[in] window Region on which to execute the kernel
- */
- template <ActivationLayerInfo::ActivationFunction F, typename T>
- typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type
- activation(const Window &window);
- /** Function to apply an activation function on a tensor.
- *
- * @param[in] window Region on which to execute the kernel
- */
- template <ActivationLayerInfo::ActivationFunction F, typename T>
- typename std::enable_if<std::is_same<T, qsymm16_t>::value, void>::type
- activation(const Window &window);
-
-private:
- ITensor *_input;
- ITensor *_output;
- ActivationFunctionExecutorPtr _func;
- ActivationLayerInfo _act_info;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_NEACTIVATIONLAYERKERNELEX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h
index 8c544cda8..c46b26170 100644
--- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h
@@ -41,15 +41,19 @@
#ifndef __ARM_COMPUTE_NEBINARYLOGICALOPERATIONKERNEL_H__
#define __ARM_COMPUTE_NEBINARYLOGICALOPERATIONKERNEL_H__
-#include "arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h"
#include "arm_compute/core/TypesEx.h"
+#include "src/core/cpu/kernels/CpuElementwiseKernel.h"
+
namespace arm_compute
{
-class NEBinaryLogicalOperationKernel : public NEElementwiseOperationKernel
+class NEBinaryLogicalOperationKernel : public cpu::kernels::CpuComparisonKernel
{
public:
+ const char *name() const override { return "NEBinaryLogicalOperationKernel"; }
+
+ NEBinaryLogicalOperationKernel() = default;
/** Default destructor */
~NEBinaryLogicalOperationKernel() = default;
@@ -81,6 +85,10 @@ protected:
// Inherited methods overridden:
static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2,
const ITensorInfo &output);
+
+ std::function<void(const ITensor *input1, const ITensor *input2, ITensor *output,
+ const Window &window)>
+ _function;
};
} // namespace arm_compute
#endif /* __ARM_COMPUTE_NEBINARYLOGICALOPERATIONKERNEL_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NECastBoolKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NECastBoolKernel.h
new file mode 100644
index 000000000..036d56e69
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NECastBoolKernel.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2016-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NECASTBOOLKERNEL_H__
+#define __ARM_COMPUTE_NECASTBOOLKERNEL_H__
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/**
+ * @brief Class for the kernel converting boolean type
+ */
+class NECastBoolKernel : public INEKernel
+{
+public:
+ const char *name() const override { return "NECastBoolKernel"; }
+ /** Default constructor*/
+ NECastBoolKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NECastBoolKernel(const NECastBoolKernel &) = delete;
+ /** Default move constructor */
+ NECastBoolKernel(NECastBoolKernel &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NECastBoolKernel &operator=(const NECastBoolKernel &) = delete;
+ /** Default move assignment operator */
+ NECastBoolKernel &operator=(NECastBoolKernel &&) = default;
+ /** Set the input and output of the kernel
+ *
+ * Valid conversions Input -> Output :
+ *
+ * - U8 -> U8, S8, U16, S16, U32, S32, F32, F16
+ *
+ * @param[in] input The input tensor to convert. Data types supported: U8
+ * @param[out] output The output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
+ */
+ void configure(const ITensor *input, ITensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NECastBoolKernel
+ *
+ * @param[in] input Source tensor info. Data types supported: U8
+ * @param[in] output Destination tensor info. Data type supported: U8/S8/U16/S16/U32/S32/F16/F32.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ const ITensor *_input;
+ ITensor *_output;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_NECASTBOOLKERNEL_H__ */
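
A minimal sketch of the boolean cast on NEON; the shape is illustrative, and booleans are assumed to arrive as U8 per the doc comment above.

    #include "arm_compute/core/NEON/kernels/NECastBoolKernel.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/NEScheduler.h"
    #include "arm_compute/runtime/Tensor.h"

    void cast_bool_sketch()
    {
      using namespace arm_compute;
      Tensor in, out;
      in.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::U8)); // bool stored as U8
      out.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32));
      in.allocator()->allocate();
      out.allocator()->allocate();

      NECastBoolKernel kernel;
      kernel.configure(&in, &out);
      NEScheduler::get().schedule(&kernel, Window::DimY);
    }
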
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h
index 88f21c96e..621500eb8 100644
--- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h
@@ -41,7 +41,7 @@
#ifndef __ARM_COMPUTE_NEEMBEDDINGLOOKUPKERNEL_H__
#define __ARM_COMPUTE_NEEMBEDDINGLOOKUPKERNEL_H__
-#include "arm_compute/core/NEON/INEKernel.h"
+#include "src/core/NEON/INEKernel.h"
#include "arm_compute/core/Types.h"
namespace arm_compute
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h
new file mode 100644
index 000000000..f8f7ac567
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEGEMMMATRIXACCUMULATEBIASESKERNEL_H
+#define ARM_COMPUTE_NEGEMMMATRIXACCUMULATEBIASESKERNEL_H
+
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+/** NEON kernel to add a bias to each row of the input tensor */
+class NEGEMMMatrixAccumulateBiasesKernel : public INEKernel
+{
+public:
+ const char *name() const override { return "NEGEMMMatrixAccumulateBiasesKernel"; }
+ /** Default constructor */
+ NEGEMMMatrixAccumulateBiasesKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMMatrixAccumulateBiasesKernel(const NEGEMMMatrixAccumulateBiasesKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMMatrixAccumulateBiasesKernel &
+ operator=(const NEGEMMMatrixAccumulateBiasesKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEGEMMMatrixAccumulateBiasesKernel(NEGEMMMatrixAccumulateBiasesKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEGEMMMatrixAccumulateBiasesKernel &operator=(NEGEMMMatrixAccumulateBiasesKernel &&) = default;
+ /** Default destructor */
+ ~NEGEMMMatrixAccumulateBiasesKernel() = default;
+ /** Set the accumulate buffer and the biases of the kernel.
+ *
+ * @param[in, out] accum The accumulate tensor to convert. Data type supported: F32
+ * @param[in] biases The shared biases tensor to append. It must be a 1D tensor. Data type
+ * supported: Same as @p input
+ */
+ void configure(ITensor *accum, const ITensor *biases);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NEGEMMMatrixAccumulateBiasesKernel
+ *
+ * @param[in] accum The accumulate tensor to convert. Data type supported: F32
+ * @param[in] biases The shared biases tensor to append. It must be a 1D tensor. Data type
+ * supported: Same as @p input
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *accum, const ITensorInfo *biases);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ ITensor *_accum;
+ const ITensor *_biases;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEGEMMMATRIXACCUMULATEBIASESKERNEL_H */
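
As with the CL variant earlier in this diff, a minimal usage sketch; shapes are illustrative and only F32 is used, per the doc comment.

    #include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/NEScheduler.h"
    #include "arm_compute/runtime/Tensor.h"

    void accumulate_biases_neon_sketch()
    {
      using namespace arm_compute;
      Tensor accum, biases;
      accum.allocator()->init(TensorInfo(TensorShape(16U, 4U), 1, DataType::F32));
      biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
      accum.allocator()->allocate();
      biases.allocator()->allocate();

      NEGEMMMatrixAccumulateBiasesKernel kernel;
      kernel.configure(&accum, &biases); // in-place row-wise bias add
      NEScheduler::get().schedule(&kernel, Window::DimY);
    }
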
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEGatherKernelEx.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEGatherKernelEx.h
index e765aa489..a03e08ade 100644
--- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEGatherKernelEx.h
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEGatherKernelEx.h
@@ -41,7 +41,7 @@
#ifndef __ARM_COMPUTE_NEGATHERKERNELEX_H__
#define __ARM_COMPUTE_NEGATHERKERNELEX_H__
-#include "arm_compute/core/NEON/INEKernel.h"
+#include "src/core/NEON/INEKernel.h"
#include "arm_compute/core/Types.h"
namespace arm_compute
@@ -126,6 +126,7 @@ private:
const ITensor *_input;
const ITensor *_indices;
int _axis;
+ size_t _indices_rank;
ITensor *_output;
kernel_ptr _func;
};
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h
index cb2a485d5..fb3a72725 100644
--- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h
@@ -41,7 +41,7 @@
#ifndef __ARM_COMPUTE_NEHASHTABLELOOKUPKERNEL_H__
#define __ARM_COMPUTE_NEHASHTABLELOOKUPKERNEL_H__
-#include "arm_compute/core/NEON/INEKernel.h"
+#include "src/core/NEON/INEKernel.h"
#include "arm_compute/core/Types.h"
namespace arm_compute
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.h
index 8724cc69b..1d786b59e 100644
--- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.h
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.h
@@ -41,7 +41,7 @@
#ifndef __ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNELEX_H__
#define __ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNELEX_H__
-#include "arm_compute/core/NEON/INEKernel.h"
+#include "src/core/NEON/INEKernel.h"
namespace arm_compute
{
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h
index 198b0be9d..ab534fe96 100644
--- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h
@@ -41,7 +41,7 @@
#ifndef __ARM_COMPUTE_NEMULTIPLYSCALEFACTORKERNEL_H__
#define __ARM_COMPUTE_NEMULTIPLYSCALEFACTORKERNEL_H__
-#include "arm_compute/core/NEON/INEKernel.h"
+#include "src/core/NEON/INEKernel.h"
namespace arm_compute
{
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEOneHotKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEOneHotKernel.h
new file mode 100644
index 000000000..c1c9f7a3c
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEOneHotKernel.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEONEHOTKERNEL_H__
+#define __ARM_COMPUTE_NEONEHOTKERNEL_H__
+#include "src/core/NEON/INEKernel.h"
+#include "arm_compute/core/Types.h"
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+/** Kernel to perform one-hot operation on NEON */
+class NEOneHotKernel : public INEKernel
+{
+public:
+ /** Default constructor. */
+ NEOneHotKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ NEOneHotKernel(const NEOneHotKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ NEOneHotKernel &operator=(const NEOneHotKernel &) = delete;
+ /** Allow instances of this class to be moved. */
+ NEOneHotKernel(NEOneHotKernel &&) = default;
+ /** Allow instances of this class to be moved. */
+ NEOneHotKernel &operator=(NEOneHotKernel &&) = default;
+ /** Default destructor */
+ ~NEOneHotKernel() = default;
+ /** Name of the kernel
+ *
+ * @return Kernel name
+ */
+ const char *name() const override { return "NEOneHotKernel"; }
+ /** Initialise the kernel's inputs and outputs
+ *
+ * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the
+ * following types: U32/S32
+ * @param[in] depth The tensor for depth of the one hot dimension.
+ * Supported tensor rank: up to 3.
+ * Must be one of the following types: U32/S32
+ * @param[in] on_value On value tensor. Supported tensor rank: only 1.
+ * Data type supported: U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[in] off_value Off value tensor. Supported tensor rank: only 1.
+ * Data type supported: Same as @p on_value
+ * @param[out] output Destination tensor. Data type supported: Same as @p on_value
+ * @param[in] axis (Optional) The axis to fill. Negative values wrap around.
+ * Defaults to -1.
+ * The value must be in range [-indices.rank, indices.rank).
+ */
+ void configure(const ITensor *indices, const ITensor *depth, const ITensor *on_value,
+ const ITensor *off_value, ITensor *output, int axis = -1);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NEOneHotKernel
+ *
+ * @param[in] indices Indices tensor info. Supported tensor rank: up to 3.
+ * Must be one of the following types: U32/S32
+ * @param[in] depth The tensor info for depth of the one hot dimension.
+ * Supported tensor rank: up to 3.
+ * Must be one of the following types: U32/S32
+ * @param[in] on_value On value tensor info. Supported tensor rank: only 1.
+ * Data type supported: U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[in] off_value Off value tensor info. Supported tensor rank: only 1.
+ * Data type supported: Same as @p on_value
+ * @param[out] output Destination tensor info. Data type supported: Same as @p on_value
+ * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1.
+ * The value must be in range [-indices.rank, indices.rank)
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *indices, const ITensorInfo *depth,
+ const ITensorInfo *on_value, const ITensorInfo *off_value,
+ const ITensorInfo *output, int axis = -1);
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Implementation of the one-hot operation for axis 0.
+ *
+ * For one-hot on axis 0, an element-by-element copy is performed.
+ *
+ * @param[in] window Region on which to execute the kernel. (Must be a region of the window
+ * returned by window())
+ * @param[in] info Info about executing thread and CPU.
+ */
+ template <typename U> void onehot_0_axis(const Window &window, const ThreadInfo &info);
+ /** Implementation of the one-hot operation.
+ *
+ * For axis >= 1, a row-wise copy takes place.
+ *
+ * @param[in] window Region on which to execute the kernel. (Must be a region of the window
+ * returned by window())
+ * @param[in] info Info about executing thread and CPU.
+ */
+ template <typename U> void onehot_n_axis(const Window &window, const ThreadInfo &info);
+ using kernel_ptr = void (NEOneHotKernel::*)(const Window &window, const ThreadInfo &info);
+ const ITensor *_indices;
+ const ITensor *_depth;
+ const ITensor *_on_value;
+ const ITensor *_off_value;
+ int _axis;
+ ITensor *_output;
+ kernel_ptr _func;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEONEHOTKERNEL_H__ */
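For orientation, a minimal sketch of how a caller might drive the kernel declared above; the shapes, the CPU Tensor boilerplate, and the scheduler split dimension are illustrative assumptions, not part of this patch:

#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/Scheduler.h"
// NEOneHotKernel.h (above) is assumed to be included as well.

using namespace arm_compute;

void one_hot_kernel_sketch()
{
  Tensor indices, depth, on_value, off_value, output;
  // Four S32 indices; depth, on_value and off_value are single-element tensors.
  indices.allocator()->init(TensorInfo(TensorShape(4U), 1, DataType::S32));
  depth.allocator()->init(TensorInfo(TensorShape(1U), 1, DataType::S32));
  on_value.allocator()->init(TensorInfo(TensorShape(1U), 1, DataType::F32));
  off_value.allocator()->init(TensorInfo(TensorShape(1U), 1, DataType::F32));
  // With axis = 0 and depth = 3, indices of shape [4] produce an output of
  // shape [3, 4], matching compute_onehot_shape_ex later in this patch.
  output.allocator()->init(TensorInfo(TensorShape(3U, 4U), 1, DataType::F32));

  NEOneHotKernel onehot;
  onehot.configure(&indices, &depth, &on_value, &off_value, &output, /*axis=*/0);

  for (Tensor *t : {&indices, &depth, &on_value, &off_value, &output})
    t->allocator()->allocate();

  // Buffers would be filled here before dispatching the kernel.
  Scheduler::get().schedule(&onehot, Window::DimY);
}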
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEQuantizationSymmetricKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEQuantizationSymmetricKernel.h
index 0b080cf73..1fd5362ae 100644
--- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEQuantizationSymmetricKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEQuantizationSymmetricKernel.h
@@ -41,7 +41,7 @@
#ifndef __ARM_COMPUTE_NEQUANTIZATIONSYMMETRICKERNEL_H__
#define __ARM_COMPUTE_NEQUANTIZATIONSYMMETRICKERNEL_H__
-#include "arm_compute/core/NEON/INEKernel.h"
+#include "src/core/NEON/INEKernel.h"
namespace arm_compute
{
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEReductionOperationKernelEx.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEReductionOperationKernelEx.h
deleted file mode 100644
index c9024fbb3..000000000
--- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEReductionOperationKernelEx.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NEREDUCTIONOPERATIONKERNELEX_H__
-#define __ARM_COMPUTE_NEREDUCTIONOPERATIONKERNELEX_H__
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/TypesEx.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to perform a reduction operation */
-class NEReductionOperationKernelEx : public INEKernel
-{
-public:
- const char *name() const override { return "NEReductionOperationKernelEx"; }
- /** Default constructor */
- NEReductionOperationKernelEx();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEReductionOperationKernelEx(const NEReductionOperationKernelEx &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEReductionOperationKernelEx &operator=(const NEReductionOperationKernelEx &) = delete;
- /** Allow instances of this class to be moved */
- NEReductionOperationKernelEx(NEReductionOperationKernelEx &&) = default;
- /** Allow instances of this class to be moved */
- NEReductionOperationKernelEx &operator=(NEReductionOperationKernelEx &&) = default;
- /** Default destructor */
- ~NEReductionOperationKernelEx() = default;
-
- /** Set the source, destination of the kernel
- *
- * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. Data layouts supported:
- * NCHW.
- * @param[out] output Destination tensor.Data types and data layouts supported: same as @p input.
- * Output will have the same number of dimensions as input.
- * @param[in] axis Axis along which to reduce. Supported reduction axis : 0
- * @param[in] op Reduction operation to perform.
- */
- void configure(const ITensor *input, ITensor *output, unsigned int axis, ReduceOperation op);
-
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NEReductionOperationKernelEx.
- *
- * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32. Data layouts
- * supported: NCHW.
- * @param[in] output Destination tensor info.Data types and data layouts supported: same as @p
- * input.
- * Output will have the same number of dimensions as input.
- * @param[in] axis Axis along which to reduce. Supported reduction axis : 0
- * @param[in] op Reduction operation to perform.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis,
- ReduceOperation op);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- const ITensor *_input;
- ITensor *_output;
- unsigned int _reduction_axis;
- ReduceOperation _op;
- BorderSize _border_size;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_NEREDUCTIONOPERATIONKERNELEX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/TypesEx.h b/compute/ARMComputeEx/arm_compute/core/TypesEx.h
index faba8a449..cda8a30b1 100644
--- a/compute/ARMComputeEx/arm_compute/core/TypesEx.h
+++ b/compute/ARMComputeEx/arm_compute/core/TypesEx.h
@@ -51,15 +51,6 @@ enum class ArgOperation
MIN,
};
-/** Available reduce operations */
-enum class ReduceOperation
-{
- MAX, /**< Max */
- MEAN, /**< Mean */
- SUM, /**< Sum */
- MIN, /**< Min */
-};
-
/** Available binary logical operations */
enum class BinaryLogicalOperation
{
diff --git a/compute/ARMComputeEx/arm_compute/core/UtilsEx.h b/compute/ARMComputeEx/arm_compute/core/UtilsEx.h
index d57e8fcf5..d7ec1b4f0 100644
--- a/compute/ARMComputeEx/arm_compute/core/UtilsEx.h
+++ b/compute/ARMComputeEx/arm_compute/core/UtilsEx.h
@@ -67,5 +67,5 @@ transposeconv_output_dimensions(unsigned int in_width, unsigned int in_height,
unsigned int kernel_width, unsigned int kernel_height,
const PadStrideInfo &info, unsigned int invalid_right,
unsigned int invalid_top);
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_UTILSEX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h b/compute/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h
index a9ceacbea..2aaab6b3a 100644
--- a/compute/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h
+++ b/compute/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h
@@ -72,10 +72,10 @@ namespace shape_calculator
* @return the calculated shape
*/
inline TensorShape compute_transposeconv_upsampled_shape(
- const ITensorInfo &input, const ITensorInfo &weights, const PadStrideInfo &info,
- std::pair<unsigned int, unsigned int> &out_dims, unsigned int invalid_right,
- unsigned int invalid_bottom, unsigned int &pad_left, unsigned int &pad_right,
- unsigned int &pad_top, unsigned int &pad_bottom)
+ const ITensorInfo &input, const ITensorInfo &weights, const PadStrideInfo &info,
+ std::pair<unsigned int, unsigned int> &out_dims, unsigned int invalid_right,
+ unsigned int invalid_bottom, unsigned int &pad_left, unsigned int &pad_right,
+ unsigned int &pad_top, unsigned int &pad_bottom)
{
unsigned int sx = info.stride().first;
unsigned int sy = info.stride().second;
@@ -103,7 +103,7 @@ inline TensorShape compute_transposeconv_upsampled_shape(
unsigned int padx_all_except_invallid = padx + info.pad_left() + info.pad_right() - invalid_right;
unsigned int pady_all_except_invallid =
- pady + info.pad_top() + info.pad_bottom() - invalid_bottom;
+ pady + info.pad_top() + info.pad_bottom() - invalid_bottom;
pad_left = (padx_all_except_invallid + 1) / 2 - info.pad_left();
pad_right = pady_all_except_invallid / 2 - info.pad_right() + invalid_right;
pad_top = (padx_all_except_invallid + 1) / 2 - info.pad_top();
@@ -135,7 +135,7 @@ compute_transposeconv_output_shape(const std::pair<unsigned int, unsigned int> &
const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
const int channel_idx =
- get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
+ get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
const int batch_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);
TensorShape out_shape{input_shape};
@@ -160,7 +160,7 @@ inline TensorShape compute_depth_to_space_shape_ex(const ITensorInfo *input, int
const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
const int idx_channel =
- get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
+ get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
TensorShape output_shape{input->tensor_shape()};
output_shape.set(idx_width, input->dimension(idx_width) * block);
@@ -238,6 +238,36 @@ inline TensorShape compute_gather_shape_ex(const TensorShape &input_shape,
return output_shape;
}
+/** Calculate the one-hot output shape of a tensor
+ *
+ * @param[in] indices_shape Indices tensor shape
+ * @param[in] depth Depth of the one-hot dimension
+ * @param[in] actual_axis The axis to fill with one-hot values
+ *
+ * @return the calculated shape
+ */
+inline TensorShape compute_onehot_shape_ex(const TensorShape &indices_shape, uint32_t depth,
+ uint32_t actual_axis)
+{
+ ARM_COMPUTE_ERROR_ON(indices_shape.num_dimensions() > 3);
+ ARM_COMPUTE_ERROR_ON(actual_axis > indices_shape.num_dimensions());
+
+ TensorShape output_shape;
+ output_shape.set(actual_axis, depth);
+
+ unsigned int i_shift = 0;
+ for (unsigned int i = 0; i < indices_shape.num_dimensions(); ++i)
+ {
+ if (i == actual_axis)
+ {
+ i_shift++;
+ }
+ output_shape.set(i + i_shift, indices_shape[i]);
+ }
+
+ return output_shape;
+}
+
} // namespace shape_calculator
} // namespace misc
} // namespace arm_compute
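To make the dimension-shifting loop above concrete, a short worked example (values purely illustrative):

// Indices of shape [2, 5], depth 10, one-hot axis 0:
//   set(0, 10) claims dimension 0 for the one-hot depth, and the loop then
//   shifts every indices dimension at or after the axis up by one slot,
//   so the result is [10, 2, 5].
using namespace arm_compute;
const TensorShape indices_shape(2U, 5U);
const TensorShape out = misc::shape_calculator::compute_onehot_shape_ex(
  indices_shape, /*depth=*/10U, /*actual_axis=*/0U);
// out[0] == 10, out[1] == 2, out[2] == 5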
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h
index cfbd13436..664b8b3b1 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h
@@ -16,14 +16,19 @@
#ifndef __ARM_COMPUTE_CLFUNCTIONSEX_H__
#define __ARM_COMPUTE_CLFUNCTIONSEX_H__
+#include <arm_compute/runtime/CL/functions/CLArgMinMaxLayerEx.h>
#include <arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h>
+#include <arm_compute/runtime/CL/functions/CLCastBool.h>
#include <arm_compute/runtime/CL/functions/CLEmbeddingLookup.h>
#include <arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h>
#include <arm_compute/runtime/CL/functions/CLGatherEx.h>
#include <arm_compute/runtime/CL/functions/CLHashtableLookup.h>
#include <arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h>
#include <arm_compute/runtime/CL/functions/CLNeg.h>
+#include <arm_compute/runtime/CL/functions/CLOneHot.h>
+#include <arm_compute/runtime/CL/functions/CLPadLayerEx.h>
#include <arm_compute/runtime/CL/functions/CLReduceOperation.h>
+#include <arm_compute/runtime/CL/functions/CLSplitVEx.h>
#include <arm_compute/runtime/CL/functions/CLTopKV2.h>
#include <arm_compute/runtime/CL/functions/CLTransposeConvLayer.h>
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgMinMaxLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgMinMaxLayerEx.h
new file mode 100644
index 000000000..05bcc4075
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgMinMaxLayerEx.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLARGMINMAXLAYEREX_H__
+#define __ARM_COMPUTE_CLARGMINMAXLAYEREX_H__
+
+#include "arm_compute/core/CL/kernels/CLArgMinMaxLayerKernelEx.h"
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/functions/CLReshapeLayer.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+
+namespace arm_compute
+{
+class ITensorInfo;
+class ICLTensor;
+
+/** Function to calculate the index of the minimum or maximum values in a
+ * tensor based on an axis.
+ *
+ * @note The default data type for an uninitialized output tensor is
+ * signed 32-bit integer (S32). It is the user's responsibility to check
+ * that the results do not overflow because the indices are computed
+ * in unsigned 32-bit (U32).
+ */
+class CLArgMinMaxLayerEx : public IFunction
+{
+public:
+ /** Default Constructor.
+ *
+ * @param[in] memory_manager (Optional) Memory manager.
+ */
+ CLArgMinMaxLayerEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Input source tensor. Data types supported: QASYMM8/F16/F32.
+ * @param[in] axis Axis to find max/min index.
+ * @param[out] output Output tensor. Data types supported: U32/S32.
+ * @param[in] op Reduction operation to perform. Operations supported: ARG_IDX_MAX,
+ * ARG_IDX_MIN
+ */
+ void configure(const ICLTensor *input, int axis, ICLTensor *output, const ReductionOperation &op);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLArgMinMaxLayerEx
+ *
+ * @param[in] input Input source tensor info. Data types supported: QASYMM8/F16/F32.
+ * @param[in] axis Axis to find max/min index.
+ * @param[in] output Output tensor info. Data types supported: U32/S32.
+ * @param[in] op Reduction operation to perform. Operations supported: ARG_IDX_MAX,
+ * ARG_IDX_MIN
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, int axis, const ITensorInfo *output,
+ const ReductionOperation &op);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ MemoryGroup _memory_group;
+ std::vector<CLTensor> _results_vector;
+ CLTensor _not_reshaped_output;
+ std::vector<CLArgMinMaxLayerKernelEx> _reduction_kernels_vector;
+ CLReshapeLayer _reshape_kernel;
+ unsigned int _num_of_stages;
+ unsigned int _reduction_axis;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLARGMINMAXLAYEREX_H__ */
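A hypothetical call-site sketch for the function declared above; CL runtime initialisation and buffer fills are assumed to happen elsewhere:

#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

void argminmax_sketch()
{
  CLTensor input, output;
  input.allocator()->init(TensorInfo(TensorShape(32U, 16U), 1, DataType::F32));
  // Axis 0 is collapsed; indices come out as 32-bit integers.
  output.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::S32));

  CLArgMinMaxLayerEx argmax;
  argmax.configure(&input, /*axis=*/0, &output, ReductionOperation::ARG_IDX_MAX);

  input.allocator()->allocate();
  output.allocator()->allocate();
  argmax.run(); // enqueues the staged reduction kernels, then the final reshape
}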
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h
index 88a9b00ec..fc4322798 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h
@@ -43,6 +43,7 @@
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
#include "arm_compute/core/TypesEx.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
namespace arm_compute
{
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLCastBool.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLCastBool.h
new file mode 100644
index 000000000..854ddce52
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLCastBool.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/**
+ * @file CLCastBool.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file contains arm_compute::CLCastBool class
+ */
+
+#ifndef ARM_COMPUTE_CLCASTBOOL_H
+#define ARM_COMPUTE_CLCASTBOOL_H
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+ * @brief Class to run @ref CLCastBoolKernel.
+ * This converts the boolean input tensor to the output tensor's type.
+ */
+class CLCastBool : public ICLSimpleFunction
+{
+public:
+ /**
+ * @brief Initialise the kernel's input and output
+ * @param[in] input Input tensor. Data types supported: U8
+ * @param[out] output Output tensor. Data types supported: U8/S8/U16/S16/U32/F16/F32.
+ */
+ void configure(ICLTensor *input, ICLTensor *output);
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLCASTBOOL_H */
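A hypothetical usage sketch, assuming the CL runtime has already been initialised:

#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

void cast_bool_sketch()
{
  CLTensor flags, values;
  flags.allocator()->init(TensorInfo(TensorShape(8U), 1, DataType::U8)); // boolean input
  values.allocator()->init(TensorInfo(TensorShape(8U), 1, DataType::F32));

  CLCastBool cast;
  cast.configure(&flags, &values);

  flags.allocator()->allocate();
  values.allocator()->allocate();
  cast.run(); // single-kernel function inherited from ICLSimpleFunction
}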
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h
index 409eaf593..026209f69 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h
@@ -106,22 +106,24 @@ public:
CLDirectTransposeConvLayer &operator=(CLDirectTransposeConvLayer &&) = default;
/** Set the input, weights, biases and output tensors.
*
- * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an
- * optional 4th dimension for batch of inputs.
- * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type
- * supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension.
- * Data type supported: Should match @p input data type, except for
- * input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
- * @param[out] output Output tensor. The output has the same number of dimensions as the
- * @p input.
- * @param[in] info Contains padding and policies to be used in the deconvolution, this
- * is decribed in @ref PadStrideInfo.
- * @param[in] invalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
- * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
- * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data type supported: Should match @p input data type,
+ * except for input of QASYMM8 and QASYMM8_SIGNED type
+ * where biases should be of S32 type
+ * @param[out] output Output tensor.
+ * The output has the same number of dimensions as the @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution,
+ * this is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for
+ * @ref CLConvolutionLayer, specifies if the weights tensor has been reshaped with
+ * @ref CLWeightsReshapeKernel.
*
*/
void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output,
@@ -130,23 +132,24 @@ public:
/** Set the input, weights, biases and output tensors.
*
* @param[in] compile_context The compile context to be used.
- * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and
- * an optional 4th dimension for batch of inputs.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
* Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data
- * type supported: Same as @p input.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
* @param[in] bias (Optional) The biases have one dimension.
* Data type supported: Should match @p input data type, except for
- * input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
+ * input of QASYMM8 and QASYMM8_SIGNED type
+ * where biases should be of S32 type
* @param[out] output Output tensor. The output has the same number of dimensions as
- * the @p input.
+ * the @p input.
* @param[in] info Contains padding and policies to be used in the deconvolution,
- * this is decribed in @ref PadStrideInfo.
- * @param[in] invalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
- * @param[in] weights_info (Optional) Weights information needed for @ref
- * CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref
- * CLWeightsReshapeKernel.
+ * this is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for
+ * @ref CLConvolutionLayer, specifies if the weights tensor has
+ * been reshaped with @ref CLWeightsReshapeKernel.
*
*/
void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights,
@@ -154,24 +157,26 @@ public:
unsigned int invalid_right, unsigned int invalid_bottom,
const WeightsInfo &weights_info = WeightsInfo());
/** Static function to check if given info will lead to a valid configuration of @ref
- * CLDirectTransposeConvLayer
+ * CLDirectTransposeConvLayer
*
- * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an
- * optional 4th dimension for batch of inputs.
- * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data
- * type supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension.
- * Data type supported: Should match @p input data type, except for input
- * of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
- * @param[in] output Output tensor info. The output has the same number of dimensions as the
- * @p input.
- * @param[in] info Contains padding and policies to be used in the deconvolution, this is
- * decribed in @ref PadStrideInfo.
- * @param[in] invalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
- * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
- * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
+ * @param[in] input Input tensor info. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data type supported: Should match @p input data type,
+ * except for input of QASYMM8 and QASYMM8_SIGNED type
+ * where biases should be of S32 type
+ * @param[in] output Output tensor info. The output has the same number of dimensions
+ * as the @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution,
+ * this is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
+ * specifies if the weights tensor has been reshaped
+ * with @ref CLWeightsReshapeKernel.
*
* @return a status
*/
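The validate/configure pairing documented above is typically exercised as below; the stride values and the nullptr bias are illustrative assumptions:

using namespace arm_compute;

void transpose_conv_sketch(ICLTensor &input, ICLTensor &weights, ICLTensor &output)
{
  const PadStrideInfo deconv_info(/*stride_x=*/2, /*stride_y=*/2, /*pad_x=*/0, /*pad_y=*/0);
  const Status status = CLDirectTransposeConvLayer::validate(
    input.info(), weights.info(), /*bias=*/nullptr, output.info(), deconv_info,
    /*invalid_right=*/0, /*invalid_bottom=*/0);
  if (bool(status)) // Status converts to true when the configuration is valid
  {
    CLDirectTransposeConvLayer deconv;
    deconv.configure(&input, &weights, /*bias=*/nullptr, &output, deconv_info, 0, 0);
    deconv.run();
  }
}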
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h
index fbee7e40e..b0149cb09 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h
@@ -73,5 +73,5 @@ public:
*/
void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *lookups);
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLEMBEDDINGLOOKUP_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h
index f3266f688..c75ae9a50 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h
@@ -43,14 +43,14 @@
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
#include "arm_compute/core/CL/kernels/CLMultiplyScaleFactorKernel.h"
#include "arm_compute/core/CL/kernels/CLQuantizationSymmetricKernel.h"
#include "arm_compute/core/CL/kernels/CLScaleFactorSymm8Kernel.h"
-#include "arm_compute/core/CL/kernels/CLTransposeKernel.h"
-#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+#include "src/core/CL/kernels/CLTransposeKernel.h"
namespace arm_compute
{
@@ -182,5 +182,5 @@ private:
bool _is_prepared;
const ICLTensor *_original_weights;
};
-}
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_CLFULLYCONNECTEDHYBRIDLAYER_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h
index e65a646dc..c08da526a 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h
@@ -43,16 +43,14 @@
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
-#include "arm_compute/core/CL/kernels/CLTransposeKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h"
#include "arm_compute/runtime/CL/functions/CLFlattenLayer.h"
#include "arm_compute/runtime/CL/functions/CLGEMM.h"
#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
-#include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h"
#include "arm_compute/runtime/IWeightsManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
+#include "src/core/CL/kernels/CLTransposeKernel.h"
namespace arm_compute
{
@@ -132,9 +130,6 @@ private:
* transpose_weights is set to true ) (called once)
* -# @ref CLGEMMMatrixMultiplyKernel or @ref CLGEMMLowpMatrixMultiplyCore (if quantized
* asymmetric)
- * -# @ref CLGEMMMatrixAccumulateBiasesKernel or @ref
- * CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if quantized asymmetric) (if @p biases is
- * not equal to nullptr)
*
* @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
*/
@@ -157,40 +152,36 @@ public:
* @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32.
* @param[in] weights Weights tensor. The weights must be 2 dimensional.
* If this function is called after a Convolution Layer, the (transposed)
- * weights will have as many rows as the product of the first 3 input's dimensions.
- * If it is called after another FullyConnected Layer, the (transposed)
- * weights will have as many rows as the input's first dimension.
- * Data type supported: Same as @p input.
+ * weights will have as many rows as the product of the first 3 input's dimensions. If it is
+ * called after another FullyConnected Layer, the (transposed) weights will have as many rows as
+ * the input's first dimension. Data type supported: Same as @p input.
* @param[in] biases Bias tensor. Can be nullptr. Data type supported:Same as @p input.
* @param[out] output Destination tensor. Its shape should be equal to the output of a matrix
* multiplication between:
* - The output of im2col on the input and the (transposed) 2D weights, if the
* function is called after a Convolution Layer
* - The input tensor and the (transposed) 2D weights, if the function is
- * called after another FullyConnected Layer.
- * Data type supported: Same as @p input.
+ * called after another FullyConnected Layer. Data type supported: Same as @p input.
* @param[in] fc_info (Optional) Fully connected layer additional info
*/
void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases,
ICLTensor *output, FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref
- * CLFullyConnectedLayerEx
+ * CLFullyConnectedLayer
*
* @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32.
* @param[in] weights Weights tensor info. The weights must be 2 dimensional.
* If this function is called after a Convolution Layer, the (transposed)
- * weights will have as many rows as the product of the first 3 input's dimensions.
- * If it is called after another FullyConnected Layer, the (transposed)
- * weights will have as many rows as the input's first dimension.
- * Data type supported: Same as @p input.
+ * weights will have as many rows as the product of the first 3 input's dimensions. If it is
+ * called after another FullyConnected Layer, the (transposed) weights will have as many rows as
+ * the input's first dimension. Data type supported: Same as @p input.
* @param[in] biases Bias tensor info. Can be nullptr. Data type supported:Same as @p input.
* @param[out] output Destination tensor info. Its shape should be equal to the output of a
* matrix multiplication between:
* - The output of im2col on the input and the (transposed) 2D weights, if the
* function is called after a Convolution Layer
* - The input tensor and the (transposed) 2D weights, if the function is
- * called after another FullyConnected Layer.
- * Data type supported: Same as @p input.
+ * called after another FullyConnected Layer. Data type supported: Same as @p input.
* @param[in] fc_info (Optional) Fully connected layer additional info
*
* @return a status
@@ -216,7 +207,7 @@ private:
CLConvertFullyConnectedWeights _convert_weights;
weights_transformations::CLConvertFullyConnectedWeightsManaged _convert_weights_managed;
weights_transformations::CLFullyConnectedLayerReshapeWeightsExManaged
- _reshape_weights_managed_function;
+ _reshape_weights_managed_function;
CLFlattenLayer _flatten_layer;
CLFullyConnectedLayerReshapeWeightsEx _reshape_weights_function;
CLGEMM _mm_gemm;
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h
index 289ab167f..bdb168664 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h
@@ -43,8 +43,8 @@ public:
public:
CLFullyConnectedReshapingLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr)
- : _input(nullptr), _weights(nullptr), _biases(nullptr), _output(nullptr), _cl_buffer{},
- _memory_manager{memory_manager}, _cl_fc{nullptr}, _cl_reshape{}, _needs_reshape(false)
+ : _input(nullptr), _weights(nullptr), _biases(nullptr), _output(nullptr), _cl_buffer{},
+ _memory_manager{memory_manager}, _cl_fc{nullptr}, _cl_reshape{}, _needs_reshape(false)
{
// DO NOTHING
}
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h
index b01ec4255..385eb0b2c 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h
@@ -47,11 +47,14 @@
#ifndef __ARM_COMPUTE_CLGATHEREX_H__
#define __ARM_COMPUTE_CLGATHEREX_H__
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/**
 * @brief Class to run @ref CLGatherKernel.
@@ -66,7 +69,7 @@ public:
* @param[out] output The output tensor, Data types supported: same as @p input.
* @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0
* @return N/A
- */
+ */
void configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0);
/**
@@ -81,5 +84,5 @@ public:
static Status validate(const ITensorInfo *input, const ITensorInfo *indices,
const ITensorInfo *output, int axis = 0);
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLGATHEREX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h
index 6618f5aa4..5e172a4c7 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h
@@ -78,5 +78,5 @@ public:
void configure(const ICLTensor *lookups, const ICLTensor *keys, const ICLTensor *input,
ICLTensor *output, ICLTensor *hits);
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLHASHTABLELOOKUP_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h
index 887e7aaa5..02ae6d719 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h
@@ -41,11 +41,14 @@
#ifndef __ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYEREX_H__
#define __ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYEREX_H__
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to perform a Instance normalization.
*
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLOneHot.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLOneHot.h
new file mode 100644
index 000000000..62a36f06d
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLOneHot.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLONEHOT_H__
+#define __ARM_COMPUTE_CLONEHOT_H__
+
+#include "arm_compute/core/CL/kernels/CLOneHotKernel.h"
+#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+/** Basic function to run @ref CLOneHotKernel */
+class CLOneHot : public IFunction
+{
+public:
+ /** Constructor */
+ CLOneHot();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLOneHot(const CLOneHot &) = delete;
+ /** Default move constructor */
+ CLOneHot(CLOneHot &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLOneHot &operator=(const CLOneHot &) = delete;
+ /** Default move assignment operator */
+ CLOneHot &operator=(CLOneHot &&) = default;
+ /** Initialise the kernel's inputs and outputs
+ *
+ * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the
+ * following types: U32/S32
+ * @param[in] on_value On value tensor. Supported tensor rank: only 1. Data type supported:
+ * U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[in] off_value Off value tensor. Supported tensor rank: only 1. Data type supported:
+ * Same as @p on_value
+ * @param[out] output Destination tensor. Data type supported: Same as @p on_value
+ * @param[in] depth The depth of the one hot dimension.
+ * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1.
+ * The value must be in range [-indices.rank, indices.rank)
+ */
+ void configure(const ICLTensor *indices, const ICLTensor *on_value, const ICLTensor *off_value,
+ ICLTensor *output, int depth, int axis = -1);
+ /** Initialise the kernel's inputs and outputs with off_value being constant
+ *
+ * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the
+ * following types: U32/S32
+ * @param[in] on_value On value tensor. Supported tensor rank: only 1. Data type supported:
+ * U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[out] output Destination tensor. Data type supported: Same as @p on_value
+ * @param[in] off_value The PixelValue for off value. Data type supported: Same as @p on_value
+ * @param[in] depth The depth of the one hot dimension.
+ * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1.
+ * The value must be in range [-indices.rank, indices.rank)
+ */
+ void configure(const ICLTensor *indices, const ICLTensor *on_value, ICLTensor *output,
+ PixelValue off_value, int depth, int axis = -1);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLOneHotKernel
+ *
+ * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the
+ * following types: U32/S32
+ * @param[in] on_value On value tensor. Supported tensor rank: only 1. Data type supported:
+ * U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[in] off_value Off value tensor. Supported tensor rank: only 1. Data type supported:
+ * Same as @p on_value
+ * @param[in] output Destination tensor. Data type supported: Same as @p on_value
+ * @param[in] depth The depth of the one hot dimension.
+ * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1.
+ * The value must be in range [-indices.rank, indices.rank)
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *indices, const ITensorInfo *on_value,
+ const ITensorInfo *off_value, const ITensorInfo *output, int depth,
+ int axis = -1);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ CLMemsetKernel _memset_kernel; /**< Memset kernel */
+ CLOneHotKernel _onehot_kernel; /**< OneHot kernel */
+ bool _has_to_memset;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLONEHOT_H__ */
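A hypothetical sketch of the constant off_value overload; the shape [3, 4] follows compute_onehot_shape_ex earlier in this patch for indices [4], depth 3, axis 0:

#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

void one_hot_sketch()
{
  CLTensor indices, on_value, output;
  indices.allocator()->init(TensorInfo(TensorShape(4U), 1, DataType::S32));
  on_value.allocator()->init(TensorInfo(TensorShape(1U), 1, DataType::F32));
  output.allocator()->init(TensorInfo(TensorShape(3U, 4U), 1, DataType::F32));

  CLOneHot onehot;
  // This overload memsets the output to off_value first; the OneHot kernel
  // then writes on_value at the positions selected by indices.
  onehot.configure(&indices, &on_value, &output, PixelValue(0.0f), /*depth=*/3, /*axis=*/0);

  indices.allocator()->allocate();
  on_value.allocator()->allocate();
  output.allocator()->allocate();
  onehot.run();
}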
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPadLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPadLayerEx.h
new file mode 100644
index 000000000..ee1879aaa
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPadLayerEx.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2018-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLPADLAYEREX_H
+#define ARM_COMPUTE_CLPADLAYEREX_H
+
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/core/CL/kernels/CLPadLayerKernelEx.h"
+#include "src/core/gpu/cl/kernels/ClCopyKernel.h"
+// #include "arm_compute/runtime/CL/functions/CLCopy.h"
+#include <memory>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to pad a tensor. This function calls the following OpenCL functions/kernels:
+ *
+ * -# @ref CLPadLayerKernelEx if there is padding to be added
+ * -# @ref CLCopyKernel otherwise
+ */
+class CLPadLayerEx : public IFunction
+{
+public:
+ /** Default constructor */
+ CLPadLayerEx();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLPadLayerEx(const CLPadLayerEx &) = delete;
+ /** Default move constructor */
+ CLPadLayerEx(CLPadLayerEx &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLPadLayerEx &operator=(const CLPadLayerEx &) = delete;
+ /** Default move assignment operator */
+ CLPadLayerEx &operator=(CLPadLayerEx &&) = default;
+
+ /** Initialize the function
+ *
+ * @param[in] input Source tensor. Data types supported: All.
+ * @param[out] output Output tensor. Data type supported: same as @p input
+ * @param[in] padding The padding for each spatial dimension of the input tensor. The pair
+ * padding[i] specifies the front and the end padding in the i-th dimension.
+ * @param[in] constant_value (Optional) Constant value to be used for the padding.
+ * @param[in] mode (Optional) Controls whether the padding should be filled with @p
+ * constant_value using CONSTANT, or reflect the input, either including the border values
+ * (SYMMETRIC) or not (REFLECT).
+ */
+ void configure(ICLTensor *input, ICLTensor *output, const PaddingList &padding,
+ PixelValue constant_value = PixelValue(),
+ PaddingMode mode = PaddingMode::CONSTANT);
+ /** Initialize the function
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: All.
+ * @param[out] output Output tensor. Data type supported: same as @p input
+ * @param[in] padding The padding for each spatial dimension of the input tensor. The
+ * pair padding[i] specifies the front and the end padding in the i-th dimension.
+ * @param[in] constant_value (Optional) Constant value to be used for the padding.
+ * @param[in] mode (Optional) Controls whether the padding should be filled with @p
+ * constant_value using CONSTANT, or reflect the input, either including the border values
+ * (SYMMETRIC) or not (REFLECT).
+ */
+ void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output,
+ const PaddingList &padding, PixelValue constant_value = PixelValue(),
+ PaddingMode mode = PaddingMode::CONSTANT);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLPadLayerEx.
+ *
+ * @param[in] input Source tensor info. Data types supported: All.
+ * @param[in] output Output tensor info. Data type supported: same as @p input
+ * @param[in] padding The padding for each spatial dimension of the input tensor. The pair
+ * padding[i] specifies the front and the end padding in the i-th dimension.
+ * @param[in] constant_value (Optional) Constant value to be used for the padding
+ * @param[in] mode (Optional) Controls whether the padding should be filled with @p
+ * constant_value using CONSTANT, or reflect the input, either including the border values
+ * (SYMMETRIC) or not (REFLECT).
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ const PaddingList &padding, PixelValue constant_value = PixelValue(),
+ PaddingMode mode = PaddingMode::CONSTANT);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ void configure_reflect_mode(ICLTensor *input, ICLTensor *output);
+
+ std::unique_ptr<CLPadLayerKernelEx> _pad_kernel;
+ std::unique_ptr<opencl::kernels::ClCopyKernel> _copy_kernel;
+ bool _perform_pad;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLPADLAYEREX_H */
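A hypothetical usage sketch; the shapes and the zero fill value are illustrative:

#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

void pad_sketch()
{
  CLTensor src, dst;
  src.allocator()->init(TensorInfo(TensorShape(4U, 4U), 1, DataType::F32));
  dst.allocator()->init(TensorInfo(TensorShape(6U, 6U), 1, DataType::F32));

  CLPadLayerEx pad;
  const PaddingList padding = {{1, 1}, {1, 1}}; // {front, end} pairs per dimension
  pad.configure(&src, &dst, padding, PixelValue(0.0f), PaddingMode::CONSTANT);

  src.allocator()->allocate();
  dst.allocator()->allocate();
  pad.run(); // CLPadLayerKernelEx when padding is non-empty, ClCopyKernel otherwise
}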
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h
index 7dba84b12..45eb72bef 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h
@@ -48,7 +48,7 @@
#define __ARM_COMPUTE_CLREDUCEOPERATION_H__
#include "arm_compute/core/CL/kernels/CLReduceOperationKernel.h"
-#include "arm_compute/core/TypesEx.h"
+#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
@@ -82,7 +82,7 @@ public:
* @return N/A
*/
void configure(ICLTensor *input, ICLTensor *output, const std::set<uint32_t> &axis,
- bool keep_dims, ReduceOperation op);
+ bool keep_dims, ReductionOperation op);
/**
* @brief Static function to check if given info will lead to a valid configuration of @ref
@@ -96,7 +96,8 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const std::set<uint32_t> &axis, bool keep_dims, const ReduceOperation &op);
+ const std::set<uint32_t> &axis, bool keep_dims,
+ const ReductionOperation &op);
/**
* @brief Run the OpenCL kernel for this operation
@@ -115,5 +116,5 @@ private:
std::unique_ptr<CLReduceOperationKernel[]> _reduce_kernels{nullptr};
CLReshapeLayer _reshape;
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLREDUCEOPERATION_H__ */
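Since the TypesEx ReduceOperation enum is deleted above, call sites move to the stock ReductionOperation values (MEAN_SUM, SUM, MIN, MAX, ...); a hypothetical sketch:

using namespace arm_compute;

void reduce_sketch(ICLTensor &input, ICLTensor &output)
{
  CLReduceOperation reduce(nullptr); // no shared memory manager in this sketch
  const std::set<uint32_t> axis = {0};
  reduce.configure(&input, &output, axis, /*keep_dims=*/true, ReductionOperation::MEAN_SUM);
  reduce.run();
}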
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSplitVEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSplitVEx.h
new file mode 100644
index 000000000..3023df3f0
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSplitVEx.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLSPLITVEX__
+#define __ARM_COMPUTE_CLSPLITVEX__
+
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/CL/functions/CLSlice.h"
+#include "arm_compute/core/Types.h"
+#include <vector>
+#include <memory>
+
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/runtime/CPP/functions/CPPSplit.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to split a tensor along a given dimension using @ref CLSlice */
+class CLSplitVEx : public IFunction
+{
+public:
+ /** Default constructor */
+ CLSplitVEx();
+ /** Configure the split CL kernel
+ *
+ * @param[in] input The input tensor to split. Data types supported:
+ * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] size_splits A 1-D tensor containing the number of tensor values per split
+ * @param[in] split_dim Integer value representing the input tensor dimension along
+ * which to split
+ * @param[out] outputs A vector containing the output tensors. Data types supported: Same as
+ * @p input. The output tensors should match the input tensor dimensions
+ * for all shape dimensions apart from the split dimension.
+ * @param[in] num_splits Number of splits
+ */
+ void configure(const ICLTensor *input, const ICLTensor *size_splits, uint32_t split_dim,
+ const std::vector<ICLTensor *> &outputs, unsigned int num_splits);
+
+ void run() override;
+
+private:
+ const ICLTensor *_input;
+ const ICLTensor *_size_splits;
+ std::vector<ICLTensor *> _outputs;
+ unsigned int _num_splits;
+ std::vector<CLSlice> _slice_functions;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLSPLITVEX__ */
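A minimal sketch of driving the new CLSplitVEx, assuming three splits along dimension 1 and that input, size_splits, and the three output tensors are already created (allocation and scheduler setup omitted):

#include "arm_compute/runtime/CL/functions/CLSplitVEx.h"

#include <vector>

// Split `input` into num_splits pieces along split_dim; `outputs` must already
// hold one tensor pointer per split, sized according to `size_splits`.
void run_splitv(arm_compute::CLSplitVEx &splitv, const arm_compute::ICLTensor *input,
                const arm_compute::ICLTensor *size_splits,
                const std::vector<arm_compute::ICLTensor *> &outputs)
{
  splitv.configure(input, size_splits, /*split_dim=*/1, outputs, /*num_splits=*/3);
  splitv.run(); // enqueues the underlying CLSlice functions
}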
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h
index e301a5152..f426a4d75 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h
@@ -160,5 +160,5 @@ private:
CLTopKV2Store _store_kernel;
#endif
};
-}
+} // namespace arm_compute
#endif // __ARM_COMPUTE_CLTOPK_V2_H__
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h
index 5fb102e47..5b27d362a 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h
@@ -63,20 +63,22 @@ public:
/** Set the input, weights, biases and output tensors.
*
- * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an
- * optional 4th dimension for batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type
- * supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension. Data type supported: Same
- * as @p input.
- * @param[out] output Output tensor. The output has the same number of dimensions as the
- * @p input.
- * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this
- * is described in @ref PadStrideInfo.
- * @param[in] invalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
- * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
- * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data type supported: Same as @p input.
+ * @param[out] output Output tensor. The output has the same number of dimensions
+ * as the @p input.
+ * @param[in] deconv_info Contains padding and policies to be used in the deconvolution,
+ * this is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for
+ * @ref CLConvolutionLayer, specifies if the weights tensor has
+ * been reshaped with @ref CLWeightsReshapeKernel.
*
*/
void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output,
@@ -85,22 +87,22 @@ public:
/** Set the input, weights, biases and output tensors.
*
* @param[in] compile_context The compile context to be used.
- * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and
- * an optional 4th dimension for batch of inputs. Data types supported:
- * QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data
- * type supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension. Data type supported:
- * Same as @p input.
- * @param[out] output Output tensor. The output has the same number of dimensions as
- * the @p input.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data type supported: Same as @p input.
+ * @param[out] output Output tensor. The output has the same number of dimensions
+ * as the @p input.
* @param[in] deconv_info Contains padding and policies to be used in the deconvolution,
- * this is described in @ref PadStrideInfo.
- * @param[in] invalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
- * @param[in] weights_info (Optional) Weights information needed for @ref
- * CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref
- * CLWeightsReshapeKernel.
+ * this is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for
+ * @ref CLConvolutionLayer, specifies if the weights tensor has
+ * been reshaped with @ref CLWeightsReshapeKernel.
*
*/
void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights,
@@ -108,22 +110,24 @@ public:
unsigned int invalid_right, unsigned int invalid_bottom,
const WeightsInfo &weights_info = WeightsInfo());
/** Static function to check if given info will lead to a valid configuration of @ref
- * CLTransposeConvLayer
+ * CLTransposeConvLayer
*
- * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an
- * optional 4th dimension for batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data
- * type supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension. Data type supported: Same as
- * @p input.
- * @param[in] output Output tensor info. The output has the same number of dimensions as the
- * @p input.
- * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is
- * described in @ref PadStrideInfo.
- * @param[in] invalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
- * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
- * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
+ * @param[in] input Input tensor info. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data type supported: Same as @p input.
+ * @param[in] output Output tensor info. The output has the same number of dimensions
+ * as the @p input.
+ * @param[in] deconv_info Contains padding and policies to be used in the deconvolution,
+ * this is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
+ * specifies if the weights tensor has been reshaped with
+ * @ref CLWeightsReshapeKernel.
*
* @return a status
*/
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPOneHotEx.h b/compute/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPOneHotEx.h
deleted file mode 100644
index 7930e4e20..000000000
--- a/compute/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPOneHotEx.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CPPONEHOT_EX_H__
-#define __ARM_COMPUTE_CPPONEHOT_EX_H__
-
-#include "arm_compute/runtime/CPP/ICPPSimpleFunction.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to run @ref CPPOneHot */
-class CPPOneHotEx : public ICPPSimpleFunction
-{
-public:
- /** Configure the one_hot function
- *
- * @param[in] indices A tensor for indices. Data types supported: S32
- * @param[in] depth A tensor for depth. Data types supported: S32
- * @param[in] on_value A tensor for on_value. Data types supported: F32
- * @param[in] off_value A tensor for off_value. Data types supported: F32
- * @param[out] output A tensor for computed value of one hot operator
- * @param[in] axis An int value for axis
- */
- void configure(const ITensor *indices, const ITensor *depth, const ITensor *on_value,
- const ITensor *off_value, ITensor *output, const int axis);
-};
-}
-#endif /* __ARM_COMPUTE_CPPONEHOT_EX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h
index 3fad230f1..d0ddc2609 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h
@@ -16,13 +16,13 @@
#ifndef __ARM_COMPUTE_NEFUNCTIONSEX_H__
#define __ARM_COMPUTE_NEFUNCTIONSEX_H__
-#include <arm_compute/runtime/NEON/functions/NEActivationLayerEx.h>
-#include <arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h>
+#include <arm_compute/runtime/NEON/functions/NECastBool.h>
#include <arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h>
#include <arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h>
#include <arm_compute/runtime/NEON/functions/NEGatherEx.h>
#include <arm_compute/runtime/NEON/functions/NEHashtableLookup.h>
#include <arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h>
+#include <arm_compute/runtime/NEON/functions/NEOneHot.h>
#include <arm_compute/runtime/NEON/functions/NEReduceSum.h>
#include <arm_compute/runtime/NEON/functions/NEReduceOperation.h>
#include <arm_compute/runtime/NEON/functions/NETransposeConvLayer.h>
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEActivationLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEActivationLayerEx.h
deleted file mode 100644
index 6156c84f8..000000000
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEActivationLayerEx.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NEACTIVATIONLAYEREX_H__
-#define __ARM_COMPUTE_NEACTIVATIONLAYEREX_H__
-
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Basic function to run @ref NEActivationLayerKernelEx
- *
- * @note The function simulates an activation layer with the specified activation function.
- */
-class NEActivationLayerEx : public INESimpleFunctionNoBorder
-{
-public:
- /** Constructor
- *
- * @param[in] ctx Runtime context to be used by the function
- */
- NEActivationLayerEx(IRuntimeContext *ctx = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEActivationLayerEx(const NEActivationLayerEx &) = delete;
- /** Default move constructor */
- NEActivationLayerEx(NEActivationLayerEx &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEActivationLayerEx &operator=(const NEActivationLayerEx &) = delete;
- /** Default move assignment operator */
- NEActivationLayerEx &operator=(NEActivationLayerEx &&) = default;
- /** [NEActivationLayerEx snippet] **/
- /** Set the input and output tensor.
- *
- * @note If the output tensor is a nullptr or is equal to the input, the activation function will
- * be performed in-place
- *
- * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this
- * tensor will store the result
- * of the activation function. Data types supported:
- * QASYMM8/QSYMM16/F16/F32.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] activation_info Activation layer parameters.
- */
- void configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info);
- /** [NEActivationLayerEx snippet] **/
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NEActivationLayerEx
- *
- * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor
- * will store the result
- * of the activation function. Data types supported: QASYMM8/QSYMM16/F16/F32.
- * @param[in] output Destination tensor info. Data type supported: same as @p input
- * @param[in] act_info Activation layer information.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const ActivationLayerInfo &act_info);
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NEACTIVATIONLAYEREX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h
index 026d30098..8d931f08d 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h
@@ -41,8 +41,10 @@
#ifndef __ARM_COMPUTE_NEBINARYLOGICALOPERATION_H__
#define __ARM_COMPUTE_NEBINARYLOGICALOPERATION_H__
+#include "arm_compute/core/Error.h"
#include "arm_compute/core/TypesEx.h"
#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/core/ITensorInfo.h"
namespace arm_compute
{
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NECastBool.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NECastBool.h
new file mode 100644
index 000000000..dd62645ee
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NECastBool.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NECASTBOOL_H__
+#define __ARM_COMPUTE_NECASTBOOL_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+
+namespace arm_compute
+{
+class ITensor;
+class ITensorInfo;
+
+/**
+ * @brief Class to cast a boolean (U8) input tensor to the supported output types.
+ */
+class NECastBool : public INESimpleFunctionNoBorder
+{
+public:
+ /** Initialize the function's source, destination
+ *
+ * Valid conversions Input -> Output:
+ *
+ * - U8 -> U8, S8, U16, S16, U32, S32, F32, F16
+ *
+ * @param[in] input The input tensor to convert. Data types supported: U8
+ * @param[out] output The output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
+ */
+ void configure(const ITensor *input, ITensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref NECastBool
+ *
+ * @param[in] input Source tensor info. Data types supported: U8.
+ * @param[in] output Destination tensor info. Data type supported: U8/S8/U16/S16/U32/S32/F16/F32.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_NECASTBOOL_H__*/
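A sketch of the usual ACL validate-then-configure idiom for the new NECastBool (tensor creation omitted; ErrorCode::OK is the library's success code):

#include "arm_compute/runtime/NEON/functions/NECastBool.h"

// Cast a boolean (U8) tensor to another data type, validating first.
arm_compute::Status cast_bool(arm_compute::NECastBool &cast, const arm_compute::ITensor *input,
                              arm_compute::ITensor *output)
{
  const auto status = arm_compute::NECastBool::validate(input->info(), output->info());
  if (status.error_code() != arm_compute::ErrorCode::OK)
    return status; // e.g. unsupported output data type
  cast.configure(input, output);
  return status;
}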
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h
index 63f7714aa..82a789e86 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h
@@ -48,12 +48,14 @@
#define __ARM_COMPUTE_NEEMBEDDINGLOOKUP_H__
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include "arm_compute/core/Error.h"
#include <vector>
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/**
* @brief Class to perform EmbeddingLookup operation
@@ -84,5 +86,5 @@ public:
static Status validate(const ITensorInfo *input, const ITensorInfo *output,
const ITensorInfo *lookups);
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_NEEMBEDDINGLOOKUP_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h
index 56548a479..214592710 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h
@@ -44,11 +44,11 @@
#include "arm_compute/core/NEON/kernels/NEQuantizationSymmetricKernel.h"
#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h"
#include "arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h"
-#include "arm_compute/core/NEON/kernels/NETransposeKernel.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
#include "arm_compute/runtime/Tensor.h"
+#include "src/core/NEON/kernels/NETransposeKernel.h"
namespace arm_compute
{
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h
index 8f98f220a..2bbb1fea1 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h
@@ -43,16 +43,16 @@
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h"
-#include "arm_compute/core/NEON/kernels/NETransposeKernel.h"
-#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/functions/NEFlattenLayer.h"
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
#include "arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h"
#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
+#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h"
+#include "src/core/NEON/kernels/NETransposeKernel.h"
namespace arm_compute
{
@@ -79,11 +79,11 @@ public:
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEFullyConnectedLayerEx(const NEFullyConnectedLayerEx &) = delete;
- /** Default move constructor */
- NEFullyConnectedLayerEx(NEFullyConnectedLayerEx &&) = default;
+ /** Prevent instances of this class from being moved */
+ NEFullyConnectedLayerEx(NEFullyConnectedLayerEx &&) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEFullyConnectedLayerEx &operator=(const NEFullyConnectedLayerEx &) = delete;
- /** Default move assignment operator */
- NEFullyConnectedLayerEx &operator=(NEFullyConnectedLayerEx &&) = default;
+ /** Prevent instances of this class from being moved */
+ NEFullyConnectedLayerEx &operator=(NEFullyConnectedLayerEx &&) = delete;
/** Set the input and output tensors.
*
* @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32.
@@ -141,7 +141,7 @@ private:
void configure_mm(const ITensor *input, const ITensor *weights, ITensor *output);
MemoryGroup _memory_group;
- NEFlattenLayerKernel _flatten_kernel;
+ NEFlattenLayer _flatten_kernel;
NEConvertFullyConnectedWeights _convert_weights;
NEFullyConnectedLayerReshapeWeights _reshape_weights_function;
NEGEMM _mm_gemm;
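Deleting the move operations above follows the same reasoning as the already-deleted copies: the function owns kernels that cache raw pointers into sibling members, so a move would leave those kernels pointing into the moved-from object. An illustrative sketch with hypothetical types (not from this patch):

struct Kernel
{
  const int *src = nullptr; // kernels cache raw pointers at configure() time
};

struct Function
{
  int buffer = 0;
  Kernel kernel;
  void configure() { kernel.src = &buffer; }
};

// If Function were movable, a moved-to instance would copy kernel.src verbatim,
// still pointing at the moved-from object's buffer; `= delete` rules this out.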
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h
index 18cb61bf9..e34b4dcb0 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h
@@ -43,8 +43,8 @@ public:
public:
NEFullyConnectedReshapingLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr)
- : _memory_manager{memory_manager}, _input(nullptr), _weights(nullptr), _biases(nullptr),
- _output(nullptr), _neon_buffer{}, _neon_fc{nullptr}, _neon_reshape{}, _needs_reshape(false)
+ : _memory_manager{memory_manager}, _input(nullptr), _weights(nullptr), _biases(nullptr),
+ _output(nullptr), _neon_buffer{}, _neon_fc{nullptr}, _neon_reshape{}, _needs_reshape(false)
{
// DO NOTHING
}
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGatherEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGatherEx.h
index 155a1b837..6944c77f6 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGatherEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGatherEx.h
@@ -47,6 +47,7 @@
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/** Basic function to run @ref NEGatherKernelEx */
class NEGatherEx : public INESimpleFunctionNoBorder
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEHashtableLookup.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEHashtableLookup.h
index 521a05ad9..f6fda60a9 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEHashtableLookup.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEHashtableLookup.h
@@ -48,12 +48,14 @@
#define __ARM_COMPUTE_NEHASHTABLELOOKUP_H__
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include "arm_compute/core/Error.h"
#include <vector>
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/**
* @brief Class to perform HashtableLookup operation
@@ -96,5 +98,5 @@ public:
const ITensorInfo *input, const ITensorInfo *output,
const ITensorInfo *hits);
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_NEHASHTABLELOOKUP_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h
index 18e813923..0ee967698 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h
@@ -54,6 +54,7 @@
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/** Basic function to perform a Instance normalization.
*
@@ -112,5 +113,5 @@ private:
Tensor _permuted_input;
Tensor _permuted_output;
};
-}
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYEREX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEOneHot.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEOneHot.h
new file mode 100644
index 000000000..668f024a1
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEOneHot.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEONEHOT_H__
+#define __ARM_COMPUTE_NEONEHOT_H__
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+class ITensorInfo;
+
+/** Basic function to run @ref NEOneHotKernel */
+class NEOneHot : public INESimpleFunctionNoBorder
+{
+public:
+ /** Initialise the kernel's inputs and outputs
+ *
+ * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the
+ * following types: U32/S32
+ * @param[in] depth The tensor for depth of the one hot dimension. Supported tensor rank: up
+ * to 3. Must be one of the following types: U32/S32
+ * @param[in] on_value On value tensor. Supported tensor rank: only 1. Data type supported:
+ * U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[in] off_value Off value tensor. Supported tensor rank: only 1. Data type supported:
+ * Same as @p on_value
+ * @param[out] output Destination tensor. Data type supported: Same as @p on_value
+ * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1.
+ * The value must be in range [-indices.rank, indices.rank)
+ */
+ void configure(const ITensor *indices, const ITensor *depth, const ITensor *on_value,
+ const ITensor *off_value, ITensor *output, int axis = -1);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NEOneHotKernel
+ *
+ * @param[in] indices Indices tensor info. Supported tensor rank: up to 3.
+ * Must be one of the following types: U32/S32
+ * @param[in] depth The tensor info for depth of the one hot dimension.
+ * Supported tensor rank: up to 3.
+ * Must be one of the following types: U32/S32
+ * @param[in] on_value On value tensor info. Supported tensor rank: only 1.
+ * Data type supported: U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[in] off_value Off value tensor info. Supported tensor rank: only 1.
+ * Data type supported: Same as @p on_value
+ * @param[out] output Destination tensor info. Data type supported: Same as @p on_value
+ * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1.
+ * The value must be in range [-indices.rank, indices.rank)
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *indices, const ITensorInfo *depth,
+ const ITensorInfo *on_value, const ITensorInfo *off_value,
+ const ITensorInfo *output, int axis = -1);
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEONEHOT_H__ */
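A minimal sketch of configuring the new NEOneHot per the documented contract (indices and depth are S32 tensors, on/off values are rank-1 tensors of the output type; tensor setup omitted):

#include "arm_compute/runtime/NEON/functions/NEOneHot.h"

// One-hot encode `indices` into `output`, filling along the innermost axis.
void configure_one_hot(arm_compute::NEOneHot &one_hot, const arm_compute::ITensor *indices,
                       const arm_compute::ITensor *depth, const arm_compute::ITensor *on_value,
                       const arm_compute::ITensor *off_value, arm_compute::ITensor *output)
{
  one_hot.configure(indices, depth, on_value, off_value, output, /*axis=*/-1);
}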
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceOperation.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceOperation.h
index 7f764b000..9858e6c09 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceOperation.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceOperation.h
@@ -43,10 +43,10 @@
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/TypesEx.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEReductionOperationEx.h"
+#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h"
#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
#include "arm_compute/runtime/Tensor.h"
@@ -71,7 +71,7 @@ public:
* @param[in] op Reduce operation to perform.
*/
void configure(ITensor *input, const Coordinates &reduction_axis, bool keep_dims, ITensor *output,
- ReduceOperation op);
+ ReductionOperation op);
/** Static function to check if given info will lead to a valid configuration of @ref
* NEReduceOperation
@@ -85,14 +85,14 @@ public:
* @return A status
*/
static Status validate(const ITensorInfo *input, const Coordinates &reduction_axis,
- bool keep_dims, const ITensorInfo *output, ReduceOperation op);
+ bool keep_dims, const ITensorInfo *output, ReductionOperation op);
// Inherited methods overridden:
void run() override;
private:
MemoryGroup _memory_group;
- std::vector<NEReductionOperationEx> _reduction_kernels;
+ std::vector<NEReductionOperation> _reduction_kernels;
std::vector<Tensor> _reduced_outs;
NEReshapeLayer _reshape;
unsigned int _reduction_ops;
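As with the CL variant above, the NEON reduce function now takes the standard ReductionOperation and delegates to NEReductionOperation. A usage sketch, assuming a MAX reduction over axis 0 (tensor setup omitted):

#include "arm_compute/runtime/NEON/functions/NEReduceOperation.h"

// Reduce (max) over axis 0 and drop the reduced dimension.
void configure_reduce_max(arm_compute::NEReduceOperation &reduce, arm_compute::ITensor *input,
                          arm_compute::ITensor *output)
{
  const arm_compute::Coordinates axes(0);
  reduce.configure(input, axes, /*keep_dims=*/false, output,
                   arm_compute::ReductionOperation::MAX);
}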
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceSum.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceSum.h
index 48b416923..f34a8f8af 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceSum.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceSum.h
@@ -43,11 +43,13 @@
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h"
#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
+#include "arm_compute/runtime/Tensor.h"
namespace arm_compute
{
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReductionOperationEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReductionOperationEx.h
deleted file mode 100644
index 1693922b7..000000000
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReductionOperationEx.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NEREDUCTIONOPERATIONEX_H__
-#define __ARM_COMPUTE_NEREDUCTIONOPERATIONEX_H__
-
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NEReductionOperationKernelEx.h"
-#include "arm_compute/core/TypesEx.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to simulate a reduction operation. This function calls the following NEON
- * kernels:
- *
- * -# @ref NEFillBorderKernel
- * -# @ref NEReductionOperationKernelEx
- *
- */
-class NEReductionOperationEx : public IFunction
-{
-public:
- /** Default constructor */
- NEReductionOperationEx();
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32.
- * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input.
- * @param[in] axis Dimension along which to reduce.
- * @param[in] op Reduction operation to perform.
- */
- void configure(ITensor *input, ITensor *output, unsigned int axis, ReduceOperation op);
-
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NEReductionOperationEx.
- *
- * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32.
- * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p
- * input.
- * @param[in] axis Dimension along which to reduce.
- * @param[in] op Reduction operation to perform.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis,
- ReduceOperation op);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- NEReductionOperationKernelEx _reduction_kernel;
- NEFillBorderKernel _fill_border_kernel;
- size_t _window_split;
- int _reduction_axis;
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NEREDUCTIONOPERATIONEX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h
index 24ff5dac9..f82579a45 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h
@@ -102,47 +102,50 @@ public:
/** Prevent instances of this class from being copied (As this class contains pointers) */
NETransposeConvLayer &operator=(const NETransposeConvLayer &) = delete;
- /** Allow instances of this class to be moved */
- NETransposeConvLayer(NETransposeConvLayer &&) = default;
- /** Allow instances of this class to be moved */
- NETransposeConvLayer &operator=(NETransposeConvLayer &&) = default;
+ /** Prevent instances of this class from being moved */
+ NETransposeConvLayer(NETransposeConvLayer &&) = delete;
+ /** Prevent instances of this class from being moved */
+ NETransposeConvLayer &operator=(NETransposeConvLayer &&) = delete;
/** Default destructor */
virtual ~NETransposeConvLayer() = default;
/** Set the input, weights, biases and output tensors.
*
- * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an
- * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
- * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type
- * supported: Same as @p input.
- * @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data type
- * supported: Data types supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16
- * for F16 input.
- * @param[out] output Output tensor. The output has the same number of dimensions as the @p
- * input.
- * @param[in] info Contains padding and policies to be used in the deconvolution, this is
- * decribed in @ref PadStrideInfo.
- * @param[in] invalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
+ * Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
+ * @param[in] bias Optional, ignored if NULL. The biases have one dimension.
+ * Data types supported: S32 for QASYMM8 and
+ * QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input.
+ * @param[out] output Output tensor. The output has the same number of dimensions as
+ * the @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution,
+ * this is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
*
*/
void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output,
const PadStrideInfo &info, unsigned int invalid_right,
unsigned int invalid_bottom);
/** Static function to check if given info will lead to a valid configuration of @ref
- * NETransposeConvLayer
+ * NETransposeConvLayer
*
- * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an
- * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
- * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data type
- * supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension. Data type supported: Data types
- * supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input.
- * @param[in] output Output tensor info. The output has the same number of dimensions as the @p
- * input.
- * @param[in] info Contains padding and policies to be used in the deconvolution, this is
- * decribed in @ref PadStrideInfo.
- * @param[in] innvalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] input Input tensor info. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
+ * Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
+ * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data types supported: S32 for QASYMM8 and QASYMM8_SIGNED input,
+ * F32 for F32 input, F16 for F16 input.
+ * @param[in] output Output tensor info. The output has the same number of dimensions as
+ * the @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution,
+ * this is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
*
* @return a status
*/
@@ -168,5 +171,5 @@ private:
PadStrideInfo _info;
bool _is_prepared;
};
-} // arm_compute
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_NETRANSPOSECONVLAYER_H__ */
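A sketch of configuring NETransposeConvLayer per the cleaned-up documentation above: a stride-2 deconvolution with no extra padding and no trimmed output columns/rows (tensor creation omitted; PadStrideInfo is the standard ACL type):

#include "arm_compute/runtime/NEON/functions/NETransposeConvLayer.h"

// Stride-2 transpose convolution, bias omitted (ignored if NULL per the docs).
void configure_transpose_conv(arm_compute::NETransposeConvLayer &deconv,
                              arm_compute::ITensor *input, const arm_compute::ITensor *weights,
                              arm_compute::ITensor *output)
{
  const arm_compute::PadStrideInfo info(/*stride_x=*/2, /*stride_y=*/2, /*pad_x=*/0, /*pad_y=*/0);
  deconv.configure(input, weights, /*bias=*/nullptr, output, info,
                   /*invalid_right=*/0, /*invalid_bottom=*/0);
}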
diff --git a/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp b/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp
index ba42a2456..e15dc2685 100644
--- a/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp
+++ b/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp
@@ -54,104 +54,143 @@
using namespace arm_compute;
const std::map<std::string, std::string> CLKernelLibraryEx::_kernel_program_map = {
- // ARMComputeEx kernels
- {"binary_logical_op", "binary_logical_op.cl"},
- {"embedding_lookup", "embedding_lookup.cl"},
- {"gather_ex", "gather_ex.cl"},
- {"gather_ex_1d", "gather_ex.cl"},
- {"gather_ex_1d_out", "gather_ex.cl"},
- {"gemmlowp_mm_midgard_ex", "gemmlowp_ex.cl"},
- {"hashtable_lookup", "hashtable_lookup.cl"},
- {"instance_normalization_ex", "instance_normalization_ex.cl"},
- {"multiply_scale_factor", "multiply_scale_factor.cl"},
- {"neg_tensor", "neg_tensor.cl"},
- {"quantization_symm8", "quantization_symm8.cl"},
- {"reduce_min_max", "reduce_operation.cl"},
- {"reduce_sum_mean", "reduce_operation.cl"},
- {"topkv2_init", "topkv2.cl"},
- {"topkv2_find_first_negative", "topkv2.cl"},
- {"topkv2_reorder_negatives", "topkv2.cl"},
- {"topkv2_store", "topkv2.cl"},
- {"radixsort_histogram", "topkv2_radixsort.cl"},
- {"radixsort_scanhistograms", "topkv2_radixsort.cl"},
- {"radixsort_pastehistograms", "topkv2_radixsort.cl"},
- {"radixsort_reorder", "topkv2_radixsort.cl"},
- {"topkv2_quicksort", "topkv2_quicksort.cl"},
- {"scale_factor_symm8", "scale_factor.cl"},
+ // ARMComputeEx kernels
+ {"arg_min_max_ex_x", "arg_min_max_ex.cl"},
+ {"arg_min_max_ex_y", "arg_min_max_ex.cl"},
+ {"arg_min_max_ex_z", "arg_min_max_ex.cl"},
+ {"arg_min_max_ex_w", "arg_min_max_ex.cl"},
+ {"binary_logical_op", "binary_logical_op.cl"},
+ {"cast_bool", "cast.cl"},
+ {"embedding_lookup", "embedding_lookup.cl"},
+ {"gather_ex", "gather_ex.cl"},
+ {"gather_ex_1d", "gather_ex.cl"},
+ {"gather_ex_1d_out", "gather_ex.cl"},
+ {"gemmlowp_mm_midgard_ex", "gemmlowp_ex.cl"},
+ {"gemm_accumulate_biases", "gemm.cl"},
+ {"hashtable_lookup", "hashtable_lookup.cl"},
+ {"instance_normalization_ex", "instance_normalization_ex.cl"},
+ {"memset", "memset.cl"},
+ {"multiply_scale_factor", "multiply_scale_factor.cl"},
+ {"neg_tensor", "neg_tensor.cl"},
+ {"one_hot", "one_hot.cl"},
+ {"one_hot_only_on_value", "one_hot.cl"},
+ {"pad_layer_constant", "pad_layer.cl"},
+ {"pad_layer_symmetric_reflect", "pad_layer.cl"},
+ {"quantization_symm8", "quantization_symm8.cl"},
+ {"reduce_min_max", "reduce_operation.cl"},
+ {"reduce_sum_mean", "reduce_operation.cl"},
+ {"topkv2_init", "topkv2.cl"},
+ {"topkv2_find_first_negative", "topkv2.cl"},
+ {"topkv2_reorder_negatives", "topkv2.cl"},
+ {"topkv2_store", "topkv2.cl"},
+ {"topkv2_quicksort", "topkv2_quicksort.cl"},
+ {"scale_factor_symm8", "scale_factor.cl"},
};
const std::map<std::string, std::string> CLKernelLibraryEx::_program_source_map = {
#ifdef EMBEDDED_KERNELS
- {
- "embedding_lookup.cl",
+ {
+ "activation_float_helpers.h",
+#include "./cl_kernels/activation_float_helpers.hembed"
+ },
+ {
+ "arg_min_max_ex.cl",
+#include "./cl_kernels/arg_min_max_ex.clembed"
+ },
+ {
+ "binary_logical_op.cl",
+#include "./cl_kernels/binary_logical_op.clembed"
+ },
+ {
+ "cast.cl",
+#include "./cl_kernels/cast.clembed"
+ },
+ {
+ "embedding_lookup.cl",
#include "./cl_kernels/embedding_lookup.clembed"
- },
- {
- "gather_ex.cl",
+ },
+ {
+ "gather_ex.cl",
#include "./cl_kernels/gather_ex.clembed"
- },
- {
- "gemmlowp_ex.cl",
+ },
+ {
+ "gemmlowp_ex.cl",
#include "./cl_kernels/gemmlowp_ex.clembed"
- },
- {
- "hashtable_lookup.cl",
+ },
+ {
+ "gemm_helpers.h",
+#include "./cl_kernels/gemm_helpers.hembed"
+ },
+ {
+ "hashtable_lookup.cl",
#include "./cl_kernels/hashtable_lookup.clembed"
- },
- {
- "helpers.h",
+ },
+ {
+ "helpers.h",
#include "./cl_kernels/helpers.hembed"
- },
- {
- "helpers_asymm.h",
+ },
+ {
+ "helpers_asymm.h",
#include "./cl_kernels/helpers_asymm.hembed"
- },
- {
- "instance_normalization_ex.cl",
+ },
+ {
+ "instance_normalization_ex.cl",
#include "./cl_kernels/instance_normalization_ex.clembed"
- },
- {
- "binary_logical_op.cl",
-#include "./cl_kernels/binary_logical_op.clembed"
- },
- {
- "multiply_scale_factor.cl",
+ },
+ {
+ "gemm.cl",
+#include "./cl_kernels/gemm.clembed"
+ },
+ {
+ "memset.cl",
+#include "./cl_kernels/memset.clembed"
+ },
+ {
+ "multiply_scale_factor.cl",
#include "./cl_kernels/multiply_scale_factor.clembed"
- },
- {
- "neg_tensor.cl",
+ },
+ {
+ "neg_tensor.cl",
#include "./cl_kernels/neg_tensor.clembed"
- },
- {
- "quantization_symm8.cl",
+ },
+ {
+ "one_hot.cl",
+#include "./cl_kernels/one_hot.clembed"
+ },
+ {
+ "pad_layer.cl",
+#include "./cl_kernels/pad_layer.clembed"
+ },
+ {
+ "quantization_symm8.cl",
#include "./cl_kernels/quantization_symm8.clembed"
- },
- {
- "reduce_operation.cl",
+ },
+ {
+ "reduce_operation.cl",
#include "./cl_kernels/reduce_operation.clembed"
- },
- {
- "scale_factor.cl",
+ },
+ {
+ "repeat.h",
+#include "./cl_kernels/repeat.hembed"
+ },
+ {
+ "scale_factor.cl",
#include "./cl_kernels/scale_factor.clembed"
- },
- {
- "topkv2.cl",
+ },
+ {
+ "topkv2.cl",
#include "./cl_kernels/topkv2.clembed"
- },
- {
- "topkv2_radixsort.cl",
-#include "./cl_kernels/topkv2_radixsort.clembed"
- },
- {
- "topkv2_quicksort.cl",
+ },
+ {
+ "topkv2_quicksort.cl",
#include "./cl_kernels/topkv2_quicksort.clembed"
- },
+ },
#endif /* EMBEDDED_KERNELS */
};
CLKernelLibraryEx::CLKernelLibraryEx()
- : _context(), _device(), _kernel_path("."), _programs_map(), _built_programs_map()
+ : _context(), _device(), _kernel_path("."), _programs_map(), _built_programs_map()
{
opencl_is_available(); // Make sure the OpenCL symbols are initialised *before* the
// CLKernelLibraryEx is built
@@ -318,8 +357,8 @@ size_t CLKernelLibraryEx::max_local_workgroup_size(const cl::Kernel &kernel) con
size_t err = kernel.getWorkGroupInfo(_device, CL_KERNEL_WORK_GROUP_SIZE, &result);
ARM_COMPUTE_ERROR_ON_MSG(
- err != 0,
- "clGetKernelWorkGroupInfo failed to return the maximum workgroup size for the kernel");
+ err != 0,
+ "clGetKernelWorkGroupInfo failed to return the maximum workgroup size for the kernel");
ARM_COMPUTE_UNUSED(err);
return result;
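The expanded _kernel_program_map above pairs each new kernel entry point (arg_min_max_ex_*, cast_bool, one_hot, memset, pad_layer_*, ...) with the .cl source file that defines it. A self-contained sketch of the lookup step the library performs before building a program (the surrounding CLKernelLibraryEx API is unchanged by this patch):

#include <map>
#include <stdexcept>
#include <string>

// Resolve which .cl file provides a kernel, as CLKernelLibraryEx must do
// before compiling: e.g. "one_hot" -> "one_hot.cl".
std::string program_for_kernel(const std::map<std::string, std::string> &kernel_program_map,
                               const std::string &kernel_name)
{
  const auto it = kernel_program_map.find(kernel_name);
  if (it == kernel_program_map.end())
    throw std::runtime_error("Kernel " + kernel_name + " not found in the kernel library");
  return it->second;
}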
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/activation_float_helpers.h b/compute/ARMComputeEx/src/core/CL/cl_kernels/activation_float_helpers.h
new file mode 100644
index 000000000..3c3ff8419
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/activation_float_helpers.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "helpers.h"
+
+#if GPU_ARCH == GPU_ARCH_BIFROST
+#define MLA(a, b, c) (fma(c, b, a))
+#else // GPU_ARCH == GPU_ARCH_BIFROST
+#define MLA(a, b, c) ((b) * (c) + (a))
+#endif // GPU_ARCH == GPU_ARCH_BIFROST
+
+// Hard-Swish
+#define hard_swish_op(DATA_TYPE, x, A_VAL, B_VAL) \
+ (x * ((min(max((x + (DATA_TYPE)3.0), (DATA_TYPE)0.0), (DATA_TYPE)6.0)) * (DATA_TYPE)0.166666667))
+
+// Logistic Activation
+#define logistic_op(DATA_TYPE, x, A_VAL, B_VAL) ((DATA_TYPE)1.0 / ((DATA_TYPE)1.0 + exp(-x)))
+
+// Hyperbolic Tangent Activation
+#define tanh_op(DATA_TYPE, x, A_VAL, B_VAL) ((DATA_TYPE)A_VAL * tanh((DATA_TYPE)B_VAL * x))
+
+// RELU Activation
+#define relu_op(DATA_TYPE, x, A_VAL, B_VAL) (max((DATA_TYPE)0.0, x))
+
+// Bounded RELU Activation
+#define brelu_op(DATA_TYPE, x, A_VAL, B_VAL) (min((DATA_TYPE)A_VAL, max((DATA_TYPE)0.0, x)))
+
+// Lower Upper Bounded RELU Activation
+#define lu_brelu_op(DATA_TYPE, x, A_VAL, B_VAL) (min(max(x, (DATA_TYPE)B_VAL), (DATA_TYPE)A_VAL))
+
+// Leaky RELU Activation
+#define lrelu_op(DATA_TYPE, x, A_VAL, B_VAL) \
+ ((min(x, (DATA_TYPE)0.0) * (DATA_TYPE)A_VAL) + max(x, (DATA_TYPE)0.0))
+
+// Soft RELU Activation
+#define srelu_op(DATA_TYPE, x, A_VAL, B_VAL) (log((DATA_TYPE)1.0 + exp(x)))
+
+// ELU Activation
+#define elu_op(DATA_TYPE, x, A_VAL, B_VAL) \
+ (select(((DATA_TYPE)A_VAL * (exp(x) - (DATA_TYPE)1.0)), x, isgreaterequal(x, (DATA_TYPE)0.0)))
+
+// Absolute Activation
+#define abs_op(DATA_TYPE, x, A_VAL, B_VAL) (fabs(x))
+
+// Square Activation
+#define square_op(DATA_TYPE, x, A_VAL, B_VAL) (x * x)
+
+// Square-root Activation
+#define sqrt_op(DATA_TYPE, x, A_VAL, B_VAL) (sqrt(x))
+
+// Linear Activation
+#define linear_op(DATA_TYPE, x, A_VAL, B_VAL) (MLA((DATA_TYPE)B_VAL, (DATA_TYPE)A_VAL, x))
+
+// Identity Activation
+#define identity_op(DATA_TYPE, x, A_VAL, B_VAL) (x)
+
+#define ACT_OP(op, DATA_TYPE, x, A_VAL, B_VAL) op##_op(DATA_TYPE, x, A_VAL, B_VAL)
+
+#define ACTIVATION(op, DATA_TYPE, x, A_VAL, B_VAL) ACT_OP(op, DATA_TYPE, x, A_VAL, B_VAL)
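For reference, a host-side C++ rendering of the hard-swish definition above (ACTIVATION token-pastes the op name into hard_swish_op); std::min/std::max stand in for the OpenCL built-ins, and 0.166666667 approximates 1/6:

#include <algorithm>

// hard_swish(x) = x * relu6(x + 3) / 6, matching hard_swish_op above.
inline float hard_swish(float x)
{
  const float relu6 = std::min(std::max(x + 3.0f, 0.0f), 6.0f);
  return x * relu6 * 0.166666667f;
}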
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/arg_min_max_ex.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/arg_min_max_ex.cl
new file mode 100644
index 000000000..135cacf59
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/arg_min_max_ex.cl
@@ -0,0 +1,564 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "helpers.h"
+
+#if defined(FLOAT_DATA_TYPE)
+#define ISGREATER(x, y) isgreater(x, y)
+#define ISLESS(x, y) isless(x, y)
+#else // !FLOAT_DATA_TYPE
+#if defined(WIDTH)
+#define ISGREATER(x, y) (x > y) ? 1 : 0
+#define ISLESS(x, y) (x < y) ? 1 : 0
+#else // !defined(WIDTH)
+#define ISGREATER(x, y) \
+ select((VEC_DATA_TYPE(DATA_TYPE_SELECT, 16))0, (VEC_DATA_TYPE(DATA_TYPE_SELECT, 16)) - 1, x > y)
+#define ISLESS(x, y) \
+ select((VEC_DATA_TYPE(DATA_TYPE_SELECT, 16))0, (VEC_DATA_TYPE(DATA_TYPE_SELECT, 16)) - 1, x < y)
+#endif // defined(WIDTH)
+#endif // defined(FLOAT_DATA_TYPE)
+
+#if defined(ARG_MAX)
+#define CONDITION_TO_USE(x, y) ISGREATER(x, y)
+#elif defined(ARG_MIN)
+#define CONDITION_TO_USE(x, y) ISLESS(x, y)
+#else // !(defined(ARG_MAX) || defined(ARG_MIN))
+#error "Unsupported reduction operation!"
+#endif // defined(ARG_MAX)
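+
+// Illustrative expansion (added comment): building with -DARG_MAX and -DFLOAT_DATA_TYPE
+// makes CONDITION_TO_USE(in, res) expand to isgreater(in, res), so the running result is
+// replaced whenever the new candidate is strictly greater; -DARG_MIN mirrors this with
+// isless.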
+
+#if defined(DATA_TYPE_OUTPUT) && defined(DATA_TYPE_SELECT)
+#if defined(WIDTH)
+#if defined(ARG_MIN)
+#if defined(PREV_OUTPUT)
+/** Find the index of the minimum value of a vector, reusing the indices computed by a
+ * previous reduction stage.
+ *
+ * @param[in] input    Pointer to the first value of the row.
+ * @param[in] prev_res Pointer to the indices produced by the previous stage.
+ * @param[in] x_idx    Index of the 16-element block handled by this work-item.
+ *
+ * @return Index of the minimum value.
+ */
+inline DATA_TYPE_OUTPUT arg_idx_min_prev_out(__global const DATA_TYPE *input,
+ __global const DATA_TYPE_OUTPUT *prev_res,
+ const int x_idx)
+{
+ int end_elem = (x_idx + 1) * 16;
+ if (end_elem > WIDTH)
+ {
+ end_elem = WIDTH - x_idx * 16;
+ }
+ DATA_TYPE_OUTPUT res = prev_res[0];
+ for (int x_v = 1; x_v < end_elem; ++x_v)
+ {
+ res = select(res, prev_res[x_v], *(input + prev_res[x_v]) < *(input + res));
+ }
+ return res;
+}
+#else // !defined(PREV_OUTPUT)
+/** Find the index of the minimum value of a vector.
+ *
+ * @param[in] input Pointer to the first value of the row.
+ * @param[in] x_idx Index of the 16-element block handled by this work-item.
+ *
+ * @return Index of the minimum value.
+ */
+inline DATA_TYPE_OUTPUT arg_idx_min(__global const DATA_TYPE *input, const int x_idx)
+{
+#if WIDTH < 16
+ DATA_TYPE_OUTPUT res = 0;
+ for (DATA_TYPE_OUTPUT x_v = res + 1; x_v < WIDTH; ++x_v)
+ {
+ res = select(res, x_v, *(input + x_v) < *(input + res));
+ }
+ return res;
+#else // WIDTH >= 16
+ int x_elem = x_idx * 16;
+ const int x_goback = select(0, 16 - WIDTH % 16, x_elem + 16 > WIDTH);
+ x_elem -= x_goback;
+
+ VEC_DATA_TYPE(DATA_TYPE, 16)
+ in = vload16(0, input - x_goback);
+ VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 16)
+ res = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+
+ VEC_DATA_TYPE(DATA_TYPE_SELECT, 8)
+ idx_sel = (in.s01234567 <= in.s89abcdef);
+ in.s01234567 = select(in.s89abcdef, in.s01234567, idx_sel);
+ res.s01234567 = select(res.s89abcdef, res.s01234567, CONVERT(idx_sel, int8));
+
+ idx_sel.s0123 =
+ (in.s0123 < in.s4567) ||
+ (in.s0123 == in.s4567 && CONVERT((res.s0123 < res.s4567), VEC_DATA_TYPE(DATA_TYPE_SELECT, 4)));
+ in.s0123 = select(in.s4567, in.s0123, idx_sel.s0123);
+ res.s0123 = select(res.s4567, res.s0123, CONVERT(idx_sel.s0123, int4));
+
+ idx_sel.s01 =
+ (in.s01 < in.s23) ||
+ (in.s01 == in.s23 && CONVERT((res.s01 < res.s23), VEC_DATA_TYPE(DATA_TYPE_SELECT, 2)));
+ in.s01 = select(in.s23, in.s01, idx_sel.s01);
+ res.s01 = select(res.s23, res.s01, CONVERT(idx_sel.s01, int2));
+
+ idx_sel.s0 = (in.s0 < in.s1) || (in.s0 == in.s1 && CONVERT((res.s0 < res.s1), DATA_TYPE_SELECT));
+ res.s0 = select(res.s1, res.s0, CONVERT(idx_sel.s0, int));
+
+ return res.s0 + x_elem;
+#endif // WIDTH < 16
+}
+#endif // defined(PREV_OUTPUT)
+#endif // defined(ARG_MIN)
+#if defined(ARG_MAX)
+#if defined(PREV_OUTPUT)
+/** Find the index of the maximum value of a vector, reusing the indices computed by a
+ * previous reduction stage.
+ *
+ * @param[in] input    Pointer to the first value of the row.
+ * @param[in] prev_res Pointer to the indices produced by the previous stage.
+ * @param[in] x_idx    Index of the 16-element block handled by this work-item.
+ *
+ * @return Index of the maximum value.
+ */
+inline DATA_TYPE_OUTPUT arg_idx_max_prev_out(__global const DATA_TYPE *input,
+ __global const DATA_TYPE_OUTPUT *prev_res,
+ const int x_idx)
+{
+ int end_elem = (x_idx + 1) * 16;
+ if (end_elem > WIDTH)
+ {
+ end_elem = WIDTH - x_idx * 16;
+ }
+ DATA_TYPE_OUTPUT res = prev_res[0];
+ unsigned int res_int = res;
+ DATA_TYPE_OUTPUT condition_check2;
+  for (int x_v = 1; x_v < end_elem; ++x_v)
+  {
+    int i1 = prev_res[x_v];
+    res_int = res; // keep the comparison index in sync with the current best result
+    condition_check2 = *(input + i1) > *(input + res_int);
+    res = select(res, prev_res[x_v], condition_check2);
+  }
+ return res;
+}
+#else // !defined(PREV_OUTPUT)
+/** Find the index of the maximum value of a vector.
+ *
+ * @param[in] input Pointer to the first value of the row.
+ * @param[in] x_idx Index of the 16-element block handled by this work-item.
+ *
+ * @return Index of the maximum value.
+ */
+inline DATA_TYPE_OUTPUT arg_idx_max(__global const DATA_TYPE *input, const int x_idx)
+{
+#if WIDTH < 16
+ DATA_TYPE_OUTPUT res = 0;
+ unsigned int i1;
+ unsigned int i2;
+ DATA_TYPE_OUTPUT condition_check;
+ for (DATA_TYPE_OUTPUT x_v = res + 1; x_v < WIDTH; ++x_v)
+ {
+ i1 = x_v;
+ i2 = res;
+ condition_check = *(input + i1) > *(input + i2);
+ res = select(res, x_v, condition_check);
+ }
+ return res;
+#else // WIDTH >= 16
+ int x_elem = x_idx * 16;
+ const int x_goback = select(0, 16 - WIDTH % 16, x_elem + 16 > WIDTH);
+ x_elem -= x_goback;
+
+ VEC_DATA_TYPE(DATA_TYPE, 16)
+ in = vload16(0, input - x_goback);
+ VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 16)
+ res = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+
+ VEC_DATA_TYPE(DATA_TYPE_SELECT, 8)
+ idx_sel = (in.s01234567 >= in.s89abcdef);
+ in.s01234567 = select(in.s89abcdef, in.s01234567, idx_sel);
+ res.s01234567 = select(res.s89abcdef, res.s01234567, CONVERT(idx_sel, int8));
+
+ idx_sel.s0123 =
+ (in.s0123 > in.s4567) ||
+ (in.s0123 == in.s4567 && CONVERT((res.s0123 < res.s4567), VEC_DATA_TYPE(DATA_TYPE_SELECT, 4)));
+ in.s0123 = select(in.s4567, in.s0123, idx_sel.s0123);
+ res.s0123 = select(res.s4567, res.s0123, CONVERT(idx_sel.s0123, int4));
+
+ idx_sel.s01 =
+ (in.s01 > in.s23) ||
+ (in.s01 == in.s23 && CONVERT((res.s01 < res.s23), VEC_DATA_TYPE(DATA_TYPE_SELECT, 2)));
+ in.s01 = select(in.s23, in.s01, idx_sel.s01);
+ res.s01 = select(res.s23, res.s01, CONVERT(idx_sel.s01, int2));
+
+ idx_sel.s0 = (in.s0 > in.s1) || (in.s0 == in.s1 && CONVERT((res.s0 < res.s1), DATA_TYPE_SELECT));
+ res.s0 = select(res.s1, res.s0, CONVERT(idx_sel.s0, int));
+
+ return res.s0 + x_elem;
+#endif // WIDTH < 16
+}
+#endif // defined(PREV_OUTPUT)
+#endif // defined(ARG_MAX)
+
+/** This kernel performs a parallel reduction along the x-axis using the given operation.
+ *
+ * @note If the results of a previous reduction stage are passed as input, the flag PREV_OUTPUT
+ * has to be passed at compile time using -DPREV_OUTPUT
+ * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
+ * @note The data type of the output must be passed at compile time using -DDATA_TYPE_OUTPUT: e.g.
+ * -DDATA_TYPE_OUTPUT=uint
+ * @note The arg_max flag must be passed at compile time using -DARG_MAX if we want to compute the
+ * ArgMax
+ * @note The arg_min flag must be passed at compile time using -DARG_MIN if we want to compute the
+ * ArgMin
+ *
+ * @param[in] src_ptr Pointer to the source tensor. Supported data
+ * types: S32/F16/F32
+ * @param[in] src_stride_x Stride of the source tensor in X dimension
+ * (in bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source tensor in Y dimension
+ * (in bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the
+ * source tensor
+ * @param[in] prev_res_ptr (Optional) Pointer to previous results
+ * tensor. Supported data types: U32/S32
+ * @param[in] prev_res_stride_x (Optional) Stride of the output tensor in X
+ * dimension (in bytes)
+ * @param[in] prev_res_step_x (Optional) prev_res_stride_x * number of
+ * elements along X processed per workitem(in bytes)
+ * @param[in] prev_res_stride_y (Optional) Stride of the output tensor in Y
+ * dimension (in bytes)
+ * @param[in] prev_res_step_y (Optional) prev_res_stride_y * number of
+ * elements along Y processed per workitem(in bytes)
+ * @param[in] prev_res_offset_first_element_in_bytes (Optional) The offset of the first element
+ * in the previous results tensor
+ * @param[in] partial_res_ptr The local buffer to hold partial result
+ * values. Supported data types: U32/S32
+ * @param[in] partial_res_stride_x Stride of the output tensor in X dimension
+ * (in bytes)
+ * @param[in] partial_res_step_x partial_res_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] partial_res_stride_y Stride of the output tensor in Y dimension
+ * (in bytes)
+ * @param[in] partial_res_step_y partial_res_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] partial_res_offset_first_element_in_bytes The offset of the first element in the
+ * partial results tensor
+ * @param[in] local_results Local buffer for storing the partial result
+ */
+__kernel void arg_min_max_ex_x(IMAGE_DECLARATION(src),
+#if defined(PREV_OUTPUT)
+ IMAGE_DECLARATION(prev_res),
+#endif // defined(PREV_OUTPUT)
+ IMAGE_DECLARATION(partial_res),
+ __local DATA_TYPE_OUTPUT *local_results)
+{
+#if defined(PREV_OUTPUT)
+ Image src = CONVERT_TO_IMAGE_STRUCT_NO_STEP(src);
+ Image prev_res = CONVERT_TO_IMAGE_STRUCT(prev_res);
+#else // !defined(PREV_OUTPUT)
+ Image src = CONVERT_TO_IMAGE_STRUCT(src);
+#endif // defined(PREV_OUTPUT)
+ Image partial_res = CONVERT_TO_IMAGE_STRUCT(partial_res);
+
+ unsigned int lsize = get_local_size(0);
+ unsigned int lid = get_local_id(0);
+
+ const uint x_idx = get_global_id(0);
+ const uint y_idx = get_global_id(1);
+ const __global DATA_TYPE *src_in_row =
+ (const __global DATA_TYPE *)(src_ptr + src_offset_first_element_in_bytes + y_idx * src_step_y);
+
+ for (unsigned int y = 0; y < get_local_size(1); ++y)
+ {
+#if defined(ARG_MAX)
+#if defined(PREV_OUTPUT)
+ local_results[lid] =
+ arg_idx_max_prev_out(src_in_row, (__global DATA_TYPE_OUTPUT *)offset(&prev_res, 0, y), x_idx);
+#else // !defined(PREV_OUTPUT)
+ local_results[lid] = arg_idx_max((__global DATA_TYPE *)offset(&src, 0, y), x_idx);
+#endif // defined(PREV_OUTPUT)
+#else // defined(ARG_MIN)
+#if defined(PREV_OUTPUT)
+ local_results[lid] =
+ arg_idx_min_prev_out(src_in_row, (__global DATA_TYPE_OUTPUT *)offset(&prev_res, 0, y), x_idx);
+#else // !defined(PREV_OUTPUT)
+ local_results[lid] = arg_idx_min((__global DATA_TYPE *)offset(&src, 0, y), x_idx);
+#endif // defined(PREV_OUTPUT)
+#endif // defined(ARG_MAX) || defined(ARG_MIN)
+
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ // Looking for the next highest power of 2 (maximum value of lsize is 8)
+ unsigned int middle = lsize - 1;
+ middle |= middle >> 1;
+ middle |= middle >> 2;
+ middle += 1;
+ // Perform parallel reduction
+ DATA_TYPE_OUTPUT condition_check3;
+ for (unsigned int i = middle; i > 0; i >>= 1)
+ {
+ if (lid < i && lid + i < lsize)
+ {
+ DATA_TYPE tmp0 = *(src_in_row + local_results[lid]);
+ DATA_TYPE tmp1 = *(src_in_row + local_results[lid + i]);
+#if defined(ARG_MAX)
+ condition_check3 =
+ ((tmp0 == tmp1) && (local_results[lid + i] < local_results[lid])) || (tmp0 < tmp1);
+ local_results[lid] = select(local_results[lid], local_results[lid + i], condition_check3);
+#else // defined(ARG_MIN)
+ local_results[lid] = select(
+ local_results[lid], local_results[lid + i],
+ ((tmp0 == tmp1) && (local_results[lid + i] < local_results[lid])) || (tmp0 > tmp1));
+#endif // defined(ARG_MAX) || defined(ARG_MIN)
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+ }
+
+ if (lid == 0)
+ {
+ ((__global DATA_TYPE_OUTPUT *)offset(&partial_res, get_group_id(0), y))[0] = local_results[0];
+ }
+ }
+}
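+
+// Usage sketch (added comment, not upstream code): per the @note documentation above, a
+// host would typically build this kernel once per reduction stage with options such as
+//   -DDATA_TYPE=float -DDATA_TYPE_OUTPUT=uint -DDATA_TYPE_SELECT=int -DWIDTH=128 -DARG_MAX
+// and add -DPREV_OUTPUT for every stage after the first. The exact option set is an
+// assumption for illustration only.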
+#endif // defined(WIDTH)
+
+#if defined(HEIGHT)
+/** This kernel performs a reduction along the y-axis.
+ *
+ * @note The input data type must be passed at compile time using -DDATA_TYPE: e.g.
+ * -DDATA_TYPE=float
+ * @note The data type of the output must be passed at compile time using -DDATA_TYPE_OUTPUT: e.g.
+ * -DDATA_TYPE_OUTPUT=uint
+ * @note The data type of the select results must be passed at compile time using
+ * -DDATA_TYPE_SELECT: e.g. -DDATA_TYPE_SELECT=int
+ * @note The height size must be passed at compile time using -DHEIGHT e.g. -DHEIGHT=128
+ *
+ * @param[in] src_ptr Pointer to the source tensor. Supported data
+ * types: S32/F16/F32
+ * @param[in] src_stride_x Stride of the source tensor in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source tensor in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source
+ * tensor
+ * @param[out] output_ptr Pointer to the output tensor holding the computed
+ * indices. Supported data types: U32/S32
+ * @param[in] output_stride_x Stride of the output tensor in X dimension (in
+ * bytes)
+ * @param[in] output_step_x output_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] output_stride_y Stride of the output tensor in Y dimension (in
+ * bytes)
+ * @param[in] output_step_y output_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] output_offset_first_element_in_bytes The offset of the first element in the output
+ * tensor
+ */
+__kernel void arg_min_max_ex_y(IMAGE_DECLARATION(src), IMAGE_DECLARATION(output))
+{
+ Image src = CONVERT_TO_IMAGE_STRUCT(src);
+ Image output = CONVERT_TO_IMAGE_STRUCT(output);
+
+ VEC_DATA_TYPE(DATA_TYPE, 16)
+ res = CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, 0)), VEC_DATA_TYPE(DATA_TYPE, 16));
+
+ VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 16)
+ indx = 0;
+ for (unsigned int y = 1; y < HEIGHT; ++y)
+ {
+ VEC_DATA_TYPE(DATA_TYPE, 16)
+ in =
+ CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, y)), VEC_DATA_TYPE(DATA_TYPE, 16));
+
+ VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 16)
+ cond_conv = CONVERT(CONDITION_TO_USE(in, res), VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 16));
+ indx = select(indx, y, cond_conv);
+ res = select(res, in, CONDITION_TO_USE(in, res));
+ }
+
+ // Store result
+ vstore16(indx, 0, (__global DATA_TYPE_OUTPUT *)output.ptr);
+}
+#endif // defined(HEIGHT)
+
+#if defined(DEPTH)
+/** This kernel performs a reduction along the z-axis.
+ *
+ * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
+ * @note The data type of the select results must be passed at compile time using
+ * -DDATA_TYPE_SELECT: e.g. -DDATA_TYPE_SELECT=int
+ * @note The depth size must be passed at compile time using -DDEPTH e.g. -DDEPTH=128
+ *
+ * @param[in] input_ptr Pointer to the source tensor. Supported data
+ * types: S32/F16/F32
+ * @param[in] input_stride_x Stride of the source tensor in X dimension (in
+ * bytes)
+ * @param[in] input_step_x input_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] input_stride_y Stride of the source tensor in Y dimension (in
+ * bytes)
+ * @param[in] input_step_y input_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] input_stride_z Stride of the source tensor in Z dimension (in
+ * bytes)
+ * @param[in] input_step_z input_stride_z * number of elements along Z
+ * processed per workitem(in bytes)
+ * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source
+ * tensor
+ * @param[out] output_ptr Pointer to the output tensor holding the computed
+ * indices. Supported data types: U32/S32
+ * @param[in] output_stride_x Stride of the output tensor in X dimension (in
+ * bytes)
+ * @param[in] output_step_x output_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] output_stride_y Stride of the output tensor in Y dimension (in
+ * bytes)
+ * @param[in] output_step_y output_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] output_stride_z Stride of the output tensor in Z dimension (in
+ * bytes)
+ * @param[in] output_step_z output_stride_z * number of elements along Z
+ * processed per workitem(in bytes)
+ * @param[in] output_offset_first_element_in_bytes The offset of the first element in the output
+ * tensor
+ */
+__kernel void arg_min_max_ex_z(TENSOR3D_DECLARATION(input), TENSOR3D_DECLARATION(output))
+{
+ Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
+ Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
+
+ VEC_DATA_TYPE(DATA_TYPE, 16)
+ res = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0)),
+ VEC_DATA_TYPE(DATA_TYPE, 16));
+
+ VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 16)
+ indx = 0;
+ for (DATA_TYPE_OUTPUT z = 1; z < DEPTH; ++z)
+ {
+ VEC_DATA_TYPE(DATA_TYPE, 16)
+ in = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, z)),
+ VEC_DATA_TYPE(DATA_TYPE, 16));
+
+ VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 16)
+ cond_conv = CONVERT(CONDITION_TO_USE(in, res), VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 16));
+ indx = select(indx, z, cond_conv);
+ res = select(res, in, CONDITION_TO_USE(in, res));
+ }
+
+ // Store result
+ vstore16(indx, 0, (__global DATA_TYPE_OUTPUT *)output.ptr);
+}
+#endif /* defined(DEPTH) */
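+
+// Added note (not upstream code): the y-, z- and w-axis kernels share one pattern: 16
+// lanes are reduced independently, `indx` tracks the winning coordinate per lane and
+// `res` the winning value, both updated with select() on CONDITION_TO_USE. With
+// -DARG_MIN, a single lane effectively performs:
+//   cond = isless(in, res); indx = cond ? z : indx; res = cond ? in : res;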
+
+#if defined(BATCH) && defined(DEPTH)
+/** This kernel performs a reduction along the w-axis.
+ *
+ * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
+ * @note The data type of the select results must be passed at compile time using
+ * -DDATA_TYPE_SELECT: e.g. -DDATA_TYPE_SELECT=int
+ * @note The batch size must be passed at compile time using -DBATCH e.g. -DBATCH=128
+ * @note The depth size must be passed at compile time using -DDEPTH e.g. -DDEPTH=128
+ *
+ * @param[in] input_ptr Pointer to the source tensor. Supported data
+ * types: S32/F16/F32
+ * @param[in] input_stride_x Stride of the source tensor in X dimension (in
+ * bytes)
+ * @param[in] input_step_x input_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] input_stride_y Stride of the source tensor in Y dimension (in
+ * bytes)
+ * @param[in] input_step_y input_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] input_stride_z Stride of the source tensor in Z dimension (in
+ * bytes)
+ * @param[in] input_step_z input_stride_z * number of elements along Z
+ * processed per workitem(in bytes)
+ * @param[in] input_stride_w Stride of the source tensor in W dimension (in
+ * bytes)
+ * @param[in] input_step_w input_stride_w * number of elements along W
+ * processed per workitem(in bytes)
+ * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source
+ * tensor
+ * @param[out] output_ptr Pointer to the output tensor holding the computed
+ * indices. Supported data types: U32/S32
+ * @param[in] output_stride_x Stride of the output tensor in X dimension (in
+ * bytes)
+ * @param[in] output_step_x output_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] output_stride_y Stride of the output tensor in Y dimension (in
+ * bytes)
+ * @param[in] output_step_y output_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] output_stride_z Stride of the output tensor in Z dimension (in
+ * bytes)
+ * @param[in] output_step_z output_stride_z * number of elements along Z
+ * processed per workitem(in bytes)
+ * @param[in] output_stride_w Stride of the output tensor in W dimension (in
+ * bytes)
+ * @param[in] output_step_w output_stride_w * number of elements along W
+ * processed per workitem(in bytes)
+ * @param[in] output_offset_first_element_in_bytes The offset of the first element in the output
+ * tensor
+ */
+__kernel void arg_min_max_ex_w(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(output))
+{
+ Tensor4D input = CONVERT_TO_TENSOR4D_STRUCT(input, DEPTH);
+ Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH);
+
+ VEC_DATA_TYPE(DATA_TYPE, 16)
+ res = CONVERT(vload16(0, (__global DATA_TYPE *)tensor4D_offset(&input, 0, 0, 0, 0)),
+ VEC_DATA_TYPE(DATA_TYPE, 16));
+
+ VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 16)
+ indx = 0;
+ for (DATA_TYPE_OUTPUT w = 1; w < BATCH; ++w)
+ {
+ VEC_DATA_TYPE(DATA_TYPE, 16)
+ in = CONVERT(vload16(0, (__global DATA_TYPE *)tensor4D_offset(&input, 0, 0, 0, w)),
+ VEC_DATA_TYPE(DATA_TYPE, 16));
+
+ VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 16)
+ cond_conv = CONVERT(CONDITION_TO_USE(in, res), VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 16));
+ indx = select(indx, w, cond_conv);
+ res = select(res, in, CONDITION_TO_USE(in, res));
+ }
+
+ // Store result
+ vstore16(indx, 0, (__global DATA_TYPE_OUTPUT *)output.ptr);
+}
+#endif /* defined(BATCH) && defined(DEPTH) */
+#endif /* defined(DATA_TYPE_OUTPUT) && defined(DATA_TYPE_SELECT) */
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl
index e249663bc..f8b5bbeb8 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl
@@ -111,14 +111,14 @@ __kernel void binary_logical_op(TENSOR3D_DECLARATION(input1), TENSOR3D_DECLARATI
#if OP_CODE == 1 // LOGICAL AND
VSTORE(VEC_SIZE)
(CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input1.ptr) &&
- VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input2.ptr),
+ VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input2.ptr),
VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)),
0, (__global DATA_TYPE *)output.ptr);
#elif OP_CODE == 2 // LOGICAL OR
VSTORE(VEC_SIZE)
(CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input1.ptr) ||
- VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input2.ptr),
+ VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input2.ptr),
VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)),
0, (__global DATA_TYPE *)output.ptr);
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/cast.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/cast.cl
new file mode 100644
index 000000000..3b0a175a4
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/cast.cl
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2016-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "helpers.h"
+
+/** This function performs an up-scaling depth conversion for boolean-type input.
+ *
+ * @note The input and output data_types need to be passed at compile time using -DDATA_TYPE_IN and
+ * -DDATA_TYPE_OUT:
+ * e.g. -DDATA_TYPE_IN=uchar -DDATA_TYPE_OUT=short
+ * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g.
+ * -DVEC_SIZE=16
+ * @note The integer shift amount value needs to be passed at compile time using -DSHIFT:
+ * e.g. -DSHIFT=7
+ *
+ * @param[in] in_ptr Pointer to the source image. Supported data types:
+ * U8
+ * @param[in] in_stride_x Stride of the source image in X dimension (in
+ * bytes)
+ * @param[in] in_step_x in_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] in_stride_y Stride of the source image in Y dimension (in
+ * bytes)
+ * @param[in] in_step_y in_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] in_stride_z Stride of the source tensor in Z dimension (in
+ * bytes)
+ * @param[in] in_step_z in_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] in_offset_first_element_in_bytes The offset of the first element in the source image
+ * @param[out] out_ptr Pointer to the destination image. Supported data
+ * types: U8/S8/U16/S16/U32/S32/F16/F32
+ * @param[in] out_stride_x Stride of the destination image in X dimension (in
+ * bytes)
+ * @param[in] out_step_x out_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] out_stride_y Stride of the destination image in Y dimension (in
+ * bytes)
+ * @param[in] out_step_y out_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] out_stride_z Stride of the source tensor in Z dimension (in
+ * bytes)
+ * @param[in] out_step_z out_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination
+ * image
+ */
+__kernel void cast_bool(TENSOR3D_DECLARATION(in), TENSOR3D_DECLARATION(out))
+{
+ // Get pixels pointer
+ Tensor3D in = CONVERT_TO_TENSOR3D_STRUCT(in);
+ Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out);
+
+ // Load data
+ VEC_DATA_TYPE(DATA_TYPE_IN, VEC_SIZE)
+ in_data = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_IN *)in.ptr);
+
+ VSTORE(VEC_SIZE)
+ (CONVERT(in_data & 1, VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)), 0,
+ (__global DATA_TYPE_OUT *)out.ptr);
+}
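+
+// Usage sketch (added comment, not upstream code): following the notes above, a
+// bool -> short cast processing 16 elements per work-item could be built with
+//   -DDATA_TYPE_IN=uchar -DDATA_TYPE_OUT=short -DVEC_SIZE=16
+// The `in_data & 1` masking keeps only the boolean bit before CONVERT widens the value.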
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl
index 92e5dfbee..5ebc78d23 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl
@@ -117,15 +117,15 @@ __kernel void embedding_lookup(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION
// lookup ids for based on the tensor dimensions
int lup_id[4] = {0};
- lup_id[0] = (NUM_DIMS == 1) ? *((__global int *)vector_offset(&lups, get_global_id(0)))
- : get_global_id(0);
- lup_id[1] = (NUM_DIMS == 2) ? *((__global int *)vector_offset(&lups, get_global_id(1)))
- : get_global_id(1);
+ lup_id[0] =
+ (NUM_DIMS == 1) ? *((__global int *)vector_offset(&lups, get_global_id(0))) : get_global_id(0);
+ lup_id[1] =
+ (NUM_DIMS == 2) ? *((__global int *)vector_offset(&lups, get_global_id(1))) : get_global_id(1);
lup_id[2] = (NUM_DIMS == 3) ? *((__global int *)vector_offset(&lups, get_global_id(2)))
: get_global_id(2) % DEPTH_OUT;
lup_id[3] = (NUM_DIMS == 4)
- ? *((__global int *)vector_offset(&lups, get_global_id(2) / DEPTH_OUT))
- : get_global_id(2) / DEPTH_OUT;
+ ? *((__global int *)vector_offset(&lups, get_global_id(2) / DEPTH_OUT))
+ : get_global_id(2) / DEPTH_OUT;
in.ptr += input_offset_first_element_in_bytes + lup_id[0] * input_step_x +
lup_id[1] * input_step_y + lup_id[2] * input_step_z + lup_id[3] * input_step_w;
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/gemm.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/gemm.cl
new file mode 100644
index 000000000..9b826a2bd
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/gemm.cl
@@ -0,0 +1,7210 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Copyright (c) 2017-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "gemm_helpers.h"
+#include "repeat.h"
+
+#if defined(M0) && defined(K0) && defined(V0) && defined(DATA_TYPE) && defined(SRC_WIDTH)
+#define INC2 (VEC_DATA_TYPE(uint, 2))(0, 1)
+#define INC3 (VEC_DATA_TYPE(uint, 3))(0, 1, 2)
+#define INC4 (VEC_DATA_TYPE(uint, 4))(0, 1, 2, 3)
+#define INC8 (VEC_DATA_TYPE(uint, 8))(0, 1, 2, 3, 4, 5, 6, 7)
+#define INC16 (VEC_DATA_TYPE(uint, 16))(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
+#define CONCAT_INC(K0) INC##K0
+#define INC(K0) CONCAT_INC(K0)
+
+#if (SRC_WIDTH % K0)
+#define BOUNDARY_CONDITION_X(x, a) \
+ ({ \
+ a = select( \
+ 0, a, \
+ CONVERT(((x * (VEC_DATA_TYPE(uint, K0))K0 + INC(K0)) < (VEC_DATA_TYPE(uint, K0))SRC_WIDTH), \
+ VEC_DATA_TYPE(DATA_TYPE, K0))); \
+ })
+#else // (SRC_WIDTH % K0)
+#define BOUNDARY_CONDITION_X(x, a) ({})
+#endif // (SRC_WIDTH % K0)
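+
+// Illustrative example (added comment, not upstream code): with SRC_WIDTH=10 and K0=4,
+// the block at x=2 covers columns {8, 9, 10, 11}; the per-lane predicate
+// (x * K0 + INC(K0)) < SRC_WIDTH evaluates to (1, 1, 0, 0), so select() zeroes the two
+// out-of-range lanes instead of keeping garbage values.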
+
+/** This OpenCL kernel reshapes the lhs input matrix. The kernel splits the input matrix into blocks
+ * of size M0xK0 and stores each one (not transposed) in the output matrix unrolling the values.
+ *
+ * @note The data type must be passed at compile time using -DDATA_TYPE (e.g. -DDATA_TYPE=float)
+ * @note The width of the input tensor must be passed at compile time using -DSRC_WIDTH (e.g.
+ * -DSRC_WIDTH=16)
+ * @note The block's dimensions (M0 and K0) must be passed at compile time using -DM0 and -DK0 (e.g.
+ * -DM0=2, -DK0=2).
+ * @note The number of M0xK0 vertical blocks to store on the same output row must be passed at
+ * compile time using -DV0 (e.g. -DV0=2)
+ * @note Only the following values for M0, K0 and V0 are supported:
+ * M0: 2,3,4,5,6,7,8
+ * K0: 2,3,4,8,16
+ * V0: greater than 0
+ * @note In case the input has to be reinterpreted as a 3D tensor (e.g. input of convolution layer
+ * 1x1), the following information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# HEIGHT_GEMM3D: The height of the input in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the input in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped
+ * @note If the M0xK0 blocks have to be interleaved, the option -DINTERLEAVE must be passed at compile
+ * time.
+ *
+ * @param[in] src_ptr Pointer to the source LHS tensor. Supported data
+ * types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] src_stride_x Stride of the source LHS tensor in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source LHS tensor in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source LHS tensor in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source LHS
+ * tensor
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * types: same as @p src_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in
+ * bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_INPUT_AS_3D)
+ */
+__kernel void gemm_reshape_lhs_matrix_nt(TENSOR3D_DECLARATION(src), TENSOR3D_DECLARATION(dst)
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+)
+{
+ // Block size
+#define BLOCK_SIZE ((M0) * (K0))
+
+ // Output offset X
+#if defined(INTERLEAVE)
+#define OUTPUT_OFFSET_X (K0)
+#else // defined(INTERLEAVE)
+#define OUTPUT_OFFSET_X (BLOCK_SIZE)
+#endif // defined(INTERLEAVE)
+
+ // Output step X
+#if defined(INTERLEAVE)
+#define OUTPUT_STEP_X (K0) * (V0)
+#else // Do not interleave
+#define OUTPUT_STEP_X (K0)
+#endif // defined(INTERLEAVE)
+
+ // Compute source and destination addresses
+ uint x = get_global_id(0);
+ uint y = get_global_id(1);
+ uint z = get_global_id(2);
+
+ // ------------------ Compute input/output addresses ---------------------------
+
+ // Compute the input address
+ __global uchar *input_ptr = src_ptr + src_offset_first_element_in_bytes +
+ x * (uint)K0 * sizeof(DATA_TYPE) + y * (uint)M0 * src_stride_y;
+
+ // Compute the output address
+ __global uchar *output_ptr = dst_ptr + dst_offset_first_element_in_bytes +
+ (x * (uint)BLOCK_SIZE * (uint)V0 * sizeof(DATA_TYPE)) +
+ ((y / (uint)V0) * (uint)dst_stride_y) +
+ ((y % V0) * (uint)OUTPUT_OFFSET_X * sizeof(DATA_TYPE));
+
+ // Create variables: uint zin0=0, zin1=0, zin2=0...zin(M0-1)=0;
+ REPEAT_VAR_INIT_TO_CONST(M0, uint, zin, 0);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply src_stride_z by DEPTH_GEMM3D
+
+ input_ptr += z * (uint)src_stride_z * DEPTH_GEMM3D;
+
+ // The plane (zin) is calculated dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zin, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, cross_plane_pad, src_stride_y);
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ input_ptr += z * (uint)src_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ output_ptr += z * (uint)dst_stride_z;
+
+ // ---------------------------Load input values --------------------------------
+ // Load values from the LHS matrix
+ LOAD_BLOCK(M0, K0, DATA_TYPE, a, input_ptr, 0, src_stride_y, zin);
+ BOUNDARY_CONDITION_X(x, a0);
+#if M0 > 1
+ BOUNDARY_CONDITION_X(x, a1);
+#endif // M0 > 1
+#if M0 > 2
+ BOUNDARY_CONDITION_X(x, a2);
+#endif // M0 > 2
+#if M0 > 3
+ BOUNDARY_CONDITION_X(x, a3);
+#endif // M0 > 3
+#if M0 > 4
+ BOUNDARY_CONDITION_X(x, a4);
+#endif // M0 > 4
+#if M0 > 5
+ BOUNDARY_CONDITION_X(x, a5);
+#endif // M0 > 5
+#if M0 > 6
+ BOUNDARY_CONDITION_X(x, a6);
+#endif // M0 > 6
+#if M0 > 7
+ BOUNDARY_CONDITION_X(x, a7);
+#endif // M0 > 7
+ // ---------------------------Store output values ------------------------------
+ REPEAT_VAR_INIT_TO_CONST(16, uint, zout, 0);
+ STORE_BLOCK(M0, K0, DATA_TYPE, a, output_ptr, OUTPUT_STEP_X * sizeof(DATA_TYPE), zout);
+
+#undef BLOCK_SIZE
+#undef OUTPUT_OFFSET_X
+#undef OUTPUT_STEP_X
+}
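+
+// Added note (not upstream code): with -DINTERLEAVE, consecutive M0xK0 blocks that map to
+// the same output row are woven together: OUTPUT_OFFSET_X shrinks from BLOCK_SIZE to K0
+// and OUTPUT_STEP_X grows to K0 * V0, so row r of block b starts at element
+// b * K0 + r * K0 * V0 instead of b * BLOCK_SIZE + r * K0.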
+
+#if M0 == 2
+#define TRANSPOSE_COLUMN_AND_STORE(output_ptr, output_step_x, i) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, M0) \
+ res = (VEC_DATA_TYPE(DATA_TYPE, M0))(a0.s##i, a1.s##i); \
+ VSTORE(M0) \
+ (res, 0, (__global DATA_TYPE *)(output_ptr + 0x##i * output_step_x * sizeof(DATA_TYPE))); \
+ })
+#elif M0 == 3 // M0 == 3
+#define TRANSPOSE_COLUMN_AND_STORE(output_ptr, output_step_x, i) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, M0) \
+ res = (VEC_DATA_TYPE(DATA_TYPE, M0))(a0.s##i, a1.s##i, a2.s##i); \
+ VSTORE(M0) \
+ (res, 0, (__global DATA_TYPE *)(output_ptr + 0x##i * output_step_x * sizeof(DATA_TYPE))); \
+ })
+#elif M0 == 4 // M0 == 4
+#define TRANSPOSE_COLUMN_AND_STORE(output_ptr, output_step_x, i) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, M0) \
+ res = (VEC_DATA_TYPE(DATA_TYPE, M0))(a0.s##i, a1.s##i, a2.s##i, a3.s##i); \
+ VSTORE(M0) \
+ (res, 0, (__global DATA_TYPE *)(output_ptr + 0x##i * output_step_x * sizeof(DATA_TYPE))); \
+ })
+#elif M0 == 5 // M0 == 5
+#define TRANSPOSE_COLUMN_AND_STORE(output_ptr, output_step_x, i) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, 4) \
+ res0 = (VEC_DATA_TYPE(DATA_TYPE, 4))(a0.s##i, a1.s##i, a2.s##i, a3.s##i); \
+ DATA_TYPE res1 = a4.s##i; \
+ VSTORE(4) \
+ (res0, 0, (__global DATA_TYPE *)(output_ptr + 0x##i * output_step_x * sizeof(DATA_TYPE))); \
+ *((__global DATA_TYPE *)(output_ptr + 0x##i * output_step_x * sizeof(DATA_TYPE)) + 4) = res1; \
+ })
+#elif M0 == 6 // M0 == 6
+#define TRANSPOSE_COLUMN_AND_STORE(output_ptr, output_step_x, i) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, 4) \
+ res0 = (VEC_DATA_TYPE(DATA_TYPE, 4))(a0.s##i, a1.s##i, a2.s##i, a3.s##i); \
+ VEC_DATA_TYPE(DATA_TYPE, 2) \
+ res1 = (VEC_DATA_TYPE(DATA_TYPE, 2))(a4.s##i, a5.s##i); \
+ VSTORE(4) \
+ (res0, 0, (__global DATA_TYPE *)(output_ptr + 0x##i * output_step_x * sizeof(DATA_TYPE))); \
+ VSTORE(2) \
+ (res1, 0, (__global DATA_TYPE *)(output_ptr + 0x##i * output_step_x * sizeof(DATA_TYPE)) + 4); \
+ })
+#elif M0 == 7 // M0 == 7
+#define TRANSPOSE_COLUMN_AND_STORE(output_ptr, output_step_x, i) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, 4) \
+ res0 = (VEC_DATA_TYPE(DATA_TYPE, 4))(a0.s##i, a1.s##i, a2.s##i, a3.s##i); \
+ VEC_DATA_TYPE(DATA_TYPE, 3) \
+ res1 = (VEC_DATA_TYPE(DATA_TYPE, 3))(a4.s##i, a5.s##i, a6.s##i); \
+ VSTORE(4) \
+ (res0, 0, (__global DATA_TYPE *)(output_ptr + 0x##i * output_step_x * sizeof(DATA_TYPE))); \
+ VSTORE(3) \
+ (res1, 0, (__global DATA_TYPE *)(output_ptr + 0x##i * output_step_x * sizeof(DATA_TYPE)) + 4); \
+ })
+#elif M0 == 8 // M0 == 8
+#define TRANSPOSE_COLUMN_AND_STORE(output_ptr, output_step_x, i) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, M0) \
+ res = (VEC_DATA_TYPE(DATA_TYPE, M0))(a0.s##i, a1.s##i, a2.s##i, a3.s##i, a4.s##i, a5.s##i, \
+ a6.s##i, a7.s##i); \
+ VSTORE(M0) \
+ (res, 0, (__global DATA_TYPE *)(output_ptr + 0x##i * output_step_x * sizeof(DATA_TYPE))); \
+ })
+#else // M0 not supported
+#error "M0 value not supported"
+#endif // M0 conditions
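+
+// Illustrative expansion (added comment, not upstream code): for M0 == 2, column i of the
+// loaded M0xK0 block is gathered across the row vectors and stored contiguously, e.g.
+//   TRANSPOSE_COLUMN_AND_STORE(out, step, 3)
+//     -> res = (a0.s3, a1.s3);
+//     -> vstore2(res, 0, (__global DATA_TYPE *)(out + 0x3 * step * sizeof(DATA_TYPE)));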
+
+/** This OpenCL kernel reshapes the lhs input matrix. The kernel splits the input matrix into blocks
+ * of size M0xK0 and stores each one (transposed) in the output matrix unrolling the values.
+ *
+ * @note The data type must be passed at compile time using -DDATA_TYPE (e.g. -DDATA_TYPE=float)
+ * @note The width of the input tensor must be passed at compile time using -DSRC_WIDTH (e.g.
+ * -DSRC_WIDTH=16)
+ * @note The block's dimensions (M0 and K0) must be passed at compile time using -DM0 and -DK0 (e.g.
+ * -DM0=2, -DK0=2).
+ * @note The number of M0xK0 vertical blocks to store on the same output row must be passed at
+ * compile time using -DV0 (e.g. -DV0=2)
+ * @note Only the following values for M0, K0 and V0 are supported:
+ * M0: 2,3,4,5,6,7,8
+ * K0: 2,3,4,8,16
+ * V0: greater than 0
+ * @note In case the input has to be reinterpreted as a 3D tensor (e.g. input of convolution layer
+ * 1x1), the following information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# HEIGHT_GEMM3D: The height of the input in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the input in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped
+ * @note If the M0xK0 blocks have to be interleaved, the option -DINTERLEAVE must be passed at compile
+ * time.
+ *
+ * @param[in] src_ptr Pointer to the source LHS tensor. Supported data
+ * types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] src_stride_x Stride of the source LHS tensor in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source LHS tensor in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source LHS tensor in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source LHS
+ * tensor
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * types: same as @p src_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in
+ * bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_INPUT_AS_3D)
+ */
+__kernel void gemm_reshape_lhs_matrix_t(TENSOR3D_DECLARATION(src), TENSOR3D_DECLARATION(dst)
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+)
+{
+ // Block size
+#define BLOCK_SIZE ((M0) * (K0))
+
+ // Output offset X
+#if defined(INTERLEAVE)
+#define OUTPUT_OFFSET_X (M0)
+#else // defined(INTERLEAVE)
+#define OUTPUT_OFFSET_X (BLOCK_SIZE)
+#endif // defined(INTERLEAVE)
+
+ // Output step X
+#if defined(INTERLEAVE)
+#define OUTPUT_STEP_X (M0) * (V0)
+#else // Do not interleave
+#define OUTPUT_STEP_X (M0)
+#endif // defined(INTERLEAVE)
+
+ // Compute source and destination addresses
+ uint x = get_global_id(0);
+ uint y = get_global_id(1);
+ uint z = get_global_id(2);
+
+ // ------------------ Compute input/output addresses ---------------------------
+
+ // Compute the input address
+ __global uchar *input_ptr = src_ptr + src_offset_first_element_in_bytes +
+ x * (uint)K0 * sizeof(DATA_TYPE) + y * (uint)M0 * src_stride_y;
+
+ // Compute the output address
+ __global uchar *output_ptr = dst_ptr + dst_offset_first_element_in_bytes +
+ (x * (uint)BLOCK_SIZE * (uint)V0 * sizeof(DATA_TYPE)) +
+ ((y / (uint)V0) * (uint)dst_stride_y) +
+ ((y % V0) * (uint)OUTPUT_OFFSET_X * sizeof(DATA_TYPE));
+
+ // Create variables: uint zin0=0, zin1=0, zin2=0...zin(M0-1)=0;
+ REPEAT_VAR_INIT_TO_CONST(M0, uint, zin, 0);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply src_stride_z by DEPTH_GEMM3D
+
+ input_ptr += z * (uint)src_stride_z * DEPTH_GEMM3D;
+
+ // The plane (zin) is calculated dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zin, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, cross_plane_pad, src_stride_y);
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ input_ptr += z * (uint)src_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ output_ptr += z * (uint)dst_stride_z;
+
+ // ---------------------------Load input values --------------------------------
+
+ // Load values from the LHS matrix
+ LOAD_BLOCK(M0, K0, DATA_TYPE, a, input_ptr, 0, src_stride_y, zin);
+ BOUNDARY_CONDITION_X(x, a0);
+#if M0 > 1
+ BOUNDARY_CONDITION_X(x, a1);
+#endif // M0 > 1
+#if M0 > 2
+ BOUNDARY_CONDITION_X(x, a2);
+#endif // M0 > 2
+#if M0 > 3
+ BOUNDARY_CONDITION_X(x, a3);
+#endif // M0 > 3
+#if M0 > 4
+ BOUNDARY_CONDITION_X(x, a4);
+#endif // M0 > 4
+#if M0 > 5
+ BOUNDARY_CONDITION_X(x, a5);
+#endif // M0 > 5
+#if M0 > 6
+ BOUNDARY_CONDITION_X(x, a6);
+#endif // M0 > 6
+#if M0 > 7
+ BOUNDARY_CONDITION_X(x, a7);
+#endif // M0 > 7
+ // ---------------------------Transpose and store block -----------------------
+
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, 0);
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, 1);
+#if K0 > 2
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, 2);
+#endif // K0 > 2
+#if K0 > 3
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, 3);
+#endif // K0 > 3
+#if K0 > 4
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, 4);
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, 5);
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, 6);
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, 7);
+#endif // K0 > 4
+#if K0 > 8
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, 8);
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, 9);
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, A);
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, B);
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, C);
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, D);
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, E);
+ TRANSPOSE_COLUMN_AND_STORE(output_ptr, OUTPUT_STEP_X, F);
+#endif // K0 > 8
+
+#undef BLOCK_SIZE
+#undef OUTPUT_OFFSET_X
+#undef OUTPUT_STEP_X
+}
+#endif // defined(M0) && defined(K0) && defined(V0) && defined(DATA_TYPE) && defined(SRC_WIDTH)
+
+#if defined(K0) && defined(N0) && defined(H0) && defined(DATA_TYPE) && defined(SRC_HEIGHT)
+/** This OpenCL kernel reshapes the rhs input matrix. The kernel splits the input matrix into blocks
+ * of size K0xN0 and stores each one (not transposed) in the output matrix unrolling the values.
+ *
+ * @note The data type must be passed at compile time using -DDATA_TYPE (e.g. -DDATA_TYPE=float)
+ * @note The height of the input tensor must be passed at compile time using -DSRC_HEIGHT (e.g.
+ * -DSRC_HEIGHT=16)
+ * @note The block's dimensions (K0 and N0) must be passed at compile time using -DK0 and -DN0 (e.g.
+ * -DK0=2, -DN0=2).
+ * @note The number of K0xN0 vertical blocks to store on the same output row must be passed at
+ * compile time using -DH0 (e.g. -DH0=2)
+ * @note If the K0xN0 blocks have to be interleaved, the option -DINTERLEAVE must be passed at compile
+ * time.
+ * @note Only the following values for K0, N0 and H0 are supported:
+ * N0: 2,3,4,8,16
+ * K0: 1,2,3,4,8,16
+ * H0: greater than 0
+ *
+ * @param[in] src_ptr Pointer to the source RHS tensor. Supported data
+ * types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] src_stride_x Stride of the source RHS tensor in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source RHS tensor in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source RHS tensor in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source RHS
+ * tensor
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * types: same as @p src_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in
+ * bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ */
+__kernel void gemm_reshape_rhs_matrix_nt(TENSOR3D_DECLARATION(src), TENSOR3D_DECLARATION(dst))
+{
+ // Block size
+#define BLOCK_SIZE ((K0) * (N0))
+
+ // Output offset X
+#if defined(INTERLEAVE)
+#define OUTPUT_OFFSET_X (N0)
+#else // defined(INTERLEAVE)
+#define OUTPUT_OFFSET_X (BLOCK_SIZE)
+#endif // defined(INTERLEAVE)
+
+ // Output step X
+#if defined(INTERLEAVE)
+#define OUTPUT_STEP_X (N0) * (H0)
+#else // Do not interleave
+#define OUTPUT_STEP_X (N0)
+#endif // defined(INTERLEAVE)
+
+ // Compute source and destination addresses
+ uint x = get_global_id(0);
+ uint y = get_global_id(1);
+ uint z = get_global_id(2);
+
+ // ------------------ Compute input/output addresses ---------------------------
+
+ // Compute the input address
+ __global uchar *input_ptr = src_ptr + src_offset_first_element_in_bytes +
+ x * (uint)N0 * sizeof(DATA_TYPE) + y * (uint)K0 * src_stride_y +
+ z * (uint)src_stride_z;
+
+ // Compute the output address
+ __global uchar *output_ptr = dst_ptr + dst_offset_first_element_in_bytes +
+ (y * (uint)BLOCK_SIZE * (uint)H0 * sizeof(DATA_TYPE)) +
+ ((x % (uint)H0) * (uint)OUTPUT_OFFSET_X * sizeof(DATA_TYPE)) +
+ ((x / (uint)H0) * (uint)dst_stride_y) + z * (uint)dst_stride_z;
+
+ // ---------------------------Load input values --------------------------------
+
+  REPEAT_VAR_INIT_TO_CONST(K0, VEC_DATA_TYPE(DATA_TYPE, N0), a,
+                           0); // VEC_DATA_TYPE(DATA_TYPE, N0) a0=0, a1=0, ... a(K0-1)=0;
+
+ // Load values from the RHS matrix
+ a0 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 0 * src_stride_y));
+#if K0 > 1
+ if (y * (uint)K0 + 1 < SRC_HEIGHT)
+ {
+ a1 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 1 * src_stride_y));
+ }
+#endif // K0 > 1
+#if K0 > 2
+ if (y * (uint)K0 + 2 < SRC_HEIGHT)
+ {
+ a2 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 2 * src_stride_y));
+ }
+#endif // K0 > 2
+#if K0 > 3
+ if (y * (uint)K0 + 3 < SRC_HEIGHT)
+ {
+ a3 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 3 * src_stride_y));
+ }
+#endif // K0 > 3
+#if K0 > 4
+ if (y * (uint)K0 + 4 < SRC_HEIGHT)
+ {
+ a4 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 4 * src_stride_y));
+ }
+ if (y * (uint)K0 + 5 < SRC_HEIGHT)
+ {
+ a5 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 5 * src_stride_y));
+ }
+ if (y * (uint)K0 + 6 < SRC_HEIGHT)
+ {
+ a6 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 6 * src_stride_y));
+ }
+ if (y * (uint)K0 + 7 < SRC_HEIGHT)
+ {
+ a7 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 7 * src_stride_y));
+ }
+#endif // K0 > 4
+#if K0 > 8
+ if (y * (uint)K0 + 8 < SRC_HEIGHT)
+ {
+ a8 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 8 * src_stride_y));
+ }
+ if (y * (uint)K0 + 9 < SRC_HEIGHT)
+ {
+ a9 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 9 * src_stride_y));
+ }
+ if (y * (uint)K0 + 10 < SRC_HEIGHT)
+ {
+ aA = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 10 * src_stride_y));
+ }
+ if (y * (uint)K0 + 11 < SRC_HEIGHT)
+ {
+ aB = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 11 * src_stride_y));
+ }
+ if (y * (uint)K0 + 12 < SRC_HEIGHT)
+ {
+ aC = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 12 * src_stride_y));
+ }
+ if (y * (uint)K0 + 13 < SRC_HEIGHT)
+ {
+ aD = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 13 * src_stride_y));
+ }
+ if (y * (uint)K0 + 14 < SRC_HEIGHT)
+ {
+ aE = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 14 * src_stride_y));
+ }
+ if (y * (uint)K0 + 15 < SRC_HEIGHT)
+ {
+ aF = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 15 * src_stride_y));
+ }
+#endif // K0 > 8
+
+ // ---------------------------Store output values ------------------------------
+ REPEAT_VAR_INIT_TO_CONST(16, uint, zout, 0);
+ STORE_BLOCK(K0, N0, DATA_TYPE, a, output_ptr, OUTPUT_STEP_X * sizeof(DATA_TYPE), zout);
+
+#undef BLOCK_SIZE
+#undef OUTPUT_OFFSET_X
+#undef OUTPUT_STEP_X
+}
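+
+// Added note (not upstream code): rows beyond SRC_HEIGHT are left as the zero vectors
+// created by REPEAT_VAR_INIT_TO_CONST, so a partially filled K0 block is zero-padded
+// rather than read out of bounds. For example, with SRC_HEIGHT=6 and K0=4, the block at
+// y=1 loads rows 4 and 5 and keeps a2 = a3 = 0.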
+
+#if defined(TRANSPOSE)
+/** This OpenCL kernel reshapes the rhs input matrix. The kernel splits the input matrix into blocks
+ * of size K0xN0 and stores each one (transposed) in the output matrix unrolling the values.
+ *
+ * @note The data type must be passed at compile time using -DDATA_TYPE (e.g. -DDATA_TYPE=float)
+ * @note The height of the input tensor must be passed at compile time using -DSRC_HEIGHT (e.g.
+ * -DSRC_HEIGHT=16)
+ * @note The block's dimensions (K0 and N0) must be passed at compile time using -DK0 and -DN0 (e.g.
+ * -DK0=2, -DN0=2).
+ * @note The number of K0xN0 vertical blocks to store on the same output row must be passed at
+ * compile time using -DH0 (e.g. -DH0=2)
+ * @note If the K0xN0 blocks have to be interleaved, the option -DINTERLEAVE must be passed at compile
+ * time.
+ * @note The option -DTRANSPOSE must be passed at compile time.
+ * @note Only the following values for K0, N0 and H0 are supported:
+ * N0: 2,3,4,8,16
+ * K0: 2,3,4,8,16
+ * H0: greater than 0
+ *
+ * @param[in] src_ptr Pointer to the source RHS tensor. Supported data
+ * types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] src_stride_x Stride of the source RHS tensor in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source RHS tensor in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source RHS tensor in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source RHS
+ * tensor
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * types: same as @p src_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in
+ * bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ */
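+// Illustrative build options (an added example, not from the original source): reshaping a float
+// RHS matrix of height 64 into transposed, interleaved 4x4 blocks, 4 blocks per output row:
+//   -DDATA_TYPE=float -DSRC_HEIGHT=64 -DK0=4 -DN0=4 -DH0=4 -DINTERLEAVE -DTRANSPOSE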
+__kernel void gemm_reshape_rhs_matrix_t(TENSOR3D_DECLARATION(src), TENSOR3D_DECLARATION(dst))
+{
+ // Block size
+#define BLOCK_SIZE ((K0) * (N0))
+
+ // Output offset X
+#if defined(INTERLEAVE)
+#define OUTPUT_OFFSET_X (K0)
+#else // defined(INTERLEAVE)
+#define OUTPUT_OFFSET_X (BLOCK_SIZE)
+#endif // defined(INTERLEAVE)
+
+ // Output step X
+#if defined(INTERLEAVE)
+#define OUTPUT_STEP_X ((K0) * (H0))
+#else // Do not interleave
+#define OUTPUT_STEP_X (K0)
+#endif // defined(INTERLEAVE)
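+
+// Worked numbers (illustrative, derived from the macros above): with K0 = 4, N0 = 4 and H0 = 2,
+// BLOCK_SIZE is 16; interleaved: OUTPUT_OFFSET_X = 4 and OUTPUT_STEP_X = 8 (blocks woven K0
+// columns apart); non-interleaved: OUTPUT_OFFSET_X = 16 and OUTPUT_STEP_X = 4 (blocks back to back).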
+
+ // Compute source and destination addresses
+ uint x = get_global_id(0);
+ uint y = get_global_id(1);
+ uint z = get_global_id(2);
+
+ // ------------------ Compute input/output addresses ---------------------------
+
+ // Compute the input address
+ __global uchar *input_ptr = src_ptr + src_offset_first_element_in_bytes +
+ x * (uint)N0 * sizeof(DATA_TYPE) + y * (uint)K0 * src_stride_y +
+ z * (uint)src_stride_z;
+
+ // Compute the output address
+ __global uchar *output_ptr = dst_ptr + dst_offset_first_element_in_bytes +
+ (y * (uint)BLOCK_SIZE * (uint)H0 * sizeof(DATA_TYPE)) +
+ ((x % H0) * (uint)OUTPUT_OFFSET_X * sizeof(DATA_TYPE)) +
+ ((x / (uint)H0) * (uint)dst_stride_y) + z * (uint)dst_stride_z;
+
+ // ---------------------------Load input values --------------------------------
+ REPEAT_VAR_INIT_TO_CONST(K0, VEC_DATA_TYPE(DATA_TYPE, N0), a,
+ 0); // VEC_DATA_TYPE(DATA_TYPE, N0) a0=0, a1=0, ... a(K0-1)=0;
+
+ // Load values from the RHS matrix
+ a0 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 0 * src_stride_y));
+ if (y * (uint)K0 + 1 < SRC_HEIGHT)
+ {
+ a1 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 1 * src_stride_y));
+ }
+#if K0 > 2
+ if (y * (uint)K0 + 2 < SRC_HEIGHT)
+ {
+ a2 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 2 * src_stride_y));
+ }
+#endif // K0 > 2
+#if K0 > 3
+ if (y * (uint)K0 + 3 < SRC_HEIGHT)
+ {
+ a3 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 3 * src_stride_y));
+ }
+#endif // K0 > 3
+#if K0 > 4
+ if (y * (uint)K0 + 4 < SRC_HEIGHT)
+ {
+ a4 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 4 * src_stride_y));
+ }
+ if (y * (uint)K0 + 5 < SRC_HEIGHT)
+ {
+ a5 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 5 * src_stride_y));
+ }
+ if (y * (uint)K0 + 6 < SRC_HEIGHT)
+ {
+ a6 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 6 * src_stride_y));
+ }
+ if (y * (uint)K0 + 7 < SRC_HEIGHT)
+ {
+ a7 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 7 * src_stride_y));
+ }
+#endif // K0 > 4
+#if K0 > 8
+ if (y * (uint)K0 + 8 < SRC_HEIGHT)
+ {
+ a8 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 8 * src_stride_y));
+ }
+ if (y * (uint)K0 + 9 < SRC_HEIGHT)
+ {
+ a9 = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 9 * src_stride_y));
+ }
+ if (y * (uint)K0 + 10 < SRC_HEIGHT)
+ {
+ aA = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 10 * src_stride_y));
+ }
+ if (y * (uint)K0 + 11 < SRC_HEIGHT)
+ {
+ aB = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 11 * src_stride_y));
+ }
+ if (y * (uint)K0 + 12 < SRC_HEIGHT)
+ {
+ aC = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 12 * src_stride_y));
+ }
+ if (y * (uint)K0 + 13 < SRC_HEIGHT)
+ {
+ aD = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 13 * src_stride_y));
+ }
+ if (y * (uint)K0 + 14 < SRC_HEIGHT)
+ {
+ aE = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 14 * src_stride_y));
+ }
+ if (y * (uint)K0 + 15 < SRC_HEIGHT)
+ {
+ aF = VLOAD(N0)(0, (__global DATA_TYPE *)(input_ptr + 15 * src_stride_y));
+ }
+#endif // K0 > 8
+
+ // ---------------------------Transpose the block ------------------------------
+ REPEAT_VAR_INIT_TO_CONST(
+ N0, VEC_DATA_TYPE(DATA_TYPE, K0), res,
+ 0); // VEC_DATA_TYPE(DATA_TYPE, K0) res0=0, res1=0, res2=0,... res(N0-1)=0;
+
+#if K0 == 2
+ // This part computes the following transpositions:
+ // 2x2 -> 2x2
+ // 2x4 -> 4x2
+ // 2x8 -> 8x2
+ // 2x16 -> 16x2
+ res0 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s0, a1.s0);
+ res1 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s1, a1.s1);
+#if N0 > 2
+ res2 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s2, a1.s2);
+#endif // N0 > 2
+#if N0 > 3
+ res3 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s3, a1.s3);
+#endif // N0 > 3
+#if N0 > 4
+ res4 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s4, a1.s4);
+ res5 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s5, a1.s5);
+ res6 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s6, a1.s6);
+ res7 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s7, a1.s7);
+#endif // N0 > 4
+#if N0 > 8
+ res8 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s8, a1.s8);
+ res9 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s9, a1.s9);
+ resA = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sA, a1.sA);
+ resB = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sB, a1.sB);
+ resC = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sC, a1.sC);
+ resD = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sD, a1.sD);
+ resE = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sE, a1.sE);
+ resF = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sF, a1.sF);
+#endif // N0 > 8
+
+#elif K0 == 3 // K0 == 3
+ // This part computes the following transpositions:
+ // 3x2 -> 2x3
+ // 3x4 -> 4x3
+ // 3x8 -> 8x3
+ // 3x16 -> 16x3
+ res0 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s0, a1.s0, a2.s0);
+ res1 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s1, a1.s1, a2.s1);
+#if N0 > 2
+ res2 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s2, a1.s2, a2.s2);
+#endif // N0 > 2
+#if N0 > 3
+ res3 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s3, a1.s3, a2.s3);
+#endif // N0 > 3
+#if N0 > 4
+ res4 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s4, a1.s4, a2.s4);
+ res5 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s5, a1.s5, a2.s5);
+ res6 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s6, a1.s6, a2.s6);
+ res7 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s7, a1.s7, a2.s7);
+#endif // N0 > 4
+#if N0 > 8
+ res8 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s8, a1.s8, a2.s8);
+ res9 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s9, a1.s9, a2.s9);
+ resA = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sA, a1.sA, a2.sA);
+ resB = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sB, a1.sB, a2.sB);
+ resC = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sC, a1.sC, a2.sC);
+ resD = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sD, a1.sD, a2.sD);
+ resE = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sE, a1.sE, a2.sE);
+ resF = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sF, a1.sF, a2.sF);
+#endif // N0 > 8
+
+#elif K0 == 4 // K0 == 4
+ // This part computes the following transpositions:
+ // 4x2 -> 2x4
+ // 4x4 -> 4x4
+ // 4x8 -> 8x4
+ // 4x16 -> 16x4
+ res0 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s0, a1.s0, a2.s0, a3.s0);
+ res1 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s1, a1.s1, a2.s1, a3.s1);
+#if N0 > 2
+ res2 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s2, a1.s2, a2.s2, a3.s2);
+#endif // N0 > 2
+#if N0 > 3
+ res3 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s3, a1.s3, a2.s3, a3.s3);
+#endif // N0 > 3
+#if N0 > 4
+ res4 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s4, a1.s4, a2.s4, a3.s4);
+ res5 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s5, a1.s5, a2.s5, a3.s5);
+ res6 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s6, a1.s6, a2.s6, a3.s6);
+ res7 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s7, a1.s7, a2.s7, a3.s7);
+#endif // N0 > 4
+#if N0 > 8
+ res8 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s8, a1.s8, a2.s8, a3.s8);
+ res9 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s9, a1.s9, a2.s9, a3.s9);
+ resA = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sA, a1.sA, a2.sA, a3.sA);
+ resB = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sB, a1.sB, a2.sB, a3.sB);
+ resC = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sC, a1.sC, a2.sC, a3.sC);
+ resD = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sD, a1.sD, a2.sD, a3.sD);
+ resE = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sE, a1.sE, a2.sE, a3.sE);
+ resF = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sF, a1.sF, a2.sF, a3.sF);
+#endif // N0 > 8
+
+#elif K0 == 8 // K0 == 8
+ // This part computes the following transpositions:
+ // 8x2 -> 2x8
+ // 8x4 -> 4x8
+ // 8x8 -> 8x8
+ // 8x16 -> 16x8
+ res0 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s0, a1.s0, a2.s0, a3.s0, a4.s0, a5.s0, a6.s0, a7.s0);
+ res1 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s1, a1.s1, a2.s1, a3.s1, a4.s1, a5.s1, a6.s1, a7.s1);
+#if N0 > 2
+ res2 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s2, a1.s2, a2.s2, a3.s2, a4.s2, a5.s2, a6.s2, a7.s2);
+#endif // N0 > 2
+#if N0 > 3
+ res3 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s3, a1.s3, a2.s3, a3.s3, a4.s3, a5.s3, a6.s3, a7.s3);
+#endif // N0 > 3
+#if N0 > 4
+ res4 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s4, a1.s4, a2.s4, a3.s4, a4.s4, a5.s4, a6.s4, a7.s4);
+ res5 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s5, a1.s5, a2.s5, a3.s5, a4.s5, a5.s5, a6.s5, a7.s5);
+ res6 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s6, a1.s6, a2.s6, a3.s6, a4.s6, a5.s6, a6.s6, a7.s6);
+ res7 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s7, a1.s7, a2.s7, a3.s7, a4.s7, a5.s7, a6.s7, a7.s7);
+#endif // N0 > 4
+#if N0 > 8
+ res8 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s8, a1.s8, a2.s8, a3.s8, a4.s8, a5.s8, a6.s8, a7.s8);
+ res9 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s9, a1.s9, a2.s9, a3.s9, a4.s9, a5.s9, a6.s9, a7.s9);
+ resA = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sA, a1.sA, a2.sA, a3.sA, a4.sA, a5.sA, a6.sA, a7.sA);
+ resB = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sB, a1.sB, a2.sB, a3.sB, a4.sB, a5.sB, a6.sB, a7.sB);
+ resC = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sC, a1.sC, a2.sC, a3.sC, a4.sC, a5.sC, a6.sC, a7.sC);
+ resD = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sD, a1.sD, a2.sD, a3.sD, a4.sD, a5.sD, a6.sD, a7.sD);
+ resE = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sE, a1.sE, a2.sE, a3.sE, a4.sE, a5.sE, a6.sE, a7.sE);
+ resF = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sF, a1.sF, a2.sF, a3.sF, a4.sF, a5.sF, a6.sF, a7.sF);
+#endif // N0 > 8
+
+#elif K0 == 16 // K0 == 16
+
+ // This part computes the following transpositions:
+ // 16x2 -> 2x16
+ // 16x4 -> 4x16
+ // 16x8 -> 8x16
+ // 16x16 -> 16x16
+ res0 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s0, a1.s0, a2.s0, a3.s0, a4.s0, a5.s0, a6.s0, a7.s0,
+ a8.s0, a9.s0, aA.s0, aB.s0, aC.s0, aD.s0, aE.s0, aF.s0);
+ res1 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s1, a1.s1, a2.s1, a3.s1, a4.s1, a5.s1, a6.s1, a7.s1,
+ a8.s1, a9.s1, aA.s1, aB.s1, aC.s1, aD.s1, aE.s1, aF.s1);
+#if N0 > 2
+ res2 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s2, a1.s2, a2.s2, a3.s2, a4.s2, a5.s2, a6.s2, a7.s2,
+ a8.s2, a9.s2, aA.s2, aB.s2, aC.s2, aD.s2, aE.s2, aF.s2);
+#endif // N0 > 2
+#if N0 > 3
+ res3 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s3, a1.s3, a2.s3, a3.s3, a4.s3, a5.s3, a6.s3, a7.s3,
+ a8.s3, a9.s3, aA.s3, aB.s3, aC.s3, aD.s3, aE.s3, aF.s3);
+#endif // N0 > 3
+#if N0 > 4
+ res4 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s4, a1.s4, a2.s4, a3.s4, a4.s4, a5.s4, a6.s4, a7.s4,
+ a8.s4, a9.s4, aA.s4, aB.s4, aC.s4, aD.s4, aE.s4, aF.s4);
+ res5 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s5, a1.s5, a2.s5, a3.s5, a4.s5, a5.s5, a6.s5, a7.s5,
+ a8.s5, a9.s5, aA.s5, aB.s5, aC.s5, aD.s5, aE.s5, aF.s5);
+ res6 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s6, a1.s6, a2.s6, a3.s6, a4.s6, a5.s6, a6.s6, a7.s6,
+ a8.s6, a9.s6, aA.s6, aB.s6, aC.s6, aD.s6, aE.s6, aF.s6);
+ res7 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s7, a1.s7, a2.s7, a3.s7, a4.s7, a5.s7, a6.s7, a7.s7,
+ a8.s7, a9.s7, aA.s7, aB.s7, aC.s7, aD.s7, aE.s7, aF.s7);
+#endif // N0 > 4
+#if N0 > 8
+ res8 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s8, a1.s8, a2.s8, a3.s8, a4.s8, a5.s8, a6.s8, a7.s8,
+ a8.s8, a9.s8, aA.s8, aB.s8, aC.s8, aD.s8, aE.s8, aF.s8);
+ res9 = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.s9, a1.s9, a2.s9, a3.s9, a4.s9, a5.s9, a6.s9, a7.s9,
+ a8.s9, a9.s9, aA.s9, aB.s9, aC.s9, aD.s9, aE.s9, aF.s9);
+ resA = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sA, a1.sA, a2.sA, a3.sA, a4.sA, a5.sA, a6.sA, a7.sA,
+ a8.sA, a9.sA, aA.sA, aB.sA, aC.sA, aD.sA, aE.sA, aF.sA);
+ resB = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sB, a1.sB, a2.sB, a3.sB, a4.sB, a5.sB, a6.sB, a7.sB,
+ a8.sB, a9.sB, aA.sB, aB.sB, aC.sB, aD.sB, aE.sB, aF.sB);
+ resC = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sC, a1.sC, a2.sC, a3.sC, a4.sC, a5.sC, a6.sC, a7.sC,
+ a8.sC, a9.sC, aA.sC, aB.sC, aC.sC, aD.sC, aE.sC, aF.sC);
+ resD = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sD, a1.sD, a2.sD, a3.sD, a4.sD, a5.sD, a6.sD, a7.sD,
+ a8.sD, a9.sD, aA.sD, aB.sD, aC.sD, aD.sD, aE.sD, aF.sD);
+ resE = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sE, a1.sE, a2.sE, a3.sE, a4.sE, a5.sE, a6.sE, a7.sE,
+ a8.sE, a9.sE, aA.sE, aB.sE, aC.sE, aD.sE, aE.sE, aF.sE);
+ resF = (VEC_DATA_TYPE(DATA_TYPE, K0))(a0.sF, a1.sF, a2.sF, a3.sF, a4.sF, a5.sF, a6.sF, a7.sF,
+ a8.sF, a9.sF, aA.sF, aB.sF, aC.sF, aD.sF, aE.sF, aF.sF);
+#endif // N0 > 8
+
+#else // K0 not supported
+#error "K0 value not supported"
+#endif // K0 conditions
+
+ // ---------------------------Store the output values ------------------------------
+ REPEAT_VAR_INIT_TO_CONST(16, uint, zout, 0);
+ STORE_BLOCK(N0, K0, DATA_TYPE, res, output_ptr, OUTPUT_STEP_X * sizeof(DATA_TYPE), zout);
+
+#undef BLOCK_SIZE
+#undef OUTPUT_OFFSET_X
+#undef OUTPUT_STEP_X
+}
+#endif // defined(TRANSPOSE)
+#endif // defined(K0) && defined(N0) && defined(H0) && defined(DATA_TYPE) && defined(SRC_HEIGHT)
+
+#if defined(M0) && defined(N0) && defined(K0) && defined(H0) && defined(DATA_TYPE) && \
+ defined(M) && defined(N) && defined(K)
+
+#define CONCAT(a, b) a##b
+
+#define ARM_DOT1(a, b, c) ({ c = fma(a, b, c); })
+#define ARM_DOT2(a, b, c) \
+ ({ \
+ c = fma(a.s0, b.s0, c); \
+ c = fma(a.s1, b.s1, c); \
+ })
+#define ARM_DOT3(a, b, c) \
+ ({ \
+ ARM_DOT2(a, b, c); \
+ c = fma((a.s2), (b.s2), c); \
+ })
+#define ARM_DOT4(a, b, c) \
+ ({ \
+ ARM_DOT3(a, b, c); \
+ c = fma((a.s3), (b.s3), c); \
+ })
+#define ARM_DOT8(a, b, c) \
+ ({ \
+ ARM_DOT4((a.lo), (b.lo), c); \
+ ARM_DOT4((a.hi), (b.hi), c); \
+ })
+#define ARM_DOT16(a, b, c) \
+ ({ \
+ ARM_DOT8((a.lo), (b.lo), c); \
+ ARM_DOT8((a.hi), (b.hi), c); \
+ })
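+
+// For example (illustrative): ARM_DOT4(a, b, c) expands, through ARM_DOT2 and ARM_DOT3, to
+//   c = fma(a.s0, b.s0, c); c = fma(a.s1, b.s1, c);
+//   c = fma(a.s2, b.s2, c); c = fma(a.s3, b.s3, c);
+// i.e. a K0 = 4 dot product accumulated into the scalar c with fused multiply-adds.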
+
+#if N0 == 2
+#define ARM_DOT_K0XN0(k0, a, b, c) \
+ ({ \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##0), (c.s0)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##1), (c.s1)); \
+ })
+#elif N0 == 3 // N0 == 3
+#define ARM_DOT_K0XN0(k0, a, b, c) \
+ ({ \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##0), (c.s0)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##1), (c.s1)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##2), (c.s2)); \
+ })
+#elif N0 == 4 // N0 == 4
+#define ARM_DOT_K0XN0(k0, a, b, c) \
+ ({ \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##0), (c.s0)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##1), (c.s1)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##2), (c.s2)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##3), (c.s3)); \
+ })
+#elif N0 == 8 // N0 == 8
+#define ARM_DOT_K0XN0(k0, a, b, c) \
+ ({ \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##0), (c.s0)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##1), (c.s1)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##2), (c.s2)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##3), (c.s3)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##4), (c.s4)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##5), (c.s5)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##6), (c.s6)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##7), (c.s7)); \
+ })
+#elif N0 == 16 // N0 == 16
+#define ARM_DOT_K0XN0(k0, a, b, c) \
+ ({ \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##0), (c.s0)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##1), (c.s1)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##2), (c.s2)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##3), (c.s3)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##4), (c.s4)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##5), (c.s5)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##6), (c.s6)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##7), (c.s7)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##8), (c.s8)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##9), (c.s9)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##A), (c.sA)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##B), (c.sB)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##C), (c.sC)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##D), (c.sD)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##E), (c.sE)); \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b##F), (c.sF)); \
+ })
+#else // N0 not supported
+#error "N0 value not supported"
+#endif // N0 conditions
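+
+// For example (illustrative): with N0 == 2, ARM_DOT_K0XN0(4, a0, b, c0) expands to
+//   ARM_DOT4((a0), (b0), (c0.s0)); ARM_DOT4((a0), (b1), (c0.s1));
+// one K0-wide dot product per RHS column, each accumulated into its own lane of c0.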
+
+/** This OpenCL kernel computes the matrix multiplication between 2 matrices.
+ * The LHS matrix is NOT reshaped
+ * The RHS is reshaped with @ref CLGEMMReshapeRHSMatrixKernel and the block K0xN0 is transposed
+ *
+ * @note If the first two dimensions of NDRange have been dispatched with "dummy_work_items"
+ * support, the option -DDUMMY_WORK_ITEMS must be passed at compile time.
+ * @note The GEMM's dimensions (M,N and K) must be passed at compile time using -DM, -DN and -DK
+ * (e.g. -DM=52, -DN=30 and -DK=90)
+ * @note The number of columns of LHS matrix must be passed at compile time using -DK (e.g. -DK=64)
+ * @note The block's dimensions used for reshaping the RHS matrix (N0 and K0) must be passed at
+ * compile time using -DN0 and -DK0 (e.g. -DN0=8, -DK0=4).
+ * @note The number of M0 rows to process must be passed at compile time using -DM0 (e.g. -DM0=2)
+ * @note The number of K0xN0 horizontal blocks stored on the same output row of the reshaped RHS
+ * matrix must be passed at compile time using -DH0 (e.g. -DH0=2)
+ * @note If the K0xN0 blocks in the reshaped RHS matrix have been interleaved, the option
+ * -DRHS_INTERLEAVE must be passed at compile time.
+ * @note Only the following configurations of M0, N0 and K0 are currently supported:
+ * - M0 = 1, 2, 3, 4, 5, 6, 7, 8
+ * - N0 = 2, 3, 4, 8, 16
+ * - K0 = 2, 3, 4, 8, 16
+ * - H0 >= 1
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B values required by some activation functions should also
+ * be passed at compile time using -DA_VAL= and -DB_VAL= respectively. The activation function is
+ * performed after the bias addition.
+ * @note In case the input or output have to be reinterpreted as a 3D tensor, the following
+ * information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns LHS matrix
+ *
+ * @param[in] lhs_ptr Pointer to the LHS matrix. Supported data type:
+ * F16/F32
+ * @param[in] lhs_stride_x Stride of the LHS matrix in X dimension (in bytes)
+ * @param[in] lhs_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] lhs_stride_y Stride of the LHS matrix in Y dimension (in bytes)
+ * @param[in] lhs_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] lhs_offset_first_element_in_bytes The offset of the first element in the LHS matrix
+ * @param[in] rhs_ptr Pointer to the RHS reshaped matrix. Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] rhs_stride_x Stride of the RHS reshaped matrix in X dimension
+ * (in bytes)
+ * @param[in] rhs_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] rhs_stride_y Stride of the RHS reshaped matrix in Y dimension
+ * (in bytes)
+ * @param[in] rhs_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] rhs_offset_first_element_in_bytes The offset of the first element in the RHS
+ * reshaped matrix
+ * @param[in] bias_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p lhs_ptr
+ * @param[in] bias_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] bias_step_x (Optional) bias_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] bias_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] bias_step_y (Optional) bias_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] bias_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr Pointer to the destination matrix. Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] lhs_stride_z Stride of the LHS matrix in Z dimension (in bytes)
+ * @param[in] rhs_stride_z Stride of the RHS reshaped matrix in Z dimension
+ * (in bytes)
+ * @param[in] bias_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] lhs_cross_plane_pad (Optional) Bottom paddings for LHS matrix in unit
+ * of elements (only if defined REINTERPRET_INPUT_AS_3D)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings for the output matrix
+ * in unit of elements (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
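+// Illustrative build options (an added example, not from the original source): a 52x30 F32 GEMM
+// with K = 90, 4 rows per work-item and an interleaved RHS reshaped in 4x8 blocks:
+//   -DDATA_TYPE=float -DM=52 -DN=30 -DK=90 -DM0=4 -DN0=8 -DK0=4 -DH0=2 -DRHS_INTERLEAVE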
+__kernel void gemm_mm_reshaped_only_rhs_t(IMAGE_DECLARATION(lhs), IMAGE_DECLARATION(rhs),
+#if defined(BETA)
+ IMAGE_DECLARATION(bias),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint lhs_stride_z,
+ uint rhs_stride_z,
+#if defined(BETA)
+ uint bias_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint lhs_cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ // Block size
+#define RHS_BLOCK_SIZE ((K0) * (N0))
+
+ // RHS offset and step X
+#if defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (K0)
+#define RHS_STEP_X ((K0) * (H0))
+#define RHS_STEP_LOOP (1)
+#else // defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (RHS_BLOCK_SIZE)
+#define RHS_STEP_X (K0)
+#define RHS_STEP_LOOP (H0)
+#endif // defined(RHS_INTERLEAVE)
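+
+// Worked numbers (illustrative, derived from the macros above): with K0 = 4 and H0 = 2,
+// RHS_INTERLEAVE gives RHS_OFFSET_X = 4, RHS_STEP_X = 8 and RHS_STEP_LOOP = 1; without it the
+// block is contiguous: RHS_OFFSET_X = RHS_BLOCK_SIZE = 4 * N0, RHS_STEP_X = 4, RHS_STEP_LOOP = 2.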
+
+ uint x = get_global_id(0);
+ uint y = get_global_id(1);
+ uint z = get_global_id(2);
+
+#if defined(DUMMY_WORK_ITEMS)
+ if ((x * N0 >= N) || (y * M0 >= M))
+ {
+ return;
+ }
+#endif // defined(DUMMY_WORK_ITEMS)
+
+ // Compute LHS matrix address
+ uint lhs_offset = lhs_offset_first_element_in_bytes + y * M0 * (uint)lhs_stride_y;
+
+ // Compute RHS reshaped matrix address
+ uint rhs_offset = rhs_offset_first_element_in_bytes +
+ (x % H0) * (uint)RHS_OFFSET_X * sizeof(DATA_TYPE) +
+ (x / (uint)H0) * rhs_stride_y;
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if matrix B has 3 dimensions and matrix A has more than 3
+ rhs_offset += (z % MATRIX_B_DEPTH) * rhs_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ rhs_offset += z * rhs_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ REPEAT_VAR_INIT_TO_CONST(8, uint, zlhs, 0); // uint zlhs0=0,zlhs1=0,zlhs2=0,... zlhs7=0;
+ REPEAT_VAR_INIT_TO_CONST(16, uint, zero, 0);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // The plane (zlhs) is calculated by dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zlhs, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, lhs_cross_plane_pad,
+ lhs_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply lhs_stride_z by DEPTH_GEMM3D
+ lhs_offset += z * lhs_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ lhs_offset += z * lhs_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Initialize the accumulators
+ REPEAT_VAR_INIT_TO_CONST(M0, VEC_DATA_TYPE(DATA_TYPE, N0), c,
+ 0); // VEC_DATA_TYPE(DATA_TYPE, N0) c0=0,c1=0,c2=0,... c(M0-1)=0;
+
+ int i = 0;
+ for (; i <= (K - K0); i += K0)
+ {
+ // Supported cases (M0, K0):
+ // 1,2 - 1,3 - 1,4 - 1,8 - 1,16
+ // 2,2 - 2,3 - 2,4 - 2,8 - 2,16
+ // 3,2 - 3,3 - 3,4 - 3,8 - 3,16
+ // 4,2 - 4,3 - 4,4 - 4,8 - 4,16
+ // 5,2 - 5,3 - 5,4 - 5,8 - 5,16
+ // 6,2 - 6,3 - 6,4 - 6,8 - 6,16
+ // 7,2 - 7,3 - 7,4 - 7,8 - 7,16
+ // 8,2 - 8,3 - 8,4 - 8,8 - 8,16
+ // Load values from LHS matrix
+ LOAD_BLOCK(M0, K0, DATA_TYPE, a, lhs_ptr, lhs_offset, lhs_stride_y, zlhs);
+
+ // Load values from RHS reshaped matrix
+ LOAD_BLOCK(N0, K0, DATA_TYPE, b, rhs_ptr, rhs_offset, RHS_STEP_X * sizeof(DATA_TYPE), zero);
+
+ // Accumulate
+ ARM_DOT_K0XN0(K0, a0, b, c0);
+#if M0 > 1
+ ARM_DOT_K0XN0(K0, a1, b, c1);
+#endif // M0 > 1
+#if M0 > 2
+ ARM_DOT_K0XN0(K0, a2, b, c2);
+#endif // M0 > 2
+#if M0 > 3
+ ARM_DOT_K0XN0(K0, a3, b, c3);
+#endif // M0 > 3
+#if M0 > 4
+ ARM_DOT_K0XN0(K0, a4, b, c4);
+#endif // M0 > 4
+#if M0 > 5
+ ARM_DOT_K0XN0(K0, a5, b, c5);
+#endif // M0 > 5
+#if M0 > 6
+ ARM_DOT_K0XN0(K0, a6, b, c6);
+#endif // M0 > 6
+#if M0 > 7
+ ARM_DOT_K0XN0(K0, a7, b, c7);
+#endif // M0 > 7
+
+ lhs_offset += K0 * sizeof(DATA_TYPE);
+ rhs_offset += (N0 * RHS_STEP_X * RHS_STEP_LOOP) * sizeof(DATA_TYPE);
+ }
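+
+ // Note (explanatory, added): the loop above runs while i <= K - K0, so when K is not a
+ // multiple of K0 the remaining K % K0 columns are accumulated below one element at a time.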
+
+ // Left-over accumulations
+ for (; i < K; ++i)
+ {
+ // Load values from LHS matrix
+ LOAD_BLOCK(M0, 1, DATA_TYPE, a, lhs_ptr, lhs_offset, lhs_stride_y, zlhs);
+
+ // Load values from RHS reshaped matrix
+ LOAD_BLOCK(N0, 1, DATA_TYPE, b, rhs_ptr, rhs_offset, RHS_STEP_X * sizeof(DATA_TYPE), zero);
+
+ // Accumulate
+ ARM_DOT_K0XN0(1, a0, b, c0);
+#if M0 > 1
+ ARM_DOT_K0XN0(1, a1, b, c1);
+#endif // M0 > 1
+#if M0 > 2
+ ARM_DOT_K0XN0(1, a2, b, c2);
+#endif // M0 > 2
+#if M0 > 3
+ ARM_DOT_K0XN0(1, a3, b, c3);
+#endif // M0 > 3
+#if M0 > 4
+ ARM_DOT_K0XN0(1, a4, b, c4);
+#endif // M0 > 4
+#if M0 > 5
+ ARM_DOT_K0XN0(1, a5, b, c5);
+#endif // M0 > 5
+#if M0 > 6
+ ARM_DOT_K0XN0(1, a6, b, c6);
+#endif // M0 > 6
+#if M0 > 7
+ ARM_DOT_K0XN0(1, a7, b, c7);
+#endif // M0 > 7
+
+ lhs_offset += sizeof(DATA_TYPE);
+ rhs_offset += sizeof(DATA_TYPE);
+ }
+
+ __global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes +
+ (x * (uint)N0 * sizeof(DATA_TYPE)) + (y * (uint)M0 * dst_stride_y);
+
+ REPEAT_VAR_INIT_TO_CONST(8, uint, zout, 0); // uint zout0=0,zout1=0,zout2=0,... zout7=0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // The plane (zout) is calculated by dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zout, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, dst_cross_plane_pad,
+ dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(M0, DATA_TYPE, c, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+#if defined(BROADCAST_BIAS)
+ __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes +
+ (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE));
+
+ LOAD_BLOCK(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(M0, c, bias0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes +
+ (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE)) +
+ (get_global_id(1) * (uint)M0 * bias_stride_y) +
+ get_global_id(2) * bias_stride_z;
+
+ LOAD_BLOCK(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(M0, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias
+ ADD_BLOCK(M0, c, bias);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, c, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store output block
+ STORE_BLOCK(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout);
+
+#undef RHS_BLOCK_SIZE
+#undef RHS_OFFSET_X
+#undef RHS_STEP_X
+}
+
+#define VFMA(a, b, c) ({ c = fma(a, b, c); })
+
+#if M0 == 1
+#define LD_RHS_VFMA_M0xN0(i, a, c) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ b = VLOAD(N0)( \
+ 0, (__global DATA_TYPE *)(rhs_ptr + rhs_offset + 0x##i * RHS_STEP_X * sizeof(DATA_TYPE))); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ })
+#elif M0 == 2 // M0 == 2
+#define LD_RHS_VFMA_M0xN0(i, a, c) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ b = VLOAD(N0)( \
+ 0, (__global DATA_TYPE *)(rhs_ptr + rhs_offset + 0x##i * RHS_STEP_X * sizeof(DATA_TYPE))); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ })
+#elif M0 == 3 // M0 == 3
+#define LD_RHS_VFMA_M0xN0(i, a, c) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ b = VLOAD(N0)( \
+ 0, (__global DATA_TYPE *)(rhs_ptr + rhs_offset + 0x##i * RHS_STEP_X * sizeof(DATA_TYPE))); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ })
+#elif M0 == 4 // M0 == 4
+#define LD_RHS_VFMA_M0xN0(i, a, c) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ b = VLOAD(N0)( \
+ 0, (__global DATA_TYPE *)(rhs_ptr + rhs_offset + 0x##i * RHS_STEP_X * sizeof(DATA_TYPE))); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##3).s##i), b, (c##3)); \
+ })
+#elif M0 == 5 // M0 == 5
+#define LD_RHS_VFMA_M0xN0(i, a, c) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ b = VLOAD(N0)( \
+ 0, (__global DATA_TYPE *)(rhs_ptr + rhs_offset + 0x##i * RHS_STEP_X * sizeof(DATA_TYPE))); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##3).s##i), b, (c##3)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##4).s##i), b, (c##4)); \
+ })
+#elif M0 == 6 // M0 == 6
+#define LD_RHS_VFMA_M0xN0(i, a, c) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ b = VLOAD(N0)( \
+ 0, (__global DATA_TYPE *)(rhs_ptr + rhs_offset + 0x##i * RHS_STEP_X * sizeof(DATA_TYPE))); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##3).s##i), b, (c##3)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##4).s##i), b, (c##4)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##5).s##i), b, (c##5)); \
+ })
+#elif M0 == 7 // M0 == 7
+#define LD_RHS_VFMA_M0xN0(i, a, c) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ b = VLOAD(N0)( \
+ 0, (__global DATA_TYPE *)(rhs_ptr + rhs_offset + 0x##i * RHS_STEP_X * sizeof(DATA_TYPE))); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##3).s##i), b, (c##3)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##4).s##i), b, (c##4)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##5).s##i), b, (c##5)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##6).s##i), b, (c##6)); \
+ })
+#elif M0 == 8 // M0 == 8
+#define LD_RHS_VFMA_M0xN0(i, a, c) \
+ ({ \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ b = VLOAD(N0)( \
+ 0, (__global DATA_TYPE *)(rhs_ptr + rhs_offset + 0x##i * RHS_STEP_X * sizeof(DATA_TYPE))); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##3).s##i), b, (c##3)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##4).s##i), b, (c##4)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##5).s##i), b, (c##5)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##6).s##i), b, (c##6)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##7).s##i), b, (c##7)); \
+ })
+#else // M0 not supported
+#error "M0 not supported"
+#endif // M0 not supported
+
+/** This OpenCL kernel computes the matrix multiplication between 2 matrices.
+ * The LHS matrix is NOT reshaped
+ * The RHS is reshaped with @ref CLGEMMReshapeRHSMatrixKernel and the block K0xN0 is NOT transposed
+ *
+ * @note If the first two dimensions of NDRange have been dispatched with "dummy_work_items"
+ * support, the option -DDUMMY_WORK_ITEMS must be passed at compile time.
+ * @note The GEMM's dimensions (M,N and K) must be passed at compile time using -DM, -DN and -DK
+ * (e.g. -DM=52, -DN=30 and -DK=90).
+ * @note The block's dimensions used for reshaping the RHS matrix (N0 and K0) must be passed at
+ * compile time using -DN0 and -DK0 (e.g. -DN0=8, -DK0=4).
+ * @note The number of M0 rows to process must be passed at compile time using -DM0 (e.g. -DM0=2)
+ * @note The number of K0xN0 horizontal blocks stored on the same output row of the reshaped RHS
+ * matrix must be passed at compile time using -DH0 (e.g. -DH0=2)
+ * @note If the K0xN0 blocks in the reshaped RHS matrix have been interleaved, the option
+ * -DRHS_INTERLEAVE must be passed at compile time.
+ * @note Only the following configurations of M0, N0 and K0 are currently supported:
+ * - M0 = 1, 2, 3, 4, 5, 6, 7, 8
+ * - N0 = 2, 3, 4, 8, 16
+ * - K0 = 2, 3, 4, 8, 16
+ * - H0 >= 1
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B values required by some activation functions should also
+ * be passed at compile time using -DA_VAL= and -DB_VAL= respectively. The activation function is
+ * performed after the bias addition.
+ * @note In case the input or output have to be reinterpreted as a 3D tensor, the following
+ * information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns LHS matrix
+ *
+ * @param[in] lhs_ptr Pointer to the LHS matrix. Supported data type:
+ * F16/F32
+ * @param[in] lhs_stride_x Stride of the LHS matrix in X dimension (in bytes)
+ * @param[in] lhs_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] lhs_stride_y Stride of the LHS matrix in Y dimension (in bytes)
+ * @param[in] lhs_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] lhs_offset_first_element_in_bytes The offset of the first element in the LHS matrix
+ * @param[in] rhs_ptr Pointer to the RHS reshaped matrix. Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] rhs_stride_x Stride of the RHS reshaped matrix in X dimension
+ * (in bytes)
+ * @param[in] rhs_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] rhs_stride_y Stride of the RHS reshaped matrix in Y dimension
+ * (in bytes)
+ * @param[in] rhs_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] rhs_offset_first_element_in_bytes The offset of the first element in the RHS
+ * reshaped matrix
+ * @param[in] bias_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p lhs_ptr
+ * @param[in] bias_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] bias_step_x (Optional) bias_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] bias_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] bias_step_y (Optional) bias_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] bias_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr Pointer to the destination matrix. Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] lhs_stride_z Stride of the LHS matrix in Z dimension (in bytes)
+ * @param[in] rhs_stride_z Stride of the RHS reshaped matrix in Z dimension
+ * (in bytes)
+ * @param[in] bias_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] lhs_cross_plane_pad (Optional) Bottom paddings for LHS matrix in unit
+ * of elements (only if defined REINTERPRET_INPUT_AS_3D)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings for the output matrix
+ * in unit of elements (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_reshaped_only_rhs_nt(IMAGE_DECLARATION(lhs), IMAGE_DECLARATION(rhs),
+#if defined(BETA)
+ IMAGE_DECLARATION(bias),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint lhs_stride_z,
+ uint rhs_stride_z,
+#if defined(BETA)
+ uint bias_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint lhs_cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ // Block size
+#define RHS_BLOCK_SIZE ((K0) * (N0))
+
+ // RHS offset and step X
+#if defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (N0)
+#define RHS_STEP_X ((N0) * (H0))
+#define RHS_STEP_LOOP (1)
+#else // defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (RHS_BLOCK_SIZE)
+#define RHS_STEP_X (N0)
+#define RHS_STEP_LOOP (H0)
+#endif // defined(RHS_INTERLEAVE)
+
+ uint x = get_global_id(0);
+ uint y = get_global_id(1);
+ uint z = get_global_id(2);
+
+#if defined(DUMMY_WORK_ITEMS)
+ if ((x * N0 >= N) || (y * M0 >= M))
+ {
+ return;
+ }
+#endif // defined(DUMMY_WORK_ITEMS)
+
+ // Compute LHS matrix address
+ uint lhs_offset = lhs_offset_first_element_in_bytes + y * M0 * (uint)lhs_stride_y;
+
+ // Compute RHS reshaped matrix address
+ uint rhs_offset = rhs_offset_first_element_in_bytes +
+ (x % H0) * (uint)RHS_OFFSET_X * sizeof(DATA_TYPE) +
+ (x / (uint)H0) * rhs_stride_y;
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if matrix B has 3 dimensions and matrix A has more than 3
+ rhs_offset += (z % MATRIX_B_DEPTH) * rhs_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ rhs_offset += z * rhs_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ REPEAT_VAR_INIT_TO_CONST(8, uint, zin, 0); // uint zin0=0,zin1=0,zin2=0,... zin7=0;
+ REPEAT_VAR_INIT_TO_CONST(16, uint, zero, 0); // uint zero0=0,zero1=0,zero2=0,... zero15=0;
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+
+ // The plane (zin) is calculated by dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zin, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, lhs_cross_plane_pad,
+ lhs_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply lhs_stride_z by DEPTH_GEMM3D
+ lhs_offset += z * lhs_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ lhs_offset += z * lhs_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Initialize the accumulators
+ REPEAT_VAR_INIT_TO_CONST(M0, VEC_DATA_TYPE(DATA_TYPE, N0), c,
+ 0); // VEC_DATA_TYPE(DATA_TYPE, N0) c0=0,c1=0,c2=0,... c(M0-1)=0;
+
+ int i = 0;
+ for (; i <= (K - K0); i += K0)
+ {
+ // Supported cases (M0, K0):
+ // 1,2 - 1,3 - 1,4 - 1,8 - 1,16
+ // 2,2 - 2,3 - 2,4 - 2,8 - 2,16
+ // 3,2 - 3,3 - 3,4 - 3,8 - 3,16
+ // 4,2 - 4,3 - 4,4 - 4,8 - 4,16
+ // 5,2 - 5,3 - 5,4 - 5,8 - 5,16
+ // 6,2 - 6,3 - 6,4 - 6,8 - 6,16
+ // 7,2 - 7,3 - 7,4 - 7,8 - 7,16
+ // 8,2 - 8,3 - 8,4 - 8,8 - 8,16
+ // Load values from LHS matrix
+ LOAD_BLOCK(M0, K0, DATA_TYPE, a, lhs_ptr, lhs_offset, lhs_stride_y, zin);
+
+ LD_RHS_VFMA_M0xN0(0, a, c);
+ LD_RHS_VFMA_M0xN0(1, a, c);
+#if K0 > 2
+ LD_RHS_VFMA_M0xN0(2, a, c);
+#endif // K0 > 2
+#if K0 > 3
+ LD_RHS_VFMA_M0xN0(3, a, c);
+#endif // K0 > 3
+#if K0 > 4
+ LD_RHS_VFMA_M0xN0(4, a, c);
+ LD_RHS_VFMA_M0xN0(5, a, c);
+ LD_RHS_VFMA_M0xN0(6, a, c);
+ LD_RHS_VFMA_M0xN0(7, a, c);
+#endif // K0 > 4
+#if K0 > 8
+ LD_RHS_VFMA_M0xN0(8, a, c);
+ LD_RHS_VFMA_M0xN0(9, a, c);
+ LD_RHS_VFMA_M0xN0(A, a, c);
+ LD_RHS_VFMA_M0xN0(B, a, c);
+ LD_RHS_VFMA_M0xN0(C, a, c);
+ LD_RHS_VFMA_M0xN0(D, a, c);
+ LD_RHS_VFMA_M0xN0(E, a, c);
+ LD_RHS_VFMA_M0xN0(F, a, c);
+#endif // K0 > 8
+
+ lhs_offset += K0 * sizeof(DATA_TYPE);
+ rhs_offset += K0 * RHS_STEP_X * RHS_STEP_LOOP * sizeof(DATA_TYPE);
+ }
+
+ // Left-over accumulations
+ for (; i < K; ++i)
+ {
+ // Load values from LHS matrix
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a0 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 0 * lhs_stride_y + zin0));
+#if M0 > 1
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a1 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 1 * lhs_stride_y + zin1));
+#endif // M0 > 1
+#if M0 > 2
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a2 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 2 * lhs_stride_y + zin2));
+#endif // M0 > 2
+#if M0 > 3
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a3 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 3 * lhs_stride_y + zin3));
+#endif // M0 > 3
+#if M0 > 4
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a4 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 4 * lhs_stride_y + zin4));
+#endif // M0 > 4
+#if M0 > 5
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a5 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 5 * lhs_stride_y + zin5));
+#endif // M0 > 5
+#if M0 > 6
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a6 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 6 * lhs_stride_y + zin6));
+#endif // M0 > 6
+#if M0 > 7
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a7 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 7 * lhs_stride_y + zin7));
+#endif // M0 > 7
+
+ LD_RHS_VFMA_M0xN0(0, a, c);
+
+ lhs_offset += sizeof(DATA_TYPE);
+ rhs_offset += RHS_STEP_X * sizeof(DATA_TYPE);
+ }
+
+ __global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes +
+ (x * (uint)N0 * sizeof(DATA_TYPE)) + (y * (uint)M0 * dst_stride_y);
+
+ REPEAT_VAR_INIT_TO_CONST(8, uint, zout, 0); // uint zout0=0,zout1=0,zout2=0,... zout7=0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ // The plane (zout) is calculated by dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zout, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, dst_cross_plane_pad,
+ dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(M0, DATA_TYPE, c, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+#if defined(BROADCAST_BIAS)
+ __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes +
+ (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE));
+
+ LOAD_BLOCK(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(M0, c, bias0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes +
+ (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE)) +
+ (get_global_id(1) * (uint)M0 * bias_stride_y) +
+ get_global_id(2) * bias_stride_z;
+
+ LOAD_BLOCK(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(M0, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias
+ ADD_BLOCK(M0, c, bias);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, c, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store output block
+ STORE_BLOCK(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout);
+
+#undef RHS_BLOCK_SIZE
+#undef RHS_OFFSET_X
+#undef RHS_STEP_X
+}
+#endif // defined(M0) && defined(N0) && defined(K0) && defined(H0) && defined(DATA_TYPE) &&
+ // defined(M) && defined(N) && defined(K)
+
+#if defined(M0) && defined(N0) && defined(K0) && defined(V0) && defined(H0) && \
+ defined(DATA_TYPE) && defined(DATA_TYPE_ACCUMULATOR) && defined(M) && defined(N)
+
+#if defined(MIXED_PRECISION)
+#if K0 == 2
+#define ARM_DOT_K0(a, b, c) \
+ ({ \
+ c += a.s0 * b.s0; \
+ c += a.s1 * b.s1; \
+ })
+#elif K0 == 3 // K0 == 3
+#define ARM_DOT_K0(a, b, c) \
+ ({ \
+ c += a.s0 * b.s0; \
+ c += a.s1 * b.s1; \
+ c += a.s2 * b.s2; \
+ })
+#elif K0 == 4 // K0 == 4
+#define ARM_DOT_K0(a, b, c) \
+ ({ \
+ c += a.s0 * b.s0; \
+ c += a.s1 * b.s1; \
+ c += a.s2 * b.s2; \
+ c += a.s3 * b.s3; \
+ })
+#elif K0 == 8 // K0 == 8
+#define ARM_DOT_K0(a, b, c) \
+ ({ \
+ c += a.s0 * b.s0; \
+ c += a.s1 * b.s1; \
+ c += a.s2 * b.s2; \
+ c += a.s3 * b.s3; \
+ c += a.s4 * b.s4; \
+ c += a.s5 * b.s5; \
+ c += a.s6 * b.s6; \
+ c += a.s7 * b.s7; \
+ })
+#elif K0 == 16 // K0 == 16
+#define ARM_DOT_K0(a, b, c) \
+ ({ \
+ c += a.s0 * b.s0; \
+ c += a.s1 * b.s1; \
+ c += a.s2 * b.s2; \
+ c += a.s3 * b.s3; \
+ c += a.s4 * b.s4; \
+ c += a.s5 * b.s5; \
+ c += a.s6 * b.s6; \
+ c += a.s7 * b.s7; \
+ c += a.s8 * b.s8; \
+ c += a.s9 * b.s9; \
+ c += a.sA * b.sA; \
+ c += a.sB * b.sB; \
+ c += a.sC * b.sC; \
+ c += a.sD * b.sD; \
+ c += a.sE * b.sE; \
+ c += a.sF * b.sF; \
+ })
+#else // K0 not supported
+#error "K0 value not supported"
+#endif // K0 conditions
+#else // defined(MIXED_PRECISION)
+#if K0 == 2
+#define ARM_DOT_K0(a, b, c) \
+ ({ \
+ c = fma(a.s0, b.s0, c); \
+ c = fma(a.s1, b.s1, c); \
+ })
+#elif K0 == 3 // K0 == 3
+#define ARM_DOT_K0(a, b, c) \
+ ({ \
+ c = fma(a.s0, b.s0, c); \
+ c = fma(a.s1, b.s1, c); \
+ c = fma(a.s2, b.s2, c); \
+ })
+#elif K0 == 4 // K0 == 4
+#define ARM_DOT_K0(a, b, c) \
+ ({ \
+ c = fma(a.s0, b.s0, c); \
+ c = fma(a.s1, b.s1, c); \
+ c = fma(a.s2, b.s2, c); \
+ c = fma(a.s3, b.s3, c); \
+ })
+#elif K0 == 8 // K0 == 8
+#define ARM_DOT_K0(a, b, c) \
+ ({ \
+ c = fma(a.s0, b.s0, c); \
+ c = fma(a.s1, b.s1, c); \
+ c = fma(a.s2, b.s2, c); \
+ c = fma(a.s3, b.s3, c); \
+ c = fma(a.s4, b.s4, c); \
+ c = fma(a.s5, b.s5, c); \
+ c = fma(a.s6, b.s6, c); \
+ c = fma(a.s7, b.s7, c); \
+ })
+#elif K0 == 16 // K0 == 16
+#define ARM_DOT_K0(a, b, c) \
+ ({ \
+ c = fma(a.s0, b.s0, c); \
+ c = fma(a.s1, b.s1, c); \
+ c = fma(a.s2, b.s2, c); \
+ c = fma(a.s3, b.s3, c); \
+ c = fma(a.s4, b.s4, c); \
+ c = fma(a.s5, b.s5, c); \
+ c = fma(a.s6, b.s6, c); \
+ c = fma(a.s7, b.s7, c); \
+ c = fma(a.s8, b.s8, c); \
+ c = fma(a.s9, b.s9, c); \
+ c = fma(a.sA, b.sA, c); \
+ c = fma(a.sB, b.sB, c); \
+ c = fma(a.sC, b.sC, c); \
+ c = fma(a.sD, b.sD, c); \
+ c = fma(a.sE, b.sE, c); \
+ c = fma(a.sF, b.sF, c); \
+ })
+#else // K0 not supported
+#error "K0 value not supported"
+#endif // K0 conditions
+#endif // defined(MIXED_PRECISION)
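+
+// Design note (explanatory, added): the mixed-precision variant uses plain multiply-adds rather
+// than fma() because the accumulator c has type DATA_TYPE_ACCUMULATOR (e.g. float) while a and b
+// have type DATA_TYPE (e.g. half); each product is converted to the accumulator type on addition.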
+
+#if N0 == 2
+#define ARM_DOT_K0XN0(a, b, c) \
+ ({ \
+ ARM_DOT_K0((a), (b##0), (c.s0)); \
+ ARM_DOT_K0((a), (b##1), (c.s1)); \
+ })
+#elif N0 == 3 // N0 == 3
+#define ARM_DOT_K0XN0(a, b, c) \
+ ({ \
+ ARM_DOT_K0((a), (b##0), (c.s0)); \
+ ARM_DOT_K0((a), (b##1), (c.s1)); \
+ ARM_DOT_K0((a), (b##2), (c.s2)); \
+ })
+#elif N0 == 4 // N0 == 4
+#define ARM_DOT_K0XN0(a, b, c) \
+ ({ \
+ ARM_DOT_K0((a), (b##0), (c.s0)); \
+ ARM_DOT_K0((a), (b##1), (c.s1)); \
+ ARM_DOT_K0((a), (b##2), (c.s2)); \
+ ARM_DOT_K0((a), (b##3), (c.s3)); \
+ })
+#elif N0 == 8 // N0 == 8
+#define ARM_DOT_K0XN0(a, b, c) \
+ ({ \
+ ARM_DOT_K0((a), (b##0), (c.s0)); \
+ ARM_DOT_K0((a), (b##1), (c.s1)); \
+ ARM_DOT_K0((a), (b##2), (c.s2)); \
+ ARM_DOT_K0((a), (b##3), (c.s3)); \
+ ARM_DOT_K0((a), (b##4), (c.s4)); \
+ ARM_DOT_K0((a), (b##5), (c.s5)); \
+ ARM_DOT_K0((a), (b##6), (c.s6)); \
+ ARM_DOT_K0((a), (b##7), (c.s7)); \
+ })
+#elif N0 == 16 // N0 == 16
+#define ARM_DOT_K0XN0(a, b, c) \
+ ({ \
+ ARM_DOT_K0((a), (b##0), (c.s0)); \
+ ARM_DOT_K0((a), (b##1), (c.s1)); \
+ ARM_DOT_K0((a), (b##2), (c.s2)); \
+ ARM_DOT_K0((a), (b##3), (c.s3)); \
+ ARM_DOT_K0((a), (b##4), (c.s4)); \
+ ARM_DOT_K0((a), (b##5), (c.s5)); \
+ ARM_DOT_K0((a), (b##6), (c.s6)); \
+ ARM_DOT_K0((a), (b##7), (c.s7)); \
+ ARM_DOT_K0((a), (b##8), (c.s8)); \
+ ARM_DOT_K0((a), (b##9), (c.s9)); \
+ ARM_DOT_K0((a), (b##A), (c.sA)); \
+ ARM_DOT_K0((a), (b##B), (c.sB)); \
+ ARM_DOT_K0((a), (b##C), (c.sC)); \
+ ARM_DOT_K0((a), (b##D), (c.sD)); \
+ ARM_DOT_K0((a), (b##E), (c.sE)); \
+ ARM_DOT_K0((a), (b##F), (c.sF)); \
+ })
+#else // N0 not supported
+#error "N0 value not supported"
+#endif // N0 conditions
+
+/** This OpenCL kernel computes the matrix multiplication between 2 matrices.
+ * The LHS matrix must be reshaped with @ref CLGEMMReshapeLHSMatrixKernel and the M0xK0 blocks
+ * must NOT be transposed. The RHS matrix must be reshaped with @ref CLGEMMReshapeRHSMatrixKernel
+ * and the K0xN0 blocks must be transposed.
+ *
+ * @note The data type must be passed at compile time using -DDATA_TYPE (e.g. -DDATA_TYPE=float)
+ * @note The data type used for the accumulators must be passed at compile time using
+ * -DDATA_TYPE_ACCUMULATOR (e.g. -DDATA_TYPE_ACCUMULATOR=float)
+ * @note The F16 computation also supports mixed precision through the option -DMIXED_PRECISION
+ * passed at compile time. If enabled, DATA_TYPE_ACCUMULATOR should be set to float
+ * @note If the first two dimensions of NDRange have been dispatched with "dummy_work_items"
+ * support, the option -DDUMMY_WORK_ITEMS must be passed at compile time.
+ * @note The GEMM's dimensions M and N must be passed at compile time using -DM and -DN (e.g. -DM=52
+ * and -DN=90).
+ * @note The block's dimensions used for reshaping the LHS matrix and the RHS matrix (M0, N0 and K0)
+ * must be passed at compile time using -DM0, -DN0 and -DK0 (e.g. -DM0=4, -DN0=8, -DK0=4).
+ * @note The number of M0xK0 vertical blocks stored on the same output row of the reshaped LHS
+ * matrix must be passed at compile time using -DV0 (e.g. -DV0=2)
+ * @note The number of K0xN0 horizontal blocks stored on the same output row of the reshaped RHS
+ * matrix must be passed at compile time using -DH0 (e.g. -DH0=2)
+ * @note If the M0xK0 blocks in the reshaped LHS matrix have been interleaved, the option
+ * -DLHS_INTERLEAVE must be passed at compile time.
+ * @note If the K0xN0 blocks in the reshaped RHS matrix have been interleaved, the option
+ * -DRHS_INTERLEAVE must be passed at compile time.
+ * @note Only the following configurations of M0, N0 and K0 are currently supported:
+ * - M0 = 2, 3, 4, 5, 6, 7, 8
+ * - N0 = 2, 3, 4, 8, 16
+ * - K0 = 2, 3, 4, 8, 16
+ * - V0 >= 1
+ * - H0 >= 1
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B values required by some activation functions should also
+ * be passed at compile time using -DA_VAL= and -DB_VAL= respectively. The activation function is
+ * performed after the bias addition.
+ * @note In case the output has to be reinterpreted as a 3D tensor (e.g. output of convolution
+ * layer), the following information must be passed at compile time:
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns LHS matrix NOT reshaped
+ *
+ * @param[in] lhs_ptr Pointer to the LHS reshaped matrix. Supported data
+ * type: F16/F32
+ * @param[in] lhs_stride_x Stride of the LHS reshaped matrix in X dimension
+ * (in bytes)
+ * @param[in] lhs_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] lhs_stride_y Stride of the LHS reshaped matrix in Y dimension
+ * (in bytes)
+ * @param[in] lhs_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] lhs_offset_first_element_in_bytes The offset of the first element in the LHS
+ * reshaped matrix
+ * @param[in] rhs_ptr Pointer to the RHS reshaped matrix. Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] rhs_stride_x Stride of the RHS reshaped matrix in X dimension
+ * (in bytes)
+ * @param[in] rhs_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] rhs_stride_y Stride of the RHS reshaped matrix in Y dimension
+ * (in bytes)
+ * @param[in] rhs_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] rhs_offset_first_element_in_bytes The offset of the first element in the RHS
+ * reshaped matrix
+ * @param[in] bias_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p lhs_ptr
+ * @param[in] bias_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] bias_step_x (Optional) bias_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] bias_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] bias_step_y (Optional) bias_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] bias_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr Pointer to the destination matrix. Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] k Number of columns in LHS matrix and rows in RHS
+ * matrix not reshaped.
+ * @param[in] lhs_stride_z Stride of the LHS reshaped matrix in Z dimension
+ * (in bytes)
+ * @param[in] rhs_stride_z Stride of the RHS reshaped matrix in Z dimension
+ * (in bytes)
+ * @param[in] bias_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
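+// Illustrative build options (an added example, not from the original source): an F16 GEMM
+// accumulating in F32 (mixed precision), with both operands reshaped, might be compiled with:
+//   -DDATA_TYPE=half -DDATA_TYPE_ACCUMULATOR=float -DMIXED_PRECISION -DM=64 -DN=64
+//   -DM0=4 -DK0=4 -DN0=8 -DV0=2 -DH0=2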
+__kernel void gemm_mm_reshaped_lhs_nt_rhs_t(IMAGE_DECLARATION(lhs), IMAGE_DECLARATION(rhs),
+#if defined(BETA)
+ IMAGE_DECLARATION(bias),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint k, uint lhs_stride_z,
+ uint rhs_stride_z,
+#if defined(BETA)
+ uint bias_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ // Block size
+#define LHS_BLOCK_SIZE ((K0) * (M0))
+
+#if defined(LHS_INTERLEAVE)
+#define LHS_OFFSET_X (K0)
+#define LHS_STEP_X ((K0) * (V0))
+#define LHS_STEP_LOOP (1)
+#else // defined(LHS_INTERLEAVE)
+#define LHS_OFFSET_X (LHS_BLOCK_SIZE)
+#define LHS_STEP_X (K0)
+#define LHS_STEP_LOOP (V0)
+#endif // defined(LHS_INTERLEAVE)
+
+ // Block size
+#define RHS_BLOCK_SIZE ((K0) * (N0))
+
+ // RHS offset and step X
+#if defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (K0)
+#define RHS_STEP_X ((K0) * (H0))
+#define RHS_STEP_LOOP (1)
+#else // defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (RHS_BLOCK_SIZE)
+#define RHS_STEP_X (K0)
+#define RHS_STEP_LOOP (H0)
+#endif // defined(RHS_INTERLEAVE)
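+
+// Worked numbers (illustrative, derived from the macros above): with M0 = 4, K0 = 4 and V0 = 2,
+// LHS_INTERLEAVE gives LHS_OFFSET_X = 4, LHS_STEP_X = 8 and LHS_STEP_LOOP = 1; without it,
+// LHS_OFFSET_X = LHS_BLOCK_SIZE = 16, LHS_STEP_X = 4 and LHS_STEP_LOOP = 2. The RHS macros
+// follow the same pattern with H0 in place of V0.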
+
+#if defined(DUMMY_WORK_ITEMS)
+ if ((get_global_id(0) * N0 >= N) || (get_global_id(1) * M0 >= M))
+ {
+ return;
+ }
+#endif // defined(DUMMY_WORK_ITEMS)
+
+ // Compute LHS matrix address
+ __global uchar *lhs_addr = lhs_ptr + lhs_offset_first_element_in_bytes +
+ (get_global_id(1) % V0) * (uint)LHS_OFFSET_X * sizeof(DATA_TYPE) +
+ (get_global_id(1) / V0) * (uint)lhs_stride_y +
+ (get_global_id(2) * lhs_stride_z);
+
+ // Compute RHS matrix address
+ __global uchar *rhs_addr = rhs_ptr + rhs_offset_first_element_in_bytes +
+ (get_global_id(0) % H0) * (uint)RHS_OFFSET_X * sizeof(DATA_TYPE) +
+ (get_global_id(0) / (uint)H0) * rhs_stride_y;
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if matrix B has 3 dimensions and matrix A has more than 3
+ rhs_addr += (get_global_id(2) % MATRIX_B_DEPTH) * rhs_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ rhs_addr += get_global_id(2) * rhs_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ // Initialize the accumulators
+ REPEAT_VAR_INIT_TO_CONST(M0, VEC_DATA_TYPE(DATA_TYPE_ACCUMULATOR, N0), c, 0);
+
+ REPEAT_VAR_INIT_TO_CONST(M0, uint, zlhs, 0); // uint zlhs0=0,zlhs1=0,zlhs2=0,... zlhs7=0;
+ REPEAT_VAR_INIT_TO_CONST(16, uint, zero, 0);
+
+ for (int i = 0; i < k; i += K0)
+ {
+ // Supported cases (M0, K0):
+ // 1,2 - 1,3 - 1,4 - 1,8 - 1,16
+ // 2,2 - 2,3 - 2,4 - 2,8 - 2,16
+ // 3,2 - 3,3 - 3,4 - 3,8 - 3,16
+ // 4,2 - 4,3 - 4,4 - 4,8 - 4,16
+ // 5,2 - 5,3 - 5,4 - 5,8 - 5,16
+ // 6,2 - 6,3 - 6,4 - 6,8 - 6,16
+ // 7,2 - 7,3 - 7,4 - 7,8 - 7,16
+ // 8,2 - 8,3 - 8,4 - 8,8 - 8,16
+ // Load values from LHS matrix
+ LOAD_BLOCK(M0, K0, DATA_TYPE, a, lhs_addr, 0, LHS_STEP_X * sizeof(DATA_TYPE), zlhs);
+
+ // Load values from RHS matrix
+ LOAD_BLOCK(N0, K0, DATA_TYPE, b, rhs_addr, 0, RHS_STEP_X * sizeof(DATA_TYPE), zero);
+
+ // Accumulate
+ ARM_DOT_K0XN0(a0, b, c0);
+#if M0 > 1
+ ARM_DOT_K0XN0(a1, b, c1);
+#endif // M0 > 1
+#if M0 > 2
+ ARM_DOT_K0XN0(a2, b, c2);
+#endif // M0 > 2
+#if M0 > 3
+ ARM_DOT_K0XN0(a3, b, c3);
+#endif // M0 > 3
+#if M0 > 4
+ ARM_DOT_K0XN0(a4, b, c4);
+#endif // M0 > 4
+#if M0 > 5
+ ARM_DOT_K0XN0(a5, b, c5);
+#endif // M0 > 5
+#if M0 > 6
+ ARM_DOT_K0XN0(a6, b, c6);
+#endif // M0 > 6
+#if M0 > 7
+ ARM_DOT_K0XN0(a7, b, c7);
+#endif // M0 > 7
+
+ lhs_addr += (M0 * LHS_STEP_X * LHS_STEP_LOOP) * sizeof(DATA_TYPE);
+ rhs_addr += (N0 * RHS_STEP_X * RHS_STEP_LOOP) * sizeof(DATA_TYPE);
+ }
+
+ __global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes +
+ (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE)) +
+ (get_global_id(1) * (uint)M0 * dst_stride_y);
+
+ REPEAT_VAR_INIT_TO_CONST(M0, uint, zout, 0);
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // The plane (zout) is calculated by dividing M (get_global_id(1) * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zout, get_global_id(1), HEIGHT_GEMM3D, DEPTH_GEMM3D,
+ dst_cross_plane_pad, dst_stride_y);
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += get_global_id(2) * dst_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ dst_addr += get_global_id(2) * dst_stride_z;
+
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(M0, DATA_TYPE, c, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+#if defined(BROADCAST_BIAS)
+ __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes +
+ (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE));
+
+ LOAD_BLOCK(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias[broadcasted]
+#if defined(MIXED_PRECISION)
+ CONVERT_BLOCK(1, N0, DATA_TYPE_ACCUMULATOR, bias, bias_hp);
+ ADD_BLOCK_BROADCAST(M0, c, bias_hp0);
+#else // defined(MIXED_PRECISION)
+ ADD_BLOCK_BROADCAST(M0, c, bias0);
+#endif // defined(MIXED_PRECISION)
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes +
+ (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE)) +
+ (get_global_id(1) * (uint)M0 * bias_stride_y) +
+ get_global_id(2) * bias_stride_z;
+
+ LOAD_BLOCK(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(M0, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias
+#if defined(MIXED_PRECISION)
+ CONVERT_BLOCK(M0, N0, DATA_TYPE_ACCUMULATOR, bias, bias_hp);
+ ADD_BLOCK(M0, c, bias_hp);
+#else // defined(MIXED_PRECISION)
+ ADD_BLOCK(M0, c, bias);
+#endif // defined(MIXED_PRECISION)
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+#if defined(MIXED_PRECISION)
+ ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE_ACCUMULATOR, c, A_VAL, B_VAL);
+#else // defined(MIXED_PRECISION)
+ ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, c, A_VAL, B_VAL);
+#endif // defined(MIXED_PRECISION)
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store output block
+#if defined(MIXED_PRECISION)
+ CONVERT_STORE_BLOCK(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout);
+#else // defined(MIXED_PRECISION)
+ STORE_BLOCK(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout);
+#endif // defined(MIXED_PRECISION)
+
+#undef LHS_BLOCK_SIZE
+#undef LHS_OFFSET_X
+#undef LHS_STEP_X
+#undef RHS_BLOCK_SIZE
+#undef RHS_OFFSET_X
+#undef RHS_STEP_X
+}
+
+#if defined(LHS_TRANSPOSE)
+
+#define VTYPE(TYPE, SIZE) VEC_DATA_TYPE(TYPE, SIZE)
+
+#if defined(MIXED_PRECISION)
+
+#if (GPU_ARCH == GPU_ARCH_MIDGARD)
+#define ARM_VFMA(N0, a, b, c) \
+ c += (CONVERT(a, VEC_DATA_TYPE(DATA_TYPE_ACCUMULATOR, N0))) * \
+ (CONVERT(b, VEC_DATA_TYPE(DATA_TYPE_ACCUMULATOR, N0)));
+#else // GPU_ARCH == GPU_ARCH_MIDGARD
+#define ARM_VFMA(N0, a, b, c) \
+ c = fma((CONVERT(a, VEC_DATA_TYPE(DATA_TYPE_ACCUMULATOR, N0))), \
+ (CONVERT(b, VEC_DATA_TYPE(DATA_TYPE_ACCUMULATOR, N0))), (c));
+#endif // GPU_ARCH == GPU_ARCH_MIDGARD
+
+#else // defined(MIXED_PRECISION)
+
+#if (GPU_ARCH == GPU_ARCH_MIDGARD)
+#define ARM_VFMA(N0, a, b, c) c += (a) * (b);
+#else // GPU_ARCH == GPU_ARCH_MIDGARD
+#define ARM_VFMA(N0, a, b, c) c = fma((a), (b), (c));
+#endif // GPU_ARCH == GPU_ARCH_MIDGARD
+
+#endif // defined(MIXED_PRECISION)
+
+#define ARM_VVM_T_NT_1xN0x1(N0, TYPE, a, b, C) ({ ARM_VFMA(N0, (VTYPE(TYPE, N0))(a), b, (C##0)); })
+#define ARM_VVM_T_NT_2xN0x1(N0, TYPE, a, b, C) \
+ ({ \
+ ARM_VFMA(N0, (VTYPE(TYPE, N0))(a.s0), b, (C##0)); \
+ ARM_VFMA(N0, (VTYPE(TYPE, N0))(a.s1), b, (C##1)); \
+ })
+#define ARM_VVM_T_NT_3xN0x1(N0, TYPE, a, b, C) \
+ ({ \
+ ARM_VVM_T_NT_2xN0x1(N0, TYPE, a, b, C); \
+ ARM_VFMA(N0, (VTYPE(TYPE, N0))(a.s2), b, (C##2)); \
+ })
+#define ARM_VVM_T_NT_4xN0x1(N0, TYPE, a, b, C) \
+ ({ \
+ ARM_VVM_T_NT_3xN0x1(N0, TYPE, a, b, C); \
+ ARM_VFMA(N0, (VTYPE(TYPE, N0))(a.s3), b, (C##3)); \
+ })
+#define ARM_VVM_T_NT_8xN0x1(N0, TYPE, a, b, C) \
+ ({ \
+ ARM_VVM_T_NT_4xN0x1(N0, TYPE, a, b, C); \
+ ARM_VFMA(N0, (VTYPE(TYPE, N0))(a.s4), b, (C##4)); \
+ ARM_VFMA(N0, (VTYPE(TYPE, N0))(a.s5), b, (C##5)); \
+ ARM_VFMA(N0, (VTYPE(TYPE, N0))(a.s6), b, (C##6)); \
+ ARM_VFMA(N0, (VTYPE(TYPE, N0))(a.s7), b, (C##7)); \
+ })
+
+// Factory macro for the column-vector (transposed) by row-vector (not transposed) multiplication.
+// K0 = 1; a is the column-vector (transposed), b is the row-vector (not transposed), C is the
+// output matrix. Lower case denotes a vector (a, b); upper case denotes a matrix (C).
+#define ARM_VVM_T_NT_M0xN0x1(M0, N0, TYPE, a, b, C) ARM_VVM_T_NT_##M0##xN0x1(N0, TYPE, a, b, C)
+
+#define ARM_MM_T_NT_M0xN0x1(M0, N0, TYPE, A, B, C) \
+ ({ ARM_VVM_T_NT_M0xN0x1(M0, N0, TYPE, (A##0), (B##0), C); })
+#define ARM_MM_T_NT_M0xN0x2(M0, N0, TYPE, A, B, C) \
+ ({ \
+ ARM_MM_T_NT_M0xN0x1(M0, N0, TYPE, A, B, C); \
+ ARM_VVM_T_NT_M0xN0x1(M0, N0, TYPE, (A##1), (B##1), C); \
+ })
+#define ARM_MM_T_NT_M0xN0x3(M0, N0, TYPE, A, B, C) \
+ ({ \
+ ARM_MM_T_NT_M0xN0x2(M0, N0, TYPE, A, B, C); \
+ ARM_VVM_T_NT_M0xN0x1(M0, N0, TYPE, (A##2), (B##2), C); \
+ })
+#define ARM_MM_T_NT_M0xN0x4(M0, N0, TYPE, A, B, C) \
+ ({ \
+ ARM_MM_T_NT_M0xN0x3(M0, N0, TYPE, A, B, C); \
+ ARM_VVM_T_NT_M0xN0x1(M0, N0, TYPE, (A##3), (B##3), C); \
+ })
+#define ARM_MM_T_NT_M0xN0x8(M0, N0, TYPE, A, B, C) \
+ ({ \
+ ARM_MM_T_NT_M0xN0x4(M0, N0, TYPE, A, B, C); \
+ ARM_VVM_T_NT_M0xN0x1(M0, N0, TYPE, (A##4), (B##4), C); \
+ ARM_VVM_T_NT_M0xN0x1(M0, N0, TYPE, (A##5), (B##5), C); \
+ ARM_VVM_T_NT_M0xN0x1(M0, N0, TYPE, (A##6), (B##6), C); \
+ ARM_VVM_T_NT_M0xN0x1(M0, N0, TYPE, (A##7), (B##7), C); \
+ })
+#define ARM_MM_T_NT_M0xN0x16(M0, N0, TYPE, A, B, C) \
+ ({ \
+ ARM_MM_T_NT_M0xN0x8(M0, N0, TYPE, A, B, C); \
+ ARM_MM_T_NT_M0xN0x1(M0, N0, TYPE, (A##8), (B##8), C); \
+ ARM_MM_T_NT_M0xN0x1(M0, N0, TYPE, (A##9), (B##9), C); \
+ ARM_MM_T_NT_M0xN0x1(M0, N0, TYPE, (A##A), (B##A), C); \
+ ARM_MM_T_NT_M0xN0x1(M0, N0, TYPE, (A##B), (B##B), C); \
+ ARM_MM_T_NT_M0xN0x1(M0, N0, TYPE, (A##C), (B##C), C); \
+ ARM_MM_T_NT_M0xN0x1(M0, N0, TYPE, (A##D), (B##D), C); \
+ ARM_MM_T_NT_M0xN0x1(M0, N0, TYPE, (A##E), (B##E), C); \
+ ARM_MM_T_NT_M0xN0x1(M0, N0, TYPE, (A##F), (B##F), C); \
+ })
+
+// Factory macro for the matrix (transposed) by matrix (not transposed) multiplication.
+// The dimensions for this matrix multiplication are defined through M0, N0 and K0
+// The dimensions supported are:
+// M0: 1, 2, 3, 4, 8
+// N0: 1, 2, 3, 4, 8, 16
+// K0: 1, 2, 3, 4, 8, 16
+// This macro calls the vector-by-matrix macro K0 times
+// A, B and C are matrices
+#define ARM_MM_T_NT(M0, N0, K0, TYPE, A, B, C) \
+ CONCAT(ARM_MM_T_NT_M0xN0x, K0) \
+ (M0, N0, TYPE, A, B, C)
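+// Illustrative expansion (assuming M0=2, N0=4, K0=2, TYPE=float, all chosen for the example):
+// ARM_MM_T_NT(2, 4, 2, float, a, b, c) resolves to ARM_MM_T_NT_M0xN0x2(2, 4, float, a, b, c),
+// i.e. two rank-1 updates: c0 += a0.s0 * b0 and c1 += a0.s1 * b0, then c0 += a1.s0 * b1 and
+// c1 += a1.s1 * b1, with each product routed through ARM_VFMA.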
+
+/** This OpenCL kernel computes the matrix multiplication between 2 matrices.
+ * The LHS matrix must be reshaped with @ref CLGEMMReshapeLHSMatrixKernel and the M0xK0 blocks
+ * must be transposed. The RHS matrix must be reshaped with @ref CLGEMMReshapeRHSMatrixKernel and
+ * the K0xN0 blocks must NOT be transposed.
+ *
+ * @note LHS_TRANSPOSE should be passed at compile time in order to compile this OpenCL kernel (e.g.
+ * -DLHS_TRANSPOSE).
+ * @note If the first two dimensions of NDRange have been dispatched with "dummy_work_items"
+ * support, the option -DDUMMY_WORK_ITEMS must be passed at compile time.
+ * @note The GEMM's dimensions M and N must be passed at compile time using -DM and -DN (e.g. -DM=52
+ * and -DN=90).
+ * @note The block's dimensions used for reshaping the LHS matrix and the RHS matrix (M0, N0 and K0)
+ * must be passed at compile time using -DM0, -DN0 and -DK0 (e.g. -DM0=4, -DN0=8, -DK0=4).
+ * @note The number of M0xK0 vertical blocks stored on the same output row of the reshaped LHS
+ * matrix must be passed at compile time using -DV0 (e.g. -DV0=2)
+ * @note The number of K0xN0 horizontal blocks stored on the same output row of the reshaped RHS
+ * matrix must be passed at compile time using -DH0 (e.g. -DH0=2)
+ * @note If the M0xK0 blocks in the reshaped LHS matrix have been interleaved, the option
+ * -DLHS_INTERLEAVE must be passed at compile time.
+ * @note If the K0xN0 blocks in the reshaped RHS matrix have been interleaved, the option
+ * -DRHS_INTERLEAVE must be passed at compile time.
+ * @note Only the following configurations of M0, N0 and K0 are currently supported:
+ * - M0 = 2, 3, 4, 8
+ * - N0 = 2, 3, 4, 8, 16
+ * - K0 = 2, 3, 4, 8, 16
+ * - V0 >= 1
+ * - H0 >= 1
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B variables required by some activation functions should be
+ * passed at compile time as well using -DA_VAL= and -DB_VAL= respectively. The activation
+ * function is performed after the bias addition.
+ * @note In case the output has to be reinterpreted as a 3D tensor (e.g. output of convolution
+ * layer), the following information must be passed at compile time:
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns LHS matrix NOT reshaped
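+ * @note Example (illustrative only; the values are assumptions): per the notes above, a
+ *       plausible option string for this kernel is "-DLHS_TRANSPOSE -DM=52 -DN=90 -DM0=4 -DN0=8
+ *       -DK0=4 -DV0=2 -DH0=2 -DDATA_TYPE=float -DDATA_TYPE_ACCUMULATOR=float".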
+ *
+ * @param[in] lhs_ptr Pointer to the LHS reshaped matrix. Supported data
+ * type: F16/F32
+ * @param[in] lhs_stride_x Stride of the LHS reshaped matrix in X dimension
+ * (in bytes)
+ * @param[in] lhs_step_x lhs_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] lhs_stride_y Stride of the LHS reshaped matrix in Y dimension
+ * (in bytes)
+ * @param[in] lhs_step_y lhs_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] lhs_offset_first_element_in_bytes The offset of the first element in the LHS
+ * reshaped matrix
+ * @param[in] rhs_ptr Pointer to the RHS reshaped matrix. Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] rhs_stride_x Stride of the RHS reshaped matrix in X dimension
+ * (in bytes)
+ * @param[in] rhs_step_x rhs_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] rhs_stride_y Stride of the RHS reshaped matrix in Y dimension
+ * (in bytes)
+ * @param[in] rhs_step_y rhs_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] rhs_offset_first_element_in_bytes The offset of the first element in the RHS
+ * reshaped matrix
+ * @param[in] bias_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p lhs_ptr
+ * @param[in] bias_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] bias_step_x (Optional) bias_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] bias_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] bias_step_y (Optional) bias_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] bias_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr Pointer to the destination matrix. Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] k Number of columns in the LHS matrix and rows in the
+ * RHS matrix, before reshaping.
+ * @param[in] lhs_stride_z Stride of the LHS reshaped matrix in Z dimension
+ * (in bytes)
+ * @param[in] rhs_stride_z Stride of the RHS reshaped matrix in Z dimension
+ * (in bytes)
+ * @param[in] bias_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_reshaped_lhs_t_rhs_nt(IMAGE_DECLARATION(lhs), IMAGE_DECLARATION(rhs),
+#if defined(BETA)
+ IMAGE_DECLARATION(bias),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint k, uint lhs_stride_z,
+ uint rhs_stride_z,
+#if defined(BETA)
+ uint bias_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ // Block size
+#define LHS_BLOCK_SIZE ((K0) * (M0))
+
+#if defined(LHS_INTERLEAVE)
+#define LHS_OFFSET_X (M0)
+#define LHS_STEP_X ((M0) * (V0))
+#define LHS_STEP_LOOP (1)
+#else // defined(LHS_INTERLEAVE)
+#define LHS_OFFSET_X (LHS_BLOCK_SIZE)
+#define LHS_STEP_X (M0)
+#define LHS_STEP_LOOP (V0)
+#endif // defined(LHS_INTERLEAVE)
+
+ // Block size
+#define RHS_BLOCK_SIZE ((K0) * (N0))
+
+ // RHS offset and step X
+#if defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (N0)
+#define RHS_STEP_X ((N0) * (H0))
+#else // defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (RHS_BLOCK_SIZE)
+#define RHS_STEP_X (N0)
+#endif // defined(RHS_INTERLEAVE)
+
+ const uint x = get_global_id(0);
+ const uint y = get_global_id(1);
+ const uint z = get_global_id(2);
+
+#if defined(DUMMY_WORK_ITEMS)
+ if ((x * N0 >= N) || (y * M0 >= M))
+ {
+ return;
+ }
+#endif // defined(DUMMY_WORK_ITEMS)
+
+ // Compute LHS matrix address
+ __global uchar *lhs_addr = lhs_ptr + lhs_offset_first_element_in_bytes +
+ (y % V0) * (uint)LHS_OFFSET_X * sizeof(DATA_TYPE) +
+ (y / V0) * (uint)lhs_stride_y + (z * lhs_stride_z);
+
+ // Compute RHS matrix address
+ __global uchar *rhs_addr = rhs_ptr + rhs_offset_first_element_in_bytes +
+ (x % H0) * (uint)RHS_OFFSET_X * sizeof(DATA_TYPE) +
+ (x / (uint)H0) * rhs_stride_y;
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if matrix B has 3 dimensions and matrix A has more than 3
+ rhs_addr += (z % MATRIX_B_DEPTH) * rhs_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ rhs_addr += z * rhs_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ // Initialize the accumulators
+ REPEAT_VAR_INIT_TO_CONST(M0, VEC_DATA_TYPE(DATA_TYPE_ACCUMULATOR, N0), c, 0);
+
+ REPEAT_VAR_INIT_TO_CONST(M0, uint, zero, 0);
+
+ __global DATA_TYPE *lhs = (__global DATA_TYPE *)(lhs_addr);
+ __global DATA_TYPE *rhs = (__global DATA_TYPE *)(rhs_addr);
+
+ for (int i = 0; i < k; i += K0)
+ {
+ VEC_DATA_TYPE(DATA_TYPE, M0)
+ a0 = VLOAD(M0)(0, lhs);
+ VEC_DATA_TYPE(DATA_TYPE, N0)
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+
+#if K0 > 1
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+#endif // K0 > 1
+
+#if K0 > 2
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+#endif // K0 > 2
+
+#if K0 > 3
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+#endif // K0 > 3
+
+#if K0 > 4
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+#endif // K0 > 4
+
+#if K0 > 8
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+
+ a0 = VLOAD(M0)(0, lhs);
+ b0 = VLOAD(N0)(0, rhs);
+
+ ARM_MM_T_NT(M0, N0, 1, DATA_TYPE, a, b, c);
+
+ lhs += LHS_STEP_X;
+ rhs += RHS_STEP_X;
+#endif // K0 > 8
+
+#ifndef LHS_INTERLEAVE
+ lhs += (M0 * K0 * (V0 - 1));
+#endif // LHS_INTERLEAVE
+
+#ifndef RHS_INTERLEAVE
+ rhs += (N0 * K0 * (H0 - 1));
+#endif // RHS_INTERLEAVE
+ }
+
+ __global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes +
+ (x * (uint)N0 * sizeof(DATA_TYPE)) + (y * (uint)M0 * dst_stride_y);
+
+ REPEAT_VAR_INIT_TO_CONST(M0, uint, zout, 0);
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // The plane (zout) is calculated by dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zout, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, dst_cross_plane_pad,
+ dst_stride_y);
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(M0, DATA_TYPE, c, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+#if defined(BROADCAST_BIAS)
+ __global uchar *bias_addr =
+ bias_ptr + bias_offset_first_element_in_bytes + (x * (uint)N0 * sizeof(DATA_TYPE));
+
+ LOAD_BLOCK(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias[broadcasted]
+#if defined(MIXED_PRECISION)
+ CONVERT_BLOCK(1, N0, DATA_TYPE_ACCUMULATOR, bias, bias_hp);
+ ADD_BLOCK_BROADCAST(M0, c, bias_hp0);
+#else // defined(MIXED_PRECISION)
+ ADD_BLOCK_BROADCAST(M0, c, bias0);
+#endif // defined(MIXED_PRECISION)
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes +
+ (x * (uint)N0 * sizeof(DATA_TYPE)) + (y * (uint)M0 * bias_stride_y) +
+ z * bias_stride_z;
+
+ LOAD_BLOCK(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(M0, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
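+ // c = c + bias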
+#if defined(MIXED_PRECISION)
+ CONVERT_BLOCK(M0, N0, DATA_TYPE_ACCUMULATOR, bias, bias_hp);
+ ADD_BLOCK(M0, c, bias_hp);
+#else // defined(MIXED_PRECISION)
+ ADD_BLOCK(M0, c, bias);
+#endif // defined(MIXED_PRECISION)
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+#if defined(MIXED_PRECISION)
+ ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE_ACCUMULATOR, c, A_VAL, B_VAL);
+#else // defined(MIXED_PRECISION)
+ ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, c, A_VAL, B_VAL);
+#endif // defined(MIXED_PRECISION)
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store output block
+#if defined(MIXED_PRECISION)
+ CONVERT_STORE_BLOCK(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout);
+#else // defined(MIXED_PRECISION)
+ STORE_BLOCK(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout);
+#endif // defined(MIXED_PRECISION)
+
+#undef LHS_BLOCK_SIZE
+#undef LHS_OFFSET_X
+#undef LHS_STEP_X
+#undef RHS_BLOCK_SIZE
+#undef RHS_OFFSET_X
+#undef RHS_STEP_X
+}
+
+#endif // defined(LHS_TRANSPOSE)
+
+#endif // defined(M0) && defined(N0) && defined(K0) && defined(V0) && defined(H0) && defined(K) &&
+ // defined(DATA_TYPE)
+
+#if defined(M0) && defined(N0) && defined(K0) && defined(K) && defined(DATA_TYPE)
+
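+// VFMA accumulates a * b into c using the OpenCL built-in fma().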
+#define VFMA(a, b, c) ({ c = fma(a, b, c); })
+
+#if M0 == 1
+#define RHS_VFMA_M0xN0(i, a, b, c) \
+ ({ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); })
+#elif M0 == 2 // M0 == 2
+#define RHS_VFMA_M0xN0(i, a, b, c) \
+ ({ \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ })
+#elif M0 == 3 // M0 == 3
+#define RHS_VFMA_M0xN0(i, a, b, c) \
+ ({ \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ })
+#elif M0 == 4 // M0 == 4
+#define RHS_VFMA_M0xN0(i, a, b, c) \
+ ({ \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##3).s##i), b, (c##3)); \
+ })
+#elif M0 == 5 // M0 == 5
+#define RHS_VFMA_M0xN0(i, a, b, c) \
+ ({ \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##3).s##i), b, (c##3)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##4).s##i), b, (c##4)); \
+ })
+#elif M0 == 6 // M0 == 6
+#define RHS_VFMA_M0xN0(i, a, b, c) \
+ ({ \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##3).s##i), b, (c##3)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##4).s##i), b, (c##4)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##5).s##i), b, (c##5)); \
+ })
+#elif M0 == 7 // M0 == 7
+#define RHS_VFMA_M0xN0(i, a, b, c) \
+ ({ \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##3).s##i), b, (c##3)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##4).s##i), b, (c##4)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##5).s##i), b, (c##5)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##6).s##i), b, (c##6)); \
+ })
+#elif M0 == 8 // M0 == 8
+#define RHS_VFMA_M0xN0(i, a, b, c) \
+ ({ \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##0).s##i), b, (c##0)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##1).s##i), b, (c##1)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##2).s##i), b, (c##2)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##3).s##i), b, (c##3)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##4).s##i), b, (c##4)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##5).s##i), b, (c##5)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##6).s##i), b, (c##6)); \
+ VFMA((VEC_DATA_TYPE(DATA_TYPE, N0))((a##7).s##i), b, (c##7)); \
+ })
+#else // M0 not supported
+#error "M0 not supported"
+#endif // M0 not supported
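+// Illustrative expansion (assuming M0 == 2, chosen for the example): RHS_VFMA_M0xN0(0, a, b0, c)
+// broadcasts lane 0 of each LHS row vector and performs
+//   c0 = fma((a0).s0, b0, c0); c1 = fma((a1).s0, b0, c1);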
+
+/** This OpenCL kernel computes the matrix multiplication between 2 matrices.
+ * The LHS matrix is NOT reshaped
+ * The RHS matrix is NOT reshaped
+ *
+ * @note If the first two dimensions of NDRange have been dispatched with "dummy_work_items"
+ * support, the option -DDUMMY_WORK_ITEMS must be passed at compile time.
+ * @note The GEMM's dimensions (M, N and K) must be passed at compile time using -DM, -DN and -DK
+ * (e.g. -DM=52, -DN=30 and -DK=90)
+ * @note The number of columns of LHS matrix must be passed at compile time using -DK (e.g. -DK=64)
+ * @note The number of M0 rows to process must be passed at compile time using -DM0 (e.g. -DM0=2)
+ * @note The number of K0 partial accumulations must be passed at compile time using -DK0 (e.g.,
+ * -DK0=2)
+ * @note The number of N0 columns to process must be passed at compile time using -DN0 (e.g. -DN0=2)
+ * @note Only the following configurations of M0, N0 and K0 are currently supported:
+ * - M0 = 1, 2, 3, 4, 5, 6, 7, 8
+ * - N0 = 2, 3, 4, 8, 16
+ * - K0 = 2, 3, 4, 8, 16
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B variables required by some activation functions should be
+ * passed at compile time as well using -DA_VAL= and -DB_VAL= respectively. The activation
+ * function is performed after the bias addition.
+ * @note In case the input or output has to be reinterpreted as a 3D tensor, the following
+ * information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns LHS matrix
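+ * @note Example (illustrative only; the values are assumptions): per the notes above, this
+ *       kernel could be compiled with "-DM=52 -DN=30 -DK=90 -DM0=4 -DN0=4 -DK0=4
+ *       -DDATA_TYPE=float", plus -DALPHA/-DBETA and the 3D options when needed.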
+ *
+ * @param[in] lhs_ptr Pointer to the LHS matrix. Supported data type:
+ * F16/F32
+ * @param[in] lhs_stride_x Stride of the LHS matrix in X dimension (in bytes)
+ * @param[in] lhs_step_x lhs_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] lhs_stride_y Stride of the LHS matrix in Y dimension (in bytes)
+ * @param[in] lhs_step_y lhs_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] lhs_offset_first_element_in_bytes The offset of the first element in the LHS matrix
+ * @param[in] rhs_ptr Pointer to the RHS matrix. Supported data type:
+ * same as @p lhs_ptr
+ * @param[in] rhs_stride_x Stride of the RHS matrix in X dimension (in bytes)
+ * @param[in] rhs_step_x rhs_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] rhs_stride_y Stride of the RHS matrix in Y dimension (in bytes)
+ * @param[in] rhs_step_y rhs_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] rhs_offset_first_element_in_bytes The offset of the first element in the RHS matrix
+ * @param[in] bias_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p lhs_ptr
+ * @param[in] bias_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] bias_step_x (Optional) bias_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] bias_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] bias_step_y (Optional) bias_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] bias_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr Pointer to the destination matrix. Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] lhs_stride_z Stride of the LHS matrix in Z dimension (in bytes)
+ * @param[in] rhs_stride_z Stride of the RHS matrix in Z dimension (in bytes)
+ * @param[in] bias_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] lhs_cross_plane_pad (Optional) Bottom paddings for LHS matrix in unit
+ * of elements (only if defined REINTERPRET_INPUT_AS_3D)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings for the output matrix
+ * in unit of elements (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_native(IMAGE_DECLARATION(lhs), IMAGE_DECLARATION(rhs),
+#if defined(BETA)
+ IMAGE_DECLARATION(bias),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint lhs_stride_z, uint rhs_stride_z,
+#if defined(BETA)
+ uint bias_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint lhs_cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ // Block size
+#define RHS_BLOCK_SIZE ((K0) * (N0))
+
+ // RHS offset and step X
+#define RHS_OFFSET_X (RHS_BLOCK_SIZE)
+
+ uint x = get_global_id(0);
+ uint y = get_global_id(1);
+ uint z = get_global_id(2);
+
+#if defined(DUMMY_WORK_ITEMS)
+ if ((x * N0 >= N) || (y * M0 >= M))
+ {
+ return;
+ }
+#endif // defined(DUMMY_WORK_ITEMS)
+
+ // Compute LHS matrix address
+ uint lhs_offset = lhs_offset_first_element_in_bytes + y * M0 * (uint)lhs_stride_y;
+
+ // Compute RHS matrix address
+ uint rhs_offset = rhs_offset_first_element_in_bytes + x * N0 * sizeof(DATA_TYPE);
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if matrix B has 3 dimensions and matrix A has more than 3
+ rhs_offset += (z % MATRIX_B_DEPTH) * rhs_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ rhs_offset += z * rhs_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ REPEAT_VAR_INIT_TO_CONST(M0, uint, zlhs, 0);
+ REPEAT_VAR_INIT_TO_CONST(16, uint, zero, 0);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // The plane (zlhs) is calculated by dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zlhs, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, lhs_cross_plane_pad,
+ lhs_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply lhs_stride_z by DEPTH_GEMM3D
+ lhs_offset += z * lhs_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ lhs_offset += z * lhs_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Initialize the accumulators
+ REPEAT_VAR_INIT_TO_CONST(M0, VEC_DATA_TYPE(DATA_TYPE, N0), c,
+ 0); // VEC_DATA_TYPE(DATA_TYPE, N0) c0=0,c1=0,c2=0,... c(M0-1)=0;
+
+ int i = 0;
+ for (; i <= (K - K0); i += K0)
+ {
+ // Supported cases (M0, K0):
+ // 1,2 - 1,3 - 1,4 - 1,8 - 1,16
+ // 2,2 - 2,3 - 2,4 - 2,8 - 2,16
+ // 3,2 - 3,3 - 3,4 - 3,8 - 3,16
+ // 4,2 - 4,3 - 4,4 - 4,8 - 4,16
+ // 5,2 - 5,3 - 5,4 - 5,8 - 5,16
+ // 6,2 - 6,3 - 6,4 - 6,8 - 6,16
+ // 7,2 - 7,3 - 7,4 - 7,8 - 7,16
+ // 8,2 - 8,3 - 8,4 - 8,8 - 8,16
+ // Load values from LHS matrix
+ LOAD_BLOCK(M0, K0, DATA_TYPE, a, lhs_ptr, lhs_offset, lhs_stride_y, zlhs);
+
+ // Load values from RHS matrix
+ LOAD_BLOCK(K0, N0, DATA_TYPE, b, rhs_ptr, rhs_offset, rhs_stride_y, zero);
+
+ RHS_VFMA_M0xN0(0, a, b0, c);
+ RHS_VFMA_M0xN0(1, a, b1, c);
+#if K0 > 2
+ RHS_VFMA_M0xN0(2, a, b2, c);
+#endif // K0 > 2
+#if K0 > 3
+ RHS_VFMA_M0xN0(3, a, b3, c);
+#endif // K0 > 3
+#if K0 > 4
+ RHS_VFMA_M0xN0(4, a, b4, c);
+ RHS_VFMA_M0xN0(5, a, b5, c);
+ RHS_VFMA_M0xN0(6, a, b6, c);
+ RHS_VFMA_M0xN0(7, a, b7, c);
+#endif // K0 > 4
+#if K0 > 8
+ RHS_VFMA_M0xN0(8, a, b8, c);
+ RHS_VFMA_M0xN0(9, a, b9, c);
+ RHS_VFMA_M0xN0(A, a, bA, c);
+ RHS_VFMA_M0xN0(B, a, bB, c);
+ RHS_VFMA_M0xN0(C, a, bC, c);
+ RHS_VFMA_M0xN0(D, a, bD, c);
+ RHS_VFMA_M0xN0(E, a, bE, c);
+ RHS_VFMA_M0xN0(F, a, bF, c);
+#endif // K0 > 8
+
+ lhs_offset += K0 * sizeof(DATA_TYPE);
+ rhs_offset += K0 * rhs_stride_y;
+ }
+
+ // Left-over accumulations
+ for (; i < K; ++i)
+ {
+ // Load values from LHS matrix
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a0 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 0 * lhs_stride_y + zlhs0));
+#if M0 > 1
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a1 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 1 * lhs_stride_y + zlhs1));
+#endif // M0 > 1
+#if M0 > 2
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a2 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 2 * lhs_stride_y + zlhs2));
+#endif // M0 > 2
+#if M0 > 3
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a3 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 3 * lhs_stride_y + zlhs3));
+#endif // M0 > 3
+#if M0 > 4
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a4 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 4 * lhs_stride_y + zlhs4));
+#endif // M0 > 4
+#if M0 > 5
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a5 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 5 * lhs_stride_y + zlhs5));
+#endif // M0 > 5
+#if M0 > 6
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a6 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 6 * lhs_stride_y + zlhs6));
+#endif // M0 > 6
+#if M0 > 7
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a7 = *((__global DATA_TYPE *)(lhs_ptr + lhs_offset + 7 * lhs_stride_y + zlhs7));
+#endif // M0 > 7
+
+ VEC_DATA_TYPE(DATA_TYPE, N0)
+ b = VLOAD(N0)(0, (__global DATA_TYPE *)(rhs_ptr + rhs_offset + 0 * rhs_stride_y));
+ RHS_VFMA_M0xN0(0, a, b, c);
+
+ lhs_offset += sizeof(DATA_TYPE);
+ rhs_offset += rhs_stride_y;
+ }
+
+ __global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes +
+ (x * (uint)N0 * sizeof(DATA_TYPE)) + (y * (uint)M0 * dst_stride_y);
+
+ REPEAT_VAR_INIT_TO_CONST(M0, uint, zout, 0);
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ // The plane (zout) is calculated by dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zout, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, dst_cross_plane_pad,
+ dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(M0, DATA_TYPE, c, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+#if defined(BROADCAST_BIAS)
+ __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes +
+ (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE));
+
+ LOAD_BLOCK(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(M0, c, bias0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes +
+ (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE)) +
+ (get_global_id(1) * (uint)M0 * bias_stride_y) +
+ get_global_id(2) * bias_stride_z;
+
+ LOAD_BLOCK(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(M0, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias
+ ADD_BLOCK(M0, c, bias);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, c, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store output block
+ STORE_BLOCK(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout);
+
+#undef RHS_BLOCK_SIZE
+#undef RHS_OFFSET_X
+#undef RHS_STEP_X
+}
+#endif // defined(M0) && defined(N0) && defined(K0) && defined(K) && defined(DATA_TYPE)
+
+#if defined(COLS_B) && defined(MULT_TRANSPOSE1XW_WIDTH) && defined(MULT_INTERLEAVE4X4_HEIGHT)
+/** This OpenCL kernel is optimised for Midgard. It computes the matrix multiplication between
+ * matrix A reshaped (src0) and matrix B reshaped (src1)
+ *
+ * @note The number of columns of matrix B and the optional alpha's value need to be passed at
+ * compile time using -DCOLS_B and -DALPHA
+ * @note The multiplication factor for the transposition width (mult_transpose1xW_width) must be
+ * passed at compile time using -DMULT_TRANSPOSE1XW_WIDTH (e.g. -DMULT_TRANSPOSE1XW_WIDTH=2)
+ * @note The multiplication factor for the height of the 4x4 interleaved block must be passed at
+ * compile time using -DMULT_INTERLEAVE4X4_HEIGHT (e.g. -DMULT_INTERLEAVE4X4_HEIGHT=2)
+ * @note In case the matrix B has 3 dimensions and the matrix A more than 3, in order to avoid
+ * out-of-bounds reads, the number of channels of matrix B must be passed at compile time using
+ * MATRIX_B_DEPTH (e.g. -DMATRIX_B_DEPTH=16) This case can happen when GEMM is used to perform the
+ * element-wise multiplication through a batched matrix multiplication (2D Winograd) and we have
+ * multiple inputs (e.g. a = [K, M, 16, Batches], b = [N, K, 16])
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B variables required by some activation functions should be
+ * passed at compile time as well using -DA_VAL= and -DB_VAL= respectively. The activation
+ * function is performed after the bias addition.
+ * @note In case the output has to be reinterpreted as a 3D tensor (e.g. output of convolution
+ * layer), the following information must be passed at compile time:
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped
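+ * @note Example (illustrative only; the values are assumptions): per the notes above, this
+ *       kernel could be compiled with "-DCOLS_B=128 -DMULT_TRANSPOSE1XW_WIDTH=2
+ *       -DMULT_INTERLEAVE4X4_HEIGHT=2", optionally adding -DALPHA and -DBETA.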
+ *
+ * @param[in] src0_ptr Pointer to the source matrix. Supported data
+ * types: F32
+ * @param[in] src0_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src0_step_x src0_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src0_step_y src0_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src1_ptr Pointer to the source matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] src1_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src1_step_x src1_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src1_step_y src1_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src2_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p src0_ptr
+ * @param[in] src2_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] src2_step_x (Optional) src2_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] src2_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] src2_step_y (Optional) src2_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] src2_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr Pointer to the destination matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src2_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_interleaved_transposed_f32(IMAGE_DECLARATION(src0), IMAGE_DECLARATION(src1),
+#if defined(BETA)
+ IMAGE_DECLARATION(src2),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint src0_stride_z,
+ uint src1_stride_z,
+#if defined(BETA)
+ uint src2_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ int x = get_global_id(0) / MULT_TRANSPOSE1XW_WIDTH;
+ int y = get_global_id(1) / MULT_INTERLEAVE4X4_HEIGHT;
+ int z = get_global_id(2);
+
+ // Offset
+ const int offset_row_a = (get_global_id(1) % MULT_INTERLEAVE4X4_HEIGHT) * 4;
+ const int offset_row_b = (get_global_id(0) % MULT_TRANSPOSE1XW_WIDTH) * 4;
+
+ // src_addr_a = address of matrix A
+ // src_addr_b = address of matrix B
+ int src0_addr_in_bytes =
+ z * src0_stride_z + y * src0_stride_y + src0_offset_first_element_in_bytes;
+ int src1_addr_in_bytes = x * src1_stride_y + src1_offset_first_element_in_bytes;
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if matrix B has 3 dimensions and matrix A has more than 3
+ src1_addr_in_bytes += (z % MATRIX_B_DEPTH) * src1_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ src1_addr_in_bytes += z * src1_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ __global float *src_addr_a = (__global float *)(src0_ptr + src0_addr_in_bytes);
+ __global float *src_addr_b = (__global float *)(src1_ptr + src1_addr_in_bytes);
+
+ // Compute end row address for matrix B
+ __global float *src_end_addr_b = src_addr_b + COLS_B;
+
+ src_addr_a += offset_row_a;
+ src_addr_b += offset_row_b;
+
+ // Reset accumulators
+ float4 c0 = 0.0f;
+ float4 c1 = 0.0f;
+ float4 c2 = 0.0f;
+ float4 c3 = 0.0f;
+
+ for (; src_addr_b <= (src_end_addr_b - (int)(8 * MULT_TRANSPOSE1XW_WIDTH));
+ src_addr_a += 8 * MULT_INTERLEAVE4X4_HEIGHT, src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH)
+ {
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ float4 a0 = vload4(0, src_addr_a);
+ float4 b0 = vload4(0, src_addr_b);
+
+ c0 += (float4)a0.s0 * b0;
+ c1 += (float4)a0.s1 * b0;
+ c2 += (float4)a0.s2 * b0;
+ c3 += (float4)a0.s3 * b0;
+
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ a0 = vload4(0, src_addr_a + 4 * MULT_INTERLEAVE4X4_HEIGHT);
+ b0 = vload4(0, src_addr_b + 4 * MULT_TRANSPOSE1XW_WIDTH);
+
+ c0 += (float4)a0.s0 * b0;
+ c1 += (float4)a0.s1 * b0;
+ c2 += (float4)a0.s2 * b0;
+ c3 += (float4)a0.s3 * b0;
+ }
+
+ for (; src_addr_b < src_end_addr_b;
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT, src_addr_b += 4 * MULT_TRANSPOSE1XW_WIDTH)
+ {
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ float4 a0 = vload4(0, src_addr_a);
+ float4 b0 = vload4(0, src_addr_b);
+
+ c0 += (float4)a0.s0 * b0;
+ c1 += (float4)a0.s1 * b0;
+ c2 += (float4)a0.s2 * b0;
+ c3 += (float4)a0.s3 * b0;
+ }
+
+ // Compute destination address
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ // Compute dst address
+ __global uchar *dst_addr = offset(&dst, 0, 0);
+
+ uint4 zout = 0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ // Since we store a 2D output tile in a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zout) is calculated by dividing M (get_global_id(1) * 4) by HEIGHT_GEMM3D
+ zout = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * 4)) / (uint4)HEIGHT_GEMM3D;
+ zout = min(DEPTH_GEMM3D - 1, zout);
+
+ // Add offset due to the cross plane paddings
+ zout *= (cross_plane_pad * dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(4, float, c, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+ REPEAT_VAR_INIT_TO_CONST(4, uint, zero, 0);
+
+#if defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)4 * sizeof(float));
+
+ LOAD_BLOCK(1, 4, float, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, float, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(4, c, bias0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)4 * sizeof(float)) +
+ (get_global_id(1) * (uint)4 * src2_stride_y) + get_global_id(2) * src2_stride_z;
+
+ LOAD_BLOCK(4, 4, float, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(4, float, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias
+ ADD_BLOCK(4, c, bias);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(4, ACTIVATION_TYPE, float, c, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store 4x4 block
+ vstore4(c0, 0, (__global float *)(dst_addr + 0 * dst_stride_y + zout.s0));
+ vstore4(c1, 0, (__global float *)(dst_addr + 1 * dst_stride_y + zout.s1));
+ vstore4(c2, 0, (__global float *)(dst_addr + 2 * dst_stride_y + zout.s2));
+ vstore4(c3, 0, (__global float *)(dst_addr + 3 * dst_stride_y + zout.s3));
+}
+
+/** This OpenCL kernel is optimized for Bifrost and it computes the matrix multiplication between
+ * matrix A reshaped (src0) and matrix B reshaped (src1)
+ *
+ * @note The number of columns of matrix B and the optional alpha's value need to be passed at
+ * compile time using -DCOLS_B and -DALPHA
+ * @note The multiplication factor for the transposition width (mult_transpose1xW_width) must be
+ * passed at compile time using -DMULT_TRANSPOSE1XW_WIDTH (e.g. -DMULT_TRANSPOSE1XW_WIDTH=2)
+ * @note The multiplication factor for the height of the 4x4 interleaved block must be passed at
+ * compile time using -DMULT_INTERLEAVE4X4_HEIGHT (e.g. -DMULT_INTERLEAVE4X4_HEIGHT=2)
+ * @note In case the matrix B has 3 dimensions and the matrix A more than 3, in order to avoid
+ * out-of-bounds reads, the number of channels of matrix B must be passed at compile time using
+ * MATRIX_B_DEPTH (e.g. -DMATRIX_B_DEPTH=16) This case can happen when GEMM is used to perform the
+ * element-wise multiplication through a batched matrix multiplication (2D Winograd) and we have
+ * multiple inputs (e.g. a = [K, M, 16, Batches], b = [N, K, 16])
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B variables required by some activation functions should be
+ * passed at compile time as well using -DA_VAL= and -DB_VAL= respectively. The activation
+ * function is performed after the bias addition.
+ * @note In case the output has to be reinterpreted as a 3D tensor (e.g. output of convolution
+ * layer), the following information must be passed at compile time:
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped
+ *
+ * @param[in] src0_ptr Pointer to the source matrix. Supported data
+ * types: F32
+ * @param[in] src0_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src0_step_x src0_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src0_step_y src0_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src1_ptr Pointer to the source matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] src1_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src1_step_x src1_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src1_step_y src1_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src2_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p src0_ptr
+ * @param[in] src2_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] src2_step_x (Optional) src2_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] src2_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] src2_step_y (Optional) src2_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] src2_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr Pointer to the destination matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src2_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_interleaved_transposed_f32_bifrost(IMAGE_DECLARATION(src0),
+ IMAGE_DECLARATION(src1),
+#if defined(BETA)
+ IMAGE_DECLARATION(src2),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint src0_stride_z,
+ uint src1_stride_z,
+#if defined(BETA)
+ uint src2_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ int x = get_global_id(0) / MULT_TRANSPOSE1XW_WIDTH;
+ int y = get_global_id(1) / MULT_INTERLEAVE4X4_HEIGHT;
+ int z = get_global_id(2);
+
+ // Offset
+ const int offset_row_a = (get_global_id(1) % MULT_INTERLEAVE4X4_HEIGHT) * 4;
+ const int offset_row_b = (get_global_id(0) % MULT_TRANSPOSE1XW_WIDTH) * 4;
+
+ // src_addr_a = address of matrix A
+ // src_addr_b = address of matrix B
+ int src0_addr_in_bytes =
+ z * src0_stride_z + y * src0_stride_y + src0_offset_first_element_in_bytes;
+ int src1_addr_in_bytes = x * src1_stride_y + src1_offset_first_element_in_bytes;
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if matrix B has 3 dimensions and matrix A has more than 3
+ src1_addr_in_bytes += (z % MATRIX_B_DEPTH) * src1_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ src1_addr_in_bytes += z * src1_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ __global float *src_addr_a = (__global float *)(src0_ptr + src0_addr_in_bytes);
+ __global float *src_addr_b = (__global float *)(src1_ptr + src1_addr_in_bytes);
+
+ src_addr_a += offset_row_a;
+ src_addr_b += offset_row_b;
+
+ // Reset accumulators
+ float4 c0 = 0.0f;
+ float4 c1 = 0.0f;
+ float4 c2 = 0.0f;
+ float4 c3 = 0.0f;
+
+#define COLS_MTX_B (COLS_B / (4 * MULT_TRANSPOSE1XW_WIDTH))
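+ // COLS_MTX_B is the number of 4-element blocks each work-item walks along its row of the
+ // reshaped matrix B; the unrolled loop below consumes four such blocks per iteration and the
+ // second loop handles the remainder.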
+
+ int i = 0;
+ for (; i <= (int)(COLS_MTX_B - 4); i += 4)
+ {
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ float4 a0 = vload4(0, src_addr_a);
+ float4 b0 = vload4(0, src_addr_b);
+
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 4 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0.s0 = fma(a0.s0, b0.s0, c0.s0);
+ c0.s1 = fma(a0.s0, b0.s1, c0.s1);
+ c0.s2 = fma(a0.s0, b0.s2, c0.s2);
+ c0.s3 = fma(a0.s0, b0.s3, c0.s3);
+
+ c1.s0 = fma(a0.s1, b0.s0, c1.s0);
+ c1.s1 = fma(a0.s1, b0.s1, c1.s1);
+ c1.s2 = fma(a0.s1, b0.s2, c1.s2);
+ c1.s3 = fma(a0.s1, b0.s3, c1.s3);
+
+ c2.s0 = fma(a0.s2, b0.s0, c2.s0);
+ c2.s1 = fma(a0.s2, b0.s1, c2.s1);
+ c2.s2 = fma(a0.s2, b0.s2, c2.s2);
+ c2.s3 = fma(a0.s2, b0.s3, c2.s3);
+
+ c3.s0 = fma(a0.s3, b0.s0, c3.s0);
+ c3.s1 = fma(a0.s3, b0.s1, c3.s1);
+ c3.s2 = fma(a0.s3, b0.s2, c3.s2);
+ c3.s3 = fma(a0.s3, b0.s3, c3.s3);
+
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ a0 = vload4(0, src_addr_a);
+ b0 = vload4(0, src_addr_b);
+
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 4 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0.s0 = fma(a0.s0, b0.s0, c0.s0);
+ c0.s1 = fma(a0.s0, b0.s1, c0.s1);
+ c0.s2 = fma(a0.s0, b0.s2, c0.s2);
+ c0.s3 = fma(a0.s0, b0.s3, c0.s3);
+
+ c1.s0 = fma(a0.s1, b0.s0, c1.s0);
+ c1.s1 = fma(a0.s1, b0.s1, c1.s1);
+ c1.s2 = fma(a0.s1, b0.s2, c1.s2);
+ c1.s3 = fma(a0.s1, b0.s3, c1.s3);
+
+ c2.s0 = fma(a0.s2, b0.s0, c2.s0);
+ c2.s1 = fma(a0.s2, b0.s1, c2.s1);
+ c2.s2 = fma(a0.s2, b0.s2, c2.s2);
+ c2.s3 = fma(a0.s2, b0.s3, c2.s3);
+
+ c3.s0 = fma(a0.s3, b0.s0, c3.s0);
+ c3.s1 = fma(a0.s3, b0.s1, c3.s1);
+ c3.s2 = fma(a0.s3, b0.s2, c3.s2);
+ c3.s3 = fma(a0.s3, b0.s3, c3.s3);
+
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ a0 = vload4(0, src_addr_a);
+ b0 = vload4(0, src_addr_b);
+
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 4 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0.s0 = fma(a0.s0, b0.s0, c0.s0);
+ c0.s1 = fma(a0.s0, b0.s1, c0.s1);
+ c0.s2 = fma(a0.s0, b0.s2, c0.s2);
+ c0.s3 = fma(a0.s0, b0.s3, c0.s3);
+
+ c1.s0 = fma(a0.s1, b0.s0, c1.s0);
+ c1.s1 = fma(a0.s1, b0.s1, c1.s1);
+ c1.s2 = fma(a0.s1, b0.s2, c1.s2);
+ c1.s3 = fma(a0.s1, b0.s3, c1.s3);
+
+ c2.s0 = fma(a0.s2, b0.s0, c2.s0);
+ c2.s1 = fma(a0.s2, b0.s1, c2.s1);
+ c2.s2 = fma(a0.s2, b0.s2, c2.s2);
+ c2.s3 = fma(a0.s2, b0.s3, c2.s3);
+
+ c3.s0 = fma(a0.s3, b0.s0, c3.s0);
+ c3.s1 = fma(a0.s3, b0.s1, c3.s1);
+ c3.s2 = fma(a0.s3, b0.s2, c3.s2);
+ c3.s3 = fma(a0.s3, b0.s3, c3.s3);
+
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ a0 = vload4(0, src_addr_a);
+ b0 = vload4(0, src_addr_b);
+
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 4 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0.s0 = fma(a0.s0, b0.s0, c0.s0);
+ c0.s1 = fma(a0.s0, b0.s1, c0.s1);
+ c0.s2 = fma(a0.s0, b0.s2, c0.s2);
+ c0.s3 = fma(a0.s0, b0.s3, c0.s3);
+
+ c1.s0 = fma(a0.s1, b0.s0, c1.s0);
+ c1.s1 = fma(a0.s1, b0.s1, c1.s1);
+ c1.s2 = fma(a0.s1, b0.s2, c1.s2);
+ c1.s3 = fma(a0.s1, b0.s3, c1.s3);
+
+ c2.s0 = fma(a0.s2, b0.s0, c2.s0);
+ c2.s1 = fma(a0.s2, b0.s1, c2.s1);
+ c2.s2 = fma(a0.s2, b0.s2, c2.s2);
+ c2.s3 = fma(a0.s2, b0.s3, c2.s3);
+
+ c3.s0 = fma(a0.s3, b0.s0, c3.s0);
+ c3.s1 = fma(a0.s3, b0.s1, c3.s1);
+ c3.s2 = fma(a0.s3, b0.s2, c3.s2);
+ c3.s3 = fma(a0.s3, b0.s3, c3.s3);
+ }
+
+ for (; i < (int)(COLS_MTX_B); ++i)
+ {
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ float4 a0 = vload4(0, src_addr_a);
+ float4 b0 = vload4(0, src_addr_b);
+
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 4 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0.s0 = fma(a0.s0, b0.s0, c0.s0);
+ c0.s1 = fma(a0.s0, b0.s1, c0.s1);
+ c0.s2 = fma(a0.s0, b0.s2, c0.s2);
+ c0.s3 = fma(a0.s0, b0.s3, c0.s3);
+
+ c1.s0 = fma(a0.s1, b0.s0, c1.s0);
+ c1.s1 = fma(a0.s1, b0.s1, c1.s1);
+ c1.s2 = fma(a0.s1, b0.s2, c1.s2);
+ c1.s3 = fma(a0.s1, b0.s3, c1.s3);
+
+ c2.s0 = fma(a0.s2, b0.s0, c2.s0);
+ c2.s1 = fma(a0.s2, b0.s1, c2.s1);
+ c2.s2 = fma(a0.s2, b0.s2, c2.s2);
+ c2.s3 = fma(a0.s2, b0.s3, c2.s3);
+
+ c3.s0 = fma(a0.s3, b0.s0, c3.s0);
+ c3.s1 = fma(a0.s3, b0.s1, c3.s1);
+ c3.s2 = fma(a0.s3, b0.s2, c3.s2);
+ c3.s3 = fma(a0.s3, b0.s3, c3.s3);
+ }
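+ // Note on the pattern above: each step is a rank-1 update of the 4x4
+ // accumulator tile, c{r}.s{k} += a0.s{r} * b0.s{k} for r, k in 0..3
+ // (i.e. C += outer(a0, b0)), spelled out as scalar fma() calls so the
+ // compiler can keep every accumulator in registers and use the FMA units.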
+
+ // Compute destination address
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ // Compute dst address
+ __global uchar *dst_addr = offset(&dst, 0, 0);
+
+ uint4 zout = 0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ // Since we store a 2D output tile in a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zout) is calculated by dividing M (get_global_id(1) * 4) by HEIGHT_GEMM3D
+ zout = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * 4)) / (uint4)HEIGHT_GEMM3D;
+ zout = min(DEPTH_GEMM3D - 1, zout);
+
+ // Add offset due to the cross plane paddings
+ zout *= (cross_plane_pad * dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
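+ // Worked example for the REINTERPRET_OUTPUT_AS_3D path (illustrative): with
+ // HEIGHT_GEMM3D = 4, DEPTH_GEMM3D = 2 and get_global_id(1) == 1, this tile
+ // covers output rows 4..7, which all fall in plane (4..7) / 4 = 1, so each of
+ // the four stores below is shifted by one cross_plane_pad * dst_stride_y to
+ // skip the padding between plane 0 and plane 1.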
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(4, float, c, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+ REPEAT_VAR_INIT_TO_CONST(4, uint, zero, 0);
+
+#if defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)4 * sizeof(float));
+
+ LOAD_BLOCK(1, 4, float, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, float, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(4, c, bias0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)4 * sizeof(float)) +
+ (get_global_id(1) * (uint)4 * src2_stride_y) + get_global_id(2) * src2_stride_z;
+
+ LOAD_BLOCK(4, 4, float, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(4, float, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias
+ ADD_BLOCK(4, c, bias);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(4, ACTIVATION_TYPE, float, c, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store 4x4 block
+ vstore4(c0, 0, (__global float *)(dst_addr + 0 * dst_stride_y + zout.s0));
+ vstore4(c1, 0, (__global float *)(dst_addr + 1 * dst_stride_y + zout.s1));
+ vstore4(c2, 0, (__global float *)(dst_addr + 2 * dst_stride_y + zout.s2));
+ vstore4(c3, 0, (__global float *)(dst_addr + 3 * dst_stride_y + zout.s3));
+}
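+/* Host-side sketch (illustrative only, not part of this file): the compile-time
+ * configuration documented above is normally injected through the OpenCL
+ * program build options, along the lines of:
+ *
+ *   const char *opts =
+ *     "-DCOLS_B=256 -DMULT_TRANSPOSE1XW_WIDTH=4 -DMULT_INTERLEAVE4X4_HEIGHT=2 "
+ *     "-DALPHA=0.5f -DACTIVATION_TYPE=RELU -DA_VAL=0.0f -DB_VAL=0.0f";
+ *   clBuildProgram(program, 1, &device, opts, NULL, NULL);
+ *
+ * The values are made up for the example; the real ones are derived from the
+ * tensor shapes by the host-side kernel configuration code.
+ */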
+
+// Undefine local defines
+#undef COLS_MTX_B
+
+#if defined(ARM_COMPUTE_OPENCL_FP16_ENABLED)
+/** This OpenCL kernel computes the matrix multiplication between matrix A reshaped (src0) and
+ * matrix B reshaped (src1)
+ *
+ * @note The number of columns of matrix B and the optional alpha's value need to be passed at
+ * compile time using -DCOLS_B and -DALPHA
+ * @note The multiplication factor for the transposition width (mult_transpose1xW_width) must be
+ * passed at compile time using -DMULT_TRANSPOSE1XW_WIDTH (e.g. -DMULT_TRANSPOSE1XW_WIDTH=2)
+ * @note The multiplication factor for the height of the 4x4 interleaved block must be passed at
+ * compile time using -DMULT_INTERLEAVE4X4_HEIGHT (e.g. -DMULT_INTERLEAVE4X4_HEIGHT=2)
+ * @note In case the matrix B has 3 dimensions and the matrix A more than 3, in order to avoid
+ * out-of-bounds reads, the number of channels of matrix B must be passed at compile time using
+ * MATRIX_B_DEPTH (e.g. -DMATRIX_B_DEPTH=16) This case can happen when GEMM is used to perform the
+ * element-wise multiplication through a batched matrix multiplication (2D Winograd) and we have
+ * multiple inputs (e.g. a = [K, M, 16, Batches], b = [N, K, 16])
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B variables required by some activation functions should be
+ * passed at compile time as well, using -DA_VAL= and -DB_VAL= respectively. The activation
+ * function is performed after the bias addition.
+ * @note In case the output has to be reinterpreted as a 3D tensor (e.g. output of convolution
+ * layer), the following information must be passed at compile time:
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped
+ *
+ * @param[in] src0_ptr Pointer to the source matrix. Supported data
+ * types: F16
+ * @param[in] src0_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src0_step_x                         src_stride_x * number of elements along X
+ *                                                processed per workitem (in bytes)
+ * @param[in] src0_stride_y                       Stride of the source matrix in Y dimension (in
+ *                                                bytes)
+ * @param[in] src0_step_y                         src_stride_y * number of elements along Y
+ *                                                processed per workitem (in bytes)
+ * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src1_ptr Pointer to the source matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] src1_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src1_step_x                         src_stride_x * number of elements along X
+ *                                                processed per workitem (in bytes)
+ * @param[in] src1_stride_y                       Stride of the source matrix in Y dimension (in
+ *                                                bytes)
+ * @param[in] src1_step_y                         src_stride_y * number of elements along Y
+ *                                                processed per workitem (in bytes)
+ * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src2_ptr                            (Optional) Pointer to the bias matrix. Supported
+ *                                                data type: same as @p src0_ptr
+ * @param[in] src2_stride_x                       (Optional) Stride of the bias matrix in X
+ *                                                dimension (in bytes)
+ * @param[in] src2_step_x                         (Optional) src2_stride_x * number of elements
+ *                                                along X processed per workitem (in bytes)
+ * @param[in] src2_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] src2_step_y                         (Optional) src2_stride_y * number of elements
+ *                                                along Y processed per workitem (in bytes)
+ * @param[in] src2_offset_first_element_in_bytes  (Optional) The offset of the first element in the
+ *                                                bias matrix
+ * @param[out] dst_ptr                            Pointer to the destination matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x                          dst_stride_x * number of elements along X
+ *                                                processed per workitem (in bytes)
+ * @param[in] dst_stride_y                        Stride of the destination matrix in Y dimension
+ *                                                (in bytes)
+ * @param[in] dst_step_y                          dst_stride_y * number of elements along Y
+ *                                                processed per workitem (in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src2_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_interleaved_transposed_f16(IMAGE_DECLARATION(src0), IMAGE_DECLARATION(src1),
+#if defined(BETA)
+ IMAGE_DECLARATION(src2),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint src0_stride_z,
+ uint src1_stride_z,
+#if defined(BETA)
+ uint src2_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ int x = get_global_id(0) / MULT_TRANSPOSE1XW_WIDTH;
+ int y = get_global_id(1) / MULT_INTERLEAVE4X4_HEIGHT;
+ int z = get_global_id(2);
+
+ // Offset
+ const int offset_row_a = (get_global_id(1) % MULT_INTERLEAVE4X4_HEIGHT) * 4;
+ const int offset_row_b = (get_global_id(0) % MULT_TRANSPOSE1XW_WIDTH) * 8;
+
+ // src_addr_a = address of matrix A
+ // src_addr_b = address of matrix B
+ int src0_addr_in_bytes =
+ z * src0_stride_z + y * src0_stride_y + src0_offset_first_element_in_bytes;
+ int src1_addr_in_bytes = x * src1_stride_y + src1_offset_first_element_in_bytes;
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ src1_addr_in_bytes += (z % MATRIX_B_DEPTH) * src1_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ src1_addr_in_bytes += z * src1_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ __global half *src_addr_a = (__global half *)(src0_ptr + src0_addr_in_bytes);
+ __global half *src_addr_b = (__global half *)(src1_ptr + src1_addr_in_bytes);
+
+ // Compute end row address for matrix B
+ __global half *src_end_addr_b = src_addr_b + COLS_B;
+
+ src_addr_a += offset_row_a;
+ src_addr_b += offset_row_b;
+
+ // Reset accumulators
+ half8 c0 = 0.0f;
+ half8 c1 = 0.0f;
+ half8 c2 = 0.0f;
+ half8 c3 = 0.0f;
+
+ for (; src_addr_b <= (src_end_addr_b - (int)(16 * MULT_TRANSPOSE1XW_WIDTH));
+ src_addr_a += 8 * MULT_INTERLEAVE4X4_HEIGHT, src_addr_b += 16 * MULT_TRANSPOSE1XW_WIDTH)
+ {
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ half4 a0 = vload4(0, src_addr_a);
+ half8 b0 = vload8(0, src_addr_b);
+
+ c0 += (half8)a0.s0 * b0;
+ c1 += (half8)a0.s1 * b0;
+ c2 += (half8)a0.s2 * b0;
+ c3 += (half8)a0.s3 * b0;
+
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ a0 = vload4(0, src_addr_a + 4 * MULT_INTERLEAVE4X4_HEIGHT);
+ b0 = vload8(0, src_addr_b + 8 * MULT_TRANSPOSE1XW_WIDTH);
+
+ c0 += (half8)a0.s0 * b0;
+ c1 += (half8)a0.s1 * b0;
+ c2 += (half8)a0.s2 * b0;
+ c3 += (half8)a0.s3 * b0;
+ }
+
+ for (; src_addr_b < src_end_addr_b;
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT, src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH)
+ {
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ half4 a0 = vload4(0, src_addr_a);
+ half8 b0 = vload8(0, src_addr_b);
+
+ c0 += (half8)a0.s0 * b0;
+ c1 += (half8)a0.s1 * b0;
+ c2 += (half8)a0.s2 * b0;
+ c3 += (half8)a0.s3 * b0;
+ }
+
+ // Compute destination address
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ // Compute dst address
+ __global uchar *dst_addr = offset(&dst, 0, 0);
+
+ uint4 zout = 0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ // Since we store a 2D output tile in a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zout) is calculated by dividing M (get_global_id(1) * 4) by HEIGHT_GEMM3D
+ zout = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * 4)) / (uint4)HEIGHT_GEMM3D;
+ zout = min(DEPTH_GEMM3D - 1, zout);
+
+ // Add offset due to the cross plane paddings
+ zout *= (cross_plane_pad * dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(4, half, c, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+ REPEAT_VAR_INIT_TO_CONST(4, uint, zero, 0);
+
+#if defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)8 * sizeof(half));
+
+ LOAD_BLOCK(1, 8, half, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, half, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(4, c, bias0);
+
+#else // defined(BROADCAST_BIAS)
+
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)8 * sizeof(half)) +
+ (get_global_id(1) * (uint)4 * src2_stride_y) + get_global_id(2) * src2_stride_z;
+
+ LOAD_BLOCK(4, 8, half, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(4, half, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias
+ ADD_BLOCK(4, c, bias);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(4, ACTIVATION_TYPE, half, c, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store 4x8 block
+ vstore8(c0, 0, (__global half *)(dst_addr + 0 * dst_stride_y + zout.s0));
+ vstore8(c1, 0, (__global half *)(dst_addr + 1 * dst_stride_y + zout.s1));
+ vstore8(c2, 0, (__global half *)(dst_addr + 2 * dst_stride_y + zout.s2));
+ vstore8(c3, 0, (__global half *)(dst_addr + 3 * dst_stride_y + zout.s3));
+}
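+// Note: the kernel above accumulates directly in half precision, which is the
+// fastest option on FP16-capable devices but can lose low-order bits when the
+// reduction dimension is large; the *_acc32 variant below keeps float
+// accumulators for those cases.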
+
+/** This OpenCL kernel computes the matrix multiplication between matrix A reshaped (src0) and
+ * matrix B reshaped (src1) while accumulating the result in 32-bit floating point variables.
+ *
+ * @note The number of columns of matrix B and the optional alpha's value need to be passed at
+ * compile time using -DCOLS_B and -DALPHA
+ * @note The multiplication factor for the transposition width (mult_transpose1xW_width) must be
+ * passed at compile time using -DMULT_TRANSPOSE1XW_WIDTH (e.g. -DMULT_TRANSPOSE1XW_WIDTH=2)
+ * @note The multiplication factor for the height of the 4x4 interleaved block must be passed at
+ * compile time using -DMULT_INTERLEAVE4X4_HEIGHT (e.g. -DMULT_INTERLEAVE4X4_HEIGHT=2)
+ * @note In case the matrix B has 3 dimensions and the matrix A more than 3, in order to avoid
+ * out-of-bounds reads, the number of channels of matrix B must be passed at compile time using
+ * MATRIX_B_DEPTH (e.g. -DMATRIX_B_DEPTH=16) This case can happen when GEMM is used to perform the
+ * element-wise multiplication through a batched matrix multiplication (2D Winograd) and we have
+ * multiple inputs (e.g. a = [K, M, 16, Batches], b = [N, K, 16])
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B variables required by some activation functions should be
+ * passed at compile time as well, using -DA_VAL= and -DB_VAL= respectively. The activation
+ * function is performed after the bias addition.
+ * @note In case the output has to be reinterpreted as a 3D tensor (e.g. output of convolution
+ * layer), the following information must be passed at compile time:
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped
+ *
+ * @param[in] src0_ptr Pointer to the source matrix. Supported data
+ * types: F16
+ * @param[in] src0_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src0_step_x                         src_stride_x * number of elements along X
+ *                                                processed per workitem (in bytes)
+ * @param[in] src0_stride_y                       Stride of the source matrix in Y dimension (in
+ *                                                bytes)
+ * @param[in] src0_step_y                         src_stride_y * number of elements along Y
+ *                                                processed per workitem (in bytes)
+ * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src1_ptr Pointer to the source matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] src1_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src1_step_x                         src_stride_x * number of elements along X
+ *                                                processed per workitem (in bytes)
+ * @param[in] src1_stride_y                       Stride of the source matrix in Y dimension (in
+ *                                                bytes)
+ * @param[in] src1_step_y                         src_stride_y * number of elements along Y
+ *                                                processed per workitem (in bytes)
+ * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src2_ptr                            (Optional) Pointer to the bias matrix. Supported
+ *                                                data type: same as @p src0_ptr
+ * @param[in] src2_stride_x                       (Optional) Stride of the bias matrix in X
+ *                                                dimension (in bytes)
+ * @param[in] src2_step_x                         (Optional) src2_stride_x * number of elements
+ *                                                along X processed per workitem (in bytes)
+ * @param[in] src2_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] src2_step_y                         (Optional) src2_stride_y * number of elements
+ *                                                along Y processed per workitem (in bytes)
+ * @param[in] src2_offset_first_element_in_bytes  (Optional) The offset of the first element in the
+ *                                                bias matrix
+ * @param[out] dst_ptr                            Pointer to the destination matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x                          dst_stride_x * number of elements along X
+ *                                                processed per workitem (in bytes)
+ * @param[in] dst_stride_y                        Stride of the destination matrix in Y dimension
+ *                                                (in bytes)
+ * @param[in] dst_step_y                          dst_stride_y * number of elements along Y
+ *                                                processed per workitem (in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src2_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_interleaved_transposed_f16_acc32(IMAGE_DECLARATION(src0),
+ IMAGE_DECLARATION(src1),
+#if defined(BETA)
+ IMAGE_DECLARATION(src2),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint src0_stride_z,
+ uint src1_stride_z,
+#if defined(BETA)
+ uint src2_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ int x = get_global_id(0) / MULT_TRANSPOSE1XW_WIDTH;
+ int y = get_global_id(1) / MULT_INTERLEAVE4X4_HEIGHT;
+ int z = get_global_id(2);
+
+ // Offset
+ const int offset_row_a = (get_global_id(1) % MULT_INTERLEAVE4X4_HEIGHT) * 4;
+ const int offset_row_b = (get_global_id(0) % MULT_TRANSPOSE1XW_WIDTH) * 8;
+
+ // src_addr_a = address of matrix A
+ // src_addr_b = address of matrix B
+ int src0_addr_in_bytes =
+ z * src0_stride_z + y * src0_stride_y + src0_offset_first_element_in_bytes;
+ int src1_addr_in_bytes = x * src1_stride_y + src1_offset_first_element_in_bytes;
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ src1_addr_in_bytes += (z % MATRIX_B_DEPTH) * src1_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ src1_addr_in_bytes += z * src1_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ __global half *src_addr_a = (__global half *)(src0_ptr + src0_addr_in_bytes);
+ __global half *src_addr_b = (__global half *)(src1_ptr + src1_addr_in_bytes);
+
+ // Compute end row address for matrix B
+ __global half *src_end_addr_b = src_addr_b + COLS_B;
+
+ src_addr_a += offset_row_a;
+ src_addr_b += offset_row_b;
+
+ // Reset accumulators
+ float8 c0 = 0.0f;
+ float8 c1 = 0.0f;
+ float8 c2 = 0.0f;
+ float8 c3 = 0.0f;
+
+ for (; src_addr_b <= (src_end_addr_b - (int)(16 * MULT_TRANSPOSE1XW_WIDTH));
+ src_addr_a += 8 * MULT_INTERLEAVE4X4_HEIGHT, src_addr_b += 16 * MULT_TRANSPOSE1XW_WIDTH)
+ {
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ float4 a0 = convert_float4(vload4(0, src_addr_a));
+ float8 b0 = convert_float8(vload8(0, src_addr_b));
+
+ c0 += (float8)a0.s0 * b0;
+ c1 += (float8)a0.s1 * b0;
+ c2 += (float8)a0.s2 * b0;
+ c3 += (float8)a0.s3 * b0;
+
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ a0 = convert_float4(vload4(0, src_addr_a + 4 * MULT_INTERLEAVE4X4_HEIGHT));
+ b0 = convert_float8(vload8(0, src_addr_b + 8 * MULT_TRANSPOSE1XW_WIDTH));
+
+ c0 += (float8)a0.s0 * b0;
+ c1 += (float8)a0.s1 * b0;
+ c2 += (float8)a0.s2 * b0;
+ c3 += (float8)a0.s3 * b0;
+ }
+
+ for (; src_addr_b < src_end_addr_b;
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT, src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH)
+ {
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ float4 a0 = convert_float4(vload4(0, src_addr_a));
+ float8 b0 = convert_float8(vload8(0, src_addr_b));
+
+ c0 += (float8)a0.s0 * b0;
+ c1 += (float8)a0.s1 * b0;
+ c2 += (float8)a0.s2 * b0;
+ c3 += (float8)a0.s3 * b0;
+ }
+
+ // Compute destination address
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ // Compute dst address
+ __global uchar *dst_addr = offset(&dst, 0, 0);
+
+ uint4 zout = 0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ // Since we store a 2D output tile in a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zout) is calculated by dividing M (get_global_id(1) * 4) by HEIGHT_GEMM3D
+ zout = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * 4)) / (uint4)HEIGHT_GEMM3D;
+ zout = min(DEPTH_GEMM3D - 1, zout);
+
+ // Add offset due to the cross plane paddings
+ zout *= (cross_plane_pad * dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(4, float, c, ALPHA);
+#endif // defined(ALPHA)
+
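+ // Add beta*bias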
+#if defined(BETA)
+ REPEAT_VAR_INIT_TO_CONST(4, uint, zero, 0);
+
+#if defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)8 * sizeof(half));
+
+ LOAD_BLOCK(1, 8, half, bias, src2_addr, 0, src2_stride_y, zero);
+
+ float8 bias_f0 = convert_float8(bias0);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, float, bias_f, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(4, c, bias_f0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)8 * sizeof(half)) +
+ (get_global_id(1) * (uint)4 * src2_stride_y) + get_global_id(2) * src2_stride_z;
+
+ LOAD_BLOCK(4, 8, half, bias, src2_addr, 0, src2_stride_y, zero);
+
+ float8 bias_f0 = convert_float8(bias0);
+ float8 bias_f1 = convert_float8(bias1);
+ float8 bias_f2 = convert_float8(bias2);
+ float8 bias_f3 = convert_float8(bias3);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(4, float, bias_f, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias
+ ADD_BLOCK(4, c, bias_f);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+ half8 c_h0 = convert_half8(c0);
+ half8 c_h1 = convert_half8(c1);
+ half8 c_h2 = convert_half8(c2);
+ half8 c_h3 = convert_half8(c3);
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(4, ACTIVATION_TYPE, half, c_h, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store 4x8 block
+ vstore8(c_h0, 0, (__global half *)(dst_addr + 0 * dst_stride_y + zout.s0));
+ vstore8(c_h1, 0, (__global half *)(dst_addr + 1 * dst_stride_y + zout.s1));
+ vstore8(c_h2, 0, (__global half *)(dst_addr + 2 * dst_stride_y + zout.s2));
+ vstore8(c_h3, 0, (__global half *)(dst_addr + 3 * dst_stride_y + zout.s3));
+}
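+/* Why float accumulation helps (illustrative): half has an 11-bit significand,
+ * so once an accumulator reaches 2048.0h, adding 1.0h rounds straight back to
+ * 2048.0h and the term vanishes. The kernel above sums the same products in
+ * float (24-bit significand) and converts back to half only at the end, in
+ * convert_half8(), just before the optional activation and the store.
+ */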
+
+/** This OpenCL kernel optimized for Bifrost architectures computes the matrix multiplication
+ * between matrix A reshaped (src0) and matrix B reshaped (src1)
+ *
+ * @note The number of columns of matrix B and the optional alpha's value need to be passed at
+ * compile time using -DCOLS_B and -DALPHA
+ * @note The multiplication factor for the transposition width (mult_transpose1xW_width) must be
+ * passed at compile time using -DMULT_TRANSPOSE1XW_WIDTH (e.g. -DMULT_TRANSPOSE1XW_WIDTH=2)
+ * @note The multiplication factor for the height of the 4x4 interleaved block must be passed at
+ * compile time using -DMULT_INTERLEAVE4X4_HEIGHT (e.g. -DMULT_INTERLEAVE4X4_HEIGHT=2)
+ * @note In case the matrix B has 3 dimensions and the matrix A more than 3, in order to avoid
+ * out-of-bounds reads, the number of channels of matrix B must be passed at compile time using
+ * MATRIX_B_DEPTH (e.g. -DMATRIX_B_DEPTH=16) This case can happen when GEMM is used to perform the
+ * element-wise multiplication through a batched matrix multiplication (2D Winograd) and we have
+ * multiple inputs (e.g. a = [K, M, 16, Batches], b = [N, K, 16])
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B variables required by some activation functions should be
+ * passed at compile time as well, using -DA_VAL= and -DB_VAL= respectively. The activation
+ * function is performed after the bias addition.
+ * @note In case the output has to be reinterpreted as a 3D tensor (e.g. output of convolution
+ * layer), the following information must be passed at compile time:
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped
+ *
+ * @param[in] src0_ptr Pointer to the source matrix. Supported data
+ * types: F16
+ * @param[in] src0_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src0_step_x                         src_stride_x * number of elements along X
+ *                                                processed per workitem (in bytes)
+ * @param[in] src0_stride_y                       Stride of the source matrix in Y dimension (in
+ *                                                bytes)
+ * @param[in] src0_step_y                         src_stride_y * number of elements along Y
+ *                                                processed per workitem (in bytes)
+ * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src1_ptr Pointer to the source matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] src1_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src1_step_x                         src_stride_x * number of elements along X
+ *                                                processed per workitem (in bytes)
+ * @param[in] src1_stride_y                       Stride of the source matrix in Y dimension (in
+ *                                                bytes)
+ * @param[in] src1_step_y                         src_stride_y * number of elements along Y
+ *                                                processed per workitem (in bytes)
+ * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src2_ptr                            (Optional) Pointer to the bias matrix. Supported
+ *                                                data type: same as @p src0_ptr
+ * @param[in] src2_stride_x                       (Optional) Stride of the bias matrix in X
+ *                                                dimension (in bytes)
+ * @param[in] src2_step_x                         (Optional) src2_stride_x * number of elements
+ *                                                along X processed per workitem (in bytes)
+ * @param[in] src2_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] src2_step_y                         (Optional) src2_stride_y * number of elements
+ *                                                along Y processed per workitem (in bytes)
+ * @param[in] src2_offset_first_element_in_bytes  (Optional) The offset of the first element in the
+ *                                                bias matrix
+ * @param[out] dst_ptr                            Pointer to the destination matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x                          dst_stride_x * number of elements along X
+ *                                                processed per workitem (in bytes)
+ * @param[in] dst_stride_y                        Stride of the destination matrix in Y dimension
+ *                                                (in bytes)
+ * @param[in] dst_step_y                          dst_stride_y * number of elements along Y
+ *                                                processed per workitem (in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src2_stride_z                       (Optional) Stride of the bias matrix in Z
+ *                                                dimension (in bytes)
+ * @param[in] dst_stride_z                        Stride of the destination tensor in Z dimension
+ *                                                (in bytes)
+ * @param[in] cross_plane_pad                     (Optional) Bottom paddings in unit of elements
+ *                                                (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_interleaved_transposed_f16_bifrost(IMAGE_DECLARATION(src0),
+ IMAGE_DECLARATION(src1),
+#if defined(BETA)
+ IMAGE_DECLARATION(src2),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint src0_stride_z,
+ uint src1_stride_z,
+#if defined(BETA)
+ uint src2_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ int x = get_global_id(0) / MULT_TRANSPOSE1XW_WIDTH;
+ int y = get_global_id(1) / MULT_INTERLEAVE4X4_HEIGHT;
+ int z = get_global_id(2);
+
+ // Offset
+ const int offset_row_a = (get_global_id(1) % MULT_INTERLEAVE4X4_HEIGHT) * 4;
+ const int offset_row_b = (get_global_id(0) % MULT_TRANSPOSE1XW_WIDTH) * 8;
+
+ // src_addr_a = address of matrix A
+ // src_addr_b = address of matrix B
+ int src0_addr_in_bytes =
+ z * src0_stride_z + y * src0_stride_y + src0_offset_first_element_in_bytes;
+ int src1_addr_in_bytes = x * src1_stride_y + src1_offset_first_element_in_bytes;
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ src1_addr_in_bytes += (z % MATRIX_B_DEPTH) * src1_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ src1_addr_in_bytes += z * src1_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ __global half *src_addr_a = (__global half *)(src0_ptr + src0_addr_in_bytes);
+ __global half *src_addr_b = (__global half *)(src1_ptr + src1_addr_in_bytes);
+
+ // Compute end row address for matrix B
+ __global half *src_end_addr_b = src_addr_b + COLS_B;
+
+ src_addr_a += offset_row_a;
+ src_addr_b += offset_row_b;
+
+ // Reset accumulators
+ half8 c0 = 0.0f;
+ half8 c1 = 0.0f;
+ half8 c2 = 0.0f;
+ half8 c3 = 0.0f;
+
+#define COLS_MTX_B (COLS_B / (8 * MULT_TRANSPOSE1XW_WIDTH))
+
+ int i = 0;
+ for (; i <= (int)(COLS_MTX_B - 4); i += 4)
+ {
+#if MULT_INTERLEAVE4X4_HEIGHT == 1
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ half8 a0 = vload8(0, src_addr_a);
+ half8 b0 = vload8(0, src_addr_b);
+
+ src_addr_a += 8 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0 = fma((half8)a0.s0, b0, c0);
+ c1 = fma((half8)a0.s1, b0, c1);
+ c2 = fma((half8)a0.s2, b0, c2);
+ c3 = fma((half8)a0.s3, b0, c3);
+
+ // Load values from matrix B (transposed)
+ b0 = vload8(0, src_addr_b);
+
+ src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0 = fma((half8)a0.s4, b0, c0);
+ c1 = fma((half8)a0.s5, b0, c1);
+ c2 = fma((half8)a0.s6, b0, c2);
+ c3 = fma((half8)a0.s7, b0, c3);
+
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ a0 = vload8(0, src_addr_a);
+ b0 = vload8(0, src_addr_b);
+
+ src_addr_a += 8 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0 = fma((half8)a0.s0, b0, c0);
+ c1 = fma((half8)a0.s1, b0, c1);
+ c2 = fma((half8)a0.s2, b0, c2);
+ c3 = fma((half8)a0.s3, b0, c3);
+
+ // Load values from matrix B (transposed)
+ b0 = vload8(0, src_addr_b);
+
+ src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0 = fma((half8)a0.s4, b0, c0);
+ c1 = fma((half8)a0.s5, b0, c1);
+ c2 = fma((half8)a0.s6, b0, c2);
+ c3 = fma((half8)a0.s7, b0, c3);
+#else // MULT_INTERLEAVE4X4_HEIGHT == 1
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ half4 a0 = vload4(0, src_addr_a);
+ half8 b0 = vload8(0, src_addr_b);
+
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0 = fma((half8)a0.s0, b0, c0);
+ c1 = fma((half8)a0.s1, b0, c1);
+ c2 = fma((half8)a0.s2, b0, c2);
+ c3 = fma((half8)a0.s3, b0, c3);
+
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ a0 = vload4(0, src_addr_a);
+ b0 = vload8(0, src_addr_b);
+
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0 = fma((half8)a0.s0, b0, c0);
+ c1 = fma((half8)a0.s1, b0, c1);
+ c2 = fma((half8)a0.s2, b0, c2);
+ c3 = fma((half8)a0.s3, b0, c3);
+
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ a0 = vload4(0, src_addr_a);
+ b0 = vload8(0, src_addr_b);
+
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0 = fma((half8)a0.s0, b0, c0);
+ c1 = fma((half8)a0.s1, b0, c1);
+ c2 = fma((half8)a0.s2, b0, c2);
+ c3 = fma((half8)a0.s3, b0, c3);
+
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ a0 = vload4(0, src_addr_a);
+ b0 = vload8(0, src_addr_b);
+
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0 = fma((half8)a0.s0, b0, c0);
+ c1 = fma((half8)a0.s1, b0, c1);
+ c2 = fma((half8)a0.s2, b0, c2);
+ c3 = fma((half8)a0.s3, b0, c3);
+#endif // MULT_INTERLEAVE4X4_HEIGHT == 1
+ }
+
+ for (; i < (int)(COLS_MTX_B); ++i)
+ {
+ // Load values from matrix A (interleaved) and matrix B (transposed)
+ half4 a0 = vload4(0, src_addr_a);
+ half8 b0 = vload8(0, src_addr_b);
+
+ src_addr_a += 4 * MULT_INTERLEAVE4X4_HEIGHT;
+ src_addr_b += 8 * MULT_TRANSPOSE1XW_WIDTH;
+
+ c0 = fma((half8)a0.s0, b0, c0);
+ c1 = fma((half8)a0.s1, b0, c1);
+ c2 = fma((half8)a0.s2, b0, c2);
+ c3 = fma((half8)a0.s3, b0, c3);
+ }
+
+ // Compute destination address
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ // Compute dst address
+ __global uchar *dst_addr = offset(&dst, 0, 0);
+
+ uint4 zout = 0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ // Since we store a 2D output tile in a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zout) is calculated by dividing M (get_global_id(1) * 4) by HEIGHT_GEMM3D
+ zout = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * 4)) / (uint4)HEIGHT_GEMM3D;
+ zout = min(DEPTH_GEMM3D - 1, zout);
+
+ // Add offset due to the cross plane paddings
+ zout *= (cross_plane_pad * dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(4, half, c, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+ REPEAT_VAR_INIT_TO_CONST(4, uint, zero, 0);
+
+#if defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)8 * sizeof(half));
+
+ LOAD_BLOCK(1, 8, half, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, half, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(4, c, bias0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)8 * sizeof(half)) +
+ (get_global_id(1) * (uint)4 * src2_stride_y) + get_global_id(2) * src2_stride_z;
+
+ LOAD_BLOCK(4, 8, half, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(4, half, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias
+ ADD_BLOCK(4, c, bias);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(4, ACTIVATION_TYPE, half, c, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store 4x8 block
+ vstore8(c0, 0, (__global half *)(dst_addr + 0 * dst_stride_y + zout.s0));
+ vstore8(c1, 0, (__global half *)(dst_addr + 1 * dst_stride_y + zout.s1));
+ vstore8(c2, 0, (__global half *)(dst_addr + 2 * dst_stride_y + zout.s2));
+ vstore8(c3, 0, (__global half *)(dst_addr + 3 * dst_stride_y + zout.s3));
+}
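+// Design note: in the MULT_INTERLEAVE4X4_HEIGHT == 1 specialization above, rows
+// of the interleaved matrix A are contiguous, so a single vload8 fetches two
+// 4-element A columns at once and is consumed across two successive b0 tiles,
+// halving the number of A load instructions relative to the generic half4 path.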
+
+// Undefine local defines
+#undef COLS_MTX_B
+
+#endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED)
+
+#endif // defined(COLS_B) && defined(MULT_TRANSPOSE1XW_WIDTH) && defined(MULT_INTERLEAVE4X4_HEIGHT)
+
+#if defined(COLS_A) && defined(NUM_ELEMS_PROCESSED_PER_THREAD_X) && \
+  defined(NUM_ELEMS_PROCESSED_PER_THREAD_Y)
+#if defined(DATA_TYPE)
+#define VECTOR_TYPE VEC_DATA_TYPE(DATA_TYPE, NUM_ELEMS_PROCESSED_PER_THREAD_X)
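+// For example, building with -DDATA_TYPE=float -DNUM_ELEMS_PROCESSED_PER_THREAD_X=4
+// makes VECTOR_TYPE expand to float4: each work item then computes 4 output
+// columns for every row it handles.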
+/** This OpenCL kernel computes the matrix by matrix multiplication between the matrix A (src0) and
+ * matrix B (src1) in case both matrices have not been reshaped.
+ *
+ * @note This OpenCL kernel works with floating point data types (F16/F32)
+ * @note The floating point data type must be passed at compile time using -DDATA_TYPE (e.g.
+ * -DDATA_TYPE=float)
+ * @note The number of elements processed along the x and y directions must be passed at compile
+ * time using -DNUM_ELEMS_PROCESSED_PER_THREAD_X and -DNUM_ELEMS_PROCESSED_PER_THREAD_Y
+ * @note The number of matrix A columns and the optional alpha's value need to be passed at compile
+ * time using -DCOLS_A and -DALPHA
+ * @note In case the matrix B has 3 dimensions and the matrix A more than 3, in order to avoid
+ * out-of-bounds reads, the number of channels of matrix B must be passed at compile time using
+ * MATRIX_B_DEPTH (e.g. -DMATRIX_B_DEPTH=16) This case can happen when GEMM is used to perform the
+ * element-wise multiplication through a batched matrix multiplication (2D Winograd) and we have
+ * multiple inputs (e.g. a = [K, M, 16, Batches], b = [N, K, 16])
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B variables required by some activation functions should be
+ * passed at compile time as well, using -DA_VAL= and -DB_VAL= respectively. The activation
+ * function is performed after the bias addition.
+ * @note In case the input or output have to be reinterpreted as a 3D tensor, the following
+ * information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped
+ *
+ * @param[in] src0_ptr Pointer to the source matrix. Supported data
+ * types: F16/F32
+ * @param[in] src0_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src0_step_x                         src_stride_x * number of elements along X
+ *                                                processed per workitem (in bytes)
+ * @param[in] src0_stride_y                       Stride of the source matrix in Y dimension (in
+ *                                                bytes)
+ * @param[in] src0_step_y                         src_stride_y * number of elements along Y
+ *                                                processed per workitem (in bytes)
+ * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src1_ptr Pointer to the source matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] src1_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src1_step_x                         src_stride_x * number of elements along X
+ *                                                processed per workitem (in bytes)
+ * @param[in] src1_stride_y                       Stride of the source matrix in Y dimension (in
+ *                                                bytes)
+ * @param[in] src1_step_y                         src_stride_y * number of elements along Y
+ *                                                processed per workitem (in bytes)
+ * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src2_ptr                            (Optional) Pointer to the bias matrix. Supported
+ *                                                data type: same as @p src0_ptr
+ * @param[in] src2_stride_x                       (Optional) Stride of the bias matrix in X
+ *                                                dimension (in bytes)
+ * @param[in] src2_step_x                         (Optional) src2_stride_x * number of elements
+ *                                                along X processed per workitem (in bytes)
+ * @param[in] src2_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] src2_step_y                         (Optional) src2_stride_y * number of elements
+ *                                                along Y processed per workitem (in bytes)
+ * @param[in] src2_offset_first_element_in_bytes  (Optional) The offset of the first element in the
+ *                                                bias matrix
+ * @param[out] dst_ptr                            Pointer to the destination matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x                          dst_stride_x * number of elements along X
+ *                                                processed per workitem (in bytes)
+ * @param[in] dst_stride_y                        Stride of the destination matrix in Y dimension
+ *                                                (in bytes)
+ * @param[in] dst_step_y                          dst_stride_y * number of elements along Y
+ *                                                processed per workitem (in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src2_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] src_cross_plane_pad (Optional) Bottom paddings in unit of elements for
+ * the input tensor (only if defined REINTERPRET_INPUT_AS_3D)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings in unit of elements for
+ * the output tensor (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_floating_point(IMAGE_DECLARATION(src0), IMAGE_DECLARATION(src1),
+#if defined(BETA)
+ IMAGE_DECLARATION(src2),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint src0_stride_z, uint src1_stride_z,
+#if defined(BETA)
+ uint src2_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint src_cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ int idx = get_global_id(0) * NUM_ELEMS_PROCESSED_PER_THREAD_X;
+
+ // Compute starting address for matrix A and matrix B
+ int2 src_addr = ((int2)(src0_offset_first_element_in_bytes, src1_offset_first_element_in_bytes));
+
+ // Update address for the matrix A
+ src_addr.s0 += get_global_id(1) * src0_stride_y * NUM_ELEMS_PROCESSED_PER_THREAD_Y;
+
+ // Update address for the matrix B
+ src_addr.s1 += idx * sizeof(DATA_TYPE);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Since we load a 2D input tile from a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zin) is calculated by dividing M (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)
+ // by HEIGHT_GEMM3D
+ uint4 zin = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) /
+ (uint4)HEIGHT_GEMM3D;
+ zin = min(DEPTH_GEMM3D - 1, zin);
+
+ // Add offset due to the cross plane paddings
+ zin *= (src_cross_plane_pad * src0_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply src0_stride_z by DEPTH_GEMM3D
+ src_addr.s0 += get_global_id(2) * src0_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ src_addr.s0 += get_global_id(2) * src0_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ src_addr.s1 += (get_global_id(2) % MATRIX_B_DEPTH) * src1_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ src_addr.s1 += get_global_id(2) * src1_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
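+ // Illustrative case from the note above: in batched 2D Winograd GEMMs, A may
+ // be [K, M, 16, Batches] while B is only [N, K, 16]; with -DMATRIX_B_DEPTH=16
+ // the modulo wraps get_global_id(2) so every group of 16 batches reuses the
+ // same 16 slices of B instead of reading past its end.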
+
+ int end_row_vec_a = src_addr.s0 + (COLS_A * sizeof(DATA_TYPE));
+
+ VECTOR_TYPE acc0 = 0.0f;
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ VECTOR_TYPE acc1 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ VECTOR_TYPE acc2 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ VECTOR_TYPE acc3 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ for (; src_addr.s0 <= (end_row_vec_a - 2 * (int)sizeof(DATA_TYPE));
+ src_addr += (int2)(2 * sizeof(DATA_TYPE), 2 * src1_stride_y))
+ {
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ LOAD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, 2, DATA_TYPE, a, src0_ptr, src_addr.s0,
+ src0_stride_y, zin.s);
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a0 = vload2(0, (__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a1 = vload2(0, (__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a2 = vload2(0, (__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ a3 = vload2(0, (__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Load values from matrix B
+ VECTOR_TYPE b0 =
+ VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(0, (__global DATA_TYPE *)(src1_ptr + src_addr.s1));
+ VECTOR_TYPE b1 = VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(
+ 0, (__global DATA_TYPE *)(src1_ptr + src_addr.s1 + src1_stride_y));
+
+ // Accumulate
+ acc0 += b0 * (VECTOR_TYPE)a0.s0;
+ acc0 += b1 * (VECTOR_TYPE)a0.s1;
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 += b0 * (VECTOR_TYPE)a1.s0;
+ acc1 += b1 * (VECTOR_TYPE)a1.s1;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 += b0 * (VECTOR_TYPE)a2.s0;
+ acc2 += b1 * (VECTOR_TYPE)a2.s1;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 += b0 * (VECTOR_TYPE)a3.s0;
+ acc3 += b1 * (VECTOR_TYPE)a3.s1;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ }
+
+ for (; src_addr.s0 < end_row_vec_a; src_addr += (int2)(sizeof(DATA_TYPE), src1_stride_y))
+ {
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ DATA_TYPE a0 = *((__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y + zin.s0));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ DATA_TYPE a1 = *((__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y + zin.s1));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ DATA_TYPE a2 = *((__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y + zin.s2));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ DATA_TYPE a3 = *((__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y + zin.s3));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ DATA_TYPE a0 = *((__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ DATA_TYPE a1 = *((__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ DATA_TYPE a2 = *((__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ DATA_TYPE a3 = *((__global DATA_TYPE *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Load values from matrix B
+ VECTOR_TYPE b0 =
+ VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(0, (__global DATA_TYPE *)(src1_ptr + src_addr.s1));
+
+ // Accumulate
+ acc0 += b0 * (VECTOR_TYPE)a0;
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 += b0 * (VECTOR_TYPE)a1;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 += b0 * (VECTOR_TYPE)a2;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 += b0 * (VECTOR_TYPE)a3;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ }
+
+ int z = get_global_id(2);
+
+ // Compute destination address
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ // Compute dst address
+ __global uchar *dst_addr = offset(&dst, 0, 0);
+
+ uint4 zout = 0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Since we store a 2D output tile in a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zout) is calculated by dividing M (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)
+ // by HEIGHT_GEMM3D
+ zout = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) /
+ (uint4)HEIGHT_GEMM3D;
+ zout = min(DEPTH_GEMM3D - 1, zout);
+
+ // Add offset due to the cross plane paddings
+ zout *= (dst_cross_plane_pad * dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, DATA_TYPE, acc, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+ REPEAT_VAR_INIT_TO_CONST(NUM_ELEMS_PROCESSED_PER_THREAD_Y, uint, zero, 0);
+
+#if defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes +
+ (get_global_id(0) * (uint)NUM_ELEMS_PROCESSED_PER_THREAD_X * sizeof(DATA_TYPE));
+
+ LOAD_BLOCK(1, NUM_ELEMS_PROCESSED_PER_THREAD_X, DATA_TYPE, bias, src2_addr, 0, src2_stride_y,
+ zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(NUM_ELEMS_PROCESSED_PER_THREAD_Y, acc, bias0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes +
+ (get_global_id(0) * (uint)NUM_ELEMS_PROCESSED_PER_THREAD_X * sizeof(DATA_TYPE)) +
+ (get_global_id(1) * (uint)NUM_ELEMS_PROCESSED_PER_THREAD_Y * src2_stride_y) +
+ get_global_id(2) * src2_stride_z;
+
+ LOAD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, NUM_ELEMS_PROCESSED_PER_THREAD_X, DATA_TYPE, bias,
+ src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, DATA_TYPE, bias, BETA);
+#endif // UNIT_BETA
+
+ // c = c + bias
+ ADD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, acc, bias);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, ACTIVATION_TYPE, DATA_TYPE, acc, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store output block
+ STORE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, NUM_ELEMS_PROCESSED_PER_THREAD_X, DATA_TYPE, acc,
+ dst_addr, dst_stride_y, zout.s);
+}
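+/* Host-side sketch (illustrative only): each work item of the kernel above
+ * produces a NUM_ELEMS_PROCESSED_PER_THREAD_X x NUM_ELEMS_PROCESSED_PER_THREAD_Y
+ * output tile, so a plausible dispatch for an M x N output with B batches is:
+ *
+ *   size_t gws[3] = {(N + NX - 1) / NX,  // NX = NUM_ELEMS_PROCESSED_PER_THREAD_X
+ *                    (M + NY - 1) / NY,  // NY = NUM_ELEMS_PROCESSED_PER_THREAD_Y
+ *                    B};
+ *   clEnqueueNDRangeKernel(queue, kernel, 3, NULL, gws, NULL, 0, NULL, NULL);
+ *
+ * The real launcher also rounds the sizes up to the work-group shape and
+ * accounts for tensor padding.
+ */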
+#endif // defined(DATA_TYPE)
+
+/** This OpenCL kernel computes the matrix by matrix multiplication between the matrix A (src0) and
+ * matrix B (src1) in case both matrices have not been reshaped
+ *
+ * @note This OpenCL kernel works with the 32-bit floating point data type (float) and uses the fma
+ * units.
+ * @note The number of elements processed along the x and y directions must be passed at compile
+ * time using -DNUM_ELEMS_PROCESSED_PER_THREAD_X and -DNUM_ELEMS_PROCESSED_PER_THREAD_Y. This kernel
+ * performs best with -DNUM_ELEMS_PROCESSED_PER_THREAD_X=4.
+ * @note The number of matrix A columns must be passed at compile time using -DCOLS_A.
+ * @note The optional value of scalar alpha is passed at compile time using -DALPHA=alpha
+ * @note In case the matrix B has 3 dimensions and the matrix A more than 3, in order to avoid
+ * out-of-bounds reads, the number of channels of matrix B must be passed at compile time using
+ * MATRIX_B_DEPTH (e.g. -DMATRIX_B_DEPTH=16). This case can happen when GEMM is used to perform the
+ * element-wise multiplication through a batched matrix multiplication (2D Winograd) and we have
+ * multiple inputs (e.g. a = [K, M, 16, Batches], b = [N, K, 16])
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B variables required by some activation functions should be
+ * passed at compile time as well, using -DA_VAL= and -DB_VAL= respectively. The activation
+ * function is performed after the bias addition
+ * @note In case the input or output have to be reinterpreted as a 3D tensor, the following
+ * information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped
+ *
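+ * @note For illustration only, one possible (assumed, not a documented default) set of build
+ * options for this kernel is:
+ *       -DNUM_ELEMS_PROCESSED_PER_THREAD_X=4 -DNUM_ELEMS_PROCESSED_PER_THREAD_Y=4 -DCOLS_A=256
+ *       -DALPHA=0.5f -DBETA=2.0f -DACTIVATION_TYPE=RELU -DA_VAL=0.0f -DB_VAL=0.0f
+ *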
+ * @param[in] src0_ptr Pointer to the source matrix. Supported data
+ * types: F32
+ * @param[in] src0_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src0_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src0_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src1_ptr Pointer to the source matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] src1_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src1_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src1_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src2_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p src0_ptr
+ * @param[in] src2_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] src2_step_x (Optional) src2_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] src2_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] src2_step_y (Optional) src2_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] src2_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr                            Pointer to the destination matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x                         dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y                         dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src2_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] src_cross_plane_pad                (Optional) Bottom paddings in units of elements for
+ * the input tensor (only if defined REINTERPRET_INPUT_AS_3D)
+ * @param[in] dst_cross_plane_pad                (Optional) Bottom paddings in units of elements
+ * (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_floating_point_f32_bifrost(IMAGE_DECLARATION(src0), IMAGE_DECLARATION(src1),
+#if defined(BETA)
+ IMAGE_DECLARATION(src2),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint src0_stride_z,
+ uint src1_stride_z,
+#if defined(BETA)
+ uint src2_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint src_cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ int idx = get_global_id(0) * NUM_ELEMS_PROCESSED_PER_THREAD_X;
+
+ // Compute starting address for matrix A and matrix B
+ int2 src_addr = ((int2)(src0_offset_first_element_in_bytes, src1_offset_first_element_in_bytes));
+
+ // Update address for matrix A
+ src_addr.s0 += get_global_id(1) * src0_stride_y * NUM_ELEMS_PROCESSED_PER_THREAD_Y;
+
+ // Update address for matrix B
+ src_addr.s1 += idx * sizeof(float);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Since we load a 2D input tile from a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zin) is calculated dividing M (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)
+ // by HEIGHT_GEMM3D
+ uint4 zin = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) /
+ (uint4)HEIGHT_GEMM3D;
+ zin = min(DEPTH_GEMM3D - 1, zin);
+
+ // Add offset due to the cross plane paddings
+ zin *= (src_cross_plane_pad * src0_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply src0_stride_z by DEPTH_GEMM3D
+ src_addr.s0 += get_global_id(2) * src0_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ src_addr.s0 += get_global_id(2) * src0_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ src_addr.s1 += (get_global_id(2) % MATRIX_B_DEPTH) * src1_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ src_addr.s1 += get_global_id(2) * src1_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ // Initialize accumulators
+ float4 acc0 = 0.0f;
+
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ float4 acc1 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ float4 acc2 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ float4 acc3 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ // A and B src indices get incremented at the same time.
+ int i = 0;
+ for (; i <= ((int)COLS_A - 4); i += 4)
+ {
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A and matrix B
+ LOAD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, 4, float, a, src0_ptr, src_addr.s0, src0_stride_y,
+ zin.s);
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A and matrix B
+ float4 a0 = vload4(0, (__global float *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ float4 a1 = vload4(0, (__global float *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ float4 a2 = vload4(0, (__global float *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ float4 a3 = vload4(0, (__global float *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ float4 b0 = vload4(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+
+ // Multiply and accumulate
+ acc0.s0 = fma(a0.s0, b0.s0, acc0.s0);
+ acc0.s1 = fma(a0.s0, b0.s1, acc0.s1);
+ acc0.s2 = fma(a0.s0, b0.s2, acc0.s2);
+ acc0.s3 = fma(a0.s0, b0.s3, acc0.s3);
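+    // (the four fma calls above compute acc0 += a0.s0 * b0; together with the guarded rows
+    // below, each step accumulates one column of A times one row of B, i.e. a rank-1 update of
+    // the output sub-block)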
+
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+
+ acc1.s0 = fma(a1.s0, b0.s0, acc1.s0);
+ acc1.s1 = fma(a1.s0, b0.s1, acc1.s1);
+ acc1.s2 = fma(a1.s0, b0.s2, acc1.s2);
+ acc1.s3 = fma(a1.s0, b0.s3, acc1.s3);
+
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+
+ acc2.s0 = fma(a2.s0, b0.s0, acc2.s0);
+ acc2.s1 = fma(a2.s0, b0.s1, acc2.s1);
+ acc2.s2 = fma(a2.s0, b0.s2, acc2.s2);
+ acc2.s3 = fma(a2.s0, b0.s3, acc2.s3);
+
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ acc3.s0 = fma(a3.s0, b0.s0, acc3.s0);
+ acc3.s1 = fma(a3.s0, b0.s1, acc3.s1);
+ acc3.s2 = fma(a3.s0, b0.s2, acc3.s2);
+ acc3.s3 = fma(a3.s0, b0.s3, acc3.s3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ // Load values from matrix A and matrix B
+ b0 = vload4(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+
+ // Multiply and accumulate
+ acc0.s0 = fma(a0.s1, b0.s0, acc0.s0);
+ acc0.s1 = fma(a0.s1, b0.s1, acc0.s1);
+ acc0.s2 = fma(a0.s1, b0.s2, acc0.s2);
+ acc0.s3 = fma(a0.s1, b0.s3, acc0.s3);
+
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+
+ acc1.s0 = fma(a1.s1, b0.s0, acc1.s0);
+ acc1.s1 = fma(a1.s1, b0.s1, acc1.s1);
+ acc1.s2 = fma(a1.s1, b0.s2, acc1.s2);
+ acc1.s3 = fma(a1.s1, b0.s3, acc1.s3);
+
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+
+ acc2.s0 = fma(a2.s1, b0.s0, acc2.s0);
+ acc2.s1 = fma(a2.s1, b0.s1, acc2.s1);
+ acc2.s2 = fma(a2.s1, b0.s2, acc2.s2);
+ acc2.s3 = fma(a2.s1, b0.s3, acc2.s3);
+
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ acc3.s0 = fma(a3.s1, b0.s0, acc3.s0);
+ acc3.s1 = fma(a3.s1, b0.s1, acc3.s1);
+ acc3.s2 = fma(a3.s1, b0.s2, acc3.s2);
+ acc3.s3 = fma(a3.s1, b0.s3, acc3.s3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ // Load values from matrix A and matrix B
+ b0 = vload4(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+
+ // Multiply and accumulate
+ acc0.s0 = fma(a0.s2, b0.s0, acc0.s0);
+ acc0.s1 = fma(a0.s2, b0.s1, acc0.s1);
+ acc0.s2 = fma(a0.s2, b0.s2, acc0.s2);
+ acc0.s3 = fma(a0.s2, b0.s3, acc0.s3);
+
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+
+ acc1.s0 = fma(a1.s2, b0.s0, acc1.s0);
+ acc1.s1 = fma(a1.s2, b0.s1, acc1.s1);
+ acc1.s2 = fma(a1.s2, b0.s2, acc1.s2);
+ acc1.s3 = fma(a1.s2, b0.s3, acc1.s3);
+
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+
+ acc2.s0 = fma(a2.s2, b0.s0, acc2.s0);
+ acc2.s1 = fma(a2.s2, b0.s1, acc2.s1);
+ acc2.s2 = fma(a2.s2, b0.s2, acc2.s2);
+ acc2.s3 = fma(a2.s2, b0.s3, acc2.s3);
+
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ acc3.s0 = fma(a3.s2, b0.s0, acc3.s0);
+ acc3.s1 = fma(a3.s2, b0.s1, acc3.s1);
+ acc3.s2 = fma(a3.s2, b0.s2, acc3.s2);
+ acc3.s3 = fma(a3.s2, b0.s3, acc3.s3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ // Load values from matrix A and matrix B
+ b0 = vload4(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+
+ // Multiply and accumulate
+ acc0.s0 = fma(a0.s3, b0.s0, acc0.s0);
+ acc0.s1 = fma(a0.s3, b0.s1, acc0.s1);
+ acc0.s2 = fma(a0.s3, b0.s2, acc0.s2);
+ acc0.s3 = fma(a0.s3, b0.s3, acc0.s3);
+
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+
+ acc1.s0 = fma(a1.s3, b0.s0, acc1.s0);
+ acc1.s1 = fma(a1.s3, b0.s1, acc1.s1);
+ acc1.s2 = fma(a1.s3, b0.s2, acc1.s2);
+ acc1.s3 = fma(a1.s3, b0.s3, acc1.s3);
+
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+
+ acc2.s0 = fma(a2.s3, b0.s0, acc2.s0);
+ acc2.s1 = fma(a2.s3, b0.s1, acc2.s1);
+ acc2.s2 = fma(a2.s3, b0.s2, acc2.s2);
+ acc2.s3 = fma(a2.s3, b0.s3, acc2.s3);
+
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ acc3.s0 = fma(a3.s3, b0.s0, acc3.s0);
+ acc3.s1 = fma(a3.s3, b0.s1, acc3.s1);
+ acc3.s2 = fma(a3.s3, b0.s2, acc3.s2);
+ acc3.s3 = fma(a3.s3, b0.s3, acc3.s3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ src_addr.s0 += 4 * sizeof(float);
+ }
+
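+  // Leftover loop: process the remaining (COLS_A % 4) columns of matrix A one at a time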
+ for (; i < (int)COLS_A; ++i)
+ {
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ float a0 = *((__global float *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y + zin.s0));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ float a1 = *((__global float *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y + zin.s1));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ float a2 = *((__global float *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y + zin.s2));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ float a3 = *((__global float *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y + zin.s3));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ float a0 = *((__global float *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ float a1 = *((__global float *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ float a2 = *((__global float *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ float a3 = *((__global float *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Load values from matrix B
+ float4 b0 = vload4(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+
+ // Multiply and accumulate
+ acc0.s0 = fma(a0, b0.s0, acc0.s0);
+ acc0.s1 = fma(a0, b0.s1, acc0.s1);
+ acc0.s2 = fma(a0, b0.s2, acc0.s2);
+ acc0.s3 = fma(a0, b0.s3, acc0.s3);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1.s0 = fma(a1, b0.s0, acc1.s0);
+ acc1.s1 = fma(a1, b0.s1, acc1.s1);
+ acc1.s2 = fma(a1, b0.s2, acc1.s2);
+ acc1.s3 = fma(a1, b0.s3, acc1.s3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2.s0 = fma(a2, b0.s0, acc2.s0);
+ acc2.s1 = fma(a2, b0.s1, acc2.s1);
+ acc2.s2 = fma(a2, b0.s2, acc2.s2);
+ acc2.s3 = fma(a2, b0.s3, acc2.s3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3.s0 = fma(a3, b0.s0, acc3.s0);
+ acc3.s1 = fma(a3, b0.s1, acc3.s1);
+ acc3.s2 = fma(a3, b0.s2, acc3.s2);
+ acc3.s3 = fma(a3, b0.s3, acc3.s3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ src_addr.s0 += sizeof(float);
+ }
+
+ int z = get_global_id(2);
+
+ // Compute destination address
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ // Compute dst address
+ __global uchar *dst_addr = offset(&dst, 0, 0);
+
+ uint4 zout = 0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ // Since we store a 2D output tile in a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zout) is calculated dividing M (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)
+ // by HEIGHT_GEMM3D
+ zout = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) /
+ (uint4)HEIGHT_GEMM3D;
+ zout = min(DEPTH_GEMM3D - 1, zout);
+
+ // Add offset due to the cross plane paddings
+ zout *= (dst_cross_plane_pad * dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, float, acc, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+ REPEAT_VAR_INIT_TO_CONST(NUM_ELEMS_PROCESSED_PER_THREAD_Y, uint, zero, 0);
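+  // (zero0..zeroN above are the per-row z-offsets handed to LOAD_BLOCK; the bias is read as a
+  // plain 2D tensor with no cross-plane padding, so they are left at 0)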
+
+#if defined(BROADCAST_BIAS)
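+  // Note: the hard-coded 4 in the bias address and load below assumes
+  // NUM_ELEMS_PROCESSED_PER_THREAD_X == 4, matching the vload4/vstore4 used by this kernel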
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)4 * sizeof(float));
+
+ LOAD_BLOCK(1, 4, float, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, float, bias, BETA);
+#endif // UNIT_BETA
+
+ // acc = acc + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(NUM_ELEMS_PROCESSED_PER_THREAD_Y, acc, bias0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)4 * sizeof(float)) +
+ (get_global_id(1) * (uint)NUM_ELEMS_PROCESSED_PER_THREAD_Y * src2_stride_y) +
+ get_global_id(2) * src2_stride_z;
+
+ LOAD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, 4, float, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, float, bias, BETA);
+#endif // UNIT_BETA
+
+ // acc = acc + bias
+ ADD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, acc, bias);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, ACTIVATION_TYPE, float, acc, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store the output block
+ vstore4(acc0, 0, (__global float *)(dst_addr + 0 * dst_stride_y + zout.s0));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ vstore4(acc1, 0, (__global float *)(dst_addr + 1 * dst_stride_y + zout.s1));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ vstore4(acc2, 0, (__global float *)(dst_addr + 2 * dst_stride_y + zout.s2));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ vstore4(acc3, 0, (__global float *)(dst_addr + 3 * dst_stride_y + zout.s3));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+}
+
+/** This OpenCL kernel computes the matrix by matrix multiplication between the matrix A (src0) and
+ * matrix B (src1) in case both matrices have not been reshaped
+ *
+ * @note This OpenCL kernel works with the 32-bit floating point data type (float) and uses the fma
+ * units. This OpenCL kernel is optimized for Bifrost when the number of matrix B columns is less
+ * than or equal to 1000.
+ * @note The number of elements processed along the x and y directions must be passed at compile
+ * time using -DNUM_ELEMS_PROCESSED_PER_THREAD_X and -DNUM_ELEMS_PROCESSED_PER_THREAD_Y. This kernel
+ * optimally uses -DNUM_ELEMS_PROCESSED_PER_THREAD_X=2.
+ * @note The number of matrix A columns must be passed at compile time using -DCOLS_A.
+ * @note The optional value of scalar alpha is passed at compile time using -DALPHA=alpha if
+ * alpha!=1.0f.
+ * @note In case the matrix B has 3 dimensions and the matrix A more than 3, in order to avoid
+ * out-of-bounds reads, the number of channels of matrix B must be passed at compile time using
+ * MATRIX_B_DEPTH (e.g. -DMATRIX_B_DEPTH=16). This case can happen when GEMM is used to perform the
+ * element-wise multiplication through a batched matrix multiplication (2D Winograd) and we have
+ * multiple inputs (e.g. a = [K, M, 16, Batches], b = [N, K, 16])
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B variables required by some activation functions should be
+ * passed at compile time as well, using -DA_VAL= and -DB_VAL= respectively. The activation
+ * function is performed after the bias addition
+ * @note In case the input or output have to be reinterpreted as a 3D tensor, the following
+ * information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped
+ *
+ * @param[in] src0_ptr Pointer to the source matrix. Supported data
+ * types: F32
+ * @param[in] src0_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src0_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src0_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src1_ptr Pointer to the source matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] src1_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src1_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src1_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src2_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p src0_ptr
+ * @param[in] src2_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] src2_step_x (Optional) src2_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] src2_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] src2_step_y (Optional) src2_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] src2_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr                            Pointer to the destination matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x                         dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y                         dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src2_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] src_cross_plane_pad                (Optional) Bottom paddings in units of elements for
+ * the input tensor (only if defined REINTERPRET_INPUT_AS_3D)
+ * @param[in] dst_cross_plane_pad                (Optional) Bottom paddings in units of elements
+ * (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_floating_point_f32_bifrost_1000(IMAGE_DECLARATION(src0),
+ IMAGE_DECLARATION(src1),
+#if defined(BETA)
+ IMAGE_DECLARATION(src2),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint src0_stride_z,
+ uint src1_stride_z,
+#if defined(BETA)
+ uint src2_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint src_cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+  // Requires NUM_ELEMS_PROCESSED_PER_THREAD_X == 2 (C kept as a vec2, A loaded as a vec4, B as
+  // two vload2 reads); to fix for NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ int idx = get_global_id(0) * NUM_ELEMS_PROCESSED_PER_THREAD_X;
+
+ // Compute starting address for matrix A and Matrix B
+ int2 src_addr = ((int2)(src0_offset_first_element_in_bytes, src1_offset_first_element_in_bytes));
+
+ // Update address for the matrix A
+ src_addr.s0 += get_global_id(1) * src0_stride_y * NUM_ELEMS_PROCESSED_PER_THREAD_Y;
+
+ // Update address for the matrix B
+ src_addr.s1 += idx * sizeof(float);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Since we load a 2D input tile from a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zin) is calculated dividing M (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)
+ // by HEIGHT_GEMM3D
+ uint4 zin = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) /
+ (uint4)HEIGHT_GEMM3D;
+ zin = min(DEPTH_GEMM3D - 1, zin);
+
+ // Add offset due to the cross plane paddings
+ zin *= (src_cross_plane_pad * src0_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply src0_stride_z by DEPTH_GEMM3D
+ src_addr.s0 += get_global_id(2) * src0_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ src_addr.s0 += get_global_id(2) * src0_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ src_addr.s1 += (get_global_id(2) % MATRIX_B_DEPTH) * src1_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ src_addr.s1 += get_global_id(2) * src1_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ // Initialize accumulators
+ float2 acc0 = 0.0f;
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ float2 acc1 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ float2 acc2 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ float2 acc3 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ // A and B src indices get incremented at the same time.
+ int i = 0;
+ for (; i <= ((int)COLS_A - 8); i += 8)
+ {
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ float8 a0 = vload8(0, (__global float *)(src0_ptr + src_addr.s0 + zin.s0));
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ float8 a0 = vload8(0, (__global float *)(src0_ptr + src_addr.s0));
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Load values from matrix B
+ float2 b0 = vload2(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+ float2 b1 = vload2(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+ float2 b2 = vload2(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+ float2 b3 = vload2(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+ float2 b4 = vload2(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+ float2 b5 = vload2(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+ float2 b6 = vload2(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+ float2 b7 = vload2(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
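+    // (eight rows of matrix B are consumed per iteration, one per element of the float8 a0 above)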
+
+ // Multiply and accumulate
+ acc0.s0 = fma(a0.s0, b0.s0, acc0.s0);
+ acc0.s0 = fma(a0.s1, b1.s0, acc0.s0);
+ acc0.s0 = fma(a0.s2, b2.s0, acc0.s0);
+ acc0.s0 = fma(a0.s3, b3.s0, acc0.s0);
+ acc0.s0 = fma(a0.s4, b4.s0, acc0.s0);
+ acc0.s0 = fma(a0.s5, b5.s0, acc0.s0);
+ acc0.s0 = fma(a0.s6, b6.s0, acc0.s0);
+ acc0.s0 = fma(a0.s7, b7.s0, acc0.s0);
+
+ acc0.s1 = fma(a0.s0, b0.s1, acc0.s1);
+ acc0.s1 = fma(a0.s1, b1.s1, acc0.s1);
+ acc0.s1 = fma(a0.s2, b2.s1, acc0.s1);
+ acc0.s1 = fma(a0.s3, b3.s1, acc0.s1);
+ acc0.s1 = fma(a0.s4, b4.s1, acc0.s1);
+ acc0.s1 = fma(a0.s5, b5.s1, acc0.s1);
+ acc0.s1 = fma(a0.s6, b6.s1, acc0.s1);
+ acc0.s1 = fma(a0.s7, b7.s1, acc0.s1);
+
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if defined(REINTERPRET_INPUT_AS_3D)
+ a0 = vload8(0, (__global float *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y + zin.s1));
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ a0 = vload8(0, (__global float *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+ acc1.s0 = fma(a0.s0, b0.s0, acc1.s0);
+ acc1.s0 = fma(a0.s1, b1.s0, acc1.s0);
+ acc1.s0 = fma(a0.s2, b2.s0, acc1.s0);
+ acc1.s0 = fma(a0.s3, b3.s0, acc1.s0);
+ acc1.s0 = fma(a0.s4, b4.s0, acc1.s0);
+ acc1.s0 = fma(a0.s5, b5.s0, acc1.s0);
+ acc1.s0 = fma(a0.s6, b6.s0, acc1.s0);
+ acc1.s0 = fma(a0.s7, b7.s0, acc1.s0);
+
+ acc1.s1 = fma(a0.s0, b0.s1, acc1.s1);
+ acc1.s1 = fma(a0.s1, b1.s1, acc1.s1);
+ acc1.s1 = fma(a0.s2, b2.s1, acc1.s1);
+ acc1.s1 = fma(a0.s3, b3.s1, acc1.s1);
+ acc1.s1 = fma(a0.s4, b4.s1, acc1.s1);
+ acc1.s1 = fma(a0.s5, b5.s1, acc1.s1);
+ acc1.s1 = fma(a0.s6, b6.s1, acc1.s1);
+ acc1.s1 = fma(a0.s7, b7.s1, acc1.s1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if defined(REINTERPRET_INPUT_AS_3D)
+ a0 = vload8(0, (__global float *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y + zin.s2));
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ a0 = vload8(0, (__global float *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+ acc2.s0 = fma(a0.s0, b0.s0, acc2.s0);
+ acc2.s0 = fma(a0.s1, b1.s0, acc2.s0);
+ acc2.s0 = fma(a0.s2, b2.s0, acc2.s0);
+ acc2.s0 = fma(a0.s3, b3.s0, acc2.s0);
+ acc2.s0 = fma(a0.s4, b4.s0, acc2.s0);
+ acc2.s0 = fma(a0.s5, b5.s0, acc2.s0);
+ acc2.s0 = fma(a0.s6, b6.s0, acc2.s0);
+ acc2.s0 = fma(a0.s7, b7.s0, acc2.s0);
+
+ acc2.s1 = fma(a0.s0, b0.s1, acc2.s1);
+ acc2.s1 = fma(a0.s1, b1.s1, acc2.s1);
+ acc2.s1 = fma(a0.s2, b2.s1, acc2.s1);
+ acc2.s1 = fma(a0.s3, b3.s1, acc2.s1);
+ acc2.s1 = fma(a0.s4, b4.s1, acc2.s1);
+ acc2.s1 = fma(a0.s5, b5.s1, acc2.s1);
+ acc2.s1 = fma(a0.s6, b6.s1, acc2.s1);
+ acc2.s1 = fma(a0.s7, b7.s1, acc2.s1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#if defined(REINTERPRET_INPUT_AS_3D)
+ a0 = vload8(0, (__global float *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y + zin.s3));
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ a0 = vload8(0, (__global float *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+ acc3.s0 = fma(a0.s0, b0.s0, acc3.s0);
+ acc3.s0 = fma(a0.s1, b1.s0, acc3.s0);
+ acc3.s0 = fma(a0.s2, b2.s0, acc3.s0);
+ acc3.s0 = fma(a0.s3, b3.s0, acc3.s0);
+ acc3.s0 = fma(a0.s4, b4.s0, acc3.s0);
+ acc3.s0 = fma(a0.s5, b5.s0, acc3.s0);
+ acc3.s0 = fma(a0.s6, b6.s0, acc3.s0);
+ acc3.s0 = fma(a0.s7, b7.s0, acc3.s0);
+
+ acc3.s1 = fma(a0.s0, b0.s1, acc3.s1);
+ acc3.s1 = fma(a0.s1, b1.s1, acc3.s1);
+ acc3.s1 = fma(a0.s2, b2.s1, acc3.s1);
+ acc3.s1 = fma(a0.s3, b3.s1, acc3.s1);
+ acc3.s1 = fma(a0.s4, b4.s1, acc3.s1);
+ acc3.s1 = fma(a0.s5, b5.s1, acc3.s1);
+ acc3.s1 = fma(a0.s6, b6.s1, acc3.s1);
+ acc3.s1 = fma(a0.s7, b7.s1, acc3.s1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ src_addr.s0 += sizeof(float) * 8;
+ }
+  // Leftover loop: process the remaining (COLS_A % 8) columns one float at a time
+ for (; i < (int)COLS_A; ++i)
+ {
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ float a0 = *((__global float *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y + zin.s0));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ float a1 = *((__global float *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y + zin.s1));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ float a2 = *((__global float *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y + zin.s2));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ float a3 = *((__global float *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y + zin.s3));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ float a0 = *((__global float *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ float a1 = *((__global float *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ float a2 = *((__global float *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ float a3 = *((__global float *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Load values from matrix B
+ float2 b0 = vload2(0, (__global float *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+
+ // Multiply and accumulate
+ acc0.s0 = fma(a0, b0.s0, acc0.s0);
+ acc0.s1 = fma(a0, b0.s1, acc0.s1);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1.s0 = fma(a1, b0.s0, acc1.s0);
+ acc1.s1 = fma(a1, b0.s1, acc1.s1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2.s0 = fma(a2, b0.s0, acc2.s0);
+ acc2.s1 = fma(a2, b0.s1, acc2.s1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3.s0 = fma(a3, b0.s0, acc3.s0);
+ acc3.s1 = fma(a3, b0.s1, acc3.s1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ src_addr.s0 += sizeof(float);
+ }
+
+ int z = get_global_id(2);
+
+ // Compute destination address
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ // Compute dst address
+ __global uchar *dst_addr = offset(&dst, 0, 0);
+
+ uint4 zout = 0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Since we store a 2D output tile in a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zout) is calculated dividing M (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)
+ // by HEIGHT_GEMM3D
+ zout = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) /
+ (uint4)HEIGHT_GEMM3D;
+ zout = min(DEPTH_GEMM3D - 1, zout);
+
+ // Add offset due to the cross plane paddings
+ zout *= (dst_cross_plane_pad * dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, float, acc, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+ REPEAT_VAR_INIT_TO_CONST(NUM_ELEMS_PROCESSED_PER_THREAD_Y, uint, zero, 0);
+
+#if defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)2 * sizeof(float));
+
+ LOAD_BLOCK(1, 2, float, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, float, bias, BETA);
+#endif // UNIT_BETA
+
+ // acc = acc + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(NUM_ELEMS_PROCESSED_PER_THREAD_Y, acc, bias0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)2 * sizeof(float)) +
+ (get_global_id(1) * (uint)NUM_ELEMS_PROCESSED_PER_THREAD_Y * src2_stride_y) +
+ get_global_id(2) * src2_stride_z;
+
+ LOAD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, 2, float, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, float, bias, BETA);
+#endif // UNIT_BETA
+
+ // acc = acc + bias
+ ADD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, acc, bias);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, ACTIVATION_TYPE, float, acc, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store the output block
+ vstore2(acc0, 0, (__global float *)(dst_addr + 0 * dst_stride_y + zout.s0));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ vstore2(acc1, 0, (__global float *)(dst_addr + 1 * dst_stride_y + zout.s1));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ vstore2(acc2, 0, (__global float *)(dst_addr + 2 * dst_stride_y + zout.s2));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ vstore2(acc3, 0, (__global float *)(dst_addr + 3 * dst_stride_y + zout.s3));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+}
+
+#if defined(ARM_COMPUTE_OPENCL_FP16_ENABLED)
+/** This OpenCL kernel computes the matrix by matrix multiplication between the matrix A (src0) and
+ * matrix B (src1) in case both matrices have not been reshaped
+ *
+ * @note This OpenCL kernel works with the 16-bit floating point data type (half) and accumulates
+ * the result in 32-bit floating point variables.
+ * @note The number of elements processed along the x and y directions must be passed at compile
+ * time using -DNUM_ELEMS_PROCESSED_PER_THREAD_X and -DNUM_ELEMS_PROCESSED_PER_THREAD_Y. This kernel
+ * optimally uses -DNUM_ELEMS_PROCESSED_PER_THREAD_X=4.
+ * @note The number of matrix A columns must be passed at compile time using -DCOLS_A.
+ * @note The optional value of scalar alpha is passed at compile time using -DALPHA=alpha
+ * @note In case the matrix B has 3 dimensions and the matrix A more than 3, in order to avoid
+ * out-of-bounds reads, the number of channels of matrix B must be passed at compile time using
+ * MATRIX_B_DEPTH (e.g. -DMATRIX_B_DEPTH=16). This case can happen when GEMM is used to perform the
+ * element-wise multiplication through a batched matrix multiplication (2D Winograd) and we have
+ * multiple inputs (e.g. a = [K, M, 16, Batches], b = [N, K, 16])
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B variables required by some activation functions should be
+ * passed at compile time as well, using -DA_VAL= and -DB_VAL= respectively. The activation
+ * function is performed after the bias addition
+ * @note In case the input or output have to be reinterpreted as a 3D tensor, the following
+ * information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped
+ *
+ * @param[in] src0_ptr Pointer to the source matrix. Supported data
+ * types: F16
+ * @param[in] src0_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src0_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src0_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src1_ptr Pointer to the source matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] src1_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src1_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src1_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src2_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p src0_ptr
+ * @param[in] src2_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] src2_step_x (Optional) src2_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] src2_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] src2_step_y (Optional) src2_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] src2_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr                            Pointer to the destination matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x                         dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y                         dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src2_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] src_cross_plane_pad                (Optional) Bottom paddings in units of elements for
+ * the input tensor (only if defined REINTERPRET_INPUT_AS_3D)
+ * @param[in] dst_cross_plane_pad                (Optional) Bottom paddings in units of elements
+ * (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_floating_point_f16_bifrost_acc32(IMAGE_DECLARATION(src0),
+ IMAGE_DECLARATION(src1),
+#if defined(BETA)
+ IMAGE_DECLARATION(src2),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint src0_stride_z,
+ uint src1_stride_z,
+#if defined(BETA)
+ uint src2_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint src_cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ int idx = get_global_id(0) * NUM_ELEMS_PROCESSED_PER_THREAD_X;
+
+ // Compute starting address for matrix A and Matrix B
+ int2 src_addr = ((int2)(src0_offset_first_element_in_bytes, src1_offset_first_element_in_bytes));
+
+ // Update address for the matrix A
+ src_addr.s0 += get_global_id(1) * src0_stride_y * NUM_ELEMS_PROCESSED_PER_THREAD_Y;
+
+ // Update address for the matrix B
+ src_addr.s1 += idx * sizeof(half);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Since we load a 2D input tile from a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zin) is calculated dividing M (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)
+ // by HEIGHT_GEMM3D
+ uint4 zin = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) /
+ (uint4)HEIGHT_GEMM3D;
+ zin = min(DEPTH_GEMM3D - 1, zin);
+
+ // Add offset due to the cross plane paddings
+ zin *= (src_cross_plane_pad * src0_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply src0_stride_z by DEPTH_GEMM3D
+ src_addr.s0 += get_global_id(2) * src0_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ src_addr.s0 += get_global_id(2) * src0_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ src_addr.s1 += (get_global_id(2) % MATRIX_B_DEPTH) * src1_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ src_addr.s1 += get_global_id(2) * src1_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+  // Initialize accumulators (fp32, even though the inputs are fp16)
+  float8 acc0 = 0.0f;
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+  float8 acc1 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+  float8 acc2 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+  float8 acc3 = 0.0f;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ int i = 0;
+ for (; i <= ((int)COLS_A - 4); i += 4)
+ {
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ LOAD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, 4, half, a, src0_ptr, src_addr.s0, src0_stride_y,
+ zin.s);
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ half4 a0 = vload4(0, (__global half *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ half4 a1 = vload4(0, (__global half *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ half4 a2 = vload4(0, (__global half *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ half4 a3 = vload4(0, (__global half *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Load values from matrix B
+ float8 b0 = convert_float8(vload8(0, (__global half *)(src1_ptr + src_addr.s1)));
+ src_addr.s1 += src1_stride_y;
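+    // (b0 is widened to float so the fma calls below accumulate in fp32, reducing rounding error
+    // compared to accumulating in fp16)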
+
+ // Accumulate
+ acc0 = fma(b0, (float8)a0.s0, acc0);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 = fma(b0, (float8)a1.s0, acc1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 = fma(b0, (float8)a2.s0, acc2);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 = fma(b0, (float8)a3.s0, acc3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ b0 = convert_float8(vload8(0, (__global half *)(src1_ptr + src_addr.s1)));
+ src_addr.s1 += src1_stride_y;
+ acc0 = fma(b0, (float8)a0.s1, acc0);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 = fma(b0, (float8)a1.s1, acc1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 = fma(b0, (float8)a2.s1, acc2);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 = fma(b0, (float8)a3.s1, acc3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ b0 = convert_float8(vload8(0, (__global half *)(src1_ptr + src_addr.s1)));
+ src_addr.s1 += src1_stride_y;
+ acc0 = fma(b0, (float8)a0.s2, acc0);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 = fma(b0, (float8)a1.s2, acc1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 = fma(b0, (float8)a2.s2, acc2);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 = fma(b0, (float8)a3.s2, acc3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ b0 = convert_float8(vload8(0, (__global half *)(src1_ptr + src_addr.s1)));
+ src_addr.s1 += src1_stride_y;
+ acc0 = fma(b0, (float8)a0.s3, acc0);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 = fma(b0, (float8)a1.s3, acc1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 = fma(b0, (float8)a2.s3, acc2);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 = fma(b0, (float8)a3.s3, acc3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ src_addr.s0 += 4 * sizeof(half);
+ }
+
+ for (; i < (int)COLS_A; ++i)
+ {
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ half a0 = *((__global half *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y + zin.s0));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ half a1 = *((__global half *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y + zin.s1));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ half a2 = *((__global half *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y + zin.s2));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ half a3 = *((__global half *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y + zin.s3));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ half a0 = *((__global half *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ half a1 = *((__global half *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ half a2 = *((__global half *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ half a3 = *((__global half *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Load values from matrix B
+ float8 b0 = convert_float8(vload8(0, (__global half *)(src1_ptr + src_addr.s1)));
+
+ src_addr += (int2)(sizeof(half), src1_stride_y);
+
+ // Accumulate
+    acc0 = fma(b0, (float8)a0, acc0); // acc0 += b0 * a0
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+    acc1 = fma(b0, (float8)a1, acc1); // acc1 += b0 * a1
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+    acc2 = fma(b0, (float8)a2, acc2); // acc2 += b0 * a2
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+    acc3 = fma(b0, (float8)a3, acc3); // acc3 += b0 * a3
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ }
+
+ int z = get_global_id(2);
+
+ // Compute destination address
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ // Compute dst address
+ __global uchar *dst_addr = offset(&dst, 0, 0);
+
+ uint4 zout = 0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Since we store a 2D output tile in a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zout) is calculated dividing M (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)
+ // by HEIGHT_GEMM3D
+ zout = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) /
+ (uint4)HEIGHT_GEMM3D;
+ zout = min(DEPTH_GEMM3D - 1, zout);
+
+ // Add offset due to the cross plane paddings
+ zout *= (dst_cross_plane_pad * dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, float, acc, ALPHA);
+#endif // defined(ALPHA)
+
+#if defined(BETA)
+ REPEAT_VAR_INIT_TO_CONST(NUM_ELEMS_PROCESSED_PER_THREAD_Y, uint, zero, 0);
+
+#if defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)8 * sizeof(half));
+
+ LOAD_BLOCK(1, 8, half, bias, src2_addr, 0, src2_stride_y, zero);
+
+ float8 bias_f0 = convert_float8(bias0);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, float, bias_f, BETA);
+#endif // UNIT_BETA
+
+ // acc = acc + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(NUM_ELEMS_PROCESSED_PER_THREAD_Y, acc, bias_f0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)8 * sizeof(half)) +
+ (get_global_id(1) * (uint)NUM_ELEMS_PROCESSED_PER_THREAD_Y * src2_stride_y) +
+ get_global_id(2) * src2_stride_z;
+
+ LOAD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, 8, half, bias, src2_addr, 0, src2_stride_y, zero);
+
+ float8 bias_f0 = convert_float8(bias0);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ float8 bias_f1 = convert_float8(bias1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ float8 bias_f2 = convert_float8(bias2);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ float8 bias_f3 = convert_float8(bias3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, float, bias_f, BETA);
+#endif // UNIT_BETA
+
+ // acc = acc + bias
+ ADD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, acc, bias_f);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
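+  // Narrow the fp32 accumulators back to fp16 before the activation and the final store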
+ half8 acc_h0 = convert_half8(acc0);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ half8 acc_h1 = convert_half8(acc1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ half8 acc_h2 = convert_half8(acc2);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ half8 acc_h3 = convert_half8(acc3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, ACTIVATION_TYPE, half, acc_h, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store the output block
+ STORE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, 8, half, acc_h, dst_addr, dst_stride_y, zout.s);
+}
+
+/** This OpenCL kernel computes the matrix by matrix multiplication between the matrix A (src0) and
+ * matrix B (src1) in case both matrices have not been reshaped
+ *
+ * @note This OpenCL kernel works with the 16-bit floating point data type (half) and uses the fma
+ * units.
+ * @note The number of elements processed along the x and y directions must be passed at compile
+ * time using -DNUM_ELEMS_PROCESSED_PER_THREAD_X and -DNUM_ELEMS_PROCESSED_PER_THREAD_Y. This kernel
+ * optimally uses -DNUM_ELEMS_PROCESSED_PER_THREAD_X=4.
+ * @note The number of matrix A columns must be passed at compile time using -DCOLS_A.
+ * @note The optional value of scalar alpha is passed at compile time using -DALPHA=alpha
+ * @note In case the matrix B has 3 dimensions and the matrix A more than 3, in order to avoid
+ * out-of-bounds reads, the number of channels of matrix B must be passed at compile time using
+ * MATRIX_B_DEPTH (e.g. -DMATRIX_B_DEPTH=16). This case can happen when GEMM is used to perform the
+ * element-wise multiplication through a batched matrix multiplication (2D Winograd) and we have
+ * multiple inputs (e.g. a = [K, M, 16, Batches], b = [N, K, 16])
+ *
+ * @note If the activation type is passed at compile time through -DACTIVATION_TYPE (e.g.
+ * -DACTIVATION_TYPE=RELU), the A and B variables required by some activation functions should be
+ * passed at compile time as well, using -DA_VAL= and -DB_VAL= respectively. The activation
+ * function is performed after the bias addition
+ * @note In case the input or output have to be reinterpreted as a 3D tensor, the following
+ * information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * ((HEIGHT_GEMM3D * DEPTH_GEMM3D) must be equal to the number of columns of matrix A NOT reshaped)
+ *
+ * @param[in] src0_ptr Pointer to the source matrix. Supported data
+ * types: F16
+ * @param[in] src0_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src0_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src0_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src1_ptr Pointer to the source matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] src1_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src1_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src1_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src2_ptr (Optional) Pointer to the bias matrix. Supported
+ * data type: same as @p lhs_ptr
+ * @param[in] src2_stride_x (Optional) Stride of the bias matrix in X
+ * dimension (in bytes)
+ * @param[in] src2_step_x (Optional) src2_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] src2_stride_y (Optional) Stride of the bias matrix in Y
+ * dimension (in bytes)
+ * @param[in] src2_step_y (Optional) src2_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] src2_offset_first_element_in_bytes (Optional) The offset of the first element in the
+ * bias matrix
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * types: same as @p src0_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_gx_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src2_stride_z (Optional) Stride of the bias matrix in Z
+ * dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] src_cross_plane_pad (Optional) Bottom paddings in unit of elements for
+ * the input tensor (only if defined REINTERPRET_INPUT_AS_3D)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemm_mm_floating_point_f16_bifrost(IMAGE_DECLARATION(src0), IMAGE_DECLARATION(src1),
+#if defined(BETA)
+ IMAGE_DECLARATION(src2),
+#endif // defined(BETA)
+ IMAGE_DECLARATION(dst), uint src0_stride_z,
+ uint src1_stride_z,
+#if defined(BETA)
+ uint src2_stride_z,
+#endif // defined(BETA)
+ uint dst_stride_z
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint src_cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ int idx = get_global_id(0) * NUM_ELEMS_PROCESSED_PER_THREAD_X;
+
+ // Compute starting address for matrix A and Matrix B
+ int2 src_addr = ((int2)(src0_offset_first_element_in_bytes, src1_offset_first_element_in_bytes));
+
+ // Update address for the matrix A
+ src_addr.s0 += get_global_id(1) * src0_stride_y * NUM_ELEMS_PROCESSED_PER_THREAD_Y;
+
+ // Update address for the matrix B
+ src_addr.s1 += idx * sizeof(half);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Since we load a 2D input tile from a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+  // The plane (zin) is calculated by dividing M (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)
+ // by HEIGHT_GEMM3D
+ uint4 zin = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) /
+ (uint4)HEIGHT_GEMM3D;
+ zin = min(DEPTH_GEMM3D - 1, zin);
+
+ // Add offset due to the cross plane paddings
+ zin *= (src_cross_plane_pad * src0_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply src0_stride_z by DEPTH_GEMM3D
+ src_addr.s0 += get_global_id(2) * src0_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ src_addr.s0 += get_global_id(2) * src0_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ src_addr.s1 += (get_global_id(2) % MATRIX_B_DEPTH) * src1_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ src_addr.s1 += get_global_id(2) * src1_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ half8 acc0 = 0.0h;
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ half8 acc1 = 0.0h;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ half8 acc2 = 0.0h;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ half8 acc3 = 0.0h;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ int i = 0;
+ for (; i <= ((int)COLS_A - 4); i += 4)
+ {
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ LOAD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, 4, half, a, src0_ptr, src_addr.s0, src0_stride_y,
+ zin.s);
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ half4 a0 = vload4(0, (__global half *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ half4 a1 = vload4(0, (__global half *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ half4 a2 = vload4(0, (__global half *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ half4 a3 = vload4(0, (__global half *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Load values from matrix B
+ half8 b0 = vload8(0, (__global half *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+
+ // Accumulate
+ acc0 = fma(b0, (half8)a0.s0, acc0);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 = fma(b0, (half8)a1.s0, acc1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 = fma(b0, (half8)a2.s0, acc2);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 = fma(b0, (half8)a3.s0, acc3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ b0 = vload8(0, (__global half *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+ acc0 = fma(b0, (half8)a0.s1, acc0);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 = fma(b0, (half8)a1.s1, acc1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 = fma(b0, (half8)a2.s1, acc2);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 = fma(b0, (half8)a3.s1, acc3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ b0 = vload8(0, (__global half *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+ acc0 = fma(b0, (half8)a0.s2, acc0);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 = fma(b0, (half8)a1.s2, acc1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 = fma(b0, (half8)a2.s2, acc2);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 = fma(b0, (half8)a3.s2, acc3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ b0 = vload8(0, (__global half *)(src1_ptr + src_addr.s1));
+ src_addr.s1 += src1_stride_y;
+ acc0 = fma(b0, (half8)a0.s3, acc0);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 = fma(b0, (half8)a1.s3, acc1);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 = fma(b0, (half8)a2.s3, acc2);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 = fma(b0, (half8)a3.s3, acc3);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+
+ src_addr.s0 += 4 * sizeof(half);
+ }
+
+ for (; i < (int)COLS_A; ++i)
+ {
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ half a0 = *((__global half *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y + zin.s0));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ half a1 = *((__global half *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y + zin.s1));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ half a2 = *((__global half *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y + zin.s2));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ half a3 = *((__global half *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y + zin.s3));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#else // defined(REINTERPRET_INPUT_AS_3D)
+ // Load values from matrix A
+ half a0 = *((__global half *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ half a1 = *((__global half *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ half a2 = *((__global half *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ half a3 = *((__global half *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Load values from matrix B
+ half8 b0 = vload8(0, (__global half *)(src1_ptr + src_addr.s1));
+
+ src_addr += (int2)(sizeof(half), src1_stride_y);
+
+ // Accumulate
+ acc0 = fma(b0, (half8)a0, acc0); // b0 * (half8)a0;
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 = fma(b0, (half8)a1, acc1); // b0 * (half8)a1;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 = fma(b0, (half8)a2, acc2); // b0 * (half8)a2;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 = fma(b0, (half8)a3, acc3); // b0 * (half8)a3;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ }
+
+ int z = get_global_id(2);
+
+ // Compute destination address
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ // Compute dst address
+ __global uchar *dst_addr = offset(&dst, 0, 0);
+
+ uint4 zout = 0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Since we store a 2D output tile in a 3D tensor, we need to check when the plane changes across
+ // the z dimension in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+  // The plane (zout) is calculated by dividing M (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)
+ // by HEIGHT_GEMM3D
+ zout = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) /
+ (uint4)HEIGHT_GEMM3D;
+ zout = min(DEPTH_GEMM3D - 1, zout);
+
+ // Add offset due to the cross plane paddings
+ zout *= (dst_cross_plane_pad * dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Multiply by the weight of matrix-matrix product and store the result
+#if defined(ALPHA)
+ SCALE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, half, acc, ALPHA);
+#endif // defined(ALPHA)
+
+ // Add beta*bias
+#if defined(BETA)
+ REPEAT_VAR_INIT_TO_CONST(NUM_ELEMS_PROCESSED_PER_THREAD_Y, uint, zero, 0);
+
+#if defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)8 * sizeof(half));
+
+ LOAD_BLOCK(1, 8, half, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(1, half, bias, BETA);
+#endif // UNIT_BETA
+
+ // acc = acc + bias[broadcasted]
+ ADD_BLOCK_BROADCAST(NUM_ELEMS_PROCESSED_PER_THREAD_Y, acc, bias0);
+
+#else // defined(BROADCAST_BIAS)
+ __global uchar *src2_addr =
+ src2_ptr + src2_offset_first_element_in_bytes + (get_global_id(0) * (uint)8 * sizeof(half)) +
+ (get_global_id(1) * (uint)NUM_ELEMS_PROCESSED_PER_THREAD_Y * src2_stride_y) +
+ get_global_id(2) * src2_stride_z;
+
+ LOAD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, 8, half, bias, src2_addr, 0, src2_stride_y, zero);
+
+#ifndef UNIT_BETA
+ SCALE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, half, bias, BETA);
+#endif // UNIT_BETA
+
+ // acc = acc + bias
+ ADD_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, acc, bias);
+
+#endif // defined(BROADCAST_BIAS)
+#endif // defined(BETA)
+
+#if defined(ACTIVATION_TYPE)
+ ACTIVATION_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, ACTIVATION_TYPE, half, acc, A_VAL, B_VAL);
+#endif // defined(ACTIVATION_TYPE)
+
+ // Store the output block
+ STORE_BLOCK(NUM_ELEMS_PROCESSED_PER_THREAD_Y, 8, half, acc, dst_addr, dst_stride_y, zout.s);
+}
+#endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED)
+
+#endif // defined(COLS_A) && defined(NUM_ELEMS_PROCESSED_PER_THREAD_X) &&
+          // defined(NUM_ELEMS_PROCESSED_PER_THREAD_Y)
+
+#if defined(BETA)
+/** This OpenCL kernel performs the in-place matrix addition between two matrices, taking into
+ * account that the second matrix may be weighted by a scalar value beta.
+ *
+ * @note The beta value needs to be passed at compile time using -DBETA
+ *
+ * @param[in] src_ptr Pointer to the source matrix. Supported data types:
+ * F32
+ * @param[in] src_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in]  src_stride_z                      Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in]  src_step_z                        src_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * types: same as @p src_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_gx_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in
+ * bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ */
+__kernel void gemm_ma_f32(TENSOR3D_DECLARATION(src), TENSOR3D_DECLARATION(dst))
+{
+ // Compute source and destination addresses
+ Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
+ Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst);
+
+ // Load values from A x B
+ float4 alpha_ab = vload4(0, (__global float *)dst.ptr);
+
+ // Load values from Matrix C
+ float4 c = vload4(0, (__global float *)src.ptr);
+
+ // Computes alpha * axb + beta * c
+ float4 out = alpha_ab + (float4)BETA * c;
+
+ // Store final result in axb matrix
+ vstore4(out, 0, (__global float *)dst.ptr);
+}
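+
+/* A minimal sketch of the per-work-item arithmetic above, assuming the illustrative value
+ * BETA=0.5f: each work item rewrites four floats of dst as
+ *
+ *   dst[i] = dst[i] + 0.5f * src[i],  i = 0..3
+ *
+ * where dst already holds alpha*(A*B) and src holds matrix C.
+ */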
+
+#if defined(ARM_COMPUTE_OPENCL_FP16_ENABLED)
+/** This OpenCL kernel performs the in-place matrix addition between two matrices, taking into
+ * account that the second matrix may be weighted by a scalar value beta.
+ *
+ * @note The beta value needs to be passed at compile time using -DBETA
+ *
+ * @param[in] src_ptr Pointer to the source matrix. Supported data types:
+ * F16
+ * @param[in] src_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in]  src_stride_z                      Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in]  src_step_z                        src_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * types: same as @p src_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_gx_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in
+ * bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ */
+__kernel void gemm_ma_f16(TENSOR3D_DECLARATION(src), TENSOR3D_DECLARATION(dst))
+{
+ // Compute source and destination addresses
+ Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
+ Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst);
+
+ // Load values from A x B
+ half8 alpha_ab = vload8(0, (__global half *)dst.ptr);
+
+ // Load values from Matrix C
+ half8 c = vload8(0, (__global half *)src.ptr);
+
+ // Computes alpha * axb + beta * c
+ half8 out = alpha_ab + (half8)BETA * c;
+
+ // Store final result in axb matrix
+ vstore8(out, 0, (__global half *)dst.ptr);
+}
+#endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED)
+#endif // defined(BETA)
+
+#if defined(WIDTH_VECTOR_A)
+/** This OpenCL kernel computes the vector by matrix multiplication between each row of A (src0) and
+ * matrix B (src1), as used for the locally connected layer
+ *
+ * @note The width of A needs to be passed at compile time using -DWIDTH_VECTOR_A
+ *
+ * @note The input A and matrix B must not be reshaped
+ *
+ * @param[in] src0_ptr Pointer to the source matrix. Supported data
+ * types: F32
+ * @param[in] src0_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src0_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src0_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src1_ptr Pointer to the source matrix. Supported data
+ * types: same as @p src0_ptr
+ * @param[in] src1_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src1_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src1_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src1_step_z src_stride_z * number of elements along Z
+ * processed per workitem(in bytes)
+ * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * types: same as @p src0_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_gx_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ */
+__kernel void gemm_lc_vm_f32(IMAGE_DECLARATION(src0), TENSOR3D_DECLARATION(src1),
+ IMAGE_DECLARATION(dst))
+{
+ int idx = get_global_id(0) * 4;
+ int idy = get_global_id(1);
+
+ // Compute the address for the vector A and matrix B
+ int2 src_addr = ((int2)(src0_offset_first_element_in_bytes + src0_stride_y * idy,
+ src1_offset_first_element_in_bytes + src1_stride_z * idy));
+ src_addr.s1 += idx * sizeof(float);
+
+ int end_row_vec_a = src_addr.s0 + (WIDTH_VECTOR_A * sizeof(float));
+
+ float4 acc = 0.0f;
+
+ for (; src_addr.s0 <= (end_row_vec_a - 2 * (int)sizeof(float));
+ src_addr += (int2)(2 * sizeof(float), 2 * src1_stride_y))
+ {
+ float2 a0 = vload2(0, (__global float *)(src0_ptr + src_addr.s0));
+ float4 b0 = vload4(0, (__global float *)(src1_ptr + src_addr.s1));
+ float4 b1 = vload4(0, (__global float *)(src1_ptr + src_addr.s1 + src1_stride_y));
+
+ acc += b0 * (float4)a0.s0;
+ acc += b1 * (float4)a0.s1;
+ }
+
+ for (; src_addr.s0 < end_row_vec_a; src_addr += (int2)(sizeof(float), src1_stride_y))
+ {
+ float a0 = *((__global float *)(src0_ptr + src_addr.s0));
+ float4 b0 = vload4(0, (__global float *)(src1_ptr + src_addr.s1));
+
+ acc += b0 * (float4)a0;
+ }
+
+ // Compute destination address
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ vstore4(acc, 0, (__global float *)(offset(&dst, 0, 0)));
+}
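+
+/* A minimal index-form sketch of what each work item above computes (names are the kernel's
+ * own; idx = get_global_id(0) * 4 and idy = get_global_id(1)):
+ *
+ *   dst(idy, idx..idx+3) = sum over k < WIDTH_VECTOR_A of
+ *                          src0(idy, k) * src1(idy-th plane, k, idx..idx+3)
+ *
+ * i.e. row idy of A multiplies the idy-th plane of B.
+ */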
+#endif // defined(WIDTH_VECTOR_A)
+
+/** This kernel accumulates each row with the biases vector.
+ *
+ * @note The data type must be passed at compile time using -DDATA_TYPE e.g. -DDATA_TYPE=short.
+ * @note The vector size must be passed at compile time using -DVECTOR_SIZE e.g. -DVECTOR_SIZE=16.
+ *
+ * @param[in, out] accum_ptr Pointer to the accumulate tensor. Supported
+ * data type: U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[in]     accum_stride_x                    Stride of the accumulate tensor in X
+ * dimension (in bytes)
+ * @param[in] accum_step_x accum_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in]     accum_stride_y                    Stride of the accumulate tensor in Y
+ * dimension (in bytes)
+ * @param[in]     accum_step_y                      accum_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] accum_offset_first_element_in_bytes The offset of the first element in the
+ * accumulate tensor
+ * @param[in] biases_ptr Pointer to the biases vector. Same as @p
+ * accum_ptr
+ * @param[in]     biases_stride_x                   Stride of the biases vector in X
+ * dimension (in bytes)
+ * @param[in]     biases_step_x                     biases_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in]     biases_offset_first_element_in_bytes The offset of the first element in the
+ * biases vector
+ */
+#if defined(DATA_TYPE) && defined(VECTOR_SIZE)
+__kernel void gemm_accumulate_biases(IMAGE_DECLARATION(accum), VECTOR_DECLARATION(biases))
+{
+ Image accum = CONVERT_TO_IMAGE_STRUCT(accum);
+ Vector biases = CONVERT_TO_VECTOR_STRUCT(biases);
+
+  // Load VECTOR_SIZE elements from the accumulate tensor and the biases vector.
+ VEC_DATA_TYPE(DATA_TYPE, VECTOR_SIZE)
+ accum_value = VLOAD(VECTOR_SIZE)(0, (__global DATA_TYPE *)accum.ptr);
+ VEC_DATA_TYPE(DATA_TYPE, VECTOR_SIZE)
+ biases_value = VLOAD(VECTOR_SIZE)(0, (__global DATA_TYPE *)biases.ptr);
+ accum_value = biases_value + accum_value;
+ // Store result in the accumulate buffer
+ VSTORE(VECTOR_SIZE)
+ (accum_value, 0, (__global DATA_TYPE *)accum.ptr);
+}
+#endif // defined(DATA_TYPE) && defined(VECTOR_SIZE)
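+
+/* A minimal usage sketch for gemm_accumulate_biases, assuming the illustrative options
+ * -DDATA_TYPE=float -DVECTOR_SIZE=4, and assuming VLOAD/VSTORE from helpers.h map to the
+ * vloadn/vstoren builtins; the kernel body then reduces to:
+ *
+ *   float4 accum_value  = vload4(0, (__global float *)accum.ptr);
+ *   float4 biases_value = vload4(0, (__global float *)biases.ptr);
+ *   accum_value = biases_value + accum_value;
+ *   vstore4(accum_value, 0, (__global float *)accum.ptr);
+ */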
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/gemm_helpers.h b/compute/ARMComputeEx/src/core/CL/cl_kernels/gemm_helpers.h
new file mode 100644
index 000000000..0c75d061f
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/gemm_helpers.h
@@ -0,0 +1,1235 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "activation_float_helpers.h"
+#include "helpers.h"
+
+/** Loads the rows from 0 to n-1 in the given variables (BASENAME0 to BASENAMEn-1).
+ * @name LOAD_ROW_n
+ *
+ * @param[in] N0 The number of rows to load
+ * @param[in] DATA_TYPE The data type of variables
+ * @param[in] BASENAME The basename of the destination variables for the loaded rows
+ * @param[in] PTR The base pointer
+ * @param[in] OFFSET The offset within a row
+ * @param[in] STRIDE_Y The stride value in y-axis direction
+ * @param[in] Z The z-axis offset vector
+ * @{
+ */
+#define LOAD_ROW_1(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##0 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 0 * STRIDE_Y + Z##0));
+
+#define LOAD_ROW_2(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_1(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##1 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 1 * STRIDE_Y + Z##1));
+
+#define LOAD_ROW_3(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_2(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##2 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 2 * STRIDE_Y + Z##2));
+
+#define LOAD_ROW_4(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_3(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##3 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 3 * STRIDE_Y + Z##3));
+
+#define LOAD_ROW_5(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_4(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##4 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 4 * STRIDE_Y + Z##4));
+
+#define LOAD_ROW_6(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_5(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##5 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 5 * STRIDE_Y + Z##5));
+
+#define LOAD_ROW_7(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_6(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##6 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 6 * STRIDE_Y + Z##6));
+
+#define LOAD_ROW_8(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_7(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##7 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 7 * STRIDE_Y + Z##7));
+
+#define LOAD_ROW_9(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_8(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##8 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 8 * STRIDE_Y + Z##8));
+
+#define LOAD_ROW_10(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_9(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##9 = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 9 * STRIDE_Y + Z##9));
+
+#define LOAD_ROW_11(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_10(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##A = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 10 * STRIDE_Y + Z##A));
+
+#define LOAD_ROW_12(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_11(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##B = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 11 * STRIDE_Y + Z##B));
+
+#define LOAD_ROW_13(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_12(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##C = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 12 * STRIDE_Y + Z##C));
+
+#define LOAD_ROW_14(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_13(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##D = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 13 * STRIDE_Y + Z##D));
+
+#define LOAD_ROW_15(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_14(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##E = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 14 * STRIDE_Y + Z##E));
+
+#define LOAD_ROW_16(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_15(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##F = VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + 15 * STRIDE_Y + Z##F));
+
+/** @}*/ // end of group LOAD_ROW_n
+
+/** Load Blocks (consecutive rows and columns) with Z offset.
+ * @name LOAD_BLOCK
+ *
+ * Supported cases are M0=1,2,3,...,16 and N0=1,2,3,4,8,16
+ * The data to load is expected to have consecutive names for each row.
+ * E.g., for M0=3, and BASENAME=c, the expected data is c0, c1 and c2.
+ * The Z offset is expected to have consecutive names.
+ * E.g., for M0=3, and Z=zin, the expected Z offsets are zin0, zin1 and zin2.
+ *
+ * @param[in] M0 The number of consecutive rows
+ * @param[in] N0 The number of consecutive columns
+ * @param[in] DATA_TYPE The data type of the target
+ * @param[in] BASENAME The basename of the result variables
+ * @param[in] PTR The base pointer for the data
+ * @param[in] OFFSET The offset within a row
+ * @param[in] STRIDE_Y The stride in y-axis direction
+ * @param[in] Z The z-axis offset vector
+ * @{
+ */
+#define LOAD_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_ROW_##M0(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
+#define LOAD_BLOCK(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
+ LOAD_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
+/** @} */ // end of group LOAD_BLOCK
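+
+/* A minimal expansion sketch, assuming M0=2, N0=4, DATA_TYPE=half, BASENAME=a and Z=zin
+ * (VLOAD(4) is assumed to map to the vload4 builtin; see helpers.h):
+ *
+ *   LOAD_BLOCK(2, 4, half, a, ptr, off, stride_y, zin);
+ *   // half4 a0 = vload4(0, (__global half *)(ptr + off + 0 * stride_y + zin0));
+ *   // half4 a1 = vload4(0, (__global half *)(ptr + off + 1 * stride_y + zin1));
+ */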
+
+/** Loads the elements from 0 to n-1 in the given variables (BASENAME0 to BASENAMEn-1).
+ * @name LOAD_ELEMENT_n
+ *
+ * @param[in] N0 The number of rows to load
+ * @param[in] DATA_TYPE The data type of variables
+ * @param[in] BASENAME The basename of the destination variables for the loaded rows
+ * @param[in] PTR The base pointer
+ * @param[in] OFFSET The offset within a row
+ * @param[in] STRIDE_Y The stride value in y-axis direction
+ * @{
+ */
+#define LOAD_ELEMENT_1(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##0 = *((__global DATA_TYPE *)(PTR + OFFSET + 0 * STRIDE_Y));
+
+#define LOAD_ELEMENT_2(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_1(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##1 = *((__global DATA_TYPE *)(PTR + OFFSET + 1 * STRIDE_Y));
+
+#define LOAD_ELEMENT_3(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_2(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##2 = *((__global DATA_TYPE *)(PTR + OFFSET + 2 * STRIDE_Y));
+
+#define LOAD_ELEMENT_4(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_3(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##3 = *((__global DATA_TYPE *)(PTR + OFFSET + 3 * STRIDE_Y));
+
+#define LOAD_ELEMENT_5(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_4(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##4 = *((__global DATA_TYPE *)(PTR + OFFSET + 4 * STRIDE_Y));
+
+#define LOAD_ELEMENT_6(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_5(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##5 = *((__global DATA_TYPE *)(PTR + OFFSET + 5 * STRIDE_Y));
+
+#define LOAD_ELEMENT_7(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_6(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##6 = *((__global DATA_TYPE *)(PTR + OFFSET + 6 * STRIDE_Y));
+
+#define LOAD_ELEMENT_8(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_7(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##7 = *((__global DATA_TYPE *)(PTR + OFFSET + 7 * STRIDE_Y));
+
+#define LOAD_ELEMENT_9(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_8(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##8 = *((__global DATA_TYPE *)(PTR + OFFSET + 8 * STRIDE_Y));
+
+#define LOAD_ELEMENT_10(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_9(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##9 = *((__global DATA_TYPE *)(PTR + OFFSET + 9 * STRIDE_Y));
+
+#define LOAD_ELEMENT_11(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_10(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##A = *((__global DATA_TYPE *)(PTR + OFFSET + 10 * STRIDE_Y));
+
+#define LOAD_ELEMENT_12(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_11(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##B = *((__global DATA_TYPE *)(PTR + OFFSET + 11 * STRIDE_Y));
+
+#define LOAD_ELEMENT_13(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_12(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##C = *((__global DATA_TYPE *)(PTR + OFFSET + 12 * STRIDE_Y));
+
+#define LOAD_ELEMENT_14(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_13(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##D = *((__global DATA_TYPE *)(PTR + OFFSET + 13 * STRIDE_Y));
+
+#define LOAD_ELEMENT_15(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_14(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##E = *((__global DATA_TYPE *)(PTR + OFFSET + 14 * STRIDE_Y));
+
+#define LOAD_ELEMENT_16(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_15(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ VEC_DATA_TYPE(DATA_TYPE, N0) \
+ BASENAME##F = *((__global DATA_TYPE *)(PTR + OFFSET + 15 * STRIDE_Y));
+
+/** @}*/ // end of group LOAD_ELEMENT_n
+
+/** Load Scalar as Vector (consecutive elements).
+ * @name LOAD_SCALAR_AS_VECTOR
+ *
+ * Supported cases are M0=1,2,3,...,16 and N0=1,2,3,4,8,16
+ * The data to load is expected to have consecutive names for each row.
+ * E.g., for M0=3, and BASENAME=c, the expected data is c0, c1 and c2.
+ *
+ * @param[in] M0 The number of consecutive rows
+ * @param[in] N0 The number of consecutive columns
+ * @param[in] DATA_TYPE The data type of the target
+ * @param[in] BASENAME The basename of the result variables
+ * @param[in] PTR The base pointer for the data
+ * @param[in] OFFSET The offset within a row
+ * @param[in] STRIDE_Y The stride in y-axis direction
+ * @{
+ */
+#define LOAD_SCALAR_AS_VECTOR_STR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_ELEMENT_##M0(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)
+#define LOAD_SCALAR_AS_VECTOR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
+ LOAD_SCALAR_AS_VECTOR_STR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)
+/** @} */ // end of group LOAD_SCALAR_AS_VECTOR
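+
+/* A minimal expansion sketch, assuming M0=2, N0=4, DATA_TYPE=float and BASENAME=a; each
+ * scalar load is implicitly broadcast to all four lanes of the vector variable:
+ *
+ *   LOAD_SCALAR_AS_VECTOR(2, 4, float, a, ptr, off, stride_y);
+ *   // float4 a0 = *((__global float *)(ptr + off + 0 * stride_y));
+ *   // float4 a1 = *((__global float *)(ptr + off + 1 * stride_y));
+ */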
+
+/** Basic macros to calculate Z offset values from Z0 to Zn-1
+ * @name CALCULATE_Z_OFFSET_n
+ *
+ * @param[in] M0 The number of offset values to calculate
+ * @param[in] DATA_TYPE The data type of the results
+ * @param[in] Z The basename of the result variables
+ * @param[in] Y               The work-item ID of the y-axis
+ * @param[in] HEIGHT_GEMM3D The height of GEMM3D
+ * @param[in] DEPTH_GEMM3D The depth of GEMM3D
+ * @param[in] CROSS_PLANE_PAD The padding required for plane changes across the z-dimension
+ * @param[in] STRIDE_Y The stride value in y-axis direction
+ *
+ * @{
+ */
+#define CALCULATE_Z_OFFSET_1(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ Z##0 = (0 + (DATA_TYPE)(Y * (DATA_TYPE)M0)) / (DATA_TYPE)HEIGHT_GEMM3D; \
+ Z##0 = min((DATA_TYPE)(DEPTH_GEMM3D - 1), Z##0); \
+ Z##0 *= (CROSS_PLANE_PAD * STRIDE_Y);
+
+#define CALCULATE_Z_OFFSET_2(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ CALCULATE_Z_OFFSET_1(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ Z##1 = (1 + (DATA_TYPE)(Y * (DATA_TYPE)M0)) / (DATA_TYPE)HEIGHT_GEMM3D; \
+ Z##1 = min((DATA_TYPE)(DEPTH_GEMM3D - 1), Z##1); \
+ Z##1 *= (CROSS_PLANE_PAD * STRIDE_Y);
+
+#define CALCULATE_Z_OFFSET_3(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ CALCULATE_Z_OFFSET_2(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ Z##2 = (2 + (DATA_TYPE)(Y * (DATA_TYPE)M0)) / (DATA_TYPE)HEIGHT_GEMM3D; \
+ Z##2 = min((DATA_TYPE)(DEPTH_GEMM3D - 1), Z##2); \
+ Z##2 *= (CROSS_PLANE_PAD * STRIDE_Y);
+
+#define CALCULATE_Z_OFFSET_4(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ CALCULATE_Z_OFFSET_3(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ Z##3 = (3 + (DATA_TYPE)(Y * (DATA_TYPE)M0)) / (DATA_TYPE)HEIGHT_GEMM3D; \
+ Z##3 = min((DATA_TYPE)(DEPTH_GEMM3D - 1), Z##3); \
+ Z##3 *= (CROSS_PLANE_PAD * STRIDE_Y);
+
+#define CALCULATE_Z_OFFSET_5(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ CALCULATE_Z_OFFSET_4(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ Z##4 = (4 + (DATA_TYPE)(Y * (DATA_TYPE)M0)) / (DATA_TYPE)HEIGHT_GEMM3D; \
+ Z##4 = min((DATA_TYPE)(DEPTH_GEMM3D - 1), Z##4); \
+ Z##4 *= (CROSS_PLANE_PAD * STRIDE_Y);
+
+#define CALCULATE_Z_OFFSET_6(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ CALCULATE_Z_OFFSET_5(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ Z##5 = (5 + (DATA_TYPE)(Y * (DATA_TYPE)M0)) / (DATA_TYPE)HEIGHT_GEMM3D; \
+ Z##5 = min((DATA_TYPE)(DEPTH_GEMM3D - 1), Z##5); \
+ Z##5 *= (CROSS_PLANE_PAD * STRIDE_Y);
+
+#define CALCULATE_Z_OFFSET_7(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ CALCULATE_Z_OFFSET_6(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ Z##6 = (6 + (DATA_TYPE)(Y * (DATA_TYPE)M0)) / (DATA_TYPE)HEIGHT_GEMM3D; \
+ Z##6 = min((DATA_TYPE)(DEPTH_GEMM3D - 1), Z##6); \
+ Z##6 *= (CROSS_PLANE_PAD * STRIDE_Y);
+
+#define CALCULATE_Z_OFFSET_8(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ CALCULATE_Z_OFFSET_7(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ Z##7 = (7 + (DATA_TYPE)(Y * (DATA_TYPE)M0)) / (DATA_TYPE)HEIGHT_GEMM3D; \
+ Z##7 = min((DATA_TYPE)(DEPTH_GEMM3D - 1), Z##7); \
+ Z##7 *= (CROSS_PLANE_PAD * STRIDE_Y);
+
+/** @} */ // end of group CALCULATE_Z_OFFSET_n
+
+/** Calculate Z offset values from Z0 to Zn-1
+ * @name CALCULATE_Z_OFFSET
+ *
+ * The Z offsets are expected to have consecutive names.
+ * E.g., for M0=3 and Z=zin, the expected names of the Z offsets are zin0, zin1 and zin2.
+ * Note that CROSS_PLANE_PAD (cross plane padding) is required to take into account
+ * the possible cross plane paddings when the plane changes across the z-dimension.
+ *
+ * <!--
+ * | |
+ * | plane0 |
+ * | |
+ * |__________________|
+ * |******************|
+ * | cross_plane_pad |
+ * |******************|
+ * | |
+ * | plane1 |
+ * | |
+ * |__________________|
+ * -->
+ *
+ * @param[in] M0 The number of offset values to calculate
+ * @param[in] DATA_TYPE The data type of the results
+ * @param[in] Z The basename of the result variables
+ * @param[in] Y               The work-item ID of the y-axis
+ * @param[in] HEIGHT_GEMM3D The height of GEMM3D
+ * @param[in] DEPTH_GEMM3D The depth of GEMM3D
+ * @param[in] CROSS_PLANE_PAD The padding required for plane changes across the z-dimension
+ * @param[in] STRIDE_Y The stride value in y-axis direction
+ * @{
+ */
+#define CALCULATE_Z_OFFSET_STR(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ CALCULATE_Z_OFFSET_##M0(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y)
+#define CALCULATE_Z_OFFSET(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y) \
+ CALCULATE_Z_OFFSET_STR(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, \
+ STRIDE_Y)
+/** @} */ // end of group CALCULATE_Z_OFFSET
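+
+/* A minimal expansion sketch, assuming M0=2, DATA_TYPE=uint and Z=zin (y is the work-item
+ * id along the y-axis; pad and stride_y are illustrative names):
+ *
+ *   CALCULATE_Z_OFFSET(2, uint, zin, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, pad, stride_y);
+ *   // zin0 = (0 + (uint)(y * (uint)2)) / (uint)HEIGHT_GEMM3D;
+ *   // zin0 = min((uint)(DEPTH_GEMM3D - 1), zin0);
+ *   // zin0 *= (pad * stride_y);
+ *   // ... and likewise for zin1, with the leading 0 replaced by 1.
+ */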
+
+/** Store the 0 to (n-1)th rows of the given variables
+ * @name STORE_ROW_n
+ *
+ * @param[in] N0 The size of the vectors
+ * @param[in] DATA_TYPE The data type of the vectors
+ * @param[in] BASENAME The basename of the variables
+ * @param[in] PTR The base pointer
+ * @param[in] STRIDE_Y The stride value in y-axis direction
+ * @param[in] Z The offset in z-axis direction
+ * @{
+ */
+#define STORE_ROW_1(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##0, 0, (__global DATA_TYPE *)(PTR + 0 * STRIDE_Y + Z##0));
+
+#define STORE_ROW_2(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_1(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##1, 0, (__global DATA_TYPE *)(PTR + 1 * STRIDE_Y + Z##1));
+
+#define STORE_ROW_3(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_2(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##2, 0, (__global DATA_TYPE *)(PTR + 2 * STRIDE_Y + Z##2));
+
+#define STORE_ROW_4(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_3(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##3, 0, (__global DATA_TYPE *)(PTR + 3 * STRIDE_Y + Z##3));
+
+#define STORE_ROW_5(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_4(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##4, 0, (__global DATA_TYPE *)(PTR + 4 * STRIDE_Y + Z##4));
+
+#define STORE_ROW_6(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_5(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##5, 0, (__global DATA_TYPE *)(PTR + 5 * STRIDE_Y + Z##5));
+
+#define STORE_ROW_7(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_6(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##6, 0, (__global DATA_TYPE *)(PTR + 6 * STRIDE_Y + Z##6));
+
+#define STORE_ROW_8(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_7(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##7, 0, (__global DATA_TYPE *)(PTR + 7 * STRIDE_Y + Z##7));
+
+#define STORE_ROW_9(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_8(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##8, 0, (__global DATA_TYPE *)(PTR + 8 * STRIDE_Y + Z##8));
+
+#define STORE_ROW_10(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_9(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##9, 0, (__global DATA_TYPE *)(PTR + 9 * STRIDE_Y + Z##9));
+
+#define STORE_ROW_11(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_10(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##A, 0, (__global DATA_TYPE *)(PTR + 10 * STRIDE_Y + Z##A));
+
+#define STORE_ROW_12(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_11(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##B, 0, (__global DATA_TYPE *)(PTR + 11 * STRIDE_Y + Z##B));
+
+#define STORE_ROW_13(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_12(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##C, 0, (__global DATA_TYPE *)(PTR + 12 * STRIDE_Y + Z##C));
+
+#define STORE_ROW_14(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_13(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##D, 0, (__global DATA_TYPE *)(PTR + 13 * STRIDE_Y + Z##D));
+
+#define STORE_ROW_15(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_14(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##E, 0, (__global DATA_TYPE *)(PTR + 14 * STRIDE_Y + Z##E));
+
+#define STORE_ROW_16(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_15(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (BASENAME##F, 0, (__global DATA_TYPE *)(PTR + 15 * STRIDE_Y + Z##F));
+/** @} */ // end of group STORE_ROW_n
+
+/** Convert and store the 0th to (n-1)th rows of the given variables
+ * @name CONVERT_STORE_ROW_n
+ *
+ * @param[in] N0 The size of the vectors
+ * @param[in] DATA_TYPE The data type of the vectors
+ * @param[in] BASENAME The basename of the variables
+ * @param[in] PTR The base pointer
+ * @param[in] STRIDE_Y The stride value in y-axis direction
+ * @param[in] Z The offset in z-axis direction
+ * @{
+ */
+#define CONVERT_STORE_ROW_1(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##0), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 0 * STRIDE_Y + Z##0));
+
+#define CONVERT_STORE_ROW_2(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_1(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##1), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 1 * STRIDE_Y + Z##1));
+
+#define CONVERT_STORE_ROW_3(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_2(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##2), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 2 * STRIDE_Y + Z##2));
+
+#define CONVERT_STORE_ROW_4(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_3(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##3), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 3 * STRIDE_Y + Z##3));
+
+#define CONVERT_STORE_ROW_5(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_4(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##4), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 4 * STRIDE_Y + Z##4));
+
+#define CONVERT_STORE_ROW_6(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_5(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##5), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 5 * STRIDE_Y + Z##5));
+
+#define CONVERT_STORE_ROW_7(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_6(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##6), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 6 * STRIDE_Y + Z##6));
+
+#define CONVERT_STORE_ROW_8(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_7(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##7), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 7 * STRIDE_Y + Z##7));
+
+#define CONVERT_STORE_ROW_9(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_8(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##8), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 8 * STRIDE_Y + Z##8));
+
+#define CONVERT_STORE_ROW_10(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_9(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##9), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 9 * STRIDE_Y + Z##9));
+
+#define CONVERT_STORE_ROW_11(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_10(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##A), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 10 * STRIDE_Y + Z##A));
+
+#define CONVERT_STORE_ROW_12(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_11(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##B), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 11 * STRIDE_Y + Z##B));
+
+#define CONVERT_STORE_ROW_13(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_12(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##C), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 12 * STRIDE_Y + Z##C));
+
+#define CONVERT_STORE_ROW_14(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_13(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##D), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 13 * STRIDE_Y + Z##D));
+
+#define CONVERT_STORE_ROW_15(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_14(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##E), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 14 * STRIDE_Y + Z##E));
+
+#define CONVERT_STORE_ROW_16(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_15(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ VSTORE(N0) \
+ (CONVERT_SAT((BASENAME##F), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, \
+ (__global DATA_TYPE *)(PTR + 15 * STRIDE_Y + Z##F));
+
+/** @} */ // end of group CONVERT_STORE_ROW_n
+
+/** Store a block of the given size M0xN0
+ * @name STORE_BLOCK
+ *
+ * Supported cases are M0=1,2,3,...,16 and N0=2,3,4,8,16.
+ * The data to store is expected to have consecutive names for each row.
+ * E.g., for M0=3 and basename=c, the expected names are c0, c1 and c2.
+ * The Z offset is expected to have consecutive names.
+ * E.g., for M0=3 and Z=zin, the expected z offset names are zin0, zin1 and zin2.
+ *
+ * @param[in] M0 The number of rows to store
+ * @param[in] N0 The size of each vector
+ * @param[in] DATA_TYPE The data type of the vectors
+ * @param[in] BASENAME The basename of the variables
+ * @param[in] PTR The base pointer
+ * @param[in] STRIDE_Y The stride value in y-axis direction
+ * @param[in] Z The offset in z-axis direction
+ * @{
+ */
+#define STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_ROW_##M0(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)
+#define STORE_BLOCK(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)
+/** @} */ // end of group STORE_BLOCK
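+
+/* A minimal expansion sketch, assuming M0=2, N0=8, DATA_TYPE=half, BASENAME=acc and
+ * Z=zout.s, as in the f16 GEMM kernels above:
+ *
+ *   STORE_BLOCK(2, 8, half, acc, dst_addr, dst_stride_y, zout.s);
+ *   // vstore8(acc0, 0, (__global half *)(dst_addr + 0 * dst_stride_y + zout.s0));
+ *   // vstore8(acc1, 0, (__global half *)(dst_addr + 1 * dst_stride_y + zout.s1));
+ */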
+
+/** Convert and store a block of the given size M0xN0
+ * @name CONVERT_STORE_BLOCK
+ *
+ * Supported cases are M0=1,2,3,...,16 and N0=2,3,4,8,16.
+ * The data to store is expected to have consecutive names for each row.
+ * E.g., for M0=3 and basename=c, the expected names are c0, c1 and c2.
+ * The Z offset is expected to have consecutive names.
+ * E.g., for M0=3 and Z=zin, the expected z offset names are zin0, zin1 and zin2.
+ *
+ * @param[in] M0 The number of rows to store
+ * @param[in] N0 The size of each vector
+ * @param[in] DATA_TYPE The data type of the vectors
+ * @param[in] BASENAME The basename of the variables
+ * @param[in] PTR The base pointer
+ * @param[in] STRIDE_Y The stride value in y-axis direction
+ * @param[in] Z The offset in z-axis direction
+ * @{
+ */
+#define CONVERT_STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_ROW_##M0(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)
+#define CONVERT_STORE_BLOCK(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
+ CONVERT_STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)
+/** @} */ // end of group CONVERT_STORE_BLOCK
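+
+/* A minimal expansion sketch, assuming M0=1, N0=4, DATA_TYPE=uchar and BASENAME=c, and
+ * assuming CONVERT_SAT maps to the convert_<type>_sat builtins (see helpers.h):
+ *
+ *   CONVERT_STORE_BLOCK(1, 4, uchar, c, dst_addr, dst_stride_y, zout.s);
+ *   // vstore4(convert_uchar4_sat(c0), 0,
+ *   //         (__global uchar *)(dst_addr + 0 * dst_stride_y + zout.s0));
+ */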
+
+/** Scale the rows in the given variables (BASENAME0 to BASENAMEn-1)
+ * @name SCALE_ROW_n
+ *
+ * @param[in] DATA_TYPE The data type of the variables
+ * @param[in] BASENAME The basename of the variables
+ * @param[in] SCALE The scale factor
+ * @{
+ */
+#define SCALE_ROW_1(DATA_TYPE, BASENAME, SCALE) BASENAME##0 *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_2(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_1(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##1 *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_3(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_2(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##2 *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_4(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_3(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##3 *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_5(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_4(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##4 *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_6(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_5(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##5 *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_7(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_6(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##6 *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_8(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_7(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##7 *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_9(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_8(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##8 *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_10(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_9(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##9 *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_11(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_10(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##A *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_12(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_11(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##B *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_13(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_12(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##C *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_14(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_13(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##D *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_15(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_14(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##E *= (DATA_TYPE)SCALE;
+
+#define SCALE_ROW_16(DATA_TYPE, BASENAME, SCALE) \
+ SCALE_ROW_15(DATA_TYPE, BASENAME, SCALE) \
+ BASENAME##F *= (DATA_TYPE)SCALE;
+/** @} */ // end of group SCALE_ROW_n
+
+/** Scale elements stored in a block (BASENAME)
+ * @name SCALE_BLOCK
+ *
+ * Supported cases are N=1,2,3,...,16
+ *
+ * @param[in] N The number of rows in the block
+ * @param[in] DATA_TYPE The data type of the block
+ * @param[in] BASENAME The basename of the block
+ * @param[in] SCALE The scale factor
+ * @{
+ */
+#define SCALE_BLOCK_STR(N, DATA_TYPE, BASENAME, SCALE) SCALE_ROW_##N(DATA_TYPE, BASENAME, SCALE)
+#define SCALE_BLOCK(N, DATA_TYPE, BASENAME, SCALE) SCALE_BLOCK_STR(N, DATA_TYPE, BASENAME, SCALE)
+/** @} */ // end of group SCALE_BLOCK
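+
+// Illustrative expansion (not part of the upstream helpers): SCALE_BLOCK(3, half, c, 0.5f)
+// selects SCALE_ROW_3 and expands to:
+//   c0 *= (half)0.5f;
+//   c1 *= (half)0.5f;
+//   c2 *= (half)0.5f;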
+
+/** Create a new vector containing the values at the given index for a set of given vectors
+ * @name COLUMN_VECTORn
+ *
+ * @param[in] IDX_COL The index value
+ * @param[in] BASENAME The basename of the destination vectors
+ * @param[in] X The basename of the source vectors
+ * @param[in] TYPE The data type of the destination vectors
+ * @{
+ */
+#define COLUMN_VECTOR1(IDX_COL, BASENAME, X, TYPE) \
+ TYPE BASENAME##IDX_COL = (TYPE)((X##0).s##IDX_COL);
+#define COLUMN_VECTOR2(IDX_COL, BASENAME, X, TYPE) \
+ VEC_DATA_TYPE(TYPE, 2) \
+ BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 2))((X##0).s##IDX_COL, (X##1).s##IDX_COL);
+#define COLUMN_VECTOR3(IDX_COL, BASENAME, X, TYPE) \
+ VEC_DATA_TYPE(TYPE, 3) \
+ BASENAME##IDX_COL = \
+ (VEC_DATA_TYPE(TYPE, 3))((X##0).s##IDX_COL, (X##1).s##IDX_COL, (X##2).s##IDX_COL);
+#define COLUMN_VECTOR4(IDX_COL, BASENAME, X, TYPE) \
+ VEC_DATA_TYPE(TYPE, 4) \
+ BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 4))((X##0).s##IDX_COL, (X##1).s##IDX_COL, \
+ (X##2).s##IDX_COL, (X##3).s##IDX_COL);
+#define COLUMN_VECTOR8(IDX_COL, BASENAME, X, TYPE) \
+ VEC_DATA_TYPE(TYPE, 8) \
+ BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 8))( \
+ (X##0).s##IDX_COL, (X##1).s##IDX_COL, (X##2).s##IDX_COL, (X##3).s##IDX_COL, (X##4).s##IDX_COL, \
+ (X##5).s##IDX_COL, (X##6).s##IDX_COL, (X##7).s##IDX_COL);
+#define COLUMN_VECTOR16(IDX_COL, BASENAME, X, TYPE) \
+ VEC_DATA_TYPE(TYPE, 16) \
+ BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 16))( \
+ (X##0).s##IDX_COL, (X##1).s##IDX_COL, (X##2).s##IDX_COL, (X##3).s##IDX_COL, (X##4).s##IDX_COL, \
+ (X##5).s##IDX_COL, (X##6).s##IDX_COL, (X##7).s##IDX_COL, (X##8).s##IDX_COL, (X##9).s##IDX_COL, \
+ (X##A).s##IDX_COL, (X##B).s##IDX_COL, (X##C).s##IDX_COL, (X##D).s##IDX_COL, (X##E).s##IDX_COL, \
+ (X##F).s##IDX_COL);
+/** @} */ // end of group COLUMN_VECTORn
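+
+// Illustrative expansion (not part of the upstream helpers): COLUMN_VECTOR4(3, res, a, uchar)
+// gathers element .s3 from the four source vectors a0..a3 into one column vector:
+//   uchar4 res3 = (uchar4)((a0).s3, (a1).s3, (a2).s3, (a3).s3);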
+
+/** Create a new vector containing the values at the given index. Utility macros for transposing a
+ * column-vector
+ * @name COLUMN_VECTOR_SCALARn
+ *
+ * @param[in] IDX_COL The index value
+ * @param[in] BASENAME The basename of the destination vectors
+ * @param[in] X The basename of the source vectors
+ * @param[in] TYPE The data type of the destination vectors
+ * @{
+ */
+#define COLUMN_VECTOR_SCALAR1(IDX_COL, BASENAME, X, TYPE) TYPE BASENAME##IDX_COL = (TYPE)((X##0));
+#define COLUMN_VECTOR_SCALAR2(IDX_COL, BASENAME, X, TYPE) \
+ VEC_DATA_TYPE(TYPE, 2) \
+ BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 2))((X##0), (X##1));
+#define COLUMN_VECTOR_SCALAR3(IDX_COL, BASENAME, X, TYPE) \
+ VEC_DATA_TYPE(TYPE, 3) \
+ BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 3))((X##0), (X##1), (X##2));
+#define COLUMN_VECTOR_SCALAR4(IDX_COL, BASENAME, X, TYPE) \
+ VEC_DATA_TYPE(TYPE, 4) \
+ BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 4))((X##0), (X##1), (X##2), (X##3));
+#define COLUMN_VECTOR_SCALAR8(IDX_COL, BASENAME, X, TYPE) \
+ VEC_DATA_TYPE(TYPE, 8) \
+ BASENAME##IDX_COL = \
+ (VEC_DATA_TYPE(TYPE, 8))((X##0), (X##1), (X##2), (X##3), (X##4), (X##5), (X##6), (X##7));
+#define COLUMN_VECTOR_SCALAR16(IDX_COL, BASENAME, X, TYPE) \
+ VEC_DATA_TYPE(TYPE, 16) \
+ BASENAME##IDX_COL = \
+ (VEC_DATA_TYPE(TYPE, 16))((X##0), (X##1), (X##2), (X##3), (X##4), (X##5), (X##6), (X##7), \
+ (X##8), (X##9), (X##A), (X##B), (X##C), (X##D), (X##E), (X##F));
+/** @} */ // end of group COLUMN_VECTOR_SCALARn
+
+/** Create transposed vectors of the given vectors
+ * @name TRANSPOSE_K0Xn
+ *
+ * @param[in] K0 The size of the source vectors
+ * @param[in] BASENAME The basename of transposed vectors
+ * @param[in] B The basename of source vectors for transposition
+ * @param[in] TYPE The data type of the transposed vectors
+ * @{
+ */
+#define TRANSPOSE_K0X1(K0, BASENAME, B, TYPE) COLUMN_VECTOR_SCALAR(K0, 0, BASENAME, B, TYPE);
+#define TRANSPOSE_K0X2(K0, BASENAME, B, TYPE) \
+ COLUMN_VECTOR(K0, 0, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, 1, BASENAME, B, TYPE);
+#define TRANSPOSE_K0X3(K0, BASENAME, B, TYPE) \
+ TRANSPOSE_K0X2(K0, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, 2, BASENAME, B, TYPE);
+#define TRANSPOSE_K0X4(K0, BASENAME, B, TYPE) \
+ TRANSPOSE_K0X3(K0, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, 3, BASENAME, B, TYPE);
+#define TRANSPOSE_K0X8(K0, BASENAME, B, TYPE) \
+ TRANSPOSE_K0X4(K0, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, 4, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, 5, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, 6, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, 7, BASENAME, B, TYPE);
+#define TRANSPOSE_K0X16(K0, BASENAME, B, TYPE) \
+ TRANSPOSE_K0X8(K0, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, 8, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, 9, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, A, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, B, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, C, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, D, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, E, BASENAME, B, TYPE); \
+ COLUMN_VECTOR(K0, F, BASENAME, B, TYPE);
+
+/** @} */ // end of group TRANSPOSE_K0Xn
+
+/** Create column vectors to contain the values at the given index for a set of given vectors
+ *
+ * @param[in] K0 The number of source vectors
+ * @param[in] IDX_COL The index value
+ * @param[in] BASENAME The basename of the destination vectors
+ * @param[in] B The basename of the source vectors
+ * @param[in] TYPE The data type of the destination vectors
+ */
+#define COLUMN_VECTOR(K0, IDX_COL, BASENAME, B, TYPE) \
+ CONCAT(COLUMN_VECTOR, K0) \
+ (IDX_COL, BASENAME, B, TYPE);
+
+/** Create column vectors to contain the values at the given index. Utility macro for transposing a
+ * column-vector
+ *
+ * @param[in] K0 The number of source vectors
+ * @param[in] IDX_COL The index value
+ * @param[in] BASENAME The basename of the destination vectors
+ * @param[in] B The basename of the source vectors
+ * @param[in] TYPE The data type of the destination vectors
+ */
+#define COLUMN_VECTOR_SCALAR(K0, IDX_COL, BASENAME, B, TYPE) \
+ CONCAT(COLUMN_VECTOR_SCALAR, K0) \
+ (IDX_COL, BASENAME, B, TYPE);
+
+/** Create transposed vectors from the given source vectors
+ *
+ * @param[in] K0 The size of source vectors
+ * @param[in] N0 The number of source vectors
+ * @param[in] BASENAME The basename of transposed vectors
+ * @param[in] B The basename of source vectors for transposition
+ * @param[in] TYPE The data type of the transposed vectors
+ *
+ */
+#define TRANSPOSE_K0XN0(K0, N0, BASENAME, B, TYPE) \
+ CONCAT(TRANSPOSE_K0X, N0) \
+ (K0, BASENAME, B, TYPE);
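+
+// Illustrative expansion (not part of the upstream helpers): TRANSPOSE_K0XN0(4, 2, res, b, uchar)
+// dispatches to TRANSPOSE_K0X2 and gathers the two column vectors of the block b0..b3:
+//   uchar4 res0 = (uchar4)((b0).s0, (b1).s0, (b2).s0, (b3).s0);
+//   uchar4 res1 = (uchar4)((b0).s1, (b1).s1, (b2).s1, (b3).s1);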
+
+/** Add the variables (BIAS0 to BIASn-1) to the others (BASENAME0 to BASENAMEn-1)
+ * @name ADD_ROW_n
+ *
+ * @param[in] BASENAME The basename of the destination variables
+ * @param[in] BIAS The basename of the added variables
+ * @{
+ */
+#define ADD_ROW_1(BASENAME, BIAS) BASENAME##0 += BIAS##0;
+
+#define ADD_ROW_2(BASENAME, BIAS) \
+ ADD_ROW_1(BASENAME, BIAS) \
+ BASENAME##1 += BIAS##1;
+
+#define ADD_ROW_3(BASENAME, BIAS) \
+ ADD_ROW_2(BASENAME, BIAS) \
+ BASENAME##2 += BIAS##2;
+
+#define ADD_ROW_4(BASENAME, BIAS) \
+ ADD_ROW_3(BASENAME, BIAS) \
+ BASENAME##3 += BIAS##3;
+
+#define ADD_ROW_5(BASENAME, BIAS) \
+ ADD_ROW_4(BASENAME, BIAS) \
+ BASENAME##4 += BIAS##4;
+
+#define ADD_ROW_6(BASENAME, BIAS) \
+ ADD_ROW_5(BASENAME, BIAS) \
+ BASENAME##5 += BIAS##5;
+
+#define ADD_ROW_7(BASENAME, BIAS) \
+ ADD_ROW_6(BASENAME, BIAS) \
+ BASENAME##6 += BIAS##6;
+
+#define ADD_ROW_8(BASENAME, BIAS) \
+ ADD_ROW_7(BASENAME, BIAS) \
+ BASENAME##7 += BIAS##7;
+
+#define ADD_ROW_9(BASENAME, BIAS) \
+ ADD_ROW_8(BASENAME, BIAS) \
+ BASENAME##8 += BIAS##8;
+
+#define ADD_ROW_10(BASENAME, BIAS) \
+ ADD_ROW_9(BASENAME, BIAS) \
+ BASENAME##9 += BIAS##9;
+
+#define ADD_ROW_11(BASENAME, BIAS) \
+ ADD_ROW_10(BASENAME, BIAS) \
+ BASENAME##A += BIAS##A;
+
+#define ADD_ROW_12(BASENAME, BIAS) \
+ ADD_ROW_11(BASENAME, BIAS) \
+ BASENAME##B += BIAS##B;
+
+#define ADD_ROW_13(BASENAME, BIAS) \
+ ADD_ROW_12(BASENAME, BIAS) \
+ BASENAME##C += BIAS##C;
+
+#define ADD_ROW_14(BASENAME, BIAS) \
+ ADD_ROW_13(BASENAME, BIAS) \
+ BASENAME##D += BIAS##D;
+
+#define ADD_ROW_15(BASENAME, BIAS) \
+ ADD_ROW_14(BASENAME, BIAS) \
+ BASENAME##E += BIAS##E;
+
+#define ADD_ROW_16(BASENAME, BIAS) \
+ ADD_ROW_15(BASENAME, BIAS) \
+ BASENAME##F += BIAS##F;
+
+/** @} */ // end of group ADD_ROW_n
+
+/** Add the block (BIAS) to another block (BASENAME)
+ * @name ADD_BLOCK
+ *
+ * Supported cases are N=1,2,3,...,16
+ *
+ * @param[in] N The number of vectors in the block
+ * @param[in] BASENAME The basename of the destination variables
+ * @param[in] BIAS The basename of the added variables
+ * @{
+ */
+#define ADD_BLOCK_STR(N, BASENAME, BIAS) ADD_ROW_##N(BASENAME, BIAS)
+#define ADD_BLOCK(N, BASENAME, BIAS) ADD_BLOCK_STR(N, BASENAME, BIAS)
+/** @} */ // end of group ADD_BLOCK
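+
+// Illustrative expansion (not part of the upstream helpers): ADD_BLOCK(2, c, bias) selects
+// ADD_ROW_2 and expands to:
+//   c0 += bias0;
+//   c1 += bias1;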
+
+/** Broadcast (add a single value) to each element of the destination variables
+ * @name ADD_ROW_BROADCAST_n
+ *
+ * @param[in] BASENAME The basename of the destination variables
+ * @param[in] BIAS The variable containing the value to add
+ * @{
+ */
+#define ADD_ROW_BROADCAST_1(BASENAME, BIAS) BASENAME##0 += BIAS;
+
+#define ADD_ROW_BROADCAST_2(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_1(BASENAME, BIAS) \
+ BASENAME##1 += BIAS;
+
+#define ADD_ROW_BROADCAST_3(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_2(BASENAME, BIAS) \
+ BASENAME##2 += BIAS;
+
+#define ADD_ROW_BROADCAST_4(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_3(BASENAME, BIAS) \
+ BASENAME##3 += BIAS;
+
+#define ADD_ROW_BROADCAST_5(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_4(BASENAME, BIAS) \
+ BASENAME##4 += BIAS;
+
+#define ADD_ROW_BROADCAST_6(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_5(BASENAME, BIAS) \
+ BASENAME##5 += BIAS;
+
+#define ADD_ROW_BROADCAST_7(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_6(BASENAME, BIAS) \
+ BASENAME##6 += BIAS;
+
+#define ADD_ROW_BROADCAST_8(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_7(BASENAME, BIAS) \
+ BASENAME##7 += BIAS;
+
+#define ADD_ROW_BROADCAST_9(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_8(BASENAME, BIAS) \
+ BASENAME##8 += BIAS;
+
+#define ADD_ROW_BROADCAST_10(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_9(BASENAME, BIAS) \
+ BASENAME##9 += BIAS;
+
+#define ADD_ROW_BROADCAST_11(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_10(BASENAME, BIAS) \
+ BASENAME##A += BIAS;
+
+#define ADD_ROW_BROADCAST_12(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_11(BASENAME, BIAS) \
+ BASENAME##B += BIAS;
+
+#define ADD_ROW_BROADCAST_13(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_12(BASENAME, BIAS) \
+ BASENAME##C += BIAS;
+
+#define ADD_ROW_BROADCAST_14(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_13(BASENAME, BIAS) \
+ BASENAME##D += BIAS;
+
+#define ADD_ROW_BROADCAST_15(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_14(BASENAME, BIAS) \
+ BASENAME##E += BIAS;
+
+#define ADD_ROW_BROADCAST_16(BASENAME, BIAS) \
+ ADD_ROW_BROADCAST_15(BASENAME, BIAS) \
+  BASENAME##F += BIAS;
+/** @} */ // end of group ADD_ROW_BROADCAST_n
+
+/** Broadcast (add a value) to each element of the destination block (BASENAME)
+ * @name ADD_BLOCK_BROADCAST
+ *
+ * Supported cases are N=1,2,3,...,16.
+ *
+ * @param[in] N The number of vectors in the block
+ * @param[in] BASENAME The basename of the destination variables
+ * @param[in] BIAS The variable containing the value to add
+ * @{
+ */
+#define ADD_BLOCK_BROADCAST_STR(N, BASENAME, BIAS) ADD_ROW_BROADCAST_##N(BASENAME, BIAS)
+#define ADD_BLOCK_BROADCAST(N, BASENAME, BIAS) ADD_BLOCK_BROADCAST_STR(N, BASENAME, BIAS)
+/** @} */ // end of group ADD_BLOCK_BROADCAST
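+
+// Illustrative expansion (not part of the upstream helpers): ADD_BLOCK_BROADCAST(2, c, bias_val)
+// adds the same value to every row:
+//   c0 += bias_val;
+//   c1 += bias_val;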
+
+/** Apply activation to the given variables
+ * @name ACTIVATION_ROW_n
+ *
+ * @param[in] ACTIVATION_TYPE The type of the activation
+ * @param[in] DATA_TYPE The data type of the vectors
+ * @param[in] BASENAME The basename of the variables
+ * @param[in] A_VAL Additional value required by the activation
+ * @param[in] B_VAL Additional value required by the activation
+ * @{
+ */
+#define ACTIVATION_ROW_1(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##0 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##0, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_2(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_1(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##1 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##1, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_3(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_2(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##2 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##2, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_4(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_3(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##3 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##3, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_5(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_4(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##4 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##4, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_6(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_5(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##5 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##5, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_7(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_6(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##6 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##6, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_8(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_7(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##7 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##7, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_9(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_8(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##8 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##8, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_10(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_9(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##9 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##9, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_11(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_10(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##A = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##A, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_12(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_11(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##B = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##B, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_13(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_12(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##C = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##C, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_14(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_13(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##D = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##D, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_15(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_14(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##E = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##E, A_VAL, B_VAL);
+
+#define ACTIVATION_ROW_16(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_15(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ BASENAME##F = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##F, A_VAL, B_VAL);
+/** @} */ // end of group ACTIVATION_ROW_n
+
+/** Apply activation to a block (BASENAME)
+ * @name ACTIVATION_BLOCK
+ *
+ * Supported cases are N=1,2,3,...,16.
+ *
+ * @param[in] N The number of vectors in the block
+ * @param[in] ACTIVATION_TYPE The type of the activation
+ * @param[in] DATA_TYPE The data type of the vectors
+ * @param[in] BASENAME The basename of the variables
+ * @param[in] A_VAL Additional value required by the activation
+ * @param[in] B_VAL Additional value required by the activation
+ * @{
+ */
+#define ACTIVATION_BLOCK_STR(N, ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_ROW_##N(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)
+#define ACTIVATION_BLOCK(N, ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
+ ACTIVATION_BLOCK_STR(N, ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)
+/** @} */ // end of group ACTIVATION_BLOCK
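+
+// Illustrative expansion (not part of the upstream helpers), assuming the ACTIVATION macro from
+// the activation helper headers: ACTIVATION_BLOCK(2, relu, float, c, A_VAL, B_VAL) expands to:
+//   c0 = ACTIVATION(relu, float, c0, A_VAL, B_VAL);
+//   c1 = ACTIVATION(relu, float, c1, A_VAL, B_VAL);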
+
+/** Apply convert_<data_type> to the given variables
+ * @name CONVERT_ROW_n
+ *
+ * @param[in] N The size of the vectors
+ * @param[in] DATA_TYPE The data type of the vectors
+ * @param[in] BASENAME_SRC The basename of the source variables
+ * @param[in] BASENAME_DST The basename of the destination variables
+ * @{
+ */
+#define CONVERT_ROW_1(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##0 = CONVERT(BASENAME_SRC##0, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_2(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_1(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##1 = CONVERT(BASENAME_SRC##1, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_3(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_2(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##2 = CONVERT(BASENAME_SRC##2, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_4(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_3(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##3 = CONVERT(BASENAME_SRC##3, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_5(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_4(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##4 = CONVERT(BASENAME_SRC##4, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_6(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_5(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##5 = CONVERT(BASENAME_SRC##5, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_7(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_6(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##6 = CONVERT(BASENAME_SRC##6, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_8(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_7(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##7 = CONVERT(BASENAME_SRC##7, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_9(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_8(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##8 = CONVERT(BASENAME_SRC##8, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_10(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_9(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##9 = CONVERT(BASENAME_SRC##9, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_11(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_10(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##A = CONVERT(BASENAME_SRC##A, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_12(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_11(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##B = CONVERT(BASENAME_SRC##B, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_13(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_12(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##C = CONVERT(BASENAME_SRC##C, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_14(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_13(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##D = CONVERT(BASENAME_SRC##D, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_15(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_14(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##E = CONVERT(BASENAME_SRC##E, VEC_DATA_TYPE(DATA_TYPE, N));
+
+#define CONVERT_ROW_16(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_15(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ VEC_DATA_TYPE(DATA_TYPE, N) \
+ BASENAME_DST##F = CONVERT(BASENAME_SRC##F, VEC_DATA_TYPE(DATA_TYPE, N));
+/** @} */ // end of group CONVERT_ROW_n
+
+/** Apply convert_<data_type> to a block (BASENAME_SRC) and save to another block (BASENAME_DST)
+ * @name CONVERT_BLOCK
+ *
+ * Supported cases are N=1,2,3,...,16.
+ *
+ * @param[in] M The number of vectors to convert
+ * @param[in] N The size of the vectors
+ * @param[in] DATA_TYPE The data type of the vectors
+ * @param[in] BASENAME_SRC The basename of the source variables
+ * @param[in] BASENAME_DST The basename of the destination variables
+ * @{
+ */
+#define CONVERT_BLOCK_STR(M, N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_ROW_##M(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
+#define CONVERT_BLOCK(M, N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
+ CONVERT_BLOCK_STR(M, N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
+/** @} */ // end of group CONVERT_BLOCK
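+
+// Illustrative expansion (not part of the upstream helpers), assuming VEC_DATA_TYPE(int, 4)
+// resolves to int4: CONVERT_BLOCK(2, 4, int, c, c_int) selects CONVERT_ROW_2 and declares
+// converted copies of the two rows:
+//   int4 c_int0 = CONVERT(c0, int4);
+//   int4 c_int1 = CONVERT(c1, int4);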
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/gemmlowp.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/gemmlowp.cl
new file mode 100644
index 000000000..2d9acc753
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/gemmlowp.cl
@@ -0,0 +1,2733 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Copyright (c) 2017-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "gemm_helpers.h"
+#include "helpers_asymm.h"
+#include "repeat.h"
+
+#if defined(DATA_TYPE) && defined(ACC_DATA_TYPE)
+
+#if defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
+#if defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) && \
+ defined(cl_arm_integer_dot_product_accumulate_int8)
+#define ARM_DOT(x, y, val) val = arm_dot_acc((x), (y), (val));
+#else // defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) &&
+ // defined(cl_arm_integer_dot_product_accumulate_int8)
+#define ARM_DOT(x, y, val) val += arm_dot((x), (y));
+#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) &&
+ // defined(cl_arm_integer_dot_product_accumulate_int8)
+#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
+
+#if defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
+
+/** Specialized macros to perform the dot product instruction between two vectors of size N [1,16].
+ * These macros use the dot8 instruction */
+#define ARM_DOT1(a, b, c) \
+ ({ \
+ ARM_DOT((VEC_DATA_TYPE(DATA_TYPE, 4))(a, (VEC_DATA_TYPE(DATA_TYPE, 3))0), \
+ (VEC_DATA_TYPE(DATA_TYPE, 4))(b, (VEC_DATA_TYPE(DATA_TYPE, 3))0), c); \
+ })
+#define ARM_DOT2(a, b, c) \
+ ({ \
+ ARM_DOT((VEC_DATA_TYPE(DATA_TYPE, 4))(a, (VEC_DATA_TYPE(DATA_TYPE, 2))0), \
+ (VEC_DATA_TYPE(DATA_TYPE, 4))(b, (VEC_DATA_TYPE(DATA_TYPE, 2))0), c); \
+ })
+#define ARM_DOT3(a, b, c) \
+ ({ \
+ ARM_DOT((VEC_DATA_TYPE(DATA_TYPE, 4))(a, (DATA_TYPE)0), \
+ (VEC_DATA_TYPE(DATA_TYPE, 4))(b, (DATA_TYPE)0), c); \
+ })
+#define ARM_DOT4(a, b, c) ({ ARM_DOT(a, b, c); })
+#define ARM_DOT8(a, b, c) \
+ ({ \
+ ARM_DOT4((a.lo), (b.lo), c); \
+ ARM_DOT4((a.hi), (b.hi), c); \
+ })
+#define ARM_DOT16(a, b, c) \
+ ({ \
+ ARM_DOT8((a.lo), (b.lo), c); \
+ ARM_DOT8((a.hi), (b.hi), c); \
+ })
+
+#else // defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
+
+/** Specialized macros to perform the dot product instruction between two vectors of size K0 [1,16]
+ * without using the dot8 instruction. */
+#define ARM_DOT1(a, b, c) ({ c += (ACC_DATA_TYPE)a * b; })
+#define ARM_DOT2(a, b, c) \
+ ({ \
+ c += (ACC_DATA_TYPE)a.s0 * b.s0; \
+ c += (ACC_DATA_TYPE)a.s1 * b.s1; \
+ })
+#define ARM_DOT3(a, b, c) \
+ ({ \
+ ARM_DOT2(a, b, c); \
+ c += (ACC_DATA_TYPE)a.s2 * b.s2; \
+ })
+#define ARM_DOT4(a, b, c) \
+ ({ \
+ ARM_DOT3(a, b, c); \
+ c += (ACC_DATA_TYPE)a.s3 * b.s3; \
+ })
+#define ARM_DOT8(a, b, c) \
+ ({ \
+ ARM_DOT4((a.lo), (b.lo), c); \
+ ARM_DOT4((a.hi), (b.hi), c); \
+ })
+#define ARM_DOT16(a, b, c) \
+ ({ \
+ ARM_DOT8((a.lo), (b.lo), c); \
+ ARM_DOT8((a.hi), (b.hi), c); \
+ })
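+
+// Illustrative expansion (not part of the upstream kernels), assuming -DDATA_TYPE=uchar and
+// -DACC_DATA_TYPE=uint: on this fallback path ARM_DOT4(a, b, c) unrolls to four scalar
+// multiply-accumulates:
+//   c += (uint)a.s0 * b.s0;
+//   c += (uint)a.s1 * b.s1;
+//   c += (uint)a.s2 * b.s2;
+//   c += (uint)a.s3 * b.s3;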
+#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
+
+/** Specialized macros to perform a broadcast dot product operation between one vector "a" and N0
+ * vectors "b" of size K0 [1,16] */
+#define ARM_DOT_K0X1(k0, a, b, c) ({ ARM_DOT_K0(k0, (a), (b##0), (c)); })
+#define ARM_DOT_K0X2(k0, a, b, c) \
+ ({ \
+ ARM_DOT_K0(k0, (a), (b##0), (c.s0)); \
+ ARM_DOT_K0(k0, (a), (b##1), (c.s1)); \
+ })
+#define ARM_DOT_K0X3(k0, a, b, c) \
+ ({ \
+ ARM_DOT_K0X2(k0, a, b, c); \
+ ARM_DOT_K0(k0, (a), (b##2), (c.s2)); \
+ })
+#define ARM_DOT_K0X4(k0, a, b, c) \
+ ({ \
+ ARM_DOT_K0X3(k0, a, b, c); \
+ ARM_DOT_K0(k0, (a), (b##3), (c.s3)); \
+ })
+#define ARM_DOT_K0X8(k0, a, b, c) \
+ ({ \
+ ARM_DOT_K0X4(k0, a, b, c); \
+ ARM_DOT_K0(k0, (a), (b##4), (c.s4)); \
+ ARM_DOT_K0(k0, (a), (b##5), (c.s5)); \
+ ARM_DOT_K0(k0, (a), (b##6), (c.s6)); \
+ ARM_DOT_K0(k0, (a), (b##7), (c.s7)); \
+ })
+#define ARM_DOT_K0X16(k0, a, b, c) \
+ ({ \
+ ARM_DOT_K0X8(k0, a, b, c); \
+ ARM_DOT_K0(k0, (a), (b##8), (c.s8)); \
+ ARM_DOT_K0(k0, (a), (b##9), (c.s9)); \
+ ARM_DOT_K0(k0, (a), (b##A), (c.sA)); \
+ ARM_DOT_K0(k0, (a), (b##B), (c.sB)); \
+ ARM_DOT_K0(k0, (a), (b##C), (c.sC)); \
+ ARM_DOT_K0(k0, (a), (b##D), (c.sD)); \
+ ARM_DOT_K0(k0, (a), (b##E), (c.sE)); \
+ ARM_DOT_K0(k0, (a), (b##F), (c.sF)); \
+ })
+
+/** Specialized macros to perform a partial matrix multiplication with dimensions M0,N0,K0 */
+#define ARM_MM_K0XN0X1(n0, k0, a, b, c) ({ ARM_DOT_K0XN0(n0, k0, (a##0), b, (c##0)); })
+#define ARM_MM_K0XN0X2(n0, k0, a, b, c) \
+ ({ \
+ ARM_MM_K0XN0X1(n0, k0, a, b, c); \
+ ARM_DOT_K0XN0(n0, k0, (a##1), b, (c##1)); \
+ })
+#define ARM_MM_K0XN0X3(n0, k0, a, b, c) \
+ ({ \
+ ARM_MM_K0XN0X2(n0, k0, a, b, c); \
+ ARM_DOT_K0XN0(n0, k0, (a##2), b, (c##2)); \
+ })
+#define ARM_MM_K0XN0X4(n0, k0, a, b, c) \
+ ({ \
+ ARM_MM_K0XN0X3(n0, k0, a, b, c); \
+ ARM_DOT_K0XN0(n0, k0, (a##3), b, (c##3)); \
+ })
+#define ARM_MM_K0XN0X5(n0, k0, a, b, c) \
+ ({ \
+ ARM_MM_K0XN0X4(n0, k0, a, b, c); \
+ ARM_DOT_K0XN0(n0, k0, (a##4), b, (c##4)); \
+ })
+#define ARM_MM_K0XN0X6(n0, k0, a, b, c) \
+ ({ \
+ ARM_MM_K0XN0X5(n0, k0, a, b, c); \
+ ARM_DOT_K0XN0(n0, k0, (a##5), b, (c##5)); \
+ })
+#define ARM_MM_K0XN0X7(n0, k0, a, b, c) \
+ ({ \
+ ARM_MM_K0XN0X6(n0, k0, a, b, c); \
+ ARM_DOT_K0XN0(n0, k0, (a##6), b, (c##6)); \
+ })
+#define ARM_MM_K0XN0X8(n0, k0, a, b, c) \
+ ({ \
+ ARM_MM_K0XN0X7(n0, k0, a, b, c); \
+ ARM_DOT_K0XN0(n0, k0, (a##7), b, (c##7)); \
+ })
+
+#define ARM_DOT_K0(k0, a, b, c) \
+ ({ \
+ CONCAT(ARM_DOT, k0) \
+ ((a), (b), (c)); \
+ })
+
+#define ARM_DOT_K0XN0(n0, k0, a, b, c) \
+ ({ \
+ CONCAT(ARM_DOT_K0X, n0) \
+ (k0, (a), b, (c)); \
+ })
+
+#define ARM_MM_K0XN0XM0(m0, n0, k0, a, b, c) \
+ ({ \
+ CONCAT(ARM_MM_K0XN0X, m0) \
+ (n0, k0, a, b, c); \
+ })
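+
+// Illustrative expansion (not part of the upstream kernels): ARM_MM_K0XN0XM0(2, 2, 4, a, b, c)
+// computes a 2x2 output tile from dot products of depth K0=4:
+//   ARM_DOT4(a0, b0, c0.s0); ARM_DOT4(a0, b1, c0.s1);
+//   ARM_DOT4(a1, b0, c1.s0); ARM_DOT4(a1, b1, c1.s1);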
+
+/** Specialized macros to perform a broadcast multiplication between the elements of one vector "a"
+ * and K0 vectors "b" of size N0 [1,16] */
+#define ARM_MUL_N0X1(VECTOR_ACC_TYPE, a, b, c) ({ c += CONVERT(b##0, VECTOR_ACC_TYPE) * a; })
+#define ARM_MUL_N0X2(VECTOR_ACC_TYPE, a, b, c) \
+ ({ \
+ c += CONVERT(b##0, VECTOR_ACC_TYPE) * a.s##0; \
+ c += CONVERT(b##1, VECTOR_ACC_TYPE) * a.s##1; \
+ })
+#define ARM_MUL_N0X3(VECTOR_ACC_TYPE, a, b, c) \
+ ({ \
+ ARM_MUL_N0X2(VECTOR_ACC_TYPE, a, b, c); \
+ c += CONVERT(b##2, VECTOR_ACC_TYPE) * a.s##2; \
+ })
+#define ARM_MUL_N0X4(VECTOR_ACC_TYPE, a, b, c) \
+ ({ \
+ ARM_MUL_N0X3(VECTOR_ACC_TYPE, a, b, c); \
+ c += CONVERT(b##3, VECTOR_ACC_TYPE) * a.s##3; \
+ })
+#define ARM_MUL_N0X8(VECTOR_ACC_TYPE, a, b, c) \
+ ({ \
+ ARM_MUL_N0X4(VECTOR_ACC_TYPE, a, b, c); \
+ c += CONVERT(b##4, VECTOR_ACC_TYPE) * a.s##4; \
+ c += CONVERT(b##5, VECTOR_ACC_TYPE) * a.s##5; \
+ c += CONVERT(b##6, VECTOR_ACC_TYPE) * a.s##6; \
+ c += CONVERT(b##7, VECTOR_ACC_TYPE) * a.s##7; \
+ })
+#define ARM_MUL_N0X16(VECTOR_ACC_TYPE, a, b, c) \
+ ({ \
+ ARM_MUL_N0X8(VECTOR_ACC_TYPE, a, b, c); \
+ c += CONVERT(b##8, VECTOR_ACC_TYPE) * a.s##8; \
+ c += CONVERT(b##9, VECTOR_ACC_TYPE) * a.s##9; \
+ c += CONVERT(b##A, VECTOR_ACC_TYPE) * a.s##A; \
+ c += CONVERT(b##B, VECTOR_ACC_TYPE) * a.s##B; \
+ c += CONVERT(b##C, VECTOR_ACC_TYPE) * a.s##C; \
+ c += CONVERT(b##D, VECTOR_ACC_TYPE) * a.s##D; \
+ c += CONVERT(b##E, VECTOR_ACC_TYPE) * a.s##E; \
+ c += CONVERT(b##F, VECTOR_ACC_TYPE) * a.s##F; \
+ })
+/** Specialized macros to perform a partial matrix multiplication with dimensions M0,N0,K0 */
+#define ARM_MM_NATIVE_N0XK0X1(VECTOR_ACC_TYPE, k0, a, b, c) \
+ ({ ARM_MUL_N0XK0(VECTOR_ACC_TYPE, k0, (a##0), b, (c##0)); })
+#define ARM_MM_NATIVE_N0XK0X2(VECTOR_ACC_TYPE, k0, a, b, c) \
+ ({ \
+ ARM_MM_NATIVE_N0XK0X1(VECTOR_ACC_TYPE, k0, a, b, c); \
+ ARM_MUL_N0XK0(VECTOR_ACC_TYPE, k0, (a##1), b, (c##1)); \
+ })
+#define ARM_MM_NATIVE_N0XK0X3(VECTOR_ACC_TYPE, k0, a, b, c) \
+ ({ \
+ ARM_MM_NATIVE_N0XK0X2(VECTOR_ACC_TYPE, k0, a, b, c); \
+ ARM_MUL_N0XK0(VECTOR_ACC_TYPE, k0, (a##2), b, (c##2)); \
+ })
+#define ARM_MM_NATIVE_N0XK0X4(VECTOR_ACC_TYPE, k0, a, b, c) \
+ ({ \
+ ARM_MM_NATIVE_N0XK0X3(VECTOR_ACC_TYPE, k0, a, b, c); \
+ ARM_MUL_N0XK0(VECTOR_ACC_TYPE, k0, (a##3), b, (c##3)); \
+ })
+#define ARM_MM_NATIVE_N0XK0X5(VECTOR_ACC_TYPE, k0, a, b, c) \
+ ({ \
+ ARM_MM_NATIVE_N0XK0X4(VECTOR_ACC_TYPE, k0, a, b, c); \
+ ARM_MUL_N0XK0(VECTOR_ACC_TYPE, k0, (a##4), b, (c##4)); \
+ })
+#define ARM_MM_NATIVE_N0XK0X6(VECTOR_ACC_TYPE, k0, a, b, c) \
+ ({ \
+ ARM_MM_NATIVE_N0XK0X5(VECTOR_ACC_TYPE, k0, a, b, c); \
+ ARM_MUL_N0XK0(VECTOR_ACC_TYPE, k0, (a##5), b, (c##5)); \
+ })
+#define ARM_MM_NATIVE_N0XK0X7(VECTOR_ACC_TYPE, k0, a, b, c) \
+ ({ \
+ ARM_MM_NATIVE_N0XK0X6(VECTOR_ACC_TYPE, k0, a, b, c); \
+ ARM_MUL_N0XK0(VECTOR_ACC_TYPE, k0, (a##6), b, (c##6)); \
+ })
+#define ARM_MM_NATIVE_N0XK0X8(VECTOR_ACC_TYPE, k0, a, b, c) \
+ ({ \
+ ARM_MM_NATIVE_N0XK0X7(VECTOR_ACC_TYPE, k0, a, b, c); \
+ ARM_MUL_N0XK0(VECTOR_ACC_TYPE, k0, (a##7), b, (c##7)); \
+ })
+#define ARM_MUL_N0XK0(VECTOR_ACC_TYPE, k0, a, b, c) \
+ ({ \
+ CONCAT(ARM_MUL_N0X, k0) \
+ (VECTOR_ACC_TYPE, (a), b, (c)); \
+ })
+#define ARM_MM_NATIVE_N0XK0XM0(VECTOR_ACC_TYPE, m0, k0, a, b, c) \
+ ({ \
+ CONCAT(ARM_MM_NATIVE_N0XK0X, m0) \
+ (VECTOR_ACC_TYPE, k0, a, b, c); \
+ })
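+
+// Illustrative expansion (not part of the upstream kernels), assuming -DACC_DATA_TYPE=uint:
+// ARM_MM_NATIVE_N0XK0XM0(uint4, 2, 2, a, b, c) broadcasts each scalar of the rows of "a"
+// against the rows of "b" and accumulates:
+//   c0 += CONVERT(b0, uint4) * (a0).s0; c0 += CONVERT(b1, uint4) * (a0).s1;
+//   c1 += CONVERT(b0, uint4) * (a1).s0; c1 += CONVERT(b1, uint4) * (a1).s1;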
+
+#if defined(M0) && defined(N0) && defined(K0) && defined(V0) && defined(H0) && defined(M) && \
+ defined(N)
+/** This OpenCL kernel computes the matrix multiplication between 2 matrices with
+ * QASYMM8/QASYMM8_SIGNED data type. The LHS matrix must be reshaped with @ref
+ * CLGEMMReshapeLHSMatrixKernel and the M0xK0 blocks must NOT be transposed. The RHS matrix must be
+ * reshaped with @ref CLGEMMReshapeRHSMatrixKernel and the K0xN0 blocks must be transposed.
+ *
+ * @note The input data type must be passed at compile time using -DDATA_TYPE (i.e.
+ * -DDATA_TYPE=uchar)
+ * @note The accumulator data type must be passed at compile time using -DACC_DATA_TYPE (i.e.
+ * -DACC_DATA_TYPE=uint)
+ * @note If the first two dimensions of NDRange have been dispatched with "dummy_work_items"
+ * support, the option -DDUMMY_WORK_ITEMS must be passed at compile time.
+ * @note The GEMM's dimensions M and N must be passed at compile time using -DM and -DN (i.e. -DM=52
+ * and -DN=90).
+ * @note The block's dimensions used for reshaping the LHS matrix and the RHS matrix (M0, N0 and K0)
+ * must be passed at compile time using -DM0, -DN0 and -DK0 (i.e. -DM0=4, -DN0=8, -DK0=4).
+ * @note The number of M0xK0 vertical blocks stored on the same output row of the reshaped LHS
+ * matrix must be passed at compile time using -DV0 (i.e. -DV0=2)
+ * @note The number of K0xN0 horizontal blocks stored on the same output row of the reshaped RHS
+ * matrix must be passed at compile time using -DH0 (i.e. -DH0=2)
+ * @note If the M0xK0 blocks in the reshaped LHS matrix have been interleaved, the option
+ * -DLHS_INTERLEAVE must be passed at compile time.
+ * @note If the K0xN0 blocks in the reshaped RHS matrix have been interleaved, the option
+ * -DRHS_INTERLEAVE must be passed at compile time.
+ * @note Only the following configurations of M0, N0 and K0 are currently supported:
+ * - M0 = 2, 3, 4, 5, 6, 7, 8
+ * - N0 = 2, 3, 4, 8, 16
+ * - K0 = 2, 3, 4, 8, 16
+ * - V0 >= 1
+ * - H0 >= 1
+ *
+ * @note In case the output has to be reinterpreted as a 3D tensor (i.e. output of convolution
+ * layer), the following information must be passed at compile time:
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns LHS matrix NOT reshaped
+ *
+ * @param[in] lhs_ptr Pointer to the LHS reshaped matrix. Supported data
+ * type: QASYMM8/QASYMM8_SIGNED
+ * @param[in] lhs_stride_x Stride of the LHS reshaped matrix in X dimension
+ * (in bytes)
+ * @param[in] lhs_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] lhs_stride_y Stride of the LHS reshaped matrix in Y dimension
+ * (in bytes)
+ * @param[in] lhs_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] lhs_offset_first_element_in_bytes The offset of the first element in the LHS reshaped
+ * matrix
+ * @param[in] rhs_ptr Pointer to the RHS reshaped matrix. Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] rhs_stride_x Stride of the RHS reshaped matrix in X dimension
+ * (in bytes)
+ * @param[in] rhs_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] rhs_stride_y Stride of the RHS reshaped matrix in Y dimension
+ * (in bytes)
+ * @param[in] rhs_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] rhs_offset_first_element_in_bytes The offset of the first element in the RHS reshaped
+ * matrix
+ * @param[out] dst_ptr Pointer to the destination matrix. Supported data
+ * type: S32
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] k Number of columns in LHS matrix and rows in RHS
+ * matrix not reshaped.
+ * @param[in] lhs_stride_z Stride of the LHS reshaped matrix in Z dimension
+ * (in bytes)
+ * @param[in] rhs_stride_z Stride of the RHS reshaped matrix in Z dimension
+ * (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in
+ * bytes)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings in unit of elements
+ * (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemmlowp_mm_reshaped_lhs_nt_rhs_t(IMAGE_DECLARATION(lhs), IMAGE_DECLARATION(rhs),
+ IMAGE_DECLARATION(dst), uint k, uint lhs_stride_z,
+ uint rhs_stride_z, uint dst_stride_z
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ // Block size
+#define LHS_BLOCK_SIZE ((K0) * (M0))
+
+#if defined(LHS_INTERLEAVE)
+#define LHS_OFFSET_X (K0)
+#define LHS_STEP_X ((K0) * (V0))
+#define LHS_STEP_LOOP (1)
+#else // defined(LHS_INTERLEAVE)
+#define LHS_OFFSET_X (LHS_BLOCK_SIZE)
+#define LHS_STEP_X (K0)
+#define LHS_STEP_LOOP (V0)
+#endif // defined(LHS_INTERLEAVE)
+
+ // Block size
+#define RHS_BLOCK_SIZE ((K0) * (N0))
+
+ // RHS offset and step X
+#if defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (K0)
+#define RHS_STEP_X ((K0) * (H0))
+#define RHS_STEP_LOOP (1)
+#else // defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (RHS_BLOCK_SIZE)
+#define RHS_STEP_X (K0)
+#define RHS_STEP_LOOP (H0)
+#endif // defined(RHS_INTERLEAVE)
+
+ uint x = get_global_id(0);
+ uint y = get_global_id(1);
+ uint z = get_global_id(2);
+
+#if defined(DUMMY_WORK_ITEMS)
+ if ((x * N0 >= N) || (y * M0 >= M))
+ {
+ return;
+ }
+#endif // defined(DUMMY_WORK_ITEMS)
+
+ // Compute LHS matrix address
+ __global DATA_TYPE *lhs_addr = lhs_ptr + lhs_offset_first_element_in_bytes +
+ (y % V0) * (uint)LHS_OFFSET_X + (y / V0) * (uint)lhs_stride_y +
+ (z * lhs_stride_z);
+
+ // Compute RHS matrix address
+ __global DATA_TYPE *rhs_addr = rhs_ptr + rhs_offset_first_element_in_bytes +
+ (x % H0) * (uint)RHS_OFFSET_X + (x / (uint)H0) * rhs_stride_y;
+
+#if defined(MATRIX_B_DEPTH)
+  // Do not slide matrix B if matrix B has 3 dimensions and matrix A has more than 3
+ rhs_addr += (z % MATRIX_B_DEPTH) * rhs_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ rhs_addr += z * rhs_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+  REPEAT_VAR_INIT_TO_CONST(8, uint, zlhs, 0); // uint zlhs0=0,zlhs1=0,zlhs2=0,... zlhs7=0;
+ REPEAT_VAR_INIT_TO_CONST(16, uint, zrhs, 0);
+
+ // Initialize the accumulators
+ REPEAT_VAR_INIT_TO_CONST(M0, VEC_DATA_TYPE(ACC_DATA_TYPE, N0), c,
+ 0); // VEC_DATA_TYPE(ACC_DATA_TYPE, N0) c0=0,c1=0,c2=0,... c(M0-1)=0;
+
+ for (int i = 0; i < k; i += K0)
+ {
+ // Load values from LHS matrix
+ LOAD_BLOCK(M0, K0, DATA_TYPE, a, lhs_addr, 0, LHS_STEP_X, zlhs);
+
+ // Load values from RHS matrix
+ LOAD_BLOCK(N0, K0, DATA_TYPE, b, rhs_addr, 0, RHS_STEP_X, zrhs);
+
+ // Partial matrix multiplication M0,N0,K0
+ ARM_MM_K0XN0XM0(M0, N0, K0, a, b, c);
+
+ // Update address
+ lhs_addr += (M0 * LHS_STEP_X * LHS_STEP_LOOP);
+ rhs_addr += (N0 * RHS_STEP_X * RHS_STEP_LOOP);
+ }
+
+ __global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes +
+ (x * (uint)N0 * sizeof(int)) + (y * (uint)M0 * dst_stride_y);
+
+ REPEAT_VAR_INIT_TO_CONST(8, uint, zout, 0); // uint zout0=0,zout1=0,zout2=0,... zout7=0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+  // The plane (zout) is calculated by dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zout, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, dst_cross_plane_pad,
+ dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Convert and store output block
+ CONVERT_STORE_BLOCK(M0, N0, int, c, dst_addr, dst_stride_y, zout);
+
+#undef LHS_BLOCK_SIZE
+#undef LHS_OFFSET_X
+#undef LHS_STEP_X
+#undef RHS_BLOCK_SIZE
+#undef RHS_OFFSET_X
+#undef RHS_STEP_X
+}
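+
+// Illustrative host-side build options for this kernel (a sketch assembled from the compile-time
+// constraints documented above, not a canned configuration):
+//   "-DDATA_TYPE=uchar -DACC_DATA_TYPE=uint -DM=52 -DN=90 -DM0=4 -DN0=8 -DK0=4 -DV0=2 -DH0=2"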
+#endif // defined(M0) && defined(N0) && defined(K0) && defined(V0) && defined(H0) && defined(M) &&
+       // defined(N)
+
+#if defined(M0) && defined(N0) && defined(K0) && defined(H0) && defined(K)
+
+/** This OpenCL kernel computes the matrix multiplication between 2 matrices.
+ * The LHS matrix is NOT reshaped.
+ * The RHS matrix is reshaped with @ref CLGEMMReshapeRHSMatrixKernel and the block K0xN0 is
+ * transposed.
+ *
+ * @note The input data type must be passed at compile time using -DDATA_TYPE (i.e.
+ * -DDATA_TYPE=uchar)
+ * @note The accumulator data type must be passed at compile time using -DACC_DATA_TYPE (i.e.
+ * -DACC_DATA_TYPE=uint)
+ * @note The number of columns of LHS matrix must be passed at compile time using -DK (i.e. -DK=64)
+ * @note The block's dimensions used for reshaping the RHS matrix (N0 and K0) must be passed at
+ * compile time using -DN0 and -DK0 (i.e. -DN0=8, -DK0=4).
+ * @note The number of M0 rows to process must be passed at compile time using -DM0 (i.e. -DM0=2)
+ * @note The number of K0xN0 horizontal blocks stored on the same output row of the reshaped RHS
+ * matrix must be passed at compile time using -DH0 (i.e. -DH0=2)
+ * @note If the K0xN0 blocks in the reshaped RHS matrix have been interleaved, the option
+ * -DRHS_INTERLEAVE must be passed at compile time.
+ * @note Only the following configurations of M0, N0 and K0 are currently supported:
+ * - M0 = 1, 2, 3, 4, 5, 6, 7, 8
+ * - N0 = 2, 3, 4, 8, 16
+ * - K0 = 2, 3, 4, 8, 16
+ * - H0 >= 1
+ *
+ * @note In case the input or output have to be reinterpreted as a 3D tensor, the following
+ * information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns LHS matrix
+ *
+ * @param[in] lhs_ptr Pointer to the LHS reshaped matrix. Supported data
+ * type: QASYMM8/QASYMM8_SIGNED
+ * @param[in] lhs_stride_x Stride of the LHS reshaped matrix in X dimension
+ * (in bytes)
+ * @param[in] lhs_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] lhs_stride_y Stride of the LHS reshaped matrix in Y dimension
+ * (in bytes)
+ * @param[in] lhs_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] lhs_offset_first_element_in_bytes The offset of the first element in the LHS reshaped
+ * matrix
+ * @param[in] rhs_ptr Pointer to the RHS reshaped matrix. Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] rhs_stride_x Stride of the RHS reshaped matrix in X dimension
+ * (in bytes)
+ * @param[in] rhs_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] rhs_stride_y Stride of the RHS reshaped matrix in Y dimension
+ * (in bytes)
+ * @param[in] rhs_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] rhs_offset_first_element_in_bytes The offset of the first element in the RHS reshaped
+ * matrix
+ * @param[out] dst_ptr Pointer to the destination matrix. Supported data
+ * type: S32
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] lhs_stride_z Stride of the LHS reshaped matrix in Z dimension
+ * (in bytes)
+ * @param[in] rhs_stride_z Stride of the RHS reshaped matrix in Z dimension
+ * (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in
+ * bytes)
+ * @param[in] lhs_cross_plane_pad (Optional) Bottom paddings for LHS matrix in unit
+ * of elements (only if defined REINTERPRET_INPUT_AS_3D)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings for the output matrix in
+ * unit of elements (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemmlowp_mm_reshaped_only_rhs_t(IMAGE_DECLARATION(lhs), IMAGE_DECLARATION(rhs),
+ IMAGE_DECLARATION(dst), uint lhs_stride_z,
+ uint rhs_stride_z, uint dst_stride_z
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint lhs_cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ // Block size
+#define RHS_BLOCK_SIZE ((K0) * (N0))
+
+ // RHS offset and step X
+#if defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (K0)
+#define RHS_STEP_X ((K0) * (H0))
+#define RHS_STEP_LOOP (1)
+#else // defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (RHS_BLOCK_SIZE)
+#define RHS_STEP_X (K0)
+#define RHS_STEP_LOOP (H0)
+#endif // defined(RHS_INTERLEAVE)
+
+ uint x = get_global_id(0);
+ uint y = get_global_id(1);
+ uint z = get_global_id(2);
+
+#if defined(DUMMY_WORK_ITEMS)
+ if ((x * N0 >= N) || (y * M0 >= M))
+ {
+ return;
+ }
+#endif // defined(DUMMY_WORK_ITEMS)
+
+ // Compute LHS matrix address
+ uint lhs_offset = lhs_offset_first_element_in_bytes + y * M0 * (uint)lhs_stride_y;
+
+ // Compute RHS matrix address
+ uint rhs_offset = rhs_offset_first_element_in_bytes + (x % H0) * (uint)RHS_OFFSET_X +
+ (x / (uint)H0) * rhs_stride_y;
+
+#if defined(MATRIX_B_DEPTH)
+  // Do not slide matrix B if matrix B has 3 dimensions and matrix A has more than 3
+ rhs_offset += (z % MATRIX_B_DEPTH) * rhs_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ rhs_offset += z * rhs_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+  REPEAT_VAR_INIT_TO_CONST(8, uint, zlhs, 0); // uint zlhs0=0,zlhs1=0,zlhs2=0,... zlhs7=0;
+ REPEAT_VAR_INIT_TO_CONST(16, uint, zrhs, 0);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+  // The plane (zlhs) is calculated by dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zlhs, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, lhs_cross_plane_pad,
+ lhs_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply lhs_stride_z by DEPTH_GEMM3D
+ lhs_offset += z * lhs_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ lhs_offset += z * lhs_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Initialize the accumulators
+  REPEAT_VAR_INIT_TO_CONST(M0, VEC_DATA_TYPE(ACC_DATA_TYPE, N0), c,
+                           0); // VEC_DATA_TYPE(ACC_DATA_TYPE, N0) c0=0,c1=0,c2=0,... c(M0-1)=0;
+
+ for (int i = 0; i < K; i += K0)
+ {
+ // Load values from LHS matrix
+ LOAD_BLOCK(M0, K0, DATA_TYPE, a, lhs_ptr, lhs_offset, lhs_stride_y, zlhs);
+
+ // Load values from RHS matrix
+ LOAD_BLOCK(N0, K0, DATA_TYPE, b, rhs_ptr, rhs_offset, RHS_STEP_X, zrhs);
+
+ // Partial matrix multiplication M0,N0,K0
+ ARM_MM_K0XN0XM0(M0, N0, K0, a, b, c);
+
+ lhs_offset += K0;
+ rhs_offset += N0 * RHS_STEP_X * RHS_STEP_LOOP;
+ }
+
+ __global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes +
+ (x * (uint)N0) * sizeof(int) + (y * (uint)M0 * dst_stride_y);
+
+ REPEAT_VAR_INIT_TO_CONST(8, uint, zout, 0); // uint zout0=0,zout1=0,zout2=0,... zout7=0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+  // The plane (zout) is calculated by dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zout, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, dst_cross_plane_pad,
+ dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Convert and store output block
+ CONVERT_STORE_BLOCK(M0, N0, int, c, dst_addr, dst_stride_y, zout);
+
+#undef RHS_BLOCK_SIZE
+#undef RHS_OFFSET_X
+#undef RHS_STEP_X
+}
+
+#if defined(RESULT_OFFSET) && defined(RESULT_SHIFT) && defined(RESULT_MULTIPLIER)
+/** This OpenCL kernel computes the matrix multiplication between 2 matrices with fused output stage
+ * using fixed-point arithmetic. The LHS matrix is NOT reshaped. The RHS matrix is reshaped with
+ * @ref CLGEMMReshapeRHSMatrixKernel and the block K0xN0 is transposed.
+ *
+ * @note The input data type must be passed at compile time using -DDATA_TYPE (i.e.
+ * -DDATA_TYPE=uchar)
+ * @note The accumulator data type must be passed at compile time using -DACC_DATA_TYPE (i.e.
+ * -DACC_DATA_TYPE=uint)
+ * @note The number of columns of LHS matrix must be passed at compile time using -DK (i.e. -DK=64)
+ * @note The block's dimensions used for reshaping the RHS matrix (N0 and K0) must be passed at
+ * compile time using -DN0 and -DK0 (i.e. -DN0=8, -DK0=4).
+ * @note The number of M0 rows to process must be passed at compile time using -DM0 (i.e. -DM0=2)
+ * @note The number of K0xN0 horizontal blocks stored on the same output row of the reshaped RHS
+ * matrix must be passed at compile time using -DH0 (i.e. -DH0=2)
+ * @note If the K0xN0 blocks in the reshaped RHS matrix have been interleaved, the option
+ * -DRHS_INTERLEAVE must be passed at compile time.
+ * @note Only the following configurations of M0, N0 and K0 are currently supported:
+ * - M0 = 1, 2, 3, 4, 5, 6, 7, 8
+ * - N0 = 2, 3, 4, 8, 16
+ * - K0 = 2, 3, 4, 8, 16
+ * - H0 >= 1
+ *
+ * @note In case the input or output have to be reinterpreted as a 3D tensor, the following
+ * information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns LHS matrix
+ *
+ * @note The offset, scalar scale factor and number of bits to shift right of output tensor must be
+ * passed at compile time using -DRESULT_OFFSET, -DRESULT_MULTIPLIER and -DRESULT_SHIFT
+ * @note In case the addition of int32 biases is required, -DADD_BIAS should be passed at compile
+ * time
+ * @note The output datatype should be passed at compile time using -DOUTPUT_DATA_TYPE
+ * @note In case the clamping of the result is required, the min and max bounds can be passed at
+ * compile time using -DMIN_BOUND and -DMAX_BOUND. These values can be used to implement "rectified
+ * linear unit" activation functions
+ * @note In case of per-channel quantization of matrix B, -DPER_CHANNEL_QUANTIZATION must be passed
+ * at compile time.
+ *
+ * @param[in] lhs_ptr Pointer to the LHS reshaped matrix.
+ * Supported data type: QASYMM8/QASYMM8_SIGNED
+ * @param[in] lhs_stride_x Stride of the LHS reshaped matrix in
+ * X dimension (in bytes)
+ * @param[in] lhs_step_x src_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] lhs_stride_y Stride of the LHS reshaped matrix in
+ * Y dimension (in bytes)
+ * @param[in] lhs_step_y src_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] lhs_offset_first_element_in_bytes The offset of the first element in
+ * the LHS reshaped matrix
+ * @param[in] rhs_ptr Pointer to the RHS reshaped matrix.
+ * Supported data type: same as @p lhs_ptr
+ * @param[in] rhs_stride_x Stride of the RHS reshaped matrix in
+ * X dimension (in bytes)
+ * @param[in] rhs_step_x src_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] rhs_stride_y Stride of the RHS reshaped matrix in
+ * Y dimension (in bytes)
+ * @param[in] rhs_step_y src_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] rhs_offset_first_element_in_bytes The offset of the first element in
+ * the RHS reshaped matrix
+ * @param[out] dst_ptr Pointer to the destination matrix.
+ * Supported data type: same as @p lhs_ptr
+ * @param[in] dst_stride_x Stride of the destination matrix in
+ * X dimension (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in
+ * Y dimension (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in
+ * the destination matrix
+ * @param[in] lhs_stride_z Stride of the LHS reshaped matrix in
+ * Z dimension (in bytes)
+ * @param[in] rhs_stride_z Stride of the RHS reshaped matrix in
+ * Z dimension (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in
+ * Z dimension (in bytes)
+ * @param[in] lhs_cross_plane_pad (Optional) Bottom paddings for LHS
+ * matrix in unit of elements (only if defined REINTERPRET_INPUT_AS_3D)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings for the
+ * output matrix in unit of elements (only if defined REINTERPRET_OUTPUT_AS_3D)
+ * @param[in] sum_col_ptr (Optional) Pointer to the source
+ * tensor. Supported data type: S32
+ * @param[in] sum_col_stride_x (Optional) Stride of the source
+ * tensor in X dimension (in bytes)
+ * @param[in] sum_col_step_x (Optional) sum_col_stride_x * number
+ * of elements along X processed per workitem(in bytes)
+ * @param[in] sum_col_stride_y (Optional) Stride of the source
+ * tensor in Y dimension (in bytes)
+ * @param[in] sum_col_step_y (Optional) sum_col_stride_y * number
+ * of elements along Y processed per workitem(in bytes)
+ * @param[in] sum_col_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the source tensor
+ * @param[in] sum_row_ptr (Optional) Pointer to the source
+ * tensor. Supported data type: S32
+ * @param[in] sum_row_stride_x (Optional) Stride of the source
+ * tensor in X dimension (in bytes)
+ * @param[in] sum_row_step_x (Optional) sum_row_stride_x * number
+ * of elements along X processed per workitem(in bytes)
+ * @param[in] sum_row_stride_y (Optional) Stride of the source
+ * tensor in Y dimension (in bytes)
+ * @param[in] sum_row_step_y (Optional) sum_row_stride_y * number
+ * of elements along Y processed per workitem(in bytes)
+ * @param[in] sum_row_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the source tensor
+ * @param[in] biases_ptr (Optional) Pointer to the biases
+ * tensor. Supported data type: S32
+ * @param[in] biases_stride_x (Optional) Stride of the biases
+ * tensor in X dimension (in bytes)
+ * @param[in] biases_step_x (Optional) biases_stride_x * number
+ * of elements along X processed per workitem(in bytes)
+ * @param[in] biases_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the biases tensor
+ * @param[in] result_multipliers_ptr (Optional) Pointer to the output
+ * multipliers vector for per-channel quantization. Supported data types: S32
+ * @param[in] result_multipliers_stride_x (Optional) Stride of the output
+ * multipliers vector in X dimension (in bytes)
+ * @param[in] result_multipliers_step_x (Optional)
+ * output_multipliers_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] result_multipliers_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the output multipliers vector
+ * @param[in] result_shifts_ptr (Optional) Pointer to the output
+ * shifts vector for per-channel quantization. Supported data types: S32
+ * @param[in] result_shifts_stride_x (Optional) Stride of the output
+ * shifts vector in X dimension (in bytes)
+ * @param[in] result_shifts_step_x (Optional) result_shifts_stride_x *
+ * number of elements along X processed per workitem(in bytes)
+ * @param[in] result_shifts_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the output shifts vector
+ */
+__kernel void gemmlowp_mm_reshaped_only_rhs_t_fused_output_stage_fixedpoint(
+ IMAGE_DECLARATION(lhs), IMAGE_DECLARATION(rhs), IMAGE_DECLARATION(dst), uint lhs_stride_z,
+ uint rhs_stride_z, uint dst_stride_z
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint lhs_cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+#if defined(A_OFFSET)
+ ,
+ IMAGE_DECLARATION(sum_col)
+#endif // defined(A_OFFSET)
+#if defined(B_OFFSET)
+ ,
+ IMAGE_DECLARATION(sum_row)
+#endif // defined(B_OFFSET)
+#if defined(ADD_BIAS)
+ ,
+ VECTOR_DECLARATION(biases)
+#endif // defined(ADD_BIAS)
+#if defined(PER_CHANNEL_QUANTIZATION)
+ ,
+ VECTOR_DECLARATION(result_multipliers), VECTOR_DECLARATION(result_shifts)
+#endif // defined(PER_CHANNEL_QUANTIZATION)
+)
+{
+ // Block size
+#define RHS_BLOCK_SIZE ((K0) * (N0))
+
+ // RHS offset and step X
+#if defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (K0)
+#define RHS_STEP_X ((K0) * (H0))
+#define RHS_STEP_LOOP (1)
+#else // defined(RHS_INTERLEAVE)
+#define RHS_OFFSET_X (RHS_BLOCK_SIZE)
+#define RHS_STEP_X (K0)
+#define RHS_STEP_LOOP (H0)
+#endif // defined(RHS_INTERLEAVE)
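+
+  // Illustrative example (values chosen only for illustration): with K0=4, N0=4 and H0=2,
+  // RHS_BLOCK_SIZE = 16. With RHS_INTERLEAVE the H0 horizontal blocks are interleaved, so a
+  // block starts every K0 = 4 elements (RHS_OFFSET_X) and each K iteration steps over all H0
+  // blocks, i.e. K0 * H0 = 8 elements (RHS_STEP_X). Without RHS_INTERLEAVE the blocks are
+  // stored back to back: RHS_OFFSET_X = RHS_BLOCK_SIZE = 16 and RHS_STEP_X = K0 = 4.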
+
+ uint x = get_global_id(0);
+ uint y = get_global_id(1);
+ uint z = get_global_id(2);
+
+#if defined(DUMMY_WORK_ITEMS)
+ if ((x * N0 >= N) || (y * M0 >= M))
+ {
+ return;
+ }
+#endif // defined(DUMMY_WORK_ITEMS)
+
+ // Compute LHS matrix address
+ uint lhs_offset = lhs_offset_first_element_in_bytes + y * M0 * (uint)lhs_stride_y;
+
+ // Compute RHS matrix address
+ uint rhs_offset = rhs_offset_first_element_in_bytes + (x % H0) * (uint)RHS_OFFSET_X +
+ (x / (uint)H0) * rhs_stride_y;
+
+#if defined(MATRIX_B_DEPTH)
+  // Do not slide matrix B if matrix B has 3 dimensions and matrix A has more than 3
+ rhs_offset += (z % MATRIX_B_DEPTH) * rhs_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ rhs_offset += z * rhs_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
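+  // For illustration: with a hypothetical MATRIX_B_DEPTH=2, batches z = 0,1,2,3,... read the
+  // B slices 0,1,0,1,..., i.e. matrix B is broadcast across the extra batch dimensions of
+  // matrix A; without MATRIX_B_DEPTH every batch z reads its own slice at z * rhs_stride_z.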
+
+  REPEAT_VAR_INIT_TO_CONST(8, uint, zlhs, 0); // uint zlhs0=0,zlhs1=0,zlhs2=0,... zlhs7=0;
+ REPEAT_VAR_INIT_TO_CONST(16, uint, zrhs, 0);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+  // The plane (zlhs) is calculated by dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zlhs, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, lhs_cross_plane_pad,
+ lhs_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply lhs_stride_z by DEPTH_GEMM3D
+ lhs_offset += z * lhs_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ lhs_offset += z * lhs_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Initialize the accumulators
+ REPEAT_VAR_INIT_TO_CONST(M0, VEC_DATA_TYPE(ACC_DATA_TYPE, N0), c,
+ 0); // VEC_DATA_TYPE(ACC_DATA_TYPE, N0) c0=0,c1=0,c2=0,... c(M0-1)=0;
+
+ for (int i = 0; i < K; i += K0)
+ {
+ // Load values from LHS matrix
+ LOAD_BLOCK(M0, K0, DATA_TYPE, a, lhs_ptr, lhs_offset, lhs_stride_y, zlhs);
+
+ // Load values from RHS matrix
+ LOAD_BLOCK(N0, K0, DATA_TYPE, b, rhs_ptr, rhs_offset, RHS_STEP_X, zrhs);
+
+ // Partial matrix multiplication M0,N0,K0
+ ARM_MM_K0XN0XM0(M0, N0, K0, a, b, c);
+
+ lhs_offset += K0;
+ rhs_offset += N0 * RHS_STEP_X * RHS_STEP_LOOP;
+ }
+
+ // Result of MM is of type DATA_TYPE
+ __global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes +
+ (x * (uint)N0) * sizeof(DATA_TYPE) + (y * (uint)M0 * dst_stride_y);
+
+ REPEAT_VAR_INIT_TO_CONST(8, uint, zout, 0); // uint zout0=0,zout1=0,zout2=0,... zout7=0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+  // The plane (zout) is calculated by dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zout, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, dst_cross_plane_pad,
+ dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Convert result of matrix multiplication to S32
+ REPEAT_VAR_INIT_CONVERT_SAT(M0, VEC_DATA_TYPE(int, N0), c, c_int);
+
+ int batch_id = z;
+#if defined(DEPTH_GEMM3D)
+ batch_id /= (int)DEPTH_GEMM3D;
+#endif // defined(DEPTH_GEMM3D)
+
+ // Offset contribution: c += (A_OFFSET * sum_col) + (B_OFFSET * sum_row) + K_OFFSET;
+ REPEAT_VAR_INIT_TO_CONST(M0, VEC_DATA_TYPE(int, N0), offset_s32_, K_OFFSET);
+
+#if defined(A_OFFSET)
+ // Compute the offset contribution due to A_OFFSET
+ __global uchar *sum_col_addr =
+ sum_col_ptr + sum_col_offset_first_element_in_bytes + (x * (uint)N0) * sizeof(int);
+
+#if defined(SUM_COL_HAS_BATCHES)
+ sum_col_addr += z * sum_col_stride_y;
+#endif // defined(SUM_COL_HAS_BATCHES)
+ VEC_DATA_TYPE(int, N0)
+ a_offset_s32 = VLOAD(N0)(0, (__global int *)sum_col_addr);
+ a_offset_s32 *= (VEC_DATA_TYPE(int, N0))A_OFFSET;
+
+ REPEAT_ADD_VECTOR_TO_VAR(M0, offset_s32_, a_offset_s32);
+#endif // defined(A_OFFSET)
+
+#if defined(B_OFFSET)
+ // Compute the offset contribution due to B_OFFSET
+ __global uchar *sum_row_addr = sum_row_ptr + sum_row_offset_first_element_in_bytes +
+ (y * (uint)M0) * sizeof(int) + z * sum_row_stride_y;
+
+#if defined(HEIGHT_GEMM3D) && defined(DEPTH_GEMM3D)
+ sum_row_addr += (batch_id % (int)DEPTH_GEMM3D) * (int)HEIGHT_GEMM3D * sizeof(int);
+#endif // defined(HEIGHT_GEMM3D) && defined(DEPTH_GEMM3D)
+ LOAD_SCALAR_AS_VECTOR(M0, N0, int, b_offset_s32_, sum_row_addr, 0, sum_row_stride_x);
+
+ REPEAT_MLA_VAR_WITH_CONST_VEC(M0, offset_s32_, b_offset_s32_, (VEC_DATA_TYPE(int, N0))B_OFFSET);
+#endif // defined(B_OFFSET)
+
+#if defined(ADD_BIAS)
+ // Add bias
+ __global uchar *bias_addr =
+ biases_ptr + biases_offset_first_element_in_bytes + (x * (uint)N0) * sizeof(int);
+
+ VEC_DATA_TYPE(int, N0)
+ bias_values = VLOAD(N0)(0, (__global int *)bias_addr);
+ REPEAT_ADD_VECTOR_TO_VAR(M0, offset_s32_, bias_values);
+#endif // defined(ADD_BIAS)
+
+ REPEAT_ADD_TWO_VARS(M0, c_int, offset_s32_);
+
+ // Multiply by result_mult_int and shift
+#if defined(PER_CHANNEL_QUANTIZATION)
+ __global uchar *result_multipliers_addr = result_multipliers_ptr +
+ result_multipliers_offset_first_element_in_bytes +
+ (x * (uint)N0) * sizeof(int);
+ __global uchar *result_shifts_addr =
+ result_shifts_ptr + result_shifts_offset_first_element_in_bytes + (x * (uint)N0) * sizeof(int);
+
+ VEC_DATA_TYPE(int, N0)
+ res_mul = VLOAD(N0)(0, (__global int *)result_multipliers_addr);
+ VEC_DATA_TYPE(int, N0)
+ res_shift = VLOAD(N0)(0, (__global int *)result_shifts_addr);
+
+ REPEAT_ASYMM_MULT_BY_QUANT_MULTIPLIER_PER_CHANNEL(M0, N0, c_int, res_mul, res_shift);
+#else // defined(PER_CHANNEL_QUANTIZATION)
+
+#if RESULT_SHIFT < 0
+ REPEAT_ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(M0, N0, c_int, RESULT_MULTIPLIER,
+ RESULT_SHIFT);
+#else // RESULT_SHIFT >= 0
+ REPEAT_ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(M0, N0, c_int, RESULT_MULTIPLIER,
+ RESULT_SHIFT);
+#endif // RESULT_SHIFT < 0
+
+#endif // defined(PER_CHANNEL_QUANTIZATION)
+
+ // Add the offset terms to GEMM's result
+ REPEAT_ADD_CONST_TO_VAR(M0, VEC_DATA_TYPE(int, N0), c_int, RESULT_OFFSET);
+
+#if defined(MIN_BOUND)
+ REPEAT_MAX_CONST_VAR(M0, VEC_DATA_TYPE(int, N0), c_int, MIN_BOUND);
+#endif // defined(MIN_BOUND)
+#if defined(MAX_BOUND)
+ REPEAT_MIN_CONST_VAR(M0, VEC_DATA_TYPE(int, N0), c_int, MAX_BOUND);
+#endif // defined(MAX_BOUND)
+
+  // Convert and store output block (the conversion saturates)
+ CONVERT_STORE_BLOCK(M0, N0, DATA_TYPE, c_int, dst_addr, dst_stride_y, zout);
+
+#undef RHS_BLOCK_SIZE
+#undef RHS_OFFSET_X
+#undef RHS_STEP_X
+#undef RHS_STEP_LOOP
+}
+#endif // defined(RESULT_OFFSET) && defined(RESULT_SHIFT) && defined(RESULT_MULTIPLIER)
+#endif // defined(M0) && defined(N0) && defined(K0) && defined(H0) && defined(DATA_TYPE) &&
+ // defined(K)
+
+#if defined(M0) && defined(N0) && defined(K0) && defined(K)
+
+/** This OpenCL kernel computes the matrix multiplication between 2 matrices.
+ * The LHS matrix is NOT reshaped
+ * The RHS matrix is NOT reshaped
+ *
+ * @note The input data type must be passed at compile time using -DDATA_TYPE (i.e.
+ * -DDATA_TYPE=uchar)
+ * @note The accumulator data type must be passed at compile time using -DACC_DATA_TYPE (i.e.
+ * -DACC_DATA_TYPE=uint)
+ * @note The number of columns of LHS matrix must be passed at compile time using -DK (i.e. -DK=64)
+ * @note The number of M0 rows to process must be passed at compile time using -DM0 (i.e. -DM0=2)
+ * @note The number of N0 columns to process must be passed at compile time using -DN0 (i.e. -DN0=2)
+ * @note The number of K0 partial accumulations must be passed at compile time using -DK0 (i.e.,
+ * -DK0=2)
+ * @note Only the following configurations of M0, N0 and K0 are currently supported:
+ * - M0 = 1, 2, 3, 4, 5, 6, 7, 8
+ * - N0 = 2, 3, 4, 8, 16
+ * - K0 = 2, 3, 4, 8, 16
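+ *
+ * For example, the purely illustrative build options -DDATA_TYPE=uchar -DACC_DATA_TYPE=uint
+ * -DK=64 -DM0=4 -DN0=4 -DK0=4 make each work-item compute a 4x4 block of the output,
+ * accumulating 4 elements along K per loop iteration.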
+ *
+ * @note In case the input or output have to be reinterpreted as a 3D tensor, the following
+ * information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns LHS matrix
+ *
+ * @param[in] lhs_ptr Pointer to the LHS matrix. Supported data
+ * type: QASYMM8
+ * @param[in] lhs_stride_x Stride of the LHS matrix in X dimension
+ * (in bytes)
+ * @param[in] lhs_step_x lhs_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] lhs_stride_y Stride of the LHS matrix in Y dimension
+ * (in bytes)
+ * @param[in] lhs_step_y lhs_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] lhs_offset_first_element_in_bytes The offset of the first element in the LHS
+ * matrix
+ * @param[in] rhs_ptr Pointer to the RHS matrix. Supported data
+ * type: same as @p lhs_ptr
+ * @param[in] rhs_stride_x Stride of the RHS matrix in X dimension
+ * (in bytes)
+ * @param[in] rhs_step_x rhs_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] rhs_stride_y Stride of the RHS matrix in Y dimension
+ * (in bytes)
+ * @param[in] rhs_step_y rhs_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] rhs_offset_first_element_in_bytes The offset of the first element in the RHS
+ * matrix
+ * @param[out] dst_ptr Pointer to the destination matrix. Supported data
+ * type: S32
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] lhs_stride_z Stride of the LHS matrix in Z dimension
+ * (in bytes)
+ * @param[in] rhs_stride_z Stride of the RHS matrix in Z dimension
+ * (in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in
+ * bytes)
+ * @param[in] lhs_cross_plane_pad (Optional) Bottom paddings for LHS matrix in unit
+ * of elements (only if defined REINTERPRET_INPUT_AS_3D)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings for the output matrix in
+ * unit of elements (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemmlowp_mm_native(IMAGE_DECLARATION(lhs), IMAGE_DECLARATION(rhs),
+ IMAGE_DECLARATION(dst), uint lhs_stride_z, uint rhs_stride_z,
+ uint dst_stride_z
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint lhs_cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+)
+{
+ uint x = get_global_id(0);
+ uint y = get_global_id(1);
+ uint z = get_global_id(2);
+
+#if defined(DUMMY_WORK_ITEMS)
+ if ((x * N0 >= N) || (y * M0 >= M))
+ {
+ return;
+ }
+#endif // defined(DUMMY_WORK_ITEMS)
+
+ // Compute LHS matrix address
+ uint lhs_offset = lhs_offset_first_element_in_bytes + y * M0 * (uint)lhs_stride_y;
+
+ // Compute RHS matrix address
+ uint rhs_offset = rhs_offset_first_element_in_bytes + x * N0;
+
+#if defined(MATRIX_B_DEPTH)
+  // Do not slide matrix B if matrix B has 3 dimensions and matrix A has more than 3
+ rhs_offset += (z % MATRIX_B_DEPTH) * rhs_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ rhs_offset += z * rhs_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ REPEAT_VAR_INIT_TO_CONST(8, uint, zlhs, 0);
+ REPEAT_VAR_INIT_TO_CONST(16, uint, zrhs, 0);
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+  // The plane (zlhs) is calculated by dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zlhs, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, lhs_cross_plane_pad,
+ lhs_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply lhs_stride_z by DEPTH_GEMM3D
+ lhs_offset += z * lhs_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ lhs_offset += z * lhs_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Initialize the accumulators
+ REPEAT_VAR_INIT_TO_CONST(M0, VEC_DATA_TYPE(ACC_DATA_TYPE, N0), c,
+ 0); // VEC_DATA_TYPE(ACC_DATA_TYPE, N0) c0=0,c1=0,c2=0,... c(M0-1)=0;
+
+ int i = 0;
+
+ for (; i <= (K - K0); i += K0)
+ {
+ // Load values from LHS matrix
+ LOAD_BLOCK(M0, K0, DATA_TYPE, a, lhs_ptr, lhs_offset, lhs_stride_y, zlhs);
+
+ // Load values from RHS matrix
+ LOAD_BLOCK(K0, N0, DATA_TYPE, b, rhs_ptr, rhs_offset, rhs_stride_y, zrhs);
+
+ // Partial matrix multiplication M0,N0,K0
+#if (GPU_ARCH == GPU_ARCH_MIDGARD)
+ ARM_MM_NATIVE_N0XK0XM0(VEC_DATA_TYPE(ACC_DATA_TYPE, N0), M0, K0, a, b, c);
+#else // GPU_ARCH == GPU_ARCH_MIDGARD
+ // Transpose the values from RHS matrix
+ TRANSPOSE_K0XN0(K0, N0, b_t, b, DATA_TYPE);
+
+ ARM_MM_K0XN0XM0(M0, N0, K0, a, b_t, c);
+#endif // GPU_ARCH == GPU_ARCH_MIDGARD
+
+ // Update the offset
+ lhs_offset += K0;
+ rhs_offset += K0 * rhs_stride_y;
+ }
+
+ // Left-over for loop
+ for (; i < K; ++i)
+ {
+ // Load values from LHS matrix
+ LOAD_BLOCK(M0, 1, DATA_TYPE, a, lhs_ptr, lhs_offset, lhs_stride_y, zlhs);
+
+ // Load values from RHS matrix
+ LOAD_BLOCK(1, N0, DATA_TYPE, b, rhs_ptr, rhs_offset, rhs_stride_y, zrhs);
+
+ // Partial matrix multiplication M0,N0,1
+#if (GPU_ARCH == GPU_ARCH_MIDGARD)
+ ARM_MM_NATIVE_N0XK0XM0(VEC_DATA_TYPE(ACC_DATA_TYPE, N0), M0, 1, a, b, c);
+#else // GPU_ARCH == GPU_ARCH_MIDGARD
+ // Transpose the values from RHS matrix
+ TRANSPOSE_K0XN0(1, N0, b_t, b, DATA_TYPE);
+
+ ARM_MM_K0XN0XM0(M0, N0, 1, a, b_t, c);
+#endif // GPU_ARCH == GPU_ARCH_MIDGARD
+
+ // Update the offset
+ lhs_offset += 1;
+ rhs_offset += rhs_stride_y;
+ }
+
+ __global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes +
+ (x * (uint)N0) * sizeof(int) + (y * (uint)M0 * dst_stride_y);
+
+ REPEAT_VAR_INIT_TO_CONST(M0, uint, zout, 0); // uint zout0=0,zout1=0,zout2=0,... zout7=0;
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+  // The plane (zout) is calculated by dividing M (y * M0) by HEIGHT_GEMM3D
+ CALCULATE_Z_OFFSET(M0, uint, zout, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, dst_cross_plane_pad,
+ dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst_addr += z * dst_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ dst_addr += z * dst_stride_z;
+
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+
+ // Convert and store output block
+ CONVERT_STORE_BLOCK(M0, N0, int, c, dst_addr, dst_stride_y, zout);
+}
+#endif // defined(M0) && defined(N0) && defined(K0) && defined(K)
+
+#if defined(COLS_A)
+/** OpenCL kernel used to compute the vector of sums of all the entries in each row of Matrix
+ * A. Each reduced row can also be multiplied by a scalar value, if SCALAR is passed at
+ * compile time.
+ *
+ * @note This stage is needed to handle the offset of matrix product
+ * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
+ *
+ * @attention The number of matrix A columns needs to be passed at compile time using -DCOLS_A
+ * @note The input data type must be passed at compile time using -DDATA_TYPE (i.e.
+ * -DDATA_TYPE=uchar)
+ * @note The data type for the accumulation must be passed at compile time using -DACC_DATA_TYPE
+ * (i.e. -DACC_DATA_TYPE=uint)
+ * @note In case of scaling the scalar value must be passed at compile time using -DSCALAR (e.g.
+ * -DSCALAR=3)
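+ * @note Conceptually, for an M x K matrix A this kernel produces an M x 1 vector of row sums,
+ *       i.e. dst[i] = sum_j A[i][j], optionally multiplied by SCALAR.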
+ *
+ * @param[in] src_ptr Pointer to the source tensor. Supported data type:
+ * QASYMM8/QASYMM8_SIGNED
+ * @param[in] src_stride_x Stride of the source tensor in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source tensor in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source tensor in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source
+ * tensor
+ * @param[out] dst_ptr Pointer to the destination tensor. Supported data
+ * type: S32
+ * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * tensor
+ */
+__kernel void gemmlowp_matrix_a_reduction(TENSOR3D_DECLARATION(src), IMAGE_DECLARATION(dst))
+{
+ // Compute source and destination addresses
+ Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ VEC_DATA_TYPE(ACC_DATA_TYPE, 4)
+ sum_row_32 = (VEC_DATA_TYPE(ACC_DATA_TYPE, 4))0;
+ ACC_DATA_TYPE sum_row = 0;
+
+ __global const DATA_TYPE *matrix_a =
+ (__global const DATA_TYPE *)(src.ptr + get_global_id(0) * src_stride_y +
+ get_global_id(1) * src_stride_z);
+
+ int i = 0;
+
+ // This for loop performs 16 accumulations
+ for (; i <= ((int)COLS_A - 16); i += 16)
+ {
+ const VEC_DATA_TYPE(DATA_TYPE, 16) a0 = vload16(0, matrix_a + i);
+
+ sum_row_32 += CONVERT(a0.s0123, VEC_DATA_TYPE(ACC_DATA_TYPE, 4)) +
+ CONVERT(a0.s4567, VEC_DATA_TYPE(ACC_DATA_TYPE, 4)) +
+ CONVERT(a0.s89AB, VEC_DATA_TYPE(ACC_DATA_TYPE, 4)) +
+ CONVERT(a0.sCDEF, VEC_DATA_TYPE(ACC_DATA_TYPE, 4));
+ }
+
+ // This for loop performs the leftover accumulations
+ for (; i < COLS_A; ++i)
+ {
+ sum_row += (ACC_DATA_TYPE)matrix_a[i];
+ }
+
+ sum_row += sum_row_32.s0 + sum_row_32.s1 + sum_row_32.s2 + sum_row_32.s3;
+
+#if defined(SCALAR)
+ sum_row *= (int)SCALAR;
+#endif // defined(SCALAR)
+ *((__global int *)dst.ptr) = (int)sum_row;
+}
+
+#if defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
+/** OpenCL kernel used to compute the vector of sums of all the entries in each row of Matrix A
+ * using the arm dot product instruction. Each reduced row can also be multiplied by a
+ * scalar value, if SCALAR is passed at compile time.
+ *
+ * @note This stage is needed to handle the offset of matrix product
+ * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
+ *
+ * @attention The number of matrix A columns needs to be passed at compile time using -DCOLS_A
+ * @note The input data type must be passed at compile time using -DDATA_TYPE (i.e.
+ * -DDATA_TYPE=uchar)
+ * @note The data type for the accumulation must be passed at compile time using -DACC_DATA_TYPE
+ * (i.e. -DACC_DATA_TYPE=uint)
+ * @note In case of scaling the scalar value must be passed at compile time using -DSCALAR (e.g.
+ * -DSCALAR=3)
+ *
+ * @param[in] src_ptr Pointer to the source tensor. Supported data type:
+ * QASYMM8/QASYMM8_SIGNED
+ * @param[in] src_stride_x Stride of the source tensor in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source tensor in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source tensor in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source
+ * tensor
+ * @param[out] dst_ptr Pointer to the destination tensor. Supported data
+ * type: S32
+ * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * tensor
+ */
+__kernel void gemmlowp_matrix_a_reduction_dot8(TENSOR3D_DECLARATION(src), IMAGE_DECLARATION(dst))
+{
+ // Compute source and destination addresses
+ Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ ACC_DATA_TYPE sum_row = 0;
+
+ __global const DATA_TYPE *matrix_a =
+ (__global const DATA_TYPE *)(src.ptr + get_global_id(0) * src_stride_y +
+ get_global_id(1) * src_stride_z);
+
+ int i = 0;
+
+  // This for loop performs 32 accumulations
+ for (; i <= ((int)COLS_A - 32); i += 32)
+ {
+ VEC_DATA_TYPE(DATA_TYPE, 16)
+ a0 = vload16(0, matrix_a + i);
+
+ sum_row += arm_dot(a0.s0123, (VEC_DATA_TYPE(DATA_TYPE, 4))(1));
+ sum_row += arm_dot(a0.s4567, (VEC_DATA_TYPE(DATA_TYPE, 4))(1));
+ sum_row += arm_dot(a0.s89AB, (VEC_DATA_TYPE(DATA_TYPE, 4))(1));
+ sum_row += arm_dot(a0.sCDEF, (VEC_DATA_TYPE(DATA_TYPE, 4))(1));
+
+ a0 = vload16(1, matrix_a + i);
+
+ sum_row += arm_dot(a0.s0123, (VEC_DATA_TYPE(DATA_TYPE, 4))(1));
+ sum_row += arm_dot(a0.s4567, (VEC_DATA_TYPE(DATA_TYPE, 4))(1));
+ sum_row += arm_dot(a0.s89AB, (VEC_DATA_TYPE(DATA_TYPE, 4))(1));
+ sum_row += arm_dot(a0.sCDEF, (VEC_DATA_TYPE(DATA_TYPE, 4))(1));
+ }
+
+ // This for loop performs the leftover accumulations
+ for (; i < COLS_A; ++i)
+ {
+ sum_row += (ACC_DATA_TYPE)matrix_a[i];
+ }
+
+#if defined(SCALAR)
+ sum_row *= (int)SCALAR;
+#endif // defined(SCALAR)
+ *((__global int *)dst.ptr) = (int)sum_row;
+}
+#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
+#endif // defined(COLS_A)
+
+#if defined(COLS_B) && defined(ROWS_B)
+/** OpenCL kernel used to compute the row vector of sums of all the entries in each column of
+ * Matrix B. Each reduced column can also be multiplied by a scalar value, if SCALAR is
+ * passed at compile time.
+ *
+ * @note This stage is needed to handle the offset of matrix product
+ * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
+ *
+ * @attention The number of matrix B columns and rows needs to be passed at compile time using
+ * -DCOLS_B and -DROWS_B
+ * @note The input data type must be passed at compile time using -DDATA_TYPE (i.e.
+ * -DDATA_TYPE=uchar)
+ * @note The data type for the accumulation must be passed at compile time using -DACC_DATA_TYPE
+ * (i.e. -DACC_DATA_TYPE=uint)
+ * @note In case of scaling the scalar value must be passed at compile time using -DSCALAR (i.e.
+ * -DSCALAR=3)
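+ * @note Conceptually, for a ROWS_B x COLS_B matrix B this kernel produces a row vector of
+ *       column sums, i.e. dst[j] = sum_i B[i][j], optionally multiplied by SCALAR.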
+ *
+ * @param[in] src_ptr Pointer to the source tensor. Supported data type:
+ * QASYMM8/QASYMM8_SIGNED
+ * @param[in] src_stride_x Stride of the source tensor in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source tensor in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source tensor in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source
+ * tensor
+ * @param[out] dst_ptr Pointer to the destination tensor. Supported data
+ * type: S32
+ * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * tensor
+ */
+__kernel void gemmlowp_matrix_b_reduction(TENSOR3D_DECLARATION(src), IMAGE_DECLARATION(dst))
+{
+ // Compute source and destination addresses
+ Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ VEC_DATA_TYPE(ACC_DATA_TYPE, 16)
+ sum_col_32 = (VEC_DATA_TYPE(ACC_DATA_TYPE, 16))0;
+
+ __global const DATA_TYPE *matrix_b =
+ (__global const DATA_TYPE *)(src.ptr + get_global_id(1) * src_stride_z);
+
+ int i = 0;
+ // This for loop performs 4 accumulations
+ for (; i <= ((int)ROWS_B - 4); i += 4)
+ {
+ const VEC_DATA_TYPE(DATA_TYPE, 16) b0 = vload16(0, matrix_b + 0 * src_stride_y);
+ const VEC_DATA_TYPE(DATA_TYPE, 16) b1 = vload16(0, matrix_b + 1 * src_stride_y);
+ const VEC_DATA_TYPE(DATA_TYPE, 16) b2 = vload16(0, matrix_b + 2 * src_stride_y);
+ const VEC_DATA_TYPE(DATA_TYPE, 16) b3 = vload16(0, matrix_b + 3 * src_stride_y);
+
+ sum_col_32 += CONVERT(b0, VEC_DATA_TYPE(ACC_DATA_TYPE, 16)) +
+ CONVERT(b1, VEC_DATA_TYPE(ACC_DATA_TYPE, 16)) +
+ CONVERT(b2, VEC_DATA_TYPE(ACC_DATA_TYPE, 16)) +
+ CONVERT(b3, VEC_DATA_TYPE(ACC_DATA_TYPE, 16));
+
+ matrix_b += 4 * src_stride_y;
+ }
+
+  // This for loop performs the leftover accumulations
+ for (; i < (int)ROWS_B; ++i)
+ {
+ const VEC_DATA_TYPE(DATA_TYPE, 16) b0 = vload16(0, matrix_b);
+
+ sum_col_32 += CONVERT(b0, VEC_DATA_TYPE(ACC_DATA_TYPE, 16));
+
+ matrix_b += src_stride_y;
+ }
+
+#if defined(SCALAR)
+ sum_col_32 *= (VEC_DATA_TYPE(ACC_DATA_TYPE, 16))SCALAR;
+#endif // defined(SCALAR)
+ VSTORE(16)
+ (convert_int16(sum_col_32), 0, (__global int *)dst.ptr);
+}
+#endif // defined(COLS_B) && defined(ROWS_B)
+
+#endif // defined(DATA_TYPE) && defined(ACC_DATA_TYPE)
+
+#if defined(K_OFFSET)
+
+/** Helper function used to calculate the offset contribution after matrix multiplication.
+ *
+ * This function takes a final int32 accumulator value (the output of matrix multiplication),
+ * and calculates the offset contribution of matrix A and matrix B.
+ *
+ * @attention The k_offset = a_offset * b_offset * k (where k is the number of matrix A columns)
+ * needs to be passed at compile time using -DK_OFFSET (i.e. -DK_OFFSET=1200)
+ * @note In case the offset contribution due to a_offset is required, a_offset needs to be passed at
+ * compile time using -DA_OFFSET (i.e. -DA_OFFSET=1)
+ * @note In case the offset contribution due to b_offset is required, b_offset needs to be passed at
+ * compile time using -DB_OFFSET (i.e. -DB_OFFSET=6)
+ * @note In case sum_col has batches, -DSUM_COL_HAS_BATCHES must be passed at compile time. Usually
+ * if gemmlowp is used to accelerate convolution layer, sum_col will not have batches
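+ * @note As a worked example (using the illustrative values above): with -DA_OFFSET=1,
+ *       -DB_OFFSET=6 and k=200 matrix A columns, K_OFFSET = 1 * 6 * 200 = 1200.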
+ *
+ * @param[in] x get_global_id(0) * 4
+ * @param[in] y get_global_id(1)
+ * @param[in] z get_global_id(2)
+ * @param[in] sum_col_ptr (Optional) Pointer to the source tensor.
+ * Supported data type: same as @p mm_result_ptr
+ * @param[in] sum_col_stride_x (Optional) Stride of the source tensor in X
+ * dimension (in bytes)
+ * @param[in] sum_col_step_x (Optional) sum_col_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] sum_col_stride_y (Optional) Stride of the source tensor in Y
+ * dimension (in bytes)
+ * @param[in] sum_col_step_y (Optional) sum_col_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] sum_col_offset_first_element_in_bytes (Optional) The offset of the first element in
+ * the source tensor
+ * @param[in] sum_row_ptr (Optional) Pointer to the source tensor.
+ * Supported data type: same as @p mm_result_ptr
+ * @param[in] sum_row_stride_x (Optional) Stride of the source tensor in X
+ * dimension (in bytes)
+ * @param[in] sum_row_step_x (Optional) sum_row_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] sum_row_stride_y (Optional) Stride of the source tensor in Y
+ * dimension (in bytes)
+ * @param[in] sum_row_step_y (Optional) sum_row_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] sum_row_offset_first_element_in_bytes (Optional) The offset of the first element in
+ * the source tensor
+ * @param[in] biases_ptr (Optional) Pointer to the biases tensor.
+ * Supported data type: S32
+ * @param[in] biases_stride_x (Optional) Stride of the biases tensor in X
+ * dimension (in bytes)
+ * @param[in] biases_step_x (Optional) biases_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] biases_offset_first_element_in_bytes (Optional) The offset of the first element in
+ * the biases tensor
+ */
+inline int4 offset_contribution(int x, int y, int z
+#if defined(A_OFFSET)
+ ,
+ IMAGE_DECLARATION(sum_col)
+#endif // defined(A_OFFSET)
+#if defined(B_OFFSET)
+ ,
+ IMAGE_DECLARATION(sum_row)
+#endif // defined(B_OFFSET)
+#if defined(ADD_BIAS)
+ ,
+ VECTOR_DECLARATION(biases)
+#endif // defined(ADD_BIAS)
+)
+{
+ int4 a_offset_s32 = (int4)0;
+ int4 b_offset_s32 = (int4)0;
+
+ int batch_id = z;
+#if defined(DEPTH_INPUT3D)
+ batch_id /= (int)DEPTH_INPUT3D;
+#endif // defined(DEPTH_INPUT3D)
+
+#if defined(A_OFFSET)
+ // Compute the offset contribution due to A_OFFSET
+ __global uchar *sum_col_addr =
+ sum_col_ptr + sum_col_offset_first_element_in_bytes + x * sizeof(int);
+
+#if defined(SUM_COL_HAS_BATCHES)
+ a_offset_s32 = vload4(0, (__global int *)(sum_col_addr + batch_id * sum_col_stride_y));
+#else // defined(SUM_COL_HAS_BATCHES)
+ a_offset_s32 = vload4(0, (__global int *)sum_col_addr);
+#endif // defined(SUM_COL_HAS_BATCHES)
+
+ a_offset_s32 *= (int4)A_OFFSET;
+#endif // defined(A_OFFSET)
+
+#if defined(B_OFFSET)
+  // Compute the offset contribution due to B_OFFSET
+ __global uchar *sum_row_addr =
+ sum_row_ptr + sum_row_offset_first_element_in_bytes + y * sizeof(int);
+
+#if defined(HEIGHT_INPUT3D) && defined(DEPTH_INPUT3D)
+ b_offset_s32 = (int4) * (((__global int *)(sum_row_addr + batch_id * sum_row_stride_y)) +
+ (z % (int)DEPTH_INPUT3D) * (int)HEIGHT_INPUT3D);
+#else // defined(HEIGHT_INPUT3D) && defined(DEPTH_INPUT3D)
+ b_offset_s32 = (int4) * (((__global int *)(sum_row_addr + batch_id * sum_row_stride_y)));
+#endif // defined(HEIGHT_INPUT3D) && defined(DEPTH_INPUT3D)
+ b_offset_s32 *= (int4)B_OFFSET;
+#endif // defined(B_OFFSET)
+
+#if defined(ADD_BIAS)
+ // Add bias
+ __global uchar *bias_addr = biases_ptr + biases_offset_first_element_in_bytes + x * sizeof(int);
+
+ int4 biases_values = vload4(0, (__global int *)bias_addr);
+ b_offset_s32 += (int4)biases_values;
+#endif // defined(ADD_BIAS)
+
+ return (int4)K_OFFSET + a_offset_s32 + b_offset_s32;
+}
+
+/** OpenCL kernel used to add the offset contribution after matrix multiplication. The computation
+ * is performed in-place.
+ *
+ * This kernel takes a final int32 accumulator value (the output of matrix multiplication),
+ * and adds to it the offset contribution of matrix A and matrix B in-place.
+ *
+ * @attention The k_offset = a_offset * b_offset * k (where k is the number of matrix A columns)
+ * needs to be passed at compile time using -DK_OFFSET (i.e. -DK_OFFSET=1200)
+ * @note In case the offset contribution due to a_offset is required, a_offset needs to be passed at
+ * compile time using -DA_OFFSET (i.e. -DA_OFFSET=1)
+ * @note In case the offset contribution due to b_offset is required, b_offset needs to be passed at
+ * compile time using -DB_OFFSET (i.e. -DB_OFFSET=6)
+ * @note In case sum_col has batches, -DSUM_COL_HAS_BATCHES must be passed at compile time. Usually
+ * if gemmlowp is used to accelerate convolution layer, sum_col will not have batches
+ *
+ * The final result is:
+ *
+ * mm_result[i][k] = mm_result[i][k] +
+ * (sum_col[k] * A_OFFSET) +
+ * (sum_row[i] * B_OFFSET) +
+ * (K_OFFSET)
+ *
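+ * This follows from expanding the quantized product with the offsets applied (a sketch of the
+ * derivation, following the gemmlowp low-precision notes referenced earlier in this file):
+ *
+ *   mm_result[i][k] (with offsets) = sum_j (A[i][j] + A_OFFSET) * (B[j][k] + B_OFFSET)
+ *                                  = sum_j A[i][j] * B[j][k]          (raw mm_result[i][k])
+ *                                  + A_OFFSET * sum_j B[j][k]         (A_OFFSET * sum_col[k])
+ *                                  + B_OFFSET * sum_j A[i][j]         (B_OFFSET * sum_row[i])
+ *                                  + num_cols_A * A_OFFSET * B_OFFSET (K_OFFSET)
+ *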
+ * @param[in] mm_result_ptr Pointer to the source tensor. Supported data
+ * type: S32
+ * @param[in] mm_result_stride_x Stride of the source tensor in X dimension (in
+ * bytes)
+ * @param[in] mm_result_step_x mm_result_stride_x * number of elements along
+ * X processed per workitem(in bytes)
+ * @param[in] mm_result_stride_y Stride of the source tensor in Y dimension (in
+ * bytes)
+ * @param[in] mm_result_step_y mm_result_stride_y * number of elements along
+ * Y processed per workitem(in bytes)
+ * @param[in] mm_result_stride_z Stride of the source tensor in Z dimension (in
+ * bytes)
+ * @param[in] mm_result_step_z mm_result_stride_z * number of elements along
+ * Z processed per workitem(in bytes)
+ * @param[in] mm_result_offset_first_element_in_bytes The offset of the first element in the source
+ * tensor
+ * @param[in] sum_col_ptr (Optional) Pointer to the source tensor.
+ * Supported data type: same as @p mm_result_ptr
+ * @param[in] sum_col_stride_x (Optional) Stride of the source tensor in X
+ * dimension (in bytes)
+ * @param[in] sum_col_step_x (Optional) sum_col_stride_x * number of
+ * elements along X processed per workitem(in bytes)
+ * @param[in] sum_col_stride_y (Optional) Stride of the source tensor in Y
+ * dimension (in bytes)
+ * @param[in] sum_col_step_y (Optional) sum_col_stride_y * number of
+ * elements along Y processed per workitem(in bytes)
+ * @param[in] sum_col_offset_first_element_in_bytes (Optional) The offset of the first element in
+ * the source tensor
+ * @param[in] sum_row_ptr (Optional) Pointer to the source tensor.
+ * Supported data type: same as @p mm_result_ptr
+ * @param[in] sum_row_stride_x (Optional) Stride of the source tensor in X
+ * dimension (in bytes)
+ * @param[in] sum_row_step_x (Optional) sum_row_stride_x * number of
+ * elements along X processed per workitem(in bytes)
+ * @param[in] sum_row_stride_y (Optional) Stride of the source tensor in Y
+ * dimension (in bytes)
+ * @param[in] sum_row_step_y (Optional) sum_row_stride_y * number of
+ * elements along Y processed per workitem(in bytes)
+ * @param[in] sum_row_offset_first_element_in_bytes (Optional) The offset of the first element in
+ * the source tensor
+ * @param[in] biases_ptr (Optional) Pointer to the biases tensor.
+ * Supported data type: S32
+ * @param[in] biases_stride_x (Optional) Stride of the biases tensor in X
+ * dimension (in bytes)
+ * @param[in] biases_step_x (Optional) biases_stride_x * number of
+ * elements along X processed per workitem(in bytes)
+ * @param[in] biases_offset_first_element_in_bytes (Optional) The offset of the first element in
+ * the biases tensor
+ */
+__kernel void gemmlowp_offset_contribution(TENSOR3D_DECLARATION(mm_result)
+#if defined(A_OFFSET)
+ ,
+ IMAGE_DECLARATION(sum_col)
+#endif // defined(A_OFFSET)
+#if defined(B_OFFSET)
+ ,
+ IMAGE_DECLARATION(sum_row)
+#endif // defined(B_OFFSET)
+#if defined(ADD_BIAS)
+ ,
+ VECTOR_DECLARATION(biases)
+#endif // defined(ADD_BIAS)
+)
+{
+ const int x = get_global_id(0) * 4;
+ const int y = get_global_id(1);
+ const int z = get_global_id(2);
+
+ // Compute offset contribution
+ int4 offset_term_s32 = offset_contribution(
+ x, y, z
+#if defined(A_OFFSET)
+ ,
+ sum_col_ptr, sum_col_stride_x, sum_col_step_x, sum_col_stride_y, sum_col_step_y,
+ sum_col_offset_first_element_in_bytes
+#endif // defined(A_OFFSET)
+#if defined(B_OFFSET)
+ ,
+ sum_row_ptr, sum_row_stride_x, sum_row_step_x, sum_row_stride_y, sum_row_step_y,
+ sum_row_offset_first_element_in_bytes
+#endif // defined(B_OFFSET)
+#if defined(ADD_BIAS)
+ ,
+ biases_ptr, biases_stride_x, biases_step_x, biases_offset_first_element_in_bytes
+#endif // defined(ADD_BIAS)
+ );
+
+ __global uchar *mm_result_addr = mm_result_ptr + mm_result_offset_first_element_in_bytes +
+ x * sizeof(int) + y * mm_result_stride_y +
+ z * mm_result_stride_z;
+
+ int4 in_s32 = vload4(0, (__global int *)mm_result_addr);
+
+ // Add the offset terms to GEMM's result
+ in_s32 += offset_term_s32;
+
+ // Store the result with the offset contribution
+ vstore4(in_s32, 0, (__global int *)mm_result_addr);
+}
+
+#if defined(RESULT_OFFSET) && defined(RESULT_MULTIPLIER) && defined(RESULT_SHIFT) && \
+ defined(OUTPUT_DATA_TYPE)
+/** OpenCL kernel used to add the offset contribution after @ref CLGEMMLowpMatrixMultiplyKernel and
+ * quantize down to uint8/int8.
+ *
+ * This kernel takes a final int32 accumulator value (the output of
+ * @ref CLGEMMLowpMatrixMultiplyKernel), adds to it the offset contribution of matrix A and matrix B
+ * and quantizes to uint8/int8 through the output stage.
+ *
+ *
+ * @attention The k_offset = a_offset * b_offset * k (where k is the number of matrix A columns)
+ * needs to be passed at compile time using -DK_OFFSET (i.e. -DK_OFFSET=1200)
+ * @note In case the offset contribution due to a_offset is required, a_offset needs to be passed at
+ * compile time using -DA_OFFSET (i.e. -DA_OFFSET=1)
+ * @note In case the offset contribution due to b_offset is required, b_offset needs to be passed at
+ * compile time using -DB_OFFSET (i.e. -DB_OFFSET=6)
+ * @note In case sum_col has batches, -DSUM_COL_HAS_BATCHES must be passed at compile time. Usually
+ * if gemmlowp is used to accelerate convolution layer, sum_col will not have batches
+ *
+ * The result before the output stage is:
+ *
+ * mm_result[i][k] = mm_result[i][k] +
+ * (sum_col[k] * A_OFFSET) +
+ * (sum_row[i] * B_OFFSET) +
+ * (K_OFFSET)
+ *
+ * This result is quantized down to uint8/int8 using the output stage. The output stage computes the
+ * following operations:
+ *
+ * -# Add offset terms to final result
+ * -# Multiply each entry of result by result_mult_int
+ * -# Add bias to final result (if -DADD_BIAS is passed at compile time)
+ * -# Shift the int32 accumulator by result_shift
+ * -# Clamp the value between the specified min and max bounds (if -DMIN_BOUND and/or -DMAX_BOUND
+ * are passed at compile time)
+ * -# Clamp the resulting int32 values:
+ * - to the [0..255] range and cast to QASYMM8.
+ * - to the [-128..127] range and cast to QASYMM8_SIGNED.
+ *
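+ * As a purely illustrative example: with -DRESULT_OFFSET=2, -DRESULT_MULTIPLIER=3 and
+ * -DRESULT_SHIFT=4 (values chosen only for illustration), an accumulator value of 100 becomes
+ * ((100 + 2) * 3) >> 4 = 19 before the clamping and the saturating cast to the output type.
+ *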
+ * @attention The offset, scalar scale factor and number of bits to shift right of output tensor
+ * must be passed at compile time using -DRESULT_OFFSET, -DRESULT_MULTIPLIER and -DRESULT_SHIFT
+ *
+ * @note In case the addition of int32 biases is required, -DADD_BIAS should be passed at compile
+ * time
+ * @note The output datatype should be passed at compile time using -DOUTPUT_DATA_TYPE
+ * @note In case the clamping of the result is required, the min and max bounds can be passed at
+ * compile time using -DMIN_BOUND and -DMAX_BOUND. These values can be used to implement "rectified
+ * linear unit" activation functions
+ *
+ * @param[in] mm_result_ptr Pointer to the source tensor.
+ * Supported data type: S32
+ * @param[in] mm_result_stride_x Stride of the source tensor in X
+ * dimension (in bytes)
+ * @param[in] mm_result_step_x mm_result_stride_x * number of
+ * elements along X processed per workitem(in bytes)
+ * @param[in] mm_result_stride_y Stride of the source tensor in Y
+ * dimension (in bytes)
+ * @param[in] mm_result_step_y mm_result_stride_y * number of
+ * elements along Y processed per workitem(in bytes)
+ * @param[in] mm_result_stride_z Stride of the source tensor in Z
+ * dimension (in bytes)
+ * @param[in] mm_result_step_z mm_result_stride_z * number of
+ * elements along Z processed per workitem(in bytes)
+ * @param[in] mm_result_offset_first_element_in_bytes The offset of the first element in
+ * the source tensor
+ * @param[in] sum_col_ptr (Optional) Pointer to the source
+ * tensor. Supported data type: same as @p mm_result_ptr
+ * @param[in] sum_col_stride_x (Optional) Stride of the source
+ * tensor in X dimension (in bytes)
+ * @param[in] sum_col_step_x (Optional) sum_col_stride_x * number
+ * of elements along X processed per workitem(in bytes)
+ * @param[in] sum_col_stride_y (Optional) Stride of the source
+ * tensor in Y dimension (in bytes)
+ * @param[in] sum_col_step_y (Optional) sum_col_stride_y * number
+ * of elements along Y processed per workitem(in bytes)
+ * @param[in] sum_col_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the source tensor
+ * @param[in] sum_row_ptr (Optional) Pointer to the source
+ * tensor. Supported data type: same as @p mm_result_ptr
+ * @param[in] sum_row_stride_x (Optional) Stride of the source
+ * tensor in X dimension (in bytes)
+ * @param[in] sum_row_step_x (Optional) sum_row_stride_x * number
+ * of elements along X processed per workitem(in bytes)
+ * @param[in] sum_row_stride_y (Optional) Stride of the source
+ * tensor in Y dimension (in bytes)
+ * @param[in] sum_row_step_y (Optional) sum_row_stride_y * number
+ * of elements along Y processed per workitem(in bytes)
+ * @param[in] sum_row_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the source tensor
+ * @param[in] biases_ptr (Optional) Pointer to the biases
+ * tensor. Supported data type: S32
+ * @param[in] biases_stride_x (Optional) Stride of the biases
+ * tensor in X dimension (in bytes)
+ * @param[in] biases_step_x (Optional) biases_stride_x * number
+ * of elements along X processed per workitem(in bytes)
+ * @param[in] biases_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the biases tensor
+ * @param[out] dst_ptr Pointer to the destination tensor.
+ * Supported data type: QASYMM8/QASYMM8_SIGNED
+ * @param[in] dst_stride_x Stride of the destination tensor in
+ * X dimension (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination tensor in
+ * Y dimension (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z
+ * dimension (in bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements
+ * along Z processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in
+ * the destination tensor
+ * @param[in] result_multipliers_ptr (Optional) Pointer to the output
+ * multipliers vector for per-channel quantization. Supported data types: S32
+ * @param[in] result_multipliers_stride_x (Optional) Stride of the output
+ * multipliers vector in X dimension (in bytes)
+ * @param[in] result_multipliers_step_x (Optional)
+ * result_multipliers_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] result_multipliers_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the output multipliers vector
+ * @param[in] result_shifts_ptr (Optional) Pointer to the output
+ * shifts vector for per-channel quantization. Supported data types: S32
+ * @param[in] result_shifts_stride_x (Optional) Stride of the output
+ * shifts vector in X dimension (in bytes)
+ * @param[in] result_shifts_step_x (Optional) result_shifts_stride_x *
+ * number of elements along X processed per workitem(in bytes)
+ * @param[in] result_shifts_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the output shifts vector
+ */
+__kernel void gemmlowp_offset_contribution_quantize_down(TENSOR3D_DECLARATION(mm_result)
+#if defined(A_OFFSET)
+ ,
+ IMAGE_DECLARATION(sum_col)
+#endif // defined(A_OFFSET)
+#if defined(B_OFFSET)
+ ,
+ IMAGE_DECLARATION(sum_row)
+#endif // defined(B_OFFSET)
+ ,
+#if defined(ADD_BIAS)
+ VECTOR_DECLARATION(biases),
+#endif // defined(ADD_BIAS)
+ TENSOR3D_DECLARATION(dst)
+#if defined(PER_CHANNEL_QUANTIZATION)
+ ,
+ VECTOR_DECLARATION(result_multipliers),
+ VECTOR_DECLARATION(result_shifts)
+#endif // defined(PER_CHANNEL_QUANTIZATION)
+)
+{
+ const int x = get_global_id(0) * 4;
+ const int y = get_global_id(1);
+ const int z = get_global_id(2);
+
+ __global uchar *dst_addr =
+ dst_ptr + dst_offset_first_element_in_bytes + x + y * dst_stride_y + z * dst_stride_z;
+
+ // Compute offset contribution
+ int4 offset_term_s32 = offset_contribution(
+ x, y, z
+#if defined(A_OFFSET)
+ ,
+ sum_col_ptr, sum_col_stride_x, sum_col_step_x, sum_col_stride_y, sum_col_step_y,
+ sum_col_offset_first_element_in_bytes
+#endif // defined(A_OFFSET)
+#if defined(B_OFFSET)
+ ,
+ sum_row_ptr, sum_row_stride_x, sum_row_step_x, sum_row_stride_y, sum_row_step_y,
+ sum_row_offset_first_element_in_bytes
+#endif // defined(B_OFFSET)
+#if defined(ADD_BIAS)
+ ,
+ biases_ptr, biases_stride_x, biases_step_x, biases_offset_first_element_in_bytes
+#endif // defined(ADD_BIAS)
+ );
+
+ __global uchar *mm_result_addr = mm_result_ptr + mm_result_offset_first_element_in_bytes +
+ x * sizeof(int) + y * mm_result_stride_y +
+ z * mm_result_stride_z;
+
+ int4 in_s32 = vload4(0, (__global int *)mm_result_addr);
+
+ // Add the offset terms to GEMM's result
+ in_s32 += offset_term_s32;
+
+ // -------------- OUTPUT STAGE
+
+ // Add the offset terms to GEMM's result
+ in_s32 += (int4)RESULT_OFFSET;
+
+ // Multiply by result_mult_int and shift
+#if defined(PER_CHANNEL_QUANTIZATION)
+ __global uchar *result_multipliers_addr =
+ result_multipliers_ptr + result_multipliers_offset_first_element_in_bytes + x * sizeof(int);
+ __global uchar *result_shifts_addr =
+ result_shifts_ptr + result_shifts_offset_first_element_in_bytes + x * sizeof(int);
+ int4 result_multipliers_values = vload4(0, (__global int *)result_multipliers_addr);
+ int4 result_shifts_values = vload4(0, (__global int *)result_shifts_addr);
+
+ in_s32 *= result_multipliers_values;
+ in_s32 >>= result_shifts_values;
+#else // defined(PER_CHANNEL_QUANTIZATION)
+ in_s32 *= RESULT_MULTIPLIER;
+
+ in_s32 >>= RESULT_SHIFT;
+#endif // defined(PER_CHANNEL_QUANTIZATION)
+
+ VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4)
+ res = CONVERT_SAT(in_s32, VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4));
+
+#if defined(MIN_BOUND)
+ res = max(res, (VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4))MIN_BOUND);
+#endif // defined(MIN_BOUND)
+#if defined(MAX_BOUND)
+ res = min(res, (VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4))MAX_BOUND);
+#endif // defined(MAX_BOUND)
+
+ // Store the result
+ vstore4(res, 0, (__global OUTPUT_DATA_TYPE *)dst_addr);
+}
+
+/** OpenCL kernel used to add the offset contribution after matrix multiplication and quantize
+ * down to uint8.
+ *
+ * This kernel takes a final int32 accumulator value (the output of matrix multiplication), adds to
+ * it the offset contribution of matrix A and matrix B and quantizes to uint8 through the output
+ * stage.
+ *
+ *
+ * @attention The k_offset = a_offset * b_offset * k (where k is the number of matrix A columns)
+ * needs to be passed at compile time using -DK_OFFSET (i.e. -DK_OFFSET=1200)
+ * @note In case the offset contribution due to a_offset is required, a_offset needs to be passed at
+ * compile time using -DA_OFFSET (i.e. -DA_OFFSET=1)
+ * @note In case the offset contribution due to b_offset is required, b_offset needs to be passed at
+ * compile time using -DB_OFFSET (i.e. -DB_OFFSET=6)
+ * @note In case sum_col has batches, -DSUM_COL_HAS_BATCHES must be passed at compile time. Usually
+ * if gemmlowp is used to accelerate convolution layer, sum_col will not have batches
+ *
+ * The result before the output stage is:
+ *
+ * mm_result[i][k] = mm_result[i][k] +
+ * (sum_col[k] * A_OFFSET) +
+ * (sum_row[i] * B_OFFSET) +
+ * (K_OFFSET)
+ *
+ * This result is quantized down to uint8/int8 using the output stage. The output stage computes the
+ * following operations:
+ *
+ * -# Compute fixed point multiplication between each entry of input by
+ * result_fixedpoint_multiplier
+ * -# Add bias to final result if bias tensor is not a nullptr
+ * -# Round to nearest division by a power-of-two using result_shift
+ * -# Add offset to each result
+ * -# Clamp the value between the specified min and max bounds
+ * -# Clamp the resulting int32 values:
+ * - to the [0..255] range and cast to QASYMM8.
+ * - to the [-128..127] range and cast to QASYMM8_SIGNED.
+ *
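+ * As an illustrative sketch (assuming gemmlowp-style fixed-point semantics, where the multiplier
+ * encodes a Q0.31 value): a result_fixedpoint_multiplier of 1 << 30 (roughly 0.5) combined with
+ * result_shift = 1 scales each accumulator by roughly 0.25 before RESULT_OFFSET is added.
+ *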
+ * @attention The offset, scalar scale factor and number of bits to shift right of output tensor
+ * must be passed at compile time using -DRESULT_OFFSET, -DRESULT_MULTIPLIER and -DRESULT_SHIFT
+ *
+ * @note In case the addition of int32 biases is required, -DADD_BIAS should be passed at compile
+ * time
+ * @note The output datatype should be passed at compile time using -DOUTPUT_DATA_TYPE
+ * @note In case the clamping of the result is required, the min and max bounds can be passed at
+ * compile time using -DMIN_BOUND and -DMAX_BOUND. These values can be used to implement "rectified
+ * linear unit" activation functions
+ *
+ * @param[in] mm_result_ptr Pointer to the source tensor.
+ * Supported data type: S32
+ * @param[in] mm_result_stride_x Stride of the source tensor in X
+ * dimension (in bytes)
+ * @param[in] mm_result_step_x mm_result_stride_x * number of
+ * elements along X processed per workitem(in bytes)
+ * @param[in] mm_result_stride_y Stride of the source tensor in Y
+ * dimension (in bytes)
+ * @param[in] mm_result_step_y mm_result_stride_y * number of
+ * elements along Y processed per workitem(in bytes)
+ * @param[in] mm_result_stride_z Stride of the source tensor in Z
+ * dimension (in bytes)
+ * @param[in] mm_result_step_z mm_result_stride_z * number of
+ * elements along Z processed per workitem(in bytes)
+ * @param[in] mm_result_offset_first_element_in_bytes The offset of the first element in
+ * the source tensor
+ * @param[in] sum_col_ptr (Optional) Pointer to the source
+ * tensor. Supported data type: same as @p mm_result_ptr
+ * @param[in] sum_col_stride_x (Optional) Stride of the source
+ * tensor in X dimension (in bytes)
+ * @param[in] sum_col_step_x (Optional) sum_col_stride_x * number
+ * of elements along X processed per workitem(in bytes)
+ * @param[in] sum_col_stride_y (Optional) Stride of the source
+ * tensor in Y dimension (in bytes)
+ * @param[in] sum_col_step_y (Optional) sum_col_stride_y * number
+ * of elements along Y processed per workitem(in bytes)
+ * @param[in] sum_col_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the source tensor
+ * @param[in] sum_row_ptr (Optional) Pointer to the source
+ * tensor. Supported data type: same as @p mm_result_ptr
+ * @param[in] sum_row_stride_x (Optional) Stride of the source
+ * tensor in X dimension (in bytes)
+ * @param[in] sum_row_step_x (Optional) sum_row_stride_x * number
+ * of elements along X processed per workitem(in bytes)
+ * @param[in] sum_row_stride_y (Optional) Stride of the source
+ * tensor in Y dimension (in bytes)
+ * @param[in] sum_row_step_y (Optional) sum_row_stride_y * number
+ * of elements along Y processed per workitem(in bytes)
+ * @param[in] sum_row_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the source tensor
+ * @param[in] biases_ptr (Optional) Pointer to the biases
+ * tensor. Supported data type: S32
+ * @param[in] biases_stride_x (Optional) Stride of the biases
+ * tensor in X dimension (in bytes)
+ * @param[in] biases_step_x (Optional) biases_stride_x * number
+ * of elements along X processed per workitem(in bytes)
+ * @param[in] biases_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the biases tensor
+ * @param[out] dst_ptr Pointer to the destination tensor.
+ * Supported data type: QASYMM8
+ * @param[in] dst_stride_x Stride of the destination tensor in
+ * X dimension (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination tensor in
+ * Y dimension (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements
+ * along Y processed per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z
+ * dimension (in bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements
+ * along Z processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in
+ * the destination tensor
+ * @param[in] result_multipliers_ptr (Optional) Pointer to the output
+ * multipliers vector for per-channel quantization. Supported data types: S32
+ * @param[in] result_multipliers_stride_x (Optional) Stride of the output
+ * multipliers vector in X dimension (in bytes)
+ * @param[in] result_multipliers_step_x (Optional)
+ * result_multipliers_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] result_multipliers_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the output multipliers vector
+ * @param[in] result_shifts_ptr (Optional) Pointer to the output
+ * shifts vector for per-channel quantization. Supported data types: S32
+ * @param[in] result_shifts_stride_x (Optional) Stride of the output
+ * shifts vector in X dimension (in bytes)
+ * @param[in] result_shifts_step_x (Optional) result_shifts_stride_x *
+ * number of elements along X processed per workitem(in bytes)
+ * @param[in] result_shifts_offset_first_element_in_bytes (Optional) The offset of the first
+ * element in the output shifts vector
+ */
+__kernel void
+ gemmlowp_offset_contribution_quantize_down_fixedpoint(TENSOR3D_DECLARATION(mm_result)
+#if defined(A_OFFSET)
+ ,
+ IMAGE_DECLARATION(sum_col)
+#endif // defined(A_OFFSET)
+#if defined(B_OFFSET)
+ ,
+ IMAGE_DECLARATION(sum_row)
+#endif // defined(B_OFFSET)
+ ,
+#if defined(ADD_BIAS)
+ VECTOR_DECLARATION(biases),
+#endif // defined(ADD_BIAS)
+ TENSOR3D_DECLARATION(dst)
+#if defined(PER_CHANNEL_QUANTIZATION)
+ ,
+ VECTOR_DECLARATION(result_multipliers),
+ VECTOR_DECLARATION(result_shifts)
+#endif // defined(PER_CHANNEL_QUANTIZATION)
+ )
+{
+ const int x = get_global_id(0) * 4;
+ const int y = get_global_id(1);
+ const int z = get_global_id(2);
+
+ // Compute offset contribution
+ int4 offset_term_s32 = offset_contribution(
+ x, y, z
+#if defined(A_OFFSET)
+ ,
+ sum_col_ptr, sum_col_stride_x, sum_col_step_x, sum_col_stride_y, sum_col_step_y,
+ sum_col_offset_first_element_in_bytes
+#endif // defined(A_OFFSET)
+#if defined(B_OFFSET)
+ ,
+ sum_row_ptr, sum_row_stride_x, sum_row_step_x, sum_row_stride_y, sum_row_step_y,
+ sum_row_offset_first_element_in_bytes
+#endif // defined(B_OFFSET)
+#if defined(ADD_BIAS)
+ ,
+ biases_ptr, biases_stride_x, biases_step_x, biases_offset_first_element_in_bytes
+#endif // defined(ADD_BIAS)
+ );
+
+ __global uchar *mm_result_addr = mm_result_ptr + mm_result_offset_first_element_in_bytes +
+ x * sizeof(int) + y * mm_result_stride_y +
+ z * mm_result_stride_z;
+
+ __global uchar *dst_addr =
+ dst_ptr + dst_offset_first_element_in_bytes + x + y * dst_stride_y + z * dst_stride_z;
+
+ int4 in_s32 = vload4(0, (__global int *)mm_result_addr);
+
+ // Add the offset terms to GEMM's result
+ in_s32 += offset_term_s32;
+
+ // -------------- OUTPUT STAGE
+
+ // Multiply by the (per-tensor or per-channel) quantized multiplier and shift
+#if defined(PER_CHANNEL_QUANTIZATION)
+ __global uchar *result_multipliers_addr =
+ result_multipliers_ptr + result_multipliers_offset_first_element_in_bytes + x * sizeof(int);
+ __global uchar *result_shifts_addr =
+ result_shifts_ptr + result_shifts_offset_first_element_in_bytes + x * sizeof(int);
+ int4 result_multipliers_values = vload4(0, (__global int *)result_multipliers_addr);
+ int4 result_shifts_values = vload4(0, (__global int *)result_shifts_addr);
+
+ int4 in_s32_shift_lt0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(
+ in_s32, result_multipliers_values, result_shifts_values, 4);
+ int4 in_s32_shift_gt0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(
+ in_s32, result_multipliers_values, result_shifts_values, 4);
+ in_s32 = select(in_s32_shift_lt0, in_s32_shift_gt0, result_shifts_values >= 0);
+#else // defined(PER_CHANNEL_QUANTIZATION)
+
+#if RESULT_SHIFT < 0
+ in_s32 =
+ ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(in_s32, RESULT_MULTIPLIER, RESULT_SHIFT, 4);
+#else // RESULT_SHIFT >= 0
+ in_s32 = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(in_s32, RESULT_MULTIPLIER, RESULT_SHIFT, 4);
+#endif // RESULT_SHIFT < 0
+
+#endif // defined(PER_CHANNEL_QUANTIZATION)
+
+ // Add the result offset
+ in_s32 += (int4)RESULT_OFFSET;
+
+ VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4)
+ res = CONVERT_SAT(in_s32, VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4));
+
+#if defined(MIN_BOUND)
+ res = max(res, (VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4))MIN_BOUND);
+#endif // defined(MIN_BOUND)
+#if defined(MAX_BOUND)
+ res = min(res, (VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4))MAX_BOUND);
+#endif // defined(MAX_BOUND)
+
+ // Store the result
+ vstore4(res, 0, (__global OUTPUT_DATA_TYPE *)dst_addr);
+}
+#endif // defined(RESULT_OFFSET) && defined(RESULT_MULTIPLIER) && defined(RESULT_SHIFT) &&
+ // defined(OUTPUT_DATA_TYPE)
+
+#endif // defined(K_OFFSET)
+
+#if defined(RESULT_OFFSET) && defined(RESULT_MULT_INT) && defined(RESULT_SHIFT)
+/** This OpenCL kernel is used to quantize down the int32 accumulator values of GEMMLowp to
+ * QASYMM8/QASYMM8_SIGNED
+ *
+ * This kernel takes a final int32 accumulator value and processes it to obtain the final
+ * QASYMM8/QASYMM8_SIGNED value. The following computations will be performed by the kernel:
+ *
+ * -# Add bias to final result (if -DADD_BIAS is passed at compile time)
+ * -# Add offset terms to final result
+ * -# Multiply each entry of result by result_mult_int
+ * -# Shift the int32 accumulator by result_shift
+ * -# Clamp the value between the specified min and max bounds (if -DMIN_BOUND and/or -DMAX_BOUND
+ * are passed at compile time)
+ * -# Clamp the resulting int32 values:
+ *    - to the [0..255] range and cast to QASYMM8.
+ *    - to the [-128..127] range and cast to QASYMM8_SIGNED.
+ *
+ * @attention The offset, scalar scale factor and number of bits to shift right of output tensor
+ * must be passed at compile time using -DRESULT_OFFSET, -DRESULT_MULT_INT and -DRESULT_SHIFT
+ *
+ * @note In case the addition of int32 biases is required, -DADD_BIAS should be passed at compile
+ * time
+ * @note The output datatype should be passed at compile time using -DOUTPUT_DATA_TYPE
+ * @note In case the clamping of the result is required, the min and max bounds can be passed at
+ * compile time using -DMIN_BOUND and -DMAX_BOUND. These values can be used to implement "rectified
+ * linear unit" activation functions
+ *
+ * @param[in] src_ptr Pointer to the source tensor. Supported data
+ * type: S32
+ * @param[in] src_stride_x Stride of the source tensor in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source tensor in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source tensor in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z
+ * processed per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source
+ * tensor
+ * @param[in] biases_ptr (Optional) Pointer to the biases tensor.
+ * Supported data type: same as @p src_ptr
+ * @param[in] biases_stride_x (Optional) Stride of the biases tensor in X
+ * dimension (in bytes)
+ * @param[in] biases_step_x (Optional) biases_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] biases_offset_first_element_in_bytes (Optional) The offset of the first element in
+ * the biases tensor
+ * @param[out] dst_ptr Pointer to the destination tensor. Supported data
+ * type: QASYMM8/QASYMM8_SIGNED
+ * @param[in] dst_stride_x Stride of the destination tensor in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_gx_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination tensor in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in
+ * bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the
+ * destination tensor
+ */
+__kernel void gemmlowp_output_stage_quantize_down(TENSOR3D_DECLARATION(src),
+#if defined(ADD_BIAS)
+ VECTOR_DECLARATION(biases),
+#endif // defined(ADD_BIAS)
+ TENSOR3D_DECLARATION(dst))
+{
+ // Compute source and destination addresses
+ int x = get_global_id(0) * 4;
+ int y = get_global_id(1);
+ int z = get_global_id(2);
+
+ __global uchar *src_addr = src_ptr + src_offset_first_element_in_bytes + x * sizeof(int) +
+ y * src_stride_y + z * src_stride_z;
+
+ __global uchar *dst_addr =
+ dst_ptr + dst_offset_first_element_in_bytes + x + y * dst_stride_y + z * dst_stride_z;
+
+ int4 input_values = vload4(0, (__global int *)src_addr);
+
+#if defined(ADD_BIAS)
+ // Add bias
+ __global uchar *bias_addr = biases_ptr + biases_offset_first_element_in_bytes + x * sizeof(int);
+
+ int4 biases_values = vload4(0, (__global int *)bias_addr);
+ input_values += (int4)biases_values;
+#endif // defined(ADD_BIAS)
+
+ // Add the result offset
+ input_values += (int4)RESULT_OFFSET;
+
+ // Multiply by result_mult_int and shift
+ input_values *= RESULT_MULT_INT;
+
+#if RESULT_SHIFT < 0
+ input_values >>= -RESULT_SHIFT;
+#else // RESULT_SHIFT >= 0
+ input_values >>= RESULT_SHIFT;
+#endif // RESULT_SHIFT < 0
+
+ VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4)
+ res = CONVERT_SAT(input_values, VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4));
+
+#if defined(MIN_BOUND)
+ res = max(res, (VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4))MIN_BOUND);
+#endif // defined(MIN_BOUND)
+#if defined(MAX_BOUND)
+ res = min(res, (VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4))MAX_BOUND);
+#endif // defined(MAX_BOUND)
+
+ // Store the result
+ vstore4(res, 0, (__global OUTPUT_DATA_TYPE *)dst_addr);
+}
+#endif // defined(RESULT_OFFSET) && defined(RESULT_MULT_INT) && defined(RESULT_SHIFT)
+
+#if defined(RESULT_OFFSET_AFTER_SHIFT) && defined(RESULT_FIXEDPOINT_MULTIPLIER) && \
+ defined(RESULT_SHIFT)
+/** This OpenCL kernel is used to quantize down the int32 accumulator values of GEMMLowp to
+ * QASYMM8/QASYMM8_SIGNED
+ *
+ * This kernel takes a final int32 accumulator value (the output of matrix multiplication), and
+ * processes it to obtain the final QASYMM8/QASYMM8_SIGNED value. The following computations will be
+ * performed by the kernel:
+ *
+ * -# Add bias to final result if bias tensor is not a nullptr
+ * -# Compute fixed point multiplication between each entry of input by
+ * result_fixedpoint_multiplier
+ * -# Round to nearest division by a power-of-two using result_shift
+ * -# Add offset to each result
+ * -# Clamp the value between the specified min and max bounds
+ * -# Clamp the resulting int32 values:
+ * - to the [0..255] range and cast to QASYMM8.
+ * - to the [-128..127] range and cast to QASYMM8_SIGNED.
+ *
+ * @attention The offset, scalar scale factor and number of bits to shift right of output tensor
+ * must be passed at compile time using -DRESULT_OFFSET_AFTER_SHIFT, -DRESULT_FIXEDPOINT_MULTIPLIER
+ * and -DRESULT_SHIFT
+ *
+ * @note In case the addition of int32 biases is required, -DADD_BIAS should be passed at compile
+ * time
+ * @note The output datatype should be passed at compile time using -DOUTPUT_DATA_TYPE
+ * @note In case the clamping of the result is required, the min and max bounds can be passed at
+ * compile time using -DMIN_BOUND and -DMAX_BOUND. These values can be used to implement "rectified
+ * linear unit" activation functions
+ *
+ * @param[in] src_ptr Pointer to the source tensor. Supported data
+ * type: S32
+ * @param[in] src_stride_x Stride of the source tensor in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source tensor in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source tensor in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z
+ * processed per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source
+ * tensor
+ * @param[in] biases_ptr (Optional) Pointer to the biases tensor.
+ * Supported data type: same as @p src_ptr
+ * @param[in] biases_stride_x (Optional) Stride of the biases tensor in X
+ * dimension (in bytes)
+ * @param[in] biases_step_x (Optional) biases_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] biases_offset_first_element_in_bytes (Optional) The offset of the first element in
+ * the biases tensor
+ * @param[out] dst_ptr Pointer to the destination tensor. Supported data
+ * type: QASYMM8/QASYMM8_SIGNED
+ * @param[in] dst_stride_x Stride of the destination tensor in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_gx_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination tensor in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in
+ * bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the
+ * destination tensor
+ */
+__kernel void gemmlowp_output_stage_quantize_down_fixedpoint(TENSOR3D_DECLARATION(src),
+#if defined(ADD_BIAS)
+ VECTOR_DECLARATION(biases),
+#endif // defined(ADD_BIAS)
+ TENSOR3D_DECLARATION(dst))
+{
+ // Compute source and destination addresses
+ int x = get_global_id(0) * 4;
+ int y = get_global_id(1);
+ int z = get_global_id(2);
+
+ __global uchar *src_addr = src_ptr + src_offset_first_element_in_bytes + x * sizeof(int) +
+ y * src_stride_y + z * src_stride_z;
+
+ __global uchar *dst_addr =
+ dst_ptr + dst_offset_first_element_in_bytes + x + y * dst_stride_y + z * dst_stride_z;
+
+ int4 input_values = vload4(0, (__global int *)src_addr);
+
+#if defined(ADD_BIAS)
+ // Add bias
+ __global uchar *bias_addr = biases_ptr + biases_offset_first_element_in_bytes + x * sizeof(int);
+
+ int4 biases_values = vload4(0, (__global int *)bias_addr);
+ input_values += (int4)biases_values;
+#endif // defined(ADD_BIAS)
+
+ // Multiply by result_fixedpoint_multiplier and shift
+#if RESULT_SHIFT < 0
+ input_values = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(
+ input_values, RESULT_FIXEDPOINT_MULTIPLIER, RESULT_SHIFT, 4);
+#else // RESULT_SHIFT >= 0
+ input_values = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(
+ input_values, RESULT_FIXEDPOINT_MULTIPLIER, RESULT_SHIFT, 4);
+#endif // RESULT_SHIFT < 0
+
+ // Add the result offset (applied after the shift)
+ input_values += (int4)RESULT_OFFSET_AFTER_SHIFT;
+
+ VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4)
+ res = CONVERT_SAT(input_values, VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4));
+
+#if defined(MIN_BOUND)
+ res = max(res, (VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4))MIN_BOUND);
+#endif // defined(MIN_BOUND)
+#if defined(MAX_BOUND)
+ res = min(res, (VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4))MAX_BOUND);
+#endif // defined(MAX_BOUND)
+
+ // Store the result
+ vstore4(res, 0, (__global OUTPUT_DATA_TYPE *)dst_addr);
+}
+#endif // defined(RESULT_OFFSET_AFTER_SHIFT) && defined(RESULT_FIXEDPOINT_MULTIPLIER) &&
+ // defined(RESULT_SHIFT)
+
+#if defined(RESULT_FIXEDPOINT_MULTIPLIER) && defined(RESULT_SHIFT)
+
+/** This OpenCL kernel is used to quantize down the int32 accumulator values of GEMMLowp to QSYMM16
+ *
+ * This kernel takes a final int32 accumulator value (the output of matrix multiplication), and
+ * processes it to obtain the final QSYMM16 value. The following computations will be performed by
+ * the kernel:
+ *
+ * -# Add bias to final result if bias tensor is not a nullptr
+ * -# Compute fixed point multiplication between each entry of input by
+ * result_fixedpoint_multiplier
+ * -# Round to nearest division by a power-of-two using result_shift
+ * -# Clamp the value between the specified min and max bounds
+ * -# Clamp the resulting int32 values to the [-32768..32767] range and cast to QSYMM16.
+ *
+ * @attention The scalar scale factor and number of bits to shift right of output tensor
+ * must be passed at compile time using -DRESULT_FIXEDPOINT_MULTIPLIER and -DRESULT_SHIFT
+ *
+ * @note In case the addition of int32 biases is required, -DADD_BIAS should be passed at compile
+ * time
+ * @note In case the clamping of the result is required, the min and max bounds can be passed at
+ * compile time using -DMIN_BOUND and -DMAX_BOUND. These values can be used to implement "rectified
+ * linear unit" activation functions
+ *
+ * @param[in] src_ptr Pointer to the source tensor. Supported data
+ * type: S32
+ * @param[in] src_stride_x Stride of the source tensor in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source tensor in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source tensor in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z
+ * processed per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source
+ * tensor
+ * @param[in] biases_ptr (Optional) Pointer to the biases tensor.
+ * Supported data type: same as @p src_ptr
+ * @param[in] biases_stride_x (Optional) Stride of the biases tensor in X
+ * dimension (in bytes)
+ * @param[in] biases_step_x (Optional) biases_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] biases_offset_first_element_in_bytes (Optional) The offset of the first element in
+ * the biases tensor
+ * @param[out] dst_ptr Pointer to the destination tensor. Supported data
+ * type: QSYMM16
+ * @param[in] dst_stride_x Stride of the destination tensor in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_gx_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination tensor in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in
+ * bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the
+ * destination tensor
+ */
+__kernel void gemmlowp_output_stage_quantize_down_fixedpoint_qsymm16(TENSOR3D_DECLARATION(src),
+#if defined(ADD_BIAS)
+ VECTOR_DECLARATION(biases),
+#endif // defined(ADD_BIAS)
+ TENSOR3D_DECLARATION(dst))
+{
+ // Compute source and destination addresses
+ int x = get_global_id(0) * 4;
+ int y = get_global_id(1);
+ int z = get_global_id(2);
+
+ __global uchar *src_addr = src_ptr + src_offset_first_element_in_bytes + x * sizeof(int) +
+ y * src_stride_y + z * src_stride_z;
+
+ __global uchar *dst_addr =
+ dst_ptr + dst_offset_first_element_in_bytes + x * 2 + y * dst_stride_y + z * dst_stride_z;
+
+ int4 input_values = vload4(0, (__global int *)src_addr);
+
+#if defined(ADD_BIAS)
+ // Add bias
+ __global uchar *bias_addr = biases_ptr + biases_offset_first_element_in_bytes + x * sizeof(int);
+
+ int4 biases_values = vload4(0, (__global int *)bias_addr);
+ input_values += (int4)biases_values;
+#endif // defined(ADD_BIAS)
+
+ // Multiply by result_fixedpoint_multiplier and shift
+#if RESULT_SHIFT < 0
+ input_values = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(
+ input_values, RESULT_FIXEDPOINT_MULTIPLIER, RESULT_SHIFT, 4);
+#else // RESULT_SHIFT >= 0
+ input_values = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(
+ input_values, RESULT_FIXEDPOINT_MULTIPLIER, RESULT_SHIFT, 4);
+#endif // RESULT_SHIFT < 0
+
+ short4 res = convert_short4_sat(input_values);
+
+#if defined(MIN_BOUND)
+ res = max(res, (short4)MIN_BOUND);
+#endif // defined(MIN_BOUND)
+#if defined(MAX_BOUND)
+ res = min(res, (short4)MAX_BOUND);
+#endif // defined(MAX_BOUND)
+
+ // Store the result
+ vstore4(res, 0, (__global short *)dst_addr);
+}
+#endif // defined(RESULT_FIXEDPOINT_MULTIPLIER) && defined(RESULT_SHIFT)
+
+#if defined(REAL_MULTIPLIER) && defined(OUTPUT_OFFSET)
+/** This OpenCL kernel is used to quantize down the int32 accumulator values of GEMMLowp to
+ * QASYMM8/QASYMM8_SIGNED
+ *
+ * This kernel takes a final int32 accumulator value (the output of matrix multiplication), and
+ * processes it to obtain the final QASYMM8/QASYMM8_SIGNED value. The following computations will be
+ * performed by the kernel:
+ *
+ * -# Add bias to final result if bias tensor is not a nullptr
+ * -# Requantize: multiply each entry of the input by the real multiplier in floating point,
+ * round to nearest and add the output offset
+ * -# Clamp the value between the specified min and max bounds
+ * -# Clamp the resulting int32 values:
+ * - to the [0..255] range and cast to QASYMM8.
+ * - to the [-128..127] range and cast to QASYMM8_SIGNED.
+ *
+ * @attention The offset and scalar scale factor must be passed at compile time using
+ * -DOUTPUT_OFFSET and -DREAL_MULTIPLIER
+ *
+ * @note In case the addition of int32 biases is required, -DADD_BIAS should be passed at compile
+ * time
+ * @note The output datatype should be passed at compile time using -DOUTPUT_DATA_TYPE
+ * @note In case the clamping of the result is required, the min and max bounds can be passed at
+ * compile time using -DMIN_BOUND and -DMAX_BOUND. These values can be used to implement "rectified
+ * linear unit" activation functions
+ *
+ * @param[in] src_ptr Pointer to the source tensor. Supported data
+ * type: S32
+ * @param[in] src_stride_x Stride of the source tensor in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source tensor in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source tensor in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z
+ * processed per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source
+ * tensor
+ * @param[in] biases_ptr (Optional) Pointer to the biases tensor.
+ * Supported data type: same as @p src_ptr
+ * @param[in] biases_stride_x (Optional) Stride of the biases tensor in X
+ * dimension (in bytes)
+ * @param[in] biases_step_x (Optional) biases_stride_x * number of elements
+ * along X processed per workitem(in bytes)
+ * @param[in] biases_offset_first_element_in_bytes (Optional) The offset of the first element in
+ * the biases tensor
+ * @param[out] dst_ptr Pointer to the destination tensor. Supported data
+ * type: QASYMM8/QASYMM8_SIGNED
+ * @param[in] dst_stride_x Stride of the destination tensor in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_gx_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination tensor in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_w Stride of the destination tensor in W dimension
+ * (in bytes)
+ * @param[in] dst_step_w dst_stride_w * number of elements along W
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the
+ * destination tensor
+ */
+__kernel void gemmlowp_output_stage_quantize_down_float(TENSOR3D_DECLARATION(src),
+#if defined(ADD_BIAS)
+ VECTOR_DECLARATION(biases),
+#endif // defined(ADD_BIAS)
+#if defined(DST_HEIGHT)
+ TENSOR4D_DECLARATION(dst))
+#else // defined(DST_HEIGHT)
+ TENSOR3D_DECLARATION(dst))
+#endif // defined(DST_HEIGHT)
+{
+ // Compute source and destination addresses
+ int x = get_global_id(0) * 4;
+ int y = get_global_id(1);
+ int z = get_global_id(2);
+
+ __global uchar *src_addr = src_ptr + src_offset_first_element_in_bytes + x * sizeof(int) +
+ y * src_stride_y + z * src_stride_z;
+
+ __global uchar *dst_addr =
+ dst_ptr + dst_offset_first_element_in_bytes + x + y * dst_stride_y + z * dst_stride_z;
+
+ int4 input_values = vload4(0, (__global int *)src_addr);
+
+#if defined(ADD_BIAS)
+ // Add bias
+ __global uchar *bias_addr = biases_ptr + biases_offset_first_element_in_bytes + x * sizeof(int);
+
+ int4 biases_values = vload4(0, (__global int *)bias_addr);
+ input_values += (int4)biases_values;
+#endif // defined(ADD_BIAS)
+
+ // Convert to float
+ float4 input_values_f = convert_float4(input_values);
+ input_values_f = round(input_values_f * (float)REAL_MULTIPLIER + (float)OUTPUT_OFFSET);
+
+ VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4)
+ res = CONVERT_SAT(input_values_f, VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4));
+
+#if defined(MIN_BOUND)
+ res = max(res, (VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4))MIN_BOUND);
+#endif // defined(MIN_BOUND)
+#if defined(MAX_BOUND)
+ res = min(res, (VEC_DATA_TYPE(OUTPUT_DATA_TYPE, 4))MAX_BOUND);
+#endif // defined(MAX_BOUND)
+
+ // Store the result
+ vstore4(res, 0, (__global OUTPUT_DATA_TYPE *)dst_addr);
+}
+#endif // defined(REAL_MULTIPLIER) && defined(OUTPUT_OFFSET)
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/gemmlowp_ex.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/gemmlowp_ex.cl
index 80ba73d1d..85fc09de4 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/gemmlowp_ex.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/gemmlowp_ex.cl
@@ -41,7 +41,7 @@
#include "helpers.h"
#if defined(NUM_ELEMS_PROCESSED_PER_THREAD_X) && defined(NUM_ELEMS_PROCESSED_PER_THREAD_Y) && \
- defined(COLS_A)
+ defined(COLS_A)
#define VECTOR_CHAR VEC_DATA_TYPE(char, NUM_ELEMS_PROCESSED_PER_THREAD_X)
#define VECTOR_INT VEC_DATA_TYPE(int, NUM_ELEMS_PROCESSED_PER_THREAD_X)
#define VECTOR_FLOAT VEC_DATA_TYPE(float, NUM_ELEMS_PROCESSED_PER_THREAD_X)
@@ -117,7 +117,7 @@ __kernel void gemmlowp_mm_midgard_ex(IMAGE_DECLARATION(src0), IMAGE_DECLARATION(
,
uint dst_cross_plane_pad
#endif // REINTERPRET_OUTPUT_AS_3D
- )
+)
{
int idx = get_global_id(0) * NUM_ELEMS_PROCESSED_PER_THREAD_X;
@@ -208,9 +208,9 @@ __kernel void gemmlowp_mm_midgard_ex(IMAGE_DECLARATION(src0), IMAGE_DECLARATION(
#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4
// Load values from matrix B
VECTOR_CHAR b0 =
- VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(0, (__global char *)(src1_ptr + src_addr.s1));
+ VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(0, (__global char *)(src1_ptr + src_addr.s1));
VECTOR_CHAR b1 = VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(
- 0, (__global char *)(src1_ptr + src_addr.s1 + src1_stride_y));
+ 0, (__global char *)(src1_ptr + src_addr.s1 + src1_stride_y));
// Accumulate
acc0 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a0.s0;
@@ -251,7 +251,7 @@ __kernel void gemmlowp_mm_midgard_ex(IMAGE_DECLARATION(src0), IMAGE_DECLARATION(
#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4
// Load values from matrix B
VECTOR_CHAR b0 =
- VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(0, (__global char *)(src1_ptr + src_addr.s1));
+ VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(0, (__global char *)(src1_ptr + src_addr.s1));
// Accumulate
acc0 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a0;
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl
index a4f7dbd48..3ace1fde8 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl
@@ -115,15 +115,15 @@ __kernel void hashtable_lookup(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION
int lup_id[4] = {0};
- lup_id[0] = (NUM_DIMS == 1) ? *((__global int *)vector_offset(&lups, get_global_id(0)))
- : get_global_id(0);
- lup_id[1] = (NUM_DIMS == 2) ? *((__global int *)vector_offset(&lups, get_global_id(1)))
- : get_global_id(1);
+ lup_id[0] =
+ (NUM_DIMS == 1) ? *((__global int *)vector_offset(&lups, get_global_id(0))) : get_global_id(0);
+ lup_id[1] =
+ (NUM_DIMS == 2) ? *((__global int *)vector_offset(&lups, get_global_id(1))) : get_global_id(1);
lup_id[2] = (NUM_DIMS == 3) ? *((__global int *)vector_offset(&lups, get_global_id(2)))
: get_global_id(2) % DEPTH_OUT;
lup_id[3] = (NUM_DIMS == 4)
- ? *((__global int *)vector_offset(&lups, get_global_id(2) / DEPTH_OUT))
- : get_global_id(2) / DEPTH_OUT;
+ ? *((__global int *)vector_offset(&lups, get_global_id(2) / DEPTH_OUT))
+ : get_global_id(2) / DEPTH_OUT;
if (lup_id[NUM_DIMS - 1] < 0)
{
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers.h b/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers.h
index e07a25ec9..4a3bc1369 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers.h
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers.h
@@ -49,7 +49,7 @@
#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
#if defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) && \
- defined(cl_arm_integer_dot_product_accumulate_int8)
+ defined(cl_arm_integer_dot_product_accumulate_int8)
#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : enable
#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) &&
// defined(cl_arm_integer_dot_product_accumulate_int8)
@@ -288,21 +288,21 @@
#define VECTOR_DECLARATION(name) \
__global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, \
- uint name##_offset_first_element_in_bytes
+ uint name##_offset_first_element_in_bytes
#define IMAGE_DECLARATION(name) \
__global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, uint name##_stride_y, \
- uint name##_step_y, uint name##_offset_first_element_in_bytes
+ uint name##_step_y, uint name##_offset_first_element_in_bytes
#define TENSOR3D_DECLARATION(name) \
__global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, uint name##_stride_y, \
- uint name##_step_y, uint name##_stride_z, uint name##_step_z, \
- uint name##_offset_first_element_in_bytes
+ uint name##_step_y, uint name##_stride_z, uint name##_step_z, \
+ uint name##_offset_first_element_in_bytes
#define TENSOR4D_DECLARATION(name) \
__global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, uint name##_stride_y, \
- uint name##_step_y, uint name##_stride_z, uint name##_step_z, uint name##_stride_w, \
- uint name##_step_w, uint name##_offset_first_element_in_bytes
+ uint name##_step_y, uint name##_stride_z, uint name##_step_z, uint name##_stride_w, \
+ uint name##_step_w, uint name##_offset_first_element_in_bytes
#define CONVERT_TO_VECTOR_STRUCT(name) \
update_vector_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, \
@@ -406,9 +406,9 @@ inline Vector update_vector_workitem_ptr(__global uchar *ptr, uint offset_first_
uint stride_x, uint step_x)
{
Vector vector = {
- .ptr = ptr,
- .offset_first_element_in_bytes = offset_first_element_in_bytes,
- .stride_x = stride_x,
+ .ptr = ptr,
+ .offset_first_element_in_bytes = offset_first_element_in_bytes,
+ .stride_x = stride_x,
};
vector.ptr += vector.offset_first_element_in_bytes + get_global_id(0) * step_x;
return vector;
@@ -436,7 +436,7 @@ inline Image update_image_workitem_ptr(__global uchar *ptr, uint offset_first_el
.stride_x = stride_x,
.stride_y = stride_y};
img.ptr +=
- img.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y;
+ img.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y;
return img;
}
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h b/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h
index 5f1b3f902..d7f1d0814 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h
@@ -100,16 +100,16 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale)
*
* @return quantized values
*/
-#define QUANTIZE_IMPL(type, size) \
- inline VEC_DATA_TYPE(type, size) \
- quantize_##type##size(VEC_DATA_TYPE(float, size) input, float offset, float scale) \
- { \
- VEC_DATA_TYPE(float, size) \
- out_f32 = input / (VEC_DATA_TYPE(float, size))(scale) + (VEC_DATA_TYPE(float, size))(offset); \
- VEC_DATA_TYPE(type, size) \
- res = CONVERT_SAT(CONVERT_DOWN_RTE(out_f32, VEC_DATA_TYPE(int, size)), \
- VEC_DATA_TYPE(type, size)); \
- return res; \
+#define QUANTIZE_IMPL(type, size) \
+ inline VEC_DATA_TYPE(type, size) \
+ quantize_##type##size(VEC_DATA_TYPE(float, size) input, float offset, float scale) \
+ { \
+ VEC_DATA_TYPE(float, size) \
+ out_f32 = input / (VEC_DATA_TYPE(float, size))(scale) + (VEC_DATA_TYPE(float, size))(offset); \
+ VEC_DATA_TYPE(type, size) \
+ res = \
+ CONVERT_SAT(CONVERT_DOWN_RTE(out_f32, VEC_DATA_TYPE(int, size)), VEC_DATA_TYPE(type, size)); \
+ return res; \
}
/** Dequantize a vector of values to floating-point
@@ -119,11 +119,11 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale)
*
* @return dequantized values in floating point
*/
-#define DEQUANTIZE_IMPL(type, size) \
- inline VEC_DATA_TYPE(float, size) \
- dequantize_##type##size(VEC_DATA_TYPE(type, size) input, float offset, float scale) \
- { \
- return (CONVERT(input, VEC_DATA_TYPE(float, size)) - offset) * scale; \
+#define DEQUANTIZE_IMPL(type, size) \
+ inline VEC_DATA_TYPE(float, size) \
+ dequantize_##type##size(VEC_DATA_TYPE(type, size) input, float offset, float scale) \
+ { \
+ return (CONVERT(input, VEC_DATA_TYPE(float, size)) - offset) * scale; \
}
/** Correctly-rounded-to-nearest division by a power-of-two.
@@ -134,7 +134,7 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale)
*/
#define ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(size) \
inline VEC_DATA_TYPE(int, size) asymm_rounding_divide_by_POW2_##size( \
- VEC_DATA_TYPE(int, size) x, VEC_DATA_TYPE(int, size) exponent) \
+ VEC_DATA_TYPE(int, size) x, VEC_DATA_TYPE(int, size) exponent) \
{ \
const VEC_DATA_TYPE(int, size) zero = (VEC_DATA_TYPE(int, size))0; \
const VEC_DATA_TYPE(int, size) one = (VEC_DATA_TYPE(int, size))1; \
@@ -152,32 +152,32 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale)
*
* @return Product of two fixed-point numbers.
*/
-#define ASYMM_MULT_IMPL(size) \
- inline VEC_DATA_TYPE(int, size) \
- asymm_mult##size(VEC_DATA_TYPE(int, size) a, VEC_DATA_TYPE(int, size) b) \
- { \
- VEC_DATA_TYPE(int, size) \
- overflow = a == b && a == INT_MIN; \
- VEC_DATA_TYPE(long, size) \
- a_64 = convert_long##size(a); \
- VEC_DATA_TYPE(long, size) \
- b_64 = convert_long##size(b); \
- VEC_DATA_TYPE(long, size) \
- ab_64 = a_64 * b_64; \
- /* Revert COMPMID-907 */ \
- VEC_DATA_TYPE(long, size) \
- mask1 = 1 << 30; \
- VEC_DATA_TYPE(long, size) \
- mask2 = 1 - (1 << 30); \
- VEC_DATA_TYPE(long, size) \
- is_positive_or_zero = ab_64 >= 0; \
- VEC_DATA_TYPE(long, size) \
- nudge = select(mask2, mask1, is_positive_or_zero); \
- VEC_DATA_TYPE(long, size) \
- mask = 1ll << 31; \
- VEC_DATA_TYPE(int, size) \
- ab_x2_high32 = convert_int##size((ab_64 + nudge) / mask); \
- return select(ab_x2_high32, INT_MAX, overflow); \
+#define ASYMM_MULT_IMPL(size) \
+ inline VEC_DATA_TYPE(int, size) \
+ asymm_mult##size(VEC_DATA_TYPE(int, size) a, VEC_DATA_TYPE(int, size) b) \
+ { \
+ VEC_DATA_TYPE(int, size) \
+ overflow = a == b && a == INT_MIN; \
+ VEC_DATA_TYPE(long, size) \
+ a_64 = convert_long##size(a); \
+ VEC_DATA_TYPE(long, size) \
+ b_64 = convert_long##size(b); \
+ VEC_DATA_TYPE(long, size) \
+ ab_64 = a_64 * b_64; \
+ /* Revert COMPMID-907 */ \
+ VEC_DATA_TYPE(long, size) \
+ mask1 = 1 << 30; \
+ VEC_DATA_TYPE(long, size) \
+ mask2 = 1 - (1 << 30); \
+ VEC_DATA_TYPE(long, size) \
+ is_positive_or_zero = ab_64 >= 0; \
+ VEC_DATA_TYPE(long, size) \
+ nudge = select(mask2, mask1, is_positive_or_zero); \
+ VEC_DATA_TYPE(long, size) \
+ mask = 1ll << 31; \
+ VEC_DATA_TYPE(int, size) \
+ ab_x2_high32 = convert_int##size((ab_64 + nudge) / mask); \
+ return select(ab_x2_high32, INT_MAX, overflow); \
}
/** Calculates \f$ exp(x) \f$ for x in [-1/4, 0).
@@ -186,32 +186,32 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale)
*
* @return Result in fixed-point format Q0.
*/
-#define ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL_IMPL(size) \
- inline VEC_DATA_TYPE(int, size) \
- asymm_exp_on_interval_between_negative_one_quarter_and_0_excl##size(VEC_DATA_TYPE(int, size) \
- a) \
- { \
- const VEC_DATA_TYPE(int, size) constant_term = 1895147668; \
- const VEC_DATA_TYPE(int, size) constant_1_over_3 = 715827883; \
- const int k_fractional_bits = 31; \
- VEC_DATA_TYPE(int, size) \
- x = a + (1 << (k_fractional_bits - 3)); \
- VEC_DATA_TYPE(int, size) \
- x2 = ASYMM_MULT(x, x, size); \
- VEC_DATA_TYPE(int, size) \
- x3 = ASYMM_MULT(x2, x, size); \
- VEC_DATA_TYPE(int, size) \
- x4 = ASYMM_MULT(x2, x2, size); \
- VEC_DATA_TYPE(int, size) \
- x4_over_4 = ASYMM_ROUNDING_DIVIDE_BY_POW2(x4, 2, size); \
- VEC_DATA_TYPE(int, size) \
- x4_over_24_plus_x3_over_6_plus_x2 = \
- ASYMM_MULT((x4_over_4 + x3), constant_1_over_3, size) + x2; \
- VEC_DATA_TYPE(int, size) \
- x4_over_24_plus_x3_over_6_plus_x2_over_2 = \
- ASYMM_ROUNDING_DIVIDE_BY_POW2(x4_over_24_plus_x3_over_6_plus_x2, 1, size); \
- return constant_term + \
- ASYMM_MULT(constant_term, x + x4_over_24_plus_x3_over_6_plus_x2_over_2, size); \
+#define ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL_IMPL(size) \
+ inline VEC_DATA_TYPE(int, size) \
+ asymm_exp_on_interval_between_negative_one_quarter_and_0_excl##size(VEC_DATA_TYPE(int, size) \
+ a) \
+ { \
+ const VEC_DATA_TYPE(int, size) constant_term = 1895147668; \
+ const VEC_DATA_TYPE(int, size) constant_1_over_3 = 715827883; \
+ const int k_fractional_bits = 31; \
+ VEC_DATA_TYPE(int, size) \
+ x = a + (1 << (k_fractional_bits - 3)); \
+ VEC_DATA_TYPE(int, size) \
+ x2 = ASYMM_MULT(x, x, size); \
+ VEC_DATA_TYPE(int, size) \
+ x3 = ASYMM_MULT(x2, x, size); \
+ VEC_DATA_TYPE(int, size) \
+ x4 = ASYMM_MULT(x2, x2, size); \
+ VEC_DATA_TYPE(int, size) \
+ x4_over_4 = ASYMM_ROUNDING_DIVIDE_BY_POW2(x4, 2, size); \
+ VEC_DATA_TYPE(int, size) \
+ x4_over_24_plus_x3_over_6_plus_x2 = \
+ ASYMM_MULT((x4_over_4 + x3), constant_1_over_3, size) + x2; \
+ VEC_DATA_TYPE(int, size) \
+ x4_over_24_plus_x3_over_6_plus_x2_over_2 = \
+ ASYMM_ROUNDING_DIVIDE_BY_POW2(x4_over_24_plus_x3_over_6_plus_x2, 1, size); \
+ return constant_term + \
+ ASYMM_MULT(constant_term, x + x4_over_24_plus_x3_over_6_plus_x2_over_2, size); \
}
/** Each bit of the result is set to the corresponding bit of either then_val or
@@ -263,15 +263,15 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale)
#define EXP_BARREL_SHIFTER_IMPL(size) \
inline VEC_DATA_TYPE(int, size) exp_barrel_shifter##size( \
- VEC_DATA_TYPE(int, size) result, int exponent, int fp_multiplier, int k_integer_bits, \
- int k_fractional_bits, VEC_DATA_TYPE(int, size) remainder) \
+ VEC_DATA_TYPE(int, size) result, int exponent, int fp_multiplier, int k_integer_bits, \
+ int k_fractional_bits, VEC_DATA_TYPE(int, size) remainder) \
{ \
if (k_integer_bits > exponent) \
{ \
const int k_shift_amount = k_integer_bits > exponent ? k_fractional_bits + exponent : 0; \
return ASYMM_SELECT_USING_MASK( \
- ASYMM_MASK_IF_NON_ZERO(remainder & (1 << k_shift_amount), size), \
- ASYMM_MULT(result, fp_multiplier, size), result, size); \
+ ASYMM_MASK_IF_NON_ZERO(remainder & (1 << k_shift_amount), size), \
+ ASYMM_MULT(result, fp_multiplier, size), result, size); \
} \
\
return result; \
@@ -285,7 +285,7 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale)
*/
#define ASYMM_EXP_ON_NEGATIVE_VALUES_IMPL(size) \
inline VEC_DATA_TYPE(int, size) \
- asymm_exp_on_negative_values##size(VEC_DATA_TYPE(int, size) a, int k_integer_bits) \
+ asymm_exp_on_negative_values##size(VEC_DATA_TYPE(int, size) a, int k_integer_bits) \
{ \
const int k_fractional_bits = 31 - k_integer_bits; \
VEC_DATA_TYPE(int, size) \
@@ -298,7 +298,7 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale)
a_mod_quarter_minus_one_quarter_scaled = a_mod_quarter_minus_one_quarter << k_integer_bits; \
VEC_DATA_TYPE(int, size) \
result = ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL( \
- a_mod_quarter_minus_one_quarter_scaled, size); \
+ a_mod_quarter_minus_one_quarter_scaled, size); \
VEC_DATA_TYPE(int, size) \
remainder = a_mod_quarter_minus_one_quarter - a; \
\
@@ -312,10 +312,10 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale)
remainder, size); \
result = EXP_BARREL_SHIFTER(result, +2, 39332535, k_integer_bits, k_fractional_bits, \
remainder, size); \
- result = EXP_BARREL_SHIFTER(result, +3, 720401, k_integer_bits, k_fractional_bits, remainder, \
- size); \
result = \
- EXP_BARREL_SHIFTER(result, +4, 242, k_integer_bits, k_fractional_bits, remainder, size); \
+ EXP_BARREL_SHIFTER(result, +3, 720401, k_integer_bits, k_fractional_bits, remainder, size); \
+ result = \
+ EXP_BARREL_SHIFTER(result, +4, 242, k_integer_bits, k_fractional_bits, remainder, size); \
\
if (k_integer_bits > 5) \
{ \
@@ -335,27 +335,27 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale)
*
* @return Arithmetic left or right shift.
*/
-#define ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(size) \
- inline VEC_DATA_TYPE(int, size) \
- asymm_saturating_rounding_mult_by_pow2##size(VEC_DATA_TYPE(int, size) x, int exponent) \
- { \
- if (exponent < 0) \
- { \
- return ASYMM_ROUNDING_DIVIDE_BY_POW2(x, -exponent, size); \
- } \
- \
- const VEC_DATA_TYPE(int, size) min = INT_MIN; \
- const VEC_DATA_TYPE(int, size) max = INT_MAX; \
- int threshold = ((1 << (31 - exponent)) - 1); \
- VEC_DATA_TYPE(int, size) \
- positive_mask = ASYMM_MASK_IF_NON_ZERO(x > threshold, size); \
- VEC_DATA_TYPE(int, size) \
- negative_mask = ASYMM_MASK_IF_NON_ZERO(x < -threshold, size); \
- VEC_DATA_TYPE(int, size) \
- result = x << exponent; \
- result = ASYMM_SELECT_USING_MASK(positive_mask, max, result, size); \
- result = ASYMM_SELECT_USING_MASK(negative_mask, min, result, size); \
- return result; \
+#define ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(size) \
+ inline VEC_DATA_TYPE(int, size) \
+ asymm_saturating_rounding_mult_by_pow2##size(VEC_DATA_TYPE(int, size) x, int exponent) \
+ { \
+ if (exponent < 0) \
+ { \
+ return ASYMM_ROUNDING_DIVIDE_BY_POW2(x, -exponent, size); \
+ } \
+ \
+ const VEC_DATA_TYPE(int, size) min = INT_MIN; \
+ const VEC_DATA_TYPE(int, size) max = INT_MAX; \
+ int threshold = ((1 << (31 - exponent)) - 1); \
+ VEC_DATA_TYPE(int, size) \
+ positive_mask = ASYMM_MASK_IF_NON_ZERO(x > threshold, size); \
+ VEC_DATA_TYPE(int, size) \
+ negative_mask = ASYMM_MASK_IF_NON_ZERO(x < -threshold, size); \
+ VEC_DATA_TYPE(int, size) \
+ result = x << exponent; \
+ result = ASYMM_SELECT_USING_MASK(positive_mask, max, result, size); \
+ result = ASYMM_SELECT_USING_MASK(negative_mask, min, result, size); \
+ return result; \
}
/** Calculates (a+b)/2, rounded to the nearest integer.
@@ -365,21 +365,21 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale)
*
* @return (a+b)/2, rounded to the nearest integer.
*/
-#define ASYMM_ROUNDING_HALF_SUM_IMPL(size) \
- inline VEC_DATA_TYPE(int, size) \
- asymm_rounding_half_sum##size(VEC_DATA_TYPE(int, size) a, VEC_DATA_TYPE(int, size) b) \
- { \
- VEC_DATA_TYPE(long, size) \
- a64 = convert_long##size(a); \
- VEC_DATA_TYPE(long, size) \
- b64 = convert_long##size(b); \
- VEC_DATA_TYPE(long, size) \
- sum = a64 + b64; \
- const VEC_DATA_TYPE(long, size) one = 1; \
- const VEC_DATA_TYPE(long, size) minus_one = -1; \
- VEC_DATA_TYPE(long, size) \
- sign = select(minus_one, one, sum >= 0); \
- return convert_int##size((sum + sign) / 2); \
+#define ASYMM_ROUNDING_HALF_SUM_IMPL(size) \
+ inline VEC_DATA_TYPE(int, size) \
+ asymm_rounding_half_sum##size(VEC_DATA_TYPE(int, size) a, VEC_DATA_TYPE(int, size) b) \
+ { \
+ VEC_DATA_TYPE(long, size) \
+ a64 = convert_long##size(a); \
+ VEC_DATA_TYPE(long, size) \
+ b64 = convert_long##size(b); \
+ VEC_DATA_TYPE(long, size) \
+ sum = a64 + b64; \
+ const VEC_DATA_TYPE(long, size) one = 1; \
+ const VEC_DATA_TYPE(long, size) minus_one = -1; \
+ VEC_DATA_TYPE(long, size) \
+ sign = select(minus_one, one, sum >= 0); \
+ return convert_int##size((sum + sign) / 2); \
}
/** Calculates \f$ 1 / (1 + x) \f$ for x in (0, 1).
@@ -390,7 +390,7 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale)
*/
#define ASYMM_ONE_OVER_ONE_PLUS_X_FOR_X_IN_0_1_IMPL(size) \
inline VEC_DATA_TYPE(int, size) \
- asymm_one_over_one_plus_x_for_x_in_0_1##size(VEC_DATA_TYPE(int, size) a) \
+ asymm_one_over_one_plus_x_for_x_in_0_1##size(VEC_DATA_TYPE(int, size) a) \
{ \
const VEC_DATA_TYPE(int, size) Q0_one = INT_MAX; \
const VEC_DATA_TYPE(int, size) Q2_one = 1 << (31 - 2); \
@@ -462,14 +462,14 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale)
#define ASYMM_RESCALE(value, src_integer_bits, dst_integer_bits, size) \
asymm_rescale##size(value, src_integer_bits, dst_integer_bits)
-#define MULTIPLY_BY_QUANTIZED_MULTIPLIER_IMPL(size) \
- inline VEC_DATA_TYPE(int, size) \
- multiply_by_quantized_multiplier##size(VEC_DATA_TYPE(int, size) input, int qmul, int shift) \
- { \
- const int left_shift = shift > 0 ? shift : 0; \
- const int right_shift = shift > 0 ? 0 : -shift; \
- return ASYMM_ROUNDING_DIVIDE_BY_POW2(ASYMM_MULT(input * (1 << left_shift), qmul, size), \
- right_shift, size); \
+#define MULTIPLY_BY_QUANTIZED_MULTIPLIER_IMPL(size) \
+ inline VEC_DATA_TYPE(int, size) \
+ multiply_by_quantized_multiplier##size(VEC_DATA_TYPE(int, size) input, int qmul, int shift) \
+ { \
+ const int left_shift = shift > 0 ? shift : 0; \
+ const int right_shift = shift > 0 ? 0 : -shift; \
+ return ASYMM_ROUNDING_DIVIDE_BY_POW2(ASYMM_MULT(input * (1 << left_shift), qmul, size), \
+ right_shift, size); \
}
#define MULTIPLY_BY_QUANTIZED_MULTIPLIER(input, qmul, shift, size) \
multiply_by_quantized_multiplier##size(input, qmul, shift)
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/instance_normalization_ex.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/instance_normalization_ex.cl
index 014842680..96a243110 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/instance_normalization_ex.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/instance_normalization_ex.cl
@@ -41,7 +41,7 @@
#include "helpers.h"
#if defined(VEC_SIZE) && defined(DATA_TYPE) && defined(EPSILON) && defined(DIM_X) && \
- defined(DIM_Y) && defined(DIM_Z)
+ defined(DIM_Y) && defined(DIM_Z)
/** This function normalizes the input 2D tensor across the first dimension with respect to mean and
* standard deviation of the same dimension.
*
@@ -108,14 +108,14 @@ __kernel void instance_normalization_ex(TENSOR4D_DECLARATION(input),
TENSOR4D_DECLARATION(output)
#endif /* IN_PLACE */
#ifdef GAMMA
- ,
+ ,
VECTOR_DECLARATION(gamma)
#endif // GAMMA
#ifdef BETA
- ,
+ ,
VECTOR_DECLARATION(beta)
#endif // BETA
- )
+)
{
Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0);
#ifndef IN_PLACE
@@ -213,12 +213,12 @@ __kernel void instance_normalization_ex(TENSOR4D_DECLARATION(input),
for (int i_h = 0; i_h < DIM_Z; ++i_h)
{
__global DATA_TYPE *input_address =
- (__global DATA_TYPE *)tensor4D_offset(&in, ch, i_w, i_h, batch);
+ (__global DATA_TYPE *)tensor4D_offset(&in, ch, i_w, i_h, batch);
#ifdef IN_PLACE
__global DATA_TYPE *output_address = input_address;
#else /* !IN_PLACE */
__global DATA_TYPE *output_address =
- (__global DATA_TYPE *)tensor4D_offset(&out, ch, i_w, i_h, batch);
+ (__global DATA_TYPE *)tensor4D_offset(&out, ch, i_w, i_h, batch);
#endif /* IN_PLACE */
*(output_address) = (*(input_address)-mean) * multip + beta;
}
@@ -231,12 +231,12 @@ __kernel void instance_normalization_ex(TENSOR4D_DECLARATION(input),
for (; x <= (DIM_X - VEC_SIZE); x += VEC_SIZE)
{
__global DATA_TYPE *input_address =
- (__global DATA_TYPE *)tensor4D_offset(&in, x, y, ch, batch);
+ (__global DATA_TYPE *)tensor4D_offset(&in, x, y, ch, batch);
#ifdef IN_PLACE
__global DATA_TYPE *output_address = input_address;
#else /* !IN_PLACE */
__global DATA_TYPE *output_address =
- (__global DATA_TYPE *)tensor4D_offset(&out, x, y, ch, batch);
+ (__global DATA_TYPE *)tensor4D_offset(&out, x, y, ch, batch);
#endif /* IN_PLACE */
VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
@@ -251,12 +251,12 @@ __kernel void instance_normalization_ex(TENSOR4D_DECLARATION(input),
for (; x < DIM_X; ++x)
{
__global DATA_TYPE *input_address =
- (__global DATA_TYPE *)tensor4D_offset(&in, x, y, ch, batch);
+ (__global DATA_TYPE *)tensor4D_offset(&in, x, y, ch, batch);
#ifdef IN_PLACE
__global DATA_TYPE *output_address = input_address;
#else /* !IN_PLACE */
__global DATA_TYPE *output_address =
- (__global DATA_TYPE *)tensor4D_offset(&out, x, y, ch, batch);
+ (__global DATA_TYPE *)tensor4D_offset(&out, x, y, ch, batch);
#endif /* IN_PLACE */
*(output_address) = (*(input_address)-mean) * multip + beta;
}
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/memset.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/memset.cl
new file mode 100644
index 000000000..51919c8a5
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/memset.cl
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "helpers.h"
+
+#if defined(DATA_TYPE) && defined(CONSTANT_VALUE) // Check for compile time constants
+
+/** Fill the tensor's planes with a constant value
+ * @attention The following variables must be passed at compile time:
+ * -# -DDATA_TYPE = Tensor data type. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * -# -DCONSTANT_VALUE = The value used to fill the tensor's planes
+ * -# -DVEC_SIZE = Vector size
+ * -# -DLAST_ACCESSED_X = The element that is on the X border (threads trying to set this, might
+ * need to step back a bit)
+ *
+ * @param[in,out] tensor_ptr Pointer to the tensor being filled. Data types
+ * supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[in] tensor_stride_x Stride of the tensor in X dimension (in
+ * bytes)
+ * @param[in] tensor_step_x tensor_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] tensor_stride_y Stride of the tensor in Y dimension (in
+ * bytes)
+ * @param[in] tensor_step_y tensor_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] tensor_offset_first_element_in_bytes The offset of the first element in the
+ * tensor
+ *
+ * @note There is no runtime fill-value argument; the value is baked in at compile time
+ * via -DCONSTANT_VALUE.
+ */
+__kernel void memset(TENSOR3D_DECLARATION(tensor))
+{
+ Tensor3D tensor = CONVERT_TO_TENSOR3D_STRUCT(tensor);
+
+#if defined(VEC_SIZE)
+
+#if defined(LAST_ACCESSED_X)
+ // Check if access on width gets out of bounds
+ // If it does shift access vector to access elements within bounds
+ const int xi = (int)(get_global_id(0) * VEC_SIZE);
+ tensor.ptr -= max(xi - (int)LAST_ACCESSED_X, 0) * tensor_stride_x;
+#endif // defined(LAST_ACCESSED_X)
+
+ VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+ data = (DATA_TYPE)(CONSTANT_VALUE);
+
+ VSTORE(VEC_SIZE)
+ (data, 0, (__global DATA_TYPE *)tensor.ptr);
+#else // !defined(VEC_SIZE)
+ *((__global DATA_TYPE *)(tensor.ptr)) = (DATA_TYPE)(CONSTANT_VALUE);
+#endif // defined(VEC_SIZE)
+}
+
+#endif // Check for compile time constants
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/multiply_scale_factor.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/multiply_scale_factor.cl
index 3943fc4c2..abbfbd275 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/multiply_scale_factor.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/multiply_scale_factor.cl
@@ -114,8 +114,8 @@ __kernel void multiply_scale_factor(IMAGE_DECLARATION(input), VECTOR_DECLARATION
(val, 0, (__global DATA_TYPE *)output.ptr);
#else // !defined(VEC_SIZE) || !defined(LAST_ACCESSED_X)
*((__global DATA_TYPE *)(output.ptr)) =
- ((DATA_TYPE)(*((__global int *)(input.ptr)))) *
- *(((__global DATA_TYPE *)(scale_ptr)) + get_global_id(1)) * (DATA_TYPE)(multiplier);
+ ((DATA_TYPE)(*((__global int *)(input.ptr)))) *
+ *(((__global DATA_TYPE *)(scale_ptr)) + get_global_id(1)) * (DATA_TYPE)(multiplier);
#endif // defined(VEC_SIZE) && defined(LAST_ACCESSED_X)
}
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/one_hot.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/one_hot.cl
new file mode 100644
index 000000000..784a8d6aa
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/one_hot.cl
@@ -0,0 +1,222 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2018-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "helpers.h"
+
+#if defined(DATA_TYPE) && defined(AXIS) && defined(DEPTH) && defined(OUTPUT_DIM_Z)
+
+/** Performs the OneHot operation along the chosen axis
+ * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g.
+ * -DDATA_TYPE=short
+ * @note Axis should be given as a preprocessor argument using -DAXIS=axis. e.g. -DAXIS=1
+ * @attention Output tensor depth should be given as a preprocessor argument using
+ * -DOUTPUT_DIM_Z=size. e.g. -DOUTPUT_DIM_Z=16
+ * @attention Input tensor depth should be given as a preprocessor argument using
+ * -DINPUT_DIM_Z=size. e.g. -DINPUT_DIM_Z=16
+ *
+ *
+ * @param[in] indices_ptr Pointer to the source tensor. Supported data
+ * types: S32
+ * @param[in] indices_stride_x Stride of the source tensor in X dimension
+ * (in bytes)
+ * @param[in] indices_step_x indices_stride_x * number of elements along
+ * X processed per work item (in bytes)
+ * @param[in] indices_stride_y Stride of the source tensor in Y dimension
+ * (in bytes)
+ * @param[in] indices_step_y indices_stride_y * number of elements along
+ * Y processed per work item (in bytes)
+ * @param[in] indices_stride_z Stride of the source tensor in Z dimension
+ * (in bytes)
+ * @param[in] indices_step_z indices_stride_z * number of elements along
+ * Z processed per work item (in bytes)
+ * @param[in] indices_offset_first_element_in_bytes Offset of the first element in the source
+ * tensor
+ * @param[in] on_value_ptr Pointer to the on_value vector. Supported
+ * data types: U8/S8/U16/S16/F16/U32/S32/F32.
+ * @param[in] on_value_stride_x Stride of the on_value vector in X dimension
+ * (in bytes)
+ * @param[in] on_value_step_x on_value_stride_x * number of elements along
+ * X processed per work item (in bytes)
+ * @param[in] on_value_offset_first_element_in_bytes Offset of the first element in the on_value
+ * vector
+ * @param[in] off_value_ptr Pointer to the off_value vector. Supported
+ * data types: Same as @p on_value.
+ * @param[in] off_value_stride_x Stride of the off_value vector in X
+ * dimension (in bytes)
+ * @param[in] off_value_step_x off_value_stride_x * number of elements
+ * along X processed per work item (in bytes)
+ * @param[in] off_value_offset_first_element_in_bytes Offset of the first element in the off_value
+ * vector
+ * @param[out] output_ptr Pointer to the destination tensor. Supported
+ * data types: same as @p on_value
+ * @param[in] output_stride_x Stride of the destination tensor in X
+ * dimension (in bytes)
+ * @param[in] output_step_x output_stride_x * number of elements along X
+ * processed per work item (in bytes)
+ * @param[in] output_stride_y Stride of the destination tensor in Y
+ * dimension (in bytes)
+ * @param[in] output_step_y output_stride_y * number of elements along Y
+ * processed per work item (in bytes)
+ * @param[in] output_stride_z Stride of the destination tensor in Z
+ * dimension (in bytes)
+ * @param[in] output_step_z output_stride_z * number of elements along Z
+ * processed per work item (in bytes)
+ * @param[in] output_stride_w Stride of the destination tensor in W
+ * dimension (in bytes)
+ * @param[in] output_step_w output_stride_w * number of elements along W
+ * processed per work item (in bytes)
+ * @param[in] output_offset_first_element_in_bytes Offset of the first element in the
+ * destination tensor
+ */
+__kernel void one_hot(TENSOR3D_DECLARATION(indices), VECTOR_DECLARATION(on_value),
+ VECTOR_DECLARATION(off_value), TENSOR4D_DECLARATION(output))
+{
+ const int px = get_global_id(0);
+ const int py = get_global_id(1);
+ const int pz = get_global_id(2) % OUTPUT_DIM_Z;
+ const int pw = get_global_id(2) / OUTPUT_DIM_Z;
+
+ const Tensor3D indices = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(indices);
+ Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT(output, OUTPUT_DIM_Z);
+
+#if AXIS == 0
+ const int index = *(__global const int *)tensor3D_offset(&indices, py, pz, pw);
+ *(__global DATA_TYPE *)output.ptr = index == px ? *((__global const DATA_TYPE *)on_value_ptr)
+ : *((__global const DATA_TYPE *)off_value_ptr);
+#elif AXIS == 1
+ const uint index = *(__global const uint *)tensor3D_offset(&indices, px, pz, pw);
+ *(__global DATA_TYPE *)output.ptr = index == py ? *((__global const DATA_TYPE *)on_value_ptr)
+ : *((__global const DATA_TYPE *)off_value_ptr);
+#elif AXIS == 2
+ const uint index = *(__global const uint *)tensor3D_offset(&indices, px, py, pw);
+ *(__global DATA_TYPE *)output.ptr = index == pz ? *((__global const DATA_TYPE *)on_value_ptr)
+ : *((__global const DATA_TYPE *)off_value_ptr);
+#elif AXIS == 3
+ const uint index = *(__global const uint *)tensor3D_offset(&indices, px, py, pz);
+ *(__global DATA_TYPE *)output.ptr = index == pw ? *((__global const DATA_TYPE *)on_value_ptr)
+ : *((__global const DATA_TYPE *)off_value_ptr);
+#endif // AXIS
+}
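+
+/* Illustrative host-side sketch (not part of this kernel; the program and device
+ * names are hypothetical): building the kernel above for int on/off values, with
+ * the one-hot dimension innermost (AXIS=0) and a one-hot depth of 16:
+ *
+ *   const char *opts =
+ *     "-DDATA_TYPE=int -DAXIS=0 -DDEPTH=16 -DOUTPUT_DIM_Z=16 -DINPUT_DIM_Z=16";
+ *   cl_int err = clBuildProgram(program, 1, &device, opts, NULL, NULL);
+ */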
+
+/** Performs the OneHot operation along the chosen axis, assuming off_value is zero
+ * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g.
+ * -DDATA_TYPE=short
+ * @note Axis should be given as a preprocessor argument using -DAXIS=axis. e.g. -DAXIS=1
+ * @attention Output tensor depth should be given as a preprocessor argument using
+ * -DOUTPUT_DIM_Z=size. e.g. -DOUTPUT_DIM_Z=16
+ * @attention Input tensor depth should be given as a preprocessor argument using
+ * -DINPUT_DIM_Z=size. e.g. -DINPUT_DIM_Z=16
+ *
+ * @param[in] indices_ptr Pointer to the source tensor. Supported data
+ * types: S32
+ * @param[in] indices_stride_x Stride of the source tensor in X dimension
+ * (in bytes)
+ * @param[in] indices_step_x indices_stride_x * number of elements along
+ * X processed per work item (in bytes)
+ * @param[in] indices_stride_y Stride of the source tensor in Y dimension
+ * (in bytes)
+ * @param[in] indices_step_y indices_stride_y * number of elements along
+ * Y processed per work item (in bytes)
+ * @param[in] indices_stride_z Stride of the source tensor in Z dimension
+ * (in bytes)
+ * @param[in] indices_step_z indices_stride_z * number of elements along
+ * Z processed per work item (in bytes)
+ * @param[in] indices_offset_first_element_in_bytes Offset of the first element in the source
+ * tensor
+ * @param[in] on_value_ptr Pointer to the on_value vector. Supported
+ * data types: U8/S8/U16/S16/F16/U32/S32/F32.
+ * @param[in] on_value_stride_x Stride of the on_value vector in X dimension
+ * (in bytes)
+ * @param[in] on_value_step_x on_value_stride_x * number of elements along
+ * X processed per work item (in bytes)
+ * @param[in] on_value_offset_first_element_in_bytes Offset of the first element in the on_value
+ * vector
+ * @param[out] output_ptr Pointer to the destination tensor. Supported
+ * data types: same as @p on_value
+ * @param[in] output_stride_x Stride of the destination tensor in X
+ * dimension (in bytes)
+ * @param[in] output_step_x output_stride_x * number of elements along X
+ * processed per work item (in bytes)
+ * @param[in] output_stride_y Stride of the destination tensor in Y
+ * dimension (in bytes)
+ * @param[in] output_step_y output_stride_y * number of elements along Y
+ * processed per work item (in bytes)
+ * @param[in] output_stride_z Stride of the destination tensor in Z
+ * dimension (in bytes)
+ * @param[in] output_step_z output_stride_z * number of elements along Z
+ * processed per work item (in bytes)
+ * @param[in] output_stride_w Stride of the destination tensor in W
+ * dimension (in bytes)
+ * @param[in] output_step_w output_stride_w * number of elements along W
+ * processed per work item (in bytes)
+ * @param[in] output_offset_first_element_in_bytes Offset of the first element in the
+ * destination tensor
+ */
+__kernel void one_hot_only_on_value(TENSOR3D_DECLARATION(indices), VECTOR_DECLARATION(on_value),
+ TENSOR4D_DECLARATION(output))
+{
+ const int px = get_global_id(0);
+ const int py = get_global_id(1);
+ const int pz = get_global_id(2);
+
+ const Tensor3D indices = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(indices);
+ const Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(output, OUTPUT_DIM_Z);
+
+ const int index = *(__global const int *)tensor3D_offset(&indices, px, py, pz);
+
+ if (index < 0 || index >= DEPTH)
+ return;
+
+#if AXIS == 0
+ *(__global DATA_TYPE *)tensor4D_offset(&output, index, px, py, pz) =
+ *((__global const DATA_TYPE *)on_value_ptr);
+#elif AXIS == 1
+ *(__global DATA_TYPE *)tensor4D_offset(&output, px, index, py, pz) =
+ *((__global const DATA_TYPE *)on_value_ptr);
+#elif AXIS == 2
+ *(__global DATA_TYPE *)tensor4D_offset(&output, px, py, index, pz) =
+ *((__global const DATA_TYPE *)on_value_ptr);
+#elif AXIS == 3
+ *(__global DATA_TYPE *)tensor4D_offset(&output, px, py, pz, index) =
+ *((__global const DATA_TYPE *)on_value_ptr);
+#endif // AXIS
+}
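+
+/* For reference, a minimal serial C sketch of the scatter performed above, assuming
+ * AXIS == 0 (one-hot dimension innermost), a contiguous layout and a destination
+ * pre-filled with the off value; the variable names are illustrative:
+ *
+ *   for (int i = 0; i < num_indices; ++i)
+ *   {
+ *     const int idx = indices[i];
+ *     if (idx >= 0 && idx < DEPTH)
+ *       output[i * DEPTH + idx] = on_value; // every other element keeps the off value
+ *   }
+ */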
+
+#endif // defined(DATA_TYPE) && defined(AXIS) && defined(DEPTH) && defined(OUTPUT_DIM_Z)
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/pad_layer.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/pad_layer.cl
new file mode 100644
index 000000000..96f2f9ef0
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/pad_layer.cl
@@ -0,0 +1,346 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Copyright (c) 2019-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "helpers.h"
+
+#if defined(DATA_TYPE) && defined(SELECT_DT) && defined(VEC_SIZE) && defined(PAD_X_BEFORE) && \
+ defined(SRC_WIDTH)
+
+#define VEC_TYPE VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+#define VEC_INT VEC_DATA_TYPE(int, VEC_SIZE)
+#define VEC_SELECT VEC_DATA_TYPE(SELECT_DT, VEC_SIZE)
+#define OFFSETS VEC_OFFS(VEC_SELECT, VEC_SIZE)
+
+#if defined(CONST_VAL)
+/** Perform a pad operation when PaddingMode is CONSTANT
+ *
+ * @note Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
+ * @note Vector size must be passed using the -DVEC_SIZE compile flag, e.g. -DVEC_SIZE=4
+ * @note Constant value used to fill the pads must be passed using the -DCONST_VAL compile flag,
+ * e.g. -DCONST_VAL=1.27
+ * @note Pad to add to the left must be passed using the -DPAD_X_BEFORE compile flag, e.g.
+ * -DPAD_X_BEFORE=5
+ * @note Input tensor's width must be passed using the -DSRC_WIDTH compile flag, e.g.
+ * -DSRC_WIDTH=224
+ * @note Data type to use for the select instruction must be passed using the -DSELECT_DT compile
+ * flag, e.g. -DSELECT_DT=float
+ * @note If the left pad is larger than the vector size, the number of threads to skip along the X
+ * axis must be passed using the -DNUM_THREADS_TO_SKIP_X compile flag, e.g.
+ * -DNUM_THREADS_TO_SKIP_X=1. This is defined as (PAD_X_BEFORE / VEC_SIZE)
+ * @note If pad also needs to be added to the top of the tensor, the following compile flags must be
+ * passed at compile time:
+ * -# -DPAD_Y_BEFORE: Pad to add to the top of the input tensor (e.g. -DPAD_Y_BEFORE=3)
+ * -# -DSRC_HEIGHT: Input tensor's height (e.g. -DSRC_HEIGHT=127)
+ * @note If pad also needs to be added to the depth of the tensor, the following compile flags must
+ * be passed at compile time:
+ * -# -DPAD_Z_BEFORE: Pad to add before the first plane of the input tensor (e.g.
+ * -DPAD_Z_BEFORE=3)
+ * -# -DSRC_DEPTH: Input tensor's depth (e.g. -DSRC_DEPTH=32)
+ * @note If pad also needs to be added to the batch of the tensor, the following compile flags must
+ * be passed at compile time:
+ * -# -DPAD_W_BEFORE: Pad to add before the first batch of the input tensor (e.g.
+ * -DPAD_W_BEFORE=3)
+ * -# -DSRC_BATCH: Input tensor's batch size (e.g. -DSRC_BATCH=4)
+ *
+ * @param[in] src_ptr Pointer to the source image. Supported data types:
+ * U8, S8, QASYMM8, QASYMM8_SIGNED, U16, S16, U32, S32, F16, F32
+ * @param[in] src_stride_x Stride of the source image in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source image in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source image in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
+ * @param[out] dst_ptr Pointer to the destination image. Supported data
+ * types: same as @p src_ptr
+ * @param[in] dst_stride_x Stride of the destination image in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination image in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination image in Z dimension (in
+ * bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * image
+ * @param[in] batch (Optional) Batch index if 4D pad must be applied
+ */
+__kernel void pad_layer_constant(TENSOR3D_DECLARATION(src), TENSOR3D_DECLARATION(dst)
+#if defined(PAD_W_BEFORE)
+ ,
+ uint batch
+#endif // defined(PAD_W_BEFORE)
+)
+{
+ const int x = get_global_id(0);
+ const int y = get_global_id(1);
+ const int z = get_global_id(2);
+
+ uint cond = 0;
+
+#if defined(PAD_W_BEFORE)
+ cond |= batch < PAD_W_BEFORE || batch >= (SRC_BATCH + PAD_W_BEFORE);
+#endif // defined(PAD_W_BEFORE)
+#if defined(PAD_Z_BEFORE)
+ cond |= z < PAD_Z_BEFORE || z >= (SRC_DEPTH + PAD_Z_BEFORE);
+#endif // defined(PAD_Z_BEFORE)
+
+ if (cond)
+ {
+ Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst);
+ VSTORE(VEC_SIZE)
+ ((VEC_TYPE)CONST_VAL, 0, (__global DATA_TYPE *)dst.ptr);
+ }
+ else
+ {
+ Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
+ Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst);
+
+#if defined(NUM_THREADS_TO_SKIP_X)
+ /* If the left pad is greater than the vector size and we are past the threads operating
+ * solely on pad values, the input pointer must be brought back along the X axis to start from
+ * the first non-pad values.
+ *
+ * E.g. with VEC_SIZE=2, PAD_X_BEFORE=5, CONST_VAL=0 and 1D input |1 2 3 4 5 6|:
+ * -# The first thread computes the output values |0 0| since it detects (x_outs == (0, 1))
+ * < PAD_X_BEFORE
+ * -# The second thread computes the output values |0 0| since it detects (x_outs == (2,
+ * 3)) < PAD_X_BEFORE
+ * -# The third thread should compute |0 1|, however its input pointer is already ahead by
+ * ((x * VEC_SIZE) == 4) values and would read |4 5|
+ * -# To detect this, we use ((PAD_X_BEFORE / VEC_SIZE) == NUM_THREADS_TO_SKIP_X == 2) and
+ * check whether the current x is >= this constant
+ * -# If so, we bring the pointer back by NUM_THREADS_TO_SKIP_X threads' worth of data, i.e.
+ * this constant multiplied by the input's step along the X axis
+ * -# Now that the pointer is back by ((NUM_THREADS_TO_SKIP_X * src_step_x) == 4) values, it
+ * reads the desired values |0 1|
+ */
+ src.ptr -= select(0u, NUM_THREADS_TO_SKIP_X * src_step_x, x >= NUM_THREADS_TO_SKIP_X);
+#endif // defined(NUM_THREADS_TO_SKIP_X)
+#if defined(PAD_Z_BEFORE)
+ src.ptr -= PAD_Z_BEFORE * src_step_z;
+#endif // defined(PAD_Z_BEFORE)
+#if defined(PAD_W_BEFORE)
+ src.ptr -= PAD_W_BEFORE * SRC_DEPTH * src_step_z;
+#endif // defined(PAD_W_BEFORE)
+
+ VEC_TYPE src_vals = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)src.ptr);
+
+ VEC_INT xs_out = (VEC_INT)(x * VEC_SIZE) + CONVERT(OFFSETS, VEC_INT);
+ VEC_INT cond = xs_out < (VEC_INT)PAD_X_BEFORE || xs_out >= (VEC_INT)(SRC_WIDTH + PAD_X_BEFORE);
+#if defined(PAD_Y_BEFORE)
+ cond |=
+ (VEC_INT)y < (VEC_INT)PAD_Y_BEFORE || (VEC_INT)y >= (VEC_INT)(SRC_HEIGHT + PAD_Y_BEFORE);
+#endif // defined(PAD_Y_BEFORE)
+ VSTORE(VEC_SIZE)
+ (select(src_vals, (VEC_TYPE)CONST_VAL, CONVERT(cond, VEC_SELECT)), 0,
+ (__global DATA_TYPE *)dst.ptr);
+ }
+}
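+
+/* Illustrative build options for this kernel (values hypothetical): a float tensor
+ * of width 224 padded with 5 elements on the left, processing 4 elements per work
+ * item, could be compiled with options along the lines of
+ *
+ *   "-DDATA_TYPE=float -DSELECT_DT=int -DVEC_SIZE=4 -DCONST_VAL=0.0f "
+ *   "-DPAD_X_BEFORE=5 -DSRC_WIDTH=224 -DNUM_THREADS_TO_SKIP_X=1"
+ *
+ * where NUM_THREADS_TO_SKIP_X == (PAD_X_BEFORE / VEC_SIZE) == 1, as described in the
+ * notes above.
+ */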
+#endif // defined(CONST_VAL)
+
+#if defined(PAD_X_BEFORE_REMAINDER) && defined(PAD_X_AFTER_REMAINDER) && \
+ defined(PAD_X_BEFORE_REMAINDER_REFL) && defined(PAD_X_AFTER_REMAINDER_REFL) && \
+ defined(AFTER_PAD_FACT_X)
+
+#define SCALAR_COND(x) (VEC_SELECT) x == (VEC_SELECT)1
+#define ROTATE_REVERSE(x, n) ROTATE(REVERSE(x, VEC_SIZE), VEC_SIZE, n)
+#define SYMM_REFL_LEFT(x, n0, n1) \
+ select(ROTATE_REVERSE(x, n1), ROTATE(x, VEC_SIZE, n0), OFFSETS >= (VEC_SELECT)n0)
+#define SYMM_REFL_RIGHT(x, n0, n1) \
+ select(ROTATE(x, VEC_SIZE, n0), ROTATE_REVERSE(x, n1), OFFSETS >= (VEC_SELECT)n0)
+
+/** Perform a pad operation when PaddingMode is SYMMETRIC or REFLECT
+ *
+ * @note Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
+ * @note Vector size must be passed using the -DVEC_SIZE compile flag, e.g. -DVEC_SIZE=4
+ * @note Constant value must be passed using the -DCONST_VAL compile flag, e.g. -DCONST_VAL=1.27
+ * @note Pad to add to the left must be passed using the -DPAD_X_BEFORE compile flag, e.g.
+ * -DPAD_X_BEFORE=5
+ * @note Input tensor's width must be passed using the -DSRC_WIDTH compile flag, e.g.
+ * -DSRC_WIDTH=224
+ * @note Data type to use for the select instruction must be passed using the -DSELECT_DT compile
+ * flag, e.g. -DSELECT_DT=float
+ * @note Number of values to the left when operating across left padding must be passed using the
+ * -DPAD_X_BEFORE_REMAINDER compile flag, e.g. -DPAD_X_BEFORE_REMAINDER=5
+ * @note Number of values to the left when operating across right padding must be passed using the
+ * -DPAD_X_AFTER_REMAINDER compile flag, e.g. -DPAD_X_AFTER_REMAINDER=6
+ * @note To rearrange the vectors properly, (PAD_X_BEFORE_REMAINDER + 1) must be passed when mode is
+ * REFLECT using the -DPAD_X_BEFORE_REMAINDER_REFL compile flag, e.g. -DPAD_X_BEFORE_REMAINDER_REFL=6
+ * @note To rearrange the vectors properly, (PAD_X_AFTER_REMAINDER - 1) must be passed using the
+ * -DPAD_X_AFTER_REMAINDER_REFL compile flag, e.g. -DPAD_X_AFTER_REMAINDER_REFL=5
+ * @note For output positions past the right pad along X, the starting point to read backward from
+ * must be passed using the -DAFTER_PAD_FACT_X compile flag, e.g. -DAFTER_PAD_FACT_X=253
+ * @note If padding mode is REFLECT, the -DIS_REFLECT compile flag must be set to 1, else it must be
+ * set to 0
+ * @note If pad also needs to be added to the top of the tensor, the following compile flags must be
+ * passed at compile time:
+ * -# -DPAD_Y_BEFORE: Pad to add to the top of the input tensor (e.g. -DPAD_Y_BEFORE=3)
+ * -# -DSRC_HEIGHT: Input tensor's height (e.g. -DSRC_HEIGHT=127)
+ * @note If pad also needs to be added to the depth of the tensor, the following compile flags must
+ * be passed at compile time:
+ * -# -DPAD_Z_BEFORE: Pad to add before the first plane of the input tensor (e.g.
+ * -DPAD_Z_BEFORE=3)
+ * -# -DSRC_DEPTH: Input tensor's depth (e.g. -DSRC_DEPTH=32)
+ * @note If the starting point to read backward from is less than the output's last element accessed
+ * in the X, the following compile flags must be passed at compile time to avoid negative offsets:
+ * -# -DAFTER_PAD_REM: Defines how much to rotate the vector if the backward calculation
+ * attempted to read from a negative offset (e.g. -DAFTER_PAD_REM=3)
+ *
+ * @param[in] src_ptr Pointer to the source image. Supported data types:
+ * U8, S8, QASYMM8, QASYMM8_SIGNED, U16, S16, U32, S32, F16, F32
+ * @param[in] src_stride_x Stride of the source image in X dimension (in
+ * bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source image in Y dimension (in
+ * bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source image in Z dimension (in
+ * bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
+ * @param[out] dst_ptr Pointer to the destination image. Supported data
+ * types: same as @p src_ptr
+ * @param[in] dst_stride_x Stride of the destination image in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination image in Y dimension (in
+ * bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination image in Z dimension (in
+ * bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z processed
+ * per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * image
+ */
+__kernel void pad_layer_symmetric_reflect(TENSOR3D_DECLARATION(src), TENSOR3D_DECLARATION(dst))
+{
+ // Get current thread position
+ const int x = get_global_id(0);
+ const int y = get_global_id(1);
+ const int z = get_global_id(2);
+
+ // Define conditions based on the thread X position w.r.t. pad left and right
+ const int x_out_first = x * VEC_SIZE;
+ const int x_out_last = x_out_first + VEC_SIZE;
+ const int is_before_pad_left = (x_out_last <= PAD_X_BEFORE);
+ const int is_across_pad_left = (x_out_first < PAD_X_BEFORE) && (x_out_last > PAD_X_BEFORE);
+ const int is_inside_input =
+ (x_out_first >= PAD_X_BEFORE) && (x_out_last <= (SRC_WIDTH + PAD_X_BEFORE));
+ const int is_across_pad_right =
+ (x_out_first < (SRC_WIDTH + PAD_X_BEFORE)) && (x_out_last > (SRC_WIDTH + PAD_X_BEFORE));
+ const int is_after_pad_right = (x_out_first >= (SRC_WIDTH + PAD_X_BEFORE));
+
+ // Calculate base pointers
+ __global uchar *src_addr = src_ptr + src_offset_first_element_in_bytes;
+ Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst);
+
+ // Calculate input tensor's offset based on the defined conditions
+ int x_offset = 0;
+ x_offset = select(x_offset, PAD_X_BEFORE - x_out_last + IS_REFLECT, is_before_pad_left);
+ x_offset = select(x_offset, x_out_first - PAD_X_BEFORE, is_inside_input);
+ x_offset = select(x_offset, SRC_WIDTH - VEC_SIZE, is_across_pad_right);
+ x_offset = select(x_offset, AFTER_PAD_FACT_X - x_out_last, is_after_pad_right);
+
+#if defined(AFTER_PAD_REM)
+ int neg_offs = x_offset < 0;
+ x_offset = max(x_offset, 0);
+#endif // defined(AFTER_PAD_REM)
+
+ // Load input values from the computed offset
+ int y_in = y;
+ int z_in = z;
+#if defined(PAD_Y_BEFORE)
+ y_in = select(y - PAD_Y_BEFORE, PAD_Y_BEFORE - y + IS_REFLECT - 1, y < PAD_Y_BEFORE);
+ y_in = select(y_in, 2 * SRC_HEIGHT + PAD_Y_BEFORE - y - IS_REFLECT - 1,
+ y >= (SRC_HEIGHT + PAD_Y_BEFORE));
+#endif // defined(PAD_Y_BEFORE)
+#if defined(PAD_Z_BEFORE)
+ z_in = select(z - PAD_Z_BEFORE, PAD_Z_BEFORE - z + IS_REFLECT - 1, z < PAD_Z_BEFORE);
+ z_in = select(z_in, 2 * SRC_DEPTH + PAD_Z_BEFORE - z - IS_REFLECT - 1,
+ z >= (SRC_DEPTH + PAD_Z_BEFORE));
+#endif // defined(PAD_Z_BEFORE)
+
+ src_addr += x_offset * src_stride_x + y_in * src_step_y + z_in * src_step_z;
+
+#if SRC_WIDTH == 1
+ VSTORE(VEC_SIZE)
+ ((VEC_TYPE)(*(__global DATA_TYPE *)src_addr), 0, (__global DATA_TYPE *)dst.ptr);
+#else // SRC_WIDTH == 1
+
+ VEC_TYPE src_vals = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)src_addr);
+
+ // Choose rearrangement policy based on the defined conditions
+ src_vals =
+ select(src_vals, SYMM_REFL_LEFT(src_vals, PAD_X_BEFORE_REMAINDER, PAD_X_BEFORE_REMAINDER_REFL),
+ SCALAR_COND(is_across_pad_left));
+ src_vals =
+ select(src_vals, SYMM_REFL_RIGHT(src_vals, PAD_X_AFTER_REMAINDER, PAD_X_AFTER_REMAINDER_REFL),
+ SCALAR_COND(is_across_pad_right));
+ src_vals = select(src_vals, REVERSE(src_vals, VEC_SIZE),
+ SCALAR_COND((is_before_pad_left || is_after_pad_right)));
+#if defined(AFTER_PAD_REM)
+ src_vals = select(src_vals, ROTATE(src_vals, VEC_SIZE, AFTER_PAD_REM), SCALAR_COND(neg_offs));
+#endif // defined(AFTER_PAD_REM)
+
+ // Store
+ VSTORE(VEC_SIZE)
+ (src_vals, 0, (__global DATA_TYPE *)dst.ptr);
+#endif // SRC_WIDTH == 1
+}
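+
+/* For intuition, a serial C sketch of the 1D mirrored index this kernel derives per
+ * dimension (IS_REFLECT is 0 for SYMMETRIC and 1 for REFLECT; assumes the pad is
+ * smaller than the source extent). It matches the y_in/z_in select() computations
+ * above:
+ *
+ *   int mirror_index(int out_pos, int pad_before, int src_size, int is_reflect)
+ *   {
+ *     int in_pos = out_pos - pad_before;
+ *     if (in_pos < 0)
+ *       in_pos = -in_pos - 1 + is_reflect; // mirrored into the left padding
+ *     else if (in_pos >= src_size)
+ *       in_pos = 2 * src_size - in_pos - 1 - is_reflect; // mirrored into the right padding
+ *     return in_pos;
+ *   }
+ */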
+#endif // defined(PAD_X_BEFORE_REMAINDER) && defined(PAD_X_AFTER_REMAINDER) &&
+ // defined(PAD_X_BEFORE_REMAINDER_REFL) && defined(PAD_X_AFTER_REMAINDER_REFL) &&
+ // defined(AFTER_PAD_FACT_X)
+#endif // defined(DATA_TYPE) && defined(SELECT_DT) && defined(VEC_SIZE) && defined(PAD_X_BEFORE) &&
+ // defined(SRC_WIDTH)
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl
index 76fda9041..532000e9e 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl
@@ -138,7 +138,7 @@ __kernel void pixelwise_mul_qasymm8(TENSOR3D_DECLARATION(in1), TENSOR3D_DECLARAT
// Multiply with a multiplier smaller than 1
out_val =
- ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(out_val, RESULT_MULT_INT, RESULT_SHIFT, 16);
+ ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(out_val, RESULT_MULT_INT, RESULT_SHIFT, 16);
out_val += (VEC_DATA_TYPE(int, 16))(RESULT_OFFSET);
VEC_DATA_TYPE(uchar, 16) res = CONVERT(out_val, VEC_DATA_TYPE(uchar, 16));
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/quantization_symm8.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/quantization_symm8.cl
index 4ae9adb0b..c829f264d 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/quantization_symm8.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/quantization_symm8.cl
@@ -116,7 +116,7 @@ __kernel void quantization_symm8(IMAGE_DECLARATION(input), VECTOR_DECLARATION(sc
// Create scale vector
const VEC_DATA_TYPE(DATA_TYPE_IN, VEC_SIZE) vscale =
- *(((__global DATA_TYPE_IN *)(scale_ptr)) + get_global_id(1));
+ *(((__global DATA_TYPE_IN *)(scale_ptr)) + get_global_id(1));
// Quantize
VEC_DATA_TYPE(int, VEC_SIZE)
@@ -127,10 +127,10 @@ __kernel void quantization_symm8(IMAGE_DECLARATION(input), VECTOR_DECLARATION(sc
(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)), 0, (__global DATA_TYPE_OUT *)output.ptr);
#else //! defined(VEC_SIZE) || !defined(LAST_ACCESSED_X)
*((__global DATA_TYPE_OUT *)(output.ptr)) = (DATA_TYPE_OUT)CLAMP(
- CONVERT_RTE((*(__global DATA_TYPE_IN *)input.ptr) /
- (*(((__global DATA_TYPE_IN *)(scale_ptr)) + get_global_id(1))),
- int),
- MIN_QUANT_VAL, MAX_QUANT_VAL);
+ CONVERT_RTE((*(__global DATA_TYPE_IN *)input.ptr) /
+ (*(((__global DATA_TYPE_IN *)(scale_ptr)) + get_global_id(1))),
+ int),
+ MIN_QUANT_VAL, MAX_QUANT_VAL);
#endif // defined(VEC_SIZE) && defined(LAST_ACCESSED_X)
}
#endif // defined(VEC_SIZE) && defined(DATA_TYPE_IN) && defined(DATA_TYPE_OUT)
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl
index 832ac1270..d0ef31b20 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl
@@ -100,12 +100,14 @@ __kernel void reduce_min_max(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(o
Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT);
int indices[4] = {
- get_global_id(0), get_global_id(1), get_global_id(2) % DEPTH_OUT,
- get_global_id(2) / DEPTH_OUT,
+ get_global_id(0),
+ get_global_id(1),
+ get_global_id(2) % DEPTH_OUT,
+ get_global_id(2) / DEPTH_OUT,
};
DATA_TYPE value =
- *((__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1], indices[2], indices[3]));
+ *((__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1], indices[2], indices[3]));
for (int i = 1; i < dim; ++i)
{
indices[axis] = i;
@@ -186,16 +188,18 @@ __kernel void reduce_sum_mean(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(
Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT);
int indices[4] = {
- get_global_id(0), get_global_id(1), get_global_id(2) % DEPTH_OUT,
- get_global_id(2) / DEPTH_OUT,
+ get_global_id(0),
+ get_global_id(1),
+ get_global_id(2) % DEPTH_OUT,
+ get_global_id(2) / DEPTH_OUT,
};
DATA_TYPE sum_value = (DATA_TYPE)0;
for (int i = 0; i < dim; ++i)
{
indices[axis] = i;
- sum_value += *(
- (__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1], indices[2], indices[3]));
+ sum_value +=
+ *((__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1], indices[2], indices[3]));
}
#if OP_CODE == 3 // REDUCE_SUM
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/repeat.h b/compute/ARMComputeEx/src/core/CL/cl_kernels/repeat.h
new file mode 100644
index 000000000..cfc811cce
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/repeat.h
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_REPEAT_H
+#define ARM_COMPUTE_REPEAT_H
+
+#include "helpers.h"
+
+/** Macros that help in loop unrolling */
+// Repeat macros with 3 params, excluding the implicit ID param
+#define REPEAT_3_1(P_X, P_A, P_B, P_C) P_X##_DEF(0, P_A, P_B, P_C)
+#define REPEAT_3_2(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(1, P_A, P_B, P_C); \
+ REPEAT_3_1(P_X, P_A, P_B, P_C)
+#define REPEAT_3_3(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(2, P_A, P_B, P_C); \
+ REPEAT_3_2(P_X, P_A, P_B, P_C)
+#define REPEAT_3_4(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(3, P_A, P_B, P_C); \
+ REPEAT_3_3(P_X, P_A, P_B, P_C)
+#define REPEAT_3_5(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(4, P_A, P_B, P_C); \
+ REPEAT_3_4(P_X, P_A, P_B, P_C)
+#define REPEAT_3_6(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(5, P_A, P_B, P_C); \
+ REPEAT_3_5(P_X, P_A, P_B, P_C)
+#define REPEAT_3_7(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(6, P_A, P_B, P_C); \
+ REPEAT_3_6(P_X, P_A, P_B, P_C)
+#define REPEAT_3_8(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(7, P_A, P_B, P_C); \
+ REPEAT_3_7(P_X, P_A, P_B, P_C)
+#define REPEAT_3_9(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(8, P_A, P_B, P_C); \
+ REPEAT_3_8(P_X, P_A, P_B, P_C)
+#define REPEAT_3_10(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(9, P_A, P_B, P_C); \
+ REPEAT_3_9(P_X, P_A, P_B, P_C)
+#define REPEAT_3_11(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(A, P_A, P_B, P_C); \
+ REPEAT_3_10(P_X, P_A, P_B, P_C)
+#define REPEAT_3_12(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(B, P_A, P_B, P_C); \
+ REPEAT_3_11(P_X, P_A, P_B, P_C)
+#define REPEAT_3_13(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(C, P_A, P_B, P_C); \
+ REPEAT_3_12(P_X, P_A, P_B, P_C)
+#define REPEAT_3_14(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(D, P_A, P_B, P_C); \
+ REPEAT_3_13(P_X, P_A, P_B, P_C)
+#define REPEAT_3_15(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(E, P_A, P_B, P_C); \
+ REPEAT_3_14(P_X, P_A, P_B, P_C)
+#define REPEAT_3_16(P_X, P_A, P_B, P_C) \
+ P_X##_DEF(F, P_A, P_B, P_C); \
+ REPEAT_3_15(P_X, P_A, P_B, P_C)
+
+#define REPEAT_DEF_3_N(P_NUM, P_OP, P_A, P_B, P_C) \
+ REPEAT_3_##P_NUM(P_OP, P_A, P_B, P_C) // One level of indirection to ensure order of expansion
+ // does not affect preprocessing P_NUM
+#define REPEAT_3_N(P_NUM, P_OP, P_A, P_B, P_C) REPEAT_DEF_3_N(P_NUM, P_OP, P_A, P_B, P_C)
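+
+// Example (hypothetical macro M0): the REPEAT_DEF_3_N hop lets P_NUM be macro-expanded
+// before token pasting. With "#define M0 2", REPEAT_3_N(M0, X, a, b, c) first becomes
+// REPEAT_DEF_3_N(2, X, a, b, c) and then REPEAT_3_2(X, a, b, c); pasting directly in
+// REPEAT_3_N would form the non-existent token REPEAT_3_M0.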
+
+// Repeat macros with 4 params, excluding the implicit ID param
+#define REPEAT_4_1(P_X, P_A, P_B, P_C, P_D) P_X##_DEF(0, P_A, P_B, P_C, P_D)
+#define REPEAT_4_2(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(1, P_A, P_B, P_C, P_D); \
+ REPEAT_4_1(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_3(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(2, P_A, P_B, P_C, P_D); \
+ REPEAT_4_2(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_4(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(3, P_A, P_B, P_C, P_D); \
+ REPEAT_4_3(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_5(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(4, P_A, P_B, P_C, P_D); \
+ REPEAT_4_4(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_6(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(5, P_A, P_B, P_C, P_D); \
+ REPEAT_4_5(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_7(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(6, P_A, P_B, P_C, P_D); \
+ REPEAT_4_6(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_8(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(7, P_A, P_B, P_C, P_D); \
+ REPEAT_4_7(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_9(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(8, P_A, P_B, P_C, P_D); \
+ REPEAT_4_8(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_10(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(9, P_A, P_B, P_C, P_D); \
+ REPEAT_4_9(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_11(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(A, P_A, P_B, P_C, P_D); \
+ REPEAT_4_10(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_12(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(B, P_A, P_B, P_C, P_D); \
+ REPEAT_4_11(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_13(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(C, P_A, P_B, P_C, P_D); \
+ REPEAT_4_12(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_14(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(D, P_A, P_B, P_C, P_D); \
+ REPEAT_4_13(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_15(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(E, P_A, P_B, P_C, P_D); \
+ REPEAT_4_14(P_X, P_A, P_B, P_C, P_D)
+#define REPEAT_4_16(P_X, P_A, P_B, P_C, P_D) \
+ P_X##_DEF(F, P_A, P_B, P_C, P_D); \
+ REPEAT_4_15(P_X, P_A, P_B, P_C, P_D)
+
+#define REPEAT_DEF_4_N(P_NUM, P_OP, P_A, P_B, P_C, P_D) \
+ REPEAT_4_##P_NUM(P_OP, P_A, P_B, P_C, P_D) // One level of indirection to ensure order of
+ // expansion does not affect preprocessing P_NUM
+#define REPEAT_4_N(P_NUM, P_OP, P_A, P_B, P_C, P_D) REPEAT_DEF_4_N(P_NUM, P_OP, P_A, P_B, P_C, P_D)
+
+// Macro for initializing N variables. Generates N statements that define VAR##N =
+// RHS_ACCESSOR_DEF(...)
+#define VAR_INIT_TO_CONST_DEF(ID, TYPE, VAR, VAL) TYPE VAR##ID = VAL
+#define REPEAT_VAR_INIT_TO_CONST(N, TYPE, VAR, VAL) REPEAT_3_N(N, VAR_INIT_TO_CONST, TYPE, VAR, VAL)
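+// Expansion example: REPEAT_VAR_INIT_TO_CONST(3, int4, res, 0) yields, in descending
+// ID order, "int4 res2 = 0; int4 res1 = 0; int4 res0 = 0".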
+
+// Macro for initializing N variables by converting the data type. Generates N statements that
+// define VAR##N = RHS_ACCESSOR_DEF(...)
+#define VAR_INIT_CONVERT_SAT_DEF(ID, TYPE_OUT, VAR_IN, VAR_OUT) \
+ TYPE_OUT VAR_OUT##ID = CONVERT_SAT(VAR_IN##ID, TYPE_OUT)
+#define REPEAT_VAR_INIT_CONVERT_SAT(N, TYPE_OUT, VAR_IN, VAR_OUT) \
+ REPEAT_3_N(N, VAR_INIT_CONVERT_SAT, TYPE_OUT, VAR_IN, VAR_OUT)
+
+// Macro for adding a constant to N variables. Generates N statements that define VAR##N
+// = RHS_ACCESSOR_DEF(...)
+#define ADD_CONST_TO_VAR_DEF(ID, TYPE, VAR, VAL) VAR##ID += (TYPE)VAL
+#define REPEAT_ADD_CONST_TO_VAR(N, TYPE, VAR, VAL) REPEAT_3_N(N, ADD_CONST_TO_VAR, TYPE, VAR, VAL)
+
+// Macro for multiplying N variables (VAR_B) by a constant (VAL) and adding to other N variables
+// (VAR_A). Generates N statements that define VAR_A##N = RHS_ACCESSOR_DEF(...)
+#define MLA_VAR_WITH_CONST_VEC_DEF(ID, VAR_A, VAR_B, VAL) VAR_A##ID += VAR_B##ID * VAL
+#define REPEAT_MLA_VAR_WITH_CONST_VEC(N, VAR_A, VAR_B, VAL) \
+ REPEAT_3_N(N, MLA_VAR_WITH_CONST_VEC, VAR_A, VAR_B, VAL)
+
+// Macro for adding a vector to N variables. Generates N statements that define VAR##N
+// = RHS_ACCESSOR_DEF(...)
+#define ADD_VECTOR_TO_VAR_DEF(ID, TYPE, VAR, VEC) VAR##ID += VEC
+#define REPEAT_ADD_VECTOR_TO_VAR(N, VAR, VEC) REPEAT_3_N(N, ADD_VECTOR_TO_VAR, "", VAR, VEC)
+
+// Macro for adding two N variables. Generates N statements that define VAR_A##N
+// = RHS_ACCESSOR_DEF(...)
+#define ADD_TWO_VARS_DEF(ID, TYPE, VAR_A, VAR_B) VAR_A##ID += VAR_B##ID
+#define REPEAT_ADD_TWO_VARS(N, VAR_A, VAR_B) REPEAT_3_N(N, ADD_TWO_VARS, "", VAR_A, VAR_B)
+
+// Macro for performing Max between a constant and N variables. Generates N statements that define
+// VAR##N = RHS_ACCESSOR_DEF(...)
+#define MAX_CONST_VAR_DEF(ID, TYPE, VAR, VAL) VAR##ID = max(VAR##ID, (TYPE)VAL)
+#define REPEAT_MAX_CONST_VAR(N, TYPE, VAR, VAL) REPEAT_3_N(N, MAX_CONST_VAR, TYPE, VAR, VAL)
+
+// Macro for performing Min between a constant and N variables. Generates N statements that define
+// VAR##N = RHS_ACCESSOR_DEF(...)
+#define MIN_CONST_VAR_DEF(ID, TYPE, VAR, VAL) VAR##ID = min(VAR##ID, (TYPE)VAL)
+#define REPEAT_MIN_CONST_VAR(N, TYPE, VAR, VAL) REPEAT_3_N(N, MIN_CONST_VAR, TYPE, VAR, VAL)
+
+// Macro for performing ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE to N variables. Generates N
+// statements that define VAR##N = RHS_ACCESSOR_DEF(...)
+#define ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE_DEF(ID, SIZE, VAR, RES_MUL, RES_SHIFT) \
+ VAR##ID = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(VAR##ID, RES_MUL, RES_SHIFT, SIZE)
+#define REPEAT_ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(N, SIZE, VAR, RES_MUL, RES_SHIFT) \
+ REPEAT_4_N(N, ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE, SIZE, VAR, RES_MUL, RES_SHIFT)
+
+// Macro for performing ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE to N variables. Generates N
+// statements that define VAR##N = RHS_ACCESSOR_DEF(...)
+#define ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE_DEF(ID, SIZE, VAR, RES_MUL, RES_SHIFT) \
+ VAR##ID = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(VAR##ID, RES_MUL, RES_SHIFT, SIZE)
+#define REPEAT_ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(N, SIZE, VAR, RES_MUL, RES_SHIFT) \
+ REPEAT_4_N(N, ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE, SIZE, VAR, RES_MUL, RES_SHIFT)
+
+// Macro for performing per-channel ASYMM_MULT_BY_QUANT_MULTIPLIER to N variables.
+#define ASYMM_MULT_BY_QUANT_MULTIPLIER_PER_CHANNEL_DEF(ID, SIZE, VAR, RES_MUL, RES_SHIFT) \
+ ({ \
+ VEC_DATA_TYPE(int, N0) \
+ VAR##ID_shift_lt0 = \
+ ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(VAR##ID, RES_MUL, RES_SHIFT, N0); \
+ VEC_DATA_TYPE(int, N0) \
+ VAR##ID_shift_gt0 = \
+ ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(VAR##ID, RES_MUL, RES_SHIFT, N0); \
+ VAR##ID = select(VAR##ID_shift_lt0, VAR##ID_shift_gt0, RES_SHIFT >= 0); \
+ })
+#define REPEAT_ASYMM_MULT_BY_QUANT_MULTIPLIER_PER_CHANNEL(N, SIZE, VAR, RES_MUL, RES_SHIFT) \
+ REPEAT_4_N(N, ASYMM_MULT_BY_QUANT_MULTIPLIER_PER_CHANNEL, SIZE, VAR, RES_MUL, RES_SHIFT)
+
+#endif // ARM_COMPUTE_REPEAT_H
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/reshape_layer.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/reshape_layer.cl
new file mode 100644
index 000000000..8da8bfc8e
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/reshape_layer.cl
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "helpers.h"
+
+/** Perform tensor reshape
+ *
+ * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g.
+ * -DDATA_TYPE=short
+ *
+ * @param[in] input_ptr Pointer to the first source tensor. Supported
+ * data types: All
+ * @param[in] input_stride_x Stride of the first source tensor in X dimension
+ * (in bytes)
+ * @param[in] input_step_x input_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] input_stride_y Stride of the first source tensor in Y dimension
+ * (in bytes)
+ * @param[in] input_step_y input_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] input_stride_z Stride of the first source tensor in Z dimension
+ * (in bytes)
+ * @param[in] input_step_z input_stride_z * number of elements along Z
+ * processed per workitem(in bytes)
+ * @param[in] input_offset_first_element_in_bytes The offset of the first element in the first
+ * source tensor
+ * @param[out] output_ptr Pointer to the destination tensor. Supported
+ * data types: same as @p input_ptr
+ * @param[in] output_stride_x Stride of the destination tensor in X dimension
+ * (in bytes)
+ * @param[in] output_step_x output_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] output_stride_y Stride of the destination tensor in Y dimension
+ * (in bytes)
+ * @param[in] output_step_y output_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] output_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] output_step_z output_stride_z * number of elements along Z
+ * processed per workitem(in bytes)
+ * @param[in] output_offset_first_element_in_bytes The offset of the first element in the
+ * destination tensor
+ * @param[in] input_shape Input spatial shape
+ * @param[in] output_shape Output spatial shape
+ */
+__kernel void reshape_layer(TENSOR3D_DECLARATION(input), TENSOR3D_DECLARATION(output),
+ int2 input_shape, int2 output_shape)
+{
+ Tensor3D in = CONVERT_TO_TENSOR3D_STRUCT(input);
+ Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(output);
+
+ int3 id = (int3)(get_global_id(0), get_global_id(1), get_global_id(2));
+
+ // Linearize index
+ int linear_idx = id.x + id.y * input_shape.x + id.z * input_shape.x * input_shape.y;
+
+ // Translate to output
+ int3 out_id;
+ out_id.x = linear_idx % output_shape.x;
+ out_id.y = (linear_idx / output_shape.x) % output_shape.y;
+ out_id.z = linear_idx / (output_shape.x * output_shape.y);
+
+ // Store result
+ *((__global DATA_TYPE *)tensor3D_offset(&out, out_id.x, out_id.y, out_id.z)) =
+ *((__global DATA_TYPE *)in.ptr);
+}
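+
+/* Worked example (hypothetical shapes): with input_shape == (4, 2) and
+ * output_shape == (2, 4), the element at id == (3, 1, 0) has
+ * linear_idx == 3 + 1 * 4 == 7 and is written to out_id == (1, 3, 0),
+ * since 7 % 2 == 1, (7 / 2) % 4 == 3 and 7 / (2 * 4) == 0.
+ */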
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl
deleted file mode 100644
index e9d4696b4..000000000
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl
+++ /dev/null
@@ -1,292 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-// reference:
-// https://code.google.com/archive/p/ocl-radix-sort/source/default/source
-// OpenCL kernel sources for the CLRadixSort class
-// the #include does not exist in OpenCL
-// Copyright Philippe Helluy, Université de Strasbourg, France, 2011, helluy@math.unistra.fr
-// licensed under the GNU Lesser General Public License see http://www.gnu.org/copyleft/lesser.html
-// if you find this software usefull you can cite the following work in your reports or articles:
-// Philippe HELLUY, A portable implementation of the radix sort algorithm in OpenCL, 2011.
-// http://hal.archives-ouvertes.fr/hal-00596730
-
-// Reference for floating point radix sort:
-// http://www.codercorner.com/RadixSortRevisited.htm
-
-// compute the histogram for each radix and each virtual processor for the pass
-__kernel void radixsort_histogram(__global float *in_key_buf, __global int *d_Histograms,
- const int pass, __local int *loc_histo, const int n)
-{
- int it = get_local_id(0); // i local number of the processor
- int ig = get_global_id(0); // global number = i + g I
-
- int gr = get_group_id(0); // g group number
-
- int groups = get_num_groups(0);
- int items = get_local_size(0);
-
- // set the local histograms to zero
- for (int ir = 0; ir < _RADIX; ir++)
- {
- loc_histo[ir * items + it] = 0;
- }
-
- barrier(CLK_LOCAL_MEM_FENCE);
-
- // range of keys that are analyzed by the work item
- int size = n / groups / items; // size of the sub-list
- int start = ig * size; // beginning of the sub-list
-
- unsigned int key;
- int shortkey, k;
-
- // compute the index
- // the computation depends on the transposition
- for (int j = 0; j < size; j++)
- {
-#ifdef TRANSPOSE
- k = groups * items * j + ig;
-#else
- k = j + start;
-#endif
-
- key = *((__global unsigned int *)(in_key_buf + k));
-
- // extract the group of _BITS bits of the pass
- // the result is in the range 0.._RADIX-1
- shortkey = ((key >> (pass * _BITS)) & (_RADIX - 1));
-
- // increment the local histogram
- loc_histo[shortkey * items + it]++;
- }
-
- barrier(CLK_LOCAL_MEM_FENCE);
-
- // copy the local histogram to the global one
- for (int ir = 0; ir < _RADIX; ir++)
- {
- d_Histograms[items * (ir * groups + gr) + it] = loc_histo[ir * items + it];
- }
-
- barrier(CLK_GLOBAL_MEM_FENCE);
-}
-
-// initial transpose of the list for improving
-// coalescent memory access
-__kernel void transpose(const __global int *invect, __global int *outvect, const int nbcol,
- const int nbrow, const __global int *inperm, __global int *outperm,
- __local int *blockmat, __local int *blockperm, const int tilesize)
-{
-
- int i0 = get_global_id(0) * tilesize; // first row index
- int j = get_global_id(1); // column index
-
- int jloc = get_local_id(1); // local column index
-
- // fill the cache
- for (int iloc = 0; iloc < tilesize; iloc++)
- {
- int k = (i0 + iloc) * nbcol + j; // position in the matrix
- blockmat[iloc * tilesize + jloc] = invect[k];
-#ifdef PERMUT
- blockperm[iloc * tilesize + jloc] = inperm[k];
-#endif
- }
-
- barrier(CLK_LOCAL_MEM_FENCE);
-
- // first row index in the transpose
- int j0 = get_group_id(1) * tilesize;
-
- // put the cache at the good place
- for (int iloc = 0; iloc < tilesize; iloc++)
- {
- int kt = (j0 + iloc) * nbrow + i0 + jloc; // position in the transpose
- outvect[kt] = blockmat[jloc * tilesize + iloc];
-#ifdef PERMUT
- outperm[kt] = blockperm[jloc * tilesize + iloc];
-#endif
- }
-}
-
-// each virtual processor reorders its data using the scanned histogram
-__kernel void radixsort_reorder(__global float *in_key, __global float *out_key,
- __global int *d_Histograms, const int pass,
- __global int *indices_in, __global int *indices_out,
- __local int *loc_histo, const int n)
-{
-
- int it = get_local_id(0);
- int ig = get_global_id(0);
-
- int gr = get_group_id(0);
- int groups = get_num_groups(0);
- int items = get_local_size(0);
-
- int start = ig * (n / groups / items);
- int size = n / groups / items;
-
- // take the histogram in the cache
- for (int ir = 0; ir < _RADIX; ir++)
- {
- loc_histo[ir * items + it] = d_Histograms[items * (ir * groups + gr) + it];
- }
- barrier(CLK_LOCAL_MEM_FENCE);
-
- int newpos, shortkey, k, newpost;
- unsigned int key;
-
- for (int j = 0; j < size; j++)
- {
-#ifdef TRANSPOSE
- k = groups * items * j + ig;
-#else
- k = j + start;
-#endif
- float org_value = in_key[k];
- key = *(__global unsigned int *)(in_key + k);
- shortkey = ((key >> (pass * _BITS)) & (_RADIX - 1));
-
- newpos = loc_histo[shortkey * items + it];
-
-#ifdef TRANSPOSE
- int ignew, jnew;
- ignew = newpos / (n / groups / items);
- jnew = newpos % (n / groups / items);
- newpost = jnew * (groups * items) + ignew;
-#else
- newpost = newpos;
-#endif
-
- // d_outKeys[newpost]= key; // killing line !!!
- out_key[newpost] = org_value;
-
-#ifdef PERMUT
- indices_out[newpost] = indices_in[k];
-#endif
-
- newpos++;
- loc_histo[shortkey * items + it] = newpos;
- }
-}
-
-// perform a parallel prefix sum (a scan) on the local histograms
-// (see Blelloch 1990) each workitem worries about two memories
-// see also http://http.developer.nvidia.com/GPUGems3/gpugems3_ch39.html
-__kernel void radixsort_scanhistograms(__global int *histo, __local int *temp,
- __global int *globsum)
-{
- int it = get_local_id(0);
- int ig = get_global_id(0);
- int decale = 1;
- int n = get_local_size(0) * 2;
- int gr = get_group_id(0);
-
- // load input into local memory
- // up sweep phase
- temp[2 * it] = histo[2 * ig];
- temp[2 * it + 1] = histo[2 * ig + 1];
-
- // parallel prefix sum (algorithm of Blelloch 1990)
- for (int d = n >> 1; d > 0; d >>= 1)
- {
- barrier(CLK_LOCAL_MEM_FENCE);
- if (it < d)
- {
- int ai = decale * (2 * it + 1) - 1;
- int bi = decale * (2 * it + 2) - 1;
- temp[bi] += temp[ai];
- }
- decale *= 2;
- }
-
- // store the last element in the global sum vector
- // (maybe used in the next step for constructing the global scan)
- // clear the last element
- if (it == 0)
- {
- globsum[gr] = temp[n - 1];
- temp[n - 1] = 0;
- }
-
- // down sweep phase
- for (int d = 1; d < n; d *= 2)
- {
- decale >>= 1;
- barrier(CLK_LOCAL_MEM_FENCE);
-
- if (it < d)
- {
- int ai = decale * (2 * it + 1) - 1;
- int bi = decale * (2 * it + 2) - 1;
-
- int t = temp[ai];
- temp[ai] = temp[bi];
- temp[bi] += t;
- }
- }
- barrier(CLK_LOCAL_MEM_FENCE);
-
- // write results to device memory
-
- histo[2 * ig] = temp[2 * it];
- histo[2 * ig + 1] = temp[2 * it + 1];
-
- barrier(CLK_GLOBAL_MEM_FENCE);
-}
-
-// use the global sum for updating the local histograms
-// each work item updates two values
-__kernel void radixsort_pastehistograms(__global int *histo, __global int *globsum)
-{
- int ig = get_global_id(0);
- int gr = get_group_id(0);
-
- int s;
-
- s = globsum[gr];
-
- // write results to device memory
- histo[2 * ig] += s;
- histo[2 * ig + 1] += s;
-
- barrier(CLK_GLOBAL_MEM_FENCE);
-}
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLArgMinMaxLayerKernelEx.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLArgMinMaxLayerKernelEx.cpp
new file mode 100644
index 000000000..987409739
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLArgMinMaxLayerKernelEx.cpp
@@ -0,0 +1,332 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/CL/kernels/CLArgMinMaxLayerKernelEx.h"
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/CL/CLKernelLibraryEx.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/WindowHelpers.h"
+
+#include "support/StringSupport.h"
+
+namespace arm_compute
+{
+namespace
+{
+constexpr unsigned int vector_size = 16;
+
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *prev_output,
+ const ITensorInfo *output, unsigned int axis, ReductionOperation op)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8,
+ DataType::QASYMM8_SIGNED, DataType::S32,
+ DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(op != ReductionOperation::ARG_IDX_MAX &&
+ op != ReductionOperation::ARG_IDX_MIN,
+ "Only ARG_IDX_MAX and ARG_IDX_MIN are supported");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions,
+ "Reduction axis greater than max number of dimensions");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis");
+
+ if (output->total_size() != 0)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U32, DataType::S32,
+ DataType::S64);
+ }
+ if (prev_output != nullptr && prev_output->total_size() != 0)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(prev_output, 1, DataType::U32,
+ DataType::S32, DataType::S64);
+ if (output->total_size() != 0)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(prev_output, output);
+ }
+ }
+
+ return Status{};
+}
+
+std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input,
+ ITensorInfo *prev_output,
+ ITensorInfo *output, unsigned int axis,
+ ReductionOperation op)
+{
+ ARM_COMPUTE_UNUSED(op);
+ // Output tensor auto initialization if not yet initialized
+ TensorShape output_shape{input->tensor_shape()};
+ output_shape.set(axis, 1);
+ DataType output_data_type = (prev_output != nullptr) ? (prev_output->data_type()) : DataType::S32;
+ auto_init_if_empty(*output, input->clone()
+ ->set_tensor_shape(output_shape)
+ .set_data_type(output_data_type)
+ .reset_padding()
+ .set_is_resizable(true));
+
+ Window win =
+ calculate_max_window((prev_output != nullptr) ? (*prev_output) : (*input), Steps(vector_size));
+ bool window_changed = false;
+
+ switch (axis)
+ {
+ case 0:
+ {
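+ // An axis-0 reduction reads whole rows, so request static access across the full width.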
+ ITensorInfo *input_tensor_access = prev_output != nullptr ? prev_output : input;
+ AccessWindowStatic input_access(input_tensor_access, 0, 0,
+ static_cast<int>(input_tensor_access->dimension(0)), 1);
+ AccessWindowHorizontal output_access(output, 0, 1);
+ window_changed = update_window_and_padding(win, input_access, output_access);
+ output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
+ }
+ break;
+ case 1:
+ case 2:
+ case 3:
+ {
+ AccessWindowHorizontal input_access(input, 0, vector_size);
+ AccessWindowHorizontal output_access(output, 0, vector_size);
+ window_changed = update_window_and_padding(win, input_access, output_access);
+ output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
+ }
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Not supported");
+ }
+
+ Status err = (window_changed)
+ ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
+ : Status{};
+ return std::make_tuple(err, win);
+}
+} // namespace
+
+CLArgMinMaxLayerKernelEx::CLArgMinMaxLayerKernelEx()
+ : _input(nullptr), _prev_output(nullptr), _output(nullptr), _reduction_axis(0),
+ _op(ReductionOperation::ARG_IDX_MAX)
+{
+}
+
+void CLArgMinMaxLayerKernelEx::configure(const ICLTensor *input, const ICLTensor *prev_output,
+ ICLTensor *output, unsigned int axis,
+ ReductionOperation op)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_ERROR_THROW_ON(
+ validate_arguments(input->info(), (prev_output != nullptr) ? prev_output->info() : nullptr,
+ output->info(), axis, op));
+ auto win_config = validate_and_configure_window(
+ input->info(), (prev_output != nullptr) ? prev_output->info() : nullptr, output->info(), axis,
+ op);
+ ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
+
+ _input = input;
+ _prev_output = prev_output;
+ _output = output;
+ _reduction_axis = axis;
+ _op = op;
+
+ // Set build options
+ CLBuildOptions build_opts;
+
+ build_opts.add_option_if(_prev_output != nullptr, "-DPREV_OUTPUT");
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
+ build_opts.add_option_if(is_data_type_float(input->info()->data_type()), "-DFLOAT_DATA_TYPE");
+ build_opts.add_option_if_else(op == ReductionOperation::ARG_IDX_MAX, "-DARG_MAX", "-DARG_MIN");
+ build_opts.add_option("-DDATA_TYPE_OUTPUT=" +
+ get_cl_type_from_data_type(output->info()->data_type()));
+ build_opts.add_option("-DDATA_TYPE_SELECT=" +
+ get_cl_signed_type_from_element_size(input->info()->element_size()));
+
+ // Create kernel
+ cl::NDRange lws_hint = CLKernelLibrary::get().default_ndrange();
+ std::string kernel_axis_name;
+ switch (axis)
+ {
+ case 0:
+ {
+ const ICLTensor *input_for_width = prev_output != nullptr ? _prev_output : _input;
+ build_opts.add_option("-DWIDTH=" +
+ support::cpp11::to_string(input_for_width->info()->dimension(0)));
+
+ kernel_axis_name = "x";
+ lws_hint = create_lws_hint_parallel_implementations(input_for_width->info()->dimension(0),
+ vector_size);
+ }
+ break;
+ case 1:
+ build_opts.add_option("-DHEIGHT=" + support::cpp11::to_string(input->info()->dimension(1)));
+ kernel_axis_name = "y";
+ break;
+ case 2:
+ build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(input->info()->dimension(2)));
+ kernel_axis_name = "z";
+ break;
+ case 3:
+ build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(input->info()->dimension(2)));
+ build_opts.add_option("-DBATCH=" + support::cpp11::to_string(input->info()->dimension(3)));
+ kernel_axis_name = "w";
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Not supported");
+ }
+ _kernel = static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(
+ "arg_min_max_ex_" + kernel_axis_name, build_opts.options()));
+
+ // Configure kernel window
+ ICLKernel::configure_internal(std::get<1>(win_config), lws_hint);
+}
+
+Status CLArgMinMaxLayerKernelEx::validate(const ITensorInfo *input, const ITensorInfo *prev_output,
+ const ITensorInfo *output, unsigned int axis,
+ ReductionOperation op)
+{
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, prev_output, output, axis, op));
+ ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(
+ input->clone().get(), (prev_output != nullptr) ? prev_output->clone().get() : nullptr,
+ output->clone().get(), axis, op)));
+ return Status{};
+}
+
+void CLArgMinMaxLayerKernelEx::run(const Window &window, cl::CommandQueue &queue)
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
+
+ switch (_reduction_axis)
+ {
+ case 0:
+ {
+ // Set out window
+ Window out_window(window);
+ out_window.set(Window::DimX, Window::Dimension(0, 0, 0));
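+ // Collapse the output X range: each 2D slice reduces to a single element.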
+
+ // Get first input and output slices
+ Window in_slice = window.first_slice_window_2D();
+ Window out_slice = out_window.first_slice_window_2D();
+
+ // Number of tensor arguments: input, optional previous output, and output
+ const unsigned int num_tensors = _prev_output != nullptr ? 3 : 2;
+
+ // Reserve a local results buffer (one output element per work-item in the work-group)
+ unsigned int local_res_size = lws_hint()[0] * _output->info()->element_size();
+ _kernel.setArg(num_arguments_per_2D_tensor() * num_tensors, local_res_size, nullptr);
+ do
+ {
+ unsigned int idx = 0;
+ add_2D_tensor_argument(idx, _input, in_slice);
+ if (_prev_output != nullptr)
+ {
+ add_2D_tensor_argument(idx, _prev_output, in_slice);
+ }
+ add_2D_tensor_argument(idx, _output, out_slice);
+ enqueue(queue, *this, in_slice, lws_hint());
+ } while (window.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(out_slice));
+ }
+ break;
+ case 1:
+ {
+ // Get first input and output slices
+ Window window_in{window};
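+ // One window step spans the full height; the kernel iterates over Y internally.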
+ window_in.set(Window::DimY, Window::Dimension(0, _input->info()->dimension(1),
+ _input->info()->dimension(1)));
+ Window in_slice = window_in.first_slice_window_2D();
+ Window out_slice = window.first_slice_window_2D();
+
+ do
+ {
+ unsigned int idx = 0;
+ add_2D_tensor_argument(idx, _input, in_slice);
+ add_2D_tensor_argument(idx, _output, out_slice);
+ enqueue(queue, *this, in_slice, lws_hint());
+ } while (window_in.slide_window_slice_2D(in_slice) &&
+ window.slide_window_slice_2D(out_slice));
+ }
+ break;
+ case 2:
+ {
+ // Get first input and output slices
+ Window window_in{window};
+ window_in.set(Window::DimZ, Window::Dimension(0, _input->info()->dimension(2),
+ _input->info()->dimension(2)));
+ Window in_slice = window_in.first_slice_window_3D();
+ Window out_slice = window.first_slice_window_3D();
+
+ do
+ {
+ unsigned int idx = 0;
+ add_3D_tensor_argument(idx, _input, in_slice);
+ add_3D_tensor_argument(idx, _output, out_slice);
+ enqueue(queue, *this, in_slice, lws_hint());
+ } while (window_in.slide_window_slice_3D(in_slice) &&
+ window.slide_window_slice_3D(out_slice));
+ }
+ break;
+ case 3:
+ {
+ // Get first input and output slices
+ Window window_in{window};
+ window_in.set(3, Window::Dimension(0, 1, 1));
+ Window in_slice = window_in.first_slice_window_4D();
+ Window out_slice = window.first_slice_window_4D();
+
+ do
+ {
+ unsigned int idx = 0;
+ add_4D_tensor_argument(idx, _input, in_slice);
+ add_4D_tensor_argument(idx, _output, out_slice);
+ enqueue(queue, *this, in_slice, lws_hint());
+ } while (window_in.slide_window_slice_4D(in_slice) &&
+ window.slide_window_slice_4D(out_slice));
+ }
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Not supported");
+ }
+}
+} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp
index fbc76f5e1..a5daa2410 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp
@@ -43,6 +43,8 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
#include "arm_compute/core/CL/ICLTensor.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/AccessWindowStatic.h"
#include "support/StringSupport.h"
using namespace arm_compute;
@@ -55,7 +57,7 @@ Status validate_parameters(const ITensorInfo *input1, const ITensorInfo *input2,
const ITensorInfo *output)
{
const TensorShape &out_shape =
- TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape());
+ TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape());
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::QASYMM8);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::QASYMM8);
@@ -68,15 +70,15 @@ Status validate_parameters(const ITensorInfo *input1, const ITensorInfo *input2,
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8,
DataType::QASYMM8);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- detail::have_different_dimensions(out_shape, output->tensor_shape(), 0),
- "Wrong shape for output");
+ detail::have_different_dimensions(out_shape, output->tensor_shape(), 0),
+ "Wrong shape for output");
}
return Status{};
}
} // namespace
CLBinaryLogicalOpKernel::CLBinaryLogicalOpKernel()
- : _input1(nullptr), _input2(nullptr), _output(nullptr)
+ : _input1(nullptr), _input2(nullptr), _output(nullptr)
{
}
@@ -111,13 +113,13 @@ void CLBinaryLogicalOpKernel::configure(const ICLTensor *input1, const ICLTensor
build_opts.emplace(("-DOP_CODE=" + support::cpp11::to_string(op_code)));
build_opts.emplace(
- ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
+ ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
_kernel =
- static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts));
+ static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts));
const std::pair<TensorShape, ValidRegion> broadcast_pair =
- ITensorInfo::broadcast_shape_and_valid_region(*input1->info(), *input2->info());
+ ITensorInfo::broadcast_shape_and_valid_region(*input1->info(), *input2->info());
const ValidRegion &valid_region = broadcast_pair.second;
@@ -130,8 +132,8 @@ void CLBinaryLogicalOpKernel::configure(const ICLTensor *input1, const ICLTensor
AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
update_window_and_padding(win_input1, input1_access) ||
- update_window_and_padding(win_input2, input2_access) ||
- update_window_and_padding(win, output_access);
+ update_window_and_padding(win_input2, input2_access) ||
+ update_window_and_padding(win, output_access);
output_access.set_valid_region(win, valid_region);
@@ -151,7 +153,7 @@ void CLBinaryLogicalOpKernel::run(const Window &window, cl::CommandQueue &queue)
if (std::min(in_shape1.total_size(), in_shape2.total_size()) > 1)
{
can_collapse =
- (std::min(in_shape1.num_dimensions(), in_shape2.num_dimensions()) > Window::DimZ);
+ (std::min(in_shape1.num_dimensions(), in_shape2.num_dimensions()) > Window::DimZ);
for (size_t d = Window::DimZ; can_collapse && (d < out_shape.num_dimensions()); d++)
{
can_collapse = (in_shape1[d] == in_shape2[d]);
@@ -160,13 +162,13 @@ void CLBinaryLogicalOpKernel::run(const Window &window, cl::CommandQueue &queue)
bool has_collapsed = false;
Window collapsed =
- can_collapse ? window.collapse_if_possible(ICLKernel::window(), Window::DimZ, &has_collapsed)
- : window;
+ can_collapse ? window.collapse_if_possible(ICLKernel::window(), Window::DimZ, &has_collapsed)
+ : window;
const TensorShape &in_shape1_collapsed =
- has_collapsed ? in_shape1.collapsed_from(Window::DimZ) : in_shape1;
+ has_collapsed ? in_shape1.collapsed_from(Window::DimZ) : in_shape1;
const TensorShape &in_shape2_collapsed =
- has_collapsed ? in_shape2.collapsed_from(Window::DimZ) : in_shape2;
+ has_collapsed ? in_shape2.collapsed_from(Window::DimZ) : in_shape2;
Window slice = collapsed.first_slice_window_3D();
Window slice_input1 = slice.broadcast_if_dimension_le_one(in_shape1_collapsed);
@@ -189,9 +191,9 @@ void CLBinaryLogicalOpKernel::run(const Window &window, cl::CommandQueue &queue)
BorderSize CLBinaryLogicalOpKernel::border_size() const
{
const unsigned int replicateSize =
- _output->info()->dimension(0) -
- std::min(_input1->info()->dimension(0), _input2->info()->dimension(0));
+ _output->info()->dimension(0) -
+ std::min(_input1->info()->dimension(0), _input2->info()->dimension(0));
const unsigned int border =
- std::min<unsigned int>(num_elems_processed_per_iteration - 1U, replicateSize);
+ std::min<unsigned int>(num_elems_processed_per_iteration - 1U, replicateSize);
return BorderSize(0, border, 0, 0);
}
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLCastBoolKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLCastBoolKernel.cpp
new file mode 100644
index 000000000..dc06bfbb3
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLCastBoolKernel.cpp
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2018-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/CL/kernels/CLCastBoolKernel.h"
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibraryEx.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
+#include "support/StringSupport.h"
+
+#include <cstddef>
+#include <set>
+#include <string>
+
+namespace arm_compute
+{
+namespace
+{
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
+{
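+ // Boolean inputs are stored as U8, so a cast to U8 would be an identity; the types must differ.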
+ ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(output);
+ ARM_COMPUTE_RETURN_ERROR_ON(input == output);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S8,
+ DataType::S16, DataType::U16, DataType::U32,
+ DataType::S32, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == output->data_type(),
+ "Input and output data types must be different");
+
+ // Validate in case of configured output
+ if (output->total_size() > 0)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
+ }
+
+ return Status{};
+}
+} // namespace
+
+void CLCastBoolKernel::configure(const ICLTensor *input, ICLTensor *output)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+
+ // Auto-initialize the output shape if not yet initialized (only the shape can be
+ // auto-configured; the data type must be given)
+ set_shape_if_empty(*output->info(), input->info()->tensor_shape());
+
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info()));
+
+ // Number of elements to process per iteration
+ constexpr unsigned int num_elems_processed_per_iteration = 16;
+
+ // Set build options
+ CLBuildOptions build_opts;
+ build_opts.add_option("-DVEC_SIZE=" +
+ support::cpp11::to_string(num_elems_processed_per_iteration));
+ build_opts.add_option("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input->info()->data_type()));
+ build_opts.add_option("-DDATA_TYPE_OUT=" +
+ get_cl_type_from_data_type(output->info()->data_type()));
+
+ // Create kernel
+ const std::string kernel_name = "cast_bool";
+ _kernel = static_cast<cl::Kernel>(
+ CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts.options()));
+
+ // Configure kernel
+ ICLSimple2DKernel::configure(input, output, num_elems_processed_per_iteration);
+
+ // Collapse window
+ const Window &full_window = window();
+ Window collapsed_window = full_window.collapse_if_possible(full_window, Window::DimZ);
+ ICLKernel::configure_internal(collapsed_window);
+
+ // Set config_id for enabling LWS tuning
+ _config_id = kernel_name;
+ _config_id += "_";
+ _config_id += lower_string(string_from_data_type(output->info()->data_type()));
+ _config_id += "_";
+ _config_id += support::cpp11::to_string(output->info()->dimension(0));
+ _config_id += "_";
+ _config_id += support::cpp11::to_string(output->info()->dimension(1));
+}
+
+Status CLCastBoolKernel::validate(const ITensorInfo *input, const ITensorInfo *output)
+{
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output));
+
+ return Status{};
+}
+} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp
index 67aaf2db6..4206f1fd4 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp
@@ -43,6 +43,9 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
#include "arm_compute/core/CL/ICLTensor.h"
+
+#include "src/core/helpers/WindowHelpers.h"
+
#include "support/StringSupport.h"
using namespace arm_compute;
@@ -61,14 +64,14 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
input_access.set_valid_region(win, output->valid_region());
Status err = (window_changed)
- ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
- : Status{};
+ ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
+ : Status{};
return std::make_pair(err, win);
}
} // namespace
CLEmbeddingLookupKernel::CLEmbeddingLookupKernel()
- : _input(nullptr), _output(nullptr), _lookups(nullptr)
+ : _input(nullptr), _output(nullptr), _lookups(nullptr)
{
}
@@ -77,8 +80,8 @@ Status CLEmbeddingLookupKernel::validate(const ITensorInfo *input, const ITensor
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, lookups);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
- input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
- DataType::U32, DataType::S32, DataType::F16, DataType::F32);
+ input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
+ DataType::U32, DataType::S32, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lookups, 1, DataType::S32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
@@ -108,8 +111,8 @@ void CLEmbeddingLookupKernel::configure(const ICLTensor *input, ICLTensor *outpu
build_opts.emplace("-DNUM_DIMS=" + support::cpp11::to_string(_input->info()->num_dimensions()));
// Create kernel
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel(kernel_name.str(), build_opts));
+ _kernel =
+ static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name.str(), build_opts));
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), output->info());
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp
index 3bfe3e407..62da2376e 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp
@@ -45,6 +45,10 @@
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
#include "arm_compute/core/UtilsEx.h"
+
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
#include "support/StringSupport.h"
using namespace arm_compute;
@@ -62,15 +66,15 @@ inline Status validate_arguments(const ITensorInfo *input, const ITensorInfo *in
ARM_COMPUTE_RETURN_ERROR_ON(actual_axis >= input->num_dimensions());
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
- input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
- DataType::U32, DataType::S32, DataType::F16, DataType::F32);
+ input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
+ DataType::U32, DataType::S32, DataType::F16, DataType::F32);
if (output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape_ex(
- input->tensor_shape(), indices->tensor_shape(), actual_axis);
+ input->tensor_shape(), indices->tensor_shape(), actual_axis);
ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() != output->tensor_shape().total_size());
}
@@ -86,7 +90,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
const uint32_t actual_axis = wrap_around(axis, static_cast<int>(input->num_dimensions()));
std::unique_ptr<ITensorInfo> output_info = input->clone();
output_info->set_tensor_shape(arm_compute::misc::shape_calculator::compute_gather_shape_ex(
- input->tensor_shape(), indices->tensor_shape(), actual_axis));
+ input->tensor_shape(), indices->tensor_shape(), actual_axis));
// Output auto initialization if not yet initialized
auto_init_if_empty((*output), output_info->tensor_shape(), 1, input->data_type());
@@ -100,7 +104,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
} // namespace
CLGatherExKernel::CLGatherExKernel()
- : _input(nullptr), _indices(nullptr), _output(nullptr), _axis(0)
+ : _input(nullptr), _indices(nullptr), _output(nullptr), _axis(0)
{
}
@@ -109,11 +113,11 @@ void CLGatherExKernel::configure(const ICLTensor *input, const ICLTensor *indice
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, indices);
ARM_COMPUTE_ERROR_THROW_ON(
- validate_arguments(input->info(), indices->info(), output->info(), axis));
+ validate_arguments(input->info(), indices->info(), output->info(), axis));
// Configure kernel window
auto win_config =
- validate_and_configure_window(input->info(), indices->info(), output->info(), axis);
+ validate_and_configure_window(input->info(), indices->info(), output->info(), axis);
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
_input = input;
@@ -133,7 +137,7 @@ void CLGatherExKernel::configure(const ICLTensor *input, const ICLTensor *indice
// Create kernel
_kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("gather_ex", build_opts.options()));
+ CLKernelLibraryEx::get().create_kernel("gather_ex", build_opts.options()));
ICLKernel::configure_internal(win_config.second);
}
@@ -144,7 +148,7 @@ Status CLGatherExKernel::validate(const ITensorInfo *input, const ITensorInfo *i
ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(),
indices->clone().get(),
output->clone().get(), axis)
- .first);
+ .first);
return Status{};
}
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp
index 930e7c944..03ca6ddcb 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp
@@ -43,6 +43,7 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
#include "arm_compute/core/CL/ICLTensor.h"
+#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
using namespace arm_compute;
@@ -61,8 +62,8 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
input_access.set_valid_region(win, output->valid_region());
Status err = (window_changed)
- ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
- : Status{};
+ ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
+ : Status{};
return std::make_pair(err, win);
}
} // namespace
@@ -78,8 +79,8 @@ Status CLHashtableLookupKernel::validate(const ITensorInfo *lookups, const ITens
{
ARM_COMPUTE_ERROR_ON_NULLPTR(lookups, keys, input, output, hits);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
- input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
- DataType::U32, DataType::S32, DataType::F16, DataType::F32);
+ input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
+ DataType::U32, DataType::S32, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lookups, 1, DataType::S32);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(keys, 1, DataType::S32);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(hits, 1, DataType::U8, DataType::QASYMM8);
@@ -102,7 +103,7 @@ void CLHashtableLookupKernel::configure(const ICLTensor *lookups, const ICLTenso
const ICLTensor *input, ICLTensor *output, ICLTensor *hits)
{
ARM_COMPUTE_ERROR_THROW_ON(
- validate(lookups->info(), keys->info(), input->info(), output->info(), hits->info()));
+ validate(lookups->info(), keys->info(), input->info(), output->info(), hits->info()));
_lookups = lookups;
_keys = keys;
@@ -111,9 +112,9 @@ void CLHashtableLookupKernel::configure(const ICLTensor *lookups, const ICLTenso
_hits = hits;
// Make _lookup_indices tensor
- _lookup_indices = support::cpp14::make_unique<CLTensor>();
+ _lookup_indices = std::make_unique<CLTensor>();
_lookup_indices->allocator()->init(
- TensorInfo(lookups->info()->tensor_shape(), lookups->info()->num_channels(), DataType::S32));
+ TensorInfo(lookups->info()->tensor_shape(), lookups->info()->num_channels(), DataType::S32));
_lookup_indices->allocator()->allocate();
// Set kernel build options
@@ -127,8 +128,8 @@ void CLHashtableLookupKernel::configure(const ICLTensor *lookups, const ICLTenso
build_opts.emplace("-DNUM_DIMS=" + support::cpp11::to_string(_input->info()->num_dimensions()));
// Create kernel
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel(kernel_name.str(), build_opts));
+ _kernel =
+ static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name.str(), build_opts));
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), output->info());
@@ -148,7 +149,7 @@ void CLHashtableLookupKernel::run(const Window &window, cl::CommandQueue &queue)
// Set values of hits
const int32_t *lookups_buf =
- reinterpret_cast<int32_t *>(const_cast<ICLTensor *>(_lookups)->buffer());
+ reinterpret_cast<int32_t *>(const_cast<ICLTensor *>(_lookups)->buffer());
const int32_t *keys_buf = reinterpret_cast<int32_t *>(const_cast<ICLTensor *>(_keys)->buffer());
uint8_t *hits_buf = reinterpret_cast<uint8_t *>(_hits->buffer());
int32_t *lookup_indices_buf = reinterpret_cast<int32_t *>(_lookup_indices->buffer());
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.cpp
index 61c14d271..945af3c51 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.cpp
@@ -42,12 +42,16 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Window.h"
+
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
#include "support/StringSupport.h"
#include "support/ToolchainSupport.h"
@@ -94,8 +98,8 @@ std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITe
} // namespace
CLInstanceNormalizationLayerKernelEx::CLInstanceNormalizationLayerKernelEx()
- : _input(nullptr), _output(nullptr), _gamma(nullptr), _beta(nullptr), _epsilon(1e-12),
- _run_in_place(false)
+ : _input(nullptr), _output(nullptr), _gamma(nullptr), _beta(nullptr), _epsilon(1e-12),
+ _run_in_place(false)
{
}
@@ -132,7 +136,7 @@ void CLInstanceNormalizationLayerKernelEx::configure(ICLTensor *input, ICLTensor
// Create kernel
_kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("instance_normalization_ex", build_opts.options()));
+ CLKernelLibraryEx::get().create_kernel("instance_normalization_ex", build_opts.options()));
// Configure kernel window
auto win_config = validate_and_configure_window(_input->info(), _output->info());
@@ -147,7 +151,7 @@ Status CLInstanceNormalizationLayerKernelEx::validate(const ITensorInfo *input,
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, gamma, beta, epsilon));
ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(
- input->clone().get(), (output == nullptr ? input->clone().get() : output->clone().get()))));
+ input->clone().get(), (output == nullptr ? input->clone().get() : output->clone().get()))));
return Status{};
}
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLMemsetKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLMemsetKernel.cpp
new file mode 100644
index 000000000..a00fc5e2e
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLMemsetKernel.cpp
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2018-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
+
+#include "arm_compute/core/CL/CLKernelLibraryEx.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "support/StringSupport.h"
+
+namespace arm_compute
+{
+CLMemsetKernel::CLMemsetKernel() : ICLKernel(), _tensor(nullptr), _full_window() {}
+
+void CLMemsetKernel::configure(ICLTensor *tensor, const PixelValue &constant_value, Window *window)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), tensor, constant_value, window);
+}
+
+void CLMemsetKernel::configure(const CLCompileContext &compile_context, ICLTensor *tensor,
+ const PixelValue &constant_value, Window *window)
+{
+ ARM_COMPUTE_UNUSED(compile_context);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(tensor);
+ ARM_COMPUTE_ERROR_THROW_ON(validate(tensor->info(), constant_value, window));
+
+ _tensor = tensor;
+
+ const DataType data_type = tensor->info()->data_type();
+ const int vec_size_x = 16 / tensor->info()->element_size();
+
+ // Create and update the window (if needed)
+ _full_window = calculate_max_window(*tensor->info());
+ Window win = _full_window;
+ if (window != nullptr)
+ {
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(win, *window);
+ win = *window;
+ }
+
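+ // Vectorize stores when the row is at least one vector wide; a non-multiple width needs a guarded tail access.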
+ const int output_width_x = win.num_iterations(0);
+ const bool multi_access_x = output_width_x >= vec_size_x;
+ const bool remainder_x = output_width_x % vec_size_x > 0;
+
+ if (multi_access_x)
+ {
+ win.set(
+ Window::DimX,
+ Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x), vec_size_x));
+ }
+ ICLKernel::configure_internal(win);
+
+ // Create kernel
+ CLBuildOptions build_opts;
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type));
+ build_opts.add_option("-DCONSTANT_VALUE=" + string_from_pixel_value(constant_value, data_type));
+ build_opts.add_option_if(multi_access_x, "-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
+ build_opts.add_option_if(multi_access_x && remainder_x,
+ "-DLAST_ACCESSED_X=" + support::cpp11::to_string(
+ std::max<int>(output_width_x - vec_size_x, 0)));
+
+ _kernel =
+ static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel("memset", build_opts.options()));
+}
+
+Status CLMemsetKernel::validate(const ITensorInfo *tensor, const PixelValue &constant_value,
+ Window *window)
+{
+ ARM_COMPUTE_UNUSED(tensor);
+ ARM_COMPUTE_UNUSED(constant_value);
+ if (window != nullptr)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON(window->x().step() != 1);
+ }
+ return Status{};
+}
+
+void CLMemsetKernel::run(const Window &window, cl::CommandQueue &queue)
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+
+ // Collapse all the batches on the third dimension
+ Window collapsed = window.collapse_if_possible(_full_window, Window::DimZ);
+ Window slice = collapsed.first_slice_window_3D();
+
+ do
+ {
+ unsigned int idx = 0;
+ add_3D_tensor_argument(idx, _tensor, slice);
+ enqueue(queue, *this, slice, lws_hint());
+ } while (collapsed.slide_window_slice_3D(slice));
+}
+} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLMultiplyScaleFactorKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLMultiplyScaleFactorKernel.cpp
index 6b27c9917..da7437e97 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLMultiplyScaleFactorKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLMultiplyScaleFactorKernel.cpp
@@ -40,15 +40,19 @@
#include "arm_compute/core/CL/kernels/CLMultiplyScaleFactorKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
#include "support/StringSupport.h"
using namespace arm_compute;
@@ -99,7 +103,7 @@ std::tuple<Status, Window> validate_and_configure_window(const ITensorInfo *inpu
} // namespace
CLMultiplyScaleFactorKernel::CLMultiplyScaleFactorKernel()
- : _input(nullptr), _scale_factor(nullptr), _output(nullptr), _multiplier(1.f)
+ : _input(nullptr), _scale_factor(nullptr), _output(nullptr), _multiplier(1.f)
{
}
@@ -108,7 +112,7 @@ void CLMultiplyScaleFactorKernel::configure(const ICLTensor *input, const ICLTen
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(
- validate_arguments(input->info(), scale_factor->info(), output->info()));
+ validate_arguments(input->info(), scale_factor->info(), output->info()));
_input = input;
_scale_factor = scale_factor;
@@ -123,9 +127,9 @@ void CLMultiplyScaleFactorKernel::configure(const ICLTensor *input, const ICLTen
Window win = calculate_max_window(*output->info());
if (multi_access_x)
{
- win.set(Window::DimX,
- Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x),
- vec_size_x));
+ win.set(
+ Window::DimX,
+ Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x), vec_size_x));
}
ICLKernel::configure_internal(win);
@@ -134,11 +138,11 @@ void CLMultiplyScaleFactorKernel::configure(const ICLTensor *input, const ICLTen
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(output->info()->data_type()));
build_opts.add_option_if(
- multi_access_x, "-DLAST_ACCESSED_X=" +
- support::cpp11::to_string(std::max<int>(output_width_x - vec_size_x, 0)));
+ multi_access_x, "-DLAST_ACCESSED_X=" +
+ support::cpp11::to_string(std::max<int>(output_width_x - vec_size_x, 0)));
_kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("multiply_scale_factor", build_opts.options()));
+ CLKernelLibraryEx::get().create_kernel("multiply_scale_factor", build_opts.options()));
}
Status CLMultiplyScaleFactorKernel::validate(const ITensorInfo *input,
@@ -147,7 +151,7 @@ Status CLMultiplyScaleFactorKernel::validate(const ITensorInfo *input,
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, scale_factor, output));
ARM_COMPUTE_RETURN_ON_ERROR(
- std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get())));
+ std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get())));
return Status{};
}
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp
index 643c8b110..cd5e571e9 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp
@@ -43,6 +43,9 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
#include "arm_compute/core/CL/ICLTensor.h"
+
+#include "src/core/helpers/WindowHelpers.h"
+
#include "support/StringSupport.h"
using namespace arm_compute;
@@ -80,9 +83,9 @@ void CLNegKernel::configure(const ICLTensor *input, ICLTensor *output)
std::set<std::string> build_opts;
build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
build_opts.emplace(
- ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
+ ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
_kernel =
- static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel("neg_tensor", build_opts));
+ static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel("neg_tensor", build_opts));
// Configure window
Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLOneHotKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLOneHotKernel.cpp
new file mode 100644
index 000000000..4c4cbe710
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLOneHotKernel.cpp
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/CL/kernels/CLOneHotKernel.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/CL/CLKernelLibraryEx.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
+
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
+#include "support/StringSupport.h"
+#include <string>
+namespace arm_compute
+{
+namespace
+{
+inline Status validate_arguments(const ITensorInfo *indices, const ITensorInfo *on_value,
+ const ITensorInfo *output, int depth, int axis)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(indices, on_value, output);
+ const uint32_t actual_axis = wrap_around(axis, static_cast<int>(output->num_dimensions()));
+ ARM_COMPUTE_RETURN_ERROR_ON(output->num_dimensions() > 4);
+ ARM_COMPUTE_RETURN_ERROR_ON(on_value->tensor_shape().total_size() != 1);
+ ARM_COMPUTE_RETURN_ERROR_ON(depth <= 0);
+ ARM_COMPUTE_RETURN_ERROR_ON(actual_axis >= output->num_dimensions());
+ ARM_COMPUTE_RETURN_ERROR_ON(on_value->data_type() == DataType::UNKNOWN);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(on_value, 1, DataType::U8, DataType::S8,
+ DataType::U16, DataType::S16, DataType::F16,
+ DataType::U32, DataType::S32, DataType::F32);
+ if (output->total_size() != 0)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(on_value, output);
+ TensorShape output_shape = arm_compute::misc::shape_calculator::compute_onehot_shape_ex(
+ indices->tensor_shape(), static_cast<uint32_t>(depth), actual_axis);
+ ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() != output->tensor_shape().total_size());
+ }
+ return Status{};
+}
+
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *indices,
+ const ITensorInfo *on_value,
+ ITensorInfo *output, int depth, int axis)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(indices, on_value, output);
+ const uint32_t actual_axis = wrap_around(axis, static_cast<int>(output->num_dimensions()));
+ // Output auto initialization if not yet initialized
+ TensorShape output_shape = arm_compute::misc::shape_calculator::compute_onehot_shape_ex(
+ indices->tensor_shape(), static_cast<uint32_t>(depth), actual_axis);
+ auto_init_if_empty((*output), output_shape, 1, on_value->data_type());
+ // Create window
+ Window win = calculate_max_window(*output, Steps());
+ output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape()));
+ return std::make_pair(Status{}, win);
+}
+} // namespace
+CLOneHotKernel::CLOneHotKernel()
+ : _indices(nullptr), _on_value(nullptr), _off_value(nullptr), _output(nullptr),
+ _is_off_value_memset(false)
+{
+}
+void CLOneHotKernel::configure(const ICLTensor *indices, const ICLTensor *on_value,
+ const ICLTensor *off_value, ICLTensor *output, int depth, int axis)
+{
+ _is_off_value_memset = false;
+ ARM_COMPUTE_ERROR_ON_NULLPTR(indices, on_value, off_value, output);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(off_value->info());
+ ARM_COMPUTE_ERROR_ON(off_value->info()->tensor_shape().total_size() != 1);
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(on_value, off_value);
+ _off_value = off_value;
+ configure_common(indices, on_value, output, depth, axis);
+}
+void CLOneHotKernel::configure(const ICLTensor *indices, const ICLTensor *on_value,
+ ICLTensor *output, int depth, int axis)
+{
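+ // The off value is assumed to be written by a separate fill pass (e.g. a memset), so this kernel only writes on-values.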
+ _is_off_value_memset = true;
+ ARM_COMPUTE_ERROR_ON_NULLPTR(indices, on_value, output);
+ configure_common(indices, on_value, output, depth, axis);
+}
+void CLOneHotKernel::configure_common(const ICLTensor *indices, const ICLTensor *on_value,
+ ICLTensor *output, int depth, int axis)
+{
+ ARM_COMPUTE_ERROR_THROW_ON(
+ validate_arguments(indices->info(), on_value->info(), output->info(), depth, axis));
+ // Configure kernel window
+ auto win_config =
+ validate_and_configure_window(indices->info(), on_value->info(), output->info(), depth, axis);
+ ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+ if (_is_off_value_memset)
+ {
+ // Replace the window with one computed from the indices info
+ win_config.second = calculate_max_window(*indices->info(), Steps());
+ }
+ _indices = indices;
+ _on_value = on_value;
+ _output = output;
+ const auto actual_axis = wrap_around(axis, static_cast<int>(output->info()->num_dimensions()));
+ // Set build options
+ CLBuildOptions build_opts;
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(
+ data_size_from_type(on_value->info()->data_type())));
+ build_opts.add_option("-DAXIS=" + support::cpp11::to_string(actual_axis));
+ build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(depth));
+ build_opts.add_option("-DOUTPUT_DIM_Z=" +
+ support::cpp11::to_string(output->info()->dimension(2)));
+ // Create kernel
+ const std::string kernel_name = _is_off_value_memset ? "one_hot_only_on_value" : "one_hot";
+ _kernel = static_cast<cl::Kernel>(
+ CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts.options()));
+ ICLKernel::configure_internal(win_config.second);
+}
+Status CLOneHotKernel::validate(const ITensorInfo *indices, const ITensorInfo *on_value,
+ const ITensorInfo *off_value, const ITensorInfo *output, int depth,
+ int axis)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(off_value);
+ ARM_COMPUTE_RETURN_ERROR_ON(off_value->tensor_shape().total_size() != 1);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(on_value, off_value);
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(indices, on_value, output, depth, axis));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(indices->clone().get(),
+ on_value->clone().get(),
+ output->clone().get(), depth, axis)
+ .first);
+ return Status{};
+}
+Status CLOneHotKernel::validate(const ITensorInfo *indices, const ITensorInfo *on_value,
+ const ITensorInfo *output, int depth, int axis)
+{
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(indices, on_value, output, depth, axis));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(indices->clone().get(),
+ on_value->clone().get(),
+ output->clone().get(), depth, axis)
+ .first);
+ return Status{};
+}
+void CLOneHotKernel::run(const Window &window, cl::CommandQueue &queue)
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
+ Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ);
+ unsigned int idx = 0;
+ add_3D_tensor_argument(idx, _indices, window_collapsed);
+ add_1D_tensor_argument(idx, _on_value, window_collapsed);
+ if (!_is_off_value_memset)
+ {
+ add_1D_tensor_argument(idx, _off_value, window_collapsed);
+ }
+ add_4D_tensor_argument(idx, _output, window_collapsed);
+ enqueue(queue, *this, window_collapsed, lws_hint());
+}
+
+} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLPadLayerKernelEx.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLPadLayerKernelEx.cpp
new file mode 100644
index 000000000..b6efeac35
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLPadLayerKernelEx.cpp
@@ -0,0 +1,292 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/CL/kernels/CLPadLayerKernelEx.h"
+
+#include "arm_compute/core/CL/CLKernelLibraryEx.h"
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "support/StringSupport.h"
+
+namespace arm_compute
+{
+namespace
+{
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
+ const PaddingList &padding, PixelValue constant_value, PaddingMode mode)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_UNUSED(constant_value);
+ ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
+ ARM_COMPUTE_RETURN_ERROR_ON(padding.size() > input->num_dimensions());
+ if (mode == PaddingMode::REFLECT || mode == PaddingMode::SYMMETRIC)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON(padding.size() > 3);
+
+ const auto is_reflect = static_cast<unsigned int>(mode == PaddingMode::REFLECT);
+ for (size_t i = 0; i < padding.size(); ++i)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON(padding.at(i).first > (input->dimension(i) - is_reflect));
+ ARM_COMPUTE_RETURN_ERROR_ON(padding.at(i).second > (input->dimension(i) - is_reflect));
+ }
+ }
+
+ if (output->total_size() > 0)
+ {
+ TensorShape padded_shape =
+ misc::shape_calculator::compute_padded_shape(input->tensor_shape(), padding);
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(output, input);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), padded_shape);
+ }
+
+ return Status{};
+}
+
+std::pair<Status, Window>
+validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, const PaddingList &padding,
+ PixelValue constant_value, PaddingMode mode,
+ unsigned int &num_elems_processed_per_iteration)
+{
+ ARM_COMPUTE_UNUSED(constant_value, mode);
+
+ const TensorShape padded_shape =
+ misc::shape_calculator::compute_padded_shape(input->tensor_shape(), padding);
+ auto_init_if_empty(*output, input->clone()->set_tensor_shape(padded_shape));
+
+ num_elems_processed_per_iteration =
+ std::min(16U, 32U / static_cast<unsigned int>(element_size_from_data_type(input->data_type())));
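+ // If the input is narrower than one vector, drop to the largest power of two that fits its width.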
+ if (input->dimension(0) < num_elems_processed_per_iteration)
+ {
+ num_elems_processed_per_iteration =
+ 1 << static_cast<unsigned int>(std::log2(input->dimension(0)));
+ }
+
+ // Configure kernel window
+ Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
+
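+ // CONSTANT mode starts input reads at a negative offset so vector accesses line up with the padded output.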
+ const int input_start_x =
+ mode == PaddingMode::CONSTANT ? -(padding.at(0).first % num_elems_processed_per_iteration) : 0;
+ const int input_start_y =
+ (mode == PaddingMode::CONSTANT && padding.size() > 1) ? -padding.at(1).first : 0;
+
+ AccessWindowRectangle input_access(input, input_start_x, input_start_y,
+ num_elems_processed_per_iteration, 1);
+ AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
+
+ const bool window_changed = update_window_and_padding(win, input_access, output_access);
+ output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
+
+ Status err = (window_changed)
+ ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
+ : Status{};
+ return std::make_pair(err, win);
+}
+} // namespace
+
+CLPadLayerKernelEx::CLPadLayerKernelEx()
+ : _input(nullptr), _output(nullptr), _input_start_x(0), _input_start_y(0), _4d_enabled(false)
+{
+}
+
+void CLPadLayerKernelEx::configure(const ICLTensor *input, ICLTensor *output,
+ const PaddingList &padding, PixelValue constant_value,
+ PaddingMode mode)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, padding, constant_value,
+ mode);
+}
+
+void CLPadLayerKernelEx::configure(const CLCompileContext &compile_context, const ICLTensor *input,
+ ICLTensor *output, const PaddingList &padding,
+ PixelValue constant_value, PaddingMode mode)
+{
+ ARM_COMPUTE_UNUSED(compile_context);
+ // Perform validation step
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_ERROR_THROW_ON(
+ validate_arguments(input->info(), output->info(), padding, constant_value, mode));
+
+ _input = input;
+ _output = output;
+ _4d_enabled = (mode == PaddingMode::CONSTANT) && (padding.size() > 3);
+
+ // Configure window
+ unsigned int vec_size;
+ auto win_config = validate_and_configure_window(input->info(), output->info(), padding,
+ constant_value, mode, vec_size);
+ ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+ ICLKernel::configure_internal(win_config.second);
+
+ // Set build options
+ std::string kernel_name = "pad_layer_";
+
+ const DataType &data_type = input->info()->data_type();
+ const unsigned int input_width = input->info()->dimension(0);
+ const unsigned int input_height = input->info()->dimension(1);
+ const unsigned int input_depth = input->info()->dimension(2);
+ const unsigned int pad_x_before = padding.at(0).first;
+ const unsigned int pad_y_before = padding.size() > 1 ? padding.at(1).first : 0;
+ const unsigned int pad_z_before = padding.size() > 2 ? padding.at(2).first : 0;
+ const unsigned int pad_right_start = input_width + pad_x_before;
+
+ _input_start_x = mode == PaddingMode::CONSTANT ? -(pad_x_before % vec_size) : 0;
+ _input_start_y = (mode == PaddingMode::CONSTANT && padding.size() > 1) ? -padding.at(1).first : 0;
+
+ CLBuildOptions build_opts;
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type));
+ build_opts.add_option("-DSELECT_DT=" + get_cl_select_type_from_data_type(data_type));
+ build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size));
+ build_opts.add_option("-DPAD_X_BEFORE=" + support::cpp11::to_string(pad_x_before));
+ build_opts.add_option("-DSRC_WIDTH=" + support::cpp11::to_string(input_width));
+ if (padding.size() > 1)
+ {
+ build_opts.add_option("-DPAD_Y_BEFORE=" + support::cpp11::to_string(pad_y_before));
+ build_opts.add_option("-DSRC_HEIGHT=" + support::cpp11::to_string(input_height));
+
+ if (padding.size() > 2)
+ {
+ build_opts.add_option("-DPAD_Z_BEFORE=" + support::cpp11::to_string(pad_z_before));
+ build_opts.add_option("-DSRC_DEPTH=" + support::cpp11::to_string(input_depth));
+ }
+ }
+
+ switch (mode)
+ {
+ case PaddingMode::CONSTANT:
+ {
+ kernel_name += "constant";
+
+ build_opts.add_option("-DCONST_VAL=" + string_from_pixel_value(constant_value, data_type));
+ build_opts.add_option_if(pad_x_before >= vec_size,
+ "-DNUM_THREADS_TO_SKIP_X=" +
+ support::cpp11::to_string(pad_x_before / vec_size));
+
+ if (_4d_enabled)
+ {
+ build_opts.add_option("-DPAD_W_BEFORE=" + support::cpp11::to_string(padding.at(3).first));
+ build_opts.add_option("-DSRC_BATCH=" +
+ support::cpp11::to_string(input->info()->dimension(3)));
+ }
+
+ break;
+ }
+ case PaddingMode::SYMMETRIC:
+ case PaddingMode::REFLECT:
+ {
+ kernel_name += "symmetric_reflect";
+
+ const auto is_reflect = static_cast<unsigned int>(mode == PaddingMode::REFLECT);
+
+ const unsigned int pad_x_before_remainder = pad_x_before % vec_size;
+ const unsigned int pad_x_after_remainder = pad_right_start % vec_size;
+ const unsigned int after_pad_fact_x = (2 * input_width + pad_x_before) - is_reflect;
+ const unsigned int output_last_x =
+ ceil_to_multiple(pad_right_start + padding.at(0).second, vec_size);
+
+ build_opts.add_option("-DIS_REFLECT=" + support::cpp11::to_string(is_reflect));
+ build_opts.add_option("-DPAD_X_BEFORE_REMAINDER=" +
+ support::cpp11::to_string(pad_x_before_remainder));
+ build_opts.add_option("-DPAD_X_AFTER_REMAINDER=" +
+ support::cpp11::to_string(pad_x_after_remainder));
+ build_opts.add_option(
+ "-DPAD_X_BEFORE_REMAINDER_REFL=" +
+ support::cpp11::to_string((pad_x_before_remainder + is_reflect) % vec_size));
+ build_opts.add_option(
+ "-DPAD_X_AFTER_REMAINDER_REFL=" +
+ support::cpp11::to_string((pad_x_after_remainder - is_reflect) % vec_size));
+ build_opts.add_option("-DAFTER_PAD_FACT_X=" + support::cpp11::to_string(after_pad_fact_x));
+ build_opts.add_option_if(after_pad_fact_x < output_last_x,
+ "-DAFTER_PAD_REM=" +
+ support::cpp11::to_string(after_pad_fact_x % vec_size));
+
+ break;
+ }
+ default:
+ ARM_COMPUTE_ERROR("Padding mode not supported.");
+ }
+
+ // Create kernel
+ _kernel = static_cast<cl::Kernel>(
+ CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts.options()));
+}
+
+Status CLPadLayerKernelEx::validate(const ITensorInfo *input, const ITensorInfo *output,
+ const PaddingList &padding, PixelValue constant_value,
+ PaddingMode mode)
+{
+ unsigned int vec_size;
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, padding, constant_value, mode));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(),
+ output->clone().get(), padding,
+ constant_value, mode, vec_size)
+ .first);
+
+ return Status{};
+}
+
+void CLPadLayerKernelEx::run(const Window &window, cl::CommandQueue &queue)
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+
+ Window win_in = window;
+ win_in.adjust(Window::DimX, _input_start_x, true);
+ win_in.adjust(Window::DimY, _input_start_y, true);
+
+ Window slice_out = window.first_slice_window_3D();
+ Window slice_in = win_in.first_slice_window_3D();
+ unsigned int batch = 0;
+ do
+ {
+ unsigned int idx = 0;
+ add_3D_tensor_argument(idx, _input, slice_in);
+ add_3D_tensor_argument(idx, _output, slice_out);
+ if (_4d_enabled)
+ {
+ add_argument<unsigned int>(idx, batch++);
+ }
+
+ enqueue(queue, *this, slice_out, lws_hint());
+ } while (window.slide_window_slice_3D(slice_out) && win_in.slide_window_slice_3D(slice_in));
+}
+} // namespace arm_compute
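
Aside on the window setup in the hunk above: the pad kernel processes up to 16 elements per iteration, capped at 32 bytes worth of data, and narrow inputs fall back to the largest power of two that still fits the row. A minimal standalone sketch of that selection logic (not part of the patch; assumes a non-zero input width, since log2(0) is undefined):

#include <algorithm>
#include <cmath>
#include <cstdio>

// Mirrors the vec-size choice in validate_and_configure_window above:
// cap at 16 lanes or 32 bytes, then round down to a power of two that
// does not exceed the input row length.
unsigned int pick_vec_size(unsigned int element_size, unsigned int input_width)
{
  unsigned int vec = std::min(16u, 32u / element_size);
  if (input_width < vec)
    vec = 1u << static_cast<unsigned int>(std::log2(input_width));
  return vec;
}

int main()
{
  std::printf("%u\n", pick_vec_size(4, 100)); // F32 row of 100 -> 8 lanes
  std::printf("%u\n", pick_vec_size(1, 3));   // U8 row of 3    -> 2 lanes
  return 0;
}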
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLQuantizationSymmetricKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLQuantizationSymmetricKernel.cpp
index 1a7a18cfa..9aa815f55 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLQuantizationSymmetricKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLQuantizationSymmetricKernel.cpp
@@ -40,15 +40,19 @@
#include "arm_compute/core/CL/kernels/CLQuantizationSymmetricKernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+
+#include "src/core/CL/CLValidate.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
#include "support/StringSupport.h"
namespace arm_compute
@@ -87,9 +91,9 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
if (multi_access_x)
{
- win.set(Window::DimX,
- Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x),
- vec_size_x));
+ win.set(
+ Window::DimX,
+ Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x), vec_size_x));
}
Coordinates coord;
@@ -101,7 +105,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
} // namespace
CLQuantizationSymmetricKernel::CLQuantizationSymmetricKernel()
- : _input(nullptr), _scale_factor(nullptr), _output(nullptr)
+ : _input(nullptr), _scale_factor(nullptr), _output(nullptr)
{
}
@@ -110,7 +114,7 @@ void CLQuantizationSymmetricKernel::configure(const ICLTensor *input, const ICLT
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, scale_factor, output);
ARM_COMPUTE_ERROR_THROW_ON(
- validate_arguments(input->info(), scale_factor->info(), output->info()));
+ validate_arguments(input->info(), scale_factor->info(), output->info()));
_input = input;
_scale_factor = scale_factor;
@@ -132,11 +136,11 @@ void CLQuantizationSymmetricKernel::configure(const ICLTensor *input, const ICLT
build_opts.add_option("-DDATA_TYPE_OUT=" +
get_cl_type_from_data_type(output->info()->data_type()));
build_opts.add_option_if(
- multi_access_x, "-DLAST_ACCESSED_X=" +
- support::cpp11::to_string(std::max<int>(input_width_x - vec_size_x, 0)));
+ multi_access_x,
+ "-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max<int>(input_width_x - vec_size_x, 0)));
_kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("quantization_symm8", build_opts.options()));
+ CLKernelLibraryEx::get().create_kernel("quantization_symm8", build_opts.options()));
}
Status CLQuantizationSymmetricKernel::validate(const ITensorInfo *input,
@@ -145,7 +149,7 @@ Status CLQuantizationSymmetricKernel::validate(const ITensorInfo *input,
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, scale_factor, output));
ARM_COMPUTE_RETURN_ON_ERROR(
- validate_and_configure_window(input->clone().get(), output->clone().get()).first);
+ validate_and_configure_window(input->clone().get(), output->clone().get()).first);
return Status{};
}
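
The -DLAST_ACCESSED_X option set in the hunk above follows ACL's usual guard for vectorized over-reads: the window is rounded up to a multiple of the vector size, and the kernel clamps each work item's start so the final (partial) vector overlaps the previous one instead of running off the row. A host-side model of that traversal (hypothetical helper, not the actual OpenCL source):

#include <algorithm>
#include <cstdio>

// Emulates one vectorized pass over a row of `width` elements: every work
// item starts at id*vec, clamped to LAST_ACCESSED_X = max(width - vec, 0)
// so the last load stays in bounds.
void traverse(int width, int vec)
{
  const int last_accessed_x = std::max(width - vec, 0);
  const int num_items = (width + vec - 1) / vec; // window rounded up to vec
  for (int id = 0; id < num_items; ++id)
  {
    const int x = std::min(id * vec, last_accessed_x);
    std::printf("item %d loads [%d, %d)\n", id, x, x + vec);
  }
}

int main()
{
  traverse(10, 4); // [0,4) [4,8) [6,10) -- no out-of-bounds access
  return 0;
}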
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp
index 06c2579f2..70374ba61 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp
@@ -43,6 +43,9 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
#include "arm_compute/core/CL/ICLTensor.h"
+
+#include "src/core/helpers/WindowHelpers.h"
+
#include "support/StringSupport.h"
using namespace arm_compute;
@@ -63,7 +66,7 @@ const TensorShape inferOutputShape(const TensorShape &input_shape, const uint32_
namespace
{
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const uint32_t axis,
- ReduceOperation op)
+ ReductionOperation op)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
@@ -74,7 +77,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16,
DataType::F32, DataType::S32);
- if (op == ReduceOperation::SUM)
+ if (op == ReductionOperation::SUM)
{
ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::QASYMM8,
"Not support QASYMM8, yet");
@@ -98,7 +101,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
CLReduceOperationKernel::CLReduceOperationKernel() : _input(nullptr), _output(nullptr), _axis() {}
void CLReduceOperationKernel::configure(const ICLTensor *input, ICLTensor *output,
- const uint32_t axis, ReduceOperation op)
+ const uint32_t axis, ReductionOperation op)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
@@ -114,22 +117,22 @@ void CLReduceOperationKernel::configure(const ICLTensor *input, ICLTensor *outpu
// Construct kernel name
std::string kernel_name;
int op_code = 0;
- if (op == ReduceOperation::MAX)
+ if (op == ReductionOperation::MAX)
{
kernel_name = "reduce_min_max";
op_code = 1;
}
- else if (op == ReduceOperation::MIN)
+ else if (op == ReductionOperation::MIN)
{
kernel_name = "reduce_min_max";
op_code = 2;
}
- else if (op == ReduceOperation::SUM)
+ else if (op == ReductionOperation::SUM)
{
kernel_name = "reduce_sum_mean";
op_code = 3;
}
- else if (op == ReduceOperation::MEAN)
+ else if (op == ReductionOperation::MEAN_SUM)
{
kernel_name = "reduce_sum_mean";
op_code = 4;
@@ -145,7 +148,7 @@ void CLReduceOperationKernel::configure(const ICLTensor *input, ICLTensor *outpu
// Create kernel
_kernel =
- static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts));
+ static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts));
// Configure kernel window
Window win = calculate_max_window(*output_info, Steps());
@@ -158,7 +161,7 @@ void CLReduceOperationKernel::configure(const ICLTensor *input, ICLTensor *outpu
}
Status CLReduceOperationKernel::validate(const ITensorInfo *input, const ITensorInfo *output,
- const uint32_t axis, ReduceOperation op)
+ const uint32_t axis, ReductionOperation op)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, axis, op));
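
Aside: the hunks above migrate this kernel from the local ReduceOperation enum to arm_compute's ReductionOperation, mapping MEAN onto MEAN_SUM. A minimal sketch of the resulting op -> (kernel, op_code) dispatch, with a trimmed stand-in enum (the real ReductionOperation has more members):

#include <stdexcept>
#include <string>
#include <utility>

enum class ReductionOperation { MAX, MIN, SUM, MEAN_SUM }; // trimmed stand-in

std::pair<std::string, int> select_reduce_kernel(ReductionOperation op)
{
  switch (op)
  {
    case ReductionOperation::MAX:      return {"reduce_min_max", 1};
    case ReductionOperation::MIN:      return {"reduce_min_max", 2};
    case ReductionOperation::SUM:      return {"reduce_sum_mean", 3};
    case ReductionOperation::MEAN_SUM: return {"reduce_sum_mean", 4};
    default: throw std::runtime_error("Reduce op not supported");
  }
}

MIN/MAX share one OpenCL kernel distinguished by op_code, as do SUM/MEAN_SUM, which keeps the number of compiled kernels down.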
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLScaleFactorSymm8Kernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLScaleFactorSymm8Kernel.cpp
index 8d8853c81..c9d6dc31c 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLScaleFactorSymm8Kernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLScaleFactorSymm8Kernel.cpp
@@ -40,7 +40,7 @@
#include "arm_compute/core/CL/kernels/CLScaleFactorSymm8Kernel.h"
-#include "arm_compute/core/AccessWindowStatic.h"
+#include "src/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
#include "arm_compute/core/CL/ICLTensor.h"
@@ -48,6 +48,10 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
#include "support/StringSupport.h"
#include <climits>
@@ -94,8 +98,8 @@ std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITe
output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
Status err = (window_changed)
- ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
- : Status{};
+ ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
+ : Status{};
return std::make_tuple(err, win);
}
} // namespace
@@ -115,7 +119,7 @@ void CLScaleFactorSymm8Kernel::configure(const ICLTensor *input, ICLTensor *outp
// Create kernel
_kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("scale_factor_symm8", build_opts));
+ CLKernelLibraryEx::get().create_kernel("scale_factor_symm8", build_opts));
auto win_config = validate_and_configure_window(input->info(), output->info());
@@ -128,7 +132,7 @@ Status CLScaleFactorSymm8Kernel::validate(const ITensorInfo *input, const ITenso
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output));
ARM_COMPUTE_RETURN_ON_ERROR(
- std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get())));
+ std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get())));
return Status{};
}
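
For context on what this kernel produces (the "scale_factor_symm8" OpenCL source itself is not shown in this diff): a symmetric int8 scale factor is conventionally max(|x|)/127, so that x/scale fits the signed 8-bit range. A hedged scalar sketch under that assumption:

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

// Per-row symmetric 8-bit scale factor: largest magnitude divided by 127.
float symm8_scale_factor(const std::vector<float> &row)
{
  float max_abs = 0.f;
  for (float v : row)
    max_abs = std::max(max_abs, std::fabs(v));
  return max_abs / 127.f;
}

int main()
{
  // weights row {0.5, -2.0, 1.0} -> scale 2/127 ~= 0.01575
  std::printf("%f\n", symm8_scale_factor({0.5f, -2.0f, 1.0f}));
  return 0;
}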
diff --git a/compute/ARMComputeEx/src/core/CPP/kernels/CPPOneHotKernelEx.cpp b/compute/ARMComputeEx/src/core/CPP/kernels/CPPOneHotKernelEx.cpp
deleted file mode 100644
index 480532388..000000000
--- a/compute/ARMComputeEx/src/core/CPP/kernels/CPPOneHotKernelEx.cpp
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/CPP/kernels/CPPOneHotKernelEx.h"
-#include "arm_compute/core/Coordinates.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-#include "arm_compute/core/utils/misc/Traits.h"
-
-namespace arm_compute
-{
-CPPOneHotKernelEx::CPPOneHotKernelEx()
- : _indices(nullptr), _depth(nullptr), _on_value(nullptr), _off_value(nullptr), _output(nullptr),
- _axis(-1)
-{
-}
-
-void CPPOneHotKernelEx::configure(const ITensor *indices, const ITensor *depth,
- const ITensor *on_value, const ITensor *off_value,
- ITensor *output, const int axis)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(indices, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate(indices, depth, on_value, off_value, axis));
-
- _indices = indices;
- _depth = depth;
- _on_value = on_value;
- _off_value = off_value;
- _output = output;
- _axis = axis;
-
- ICPPKernel::configure(Window()); // Default 1 iteration window
-}
-
-Status CPPOneHotKernelEx::validate(const ITensor *indices, const ITensor *depth,
- const ITensor *on_value, const ITensor *off_value,
- const int axis)
-{
- ARM_COMPUTE_UNUSED(on_value, off_value);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(indices, DataType::S32);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(depth, DataType::S32);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(indices->info()->num_dimensions() != 1,
- "Only 1D indices are supported.");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis != -1, "Only axis = -1 is supported.");
- return Status{};
-}
-
-bool CPPOneHotKernelEx::is_parallelisable() const { return false; }
-
-void CPPOneHotKernelEx::run(const Window &window, const ThreadInfo &info)
-{
- ARM_COMPUTE_UNUSED(info);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IKernel::window(), window);
-
- const auto num_indices = _indices->info()->dimension(0);
- const auto depth = *reinterpret_cast<int32_t *>(_depth->ptr_to_element(Coordinates{0}));
- const auto dtype = _output->info()->data_type();
- switch (dtype)
- {
- case DataType::F32:
- {
- const auto on_value = *reinterpret_cast<float *>(_on_value->ptr_to_element(Coordinates{0}));
- const auto off_value = *reinterpret_cast<float *>(_off_value->ptr_to_element(Coordinates{0}));
- for (size_t i = 0; i < num_indices; ++i)
- {
- const auto index = *reinterpret_cast<int32_t *>(_indices->ptr_to_element(Coordinates{i}));
- for (int d = 0; d < depth; ++d)
- *reinterpret_cast<float *>(_output->ptr_to_element(Coordinates(d, i))) =
- (d == index) ? on_value : off_value;
- }
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Unsupported data type.");
- }
-}
-} // namespace arm_compute
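
The file removed above carried a scalar CPU one-hot for 1-D S32 indices with axis fixed at -1. A self-contained equivalent of its F32 path (a sketch mirroring the deleted run() loop, not the deleted code verbatim; out-of-range indices simply leave the row at off_value, matching the original's d == index test):

#include <cstdint>
#include <vector>

std::vector<float> one_hot(const std::vector<int32_t> &indices, int32_t depth,
                           float on_value, float off_value)
{
  std::vector<float> out(indices.size() * depth, off_value);
  for (size_t i = 0; i < indices.size(); ++i)
  {
    const int32_t idx = indices[i];
    if (idx >= 0 && idx < depth) // out-of-range indices stay "off"
      out[i * depth + idx] = on_value;
  }
  return out;
}

int main()
{
  const auto v = one_hot({0, 2, 5}, 3, 1.f, 0.f); // rows: [1 0 0] [0 0 1] [0 0 0]
  return static_cast<int>(v.size()) - 9;          // exits 0 when shape is 3x3
}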
diff --git a/compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp b/compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp
index 254c33ea9..1d4d33ac2 100644
--- a/compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp
@@ -42,29 +42,23 @@
#include <algorithm>
#include "arm_compute/core/Types.h"
-#include "arm_compute/core/NEON/NEAsymm.h"
+#include "src/core/NEON/NEAsymm.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Window.h"
namespace
{
-void store_quantized_int32(uint8_t *output_ptr, const int32x4x4_t &out)
-{
- const uint8x8_t pa = vqmovun_s16(vcombine_s16(vqmovn_s32(out.val[0]), vqmovn_s32(out.val[1])));
- const uint8x8_t pb = vqmovun_s16(vcombine_s16(vqmovn_s32(out.val[2]), vqmovn_s32(out.val[3])));
- vst1q_u8(output_ptr, vcombine_u8(pa, pb));
-}
using namespace arm_compute;
template <typename InputScalarType, typename OutputScalarType, typename InputVectorType>
void elementwise_op_templ(
- const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window,
- OutputScalarType (*scalar_func)(const InputScalarType &, const InputScalarType &),
- int (*broadcast_func)(int, int, int, const InputScalarType *, const InputScalarType &,
- OutputScalarType *, const bool),
- int (*neon_func)(int, int, int, const InputScalarType *, const InputScalarType *,
- OutputScalarType *))
+ const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window,
+ OutputScalarType (*scalar_func)(const InputScalarType &, const InputScalarType &),
+ int (*broadcast_func)(int, int, int, const InputScalarType *, const InputScalarType &,
+ OutputScalarType *, const bool),
+ int (*neon_func)(int, int, int, const InputScalarType *, const InputScalarType *,
+ OutputScalarType *))
{
// Create input windows
Window input1_win = window.broadcast_if_dimension_le_one(in1->info()->tensor_shape());
@@ -94,26 +88,26 @@ void elementwise_op_templ(
Iterator non_broadcast_input(non_broadcast_tensor, non_broadcast_win);
Iterator output(out, win);
- execute_window_loop(win,
- [&](const Coordinates &) {
- auto output_ptr = reinterpret_cast<OutputScalarType *>(output.ptr());
- const auto non_broadcast_input_ptr =
- reinterpret_cast<const InputScalarType *>(non_broadcast_input.ptr());
- const InputScalarType broadcast_value =
- *reinterpret_cast<const InputScalarType *>(broadcast_input.ptr());
-
- int x = (*broadcast_func)(window_start_x, window_end_x, window_step_x,
- non_broadcast_input_ptr, broadcast_value,
- output_ptr, !is_broadcast_input_2);
- for (; x < window_end_x; ++x)
- {
- const auto a = *(non_broadcast_input_ptr + x);
- *(output_ptr + x) =
- (*scalar_func)(!is_broadcast_input_2 ? broadcast_value : a,
- !is_broadcast_input_2 ? a : broadcast_value);
- }
- },
- broadcast_input, non_broadcast_input, output);
+ execute_window_loop(
+ win,
+ [&](const Coordinates &) {
+ auto output_ptr = reinterpret_cast<OutputScalarType *>(output.ptr());
+ const auto non_broadcast_input_ptr =
+ reinterpret_cast<const InputScalarType *>(non_broadcast_input.ptr());
+ const InputScalarType broadcast_value =
+ *reinterpret_cast<const InputScalarType *>(broadcast_input.ptr());
+
+ int x =
+ (*broadcast_func)(window_start_x, window_end_x, window_step_x, non_broadcast_input_ptr,
+ broadcast_value, output_ptr, !is_broadcast_input_2);
+ for (; x < window_end_x; ++x)
+ {
+ const auto a = *(non_broadcast_input_ptr + x);
+ *(output_ptr + x) = (*scalar_func)(!is_broadcast_input_2 ? broadcast_value : a,
+ !is_broadcast_input_2 ? a : broadcast_value);
+ }
+ },
+ broadcast_input, non_broadcast_input, output);
}
else
{
@@ -125,24 +119,23 @@ void elementwise_op_templ(
Iterator input2(in2, input2_win);
Iterator output(out, win);
- execute_window_loop(win,
- [&](const Coordinates &) {
- auto output_ptr = reinterpret_cast<OutputScalarType *>(output.ptr());
- const auto input1_ptr =
- reinterpret_cast<const InputScalarType *>(input1.ptr());
- const auto input2_ptr =
- reinterpret_cast<const InputScalarType *>(input2.ptr());
-
- int x = (*neon_func)(window_start_x, window_end_x, window_step_x,
- input1_ptr, input2_ptr, output_ptr);
- for (; x < window_end_x; ++x)
- {
- const auto a = *(input1_ptr + x);
- const auto b = *(input2_ptr + x);
- *(output_ptr + x) = (*scalar_func)(a, b);
- }
- },
- input1, input2, output);
+ execute_window_loop(
+ win,
+ [&](const Coordinates &) {
+ auto output_ptr = reinterpret_cast<OutputScalarType *>(output.ptr());
+ const auto input1_ptr = reinterpret_cast<const InputScalarType *>(input1.ptr());
+ const auto input2_ptr = reinterpret_cast<const InputScalarType *>(input2.ptr());
+
+ int x = (*neon_func)(window_start_x, window_end_x, window_step_x, input1_ptr, input2_ptr,
+ output_ptr);
+ for (; x < window_end_x; ++x)
+ {
+ const auto a = *(input1_ptr + x);
+ const auto b = *(input2_ptr + x);
+ *(output_ptr + x) = (*scalar_func)(a, b);
+ }
+ },
+ input1, input2, output);
}
}
@@ -151,195 +144,6 @@ void elementwise_op_templ(
namespace arm_compute
{
-float32x4x4_t load_quantized(const uint8_t *input1_ptr, const int32x4_t &offset,
- const float32x4_t &scale)
-{
- qasymm8x16_t x = vld1q_u8(input1_ptr);
- const float32x4x4_t out = {{
- vmulq_f32(
- vcvtq_f32_s32(vsubq_s32(
- vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(x))))), offset)),
- scale),
- vmulq_f32(
- vcvtq_f32_s32(vsubq_s32(
- vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_low_u8(x))))), offset)),
- scale),
- vmulq_f32(
- vcvtq_f32_s32(vsubq_s32(
- vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_high_u8(x))))), offset)),
- scale),
- vmulq_f32(
- vcvtq_f32_s32(vsubq_s32(
- vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_high_u8(x))))), offset)),
- scale),
- }};
- return out;
-}
-
-void store_quantized(uint8_t *output_ptr, const float32x4x4_t &rf, const float32x4_t &offset,
- const float32x4_t &invscale)
-{
- int32x4x4_t out = {{
- vcvtq_s32_f32(vmlaq_f32(offset, rf.val[0], invscale)),
- vcvtq_s32_f32(vmlaq_f32(offset, rf.val[1], invscale)),
- vcvtq_s32_f32(vmlaq_f32(offset, rf.val[2], invscale)),
- vcvtq_s32_f32(vmlaq_f32(offset, rf.val[3], invscale)),
- }};
- store_quantized_int32(output_ptr, out);
-}
-
-float32x4x4_t dup_quantized(uint8_t broadcast_value, int offset, float scale)
-{
- const qasymm8x16_t broadcast_value_vec = vdupq_n_u8(broadcast_value);
- const int32x4_t voffset = vdupq_n_s32(offset);
- const float32x4_t vscale = vdupq_n_f32(scale);
-
- const float32x4x4_t broadcast_vector = {{
- vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(
- vmovl_u8(vget_low_u8(broadcast_value_vec))))),
- voffset)),
- vscale),
- vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(
- vmovl_u8(vget_low_u8(broadcast_value_vec))))),
- voffset)),
- vscale),
- vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(
- vmovl_u8(vget_high_u8(broadcast_value_vec))))),
- voffset)),
- vscale),
- vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(
- vmovl_u8(vget_high_u8(broadcast_value_vec))))),
- voffset)),
- vscale),
- }};
- return broadcast_vector;
-}
-
-void elementwise_op_quantized(
- const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window,
- uint8_t (*scalar_func)(const float &, const float &, QuantizationInfo),
- int (*broadcast_func)(int, int, int, const uint8_t *, float32x4x4_t, uint8_t *, int32x4_t,
- float32x4_t, float32x4_t, float32x4_t, const bool),
- int (*neon_func)(int, int, int, const uint8_t *, const uint8_t *, uint8_t *, int32x4_t,
- int32x4_t, float32x4_t, float32x4_t, float32x4_t, float32x4_t))
-{
- // Create input windows
- Window input1_win = window.broadcast_if_dimension_le_one(in1->info()->tensor_shape());
- Window input2_win = window.broadcast_if_dimension_le_one(in2->info()->tensor_shape());
-
- // Clear X Dimension on execution window as we handle manually
- Window win = window;
- win.set(Window::DimX, Window::Dimension(0, 1, 1));
-
- const int window_step_x = 16;
- const auto window_start_x = static_cast<int>(window.x().start());
- const auto window_end_x = static_cast<int>(window.x().end());
- const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0);
-
- UniformQuantizationInfo qinfo = out->info()->quantization_info().uniform();
- const float output_scale = qinfo.scale;
- const int output_offset = qinfo.offset;
-
- // Output quantization info (add 0.5 to round toward the nearest integer - 0.5 rounds away from
- // zero)
- const float32x4_t voffseto = vdupq_n_f32(output_offset + 0.5f);
- const float32x4_t invvscaleo = vdupq_n_f32(1.f / output_scale);
-
- if (is_broadcast_across_x)
- {
- // Select the broadcast input on the X axis
- const bool is_broadcast_input_2 = input2_win.x().step() == 0;
- Window broadcast_win = is_broadcast_input_2 ? input2_win : input1_win;
- Window non_broadcast_win = !is_broadcast_input_2 ? input2_win : input1_win;
- const ITensor *broadcast_tensor = is_broadcast_input_2 ? in2 : in1;
- const ITensor *non_broadcast_tensor = !is_broadcast_input_2 ? in2 : in1;
-
- const UniformQuantizationInfo broadcast_qinfo =
- broadcast_tensor->info()->quantization_info().uniform();
- const UniformQuantizationInfo non_broadcast_qinfo =
- non_broadcast_tensor->info()->quantization_info().uniform();
-
- const int32x4_t voffset_non_broadcast = vdupq_n_s32(non_broadcast_qinfo.offset);
- const float32x4_t vscale_non_broadcast = vdupq_n_f32(non_broadcast_qinfo.scale);
-
- // Clear X Dimension on execution window as we handle manually
- non_broadcast_win.set(Window::DimX, Window::Dimension(0, 1, 1));
-
- Iterator broadcast_input(broadcast_tensor, broadcast_win);
- Iterator non_broadcast_input(non_broadcast_tensor, non_broadcast_win);
- Iterator output(out, win);
-
- execute_window_loop(
- win,
- [&](const Coordinates &) {
- const auto non_broadcast_input_ptr =
- reinterpret_cast<const uint8_t *>(non_broadcast_input.ptr());
- const auto output_ptr = reinterpret_cast<uint8_t *>(output.ptr());
-
- const uint8_t broadcast_value = *reinterpret_cast<const uint8_t *>(broadcast_input.ptr());
- const float32x4x4_t broadcast_vector =
- dup_quantized(broadcast_value, broadcast_qinfo.offset, broadcast_qinfo.scale);
-
- int x = (*broadcast_func)(window_start_x, window_end_x, window_step_x,
- non_broadcast_input_ptr, broadcast_vector, output_ptr,
- voffset_non_broadcast, vscale_non_broadcast, voffseto,
- invvscaleo, !is_broadcast_input_2);
- for (; x < window_end_x; ++x)
- {
- const float afs =
- dequantize_qasymm8(*(non_broadcast_input_ptr + x), non_broadcast_qinfo);
- const float bfs = dequantize_qasymm8(broadcast_value, broadcast_qinfo);
- *(output_ptr + x) =
- (*scalar_func)(!is_broadcast_input_2 ? bfs : afs, !is_broadcast_input_2 ? afs : bfs,
- out->info()->quantization_info());
- }
- },
- broadcast_input, non_broadcast_input, output);
- }
- else
- {
- // Input1 quantization info
- UniformQuantizationInfo qinfo = in1->info()->quantization_info().uniform();
- const int32x4_t voffset1 = vdupq_n_s32(qinfo.offset);
- const float32x4_t vscale1 = vdupq_n_f32(qinfo.scale);
-
- // Input2 quantization info
- qinfo = in2->info()->quantization_info().uniform();
- const int32x4_t voffset2 = vdupq_n_s32(qinfo.offset);
- const float32x4_t vscale2 = vdupq_n_f32(qinfo.scale);
-
- // Clear X Dimension on execution window as we handle manually
- input1_win.set(Window::DimX, Window::Dimension(0, 1, 1));
- input2_win.set(Window::DimX, Window::Dimension(0, 1, 1));
-
- const QuantizationInfo input1_qinfo = in1->info()->quantization_info();
- const QuantizationInfo input2_qinfo = in2->info()->quantization_info();
-
- Iterator input1(in1, input1_win);
- Iterator input2(in2, input2_win);
- Iterator output(out, win);
-
- execute_window_loop(win,
- [&](const Coordinates &) {
- const auto input1_ptr = reinterpret_cast<const uint8_t *>(input1.ptr());
- const auto input2_ptr = reinterpret_cast<const uint8_t *>(input2.ptr());
- const auto output_ptr = reinterpret_cast<uint8_t *>(output.ptr());
-
- int x = (*neon_func)(window_start_x, window_end_x, window_step_x,
- input1_ptr, input2_ptr, output_ptr, voffset1,
- voffset2, vscale1, vscale2, voffseto, invvscaleo);
- for (; x < window_end_x; ++x)
- {
- const float afs = dequantize_qasymm8(*(input1_ptr + x), input1_qinfo);
- const float bfs = dequantize_qasymm8(*(input2_ptr + x), input2_qinfo);
- *(output_ptr + x) =
- (*scalar_func)(afs, bfs, out->info()->quantization_info());
- }
- },
- input1, input2, output);
- }
-}
-
void elementwise_op(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window,
float (*scalar_func)(const float &, const float &),
int (*broadcast_func)(int, int, int, const float *, const float &, float *,
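
The deleted elementwise_op_quantized above dequantized both QASYMM8 inputs, applied the scalar functor, and requantized with a +0.5f rounding bias (the truncating float-to-int conversion then rounds to nearest). A per-lane scalar model of that roundtrip, with an assumed struct name UniformQInfo standing in for ACL's UniformQuantizationInfo:

#include <algorithm>
#include <cstdint>
#include <cstdio>

struct UniformQInfo { float scale; int32_t offset; };

uint8_t quantized_binary_op(uint8_t a, uint8_t b, float (*op)(float, float),
                            UniformQInfo qa, UniformQInfo qb, UniformQInfo qo)
{
  const float af = (static_cast<int32_t>(a) - qa.offset) * qa.scale; // dequantize
  const float bf = (static_cast<int32_t>(b) - qb.offset) * qb.scale;
  const float rf = op(af, bf);
  // +0.5f reproduces the "round toward the nearest integer" note in the
  // removed code; truncation of the sum gives round-half-up for positives.
  const int32_t q = static_cast<int32_t>(rf / qo.scale + qo.offset + 0.5f);
  return static_cast<uint8_t>(std::min(std::max(q, 0), 255)); // saturate to u8
}

int main()
{
  UniformQInfo q{0.5f, 10};
  // (20-10)*0.5 + (30-10)*0.5 = 15.0 -> 15/0.5 + 10 = 40
  std::printf("%u\n", quantized_binary_op(20, 30,
                                          [](float x, float y) { return x + y; },
                                          q, q, q));
  return 0;
}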
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEActivationLayerKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEActivationLayerKernelEx.cpp
deleted file mode 100644
index 648705ba9..000000000
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEActivationLayerKernelEx.cpp
+++ /dev/null
@@ -1,730 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/NEON/kernels/NEActivationLayerKernelEx.h"
-
-#include "arm_compute/core/CPP/Validate.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/NEAsymm.h"
-#include "arm_compute/core/NEON/NEFixedPoint.h"
-#include "arm_compute/core/NEON/NEMath.h"
-#include "arm_compute/core/NEON/NESymm.h"
-#include "arm_compute/core/NEON/wrapper/wrapper.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-
-#include <arm_neon.h>
-#include <array>
-#include <cmath>
-#include <map>
-#include <set>
-
-using namespace arm_compute;
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
- const ActivationLayerInfo &activation_info)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
- input, 1, DataType::U8, DataType::QASYMM8, DataType::QSYMM16, DataType::F16, DataType::F32);
-
- static std::set<ActivationLayerInfo::ActivationFunction> qasymm8_supported_activations = {
- ActivationLayerInfo::ActivationFunction::RELU,
- ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
- ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
- ActivationLayerInfo::ActivationFunction::LOGISTIC,
- ActivationLayerInfo::ActivationFunction::TANH};
- static std::set<ActivationLayerInfo::ActivationFunction> qsymm16_supported_activations = {
- ActivationLayerInfo::ActivationFunction::LOGISTIC,
- ActivationLayerInfo::ActivationFunction::TANH};
- const DataType data_type = input->data_type();
- const QuantizationInfo &oq_info =
- (output != nullptr) ? output->quantization_info() : input->quantization_info();
- const ActivationLayerInfo::ActivationFunction f_act = activation_info.activation();
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- is_data_type_quantized_asymmetric(data_type) &&
- (qasymm8_supported_activations.count(f_act) == 0),
- "For QASYMM8 only tanh, logistic, relu and lower/upper bounded relu are supported");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_symmetric(data_type) &&
- (qsymm16_supported_activations.count(f_act) == 0),
- "For QSYMM16 only tanh and logistic are supported");
- ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(data_type) &&
- (f_act == ActivationLayerInfo::ActivationFunction::TANH) &&
- (oq_info != QuantizationInfo(1.f / 128.f, 128)));
- ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(data_type) &&
- (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) &&
- (oq_info != QuantizationInfo(1.f / 256.f, 0)));
-
- ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_symmetric(data_type) &&
- (f_act == ActivationLayerInfo::ActivationFunction::TANH) &&
- (oq_info != QuantizationInfo(1.f / 32768.f, 0)));
- ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_symmetric(data_type) &&
- (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) &&
- (oq_info != QuantizationInfo(1.f / 32768.f, 0)));
-
- // Checks performed when output is configured
- if ((output != nullptr) && (output->total_size() != 0))
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- }
-
- return Status{};
-}
-
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
-{
- // Configure kernel window
- Window win = calculate_max_window(*input, Steps());
-
- if (output != nullptr)
- {
- // Output auto inizialitation if not yet initialized
- auto_init_if_empty(*output, *input->clone());
-
- // NEActivationLayerKernelEx doesn't need padding so update_window_and_padding() can be skipped
- Coordinates coord;
- coord.set_num_dimensions(output->num_dimensions());
- output->set_valid_region(ValidRegion(coord, output->tensor_shape()));
- }
-
- return std::make_pair(Status{}, win);
-}
-
-inline uint32x4_t vreinterpret_unsigend_int(const float32x4_t &vec)
-{
- return vreinterpretq_u32_f32(vec);
-}
-
-inline float32x4_t vreinterpret_floating_point(const uint32x4_t &vec)
-{
- return vreinterpretq_f32_u32(vec);
-}
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-inline uint16x8_t vreinterpret_unsigend_int(const float16x8_t &vec)
-{
- return vreinterpretq_u16_f16(vec);
-}
-inline float16x8_t vreinterpret_floating_point(const uint16x8_t &vec)
-{
- return vreinterpretq_f16_u16(vec);
-}
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/
-} // namespace
-
-NEActivationLayerKernelEx::NEActivationLayerKernelEx()
- : _input(nullptr), _output(nullptr), _func(nullptr), _act_info()
-{
-}
-
-void NEActivationLayerKernelEx::configure(ITensor *input, ITensor *output,
- ActivationLayerInfo activation_info)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input);
-
- _input = input;
- _act_info = activation_info;
- _output = input;
-
- // Out-of-place calculation
- if (output != nullptr)
- {
- _output = output;
- }
-
- // Disabled activation, thus no operation needed
- if (!activation_info.enabled())
- {
- _func = nullptr;
- }
-
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(
- input->info(), (output != nullptr) ? output->info() : nullptr, activation_info));
-
- // Activation functions : FP32
- static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f32 = {
- {ActivationFunction::ABS,
- &NEActivationLayerKernelEx::activation<ActivationFunction::ABS, float>},
- {ActivationFunction::LINEAR,
- &NEActivationLayerKernelEx::activation<ActivationFunction::LINEAR, float>},
- {ActivationFunction::LOGISTIC,
- &NEActivationLayerKernelEx::activation<ActivationFunction::LOGISTIC, float>},
- {ActivationFunction::RELU,
- &NEActivationLayerKernelEx::activation<ActivationFunction::RELU, float>},
- {ActivationFunction::BOUNDED_RELU,
- &NEActivationLayerKernelEx::activation<ActivationFunction::BOUNDED_RELU, float>},
- {ActivationFunction::LU_BOUNDED_RELU,
- &NEActivationLayerKernelEx::activation<ActivationFunction::LU_BOUNDED_RELU, float>},
- {ActivationFunction::LEAKY_RELU,
- &NEActivationLayerKernelEx::activation<ActivationFunction::LEAKY_RELU, float>},
- {ActivationFunction::SOFT_RELU,
- &NEActivationLayerKernelEx::activation<ActivationFunction::SOFT_RELU, float>},
- {ActivationFunction::ELU,
- &NEActivationLayerKernelEx::activation<ActivationFunction::ELU, float>},
- {ActivationFunction::SQRT,
- &NEActivationLayerKernelEx::activation<ActivationFunction::SQRT, float>},
- {ActivationFunction::SQUARE,
- &NEActivationLayerKernelEx::activation<ActivationFunction::SQUARE, float>},
- {ActivationFunction::TANH,
- &NEActivationLayerKernelEx::activation<ActivationFunction::TANH, float>},
- {ActivationFunction::IDENTITY,
- &NEActivationLayerKernelEx::activation<ActivationFunction::IDENTITY, float>},
- };
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
- // Activation functions : FP16
- static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f16 = {
- {ActivationFunction::ABS,
- &NEActivationLayerKernelEx::activation<ActivationFunction::ABS, float16_t>},
- {ActivationFunction::LINEAR,
- &NEActivationLayerKernelEx::activation<ActivationFunction::LINEAR, float16_t>},
- {ActivationFunction::LOGISTIC,
- &NEActivationLayerKernelEx::activation<ActivationFunction::LOGISTIC, float16_t>},
- {ActivationFunction::RELU,
- &NEActivationLayerKernelEx::activation<ActivationFunction::RELU, float16_t>},
- {ActivationFunction::BOUNDED_RELU,
- &NEActivationLayerKernelEx::activation<ActivationFunction::BOUNDED_RELU, float16_t>},
- {ActivationFunction::LU_BOUNDED_RELU,
- &NEActivationLayerKernelEx::activation<ActivationFunction::LU_BOUNDED_RELU, float16_t>},
- {ActivationFunction::LEAKY_RELU,
- &NEActivationLayerKernelEx::activation<ActivationFunction::LEAKY_RELU, float16_t>},
- {ActivationFunction::SOFT_RELU,
- &NEActivationLayerKernelEx::activation<ActivationFunction::SOFT_RELU, float16_t>},
- {ActivationFunction::ELU,
- &NEActivationLayerKernelEx::activation<ActivationFunction::ELU, float16_t>},
- {ActivationFunction::SQRT,
- &NEActivationLayerKernelEx::activation<ActivationFunction::SQRT, float16_t>},
- {ActivationFunction::SQUARE,
- &NEActivationLayerKernelEx::activation<ActivationFunction::SQUARE, float16_t>},
- {ActivationFunction::TANH,
- &NEActivationLayerKernelEx::activation<ActivationFunction::TANH, float16_t>},
- {ActivationFunction::IDENTITY,
- &NEActivationLayerKernelEx::activation<ActivationFunction::IDENTITY, float16_t>},
- };
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/
-
- // Activation functions : QASYMM8
- static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qasymm8 = {
- {ActivationFunction::LOGISTIC,
- &NEActivationLayerKernelEx::activation<ActivationFunction::LOGISTIC, qasymm8_t>},
- {ActivationFunction::BOUNDED_RELU,
- &NEActivationLayerKernelEx::activation<ActivationFunction::BOUNDED_RELU, qasymm8_t>},
- {ActivationFunction::LU_BOUNDED_RELU,
- &NEActivationLayerKernelEx::activation<ActivationFunction::LU_BOUNDED_RELU, qasymm8_t>},
- {ActivationFunction::RELU,
- &NEActivationLayerKernelEx::activation<ActivationFunction::RELU, qasymm8_t>},
- {ActivationFunction::TANH,
- &NEActivationLayerKernelEx::activation<ActivationFunction::TANH, qasymm8_t>},
- {ActivationFunction::IDENTITY,
- &NEActivationLayerKernelEx::activation<ActivationFunction::IDENTITY, qasymm8_t>},
- };
-
- // Activation functions : QSYMM16
- static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qsymm16 = {
- {ActivationFunction::LOGISTIC,
- &NEActivationLayerKernelEx::activation<ActivationFunction::LOGISTIC, qsymm16_t>},
- {ActivationFunction::TANH,
- &NEActivationLayerKernelEx::activation<ActivationFunction::TANH, qsymm16_t>},
- };
-
- switch (input->info()->data_type())
- {
- case DataType::QASYMM8:
- _func = act_map_qasymm8[activation_info.activation()];
- break;
- case DataType::QSYMM16:
- _func = act_map_qsymm16[activation_info.activation()];
- break;
- case DataType::F32:
- _func = act_map_f32[activation_info.activation()];
- break;
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
- case DataType::F16:
- _func = act_map_f16[activation_info.activation()];
- break;
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
- default:
- ARM_COMPUTE_ERROR("Unsupported data type.");
- }
-
- // Configure kernel window
- auto win_config =
- validate_and_configure_window(input->info(), (output != nullptr) ? output->info() : nullptr);
- ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICPPKernel::configure(win_config.second);
-}
-
-template <ActivationLayerInfo::ActivationFunction F, typename T>
-typename std::enable_if<arm_compute::utils::traits::is_floating_point<T>::value, void>::type
-NEActivationLayerKernelEx::activation(const Window &window)
-{
- /** NEON vector tag type. */
- using ExactTagType =
- typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>;
-
- const int window_step_x = 16 / sizeof(T);
- const auto window_start_x = static_cast<int>(window.x().start());
- const auto window_end_x = static_cast<int>(window.x().end());
- const ActivationFunction act = F;
-
- Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
- win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
-
- Iterator input(_input, win_collapsed);
- Iterator output(_output, win_collapsed);
-
- const auto infinity = wrapper::vdup_n(std::numeric_limits<T>::infinity(), ExactTagType{});
- const auto epsilon = wrapper::vdup_n(static_cast<T>(1e-24), ExactTagType{});
- const auto const_1 = wrapper::vdup_n(static_cast<T>(1.f), ExactTagType{});
- const auto const_0 = wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{});
- const auto va = wrapper::vdup_n(static_cast<T>(_act_info.a()), ExactTagType{});
- const auto vb = wrapper::vdup_n(static_cast<T>(_act_info.b()), ExactTagType{});
- const auto a = static_cast<T>(_act_info.a());
- const auto b = static_cast<T>(_act_info.b());
-
- execute_window_loop(
- win_collapsed,
- [&](const Coordinates &) {
- const auto input_ptr = reinterpret_cast<const T *>(input.ptr());
- const auto output_ptr = reinterpret_cast<T *>(output.ptr());
-
- wrapper::traits::neon_bitvector_t<T, wrapper::traits::BitWidth::W128> tmp;
-
- // Compute S elements per iteration
- int x = window_start_x;
-
- for (; x <= (window_end_x - window_step_x); x += window_step_x)
- {
- const auto vin = wrapper::vloadq(input_ptr + x);
- switch (act)
- {
- case ActivationFunction::ABS:
- tmp = wrapper::vabs(vin);
- break;
- case ActivationFunction::LINEAR:
- tmp = wrapper::vmla(vb, va, vin);
- break;
- case ActivationFunction::LOGISTIC:
- // exp(-vin)
- tmp = wrapper::vexpq(wrapper::vneg(vin));
-
- // NaN -> INF
- tmp = vreinterpret_floating_point(wrapper::vorr(
- wrapper::vand(wrapper::vnot(wrapper::vceq(tmp, tmp)),
- vreinterpret_unsigend_int(infinity)),
- wrapper::vand(wrapper::vceq(tmp, tmp), vreinterpret_unsigend_int(tmp))));
-
- // 1 / 1 + tmp
- tmp = wrapper::vinv(wrapper::vadd(const_1, tmp));
- break;
- case ActivationFunction::RELU:
- tmp = wrapper::vmax(const_0, vin);
- break;
- case ActivationFunction::BOUNDED_RELU:
- tmp = wrapper::vmin(va, wrapper::vmax(const_0, vin));
- break;
- case ActivationFunction::LU_BOUNDED_RELU:
- tmp = wrapper::vmin(va, wrapper::vmax(vb, vin));
- break;
- case ActivationFunction::LEAKY_RELU:
- tmp = wrapper::vbsl(wrapper::vcgt(vin, const_0), vin, wrapper::vmul(va, vin));
- break;
- case ActivationFunction::SOFT_RELU:
- tmp = wrapper::vlog(wrapper::vadd(const_1, wrapper::vexpq(vin)));
- break;
- case ActivationFunction::ELU:
- tmp = wrapper::vbsl(wrapper::vcge(vin, const_0), vin,
- wrapper::vmul(va, wrapper::vsub(wrapper::vexpq(vin), const_1)));
- break;
- case ActivationFunction::SQRT:
- tmp = wrapper::vinv(wrapper::vinvsqrt(vin + epsilon));
- break;
- case ActivationFunction::SQUARE:
- tmp = wrapper::vmul(vin, vin);
- break;
- case ActivationFunction::TANH:
- tmp = wrapper::vmul(va, wrapper::vtanh(wrapper::vmul(vb, vin)));
- break;
- case ActivationFunction::IDENTITY:
- tmp = vin;
- break;
- default:
- ARM_COMPUTE_ERROR("Unsupported activation function");
- }
- wrapper::vstore(output_ptr + x, tmp);
- }
-
- // Compute left-over elements
- for (; x < window_end_x; ++x)
- {
- const T in = *(reinterpret_cast<const T *>(input_ptr + x));
- T tmp;
- switch (act)
- {
- case ActivationFunction::ABS:
- tmp = std::abs(in);
- break;
- case ActivationFunction::LINEAR:
- tmp = a * in + b;
- break;
- case ActivationFunction::LOGISTIC:
- tmp = static_cast<T>(1) / (static_cast<T>(1) + std::exp(-in));
- break;
- case ActivationFunction::RELU:
- tmp = std::max<T>(static_cast<T>(0), in);
- break;
- case ActivationFunction::BOUNDED_RELU:
- tmp = std::min<T>(a, std::max(static_cast<T>(0), in));
- break;
- case ActivationFunction::LU_BOUNDED_RELU:
- tmp = std::min<T>(a, std::max<T>(b, in));
- break;
- case ActivationFunction::LEAKY_RELU:
- tmp = (in > 0) ? in : a * in;
- break;
- case ActivationFunction::SOFT_RELU:
- tmp = std::log(static_cast<T>(1) + std::exp(in));
- break;
- case ActivationFunction::ELU:
- tmp = (in >= 0) ? in : a * (std::exp(in) - 1);
- break;
- case ActivationFunction::SQRT:
- tmp = std::sqrt(in);
- break;
- case ActivationFunction::SQUARE:
- tmp = in * in;
- break;
- case ActivationFunction::TANH:
- tmp = a * std::tanh(b * in);
- break;
- case ActivationFunction::IDENTITY:
- tmp = in;
- break;
- default:
- ARM_COMPUTE_ERROR("Unsupported activation function");
- }
- *(output_ptr + x) = tmp;
- }
- },
- input, output);
-}
-
-template <ActivationLayerInfo::ActivationFunction F, typename T>
-typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type
-NEActivationLayerKernelEx::activation(const Window &window)
-{
- const int window_step_x = 16 / sizeof(T);
- const auto window_start_x = static_cast<int>(window.x().start());
- const auto window_end_x = static_cast<int>(window.x().end());
- const ActivationFunction act = F;
-
- Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
- win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
-
- Iterator input(_input, win_collapsed);
- Iterator output(_output, win_collapsed);
-
- const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform();
- const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform();
- const qasymm8x16_t va = vdupq_n_u8(quantize_qasymm8(_act_info.a(), qi_in));
- const qasymm8x16_t vb = vdupq_n_u8(quantize_qasymm8(_act_info.b(), qi_in));
- const qasymm8_t a = quantize_qasymm8(_act_info.a(), qi_in);
- const qasymm8_t b = quantize_qasymm8(_act_info.b(), qi_in);
- const qasymm8_t const_0 = quantize_qasymm8(0.f, qi_in);
- const qasymm8x16_t vconst_0 = vdupq_n_u8(const_0);
- const auto vconst_1 = vdupq_n_f32(1.f);
- const float32x4_t va_f32 = vdupq_n_f32(_act_info.a());
- const float32x4_t vb_f32 = vdupq_n_f32(_act_info.b());
- const float a_f32 = _act_info.a();
- const float b_f32 = _act_info.b();
-
- // Initialise scale/offset for re-quantization
- float s = qi_in.scale / qi_out.scale;
- float o = -qi_in.offset * s + qi_out.offset;
- float32x4_t vs = vdupq_n_f32(s);
- float32x4_t vo = vdupq_n_f32(o);
-
- execute_window_loop(
- win_collapsed,
- [&](const Coordinates &) {
- const auto input_ptr = reinterpret_cast<const T *>(input.ptr());
- const auto output_ptr = reinterpret_cast<T *>(output.ptr());
-
- wrapper::traits::neon_bitvector_t<T, wrapper::traits::BitWidth::W128> tmp;
-
- // Compute S elements per iteration
- int x = window_start_x;
- for (; x <= (window_end_x - window_step_x); x += window_step_x)
- {
- const auto vin = wrapper::vloadq(input_ptr + x);
- if (act == ActivationFunction::RELU)
- {
- // Perform activation
- tmp = vmaxq_u8(vconst_0, vin);
- // Re-quantize to new output space
- tmp = vmlaq_qasymm8(tmp, vs, vo);
- }
- else if (act == ActivationFunction::BOUNDED_RELU)
- {
- // Perform activation
- tmp = vminq_u8(va, vmaxq_u8(vconst_0, vin));
- // Re-quantize to new output space
- tmp = vmlaq_qasymm8(tmp, vs, vo);
- }
- else if (act == ActivationFunction::LU_BOUNDED_RELU)
- {
- // Perform activation
- tmp = vminq_u8(va, vmaxq_u8(vb, vin));
- // Re-quantize to new output space
- tmp = vmlaq_qasymm8(tmp, vs, vo);
- }
- else if (act == ActivationFunction::LOGISTIC)
- {
- // De-quantize
- const auto vin_deq = vdequantize(vin, qi_in);
- // Perform activation
- const float32x4x4_t tmp_dep = {{
- wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(
- vin_deq.val[0])))),
- wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(
- vin_deq.val[1])))),
- wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(
- vin_deq.val[2])))),
- wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(
- vin_deq.val[3])))),
- }};
- // Re-quantize to new output space
- tmp = vquantize(tmp_dep, qi_out);
- }
- else if (act == ActivationFunction::TANH)
- {
- // De-quantize
- const auto vin_deq = vdequantize(vin, qi_in);
- // Perform activation
- const float32x4x4_t tmp_dep = {{
- wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[0], vb_f32))),
- wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[1], vb_f32))),
- wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[2], vb_f32))),
- wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[3], vb_f32))),
- }};
- // Re-quantize to new output space
- tmp = vquantize(tmp_dep, qi_out);
- }
- else
- {
- ARM_COMPUTE_ERROR("Unsupported activation function");
- }
- wrapper::vstore(output_ptr + x, tmp);
- }
-
- // Compute left-over elements
- for (; x < window_end_x; ++x)
- {
- T in = *(reinterpret_cast<const T *>(input_ptr + x));
- T tmp;
- if (act == ActivationFunction::RELU)
- {
- tmp = std::max(const_0, in);
- tmp = std::max<int32_t>(0, std::min<int32_t>(tmp * s + o, 255));
- }
- else if (act == ActivationFunction::BOUNDED_RELU)
- {
- tmp = std::min(a, std::max(const_0, in));
- tmp = std::max<int32_t>(0, std::min<int32_t>(tmp * s + o, 255));
- }
- else if (act == ActivationFunction::LU_BOUNDED_RELU)
- {
- tmp = std::min(a, std::max(b, in));
- tmp = std::max<int32_t>(0, std::min<int32_t>(tmp * s + o, 255));
- }
- else if (act == ActivationFunction::LOGISTIC)
- {
- float tmp_f = dequantize_qasymm8(in, qi_in);
- tmp_f = 1.f / (1.f + std::exp(-tmp_f));
- tmp = quantize_qasymm8(tmp_f, qi_out);
- }
- else if (act == ActivationFunction::TANH)
- {
- float tmp_f = dequantize_qasymm8(in, qi_in);
- tmp_f = a_f32 * std::tanh(b_f32 * tmp_f);
- tmp = quantize_qasymm8(tmp_f, qi_out);
- }
- else
- {
- ARM_COMPUTE_ERROR("Unsupported activation function");
- }
- *(output_ptr + x) = tmp;
- }
- },
- input, output);
-}
-
-template <ActivationLayerInfo::ActivationFunction F, typename T>
-typename std::enable_if<std::is_same<T, qsymm16_t>::value, void>::type
-NEActivationLayerKernelEx::activation(const Window &window)
-{
- const int window_step_x = 16 / sizeof(T);
- const auto window_start_x = static_cast<int>(window.x().start());
- const auto window_end_x = static_cast<int>(window.x().end());
- const ActivationFunction act = F;
-
- Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
- win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
-
- Iterator input(_input, win_collapsed);
- Iterator output(_output, win_collapsed);
-
- const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform();
- const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform();
- const auto vconst_1 = vdupq_n_f32(1.f);
- const float32x4_t va_f32 = vdupq_n_f32(_act_info.a());
- const float32x4_t vb_f32 = vdupq_n_f32(_act_info.b());
- const float a_f32 = _act_info.a();
- const float b_f32 = _act_info.b();
-
- execute_window_loop(
- win_collapsed,
- [&](const Coordinates &) {
- const auto input_ptr = reinterpret_cast<const T *>(input.ptr());
- const auto output_ptr = reinterpret_cast<T *>(output.ptr());
-
- wrapper::traits::neon_bitvector_t<T, wrapper::traits::BitWidth::W128> tmp;
- ARM_COMPUTE_UNUSED(tmp);
-
- // Compute S elements per iteration
- int x = window_start_x;
- for (; x <= (window_end_x - window_step_x); x += window_step_x)
- {
- const auto vin = wrapper::vloadq(input_ptr + x);
- if (act == ActivationFunction::LOGISTIC)
- {
- // De-quantize
- const auto vin_deq = vdequantize_int16(vin, qi_in.scale);
- // Perform activation
- const float32x4x2_t tmp_dep = {{
- wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(
- vin_deq.val[0])))),
- wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(
- vin_deq.val[1])))),
- }};
- // Re-quantize to new output space
- tmp = vquantize_int16(tmp_dep, qi_out.scale);
- }
- else if (act == ActivationFunction::TANH)
- {
- // De-quantize
- const auto vin_deq = vdequantize_int16(vin, qi_in.scale);
- // Perform activation
- const float32x4x2_t tmp_dep = {{
- wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[0], vb_f32))),
- wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[1], vb_f32))),
- }};
- // Re-quantize to new output space
- tmp = vquantize_int16(tmp_dep, qi_out.scale);
- }
- else
- {
- ARM_COMPUTE_ERROR("Unsupported activation function");
- }
- wrapper::vstore(output_ptr + x, tmp);
- }
-
- // Compute left-over elements
- for (; x < window_end_x; ++x)
- {
- T in = *(reinterpret_cast<const T *>(input_ptr + x));
- T tmp;
- if (act == ActivationFunction::LOGISTIC)
- {
- float tmp_f = dequantize_qsymm16(in, qi_in.scale);
- tmp_f = 1.f / (1.f + std::exp(-tmp_f));
- tmp = quantize_qsymm16(tmp_f, qi_out);
- }
- else if (act == ActivationFunction::TANH)
- {
- float tmp_f = dequantize_qsymm16(in, qi_in.scale);
- tmp_f = a_f32 * std::tanh(b_f32 * tmp_f);
- tmp = quantize_qsymm16(tmp_f, qi_out);
- }
- else
- {
- ARM_COMPUTE_ERROR("Unsupported activation function");
- }
- *(output_ptr + x) = tmp;
- }
- },
- input, output);
-}
-
-Status NEActivationLayerKernelEx::validate(const ITensorInfo *input, const ITensorInfo *output,
- const ActivationLayerInfo &act_info)
-{
- ARM_COMPUTE_UNUSED(act_info);
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, act_info));
- ARM_COMPUTE_RETURN_ON_ERROR(
- validate_and_configure_window(input->clone().get(),
- (output != nullptr) ? output->clone().get() : nullptr)
- .first);
-
- return Status{};
-}
-
-void NEActivationLayerKernelEx::run(const Window &window, const ThreadInfo &info)
-{
- // Early exit on disabled activation
- if (!_act_info.enabled())
- {
- return;
- }
-
- ARM_COMPUTE_UNUSED(info);
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
- ARM_COMPUTE_ERROR_ON(_func == nullptr);
-
- (this->*_func)(window);
-}
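For reference, the qsymm16 paths removed above all follow the same symmetric-quantization round trip: de-quantize the int16 lanes with the input scale, apply the activation in float, and re-quantize with the output scale. A minimal scalar sketch of the logistic case, using standalone helpers rather than the ACL API (names here are illustrative only):

#include <algorithm>
#include <cmath>
#include <cstdint>

// Illustrative model of the qsymm16 logistic path; not the ACL helpers.
float dequantize_qsymm16_sketch(int16_t v, float scale) { return v * scale; }

int16_t quantize_qsymm16_sketch(float v, float scale)
{
  int q = static_cast<int>(std::lround(v / scale));
  q = std::max(-32768, std::min(32767, q)); // saturate to the int16 range
  return static_cast<int16_t>(q);
}

int16_t logistic_qsymm16_sketch(int16_t in, float in_scale, float out_scale)
{
  const float x = dequantize_qsymm16_sketch(in, in_scale);
  const float y = 1.f / (1.f + std::exp(-x)); // logistic activation
  return quantize_qsymm16_sketch(y, out_scale);
}

The vectorized loop applies the same math to eight lanes at a time and falls back to this scalar form for the left-over tail.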
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEBinaryLogicalOperationKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEBinaryLogicalOperationKernel.cpp
index 32d7d6237..0551fc7db 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEBinaryLogicalOperationKernel.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEBinaryLogicalOperationKernel.cpp
@@ -43,10 +43,10 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/wrapper/wrapper.h"
#include "arm_compute/core/NEON/NEElementwiseOperationFuncs.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/NEON/wrapper/wrapper.h"
#include <algorithm>
#include <arm_neon.h>
@@ -103,8 +103,10 @@ template <BinaryLogicalOperation op>
inline uint8x16x4_t elementwise_logic_op(const uint8x16x4_t &a, const uint8x16x4_t &b)
{
uint8x16x4_t out = {{
- elementwise_logic_op<op>(a.val[0], b.val[0]), elementwise_logic_op<op>(a.val[1], b.val[1]),
- elementwise_logic_op<op>(a.val[2], b.val[2]), elementwise_logic_op<op>(a.val[3], b.val[3]),
+ elementwise_logic_op<op>(a.val[0], b.val[0]),
+ elementwise_logic_op<op>(a.val[1], b.val[1]),
+ elementwise_logic_op<op>(a.val[2], b.val[2]),
+ elementwise_logic_op<op>(a.val[3], b.val[3]),
}};
return out;
}
@@ -160,8 +162,8 @@ void elementwise_logic_op(const ITensor *in1, const ITensor *in2, ITensor *out,
}
std::function<void(const ITensor *, const ITensor *, ITensor *, const Window &)> configure_func(
- const ITensor *input1, const ITensor *input2, ITensor *output,
- std::map<std::string, NEElementwiseOperationKernel::ElementwiseFunction *> map_function)
+ const ITensor *input1, const ITensor *input2, ITensor *output,
+ std::map<std::string, cpu::kernels::CpuElementwiseKernel::ElementwiseFunction *> map_function)
{
std::string function_to_call("op_");
function_to_call += string_from_data_type(input1->info()->data_type()) + "_";
@@ -183,9 +185,9 @@ template <BinaryLogicalOperation op>
std::function<void(const ITensor *, const ITensor *, ITensor *, const Window &)>
configure_logic_func(const ITensor *input1, const ITensor *input2, ITensor *output)
{
- static std::map<std::string, NEElementwiseOperationKernel::ElementwiseFunction *> map_function = {
- {"op_U8_U8_U8", &elementwise_logic_op<op, uint8_t, uint8x16_t>},
- {"op_QASYMM8_QASYMM8_QASYMM8", &elementwise_logic_op<op, uint8_t, uint8x16_t>}};
+ static std::map<std::string, cpu::kernels::CpuElementwiseKernel::ElementwiseFunction *>
+ map_function = {{"op_U8_U8_U8", &elementwise_logic_op<op, uint8_t, uint8x16_t>},
+ {"op_QASYMM8_QASYMM8_QASYMM8", &elementwise_logic_op<op, uint8_t, uint8x16_t>}};
return configure_func(input1, input2, output, map_function);
}
@@ -194,7 +196,7 @@ void NEBinaryLogicalOperationKernel::configure(BinaryLogicalOperation op, const
const ITensor *input2, ITensor *output)
{
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1->info(), *input2->info(), *output->info()));
- configure_common(input1, input2, output);
+ configure_common(input1->info(), input2->info(), output->info());
switch (op)
{
case BinaryLogicalOperation::AND:
@@ -223,7 +225,7 @@ Status NEBinaryLogicalOperationKernel::validate_arguments(const ITensorInfo &inp
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input1, &input2);
const TensorShape out_shape =
- TensorShape::broadcast_shape(input1.tensor_shape(), input2.tensor_shape());
+ TensorShape::broadcast_shape(input1.tensor_shape(), input2.tensor_shape());
ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0,
"Inputs are not broadcast compatible");
@@ -232,8 +234,8 @@ Status NEBinaryLogicalOperationKernel::validate_arguments(const ITensorInfo &inp
if (output.total_size() > 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- detail::have_different_dimensions(out_shape, output.tensor_shape(), 0),
- "Wrong shape for output");
+ detail::have_different_dimensions(out_shape, output.tensor_shape(), 0),
+ "Wrong shape for output");
}
return Status{};
@@ -249,5 +251,4 @@ Status NEBinaryLogicalOperationKernel::validate(BinaryLogicalOperation op,
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*input1, *input2, *output));
return Status{};
}
-
} // namespace arm_compute
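The map-based dispatch touched above keys each element-wise implementation on a string built from the input/output data types ("op_U8_U8_U8" and so on). A minimal sketch of that idea with plain function pointers, assuming byte-wise AND semantics as in the vandq_u8 path (names are illustrative, not the ACL types):

#include <cstddef>
#include <cstdint>
#include <map>
#include <string>

using ElementwiseFn = void (*)(const uint8_t *, const uint8_t *, uint8_t *, size_t);

// Byte-wise logical AND, the scalar analogue of the NEON vandq_u8 loop.
void op_and_u8(const uint8_t *a, const uint8_t *b, uint8_t *out, size_t n)
{
  for (size_t i = 0; i < n; ++i)
    out[i] = a[i] & b[i];
}

ElementwiseFn configure_fn(const std::string &in1, const std::string &in2,
                           const std::string &out)
{
  static const std::map<std::string, ElementwiseFn> table = {
    {"op_U8_U8_U8", &op_and_u8},
  };
  const auto it = table.find("op_" + in1 + "_" + in2 + "_" + out);
  return (it != table.end()) ? it->second : nullptr;
}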
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NECastBoolKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NECastBoolKernel.cpp
new file mode 100644
index 000000000..87e716b4f
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NECastBoolKernel.cpp
@@ -0,0 +1,347 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2016-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/NEON/kernels/NECastBoolKernel.h"
+
+#include "src/core/CPP/Validate.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensor.h"
+#include "src/core/NEON/NEMath.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Validate.h"
+#include "support/SaturateCast.h"
+
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
+#include "src/core/NEON/INEKernel.h"
+
+using namespace arm_compute;
+
+namespace
+{
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(output);
+ ARM_COMPUTE_RETURN_ERROR_ON(input == output);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S8,
+ DataType::S16, DataType::U16, DataType::F16,
+ DataType::U32, DataType::S32, DataType::F32);
+
+ // Validate in case of configured output
+ if (output->total_size() > 0)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
+ }
+
+ return Status{};
+}
+} // namespace
+
+NECastBoolKernel::NECastBoolKernel() : _input(nullptr), _output(nullptr) {}
+
+void NECastBoolKernel::configure(const ITensor *input, ITensor *output)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+
+ // Auto-initialize the output shape if not yet initialized (we can only
+ // auto-configure the shape; the data type must be given)
+ set_shape_if_empty(*output->info(), input->info()->tensor_shape());
+
+ _input = input;
+ _output = output;
+
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info()));
+
+ // Configure kernel window
+ Window win = calculate_max_window(*input->info(), Steps());
+ Coordinates coord;
+ coord.set_num_dimensions(output->info()->num_dimensions());
+ output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
+
+ ICPPKernel::configure(win);
+}
+
+Status NECastBoolKernel::validate(const ITensorInfo *input, const ITensorInfo *output)
+{
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output));
+ return Status{};
+}
+
+void NECastBoolKernel::run(const Window &window, const ThreadInfo &info)
+{
+ ARM_COMPUTE_UNUSED(info);
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(_input, _output);
+ ARM_COMPUTE_ERROR_ON(_input == _output);
+
+ const auto window_start_x = static_cast<int>(window.x().start());
+ const auto window_end_x = static_cast<int>(window.x().end());
+ const int window_step_x = 16;
+
+ Window win{window};
+ win.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+ Iterator input(_input, win);
+ Iterator output(_output, win);
+
+ const uint8_t true_val = 1;
+ const uint8x8_t mask_bool = vdup_n_u8(true_val);
+
+ switch (_output->info()->data_type())
+ {
+ case DataType::S8:
+ {
+ /* Conversion U8 -> S8 */
+ execute_window_loop(
+ win,
+ [&](const Coordinates &) {
+ const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<int8_t *>(output.ptr());
+
+ int x = window_start_x;
+ for (; x <= (window_end_x - window_step_x); x += window_step_x)
+ {
+ const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);
+
+ vst1q_s8(output_ptr + x,
+ vreinterpretq_s8_u8(vandq_u8(texels_u8, vdupq_n_u8(true_val))));
+ }
+
+ // Compute left-over elements
+ for (; x < window_end_x; ++x)
+ {
+ *(output_ptr + x) = static_cast<int8_t>(*(input_ptr + x) & true_val);
+ }
+ },
+ input, output);
+ break;
+ }
+ case DataType::S16:
+ {
+ /* Up-conversion U8 -> S16 */
+ execute_window_loop(
+ win,
+ [&](const Coordinates &) {
+ const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<int16_t *>(output.ptr());
+
+ int x = window_start_x;
+ for (; x <= (window_end_x - window_step_x); x += window_step_x)
+ {
+ const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);
+
+ const int16x8x2_t texels = {
+ {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))),
+ vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}};
+
+ vst1q_s16(output_ptr + x, texels.val[0]);
+ vst1q_s16(output_ptr + x + 8, texels.val[1]);
+ }
+
+ // Compute left-over elements
+ for (; x < window_end_x; ++x)
+ {
+ *(output_ptr + x) = static_cast<int16_t>(*(input_ptr + x) & true_val);
+ }
+ },
+ input, output);
+ break;
+ }
+ case DataType::S32:
+ {
+ /* Up-conversion U8 -> S32 */
+ execute_window_loop(
+ win,
+ [&](const Coordinates &) {
+ const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<int32_t *>(output.ptr());
+
+ int x = window_start_x;
+ for (; x <= (window_end_x - window_step_x); x += window_step_x)
+ {
+ const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);
+
+ const int16x8x2_t texels = {
+ {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))),
+ vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}};
+
+ vst1q_s32(output_ptr + x, vmovl_s16(vget_low_s16(texels.val[0])));
+ vst1q_s32(output_ptr + x + 4, vmovl_s16(vget_high_s16(texels.val[0])));
+ vst1q_s32(output_ptr + x + 8, vmovl_s16(vget_low_s16(texels.val[1])));
+ vst1q_s32(output_ptr + x + 12, vmovl_s16(vget_high_s16(texels.val[1])));
+ }
+
+ // Compute left-over elements
+ for (; x < window_end_x; ++x)
+ {
+ *(output_ptr + x) = static_cast<int32_t>(*(input_ptr + x) & true_val);
+ }
+ },
+ input, output);
+ break;
+ }
+ case DataType::F32:
+ {
+ /* Up-conversion U8 -> F32 */
+ execute_window_loop(
+ win,
+ [&](const Coordinates &) {
+ const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<float *>(output.ptr());
+
+ int x = window_start_x;
+ for (; x <= (window_end_x - window_step_x); x += window_step_x)
+ {
+ const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);
+
+ const int16x8x2_t texels = {
+ {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))),
+ vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}};
+ vst1q_f32(output_ptr + x, vcvtq_f32_s32(vmovl_s16(vget_low_s16(texels.val[0]))));
+ vst1q_f32(output_ptr + x + 4, vcvtq_f32_s32(vmovl_s16(vget_high_s16(texels.val[0]))));
+ vst1q_f32(output_ptr + x + 8, vcvtq_f32_s32(vmovl_s16(vget_low_s16(texels.val[1]))));
+ vst1q_f32(output_ptr + x + 12, vcvtq_f32_s32(vmovl_s16(vget_high_s16(texels.val[1]))));
+ }
+
+ // Compute left-over elements
+ for (; x < window_end_x; ++x)
+ {
+ auto in = static_cast<uint32_t>(*(input_ptr + x) & true_val);
+ *(output_ptr + x) = static_cast<float>(in);
+ }
+ },
+ input, output);
+ break;
+ }
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+ case DataType::F16:
+ {
+ /* Up-conversion U8 -> F16 */
+ execute_window_loop(
+ win,
+ [&](const Coordinates &) {
+ const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<float16_t *>(output.ptr());
+
+ int x = window_start_x;
+ for (; x <= (window_end_x - window_step_x); x += window_step_x)
+ {
+ const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);
+
+ const int16x8x2_t texels = {
+ {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))),
+ vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}};
+ vst1q_f16(output_ptr + x, vcvtq_f16_s16(texels.val[0]));
+ vst1q_f16(output_ptr + x + 8, vcvtq_f16_s16(texels.val[1]));
+ }
+
+ // Compute left-over elements
+ for (; x < window_end_x; ++x)
+ {
+ *(output_ptr + x) = static_cast<float16_t>(*(input_ptr + x) & true_val);
+ }
+ },
+ input, output);
+ break;
+ }
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+ case DataType::U8:
+ {
+ /* Conversion U8 -> U8 */
+ execute_window_loop(
+ win,
+ [&](const Coordinates &) {
+ const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<uint8_t *>(output.ptr());
+
+ int x = window_start_x;
+ for (; x <= (window_end_x - window_step_x); x += window_step_x)
+ {
+ const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);
+
+ vst1q_u8(output_ptr + x, vandq_u8(texels_u8, vdupq_n_u8(true_val)));
+ }
+
+ // Compute left-over elements
+ for (; x < window_end_x; ++x)
+ {
+ *(output_ptr + x) = static_cast<uint8_t>(*(input_ptr + x) & true_val);
+ }
+ },
+ input, output);
+ break;
+ }
+ case DataType::U16:
+ {
+ /* Up-conversion U8 -> U16 */
+ execute_window_loop(
+ win,
+ [&](const Coordinates &) {
+ const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<uint16_t *>(output.ptr());
+
+ int x = window_start_x;
+ for (; x <= (window_end_x - window_step_x); x += window_step_x)
+ {
+ const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);
+
+ const uint16x8x2_t texels = {{vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool)),
+ vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool))}};
+
+ vst1q_u16(output_ptr + x, texels.val[0]);
+ vst1q_u16(output_ptr + x + 8, texels.val[1]);
+ }
+
+ // Compute left-over elements
+ for (; x < window_end_x; ++x)
+ {
+ *(output_ptr + x) = static_cast<uint16_t>(*(input_ptr + x) & true_val);
+ }
+ },
+ input, output);
+ break;
+ }
+ default:
+ ARM_COMPUTE_ERROR("Output data type not supported");
+ }
+}
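Every branch of the new cast kernel reduces to the same two steps: mask each input byte with & 1 (only bit 0 of a bool tensor is significant) and widen the result to the destination type. A scalar model of the U8 -> S32 case (plain arrays stand in for the ITensor windowing):

#include <cstddef>
#include <cstdint>

// Simplified model of NECastBoolKernel for S32 output; illustrative only.
void cast_bool_to_s32(const uint8_t *in, int32_t *out, size_t n)
{
  const uint8_t true_val = 1;
  for (size_t i = 0; i < n; ++i)
    out[i] = static_cast<int32_t>(in[i] & true_val); // 0 or 1
}

The NEON paths do the masking sixteen bytes at a time and widen through vmovl_u8/vmovl_s16 before storing.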
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEEmbeddingLookupKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEEmbeddingLookupKernel.cpp
index 091d38c56..3ad9ee945 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEEmbeddingLookupKernel.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEEmbeddingLookupKernel.cpp
@@ -47,10 +47,13 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
using namespace arm_compute;
NEEmbeddingLookupKernel::NEEmbeddingLookupKernel()
- : _input(nullptr), _lookups(nullptr), _output(nullptr)
+ : _input(nullptr), _lookups(nullptr), _output(nullptr)
{
}
@@ -79,8 +82,8 @@ Status NEEmbeddingLookupKernel::validate(const arm_compute::ITensorInfo *input,
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, lookups);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
- input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
- DataType::U32, DataType::S32, DataType::F16, DataType::F32);
+ input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
+ DataType::U32, DataType::S32, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lookups, 1, DataType::S32);
ARM_COMPUTE_ERROR_ON(input->num_dimensions() < 2 && input->num_dimensions() > 4);
@@ -119,16 +122,17 @@ void NEEmbeddingLookupKernel::run(const Window &window, const ThreadInfo &info)
{
Iterator output_it(_output, out_slice);
- execute_window_loop(out_slice,
- [&](const Coordinates &id) {
- const int32_t lookup = *reinterpret_cast<int32_t *>(
- _lookups->ptr_to_element(Coordinates{id[lookup_dim]}));
- Coordinates input_id{id};
- input_id.set(lookup_dim, lookup);
- memcpy(output_it.ptr(), _input->ptr_to_element(input_id),
- _output->info()->dimension(0) * _output->info()->element_size());
- },
- output_it);
+ execute_window_loop(
+ out_slice,
+ [&](const Coordinates &id) {
+ const int32_t lookup =
+ *reinterpret_cast<int32_t *>(_lookups->ptr_to_element(Coordinates{id[lookup_dim]}));
+ Coordinates input_id{id};
+ input_id.set(lookup_dim, lookup);
+ memcpy(output_it.ptr(), _input->ptr_to_element(input_id),
+ _output->info()->dimension(0) * _output->info()->element_size());
+ },
+ output_it);
} while (window.slide_window_slice_4D(out_slice));
}
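The loop reflowed above is a row gather: for each output slice it reads one index from the lookups tensor and memcpy's the corresponding input row. A flattened model of the same operation (hypothetical signature, not the kernel API):

#include <cstdint>
#include <cstring>

// Simplified embedding lookup over a 2-D view: output row i is input row
// lookups[i], with row_bytes = dimension(0) * element_size. Indices are
// assumed valid and non-negative.
void embedding_lookup(const uint8_t *input, const int32_t *lookups,
                      uint8_t *output, int num_lookups, size_t row_bytes)
{
  for (int i = 0; i < num_lookups; ++i)
    std::memcpy(output + i * row_bytes,
                input + static_cast<size_t>(lookups[i]) * row_bytes, row_bytes);
}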
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp
new file mode 100644
index 000000000..375fa28e5
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp
@@ -0,0 +1,190 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/Window.h"
+
+#include "src/core/CPP/Validate.h"
+#include "src/core/NEON/NEFixedPoint.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/WindowHelpers.h"
+
+#include <arm_neon.h>
+#include <cstddef>
+#include <cstdint>
+#include <mutex>
+
+using namespace arm_compute;
+
+namespace
+{
+inline Status validate_arguments(const ITensorInfo *accum, const ITensorInfo *biases)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(accum);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(biases, accum);
+ ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
+ ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != accum->dimension(0));
+
+ return Status{};
+}
+
+inline std::pair<Status, Window> validate_and_configure_window(ITensorInfo *accum,
+ ITensorInfo *biases)
+{
+ constexpr unsigned int num_elems_processed_per_iteration = 16;
+
+ // Configure kernel window
+ Window win = calculate_max_window(*accum, Steps(num_elems_processed_per_iteration));
+
+ bool window_changed = update_window_and_padding(
+ win, AccessWindowHorizontal(accum, 0, num_elems_processed_per_iteration),
+ AccessWindowStatic(biases, 0, 0,
+ ceil_to_multiple(biases->dimension(0), num_elems_processed_per_iteration),
+ biases->tensor_shape().y()));
+
+ AccessWindowHorizontal output_access(accum, 0, num_elems_processed_per_iteration);
+
+ // Set the valid region for the accum tensor
+ Coordinates coord;
+ coord.set_num_dimensions(accum->num_dimensions());
+ output_access.set_valid_region(win, ValidRegion(coord, accum->tensor_shape()));
+
+ Status err = (window_changed)
+ ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
+ : Status{};
+ return std::make_pair(err, win);
+}
+} // namespace
+
+NEGEMMMatrixAccumulateBiasesKernel::NEGEMMMatrixAccumulateBiasesKernel()
+ : _accum(nullptr), _biases(nullptr)
+{
+}
+
+void NEGEMMMatrixAccumulateBiasesKernel::configure(ITensor *accum, const ITensor *biases)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(accum, biases);
+
+ // Perform validate step
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(accum->info(), biases->info()));
+
+ _biases = biases;
+ _accum = accum;
+
+ // Configure kernel window
+ auto win_config = validate_and_configure_window(accum->info(), biases->info());
+ ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+ INEKernel::configure(win_config.second);
+}
+
+Status NEGEMMMatrixAccumulateBiasesKernel::validate(const ITensorInfo *accum,
+ const ITensorInfo *biases)
+{
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(accum, biases));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ validate_and_configure_window(accum->clone().get(), biases->clone().get()).first);
+
+ return Status{};
+}
+
+std::mutex m;
+void NEGEMMMatrixAccumulateBiasesKernel::run(const Window &window, const ThreadInfo &info)
+{
+ std::lock_guard<std::mutex> lock_guard(m);
+ ARM_COMPUTE_UNUSED(info);
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
+
+ Window win_biases;
+ win_biases.set(Window::DimX,
+ Window::Dimension(window.x().start(), window.x().end(), window.x().step()));
+ win_biases.set(Window::DimY, Window::Dimension(0, 1, 1));
+
+ Iterator in0_out(_accum, window);
+ Iterator in1(_biases, win_biases);
+
+ switch (_accum->info()->data_type())
+ {
+ case DataType::F32:
+ {
+ execute_window_loop(
+ window,
+ [&](const Coordinates &) {
+ const float32x4x4_t accum = vld4q_f32(reinterpret_cast<const float *>(in0_out.ptr()));
+ const float32x4x4_t biases = vld4q_f32(reinterpret_cast<const float *>(in1.ptr()));
+ const float32x4x4_t res = {
+ {vaddq_f32(accum.val[0], biases.val[0]), vaddq_f32(accum.val[1], biases.val[1]),
+ vaddq_f32(accum.val[2], biases.val[2]), vaddq_f32(accum.val[3], biases.val[3])}};
+
+ vst4q_f32(reinterpret_cast<float *>(in0_out.ptr()), res);
+ },
+ in0_out, in1);
+ break;
+ }
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+ case DataType::F16:
+ {
+ execute_window_loop(
+ window,
+ [&](const Coordinates &) {
+ const float16x8x2_t accum = vld2q_f16(reinterpret_cast<const float16_t *>(in0_out.ptr()));
+ const float16x8x2_t biases = vld2q_f16(reinterpret_cast<const float16_t *>(in1.ptr()));
+ const float16x8x2_t res = {
+ {vaddq_f16(accum.val[0], biases.val[0]), vaddq_f16(accum.val[1], biases.val[1])}};
+
+ vst2q_f16(reinterpret_cast<float16_t *>(in0_out.ptr()), res);
+ },
+ in0_out, in1);
+ break;
+ }
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+ default:
+ ARM_COMPUTE_ERROR("Data type not supported");
+ break;
+ }
+}
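Two things are worth noting in this new file. First, run() takes a file-scope std::mutex, so concurrent workers are serialized through this kernel; that trades parallel throughput for safety around the in-place update of accum. Second, the arithmetic itself is a plain row-broadcast add, which the F32 path performs four lanes at a time via vld4q/vaddq/vst4q. A scalar model (illustrative names, row-major layout assumed):

#include <cstddef>

// In-place bias accumulation: every row of accum gets the 1-D biases vector
// added element-wise. Simplified model of the kernel's F32 path.
void accumulate_biases(float *accum, const float *biases, size_t rows, size_t cols)
{
  for (size_t r = 0; r < rows; ++r)
    for (size_t c = 0; c < cols; ++c)
      accum[r * cols + c] += biases[c];
}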
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEGatherKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEGatherKernelEx.cpp
index 4c0a5e799..d4144e6b9 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEGatherKernelEx.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEGatherKernelEx.cpp
@@ -40,7 +40,7 @@
#include "arm_compute/core/NEON/kernels/NEGatherKernelEx.h"
-#include "arm_compute/core/CPP/Validate.h"
+#include "src/core/CPP/Validate.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
@@ -50,6 +50,9 @@
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
namespace arm_compute
{
namespace
@@ -70,7 +73,10 @@ template <typename U> void validate_indices(const ITensor *indices)
} // namespace
-NEGatherKernelEx::NEGatherKernelEx() : _input{}, _indices{}, _axis{}, _output{}, _func{} {}
+NEGatherKernelEx::NEGatherKernelEx()
+ : _input{}, _indices{}, _axis{}, _indices_rank{}, _output{}, _func{}
+{
+}
template <typename U>
inline void NEGatherKernelEx::gather_0_axis(const Window &window, const ThreadInfo &info)
@@ -82,36 +88,35 @@ inline void NEGatherKernelEx::gather_0_axis(const Window &window, const ThreadIn
Iterator output_it(_output, window);
execute_window_loop(
- window,
- [&](const Coordinates &id) {
- Coordinates gather_id(id);
- gather_id.collapse(_indices->info()->num_dimensions(), 0);
-
- U new_index;
- switch (_indices->info()->num_dimensions())
- {
- case 1:
- new_index = *(reinterpret_cast<U *>(_indices->ptr_to_element(Coordinates(id[0]))));
- break;
- case 2:
- new_index =
- *(reinterpret_cast<U *>(_indices->ptr_to_element(Coordinates(id[0], id[1]))));
- break;
- case 3:
- new_index = *(
- reinterpret_cast<U *>(_indices->ptr_to_element(Coordinates(id[0], id[1], id[2]))));
- break;
- default:
- ARM_COMPUTE_ERROR("Wrong num of dimensions");
- break;
- }
-
- gather_id.set(0, new_index);
-
- std::copy_n(_input->ptr_to_element(gather_id), _output->info()->element_size(),
- output_it.ptr());
- },
- output_it);
+ window,
+ [&](const Coordinates &id) {
+ Coordinates gather_id(id);
+ gather_id.collapse(_indices_rank);
+
+ U new_index;
+ switch (_indices_rank)
+ {
+ case 1:
+ new_index = *(reinterpret_cast<U *>(_indices->ptr_to_element(Coordinates(id[0]))));
+ break;
+ case 2:
+ new_index = *(reinterpret_cast<U *>(_indices->ptr_to_element(Coordinates(id[0], id[1]))));
+ break;
+ case 3:
+ new_index =
+ *(reinterpret_cast<U *>(_indices->ptr_to_element(Coordinates(id[0], id[1], id[2]))));
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Wrong num of dimensions");
+ break;
+ }
+
+ gather_id.set(0, new_index);
+
+ std::copy_n(_input->ptr_to_element(gather_id), _output->info()->element_size(),
+ output_it.ptr());
+ },
+ output_it);
}
template <typename U>
@@ -127,37 +132,36 @@ void NEGatherKernelEx::gather_n_axis(const Window &window, const ThreadInfo &inf
Iterator output_it(_output, output_window);
execute_window_loop(
- output_window,
- [&](const Coordinates &id) {
- Coordinates gather_id(id);
- gather_id.collapse(_indices->info()->num_dimensions(), _axis);
-
- U new_index;
- switch (_indices->info()->num_dimensions())
- {
- case 1:
- new_index = *(reinterpret_cast<U *>(_indices->ptr_to_element(Coordinates(id[_axis]))));
- break;
- case 2:
- new_index = *(reinterpret_cast<U *>(
- _indices->ptr_to_element(Coordinates(id[_axis], id[_axis + 1]))));
- break;
- case 3:
- new_index = *(reinterpret_cast<U *>(
- _indices->ptr_to_element(Coordinates(id[_axis], id[_axis + 1], id[_axis + 2]))));
- break;
- default:
- ARM_COMPUTE_ERROR("Wrong num of dimensions");
- break;
- }
-
- gather_id.set(_axis, new_index);
-
- std::copy_n(_input->ptr_to_element(gather_id),
- _input->info()->dimension(0) * _output->info()->element_size(),
- output_it.ptr());
- },
- output_it);
+ output_window,
+ [&](const Coordinates &id) {
+ Coordinates gather_id(id);
+ gather_id.collapse(_indices_rank, _axis);
+
+ U new_index;
+ switch (_indices_rank)
+ {
+ case 1:
+ new_index = *(reinterpret_cast<U *>(_indices->ptr_to_element(Coordinates(id[_axis]))));
+ break;
+ case 2:
+ new_index = *(
+ reinterpret_cast<U *>(_indices->ptr_to_element(Coordinates(id[_axis], id[_axis + 1]))));
+ break;
+ case 3:
+ new_index = *(reinterpret_cast<U *>(
+ _indices->ptr_to_element(Coordinates(id[_axis], id[_axis + 1], id[_axis + 2]))));
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Wrong num of dimensions");
+ break;
+ }
+
+ gather_id.set(_axis, new_index);
+
+ std::copy_n(_input->ptr_to_element(gather_id),
+ _input->info()->dimension(0) * _output->info()->element_size(), output_it.ptr());
+ },
+ output_it);
}
void NEGatherKernelEx::configure(const ITensor *input, const ITensor *indices, ITensor *output,
@@ -167,13 +171,14 @@ void NEGatherKernelEx::configure(const ITensor *input, const ITensor *indices, I
ARM_COMPUTE_ERROR_ON(indices->info()->num_dimensions() > 3);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(indices, 1, DataType::U32, DataType::S32);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
- input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
- DataType::U32, DataType::S32, DataType::F16, DataType::F32);
+ input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
+ DataType::U32, DataType::S32, DataType::F16, DataType::F32);
_input = input;
_indices = indices;
_output = output;
_axis = axis;
+ _indices_rank = indices->info()->num_dimensions();
if (_axis < 0)
{
@@ -213,7 +218,7 @@ void NEGatherKernelEx::configure(const ITensor *input, const ITensor *indices, I
}
// Output auto initialization if not yet initialized
TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape_ex(
- input->info()->tensor_shape(), indices->info()->tensor_shape(), _axis);
+ input->info()->tensor_shape(), indices->info()->tensor_shape(), _axis);
auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type());
// Create window
@@ -239,15 +244,15 @@ Status NEGatherKernelEx::validate(const ITensorInfo *input, const ITensorInfo *i
ARM_COMPUTE_RETURN_ERROR_ON(0 > axis || axis >= static_cast<int32_t>(input->num_dimensions()));
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
- input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
- DataType::U32, DataType::S32, DataType::F16, DataType::F32);
+ input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
+ DataType::U32, DataType::S32, DataType::F16, DataType::F32);
if (output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape_ex(
- input->tensor_shape(), indices->tensor_shape(), axis);
+ input->tensor_shape(), indices->tensor_shape(), axis);
ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() != output->tensor_shape().total_size());
}
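Besides the reflow, the substantive change above is the new _indices_rank member: the indices rank is captured once in configure() and reused in the hot loops instead of querying _indices->info()->num_dimensions() on every window iteration. The gather itself maps each output coordinate to an input coordinate through the looked-up index; a flattened model of the axis-0 case (2-D view, illustrative only):

#include <cstdint>
#include <cstring>

// Simplified axis-0 gather: output row i is input row indices[i]. The real
// kernel handles indices of rank 1..3 and arbitrary gather axes.
void gather_axis0(const uint8_t *input, const int32_t *indices,
                  uint8_t *output, int num_indices, size_t row_bytes)
{
  for (int i = 0; i < num_indices; ++i)
    std::memcpy(output + i * row_bytes,
                input + static_cast<size_t>(indices[i]) * row_bytes, row_bytes);
}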
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp
index 30787c0a4..f178865b7 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp
@@ -47,6 +47,9 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
#include <unordered_map>
using namespace arm_compute;
@@ -57,7 +60,7 @@ constexpr size_t NOT_HIT = 0xFFFFFFFF;
} // namespace
NEHashtableLookupKernel::NEHashtableLookupKernel()
- : _lookups(nullptr), _keys(nullptr), _input(nullptr), _output(nullptr), _hits{nullptr}
+ : _lookups(nullptr), _keys(nullptr), _input(nullptr), _output(nullptr), _hits{nullptr}
{
}
@@ -66,7 +69,7 @@ void NEHashtableLookupKernel::configure(const ITensor *lookups, const ITensor *k
{
ARM_COMPUTE_ERROR_ON_NULLPTR(lookups, keys, input, output, hits);
ARM_COMPUTE_ERROR_THROW_ON(
- validate(lookups->info(), keys->info(), input->info(), output->info(), hits->info()));
+ validate(lookups->info(), keys->info(), input->info(), output->info(), hits->info()));
_lookups = lookups;
_keys = keys;
@@ -92,8 +95,8 @@ Status NEHashtableLookupKernel::validate(const ITensorInfo *lookups, const ITens
{
ARM_COMPUTE_ERROR_ON_NULLPTR(lookups, keys, input, output, hits);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
- input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
- DataType::U32, DataType::S32, DataType::F16, DataType::F32);
+ input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
+ DataType::U32, DataType::S32, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lookups, 1, DataType::S32);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(keys, 1, DataType::S32);
@@ -134,8 +137,8 @@ void NEHashtableLookupKernel::run(const Window &window, const ThreadInfo &info)
const size_t lookup_dim = _output->info()->num_dimensions() - 1;
const int const_0 = _output->info()->data_type() == DataType::QASYMM8
- ? _output->info()->quantization_info().uniform().offset
- : 0;
+ ? _output->info()->quantization_info().uniform().offset
+ : 0;
std::unordered_map<int32_t, size_t> key_index_map;
for (size_t n = 0; n < _keys->info()->dimension(0); ++n)
@@ -174,24 +177,24 @@ void NEHashtableLookupKernel::run(const Window &window, const ThreadInfo &info)
{
Iterator output_it(_output, out_slice);
- execute_window_loop(out_slice,
- [&](const Coordinates &id) {
- const auto lookup = lookup_indices.at(id[lookup_dim]);
- if (lookup == NOT_HIT)
- {
- memset(output_it.ptr(), const_0,
- _output->info()->dimension(0) * _output->info()->element_size());
- }
- else
- {
- Coordinates input_id{id};
- input_id.set(lookup_dim, lookup);
- memcpy(output_it.ptr(), _input->ptr_to_element(input_id),
- _output->info()->dimension(0) * _output->info()->element_size());
- }
-
- },
- output_it);
+ execute_window_loop(
+ out_slice,
+ [&](const Coordinates &id) {
+ const auto lookup = lookup_indices.at(id[lookup_dim]);
+ if (lookup == NOT_HIT)
+ {
+ memset(output_it.ptr(), const_0,
+ _output->info()->dimension(0) * _output->info()->element_size());
+ }
+ else
+ {
+ Coordinates input_id{id};
+ input_id.set(lookup_dim, lookup);
+ memcpy(output_it.ptr(), _input->ptr_to_element(input_id),
+ _output->info()->dimension(0) * _output->info()->element_size());
+ }
+ },
+ output_it);
} while (window.slide_window_slice_4D(out_slice));
}
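The reflowed loop above implements the miss/hit split of the hashtable lookup: keys are hashed into an index map once, then each lookup either copies the matching input row or fills the output row with const_0 (the QASYMM8 zero-point offset for quantized outputs, plain zero otherwise). A self-contained model, with hit-flag handling simplified and const_0 taken as 0 (illustrative names):

#include <cstdint>
#include <cstring>
#include <unordered_map>

constexpr size_t NOT_HIT_SKETCH = 0xFFFFFFFF;

// Simplified hashtable lookup: misses zero-fill the row and clear the hit
// flag; hits copy the matching input row.
void hashtable_lookup(const int32_t *keys, size_t num_keys,
                      const int32_t *lookups, size_t num_lookups,
                      const uint8_t *input, uint8_t *output, uint8_t *hits,
                      size_t row_bytes)
{
  std::unordered_map<int32_t, size_t> key_index_map;
  for (size_t n = 0; n < num_keys; ++n)
    key_index_map[keys[n]] = n;

  for (size_t i = 0; i < num_lookups; ++i)
  {
    const auto it = key_index_map.find(lookups[i]);
    const size_t row = (it != key_index_map.end()) ? it->second : NOT_HIT_SKETCH;
    if (row == NOT_HIT_SKETCH)
    {
      std::memset(output + i * row_bytes, 0, row_bytes);
      hits[i] = 0;
    }
    else
    {
      std::memcpy(output + i * row_bytes, input + row * row_bytes, row_bytes);
      hits[i] = 1;
    }
  }
}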
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.cpp
index 49adf1462..7804f9c6a 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.cpp
@@ -40,17 +40,22 @@
#include "arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.h"
-#include "arm_compute/core/CPP/Validate.h"
+#include "src/core/CPP/Validate.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/NEMath.h"
-#include "arm_compute/core/NEON/wrapper/wrapper.h"
+#include "src/core/NEON/NEMath.h"
+#include "src/core/NEON/INEKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "src/core/NEON/wrapper/wrapper.h"
+
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
#include <arm_neon.h>
namespace arm_compute
@@ -63,7 +68,7 @@ void instance_normalization_nchw(ITensor *input, ITensor *output, ITensor *gamma
{
/** NEON vector tag type. */
using ExactTagType =
- typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>;
+ typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>;
// Clear X/Y dimensions on execution window as we handle the planes manually
Window win = window;
@@ -73,107 +78,107 @@ void instance_normalization_nchw(ITensor *input, ITensor *output, ITensor *gamma
constexpr int window_step_x = 16 / sizeof(T);
const unsigned int elements_plane = input->info()->dimension(0) * output->info()->dimension(1);
const auto channel_idx =
- get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::CHANNEL);
+ get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::CHANNEL);
Iterator input_it(input, win);
execute_window_loop(
- win,
- [&](const Coordinates &id) {
- Window win_plane = window;
- win_plane.set(Window::DimX, Window::Dimension(0, 1, 1));
- win_plane.set(Window::DimZ, Window::Dimension(id[2], id[2] + 1, 1));
- win_plane.set(3, Window::Dimension(id[3], id[3] + 1, 1));
-
- Iterator input_plane_it(input, win_plane);
- Iterator output_plane_it(output, win_plane);
-
- auto sum_h_w = static_cast<T>(0.f);
- auto sum_squares_h_w = static_cast<T>(0.f);
-
- execute_window_loop(
- win_plane,
- [&](const Coordinates &) {
- const auto input_ptr = reinterpret_cast<const T *>(input_plane_it.ptr());
-
- auto vec_sum_h_w = wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{});
- auto vec_sum_squares_h_w = wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{});
-
- // Compute S elements per iteration
- int x = window.x().start();
- for (; x <= (window.x().end() - window_step_x); x += window_step_x)
- {
- auto vec_input_val = wrapper::vloadq(input_ptr + x);
- vec_sum_h_w = wrapper::vadd(vec_sum_h_w, vec_input_val);
- vec_sum_squares_h_w =
- wrapper::vadd(vec_sum_squares_h_w, wrapper::vmul(vec_input_val, vec_input_val));
- }
-
- auto vec2_sum_h_w =
- wrapper::vpadd(wrapper::vgethigh(vec_sum_h_w), wrapper::vgetlow(vec_sum_h_w));
- auto vec2_sum_squares_h_w = wrapper::vpadd(wrapper::vgethigh(vec_sum_squares_h_w),
- wrapper::vgetlow(vec_sum_squares_h_w));
- for (int i = 0; i < window_step_x / 4; ++i)
- {
- vec2_sum_h_w = wrapper::vpadd(vec2_sum_h_w, vec2_sum_h_w);
- vec2_sum_squares_h_w = wrapper::vpadd(vec2_sum_squares_h_w, vec2_sum_squares_h_w);
- }
- sum_h_w += wrapper::vgetlane(vec2_sum_h_w, 0);
- sum_squares_h_w += wrapper::vgetlane(vec2_sum_squares_h_w, 0);
-
- // Compute left-over elements
- for (; x < window.x().end(); ++x)
- {
- const auto value = *(input_ptr + x);
- sum_h_w += value;
- sum_squares_h_w += value * value;
- }
- },
- input_plane_it, output_plane_it);
-
- const auto mean_h_w = sum_h_w / elements_plane;
- const auto var_h_w = sum_squares_h_w / elements_plane - mean_h_w * mean_h_w;
-
- auto gamma_val = 1.0f;
- if (gamma != nullptr)
- {
- gamma_val = *reinterpret_cast<T *>(gamma->ptr_to_element({id[channel_idx]}));
- }
- const auto multip_h_w = gamma_val / std::sqrt(var_h_w + epsilon);
- const auto vec_mean_h_w = wrapper::vdup_n(static_cast<T>(mean_h_w), ExactTagType{});
- const auto vec_multip_h_w = wrapper::vdup_n(static_cast<T>(multip_h_w), ExactTagType{});
- auto beta_val = 0.0f;
- if (beta != nullptr)
- {
- beta_val = *reinterpret_cast<T *>(beta->ptr_to_element({id[channel_idx]}));
- }
- const auto vec_beta = wrapper::vdup_n(static_cast<T>(beta_val), ExactTagType{});
-
- execute_window_loop(
- win_plane,
- [&](const Coordinates &) {
- auto input_ptr = reinterpret_cast<T *>(input_plane_it.ptr());
- auto output_ptr = reinterpret_cast<T *>(output_plane_it.ptr());
-
- // Compute S elements per iteration
- int x = window.x().start();
- auto vec_val = wrapper::vdup_n(static_cast<T>(0.0f), ExactTagType{});
- for (; x <= (window.x().end() - window_step_x); x += window_step_x)
- {
- vec_val = wrapper::vloadq(input_ptr + x);
- vec_val = wrapper::vadd(
- wrapper::vmul(wrapper::vsub(vec_val, vec_mean_h_w), vec_multip_h_w), vec_beta);
- wrapper::vstore(output_ptr + x, vec_val);
- }
-
- // Compute left-over elements
- for (; x < window.x().end(); ++x)
- {
- *(output_ptr + x) = ((*(input_ptr + x)) - mean_h_w) * multip_h_w + beta_val;
- }
- },
- input_plane_it, output_plane_it);
- },
- input_it);
+ win,
+ [&](const Coordinates &id) {
+ Window win_plane = window;
+ win_plane.set(Window::DimX, Window::Dimension(0, 1, 1));
+ win_plane.set(Window::DimZ, Window::Dimension(id[2], id[2] + 1, 1));
+ win_plane.set(3, Window::Dimension(id[3], id[3] + 1, 1));
+
+ Iterator input_plane_it(input, win_plane);
+ Iterator output_plane_it(output, win_plane);
+
+ auto sum_h_w = static_cast<T>(0.f);
+ auto sum_squares_h_w = static_cast<T>(0.f);
+
+ execute_window_loop(
+ win_plane,
+ [&](const Coordinates &) {
+ const auto input_ptr = reinterpret_cast<const T *>(input_plane_it.ptr());
+
+ auto vec_sum_h_w = wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{});
+ auto vec_sum_squares_h_w = wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{});
+
+ // Compute S elements per iteration
+ int x = window.x().start();
+ for (; x <= (window.x().end() - window_step_x); x += window_step_x)
+ {
+ auto vec_input_val = wrapper::vloadq(input_ptr + x);
+ vec_sum_h_w = wrapper::vadd(vec_sum_h_w, vec_input_val);
+ vec_sum_squares_h_w =
+ wrapper::vadd(vec_sum_squares_h_w, wrapper::vmul(vec_input_val, vec_input_val));
+ }
+
+ auto vec2_sum_h_w =
+ wrapper::vpadd(wrapper::vgethigh(vec_sum_h_w), wrapper::vgetlow(vec_sum_h_w));
+ auto vec2_sum_squares_h_w = wrapper::vpadd(wrapper::vgethigh(vec_sum_squares_h_w),
+ wrapper::vgetlow(vec_sum_squares_h_w));
+ for (int i = 0; i < window_step_x / 4; ++i)
+ {
+ vec2_sum_h_w = wrapper::vpadd(vec2_sum_h_w, vec2_sum_h_w);
+ vec2_sum_squares_h_w = wrapper::vpadd(vec2_sum_squares_h_w, vec2_sum_squares_h_w);
+ }
+ sum_h_w += wrapper::vgetlane(vec2_sum_h_w, 0);
+ sum_squares_h_w += wrapper::vgetlane(vec2_sum_squares_h_w, 0);
+
+ // Compute left-over elements
+ for (; x < window.x().end(); ++x)
+ {
+ const auto value = *(input_ptr + x);
+ sum_h_w += value;
+ sum_squares_h_w += value * value;
+ }
+ },
+ input_plane_it, output_plane_it);
+
+ const auto mean_h_w = sum_h_w / elements_plane;
+ const auto var_h_w = sum_squares_h_w / elements_plane - mean_h_w * mean_h_w;
+
+ auto gamma_val = 1.0f;
+ if (gamma != nullptr)
+ {
+ gamma_val = *reinterpret_cast<T *>(gamma->ptr_to_element({id[channel_idx]}));
+ }
+ const auto multip_h_w = gamma_val / std::sqrt(var_h_w + epsilon);
+ const auto vec_mean_h_w = wrapper::vdup_n(static_cast<T>(mean_h_w), ExactTagType{});
+ const auto vec_multip_h_w = wrapper::vdup_n(static_cast<T>(multip_h_w), ExactTagType{});
+ auto beta_val = 0.0f;
+ if (beta != nullptr)
+ {
+ beta_val = *reinterpret_cast<T *>(beta->ptr_to_element({id[channel_idx]}));
+ }
+ const auto vec_beta = wrapper::vdup_n(static_cast<T>(beta_val), ExactTagType{});
+
+ execute_window_loop(
+ win_plane,
+ [&](const Coordinates &) {
+ auto input_ptr = reinterpret_cast<T *>(input_plane_it.ptr());
+ auto output_ptr = reinterpret_cast<T *>(output_plane_it.ptr());
+
+ // Compute S elements per iteration
+ int x = window.x().start();
+ auto vec_val = wrapper::vdup_n(static_cast<T>(0.0f), ExactTagType{});
+ for (; x <= (window.x().end() - window_step_x); x += window_step_x)
+ {
+ vec_val = wrapper::vloadq(input_ptr + x);
+ vec_val = wrapper::vadd(
+ wrapper::vmul(wrapper::vsub(vec_val, vec_mean_h_w), vec_multip_h_w), vec_beta);
+ wrapper::vstore(output_ptr + x, vec_val);
+ }
+
+ // Compute left-over elements
+ for (; x < window.x().end(); ++x)
+ {
+ *(output_ptr + x) = ((*(input_ptr + x)) - mean_h_w) * multip_h_w + beta_val;
+ }
+ },
+ input_plane_it, output_plane_it);
+ },
+ input_it);
}
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
@@ -199,8 +204,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, gamma);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->dimension(get_data_layout_dimension_index(
- input->data_layout(), DataLayoutDimension::CHANNEL)) !=
- gamma->dimension(0),
+ input->data_layout(), DataLayoutDimension::CHANNEL)) !=
+ gamma->dimension(0),
"Gamma's size must be the same as size of input's channel");
}
@@ -208,8 +213,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, beta);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->dimension(get_data_layout_dimension_index(
- input->data_layout(), DataLayoutDimension::CHANNEL)) !=
- beta->dimension(0),
+ input->data_layout(), DataLayoutDimension::CHANNEL)) !=
+ beta->dimension(0),
"Beta's size must be the same as size of input's channel");
}
@@ -234,8 +239,8 @@ std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITe
} // namespace
NEInstanceNormalizationLayerKernelEx::NEInstanceNormalizationLayerKernelEx()
- : _func(nullptr), _input(nullptr), _output(nullptr), _gamma(nullptr), _beta(nullptr),
- _epsilon(1e-12)
+ : _func(nullptr), _input(nullptr), _output(nullptr), _gamma(nullptr), _beta(nullptr),
+ _epsilon(1e-12)
{
}
@@ -251,7 +256,7 @@ void NEInstanceNormalizationLayerKernelEx::configure(ITensor *input, ITensor *ou
_epsilon = epsilon;
ARM_COMPUTE_ERROR_THROW_ON(
- validate_arguments(_input->info(), _output->info(), gamma->info(), beta->info(), epsilon));
+ validate_arguments(_input->info(), _output->info(), gamma->info(), beta->info(), epsilon));
if (_input->info()->data_type() == DataType::F32)
{
@@ -282,7 +287,7 @@ Status NEInstanceNormalizationLayerKernelEx::validate(const ITensorInfo *input,
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, gamma, beta, epsilon));
ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(
- input->clone().get(), (output == nullptr ? input->clone().get() : output->clone().get()))));
+ input->clone().get(), (output == nullptr ? input->clone().get() : output->clone().get()))));
return Status{};
}
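The reindented NCHW path above computes, per (H, W) plane, a running sum and sum of squares (vectorized, with a pairwise-add reduction), derives mean and variance from them, and then normalizes the plane with an optional per-channel gamma/beta. The per-plane math reduces to:

#include <cmath>
#include <cstddef>

// Per-plane instance normalization over hw = H * W elements; scalar model of
// the kernel's two-pass loop (gamma defaults to 1, beta to 0).
void instance_norm_plane(const float *in, float *out, size_t hw,
                         float gamma, float beta, float epsilon)
{
  float sum = 0.f, sum_squares = 0.f;
  for (size_t i = 0; i < hw; ++i)
  {
    sum += in[i];
    sum_squares += in[i] * in[i];
  }
  const float mean = sum / hw;
  const float var = sum_squares / hw - mean * mean;
  const float multip = gamma / std::sqrt(var + epsilon);
  for (size_t i = 0; i < hw; ++i)
    out[i] = (in[i] - mean) * multip + beta;
}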
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEMultiplyScaleFactorKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEMultiplyScaleFactorKernel.cpp
index b92130cec..8ad998313 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEMultiplyScaleFactorKernel.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEMultiplyScaleFactorKernel.cpp
@@ -42,13 +42,15 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/NEON/NEAsymm.h"
-#include "arm_compute/core/NEON/wrapper/wrapper.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
-#include "arm_compute/core/CPP/Validate.h"
+#include "src/core/CPP/Validate.h"
+#include "src/core/NEON/INEKernel.h"
+#include "src/core/NEON/NEAsymm.h"
+#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
@@ -123,15 +125,17 @@ inline float32x4x4_t multiply_scale_vec(const int32x4x4_t &iv, float scale)
const float32x4_t vscale = vdupq_n_f32(scale);
const float32x4x4_t ret = {{
- vmulq_f32(vcvtq_f32_s32(iv.val[0]), vscale), vmulq_f32(vcvtq_f32_s32(iv.val[1]), vscale),
- vmulq_f32(vcvtq_f32_s32(iv.val[2]), vscale), vmulq_f32(vcvtq_f32_s32(iv.val[3]), vscale),
+ vmulq_f32(vcvtq_f32_s32(iv.val[0]), vscale),
+ vmulq_f32(vcvtq_f32_s32(iv.val[1]), vscale),
+ vmulq_f32(vcvtq_f32_s32(iv.val[2]), vscale),
+ vmulq_f32(vcvtq_f32_s32(iv.val[3]), vscale),
}};
return ret;
}
} // namespace
NEMultiplyScaleFactorKernel::NEMultiplyScaleFactorKernel()
- : _input(nullptr), _scale_factor(nullptr), _output(nullptr), _multiplier(1.f)
+ : _input(nullptr), _scale_factor(nullptr), _output(nullptr), _multiplier(1.f)
{
}
@@ -140,7 +144,7 @@ void NEMultiplyScaleFactorKernel::configure(const ITensor *input, const ITensor
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(
- validate_arguments(input->info(), scale_factor->info(), output->info()));
+ validate_arguments(input->info(), scale_factor->info(), output->info()));
_input = input;
_scale_factor = scale_factor;
@@ -180,25 +184,25 @@ template <typename T> void NEMultiplyScaleFactorKernel::multiply(const Window &w
Iterator output(_output, win_collapsed);
win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
execute_window_loop(
- win_collapsed,
- [&](const Coordinates &id) {
- auto scale = *reinterpret_cast<T *>(_scale_factor->ptr_to_element({id.y()}));
- scale *= _multiplier;
-
- const auto input_ptr = reinterpret_cast<const int32_t *>(input.ptr());
- auto output_ptr = reinterpret_cast<T *>(output.ptr());
- int x = window_start_x;
- for (; x <= (window_end_x - window_step); x += window_step)
- {
- store_result<float>(&output_ptr[x], multiply_scale_vec(load_value(&input_ptr[x]), scale));
- }
- // Compute left-over elements
- for (; x < window_end_x; ++x)
- {
- output_ptr[x] = input_ptr[x] * scale;
- }
- },
- input, output);
+ win_collapsed,
+ [&](const Coordinates &id) {
+ auto scale = *reinterpret_cast<T *>(_scale_factor->ptr_to_element({id.y()}));
+ scale *= _multiplier;
+
+ const auto input_ptr = reinterpret_cast<const int32_t *>(input.ptr());
+ auto output_ptr = reinterpret_cast<T *>(output.ptr());
+ int x = window_start_x;
+ for (; x <= (window_end_x - window_step); x += window_step)
+ {
+ store_result<float>(&output_ptr[x], multiply_scale_vec(load_value(&input_ptr[x]), scale));
+ }
+ // Compute left-over elements
+ for (; x < window_end_x; ++x)
+ {
+ output_ptr[x] = input_ptr[x] * scale;
+ }
+ },
+ input, output);
}
void NEMultiplyScaleFactorKernel::run(const Window &window, const ThreadInfo &info)
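The multiply kernel scales each int32 row by that row's scale factor times a global multiplier, producing float output; this is effectively per-row (per-channel) dequantization of a symmetrically quantized tensor. A scalar model (row-major 2-D view, illustrative only):

#include <cstddef>
#include <cstdint>

// Simplified NEMultiplyScaleFactorKernel:
// out[r][c] = in[r][c] * scale_factor[r] * multiplier.
void multiply_scale_factor(const int32_t *in, const float *scale_factor,
                           float *out, size_t rows, size_t cols, float multiplier)
{
  for (size_t r = 0; r < rows; ++r)
  {
    const float scale = scale_factor[r] * multiplier;
    for (size_t c = 0; c < cols; ++c)
      out[r * cols + c] = static_cast<float>(in[r * cols + c]) * scale;
  }
}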
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEOneHotKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEOneHotKernel.cpp
new file mode 100644
index 000000000..e56fbf7f3
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEOneHotKernel.cpp
@@ -0,0 +1,233 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/NEON/kernels/NEOneHotKernel.h"
+#include "src/core/CPP/Validate.h"
+#include "arm_compute/core/Coordinates.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/IAccessWindow.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/Window.h"
+#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
+
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
+namespace arm_compute
+{
+namespace
+{
+/** Validate the depth
+ *
+ * Validate that depth is not negative
+ *
+ * @param[in] depth Depth tensor.
+ * @param[in] output Output tensor.
+ * @param[in] axis Axis of depth.
+ */
+template <typename U> void validate_depth(const ITensor *depth, const ITensor *output, int axis)
+{
+ ARM_COMPUTE_ERROR_ON(*(reinterpret_cast<U *>(depth->buffer())) < 0);
+ ARM_COMPUTE_ERROR_ON(static_cast<U>(output->info()->tensor_shape()[axis]) !=
+ *(reinterpret_cast<U *>(depth->buffer())));
+}
+
+Status validate_arguments(const ITensorInfo *indices, const ITensorInfo *depth,
+ const ITensorInfo *on_value, const ITensorInfo *off_value,
+ const ITensorInfo *output, int axis)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(indices, depth, on_value, off_value, output);
+ const int actual_axis = wrap_around(axis, static_cast<int>(output->num_dimensions()));
+ ARM_COMPUTE_RETURN_ERROR_ON(output->num_dimensions() > 4);
+ ARM_COMPUTE_RETURN_ERROR_ON(on_value->tensor_shape().total_size() != 1);
+ ARM_COMPUTE_RETURN_ERROR_ON(0 > actual_axis ||
+ actual_axis >= static_cast<int>(output->num_dimensions()));
+ ARM_COMPUTE_RETURN_ERROR_ON(on_value->data_type() == DataType::UNKNOWN);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(on_value, 1, DataType::U8, DataType::S8,
+ DataType::U16, DataType::S16, DataType::F16,
+ DataType::U32, DataType::S32, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(indices, 1, DataType::U32, DataType::S32);
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(on_value, off_value);
+ if (output->total_size() != 0)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(on_value, output);
+ }
+
+ return Status{};
+}
+
+template <typename U, typename Enable = void> bool isOnValue(U) { return true; }
+
+template <typename U, std::enable_if_t<std::is_integral<U>::value, int> = 0>
+bool isOnValue(U index, U depth)
+{
+ return index >= 0 && index < depth;
+}
+} // namespace
+
+NEOneHotKernel::NEOneHotKernel()
+ : _indices{nullptr}, _depth{nullptr}, _on_value{nullptr},
+ _off_value{nullptr}, _axis{-1}, _output{nullptr}, _func{}
+{
+}
+
+template <typename U>
+void NEOneHotKernel::onehot_0_axis(const Window &window, const ThreadInfo &info)
+{
+ ARM_COMPUTE_UNUSED(info);
+ // Validate that the depth is not negative
+ validate_depth<U>(_depth, _output, _axis);
+ Window output_window{window};
+ output_window.set(Window::DimX, Window::Dimension(0, 1, 1));
+ Iterator output_it(_output, output_window);
+ const U off_value = *reinterpret_cast<U *>(_off_value->buffer());
+ execute_window_loop(
+ output_window,
+ [&](const Coordinates &id) {
+ std::fill_n(output_it.ptr(), _output->info()->dimension(0) * _output->info()->element_size(),
+ off_value);
+ Coordinates indices_id(id);
+ indices_id.remove(0);
+ const U new_index = *(reinterpret_cast<U *>(_indices->ptr_to_element(indices_id)));
+ if (isOnValue(new_index, *(reinterpret_cast<U *>(_depth->buffer()))))
+ {
+ Coordinates onehot_id(id);
+ onehot_id.set(0, new_index);
+ std::copy_n(_on_value->buffer(), _output->info()->element_size(),
+ _output->ptr_to_element(onehot_id));
+ }
+ },
+ output_it);
+}
+
+template <typename U>
+inline void NEOneHotKernel::onehot_n_axis(const Window &window, const ThreadInfo &info)
+{
+ ARM_COMPUTE_UNUSED(info);
+ // Validate that the depth value is not negative
+ validate_depth<U>(_depth, _output, _axis);
+ Iterator output_it(_output, window);
+ execute_window_loop(
+ window,
+ [&](const Coordinates &id) {
+ Coordinates indices_id(id);
+ indices_id.remove(_axis);
+ const U new_index = *(reinterpret_cast<U *>(_indices->ptr_to_element(indices_id)));
+ if (isOnValue(new_index, *(reinterpret_cast<U *>(_depth->buffer()))))
+ {
+ Coordinates onehot_id(id);
+ onehot_id.set(_axis, new_index);
+ std::copy_n(static_cast<U>(id[_axis]) == new_index ? _on_value->buffer()
+ : _off_value->buffer(),
+ _output->info()->element_size(), output_it.ptr());
+ }
+ },
+ output_it);
+}
+
+void NEOneHotKernel::configure(const ITensor *indices, const ITensor *depth,
+ const ITensor *on_value, const ITensor *off_value, ITensor *output,
+ int axis)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(indices, depth, on_value, off_value, output);
+ ARM_COMPUTE_ERROR_ON(output->info()->total_size() == 0);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(indices->info(), depth->info(), on_value->info(),
+ off_value->info(), output->info(), axis));
+ _indices = indices;
+ _depth = depth;
+ _on_value = on_value;
+ _off_value = off_value;
+ _output = output;
+ _axis = wrap_around(axis, static_cast<int>(output->info()->num_dimensions()));
+ if (0 == _axis)
+ {
+ switch (_indices->info()->data_type())
+ {
+ case DataType::U32:
+ _func = &NEOneHotKernel::onehot_0_axis<uint32_t>;
+ break;
+ case DataType::S32:
+ _func = &NEOneHotKernel::onehot_0_axis<int32_t>;
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Not supported");
+ break;
+ }
+ }
+ else
+ {
+ switch (_indices->info()->data_type())
+ {
+ case DataType::U32:
+ _func = &NEOneHotKernel::onehot_n_axis<uint32_t>;
+ break;
+ case DataType::S32:
+ _func = &NEOneHotKernel::onehot_n_axis<int32_t>;
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Not supported");
+ break;
+ }
+ }
+ // Create window
+ Window win = calculate_max_window(*output->info(), Steps());
+ output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
+ INEKernel::configure(win);
+}
+
+Status NEOneHotKernel::validate(const ITensorInfo *indices, const ITensorInfo *depth,
+ const ITensorInfo *on_value, const ITensorInfo *off_value,
+ const ITensorInfo *output, int axis)
+{
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ validate_arguments(indices, depth, on_value, off_value, output, axis));
+ return Status{};
+}
+
+void NEOneHotKernel::run(const Window &window, const ThreadInfo &info)
+{
+ ARM_COMPUTE_UNUSED(info);
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON(_func == nullptr);
+ (this->*_func)(window, info);
+}
+} // namespace arm_compute
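
For reference, the semantics NEOneHotKernel implements along axis 0 reduce to the following standalone sketch (illustrative only: plain buffers instead of ACL tensors, and the function name is hypothetical):

#include <cstddef>
#include <cstdio>
#include <vector>

// For each index i, emit a row of length `depth` filled with `off_value`,
// with `on_value` placed at position indices[i] when that index lies inside
// [0, depth); an out-of-range index leaves its row all-off, mirroring the
// isOnValue() check above.
std::vector<float> one_hot(const std::vector<int> &indices, int depth, float on_value,
                           float off_value)
{
  std::vector<float> out(indices.size() * depth, off_value);
  for (std::size_t i = 0; i < indices.size(); ++i)
  {
    const int idx = indices[i];
    if (idx >= 0 && idx < depth)
      out[i * depth + idx] = on_value;
  }
  return out;
}

int main()
{
  const auto out = one_hot({1, 3, 5}, 4, 1.f, 0.f);
  for (std::size_t i = 0; i < 3; ++i)
  {
    for (int d = 0; d < 4; ++d)
      std::printf("%.0f ", out[i * 4 + d]);
    std::printf("\n"); // rows: 0 1 0 0 / 0 0 0 1 / 0 0 0 0 (index 5 is out of range)
  }
  return 0;
}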
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEQuantizationSymmetricKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEQuantizationSymmetricKernel.cpp
index 5841f1d69..420e5063c 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEQuantizationSymmetricKernel.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEQuantizationSymmetricKernel.cpp
@@ -42,13 +42,16 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/NEON/NEAsymm.h"
-#include "arm_compute/core/NEON/wrapper/wrapper.h"
+#include "src/core/NEON/NEAsymm.h"
+#include "src/core/NEON/INEKernel.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
-#include "arm_compute/core/CPP/Validate.h"
+#include "src/core/CPP/Validate.h"
+
+#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/helpers/WindowHelpers.h"
#include <arm_neon.h>
@@ -107,19 +110,15 @@ inline int8x16_t vquantizeSymm(const float32x4x4_t &fv, float scale_factor_inv,
const int32x4x4_t rf = {{
#ifdef __aarch64__
- vminq_s32(vposend,
- vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[0], vinvscale))))),
- vminq_s32(vposend,
- vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[1], vinvscale))))),
- vminq_s32(vposend,
- vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[2], vinvscale))))),
- vminq_s32(vposend,
- vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[3], vinvscale))))),
+ vminq_s32(vposend, vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[0], vinvscale))))),
+ vminq_s32(vposend, vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[1], vinvscale))))),
+ vminq_s32(vposend, vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[2], vinvscale))))),
+ vminq_s32(vposend, vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[3], vinvscale))))),
#else //__aarch64__
- vminq_s32(vposend, vmaxq_s32(vnagend, vcvtq_s32_f32(round(vmulq_f32(fv.val[0], vinvscale))))),
- vminq_s32(vposend, vmaxq_s32(vnagend, vcvtq_s32_f32(round(vmulq_f32(fv.val[1], vinvscale))))),
- vminq_s32(vposend, vmaxq_s32(vnagend, vcvtq_s32_f32(round(vmulq_f32(fv.val[2], vinvscale))))),
- vminq_s32(vposend, vmaxq_s32(vnagend, vcvtq_s32_f32(round(vmulq_f32(fv.val[3], vinvscale))))),
+ vminq_s32(vposend, vmaxq_s32(vnagend, vcvtq_s32_f32(round(vmulq_f32(fv.val[0], vinvscale))))),
+ vminq_s32(vposend, vmaxq_s32(vnagend, vcvtq_s32_f32(round(vmulq_f32(fv.val[1], vinvscale))))),
+ vminq_s32(vposend, vmaxq_s32(vnagend, vcvtq_s32_f32(round(vmulq_f32(fv.val[2], vinvscale))))),
+ vminq_s32(vposend, vmaxq_s32(vnagend, vcvtq_s32_f32(round(vmulq_f32(fv.val[3], vinvscale))))),
#endif //__aarch64__
}};
const int8x8_t pa = vqmovn_s16(vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1])));
@@ -129,7 +128,7 @@ inline int8x16_t vquantizeSymm(const float32x4x4_t &fv, float scale_factor_inv,
} // namespace
NEQuantizationSymmetricKernel::NEQuantizationSymmetricKernel()
- : _input(nullptr), _output(nullptr), _scale_factor(nullptr)
+ : _input(nullptr), _output(nullptr), _scale_factor(nullptr)
{
}
@@ -138,7 +137,7 @@ void NEQuantizationSymmetricKernel::configure(const ITensor *input, ITensor *out
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(
- validate_arguments(input->info(), output->info(), scale_factor->info()));
+ validate_arguments(input->info(), output->info(), scale_factor->info()));
_input = input;
_output = output;
@@ -182,40 +181,40 @@ template <typename T> void NEQuantizationSymmetricKernel::quantize(const Window
const auto dim_x = _input->info()->dimension(0);
win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
execute_window_loop(
- win_collapsed,
- [&](const Coordinates &id) {
- const auto start = reinterpret_cast<const T *>(input.ptr());
- const auto min_max = std::minmax_element(start, start + dim_x);
- const auto int8_scale = 127;
- auto range = std::max(std::abs(*min_max.first), std::abs(*min_max.second));
- if (range == 0)
- {
- *reinterpret_cast<T *>(_scale_factor->ptr_to_element({id.y()})) = 1;
- range = 1;
- }
- else
- {
- *reinterpret_cast<T *>(_scale_factor->ptr_to_element({id.y()})) = range / int8_scale;
- }
- const auto scale_factor_inv = int8_scale / range;
-
- auto input_ptr = reinterpret_cast<const T *>(input.ptr());
- auto output_ptr = reinterpret_cast<int8_t *>(output.ptr());
- int x = window_start_x;
- for (; x <= (window_end_x - window_step); x += window_step)
- {
- wrapper::vstore(&output_ptr[x],
- vquantizeSymm(load_value(&input_ptr[x]), scale_factor_inv, int8_scale));
- }
- // Compute left-over elements
- for (; x < window_end_x; ++x)
- {
- int quantized = arm_compute::round(input_ptr[x] * scale_factor_inv, rounding_policy);
- quantized = std::min(int8_scale, std::max(quantized, -int8_scale));
- output_ptr[x] = static_cast<int8_t>(quantized);
- }
- },
- input, output);
+ win_collapsed,
+ [&](const Coordinates &id) {
+ const auto start = reinterpret_cast<const T *>(input.ptr());
+ const auto min_max = std::minmax_element(start, start + dim_x);
+ const auto int8_scale = 127;
+ auto range = std::max(std::abs(*min_max.first), std::abs(*min_max.second));
+ if (range == 0)
+ {
+ *reinterpret_cast<T *>(_scale_factor->ptr_to_element({id.y()})) = 1;
+ range = 1;
+ }
+ else
+ {
+ *reinterpret_cast<T *>(_scale_factor->ptr_to_element({id.y()})) = range / int8_scale;
+ }
+ const auto scale_factor_inv = int8_scale / range;
+
+ auto input_ptr = reinterpret_cast<const T *>(input.ptr());
+ auto output_ptr = reinterpret_cast<int8_t *>(output.ptr());
+ int x = window_start_x;
+ for (; x <= (window_end_x - window_step); x += window_step)
+ {
+ wrapper::vstore(&output_ptr[x],
+ vquantizeSymm(load_value(&input_ptr[x]), scale_factor_inv, int8_scale));
+ }
+ // Compute left-over elements
+ for (; x < window_end_x; ++x)
+ {
+ int quantized = arm_compute::round(input_ptr[x] * scale_factor_inv, rounding_policy);
+ quantized = std::min(int8_scale, std::max(quantized, -int8_scale));
+ output_ptr[x] = static_cast<int8_t>(quantized);
+ }
+ },
+ input, output);
}
void NEQuantizationSymmetricKernel::run(const Window &window, const ThreadInfo &info)
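
The reflowed quantize loop above performs a per-row symmetric int8 quantization: the row's absolute maximum maps to 127 and the per-row scale is stored in _scale_factor. A scalar sketch of the same math, assuming std::lround approximates the kernel's rounding policy:

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <vector>

void quantize_symm_row(const std::vector<float> &row, std::vector<int8_t> &out, float &scale)
{
  const int int8_scale = 127;
  float range = 0.f;
  for (float v : row)
    range = std::max(range, std::abs(v)); // same result as minmax + max(|min|, |max|)
  if (range == 0.f)
  {
    scale = 1.f; // avoid division by zero, as the kernel does
    range = 1.f;
  }
  else
  {
    scale = range / int8_scale; // stored per row in _scale_factor
  }
  const float inv = int8_scale / range;
  out.resize(row.size());
  for (std::size_t i = 0; i < row.size(); ++i)
  {
    int q = static_cast<int>(std::lround(row[i] * inv));
    q = std::min(int8_scale, std::max(q, -int8_scale)); // clamp to [-127, 127]
    out[i] = static_cast<int8_t>(q);
  }
}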
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEReductionOperationKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEReductionOperationKernelEx.cpp
deleted file mode 100644
index 3b65eac10..000000000
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEReductionOperationKernelEx.cpp
+++ /dev/null
@@ -1,693 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/NEON/kernels/NEReductionOperationKernelEx.h"
-
-#include "arm_compute/core/CPP/Validate.h"
-#include "arm_compute/core/Coordinates.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/IAccessWindow.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/NEON/NEMath.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-
-#include "arm_compute/core/NEON/wrapper/wrapper.h"
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace
-{
-// Helper function to calculate the minimum value of the input vector. All the elements in the
-// output vector contain the min value.
-float32x2_t calculate_min(float32x4_t in)
-{
- auto pmin = wrapper::vpmin(wrapper::vgethigh(in), wrapper::vgetlow(in));
- return wrapper::vpmin(pmin, pmin);
-}
-
-// Helper function to calculate the maximum value of the input vector. All the elements in the
-// output vector contain the max value.
-float32x2_t calculate_max(float32x4_t in)
-{
- auto pmax = wrapper::vpmax(wrapper::vgethigh(in), wrapper::vgetlow(in));
- return wrapper::vpmax(pmax, pmax);
-}
-// Helper function to calculate the minimum value of the input vector. All the elements in the
-// output vector contain the min value.
-int32x2_t calculate_min(int32x4_t in)
-{
- auto pmin = wrapper::vpmin(wrapper::vgethigh(in), wrapper::vgetlow(in));
- return wrapper::vpmin(pmin, pmin);
-}
-
-// Helper function to calculate the maximum value of the input vector. All the elements in the
-// output vector contain the max value.
-int32x2_t calculate_max(int32x4_t in)
-{
- auto pmax = wrapper::vpmax(wrapper::vgethigh(in), wrapper::vgetlow(in));
- return wrapper::vpmax(pmax, pmax);
-}
-
-// Helper function to calculate the minimum value of the input vector. All the elements in the
-// output vector contain the min value.
-inline uint8x8_t calculate_min(uint8x16_t in)
-{
- auto pmin = wrapper::vpmin(wrapper::vgethigh(in), wrapper::vgetlow(in));
- pmin = wrapper::vpmin(pmin, pmin);
- pmin = wrapper::vpmin(pmin, pmin);
- return wrapper::vpmin(pmin, pmin);
-}
-// Helper function to calculate the maximum value of the input vector. All the elements in the
-// output vector contain the max value.
-inline uint8x8_t calculate_max(uint8x16_t in)
-{
- auto pmax = wrapper::vpmax(wrapper::vgethigh(in), wrapper::vgetlow(in));
- pmax = wrapper::vpmax(pmax, pmax);
- pmax = wrapper::vpmax(pmax, pmax);
- return wrapper::vpmax(pmax, pmax);
-}
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-// Helper function to calculate the minimum value of the input vector. All the elements in the
-// output vector contain the min value.
-inline float16x4_t calculate_min(float16x8_t in)
-{
- auto pmin = wrapper::vpmin(wrapper::vgethigh(in), wrapper::vgetlow(in));
- pmin = wrapper::vpmin(pmin, pmin);
- return wrapper::vpmin(pmin, pmin);
-}
-// Helper function to calculate the maximum value of the input vector. All the elements in the
-// output vector contain the max value.
-inline float16x4_t calculate_max(float16x8_t in)
-{
- auto pmax = wrapper::vpmax(wrapper::vgethigh(in), wrapper::vgetlow(in));
- pmax = wrapper::vpmax(pmax, pmax);
- return wrapper::vpmax(pmax, pmax);
-}
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-template <class F> class Reducer
-{
-public:
- static void reduceX(const Window &window, const ITensor *input, ITensor *output, F f,
- const ReduceOperation op)
- {
- // Set out window
- Window out_window(window);
- out_window.set(Window::DimX, Window::Dimension(0, 0, 0));
-
- // Get first input and output slices
- Window in_slice = window.first_slice_window_1D();
- Window out_slice = out_window.first_slice_window_1D();
-
- do
- {
- Iterator in(input, in_slice);
- Iterator out(output, out_slice);
-
- f(in, out, in_slice, out_slice, *input->info(), op);
- } while (window.slide_window_slice_1D(in_slice) && out_window.slide_window_slice_1D(out_slice));
- }
- static void reduceY(const Window &window, const ITensor *input, ITensor *output, F f,
- const ReduceOperation op)
- {
- // Set in window
- Window in_window(window);
- Window out_window(window);
-
- in_window.set(Window::DimY, Window::Dimension(0, 1, 1));
- out_window.set(Window::DimY, Window::Dimension(0, output->info()->dimension(1),
- output->info()->dimension(1)));
-
- // Get first input and output slices
- Window in_slice = in_window.first_slice_window_2D();
- Window out_slice = out_window.first_slice_window_2D();
-
- do
- {
- Iterator in(input, in_slice);
- Iterator out(output, out_slice);
-
- f(in, out, in_slice, out_slice, *input->info(), 1, op);
- } while (in_window.slide_window_slice_2D(in_slice) &&
- out_window.slide_window_slice_2D(out_slice));
- }
- static void reduceZ(const Window &window, const ITensor *input, ITensor *output, F f,
- const ReduceOperation op)
- {
- // Set in window
- Window in_window(window);
- Window out_window(window);
-
- in_window.set(Window::DimZ, Window::Dimension(0, 1, 1));
- out_window.set(Window::DimZ, Window::Dimension(0, output->info()->dimension(2),
- output->info()->dimension(2)));
-
- // Get first input and output slices
- Window in_slice = in_window.first_slice_window_3D();
- Window out_slice = out_window.first_slice_window_3D();
-
- do
- {
- Iterator in(input, in_slice);
- Iterator out(output, out_slice);
-
- f(in, out, in_slice, out_slice, *input->info(), 2, op);
- } while (in_window.slide_window_slice_3D(in_slice) &&
- out_window.slide_window_slice_3D(out_slice));
- }
- static void reduceW(const Window &window, const ITensor *input, ITensor *output, F f,
- const ReduceOperation op)
- {
- // Set in/out window
- Window in_window(window);
- Window out_window(window);
-
- in_window.set(3, Window::Dimension(0, 1, 1));
- out_window.set(3, Window::Dimension(0, 1, 1));
-
- // Get first input and output slices
- Window in_slice = in_window.first_slice_window_4D();
- Window out_slice = out_window.first_slice_window_4D();
-
- do
- {
- Iterator in(input, in_slice);
- Iterator out(output, out_slice);
-
- f(in, out, in_slice, out_slice, *input->info(), 3, op);
- } while (in_window.slide_window_slice_4D(in_slice) &&
- out_window.slide_window_slice_4D(out_slice));
- }
-};
-
-template <typename T, int S> struct RedOpX
-{
- /** NEON vector tag type. */
- using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;
-
- inline void operator()(Iterator &input, Iterator &output, Window &in_slice, Window &out_slice,
- const TensorInfo &in_info, const ReduceOperation op)
- {
- ARM_COMPUTE_UNUSED(out_slice);
- ARM_COMPUTE_UNUSED(in_info);
- auto init_res_value = static_cast<T>(0.f);
- switch (op)
- {
- case ReduceOperation::MIN:
- case ReduceOperation::MAX:
- {
- init_res_value = *reinterpret_cast<T *>(input.ptr());
- break;
- }
- default:
- break;
- }
- auto vec_res_value = wrapper::vdup_n(init_res_value, ExactTagType{});
-
- execute_window_loop(in_slice,
- [&](const Coordinates &) {
- const auto in_ptr = reinterpret_cast<const T *>(input.ptr());
- const auto vec_elements = wrapper::vloadq(in_ptr);
-
- switch (op)
- {
- case ReduceOperation::MIN:
- {
- vec_res_value = wrapper::vmin(vec_elements, vec_res_value);
- break;
- }
- case ReduceOperation::MAX:
- {
- vec_res_value = wrapper::vmax(vec_elements, vec_res_value);
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Not supported");
- }
- },
- input);
-
- switch (op)
- {
- case ReduceOperation::MIN:
- {
- *(reinterpret_cast<T *>(output.ptr())) = wrapper::vgetlane(calculate_min(vec_res_value), 0);
- break;
- }
- case ReduceOperation::MAX:
- {
- *(reinterpret_cast<T *>(output.ptr())) = wrapper::vgetlane(calculate_max(vec_res_value), 0);
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Not supported");
- }
- }
-};
-
-struct RedOpX_qasymm8
-{
- inline void operator()(Iterator &input, Iterator &output, Window &in_slice, Window &out_slice,
- const TensorInfo &in_info, const ReduceOperation op)
- {
- ARM_COMPUTE_UNUSED(out_slice);
- ARM_COMPUTE_UNUSED(in_info);
-
- uint8x16_t vec_res_value = {0};
-
- if (op == ReduceOperation::MIN || op == ReduceOperation::MAX)
- {
- vec_res_value = wrapper::vdup_n(*input.ptr(), wrapper::traits::vector_128_tag{});
- }
-
- execute_window_loop(in_slice,
- [&](const Coordinates &) {
- const auto vec_elements = wrapper::vloadq(input.ptr());
- switch (op)
- {
- case ReduceOperation::MIN:
- {
- vec_res_value = wrapper::vmin(vec_elements, vec_res_value);
- break;
- }
- case ReduceOperation::MAX:
- {
- vec_res_value = wrapper::vmax(vec_elements, vec_res_value);
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Not supported");
- }
- },
- input);
-
- switch (op)
- {
- case ReduceOperation::MIN:
- {
- *(output.ptr()) = static_cast<uint8_t>(wrapper::vgetlane(calculate_min(vec_res_value), 0));
- break;
- }
- case ReduceOperation::MAX:
- {
- *(output.ptr()) = static_cast<uint8_t>(wrapper::vgetlane(calculate_max(vec_res_value), 0));
- break;
- }
- default:
- {
- ARM_COMPUTE_ERROR("Not supported");
- }
- }
- }
-};
-
-template <typename T, int S> struct RedOpYZW
-{
- /** NEON vector tag type. */
- using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;
- using neon_vector = typename wrapper::traits::neon_vector<T, S>::type;
-
- inline void operator()(Iterator &input, Iterator &output, Window &in_slice, Window &out_slice,
- const TensorInfo &in_info, int axis, const ReduceOperation op)
- {
- ARM_COMPUTE_UNUSED(out_slice);
-
- execute_window_loop(
- in_slice,
- [&](const Coordinates &) {
- neon_vector vec_res_value = {0};
- switch (op)
- {
- case ReduceOperation::MIN:
- case ReduceOperation::MAX:
- {
- vec_res_value = wrapper::vloadq(reinterpret_cast<T *>(input.ptr()));
- break;
- }
- default:
- {
- vec_res_value = wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{});
- break;
- }
- }
-
- for (unsigned int dim = 0; dim < in_info.dimension(axis); ++dim)
- {
- T *in_ptr;
- switch (axis)
- {
- case 1:
- in_ptr = reinterpret_cast<T *>(
- input.ptr() + in_info.offset_element_in_bytes(Coordinates(0, dim)));
- break;
- case 2:
- in_ptr = reinterpret_cast<T *>(
- input.ptr() + in_info.offset_element_in_bytes(Coordinates(0, 0, dim)));
- break;
- case 3:
- in_ptr = reinterpret_cast<T *>(
- input.ptr() + in_info.offset_element_in_bytes(Coordinates(0, 0, 0, dim)));
- break;
- default:
- ARM_COMPUTE_ERROR("Not supported");
- }
- const auto vec_elements = wrapper::vloadq(in_ptr);
-
- switch (op)
- {
- case ReduceOperation::MIN:
- {
- vec_res_value = wrapper::vmin(vec_elements, vec_res_value);
- break;
- }
- case ReduceOperation::MAX:
- {
- vec_res_value = wrapper::vmax(vec_elements, vec_res_value);
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Not supported");
- }
- }
- wrapper::vstore(reinterpret_cast<T *>(output.ptr()), vec_res_value);
- },
- input, output);
- }
-};
-
-struct RedOpYZW_qasymm8
-{
- inline void operator()(Iterator &input, Iterator &output, Window &in_slice, Window &out_slice,
- const TensorInfo &in_info, int axis, const ReduceOperation op)
- {
- ARM_COMPUTE_UNUSED(out_slice);
-
- execute_window_loop(
- in_slice,
- [&](const Coordinates &) {
- auto vec_res_value = wrapper::vloadq(input.ptr());
-
- for (unsigned int index_dim = 0; index_dim < in_info.dimension(axis); ++index_dim)
- {
- uint8_t *in_ptr;
- switch (axis)
- {
- case 1:
- in_ptr = input.ptr() + in_info.offset_element_in_bytes(Coordinates(0, index_dim));
- break;
- case 2:
- in_ptr =
- input.ptr() + in_info.offset_element_in_bytes(Coordinates(0, 0, index_dim));
- break;
- case 3:
- in_ptr =
- input.ptr() + in_info.offset_element_in_bytes(Coordinates(0, 0, 0, index_dim));
- break;
- default:
- ARM_COMPUTE_ERROR("Not supported");
- }
- const auto vec_elements = wrapper::vloadq(in_ptr);
-
- switch (op)
- {
- case ReduceOperation::MIN:
- {
- vec_res_value = wrapper::vmin(vec_elements, vec_res_value);
- break;
- }
- case ReduceOperation::MAX:
- {
- vec_res_value = wrapper::vmax(vec_elements, vec_res_value);
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Not supported");
- }
- }
- wrapper::vstore(reinterpret_cast<uint8_t *>(output.ptr()), vec_res_value);
- },
- input, output);
- }
-};
-
-void reduce_op(const Window &window, const ITensor *input, ITensor *output, unsigned int axis,
- const ReduceOperation op)
-{
- const bool is_complex = (input->info()->num_channels() == 2);
- if (is_complex)
- {
- ARM_COMPUTE_ERROR("Not supported");
- }
-
- switch (axis)
- {
- case 0:
- switch (input->info()->data_type())
- {
- case DataType::QASYMM8:
- return Reducer<RedOpX_qasymm8>::reduceX(window, input, output, RedOpX_qasymm8(), op);
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
- case DataType::F16:
- return Reducer<RedOpX<float16_t, 8>>::reduceX(window, input, output,
- RedOpX<float16_t, 8>(), op);
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
- case DataType::F32:
- return Reducer<RedOpX<float, 4>>::reduceX(window, input, output, RedOpX<float, 4>(), op);
- case DataType::S32:
- return Reducer<RedOpX<int32_t, 4>>::reduceX(window, input, output, RedOpX<int32_t, 4>(),
- op);
- default:
- ARM_COMPUTE_ERROR("Not supported");
- }
- case 1:
- switch (input->info()->data_type())
- {
- case DataType::QASYMM8:
- return Reducer<RedOpYZW_qasymm8>::reduceY(window, input, output, RedOpYZW_qasymm8(), op);
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
- case DataType::F16:
- return Reducer<RedOpYZW<float16_t, 8>>::reduceY(window, input, output,
- RedOpYZW<float16_t, 8>(), op);
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
- case DataType::F32:
- return Reducer<RedOpYZW<float, 4>>::reduceY(window, input, output, RedOpYZW<float, 4>(),
- op);
- case DataType::S32:
- return Reducer<RedOpYZW<int32_t, 4>>::reduceY(window, input, output,
- RedOpYZW<int32_t, 4>(), op);
- default:
- ARM_COMPUTE_ERROR("Not supported");
- }
- case 2:
- switch (input->info()->data_type())
- {
- case DataType::QASYMM8:
- return Reducer<RedOpYZW_qasymm8>::reduceZ(window, input, output, RedOpYZW_qasymm8(), op);
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
- case DataType::F16:
- return Reducer<RedOpYZW<float16_t, 8>>::reduceZ(window, input, output,
- RedOpYZW<float16_t, 8>(), op);
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
- case DataType::F32:
- return Reducer<RedOpYZW<float, 4>>::reduceZ(window, input, output, RedOpYZW<float, 4>(),
- op);
- case DataType::S32:
- return Reducer<RedOpYZW<int32_t, 4>>::reduceZ(window, input, output,
- RedOpYZW<int32_t, 4>(), op);
- default:
- ARM_COMPUTE_ERROR("Not supported");
- }
- case 3:
- switch (input->info()->data_type())
- {
- case DataType::QASYMM8:
- return Reducer<RedOpYZW_qasymm8>::reduceW(window, input, output, RedOpYZW_qasymm8(), op);
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
- case DataType::F16:
- return Reducer<RedOpYZW<float16_t, 8>>::reduceW(window, input, output,
- RedOpYZW<float16_t, 8>(), op);
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
- case DataType::F32:
- return Reducer<RedOpYZW<float, 4>>::reduceW(window, input, output, RedOpYZW<float, 4>(),
- op);
- case DataType::S32:
- return Reducer<RedOpYZW<int32_t, 4>>::reduceW(window, input, output,
- RedOpYZW<int32_t, 4>(), op);
- default:
- ARM_COMPUTE_ERROR("Not supported");
- }
- default:
- ARM_COMPUTE_ERROR("Unsupported reduction axis");
- }
-}
-
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis,
- ReduceOperation op)
-{
- ARM_COMPUTE_UNUSED(op);
-
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
-
- if (input->num_channels() == 1)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::S32,
- DataType::F16, DataType::F32);
- }
- else
- {
- ARM_COMPUTE_RETURN_ERROR_MSG("Not support complex");
- }
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions,
- "Reduction axis greater than max number of dimensions");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis");
-
- if (output->total_size() != 0)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON(input->num_channels() != output->num_channels());
-
- const TensorShape output_shape =
- arm_compute::misc::shape_calculator::compute_reduced_shape(input->tensor_shape(), axis);
- const TensorInfo tensor_info_reshaped = input->clone()->set_tensor_shape(output_shape);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &tensor_info_reshaped);
- }
-
- return Status{};
-}
-
-std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output,
- unsigned int axis, ReduceOperation op)
-{
- ARM_COMPUTE_UNUSED(op);
-
- // Calculate output shape and set if empty
- const TensorShape output_shape =
- arm_compute::misc::shape_calculator::compute_reduced_shape(input->tensor_shape(), axis);
-
- // Output auto initialization if not yet initialized
- DataType output_data_type = input->data_type();
- auto_init_if_empty(*output, input->clone()
- ->set_tensor_shape(output_shape)
- .set_data_type(output_data_type)
- .reset_padding()
- .set_is_resizable(true));
-
- unsigned int num_elems_processed_per_iteration = 16 / data_size_from_type(input->data_type());
-
- // Configure kernel window
- Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
- AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
-
- bool window_changed = update_window_and_padding(win, input_access, output_access);
- output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
-
- Status err = (window_changed)
- ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
- : Status{};
-
- return std::make_tuple(err, win);
-}
-} // namespace
-
-NEReductionOperationKernelEx::NEReductionOperationKernelEx()
- : _input(nullptr), _output(nullptr), _reduction_axis(0), _op(ReduceOperation::MAX),
- _border_size()
-{
-}
-
-BorderSize NEReductionOperationKernelEx::border_size() const { return _border_size; }
-
-void NEReductionOperationKernelEx::configure(const ITensor *input, ITensor *output,
- unsigned int axis, ReduceOperation op)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), axis, op));
-
- unsigned int num_elems_processed_per_iteration =
- 16 / data_size_from_type(input->info()->data_type());
-
- _input = input;
- _output = output;
- _border_size =
- (axis == 0)
- ? BorderSize(0, num_elems_processed_per_iteration -
- (input->info()->dimension(0) % num_elems_processed_per_iteration),
- 0, 0)
- : BorderSize();
- _op = op;
- _reduction_axis = axis;
-
- // Configure kernel window
- auto win_config = validate_and_configure_window(_input->info(), _output->info(), axis, op);
-
- ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
-
- INEKernel::configure(std::get<1>(win_config));
-}
-
-Status NEReductionOperationKernelEx::validate(const ITensorInfo *input, const ITensorInfo *output,
- unsigned int axis, ReduceOperation op)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, axis, op));
- ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(
- validate_and_configure_window(input->clone().get(), output->clone().get(), axis, op)));
-
- return Status{};
-}
-
-void NEReductionOperationKernelEx::run(const Window &window, const ThreadInfo &info)
-{
- ARM_COMPUTE_UNUSED(info);
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
-
- reduce_op(window, _input, _output, _reduction_axis, _op);
-}
-} // namespace arm_compute
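
The removed NEReductionOperationKernelEx computed elementwise MIN/MAX reductions along one axis, seeding the accumulator with the first element and folding with NEON pairwise vpmin/vpmax. A scalar sketch of the axis-0 case it covered (illustrative only; names are hypothetical):

#include <algorithm>
#include <vector>

enum class ReduceOp { MIN, MAX };

// Reduce each row of a (rows x cols) buffer along the innermost axis.
std::vector<float> reduce_axis0(const std::vector<float> &in, int rows, int cols, ReduceOp op)
{
  std::vector<float> out(rows);
  for (int r = 0; r < rows; ++r)
  {
    float acc = in[r * cols]; // seed with the first element, as RedOpX did
    for (int c = 1; c < cols; ++c)
      acc = (op == ReduceOp::MIN) ? std::min(acc, in[r * cols + c])
                                  : std::max(acc, in[r * cols + c]);
    out[r] = acc;
  }
  return out;
}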
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLArgMinMaxLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLArgMinMaxLayerEx.cpp
new file mode 100644
index 000000000..6b9b0d4b4
--- /dev/null
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLArgMinMaxLayerEx.cpp
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2018-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/CL/functions/CLArgMinMaxLayerEx.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/runtime/Utils.h"
+
+namespace arm_compute
+{
+CLArgMinMaxLayerEx::CLArgMinMaxLayerEx(std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(std::move(memory_manager)), _results_vector(), _not_reshaped_output(),
+ _reduction_kernels_vector(), _reshape_kernel(), _num_of_stages(), _reduction_axis()
+{
+}
+
+Status CLArgMinMaxLayerEx::validate(const ITensorInfo *input, int axis, const ITensorInfo *output,
+ const ReductionOperation &op)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(op != ReductionOperation::ARG_IDX_MAX &&
+ op != ReductionOperation::ARG_IDX_MIN,
+ "Invalid reduction operation");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= static_cast<int>(TensorShape::num_max_dimensions),
+ "Reduction axis greater than max number of dimensions");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis");
+ const unsigned int num_of_stages =
+ utils::calculate_number_of_stages_only_x_axis(input->dimension(0), axis);
+
+ DataType output_data_type = DataType::S32;
+ TensorInfo not_reshaped_output;
+ const auto input_num_channels = input->num_channels();
+ const auto input_qinfo = input->quantization_info();
+
+ if (output->total_size() != 0)
+ {
+ output_data_type = output->data_type();
+ const TensorInfo expected_output_shape =
+ output->clone()->set_tensor_shape(arm_compute::misc::shape_calculator::compute_reduced_shape(
+ input->tensor_shape(), axis, false));
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&expected_output_shape, output);
+ }
+
+ auto shape_before_reshape = input->tensor_shape();
+ shape_before_reshape.set(axis, 1);
+ auto initialize_tensorinfo = [](TensorInfo &ti, TensorShape shape, DataType data_type,
+ int num_channels, QuantizationInfo qinfo) {
+ ti.set_data_type(data_type)
+ .set_tensor_shape(shape)
+ .set_num_channels(num_channels)
+ .set_quantization_info(qinfo);
+ };
+
+ initialize_tensorinfo(not_reshaped_output, shape_before_reshape, output_data_type,
+ input_num_channels, input_qinfo);
+
+ if (num_of_stages == 1)
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ CLArgMinMaxLayerKernelEx::validate(input, nullptr, &not_reshaped_output, axis, op));
+ }
+ else
+ {
+ // Create temporary tensor infos
+ std::vector<TensorInfo> sums_vector(num_of_stages - 1);
+
+ // Create intermediate tensor info
+ TensorShape shape{input->tensor_shape()};
+
+ for (unsigned int i = 0; i < num_of_stages - 1; i++)
+ {
+ shape.set(0, ceil(shape.x() / 128.f));
+ sums_vector[i].set_data_type(input->data_type());
+ sums_vector[i].set_tensor_shape(shape);
+ sums_vector[i].set_num_channels(input->num_channels());
+ }
+
+ // Validate ReductionOperation only on first kernel
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ CLArgMinMaxLayerKernelEx::validate(input, nullptr, &sums_vector[0], axis, op));
+
+ // Validate ReductionOperation on intermediate stages
+ for (unsigned int i = 1; i < num_of_stages - 1; ++i)
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ CLArgMinMaxLayerKernelEx::validate(input, &sums_vector[i - 1], &sums_vector[i], axis, op));
+ }
+
+ // Validate ReductionOperation on the last stage
+ const unsigned int last_stage = num_of_stages - 1;
+ ARM_COMPUTE_RETURN_ON_ERROR(CLArgMinMaxLayerKernelEx::validate(
+ input, &sums_vector[last_stage - 1], &not_reshaped_output, axis, op));
+ }
+ ARM_COMPUTE_RETURN_ON_ERROR(CLReshapeLayer::validate(&not_reshaped_output, output));
+ return Status{};
+}
+
+void CLArgMinMaxLayerEx::configure(const ICLTensor *input, int axis, ICLTensor *output,
+ const ReductionOperation &op)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+ _num_of_stages = utils::calculate_number_of_stages_only_x_axis(input->info()->dimension(0), axis);
+ _reduction_axis = axis;
+
+ const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_reduced_shape(
+ input->info()->tensor_shape(), axis, false);
+ DataType output_data_type = (output->info()->data_type() == DataType::UNKNOWN)
+ ? DataType::S32
+ : output->info()->data_type();
+ auto_init_if_empty(*output->info(), input->info()
+ ->clone()
+ ->set_tensor_shape(output_shape)
+ .set_data_type(output_data_type)
+ .reset_padding()
+ .set_is_resizable(true));
+
+ // Configure reduction operation kernels
+ _reduction_kernels_vector.resize(_num_of_stages);
+
+ _memory_group.manage(&_not_reshaped_output);
+ // Create temporary tensors
+ if (_num_of_stages == 1)
+ {
+ // Force an early initialization for int64 output type
+ TensorShape output_shape{input->info()->tensor_shape()};
+ output_shape.set(axis, 1);
+ auto_init_if_empty(*_not_reshaped_output.info(), input->info()
+ ->clone()
+ ->set_tensor_shape(output_shape)
+ .set_data_type(output_data_type)
+ .reset_padding()
+ .set_is_resizable(true));
+ _not_reshaped_output.info()->set_tensor_shape(output_shape);
+ _reduction_kernels_vector[0].configure(input, nullptr, &_not_reshaped_output, axis, op);
+ }
+ else
+ {
+ _results_vector.resize(_num_of_stages - 1);
+ TensorShape shape{input->info()->tensor_shape()};
+ for (unsigned int i = 0; i < _num_of_stages - 1; i++)
+ {
+ shape.set(0, ceil(shape.x() / 128.f));
+ _results_vector[i].allocator()->init(
+ input->info()->clone()->set_tensor_shape(shape).set_data_type(output_data_type));
+ }
+
+ // Apply ReductionOperation only on first kernel
+ _memory_group.manage(&_results_vector[0]);
+ _reduction_kernels_vector[0].configure(input, nullptr, &_results_vector[0], axis, op);
+
+ // Apply ReductionOperation on intermediate stages
+ for (unsigned int i = 1; i < _num_of_stages - 1; ++i)
+ {
+ _memory_group.manage(&_results_vector[i]);
+ _reduction_kernels_vector[i].configure(input, &_results_vector[i - 1], &_results_vector[i],
+ axis, op);
+ _results_vector[i - 1].allocator()->allocate();
+ }
+
+ // Apply ReductionOperation on the last stage
+ const unsigned int last_stage = _num_of_stages - 1;
+ _reduction_kernels_vector[last_stage].configure(input, &_results_vector[last_stage - 1],
+ &_not_reshaped_output, axis, op);
+ _results_vector[last_stage - 1].allocator()->allocate();
+ }
+ _reshape_kernel.configure(CLKernelLibrary::get().get_compile_context(), &_not_reshaped_output,
+ output);
+ _not_reshaped_output.allocator()->allocate();
+}
+
+void CLArgMinMaxLayerEx::run()
+{
+ MemoryGroupResourceScope scope_mg(_memory_group);
+
+ for (unsigned int i = 0; i < _num_of_stages; ++i)
+ {
+ CLScheduler::get().enqueue(_reduction_kernels_vector[i], false);
+ }
+ _reshape_kernel.run();
+}
+} // namespace arm_compute
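
The multi-stage path above shrinks the reduced X dimension by a factor of up to 128 per stage, matching the ceil(shape.x() / 128.f) used for the intermediate tensors, before the final reshape. A small sketch of that shrink progression (illustrative; the actual stage count comes from utils::calculate_number_of_stages_only_x_axis, and 70000 is a hypothetical width):

#include <cmath>
#include <cstdio>

int main()
{
  unsigned int dim_x = 70000; // width along the reduced axis
  unsigned int stage = 0;
  while (dim_x > 1)
  {
    dim_x = static_cast<unsigned int>(std::ceil(dim_x / 128.f));
    std::printf("after stage %u: x = %u\n", ++stage, dim_x);
  }
  // 70000 -> 547 -> 5 -> 1: three kernel enqueues before the reshape kernel
  return 0;
}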
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp
index e5122ab8f..31c96b080 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp
@@ -42,13 +42,14 @@
#include "arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
using namespace arm_compute;
void CLBinaryLogicalOp::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output,
BinaryLogicalOperation op)
{
- auto k = support::cpp14::make_unique<CLBinaryLogicalOpKernel>();
+ auto k = std::make_unique<CLBinaryLogicalOpKernel>();
k->configure(input1, input2, output, op);
_kernel = std::move(k);
@@ -57,7 +58,7 @@ void CLBinaryLogicalOp::configure(ICLTensor *input1, ICLTensor *input2, ICLTenso
ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2;
if (broadcasted_info->info()->dimension(0) == 1)
{
- _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
+ _border_handler->configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
}
}
}
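
The REPLICATE border configured above exists because a broadcast input with dimension(0) == 1 must still satisfy vector loads that read past its single element; replication makes those reads return the edge value. A minimal sketch of the idea (illustrative only, scalar form):

#include <algorithm>

// Read data[x] with replicate-border semantics: out-of-range x clamps to the edge.
inline float load_replicated(const float *data, int len, int x)
{
  return data[std::min(std::max(x, 0), len - 1)];
}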
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLCastBool.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLCastBool.cpp
new file mode 100644
index 000000000..96f9c17a9
--- /dev/null
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLCastBool.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2018-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/CL/functions/CLCastBool.h"
+
+#include "arm_compute/core/CL/kernels/CLCastBoolKernel.h"
+
+using namespace arm_compute;
+
+void CLCastBool::configure(ICLTensor *input, ICLTensor *output)
+{
+ auto k = std::make_unique<CLCastBoolKernel>();
+ k->configure(input, output);
+ _kernel = std::move(k);
+}
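
CLCastBool follows the usual one-kernel simple-function pattern, now using C++14 std::make_unique in place of the removed support::cpp14 helper. A stripped-down sketch of that pattern with hypothetical type names:

#include <memory>
#include <utility>

struct IKernel { virtual ~IKernel() = default; };

struct MyKernel : IKernel
{
  // Bind tensors and build the program (no-op in this sketch).
  void configure(int in, int out) { (void)in; (void)out; }
};

struct MySimpleFunction
{
  std::unique_ptr<IKernel> _kernel;
  void configure(int in, int out)
  {
    auto k = std::make_unique<MyKernel>(); // std::make_unique replaces support::cpp14
    k->configure(in, out);
    _kernel = std::move(k); // the base function runs this kernel on each call
  }
};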
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp
index 3dede0562..464f60dee 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp
@@ -45,6 +45,8 @@
#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
#include <memory>
#include <tuple>
@@ -53,16 +55,10 @@ namespace arm_compute
using namespace arm_compute::misc::shape_calculator;
CLDirectTransposeConvLayer::CLDirectTransposeConvLayer(
- std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
- : _memory_group(std::move(memory_manager)),
- _scale_f(),
- _conv_f(),
- _flip_weights(),
- _scaled_output(),
- _original_weights(nullptr),
- _weights_flipped(),
- _flip_axis(),
- _is_prepared(false)
+ std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
+ : _memory_group(std::move(memory_manager)), _scale_f(), _conv_f(), _flip_weights(),
+ _scaled_output(), _original_weights(nullptr), _weights_flipped(), _flip_axis(),
+ _is_prepared(false)
{
}
@@ -74,7 +70,7 @@ Status CLDirectTransposeConvLayer::validate(const ITensorInfo *input, const ITen
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
- input, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::F16, DataType::F32);
+ input, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights);
const DataLayout data_layout = input->data_layout();
@@ -86,8 +82,8 @@ Status CLDirectTransposeConvLayer::validate(const ITensorInfo *input, const ITen
ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) < 1);
auto out_dims = transposeconv_output_dimensions(
- input->dimension(idx_w), input->dimension(idx_h), weights->dimension(idx_w),
- weights->dimension(idx_h), info, invalid_right, invalid_bottom);
+ input->dimension(idx_w), input->dimension(idx_h), weights->dimension(idx_w),
+ weights->dimension(idx_h), info, invalid_right, invalid_bottom);
const TensorShape output_shape = compute_transposeconv_output_shape(out_dims, *input, *weights);
@@ -117,19 +113,19 @@ Status CLDirectTransposeConvLayer::validate(const ITensorInfo *input, const ITen
unsigned int pad_right = 0;
unsigned int pad_top = 0;
unsigned int pad_bottom = 0;
- const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
- *input, *weights, info, out_dims, invalid_right, invalid_bottom, pad_left, pad_right, pad_top,
- pad_bottom);
+ const TensorShape scale_out_shape =
+ compute_transposeconv_upsampled_shape(*input, *weights, info, out_dims, invalid_right,
+ invalid_bottom, pad_left, pad_right, pad_top, pad_bottom);
TensorInfo scale_out_info(input->clone()
- ->set_is_resizable(true)
- .reset_padding()
- .set_tensor_shape(scale_out_shape)
- .set_data_layout(data_layout));
+ ->set_is_resizable(true)
+ .reset_padding()
+ .set_tensor_shape(scale_out_shape)
+ .set_data_layout(data_layout));
const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
ARM_COMPUTE_RETURN_ON_ERROR(CLDeconvolutionLayerUpsample::validate(input, &scale_out_info, info));
- ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayer::validate(&scale_out_info, weights, bias, output,
- conv_info, weights_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ CLConvolutionLayer::validate(&scale_out_info, weights, bias, output, conv_info, weights_info));
return Status{};
}
@@ -171,22 +167,22 @@ void CLDirectTransposeConvLayer::configure(const CLCompileContext &compile_conte
_flip_weights.configure(compile_context, weights, &_weights_flipped, &_flip_axis);
auto out_dims = transposeconv_output_dimensions(
- input->info()->dimension(idx_w), input->info()->dimension(idx_h),
- weights->info()->dimension(idx_w), weights->info()->dimension(idx_h), info, invalid_right,
- invalid_bottom);
+ input->info()->dimension(idx_w), input->info()->dimension(idx_h),
+ weights->info()->dimension(idx_w), weights->info()->dimension(idx_h), info, invalid_right,
+ invalid_bottom);
const TensorShape output_shape =
- compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info());
+ compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info());
// Output auto initialization if not yet initialized
auto_init_if_empty(
- *output->info(),
- input->info()->clone()->set_tensor_shape(output_shape).set_data_layout(data_layout));
+ *output->info(),
+ input->info()->clone()->set_tensor_shape(output_shape).set_data_layout(data_layout));
// Perform validation step
ARM_COMPUTE_ERROR_THROW_ON(CLDirectTransposeConvLayer::validate(
- input->info(), weights->info(), bias == nullptr ? nullptr : bias->info(), output->info(),
- info, invalid_right, invalid_bottom));
+ input->info(), weights->info(), bias == nullptr ? nullptr : bias->info(), output->info(), info,
+ invalid_right, invalid_bottom));
_is_prepared = weights_info.retain_internal_weights();
@@ -195,8 +191,8 @@ void CLDirectTransposeConvLayer::configure(const CLCompileContext &compile_conte
// Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order
// to match output shape
const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
- *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left,
- pad_right, pad_top, pad_bottom);
+ *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left,
+ pad_right, pad_top, pad_bottom);
TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(),
input->info()->quantization_info());
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp
index ae9d8afc6..003ec8042 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp
@@ -39,7 +39,6 @@
*/
#include "arm_compute/runtime/CL/functions/CLEmbeddingLookup.h"
-
#include "arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h"
using namespace arm_compute;
@@ -47,7 +46,7 @@ using namespace arm_compute;
void CLEmbeddingLookup::configure(const ICLTensor *input, ICLTensor *output,
const ICLTensor *lookups)
{
- auto k = support::cpp14::make_unique<CLEmbeddingLookupKernel>();
+ auto k = std::make_unique<CLEmbeddingLookupKernel>();
k->configure(input, output, lookups);
_kernel = std::move(k);
}
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp
index 01989461e..af936e873 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp
@@ -45,7 +45,6 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "support/MemorySupport.h"
#include <algorithm>
@@ -60,7 +59,7 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I
ARM_COMPUTE_UNUSED(weights);
ARM_COMPUTE_UNUSED(output);
ARM_COMPUTE_RETURN_ON_ERROR(
- CLGEMMLowpMatrixMultiplyCore::validate(&input, &weights, nullptr, &output));
+ CLGEMMLowpMatrixMultiplyCore::validate(&input, &weights, nullptr, &output));
return Status{};
}
@@ -68,7 +67,7 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I
void CLFullyConnectedHybridLayerReshapeWeights::configure(const ICLTensor *input, ICLTensor *output)
{
- auto k = support::cpp14::make_unique<CLTransposeKernel>();
+ auto k = std::make_unique<CLTransposeKernel>();
k->configure(input, output);
_kernel = std::move(k);
}
@@ -80,12 +79,12 @@ Status CLFullyConnectedHybridLayerReshapeWeights::validate(const ITensorInfo *in
}
CLFullyConnectedHybridLayer::CLFullyConnectedHybridLayer(
- std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(memory_manager), _reshape_weights_kernel(), _quant_input_kernel(),
- _mm_gemmlowp(memory_manager), _multiply_scale_kernel(), _accumulate_biases_kernel(),
- _reshape_weights_output(), _quantized_input(), _scale_factor(), _gemmlowp_output(),
- _are_weights_reshaped(true), _accumulate_biases(false), _is_prepared(false),
- _original_weights(nullptr)
+ std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(memory_manager), _reshape_weights_kernel(), _quant_input_kernel(),
+ _mm_gemmlowp(memory_manager), _multiply_scale_kernel(), _accumulate_biases_kernel(),
+ _reshape_weights_output(), _quantized_input(), _scale_factor(), _gemmlowp_output(),
+ _are_weights_reshaped(true), _accumulate_biases(false), _is_prepared(false),
+ _original_weights(nullptr)
{
}
void CLFullyConnectedHybridLayer::configure_mm(const ICLTensor *input, const ICLTensor *weights,
@@ -107,8 +106,8 @@ void CLFullyConnectedHybridLayer::configure(const ICLTensor *input, const ICLTen
// Perform validate step
ARM_COMPUTE_ERROR_THROW_ON(CLFullyConnectedHybridLayer::validate(
- input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
- fc_info));
+ input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
+ fc_info));
_are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
_accumulate_biases = false;
@@ -140,10 +139,10 @@ void CLFullyConnectedHybridLayer::configure(const ICLTensor *input, const ICLTen
bool is_fc_after_conv = false;
if (is_batched_fc_layer)
{
- is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&
- (std::equal(input->info()->tensor_shape().cbegin() + 3,
- input->info()->tensor_shape().cend(),
- output->info()->tensor_shape().cbegin() + 1));
+ is_fc_after_conv =
+ (TensorShape::num_max_dimensions >= 4) &&
+ (std::equal(input->info()->tensor_shape().cbegin() + 3, input->info()->tensor_shape().cend(),
+ output->info()->tensor_shape().cbegin() + 1));
}
else
{
@@ -158,28 +157,28 @@ void CLFullyConnectedHybridLayer::configure(const ICLTensor *input, const ICLTen
{
// Reshape the weights
_reshape_weights_output.allocator()->init(
- weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- compute_transposed_shape(*weights->info())));
+ weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
+ compute_transposed_shape(*weights->info())));
_reshape_weights_kernel.configure(weights_to_use, &_reshape_weights_output);
weights_to_use = &_reshape_weights_output;
}
// Extract scale factor
_scale_factor.allocator()->init(
- TensorInfo(TensorShape{output->info()->dimension(1)}, 1, input->info()->data_type()));
+ TensorInfo(TensorShape{output->info()->dimension(1)}, 1, input->info()->data_type()));
_memory_group.manage(&_scale_factor);
_scale_factor_kernel.configure(input, &_scale_factor);
// Quantize input
_quantized_input.allocator()->init(
- input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(
- DataType::QASYMM8_SIGNED));
+ input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(
+ DataType::QASYMM8_SIGNED));
_memory_group.manage(&_quantized_input);
_quant_input_kernel.configure(input, &_scale_factor, &_quantized_input);
// GEMMLowp
_gemmlowp_output.allocator()->init(
- output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
+ output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
_memory_group.manage(&_gemmlowp_output);
configure_mm(&_quantized_input, weights_to_use, &_gemmlowp_output,
fc_info.retain_internal_weights);
@@ -209,15 +208,15 @@ Status CLFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe
const GPUTarget gpu_target = CLScheduler::get().target();
const ITensorInfo &reshaped_weights =
- TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- compute_transposed_shape(*weights)));
+ TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
+ compute_transposed_shape(*weights)));
// Configure accumulate biases kernel for non quantized asymmetric types
if (biases != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
ARM_COMPUTE_RETURN_ON_ERROR(
- CLGEMMMatrixAccumulateBiasesKernel::validate(output, biases, gpu_target));
+ CLGEMMMatrixAccumulateBiasesKernel::validate(output, biases, gpu_target));
}
// With the Fully Connected layer we can have 4 different cases:
@@ -247,33 +246,32 @@ Status CLFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe
{
// Validate reshape weights kernel
ARM_COMPUTE_RETURN_ON_ERROR(
- CLFullyConnectedHybridLayerReshapeWeights::validate(weights_to_use, &reshaped_weights));
+ CLFullyConnectedHybridLayerReshapeWeights::validate(weights_to_use, &reshaped_weights));
weights_to_use = &reshaped_weights;
}
// Validate Scale factor kernel
const ITensorInfo &scale_factor =
- TensorInfo(TensorShape{output->dimension(1)}, 1, input->data_type());
+ TensorInfo(TensorShape{output->dimension(1)}, 1, input->data_type());
ARM_COMPUTE_RETURN_ON_ERROR(CLScaleFactorSymm8Kernel::validate(input, &scale_factor));
// Validate quantization symm8 kernel
- const ITensorInfo &quantized_input =
- TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_data_type(
- DataType::QASYMM8_SIGNED));
+ const ITensorInfo &quantized_input = TensorInfo(
+ input->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::QASYMM8_SIGNED));
ARM_COMPUTE_RETURN_ON_ERROR(
- CLQuantizationSymmetricKernel::validate(input, &scale_factor, &quantized_input));
+ CLQuantizationSymmetricKernel::validate(input, &scale_factor, &quantized_input));
// Fully Connected layer after a Fully Connected Layer without batches
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1));
// Validate matrix multiply kernel
const ITensorInfo &gemmlowp_output = TensorInfo(
- output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
+ output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(quantized_input, *weights_to_use, gemmlowp_output));
// Multiply scale
ARM_COMPUTE_RETURN_ON_ERROR(
- CLMultiplyScaleFactorKernel::validate(&gemmlowp_output, &scale_factor, output));
+ CLMultiplyScaleFactorKernel::validate(&gemmlowp_output, &scale_factor, output));
return Status{};
}
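
Note on the hybrid path reindented above: the layer computes a per-row symmetric scale factor (CLScaleFactorSymm8Kernel), quantizes the float input to QASYMM8_SIGNED (CLQuantizationSymmetricKernel), runs an S32 GEMMLowp, and rescales the accumulators by the scale factors (CLMultiplyScaleFactorKernel). Below is a minimal standalone C++ sketch of the scale and quantize steps only; the names are illustrative, not part of the ACL API.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <vector>

// Per-row symmetric scale: max(|x|) / 127, mirroring CLScaleFactorSymm8Kernel.
float scale_factor_symm8(const std::vector<float> &row)
{
  float max_abs = 0.f;
  for (float v : row)
    max_abs = std::max(max_abs, std::fabs(v));
  return max_abs / 127.f;
}

// Symmetric int8 quantization of one row, mirroring CLQuantizationSymmetricKernel.
std::vector<int8_t> quantize_symm8(const std::vector<float> &row, float scale)
{
  std::vector<int8_t> q(row.size(), 0);
  if (scale == 0.f)
    return q; // an all-zero row quantizes to zeros
  for (std::size_t i = 0; i < row.size(); ++i)
    q[i] = static_cast<int8_t>(std::lround(row[i] / scale));
  return q;
}
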
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp
index 2ff4b9659..c6a88d340 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp
@@ -42,11 +42,11 @@
#include "arm_compute/core/Size2D.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/Cast.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "support/MemorySupport.h"
+
+#include "support/Cast.h"
#include <algorithm>
@@ -79,7 +79,7 @@ Status construct_gemmlowp_output_stage(const ITensorInfo &input, const ITensorIn
int output_multiplier = 0;
int output_shift = 0;
ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier_less_than_one(
- multiplier, &output_multiplier, &output_shift));
+ multiplier, &output_multiplier, &output_shift));
// Set the GEMMLowp output stage info
gemmlowp_output_stage.gemmlowp_offset = output_quant_info.offset;
@@ -99,7 +99,7 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I
{
GEMMLowpOutputStageInfo gemmlowp_output_stage;
ARM_COMPUTE_RETURN_ON_ERROR(
- construct_gemmlowp_output_stage(input, weights, output, gemmlowp_output_stage));
+ construct_gemmlowp_output_stage(input, weights, output, gemmlowp_output_stage));
const GEMMInfo &gemm_info = GEMMInfo(false, // is_a_reshaped
false, // is_b_reshaped
@@ -125,14 +125,14 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I
// Validate gemmlowp function
ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyCore::validate(
- &input.clone()->set_quantization_info(input_quantization_info),
- &weights.clone()->set_quantization_info(weights_quantization_info), bias, &output,
- gemm_info));
+ &input.clone()->set_quantization_info(input_quantization_info),
+ &weights.clone()->set_quantization_info(weights_quantization_info), bias, &output,
+ gemm_info));
}
else
{
ARM_COMPUTE_RETURN_ON_ERROR(
- CLGEMM::validate(&input, &weights, bias, &output, 1.f, 1.f, gemm_info));
+ CLGEMM::validate(&input, &weights, bias, &output, 1.f, 1.f, gemm_info));
}
return Status{};
@@ -141,7 +141,7 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I
void CLFullyConnectedLayerReshapeWeightsEx::configure(const ICLTensor *input, ICLTensor *output)
{
- auto k = support::cpp14::make_unique<CLTransposeKernel>();
+ auto k = std::make_unique<CLTransposeKernel>();
k->configure(input, output);
_kernel = std::move(k);
}
@@ -154,12 +154,12 @@ Status CLFullyConnectedLayerReshapeWeightsEx::validate(const ITensorInfo *input,
CLFullyConnectedLayerEx::CLFullyConnectedLayerEx(std::shared_ptr<IMemoryManager> memory_manager,
IWeightsManager *weights_manager)
- : _memory_group(memory_manager), _weights_manager(weights_manager), _convert_weights(),
- _convert_weights_managed(), _reshape_weights_managed_function(), _flatten_layer(),
- _reshape_weights_function(), _mm_gemm(memory_manager, weights_manager),
- _mm_gemmlowp(memory_manager), _flatten_output(), _converted_weights_output(),
- _reshape_weights_output(), _are_weights_converted(true), _are_weights_reshaped(true),
- _is_fc_after_conv(true), _is_quantized(false), _is_prepared(false), _original_weights(nullptr)
+ : _memory_group(memory_manager), _weights_manager(weights_manager), _convert_weights(),
+ _convert_weights_managed(), _reshape_weights_managed_function(), _flatten_layer(),
+ _reshape_weights_function(), _mm_gemm(memory_manager, weights_manager),
+ _mm_gemmlowp(memory_manager), _flatten_output(), _converted_weights_output(),
+ _reshape_weights_output(), _are_weights_converted(true), _are_weights_reshaped(true),
+ _is_fc_after_conv(true), _is_quantized(false), _is_prepared(false), _original_weights(nullptr)
{
}
void CLFullyConnectedLayerEx::configure_mm(const ICLTensor *input, const ICLTensor *weights,
@@ -190,9 +190,9 @@ void CLFullyConnectedLayerEx::configure_mm(const ICLTensor *input, const ICLTens
const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();
input->info()->set_quantization_info(QuantizationInfo(
- input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
+ input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
weights->info()->set_quantization_info(QuantizationInfo(
- weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
+ weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
// Configure gemmlowp function
_mm_gemmlowp.configure(input, weights, bias, output, gemm_info);
@@ -214,8 +214,8 @@ void CLFullyConnectedLayerEx::configure_conv_fc(const ICLTensor *input, const IC
const FullyConnectedLayerInfo &fc_info)
{
ARM_COMPUTE_ERROR_ON(
- (weights->info()->dimension(1) !=
- (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))));
+ (weights->info()->dimension(1) !=
+ (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))));
// If the fully connected layer is called after a convolution layer, the input tensor must be
// linearized
@@ -223,11 +223,11 @@ void CLFullyConnectedLayerEx::configure_conv_fc(const ICLTensor *input, const IC
// Initialize output tensor for flatten
TensorShape shape_flatten = compute_flatten_shape(input->info());
_flatten_output.allocator()->init(input->info()
- ->clone()
- ->set_is_resizable(true)
- .reset_padding()
- .set_tensor_shape(shape_flatten)
- .set_data_layout(DataLayout::NCHW));
+ ->clone()
+ ->set_is_resizable(true)
+ .reset_padding()
+ .set_tensor_shape(shape_flatten)
+ .set_data_layout(DataLayout::NCHW));
// Configure flatten kernel
_memory_group.manage(&_flatten_output);
@@ -258,8 +258,8 @@ void CLFullyConnectedLayerEx::configure(const ICLTensor *input, const ICLTensor
// Perform validate step
ARM_COMPUTE_ERROR_THROW_ON(CLFullyConnectedLayerEx::validate(
- input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
- fc_info));
+ input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
+ fc_info));
_are_weights_converted = true;
_are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
@@ -285,10 +285,10 @@ void CLFullyConnectedLayerEx::configure(const ICLTensor *input, const ICLTensor
const bool is_batched_fc_layer = output->info()->dimension(1) > 1;
if (is_batched_fc_layer)
{
- _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&
- (std::equal(input->info()->tensor_shape().cbegin() + 3,
- input->info()->tensor_shape().cend(),
- output->info()->tensor_shape().cbegin() + 1));
+ _is_fc_after_conv =
+ (TensorShape::num_max_dimensions >= 4) &&
+ (std::equal(input->info()->tensor_shape().cbegin() + 3, input->info()->tensor_shape().cend(),
+ output->info()->tensor_shape().cbegin() + 1));
}
else
{
@@ -302,7 +302,7 @@ void CLFullyConnectedLayerEx::configure(const ICLTensor *input, const ICLTensor
{
_reshape_weights_managed_function.configure(weights);
weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(
- _weights_manager->acquire(weights, &_reshape_weights_managed_function));
+ _weights_manager->acquire(weights, &_reshape_weights_managed_function));
}
else
{
@@ -320,7 +320,7 @@ void CLFullyConnectedLayerEx::configure(const ICLTensor *input, const ICLTensor
_convert_weights_managed.configure(weights_to_use, input->info()->tensor_shape(),
fc_info.weights_trained_layout);
weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(
- _weights_manager->acquire(weights, &_convert_weights_managed));
+ _weights_manager->acquire(weights, &_convert_weights_managed));
}
else
{
@@ -359,16 +359,16 @@ Status CLFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor
bool is_fc_after_conv = true;
const ITensorInfo &flatten_input = TensorInfo(input->clone()
- ->set_is_resizable(true)
- .reset_padding()
- .set_tensor_shape(compute_flatten_shape(input))
- .set_data_layout(DataLayout::NCHW));
+ ->set_is_resizable(true)
+ .reset_padding()
+ .set_tensor_shape(compute_flatten_shape(input))
+ .set_data_layout(DataLayout::NCHW));
const ITensorInfo &reshaped_weights =
- TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- compute_transposed_shape(*weights)));
+ TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
+ compute_transposed_shape(*weights)));
const ITensorInfo &converted_weights =
- weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding())
- : TensorInfo(*reshaped_weights.clone());
+ weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding())
+ : TensorInfo(*reshaped_weights.clone());
// With the Fully Connected layer we can have 4 different cases:
// 1) Convolution layer -> Fully Connected layer without batches
@@ -396,7 +396,7 @@ Status CLFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor
{
// Validate reshape weights kernel
ARM_COMPUTE_RETURN_ON_ERROR(
- CLFullyConnectedLayerReshapeWeightsEx::validate(weights, &reshaped_weights));
+ CLFullyConnectedLayerReshapeWeightsEx::validate(weights, &reshaped_weights));
weights_to_use = &reshaped_weights;
}
@@ -404,7 +404,7 @@ Status CLFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor
{
// Validate convert weights kernel
ARM_COMPUTE_RETURN_ON_ERROR(CLConvertFullyConnectedWeights::validate(
- weights_to_use, &converted_weights, input->tensor_shape(), fc_info.weights_trained_layout));
+ weights_to_use, &converted_weights, input->tensor_shape(), fc_info.weights_trained_layout));
weights_to_use = &converted_weights;
}
@@ -412,8 +412,8 @@ Status CLFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor
{
// Fully Connected layer after a Convolution Layer without batches
ARM_COMPUTE_RETURN_ERROR_ON(
- (weights_to_use->dimension(1) !=
- (input->dimension(0) * input->dimension(1) * input->dimension(2))));
+ (weights_to_use->dimension(1) !=
+ (input->dimension(0) * input->dimension(1) * input->dimension(2))));
// Validate flatten kernel
ARM_COMPUTE_RETURN_ON_ERROR(CLFlattenLayer::validate(input, &flatten_input));
@@ -427,7 +427,7 @@ Status CLFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor
// Validate matrix multiply kernel
ARM_COMPUTE_RETURN_ON_ERROR(
- validate_mm(*input_to_use, *weights_to_use, biases, *output, fc_info));
+ validate_mm(*input_to_use, *weights_to_use, biases, *output, fc_info));
return Status{};
}
@@ -457,7 +457,7 @@ void CLFullyConnectedLayerEx::run()
if (_weights_manager && _weights_manager->are_weights_managed(cur_weights))
{
_original_weights = utils::cast::polymorphic_downcast<ICLTensor *>(
- _weights_manager->run(cur_weights, &_reshape_weights_managed_function));
+ _weights_manager->run(cur_weights, &_reshape_weights_managed_function));
}
else
{
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp
index 157b4d977..cda784541 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp
@@ -19,6 +19,7 @@
#include <arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h>
#include <arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h>
#include <arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h>
+#include "src/core/helpers/AutoConfiguration.h"
using namespace arm_compute;
@@ -41,7 +42,7 @@ void CLFullyConnectedReshapingLayer::configure(const arm_compute::ICLTensor *inp
// reshape
auto_init_if_empty(*_cl_buffer.info(),
_input->info()->clone()->set_tensor_shape(reshape).set_data_layout(
- _input->info()->data_layout()));
+ _input->info()->data_layout()));
_cl_reshape.configure(_input, &_cl_buffer);
input_to_use = &_cl_buffer;
}
@@ -57,7 +58,7 @@ void CLFullyConnectedReshapingLayer::configure(const arm_compute::ICLTensor *inp
{
bool is_hybrid = (input->info()->data_type() == DataType::F32 ||
input->info()->data_type() == DataType::F16) &&
- (weights->info()->data_type() == DataType::S8 ||
+ (weights->info()->data_type() == DataType::QSYMM8 ||
weights->info()->data_type() == DataType::QASYMM8_SIGNED);
if (is_hybrid)
@@ -81,7 +82,6 @@ void CLFullyConnectedReshapingLayer::configure(const arm_compute::ICLTensor *inp
{
throw std::runtime_error("CLFullyConnectedReshapingLayer: Unsupported kernel type");
}
-
}();
if (_needs_reshape)
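
The data-type fix above (QSYMM8 replacing S8 in the predicate) decides which fully connected implementation the reshaping layer instantiates: float activations with symmetric or signed-asymmetric int8 weights take the hybrid path. A reduced sketch of that dispatch, with a hypothetical DType enum standing in for arm_compute::DataType:

enum class DType { F16, F32, QSYMM8, QASYMM8_SIGNED };

// Mirrors the is_hybrid predicate above: float inputs, quantized weights.
bool use_hybrid_fc(DType input, DType weights)
{
  const bool float_input = (input == DType::F32 || input == DType::F16);
  const bool quantized_weights = (weights == DType::QSYMM8 || weights == DType::QASYMM8_SIGNED);
  return float_input && quantized_weights;
}
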
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLGEMMMatrixAccumulateBiasesKernel.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLGEMMMatrixAccumulateBiasesKernel.cpp
new file mode 100644
index 000000000..cd7409417
--- /dev/null
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLGEMMMatrixAccumulateBiasesKernel.cpp
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Copyright (c) 2017-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
+
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/CLKernelLibraryEx.h"
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Utils.h"
+#include "support/StringSupport.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/WindowHelpers.h"
+
+using namespace arm_compute;
+
+namespace
+{
+Status validate_arguments(const ITensorInfo *accum, const ITensorInfo *biases)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(accum);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(biases, accum);
+ ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() != 1);
+
+ return Status{};
+}
+
+std::pair<Status, Window>
+validate_and_configure_window(ITensorInfo *accum, ITensorInfo *biases, GPUTarget gpu_target,
+ unsigned int &num_elems_processed_per_iteration)
+{
+ // Select the vector size to use (8 for Bifrost; 16 for Midgard).
+ bool is_gpu_bifrost =
+ gpu_target_is_in(gpu_target, GPUTarget::G71, GPUTarget::G72, GPUTarget::G76, GPUTarget::G51,
+ GPUTarget::G51BIG, GPUTarget::G51LIT, GPUTarget::G52, GPUTarget::G52LIT);
+ num_elems_processed_per_iteration = is_gpu_bifrost ? 8 : 16;
+
+ // Configure kernel window
+ Window win = calculate_max_window(*accum, Steps(num_elems_processed_per_iteration));
+
+ AccessWindowStatic biases_access(
+ biases, 0, 0, ceil_to_multiple(biases->dimension(0), num_elems_processed_per_iteration),
+ biases->dimension(1));
+ AccessWindowHorizontal accum_access(accum, 0, num_elems_processed_per_iteration);
+
+ bool window_changed = update_window_and_padding(win, biases_access, accum_access);
+
+ Status err = (window_changed)
+ ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
+ : Status{};
+ return std::make_pair(err, win);
+}
+} // namespace
+
+CLGEMMMatrixAccumulateBiasesKernel::CLGEMMMatrixAccumulateBiasesKernel()
+ : _accum(nullptr), _biases(nullptr)
+{
+}
+
+void CLGEMMMatrixAccumulateBiasesKernel::configure(ICLTensor *accum, const ICLTensor *biases)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), accum, biases);
+}
+
+void CLGEMMMatrixAccumulateBiasesKernel::configure(const CLCompileContext &compile_context,
+ ICLTensor *accum, const ICLTensor *biases)
+{
+ ARM_COMPUTE_UNUSED(compile_context);
+ // Perform validate step
+ ARM_COMPUTE_ERROR_ON_NULLPTR(accum, biases);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(accum->info(), biases->info()));
+
+ _biases = biases;
+ _accum = accum;
+
+ // Get the target gpu
+ GPUTarget gpu_target = get_target();
+ unsigned int vector_size = 0;
+
+ // Configure kernel window
+ auto win_config =
+ validate_and_configure_window(accum->info(), biases->info(), gpu_target, vector_size);
+ ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+ ICLKernel::configure_internal(win_config.second);
+
+ // Add build options
+ CLBuildOptions build_opts;
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(accum->info()->data_type()));
+ build_opts.add_option("-DVECTOR_SIZE=" + support::cpp11::to_string(vector_size));
+
+ // Create kernel
+ _kernel = static_cast<cl::Kernel>(
+ CLKernelLibraryEx::get().create_kernel("gemm_accumulate_biases", build_opts.options()));
+}
+
+Status CLGEMMMatrixAccumulateBiasesKernel::validate(const ITensorInfo *accum,
+ const ITensorInfo *biases, GPUTarget gpu_target)
+{
+ unsigned int num_elems_processed_per_iteration = 0;
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(accum, biases));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(accum->clone().get(),
+ biases->clone().get(), gpu_target,
+ num_elems_processed_per_iteration)
+ .first);
+
+ return Status{};
+}
+
+void CLGEMMMatrixAccumulateBiasesKernel::run(const Window &window, cl::CommandQueue &queue)
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window);
+
+ Window accum_slice = window.first_slice_window_2D();
+
+ Window biases_slice(accum_slice);
+ biases_slice.set(Window::DimY, Window::Dimension(0, 1, 1));
+
+ // Run kernel
+ do
+ {
+ // Set arguments
+ unsigned int idx = 0;
+ add_2D_tensor_argument(idx, _accum, accum_slice);
+ add_1D_tensor_argument(idx, _biases, biases_slice);
+
+ enqueue(queue, *this, accum_slice, lws_hint());
+ } while (window.slide_window_slice_2D(accum_slice));
+}
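
For reference, the gemm_accumulate_biases kernel added above adds a 1-D bias vector to every row of the 2-D accumulator, processed 8 or 16 elements per iteration depending on the GPU target. A scalar CPU sketch of the same semantics; illustrative code, not the OpenCL kernel:

#include <cstddef>
#include <vector>

// accum holds rows x cols values in row-major order; biases holds cols values.
void accumulate_biases(std::vector<float> &accum, std::size_t rows, std::size_t cols,
                       const std::vector<float> &biases)
{
  for (std::size_t r = 0; r < rows; ++r)
    for (std::size_t c = 0; c < cols; ++c)
      accum[r * cols + c] += biases[c];
}
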
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp
index e0b833b04..f380e3e2c 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp
@@ -41,6 +41,8 @@
#include "arm_compute/runtime/CL/functions/CLGatherEx.h"
#include "arm_compute/core/CL/ICLTensor.h"
+#include "src/core/CL/kernels/CLGatherKernel.h"
+
#include "arm_compute/core/CL/kernels/CLGatherExKernel.h"
using namespace arm_compute;
@@ -48,7 +50,7 @@ using namespace arm_compute;
void CLGatherEx::configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output,
int axis)
{
- auto k = support::cpp14::make_unique<CLGatherExKernel>();
+ auto k = std::make_unique<CLGatherExKernel>();
k->configure(input, indices, output, axis);
_kernel = std::move(k);
}
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp
index 65b89a389..9896abd4b 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp
@@ -47,7 +47,7 @@ using namespace arm_compute;
void CLHashtableLookup::configure(const ICLTensor *lookups, const ICLTensor *keys,
const ICLTensor *input, ICLTensor *output, ICLTensor *hits)
{
- auto k = support::cpp14::make_unique<CLHashtableLookupKernel>();
+ auto k = std::make_unique<CLHashtableLookupKernel>();
k->configure(lookups, keys, input, output, hits);
_kernel = std::move(k);
}
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp
index 5a7e40839..ca45a57f8 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp
@@ -50,7 +50,7 @@ CLInstanceNormalizationLayerEx::CLInstanceNormalizationLayerEx() {}
void CLInstanceNormalizationLayerEx::configure(ICLTensor *input, ICLTensor *output,
ICLTensor *gamma, ICLTensor *beta, float epsilon)
{
- auto k = support::cpp14::make_unique<CLInstanceNormalizationLayerKernelEx>();
+ auto k = std::make_unique<CLInstanceNormalizationLayerKernelEx>();
k->configure(input, output, gamma, beta, epsilon);
_kernel = std::move(k);
}
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp
index 28e5bc0da..2bdc451b3 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp
@@ -46,7 +46,7 @@ using namespace arm_compute;
void CLNeg::configure(ICLTensor *input, ICLTensor *output)
{
- auto k = arm_compute::support::cpp14::make_unique<CLNegKernel>();
+ auto k = std::make_unique<CLNegKernel>();
k->configure(input, output);
_kernel = std::move(k);
}
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLOneHot.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLOneHot.cpp
new file mode 100644
index 000000000..759a19ff3
--- /dev/null
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLOneHot.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLOneHot.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/CL/kernels/CLOneHotKernel.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+
+namespace arm_compute
+{
+CLOneHot::CLOneHot() : _memset_kernel(), _onehot_kernel(), _has_to_memset(false) {}
+void CLOneHot::configure(const ICLTensor *indices, const ICLTensor *on_value,
+ const ICLTensor *off_value, ICLTensor *output, int depth, int axis)
+{
+ _onehot_kernel.configure(indices, on_value, off_value, output, depth, axis);
+}
+void CLOneHot::configure(const ICLTensor *indices, const ICLTensor *on_value, ICLTensor *output,
+ PixelValue off_value, int depth, int axis)
+{
+ _has_to_memset = true;
+ _memset_kernel.configure(output, off_value);
+ _onehot_kernel.configure(indices, on_value, output, depth, axis);
+}
+Status CLOneHot::validate(const ITensorInfo *indices, const ITensorInfo *on_value,
+ const ITensorInfo *off_value, const ITensorInfo *output, int depth,
+ int axis)
+{
+ return CLOneHotKernel::validate(indices, on_value, off_value, output, depth, axis);
+}
+void CLOneHot::run()
+{
+ if (_has_to_memset)
+ {
+ CLScheduler::get().enqueue(_memset_kernel, true);
+ }
+
+ CLScheduler::get().enqueue(_onehot_kernel, false);
+}
+} // namespace arm_compute
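
The two-step CLOneHot variant above first fills the output with off_value (the memset kernel), then scatters on_value at the index positions (the one-hot kernel). A plain C++ sketch of those semantics for the last-axis case only, assuming a row-major output of indices.size() x depth:

#include <cstddef>
#include <vector>

std::vector<float> one_hot(const std::vector<int> &indices, int depth,
                           float on_value, float off_value)
{
  std::vector<float> out(indices.size() * depth, off_value); // memset step
  for (std::size_t i = 0; i < indices.size(); ++i)
    if (indices[i] >= 0 && indices[i] < depth)                // out-of-range indices stay off
      out[i * depth + indices[i]] = on_value;                 // scatter step
  return out;
}
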
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp
new file mode 100644
index 000000000..4d940e966
--- /dev/null
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLPadLayerEx.h"
+#include "arm_compute/core/CL/kernels/CLPadLayerKernelEx.h"
+
+namespace arm_compute
+{
+CLPadLayerEx::CLPadLayerEx()
+ : _pad_kernel(std::make_unique<CLPadLayerKernelEx>()),
+ _copy_kernel(std::make_unique<opencl::kernels::ClCopyKernel>()), _perform_pad(false)
+{
+}
+
+void CLPadLayerEx::configure(ICLTensor *input, ICLTensor *output, const PaddingList &padding,
+ PixelValue constant_value, PaddingMode mode)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, padding, constant_value,
+ mode);
+}
+
+void CLPadLayerEx::configure(const CLCompileContext &compile_context, ICLTensor *input,
+ ICLTensor *output, const PaddingList &padding,
+ PixelValue constant_value, PaddingMode mode)
+{
+ ARM_COMPUTE_ERROR_THROW_ON(
+ validate(input->info(), output->info(), padding, constant_value, mode));
+
+ _perform_pad = std::any_of(padding.begin(), padding.end(),
+ [](PaddingInfo info) { return info.first > 0 || info.second > 0; });
+
+ if (_perform_pad)
+ {
+ _pad_kernel->configure(compile_context, input, output, padding, constant_value, mode);
+ }
+ else
+ {
+ Window copy_window = Window();
+ copy_window.use_tensor_dimensions(output->info()->tensor_shape());
+ // Copy the input to the whole output if no padding is applied
+ _copy_kernel->configure(compile_context, input->info(), output->info(), &copy_window);
+ }
+}
+Status CLPadLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output,
+ const PaddingList &padding, PixelValue constant_value,
+ PaddingMode mode)
+{
+ bool perform_pad = std::any_of(padding.begin(), padding.end(), [](PaddingInfo info) {
+ return info.first > 0 || info.second > 0;
+ });
+
+ if (perform_pad)
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ CLPadLayerKernelEx::validate(input, output, padding, constant_value, mode));
+ }
+ else
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(opencl::kernels::ClCopyKernel::validate(input, output));
+ }
+ return Status{};
+}
+void CLPadLayerEx::run()
+{
+ if (_perform_pad)
+ {
+ CLScheduler::get().enqueue(*_pad_kernel);
+ }
+ else
+ {
+ CLScheduler::get().enqueue(*_copy_kernel);
+ }
+}
+} // namespace arm_compute
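
CLPadLayerEx above degenerates to a plain tensor copy when every entry in the padding list is zero; both configure() and validate() reduce the decision to one predicate over the list. A sketch of that predicate, with std::pair standing in for arm_compute::PaddingInfo:

#include <algorithm>
#include <cstddef>
#include <utility>
#include <vector>

using PaddingInfo = std::pair<std::size_t, std::size_t>; // (before, after) per dimension

// True when any dimension requests non-zero padding, mirroring _perform_pad above.
bool needs_padding(const std::vector<PaddingInfo> &padding)
{
  return std::any_of(padding.begin(), padding.end(),
                     [](const PaddingInfo &p) { return p.first > 0 || p.second > 0; });
}
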
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp
index b198e7330..6740835a8 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp
@@ -40,21 +40,20 @@
#include "arm_compute/runtime/CL/functions/CLReduceOperation.h"
-#include "arm_compute/core/CL/kernels/CLReduceOperationKernel.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
using namespace arm_compute;
CLReduceOperation::CLReduceOperation(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _input(nullptr), _output(nullptr), _axis(),
- _keep_dims(false), _interm_tensors(), _reduce_kernels(), _reshape()
+ : _memory_group(std::move(memory_manager)), _input(nullptr), _output(nullptr), _axis(),
+ _keep_dims(false), _interm_tensors(), _reduce_kernels(), _reshape()
{
}
Status CLReduceOperation::validate(const ITensorInfo *input, const ITensorInfo *output,
const std::set<uint32_t> &axis, bool keep_dims,
- const ReduceOperation &op)
+ const ReductionOperation &op)
{
const size_t num_of_kernels = axis.size();
const size_t num_of_interm_tensors = num_of_kernels - (keep_dims ? 1 : 0);
@@ -62,7 +61,7 @@ Status CLReduceOperation::validate(const ITensorInfo *input, const ITensorInfo *
ARM_COMPUTE_RETURN_ERROR_ON(num_of_kernels < 1);
// Create temporary tensor infos
- auto interm_tensors = support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors);
+ auto interm_tensors = std::make_unique<TensorInfo[]>(num_of_interm_tensors);
// Create intermediate tensor info
TensorShape shape{input->tensor_shape()};
@@ -92,13 +91,13 @@ Status CLReduceOperation::validate(const ITensorInfo *input, const ITensorInfo *
for (size_t i = 0; i < num_of_kernels; ++i, ++it)
{
ARM_COMPUTE_RETURN_ON_ERROR(
- CLReduceOperationKernel::validate(tensors[i], tensors[i + 1], *it, op));
+ CLReduceOperationKernel::validate(tensors[i], tensors[i + 1], *it, op));
}
if (!keep_dims)
{
ARM_COMPUTE_RETURN_ON_ERROR(
- CLReshapeLayer::validate(&interm_tensors[num_of_interm_tensors - 1], output));
+ CLReshapeLayer::validate(&interm_tensors[num_of_interm_tensors - 1], output));
}
return Status{};
@@ -106,7 +105,7 @@ Status CLReduceOperation::validate(const ITensorInfo *input, const ITensorInfo *
void CLReduceOperation::configure(ICLTensor *input, ICLTensor *output,
const std::set<uint32_t> &axis, bool keep_dims,
- ReduceOperation op)
+ ReductionOperation op)
{
ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), axis, keep_dims, op));
@@ -125,8 +124,8 @@ void CLReduceOperation::configure(ICLTensor *input, ICLTensor *output,
throw std::runtime_error("CLReduceOperation: there is no axis to reduce");
}
- _interm_tensors = support::cpp14::make_unique<CLTensor[]>(num_of_interm_tensors);
- _reduce_kernels = support::cpp14::make_unique<CLReduceOperationKernel[]>(num_of_kernels);
+ _interm_tensors = std::make_unique<CLTensor[]>(num_of_interm_tensors);
+ _reduce_kernels = std::make_unique<CLReduceOperationKernel[]>(num_of_kernels);
  // Build a vector of the ICLTensors, ordered sequentially.
std::vector<ICLTensor *> tensors;
@@ -137,7 +136,7 @@ void CLReduceOperation::configure(ICLTensor *input, ICLTensor *output,
}
tensors.emplace_back(output);
- // Apply ReduceOperation on all kernels
+ // Apply ReductionOperation on all kernels
TensorShape shape{input->info()->tensor_shape()};
auto it = axis.begin();
for (size_t i = 0; i < num_of_kernels; ++i, ++it)
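
The reduction above chains one CLReduceOperationKernel per axis, allocating num_of_kernels - (keep_dims ? 1 : 0) intermediate tensors, and appends a reshape when keep_dims is false. A sketch of the shape bookkeeping, assuming every axis is a valid dimension index:

#include <cstdint>
#include <set>
#include <vector>

std::vector<uint32_t> reduced_shape(const std::vector<uint32_t> &shape,
                                    const std::set<uint32_t> &axes, bool keep_dims)
{
  std::vector<uint32_t> out;
  for (uint32_t d = 0; d < shape.size(); ++d)
  {
    if (axes.count(d))
    {
      if (keep_dims)
        out.push_back(1); // each kernel collapses its axis to size 1
    }
    else
    {
      out.push_back(shape[d]); // untouched axes keep their extent
    }
  }
  return out; // without keep_dims, the final reshape drops the reduced axes
}
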
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLSplitVEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLSplitVEx.cpp
new file mode 100644
index 000000000..bca4d5cb6
--- /dev/null
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLSplitVEx.cpp
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLSplitVEx.h"
+#include "support/ToolchainSupport.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include <cassert>
+
+using namespace arm_compute;
+
+namespace
+{
+Status validate_arguments(const ICLTensor *size_splits, const std::vector<ICLTensor *> &outputs,
+ unsigned int num_splits)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(size_splits->info()->num_dimensions() != 1,
+ "size_splits must be a 1-D tensor.");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(num_splits != outputs.size(),
+ "Number of output tensors does not match number of splits.");
+ return Status{};
+}
+
+Status validate_slices(const ITensorInfo *input, const std::vector<ITensorInfo *> &outputs,
+ uint32_t split_dim)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
+ ARM_COMPUTE_RETURN_ERROR_ON(split_dim >= input->num_dimensions());
+ ARM_COMPUTE_RETURN_ERROR_ON(outputs.size() < 2);
+
+ // Start/End coordinates
+ Coordinates start_coords;
+ Coordinates end_coords;
+ for (unsigned int d = 0; d < input->num_dimensions(); ++d)
+ {
+ end_coords.set(d, -1);
+ }
+ unsigned int axis_offset = 0;
+ // Validate output tensors
+ for (const auto &output : outputs)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
+ // Get output shape
+ const TensorShape output_shape = output->tensor_shape();
+ ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() == 0);
+
+ const size_t axis_split_step = output_shape[split_dim];
+
+ // Output auto-initialization if not yet initialized
+ TensorInfo tmp_output_info = *output->clone();
+ auto_init_if_empty(tmp_output_info,
+ input->clone()->set_is_resizable(true).set_tensor_shape(output_shape));
+
+ // Update coordinate on axis
+ start_coords.set(split_dim, axis_offset);
+ end_coords.set(split_dim, axis_offset + axis_split_step);
+
+ ARM_COMPUTE_RETURN_ON_ERROR(CLSlice::validate(input, output, start_coords, end_coords));
+
+ axis_offset += axis_split_step;
+ }
+
+ return Status{};
+}
+
+void configure_slices(const ICLTensor *input, const std::vector<ICLTensor *> &outputs,
+ std::vector<CLSlice> &_slice_functions, uint32_t split_dim)
+{
+ unsigned int axis_offset = 0;
+ // Start/End coordinates
+ Coordinates start_coords;
+ Coordinates end_coords;
+ for (unsigned int d = 0; d < input->info()->num_dimensions(); ++d)
+ {
+ end_coords.set(d, -1);
+ }
+ int out_iter = 0;
+ for (const auto &output : outputs)
+ {
+ const TensorShape output_shape = output->info()->tensor_shape();
+ auto op_size = output_shape.total_size();
+ if (!op_size)
+ {
+ continue;
+ }
+
+ assert(op_size != 0);
+ assert(split_dim <= output_shape.num_dimensions());
+
+ const size_t axis_split_step = output_shape[split_dim];
+
+ // Output auto-initialization if not yet initialized
+ TensorInfo tmp_output_info = *output->info()->clone();
+ auto_init_if_empty(
+ tmp_output_info,
+ input->info()->clone()->set_is_resizable(true).set_tensor_shape(output_shape));
+
+ // Update coordinate on axis
+ start_coords.set(split_dim, axis_offset);
+ end_coords.set(split_dim, axis_offset + axis_split_step);
+
+ // Configure slice function
+ _slice_functions[out_iter].configure(input, output, start_coords, end_coords);
+
+ // Set valid region from shape
+ outputs[out_iter++]->info()->set_valid_region(ValidRegion(Coordinates(), output_shape));
+ axis_offset += axis_split_step;
+ }
+}
+
+} // namespace
+
+CLSplitVEx::CLSplitVEx()
+ : _input(nullptr), _size_splits(nullptr), _outputs(), _num_splits(0), _slice_functions()
+{
+}
+
+void CLSplitVEx::configure(const ICLTensor *input, const ICLTensor *size_splits, uint32_t split_dim,
+ const std::vector<ICLTensor *> &outputs, unsigned int num_splits)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, size_splits);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(size_splits, outputs, num_splits));
+
+ _input = input;
+ _size_splits = size_splits;
+ _outputs = outputs;
+ _num_splits = num_splits;
+
+ // Create tensor slices
+ _slice_functions.resize(_num_splits);
+
+ // Extract output tensor info
+ std::vector<ITensorInfo *> outputs_info;
+ for (auto &&output : _outputs)
+ {
+ ARM_COMPUTE_ERROR_ON_NULLPTR(output);
+ outputs_info.emplace_back(output->info());
+ }
+
+ // Validate slices
+ ARM_COMPUTE_ERROR_THROW_ON(validate_slices(_input->info(), outputs_info, split_dim));
+
+ // Configure slices
+ configure_slices(_input, _outputs, _slice_functions, split_dim);
+}
+
+void CLSplitVEx::run()
+{
+ // Execute the slices
+ for (unsigned i = 0; i < _outputs.size(); ++i)
+ {
+ _slice_functions[i].run();
+ }
+}
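
configure_slices above walks the outputs in order, giving output i the half-open range [offset, offset + extent_i) along split_dim, where extent_i is that output's size on the split axis. A sketch of the coordinate arithmetic; the real code also seeds the end coordinates of the non-split dimensions with -1, ACL's "to the end" marker:

#include <cstddef>
#include <utility>
#include <vector>

// Returns (start, end) pairs along the split axis for each requested split size.
std::vector<std::pair<std::size_t, std::size_t>>
split_ranges(const std::vector<std::size_t> &split_sizes)
{
  std::vector<std::pair<std::size_t, std::size_t>> ranges;
  std::size_t offset = 0;
  for (std::size_t s : split_sizes)
  {
    ranges.emplace_back(offset, offset + s);
    offset += s;
  }
  return ranges;
}
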
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp
index 3ac95a8e6..accd51302 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp
@@ -49,14 +49,14 @@ namespace arm_compute
{
CLTopKV2::CLTopKV2()
- : _k(0), _total_bits(0), _bits(0), _radix(0), _hist_buf_size(0), _glob_sum_buf_size(0), _n(0),
- _input(nullptr), _values(nullptr), _indices(nullptr), _qs_idx_buf(), _qs_temp_buf(),
- _hist_buf(), _glob_sum_buf(), _temp_buf(), _first_negative_idx_buf(), _in_key_buf(),
- _out_key_buf(), _in_ind_buf(), _out_ind_buf(), _p_in_key_buf(nullptr),
- _p_out_key_buf(nullptr), _p_in_ind_buf(nullptr), _p_out_ind_buf(nullptr) /*, _qs_kernel(),
- _init_kernel(), _hist_kernel(), _scan_hist_kernel(), _glob_scan_hist_kernel(),
- _paste_hist_kernel(), _reorder_kernel(), _find_first_negative_kernel(),
- _reorder_negatives_kernel(), _store_kernel()*/
+ : _k(0), _total_bits(0), _bits(0), _radix(0), _hist_buf_size(0), _glob_sum_buf_size(0), _n(0),
+ _input(nullptr), _values(nullptr), _indices(nullptr), _qs_idx_buf(), _qs_temp_buf(),
+ _hist_buf(), _glob_sum_buf(), _temp_buf(), _first_negative_idx_buf(), _in_key_buf(),
+ _out_key_buf(), _in_ind_buf(), _out_ind_buf(), _p_in_key_buf(nullptr), _p_out_key_buf(nullptr),
+ _p_in_ind_buf(nullptr), _p_out_ind_buf(nullptr) /*, _qs_kernel(),
+ _init_kernel(), _hist_kernel(), _scan_hist_kernel(), _glob_scan_hist_kernel(),
+ _paste_hist_kernel(), _reorder_kernel(), _find_first_negative_kernel(),
+ _reorder_negatives_kernel(), _store_kernel()*/
{
}
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp
index 3215d01a7..f3f093c18 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp
@@ -53,7 +53,7 @@ using namespace arm_compute;
using namespace arm_compute::misc::shape_calculator;
CLTransposeConvLayer::CLTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_manager(std::move(memory_manager)), _function()
+ : _memory_manager(std::move(memory_manager)), _function()
{
}
@@ -79,7 +79,7 @@ void CLTransposeConvLayer::configure(const CLCompileContext &compile_context, IC
{
case DeconvolutionMethod::DIRECT:
{
- auto f = arm_compute::support::cpp14::make_unique<CLDirectTransposeConvLayer>();
+ auto f = std::make_unique<CLDirectTransposeConvLayer>();
f->configure(compile_context, input, weights, bias, output, deconv_info, invalid_right,
invalid_bottom, weights_info);
_function = std::move(f);
@@ -87,7 +87,7 @@ void CLTransposeConvLayer::configure(const CLCompileContext &compile_context, IC
}
case DeconvolutionMethod::GEMM:
{
- auto f = arm_compute::support::cpp14::make_unique<CLGEMMDeconvolutionLayer>(_memory_manager);
+ auto f = std::make_unique<CLGEMMDeconvolutionLayer>(_memory_manager);
f->configure(compile_context, input, weights, bias, output, deconv_info);
_function = std::move(f);
break;
@@ -105,20 +105,20 @@ Status CLTransposeConvLayer::validate(const ITensorInfo *input, const ITensorInf
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
switch (CLTransposeConvLayer::get_deconvolution_method(
- input, weights, bias, output, deconv_info, invalid_right, invalid_bottom, weights_info))
+ input, weights, bias, output, deconv_info, invalid_right, invalid_bottom, weights_info))
{
case DeconvolutionMethod::DIRECT:
{
// Validate direct convolution layer
ARM_COMPUTE_RETURN_ON_ERROR(CLDirectTransposeConvLayer::validate(
- input, weights, bias, output, deconv_info, invalid_right, invalid_bottom, weights_info));
+ input, weights, bias, output, deconv_info, invalid_right, invalid_bottom, weights_info));
break;
}
case DeconvolutionMethod::GEMM:
{
// Validate gemm-based convolution layer
ARM_COMPUTE_RETURN_ON_ERROR(
- CLGEMMDeconvolutionLayer::validate(input, weights, bias, output, deconv_info));
+ CLGEMMDeconvolutionLayer::validate(input, weights, bias, output, deconv_info));
break;
}
default:
@@ -130,9 +130,9 @@ Status CLTransposeConvLayer::validate(const ITensorInfo *input, const ITensorInf
}
DeconvolutionMethod CLTransposeConvLayer::get_deconvolution_method(
- const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias,
- ITensorInfo *output, const PadStrideInfo &deconv_info, unsigned int invalid_right,
- unsigned int invalid_bottom, const WeightsInfo &weights_info)
+ const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias,
+ ITensorInfo *output, const PadStrideInfo &deconv_info, unsigned int invalid_right,
+ unsigned int invalid_bottom, const WeightsInfo &weights_info)
{
ARM_COMPUTE_UNUSED(output, bias, weights_info);
diff --git a/compute/ARMComputeEx/src/runtime/CPP/functions/CPPOneHotEx.cpp b/compute/ARMComputeEx/src/runtime/CPP/functions/CPPOneHotEx.cpp
deleted file mode 100644
index 768c15b41..000000000
--- a/compute/ARMComputeEx/src/runtime/CPP/functions/CPPOneHotEx.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/CPP/functions/CPPOneHotEx.h"
-
-#include "arm_compute/core/CPP/kernels/CPPOneHotKernelEx.h"
-#include "support/MemorySupport.h"
-
-using namespace arm_compute;
-
-void CPPOneHotEx::configure(const ITensor *indices, const ITensor *depth, const ITensor *on_value,
- const ITensor *off_value, ITensor *output, const int axis)
-{
- auto k = support::cpp14::make_unique<CPPOneHotKernelEx>();
- k->configure(indices, depth, on_value, off_value, output, axis);
- _kernel = std::move(k);
-}
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp
deleted file mode 100644
index 2752eb6aa..000000000
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/NEON/functions/NEActivationLayerEx.h"
-
-#include "arm_compute/core/NEON/kernels/NEActivationLayerKernelEx.h"
-#include "arm_compute/runtime/IRuntimeContext.h"
-#include "support/MemorySupport.h"
-
-namespace arm_compute
-{
-NEActivationLayerEx::NEActivationLayerEx(IRuntimeContext *ctx) // NOLINT
- : INESimpleFunctionNoBorder(ctx)
-{
-}
-void NEActivationLayerEx::configure(ITensor *input, ITensor *output,
- ActivationLayerInfo activation_info)
-{
- auto k = support::cpp14::make_unique<NEActivationLayerKernelEx>();
- k->configure(input, output, activation_info);
- _kernel = std::move(k);
-}
-
-Status NEActivationLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output,
- const ActivationLayerInfo &act_info)
-{
- return NEActivationLayerKernelEx::validate(input, output, act_info);
-}
-} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp
index 2fc94b267..e6b7329d1 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp
@@ -38,11 +38,10 @@
* SOFTWARE.
*/
-#include "arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h"
#include <arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h>
+#include "arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h"
#include "arm_compute/core/ITensor.h"
-#include "support/MemorySupport.h"
#include <utility>
@@ -53,7 +52,7 @@ template <BinaryLogicalOperation COP>
void NEBinaryLogicalOperationStatic<COP>::configure(ITensor *input1, ITensor *input2,
ITensor *output)
{
- auto k = support::cpp14::make_unique<NEBinaryLogicalOperationKernel>();
+ auto k = std::make_unique<NEBinaryLogicalOperationKernel>();
k->configure(COP, input1, input2, output);
_kernel = std::move(k);
}
@@ -69,7 +68,7 @@ Status NEBinaryLogicalOperationStatic<COP>::validate(const ITensorInfo *input1,
void NEBinaryLogicalOperation::configure(ITensor *input1, ITensor *input2, ITensor *output,
BinaryLogicalOperation op)
{
- auto k = support::cpp14::make_unique<NEBinaryLogicalOperationKernel>();
+ auto k = std::make_unique<NEBinaryLogicalOperationKernel>();
k->configure(op, input1, input2, output);
_kernel = std::move(k);
}
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NECastBool.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NECastBool.cpp
new file mode 100644
index 000000000..f6eec2603
--- /dev/null
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NECastBool.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2016-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NECastBool.h"
+
+#include "arm_compute/core/NEON/kernels/NECastBoolKernel.h"
+
+using namespace arm_compute;
+
+void NECastBool::configure(const ITensor *input, ITensor *output)
+{
+ auto k = std::make_unique<NECastBoolKernel>();
+ k->configure(input, output);
+ _kernel = std::move(k);
+}
+
+Status NECastBool::validate(const ITensorInfo *input, const ITensorInfo *output)
+{
+ return NECastBoolKernel::validate(input, output);
+}
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp
index e0ab3e025..99fc5c579 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp
@@ -41,13 +41,12 @@
#include "arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h"
#include "arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h"
-#include "support/MemorySupport.h"
using namespace arm_compute;
void NEEmbeddingLookup::configure(const ITensor *input, ITensor *output, const ITensor *lookups)
{
- auto k = support::cpp14::make_unique<NEEmbeddingLookupKernel>();
+ auto k = std::make_unique<NEEmbeddingLookupKernel>();
k->configure(input, output, lookups);
_kernel = std::move(k);
}
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp
index a123439d9..fbd88fff0 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp
@@ -58,7 +58,7 @@ namespace
Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo &output)
{
ARM_COMPUTE_RETURN_ON_ERROR(
- NEGEMMLowpMatrixMultiplyCore::validate(&input, &weights, nullptr, &output));
+ NEGEMMLowpMatrixMultiplyCore::validate(&input, &weights, nullptr, &output));
return Status{};
}
@@ -66,7 +66,7 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I
void NEFullyConnectedHybridLayerReshapeWeights::configure(const ITensor *input, ITensor *output)
{
- auto k = support::cpp14::make_unique<NETransposeKernel>();
+ auto k = std::make_unique<NETransposeKernel>();
k->configure(input, output);
_kernel = std::move(k);
}
@@ -78,11 +78,11 @@ Status NEFullyConnectedHybridLayerReshapeWeights::validate(const ITensorInfo *in
}
NEFullyConnectedHybridLayer::NEFullyConnectedHybridLayer(
- std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _reshape_weights_function(), _quant_input_kernel(),
- _mm_gemmlowp(), _accumulate_biases_kernel(), _reshape_weights_output(), _quantized_input(),
- _scale_factor(), _original_weights(nullptr), _are_weights_reshaped(false),
- _accumulate_biases(false), _is_prepared(false)
+ std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(std::move(memory_manager)), _reshape_weights_function(), _quant_input_kernel(),
+ _mm_gemmlowp(), _accumulate_biases_kernel(), _reshape_weights_output(), _quantized_input(),
+ _scale_factor(), _original_weights(nullptr), _are_weights_reshaped(false),
+ _accumulate_biases(false), _is_prepared(false)
{
}
@@ -103,8 +103,8 @@ void NEFullyConnectedHybridLayer::configure(const ITensor *input, const ITensor
// Perform validate step
ARM_COMPUTE_ERROR_THROW_ON(NEFullyConnectedHybridLayer::validate(
- input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
- fc_info));
+ input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
+ fc_info));
_are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
_accumulate_biases = false;
@@ -132,10 +132,10 @@ void NEFullyConnectedHybridLayer::configure(const ITensor *input, const ITensor
bool _is_fc_after_conv;
if (is_batched_fc_layer)
{
- _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&
- (std::equal(input->info()->tensor_shape().cbegin() + 3,
- input->info()->tensor_shape().cend(),
- output->info()->tensor_shape().cbegin() + 1));
+ _is_fc_after_conv =
+ (TensorShape::num_max_dimensions >= 4) &&
+ (std::equal(input->info()->tensor_shape().cbegin() + 3, input->info()->tensor_shape().cend(),
+ output->info()->tensor_shape().cbegin() + 1));
}
else
{
@@ -150,23 +150,23 @@ void NEFullyConnectedHybridLayer::configure(const ITensor *input, const ITensor
{
// Reshape the weights
_reshape_weights_output.allocator()->init(
- weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- compute_transposed_shape(*weights->info())));
+ weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
+ compute_transposed_shape(*weights->info())));
_reshape_weights_function.configure(weights_to_use, &_reshape_weights_output);
weights_to_use = &_reshape_weights_output;
}
// Quantize input
_quantized_input.allocator()->init(
- input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(
- DataType::QASYMM8_SIGNED));
+ input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(
+ DataType::QASYMM8_SIGNED));
_scale_factor.allocator()->init(
- TensorInfo(TensorShape{output->info()->dimension(1)}, 1, DataType::F32));
+ TensorInfo(TensorShape{output->info()->dimension(1)}, 1, DataType::F32));
_quant_input_kernel.configure(input, &_quantized_input, &_scale_factor);
// GEMM
_gemmlowp_output.allocator()->init(
- output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
+ output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
configure_mm(&_quantized_input, weights_to_use, &_gemmlowp_output);
// Multiply scale
@@ -195,8 +195,8 @@ Status NEFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe
bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
const ITensorInfo &reshaped_weights =
- TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- compute_transposed_shape(*weights)));
+ TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
+ compute_transposed_shape(*weights)));
// Configure accumulate biases kernel for non quantized asymmetric types
if (biases != nullptr)
@@ -217,7 +217,7 @@ Status NEFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe
{
// Validate reshape weights kernel
ARM_COMPUTE_RETURN_ON_ERROR(
- NEFullyConnectedHybridLayerReshapeWeights::validate(weights_to_use, &reshaped_weights));
+ NEFullyConnectedHybridLayerReshapeWeights::validate(weights_to_use, &reshaped_weights));
weights_to_use = &reshaped_weights;
}
@@ -225,20 +225,19 @@ Status NEFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1));
// Validate quantization kernel
- const ITensorInfo &quantized_input =
- TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_data_type(
- DataType::QASYMM8_SIGNED));
+ const ITensorInfo &quantized_input = TensorInfo(
+ input->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::QASYMM8_SIGNED));
const ITensorInfo &scale_factor = TensorInfo(TensorShape{output->dimension(1)}, 1, DataType::F32);
ARM_COMPUTE_RETURN_ON_ERROR(
- NEQuantizationSymmetricKernel::validate(input, &quantized_input, &scale_factor));
+ NEQuantizationSymmetricKernel::validate(input, &quantized_input, &scale_factor));
const ITensorInfo &gemmlowp_output = TensorInfo(
- output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
+ output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
// Validate matrix multiply kernel
ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(quantized_input, *weights_to_use, gemmlowp_output));
ARM_COMPUTE_RETURN_ON_ERROR(NEMultiplyScaleFactorKernel::validate(
- &gemmlowp_output, &scale_factor, output, weights->quantization_info().uniform().scale));
+ &gemmlowp_output, &scale_factor, output, weights->quantization_info().uniform().scale));
return Status{};
}
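
The hybrid layer configured above quantizes the f32 input symmetrically per batch row, runs the GEMM in the integer domain into an S32 accumulator, and then multiplies the accumulator back by the dynamic input scale factors and the uniform weight scale (see the NEMultiplyScaleFactorKernel::validate call). A scalar sketch of those two steps, assuming one scale factor per row and a single uniform weight scale; the real kernels are vectorized:

#include <algorithm>
#include <cmath>
#include <cstdint>

// Symmetric quantization: the scale is chosen so the row's max |value| maps to 127.
inline int8_t quantize_sym(float v, float scale) // scale = max_abs / 127
{
  return static_cast<int8_t>(std::lround(std::max(-127.f, std::min(127.f, v / scale))));
}

// Final rescale of the S32 GEMM accumulator back to f32.
inline float rescale_accumulator(int32_t acc, float input_scale, float weight_scale)
{
  return static_cast<float>(acc) * input_scale * weight_scale;
}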
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp
index cb7557a5a..758f7dc59 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp
@@ -50,7 +50,8 @@
#include <algorithm>
#include <cmath>
-using namespace arm_compute;
+namespace arm_compute
+{
using namespace arm_compute::misc::shape_calculator;
namespace
@@ -69,14 +70,14 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I
// Validate gemmlowp function
ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixMultiplyCore::validate(
- &input.clone()->set_quantization_info(input_quantization_info),
- &weights.clone()->set_quantization_info(weights_quantization_info), nullptr, &output));
+ &input.clone()->set_quantization_info(input_quantization_info),
+ &weights.clone()->set_quantization_info(weights_quantization_info), nullptr, &output));
}
else
{
- ARM_COMPUTE_RETURN_ON_ERROR(NEGEMM::validate(
- &input, &weights, nullptr, &output, 1.f, 0.0f,
- GEMMInfo(false, false, false /* Reshape weights only for the first run */)));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ NEGEMM::validate(&input, &weights, nullptr, &output, 1.f, 0.0f,
+ GEMMInfo(false, false, false /* Reshape weights only for the first run */)));
}
return Status{};
@@ -84,12 +85,12 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I
} // namespace
NEFullyConnectedLayerEx::NEFullyConnectedLayerEx(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _flatten_kernel(), _convert_weights(),
- _reshape_weights_function(), _mm_gemm(), _mm_gemmlowp(), _gemmlowp_output_stage(),
- _accumulate_biases_kernel(), _flatten_output(), _gemmlowp_output(),
- _converted_weights_output(), _reshape_weights_output(), _original_weights(nullptr),
- _are_weights_converted(true), _are_weights_reshaped(false), _is_fc_after_conv(false),
- _accumulate_biases(false), _is_quantized(false), _is_prepared(false)
+ : _memory_group(std::move(memory_manager)), _flatten_kernel(), _convert_weights(),
+ _reshape_weights_function(), _mm_gemm(), _mm_gemmlowp(), _gemmlowp_output_stage(),
+ _accumulate_biases_kernel(), _flatten_output(), _gemmlowp_output(), _converted_weights_output(),
+ _reshape_weights_output(), _original_weights(nullptr), _are_weights_converted(true),
+ _are_weights_reshaped(false), _is_fc_after_conv(false), _accumulate_biases(false),
+ _is_quantized(false), _is_prepared(false)
{
}
@@ -105,9 +106,9 @@ void NEFullyConnectedLayerEx::configure_mm(const ITensor *input, const ITensor *
const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();
input->info()->set_quantization_info(QuantizationInfo(
- input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
+ input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
weights->info()->set_quantization_info(QuantizationInfo(
- weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
+ weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
// Configure gemmlowp function
_mm_gemmlowp.configure(input, weights, nullptr, output);
@@ -129,8 +130,8 @@ void NEFullyConnectedLayerEx::configure_conv_fc(const ITensor *input, const ITen
ITensor *output)
{
ARM_COMPUTE_ERROR_ON(
- (weights->info()->dimension(1) !=
- (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))));
+ (weights->info()->dimension(1) !=
+ (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))));
// If the fully connected layer is called after a convolution layer, the input tensor must be
// linearized
@@ -138,8 +139,7 @@ void NEFullyConnectedLayerEx::configure_conv_fc(const ITensor *input, const ITen
// Initialize output tensor for flatten
TensorShape shape_flatten = compute_flatten_shape(input->info());
_flatten_output.allocator()->init(
- input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- shape_flatten));
+ input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_flatten));
// Configure flatten kernel
_memory_group.manage(&_flatten_output);
@@ -165,12 +165,11 @@ void NEFullyConnectedLayerEx::configure(const ITensor *input, const ITensor *wei
const ITensor *biases, ITensor *output,
FullyConnectedLayerInfo fc_info)
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
-
// Perform validate step
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_ERROR_THROW_ON(NEFullyConnectedLayerEx::validate(
- input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
- fc_info));
+ input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
+ fc_info));
_are_weights_converted = true;
_are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
@@ -183,8 +182,7 @@ void NEFullyConnectedLayerEx::configure(const ITensor *input, const ITensor *wei
if (_is_quantized)
{
_gemmlowp_output.allocator()->init(
- output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(
- DataType::S32));
+ output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
}
// Configure accumulate biases kernel for non quantized asymmetric types
@@ -208,10 +206,10 @@ void NEFullyConnectedLayerEx::configure(const ITensor *input, const ITensor *wei
const bool is_batched_fc_layer = output->info()->dimension(1) > 1;
if (is_batched_fc_layer)
{
- _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&
- (std::equal(input->info()->tensor_shape().cbegin() + 3,
- input->info()->tensor_shape().cend(),
- output->info()->tensor_shape().cbegin() + 1));
+ _is_fc_after_conv =
+ (TensorShape::num_max_dimensions >= 4) &&
+ (std::equal(input->info()->tensor_shape().cbegin() + 3, input->info()->tensor_shape().cend(),
+ output->info()->tensor_shape().cbegin() + 1));
}
else
{
@@ -284,16 +282,16 @@ Status NEFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor
bool is_quantized = is_data_type_quantized_asymmetric(input->data_type());
const ITensorInfo &flatten_input =
- TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- compute_flatten_shape(input)));
+ TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
+ compute_flatten_shape(input)));
const ITensorInfo &reshaped_weights =
- TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- compute_transposed_shape(*weights)));
+ TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
+ compute_transposed_shape(*weights)));
const ITensorInfo &converted_weights =
- weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding())
- : TensorInfo(*reshaped_weights.clone());
+ weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding())
+ : TensorInfo(*reshaped_weights.clone());
const ITensorInfo &gemmlowp_output = TensorInfo(
- output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
+ output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
// Configure accumulate biases kernel for non quantized asymmetric types
if (biases != nullptr && !is_quantized)
@@ -330,7 +328,7 @@ Status NEFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor
{
// Validate reshape weights kernel
ARM_COMPUTE_RETURN_ON_ERROR(
- NEFullyConnectedLayerReshapeWeights::validate(weights, &reshaped_weights));
+ NEFullyConnectedLayerReshapeWeights::validate(weights, &reshaped_weights));
weights_to_use = &reshaped_weights;
}
@@ -338,7 +336,7 @@ Status NEFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor
{
// Validate convert weights kernel
ARM_COMPUTE_RETURN_ON_ERROR(NEConvertFullyConnectedWeights::validate(
- weights_to_use, &converted_weights, input->tensor_shape(), fc_info.weights_trained_layout));
+ weights_to_use, &converted_weights, input->tensor_shape(), fc_info.weights_trained_layout));
weights_to_use = &converted_weights;
}
@@ -346,11 +344,11 @@ Status NEFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor
{
// Fully Connected layer after a Convolution Layer without batches
ARM_COMPUTE_RETURN_ERROR_ON(
- (weights_to_use->dimension(1) !=
- (input->dimension(0) * input->dimension(1) * input->dimension(2))));
+ (weights_to_use->dimension(1) !=
+ (input->dimension(0) * input->dimension(1) * input->dimension(2))));
// Validate flatten kernel
- ARM_COMPUTE_RETURN_ON_ERROR(NEFlattenLayerKernel::validate(input, &flatten_input));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEFlattenLayer::validate(input, &flatten_input));
input_to_use = &flatten_input;
}
else
@@ -365,7 +363,7 @@ Status NEFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor
if (is_quantized)
{
ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::validate(
- &gemmlowp_output, biases, output));
+ &gemmlowp_output, biases, output));
}
return Status{};
@@ -376,9 +374,13 @@ void NEFullyConnectedLayerEx::run()
if (!_is_prepared)
{
if (!_are_weights_reshaped)
+ {
_reshape_weights_output.allocator()->allocate();
+ }
if (!_are_weights_converted)
+ {
_converted_weights_output.allocator()->allocate();
+ }
_is_prepared = true;
}
@@ -409,7 +411,7 @@ void NEFullyConnectedLayerEx::run()
// Linearize input if it comes from a convolutional layer
if (_is_fc_after_conv)
{
- NEScheduler::get().schedule(&_flatten_kernel, Window::DimY);
+ _flatten_kernel.run();
}
// Run matrix multiply
@@ -492,3 +494,4 @@ void NEFullyConnectedLayerEx::prepare()
}
#endif
}
+} // namespace arm_compute
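
The _is_fc_after_conv hunks above reformat a dense condition worth unpacking: a batched fully connected layer follows a convolution when every input dimension from index 3 upward matches the output dimensions from index 1 upward, i.e. the batch dimensions line up. A self-contained illustration with plain vectors standing in for TensorShape (the static TensorShape::num_max_dimensions check is collapsed into a size() test here):

#include <algorithm>
#include <cassert>
#include <vector>

int main()
{
  std::vector<int> in_shape{7, 7, 512, 4}; // W, H, C, N
  std::vector<int> out_shape{1000, 4};     // features, N
  bool fc_after_conv = in_shape.size() >= 4 &&
                       std::equal(in_shape.cbegin() + 3, in_shape.cend(),
                                  out_shape.cbegin() + 1);
  assert(fc_after_conv); // batch dimension (4) lines up
}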
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp
index dc6c78478..2199839fb 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp
@@ -19,6 +19,8 @@
#include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h>
#include <arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h>
#include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h>
+#include "src/core/helpers/AutoConfiguration.h"
+#include <cassert>
using namespace arm_compute;
@@ -56,7 +58,7 @@ void NEFullyConnectedReshapingLayer::configure(const arm_compute::ITensor *input
assert(kernel_type == KernelType::PREPROCESSED_WEIGHTS);
bool is_hybrid = input->info()->data_type() == DataType::F32 &&
- (weights->info()->data_type() == DataType::S8 ||
+ (weights->info()->data_type() == DataType::QSYMM8 ||
weights->info()->data_type() == DataType::QASYMM8_SIGNED);
if (is_hybrid)
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp
index 433c35d58..e5607ab9a 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp
@@ -41,7 +41,6 @@
#include "arm_compute/runtime/NEON/functions/NEGatherEx.h"
#include "arm_compute/core/NEON/kernels/NEGatherKernelEx.h"
-#include "support/MemorySupport.h"
#include <utility>
@@ -49,7 +48,7 @@ namespace arm_compute
{
void NEGatherEx::configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis)
{
- auto k = support::cpp14::make_unique<NEGatherKernelEx>();
+ auto k = std::make_unique<NEGatherKernelEx>();
k->configure(input, indices, output, axis);
_kernel = std::move(k);
}
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp
index 52d58accf..7cc6c89e7 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp
@@ -41,14 +41,13 @@
#include "arm_compute/runtime/NEON/functions/NEHashtableLookup.h"
#include "arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h"
-#include "support/MemorySupport.h"
using namespace arm_compute;
void NEHashtableLookup::configure(const ITensor *lookups, const ITensor *keys, const ITensor *input,
ITensor *output, ITensor *hits)
{
- auto k = support::cpp14::make_unique<NEHashtableLookupKernel>();
+ auto k = std::make_unique<NEHashtableLookupKernel>();
k->configure(lookups, keys, input, output, hits);
_kernel = std::move(k);
}
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp
index 16d74e62d..451aa0997 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp
@@ -46,9 +46,9 @@
namespace arm_compute
{
NEInstanceNormalizationLayerEx::NEInstanceNormalizationLayerEx(
- std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _normalization_kernel(), _is_nchw(false),
- _permute_input(), _permute_output(), _permuted_input(), _permuted_output()
+ std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(std::move(memory_manager)), _normalization_kernel(), _is_nchw(false),
+ _permute_input(), _permute_output(), _permuted_input(), _permuted_output()
{
}
@@ -88,8 +88,8 @@ Status NEInstanceNormalizationLayerEx::validate(const ITensorInfo *input, const
float epsilon)
{
return NEInstanceNormalizationLayerKernelEx::validate(
- &input->clone()->set_data_layout(DataLayout::NCHW),
- &output->clone()->set_data_layout(DataLayout::NCHW), gamma, beta, epsilon);
+ &input->clone()->set_data_layout(DataLayout::NCHW),
+ &output->clone()->set_data_layout(DataLayout::NCHW), gamma, beta, epsilon);
}
void NEInstanceNormalizationLayerEx::run()
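
For reference, the kernel validated above computes instance normalization, y = gamma * (x - mean) / sqrt(var + epsilon) + beta, per sample and per channel. A scalar sketch over one channel's values (illustrative only; the NEON kernel operates on whole NCHW tensors):

#include <cmath>
#include <vector>

void instance_norm(std::vector<float> &x, float gamma, float beta, float epsilon)
{
  float mean = 0.f, var = 0.f;
  for (float v : x) mean += v;
  mean /= x.size();
  for (float v : x) var += (v - mean) * (v - mean);
  var /= x.size();
  for (float &v : x)
    v = gamma * (v - mean) / std::sqrt(var + epsilon) + beta;
}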
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEOneHot.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEOneHot.cpp
new file mode 100644
index 000000000..e0620bad2
--- /dev/null
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEOneHot.cpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEOneHot.h"
+#include "arm_compute/core/NEON/kernels/NEOneHotKernel.h"
+
+#include <utility>
+namespace arm_compute
+{
+void NEOneHot::configure(const ITensor *indices, const ITensor *depth, const ITensor *on_value,
+ const ITensor *off_value, ITensor *output, int axis)
+{
+ auto k = std::make_unique<NEOneHotKernel>();
+ k->configure(indices, depth, on_value, off_value, output, axis);
+ _kernel = std::move(k);
+}
+Status NEOneHot::validate(const ITensorInfo *indices, const ITensorInfo *depth,
+ const ITensorInfo *on_value, const ITensorInfo *off_value,
+ const ITensorInfo *output, int axis)
+{
+ return NEOneHotKernel::validate(indices, depth, on_value, off_value, output, axis);
+}
+} // namespace arm_compute
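
The new NEOneHot function wraps NEOneHotKernel without adding logic of its own. As a reminder of the computation, a hedged scalar sketch of the axis = -1 case, with plain vectors standing in for ITensor:

#include <vector>

std::vector<std::vector<float>> one_hot(const std::vector<int> &indices, int depth,
                                        float on_value, float off_value)
{
  std::vector<std::vector<float>> out(indices.size(), std::vector<float>(depth, off_value));
  for (size_t i = 0; i < indices.size(); ++i)
    if (indices[i] >= 0 && indices[i] < depth) // out-of-range rows stay all-off
      out[i][indices[i]] = on_value;
  return out;
}
// one_hot({0, 2}, 3, 1.f, 0.f) -> {{1,0,0}, {0,0,1}}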
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp
index aedb537e9..a30c00ea1 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp
@@ -40,22 +40,24 @@
#include "arm_compute/runtime/NEON/functions/NEReduceOperation.h"
-#include "arm_compute/core/CPP/Validate.h"
+#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/runtime/Tensor.h"
+#include "src/core/CPP/Validate.h"
+#include "src/core/NEON/kernels/NEReductionOperationKernel.h"
+#include "src/core/helpers/AutoConfiguration.h"
using namespace arm_compute;
NEReduceOperation::NEReduceOperation(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(),
- _reduction_ops(), _keep_dims()
+ : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(),
+ _reduction_ops(), _keep_dims()
{
}
Status NEReduceOperation::validate(const ITensorInfo *input, const Coordinates &reduction_axis,
- bool keep_dims, const ITensorInfo *output, ReduceOperation op)
+ bool keep_dims, const ITensorInfo *output, ReductionOperation op)
{
ARM_COMPUTE_UNUSED(keep_dims);
ARM_COMPUTE_UNUSED(op);
@@ -102,7 +104,7 @@ Status NEReduceOperation::validate(const ITensorInfo *input, const Coordinates &
}
void NEReduceOperation::configure(ITensor *input, const Coordinates &reduction_axis, bool keep_dims,
- ITensor *output, ReduceOperation op)
+ ITensor *output, ReductionOperation op)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input);
@@ -125,7 +127,7 @@ void NEReduceOperation::configure(ITensor *input, const Coordinates &reduction_a
for (unsigned int i = 0; i < _reduction_ops; ++i)
{
TensorShape out_shape =
- i == 0 ? input->info()->tensor_shape() : (&_reduced_outs[i - 1])->info()->tensor_shape();
+ i == 0 ? input->info()->tensor_shape() : (&_reduced_outs[i - 1])->info()->tensor_shape();
out_shape.set(axis_local[i], 1);
auto in = (i == 0) ? input : (&_reduced_outs[i - 1]);
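
The loop shown in this hunk lowers a multi-axis reduction to a chain of single-axis reductions: each step reads the previous intermediate, sets the reduced dimension to 1, and writes into _reduced_outs[i]. A small sketch of that shape bookkeeping, with vectors standing in for TensorShape:

#include <vector>

std::vector<std::vector<int>> chain_shapes(std::vector<int> shape,
                                           const std::vector<int> &axes)
{
  std::vector<std::vector<int>> intermediates;
  for (int axis : axes)
  {
    shape[axis] = 1;                // mirrors out_shape.set(axis_local[i], 1)
    intermediates.push_back(shape); // shape of _reduced_outs[i]
  }
  return intermediates; // the last entry feeds the final reshape when !keep_dims
}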
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp
index 26a887912..7a1342644 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp
@@ -40,15 +40,19 @@
#include "arm_compute/runtime/NEON/functions/NEReduceSum.h"
-#include "arm_compute/core/CPP/Validate.h"
+#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/CPP/Validate.h"
+#include "src/core/NEON/kernels/NEReductionOperationKernel.h"
+#include "src/core/helpers/AutoConfiguration.h"
using namespace arm_compute;
NEReduceSum::NEReduceSum(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(),
- _reduction_ops(), _keep_dims()
+ : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(),
+ _reduction_ops(), _keep_dims()
{
}
@@ -122,7 +126,7 @@ void NEReduceSum::configure(ITensor *input, const Coordinates &reduction_axis, b
for (unsigned int i = 0; i < _reduction_ops; ++i)
{
TensorShape out_shape =
- i == 0 ? input->info()->tensor_shape() : (&_reduced_outs[i - 1])->info()->tensor_shape();
+ i == 0 ? input->info()->tensor_shape() : (&_reduced_outs[i - 1])->info()->tensor_shape();
out_shape.set(axis_local[i], 1);
auto in = (i == 0) ? input : (&_reduced_outs[i - 1]);
@@ -135,7 +139,7 @@ void NEReduceSum::configure(ITensor *input, const Coordinates &reduction_axis, b
_reduced_outs[i].allocator()->init(TensorInfo(out_shape, input->info()->num_channels(),
input->info()->data_type(),
input->info()->quantization_info())
- .set_data_layout(input->info()->data_layout()));
+ .set_data_layout(input->info()->data_layout()));
_memory_group.manage(&_reduced_outs[i]);
_reduction_kernels[i].configure(in, &_reduced_outs[i], axis_local[i],
ReductionOperation::SUM);
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReductionOperationEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReductionOperationEx.cpp
deleted file mode 100644
index 2aa0d2d4b..000000000
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReductionOperationEx.cpp
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/NEON/functions/NEReductionOperationEx.h"
-
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/runtime/NEON/NEScheduler.h"
-
-namespace arm_compute
-{
-namespace
-{
-/** Define dimension to split the window
- *
- * @param[in] axis Reduction axis
- *
- * @return The dimension to split the window
- */
-size_t reduction_window_split_dimension(unsigned int axis)
-{
- switch (axis)
- {
- case 0:
- return Window::DimY;
- case 1:
- case 2:
- case 3:
- return Window::DimX;
- default:
- ARM_COMPUTE_ERROR("Unsupported reduction axis");
- }
-}
-} // namespace
-
-NEReductionOperationEx::NEReductionOperationEx()
- : _reduction_kernel(), _fill_border_kernel(), _window_split(0), _reduction_axis()
-{
-}
-
-Status NEReductionOperationEx::validate(const ITensorInfo *input, const ITensorInfo *output,
- unsigned int axis, ReduceOperation op)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(NEReductionOperationKernelEx::validate(input, output, axis, op));
-
- return Status{};
-}
-
-void NEReductionOperationEx::configure(ITensor *input, ITensor *output, unsigned int axis,
- ReduceOperation op)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(
- NEReductionOperationEx::validate(input->info(), output->info(), axis, op));
-
- // Configure reduction kernel
- _reduction_kernel.configure(input, output, axis, op);
- _window_split = reduction_window_split_dimension(axis);
- _reduction_axis = axis;
-
- if (axis == 0)
- {
- // Configure fill border kernel
- const BorderSize fill_border_size = _reduction_kernel.border_size();
- PixelValue pixelValue;
- switch (op)
- {
- case ReduceOperation::MIN:
- {
- switch (input->info()->data_type())
- {
- case DataType::F32:
- {
- pixelValue = PixelValue(std::numeric_limits<float>::max());
- break;
- }
- case DataType::F16:
- {
- pixelValue = PixelValue(static_cast<half>(65504.0f));
- break;
- }
- case DataType::QASYMM8:
- {
- pixelValue =
- PixelValue(255, input->info()->data_type(), input->info()->quantization_info());
- break;
- }
- default:
- {
- ARM_COMPUTE_ERROR("Unsupported DataType");
- }
- }
- break;
- }
- case ReduceOperation::MAX:
- {
- switch (input->info()->data_type())
- {
- case DataType::F32:
- {
- pixelValue = PixelValue(-std::numeric_limits<float>::max());
- break;
- }
- case DataType::F16:
- {
- pixelValue = PixelValue(static_cast<half>(-65504.0f));
- break;
- }
- case DataType::QASYMM8:
- {
- pixelValue =
- PixelValue(0, input->info()->data_type(), input->info()->quantization_info());
- break;
- }
- default:
- {
- ARM_COMPUTE_ERROR("Unsupported DataType");
- }
- }
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Reduction Operation unsupported");
- }
- _fill_border_kernel.configure(input, fill_border_size, BorderMode::CONSTANT, pixelValue);
- }
-}
-
-void NEReductionOperationEx::run()
-{
- if (_reduction_axis == 0)
- {
- NEScheduler::get().schedule(&_fill_border_kernel, Window::DimY);
- }
- NEScheduler::get().schedule(&_reduction_kernel, _window_split);
-}
-} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp
index aa165cc15..4675121b2 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp
@@ -44,6 +44,7 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/helpers/AutoConfiguration.h"
using namespace arm_compute::misc::shape_calculator;
@@ -51,17 +52,9 @@ namespace arm_compute
{
NETransposeConvLayer::NETransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
- : _memory_group(std::move(memory_manager)),
- _conv_f(),
- _upsample_f(),
- _flip_weights(),
- _scaled_output(),
- _weights_flipped(),
- _flip_axis(),
- _original_weights(nullptr),
- _input(nullptr),
- _info(),
- _is_prepared(false)
+ : _memory_group(std::move(memory_manager)), _conv_f(), _upsample_f(), _flip_weights(),
+ _scaled_output(), _weights_flipped(), _flip_axis(), _original_weights(nullptr), _input(nullptr),
+ _info(), _is_prepared(false)
{
}
@@ -76,15 +69,15 @@ Status NETransposeConvLayer::validate(const ITensorInfo *input, const ITensorInf
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, input);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(weights, input);
const unsigned int width_idx =
- get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::WIDTH);
+ get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::WIDTH);
const unsigned int height_idx =
- get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::HEIGHT);
+ get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::HEIGHT);
ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) != weights->dimension(height_idx));
ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) < 1);
auto out_dims = transposeconv_output_dimensions(
- input->dimension(width_idx), input->dimension(height_idx), weights->dimension(width_idx),
- weights->dimension(height_idx), info, invalid_right, invalid_bottom);
+ input->dimension(width_idx), input->dimension(height_idx), weights->dimension(width_idx),
+ weights->dimension(height_idx), info, invalid_right, invalid_bottom);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
if (bias != nullptr)
@@ -117,24 +110,24 @@ Status NETransposeConvLayer::validate(const ITensorInfo *input, const ITensorInf
unsigned int pad_right = 0;
unsigned int pad_top = 0;
unsigned int pad_bottom = 0;
- const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
- *input, *weights, info, out_dims, invalid_right, invalid_bottom, pad_left, pad_right, pad_top,
- pad_bottom);
+ const TensorShape scale_out_shape =
+ compute_transposeconv_upsampled_shape(*input, *weights, info, out_dims, invalid_right,
+ invalid_bottom, pad_left, pad_right, pad_top, pad_bottom);
TensorInfo scale_out_info(
- input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(scale_out_shape));
+ input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(scale_out_shape));
const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
const unsigned int batches_idx =
- get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::BATCHES);
+ get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::BATCHES);
const unsigned int channel_idx =
- get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::CHANNEL);
+ get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::CHANNEL);
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(batches_idx) !=
scale_out_info.dimension(batches_idx));
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(channel_idx) !=
scale_out_info.dimension(channel_idx));
- ARM_COMPUTE_RETURN_ON_ERROR(NEConvolutionLayer::validate(&scale_out_info, weights, bias, output,
- conv_info, WeightsInfo()));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ NEConvolutionLayer::validate(&scale_out_info, weights, bias, output, conv_info, WeightsInfo()));
return Status{};
}
@@ -146,21 +139,21 @@ void NETransposeConvLayer::configure(ITensor *input, const ITensor *weights, con
// Perform validation step
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_ERROR_THROW_ON(NETransposeConvLayer::validate(
- input->info(), weights->info(), (bias == nullptr) ? nullptr : bias->info(), output->info(),
- info, invalid_right, invalid_bottom));
+ input->info(), weights->info(), (bias == nullptr) ? nullptr : bias->info(), output->info(),
+ info, invalid_right, invalid_bottom));
const DataLayout data_layout = input->info()->data_layout();
const unsigned int width_idx =
- get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
const unsigned int height_idx =
- get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+ get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
auto out_dims = transposeconv_output_dimensions(
- input->info()->dimension(width_idx), input->info()->dimension(height_idx),
- weights->info()->dimension(width_idx), weights->info()->dimension(height_idx), info,
- invalid_right, invalid_bottom);
+ input->info()->dimension(width_idx), input->info()->dimension(height_idx),
+ weights->info()->dimension(width_idx), weights->info()->dimension(height_idx), info,
+ invalid_right, invalid_bottom);
const TensorShape output_shape =
- compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info());
+ compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info());
_input = input;
_original_weights = weights;
@@ -188,8 +181,8 @@ void NETransposeConvLayer::configure(ITensor *input, const ITensor *weights, con
const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
- *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left,
- pad_right, pad_top, pad_bottom);
+ *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left,
+ pad_right, pad_top, pad_bottom);
const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom,
DimensionRoundingType::FLOOR);
diff --git a/compute/cker/CMakeLists.txt b/compute/cker/CMakeLists.txt
index 609dd45a3..d464dccae 100644
--- a/compute/cker/CMakeLists.txt
+++ b/compute/cker/CMakeLists.txt
@@ -8,15 +8,33 @@ target_link_libraries(nnfw_lib_cker INTERFACE gemmlowp)
target_link_libraries(nnfw_lib_cker INTERFACE ruy)
target_link_libraries(nnfw_lib_cker INTERFACE ruy_instrumentation)
target_compile_definitions(nnfw_lib_cker INTERFACE USE_RUY_GEMV)
-if(EXPERIMENTAL_RUY_FEATURE)
- target_compile_definitions(nnfw_lib_cker INTERFACE EXPERIMENTAL_RUY_FEATURE)
-endif(EXPERIMENTAL_RUY_FEATURE)
if(PROFILE_RUY)
target_link_libraries(nnfw_lib_cker INTERFACE ruy_profiler)
endif(PROFILE_RUY)
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)")
+ target_compile_definitions(nnfw_lib_cker INTERFACE CKER_X86_PLATFORM)
+endif(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)")
+
target_include_directories(nnfw_lib_cker INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/include)
# Workaround to avoid warning
# TODO Resolve warning
target_compile_options(nnfw_lib_cker INTERFACE -Wno-attributes)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+set(TEST_CKER test_cker)
+
+file(GLOB_RECURSE TESTS "src/*.test.cc")
+
+add_executable(${TEST_CKER} ${TESTS})
+
+target_link_libraries(${TEST_CKER} nnfw_lib_cker)
+target_link_libraries(${TEST_CKER} nnfw_coverage)
+target_link_libraries(${TEST_CKER} gtest gtest_main ${LIB_PTHREAD})
+
+add_test(${TEST_CKER} ${TEST_CKER})
+install(TARGETS ${TEST_CKER} DESTINATION unittest)
diff --git a/compute/cker/include/cker/CpuBackendThreadpool.h b/compute/cker/include/cker/CpuBackendThreadpool.h
new file mode 100644
index 000000000..8ec6140bd
--- /dev/null
+++ b/compute/cker/include/cker/CpuBackendThreadpool.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_CPU_BACKEND_THREADPOOL_H_
+#define __NNFW_CKER_CPU_BACKEND_THREADPOOL_H_
+
+#include <ruy/context.h> // from @ruy
+#include <ruy/thread_pool.h> // from @ruy
+
+#include <stdexcept>
+
+namespace nnfw
+{
+namespace cker
+{
+namespace cpu_backend_threadpool
+{
+
+using Task = ruy::Task;
+
+template <typename TaskType>
+void Execute(int tasks_count, TaskType *tasks, ruy::Context *ruy_context)
+{
+ assert(ruy_context != nullptr);
+ assert(tasks_count <= ruy_context->max_num_threads());
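+ // The asserts above compile out in release builds; keep a runtime check so a
+ // null context still fails loudly.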
+ if (ruy_context == nullptr)
+ {
+ throw std::runtime_error("CpuBackendThreadpool.h: ruy::Context is null");
+ }
+ ruy_context->mutable_thread_pool()->Execute(tasks_count, tasks);
+}
+
+} // namespace cpu_backend_threadpool
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_CPU_BACKEND_THREADPOOL_H_
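
A usage sketch for the new Execute() helper, assuming ruy's Task interface (a virtual Run() method, which the header re-exports as cpu_backend_threadpool::Task); FillTask and parallel_fill below are illustrative names, not part of this diff:

#include <ruy/context.h>
#include <vector>
#include "cker/CpuBackendThreadpool.h"

struct FillTask : nnfw::cker::cpu_backend_threadpool::Task
{
  float *dst = nullptr;
  int begin = 0, end = 0;
  void Run() override
  {
    for (int i = begin; i < end; ++i)
      dst[i] = 0.f; // each task handles a disjoint slice
  }
};

void parallel_fill(float *data, int size, ruy::Context *ctx)
{
  const int n = 2; // must not exceed ctx->max_num_threads()
  std::vector<FillTask> tasks(n);
  for (int t = 0; t < n; ++t)
  {
    tasks[t].dst = data;
    tasks[t].begin = t * size / n;
    tasks[t].end = (t + 1) * size / n;
  }
  nnfw::cker::cpu_backend_threadpool::Execute(n, tasks.data(), ctx);
}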
diff --git a/compute/cker/include/cker/NeonTensorUtils.h b/compute/cker/include/cker/NeonTensorUtils.h
index 246fd9a46..45ad969c3 100644
--- a/compute/cker/include/cker/NeonTensorUtils.h
+++ b/compute/cker/include/cker/NeonTensorUtils.h
@@ -20,11 +20,13 @@
#include <ruy/path.h>
#include <ruy/ruy.h>
-#include <ruy/detect_arm.h>
#include "cker/Types.h"
#include "cker/neon/neon_check.h"
#include "cker/ruy/RuySupport.h"
#include "util/logging.h"
+#if defined __linux__ && defined __aarch64__
+#include <sys/auxv.h>
+#endif
#include <cassert>
#include <cmath>
@@ -41,6 +43,8 @@ namespace cker
namespace
{
+constexpr int kFloatValuesPerNeonVector = 4;
+
// TODO(ahentz): Clean up.
using int8 = std::int8_t;
using uint8 = std::uint8_t;
@@ -49,6 +53,11 @@ using uint16 = std::uint16_t;
using int32 = std::int32_t;
using uint32 = std::uint32_t;
+template <int PerNeonSize> inline int RoundDownVectors(int size)
+{
+ return size & ~(PerNeonSize - 1);
+}
+
// Allocates, at least, size bytes of uninitialized storage whose alignment is
// specified by alignment. The size parameter must be an integral multiple of
// alignment.
@@ -73,14 +82,37 @@ inline int32_t AccumulateNeonLane(const int32x4_t lane)
} // namespace
-#ifdef __aarch64__
+// The implementation of dotprod detection is copied from ruy's internal
+// function DetectDotprod().
+// At the moment it's only implemented on Linux ARM64. Consider syncing again
+// with ruy in the future to share improvements.
+#if defined __linux__ && defined __aarch64__
+inline bool DetectDotprodByLinuxAuxvMethod()
+{
+ // This is the value of HWCAP_ASIMDDP in sufficiently recent Linux headers;
+ // however, we need to support building against older headers for the time
+ // being.
+ const int kLocalHwcapAsimddp = 1 << 20;
+ return getauxval(AT_HWCAP) & kLocalHwcapAsimddp;
+}
+#endif
+
+inline bool DetectArmNeonDotprod()
+{
+#if defined __linux__ && defined __aarch64__
+ return DetectDotprodByLinuxAuxvMethod();
+#endif
-bool HasSdotInstruction()
+ return false;
+}
+
+inline bool HasSdotInstruction()
{
- static const bool has_dotprod = ruy::DetectDotprod();
+ static const bool has_dotprod = DetectArmNeonDotprod();
return has_dotprod;
}
+#ifdef __aarch64__
// We interleave vector data to make the dot product logic more efficient.
// Suppose that vectors is:
// a0 a1 a2 a3 a4 a5 ...
@@ -93,13 +125,13 @@ bool HasSdotInstruction()
// e0 e1 e2 e3 f0 f1 f2 f3 ...
// Once the data is interleaved, each 16-byte read from the vectors pointer
// contains 4 bytes from each of 4 vectors.
-const int8_t *ShuffleVectors(const int8_t *vectors, const int n_batch, const int m_cols,
- void **shuffled_vectors_free)
+inline const int8_t *ShuffleVectors(const int8_t *vectors, const int n_batch, const int m_cols,
+ void **shuffled_vectors_free)
{
const int kWeightsPerUint32 = 4;
int8 *shuffled_vectors = reinterpret_cast<int8 *>(
- aligned_alloc(kWeightsPerUint32, n_batch * m_cols, shuffled_vectors_free));
+ aligned_alloc(kWeightsPerUint32, n_batch * m_cols, shuffled_vectors_free));
for (int i = 0; i < n_batch; i += 4)
{
@@ -113,25 +145,25 @@ const int8_t *ShuffleVectors(const int8_t *vectors, const int n_batch, const int
while (unshuffled_vec0_ptr != end_vec0_ptr)
{
asm volatile(
- // This code path requires that (n_cols % 16) == 0 so we can safely
- // read in 16-byte chunks from each row.
- "ld1 {v0.16b}, [%[unshuffled_vec0_ptr]], #16\n"
- "ld1 {v1.16b}, [%[unshuffled_vec1_ptr]], #16\n"
- "ld1 {v2.16b}, [%[unshuffled_vec2_ptr]], #16\n"
- "ld1 {v3.16b}, [%[unshuffled_vec3_ptr]], #16\n"
-
- "st4 {v0.s, v1.s, v2.s, v3.s}[0], [%[shuffled_vectors_ptr]], #16\n"
- "st4 {v0.s, v1.s, v2.s, v3.s}[1], [%[shuffled_vectors_ptr]], #16\n"
- "st4 {v0.s, v1.s, v2.s, v3.s}[2], [%[shuffled_vectors_ptr]], #16\n"
- "st4 {v0.s, v1.s, v2.s, v3.s}[3], [%[shuffled_vectors_ptr]], #16\n"
-
- : [unshuffled_vec0_ptr] "+r"(unshuffled_vec0_ptr),
- [unshuffled_vec1_ptr] "+r"(unshuffled_vec1_ptr),
- [unshuffled_vec2_ptr] "+r"(unshuffled_vec2_ptr),
- [unshuffled_vec3_ptr] "+r"(unshuffled_vec3_ptr),
- [shuffled_vectors_ptr] "+r"(shuffled_vectors_ptr)
- :
- : "v0", "v1", "v2", "v3", "cc", "memory");
+ // This code path requires that (n_cols % 16) == 0 so we can safely
+ // read in 16-byte chunks from each row.
+ "ld1 {v0.16b}, [%[unshuffled_vec0_ptr]], #16\n"
+ "ld1 {v1.16b}, [%[unshuffled_vec1_ptr]], #16\n"
+ "ld1 {v2.16b}, [%[unshuffled_vec2_ptr]], #16\n"
+ "ld1 {v3.16b}, [%[unshuffled_vec3_ptr]], #16\n"
+
+ "st4 {v0.s, v1.s, v2.s, v3.s}[0], [%[shuffled_vectors_ptr]], #16\n"
+ "st4 {v0.s, v1.s, v2.s, v3.s}[1], [%[shuffled_vectors_ptr]], #16\n"
+ "st4 {v0.s, v1.s, v2.s, v3.s}[2], [%[shuffled_vectors_ptr]], #16\n"
+ "st4 {v0.s, v1.s, v2.s, v3.s}[3], [%[shuffled_vectors_ptr]], #16\n"
+
+ : [ unshuffled_vec0_ptr ] "+r"(unshuffled_vec0_ptr),
+ [ unshuffled_vec1_ptr ] "+r"(unshuffled_vec1_ptr),
+ [ unshuffled_vec2_ptr ] "+r"(unshuffled_vec2_ptr),
+ [ unshuffled_vec3_ptr ] "+r"(unshuffled_vec3_ptr),
+ [ shuffled_vectors_ptr ] "+r"(shuffled_vectors_ptr)
+ :
+ : "v0", "v1", "v2", "v3", "cc", "memory");
}
}
@@ -172,104 +204,104 @@ static void DotprodMatrixBatchFourVectorMultiplyAccumulate(const int8_t *__restr
const int8 *mat_ptr3 = matrix + ((row + 3) * m_cols);
asm volatile(
- // Zero out the accumulator registers.
- "dup v0.4s, wzr\n"
- "dup v1.4s, wzr\n"
- "dup v2.4s, wzr\n"
- "dup v3.4s, wzr\n"
-
- "1:\n" // batch_cols_loop
-
- // Read 16 more bytes from a pair of matrix rows.
- "ld1 {v12.16b}, [%[mat_ptr0]], #16\n"
-
- // Prefetch two rows ahead.
- "prfm pldl1strm, [%[mat_ptr2]]\n"
- "prfm pldl1strm, [%[mat_ptr3]]\n"
-
- // Read from input vectors 4 times; 64 bytes total.
- // Each 16-byte register contains parts of 4 vectors; see the
- // shuffle logic above.
-
- // From Benoit, places to look in the future:
- // - Move load instructions further from sdot
- // - Switch loop use-then-reload
- // - Do partial unrolling to use register space better
- "ld1 {v8.16b}, [%[vec_ptr]], #16\n"
- ".word 0x4f8ce100 // sdot v0.4s, v8.16b, v12.4b[0]\n"
- "ld1 {v9.16b}, [%[vec_ptr]], #16\n"
- ".word 0x4face121 // sdot v1.4s, v9.16b, v12.4b[1]\n"
- "ld1 {v10.16b}, [%[vec_ptr]], #16\n"
- ".word 0x4f8ce940 // sdot v0.4s, v10.16b, v12.4b[2]\n"
- "ld1 {v11.16b}, [%[vec_ptr]], #16\n"
- ".word 0x4face961 // sdot v1.4s, v11.16b, v12.4b[3]\n"
-
- // Update prefetch pointers.
- "add %[mat_ptr2], %[mat_ptr2], #16\n"
- "add %[mat_ptr3], %[mat_ptr3], #16\n"
-
- // Re-use those vectors for the next row as well.
- "ld1 {v13.16b}, [%[mat_ptr1]], #16\n"
- ".word 0x4f8de102 // sdot v2.4s, v8.16b, v13.4b[0]\n"
- ".word 0x4fade123 // sdot v3.4s, v9.16b, v13.4b[1]\n"
- ".word 0x4f8de942 // sdot v2.4s, v10.16b, v13.4b[2]\n"
- ".word 0x4fade963 // sdot v3.4s, v11.16b, v13.4b[3]\n"
-
- // If we're not done with these rows, continue.
- "cmp %[mat_ptr0], %[mat_ptr0_end]\n"
- "bne 1b\n" // batch_cols_loop
-
- // Done with the rows, sum the results.
- "add v0.4s, v0.4s, v1.4s\n"
- "add v2.4s, v2.4s, v3.4s\n"
-
- // Convert the per-vector sums to floating point.
- "scvtf v0.4s, v0.4s\n"
- "scvtf v1.4s, v2.4s\n"
-
- // Fetch scale factors.
- "ld1 {v4.4s}, [%[scaling_factors_ptr]]\n"
-
- // Multiply scale factors times sums.
- "fmul v0.4s, v4.4s, v0.4s\n"
- "fmul v1.4s, v4.4s, v1.4s\n"
-
- // Load previous result values.
- // The result position is:
- // result[batch * m_rows + row]
- // Here that is factored into:
- // result_ptr = result + row
- // *result_ptr = res[0]
- // (uint8*)result_ptr += (m_rows * sizeof(float))
- // *result_ptr = res[1]
- // ...
- // Since we're reading two rows at a time, though, we read both
- // result[batch * m_rows + row]
- // and
- // result[batch * m_rows + row + 1]
- "ld2 {v9.s, v10.s}[0], [%[result_ptr]], %[wide_rows]\n"
- "ld2 {v9.s, v10.s}[1], [%[result_ptr]], %[wide_rows]\n"
- "ld2 {v9.s, v10.s}[2], [%[result_ptr]], %[wide_rows]\n"
- "ld2 {v9.s, v10.s}[3], [%[result_ptr]], %[wide_rows]\n"
-
- // Go back to the starting position (subtract wide_rows * 4).
- "sub %[result_ptr], %[result_ptr], %[wide_rows], lsl #2\n"
-
- // Add previous result values.
- "fadd v9.4s, v9.4s, v0.4s\n"
- "fadd v10.4s, v10.4s, v1.4s\n"
-
- // Store results.
- "st2 {v9.s, v10.s}[0], [%[result_ptr]], %[wide_rows]\n"
- "st2 {v9.s, v10.s}[1], [%[result_ptr]], %[wide_rows]\n"
- "st2 {v9.s, v10.s}[2], [%[result_ptr]], %[wide_rows]\n"
- "st2 {v9.s, v10.s}[3], [%[result_ptr]], %[wide_rows]\n"
- : [mat_ptr0] "+r"(mat_ptr0), [mat_ptr1] "+r"(mat_ptr1), [vec_ptr] "+r"(vec_ptr),
- [result_ptr] "+r"(result_ptr), [mat_ptr2] "+r"(mat_ptr2), [mat_ptr3] "+r"(mat_ptr3)
- : [mat_ptr0_end] "r"(mat_ptr0_end), [scaling_factors_ptr] "r"(scaling_factors_ptr),
- [wide_rows] "r"(wide_rows)
- : "x0", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12",
- "v13", "cc", "memory");
+ // Zero out the accumulator registers.
+ "dup v0.4s, wzr\n"
+ "dup v1.4s, wzr\n"
+ "dup v2.4s, wzr\n"
+ "dup v3.4s, wzr\n"
+
+ "1:\n" // batch_cols_loop
+
+ // Read 16 more bytes from a pair of matrix rows.
+ "ld1 {v12.16b}, [%[mat_ptr0]], #16\n"
+
+ // Prefetch two rows ahead.
+ "prfm pldl1strm, [%[mat_ptr2]]\n"
+ "prfm pldl1strm, [%[mat_ptr3]]\n"
+
+ // Read from input vectors 4 times; 64 bytes total.
+ // Each 16-byte register contains parts of 4 vectors; see the
+ // shuffle logic above.
+
+ // From Benoit, places to look in the future:
+ // - Move load instructions further from sdot
+ // - Switch loop use-then-reload
+ // - Do partial unrolling to use register space better
+ "ld1 {v8.16b}, [%[vec_ptr]], #16\n"
+ ".word 0x4f8ce100 // sdot v0.4s, v8.16b, v12.4b[0]\n"
+ "ld1 {v9.16b}, [%[vec_ptr]], #16\n"
+ ".word 0x4face121 // sdot v1.4s, v9.16b, v12.4b[1]\n"
+ "ld1 {v10.16b}, [%[vec_ptr]], #16\n"
+ ".word 0x4f8ce940 // sdot v0.4s, v10.16b, v12.4b[2]\n"
+ "ld1 {v11.16b}, [%[vec_ptr]], #16\n"
+ ".word 0x4face961 // sdot v1.4s, v11.16b, v12.4b[3]\n"
+
+ // Update prefetch pointers.
+ "add %[mat_ptr2], %[mat_ptr2], #16\n"
+ "add %[mat_ptr3], %[mat_ptr3], #16\n"
+
+ // Re-use those vectors for the next row as well.
+ "ld1 {v13.16b}, [%[mat_ptr1]], #16\n"
+ ".word 0x4f8de102 // sdot v2.4s, v8.16b, v13.4b[0]\n"
+ ".word 0x4fade123 // sdot v3.4s, v9.16b, v13.4b[1]\n"
+ ".word 0x4f8de942 // sdot v2.4s, v10.16b, v13.4b[2]\n"
+ ".word 0x4fade963 // sdot v3.4s, v11.16b, v13.4b[3]\n"
+
+ // If we're not done with these rows, continue.
+ "cmp %[mat_ptr0], %[mat_ptr0_end]\n"
+ "bne 1b\n" // batch_cols_loop
+
+ // Done with the rows, sum the results.
+ "add v0.4s, v0.4s, v1.4s\n"
+ "add v2.4s, v2.4s, v3.4s\n"
+
+ // Convert the per-vector sums to floating point.
+ "scvtf v0.4s, v0.4s\n"
+ "scvtf v1.4s, v2.4s\n"
+
+ // Fetch scale factors.
+ "ld1 {v4.4s}, [%[scaling_factors_ptr]]\n"
+
+ // Multiply scale factors times sums.
+ "fmul v0.4s, v4.4s, v0.4s\n"
+ "fmul v1.4s, v4.4s, v1.4s\n"
+
+ // Load previous result values.
+ // The result position is:
+ // result[batch * m_rows + row]
+ // Here that is factored into:
+ // result_ptr = result + row
+ // *result_ptr = res[0]
+ // (uint8*)result_ptr += (m_rows * sizeof(float))
+ // *result_ptr = res[1]
+ // ...
+ // Since we're reading two rows at a time, though, we read both
+ // result[batch * m_rows + row]
+ // and
+ // result[batch * m_rows + row + 1]
+ "ld2 {v9.s, v10.s}[0], [%[result_ptr]], %[wide_rows]\n"
+ "ld2 {v9.s, v10.s}[1], [%[result_ptr]], %[wide_rows]\n"
+ "ld2 {v9.s, v10.s}[2], [%[result_ptr]], %[wide_rows]\n"
+ "ld2 {v9.s, v10.s}[3], [%[result_ptr]], %[wide_rows]\n"
+
+ // Go back to the starting position (subtract wide_rows * 4).
+ "sub %[result_ptr], %[result_ptr], %[wide_rows], lsl #2\n"
+
+ // Add previous result values.
+ "fadd v9.4s, v9.4s, v0.4s\n"
+ "fadd v10.4s, v10.4s, v1.4s\n"
+
+ // Store results.
+ "st2 {v9.s, v10.s}[0], [%[result_ptr]], %[wide_rows]\n"
+ "st2 {v9.s, v10.s}[1], [%[result_ptr]], %[wide_rows]\n"
+ "st2 {v9.s, v10.s}[2], [%[result_ptr]], %[wide_rows]\n"
+ "st2 {v9.s, v10.s}[3], [%[result_ptr]], %[wide_rows]\n"
+ : [ mat_ptr0 ] "+r"(mat_ptr0), [ mat_ptr1 ] "+r"(mat_ptr1), [ vec_ptr ] "+r"(vec_ptr),
+ [ result_ptr ] "+r"(result_ptr), [ mat_ptr2 ] "+r"(mat_ptr2), [ mat_ptr3 ] "+r"(mat_ptr3)
+ : [ mat_ptr0_end ] "r"(mat_ptr0_end), [ scaling_factors_ptr ] "r"(scaling_factors_ptr),
+ [ wide_rows ] "r"(wide_rows)
+ : "x0", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12",
+ "v13", "cc", "memory");
}
}
@@ -277,9 +309,9 @@ static void DotprodMatrixBatchFourVectorMultiplyAccumulate(const int8_t *__restr
}
static void DotprodMatrixBatchFourVectorMultiplyAccumulate(
- const int8_t *__restrict__ matrix, const int m_rows, const int m_cols, const int8_t *vectors,
- const float *scaling_factors, int n_batch, float *__restrict__ result,
- const float *per_channel_scale, const int32_t *input_offset, int32_t *row_sums)
+ const int8_t *__restrict__ matrix, const int m_rows, const int m_cols, const int8_t *vectors,
+ const float *scaling_factors, int n_batch, float *__restrict__ result,
+ const float *per_channel_scale, const int32_t *input_offset, int32_t *row_sums)
{
void *shuffled_vectors_free;
const int8_t *shuffled_vectors = ShuffleVectors(vectors, n_batch, m_cols, &shuffled_vectors_free);
@@ -300,102 +332,102 @@ static void DotprodMatrixBatchFourVectorMultiplyAccumulate(
const int32_t *batch_offsets_ptr = input_offset + batch;
const int32_t is_channel_scale_nullptr = per_channel_scale == nullptr;
const int32_t is_row_sums_nullptr = row_sums_ptr == nullptr;
- asm volatile("dup v0.4s, wzr\n"
- "dup v1.4s, wzr\n"
- "dup v2.4s, wzr\n"
- "dup v3.4s, wzr\n"
- // Load zero points.
- "ld1 {v7.4s}, [%[batch_offsets_ptr]]\n"
- "ld1 {v4.4s}, [%[scaling_factors_ptr]]\n"
- // Zero out zero point accumulators.
- "dup v14.4s, wzr\n"
- "dup v15.4s, wzr\n"
-
- // Load per channel scales if not null.
- "cmp %w[is_channel_scale_nullptr], #0\n"
- "bne 1f\n"
- "ld1r {v16.4s}, [%[channel_scales_ptr]], #4\n"
- "ld1r {v17.4s}, [%[channel_scales_ptr]]\n"
- "fmul v16.4s, v16.4s, v4.4s\n"
- "fmul v17.4s, v17.4s, v4.4s\n"
- "b 2f\n"
- "1:\n"
- "mov v16.16b, v4.16b\n"
- "mov v17.16b, v4.16b\n"
- "2:\n"
- "ld1 {v12.16b}, [%[mat_ptr0]], #16\n"
- "ld1 {v8.16b}, [%[vec_ptr]], #16\n"
- ".word 0x4f8ce100 // sdot v0.4s, v8.16b, v12.4b[0]\n"
- "ld1 {v9.16b}, [%[vec_ptr]], #16\n"
- ".word 0x4face121 // sdot v1.4s, v9.16b, v12.4b[1]\n"
- "ld1 {v10.16b}, [%[vec_ptr]], #16\n"
- ".word 0x4f8ce940 // sdot v0.4s, v10.16b, v12.4b[2]\n"
- "ld1 {v11.16b}, [%[vec_ptr]], #16\n"
- ".word 0x4face961 // sdot v1.4s, v11.16b, v12.4b[3]\n"
- "ld1 {v13.16b}, [%[mat_ptr1]], #16\n"
- ".word 0x4f8de102 // sdot v2.4s, v8.16b, v13.4b[0]\n"
- ".word 0x4fade123 // sdot v3.4s, v9.16b, v13.4b[1]\n"
- ".word 0x4f8de942 // sdot v2.4s, v10.16b, v13.4b[2]\n"
- ".word 0x4fade963 // sdot v3.4s, v11.16b, v13.4b[3]\n"
- "cmp %w[is_row_sums_nullptr], #1\n"
- "bne 3f\n"
- // Accumulate row_sums for zero point calculations.
- "saddlp v12.8h, v12.16b\n"
- "saddlp v13.8h, v13.16b\n"
- "sadalp v14.4s, v12.8h\n"
- "sadalp v15.4s, v13.8h\n"
- "3:\n"
- "cmp %[mat_ptr0], %[mat_ptr0_end]\n"
- "bne 2b\n"
- "add v0.4s, v0.4s, v1.4s\n"
- "add v2.4s, v2.4s, v3.4s\n"
-
- "cmp %w[is_row_sums_nullptr], #1\n"
- "bne 4f\n"
- // Calculate zero point offsets.
- "addv s14, v14.4s\n"
- "addv s15, v15.4s\n"
- "dup v14.4s, v14.s[0]\n"
- "dup v15.4s, v15.s[0]\n"
- "b 5f\n"
- "4:\n"
- "ld1r {v14.4s}, [%[row_sums_ptr]], #4\n"
- "ld1r {v15.4s}, [%[row_sums_ptr]]\n"
- "5:\n"
-
- "mul v14.4s, v14.4s, v7.4s\n"
- "mul v15.4s, v15.4s, v7.4s\n"
- "sub v0.4s, v0.4s, v14.4s\n"
- "sub v2.4s, v2.4s, v15.4s\n"
-
- "scvtf v0.4s, v0.4s\n"
- "scvtf v1.4s, v2.4s\n"
-
- // Multiply scale.
- "fmul v0.4s, v16.4s, v0.4s\n"
- "fmul v1.4s, v17.4s, v1.4s\n"
-
- "ld2 {v9.s, v10.s}[0], [%[result_ptr]], %[wide_rows]\n"
- "ld2 {v9.s, v10.s}[1], [%[result_ptr]], %[wide_rows]\n"
- "ld2 {v9.s, v10.s}[2], [%[result_ptr]], %[wide_rows]\n"
- "ld2 {v9.s, v10.s}[3], [%[result_ptr]], %[wide_rows]\n"
- "sub %[result_ptr], %[result_ptr], %[wide_rows], lsl #2\n"
- "fadd v9.4s, v9.4s, v0.4s\n"
- "fadd v10.4s, v10.4s, v1.4s\n"
- "st2 {v9.s, v10.s}[0], [%[result_ptr]], %[wide_rows]\n"
- "st2 {v9.s, v10.s}[1], [%[result_ptr]], %[wide_rows]\n"
- "st2 {v9.s, v10.s}[2], [%[result_ptr]], %[wide_rows]\n"
- "st2 {v9.s, v10.s}[3], [%[result_ptr]], %[wide_rows]\n"
- : [mat_ptr0] "+r"(mat_ptr0), [mat_ptr1] "+r"(mat_ptr1), [vec_ptr] "+r"(vec_ptr),
- [result_ptr] "+r"(result_ptr), [row_sums_ptr] "+r"(row_sums_ptr)
- : [mat_ptr0_end] "r"(mat_ptr0_end),
- [scaling_factors_ptr] "r"(scaling_factors_ptr), [wide_rows] "r"(wide_rows),
- [channel_scales_ptr] "r"(channel_scales_ptr),
- [batch_offsets_ptr] "r"(batch_offsets_ptr),
- [is_channel_scale_nullptr] "r"(is_channel_scale_nullptr),
- [is_row_sums_nullptr] "r"(is_row_sums_nullptr)
- : "x0", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11",
- "v12", "v13", "v14", "v15", "v16", "v17", "w0", "w1", "cc", "memory");
+ asm volatile(
+ "dup v0.4s, wzr\n"
+ "dup v1.4s, wzr\n"
+ "dup v2.4s, wzr\n"
+ "dup v3.4s, wzr\n"
+ // Load zero points.
+ "ld1 {v7.4s}, [%[batch_offsets_ptr]]\n"
+ "ld1 {v4.4s}, [%[scaling_factors_ptr]]\n"
+ // Zero out zero point accumulators.
+ "dup v14.4s, wzr\n"
+ "dup v15.4s, wzr\n"
+
+ // Load per channel scales if not null.
+ "cmp %w[is_channel_scale_nullptr], #0\n"
+ "bne 1f\n"
+ "ld1r {v16.4s}, [%[channel_scales_ptr]], #4\n"
+ "ld1r {v17.4s}, [%[channel_scales_ptr]]\n"
+ "fmul v16.4s, v16.4s, v4.4s\n"
+ "fmul v17.4s, v17.4s, v4.4s\n"
+ "b 2f\n"
+ "1:\n"
+ "mov v16.16b, v4.16b\n"
+ "mov v17.16b, v4.16b\n"
+ "2:\n"
+ "ld1 {v12.16b}, [%[mat_ptr0]], #16\n"
+ "ld1 {v8.16b}, [%[vec_ptr]], #16\n"
+ ".word 0x4f8ce100 // sdot v0.4s, v8.16b, v12.4b[0]\n"
+ "ld1 {v9.16b}, [%[vec_ptr]], #16\n"
+ ".word 0x4face121 // sdot v1.4s, v9.16b, v12.4b[1]\n"
+ "ld1 {v10.16b}, [%[vec_ptr]], #16\n"
+ ".word 0x4f8ce940 // sdot v0.4s, v10.16b, v12.4b[2]\n"
+ "ld1 {v11.16b}, [%[vec_ptr]], #16\n"
+ ".word 0x4face961 // sdot v1.4s, v11.16b, v12.4b[3]\n"
+ "ld1 {v13.16b}, [%[mat_ptr1]], #16\n"
+ ".word 0x4f8de102 // sdot v2.4s, v8.16b, v13.4b[0]\n"
+ ".word 0x4fade123 // sdot v3.4s, v9.16b, v13.4b[1]\n"
+ ".word 0x4f8de942 // sdot v2.4s, v10.16b, v13.4b[2]\n"
+ ".word 0x4fade963 // sdot v3.4s, v11.16b, v13.4b[3]\n"
+ "cmp %w[is_row_sums_nullptr], #1\n"
+ "bne 3f\n"
+ // Accumulate row_sums for zero point calculations.
+ "saddlp v12.8h, v12.16b\n"
+ "saddlp v13.8h, v13.16b\n"
+ "sadalp v14.4s, v12.8h\n"
+ "sadalp v15.4s, v13.8h\n"
+ "3:\n"
+ "cmp %[mat_ptr0], %[mat_ptr0_end]\n"
+ "bne 2b\n"
+ "add v0.4s, v0.4s, v1.4s\n"
+ "add v2.4s, v2.4s, v3.4s\n"
+
+ "cmp %w[is_row_sums_nullptr], #1\n"
+ "bne 4f\n"
+ // Calculate zero point offsets.
+ "addv s14, v14.4s\n"
+ "addv s15, v15.4s\n"
+ "dup v14.4s, v14.s[0]\n"
+ "dup v15.4s, v15.s[0]\n"
+ "b 5f\n"
+ "4:\n"
+ "ld1r {v14.4s}, [%[row_sums_ptr]], #4\n"
+ "ld1r {v15.4s}, [%[row_sums_ptr]]\n"
+ "5:\n"
+
+ "mul v14.4s, v14.4s, v7.4s\n"
+ "mul v15.4s, v15.4s, v7.4s\n"
+ "sub v0.4s, v0.4s, v14.4s\n"
+ "sub v2.4s, v2.4s, v15.4s\n"
+
+ "scvtf v0.4s, v0.4s\n"
+ "scvtf v1.4s, v2.4s\n"
+
+ // Multiply scale.
+ "fmul v0.4s, v16.4s, v0.4s\n"
+ "fmul v1.4s, v17.4s, v1.4s\n"
+
+ "ld2 {v9.s, v10.s}[0], [%[result_ptr]], %[wide_rows]\n"
+ "ld2 {v9.s, v10.s}[1], [%[result_ptr]], %[wide_rows]\n"
+ "ld2 {v9.s, v10.s}[2], [%[result_ptr]], %[wide_rows]\n"
+ "ld2 {v9.s, v10.s}[3], [%[result_ptr]], %[wide_rows]\n"
+ "sub %[result_ptr], %[result_ptr], %[wide_rows], lsl #2\n"
+ "fadd v9.4s, v9.4s, v0.4s\n"
+ "fadd v10.4s, v10.4s, v1.4s\n"
+ "st2 {v9.s, v10.s}[0], [%[result_ptr]], %[wide_rows]\n"
+ "st2 {v9.s, v10.s}[1], [%[result_ptr]], %[wide_rows]\n"
+ "st2 {v9.s, v10.s}[2], [%[result_ptr]], %[wide_rows]\n"
+ "st2 {v9.s, v10.s}[3], [%[result_ptr]], %[wide_rows]\n"
+ : [ mat_ptr0 ] "+r"(mat_ptr0), [ mat_ptr1 ] "+r"(mat_ptr1), [ vec_ptr ] "+r"(vec_ptr),
+ [ result_ptr ] "+r"(result_ptr), [ row_sums_ptr ] "+r"(row_sums_ptr)
+ : [ mat_ptr0_end ] "r"(mat_ptr0_end), [ scaling_factors_ptr ] "r"(scaling_factors_ptr),
+ [ wide_rows ] "r"(wide_rows), [ channel_scales_ptr ] "r"(channel_scales_ptr),
+ [ batch_offsets_ptr ] "r"(batch_offsets_ptr),
+ [ is_channel_scale_nullptr ] "r"(is_channel_scale_nullptr),
+ [ is_row_sums_nullptr ] "r"(is_row_sums_nullptr)
+ : "x0", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12",
+ "v13", "v14", "v15", "v16", "v17", "w0", "w1", "cc", "memory");
}
}
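
The row_sums/input_offset handling above implements the usual zero-point correction for asymmetrically quantized inputs; a scalar sketch of the arithmetic (my reading of the assembly, not code from the patch):

// For an input quantized as v_q = v / s + o, the integer dot product satisfies
//   sum((v_q - o) * w) = sum(v_q * w) - o * sum(w)
// so the kernel subtracts row_sum * input_offset from each accumulator, then
// scales by the batch scaling factor (pre-multiplied with the per-channel
// scale when one is given).
static inline float DequantizedDot(int32_t acc, int32_t row_sum, int32_t input_offset,
                                   float batch_scale, float channel_scale /* 1.0f if none */)
{
  const int32_t corrected = acc - row_sum * input_offset;
  return corrected * batch_scale * channel_scale;
}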
@@ -425,10 +457,10 @@ static void DotprodMatrixBatchFourVectorMultiplyAccumulate(
//
// We don't use this kernel when n_batch = 1 because the baseline kernel
// is fine for that case.
-void DotprodMatrixBatchPaddedFourVectorMultiplyAccumulate(
- const int8_t *__restrict__ matrix, const int m_rows, const int m_cols, const int8_t *vectors,
- const float *scaling_factors, int n_batch, float *__restrict__ result,
- const float *per_channel_scale, const int32_t *input_offset, int32_t *row_sums)
+inline void DotprodMatrixBatchPaddedFourVectorMultiplyAccumulate(
+ const int8_t *__restrict__ matrix, const int m_rows, const int m_cols, const int8_t *vectors,
+ const float *scaling_factors, int n_batch, float *__restrict__ result,
+ const float *per_channel_scale, const int32_t *input_offset, int32_t *row_sums)
{
const int kWeightsPerUint32 = 4;
@@ -443,14 +475,14 @@ void DotprodMatrixBatchPaddedFourVectorMultiplyAccumulate(
void *padded_vectors_free;
const int padded_vectors_size = batch_round_up * m_cols;
int8_t *padded_vectors = reinterpret_cast<int8_t *>(
- aligned_alloc(kWeightsPerUint32, padded_vectors_size, &padded_vectors_free));
+ aligned_alloc(kWeightsPerUint32, padded_vectors_size, &padded_vectors_free));
memset(padded_vectors, 0, padded_vectors_size);
void *padded_result_free;
const int result_size = n_batch * m_rows * sizeof(float);
const int padded_result_size = batch_round_up * m_rows * sizeof(float);
float *padded_result = reinterpret_cast<float *>(
- aligned_alloc(kWeightsPerUint32, padded_result_size, &padded_result_free));
+ aligned_alloc(kWeightsPerUint32, padded_result_size, &padded_result_free));
memcpy(padded_result, result, result_size);
memset(reinterpret_cast<char *>(padded_result) + result_size, 0,
padded_result_size - result_size);
@@ -462,7 +494,7 @@ void DotprodMatrixBatchPaddedFourVectorMultiplyAccumulate(
void *padded_scaling_factors_free;
const int padded_scaling_factors_size = batch_round_up * sizeof(float);
float *padded_scaling_factors = reinterpret_cast<float *>(
- aligned_alloc(kWeightsPerUint32, padded_scaling_factors_size, &padded_scaling_factors_free));
+ aligned_alloc(kWeightsPerUint32, padded_scaling_factors_size, &padded_scaling_factors_free));
assert(static_cast<int>(n_batch * sizeof(float)) <= padded_scaling_factors_size);
assert(static_cast<int>(batch_round_up * sizeof(float)) <= padded_scaling_factors_size);
memset(padded_scaling_factors, 0, batch_round_up * sizeof(float));
@@ -473,7 +505,7 @@ void DotprodMatrixBatchPaddedFourVectorMultiplyAccumulate(
void *padded_input_offset_free;
const int padded_input_offset_size = batch_round_up * sizeof(int32_t);
int32_t *padded_input_offset = reinterpret_cast<int32_t *>(
- aligned_alloc(kWeightsPerUint32, padded_input_offset_size, &padded_input_offset_free));
+ aligned_alloc(kWeightsPerUint32, padded_input_offset_size, &padded_input_offset_free));
assert(static_cast<int>(n_batch * sizeof(int32_t)) <= padded_input_offset_size);
assert(static_cast<int>(batch_round_up * sizeof(int32_t)) <= padded_input_offset_size);
memset(padded_input_offset, 0, batch_round_up * sizeof(int32_t));
@@ -481,8 +513,8 @@ void DotprodMatrixBatchPaddedFourVectorMultiplyAccumulate(
// Call the main kernel.
DotprodMatrixBatchFourVectorMultiplyAccumulate(
- matrix, m_rows, m_cols, padded_vectors, padded_scaling_factors, batch_round_up,
- padded_result, per_channel_scale, padded_input_offset, row_sums);
+ matrix, m_rows, m_cols, padded_vectors, padded_scaling_factors, batch_round_up, padded_result,
+ per_channel_scale, padded_input_offset, row_sums);
free(padded_input_offset_free);
}
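
Since the four-vector kernel consumes batches in groups of four, batch_round_up here is presumably n_batch rounded up to the next multiple of 4, with the padding zero-filled and the padded tail discarded; a sketch of that arithmetic (helper name is illustrative):

static inline int BatchRoundUp(int n_batch)
{
  constexpr int kBatchGroup = 4; // the kernel reads four batch vectors at a time
  return ((n_batch + kBatchGroup - 1) / kBatchGroup) * kBatchGroup;
}
// e.g. BatchRoundUp(1) == 4 and BatchRoundUp(5) == 8; the zero padding
// contributes nothing to the real rows, and the padded result tail is dropped
// when copying back into `result`.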
@@ -500,20 +532,40 @@ void DotprodMatrixBatchPaddedFourVectorMultiplyAccumulate(
free(padded_scaling_factors_free);
}
-void DotprodMatrixBatchPaddedFourVectorMultiplyAccumulate(const int8_t *__restrict__ matrix,
- const int m_rows, const int m_cols,
- const int8_t *vectors,
- const float *scaling_factors, int n_batch,
- float *__restrict__ result)
+inline void DotprodMatrixBatchPaddedFourVectorMultiplyAccumulate(
+ const int8_t *__restrict__ matrix, const int m_rows, const int m_cols, const int8_t *vectors,
+ const float *scaling_factors, int n_batch, float *__restrict__ result)
{
DotprodMatrixBatchPaddedFourVectorMultiplyAccumulate(
- matrix, m_rows, m_cols, vectors, scaling_factors, n_batch, result,
- /*per_channel_scale=*/nullptr, /*input_offset=*/nullptr,
- /*row_sums=*/nullptr);
+ matrix, m_rows, m_cols, vectors, scaling_factors, n_batch, result,
+ /*per_channel_scale=*/nullptr, /*input_offset=*/nullptr,
+ /*row_sums=*/nullptr);
}
#endif // __aarch64__
-bool NeonIsZeroVector(const float *vector, int v_size)
+inline void NeonCwiseClipping(float *vector, const int v_size, const float clipping_value)
+{
+ const float32x4_t clipping_value_f32x4 = vmovq_n_f32(clipping_value);
+ const float32x4_t neg_clipping_value_f32x4 = vmovq_n_f32(-clipping_value);
+
+ int i = 0;
+ for (; i <= v_size - kFloatValuesPerNeonVector; i += kFloatValuesPerNeonVector)
+ {
+ // Load from memory to vector.
+ float32x4_t v_f32x4 = vld1q_f32(vector + i);
+ // Clip between clipping_value and -clipping_value.
+ v_f32x4 = vminq_f32(clipping_value_f32x4, v_f32x4);
+ v_f32x4 = vmaxq_f32(neg_clipping_value_f32x4, v_f32x4);
+ // Save to output.
+ vst1q_f32(vector + i, v_f32x4);
+ }
+ for (; i < v_size; i++)
+ {
+ vector[i] = std::max(std::min(clipping_value, vector[i]), -clipping_value);
+ }
+}
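
A minimal usage sketch for NeonCwiseClipping (my example; the wrapper name is illustrative). LSTM-style kernels typically use it to clamp the cell state in place:

static void ClipCellState(float *cell_state, int v_size, float cell_clip)
{
  // Full 4-float NEON vectors are clamped with vminq/vmaxq; any remainder
  // (v_size % 4 elements) falls through to the scalar tail loop above.
  if (cell_clip > 0.0f)
    NeonCwiseClipping(cell_state, v_size, cell_clip);
}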
+
+inline bool NeonIsZeroVector(const float *vector, int v_size)
{
// If v_size is not divisible by kFloatWeightsPerNeonLane, we cannot
// use the main vectorized loop, and we need to process sequentially.
@@ -544,15 +596,16 @@ bool NeonIsZeroVector(const float *vector, int v_size)
return true;
}
-void NeonCpuBackendGemm(const int8_t *input, const int32_t *bias,
- const int8_t *input_to_gate_weights, int32_t n_batch, int32_t n_input,
- int32_t n_output, int32_t, int32_t *scratch, ruy::Context *ruy_context)
+inline void NeonCpuBackendGemm(const int8_t *input, const int32_t *bias,
+ const int8_t *input_to_gate_weights, int32_t n_batch,
+ int32_t n_input, int32_t n_output, int32_t, int32_t *scratch,
+ ruy::Context *ruy_context)
{
MatrixParams<int8_t> lhs_params;
lhs_params.order = Order::kRowMajor;
lhs_params.rows = n_output;
lhs_params.cols = n_input;
- lhs_params.cacheable = true;
+ lhs_params.cache_policy = CachePolicy::kAlwaysCache;
MatrixParams<int8_t> rhs_params;
rhs_params.order = Order::kColMajor;
@@ -574,19 +627,44 @@ void NeonCpuBackendGemm(const int8_t *input, const int32_t *bias,
ruy::Matrix<int8_t> ruy_lhs;
ruy::Matrix<int8_t> ruy_rhs;
ruy::Matrix<int32_t> ruy_dst;
- ruy_support::MakeRuyMatrix(lhs_params, input_to_gate_weights, &ruy_lhs);
- ruy_support::MakeRuyMatrix(rhs_params, input, &ruy_rhs);
+  // Note that caching is always enabled for the input and weight tensors.
+ ruy_support::MakeRuyMatrix(lhs_params, input_to_gate_weights, &ruy_lhs, true);
+ ruy_support::MakeRuyMatrix(rhs_params, input, &ruy_rhs, true);
ruy_support::MakeRuyMatrix(dst_params, scratch, &ruy_dst);
- ruy::BasicSpec<int32_t, int32_t> ruy_spec;
- ruy_support::MakeRuySpec(gemm_params, &ruy_spec);
+ ruy::MulParams<int32_t, int32_t> ruy_mul_params;
+ ruy_support::MakeRuyMulParams(gemm_params, &ruy_mul_params);
- constexpr ruy::Path kRuyPath = ruy::kAllPaths;
- ruy::Mul<kRuyPath>(ruy_lhs, ruy_rhs, ruy_spec, ruy_context, &ruy_dst);
+ ruy::Mul(ruy_lhs, ruy_rhs, ruy_mul_params, ruy_context, &ruy_dst);
+}
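
The hunk above tracks ruy's interface change: BasicSpec became MulParams, MakeRuySpec became MakeRuyMulParams, and ruy::Mul now picks the fastest available path at runtime instead of taking a ruy::Path template argument. The new call shape, as a sketch:

// Before: ruy::Mul<ruy::kAllPaths>(ruy_lhs, ruy_rhs, ruy_spec, ruy_context, &ruy_dst);
// After:  path selection happens inside ruy, based on what the CPU supports.
ruy::MulParams<int32_t, int32_t> mul_params; // replaces ruy::BasicSpec<int32_t, int32_t>
ruy::Mul(ruy_lhs, ruy_rhs, mul_params, ruy_context, &ruy_dst);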
+
+inline void NeonSub1Vector(const float *vector, int v_size, float *result)
+{
+ // If v_size is not divisible by the vector size, then we need to process the
+ // final few elements sequentially. postamble_start shows the start index
+ // where this should happen.
+ const int postamble_start = RoundDownVectors<kFloatValuesPerNeonVector>(v_size);
+
+ float32x4_t one_f32x4 = vmovq_n_f32(1.0);
+ int v = 0;
+ for (; v < postamble_start; v += kFloatValuesPerNeonVector)
+ {
+ // Load 4 float values from the current pointers of the input column and
+ // subtract from 1.
+ float32x4_t v_f32x4 = vld1q_f32(vector + v);
+ float32x4_t result_f32x4 = vsubq_f32(one_f32x4, v_f32x4);
+ // Save to output.
+ vst1q_f32(result + v, result_f32x4);
+ }
+ for (; v < v_size; v++)
+ {
+ result[v] = 1.0f - vector[v];
+ }
}
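
Sub1Vector computes 1 - x elementwise. In LSTM kernels this is commonly used to derive a coupled input gate from the forget gate (CIFG); a hypothetical usage sketch:

static void CoupledInputGate(const float *forget_gate, int v_size, float *input_gate)
{
  // input_gate[i] = 1.0f - forget_gate[i]
  NeonSub1Vector(forget_gate, v_size, input_gate);
}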
-void NeonSymmetricQuantizeFloats(const float *values, const int size, int8_t *quantized_values,
- float *min, float *max, float *scaling_factor)
+inline void NeonSymmetricQuantizeFloats(const float *values, const int size,
+ int8_t *quantized_values, float *min, float *max,
+ float *scaling_factor)
{
// TODO(raziel): vectorize min/max calculation.
auto minmax = std::minmax_element(values, values + size);
@@ -658,15 +736,16 @@ void NeonSymmetricQuantizeFloats(const float *values, const int size, int8_t *qu
for (int i = postamble_start; i < size; ++i)
{
const int32_t quantized_value =
- static_cast<int32_t>(std::round(scaling_factor_inv * values[i]));
+ static_cast<int32_t>(std::round(scaling_factor_inv * values[i]));
quantized_values[i] = std::min(kScale, std::max(-kScale, quantized_value));
}
}
-void NeonMatrixBatchVectorMultiplyAccumulate(const int8_t *__restrict__ matrix, const int m_rows,
- const int m_cols, const int8_t *__restrict__ vectors,
- const float *scaling_factors, int n_batch,
- float *__restrict__ result, int result_stride)
+inline void NeonMatrixBatchVectorMultiplyAccumulate(const int8_t *__restrict__ matrix,
+ const int m_rows, const int m_cols,
+ const int8_t *__restrict__ vectors,
+ const float *scaling_factors, int n_batch,
+ float *__restrict__ result, int result_stride)
{
#ifdef __aarch64__
if (HasSdotInstruction() && m_cols % 16 == 0 && m_rows % 2 == 0 && m_rows >= n_batch)
@@ -751,7 +830,7 @@ void NeonMatrixBatchVectorMultiplyAccumulate(const int8_t *__restrict__ matrix,
// Here the assumption is that each buffer is 4-byte aligned. Otherwise,
// performance may suffer significantly.
assert( // NOLINT
- ((uintptr_t)(&row_ptr[col]) & (kWeightsPerUint32 - 1)) == 0);
+ ((uintptr_t)(&row_ptr[col]) & (kWeightsPerUint32 - 1)) == 0);
const int8x16_t s1_8x16 = vld1q_s8((const int8_t *)(aligned_vec + col));
const int8x16_t s2_8x16 = vld1q_s8((const int8_t *)(row_ptr + col));
// Multiply the low bits (i.e. the lower 8 8bit numbers in the
@@ -776,7 +855,7 @@ void NeonMatrixBatchVectorMultiplyAccumulate(const int8_t *__restrict__ matrix,
      // Here the assumption is that each buffer is 4-byte aligned.
// Otherwise, performance may suffer significantly.
assert( // NOLINT
- ((uintptr_t)(&row_ptr[col]) & (kWeightsPerUint32 - 1)) == 0);
+ ((uintptr_t)(&row_ptr[col]) & (kWeightsPerUint32 - 1)) == 0);
const int8x8_t s1_8x8 = vld1_s8((const int8_t *)(aligned_vec + col));
const int8x8_t s2_8x8 = vld1_s8((const int8_t *)(row_ptr + col));
const int16x8_t prod_16x8 = vmull_s8(s1_8x8, s2_8x8);
@@ -804,9 +883,9 @@ void NeonMatrixBatchVectorMultiplyAccumulate(const int8_t *__restrict__ matrix,
free(aligned_vec_free);
}
-void NeonMatrixBatchVectorMultiplyAccumulate(const float *matrix, int m_rows, int m_cols,
- const float *vector, int n_batch, float *result,
- int result_stride)
+inline void NeonMatrixBatchVectorMultiplyAccumulate(const float *matrix, int m_rows, int m_cols,
+ const float *vector, int n_batch, float *result,
+ int result_stride)
{
// If v_size is not divisible by kWeightsPerNeonLane, we cannot use the main
// vectorized loop, and we need to process sequentially. postamble_start shows
@@ -845,11 +924,12 @@ void NeonMatrixBatchVectorMultiplyAccumulate(const float *matrix, int m_rows, in
}
}
-void NeonMatrixBatchVectorMultiplyAccumulate(const int8_t *__restrict__ matrix, const int m_rows,
- const int m_cols, const int8_t *__restrict__ vectors,
- const float *scaling_factors, int n_batch,
- int32_t *scratch, float *__restrict__ result,
- int result_stride, ruy::Context *ruy_context)
+inline void NeonMatrixBatchVectorMultiplyAccumulate(const int8_t *__restrict__ matrix,
+ const int m_rows, const int m_cols,
+ const int8_t *__restrict__ vectors,
+ const float *scaling_factors, int n_batch,
+ int32_t *scratch, float *__restrict__ result,
+ int result_stride, ruy::Context *ruy_context)
{
if (m_rows % 4 == 0 && result_stride == 1)
{
@@ -872,7 +952,7 @@ void NeonMatrixBatchVectorMultiplyAccumulate(const int8_t *__restrict__ matrix,
const float32x4_t float_val1 = vcvtq_f32_s32(scratch_val1);
const float32x4_t result0 = vmlaq_f32(vld1q_f32(result), float_val0, scaling_factor0);
const float32x4_t result1 =
- vmlaq_f32(vld1q_f32(result + 4 * result_stride), float_val1, scaling_factor1);
+ vmlaq_f32(vld1q_f32(result + 4 * result_stride), float_val1, scaling_factor1);
vst1q_f32(result, result0);
vst1q_f32(result + 4 * result_stride, result1);
}
diff --git a/compute/cker/include/cker/PortableTensorUtils.h b/compute/cker/include/cker/PortableTensorUtils.h
index 54714e214..7e4b01a01 100644
--- a/compute/cker/include/cker/PortableTensorUtils.h
+++ b/compute/cker/include/cker/PortableTensorUtils.h
@@ -45,6 +45,10 @@ public:
return a < 0.f ? 0.f : a;
case FusedActivationFunctionType::kRelu6:
return std::max(0.f, std::min(a, 6.f));
+ case FusedActivationFunctionType::kTanh:
+ return std::tanh(a);
+ case FusedActivationFunctionType::kSigmoid:
+ return 1.0f / (1.0f + std::exp(-a));
default:
// TODO(aselle): More informative fatal error!
exit(1);
@@ -55,8 +59,17 @@ private:
FusedActivationFunctionType act_;
};
-void PortableVectorBatchVectorAssign(const float *vector, int v_size, int n_batch,
- float *batch_vector)
+template <typename T>
+void PortableCwiseClipping(T *vector, const int v_size, const T clipping_value)
+{
+ for (int i = 0; i < v_size; i++)
+ {
+ vector[i] = std::max(std::min(clipping_value, vector[i]), static_cast<T>(-clipping_value));
+ }
+}
+
+inline void PortableVectorBatchVectorAssign(const float *vector, int v_size, int n_batch,
+ float *batch_vector)
{
for (int b = 0; b < n_batch; b++)
{
@@ -64,7 +77,20 @@ void PortableVectorBatchVectorAssign(const float *vector, int v_size, int n_batc
}
}
-bool PortableIsZeroVector(const float *vector, int v_size)
+inline void PortableVectorBatchVectorAdd(const float *vector, int v_size, int n_batch,
+ float *batch_vector)
+{
+ for (int b = 0; b < n_batch; b++)
+ {
+ for (int i = 0; i < v_size; ++i)
+ {
+ batch_vector[i] += vector[i];
+ }
+ batch_vector += v_size;
+ }
+}
+
+inline bool PortableIsZeroVector(const float *vector, int v_size)
{
for (int i = 0; i < v_size; ++i)
{
@@ -74,8 +100,8 @@ bool PortableIsZeroVector(const float *vector, int v_size)
return true;
}
-void PortableApplyActivationToVector(const float *vector, int v_size,
- FusedActivationFunctionType activation, float *result)
+inline void PortableApplyActivationToVector(const float *vector, int v_size,
+ FusedActivationFunctionType activation, float *result)
{
auto activation_func = ActivationFunctor(activation);
for (int v = 0; v < v_size; v++)
@@ -84,8 +110,17 @@ void PortableApplyActivationToVector(const float *vector, int v_size,
}
}
-void PortableSymmetricQuantizeFloats(const float *values, const int size, int8_t *quantized_values,
- float *min_value, float *max_value, float *scaling_factor)
+inline void PortableSub1Vector(const float *vector, int v_size, float *result)
+{
+ for (int v = 0; v < v_size; v++)
+ {
+ *result++ = 1.0f - *vector++;
+ }
+}
+
+inline void PortableSymmetricQuantizeFloats(const float *values, const int size,
+ int8_t *quantized_values, float *min_value,
+ float *max_value, float *scaling_factor)
{
auto minmax = std::minmax_element(values, values + size);
*min_value = *minmax.first;
@@ -103,17 +138,72 @@ void PortableSymmetricQuantizeFloats(const float *values, const int size, int8_t
for (int i = 0; i < size; ++i)
{
const int32_t quantized_value =
- static_cast<int32_t>(std::round(values[i] * scaling_factor_inv));
+ static_cast<int32_t>(std::round(values[i] * scaling_factor_inv));
// Clamp: just in case some odd numeric offset.
quantized_values[i] = std::min(kScale, std::max(-kScale, quantized_value));
}
}
-void PortableMatrixBatchVectorMultiplyAccumulate(const int8_t *__restrict__ matrix,
- const int m_rows, const int m_cols,
- const int8_t *__restrict__ vectors,
- const float *scaling_factors, int n_batch,
- float *__restrict__ result, int result_stride)
+inline void PortableAsymmetricQuantizeFloats(const float *values, const int size,
+ int8_t *quantized_values, float *scaling_factor,
+ int32_t *offset)
+{
+  // Copied from TensorFlow's PortableAsymmetricQuantizeFloats.
+ const int32_t kMinScale = -128;
+ const int32_t kMaxScale = 127;
+ const double qmin_double = kMinScale;
+ const double qmax_double = kMaxScale;
+ const auto minmax = std::minmax_element(values, values + size);
+ const double rmin = static_cast<double>(std::min(0.0f, *minmax.first));
+ const double rmax = static_cast<double>(std::max(0.0f, *minmax.second));
+ if (rmin == rmax)
+ {
+ memset(quantized_values, 0, size * sizeof(int8_t));
+ *scaling_factor = 1;
+ *offset = 0;
+ return;
+ }
+ else
+ {
+ double scale = (rmax - rmin) / (qmax_double - qmin_double);
+ const double zero_point_from_min = qmin_double - rmin / scale;
+ const double zero_point_from_max = qmax_double - rmax / scale;
+ const double zero_point_from_min_error = std::abs(qmin_double) + std::abs(rmin / scale);
+ const double zero_point_from_max_error = std::abs(qmax_double) + std::abs(rmax / scale);
+ const double zero_point_double = zero_point_from_min_error < zero_point_from_max_error
+ ? zero_point_from_min
+ : zero_point_from_max;
+ int8_t nudged_zero_point = 0;
+ if (zero_point_double <= qmin_double)
+ {
+ nudged_zero_point = kMinScale;
+ }
+ else if (zero_point_double >= qmax_double)
+ {
+ nudged_zero_point = kMaxScale;
+ }
+ else
+ {
+ nudged_zero_point = static_cast<int8_t>(round(zero_point_double));
+ }
+ *scaling_factor = scale;
+ *offset = nudged_zero_point;
+ }
+ const float scaling_factor_inv = 1.0f / *scaling_factor;
+ for (int i = 0; i < size; ++i)
+ {
+ const int32_t quantized_value =
+ static_cast<int32_t>(std::round(*offset + values[i] * scaling_factor_inv));
+ quantized_values[i] = std::min(kMaxScale, std::max(kMinScale, quantized_value));
+ }
+}
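
A worked example of the zero-point nudging above, for inputs spanning [-0.5, 1.5] (my numbers; the expected outputs follow from the code as written):

//   scale = (1.5 - (-0.5)) / (127 - (-128)) = 2.0 / 255 ~ 0.0078431
//   zero_point_from_min = -128 - (-0.5 / scale) = -128 + 63.75 = -64.25
//   zero_point_from_max =  127 - ( 1.5 / scale) =  127 - 191.25 = -64.25
//   nudged_zero_point = round(-64.25) = -64
float values[3] = {-0.5f, 0.0f, 1.5f};
int8_t quantized[3];
float scale;
int32_t offset;
PortableAsymmetricQuantizeFloats(values, 3, quantized, &scale, &offset);
// scale ~ 0.0078431f, offset == -64, quantized == {-128, -64, 127}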
+
+inline void PortableMatrixBatchVectorMultiplyAccumulate(const int8_t *__restrict__ matrix,
+ const int m_rows, const int m_cols,
+ const int8_t *__restrict__ vectors,
+ const float *scaling_factors, int n_batch,
+ float *__restrict__ result,
+ int result_stride)
{
int batch, row, col;
for (batch = 0; batch < n_batch; ++batch, vectors += m_cols)
@@ -138,20 +228,20 @@ void PortableMatrixBatchVectorMultiplyAccumulate(const int8_t *__restrict__ matr
} // for batch
}
-void PortableMatrixBatchVectorMultiplyAccumulate(const int8_t *__restrict__ matrix,
- const int m_rows, const int m_cols,
- const int8_t *__restrict__ vector,
- const float *scaling_factors, int n_batch,
- int32_t *, float *__restrict__ result,
- int result_stride, ruy::Context *)
+inline void PortableMatrixBatchVectorMultiplyAccumulate(const int8_t *__restrict__ matrix,
+ const int m_rows, const int m_cols,
+ const int8_t *__restrict__ vector,
+ const float *scaling_factors, int n_batch,
+ int32_t *, float *__restrict__ result,
+ int result_stride, ruy::Context *)
{
PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector, scaling_factors,
n_batch, result, result_stride);
}
-void PortableMatrixBatchVectorMultiplyAccumulate(const float *matrix, int m_rows, int m_cols,
- const float *vector, int n_batch, float *result,
- int result_stride)
+inline void PortableMatrixBatchVectorMultiplyAccumulate(const float *matrix, int m_rows, int m_cols,
+ const float *vector, int n_batch,
+ float *result, int result_stride)
{
float *result_in_batch = result;
for (int b = 0; b < n_batch; b++)
@@ -171,7 +261,36 @@ void PortableMatrixBatchVectorMultiplyAccumulate(const float *matrix, int m_rows
}
}
-void PortableZeroVector(float *vector, int v_size) { std::fill_n(vector, v_size, 0); }
+inline void PortableMeanStddevNormalization(const float *input_vector, float *output_vector,
+ int v_size, int n_batch)
+{
+ for (int batch = 0; batch < n_batch; ++batch)
+ {
+ float sum = 0.0f;
+ for (int i = 0; i < v_size; ++i)
+ {
+ sum += input_vector[i];
+ }
+ const float mean = sum / v_size;
+ float sum_diff_sq = 0.0f;
+ for (int i = 0; i < v_size; ++i)
+ {
+ const float diff = input_vector[i] - mean;
+ sum_diff_sq += diff * diff;
+ }
+ const float variance = sum_diff_sq / v_size;
+ constexpr float kNormalizationConstant = 1e-8f;
+ const float stddev_inv = 1.0f / std::sqrt(variance + kNormalizationConstant);
+ for (int i = 0; i < v_size; ++i)
+ {
+ output_vector[i] = (input_vector[i] - mean) * stddev_inv;
+ }
+ input_vector += v_size;
+ output_vector += v_size;
+ }
+}
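
A worked example for the normalization above, one batch of four values (my numbers, not from the patch):

float input[4] = {1.0f, 2.0f, 3.0f, 4.0f};
float output[4];
PortableMeanStddevNormalization(input, output, /*v_size=*/4, /*n_batch=*/1);
// mean = 2.5, variance = 1.25, stddev ~ 1.1180
// output ~ {-1.3416f, -0.4472f, 0.4472f, 1.3416f}
// kNormalizationConstant (1e-8f) only matters for a constant input
// (variance == 0), where it prevents division by zero.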
+
+inline void PortableZeroVector(float *vector, int v_size) { std::fill_n(vector, v_size, 0); }
} // namespace cker
} // namespace nnfw
diff --git a/compute/cker/include/cker/Shape.h b/compute/cker/include/cker/Shape.h
index 2486f01a6..9269ce9aa 100644
--- a/compute/cker/include/cker/Shape.h
+++ b/compute/cker/include/cker/Shape.h
@@ -136,12 +136,27 @@ public:
std::memcpy(dst_dims, dims_data, dimensions_count * sizeof(int32_t));
}
+ inline void ReplaceWith(const Shape &other)
+ {
+ ReplaceWith(other.DimensionsCount(), other.DimsData());
+ }
+
+ inline void ReplaceWith(Shape &&other)
+ {
+ Resize(0);
+ std::swap(_size, other._size);
+ if (_size <= kMaxSmallSize)
+ std::copy(other._dims, other._dims + kMaxSmallSize, _dims);
+ else
+ _dims_pointer = other._dims_pointer;
+ }
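
A note on the rvalue overload above, as I read it: Resize(0) releases this shape's own heap storage, and swapping _size leaves `other` with size 0, so other's destructor will not free the _dims_pointer this object has just taken over. A usage sketch (assuming Shape's (count, data) constructor, as in TFLite's RuntimeShape):

const int32_t dims[6] = {2, 3, 4, 5, 6, 7}; // more dims than kMaxSmallSize -> heap-allocated
Shape a(6, dims);
Shape b;
b.ReplaceWith(std::move(a)); // b steals a's heap pointer; a is left empty, so no double free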
+
template <typename T> inline void BuildFrom(const T &src_iterable)
{
const int dimensions_count = std::distance(src_iterable.begin(), src_iterable.end());
Resize(dimensions_count);
int32_t *data = DimsData();
- for (auto it : src_iterable)
+ for (auto &&it : src_iterable)
{
*data = it;
++data;
@@ -172,7 +187,6 @@ public:
for (int i = 0; i < _size; i++)
{
const int dim = dims_data[i];
- assert(dim >= 1);
buffer_size *= dim;
}
return buffer_size;
diff --git a/compute/cker/include/cker/TensorUtils.h b/compute/cker/include/cker/TensorUtils.h
index e07c91239..bac79b887 100644
--- a/compute/cker/include/cker/TensorUtils.h
+++ b/compute/cker/include/cker/TensorUtils.h
@@ -31,55 +31,133 @@ namespace nnfw
namespace cker
{
-void VectorBatchVectorAssign(const float *vector, int v_size, int n_batch, float *batch_vector)
+inline void CwiseClipping(float *vector, const int v_size, const float clipping_value)
+{
+ NEON_OR_PORTABLE(CwiseClipping, vector, v_size, clipping_value);
+}
+
+inline void VectorBatchVectorAdd(const float *vector, int v_size, int n_batch, float *batch_vector)
+{
+ PortableVectorBatchVectorAdd(vector, v_size, n_batch, batch_vector);
+}
+
+inline void VectorBatchVectorAssign(const float *vector, int v_size, int n_batch,
+ float *batch_vector)
{
PortableVectorBatchVectorAssign(vector, v_size, n_batch, batch_vector);
}
-bool IsZeroVector(const float *vector, int v_size)
+// Cwise product of two vectors.
+template <typename T>
+inline void VectorVectorCwiseProduct(const T *__restrict__ vector1, const T *__restrict__ vector2,
+ int v_size, T *__restrict__ result)
+{
+ for (int v = 0; v < v_size; v++)
+ {
+ *result++ = *vector1++ * *vector2++;
+ }
+}
+
+// Cwise product and accumulate of two vectors. Since it's a MAC operation, the
+// assumption here is that the result array is initialized to valid values.
+template <typename T>
+inline void VectorVectorCwiseProductAccumulate(const T *__restrict__ vector1,
+ const T *__restrict__ vector2, int v_size,
+ T *__restrict__ result)
+{
+ for (int v = 0; v < v_size; v++)
+ {
+ *result++ += *vector1++ * *vector2++;
+ }
+}
+
+// Cwise product of a vector and a batch-vector.
+template <typename T>
+inline void VectorBatchVectorCwiseProduct(const T *vector, int v_size, const T *batch_vector,
+ int n_batch, T *result)
+{
+ for (int b = 0; b < n_batch; b++)
+ {
+ VectorVectorCwiseProduct(vector, batch_vector, v_size, result);
+ // Update the pointers.
+ result += v_size;
+ batch_vector += v_size;
+ }
+}
+
+// Cwise product and accumulate of a vector and a batch-vector. Since it's a MAC
+// operation, the assumption here is that the result array is initialized to
+// valid values.
+template <typename T>
+inline void VectorBatchVectorCwiseProductAccumulate(const T *vector, int v_size,
+ const T *batch_vector, int n_batch, T *result)
+{
+ for (int b = 0; b < n_batch; b++)
+ {
+ VectorVectorCwiseProductAccumulate(vector, batch_vector, v_size, result);
+ // Update the pointers.
+ result += v_size;
+ batch_vector += v_size;
+ }
+}
+
+inline bool IsZeroVector(const float *vector, int v_size)
{
return NEON_OR_PORTABLE(IsZeroVector, vector, v_size);
}
-void ApplyActivationToVector(const float *vector, int v_size,
- FusedActivationFunctionType activation, float *result)
+inline void ApplyActivationToVector(const float *vector, int v_size,
+ FusedActivationFunctionType activation, float *result)
{
PortableApplyActivationToVector(vector, v_size, activation, result);
}
-void SymmetricQuantizeFloats(const float *values, const int size, int8_t *quantized_values,
- float *min, float *max, float *scaling_factor)
+inline void Sub1Vector(const float *vector, int v_size, float *result)
+{
+ NEON_OR_PORTABLE(Sub1Vector, vector, v_size, result);
+}
+
+inline void SymmetricQuantizeFloats(const float *values, const int size, int8_t *quantized_values,
+ float *min, float *max, float *scaling_factor)
{
return NEON_OR_PORTABLE(SymmetricQuantizeFloats, values, size, quantized_values, min, max,
scaling_factor);
}
-void MatrixBatchVectorMultiplyAccumulate(const int8_t *matrix, const int m_rows, const int m_cols,
- const int8_t *vector, const float *scaling_factors,
- int n_batch, float *result, int result_stride)
+inline void MatrixBatchVectorMultiplyAccumulate(const int8_t *matrix, const int m_rows,
+ const int m_cols, const int8_t *vector,
+ const float *scaling_factors, int n_batch,
+ float *result, int result_stride)
{
NEON_OR_PORTABLE(MatrixBatchVectorMultiplyAccumulate, matrix, m_rows, m_cols, vector,
scaling_factors, n_batch, result, result_stride);
}
-void MatrixBatchVectorMultiplyAccumulate(const float *matrix, int m_rows, int m_cols,
- const float *vector, int n_batch, float *result,
- int result_stride)
+inline void MatrixBatchVectorMultiplyAccumulate(const float *matrix, int m_rows, int m_cols,
+ const float *vector, int n_batch, float *result,
+ int result_stride)
{
NEON_OR_PORTABLE(MatrixBatchVectorMultiplyAccumulate, matrix, m_rows, m_cols, vector, n_batch,
result, result_stride);
}
-void MatrixBatchVectorMultiplyAccumulate(const int8_t *matrix, const int m_rows, const int m_cols,
- const int8_t *vectors, const float *scaling_factors,
- int n_batch, int32_t *scratch, float *result,
- int result_stride, ruy::Context *ruy_context)
+inline void MatrixBatchVectorMultiplyAccumulate(const int8_t *matrix, const int m_rows,
+ const int m_cols, const int8_t *vectors,
+ const float *scaling_factors, int n_batch,
+ int32_t *scratch, float *result, int result_stride,
+ ruy::Context *ruy_context)
{
NEON_OR_PORTABLE(MatrixBatchVectorMultiplyAccumulate, matrix, m_rows, m_cols, vectors,
scaling_factors, n_batch, scratch, result, result_stride, ruy_context);
}
-void ZeroVector(float *vector, int v_size) { PortableZeroVector(vector, v_size); }
+inline void MeanStddevNormalization(const float *input_vector, float *output_vector, int v_size,
+ int n_batch)
+{
+ PortableMeanStddevNormalization(input_vector, output_vector, v_size, n_batch);
+}
+
+inline void ZeroVector(float *vector, int v_size) { PortableZeroVector(vector, v_size); }
} // namespace cker
} // namespace nnfw
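
The wrappers above dispatch through NEON_OR_PORTABLE from neon/neon_check.h, which presumably expands to the Neon- or Portable-prefixed implementation depending on whether USE_NEON is defined; a sketch of that macro:

#ifdef USE_NEON
#define NEON_OR_PORTABLE(funcname, ...) Neon##funcname(__VA_ARGS__)
#else
#define NEON_OR_PORTABLE(funcname, ...) Portable##funcname(__VA_ARGS__)
#endif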
diff --git a/compute/cker/include/cker/Types.h b/compute/cker/include/cker/Types.h
index c0c9313ea..3fd0cf5b6 100644
--- a/compute/cker/include/cker/Types.h
+++ b/compute/cker/include/cker/Types.h
@@ -34,6 +34,8 @@ enum class FusedActivationFunctionType
kRelu6 = 1,
kRelu1 = 2,
kRelu = 3,
+ kTanh = 4,
+ kSigmoid = 6,
};
enum class PaddingType
{
@@ -78,8 +80,6 @@ enum class BroadcastableOpCategory : uint8_t
struct PoolParams
{
- FusedActivationFunctionType activation;
- PaddingType padding_type;
PaddingValues padding_values;
int stride_height;
int stride_width;
@@ -109,6 +109,8 @@ struct SoftmaxParams
int32_t zero_point;
float scale;
float *table;
+ uint8_t *uint8_table1;
+ uint8_t *uint8_table2;
};
struct PackParams
@@ -170,25 +172,25 @@ struct ComparisonParams
struct BinaryArithmeticOpParam
{
// Shape dependent / common to data / op types.
- BroadcastableOpCategory broadcast_category;
+ BroadcastableOpCategory broadcast_category{BroadcastableOpCategory::kNone};
// uint8 inference params.
- int32_t input1_offset;
- int32_t input2_offset;
- int32_t output_offset;
- int32_t output_multiplier;
- int32_t output_shift;
+ int32_t input1_offset = 0;
+ int32_t input2_offset = 0;
+ int32_t output_offset = 0;
+ int32_t output_multiplier = 0;
+ int32_t output_shift = 0;
// Add / Sub, not Mul, uint8 inference params.
- int32_t left_shift;
- int32_t input1_multiplier;
- int32_t input1_shift;
- int32_t input2_multiplier;
- int32_t input2_shift;
+ int32_t left_shift = 0;
+ int32_t input1_multiplier = 0;
+ int32_t input1_shift = 0;
+ int32_t input2_multiplier = 0;
+ int32_t input2_shift = 0;
// uint8, etc, activation params.
- int32_t quantized_activation_min;
- int32_t quantized_activation_max;
+ int32_t quantized_activation_min = 0;
+ int32_t quantized_activation_max = 0;
// float activation params.
- float float_activation_min;
- float float_activation_max;
+ float float_activation_min = 0;
+ float float_activation_max = 0;
// Processed output dimensions.
// Let input "a" be the one that broadcasts in the faster-changing dimension.
@@ -256,9 +258,12 @@ struct FullyConnectedParams
// uint8, etc, activation params.
int32_t quantized_activation_min;
int32_t quantized_activation_max;
- // float activation params.
+ // float activation params
float float_activation_min;
float float_activation_max;
+ // Mark the operands as cacheable if they are unchanging, e.g. weights.
+ bool lhs_cacheable;
+ bool rhs_cacheable;
// FullyConnectedWeightsFormat weights_format;
};
@@ -268,6 +273,27 @@ struct L2NormParams
int32_t input_zero_point;
};
+enum LSTMKernelType
+{
+ kTfLiteLSTMFullKernel = 0,
+ kTfLiteLSTMBasicKernel
+};
+
+struct LSTMParams
+{
+ // Parameters for LSTM version 1.
+ FusedActivationFunctionType activation{FusedActivationFunctionType::kNone};
+ float cell_clip;
+ float proj_clip;
+
+ // Parameters for LSTM version 2.
+ // kTfLiteLSTMBasicKernel is only supported in version 2 or above.
+ LSTMKernelType kernel_type;
+
+ // Parameters for LSTM version 4.
+ bool asymmetric_quantize_inputs;
+};
+
struct GatherParams
{
int32_t axis;
@@ -366,12 +392,24 @@ struct SpaceToDepthParams
int32_t block_size;
};
+struct LeakyReluParams
+{
+ float alpha;
+};
+
enum class Order
{
kColMajor,
kRowMajor
};
+enum class CachePolicy : std::uint8_t
+{
+ kNeverCache,
+ kCacheIfLargeSpeedup,
+ kAlwaysCache,
+};
+
// MatrixParams encapsulates the parameters that Gemm needs about each
// matrix, besides the buffer data pointer.
// Compare to ruy::Matrix, which also encapsulates the data pointer.
@@ -390,10 +428,13 @@ template <typename Scalar> struct MatrixParams
// The zero_point, i.e. which Scalar value is to be interpreted as zero.
// When Scalar is floating-point, this must be 0.
Scalar zero_point = 0;
- // Indicate whether the underlying data will remain unchanged for
- // some period of time. Defaults to false, but should be set to true
- // for unchanging data (e.g. weights buffers in many cases)
- bool cacheable = false;
+ // When the data pointed to by this matrix is constant data, so that it is
+ // valid to assume that equality of pointers implies equality of data,
+ // a CachePolicy may be used instead of the default kNeverCache,
+ // which will enable ruy to take advantage of this constancy of the data to
+ // cache the packing work, which can be a large speedup in matrix*vector
+ // and other narrow shapes.
+ CachePolicy cache_policy = CachePolicy::kNeverCache;
};
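
With the boolean gone, callers state their intent explicitly; a usage sketch mirroring the NeonCpuBackendGemm change earlier in this patch:

MatrixParams<int8_t> lhs_params;
lhs_params.order = Order::kRowMajor;
lhs_params.cache_policy = CachePolicy::kAlwaysCache; // constant weights: replaces cacheable = true

MatrixParams<int8_t> rhs_params;
rhs_params.order = Order::kColMajor;
rhs_params.cache_policy = CachePolicy::kNeverCache;  // activations change every call (the default)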
// Enumeration of broad categories of Gemm.
@@ -442,9 +483,9 @@ enum class QuantizationFlavor
// (only those that need perchannel quantization do).
template <typename AccumScalar, typename DstScalar,
QuantizationFlavor quantization_flavor =
- std::is_floating_point<AccumScalar>::value
- ? QuantizationFlavor::kFloatingPoint
- : QuantizationFlavor::kIntegerWithUniformMultiplier>
+ std::is_floating_point<AccumScalar>::value
+ ? QuantizationFlavor::kFloatingPoint
+ : QuantizationFlavor::kIntegerWithUniformMultiplier>
struct GemmParams
{
// Only for non-floating-point cases. The fixed-point part (i.e. the mantissa)
@@ -471,12 +512,12 @@ struct GemmParams
const AccumScalar *bias = nullptr;
// min clamp bound of destination values.
DstScalar clamp_min = std::is_floating_point<DstScalar>::value
- ? -std::numeric_limits<DstScalar>::infinity()
- : std::numeric_limits<DstScalar>::lowest();
+ ? -std::numeric_limits<DstScalar>::infinity()
+ : std::numeric_limits<DstScalar>::lowest();
// max clamp bound of destination values.
DstScalar clamp_max = std::is_floating_point<DstScalar>::value
- ? std::numeric_limits<DstScalar>::infinity()
- : std::numeric_limits<DstScalar>::max();
+ ? std::numeric_limits<DstScalar>::infinity()
+ : std::numeric_limits<DstScalar>::max();
};
// Validates self-consistency of GemmParams.
diff --git a/compute/cker/include/cker/Utils.h b/compute/cker/include/cker/Utils.h
index 2abb998d0..9aae0a957 100644
--- a/compute/cker/include/cker/Utils.h
+++ b/compute/cker/include/cker/Utils.h
@@ -20,6 +20,8 @@
#include "Shape.h"
+#include "neon/neon_check.h"
+
#include <algorithm>
#include <cstdint>
#include <fixedpoint/fixedpoint.h>
@@ -29,6 +31,11 @@ namespace nnfw
namespace cker
{
+template <typename T> struct is_quant8
+{
+ static constexpr bool value = std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value;
+};
+
template <typename T>
inline T ActivationFunctionWithMinMax(T x, T output_activation_min, T output_activation_max)
{
@@ -88,8 +95,8 @@ inline int32_t MultiplyByQuantizedMultiplier(int32_t x, int32_t quantized_multip
int left_shift = shift > 0 ? shift : 0;
int right_shift = shift > 0 ? 0 : -shift;
return gemmlowp::RoundingDivideByPOT(
- gemmlowp::SaturatingRoundingDoublingHighMul(x * (1 << left_shift), quantized_multiplier),
- right_shift);
+ gemmlowp::SaturatingRoundingDoublingHighMul(x * (1 << left_shift), quantized_multiplier),
+ right_shift);
}
inline int32_t MultiplyByQuantizedMultiplierGreaterThanOne(int32_t x, int32_t quantized_multiplier,
@@ -103,8 +110,36 @@ inline int32_t MultiplyByQuantizedMultiplierSmallerThanOneExp(int32_t x,
int left_shift)
{
return gemmlowp::RoundingDivideByPOT(
- gemmlowp::SaturatingRoundingDoublingHighMul(x, quantized_multiplier), -left_shift);
+ gemmlowp::SaturatingRoundingDoublingHighMul(x, quantized_multiplier), -left_shift);
+}
+
+#ifdef USE_NEON
+inline int32x4x4_t MultiplyByQuantizedMultiplier4Rows(int32x4x4_t input_val,
+ int32_t quantized_multiplier, int32_t shift)
+{
+ const int left_shift = std::max(shift, 0);
+ const int right_shift = std::min(shift, 0);
+ int32x4x4_t result;
+
+ int32x4_t multiplier_dup = vdupq_n_s32(quantized_multiplier);
+ int32x4_t left_shift_dup = vdupq_n_s32(left_shift);
+ int32x4_t right_shift_dup = vdupq_n_s32(right_shift);
+
+ result.val[0] = vrshlq_s32(
+ vqrdmulhq_s32(vshlq_s32(input_val.val[0], left_shift_dup), multiplier_dup), right_shift_dup);
+
+ result.val[1] = vrshlq_s32(
+ vqrdmulhq_s32(vshlq_s32(input_val.val[1], left_shift_dup), multiplier_dup), right_shift_dup);
+
+ result.val[2] = vrshlq_s32(
+ vqrdmulhq_s32(vshlq_s32(input_val.val[2], left_shift_dup), multiplier_dup), right_shift_dup);
+
+ result.val[3] = vrshlq_s32(
+ vqrdmulhq_s32(vshlq_s32(input_val.val[3], left_shift_dup), multiplier_dup), right_shift_dup);
+
+ return result;
}
+#endif
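
Lane for lane, the NEON helper above matches the scalar MultiplyByQuantizedMultiplier earlier in this file; a sketch that requantizes a block of 16 accumulators at once (illustrative name, USE_NEON builds only):

#ifdef USE_NEON
static inline int32x4x4_t RequantizeBlock(const int32_t *acc, int32_t multiplier, int32_t shift)
{
  int32x4x4_t in;
  in.val[0] = vld1q_s32(acc + 0);
  in.val[1] = vld1q_s32(acc + 4);
  in.val[2] = vld1q_s32(acc + 8);
  in.val[3] = vld1q_s32(acc + 12);
  // Same rounding-doubling high mul + rounding shift as the scalar path.
  return MultiplyByQuantizedMultiplier4Rows(in, multiplier, shift);
}
#endif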
inline int NodeOffset(int b, int h, int w, int height, int width)
{
@@ -162,7 +197,7 @@ inline void GetInvSqrtQuantizedMultiplierExp(int32_t input, int reverse_shift,
const F3 fixedpoint_input = F3::FromRaw(input >> 1);
const F3 fixedpoint_half_input = SaturatingRoundingMultiplyByPOT<-1>(fixedpoint_input);
const F3 fixedpoint_half_three =
- GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F3, (1 << 28) + (1 << 27), 1.5);
+ GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F3, (1 << 28) + (1 << 27), 1.5);
// Newton-Raphson iteration
// Naive unoptimized starting guess: x = 1
F3 x = F3::One();
@@ -173,7 +208,7 @@ inline void GetInvSqrtQuantizedMultiplierExp(int32_t input, int reverse_shift,
x = Rescale<3>(fixedpoint_half_three * x - fixedpoint_half_input * x3);
}
const F0 fixedpoint_half_sqrt_2 =
- GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F0, 1518500250, std::sqrt(2.) / 2.);
+ GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F0, 1518500250, std::sqrt(2.) / 2.);
x = x * fixedpoint_half_sqrt_2;
*output_inv_sqrt = x.raw();
if (*output_shift < 0)
@@ -429,7 +464,7 @@ template <typename T> class SequentialTensorWriter
{
public:
SequentialTensorWriter(const T *input_data, T *output_data)
- : input_data_(input_data), output_ptr_(output_data)
+ : input_data_(input_data), output_ptr_(output_data)
{
}
diff --git a/compute/cker/include/cker/eigen/EigenSupport.h b/compute/cker/include/cker/eigen/EigenSupport.h
index 49c34211a..e3b10990e 100644
--- a/compute/cker/include/cker/eigen/EigenSupport.h
+++ b/compute/cker/include/cker/eigen/EigenSupport.h
@@ -39,17 +39,17 @@ namespace eigen_support
// library.
typedef Eigen::TensorMap<Eigen::Tensor<float, 2, Eigen::RowMajor, Eigen::DenseIndex>,
Eigen::Aligned>
- EigenMatrix;
+ EigenMatrix;
typedef Eigen::TensorMap<Eigen::Tensor<const float, 2, Eigen::RowMajor, Eigen::DenseIndex>,
Eigen::Aligned>
- ConstEigenMatrix;
+ ConstEigenMatrix;
typedef Eigen::TensorMap<Eigen::Tensor<float, 4, Eigen::RowMajor, Eigen::DenseIndex>,
Eigen::Aligned>
- EigenTensor;
+ EigenTensor;
typedef Eigen::TensorMap<Eigen::Tensor<const float, 4, Eigen::RowMajor, Eigen::DenseIndex>,
Eigen::Aligned>
- ConstEigenTensor;
+ ConstEigenTensor;
// Utility functions we need for the EigenTensor API.
template <typename Device, typename T> struct MatMulConvFunctor
diff --git a/compute/cker/include/cker/eigen/Utils.h b/compute/cker/include/cker/eigen/Utils.h
index f9c706370..40cb85432 100644
--- a/compute/cker/include/cker/eigen/Utils.h
+++ b/compute/cker/include/cker/eigen/Utils.h
@@ -36,9 +36,9 @@ namespace cker
// Eigen::Map<Eigen::Matrix<const float, ...>>
template <typename Scalar>
using VectorMap = typename std::conditional<
- std::is_const<Scalar>::value,
- Eigen::Map<const Eigen::Matrix<typename std::remove_const<Scalar>::type, Eigen::Dynamic, 1>>,
- Eigen::Map<Eigen::Matrix<Scalar, Eigen::Dynamic, 1>>>::type;
+ std::is_const<Scalar>::value,
+ Eigen::Map<const Eigen::Matrix<typename std::remove_const<Scalar>::type, Eigen::Dynamic, 1>>,
+ Eigen::Map<Eigen::Matrix<Scalar, Eigen::Dynamic, 1>>>::type;
template <typename Scalar> VectorMap<Scalar> MapAsVector(Scalar *data, const Shape &shape)
{
@@ -51,10 +51,10 @@ template <typename Scalar> VectorMap<Scalar> MapAsVector(Scalar *data, const Sha
// above also applies here.
template <typename Scalar>
using MatrixMap = typename std::conditional<
- std::is_const<Scalar>::value,
- Eigen::Map<const Eigen::Matrix<typename std::remove_const<Scalar>::type, Eigen::Dynamic,
- Eigen::Dynamic>>,
- Eigen::Map<Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic>>>::type;
+ std::is_const<Scalar>::value,
+ Eigen::Map<
+ const Eigen::Matrix<typename std::remove_const<Scalar>::type, Eigen::Dynamic, Eigen::Dynamic>>,
+ Eigen::Map<Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic>>>::type;
template <typename Scalar>
MatrixMap<Scalar> MapAsMatrixWithLastDimAsRows(Scalar *data, const Shape &shape)
diff --git a/compute/cker/include/cker/eigen/eigen_convolution_helpers.h b/compute/cker/include/cker/eigen/eigen_convolution_helpers.h
index dc3e2552d..9d4fd2eaf 100644
--- a/compute/cker/include/cker/eigen/eigen_convolution_helpers.h
+++ b/compute/cker/include/cker/eigen/eigen_convolution_helpers.h
@@ -49,20 +49,19 @@ class TensorEvaluatorHasPartialPacket
public:
template <typename TensorEvaluatorT, typename PacketT, typename IndexT>
static auto functionExistsSfinae(
- typename std::enable_if<
- unpacket_traits<PacketT>::masked_load_available &&
- std::is_same<
- PacketT,
- decltype(std::declval<const TensorEvaluatorT>().template partialPacket<PacketT>(
- std::declval<IndexT>(),
- std::declval<typename unpacket_traits<PacketT>::mask_t>()))>::value>::type *)
- -> std::true_type;
+ typename std::enable_if<
+ unpacket_traits<PacketT>::masked_load_available &&
+ std::is_same<PacketT,
+ decltype(std::declval<const TensorEvaluatorT>().template partialPacket<PacketT>(
+ std::declval<IndexT>(),
+ std::declval<typename unpacket_traits<PacketT>::mask_t>()))>::value>::type *)
+ -> std::true_type;
template <typename TensorEvaluatorT, typename PacketT, typename IndexT>
static auto functionExistsSfinae(...) -> std::false_type;
typedef decltype(
- functionExistsSfinae<TensorEvaluatorType, PacketType, IndexType>(nullptr)) status;
+ functionExistsSfinae<TensorEvaluatorType, PacketType, IndexType>(nullptr)) status;
static constexpr bool value = status::value;
};
@@ -71,9 +70,9 @@ public:
// [from, to) range. If the mask bit is 1, element will be loaded/stored.
template <typename Packet>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
- typename std::enable_if<unpacket_traits<Packet>::masked_load_available,
- typename unpacket_traits<Packet>::mask_t>::type
- mask(int from, int to)
+ typename std::enable_if<unpacket_traits<Packet>::masked_load_available,
+ typename unpacket_traits<Packet>::mask_t>::type
+ mask(int from, int to)
{
const Index packet_size = internal::unpacket_traits<Packet>::size;
eigen_assert(0 <= from && to <= (packet_size + 1) && from < to);
diff --git a/compute/cker/include/cker/eigen/eigen_gemm_eigen.h b/compute/cker/include/cker/eigen/eigen_gemm_eigen.h
new file mode 100644
index 000000000..d4f8fc09d
--- /dev/null
+++ b/compute/cker/include/cker/eigen/eigen_gemm_eigen.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_EGIEN_EIGEN_GEMM_EIGEN_H__
+#define __NNFW_CKER_EGIEN_EIGEN_GEMM_EIGEN_H__
+
+// See b/131835803: in TFLite code, because eigen_spatial_convolutions.h does
+// #define Eigen EigenForTFLite, it is difficult to have any #include of Eigen
+// headers in a header file, as that results in name clashes (compilation
+// errors) depending on the order in which these headers are #included.
+// So we have moved the #include of Eigen here, where we have control over
+// the header #include sequence.
+// #include "third_party/eigen3/Eigen/Core"
+// #include "tensorflow/lite/kernels/cpu_backend_context.h"
+// #include "tensorflow/lite/kernels/cpu_backend_gemm_params.h"
+// #include "tensorflow/lite/kernels/internal/common.h"
+// #include "cker/eigen/eigen_convolution_helpers.h"
+#include "cker/operation/Common.h"
+#include "cker/Types.h"
+
+#include <Eigen/Core>
+
+namespace nnfw
+{
+namespace cker
+{
+namespace detail
+{
+
+// tensorflow/tensorflow/lite/kernels/cpu_backend_gemm_eigen.h and cpu_backend_gemm_eigen.cc
+struct GemmImplUsingEigen
+{
+ static void Run(const MatrixParams<float> &lhs_params, const float *lhs_data,
+ const MatrixParams<float> &rhs_params, const float *rhs_data,
+ const MatrixParams<float> &dst_params, float *dst_data,
+ const GemmParams<float, float> &params)
+ {
+ // This code assumes specific storage orders, encoded in these Eigen types.
+ // These assumptions have been checked by TF_LITE_ASSERT's in the public
+ // Gemm entry point already, before the implementation gets to this point.
+ using EigenMatrixMapRowMajorConst =
+ Eigen::Map<const Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>;
+ using EigenMatrixMapColMajorConst =
+ Eigen::Map<const Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>>;
+ using EigenMatrixMapColMajorMutable =
+ Eigen::Map<Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>>;
+
+ EigenMatrixMapRowMajorConst eigen_lhs(lhs_data, lhs_params.rows, lhs_params.cols);
+ EigenMatrixMapColMajorConst eigen_rhs(rhs_data, rhs_params.rows, rhs_params.cols);
+ EigenMatrixMapColMajorMutable eigen_dst(dst_data, dst_params.rows, dst_params.cols);
+
+ if (rhs_params.cols == 1)
+ {
+ eigen_dst.col(0).noalias() = eigen_lhs * eigen_rhs.col(0);
+ }
+ else if (lhs_params.rows == 1)
+ {
+ eigen_dst.row(0).noalias() = eigen_lhs.row(0) * eigen_rhs;
+ }
+ else
+ {
+ eigen_dst.noalias() = eigen_lhs * eigen_rhs;
+ }
+
+ if (params.bias)
+ {
+ BiasAndClamp(params.clamp_min, params.clamp_max, dst_params.rows, params.bias,
+ dst_params.rows * dst_params.cols, dst_data);
+ }
+ else
+ {
+ eigen_dst = eigen_dst.cwiseMin(params.clamp_max).cwiseMax(params.clamp_min);
+ }
+ }
+};
+
+} // namespace detail
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_EGIEN_EIGEN_GEMM_EIGEN_H__
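
A small usage sketch for GemmImplUsingEigen (my example, assuming code inside namespace nnfw::cker; storage orders follow the Eigen maps above: row-major LHS, column-major RHS and destination):

MatrixParams<float> lhs, rhs, dst;
lhs.order = Order::kRowMajor; lhs.rows = 2; lhs.cols = 3;
rhs.order = Order::kColMajor; rhs.rows = 3; rhs.cols = 1;
dst.order = Order::kColMajor; dst.rows = 2; dst.cols = 1;

const float lhs_data[6] = {1, 2, 3, 4, 5, 6}; // [[1 2 3], [4 5 6]]
const float rhs_data[3] = {1, 1, 1};
float dst_data[2] = {};
GemmParams<float, float> params; // no bias; default clamp bounds are +/- infinity
detail::GemmImplUsingEigen::Run(lhs, lhs_data, rhs, rhs_data, dst, dst_data, params);
// rhs.cols == 1 takes the GEMV branch: dst_data == {6.0f, 15.0f}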
diff --git a/compute/cker/include/cker/eigen/eigen_spatial_convolutions-inl.h b/compute/cker/include/cker/eigen/eigen_spatial_convolutions-inl.h
index 92e1614d1..c931ac518 100644
--- a/compute/cker/include/cker/eigen/eigen_spatial_convolutions-inl.h
+++ b/compute/cker/include/cker/eigen/eigen_spatial_convolutions-inl.h
@@ -62,30 +62,27 @@ template <typename NewDimension, Index Rows, Index Cols, typename ArgType, typen
typename Scalar_, typename Index, typename nocontract_t, typename contract_t, int Side,
int packet_size, bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment>
class TensorContractionInputMapper<
- Scalar_, Index, Side,
- TensorEvaluator<
- const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
- Device>,
- nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
+ Scalar_, Index, Side,
+ TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>, Device>,
+ nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
{
public:
typedef Scalar_ Scalar;
typedef TensorContractionInputMapper<
- Scalar, Index, Side,
- TensorEvaluator<
- const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
- Device>,
- nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
- Self;
+ Scalar, Index, Side,
+ TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>, Device>,
+ nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
+ Self;
typedef TensorContractionSubMapper<
- Scalar, Index, Side,
- TensorEvaluator<
- const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
- Device>,
- nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
- SubMapper;
+ Scalar, Index, Side,
+ TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>, Device>,
+ nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
+ SubMapper;
typedef SubMapper VectorMapper;
typedef SubMapper LinearMapper;
@@ -95,11 +92,11 @@ public:
EIGEN_DEVICE_FUNC
TensorContractionInputMapper(
- const TensorEvaluator<
- const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
- Device> &tensor,
- const nocontract_t &, const nocontract_t &, const contract_t &, const contract_t &)
- : m_impl(tensor.impl().impl())
+ const TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>, Device>
+ &tensor,
+ const nocontract_t &, const nocontract_t &, const contract_t &, const contract_t &)
+ : m_impl(tensor.impl().impl())
{
Index patch_rows;
Index patch_depth;
@@ -167,7 +164,7 @@ public:
EIGEN_DEVICE_FUNC
TensorContractionInputMapper(const TensorContractionInputMapper &base_mapper)
- : m_impl(base_mapper.m_impl)
+ : m_impl(base_mapper.m_impl)
{
m_patch_cols = base_mapper.m_patch_cols;
m_num_patches = base_mapper.m_num_patches;
@@ -280,11 +277,10 @@ public:
private:
friend class TensorContractionSubMapper<
- Scalar, Index, Side,
- TensorEvaluator<
- const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
- Device>,
- nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>;
+ Scalar, Index, Side,
+ TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>, Device>,
+ nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>;
// Load coefficient from a patch specified by the "within patch offset"
// (patchId) and the precomputed indices of the first element of the patch.
@@ -298,14 +294,14 @@ private:
const Index colOffset = patchOffset / m_fastColStride;
const Index inputCol = colIndex + colOffset * m_in_col_strides;
const Index origInputCol = (m_patch_col_inflate_strides == 1)
- ? inputCol
- : ((inputCol >= 0) ? (inputCol / m_fastInputColStride) : 0);
+ ? inputCol
+ : ((inputCol >= 0) ? (inputCol / m_fastInputColStride) : 0);
const Index rowOffset = patchOffset - colOffset * m_colStride;
const Index inputRow = rowIndex + rowOffset * m_in_row_strides;
const Index origInputRow = (m_patch_row_inflate_strides == 1)
- ? inputRow
- : ((inputRow >= 0) ? (inputRow / m_fastInputRowStride) : 0);
+ ? inputRow
+ : ((inputRow >= 0) ? (inputRow / m_fastInputRowStride) : 0);
if (origInputCol < 0 || origInputRow < 0 || origInputCol >= m_inputCols ||
origInputRow >= m_inputRows || (inputCol != origInputCol * m_patch_col_inflate_strides) ||
(inputRow != origInputRow * m_patch_row_inflate_strides))
@@ -314,7 +310,7 @@ private:
}
const Index depth = patchId - patchOffset * patchDepth();
const Index inputIndex =
- depth + origInputRow * m_rowInputStride + origInputCol * m_colInputStride + otherIndex;
+ depth + origInputRow * m_rowInputStride + origInputCol * m_colInputStride + otherIndex;
return m_impl.coeff(inputIndex);
}
@@ -338,7 +334,7 @@ private:
}
const Index depth = patchId - patchOffset * patchDepth();
const Index inputIndex =
- depth + inputRow * m_rowInputStride + inputCol * m_colInputStride + otherIndex;
+ depth + inputRow * m_rowInputStride + inputCol * m_colInputStride + otherIndex;
return m_impl.coeff(inputIndex);
}
@@ -390,7 +386,7 @@ private:
  // span[0] all the way up to (and including) span[1].
const Index depth = patchId - patchOffsets[0] * patchDepth();
const Index inputIndex =
- depth + inputRows[0] * m_rowInputStride + inputCol * m_colInputStride + otherIndex;
+ depth + inputRows[0] * m_rowInputStride + inputCol * m_colInputStride + otherIndex;
return m_impl.template partialPacket<Packet>(inputIndex - span[0],
mask<Packet>(span[0], span[1] + 1));
}
@@ -445,10 +441,10 @@ private:
// Load partial packets and do bit-wise OR to generate required packet
return internal::por<Packet>(
- loadPartialPacketStandard(rowIndex, colIndex, otherIndex, patchIds[0], spans[0],
- patchOffsets2Cols[0], colOffsets[0]),
- loadPartialPacketStandard(rowIndex, colIndex, otherIndex, patchIds[1], spans[1],
- patchOffsets2Cols[1], colOffsets[1]));
+ loadPartialPacketStandard(rowIndex, colIndex, otherIndex, patchIds[0], spans[0],
+ patchOffsets2Cols[0], colOffsets[0]),
+ loadPartialPacketStandard(rowIndex, colIndex, otherIndex, patchIds[1], spans[1],
+ patchOffsets2Cols[1], colOffsets[1]));
}
  // Helper function to load a packet that is present in a single column.
@@ -477,7 +473,7 @@ private:
// no padding
const Index depth = patchId - patchOffsets[0] * patchDepth();
const Index inputIndex =
- depth + inputRows[0] * m_rowInputStride + inputCols[0] * m_colInputStride + otherIndex;
+ depth + inputRows[0] * m_rowInputStride + inputCols[0] * m_colInputStride + otherIndex;
return m_impl.template packet<Unaligned>(inputIndex);
}
return packetWithPossibleZero(patchId, rowIndex, colIndex, otherIndex);
@@ -490,7 +486,7 @@ private:
// load.
template <typename PacketT, typename TensorEvaluatorT>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename std::enable_if<
- !TensorEvaluatorHasPartialPacket<TensorEvaluatorT, PacketT, Index>::value, PacketT>::type
+ !TensorEvaluatorHasPartialPacket<TensorEvaluatorT, PacketT, Index>::value, PacketT>::type
loadPacketStandard(Index patchId, Index rowIndex, Index colIndex, Index otherIndex) const
{
const Index packetSize = internal::unpacket_traits<Packet>::size;
@@ -538,7 +534,7 @@ private:
// packets.
template <typename PacketT, typename TensorEvaluatorT>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename std::enable_if<
- TensorEvaluatorHasPartialPacket<TensorEvaluatorT, PacketT, Index>::value, PacketT>::type
+ TensorEvaluatorHasPartialPacket<TensorEvaluatorT, PacketT, Index>::value, PacketT>::type
loadPacketStandard(Index patchId, Index rowIndex, Index colIndex, Index otherIndex) const
{
const Index packetSize = internal::unpacket_traits<PacketT>::size;
@@ -604,7 +600,7 @@ private:
// no padding
const Index depth = patchId - patchOffset * patchDepth();
const Index inputIndex =
- depth + inputRow * m_rowInputStride + inputCol * m_colInputStride + otherIndex;
+ depth + inputRow * m_rowInputStride + inputCol * m_colInputStride + otherIndex;
return m_impl.template packet<Unaligned>(inputIndex);
}
@@ -627,10 +623,10 @@ private:
computeBaseIndices(Index patchIndex, Index &rowIndex, Index &colIndex, Index &otherIndex) const
{
const size_t NumInputDims =
- array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
+ array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
otherIndex = (NumInputDims == 3) ? 0 : patchIndex / m_fastNumPatches;
const Index patch2DIndex =
- (NumInputDims == 3) ? patchIndex : (patchIndex - otherIndex * m_num_patches);
+ (NumInputDims == 3) ? patchIndex : (patchIndex - otherIndex * m_num_patches);
otherIndex *= m_patchInputStride;
colIndex = patch2DIndex / m_fastOutputRows;
rowIndex = patch2DIndex - colIndex * m_outputRows;
@@ -689,31 +685,28 @@ template <typename NewDimension, Index Rows, Index Cols, typename ArgType, typen
typename Scalar, typename Index, typename nocontract_t, typename contract_t, int Side,
int packet_size, bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment>
class TensorContractionSubMapper<
- Scalar, Index, Side,
- TensorEvaluator<
- const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
- Device>,
- nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
+ Scalar, Index, Side,
+ TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>, Device>,
+ nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
{
public:
typedef typename packet_traits<Scalar>::type Packet;
typedef typename packet_traits<Scalar>::half HalfPacket;
typedef TensorContractionInputMapper<
- Scalar, Index, Side,
- TensorEvaluator<
- const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
- Device>,
- nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
- ParentMapper;
+ Scalar, Index, Side,
+ TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>, Device>,
+ nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
+ ParentMapper;
typedef TensorContractionSubMapper<
- Scalar, Index, Side,
- TensorEvaluator<
- const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
- Device>,
- nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
- Self;
+ Scalar, Index, Side,
+ TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>, Device>,
+ nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
+ Self;
typedef Self LinearMapper;
@@ -722,16 +715,16 @@ public:
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorContractionSubMapper(const ParentMapper &base_mapper,
Index vert_offset,
Index horiz_offset)
- : m_depth_offset(vert_offset), m_col_offset(horiz_offset), m_base_mapper(base_mapper)
+ : m_depth_offset(vert_offset), m_col_offset(horiz_offset), m_base_mapper(base_mapper)
{
m_base_mapper.computeBaseIndices(m_col_offset, m_rowIndex, m_colIndex, m_otherIndex);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorContractionSubMapper(const Self &base_mapper,
Index vert_offset,
Index horiz_offset)
- : m_depth_offset(vert_offset + base_mapper.m_depth_offset),
- m_col_offset(horiz_offset + base_mapper.m_col_offset),
- m_base_mapper(base_mapper.m_base_mapper)
+ : m_depth_offset(vert_offset + base_mapper.m_depth_offset),
+ m_col_offset(horiz_offset + base_mapper.m_col_offset),
+ m_base_mapper(base_mapper.m_base_mapper)
{
m_base_mapper.computeBaseIndices(m_col_offset, m_rowIndex, m_colIndex, m_otherIndex);
}
@@ -766,7 +759,7 @@ public:
{
typedef decltype(m_base_mapper.m_impl) TensorEvaluatorT;
return m_base_mapper.template loadPacketStandard<Packet, TensorEvaluatorT>(
- i + m_depth_offset, m_rowIndex, m_colIndex, m_otherIndex);
+ i + m_depth_offset, m_rowIndex, m_colIndex, m_otherIndex);
}
template <typename Packet> EIGEN_DEVICE_FUNC bool aligned(Index) const { return false; }
@@ -781,7 +774,7 @@ public:
EIGEN_ALWAYS_INLINE Index maxCol(const Index peeled_k) const
{
const Index max_col =
- (m_depth_offset + (peeled_k == 0 ? 0 : peeled_k - 1)) / fastPatchColStride();
+ (m_depth_offset + (peeled_k == 0 ? 0 : peeled_k - 1)) / fastPatchColStride();
return std::min<Index>(1 + max_col, patchCols());
}
@@ -789,8 +782,8 @@ public:
EIGEN_ALWAYS_INLINE Index maxRow(const Index peeled_k, const Index col) const
{
const Index max_row =
- (m_depth_offset + (peeled_k == 0 ? 0 : peeled_k - 1) - col * patchColStride()) /
- fastPatchRowStride();
+ (m_depth_offset + (peeled_k == 0 ? 0 : peeled_k - 1) - col * patchColStride()) /
+ fastPatchRowStride();
return std::min<Index>(1 + max_row, patchRows());
}
@@ -862,7 +855,7 @@ public:
}
template <typename PacketT = Packet>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename std::enable_if<
- TensorEvaluatorHasPartialPacket<TensorEvaluatorT, PacketT, Index>::value, PacketT>::type
+ TensorEvaluatorHasPartialPacket<TensorEvaluatorT, PacketT, Index>::value, PacketT>::type
partialPacketNoPadding(const Index depth, const Index baseIndex, Index num_coeffs) const
{
const Index inputIndex = depth + baseIndex;
@@ -913,8 +906,8 @@ public:
const Index input_row = m_rowIndex + row * m_base_mapper.m_in_row_strides;
*orig_row = (m_base_mapper.m_patch_row_inflate_strides == 1)
- ? input_row
- : ((input_row >= 0) ? (input_row / m_base_mapper.m_fastInputRowStride) : 0);
+ ? input_row
+ : ((input_row >= 0) ? (input_row / m_base_mapper.m_fastInputRowStride) : 0);
return (*orig_row < 0 || *orig_row >= m_base_mapper.m_inputRows) ||
(input_row != *orig_row * m_base_mapper.m_patch_row_inflate_strides);
@@ -932,8 +925,8 @@ public:
const Index input_col = m_colIndex + col * m_base_mapper.m_in_col_strides;
*orig_col = (m_base_mapper.m_patch_col_inflate_strides == 1)
- ? input_col
- : ((input_col >= 0) ? (input_col / m_base_mapper.m_fastInputColStride) : 0);
+ ? input_col
+ : ((input_col >= 0) ? (input_col / m_base_mapper.m_fastInputColStride) : 0);
return (*orig_col < 0 || *orig_col >= m_base_mapper.m_inputCols) ||
(input_col != *orig_col * m_base_mapper.m_patch_col_inflate_strides);
@@ -1033,23 +1026,20 @@ template <typename NewDimension, Index Rows, Index Cols, typename ArgType, typen
int packet_size, bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment,
int nr>
struct gemm_pack_rhs<
- Scalar, Index,
- TensorContractionSubMapper<
- Scalar, Index, Rhs,
- TensorEvaluator<
- const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
- Device>,
- nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered,
- Alignment>,
- nr, ColMajor, false, false>
+ Scalar, Index,
+ TensorContractionSubMapper<
+ Scalar, Index, Rhs,
+ TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>, Device>,
+ nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>,
+ nr, ColMajor, false, false>
{
typedef TensorContractionSubMapper<
- Scalar, Index, Rhs,
- TensorEvaluator<
- const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
- Device>,
- nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
- SubMapper;
+ Scalar, Index, Rhs,
+ TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>, Device>,
+ nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
+ SubMapper;
typedef SubMapper DataMapper;
typedef typename packet_traits<Scalar>::type Packet;
@@ -1159,7 +1149,7 @@ struct gemm_pack_rhs<
const Index idx3 = dm3.baseIndex(r, c);
const Index start_depth =
- ((c == start_col) && (r == start_row)) ? rhs.depthOffset() : 0;
+ ((c == start_col) && (r == start_row)) ? rhs.depthOffset() : 0;
const Index max_depth = rhs.maxDepth(peeled_k - k, start_depth);
eigen_assert((max_depth - start_depth) % packet_size == 0);
@@ -1248,22 +1238,20 @@ template <typename NewDimension, Index Rows, Index Cols, typename ArgType, typen
typename Scalar, typename Index, typename nocontract_t, typename contract_t,
bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment, int nr>
struct gemm_pack_rhs<
- Scalar, Index,
- TensorContractionSubMapper<
- Scalar, Index, Rhs,
- TensorEvaluator<
- const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
- Device>,
- nocontract_t, contract_t, 2, inner_dim_contiguous, inner_dim_reordered, Alignment>,
- nr, ColMajor, false, false>
+ Scalar, Index,
+ TensorContractionSubMapper<
+ Scalar, Index, Rhs,
+ TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>, Device>,
+ nocontract_t, contract_t, 2, inner_dim_contiguous, inner_dim_reordered, Alignment>,
+ nr, ColMajor, false, false>
{
typedef TensorContractionSubMapper<
- Scalar, Index, Rhs,
- TensorEvaluator<
- const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
- Device>,
- nocontract_t, contract_t, 2, inner_dim_contiguous, inner_dim_reordered, Alignment>
- SubMapper;
+ Scalar, Index, Rhs,
+ TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>, Device>,
+ nocontract_t, contract_t, 2, inner_dim_contiguous, inner_dim_reordered, Alignment>
+ SubMapper;
typedef SubMapper DataMapper;
typedef typename packet_traits<Scalar>::type Packet;
@@ -1378,7 +1366,7 @@ struct gemm_pack_rhs<
const Index idx3 = dm3.baseIndex(r, c);
const Index start_depth =
- ((c == start_col) && (r == start_row)) ? rhs.depthOffset() : 0;
+ ((c == start_col) && (r == start_row)) ? rhs.depthOffset() : 0;
const Index max_depth = rhs.maxDepth(peeled_k - k, start_depth);
eigen_assert((max_depth - start_depth) % packet_size == 0);
@@ -1472,22 +1460,20 @@ template <typename NewDimension, Index Rows, Index Cols, typename ArgType, typen
typename Scalar, typename Index, typename nocontract_t, typename contract_t,
bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment, int nr>
struct gemm_pack_rhs<
- Scalar, Index,
- TensorContractionSubMapper<
- Scalar, Index, Rhs,
- TensorEvaluator<
- const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
- Device>,
- nocontract_t, contract_t, 1, inner_dim_contiguous, inner_dim_reordered, Alignment>,
- nr, ColMajor, false, false>
+ Scalar, Index,
+ TensorContractionSubMapper<
+ Scalar, Index, Rhs,
+ TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>, Device>,
+ nocontract_t, contract_t, 1, inner_dim_contiguous, inner_dim_reordered, Alignment>,
+ nr, ColMajor, false, false>
{
typedef TensorContractionSubMapper<
- Scalar, Index, Rhs,
- TensorEvaluator<
- const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
- Device>,
- nocontract_t, contract_t, 1, inner_dim_contiguous, inner_dim_reordered, Alignment>
- SubMapper;
+ Scalar, Index, Rhs,
+ TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>, Device>,
+ nocontract_t, contract_t, 1, inner_dim_contiguous, inner_dim_reordered, Alignment>
+ SubMapper;
typedef SubMapper DataMapper;
EIGEN_STATIC_ASSERT((nr == 4), YOU_MADE_A_PROGRAMMING_MISTAKE)
@@ -1582,27 +1568,25 @@ struct gemm_pack_rhs<
*/
template <typename Input, typename Kernel, typename OutputKernel = const NoOpOutputKernel>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static const typename internal::conditional<
- internal::traits<Input>::Layout == ColMajor,
- TensorReshapingOp<
- const DSizes<typename internal::traits<Input>::Index,
- internal::traits<Input>::NumDimensions>,
- const TensorContractionOp<
- const array<IndexPair<typename internal::traits<Input>::Index>, 1>,
- const TensorReshapingOp<const DSizes<typename internal::traits<Input>::Index, 2>,
- const Kernel>,
- const TensorReshapingOp<const DSizes<typename internal::traits<Input>::Index, 2>,
- const TensorImagePatchOp<Dynamic, Dynamic, const Input>>,
- const OutputKernel>>,
- TensorReshapingOp<
- const DSizes<typename internal::traits<Input>::Index,
- internal::traits<Input>::NumDimensions>,
- const TensorContractionOp<
- const array<IndexPair<typename internal::traits<Input>::Index>, 1>,
- const TensorReshapingOp<const DSizes<typename internal::traits<Input>::Index, 2>,
- const TensorImagePatchOp<Dynamic, Dynamic, const Input>>,
- const TensorReshapingOp<const DSizes<typename internal::traits<Input>::Index, 2>,
- const Kernel>,
- const OutputKernel>>>::type
+ internal::traits<Input>::Layout == ColMajor,
+ TensorReshapingOp<
+ const DSizes<typename internal::traits<Input>::Index, internal::traits<Input>::NumDimensions>,
+ const TensorContractionOp<
+ const array<IndexPair<typename internal::traits<Input>::Index>, 1>,
+ const TensorReshapingOp<const DSizes<typename internal::traits<Input>::Index, 2>,
+ const Kernel>,
+ const TensorReshapingOp<const DSizes<typename internal::traits<Input>::Index, 2>,
+ const TensorImagePatchOp<Dynamic, Dynamic, const Input>>,
+ const OutputKernel>>,
+ TensorReshapingOp<
+ const DSizes<typename internal::traits<Input>::Index, internal::traits<Input>::NumDimensions>,
+ const TensorContractionOp<
+ const array<IndexPair<typename internal::traits<Input>::Index>, 1>,
+ const TensorReshapingOp<const DSizes<typename internal::traits<Input>::Index, 2>,
+ const TensorImagePatchOp<Dynamic, Dynamic, const Input>>,
+ const TensorReshapingOp<const DSizes<typename internal::traits<Input>::Index, 2>,
+ const Kernel>,
+ const OutputKernel>>>::type
SpatialConvolution(const Input &input, const Kernel &kernel, const Index row_stride = 1,
const Index col_stride = 1, const PaddingType padding_type = PADDING_SAME,
const Index row_in_stride = 1, const Index col_in_stride = 1,
@@ -1612,11 +1596,11 @@ SpatialConvolution(const Input &input, const Kernel &kernel, const Index row_str
typedef typename internal::traits<Input>::Index TensorIndex;
TensorRef<Tensor<typename internal::traits<Input>::Scalar, internal::traits<Input>::NumDimensions,
internal::traits<Input>::Layout, TensorIndex>>
- in(input);
+ in(input);
TensorRef<
- Tensor<typename internal::traits<Kernel>::Scalar, internal::traits<Kernel>::NumDimensions,
- internal::traits<Kernel>::Layout, TensorIndex>>
- kern(kernel);
+ Tensor<typename internal::traits<Kernel>::Scalar, internal::traits<Kernel>::NumDimensions,
+ internal::traits<Kernel>::Layout, TensorIndex>>
+ kern(kernel);
EIGEN_STATIC_ASSERT(internal::traits<Input>::Layout == internal::traits<Kernel>::Layout,
YOU_MADE_A_PROGRAMMING_MISTAKE)
@@ -1735,46 +1719,46 @@ SpatialConvolution(const Input &input, const Kernel &kernel, const Index row_str
}
if (padding_explicit)
{
- return choose(
- Cond<internal::traits<Input>::Layout == ColMajor>(),
- kernel.reshape(kernel_dims)
- .contract(input
- .extract_image_patches(kernelRows, kernelCols, row_stride, col_stride,
- row_in_stride, col_in_stride,
- /*row_inflate_stride=*/1,
- /*col_inflate_stride=*/1, padding_top,
- padding_bottom, padding_left, padding_right,
- /*padding_value=*/0)
- .reshape(pre_contract_dims),
- contract_dims, output_kernel)
- .reshape(post_contract_dims),
- input
- .extract_image_patches(
- kernelRows, kernelCols, row_stride, col_stride, row_in_stride, col_in_stride,
- /*row_inflate_stride=*/1,
- /*col_inflate_stride=*/1, padding_top, padding_bottom, padding_left, padding_right,
- /*padding_value=*/0)
- .reshape(pre_contract_dims)
- .contract(kernel.reshape(kernel_dims), contract_dims, output_kernel)
- .reshape(post_contract_dims));
+ return choose(Cond<internal::traits<Input>::Layout == ColMajor>(),
+ kernel.reshape(kernel_dims)
+ .contract(input
+ .extract_image_patches(kernelRows, kernelCols, row_stride,
+ col_stride, row_in_stride, col_in_stride,
+ /*row_inflate_stride=*/1,
+ /*col_inflate_stride=*/1, padding_top,
+ padding_bottom, padding_left, padding_right,
+ /*padding_value=*/0)
+ .reshape(pre_contract_dims),
+ contract_dims, output_kernel)
+ .reshape(post_contract_dims),
+ input
+ .extract_image_patches(kernelRows, kernelCols, row_stride, col_stride,
+ row_in_stride, col_in_stride,
+ /*row_inflate_stride=*/1,
+ /*col_inflate_stride=*/1, padding_top, padding_bottom,
+ padding_left, padding_right,
+ /*padding_value=*/0)
+ .reshape(pre_contract_dims)
+ .contract(kernel.reshape(kernel_dims), contract_dims, output_kernel)
+ .reshape(post_contract_dims));
}
else
{
return choose(
- Cond<internal::traits<Input>::Layout == ColMajor>(),
- kernel.reshape(kernel_dims)
- .contract(input
- .extract_image_patches(kernelRows, kernelCols, row_stride, col_stride,
- row_in_stride, col_in_stride, padding_type)
- .reshape(pre_contract_dims),
- contract_dims, output_kernel)
- .reshape(post_contract_dims),
- input
- .extract_image_patches(kernelRows, kernelCols, row_stride, col_stride, row_in_stride,
- col_in_stride, padding_type)
- .reshape(pre_contract_dims)
- .contract(kernel.reshape(kernel_dims), contract_dims, output_kernel)
- .reshape(post_contract_dims));
+ Cond<internal::traits<Input>::Layout == ColMajor>(),
+ kernel.reshape(kernel_dims)
+ .contract(input
+ .extract_image_patches(kernelRows, kernelCols, row_stride, col_stride,
+ row_in_stride, col_in_stride, padding_type)
+ .reshape(pre_contract_dims),
+ contract_dims, output_kernel)
+ .reshape(post_contract_dims),
+ input
+ .extract_image_patches(kernelRows, kernelCols, row_stride, col_stride, row_in_stride,
+ col_in_stride, padding_type)
+ .reshape(pre_contract_dims)
+ .contract(kernel.reshape(kernel_dims), contract_dims, output_kernel)
+ .reshape(post_contract_dims));
}
}
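The mapper and gemm_pack_rhs specializations above exist to serve SpatialConvolution at the bottom of this header, which lowers a 2-D convolution to extract_image_patches, two reshapes, and a tensor contraction: the classic im2col-plus-GEMM formulation. A minimal single-channel, stride-1, valid-padding sketch of the same idea, in plain C++ with invented helper names rather than the Eigen machinery:

#include <cstddef>
#include <vector>

// Naive im2col: copy each KxK input patch (valid padding, stride 1) into one
// row of a [num_patches x K*K] matrix, so the convolution becomes a GEMM.
std::vector<float> im2col(const std::vector<float> &in, int H, int W, int K)
{
  const int OH = H - K + 1, OW = W - K + 1;
  std::vector<float> patches(static_cast<size_t>(OH) * OW * K * K);
  for (int oy = 0; oy < OH; ++oy)
    for (int ox = 0; ox < OW; ++ox)
      for (int ky = 0; ky < K; ++ky)
        for (int kx = 0; kx < K; ++kx)
          patches[((oy * OW + ox) * K + ky) * K + kx] = in[(oy + ky) * W + (ox + kx)];
  return patches;
}

// Each output value is then the dot product of a patch row with the flattened kernel.
std::vector<float> conv2d(const std::vector<float> &in, int H, int W,
                          const std::vector<float> &kernel, int K)
{
  const int OH = H - K + 1, OW = W - K + 1;
  const std::vector<float> patches = im2col(in, H, W, K);
  std::vector<float> out(static_cast<size_t>(OH) * OW, 0.0f);
  for (int p = 0; p < OH * OW; ++p)
    for (int k = 0; k < K * K; ++k)
      out[p] += patches[p * K * K + k] * kernel[k];
  return out;
}

Real kernels avoid materializing the patch matrix; the input mappers above stream patches on the fly, which is why their index bookkeeping is so involved.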
diff --git a/compute/cker/include/cker/operation/AddN.h b/compute/cker/include/cker/operation/AddN.h
new file mode 100644
index 000000000..1704da641
--- /dev/null
+++ b/compute/cker/include/cker/operation/AddN.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_ADDN_H__
+#define __NNFW_CKER_ADDN_H__
+
+#include "cker/Shape.h"
+
+namespace nnfw
+{
+namespace cker
+{
+
+template <typename T>
+void AddN(const Shape &input_shape, const size_t num_inputs, const T **input_data, T *output_data)
+{
+ const size_t size = input_shape.FlatSize();
+ for (size_t i = 0; i < size; ++i)
+ {
+ T x = 0;
+ for (size_t j = 0; j < num_inputs; ++j)
+ {
+ x += input_data[j][i];
+ }
+ output_data[i] = x;
+ }
+}
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_ADDN_H__
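The new AddN kernel assumes every input shares input_shape and accumulates element-wise in a scalar loop. A usage sketch under that assumption (the initializer-list Shape constructor and the values are illustrative):

#include <cstdio>
#include "cker/Shape.h"
#include "cker/operation/AddN.h"

int main()
{
  const float a[4] = {1, 2, 3, 4};
  const float b[4] = {10, 20, 30, 40};
  const float c[4] = {100, 200, 300, 400};
  const float *inputs[3] = {a, b, c};
  float out[4];

  nnfw::cker::Shape shape({1, 1, 1, 4}); // FlatSize() == 4
  nnfw::cker::AddN(shape, 3, inputs, out);

  for (float v : out)
    std::printf("%g ", v); // 111 222 333 444
  return 0;
}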
diff --git a/compute/cker/include/cker/operation/AveragePool.h b/compute/cker/include/cker/operation/AveragePool.h
index 6149cafa7..e10f02ad4 100644
--- a/compute/cker/include/cker/operation/AveragePool.h
+++ b/compute/cker/include/cker/operation/AveragePool.h
@@ -73,10 +73,10 @@ void AveragePool<float>(const PoolParams &params, const Shape &input_shape, cons
int hpad = h + params.padding_values.height;
int wpad = w + params.padding_values.width;
int h_start =
- (hpad < params.filter_height) ? 0 : (hpad - params.filter_height) / stride_height + 1;
+ (hpad < params.filter_height) ? 0 : (hpad - params.filter_height) / stride_height + 1;
int h_end = std::min(hpad / stride_height + 1, output_height);
int w_start =
- (wpad < params.filter_width) ? 0 : (wpad - params.filter_width) / stride_width + 1;
+ (wpad < params.filter_width) ? 0 : (wpad - params.filter_width) / stride_width + 1;
int w_end = std::min(wpad / stride_width + 1, output_width);
// compute elementwise sum
for (int ph = h_start; ph < h_end; ++ph)
@@ -146,11 +146,11 @@ inline void AveragePool16(const PoolParams &params, const Shape &input_shape,
const int filter_y_start = std::max(0, -in_y_origin);
const int filter_y_end = std::min(params.filter_height, input_height - in_y_origin);
const int filter_count =
- (filter_x_end - filter_x_start) * (filter_y_end - filter_y_start);
+ (filter_x_end - filter_x_start) * (filter_y_end - filter_y_start);
memset(acc, 0, tranche_depth * sizeof(acc[0]));
const uint8_t *input_ptr =
- input_data + depth_base +
- depth * (in_x_origin + input_width * (in_y_origin + input_height * batch));
+ input_data + depth_base +
+ depth * (in_x_origin + input_width * (in_y_origin + input_height * batch));
for (int fy = filter_y_start; fy < filter_y_end; fy++)
{
const uint8_t *input_row_ptr = input_ptr + depth * (fy * input_width + filter_x_start);
@@ -283,11 +283,11 @@ inline void AveragePool32(const PoolParams &params, const Shape &input_shape,
const int filter_y_start = std::max(0, -in_y_origin);
const int filter_y_end = std::min(params.filter_height, input_height - in_y_origin);
const int filter_count =
- (filter_x_end - filter_x_start) * (filter_y_end - filter_y_start);
+ (filter_x_end - filter_x_start) * (filter_y_end - filter_y_start);
memset(acc, 0, tranche_depth * sizeof(acc[0]));
const uint8_t *input_ptr =
- input_data + depth_base +
- depth * (in_x_origin + input_width * (in_y_origin + input_height * batch));
+ input_data + depth_base +
+ depth * (in_x_origin + input_width * (in_y_origin + input_height * batch));
for (int fy = filter_y_start; fy < filter_y_end; fy++)
{
const uint8_t *input_row_ptr = input_ptr + depth * (fy * input_width + filter_x_start);
@@ -395,6 +395,129 @@ void AveragePool<uint8_t>(const PoolParams &params, const Shape &input_shape,
}
}
+template <>
+void AveragePool<int8_t>(const PoolParams &params, const Shape &input_shape,
+ const int8_t *input_data, const Shape &output_shape, int8_t *output_data)
+{
+  // Here, and in other pooling ops, in order to maintain locality of reference,
+  // to minimize some recalculations, and to load into NEON vector registers, we
+  // use an inner loop down the depth. Since depths can be large, the temporary
+  // storage required would be unbounded, so we divide the work up into depth
+  // tranches just within the batch loop.
+ static constexpr int kPoolingAccTrancheSize = 256;
+
+ assert(params.quantized_activation_min <= params.quantized_activation_max);
+ assert(input_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+ const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+ const int depth = MatchingDim(input_shape, 3, output_shape, 3);
+ const int input_height = input_shape.Dims(1);
+ const int input_width = input_shape.Dims(2);
+ const int output_height = output_shape.Dims(1);
+ const int output_width = output_shape.Dims(2);
+ const int stride_height = params.stride_height;
+ const int stride_width = params.stride_width;
+
+ int32_t acc[kPoolingAccTrancheSize];
+ for (int batch = 0; batch < batches; ++batch)
+ {
+ // We proceed through the depth in tranches (see comment above). The
+ // depth_base is the depth at the beginning of the tranche. The
+ // tranche_depth is the depth dimension of the tranche.
+ for (int depth_base = 0; depth_base < depth; depth_base += kPoolingAccTrancheSize)
+ {
+ const int tranche_depth = std::min(depth - depth_base, kPoolingAccTrancheSize);
+ for (int out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int out_x = 0; out_x < output_width; ++out_x)
+ {
+ const int in_x_origin = (out_x * stride_width) - params.padding_values.width;
+ const int in_y_origin = (out_y * stride_height) - params.padding_values.height;
+ const int filter_x_start = std::max(0, -in_x_origin);
+ const int filter_x_end = std::min(params.filter_width, input_width - in_x_origin);
+ const int filter_y_start = std::max(0, -in_y_origin);
+ const int filter_y_end = std::min(params.filter_height, input_height - in_y_origin);
+ const int filter_count =
+ (filter_x_end - filter_x_start) * (filter_y_end - filter_y_start);
+ memset(acc, 0, tranche_depth * sizeof(acc[0]));
+ const int8_t *input_ptr =
+ input_data + depth_base +
+ depth * (in_x_origin + input_width * (in_y_origin + input_height * batch));
+ for (int fy = filter_y_start; fy < filter_y_end; fy++)
+ {
+ const int8_t *input_row_ptr = input_ptr + depth * (fy * input_width + filter_x_start);
+ for (int fx = filter_x_start; fx < filter_x_end; fx++)
+ {
+ const int8_t *input_channel_ptr = input_row_ptr;
+ int channel = 0;
+#ifdef USE_NEON
+ for (; channel <= tranche_depth - 16; channel += 16)
+ {
+ int16x4_t acc_reg[4];
+ int8x16_t input_reg = vld1q_s8(input_channel_ptr);
+ input_channel_ptr += 16;
+ acc_reg[0] = vget_low_s16(vmovl_s8(vget_low_s8(input_reg)));
+ acc_reg[1] = vget_high_s16(vmovl_s8(vget_low_s8(input_reg)));
+ acc_reg[2] = vget_low_s16(vmovl_s8(vget_high_s8(input_reg)));
+ acc_reg[3] = vget_high_s16(vmovl_s8(vget_high_s8(input_reg)));
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc + channel + 4 * i,
+ vaddw_s16(vld1q_s32(acc + channel + 4 * i), acc_reg[i]));
+ }
+ }
+ for (; channel <= tranche_depth - 8; channel += 8)
+ {
+ int16x4_t acc_reg[2];
+ int16x8_t input_reg = vmovl_s8(vld1_s8(input_channel_ptr));
+ input_channel_ptr += 8;
+ acc_reg[0] = vget_low_s16(input_reg);
+ acc_reg[1] = vget_high_s16(input_reg);
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_s32(acc + channel + 4 * i,
+ vaddw_s16(vld1q_s32(acc + channel + 4 * i), acc_reg[i]));
+ }
+ }
+#endif
+ for (; channel < tranche_depth; ++channel)
+ {
+ acc[channel] += *input_channel_ptr++;
+ }
+ input_row_ptr += depth;
+ }
+ }
+ int8_t *output_ptr = output_data + Offset(output_shape, batch, out_y, out_x, depth_base);
+ int channel = 0;
+#ifdef USE_NEON
+ for (; channel <= tranche_depth - 8; channel += 8)
+ {
+ int16_t buf[8];
+ for (int i = 0; i < 8; i++)
+ {
+ buf[i] = acc[channel + i] > 0 ? (acc[channel + i] + filter_count / 2) / filter_count
+ : (acc[channel + i] - filter_count / 2) / filter_count;
+ }
+ int8x8_t buf8 = vqmovn_s16(vld1q_s16(buf));
+ buf8 = vmin_s8(buf8, vdup_n_s8(params.quantized_activation_max));
+ buf8 = vmax_s8(buf8, vdup_n_s8(params.quantized_activation_min));
+ vst1_s8(output_ptr + channel, buf8);
+ }
+#endif
+ for (; channel < tranche_depth; ++channel)
+ {
+ int16_t a = acc[channel] > 0 ? (acc[channel] + filter_count / 2) / filter_count
+ : (acc[channel] - filter_count / 2) / filter_count;
+ a = std::max<int16_t>(a, params.quantized_activation_min);
+ a = std::min<int16_t>(a, params.quantized_activation_max);
+ output_ptr[channel] = static_cast<int8_t>(a);
+ }
+ }
+ }
+ }
+ }
+}
+
} // namespace cker
} // namespace nnfw
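In the new int8_t specialization, the scalar tail divides each accumulator by filter_count with rounding to nearest, ties away from zero; plain integer division would truncate toward zero and bias the average. The rounding step in isolation (a self-contained sketch, not the kernel itself):

#include <algorithm>
#include <cstdint>

// Round acc / count to the nearest integer, ties away from zero, then clamp
// to the quantized activation range.
inline std::int8_t AveragePoolRound(std::int32_t acc, std::int32_t count,
                                    std::int32_t act_min, std::int32_t act_max)
{
  std::int32_t a = acc > 0 ? (acc + count / 2) / count  //  7/4 -> 2, not 1
                           : (acc - count / 2) / count; // -7/4 -> -2, not -1
  a = std::max(a, act_min);
  a = std::min(a, act_max);
  return static_cast<std::int8_t>(a);
}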
diff --git a/compute/cker/include/cker/operation/BatchToSpaceND.h b/compute/cker/include/cker/operation/BatchToSpaceND.h
index e33b2fba5..980ad48dd 100644
--- a/compute/cker/include/cker/operation/BatchToSpaceND.h
+++ b/compute/cker/include/cker/operation/BatchToSpaceND.h
@@ -43,7 +43,7 @@ inline void GetIndexRange(int spatial_index_dim, int block_shape_dim, int input_
// Similarly, (*end_index) * block_shape_dim is rounded up too (note that
// end_index is exclusive).
*end_index =
- std::min(input_dim, (output_dim - spatial_index_dim + block_shape_dim - 1) / block_shape_dim);
+ std::min(input_dim, (output_dim - spatial_index_dim + block_shape_dim - 1) / block_shape_dim);
}
template <typename T>
@@ -116,7 +116,7 @@ inline void BatchToSpaceND(const Shape &unextended_input1_shape, const T *input1
for (int in_w = in_w_start; in_w < in_w_end; ++in_w)
{
const int out_w =
- in_w * block_shape_width + spatial_offset % block_shape_width - crops_left;
+ in_w * block_shape_width + spatial_offset % block_shape_width - crops_left;
assert(out_w >= 0);
assert(out_w < output_width);
T *out = output_data + Offset(output_shape, out_batch, out_h, out_w, 0);
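The reformatted end_index expression in GetIndexRange is the standard integer ceiling-division idiom: for n >= 0 and d > 0, (n + d - 1) / d equals ceil(n / d). A tiny self-contained check:

#include <cassert>

// ceil(n / d) for n >= 0, d > 0, using integer arithmetic only.
inline int CeilDiv(int n, int d) { return (n + d - 1) / d; }

int main()
{
  assert(CeilDiv(7, 2) == 4); // 3.5 rounds up
  assert(CeilDiv(8, 2) == 4); // exact division is unchanged
  assert(CeilDiv(0, 3) == 0);
  return 0;
}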
diff --git a/compute/cker/include/cker/operation/BinaryArithmeticOps.h b/compute/cker/include/cker/operation/BinaryArithmeticOps.h
index 8aef1f8c1..c7878496a 100644
--- a/compute/cker/include/cker/operation/BinaryArithmeticOps.h
+++ b/compute/cker/include/cker/operation/BinaryArithmeticOps.h
@@ -139,7 +139,7 @@ inline bool ProcessBroadcastShapes(const Shape &shape0, const Shape &shape1,
// From this point it is assumed contractually that corresponding dimensions
// in shape0 and shape1 are either (a) equal or (b) one or other equals 1.
const bool swap_inputs =
- params->broadcast_category == BroadcastableOpCategory::kSecondInputBroadcastsFast;
+ params->broadcast_category == BroadcastableOpCategory::kSecondInputBroadcastsFast;
const Shape *shape_a = swap_inputs ? &extended_shape1 : &extended_shape0;
const Shape *shape_b = swap_inputs ? &extended_shape0 : &extended_shape1;
@@ -190,34 +190,34 @@ inline bool ProcessBroadcastShapes(const Shape &shape0, const Shape &shape1,
}
template <BinaryArithmeticOpType op_type, typename T>
-inline void BinaryArithmeticOp(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
- const T *input1_data, const Shape &input2_shape,
- const T *input2_data, const Shape &output_shape, T *output_data)
+inline typename std::enable_if_t<!is_quant8<T>::value>
+BinaryArithmeticOp(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
+ const T *input1_data, const Shape &input2_shape, const T *input2_data,
+ const Shape &output_shape, T *output_data)
{
reference::BinaryArithmeticOp(params, input1_shape, input1_data, input2_shape, input2_data,
output_shape, output_data, GetBinaryArtithmeticFn<op_type, T>());
}
-template <BinaryArithmeticOpType op_type>
-inline void BinaryArithmeticOp(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
- const uint8_t *input1_data, const Shape &input2_shape,
- const uint8_t *input2_data, const Shape &output_shape,
- uint8_t *output_data)
+template <BinaryArithmeticOpType op_type, typename T>
+inline typename std::enable_if_t<is_quant8<T>::value>
+BinaryArithmeticOp(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
+ const T *input1_data, const Shape &input2_shape, const T *input2_data,
+ const Shape &output_shape, T *output_data)
{
switch (op_type)
{
case nnfw::cker::BinaryArithmeticOpType::ADD:
case nnfw::cker::BinaryArithmeticOpType::SUB:
- optimized::AddQuant8(params, input1_shape, input1_data, input2_shape, input2_data,
- output_shape, output_data);
+ optimized::Add(params, input1_shape, input1_data, input2_shape, input2_data, output_shape,
+ output_data);
break;
case nnfw::cker::BinaryArithmeticOpType::MUL:
- optimized::MulQuant8(params, input1_shape, const_cast<uint8_t *>(input1_data), input2_shape,
- const_cast<uint8_t *>(input2_data), output_shape, output_data);
+ optimized::Mul(params, input1_shape, input1_data, input2_shape, input2_data, output_shape,
+ output_data);
break;
case nnfw::cker::BinaryArithmeticOpType::DIV:
throw std::runtime_error{"Quant8 Asymm NYI"};
-
default:
assert(false);
break;
@@ -246,9 +246,8 @@ inline void BinaryArithmeticOp(const BinaryArithmeticOpParam &params, const Shap
output_data);
break;
case nnfw::cker::BinaryArithmeticOpType::DIV:
- reference::BinaryArithmeticOp<float>(params, input1_shape, input1_data, input2_shape,
- input2_data, output_shape, output_data,
- GetBinaryArtithmeticFn<op_type, float>());
+ optimized::Div(params, input1_shape, input1_data, input2_shape, input2_data, output_shape,
+ output_data);
break;
default:
assert(false);
@@ -257,33 +256,32 @@ inline void BinaryArithmeticOp(const BinaryArithmeticOpParam &params, const Shap
}
template <BinaryArithmeticOpType op_type, typename T>
-inline void BroadcastBinaryArithmeticOp(BinaryArithmeticOpParam &params, const Shape &input1_shape,
- const T *input1_data, const Shape &input2_shape,
- const T *input2_data, const Shape &output_shape,
- T *output_data)
+inline typename std::enable_if_t<!is_quant8<T>::value>
+BroadcastBinaryArithmeticOp(BinaryArithmeticOpParam &params, const Shape &input1_shape,
+ const T *input1_data, const Shape &input2_shape, const T *input2_data,
+ const Shape &output_shape, T *output_data)
{
reference::BroadcastBinaryArithmeticOpSlow(params, input1_shape, input1_data, input2_shape,
input2_data, output_shape, output_data,
GetBinaryArtithmeticFn<op_type, T>());
}
-template <BinaryArithmeticOpType op_type>
-inline void BroadcastBinaryArithmeticOp(BinaryArithmeticOpParam &params, const Shape &input1_shape,
- const uint8_t *input1_data, const Shape &input2_shape,
- const uint8_t *input2_data, const Shape &output_shape,
- uint8_t *output_data)
+template <BinaryArithmeticOpType op_type, typename T>
+inline typename std::enable_if_t<is_quant8<T>::value>
+BroadcastBinaryArithmeticOp(BinaryArithmeticOpParam &params, const Shape &input1_shape,
+ const T *input1_data, const Shape &input2_shape, const T *input2_data,
+ const Shape &output_shape, T *output_data)
{
switch (op_type)
{
case nnfw::cker::BinaryArithmeticOpType::ADD:
case nnfw::cker::BinaryArithmeticOpType::SUB:
- optimized::BroadcastAddDispatchQuant8(params, input1_shape, input1_data, input2_shape,
- input2_data, output_shape, output_data);
+ optimized::BroadcastAddDispatch(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data);
break;
case nnfw::cker::BinaryArithmeticOpType::MUL:
- optimized::BroadcastMulDispatchQuant8(
- params, input1_shape, const_cast<uint8_t *>(input1_data), input2_shape,
- const_cast<uint8_t *>(input2_data), output_shape, output_data);
+ optimized::BroadcastMulDispatch(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data);
break;
case nnfw::cker::BinaryArithmeticOpType::DIV:
case nnfw::cker::BinaryArithmeticOpType::POW:
@@ -312,11 +310,17 @@ inline void BroadcastBinaryArithmeticOp(BinaryArithmeticOpParam &params, const S
output_shape, output_data);
break;
case nnfw::cker::BinaryArithmeticOpType::SUB:
+ optimized::BroadcastSubDispatch(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data);
+ break;
case nnfw::cker::BinaryArithmeticOpType::DIV:
+ optimized::BroadcastDivDispatch(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data);
+ break;
case nnfw::cker::BinaryArithmeticOpType::POW:
reference::BroadcastBinaryArithmeticOpSlow<float>(
- params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data,
- GetBinaryArtithmeticFn<op_type, float>());
+ params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data,
+ GetBinaryArtithmeticFn<op_type, float>());
break;
default:
assert(false);
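This hunk replaces the uint8_t-only overloads with a template pair selected by SFINAE on an is_quant8 trait, so int8_t and uint8_t share the quantized path while every other type keeps the generic one. The dispatch pattern in isolation, with a minimal stand-in trait (not the cker definition):

#include <cstdint>
#include <cstdio>
#include <type_traits>

// Stand-in trait: true only for the two 8-bit quantized element types.
template <typename T>
struct is_quant8
  : std::integral_constant<bool, std::is_same<T, std::uint8_t>::value ||
                                   std::is_same<T, std::int8_t>::value>
{
};

// Exactly one of these two overloads is viable for any T.
template <typename T>
typename std::enable_if<!is_quant8<T>::value>::type Dispatch(T)
{
  std::puts("generic path");
}

template <typename T>
typename std::enable_if<is_quant8<T>::value>::type Dispatch(T)
{
  std::puts("quant8 path");
}

int main()
{
  Dispatch(1.0f);           // generic path
  Dispatch(std::int8_t{1}); // quant8 path
  return 0;
}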
diff --git a/compute/cker/include/cker/operation/BroadcastTo.h b/compute/cker/include/cker/operation/BroadcastTo.h
index 5068eca96..145deda29 100644
--- a/compute/cker/include/cker/operation/BroadcastTo.h
+++ b/compute/cker/include/cker/operation/BroadcastTo.h
@@ -126,7 +126,7 @@ template <typename Device, typename T> struct BroadcastTo
}
}
};
-} // functor
+} // namespace functor
template <typename T>
inline void BroadcastTo(const Shape &input_shape, T *input_data, const Shape &output_shape,
diff --git a/compute/cker/include/cker/operation/Common.h b/compute/cker/include/cker/operation/Common.h
index d69b38aca..24d4cc4c7 100644
--- a/compute/cker/include/cker/operation/Common.h
+++ b/compute/cker/include/cker/operation/Common.h
@@ -82,7 +82,7 @@ inline void BiasAndClamp(float clamp_min, float clamp_max, int bias_size, const
for (; i < bias_size; i++)
{
array_ptr[i] =
- ActivationFunctionWithMinMax(array_ptr[i] + bias_data[i], clamp_min, clamp_max);
+ ActivationFunctionWithMinMax(array_ptr[i] + bias_data[i], clamp_min, clamp_max);
}
}
#else // not NEON
@@ -91,7 +91,7 @@ inline void BiasAndClamp(float clamp_min, float clamp_max, int bias_size, const
for (int i = 0; i < bias_size; i++)
{
array_data[array_offset + i] = ActivationFunctionWithMinMax(
- array_data[array_offset + i] + bias_data[i], clamp_min, clamp_max);
+ array_data[array_offset + i] + bias_data[i], clamp_min, clamp_max);
}
}
#endif
diff --git a/compute/cker/include/cker/operation/Comparison.h b/compute/cker/include/cker/operation/Comparison.h
index 47eb6034c..ac6af8487 100644
--- a/compute/cker/include/cker/operation/Comparison.h
+++ b/compute/cker/include/cker/operation/Comparison.h
@@ -42,7 +42,7 @@ inline void ComparisonImpl(const Shape &input1_shape, const T *input1_data,
const Shape &output_shape, bool *output_data)
{
  const int64_t flatsize = // number of elements
- MatchingFlatSize(input1_shape, input2_shape, output_shape);
+ MatchingFlatSize(input1_shape, input2_shape, output_shape);
for (int64_t i = 0; i < flatsize; ++i)
{
output_data[i] = F(input1_data[i], input2_data[i]);
@@ -79,9 +79,9 @@ inline void ComparisonWithScaling(ComparisonParams &params, const Shape &input1_
const int32_t shifted_input1_val = input1_val * (1 << left_shift);
const int32_t shifted_input2_val = input2_val * (1 << left_shift);
const int32_t scaled_input1_val = MultiplyByQuantizedMultiplierSmallerThanOneExp(
- shifted_input1_val, input1_multiplier, input1_shift);
+ shifted_input1_val, input1_multiplier, input1_shift);
const int32_t scaled_input2_val = MultiplyByQuantizedMultiplierSmallerThanOneExp(
- shifted_input2_val, input2_multiplier, input2_shift);
+ shifted_input2_val, input2_multiplier, input2_shift);
output_data[i] = F(scaled_input1_val, scaled_input2_val);
}
}
@@ -111,8 +111,8 @@ BroadcastComparison4DSlowImpl(const Shape &unextended_input1_shape, const T *inp
for (int c = 0; c < output_shape.Dims(3); ++c)
{
output_data[Offset(output_shape, b, y, x, c)] =
- F(input1_data[SubscriptToIndex(desc1, b, y, x, c)],
- input2_data[SubscriptToIndex(desc2, b, y, x, c)]);
+ F(input1_data[SubscriptToIndex(desc1, b, y, x, c)],
+ input2_data[SubscriptToIndex(desc2, b, y, x, c)]);
}
}
}
@@ -159,15 +159,15 @@ inline void BroadcastComparison4DSlowWithScaling(ComparisonParams &params,
for (int c = 0; c < output_shape.Dims(3); ++c)
{
const int32_t input1_val =
- input1_offset + input1_data[SubscriptToIndex(desc1, b, y, x, c)];
+ input1_offset + input1_data[SubscriptToIndex(desc1, b, y, x, c)];
const int32_t input2_val =
- input2_offset + input2_data[SubscriptToIndex(desc2, b, y, x, c)];
+ input2_offset + input2_data[SubscriptToIndex(desc2, b, y, x, c)];
const int32_t shifted_input1_val = input1_val * (1 << left_shift);
const int32_t shifted_input2_val = input2_val * (1 << left_shift);
const int32_t scaled_input1_val = MultiplyByQuantizedMultiplierSmallerThanOneExp(
- shifted_input1_val, input1_multiplier, input1_shift);
+ shifted_input1_val, input1_multiplier, input1_shift);
const int32_t scaled_input2_val = MultiplyByQuantizedMultiplierSmallerThanOneExp(
- shifted_input2_val, input2_multiplier, input2_shift);
+ shifted_input2_val, input2_multiplier, input2_shift);
output_data[Offset(output_shape, b, y, x, c)] = F(scaled_input1_val, scaled_input2_val);
}
}
@@ -175,55 +175,53 @@ inline void BroadcastComparison4DSlowWithScaling(ComparisonParams &params,
}
}
-#define TFLITE_COMPARISON_OP(name) \
- template <typename T> \
- inline void name(const Shape &input1_shape, const T *input1_data, const Shape &input2_shape, \
- const T *input2_data, const Shape &output_shape, bool *output_data) \
- { \
- Comparison<name##Fn>(input1_shape, input1_data, input2_shape, input2_data, output_shape, \
- output_data); \
- } \
- template <typename T> \
- inline void name##NoScaling(const Shape &input1_shape, const T *input1_data, \
- const Shape &input2_shape, const T *input2_data, \
- const Shape &output_shape, bool *output_data) \
- { \
- ComparisonImpl<T, name##Fn>(input1_shape, input1_data, input2_shape, input2_data, \
- output_shape, output_data); \
- } \
- template <typename T> \
- inline void name##WithScaling(ComparisonParams &params, const Shape &input1_shape, \
- const T *input1_data, const Shape &input2_shape, \
- const T *input2_data, const Shape &output_shape, \
- bool *output_data) \
- { \
- ComparisonWithScaling<T, name##Fn>(params, input1_shape, input1_data, input2_shape, \
- input2_data, output_shape, output_data); \
- } \
- template <typename T> \
- inline void Broadcast4DSlow##name##NoScaling(const Shape &input1_shape, const T *input1_data, \
- const Shape &input2_shape, const T *input2_data, \
- const Shape &output_shape, bool *output_data) \
- { \
- BroadcastComparison4DSlowImpl<T, name##Fn>(input1_shape, input1_data, input2_shape, \
- input2_data, output_shape, output_data); \
- } \
- template <typename T> \
- inline void Broadcast4DSlow##name(const Shape &input1_shape, const T *input1_data, \
- const Shape &input2_shape, const T *input2_data, \
- const Shape &output_shape, bool *output_data) \
- { \
- BroadcastComparison4DSlow<T, name##Fn>(input1_shape, input1_data, input2_shape, input2_data, \
- output_shape, output_data); \
- } \
- template <typename T> \
- inline void Broadcast4DSlow##name##WithScaling(ComparisonParams &params, \
- const Shape &input1_shape, const T *input1_data, \
- const Shape &input2_shape, const T *input2_data, \
- const Shape &output_shape, bool *output_data) \
- { \
- BroadcastComparison4DSlowWithScaling<T, name##Fn>( \
- params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data); \
+#define TFLITE_COMPARISON_OP(name) \
+ template <typename T> \
+ inline void name(const Shape &input1_shape, const T *input1_data, const Shape &input2_shape, \
+ const T *input2_data, const Shape &output_shape, bool *output_data) \
+ { \
+ Comparison<name##Fn>(input1_shape, input1_data, input2_shape, input2_data, output_shape, \
+ output_data); \
+ } \
+ template <typename T> \
+ inline void name##NoScaling(const Shape &input1_shape, const T *input1_data, \
+ const Shape &input2_shape, const T *input2_data, \
+ const Shape &output_shape, bool *output_data) \
+ { \
+ ComparisonImpl<T, name##Fn>(input1_shape, input1_data, input2_shape, input2_data, \
+ output_shape, output_data); \
+ } \
+ template <typename T> \
+ inline void name##WithScaling( \
+ ComparisonParams &params, const Shape &input1_shape, const T *input1_data, \
+ const Shape &input2_shape, const T *input2_data, const Shape &output_shape, bool *output_data) \
+ { \
+ ComparisonWithScaling<T, name##Fn>(params, input1_shape, input1_data, input2_shape, \
+ input2_data, output_shape, output_data); \
+ } \
+ template <typename T> \
+ inline void Broadcast4DSlow##name##NoScaling(const Shape &input1_shape, const T *input1_data, \
+ const Shape &input2_shape, const T *input2_data, \
+ const Shape &output_shape, bool *output_data) \
+ { \
+ BroadcastComparison4DSlowImpl<T, name##Fn>(input1_shape, input1_data, input2_shape, \
+ input2_data, output_shape, output_data); \
+ } \
+ template <typename T> \
+ inline void Broadcast4DSlow##name(const Shape &input1_shape, const T *input1_data, \
+ const Shape &input2_shape, const T *input2_data, \
+ const Shape &output_shape, bool *output_data) \
+ { \
+ BroadcastComparison4DSlow<T, name##Fn>(input1_shape, input1_data, input2_shape, input2_data, \
+ output_shape, output_data); \
+ } \
+ template <typename T> \
+ inline void Broadcast4DSlow##name##WithScaling( \
+ ComparisonParams &params, const Shape &input1_shape, const T *input1_data, \
+ const Shape &input2_shape, const T *input2_data, const Shape &output_shape, bool *output_data) \
+ { \
+ BroadcastComparison4DSlowWithScaling<T, name##Fn>( \
+ params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data); \
}
TFLITE_COMPARISON_OP(Equal);
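Each TFLITE_COMPARISON_OP(name) expansion stamps out six templates — name, name##NoScaling, name##WithScaling, and their Broadcast4DSlow counterparts — all forwarding to the helpers above. For example, after TFLITE_COMPARISON_OP(Equal) an element-wise comparison is a single call (a usage sketch; the initializer-list Shape constructor is assumed):

#include <cstdio>
#include "cker/Shape.h"
#include "cker/operation/Comparison.h"

int main()
{
  const float lhs[4] = {1, 2, 3, 4};
  const float rhs[4] = {1, 0, 3, 0};
  bool out[4];

  nnfw::cker::Shape shape({1, 1, 1, 4});
  nnfw::cker::Equal(shape, lhs, shape, rhs, shape, out);

  for (bool v : out)
    std::printf("%d ", v); // 1 0 1 0
  return 0;
}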
diff --git a/compute/cker/include/cker/operation/Concatenation.h b/compute/cker/include/cker/operation/Concatenation.h
index 394123e30..9aaca00b7 100644
--- a/compute/cker/include/cker/operation/Concatenation.h
+++ b/compute/cker/include/cker/operation/Concatenation.h
@@ -142,7 +142,7 @@ inline void ConcatenationWithScaling(const ConcatenationParams &params,
for (int j = 0; j < copy_size; ++j)
{
const int32_t value =
- static_cast<int32_t>(std::round(input_ptr[j] * scale + bias)) + output_zeropoint;
+ static_cast<int32_t>(std::round(input_ptr[j] * scale + bias)) + output_zeropoint;
output_ptr[j] = static_cast<uint8_t>(std::max(std::min(255, value), 0));
}
}
diff --git a/compute/cker/include/cker/operation/Conv.h b/compute/cker/include/cker/operation/Conv.h
index 214f2e612..2572b51ee 100644
--- a/compute/cker/include/cker/operation/Conv.h
+++ b/compute/cker/include/cker/operation/Conv.h
@@ -57,9 +57,9 @@ class Conv
public:
Conv() : _modified_filter_data(), _im2col_shape(4), _need_im2col(false), _prepared(false) {}
- void prepare(const Shape &filter_shape, const float *filter_data, PaddingType padding_type,
- bool &is_replaced_weights, uint32_t dilationWidthFactor,
- uint32_t dilationHeightFactor)
+ void prepareF32(const Shape &filter_shape, const float *filter_data, PaddingType padding_type,
+ bool &is_replaced_weights, uint32_t dilationWidthFactor,
+ uint32_t dilationHeightFactor)
{
if (!_prepared)
{
@@ -71,12 +71,14 @@ public:
}
}
- void prepareQuant(const Shape &input_shape, const Shape &kernel_shape, const Shape &output_shape,
- uint32_t stride_width, uint32_t stride_height)
+ void prepareQ8uPerTensor(const Shape &input_shape, const Shape &kernel_shape,
+ const Shape &output_shape, uint32_t stride_width, uint32_t stride_height,
+ uint32_t dilation_width_factor, uint32_t dilation_height_factor)
{
if (!_prepared)
{
- IsRequiredIm2col(input_shape, kernel_shape, output_shape, stride_width, stride_height);
+ IsRequiredIm2col(input_shape, kernel_shape, output_shape, stride_width, stride_height,
+ dilation_width_factor, dilation_height_factor);
_prepared = true;
}
}
@@ -115,7 +117,8 @@ public:
{
      // This means that the input or output is dynamic or the filter is not constant
IsRequiredIm2col(input_shape, filter_shape, output_shape, params.stride_width,
- params.stride_height);
+ params.stride_height, params.dilation_width_factor,
+ params.dilation_height_factor);
}
int im2col_size = _need_im2col ? _im2col_shape.FlatSize() : 1;
@@ -135,6 +138,29 @@ public:
}
}
+ void operator()(const ConvParams &params, const Shape &input_shape, const uint8_t *input_data,
+ const Shape &filter_shape, const uint8_t *filter_data,
+ const int32_t *filter_zero_point, const Shape &bias_shape,
+ const int32_t *bias_data, const Shape &output_shape, uint8_t *output_data)
+ {
+ reference::Conv<uint8_t, true>(params, _per_channel_output_multiplier.data(),
+ _per_channel_output_shift.data(), input_shape, input_data,
+ filter_shape, filter_data, filter_zero_point, bias_shape,
+ bias_data, output_shape, output_data);
+ }
+
+ void operator()(const ConvParams &params, const Shape &input_shape, const int8_t *input_data,
+ const Shape &filter_shape, const int8_t *filter_data, const Shape &bias_shape,
+ const int32_t *bias_data, const Shape &output_shape, int8_t *output_data)
+ {
+ reference::Conv<int8_t, false>(params, _per_channel_output_multiplier.data(),
+ _per_channel_output_shift.data(), input_shape, input_data,
+ filter_shape, filter_data, nullptr /* filter_zero_point */,
+ bias_shape, bias_data, output_shape, output_data);
+ }
+ std::vector<int32_t> &per_channel_output_multiplier() { return _per_channel_output_multiplier; }
+ std::vector<int> &per_channel_output_shift() { return _per_channel_output_shift; }
+
private:
bool usableMultiThreaded(PaddingType padding_type, uint32_t dilation_width_factor,
int32_t dilation_height_factor)
@@ -154,10 +180,15 @@ private:
}
void IsRequiredIm2col(const Shape &input_shape, const Shape &kernel_shape,
- const Shape &output_shape, uint32_t stride_width, uint32_t stride_height)
+ const Shape &output_shape, uint32_t stride_width, uint32_t stride_height,
+ uint32_t dilation_width_factor, uint32_t dilation_height_factor)
{
- _need_im2col = stride_width != 1 || stride_height != 1 || kernel_shape.Dims(1) != 1 ||
- kernel_shape.Dims(2) != 1;
+ const bool need_dilated_im2col = dilation_width_factor != 1 || dilation_height_factor != 1;
+ const bool need_non_dilated_im2col = stride_width != 1 || stride_height != 1 ||
+ kernel_shape.Dims(1) != 1 || kernel_shape.Dims(2) != 1;
+
+ _need_im2col = need_dilated_im2col || need_non_dilated_im2col;
+
if (_need_im2col)
{
_im2col_shape.SetDim(0, output_shape.Dims(0));
@@ -172,7 +203,25 @@ private:
Shape _im2col_shape;
bool _need_im2col;
bool _prepared;
+ // Per channel output multiplier and shift.
+ std::vector<int32_t> _per_channel_output_multiplier;
+ std::vector<int> _per_channel_output_shift;
+};
+
+struct ConvHybridTempArena
+{
+ ConvHybridTempArena(int batch_size, int input_size)
+ {
+ input_quantized.resize(input_size);
+ // TODO: Optimize the case of batch_size = 1
+ input_scaling_factors.resize(batch_size);
+ input_offsets.resize(batch_size);
+ }
+ std::vector<int8_t> input_quantized;
+ std::vector<float> input_scaling_factors;
+ std::vector<int32_t> input_offsets;
};
+
} // namespace cker
} // namespace nnfw
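ConvHybridTempArena only pre-sizes the scratch a hybrid (float-input, int8-weight) convolution needs: the quantized copy of the input plus one scaling factor and offset per batch. The per-batch scaling factor is typically produced by symmetric quantization of that batch, along these lines (an illustrative sketch, not the cker routine; offsets stay zero in the symmetric case):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <vector>

// Quantize one batch of floats to int8 so that x ~= q * scale.
float QuantizeSymmetric(const std::vector<float> &x, std::vector<std::int8_t> &q)
{
  float max_abs = 0.0f;
  for (float v : x)
    max_abs = std::max(max_abs, std::fabs(v));
  const float scale = max_abs > 0.0f ? max_abs / 127.0f : 1.0f;

  q.resize(x.size());
  for (std::size_t i = 0; i < x.size(); ++i)
  {
    const float clamped = std::min(127.0f, std::max(-127.0f, x[i] / scale));
    q[i] = static_cast<std::int8_t>(std::lround(clamped));
  }
  return scale; // one entry of input_scaling_factors per batch
}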
diff --git a/compute/cker/include/cker/operation/DepthToSpace.h b/compute/cker/include/cker/operation/DepthToSpace.h
new file mode 100644
index 000000000..e57fef01d
--- /dev/null
+++ b/compute/cker/include/cker/operation/DepthToSpace.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_DEPTH_TO_SPACE_H__
+#define __NNFW_CKER_DEPTH_TO_SPACE_H__
+
+#include "cker/Shape.h"
+#include "cker/Types.h"
+
+namespace nnfw
+{
+namespace cker
+{
+
+template <typename T>
+inline void DepthToSpace(const Shape &unextended_input_shape, const T *input_data,
+ const Shape &unextended_output_shape, T *output_data, int32_t block_size)
+{
+ assert(unextended_input_shape.DimensionsCount() <= 4);
+ assert(unextended_output_shape.DimensionsCount() <= 4);
+ const Shape input_shape = Shape::ExtendedShape(4, unextended_input_shape);
+ const Shape output_shape = Shape::ExtendedShape(4, unextended_output_shape);
+
+ const int input_depth = input_shape.Dims(3);
+ const int input_width = input_shape.Dims(2);
+ const int input_height = input_shape.Dims(1);
+
+ const int output_depth = output_shape.Dims(3);
+ const int batch_size = output_shape.Dims(0);
+
+  // Number of contiguous values that we can copy in one iteration.
+ const int stride = block_size * output_depth;
+
+ for (int batch = 0; batch < batch_size; ++batch)
+ {
+ for (int in_h = 0; in_h < input_height; ++in_h)
+ {
+ const T *input_ptr = input_data + Offset(input_shape, batch, in_h, 0, 0);
+ for (int offset_h = 0; offset_h < block_size; ++offset_h)
+ {
+ const T *src = input_ptr;
+ for (int in_w = 0; in_w < input_width; ++in_w)
+ {
+ memcpy(output_data, src, stride * sizeof(T));
+ output_data += stride;
+ src += input_depth;
+ }
+ input_ptr += stride;
+ }
+ }
+ }
+}
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_DEPTH_TO_SPACE_H__
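DepthToSpace rearranges an NHWC tensor of shape [N, H, W, C] into [N, H*B, W*B, C/(B*B)] for block size B; the memcpy above moves block_size * output_depth contiguous values per copy. A tiny worked example under those semantics (initializer-list Shape constructor assumed):

#include <cstdio>
#include "cker/Shape.h"
#include "cker/operation/DepthToSpace.h"

int main()
{
  // [1,1,2,4] with block_size 2 -> [1,2,4,1]: each depth-4 pixel unfolds
  // into its own 2x2 spatial block.
  const int input[8] = {0, 1, 2, 3, 10, 11, 12, 13};
  int output[8] = {};

  nnfw::cker::Shape in_shape({1, 1, 2, 4});
  nnfw::cker::Shape out_shape({1, 2, 4, 1});
  nnfw::cker::DepthToSpace(in_shape, input, out_shape, output, /*block_size=*/2);

  for (int v : output)
    std::printf("%d ", v); // 0 1 10 11 2 3 12 13
  return 0;
}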
diff --git a/compute/cker/include/cker/operation/DepthwiseConv.h b/compute/cker/include/cker/operation/DepthwiseConv.h
index 814a9e019..c926ec4f1 100644
--- a/compute/cker/include/cker/operation/DepthwiseConv.h
+++ b/compute/cker/include/cker/operation/DepthwiseConv.h
@@ -22,143 +22,162 @@
#include "cker/Types.h"
#include "cker/Utils.h"
#include "cker/neon/neon_check.h"
+#include "cker/operation/optimized/DepthwiseConvFloat.h"
#include "cker/operation/optimized/DepthwiseConvUint8.h"
+#include "cker/operation/optimized/integer_ops/DepthwiseConvInt8.h"
+#include "cker/operation/reference/integer_ops/DepthwiseConvUInt8.h"
+#include "cker/operation/reference/integer_ops/DepthwiseConvHybrid.h"
+#include "cker/CpuBackendThreadpool.h"
namespace nnfw
{
namespace cker
{
-inline void DepthwiseConv(const DepthwiseConvParams &params, const Shape &input_shape,
- const uint8_t *input_data, const Shape &filter_shape,
- const uint8_t *filter_data, const Shape &bias_shape,
- const int32_t *bias_data, const Shape &output_shape, uint8_t *output_data)
+// TODO(luwa): add multithread to per-channel depthwise_conv
+// DepthwiseConv can run with multiple threads on the dim specified by
+// thread_dim. Each thread processes output elements on that dim in the range
+// [thread_start, thread_end).
+// For example, if thread_start = 2, thread_end = 6, and thread_dim = 1, the
+// task computes DepthwiseConv for output_data[:, 2:6, :, :].
+template <typename T, typename TS> struct DepthwiseConvWorkerTask : cpu_backend_threadpool::Task
{
- const int depth_multiplier = params.depth_multiplier;
- const int32_t output_activation_min = params.quantized_activation_min;
- const int32_t output_activation_max = params.quantized_activation_max;
- const int dilation_width_factor = params.dilation_width_factor;
- const int dilation_height_factor = params.dilation_height_factor;
- assert(dilation_width_factor >= 1);
- assert(dilation_height_factor >= 1);
- UNUSED_RELEASE(dilation_width_factor);
- UNUSED_RELEASE(dilation_height_factor);
- assert(input_shape.DimensionsCount() == 4);
- assert(filter_shape.DimensionsCount() == 4);
- assert(output_shape.DimensionsCount() == 4);
- assert(output_activation_min <= output_activation_max);
- UNUSED_RELEASE(output_activation_min);
- UNUSED_RELEASE(output_activation_max);
- const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
- const int input_depth = input_shape.Dims(3);
- assert(output_depth == input_depth * depth_multiplier);
- assert(bias_shape.FlatSize() == output_depth);
- UNUSED_RELEASE(input_depth);
- UNUSED_RELEASE(output_depth);
- UNUSED_RELEASE(depth_multiplier);
-
-// Enable for arm64 except for the Nvidia Linux 4 Tegra (L4T) running on
-// Jetson TX-2. This compiler does not support the offsetof() macro.
-#if defined(__aarch64__)
-// TODO Use below codes
-
-// const int stride_width = params.stride_width;
-// const int stride_height = params.stride_height;
-// const int pad_width = params.padding_values.width;
-// const int pad_height = params.padding_values.height;
-// const int output_shift = params.output_shift;
-//
-// // Call kernel optimized for depthwise convolutions using 3x3 filters if
-// // parameters are supported.
-// if (Fast3x3FilterKernelSupported(
-// input_shape, filter_shape, stride_width, stride_height,
-// dilation_width_factor, dilation_height_factor, pad_width, pad_height,
-// depth_multiplier, output_shape, output_shift)) {
-// DepthwiseConv3x3Filter(params, input_shape, input_data, filter_shape,
-// filter_data, bias_shape, bias_data, output_shape,
-// output_data);
-// return;
-// }
-#endif
-
- optimized::DepthwiseConvGeneral(params, input_shape, input_data, filter_shape, filter_data,
- bias_shape, bias_data, output_shape, output_data);
+ DepthwiseConvWorkerTask(const DepthwiseConvParams &params, const Shape &input_shape,
+ const T *input_data, const Shape &filter_shape, const T *filter_data,
+ const Shape &bias_shape, const TS *bias_data, const Shape &output_shape,
+ T *output_data, int thread_start, int thread_end, int thread_dim)
+ : params_(params), input_shape_(input_shape), input_data_(input_data),
+ filter_shape_(filter_shape), filter_data_(filter_data), bias_shape_(bias_shape),
+ bias_data_(bias_data), output_shape_(output_shape), output_data_(output_data),
+ thread_start_(thread_start), thread_end_(thread_end), thread_dim_(thread_dim)
+ {
+ }
+
+ void Run() override
+ {
+ optimized::DepthwiseConvImpl(params_, input_shape_, input_data_, filter_shape_, filter_data_,
+ bias_shape_, bias_data_, output_shape_, output_data_,
+ thread_start_, thread_end_, thread_dim_);
+ }
+
+private:
+ const DepthwiseConvParams &params_;
+ const Shape &input_shape_;
+ const T *input_data_;
+ const Shape &filter_shape_;
+ const T *filter_data_;
+ const Shape &bias_shape_;
+ const TS *bias_data_;
+ const Shape &output_shape_;
+ T *output_data_;
+ // const CpuFlags& cpu_flags_;
+ int thread_start_;
+ int thread_end_;
+ int thread_dim_;
+};
+
+inline int HowManyConvThreads(const Shape &output_shape, const Shape &filter_shape)
+{
+ // How many scalar multiplications are needed to make it worth using one
+ // more thread.
+ static constexpr int kMinMulPerThread = 1 << 13; // 8k
+ const int filter_height = filter_shape.Dims(1);
+ const int filter_width = filter_shape.Dims(2);
+ const int num_muls = output_shape.FlatSize() * filter_height * filter_width;
+ // Try to avoid real runtime divisions if possible by dividing by a
+ // compile-time constant.
+ int thread_count = std::max(1, num_muls / kMinMulPerThread);
+ return thread_count;
+}
+
+inline bool MultithreadAlongBatches(int thread_count, int batches)
+{
+ assert(thread_count >= 2);
+ // If there are fewer batch entries than the number of threads we want to
+ // use, it is better to do intra-batch-entry multithreading.
+ if (batches < thread_count)
+ {
+ return false;
+ }
+ // If there are at least 2 batch entries to be handed to each thread, then
+ // it's safe to proceed with batch-wise multithreading: each thread will have
+ // an approximately equal number of batch entries to handle, so the load
+ // balancing will be reasonable, and the amount to which the load is not
+ // perfectly balanced will be offset by the inherent advantages of
+ // batch-wise multithreading (each thread is more efficient thanks to working
+ // on larger buffers with less boundary-handling overhead).
+ if (batches >= 2 * thread_count)
+ {
+ return true;
+ }
+ // In the limit case where there is at least one, but not many more than
+ // one, batch entry per thread, it may be a good idea to do per-batch
+ // multithreading if the number of batch entries is a multiple of the number
+ // of threads, so that each thread will have the same number of batch
+ // entries to process.
+ return ((batches % thread_count) == 0);
}
+template <typename T, typename TS>
inline void DepthwiseConv(const DepthwiseConvParams &params, const Shape &input_shape,
- const float *input_data, const Shape &filter_shape,
- const float *filter_data, const Shape &bias_shape, const float *bias_data,
- const Shape &output_shape, float *output_data)
+ const T *input_data, const Shape &filter_shape, const T *filter_data,
+ const Shape &bias_shape, const TS *bias_data, const Shape &output_shape,
+ T *output_data, ruy::Context *ruy_context)
{
- const int stride_width = params.stride_width;
- const int stride_height = params.stride_height;
- const int dilation_width_factor = params.dilation_width_factor;
- const int dilation_height_factor = params.dilation_height_factor;
- const int pad_width = params.padding_values.width;
- const int pad_height = params.padding_values.height;
- const int depth_multiplier = params.depth_multiplier;
- const float output_activation_min = params.float_activation_min;
- const float output_activation_max = params.float_activation_max;
assert(input_shape.DimensionsCount() == 4);
assert(filter_shape.DimensionsCount() == 4);
assert(output_shape.DimensionsCount() == 4);
- const int batches = MatchingDim(input_shape, 0, output_shape, 0);
- const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
- const int input_height = input_shape.Dims(1);
- const int input_width = input_shape.Dims(2);
- const int input_depth = input_shape.Dims(3);
- const int filter_height = filter_shape.Dims(1);
- const int filter_width = filter_shape.Dims(2);
+ int thread_count = HowManyConvThreads(output_shape, filter_shape);
+
+ // NOTE Borrow RuyContext to get max_num_threads setting
+ // TODO Define and use max_num_threads for CPU backend
+ const auto max_threads = (ruy_context == nullptr) ? 1 : ruy_context->max_num_threads();
+
+ thread_count = std::max(1, std::min(thread_count, max_threads));
+ // Cap the number of threads to 2 for float path to avoid regression in
+ // performance (b/132294857).
+ if (std::is_floating_point<T>::value)
+ {
+ thread_count = std::min(thread_count, 2);
+ }
+
+ const int output_batches = output_shape.Dims(0);
const int output_height = output_shape.Dims(1);
- const int output_width = output_shape.Dims(2);
- assert(output_depth == input_depth * depth_multiplier);
- assert(bias_shape.FlatSize() == output_depth);
- UNUSED_RELEASE(output_depth);
- UNUSED_RELEASE(bias_shape);
- for (int b = 0; b < batches; ++b)
+ if (thread_count == 1)
+ {
+ optimized::DepthwiseConvImpl(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data, 0, output_height,
+ 1);
+ return;
+ }
+
+ int thread_dim, thread_dim_size;
+ if (MultithreadAlongBatches(thread_count, output_batches))
+ {
+ thread_dim = 0;
+ thread_dim_size = output_batches;
+ }
+ else
+ {
+ thread_dim = 1;
+ thread_dim_size = output_height;
+ }
+
+ std::vector<DepthwiseConvWorkerTask<T, TS>> tasks;
+ // TODO(b/131746020) don't create new heap allocations every time.
+ // At least we make it a single heap allocation by using reserve().
+ tasks.reserve(thread_count);
+ int thread_start = 0;
+ for (int i = 0; i < thread_count; ++i)
{
- for (int out_y = 0; out_y < output_height; ++out_y)
- {
- for (int out_x = 0; out_x < output_width; ++out_x)
- {
- for (int ic = 0; ic < input_depth; ++ic)
- {
- for (int m = 0; m < depth_multiplier; m++)
- {
- const int oc = m + ic * depth_multiplier;
- const int in_x_origin = (out_x * stride_width) - pad_width;
- const int in_y_origin = (out_y * stride_height) - pad_height;
- float total = 0.f;
- for (int filter_y = 0; filter_y < filter_height; ++filter_y)
- {
- for (int filter_x = 0; filter_x < filter_width; ++filter_x)
- {
- const int in_x = in_x_origin + dilation_width_factor * filter_x;
- const int in_y = in_y_origin + dilation_height_factor * filter_y;
- // If the location is outside the bounds of the input image,
- // use zero as a default value.
- if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height))
- {
- float input_value = input_data[Offset(input_shape, b, in_y, in_x, ic)];
- float filter_value = filter_data[Offset(filter_shape, 0, filter_y, filter_x, oc)];
- total += (input_value * filter_value);
- }
- }
- }
- float bias_value = 0.0f;
- if (bias_data)
- {
- bias_value = bias_data[oc];
- }
- output_data[Offset(output_shape, b, out_y, out_x, oc)] = ActivationFunctionWithMinMax(
- total + bias_value, output_activation_min, output_activation_max);
- }
- }
- }
- }
+ int thread_end = thread_start + (thread_dim_size - thread_start) / (thread_count - i);
+ tasks.emplace_back(params, input_shape, input_data, filter_shape, filter_data, bias_shape,
+ bias_data, output_shape, output_data, thread_start, thread_end, thread_dim);
+ thread_start = thread_end;
}
+ cpu_backend_threadpool::Execute(tasks.size(), tasks.data(), ruy_context);
}
} // namespace cker
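
The per-task ranges above come from the step `thread_end = thread_start + (thread_dim_size - thread_start) / (thread_count - i)`, which splits the remaining work evenly so that range sizes differ by at most one. A standalone sketch of that partition arithmetic (names and values are mine):

#include <cstdio>

// Splits `size` units of work across `threads` workers using the same
// arithmetic as the task-creation loop above.
int main()
{
  const int size = 10, threads = 4;
  int start = 0;
  for (int i = 0; i < threads; ++i)
  {
    const int end = start + (size - start) / (threads - i);
    std::printf("thread %d: [%d, %d)\n", i, start, end);
    start = end;
  }
  // Prints: [0, 2), [2, 4), [4, 7), [7, 10)
}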
diff --git a/compute/cker/include/cker/operation/Dequantize.h b/compute/cker/include/cker/operation/Dequantize.h
new file mode 100644
index 000000000..c8c2fd9d4
--- /dev/null
+++ b/compute/cker/include/cker/operation/Dequantize.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_DEQUANTIZE_H__
+#define __NNFW_CKER_DEQUANTIZE_H__
+
+#include "cker/Shape.h"
+#include "cker/Types.h"
+#include "cker/neon/neon_check.h"
+
+namespace nnfw
+{
+namespace cker
+{
+
+#ifdef USE_NEON
+namespace
+{
+inline void ScaleWithNewZeroPoint(const int32x4_t input, const float32x4_t scale_dup,
+ const float32x4_t zero_times_scale_dup, float32x4_t *output)
+{
+#ifdef __ARM_FEATURE_FMA
+ *output = vfmaq_f32(zero_times_scale_dup, vcvtq_f32_s32(input), scale_dup);
+#else
+ *output = vaddq_f32(vmulq_f32(vcvtq_f32_s32(input), scale_dup), zero_times_scale_dup);
+#endif
+}
+} // namespace
+#endif // USE_NEON
+
+inline void Dequantize(const Shape &input_shape, const uint8_t *input_data,
+ const Shape &output_shape, float *output_data, const float scale,
+ const int32_t zero_point)
+{
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+
+ int i = 0;
+#ifdef USE_NEON
+ const float32x4_t scale_dup = vdupq_n_f32(static_cast<float>(scale));
+ const float32x4_t zero_times_scale_dup = vdupq_n_f32(static_cast<float>(-zero_point * scale));
+ for (; i <= flat_size - 8; i += 8)
+ {
+ const uint8x8_t input_u8 = vld1_u8(input_data + i);
+ const uint16x8_t input_u16 = vmovl_u8(input_u8);
+ const int16x8_t input_s16 = vreinterpretq_s16_u16(input_u16);
+ const int16x4_t input_s16_low = vget_low_s16(input_s16);
+ const int16x4_t input_s16_high = vget_high_s16(input_s16);
+ const int32x4_t val_low = vmovl_s16(input_s16_low);
+ const int32x4_t val_high = vmovl_s16(input_s16_high);
+
+ float32x4_t result_low, result_high;
+ ScaleWithNewZeroPoint(val_low, scale_dup, zero_times_scale_dup, &result_low);
+ ScaleWithNewZeroPoint(val_high, scale_dup, zero_times_scale_dup, &result_high);
+
+ vst1q_f32(output_data + i, result_low);
+ vst1q_f32(output_data + i + 4, result_high);
+ }
+#endif // USE_NEON
+ for (; i < flat_size; ++i)
+ {
+ const int32_t val = input_data[i];
+ const float result = static_cast<float>(scale * (val - zero_point));
+ output_data[i] = result;
+ }
+}
+
+inline void Dequantize(const Shape &input_shape, const int8_t *input_data,
+ const Shape &output_shape, float *output_data, const float scale,
+ const int32_t zero_point)
+{
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+
+ int i = 0;
+#ifdef USE_NEON
+ const float32x4_t scale_dup = vdupq_n_f32(static_cast<float>(scale));
+ const float32x4_t zero_times_scale_dup = vdupq_n_f32(static_cast<float>(-zero_point * scale));
+ for (; i <= flat_size - 8; i += 8)
+ {
+ const int8x8_t input_s8 = vld1_s8(input_data + i);
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ const int16x4_t input_s16_low = vget_low_s16(input_s16);
+ const int16x4_t input_s16_high = vget_high_s16(input_s16);
+ const int32x4_t val_low = vmovl_s16(input_s16_low);
+ const int32x4_t val_high = vmovl_s16(input_s16_high);
+
+ float32x4_t result_low, result_high;
+ ScaleWithNewZeroPoint(val_low, scale_dup, zero_times_scale_dup, &result_low);
+ ScaleWithNewZeroPoint(val_high, scale_dup, zero_times_scale_dup, &result_high);
+
+ vst1q_f32(output_data + i, result_low);
+ vst1q_f32(output_data + i + 4, result_high);
+ }
+#endif // USE_NEON
+ for (; i < flat_size; ++i)
+ {
+ const int32_t val = input_data[i];
+ const float result = static_cast<float>(scale * (val - zero_point));
+ output_data[i] = result;
+ }
+}
+
+inline void Dequantize(const Shape &input_shape, const int16_t *input_data,
+ const Shape &output_shape, float *output_data, const float scale,
+ const int32_t zero_point)
+{
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+
+ int i = 0;
+#ifdef USE_NEON
+ const float32x4_t scale_dup = vdupq_n_f32(static_cast<float>(scale));
+ const float32x4_t zero_times_scale_dup = vdupq_n_f32(static_cast<float>(-zero_point * scale));
+ for (; i <= flat_size - 8; i += 8)
+ {
+ const int16x4_t input_s16_low = vld1_s16(input_data + i);
+ const int16x4_t input_s16_high = vld1_s16(input_data + i + 4);
+ const int32x4_t val_low = vmovl_s16(input_s16_low);
+ const int32x4_t val_high = vmovl_s16(input_s16_high);
+
+ float32x4_t result_low, result_high;
+ ScaleWithNewZeroPoint(val_low, scale_dup, zero_times_scale_dup, &result_low);
+ ScaleWithNewZeroPoint(val_high, scale_dup, zero_times_scale_dup, &result_high);
+
+ vst1q_f32(output_data + i, result_low);
+ vst1q_f32(output_data + i + 4, result_high);
+ }
+#endif // USE_NEON
+ for (; i < flat_size; ++i)
+ {
+ const int32_t val = input_data[i];
+ const float result = static_cast<float>(scale * (val - zero_point));
+ output_data[i] = result;
+ }
+}
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_DEQUANTIZE_H__
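
The NEON paths above vectorize the scalar tail loop, which applies the affine dequantization formula real_value = scale * (quantized_value - zero_point). A spot-check with made-up quantization parameters:

#include <cstdint>
#include <cstdio>

int main()
{
  const uint8_t q[4] = {0, 128, 200, 255};
  const float scale = 0.5f;
  const int32_t zero_point = 128;
  for (int i = 0; i < 4; ++i)
    std::printf("%d -> %g\n", q[i],
                scale * (static_cast<int32_t>(q[i]) - zero_point));
  // Prints: 0 -> -64, 128 -> 0, 200 -> 36, 255 -> 63.5
}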
diff --git a/compute/cker/include/cker/operation/ELU.h b/compute/cker/include/cker/operation/ELU.h
new file mode 100644
index 000000000..6bdd7c62e
--- /dev/null
+++ b/compute/cker/include/cker/operation/ELU.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_ELU_H__
+#define __NNFW_CKER_ELU_H__
+
+#include "cker/Shape.h"
+
+#include <cmath>
+
+namespace nnfw
+{
+namespace cker
+{
+
+inline void ELU(const Shape &input_shape, const float *input_data, const Shape &output_shape,
+ float *output_data)
+{
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ const float val = input_data[i];
+ output_data[i] = val < 0.0 ? std::exp(val) - 1 : val;
+ }
+}
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_ELU_H__
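
A spot-check of the formula above (values are mine): ELU maps negative inputs to exp(x) - 1 and passes non-negative inputs through unchanged.

#include <cmath>
#include <cstdio>

int main()
{
  const float xs[3] = {-1.0f, 0.0f, 2.5f};
  for (float x : xs)
    std::printf("ELU(%g) = %g\n", x, x < 0.0f ? std::exp(x) - 1.0f : x);
  // Prints approximately: -0.632121, 0, 2.5
}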
diff --git a/compute/cker/include/cker/operation/Einsum.h b/compute/cker/include/cker/operation/Einsum.h
index 3d1837f47..bb9f88f8d 100644
--- a/compute/cker/include/cker/operation/Einsum.h
+++ b/compute/cker/include/cker/operation/Einsum.h
@@ -177,7 +177,7 @@ inline Shape copyShape(const Shape &shape)
{
return Shape::ExtendedShape(shape.DimensionsCount(), shape);
}
-}
+} // namespace
class Einsum
{
@@ -274,7 +274,7 @@ public:
}
for (int i = 0; i < num_inputs; ++i)
{
- for (int label : free_labels[i])
+ for (auto &&label : free_labels[i])
{
result_labels.push_back(label);
result_shape_dims.push_back(label_to_dim_sizes[label]);
@@ -300,7 +300,7 @@ public:
{
// We inflated the output. Modify result labels accordingly.
Labels inflated_labels;
- for (int label : result_labels)
+ for (auto &&label : result_labels)
{
inflated_labels.insert(inflated_labels.end(), output_label_counts[label], label);
}
@@ -394,8 +394,8 @@ private:
for (int label = 0; label < num_labels; ++label)
{
bool removed = (_output_label_counts[label] == 0);
- bool unique = num_inputs == 1 || _input_label_counts[0][label] == 0 ||
- _input_label_counts[1][label] == 0;
+ bool unique =
+ num_inputs == 1 || _input_label_counts[0][label] == 0 || _input_label_counts[1][label] == 0;
_label_types[label] = getDimensionType(removed, unique);
}
}
@@ -483,8 +483,8 @@ private:
if (inputs[i].shape.DimensionsCount() + 1 < (int32_t)labels->size())
{
throw std::runtime_error{"Expected input " + std::to_string(i) + " to have rank at least " +
- std::to_string(labels->size() - 1) + " but got: " +
- std::to_string(inputs[i].shape.DimensionsCount())};
+ std::to_string(labels->size() - 1) +
+ " but got: " + std::to_string(inputs[i].shape.DimensionsCount())};
}
int ellipsis_axis = -1;
const int num_bcast_dims = inputs[i].shape.DimensionsCount() - labels->size() + 1;
@@ -511,7 +511,7 @@ private:
}
std::vector<bool>::iterator it_input =
- std::find(_input_has_ellipsis.begin(), _input_has_ellipsis.end(), true);
+ std::find(_input_has_ellipsis.begin(), _input_has_ellipsis.end(), true);
if (it_input == _input_has_ellipsis.end() && !_output_has_ellipsis)
{
return;
@@ -645,11 +645,11 @@ private:
// Reduce along the last axis (i.e axis 1) of the rank-2 Tensor.
const int32_t output_size =
- reshape[kBroadcasting] * reshape[kBatch] * reshape[kFree] * reshape[kContract];
+ reshape[kBroadcasting] * reshape[kBatch] * reshape[kFree] * reshape[kContract];
functor::ReduceFunctor<Eigen::ThreadPoolDevice, Reducer>::Reduce(
- device, output->shaped<T, 1>({output_size}),
- input_deduped.shaped<T, 2>({output_size, reshape[kReduce]}), Eigen::array<Index, 1>({1}),
- Reducer());
+ device, output->shaped<T, 1>({output_size}),
+ input_deduped.shaped<T, 2>({output_size, reshape[kReduce]}), Eigen::array<Index, 1>({1}),
+ Reducer());
}
bool shouldSwapFreeAndContract(const Labels &labels,
@@ -775,11 +775,11 @@ private:
Shape inflated_shape;
std::vector<int32_t> strided_shape_dims;
std::vector<int32_t> inflated_shape_dims;
- for (int label : labels)
+ for (auto &&label : labels)
{
const int32_t count = label_counts[label];
const int current_axis =
- should_inflate ? strided_shape_dims.size() : inflated_shape_dims.size();
+ should_inflate ? strided_shape_dims.size() : inflated_shape_dims.size();
const int32_t dim = input.shape.Dims(current_axis);
strided_shape_dims.push_back(dim);
inflated_shape_dims.insert(inflated_shape_dims.end(), count, dim);
@@ -879,7 +879,7 @@ private:
for (size_t i = 0; i < inputs.size(); ++i)
{
const int32_t free_axis =
- inputs[i].shape.DimensionsCount() - (swap_free_and_contract[i] ? 1 : 2);
+ inputs[i].shape.DimensionsCount() - (swap_free_and_contract[i] ? 1 : 2);
output_shape.SetDim(i + old_output_shape.DimensionsCount(), inputs[i].shape.Dims(free_axis));
}
bool adj_x = swap_free_and_contract[0];
diff --git a/compute/cker/include/cker/operation/Elementwise.h b/compute/cker/include/cker/operation/Elementwise.h
index 598a032bb..0e980f18e 100644
--- a/compute/cker/include/cker/operation/Elementwise.h
+++ b/compute/cker/include/cker/operation/Elementwise.h
@@ -66,8 +66,9 @@ inline void Rsqrt(const Shape &input_shape, const float *input_data, const Shape
}
}
-inline void Neg(const Shape &input_shape, const float *input_data, const Shape &output_shape,
- float *output_data)
+template <typename T>
+inline void Neg(const Shape &input_shape, const T *input_data, const Shape &output_shape,
+ T *output_data)
{
const int size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < size; i++)
@@ -86,6 +87,39 @@ inline void Log(const Shape &input_shape, const float *input_data, const Shape &
}
}
+inline void Floor(const Shape &input_shape, const float *input_data, const Shape &output_shape,
+ float *output_data)
+{
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+
+ for (int i = 0; i < flat_size; i++)
+ {
+ output_data[i] = std::floor(input_data[i]);
+ }
+}
+
+inline void Sqrt(const Shape &input_shape, const float *input_data, const Shape &output_shape,
+ float *output_data)
+{
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+
+ for (int i = 0; i < flat_size; i++)
+ {
+ output_data[i] = std::sqrt(input_data[i]);
+ }
+}
+
+inline void Square(const Shape &input_shape, const float *input_data, const Shape &output_shape,
+ float *output_data)
+{
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+
+ for (int i = 0; i < flat_size; i++)
+ {
+ output_data[i] = input_data[i] * input_data[i];
+ }
+}
+
} // namespace cker
} // namespace nnfw
diff --git a/compute/cker/include/cker/operation/Fill.h b/compute/cker/include/cker/operation/Fill.h
index 14daf9839..f88c3a5fb 100644
--- a/compute/cker/include/cker/operation/Fill.h
+++ b/compute/cker/include/cker/operation/Fill.h
@@ -25,26 +25,12 @@ namespace nnfw
namespace cker
{
template <typename T>
-inline void Fill(const Shape &input_shape, int *input_data, const T value_data,
- const Shape &output_shape, T output_data)
+inline void Fill(const T *value_data, const Shape &output_shape, T *output_data)
{
- int input_size = input_shape.FlatSize();
- int output_size = 1;
- for (int i = 0; i < input_size; i++)
+ int output_size = output_shape.FlatSize();
+ for (int i = 0; i < output_size; i++)
{
- output_size *= input_data[i];
- }
-
- if (output_size == output_shape.FlatSize())
- {
- for (int i = 0; i < output_size; i++)
- {
- output_data[i] = *value_data;
- }
- }
- else
- {
- throw std::runtime_error("Cker Fill.h: output's size is not matched inferred size of output");
+ output_data[i] = *value_data;
}
}
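
After this change, Fill no longer re-derives the output size from a dims tensor; it simply broadcasts the scalar pointed to by value_data over the output. A minimal usage sketch (driver and values are mine, with the same initializer-list Shape assumption as above):

#include "cker/operation/Fill.h"
#include <cassert>
#include <cstdint>

void FillExample()
{
  using nnfw::cker::Shape;
  const int32_t value = 7;
  int32_t output[6] = {};
  nnfw::cker::Fill(&value, Shape{2, 3}, output);
  for (int i = 0; i < 6; ++i)
    assert(output[i] == 7);
}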
diff --git a/compute/cker/include/cker/operation/FloorDiv.h b/compute/cker/include/cker/operation/FloorDiv.h
new file mode 100644
index 000000000..cdb2c2a8b
--- /dev/null
+++ b/compute/cker/include/cker/operation/FloorDiv.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_FLOOR_DIV_H__
+#define __NNFW_CKER_FLOOR_DIV_H__
+
+#include "cker/Shape.h"
+#include "cker/Utils.h"
+
+namespace nnfw
+{
+namespace cker
+{
+
+template <typename T>
+inline void FloorDivBroadcast(const Shape &unextended_input1_shape, const T *input1_data,
+ const Shape &unextended_input2_shape, const T *input2_data,
+ const Shape &unextended_output_shape, T *output_data)
+{
+ assert(unextended_input1_shape.DimensionsCount() <= 4);
+ assert(unextended_input2_shape.DimensionsCount() <= 4);
+ assert(unextended_output_shape.DimensionsCount() <= 4);
+ const Shape output_shape = Shape::ExtendedShape(4, unextended_output_shape);
+
+ NdArrayDesc<4> desc1;
+ NdArrayDesc<4> desc2;
+ NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, unextended_input2_shape, &desc1,
+ &desc2);
+
+ for (int b = 0; b < output_shape.Dims(0); ++b)
+ {
+ for (int y = 0; y < output_shape.Dims(1); ++y)
+ {
+ for (int x = 0; x < output_shape.Dims(2); ++x)
+ {
+ for (int c = 0; c < output_shape.Dims(3); ++c)
+ {
+ auto out_idx = Offset(output_shape, b, y, x, c);
+ auto in1_idx = SubscriptToIndex(desc1, b, y, x, c);
+ auto in2_idx = SubscriptToIndex(desc2, b, y, x, c);
+ auto in1_val = input1_data[in1_idx];
+ auto in2_val = input2_data[in2_idx];
+ output_data[out_idx] = std::floor(
+ std::divides<double>()(static_cast<double>(in1_val), static_cast<double>(in2_val)));
+ }
+ }
+ }
+ }
+}
+
+template <typename T>
+inline void FloorDivElementwise(const Shape &shape, const T *input1_data, const T *input2_data,
+ T *output_data)
+{
+
+ int num_elements = shape.FlatSize();
+
+ for (int t = 0; t < num_elements; t++)
+ {
+ output_data[t] = std::floor(std::divides<double>()(static_cast<double>(input1_data[t]),
+ static_cast<double>(input2_data[t])));
+ }
+}
+
+} // namespace cker
+
+} // namespace nnfw
+#endif // __NNFW_CKER_FLOOR_DIV_H__
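
The detour through std::floor(std::divides<double>()(...)) matters for negative operands: C++ integer division truncates toward zero, while floor division rounds toward negative infinity. A two-line comparison (values are mine):

#include <cmath>
#include <cstdio>

int main()
{
  std::printf("truncating: %d\n", -7 / 2);                 // -3
  std::printf("floor:      %g\n", std::floor(-7.0 / 2.0)); // -4
}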
diff --git a/compute/cker/include/cker/operation/FullyConnected.h b/compute/cker/include/cker/operation/FullyConnected.h
index 4280c9ae2..71a2f19ef 100644
--- a/compute/cker/include/cker/operation/FullyConnected.h
+++ b/compute/cker/include/cker/operation/FullyConnected.h
@@ -19,10 +19,14 @@
#define __NNFW_CKER_FULLY_CONNECTED_H__
#include <ruy/context.h>
+#include "cker/operation/FullyConnectedDense16x1.h"
+#include "cker/operation/FullyConnectedSparse16x1.h"
+#include "cker/operation/optimized/Gemm.h"
#include "cker/Shape.h"
#include "cker/Types.h"
#include "cker/Utils.h"
#include "cker/TensorUtils.h"
+#include "cker/neon/neon_check.h"
namespace nnfw
{
@@ -55,6 +59,42 @@ public:
std::vector<int32_t> accum_scratch;
};
+#if defined(CKER_X86_PLATFORM)
+
+// From tensorflow/tensorflow/lite/kernels/internal/optimized/optimized_ops.h
+inline void FullyConnected(const FullyConnectedParams &params, const Shape &input_shape,
+ const float *input_data, const Shape &weights_shape,
+ const float *weights_data, const Shape &,
+ const float *optional_bias_data, const Shape &output_shape,
+ float *output_data)
+{
+ const int dims_count = weights_shape.DimensionsCount();
+ const int input_rows = weights_shape.Dims(dims_count - 1);
+ MatrixParams<float> rhs_params;
+ rhs_params.order = Order::kColMajor;
+ rhs_params.rows = input_rows;
+ rhs_params.cols = input_shape.FlatSize() / input_rows;
+ rhs_params.cache_policy = optimized::DefaultCachePolicy(params.rhs_cacheable);
+
+ MatrixParams<float> lhs_params;
+ lhs_params.order = Order::kRowMajor;
+ lhs_params.cols = weights_shape.Dims(dims_count - 1);
+ lhs_params.rows = FlatSizeSkipDim(weights_shape, dims_count - 1);
+ lhs_params.cache_policy = optimized::DefaultCachePolicy(params.lhs_cacheable);
+ MatrixParams<float> dst_params;
+ dst_params.order = Order::kColMajor;
+ dst_params.rows = output_shape.Dims(output_shape.DimensionsCount() - 1);
+ dst_params.cols = FlatSizeSkipDim(output_shape, output_shape.DimensionsCount() - 1);
+ GemmParams<float, float> gemm_params;
+ gemm_params.bias = optional_bias_data;
+ gemm_params.clamp_min = params.float_activation_min;
+ gemm_params.clamp_max = params.float_activation_max;
+ optimized::Gemm(lhs_params, weights_data, rhs_params, input_data, dst_params, output_data,
+ gemm_params);
+}
+
+#else // CKER_X86_PLATFORM
+
inline void FullyConnected(const FullyConnectedParams &params, const Shape &input_shape,
const float *input_data, const Shape &weights_shape,
const float *weights_data, const Shape &, const float *bias_data,
@@ -86,6 +126,8 @@ inline void FullyConnected(const FullyConnectedParams &params, const Shape &inpu
}
}
+#endif // CKER_X86_PLATFORM
+
inline void FullyConnected(const FullyConnectedParams &params, const Shape &input_shape,
const uint8_t *input_data, const Shape &filter_shape,
const uint8_t *filter_data, const Shape &bias_shape,
@@ -114,7 +156,7 @@ inline void FullyConnected(const FullyConnectedParams &params, const Shape &inpu
const int filter_dim_count = filter_shape.DimensionsCount();
const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
const int output_depth =
- MatchingDim(filter_shape, filter_dim_count - 2, output_shape, output_dim_count - 1);
+ MatchingDim(filter_shape, filter_dim_count - 2, output_shape, output_dim_count - 1);
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
for (int b = 0; b < batches; ++b)
{
@@ -208,12 +250,13 @@ inline void FullyConnectedHybrid(const FullyConnectedParams &params, const Shape
return;
}
-inline void FullyConnectedSparseWeight(const FullyConnectedParams &params, const Shape &input_shape,
- const float *input_data, const Shape &weights_shape,
- const float *weights_data, const Shape &bias_shape,
- const float *bias_data, const Shape &output_shape,
- float *output_data, int w0_size, const uint16_t *w1_segments,
- const uint16_t *w1_indices)
+inline void FullyConnectedSparseWeightRandom(const FullyConnectedParams &params,
+ const Shape &input_shape, const float *input_data,
+ const Shape &weights_shape, const float *weights_data,
+ const Shape &bias_shape, const float *bias_data,
+ const Shape &output_shape, float *output_data,
+ const uint16_t *w1_segments,
+ const uint16_t *w1_indices)
{
UNUSED_RELEASE(params);
UNUSED_RELEASE(input_shape);
@@ -225,7 +268,7 @@ inline void FullyConnectedSparseWeight(const FullyConnectedParams &params, const
const int weights_dims_count = weights_shape.DimensionsCount();
const int batches = FlatSizeSkipDim(output_shape, output_dims_count - 1);
const int output_depth =
- MatchingDim(weights_shape, weights_dims_count - 2, output_shape, output_dims_count - 1);
+ MatchingDim(weights_shape, weights_dims_count - 2, output_shape, output_dims_count - 1);
const int accum_depth = weights_shape.Dims(weights_dims_count - 1);
UNUSED_RELEASE(bias_shape);
@@ -239,13 +282,13 @@ inline void FullyConnectedSparseWeight(const FullyConnectedParams &params, const
}
for (int b = 0; b < batches; ++b)
{
- for (int idx_0 = 0; idx_0 < w0_size; ++idx_0)
+ for (int idx_0 = 0; idx_0 < output_depth; ++idx_0)
{
for (int pw1 = w1_segments[idx_0]; pw1 < w1_segments[idx_0 + 1]; ++pw1)
{
int idx_1 = w1_indices[pw1];
output_data[b * output_depth + idx_0] +=
- weights_data[pw1] * input_data[b * accum_depth + idx_1];
+ weights_data[pw1] * input_data[b * accum_depth + idx_1];
}
}
}
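
The renamed FullyConnectedSparseWeightRandom walks a CSR encoding: w1_segments[r] .. w1_segments[r + 1] delimit the nonzeros of output row r, and w1_indices holds their input-column positions. A tiny standalone mat-vec using the same encoding (matrix and values are mine):

#include <cstdint>
#include <cstdio>

int main()
{
  // 2x3 weight matrix {{1, 0, 2}, {0, 3, 0}} in CSR form.
  const uint16_t segments[3] = {0, 2, 3};
  const uint16_t indices[3] = {0, 2, 1};
  const float weights[3] = {1.f, 2.f, 3.f};
  const float input[3] = {10.f, 20.f, 30.f};
  float output[2] = {0.f, 0.f};
  for (int row = 0; row < 2; ++row)
    for (int p = segments[row]; p < segments[row + 1]; ++p)
      output[row] += weights[p] * input[indices[p]];
  std::printf("%g %g\n", output[0], output[1]); // 70 60
}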
diff --git a/compute/cker/include/cker/operation/FullyConnectedDense16x1.h b/compute/cker/include/cker/operation/FullyConnectedDense16x1.h
new file mode 100644
index 000000000..a7e9efd7f
--- /dev/null
+++ b/compute/cker/include/cker/operation/FullyConnectedDense16x1.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Copyright (c) 2018 Mozilla
+ 2008-2011 Octasic Inc.
+ 2012-2017 Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef __NNFW_CKER_FULLY_CONNECTED_DENSE16x1_H__
+#define __NNFW_CKER_FULLY_CONNECTED_DENSE16x1_H__
+
+#include "cker/Shape.h"
+#include "cker/Types.h"
+#include "cker/Utils.h"
+#include "cker/TensorUtils.h"
+
+namespace nnfw
+{
+namespace cker
+{
+#if defined(__aarch64__) && defined(USE_NEON)
+inline void FullyConnected16x1Float32(const FullyConnectedParams &params, const Shape &input_shape,
+ const float *input_data, const Shape &weights_shape,
+ const float *weights_data, const Shape &,
+ const float *bias_data, const Shape &, float *output_data)
+{
+ int total_input_size = input_shape.FlatSize();
+ int input_size = weights_shape.Dims(1);
+ const int batch_size = total_input_size / input_size;
+ const int num_units = weights_shape.Dims(0);
+
+ float *out = output_data;
+ const float *weights = weights_data;
+ int rows = num_units;
+ int cols = input_size;
+ int col_stride = input_size;
+ const float *x = input_data;
+
+ // Output = bias if bias tensor exists.
+ if (bias_data)
+ {
+ VectorBatchVectorAssign(bias_data, num_units, batch_size, output_data);
+ }
+ else
+ {
+ ZeroVector(output_data, batch_size * num_units);
+ }
+
+ // rows : out, cols : in
+ int i, j;
+ for (i = 0; i < rows; i += 16)
+ {
+ const float *w = &weights[i * col_stride];
+
+ /* keep y[0..15] in registers for duration of inner loop */
+ float *__restrict y = &out[i];
+
+ float32x4_t y0_3 = vld1q_f32(&y[0]);
+ float32x4_t y4_7 = vld1q_f32(&y[4]);
+ float32x4_t y8_11 = vld1q_f32(&y[8]);
+ float32x4_t y12_15 = vld1q_f32(&y[12]);
+
+ for (j = 0; j < cols; j++)
+ {
+ float32x4_t wvec0_3, wvec4_7, wvec8_11, wvec12_15;
+ float32x4_t xj;
+
+ xj = vld1q_dup_f32(&x[j]);
+
+ wvec0_3 = vld1q_f32(&w[0]);
+ y0_3 = vmlaq_f32(y0_3, wvec0_3, xj);
+ wvec4_7 = vld1q_f32(&w[4]);
+ y4_7 = vmlaq_f32(y4_7, wvec4_7, xj);
+ wvec8_11 = vld1q_f32(&w[8]);
+ y8_11 = vmlaq_f32(y8_11, wvec8_11, xj);
+ wvec12_15 = vld1q_f32(&w[12]);
+ y12_15 = vmlaq_f32(y12_15, wvec12_15, xj);
+
+ w += 16;
+ }
+
+ /* save y[0..15] back to memory */
+
+ vst1q_f32(&y[0], y0_3);
+ vst1q_f32(&y[4], y4_7);
+ vst1q_f32(&y[8], y8_11);
+ vst1q_f32(&y[12], y12_15);
+ }
+ if (params.activation != FusedActivationFunctionType::kNone)
+ {
+ // Apply activation function
+ ApplyActivationToVector(output_data, batch_size * num_units, params.activation, output_data);
+ }
+}
+#endif
+} // namespace cker
+} // namespace nnfw
+#endif // __NNFW_CKER_FULLY_CONNECTED_DENSE16x1_H__
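
The weight layout the NEON loop above expects (my reading of the &weights[i * col_stride] indexing, offered as a sketch rather than a guaranteed contract): output rows are grouped into blocks of 16, and within a block the 16 weights for one input column are contiguous. In scalar form:

// Maps original weight (r, c) to its flat offset in the 16x1 layout:
// block-major over 16-row groups, column-major inside each block.
inline int Weight16x1Offset(int cols, int r, int c)
{
  const int block = r / 16;
  const int lane = r % 16;
  return (block * cols + c) * 16 + lane;
}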
diff --git a/compute/cker/include/cker/operation/FullyConnectedSparse16x1.h b/compute/cker/include/cker/operation/FullyConnectedSparse16x1.h
new file mode 100644
index 000000000..df397f73e
--- /dev/null
+++ b/compute/cker/include/cker/operation/FullyConnectedSparse16x1.h
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Copyright (c) 2018 Mozilla
+ 2008-2011 Octasic Inc.
+ 2012-2017 Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef __NNFW_CKER_FULLY_CONNECTED_SPARSE16x1_H__
+#define __NNFW_CKER_FULLY_CONNECTED_SPARSE16x1_H__
+
+#include "cker/Shape.h"
+#include "cker/Types.h"
+#include "cker/Utils.h"
+#include "cker/TensorUtils.h"
+
+namespace nnfw
+{
+namespace cker
+{
+inline void FullyConnectedSparseWeight16x1(const FullyConnectedParams &params,
+ const Shape &input_shape, const float *input_data,
+ const Shape &weights_shape, const float *weights_data,
+ const Shape &bias_shape, const float *bias_data,
+ const Shape &output_shape, float *output_data,
+ const uint16_t *w1_segments, const uint16_t *w1_indices)
+{
+ UNUSED_RELEASE(input_shape);
+
+ assert(weights_shape.DimensionsCount() == 2);
+ assert(output_shape.DimensionsCount() == 2);
+
+ const int output_dims_count = output_shape.DimensionsCount();
+ const int weights_dims_count = weights_shape.DimensionsCount();
+ const int batches = FlatSizeSkipDim(output_shape, output_dims_count - 1);
+ const int output_depth =
+ MatchingDim(weights_shape, weights_dims_count - 2, output_shape, output_dims_count - 1);
+ const int accum_depth = weights_shape.Dims(weights_dims_count - 1);
+
+ UNUSED_RELEASE(bias_shape);
+ if (bias_data)
+ {
+ VectorBatchVectorAssign(bias_data, output_depth, batches, output_data);
+ }
+ else
+ {
+ ZeroVector(output_data, batches * output_depth);
+ }
+ for (int b = 0; b < batches; ++b)
+ {
+ int depth_size = output_depth / 16;
+ for (int idx_0 = 0; idx_0 < depth_size; ++idx_0)
+#ifdef USE_NEON
+ {
+ float *__restrict y;
+ y = &output_data[b * output_depth + idx_0 * 16];
+ /* keep y[0..15] in registers for duration of inner loop */
+ float32x4_t y0_3 = vld1q_f32(&y[0]);
+ float32x4_t y4_7 = vld1q_f32(&y[4]);
+ float32x4_t y8_11 = vld1q_f32(&y[8]);
+ float32x4_t y12_15 = vld1q_f32(&y[12]);
+ for (auto pw1 = w1_segments[idx_0]; pw1 < w1_segments[idx_0 + 1]; ++pw1)
+ {
+ auto idx_1 = w1_indices[pw1];
+ float32x4_t xj = vld1q_dup_f32(&input_data[b * accum_depth + idx_1]);
+ float32x4_t wvec;
+
+ wvec = vld1q_f32(&weights_data[0]);
+ y0_3 = vmlaq_f32(y0_3, wvec, xj);
+ wvec = vld1q_f32(&weights_data[4]);
+ y4_7 = vmlaq_f32(y4_7, wvec, xj);
+ wvec = vld1q_f32(&weights_data[8]);
+ y8_11 = vmlaq_f32(y8_11, wvec, xj);
+ wvec = vld1q_f32(&weights_data[12]);
+ y12_15 = vmlaq_f32(y12_15, wvec, xj);
+
+ weights_data += 16;
+ }
+ /* save y[0..15] back to memory */
+ vst1q_f32(&y[0], y0_3);
+ vst1q_f32(&y[4], y4_7);
+ vst1q_f32(&y[8], y8_11);
+ vst1q_f32(&y[12], y12_15);
+ }
+#else
+ {
+ for (auto pw1 = w1_segments[idx_0]; pw1 < w1_segments[idx_0 + 1]; ++pw1)
+ {
+ float *__restrict y;
+ float xj;
+ auto idx_1 = w1_indices[pw1];
+ xj = input_data[b * accum_depth + idx_1];
+ y = &output_data[b * output_depth + idx_0 * 16];
+ y[0] += weights_data[0] * xj;
+ y[1] += weights_data[1] * xj;
+ y[2] += weights_data[2] * xj;
+ y[3] += weights_data[3] * xj;
+ y[4] += weights_data[4] * xj;
+ y[5] += weights_data[5] * xj;
+ y[6] += weights_data[6] * xj;
+ y[7] += weights_data[7] * xj;
+ y[8] += weights_data[8] * xj;
+ y[9] += weights_data[9] * xj;
+ y[10] += weights_data[10] * xj;
+ y[11] += weights_data[11] * xj;
+ y[12] += weights_data[12] * xj;
+ y[13] += weights_data[13] * xj;
+ y[14] += weights_data[14] * xj;
+ y[15] += weights_data[15] * xj;
+ weights_data += 16;
+ }
+ }
+#endif
+ }
+ if (params.activation != FusedActivationFunctionType::kNone)
+ {
+ // Apply activation function
+ ApplyActivationToVector(output_data, batches * output_depth, params.activation, output_data);
+ }
+}
+} // namespace cker
+} // namespace nnfw
+#endif // __NNFW_CKER_FULLY_CONNECTED_SPARSE16x1_H__
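
Combining both ideas above, the sparse 16x1 kernel is a block-CSR mat-vec: w1_segments delimit the nonzero input columns of each 16-row output block, and every nonzero carries 16 contiguous weights, one per row in the block. A scalar sketch of the same product (names are mine; output is expected to hold the bias, or zeros, on entry, as in the kernel):

#include <cstdint>

void BlockCsr16x1MatVec(const float *weights, const uint16_t *segments,
                        const uint16_t *indices, const float *input,
                        float *output, int out_blocks)
{
  for (int blk = 0; blk < out_blocks; ++blk)
  {
    for (int p = segments[blk]; p < segments[blk + 1]; ++p)
    {
      const float x = input[indices[p]];
      for (int lane = 0; lane < 16; ++lane)
        output[blk * 16 + lane] += weights[p * 16 + lane] * x;
    }
  }
}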
diff --git a/compute/cker/include/cker/operation/FusedBatchNorm.h b/compute/cker/include/cker/operation/FusedBatchNorm.h
index d17a5796b..8a97d8421 100644
--- a/compute/cker/include/cker/operation/FusedBatchNorm.h
+++ b/compute/cker/include/cker/operation/FusedBatchNorm.h
@@ -105,7 +105,7 @@ public:
float rest_size_inv = static_cast<float>(1.0f / static_cast<float>(rest_size));
// This adjustment is for Bessel's correction
float rest_size_adjust =
- static_cast<float>(rest_size) / static_cast<float>(rest_size_minus_one);
+ static_cast<float>(rest_size) / static_cast<float>(rest_size_minus_one);
Eigen::Tensor<float, 1, Eigen::RowMajor> batch_mean(depth);
Eigen::Tensor<float, 1, Eigen::RowMajor> batch_variance(depth);
@@ -117,12 +117,12 @@ public:
batch_variance.device(d) = x_centered.square().sum(reduce_dims) * rest_size_inv;
auto scaling_factor = ((batch_variance + param.epsilon).rsqrt() * scale)
- .eval()
- .reshape(one_by_depth)
- .broadcast(bcast_spec);
+ .eval()
+ .reshape(one_by_depth)
+ .broadcast(bcast_spec);
auto x_scaled = x_centered * scaling_factor;
auto x_shifted =
- (x_scaled + offset.reshape(one_by_depth).broadcast(bcast_spec)).template cast<float>();
+ (x_scaled + offset.reshape(one_by_depth).broadcast(bcast_spec)).template cast<float>();
UNUSED_RELEASE(rest_size_adjust);
diff --git a/compute/cker/include/cker/operation/Helper/BCast.h b/compute/cker/include/cker/operation/Helper/BCast.h
index a0abf2935..211db98ce 100644
--- a/compute/cker/include/cker/operation/Helper/BCast.h
+++ b/compute/cker/include/cker/operation/Helper/BCast.h
@@ -22,7 +22,7 @@
* ToDo : This file will be moved into upper folder when integrate with other
* custom operations.
* And It should merged with EinsumHelper's BCast.
-**/
+ **/
#include "cker/Shape.h"
#include "cker/eigen/EigenSupport.h"
@@ -393,7 +393,7 @@ public:
BCast(const Vec &x, const Vec &y, const bool fewer_dims_optimization = true,
const bool return_flattened_batch_indices = false)
- : BCastList<2>({x, y}, fewer_dims_optimization, return_flattened_batch_indices)
+ : BCastList<2>({x, y}, fewer_dims_optimization, return_flattened_batch_indices)
{
}
diff --git a/compute/cker/include/cker/operation/Helper/MatmulBCast.h b/compute/cker/include/cker/operation/Helper/MatmulBCast.h
index b80ccc0d0..b7d639433 100644
--- a/compute/cker/include/cker/operation/Helper/MatmulBCast.h
+++ b/compute/cker/include/cker/operation/Helper/MatmulBCast.h
@@ -62,13 +62,13 @@ public:
if (!_batch_bcast->IsValid())
return;
- auto x_reshaped = _batch_bcast->x_reshape();
- auto y_reshaped = _batch_bcast->y_reshape();
+ const auto &x_reshaped = _batch_bcast->x_reshape();
+ const auto &y_reshaped = _batch_bcast->y_reshape();
auto output_shape = _batch_bcast->output_shape();
_x_batch_size = std::accumulate(x_reshaped.cbegin(), x_reshaped.cend(), INT32_C(1),
std::multiplies<int32_t>());
- _y_batch_size = std::accumulate(x_reshaped.cbegin(), x_reshaped.cend(), INT32_C(1),
+ _y_batch_size = std::accumulate(y_reshaped.cbegin(), y_reshaped.cend(), INT32_C(1),
std::multiplies<int32_t>());
_output_shape.ReplaceWith(output_shape.size(), output_shape.data());
_output_batch_size = _output_shape.FlatSize();
diff --git a/compute/cker/include/cker/operation/Helper/RandomDistributions.h b/compute/cker/include/cker/operation/Helper/RandomDistributions.h
index baeafd7c9..f16e5019d 100644
--- a/compute/cker/include/cker/operation/Helper/RandomDistributions.h
+++ b/compute/cker/include/cker/operation/Helper/RandomDistributions.h
@@ -168,7 +168,7 @@ public:
// Must have lo < hi
UniformDistribution(int32_t lo, int32_t hi)
- : lo_(lo), range_(static_cast<uint32_t>(hi) - static_cast<uint32_t>(lo))
+ : lo_(lo), range_(static_cast<uint32_t>(hi) - static_cast<uint32_t>(lo))
{
}
@@ -207,7 +207,7 @@ public:
// Must have lo < hi
UniformDistribution(int64_t lo, int64_t hi)
- : lo_(lo), range_(static_cast<uint64_t>(hi) - static_cast<uint64_t>(lo))
+ : lo_(lo), range_(static_cast<uint64_t>(hi) - static_cast<uint64_t>(lo))
{
}
@@ -291,22 +291,22 @@ public:
template <typename Generator>
class UniformFullIntDistribution<Generator, int32_t>
- : public UniformFullIntDistribution32<Generator, int32_t>
+ : public UniformFullIntDistribution32<Generator, int32_t>
{
};
template <typename Generator>
class UniformFullIntDistribution<Generator, uint32_t>
- : public UniformFullIntDistribution32<Generator, uint32_t>
+ : public UniformFullIntDistribution32<Generator, uint32_t>
{
};
template <typename Generator>
class UniformFullIntDistribution<Generator, int64_t>
- : public UniformFullIntDistribution64<Generator, int64_t>
+ : public UniformFullIntDistribution64<Generator, int64_t>
{
};
template <typename Generator>
class UniformFullIntDistribution<Generator, uint64_t>
- : public UniformFullIntDistribution64<Generator, uint64_t>
+ : public UniformFullIntDistribution64<Generator, uint64_t>
{
};
@@ -324,7 +324,7 @@ public:
PHILOX_DEVICE_INLINE
explicit SingleSampleAdapter(Generator *gen)
- : generator_(gen), used_result_index_(Generator::kResultElementCount)
+ : generator_(gen), used_result_index_(Generator::kResultElementCount)
{
}
@@ -615,8 +615,8 @@ class TruncatedNormalDistribution<SingleSampleGenerator, double>
public:
// The number of elements that will be returned.
static constexpr int kResultElementCount = (SingleSampleGenerator::kNativeElementCount > 1)
- ? SingleSampleGenerator::kNativeElementCount / 2
- : 1;
+ ? SingleSampleGenerator::kNativeElementCount / 2
+ : 1;
// Cost of generation of a single element (in cycles).
static constexpr int kElementCost = 90;
// Indicate that this distribution may take variable number of samples
@@ -772,7 +772,7 @@ PHILOX_DEVICE_INLINE double Uint64ToDouble(uint32_t x0, uint32_t x1)
}
} // namespace random
-} // namespace tensorflow
-}
+} // namespace cker
+} // namespace nnfw
#endif // __NNFW_CKER_HELPER_RANDOM_DISTRIBUTIONS_H__
diff --git a/compute/cker/include/cker/operation/Helper/RandomOp.h b/compute/cker/include/cker/operation/Helper/RandomOp.h
index 7dc51fe94..6b7049ddf 100644
--- a/compute/cker/include/cker/operation/Helper/RandomOp.h
+++ b/compute/cker/include/cker/operation/Helper/RandomOp.h
@@ -47,6 +47,6 @@ template <class Distribution> struct FillPhiloxRandom<CPUDevice, Distribution>
};
} // namespace functor
-} // namespace tensorflow
-}
+} // namespace cker
+} // namespace nnfw
#endif // __NNFW_CKER_HELPER_RANDOM_OP_H__
diff --git a/compute/cker/include/cker/operation/Helper/RandomOpCpu.h b/compute/cker/include/cker/operation/Helper/RandomOpCpu.h
index 85d267723..c99f69709 100644
--- a/compute/cker/include/cker/operation/Helper/RandomOpCpu.h
+++ b/compute/cker/include/cker/operation/Helper/RandomOpCpu.h
@@ -109,7 +109,7 @@ template <class Distribution> struct FillPhiloxRandomTask<Distribution, true>
{
const int kGroupSize = Distribution::kResultElementCount;
static const int kGeneratorSkipPerOutputGroup =
- kGroupSize * kReservedSamplesPerOutput / PhiloxRandom::kResultElementCount;
+ kGroupSize * kReservedSamplesPerOutput / PhiloxRandom::kResultElementCount;
int64_t offset = 0;
@@ -157,7 +157,7 @@ operator()(random::PhiloxRandom gen, typename Distribution::ResultElementType *d
} // namespace functor
-} // end namespace tensorflow
-}
+} // namespace cker
+} // namespace nnfw
#endif // __NNFW_CKER_HELPER_RANDOM_OP_CPU_H__
diff --git a/compute/cker/include/cker/operation/Helper/Tensor.h b/compute/cker/include/cker/operation/Helper/Tensor.h
index e6ac008a5..ec29a15c3 100644
--- a/compute/cker/include/cker/operation/Helper/Tensor.h
+++ b/compute/cker/include/cker/operation/Helper/Tensor.h
@@ -29,58 +29,58 @@ template <typename T, int NDIMS = 1, typename IndexType = Eigen::DenseIndex> str
{
// Rank-<NDIMS> tensor of scalar type T.
typedef Eigen::TensorMap<Eigen::Tensor<T, NDIMS, Eigen::RowMajor, IndexType>, Eigen::Aligned>
- Tensor;
+ Tensor;
typedef Eigen::TensorMap<Eigen::Tensor<const T, NDIMS, Eigen::RowMajor, IndexType>,
Eigen::Aligned>
- ConstTensor;
+ ConstTensor;
// Unaligned Rank-<NDIMS> tensor of scalar type T.
typedef Eigen::TensorMap<Eigen::Tensor<T, NDIMS, Eigen::RowMajor, IndexType>> UnalignedTensor;
typedef Eigen::TensorMap<Eigen::Tensor<const T, NDIMS, Eigen::RowMajor, IndexType>>
- UnalignedConstTensor;
+ UnalignedConstTensor;
typedef Eigen::TensorMap<Eigen::Tensor<T, NDIMS, Eigen::RowMajor, int>, Eigen::Aligned>
- Tensor32Bit;
+ Tensor32Bit;
// Scalar tensor (implemented as a rank-0 tensor) of scalar type T.
typedef Eigen::TensorMap<Eigen::TensorFixedSize<T, Eigen::Sizes<>, Eigen::RowMajor, IndexType>,
Eigen::Aligned>
- Scalar;
+ Scalar;
typedef Eigen::TensorMap<
- Eigen::TensorFixedSize<const T, Eigen::Sizes<>, Eigen::RowMajor, IndexType>, Eigen::Aligned>
- ConstScalar;
+ Eigen::TensorFixedSize<const T, Eigen::Sizes<>, Eigen::RowMajor, IndexType>, Eigen::Aligned>
+ ConstScalar;
// Unaligned Scalar tensor of scalar type T.
typedef Eigen::TensorMap<Eigen::TensorFixedSize<T, Eigen::Sizes<>, Eigen::RowMajor, IndexType>>
- UnalignedScalar;
+ UnalignedScalar;
typedef Eigen::TensorMap<
- Eigen::TensorFixedSize<const T, Eigen::Sizes<>, Eigen::RowMajor, IndexType>>
- UnalignedConstScalar;
+ Eigen::TensorFixedSize<const T, Eigen::Sizes<>, Eigen::RowMajor, IndexType>>
+ UnalignedConstScalar;
// Rank-1 tensor (vector) of scalar type T.
typedef Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, IndexType>, Eigen::Aligned> Flat;
typedef Eigen::TensorMap<Eigen::Tensor<const T, 1, Eigen::RowMajor, IndexType>, Eigen::Aligned>
- ConstFlat;
+ ConstFlat;
typedef Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, IndexType>, Eigen::Aligned> Vec;
typedef Eigen::TensorMap<Eigen::Tensor<const T, 1, Eigen::RowMajor, IndexType>, Eigen::Aligned>
- ConstVec;
+ ConstVec;
// Unaligned Rank-1 tensor (vector) of scalar type T.
typedef Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, IndexType>> UnalignedFlat;
typedef Eigen::TensorMap<Eigen::Tensor<const T, 1, Eigen::RowMajor, IndexType>>
- UnalignedConstFlat;
+ UnalignedConstFlat;
typedef Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, IndexType>> UnalignedVec;
typedef Eigen::TensorMap<Eigen::Tensor<const T, 1, Eigen::RowMajor, IndexType>> UnalignedConstVec;
// Rank-2 tensor (matrix) of scalar type T.
typedef Eigen::TensorMap<Eigen::Tensor<T, 2, Eigen::RowMajor, IndexType>, Eigen::Aligned> Matrix;
typedef Eigen::TensorMap<Eigen::Tensor<const T, 2, Eigen::RowMajor, IndexType>, Eigen::Aligned>
- ConstMatrix;
+ ConstMatrix;
// Unaligned Rank-2 tensor (matrix) of scalar type T.
typedef Eigen::TensorMap<Eigen::Tensor<T, 2, Eigen::RowMajor, IndexType>> UnalignedMatrix;
typedef Eigen::TensorMap<Eigen::Tensor<const T, 2, Eigen::RowMajor, IndexType>>
- UnalignedConstMatrix;
+ UnalignedConstMatrix;
};
typedef typename TTypes<float, 1>::Tensor32Bit::Index Index32;
diff --git a/compute/cker/include/cker/operation/InstanceNorm.h b/compute/cker/include/cker/operation/InstanceNorm.h
index 6445e8a2b..8fa8b03bc 100644
--- a/compute/cker/include/cker/operation/InstanceNorm.h
+++ b/compute/cker/include/cker/operation/InstanceNorm.h
@@ -78,8 +78,8 @@ inline void InstanceNorm(const InstanceNormParams &params, const Shape &input_sh
double input_value = input_data[Offset(output_shape, batch, height, width, channel)];
double output_value = input_value * a + b;
output_data[Offset(output_shape, batch, height, width, channel)] =
- ActivationFunctionWithMinMax((float)output_value, output_activation_min,
- output_activation_max);
+ ActivationFunctionWithMinMax((float)output_value, output_activation_min,
+ output_activation_max);
}
}
}
diff --git a/compute/cker/include/cker/operation/L2Normalize.h b/compute/cker/include/cker/operation/L2Normalize.h
index a0075c3d0..c1fca91cc 100644
--- a/compute/cker/include/cker/operation/L2Normalize.h
+++ b/compute/cker/include/cker/operation/L2Normalize.h
@@ -77,7 +77,7 @@ void L2NormalizeQuant8(L2NormParams &params, const Shape &input_shape, const uin
{
int32_t diff = *input_data - input_zero_point;
int32_t rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOneExp(
- 128 * diff, inv_l2norm_multiplier, inv_l2norm_shift);
+ 128 * diff, inv_l2norm_multiplier, inv_l2norm_shift);
int32_t unclamped_output_val = 128 + rescaled_diff;
int32_t output_val = std::min(static_cast<int32_t>(255),
std::max(static_cast<int32_t>(0), unclamped_output_val));
diff --git a/compute/cker/include/cker/operation/LSTM.h b/compute/cker/include/cker/operation/LSTM.h
new file mode 100644
index 000000000..a8f1f8ca3
--- /dev/null
+++ b/compute/cker/include/cker/operation/LSTM.h
@@ -0,0 +1,371 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_UNIDIRECTIONALSEQUENCELSTM_H__
+#define __NNFW_CKER_UNIDIRECTIONALSEQUENCELSTM_H__
+
+#include "cker/TensorUtils.h"
+#include "cker/Types.h"
+
+namespace nnfw
+{
+namespace cker
+{
+
+// LINT.IfChange
+// Calculates a single LSTM gate.
+//
+// Implements the following formula: (* is matrix multiply)
+// gate = activate(W_input * input + W_aux * aux_input +
+// W_peephole * cell + W_recurrent * prev_output + bias)
+// with layer norm:
+// gate = activate(W_norm * normalize(...) + bias) // not adding bias inside
+//
+// Activation is sigmoid except for the "cell" gate (configurable, usually tanh)
+//
+// Parameters:
+// Input vectors (to LSTM): | Size: | Optional?
+// input | n_input |
+// aux_input | n_aux_input | y (bidir LSTM)
+// Input vectors (persistent states):
+// output_state | n_output |
+// cell_state | n_cell |
+// 'Constant' inputs:
+// input_to_gate_weights | n_cell * n_input |
+// aux_input_to_gate_weights | n_cell * n_aux_input | y (bidir LSTM)
+// recurrent_to_gate_weights | n_cell * n_output |
+// cell_to_gate_weights | n_cell | y (peephole)
+// gate_bias | n_cell |
+// layer_norm_coefficients | n_cell | y (layer norm)
+// Output vector:
+// gate | n_cell |
+// Scalar parameters:
+// n_batch - batch size / number of vectors
+// n_input, n_aux_input, n_output, n_cell - size of vectors.
+// activation - activation to use.
+// is_input_all_zeros, is_aux_input_all_zeros - if input vectors are all zero.
+// use_layer_norm - if doing layer norm LSTM.
+inline void CalculateLstmGateFloat(const float *input, const float *input_to_gate_weights,
+ const float *aux_input, const float *aux_input_to_gate_weights,
+ const float *output_state,
+ const float *recurrent_to_gate_weights, const float *cell_state,
+ const float *cell_to_gate_weights,
+ const float *layer_norm_coefficients, const float *gate_bias,
+ const int n_batch, const int n_input, const int n_aux_input,
+ const int n_output, const int n_cell,
+ const FusedActivationFunctionType activation, float *gate,
+ const bool is_input_all_zeros, const bool is_aux_input_all_zeros)
+{
+ const bool use_peephole = (cell_to_gate_weights != nullptr);
+ const bool use_layer_norm = (layer_norm_coefficients != nullptr);
+
+ // Initialize scratch buffers with bias for regular lstm or initialize with
+ // zero for layer norm lstm.
+ if (use_layer_norm)
+ {
+ std::fill_n(gate, n_cell * n_batch, 0.0f);
+ }
+ else
+ {
+ VectorBatchVectorAssign(gate_bias, n_cell, n_batch, gate);
+ }
+ // For each batch and cell: compute input_weight * input.
+ // Skip if input is all zeros.
+ if (!is_input_all_zeros)
+ {
+ MatrixBatchVectorMultiplyAccumulate(input_to_gate_weights, n_cell, n_input, input, n_batch,
+ gate, /*result_stride=*/1);
+ }
+ // For each batch and cell: compute aux_input_weight * aux_input.
+ // Skip if auxiliary input is not available or all zeros.
+ if (!is_aux_input_all_zeros)
+ {
+ MatrixBatchVectorMultiplyAccumulate(aux_input_to_gate_weights, n_cell, n_aux_input, aux_input,
+ n_batch, gate, /*result_stride=*/1);
+ }
+ // For each batch and cell: compute recurrent_weight * output_state.
+ MatrixBatchVectorMultiplyAccumulate(recurrent_to_gate_weights, n_cell, n_output, output_state,
+ n_batch, gate, /*result_stride=*/1);
+ // For each batch and cell: compute cell_weight .* cell_state (peephole LSTM)
+ if (use_peephole)
+ {
+ VectorBatchVectorCwiseProductAccumulate(cell_to_gate_weights, n_cell, cell_state, n_batch,
+ gate);
+ }
+ // Do layer normalization (if layer norm LSTM)
+ if (use_layer_norm)
+ {
+ MeanStddevNormalization(gate, gate, n_cell, n_batch);
+ VectorBatchVectorCwiseProduct(layer_norm_coefficients, n_cell, gate, n_batch, gate);
+ VectorBatchVectorAdd(gate_bias, n_cell, n_batch, gate);
+ }
+ // Apply activation
+ ApplyActivationToVector(gate, n_batch * n_cell, activation, gate);
+}
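+
+// What follows is a hypothetical scalar sketch of the gate formula documented
+// above, for n_batch == 1 with no aux input, peephole or layer norm. It is
+// here only to make the formula concrete; the kernels below do not call it.
+inline void LstmGateReferenceFloat(const float *input, const float *input_to_gate_weights,
+                                   const float *output_state,
+                                   const float *recurrent_to_gate_weights, const float *gate_bias,
+                                   int n_input, int n_output, int n_cell,
+                                   FusedActivationFunctionType activation, float *gate)
+{
+  for (int c = 0; c < n_cell; ++c)
+  {
+    float acc = gate_bias[c]; // bias
+    for (int i = 0; i < n_input; ++i) // W_input * input
+      acc += input_to_gate_weights[c * n_input + i] * input[i];
+    for (int o = 0; o < n_output; ++o) // W_recurrent * prev_output
+      acc += recurrent_to_gate_weights[c * n_output + o] * output_state[o];
+    gate[c] = acc;
+  }
+  ApplyActivationToVector(gate, n_cell, activation, gate); // activate(...)
+}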
+
+// Updates the LSTM cell state, used by both float and hybrid LSTM versions.
+//
+// Implements the following formula:
+// cell_state_new = clip(forget_gate * cell_state + input_gate * cell_gate)
+//
+// With CIFG LSTM, input gate is replaced by (1-forget_gate).
+//
+// Parameters:
+// - n_batch, n_cell: sizes of vectors
+// - cell_state: input/output vector, size n_batch*n_cell
+// - input_gate: input vector, size n_batch*n_cell.
+// - forget_gate: input/scratch vector, size n_batch*n_cell, modified with CIFG
+// - cell_gate: input vector, size n_batch*n_cell.
+// - use_cifg: use 1-forget_gate instead of input_gate.
+// - clip: if > 0, clip the resulting cell state to [-clip, +clip].
+inline void UpdateLstmCellFloat(int n_batch, int n_cell, float *cell_state, const float *input_gate,
+ float *forget_gate, const float *cell_gate, bool use_cifg, float clip)
+{
+ // Define variable for 4th argument to avoid warning
+ // Compiler warning: passing argument 4 to restrict-qualified parameter aliases with argument 2
+ const float *cwise_product_rhs = cell_state;
+ VectorVectorCwiseProduct(forget_gate, cwise_product_rhs, n_batch * n_cell, cell_state);
+
+ if (use_cifg)
+ {
+ // With CIFG, input_gate = 1-forget_gate. Use the forget_gate array as
+ // scratch, as input_gate array is not allocated in this case. (Be careful
+ // not to write to the scratch before reading the forget gate data.)
+ float *scratch = forget_gate;
+ Sub1Vector(forget_gate, n_batch * n_cell, scratch);
+ VectorVectorCwiseProductAccumulate(cell_gate, scratch, n_batch * n_cell, cell_state);
+ }
+ else
+ {
+ VectorVectorCwiseProductAccumulate(cell_gate, input_gate, n_batch * n_cell, cell_state);
+ }
+ if (clip > 0.0f)
+ {
+ CwiseClipping(cell_state, n_batch * n_cell, clip);
+ }
+}
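+
+// Hypothetical scalar sketch of the cell-state update above, for a single
+// cell, added for clarity only. Assumes <algorithm> is available for
+// std::min/std::max.
+inline float UpdateOneLstmCellFloat(float cell_state, float input_gate, float forget_gate,
+                                    float cell_gate, bool use_cifg, float clip)
+{
+  // With CIFG the input gate is derived from the forget gate.
+  const float in_gate = use_cifg ? 1.0f - forget_gate : input_gate;
+  float new_state = forget_gate * cell_state + in_gate * cell_gate;
+  if (clip > 0.0f)
+    new_state = std::min(std::max(new_state, -clip), clip); // clip to [-clip, +clip]
+  return new_state;
+}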
+
+// Calculates the output state tensor of an LSTM step.
+//
+// Implements the following formula:
+// output_no_projection = output_gate .* activate(cell_state)
+// (elementwise vector product)
+// If no projection is used:
+// output = output_state = output_no_projection
+// With projection:
+// output = output_state = clip(W*output_no_projection + bias)
+//
+// Output might have a different 'stride' than n_batch, so we need to copy.
+//
+// Parameters:
+// - n_batch: batches: the number of distinct vectors in each array.
+// - n_cell, n_output: sizes of vectors.
+// - cell_state, output_gate: input vectors, size n_batch*n_cell.
+// - projection_weights, projection_weights_scale, projection_bias:
+// constant inputs, describing projection matrix and bias.
+// - proj_clip: if > 0, clip the output of the projection.
+// - output_state: output vector, size n_batch*n_output. Must be contiguous.
+// - scratch: scratch area, size n_batch*n_cell.
+inline void CalculateLstmOutputFloat(int n_batch, int n_cell, int n_output, const float *cell_state,
+ const float *output_gate, FusedActivationFunctionType activation,
+ const float *projection_weights, const float *projection_bias,
+ const float proj_clip, float *output_state, float *scratch)
+{
+ ApplyActivationToVector(cell_state, n_batch * n_cell, activation, scratch);
+
+ // Define variable for 4th argument to avoid warning
+ // Compiler warning: passing argument 4 to restrict-qualified parameter aliases with argument 2
+ const float *cwise_product_rhs = scratch;
+ VectorVectorCwiseProduct(output_gate, cwise_product_rhs, n_batch * n_cell, scratch);
+
+ const bool use_projection = (projection_weights != nullptr);
+ const bool use_projection_bias = (projection_bias != nullptr);
+
+ if (use_projection)
+ {
+ if (use_projection_bias)
+ {
+ VectorBatchVectorAssign(projection_bias, n_output, n_batch, output_state);
+ }
+ else
+ {
+ std::fill_n(output_state, n_batch * n_output, 0.0f);
+ }
+ MatrixBatchVectorMultiplyAccumulate(projection_weights, n_output, n_cell, scratch, n_batch,
+ output_state, /*result_stride=*/1);
+ if (proj_clip > 0.0f)
+ {
+ CwiseClipping(output_state, n_batch * n_output, proj_clip);
+ }
+ }
+ else
+ {
+ std::copy_n(scratch, n_batch * n_output, output_state);
+ }
+}
+
+// Performs an LSTM batch inference step for input specified by input_ptr.
+// The LSTM cell is specified by the pointers to its weights (*_weights_ptr) and
+// biases (*_bias_ptr), and buffers (*_scratch), along with additional
+// parameters:
+// - params: various LSTM params including activation, clipping, etc.,
+// - n_batch: size of batch,
+// - n_cell: number of cells (or units),
+// - n_input: the input size,
+// - n_aux_input: the auxiliary input size.
+// - n_output: the output size.
+// - output_batch_leading_dim: the leading dimension of the output buffer.
+//
+// Input of size 'n_batch * n_input':
+// input_ptr
+// Input of size 'n_batch * n_aux_input':
+// aux_input_ptr - optional (can be nullptr)
+//
+// LSTM weights:
+// Input weights of size 'n_cell * n_input':
+// input_to_input_weights - optional
+// input_to_forget_weights
+// input_to_cell_weights
+// input_to_output_weights
+// Auxiliary input weights of size 'n_cell * n_aux_input':
+// aux_input_to_input_weights - optional
+// aux_input_to_forget_weights - optional
+// aux_input_to_cell_weights - optional
+// aux_input_to_output_weights - optional
+// Recurrent weights of size 'n_cell * n_output':
+// recurrent_to_input_weights - optional
+// recurrent_to_forget_weights
+// recurrent_to_cell_weights
+//   recurrent_to_output_weights
+// Peephole weights of size 'n_cell', representing diagonal matrices.
+// cell_to_input_weights - optional
+//   cell_to_forget_weights - optional
+// cell_to_output_weights - optional
+// Projection weights of size 'n_output * n_cell'
+// projection_weights_ptr - optional
+// Gate biases of size 'n_cell':
+// input_gate_bias_ptr - optional
+// forget_gate_bias_ptr
+// cell_gate_bias_ptr
+// output_gate_bias_ptr
+//
+// Layer norm coefficients of size 'n_cell', representing diagonal matrices.
+// input_layer_norm_coefficients_ptr - optional
+// forget_layer_norm_coefficients_ptr - optional
+// cell_layer_norm_coefficients_ptr - optional
+// output_layer_norm_coefficients_ptr - optional
+//
+// The pointers to the cell and output state and the output are updated.
+//
+// The pointers input_ptr, aux_input_ptr, and output_ptr point to data aligned
+// in batch_major order, and each step processes batch_size many inputs from
+// input_ptr, and updates batch_size many cell and output states.
+//
+// The output_batch_leading_dim is output.shape[-1], i.e. the innermost
+// dimension of the output tensor, and in most cases will be equal to n_output.
+// It is usually not equal when we want to store the LSTM output into a slice
+// of the output tensor, e.g.
+// for bidirectional LSTMs with merge_outputs. In this case, the batched
+// operations cannot be used since they assume that the batched outputs are
+// contiguous, and we manually loop over the batched outputs.
+// LINT.IfChange
+inline void LstmStepFloat(
+ const float *input_ptr, const float *input_to_input_weights_ptr,
+ const float *input_to_forget_weights_ptr, const float *input_to_cell_weights_ptr,
+ const float *input_to_output_weights_ptr, const float *aux_input_ptr,
+ const float *aux_input_to_input_weights_ptr, const float *aux_input_to_forget_weights_ptr,
+ const float *aux_input_to_cell_weights_ptr, const float *aux_input_to_output_weights_ptr,
+ const float *recurrent_to_input_weights_ptr, const float *recurrent_to_forget_weights_ptr,
+ const float *recurrent_to_cell_weights_ptr, const float *recurrent_to_output_weights_ptr,
+ const float *cell_to_input_weights_ptr, const float *cell_to_forget_weights_ptr,
+ const float *cell_to_output_weights_ptr, const float *input_layer_norm_coefficients_ptr,
+ const float *forget_layer_norm_coefficients_ptr, const float *cell_layer_norm_coefficients_ptr,
+ const float *output_layer_norm_coefficients_ptr, const float *input_gate_bias_ptr,
+ const float *forget_gate_bias_ptr, const float *cell_gate_bias_ptr,
+ const float *output_gate_bias_ptr, const float *projection_weights_ptr,
+ const float *projection_bias_ptr, const LSTMParams *params, int n_batch, int n_cell, int n_input,
+ int n_aux_input, int n_output, int output_batch_leading_dim, float *output_state_ptr,
+ float *cell_state_ptr, float *scratch0, float *scratch1, float *scratch2, float *scratch3,
+ float *output_ptr)
+{
+ // Since we have already checked that weights are all there or none, we can
+  // check the existence of only one to get the condition.
+ const bool use_cifg = (input_to_input_weights_ptr == nullptr);
+
+ // Make named scratch buffers.
+ float *input_gate_scratch = scratch0;
+ float *forget_gate_scratch = scratch1;
+ float *cell_gate_scratch = scratch2;
+ float *output_gate_scratch = scratch3;
+
+ // Check if inputs are all zeros so we can skip some computations.
+ const bool is_input_all_zeros = IsZeroVector(input_ptr, n_batch * n_input);
+ const bool is_aux_input_all_zeros =
+ (aux_input_ptr == nullptr || IsZeroVector(aux_input_ptr, n_batch * n_aux_input));
+ if (!use_cifg)
+ {
+ // Calculate the input gate. (If not CIFG.)
+ CalculateLstmGateFloat(input_ptr, input_to_input_weights_ptr, aux_input_ptr,
+ aux_input_to_input_weights_ptr, output_state_ptr,
+ recurrent_to_input_weights_ptr, cell_state_ptr,
+ cell_to_input_weights_ptr, input_layer_norm_coefficients_ptr,
+ input_gate_bias_ptr, n_batch, n_input, n_aux_input, n_output, n_cell,
+ /*activation=kTfLiteActSigmoid*/ FusedActivationFunctionType::kSigmoid,
+ input_gate_scratch, is_input_all_zeros, is_aux_input_all_zeros);
+ }
+ // Calculate the forget gate.
+ CalculateLstmGateFloat(input_ptr, input_to_forget_weights_ptr, aux_input_ptr,
+ aux_input_to_forget_weights_ptr, output_state_ptr,
+ recurrent_to_forget_weights_ptr, cell_state_ptr,
+ cell_to_forget_weights_ptr, forget_layer_norm_coefficients_ptr,
+ forget_gate_bias_ptr, n_batch, n_input, n_aux_input, n_output, n_cell,
+ /*activation=kTfLiteActSigmoid*/ FusedActivationFunctionType::kSigmoid,
+ forget_gate_scratch, is_input_all_zeros, is_aux_input_all_zeros);
+ // Calculate the cell update gate.
+ CalculateLstmGateFloat(
+ input_ptr, input_to_cell_weights_ptr, aux_input_ptr, aux_input_to_cell_weights_ptr,
+ output_state_ptr, recurrent_to_cell_weights_ptr, /*cell_state=*/nullptr,
+ /*cell_to_gate_weights=*/nullptr, cell_layer_norm_coefficients_ptr, cell_gate_bias_ptr, n_batch,
+ n_input, n_aux_input, n_output, n_cell, params->activation, cell_gate_scratch,
+ is_input_all_zeros, is_aux_input_all_zeros);
+ // Update the cell state.
+ UpdateLstmCellFloat(n_batch, n_cell, cell_state_ptr, input_gate_scratch, forget_gate_scratch,
+ cell_gate_scratch, use_cifg, params->cell_clip);
+ // Calculate output gate.
+ CalculateLstmGateFloat(input_ptr, input_to_output_weights_ptr, aux_input_ptr,
+ aux_input_to_output_weights_ptr, output_state_ptr,
+ recurrent_to_output_weights_ptr, cell_state_ptr,
+ cell_to_output_weights_ptr, output_layer_norm_coefficients_ptr,
+ output_gate_bias_ptr, n_batch, n_input, n_aux_input, n_output, n_cell,
+ /*activation=kTfLiteActSigmoid*/ FusedActivationFunctionType::kSigmoid,
+ output_gate_scratch, is_input_all_zeros, is_aux_input_all_zeros);
+ // Update the output state.
+ CalculateLstmOutputFloat(n_batch, n_cell, n_output, cell_state_ptr, output_gate_scratch,
+ params->activation, projection_weights_ptr, projection_bias_ptr,
+ params->proj_clip, output_state_ptr, scratch2);
+ // Copy output state to the output. Note that the output's rows may not be
+ // contiguous (output_batch_leading_dim != n_output).
+ for (int b = 0; b < n_batch; b++)
+ {
+ std::copy_n(output_state_ptr + b * n_output, n_output,
+ output_ptr + b * output_batch_leading_dim);
+ }
+}
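+
+// Hypothetical setup sketch for LstmStepFloat (illustration only, assumes
+// <vector> is available): each of the four scratch buffers holds one float
+// per cell per batch entry, and scratch0..scratch3 correspond to the
+// input/forget/cell/output gate scratch areas named inside the function.
+struct LstmStepScratchFloat
+{
+  std::vector<float> input_gate, forget_gate, cell_gate, output_gate;
+  LstmStepScratchFloat(int n_batch, int n_cell)
+    : input_gate(n_batch * n_cell), forget_gate(n_batch * n_cell),
+      cell_gate(n_batch * n_cell), output_gate(n_batch * n_cell)
+  {
+  }
+};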
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_UNIDIRECTIONALSEQUENCELSTM_H__
diff --git a/compute/cker/include/cker/operation/LeakyReLU.h b/compute/cker/include/cker/operation/LeakyReLU.h
new file mode 100644
index 000000000..e12d01bba
--- /dev/null
+++ b/compute/cker/include/cker/operation/LeakyReLU.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_LEAKY_RELU_H__
+#define __NNFW_CKER_LEAKY_RELU_H__
+
+#include "cker/Shape.h"
+#include "cker/Types.h"
+
+#include <cmath>
+
+namespace nnfw
+{
+namespace cker
+{
+
+inline void LeakyReLU(const LeakyReluParams &params, const Shape &input_shape,
+ const float *input_data, const Shape &output_shape, float *output_data)
+{
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+
+ for (int i = 0; i < flat_size; i++)
+ {
+ const float val = input_data[i];
+ // Note that alpha might be > 1 or < 0, so we don't use std::max here.
+ output_data[i] = val > 0 ? val : val * params.alpha;
+ }
+}
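+
+// Hypothetical usage sketch (assuming Shape's initializer-list constructor):
+// with alpha = 0.2f, inputs {-5, -1, 0, 3} map to {-1, -0.2, 0, 3}.
+inline void LeakyReLUExample()
+{
+  const Shape shape{1, 1, 1, 4}; // flat size 4
+  const float input[4] = {-5.f, -1.f, 0.f, 3.f};
+  float output[4];
+  LeakyReluParams params;
+  params.alpha = 0.2f;
+  LeakyReLU(params, shape, input, shape, output); // output: {-1, -0.2, 0, 3}
+}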
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_LEAKY_RELU_H__
diff --git a/compute/cker/include/cker/operation/LogSoftMax.h b/compute/cker/include/cker/operation/LogSoftMax.h
index 326a44f0c..eb7bdd900 100644
--- a/compute/cker/include/cker/operation/LogSoftMax.h
+++ b/compute/cker/include/cker/operation/LogSoftMax.h
@@ -71,7 +71,7 @@ inline void LogSoftmax(const SoftmaxParams &params, const Shape &input_shape,
for (int c = 0; c < depth; ++c)
{
output_data[(i * depth + c) * inner_size + j] =
- (input_data[(i * depth + c) * inner_size + j] - max) * beta - log_sum;
+ (input_data[(i * depth + c) * inner_size + j] - max) * beta - log_sum;
}
}
}
@@ -124,10 +124,10 @@ inline void LogSoftmax(const SoftmaxParams &params, float input_scale, const Sha
for (int c = 0; c < depth; ++c)
{
const float log_prob =
- scale * input_data[(i * depth + c) * inner_size] * beta - precomputed;
+ scale * input_data[(i * depth + c) * inner_size] * beta - precomputed;
const int32_t prob_quantized = std::rint(log_prob) + params.zero_point;
output_data[(i * depth + c) * inner_size] =
- static_cast<uint8_t>(std::max(std::min(clamp_max, prob_quantized), clamp_min));
+ static_cast<uint8_t>(std::max(std::min(clamp_max, prob_quantized), clamp_min));
}
}
}
diff --git a/compute/cker/include/cker/operation/LogicalAnd.h b/compute/cker/include/cker/operation/LogicalAnd.h
new file mode 100644
index 000000000..e877f5f47
--- /dev/null
+++ b/compute/cker/include/cker/operation/LogicalAnd.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_LOGICAL_AND_H__
+#define __NNFW_CKER_LOGICAL_AND_H__
+
+#include "cker/Shape.h"
+#include "cker/Utils.h"
+
+namespace nnfw
+{
+namespace cker
+{
+
+template <typename T>
+inline void LogicalAndBroadcast(const Shape &unextended_input1_shape, const T *input1_data,
+ const Shape &unextended_input2_shape, const T *input2_data,
+ const Shape &unextended_output_shape, T *output_data)
+{
+ assert(unextended_input1_shape.DimensionsCount() <= 4);
+ assert(unextended_input2_shape.DimensionsCount() <= 4);
+ assert(unextended_output_shape.DimensionsCount() <= 4);
+ const Shape output_shape = Shape::ExtendedShape(4, unextended_output_shape);
+
+ NdArrayDesc<4> desc1;
+ NdArrayDesc<4> desc2;
+ NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, unextended_input2_shape, &desc1,
+ &desc2);
+
+ for (int b = 0; b < output_shape.Dims(0); ++b)
+ {
+ for (int y = 0; y < output_shape.Dims(1); ++y)
+ {
+ for (int x = 0; x < output_shape.Dims(2); ++x)
+ {
+ for (int c = 0; c < output_shape.Dims(3); ++c)
+ {
+ auto out_idx = Offset(output_shape, b, y, x, c);
+ auto in1_idx = SubscriptToIndex(desc1, b, y, x, c);
+ auto in2_idx = SubscriptToIndex(desc2, b, y, x, c);
+ auto in1_val = input1_data[in1_idx];
+ auto in2_val = input2_data[in2_idx];
+ output_data[out_idx] = in1_val && in2_val;
+ }
+ }
+ }
+ }
+}
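+
+// Hypothetical usage sketch (assuming Shape's initializer-list constructor):
+// a {1, 1, 1, 2} mask is broadcast against a {2, 1, 1, 2} input along the
+// batch dimension.
+inline void LogicalAndBroadcastExample()
+{
+  const Shape mask_shape{1, 1, 1, 2};
+  const Shape input_shape{2, 1, 1, 2};
+  const bool mask[2] = {true, false};
+  const bool input[4] = {true, true, false, true};
+  bool output[4];
+  LogicalAndBroadcast<bool>(mask_shape, mask, input_shape, input, input_shape, output);
+  // output == {true, false, false, false}
+}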
+
+template <typename T>
+inline void LogicalAndElementwise(const Shape &shape, const T *input1_data, const T *input2_data,
+ T *output_data)
+{
+
+ int num_elements = shape.FlatSize();
+
+ for (int t = 0; t < num_elements; t++)
+ {
+ output_data[t] = input1_data[t] && input2_data[t];
+ }
+}
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_LOGICAL_AND_H__
diff --git a/compute/cker/include/cker/operation/Logistic.h b/compute/cker/include/cker/operation/Logistic.h
index 3d3e59e55..e9907729e 100644
--- a/compute/cker/include/cker/operation/Logistic.h
+++ b/compute/cker/include/cker/operation/Logistic.h
@@ -29,12 +29,39 @@ namespace nnfw
namespace cker
{
+/**
+ * @brief Internal scalar_logistic_op operation struct
+ *
+ * @note Recent Eigen3 scalar_logistic_op returns an invalid value on ARM32
+ *       when the input is the float value 88 (expected: 1, actual: 0).
+ *       As a workaround, we use the old version of the scalar_logistic_op
+ *       internal struct.
+ *       TODO Remove this workaround
+ */
+template <typename T> struct scalar_logistic_op
+{
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_logistic_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T &x) const
+ {
+ const T one = T(1);
+ return one / (one + Eigen::numext::exp(-x));
+ }
+
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet &x) const
+ {
+ const Packet one = Eigen::internal::pset1<Packet>(T(1));
+ return pdiv(one, padd(one, pexp(pnegate(x))));
+ }
+};
+
inline void Logistic(const Shape &input_shape, const float *input_data, const Shape &output_shape,
float *output_data)
{
auto input_map = MapAsVector(input_data, input_shape);
auto output_map = MapAsVector(output_data, output_shape);
- output_map.array() = input_map.array().unaryExpr(Eigen::internal::scalar_logistic_op<float>());
+
+  // Use the old version of scalar_logistic_op
+ output_map.array() = input_map.array().unaryExpr(nnfw::cker::scalar_logistic_op<float>());
}
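+
+// Hypothetical sanity check for the workaround above: the scalar path is the
+// plain logistic 1 / (1 + e^-x), which saturates to ~1 for large inputs such
+// as 88 instead of the 0 produced by the broken vectorized op on ARM32.
+inline float LogisticWorkaroundExample() { return scalar_logistic_op<float>()(88.0f); } // ~1.0f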
} // namespace cker
diff --git a/compute/cker/include/cker/operation/MatrixBandPart.h b/compute/cker/include/cker/operation/MatrixBandPart.h
index 5674ff3ef..ef2868455 100644
--- a/compute/cker/include/cker/operation/MatrixBandPart.h
+++ b/compute/cker/include/cker/operation/MatrixBandPart.h
@@ -43,11 +43,11 @@ void MatrixBandPart(const T num_lower_diags, const T num_upper_diags, const Shap
if (!(num_lower_diags <= row_num))
throw std::runtime_error(
- "MatrixBandPart : num_lower must be negative or less or equal to number of rows");
+ "MatrixBandPart : num_lower must be negative or less or equal to number of rows");
if (!(num_upper_diags <= col_num))
throw std::runtime_error(
- "MatrixBandPart : num_upper must be negative or less or equal to number of columns");
+ "MatrixBandPart : num_upper must be negative or less or equal to number of columns");
std::fill(output_data, output_data + output_shape.FlatSize(), 0); // output matrix init
@@ -60,9 +60,10 @@ void MatrixBandPart(const T num_lower_diags, const T num_upper_diags, const Shap
auto input = input_data + (batch * row_num * col_num + row * col_num);
const T band_start =
- num_lower_diags < 0 ? 0 : std::min(col_num, std::max(T{0}, row - num_lower_diags));
- const T band_end = num_upper_diags < 0 ? col_num : std::min(static_cast<T>(col_num),
- row + num_upper_diags + 1);
+ num_lower_diags < 0 ? 0 : std::min(col_num, std::max(T{0}, row - num_lower_diags));
+ const T band_end = num_upper_diags < 0
+ ? col_num
+ : std::min(static_cast<T>(col_num), row + num_upper_diags + 1);
for (T band_idx = band_start; band_idx < band_end; band_idx++)
{
diff --git a/compute/cker/include/cker/operation/MaxPool.h b/compute/cker/include/cker/operation/MaxPool.h
index ea3fcaca6..5dc84d368 100644
--- a/compute/cker/include/cker/operation/MaxPool.h
+++ b/compute/cker/include/cker/operation/MaxPool.h
@@ -67,10 +67,10 @@ void MaxPool<float>(const PoolParams &params, const Shape &input_shape, const fl
int hpad = h + params.padding_values.height;
int wpad = w + params.padding_values.width;
int h_start =
- (hpad < params.filter_height) ? 0 : (hpad - params.filter_height) / stride_height + 1;
+ (hpad < params.filter_height) ? 0 : (hpad - params.filter_height) / stride_height + 1;
int h_end = std::min(hpad / stride_height + 1, output_height);
int w_start =
- (wpad < params.filter_width) ? 0 : (wpad - params.filter_width) / stride_width + 1;
+ (wpad < params.filter_width) ? 0 : (wpad - params.filter_width) / stride_width + 1;
int w_end = std::min(wpad / stride_width + 1, output_width);
// compute elementwise max
for (int ph = h_start; ph < h_end; ++ph)
@@ -79,8 +79,8 @@ void MaxPool<float>(const PoolParams &params, const Shape &input_shape, const fl
{
int out_offset = NodeOffset(b, ph, pw, output_height, output_width);
out_mat.col(out_offset) =
- out_mat.col(out_offset)
- .cwiseMax(in_mat.col(NodeOffset(b, h, w, input_height, input_width)));
+ out_mat.col(out_offset)
+ .cwiseMax(in_mat.col(NodeOffset(b, h, w, input_height, input_width)));
}
}
}
@@ -139,8 +139,8 @@ void MaxPool<uint8_t>(const PoolParams &params, const Shape &input_shape, const
const int filter_y_end = std::min(params.filter_height, input_height - in_y_origin);
memset(acc, 0, tranche_depth * sizeof(acc[0]));
const uint8_t *input_ptr =
- input_data + depth_base +
- depth * (in_x_origin + input_width * (in_y_origin + input_height * batch));
+ input_data + depth_base +
+ depth * (in_x_origin + input_width * (in_y_origin + input_height * batch));
for (int fy = filter_y_start; fy < filter_y_end; fy++)
{
const uint8_t *input_row_ptr = input_ptr + depth * (fy * input_width + filter_x_start);
diff --git a/compute/cker/include/cker/operation/OneHot.h b/compute/cker/include/cker/operation/OneHot.h
index c0dbc6df5..ddc27b4c2 100644
--- a/compute/cker/include/cker/operation/OneHot.h
+++ b/compute/cker/include/cker/operation/OneHot.h
@@ -55,7 +55,7 @@ void OneHot(const int32_t depth, const T on_value, const T off_value, int32_t ax
for (int k = 0; k < suffix_dim_size; ++k, ++output_data)
{
*output_data =
- static_cast<int>(indices_data[i * suffix_dim_size + k]) == j ? on_value : off_value;
+ static_cast<int>(indices_data[i * suffix_dim_size + k]) == j ? on_value : off_value;
}
}
}
diff --git a/compute/cker/include/cker/operation/Quantize.h b/compute/cker/include/cker/operation/Quantize.h
index 5c82d111f..7292a199a 100644
--- a/compute/cker/include/cker/operation/Quantize.h
+++ b/compute/cker/include/cker/operation/Quantize.h
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -17,11 +18,14 @@
#ifndef __NNFW_CKER_QUANTIZE_H__
#define __NNFW_CKER_QUANTIZE_H__
+#include "cker/operation/Round.h"
#include "cker/Shape.h"
#include "cker/Types.h"
#include "cker/Utils.h"
-#include <stdexcept>
+#include <cassert>
#include <iostream>
+#include <stdexcept>
+
namespace nnfw
{
namespace cker
@@ -41,6 +45,409 @@ inline void Quantize(const Shape &input_shape, const InputT *input_data, const S
output_data[i] = clamped;
}
}
+
+template <>
+inline void Quantize(const Shape &input_shape, const float *input_data, const Shape &output_shape,
+ int8_t *output_data, const float scale, const int32_t zero_point)
+{
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ static constexpr int32_t min_val = std::numeric_limits<int8_t>::min();
+ static constexpr int32_t max_val = std::numeric_limits<int8_t>::max();
+
+ int i = 0;
+#ifdef USE_NEON
+ const float32x4_t reverse_scale_dup = vdupq_n_f32(1.0f / scale);
+ const int32x4_t zero_point_dup = vdupq_n_s32(zero_point);
+ const int32x4_t min_val_dup = vdupq_n_s32(min_val);
+ const int32x4_t max_val_dup = vdupq_n_s32(max_val);
+
+ for (; i <= flat_size - 8; i += 8)
+ {
+ const float *src_data_ptr = input_data + i;
+ float32x4_t input_val_0 = vld1q_f32(src_data_ptr);
+ float32x4_t input_val_1 = vld1q_f32(src_data_ptr + 4);
+
+ input_val_0 = vmulq_f32(input_val_0, reverse_scale_dup);
+ input_val_1 = vmulq_f32(input_val_1, reverse_scale_dup);
+
+ int32x4_t casted_val_0 = RoundToNearest(input_val_0);
+ int32x4_t casted_val_1 = RoundToNearest(input_val_1);
+
+ casted_val_0 = vaddq_s32(casted_val_0, zero_point_dup);
+ casted_val_1 = vaddq_s32(casted_val_1, zero_point_dup);
+
+ // Clamp the values to fit the target type's range.
+ casted_val_0 = vmaxq_s32(casted_val_0, min_val_dup);
+ casted_val_1 = vmaxq_s32(casted_val_1, min_val_dup);
+ casted_val_0 = vminq_s32(casted_val_0, max_val_dup);
+ casted_val_1 = vminq_s32(casted_val_1, max_val_dup);
+
+ const int16x4_t narrowed_val_0 = vmovn_s32(casted_val_0);
+ const int16x4_t narrowed_val_1 = vmovn_s32(casted_val_1);
+ const int16x8_t combined_val = vcombine_s16(narrowed_val_0, narrowed_val_1);
+ const int8x8_t combined_val_narrowed = vmovn_s16(combined_val);
+ vst1_s8(output_data + i, combined_val_narrowed);
+ }
+#endif // NEON
+
+ for (; i < flat_size; ++i)
+ {
+ const float val = input_data[i];
+ const int32_t unclamped = static_cast<int32_t>(round(val / scale)) + zero_point;
+ const int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
+ output_data[i] = clamped;
+ }
+}
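+
+// Hypothetical scalar helper mirroring the tail loop above, for clarity:
+// with scale = 0.5f and zero_point = -1, 2.3f -> round(2.3 / 0.5) - 1 = 4.
+// Assumes <cmath>, <algorithm> and <limits> are available.
+inline int8_t QuantizeOneToInt8(float val, float scale, int32_t zero_point)
+{
+  const int32_t unclamped = static_cast<int32_t>(std::round(val / scale)) + zero_point;
+  const int32_t min_val = std::numeric_limits<int8_t>::min();
+  const int32_t max_val = std::numeric_limits<int8_t>::max();
+  return static_cast<int8_t>(std::min(std::max(unclamped, min_val), max_val));
+}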
+
+template <>
+inline void Quantize(const Shape &input_shape, const float *input_data, const Shape &output_shape,
+ uint8_t *output_data, const float scale, const int32_t zero_point)
+{
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ static constexpr int32_t min_val = std::numeric_limits<uint8_t>::min();
+ static constexpr int32_t max_val = std::numeric_limits<uint8_t>::max();
+
+ int i = 0;
+#ifdef USE_NEON
+ const float32x4_t reverse_scale_dup = vdupq_n_f32(1.0f / scale);
+ const int32x4_t zero_point_dup = vdupq_n_s32(zero_point);
+ const int32x4_t min_val_dup = vdupq_n_s32(min_val);
+ const int32x4_t max_val_dup = vdupq_n_s32(max_val);
+
+ for (; i <= flat_size - 8; i += 8)
+ {
+ const float *src_data_ptr = input_data + i;
+ float32x4_t input_val_0 = vld1q_f32(src_data_ptr);
+ float32x4_t input_val_1 = vld1q_f32(src_data_ptr + 4);
+
+ input_val_0 = vmulq_f32(input_val_0, reverse_scale_dup);
+ input_val_1 = vmulq_f32(input_val_1, reverse_scale_dup);
+
+ int32x4_t casted_val_0 = RoundToNearest(input_val_0);
+ int32x4_t casted_val_1 = RoundToNearest(input_val_1);
+
+ casted_val_0 = vaddq_s32(casted_val_0, zero_point_dup);
+ casted_val_1 = vaddq_s32(casted_val_1, zero_point_dup);
+
+ // Clamp the values to fit the target type's range.
+ casted_val_0 = vmaxq_s32(casted_val_0, min_val_dup);
+ casted_val_1 = vmaxq_s32(casted_val_1, min_val_dup);
+ casted_val_0 = vminq_s32(casted_val_0, max_val_dup);
+ casted_val_1 = vminq_s32(casted_val_1, max_val_dup);
+
+ const uint16x4_t narrowed_val_0 = vqmovun_s32(casted_val_0);
+ const uint16x4_t narrowed_val_1 = vqmovun_s32(casted_val_1);
+ const uint16x8_t combined_val = vcombine_u16(narrowed_val_0, narrowed_val_1);
+ const uint8x8_t combined_val_narrowed = vmovn_u16(combined_val);
+ vst1_u8(output_data + i, combined_val_narrowed);
+ }
+#endif // NEON
+
+ for (; i < flat_size; ++i)
+ {
+ const float val = input_data[i];
+ const int32_t unclamped = static_cast<int32_t>(round(val / scale)) + zero_point;
+ const int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
+ output_data[i] = clamped;
+ }
+}
+
+template <>
+inline void Quantize(const Shape &input_shape, const float *input_data, const Shape &output_shape,
+ int16_t *output_data, const float scale, const int32_t zero_point)
+{
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ static constexpr int32_t min_val = std::numeric_limits<int16_t>::min();
+ static constexpr int32_t max_val = std::numeric_limits<int16_t>::max();
+
+ int i = 0;
+#ifdef USE_NEON
+ const float32x4_t reverse_scale_dup = vdupq_n_f32(1.0f / scale);
+ const int32x4_t zero_point_dup = vdupq_n_s32(zero_point);
+ const int32x4_t min_val_dup = vdupq_n_s32(min_val);
+ const int32x4_t max_val_dup = vdupq_n_s32(max_val);
+
+ for (; i <= flat_size - 8; i += 8)
+ {
+ const float *src_data_ptr = input_data + i;
+ float32x4_t input_val_0 = vld1q_f32(src_data_ptr);
+ float32x4_t input_val_1 = vld1q_f32(src_data_ptr + 4);
+
+ input_val_0 = vmulq_f32(input_val_0, reverse_scale_dup);
+ input_val_1 = vmulq_f32(input_val_1, reverse_scale_dup);
+
+ int32x4_t casted_val_0 = RoundToNearest(input_val_0);
+ int32x4_t casted_val_1 = RoundToNearest(input_val_1);
+
+ casted_val_0 = vaddq_s32(casted_val_0, zero_point_dup);
+ casted_val_1 = vaddq_s32(casted_val_1, zero_point_dup);
+
+ // Clamp the values to fit the target type's range.
+ casted_val_0 = vmaxq_s32(casted_val_0, min_val_dup);
+ casted_val_1 = vmaxq_s32(casted_val_1, min_val_dup);
+ casted_val_0 = vminq_s32(casted_val_0, max_val_dup);
+ casted_val_1 = vminq_s32(casted_val_1, max_val_dup);
+
+ const int16x4_t narrowed_val_0 = vmovn_s32(casted_val_0);
+ const int16x4_t narrowed_val_1 = vmovn_s32(casted_val_1);
+ vst1_s16(output_data + i, narrowed_val_0);
+ vst1_s16(output_data + i + 4, narrowed_val_1);
+ }
+#endif // NEON
+
+ for (; i < flat_size; ++i)
+ {
+ const float val = input_data[i];
+ const int32_t unclamped = static_cast<int32_t>(round(val / scale)) + zero_point;
+ const int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
+ output_data[i] = clamped;
+ }
+}
+
+inline void Quantize(const int32_t *multiplier, const int32_t *shift, int32_t channel_size,
+ int32_t total_size, int32_t output_zp, int32_t output_min, int32_t output_max,
+ int32_t *scratch, int8_t *output)
+{
+ // Here we're trying to quantize the raw accumulators:
+ // output_channels
+ // data data data data data
+ // rows data data data data data
+ // data data data data data
+ // ....
+ //
+  // To minimize reloading of the multipliers & shifts, we load them once per
+  // channel block and then load & quantize the raw accumulators of every row
+  // for that block.
+#ifdef USE_NEON
+ const int32x4_t output_offset_vec = vdupq_n_s32(output_zp);
+ const int32x4_t output_activation_min_vec = vdupq_n_s32(output_min);
+ const int32x4_t output_activation_max_vec = vdupq_n_s32(output_max);
+ const int32x4_t zeros = vdupq_n_s32(0);
+#endif
+
+ assert(total_size % channel_size == 0);
+ const int32_t rows = total_size / channel_size;
+
+ int c = 0;
+
+#ifdef USE_NEON
+ using gemmlowp::RoundingDivideByPOT;
+ for (; c <= channel_size - 8; c += 8)
+ {
+ int32x4_t out_shift_1 = vld1q_s32(shift + c);
+ int32x4_t out_shift_2 = vld1q_s32(shift + c + 4);
+ int32x4_t left_shift_1 = vmaxq_s32(out_shift_1, zeros);
+ int32x4_t left_shift_2 = vmaxq_s32(out_shift_2, zeros);
+
+ // Right shift will be performed as left shift with negative values.
+ int32x4_t right_shift_1 = vminq_s32(out_shift_1, zeros);
+ int32x4_t right_shift_2 = vminq_s32(out_shift_2, zeros);
+
+ int32x4_t out_mul_1 = vld1q_s32(multiplier + c);
+ int32x4_t out_mul_2 = vld1q_s32(multiplier + c + 4);
+ for (int n = 0; n < rows; ++n)
+ {
+ int loc = n * channel_size + c;
+ int32x4_t acc_1 = vld1q_s32(scratch + loc);
+ int32x4_t acc_2 = vld1q_s32(scratch + loc + 4);
+
+ // Saturating Rounding Doubling High Mul.
+ acc_1 = vshlq_s32(acc_1, left_shift_1);
+ acc_1 = vqrdmulhq_s32(acc_1, out_mul_1);
+ acc_2 = vshlq_s32(acc_2, left_shift_2);
+ acc_2 = vqrdmulhq_s32(acc_2, out_mul_2);
+
+ // Rounding Dividing By POT.
+ acc_1 = vrshlq_s32(acc_1, right_shift_1);
+ acc_2 = vrshlq_s32(acc_2, right_shift_2);
+
+ // Add the output offset.
+ acc_1 = vaddq_s32(acc_1, output_offset_vec);
+ acc_2 = vaddq_s32(acc_2, output_offset_vec);
+
+ // Apply the activation function.
+ acc_1 = vmaxq_s32(acc_1, output_activation_min_vec);
+ acc_1 = vminq_s32(acc_1, output_activation_max_vec);
+ acc_2 = vmaxq_s32(acc_2, output_activation_min_vec);
+ acc_2 = vminq_s32(acc_2, output_activation_max_vec);
+
+ // Saturating cast to int8 and store to destination.
+ const int16x4_t acc_s16_1 = vqmovn_s32(acc_1);
+ const int16x4_t acc_s16_2 = vqmovn_s32(acc_2);
+ const int16x8_t res_s16 = vcombine_s16(acc_s16_1, acc_s16_2);
+ const int8x8_t res_s8 = vqmovn_s16(res_s16);
+ vst1_s8(output + loc, res_s8);
+ }
+ }
+
+#endif // USE_NEON
+ // Handle leftover values, one by one. This is very slow.
+ for (; c < channel_size; c++)
+ {
+ for (int n = 0; n < rows; ++n)
+ {
+ int loc = n * channel_size + c;
+ int32_t acc = scratch[loc];
+ acc = MultiplyByQuantizedMultiplier(acc, multiplier[c], shift[c]);
+ acc += output_zp;
+ acc = std::max(acc, output_min);
+ acc = std::min(acc, output_max);
+ output[loc] = static_cast<int8_t>(acc);
+ }
+ }
+}
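+
+// Worked example (hypothetical values) of the per-channel path above: each
+// output channel c owns one (multiplier, shift) pair, and every one of the
+// `rows` accumulators in that channel is rescaled with it, i.e. roughly
+// acc * multiplier * 2^shift / 2^31, then offset by output_zp, clamped and
+// narrowed to int8. For instance multiplier = 1 << 30 with shift = 1 is an
+// identity rescale and leaves acc unchanged.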
+
+template <typename input_type, typename output_type>
+inline void Requantize(const input_type *input_data, int32_t size,
+ int32_t effective_scale_multiplier, int32_t effective_scale_shift,
+ int32_t input_zeropoint, int32_t output_zeropoint, output_type *output_data)
+{
+ assert(!"Requantize: not supported type. It shouldn't reach here.");
+ UNUSED_ALL(input_data, size, effective_scale_multiplier, effective_scale_shift, input_zeropoint,
+ output_zeropoint, output_data);
+}
+
+template <>
+inline void Requantize<uint8_t, int8_t>(const uint8_t *input_data, int32_t size,
+ int32_t effective_scale_multiplier,
+ int32_t effective_scale_shift, int32_t input_zeropoint,
+ int32_t output_zeropoint, int8_t *output_data)
+{
+ static constexpr int32_t kMinOutput = std::numeric_limits<int8_t>::min();
+ static constexpr int32_t kMaxOutput = std::numeric_limits<int8_t>::max();
+
+ int i = 0;
+#ifdef USE_NEON
+ // Constants.
+ const int32x4_t input_zero_point_dup = vdupq_n_s32(-input_zeropoint);
+ const int32x4_t output_zero_point_dup = vdupq_n_s32(output_zeropoint);
+ const int32x4_t min_val_dup = vdupq_n_s32(kMinOutput);
+ const int32x4_t max_val_dup = vdupq_n_s32(kMaxOutput);
+
+ for (; i <= size - 16; i += 16)
+ {
+ const uint8x16_t input_vec = vld1q_u8(input_data + i);
+ const uint16x8_t first_half = vmovl_u8(vget_low_u8(input_vec));
+ const uint16x8_t second_half = vmovl_u8(vget_high_u8(input_vec));
+ int32x4x4_t input;
+ input.val[0] = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(first_half)));
+ input.val[1] = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(first_half)));
+ input.val[2] = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(second_half)));
+ input.val[3] = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(second_half)));
+ input.val[0] = vaddq_s32(input.val[0], input_zero_point_dup);
+ input.val[1] = vaddq_s32(input.val[1], input_zero_point_dup);
+ input.val[2] = vaddq_s32(input.val[2], input_zero_point_dup);
+ input.val[3] = vaddq_s32(input.val[3], input_zero_point_dup);
+
+ int32x4x4_t result =
+ MultiplyByQuantizedMultiplier4Rows(input, effective_scale_multiplier, effective_scale_shift);
+
+ result.val[0] = vaddq_s32(result.val[0], output_zero_point_dup);
+ result.val[1] = vaddq_s32(result.val[1], output_zero_point_dup);
+ result.val[2] = vaddq_s32(result.val[2], output_zero_point_dup);
+ result.val[3] = vaddq_s32(result.val[3], output_zero_point_dup);
+ result.val[0] = vmaxq_s32(vminq_s32(result.val[0], max_val_dup), min_val_dup);
+ result.val[1] = vmaxq_s32(vminq_s32(result.val[1], max_val_dup), min_val_dup);
+ result.val[2] = vmaxq_s32(vminq_s32(result.val[2], max_val_dup), min_val_dup);
+ result.val[3] = vmaxq_s32(vminq_s32(result.val[3], max_val_dup), min_val_dup);
+
+ const int16x4_t narrowed_val_1 = vqmovn_s32(result.val[0]);
+ const int16x4_t narrowed_val_2 = vqmovn_s32(result.val[1]);
+ const int16x4_t narrowed_val_3 = vqmovn_s32(result.val[2]);
+ const int16x4_t narrowed_val_4 = vqmovn_s32(result.val[3]);
+ const int16x8_t output_first_half = vcombine_s16(narrowed_val_1, narrowed_val_2);
+ const int16x8_t output_second_half = vcombine_s16(narrowed_val_3, narrowed_val_4);
+ const int8x8_t narrowed_first_half = vqmovn_s16(output_first_half);
+ const int8x8_t narrowed_second_half = vqmovn_s16(output_second_half);
+ const int8x16_t narrowed_result = vcombine_s8(narrowed_first_half, narrowed_second_half);
+ vst1q_s8(output_data + i, narrowed_result);
+ }
+
+#endif
+ for (; i < size; ++i)
+ {
+ const int32_t input = input_data[i] - input_zeropoint;
+ const int32_t output =
+ MultiplyByQuantizedMultiplier(input, effective_scale_multiplier, effective_scale_shift) +
+ output_zeropoint;
+ const int32_t clamped_output = std::max(std::min(output, kMaxOutput), kMinOutput);
+ output_data[i] = static_cast<int8_t>(clamped_output);
+ }
+}
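+
+// Hypothetical usage sketch of the specialization above: with an identity
+// effective scale (multiplier = 1 << 30, shift = 1), input_zeropoint = 128
+// and output_zeropoint = 0, uint8 -> int8 requantization reduces to the
+// usual zero-point shift output = input - 128.
+inline int8_t RequantizeUint8ToInt8Example(uint8_t v)
+{
+  int8_t out;
+  Requantize<uint8_t, int8_t>(&v, /*size=*/1, /*effective_scale_multiplier=*/1 << 30,
+                              /*effective_scale_shift=*/1, /*input_zeropoint=*/128,
+                              /*output_zeropoint=*/0, &out);
+  return out; // v - 128
+}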
+
+template <>
+inline void Requantize<int8_t, uint8_t>(const int8_t *input_data, int32_t size,
+ int32_t effective_scale_multiplier,
+ int32_t effective_scale_shift, int32_t input_zeropoint,
+ int32_t output_zeropoint, uint8_t *output_data)
+{
+ static constexpr int32_t kMinOutput = std::numeric_limits<uint8_t>::min();
+ static constexpr int32_t kMaxOutput = std::numeric_limits<uint8_t>::max();
+
+ int i = 0;
+#ifdef USE_NEON
+ // Constants.
+ const int32x4_t input_zero_point_dup = vdupq_n_s32(-input_zeropoint);
+ const int32x4_t output_zero_point_dup = vdupq_n_s32(output_zeropoint);
+ const int32x4_t min_val_dup = vdupq_n_s32(kMinOutput);
+ const int32x4_t max_val_dup = vdupq_n_s32(kMaxOutput);
+
+ for (; i <= size - 16; i += 16)
+ {
+ const int8x16_t input_vec = vld1q_s8(input_data + i);
+ const int16x8_t first_half = vmovl_s8(vget_low_s8(input_vec));
+ const int16x8_t second_half = vmovl_s8(vget_high_s8(input_vec));
+ int32x4x4_t input;
+ input.val[0] = vmovl_s16(vget_low_s16(first_half));
+ input.val[1] = vmovl_s16(vget_high_s16(first_half));
+ input.val[2] = vmovl_s16(vget_low_s16(second_half));
+ input.val[3] = vmovl_s16(vget_high_s16(second_half));
+ input.val[0] = vaddq_s32(input.val[0], input_zero_point_dup);
+ input.val[1] = vaddq_s32(input.val[1], input_zero_point_dup);
+ input.val[2] = vaddq_s32(input.val[2], input_zero_point_dup);
+ input.val[3] = vaddq_s32(input.val[3], input_zero_point_dup);
+
+ int32x4x4_t result =
+ MultiplyByQuantizedMultiplier4Rows(input, effective_scale_multiplier, effective_scale_shift);
+
+ result.val[0] = vaddq_s32(result.val[0], output_zero_point_dup);
+ result.val[1] = vaddq_s32(result.val[1], output_zero_point_dup);
+ result.val[2] = vaddq_s32(result.val[2], output_zero_point_dup);
+ result.val[3] = vaddq_s32(result.val[3], output_zero_point_dup);
+ result.val[0] = vmaxq_s32(vminq_s32(result.val[0], max_val_dup), min_val_dup);
+ result.val[1] = vmaxq_s32(vminq_s32(result.val[1], max_val_dup), min_val_dup);
+ result.val[2] = vmaxq_s32(vminq_s32(result.val[2], max_val_dup), min_val_dup);
+ result.val[3] = vmaxq_s32(vminq_s32(result.val[3], max_val_dup), min_val_dup);
+
+ const uint32x4_t result_val_1_unsigned = vreinterpretq_u32_s32(result.val[0]);
+ const uint32x4_t result_val_2_unsigned = vreinterpretq_u32_s32(result.val[1]);
+ const uint32x4_t result_val_3_unsigned = vreinterpretq_u32_s32(result.val[2]);
+ const uint32x4_t result_val_4_unsigned = vreinterpretq_u32_s32(result.val[3]);
+
+ const uint16x4_t narrowed_val_1 = vqmovn_u32(result_val_1_unsigned);
+ const uint16x4_t narrowed_val_2 = vqmovn_u32(result_val_2_unsigned);
+ const uint16x4_t narrowed_val_3 = vqmovn_u32(result_val_3_unsigned);
+ const uint16x4_t narrowed_val_4 = vqmovn_u32(result_val_4_unsigned);
+ const uint16x8_t output_first_half = vcombine_u16(narrowed_val_1, narrowed_val_2);
+ const uint16x8_t output_second_half = vcombine_u16(narrowed_val_3, narrowed_val_4);
+ const uint8x8_t narrowed_first_half = vqmovn_u16(output_first_half);
+ const uint8x8_t narrowed_second_half = vqmovn_u16(output_second_half);
+ const uint8x16_t narrowed_result = vcombine_u8(narrowed_first_half, narrowed_second_half);
+ vst1q_u8(output_data + i, narrowed_result);
+ }
+
+#endif
+ for (; i < size; ++i)
+ {
+ const int32_t input = input_data[i] - input_zeropoint;
+ const int32_t output =
+ MultiplyByQuantizedMultiplier(input, effective_scale_multiplier, effective_scale_shift) +
+ output_zeropoint;
+ const int32_t clamped_output = std::max(std::min(output, kMaxOutput), kMinOutput);
+ output_data[i] = static_cast<uint8_t>(clamped_output);
+ }
+}
+
} // namespace cker
} // namespace nnfw
diff --git a/compute/cker/include/cker/operation/Range.h b/compute/cker/include/cker/operation/Range.h
index 5c3a773a2..d6ccc68c8 100644
--- a/compute/cker/include/cker/operation/Range.h
+++ b/compute/cker/include/cker/operation/Range.h
@@ -35,8 +35,8 @@ template <typename T> inline int GetSize(T start, T limit, T delta)
}
int size = (std::is_integral<T>::value
- ? ((std::abs(limit - start) + std::abs(delta) - 1) / std::abs(delta))
- : std::ceil(std::abs((limit - start) / delta)));
+ ? ((std::abs(limit - start) + std::abs(delta) - 1) / std::abs(delta))
+ : std::ceil(std::abs((limit - start) / delta)));
return size;
}
diff --git a/compute/cker/include/cker/operation/Reduce.h b/compute/cker/include/cker/operation/Reduce.h
index cf9634a67..02a9eac5e 100644
--- a/compute/cker/include/cker/operation/Reduce.h
+++ b/compute/cker/include/cker/operation/Reduce.h
@@ -21,6 +21,7 @@
#include "cker/Shape.h"
#include "cker/Types.h"
#include "cker/Utils.h"
+#include "cker/neon/neon_check.h"
namespace nnfw
{
@@ -30,6 +31,89 @@ namespace cker
// A generic reduce method that can be used for reduce_sum, reduce_mean, etc.
// This method iterates through input data and reduce elements along the
// dimensions given in axis.
+
+#ifdef USE_NEON
+inline void OptimizedReduceSum(const float *input_data, const Shape &input_shape,
+ float *output_data)
+{
+ const auto input_dims = input_shape.DimsData();
+ const auto input_num_dims = input_shape.DimensionsCount();
+
+ int input_size = 1;
+ int reduce_size = 0;
+ for (int idx = 0; idx < input_num_dims - 1; idx++)
+ {
+ input_size *= input_dims[idx];
+ }
+ reduce_size = input_dims[input_num_dims - 1];
+ int offset = 0;
+ for (int idx = 0; idx < input_size; idx++)
+ {
+ int r_idx = 0;
+ float tmp_data[4] = {
+ 0,
+ };
+ float32x4_t tmp_data_32x4 = vld1q_f32(tmp_data);
+ for (; r_idx <= reduce_size - 32; r_idx += 32)
+ {
+ float32x4_t a10 = vld1q_f32(input_data + offset + r_idx);
+ float32x4_t a11 = vld1q_f32(input_data + offset + r_idx + 4);
+ float32x4_t a12 = vld1q_f32(input_data + offset + r_idx + 8);
+ float32x4_t a13 = vld1q_f32(input_data + offset + r_idx + 12);
+ float32x4_t a20 = vld1q_f32(input_data + offset + r_idx + 16);
+ float32x4_t a21 = vld1q_f32(input_data + offset + r_idx + 20);
+ float32x4_t a22 = vld1q_f32(input_data + offset + r_idx + 24);
+ float32x4_t a23 = vld1q_f32(input_data + offset + r_idx + 28);
+
+ float32x4_t x0 = vaddq_f32(a10, a20);
+ float32x4_t x1 = vaddq_f32(a11, a21);
+ float32x4_t x2 = vaddq_f32(a12, a22);
+ float32x4_t x3 = vaddq_f32(a13, a23);
+
+ float32x4_t y0 = vaddq_f32(x0, x1);
+ float32x4_t y1 = vaddq_f32(x2, x3);
+ float32x4_t y2 = vaddq_f32(y0, y1);
+ tmp_data_32x4 = vaddq_f32(tmp_data_32x4, y2);
+ }
+ for (; r_idx <= reduce_size - 16; r_idx += 16)
+ {
+ float32x4_t a10 = vld1q_f32(input_data + offset + r_idx);
+ float32x4_t a11 = vld1q_f32(input_data + offset + r_idx + 4);
+ float32x4_t a12 = vld1q_f32(input_data + offset + r_idx + 8);
+ float32x4_t a13 = vld1q_f32(input_data + offset + r_idx + 12);
+
+ float32x4_t x0 = vaddq_f32(a10, a11);
+ float32x4_t x1 = vaddq_f32(a12, a13);
+
+ float32x4_t y0 = vaddq_f32(x0, x1);
+ tmp_data_32x4 = vaddq_f32(tmp_data_32x4, y0);
+ }
+ for (; r_idx <= reduce_size - 8; r_idx += 8)
+ {
+ float32x4_t a1 = vld1q_f32(input_data + offset + r_idx);
+ float32x4_t a2 = vld1q_f32(input_data + offset + r_idx + 4);
+ float32x4_t x = vaddq_f32(a1, a2);
+ tmp_data_32x4 = vaddq_f32(tmp_data_32x4, x);
+ }
+ vst1q_f32(tmp_data, tmp_data_32x4);
+ output_data[idx] = tmp_data[0] + tmp_data[1] + tmp_data[2] + tmp_data[3];
+
+ for (; r_idx < reduce_size; r_idx++)
+ {
+ if (r_idx == 0)
+ {
+ output_data[idx] = input_data[offset];
+ }
+ else
+ {
+ output_data[idx] += input_data[offset + r_idx];
+ }
+ }
+ offset += reduce_size;
+ }
+}
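+
+// Hypothetical usage sketch: for a {2, 3} input this produces one sum per
+// row, reducing over the last (innermost) axis:
+//   input  = {1, 2, 3,
+//             4, 5, 6}   -> output = {6, 15}
+// The NEON blocks above only peel that per-row reduction in chunks of
+// 32/16/8 floats; the scalar tail handles what remains.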
+#endif // NEON
+
template <typename In, typename Out>
inline bool ReduceImpl(const In *input_data, const Shape &input_shape, const Shape &,
const int *axis, const int num_axis, int *input_iter,
@@ -39,6 +123,32 @@ inline bool ReduceImpl(const In *input_data, const Shape &input_shape, const Sha
const auto input_num_dims = input_shape.DimensionsCount();
// Reset input iterator.
+ if (num_axis == 1 && axis[0] == input_num_dims - 1)
+ {
+ int input_size = 1;
+ int reduce_size = 0;
+ for (int idx = 0; idx < input_num_dims - 1; idx++)
+ {
+ input_size *= input_dims[idx];
+ }
+ reduce_size = input_dims[input_num_dims - 1];
+ for (int idx = 0; idx < input_size; idx++)
+ {
+ for (int r_idx = 0; r_idx < reduce_size; r_idx++)
+ {
+ if (r_idx == 0)
+ {
+ output_data[idx] = input_data[idx * reduce_size];
+ }
+ else
+ {
+ output_data[idx] = reducer(output_data[idx], input_data[idx * reduce_size + r_idx]);
+ }
+ }
+ }
+ return true;
+ }
+
for (int idx = 0; idx < input_num_dims; ++idx)
{
input_iter[idx] = 0;
@@ -48,7 +158,7 @@ inline bool ReduceImpl(const In *input_data, const Shape &input_shape, const Sha
{
size_t input_offset = ReducedOutputOffset(input_num_dims, input_dims, input_iter, 0, nullptr);
size_t output_offset =
- ReducedOutputOffset(input_num_dims, input_dims, input_iter, num_axis, axis);
+ ReducedOutputOffset(input_num_dims, input_dims, input_iter, num_axis, axis);
output_data[output_offset] = reducer(output_data[output_offset], input_data[input_offset]);
} while (NextIndex(input_num_dims, input_dims, input_iter));
return true;
@@ -202,12 +312,12 @@ public:
}
// Calculate mean by dividing output_data by num of aggregated element.
- U num_elements_in_axis = 1;
+ size_t num_elements_in_axis = 1;
for (int idx = 0; idx < num_resolved_axis; ++idx)
{
size_t current = static_cast<size_t>(input_shape.Dims(resolved_axis_data()[idx]));
// Overflow prevention.
- if (current > static_cast<size_t>(std::numeric_limits<U>::max() / num_elements_in_axis))
+ if (current > static_cast<size_t>(std::numeric_limits<size_t>::max() / num_elements_in_axis))
{
return false;
}
@@ -220,21 +330,21 @@ public:
if (compute_sum)
{
// TODO(b/116341117): Eliminate float and do this completely in 8bit.
- const float bias = -input_zero_point * scale * num_elements_in_axis + 0.5f;
+ const float bias = -input_zero_point * scale * num_elements_in_axis;
for (size_t idx = 0; idx < num_outputs; ++idx)
{
const U value =
- static_cast<U>(std::round(temp_sum[idx] * scale + bias)) + output_zero_point;
+ static_cast<U>(std::round(temp_sum[idx] * scale + bias)) + output_zero_point;
output_data[idx] = static_cast<T>(value);
}
}
else
{
- const float bias = -input_zero_point * scale + 0.5f;
+ const float bias = -input_zero_point * scale;
for (size_t idx = 0; idx < num_outputs; ++idx)
{
float float_mean =
- static_cast<float>(temp_sum[idx]) / static_cast<float>(num_elements_in_axis);
+ static_cast<float>(temp_sum[idx]) / static_cast<float>(num_elements_in_axis);
float result = std::min(std::round(float_mean * scale + bias) + output_zero_point,
static_cast<float>(std::numeric_limits<T>::max()));
result = std::max(result, static_cast<float>(std::numeric_limits<T>::min()));
diff --git a/compute/cker/include/cker/operation/ReduceMean.h b/compute/cker/include/cker/operation/ReduceMean.h
index 2e4fc6274..924e85037 100644
--- a/compute/cker/include/cker/operation/ReduceMean.h
+++ b/compute/cker/include/cker/operation/ReduceMean.h
@@ -72,9 +72,9 @@ inline bool ReduceMeanImpl(const In *input_data, const Shape &input_shape, const
{
size_t input_offset = ReducedOutputOffset(input_num_dims, input_dims, input_iter, 0, nullptr);
size_t output_offset =
- ReducedOutputOffset(input_num_dims, input_dims, input_iter, num_axis, axis);
+ ReducedOutputOffset(input_num_dims, input_dims, input_iter, num_axis, axis);
output_data[output_offset] =
- reducer(output_data[output_offset], input_data[input_offset], normalizer);
+ reducer(output_data[output_offset], input_data[input_offset], normalizer);
} while (NextIndex(input_num_dims, input_dims, input_iter));
return true;
}
@@ -102,7 +102,7 @@ inline size_t ReduceSumQuantImpl(const In *input_data, const Shape &input_shape,
{
size_t input_offset = ReducedOutputOffset(input_num_dims, input_dims, input_iter, 0, nullptr);
size_t output_offset =
- ReducedOutputOffset(input_num_dims, input_dims, input_iter, num_axis, axis);
+ ReducedOutputOffset(input_num_dims, input_dims, input_iter, num_axis, axis);
temp_sum[output_offset] = reducer(temp_sum[output_offset], input_data[input_offset]);
} while (NextIndex(input_num_dims, input_dims, input_iter));
return normalizer;
@@ -185,8 +185,8 @@ public:
}
size_t normalizer =
- ReduceSumQuantImpl<In>(input_data, input_shape, resolved_axis_data(), num_resolved_axis,
- temp_index_data(), reducer, _temp_sum.data());
+ ReduceSumQuantImpl<In>(input_data, input_shape, resolved_axis_data(), num_resolved_axis,
+ temp_index_data(), reducer, _temp_sum.data());
if (num_outputs > 0)
{
float scale = input_scale / output_scale;
@@ -231,6 +231,37 @@ void MeanQ8Asymm(const Shape &input_shape, const In *input_data, float input_sca
sum_reducer);
}
+template <typename In, typename Out>
+void MeanAxis1And2(const Shape &input_shape, const In *input_data, const Shape &output_shape,
+ Out *output_data)
+{
+ UNUSED_RELEASE(output_shape);
+ assert(input_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+
+ const int output_batch = output_shape.Dims(0);
+ const int output_depth = output_shape.Dims(3);
+
+ const int input_height = input_shape.Dims(1);
+ const int input_width = input_shape.Dims(2);
+
+ for (int out_b = 0; out_b < output_batch; ++out_b)
+ {
+ for (int out_d = 0; out_d < output_depth; ++out_d)
+ {
+ float value = 0;
+ for (int in_h = 0; in_h < input_height; ++in_h)
+ {
+ for (int in_w = 0; in_w < input_width; ++in_w)
+ {
+ value += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)];
+ }
+ }
+ output_data[Offset(output_shape, out_b, 0, 0, out_d)] = value / (input_width * input_height);
+ }
+ }
+}
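+
+// Usage note (hypothetical values): this is the NHWC "global average
+// pooling" case, i.e. ReduceMean over axes {1, 2}. For a {1, 2, 2, 1} input
+// holding {1, 2, 3, 4}, the single output value is (1 + 2 + 3 + 4) / 4 = 2.5.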
+
} // namespace cker
} // namespace nnfw
diff --git a/compute/cker/include/cker/operation/ResizeBilinear.h b/compute/cker/include/cker/operation/ResizeBilinear.h
index 7fc1e9123..ae5af7bb3 100644
--- a/compute/cker/include/cker/operation/ResizeBilinear.h
+++ b/compute/cker/include/cker/operation/ResizeBilinear.h
@@ -62,7 +62,7 @@ inline void ResizeBilinearKernel2x2(int32_t x0, int32_t x1, int32_t y0, int32_t
// Bottom right corner.
output_data[output_offset + output_x_offset + output_y_offset] =
- (output + ((x1y0 + x1y1) / 2)) / 2;
+ (output + ((x1y0 + x1y1) / 2)) / 2;
}
}
@@ -192,8 +192,8 @@ inline void ResizeBilinearGenericSmallChannel(int32_t batches, int32_t input_hei
&x1);
int32_t input_offset[4] = {
- Offset(input_shape, b, y0, x0, 0), Offset(input_shape, b, y0, x1, 0),
- Offset(input_shape, b, y1, x0, 0), Offset(input_shape, b, y1, x1, 0)};
+ Offset(input_shape, b, y0, x0, 0), Offset(input_shape, b, y0, x1, 0),
+ Offset(input_shape, b, y1, x0, 0), Offset(input_shape, b, y1, x1, 0)};
float scale[4] = {(1 - (input_y - y0)) * (1 - (input_x - x0)),
(1 - (input_y - y0)) * (input_x - x0),
(input_y - y0) * (1 - (input_x - x0)), (input_y - y0) * (input_x - x0)};
@@ -202,8 +202,8 @@ inline void ResizeBilinearGenericSmallChannel(int32_t batches, int32_t input_hei
{
const T *input_ptr = &input_data[d];
*output_ptr++ = static_cast<T>(
- input_ptr[input_offset[0]] * scale[0] + input_ptr[input_offset[1]] * scale[1] +
- input_ptr[input_offset[2]] * scale[2] + input_ptr[input_offset[3]] * scale[3]);
+ input_ptr[input_offset[0]] * scale[0] + input_ptr[input_offset[1]] * scale[1] +
+ input_ptr[input_offset[2]] * scale[2] + input_ptr[input_offset[3]] * scale[3]);
}
}
}
@@ -253,17 +253,102 @@ void ResizeBilinear(ResizeBilinearParams &params, const Shape &input_shape,
int32_t depth = MatchingDim(input_shape, 3, output_shape, 3);
float height_scale = (params.align_corners && params.output_height > 1)
- ? (static_cast<float>(input_height - 1) / (params.output_height - 1))
- : (static_cast<float>(input_height) / params.output_height);
+ ? (static_cast<float>(input_height - 1) / (params.output_height - 1))
+ : (static_cast<float>(input_height) / params.output_height);
float width_scale = (params.align_corners && params.output_width > 1)
- ? (static_cast<float>(input_width - 1) / (params.output_width - 1))
- : (static_cast<float>(input_width) / params.output_width);
+ ? (static_cast<float>(input_width - 1) / (params.output_width - 1))
+ : (static_cast<float>(input_width) / params.output_width);
ResizeBilinearGenericSmallChannel<uint8_t>(
- batches, input_height, input_width, depth, params.output_height, params.output_width,
- height_scale, width_scale, input_shape, input_data, output_data, params.half_pixel_centers);
+ batches, input_height, input_width, depth, params.output_height, params.output_width,
+ height_scale, width_scale, input_shape, input_data, output_data, params.half_pixel_centers);
}
+
+inline void ComputeInterpolationValues(const int32_t value, const int32_t scale_10,
+ const bool half_pixel_centers, int32_t input_size,
+ int32_t *scaled_value, int32_t *lower_bound,
+ int32_t *upper_bound)
+{
+ if (half_pixel_centers)
+ {
+ *scaled_value = value * scale_10 + scale_10 / 2 - (1 << 9);
+ }
+ else
+ {
+ *scaled_value = value * scale_10;
+ }
+ *lower_bound = std::max(*scaled_value / (1 << 10), 0);
+ *upper_bound = std::min(*scaled_value / (1 << 10) + 1, input_size - 1);
+}
+
+inline void ResizeBilinear(const ResizeBilinearParams &op_params,
+ const Shape &unextended_input_shape, const int8_t *input_data,
+ const Shape &unextended_output_shape, int8_t *output_data)
+{
+  // If half_pixel_centers is true, align_corners must be false.
+ assert(!op_params.half_pixel_centers || !op_params.align_corners);
+ assert(unextended_input_shape.DimensionsCount() <= 4);
+ assert(unextended_output_shape.DimensionsCount() <= 4);
+ const Shape input_shape = Shape::ExtendedShape(4, unextended_input_shape);
+ const Shape output_shape = Shape::ExtendedShape(4, unextended_output_shape);
+
+ const int32_t batches = MatchingDim(input_shape, 0, output_shape, 0);
+ const int32_t input_height = input_shape.Dims(1);
+ const int32_t input_width = input_shape.Dims(2);
+ const int32_t depth = MatchingDim(input_shape, 3, output_shape, 3);
+
+ const int32_t output_height = op_params.output_height;
+ const int32_t output_width = op_params.output_width;
+
+ int32_t height_scale_10 = ((1 << 10) * input_height + output_height / 2) / output_height;
+ int32_t width_scale_10 = ((1 << 10) * input_width + output_width / 2) / output_width;
+ if (op_params.align_corners && output_height > 1)
+ {
+ height_scale_10 =
+ ((1 << 10) * (input_height - 1) + (output_height - 1) / 2) / (output_height - 1);
+ }
+ if (op_params.align_corners && output_width > 1)
+ {
+ width_scale_10 = ((1 << 10) * (input_width - 1) + (output_width - 1) / 2) / (output_width - 1);
+ }
+
+ for (int b = 0; b < batches; ++b)
+ {
+ for (int y = 0; y < output_height; ++y)
+ {
+ int32_t input_y, y0, y1;
+ ComputeInterpolationValues(y, height_scale_10, op_params.half_pixel_centers, input_height,
+ &input_y, &y0, &y1);
+ for (int x = 0; x < output_width; ++x)
+ {
+ int32_t input_x, x0, x1;
+ ComputeInterpolationValues(x, width_scale_10, op_params.half_pixel_centers, input_width,
+ &input_x, &x0, &x1);
+ for (int c = 0; c < depth; ++c)
+ {
+ const int64_t output_20_ll =
+ static_cast<int64_t>(input_data[Offset(input_shape, b, y0, x0, c)]) *
+ ((1 << 10) - (input_y - (1 << 10) * y0)) * ((1 << 10) - (input_x - (1 << 10) * x0));
+ const int64_t output_20_lu =
+ static_cast<int64_t>(input_data[Offset(input_shape, b, y1, x0, c)]) *
+ (input_y - (1 << 10) * y0) * ((1 << 10) - (input_x - (1 << 10) * x0));
+ const int64_t output_20_rl =
+ static_cast<int64_t>(input_data[Offset(input_shape, b, y0, x1, c)]) *
+ ((1 << 10) - (input_y - (1 << 10) * y0)) * (input_x - (1 << 10) * x0);
+ const int64_t output_20_ru =
+ static_cast<int64_t>(input_data[Offset(input_shape, b, y1, x1, c)]) *
+ (input_y - (1 << 10) * y0) * (input_x - (1 << 10) * x0);
+ const int64_t output_20 = output_20_ll + output_20_lu + output_20_rl + output_20_ru;
+ const int64_t round = (output_20 > 0) ? (1 << 19) : -(1 << 19);
+ const int8_t interpolation = static_cast<int8_t>((output_20 + round) / (1 << 20));
+ output_data[Offset(output_shape, b, y, x, c)] = interpolation;
+ }
+ }
+ }
+ }
+}
+
} // namespace cker
} // namespace nnfw
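The int8 ResizeBilinear added above works in Q10 fixed point: scale_10 is the input/output size ratio times 2^10, and ComputeInterpolationValues returns the scaled source coordinate plus its two neighbouring source indices. A small self-checking sketch of that logic with worked numbers (input_height = 2, output_height = 4, half_pixel_centers = true, so scale_10 = (1024 * 2 + 2) / 4 = 512):

#include <algorithm>
#include <cassert>
#include <cstdint>

// Standalone restatement of the Q10 coordinate logic above, for illustration only.
void interp_q10(int32_t value, int32_t scale_10, bool half_pixel_centers, int32_t input_size,
                int32_t *scaled, int32_t *lo, int32_t *hi)
{
  *scaled = half_pixel_centers ? value * scale_10 + scale_10 / 2 - (1 << 9) : value * scale_10;
  *lo = std::max<int32_t>(*scaled / (1 << 10), 0);
  *hi = std::min<int32_t>(*scaled / (1 << 10) + 1, input_size - 1);
}

int main()
{
  int32_t y, y0, y1;
  interp_q10(1, 512, true, 2, &y, &y0, &y1);
  // Output row 1 maps to source coordinate 256/1024 = 0.25, between rows 0 and 1.
  assert(y == 256 && y0 == 0 && y1 == 1);
  // The weights (y - 1024 * y0) = 256 and (1024 - 256) = 768 are Q10, so each of the
  // four products in the kernel is Q20; adding +/- 2^19 before dividing by 2^20
  // rounds the result to the nearest integer.
  return 0;
}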
diff --git a/compute/cker/include/cker/operation/Round.h b/compute/cker/include/cker/operation/Round.h
index a04a741cf..d67714564 100644
--- a/compute/cker/include/cker/operation/Round.h
+++ b/compute/cker/include/cker/operation/Round.h
@@ -19,6 +19,7 @@
#define __NNFW_CKER_ROUND_H__
#include "cker/Shape.h"
+#include "cker/Utils.h"
#include <cmath>
@@ -41,6 +42,26 @@ inline float RoundToNearest(float value)
}
}
+#ifdef USE_NEON
+
+inline int32x4_t RoundToNearest(const float32x4_t input)
+{
+#if defined(__aarch64__) || defined(__SSSE3__)
+ // Note: vcvtnq_s32_f32 is not available in ARMv7
+ return vcvtnq_s32_f32(input);
+#else
+ static const float32x4_t zero_val_dup = vdupq_n_f32(0.0f);
+ static const float32x4_t point5_val_dup = vdupq_n_f32(0.5f);
+ static const float32x4_t minus_point5_val_dup = vdupq_n_f32(-0.5f);
+
+ const uint32x4_t mask = vcltq_f32(input, zero_val_dup);
+ const float32x4_t round = vbslq_f32(mask, minus_point5_val_dup, point5_val_dup);
+ return vcvtq_s32_f32(vaddq_f32(input, round));
+#endif // defined(__aarch64__) || defined(__SSSE3__)
+}
+
+#endif // USE_NEON
+
inline void Round(const Shape &input_shape, const float *input_data, const Shape &output_shape,
float *output_data)
{
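The ARMv7 fallback added in the hunk above rounds half away from zero: bias each lane by +0.5 or -0.5 depending on sign, then truncate with vcvtq_s32_f32. A scalar sketch of the same rule; note it differs from vcvtnq_s32_f32, which rounds ties to even:

#include <cstdint>

// Scalar equivalent of the ARMv7 fallback: round half away from zero by biasing
// with +/-0.5 and truncating toward zero.
inline int32_t round_half_away(float x)
{
  return static_cast<int32_t>(x + (x < 0.0f ? -0.5f : 0.5f));
}
// round_half_away(2.5f) == 3 and round_half_away(-2.5f) == -3, whereas
// round-to-nearest-even gives 2 and -2 for those inputs.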
diff --git a/compute/cker/include/cker/operation/Select.h b/compute/cker/include/cker/operation/Select.h
index ab2de94cc..644fe0a0e 100644
--- a/compute/cker/include/cker/operation/Select.h
+++ b/compute/cker/include/cker/operation/Select.h
@@ -34,7 +34,7 @@ void Select(const Shape &input_condition_shape, const D *input_condition_data,
const T *input_y_data, const Shape &output_shape, T *output_data)
{
const int64_t flatsize =
- MatchingFlatSize(input_condition_shape, input_x_shape, input_y_shape, output_shape);
+ MatchingFlatSize(input_condition_shape, input_x_shape, input_y_shape, output_shape);
for (int64_t i = 0; i < flatsize; ++i)
{
output_data[i] = (input_condition_data[i] != 0) ? input_x_data[i] : input_y_data[i];
@@ -101,7 +101,7 @@ void BroadcastSelect4DSlow(const Shape &input_condition_shape, const D *input_co
const int x_index = SubscriptToIndex(desc_x, b, y, x, c);
const int y_index = SubscriptToIndex(desc_y, b, y, x, c);
output_data[Offset(extended_output_shape, b, y, x, c)] =
- input_condition_data[condition_index] ? input_x_data[x_index] : input_y_data[y_index];
+ input_condition_data[condition_index] ? input_x_data[x_index] : input_y_data[y_index];
}
}
}
diff --git a/compute/cker/include/cker/operation/Slice.h b/compute/cker/include/cker/operation/Slice.h
index a072cff8e..ef97fd5d8 100644
--- a/compute/cker/include/cker/operation/Slice.h
+++ b/compute/cker/include/cker/operation/Slice.h
@@ -43,16 +43,16 @@ inline void Slice(const SliceParams &op_params, const Shape &input_shape,
: start_b + op_params.size[0];
const int start_h = begin_count < 3 ? 0 : op_params.begin[begin_count - 3];
const int stop_h = (size_count < 3 || op_params.size[size_count - 3] == -1)
- ? input_shape.Dims(1)
- : start_h + op_params.size[size_count - 3];
+ ? input_shape.Dims(1)
+ : start_h + op_params.size[size_count - 3];
const int start_w = begin_count < 2 ? 0 : op_params.begin[begin_count - 2];
const int stop_w = (size_count < 2 || op_params.size[size_count - 2] == -1)
- ? input_shape.Dims(2)
- : start_w + op_params.size[size_count - 2];
+ ? input_shape.Dims(2)
+ : start_w + op_params.size[size_count - 2];
const int start_d = begin_count < 1 ? 0 : op_params.begin[begin_count - 1];
const int stop_d = (size_count < 1 || op_params.size[size_count - 1] == -1)
- ? input_shape.Dims(3)
- : start_d + op_params.size[size_count - 1];
+ ? input_shape.Dims(3)
+ : start_d + op_params.size[size_count - 1];
for (int in_b = start_b; in_b < stop_b; ++in_b)
{
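In the Slice bounds above, an axis absent from begin/size defaults to the full range, and a size of -1 means "to the end of the axis". For example, with input shape [1, 4, 4, 3], begin = [0, 1, 1, 0] and size = [1, 2, 2, -1]: start_h = 1 and stop_h = 3, start_w = 1 and stop_w = 3, while size[3] == -1 gives start_d = 0 and stop_d = input_shape.Dims(3) = 3, i.e. the whole depth axis is copied.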
diff --git a/compute/cker/include/cker/operation/SoftMax.h b/compute/cker/include/cker/operation/SoftMax.h
index 13e50b87a..35ecde4ba 100644
--- a/compute/cker/include/cker/operation/SoftMax.h
+++ b/compute/cker/include/cker/operation/SoftMax.h
@@ -23,6 +23,10 @@
#include "cker/Types.h"
#include "cker/eigen/Utils.h"
+#if __aarch64__ && __clang__
+#define TFLITE_SOFTMAX_USE_UINT16_LUT
+#endif
+
#include <Eigen/Core>
#include <fixedpoint/fixedpoint.h>
#include <cmath>
@@ -32,6 +36,45 @@ namespace nnfw
namespace cker
{
+namespace reference
+{
+
+// Note: this Softmax function supports inputs of any rank.
+inline void Softmax(const SoftmaxParams &params, const Shape &input_shape, const float *input_data,
+ const Shape &output_shape, float *output_data)
+{
+ const int trailing_dim = input_shape.DimensionsCount() - 1;
+ const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+ const int depth = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+
+ for (int i = 0; i < outer_size; ++i)
+ {
+ // Find max element value which we'll use to ensure numerical stability
+ // taking advantage of the following equality:
+ // exp(x[i])/sum(exp(x[i])) == exp(x[i]+C)/sum(exp(x[i]+C))
+ float max = std::numeric_limits<float>::lowest();
+ for (int c = 0; c < depth; ++c)
+ {
+ max = std::max(max, input_data[i * depth + c]);
+ }
+
+ // Compute sum.
+ float sum = 0.f;
+ for (int c = 0; c < depth; ++c)
+ {
+ sum += std::exp((input_data[i * depth + c] - max) * static_cast<float>(params.beta));
+ }
+
+ // Compute result.
+ for (int c = 0; c < depth; ++c)
+ {
+ output_data[i * depth + c] =
+ std::exp((input_data[i * depth + c] - max) * static_cast<float>(params.beta)) / sum;
+ }
+ }
+}
+} // namespace reference
+
// Performs softmax along the input of size (input_size * batch_size).
inline void Softmax(const float *in, const int input_size, const int batch_size, const float beta,
float *out)
@@ -88,87 +131,306 @@ inline void Softmax(const SoftmaxParams &params, const Shape &input_shape, const
out_mat.array().rowwise() *= scale;
}
-inline void Softmax(const SoftmaxParams &params, const Shape &input_shape,
- const uint8_t *input_data, const Shape &output_shape, uint8_t *output_data)
-{
- const int32_t input_beta_multiplier = params.input_multiplier;
- const int32_t input_beta_left_shift = params.input_left_shift;
- const int diff_min = params.diff_min;
- // The representation chosen for the input to the exp() function is Q5.26.
- // We need to leave extra space since values that we skip might be as large as
- // -32 before multiplying by input_beta_multiplier, and therefore as large as
- // -16 afterwards. Note that exp(-8) is definitely not insignificant to
- // accumulation, but exp(-16) definitely is.
- static const int kScaledDiffIntegerBits = 5;
- static const int kAccumulationIntegerBits = 12;
- using FixedPointScaledDiff = gemmlowp::FixedPoint<int32_t, kScaledDiffIntegerBits>;
- using FixedPointAccum = gemmlowp::FixedPoint<int32_t, kAccumulationIntegerBits>;
- using FixedPoint0 = gemmlowp::FixedPoint<int32_t, 0>;
+template <typename T> inline int32_t QuantizeSoftmaxOutput(float prob_rescaled, int32_t zero_point)
+{
+ const int32_t prob_rnd = static_cast<int32_t>(std::round(prob_rescaled));
+ return prob_rnd + zero_point;
+}
+
+#if !__aarch64__
+// On ARM64, rounding is faster than add + truncation, so this specialization is skipped there.
+template <> inline int32_t QuantizeSoftmaxOutput<uint8_t>(float prob_rescaled, int32_t)
+{
+ return static_cast<int32_t>(prob_rescaled + 0.5f);
+}
+#endif
+
+inline void PopulateSoftmaxLookupTable(float *table, float input_scale, float beta)
+{
+ const float scale = -input_scale * beta;
+ const int32_t max_uint8 = std::numeric_limits<uint8_t>::max();
+ for (int32_t val = 0; val <= max_uint8; ++val)
+ {
+ table[max_uint8 - val] = expf(scale * val);
+ }
+}
+template <typename In, typename Out>
+inline void Softmax(const SoftmaxParams &params, const Shape &input_shape, const In *input_data,
+ const Shape &output_shape, Out *output_data)
+{
const int trailing_dim = input_shape.DimensionsCount() - 1;
- const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
- const int depth = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+ const int excluding_last_dim = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+ const int last_dim = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
- for (int i = 0; i < outer_size; ++i)
+ const int32_t clamp_max = std::numeric_limits<Out>::max();
+ const int32_t clamp_min = std::numeric_limits<Out>::min();
+ for (int i = 0; i < excluding_last_dim; ++i)
{
- uint8_t max_in_row = 0;
- for (int c = 0; c < depth; ++c)
+ int32_t max_val = std::numeric_limits<In>::min();
+ // Find max quantized value.
+ for (int j = 0; j < last_dim; ++j)
{
- max_in_row = std::max(max_in_row, input_data[i * depth + c]);
+ max_val = std::max(max_val, static_cast<int32_t>(input_data[j]));
}
- FixedPointAccum sum_of_exps = FixedPointAccum::Zero();
- for (int c = 0; c < depth; ++c)
+ float sum_exp = 0.0f;
+ const int32_t max_uint8 = std::numeric_limits<uint8_t>::max();
+ const float *table_offset = &params.table[max_uint8 - max_val];
+ // Calculate normalizer sum(exp(x)).
+ for (int j = 0; j < last_dim; ++j)
{
- int32_t input_diff = static_cast<int32_t>(input_data[i * depth + c]) - max_in_row;
- if (input_diff >= diff_min)
- {
- const int32_t input_diff_rescaled = MultiplyByQuantizedMultiplierGreaterThanOne(
- input_diff, input_beta_multiplier, input_beta_left_shift);
- const FixedPointScaledDiff scaled_diff_f8 =
- FixedPointScaledDiff::FromRaw(input_diff_rescaled);
- sum_of_exps = sum_of_exps + gemmlowp::Rescale<kAccumulationIntegerBits>(
- exp_on_negative_values(scaled_diff_f8));
- }
+ sum_exp += table_offset[input_data[j]];
}
- int32_t fixed_sum_of_exps = sum_of_exps.raw();
- int headroom_plus_one = CountLeadingZeros(static_cast<uint32_t>(fixed_sum_of_exps));
- // This is the number of bits to the left of the binary point above 1.0.
- // Consider fixed_sum_of_exps=1.25. In that case shifted_scale=0.8 and
- // no later adjustment will be needed.
- int num_bits_over_unit = kAccumulationIntegerBits - headroom_plus_one;
- int32_t shifted_sum_minus_one =
- static_cast<int32_t>((static_cast<uint32_t>(fixed_sum_of_exps) << headroom_plus_one) -
- (static_cast<uint32_t>(1) << 31));
+ const float inv_sum_exp = 1.0f / (sum_exp * params.scale);
+ // Normalize and quantize probabilities.
+ for (int j = 0; j < last_dim; ++j)
+ {
+ const float prob_rescaled = table_offset[input_data[j]] * inv_sum_exp;
+ const int32_t prob_quantized = QuantizeSoftmaxOutput<Out>(prob_rescaled, params.zero_point);
+ output_data[j] = static_cast<Out>(std::max(std::min(clamp_max, prob_quantized), clamp_min));
+ }
+ input_data += last_dim;
+ output_data += last_dim;
+ }
+}
- FixedPoint0 shifted_scale =
- one_over_one_plus_x_for_x_in_0_1(FixedPoint0::FromRaw(shifted_sum_minus_one));
+#ifdef TFLITE_SOFTMAX_USE_UINT16_LUT
+// Looks up each element of <indices> in <table>, returns them in a vector.
+inline uint8x16_t aarch64_lookup_vector(const uint8x16x4_t table[4], uint8x16_t indices)
+{
+ // Look up in 1st quarter of the table: top 2 bits of indices == 00
+ uint8x16_t output1 = vqtbl4q_u8(table[0], indices);
+ // Look up in 2nd quarter of the table: top 2 bits of indices == 01
+ uint8x16_t output2 = vqtbl4q_u8(table[1], veorq_u8(indices, vdupq_n_u8(0x40)));
+ // Look up in 3rd quarter of the table: top 2 bits of indices == 10
+ uint8x16_t output3 = vqtbl4q_u8(table[2], veorq_u8(indices, vdupq_n_u8(0x80)));
+ // Look up in 4th quarter of the table: top 2 bits of indices == 11
+ uint8x16_t output4 = vqtbl4q_u8(table[3], veorq_u8(indices, vdupq_n_u8(0xc0)));
- for (int c = 0; c < depth; ++c)
+ // Combine result of the 4 lookups.
+ return vorrq_u8(vorrq_u8(output1, output2), vorrq_u8(output3, output4));
+}
+
+inline void PopulateSoftmaxUInt8LookupTable(uint8_t *uint8_table1, uint8_t *uint8_table2,
+ float input_scale, float beta)
+{
+ const float scale = input_scale * beta;
+ const int32_t max_uint8 = std::numeric_limits<uint8_t>::max();
+ const int32_t max_uint16 = std::numeric_limits<uint16_t>::max();
+
+ for (int32_t val = 0; val <= max_uint8; ++val)
+ {
+ float input_to_exp = scale * (val - max_uint8);
+ int32_t temp = static_cast<int>(expf(input_to_exp) * max_uint16 + 0.5);
+ temp = std::min(max_uint16, temp);
+ uint8_t part1 = temp >> 8;
+ uint8_t part2 = temp & 0xff;
+ uint8_table1[val] = static_cast<uint8_t>(part1);
+ uint8_table2[val] = static_cast<uint8_t>(part2);
+ }
+}
+
+inline int FindMaxValue(int size, const uint8_t *input_data, uint8_t offset)
+{
+ int32_t max_val = std::numeric_limits<uint8_t>::min();
+ int j = 0;
+
+ uint8x16_t max_val_dup = vdupq_n_u8(max_val);
+ uint8x16_t offset_dup = vdupq_n_u8(offset);
+ for (; j <= size - 16; j += 16)
+ {
+ uint8x16_t input_value = vld1q_u8(input_data + j);
+ input_value = veorq_u8(input_value, offset_dup);
+ max_val_dup = vmaxq_u8(input_value, max_val_dup);
+ }
+ max_val = std::max(max_val, static_cast<int32_t>(vmaxvq_u8(max_val_dup)));
+
+ for (; j < size; ++j)
+ {
+ max_val = std::max(max_val, static_cast<int32_t>(input_data[j] ^ offset));
+ }
+ return max_val;
+}
+
+#ifdef USE_NEON
+// Value_to_store layout:
+// [high_high, high_low, low_high, low_low].
+inline void StoreValue(int32x4x4_t value_to_store, int8_t *output)
+{
+ const int16x8_t result_1 =
+ vcombine_s16(vqmovn_s32(value_to_store.val[1]), vqmovn_s32(value_to_store.val[0]));
+ const int16x8_t result_2 =
+ vcombine_s16(vqmovn_s32(value_to_store.val[3]), vqmovn_s32(value_to_store.val[2]));
+ const int8x16_t result = vcombine_s8(vqmovn_s16(result_2), vqmovn_s16(result_1));
+ vst1q_s8(output, result);
+}
+
+// Value_to_store layout:
+// [high_high, high_low, low_high, low_low].
+inline void StoreValue(int32x4x4_t value_to_store, uint8_t *output)
+{
+ const uint16x8_t result_1 =
+ vcombine_u16(vqmovn_u32(vreinterpretq_u32_s32(value_to_store.val[1])),
+ vqmovn_u32(vreinterpretq_u32_s32(value_to_store.val[0])));
+ const uint16x8_t result_2 =
+ vcombine_u16(vqmovn_u32(vreinterpretq_u32_s32(value_to_store.val[3])),
+ vqmovn_u32(vreinterpretq_u32_s32(value_to_store.val[2])));
+ const uint8x16_t result = vcombine_u8(vqmovn_u16(result_2), vqmovn_u16(result_1));
+ vst1q_u8(output, result);
+}
+
+#endif // USE_NEON
+
+template <typename In, typename Out>
+inline void SoftmaxInt8LUT(const SoftmaxParams &params, const Shape &input_shape,
+ const In *input_data, const Shape &output_shape, Out *output_data)
+{
+ const int trailing_dim = input_shape.DimensionsCount() - 1;
+ const int excluding_last_dim = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+ const int last_dim = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+
+ const int32_t clamp_max = std::numeric_limits<Out>::max();
+ const int32_t clamp_min = std::numeric_limits<Out>::min();
+
+  // The offset is used to interpret the input data correctly.
+  // If the input is uint8, the data is unchanged.
+  // If the input is int8, it is reinterpreted as uint8 and the offset is
+  // applied: e.g., int8 127 becomes 255 in uint8.
+ uint8_t offset = 0;
+ if (std::is_same<In, int8_t>::value)
+ {
+ offset = 0x80;
+ }
+
+ const uint8_t *input_data_uint = reinterpret_cast<const uint8_t *>(input_data);
+
+ // This code uses ARM64-only instructions.
+ // TODO(b/143709993): Port to ARMv7
+
+ // Load the tables into registers. (4*4 128-bit registers)
+ uint8x16x4_t table1[4];
+ table1[0] = vld1q_u8_x4(params.uint8_table1 + 16 * 4 * 0);
+ table1[1] = vld1q_u8_x4(params.uint8_table1 + 16 * 4 * 1);
+ table1[2] = vld1q_u8_x4(params.uint8_table1 + 16 * 4 * 2);
+ table1[3] = vld1q_u8_x4(params.uint8_table1 + 16 * 4 * 3);
+
+ uint8x16x4_t table2[4];
+ table2[0] = vld1q_u8_x4(params.uint8_table2 + 16 * 4 * 0);
+ table2[1] = vld1q_u8_x4(params.uint8_table2 + 16 * 4 * 1);
+ table2[2] = vld1q_u8_x4(params.uint8_table2 + 16 * 4 * 2);
+ table2[3] = vld1q_u8_x4(params.uint8_table2 + 16 * 4 * 3);
+
+ for (int i = 0; i < excluding_last_dim; ++i)
+ {
+ // Find max quantized value.
+ int32_t max_val = FindMaxValue(last_dim, input_data_uint, offset);
+
+ int32_t sum_exp = 0;
+ const int32_t max_uint8 = std::numeric_limits<uint8_t>::max();
+ const uint8_t table_offset = max_uint8 - max_val;
+
+ // Calculate normalizer sum(exp(x)).
+ int sum_j = 0;
+ uint8x16_t table_offset_dup = vdupq_n_u8(table_offset);
+ uint8x16_t offset_dup = vdupq_n_u8(offset);
+ uint32x4_t sum_4 = vdupq_n_u32(0);
+ const int multiplier_shift = 8;
+ for (; sum_j <= last_dim - 16; sum_j += 16)
+ {
+ uint8x16_t input_value = vld1q_u8(input_data_uint + sum_j);
+ input_value = veorq_u8(input_value, offset_dup);
+ input_value = vaddq_u8(input_value, table_offset_dup);
+
+ const uint8x16_t output1 = aarch64_lookup_vector(table1, input_value);
+ const uint8x16_t output2 = aarch64_lookup_vector(table2, input_value);
+
+ uint16x8_t exp_value1 = vshll_n_u8(vget_high_u8(output1), multiplier_shift);
+ uint16x8_t exp_value2 = vshll_n_u8(vget_low_u8(output1), multiplier_shift);
+
+ exp_value1 = vaddw_u8(exp_value1, vget_high_u8(output2));
+ exp_value2 = vaddw_u8(exp_value2, vget_low_u8(output2));
+
+ sum_4 = vpadalq_u16(sum_4, exp_value1);
+ sum_4 = vpadalq_u16(sum_4, exp_value2);
+ }
+ int temp = vgetq_lane_u32(sum_4, 0) + vgetq_lane_u32(sum_4, 1) + vgetq_lane_u32(sum_4, 2) +
+ vgetq_lane_u32(sum_4, 3);
+ sum_exp += temp;
+
+ for (; sum_j < last_dim; ++sum_j)
{
- int32_t input_diff = static_cast<int32_t>(input_data[i * depth + c]) - max_in_row;
- if (input_diff >= diff_min)
- {
- const int32_t input_diff_rescaled = MultiplyByQuantizedMultiplierGreaterThanOne(
- input_diff, input_beta_multiplier, input_beta_left_shift);
- const FixedPointScaledDiff scaled_diff_f8 =
- FixedPointScaledDiff::FromRaw(input_diff_rescaled);
-
- FixedPoint0 exp_in_0 = exp_on_negative_values(scaled_diff_f8);
- int32_t unsat_output = gemmlowp::RoundingDivideByPOT((shifted_scale * exp_in_0).raw(),
- num_bits_over_unit + 31 - 8);
-
- output_data[i * depth + c] = static_cast<uint8_t>(
- std::max(std::min(unsat_output, static_cast<int32_t>(255)), static_cast<int32_t>(0)));
- }
- else
- {
- output_data[i * depth + c] = 0;
- }
+ const uint8_t index = (input_data_uint[sum_j] ^ offset) + table_offset;
+
+ uint8_t part1 = params.uint8_table1[index];
+ uint8_t part2 = params.uint8_table2[index];
+ sum_exp += ((part1 << 8) + part2);
+ }
+
+ const float inv_sum_exp = 1.0f / (sum_exp * params.scale);
+
+ int32_t multiplier, shift;
+ QuantizeMultiplier(inv_sum_exp, &multiplier, &shift);
+
+ // Normalize and quantize probabilities.
+ int j = 0;
+ const int32x4_t output_zp_dup = vdupq_n_s32(params.zero_point);
+ const int32x4_t max_val_dup = vdupq_n_s32(clamp_max);
+ const int32x4_t min_val_dup = vdupq_n_s32(clamp_min);
+
+ for (; j <= last_dim - 16; j += 16)
+ {
+ uint8x16_t input_value = vld1q_u8(input_data_uint + j);
+ input_value = veorq_u8(input_value, offset_dup);
+ input_value = vaddq_u8(input_value, table_offset_dup);
+
+ const uint8x16_t output1 = aarch64_lookup_vector(table1, input_value);
+ const uint8x16_t output2 = aarch64_lookup_vector(table2, input_value);
+
+ uint16x8_t exp_value1 = vshll_n_u8(vget_high_u8(output1), multiplier_shift);
+ uint16x8_t exp_value2 = vshll_n_u8(vget_low_u8(output1), multiplier_shift);
+
+ exp_value1 = vaddw_u8(exp_value1, vget_high_u8(output2));
+ exp_value2 = vaddw_u8(exp_value2, vget_low_u8(output2));
+
+ int32x4x4_t output_value;
+ output_value.val[0] = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(exp_value1)));
+ output_value.val[1] = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(exp_value1)));
+ output_value.val[2] = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(exp_value2)));
+ output_value.val[3] = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(exp_value2)));
+
+ int32x4x4_t temp_val = MultiplyByQuantizedMultiplier4Rows(output_value, multiplier, shift);
+
+ temp_val.val[0] = vaddq_s32(temp_val.val[0], output_zp_dup);
+ temp_val.val[1] = vaddq_s32(temp_val.val[1], output_zp_dup);
+ temp_val.val[2] = vaddq_s32(temp_val.val[2], output_zp_dup);
+ temp_val.val[3] = vaddq_s32(temp_val.val[3], output_zp_dup);
+
+ temp_val.val[0] = vmaxq_s32(vminq_s32(temp_val.val[0], max_val_dup), min_val_dup);
+ temp_val.val[1] = vmaxq_s32(vminq_s32(temp_val.val[1], max_val_dup), min_val_dup);
+ temp_val.val[2] = vmaxq_s32(vminq_s32(temp_val.val[2], max_val_dup), min_val_dup);
+ temp_val.val[3] = vmaxq_s32(vminq_s32(temp_val.val[3], max_val_dup), min_val_dup);
+
+ StoreValue(temp_val, output_data + j);
+ }
+ for (; j < last_dim; ++j)
+ {
+ const uint8_t index = (input_data_uint[j] ^ offset) + table_offset;
+ const uint8_t part1 = params.uint8_table1[index];
+ const uint8_t part2 = params.uint8_table2[index];
+ const int32_t exp_value = (part1 << 8) + part2;
+ const int32_t output_value = MultiplyByQuantizedMultiplier(exp_value, multiplier, shift);
+
+ output_data[j] = static_cast<Out>(
+ std::max(std::min(clamp_max, output_value + params.zero_point), clamp_min));
}
+ input_data_uint += last_dim;
+ output_data += last_dim;
}
}
+#endif // TFLITE_SOFTMAX_USE_UINT16_LUT
} // namespace cker
} // namespace nnfw
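In the uint16 LUT path added above, PopulateSoftmaxUInt8LookupTable encodes round(exp(scale * (val - 255)) * 65535) for each of the 256 possible inputs, splitting the uint16 result into a high-byte table and a low-byte table so both halves can be fetched with 16-byte NEON table lookups. A worked entry, assuming a hypothetical input_scale * beta = 0.1; the key property is that the row maximum always indexes entry 255, because the kernel indexes with (input ^ offset) + (255 - max_val):

#include <algorithm>
#include <cmath>
#include <cstdint>

int main()
{
  const float scale = 0.1f; // hypothetical input_scale * beta
  const int val = 255;      // the entry the row maximum always lands on
  int32_t temp = static_cast<int32_t>(std::exp(scale * (val - 255)) * 65535 + 0.5f);
  temp = std::min(65535, temp);
  const uint8_t part1 = temp >> 8;   // 0xFF: high byte, stored in uint8_table1
  const uint8_t part2 = temp & 0xff; // 0xFF: low byte, stored in uint8_table2
  // SoftmaxInt8LUT reassembles the entry as (part1 << 8) + part2 == 65535,
  // i.e. exp(0) == 1.0 at full uint16 scale.
  (void)part1;
  (void)part2;
  return 0;
}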
diff --git a/compute/cker/include/cker/operation/SpaceToBatchND.h b/compute/cker/include/cker/operation/SpaceToBatchND.h
index feeb358c9..aff36e2f3 100644
--- a/compute/cker/include/cker/operation/SpaceToBatchND.h
+++ b/compute/cker/include/cker/operation/SpaceToBatchND.h
@@ -79,9 +79,9 @@ inline void SpaceToBatchND(const SpaceToBatchParams &params, const Shape &unexte
else
{
const T *in =
- input_data + Offset(input_shape, input_batch,
- (out_h * block_shape_height + shift_h) - padding_top,
- (out_w * block_shape_width + shift_w) - padding_left, 0);
+ input_data + Offset(input_shape, input_batch,
+ (out_h * block_shape_height + shift_h) - padding_top,
+ (out_w * block_shape_width + shift_w) - padding_left, 0);
memcpy(out, in, depth * sizeof(T));
}
}
diff --git a/compute/cker/include/cker/operation/StatelessRandomUniform.h b/compute/cker/include/cker/operation/StatelessRandomUniform.h
index d5952ae23..dcf649ca1 100644
--- a/compute/cker/include/cker/operation/StatelessRandomUniform.h
+++ b/compute/cker/include/cker/operation/StatelessRandomUniform.h
@@ -72,8 +72,8 @@ void Fill(random::PhiloxRandom random, Tensor *output)
Distribution());
}
-inline void StatelessRandomUniform(const Shape &shape_shape, const int *shape_data,
- const Shape &seed_shape, const int *seed_data,
+inline void StatelessRandomUniform(const Shape &shape_shape, const int32_t *shape_data,
+ const Shape &seed_shape, const int32_t *seed_data,
const Shape &output_shape, float *output_data)
{
Tensor shape_t;
@@ -95,7 +95,7 @@ inline void StatelessRandomUniform(const Shape &shape_shape, const int *shape_da
GenerateKey(seed_t, &key, &counter);
Fill<Eigen::ThreadPoolDevice, random::UniformDistribution<random::PhiloxRandom, float>>(
- random::PhiloxRandom(counter, key), &output_t);
+ random::PhiloxRandom(counter, key), &output_t);
}
} // namespace cker
} // namespace nnfw
diff --git a/compute/cker/include/cker/operation/StridedSlice.h b/compute/cker/include/cker/operation/StridedSlice.h
index c57b4daa0..2f1089575 100644
--- a/compute/cker/include/cker/operation/StridedSlice.h
+++ b/compute/cker/include/cker/operation/StridedSlice.h
@@ -260,12 +260,41 @@ template <typename T>
inline void StridedSlice(const StridedSliceParams &op_params, const Shape &unextended_input_shape,
const T *input_data, const Shape &unextended_output_shape, T *output_data)
{
- // Note that the output_shape is not used herein.
- StridedSliceParams params_copy = op_params;
-
assert(unextended_input_shape.DimensionsCount() <= 4);
assert(unextended_output_shape.DimensionsCount() <= 4);
+ bool optimize = true;
+ int st_count = op_params.strides_count;
+ for (int idx = 0; idx < st_count - 1; idx++)
+ {
+ const int axis_size = unextended_input_shape.Dims(idx);
+ const int start = StartForAxis(op_params, unextended_input_shape, idx);
+ const int stop = StopForAxis(op_params, unextended_input_shape, idx, start);
+ if ((axis_size != 1) && (start != 0 || stop != 0))
+ {
+ optimize = false;
+ break;
+ }
+ }
+
+ if (optimize)
+ {
+ if (op_params.strides[st_count - 1] == 1)
+ {
+ const int start = StartForAxis(op_params, unextended_input_shape, st_count - 1);
+ const int end = StopForAxis(op_params, unextended_input_shape, st_count - 1, start);
+
+ for (int idx = 0; idx < end - start; idx++)
+ {
+ output_data[idx] = input_data[idx + start];
+ }
+ return;
+ }
+ }
+
+ // Note that the output_shape is not used herein.
+ StridedSliceParams params_copy = op_params;
+
const Shape input_shape = Shape::ExtendedShape(4, unextended_input_shape);
const Shape output_shape = Shape::ExtendedShape(4, unextended_output_shape);
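The fast path added above triggers when every leading axis is trivial (size 1, or its resolved start and stop are both 0) and the stride on the last axis is 1; the slice is then a contiguous range of the input, so the nested 4-D loop reduces to a flat copy. A sketch of that degenerate case, assuming start and end have already been resolved via StartForAxis/StopForAxis:

#include <algorithm>

// Contiguous fast path: when only the innermost axis is sliced and its stride is 1,
// strided slice is a flat copy of the range [start, end).
template <typename T>
void copy_innermost_slice(const T *input, T *output, int start, int end)
{
  std::copy(input + start, input + end, output);
}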
diff --git a/compute/cker/include/cker/operation/Tile.h b/compute/cker/include/cker/operation/Tile.h
index 1dcdd9b79..42433468a 100644
--- a/compute/cker/include/cker/operation/Tile.h
+++ b/compute/cker/include/cker/operation/Tile.h
@@ -55,7 +55,7 @@ std::pair<int, int> TileOneDimension(const Shape &in_dimensions, const T *in_dat
{
int stride_size = 0, tiled_stride_size = 0;
std::tie(stride_size, tiled_stride_size) =
- TileOneDimension(in_dimensions, copy_from_data, multipliers, copy_to_data, dimension + 1);
+ TileOneDimension(in_dimensions, copy_from_data, multipliers, copy_to_data, dimension + 1);
copy_from_data += stride_size;
copy_to_data += tiled_stride_size;
total_stride_size += stride_size;
diff --git a/compute/cker/include/cker/operation/Transpose.h b/compute/cker/include/cker/operation/Transpose.h
index 9d8cd340d..52c826c39 100644
--- a/compute/cker/include/cker/operation/Transpose.h
+++ b/compute/cker/include/cker/operation/Transpose.h
@@ -288,7 +288,7 @@ size_t Flatten(const Shape &input_shape, const Shape &output_shape, const Transp
return flat_size;
}
-} // namespace anonymous (util)
+} // namespace
// Transpose2D only deals with typical 2D matrix transpose ops.
// Perform transpose by transposing 4x4 blocks of the input, proceeding from
@@ -555,9 +555,9 @@ void Transpose(const TransposeParams &unshrunk_params, const Shape &unshrunk_inp
const int total_size = shrunk_input_shape.FlatSize();
const int non_flatten_size =
- Flatten(shrunk_input_shape, shrunk_output_shape, shrunk_params,
+ Flatten(shrunk_input_shape, shrunk_output_shape, shrunk_params,
- &non_flatten_input_shape, &non_flatten_output_shape, &non_flatten_params);
+ &non_flatten_input_shape, &non_flatten_output_shape, &non_flatten_params);
assert(non_flatten_params.perm[0] != 0);
for (int i = 0; i < total_size; i += non_flatten_size)
diff --git a/compute/cker/include/cker/operation/TransposeConv.h b/compute/cker/include/cker/operation/TransposeConv.h
index 7db3a1179..d41f86047 100644
--- a/compute/cker/include/cker/operation/TransposeConv.h
+++ b/compute/cker/include/cker/operation/TransposeConv.h
@@ -90,11 +90,11 @@ inline void TransposeConv(const TransposeConvParams &params, const Shape &input_
(out_y < output_height))
{
float input_value =
- input_data[Offset(input_shape, batch, in_y, in_x, in_channel)];
- float filter_value = filter_data[Offset(filter_shape, out_channel, filter_y,
- filter_x, in_channel)];
+ input_data[Offset(input_shape, batch, in_y, in_x, in_channel)];
+ float filter_value =
+ filter_data[Offset(filter_shape, out_channel, filter_y, filter_x, in_channel)];
output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] +=
- input_value * filter_value;
+ input_value * filter_value;
}
}
}
diff --git a/compute/cker/include/cker/operation/optimized/BinaryArithmeticOps.h b/compute/cker/include/cker/operation/optimized/BinaryArithmeticOps.h
index ac5069917..1fe3e1517 100644
--- a/compute/cker/include/cker/operation/optimized/BinaryArithmeticOps.h
+++ b/compute/cker/include/cker/operation/optimized/BinaryArithmeticOps.h
@@ -19,6 +19,8 @@
#define __NNFW_CKER_OPTIMIZED_BINARYARITHMETICOPS_H__
#include <functional>
+#include <limits>
+#include <utility>
#include "cker/neon/neon_check.h"
#include "cker/operation/reference/BinaryArithmeticOps.h"
#include "cker/Shape.h"
@@ -33,8 +35,9 @@ namespace cker
namespace optimized
{
+/* Old version: for Sub(float) and Div. */
template <typename ElementwiseF, typename ScalarBroadcastF, typename T>
-inline void BinaryBroadcastFiveFold(const BinaryArithmeticOpParam &params,
+inline void BinaryBroadcastFiveFold(const BinaryArithmeticOpParam &params, bool switch_inputs,
const Shape & /* unswitched_input1_shape */,
const T *unswitched_input1_data,
const Shape & /* unswitched_input2_shape */,
@@ -42,11 +45,8 @@ inline void BinaryBroadcastFiveFold(const BinaryArithmeticOpParam &params,
const Shape & /* output_shape */, T *output_data,
ElementwiseF elementwise_f, ScalarBroadcastF scalar_broadcast_f)
{
- const bool use_unswitched =
- params.broadcast_category == BroadcastableOpCategory::kFirstInputBroadcastsFast;
-
- const T *input1_data = use_unswitched ? unswitched_input1_data : unswitched_input2_data;
- const T *input2_data = use_unswitched ? unswitched_input2_data : unswitched_input1_data;
+ const T *input1_data = switch_inputs ? unswitched_input2_data : unswitched_input1_data;
+ const T *input2_data = switch_inputs ? unswitched_input1_data : unswitched_input2_data;
// Fivefold nested loops. The second input resets its position for each
// iteration of the second loop. The first input resets its position at the
@@ -123,29 +123,129 @@ inline void BinaryBroadcastFiveFold(const BinaryArithmeticOpParam &params,
}
}
-inline int32_t quant8_sum(const BinaryArithmeticOpParam &params, const uint8_t input1_data,
- const uint8_t input2_data)
+// New version: for Mul, Add, and Sub (quant8).
+template <typename ElementwiseF, typename ScalarBroadcastF, typename T>
+inline void BinaryBroadcastFiveFold(const BinaryArithmeticOpParam &unswitched_params,
+ const Shape & /* unswitched_input1_shape */,
+ const T *unswitched_input1_data,
+ const Shape & /* unswitched_input2_shape */,
+ const T *unswitched_input2_data,
+ const Shape & /* output_shape */, T *output_data,
+ ElementwiseF elementwise_f, ScalarBroadcastF scalar_broadcast_f)
+{
+ BinaryArithmeticOpParam switched_params = unswitched_params;
+ switched_params.input1_offset = unswitched_params.input2_offset;
+ switched_params.input1_multiplier = unswitched_params.input2_multiplier;
+ switched_params.input1_shift = unswitched_params.input2_shift;
+ switched_params.input2_offset = unswitched_params.input1_offset;
+ switched_params.input2_multiplier = unswitched_params.input1_multiplier;
+ switched_params.input2_shift = unswitched_params.input1_shift;
+
+ const bool use_unswitched =
+ unswitched_params.broadcast_category == BroadcastableOpCategory::kFirstInputBroadcastsFast;
+
+ const BinaryArithmeticOpParam &params = use_unswitched ? unswitched_params : switched_params;
+ const T *input1_data = use_unswitched ? unswitched_input1_data : unswitched_input2_data;
+ const T *input2_data = use_unswitched ? unswitched_input2_data : unswitched_input1_data;
+
+ // Fivefold nested loops. The second input resets its position for each
+ // iteration of the second loop. The first input resets its position at the
+  // beginning of the fourth loop. The innermost loop applies an elementwise
+  // op over sections of the arrays.
+ T *output_data_ptr = output_data;
+ const T *input1_data_ptr = input1_data;
+ const T *input2_data_reset = input2_data;
+ // In the fivefold pattern, y0, y2 and y4 are not broadcast, and so shared
+ // between input shapes. y3 for input 1 is always broadcast, and so the
+ // dimension there is 1, whereas optionally y1 might be broadcast for
+ // input 2. Put another way, input1.shape.FlatSize = y0 * y1 * y2 * y4,
+ // input2.shape.FlatSize = y0 * y2 * y3 * y4.
+ int y0 = params.broadcast_shape[0];
+ int y1 = params.broadcast_shape[1];
+ int y2 = params.broadcast_shape[2];
+ int y3 = params.broadcast_shape[3];
+ int y4 = params.broadcast_shape[4];
+ if (y4 > 1)
+ {
+ // General fivefold pattern, with y4 > 1 so there is a non-broadcast inner
+ // dimension.
+ for (int i0 = 0; i0 < y0; ++i0)
+ {
+ const T *input2_data_ptr = nullptr;
+ for (int i1 = 0; i1 < y1; ++i1)
+ {
+ input2_data_ptr = input2_data_reset;
+ for (int i2 = 0; i2 < y2; ++i2)
+ {
+ for (int i3 = 0; i3 < y3; ++i3)
+ {
+ elementwise_f(y4, params, input1_data_ptr, input2_data_ptr, output_data_ptr);
+ input2_data_ptr += y4;
+ output_data_ptr += y4;
+ }
+ // We have broadcast y4 of input1 data y3 times, and now move on.
+ input1_data_ptr += y4;
+ }
+ }
+ // We have broadcast y2*y3*y4 of input2 data y1 times, and now move on.
+ input2_data_reset = input2_data_ptr;
+ }
+ }
+ else
+ {
+ // Special case of y4 == 1, in which the innermost loop is a single
+ // element and can be combined with the next (y3) as an inner broadcast.
+ //
+ // Note that this handles the case of pure scalar broadcast when
+ // y0 == y1 == y2 == 1. With low overhead it handles cases such as scalar
+ // broadcast with batch (as y2 > 1).
+ //
+    // NOTE The process is the same as the general case above, except
+    // simplified for y4 == 1; the loop over y3 is contained within the
+    // scalar_broadcast_f function.
+ for (int i0 = 0; i0 < y0; ++i0)
+ {
+ const T *input2_data_ptr = nullptr;
+ for (int i1 = 0; i1 < y1; ++i1)
+ {
+ input2_data_ptr = input2_data_reset;
+ for (int i2 = 0; i2 < y2; ++i2)
+ {
+ scalar_broadcast_f(y3, params, *input1_data_ptr, input2_data_ptr, output_data_ptr);
+ input2_data_ptr += y3;
+ output_data_ptr += y3;
+ input1_data_ptr += 1;
+ }
+ }
+ input2_data_reset = input2_data_ptr;
+ }
+ }
+}
+
+template <typename T>
+inline typename std::enable_if_t<is_quant8<T>::value, int32_t>
+quant8_sum(const BinaryArithmeticOpParam &params, const T input1_data, const T input2_data)
{
const int32_t input1_val = params.input1_offset + input1_data;
const int32_t input2_val = params.input2_offset + input2_data;
const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
const int32_t scaled_input1_val = MultiplyByQuantizedMultiplierSmallerThanOneExp(
- shifted_input1_val, params.input1_multiplier, params.input1_shift);
+ shifted_input1_val, params.input1_multiplier, params.input1_shift);
const int32_t scaled_input2_val = MultiplyByQuantizedMultiplierSmallerThanOneExp(
- shifted_input2_val, params.input2_multiplier, params.input2_shift);
+ shifted_input2_val, params.input2_multiplier, params.input2_shift);
const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
const int32_t raw_output = MultiplyByQuantizedMultiplierSmallerThanOneExp(
- raw_sum, params.output_multiplier, params.output_shift) +
+ raw_sum, params.output_multiplier, params.output_shift) +
params.output_offset;
const int32_t clamped_output = std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, raw_output));
return clamped_output;
}
-inline void AddElementwiseQuant8(int size, const BinaryArithmeticOpParam &params,
- const uint8_t *input1_data, const uint8_t *input2_data,
- uint8_t *output_data)
+inline void AddElementwise(int size, const BinaryArithmeticOpParam &params,
+ const uint8_t *input1_data, const uint8_t *input2_data,
+ uint8_t *output_data)
{
int i = 0;
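quant8_sum above is the scalar fixed-point pipeline for quantized addition: add the offsets folded into params (the kernel folds -zero_point into params.inputN_offset), pre-shift both operands by left_shift for headroom, rescale each by its own multiplier, add, rescale to the output scale, add the output offset, and clamp. A float reference for what that pipeline approximates, as a hedged sketch rather than the production path:

#include <algorithm>
#include <cmath>
#include <cstdint>

// Float reference of the quantized add: dequantize, add, requantize, clamp.
inline int32_t quant8_sum_reference(int32_t q1, int32_t zp1, float s1, int32_t q2, int32_t zp2,
                                    float s2, float out_scale, int32_t out_zp, int32_t qmin,
                                    int32_t qmax)
{
  const float real_sum = (q1 - zp1) * s1 + (q2 - zp2) * s2; // dequantized sum
  const int32_t raw = static_cast<int32_t>(std::lround(real_sum / out_scale)) + out_zp;
  return std::min(qmax, std::max(qmin, raw));
}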
@@ -193,9 +293,9 @@ inline void AddElementwiseQuant8(int size, const BinaryArithmeticOpParam &params
const int16x4_t s1_narrowed = vmovn_s32(s1);
const int16x4_t s2_narrowed = vmovn_s32(s2);
const int16x8_t s =
- vaddq_s16(vcombine_s16(s1_narrowed, s2_narrowed), vdupq_n_s16(params.output_offset));
- const uint8x8_t clamped = vmax_u8(output_activation_min_vector,
- vmin_u8(output_activation_max_vector, vqmovun_s16(s)));
+ vaddq_s16(vcombine_s16(s1_narrowed, s2_narrowed), vdupq_n_s16(params.output_offset));
+ const uint8x8_t clamped =
+ vmax_u8(output_activation_min_vector, vmin_u8(output_activation_max_vector, vqmovun_s16(s)));
vst1_u8(output_data + i, clamped);
}
#endif // NEON
@@ -206,12 +306,12 @@ inline void AddElementwiseQuant8(int size, const BinaryArithmeticOpParam &params
const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
const int32_t scaled_input1_val = MultiplyByQuantizedMultiplierSmallerThanOneExp(
- shifted_input1_val, params.input1_multiplier, params.input1_shift);
+ shifted_input1_val, params.input1_multiplier, params.input1_shift);
const int32_t scaled_input2_val = MultiplyByQuantizedMultiplierSmallerThanOneExp(
- shifted_input2_val, params.input2_multiplier, params.input2_shift);
+ shifted_input2_val, params.input2_multiplier, params.input2_shift);
const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
const int32_t raw_output = MultiplyByQuantizedMultiplierSmallerThanOneExp(
- raw_sum, params.output_multiplier, params.output_shift) +
+ raw_sum, params.output_multiplier, params.output_shift) +
params.output_offset;
const int32_t clamped_output = std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, raw_output));
@@ -220,7 +320,248 @@ inline void AddElementwiseQuant8(int size, const BinaryArithmeticOpParam &params
}
inline void AddElementwise(int size, const BinaryArithmeticOpParam &params,
- const float *input1_data, const float *input2_data, float *output_data)
+ const int8_t *input1_data, const int8_t *input2_data,
+ int8_t *output_data)
+{
+ int i = 0;
+#ifdef USE_NEON
+ const int8x16_t output_activation_min_vector = vdupq_n_s8(params.quantized_activation_min);
+ const int8x16_t output_activation_max_vector = vdupq_n_s8(params.quantized_activation_max);
+
+ const int input1_left_shift = params.left_shift + params.input1_shift;
+ const int input2_left_shift = params.left_shift + params.input2_shift;
+ const int32x4_t input1_left_dup = vdupq_n_s32(input1_left_shift);
+ const int32x4_t input2_left_dup = vdupq_n_s32(input2_left_shift);
+
+ const int16x8_t input1_offset_dup = vdupq_n_s16(params.input1_offset);
+ const int16x8_t input2_offset_dup = vdupq_n_s16(params.input2_offset);
+
+ for (; i <= size - 16; i += 16)
+ {
+ const int8x16_t input1_val_original = vld1q_s8(input1_data + i);
+ const int8x16_t input2_val_original = vld1q_s8(input2_data + i);
+
+ const int16x8_t input1_val_s16_high = vmovl_s8(vget_high_s8(input1_val_original));
+ const int16x8_t input1_val_s16_low = vmovl_s8(vget_low_s8(input1_val_original));
+
+ const int16x8_t input2_val_s16_high = vmovl_s8(vget_high_s8(input2_val_original));
+ const int16x8_t input2_val_s16_low = vmovl_s8(vget_low_s8(input2_val_original));
+ const int16x8_t input1_val_high = vaddq_s16(input1_val_s16_high, input1_offset_dup);
+ const int16x8_t input2_val_high = vaddq_s16(input2_val_s16_high, input2_offset_dup);
+ const int16x8_t input1_val_low = vaddq_s16(input1_val_s16_low, input1_offset_dup);
+ const int16x8_t input2_val_low = vaddq_s16(input2_val_s16_low, input2_offset_dup);
+ const int16x4_t input1_val_high_high = vget_high_s16(input1_val_high);
+ const int16x4_t input1_val_high_low = vget_low_s16(input1_val_high);
+ const int16x4_t input1_val_low_high = vget_high_s16(input1_val_low);
+ const int16x4_t input1_val_low_low = vget_low_s16(input1_val_low);
+ const int16x4_t input2_val_high_high = vget_high_s16(input2_val_high);
+ const int16x4_t input2_val_high_low = vget_low_s16(input2_val_high);
+ const int16x4_t input2_val_low_high = vget_high_s16(input2_val_low);
+ const int16x4_t input2_val_low_low = vget_low_s16(input2_val_low);
+ int32x4_t x111 = vmovl_s16(input1_val_low_low);
+ int32x4_t x112 = vmovl_s16(input1_val_low_high);
+ int32x4_t x121 = vmovl_s16(input1_val_high_low);
+ int32x4_t x122 = vmovl_s16(input1_val_high_high);
+ int32x4_t x211 = vmovl_s16(input2_val_low_low);
+ int32x4_t x212 = vmovl_s16(input2_val_low_high);
+ int32x4_t x221 = vmovl_s16(input2_val_high_low);
+ int32x4_t x222 = vmovl_s16(input2_val_high_high);
+
+ x111 = vshlq_s32(x111, input1_left_dup);
+ x112 = vshlq_s32(x112, input1_left_dup);
+ x121 = vshlq_s32(x121, input1_left_dup);
+ x122 = vshlq_s32(x122, input1_left_dup);
+ x211 = vshlq_s32(x211, input2_left_dup);
+ x212 = vshlq_s32(x212, input2_left_dup);
+ x221 = vshlq_s32(x221, input2_left_dup);
+ x222 = vshlq_s32(x222, input2_left_dup);
+ x111 = vqrdmulhq_n_s32(x111, params.input1_multiplier);
+ x112 = vqrdmulhq_n_s32(x112, params.input1_multiplier);
+ x121 = vqrdmulhq_n_s32(x121, params.input1_multiplier);
+ x122 = vqrdmulhq_n_s32(x122, params.input1_multiplier);
+ x211 = vqrdmulhq_n_s32(x211, params.input2_multiplier);
+ x212 = vqrdmulhq_n_s32(x212, params.input2_multiplier);
+ x221 = vqrdmulhq_n_s32(x221, params.input2_multiplier);
+ x222 = vqrdmulhq_n_s32(x222, params.input2_multiplier);
+ int32x4_t s11 = vaddq_s32(x111, x211);
+ int32x4_t s12 = vaddq_s32(x112, x212);
+ int32x4_t s21 = vaddq_s32(x121, x221);
+ int32x4_t s22 = vaddq_s32(x122, x222);
+ s11 = vqrdmulhq_n_s32(s11, params.output_multiplier);
+ s12 = vqrdmulhq_n_s32(s12, params.output_multiplier);
+ s21 = vqrdmulhq_n_s32(s21, params.output_multiplier);
+ s22 = vqrdmulhq_n_s32(s22, params.output_multiplier);
+ using gemmlowp::RoundingDivideByPOT;
+ s11 = RoundingDivideByPOT(s11, -params.output_shift);
+ s12 = RoundingDivideByPOT(s12, -params.output_shift);
+ s21 = RoundingDivideByPOT(s21, -params.output_shift);
+ s22 = RoundingDivideByPOT(s22, -params.output_shift);
+ const int16x4_t s11_narrowed = vmovn_s32(s11);
+ const int16x4_t s12_narrowed = vmovn_s32(s12);
+ const int16x4_t s21_narrowed = vmovn_s32(s21);
+ const int16x4_t s22_narrowed = vmovn_s32(s22);
+ const int16x8_t s1 =
+ vaddq_s16(vcombine_s16(s11_narrowed, s12_narrowed), vdupq_n_s16(params.output_offset));
+ const int16x8_t s2 =
+ vaddq_s16(vcombine_s16(s21_narrowed, s22_narrowed), vdupq_n_s16(params.output_offset));
+ const int8x16_t s = vcombine_s8(vqmovn_s16(s1), vqmovn_s16(s2));
+
+ const int8x16_t clamped =
+ vmaxq_s8(output_activation_min_vector, vminq_s8(output_activation_max_vector, s));
+ vst1q_s8(output_data + i, clamped);
+ }
+#endif // USE_NEON
+
+ for (; i < size; ++i)
+ {
+ const int32_t input1_val = params.input1_offset + input1_data[i];
+ const int32_t input2_val = params.input2_offset + input2_data[i];
+ const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
+ const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
+ const int32_t scaled_input1_val = MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ shifted_input1_val, params.input1_multiplier, params.input1_shift);
+ const int32_t scaled_input2_val = MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ shifted_input2_val, params.input2_multiplier, params.input2_shift);
+ const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
+ const int32_t raw_output = MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ raw_sum, params.output_multiplier, params.output_shift) +
+ params.output_offset;
+ const int32_t clamped_output = std::min(params.quantized_activation_max,
+ std::max(params.quantized_activation_min, raw_output));
+ output_data[i] = static_cast<int8_t>(clamped_output);
+ }
+}
+
+struct BinaryOpFuncAddFloat
+{
+#ifdef USE_NEON
+ static inline float32x4_t calculate(const float32x4_t &a, const float32x4_t &b)
+ {
+ return vaddq_f32(a, b);
+ }
+#endif // USE_NEON
+ static inline float calculate(const float a, const float b) { return a + b; }
+};
+
+struct BinaryOpFuncSubFloat
+{
+#ifdef USE_NEON
+ static inline float32x4_t calculate(const float32x4_t &a, const float32x4_t &b)
+ {
+ return vsubq_f32(a, b);
+ }
+#endif // USE_NEON
+ static inline float calculate(const float a, const float b) { return a - b; }
+};
+
+struct BinaryOpFuncMulFloat
+{
+#ifdef USE_NEON
+ static inline float32x4_t calculate(const float32x4_t &a, const float32x4_t &b)
+ {
+ return vmulq_f32(a, b);
+ }
+#endif // USE_NEON
+ static inline float calculate(const float a, const float b) { return a * b; }
+};
+
+struct BinaryOpFuncDivFloat
+{
+#ifdef USE_NEON
+#ifdef __aarch64__
+ static inline float32x4_t calculate(const float32x4_t &a, const float32x4_t &b)
+ {
+ return vdivq_f32(a, b);
+ }
+#endif // __aarch64__
+#endif // USE_NEON
+ static inline float calculate(const float a, const float b) { return a / b; }
+};
+
+template <class BASEOPERATOR> struct BinaryOpFuncSwapArgs
+{
+ template <typename T> static inline T calculate(const T &a, const T &b)
+ {
+ return BASEOPERATOR::calculate(b, a);
+ }
+};
+
+struct BinaryOpActivationFloatNone
+{
+#ifdef USE_NEON
+ static inline float32x4_t applyCeiling(const float32x4_t &value, const float32x4_t &ceilingParam)
+ {
+ (void)ceilingParam; // suppress unused argument warning
+ return value;
+ }
+ static inline float32x4_t applyFloor(const float32x4_t &value, const float32x4_t &floorParam)
+ {
+ (void)floorParam;
+ return value;
+ }
+#endif // USE_NEON
+ static inline float applyCeiling(const float value, const float ceilingParam)
+ {
+ (void)ceilingParam;
+ return value;
+ }
+ static inline float applyFloor(const float value, const float floorParam)
+ {
+ (void)floorParam;
+ return value;
+ }
+};
+
+struct BinaryOpActivationFloatMax
+{
+#ifdef USE_NEON
+ static inline float32x4_t applyCeiling(const float32x4_t &value, const float32x4_t &ceilingParam)
+ {
+ (void)ceilingParam; // suppress unused argument warning
+ return value;
+ }
+ static inline float32x4_t applyFloor(const float32x4_t &value, const float32x4_t &floorParam)
+ {
+ return vmaxq_f32(value, floorParam);
+ }
+#endif // USE_NEON
+ static inline float applyCeiling(const float value, const float ceilingParam)
+ {
+ (void)ceilingParam;
+ return value;
+ }
+ static inline float applyFloor(const float value, const float floorParam)
+ {
+ return std::max(value, floorParam);
+ }
+};
+
+struct BinaryOpActivationFloatMinMax
+{
+#ifdef USE_NEON
+ static inline float32x4_t applyCeiling(const float32x4_t &value, const float32x4_t &ceilingParam)
+ {
+ return vminq_f32(value, ceilingParam);
+ }
+ static inline float32x4_t applyFloor(const float32x4_t &value, const float32x4_t &floorParam)
+ {
+ return vmaxq_f32(value, floorParam);
+ }
+#endif // USE_NEON
+ static inline float applyCeiling(const float value, const float ceilingParam)
+ {
+ return std::min(value, ceilingParam);
+ }
+ static inline float applyFloor(const float value, const float floorParam)
+ {
+ return std::max(value, floorParam);
+ }
+};
+
+template <class OPERATOR, class ACTIVATION>
+inline void BinaryOpElementwise(int size, const BinaryArithmeticOpParam &params,
+ const float *input1_data, const float *input2_data,
+ float *output_data)
{
int i = 0;
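BinaryOpElementwise above composes two compile-time policies: an operator (BinaryOpFuncAddFloat, BinaryOpFuncSubFloat, ...) and an activation (None/Max/MinMax), so the NEON body and the scalar tail are written once and an unused clamp compiles away. A minimal reproduction of the pattern under those assumptions, not the production kernels:

#include <algorithm>

// Operator policy: what to compute per element.
struct AddOp
{
  static float calculate(float a, float b) { return a + b; }
};

// Activation policy: how to clamp the result; a "none" policy would return v unchanged.
struct ClampMinMax
{
  static float apply(float v, float lo, float hi) { return std::min(hi, std::max(lo, v)); }
};

// One loop serves every operator/activation combination.
template <class Op, class Act>
void elementwise(int size, const float *a, const float *b, float *out, float lo, float hi)
{
  for (int i = 0; i < size; ++i)
    out[i] = Act::apply(Op::calculate(a[i], b[i]), lo, hi);
}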
@@ -237,18 +578,18 @@ inline void AddElementwise(int size, const BinaryArithmeticOpParam &params,
auto a21 = vld1q_f32(input2_data + i + 4);
auto a22 = vld1q_f32(input2_data + i + 8);
auto a23 = vld1q_f32(input2_data + i + 12);
- auto x0 = vaddq_f32(a10, a20);
- auto x1 = vaddq_f32(a11, a21);
- auto x2 = vaddq_f32(a12, a22);
- auto x3 = vaddq_f32(a13, a23);
- x0 = vmaxq_f32(activation_min, x0);
- x1 = vmaxq_f32(activation_min, x1);
- x2 = vmaxq_f32(activation_min, x2);
- x3 = vmaxq_f32(activation_min, x3);
- x0 = vminq_f32(activation_max, x0);
- x1 = vminq_f32(activation_max, x1);
- x2 = vminq_f32(activation_max, x2);
- x3 = vminq_f32(activation_max, x3);
+ auto x0 = OPERATOR::calculate(a10, a20);
+ auto x1 = OPERATOR::calculate(a11, a21);
+ auto x2 = OPERATOR::calculate(a12, a22);
+ auto x3 = OPERATOR::calculate(a13, a23);
+ x0 = ACTIVATION::applyFloor(x0, activation_min);
+ x1 = ACTIVATION::applyFloor(x1, activation_min);
+ x2 = ACTIVATION::applyFloor(x2, activation_min);
+ x3 = ACTIVATION::applyFloor(x3, activation_min);
+ x0 = ACTIVATION::applyCeiling(x0, activation_max);
+ x1 = ACTIVATION::applyCeiling(x1, activation_max);
+ x2 = ACTIVATION::applyCeiling(x2, activation_max);
+ x3 = ACTIVATION::applyCeiling(x3, activation_max);
vst1q_f32(output_data + i, x0);
vst1q_f32(output_data + i + 4, x1);
vst1q_f32(output_data + i + 8, x2);
@@ -258,26 +599,101 @@ inline void AddElementwise(int size, const BinaryArithmeticOpParam &params,
{
auto a1 = vld1q_f32(input1_data + i);
auto a2 = vld1q_f32(input2_data + i);
- auto x = vaddq_f32(a1, a2);
- x = vmaxq_f32(activation_min, x);
- x = vminq_f32(activation_max, x);
- vst1q_f32(output_data + i, x);
+    auto x = OPERATOR::calculate(a1, a2); // e.g. vaddq_f32 for Add
+ auto x_clamped =
+ ACTIVATION::applyCeiling(ACTIVATION::applyFloor(x, activation_min), activation_max);
+ vst1q_f32(output_data + i, x_clamped);
}
-#endif // NEON
+#endif // USE_NEON
+ for (; i < size; i++)
+ {
+ auto x = OPERATOR::calculate(input1_data[i], input2_data[i]);
+ output_data[i] = ACTIVATION::applyCeiling(
+ ACTIVATION::applyFloor(x, params.float_activation_min), params.float_activation_max);
+ }
+}
+
+// Broadcast binary op template that can often be used for the inner loop of a
+// more general broadcast. It handles a scalar value (LHS) against a vector of
+// values (RHS). Since this is the float path, the quantization fields of
+// params are not used here.
+template <class OPERATOR, class ACTIVATION>
+inline void BinaryOpScalarBroadcast(int size, const BinaryArithmeticOpParam &params,
+ const float broadcast_value, const float *input2_data,
+ float *output_data)
+{
+ int i = 0;
+
+#ifdef USE_NEON
+ const auto activation_min = vdupq_n_f32(params.float_activation_min);
+ const auto activation_max = vdupq_n_f32(params.float_activation_max);
+ const auto broadcast_value_dup = vdupq_n_f32(broadcast_value);
+ for (; i <= size - 16; i += 16)
+ {
+ auto a20 = vld1q_f32(input2_data + i);
+ auto a21 = vld1q_f32(input2_data + i + 4);
+ auto a22 = vld1q_f32(input2_data + i + 8);
+ auto a23 = vld1q_f32(input2_data + i + 12);
+ auto x0 = OPERATOR::calculate(broadcast_value_dup, a20);
+ auto x1 = OPERATOR::calculate(broadcast_value_dup, a21);
+ auto x2 = OPERATOR::calculate(broadcast_value_dup, a22);
+ auto x3 = OPERATOR::calculate(broadcast_value_dup, a23);
+ x0 = ACTIVATION::applyFloor(x0, activation_min);
+ x1 = ACTIVATION::applyFloor(x1, activation_min);
+ x2 = ACTIVATION::applyFloor(x2, activation_min);
+ x3 = ACTIVATION::applyFloor(x3, activation_min);
+ x0 = ACTIVATION::applyCeiling(x0, activation_max);
+ x1 = ACTIVATION::applyCeiling(x1, activation_max);
+ x2 = ACTIVATION::applyCeiling(x2, activation_max);
+ x3 = ACTIVATION::applyCeiling(x3, activation_max);
+ vst1q_f32(output_data + i, x0);
+ vst1q_f32(output_data + i + 4, x1);
+ vst1q_f32(output_data + i + 8, x2);
+ vst1q_f32(output_data + i + 12, x3);
+ }
+ for (; i <= size - 4; i += 4)
+ {
+ auto a2 = vld1q_f32(input2_data + i);
+ auto x = OPERATOR::calculate(broadcast_value_dup, a2);
+ auto x_clamped =
+ ACTIVATION::applyCeiling(ACTIVATION::applyFloor(x, activation_min), activation_max);
+ vst1q_f32(output_data + i, x_clamped);
+ }
+#endif // USE_NEON
for (; i < size; i++)
{
- auto x = input1_data[i] + input2_data[i];
- output_data[i] = ActivationFunctionWithMinMax<float>(x, params.float_activation_min,
- params.float_activation_max);
+ auto x = OPERATOR::calculate(broadcast_value, input2_data[i]);
+ output_data[i] = ACTIVATION::applyCeiling(
+ ACTIVATION::applyFloor(x, params.float_activation_min), params.float_activation_max);
}
}
-inline void AddQuant8(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
- const uint8_t *input1_data, const Shape &input2_shape,
- const uint8_t *input2_data, const Shape &output_shape, uint8_t *output_data)
+using BinaryOpImplFloatFuncs =
+ std::pair<void (*)(int, const BinaryArithmeticOpParam &, const float *, const float *, float *),
+ void (*)(int, const BinaryArithmeticOpParam &, const float, const float *, float *)>;
+
+template <class FUNC>
+inline BinaryOpImplFloatFuncs
+getBinaryOpWithActivationImplFloat(const BinaryArithmeticOpParam &params)
+{
+ if (params.float_activation_max == std::numeric_limits<float>::max())
+ if (params.float_activation_min == std::numeric_limits<float>::lowest())
+ return BinaryOpImplFloatFuncs(BinaryOpElementwise<FUNC, BinaryOpActivationFloatNone>,
+ BinaryOpScalarBroadcast<FUNC, BinaryOpActivationFloatNone>);
+ else
+ return BinaryOpImplFloatFuncs(BinaryOpElementwise<FUNC, BinaryOpActivationFloatMax>,
+ BinaryOpScalarBroadcast<FUNC, BinaryOpActivationFloatMax>);
+ else
+ return BinaryOpImplFloatFuncs(BinaryOpElementwise<FUNC, BinaryOpActivationFloatMinMax>,
+ BinaryOpScalarBroadcast<FUNC, BinaryOpActivationFloatMinMax>);
+}
+
+template <typename T>
+inline typename std::enable_if_t<is_quant8<T>::value>
+Add(const BinaryArithmeticOpParam &params, const Shape &input1_shape, const T *input1_data,
+ const Shape &input2_shape, const T *input2_data, const Shape &output_shape, T *output_data)
{
const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
- AddElementwiseQuant8(flat_size, params, input1_data, input2_data, output_data);
+ AddElementwise(flat_size, params, input1_data, input2_data, output_data);
}
inline void Add(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
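getBinaryOpWithActivationImplFloat above selects the cheapest activation policy once, at dispatch time: no clamp when the range is (lowest, max), floor-only when just a minimum is set (the ReLU case), and full min/max otherwise, returning the elementwise and scalar-broadcast kernels as a pair of function pointers. A simplified analogue of that selection logic, assuming the same sentinel bounds:

#include <limits>

enum class Clamp
{
  None,
  FloorOnly,
  MinMax
};

// Mirror of the dispatch above: restrictive bounds pick stronger clamp policies.
inline Clamp pick_clamp(float lo, float hi)
{
  if (hi == std::numeric_limits<float>::max())
    return (lo == std::numeric_limits<float>::lowest()) ? Clamp::None : Clamp::FloorOnly;
  return Clamp::MinMax;
}
// pick_clamp(0.0f, std::numeric_limits<float>::max()) == Clamp::FloorOnly (ReLU)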
@@ -285,15 +701,16 @@ inline void Add(const BinaryArithmeticOpParam &params, const Shape &input1_shape
const Shape &output_shape, float *output_data)
{
const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
- AddElementwise(flat_size, params, input1_data, input2_data, output_data);
+ auto implFuncs = getBinaryOpWithActivationImplFloat<BinaryOpFuncAddFloat>(params);
+ (*implFuncs.first)(flat_size, params, input1_data, input2_data, output_data);
}
// Scalar-broadcast add that can be used for inner loop of more general
// broadcast add, so that, for example, scalar-broadcast with batch will still
// be fast.
-inline void AddScalarBroadcastQuant8(int size, const BinaryArithmeticOpParam &params,
- uint8_t broadcast_value, const uint8_t *input2_data,
- uint8_t *output_data)
+inline void AddScalarBroadcast(int size, const BinaryArithmeticOpParam &params,
+ uint8_t broadcast_value, const uint8_t *input2_data,
+ uint8_t *output_data)
{
int i = 0;
int32_t clamped_output;
@@ -304,58 +721,115 @@ inline void AddScalarBroadcastQuant8(int size, const BinaryArithmeticOpParam &pa
}
}
-inline void AddScalarBroadcast(int size, const BinaryArithmeticOpParam &params,
- float broadcast_value, const float *input2_data, float *output_data)
+// Scalar-broadcast add that can be used as the inner loop of a more general
+// broadcast add, so that, for example, a scalar broadcast with batch will
+// still be fast.
+inline void AddScalarBroadcast(int size, const BinaryArithmeticOpParam &params, int8_t input1_data,
+ const int8_t *input2_data, int8_t *output_data)
{
+ using gemmlowp::RoundingDivideByPOT;
int i = 0;
#ifdef USE_NEON
- const float32x4_t output_activation_min_vector = vdupq_n_f32(params.float_activation_min);
- const float32x4_t output_activation_max_vector = vdupq_n_f32(params.float_activation_max);
- const float32x4_t broadcast_value_dup = vdupq_n_f32(broadcast_value);
- for (; i <= size - 4; i += 4)
- {
- const float32x4_t input2_val_original = vld1q_f32(input2_data + i);
+ const int32x4_t left_shift_dup = vdupq_n_s32(params.left_shift);
+ const int8x8_t output_activation_min_vector = vdup_n_s8(params.quantized_activation_min);
+ const int8x8_t output_activation_max_vector = vdup_n_s8(params.quantized_activation_max);
- const float32x4_t output = vaddq_f32(input2_val_original, broadcast_value_dup);
+ // Process broadcast scalar.
+ const int8x8_t input1_val_original = vdup_n_s8(input1_data);
+ const int16x8_t input1_val_s16 = vmovl_s8(input1_val_original);
+ const int16x8_t input1_val = vaddq_s16(input1_val_s16, vdupq_n_s16(params.input1_offset));
+ const int16x4_t input1_val_high = vget_high_s16(input1_val);
+ const int16x4_t input1_val_low = vget_low_s16(input1_val);
+ int32x4_t x11 = vmovl_s16(input1_val_low);
+ int32x4_t x12 = vmovl_s16(input1_val_high);
+ x11 = vshlq_s32(x11, left_shift_dup);
+ x12 = vshlq_s32(x12, left_shift_dup);
+ x11 = vqrdmulhq_n_s32(x11, params.input1_multiplier);
+ x12 = vqrdmulhq_n_s32(x12, params.input1_multiplier);
+ const int32x4_t input1_shift_dup = vdupq_n_s32(params.input1_shift);
+ x11 = vshlq_s32(x11, input1_shift_dup);
+ x12 = vshlq_s32(x12, input1_shift_dup);
- const float32x4_t clamped =
- vmaxq_f32(output_activation_min_vector, vminq_f32(output_activation_max_vector, output));
- vst1q_f32(output_data + i, clamped);
+ for (; i <= size - 8; i += 8)
+ {
+ const int8x8_t input2_val_original = vld1_s8(input2_data + i);
+ const int16x8_t input2_val_s16 = vmovl_s8(input2_val_original);
+ const int16x8_t input2_val = vaddq_s16(input2_val_s16, vdupq_n_s16(params.input2_offset));
+ const int16x4_t input2_val_high = vget_high_s16(input2_val);
+ const int16x4_t input2_val_low = vget_low_s16(input2_val);
+ int32x4_t x21 = vmovl_s16(input2_val_low);
+ int32x4_t x22 = vmovl_s16(input2_val_high);
+ x21 = vshlq_s32(x21, left_shift_dup);
+ x22 = vshlq_s32(x22, left_shift_dup);
+ x21 = vqrdmulhq_n_s32(x21, params.input2_multiplier);
+ x22 = vqrdmulhq_n_s32(x22, params.input2_multiplier);
+ const int32x4_t input2_shift_dup = vdupq_n_s32(params.input2_shift);
+ x21 = vshlq_s32(x21, input2_shift_dup);
+ x22 = vshlq_s32(x22, input2_shift_dup);
+ int32x4_t s1 = vaddq_s32(x11, x21);
+ int32x4_t s2 = vaddq_s32(x12, x22);
+ s1 = vqrdmulhq_n_s32(s1, params.output_multiplier);
+ s2 = vqrdmulhq_n_s32(s2, params.output_multiplier);
+ s1 = RoundingDivideByPOT(s1, -params.output_shift);
+ s2 = RoundingDivideByPOT(s2, -params.output_shift);
+ const int16x4_t s1_narrowed = vmovn_s32(s1);
+ const int16x4_t s2_narrowed = vmovn_s32(s2);
+ const int16x8_t s =
+ vaddq_s16(vcombine_s16(s1_narrowed, s2_narrowed), vdupq_n_s16(params.output_offset));
+ const int8x8_t clamped =
+ vmax_s8(output_activation_min_vector, vmin_s8(output_activation_max_vector, vqmovn_s16(s)));
+ vst1_s8(output_data + i, clamped);
}
#endif // USE_NEON
- for (; i < size; ++i)
+
+ if (i < size)
{
- auto x = broadcast_value + input2_data[i];
- output_data[i] = ActivationFunctionWithMinMax<float>(x, params.float_activation_min,
- params.float_activation_max);
+ // Process broadcast scalar.
+ const int32_t input1_val = params.input1_offset + input1_data;
+ const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
+ const int32_t scaled_input1_val = MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ shifted_input1_val, params.input1_multiplier, params.input1_shift);
+
+ for (; i < size; ++i)
+ {
+ const int32_t input2_val = params.input2_offset + input2_data[i];
+ const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
+ const int32_t scaled_input2_val = MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ shifted_input2_val, params.input2_multiplier, params.input2_shift);
+ const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
+ const int32_t raw_output = MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ raw_sum, params.output_multiplier, params.output_shift) +
+ params.output_offset;
+ const int32_t clamped_output = std::min(
+ params.quantized_activation_max, std::max(params.quantized_activation_min, raw_output));
+ output_data[i] = static_cast<int8_t>(clamped_output);
+ }
}
}
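+
+// In fixed-point terms the scalar tail above computes, for k = 1, 2:
+//   s_k = fpmul((x_k + input_k_offset) << left_shift, input_k_multiplier, input_k_shift)
+//   out = clamp(fpmul(s_1 + s_2, output_multiplier, output_shift) + output_offset)
+// where fpmul stands for the rounding, saturating fixed-point multiply in
+// MultiplyByQuantizedMultiplierSmallerThanOneExp and clamp applies the
+// quantized activation bounds.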
-inline void BroadcastAddDispatchQuant8(const BinaryArithmeticOpParam &params,
- const Shape &input1_shape, const uint8_t *input1_data,
- const Shape &input2_shape, const uint8_t *input2_data,
- const Shape &output_shape, uint8_t *output_data)
+template <typename T>
+inline typename std::enable_if_t<is_quant8<T>::value>
+BroadcastAddDispatch(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
+ const T *input1_data, const Shape &input2_shape, const T *input2_data,
+ const Shape &output_shape, T *output_data)
{
if (params.broadcast_category == BroadcastableOpCategory::kGenericBroadcast)
{
- const std::function<uint8_t(const BinaryArithmeticOpParam &, const uint8_t &, const uint8_t &)>
- fn = [](const BinaryArithmeticOpParam &params, const uint8_t &a,
- const uint8_t &b) -> uint8_t {
- return static_cast<uint8_t>(quant8_sum(params, a, b));
- };
- reference::BroadcastBinaryArithmeticOpSlowQuant8(params, input1_shape, input1_data,
- input2_shape, input2_data, output_shape,
- output_data, fn);
- }
- else
- {
- BinaryBroadcastFiveFold(
- params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data,
- static_cast<void (*)(int, const BinaryArithmeticOpParam &, const uint8_t *, const uint8_t *,
- uint8_t *)>(AddElementwiseQuant8),
- static_cast<void (*)(int, const BinaryArithmeticOpParam &, uint8_t, const uint8_t *,
- uint8_t *)>(AddScalarBroadcastQuant8));
+ const std::function<T(const BinaryArithmeticOpParam &, const T &, const T &)> fn =
+ [](const BinaryArithmeticOpParam &params, const T &a, const T &b) {
+ return static_cast<T>(quant8_sum(params, a, b));
+ };
+ reference::BroadcastBinaryArithmeticOpSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data, fn);
+ return;
}
+
+ BinaryBroadcastFiveFold(
+ params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data,
+ static_cast<void (*)(int, const BinaryArithmeticOpParam &, const T *, const T *, T *)>(
+ AddElementwise),
+ static_cast<void (*)(int, const BinaryArithmeticOpParam &, T, const T *, T *)>(
+ AddScalarBroadcast));
}
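+
+// Dispatch summary: fully generic broadcast shapes take the slow
+// per-element reference path; every other category is handled by
+// BinaryBroadcastFiveFold, which runs the fast elementwise and
+// scalar-broadcast kernels above over contiguous runs.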
inline void BroadcastAddDispatch(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
@@ -366,18 +840,18 @@ inline void BroadcastAddDispatch(const BinaryArithmeticOpParam &params, const Sh
if (params.broadcast_category == BroadcastableOpCategory::kGenericBroadcast)
{
const std::function<float(const float &, const float &)> fn =
- [](const float &a, const float &b) -> float { return a + b; };
+ [](const float &a, const float &b) -> float { return a + b; };
reference::BroadcastBinaryArithmeticOpSlow(params, input1_shape, input1_data, input2_shape,
input2_data, output_shape, output_data, fn);
}
else
{
+ auto implFuncs = getBinaryOpWithActivationImplFloat<BinaryOpFuncAddFloat>(params);
+
BinaryBroadcastFiveFold(
- params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data,
- static_cast<void (*)(int, const BinaryArithmeticOpParam &, const float *, const float *,
- float *)>(AddElementwise),
- static_cast<void (*)(int, const BinaryArithmeticOpParam &, float, const float *, float *)>(
- AddScalarBroadcast));
+ params, params.broadcast_category == BroadcastableOpCategory::kSecondInputBroadcastsFast,
+ input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data,
+ implFuncs.first, implFuncs.second);
}
}
@@ -385,75 +859,57 @@ inline void Sub(const BinaryArithmeticOpParam &params, const Shape &input1_shape
const float *input1_data, const Shape &input2_shape, const float *input2_data,
const Shape &output_shape, float *output_data)
{
- int i = 0;
- const int size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
-#ifdef USE_NEON
- const auto activation_min = vdupq_n_f32(params.float_activation_min);
- const auto activation_max = vdupq_n_f32(params.float_activation_max);
- for (; i <= size - 16; i += 16)
+ const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
+ auto implFuncs = getBinaryOpWithActivationImplFloat<BinaryOpFuncSubFloat>(params);
+ (*implFuncs.first)(flat_size, params, input1_data, input2_data, output_data);
+}
+
+inline void BroadcastSubDispatch(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
+ const float *input1_data, const Shape &input2_shape,
+ const float *input2_data, const Shape &output_shape,
+ float *output_data)
+{
+ if (params.broadcast_category == BroadcastableOpCategory::kFirstInputBroadcastsFast)
{
- auto a10 = vld1q_f32(input1_data + i);
- auto a11 = vld1q_f32(input1_data + i + 4);
- auto a12 = vld1q_f32(input1_data + i + 8);
- auto a13 = vld1q_f32(input1_data + i + 12);
- auto a20 = vld1q_f32(input2_data + i);
- auto a21 = vld1q_f32(input2_data + i + 4);
- auto a22 = vld1q_f32(input2_data + i + 8);
- auto a23 = vld1q_f32(input2_data + i + 12);
- auto x0 = vsubq_f32(a10, a20);
- auto x1 = vsubq_f32(a11, a21);
- auto x2 = vsubq_f32(a12, a22);
- auto x3 = vsubq_f32(a13, a23);
- x0 = vmaxq_f32(activation_min, x0);
- x1 = vmaxq_f32(activation_min, x1);
- x2 = vmaxq_f32(activation_min, x2);
- x3 = vmaxq_f32(activation_min, x3);
- x0 = vminq_f32(activation_max, x0);
- x1 = vminq_f32(activation_max, x1);
- x2 = vminq_f32(activation_max, x2);
- x3 = vminq_f32(activation_max, x3);
- vst1q_f32(output_data + i, x0);
- vst1q_f32(output_data + i + 4, x1);
- vst1q_f32(output_data + i + 8, x2);
- vst1q_f32(output_data + i + 12, x3);
+ auto implFuncs = getBinaryOpWithActivationImplFloat<BinaryOpFuncSubFloat>(params);
+ BinaryBroadcastFiveFold(params, false, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data, implFuncs.first, implFuncs.second);
}
- for (; i <= size - 4; i += 4)
+ else if (params.broadcast_category == BroadcastableOpCategory::kSecondInputBroadcastsFast)
{
- auto a1 = vld1q_f32(input1_data + i);
- auto a2 = vld1q_f32(input2_data + i);
- auto x = vsubq_f32(a1, a2);
- x = vmaxq_f32(activation_min, x);
- x = vminq_f32(activation_max, x);
- vst1q_f32(output_data + i, x);
+ auto implFuncs =
+ getBinaryOpWithActivationImplFloat<BinaryOpFuncSwapArgs<BinaryOpFuncSubFloat>>(params);
+ BinaryBroadcastFiveFold(params, true, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data, implFuncs.first, implFuncs.second);
}
-#endif // NEON
-
- for (; i < size; i++)
+ else
{
- auto x = input1_data[i] - input2_data[i];
- output_data[i] =
- ActivationFunctionWithMinMax(x, params.float_activation_min, params.float_activation_max);
+ const std::function<float(const float &, const float &)> fn =
+ [](const float &a, const float &b) -> float { return a - b; };
+ reference::BroadcastBinaryArithmeticOpSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data, fn);
}
}
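+
+// Note on the SwapArgs variant above: BinaryBroadcastFiveFold hands the
+// scalar-broadcast kernel the value from whichever side broadcasts fast.
+// Subtraction is not commutative, so when the second input is the fast side,
+// BinaryOpFuncSwapArgs<BinaryOpFuncSubFloat> flips the operands back so the
+// result stays input1 - input2. (Inferred from the boolean passed to
+// BinaryBroadcastFiveFold; BroadcastDivDispatch below uses the same pattern.)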
-inline int32_t quant8_mul(const BinaryArithmeticOpParam &params, const uint8_t input1_data,
- const uint8_t input2_data)
+template <typename T>
+inline typename std::enable_if_t<is_quant8<T>::value, int32_t>
+quant8_mul(const BinaryArithmeticOpParam &params, const T input1_data, const T input2_data)
{
const int32_t input1_val = params.input1_offset + input1_data;
const int32_t input2_val = params.input2_offset + input2_data;
const int32_t unclamped_result =
- params.output_offset + MultiplyByQuantizedMultiplier(input1_val * input2_val,
- params.output_multiplier,
- params.output_shift);
+ params.output_offset + MultiplyByQuantizedMultiplier(input1_val * input2_val,
+ params.output_multiplier,
+ params.output_shift);
const int32_t clamped_output = std::min(
- params.quantized_activation_max, std::max(params.quantized_activation_min, unclamped_result));
+ params.quantized_activation_max, std::max(params.quantized_activation_min, unclamped_result));
return clamped_output;
}
-inline void MulElementwiseQuant8(int size, const BinaryArithmeticOpParam &params,
- const uint8_t *input1_data, const uint8_t *input2_data,
- uint8_t *output_data)
+inline void MulElementwise(int size, const BinaryArithmeticOpParam &params,
+ const uint8_t *input1_data, const uint8_t *input2_data,
+ uint8_t *output_data)
{
int i = 0;
@@ -495,8 +951,8 @@ inline void MulElementwiseQuant8(int size, const BinaryArithmeticOpParam &params
const auto p1_narrowed = vqmovn_s32(p1);
const auto p2_narrowed = vqmovn_s32(p2);
const auto p = vaddq_s16(vcombine_s16(p1_narrowed, p2_narrowed), output_offset_vector);
- const auto clamped = vmax_u8(output_activation_min_vector,
- vmin_u8(output_activation_max_vector, vqmovun_s16(p)));
+ const auto clamped =
+ vmax_u8(output_activation_min_vector, vmin_u8(output_activation_max_vector, vqmovun_s16(p)));
vst1_u8(output_data + i, clamped);
}
#endif // USE_NEON
@@ -506,76 +962,111 @@ inline void MulElementwiseQuant8(int size, const BinaryArithmeticOpParam &params
const int32_t input1_val = params.input1_offset + input1_data[i];
const int32_t input2_val = params.input2_offset + input2_data[i];
const int32_t unclamped_result =
- params.output_offset + MultiplyByQuantizedMultiplier(input1_val * input2_val,
- params.output_multiplier,
- params.output_shift);
- const int32_t clamped_output =
- std::min(params.quantized_activation_max,
- std::max(params.quantized_activation_min, unclamped_result));
+ params.output_offset + MultiplyByQuantizedMultiplier(input1_val * input2_val,
+ params.output_multiplier,
+ params.output_shift);
+ const int32_t clamped_output = std::min(
+ params.quantized_activation_max, std::max(params.quantized_activation_min, unclamped_result));
output_data[i] = static_cast<uint8_t>(clamped_output);
}
}
inline void MulElementwise(int size, const BinaryArithmeticOpParam &params,
- const float *input1_data, const float *input2_data, float *output_data)
+ const int8_t *input1_data, const int8_t *input2_data,
+ int8_t *output_data)
{
int i = 0;
-
#ifdef USE_NEON
- const auto activation_min = vdupq_n_f32(params.float_activation_min);
- const auto activation_max = vdupq_n_f32(params.float_activation_max);
+ const int16x8_t input1_offset_vector = vdupq_n_s16(params.input1_offset);
+ const int16x8_t input2_offset_vector = vdupq_n_s16(params.input2_offset);
+ const int16x8_t output_offset_vector = vdupq_n_s16(params.output_offset);
+ const auto output_activation_min_vector = vdupq_n_s8(params.quantized_activation_min);
+ const auto output_activation_max_vector = vdupq_n_s8(params.quantized_activation_max);
+ const int left_shift = std::max(0, params.output_shift);
+ const int right_shift = std::max(0, -params.output_shift);
+ const int32x4_t left_shift_vec = vdupq_n_s32(left_shift);
for (; i <= size - 16; i += 16)
{
- auto a10 = vld1q_f32(input1_data + i);
- auto a11 = vld1q_f32(input1_data + i + 4);
- auto a12 = vld1q_f32(input1_data + i + 8);
- auto a13 = vld1q_f32(input1_data + i + 12);
- auto a20 = vld1q_f32(input2_data + i);
- auto a21 = vld1q_f32(input2_data + i + 4);
- auto a22 = vld1q_f32(input2_data + i + 8);
- auto a23 = vld1q_f32(input2_data + i + 12);
- auto x0 = vmulq_f32(a10, a20);
- auto x1 = vmulq_f32(a11, a21);
- auto x2 = vmulq_f32(a12, a22);
- auto x3 = vmulq_f32(a13, a23);
- x0 = vmaxq_f32(activation_min, x0);
- x1 = vmaxq_f32(activation_min, x1);
- x2 = vmaxq_f32(activation_min, x2);
- x3 = vmaxq_f32(activation_min, x3);
- x0 = vminq_f32(activation_max, x0);
- x1 = vminq_f32(activation_max, x1);
- x2 = vminq_f32(activation_max, x2);
- x3 = vminq_f32(activation_max, x3);
- vst1q_f32(output_data + i, x0);
- vst1q_f32(output_data + i + 4, x1);
- vst1q_f32(output_data + i + 8, x2);
- vst1q_f32(output_data + i + 12, x3);
- }
- for (; i <= size - 4; i += 4)
- {
- auto a1 = vld1q_f32(input1_data + i);
- auto a2 = vld1q_f32(input2_data + i);
- auto x = vmulq_f32(a1, a2);
- x = vmaxq_f32(activation_min, x);
- x = vminq_f32(activation_max, x);
- vst1q_f32(output_data + i, x);
+ // We load / store 16 at a time, multiplying as four sets of 4 int32s.
+ const int8x16_t input1_val_original = vld1q_s8(input1_data + i);
+ const int8x16_t input2_val_original = vld1q_s8(input2_data + i);
+
+ const int16x8_t input1_val_s16_high = vmovl_s8(vget_high_s8(input1_val_original));
+ const int16x8_t input1_val_s16_low = vmovl_s8(vget_low_s8(input1_val_original));
+
+ const int16x8_t input2_val_s16_high = vmovl_s8(vget_high_s8(input2_val_original));
+ const int16x8_t input2_val_s16_low = vmovl_s8(vget_low_s8(input2_val_original));
+ const int16x8_t input1_val_high = vaddq_s16(input1_val_s16_high, input1_offset_vector);
+ const int16x8_t input2_val_high = vaddq_s16(input2_val_s16_high, input2_offset_vector);
+ const int16x8_t input1_val_low = vaddq_s16(input1_val_s16_low, input1_offset_vector);
+ const int16x8_t input2_val_low = vaddq_s16(input2_val_s16_low, input2_offset_vector);
+ const int16x4_t input1_val_high_high = vget_high_s16(input1_val_high);
+ const int16x4_t input1_val_high_low = vget_low_s16(input1_val_high);
+ const int16x4_t input1_val_low_high = vget_high_s16(input1_val_low);
+ const int16x4_t input1_val_low_low = vget_low_s16(input1_val_low);
+ const int16x4_t input2_val_high_high = vget_high_s16(input2_val_high);
+ const int16x4_t input2_val_high_low = vget_low_s16(input2_val_high);
+ const int16x4_t input2_val_low_high = vget_high_s16(input2_val_low);
+ const int16x4_t input2_val_low_low = vget_low_s16(input2_val_low);
+
+ auto p1 = vmull_s16(input2_val_high_high, input1_val_high_high);
+ auto p2 = vmull_s16(input2_val_high_low, input1_val_high_low);
+ auto p3 = vmull_s16(input2_val_low_high, input1_val_low_high);
+ auto p4 = vmull_s16(input2_val_low_low, input1_val_low_low);
+
+ p1 = vshlq_s32(p1, left_shift_vec);
+ p2 = vshlq_s32(p2, left_shift_vec);
+ p3 = vshlq_s32(p3, left_shift_vec);
+ p4 = vshlq_s32(p4, left_shift_vec);
+
+ p1 = vqrdmulhq_n_s32(p1, params.output_multiplier);
+ p2 = vqrdmulhq_n_s32(p2, params.output_multiplier);
+ p3 = vqrdmulhq_n_s32(p3, params.output_multiplier);
+ p4 = vqrdmulhq_n_s32(p4, params.output_multiplier);
+ using gemmlowp::RoundingDivideByPOT;
+ p1 = RoundingDivideByPOT(p1, right_shift);
+ p2 = RoundingDivideByPOT(p2, right_shift);
+ p3 = RoundingDivideByPOT(p3, right_shift);
+ p4 = RoundingDivideByPOT(p4, right_shift);
+
+ const auto p1_narrowed = vqmovn_s32(p1);
+ const auto p2_narrowed = vqmovn_s32(p2);
+ const auto p3_narrowed = vqmovn_s32(p3);
+ const auto p4_narrowed = vqmovn_s32(p4);
+
+ const int16x8_t p_part1 =
+ vaddq_s16(vcombine_s16(p2_narrowed, p1_narrowed), output_offset_vector);
+ const int16x8_t p_part2 =
+ vaddq_s16(vcombine_s16(p4_narrowed, p3_narrowed), output_offset_vector);
+ const int8x16_t p = vcombine_s8(vqmovn_s16(p_part2), vqmovn_s16(p_part1));
+
+ const auto clamped =
+ vmaxq_s8(output_activation_min_vector, vminq_s8(output_activation_max_vector, p));
+ vst1q_s8(output_data + i, clamped);
}
#endif // USE_NEON
- for (; i < size; i++)
+ for (; i < size; ++i)
{
- auto x = input1_data[i] * input2_data[i];
- output_data[i] =
- ActivationFunctionWithMinMax(x, params.float_activation_min, params.float_activation_max);
+ const int32_t input1_val = params.input1_offset + input1_data[i];
+ const int32_t input2_val = params.input2_offset + input2_data[i];
+ const int32_t unclamped_result =
+ params.output_offset + MultiplyByQuantizedMultiplier(input1_val * input2_val,
+ params.output_multiplier,
+ params.output_shift);
+ const int32_t clamped_output = std::min(
+ params.quantized_activation_max, std::max(params.quantized_activation_min, unclamped_result));
+ output_data[i] = static_cast<int8_t>(clamped_output);
}
}
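+
+// In fixed-point terms the int8 kernel above computes:
+//   out = clamp(fpmul((x1 + input1_offset) * (x2 + input2_offset),
+//                     output_multiplier, output_shift) + output_offset)
+// In the NEON body that fpmul is spelled out as an explicit left shift, a
+// saturating rounding doubling multiply (vqrdmulhq_n_s32) and a rounding
+// right shift (RoundingDivideByPOT).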
-inline void MulQuant8(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
- const uint8_t *input1_data, const Shape &input2_shape,
- const uint8_t *input2_data, const Shape &output_shape, uint8_t *output_data)
+template <typename T>
+inline typename std::enable_if_t<is_quant8<T>::value>
+Mul(const BinaryArithmeticOpParam &params, const Shape &input1_shape, const T *input1_data,
+ const Shape &input2_shape, const T *input2_data, const Shape &output_shape, T *output_data)
{
const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
- MulElementwiseQuant8(flat_size, params, input1_data, input2_data, output_data);
+ MulElementwise(flat_size, params, input1_data, input2_data, output_data);
}
inline void Mul(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
@@ -583,12 +1074,13 @@ inline void Mul(const BinaryArithmeticOpParam &params, const Shape &input1_shape
const Shape &output_shape, float *output_data)
{
const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
- MulElementwise(flat_size, params, input1_data, input2_data, output_data);
+ auto implFuncs = getBinaryOpWithActivationImplFloat<BinaryOpFuncMulFloat>(params);
+ (*implFuncs.first)(flat_size, params, input1_data, input2_data, output_data);
}
-inline void MulSimpleBroadcastQuant8(int size, const BinaryArithmeticOpParam &params,
- const uint8_t broadcast_value, const uint8_t *input2_data,
- uint8_t *output_data)
+inline void MulSimpleBroadcast(int size, const BinaryArithmeticOpParam &params,
+ const uint8_t broadcast_value, const uint8_t *input2_data,
+ uint8_t *output_data)
{
int i = 0;
int32_t clamped_output;
@@ -600,60 +1092,108 @@ inline void MulSimpleBroadcastQuant8(int size, const BinaryArithmeticOpParam &pa
}
// Broadcast mul that can often be used as the inner loop of a broadcast Mul.
-// This function will handle scalar_value (LHS) * vector_values (RHS).
-// Since it's a float function, input params does not matter here.
inline void MulSimpleBroadcast(int size, const BinaryArithmeticOpParam &params,
- const float broadcast_value, const float *input2_data,
- float *output_data)
+ const int8_t broadcast_value, const int8_t *input2_data,
+ int8_t *output_data)
{
+ const int16_t input1_val = params.input1_offset + broadcast_value;
+
int i = 0;
#ifdef USE_NEON
- const float32x4_t output_activation_min_vector = vdupq_n_f32(params.float_activation_min);
- const float32x4_t output_activation_max_vector = vdupq_n_f32(params.float_activation_max);
- const float32x4_t broadcast_value_dup = vdupq_n_f32(broadcast_value);
- for (; i <= size - 4; i += 4)
+ const auto input2_offset_vector = vdupq_n_s16(params.input2_offset);
+ const auto output_offset_vector = vdupq_n_s16(params.output_offset);
+ const auto output_activation_min_vector = vdupq_n_s8(params.quantized_activation_min);
+ const auto output_activation_max_vector = vdupq_n_s8(params.quantized_activation_max);
+ const int left_shift = std::max(0, params.output_shift);
+ const int right_shift = std::max(0, -params.output_shift);
+ const int32x4_t left_shift_vec = vdupq_n_s32(left_shift);
+ for (; i <= size - 16; i += 16)
{
- const float32x4_t input2_val_original = vld1q_f32(input2_data + i);
+ // We load / store 16 at a time, multiplying as four sets of 4 int32s.
+ const auto input2_val_original = vld1q_s8(input2_data + i);
+ const auto input2_val_s16_high = vmovl_s8(vget_high_s8(input2_val_original));
+ const auto input2_val_s16_low = vmovl_s8(vget_low_s8(input2_val_original));
+
+ const auto input2_val_high = vaddq_s16(input2_val_s16_high, input2_offset_vector);
+ const auto input2_val_low = vaddq_s16(input2_val_s16_low, input2_offset_vector);
+
+ const auto input2_val_low_low = vget_low_s16(input2_val_low);
+ const auto input2_val_low_high = vget_high_s16(input2_val_low);
+ const auto input2_val_high_low = vget_low_s16(input2_val_high);
+ const auto input2_val_high_high = vget_high_s16(input2_val_high);
+
+ auto p1 = vmull_n_s16(input2_val_high_high, input1_val);
+ auto p2 = vmull_n_s16(input2_val_high_low, input1_val);
+ auto p3 = vmull_n_s16(input2_val_low_high, input1_val);
+ auto p4 = vmull_n_s16(input2_val_low_low, input1_val);
+
+ p1 = vshlq_s32(p1, left_shift_vec);
+ p2 = vshlq_s32(p2, left_shift_vec);
+ p3 = vshlq_s32(p3, left_shift_vec);
+ p4 = vshlq_s32(p4, left_shift_vec);
+
+ p1 = vqrdmulhq_n_s32(p1, params.output_multiplier);
+ p2 = vqrdmulhq_n_s32(p2, params.output_multiplier);
+ p3 = vqrdmulhq_n_s32(p3, params.output_multiplier);
+ p4 = vqrdmulhq_n_s32(p4, params.output_multiplier);
+ using gemmlowp::RoundingDivideByPOT;
+ p1 = RoundingDivideByPOT(p1, right_shift);
+ p2 = RoundingDivideByPOT(p2, right_shift);
+ p3 = RoundingDivideByPOT(p3, right_shift);
+ p4 = RoundingDivideByPOT(p4, right_shift);
- const float32x4_t output = vmulq_f32(input2_val_original, broadcast_value_dup);
+ const auto p1_narrowed = vqmovn_s32(p1);
+ const auto p2_narrowed = vqmovn_s32(p2);
+ const auto p3_narrowed = vqmovn_s32(p3);
+ const auto p4_narrowed = vqmovn_s32(p4);
- const float32x4_t clamped =
- vmaxq_f32(output_activation_min_vector, vminq_f32(output_activation_max_vector, output));
- vst1q_f32(output_data + i, clamped);
+ const int16x8_t p_part1 =
+ vaddq_s16(vcombine_s16(p2_narrowed, p1_narrowed), output_offset_vector);
+ const int16x8_t p_part2 =
+ vaddq_s16(vcombine_s16(p4_narrowed, p3_narrowed), output_offset_vector);
+ const int8x16_t p = vcombine_s8(vqmovn_s16(p_part2), vqmovn_s16(p_part1));
+
+ const auto clamped =
+ vmaxq_s8(output_activation_min_vector, vminq_s8(output_activation_max_vector, p));
+ vst1q_s8(output_data + i, clamped);
}
#endif // USE_NEON
for (; i < size; ++i)
{
- float x = broadcast_value * input2_data[i];
- output_data[i] =
- ActivationFunctionWithMinMax(x, params.float_activation_min, params.float_activation_max);
+ const int32_t input2_val = params.input2_offset + input2_data[i];
+ const int32_t unclamped_result =
+ params.output_offset + MultiplyByQuantizedMultiplier(input1_val * input2_val,
+ params.output_multiplier,
+ params.output_shift);
+ const int32_t clamped_output = std::min(
+ params.quantized_activation_max, std::max(params.quantized_activation_min, unclamped_result));
+ output_data[i] = static_cast<int8_t>(clamped_output);
}
}
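+
+// Note that the broadcast scalar's offset addition is hoisted out of both
+// loops (input1_val above), so each iteration only widens and offsets the
+// input2 values before the shared rescale-and-clamp sequence.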
-inline void BroadcastMulDispatchQuant8(const BinaryArithmeticOpParam &params,
- const Shape &input1_shape, const uint8_t *input1_data,
- const Shape &input2_shape, const uint8_t *input2_data,
- const Shape &output_shape, uint8_t *output_data)
+template <typename T>
+inline typename std::enable_if_t<is_quant8<T>::value>
+BroadcastMulDispatch(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
+ const T *input1_data, const Shape &input2_shape, const T *input2_data,
+ const Shape &output_shape, T *output_data)
{
if (params.broadcast_category == BroadcastableOpCategory::kGenericBroadcast)
{
- const std::function<uint8_t(const BinaryArithmeticOpParam &, const uint8_t &, const uint8_t &)>
- fn = [](const BinaryArithmeticOpParam &params, const uint8_t &a,
- const uint8_t &b) -> uint8_t {
- return static_cast<uint8_t>(quant8_mul(params, a, b));
- };
- reference::BroadcastBinaryArithmeticOpSlowQuant8(params, input1_shape, input1_data,
- input2_shape, input2_data, output_shape,
- output_data, fn);
+ const std::function<T(const BinaryArithmeticOpParam &, const T &, const T &)> fn =
+ [](const BinaryArithmeticOpParam &params, const T &a, const T &b) {
+ return static_cast<T>(quant8_mul(params, a, b));
+ };
+ reference::BroadcastBinaryArithmeticOpSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data, fn);
return;
}
BinaryBroadcastFiveFold(
- params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data,
- static_cast<void (*)(int, const BinaryArithmeticOpParam &, const uint8_t *, const uint8_t *,
- uint8_t *)>(MulElementwiseQuant8),
- static_cast<void (*)(int, const BinaryArithmeticOpParam &, uint8_t, const uint8_t *,
- uint8_t *)>(MulSimpleBroadcastQuant8));
+ params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data,
+ static_cast<void (*)(int, const BinaryArithmeticOpParam &, const T *, const T *, T *)>(
+ MulElementwise),
+ static_cast<void (*)(int, const BinaryArithmeticOpParam &, T, const T *, T *)>(
+ MulSimpleBroadcast));
}
inline void BroadcastMulDispatch(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
@@ -665,17 +1205,59 @@ inline void BroadcastMulDispatch(const BinaryArithmeticOpParam &params, const Sh
{
// TODO: Use GetBinaryArithmeticFn
const std::function<float(const float &, const float &)> fn =
- [](const float &a, const float &b) -> float { return a * b; };
+ [](const float &a, const float &b) -> float { return a * b; };
reference::BroadcastBinaryArithmeticOpSlow(params, input1_shape, input1_data, input2_shape,
input2_data, output_shape, output_data, fn);
return;
}
- BinaryBroadcastFiveFold(
- params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data,
- static_cast<void (*)(int, const BinaryArithmeticOpParam &, const float *, const float *,
- float *)>(MulElementwise),
- static_cast<void (*)(int, const BinaryArithmeticOpParam &, float, const float *, float *)>(
- MulSimpleBroadcast));
+ auto implFuncs = getBinaryOpWithActivationImplFloat<BinaryOpFuncMulFloat>(params);
+ BinaryBroadcastFiveFold(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data, implFuncs.first, implFuncs.second);
+}
+
+inline void Div(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
+ const float *input1_data, const Shape &input2_shape, const float *input2_data,
+ const Shape &output_shape, float *output_data)
+{
+#ifdef __aarch64__
+ const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
+ auto implFuncs = getBinaryOpWithActivationImplFloat<BinaryOpFuncDivFloat>(params);
+ (*implFuncs.first)(flat_size, params, input1_data, input2_data, output_data);
+#else
+ const std::function<float(const float &, const float &)> fn =
+ [](const float &a, const float &b) -> float { return a / b; };
+ reference::BinaryArithmeticOp(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data, fn);
+#endif // __aarch64__
+}
+
+inline void BroadcastDivDispatch(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
+ const float *input1_data, const Shape &input2_shape,
+ const float *input2_data, const Shape &output_shape,
+ float *output_data)
+{
+#ifdef __aarch64__
+ if (params.broadcast_category == BroadcastableOpCategory::kFirstInputBroadcastsFast)
+ {
+ auto implFuncs = getBinaryOpWithActivationImplFloat<BinaryOpFuncDivFloat>(params);
+ BinaryBroadcastFiveFold(params, false, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data, implFuncs.first, implFuncs.second);
+ }
+ else if (params.broadcast_category == BroadcastableOpCategory::kSecondInputBroadcastsFast)
+ {
+ auto implFuncs =
+ getBinaryOpWithActivationImplFloat<BinaryOpFuncSwapArgs<BinaryOpFuncDivFloat>>(params);
+ BinaryBroadcastFiveFold(params, true, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data, implFuncs.first, implFuncs.second);
+ }
+ else
+#endif // __aarch64__
+ {
+ const std::function<float(const float &, const float &)> fn =
+ [](const float &a, const float &b) -> float { return a / b; };
+ reference::BroadcastBinaryArithmeticOpSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data, fn);
+ }
}
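+
+// The __aarch64__ guard above is presumably because NEON float division
+// (vdivq_f32) is only available on AArch64; 32-bit ARM builds take the slow
+// reference path for both Div and BroadcastDivDispatch.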
} // namespace optimized
diff --git a/compute/cker/include/cker/operation/optimized/Conv.h b/compute/cker/include/cker/operation/optimized/Conv.h
index 0f620146c..6e0e129c6 100644
--- a/compute/cker/include/cker/operation/optimized/Conv.h
+++ b/compute/cker/include/cker/operation/optimized/Conv.h
@@ -42,13 +42,15 @@ namespace cker
namespace optimized
{
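+// Serializes the gemmlowp GEMM below: the shared gemmlowp context is not
+// safe for concurrent use across threads (see the lock_guard taken just
+// before GemmWithOutputPipeline).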
+std::mutex _gemmlowp_mutex;
+
struct GemmlowpOutputPipeline
{
typedef gemmlowp::VectorMap<const int32_t, gemmlowp::VectorShape::Col> ColVectorMap;
typedef std::tuple<gemmlowp::OutputStageBiasAddition<ColVectorMap>,
gemmlowp::OutputStageScaleInt32ByFixedPointAndExponent,
gemmlowp::OutputStageClamp, gemmlowp::OutputStageSaturatingCastToUint8>
- Pipeline;
+ Pipeline;
static Pipeline MakeExp(const int32_t *bias_data, int output_rows, int32_t output_offset,
int32_t output_multiplier, int output_left_shift,
int32_t output_activation_min, int32_t output_activation_max)
@@ -106,7 +108,7 @@ inline void Conv(const ConvParams &params, const Shape &input_shape, const uint8
const int filter_height = filter_shape.Dims(1);
const bool need_dilated_im2col = dilation_width_factor != 1 || dilation_height_factor != 1;
const bool need_im2col =
- stride_width != 1 || stride_height != 1 || filter_width != 1 || filter_height != 1;
+ stride_width != 1 || stride_height != 1 || filter_width != 1 || filter_height != 1;
if (need_dilated_im2col)
{
assert(im2col_data);
@@ -141,7 +143,7 @@ inline void Conv(const ConvParams &params, const Shape &input_shape, const uint8
// the other calls commented out. This is a partial rollback of cl/196819423.
// const int gemm_input_cols = FlatSizeSkipDim(*gemm_input_shape, 3);
const int gemm_input_cols =
- gemm_input_shape->Dims(0) * gemm_input_shape->Dims(1) * gemm_input_shape->Dims(2);
+ gemm_input_shape->Dims(0) * gemm_input_shape->Dims(1) * gemm_input_shape->Dims(2);
const int filter_rows = filter_shape.Dims(0);
// See b/79927784.
// const int filter_cols = FlatSizeSkipDim(filter_shape, 0);
@@ -156,17 +158,19 @@ inline void Conv(const ConvParams &params, const Shape &input_shape, const uint8
assert(bias_shape.FlatSize() == output_rows);
UNUSED_RELEASE(bias_shape);
gemmlowp::MatrixMap<const uint8_t, gemmlowp::MapOrder::RowMajor> filter_matrix(
- filter_data, filter_rows, filter_cols);
+ filter_data, filter_rows, filter_cols);
gemmlowp::MatrixMap<const uint8_t, gemmlowp::MapOrder::ColMajor> input_matrix(
- gemm_input_data, gemm_input_rows, gemm_input_cols);
+ gemm_input_data, gemm_input_rows, gemm_input_cols);
gemmlowp::MatrixMap<uint8_t, gemmlowp::MapOrder::ColMajor> output_matrix(output_data, output_rows,
output_cols);
const auto &output_pipeline =
- GemmlowpOutputPipeline::MakeExp(bias_data, output_rows, output_offset, output_multiplier,
- output_shift, output_activation_min, output_activation_max);
+ GemmlowpOutputPipeline::MakeExp(bias_data, output_rows, output_offset, output_multiplier,
+ output_shift, output_activation_min, output_activation_max);
+
+ std::lock_guard<std::mutex> lock_guard(_gemmlowp_mutex);
gemmlowp::GemmWithOutputPipeline<uint8_t, uint8_t, gemmlowp::L8R8WithLhsNonzeroBitDepthParams>(
- gemm_context, filter_matrix, input_matrix, &output_matrix, filter_offset, input_offset,
- output_pipeline);
+ gemm_context, filter_matrix, input_matrix, &output_matrix, filter_offset, input_offset,
+ output_pipeline);
}
} // namespace optimized
@@ -202,10 +206,10 @@ public:
T *output_data, int output_height, int output_width)
{
const bool is_1x1_kernel =
- (filter_height == 1 && filter_width == 1 && stride_rows == 1 && stride_cols == 1);
+ (filter_height == 1 && filter_width == 1 && stride_rows == 1 && stride_cols == 1);
const bool is_same_height_width =
- (filter_height == input_height && filter_width == input_width && pad_width == 0 &&
- pad_height == 0);
+ (filter_height == input_height && filter_width == input_width && pad_width == 0 &&
+ pad_height == 0);
if (is_1x1_kernel || is_same_height_width)
{
// is_1x1_kernel: For 1x1 kernel, the 2D convolution is reduced to matrix multiplication.
diff --git a/compute/cker/include/cker/operation/optimized/DepthwiseConvFloat.h b/compute/cker/include/cker/operation/optimized/DepthwiseConvFloat.h
new file mode 100644
index 000000000..17b2fc7a2
--- /dev/null
+++ b/compute/cker/include/cker/operation/optimized/DepthwiseConvFloat.h
@@ -0,0 +1,1250 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_OPTIMIZED_DEPTHWISE_CONV_FLOAT_H__
+#define __NNFW_CKER_OPTIMIZED_DEPTHWISE_CONV_FLOAT_H__
+
+#include "cker/Shape.h"
+#include "cker/Types.h"
+#include "cker/Utils.h"
+#include "cker/neon/neon_check.h"
+
+namespace nnfw
+{
+namespace cker
+{
+namespace optimized
+{
+
+// Implementation of float DepthwiseConv
+
+template <bool kAllowStrided, int kFixedInputDepth, int kFixedDepthMultiplier>
+struct FloatDepthwiseConvKernel
+{
+};
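+
+// Reading of the template parameters (see the specializations below):
+//   kAllowStrided         - kernel tolerates a nonzero input_ptr_increment
+//                           between output pixels,
+//   kFixedInputDepth      - compile-time input depth (0 means any depth),
+//   kFixedDepthMultiplier - compile-time depth multiplier.
+// The primary template is intentionally empty, so instantiating an
+// unsupported (strided, depth, multiplier) combination fails to compile;
+// each NEON specialization unrolls its loads and multiply-accumulates for
+// its fixed shape.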
+
+#ifdef USE_NEON
+
+template <> struct FloatDepthwiseConvKernel<false, 8, 1>
+{
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const float *input_ptr, int input_ptr_increment, const float *filter_ptr,
+ float *acc_buffer_ptr)
+ {
+ (void)input_depth;
+ (void)depth_multiplier;
+ (void)input_ptr_increment;
+ // Load the filters
+ float32x4_t filter[2];
+ for (int i = 0; i < 2; i++)
+ {
+ filter[i] = vld1q_f32(filter_ptr + 4 * i);
+ }
+ int outp = 0;
+ // Handle 2 output pixels at a time.
+ for (; outp <= num_output_pixels - 2; outp += 2)
+ {
+ // Load the inputs
+ float32x4_t input[4];
+ for (int i = 0; i < 4; i++)
+ {
+ input[i] = vld1q_f32(input_ptr + 4 * i);
+ }
+ input_ptr += 16;
+ // Load the accumulators from acc_buffer
+ float32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ acc[0] = vmlaq_f32(acc[0], input[0], filter[0]);
+ acc[1] = vmlaq_f32(acc[1], input[1], filter[1]);
+ acc[2] = vmlaq_f32(acc[2], input[2], filter[0]);
+ acc[3] = vmlaq_f32(acc[3], input[3], filter[1]);
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ // Handle one output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the inputs
+ float32x4_t input[2];
+ for (int i = 0; i < 2; i++)
+ {
+ input[i] = vld1q_f32(input_ptr + 4 * i);
+ }
+ input_ptr += 8;
+ // Load the accumulators from acc_buffer
+ float32x4_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vmlaq_f32(acc[i], input[i], filter[i]);
+ }
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 8;
+ }
+ }
+};
+
+template <> struct FloatDepthwiseConvKernel<false, 2, 1>
+{
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const float *input_ptr, int input_ptr_increment, const float *filter_ptr,
+ float *acc_buffer_ptr)
+ {
+ (void)input_depth;
+ (void)depth_multiplier;
+ (void)input_ptr_increment;
+
+ const float32x2_t filters = vld1_f32(filter_ptr);
+ const float32x4_t filters_dup2 = vcombine_f32(filters, filters);
+ int outp = 0;
+ // Handle 8 output pixels at a time.
+ for (; outp <= num_output_pixels - 8; outp += 8)
+ {
+ // Load the inputs
+ float32x4_t input[4];
+ for (int i = 0; i < 4; i++)
+ {
+ input[i] = vld1q_f32(input_ptr + 4 * i);
+ }
+ input_ptr += 16;
+ // Load the accumulators from acc_buffer
+ float32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vmlaq_f32(acc[i], input[i], filters_dup2);
+ }
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ // Handle 4 output pixels at a time.
+ for (; outp <= num_output_pixels - 4; outp += 4)
+ {
+ // Load the inputs
+ float32x4_t input[2];
+ for (int i = 0; i < 2; i++)
+ {
+ input[i] = vld1q_f32(input_ptr + 4 * i);
+ }
+ input_ptr += 8;
+ // Load the accumulators from acc_buffer
+ float32x4_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vmlaq_f32(acc[i], input[i], filters_dup2);
+ }
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 8;
+ }
+ // Handle 2 output pixels at a time.
+ for (; outp <= num_output_pixels - 2; outp += 2)
+ {
+ // Load the inputs
+ const float32x4_t input = vld1q_f32(input_ptr);
+ input_ptr += 4;
+ // Load the accumulators from acc_buffer
+ float32x4_t acc = vld1q_f32(acc_buffer_ptr);
+ // Multiply-accumulate
+ acc = vmlaq_f32(acc, input, filters_dup2);
+ // Store the accumulators back to acc_buffer
+ vst1q_f32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 4;
+ }
+ // Handle 1 output pixel at a time
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the inputs
+ const float32x2_t input = vld1_f32(input_ptr);
+ input_ptr += 2;
+ // Load the accumulators from acc_buffer
+ float32x2_t acc = vld1_f32(acc_buffer_ptr);
+ // Multiply-accumulate
+ acc = vmla_f32(acc, input, filters);
+ // Store the accumulators back to acc_buffer
+ vst1_f32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 2;
+ }
+ }
+};
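+
+// The 8/4/2/1-output-pixel ladder above is the tail-handling pattern used
+// throughout these kernels: each step halves the amount of data processed
+// per iteration, so the final scalar loop handles at most one output pixel.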
+
+template <> struct FloatDepthwiseConvKernel<true, 0, 1>
+{
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const float *input_ptr, int input_ptr_increment, const float *filter_ptr,
+ float *acc_buffer_ptr)
+ {
+ (void)depth_multiplier;
+
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ const float *local_filter_ptr = filter_ptr;
+ const float *local_input_ptr = input_ptr;
+ int ic = 0;
+ // Handle 16 input channels at a time.
+ for (; ic <= input_depth - 16; ic += 16)
+ {
+ // Load the filters
+ float32x4_t filter_0 = vld1q_f32(local_filter_ptr + 4 * 0);
+ float32x4_t filter_1 = vld1q_f32(local_filter_ptr + 4 * 1);
+ float32x4_t filter_2 = vld1q_f32(local_filter_ptr + 4 * 2);
+ float32x4_t filter_3 = vld1q_f32(local_filter_ptr + 4 * 3);
+ local_filter_ptr += 16;
+ // Load the inputs
+ float32x4_t input_0 = vld1q_f32(local_input_ptr + 4 * 0);
+ float32x4_t input_1 = vld1q_f32(local_input_ptr + 4 * 1);
+ float32x4_t input_2 = vld1q_f32(local_input_ptr + 4 * 2);
+ float32x4_t input_3 = vld1q_f32(local_input_ptr + 4 * 3);
+ local_input_ptr += 16;
+ // Load the accumulators from acc_buffer
+ float32x4_t acc_0 = vld1q_f32(acc_buffer_ptr + 4 * 0);
+ float32x4_t acc_1 = vld1q_f32(acc_buffer_ptr + 4 * 1);
+ float32x4_t acc_2 = vld1q_f32(acc_buffer_ptr + 4 * 2);
+ float32x4_t acc_3 = vld1q_f32(acc_buffer_ptr + 4 * 3);
+ // Multiply-accumulate
+ acc_0 = vmlaq_f32(acc_0, input_0, filter_0);
+ acc_1 = vmlaq_f32(acc_1, input_1, filter_1);
+ acc_2 = vmlaq_f32(acc_2, input_2, filter_2);
+ acc_3 = vmlaq_f32(acc_3, input_3, filter_3);
+ // Store the accumulators back to acc_buffer
+ vst1q_f32(acc_buffer_ptr + 4 * 0, acc_0);
+ vst1q_f32(acc_buffer_ptr + 4 * 1, acc_1);
+ vst1q_f32(acc_buffer_ptr + 4 * 2, acc_2);
+ vst1q_f32(acc_buffer_ptr + 4 * 3, acc_3);
+ acc_buffer_ptr += 16;
+ }
+ // Handle 4 input channels at a time.
+ for (; ic <= input_depth - 4; ic += 4)
+ {
+ // Load the filters
+ float32x4_t filter;
+ filter = vld1q_f32(local_filter_ptr);
+ local_filter_ptr += 4;
+ // Load the inputs
+ float32x4_t input;
+ input = vld1q_f32(local_input_ptr);
+ local_input_ptr += 4;
+ // Load the accumulators from acc_buffer
+ float32x4_t acc;
+ acc = vld1q_f32(acc_buffer_ptr);
+ // Multiply-accumulate
+ acc = vmlaq_f32(acc, input, filter);
+ // Store the accumulators back to acc_buffer
+ vst1q_f32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 4;
+ }
+ // Handle one input channel at a time.
+ for (; ic < input_depth; ic++)
+ {
+ const float input_val = *local_input_ptr++;
+ const float filter_val = *local_filter_ptr++;
+ *acc_buffer_ptr++ += filter_val * input_val;
+ }
+ input_ptr += input_ptr_increment;
+ }
+ }
+};
+
+template <> struct FloatDepthwiseConvKernel<true, 0, 8>
+{
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const float *input_ptr, int input_ptr_increment, const float *filter_ptr,
+ float *acc_buffer_ptr)
+ {
+ (void)depth_multiplier;
+
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ const float *local_filter_ptr = filter_ptr;
+ const float *local_input_ptr = input_ptr;
+ int ic = 0;
+ // Handle 2 input channels at a time.
+ for (; ic <= input_depth - 2; ic += 2)
+ {
+ // Load the filters
+ float32x4_t filter[4];
+ for (int i = 0; i < 4; i++)
+ {
+ filter[i] = vld1q_f32(local_filter_ptr + 4 * i);
+ }
+ local_filter_ptr += 16;
+ // Load the inputs
+ const float32x2_t input = vld1_f32(local_input_ptr);
+ local_input_ptr += 2;
+ // Load the accumulators from acc_buffer
+ float32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ acc[0] = vmlaq_lane_f32(acc[0], filter[0], input, 0);
+ acc[1] = vmlaq_lane_f32(acc[1], filter[1], input, 0);
+ acc[2] = vmlaq_lane_f32(acc[2], filter[2], input, 1);
+ acc[3] = vmlaq_lane_f32(acc[3], filter[3], input, 1);
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ // Handle one input channel at a time.
+ for (; ic < input_depth; ic++)
+ {
+ // Load the filters
+ float32x4_t filter[2];
+ for (int i = 0; i < 2; i++)
+ {
+ filter[i] = vld1q_f32(local_filter_ptr + 4 * i);
+ }
+ local_filter_ptr += 8;
+ // Load the inputs
+ const float input_val = *local_input_ptr++;
+ // Load the accumulators from acc_buffer
+ float32x4_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vmlaq_n_f32(acc[i], filter[i], input_val);
+ }
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 8;
+ }
+ input_ptr += input_ptr_increment;
+ }
+ }
+};
+
+// Note: this implementation is very slow for input_depth < 8
+// (i.e. comparable to the reference implementation); see the
+// specializations for input_depth=3 below.
+template <> struct FloatDepthwiseConvKernel<true, 0, 2>
+{
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const float *input_ptr, int input_ptr_increment, const float *filter_ptr,
+ float *acc_buffer_ptr)
+ {
+ (void)depth_multiplier;
+
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ const float *local_filter_ptr = filter_ptr;
+ const float *local_input_ptr = input_ptr;
+ int ic = 0;
+ // Handle 8 input channels at a time.
+ for (; ic <= input_depth - 8; ic += 8)
+ {
+ // Load the filters
+ float32x4_t filter[4];
+ for (int i = 0; i < 4; i++)
+ {
+ filter[i] = vld1q_f32(local_filter_ptr + 4 * i);
+ }
+ local_filter_ptr += 16;
+ // Load the inputs
+ float32x4x2_t input_dup2[2];
+ for (int i = 0; i < 2; i++)
+ {
+ const float32x4_t input = vld1q_f32(local_input_ptr + 4 * i);
+ input_dup2[i] = vzipq_f32(input, input);
+ }
+ local_input_ptr += 8;
+ // Load the accumulators from acc_buffer
+ float32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ acc[0] = vmlaq_f32(acc[0], filter[0], input_dup2[0].val[0]);
+ acc[1] = vmlaq_f32(acc[1], filter[1], input_dup2[0].val[1]);
+ acc[2] = vmlaq_f32(acc[2], filter[2], input_dup2[1].val[0]);
+ acc[3] = vmlaq_f32(acc[3], filter[3], input_dup2[1].val[1]);
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ // Handle 4 input channels at a time.
+ for (; ic <= input_depth - 4; ic += 4)
+ {
+ // Load the filters
+ float32x2_t filter[4];
+ for (int i = 0; i < 4; i++)
+ {
+ filter[i] = vld1_f32(local_filter_ptr + 2 * i);
+ }
+ local_filter_ptr += 8;
+ // Load the inputs
+ const float32x4_t input = vld1q_f32(local_input_ptr);
+ local_input_ptr += 4;
+ // Load the accumulators from acc_buffer
+ float32x2_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1_f32(acc_buffer_ptr + 2 * i);
+ }
+ // Multiply-accumulate
+ acc[0] = vmla_lane_f32(acc[0], filter[0], vget_low_f32(input), 0);
+ acc[1] = vmla_lane_f32(acc[1], filter[1], vget_low_f32(input), 1);
+ acc[2] = vmla_lane_f32(acc[2], filter[2], vget_high_f32(input), 0);
+ acc[3] = vmla_lane_f32(acc[3], filter[3], vget_high_f32(input), 1);
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1_f32(acc_buffer_ptr + 2 * i, acc[i]);
+ }
+ acc_buffer_ptr += 8;
+ }
+ // Handle 2 input channels at a time.
+ for (; ic <= input_depth - 2; ic += 2)
+ {
+ // Load the filters
+ const float32x4_t filter = vld1q_f32(local_filter_ptr);
+ local_filter_ptr += 4;
+ // Load the inputs
+ const float32x2_t input = vld1_f32(local_input_ptr);
+ local_input_ptr += 2;
+ // Load the accumulators from acc_buffer
+ float32x2_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vld1_f32(acc_buffer_ptr + 2 * i);
+ }
+ // Multiply-accumulate
+ acc[0] = vmla_lane_f32(acc[0], vget_low_f32(filter), input, 0);
+ acc[1] = vmla_lane_f32(acc[1], vget_high_f32(filter), input, 1);
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 2; i++)
+ {
+ vst1_f32(acc_buffer_ptr + 2 * i, acc[i]);
+ }
+ acc_buffer_ptr += 4;
+ }
+ // Handle one input channel at a time.
+ for (; ic < input_depth; ic++)
+ {
+ // Load the inputs
+ const float input_val = *local_input_ptr++;
+ // Multiply-accumulate
+ for (int i = 0; i < 2; i++)
+ {
+ acc_buffer_ptr[i] += local_filter_ptr[i] * input_val;
+ }
+ local_filter_ptr += 2;
+ acc_buffer_ptr += 2;
+ }
+ input_ptr += input_ptr_increment;
+ }
+ }
+};
+
+template <> struct FloatDepthwiseConvKernel<true, 3, 2>
+{
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const float *input_ptr, int input_ptr_increment, const float *filter_ptr,
+ float *acc_buffer_ptr)
+ {
+ (void)input_depth;
+ (void)depth_multiplier;
+
+ // Load the filters
+ float32x2_t filter[3];
+ for (int i = 0; i < 3; i++)
+ {
+ filter[i] = vld1_f32(filter_ptr + 2 * i);
+ }
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ const float32x2_t input01 = vld1_f32(input_ptr);
+ const float32x2_t input2 = vld1_dup_f32(input_ptr + 2);
+ // Load the accumulators from acc_buffer
+ float32x2_t acc[3];
+ for (int i = 0; i < 3; i++)
+ {
+ acc[i] = vld1_f32(acc_buffer_ptr + 2 * i);
+ }
+      // Multiply-accumulate: for each input channel there are 2 outputs
+ acc[0] = vmla_lane_f32(acc[0], filter[0], input01, 0);
+ acc[1] = vmla_lane_f32(acc[1], filter[1], input01, 1);
+ acc[2] = vmla_lane_f32(acc[2], filter[2], input2, 0);
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 3; i++)
+ {
+ vst1_f32(acc_buffer_ptr + 2 * i, acc[i]);
+ }
+ acc_buffer_ptr += 6;
+ input_ptr += input_ptr_increment;
+ }
+ }
+};
+
+template <> struct FloatDepthwiseConvKernel<true, 3, 4>
+{
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const float *input_ptr, int input_ptr_increment, const float *filter_ptr,
+ float *acc_buffer_ptr)
+ {
+ (void)input_depth;
+ (void)depth_multiplier;
+
+ // Load the filters
+ float32x4_t filter[3];
+ for (int i = 0; i < 3; i++)
+ {
+ filter[i] = vld1q_f32(filter_ptr + 4 * i);
+ }
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ // NOTE: we only want 3 values, so we read it as two ops where
+ // the second op just duplicates the lane
+ const float32x2_t input01 = vld1_f32(input_ptr);
+ const float32x2_t input2 = vld1_dup_f32(input_ptr + 2);
+ // Load the accumulators from acc_buffer
+ float32x4_t acc[3];
+ for (int i = 0; i < 3; i++)
+ {
+ acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate all outputs.
+ acc[0] = vmlaq_lane_f32(acc[0], filter[0], input01, 0);
+ acc[1] = vmlaq_lane_f32(acc[1], filter[1], input01, 1);
+ acc[2] = vmlaq_lane_f32(acc[2], filter[2], input2, 0);
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 3; i++)
+ {
+ vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 12;
+ input_ptr += input_ptr_increment;
+ }
+ }
+};
+
+template <> struct FloatDepthwiseConvKernel<true, 1, 8>
+{
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const float *input_ptr, int input_ptr_increment, const float *filter_ptr,
+ float *acc_buffer_ptr)
+ {
+ (void)input_depth;
+ (void)depth_multiplier;
+
+ // Load the filters
+ float32x4_t filter[2];
+ for (int i = 0; i < 2; i++)
+ {
+ filter[i] = vld1q_f32(filter_ptr + 4 * i);
+ }
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ // Load the inputs
+ const float input_val = *input_ptr;
+ input_ptr += input_ptr_increment;
+ // Load the accumulators from acc_buffer
+ float32x4_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vmlaq_n_f32(acc[i], filter[i], input_val);
+ }
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 8;
+ }
+ }
+};
+
+template <> struct FloatDepthwiseConvKernel<true, 1, 32>
+{
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const float *input_ptr, int input_ptr_increment, const float *filter_ptr,
+ float *acc_buffer_ptr)
+ {
+ (void)input_depth;
+ (void)depth_multiplier;
+
+ // Load the filters
+ float32x4_t filter_0 = vld1q_f32(filter_ptr + 4 * 0);
+ float32x4_t filter_1 = vld1q_f32(filter_ptr + 4 * 1);
+ float32x4_t filter_2 = vld1q_f32(filter_ptr + 4 * 2);
+ float32x4_t filter_3 = vld1q_f32(filter_ptr + 4 * 3);
+ float32x4_t filter_4 = vld1q_f32(filter_ptr + 4 * 4);
+ float32x4_t filter_5 = vld1q_f32(filter_ptr + 4 * 5);
+ float32x4_t filter_6 = vld1q_f32(filter_ptr + 4 * 6);
+ float32x4_t filter_7 = vld1q_f32(filter_ptr + 4 * 7);
+
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ // Load the inputs
+ const float input_val = *input_ptr;
+ input_ptr += input_ptr_increment;
+ // Load the accumulators from acc_buffer
+ float32x4_t acc_0 = vld1q_f32(acc_buffer_ptr + 4 * 0);
+ float32x4_t acc_1 = vld1q_f32(acc_buffer_ptr + 4 * 1);
+ float32x4_t acc_2 = vld1q_f32(acc_buffer_ptr + 4 * 2);
+ float32x4_t acc_3 = vld1q_f32(acc_buffer_ptr + 4 * 3);
+ float32x4_t acc_4 = vld1q_f32(acc_buffer_ptr + 4 * 4);
+ float32x4_t acc_5 = vld1q_f32(acc_buffer_ptr + 4 * 5);
+ float32x4_t acc_6 = vld1q_f32(acc_buffer_ptr + 4 * 6);
+ float32x4_t acc_7 = vld1q_f32(acc_buffer_ptr + 4 * 7);
+ // Multiply-accumulate
+ acc_0 = vmlaq_n_f32(acc_0, filter_0, input_val);
+ acc_1 = vmlaq_n_f32(acc_1, filter_1, input_val);
+ acc_2 = vmlaq_n_f32(acc_2, filter_2, input_val);
+ acc_3 = vmlaq_n_f32(acc_3, filter_3, input_val);
+ acc_4 = vmlaq_n_f32(acc_4, filter_4, input_val);
+ acc_5 = vmlaq_n_f32(acc_5, filter_5, input_val);
+ acc_6 = vmlaq_n_f32(acc_6, filter_6, input_val);
+ acc_7 = vmlaq_n_f32(acc_7, filter_7, input_val);
+ // Store the accumulators back to acc_buffer
+ vst1q_f32(acc_buffer_ptr + 4 * 0, acc_0);
+ vst1q_f32(acc_buffer_ptr + 4 * 1, acc_1);
+ vst1q_f32(acc_buffer_ptr + 4 * 2, acc_2);
+ vst1q_f32(acc_buffer_ptr + 4 * 3, acc_3);
+ vst1q_f32(acc_buffer_ptr + 4 * 4, acc_4);
+ vst1q_f32(acc_buffer_ptr + 4 * 5, acc_5);
+ vst1q_f32(acc_buffer_ptr + 4 * 6, acc_6);
+ vst1q_f32(acc_buffer_ptr + 4 * 7, acc_7);
+ acc_buffer_ptr += 32;
+ }
+ }
+};
+
+template <> struct FloatDepthwiseConvKernel<true, 1, 20>
+{
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const float *input_ptr, int input_ptr_increment, const float *filter_ptr,
+ float *acc_buffer_ptr)
+ {
+ (void)input_depth;
+ (void)depth_multiplier;
+
+ // Load the filters
+ float32x4_t filter_0 = vld1q_f32(filter_ptr + 4 * 0);
+ float32x4_t filter_1 = vld1q_f32(filter_ptr + 4 * 1);
+ float32x4_t filter_2 = vld1q_f32(filter_ptr + 4 * 2);
+ float32x4_t filter_3 = vld1q_f32(filter_ptr + 4 * 3);
+ float32x4_t filter_4 = vld1q_f32(filter_ptr + 4 * 4);
+
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ // Load the inputs
+ const float input_val = *input_ptr;
+ input_ptr += input_ptr_increment;
+ // Load the accumulators from acc_buffer
+ float32x4_t acc_0 = vld1q_f32(acc_buffer_ptr + 4 * 0);
+ float32x4_t acc_1 = vld1q_f32(acc_buffer_ptr + 4 * 1);
+ float32x4_t acc_2 = vld1q_f32(acc_buffer_ptr + 4 * 2);
+ float32x4_t acc_3 = vld1q_f32(acc_buffer_ptr + 4 * 3);
+ float32x4_t acc_4 = vld1q_f32(acc_buffer_ptr + 4 * 4);
+ // Multiply-accumulate
+ acc_0 = vmlaq_n_f32(acc_0, filter_0, input_val);
+ acc_1 = vmlaq_n_f32(acc_1, filter_1, input_val);
+ acc_2 = vmlaq_n_f32(acc_2, filter_2, input_val);
+ acc_3 = vmlaq_n_f32(acc_3, filter_3, input_val);
+ acc_4 = vmlaq_n_f32(acc_4, filter_4, input_val);
+ // Store the accumulators back to acc_buffer
+ vst1q_f32(acc_buffer_ptr + 4 * 0, acc_0);
+ vst1q_f32(acc_buffer_ptr + 4 * 1, acc_1);
+ vst1q_f32(acc_buffer_ptr + 4 * 2, acc_2);
+ vst1q_f32(acc_buffer_ptr + 4 * 3, acc_3);
+ vst1q_f32(acc_buffer_ptr + 4 * 4, acc_4);
+ acc_buffer_ptr += 20;
+ }
+ }
+};
+
+template <> struct FloatDepthwiseConvKernel<true, 0, 16>
+{
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const float *input_ptr, int input_ptr_increment, const float *filter_ptr,
+ float *acc_buffer_ptr)
+ {
+ (void)depth_multiplier;
+
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ const float *local_filter_ptr = filter_ptr;
+ const float *local_input_ptr = input_ptr;
+ for (int ic = 0; ic < input_depth; ic++)
+ {
+ // Load the filters
+ float32x4_t filter[4];
+ for (int i = 0; i < 4; i++)
+ {
+ filter[i] = vld1q_f32(local_filter_ptr + 4 * i);
+ }
+ local_filter_ptr += 16;
+ // Load the inputs
+ const float input_val = *local_input_ptr++;
+ // Load the accumulators from acc_buffer
+ float32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vmlaq_n_f32(acc[i], filter[i], input_val);
+ }
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ input_ptr += input_ptr_increment;
+ }
+ }
+};
+
+template <> struct FloatDepthwiseConvKernel<true, 8, 1>
+{
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const float *input_ptr, int input_ptr_increment, const float *filter_ptr,
+ float *acc_buffer_ptr)
+ {
+ (void)input_depth;
+ (void)depth_multiplier;
+
+ // Load the filters
+ float32x4_t filter[2];
+ for (int i = 0; i < 2; i++)
+ {
+ filter[i] = vld1q_f32(filter_ptr + 4 * i);
+ }
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ // Load the inputs
+ float32x4_t input[2];
+ for (int i = 0; i < 2; i++)
+ {
+ input[i] = vld1q_f32(input_ptr + 4 * i);
+ }
+ // Load the accumulators from acc_buffer
+ float32x4_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vmlaq_f32(acc[i], input[i], filter[i]);
+ }
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 8;
+ input_ptr += input_ptr_increment;
+ }
+ }
+};
+
+template <> struct FloatDepthwiseConvKernel<true, 2, 1>
+{
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const float *input_ptr, int input_ptr_increment, const float *filter_ptr,
+ float *acc_buffer_ptr)
+ {
+ (void)input_depth;
+ (void)depth_multiplier;
+
+ float32x2_t filter = vld1_f32(filter_ptr);
+ float32x4_t filter_x4 = vcombine_f32(filter, filter);
+ int outp = 0;
+
+ // Handle two output pixels at a time.
+ for (; outp <= num_output_pixels - 2; outp += 2)
+ {
+ // Load the inputs
+ float32x2_t input_1 = vld1_f32(input_ptr);
+ input_ptr += input_ptr_increment;
+ float32x2_t input_2 = vld1_f32(input_ptr);
+ input_ptr += input_ptr_increment;
+ float32x4_t input = vcombine_f32(input_1, input_2);
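+      // Packing the two 2-wide pixels into a single float32x4 lets the one
+      // vmlaq_f32 below update both output pixels at once.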
+
+ // Load the accumulators from acc_buffer
+ float32x4_t acc = vld1q_f32(acc_buffer_ptr);
+
+ // Multiply-accumulate
+ acc = vmlaq_f32(acc, input, filter_x4);
+
+ // Store the accumulators back to acc_buffer
+ vst1q_f32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 4;
+ }
+ // Handle one output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the inputs
+ float32x2_t input = vld1_f32(input_ptr);
+ input_ptr += input_ptr_increment;
+
+ // Load the accumulators from acc_buffer
+ float32x2_t acc = vld1_f32(acc_buffer_ptr);
+
+ // Multiply-accumulate
+ acc = vmla_f32(acc, input, filter);
+
+ // Store the accumulators back to acc_buffer
+ vst1_f32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 2;
+ }
+ }
+};
+
+template <> struct FloatDepthwiseConvKernel<true, 4, 1>
+{
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const float *input_ptr, int input_ptr_increment, const float *filter_ptr,
+ float *acc_buffer_ptr)
+ {
+ (void)input_depth;
+ (void)depth_multiplier;
+
+ float32x4_t filter = vld1q_f32(filter_ptr);
+
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ // Load the inputs
+ float32x4_t input = vld1q_f32(input_ptr);
+ // Load the accumulators from acc_buffer
+ float32x4_t acc = vld1q_f32(acc_buffer_ptr);
+ // Multiply-accumulate
+ acc = vmlaq_f32(acc, input, filter);
+ // Store the accumulators back to acc_buffer
+ vst1q_f32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 4;
+ input_ptr += input_ptr_increment;
+ }
+ }
+};
+#endif
+
+// Accumulates the effect of one row of the filter on a segment of one row of
+// the output, accessing the corresponding row of the input.
+template <bool kAllowStrided, int kFixedInputDepth, int kFixedDepthMultiplier>
+void FloatDepthwiseConvAccumRow(int stride, int dilation_factor, int input_depth, int input_width,
+ const float *input_data, int pad_width, int depth_multiplier,
+ int filter_width, const float *filter_data, int out_x_buffer_start,
+ int out_x_buffer_end, int output_depth, float *acc_buffer)
+{
+ // Sanity check parameters. This is important in particular to ensure
+ // that we keep the number of template instantiations minimal, so we don't
+ // increase binary size unnecessarily.
+ static_assert(kFixedDepthMultiplier || !kFixedInputDepth, "");
+ static_assert(kFixedInputDepth || kAllowStrided, "");
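+  // I.e. a fixed input depth is only allowed together with a fixed depth
+  // multiplier, and a variable (kFixedInputDepth == 0) input depth is only
+  // allowed for the strided kernels.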
+ assert(stride == 1 || kAllowStrided);
+ if (kFixedInputDepth)
+ {
+ assert(input_depth == kFixedInputDepth);
+ }
+ if (kFixedDepthMultiplier)
+ {
+ assert(depth_multiplier == kFixedDepthMultiplier);
+ }
+ assert(output_depth == input_depth * depth_multiplier);
+ const int input_ptr_increment = stride * input_depth;
+ const float *filter_base_ptr = filter_data;
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ // For the current (filter_x, filter_y) point in the filter,
+ // compute the boundaries of the corresponding output row segment.
+ int out_x_loop_start_unclamped = 0;
+ int out_x_loop_end_unclamped = 0;
+ if (kAllowStrided)
+ {
+ if (stride == 2)
+ {
+ out_x_loop_start_unclamped = (pad_width - dilation_factor * filter_x + 1) / 2;
+ out_x_loop_end_unclamped = (pad_width + input_width - dilation_factor * filter_x + 1) / 2;
+ }
+ else if (stride == 4)
+ {
+ out_x_loop_start_unclamped = (pad_width - dilation_factor * filter_x + 3) / 4;
+ out_x_loop_end_unclamped = (pad_width + input_width - dilation_factor * filter_x + 3) / 4;
+ }
+ else
+ {
+ out_x_loop_start_unclamped = (pad_width - dilation_factor * filter_x + stride - 1) / stride;
+ out_x_loop_end_unclamped =
+ (pad_width + input_width - dilation_factor * filter_x + stride - 1) / stride;
+ }
+ }
+ else
+ {
+ out_x_loop_start_unclamped = pad_width - dilation_factor * filter_x;
+ out_x_loop_end_unclamped = pad_width + input_width - dilation_factor * filter_x;
+ }
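+    // In the strided branches above, the divisions round up (for non-negative
+    // numerators): (numerator + stride - 1) / stride == ceil(numerator / stride);
+    // e.g. pad_width = 2, filter_x = 0, stride = 2 gives (2 + 2 - 1) / 2 = 1,
+    // the first out_x whose corresponding input x falls inside the row.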
+ // The kernel will have to iterate on the segment of the
+    // output row that starts at out_x_loop_start and ends at out_x_loop_end.
+ const int out_x_loop_start = std::max(out_x_buffer_start, out_x_loop_start_unclamped);
+ const int out_x_loop_end = std::min(out_x_buffer_end, out_x_loop_end_unclamped);
+
+ float *acc_buffer_ptr = acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth;
+ const int in_x_origin = (out_x_loop_start * stride) - pad_width + dilation_factor * filter_x;
+ const float *input_ptr = input_data + in_x_origin * input_depth;
+ const int num_output_pixels = out_x_loop_end - out_x_loop_start;
+ FloatDepthwiseConvKernel<kAllowStrided, kFixedInputDepth, kFixedDepthMultiplier>::Run(
+ num_output_pixels, input_depth, depth_multiplier, input_ptr, input_ptr_increment,
+ filter_base_ptr, acc_buffer_ptr);
+ filter_base_ptr += output_depth;
+ }
+}
+
+// Generic fallback of FloatDepthwiseConvAccumRow: portable, non-templatized.
+inline void FloatDepthwiseConvAccumRowGeneric(int stride, int dilation_factor, int input_depth,
+ int input_width, const float *input_data,
+ int pad_width, int depth_multiplier, int filter_width,
+ const float *filter_data, int out_x_buffer_start,
+ int out_x_buffer_end, int output_depth,
+ float *acc_buffer)
+{
+ const float *filter_base_ptr = filter_data;
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int out_x_loop_start =
+ std::max(out_x_buffer_start, (pad_width - dilation_factor * filter_x + stride - 1) / stride);
+ const int out_x_loop_end =
+ std::min(out_x_buffer_end,
+ (pad_width + input_width - dilation_factor * filter_x + stride - 1) / stride);
+
+ float *acc_buffer_ptr = acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth;
+ const int in_x_origin = (out_x_loop_start * stride) - pad_width + dilation_factor * filter_x;
+ const float *input_ptr = input_data + in_x_origin * input_depth;
+ const int input_ptr_increment = (stride - 1) * input_depth;
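+    // The inner loop below advances input_ptr by input_depth elements on its
+    // own, so only the remaining (stride - 1) * input_depth elements are
+    // skipped here to move one stride along the input row.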
+ for (int out_x = out_x_loop_start; out_x < out_x_loop_end; out_x++)
+ {
+ const float *filter_ptr = filter_base_ptr;
+ for (int ic = 0; ic < input_depth; ++ic)
+ {
+ const float input_val = *input_ptr++;
+ for (int m = 0; m < depth_multiplier; m++)
+ {
+ const float filter_val = *filter_ptr++;
+ *acc_buffer_ptr++ += filter_val * input_val;
+ }
+ }
+ input_ptr += input_ptr_increment;
+ }
+ filter_base_ptr += output_depth;
+ }
+}
+
+// Initializes the accumulator buffer with bias values.
+inline void DepthwiseConvInitAccBuffer(int num_output_pixels, int output_depth,
+ const float *bias_data, float *acc_buffer)
+{
+ // TODO(benoitjacob): This might need optimized specializations
+ // for small output_depth values, if that ever becomes an important
+ // case (like it was for some quantized DepthwiseConv cases).
+ for (int i = 0; i < num_output_pixels; i++)
+ {
+ memcpy(acc_buffer + i * output_depth, bias_data, sizeof(acc_buffer[0]) * output_depth);
+ }
+}
+
+// DepthwiseConv can run multi-threaded on the dimension specified by thread_dim.
+// Each thread processes output elements on dimension thread_dim in the range
+// [thread_start, thread_end).
+// For example, thread_start = 2, thread_end = 6, and thread_dim = 1 mean that
+// this call calculates DepthwiseConv for output_data[:, 2:6, :, :], i.e.
+// output rows 2 through 5.
+inline void DepthwiseConvImpl(const DepthwiseConvParams &params, const Shape &input_shape,
+ const float *input_data, const Shape &filter_shape,
+ const float *filter_data, const Shape &bias_shape,
+ const float *bias_data, const Shape &output_shape, float *output_data,
+ int thread_start, int thread_end, int thread_dim)
+{
+ UNUSED_RELEASE(bias_shape);
+ const int stride_width = params.stride_width;
+ const int stride_height = params.stride_height;
+ const int pad_width = params.padding_values.width;
+ const int pad_height = params.padding_values.height;
+ const int depth_multiplier = params.depth_multiplier;
+ const float output_activation_min = params.float_activation_min;
+ const float output_activation_max = params.float_activation_max;
+ const int dilation_width_factor = params.dilation_width_factor;
+ const int dilation_height_factor = params.dilation_height_factor;
+ assert(input_shape.DimensionsCount() == 4);
+ assert(filter_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+ assert(thread_dim == 0 || thread_dim == 1);
+
+ const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+ const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
+ const int input_height = input_shape.Dims(1);
+ const int input_width = input_shape.Dims(2);
+ const int input_depth = input_shape.Dims(3);
+ const int filter_height = filter_shape.Dims(1);
+ const int filter_width = filter_shape.Dims(2);
+ const int output_height = output_shape.Dims(1);
+ const int output_width = output_shape.Dims(2);
+ assert(output_depth == input_depth * depth_multiplier);
+ assert(bias_shape.FlatSize() == output_depth);
+
+ static const int kAccBufferMaxSize = 4832;
+ float acc_buffer[kAccBufferMaxSize];
+ assert(kAccBufferMaxSize >= output_depth);
+ const int kOutputPixelsInAccBuffer = kAccBufferMaxSize / output_depth;
+ const int kAccBufferActualSize = kOutputPixelsInAccBuffer * output_depth;
+ assert(kOutputPixelsInAccBuffer * output_depth <= kAccBufferActualSize);
+ assert(kAccBufferActualSize <= kAccBufferMaxSize);
+ assert(kOutputPixelsInAccBuffer >= 1);
+
+ UNUSED_RELEASE(kAccBufferActualSize);
+
+ // row_accum_func will point to the core accumulation function to be used
+ // for this DepthwiseConv op.
+ using row_accum_func_t = decltype(&FloatDepthwiseConvAccumRowGeneric);
+ row_accum_func_t row_accum_func = nullptr;
+
+#define TFMINI_USE_DEPTHWISECONV_KERNEL(ALLOW_STRIDED, FIXED_INPUT_DEPTH, FIXED_DEPTH_MULTIPLIER) \
+ if (!row_accum_func && (stride_width == 1 || ALLOW_STRIDED) && \
+ (input_depth == FIXED_INPUT_DEPTH || FIXED_INPUT_DEPTH == 0) && \
+ depth_multiplier == FIXED_DEPTH_MULTIPLIER) \
+ { \
+ row_accum_func = \
+ FloatDepthwiseConvAccumRow<ALLOW_STRIDED, FIXED_INPUT_DEPTH, FIXED_DEPTH_MULTIPLIER>; \
+ }
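+// Each expansion below selects the first kernel whose compile-time parameters
+// match this op's runtime stride / input_depth / depth_multiplier; e.g.
+// stride_width == 1, input_depth == 8, depth_multiplier == 1 resolves to
+// FloatDepthwiseConvKernel<false, 8, 1>.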
+
+#ifdef USE_NEON
+  // We go over our list of kernels in decreasing order of preference
+  // for the cases where multiple kernels could apply.
+
+ // Start with the fastest kernels: AllowStrided=false, fixed input depth.
+
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 8, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 2, 1)
+
+ // Next come the strided kernels: AllowStrided=true, fixed input depth.
+ // They are a bit less efficient, but allow stride!=1.
+
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 8, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 8)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 20)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 32)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 2, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 3, 2)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 3, 4)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 4, 1)
+
+  // Finally, the kernels allowing a variable input depth;
+  // these are the least efficient but most general kernels.
+
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 0, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 0, 2)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 0, 8)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 0, 16)
+
+#endif // USE_NEON
+
+#undef TFMINI_USE_DEPTHWISECONV_KERNEL
+
+ // No matching fast kernel found, use slow fallback.
+ if (!row_accum_func)
+ {
+ row_accum_func = FloatDepthwiseConvAccumRowGeneric;
+ }
+
+ const int input_height_stride = input_shape.Dims(3) * input_shape.Dims(2);
+ const int input_batch_stride = input_height_stride * input_shape.Dims(1);
+ const int filter_height_stride = filter_shape.Dims(3) * filter_shape.Dims(2);
+
+ // Now that we have determined row_accum_func, we can start work.
+ int batch_start = 0;
+ int batch_end = batches;
+ int row_start = 0;
+ int row_end = output_height;
+ int output_ptr_offset = 0;
+
+ switch (thread_dim)
+ {
+ case 0:
+      // Multithread along the batch axis
+ assert(thread_start >= 0);
+ assert(thread_end <= batches);
+ batch_start = thread_start;
+ batch_end = thread_end;
+ output_ptr_offset = batch_start * FlatSizeSkipDim(output_shape, 0);
+ break;
+ case 1:
+      // Multithread along the row axis
+ assert(thread_start >= 0);
+ assert(thread_end <= output_height);
+ row_start = thread_start;
+ row_end = thread_end;
+ output_ptr_offset = row_start * output_width * output_depth;
+ break;
+ }
+
+ float *output_ptr = output_data + output_ptr_offset;
+ int batch_step = (output_height + row_start - row_end) * output_width * output_depth;
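+  // Per batch, the loop below advances output_ptr by
+  // (row_end - row_start) * output_width * output_depth; batch_step accounts
+  // for the rows owned by other threads, so output_ptr lands on this thread's
+  // row range of the next batch.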
+
+ for (int b = batch_start; b < batch_end; ++b)
+ {
+ for (int out_y = row_start; out_y < row_end; ++out_y)
+ {
+ const int in_y_origin = (out_y * stride_height) - pad_height;
+ const int filter_y_start =
+ std::max(0, (-in_y_origin + dilation_height_factor - 1) / dilation_height_factor);
+ const int filter_y_end =
+ std::min(filter_height, (input_height - in_y_origin + dilation_height_factor - 1) /
+ dilation_height_factor);
+ for (int out_x_buffer_start = 0; out_x_buffer_start < output_width;
+ out_x_buffer_start += kOutputPixelsInAccBuffer)
+ {
+ const int out_x_buffer_end =
+ std::min(output_width, out_x_buffer_start + kOutputPixelsInAccBuffer);
+          // We call a 'pixel' a group of activations that share all but the
+ // 'depth'/'channel' coordinate. num_output_pixels is the number of
+ // output pixels that we will accumulate in this loop iteration.
+ const int num_output_pixels = out_x_buffer_end - out_x_buffer_start;
+ // Initialize our local accumulator with the bias values, so we don't
+ // have to add them later.
+ DepthwiseConvInitAccBuffer(num_output_pixels, output_depth, bias_data, acc_buffer);
+ // Accumulation loop. Most of the time should be spent in here.
+ for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y)
+ {
+ const int in_y = in_y_origin + dilation_height_factor * filter_y;
+ row_accum_func(stride_width, dilation_width_factor, input_depth, input_width,
+ input_data + in_y * input_height_stride + b * input_batch_stride,
+ pad_width, depth_multiplier, filter_width,
+ filter_data + filter_y * filter_height_stride, out_x_buffer_start,
+ out_x_buffer_end, output_depth, acc_buffer);
+ }
+ // Finished accumulating. Now store to destination.
+ const int num_output_values = output_depth * num_output_pixels;
+ int i = 0;
+// TODO(benoitjacob) optimized code goes here
+#ifdef USE_NEON
+ // Handle 16 values at a time
+ for (; i <= num_output_values - 16; i += 16)
+ {
+ float32x4_t acc[4];
+ for (int k = 0; k < 4; k++)
+ {
+ acc[k] = vld1q_f32(acc_buffer + i + 4 * k);
+ }
+ for (int k = 0; k < 4; k++)
+ {
+ acc[k] = vmaxq_f32(vdupq_n_f32(output_activation_min),
+ vminq_f32(vdupq_n_f32(output_activation_max), acc[k]));
+ }
+ for (int k = 0; k < 4; k++)
+ {
+ vst1q_f32(output_ptr + 4 * k, acc[k]);
+ }
+ output_ptr += 16;
+ }
+ // Handle 4 values at a time
+ for (; i <= num_output_values - 4; i += 4)
+ {
+ float32x4_t acc = vld1q_f32(acc_buffer + i);
+
+ acc = vmaxq_f32(vdupq_n_f32(output_activation_min),
+ vminq_f32(vdupq_n_f32(output_activation_max), acc));
+
+ vst1q_f32(output_ptr, acc);
+ output_ptr += 4;
+ }
+#endif // USE_NEON
+ // Handle leftover values, one by one. This is very slow.
+ for (; i < num_output_values; i++)
+ {
+ float acc = acc_buffer[i];
+ acc = std::max(output_activation_min, std::min(output_activation_max, acc));
+
+ *output_ptr++ = acc;
+ }
+ }
+ }
+ output_ptr += batch_step;
+ }
+}
+
+} // namespace optimized
+} // namespace cker
+} // namespace nnfw
+
+#endif
diff --git a/compute/cker/include/cker/operation/optimized/DepthwiseConvUint8.h b/compute/cker/include/cker/operation/optimized/DepthwiseConvUint8.h
index d383b126d..5ca56fd09 100644
--- a/compute/cker/include/cker/operation/optimized/DepthwiseConvUint8.h
+++ b/compute/cker/include/cker/operation/optimized/DepthwiseConvUint8.h
@@ -32,6 +32,8 @@ namespace cker
{
namespace optimized
{
+namespace depthwise_conv
+{
// Implementation of quantized DepthwiseConv
@@ -44,8 +46,8 @@ struct QuantizedDepthwiseConvKernel
template <> struct QuantizedDepthwiseConvKernel<true, 8, 2>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
@@ -57,7 +59,7 @@ template <> struct QuantizedDepthwiseConvKernel<true, 8, 2>
for (int i = 0; i < 2; i++)
{
filter[i] =
- vaddq_s16(vreinterpretq_s16_u16(vmovl_u8(filter_u8.val[i])), vdupq_n_s16(filter_offset));
+ vaddq_s16(vreinterpretq_s16_u16(vmovl_u8(filter_u8.val[i])), vdupq_n_s16(filter_offset));
}
// Handle one output pixel at a time.
for (int outp = 0; outp < num_output_pixels; outp++)
@@ -80,9 +82,9 @@ template <> struct QuantizedDepthwiseConvKernel<true, 8, 2>
for (int i = 0; i < 2; i++)
{
acc[0].val[i] =
- vmlal_s16(acc[0].val[i], vget_low_s16(filter[i]), vget_low_s16(input_dup2.val[i]));
+ vmlal_s16(acc[0].val[i], vget_low_s16(filter[i]), vget_low_s16(input_dup2.val[i]));
acc[1].val[i] =
- vmlal_s16(acc[1].val[i], vget_high_s16(filter[i]), vget_high_s16(input_dup2.val[i]));
+ vmlal_s16(acc[1].val[i], vget_high_s16(filter[i]), vget_high_s16(input_dup2.val[i]));
}
// Store the accumulators back to acc_buffer
for (int i = 0; i < 2; i++)
@@ -98,8 +100,8 @@ template <> struct QuantizedDepthwiseConvKernel<true, 8, 2>
template <> struct QuantizedDepthwiseConvKernel<false, 8, 1>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
@@ -174,8 +176,8 @@ template <> struct QuantizedDepthwiseConvKernel<false, 8, 1>
template <> struct QuantizedDepthwiseConvKernel<false, 4, 2>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
@@ -206,9 +208,9 @@ template <> struct QuantizedDepthwiseConvKernel<false, 4, 2>
for (int i = 0; i < 2; i++)
{
acc[2 * i + 0] =
- vmlal_s16(acc[2 * i + 0], vget_low_s16(filter), vget_low_s16(input_dup2.val[i]));
+ vmlal_s16(acc[2 * i + 0], vget_low_s16(filter), vget_low_s16(input_dup2.val[i]));
acc[2 * i + 1] =
- vmlal_s16(acc[2 * i + 1], vget_high_s16(filter), vget_high_s16(input_dup2.val[i]));
+ vmlal_s16(acc[2 * i + 1], vget_high_s16(filter), vget_high_s16(input_dup2.val[i]));
}
// Store the accumulators back to acc_buffer
for (int i = 0; i < 4; i++)
@@ -253,8 +255,8 @@ template <> struct QuantizedDepthwiseConvKernel<false, 4, 2>
template <> struct QuantizedDepthwiseConvKernel<false, 2, 8>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
@@ -338,8 +340,8 @@ template <> struct QuantizedDepthwiseConvKernel<false, 2, 8>
template <> struct QuantizedDepthwiseConvKernel<false, 2, 2>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
@@ -409,8 +411,8 @@ template <> struct QuantizedDepthwiseConvKernel<false, 2, 2>
template <> struct QuantizedDepthwiseConvKernel<false, 2, 1>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
@@ -534,8 +536,8 @@ template <> struct QuantizedDepthwiseConvKernel<false, 2, 1>
template <> struct QuantizedDepthwiseConvKernel<false, 1, 2>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
@@ -600,8 +602,8 @@ template <> struct QuantizedDepthwiseConvKernel<false, 1, 2>
template <> struct QuantizedDepthwiseConvKernel<false, 1, 4>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
@@ -703,8 +705,8 @@ template <> struct QuantizedDepthwiseConvKernel<false, 1, 4>
template <> struct QuantizedDepthwiseConvKernel<false, 4, 1>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
@@ -778,8 +780,8 @@ template <> struct QuantizedDepthwiseConvKernel<false, 4, 1>
template <> struct QuantizedDepthwiseConvKernel<false, 4, 4>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
@@ -864,8 +866,8 @@ template <> struct QuantizedDepthwiseConvKernel<false, 4, 4>
template <> struct QuantizedDepthwiseConvKernel<true, 0, 3>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
@@ -873,7 +875,7 @@ template <> struct QuantizedDepthwiseConvKernel<true, 0, 3>
// We will do that by register-level table-look-up using VTBL instructions.
// Here we prepare the registers containing the table-lookup indices.
static const uint8_t dup3_indices_array[3][8] = {
- {0, 0, 0, 1, 1, 1, 2, 2}, {2, 3, 3, 3, 4, 4, 4, 5}, {5, 5, 6, 6, 6, 7, 7, 7}};
+ {0, 0, 0, 1, 1, 1, 2, 2}, {2, 3, 3, 3, 4, 4, 4, 5}, {5, 5, 6, 6, 6, 7, 7, 7}};
uint8x8_t dup3_indices[3];
for (int i = 0; i < 3; i++)
{
@@ -928,9 +930,9 @@ template <> struct QuantizedDepthwiseConvKernel<true, 0, 3>
for (int j = 0; j < 3; j++)
{
acc[0].val[j] =
- vmlal_s16(acc[0].val[j], vget_low_s16(input_dup3[j]), vget_low_s16(filter[j]));
+ vmlal_s16(acc[0].val[j], vget_low_s16(input_dup3[j]), vget_low_s16(filter[j]));
acc[1].val[j] =
- vmlal_s16(acc[1].val[j], vget_high_s16(input_dup3[j]), vget_high_s16(filter[j]));
+ vmlal_s16(acc[1].val[j], vget_high_s16(input_dup3[j]), vget_high_s16(filter[j]));
}
// Store the accumulators back to acc_buffer
for (int i = 0; i < 2; i++)
@@ -944,10 +946,10 @@ template <> struct QuantizedDepthwiseConvKernel<true, 0, 3>
// Handle one input channel at a time.
for (; ic < input_depth; ic++)
{
- const uint16_t input_val = *local_input_ptr++ + input_offset;
+ const int16_t input_val = *local_input_ptr++ + input_offset;
for (int i = 0; i < 3; i++)
{
- const uint16_t filter_val = local_filter_ptr[i] + filter_offset;
+ const int16_t filter_val = local_filter_ptr[i] + filter_offset;
*acc_buffer_ptr++ += static_cast<int32_t>(filter_val) * input_val;
}
local_filter_ptr += 3;
@@ -960,8 +962,8 @@ template <> struct QuantizedDepthwiseConvKernel<true, 0, 3>
template <> struct QuantizedDepthwiseConvKernel<true, 0, 2>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
@@ -1002,9 +1004,9 @@ template <> struct QuantizedDepthwiseConvKernel<true, 0, 2>
for (int j = 0; j < 2; j++)
{
acc[0].val[j] =
- vmlal_s16(acc[0].val[j], vget_low_s16(filter[j]), vget_low_s16(input_dup2.val[j]));
+ vmlal_s16(acc[0].val[j], vget_low_s16(filter[j]), vget_low_s16(input_dup2.val[j]));
acc[1].val[j] =
- vmlal_s16(acc[1].val[j], vget_high_s16(filter[j]), vget_high_s16(input_dup2.val[j]));
+ vmlal_s16(acc[1].val[j], vget_high_s16(filter[j]), vget_high_s16(input_dup2.val[j]));
}
// Store the accumulators back to acc_buffer.
for (int i = 0; i < 2; i++)
@@ -1018,10 +1020,10 @@ template <> struct QuantizedDepthwiseConvKernel<true, 0, 2>
for (; ic < input_depth; ic++)
{
// Load the inputs.
- const uint16_t input_val = *local_input_ptr++ + input_offset;
+ const int16_t input_val = *local_input_ptr++ + input_offset;
for (int i = 0; i < 2; i++)
{
- const uint16_t filter_val = local_filter_ptr[i] + filter_offset;
+ const int16_t filter_val = local_filter_ptr[i] + filter_offset;
*acc_buffer_ptr++ += static_cast<int32_t>(filter_val) * input_val;
}
local_filter_ptr += 2;
@@ -1034,8 +1036,8 @@ template <> struct QuantizedDepthwiseConvKernel<true, 0, 2>
template <> struct QuantizedDepthwiseConvKernel<true, 0, 1>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
@@ -1112,8 +1114,8 @@ template <> struct QuantizedDepthwiseConvKernel<true, 0, 1>
// Handle one input channel at a time.
for (; ic < input_depth; ic++)
{
- const uint16_t input_val = *local_input_ptr++ + input_offset;
- const uint16_t filter_val = *local_filter_ptr++ + filter_offset;
+ const int16_t input_val = *local_input_ptr++ + input_offset;
+ const int16_t filter_val = *local_filter_ptr++ + filter_offset;
*acc_buffer_ptr++ += static_cast<int32_t>(filter_val) * input_val;
}
input_ptr += input_ptr_increment;
@@ -1124,8 +1126,8 @@ template <> struct QuantizedDepthwiseConvKernel<true, 0, 1>
template <> struct QuantizedDepthwiseConvKernel<true, 16, 1>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
@@ -1174,7 +1176,7 @@ template <> struct QuantizedDepthwiseConvKernel<true, 16, 1>
{
acc[2 * i + 0] = vmlal_s16(acc[2 * i + 0], vget_low_s16(input[i]), vget_low_s16(filter[i]));
acc[2 * i + 1] =
- vmlal_s16(acc[2 * i + 1], vget_high_s16(input[i]), vget_high_s16(filter[i]));
+ vmlal_s16(acc[2 * i + 1], vget_high_s16(input[i]), vget_high_s16(filter[i]));
}
// Store the accumulators back to acc_buffer
for (int i = 0; i < 4; i++)
@@ -1189,8 +1191,8 @@ template <> struct QuantizedDepthwiseConvKernel<true, 16, 1>
template <> struct QuantizedDepthwiseConvKernel<true, 8, 1>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
@@ -1228,8 +1230,8 @@ template <> struct QuantizedDepthwiseConvKernel<true, 8, 1>
template <> struct QuantizedDepthwiseConvKernel<true, 1, 16>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
@@ -1253,7 +1255,7 @@ template <> struct QuantizedDepthwiseConvKernel<true, 1, 16>
{
uint8_t input_u8 = *input_ptr;
input_ptr += input_ptr_increment;
- uint16_t input = static_cast<int16_t>(input_u8 + input_offset);
+ int16_t input = static_cast<int16_t>(input_u8) + input_offset;
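+      // Widen to int16_t before adding the (possibly negative) offset; the old
+      // unsigned formulation only produced the right bits by modular wrap-around.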
// Load the accumulators from acc_buffer
int32x4_t acc[4];
for (int i = 0; i < 4; i++)
@@ -1279,8 +1281,8 @@ template <> struct QuantizedDepthwiseConvKernel<true, 1, 16>
template <> struct QuantizedDepthwiseConvKernel<true, 1, 32>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
@@ -1302,7 +1304,7 @@ template <> struct QuantizedDepthwiseConvKernel<true, 1, 32>
{
uint8_t input_u8 = *input_ptr;
input_ptr += input_ptr_increment;
- uint16_t input = static_cast<int16_t>(input_u8 + input_offset);
+ int16_t input = static_cast<int16_t>(input_u8) + input_offset;
// Load the accumulators from acc_buffer
int32x4_t acc_0 = vld1q_s32(acc_buffer_ptr + 4 * 0);
int32x4_t acc_1 = vld1q_s32(acc_buffer_ptr + 4 * 1);
@@ -1338,8 +1340,8 @@ template <> struct QuantizedDepthwiseConvKernel<true, 1, 32>
template <> struct QuantizedDepthwiseConvKernel<true, 1, 20>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
@@ -1363,7 +1365,7 @@ template <> struct QuantizedDepthwiseConvKernel<true, 1, 20>
{
uint8_t input_u8 = *input_ptr;
input_ptr += input_ptr_increment;
- uint16_t input = static_cast<int16_t>(input_u8 + input_offset);
+ int16_t input = static_cast<int16_t>(input_u8) + input_offset;
// Load the accumulators from acc_buffer
int32x4_t acc_0 = vld1q_s32(acc_buffer_ptr + 4 * 0);
int32x4_t acc_1 = vld1q_s32(acc_buffer_ptr + 4 * 1);
@@ -1390,21 +1392,21 @@ template <> struct QuantizedDepthwiseConvKernel<true, 1, 20>
template <> struct QuantizedDepthwiseConvKernel<true, 1, 8>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
// Load the filters, add filter_offset.
const uint8x8_t filter_u8 = vld1_u8(filter_ptr);
const int16x8_t filter =
- vaddq_s16(vreinterpretq_s16_u16(vmovl_u8(filter_u8)), vdupq_n_s16(filter_offset));
+ vaddq_s16(vreinterpretq_s16_u16(vmovl_u8(filter_u8)), vdupq_n_s16(filter_offset));
// Handle one output pixel at a time.
for (int outp = 0; outp < num_output_pixels; outp++)
{
uint8_t input_u8 = *input_ptr;
input_ptr += input_ptr_increment;
- uint16_t input = static_cast<int16_t>(input_u8 + input_offset);
+ int16_t input = static_cast<int16_t>(input_u8) + input_offset;
// Load the accumulators from acc_buffer
int32x4_t acc[2];
for (int i = 0; i < 2; i++)
@@ -1427,8 +1429,8 @@ template <> struct QuantizedDepthwiseConvKernel<true, 1, 8>
template <> struct QuantizedDepthwiseConvKernel<true, 2, 1>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
@@ -1455,7 +1457,7 @@ template <> struct QuantizedDepthwiseConvKernel<true, 2, 1>
input_u16 = vset_lane_u16((reinterpret_cast<const uint16_t *>(input_ptr))[0], input_u16, 1);
input_ptr += input_ptr_increment;
const int16x4_t input_s16 =
- vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vreinterpret_u8_u16(input_u16))));
+ vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vreinterpret_u8_u16(input_u16))));
const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
// Multiply-accumulate.
@@ -1490,8 +1492,8 @@ template <> struct QuantizedDepthwiseConvKernel<true, 2, 1>
template <> struct QuantizedDepthwiseConvKernel<true, 4, 1>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
@@ -1555,8 +1557,8 @@ template <> struct QuantizedDepthwiseConvKernel<true, 4, 1>
template <> struct QuantizedDepthwiseConvKernel<false, 12, 1>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
@@ -1652,9 +1654,9 @@ void QuantizedDepthwiseConvAccumRow(int stride, int dilation_factor, int input_d
else
{
out_x_loop_start_unclampled =
- (pad_width - dilation_factor * filter_x + stride - 1) / stride;
+ (pad_width - dilation_factor * filter_x + stride - 1) / stride;
out_x_loop_end_unclampled =
- (pad_width + input_width - dilation_factor * filter_x + stride - 1) / stride;
+ (pad_width + input_width - dilation_factor * filter_x + stride - 1) / stride;
}
}
else
@@ -1672,8 +1674,8 @@ void QuantizedDepthwiseConvAccumRow(int stride, int dilation_factor, int input_d
const uint8_t *input_ptr = input_data + in_x_origin * input_depth;
const int num_output_pixels = out_x_loop_end - out_x_loop_start;
QuantizedDepthwiseConvKernel<kAllowStrided, kFixedInputDepth, kFixedDepthMultiplier>::Run(
- num_output_pixels, input_depth, depth_multiplier, input_ptr, input_offset,
- input_ptr_increment, filter_base_ptr, filter_offset, acc_buffer_ptr);
+ num_output_pixels, input_depth, depth_multiplier, input_ptr, input_offset,
+ input_ptr_increment, filter_base_ptr, filter_offset, acc_buffer_ptr);
filter_base_ptr += output_depth;
}
}
@@ -1690,11 +1692,11 @@ inline void QuantizedDepthwiseConvAccumRowGeneric(int stride, int dilation_facto
const uint8_t *filter_base_ptr = filter_data;
for (int filter_x = 0; filter_x < filter_width; ++filter_x)
{
- const int out_x_loop_start = std::max(
- out_x_buffer_start, (pad_width - dilation_factor * filter_x + stride - 1) / stride);
+ const int out_x_loop_start =
+ std::max(out_x_buffer_start, (pad_width - dilation_factor * filter_x + stride - 1) / stride);
const int out_x_loop_end =
- std::min(out_x_buffer_end,
- (pad_width + input_width - dilation_factor * filter_x + stride - 1) / stride);
+ std::min(out_x_buffer_end,
+ (pad_width + input_width - dilation_factor * filter_x + stride - 1) / stride);
int32_t *acc_buffer_ptr = acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth;
const int in_x_origin = (out_x_loop_start * stride) - pad_width + dilation_factor * filter_x;
@@ -1813,7 +1815,8 @@ inline void DepthwiseConvGeneral(const DepthwiseConvParams &params, const Shape
const uint8_t *input_data, const Shape &filter_shape,
const uint8_t *filter_data, const Shape &bias_shape,
const int32_t *bias_data, const Shape &output_shape,
- uint8_t *output_data)
+ uint8_t *output_data, int thread_start, int thread_end,
+ int thread_dim)
{
(void)bias_shape;
const int stride_width = params.stride_width;
@@ -1852,6 +1855,8 @@ inline void DepthwiseConvGeneral(const DepthwiseConvParams &params, const Shape
assert(kOutputPixelsInAccBuffer * output_depth <= kAccBufferActualSize);
assert(kAccBufferActualSize <= kAccBufferMaxSize);
assert(kOutputPixelsInAccBuffer >= 1);
+ assert(thread_dim == 0 || thread_dim == 1);
+
UNUSED_RELEASE(kAccBufferActualSize);
// row_accum_func will point to the core accumulation function to be used
@@ -1865,7 +1870,7 @@ inline void DepthwiseConvGeneral(const DepthwiseConvParams &params, const Shape
depth_multiplier == FIXED_DEPTH_MULTIPLIER) \
{ \
row_accum_func = \
- QuantizedDepthwiseConvAccumRow<ALLOW_STRIDED, FIXED_INPUT_DEPTH, FIXED_DEPTH_MULTIPLIER>; \
+ QuantizedDepthwiseConvAccumRow<ALLOW_STRIDED, FIXED_INPUT_DEPTH, FIXED_DEPTH_MULTIPLIER>; \
}
#ifdef USE_NEON
@@ -1919,22 +1924,49 @@ inline void DepthwiseConvGeneral(const DepthwiseConvParams &params, const Shape
const int filter_height_stride = filter_shape.Dims(3) * filter_shape.Dims(2);
// Now that we have determined row_accum_func, we can start work.
- uint8_t *output_ptr = output_data;
- for (int b = 0; b < batches; ++b)
+ int batch_start = 0;
+ int batch_end = batches;
+ int row_start = 0;
+ int row_end = output_height;
+ int output_ptr_offset = 0;
+
+ switch (thread_dim)
+ {
+ case 0:
+      // Multithread along the batch axis
+ assert(thread_start >= 0);
+ assert(thread_end <= batches);
+ batch_start = thread_start;
+ batch_end = thread_end;
+ output_ptr_offset = batch_start * FlatSizeSkipDim(output_shape, 0);
+ break;
+ case 1:
+      // Multithread along the row axis
+ assert(thread_start >= 0);
+ assert(thread_end <= output_height);
+ row_start = thread_start;
+ row_end = thread_end;
+ output_ptr_offset = row_start * output_width * output_depth;
+ break;
+ }
+
+ uint8_t *output_ptr = output_data + output_ptr_offset;
+ int batch_step = (output_height + row_start - row_end) * output_width * output_depth;
+ for (int b = batch_start; b < batch_end; ++b)
{
- for (int out_y = 0; out_y < output_height; ++out_y)
+ for (int out_y = row_start; out_y < row_end; ++out_y)
{
const int in_y_origin = (out_y * stride_height) - pad_height;
const int filter_y_start =
- std::max(0, (-in_y_origin + dilation_height_factor - 1) / dilation_height_factor);
+ std::max(0, (-in_y_origin + dilation_height_factor - 1) / dilation_height_factor);
const int filter_y_end =
- std::min(filter_height, (input_height - in_y_origin + dilation_height_factor - 1) /
- dilation_height_factor);
+ std::min(filter_height, (input_height - in_y_origin + dilation_height_factor - 1) /
+ dilation_height_factor);
for (int out_x_buffer_start = 0; out_x_buffer_start < output_width;
out_x_buffer_start += kOutputPixelsInAccBuffer)
{
const int out_x_buffer_end =
- std::min(output_width, out_x_buffer_start + kOutputPixelsInAccBuffer);
+ std::min(output_width, out_x_buffer_start + kOutputPixelsInAccBuffer);
// We call a 'pixel' a group of activation that share all but the
// 'depth'/'channel' coordinate. num_output_pixels is the number of
// output pixels that we will accumulate in this loop iteration.
@@ -1952,7 +1984,7 @@ inline void DepthwiseConvGeneral(const DepthwiseConvParams &params, const Shape
filter_data + filter_y * filter_height_stride, filter_offset,
out_x_buffer_start, out_x_buffer_end, output_depth, acc_buffer);
}
- // Finished accumulating int32 values. Now need to convert them to
+ // Finished accumulating int32_t values. Now need to convert them to
// the final 8bit form and store them.
const int num_output_values = output_depth * num_output_pixels;
int i = 0;
@@ -2113,9 +2145,111 @@ inline void DepthwiseConvGeneral(const DepthwiseConvParams &params, const Shape
}
}
}
+ output_ptr += batch_step;
}
}
+} // namespace depthwise_conv
+
+// template <DepthwiseConvOutputRounding kOutputRounding>
+inline void DepthwiseConvWithRounding(const DepthwiseConvParams &params, const Shape &input_shape,
+ const uint8_t *input_data, const Shape &filter_shape,
+ const uint8_t *filter_data, const Shape &bias_shape,
+ const int32_t *bias_data, const Shape &output_shape,
+ uint8_t *output_data, int thread_start, int thread_end,
+ int thread_dim)
+{
+ const int depth_multiplier = params.depth_multiplier;
+ const int32_t output_activation_min = params.quantized_activation_min;
+ const int32_t output_activation_max = params.quantized_activation_max;
+ const int dilation_width_factor = params.dilation_width_factor;
+ const int dilation_height_factor = params.dilation_height_factor;
+ assert(dilation_width_factor >= 1);
+ assert(dilation_height_factor >= 1);
+ assert(input_shape.DimensionsCount() == 4);
+ assert(filter_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+ assert(output_activation_min <= output_activation_max);
+ const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
+ const int input_depth = input_shape.Dims(3);
+ assert(output_depth == input_depth * depth_multiplier);
+ assert(bias_shape.FlatSize() == output_depth);
+
+ UNUSED_RELEASE(depth_multiplier);
+ UNUSED_RELEASE(output_activation_min);
+ UNUSED_RELEASE(output_activation_max);
+ UNUSED_RELEASE(dilation_width_factor);
+ UNUSED_RELEASE(dilation_height_factor);
+ UNUSED_RELEASE(output_depth);
+ UNUSED_RELEASE(input_depth);
+
+// Enable for arm64, except for the Nvidia Linux for Tegra (L4T) toolchain
+// running on Jetson TX-2, whose compiler does not support the offsetof() macro.
+#if defined(__aarch64__) && !defined(GOOGLE_L4T)
+// TODO Use the code below
+// // Dispatch to dot-product 3x3 kernels when supported.
+//
+// ruy::Context *ruy_context = cpu_backend_context->ruy_context();
+// const bool has_dot_product_instructions =
+// ruy_context != nullptr &&
+// (ruy_context->GetRuntimeEnabledPaths() & ruy::Path::kNeonDotprod) != ruy::Path::kNone;
+// if (has_dot_product_instructions)
+// {
+// using optimized_ops::depthwise_conv::DotProduct3x3KernelType;
+// DotProduct3x3KernelType kernel_type =
+// optimized_ops::depthwise_conv::CategorizeDotProductKernel(
+// input_shape, filter_shape, params);
+// if (kernel_type != DotProduct3x3KernelType::kNone)
+// {
+// optimized_ops::depthwise_conv::DepthwiseConvDotProduct3x3<
+// DepthwiseConvImplementation::kUseNeon3x3DotProduct>(params, input_shape, input_data,
+// filter_shape, filter_data,
+// bias_shape,
+// bias_data, output_shape,
+// output_data);
+// return;
+// }
+// }
+//
+// // Dispatch to non-dot-product 3x3 kernels when supported.
+//
+// const int stride_width = params.stride_width;
+// const int stride_height = params.stride_height;
+// const int pad_width = params.padding_values.width;
+// const int pad_height = params.padding_values.height;
+// const int output_shift = params.output_shift;
+//
+// // Call kernel optimized for depthwise convolutions using 3x3 filters if
+// // parameters are supported.
+// if (depthwise_conv::Fast3x3FilterKernelSupported(input_shape, filter_shape, stride_width,
+// stride_height, dilation_width_factor,
+// dilation_height_factor, pad_width, pad_height,
+// depth_multiplier, output_shape, output_shift))
+// {
+// depthwise_conv::DepthwiseConv3x3Filter<kOutputRounding>(
+// params, input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data,
+// output_shape, output_data, thread_start, thread_end, thread_dim);
+// return;
+// }
+#endif // defined(__aarch64__) && !defined(GOOGLE_L4T)
+
+ depthwise_conv::DepthwiseConvGeneral(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data,
+ thread_start, thread_end, thread_dim);
+}
+
+inline void DepthwiseConvImpl(const DepthwiseConvParams &params, const Shape &input_shape,
+ const uint8_t *input_data, const Shape &filter_shape,
+ const uint8_t *filter_data, const Shape &bias_shape,
+ const int32_t *bias_data, const Shape &output_shape,
+ uint8_t *output_data, int thread_start, int thread_end,
+ int thread_dim)
+{
+ return DepthwiseConvWithRounding(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data, thread_start,
+ thread_end, thread_dim);
+}
+
} // namespace optimized
} // namespace cker
} // namespace nnfw
diff --git a/compute/cker/include/cker/operation/optimized/Gemm.h b/compute/cker/include/cker/operation/optimized/Gemm.h
new file mode 100644
index 000000000..cfebef452
--- /dev/null
+++ b/compute/cker/include/cker/operation/optimized/Gemm.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_OPTIMIZED_GEMM_H__
+#define __NNFW_CKER_OPTIMIZED_GEMM_H__
+
+#include "cker/eigen/eigen_gemm_eigen.h"
+#include "cker/Shape.h"
+#include "cker/Types.h"
+
+#include <ruy/context.h>
+
+namespace nnfw
+{
+namespace cker
+{
+namespace optimized
+{
+
+#if defined(CKER_X86_PLATFORM)
+
+/* From tensorflow/tensorflow/lite/kernels/cpu_backend_gemm_x86.h */
+template <typename LhsScalar, typename RhsScalar, typename AccumScalar, typename DstScalar,
+ QuantizationFlavor quantization_flavor>
+struct GemmImplX86
+{
+ static void Run(const MatrixParams<LhsScalar> &, const LhsScalar *,
+ const MatrixParams<RhsScalar> &, const RhsScalar *,
+ const MatrixParams<DstScalar> &, DstScalar *,
+ const GemmParams<AccumScalar, DstScalar, quantization_flavor> &)
+ {
+ static_assert(
+ std::is_floating_point<LhsScalar>::value && std::is_floating_point<RhsScalar>::value &&
+ std::is_floating_point<AccumScalar>::value && std::is_floating_point<DstScalar>::value &&
+ quantization_flavor != QuantizationFlavor::kFloatingPoint,
+ "GemmImplX86 does not supported types other than float yet.");
+ }
+};
+
+// For float, defer to eigen for now.
+template <> struct GemmImplX86<float, float, float, float, QuantizationFlavor::kFloatingPoint>
+{
+ static void Run(const MatrixParams<float> &lhs_params, const float *lhs_data,
+ const MatrixParams<float> &rhs_params, const float *rhs_data,
+ const MatrixParams<float> &dst_params, float *dst_data,
+ const GemmParams<float, float, QuantizationFlavor::kFloatingPoint> &params)
+ {
+ detail::GemmImplUsingEigen::Run(lhs_params, lhs_data, rhs_params, rhs_data, dst_params,
+ dst_data, params);
+ }
+};
+
+/* From tensorflow/tensorflow/lite/kernels/cpu_backend_gemm.h */
+/* GEMM dispatch implementation for x86.
+ */
+template <typename LhsScalar, typename RhsScalar, typename AccumScalar, typename DstScalar,
+ QuantizationFlavor quantization_flavor>
+struct GemmImpl : GemmImplX86<LhsScalar, RhsScalar, AccumScalar, DstScalar, quantization_flavor>
+{
+};
+
+/* From tensorflow/tensorflow/lite/kernels/cpu_backend_gemm.h */
+template <typename LhsScalar, typename RhsScalar, typename AccumScalar, typename DstScalar,
+ QuantizationFlavor quantization_flavor>
+void Gemm(const MatrixParams<LhsScalar> &lhs_params, const LhsScalar *lhs_data,
+ const MatrixParams<RhsScalar> &rhs_params, const RhsScalar *rhs_data,
+ const MatrixParams<DstScalar> &dst_params, DstScalar *dst_data,
+ const GemmParams<AccumScalar, DstScalar, quantization_flavor> &params)
+{
+ // Generic case: dispatch to any backend as a general GEMM.
+ GemmImpl<LhsScalar, RhsScalar, AccumScalar, DstScalar, quantization_flavor>::Run(
+ lhs_params, lhs_data, rhs_params, rhs_data, dst_params, dst_data, params);
+}
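+// For example, a float GEMM dispatched through Gemm() resolves to
+// GemmImpl<float, float, float, float, QuantizationFlavor::kFloatingPoint>,
+// i.e. the GemmImplX86 specialization above, which defers to
+// detail::GemmImplUsingEigen::Run.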
+
+// From tensorflow/tensorflow/lite/kernels/cpu_backend_gemm_params.h
+inline CachePolicy DefaultCachePolicy(bool is_constant_data)
+{
+ return is_constant_data ? CachePolicy::kCacheIfLargeSpeedup : CachePolicy::kNeverCache;
+}
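+// Intended use, following the TensorFlow Lite original: callers pass
+// DefaultCachePolicy(is_constant) per matrix, so constant weights opt into
+// caching (kCacheIfLargeSpeedup) while variable activations never cache.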
+#endif // CKER_X86_PLATFORM
+
+} // namespace optimized
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_OPTIMIZED_GEMM_H__
diff --git a/compute/cker/include/cker/operation/optimized/OptimizedUtils.h b/compute/cker/include/cker/operation/optimized/OptimizedUtils.h
index ae1f9e78e..f5edc94ab 100644
--- a/compute/cker/include/cker/operation/optimized/OptimizedUtils.h
+++ b/compute/cker/include/cker/operation/optimized/OptimizedUtils.h
@@ -111,7 +111,7 @@ inline void ExtractPatchIntoBufferColumn(const Shape &input_shape, int w, int h,
{
const int bottom_row_elements = (bottom_padding * kwidth * in_depth);
const int bottom_start =
- output_row_offset + ((top_padding + (ih_end - ih_start)) * kwidth * in_depth);
+ output_row_offset + ((top_padding + (ih_end - ih_start)) * kwidth * in_depth);
memset(conv_buffer_data + bottom_start, zero_byte, (bottom_row_elements * sizeof(T)));
}
}
@@ -159,7 +159,7 @@ void DilatedIm2col(const ConvParams &params, const Shape &input_shape, const T *
for (int batch = 0; batch < batches; ++batch)
{
const T zero_byte =
- zero_bytes_len > 1 ? static_cast<T>(zero_bytes[batch]) : static_cast<T>(zero_bytes[0]);
+ zero_bytes_len > 1 ? static_cast<T>(zero_bytes[batch]) : static_cast<T>(zero_bytes[0]);
for (int out_y = 0; out_y < output_height; ++out_y)
{
for (int out_x = 0; out_x < output_width; ++out_x)
diff --git a/compute/cker/include/cker/operation/optimized/integer_ops/DepthwiseConvInt8.h b/compute/cker/include/cker/operation/optimized/integer_ops/DepthwiseConvInt8.h
new file mode 100644
index 000000000..bd8497920
--- /dev/null
+++ b/compute/cker/include/cker/operation/optimized/integer_ops/DepthwiseConvInt8.h
@@ -0,0 +1,2138 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_OPTIMIZED_DEPTHWISE_CONV_INT8_H__
+#define __NNFW_CKER_OPTIMIZED_DEPTHWISE_CONV_INT8_H__
+
+#include "cker/CpuBackendThreadpool.h"
+#include "cker/Shape.h"
+#include "cker/Types.h"
+#include "cker/Utils.h"
+#include "cker/neon/neon_check.h"
+#include "cker/operation/Quantize.h"
+
+#include <fixedpoint/fixedpoint.h>
+#include <public/gemmlowp.h>
+
+namespace nnfw
+{
+namespace cker
+{
+namespace optimized_integer_ops
+{
+
+// Category of depthwise convolution output rounding.
+enum class DepthwiseConvOutputRounding
+{
+ kNone = 0, // Invalid: specific method must be specified.
+ kAwayFromZero, // Original method: exact halves rounded away from zero.
+  kUpward, // Halves towards +infinity: adds 0.5 before truncating.
+ // This is where a future kNearestEven would be placed.
+};
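+
+// For example, on an exact half such as -2.5: kAwayFromZero yields -3, while
+// kUpward yields -2 (-2.5 + 0.5 == -2.0, which truncates to -2); for +2.5
+// both modes yield 3.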
+
+// Category of depthwise convolution depth multiplication.
+enum class DepthwiseConvDepthMultiplication
+{
+ kNoMultiplication = 0, // Depth multiplier = 1.
+ kUnitInputDepth, // Input depth = 1, output depth = depth multiplier.
+};
+
+namespace depthwise_conv
+{
+
+// Implementation of quantized DepthwiseConv
+
+template <bool kAllowStrided, int kFixedInputDepth, int kFixedDepthMultiplier>
+struct QuantizedDepthwiseConvKernel
+{
+};
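+
+// The primary template is deliberately left without a Run() member: only the
+// explicit specializations below provide one, so a combination of
+// (kAllowStrided, kFixedInputDepth, kFixedDepthMultiplier) without a matching
+// specialization cannot be called, and such shapes are expected to go through
+// QuantizedDepthwiseConvAccumRowGeneric further below instead. Each
+// specialization implements the same accumulation contract, roughly (a scalar
+// sketch, not code used here):
+//
+//   for each of num_output_pixels output pixels:
+//     for (int ic = 0; ic < input_depth; ++ic)
+//       for (int m = 0; m < depth_multiplier; ++m)
+//         *acc_buffer_ptr++ +=
+//             static_cast<int32_t>(*filter_ptr++) * (*input_ptr++ + input_offset);
+//     // then rewind filter_ptr and step input_ptr to the next input pixel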
+
+#ifdef USE_NEON
+template <> struct QuantizedDepthwiseConvKernel<true, 8, 2>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int8x8x2_t filter_s8;
+ filter_s8.val[0] = vld1_s8(filter_ptr);
+ filter_s8.val[1] = vld1_s8(filter_ptr + 8);
+ int16x8_t filter[2];
+ for (int i = 0; i < 2; i++)
+ {
+ filter[i] = vmovl_s8(filter_s8.val[i]);
+ }
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4x2_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i].val[0] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ acc[i].val[1] = vld1q_s32(acc_buffer_ptr + 4 * i + 8);
+ }
+ // Load the inputs, add input_offset.
+ const int8x8_t input_s8 = vld1_s8(input_ptr);
+ input_ptr += input_ptr_increment;
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+ // Duplicate the input values, 2-fold
+ const int16x8x2_t input_dup2 = vzipq_s16(input, input);
+ // Multiply-accumulate
+ for (int i = 0; i < 2; i++)
+ {
+ acc[0].val[i] =
+ vmlal_s16(acc[0].val[i], vget_low_s16(filter[i]), vget_low_s16(input_dup2.val[i]));
+ acc[1].val[i] =
+ vmlal_s16(acc[1].val[i], vget_high_s16(filter[i]), vget_high_s16(input_dup2.val[i]));
+ }
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i].val[0]);
+ vst1q_s32(acc_buffer_ptr + 4 * i + 8, acc[i].val[1]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<false, 8, 1>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int /* input_ptr_increment */,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ const int8x8_t filter_s8 = vld1_s8(filter_ptr);
+ const int16x8_t filter = vmovl_s8(filter_s8);
+
+ int outp = 0;
+ // Handle 2 output pixels at a time.
+ for (; outp <= num_output_pixels - 2; outp += 2)
+ {
+ // Load the accumulators from acc_buffer.
+ int32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8[2];
+ for (int i = 0; i < 2; i++)
+ {
+ input_s8[i] = vld1_s8(input_ptr + 8 * i);
+ }
+ input_ptr += 16;
+ int16x8_t input[2];
+ for (int i = 0; i < 2; i++)
+ {
+ input[i] = vmovl_s8(input_s8[i]);
+ }
+ for (int i = 0; i < 2; i++)
+ {
+ input[i] = vaddq_s16(input[i], vdupq_n_s16(input_offset));
+ }
+ // Multiply-accumulate.
+ acc[0] = vmlal_s16(acc[0], vget_low_s16(filter), vget_low_s16(input[0]));
+ acc[1] = vmlal_s16(acc[1], vget_high_s16(filter), vget_high_s16(input[0]));
+ acc[2] = vmlal_s16(acc[2], vget_low_s16(filter), vget_low_s16(input[1]));
+ acc[3] = vmlal_s16(acc[3], vget_high_s16(filter), vget_high_s16(input[1]));
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ // Handle 1 output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer.
+ int32x4_t acc[2];
+ acc[0] = vld1q_s32(acc_buffer_ptr);
+ acc[1] = vld1q_s32(acc_buffer_ptr + 4);
+
+ // Load the inputs, add input_offset.
+ const int8x8_t input_s8 = vld1_s8(input_ptr);
+ input_ptr += 8;
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+ // Multiply-accumulate.
+ acc[0] = vmlal_s16(acc[0], vget_low_s16(filter), vget_low_s16(input));
+ acc[1] = vmlal_s16(acc[1], vget_high_s16(filter), vget_high_s16(input));
+ // Store the accumulators back to acc_buffer
+ vst1q_s32(acc_buffer_ptr, acc[0]);
+ vst1q_s32(acc_buffer_ptr + 4, acc[1]);
+ acc_buffer_ptr += 8;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<false, 4, 2>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int /* input_ptr_increment */,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ const int8x8_t filter_s8 = vld1_s8(filter_ptr);
+ const int16x8_t filter = vmovl_s8(filter_s8);
+
+ int outp = 0;
+ // Handle 2 output pixels at a time.
+ for (; outp <= num_output_pixels - 2; outp += 2)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Load the inputs, add input_offset.
+ const int8x8_t input_s8 = vld1_s8(input_ptr);
+ input_ptr += 8;
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+ // Duplicate the input values, 2-fold
+ const int16x8x2_t input_dup2 = vzipq_s16(input, input);
+ // Multiply-accumulate
+ for (int i = 0; i < 2; i++)
+ {
+ acc[2 * i + 0] =
+ vmlal_s16(acc[2 * i + 0], vget_low_s16(filter), vget_low_s16(input_dup2.val[i]));
+ acc[2 * i + 1] =
+ vmlal_s16(acc[2 * i + 1], vget_high_s16(filter), vget_high_s16(input_dup2.val[i]));
+ }
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ // Handle one output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vdup_n_s8(0);
+ input_s8 = vset_lane_s8(input_ptr[0], input_s8, 0);
+ input_s8 = vset_lane_s8(input_ptr[1], input_s8, 1);
+ input_s8 = vset_lane_s8(input_ptr[2], input_s8, 2);
+ input_s8 = vset_lane_s8(input_ptr[3], input_s8, 3);
+ input_ptr += 4;
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+ // Duplicate the input values, 2-fold
+ const int16x4x2_t input_dup2 = vzip_s16(input, input);
+ // Multiply-accumulate
+ acc[0] = vmlal_s16(acc[0], vget_low_s16(filter), input_dup2.val[0]);
+ acc[1] = vmlal_s16(acc[1], vget_high_s16(filter), input_dup2.val[1]);
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 8;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<false, 2, 8>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int /* input_ptr_increment */,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int16x8_t filter[2];
+ for (int i = 0; i < 2; i++)
+ {
+ const int8x8_t filter_s8 = vld1_s8(filter_ptr + 8 * i);
+ filter[i] = vmovl_s8(filter_s8);
+ }
+ int outp = 0;
+ // Handle two output pixels at a time.
+ for (; outp <= num_output_pixels - 2; outp += 2)
+ {
+ // Load the accumulators from acc_buffer.
+ int32x4_t acc[8];
+ for (int i = 0; i < 8; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vdup_n_s8(0);
+ input_s8 = vset_lane_s8(input_ptr[0], input_s8, 0);
+ input_s8 = vset_lane_s8(input_ptr[1], input_s8, 1);
+ input_s8 = vset_lane_s8(input_ptr[2], input_s8, 2);
+ input_s8 = vset_lane_s8(input_ptr[3], input_s8, 3);
+ input_ptr += 4;
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+ // Multiply-accumulate.
+ acc[0] = vmlal_lane_s16(acc[0], vget_low_s16(filter[0]), input, 0);
+ acc[1] = vmlal_lane_s16(acc[1], vget_high_s16(filter[0]), input, 0);
+ acc[2] = vmlal_lane_s16(acc[2], vget_low_s16(filter[1]), input, 1);
+ acc[3] = vmlal_lane_s16(acc[3], vget_high_s16(filter[1]), input, 1);
+ acc[4] = vmlal_lane_s16(acc[4], vget_low_s16(filter[0]), input, 2);
+ acc[5] = vmlal_lane_s16(acc[5], vget_high_s16(filter[0]), input, 2);
+ acc[6] = vmlal_lane_s16(acc[6], vget_low_s16(filter[1]), input, 3);
+ acc[7] = vmlal_lane_s16(acc[7], vget_high_s16(filter[1]), input, 3);
+ // Store the accumulators back to acc_buffer.
+ for (int i = 0; i < 8; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 32;
+ }
+ // Handle one output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer.
+ int32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vdup_n_s8(0);
+ input_s8 = vset_lane_s8(input_ptr[0], input_s8, 0);
+ input_s8 = vset_lane_s8(input_ptr[1], input_s8, 1);
+ input_ptr += 2;
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+
+ // Multiply-accumulate.
+ acc[0] = vmlal_lane_s16(acc[0], vget_low_s16(filter[0]), input, 0);
+ acc[1] = vmlal_lane_s16(acc[1], vget_high_s16(filter[0]), input, 0);
+ acc[2] = vmlal_lane_s16(acc[2], vget_low_s16(filter[1]), input, 1);
+ acc[3] = vmlal_lane_s16(acc[3], vget_high_s16(filter[1]), input, 1);
+
+ // Store the accumulators back to acc_buffer.
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<false, 2, 2>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int /* input_ptr_increment */,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int8x8_t filter_s8 = vdup_n_s8(0);
+ filter_s8 = vset_lane_s8(filter_ptr[0], filter_s8, 0);
+ filter_s8 = vset_lane_s8(filter_ptr[1], filter_s8, 1);
+ filter_s8 = vset_lane_s8(filter_ptr[2], filter_s8, 2);
+ filter_s8 = vset_lane_s8(filter_ptr[3], filter_s8, 3);
+ const int16x4_t filter = vget_low_s16(vmovl_s8(filter_s8));
+
+ int outp = 0;
+ // Handle 4 output pixels at a time.
+ for (; outp <= num_output_pixels - 4; outp += 4)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+
+ // Load the inputs, add input_offset.
+ const int8x8_t input_s8 = vld1_s8(input_ptr);
+ input_ptr += 8;
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+ // Duplicate the input values, 2-fold
+ const int16x8x2_t input_dup2 = vzipq_s16(input, input);
+ // Multiply-accumulate
+ acc[0] = vmlal_s16(acc[0], filter, vget_low_s16(input_dup2.val[0]));
+ acc[1] = vmlal_s16(acc[1], filter, vget_high_s16(input_dup2.val[0]));
+ acc[2] = vmlal_s16(acc[2], filter, vget_low_s16(input_dup2.val[1]));
+ acc[3] = vmlal_s16(acc[3], filter, vget_high_s16(input_dup2.val[1]));
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ // Handle one output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc = vld1q_s32(acc_buffer_ptr);
+
+ int8x8_t input_s8 = vdup_n_s8(0);
+ input_s8 = vset_lane_s8(input_ptr[0], input_s8, 0);
+ input_s8 = vset_lane_s8(input_ptr[1], input_s8, 1);
+ input_ptr += 2;
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+ // Duplicate the input values, 2-fold
+ const int16x4_t input_dup2 = vzip_s16(input, input).val[0];
+ // Multiply-accumulate
+ acc = vmlal_s16(acc, filter, input_dup2);
+ // Store the accumulators back to acc_buffer
+ vst1q_s32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 4;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<false, 2, 1>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int /* input_ptr_increment */,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int8x8_t filter_s8 = vdup_n_s8(0);
+ filter_s8 = vset_lane_s8(filter_ptr[0], filter_s8, 0);
+ filter_s8 = vset_lane_s8(filter_ptr[1], filter_s8, 1);
+ filter_s8 = vset_lane_s8(filter_ptr[0], filter_s8, 2);
+ filter_s8 = vset_lane_s8(filter_ptr[1], filter_s8, 3);
+ const int16x4_t filter = vget_low_s16(vmovl_s8(filter_s8));
+
+ int outp = 0;
+ // Handle 8 output pixels at a time.
+ for (; outp <= num_output_pixels - 8; outp += 8)
+ {
+ // Load the accumulators from acc_buffer.
+ int32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8[2];
+ for (int i = 0; i < 2; i++)
+ {
+ input_s8[i] = vld1_s8(input_ptr + 8 * i);
+ }
+ input_ptr += 16;
+ int16x8_t input[2];
+ for (int i = 0; i < 2; i++)
+ {
+ input[i] = vmovl_s8(input_s8[i]);
+ }
+ for (int i = 0; i < 2; i++)
+ {
+ input[i] = vaddq_s16(input[i], vdupq_n_s16(input_offset));
+ }
+
+ // Multiply-accumulate.
+ acc[0] = vmlal_s16(acc[0], filter, vget_low_s16(input[0]));
+ acc[1] = vmlal_s16(acc[1], filter, vget_high_s16(input[0]));
+ acc[2] = vmlal_s16(acc[2], filter, vget_low_s16(input[1]));
+ acc[3] = vmlal_s16(acc[3], filter, vget_high_s16(input[1]));
+ // Store the accumulators back to acc_buffer.
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ // Handle 4 output pixels at a time.
+ for (; outp <= num_output_pixels - 4; outp += 4)
+ {
+ // Load the accumulators from acc_buffer.
+ int32x4_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Load the inputs, add input_offset.
+ const int8x8_t input_s8 = vld1_s8(input_ptr);
+ input_ptr += 8;
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+
+ // Multiply-accumulate.
+ acc[0] = vmlal_s16(acc[0], filter, vget_low_s16(input));
+ acc[1] = vmlal_s16(acc[1], filter, vget_high_s16(input));
+ // Store the accumulators back to acc_buffer.
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 8;
+ }
+ // Handle 2 output pixels at a time.
+ for (; outp <= num_output_pixels - 2; outp += 2)
+ {
+ // Load the accumulators from acc_buffer.
+ int32x4_t acc = vld1q_s32(acc_buffer_ptr);
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vdup_n_s8(0);
+ input_s8 = vset_lane_s8(input_ptr[0], input_s8, 0);
+ input_s8 = vset_lane_s8(input_ptr[1], input_s8, 1);
+ input_s8 = vset_lane_s8(input_ptr[2], input_s8, 2);
+ input_s8 = vset_lane_s8(input_ptr[3], input_s8, 3);
+ input_ptr += 4;
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+
+ // Multiply-accumulate.
+ acc = vmlal_s16(acc, filter, input);
+ // Store the accumulators back to acc_buffer.
+ vst1q_s32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 4;
+ }
+ // Handle 1 output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer.
+ int32x2_t acc = vld1_s32(acc_buffer_ptr);
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vdup_n_s8(0);
+ input_s8 = vset_lane_s8(input_ptr[0], input_s8, 0);
+ input_s8 = vset_lane_s8(input_ptr[1], input_s8, 1);
+ input_ptr += 2;
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+
+ // Multiply-accumulate.
+ acc = vget_low_s32(vmlal_s16(vcombine_s32(acc, acc), filter, input));
+ // Store the accumulators back to acc_buffer.
+ vst1_s32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 2;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<false, 1, 2>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int /* input_ptr_increment */,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int8x8_t filter_s8 = vdup_n_s8(0);
+ filter_s8 = vset_lane_s8(filter_ptr[0], filter_s8, 0);
+ filter_s8 = vset_lane_s8(filter_ptr[1], filter_s8, 1);
+ filter_s8 = vset_lane_s8(filter_ptr[0], filter_s8, 2);
+ filter_s8 = vset_lane_s8(filter_ptr[1], filter_s8, 3);
+ const int16x4_t filter = vget_low_s16(vmovl_s8(filter_s8));
+
+ int outp = 0;
+ // Handle 8 output pixels at a time.
+ for (; outp <= num_output_pixels - 8; outp += 8)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+
+ // Load the inputs, add input_offset.
+ const int8x8_t input_s8 = vld1_s8(input_ptr);
+ input_ptr += 8;
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+ // Duplicate the input values, 2-fold
+ const int16x8x2_t input_dup2 = vzipq_s16(input, input);
+ // Multiply-accumulate
+ acc[0] = vmlal_s16(acc[0], filter, vget_low_s16(input_dup2.val[0]));
+ acc[1] = vmlal_s16(acc[1], filter, vget_high_s16(input_dup2.val[0]));
+ acc[2] = vmlal_s16(acc[2], filter, vget_low_s16(input_dup2.val[1]));
+ acc[3] = vmlal_s16(acc[3], filter, vget_high_s16(input_dup2.val[1]));
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ // Handle one output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer
+ int32x2_t acc = vld1_s32(acc_buffer_ptr);
+
+ // Load the inputs, add input_offset.
+      const int16_t input = *input_ptr++ + input_offset;
+
+ // Multiply-accumulate
+ acc = vget_low_s32(vmlal_n_s16(vcombine_s32(acc, acc), filter, input));
+ // Store the accumulators back to acc_buffer
+ vst1_s32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 2;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<false, 1, 4>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int /* input_ptr_increment */,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int8x8_t filter_s8 = vdup_n_s8(0);
+ filter_s8 = vset_lane_s8(filter_ptr[0], filter_s8, 0);
+ filter_s8 = vset_lane_s8(filter_ptr[1], filter_s8, 1);
+ filter_s8 = vset_lane_s8(filter_ptr[2], filter_s8, 2);
+ filter_s8 = vset_lane_s8(filter_ptr[3], filter_s8, 3);
+ const int16x4_t filter = vget_low_s16(vmovl_s8(filter_s8));
+
+ int outp = 0;
+ // Handle 8 output pixels at a time.
+ for (; outp <= num_output_pixels - 8; outp += 8)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[8];
+ for (int i = 0; i < 8; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vld1_s8(input_ptr);
+ input_ptr += 8;
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+
+ // Multiply-accumulate
+ acc[0] = vmlal_lane_s16(acc[0], filter, vget_low_s16(input), 0);
+ acc[1] = vmlal_lane_s16(acc[1], filter, vget_low_s16(input), 1);
+ acc[2] = vmlal_lane_s16(acc[2], filter, vget_low_s16(input), 2);
+ acc[3] = vmlal_lane_s16(acc[3], filter, vget_low_s16(input), 3);
+ acc[4] = vmlal_lane_s16(acc[4], filter, vget_high_s16(input), 0);
+ acc[5] = vmlal_lane_s16(acc[5], filter, vget_high_s16(input), 1);
+ acc[6] = vmlal_lane_s16(acc[6], filter, vget_high_s16(input), 2);
+ acc[7] = vmlal_lane_s16(acc[7], filter, vget_high_s16(input), 3);
+
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 8; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 32;
+ }
+ // Handle 4 output pixels at a time.
+ for (; outp <= num_output_pixels - 4; outp += 4)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vdup_n_s8(0);
+ input_s8 = vset_lane_s8(input_ptr[0], input_s8, 0);
+ input_s8 = vset_lane_s8(input_ptr[1], input_s8, 1);
+ input_s8 = vset_lane_s8(input_ptr[2], input_s8, 2);
+ input_s8 = vset_lane_s8(input_ptr[3], input_s8, 3);
+ input_ptr += 4;
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+
+ // Multiply-accumulate
+ acc[0] = vmlal_lane_s16(acc[0], filter, input, 0);
+ acc[1] = vmlal_lane_s16(acc[1], filter, input, 1);
+ acc[2] = vmlal_lane_s16(acc[2], filter, input, 2);
+ acc[3] = vmlal_lane_s16(acc[3], filter, input, 3);
+
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ // Handle one output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc = vld1q_s32(acc_buffer_ptr);
+
+ // Load the inputs, add input_offset.
+      const int16_t input = *input_ptr++ + input_offset;
+
+ // Multiply-accumulate
+ acc = vmlal_n_s16(acc, filter, input);
+ // Store the accumulators back to acc_buffer
+ vst1q_s32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 4;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<false, 4, 1>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int /* input_ptr_increment */,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int8x8_t filter_s8 = vdup_n_s8(0);
+ filter_s8 = vset_lane_s8(filter_ptr[0], filter_s8, 0);
+ filter_s8 = vset_lane_s8(filter_ptr[1], filter_s8, 1);
+ filter_s8 = vset_lane_s8(filter_ptr[2], filter_s8, 2);
+ filter_s8 = vset_lane_s8(filter_ptr[3], filter_s8, 3);
+ const int16x4_t filter = vget_low_s16(vmovl_s8(filter_s8));
+
+ int outp = 0;
+ // Handle 4 output pixels at a time.
+ for (; outp <= num_output_pixels - 4; outp += 4)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Load the inputs, add input_offset.
+ int16x8_t input[2];
+ for (int i = 0; i < 2; i++)
+ {
+ const int8x8_t input_s8 = vld1_s8(input_ptr + 8 * i);
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ input[i] = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+ }
+ input_ptr += 16;
+ // Multiply-accumulate
+ for (int i = 0; i < 2; i++)
+ {
+ acc[2 * i + 0] = vmlal_s16(acc[2 * i + 0], filter, vget_low_s16(input[i]));
+ acc[2 * i + 1] = vmlal_s16(acc[2 * i + 1], filter, vget_high_s16(input[i]));
+ }
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ // Handle one output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc;
+ acc = vld1q_s32(acc_buffer_ptr);
+
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vdup_n_s8(0);
+ input_s8 = vset_lane_s8(input_ptr[0], input_s8, 0);
+ input_s8 = vset_lane_s8(input_ptr[1], input_s8, 1);
+ input_s8 = vset_lane_s8(input_ptr[2], input_s8, 2);
+ input_s8 = vset_lane_s8(input_ptr[3], input_s8, 3);
+ input_ptr += 4;
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+ // Multiply-accumulate
+ acc = vmlal_s16(acc, filter, input);
+ // Store the accumulators back to acc_buffer
+ vst1q_s32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 4;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<false, 4, 4>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int /* input_ptr_increment */,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int16x8_t filter[2];
+ for (int i = 0; i < 2; i++)
+ {
+ const int8x8_t filter_s8 = vld1_s8(filter_ptr + 8 * i);
+ filter[i] = vmovl_s8(filter_s8);
+ }
+
+ int outp = 0;
+ // Handle 2 output pixels at a time.
+ for (; outp <= num_output_pixels - 2; outp += 2)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[8];
+ for (int i = 0; i < 8; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vld1_s8(input_ptr);
+ input_ptr += 8;
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+
+ // Multiply-accumulate
+ acc[0] = vmlal_lane_s16(acc[0], vget_low_s16(filter[0]), vget_low_s16(input), 0);
+ acc[1] = vmlal_lane_s16(acc[1], vget_high_s16(filter[0]), vget_low_s16(input), 1);
+ acc[2] = vmlal_lane_s16(acc[2], vget_low_s16(filter[1]), vget_low_s16(input), 2);
+ acc[3] = vmlal_lane_s16(acc[3], vget_high_s16(filter[1]), vget_low_s16(input), 3);
+ acc[4] = vmlal_lane_s16(acc[4], vget_low_s16(filter[0]), vget_high_s16(input), 0);
+ acc[5] = vmlal_lane_s16(acc[5], vget_high_s16(filter[0]), vget_high_s16(input), 1);
+ acc[6] = vmlal_lane_s16(acc[6], vget_low_s16(filter[1]), vget_high_s16(input), 2);
+ acc[7] = vmlal_lane_s16(acc[7], vget_high_s16(filter[1]), vget_high_s16(input), 3);
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 8; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 32;
+ }
+ // Handle one output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vdup_n_s8(0);
+ input_s8 = vset_lane_s8(input_ptr[0], input_s8, 0);
+ input_s8 = vset_lane_s8(input_ptr[1], input_s8, 1);
+ input_s8 = vset_lane_s8(input_ptr[2], input_s8, 2);
+ input_s8 = vset_lane_s8(input_ptr[3], input_s8, 3);
+ input_ptr += 4;
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+
+ // Multiply-accumulate
+ acc[0] = vmlal_lane_s16(acc[0], vget_low_s16(filter[0]), input, 0);
+ acc[1] = vmlal_lane_s16(acc[1], vget_high_s16(filter[0]), input, 1);
+ acc[2] = vmlal_lane_s16(acc[2], vget_low_s16(filter[1]), input, 2);
+ acc[3] = vmlal_lane_s16(acc[3], vget_high_s16(filter[1]), input, 3);
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<true, 0, 3>
+{
+ static void Run(int num_output_pixels, int input_depth, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // We will have to duplicate bytes in a NEON register, 3-fold.
+ // We will do that by register-level table-look-up using VTBL instructions.
+ // Here we prepare the registers containing the table-lookup indices.
+ static const int8_t dup3_indices_array[3][8] = {
+ {0, 0, 0, 1, 1, 1, 2, 2}, {2, 3, 3, 3, 4, 4, 4, 5}, {5, 5, 6, 6, 6, 7, 7, 7}};
+ int8x8_t dup3_indices[3];
+ for (int i = 0; i < 3; i++)
+ {
+ dup3_indices[i] = vld1_s8(dup3_indices_array[i]);
+ }
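+    // For example, with input bytes {a, b, c, d, e, f, g, h}, the three VTBL
+    // look-ups below yield {a, a, a, b, b, b, c, c}, {c, d, d, d, e, e, e, f}
+    // and {f, f, g, g, g, h, h, h}: 24 bytes with each input byte repeated
+    // three times, matching depth_multiplier == 3.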
+
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ const int8_t *local_filter_ptr = filter_ptr;
+ const int8_t *local_input_ptr = input_ptr;
+ int ic = 0;
+ // Handle 8 input channels at a time.
+ for (; ic <= input_depth - 8; ic += 8)
+ {
+ // Load the filters.
+ int16x8_t filter[3];
+ int8x8x3_t filter_s8;
+ filter_s8.val[0] = vld1_s8(local_filter_ptr);
+ filter_s8.val[1] = vld1_s8(local_filter_ptr + 8);
+ filter_s8.val[2] = vld1_s8(local_filter_ptr + 16);
+ local_filter_ptr += 24;
+ for (int i = 0; i < 3; i++)
+ {
+ filter[i] = vmovl_s8(filter_s8.val[i]);
+ }
+ // Load the inputs, duplicate 3-fold, add input_offset.
+ const int8x8_t input_s8 = vld1_s8(local_input_ptr);
+ local_input_ptr += 8;
+
+ int8x8_t input_s8_dup3[3];
+ for (int i = 0; i < 3; i++)
+ {
+ input_s8_dup3[i] = vtbl1_s8(input_s8, dup3_indices[i]);
+ }
+ int16x8_t input_dup3[3];
+ for (int i = 0; i < 3; i++)
+ {
+ const int16x8_t input_s16_dup3 = vmovl_s8(input_s8_dup3[i]);
+ input_dup3[i] = vaddq_s16(input_s16_dup3, vdupq_n_s16(input_offset));
+ }
+ // Load the accumulators from acc_buffer
+ int32x4x3_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i].val[0] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ acc[i].val[1] = vld1q_s32(acc_buffer_ptr + 4 * i + 8);
+ acc[i].val[2] = vld1q_s32(acc_buffer_ptr + 4 * i + 16);
+ }
+ // Multiply-accumulate
+ for (int j = 0; j < 3; j++)
+ {
+ acc[0].val[j] =
+ vmlal_s16(acc[0].val[j], vget_low_s16(input_dup3[j]), vget_low_s16(filter[j]));
+ acc[1].val[j] =
+ vmlal_s16(acc[1].val[j], vget_high_s16(input_dup3[j]), vget_high_s16(filter[j]));
+ }
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i].val[0]);
+ vst1q_s32(acc_buffer_ptr + 4 * i + 8, acc[i].val[1]);
+ vst1q_s32(acc_buffer_ptr + 4 * i + 16, acc[i].val[2]);
+ }
+ acc_buffer_ptr += 24;
+ }
+ // Handle one input channel at a time.
+ for (; ic < input_depth; ic++)
+ {
+ const int16_t input_val = *local_input_ptr++ + input_offset;
+ for (int i = 0; i < 3; i++)
+ {
+ *acc_buffer_ptr++ += static_cast<int32_t>(local_filter_ptr[i]) * input_val;
+ }
+ local_filter_ptr += 3;
+ }
+ input_ptr += input_ptr_increment;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<true, 0, 2>
+{
+ static void Run(int num_output_pixels, int input_depth, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ const int8_t *local_filter_ptr = filter_ptr;
+ const int8_t *local_input_ptr = input_ptr;
+ int ic = 0;
+ // Handle 8 input channels at a time.
+ for (; ic <= input_depth - 8; ic += 8)
+ {
+ // Load the filters.
+ int16x8_t filter[2];
+ int8x8x2_t filter_s8;
+ filter_s8.val[0] = vld1_s8(local_filter_ptr);
+ filter_s8.val[1] = vld1_s8(local_filter_ptr + 8);
+ local_filter_ptr += 16;
+ for (int i = 0; i < 2; i++)
+ {
+ filter[i] = vmovl_s8(filter_s8.val[i]);
+ }
+ // Load the inputs, add input_offset, duplicate 2-fold.
+ const int8x8_t input_s8 = vld1_s8(local_input_ptr);
+ local_input_ptr += 8;
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+ const int16x8x2_t input_dup2 = vzipq_s16(input, input);
+ // Load the accumulators from acc_buffer.
+ int32x4x2_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i].val[0] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ acc[i].val[1] = vld1q_s32(acc_buffer_ptr + 4 * i + 8);
+ }
+ // Multiply-accumulate.
+ for (int j = 0; j < 2; j++)
+ {
+ acc[0].val[j] =
+ vmlal_s16(acc[0].val[j], vget_low_s16(filter[j]), vget_low_s16(input_dup2.val[j]));
+ acc[1].val[j] =
+ vmlal_s16(acc[1].val[j], vget_high_s16(filter[j]), vget_high_s16(input_dup2.val[j]));
+ }
+ // Store the accumulators back to acc_buffer.
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i].val[0]);
+ vst1q_s32(acc_buffer_ptr + 4 * i + 8, acc[i].val[1]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ // Handle one input channel at a time.
+ for (; ic < input_depth; ic++)
+ {
+ // Load the inputs.
+ const int16_t input_val = *local_input_ptr++ + input_offset;
+ for (int i = 0; i < 2; i++)
+ {
+ *acc_buffer_ptr++ += static_cast<int32_t>(local_filter_ptr[i]) * input_val;
+ }
+ local_filter_ptr += 2;
+ }
+ input_ptr += input_ptr_increment;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<true, 0, 1>
+{
+ static void Run(int num_output_pixels, int input_depth, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ const int8_t *local_filter_ptr = filter_ptr;
+ const int8_t *local_input_ptr = input_ptr;
+ int ic = 0;
+ // Handle 16 input channels at a time.
+ for (; ic <= input_depth - 16; ic += 16)
+ {
+ // Load the filters.
+ int8x8_t filter_s8_0 = vld1_s8(local_filter_ptr + 8 * 0);
+ int8x8_t filter_s8_1 = vld1_s8(local_filter_ptr + 8 * 1);
+ local_filter_ptr += 16;
+ int16x8_t filter_0 = vmovl_s8(filter_s8_0);
+ int16x8_t filter_1 = vmovl_s8(filter_s8_1);
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8_0 = vld1_s8(local_input_ptr + 8 * 0);
+ int8x8_t input_s8_1 = vld1_s8(local_input_ptr + 8 * 1);
+ local_input_ptr += 16;
+ int16x8_t input_0 = vmovl_s8(input_s8_0);
+ int16x8_t input_1 = vmovl_s8(input_s8_1);
+ input_0 = vaddq_s16(input_0, vdupq_n_s16(input_offset));
+ input_1 = vaddq_s16(input_1, vdupq_n_s16(input_offset));
+ // Load the accumulators from acc_buffer
+ int32x4_t acc_0 = vld1q_s32(acc_buffer_ptr + 4 * 0);
+ int32x4_t acc_1 = vld1q_s32(acc_buffer_ptr + 4 * 1);
+ int32x4_t acc_2 = vld1q_s32(acc_buffer_ptr + 4 * 2);
+ int32x4_t acc_3 = vld1q_s32(acc_buffer_ptr + 4 * 3);
+ acc_0 = vmlal_s16(acc_0, vget_low_s16(input_0), vget_low_s16(filter_0));
+ acc_1 = vmlal_s16(acc_1, vget_high_s16(input_0), vget_high_s16(filter_0));
+ acc_2 = vmlal_s16(acc_2, vget_low_s16(input_1), vget_low_s16(filter_1));
+ acc_3 = vmlal_s16(acc_3, vget_high_s16(input_1), vget_high_s16(filter_1));
+ // Store the accumulators back to acc_buffer
+ vst1q_s32(acc_buffer_ptr + 4 * 0, acc_0);
+ vst1q_s32(acc_buffer_ptr + 4 * 1, acc_1);
+ vst1q_s32(acc_buffer_ptr + 4 * 2, acc_2);
+ vst1q_s32(acc_buffer_ptr + 4 * 3, acc_3);
+ acc_buffer_ptr += 16;
+ }
+ // Handle 8 input channels at a time.
+ for (; ic <= input_depth - 8; ic += 8)
+ {
+ // Load the filters.
+ const int8x8_t filter_s8 = vld1_s8(local_filter_ptr);
+ local_filter_ptr += 8;
+ const int16x8_t filter = vmovl_s8(filter_s8);
+ // Load the inputs, add input_offset.
+ const int8x8_t input_s8 = vld1_s8(local_input_ptr);
+ local_input_ptr += 8;
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ acc[0] = vmlal_s16(acc[0], vget_low_s16(input), vget_low_s16(filter));
+ acc[1] = vmlal_s16(acc[1], vget_high_s16(input), vget_high_s16(filter));
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 8;
+ }
+ // Handle one input channel at a time.
+ for (; ic < input_depth; ic++)
+ {
+ const int16_t input_val = *local_input_ptr++ + input_offset;
+ const int16_t filter_val = *local_filter_ptr++;
+ *acc_buffer_ptr++ += static_cast<int32_t>(filter_val) * input_val;
+ }
+ input_ptr += input_ptr_increment;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<true, 16, 1>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int8x8_t filter_s8[2];
+ for (int i = 0; i < 2; i++)
+ {
+ filter_s8[i] = vld1_s8(filter_ptr + 8 * i);
+ }
+ int16x8_t filter[2];
+ for (int i = 0; i < 2; i++)
+ {
+ filter[i] = vmovl_s8(filter_s8[i]);
+ }
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8[2];
+ for (int i = 0; i < 2; i++)
+ {
+ input_s8[i] = vld1_s8(input_ptr + 8 * i);
+ }
+ input_ptr += input_ptr_increment;
+ int16x8_t input[2];
+ for (int i = 0; i < 2; i++)
+ {
+ input[i] = vmovl_s8(input_s8[i]);
+ }
+ for (int i = 0; i < 2; i++)
+ {
+ input[i] = vaddq_s16(input[i], vdupq_n_s16(input_offset));
+ }
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ for (int i = 0; i < 2; i++)
+ {
+ acc[2 * i + 0] = vmlal_s16(acc[2 * i + 0], vget_low_s16(input[i]), vget_low_s16(filter[i]));
+ acc[2 * i + 1] =
+ vmlal_s16(acc[2 * i + 1], vget_high_s16(input[i]), vget_high_s16(filter[i]));
+ }
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<true, 8, 1>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ const int8x8_t filter_s8 = vld1_s8(filter_ptr);
+ const int16x8_t filter = vmovl_s8(filter_s8);
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ // Load the inputs, add input_offset.
+ const int8x8_t input_s8 = vld1_s8(input_ptr);
+ const int16x8_t input_s16 = vmovl_s8(input_s8);
+ const int16x8_t input = vaddq_s16(input_s16, vdupq_n_s16(input_offset));
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ acc[0] = vmlal_s16(acc[0], vget_low_s16(input), vget_low_s16(filter));
+ acc[1] = vmlal_s16(acc[1], vget_high_s16(input), vget_high_s16(filter));
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 8;
+ input_ptr += input_ptr_increment;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<true, 1, 16>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int8x8_t filter_s8[2];
+ for (int i = 0; i < 2; i++)
+ {
+ filter_s8[i] = vld1_s8(filter_ptr + 8 * i);
+ }
+ int16x8_t filter[2];
+ for (int i = 0; i < 2; i++)
+ {
+ filter[i] = vmovl_s8(filter_s8[i]);
+ }
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ int8_t input_s8 = *input_ptr;
+ input_ptr += input_ptr_increment;
+ int16_t input = static_cast<int16_t>(input_s8 + input_offset);
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ for (int i = 0; i < 2; i++)
+ {
+ acc[2 * i + 0] = vmlal_n_s16(acc[2 * i + 0], vget_low_s16(filter[i]), input);
+ acc[2 * i + 1] = vmlal_n_s16(acc[2 * i + 1], vget_high_s16(filter[i]), input);
+ }
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<true, 1, 32>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int8x8_t filter_s8_0 = vld1_s8(filter_ptr + 8 * 0);
+ int8x8_t filter_s8_1 = vld1_s8(filter_ptr + 8 * 1);
+ int8x8_t filter_s8_2 = vld1_s8(filter_ptr + 8 * 2);
+ int8x8_t filter_s8_3 = vld1_s8(filter_ptr + 8 * 3);
+ int16x8_t filter_0 = vmovl_s8(filter_s8_0);
+ int16x8_t filter_1 = vmovl_s8(filter_s8_1);
+ int16x8_t filter_2 = vmovl_s8(filter_s8_2);
+ int16x8_t filter_3 = vmovl_s8(filter_s8_3);
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ int8_t input_s8 = *input_ptr;
+ input_ptr += input_ptr_increment;
+ int16_t input = static_cast<int16_t>(input_s8 + input_offset);
+ // Load the accumulators from acc_buffer
+ int32x4_t acc_0 = vld1q_s32(acc_buffer_ptr + 4 * 0);
+ int32x4_t acc_1 = vld1q_s32(acc_buffer_ptr + 4 * 1);
+ int32x4_t acc_2 = vld1q_s32(acc_buffer_ptr + 4 * 2);
+ int32x4_t acc_3 = vld1q_s32(acc_buffer_ptr + 4 * 3);
+ int32x4_t acc_4 = vld1q_s32(acc_buffer_ptr + 4 * 4);
+ int32x4_t acc_5 = vld1q_s32(acc_buffer_ptr + 4 * 5);
+ int32x4_t acc_6 = vld1q_s32(acc_buffer_ptr + 4 * 6);
+ int32x4_t acc_7 = vld1q_s32(acc_buffer_ptr + 4 * 7);
+ // Multiply-accumulate
+ acc_0 = vmlal_n_s16(acc_0, vget_low_s16(filter_0), input);
+ acc_1 = vmlal_n_s16(acc_1, vget_high_s16(filter_0), input);
+ acc_2 = vmlal_n_s16(acc_2, vget_low_s16(filter_1), input);
+ acc_3 = vmlal_n_s16(acc_3, vget_high_s16(filter_1), input);
+ acc_4 = vmlal_n_s16(acc_4, vget_low_s16(filter_2), input);
+ acc_5 = vmlal_n_s16(acc_5, vget_high_s16(filter_2), input);
+ acc_6 = vmlal_n_s16(acc_6, vget_low_s16(filter_3), input);
+ acc_7 = vmlal_n_s16(acc_7, vget_high_s16(filter_3), input);
+ // Store the accumulators back to acc_buffer
+ vst1q_s32(acc_buffer_ptr + 4 * 0, acc_0);
+ vst1q_s32(acc_buffer_ptr + 4 * 1, acc_1);
+ vst1q_s32(acc_buffer_ptr + 4 * 2, acc_2);
+ vst1q_s32(acc_buffer_ptr + 4 * 3, acc_3);
+ vst1q_s32(acc_buffer_ptr + 4 * 4, acc_4);
+ vst1q_s32(acc_buffer_ptr + 4 * 5, acc_5);
+ vst1q_s32(acc_buffer_ptr + 4 * 6, acc_6);
+ vst1q_s32(acc_buffer_ptr + 4 * 7, acc_7);
+ acc_buffer_ptr += 32;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<true, 1, 20>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ // NEON wants to load 8 bytes at a time, but 20 is not divisible by 8.
+ // We load the first 16 bytes into filter_s8_{0,1} as usual.
+    // Then we load the last 8 bytes into filter_s8_x (x for 'extra').
+    // This load is partly redundant: the first 4 bytes of filter_s8_x are the
+    // same as the last 4 bytes of filter_s8_1, so only the high half of
+    // filter_x is used below (it supplies output channels 16..19).
+ int8x8_t filter_s8_0 = vld1_s8(filter_ptr + 8 * 0);
+ int8x8_t filter_s8_1 = vld1_s8(filter_ptr + 8 * 1);
+ int8x8_t filter_s8_x = vld1_s8(filter_ptr + 8 * 1 + 4);
+ int16x8_t filter_0 = vmovl_s8(filter_s8_0);
+ int16x8_t filter_1 = vmovl_s8(filter_s8_1);
+ int16x8_t filter_x = vmovl_s8(filter_s8_x);
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ int8_t input_s8 = *input_ptr;
+ input_ptr += input_ptr_increment;
+ int16_t input = static_cast<int16_t>(input_s8 + input_offset);
+ // Load the accumulators from acc_buffer
+ int32x4_t acc_0 = vld1q_s32(acc_buffer_ptr + 4 * 0);
+ int32x4_t acc_1 = vld1q_s32(acc_buffer_ptr + 4 * 1);
+ int32x4_t acc_2 = vld1q_s32(acc_buffer_ptr + 4 * 2);
+ int32x4_t acc_3 = vld1q_s32(acc_buffer_ptr + 4 * 3);
+ int32x4_t acc_4 = vld1q_s32(acc_buffer_ptr + 4 * 4);
+ // Multiply-accumulate
+ acc_0 = vmlal_n_s16(acc_0, vget_low_s16(filter_0), input);
+ acc_1 = vmlal_n_s16(acc_1, vget_high_s16(filter_0), input);
+ acc_2 = vmlal_n_s16(acc_2, vget_low_s16(filter_1), input);
+ acc_3 = vmlal_n_s16(acc_3, vget_high_s16(filter_1), input);
+ acc_4 = vmlal_n_s16(acc_4, vget_high_s16(filter_x), input);
+ // Store the accumulators back to acc_buffer
+ vst1q_s32(acc_buffer_ptr + 4 * 0, acc_0);
+ vst1q_s32(acc_buffer_ptr + 4 * 1, acc_1);
+ vst1q_s32(acc_buffer_ptr + 4 * 2, acc_2);
+ vst1q_s32(acc_buffer_ptr + 4 * 3, acc_3);
+ vst1q_s32(acc_buffer_ptr + 4 * 4, acc_4);
+ acc_buffer_ptr += 20;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<true, 1, 8>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ const int8x8_t filter_s8 = vld1_s8(filter_ptr);
+ const int16x8_t filter = vmovl_s8(filter_s8);
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ int8_t input_s8 = *input_ptr;
+ input_ptr += input_ptr_increment;
+ int16_t input = static_cast<int16_t>(input_s8 + input_offset);
+ // Load the accumulators from acc_buffer
+ int32x4_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vld1q_s32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ acc[0] = vmlal_n_s16(acc[0], vget_low_s16(filter), input);
+ acc[1] = vmlal_n_s16(acc[1], vget_high_s16(filter), input);
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_s32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 8;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<true, 2, 1>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int8x8_t filter_s8 = vdup_n_s8(0);
+ filter_s8 = vset_lane_s8(filter_ptr[0], filter_s8, 0);
+ filter_s8 = vset_lane_s8(filter_ptr[1], filter_s8, 1);
+ filter_s8 = vset_lane_s8(filter_ptr[0], filter_s8, 2);
+ filter_s8 = vset_lane_s8(filter_ptr[1], filter_s8, 3);
+ const int16x4_t filter = vget_low_s16(vmovl_s8(filter_s8));
+
+ int outp = 0;
+
+ // Handle 2 output pixels at a time.
+ for (; outp <= num_output_pixels - 2; outp += 2)
+ {
+ // Load the accumulators from acc_buffer.
+ int32x4_t acc = vld1q_s32(acc_buffer_ptr);
+ // Load the inputs, add input_offset.
+ int16x4_t input_s16 = vdup_n_s16(0);
+ input_s16 = vset_lane_s16((reinterpret_cast<const int16_t *>(input_ptr))[0], input_s16, 0);
+ input_ptr += input_ptr_increment;
+ input_s16 = vset_lane_s16((reinterpret_cast<const int16_t *>(input_ptr))[0], input_s16, 1);
+ input_ptr += input_ptr_increment;
+ input_s16 = vget_low_s16(vmovl_s8(vreinterpret_s8_s16(input_s16)));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+
+ // Multiply-accumulate.
+ acc = vmlal_s16(acc, filter, input);
+ // Store the accumulators back to acc_buffer.
+ vst1q_s32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 4;
+ }
+
+ // Handle 1 output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the accumulators from acc_buffer.
+ int32x2_t acc = vld1_s32(acc_buffer_ptr);
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vdup_n_s8(0);
+ input_s8 = vset_lane_s8(input_ptr[0], input_s8, 0);
+ input_s8 = vset_lane_s8(input_ptr[1], input_s8, 1);
+ input_ptr += input_ptr_increment;
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+
+ // Multiply-accumulate.
+ acc = vget_low_s32(vmlal_s16(vcombine_s32(acc, acc), filter, input));
+ // Store the accumulators back to acc_buffer.
+ vst1_s32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 2;
+ }
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<true, 4, 1>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ if (num_output_pixels <= 0)
+ {
+ return;
+ }
+
+ // Load the filters.
+ int8x8_t filter_s8 = vdup_n_s8(0);
+ filter_s8 = vset_lane_s8(filter_ptr[0], filter_s8, 0);
+ filter_s8 = vset_lane_s8(filter_ptr[1], filter_s8, 1);
+ filter_s8 = vset_lane_s8(filter_ptr[2], filter_s8, 2);
+ filter_s8 = vset_lane_s8(filter_ptr[3], filter_s8, 3);
+ const int16x4_t filter = vget_low_s16(vmovl_s8(filter_s8));
+
+ int outp = 0;
+
+    // Handle one output pixel at a time until the second-to-last pixel: the
+    // 8-byte NEON load below reads eight input values while only four are
+    // needed, so the last pixel is handled separately to avoid reading past
+    // the end of the input.
+ for (; outp < num_output_pixels - 1; outp++)
+ {
+ // Load the accumulators from acc_buffer
+ int32x4_t acc;
+ acc = vld1q_s32(acc_buffer_ptr);
+
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vld1_s8(input_ptr);
+ input_ptr += input_ptr_increment;
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+ // Multiply-accumulate
+ acc = vmlal_s16(acc, filter, input);
+ // Store the accumulators back to acc_buffer
+ vst1q_s32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 4;
+ }
+
+ // Handle the last output pixel.
+ // Load the accumulators from acc_buffer
+ int32x4_t acc;
+ acc = vld1q_s32(acc_buffer_ptr);
+
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8 = vdup_n_s8(0);
+ input_s8 = vset_lane_s8(input_ptr[0], input_s8, 0);
+ input_s8 = vset_lane_s8(input_ptr[1], input_s8, 1);
+ input_s8 = vset_lane_s8(input_ptr[2], input_s8, 2);
+ input_s8 = vset_lane_s8(input_ptr[3], input_s8, 3);
+ const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
+ const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
+ // Multiply-accumulate
+ acc = vmlal_s16(acc, filter, input);
+ // Store the accumulators back to acc_buffer
+ vst1q_s32(acc_buffer_ptr, acc);
+ }
+};
+
+template <> struct QuantizedDepthwiseConvKernel<false, 12, 1>
+{
+ static void Run(int num_output_pixels, int /* input_depth */, int /* depth_multiplier */,
+ const int8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const int8_t *filter_ptr, int32_t *acc_buffer_ptr)
+ {
+ // Load the filters.
+ int8x8_t filter_s8_0 = vld1_s8(filter_ptr);
+ int8x8_t filter_s8_1 = vld1_s8(filter_ptr + 4);
+ int16x8_t filter_s16_0 = vmovl_s8(filter_s8_0);
+ int16x8_t filter_s16_1 = vmovl_s8(filter_s8_1);
+ int16x4_t filter_0 = vget_low_s16(filter_s16_0);
+ int16x4_t filter_1 = vget_high_s16(filter_s16_0);
+ int16x4_t filter_2 = vget_high_s16(filter_s16_1);
+
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ // Load the inputs, add input_offset.
+ int8x8_t input_s8_0 = vld1_s8(input_ptr);
+ int8x8_t input_s8_1 = vld1_s8(input_ptr + 4);
+ input_ptr += input_ptr_increment;
+ int16x8_t input_0 = vmovl_s8(input_s8_0);
+ int16x8_t input_1 = vmovl_s8(input_s8_1);
+ input_0 = vaddq_s16(input_0, vdupq_n_s16(input_offset));
+ input_1 = vaddq_s16(input_1, vdupq_n_s16(input_offset));
+
+ // Load the accumulators from acc_buffer
+ int32x4_t acc_0 = vld1q_s32(acc_buffer_ptr + 4 * 0);
+ int32x4_t acc_1 = vld1q_s32(acc_buffer_ptr + 4 * 1);
+ int32x4_t acc_2 = vld1q_s32(acc_buffer_ptr + 4 * 2);
+
+ // Multiply-accumulate
+ acc_0 = vmlal_s16(acc_0, vget_low_s16(input_0), filter_0);
+ acc_1 = vmlal_s16(acc_1, vget_high_s16(input_0), filter_1);
+ acc_2 = vmlal_s16(acc_2, vget_high_s16(input_1), filter_2);
+
+ // Store the accumulators back to acc_buffer
+ vst1q_s32(acc_buffer_ptr + 4 * 0, acc_0);
+ vst1q_s32(acc_buffer_ptr + 4 * 1, acc_1);
+ vst1q_s32(acc_buffer_ptr + 4 * 2, acc_2);
+
+ acc_buffer_ptr += 12;
+ }
+ }
+};
+#endif // USE_NEON
+
+// Accumulates the effect of one row of the filter on a segment of one row of
+// the output, accessing the corresponding row of the input.
+template <bool kAllowStrided, int kFixedInputDepth, int kFixedDepthMultiplier>
+void QuantizedDepthwiseConvAccumRow(int stride, int dilation_factor, int input_depth,
+ int input_width, const int8_t *input_data, int16_t input_offset,
+ int pad_width, int depth_multiplier, int filter_width,
+ const int8_t *filter_data, int out_x_buffer_start,
+ int out_x_buffer_end, int output_depth, int32_t *acc_buffer)
+{
+  // Consistency-check the parameters. In particular, this keeps the number of
+  // template instantiations minimal, so we don't increase binary size
+  // unnecessarily.
+ static_assert(kFixedDepthMultiplier || !kFixedInputDepth, "");
+ static_assert(kFixedInputDepth || kAllowStrided, "");
+ assert(stride == 1 || kAllowStrided);
+ if (kFixedInputDepth)
+ {
+ assert(input_depth == kFixedInputDepth);
+ }
+ if (kFixedDepthMultiplier)
+ {
+ assert(depth_multiplier == kFixedDepthMultiplier);
+ }
+ assert(output_depth == input_depth * depth_multiplier);
+ const int input_ptr_increment = stride * input_depth;
+ const int8_t *filter_base_ptr = filter_data;
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ // For the current (filter_x, filter_y) point in the filter,
+ // compute the boundaries of the corresponding output row segment.
+ int out_x_loop_start_unclamped = 0;
+ int out_x_loop_end_unclamped = 0;
+ if (kAllowStrided)
+ {
+ if (stride == 2)
+ {
+ out_x_loop_start_unclamped = (pad_width - dilation_factor * filter_x + 1) / 2;
+ out_x_loop_end_unclamped = (pad_width + input_width - dilation_factor * filter_x + 1) / 2;
+ }
+ else if (stride == 4)
+ {
+ out_x_loop_start_unclamped = (pad_width - dilation_factor * filter_x + 3) / 4;
+ out_x_loop_end_unclamped = (pad_width + input_width - dilation_factor * filter_x + 3) / 4;
+ }
+ else
+ {
+ out_x_loop_start_unclamped = (pad_width - dilation_factor * filter_x + stride - 1) / stride;
+ out_x_loop_end_unclamped =
+ (pad_width + input_width - dilation_factor * filter_x + stride - 1) / stride;
+ }
+ }
+ else
+ {
+ out_x_loop_start_unclamped = pad_width - dilation_factor * filter_x;
+ out_x_loop_end_unclamped = pad_width + input_width - dilation_factor * filter_x;
+ }
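+    // All of these are the same ceiling division: the first in-bounds out_x
+    // satisfies out_x * stride - pad_width + dilation_factor * filter_x >= 0,
+    // i.e. out_x >= ceil((pad_width - dilation_factor * filter_x) / stride),
+    // computed as (numerator + stride - 1) / stride. The stride == 2 and
+    // stride == 4 branches fold the constant so the division becomes a shift,
+    // and the non-strided branch is the stride == 1 case. Negative or
+    // past-the-end values are corrected by the clamping just below.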
+    // The kernel will have to iterate on the segment of the output row that
+    // starts at out_x_loop_start and ends at out_x_loop_end.
+ const int out_x_loop_start = std::max(out_x_buffer_start, out_x_loop_start_unclamped);
+ const int out_x_loop_end = std::min(out_x_buffer_end, out_x_loop_end_unclamped);
+
+ int32_t *acc_buffer_ptr = acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth;
+ const int in_x_origin = (out_x_loop_start * stride) - pad_width + dilation_factor * filter_x;
+ const int8_t *input_ptr = input_data + in_x_origin * input_depth;
+ const int num_output_pixels = out_x_loop_end - out_x_loop_start;
+ QuantizedDepthwiseConvKernel<kAllowStrided, kFixedInputDepth, kFixedDepthMultiplier>::Run(
+ num_output_pixels, input_depth, depth_multiplier, input_ptr, input_offset,
+ input_ptr_increment, filter_base_ptr, acc_buffer_ptr);
+ filter_base_ptr += output_depth;
+ }
+}
+
+// Generic fallback of QuantizedDepthwiseConvAccumRow: portable, non-templatized.
+inline void QuantizedDepthwiseConvAccumRowGeneric(int stride, int dilation_factor, int input_depth,
+ int input_width, const int8_t *input_data,
+ int16_t input_offset, int pad_width,
+ int depth_multiplier, int filter_width,
+ const int8_t *filter_data, int out_x_buffer_start,
+ int out_x_buffer_end, int output_depth,
+ int32_t *acc_buffer)
+{
+ const int8_t *filter_base_ptr = filter_data;
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int out_x_loop_start =
+ std::max(out_x_buffer_start, (pad_width - dilation_factor * filter_x + stride - 1) / stride);
+ const int out_x_loop_end =
+ std::min(out_x_buffer_end,
+ (pad_width + input_width - dilation_factor * filter_x + stride - 1) / stride);
+
+ int32_t *acc_buffer_ptr = acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth;
+ const int in_x_origin = (out_x_loop_start * stride) - pad_width + dilation_factor * filter_x;
+ const int8_t *input_ptr = input_data + in_x_origin * input_depth;
+ const int input_ptr_increment = (stride - 1) * input_depth;
+ for (int out_x = out_x_loop_start; out_x < out_x_loop_end; out_x++)
+ {
+ const int8_t *filter_ptr = filter_base_ptr;
+ for (int ic = 0; ic < input_depth; ++ic)
+ {
+ const int16_t input_val = *input_ptr++ + input_offset;
+ for (int m = 0; m < depth_multiplier; m++)
+ {
+ const int16_t filter_val = *filter_ptr++;
+ *acc_buffer_ptr++ += static_cast<int32_t>(filter_val) * input_val;
+ }
+ }
+ input_ptr += input_ptr_increment;
+ }
+ filter_base_ptr += output_depth;
+ }
+}
+
+// Initializes the accumulator buffer with bias values.
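+// For example (illustrative): with output_depth == 3, which is served by the
+// generic memcpy path below, and bias_data == {b0, b1, b2}, the buffer becomes
+// {b0, b1, b2, b0, b1, b2, ...}, one copy of the bias vector per output pixel.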
+inline void DepthwiseConvInitAccBuffer(int num_output_pixels, int output_depth,
+ const int32_t *bias_data, int32_t *acc_buffer)
+{
+ int i = 0;
+#ifdef USE_NEON
+ if (output_depth == 1)
+ {
+ const int32x4_t b = vdupq_n_s32(bias_data[0]);
+ for (; i <= num_output_pixels - 16; i += 16)
+ {
+ vst1q_s32(acc_buffer + i + 0, b);
+ vst1q_s32(acc_buffer + i + 4, b);
+ vst1q_s32(acc_buffer + i + 8, b);
+ vst1q_s32(acc_buffer + i + 12, b);
+ }
+ for (; i <= num_output_pixels - 4; i += 4)
+ {
+ vst1q_s32(acc_buffer + i, b);
+ }
+ }
+ else if (output_depth == 2)
+ {
+ int32x4_t b = vdupq_n_s32(bias_data[0]);
+ b = vsetq_lane_s32(bias_data[1], b, 1);
+ b = vsetq_lane_s32(bias_data[1], b, 3);
+ for (; i <= num_output_pixels - 8; i += 8)
+ {
+ vst1q_s32(acc_buffer + 2 * i + 0, b);
+ vst1q_s32(acc_buffer + 2 * i + 4, b);
+ vst1q_s32(acc_buffer + 2 * i + 8, b);
+ vst1q_s32(acc_buffer + 2 * i + 12, b);
+ }
+ for (; i <= num_output_pixels - 2; i += 2)
+ {
+ vst1q_s32(acc_buffer + 2 * i, b);
+ }
+ }
+ else if (output_depth == 4)
+ {
+ const int32x4_t b = vld1q_s32(bias_data);
+ for (; i <= num_output_pixels - 4; i += 4)
+ {
+ vst1q_s32(acc_buffer + 4 * i + 0, b);
+ vst1q_s32(acc_buffer + 4 * i + 4, b);
+ vst1q_s32(acc_buffer + 4 * i + 8, b);
+ vst1q_s32(acc_buffer + 4 * i + 12, b);
+ }
+ for (; i < num_output_pixels; i++)
+ {
+ vst1q_s32(acc_buffer + 4 * i, b);
+ }
+ }
+ else if (output_depth == 8)
+ {
+ const int32x4_t b0 = vld1q_s32(bias_data);
+ const int32x4_t b1 = vld1q_s32(bias_data + 4);
+ for (; i <= num_output_pixels - 2; i += 2)
+ {
+ vst1q_s32(acc_buffer + 8 * i + 0, b0);
+ vst1q_s32(acc_buffer + 8 * i + 4, b1);
+ vst1q_s32(acc_buffer + 8 * i + 8, b0);
+ vst1q_s32(acc_buffer + 8 * i + 12, b1);
+ }
+ for (; i < num_output_pixels; i++)
+ {
+ vst1q_s32(acc_buffer + 8 * i + 0, b0);
+ vst1q_s32(acc_buffer + 8 * i + 4, b1);
+ }
+ }
+ else if (output_depth == 16)
+ {
+ const int32x4_t b0 = vld1q_s32(bias_data);
+ const int32x4_t b1 = vld1q_s32(bias_data + 4);
+ const int32x4_t b2 = vld1q_s32(bias_data + 8);
+ const int32x4_t b3 = vld1q_s32(bias_data + 12);
+ for (; i < num_output_pixels; i++)
+ {
+ vst1q_s32(acc_buffer + 16 * i + 0, b0);
+ vst1q_s32(acc_buffer + 16 * i + 4, b1);
+ vst1q_s32(acc_buffer + 16 * i + 8, b2);
+ vst1q_s32(acc_buffer + 16 * i + 12, b3);
+ }
+ }
+#endif
+ for (; i < num_output_pixels; i++)
+ {
+ memcpy(acc_buffer + i * output_depth, bias_data, sizeof(acc_buffer[0]) * output_depth);
+ }
+}
+
+inline void DepthwiseConvGeneral(const DepthwiseConvParams &params,
+ const int32_t *output_multiplier, const int32_t *output_shift,
+ const Shape &input_shape, const int8_t *input_data,
+ const Shape &filter_shape, const int8_t *filter_data,
+ const Shape & /* bias_shape */, const int32_t *bias_data,
+ const Shape &output_shape, int8_t *output_data, int thread_start,
+ int thread_end, int thread_dim)
+{
+ const int stride_width = params.stride_width;
+ const int stride_height = params.stride_height;
+ const int pad_width = params.padding_values.width;
+ const int pad_height = params.padding_values.height;
+ const int depth_multiplier = params.depth_multiplier;
+ const int32_t output_activation_min = params.quantized_activation_min;
+ const int32_t output_activation_max = params.quantized_activation_max;
+ const int32_t input_offset = params.input_offset;
+ const int32_t output_offset = params.output_offset;
+ const int dilation_width_factor = params.dilation_width_factor;
+ const int dilation_height_factor = params.dilation_height_factor;
+ const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+ const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
+ const int input_height = input_shape.Dims(1);
+ const int input_width = input_shape.Dims(2);
+ const int input_depth = input_shape.Dims(3);
+ const int filter_height = filter_shape.Dims(1);
+ const int filter_width = filter_shape.Dims(2);
+ const int output_rows = output_shape.Dims(1);
+ const int output_width = output_shape.Dims(2);
+
+ static const int kAccBufferMaxSize = 2048;
+ int32_t acc_buffer[kAccBufferMaxSize];
+ assert(kAccBufferMaxSize >= output_depth);
+ const int kOutputPixelsInAccBuffer = kAccBufferMaxSize / output_depth;
+ const int kAccBufferActualSize = kOutputPixelsInAccBuffer * output_depth;
+ UNUSED_RELEASE(kAccBufferActualSize);
+ assert(kOutputPixelsInAccBuffer * output_depth <= kAccBufferActualSize);
+ assert(kAccBufferActualSize <= kAccBufferMaxSize);
+ assert(kOutputPixelsInAccBuffer >= 1);
+ assert(thread_dim == 0 || thread_dim == 1);
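+ // For example (illustrative): with output_depth == 64, kOutputPixelsInAccBuffer
+ // is 2048 / 64 == 32, so each out_x tile below covers at most 32 output pixels
+ // before the accumulators are quantized and stored.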
+
+ // row_accum_func will point to the core accumulation function to be used
+ // for this DepthwiseConv op.
+ using row_accum_func_t = decltype(&QuantizedDepthwiseConvAccumRowGeneric);
+ row_accum_func_t row_accum_func = nullptr;
+
+#define TFMINI_USE_DEPTHWISECONV_KERNEL(ALLOW_STRIDED, FIXED_INPUT_DEPTH, FIXED_DEPTH_MULTIPLIER) \
+ if (!row_accum_func && (stride_width == 1 || ALLOW_STRIDED) && \
+ (input_depth == FIXED_INPUT_DEPTH || FIXED_INPUT_DEPTH == 0) && \
+ depth_multiplier == FIXED_DEPTH_MULTIPLIER) \
+ { \
+ row_accum_func = \
+ QuantizedDepthwiseConvAccumRow<ALLOW_STRIDED, FIXED_INPUT_DEPTH, FIXED_DEPTH_MULTIPLIER>; \
+ }
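+
+ // For example (illustrative expansion): TFMINI_USE_DEPTHWISECONV_KERNEL(false, 1, 2)
+ // selects QuantizedDepthwiseConvAccumRow<false, 1, 2> only when stride_width == 1
+ // (since ALLOW_STRIDED is false), input_depth == 1 and depth_multiplier == 2, and
+ // only if no earlier candidate has already set row_accum_func.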
+
+#ifdef USE_NEON
+ // We go over our list of kernels in decreasing order of preference
+ // for the cases where multiple kernels could apply.
+
+ // Start with the fastest kernels: AllowStrided=false, fixed input depth.
+
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 1, 2)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 2, 2)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 4, 2)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 1, 4)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 4, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 4, 4)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 8, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 2, 8)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 2, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 12, 1)
+
+ // Next come the strided kernels: AllowStrided=true, fixed input depth.
+ // They are a bit less efficient, but allow stride!=1.
+
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 8, 2)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 16, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 16)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 20)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 32)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 8)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 8, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 2, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 4, 1)
+
+ // Finally come the kernels allowing a variable input depth;
+ // these are the least efficient but most general kernels.
+
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 0, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 0, 2)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 0, 3)
+#endif // USE_NEON
+
+ // If no fast kernel matched, use the slow generic fallback.
+ if (!row_accum_func)
+ {
+ row_accum_func = QuantizedDepthwiseConvAccumRowGeneric;
+ }
+
+#undef TFMINI_USE_DEPTHWISECONV_KERNEL
+
+ const int input_height_stride = input_shape.Dims(3) * input_shape.Dims(2);
+ const int input_batch_stride = input_height_stride * input_shape.Dims(1);
+ const int filter_height_stride = filter_shape.Dims(3) * filter_shape.Dims(2);
+
+ // Now that we have determined row_accum_func, we can start work.
+ int batch_start = 0;
+ int batch_end = batches;
+ int row_start = 0;
+ int row_end = output_rows;
+ int output_ptr_offset = 0;
+
+ switch (thread_dim)
+ {
+ case 0:
+ assert(thread_start >= 0);
+ assert(thread_end <= batches);
+ batch_start = thread_start;
+ batch_end = thread_end;
+ output_ptr_offset = batch_start * FlatSizeSkipDim(output_shape, 0);
+ break;
+ case 1:
+ assert(thread_start >= 0);
+ assert(thread_end <= output_rows);
+ row_start = thread_start;
+ row_end = thread_end;
+ output_ptr_offset = row_start * output_width * output_depth;
+ break;
+ }
+
+ int8_t *output_ptr = output_data + output_ptr_offset;
+ int batch_step = (output_rows + row_start - row_end) * output_width * output_depth;
+ for (int b = batch_start; b < batch_end; ++b)
+ {
+ for (int out_y = row_start; out_y < row_end; ++out_y)
+ {
+ const int in_y_origin = (out_y * stride_height) - pad_height;
+ const int filter_y_start =
+ std::max(0, (-in_y_origin + dilation_height_factor - 1) / dilation_height_factor);
+ const int filter_y_end =
+ std::min(filter_height, (input_height - in_y_origin + dilation_height_factor - 1) /
+ dilation_height_factor);
+ for (int out_x_buffer_start = 0; out_x_buffer_start < output_width;
+ out_x_buffer_start += kOutputPixelsInAccBuffer)
+ {
+ const int out_x_buffer_end =
+ std::min(output_width, out_x_buffer_start + kOutputPixelsInAccBuffer);
+ // We call a 'pixel' a group of activations that share all but the
+ // 'depth'/'channel' coordinate. num_output_pixels is the number of
+ // output pixels that we will accumulate in this loop iteration.
+ const int num_output_pixels = out_x_buffer_end - out_x_buffer_start;
+ // Initialize our local accumulator with the bias values, so we don't
+ // have to add them later.
+ DepthwiseConvInitAccBuffer(num_output_pixels, output_depth, bias_data, acc_buffer);
+ // Accumulation loop. Most of the time should be spent in here.
+ for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y)
+ {
+ const int in_y = in_y_origin + dilation_height_factor * filter_y;
+ row_accum_func(stride_width, dilation_width_factor, input_depth, input_width,
+ input_data + in_y * input_height_stride + b * input_batch_stride,
+ input_offset, pad_width, depth_multiplier, filter_width,
+ filter_data + filter_y * filter_height_stride, out_x_buffer_start,
+ out_x_buffer_end, output_depth, acc_buffer);
+ }
+ // Finished accumulating int32_t values. Now we need to convert them
+ // to the final 8-bit form and store them.
+ const int num_output_values = output_depth * num_output_pixels;
+
+ Quantize(output_multiplier, output_shift, output_depth, num_output_values, output_offset,
+ output_activation_min, output_activation_max, acc_buffer, output_ptr);
+
+ output_ptr += num_output_values;
+ }
+ }
+ output_ptr += batch_step;
+ }
+}
+
+} // namespace depthwise_conv
+
+template <DepthwiseConvOutputRounding kOutputRounding>
+inline void DepthwiseConvWithRounding(const DepthwiseConvParams &params,
+ const int32_t *output_multiplier, const int32_t *output_shift,
+ const Shape &input_shape, const int8_t *input_data,
+ const Shape &filter_shape, const int8_t *filter_data,
+ const Shape &bias_shape, const int32_t *bias_data,
+ const Shape &output_shape, int8_t *output_data,
+ int thread_start, int thread_end, int thread_dim)
+{
+ const int depth_multiplier = params.depth_multiplier;
+ const int dilation_width_factor = params.dilation_width_factor;
+ const int dilation_height_factor = params.dilation_height_factor;
+ UNUSED_RELEASE(depth_multiplier);
+ UNUSED_RELEASE(dilation_width_factor);
+ UNUSED_RELEASE(dilation_height_factor);
+ assert(dilation_width_factor >= 1);
+ assert(dilation_height_factor >= 1);
+ assert(input_shape.DimensionsCount() == 4);
+ assert(filter_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+ const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
+ const int input_depth = input_shape.Dims(3);
+ UNUSED_RELEASE(output_depth);
+ UNUSED_RELEASE(input_depth);
+ assert(output_depth == input_depth * depth_multiplier);
+ assert(bias_shape.FlatSize() == output_depth);
+
+// TODO Use the code below
+#if 0
+// Enable for arm64, except for NVIDIA Linux for Tegra (L4T) running on
+// Jetson TX-2, whose compiler does not support the offsetof() macro.
+#if defined(__aarch64__) && !defined(GOOGLE_L4T)
+#if defined(__ANDROID__) && defined(__clang__)
+ CpuFlags cpu_flags;
+ GetCpuFlags(&cpu_flags);
+ const bool has_dot_product_instructions = cpu_flags.neon_dotprod;
+
+ // Dispatch to dot-product 3x3 kernels when supported.
+ if (has_dot_product_instructions)
+ {
+ using optimized_ops::depthwise_conv::DotProduct3x3KernelType;
+ DotProduct3x3KernelType kernel_type = optimized_ops::depthwise_conv::CategorizeDotProductKernel<
+ optimized_ops::depthwise_conv::QuantizationType::kPerChannelInt8>(
+ input_shape, filter_shape, output_shape, params, output_shift);
+ if (kernel_type != DotProduct3x3KernelType::kNone)
+ {
+ DepthwiseConvParams params_copy = params;
+ params_copy.output_shift_per_channel = output_shift;
+ params_copy.output_multiplier_per_channel = output_multiplier;
+ optimized_ops::depthwise_conv::DepthwiseConvDotProduct3x3PerChannel<
+ DepthwiseConvImplementation::kUseNeon3x3DotProduct>(
+ params_copy, input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data,
+ output_shape, output_data, thread_start, thread_end, thread_dim);
+ return;
+ }
+ }
+
+#endif
+ // Dispatch to non-dot-product 3x3 kernels when supported.
+
+ const int stride_width = params.stride_width;
+ const int stride_height = params.stride_height;
+ const int pad_width = params.padding_values.width;
+ const int pad_height = params.padding_values.height;
+
+ // Call the kernel optimized for depthwise convolutions with 3x3 filters
+ // if the parameters are supported.
+ if (optimized_ops::depthwise_conv::Fast3x3FilterKernelSupported<
+ optimized_ops::depthwise_conv::QuantizationType::kPerChannelInt8>(
+ input_shape, filter_shape, stride_width, stride_height, dilation_width_factor,
+ dilation_height_factor, pad_width, pad_height, depth_multiplier, output_shape, 0,
+ output_shift))
+ {
+ optimized_ops::depthwise_conv::DepthwiseConv3x3FilterPerChannel<
+ DepthwiseConvOutputRounding::kUpward>(
+ params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data, thread_start, thread_end, thread_dim);
+ return;
+ }
+#endif
+
+#endif /* end of if 0 */
+
+ depthwise_conv::DepthwiseConvGeneral(
+ params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data, thread_start, thread_end, thread_dim);
+}
+
+inline void DepthwiseConvImpl(const DepthwiseConvParams &params, const int32_t *output_multiplier,
+ const int32_t *output_shift, const Shape &input_shape,
+ const int8_t *input_data, const Shape &filter_shape,
+ const int8_t *filter_data, const Shape &bias_shape,
+ const int32_t *bias_data, const Shape &output_shape,
+ int8_t *output_data, int thread_start, int thread_end, int thread_dim)
+{
+ return DepthwiseConvWithRounding<DepthwiseConvOutputRounding::kAwayFromZero>(
+ params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data, thread_start, thread_end, thread_dim);
+}
+
+template <typename T, typename TS> struct DepthwiseConvWorkerTask : cpu_backend_threadpool::Task
+{
+ DepthwiseConvWorkerTask(const DepthwiseConvParams &params, const int32_t *output_multiplier,
+ const int32_t *output_shift, const Shape &input_shape,
+ const T *input_data, const Shape &filter_shape, const T *filter_data,
+ const Shape &bias_shape, const TS *bias_data, const Shape &output_shape,
+ T *output_data, int thread_start, int thread_end, int thread_dim)
+ : params_(params), output_multiplier_(output_multiplier), output_shift_(output_shift),
+ input_shape_(input_shape), input_data_(input_data), filter_shape_(filter_shape),
+ filter_data_(filter_data), bias_shape_(bias_shape), bias_data_(bias_data),
+ output_shape_(output_shape), output_data_(output_data), thread_start_(thread_start),
+ thread_end_(thread_end), thread_dim_(thread_dim)
+ {
+ }
+
+ void Run() override
+ {
+ DepthwiseConvImpl(params_, output_multiplier_, output_shift_, input_shape_, input_data_,
+ filter_shape_, filter_data_, bias_shape_, bias_data_, output_shape_,
+ output_data_, thread_start_, thread_end_, thread_dim_);
+ }
+
+private:
+ const DepthwiseConvParams &params_;
+ const int32_t *output_multiplier_;
+ const int32_t *output_shift_;
+ const Shape &input_shape_;
+ const T *input_data_;
+ const Shape &filter_shape_;
+ const T *filter_data_;
+ const Shape &bias_shape_;
+ const TS *bias_data_;
+ const Shape &output_shape_;
+ T *output_data_;
+ int thread_start_;
+ int thread_end_;
+ int thread_dim_;
+};
+
+inline int HowManyConvThreads(const Shape &output_shape, const Shape &filter_shape, int thread_dim)
+{
+ constexpr int kMinMulPerThread = 8;
+ const int output_units = output_shape.Dims(thread_dim);
+ const int filter_height = filter_shape.Dims(1);
+ const int filter_width = filter_shape.Dims(2);
+ const int num_mul_per_unit =
+ FlatSizeSkipDim(output_shape, thread_dim) * filter_height * filter_width;
+ const int min_units_per_thread = kMinMulPerThread / num_mul_per_unit + 1;
+ int thread_count = output_units / min_units_per_thread;
+ return thread_count;
+}
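+
+// For example (illustrative): with kMinMulPerThread == 8 and num_mul_per_unit ==
+// 1000, min_units_per_thread is 8 / 1000 + 1 == 1, so thread_count equals
+// output_units; the caller below clamps it to the ruy context's thread limit.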
+
+inline void DepthwiseConvPerChannel(const DepthwiseConvParams &params,
+ const int32_t *output_multiplier, const int32_t *output_shift,
+ const Shape &input_shape, const int8_t *input_data,
+ const Shape &filter_shape, const int8_t *filter_data,
+ const Shape &bias_shape, const int32_t *bias_data,
+ const Shape &output_shape, int8_t *output_data,
+ ruy::Context *ruy_context)
+{
+ UNUSED_ALL(params, output_multiplier, output_shift, input_shape, input_data, filter_shape,
+ filter_data, bias_shape, bias_data, output_shape, output_data, ruy_context);
+
+ assert(input_shape.DimensionsCount() == 4);
+ assert(filter_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+
+ const int output_batches = output_shape.Dims(0);
+ const int output_rows = output_shape.Dims(1);
+ int thread_count_batch = HowManyConvThreads(output_shape, filter_shape, 0);
+ int thread_count_row = HowManyConvThreads(output_shape, filter_shape, 1);
+ int thread_dim, thread_count, thread_dim_size;
+ if (thread_count_batch > thread_count_row)
+ {
+ thread_dim = 0;
+ thread_dim_size = output_batches;
+ thread_count = thread_count_batch;
+ }
+ else
+ {
+ thread_dim = 1;
+ thread_dim_size = output_rows;
+ thread_count = thread_count_row;
+ }
+
+ // NOTE Borrows the ruy context to read its max_num_threads setting
+ // TODO Define and use max_num_threads for CPU backend
+ const int max_threads = ruy_context->max_num_threads();
+ thread_count = std::max(1, std::min(thread_count, max_threads));
+
+ if (thread_count == 1)
+ {
+ DepthwiseConvImpl(params, output_multiplier, output_shift, input_shape, input_data,
+ filter_shape, filter_data, bias_shape, bias_data, output_shape, output_data,
+ /*thread_start=*/0,
+ /*thread_end=*/output_rows, /*thread_dim=*/1);
+ }
+ else
+ {
+ std::vector<DepthwiseConvWorkerTask<int8_t, int32_t>> tasks;
+ // TODO(b/131746020) don't create new heap allocations every time.
+ // At least we make it a single heap allocation by using reserve().
+ tasks.reserve(thread_count);
+ int thread_start = 0;
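+ // Illustrative partition: with thread_dim_size == 10 and thread_count == 3,
+ // the loop below yields the ranges [0, 3), [3, 6) and [6, 10).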
+ for (int i = 0; i < thread_count; ++i)
+ {
+ int thread_end = thread_start + (thread_dim_size - thread_start) / (thread_count - i);
+ tasks.emplace_back(params, output_multiplier, output_shift, input_shape, input_data,
+ filter_shape, filter_data, bias_shape, bias_data, output_shape,
+ output_data, thread_start, thread_end, thread_dim);
+ thread_start = thread_end;
+ }
+ cpu_backend_threadpool::Execute(tasks.size(), tasks.data(), ruy_context);
+ }
+}
+
+} // namespace optimized_integer_ops
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_OPTIMIZED_DEPTHWISE_CONV_INT8_H__
diff --git a/compute/cker/include/cker/operation/reference/BatchMatMul.h b/compute/cker/include/cker/operation/reference/BatchMatMul.h
index e8ffd4014..1b3020de2 100644
--- a/compute/cker/include/cker/operation/reference/BatchMatMul.h
+++ b/compute/cker/include/cker/operation/reference/BatchMatMul.h
@@ -87,9 +87,8 @@ inline void BatchMatMul(const Shape &lhs_shape, const float *lhs_data, const Sha
{
const float *lhs_ptr2 = lhs_ptr1 + b2 * lhs_ext2;
const float *rhs_ptr2 = rhs_ptr1 + b2 * rhs_ext2;
- float *out_ptr =
- output_data +
- ((b0 * batch_dim1 * batch_dim2) + b1 * batch_dim2 + b2) * lhs_rows * rhs_cols;
+ float *out_ptr = output_data + ((b0 * batch_dim1 * batch_dim2) + b1 * batch_dim2 + b2) *
+ lhs_rows * rhs_cols;
for (int j = 0; j < rhs_cols; ++j)
{
for (int i = 0; i < lhs_rows; ++i)
diff --git a/compute/cker/include/cker/operation/reference/BinaryArithmeticOps.h b/compute/cker/include/cker/operation/reference/BinaryArithmeticOps.h
index f7e39248c..96e1d9127 100644
--- a/compute/cker/include/cker/operation/reference/BinaryArithmeticOps.h
+++ b/compute/cker/include/cker/operation/reference/BinaryArithmeticOps.h
@@ -56,28 +56,22 @@ inline void BinaryArithmeticOp(const BinaryArithmeticOpParam &params, const Shap
const int size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
for (int i = 0; i < size; i++)
{
- output_data[i] =
- ActivationFunctionWithMinMax(fn(input1_data[i], input2_data[i]),
- params.float_activation_min, params.float_activation_max);
+ output_data[i] = ActivationFunctionWithMinMax(
+ fn(input1_data[i], input2_data[i]), params.float_activation_min, params.float_activation_max);
}
}
template <typename T>
-inline void BroadcastBinaryArithmeticOpSlowQuant8(
- const BinaryArithmeticOpParam &params, const Shape &input1_shape, const T *input1_data,
- const Shape &input2_shape, const T *input2_data, const Shape &output_shape, T *output_data,
- const std::function<T(const BinaryArithmeticOpParam &params, const T &, const T &)> &fn)
+inline typename std::enable_if_t<is_quant8<T>::value> BroadcastBinaryArithmeticOpSlow(
+ const BinaryArithmeticOpParam &params, const Shape &input1_shape, const T *input1_data,
+ const Shape &input2_shape, const T *input2_data, const Shape &output_shape, T *output_data,
+ const std::function<T(const BinaryArithmeticOpParam &params, const T &, const T &)> &fn)
{
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
const Shape extended_output_shape = Shape::ExtendedShape(4, output_shape);
- if ((params.quantized_activation_min < 0) && (params.quantized_activation_max > 255))
- {
- throw std::runtime_error{"Support only for Quant8."};
- }
-
// Comment from tensorflow lite:
//
// In Tensorflow, the dimensions are canonically named (batch_number, row,
@@ -99,11 +93,10 @@ inline void BroadcastBinaryArithmeticOpSlowQuant8(
{
for (int c = 0; c < extended_output_shape.Dims(3); ++c)
{
- output_data[Offset(extended_output_shape, b, y, x, c)] =
- ActivationFunctionWithMinMax<uint8_t>(
- fn(params, input1_data[SubscriptToIndex(desc1, b, y, x, c)],
- input2_data[SubscriptToIndex(desc2, b, y, x, c)]),
- params.quantized_activation_min, params.quantized_activation_max);
+ output_data[Offset(extended_output_shape, b, y, x, c)] = ActivationFunctionWithMinMax<T>(
+ fn(params, input1_data[SubscriptToIndex(desc1, b, y, x, c)],
+ input2_data[SubscriptToIndex(desc2, b, y, x, c)]),
+ params.quantized_activation_min, params.quantized_activation_max);
}
}
}
@@ -143,9 +136,9 @@ inline void BroadcastBinaryArithmeticOpSlow(const BinaryArithmeticOpParam &param
for (int c = 0; c < extended_output_shape.Dims(3); ++c)
{
output_data[Offset(extended_output_shape, b, y, x, c)] = ActivationFunctionWithMinMax<T>(
- fn(input1_data[SubscriptToIndex(desc1, b, y, x, c)],
- input2_data[SubscriptToIndex(desc2, b, y, x, c)]),
- params.quantized_activation_min, params.quantized_activation_max);
+ fn(input1_data[SubscriptToIndex(desc1, b, y, x, c)],
+ input2_data[SubscriptToIndex(desc2, b, y, x, c)]),
+ params.quantized_activation_min, params.quantized_activation_max);
}
}
}
@@ -154,9 +147,9 @@ inline void BroadcastBinaryArithmeticOpSlow(const BinaryArithmeticOpParam &param
template <>
inline void BroadcastBinaryArithmeticOpSlow(
- const BinaryArithmeticOpParam &params, const Shape &input1_shape, const float *input1_data,
- const Shape &input2_shape, const float *input2_data, const Shape &output_shape,
- float *output_data, const std::function<float(const float &, const float &)> &fn)
+ const BinaryArithmeticOpParam &params, const Shape &input1_shape, const float *input1_data,
+ const Shape &input2_shape, const float *input2_data, const Shape &output_shape,
+ float *output_data, const std::function<float(const float &, const float &)> &fn)
{
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
@@ -171,10 +164,10 @@ inline void BroadcastBinaryArithmeticOpSlow(
{
for (int c = 0; c < extended_output_shape.Dims(3); ++c)
{
- output_data[Offset(extended_output_shape, b, y, x, c)] = ActivationFunctionWithMinMax(
- fn(input1_data[SubscriptToIndex(desc1, b, y, x, c)],
- input2_data[SubscriptToIndex(desc2, b, y, x, c)]),
- params.float_activation_min, params.float_activation_max);
+ output_data[Offset(extended_output_shape, b, y, x, c)] =
+ ActivationFunctionWithMinMax(fn(input1_data[SubscriptToIndex(desc1, b, y, x, c)],
+ input2_data[SubscriptToIndex(desc2, b, y, x, c)]),
+ params.float_activation_min, params.float_activation_max);
}
}
}
diff --git a/compute/cker/include/cker/operation/reference/Conv.h b/compute/cker/include/cker/operation/reference/Conv.h
index 86e8b5143..e316083a5 100644
--- a/compute/cker/include/cker/operation/reference/Conv.h
+++ b/compute/cker/include/cker/operation/reference/Conv.h
@@ -98,8 +98,8 @@ inline void Conv(const ConvParams &params, const Shape &input_shape, const float
bias_value = bias_data[out_channel];
}
output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
- ActivationFunctionWithMinMax(total + bias_value, output_activation_min,
- output_activation_max);
+ ActivationFunctionWithMinMax(total + bias_value, output_activation_min,
+ output_activation_max);
}
}
}
@@ -183,7 +183,213 @@ inline void Conv(const ConvParams &params, const Shape &input_shape, const uint8
acc = std::max(acc, output_activation_min);
acc = std::min(acc, output_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
- static_cast<uint8_t>(acc);
+ static_cast<uint8_t>(acc);
+ }
+ }
+ }
+ }
+}
+
+template <typename T, bool is_asymmetric>
+inline void Conv(const ConvParams &params, const int32_t *output_multiplier,
+ const int32_t *output_shift, const Shape &input_shape, const T *input_data,
+ const Shape &filter_shape, const T *filter_data, const int32_t *filter_zeropoint,
+ const Shape &bias_shape, const int32_t *bias_data, const Shape &output_shape,
+ T *output_data)
+
+{
+ UNUSED_RELEASE(bias_shape);
+ // Get parameters.
+ const int32_t input_offset = params.input_offset; // r = s(q - Z)
+ const int stride_width = params.stride_width;
+ const int stride_height = params.stride_height;
+ const int dilation_width_factor = params.dilation_width_factor;
+ const int dilation_height_factor = params.dilation_height_factor;
+ const int pad_width = params.padding_values.width;
+ const int pad_height = params.padding_values.height;
+ const int32_t output_offset = params.output_offset;
+
+ // Set min and max value of the output.
+ const int32_t output_activation_min = params.quantized_activation_min;
+ const int32_t output_activation_max = params.quantized_activation_max;
+
+ // Consistency check.
+ assert(output_activation_min < output_activation_max);
+ assert(input_shape.DimensionsCount() == 4);
+ assert(filter_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+ const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+ const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
+ const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
+ if (bias_data)
+ {
+ assert(bias_shape.FlatSize() == output_depth);
+ }
+
+ // Check dimensions of the tensors.
+ const int input_height = input_shape.Dims(1);
+ const int input_width = input_shape.Dims(2);
+ const int filter_height = filter_shape.Dims(1);
+ const int filter_width = filter_shape.Dims(2);
+ const int output_height = output_shape.Dims(1);
+ const int output_width = output_shape.Dims(2);
+ for (int batch = 0; batch < batches; ++batch)
+ {
+ for (int out_y = 0; out_y < output_height; ++out_y)
+ {
+ const int in_y_origin = (out_y * stride_height) - pad_height;
+ for (int out_x = 0; out_x < output_width; ++out_x)
+ {
+ const int in_x_origin = (out_x * stride_width) - pad_width;
+ for (int out_channel = 0; out_channel < output_depth; ++out_channel)
+ {
+ int32_t acc = 0;
+ for (int filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ const int in_y = in_y_origin + dilation_height_factor * filter_y;
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int in_x = in_x_origin + dilation_width_factor * filter_x;
+
+ // Zero padding by omitting the areas outside the image.
+ const bool is_point_inside_image =
+ (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);
+
+ if (!is_point_inside_image)
+ {
+ continue;
+ }
+
+ for (int in_channel = 0; in_channel < input_depth; ++in_channel)
+ {
+ const T input_val = input_data[Offset(input_shape, batch, in_y, in_x, in_channel)];
+ const T filter_val =
+ filter_data[Offset(filter_shape, out_channel, filter_y, filter_x, in_channel)];
+ if (is_asymmetric)
+ {
+ const int32_t filter_offset = -filter_zeropoint[out_channel];
+ acc += (filter_val + filter_offset) * (input_val + input_offset);
+ }
+ else
+ {
+ // Accumulate with a 32-bit accumulator.
+ // In the nudging process during model quantization, we force the
+ // real value of 0.0 to be represented by a quantized value. This
+ // guarantees that the input_offset is an int8_t, even though
+ // it is represented using int32_t. int32_t += int8_t *
+ // (int8_t - int8_t) so the highest value we can get from each
+ // accumulation is [-127, 127] * ([-128, 127] -
+ // [-128, 127]), which is [-32512, 32512]. log2(32512)
+ // = 14.98, which means we can accumulate at least 2^16
+ // multiplications without overflow. The accumulator is
+ // applied to a filter so the accumulation logic will hold as
+ // long as the filter size (filter_y * filter_x * in_channel)
+ // does not exceed 2^16, which is the case in all the models
+ // we have seen so far.
+ // TODO(jianlijianli): Add a check to make sure the
+ // accumulator depth is smaller than 2^16.
+ acc += filter_val * (input_val + input_offset);
+ UNUSED_RELEASE(filter_zeropoint);
+ }
+ }
+ }
+ }
+
+ if (bias_data)
+ {
+ acc += bias_data[out_channel];
+ }
+ acc = MultiplyByQuantizedMultiplier(acc, output_multiplier[out_channel],
+ output_shift[out_channel]);
+ acc += output_offset;
+ acc = std::max(acc, output_activation_min);
+ acc = std::min(acc, output_activation_max);
+ output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] = static_cast<T>(acc);
+ }
+ }
+ }
+ }
+}
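+
+// A minimal call sketch (illustrative, not from the upstream sources): for
+// per-channel int8 with an asymmetric filter, instantiate Conv<int8_t, true>
+// and pass the per-channel filter_zeropoint array; with is_asymmetric == false
+// the zero-points are ignored and the filter is assumed symmetric around zero.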
+
+// Slightly modified from tflite 2.13.0 HybridConvPerChannel.
+// im2col and im2col_shape are removed since they are not used in the reference kernel.
+inline void HybridConvPerChannel(const ConvParams &params, float *scaling_factors_ptr,
+ const Shape &input_shape, const int8_t *input_data,
+ const Shape &filter_shape, const int8_t *filter_data,
+ const Shape &bias_shape, const float *bias_data,
+ const Shape &output_shape, float *output_data,
+ const float *per_channel_scale, const int32_t *input_offset)
+
+{
+ const int stride_width = params.stride_width;
+ const int stride_height = params.stride_height;
+ const int dilation_width_factor = params.dilation_width_factor;
+ const int dilation_height_factor = params.dilation_height_factor;
+ const int pad_width = params.padding_values.width;
+ const int pad_height = params.padding_values.height;
+ const float output_activation_min = params.float_activation_min;
+ const float output_activation_max = params.float_activation_max;
+ assert(input_shape.DimensionsCount() == 4);
+ assert(filter_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+ const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+ const int input_depth = input_shape.Dims(3);
+ const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
+ if (bias_data)
+ {
+ assert(bias_shape.FlatSize() == output_depth);
+ UNUSED_RELEASE(bias_shape);
+ }
+ const int input_height = input_shape.Dims(1);
+ const int input_width = input_shape.Dims(2);
+ const int filter_height = filter_shape.Dims(1);
+ const int filter_width = filter_shape.Dims(2);
+ const int filter_input_depth = filter_shape.Dims(3);
+ const int groups = input_depth / filter_input_depth;
+ assert(input_depth % filter_input_depth == 0);
+ const int filters_per_group = output_depth / groups;
+ const int output_height = output_shape.Dims(1);
+ const int output_width = output_shape.Dims(2);
+ for (int batch = 0; batch < batches; ++batch)
+ {
+ for (int out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (int out_channel = 0; out_channel < output_depth; ++out_channel)
+ {
+ auto group = out_channel / filters_per_group;
+ const int in_x_origin = (out_x * stride_width) - pad_width;
+ const int in_y_origin = (out_y * stride_height) - pad_height;
+ int32_t acc = 0;
+ for (int filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ for (int in_channel = 0; in_channel < filter_input_depth; ++in_channel)
+ {
+ const int in_x = in_x_origin + dilation_width_factor * filter_x;
+ const int in_y = in_y_origin + dilation_height_factor * filter_y;
+ // If the location is outside the bounds of the input image,
+ // use zero as a default value.
+ if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height))
+ {
+ int32_t input_val = input_data[Offset(input_shape, batch, in_y, in_x,
+ in_channel + group * filter_input_depth)];
+ int32_t filter_val =
+ filter_data[Offset(filter_shape, out_channel, filter_y, filter_x, in_channel)];
+ acc += filter_val * (input_val - input_offset[batch]);
+ }
+ }
+ }
+ }
+ float acc_float = acc * per_channel_scale[out_channel] * scaling_factors_ptr[batch];
+ if (bias_data)
+ {
+ acc_float += bias_data[out_channel];
+ }
+ output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
+ ActivationFunctionWithMinMax(acc_float, output_activation_min, output_activation_max);
}
}
}
diff --git a/compute/cker/include/cker/operation/reference/integer_ops/DepthwiseConvHybrid.h b/compute/cker/include/cker/operation/reference/integer_ops/DepthwiseConvHybrid.h
new file mode 100644
index 000000000..9fc58ad3b
--- /dev/null
+++ b/compute/cker/include/cker/operation/reference/integer_ops/DepthwiseConvHybrid.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_REFERENCE_DEPTHWISE_CONV_HYBRID_H__
+#define __NNFW_CKER_REFERENCE_DEPTHWISE_CONV_HYBRID_H__
+
+#include "cker/Shape.h"
+#include "cker/Types.h"
+#include "cker/Utils.h"
+
+namespace nnfw
+{
+namespace cker
+{
+namespace reference_integer_ops
+{
+
+inline void DepthwiseConvHybridPerChannel(const DepthwiseConvParams &params,
+ float *scaling_factors_ptr, const Shape &input_shape,
+ const int8_t *input_data, const Shape &filter_shape,
+ const int8_t *filter_data, const Shape &bias_shape,
+ const float *bias_data, const Shape &output_shape,
+ float *output_data, const float *per_channel_scale,
+ int32_t *input_offset)
+{
+ const int stride_width = params.stride_width;
+ const int stride_height = params.stride_height;
+ const int dilation_width_factor = params.dilation_width_factor;
+ const int dilation_height_factor = params.dilation_height_factor;
+ const int pad_width = params.padding_values.width;
+ const int pad_height = params.padding_values.height;
+ const int depth_multiplier = params.depth_multiplier;
+ const float output_activation_min = params.float_activation_min;
+ const float output_activation_max = params.float_activation_max;
+
+ // Check dimensions of the tensors.
+ assert(input_shape.DimensionsCount() == 4);
+ assert(filter_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+
+ const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+ const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
+ const int input_height = input_shape.Dims(1);
+ const int input_width = input_shape.Dims(2);
+ const int input_depth = input_shape.Dims(3);
+ const int filter_height = filter_shape.Dims(1);
+ const int filter_width = filter_shape.Dims(2);
+ const int output_height = output_shape.Dims(1);
+ const int output_width = output_shape.Dims(2);
+ const int bias_depth = bias_shape.FlatSize();
+ UNUSED_RELEASE(output_depth);
+ UNUSED_RELEASE(bias_shape);
+ assert(output_depth == input_depth * depth_multiplier);
+ assert(bias_depth == output_depth);
+
+ for (int batch = 0; batch < batches; ++batch)
+ {
+ for (int out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (int in_channel = 0; in_channel < input_depth; ++in_channel)
+ {
+ for (int m = 0; m < depth_multiplier; ++m)
+ {
+ const int output_channel = m + in_channel * depth_multiplier;
+ const int in_x_origin = (out_x * stride_width) - pad_width;
+ const int in_y_origin = (out_y * stride_height) - pad_height;
+ int32_t acc = 0;
+ for (int filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int in_x = in_x_origin + dilation_width_factor * filter_x;
+ const int in_y = in_y_origin + dilation_height_factor * filter_y;
+ // Zero padding by omitting the areas outside the image.
+ const bool is_point_inside_image =
+ (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);
+ if (is_point_inside_image)
+ {
+ int32_t input_val =
+ input_data[Offset(input_shape, batch, in_y, in_x, in_channel)];
+ int32_t filter_val =
+ filter_data[Offset(filter_shape, 0, filter_y, filter_x, output_channel)];
+ acc += filter_val * (input_val - input_offset[batch]);
+ }
+ }
+ }
+ float acc_float = static_cast<float>(acc);
+ acc_float *= per_channel_scale[output_channel] * scaling_factors_ptr[batch];
+ if (bias_data && output_channel < bias_depth)
+ {
+ acc_float += bias_data[output_channel];
+ }
+ output_data[Offset(output_shape, batch, out_y, out_x, output_channel)] =
+ ActivationFunctionWithMinMax(acc_float, output_activation_min, output_activation_max);
+ }
+ }
+ }
+ }
+ }
+}
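+
+// For example (illustrative values): with acc == 40, per_channel_scale[c] == 0.5f
+// and scaling_factors_ptr[batch] == 0.1f, the dequantized activation is
+// 40 * 0.5f * 0.1f == 2.0f before the bias add and activation clamp.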
+
+} // namespace reference_integer_ops
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_REFERENCE_DEPTHWISE_CONV_HYBRID_H__
diff --git a/compute/cker/include/cker/operation/reference/integer_ops/DepthwiseConvUInt8.h b/compute/cker/include/cker/operation/reference/integer_ops/DepthwiseConvUInt8.h
new file mode 100644
index 000000000..025e40705
--- /dev/null
+++ b/compute/cker/include/cker/operation/reference/integer_ops/DepthwiseConvUInt8.h
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_REFERENCE_DEPTHWISE_CONV_UINT8_H__
+#define __NNFW_CKER_REFERENCE_DEPTHWISE_CONV_UINT8_H__
+
+#include "cker/Shape.h"
+#include "cker/Types.h"
+#include "cker/Utils.h"
+
+namespace nnfw
+{
+namespace cker
+{
+namespace reference_integer_ops
+{
+inline void DepthwiseConvPerChannel(const DepthwiseConvParams &params,
+ const int32_t *output_multiplier, const int32_t *output_shift,
+ const Shape &input_shape, const uint8_t *input_data,
+ const Shape &filter_shape, const uint8_t *filter_data,
+ const int32_t *filter_zeropoint, const Shape &bias_shape,
+ const int32_t *bias_data, const Shape &output_shape,
+ uint8_t *output_data)
+{
+ // Get parameters.
+ // TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro.
+ const int stride_width = params.stride_width;
+ const int stride_height = params.stride_height;
+ const int dilation_width_factor = params.dilation_width_factor;
+ const int dilation_height_factor = params.dilation_height_factor;
+ const int pad_width = params.padding_values.width;
+ const int pad_height = params.padding_values.height;
+ const int depth_multiplier = params.depth_multiplier;
+ const int32_t input_offset = params.input_offset;
+ const int32_t output_offset = params.output_offset;
+ const int32_t output_activation_min = params.quantized_activation_min;
+ const int32_t output_activation_max = params.quantized_activation_max;
+
+ // Check dimensions of the tensors.
+ assert(input_shape.DimensionsCount() == 4);
+ assert(filter_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+
+ assert(output_activation_min <= output_activation_max);
+ const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+ const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
+ const int input_height = input_shape.Dims(1);
+ const int input_width = input_shape.Dims(2);
+ const int input_depth = input_shape.Dims(3);
+ const int filter_height = filter_shape.Dims(1);
+ const int filter_width = filter_shape.Dims(2);
+ const int output_height = output_shape.Dims(1);
+ const int output_width = output_shape.Dims(2);
+ UNUSED_RELEASE(output_depth);
+ UNUSED_RELEASE(bias_shape);
+ assert(output_depth == input_depth * depth_multiplier);
+ assert(bias_shape.FlatSize() == output_depth);
+
+ for (int batch = 0; batch < batches; ++batch)
+ {
+ for (int out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (int in_channel = 0; in_channel < input_depth; ++in_channel)
+ {
+ for (int m = 0; m < depth_multiplier; ++m)
+ {
+ const int output_channel = m + in_channel * depth_multiplier;
+ const int in_x_origin = (out_x * stride_width) - pad_width;
+ const int in_y_origin = (out_y * stride_height) - pad_height;
+ int32_t acc = 0;
+ for (int filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int in_x = in_x_origin + dilation_width_factor * filter_x;
+ const int in_y = in_y_origin + dilation_height_factor * filter_y;
+ // Zero padding by omitting the areas outside the image.
+ const bool is_point_inside_image =
+ (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);
+ if (is_point_inside_image)
+ {
+ uint8_t input_val =
+ input_data[Offset(input_shape, batch, in_y, in_x, in_channel)];
+ uint8_t filter_val =
+ filter_data[Offset(filter_shape, 0, filter_y, filter_x, output_channel)];
+
+ // { for per-channel
+ // NOTE: The following comment is copied from tflite int8 implementation
+ // It may not be 100% true for uint8 per-channel.
+ //
+ // Accumulate with a 32-bit accumulator.
+ // In the nudging process during model quantization, we force the
+ // real value of 0.0 to be represented by a quantized value. This
+ // guarantees that the input_offset is an int8, even though it
+ // is represented using int32_t.
+ // int32 += int8 * (int8 - int8) so the highest value we can
+ // get from each accumulation is [-127, 127] * ([-128, 127] -
+ // [-128, 127]), which is [-32512, 32512]. log2(32512)
+ // = 14.98, which means we can accumulate at least 2^16
+ // multiplications without overflow. The accumulator is
+ // applied to a filter so the accumulation logic will hold as
+ // long as the filter size (filter_y * filter_x * in_channel)
+ // does not exceed 2^16, which is the case in all the models
+ // we have seen so far.
+ // TODO(jianlijianli): Add a check to make sure the
+ // accumulator depth is smaller than 2^16.
+ const int32_t filter_offset = -filter_zeropoint[output_channel];
+ acc += (filter_val + filter_offset) * (input_val + input_offset);
+ // } for per-channel
+ }
+ }
+ }
+ if (bias_data)
+ {
+ acc += bias_data[output_channel];
+ }
+ acc = MultiplyByQuantizedMultiplier(acc, output_multiplier[output_channel],
+ output_shift[output_channel]);
+ acc += output_offset;
+ acc = std::max(acc, output_activation_min);
+ acc = std::min(acc, output_activation_max);
+ // For q8u per-channel, int8_t -> uint8_t
+ output_data[Offset(output_shape, batch, out_y, out_x, output_channel)] =
+ static_cast<uint8_t>(acc);
+ }
+ }
+ }
+ }
+ }
+}
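+
+// For example (illustrative values): with an input zero point of 128 (so
+// params.input_offset == -128), input_val == 130, filter_zeropoint[c] == 90 and
+// filter_val == 100, the inner loop adds (100 - 90) * (130 - 128) == 20 to acc.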
+
+} // namespace reference_integer_ops
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_REFERENCE_DEPTHWISE_CONV_UINT8_H__
diff --git a/compute/cker/include/cker/ruy/RuySupport.h b/compute/cker/include/cker/ruy/RuySupport.h
index 9612dd517..14489a804 100644
--- a/compute/cker/include/cker/ruy/RuySupport.h
+++ b/compute/cker/include/cker/ruy/RuySupport.h
@@ -19,7 +19,9 @@
#define __NNFW_CKER_RUY_RUY_SUPPORT_H__
#include <util/ConfigSource.h>
-#include <ruy/context.h>
+#include <ruy/matrix.h>
+#include <ruy/ruy.h>
+#include <cassert>
#include "cker/Types.h"
namespace nnfw
@@ -29,44 +31,66 @@ namespace cker
namespace ruy_support
{
+inline ruy::CachePolicy ToRuyCachePolicy(CachePolicy cache_policy)
+{
+ switch (cache_policy)
+ {
+ case CachePolicy::kNeverCache:
+ return ruy::CachePolicy::kNeverCache;
+ case CachePolicy::kCacheIfLargeSpeedup:
+ return ruy::CachePolicy::kCacheIfLargeSpeedup;
+ case CachePolicy::kAlwaysCache:
+ return ruy::CachePolicy::kAlwaysCache;
+ default:
+ assert(false);
+ return ruy::CachePolicy::kNeverCache;
+ }
+}
+
template <typename Scalar, typename DataPointer>
void MakeRuyMatrix(const MatrixParams<Scalar> &params, DataPointer data_ptr,
- ruy::Matrix<Scalar> *dst)
+ ruy::Matrix<Scalar> *dst, bool use_caching = false)
{
- dst->layout.rows = params.rows;
- dst->layout.cols = params.cols;
- if (params.order == Order::kColMajor)
+ ruy::Order ruy_order =
+ params.order == Order::kColMajor ? ruy::Order::kColMajor : ruy::Order::kRowMajor;
+ ruy::MakeSimpleLayout(params.rows, params.cols, ruy_order, dst->mutable_layout());
+ // Note that ruy::Matrix::data is a ConstCheckingPtr, not a plain pointer.
+ // It does care whether we assign to it a Scalar* or a const Scalar*.
+ dst->set_data(data_ptr);
+ dst->set_zero_point(params.zero_point);
+ if (use_caching)
{
- dst->layout.order = ruy::Order::kColMajor;
- dst->layout.stride = params.rows;
+ dst->set_cache_policy(ToRuyCachePolicy(params.cache_policy));
}
- else
+}
+
+// Integer-quantized case with destination type narrower than int32
+template <typename DstScalar, QuantizationFlavor quantization_flavor>
+void MakeRuyMulParams(const GemmParams<std::int32_t, DstScalar, quantization_flavor> &params,
+ ruy::MulParams<std::int32_t, DstScalar> *ruy_mul_params)
+{
+ static_assert(sizeof(DstScalar) < sizeof(std::int32_t), "");
+ if (quantization_flavor == QuantizationFlavor::kIntegerWithUniformMultiplier)
{
- dst->layout.order = ruy::Order::kRowMajor;
- dst->layout.stride = params.cols;
+ ruy_mul_params->set_multiplier_fixedpoint(params.multiplier_fixedpoint);
+ ruy_mul_params->set_multiplier_exponent(params.multiplier_exponent);
}
- // Note that ruy::Matrix::data is a ConstCheckingPtr, not a plain pointer.
- // It does care whether we assign to it a Scalar* or a const Scalar*.
- dst->data = data_ptr;
- dst->zero_point = params.zero_point;
- dst->cacheable = params.cacheable;
+ if (quantization_flavor == QuantizationFlavor::kIntegerWithPerRowMultiplier)
+ {
+ ruy_mul_params->set_multiplier_fixedpoint_perchannel(params.multiplier_fixedpoint_perchannel);
+ ruy_mul_params->set_multiplier_exponent_perchannel(params.multiplier_exponent_perchannel);
+ }
+ ruy_mul_params->set_bias(params.bias);
+ ruy_mul_params->set_clamp_min(params.clamp_min);
+ ruy_mul_params->set_clamp_max(params.clamp_max);
}
-template <typename GemmParamsType, typename RuySpecType>
-void MakeRuySpec(const GemmParamsType &params, RuySpecType *ruy_spec)
+// Raw-integer case with destination type int32.
+template <QuantizationFlavor quantization_flavor>
+void MakeRuyMulParams(const GemmParams<std::int32_t, std::int32_t, quantization_flavor> &params,
+ ruy::MulParams<std::int32_t, std::int32_t> *ruy_mul_params)
{
- // This validation has already been performed by the Gemm API entry point,
- // but it doesn't hurt to test specifically this again here, where it's
- // being used.
- ValidateGemmParams(params);
-
- ruy_spec->multiplier_fixedpoint = params.multiplier_fixedpoint;
- ruy_spec->multiplier_exponent = params.multiplier_exponent;
- ruy_spec->multiplier_fixedpoint_perchannel = params.multiplier_fixedpoint_perchannel;
- ruy_spec->multiplier_exponent_perchannel = params.multiplier_exponent_perchannel;
- ruy_spec->bias = params.bias;
- ruy_spec->clamp_min = params.clamp_min;
- ruy_spec->clamp_max = params.clamp_max;
+ ruy_mul_params->set_bias(params.bias);
}
} // namespace ruy_support
diff --git a/compute/cker/include/cker/train/operation/FullyConnected.h b/compute/cker/include/cker/train/operation/FullyConnected.h
new file mode 100644
index 000000000..b0255d287
--- /dev/null
+++ b/compute/cker/include/cker/train/operation/FullyConnected.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_TRAIN_OPERATION_FULLY_CONNECTED_H__
+#define __NNFW_CKER_TRAIN_OPERATION_FULLY_CONNECTED_H__
+
+#include "cker/eigen/Utils.h"
+#include "cker/Shape.h"
+
+namespace nnfw
+{
+namespace cker
+{
+namespace train
+{
+
+template <typename T>
+inline void FullyConnectedBiasGrad(const Shape &incoming_shape, const T *incoming_data,
+ const Shape &grad_shape, T *grad_data)
+{
+ const auto bias_size = grad_shape.FlatSize();
+ if (bias_size != incoming_shape.Dims(incoming_shape.DimensionsCount() - 1) ||
+ bias_size != grad_shape.Dims(0))
+ throw std::runtime_error("cker::FullyConnectedBiasGrad: Unmatched shape");
+
+ const auto in_mat = MapAsMatrixWithLastDimAsRows(incoming_data, incoming_shape);
+ auto grad_mat = MapAsMatrixWithLastDimAsRows(grad_data, grad_shape);
+
+ grad_mat = in_mat.rowwise().sum();
+}
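+
+// For example: an incoming gradient of shape {2, 4} with rows {-1, 2, -3, 4}
+// and {5, -6, -7, 8} reduces to the bias gradient {4, -4, -10, 12}; each output
+// unit's entries are summed over the batch dimension.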
+
+} // namespace train
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_TRAIN_OPERATION_FULLY_CONNECTED_H__
diff --git a/compute/cker/include/cker/train/operation/Loss.h b/compute/cker/include/cker/train/operation/Loss.h
new file mode 100644
index 000000000..94f49ff07
--- /dev/null
+++ b/compute/cker/include/cker/train/operation/Loss.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_TRAIN_OPERATION_LOSS_H__
+#define __NNFW_CKER_TRAIN_OPERATION_LOSS_H__
+
+#include "cker/Shape.h"
+#include "cker/eigen/Utils.h"
+
+namespace nnfw
+{
+namespace cker
+{
+namespace train
+{
+
+template <typename T>
+inline void MSE(const Shape &y_pred_shape, const T *y_pred_data, const Shape &y_true_shape,
+ const T *y_true_data, const Shape &output_shape, T *output_data)
+{
+ // TODO Consider Reduction
+ if (output_shape != Shape{1})
+ throw std::runtime_error("cker::MSE: output_shape != Shape{1}");
+ if (y_pred_shape != y_true_shape)
+ throw std::runtime_error("cker::MSE: y_pred_shape != y_true_shape");
+
+ const auto y_pred = MapAsMatrixWithLastDimAsRows(y_pred_data, y_pred_shape);
+ const auto y_true = MapAsMatrixWithLastDimAsRows(y_true_data, y_true_shape);
+
+ double squared_sum = 0.0;
+ for (size_t c = 0; c < (size_t)y_pred.cols(); ++c)
+ {
+ for (size_t r = 0; r < (size_t)y_pred.rows(); ++r)
+ {
+ double error = y_pred.coeff(r, c) - y_true.coeff(r, c);
+ squared_sum += (error * error);
+ }
+ }
+
+ auto size = y_pred.cols() * y_pred.rows();
+ output_data[0] = (T)(squared_sum / size);
+}
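+
+// For example: with y_pred == {1, 2} and y_true == {0, 4}, the loss is
+// ((1 - 0)^2 + (2 - 4)^2) / 2 == 2.5.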
+
+template <typename T>
+inline void MSEGrad(const Shape &y_pred_shape, const T *y_pred_data, const Shape &y_true_shape,
+ const T *y_true_data, const Shape &grad_shape, T *grad_data)
+{
+ if (y_pred_shape != y_true_shape)
+ throw std::runtime_error("cker::MSEGrad: y_pred_shape != y_true_shape");
+ if (y_pred_shape != grad_shape)
+ throw std::runtime_error("cker::MSEGrad: y_pred_shape != grad_shape");
+
+ const int size = grad_shape.FlatSize();
+ for (int i = 0; i < size; ++i)
+ {
+ grad_data[i] = static_cast<T>(-2 * (y_true_data[i] - y_pred_data[i]) / size);
+ }
+}
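+
+// Continuing the example above: the gradient is -2 * (y_true - y_pred) / size,
+// i.e. {-2 * (0 - 1) / 2, -2 * (4 - 2) / 2} == {1, -2}.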
+
+} // namespace train
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_TRAIN_OPERATION_LOSS_H__
diff --git a/compute/cker/include/cker/train/operation/ReLU.h b/compute/cker/include/cker/train/operation/ReLU.h
new file mode 100644
index 000000000..32cf7fa9c
--- /dev/null
+++ b/compute/cker/include/cker/train/operation/ReLU.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_TRAIN_OPERATION_RELU_H__
+#define __NNFW_CKER_TRAIN_OPERATION_RELU_H__
+
+#include "cker/Shape.h"
+#include "cker/eigen/Utils.h"
+
+#include <Eigen/Core>
+
+namespace nnfw
+{
+namespace cker
+{
+namespace train
+{
+
+inline void ReLUGrad(const Shape &output_shape, const float *output_data,
+ const Shape &incoming_shape, const float *incoming_data,
+ const Shape &grad_shape, float *grad_data)
+{
+ const auto output_map = MapAsVector(output_data, output_shape);
+ const auto incoming_map = MapAsVector(incoming_data, incoming_shape);
+ auto grad_map = MapAsVector(grad_data, grad_shape);
+
+ if (output_shape == incoming_shape && output_shape == grad_shape)
+ grad_map.array() = incoming_map.array() * (output_map.array() > 0.0f).template cast<float>();
+ else
+ throw std::runtime_error("cker::ReLUGrad: Unsupported shape");
+}
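+
+// For example: with output == {0, 2} (already post-ReLU) and incoming == {5, 7},
+// the mask (output > 0) zeroes the first entry, so grad becomes {0, 7}.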
+
+} // namespace train
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_TRAIN_OPERATION_RELU_H__
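
Note: ReLUGrad masks the incoming gradient with the forward activation, so elements
pass through only where the forward ReLU output was positive. A short sketch with
hypothetical values:

  std::vector<float> output = {0.f, 2.f, 0.f, 4.f};     // forward ReLU output
  std::vector<float> incoming = {-5.f, 6.f, -7.f, 8.f}; // gradient from the next layer
  std::vector<float> grad(4);
  nnfw::cker::train::ReLUGrad(nnfw::cker::Shape{4}, output.data(), nnfw::cker::Shape{4},
                              incoming.data(), nnfw::cker::Shape{4}, grad.data());
  // grad == {0, 6, 0, 8}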
diff --git a/compute/cker/src/Range.test.cc b/compute/cker/src/Range.test.cc
new file mode 100644
index 000000000..e5fe4801f
--- /dev/null
+++ b/compute/cker/src/Range.test.cc
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cker/operation/Range.h>
+
+#include <gtest/gtest.h>
+#include <vector>
+
+TEST(CKer_Operation, Range)
+{
+ {
+ const int start = 0;
+ const int limit = 10;
+ const int delta = 1;
+ std::vector<int> actual(10);
+ nnfw::cker::Range<int>(&start, &limit, &delta, actual.data());
+
+ for (size_t i = 0; i < actual.size(); ++i)
+ ASSERT_EQ(actual[i], i);
+ }
+
+ {
+ const int start = 3;
+ const int limit = 18;
+ const int delta = 3;
+ std::vector<int> expected = {3, 6, 9, 12, 15};
+ std::vector<int> actual(expected.size());
+ nnfw::cker::Range<int>(&start, &limit, &delta, actual.data());
+
+ for (size_t i = 0; i < actual.size(); ++i)
+ ASSERT_EQ(actual[i], expected[i]);
+ }
+
+ {
+ const float start = 3;
+ const float limit = 1;
+ const float delta = -0.5;
+ std::vector<float> expected = {3, 2.5, 2, 1.5};
+ std::vector<float> actual(expected.size());
+ nnfw::cker::Range<float>(&start, &limit, &delta, actual.data());
+
+ for (size_t i = 0; i < actual.size(); ++i)
+ ASSERT_FLOAT_EQ(actual[i], expected[i]);
+ }
+}
+
+TEST(CKer_Operation, neg_Range)
+{
+ {
+ const int start = 212;
+ const int limit = 10;
+ const int delta = 1;
+ std::vector<int> actual(10);
+
+ EXPECT_ANY_THROW(nnfw::cker::Range<int>(&start, &limit, &delta, actual.data()));
+ }
+}
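
Note: judging by these cases, Range follows the usual tf.range semantics: it writes
ceil((limit - start) / delta) elements start, start + delta, start + 2 * delta, ...,
so (3, 18, 3) yields 5 values and (3, 1, -0.5) yields 4, while a delta whose sign
makes no progress toward limit (as in the negative test) is expected to throw.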
diff --git a/compute/cker/src/train/FullyConnected.test.cc b/compute/cker/src/train/FullyConnected.test.cc
new file mode 100644
index 000000000..37c2d4a97
--- /dev/null
+++ b/compute/cker/src/train/FullyConnected.test.cc
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cker/train/operation/FullyConnected.h>
+
+#include <gtest/gtest.h>
+#include <vector>
+
+TEST(CKer_Operation, FullyConnectedBiasGrad)
+{
+ {
+ // Shape: {2, 4}
+ std::vector<float> incoming_backward = {-1, 2, -3, 4, 5, -6, -7, 8};
+ // Shape: {4}
+ std::vector<float> expected_bias_backward = {4, -4, -10, 12};
+ std::vector<float> bias_backward(4);
+
+ nnfw::cker::train::FullyConnectedBiasGrad(
+ nnfw::cker::Shape{2, 4}, incoming_backward.data(),
+ nnfw::cker::Shape{static_cast<int>(bias_backward.size())}, bias_backward.data());
+
+ for (size_t i = 0; i < bias_backward.size(); ++i)
+ ASSERT_EQ(bias_backward[i], expected_bias_backward[i]);
+ }
+
+ {
+ // Shape: {3, 3}
+ std::vector<float> incoming_backward = {-1, 2, -3, 4, 5, -6, -7, 8, 9};
+ // Shape: {3}
+ std::vector<float> expected_bias_backward = {-4, 15, 0};
+ std::vector<float> bias_backward(3);
+
+ nnfw::cker::train::FullyConnectedBiasGrad(
+ nnfw::cker::Shape{3, 3}, incoming_backward.data(),
+ nnfw::cker::Shape{static_cast<int>(bias_backward.size())}, bias_backward.data());
+
+ for (size_t i = 0; i < bias_backward.size(); ++i)
+ ASSERT_EQ(bias_backward[i], expected_bias_backward[i]);
+ }
+
+ {
+ // Shape: {1, 2, 2, 3}
+ std::vector<float> incoming_backward = {-1, 2, -3, 4, 5, -6, -7, 8, 9, -10, -11, 12};
+ // Shape: {3}
+ std::vector<float> expected_bias_backward = {-14, 4, 12};
+ std::vector<float> bias_backward(3);
+
+ nnfw::cker::train::FullyConnectedBiasGrad(
+ nnfw::cker::Shape{1, 2, 2, 3}, incoming_backward.data(),
+ nnfw::cker::Shape{static_cast<int>(bias_backward.size())}, bias_backward.data());
+
+ for (size_t i = 0; i < bias_backward.size(); ++i)
+ ASSERT_EQ(bias_backward[i], expected_bias_backward[i]);
+ }
+}
+
+TEST(CKer_Operation, neg_FullyConnectedBiasGrad)
+{
+ {
+ // Unmatched shape
+ // Shape: {2, 4}
+ std::vector<float> incoming_backward = {-1, 2, -3, 4, 5, -6, -7, 8};
+ // Shape: {3}
+ std::vector<float> bias_backward(3);
+ EXPECT_ANY_THROW(nnfw::cker::train::FullyConnectedBiasGrad(
+ nnfw::cker::Shape{2, 4}, incoming_backward.data(),
+ nnfw::cker::Shape{static_cast<int>(bias_backward.size())},
+ bias_backward.data()));
+ }
+}
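
Note: these cases pin down the expected reduction: FullyConnectedBiasGrad sums the
incoming gradient over every axis except the last. For the {2, 4} case that is
bias_backward[j] = incoming[0 * 4 + j] + incoming[1 * 4 + j], e.g.
bias_backward[0] = -1 + 5 = 4; the {1, 2, 2, 3} case reduces four rows of three,
e.g. bias_backward[0] = -1 + 4 - 7 - 10 = -14.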
diff --git a/compute/cker/src/train/Loss.test.cc b/compute/cker/src/train/Loss.test.cc
new file mode 100644
index 000000000..98568f47a
--- /dev/null
+++ b/compute/cker/src/train/Loss.test.cc
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cker/train/operation/Loss.h>
+
+#include <gtest/gtest.h>
+#include <vector>
+
+TEST(CKer_Operation, LossMSE)
+{
+ {
+ // Shape: {1, 10} -> m_rows:10, m_cols:1
+ std::vector<int> y_pred = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+ std::vector<int> y_true = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+ std::vector<int> output(1);
+ std::vector<int> expected = {1};
+
+ nnfw::cker::train::MSE(nnfw::cker::Shape{1, 10}, y_pred.data(), nnfw::cker::Shape{1, 10},
+ y_true.data(), nnfw::cker::Shape{1}, output.data());
+
+ EXPECT_EQ(output[0], expected[0]);
+ }
+
+ {
+ // Shape: {1, 10} -> m_rows:10, m_cols:1
+ std::vector<float> y_pred = {1., 2., 3., 4., 5., 6., 7., 8., 9., 10.};
+ std::vector<float> y_true = {0., 1., 2., 3., 4., 5., 6., 7., 8., 9.};
+ std::vector<float> output(1);
+ std::vector<float> expected = {1.0};
+
+ nnfw::cker::train::MSE(nnfw::cker::Shape{1, 10}, y_pred.data(), nnfw::cker::Shape{1, 10},
+ y_true.data(), nnfw::cker::Shape{1}, output.data());
+
+ EXPECT_FLOAT_EQ(output[0], expected[0]);
+ }
+
+ {
+ // Shape: {2, 3} -> m_rows:3, m_cols:2
+ std::vector<float> y_pred = {27.2, 31.8, 51.9, 10.2, 34.2, 12.4};
+ std::vector<float> y_true = {31.3, 40.3, 29.7, 12.9, 25.8, 11.9};
+ std::vector<float> output(1);
+ std::vector<float> expected = {110.0};
+
+ nnfw::cker::train::MSE(nnfw::cker::Shape{2, 3}, y_pred.data(), nnfw::cker::Shape{2, 3},
+ y_true.data(), nnfw::cker::Shape{1}, output.data());
+
+ EXPECT_FLOAT_EQ(output[0], expected[0]);
+ }
+
+ {
+ // Shape: {2, 3, 4} -> m_rows:4, m_cols:6
+ std::vector<float> y_pred = {1., 2., 3., 4., 1., 2., 3., 4., 1., 2., 3., 4.,
+ 1., 2., 3., 4., 1., 2., 3., 4., 1., 2., 3., 4.};
+ std::vector<float> y_true = {1., 1., 1., 1., 2., 2., 2., 2., 3., 3., 3., 3.,
+ 1., 1., 1., 1., 2., 2., 2., 2., 3., 3., 3., 3.};
+ std::vector<float> output(1);
+ std::vector<float> expected = {2.1666667};
+
+ nnfw::cker::train::MSE(nnfw::cker::Shape{2, 3, 4}, y_pred.data(), nnfw::cker::Shape{2, 3, 4},
+ y_true.data(), nnfw::cker::Shape{1}, output.data());
+
+ EXPECT_FLOAT_EQ(output[0], expected[0]);
+ }
+}
+
+TEST(CKer_Operation, neg_LossMSE)
+{
+ {
+ // Invalid expected value
+ std::vector<float> y_pred = {1., 2., 3., 4., 5., 6., 7., 8., 9., 10.};
+ std::vector<float> y_true = {0., 1., 2., 3., 4., 5., 6., 7., 8., 9.};
+ std::vector<float> output(1);
+ std::vector<float> expected = {-1.0};
+
+ nnfw::cker::train::MSE(nnfw::cker::Shape{1, 10}, y_pred.data(), nnfw::cker::Shape{1, 10},
+ y_true.data(), nnfw::cker::Shape{1}, output.data());
+
+ EXPECT_NE(output[0], expected[0]);
+ }
+
+ {
+ // Invalid output shape
+ std::vector<float> y_pred = {1., 2., 3., 4., 5., 6., 7., 8., 9., 10.};
+ std::vector<float> y_true = {0., 1., 2., 3., 4., 5., 6., 7., 8., 9.};
+ std::vector<float> output(3);
+ std::vector<float> expected = {1.0};
+
+ EXPECT_ANY_THROW(nnfw::cker::train::MSE(nnfw::cker::Shape{1, 10}, y_pred.data(),
+ nnfw::cker::Shape{1, 10}, y_true.data(),
+ nnfw::cker::Shape{3}, output.data()));
+ }
+
+ {
+ // Different y_pred and y_true shape
+ std::vector<float> y_pred = {1., 2., 3., 4., 5., 6., 7., 8., 9., 10.};
+ std::vector<float> y_true = {0., 1., 2., 3., 4., 5.};
+ std::vector<float> output(1);
+ std::vector<float> expected = {1.0};
+
+ EXPECT_ANY_THROW(nnfw::cker::train::MSE(nnfw::cker::Shape{1, 10}, y_pred.data(),
+ nnfw::cker::Shape{2, 3}, y_true.data(),
+ nnfw::cker::Shape{1}, output.data()));
+ }
+}
+
+TEST(CKer_Operation, LossMSEGrad)
+{
+ {
+ // Shape: {1, 10} -> m_rows:10, m_cols:1
+ std::vector<int> y_pred = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+ std::vector<int> y_true = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+ std::vector<int> deriv_y_pred(10);
+ std::vector<int> expected = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+ nnfw::cker::train::MSEGrad(nnfw::cker::Shape{1, 10}, y_pred.data(), nnfw::cker::Shape{1, 10},
+ y_true.data(), nnfw::cker::Shape{1, 10}, deriv_y_pred.data());
+
+ for (size_t i = 0; i < deriv_y_pred.size(); ++i)
+ EXPECT_EQ(deriv_y_pred[i], expected[i]);
+ }
+
+ {
+ // Shape: {1, 10} -> m_rows:10, m_cols:1
+ std::vector<float> y_pred = {1., 2., 3., 4., 5., 6., 7., 8., 9., 10.};
+ std::vector<float> y_true = {0., 1., 2., 3., 4., 5., 6., 7., 8., 9.};
+ std::vector<float> deriv_y_pred(10);
+ std::vector<float> expected = {0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2};
+
+ nnfw::cker::train::MSEGrad(nnfw::cker::Shape{1, 10}, y_pred.data(), nnfw::cker::Shape{1, 10},
+ y_true.data(), nnfw::cker::Shape{1, 10}, deriv_y_pred.data());
+
+ for (size_t i = 0; i < deriv_y_pred.size(); ++i)
+ EXPECT_FLOAT_EQ(deriv_y_pred[i], expected[i]);
+ }
+
+ {
+ // Shape: {2, 3} -> m_rows:3, m_cols:2
+ std::vector<float> y_pred = {27.2, 31.8, 51.9, 10.2, 34.2, 12.4};
+ std::vector<float> y_true = {31.3, 40.3, 29.7, 12.9, 25.8, 11.9};
+ std::vector<float> deriv_y_pred(6);
+ std::vector<float> expected = {-1.3666667, -2.8333333, 7.4, -0.9, 2.8, 0.1666667};
+
+ nnfw::cker::train::MSEGrad(nnfw::cker::Shape{2, 3}, y_pred.data(), nnfw::cker::Shape{2, 3},
+ y_true.data(), nnfw::cker::Shape{2, 3}, deriv_y_pred.data());
+
+ for (size_t i = 0; i < deriv_y_pred.size(); ++i)
+ EXPECT_FLOAT_EQ(deriv_y_pred[i], expected[i]);
+ }
+}
+
+TEST(CKer_Operation, neg_LossMSEGrad)
+{
+ {
+ // Invalid expected value
+ std::vector<float> y_pred = {27.2, 31.8, 51.9, 10.2, 34.2, 12.4};
+ std::vector<float> y_true = {31.3, 40.3, 29.7, 12.9, 25.8, 11.9};
+ std::vector<float> deriv_y_pred(6);
+ std::vector<float> expected = {1., 1., 1., 1., 1., 1.};
+
+ nnfw::cker::train::MSEGrad(nnfw::cker::Shape{2, 3}, y_pred.data(), nnfw::cker::Shape{2, 3},
+ y_true.data(), nnfw::cker::Shape{2, 3}, deriv_y_pred.data());
+
+ for (size_t i = 0; i < deriv_y_pred.size(); ++i)
+ EXPECT_NE(deriv_y_pred[i], expected[i]);
+ }
+
+ {
+ // Different y_pred and y_true shape
+ std::vector<float> y_pred = {1., 2., 3., 4., 5., 6., 7., 8., 9., 10.};
+ std::vector<float> y_true = {0., 1., 2., 3., 4., 5.};
+ std::vector<float> deriv_y_pred(10);
+
+ EXPECT_ANY_THROW(nnfw::cker::train::MSEGrad(nnfw::cker::Shape{1, 10}, y_pred.data(),
+ nnfw::cker::Shape{2, 3}, y_true.data(),
+ nnfw::cker::Shape{1, 10}, deriv_y_pred.data()));
+ }
+
+ {
+ // Different y_pred and deriv_y_pred shape
+ std::vector<float> y_pred = {1., 2., 3., 4., 5., 6., 7., 8., 9., 10.};
+ std::vector<float> y_true = {0., 1., 2., 3., 4., 5., 6., 7., 8., 9.};
+ std::vector<float> deriv_y_pred(6);
+
+ EXPECT_ANY_THROW(nnfw::cker::train::MSEGrad(nnfw::cker::Shape{1, 10}, y_pred.data(),
+ nnfw::cker::Shape{1, 10}, y_true.data(),
+ nnfw::cker::Shape{2, 3}, deriv_y_pred.data()));
+ }
+}
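
Note: the expected values follow directly from the MSEGrad formula above. With
y_pred - y_true == 1 for every element of the {1, 10} case,
deriv_y_pred[i] = -2 * (y_true[i] - y_pred[i]) / 10 = 0.2; for the {2, 3} case
deriv_y_pred[i] = (y_pred[i] - y_true[i]) / 3, e.g. (27.2 - 31.3) / 3 = -1.3666667.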
diff --git a/compute/cker/src/train/Relu.test.cc b/compute/cker/src/train/Relu.test.cc
new file mode 100644
index 000000000..d94411038
--- /dev/null
+++ b/compute/cker/src/train/Relu.test.cc
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cker/operation/ReLU.h>
+#include <cker/train/operation/ReLU.h>
+
+#include <gtest/gtest.h>
+#include <vector>
+
+namespace
+{
+
+template <typename T> class ReluOpVerifier
+{
+public:
+ ReluOpVerifier(const std::vector<T> &input, const std::vector<T> &expected_output,
+ const std::vector<T> &backprop_output,
+ const std::vector<T> &expected_backprop_input)
+ : _input{input}, _expected_output{expected_output}, _backprop_output{backprop_output},
+ _expected_backprop_input{expected_backprop_input}
+ {
+ EXPECT_TRUE(input.size() == expected_output.size());
+ _output.resize(_expected_output.size());
+ _backprop_input.resize(_expected_backprop_input.size());
+ }
+
+public:
+ void verifyExpected()
+ {
+ nnfw::cker::ReLU(nnfw::cker::Shape{static_cast<int>(_input.size())}, _input.data(),
+ nnfw::cker::Shape{static_cast<int>(_output.size())}, _output.data());
+
+ for (size_t i = 0; i < _output.size(); ++i)
+ ASSERT_EQ(_output[i], _expected_output[i]);
+
+ if (_backprop_output.size() > 0)
+ {
+ nnfw::cker::train::ReLUGrad(
+ nnfw::cker::Shape{static_cast<int>(_output.size())}, _output.data(),
+ nnfw::cker::Shape{static_cast<int>(_backprop_output.size())}, _backprop_output.data(),
+ nnfw::cker::Shape{static_cast<int>(_backprop_input.size())}, _backprop_input.data());
+
+ for (size_t i = 0; i < _backprop_input.size(); ++i)
+ ASSERT_EQ(_backprop_input[i], _expected_backprop_input[i]);
+ }
+ }
+
+private:
+ std::vector<T> _input;
+ std::vector<T> _output;
+ std::vector<T> _expected_output;
+ std::vector<T> _backprop_output;
+ std::vector<T> _backprop_input;
+ std::vector<T> _expected_backprop_input;
+};
+
+} // namespace
+
+TEST(CKer_Operation, ReLU)
+{
+ {
+ std::vector<float> input_forward = {-1, 2, 3, -4};
+ std::vector<float> expected_forward = {0, 2, 3, 0};
+ std::vector<float> incoming_backward = {-5, 6, -7, 8};
+ std::vector<float> expected_backward = {0, 6, -7, 0};
+ ReluOpVerifier<float> verifier{input_forward, expected_forward, incoming_backward,
+ expected_backward};
+ verifier.verifyExpected();
+ }
+
+ {
+ std::vector<float> input_forward = {0, -1, 2, 3, -4, 5, 6, -7};
+ std::vector<float> expected_forward = {0, 0, 2, 3, 0, 5, 6, 0};
+ std::vector<float> incoming_backward = {8, -9, 10, 11, -12, -13, 14, -15};
+ std::vector<float> expected_backward = {0, 0, 10, 11, 0, -13, 14, 0};
+ ReluOpVerifier<float> verifier{input_forward, expected_forward, incoming_backward,
+ expected_backward};
+ verifier.verifyExpected();
+ }
+}
+
+TEST(CKer_Operation, neg_ReLU)
+{
+ {
+ // Unmatched shape
+ std::vector<float> input_forward = {0, -1, 2, 3, -4};
+ std::vector<float> expected_forward = {0, 0, 2, 3, 0};
+ std::vector<float> incoming_backward = {-5, 6, -7, 8};
+ std::vector<float> expected_backward = {0, 6, -7, 0};
+ ReluOpVerifier<float> verifier{input_forward, expected_forward, incoming_backward,
+ expected_backward};
+ EXPECT_ANY_THROW(verifier.verifyExpected());
+ }
+}
diff --git a/compute/ruy/CMakeLists.txt b/compute/ruy/CMakeLists.txt
new file mode 100644
index 000000000..d98ee1cd6
--- /dev/null
+++ b/compute/ruy/CMakeLists.txt
@@ -0,0 +1,11 @@
+nnfw_find_package(Ruy REQUIRED)
+
+add_library(nnfw_lib_ruy INTERFACE)
+target_link_libraries(nnfw_lib_ruy INTERFACE ruy)
+target_link_libraries(nnfw_lib_ruy INTERFACE ruy_instrumentation)
+target_compile_definitions(nnfw_lib_ruy INTERFACE USE_RUY_GEMV)
+if(PROFILE_RUY)
+ target_link_libraries(nnfw_lib_ruy INTERFACE ruy_profiler)
+endif(PROFILE_RUY)
+
+target_include_directories(nnfw_lib_ruy INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/include)
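
Note: nnfw_lib_ruy is an INTERFACE library with no sources of its own, so a target
that links against it inherits the ruy link dependencies, the USE_RUY_GEMV compile
definition, and the include directory, e.g. via
target_link_libraries(my_backend PRIVATE nnfw_lib_ruy) (target name hypothetical).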
diff --git a/compute/ruy/include/ruy/NeonTensorUtils.h b/compute/ruy/include/ruy/NeonTensorUtils.h
new file mode 100644
index 000000000..fb8b0a363
--- /dev/null
+++ b/compute/ruy/include/ruy/NeonTensorUtils.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_RUY_NEON_TENSOR_UTILS_H__
+#define __NNFW_RUY_NEON_TENSOR_UTILS_H__
+
+#include "ruy/neon/neon_check.h"
+
+#ifdef USE_NEON
+
+#define kFloatWeightsPerNeonLane 4
+
+namespace nnfw
+{
+namespace ruy
+{
+
+inline bool NeonIsZeroVector(const float *vector, int v_size)
+{
+ // If v_size is not divisible by kFloatWeightsPerNeonLane, we cannot
+ // use the main vectorized loop, and we need to process sequentially.
+ // postamble_start shows the start index where this should happen.
+ const int postamble_start = v_size - (v_size & (kFloatWeightsPerNeonLane - 1));
+
+ const float32x4_t zero_x4_float = vmovq_n_f32(0.0f);
+ for (int v = 0; v < postamble_start; v += kFloatWeightsPerNeonLane)
+ {
+ const float32x4_t i_x4_float = vld1q_f32(vector + v);
+ uint32x4_t cmp_result = vceqq_f32(i_x4_float, zero_x4_float);
+ if (vgetq_lane_u32(cmp_result, 0) == 0)
+ return false;
+ if (vgetq_lane_u32(cmp_result, 1) == 0)
+ return false;
+ if (vgetq_lane_u32(cmp_result, 2) == 0)
+ return false;
+ if (vgetq_lane_u32(cmp_result, 3) == 0)
+ return false;
+ }
+
+ // Postamble loop
+ for (int v = postamble_start; v < v_size; ++v)
+ {
+ if (vector[v] != 0.0)
+ return false;
+ }
+ return true;
+}
+
+} // namespace ruy
+} // namespace nnfw
+
+#endif // USE_NEON
+
+#endif // __NNFW_RUY_NEON_TENSOR_UTILS_H__
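
Note: in the vectorized loop, vceqq_f32 sets a lane to all ones where the comparison
holds, so any zero lane in cmp_result marks a nonzero input element and the function
can return false after four lane extracts per 4-float chunk.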
diff --git a/compute/ruy/include/ruy/PortableTensorUtils.h b/compute/ruy/include/ruy/PortableTensorUtils.h
new file mode 100644
index 000000000..2d2c36cb2
--- /dev/null
+++ b/compute/ruy/include/ruy/PortableTensorUtils.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_RUY_PORTABLE_TENSOR_UTILS_H__
+#define __NNFW_RUY_PORTABLE_TENSOR_UTILS_H__
+
+namespace nnfw
+{
+namespace ruy
+{
+
+inline bool PortableIsZeroVector(const float *vector, int v_size)
+{
+ for (int i = 0; i < v_size; ++i)
+ {
+ if (*vector++ != 0.0f)
+ return false;
+ }
+ return true;
+}
+
+} // namespace ruy
+} // namespace nnfw
+
+#endif // __NNFW_RUY_PORTABLE_TENSOR_UTILS_H__
diff --git a/compute/ruy/include/ruy/RuySupport.h b/compute/ruy/include/ruy/RuySupport.h
new file mode 100644
index 000000000..2f9ed7457
--- /dev/null
+++ b/compute/ruy/include/ruy/RuySupport.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_RUY_RUY_SUPPORT_H__
+#define __NNFW_RUY_RUY_SUPPORT_H__
+
+#include <util/ConfigSource.h>
+#include <ruy/matrix.h>
+#include <ruy/ruy.h>
+#include <cassert>
+#include "Types.h"
+
+namespace nnfw
+{
+namespace ruy
+{
+namespace ruy_support
+{
+
+inline ::ruy::CachePolicy ToRuyCachePolicy(CachePolicy cache_policy)
+{
+ switch (cache_policy)
+ {
+ case CachePolicy::kNeverCache:
+ return ::ruy::CachePolicy::kNeverCache;
+ case CachePolicy::kCacheIfLargeSpeedup:
+ return ::ruy::CachePolicy::kCacheIfLargeSpeedup;
+ case CachePolicy::kAlwaysCache:
+ return ::ruy::CachePolicy::kAlwaysCache;
+ default:
+ assert(false);
+ return ::ruy::CachePolicy::kNeverCache;
+ }
+}
+
+template <typename Scalar, typename DataPointer>
+void MakeRuyMatrix(const MatrixParams<Scalar> &params, DataPointer data_ptr,
+ ::ruy::Matrix<Scalar> *dst, bool use_caching = false)
+{
+ ::ruy::Order ruy_order =
+ params.order == Order::kColMajor ? ::ruy::Order::kColMajor : ::ruy::Order::kRowMajor;
+ ::ruy::MakeSimpleLayout(params.rows, params.cols, ruy_order, dst->mutable_layout());
+ // Note that ruy::Matrix::data is a ConstCheckingPtr, not a plain pointer.
+ // It does care whether we assign to it a Scalar* or a const Scalar*.
+ dst->set_data(data_ptr);
+ dst->set_zero_point(params.zero_point);
+ if (use_caching)
+ {
+ dst->set_cache_policy(ToRuyCachePolicy(params.cache_policy));
+ }
+}
+
+// Floating-point case.
+template <typename AccumScalar, typename DstScalar, QuantizationFlavor quantization_flavor>
+void MakeRuyMulParams(const GemmParams<AccumScalar, DstScalar, quantization_flavor> &params,
+ ::ruy::MulParams<AccumScalar, DstScalar> *ruy_mul_params)
+{
+ static_assert(quantization_flavor == QuantizationFlavor::kFloatingPoint, "");
+ ruy_mul_params->set_bias(params.bias);
+ ruy_mul_params->set_clamp_min(params.clamp_min);
+ ruy_mul_params->set_clamp_max(params.clamp_max);
+}
+
+// Integer-quantized case with destination type narrower than int32
+template <typename DstScalar, QuantizationFlavor quantization_flavor>
+void MakeRuyMulParams(const GemmParams<std::int32_t, DstScalar, quantization_flavor> &params,
+ ::ruy::MulParams<std::int32_t, DstScalar> *ruy_mul_params)
+{
+ static_assert(sizeof(DstScalar) < sizeof(std::int32_t), "");
+ if (quantization_flavor == QuantizationFlavor::kIntegerWithUniformMultiplier)
+ {
+ ruy_mul_params->set_multiplier_fixedpoint(params.multiplier_fixedpoint);
+ ruy_mul_params->set_multiplier_exponent(params.multiplier_exponent);
+ }
+ if (quantization_flavor == QuantizationFlavor::kIntegerWithPerRowMultiplier)
+ {
+ ruy_mul_params->set_multiplier_fixedpoint_perchannel(params.multiplier_fixedpoint_perchannel);
+ ruy_mul_params->set_multiplier_exponent_perchannel(params.multiplier_exponent_perchannel);
+ }
+ ruy_mul_params->set_bias(params.bias);
+ ruy_mul_params->set_clamp_min(params.clamp_min);
+ ruy_mul_params->set_clamp_max(params.clamp_max);
+}
+
+// Raw-integer case with destination type int32.
+template <QuantizationFlavor quantization_flavor>
+void MakeRuyMulParams(const GemmParams<std::int32_t, std::int32_t, quantization_flavor> &params,
+ ::ruy::MulParams<std::int32_t, std::int32_t> *ruy_mul_params)
+{
+ ruy_mul_params->set_bias(params.bias);
+}
+
+} // namespace ruy_support
+} // namespace ruy
+} // namespace nnfw
+
+#endif // __NNFW_RUY_RUY_SUPPORT_H__
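
Note: a minimal sketch of wiring these helpers together (sizes and data are
hypothetical; only functions defined or included above are used):

  const float lhs_data[6] = {1, 2, 3, 4, 5, 6};
  nnfw::ruy::MatrixParams<float> lhs_params;
  lhs_params.order = nnfw::ruy::Order::kRowMajor;
  lhs_params.rows = 2;
  lhs_params.cols = 3;

  ::ruy::Matrix<float> ruy_lhs;
  // Copies the layout and zero_point from lhs_params and points ruy_lhs at lhs_data.
  nnfw::ruy::ruy_support::MakeRuyMatrix(lhs_params, lhs_data, &ruy_lhs);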
diff --git a/compute/ruy/include/ruy/Shape.h b/compute/ruy/include/ruy/Shape.h
new file mode 100644
index 000000000..151a67377
--- /dev/null
+++ b/compute/ruy/include/ruy/Shape.h
@@ -0,0 +1,354 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_RUY_SHAPE_H__
+#define __NNFW_RUY_SHAPE_H__
+
+#include <algorithm>
+#include <cstring>
+#include <cassert>
+#include <vector>
+
+#define UNUSED_RELEASE(a) (void)(a)
+
+namespace nnfw
+{
+namespace ruy
+{
+
+class Shape
+{
+public:
+ // Shapes with dimensions up to 5 are stored directly in the structure, while
+ // larger shapes are separately allocated.
+ static constexpr int kMaxSmallSize = 5;
+
+ Shape &operator=(Shape const &) = delete;
+
+ Shape() : _size(0) {}
+
+ explicit Shape(int dimensions_count) : _size(dimensions_count)
+ {
+ if (dimensions_count > kMaxSmallSize)
+ {
+ _dims_pointer = new int32_t[dimensions_count];
+ }
+ }
+
+ Shape(int shape_size, int32_t value) : _size(0)
+ {
+ Resize(shape_size);
+ for (int i = 0; i < shape_size; ++i)
+ {
+ SetDim(i, value);
+ }
+ }
+
+ Shape(int dimensions_count, const int32_t *dims_data) : _size(0)
+ {
+ ReplaceWith(dimensions_count, dims_data);
+ }
+
+ Shape(const std::initializer_list<int> init_list) : _size(0) { BuildFrom(init_list); }
+
+ // Avoid using this constructor. We should be able to delete it when C++17
+ // rolls out.
+ Shape(Shape const &other) : _size(other.DimensionsCount())
+ {
+ if (_size > kMaxSmallSize)
+ {
+ _dims_pointer = new int32_t[_size];
+ }
+ std::memcpy(DimsData(), other.DimsData(), sizeof(int32_t) * _size);
+ }
+
+ bool operator==(const Shape &comp) const
+ {
+ return this->_size == comp._size &&
+ std::memcmp(DimsData(), comp.DimsData(), _size * sizeof(int32_t)) == 0;
+ }
+
+ ~Shape()
+ {
+ if (_size > kMaxSmallSize)
+ {
+ delete[] _dims_pointer;
+ }
+ }
+
+ inline int32_t DimensionsCount() const { return _size; }
+ inline int32_t Dims(int i) const
+ {
+ assert(i >= 0);
+ assert(i < _size);
+ return _size > kMaxSmallSize ? _dims_pointer[i] : _dims[i];
+ }
+ inline void SetDim(int i, int32_t val)
+ {
+ assert(i >= 0);
+ assert(i < _size);
+ if (_size > kMaxSmallSize)
+ {
+ _dims_pointer[i] = val;
+ }
+ else
+ {
+ _dims[i] = val;
+ }
+ }
+
+ inline int32_t *DimsData() { return _size > kMaxSmallSize ? _dims_pointer : _dims; }
+ inline const int32_t *DimsData() const { return _size > kMaxSmallSize ? _dims_pointer : _dims; }
+ // The caller must ensure that the shape is no bigger than 4-D.
+ inline const int32_t *DimsDataUpTo4D() const { return _dims; }
+
+ inline void Resize(int dimensions_count)
+ {
+ if (_size > kMaxSmallSize)
+ {
+ delete[] _dims_pointer;
+ }
+ _size = dimensions_count;
+ if (dimensions_count > kMaxSmallSize)
+ {
+ _dims_pointer = new int32_t[dimensions_count];
+ }
+ }
+
+ inline void ReplaceWith(int dimensions_count, const int32_t *dims_data)
+ {
+ Resize(dimensions_count);
+ int32_t *dst_dims = DimsData();
+ std::memcpy(dst_dims, dims_data, dimensions_count * sizeof(int32_t));
+ }
+
+ inline void ReplaceWith(const Shape &other)
+ {
+ ReplaceWith(other.DimensionsCount(), other.DimsData());
+ }
+
+ inline void ReplaceWith(Shape &&other)
+ {
+ Resize(0);
+ std::swap(_size, other._size);
+ if (_size <= kMaxSmallSize)
+ std::copy(other._dims, other._dims + kMaxSmallSize, _dims);
+ else
+ _dims_pointer = other._dims_pointer;
+ }
+
+ template <typename T> inline void BuildFrom(const T &src_iterable)
+ {
+ const int dimensions_count = std::distance(src_iterable.begin(), src_iterable.end());
+ Resize(dimensions_count);
+ int32_t *data = DimsData();
+ for (auto &&it : src_iterable)
+ {
+ *data = it;
+ ++data;
+ }
+ }
+
+ // This will probably be factored out. Old code made substantial use of 4-D
+ // shapes, and so this function is used to extend smaller shapes. Note that
+ // (a) as Dims<4>-dependent code is eliminated, the reliance on this should be
+ // reduced, and (b) some kernels are strictly 4-D, but then the shapes of their
+ // inputs should already be 4-D, so this function should not be needed.
+ inline static Shape ExtendedShape(int new_shape_size, const Shape &shape)
+ {
+ return Shape(new_shape_size, shape, 1);
+ }
+
+ inline void BuildFrom(const std::initializer_list<int> init_list)
+ {
+ BuildFrom<const std::initializer_list<int>>(init_list);
+ }
+
+ // Returns the total count of elements, that is the size when flattened into a
+ // vector.
+ inline int FlatSize() const
+ {
+ int buffer_size = 1;
+ const int *dims_data = DimsData();
+ for (int i = 0; i < _size; i++)
+ {
+ const int dim = dims_data[i];
+ assert(dim >= 1);
+ buffer_size *= dim;
+ }
+ return buffer_size;
+ }
+
+ bool operator!=(const Shape &comp) const { return !((*this) == comp); }
+
+private:
+ // For use only by ExtendedShape(), written to guarantee (return-value) copy
+ // elision in C++17.
+ // This creates a shape padded to the desired size with the specified value.
+ Shape(int new_shape_size, const Shape &shape, int pad_value) : _size(0)
+ {
+ assert(new_shape_size >= shape.DimensionsCount());
+ assert(new_shape_size <= kMaxSmallSize);
+ Resize(new_shape_size);
+ const int size_increase = new_shape_size - shape.DimensionsCount();
+ for (int i = 0; i < size_increase; ++i)
+ {
+ SetDim(i, pad_value);
+ }
+ std::memcpy(DimsData() + size_increase, shape.DimsData(),
+ sizeof(int32_t) * shape.DimensionsCount());
+ }
+
+ int32_t _size;
+ union {
+ int32_t _dims[kMaxSmallSize];
+ int32_t *_dims_pointer{nullptr};
+ };
+};
+
+inline int MatchingDim(const Shape &shape1, int index1, const Shape &shape2, int index2)
+{
+ UNUSED_RELEASE(shape2);
+ UNUSED_RELEASE(index2);
+ assert(shape1.Dims(index1) == shape2.Dims(index2));
+ return shape1.Dims(index1);
+}
+
+template <typename... Args>
+int MatchingDim(const Shape &shape1, int index1, const Shape &shape2, int index2, Args... args)
+{
+ assert(shape1.Dims(index1) == shape2.Dims(index2));
+ UNUSED_RELEASE(shape2);
+ UNUSED_RELEASE(index2);
+ return MatchingDim(shape1, index1, args...);
+}
+
+inline Shape GetShape(const std::vector<int32_t> &data) { return Shape(data.size(), data.data()); }
+
+inline int Offset(const Shape &shape, int i0, int i1, int i2, int i3)
+{
+ assert(shape.DimensionsCount() == 4);
+ const int *dims_data = shape.DimsDataUpTo4D();
+ assert(i0 >= 0 && i0 < dims_data[0]);
+ assert(i1 >= 0 && i1 < dims_data[1]);
+ assert(i2 >= 0 && i2 < dims_data[2]);
+ assert(i3 >= 0 && i3 < dims_data[3]);
+ return ((i0 * dims_data[1] + i1) * dims_data[2] + i2) * dims_data[3] + i3;
+}
+
+inline int Offset(const Shape &shape, int *index)
+{
+ return Offset(shape, index[0], index[1], index[2], index[3]);
+}
+
+inline int FlatSizeSkipDim(const Shape &shape, int skip_dim)
+{
+ const int dims_count = shape.DimensionsCount();
+ assert(skip_dim >= 0 && skip_dim < dims_count);
+ const auto *dims_data = shape.DimsData();
+ int flat_size = 1;
+ for (int i = 0; i < dims_count; ++i)
+ {
+ flat_size *= (i == skip_dim) ? 1 : dims_data[i];
+ }
+ return flat_size;
+}
+
+// Flat size calculation, checking that dimensions match with one or more other
+// arrays.
+template <typename... Ts> inline bool checkMatching(const Shape &shape, Ts... check_shapes)
+{
+ const Shape check_shapes_array[sizeof...(Ts)] = {std::forward<Ts>(check_shapes)...};
+ for (const auto &check_shape : check_shapes_array)
+ {
+ // Check matching of shapes except the case of that two shapes can be scalar
+ if (shape.DimensionsCount() > 1 || check_shape.DimensionsCount() > 1 || shape.FlatSize() != 1 ||
+ check_shape.FlatSize() != 1)
+ {
+ if (shape.DimensionsCount() != check_shape.DimensionsCount())
+ {
+ return false;
+ }
+ for (int i = 0; i < shape.DimensionsCount(); ++i)
+ {
+ if (shape.Dims(i) != check_shape.Dims(i))
+ {
+ return false;
+ }
+ }
+ }
+ }
+ return true;
+}
+
+struct UNUSED_ALL
+{
+ template <typename... Args> UNUSED_ALL(Args const &...) {}
+};
+template <typename... Ts> inline int MatchingFlatSize(const Shape &shape, Ts... check_shapes)
+{
+ UNUSED_ALL{check_shapes...};
+ assert(checkMatching(shape, std::forward<Ts>(check_shapes)...));
+ return shape.FlatSize();
+}
+
+inline int MatchingFlatSizeSkipDim(const Shape &shape, int skip_dim, const Shape &check_shape_0)
+{
+ UNUSED_RELEASE(check_shape_0);
+ const int dims_count = shape.DimensionsCount();
+ for (int i = 0; i < dims_count; ++i)
+ {
+ if (i != skip_dim)
+ {
+ assert(shape.Dims(i) == check_shape_0.Dims(i));
+ }
+ }
+ return FlatSizeSkipDim(shape, skip_dim);
+}
+
+inline int MatchingFlatSizeSkipDim(const Shape &shape, int skip_dim, const Shape &check_shape_0,
+ const Shape &check_shape_1)
+{
+ UNUSED_RELEASE(check_shape_0);
+ const int dims_count = shape.DimensionsCount();
+ for (int i = 0; i < dims_count; ++i)
+ {
+ if (i != skip_dim)
+ {
+ assert(shape.Dims(i) == check_shape_0.Dims(i));
+ }
+ }
+ return MatchingFlatSizeSkipDim(shape, skip_dim, check_shape_1);
+}
+
+inline int MatchingElementsSize(const Shape &shape, const Shape &check_shape_0,
+ const Shape &check_shape_1)
+{
+ const int size_1 = shape.FlatSize();
+ const int size_2 = check_shape_0.FlatSize();
+ const int size_3 = check_shape_1.FlatSize();
+ assert(size_1 == size_2);
+ assert(size_2 == size_3);
+ UNUSED_RELEASE(size_2);
+ UNUSED_RELEASE(size_3);
+ return size_1;
+}
+
+} // namespace ruy
+} // namespace nnfw
+
+#endif // __NNFW_RUY_SHAPE_H__
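
Note: a short sketch of the helpers above (values chosen for illustration):

  nnfw::ruy::Shape s{2, 3, 4};                                  // initializer-list constructor
  int flat = s.FlatSize();                                      // 2 * 3 * 4 == 24
  nnfw::ruy::Shape s4 = nnfw::ruy::Shape::ExtendedShape(4, s);  // pads to {1, 2, 3, 4}
  int off = nnfw::ruy::Offset(s4, 0, 1, 2, 3);                  // ((0*2 + 1)*3 + 2)*4 + 3 == 23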
diff --git a/compute/ruy/include/ruy/TensorUtils.h b/compute/ruy/include/ruy/TensorUtils.h
new file mode 100644
index 000000000..149037cc9
--- /dev/null
+++ b/compute/ruy/include/ruy/TensorUtils.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_RUY_TENSOR_UTILS_H__
+#define __NNFW_RUY_TENSOR_UTILS_H__
+
+#include "ruy/PortableTensorUtils.h"
+#include "ruy/NeonTensorUtils.h"
+
+namespace nnfw
+{
+namespace ruy
+{
+
+inline bool IsZeroVector(const float *vector, int v_size)
+{
+ return NEON_OR_PORTABLE(IsZeroVector, vector, v_size);
+}
+
+} // namespace ruy
+} // namespace nnfw
+
+#endif // __NNFW_RUY_TENSOR_UTILS_H__
diff --git a/compute/ruy/include/ruy/Types.h b/compute/ruy/include/ruy/Types.h
new file mode 100644
index 000000000..b19b59735
--- /dev/null
+++ b/compute/ruy/include/ruy/Types.h
@@ -0,0 +1,275 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_RUY_TYPES_H__
+#define __NNFW_RUY_TYPES_H__
+
+#include <cassert>
+#include <cstdint>
+#include <type_traits>
+#include <limits>
+#include <string>
+#include "Shape.h"
+
+namespace nnfw
+{
+namespace ruy
+{
+
+enum class FusedActivationFunctionType
+{
+ kNone = 0,
+ kRelu6 = 1,
+ kRelu1 = 2,
+ kRelu = 3,
+ kTanh = 4,
+ kSigmoid = 6,
+};
+
+enum class PaddingType
+{
+ kNone = 0,
+ kSame = 1,
+ kValid = 2,
+};
+
+struct PaddingValues
+{
+ int16_t width;
+ int16_t height;
+};
+
+struct ConvParams
+{
+ PaddingType padding_type;
+ PaddingValues padding_values;
+ // TODO(starka): This was just "stride", so check that width+height is OK.
+ int16_t stride_width;
+ int16_t stride_height;
+ int16_t dilation_width_factor;
+ int16_t dilation_height_factor;
+ // uint8_t inference params.
+ // TODO(b/65838351): Use smaller types if appropriate.
+ int32_t input_offset;
+ int32_t weights_offset;
+ int32_t output_offset;
+ int32_t output_multiplier;
+ int output_shift;
+ // uint8_t, etc, activation params.
+ int32_t quantized_activation_min;
+ int32_t quantized_activation_max;
+ // float activation params.
+ float float_activation_min;
+ float float_activation_max;
+ bool is_replaced_weights{false};
+};
+
+struct FullyConnectedParams
+{
+ FusedActivationFunctionType activation{FusedActivationFunctionType::kNone};
+ // uint8 inference params.
+ // TODO(b/65838351): Use smaller types if appropriate.
+ int32_t input_offset;
+ int32_t weights_offset;
+ float weights_scale;
+ int32_t output_offset;
+ int32_t output_multiplier;
+ int output_shift;
+ // uint8, etc, activation params.
+ int32_t quantized_activation_min;
+ int32_t quantized_activation_max;
+ // float activation params - currently unused, but ruy may use them later.
+ float float_activation_min;
+ float float_activation_max;
+ // Mark the operands as cacheable if they are unchanging, e.g. weights.
+ bool lhs_cacheable;
+ bool rhs_cacheable;
+ // FullyConnectedWeightsFormat weights_format;
+};
+
+enum class Order
+{
+ kColMajor,
+ kRowMajor
+};
+
+enum class CachePolicy : std::uint8_t
+{
+ kNeverCache,
+ kCacheIfLargeSpeedup,
+ kAlwaysCache,
+};
+
+// MatrixParams encapsulates the parameters that Gemm needs about each
+// matrix, besides the buffer data pointer.
+// Compare to ruy::Matrix, which also encapsulates the data pointer.
+// Rationale for leaving the data pointer out of here: doing so
+// requires complicated const-correctness mechanics. See
+// ruy::ConstCheckingPtr.
+template <typename Scalar> struct MatrixParams
+{
+ // Storage layout order. For now we only do plain linear non-strided
+ // layout. It would be easy to support a stride if needed.
+ Order order = Order::kColMajor;
+ // Number of rows of the matrix.
+ int rows = 0;
+ // Number of columns of the matrix.
+ int cols = 0;
+ // The zero_point, i.e. which Scalar value is to be interpreted as zero.
+ // When Scalar is floating-point, this must be 0.
+ Scalar zero_point = 0;
+ // When the data pointed to by this matrix is constant data, so that it is
+ // valid to assume that equality of pointers implies equality of data,
+ // a CachePolicy may be used instead of the default kNeverCache,
+ // which will enable ruy to take advantage of this constancy of the data to
+ // cache the packing work, which can be a large speedup in matrix*vector
+ // and other narrow shapes.
+ CachePolicy cache_policy = CachePolicy::kNeverCache;
+};
+
+// Enumeration of broad categories of Gemm.
+//
+// The primary reason for this to exist is to allow Gemm to compile
+// only uniform-quantized or only per-channel-quantized code paths.
+// This is unneeded with ruy as the back-end, as this is only a runtime
+// difference in ruy, but with gemmlowp these really are separate code
+// paths and templatizing in a QuantizationFlavor is necessary to avoid
+// compiling unused gemmlowp code. Indeed, TFLite currently uses
+// uint8 with uniform quantization and int8 with per-channel quantization,
+// and does not use uint8 with per-channel. We want to avoid compiling
+// the gemmlowp uint8 per-channel path when gemmlowp is the back-end.
+//
+// It's possible to drop this in the future if gemmlowp goes away and no
+// other then-relevant backend library handles quantized paths in a way that
+// requires knowing this at compile-time.
+enum class QuantizationFlavor
+{
+ // Floating-point Gemm: the accumulators are not multiplied by any
+ // 'multiplier'.
+ kFloatingPoint,
+ // Quantized Gemm using a single multiplier for all accumulators.
+ kIntegerWithUniformMultiplier,
+ // Quantized Gemm using separate multipliers for accumulators of each
+ // row of the destination matrix. This is what is called 'per-channel'
+ // in GemmParams. Here we use the more specific 'per-row' terminology
+ // to allow for the possibility of 'per-column' in the future, and to
+ // allow for that to be a separate code path in some back-end such as
+ // gemmlowp.
+ kIntegerWithPerRowMultiplier
+};
+
+// Additional parameters that Gemm needs, beyond what falls into
+// the MatrixParams that it takes. Compare to ruy::Spec.
+//
+// Decoupling AccumScalar from DstScalar (rather than deducing it from that)
+// is useful future-proofing. Think of a float16 path using float32 accum.
+//
+// QuantizationFlavor is passed here even though it's technically not used
+// in this class. This is so that we retain the ability in the future to
+// specialize this class for quantization flavor, and this allows for
+// Gemm to be templatized in quantization_flavor via the GemmParams that it
+// takes, allowing for automatic template parameter deduction to take place,
+// so that most call sites don't need to specify a QuantizationFlavor
+// (only those that need perchannel quantization do).
+template <typename AccumScalar, typename DstScalar,
+ QuantizationFlavor quantization_flavor =
+ std::is_floating_point<AccumScalar>::value
+ ? QuantizationFlavor::kFloatingPoint
+ : QuantizationFlavor::kIntegerWithUniformMultiplier>
+struct GemmParams
+{
+ // Only for non-floating-point cases. The fixed-point part (i.e. the mantissa)
+ // of the multiplier by which accumulators are multiplied before being cast
+ // to the destination type.
+ AccumScalar multiplier_fixedpoint = 0;
+ // Only for non-floating-point cases. The exponent part of the aforementioned
+ // multiplier.
+ int multiplier_exponent = 0;
+ // Per-channel variant of multiplier_fixedpoint. If not nullptr, this must
+ // point to a buffer of as many values as there are rows in the destination
+ // matrix. Each row of the destination matrix will use the corresponding
+ // buffer element instead of multiplier_fixedpoint.
+ const AccumScalar *multiplier_fixedpoint_perchannel = nullptr;
+ // Per-channel variant of multiplier_exponent. If not nullptr, this must
+ // point to a buffer of as many values as there are rows in the destination
+ // matrix. Each row of the destination matrix will use the corresponding
+ // buffer element instead of multiplier_exponent.
+ //
+ // Either none or both of multiplier_exponent_perchannel and
+ // multiplier_fixedpoint_perchannel must be nullptr.
+ const int *multiplier_exponent_perchannel = nullptr;
+ // The bias vector data, if not null.
+ const AccumScalar *bias = nullptr;
+ // min clamp bound of destination values.
+ DstScalar clamp_min = std::is_floating_point<DstScalar>::value
+ ? -std::numeric_limits<DstScalar>::infinity()
+ : std::numeric_limits<DstScalar>::lowest();
+ // max clamp bound of destination values.
+ DstScalar clamp_max = std::is_floating_point<DstScalar>::value
+ ? std::numeric_limits<DstScalar>::infinity()
+ : std::numeric_limits<DstScalar>::max();
+};
+
+// Validates self-consistency of GemmParams.
+template <typename AccumScalar, typename DstScalar, QuantizationFlavor quantization_flavor>
+void ValidateGemmParams(const GemmParams<AccumScalar, DstScalar, quantization_flavor> &params)
+{
+ // Guard consistency of the quantized multiplier fields.
+ if (quantization_flavor == QuantizationFlavor::kFloatingPoint)
+ {
+ assert(!params.multiplier_fixedpoint);
+ assert(!params.multiplier_exponent);
+ assert(!params.multiplier_fixedpoint_perchannel);
+ assert(!params.multiplier_exponent_perchannel);
+ }
+ else if (quantization_flavor == QuantizationFlavor::kIntegerWithUniformMultiplier &&
+ !std::is_same<DstScalar, int32_t>::value)
+ {
+ assert(params.multiplier_fixedpoint);
+ // Nothing to check about multiplier_exponent
+ assert(!params.multiplier_fixedpoint_perchannel);
+ assert(!params.multiplier_exponent_perchannel);
+ }
+ else if (quantization_flavor == QuantizationFlavor::kIntegerWithPerRowMultiplier &&
+ !std::is_same<DstScalar, int32_t>::value)
+ {
+ assert(!params.multiplier_fixedpoint);
+ assert(!params.multiplier_exponent);
+ assert(params.multiplier_fixedpoint_perchannel);
+ assert(params.multiplier_exponent_perchannel);
+ }
+ else
+ {
+ // For the get raw accumulator case, we should make sure none of the
+ // quantization params are set.
+ assert(!params.multiplier_fixedpoint);
+ assert(!params.multiplier_exponent);
+ assert(!params.multiplier_fixedpoint_perchannel);
+ assert(!params.multiplier_exponent_perchannel);
+ }
+ UNUSED_RELEASE(params);
+}
+
+inline CachePolicy DefaultCachePolicy(bool is_constant_data)
+{
+ return is_constant_data ? CachePolicy::kCacheIfLargeSpeedup : CachePolicy::kNeverCache;
+}
+
+} // namespace ruy
+} // namespace nnfw
+
+#endif // __NNFW_RUY_TYPES_H__
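
Note: thanks to the default template argument, the quantization flavor is normally
deduced rather than spelled out. A small sketch:

  // Deduces QuantizationFlavor::kFloatingPoint from AccumScalar == float;
  // all multiplier fields must stay unset.
  nnfw::ruy::GemmParams<float, float> fp_params;
  nnfw::ruy::ValidateGemmParams(fp_params);

  // Deduces kIntegerWithUniformMultiplier; a destination narrower than int32
  // requires a uniform multiplier.
  nnfw::ruy::GemmParams<std::int32_t, std::int8_t> q_params;
  q_params.multiplier_fixedpoint = 1 << 30; // hypothetical fixed-point mantissa
  nnfw::ruy::ValidateGemmParams(q_params);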
diff --git a/compute/ruy/include/ruy/Utils.h b/compute/ruy/include/ruy/Utils.h
new file mode 100644
index 000000000..50205abe5
--- /dev/null
+++ b/compute/ruy/include/ruy/Utils.h
@@ -0,0 +1,259 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_RUY_UTILS_H__
+#define __NNFW_RUY_UTILS_H__
+
+#include "Types.h"
+#include "Shape.h"
+
+#include <stdexcept>
+
+namespace nnfw
+{
+namespace ruy
+{
+template <typename T>
+inline void ExtractPatchIntoBufferColumn(const Shape &input_shape, int w, int h, int b, int kheight,
+ int kwidth, int stride_width, int stride_height,
+ int pad_width, int pad_height, int in_width, int in_height,
+ int in_depth, int single_buffer_length, int buffer_id,
+ const T *in_data, T *conv_buffer_data, uint8_t zero_byte)
+{
+ assert(input_shape.DimensionsCount() == 4);
+ // This chunk of code reshapes all the inputs corresponding to
+ // output (b, h, w) to a column vector in conv_buffer(:, buffer_id).
+ const int kwidth_times_indepth = kwidth * in_depth;
+ const int inwidth_times_indepth = in_width * in_depth;
+ const int ih_ungated_start = h * stride_height - pad_height;
+ const int ih_ungated_end = (ih_ungated_start + kheight);
+ const int ih_end = std::min(ih_ungated_end, in_height);
+ const int iw_ungated_start = w * stride_width - pad_width;
+ const int iw_ungated_end = (iw_ungated_start + kwidth);
+ const int iw_end = std::min(iw_ungated_end, in_width);
+ // If the patch is off the edge of the input image, skip writing those rows
+ // and columns from the patch into the output array.
+ const int h_offset = std::max(0, -ih_ungated_start);
+ const int w_offset = std::max(0, -iw_ungated_start);
+ const int ih_start = std::max(0, ih_ungated_start);
+ const int iw_start = std::max(0, iw_ungated_start);
+ const int single_row_num = std::min(kwidth - w_offset, in_width - iw_start) * in_depth;
+ const int output_row_offset = (buffer_id * single_buffer_length);
+ int out_offset = output_row_offset + (h_offset * kwidth + w_offset) * in_depth;
+ int in_offset = Offset(input_shape, b, ih_start, iw_start, 0);
+
+ // Express all of the calculations as padding around the input patch.
+ const int top_padding = h_offset;
+ const int bottom_padding = (ih_ungated_end - ih_end);
+ const int left_padding = w_offset;
+ const int right_padding = (iw_ungated_end - iw_end);
+ assert(single_row_num == ((kwidth - (left_padding + right_padding)) * in_depth));
+
+ // Write out zeroes to the elements representing the top rows of the input
+ // patch that are off the edge of the input image.
+ if (top_padding > 0)
+ {
+ const int top_row_elements = (top_padding * kwidth * in_depth);
+ memset(conv_buffer_data + output_row_offset, zero_byte, (top_row_elements * sizeof(T)));
+ }
+
+ // If the patch is on the interior of the input image horizontally, just copy
+ // over the rows sequentially, otherwise add zero padding at the start or end.
+ if ((left_padding == 0) && (right_padding == 0))
+ {
+ for (int ih = ih_start; ih < ih_end; ++ih)
+ {
+ memcpy(conv_buffer_data + out_offset, in_data + in_offset, single_row_num * sizeof(T));
+ out_offset += kwidth_times_indepth;
+ in_offset += inwidth_times_indepth;
+ }
+ }
+ else
+ {
+ for (int ih = ih_start; ih < ih_end; ++ih)
+ {
+ if (left_padding > 0)
+ {
+ const int left_start = (out_offset - (left_padding * in_depth));
+ memset(conv_buffer_data + left_start, zero_byte, (left_padding * in_depth * sizeof(T)));
+ }
+ memcpy(conv_buffer_data + out_offset, in_data + in_offset, single_row_num * sizeof(T));
+ if (right_padding > 0)
+ {
+ const int right_start = (out_offset + single_row_num);
+ memset(conv_buffer_data + right_start, zero_byte, (right_padding * in_depth * sizeof(T)));
+ }
+ out_offset += kwidth_times_indepth;
+ in_offset += inwidth_times_indepth;
+ }
+ }
+
+ // If the bottom of the patch falls off the input image, pad the values
+ // representing those input rows with zeroes.
+ if (bottom_padding > 0)
+ {
+ const int bottom_row_elements = (bottom_padding * kwidth * in_depth);
+ const int bottom_start =
+ output_row_offset + ((top_padding + (ih_end - ih_start)) * kwidth * in_depth);
+ memset(conv_buffer_data + bottom_start, zero_byte, (bottom_row_elements * sizeof(T)));
+ }
+}
+
+// Supports per-batch zero_byte for per-batch asymmetric quantized inputs.
+template <typename T>
+void DilatedIm2col(const ConvParams &params, const Shape &input_shape, const T *input_data,
+ const Shape &filter_shape, const Shape &output_shape, T *im2col_data,
+ const int32_t *zero_bytes, const int zero_bytes_len)
+{
+ const int stride_width = params.stride_width;
+ const int stride_height = params.stride_height;
+ const int dilation_width_factor = params.dilation_width_factor;
+ const int dilation_height_factor = params.dilation_height_factor;
+ const int pad_width = params.padding_values.width;
+ const int pad_height = params.padding_values.height;
+ assert(input_shape.DimensionsCount() == 4);
+ assert(filter_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+
+ // For dilated convolution, the input pixels are not contiguous therefore we
+ // can't use the same optimizations as Im2Col(). Though note this code would
+ // work fine for the non-dilated case too (though likely a bit slower).
+ assert(dilation_width_factor != 1 || dilation_height_factor != 1);
+ assert(im2col_data);
+ const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+ const int input_height = input_shape.Dims(1);
+ const int input_width = input_shape.Dims(2);
+ const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
+ const int filter_height = filter_shape.Dims(1);
+ const int filter_width = filter_shape.Dims(2);
+ const int output_height = output_shape.Dims(1);
+ const int output_width = output_shape.Dims(2);
+ MatchingDim(output_shape, 3, filter_shape, 0);
+
+ // Construct the MxN sized im2col matrix.
+ // The rows, M, are sub-ordered B x H x W
+ const Shape row_shape({1, batches, output_height, output_width});
+ // The columns, N, are sub-ordered Kh x Kw x Din
+ const Shape col_shape({1, filter_height, filter_width, input_depth});
+ // Use dimensions M and N to construct dims for indexing directly into im2col
+ const Shape im2col_shape({1, 1, row_shape.FlatSize(), col_shape.FlatSize()});
+
+ // Loop through the output rows (B x H x W)
+ for (int batch = 0; batch < batches; ++batch)
+ {
+ const T zero_byte =
+ zero_bytes_len > 1 ? static_cast<T>(zero_bytes[batch]) : static_cast<T>(zero_bytes[0]);
+ for (int out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int out_x = 0; out_x < output_width; ++out_x)
+ {
+ // Each im2col row is an output pixel. Arrange the input data in this
+ // row in an order we can conveniently multiply with the filter data.
+ int row_offset = Offset(row_shape, 0, batch, out_y, out_x);
+ const int in_x_origin = (out_x * stride_width) - pad_width;
+ const int in_y_origin = (out_y * stride_height) - pad_height;
+ // Loop through all the pixels of the filter (Kh x Kw)
+ for (int filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ const int in_y = in_y_origin + dilation_height_factor * filter_y;
+ if ((in_y >= 0) && (in_y < input_height))
+ {
+ // Filter row is within the input data.
+ // Loop through all the filter pixels in this row.
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int in_x = in_x_origin + dilation_width_factor * filter_x;
+ int col_offset = Offset(col_shape, 0, filter_y, filter_x, 0);
+ T *dst = im2col_data + Offset(im2col_shape, 0, 0, row_offset, col_offset);
+ if ((in_x >= 0) && (in_x < input_width))
+ {
+ // Filter pixel is within the input, copy the input data.
+ T const *src = input_data + Offset(input_shape, batch, in_y, in_x, 0);
+ memcpy(dst, src, input_depth * sizeof(T));
+ }
+ else
+ {
+ // Filter pixel is outside the input, zero it out.
+ memset(dst, zero_byte, input_depth * sizeof(T));
+ }
+ }
+ }
+ else
+ {
+ // Filter row is outside the input, zero out the entire filter row.
+ int col_offset = Offset(col_shape, 0, filter_y, 0, 0);
+ T *dst = im2col_data + Offset(im2col_shape, 0, 0, row_offset, col_offset);
+ memset(dst, zero_byte, filter_width * input_depth * sizeof(T));
+ }
+ }
+ }
+ }
+ }
+}
+
+template <typename T>
+void DilatedIm2col(const ConvParams &params, uint8_t zero_byte, const Shape &input_shape,
+ const T *input_data, const Shape &filter_shape, const Shape &output_shape,
+ T *im2col_data)
+{
+ const int32_t zero_point = static_cast<int32_t>(zero_byte);
+ DilatedIm2col<T>(params, input_shape, input_data, filter_shape, output_shape, im2col_data,
+ &zero_point, 1);
+}
+
+template <typename T>
+void Im2col(const ConvParams &params, int kheight, int kwidth, uint8_t zero_byte,
+ const Shape &input_shape, const T *input_data, const Shape &output_shape,
+ T *output_data)
+{
+ const int stride_width = params.stride_width;
+ const int stride_height = params.stride_height;
+ const int pad_width = params.padding_values.width;
+ const int pad_height = params.padding_values.height;
+ assert(input_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+
+ const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+ const int input_depth = input_shape.Dims(3);
+ const int input_width = input_shape.Dims(2);
+ const int input_height = input_shape.Dims(1);
+ const int output_depth = output_shape.Dims(3);
+ const int output_width = output_shape.Dims(2);
+ const int output_height = output_shape.Dims(1);
+
+ int buffer_id = 0;
+ // Loop over the output nodes.
+ for (int b = 0; b < batches; ++b)
+ {
+ for (int h = 0; h < output_height; ++h)
+ {
+ for (int w = 0; w < output_width; ++w)
+ {
+ ExtractPatchIntoBufferColumn(input_shape, w, h, b, kheight, kwidth, stride_width,
+ stride_height, pad_width, pad_height, input_width,
+ input_height, input_depth, output_depth, buffer_id, input_data,
+ output_data, zero_byte);
+ ++buffer_id;
+ }
+ }
+ }
+}
+
+} // namespace ruy
+} // namespace nnfw
+
+#endif // __NNFW_RUY_UTILS_H__
diff --git a/compute/ruy/include/ruy/neon/neon_check.h b/compute/ruy/include/ruy/neon/neon_check.h
new file mode 100644
index 000000000..08394f26f
--- /dev/null
+++ b/compute/ruy/include/ruy/neon/neon_check.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_RUY_NEON_CHECK_H__
+#define __NNFW_RUY_NEON_CHECK_H__
+
+#if defined(__ARM_NEON__) || defined(__ARM_NEON)
+#define USE_NEON
+#include <arm_neon.h>
+#endif
+
+// Disable X86_NEON
+// #if defined __GNUC__ && defined __SSE4_1__ && !defined TF_LITE_DISABLE_X86_NEON
+#if 0
+#define USE_NEON
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+#pragma GCC diagnostic ignored "-Wattributes"
+#pragma GCC diagnostic ignored "-Wnarrowing"
+#pragma GCC diagnostic ignored "-Wsequence-point"
+#include "NEON_2_SSE.h"
+#pragma GCC diagnostic pop
+#endif
+
+// NEON_OR_PORTABLE(SomeFunc, args) calls NeonSomeFunc(args) if USE_NEON is
+// defined, PortableSomeFunc(args) otherwise.
+#ifdef USE_NEON
+// Always use Neon code
+#define NEON_OR_PORTABLE(funcname, ...) Neon##funcname(__VA_ARGS__)
+
+#else
+// No NEON available: Use Portable code
+#define NEON_OR_PORTABLE(funcname, ...) Portable##funcname(__VA_ARGS__)
+
+#endif // defined(USE_NEON)
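+
+// Usage example (illustrative; NeonApplyRelu/PortableApplyRelu are assumed to be
+// defined elsewhere):
+//   NEON_OR_PORTABLE(ApplyRelu, data, size);
+// expands to NeonApplyRelu(data, size) or PortableApplyRelu(data, size).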
+
+#endif // __NNFW_RUY_NEON_CHECK_H__
diff --git a/compute/ruy/include/ruy/operation/Conv.h b/compute/ruy/include/ruy/operation/Conv.h
new file mode 100644
index 000000000..3f03694bd
--- /dev/null
+++ b/compute/ruy/include/ruy/operation/Conv.h
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_RUY_CONV_H__
+#define __NNFW_RUY_CONV_H__
+
+#include "ruy/Types.h"
+#include "ruy/Shape.h"
+#include "ruy/Utils.h"
+#include "ruy/RuySupport.h"
+
+#include <ruy/ruy.h>
+#include <ruy/context.h>
+#include <iostream>
+#include <vector>
+
+namespace nnfw
+{
+namespace ruy
+{
+
+class Conv
+{
+public:
+ Conv() : _im2col_shape(4), _need_im2col(false), _prepared(false) {}
+
+ void prepare(const Shape &input_shape, const Shape &kernel_shape, const Shape &output_shape,
+ uint32_t stride_width, uint32_t stride_height, uint32_t dilation_width_factor,
+ uint32_t dilation_height_factor)
+ {
+ if (!_prepared)
+ {
+ IsRequiredIm2col(input_shape, kernel_shape, output_shape, stride_width, stride_height,
+ dilation_width_factor, dilation_height_factor);
+ _prepared = true;
+ }
+ }
+
+ void operator()(const ConvParams &params, const Shape &input_shape, const float *input_data,
+ const Shape &filter_shape, const float *filter_data, const Shape &bias_shape,
+ const float *bias_data, const Shape &output_shape, float *output_data,
+ ::ruy::Context *ruy_context)
+ {
+ if (!_prepared)
+ {
+ // This means that the input or output is dynamic, or the filter is not constant
+ IsRequiredIm2col(input_shape, filter_shape, output_shape, params.stride_width,
+ params.stride_height, params.dilation_width_factor,
+ params.dilation_height_factor);
+ _prepared = true;
+ }
+
+ int im2col_size = _need_im2col ? _im2col_shape.FlatSize() : 0;
+
+ // Use heap allocation if the im2col data is larger than 8MB (2M float elements)
+ if (im2col_size > 2 * 1024 * 1024)
+ {
+ std::unique_ptr<float[]> im2col_data = std::make_unique<float[]>(im2col_size);
+ ConvFloat(params, input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data,
+ output_shape, output_data, _im2col_shape, im2col_data.get(), ruy_context);
+ }
+ else if (im2col_size > 0)
+ {
+ float im2col_data[im2col_size];
+ ConvFloat(params, input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data,
+ output_shape, output_data, _im2col_shape, im2col_data, ruy_context);
+ }
+ else
+ {
+ ConvFloat(params, input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data,
+ output_shape, output_data, _im2col_shape, nullptr, ruy_context);
+ }
+ }
+
+private:
+ void ConvFloat(const ConvParams &params, const Shape &input_shape, const float *input_data,
+ const Shape &filter_shape, const float *filter_data, const Shape &bias_shape,
+ const float *bias_data, const Shape &output_shape, float *output_data,
+ const Shape &im2col_shape, float *im2col_data, ::ruy::Context *ruy_context)
+ {
+ UNUSED_RELEASE(bias_shape);
+ const int stride_width = params.stride_width;
+ const int stride_height = params.stride_height;
+ const int dilation_width_factor = params.dilation_width_factor;
+ const int dilation_height_factor = params.dilation_height_factor;
+ const float output_activation_min = params.float_activation_min;
+ const float output_activation_max = params.float_activation_max;
+ assert(input_shape.DimensionsCount() == 4);
+ assert(filter_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+
+ // NB: the float 0.0f value is represented by all zero bytes.
+ const uint8_t float_zero_byte = 0x00;
+ const float *gemm_input_data = nullptr;
+ const Shape *gemm_input_shape = nullptr;
+ const int filter_width = filter_shape.Dims(2);
+ const int filter_height = filter_shape.Dims(1);
+ const bool need_dilated_im2col = dilation_width_factor != 1 || dilation_height_factor != 1;
+ const bool need_im2col =
+ stride_width != 1 || stride_height != 1 || filter_width != 1 || filter_height != 1;
+ if (need_dilated_im2col)
+ {
+ DilatedIm2col(params, float_zero_byte, input_shape, input_data, filter_shape, output_shape,
+ im2col_data);
+ gemm_input_data = im2col_data;
+ gemm_input_shape = &im2col_shape;
+ }
+ else if (need_im2col)
+ {
+ assert(im2col_data);
+ Im2col(params, filter_height, filter_width, float_zero_byte, input_shape, input_data,
+ im2col_shape, im2col_data);
+ gemm_input_data = im2col_data;
+ gemm_input_shape = &im2col_shape;
+ }
+ else
+ {
+ // TODO(aselle): We need to make sure to not send im2col if it is not
+ // needed.
+ assert(!im2col_data);
+ gemm_input_data = input_data;
+ gemm_input_shape = &input_shape;
+ }
+
+ const int gemm_input_dims = gemm_input_shape->DimensionsCount();
+ int m = FlatSizeSkipDim(*gemm_input_shape, gemm_input_dims - 1);
+ int n = output_shape.Dims(3);
+ int k = gemm_input_shape->Dims(gemm_input_dims - 1);
+
+ // Set up parameters for the GEMM; this implementation always executes it with ruy.
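+ // Mapping: lhs = filter [n x k] (row-major), rhs = im2col/input [k x m] (col-major),
+ // dst = output [n x m] (col-major).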
+ MatrixParams<float> lhs_params;
+ lhs_params.order = Order::kRowMajor;
+ lhs_params.rows = n;
+ lhs_params.cols = k;
+ MatrixParams<float> rhs_params;
+ rhs_params.order = Order::kColMajor;
+ rhs_params.rows = k;
+ rhs_params.cols = m;
+ MatrixParams<float> dst_params;
+ dst_params.order = Order::kColMajor;
+ dst_params.rows = n;
+ dst_params.cols = m;
+ GemmParams<float, float> gemm_params;
+ gemm_params.bias = bias_data;
+ gemm_params.clamp_min = output_activation_min;
+ gemm_params.clamp_max = output_activation_max;
+
+ // Below code is from tflite::cpu_backend_gemm::detail::GemmImplUsingRuy
+ ::ruy::Matrix<float> ruy_lhs;
+ ::ruy::Matrix<float> ruy_rhs;
+ ::ruy::Matrix<float> ruy_dst;
+ // Note that cache is always enabled for input and weight tensors
+ ruy_support::MakeRuyMatrix(lhs_params, filter_data, &ruy_lhs, true);
+ ruy_support::MakeRuyMatrix(rhs_params, gemm_input_data, &ruy_rhs, true);
+ ruy_support::MakeRuyMatrix(dst_params, output_data, &ruy_dst);
+
+ ::ruy::MulParams<float, float> ruy_mul_params;
+ ruy_support::MakeRuyMulParams(gemm_params, &ruy_mul_params);
+
+ ::ruy::Mul(ruy_lhs, ruy_rhs, ruy_mul_params, ruy_context, &ruy_dst);
+ }
+
+ void IsRequiredIm2col(const Shape &input_shape, const Shape &kernel_shape,
+ const Shape &output_shape, uint32_t stride_width, uint32_t stride_height,
+ uint32_t dilation_width_factor, uint32_t dilation_height_factor)
+ {
+ const bool need_dilated_im2col = dilation_width_factor != 1 || dilation_height_factor != 1;
+ const bool need_non_dilated_im2col = stride_width != 1 || stride_height != 1 ||
+ kernel_shape.Dims(1) != 1 || kernel_shape.Dims(2) != 1;
+
+ _need_im2col = need_dilated_im2col || need_non_dilated_im2col;
+
+ if (_need_im2col)
+ {
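+ // im2col shape: [batch, out_height, out_width, in_depth * kernel_height * kernel_width]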
+ _im2col_shape.SetDim(0, output_shape.Dims(0));
+ _im2col_shape.SetDim(1, output_shape.Dims(1));
+ _im2col_shape.SetDim(2, output_shape.Dims(2));
+ _im2col_shape.SetDim(3, input_shape.Dims(3) * kernel_shape.Dims(1) * kernel_shape.Dims(2));
+ }
+ }
+
+private:
+ Shape _im2col_shape;
+ bool _need_im2col;
+ bool _prepared;
+};
+} // namespace ruy
+} // namespace nnfw
+
+#endif // __NNFW_RUY_CONV_H__
diff --git a/compute/ruy/include/ruy/operation/FullyConnected.h b/compute/ruy/include/ruy/operation/FullyConnected.h
new file mode 100644
index 000000000..1d686b64b
--- /dev/null
+++ b/compute/ruy/include/ruy/operation/FullyConnected.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_RUY_FULLY_CONNECTED_H__
+#define __NNFW_RUY_FULLY_CONNECTED_H__
+
+#include "ruy/Shape.h"
+#include "ruy/Types.h"
+#include "ruy/Utils.h"
+#include "ruy/RuySupport.h"
+
+#include <ruy/ruy.h>
+#include <ruy/context.h>
+
+namespace nnfw
+{
+namespace ruy
+{
+
+inline void FullyConnected(const FullyConnectedParams &params, const Shape &input_shape,
+ const float *input_data, const Shape &weights_shape,
+ const float *weights_data, const Shape &,
+ const float *optional_bias_data, const Shape &output_shape,
+ float *output_data, ::ruy::Context *ruy_context)
+{
+ const int dims_count = weights_shape.DimensionsCount();
+ const int input_rows = weights_shape.Dims(dims_count - 1);
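+ // Map FullyConnected onto GEMM: lhs = weights [out_units x input_rows] (row-major),
+ // rhs = input reshaped to [input_rows x batch] (col-major), dst = output
+ // [out_units x batch] (col-major).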
+ MatrixParams<float> rhs_params;
+ rhs_params.order = Order::kColMajor;
+ rhs_params.rows = input_rows;
+ rhs_params.cols = input_shape.FlatSize() / input_rows;
+ rhs_params.cache_policy = DefaultCachePolicy(params.rhs_cacheable);
+ assert(input_shape.FlatSize() == (rhs_params.rows * rhs_params.cols));
+ MatrixParams<float> lhs_params;
+ lhs_params.order = Order::kRowMajor;
+ lhs_params.cols = weights_shape.Dims(dims_count - 1);
+ lhs_params.rows = FlatSizeSkipDim(weights_shape, dims_count - 1);
+ lhs_params.cache_policy = DefaultCachePolicy(params.lhs_cacheable);
+ MatrixParams<float> dst_params;
+ dst_params.order = Order::kColMajor;
+ dst_params.rows = output_shape.Dims(output_shape.DimensionsCount() - 1);
+ dst_params.cols = FlatSizeSkipDim(output_shape, output_shape.DimensionsCount() - 1);
+ GemmParams<float, float> gemm_params;
+ gemm_params.bias = optional_bias_data;
+ gemm_params.clamp_min = params.float_activation_min;
+ gemm_params.clamp_max = params.float_activation_max;
+
+ // Below code was copied from tflite::cpu_backend_gemm::detail::GemmImplUsingRuy
+ ::ruy::Matrix<float> ruy_lhs;
+ ::ruy::Matrix<float> ruy_rhs;
+ ::ruy::Matrix<float> ruy_dst;
+ // Note that cache is always enabled for input and weight tensors
+ ruy_support::MakeRuyMatrix(lhs_params, weights_data, &ruy_lhs, true);
+ ruy_support::MakeRuyMatrix(rhs_params, input_data, &ruy_rhs, true);
+ ruy_support::MakeRuyMatrix(dst_params, output_data, &ruy_dst);
+
+ ::ruy::MulParams<float, float> ruy_mul_params;
+ ruy_support::MakeRuyMulParams(gemm_params, &ruy_mul_params);
+
+ ::ruy::Mul(ruy_lhs, ruy_rhs, ruy_mul_params, ruy_context, &ruy_dst);
+}
+
+} // namespace ruy
+} // namespace nnfw
+
+#endif // __NNFW_RUY_FULLY_CONNECTED_H__
diff --git a/compute/test/CMakeLists.txt b/compute/test/CMakeLists.txt
deleted file mode 100644
index 92aac3e72..000000000
--- a/compute/test/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-if(NOT ENABLE_TEST)
- return()
-endif(NOT ENABLE_TEST)
-
-set(TEST_COMPUTE test_compute)
-
-file(GLOB_RECURSE TESTS "*.cc")
-
-add_executable(${TEST_COMPUTE} ${TESTS})
-
-target_link_libraries(${TEST_COMPUTE} nnfw_lib_cker)
-target_link_libraries(${TEST_COMPUTE} gtest)
-target_link_libraries(${TEST_COMPUTE} gtest_main)
-target_link_libraries(${TEST_COMPUTE} ${LIB_PTHREAD} dl)
-add_test(${TEST_COMPUTE} ${TEST_COMPUTE})
-
-install(TARGETS ${TEST_COMPUTE} DESTINATION unittest_standalone)
diff --git a/compute/test/cker/Range.cc b/compute/test/cker/Range.cc
deleted file mode 100644
index 55f4fcf20..000000000
--- a/compute/test/cker/Range.cc
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/Range.h>
-
-#include <gtest/gtest.h>
-#include <vector>
-
-TEST(CKer_Operation, Range)
-{
- {
- const int start = 0;
- const int limit = 10;
- const int delta = 1;
- std::vector<int> actual(10);
- nnfw::cker::Range<int>(&start, &limit, &delta, actual.data());
-
- for (int i = 0; i < actual.size(); i++)
- ASSERT_EQ(actual[i], i);
- }
-
- {
- const int start = 3;
- const int limit = 18;
- const int delta = 3;
- std::vector<int> expected = {3, 6, 9, 12, 15};
- std::vector<int> actual(expected.size());
- nnfw::cker::Range<int>(&start, &limit, &delta, actual.data());
-
- for (int i = 0; i < actual.size(); i++)
- ASSERT_EQ(actual[i], expected[i]);
- }
-
- {
- const float start = 3;
- const float limit = 1;
- const float delta = -0.5;
- std::vector<float> expected = {
- 3, 2.5, 2, 1.5,
- };
- std::vector<float> actual(expected.size());
- nnfw::cker::Range<float>(&start, &limit, &delta, actual.data());
-
- for (int i = 0; i < actual.size(); i++)
- ASSERT_FLOAT_EQ(actual[i], expected[i]);
- }
-}
-
-TEST(CKer_Operation, neg_Range)
-{
- {
- const int start = 212;
- const int limit = 10;
- const int delta = 1;
- std::vector<int> actual(10);
-
- EXPECT_ANY_THROW(nnfw::cker::Range<int>(&start, &limit, &delta, actual.data()));
- }
-}
diff --git a/docs/conf.py b/docs/conf.py
index 9b870097a..50256719a 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -21,7 +21,7 @@ copyright = '2020, Samsung Research & contributors'
author = 'Samsung Research & contributors'
# The full version, including alpha/beta/rc tags
-release = '1.9.0'
+release = '1.25.0'
# -- General configuration ---------------------------------------------------
diff --git a/docs/howto/how-to-add-a-new-operation.md b/docs/howto/how-to-add-a-new-operation.md
index 8ea7014a3..241ba6cb1 100644
--- a/docs/howto/how-to-add-a-new-operation.md
+++ b/docs/howto/how-to-add-a-new-operation.md
@@ -6,4 +6,4 @@
## Runtime
-- [How to introduce a new operatoin into runtime](how-to-introduce-a-new-operation-into-runtime.md)
+- [How to introduce a new operation into runtime](how-to-introduce-a-new-operation-into-runtime.md)
diff --git a/docs/howto/how-to-build-compiler.md b/docs/howto/how-to-build-compiler.md
index e9dcb263a..3a9638677 100644
--- a/docs/howto/how-to-build-compiler.md
+++ b/docs/howto/how-to-build-compiler.md
@@ -1,7 +1,211 @@
# How to Build Compiler
+This document is based on the system where Ubuntu Desktop Linux 18.04 LTS is installed with default
+settings, and can be applied in other environments without much difference. For reference, the
+development of our project started in the Ubuntu Desktop Linux 16.04 LTS environment.
+As of now, to build in 16.04, please use gcc 7.x or above.
+
## Build Requires
+If you are going to build this project, the following modules must be installed on your system:
+
+- CMake
+- Boost C++ libraries
+
+On Ubuntu, you can easily install them with the following command.
+
+```
+$ sudo apt-get install cmake libboost-all-dev
+```
+
+If your Linux system does not have the basic development configuration, you will need to install
+more packages. A list of all packages needed to configure the development environment can be found
+in the https://github.com/Samsung/ONE/blob/master/infra/docker/Dockerfile.1804 file.
+
+Here is a summary of it:
+
+```
+$ sudo apt-get install \
+build-essential \
+clang-format-8 \
+cmake \
+doxygen \
+git \
+hdf5-tools \
+lcov \
+libatlas-base-dev \
+libboost-all-dev \
+libgflags-dev \
+libgoogle-glog-dev \
+libgtest-dev \
+libhdf5-dev \
+libprotobuf-dev \
+protobuf-compiler \
+pylint \
+python3 \
+python3-pip \
+python3-venv \
+scons \
+software-properties-common \
+unzip \
+wget
+
+$ mkdir /tmp/gtest
+$ cd /tmp/gtest
+$ cmake /usr/src/gtest
+$ make
+$ sudo mv *.a /usr/lib
+
+$ pip install yapf==0.22.0 numpy
+```
+
+Additionally, install python3.8 if you are using Ubuntu 18.04.
+```
+$ sudo apt-get install \
+python3.8 \
+python3.8-dev \
+python3.8-venv
+```
+
+If you get `Unable to locate package clang-format-8`, then just use `clang-format`.
+
## Build for Ubuntu
-## Build for windows
+In a typical Linux development environment, including Ubuntu, you can build the compiler with a
+simple command like this:
+
+```
+$ git clone https://github.com/Samsung/ONE.git one
+$ cd one
+$ ./nncc configure
+$ ./nncc build
+```
+Build artifacts will be placed in the `build` folder.
+
+To run unit tests:
+```
+$ ./nncc test
+```
+
+The above steps will build all the modules in the compiler folder. Some modules are currently
+not active. To build only the currently active modules of the compiler, we provide a preset of
+modules to build with the command below:
+```
+$ ./nnas create-package --prefix $HOME/.local
+```
+
+With this command, the `~/.local` folder will contain all release files.
+If you have added `~/.local/bin` to your PATH, you will now have the latest compiler binaries.
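+
+For example, you can add it like this (put the line in your shell profile to make it
+persistent):
+```
+$ export PATH=$HOME/.local/bin:$PATH
+```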
+
+### Build for debug and release separately
+
+The build target folder can be customized with the `NNCC_WORKSPACE` environment variable, as we
+may want to separate debug and release builds.
+
+```
+$ NNCC_WORKSPACE=build/debug ./nncc configure
+$ ./nncc build
+```
+will build the debug version in the `build/debug` folder, and
+
+```
+$ NNCC_WORKSPACE=build/release ./nncc configure -DCMAKE_BUILD_TYPE=Release
+$ ./nncc build
+```
+will build the release version in the `build/release` folder.
+
+### Troubleshooting
+
+If you are using python3.8, the build may fail, as there is no TensorFlow 1.13.2 package for
+python3.8. Please install python3.7 or a lower version as the default python3.
+
+## Build for Windows
+
+To build for Windows, we use MinGW (Minimalist GNU for Windows). [Here](https://github.com/git-for-windows/build-extra/releases) you can download a tool that includes it.
+
+```
+$ git clone https://github.com/Samsung/ONE.git one
+$ cd one
+$ NNAS_BUILD_PREFIX=build ./nnas create-package --preset 20200731_windows --prefix install
+```
+
+- `NNAS_BUILD_PREFIX` is the path to the directory where compiler build artifacts will be stored.
+- `--preset` specifies the version you will install. See the `infra/packaging/preset/` directory for more details and the latest version.
+- `--prefix` is the install directory.
+
+## Cross build for Ubuntu/ARM32 (experimental)
+
+Some modules are available to run in Ubuntu/ARM32 through cross building.
+
+While configuring the build, some modules need to execute tools that generate test
+materials, and these tools must run on the host (x86-64). So the tools for the host
+must be built before cross building.
+
+The overall cross build steps are: (1) configure for the host, (2) build tools for
+the host, (3) configure for the ARM32 target, and (4) build for the ARM32 target.
+
+Unit tests can also run on the target device.
+But the value tests need to run TensorFlow Lite to get expected results, and since
+doing this on the target is cumbersome, the data files from host execution
+are used instead.
+
+Thus, to run the unit tests on the target, you need to run them on the host first.
+
+### Prepare root file system
+
+You should prepare an Ubuntu/ARM32 root file system for cross compilation.
+Please refer to
+[how-to-cross-build-runtime-for-arm.md](how-to-cross-build-runtime-for-arm.md)
+for preparation.
+
+You can set the `ROOTFS_ARM` environment variable if you have the root file system in
+an alternative folder, for example (the path below is illustrative):
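+```
+# Point this to your own rootfs location.
+$ export ROOTFS_ARM=$HOME/rootfs/arm
+```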
+
+### Clean existing external source for patches
+
+Some external projects built from source are not "cross compile ready with CMake"
+projects. This experimental project provides patches for them.
+Just remove the sources and stamp files as below, and `make` will prepare the
+patched source code.
+```
+rm -rf externals/HDF5
+rm -rf externals/PROTOBUF
+rm externals/HDF5.stamp
+rm externals/PROTOBUF.stamp
+```
+
+### Build
+
+To cross build, the `infra/nncc/Makefile.arm32` file is provided as an example to
+work with the `make` command.
+```
+make -f infra/nncc/Makefile.arm32 cfg
+make -f infra/nncc/Makefile.arm32 debug
+```
+The first `make` will run steps (1), (2) and (3) above. The second `make` will run step (4).
+
+### Test
+
+Prerequisites for testing on an ARM32 device:
+```
+# numpy is required for value match in ARM32 target device
+sudo apt-get install python3-pip
+python3 -m pip install numpy
+```
+
+You can also run unit tests on an ARM32 Ubuntu device with the cross build results.
+First you need to run the tests on the host to prepare files that are currently
+complicated to generate on the target device.
+```
+# run this in x86-64 host
+make -f infra/nncc/Makefile.arm32 test_prep
+
+# run this in ARM32 target device
+make -f infra/nncc/Makefile.arm32 test
+```
+
+NOTE: this assumes that
+- the host and target have the same directory structure
+- you either copy the `build` folder to the target, or
+- mount the `ONE` folder on the target with NFS, which would be simpler
diff --git a/docs/howto/how-to-build-package.md b/docs/howto/how-to-build-package.md
index 65cc3c753..803466bfe 100644
--- a/docs/howto/how-to-build-package.md
+++ b/docs/howto/how-to-build-package.md
@@ -1 +1,188 @@
# How to Build Package
+
+## Overview
+
+This document describes how to build a package, consisting of a model and additional
+file(s), to run the model in our runtime _onert_. Users can build a
+package through command line tools.
+
+Steps of building a Package:
+1. Import model and convert to circle
+1. Optionally, optimize and quantize circle
+1. Create package from circle
+
+NOTE: Examples and options of each command shown below are from the version current
+when writing this document. They may differ from the latest version of the commands,
+1.9.0. Please file an issue or post a PR to correct them if anything needs updating.
+
+## Import model
+
+As of writing this document, TensorFlow and TensorFlow Lite models are
+supported.
+
+To import a model, use `one-import` with a model framework key and arguments.
+```
+$ one-import FRAMEWORK [arguments]
+```
+
+Executing `one-import` without any key will show the list of supported frameworks.
+
+Example of `one-import` command:
+```
+$ one-import
+Usage: one-import [FRAMEWORK] ...
+Available FRAMEWORK drivers:
+ bcq
+ tf
+ tflite
+```
+
+### Example for TensorFlow
+
+This is an example of importing a TensorFlow model:
+```
+$ one-import tf --input_path mymodel.pb --output_path mymodel.circle \
+--input_arrays input1,input2 --input_shapes "1,224,224,3:1000" \
+--output_arrays output
+```
+
+Running with `--help` will show current required/optional arguments:
+```
+$ one-import tf --help
+Convert TensorFlow model to circle.
+Usage: one-import-tf
+ --version Show version information and exit
+ --input_path <path/to/tfmodel>
+ --output_path <path/to/circle>
+ --input_arrays <names of the input arrays, comma-separated>
+ --input_shapes <input shapes, colon-separated>
+ --output_arrays <names of the output arrays, comma-separated>
+ --v2 Use TensorFlow 2.x interface (default is 1.x interface)
+```
+
+### Example for TensorFlow lite
+
+This is an example of importing a TensorFlow Lite model:
+```
+$ one-import tflite --input_path mymodel.tflite --output_path mymodel.circle
+```
+
+Likewise, running with `--help` will show current required/optional arguments:
+```
+$ one-import tflite --help
+Convert TensorFlow lite model to circle.
+Usage: one-import-tflite
+ --version Show version information and exit
+ --input_path <path/to/tflitemodel>
+ --output_path <path/to/circle>
+```
+
+### Example for TensorFlow Model Including BCQ Information
+
+This is an example of importing a TensorFlow model which includes BCQ information.
+As a result of this command, BCQ information nodes will be preserved.
+```
+$ one-import bcq --input_path bcqmodel.pb --output_path bcqmodel.circle
+```
+
+Likewise, running with `--help` will show current required/optional arguments:
+```
+$ one-import bcq --help
+Convert TensorFlow model with BCQ to circle.
+Usage: one-import-bcq
+ --version Show version information and exit
+ --input_path <path/to/tfmodel/with/BCQ>
+ --output_path <path/to/circle>
+ --input_arrays <names of the input arrays, comma-separated>
+ --input_shapes <input shapes, colon-separated>
+ --output_arrays <names of the output arrays, comma-separated>
+ --v2 Use TensorFlow 2.x interface (default is 1.x interface)
+```
+
+## Optimize circle model
+
+A circle model can be optimized for better performance and smaller size.
+A typical optimization algorithm for this is to fuse certain patterns of operators
+into one fused operator.
+
+This is an example of optimizing a circle model:
+```
+$ one-optimize --all --input_path mymodel.circle --output_path optmodel.circle
+```
+
+Running with `--help` will show the current optimization options:
+```
+$ one-optimize --help
+Optimize circle model.
+Usage: one-optimize
+ --version Show version information and exit
+ --all Enable all optimization algorithms
+ --fuse_bcq Enable FuseBCQ Pass
+ --fuse_instnorm Enable FuseInstanceNormalization Pass
+ --resolve_customop_add
+ Enable ResolveCustomOpAddPass Pass
+ --resolve_customop_batchmatmul
+ Enable ResolveCustomOpBatchMatMulPass Pass
+ --resolve_customop_matmul
+ Enable ResolveCustomOpMatMulPass Pass
+ --input_path <path/to/input/circle>
+ --output_path <path/to/output/circle>
+```
+
+## Quantize circle model
+
+A floating-point circle model can be quantized to a lower-precision format
+(e.g., uint8 or int16) for faster inference and smaller model size,
+by reducing the number of bits that represent weights and activations.
+
+This is an example of quantizing a circle model:
+```
+$ one-quantize --input_path mymodel.circle --output_path quantmodel.circle
+```
+
+Likewise, `--help` will show the current quantization options:
+```
+$ one-quantize --help
+Quantize circle model.
+Usage: one-quantize
+ --version Show version information and exit
+ --input_dtype Input data type (supported: float32, default=float32)
+ --quantized_dtype Output quantized data type (supported: uint8, default=uint8)
+ --granularity Quantize granularity (supported: layer, channel, default=layer)
+ --min_percentile Minimum percentile (0.0~100.0, default=1.0)
+ --max_percentile Maximum percentile (0.0~100.0, default=99.0)
+ --mode Record mode (supported: percentile/moving_average, default=percentile)
+ --input_path <path/to/input/circle>
+ --input_data <path/to/input/data>
+ --output_path <path/to/output/circle>
+```
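+
+For example, to quantize with channel-wise granularity using representative input
+data (the file names here are illustrative):
+```
+$ one-quantize --granularity channel --input_path mymodel.circle \
+--input_data calib.h5 --output_path quantmodel.circle
+```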
+
+## Pack circle model
+
+Use `one-pack` command to create package.
+
+```
+$ one-pack -i mymodel.circle -o nnpackage
+```
+
+`nnpackage` is a folder containing the circle model and additional file(s):
+
+```
+$ tree nnpackage
+nnpackage
+└── mymodel
+ ├── metadata
+ │   └── MANIFEST
+ └── mymodel.circle
+```
+
+Likewise, `--help` will show current package options:
+
+```
+$ one-pack --help
+Package circle to nnpkg
+Usage: one-pack
+ -v, --version Show version information and exit
+ -i <path/to/circle>
+ -o <path/to/nnpackage/folder>
+```
diff --git a/docs/howto/how-to-build-runtime-tizen-gbs-rpi4.md b/docs/howto/how-to-build-runtime-tizen-gbs-rpi4.md
new file mode 100644
index 000000000..5479a342e
--- /dev/null
+++ b/docs/howto/how-to-build-runtime-tizen-gbs-rpi4.md
@@ -0,0 +1,316 @@
+# How to Build Runtime with GBS for Tizen/RPi4
+
+This document describes how to build runtime with GBS for Tizen AARCH64.
+As a real example, we'll also describe how to prepare Tizen on Raspberry Pi 4
+and show you how to run our test package runner `onert_run`.
+
+For ARM32, the process is mostly the same, with only a few changes.
+
+The host PC runs Ubuntu 18.04, but other versions or distros may work with a little
+adjustment.
+
+Detailed technical information is not described here, so please read the reference
+pages as you go.
+
+
+## Setting up build environment
+
+(1) Add Tizen build tools repo
+```
+$ sudo vim /etc/apt/sources.list
+```
+Add this at the end
+```
+deb [trusted=yes] http://download.tizen.org/tools/latest-release/Ubuntu_18.04/ /
+```
+Note: There's a slash (`/`) at the end.
+
+For other versions of Ubuntu, please refer
+http://download.tizen.org/tools/latest-release/ lists.
+
+(2) Update package information and upgrade to the latest
+```
+$ sudo apt-get update
+$ sudo apt-get upgrade
+```
+
+(3) Install GBS tools
+```
+$ sudo apt-get install gbs mic
+```
+
+For more information, please refer to [HERE](https://source.tizen.org/ko/documentation/developer-guide/getting-started-guide/installing-development-tools)
+
+## Build ONERT
+
+(1) Set `python2` as default python
+
+Some tools of GBS run in `python2` and won't run with `python3`.
+Please check the `python` version and set it to 2.x, for example as shown below.
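+
+One way to do this (the exact python2 path may differ on your system):
+```
+$ sudo update-alternatives --install /usr/bin/python python /usr/bin/python2.7 1
+$ python --version
+```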
+
+(2) Set `TIZEN_BUILD_ROOT`
+
+You may set `GBS-ROOT` to any place you like. Usually we use the home folder.
+```
+$ export TIZEN_BUILD_ROOT=$HOME/GBS-ROOT/
+```
+Adding it to your `$HOME/.profile` file is a good idea.
+
+(3) Clone the ONE repo
+
+```
+git clone https://github.com/Samsung/ONE.git
+```
+
+(4) Build
+
+```
+$ cd ONE
+
+$ gbs -c infra/nnfw/config/gbs.conf build --include-all -A aarch64 --define 'test_build 1'
+```
+- `-A aarch64` is to set architecture to AARCH64. Use `arm32` for ARM32 target.
+- `--define 'test_build 1'` is to enable test build so that we can use `onert_run`
+
+Now take a cup of coffee.
+
+(5) Build result RPM packages
+
+```
+$ ls ~/GBS-ROOT/local/repos/tizen/aarch64/RPMS
+nnfw-1.10.0-1.aarch64.rpm
+nnfw-debuginfo-1.10.0-1.aarch64.rpm
+nnfw-debugsource-1.10.0-1.aarch64.rpm
+nnfw-devel-1.10.0-1.aarch64.rpm
+nnfw-minimal-app-1.10.0-1.aarch64.rpm
+nnfw-minimal-app-debuginfo-1.10.0-1.aarch64.rpm
+nnfw-plugin-devel-1.10.0-1.aarch64.rpm
+nnfw-test-1.10.0-1.aarch64.rpm
+nnfw-test-debuginfo-1.10.0-1.aarch64.rpm
+```
+
+`-1.10.0-1` may differ, as this document was written during `1.10.0` development.
+
+## Prepare Tizen on Raspberry Pi 4
+
+Please refer https://wiki.tizen.org/Quick_guide_for_RPI4 for detailed descriptions.
+
+(1) Download flashing tool
+```
+$ wget \
+https://git.tizen.org/cgit/platform/kernel/u-boot/plain/scripts/tizen/sd_fusing_rpi3.sh?h=tizen \
+--output-document=sd_fusing_rpi3.sh
+
+$ chmod 755 sd_fusing_rpi3.sh
+```
+
+(2) Prepare Micro-SD memory card.
+
+You first need to find out the device name. This document skips how to find it.
+Suppose it's `/dev/sdj`:
+```
+$ sudo ./sd_fusing_rpi3.sh -d /dev/sdj --format
+```
+You need to change `/dev/sdj` to match your configuration.
+
+The partition table may look like this:
+```
+Device Boot Start End Sectors Size Id Type
+/dev/sdj1 * 8192 139263 131072 64M e W95 FAT16 (LBA)
+/dev/sdj2 139264 6430719 6291456 3G 83 Linux
+/dev/sdj3 6430720 9183231 2752512 1.3G 83 Linux
+/dev/sdj4 9183232 62521343 53338112 25.4G 5 Extended
+/dev/sdj5 9185280 61958143 52772864 25.2G 83 Linux
+/dev/sdj6 61960192 62025727 65536 32M 83 Linux
+/dev/sdj7 62027776 62044159 16384 8M 83 Linux
+/dev/sdj8 62046208 62111743 65536 32M 83 Linux
+/dev/sdj9 62113792 62130175 16384 8M 83 Linux
+/dev/sdj10 62132224 62263295 131072 64M 83 Linux
+/dev/sdj11 62265344 62521343 256000 125M 83 Linux
+```
+
+(3) Download images
+
+Please visit http://download.tizen.org/snapshots/tizen/unified/latest/images/standard/iot-boot-arm64-rpi4/
+and http://download.tizen.org/snapshots/tizen/unified/latest/images/standard/iot-headed-3parts-aarch64-rpi.
+
+Please visit the `iot-boot-armv7l-rpi4` folder for ARM32 images.
+
+Get the latest files. As of writing this document, the name contains `20200908.3`.
+```
+$ wget http://download.tizen.org/snapshots/tizen/unified/latest/images/standard/iot-boot-arm64-rpi4/tizen-unified_20200908.3_iot-boot-arm64-rpi4.tar.gz
+
+$ wget http://download.tizen.org/snapshots/tizen/unified/latest/images/standard/iot-headed-3parts-aarch64-rpi/tizen-unified_20200908.3_iot-headed-3parts-aarch64-rpi.tar.gz
+```
+
+(4) Flash images to memory card
+
+As above, suppose the memory card is at `/dev/sdj`:
+```
+$ sudo ./sd_fusing_rpi3.sh -d /dev/sdj \
+-b tizen-unified_20200908.3_iot-boot-arm64-rpi4.tar.gz \
+tizen-unified_20200908.3_iot-headed-3parts-aarch64-rpi.tar.gz
+```
+You need to change `/dev/sdj` to match your configuration, and also change the
+`tizen-unified_...` file names to your latest downloaded file names.
+
+(5) Assign IP address for `sdb` connection
+
+Here, we provide a way to connect the `sdb` tool through TCP/IP.
+
+The steps below will modify the root image and set a fixed IP address.
+
+(5-1) Mount image to host
+```
+$ mkdir j2
+$ sudo mount /dev/sdj2 j2
+```
+As above, please update `/dev/sdj2` to match your configuration.
+
+(5-2) Add a new file
+```
+$ vi j2/etc/systemd/system/ip.service
+```
+and set it like this:
+```
+[Service]
+Restart=always
+RestartSec=1
+User=root
+ExecStart=/bin/sh -c "ifconfig eth0 192.168.x.y netmask 255.255.255.0 up"
+
+[Install]
+WantedBy=multi-user.target
+```
+Replace `192.168.x.y` with your actual IP address.
+
+
+(5-3) Add a symbolic link
+```
+$ sudo mkdir -p j2/etc/systemd/system/multi-user.target.wants/
+$ pushd j2/etc/systemd/system/multi-user.target.wants/
+$ sudo ln -s ../../system/ip.service .
+$ popd
+```
+
+(5-4) Now that everything is ready, unmount and unplug your memory card, plug it into
+the RPi4, and turn on the power.
+```
+$ sync
+$ sudo umount j2
+```
+
+## sdb connect to Tizen/RPi4
+
+You may need to install Tizen Studio to use the `sdb` command.
+Please visit https://developer.tizen.org/ if you don't have it.
+
+We assume `sdb` command is in the PATH.
+
+(1) Connect
+
+```
+$ sdb connect 192.168.x.y
+connecting to 192.168.x.y:26101 ...
+connected to 192.168.x.y:26101
+```
+Please update the `192.168.x.y` part to your actual IP address.
+
+Check with the `devices` command; you should see `rpi3` or similar.
+```
+$ sdb devices
+List of devices attached
+192.168.x.y:26101 device rpi3
+```
+
+(2) Remount filesystem with R/W
+
+You need to remount the file system read/write so that you can install packages.
+```
+$ sdb root on
+$ sdb shell
+```
+Inside your Tizen/RPi4:
+```
+sh-3.2# mount -o rw,remount /
+```
+
+(3) Download dependent packages
+
+On your host, perhaps in another terminal, download packages from
+http://download.tizen.org/releases/daily/tizen/unified/latest/repos/standard/packages/aarch64/
+
+```
+$ wget http://download.tizen.org/releases/daily/tizen/unified/latest/repos/standard/packages/aarch64/libarmcl-v21.02-17.5.aarch64.rpm
+
+$ wget http://download.tizen.org/releases/daily/tizen/unified/latest/repos/standard/packages/aarch64/libhdf5-101-1.10.1-3.85.aarch64.rpm
+
+$ wget http://download.tizen.org/releases/daily/tizen/unified/latest/repos/standard/packages/aarch64/libhdf5_cpp101-1.10.1-3.85.aarch64.rpm
+```
+
+(4) Copy to device
+```
+$ sdb push libarmcl-v21.02-17.5.aarch64.rpm /opt/usr/home/owner/share/tmp/
+$ sdb push libhdf5-101-1.10.1-3.85.aarch64.rpm /opt/usr/home/owner/share/tmp/
+$ sdb push libhdf5_cpp101-1.10.1-3.85.aarch64.rpm /opt/usr/home/owner/share/tmp/
+```
+And our runtime packages
+```
+$ cd ~/GBS-ROOT/local/repos/tizen/aarch64/RPMS
+$ sdb push nnfw-1.10.0-1.aarch64.rpm /opt/usr/home/owner/share/tmp/
+$ sdb push nnfw-test-1.10.0-1.aarch64.rpm /opt/usr/home/owner/share/tmp/
+```
+
+(5) Install dependent packages
+
+Within Tizen/RPi4 shell
+```
+sh-3.2# cd /opt/usr/home/owner/share/tmp/
+
+sh-3.2# rpm -i libarmcl-v21.02-17.5.aarch64.rpm
+sh-3.2# rpm -i libhdf5-101-1.10.1-3.85.aarch64.rpm
+sh-3.2# rpm -i libhdf5_cpp101-1.10.1-3.85.aarch64.rpm
+```
+There may be a message like this, but it seems OK:
+```
+/sbin/ldconfig: Cannot lstat /lib64/libhdf5.so.101.0.0: Permission denied
+```
+Continue installing:
+```
+sh-3.2# rpm -i nnfw-1.10.0-1.aarch64.rpm
+sh-3.2# rpm -i nnfw-test-1.10.0-1.aarch64.rpm
+```
+
+Our `Product` binary folder is installed at `/opt/usr/nnfw-test`.
+```
+sh-3.2# cd /opt/usr/nnfw-test
+sh-3.2# ls -al
+total 16
+drwxr-xr-x 4 root root 4096 Jan 1 09:05 .
+drwxr-xr-x 14 root root 4096 Jan 1 09:05 ..
+drwxr-xr-x 3 root root 4096 Jan 1 09:05 Product
+drwxr-xr-x 3 root root 4096 Jan 1 09:05 infra
+```
+
+(6) Run nnpackage
+
+Refer to the `how-to-build-package.md` document to produce an nnpackage from a model.
+
+Assume the `mobilenet_v2_1.4_224` nnpackage has already been copied to the
+`/opt/usr/home/owner/media/models` folder with the `sdb` command.
+
+```
+sh-3.2# BACKENDS="cpu" Product/out/bin/onert_run \
+--nnpackage /opt/usr/home/owner/media/models/mobilenet_v2_1.4_224
+
+Package Filename /opt/usr/home/owner/media/models/mobilenet_v2_1.4_224
+===================================
+MODEL_LOAD takes 65.403 ms
+PREPARE takes 158.716 ms
+EXECUTE takes 373.447 ms
+- MEAN : 373.447 ms
+- MAX : 373.447 ms
+- MIN : 373.447 ms
+- GEOMEAN : 373.447 ms
+===================================
+```
diff --git a/docs/howto/how-to-build-runtime.md b/docs/howto/how-to-build-runtime.md
index f4751198e..f4d7b56e0 100644
--- a/docs/howto/how-to-build-runtime.md
+++ b/docs/howto/how-to-build-runtime.md
@@ -1,11 +1,12 @@
# How to Build Runtime
-This document is based on the system where Ubuntu Desktop Linux 18.04 LTS is installed with default settings, and can be applied in other environments without much difference. For reference, the development of our project started in the Ubuntu Desktop Linux 16.04 LTS environment.
+This document is based on the system where Ubuntu Desktop Linux 20.04 LTS is installed with default settings, and can be applied in other environments without much difference. For reference, the development of our project started in the Ubuntu Desktop Linux 16.04 LTS environment.
## Build requirements
If you are going to build this project, the following modules must be installed on your system:
+- C & C++ compiler
- CMake
- Boost C++ libraries
@@ -15,44 +16,30 @@ In the Ubuntu, you can easily install it with the following command.
$ sudo apt-get install cmake libboost-all-dev
```
-If your linux system does not have the basic development configuration, you will need to install more packages. A list of all packages needed to configure the development environment can be found in the https://github.com/Samsung/ONE/blob/master/infra/docker/Dockerfile.1804 file.
+If your Linux system does not have the basic development configuration, you will need to install more packages. A list of all packages needed to configure the development environment can be found in https://github.com/Samsung/ONE/blob/master/infra/docker/focal/Dockerfile.
-Here is a summary of it
+Here is a summary of it for the runtime and related tools:
```
$ sudo apt install \
build-essential \
-clang-format-3.9 \
+clang-format-8 \
cmake \
doxygen \
git \
graphviz \
hdf5-tools \
lcov \
-libatlas-base-dev \
libboost-all-dev \
-libgflags-dev \
-libgoogle-glog-dev \
-libgtest-dev \
libhdf5-dev \
-libprotobuf-dev \
-protobuf-compiler \
-pylint \
python3 \
python3-pip \
-python3-venv \
scons \
software-properties-common \
unzip \
wget
-$ mkdir /tmp/gtest
-$ cd /tmp/gtest
-$ cmake /usr/src/gtest
-$ make
-$ sudo mv *.a /usr/lib
-
-$ pip install yapf==0.22.0 numpy
+$ pip3 install yapf==0.22.0 numpy
```
@@ -63,20 +50,28 @@ In a typical linux development environment, including Ubuntu, you can build the
```
$ git clone https://github.com/Samsung/ONE.git one
$ cd one
-$ make -f Makefile.template install
+$ ./nnfw configure
+$ ./nnfw build
+$ ./nnfw install
+```
+
+For an easy build process, we provide the `Makefile.template` makefile.
+
+```
+$ make -f Makefile.template
```
-Unfortunately, the debug build on the x86_64 architecture currently has an error. To solve the problem, you must use gcc version 9 or higher. Another workaround is to do a release build rather than a debug build. This is not a suitable method for debugging during development, but it is enough to check the function of the runtime. To release build the runtime, add the environment variable `BUILD_TYPE=release` to the build command as follows.
+To build a release version of the runtime, add the environment variable `BUILD_TYPE=release` to the build command as follows.
```
$ export BUILD_TYPE=release
-$ make install
+$ make -f Makefile.template
```
Or you can simply do something like this:
```
-$ BUILD_TYPE=release make install
+$ BUILD_TYPE=release make -f Makefile.template
```
The build method described here is a `native build` in which the build environment and execution environment are same. So, this command creates a runtime binary targeting the current build architecture, probably x86_64, as the execution environment. You can find the build output in the ./Product folder as follows:
@@ -84,12 +79,8 @@ The build method described here is a `native build` in which the build environme
```
$ tree -L 2 ./Product
./Product
-├── obj -> /home/sjlee/star/one/Product/x86_64-linux.debug/obj
-├── out -> /home/sjlee/star/one/Product/x86_64-linux.debug/out
-└── x86_64-linux.debug
- ├── BUILD
- ├── CONFIGURE
- ├── INSTALL
+├── out -> /home/sjlee/star/one/Product/x86_64-linux.release/out
+└── x86_64-linux.release
├── obj
└── out
@@ -98,62 +89,74 @@ $ tree -L 2 ./Product
$ tree -L 3 ./Product/out
./Product/out
├── bin
-│   ├── nnapi_test
-│   ├── nnpackage_run
-│   ├── tflite_loader_test_tool
-│   └── tflite_run
+│ ├── onert_run
+│ ├── tflite_comparator
+│ └── tflite_run
├── include
-│   ├── nnfw
-│   │   ├── NeuralNetworksEx.h
-│   │   ├── NeuralNetworksExtensions.h
-│   │   ├── NeuralNetworks.h
-│   │   ├── nnfw_experimental.h
-│   │   └── nnfw.h
-│   └── onert
-│   ├── backend
-│   ├── compiler
-│   ├── exec
-│   ├── ir
-│   ├── misc
-│   └── util
+│ ├── nnfw
+│ │ ├── NeuralNetworksEx.h
+│ │ ├── NeuralNetworksExtensions.h
+│ │ ├── NeuralNetworks.h
+│ │ ├── nnfw_experimental.h
+│ │ └── nnfw.h
+│ └── onert
+│ ├── backend
+│ ├── compiler
+│ ├── exec
+│ ├── ir
+│ └── util
├── lib
-│   ├── libbackend_cpu.so
-│   ├── libcircle_loader.so
-│   ├── libneuralnetworks.so
-│   ├── libnnfw-dev.so
-│   ├── libnnfw_lib_benchmark.so
-│   ├── libnnfw_lib_misc.a
-│   ├── libonert_core.so
-│   └── libtflite_loader.so
-├── tests
-│   ├── FillFrom_runner
-│   ├── nnpkgs
-│   │   └── FillFrom
-│   └── scripts
-│   ├── benchmark_nnapi.sh
-│   ├── benchmark_nnpkg.sh
-│   ├── common.sh
-│   ├── framework
-│   ├── list
-│   ├── print_to_json.sh
-│   ├── test-driver.sh
-│   ├── test_framework.sh
-│   ├── test_scheduler_with_profiling.sh
-│   └── unittest.sh
-├── unittest
-│   ├── nnapi_gtest
-│   ├── nnapi_gtest.skip
-│   ├── nnapi_gtest.skip.noarch.interp
-│   ├── nnapi_gtest.skip.x86_64-linux.cpu
-│   ├── test_compute
-│   ├── test_onert
-│   ├── test_onert_backend_cpu_common
-│   ├── test_onert_frontend_nnapi
-│   └── tflite_test
-└── unittest_standalone
- └── nnfw_api_gtest
-
-19 directories, 36 files
+│ ├── libbackend_cpu.so
+│ ├── libbackend_ruy.so
+│ ├── libneuralnetworks.so
+│ ├── libnnfw-dev.so
+│ └── libonert_core.so
+├── nnapi-gtest
+│ ├── nnapi_gtest
+│ ├── nnapi_gtest.skip
+│ └── nnapi_gtest.skip.x86_64-linux.cpu
+├── test
+│ ├── command
+│ │ ├── nnpkg-test
+│ │ ├── prepare-model
+│ │ ├── unittest
+│ │ └── verify-tflite
+│ ├── FillFrom_runner
+│ ├── list
+│ │ ├── benchmark_nnpkg_model_list.txt
+│ │ ├── nnpkg_test_list.armv7l-linux.acl_cl
+│ │ ├── nnpkg_test_list.armv7l-linux.acl_neon
+│ │ ├── nnpkg_test_list.armv7l-linux.cpu
+│ │ ├── tflite_comparator.aarch64.acl_cl.list
+│ │ ├── tflite_comparator.aarch64.acl_neon.list
+│ │ ├── tflite_comparator.aarch64.cpu.list
+│ │ ├── tflite_comparator.armv7l.acl_cl.list
+│ │ ├── tflite_comparator.armv7l.acl_neon.list
+│ │ ├── tflite_comparator.armv7l.cpu.list
+│ │ └── tflite_comparator.x86_64.cpu.list
+│ ├── models
+│ │ ├── run_test.sh
+│ │ └── tflite
+│ ├── nnpkgs
+│ │ └── FillFrom
+│ └── onert-test
+└── unittest
+ ├── ndarray_test
+ ├── nnfw_api_gtest
+ ├── nnfw_api_gtest_models
+ │ ├── add
+ │ ├── add_invalid_manifest
+ │ ├── add_no_manifest
+ │ ├── if_dynamic
+ │ ├── mobilenet_v1_1.0_224
+ │ └── while_dynamic
+ ├── nnfw_lib_misc_test
+ ├── test_cker
+ ├── test_onert_core
+ ├── test_onert_frontend_nnapi
+ └── tflite_test
+
+26 directories, 42 files
```
@@ -173,26 +176,8 @@ inception_v3.tflite
The result of running the inception_v3 model using runtime is as follows. Please consider that this is a test that simply checks execution latency without considering the accuracy of the model.
```
-$ USE_NNAPI=1 LD_LIBRARY_PATH="./Product/out/lib/:$LD_LIBRARY_PATH" ./Product/out
-/bin/tflite_run ./inception_v3.tflite
-nnapi function 'ANeuralNetworksModel_create' is loaded from './Product/out/lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksModel_addOperand' is loaded from './Product/out/lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksModel_setOperandValue' is loaded from './Product/out/lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksModel_addOperation' is loaded from './Product/out/lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksModel_identifyInputsAndOutputs' is loaded from './Product/out/lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksModel_finish' is loaded from './Product/out/lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksCompilation_create' is loaded from './Product/out/lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksCompilation_finish' is loaded from './Product/out/lib/libneuralnetworks.so'
-input tensor indices = [317,]
-nnapi function 'ANeuralNetworksExecution_create' is loaded from './Product/out/lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksExecution_setInput' is loaded from './Product/out/lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksExecution_setOutput' is loaded from './Product/out/lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksExecution_startCompute' is loaded from './Product/out/lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksEvent_wait' is loaded from './Product/out/lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksEvent_free' is loaded from './Product/out/lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksExecution_free' is loaded from './Product/out/lib/libneuralnetworks.so'
-... run 1 takes 183.895 ms
-output tensor indices = [316(max:905),]
+$ ./Product/out/bin/onert_run ./inception_v3.tflite
+Model Filename ./inception_v3.tflite
===================================
MODEL_LOAD takes 1.108 ms
PREPARE takes 0.190 ms
@@ -202,10 +187,8 @@ EXECUTE takes 183.895 ms
- MIN : 183.895 ms
- GEOMEAN : 183.895 ms
===================================
-nnapi function 'ANeuralNetworksCompilation_free' is loaded from './Product/out/lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksModel_free' is loaded from './Product/out/lib/libneuralnetworks.so'
```
-Here, `USE_NNAPI=1` means that **ONE** runtime is used for model inference. If omitted, the model will be executed using Tensorflow lite, the basic framework for verification. `LD_LIBRARY_PATH="./Product/out/lib/:$LD_LIBRARY_PATH"` specifies the location of the runtime library to be used for testing. From the previous build result, you can see that it is the path to the directory where `libneuralnetworks.so` and `libonert_core.so` are located.
+If you use `tflite_run` instead of `onert_run`, the model will be executed using TensorFlow Lite, the basic framework for verification. From the previous build result, you can see the directory where `tflite_run` and `onert_run` are located.
If you come here without any problems, you have all of the basic environments for runtime development.
diff --git a/docs/howto/how-to-contribute.md b/docs/howto/how-to-contribute.md
index fe7b140eb..7c6855427 100644
--- a/docs/howto/how-to-contribute.md
+++ b/docs/howto/how-to-contribute.md
@@ -34,8 +34,9 @@ This section explains the steps to create a pull request (PR).
1. Create a commit
It is time to create a commit for submission once you are convinced that your contribution is
- ready to go. Please include signed-off message at the end of commit message. If not, your pull
- request will be **rejected** by CI.
+ ready to go. Please include
+ [signed-off message](https://github.com/Samsung/ONE/wiki/ONE-Developer's-Certificate-of-Origin)
+ at the end of commit message. If not, your pull request will be **rejected** by CI.
1. Check code format locally
diff --git a/docs/howto/how-to-cross-build-runtime-for-aarch64.md b/docs/howto/how-to-cross-build-runtime-for-aarch64.md
index b30ce348a..3e2705762 100644
--- a/docs/howto/how-to-cross-build-runtime-for-aarch64.md
+++ b/docs/howto/how-to-cross-build-runtime-for-aarch64.md
@@ -16,7 +16,7 @@ Use `install_rootfs.sh` script to prepare Root File System. You should have `sud
$ sudo ./tools/cross/install_rootfs.sh aarch64
```
- supports `arm`(default) and `aarch64` architecutre for now
-- supports `xenial`(default), `trusty` and `bionic` release
+- supports `bionic`(default) and `focal` release
To see the options,
```
@@ -27,7 +27,7 @@ RootFS will be prepared at `tools/cross/rootfs/aarch64` folder.
***\* CAUTION: The OS version of rootfs must match the OS version of execution target device. On the other hand, you need to match the Ubuntu version of the development PC with the Ubuntu version of rootfs to be used for cross-build. Otherwise, unexpected build errors may occur.***
-If you are using Ubuntu 16.04 LTS, select `xenial`, if you are using Ubuntu 18.04 LTS, select `bionic`. You can check your Ubuntu code name in the following way.
+If you are using Ubuntu 18.04 LTS, select `bionic`; if you are using Ubuntu 20.04 LTS, select `focal`. You can check your Ubuntu code name in the following way.
```
$ cat /etc/lsb-release
@@ -44,7 +44,7 @@ If a build error occurs because the version of the development system and the ta
Use `ROOTFS_DIR` to a full path to prepare at alternative path.
```
-$ ROOTFS_DIR=/home/user/rootfs/aarch64-xenial sudo -E ./tools/cross/install_rootfs.sh aarch64
+$ ROOTFS_DIR=/home/user/rootfs/aarch64-bionic sudo -E ./tools/cross/install_rootfs.sh aarch64
```
### Using proxy
@@ -89,8 +89,8 @@ normal build and cross build as follows.
```
$ export ROOTFS_DIR=xxx
...
-$ make all install # do normal build
-$ TARGET_ARCH=aarch64 make all install # do cross build
+$ make # do normal build
+$ TARGET_ARCH=aarch64 make # do cross build
```
### Run test
diff --git a/docs/howto/how-to-cross-build-runtime-for-arm.md b/docs/howto/how-to-cross-build-runtime-for-arm.md
index c17c465bf..2db4d564a 100644
--- a/docs/howto/how-to-cross-build-runtime-for-arm.md
+++ b/docs/howto/how-to-cross-build-runtime-for-arm.md
@@ -13,26 +13,26 @@ Use `install_rootfs.sh` script to prepare Root File System. You should have `sud
```
$ sudo ./tools/cross/install_rootfs.sh arm
```
-- supports `arm`(default) and `aarch` architecutre for now
-- supports `xenial`(default) `trusty`, and `bionic` release
+- supports `arm`(default) and `aarch64` architectures for now
+- supports `focal`(default), `bionic`, and `jammy` release
To see the options,
```
$ ./tools/cross/install_rootfs.sh -h
```
-RootFS will be prepared at `tools/cross/rootfs/arm` folder.
+RootFS will be prepared at `tools/cross/rootfs/arm` or `tools/cross/rootfs/aarch64` folder.
***\* CAUTION: The OS version of rootfs must match the OS version of execution target device. On the other hand, you need to match the Ubuntu version of the development PC with the Ubuntu version of rootfs to be used for cross-build. Otherwise, unexpected build errors may occur.***
-If you are using Ubuntu 16.04 LTS, select `xenial`, if you are using Ubuntu 18.04 LTS, select `bionic`. You can check your Ubuntu code name in the following way.
+If you are using Ubuntu 20.04 LTS, select `focal`; if you are using Ubuntu 22.04 LTS, select `jammy`. You can check your Ubuntu code name in the following way.
```
$ cat /etc/lsb-release
DISTRIB_ID=Ubuntu
-DISTRIB_RELEASE=18.04
-DISTRIB_CODENAME=bionic
-DISTRIB_DESCRIPTION="Ubuntu 18.04.4 LTS"
+DISTRIB_RELEASE=22.04
+DISTRIB_CODENAME=jammy
+DISTRIB_DESCRIPTION="Ubuntu 22.04.3 LTS"
```
If a build error occurs because the version of the development system and the target system do not match, and if you can't replace your development system for any reason, you can consider [cross-build using the docker image](how-to-build-runtime-using-prebuilt-docker-image.md).
@@ -42,7 +42,7 @@ If a build error occurs because the version of the development system and the ta
Use `ROOTFS_DIR` to a full path to prepare at alternative path.
```
-$ ROOTFS_DIR=/home/user/rootfs/arm-xenial sudo -E ./tools/cross/install_rootfs.sh arm
+$ ROOTFS_DIR=/home/user/rootfs/arm-bionic sudo -E ./tools/cross/install_rootfs.sh arm
```
### Using proxy
@@ -61,55 +61,59 @@ for `http`, `https` and `ftp` protocol.
## Install ARM Cross Toolchain
-We recommend you have g++ >= 6 installed on your system because NN generated tests require it.
+We recommend you have g++ >= 6.1 installed on your system because NN generated tests require it (C++14).
-- On Ubuntu 16.04 or older, follow the next steps:
+### Ubuntu 20.04 LTS
-```
-$ cd ~/your/path
-$ wget https://releases.linaro.org/components/toolchain/binaries/7.2-2017.11/arm-linux-gnueabihf/gcc-linaro-7.2.1-2017.11-x86_64_arm-linux-gnueabihf.tar.xz
-$ tar xvf gcc-linaro-7.2.1-2017.11-x86_64_arm-linux-gnueabihf.tar.xz
-$ echo 'export PATH=~/your/path/gcc-linaro-7.2.1-2017.11-x86_64_arm-linux-gnueabihf/bin:$PATH' >> ~/.bashrc
-```
+On Ubuntu 20.04 LTS, you can install using `apt-get`.
-- On Ubuntu 18.04 LTS, you can install using `apt-get`.
-Choose g++ version whatever you prefer: 6, 7 or 8.
+Choose whichever g++ version you prefer: 9 (default) or 10. We officially test on the default
+g++ version, so we don't guarantee builds with a different version.
```
-$ sudo apt-get install g++-{6,7,8}-arm-linux-gnueabihf
+$ sudo apt-get install g++-{9,10}-arm-linux-gnueabihf
```
-Make sure you get `libstdc++.so` updated on your target with your new toolchain's corresponding one.
+If you select a specific version, update the symbolic links for the build toolchain.
-For example, if you installed gcc-linaro-7.2.1-2017.11 above, do
+Otherwise, you should set your custom cmake crossbuild toolchain. You can find cmake toolchain files in `infra/nnfw/cmake/buildtool/cross/`.
```
-$ wget https://releases.linaro.org/components/toolchain/binaries/7.2-2017.11/arm-linux-gnueabihf/runtime-gcc-linaro-7.2.1-2017.11-arm-linux-gnueabihf.tar.xz
-$ tar xvf runtime-gcc-linaro-7.2.1-2017.11-arm-linux-gnueabihf.tar.xz
+$ update-alternatives --install /usr/bin/arm-linux-gnueabihf-gcc arm-linux-gnueabihf-gcc /usr/bin/arm-linux-gnueabihf-gcc-10 80 \
+ --slave /usr/bin/arm-linux-gnueabihf-g++ arm-linux-gnueabihf-g++ /usr/bin/arm-linux-gnueabihf-g++-10 \
+ --slave /usr/bin/arm-linux-gnueabihf-gcov arm-linux-gnueabihf-gcov /usr/bin/arm-linux-gnueabihf-gcov-10
```
-Then, copy `libstdc++.so.6.0.24` into `/usr/lib/arm-linux-gnueabihf`, and update symbolic links on your device.
+### Ubuntu 22.04 LTS
+
+Same as Ubuntu 20.04 LTS, except for the g++ version.
## Build and install ARM Compute Library
-Mostly you only need once of ACL build.
+Mostly, you only need to build ACL (ARM Compute Library) once.
+
+To build ACL, you need to install scons:
+
+```
+$ sudo apt-get install scons
+```
-ACL will be automatically installed in `externals/acl` when you build runtime without any changes.
+The ACL source will be automatically installed in `externals/ARMCOMPUTE` when you build the runtime without any changes.
-You can check ACL source information in `cmake/packages/ARMComputeSourceConfig.cmake`
+You can check ACL source information in `infra/cmake/packages/ARMComputeSourceConfig.cmake`
-## Cross build for ARM
+## Cross build for ARM by using Makefile.template
Give `TARGET_ARCH` variable to set the target architecture.
If you used `ROOTFS_DIR` to prepare in alternative folder, you should also give this to makefile.
```
-$ CROSS_BUILD=1 TARGET_ARCH=armv7l make all install
+$ CROSS_BUILD=1 TARGET_ARCH=armv7l make -f Makefile.template
# If ROOTFS_DIR is in alternative folder
$ ROOTFS_DIR=/path/to/your/rootfs/arm \
-CROSS_BUILD=1 TARGET_ARCH=armv7l make all install
+CROSS_BUILD=1 TARGET_ARCH=armv7l make
```
You can also omit the `CROSS_BUILD=1` option if you explicitly pass `ROOTFS_DIR`. In that case, if
@@ -120,15 +124,23 @@ normal build and cross build as follows.
```
$ export ROOTFS_DIR=xxx
...
-$ make all install # do normal build
-$ TARGET_ARCH=armv7l make all install # do cross build
+$ make -f Makefile.template # do normal build
+$ TARGET_ARCH=armv7l make -f Makefile.template # do cross build
```
+Makefile.template automatically passes the cross-build toolchain settings to CMake by parsing these variables.
+
### Run test
-To run and test the cross-compiled runtime, you need to copy the compiled output to the target device of the architecture in which it is executable.
+To run and test the cross-compiled runtime, you need to install the required library packages and copy the compiled output to a target device of the architecture on which it is executable.
+
+1. Install the hdf5 and boost library packages
+
+```
+$ sudo apt install libhdf5-dev libboost-system-dev libboost-program-options-dev
+```
-1. Copy all artifacts under the `./Product` folder to the target device, Odroid-XU4 for example, as a whole.
+2. Copy all artifacts under the `./Product/armv7l-linux.<BUILD_TYPE>` folder to the target device, Odroid-XU4 for example, as a whole.
```
$ ssh odroid mkdir -p one/Product
@@ -143,19 +155,17 @@ test-driver.sh
...
```
-2. Log in to the target device, go to the copied path, and reestore the symbolic link settings of the `Product` directory.
+3. Log in to the target device, go to the copied path, and restore the symbolic link settings of the `Product` directory.
```
$ ssh odroid
sjlee@odroid's password:
...
$ cd ~/one/Product
-$ ln ${PWD}/armv7l-linux.debug/obj obj
$ ln -s ${PWD}/armv7l-linux.debug/out out
$ cd ..
$ ls -la Product
drwxrwxr-x 5 sjlee sjlee 4096 Jun 4 20:55 armv7l-linux.debug
-lrwxrwxrwx 1 sjlee sjlee 51 Jun 4 20:54 obj -> /home/sjlee/one/Product/armv7l-linux.debug/obj
lrwxrwxrwx 1 sjlee sjlee 51 Jun 4 20:55 out -> /home/sjlee/one/Product/armv7l-linux.debug/out
```
diff --git a/docs/howto/how-to-introduce-a-new-operation-into-runtime.md b/docs/howto/how-to-introduce-a-new-operation-into-runtime.md
index ab449c4be..9ab498783 100644
--- a/docs/howto/how-to-introduce-a-new-operation-into-runtime.md
+++ b/docs/howto/how-to-introduce-a-new-operation-into-runtime.md
@@ -24,7 +24,6 @@ onert support the operation.
- [acl_cl](#acl_cl-1)
- [acl_neon](#acl_neon-1)
- [cpu](#cpu-1)
- - [TensorRegister (in some cases)](#tensorregister-in-some-cases)
- [ConstantInitializer (in some cases)](#constantinitializer-in-some-cases)
- [cpu](#cpu-2)
- [Samples (to be updated)](#samples-to-be-updated)
@@ -179,12 +178,12 @@ void Dumper::visit(const Select &node)
5. Add code for shape inference
- ONE runtime tries to calculate shapes and allocate memory during compilation time. For some calculations of output shapes that cannot be done during compilation time, ONE runtime will calculate shapes and allocate memory during execution time.
- Calculation of shapes during compilation time is called _static shape inference_ and calculation of shapes during execution time is called _dynamic shape inference_.
-- [`StaticShapeInference.h`](`/runtime/onert/compiler/StaticShapeInference.h`)
+- [`StaticShapeInferer.h`](`/runtime/onert/compiler/StaticShapeInferer.h`)
```CPP
void visit(const ir::operation::Select &op) override;
```
-- [`StaticShapeInference.cc`](/runtime/onert/core/src/compiler/StaticShapeInference.cc)
+- [`StaticShapeInferer.cc`](/runtime/onert/core/src/compiler/StaticShapeInferer.cc)
```CPP
void StaticShapeInferer::visit(const ir::operation::Select &op)
{
@@ -229,7 +228,7 @@ void DynamicShapeInferer::visit(const ir::operation::Select &op)
ir::Shape new_shape =
shape_inference::inferSelectShape(input_cond_shape, input_true_shape, input_false_shape);
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
+ output->applyShape(new_shape);
}
```
@@ -420,51 +419,28 @@ void visit(const ir::operation::Select &) override;
```cpp
void KernelGenerator::visit(const ir::operation::Select &node)
{
- const auto output_index{node.getOutputs().at(ir::operation::Select::Output::OUTPUT)};
- const auto cond_index{node.getInputs().at(ir::operation::Select::Input::COND)};
- const auto input1_index{node.getInputs().at(ir::operation::Select::Input::INPUT1)};
- const auto input2_index{node.getInputs().at(ir::operation::Select::Input::INPUT2)};
-
- const auto output_backend_descr = ::onert::backend::cpu::kernel::getTensorDescriptor(
- _ctx.at(output_index), _current_op_seq_layout);
- const auto cond_backend_descr = ::onert::backend::cpu::kernel::getTensorDescriptor(
- _ctx.at(cond_index), _current_op_seq_layout);
- const auto input1_backend_descr = ::onert::backend::cpu::kernel::getTensorDescriptor(
- _ctx.at(input1_index), _current_op_seq_layout);
- const auto input2_backend_descr = ::onert::backend::cpu::kernel::getTensorDescriptor(
- _ctx.at(input2_index), _current_op_seq_layout);
+ const auto output_index{node.getOutputs().at(0)};
+ const auto condition_index{node.getInputs().at(ir::operation::Select::Input::CONDITION)};
+ const auto true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
+ const auto false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto cond_alloc = _tensor_builder->at(cond_index).get();
- auto input1_alloc = _tensor_builder->at(input1_index).get();
- auto input2_alloc = _tensor_builder->at(input2_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto condition_tensor = _tensor_reg->getPortableTensor(condition_index);
+ auto true_tensor = _tensor_reg->getPortableTensor(true_index);
+ auto false_tensor = _tensor_reg->getPortableTensor(false_index);
- auto fn = std::make_unique<::onert::backend::cpu::kernel::SelectLayer>();
+ auto fn = std::make_unique<ops::SelectLayer>();
- fn->configure(cond_alloc->buffer(), cond_backend_descr, input1_alloc->buffer(),
- input1_backend_descr, input2_alloc->buffer(), input2_backend_descr,
- output_alloc->buffer(), output_backend_descr);
+ fn->configure(condition_tensor, true_tensor, false_tensor, output_tensor);
- _execution_builder->append(std::move(fn));
+ _return_fn = std::move(fn);
}
```
-### TensorRegister (in some cases)
-
-This component registers tensors. Most tensors will be automatically registered internally. There
-are some exceptions, however, where additional implementations are required. It is the case when a
-tensor is treated unusually in its backend.
-
-The kernel of some operation has weights in `HWIO` as layout(data format) in case of that input's
-layout is `NHWC`. And, for `NCHW`, weights is `OIHW`. But TFLite model has weigths, `OHWI` for
-`NHWC` and `OIHW` for `NCHW`. Therefore, to register the appropriate tensor on the backend, you have
-to implement it additionally.
-
### ConstantInitializer (in some cases)
This component registers functions that initialize constant tensors and initializes the constant tensor
-layer. This is similar to TensorRegister. Most tensors will be automatically registered internally.
-And there are some exceptions.
+layer. Most tensors will be automatically registered internally, but there are some exceptions.
#### cpu
diff --git a/docs/howto/how-to-use-specific-backend.md b/docs/howto/how-to-use-specific-backend.md
new file mode 100644
index 000000000..a839777a0
--- /dev/null
+++ b/docs/howto/how-to-use-specific-backend.md
@@ -0,0 +1,40 @@
+# How to Use Specific Backend during Inference
+
+ONE runtime has several ways to use a specific backend during inference.
+
+## Using NNFW API
+
+### [nnfw_set_available_backends](https://github.com/Samsung/ONE/blob/c46ddc04abdb58323fbd38389e6927f003bfaea1/runtime/onert/api/include/nnfw.h#L458)
+- Multiple backends can be set and they must be separated by a semicolon (ex: "acl_cl;cpu").
+- For each backend string, `libbackend_{backend}.so` will be dynamically loaded during nnfw_prepare.
+- Among the multiple backends, the 1st element is used as the default backend.
+
+### [nnfw_set_op_backend](https://github.com/Samsung/ONE/blob/c46ddc04abdb58323fbd38389e6927f003bfaea1/runtime/onert/api/include/nnfw.h#L476)
+- The backend set for an op has higher priority than the available backends specified by nnfw_set_available_backends. A combined usage sketch follows.
+
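+Below is a minimal usage sketch combining the two calls above. It is illustrative
+only: `model.nnpackage` is a placeholder path, the operation name string style
+("CONV_2D") may differ by version, and error-status checks are omitted.
+
+```cpp
+// Hypothetical sketch of backend selection via the NNFW C API (error checks omitted).
+#include <nnfw.h>
+
+int main()
+{
+  nnfw_session *session = nullptr;
+  nnfw_create_session(&session);
+  nnfw_load_model_from_file(session, "model.nnpackage"); // placeholder path
+  nnfw_set_available_backends(session, "acl_cl;cpu");    // acl_cl becomes the default backend
+  nnfw_set_op_backend(session, "CONV_2D", "cpu");        // higher priority for this op type
+  nnfw_prepare(session); // libbackend_{backend}.so files are dynamically loaded here
+  // ... set inputs/outputs and call nnfw_run(session) ...
+  nnfw_close_session(session);
+  return 0;
+}
+```
+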
+## Using Environment Variable
+
+### 1. BACKENDS
+- Same as `nnfw_set_available_backends`
+- Example
+```bash
+BACKENDS=cpu ./Product/out/bin/onert_run ...
+```
+
+### 2. OP_BACKEND_[OP_TYPE]
+- Same as `nnfw_set_op_backend`
+- Set backend for specific operator type
+- Example
+ - Execute `Conv2D` operator on ruy backend and others on cpu backend
+```bash
+OP_BACKEND_Conv2D=ruy BACKENDS="cpu;ruy" ./Product/out/bin/onert_run ...
+```
+
+### 3. OP_BACKEND_MAP
+- Set backend for specific operator by its index
+- Format : `<op_id>=<backend>;<op_id>=<backend>...`
+- Example
+ - Execute `operator 10` on `acl_cl` backend and others on `acl_neon` backend
+```bash
+OP_BACKEND_MAP="10=acl_cl" BACKENDS="acl_neon;acl_cl" ./Product/out/bin/onert_run ...
+```
diff --git a/docs/howto/index.rst b/docs/howto/index.rst
index d04224ed6..faeedbfaa 100644
--- a/docs/howto/index.rst
+++ b/docs/howto/index.rst
@@ -10,18 +10,22 @@ How To
:maxdepth: 2
:caption: Contents:
- ./how-to-add-a-new-operation.md
./how-to-build-compiler.md
./how-to-build-package.md
./how-to-build-runtime.md
+ ./how-to-build-runtime-tizen-gbs-rpi4.md
./how-to-build-runtime-using-prebuilt-docker-image.md
- ./how-to-cross-build-runtime-for-arm.md
./how-to-cross-build-runtime-for-aarch64.md
./how-to-cross-build-runtime-for-android.md
- ./how-to-contribute.md
- ./how-to-make-an-application-with-runtime.md
- ./how-to-remote-debugging-with-visual-studio-code.md
+ ./how-to-cross-build-runtime-for-arm.md
./how-to-run-package.md
+ ./how-to-make-an-application-with-runtime.md
./how-to-use-api.md
- ./how-to-use-nnfw-api.md
./how-to-use-nnapi-binding.md
+ ./how-to-use-nnfw-api.md
+ ./how-to-use-specific-backend.md
+ ./how-to-contribute.md
+ ./how-to-remote-debugging-with-visual-studio-code.md
+ ./how-to-add-a-new-operation.md
+ ./how-to-introduce-a-new-operation-into-compiler.md
+ ./how-to-introduce-a-new-operation-into-runtime.md
diff --git a/docs/release/1.10/index.rst b/docs/release/1.10/index.rst
new file mode 100644
index 000000000..28325e685
--- /dev/null
+++ b/docs/release/1.10/index.rst
@@ -0,0 +1,13 @@
+.. ONE documentation master file, created by
+ sphinx-quickstart on Thu May 14 18:13:12 2020.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.10
+====
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.10.0.md
diff --git a/docs/release/1.10/release-note-1.10.0.md b/docs/release/1.10/release-note-1.10.0.md
new file mode 100644
index 000000000..e6423be6f
--- /dev/null
+++ b/docs/release/1.10/release-note-1.10.0.md
@@ -0,0 +1,25 @@
+# Release Note 1.10.0
+
+## ONE Compiler
+
+### Compiler supports more operations
+
+- Dequantize, UnidirectionalSequenceLSTM
+
+### Changes
+
+- New `--fold_dequantize` option in `one-optimize`
+- New `--fuse_add_with_tconv` option in `one-optimize`
+- Support `int16` quantization in `one-quantize`
+- Test scripts are added for basic testing of one-cmds command line tools
+- Bug fixes for one-cmds command line tools
+
+
+## ONE Runtime
+
+### Runtime backend operation support
+ - ACL-CL backend: OneHot
+ - CPU backend: FullyConnected for Float32 16x1 Block Sparsity
+
+### Optimization
+ - Speed up for ReduceSum, StridedSlice and BinaryArithmetic in CPU backend
diff --git a/docs/release/1.11/index.rst b/docs/release/1.11/index.rst
new file mode 100644
index 000000000..d08c88b29
--- /dev/null
+++ b/docs/release/1.11/index.rst
@@ -0,0 +1,13 @@
+.. ONE documentation master file, created by
+ sphinx-quickstart on Thu May 14 18:13:12 2020.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.11
+====
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.11.0.md
diff --git a/docs/release/1.11/release-note-1.11.0.md b/docs/release/1.11/release-note-1.11.0.md
new file mode 100644
index 000000000..4b0bf4e40
--- /dev/null
+++ b/docs/release/1.11/release-note-1.11.0.md
@@ -0,0 +1,21 @@
+# Release Note 1.11.0
+
+## ONE Compiler
+
+### Compiler supports more operations
+- MaxPoolWithArgMax by CustomOp
+
+### Changes
+- `one-build` command added as representative command
+- one-cmds are now revised to Python scripts and support a configuration file as input parameters
+- added `rawdata2hdf5` tool to help create input datasets for calibration
+- added more optimization passes in `one-optimize`: `fuse_preactivation_batchnorm`, `make_batchnorm_gamma_positive` and `fuse_activation_function`
+
+## ONE Runtime
+
+### Runtime backend operation supports more operations and types
+
+- CPU backend
+ - float: AddN, Floor, UniDirectionalSequenceLSTM
+ - uint8: Dequantize, Rank
+ - int8: Dequantize, Rank, Shape
diff --git a/docs/release/1.12/index.rst b/docs/release/1.12/index.rst
new file mode 100644
index 000000000..61465ee7b
--- /dev/null
+++ b/docs/release/1.12/index.rst
@@ -0,0 +1,13 @@
+.. ONE documentation master file, created by
+ sphinx-quickstart on Thu May 14 18:13:12 2020.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.12
+====
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.12.0.md
diff --git a/docs/release/1.12/release-note-1.12.0.md b/docs/release/1.12/release-note-1.12.0.md
new file mode 100644
index 000000000..1f13bc4ce
--- /dev/null
+++ b/docs/release/1.12/release-note-1.12.0.md
@@ -0,0 +1,28 @@
+# Release Note 1.12.0
+
+## ONE Compiler
+
+### Compiler Frontend
+
+- Add optimization pass: ReplaceMulAddWithDepthwiseConvPass, SubstitutePackToReshape, RemoveRedundantTranspose, ShuffleWeightTo16x1Float32Pass
+- Add quantization for InstanceNorm.
+- Fix bug of `one-import-bcq` command for `--v1`, `--v2` arguments.
+- Fix FuseBCQPass to work with inter-subgraphs in the model file and minor BCQ related optimizations.
+
+## ONE Runtime
+
+### Runtime backend operation supports more operations and types
+
+- CPU backend
+ - Concat: int8
+ - DepthToSpace: float, uint8, int8
+ - LeakyRelu: float
+- ACL-CL backend
+ - ArgMin: float, uint8, int8
+- ACL-NEON backend
+ - ArgMax: int8
+ - ArgMin: float, uint8, int8
+
+### nnpackage defines configuration file
+
+- Allow users to set configuration variable via conf file. For more information, See [nnpackage spec](../../../nnpackage/spec)
diff --git a/docs/release/1.13/index.rst b/docs/release/1.13/index.rst
new file mode 100644
index 000000000..5601f743b
--- /dev/null
+++ b/docs/release/1.13/index.rst
@@ -0,0 +1,13 @@
+.. ONE documentation master file, created by
+ sphinx-quickstart on Wed Jan 14 16:48:12 2021.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.13
+====
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.13.0.md
diff --git a/docs/release/1.13/release-note-1.13.0.md b/docs/release/1.13/release-note-1.13.0.md
new file mode 100644
index 000000000..31e3a0d77
--- /dev/null
+++ b/docs/release/1.13/release-note-1.13.0.md
@@ -0,0 +1,12 @@
+# Release Note 1.13.0
+
+## ONE Compiler
+
+### Compiler Frontend
+
+- Add optimization pass: ConvertNCHWToNHWC, FoldSparseToDensePass, FuseBatchNormWithConvPass, ForwardReshapeToUnaryOpPass, RemoveUnnecessarySlicePass, RemoveUnnecessarySplitPass, RemoveUnnecessaryReshapePass, RemoveRedundantReshape, SubstituteTransposeToReshapePass, SubstituteSqueezeToReshapePass,
+- Support more operators: FAKE_QUANT
+- Enhancements: Support auto generated random input for record-minmax (for better quantization testing)
+- Changes: `--all` option to `--O1` in circle2circle (and one-optimize)
+- Fixes: `tf2tfliteV2` accepts input shapes with the `--v2` option; lots of fixes to increase test coverage
+- Experimental: Compile ONNX models to circle
diff --git a/docs/release/1.14/index.rst b/docs/release/1.14/index.rst
new file mode 100644
index 000000000..558229e7f
--- /dev/null
+++ b/docs/release/1.14/index.rst
@@ -0,0 +1,13 @@
+.. ONE documentation master file, created by
+ sphinx-quickstart on Thu Mar 18 16:47:12 2021.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.14
+====
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.14.0.md
diff --git a/docs/release/1.14/release-note-1.14.0.md b/docs/release/1.14/release-note-1.14.0.md
new file mode 100644
index 000000000..7c567b0fe
--- /dev/null
+++ b/docs/release/1.14/release-note-1.14.0.md
@@ -0,0 +1,10 @@
+# Release Note 1.14.0
+
+## ONE Compiler
+
+### Compiler Frontend
+
+- `one-codegen` interface now distinguishes its own arguments from the backend's.
+- Adds `RemoveUnnecessaryStridedSlice` optimization pass.
+- Introduces experimental support for generating profile data.
+ - Adds `--generate_profile_data` option to `one-optimize`, `one-quantize`.
diff --git a/docs/release/1.15/index.rst b/docs/release/1.15/index.rst
new file mode 100644
index 000000000..de2194742
--- /dev/null
+++ b/docs/release/1.15/index.rst
@@ -0,0 +1,13 @@
+.. ONE documentation master file, created by
+ sphinx-quickstart on Thu Mar 18 16:47:12 2021.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.15
+====
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.15.0.md
diff --git a/docs/release/1.15/release-note-1.15.0.md b/docs/release/1.15/release-note-1.15.0.md
new file mode 100644
index 000000000..106cefd42
--- /dev/null
+++ b/docs/release/1.15/release-note-1.15.0.md
@@ -0,0 +1,42 @@
+# Release Note 1.15.0
+
+## ONE Compiler
+
+### Compiler Frontend
+
+- Support more Ops for quantization
+- Fix `record-minmax` tool for bool type, NaN values
+- Fix `one-cmds` test scripts
+- Remove `stdex` module
+- `arser` supports short option
+
+
+## ONE Runtime
+
+### Runtime backend supports more operations and types
+
+- CPU backend
+ - Add: int8
+ - AvgPool2d: int8
+ - Conv2D: int8
+ - DepthwiseConv2D: int8
+ - Div: uint8
+ - Elu: float
+ - ExpandDims: int8
+ - LogicalAnd: boolean
+ - Maximum: uint8
+ - MaxPool2D: int8
+ - Minimum: uint8
+ - Mul: int8
+ - Pad: int8
+ - PadV2: int8
+ - Quantize: uint8, int8
+ - Reshape: int8
+ - ResizeBilinear: int8
+ - Softmax: int8
+ - Squeeze: int8
+ - Sub: int8
+
+### ARM Compute Library Update
+
+- ONERT uses Compute Library v21.02
diff --git a/docs/release/1.16/index.rst b/docs/release/1.16/index.rst
new file mode 100644
index 000000000..08f4c3e65
--- /dev/null
+++ b/docs/release/1.16/index.rst
@@ -0,0 +1,14 @@
+.. ONE documentation master file, created by
+ sphinx-quickstart on Thu May 20 12:56:12 2021.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.16
+====
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.16.0.md
+ ./release-note-1.16.1.md
diff --git a/docs/release/1.16/release-note-1.16.0.md b/docs/release/1.16/release-note-1.16.0.md
new file mode 100644
index 000000000..be492033c
--- /dev/null
+++ b/docs/release/1.16/release-note-1.16.0.md
@@ -0,0 +1,17 @@
+# Release Note 1.16.0
+
+## ONE Compiler
+
+### Compiler Frontend
+
+- Enable `PadV2` in luci-interpreter and quantization
+- Provide `circle-tensordump`, `circledump` as a development tool
+- Provide `luci-eval-driver` as test tool
+- Enable `STRING` type as constant values in CircleConst
+- Fix CircleCustom to allow 0 inputs and 0 outputs
+- Enable debian package generation
+- More optimization passes
+ - Min(6)+ReLU to ReLU6
+ - Remove FakeQuant Op
+- Experimental support of ONNX upgraded to version 1.8.0 with additional patch
+- Fix bugs where one-cmds' config file didn't evaluate boolean properly
diff --git a/docs/release/1.16/release-note-1.16.1.md b/docs/release/1.16/release-note-1.16.1.md
new file mode 100644
index 000000000..e357b9589
--- /dev/null
+++ b/docs/release/1.16/release-note-1.16.1.md
@@ -0,0 +1,7 @@
+# Release Note 1.16.1
+
+## ONE Compiler
+
+### Compiler Frontend
+
+- Extends the point where `one-codegen` finds backends.
diff --git a/docs/release/1.17/index.rst b/docs/release/1.17/index.rst
new file mode 100644
index 000000000..c2eda6020
--- /dev/null
+++ b/docs/release/1.17/index.rst
@@ -0,0 +1,13 @@
+.. ONE documentation master file, created by
+ sphinx-quickstart on Thu May 20 12:56:12 2021.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.17
+====
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.17.0.md
diff --git a/docs/release/1.17/release-note-1.17.0.md b/docs/release/1.17/release-note-1.17.0.md
new file mode 100644
index 000000000..20f8f7ec5
--- /dev/null
+++ b/docs/release/1.17/release-note-1.17.0.md
@@ -0,0 +1,24 @@
+# Release Note 1.17.0
+
+## ONE Compiler
+
+### Compiler Frontend
+
+- More optimization passes
+ - Remove Quant-Dequant sequence
+ - Replace Sub with Add
+ - Substitute StridedSlice to Reshape
+ - Fuse Mean with Mean
+ - Fuse Transpose with Mean
+ - Substitute PadV2 to Pad
+- Add new InstanceNorm pattern in `FuseInstanceNormPass`
+- Add verbose option
+- Introduce `onecc` driver to `one-cmds`
+- Introduce `one-profile` driver to `one-cmds`
+
+## ONE Runtime
+
+### gpu_cl backend added
+
+- New backend (gpu_cl) added. This backend exploits TensorFlow Lite's GPU delegate.
+- This backend supports the following operations: Add, Convolution, Depthwise Convolution, Pooling, Reshape, Relu, Softmax
diff --git a/docs/release/1.18/index.rst b/docs/release/1.18/index.rst
new file mode 100644
index 000000000..71c46585a
--- /dev/null
+++ b/docs/release/1.18/index.rst
@@ -0,0 +1,13 @@
+.. ONE documentation master file, created by
+ sphinx-quickstart on Fri Oct 20 15:20:12 2021.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.18
+====
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.18.0.md
diff --git a/docs/release/1.18/release-note-1.18.0.md b/docs/release/1.18/release-note-1.18.0.md
new file mode 100644
index 000000000..a10f10e37
--- /dev/null
+++ b/docs/release/1.18/release-note-1.18.0.md
@@ -0,0 +1,11 @@
+# Release Note 1.18.0
+
+## ONE Compiler
+
+### Compiler Frontend
+
+- More optimization passes
+ - Fold DepthwiseConv2D
+ - Substitute SplitV to Split
+ - Expand BroadCast Const
+ - Force QuantParam
diff --git a/docs/release/1.19/index.rst b/docs/release/1.19/index.rst
new file mode 100644
index 000000000..c80782ce8
--- /dev/null
+++ b/docs/release/1.19/index.rst
@@ -0,0 +1,13 @@
+.. ONE documentation master file, created by
+ sphinx-quickstart on Wed Nov 10 15:21:13 2021.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.19
+====
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.19.0.md
diff --git a/docs/release/1.19/release-note-1.19.0.md b/docs/release/1.19/release-note-1.19.0.md
new file mode 100644
index 000000000..e63d8706c
--- /dev/null
+++ b/docs/release/1.19/release-note-1.19.0.md
@@ -0,0 +1,8 @@
+# Release Note 1.19.0
+
+## ONE Compiler
+
+### Compiler Frontend
+
+- `circle-quantizer` supports input/output type option
+- Introduce configuration file for optimization options
diff --git a/docs/release/1.20/index.rst b/docs/release/1.20/index.rst
new file mode 100644
index 000000000..082d867f3
--- /dev/null
+++ b/docs/release/1.20/index.rst
@@ -0,0 +1,13 @@
+.. ONE documentation master file, created by
+ sphinx-quickstart on Tue Apr 26 10:18:12 2022.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.20
+====
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.20.0.md
diff --git a/docs/release/1.20/release-note-1.20.0.md b/docs/release/1.20/release-note-1.20.0.md
new file mode 100644
index 000000000..2c75e06af
--- /dev/null
+++ b/docs/release/1.20/release-note-1.20.0.md
@@ -0,0 +1,34 @@
+# Release Note 1.20.0
+
+## ONE Compiler
+
+### Compiler Frontend
+
+- luci-interpreter supports multiple kernels with PAL layer including Cortex-M
+- luci-interpreter supports integer tensors for some of the kernels
+- luci import supports constants without copying to reduce memory for luci-interpreter
+- Reduce duplicate codes to package released modules
+- Limited support for ONNX LSTM/RNN unrolling while importing
+- Limited support for ARM32 cross build
+- Support new operator: SVDF
+- New virtual CircleVariable to support tensor with variable
+- Support quantization of BatchMatMul Op
+- Support mixed(UINT8 + INT16) quantization
+- Support backward propagation of quantization parameters
+- Upgrade default python to version 3.8
+- Support TensorFlow 2.8.0, ONNX-TF 1.10.0, ONNX 1.11.0
+- Upgrade circle schema to follow tflite schema v3b
+- Refactor to mio-tflite280, mio-circle04 with version and helper methods
+- Use a single flatbuffers 2.0 version
+- Drop support for TensorFlow 1.x
+- Fix for several bugs, performance enhancements, and typos
+
+## ONE Runtime
+
+### Introduce TRIX backend
+- TRIX backend supports trix binary with NHWC layout
+- TRIX backend supports trix binary with input/output of Q8 and Q16 type
+
+### API supports new data type
+- Symmetric Quantized int16 type named "NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED"
+
diff --git a/docs/release/1.21/index.rst b/docs/release/1.21/index.rst
new file mode 100644
index 000000000..587065f56
--- /dev/null
+++ b/docs/release/1.21/index.rst
@@ -0,0 +1,13 @@
+.. ONE documentation master file, created by
+ sphinx-quickstart on Wed Sep 06 12:18:12 2022.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.21
+====
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.21.0.md
diff --git a/docs/release/1.21/release-note_1.21.0.md b/docs/release/1.21/release-note_1.21.0.md
new file mode 100644
index 000000000..49bf074b6
--- /dev/null
+++ b/docs/release/1.21/release-note_1.21.0.md
@@ -0,0 +1,35 @@
+# Release Note 1.21.0
+
+## ONE Compiler
+
+- Support unrolling of LSTM and RNN Ops in `one-import-onnx` tool
+- Introduced new tools `one-infer`, `circle-operator`, `circle-interpreter`
+- Introduced `Workflow`(WIP) in `one-cmds`
+- New option `quant_config` in `one-quantize`
+- New option `fake_quantize` in `one-quantize`
+- More Ops supported: Densify
+- More Ops for quantization: ReduceMax
+- More Ops for mixed-precision quantization (MPQ): LeakyRelu, Neg, Relu6, Squeeze
+- More Ops for `convert_nchw_to_nhwc` option: LogSoftmax, ReduceMax, SplitV, Softmax
+- New optimization options in `one-optimize`: `replace_non_const_fc_with_bmm`, `resolve_customop_splitv`, `fold_densify`
+- Improved reshape elimination in `convert_nchw_to_nhwc` option.
+- Support fusion of Channel-wise Add + Relu with TConv
+- Support negative axis in ArgMin/Max
+- Show errors for unrecognized options in `one-optimize`
+- Fix shape inference for `StridedSlice`
+- Fix FuseBatchNormWithTConvPass to support TConv with bias
+- Deprecate `--O1` option in `circle2circle`
+- Support gcc-11
+- Support limited Float16 for kernels constants with dequantization to Float32
+
+## ONE Runtime
+
+### Basic Multimodel nnpackage
+- Runtime supports running nnpackage with two models
+
+### Channel Wise Quantization on Conv2D and Depthwise Conv2D
+- Conv2D and Depthwise Conv2D support per-channel quantization of uint8 type.
+
+### Batch Execution with TRIX backend
+- TRIX backend supports batch execution which runs in parallel on multiple cores
+
diff --git a/docs/release/1.22/index.rst b/docs/release/1.22/index.rst
new file mode 100644
index 000000000..e3c330df6
--- /dev/null
+++ b/docs/release/1.22/index.rst
@@ -0,0 +1,13 @@
+.. ONE documentation master file, created by
+ sphinx-quickstart on Fri Mar 24 14:03:12 2023.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.22
+====
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.22.0.md
diff --git a/docs/release/1.22/release-note-1.22.0.md b/docs/release/1.22/release-note-1.22.0.md
new file mode 100644
index 000000000..2e6dee74a
--- /dev/null
+++ b/docs/release/1.22/release-note-1.22.0.md
@@ -0,0 +1,12 @@
+# Release Note 1.22.0
+
+## ONE Compiler
+
+- Introduce new optimization options: `unroll_unidirseqlstm`, `forward_transpose_op`, `fold_fully_connected`, `fuse_prelu`
+- Support more Ops for fake quantization: `Depth2Space`, `Space2Depth`, `Pack`, `Unpack`, `Abs`
+- Support more Ops for quantization: `Abs`, `ReduceProd`
+- Introduce _visq_ tool for quantization error visualization
+- Introduce _Environment_ section into configuration file
+- Improve speed of `convert_nchw_to_nhwc` option
+- Support `Add`, `Mul` of index-type (int32, int64) tensors in _one-quantize_
+- Support Ubuntu 20.04
diff --git a/docs/release/1.22/release-note-1.22.1.md b/docs/release/1.22/release-note-1.22.1.md
new file mode 100644
index 000000000..c76eb2688
--- /dev/null
+++ b/docs/release/1.22/release-note-1.22.1.md
@@ -0,0 +1,9 @@
+# Release Note 1.22.1
+
+## ONE Runtime
+
+### Multimodel nnpackage
+
+- Runtime supports running nnpackage with 3 or more models
+- Runtime supports running multimodel nnpackage with multiple subgraphs
+- Runtime supports type casting when the tensor data types across an edge differ
diff --git a/docs/release/1.23/index.rst b/docs/release/1.23/index.rst
new file mode 100644
index 000000000..ae29d5226
--- /dev/null
+++ b/docs/release/1.23/index.rst
@@ -0,0 +1,13 @@
+.. ONE documentation master file, created by
+ sphinx-quickstart on Thu May 18 19:07:17 2023.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.23
+====
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.23.0.md
diff --git a/docs/release/1.23/release-note-1.23.0.md b/docs/release/1.23/release-note-1.23.0.md
new file mode 100644
index 000000000..b5a3d1b9b
--- /dev/null
+++ b/docs/release/1.23/release-note-1.23.0.md
@@ -0,0 +1,8 @@
+# Release Note 1.23.0
+
+## ONE Compiler
+
+- Support more Op(s): GeLU
+- Support more option(s): `--fuse-gelu`
+- Support multiple backends compilation with a single configuration file
+- Upgrade Circle schema to 0.5
diff --git a/docs/release/1.24/index.rst b/docs/release/1.24/index.rst
new file mode 100644
index 000000000..fa1698820
--- /dev/null
+++ b/docs/release/1.24/index.rst
@@ -0,0 +1,13 @@
+.. ONE documentation master file, created by
+ sphinx-quickstart on Thu Jul 18 14:08:15 2023.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.24
+====
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.24.0.md
diff --git a/docs/release/1.24/release-note-1.24.0.md b/docs/release/1.24/release-note-1.24.0.md
new file mode 100644
index 000000000..9020da8aa
--- /dev/null
+++ b/docs/release/1.24/release-note-1.24.0.md
@@ -0,0 +1,9 @@
+# Release Note 1.24.0
+
+## ONE Compiler
+
+- Introduce _one-import-onnx_ extension interface
+- _onecc_ supports profiling of multiple backends with a single cfg file
+- Enable more Quantize operators: FloorMod, Squeeze
+- _visq_ supports multi-out nodes
+- _onecc_ introduces the `dynamic_batch_to_single_batch` option.
diff --git a/docs/release/1.25/index.rst b/docs/release/1.25/index.rst
new file mode 100644
index 000000000..1965b42e8
--- /dev/null
+++ b/docs/release/1.25/index.rst
@@ -0,0 +1,13 @@
+.. ONE documentation master file, created by
+ sphinx-quickstart on Wed Sep 06 12:18:12 2022.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.25
+====
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.25.0.md
diff --git a/docs/release/1.25/release-note_1.25.0.md b/docs/release/1.25/release-note_1.25.0.md
new file mode 100644
index 000000000..3d62c6bde
--- /dev/null
+++ b/docs/release/1.25/release-note_1.25.0.md
@@ -0,0 +1,17 @@
+# Release Note 1.25.0
+
+## ONE Runtime
+
+- Support Ubuntu 20.04
+
+### CPU Backend Operation
+- CPU backend supports per-channel hybrid quantization of int8 type weights and float activations (TFLite's dynamic range quantization).
+
+### On-device Quantization
+- _onert_ supports a new experimental API for on-device quantization.
+- As the 1st step, _onert_ supports per-channel hybrid quantization of int8/int16 type weights and float activations.
+- The API requires a file path to export the quantized model.
+
+### Minmax Recorder
+- _onert_ supports minmax recording of each layer as an experimental feature. It is not supported by the API yet.
+- Output file format is HDF5. (The file format may change later.)
diff --git a/docs/release/1.5/index.rst b/docs/release/1.5/index.rst
new file mode 100644
index 000000000..6e7810280
--- /dev/null
+++ b/docs/release/1.5/index.rst
@@ -0,0 +1,13 @@
+.. ONE documentation master file, created by
+ sphinx-quickstart on Thu May 14 18:13:12 2020.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.5
+===
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.5.0.md
diff --git a/docs/release/1.6/index.rst b/docs/release/1.6/index.rst
new file mode 100644
index 000000000..3f380b5f8
--- /dev/null
+++ b/docs/release/1.6/index.rst
@@ -0,0 +1,13 @@
+.. ONE documentation master file, created by
+ sphinx-quickstart on Thu May 14 18:13:12 2020.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.6
+===
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.6.0.md
diff --git a/docs/release/1.7/index.rst b/docs/release/1.7/index.rst
new file mode 100644
index 000000000..234e4b6ec
--- /dev/null
+++ b/docs/release/1.7/index.rst
@@ -0,0 +1,13 @@
+.. ONE documentation master file, created by
+ sphinx-quickstart on Thu May 14 18:13:12 2020.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.7
+===
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.7.0.md
diff --git a/docs/release/1.7/release-note-1.7.0.md b/docs/release/1.7/release-note-1.7.0.md
new file mode 100644
index 000000000..c1a4f50b2
--- /dev/null
+++ b/docs/release/1.7/release-note-1.7.0.md
@@ -0,0 +1,46 @@
+## Feature Highlights
+
+- **ONE** Compiler
+ - Compiler supports more operations
+ - New command line interface for user interface consistency
+- **ONE** Runtime
+ - Runtime CPU backend supports more operations
+ - Runtime CPU backend supports more quant8 operations
+ - API changes
+ - New optimization
+
+## ONE Compiler
+
+### Compiler supports more operations
+
+- MatrixDiag, MatrixSetDiag, ReverseSequence, ReverseV2, SegmentSum, SelectV2, SparseToDense, Where
+
+### New command line interface for user interface consistency
+
+- one-import: imports conventional model files to circle
+ - one-import-tf: imports TensorFlow model to circle
+ - one-import-tflite: imports TensorFlow lite model to circle
+- one-optimize: circle optimize command
+- one-quantize: circle quantize command
+ - supports float32 to uint8, layer wise (for Conv series)
+- one-pack: package command
+- one-prepare-venv: prepares python virtual environment for importing TensorFlow model
+- one-codegen: backend(if available) code generator
+
+## ONE Runtime
+
+### Runtime CPU backend supports more operations
+
+- LogSoftmax, SpaceToBatchND
+
+### Runtime CPU backend supports more quant8 operations
+
+- Logistic, Mul, Tanh, SpaceToBatchND, Transpose, Sub, Max, Min, Less, Greater, GreaterEqual, LessEqual, Equal, NotEqual
+
+### API changes
+
+- Introduce basic asynchronous execution API
+
+### New optimization
+
+- Remove dynamic tensor overhead from static models
diff --git a/docs/release/1.8/index.rst b/docs/release/1.8/index.rst
new file mode 100644
index 000000000..022e00ba5
--- /dev/null
+++ b/docs/release/1.8/index.rst
@@ -0,0 +1,13 @@
+.. ONE documentation master file, created by
+ sphinx-quickstart on Thu May 14 18:13:12 2020.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.8
+===
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.8.0.md
diff --git a/docs/release/1.9/index.rst b/docs/release/1.9/index.rst
new file mode 100644
index 000000000..82f0e1210
--- /dev/null
+++ b/docs/release/1.9/index.rst
@@ -0,0 +1,14 @@
+.. ONE documentation master file, created by
+ sphinx-quickstart on Thu May 14 18:13:12 2020.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.9
+===
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.9.0.md
+ ./release-note-1.9.1.md
diff --git a/docs/release/1.9/release-note-1.9.1.md b/docs/release/1.9/release-note-1.9.1.md
new file mode 100644
index 000000000..55290cfa1
--- /dev/null
+++ b/docs/release/1.9/release-note-1.9.1.md
@@ -0,0 +1,9 @@
+# Release Note 1.9.1
+
+## ONE Compiler
+
+### Changes
+
+- `tf2nnpkg` now supports importing TensorFlow models which include BCQ information.
+- Minor change for preserving BCQ information.
+- Fix invalid input arguments and add more error handling for one-cmds
diff --git a/docs/release/index.rst b/docs/release/index.rst
index bb542bc1c..f49d3eac4 100644
--- a/docs/release/index.rst
+++ b/docs/release/index.rst
@@ -15,3 +15,15 @@ Release
./1.2/index
./1.3/index
./1.4/index
+ ./1.5/index
+ ./1.6/index
+ ./1.7/index
+ ./1.8/index
+ ./1.9/index
+ ./1.10/index
+ ./1.11/index
+ ./1.12/index
+ ./1.13/index
+ ./1.14/index
+ ./1.15/index
+ ./1.16/index
diff --git a/docs/release/onert-micro/0.1/release-note-0.1.0.md b/docs/release/onert-micro/0.1/release-note-0.1.0.md
new file mode 100644
index 000000000..5a4692b4f
--- /dev/null
+++ b/docs/release/onert-micro/0.1/release-note-0.1.0.md
@@ -0,0 +1,72 @@
+## Release Notes for onert-micro 0.1.0
+
+_onert-micro_ is a tiny runtime specialized for running NN models on MCU boards. Note that onert-micro is under active development and subject to change.
+
+### Supported operations
+
+For MCU boards, we support the following 22 operations:
+
+ ADD, FULLY_CONNECTED, CONV_2D, LOGISTIC, GATHER, EXPAND_DIMS, PACK, RESHAPE, REDUCE_PROD, LESS, MUL, MAX_POOL_2D, CONCATENATION, SHAPE, SLICE, SUB, SPLIT, STRIDED_SLICE, TANH, SOFTMAX, WHILE, UNIDIRECTIONAL_SEQUENCE_LSTM
+
+### RNN Model
+
+#### LSTM
+
+onert-micro supports Keras models with LSTM operations, but they must be converted to the UNIDIRECTIONAL_SEQUENCE_LSTM operation in circle format.
+
+#### GRU
+
+onert-micro supports models with GRU operations converted from Keras models. Please refer to https://github.com/Samsung/ONE/issues/10465 to see the GRU operation supported by onert-micro.
+
+### Benchmark
+
+onert-micro shows better performance than tflite-micro, especially in memory consumption and binary size.
+
+The measurement was done on TizenRT running reference models on a development board with the following spec:
+
+- 32-bit Arm Cortex-M33 200MHz
+- 4MB RAM, 8MB Flash
+
+Commits used for measurement:
+- tflite-micro commit: https://github.com/tensorflow/tflite-micro/commit/4e62ea7b821c1e6af004912132395fb81922ea8d
+
+- onert-micro commit: https://github.com/Samsung/ONE/commit/c763867500fe3d80bfd1ef834990d34a81640d17
+#### L model
+
+| Params | Tflite micro | Onert-micro |
+|-----------------------------------|---------------|-------------|
+| Execution time(us)* | **2 912 700** | 2 953 000 |
+| RAM consumption(bytes) | 126 800 | **93 376** |
+| Binary file size overhead (bytes) | 57 676 | **32 248** |
+
+
+#### T1 model
+
+Params | Tflite micro | Onert-micro |
+--- | --- | ---
+Execution time(us)* | **1 340** | 1 510 |
+RAM consumption(bytes) | 1 640 | **1 152** |
+Binary file size overhead (bytes) | 35 040 | **19 432** |
+
+#### T2 model
+
+Params | Tflite micro** | Onert-micro |
+--- | --- | ---
+Execution time(us)* | N/A | 5 090 |
+RAM consumption(bytes) | N/A | 3 360 |
+Binary file size overhead (bytes) | N/A | 30 488 |
+
+#### Model with GRU operations
+
+- model link : https://github.com/Samsung/ONE/files/8368702/gru.zip
+
+Params | Tflite micro** | Onert-micro |
+--- | --- | ---
+Execution time(us)* | N/A | 335 000 |
+RAM consumption(bytes) | N/A | 14 816 |
+Binary file size overhead (bytes) | N/A | 43 444 |
+
+
+(*) Average for 100 inferences
+(**) Tflite-micro could not launch this model
+
diff --git a/docs/release/onert-micro/1.0/release-note-1.0.0.md b/docs/release/onert-micro/1.0/release-note-1.0.0.md
new file mode 100644
index 000000000..a18d55724
--- /dev/null
+++ b/docs/release/onert-micro/1.0/release-note-1.0.0.md
@@ -0,0 +1,12 @@
+## Release Notes for onert-micro 1.0
+
+### Supported operations
+
+More operations are supported as follows:
+
+- AveragePool2D, Elu, Exp, Abs, Neg, Div, AddN, Relu, Relu6, Leak_Relu, Pad, PadV2, ArgMin, ArgMax, Resize_Bilinear, LogicalAnd, LogicalOr, Equal, NotEqual, Greater, GreaterEqual, LessEqual
+
+### Etc
+
+- Address sanitizer build option (ENABLE_SANITIZER) is added
+- Fixed buffer overflow defects found by the static analyzer
diff --git a/docs/runtime/api.md b/docs/runtime/api.md
index 3ff9ff056..9dacd2868 100644
--- a/docs/runtime/api.md
+++ b/docs/runtime/api.md
@@ -28,8 +28,6 @@ For usage, refer to [Howto : NNFW API](../howto/how-to-use-nnfw-api.md).
## Backend API
-Backend API is defined by One Runtime.
-
-Backend API is about actual computation of operations and memory management for operands. In order to allow different kinds of computation units or computation libraries, One Runtime defines Backend API to support user defined operation kernels and memory manager. It contains a lot of C++ headers which are subject to change.
+Backend API enables anyone to extend the runtime in terms of operation computation and memory management.
For detailed descriptions, refer to [Backend API](../runtime/backend-api.md).
diff --git a/docs/runtime/backend-api.md b/docs/runtime/backend-api.md
index b291badcf..54ef87090 100644
--- a/docs/runtime/backend-api.md
+++ b/docs/runtime/backend-api.md
@@ -1 +1,65 @@
# Backend API
+
+Backend API is defined by One Runtime. It is about the actual computation of operations and memory management for operands. In order to allow different kinds of computation units or libraries, Backend API is exposed to support user-defined operation kernels and memory managers. It contains several C++ interface classes which are **subject to change**.
+
+## How backends are loaded
+
+When a backend ID is given to a session, the compiler module tries to load `libbackend_{BACKEND_ID}.so`. If it is successful, the runtime looks up the C API functions in it and makes use of them.
+
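+Conceptually, the loading step looks like the sketch below. This is a simplified
+illustration only; the actual compiler-module code adds error handling, caching
+and cleanup.
+
+```cpp
+// Simplified sketch of backend loading via dlopen/dlsym (illustrative only).
+#include <dlfcn.h>
+#include <string>
+
+namespace onert { namespace backend { class Backend; } }
+
+onert::backend::Backend *loadBackend(const std::string &backend_id)
+{
+  std::string so_name = "libbackend_" + backend_id + ".so";
+  void *handle = dlopen(so_name.c_str(), RTLD_LAZY | RTLD_LOCAL);
+  if (handle == nullptr)
+    return nullptr; // shared object not found
+  auto create_fn = reinterpret_cast<onert::backend::Backend *(*)()>(
+      dlsym(handle, "onert_backend_create"));
+  return create_fn ? create_fn() : nullptr;
+}
+```
+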
+## C and C++ API
+
+### C API
+
+We have 2 C API functions which are used as the entrypoint and the exitpoint. Here are their definitions.
+
+```c
+onert::backend::Backend *onert_backend_create();
+void onert_backend_destroy(onert::backend::Backend *backend);
+```
+
+They create and destroy a C++ object, respectively. These two functions are the only ones that are dynamically resolved at runtime.
+
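+A backend plugin implements them roughly as in the sketch below. `MyBackend` and
+its header are hypothetical; they stand for your implementation of the C++
+interface classes described in the next section, and the include path is assumed
+to be the one used inside the onert source tree.
+
+```cpp
+// Plugin boundary sketch. "MyBackend.h" is hypothetical and would define a
+// class deriving from onert::backend::Backend (see the C++ API below).
+#include <backend/Backend.h>
+
+#include "MyBackend.h"
+
+extern "C" {
+onert::backend::Backend *onert_backend_create() { return new MyBackend; }
+void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; }
+}
+```
+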
+### C++ API
+
+> **NOTE** C++ API is subject to change, so it may change in every release
+
+The C API above is just an entrypoint; it delegates the core work to the C++ API.
+
+The major classes are described below. One must implement these classes (and some more) to create a backend.
+
+- `Backend` : Responsible for creating a backend context, which is a set of backend components
+- `BackendContext` : Holds data for the current session and is also responsible for creating tensor objects and kernels
+ - `BackendContext::genTensors` : Creates tensor objects
+ - `BackendContext::genKernels` : Creates kernels
+- `IConfig` : Configurations and miscellaneous stuff (global, not session-based)
+- `ITensorRegistry` : A set of tensor (`ITensor`) objects that are used by the current backend
+
+Please refer to each class document for details. You may refer to [Bundle Backends](#bundle-backends) for actual implementation samples.
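+
+As a rough mental model, the runtime core drives these classes as in the
+hypothetical pseudocode below. All names mirror the list above; the actual
+signatures differ between releases.
+
+```cpp
+// Hypothetical pseudocode: how the runtime core might drive a backend.
+// Not actual onert code; see the real headers for current signatures.
+void compileWithBackend(onert::backend::Backend &backend /*, graph data */)
+{
+  auto config = backend.config();        // IConfig: global configuration
+  auto context = backend.newContext(/* session data */);
+  context->genTensors();                 // registers ITensor objects (ITensorRegistry)
+  auto kernels = context->genKernels();  // kernels for the executor to run
+  // ... hand the tensors and kernels over to the executor ...
+}
+```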
+
+## Provided Backend Implementations
+
+We provide some backends along with the runtime. There is the special backend `builtin` which is part of runtime core, and some bundle backends which are baseline backends and samples of backend implementation.
+
+## `builtin` Backend
+
+`builtin` is a special backend that is always loaded (statically linked, part of the runtime core). It is implemented just like other backends, but there are some things that it does exclusively.
+
+- Has kernels for If, While and Permute operations (kernels from other backends are never used)
+- The runtime core directly creates `builtin`'s tensor objects to accept user-given input and output buffers
+- The runtime core gives the executor context to the `builtin` backend, which allows control flow ops to change the execution flow properly
+
+## Bundle Backends
+
+Without actual implementations of backends, we cannot run any models. So we provide 3 bundle backends which support dozens of operations.
+
+### cpu
+
+This backend is written in C++ and all the computation is done with CPU only.
+
+### acl_neon
+
+`acl_neon` is a backend that is an adaptation layer of the NE (NEON) part of [ARM ComputeLibrary](https://github.com/ARM-software/ComputeLibrary). So it basically only uses the CPU too, but works only on ARM.
+
+### acl_cl
+
+`acl_cl` is a backend that is an adaptation layer of the CL (OpenCL) part of [ARM ComputeLibrary](https://github.com/ARM-software/ComputeLibrary). OpenCL support (`libOpenCL.so`) is also necessary in the running environment to be able to use this backend. Also, it works only on ARM.
diff --git a/docs/runtime/controlflow-operations.md b/docs/runtime/controlflow-operations.md
new file mode 100644
index 000000000..8c456b23a
--- /dev/null
+++ b/docs/runtime/controlflow-operations.md
@@ -0,0 +1,40 @@
+# Controlflow Operations
+
+We call the `If` and `While` operations "controlflow operations". These operations are different from the others: they do not compute data, but are used to invoke another subgraph and return back, which is what makes conditionals/iterations work in dataflow models.
+
+## Defining controlflow operations
+
+As we use the TensorFlow Lite schema (or Circle, which is based on TF Lite), the runtime follows the way TF Lite does. The details are stated in the [Control Flow in TensorFlow Lite](https://github.com/tensorflow/community/blob/master/rfcs/20190315-tflite-control-flow.md) RFC document.
+
+Controlflow operations from the NN API are not yet supported, but we expect that they can be enabled in a similar way.
+
+## Implementation
+
+### Graph representation
+
+`onert` internally has its own representation for controlflow operations and subgraphs. It is straightforward, as it is pretty much isomorphic to the schema. `onert`'s in-memory model contains multiple subgraphs, and the controlflow operations have the same parameters (subgraph indices) just like the TF Lite schema has.
+
+### Execution
+
+The `controlflow` backend is a built-in backend to support these controlflow operations. This backend is special, as it has access to the `onert` core's executor manager (`ExecutorMap`) so it can invoke a subgraph and return from it. This backend implements the `If` and `While` operations, which make use of the access to the executor manager.
+
+An `Executor` has two different ways to execute depending on whether it is the initial execution or invoking a subgraph from a controlflow operation.
+
+- Executing the primary subgraph
+ - Pass user-given tensors as the subgraph inputs and outputs
+- Executing a subgraph for controlflow operations
+ - Pass controlflow operation inputs tensors as the subgraph inputs
+ - Pass the subgraph outputs as controlflow operation outputs
+
+#### Kernel Implementation
+
+Here is a brief explanation of what the kernels do, quoted from [Control Flow in TensorFlow Lite](https://github.com/tensorflow/community/blob/master/rfcs/20190315-tflite-control-flow.md).
+
+> * `If` : Check the condition input and invoke one of the 2 subgraphs.
+> * `While` :
+> * Invoke the condition subgraph. Break out the loop if result is false.
+> * Invoke the body subgraph, use the output as the input of the next iteration.
+
+Invoking a subgraph requires passing the operation's inputs to the subgraph inputs, and returning back requires passing the subgraph outputs to the operation outputs.
+
+When invoking a subgraph and returning back, the current kernel implementation literally copies all the subgraph inputs and outputs. This is going to be optimized to minimize redundant copies. A hedged sketch of the `While` logic follows.
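+
+The sketch below reconstructs the quoted `While` semantics with stand-in types.
+It is illustrative only and is not the actual onert kernel code, which works on
+tensor objects via the executor manager.
+
+```cpp
+// Illustrative While-kernel skeleton (stand-in types, not onert code).
+#include <functional>
+#include <vector>
+
+using Tensors = std::vector<float>; // stand-in for real tensor objects
+
+Tensors runWhile(const std::function<bool(const Tensors &)> &cond_subgraph,
+                 const std::function<Tensors(const Tensors &)> &body_subgraph,
+                 Tensors inputs) // operation inputs copied to subgraph inputs
+{
+  // Invoke the condition subgraph; break out of the loop if the result is false.
+  while (cond_subgraph(inputs))
+  {
+    // Invoke the body subgraph; use the output as the input of the next iteration.
+    inputs = body_subgraph(inputs);
+  }
+  // Subgraph outputs are copied back to the operation outputs.
+  return inputs;
+}
+```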
diff --git a/docs/runtime/index.rst b/docs/runtime/index.rst
index 4d12dc37b..e80dfc81e 100644
--- a/docs/runtime/index.rst
+++ b/docs/runtime/index.rst
@@ -12,7 +12,9 @@ Runtime
./api.md
./core.md
- ./compute.md
+ ./controlflow-operations.md
./executors.md
- ./backend-api.md
./heterogeneous-execution.md
+ ./backend-api.md
+ ./compute.md
+ ./supported-operations-backend.md
diff --git a/docs/runtime/supported-operations-backend.md b/docs/runtime/supported-operations-backend.md
new file mode 100644
index 000000000..b5b5c6268
--- /dev/null
+++ b/docs/runtime/supported-operations-backend.md
@@ -0,0 +1,208 @@
+# Supported Operations and backends
+
+As of 2021-03-08
+
+### Raw-data format (float32, int32, boolean, etc)
+
+Operation | CPU | ACL-CL | ACL-NEON
+-- | -- | -- | --
+Abs | O | O | O
+Add | O | O | O
+AddN | O | |
+ArgMax | O | O | O
+ArgMin | O | O | O
+AvgPool2D | O | O | O
+BatchMatmul | O | |
+BatchToSpaceND | O | O | O
+BroadcastTo | O | |
+Cast | O | O | O
+Concat | O | O | O
+Conv2D | O | O | O
+Cos | O | |
+Custom | O | |
+DepthToSpace | O | O | O
+DepthwiseConv2D | O | O | O
+Div | O | O | O
+Einsum | O | |
+Elu | O | |
+EmbeddingLookup | | O | O
+Equal | O | O | O
+Exp | O | O | O
+ExpandDims | O | O | O
+Fill | O | |
+Floor | O | O | O
+FullyConnected | O | O | O
+FusedBatchNorm | O | |
+Gather | O | O | O
+Greater | O | O | O
+GreaterEqual | O | O | O
+HashtableLookup | | O | O
+If | O | |
+InstanceNormalize | | O | O
+L2Normalization | O | O | O
+L2Pool | | O | O
+LeakyRelu | O | O | O
+Less | O | O | O
+LessEqual | O | O | O
+LocalResponseNormalize | | O | O
+Log | O | |
+LogicalAnd | O | O | O
+LogicalNot | O | O | O
+LogicalOr | O | O | O
+Logistic | O | O | O
+LogSoftmax | O | |
+LSTM | | O | O
+MatrixBandPart | O | |
+Maximum | O | O | O
+MaxPool2D | O | O | O
+Mean | O | O | O
+Minimum | O | O | O
+Mul | O | O | O
+Neg | O | O | O
+NotEqual | O | O | O
+OneHot | O | O |
+Pack | O | O | O
+Pad | O | O | O
+PadV2 | O | O | O
+Pow | O | |
+PReLU | | O | O
+Quantize | O | |
+Range | O | |
+Rank | O | |
+ReduceAny(All) | O | |
+ReduceAny(Any) | O | |
+ReduceMax(Max) | O | O | O
+ReduceMin(Min) | O | O | O
+ReduceProd | O | |
+ReduceSum(Sum) | O | O | O
+ReLU | O | O | O
+ReLU6 | O | O | O
+Reshape | O | O | O
+ResizeBilinear | O | O | O
+ResizeNearestNeighbor | | O | O
+ReverseV2 | O | O | O
+RNN | | O | O
+Round | O | |
+Rsqrt | O | O | O
+Select | O | |
+SelectV2 | O | |
+Shape | O | |
+Sin | O | |
+Slice | O | O | O
+Softmax | O | O | O
+SpaceToBatchND | O | O | O
+SpaceToDepth | O | O | O
+Split | O | O | O
+SplitV | O | O |
+Sqrt | O | O | O
+Square | O | |
+SquaredDifference | O | O | O
+Squeeze | O | O | O
+StridedSlice | O | O | O
+Sub | O | O | O
+Tanh | O | O | O
+Tile | O | |
+TopKV2 | | | O
+Transpose | O | O | O
+TransposeConv | | O | O
+Unpack(Unstack) | O | O | O
+UniDirectionalSequenceLSTM | O | |
+While | O | |
+ZerosLike | O | |
+
+### Quantization format (uint8 asymmetric)
+
+Operation | CPU | ACL-CL | ACL-NEON
+-- | -- | -- | --
+Add | O | O | O
+ArgMax | O | O | O
+ArgMin | O | O | O
+AvgPool2D | O | O | O
+BatchToSpaceND | O | O | O
+Cast | O | O |
+Concat | O | O | O
+Conv2D | O | O | O
+Custom | O | |
+DepthToSpace | O | O | O
+DepthwiseConv2D | O | O | O
+Dequantize | O | O | O
+Div | O | |
+EmbeddingLookup | | O | O
+Equal | O | O | O
+Erf | O | |
+ExpandDims | O | O | O
+FullyConnected | O | O | O
+Gather | O | O | O
+Greater | O | O | O
+GreaterEqual | O | O | O
+HashtableLookup | | O | O
+L2Normalization | O | |
+Less | O | O | O
+LessEqual | O | O | O
+Logistic | O | O | O
+LogSoftmax | O | |
+Maximum | O | O | O
+MaxPool2D | O | O | O
+Mean | O | O | O
+Minimum | O | O | O
+Mul | O | O |
+NotEqual | O | O | O
+Pack | | O | O
+Pad | O | O | O
+PadV2 | O | O | O
+PReLU | | O | O
+Quantize | O | |
+Rank | O | |
+ReduceMax(Max) | | O |
+ReduceMin(Min) | | O |
+ReduceSum(Sum) | O | O |
+ReLU | | O | O
+ReLU6 | | O | O
+Reshape | O | O | O
+ResizeBilinear | O | O | O
+ResizeNearestNeighbor | | O | O
+Shape | O | |
+Slice | O | O | O
+Softmax | O | O | O
+SpaceToBatchND | O | O | O
+SpaceToDepth | O | O | O
+Split | O | O | O
+SplitV | O | O |
+Squeeze | O | O | O
+StatelessRandomUniform | O | |
+StridedSlice | | O | O
+Sub | O | O | O
+Tanh | O | O | O
+Tile | O | |
+Transpose | O | O | O
+TransposeConv | | O | O
+Unpack(Unstack) | | O | O
+
+### Quantization format (int8)
+
+Operation | CPU | ACL-CL | ACL-NEON
+-- | -- | -- | --
+Add | O | O | O
+ArgMax | O | O | O
+ArgMin | O | O | O
+AvgPool2D | O | |
+Concat | O | O | O
+Conv2D | O | |
+DepthToSpace | O | |
+DepthwiseConv2D | O | |
+Dequantize | O | O | O
+ExpandDims | O | O | O
+MaxPool2D | O | |
+Mul | O | O | O
+Pad | O | O | O
+PadV2 | O | |
+PReLU | | O | O
+Quantize | O | |
+Rank | O | |
+Reshape | O | O | O
+ResizeBilinear | O | O | O
+ResizeNearestNeighbor | | O | O
+Shape | O | |
+Softmax | O | O | O
+Squeeze | O | O | O
+Sub | O | O | O
diff --git a/infra/cmake/modules/ExternalBuildTools.cmake b/infra/cmake/modules/ExternalBuildTools.cmake
index 4f2027b4b..557e6f47d 100644
--- a/infra/cmake/modules/ExternalBuildTools.cmake
+++ b/infra/cmake/modules/ExternalBuildTools.cmake
@@ -14,7 +14,6 @@ function(ExternalBuild_CMake)
${ARGN}
)
- set(BUILD_STAMP_PATH "${ARG_BUILD_DIR}/${ARG_PKG_NAME}.stamp")
set(BUILD_LOG_PATH "${ARG_BUILD_DIR}/${ARG_PKG_NAME}.log")
set(INSTALL_STAMP_PATH "${ARG_INSTALL_DIR}/${ARG_PKG_NAME}.stamp")
set(INSTALL_LOG_PATH "${ARG_INSTALL_DIR}/${ARG_PKG_NAME}.log")
@@ -24,14 +23,6 @@ function(ExternalBuild_CMake)
set(PKG_IDENTIFIER "${ARG_IDENTIFIER}")
endif(DEFINED ARG_IDENTIFIER)
- # NOTE Do NOT retry build once it fails
- if(EXISTS ${BUILD_STAMP_PATH})
- file(READ ${BUILD_STAMP_PATH} READ_IDENTIFIER)
- if("${READ_IDENTIFIER}" STREQUAL "${PKG_IDENTIFIER}")
- return()
- endif("${READ_IDENTIFIER}" STREQUAL "${PKG_IDENTIFIER}")
- endif(EXISTS ${BUILD_STAMP_PATH})
-
# NOTE Do NOT build pre-installed exists
if(EXISTS ${INSTALL_STAMP_PATH})
file(READ ${INSTALL_STAMP_PATH} READ_IDENTIFIER)
@@ -42,11 +33,23 @@ function(ExternalBuild_CMake)
message(STATUS "Build ${ARG_PKG_NAME} from ${ARG_CMAKE_DIR}")
+ # if we're doing the cross compilation, external project also needs it
+ if(CMAKE_TOOLCHAIN_FILE)
+ set(TOOLCHAIN_FILE ${CMAKE_TOOLCHAIN_FILE})
+ # NOTE CMAKE_TOOLCHAIN_FILE may be a relative path -> make it an absolute path
+ if(NOT EXISTS ${TOOLCHAIN_FILE})
+ set(TOOLCHAIN_FILE ${CMAKE_SOURCE_DIR}/${CMAKE_TOOLCHAIN_FILE})
+ if(NOT EXISTS ${TOOLCHAIN_FILE})
+ message(FATAL_ERROR "Failed to find ${CMAKE_TOOLCHAIN_FILE}")
+ endif()
+ endif()
+ message(STATUS "ExternalBuild_CMake TOOLCHAIN_FILE=${TOOLCHAIN_FILE}")
+ list(APPEND ARG_EXTRA_OPTS -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE})
+ endif(CMAKE_TOOLCHAIN_FILE)
+
file(MAKE_DIRECTORY ${ARG_BUILD_DIR})
file(MAKE_DIRECTORY ${ARG_INSTALL_DIR})
- file(WRITE "${BUILD_STAMP_PATH}" "${PKG_IDENTIFIER}")
-
execute_process(COMMAND ${CMAKE_COMMAND}
-G "${CMAKE_GENERATOR}"
-DCMAKE_INSTALL_PREFIX=${ARG_INSTALL_DIR}
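
The change above makes ExternalBuild_CMake forward the outer build's CMAKE_TOOLCHAIN_FILE (resolved to an absolute path) into every external sub-build. A minimal caller sketch, assuming a hypothetical FOO package; the keyword arguments are the ones this function parses (see the FlatBuffers config later in this diff):

    nnas_include(ExternalBuildTools)
    ExternalBuild_CMake(CMAKE_DIR ${FooSource_DIR}   # hypothetical source dir
                        BUILD_DIR ${CMAKE_BINARY_DIR}/externals/FOO/build
                        INSTALL_DIR ${EXT_OVERLAY_DIR}
                        IDENTIFIER "1.0"
                        PKG_NAME "FOO")
    # When CMAKE_TOOLCHAIN_FILE is set on the outer build,
    # -DCMAKE_TOOLCHAIN_FILE=<abs path> is now appended to the sub-build
    # options automatically.
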
diff --git a/infra/cmake/modules/ExternalSourceTools.cmake b/infra/cmake/modules/ExternalSourceTools.cmake
index 87cb15270..5671ae0c8 100644
--- a/infra/cmake/modules/ExternalSourceTools.cmake
+++ b/infra/cmake/modules/ExternalSourceTools.cmake
@@ -5,7 +5,7 @@ function(ExternalSource_Download PREFIX)
include(CMakeParseArguments)
nnas_include(StampTools)
- cmake_parse_arguments(ARG "" "DIRNAME;URL;CHECKSUM" "" ${ARGN})
+ cmake_parse_arguments(ARG "" "DIRNAME;URL;CHECKSUM;PATCH" "" ${ARGN})
# Configure URL
if(ARG_URL)
@@ -47,19 +47,39 @@ function(ExternalSource_Download PREFIX)
file(MAKE_DIRECTORY "${TMP_DIR}")
message(STATUS "Download ${PREFIX} from ${URL}")
- file(DOWNLOAD ${URL} "${DOWNLOAD_PATH}"
- STATUS status
- LOG log)
- list(GET status 0 status_code)
- list(GET status 1 status_string)
+ foreach(retry_count RANGE 5)
+ message(STATUS "(Trial Count : ${retry_count})")
- if(NOT status_code EQUAL 0)
- message(FATAL_ERROR "error: downloading '${URL}' failed
+ # For external mirror server
+ envoption(EXTERNAL_SERVER_USERPWD "")
+ file(DOWNLOAD ${URL} "${DOWNLOAD_PATH}"
+ STATUS status
+ USERPWD "${EXTERNAL_SERVER_USERPWD}"
+ LOG log)
+
+ list(GET status 0 status_code)
+ list(GET status 1 status_string)
+
+ # Download success
+ if(status_code EQUAL 0)
+ break()
+ endif()
+
+ message(WARNING "error: downloading '${URL}' failed
status_code: ${status_code}
status_string: ${status_string}
log: ${log}")
- endif()
+
+ # Retry limit exceeded
+ if(retry_count EQUAL 5)
+ message(FATAL_ERROR "Download ${PREFIX} from ${URL} - failed")
+ endif()
+
+ # Retry after 10 seconds when download fails
+ execute_process(COMMAND sleep 10)
+ endforeach()
+
message(STATUS "Download ${PREFIX} from ${URL} - done")
# Verify checksum
@@ -86,7 +106,14 @@ function(ExternalSource_Download PREFIX)
message(STATUS "Extract ${PREFIX}")
execute_process(COMMAND ${CMAKE_COMMAND} -E tar xfz "${DOWNLOAD_PATH}"
- WORKING_DIRECTORY "${TMP_DIR}")
+ WORKING_DIRECTORY "${TMP_DIR}"
+ RESULT_VARIABLE EXTRACTION_RESULT
+ ERROR_VARIABLE EXTRACTION_ERROR)
+
+ if(EXTRACTION_RESULT AND NOT EXTRACTION_RESULT EQUAL 0)
+ message(FATAL_ERROR "Extract ${PREFIX} - failed: ${EXTRACTION_ERROR}")
+ endif()
+
file(REMOVE "${DOWNLOAD_PATH}")
message(STATUS "Extract ${PREFIX} - done")
@@ -100,6 +127,19 @@ function(ExternalSource_Download PREFIX)
get_filename_component(contents ${contents} ABSOLUTE)
file(RENAME ${contents} "${OUT_DIR}")
+ if(ARG_PATCH)
+ message(STATUS "Patch with ${ARG_PATCH}")
+ execute_process(COMMAND patch -p1 -i ${ARG_PATCH}
+ WORKING_DIRECTORY ${OUT_DIR}
+ RESULT_VARIABLE EXEC_RESULT
+ ERROR_VARIABLE EXEC_ERROR)
+ if(NOT EXEC_RESULT EQUAL 0)
+ message(FATAL_ERROR "${PREFIX} failed to apply patch ${ARG_PATCH}")
+ endif(NOT EXEC_RESULT EQUAL 0)
+
+ message(STATUS "patch ${ARG_PATCH}: ${EXEC_RESULT}, ${EXEC_ERROR}")
+ endif(ARG_PATCH)
+
file(REMOVE_RECURSE "${TMP_DIR}")
file(WRITE "${STAMP_PATH}" "${URL}")
message(STATUS "Cleanup ${PREFIX} - done")
diff --git a/infra/cmake/modules/IdentifyPlatform.cmake b/infra/cmake/modules/IdentifyPlatform.cmake
index 69fe48cad..ebaaaced6 100644
--- a/infra/cmake/modules/IdentifyPlatform.cmake
+++ b/infra/cmake/modules/IdentifyPlatform.cmake
@@ -35,20 +35,40 @@ endif()
if("${HOST_ARCH}" STREQUAL "x86_64")
set(HOST_ARCH_BASE ${HOST_ARCH})
+elseif("${HOST_ARCH}" STREQUAL "armv7em")
+ set(HOST_ARCH_BASE "arm")
elseif("${HOST_ARCH}" STREQUAL "armv7l")
set(HOST_ARCH_BASE "arm")
+elseif("${HOST_ARCH}" STREQUAL "armv7hl")
+ set(HOST_ARCH_BASE "arm")
elseif("${HOST_ARCH}" STREQUAL "aarch64")
set(HOST_ARCH_BASE "aarch64")
+elseif("${HOST_ARCH}" STREQUAL "i686")
+ set(HOST_ARCH_BASE "i686")
+elseif("${HOST_ARCH}" STREQUAL "riscv64")
+ set(HOST_ARCH_BASE "riscv64")
else()
message(FATAL_ERROR "'${HOST_ARCH}' architecture is not supported")
endif()
if("${TARGET_ARCH}" STREQUAL "x86_64")
set(TARGET_ARCH_BASE ${TARGET_ARCH})
+elseif("${TARGET_ARCH}" STREQUAL "armv8-m")
+ set(TARGET_ARCH_BASE "arm")
+elseif("${TARGET_ARCH}" STREQUAL "armv7-r")
+ set(TARGET_ARCH_BASE "arm")
+elseif("${TARGET_ARCH}" STREQUAL "armv7em")
+ set(TARGET_ARCH_BASE "arm")
elseif("${TARGET_ARCH}" STREQUAL "armv7l")
set(TARGET_ARCH_BASE "arm")
+elseif("${TARGET_ARCH}" STREQUAL "armv7hl")
+ set(TARGET_ARCH_BASE "arm")
elseif("${TARGET_ARCH}" STREQUAL "aarch64")
set(TARGET_ARCH_BASE "aarch64")
+elseif("${TARGET_ARCH}" STREQUAL "i686")
+ set(TARGET_ARCH_BASE "i686")
+elseif("${TARGET_ARCH}" STREQUAL "riscv64")
+ set(TARGET_ARCH_BASE "riscv64")
else()
message(FATAL_ERROR "'${TARGET_ARCH}' architecture is not supported")
endif()
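
These additions map the new ABI-specific names (armv8-m, armv7-r, armv7em, armv7hl) onto the existing arm base and add i686/riscv64 as their own bases, so downstream checks on the base variables keep working unchanged. An illustrative check (values are examples only):

    set(TARGET_ARCH "armv7em")
    # after IdentifyPlatform: TARGET_ARCH_BASE == "arm"
    if("${TARGET_ARCH_BASE}" STREQUAL "arm")
      # take the arm-specific code path
    endif()
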
diff --git a/infra/cmake/packages/ARMComputeSourceConfig.cmake b/infra/cmake/packages/ARMComputeSourceConfig.cmake
index adec1f91b..16e12bbca 100644
--- a/infra/cmake/packages/ARMComputeSourceConfig.cmake
+++ b/infra/cmake/packages/ARMComputeSourceConfig.cmake
@@ -8,11 +8,11 @@ function(_ARMComputeSource_import)
nnas_include(OptionTools)
envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
- set(ARMCOMPUTE_URL ${EXTERNAL_DOWNLOAD_SERVER}/ARM-software/ComputeLibrary/archive/v20.05.tar.gz)
+ set(ARMCOMPUTE_URL ${EXTERNAL_DOWNLOAD_SERVER}/ARM-software/ComputeLibrary/archive/v21.02.tar.gz)
ExternalSource_Download(ARMCOMPUTE ${ARMCOMPUTE_URL})
set(ARMComputeSource_DIR ${ARMCOMPUTE_SOURCE_DIR} PARENT_SCOPE)
- set(ARMComputeSource_FOUND ${ARMCOMPUTE_SOURCE_GET} PARENT_SCOPE)
+ set(ARMComputeSource_FOUND TRUE PARENT_SCOPE)
endfunction(_ARMComputeSource_import)
_ARMComputeSource_import()
diff --git a/infra/cmake/packages/AbseilConfig.cmake b/infra/cmake/packages/AbseilConfig.cmake
index e16dd94d7..b3cb364e1 100644
--- a/infra/cmake/packages/AbseilConfig.cmake
+++ b/infra/cmake/packages/AbseilConfig.cmake
@@ -12,11 +12,18 @@ function(_Abseil_import)
# NOTE Turn off abseil testing
set(BUILD_TESTING OFF)
+ # Set -fPIC because Abseil-cpp may be linked into shared libraries
+ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+ # Abseil-cpp 20211102.0 shows a warning without the setting below
+ set(ABSL_PROPAGATE_CXX_STD ON)
+
add_extdirectory("${AbseilSource_DIR}" ABSEIL)
add_library(abseil INTERFACE)
+
target_link_libraries(abseil INTERFACE
# From "Available Abseil CMake Public Targets" in CMake/README.md
+ # Add absl::status (It is not listed in CMake/README.md)
absl::algorithm
absl::base
absl::debugging
@@ -30,6 +37,7 @@ function(_Abseil_import)
absl::synchronization
absl::time
absl::utility
+ absl::status
)
endif(NOT TARGET abseil)
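
Consumers keep linking the single abseil interface target and now transitively get absl::status as well. A sketch, assuming the repo's nnas_find_package wrapper resolves this config; my_tool is a hypothetical consumer:

    nnas_find_package(Abseil REQUIRED)
    add_executable(my_tool main.cpp)                 # hypothetical consumer
    target_link_libraries(my_tool PRIVATE abseil)    # pulls in absl::status etc.
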
diff --git a/infra/cmake/packages/AbseilSourceConfig.cmake b/infra/cmake/packages/AbseilSourceConfig.cmake
index 8be732660..8d0c7798f 100644
--- a/infra/cmake/packages/AbseilSourceConfig.cmake
+++ b/infra/cmake/packages/AbseilSourceConfig.cmake
@@ -7,19 +7,13 @@ function(_AbseilSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- # NOTE TensorFlow 1.12 downloads abseil from the following URL
- # - https://github.com/abseil/abseil-cpp/archive/48cd2c3f351ff188bc85684b84a91b6e6d17d896.tar.gz
- #
- # The last change of "48cd2c3f351" was commited on 2018.09.27
- #
- # Let's use the latest released version (2020-02 release patch 2)
+ # NOTE GCC 13 requires abseil 20230125.3
envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
- envoption(ABSEIL_URL ${EXTERNAL_DOWNLOAD_SERVER}/abseil/abseil-cpp/archive/20200225.2.tar.gz)
-
+ envoption(ABSEIL_URL ${EXTERNAL_DOWNLOAD_SERVER}/abseil/abseil-cpp/archive/20230125.3.tar.gz)
ExternalSource_Download(ABSEIL
DIRNAME ABSEIL
URL ${ABSEIL_URL}
- CHECKSUM MD5=73f2b6e72f1599a9139170c29482ddc4)
+ CHECKSUM MD5=9b6dae642c4bd92f007ab2c148bc0498)
set(AbseilSource_DIR ${ABSEIL_SOURCE_DIR} PARENT_SCOPE)
set(AbseilSource_FOUND TRUE PARENT_SCOPE)
diff --git a/infra/cmake/packages/BoostConfig.cmake b/infra/cmake/packages/BoostConfig.cmake
index c4d7d5857..e72f742f3 100644
--- a/infra/cmake/packages/BoostConfig.cmake
+++ b/infra/cmake/packages/BoostConfig.cmake
@@ -25,6 +25,17 @@ function(_Boost_Build Boost_PREFIX)
list(APPEND Boost_Options --with-system)
list(APPEND Boost_Options --with-filesystem)
+ if(DEFINED EXTERNALS_BUILD_THREADS)
+ set(N ${EXTERNALS_BUILD_THREADS})
+ else(DEFINED EXTERNALS_BUILD_THREADS)
+ include(ProcessorCount)
+ ProcessorCount(N)
+ endif(DEFINED EXTERNALS_BUILD_THREADS)
+
+ if((NOT N EQUAL 0) AND BUILD_EXT_MULTITHREAD)
+ list(APPEND Boost_Options -j${N})
+ endif()
+
set(JAM_FILENAME ${BoostBuild_DIR}/user-config.jam)
if(ANDROID)
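
The hunk above parallelizes the Boost build: an explicit EXTERNALS_BUILD_THREADS wins, otherwise ProcessorCount supplies the count, and -jN is only passed when BUILD_EXT_MULTITHREAD is enabled. A sketch of opting in (values illustrative):

    set(BUILD_EXT_MULTITHREAD ON CACHE BOOL "build externals in parallel")
    set(EXTERNALS_BUILD_THREADS 8 CACHE STRING "thread count for external builds")
    # leave EXTERNALS_BUILD_THREADS unset to fall back to ProcessorCount(N)
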
diff --git a/infra/cmake/packages/BoostSourceConfig.cmake b/infra/cmake/packages/BoostSourceConfig.cmake
index 52cda7c7d..2477a4857 100644
--- a/infra/cmake/packages/BoostSourceConfig.cmake
+++ b/infra/cmake/packages/BoostSourceConfig.cmake
@@ -13,7 +13,7 @@ function(_BoostSource_import)
ExternalSource_Download(BOOST ${BOOST_URL})
set(BoostSource_DIR ${BOOST_SOURCE_DIR} PARENT_SCOPE)
- set(BoostSource_FOUND ${BOOST_SOURCE_GET} PARENT_SCOPE)
+ set(BoostSource_FOUND TRUE PARENT_SCOPE)
endfunction(_BoostSource_import)
_BoostSource_import()
diff --git a/infra/cmake/packages/CMSIS-NN-4.0.0/CMSIS-NNConfig.cmake b/infra/cmake/packages/CMSIS-NN-4.0.0/CMSIS-NNConfig.cmake
new file mode 100644
index 000000000..4c82af2cb
--- /dev/null
+++ b/infra/cmake/packages/CMSIS-NN-4.0.0/CMSIS-NNConfig.cmake
@@ -0,0 +1,14 @@
+function(_CMSIS_NN_import)
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(CMSIS_NN_4_0_0_URL ${EXTERNAL_DOWNLOAD_SERVER}/ARM-software/CMSIS-NN/archive/refs/tags/v4.0.0.tar.gz)
+
+ ExternalSource_Download(CMSIS_NN DIRNAME CMSIS-NN-4.0.0 ${CMSIS_NN_4_0_0_URL})
+
+ set(CMSIS_NNSource_DIR ${CMSIS_NN_SOURCE_DIR} PARENT_SCOPE)
+ set(CMSIS_NNSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_CMSIS_NN_import)
+
+_CMSIS_NN_import()
diff --git a/infra/cmake/packages/CMSIS-NN-4.0.0/CMSIS-NNConfigVersion.cmake b/infra/cmake/packages/CMSIS-NN-4.0.0/CMSIS-NNConfigVersion.cmake
new file mode 100644
index 000000000..5fa88e6c5
--- /dev/null
+++ b/infra/cmake/packages/CMSIS-NN-4.0.0/CMSIS-NNConfigVersion.cmake
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "4.0.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
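
This version file accepts only an exact match, so callers must pin the CMSIS-NN version. A sketch using nnas_find_package (the repo's find_package wrapper, as seen elsewhere in this diff):

    nnas_find_package(CMSIS-NN EXACT 4.0.0 QUIET)  # matched by this package
    nnas_find_package(CMSIS-NN EXACT 4.1.0 QUIET)  # matched by the 4.1.0 package below
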
diff --git a/infra/cmake/packages/CMSIS-NN-4.1.0/CMSIS-NNConfig.cmake b/infra/cmake/packages/CMSIS-NN-4.1.0/CMSIS-NNConfig.cmake
new file mode 100644
index 000000000..06106dc60
--- /dev/null
+++ b/infra/cmake/packages/CMSIS-NN-4.1.0/CMSIS-NNConfig.cmake
@@ -0,0 +1,14 @@
+function(_CMSIS_NN_import)
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(CMSIS_NN_4_1_0_URL ${EXTERNAL_DOWNLOAD_SERVER}/ARM-software/CMSIS-NN/archive/refs/tags/v4.1.0.tar.gz)
+
+ ExternalSource_Download(CMSIS_NN DIRNAME CMSIS-NN-4.1.0 ${CMSIS_NN_4_1_0_URL})
+
+ set(CMSIS_NNSource_DIR ${CMSIS_NN_SOURCE_DIR} PARENT_SCOPE)
+ set(CMSIS_NNSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_CMSIS_NN_import)
+
+_CMSIS_NN_import()
diff --git a/infra/cmake/packages/CMSIS-NN-4.1.0/CMSIS-NNConfigVersion.cmake b/infra/cmake/packages/CMSIS-NN-4.1.0/CMSIS-NNConfigVersion.cmake
new file mode 100644
index 000000000..5296e197d
--- /dev/null
+++ b/infra/cmake/packages/CMSIS-NN-4.1.0/CMSIS-NNConfigVersion.cmake
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "4.1.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake b/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake
new file mode 100644
index 000000000..d1588d3fd
--- /dev/null
+++ b/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake
@@ -0,0 +1,16 @@
+function(_CMSISSource_import)
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(CMSIS_5_8_0_URL ${EXTERNAL_DOWNLOAD_SERVER}/ARM-software/CMSIS_5/archive/refs/tags/5.8.0.tar.gz)
+ set(CMSIS_5_8_0_SHA256 fe6b697b8782e7fd6131034b7646a3b65c83018774abf7f9f94901a3bc7c82ad)
+
+ ExternalSource_Download(CMSIS DIRNAME CMSIS-5.8.0 ${CMSIS_5_8_0_URL}
+ CHECKSUM "SHA256=${CMSIS_5_8_0_SHA256}")
+
+ set(CMSISSource_DIR ${CMSIS_SOURCE_DIR} PARENT_SCOPE)
+ set(CMSISSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_CMSISSource_import)
+
+_CMSISSource_import()
diff --git a/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfigVersion.cmake b/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfigVersion.cmake
new file mode 100644
index 000000000..ca6f7826d
--- /dev/null
+++ b/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfigVersion.cmake
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "5.8.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/CaffeSourceConfig.cmake b/infra/cmake/packages/CaffeSourceConfig.cmake
index 41cc2c9f7..05eb5b30e 100644
--- a/infra/cmake/packages/CaffeSourceConfig.cmake
+++ b/infra/cmake/packages/CaffeSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_CaffeSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(CAFFE_URL https://github.com/BVLC/caffe/archive/1.0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(CAFFE_URL ${EXTERNAL_DOWNLOAD_SERVER}/BVLC/caffe/archive/1.0.tar.gz)
ExternalSource_Download(CAFFE ${CAFFE_URL})
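
A recurring change in this diff replaces hard-coded github.com URLs with ${EXTERNAL_DOWNLOAD_SERVER} so downloads can be redirected to a mirror via the environment. envoption comes from OptionTools; a minimal sketch of its assumed semantics, not the repo's actual implementation:

    macro(envoption VAR)
      # use the environment value when present, otherwise the given default
      if(DEFINED ENV{${VAR}})
        set(${VAR} "$ENV{${VAR}}")
      else()
        set(${VAR} ${ARGN})
      endif()
    endmacro()

    # Exporting EXTERNAL_DOWNLOAD_SERVER=https://mirror.example.com would then
    # redirect every *_URL default in these configs to the mirror.
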
diff --git a/infra/cmake/packages/CpuInfoSourceConfig.cmake b/infra/cmake/packages/CpuInfoSourceConfig.cmake
new file mode 100644
index 000000000..b93a6a2e5
--- /dev/null
+++ b/infra/cmake/packages/CpuInfoSourceConfig.cmake
@@ -0,0 +1,21 @@
+function(_CpuInfoSource_import)
+ if(NOT ${DOWNLOAD_CPUINFO})
+ set(CpuInfoSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT ${DOWNLOAD_CPUINFO})
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ # CPUINFO commit from tflite v2.8
+ envoption(CPUINFO_URL ${EXTERNAL_DOWNLOAD_SERVER}/pytorch/cpuinfo/archive/5916273f79a21551890fd3d56fc5375a78d1598d.tar.gz)
+ ExternalSource_Download(CPUINFO
+ DIRNAME CPUINFO
+ URL ${CPUINFO_URL})
+
+ set(CpuInfoSource_DIR ${CPUINFO_SOURCE_DIR} PARENT_SCOPE)
+ set(CpuInfoSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_CpuInfoSource_import)
+
+_CpuInfoSource_import()
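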
diff --git a/infra/cmake/packages/Egl_HeadersSourceConfig.cmake b/infra/cmake/packages/Egl_HeadersSourceConfig.cmake
new file mode 100644
index 000000000..fae57f6ce
--- /dev/null
+++ b/infra/cmake/packages/Egl_HeadersSourceConfig.cmake
@@ -0,0 +1,21 @@
+function(_Egl_HeadersSource_import)
+ if(NOT DOWNLOAD_EGL_HEADERS)
+ set(Egl_HeadersSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_EGL_HEADERS)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(EGL_HEADERS_URL ${EXTERNAL_DOWNLOAD_SERVER}/KhronosGroup/EGL-Registry/archive/649981109e263b737e7735933c90626c29a306f2.zip)
+
+ ExternalSource_Download(EGL_HEADERS
+ DIRNAME EGL_HEADERS
+ URL ${EGL_HEADERS_URL})
+
+ set(Egl_HeadersSource_DIR ${EGL_HEADERS_SOURCE_DIR} PARENT_SCOPE)
+ set(Egl_HeadersSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_Egl_HeadersSource_import)
+
+_Egl_HeadersSource_import()
diff --git a/infra/cmake/packages/FarmhashSourceConfig.cmake b/infra/cmake/packages/FarmhashSourceConfig.cmake
index a19c8b992..fa1867c5c 100644
--- a/infra/cmake/packages/FarmhashSourceConfig.cmake
+++ b/infra/cmake/packages/FarmhashSourceConfig.cmake
@@ -10,7 +10,8 @@ function(_FarmhashSource_import)
# NOTE TensorFlow 1.12 downloads farmhash from the following URL
# TensorFlow 1.13.1 downloads farmhash from the following URL
# TensorFlow 2.3.0 downloads farmhash from the following URL
- envoption(FARMHASH_1_12_URL https://github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(FARMHASH_1_12_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz)
ExternalSource_Download(FARMHASH ${FARMHASH_1_12_URL})
diff --git a/infra/cmake/packages/FlatBuffers-2.0/FlatBuffersConfig.cmake b/infra/cmake/packages/FlatBuffers-2.0/FlatBuffersConfig.cmake
new file mode 100644
index 000000000..99da30803
--- /dev/null
+++ b/infra/cmake/packages/FlatBuffers-2.0/FlatBuffersConfig.cmake
@@ -0,0 +1,135 @@
+# TODO Remove other Flatbuffers versions
+function(_FlatBuffers_import)
+ find_package(Flatbuffers 2.0 QUIET)
+ set(FlatBuffers_FOUND ${Flatbuffers_FOUND} PARENT_SCOPE)
+endfunction(_FlatBuffers_import)
+
+function(_FlatBuffers_build)
+ if(NOT BUILD_FLATBUFFERS)
+ message(STATUS "FlatBuffersConfig !BUILD_FLATBUFFERS")
+ return()
+ endif(NOT BUILD_FLATBUFFERS)
+
+ nnas_find_package(FlatBuffersSource EXACT 2.0 QUIET)
+
+ if(NOT FlatBuffersSource_FOUND)
+ # Source is not available
+ message(STATUS "FlatBuffersConfig !FlatBuffersSource_FOUND")
+ return()
+ endif(NOT FlatBuffersSource_FOUND)
+
+ set(ADDITIONAL_CXX_FLAGS "")
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 8.0)
+ set(ADDITIONAL_CXX_FLAGS "-Wno-error=class-memaccess")
+ endif()
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 12.0)
+ set(ADDITIONAL_CXX_FLAGS "-Wno-error=stringop-overflow")
+ endif()
+
+ nnas_include(ExternalBuildTools)
+ ExternalBuild_CMake(CMAKE_DIR ${FlatBuffersSource_DIR}
+ BUILD_DIR ${CMAKE_BINARY_DIR}/externals/FLATBUFFERS-2.0/build
+ INSTALL_DIR ${EXT_OVERLAY_DIR}
+ BUILD_FLAGS ${ADDITIONAL_CXX_FLAGS}
+ IDENTIFIER "2.0"
+ EXTRA_OPTS "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF"
+ "-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON"
+ PKG_NAME "FLATBUFFERS-2.0")
+
+endfunction(_FlatBuffers_build)
+
+_FlatBuffers_build()
+_FlatBuffers_import()
+
+# For cross compilation, BUILD_HOST_EXEC should be set so that the host flatc
+# executable exists as ${BUILD_HOST_EXEC}/overlay/bin/flatc.
+# If EXTERNAL_FLATC is set, the ${EXTERNAL_FLATC} file is used instead.
+set(FLATC_PATH "$<TARGET_FILE:flatbuffers::flatc>")
+
+if(DEFINED ENV{BUILD_HOST_EXEC})
+ set(FLATC_PATH $ENV{BUILD_HOST_EXEC}/overlay/bin/flatc)
+endif(DEFINED ENV{BUILD_HOST_EXEC})
+if(DEFINED ENV{EXTERNAL_FLATC})
+ set(FLATC_PATH $ENV{EXTERNAL_FLATC})
+endif(DEFINED ENV{EXTERNAL_FLATC})
+
+if(FlatBuffers_FOUND)
+ if(NOT TARGET flatbuffers-2.0)
+ add_library(flatbuffers-2.0 INTERFACE)
+ target_link_libraries(flatbuffers-2.0 INTERFACE flatbuffers::flatbuffers)
+ message(STATUS "Found flatbuffers-2.0: TRUE")
+ endif(NOT TARGET flatbuffers-2.0)
+
+ function(FlatBuffers_Generate PREFIX OUTPUT_DIR SCHEMA_DIR)
+ get_filename_component(abs_output_dir ${OUTPUT_DIR} ABSOLUTE)
+ get_filename_component(abs_schema_dir ${SCHEMA_DIR} ABSOLUTE)
+
+ foreach(schema ${ARGN})
+ get_filename_component(schema_fn "${schema}" NAME)
+ get_filename_component(dir "${schema}" DIRECTORY)
+
+ get_filename_component(schema_fn_we "${schema_fn}" NAME_WE)
+
+ list(APPEND SCHEMA_FILES "${abs_schema_dir}/${schema}")
+ list(APPEND OUTPUT_FILES "${abs_output_dir}/${schema_fn_we}_generated.h")
+ endforeach()
+
+ add_custom_command(OUTPUT ${OUTPUT_FILES}
+ COMMAND ${CMAKE_COMMAND} -E make_directory "${abs_output_dir}"
+ COMMAND "${FLATC_PATH}" -c --no-includes
+ --no-union-value-namespacing
+ --gen-object-api -o "${abs_output_dir}"
+ ${SCHEMA_FILES}
+ DEPENDS flatbuffers::flatc)
+
+ set(${PREFIX}_SOURCES ${OUTPUT_FILES} PARENT_SCOPE)
+ set(${PREFIX}_INCLUDE_DIRS ${abs_output_dir} PARENT_SCOPE)
+ endfunction(FlatBuffers_Generate)
+
+ function(FlatBuffers_Target TGT)
+ set(oneValueArgs OUTPUT_DIR SCHEMA_DIR INCLUDE_DIR)
+ set(multiValueArgs SCHEMA_FILES)
+ cmake_parse_arguments(ARG "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+ # Use OUTPUT_DIR as INCLUDE_DIR if INCLUDE_DIR is not specified
+ if(NOT ARG_INCLUDE_DIR)
+ set(ARG_INCLUDE_DIR ${ARG_OUTPUT_DIR})
+ endif(NOT ARG_INCLUDE_DIR)
+
+ get_filename_component(abs_output_dir ${ARG_OUTPUT_DIR} ABSOLUTE)
+ get_filename_component(abs_include_dir ${ARG_INCLUDE_DIR} ABSOLUTE)
+ get_filename_component(abs_schema_dir ${ARG_SCHEMA_DIR} ABSOLUTE)
+
+ # Let's reset list variables before using them
+ # NOTE THIS DOES NOT AFFECT parent scope
+ unset(SCHEMA_FILES)
+ unset(OUTPUT_FILES)
+
+ foreach(schema ${ARG_SCHEMA_FILES})
+ get_filename_component(schema_fn "${schema}" NAME)
+ get_filename_component(dir "${schema}" DIRECTORY)
+
+ get_filename_component(schema_fn_we "${schema_fn}" NAME_WE)
+
+ list(APPEND SCHEMA_FILES "${abs_schema_dir}/${schema}")
+ list(APPEND OUTPUT_FILES "${abs_output_dir}/${schema_fn_we}_generated.h")
+ endforeach()
+
+ # Generate headers
+ add_custom_command(OUTPUT ${OUTPUT_FILES}
+ COMMAND ${CMAKE_COMMAND} -E make_directory "${abs_output_dir}"
+ COMMAND "${FLATC_PATH}" -c --no-includes
+ --no-union-value-namespacing
+ --gen-object-api -o "${abs_output_dir}"
+ ${SCHEMA_FILES}
+ DEPENDS ${SCHEMA_FILES}
+ COMMENT "Generate '${TGT}' headers")
+
+ # NOTE This header-only library is deliberately declared as a STATIC library
+ # to avoid possible scope issues related to generated files
+ add_library(${TGT} STATIC ${OUTPUT_FILES})
+ set_target_properties(${TGT} PROPERTIES LINKER_LANGUAGE CXX)
+ target_include_directories(${TGT} PUBLIC "${ARG_INCLUDE_DIR}")
+ target_link_libraries(${TGT} PUBLIC flatbuffers-2.0)
+ endfunction(FlatBuffers_Target)
+endif(FlatBuffers_FOUND)
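
A sketch of generating headers from a schema with the FlatBuffers_Target function defined above; the target names, directories, and schema file are hypothetical:

    FlatBuffers_Target(my_schema_fb
      OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen"
      SCHEMA_DIR "${CMAKE_CURRENT_SOURCE_DIR}/schema"
      SCHEMA_FILES my_schema.fbs)                       # -> gen/my_schema_generated.h
    target_link_libraries(my_app PRIVATE my_schema_fb)  # links flatbuffers-2.0 transitively
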
diff --git a/infra/cmake/packages/FlatBuffers-2.0/FlatBuffersConfigVersion.cmake b/infra/cmake/packages/FlatBuffers-2.0/FlatBuffersConfigVersion.cmake
new file mode 100644
index 000000000..e4a87a7d5
--- /dev/null
+++ b/infra/cmake/packages/FlatBuffers-2.0/FlatBuffersConfigVersion.cmake
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "2.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/FlatBuffersConfig.cmake b/infra/cmake/packages/FlatBuffersConfig.cmake
deleted file mode 100644
index da084e7d3..000000000
--- a/infra/cmake/packages/FlatBuffersConfig.cmake
+++ /dev/null
@@ -1,116 +0,0 @@
-function(_FlatBuffers_import)
- find_package(Flatbuffers QUIET)
- set(FlatBuffers_FOUND ${Flatbuffers_FOUND} PARENT_SCOPE)
-endfunction(_FlatBuffers_import)
-
-function(_FlatBuffers_build)
- if(NOT BUILD_FLATBUFFERS)
- return()
- endif(NOT BUILD_FLATBUFFERS)
-
- nnas_find_package(FlatBuffersSource EXACT 1.10 QUIET)
-
- if(NOT FlatBuffersSource_FOUND)
- # Source is not available
- return()
- endif(NOT FlatBuffersSource_FOUND)
-
- set(ADDITIONAL_CXX_FLAGS "")
- if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 8.0)
- set(ADDITIONAL_CXX_FLAGS "-Wno-error=class-memaccess")
- endif()
-
- nnas_include(ExternalBuildTools)
- ExternalBuild_CMake(CMAKE_DIR ${FlatBuffersSource_DIR}
- BUILD_DIR ${CMAKE_BINARY_DIR}/externals/FLATBUFFERS/build
- INSTALL_DIR ${EXT_OVERLAY_DIR}
- BUILD_FLAGS ${ADDITIONAL_CXX_FLAGS}
- IDENTIFIER "1.10-fix2"
- EXTRA_OPTS "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF"
- PKG_NAME "FLATBUFFERS")
-
-endfunction(_FlatBuffers_build)
-
-_FlatBuffers_build()
-_FlatBuffers_import()
-
-if(FlatBuffers_FOUND)
- if(NOT TARGET flatbuffers)
- add_library(flatbuffers INTERFACE)
- target_link_libraries(flatbuffers INTERFACE flatbuffers::flatbuffers)
- message(STATUS "Found FlatBuffers: TRUE")
- endif(NOT TARGET flatbuffers)
-
- function(FlatBuffers_Generate PREFIX OUTPUT_DIR SCHEMA_DIR)
- get_filename_component(abs_output_dir ${OUTPUT_DIR} ABSOLUTE)
- get_filename_component(abs_schema_dir ${SCHEMA_DIR} ABSOLUTE)
-
- foreach(schema ${ARGN})
- get_filename_component(schema_fn "${schema}" NAME)
- get_filename_component(dir "${schema}" DIRECTORY)
-
- get_filename_component(schema_fn_we "${schema_fn}" NAME_WE)
-
- list(APPEND SCHEMA_FILES "${abs_schema_dir}/${schema}")
- list(APPEND OUTPUT_FILES "${abs_output_dir}/${schema_fn_we}_generated.h")
- endforeach()
-
- add_custom_command(OUTPUT ${OUTPUT_FILES}
- COMMAND ${CMAKE_COMMAND} -E make_directory "${abs_output_dir}"
- COMMAND "$<TARGET_FILE:flatbuffers::flatc>" -c --no-includes
- --no-union-value-namespacing
- --gen-object-api -o "${abs_output_dir}"
- ${SCHEMA_FILES}
- DEPENDS flatbuffers::flatc)
-
- set(${PREFIX}_SOURCES ${OUTPUT_FILES} PARENT_SCOPE)
- set(${PREFIX}_INCLUDE_DIRS ${abs_output_dir} PARENT_SCOPE)
- endfunction(FlatBuffers_Generate)
-
- function(FlatBuffers_Target TGT)
- set(oneValueArgs OUTPUT_DIR SCHEMA_DIR INCLUDE_DIR)
- set(multiValueArgs SCHEMA_FILES)
- cmake_parse_arguments(ARG "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
-
- # Use OUTPUT_DIR as INCLUDE_DIR if INCLUDE_DIR is not specified
- if(NOT ARG_INCLUDE_DIR)
- set(ARG_INCLUDE_DIR ${ARG_OUTPUT_DIR})
- endif(NOT ARG_INCLUDE_DIR)
-
- get_filename_component(abs_output_dir ${ARG_OUTPUT_DIR} ABSOLUTE)
- get_filename_component(abs_include_dir ${ARG_INCLUDE_DIR} ABSOLUTE)
- get_filename_component(abs_schema_dir ${ARG_SCHEMA_DIR} ABSOLUTE)
-
- # Let's reset list variables before using them
- # NOTE THIS DOES NOT AFFECT parent scope
- unset(SCHEMA_FILES)
- unset(OUTPUT_FILES)
-
- foreach(schema ${ARG_SCHEMA_FILES})
- get_filename_component(schema_fn "${schema}" NAME)
- get_filename_component(dir "${schema}" DIRECTORY)
-
- get_filename_component(schema_fn_we "${schema_fn}" NAME_WE)
-
- list(APPEND SCHEMA_FILES "${abs_schema_dir}/${schema}")
- list(APPEND OUTPUT_FILES "${abs_output_dir}/${schema_fn_we}_generated.h")
- endforeach()
-
- # Generate headers
- add_custom_command(OUTPUT ${OUTPUT_FILES}
- COMMAND ${CMAKE_COMMAND} -E make_directory "${abs_output_dir}"
- COMMAND "$<TARGET_FILE:flatbuffers::flatc>" -c --no-includes
- --no-union-value-namespacing
- --gen-object-api -o "${abs_output_dir}"
- ${SCHEMA_FILES}
- DEPENDS ${SCHEMA_FILES}
- COMMENT "Generate '${TGT}' headers")
-
- # NOTE This header-only library is deliberately declared as STATIC library
- # to avoid possible scope issues related with generated files
- add_library(${TGT} STATIC ${OUTPUT_FILES})
- set_target_properties(${TGT} PROPERTIES LINKER_LANGUAGE CXX)
- target_include_directories(${TGT} PUBLIC "${ARG_INCLUDE_DIR}")
- target_link_libraries(${TGT} PUBLIC flatbuffers)
- endfunction(FlatBuffers_Target)
-endif(FlatBuffers_FOUND)
diff --git a/infra/cmake/packages/FlatBuffersSource-1.10/FlatBuffersSourceConfig.cmake b/infra/cmake/packages/FlatBuffersSource-1.10/FlatBuffersSourceConfig.cmake
deleted file mode 100644
index 09a922b67..000000000
--- a/infra/cmake/packages/FlatBuffersSource-1.10/FlatBuffersSourceConfig.cmake
+++ /dev/null
@@ -1,21 +0,0 @@
-function(_FlatBuffersSource_import)
- if(NOT DOWNLOAD_FLATBUFFERS)
- set(FlatBuffersSource_FOUND FALSE PARENT_SCOPE)
- return()
- endif(NOT DOWNLOAD_FLATBUFFERS)
-
- nnas_include(ExternalSourceTools)
- nnas_include(OptionTools)
-
- envoption(FLATBUFFERS_1_10_URL https://github.com/google/flatbuffers/archive/v1.10.0.tar.gz)
- ExternalSource_Download(FLATBUFFERS
- DIRNAME FLATBUFFERS-1.10
- CHECKSUM MD5=f7d19a3f021d93422b0bc287d7148cd2
- URL ${FLATBUFFERS_1_10_URL}
- )
-
- set(FlatBuffersSource_DIR ${FLATBUFFERS_SOURCE_DIR} PARENT_SCOPE)
- set(FlatBuffersSource_FOUND TRUE PARENT_SCOPE)
-endfunction(_FlatBuffersSource_import)
-
-_FlatBuffersSource_import()
diff --git a/infra/cmake/packages/FlatBuffersSource-1.10/FlatBuffersSourceConfigVersion.cmake b/infra/cmake/packages/FlatBuffersSource-1.10/FlatBuffersSourceConfigVersion.cmake
deleted file mode 100644
index 6585f21d5..000000000
--- a/infra/cmake/packages/FlatBuffersSource-1.10/FlatBuffersSourceConfigVersion.cmake
+++ /dev/null
@@ -1,10 +0,0 @@
-set(PACKAGE_VERSION "1.10")
-set(PACKAGE_VERSION_EXACT FALSE)
-set(PACKAGE_VERSION_COMPATIBLE FALSE)
-set(PACKAGE_VERSION_UNSUITABLE TRUE)
-
-if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
- set(PACKAGE_VERSION_EXACT TRUE)
- set(PACKAGE_VERSION_COMPATIBLE TRUE)
- set(PACKAGE_VERSION_UNSUITABLE FALSE)
-endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/FlatBuffersSource-1.11/FlatBuffersSourceConfig.cmake b/infra/cmake/packages/FlatBuffersSource-1.11/FlatBuffersSourceConfig.cmake
deleted file mode 100644
index 92efbf97e..000000000
--- a/infra/cmake/packages/FlatBuffersSource-1.11/FlatBuffersSourceConfig.cmake
+++ /dev/null
@@ -1,21 +0,0 @@
-function(_FlatBuffersSource_import)
- if(NOT DOWNLOAD_FLATBUFFERS)
- set(FlatBuffersSource_FOUND FALSE PARENT_SCOPE)
- return()
- endif(NOT DOWNLOAD_FLATBUFFERS)
-
- nnas_include(ExternalSourceTools)
- nnas_include(OptionTools)
-
- envoption(FLATBUFFERS_1_11_URL https://github.com/google/flatbuffers/archive/v1.11.0.tar.gz)
- ExternalSource_Download(FLATBUFFERS
- DIRNAME FLATBUFFERS-1.11
- CHECKSUM MD5=02c64880acb89dbd57eebacfd67200d8
- URL ${FLATBUFFERS_1_11_URL}
- )
-
- set(FlatBuffersSource_DIR ${FLATBUFFERS_SOURCE_DIR} PARENT_SCOPE)
- set(FlatBuffersSource_FOUND TRUE PARENT_SCOPE)
-endfunction(_FlatBuffersSource_import)
-
-_FlatBuffersSource_import()
diff --git a/infra/cmake/packages/FlatBuffersSource-1.11/FlatBuffersSourceConfigVersion.cmake b/infra/cmake/packages/FlatBuffersSource-1.11/FlatBuffersSourceConfigVersion.cmake
deleted file mode 100644
index f008e0528..000000000
--- a/infra/cmake/packages/FlatBuffersSource-1.11/FlatBuffersSourceConfigVersion.cmake
+++ /dev/null
@@ -1,10 +0,0 @@
-set(PACKAGE_VERSION "1.11")
-set(PACKAGE_VERSION_EXACT FALSE)
-set(PACKAGE_VERSION_COMPATIBLE FALSE)
-set(PACKAGE_VERSION_UNSUITABLE TRUE)
-
-if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
- set(PACKAGE_VERSION_EXACT TRUE)
- set(PACKAGE_VERSION_COMPATIBLE TRUE)
- set(PACKAGE_VERSION_UNSUITABLE FALSE)
-endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/FlatBuffersSource-2.0/FlatBuffersSourceConfig.cmake b/infra/cmake/packages/FlatBuffersSource-2.0/FlatBuffersSourceConfig.cmake
new file mode 100644
index 000000000..e094055b7
--- /dev/null
+++ b/infra/cmake/packages/FlatBuffersSource-2.0/FlatBuffersSourceConfig.cmake
@@ -0,0 +1,22 @@
+function(_FlatBuffersSource_import)
+ if(NOT DOWNLOAD_FLATBUFFERS)
+ set(FlatBuffersSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_FLATBUFFERS)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(FLATBUFFERS_2_0_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/flatbuffers/archive/v2.0.0.tar.gz)
+ ExternalSource_Download(FLATBUFFERS
+ DIRNAME FLATBUFFERS-2.0
+ CHECKSUM MD5=a27992324c3cbf86dd888268a23d17bd
+ URL ${FLATBUFFERS_2_0_URL}
+ )
+
+ set(FlatBuffersSource_DIR ${FLATBUFFERS_SOURCE_DIR} PARENT_SCOPE)
+ set(FlatBuffersSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_FlatBuffersSource_import)
+
+_FlatBuffersSource_import()
diff --git a/infra/cmake/packages/FlatBuffersSource-2.0/FlatBuffersSourceConfigVersion.cmake b/infra/cmake/packages/FlatBuffersSource-2.0/FlatBuffersSourceConfigVersion.cmake
new file mode 100644
index 000000000..e4a87a7d5
--- /dev/null
+++ b/infra/cmake/packages/FlatBuffersSource-2.0/FlatBuffersSourceConfigVersion.cmake
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "2.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/FlatBuffersSourceConfig.cmake b/infra/cmake/packages/FlatBuffersSourceConfig.cmake
deleted file mode 100644
index 52bce6de0..000000000
--- a/infra/cmake/packages/FlatBuffersSourceConfig.cmake
+++ /dev/null
@@ -1,28 +0,0 @@
-function(_FlatBuffersSource_import)
- if(NOT DOWNLOAD_FLATBUFFERS)
- set(FlatBuffersSource_FOUND FALSE PARENT_SCOPE)
- return()
- endif(NOT DOWNLOAD_FLATBUFFERS)
-
- nnas_include(ExternalSourceTools)
- nnas_include(OptionTools)
-
- # Each TensorFlow needs a specific version of Flatbuffers
- # - TensorFlow 1.7 downloads it from https://github.com/google/flatbuffers/archive/971a68110e4.tar.gz
- # - TensorFlow 1.12 downloads it from https://github.com/google/flatbuffers/archive/1f5eae5d6a1.tar.gz
- #
- # Let's use 1.10 released in 2018.10 (compatible with 1f5eae5d6a1).
- #
- # TODO Manage multiple versions
- envoption(FLATBUFFERS_URL https://github.com/google/flatbuffers/archive/v1.10.0.tar.gz)
- ExternalSource_Download(FLATBUFFERS
- DIRNAME FLATBUFFERS
- CHECKSUM MD5=f7d19a3f021d93422b0bc287d7148cd2
- URL ${FLATBUFFERS_URL}
- )
-
- set(FlatBuffersSource_DIR ${FLATBUFFERS_SOURCE_DIR} PARENT_SCOPE)
- set(FlatBuffersSource_FOUND TRUE PARENT_SCOPE)
-endfunction(_FlatBuffersSource_import)
-
-_FlatBuffersSource_import()
diff --git a/infra/cmake/packages/FlatBuffersSourceConfigVersion.cmake b/infra/cmake/packages/FlatBuffersSourceConfigVersion.cmake
deleted file mode 100644
index ac9e22e51..000000000
--- a/infra/cmake/packages/FlatBuffersSourceConfigVersion.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-set(PACKAGE_VERSION_EXACT FALSE)
-set(PACKAGE_VERSION_COMPATIBLE FALSE)
-set(PACKAGE_VERSION_UNSUITABLE TRUE)
-
-if(NOT PACKAGE_FIND_VERSION)
- # This package works only when find_package(...) call has no EXACT option
- set(PACKAGE_VERSION_COMPATIBLE TRUE)
- set(PACKAGE_VERSION_UNSUITABLE FALSE)
-endif(NOT PACKAGE_FIND_VERSION)
diff --git a/infra/cmake/packages/Fp16SourceConfig.cmake b/infra/cmake/packages/Fp16SourceConfig.cmake
new file mode 100644
index 000000000..3df4e4cc5
--- /dev/null
+++ b/infra/cmake/packages/Fp16SourceConfig.cmake
@@ -0,0 +1,21 @@
+function(_Fp16Source_import)
+ if(NOT ${DOWNLOAD_FP16})
+ set(Fp16Source_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT ${DOWNLOAD_FP16})
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ # fp16 commit used by xnnpack commit 8b283aa30a31
+ envoption(FP16_URL ${EXTERNAL_DOWNLOAD_SERVER}/Maratyszcza/FP16/archive/4dfe081cf6bcd15db339cf2680b9281b8451eeb3.tar.gz)
+ ExternalSource_Download(FP16
+ DIRNAME FP16
+ URL ${FP16_URL})
+
+ set(Fp16Source_DIR ${FP16_SOURCE_DIR} PARENT_SCOPE)
+ set(Fp16Source_FOUND TRUE PARENT_SCOPE)
+endfunction(_Fp16Source_import)
+
+_Fp16Source_import()
diff --git a/infra/cmake/packages/FxdivSourceConfig.cmake b/infra/cmake/packages/FxdivSourceConfig.cmake
new file mode 100644
index 000000000..4427bf292
--- /dev/null
+++ b/infra/cmake/packages/FxdivSourceConfig.cmake
@@ -0,0 +1,21 @@
+function(_FxdivSource_import)
+ if(NOT ${DOWNLOAD_FXDIV})
+ set(FxdivSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT ${DOWNLOAD_FXDIV})
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ # fxdiv commit used by xnnpack commit 8b283aa30a31
+ envoption(FXDIV_URL ${EXTERNAL_DOWNLOAD_SERVER}/Maratyszcza/FXdiv/archive/f8c5354679ec2597792bc70a9e06eff50c508b9a.tar.gz)
+ ExternalSource_Download(FXDIV
+ DIRNAME FXDIV
+ URL ${FXDIV_URL})
+
+ set(FxdivSource_DIR ${FXDIV_SOURCE_DIR} PARENT_SCOPE)
+ set(FxdivSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_FxdivSource_import)
+
+_FxdivSource_import()
diff --git a/infra/cmake/packages/GEMMLowpSourceConfig.cmake b/infra/cmake/packages/GEMMLowpSourceConfig.cmake
index 6e1cfa9c9..3b3560359 100644
--- a/infra/cmake/packages/GEMMLowpSourceConfig.cmake
+++ b/infra/cmake/packages/GEMMLowpSourceConfig.cmake
@@ -9,7 +9,8 @@ function(_GEMMLowpSource_import)
# NOTE TensorFlow 1.12 uses the following URL
# TensorFlow 1.13.1 uses the following URL
- envoption(GEMMLOWP_URL https://github.com/google/gemmlowp/archive/38ebac7b059e84692f53e5938f97a9943c120d98.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(GEMMLOWP_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/gemmlowp/archive/38ebac7b059e84692f53e5938f97a9943c120d98.tar.gz)
ExternalSource_Download(GEMMLOWP ${GEMMLOWP_URL})
diff --git a/infra/cmake/packages/GFlagsSourceConfig.cmake b/infra/cmake/packages/GFlagsSourceConfig.cmake
index 3e70d89fc..2f9b7537f 100644
--- a/infra/cmake/packages/GFlagsSourceConfig.cmake
+++ b/infra/cmake/packages/GFlagsSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_GFlagsSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(GFLAGS_URL https://github.com/gflags/gflags/archive/v2.2.1.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(GFLAGS_URL ${EXTERNAL_DOWNLOAD_SERVER}/gflags/gflags/archive/v2.2.1.tar.gz)
ExternalSource_Download(GFLAGS ${GFLAGS_URL})
diff --git a/infra/cmake/packages/GTestConfig.cmake b/infra/cmake/packages/GTestConfig.cmake
index 62a15e0cc..c844f4c63 100644
--- a/infra/cmake/packages/GTestConfig.cmake
+++ b/infra/cmake/packages/GTestConfig.cmake
@@ -6,6 +6,7 @@ function(_GTest_build)
nnas_find_package(GTestSource QUIET)
if(NOT GTestSource_FOUND)
+ message(STATUS "GTest_build skip: NOT GTestSource_FOUND")
return()
endif(NOT GTestSource_FOUND)
@@ -13,9 +14,14 @@ function(_GTest_build)
ExternalBuild_CMake(CMAKE_DIR ${GTestSource_DIR}
BUILD_DIR ${CMAKE_BINARY_DIR}/externals/GTEST/build
INSTALL_DIR ${EXT_OVERLAY_DIR}
- IDENTIFIER "1.8.0-fix1"
+ IDENTIFIER "1.11.0"
PKG_NAME "GTEST")
+ set(GTEST_FOUND TRUE PARENT_SCOPE)
+ set(GTEST_INCLUDE_DIRS ${EXT_OVERLAY_DIR}/include PARENT_SCOPE)
+ set(GTEST_LIBRARIES ${EXT_OVERLAY_DIR}/lib/libgtest.a PARENT_SCOPE)
+ set(GTEST_MAIN_LIBRARIES ${EXT_OVERLAY_DIR}/lib/libgtest_main.a PARENT_SCOPE)
+
endfunction(_GTest_build)
_GTest_build()
@@ -24,7 +30,12 @@ _GTest_build()
# Note: cmake supports GTest and does not find GTestConfig.cmake or GTest-config.cmake.
# Refer to "https://cmake.org/cmake/help/v3.5/module/FindGTest.html"
# find_package(GTest) creates options like GTEST_FOUND, not GTest_FOUND.
-find_package(GTest)
+if(NOT GTEST_FOUND)
+ message(STATUS "GTEST_FOUND false: call find_package(GTest)")
+ # Reset package config directory cache to prevent recursive find
+ unset(GTest_DIR CACHE)
+ find_package(GTest)
+endif(NOT GTEST_FOUND)
find_package(Threads)
if(${GTEST_FOUND} AND TARGET Threads::Threads)
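
After this config runs, either the freshly built GTest 1.11.0 or a stock find_package(GTest) fills the classic GTEST_* variables. A hedged consumer sketch; my_test is hypothetical:

    if(GTEST_FOUND AND TARGET Threads::Threads)
      add_executable(my_test my_test.cpp)
      target_include_directories(my_test PRIVATE ${GTEST_INCLUDE_DIRS})
      target_link_libraries(my_test PRIVATE
        ${GTEST_MAIN_LIBRARIES} ${GTEST_LIBRARIES} Threads::Threads)
    endif()
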
diff --git a/infra/cmake/packages/GTestSourceConfig.cmake b/infra/cmake/packages/GTestSourceConfig.cmake
index 8b7495fbc..643c3d109 100644
--- a/infra/cmake/packages/GTestSourceConfig.cmake
+++ b/infra/cmake/packages/GTestSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_GTestSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(GTEST_URL https://github.com/google/googletest/archive/release-1.8.0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(GTEST_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/googletest/archive/release-1.11.0.tar.gz)
ExternalSource_Download(GTEST ${GTEST_URL})
diff --git a/infra/cmake/packages/GoogleDoubleConversionConfig.cmake b/infra/cmake/packages/GoogleDoubleConversionConfig.cmake
deleted file mode 100644
index 3fdc86102..000000000
--- a/infra/cmake/packages/GoogleDoubleConversionConfig.cmake
+++ /dev/null
@@ -1,52 +0,0 @@
-# https://github.com/google/double-conversion
-set(GOOGLE_DOUBLE_CONVERSION_PREFIX "/usr" CACHE PATH "Google DoubleConversion install prefix")
-
-function(_GoogleDoubleConversion_import)
- # Find the header & lib
- find_library(GoogleDoubleConversion_LIB
- NAMES double-conversion
- PATHS "${GOOGLE_DOUBLE_CONVERSION_PREFIX}/lib"
- )
-
- find_path(GoogleDoubleConversion_INCLUDE_DIR
- NAMES double-conversion/double-conversion.h
- PATHS "${GOOGLE_DOUBLE_CONVERSION_PREFIX}/include"
- )
-
- # TODO Version check
- set(GoogleDoubleConversion_FOUND TRUE)
-
- if(NOT GoogleDoubleConversion_LIB)
- set(GoogleDoubleConversion_FOUND FALSE)
- endif(NOT GoogleDoubleConversion_LIB)
-
- if(NOT GoogleDoubleConversion_INCLUDE_DIR)
- set(GoogleDoubleConversion_FOUND FALSE)
- endif(NOT GoogleDoubleConversion_INCLUDE_DIR)
-
- set(GoogleDoubleConversion_FOUND ${GoogleDoubleConversion_FOUND} PARENT_SCOPE)
-
- unset(MESSAGE)
- list(APPEND MESSAGE "Found Google Double Conversion")
-
- if(NOT GoogleDoubleConversion_FOUND)
- list(APPEND MESSAGE ": FALSE")
- else(NOT GoogleDoubleConversion_FOUND)
- list(APPEND MESSAGE " (include: ${GoogleDoubleConversion_INCLUDE_DIR} library: ${GoogleDoubleConversion_LIB})")
-
- # Add target
- if(NOT TARGET google_double_conversion)
- # NOTE IMPORTED target may be more appropriate for this case
- add_library(google_double_conversion INTERFACE)
- target_link_libraries(google_double_conversion INTERFACE ${GoogleDoubleConversion_LIB})
- target_include_directories(google_double_conversion INTERFACE ${GoogleDoubleConversion_INCLUDE_DIR})
-
- add_library(Google::DoubleConversion ALIAS google_double_conversion)
- endif(NOT TARGET google_double_conversion)
- endif(NOT GoogleDoubleConversion_FOUND)
-
- message(STATUS ${MESSAGE})
- set(GoogleDoubleConversion_FOUND ${GoogleDoubleConversion_FOUND} PARENT_SCOPE)
-endfunction(_GoogleDoubleConversion_import)
-
-_GoogleDoubleConversion_import()
diff --git a/infra/cmake/packages/GoogleNSyncConfig.cmake b/infra/cmake/packages/GoogleNSyncConfig.cmake
deleted file mode 100644
index 1fdf8cc20..000000000
--- a/infra/cmake/packages/GoogleNSyncConfig.cmake
+++ /dev/null
@@ -1,62 +0,0 @@
-# https://github.com/google/nsync
-set(GOOGLE_NSYNC_PREFIX "/usr" CACHE PATH "Where to find Google NSync library")
-
-function(_GoogleNSync_import)
- # Find the header & lib
- find_library(GoogleNSync_C_LIB
- NAMES nsync
- PATHS "${GOOGLE_NSYNC_PREFIX}/lib"
- )
-
- find_library(GoogleNSync_CPP_LIB
- NAMES nsync_cpp
- PATHS "${GOOGLE_NSYNC_PREFIX}/lib"
- )
-
- find_path(GoogleNSync_INCLUDE_DIR
- NAMES nsync.h
- PATHS "${GOOGLE_NSYNC_PREFIX}/include"
- )
-
- message(STATUS "GoogleNSync_C_LIB: ${GoogleNSync_C_LIB}")
- message(STATUS "GoogleNSync_CPP_LIB: ${GoogleNSync_CPP_LIB}")
- message(STATUS "GoogleNSync_INCLUDE_DIR: ${GoogleNSync_INCLUDE_DIR}")
-
- set(GoogleNSync_FOUND TRUE)
-
- if(NOT GoogleNSync_C_LIB)
- set(GoogleNSync_FOUND FALSE)
- endif(NOT GoogleNSync_C_LIB)
-
- if(NOT GoogleNSync_CPP_LIB)
- set(GoogleNSync_FOUND FALSE)
- endif(NOT GoogleNSync_CPP_LIB)
-
- if(NOT GoogleNSync_INCLUDE_DIR)
- set(GoogleNSync_FOUND FALSE)
- endif(NOT GoogleNSync_INCLUDE_DIR)
-
- unset(MESSAGE)
- list(APPEND MESSAGE "Found Google NSync")
-
- if(NOT GoogleNSync_FOUND)
- list(APPEND MESSAGE ": FALSE")
- else(NOT GoogleNSync_FOUND)
- list(APPEND MESSAGE " (include: ${GoogleNSync_INCLUDE_DIR} library: ${GoogleNSync_C_LIB} ${GoogleNSync_CPP_LIB})")
-
- # Add target
- if(NOT TARGET google_nsync)
- # NOTE IMPORTED target may be more appropriate for this case
- add_library(google_nsync INTERFACE)
- target_link_libraries(google_nsync INTERFACE ${GoogleNSync_C_LIB} ${GoogleNSync_CPP_LIB})
- target_include_directories(google_nsync INTERFACE ${GoogleNSync_INCLUDE_DIR})
-
- add_library(Google::NSync ALIAS google_nsync)
- endif(NOT TARGET google_nsync)
- endif(NOT GoogleNSync_FOUND)
-
- message(STATUS ${MESSAGE})
- set(GoogleNSync_FOUND ${GoogleNSync_FOUND} PARENT_SCOPE)
-endfunction(_GoogleNSync_import)
-
-_GoogleNSync_import()
diff --git a/infra/cmake/packages/H5Tinit.c.linux-armv7l b/infra/cmake/packages/H5Tinit.c.linux-armv7l
new file mode 100644
index 000000000..b0f6a470d
--- /dev/null
+++ b/infra/cmake/packages/H5Tinit.c.linux-armv7l
@@ -0,0 +1,977 @@
+/* Generated automatically by H5detect -- do not edit */
+
+
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * Copyright by The HDF Group. *
+ * Copyright by the Board of Trustees of the University of Illinois. *
+ * All rights reserved. *
+ * *
+ * This file is part of HDF5. The full HDF5 copyright notice, including *
+ * terms governing use, modification, and redistribution, is contained in *
+ * the files COPYING and Copyright.html. COPYING can be found at the root *
+ * of the source code distribution tree; Copyright.html can be found at the *
+ * root level of an installed copy of the electronic HDF5 document set and *
+ * is linked from the top-level documents page. It can also be found at *
+ * http://hdfgroup.org/HDF5/doc/Copyright.html. If you do not have *
+ * access to either file, you may request a copy from help@hdfgroup.org. *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ *
+ * Created: Mar 31, 2022
+ * Ubuntu <ubuntu@rpi4>
+ *
+ * Purpose: This machine-generated source code contains
+ * information about the various integer and
+ * floating point numeric formats found on this
+ * architecture. The parameters below should be
+ * checked carefully and errors reported to the
+ * HDF5 maintainer.
+ *
+ * Each of the numeric formats listed below are
+ * printed from most significant bit to least
+ * significant bit even though the actual bytes
+ * might be stored in a different order in
+ * memory. The integers above each binary byte
+ * indicate the relative order of the bytes in
+ * memory; little-endian machines have
+ * decreasing numbers while big-endian machines
+ * have increasing numbers.
+ *
+ * The fields of the numbers are printed as
+ * letters with `S' for the mantissa sign bit,
+ * `M' for the mantissa magnitude, and `E' for
+ * the exponent. The exponent has an associated
+ * bias which can be subtracted to find the
+ * true exponent. The radix point is assumed
+ * to be before the first `M' bit. Any bit
+ * of a floating-point value not falling into one
+ * of these categories is printed as a question
+ * mark. Bits of integer types are printed as
+ * `I' for 2's complement and `U' for magnitude.
+ *
+ * If the most significant bit of the normalized
+ * mantissa (always a `1' except for `0.0') is
+ * not stored then an `implicit=yes' appears
+ * under the field description. In this case,
+ * the radix point is still assumed to be
+ * before the first `M' but after the implicit
+ * bit.
+ *
+ * Modifications:
+ *
+ * DO NOT MAKE MODIFICATIONS TO THIS FILE!
+ * It was generated by code in `H5detect.c'.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/****************/
+/* Module Setup */
+/****************/
+
+#define H5T_PACKAGE /*suppress error about including H5Tpkg.h*/
+
+
+/***********/
+/* Headers */
+/***********/
+#include "H5private.h" /* Generic Functions */
+#include "H5Eprivate.h" /* Error handling */
+#include "H5FLprivate.h" /* Free Lists */
+#include "H5Iprivate.h" /* IDs */
+#include "H5Tpkg.h" /* Datatypes */
+
+
+/****************/
+/* Local Macros */
+/****************/
+
+
+/******************/
+/* Local Typedefs */
+/******************/
+
+
+/********************/
+/* Package Typedefs */
+/********************/
+
+
+/********************/
+/* Local Prototypes */
+/********************/
+
+
+/********************/
+/* Public Variables */
+/********************/
+
+
+/*****************************/
+/* Library Private Variables */
+/*****************************/
+
+
+/*********************/
+/* Package Variables */
+/*********************/
+
+
+
+/*******************/
+/* Local Variables */
+/*******************/
+
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5TN_init_interface
+ *
+ * Purpose: Initialize pre-defined native datatypes from code generated
+ * during the library configuration by H5detect.
+ *
+ * Return: Success: non-negative
+ * Failure: negative
+ *
+ * Programmer: Robb Matzke
+ * Wednesday, December 16, 1998
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5TN_init_interface(void)
+{
+ H5T_t *dt = NULL;
+ herr_t ret_value = SUCCEED;
+
+ FUNC_ENTER_NOAPI(FAIL)
+
+ /*
+ * 0
+ * IIIIIIII
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 1;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 8;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+ if((H5T_NATIVE_SCHAR_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_SCHAR_ALIGN_g = 1;
+ H5T_NATIVE_SCHAR_COMP_ALIGN_g = 1;
+
+ /*
+ * 0
+ * UUUUUUUU
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 1;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 8;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+ if((H5T_NATIVE_UCHAR_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_UCHAR_ALIGN_g = 1;
+
+ /*
+ * 1 0
+ * IIIIIIII IIIIIIII
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 2;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 16;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+ if((H5T_NATIVE_SHORT_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_SHORT_ALIGN_g = 1;
+ H5T_NATIVE_SHORT_COMP_ALIGN_g = 2;
+
+ /*
+ * 1 0
+ * UUUUUUUU UUUUUUUU
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 2;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 16;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+ if((H5T_NATIVE_USHORT_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_USHORT_ALIGN_g = 1;
+
+ /*
+ * 3 2 1 0
+ * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 4;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 32;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+ if((H5T_NATIVE_INT_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_INT_ALIGN_g = 1;
+ H5T_NATIVE_INT_COMP_ALIGN_g = 4;
+
+ /*
+ * 3 2 1 0
+ * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 4;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 32;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+ if((H5T_NATIVE_UINT_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_UINT_ALIGN_g = 1;
+
+ /*
+ * 3 2 1 0
+ * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 4;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 32;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+ if((H5T_NATIVE_LONG_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_LONG_ALIGN_g = 1;
+ H5T_NATIVE_LONG_COMP_ALIGN_g = 4;
+
+ /*
+ * 3 2 1 0
+ * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 4;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 32;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+ if((H5T_NATIVE_ULONG_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_ULONG_ALIGN_g = 1;
+
+ /*
+ * 0
+ * IIIIIIII
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 1;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 8;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+ if((H5T_NATIVE_INT8_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_INT8_ALIGN_g = 1;
+
+ /*
+ * 0
+ * UUUUUUUU
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 1;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 8;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+ if((H5T_NATIVE_UINT8_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_UINT8_ALIGN_g = 1;
+
+ /*
+ * 0
+ * IIIIIIII
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 1;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 8;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+ if((H5T_NATIVE_INT_LEAST8_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_INT_LEAST8_ALIGN_g = 1;
+
+ /*
+ * 0
+ * UUUUUUUU
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 1;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 8;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+ if((H5T_NATIVE_UINT_LEAST8_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_UINT_LEAST8_ALIGN_g = 1;
+
+ /*
+ * 0
+ * IIIIIIII
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 1;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 8;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+ if((H5T_NATIVE_INT_FAST8_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_INT_FAST8_ALIGN_g = 1;
+
+ /*
+ * 0
+ * UUUUUUUU
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 1;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 8;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+ if((H5T_NATIVE_UINT_FAST8_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_UINT_FAST8_ALIGN_g = 1;
+
+ /*
+ * 1 0
+ * IIIIIIII IIIIIIII
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 2;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 16;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+ if((H5T_NATIVE_INT16_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_INT16_ALIGN_g = 1;
+
+ /*
+ * 1 0
+ * UUUUUUUU UUUUUUUU
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 2;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 16;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+ if((H5T_NATIVE_UINT16_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_UINT16_ALIGN_g = 1;
+
+ /*
+ * 1 0
+ * IIIIIIII IIIIIIII
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 2;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 16;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+ if((H5T_NATIVE_INT_LEAST16_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_INT_LEAST16_ALIGN_g = 1;
+
+ /*
+ * 1 0
+ * UUUUUUUU UUUUUUUU
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 2;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 16;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+ if((H5T_NATIVE_UINT_LEAST16_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_UINT_LEAST16_ALIGN_g = 1;
+
+ /*
+ * 3 2 1 0
+ * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 4;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 32;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+ if((H5T_NATIVE_INT_FAST16_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_INT_FAST16_ALIGN_g = 1;
+
+ /*
+ * 3 2 1 0
+ * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 4;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 32;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+ if((H5T_NATIVE_UINT_FAST16_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_UINT_FAST16_ALIGN_g = 1;
+
+ /*
+ * 3 2 1 0
+ * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 4;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 32;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+ if((H5T_NATIVE_INT32_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_INT32_ALIGN_g = 1;
+
+ /*
+ * 3 2 1 0
+ * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 4;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 32;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+ if((H5T_NATIVE_UINT32_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_UINT32_ALIGN_g = 1;
+
+ /*
+ * 3 2 1 0
+ * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 4;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 32;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+ if((H5T_NATIVE_INT_LEAST32_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_INT_LEAST32_ALIGN_g = 1;
+
+ /*
+ * 3 2 1 0
+ * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 4;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 32;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+ if((H5T_NATIVE_UINT_LEAST32_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_UINT_LEAST32_ALIGN_g = 1;
+
+ /*
+ * 3 2 1 0
+ * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 4;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 32;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+ if((H5T_NATIVE_INT_FAST32_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_INT_FAST32_ALIGN_g = 1;
+
+ /*
+ * 3 2 1 0
+ * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 4;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 32;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+ if((H5T_NATIVE_UINT_FAST32_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_UINT_FAST32_ALIGN_g = 1;
+
+ /*
+ * 7 6 5 4
+ * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+ * 3 2 1 0
+ * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 8;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 64;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+ if((H5T_NATIVE_INT64_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_INT64_ALIGN_g = 1;
+
+ /*
+ * 7 6 5 4
+ * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+ * 3 2 1 0
+ * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 8;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 64;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+ if((H5T_NATIVE_UINT64_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_UINT64_ALIGN_g = 1;
+
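/*
 * Editorial aside -- a hedged sketch, not part of the patch. The INT64 and
 * UINT64 blocks above differ only in u.i.sign: H5T_SGN_2 marks two's
 * complement, H5T_SGN_NONE marks unsigned. The same eight little-endian
 * bytes read differently under the two settings:
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    uint64_t raw = 0xFFFFFFFFFFFFFFFFULL; /* all 64 bits set */
    int64_t  as_signed;

    memcpy(&as_signed, &raw, sizeof raw); /* reinterpret, H5T_SGN_2 style */
    printf("H5T_SGN_NONE: %llu\n", (unsigned long long)raw);  /* 18446744073709551615 */
    printf("H5T_SGN_2:    %lld\n", (long long)as_signed);     /* -1 */
    return 0;
}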
+ /*
+ * 7 6 5 4
+ * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+ * 3 2 1 0
+ * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 8;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 64;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+ if((H5T_NATIVE_INT_LEAST64_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_INT_LEAST64_ALIGN_g = 1;
+
+ /*
+ * 7 6 5 4
+ * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+ * 3 2 1 0
+ * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 8;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 64;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+ if((H5T_NATIVE_UINT_LEAST64_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_UINT_LEAST64_ALIGN_g = 1;
+
+ /*
+ * 7 6 5 4
+ * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+ * 3 2 1 0
+ * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 8;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 64;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+ if((H5T_NATIVE_INT_FAST64_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_INT_FAST64_ALIGN_g = 1;
+
+ /*
+ * 7 6 5 4
+ * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+ * 3 2 1 0
+ * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 8;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 64;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+ if((H5T_NATIVE_UINT_FAST64_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_UINT_FAST64_ALIGN_g = 1;
+
+ /*
+ * 7 6 5 4
+ * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+ * 3 2 1 0
+ * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 8;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 64;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+ if((H5T_NATIVE_LLONG_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_LLONG_ALIGN_g = 1;
+ H5T_NATIVE_LLONG_COMP_ALIGN_g = 8;
+
+ /*
+ * 7 6 5 4
+ * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+ * 3 2 1 0
+ * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_INTEGER;
+ dt->shared->size = 8;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 64;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+ if((H5T_NATIVE_ULLONG_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_ULLONG_ALIGN_g = 1;
+
+ /*
+ * 3 2 1 0
+ * SEEEEEEE EMMMMMMM MMMMMMMM MMMMMMMM
+ * Implicit bit? yes
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_FLOAT;
+ dt->shared->size = 4;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 32;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.f.sign = 31;
+ dt->shared->u.atomic.u.f.epos = 23;
+ dt->shared->u.atomic.u.f.esize = 8;
+ dt->shared->u.atomic.u.f.ebias = 0x0000007f;
+ dt->shared->u.atomic.u.f.mpos = 0;
+ dt->shared->u.atomic.u.f.msize = 23;
+ dt->shared->u.atomic.u.f.norm = H5T_NORM_IMPLIED;
+ dt->shared->u.atomic.u.f.pad = H5T_PAD_ZERO;
+ if((H5T_NATIVE_FLOAT_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_FLOAT_ALIGN_g = 1;
+ H5T_NATIVE_FLOAT_COMP_ALIGN_g = 4;
+
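/*
 * Editorial aside -- a hedged sketch, not part of the patch. The diagram
 * above (SEEEEEEE EMMMMMMM ...) is the IEEE-754 single-precision layout that
 * the fields just set describe: sign bit 31, 8 exponent bits at position 23
 * with bias 0x7f, 23 mantissa bits at position 0, and an implied leading 1
 * (H5T_NORM_IMPLIED). Decoding a normalized float by hand with exactly those
 * numbers (subnormals, infinities and NaNs omitted for brevity):
 */
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    float    f = 1.5f;
    uint32_t bits;

    memcpy(&bits, &f, sizeof bits);       /* 0x3FC00000 on this layout */

    uint32_t sign = (bits >> 31) & 0x1u;  /* u.f.sign = 31            */
    uint32_t exp  = (bits >> 23) & 0xFFu; /* u.f.epos = 23, esize = 8 */
    uint32_t mant = bits & 0x7FFFFFu;     /* u.f.mpos = 0,  msize = 23 */

    /* H5T_NORM_IMPLIED: restore the implicit leading 1; unbias by 0x7f. */
    double value = (sign ? -1.0 : 1.0)
                   * ldexp(1.0 + (double)mant / 8388608.0, (int)exp - 0x7F);

    printf("bits=0x%08X sign=%u exp=%u mant=0x%06X -> %g\n",
           (unsigned)bits, (unsigned)sign, (unsigned)exp, (unsigned)mant, value);
    return 0; /* prints ... -> 1.5 */
}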
+ /*
+ * 7 6 5 4
+ * SEEEEEEE EEEEMMMM MMMMMMMM MMMMMMMM
+ * 3 2 1 0
+ * MMMMMMMM MMMMMMMM MMMMMMMM MMMMMMMM
+ * Implicit bit? yes
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_FLOAT;
+ dt->shared->size = 8;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 64;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.f.sign = 63;
+ dt->shared->u.atomic.u.f.epos = 52;
+ dt->shared->u.atomic.u.f.esize = 11;
+ dt->shared->u.atomic.u.f.ebias = 0x000003ff;
+ dt->shared->u.atomic.u.f.mpos = 0;
+ dt->shared->u.atomic.u.f.msize = 52;
+ dt->shared->u.atomic.u.f.norm = H5T_NORM_IMPLIED;
+ dt->shared->u.atomic.u.f.pad = H5T_PAD_ZERO;
+ if((H5T_NATIVE_DOUBLE_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_DOUBLE_ALIGN_g = 1;
+ H5T_NATIVE_DOUBLE_COMP_ALIGN_g = 8;
+
+ /*
+ * 7 6 5 4
+ * SEEEEEEE EEEEMMMM MMMMMMMM MMMMMMMM
+ * 3 2 1 0
+ * MMMMMMMM MMMMMMMM MMMMMMMM MMMMMMMM
+ * Implicit bit? yes
+ * Alignment: none
+ */
+ if(NULL == (dt = H5T__alloc()))
+ HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+ dt->shared->state = H5T_STATE_IMMUTABLE;
+ dt->shared->type = H5T_FLOAT;
+ dt->shared->size = 8;
+ dt->shared->u.atomic.order = H5T_ORDER_LE;
+ dt->shared->u.atomic.offset = 0;
+ dt->shared->u.atomic.prec = 64;
+ dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+ dt->shared->u.atomic.u.f.sign = 63;
+ dt->shared->u.atomic.u.f.epos = 52;
+ dt->shared->u.atomic.u.f.esize = 11;
+ dt->shared->u.atomic.u.f.ebias = 0x000003ff;
+ dt->shared->u.atomic.u.f.mpos = 0;
+ dt->shared->u.atomic.u.f.msize = 52;
+ dt->shared->u.atomic.u.f.norm = H5T_NORM_IMPLIED;
+ dt->shared->u.atomic.u.f.pad = H5T_PAD_ZERO;
+ if((H5T_NATIVE_LDOUBLE_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+ H5T_NATIVE_LDOUBLE_ALIGN_g = 1;
+ H5T_NATIVE_LDOUBLE_COMP_ALIGN_g = 8;
+
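/*
 * Editorial aside -- a hedged observation, not part of the patch. The
 * LDOUBLE block above repeats the DOUBLE fields verbatim because on the
 * 32-bit ARM EABI target this file was generated for, long double is the
 * same IEEE-754 binary64 type as double. A quick check of that assumption:
 */
#include <float.h>
#include <stdio.h>

int main(void)
{
    printf("sizeof(double)=%zu sizeof(long double)=%zu\n",
           sizeof(double), sizeof(long double));
    printf("DBL_MANT_DIG=%d LDBL_MANT_DIG=%d\n", DBL_MANT_DIG, LDBL_MANT_DIG);
    return 0; /* expect 8/8 and 53/53 on armv7l; e.g. 8/16 and 53/64 on x86-64 */
}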
+ /* Set the native order for this machine */
+ H5T_native_order_g = H5T_ORDER_LE;
+
+ /* Structure alignment for pointers, hvl_t, hobj_ref_t, hdset_reg_ref_t */
+ H5T_POINTER_COMP_ALIGN_g = 4;
+ H5T_HVL_COMP_ALIGN_g = 4;
+ H5T_HOBJREF_COMP_ALIGN_g = 8;
+ H5T_HDSETREGREF_COMP_ALIGN_g = 1;
+
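/*
 * Editorial aside -- a hedged sketch, not part of the patch. The
 * *_COMP_ALIGN_g globals record the alignment each category gets as a
 * member of a compound (struct-like) type. With 4-byte pointer alignment,
 * a char followed by a pointer leaves three bytes of padding:
 */
#include <stddef.h>
#include <stdio.h>

struct compound_example {
    char  c;  /* offset 0 */
    void *p;  /* pushed to the next 4-byte boundary on this target */
};

int main(void)
{
    printf("offsetof(p) = %zu\n", offsetof(struct compound_example, p));
    return 0; /* 4 where H5T_POINTER_COMP_ALIGN_g = 4; 8 on typical LP64 */
}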
+done:
+ if(ret_value < 0) {
+ if(dt != NULL) {
+ dt->shared = H5FL_FREE(H5T_shared_t, dt->shared);
+ dt = H5FL_FREE(H5T_t, dt);
+ } /* end if */
+ } /* end if */
+
+ FUNC_LEAVE_NOAPI(ret_value);
+} /* end H5TN_init_interface() */
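/*
 * Editorial aside -- a hedged sketch, not part of the patch. Every block in
 * the function above uses the same idiom: HGOTO_ERROR() sets ret_value and
 * jumps to done:, where the partially built datatype is freed exactly once.
 * In miniature, with plain malloc/free standing in for H5FL_FREE:
 */
#include <stdlib.h>

static int init_thing(char **out)
{
    int   ret_value = 0; /* SUCCEED */
    char *obj       = NULL;

    if (NULL == (obj = malloc(16))) {
        ret_value = -1;  /* stands in for HGOTO_ERROR(..., FAIL, ...) */
        goto done;
    }
    /* ... each further fallible step would also jump to done: ... */
    *out = obj;          /* success: ownership transfers, as with H5I_register */

done:
    if (ret_value < 0 && obj != NULL)
        free(obj);       /* mirrors the H5FL_FREE cleanup in done: above */
    return ret_value;
}

int main(void)
{
    char *p = NULL;
    if (init_thing(&p) < 0)
        return EXIT_FAILURE;
    free(p);
    return EXIT_SUCCESS;
}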
+
+/****************************************/
+/* ALIGNMENT and signal-handling status */
+/****************************************/
+/* Signal() support: yes */
+/* setjmp() support: yes */
+/* longjmp() support: yes */
+/* sigsetjmp() support: yes */
+/* siglongjmp() support: yes */
+/* sigprocmask() support: yes */
+
+/******************************/
+/* signal handlers statistics */
+/******************************/
+/* signal_handlers tested: 15 times */
+/* sigbus_handler called: 5 times */
+/* sigsegv_handler called: 5 times */
+/* sigill_handler called: 5 times */
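The file above is checked in pre-generated because H5Tinit.c is normally emitted at build time by running the H5detect probe on the machine being targeted; a cross build cannot execute target binaries on the host, so the HDF5Config.cmake change below copies this linux-armv7l output into the build tree instead, and the HDF5Source.patch further down guards the H5detect custom command with NOT CMAKE_CROSSCOMPILING. A hedged sketch of the kind of runtime probe H5detect performs -- illustrative only, not the actual H5detect source:

#include <stdio.h>

int main(void)
{
    unsigned x = 1;
    int little = *(unsigned char *)&x; /* 1 on little-endian targets */

    /* H5detect prints C source describing the machine it runs on. */
    printf("    dt->shared->u.atomic.order = H5T_ORDER_%s;\n",
           little ? "LE" : "BE");
    return 0;
}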
diff --git a/infra/cmake/packages/HDF5Config.cmake b/infra/cmake/packages/HDF5Config.cmake
index 19803f1ea..4ab338144 100644
--- a/infra/cmake/packages/HDF5Config.cmake
+++ b/infra/cmake/packages/HDF5Config.cmake
@@ -6,9 +6,24 @@ function(_HDF5_build)
nnas_find_package(HDF5Source QUIET)
if(NOT HDF5Source_FOUND)
+ message(STATUS "HD5Config skip: HDF5Source NOT FOUND")
return()
endif(NOT HDF5Source_FOUND)
+ if(DEFINED ENV{BUILD_HOST_EXEC})
+ set(EXTERNAL_H5MAKE_LIBSETTINGS $ENV{BUILD_HOST_EXEC}/externals/HDF5/build/bin/H5make_libsettings)
+ set(ENV{EXTERNAL_H5MAKE_LIBSETTINGS} ${EXTERNAL_H5MAKE_LIBSETTINGS})
+
+ # NOTE https://github.com/Samsung/ONE/issues/8762
+ # TODO generalize to select 'linux-armv7l'
+ set(H5TINIT_C_FROM_NATIVE ${CMAKE_CURRENT_LIST_DIR}/H5Tinit.c.linux-armv7l)
+ set(H5TINIT_C_COPY ${CMAKE_BINARY_DIR}/externals/HDF5/build/H5Tinit.c)
+ message(STATUS "Copy H5Tinit.c generated from target native build")
+ execute_process(
+ COMMAND ${CMAKE_COMMAND} -E copy "${H5TINIT_C_FROM_NATIVE}" "${H5TINIT_C_COPY}"
+ )
+ endif(DEFINED ENV{BUILD_HOST_EXEC})
+
nnas_include(ExternalBuildTools)
ExternalBuild_CMake(CMAKE_DIR ${HDF5Source_DIR}
BUILD_DIR ${CMAKE_BINARY_DIR}/externals/HDF5/build
@@ -26,6 +41,7 @@ _HDF5_build()
find_path(HDF5_CONFIG_DIR "hdf5-config.cmake"
PATHS ${EXT_OVERLAY_DIR}
+ NO_CMAKE_FIND_ROOT_PATH
PATH_SUFFIXES
cmake
share/cmake
diff --git a/infra/cmake/packages/HDF5Source.patch b/infra/cmake/packages/HDF5Source.patch
new file mode 100644
index 000000000..b8602a08a
--- /dev/null
+++ b/infra/cmake/packages/HDF5Source.patch
@@ -0,0 +1,195 @@
+Only in HDF5: build
+diff -r -u a/config/cmake/ConfigureChecks.cmake b/config/cmake/ConfigureChecks.cmake
+--- a/config/cmake/ConfigureChecks.cmake
++++ b/config/cmake/ConfigureChecks.cmake
+@@ -109,15 +109,15 @@
+ if (NOT WINDOWS)
+ CHECK_FUNCTION_EXISTS(clock_gettime CLOCK_GETTIME_IN_LIBC)
+ CHECK_LIBRARY_EXISTS(rt clock_gettime "" CLOCK_GETTIME_IN_LIBRT)
+- CHECK_LIBRARY_EXISTS(posix4 clock_gettime "" CLOCK_GETTIME_IN_LIBPOSIX4)
++ #CHECK_LIBRARY_EXISTS(posix4 clock_gettime "" CLOCK_GETTIME_IN_LIBPOSIX4)
+ if (CLOCK_GETTIME_IN_LIBC)
+ set (H5_HAVE_CLOCK_GETTIME 1)
+ elseif (CLOCK_GETTIME_IN_LIBRT)
+ set (H5_HAVE_CLOCK_GETTIME 1)
+ list (APPEND LINK_LIBS rt)
+- elseif (CLOCK_GETTIME_IN_LIBPOSIX4)
+- set (H5_HAVE_CLOCK_GETTIME 1)
+- list (APPEND LINK_LIBS posix4)
++ #elseif (CLOCK_GETTIME_IN_LIBPOSIX4)
++ # set (H5_HAVE_CLOCK_GETTIME 1)
++ # list (APPEND LINK_LIBS posix4)
+ endif (CLOCK_GETTIME_IN_LIBC)
+ endif (NOT WINDOWS)
+ #-----------------------------------------------------------------------------
+@@ -130,12 +130,17 @@
+ if (HDF5_ENABLE_DIRECT_VFD)
+ set (msg "Performing TEST_DIRECT_VFD_WORKS")
+ set (MACRO_CHECK_FUNCTION_DEFINITIONS "-DTEST_DIRECT_VFD_WORKS -D_GNU_SOURCE ${CMAKE_REQUIRED_FLAGS}")
++ if(NOT CMAKE_CROSSCOMPILING)
+ TRY_RUN (TEST_DIRECT_VFD_WORKS_RUN TEST_DIRECT_VFD_WORKS_COMPILE
+ ${CMAKE_BINARY_DIR}
+ ${HDF_RESOURCES_EXT_DIR}/HDFTests.c
+ CMAKE_FLAGS -DCOMPILE_DEFINITIONS:STRING=${MACRO_CHECK_FUNCTION_DEFINITIONS}
+ OUTPUT_VARIABLE OUTPUT
+ )
++ else(NOT CMAKE_CROSSCOMPILING)
++ set(TEST_DIRECT_VFD_WORKS_RUN 0)
++ set(TEST_DIRECT_VFD_WORKS_COMPILE TRUE)
++ endif(NOT CMAKE_CROSSCOMPILING)
+ if (TEST_DIRECT_VFD_WORKS_COMPILE)
+ if (TEST_DIRECT_VFD_WORKS_RUN MATCHES 0)
+ HDF_FUNCTION_TEST (HAVE_DIRECT)
+@@ -221,7 +226,12 @@
+ # The machine's conversion gets the correct value. We define the macro and disable
+ # this kind of test until we figure out what algorithm they use.
+ #
++if(NOT CMAKE_CROSSCOMPILING)
+ H5ConversionTests (H5_LDOUBLE_TO_LONG_SPECIAL "Checking IF your system converts long double to (unsigned) long values with special algorithm")
++else(NOT CMAKE_CROSSCOMPILING)
++ set(H5_LDOUBLE_TO_LONG_SPECIAL_RUN 1)
++ set(H5_LDOUBLE_TO_LONG_SPECIAL_COMPILE TRUE)
++endif(NOT CMAKE_CROSSCOMPILING)
+ # ----------------------------------------------------------------------
+ # Set the flag to indicate that the machine is using a special algorithm
+ # to convert some values of '(unsigned) long' to 'long double' values.
+@@ -230,7 +240,12 @@
+ # ..., 7fffff..., the compiler uses a unknown algorithm. We define a
+ # macro and skip the test for now until we know about the algorithm.
+ #
++if(NOT CMAKE_CROSSCOMPILING)
+ H5ConversionTests (H5_LONG_TO_LDOUBLE_SPECIAL "Checking IF your system can convert (unsigned) long to long double values with special algorithm")
++else(NOT CMAKE_CROSSCOMPILING)
++ set(H5_LONG_TO_LDOUBLE_SPECIAL_RUN 1)
++ set(H5_LONG_TO_LDOUBLE_SPECIAL_COMPILE TRUE)
++endif(NOT CMAKE_CROSSCOMPILING)
+ # ----------------------------------------------------------------------
+ # Set the flag to indicate that the machine can accurately convert
+ # 'long double' to '(unsigned) long long' values. (This flag should be set for
+@@ -240,7 +255,12 @@
+ # 0x4351ccf385ebc8a0dfcc... or 0x4351ccf385ebc8a0ffcc... will make the converted
+ # values wildly wrong. This test detects this wrong behavior and disable the test.
+ #
++if(NOT CMAKE_CROSSCOMPILING)
+ H5ConversionTests (H5_LDOUBLE_TO_LLONG_ACCURATE "Checking IF correctly converting long double to (unsigned) long long values")
++else(NOT CMAKE_CROSSCOMPILING)
++ set(H5_LDOUBLE_TO_LLONG_ACCURATE_RUN 0)
++ set(H5_LDOUBLE_TO_LLONG_ACCURATE_COMPILE TRUE)
++endif(NOT CMAKE_CROSSCOMPILING)
+ # ----------------------------------------------------------------------
+ # Set the flag to indicate that the machine can accurately convert
+ # '(unsigned) long long' to 'long double' values. (This flag should be set for
+@@ -248,11 +268,21 @@
+ # 007fff..., 00ffff..., 01ffff..., ..., 7fffff..., the converted values are twice
+ # as big as they should be.
+ #
++if(NOT CMAKE_CROSSCOMPILING)
+ H5ConversionTests (H5_LLONG_TO_LDOUBLE_CORRECT "Checking IF correctly converting (unsigned) long long to long double values")
++else(NOT CMAKE_CROSSCOMPILING)
++ set(H5_LLONG_TO_LDOUBLE_CORRECT_RUN 0)
++ set(H5_LLONG_TO_LDOUBLE_CORRECT_COMPILE TRUE)
++endif(NOT CMAKE_CROSSCOMPILING)
+ # ----------------------------------------------------------------------
+ # Check if pointer alignments are enforced
+ #
++if(NOT CMAKE_CROSSCOMPILING)
+ H5ConversionTests (H5_NO_ALIGNMENT_RESTRICTIONS "Checking IF alignment restrictions are strictly enforced")
++else(NOT CMAKE_CROSSCOMPILING)
++ set(H5_NO_ALIGNMENT_RESTRICTIONS_RUN 0)
++ set(H5_NO_ALIGNMENT_RESTRICTIONS_COMPILE TRUE)
++endif(NOT CMAKE_CROSSCOMPILING)
+
+ # -----------------------------------------------------------------------
+ # wrapper script variables
+diff -r -u a/config/cmake_ext_mod/ConfigureChecks.cmake b/config/cmake_ext_mod/ConfigureChecks.cmake
+--- a/config/cmake_ext_mod/ConfigureChecks.cmake
++++ b/config/cmake_ext_mod/ConfigureChecks.cmake
+@@ -272,12 +272,17 @@
+ # http://www.gnu.org/s/libc/manual/html_node/Feature-Test-Macros.html
+ set (HDF_EXTRA_C_FLAGS -D_POSIX_C_SOURCE=199506L)
+ # _BSD_SOURCE deprecated in GLIBC >= 2.20
++ if(NOT CMAKE_CROSSCOMPILING)
+ TRY_RUN (HAVE_DEFAULT_SOURCE_RUN HAVE_DEFAULT_SOURCE_COMPILE
+ ${CMAKE_BINARY_DIR}
+ ${HDF_RESOURCES_EXT_DIR}/HDFTests.c
+ CMAKE_FLAGS -DCOMPILE_DEFINITIONS:STRING=-DHAVE_DEFAULT_SOURCE
+ OUTPUT_VARIABLE OUTPUT
+ )
++ else(NOT CMAKE_CROSSCOMPILING)
++ set(HAVE_DEFAULT_SOURCE_RUN 1)
++ set(HAVE_DEFAULT_SOURCE_COMPILE TRUE)
++ endif(NOT CMAKE_CROSSCOMPILING)
+ if (HAVE_DEFAULT_SOURCE_COMPILE AND HAVE_DEFAULT_SOURCE_RUN)
+ set (HDF_EXTRA_FLAGS -D_DEFAULT_SOURCE)
+ else (HAVE_DEFAULT_SOURCE_COMPILE AND HAVE_DEFAULT_SOURCE_RUN)
+@@ -287,12 +292,17 @@
+ option (HDF_ENABLE_LARGE_FILE "Enable support for large (64-bit) files on Linux." ON)
+ if (HDF_ENABLE_LARGE_FILE)
+ set (msg "Performing TEST_LFS_WORKS")
++ if(NOT CMAKE_CROSSCOMPILING)
+ TRY_RUN (TEST_LFS_WORKS_RUN TEST_LFS_WORKS_COMPILE
+ ${CMAKE_BINARY_DIR}
+ ${HDF_RESOURCES_EXT_DIR}/HDFTests.c
+ CMAKE_FLAGS -DCOMPILE_DEFINITIONS:STRING=-DTEST_LFS_WORKS
+ OUTPUT_VARIABLE OUTPUT
+ )
++ else(NOT CMAKE_CROSSCOMPILING)
++ set(TEST_LFS_WORKS_RUN 0)
++ set(TEST_LFS_WORKS_COMPILE TRUE)
++ endif(NOT CMAKE_CROSSCOMPILING)
+ if (TEST_LFS_WORKS_COMPILE)
+ if (TEST_LFS_WORKS_RUN MATCHES 0)
+ set (TEST_LFS_WORKS 1 CACHE INTERNAL ${msg})
+@@ -702,7 +712,8 @@
+ set (CURRENT_TEST_DEFINITIONS "-DPRINTF_LL_WIDTH")
+ if (${HDF_PREFIX}_SIZEOF_LONG_LONG)
+ set (CURRENT_TEST_DEFINITIONS "${CURRENT_TEST_DEFINITIONS} -DHAVE_LONG_LONG")
+ endif (${HDF_PREFIX}_SIZEOF_LONG_LONG)
++ if(NOT CMAKE_CROSSCOMPILING)
+ TRY_RUN (${HDF_PREFIX}_PRINTF_LL_TEST_RUN ${HDF_PREFIX}_PRINTF_LL_TEST_COMPILE
+ ${CMAKE_BINARY_DIR}
+ ${HDF_RESOURCES_EXT_DIR}/HDFTests.c
+@@ -722,6 +733,13 @@
+ "Test ${HDF_PREFIX}_PRINTF_LL_WIDTH failed with the following output:\n ${OUTPUT}\n"
+ )
+ endif (${HDF_PREFIX}_PRINTF_LL_TEST_COMPILE)
++ else(NOT CMAKE_CROSSCOMPILING)
++ set (${HDF_PREFIX}_PRINTF_LL_TEST_RUN 1)
++ set (${HDF_PREFIX}_PRINTF_LL_TEST_COMPILE 1)
++ set (${HDF_PREFIX}_PRINTF_LL_WIDTH "\"L\"")
++ set (${HDF_PREFIX}_PRINTF_LL "L")
++ set (PRINT_LL_FOUND 1)
++ endif(NOT CMAKE_CROSSCOMPILING)
+
+ if (PRINT_LL_FOUND)
+ message (STATUS "Checking for appropriate format for 64 bit long: found ${${HDF_PREFIX}_PRINTF_LL_WIDTH}")
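The guarded TRY_RUN above compiles HDFTests.c and executes it to discover which printf length modifier formats a 64-bit integer; when cross-compiling the result cannot be run, so the patch pins the answer to "L". A hedged sketch of that style of compile-and-run probe -- illustrative only, not the actual HDFTests.c source:

#include <stdio.h>
#include <string.h>

int main(void)
{
    char      buf[64];
    long long v = 1099511627776LL;        /* 2^40 */

    snprintf(buf, sizeof buf, "%lld", v); /* candidate modifier: "ll" */
    return strcmp(buf, "1099511627776") ? 1 : 0; /* exit 0 => modifier works */
}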
+diff -r -u a/src/CMakeLists.txt b/src/CMakeLists.txt
+--- a/src/CMakeLists.txt
++++ b/src/CMakeLists.txt
+@@ -616,6 +616,7 @@
+ target_link_libraries (H5detect "ws2_32.lib")
+ endif (MSVC OR MINGW)
+
++if (NOT CMAKE_CROSSCOMPILING)
+ set (CMD $<TARGET_FILE:H5detect>)
+ add_custom_command (
+ OUTPUT ${HDF5_BINARY_DIR}/H5Tinit.c
+@@ -623,6 +624,7 @@
+ ARGS > ${HDF5_BINARY_DIR}/H5Tinit.c
+ DEPENDS H5detect
+ )
++endif (NOT CMAKE_CROSSCOMPILING)
+
+ add_executable (H5make_libsettings ${HDF5_SRC_DIR}/H5make_libsettings.c)
+ TARGET_C_PROPERTIES (H5make_libsettings STATIC " " " ")
+@@ -631,6 +633,10 @@
+ endif (MSVC OR MINGW)
+
+ set (CMD $<TARGET_FILE:H5make_libsettings>)
++# for cross compile
++if (DEFINED ENV{EXTERNAL_H5MAKE_LIBSETTINGS})
++ set(CMD $ENV{EXTERNAL_H5MAKE_LIBSETTINGS})
++endif (DEFINED ENV{EXTERNAL_H5MAKE_LIBSETTINGS})
+ add_custom_command (
+ OUTPUT ${HDF5_BINARY_DIR}/H5lib_settings.c
+ COMMAND ${CMD}
diff --git a/infra/cmake/packages/HDF5SourceConfig.cmake b/infra/cmake/packages/HDF5SourceConfig.cmake
index 134efa6f4..3440dbd20 100644
--- a/infra/cmake/packages/HDF5SourceConfig.cmake
+++ b/infra/cmake/packages/HDF5SourceConfig.cmake
@@ -7,9 +7,11 @@ function(_HDF5Source_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(HDF5_URL https://github.com/HDFGroup/hdf5/archive/hdf5-1_8_16.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(HDF5_URL ${EXTERNAL_DOWNLOAD_SERVER}/HDFGroup/hdf5/archive/hdf5-1_8_16.tar.gz)
- ExternalSource_Download(HDF5 ${HDF5_URL})
+ ExternalSource_Download(HDF5 ${HDF5_URL}
+ PATCH ${CMAKE_CURRENT_LIST_DIR}/HDF5Source.patch)
set(HDF5Source_DIR ${HDF5_SOURCE_DIR} PARENT_SCOPE)
set(HDF5Source_FOUND TRUE PARENT_SCOPE)
diff --git a/infra/cmake/packages/JsoncppConfig.cmake b/infra/cmake/packages/JsoncppConfig.cmake
new file mode 100644
index 000000000..3c5c3e78a
--- /dev/null
+++ b/infra/cmake/packages/JsoncppConfig.cmake
@@ -0,0 +1,34 @@
+function(_Jsoncpp_import)
+ nnas_find_package(JsoncppSource QUIET)
+
+ if(NOT JsoncppSource_FOUND)
+ set(Jsoncpp_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT JsoncppSource_FOUND)
+
+ nnas_include(ExternalBuildTools)
+ ExternalBuild_CMake(CMAKE_DIR ${JsoncppSource_DIR}
+ BUILD_DIR ${CMAKE_BINARY_DIR}/externals/JSONCPP/build
+ INSTALL_DIR ${EXT_OVERLAY_DIR}
+ IDENTIFIER "1.9.5"
+ PKG_NAME "JSONCPP"
+ EXTRA_OPTS "-DBUILD_STATIC_LIBS=ON"
+ "-DBUILD_SHARED_LIBS=OFF"
+ "-DJSONCPP_WITH_TESTS=OFF"
+ "-DJSONCPP_WITH_POST_BUILD_UNITTEST=OFF")
+
+ find_path(Jsoncpp_INCLUDE_DIRS
+ NAMES json.h
+ PATHS ${EXT_OVERLAY_DIR}
+ NO_CMAKE_FIND_ROOT_PATH
+ PATH_SUFFIXES include/json)
+ find_file(Jsoncpp_STATIC_LIB
+ NAMES libjsoncpp.a
+ PATHS ${EXT_OVERLAY_DIR}
+ NO_CMAKE_FIND_ROOT_PATH
+ PATH_SUFFIXES lib)
+
+ set(Jsoncpp_FOUND TRUE PARENT_SCOPE)
+endfunction(_Jsoncpp_import)
+
+_Jsoncpp_import()
diff --git a/infra/cmake/packages/JsoncppSourceConfig.cmake b/infra/cmake/packages/JsoncppSourceConfig.cmake
new file mode 100644
index 000000000..8d672854b
--- /dev/null
+++ b/infra/cmake/packages/JsoncppSourceConfig.cmake
@@ -0,0 +1,19 @@
+function(_JsoncppSource_import)
+ if(NOT DOWNLOAD_JSONCPP)
+ set(JsoncppSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_JSONCPP)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(JSONCPP_URL ${EXTERNAL_DOWNLOAD_SERVER}/open-source-parsers/jsoncpp/archive/refs/tags/1.9.5.tar.gz)
+
+ ExternalSource_Download(JSONCPP ${JSONCPP_URL})
+
+ set(JsoncppSource_DIR ${JSONCPP_SOURCE_DIR} PARENT_SCOPE)
+ set(JsoncppSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_JsoncppSource_import)
+
+_JsoncppSource_import()
diff --git a/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake b/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake
new file mode 100644
index 000000000..e55647da8
--- /dev/null
+++ b/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake
@@ -0,0 +1,16 @@
+function(_MbedOSSource_import)
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(MBEDOS_6_15_URL ${EXTERNAL_DOWNLOAD_SERVER}/ARMmbed/mbed-os/archive/refs/tags/mbed-os-6.15.0.tar.gz)
+ set(MBEDOS_6_15_SHA256 529b04c41f3020ed8a62f12d47f2d3de87e1b07fb13708534534a587f7ea048e)
+
+ ExternalSource_Download(MBEDOS DIRNAME MBEDOS-6.15 ${MBEDOS_6_15_URL}
+ CHECKSUM "SHA256=${MBEDOS_6_15_SHA256}")
+
+ set(MbedOSSource_DIR ${MBEDOS_SOURCE_DIR} PARENT_SCOPE)
+ set(MbedOSSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_MbedOSSource_import)
+
+_MbedOSSource_import()
diff --git a/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfigVersion.cmake b/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfigVersion.cmake
new file mode 100644
index 000000000..acdd54ad6
--- /dev/null
+++ b/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfigVersion.cmake
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "6.15")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/NEON2SSESourceConfig.cmake b/infra/cmake/packages/NEON2SSESourceConfig.cmake
index 5970ec73e..82c71e2a8 100644
--- a/infra/cmake/packages/NEON2SSESourceConfig.cmake
+++ b/infra/cmake/packages/NEON2SSESourceConfig.cmake
@@ -7,12 +7,13 @@ function(_NEON2SSESource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- # NOTE TensorFlow 1.12 downloads NEON2SSE from the following URL
# NOTE TensorFlow 1.13.1 downloads NEON2SSE from the following URL
- # NOTE TensorFlow 2.2 downloads NEON2SSE from the following URL
- envoption(NEON2SSE_1_12_URL https://github.com/intel/ARM_NEON_2_x86_SSE/archive/1200fe90bb174a6224a525ee60148671a786a71f.tar.gz)
+ # NOTE TensorFlow 2.8.0 downloads NEON2SSE from the following URL
+ # NOTE commit c12f8932c3be5aebaf35562d699f645686c4e2c3 resolves a build failure in debug builds
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(NEON2SSE_URL ${EXTERNAL_DOWNLOAD_SERVER}/intel/ARM_NEON_2_x86_SSE/archive/1200fe90bb174a6224a525ee60148671a786a71f.tar.gz)
- ExternalSource_Download(NEON2SSE ${NEON2SSE_1_12_URL})
+ ExternalSource_Download(NEON2SSE ${NEON2SSE_URL})
set(NEON2SSESource_DIR ${NEON2SSE_SOURCE_DIR} PARENT_SCOPE)
set(NEON2SSESource_FOUND TRUE PARENT_SCOPE)
diff --git a/infra/cmake/packages/NoniusSourceConfig.cmake b/infra/cmake/packages/NoniusSourceConfig.cmake
index 0af23ef0e..17965f1eb 100644
--- a/infra/cmake/packages/NoniusSourceConfig.cmake
+++ b/infra/cmake/packages/NoniusSourceConfig.cmake
@@ -20,7 +20,7 @@ function(_NoniusSource_import)
endif(BUILD_KBENCHMARK)
set(NoniusSource_DIR ${NONIUS_SOURCE_DIR} PARENT_SCOPE)
- set(NoniusSource_FOUND ${NONIUS_SOURCE_GET} PARENT_SCOPE)
+ set(NoniusSource_FOUND TRUE PARENT_SCOPE)
endfunction(_NoniusSource_import)
_NoniusSource_import()
diff --git a/infra/cmake/packages/ONNXSource-1.4.1/ONNXSourceConfig.cmake b/infra/cmake/packages/ONNXSource-1.4.1/ONNXSourceConfig.cmake
index c9fb5e490..fe21f6d3d 100644
--- a/infra/cmake/packages/ONNXSource-1.4.1/ONNXSourceConfig.cmake
+++ b/infra/cmake/packages/ONNXSource-1.4.1/ONNXSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_ONNXSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(ONNX_1_4_1_URL https://github.com/onnx/onnx/archive/v1.4.1.zip)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(ONNX_1_4_1_URL ${EXTERNAL_DOWNLOAD_SERVER}/onnx/onnx/archive/v1.4.1.zip)
ExternalSource_Download(ONNX DIRNAME ONNX-1.4.1
CHECKSUM MD5=604b43a22fbc758f32ae9f3a4fb9d397
diff --git a/infra/cmake/packages/ONNXSource-1.6.0/ONNXSourceConfig.cmake b/infra/cmake/packages/ONNXSource-1.6.0/ONNXSourceConfig.cmake
index ef903f834..b2ad08b90 100644
--- a/infra/cmake/packages/ONNXSource-1.6.0/ONNXSourceConfig.cmake
+++ b/infra/cmake/packages/ONNXSource-1.6.0/ONNXSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_ONNXSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(ONNX_1_6_0_URL https://github.com/onnx/onnx/archive/v1.6.0.zip)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(ONNX_1_6_0_URL ${EXTERNAL_DOWNLOAD_SERVER}/onnx/onnx/archive/v1.6.0.zip)
ExternalSource_Download(ONNX DIRNAME ONNX-1.6.0
CHECKSUM MD5=cbdc547a527f1b59c7f066c8d258b966
diff --git a/infra/cmake/packages/OouraFFTSourceConfig.cmake b/infra/cmake/packages/OouraFFTSourceConfig.cmake
new file mode 100644
index 000000000..d84b5b20f
--- /dev/null
+++ b/infra/cmake/packages/OouraFFTSourceConfig.cmake
@@ -0,0 +1,20 @@
+function(_OouraFFTSource_import)
+ if(NOT DOWNLOAD_OOURAFFT)
+ set(OouraFFTSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_OOURAFFT)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ # NOTE TensorFlow 2.3 downloads OOURAFFT from the following URL
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(OOURAFFT_URL ${EXTERNAL_DOWNLOAD_SERVER}/petewarden/OouraFFT/archive/v1.0.tar.gz)
+
+ ExternalSource_Download(OOURAFFT ${OOURAFFT_URL})
+
+ set(OouraFFTSource_DIR ${OOURAFFT_SOURCE_DIR} PARENT_SCOPE)
+ set(OouraFFTSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_OouraFFTSource_import)
+
+_OouraFFTSource_import()
diff --git a/infra/cmake/packages/Opencl_HeadersConfig.cmake b/infra/cmake/packages/Opencl_HeadersConfig.cmake
new file mode 100644
index 000000000..ec7c65a73
--- /dev/null
+++ b/infra/cmake/packages/Opencl_HeadersConfig.cmake
@@ -0,0 +1,27 @@
+function(_Opencl_Headers_import)
+ nnas_find_package(Opencl_HeadersSource QUIET)
+
+ # NOTE This line prevents multiple definitions of target
+ if(TARGET OpenCL_Headers)
+ set(Opencl_HeadersSource_DIR ${Opencl_HeadersSource_DIR} PARENT_SCOPE)
+ set(Opencl_Headers_FOUND TRUE PARENT_SCOPE)
+ return()
+ endif(TARGET OpenCL_Headers)
+
+ if(NOT Opencl_HeadersSource_FOUND)
+ message(STATUS "Opencl_Headers: Source not found")
+ set(Opencl_Headers_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT Opencl_HeadersSource_FOUND)
+
+ # We don't need test builds and installs; we only need the headers.
+ # add_extdirectory("${Opencl_HeadersSource_DIR}" OPENCL_HEADERS EXCLUDE_FROM_ALL)
+
+ add_library(OpenCL_Headers INTERFACE)
+ target_include_directories(OpenCL_Headers INTERFACE ${Opencl_HeadersSource_DIR})
+
+ set(Opencl_Headers_DIR ${Opencl_HeadersSource_DIR} PARENT_SCOPE)
+ set(Opencl_Headers_FOUND TRUE PARENT_SCOPE)
+endfunction(_Opencl_Headers_import)
+
+_Opencl_Headers_import()
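Since the interface target above only adds an include path (the OpenCL-Headers repository keeps its headers under CL/ at the source root), consumers can compile against it without linking any OpenCL library. A hedged compile-only example, assuming the v2021.04.29 header layout:

#include <CL/cl.h>
#include <stdio.h>

int main(void)
{
#ifdef CL_VERSION_2_0
    printf("OpenCL 2.0 declarations are visible\n"); /* headers only, no libOpenCL */
#endif
    return 0;
}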
diff --git a/infra/cmake/packages/Opencl_HeadersSourceConfig.cmake b/infra/cmake/packages/Opencl_HeadersSourceConfig.cmake
new file mode 100644
index 000000000..04858aa41
--- /dev/null
+++ b/infra/cmake/packages/Opencl_HeadersSourceConfig.cmake
@@ -0,0 +1,22 @@
+function(_Opencl_HeadersSource_import)
+ if(NOT DOWNLOAD_OPENCL_HEADERS)
+ set(Opencl_HeadersSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_OPENCL_HEADERS)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(OPENCL_HEADERS_URL ${EXTERNAL_DOWNLOAD_SERVER}/KhronosGroup/OpenCL-Headers/archive/v2021.04.29.tar.gz)
+
+ ExternalSource_Download(OPENCL_HEADERS
+ DIRNAME OPENCL_HEADERS
+ URL ${OPENCL_HEADERS_URL}
+ CHECKSUM MD5=5a7ea04265119aa76b4ecbd95f258219)
+
+ set(Opencl_HeadersSource_DIR ${OPENCL_HEADERS_SOURCE_DIR} PARENT_SCOPE)
+ set(Opencl_HeadersSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_Opencl_HeadersSource_import)
+
+_Opencl_HeadersSource_import()
diff --git a/infra/cmake/packages/Opengl_HeadersSourceConfig.cmake b/infra/cmake/packages/Opengl_HeadersSourceConfig.cmake
new file mode 100644
index 000000000..c5a774a73
--- /dev/null
+++ b/infra/cmake/packages/Opengl_HeadersSourceConfig.cmake
@@ -0,0 +1,21 @@
+function(_Opengl_HeadersSource_import)
+ if(NOT DOWNLOAD_OPENGL_HEADERS)
+ set(Opengl_HeadersSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_OPENGL_HEADERS)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(OPENGL_HEADERS_URL ${EXTERNAL_DOWNLOAD_SERVER}/KhronosGroup/OpenGL-Registry/archive/0cb0880d91581d34f96899c86fc1bf35627b4b81.zip)
+
+ ExternalSource_Download(OPENGL_HEADERS
+ DIRNAME OPENGL_HEADERS
+ URL ${OPENGL_HEADERS_URL})
+
+ set(Opengl_HeadersSource_DIR ${OPENGL_HEADERS_SOURCE_DIR} PARENT_SCOPE)
+ set(Opengl_HeadersSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_Opengl_HeadersSource_import)
+
+_Opengl_HeadersSource_import()
diff --git a/infra/cmake/packages/ProtobufConfig.cmake b/infra/cmake/packages/ProtobufConfig.cmake
index 3c8d2320f..f8e9ff1f9 100644
--- a/infra/cmake/packages/ProtobufConfig.cmake
+++ b/infra/cmake/packages/ProtobufConfig.cmake
@@ -51,17 +51,34 @@ function(_Protobuf_build)
return()
endif(NOT ProtobufSource_FOUND)
+ # set 'EXTERNAL_JS_EMBED' environment variable
+ if(NOT DEFINED ENV{EXTERNAL_JS_EMBED})
+ if(DEFINED ENV{BUILD_HOST_EXEC})
+ set(EXTERNAL_JS_EMBED $ENV{BUILD_HOST_EXEC}/externals/PROTOBUF/build/js_embed)
+ set(ENV{EXTERNAL_JS_EMBED} ${EXTERNAL_JS_EMBED})
+ endif(DEFINED ENV{BUILD_HOST_EXEC})
+ endif(NOT DEFINED ENV{EXTERNAL_JS_EMBED})
+
nnas_include(ExternalBuildTools)
ExternalBuild_CMake(CMAKE_DIR ${ProtobufSource_DIR}/cmake
BUILD_DIR ${CMAKE_BINARY_DIR}/externals/PROTOBUF/build
INSTALL_DIR ${EXT_OVERLAY_DIR}
BUILD_FLAGS -fPIC
EXTRA_OPTS -Dprotobuf_BUILD_TESTS=OFF -Dprotobuf_WITH_ZLIB=OFF
- IDENTIFIER "3.5.2-fix1"
+ IDENTIFIER "3.5.2-fix2"
PKG_NAME "PROTOBUF")
endfunction(_Protobuf_build)
+set(PROTOC_PATH $<TARGET_FILE:protobuf::protoc>)
+
+if(DEFINED ENV{BUILD_HOST_EXEC})
+ set(PROTOC_PATH $ENV{BUILD_HOST_EXEC}/overlay/bin/protoc)
+endif(DEFINED ENV{BUILD_HOST_EXEC})
+if(DEFINED ENV{EXTERNAL_PROTOC})
+ set(PROTOC_PATH $ENV{EXTERNAL_PROTOC})
+endif(DEFINED ENV{EXTERNAL_PROTOC})
+
_Protobuf_build()
if(USE_PROTOBUF_LEGACY_IMPORT)
@@ -96,7 +113,7 @@ if(Protobuf_FOUND)
add_custom_command(OUTPUT ${OUTPUT_FILES}
COMMAND ${CMAKE_COMMAND} -E make_directory "${abs_output_dir}"
- COMMAND "$<TARGET_FILE:protobuf::protoc>" --cpp_out "${abs_output_dir}" -I "${abs_proto_dir}" ${PROTO_FILES}
+ COMMAND "${PROTOC_PATH}" --cpp_out "${abs_output_dir}" -I "${abs_proto_dir}" ${PROTO_FILES}
DEPENDS ${PROTO_FILES})
set(${PREFIX}_SOURCES ${OUTPUT_FILES} PARENT_SCOPE)
diff --git a/infra/cmake/packages/ProtobufSource.patch b/infra/cmake/packages/ProtobufSource.patch
new file mode 100644
index 000000000..9a83a80e4
--- /dev/null
+++ b/infra/cmake/packages/ProtobufSource.patch
@@ -0,0 +1,18 @@
+--- a/cmake/libprotoc.cmake
++++ b/cmake/libprotoc.cmake
+@@ -209,10 +209,14 @@
+ ${protobuf_source_dir}/src/google/protobuf/compiler/js/well_known_types/timestamp.js
+ )
+ add_executable(js_embed ${protobuf_source_dir}/src/google/protobuf/compiler/js/embed.cc)
++set(JS_EMBED_EXEC "js_embed")
++if(DEFINED ENV{EXTERNAL_JS_EMBED})
++ set(JS_EMBED_EXEC "$ENV{EXTERNAL_JS_EMBED}")
++endif()
+ add_custom_command(
+ OUTPUT ${protobuf_source_dir}/src/google/protobuf/compiler/js/well_known_types_embed.cc
+ DEPENDS js_embed ${js_well_known_types_sources}
+- COMMAND js_embed ${js_well_known_types_sources} > ${protobuf_source_dir}/src/google/protobuf/compiler/js/well_known_types_embed.cc
++ COMMAND ${JS_EMBED_EXEC} ${js_well_known_types_sources} > ${protobuf_source_dir}/src/google/protobuf/compiler/js/well_known_types_embed.cc
+ )
+
+ add_library(libprotoc ${protobuf_SHARED_OR_STATIC}
diff --git a/infra/cmake/packages/ProtobufSourceConfig.cmake b/infra/cmake/packages/ProtobufSourceConfig.cmake
index 6b35ae7dc..a1704e53d 100644
--- a/infra/cmake/packages/ProtobufSourceConfig.cmake
+++ b/infra/cmake/packages/ProtobufSourceConfig.cmake
@@ -7,9 +7,11 @@ function(_ProtobufSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(PROTOBUF_URL https://github.com/protocolbuffers/protobuf/archive/v3.5.2.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(PROTOBUF_URL ${EXTERNAL_DOWNLOAD_SERVER}/protocolbuffers/protobuf/archive/v3.5.2.tar.gz)
- ExternalSource_Download(PROTOBUF ${PROTOBUF_URL})
+ ExternalSource_Download(PROTOBUF ${PROTOBUF_URL}
+ PATCH ${CMAKE_CURRENT_LIST_DIR}/ProtobufSource.patch)
set(ProtobufSource_DIR ${PROTOBUF_SOURCE_DIR} PARENT_SCOPE)
set(ProtobufSource_FOUND TRUE PARENT_SCOPE)
diff --git a/infra/cmake/packages/PsimdSourceConfig.cmake b/infra/cmake/packages/PsimdSourceConfig.cmake
new file mode 100644
index 000000000..1da5cdc5e
--- /dev/null
+++ b/infra/cmake/packages/PsimdSourceConfig.cmake
@@ -0,0 +1,21 @@
+function(_PsimdSource_import)
+ if(NOT ${DOWNLOAD_PSIMD})
+ set(PsimdSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT ${DOWNLOAD_PSIMD})
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ # psimd commit in xnnpack 8b283aa30a31
+ envoption(PSIMD_URL ${EXTERNAL_DOWNLOAD_SERVER}/Maratyszcza/psimd/archive/072586a71b55b7f8c584153d223e95687148a900.tar.gz)
+ ExternalSource_Download(PSIMD
+ DIRNAME PSIMD
+ URL ${PSIMD_URL})
+
+ set(PsimdSource_DIR ${PSIMD_SOURCE_DIR} PARENT_SCOPE)
+ set(PsimdSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_PsimdSource_import)
+
+_PsimdSource_import()
diff --git a/infra/cmake/packages/PthreadpoolSourceConfig.cmake b/infra/cmake/packages/PthreadpoolSourceConfig.cmake
new file mode 100644
index 000000000..4e1910a84
--- /dev/null
+++ b/infra/cmake/packages/PthreadpoolSourceConfig.cmake
@@ -0,0 +1,21 @@
+function(_PthreadpoolSource_import)
+ if(NOT ${DOWNLOAD_PTHREADPOOL})
+ set(PthreadpoolSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT ${DOWNLOAD_PTHREADPOOL})
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ # pthreadpool commit in xnnpack 8b283aa30a31
+ envoption(PTHREADPOOL_URL ${EXTERNAL_DOWNLOAD_SERVER}/Maratyszcza/pthreadpool/archive/029c88620802e1361ccf41d1970bd5b07fd6b7bb.tar.gz)
+ ExternalSource_Download(PTHREADPOOL
+ DIRNAME PTHREADPOOL
+ URL ${PTHREADPOOL_URL})
+
+ set(PthreadpoolSource_DIR ${PTHREADPOOL_SOURCE_DIR} PARENT_SCOPE)
+ set(PthreadpoolSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_PthreadpoolSource_import)
+
+_PthreadpoolSource_import()
diff --git a/infra/cmake/packages/Pybind11SourceConfig.cmake b/infra/cmake/packages/Pybind11SourceConfig.cmake
index 76f51e4d3..2f6425355 100644
--- a/infra/cmake/packages/Pybind11SourceConfig.cmake
+++ b/infra/cmake/packages/Pybind11SourceConfig.cmake
@@ -7,7 +7,8 @@ function(_Pybind11Source_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(PYBIND11_URL https://github.com/pybind/pybind11/archive/v2.5.0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(PYBIND11_URL ${EXTERNAL_DOWNLOAD_SERVER}/pybind/pybind11/archive/v2.5.0.tar.gz)
ExternalSource_Download(PYBIND11 ${PYBIND11_URL})
diff --git a/infra/cmake/packages/PytorchSourceConfig.cmake b/infra/cmake/packages/PytorchSourceConfig.cmake
index 0212f2f4b..94757f865 100644
--- a/infra/cmake/packages/PytorchSourceConfig.cmake
+++ b/infra/cmake/packages/PytorchSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_PytorchSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(PYTORCH_URL https://github.com/pytorch/pytorch/archive/v0.4.1.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(PYTORCH_URL ${EXTERNAL_DOWNLOAD_SERVER}/pytorch/pytorch/archive/v0.4.1.tar.gz)
ExternalSource_Download(PYTORCH ${PYTORCH_URL})
diff --git a/infra/cmake/packages/RuySourceConfig.cmake b/infra/cmake/packages/RuySourceConfig.cmake
new file mode 100644
index 000000000..4faf0bb9f
--- /dev/null
+++ b/infra/cmake/packages/RuySourceConfig.cmake
@@ -0,0 +1,21 @@
+function(_RuySource_import)
+ if(NOT ${DOWNLOAD_RUY})
+ set(RuySource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT ${DOWNLOAD_RUY})
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ # NOTE Downloads ruy source used by tensorflow v2.3.0
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(RUY_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/ruy/archive/34ea9f4993955fa1ff4eb58e504421806b7f2e8f.tar.gz)
+ ExternalSource_Download(RUY
+ DIRNAME RUY
+ URL ${RUY_URL})
+
+ set(RuySource_DIR ${RUY_SOURCE_DIR} PARENT_SCOPE)
+ set(RuySource_FOUND TRUE PARENT_SCOPE)
+endfunction(_RuySource_import)
+
+_RuySource_import()
diff --git a/infra/cmake/packages/TensorFlow-1.13/TensorFlowConfig.cmake b/infra/cmake/packages/TensorFlow-1.13/TensorFlowConfig.cmake
new file mode 100644
index 000000000..8fedc9537
--- /dev/null
+++ b/infra/cmake/packages/TensorFlow-1.13/TensorFlowConfig.cmake
@@ -0,0 +1,56 @@
+set(TENSORFLOW_PREFIX "/usr" CACHE PATH "The location of pre-installed TensorFlow 1.13 library")
+set(TENSORFLOW_VERSION_REQUIRED "1.13")
+
+# TODO Build TensorFlow from the (downloaded) source
+
+function(_TensorFlow_import)
+ # Clean cache
+ unset(TensorFlow_LIB CACHE)
+ unset(TensorFlow_INCLUDE_DIR CACHE)
+ # Find the header & lib
+ find_library(TensorFlow_LIB NAMES tensorflow PATHS "${TENSORFLOW_PREFIX}/lib")
+ find_path(TensorFlow_INCLUDE_DIR NAMES tensorflow/c/c_api.h PATHS "${TENSORFLOW_PREFIX}/include")
+
+ if(NOT TensorFlow_LIB OR NOT TensorFlow_INCLUDE_DIR)
+ message(STATUS "Found TensorFlow: FALSE")
+
+ set(TensorFlow_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT TensorFlow_LIB OR NOT TensorFlow_INCLUDE_DIR)
+
+ # Check TensorFlow version
+ try_run(RUN_RESULT_VAR COMPILE_RESULT_VAR
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_CURRENT_LIST_DIR}/TensorFlowVersionChecker.c
+ COMPILE_DEFINITIONS -I${TensorFlow_INCLUDE_DIR}
+ LINK_LIBRARIES ${TensorFlow_LIB}
+ ARGS ${TENSORFLOW_VERSION_REQUIRED})
+
+ if(NOT COMPILE_RESULT_VAR)
+ message(STATUS "Failed to build TensorFlowVersionChecker. Your libtensorflow may be built on different version of Ubuntu.")
+ message(STATUS "Found TensorFlow: FALSE")
+ set(TensorFlow_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT COMPILE_RESULT_VAR)
+
+ if(NOT RUN_RESULT_VAR EQUAL 0)
+ message(STATUS "you need tensorflow version ${TENSORFLOW_VERSION_REQUIRED}")
+ message(STATUS "Found TensorFlow: FALSE")
+ set(TensorFlow_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT RUN_RESULT_VAR EQUAL 0)
+
+ # Add tensorflow target (if necessary)
+ if(NOT TARGET tensorflow-1.13)
+ message(STATUS "Found TensorFlow (include: ${TensorFlow_INCLUDE_DIR}, library: ${TensorFlow_LIB})")
+
+ # NOTE IMPORTED target may be more appropriate for this case
+ add_library(tensorflow-1.13 INTERFACE)
+ target_link_libraries(tensorflow-1.13 INTERFACE ${TensorFlow_LIB})
+ target_include_directories(tensorflow-1.13 INTERFACE ${TensorFlow_INCLUDE_DIR})
+ endif(NOT TARGET tensorflow-1.13)
+
+ set(TensorFlow_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlow_import)
+
+_TensorFlow_import()
diff --git a/infra/cmake/packages/TensorFlow-1.13/TensorFlowConfigVersion.cmake b/infra/cmake/packages/TensorFlow-1.13/TensorFlowConfigVersion.cmake
new file mode 100644
index 000000000..b5a37ddba
--- /dev/null
+++ b/infra/cmake/packages/TensorFlow-1.13/TensorFlowConfigVersion.cmake
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "1.13")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/TensorFlow-1.13/TensorFlowVersionChecker.c b/infra/cmake/packages/TensorFlow-1.13/TensorFlowVersionChecker.c
new file mode 100644
index 000000000..fcd6be122
--- /dev/null
+++ b/infra/cmake/packages/TensorFlow-1.13/TensorFlowVersionChecker.c
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <string.h>
+#include <tensorflow/c/c_api.h>
+
+int main(int argc, char **argv)
+{
+ if (argc >= 2 && !strncmp(argv[1], TF_Version(), 4))
+ return 0;
+ return 255;
+}
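Note that the checker matches only the first four characters of TF_Version(), so a required "1.13" accepts any "1.13.x" runtime. A hedged stand-alone illustration of that comparison:

#include <stdio.h>
#include <string.h>

int main(void)
{
    const char *required = "1.13";
    const char *runtime  = "1.13.1"; /* e.g. what TF_Version() could return */

    printf("match: %d\n", !strncmp(required, runtime, 4)); /* prints 1 */
    return 0;
}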
diff --git a/infra/cmake/packages/TensorFlowEigenSource-2.1.0/TensorFlowEigenSourceConfig.cmake b/infra/cmake/packages/TensorFlowEigenSource-2.1.0/TensorFlowEigenSourceConfig.cmake
index f84675596..8120ebca2 100644
--- a/infra/cmake/packages/TensorFlowEigenSource-2.1.0/TensorFlowEigenSourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowEigenSource-2.1.0/TensorFlowEigenSourceConfig.cmake
@@ -9,7 +9,8 @@ function(_TensorFlowEigenSource_import)
# Exact version used by TensorFlow v2.1.0.
# See tensorflow/tensorflow/workspace.bzl.
- envoption(TENSORFLOW_2_1_0_EIGEN_URL https://gitlab.com/libeigen/eigen/-/archive/4e696901f873a2347f76d931cf2f701e31e15d05/eigen-4e696901f873a2347f76d931cf2f701e31e15d05.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://gitlab.com")
+ envoption(TENSORFLOW_2_1_0_EIGEN_URL ${EXTERNAL_DOWNLOAD_SERVER}/libeigen/eigen/-/archive/4e696901f873a2347f76d931cf2f701e31e15d05/eigen-4e696901f873a2347f76d931cf2f701e31e15d05.tar.gz)
ExternalSource_Download(EIGEN DIRNAME TENSORFLOW-2.1.0-EIGEN ${TENSORFLOW_2_1_0_EIGEN_URL})
diff --git a/infra/cmake/packages/TensorFlowEigenSource-2.3.0-rc0Config.cmake b/infra/cmake/packages/TensorFlowEigenSource-2.3.0-rc0Config.cmake
deleted file mode 100644
index 207f7b5bd..000000000
--- a/infra/cmake/packages/TensorFlowEigenSource-2.3.0-rc0Config.cmake
+++ /dev/null
@@ -1,23 +0,0 @@
-function(_TensorFlowEigenSource_import)
- if(NOT DOWNLOAD_EIGEN)
- set(TensorFlowEigenSource_FOUND FALSE PARENT_SCOPE)
- return()
- endif(NOT DOWNLOAD_EIGEN)
-
- nnas_include(ExternalSourceTools)
- nnas_include(OptionTools)
-
- # NOTE TensorFlow 2.3.0-rc0 uses the following URL
- envoption(EXTERNAL_DOWNLOAD_SERVER "https://gitlab.com")
- envoption(TENSORFLOW_2_3_0_EIGEN_URL ${EXTERNAL_DOWNLOAD_SERVER}/libeigen/eigen/-/archive/386d809bde475c65b7940f290efe80e6a05878c4/eigen-386d809bde475c65b7940f290efe80e6a05878c4.tar.gz)
-
- ExternalSource_Download(EIGEN
- DIRNAME TENSORFLOW-2.3.0-EIGEN
- URL ${TENSORFLOW_2_3_0_EIGEN_URL}
-)
-
- set(TensorFlowEigenSource_DIR ${EIGEN_SOURCE_DIR} PARENT_SCOPE)
- set(TensorFlowEigenSource_FOUND TRUE PARENT_SCOPE)
-endfunction(_TensorFlowEigenSource_import)
-
-_TensorFlowEigenSource_import()
diff --git a/infra/cmake/packages/TensorFlowEigenSource-2.6.0/TensorFlowEigenSourceConfig.cmake b/infra/cmake/packages/TensorFlowEigenSource-2.6.0/TensorFlowEigenSourceConfig.cmake
new file mode 100644
index 000000000..a9ec75d34
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowEigenSource-2.6.0/TensorFlowEigenSourceConfig.cmake
@@ -0,0 +1,21 @@
+function(_TensorFlowEigenSource_import)
+ if(NOT DOWNLOAD_EIGEN)
+ set(TensorFlowEigenSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_EIGEN)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ # Exact version used by TensorFlow v2.6.0.
+ # See tensorflow/third_party/eigen3/workspace.bzl.
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://gitlab.com")
+ envoption(TENSORFLOW_2_6_0_EIGEN_URL ${EXTERNAL_DOWNLOAD_SERVER}/libeigen/eigen/-/archive/12e8d57108c50d8a63605c6eb0144c838c128337/eigen-12e8d57108c50d8a63605c6eb0144c838c128337.tar.gz)
+
+ ExternalSource_Download(EIGEN DIRNAME TENSORFLOW-2.6.0-EIGEN ${TENSORFLOW_2_6_0_EIGEN_URL})
+
+ set(TensorFlowEigenSource_DIR ${EIGEN_SOURCE_DIR} PARENT_SCOPE)
+ set(TensorFlowEigenSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowEigenSource_import)
+
+_TensorFlowEigenSource_import()
diff --git a/infra/cmake/packages/TensorFlowEigenSource-2.6.0/TensorFlowEigenSourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowEigenSource-2.6.0/TensorFlowEigenSourceConfigVersion.cmake
new file mode 100644
index 000000000..38ad0aa31
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowEigenSource-2.6.0/TensorFlowEigenSourceConfigVersion.cmake
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "2.6.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/TensorFlowEigenSource-2.8.0/TensorFlowEigenSourceConfig.cmake b/infra/cmake/packages/TensorFlowEigenSource-2.8.0/TensorFlowEigenSourceConfig.cmake
new file mode 100644
index 000000000..6f59f0771
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowEigenSource-2.8.0/TensorFlowEigenSourceConfig.cmake
@@ -0,0 +1,21 @@
+function(_TensorFlowEigenSource_import)
+ if(NOT DOWNLOAD_EIGEN)
+ set(TensorFlowEigenSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_EIGEN)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ # Exact version used by TensorFlow v2.8.0.
+ # See tensorflow/third_party/eigen3/workspace.bzl.
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://gitlab.com")
+ envoption(TENSORFLOW_2_8_0_EIGEN_URL ${EXTERNAL_DOWNLOAD_SERVER}/libeigen/eigen/-/archive/008ff3483a8c5604639e1c4d204eae30ad737af6/eigen-008ff3483a8c5604639e1c4d204eae30ad737af6.tar.gz)
+
+ ExternalSource_Download(EIGEN DIRNAME TENSORFLOW-2.8.0-EIGEN ${TENSORFLOW_2_8_0_EIGEN_URL})
+
+ set(TensorFlowEigenSource_DIR ${EIGEN_SOURCE_DIR} PARENT_SCOPE)
+ set(TensorFlowEigenSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowEigenSource_import)
+
+_TensorFlowEigenSource_import()
diff --git a/infra/cmake/packages/TensorFlowEigenSource-2.8.0/TensorFlowEigenSourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowEigenSource-2.8.0/TensorFlowEigenSourceConfigVersion.cmake
new file mode 100644
index 000000000..2ad2e241e
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowEigenSource-2.8.0/TensorFlowEigenSourceConfigVersion.cmake
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "2.8.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.1.0/TensorFlowGEMMLowpSourceConfig.cmake b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.1.0/TensorFlowGEMMLowpSourceConfig.cmake
index 035264fa9..421be6c66 100644
--- a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.1.0/TensorFlowGEMMLowpSourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.1.0/TensorFlowGEMMLowpSourceConfig.cmake
@@ -9,7 +9,8 @@ function(_TensorFlowGEMMLowpSource_import)
# Exact version used by TensorFlow v2.1.0.
# See tensorflow/tensorflow/workspace.bzl.
- envoption(TENSORFLOW_2_1_0_GEMMLOWP_URL https://github.com/google/gemmlowp/archive/12fed0cd7cfcd9e169bf1925bc3a7a58725fdcc3.zip)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_1_0_GEMMLOWP_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/gemmlowp/archive/12fed0cd7cfcd9e169bf1925bc3a7a58725fdcc3.zip)
ExternalSource_Download(GEMMLOWP DIRNAME TENSORFLOW-2.1.0-GEMMLOWP ${TENSORFLOW_2_1_0_GEMMLOWP_URL})
diff --git a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.3.0/TensorFlowGEMMLowpSourceConfig.cmake b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.3.0/TensorFlowGEMMLowpSourceConfig.cmake
index bc13d6227..44c56a6be 100644
--- a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.3.0/TensorFlowGEMMLowpSourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.3.0/TensorFlowGEMMLowpSourceConfig.cmake
@@ -9,7 +9,8 @@ function(_TensorFlowGEMMLowpSource_import)
# Exact version used by TensorFlow v2.3.0.
# See tensorflow/tensorflow/workspace.bzl.
- envoption(TENSORFLOW_2_3_0_GEMMLOWP_URL https://github.com/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_3_0_GEMMLOWP_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip)
ExternalSource_Download(GEMMLOWP DIRNAME TENSORFLOW-2.3.0-GEMMLOWP ${TENSORFLOW_2_3_0_GEMMLOWP_URL})
diff --git a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfig.cmake b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfig.cmake
new file mode 100644
index 000000000..76cdfdd6c
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfig.cmake
@@ -0,0 +1,21 @@
+function(_TensorFlowGEMMLowpSource_import)
+ if(NOT DOWNLOAD_GEMMLOWP)
+ set(TensorFlowGEMMLowpSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_GEMMLOWP)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ # Exact version used by TensorFlow v2.6.0.
+ # See tensorflow/third_party/gemmlowp/workspace.bzl.
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_6_0_GEMMLOWP_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip)
+
+ ExternalSource_Download(GEMMLOWP DIRNAME TENSORFLOW-2.6.0-GEMMLOWP ${TENSORFLOW_2_6_0_GEMMLOWP_URL})
+
+ set(TensorFlowGEMMLowpSource_DIR ${GEMMLOWP_SOURCE_DIR} PARENT_SCOPE)
+ set(TensorFlowGEMMLowpSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowGEMMLowpSource_import)
+
+_TensorFlowGEMMLowpSource_import()
diff --git a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfigVersion.cmake
new file mode 100644
index 000000000..38ad0aa31
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfigVersion.cmake
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "2.6.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.8.0/TensorFlowGEMMLowpSourceConfig.cmake b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.8.0/TensorFlowGEMMLowpSourceConfig.cmake
new file mode 100644
index 000000000..3e17490c3
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.8.0/TensorFlowGEMMLowpSourceConfig.cmake
@@ -0,0 +1,21 @@
+function(_TensorFlowGEMMLowpSource_import)
+ if(NOT DOWNLOAD_GEMMLOWP)
+ set(TensorFlowGEMMLowpSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_GEMMLOWP)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ # Exact version used by TensorFlow v2.8.0.
+ # See tensorflow/third_party/gemmlowp/workspace.bzl.
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_8_0_GEMMLOWP_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip)
+
+ ExternalSource_Download(GEMMLOWP DIRNAME TENSORFLOW-2.8.0-GEMMLOWP ${TENSORFLOW_2_8_0_GEMMLOWP_URL})
+
+ set(TensorFlowGEMMLowpSource_DIR ${GEMMLOWP_SOURCE_DIR} PARENT_SCOPE)
+ set(TensorFlowGEMMLowpSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowGEMMLowpSource_import)
+
+_TensorFlowGEMMLowpSource_import()
diff --git a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.8.0/TensorFlowGEMMLowpSourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.8.0/TensorFlowGEMMLowpSourceConfigVersion.cmake
new file mode 100644
index 000000000..2ad2e241e
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.8.0/TensorFlowGEMMLowpSourceConfigVersion.cmake
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "2.8.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/TensorFlowLite-1.12/Lite/CMakeLists.txt b/infra/cmake/packages/TensorFlowLite-1.12/Lite/CMakeLists.txt
deleted file mode 100644
index 337d6b24f..000000000
--- a/infra/cmake/packages/TensorFlowLite-1.12/Lite/CMakeLists.txt
+++ /dev/null
@@ -1,41 +0,0 @@
-# NOTE The followings SHOULD be defined before using this CMakeLists.txt
-#
-# 'TensorFlowSource_DIR' variable
-# 'FlatBuffersSource_DIR' variable
-# 'eigen' target
-# 'gemmlowp' target
-# 'neon2sse' target
-# 'farmhash' target
-# 'abseil' target
-#
-message(STATUS "Build TensorFlow Lite from ${TensorFlowSource_DIR}")
-
-set(TensorFlowLiteSource_DIR ${TensorFlowSource_DIR}/tensorflow/contrib/lite)
-
-file(GLOB CORE_SRCS "${TensorFlowLiteSource_DIR}/*.c" "${TensorFlowLiteSource_DIR}/*.cc" "${TensorFlowLiteSource_DIR}/c/*.c" "${TensorFlowLiteSource_DIR}/core/api/*.cc")
-file(GLOB_RECURSE CORE_TESTS "${TensorFlowLiteSource_DIR}/*test*.cc")
-list(REMOVE_ITEM CORE_SRCS ${CORE_TESTS})
-
-file(GLOB_RECURSE KERNEL_SRCS "${TensorFlowLiteSource_DIR}/kernels/*.cc")
-file(GLOB_RECURSE KERNEL_TESTS "${TensorFlowLiteSource_DIR}/kernels/*test*.cc")
-list(REMOVE_ITEM KERNEL_SRCS ${KERNEL_TESTS})
-# Exclude buggy kernel(s) from the build
-#list(REMOVE_ITEM KERNEL_SRCS "${TensorFlowLiteSource_DIR}/kernels/internal/spectrogram.cc")
-
-list(APPEND SRCS ${CORE_SRCS})
-list(APPEND SRCS ${KERNEL_SRCS})
-
-include(CheckCXXCompilerFlag)
-
-CHECK_CXX_COMPILER_FLAG(-Wno-extern-c-compat COMPILER_SUPPORT_EXTERN_C_COMPAT_WARNING)
-
-add_library(tensorflowlite-1.12 ${SRCS})
-set_target_properties(tensorflowlite-1.12 PROPERTIES POSITION_INDEPENDENT_CODE ON)
-target_include_directories(tensorflowlite-1.12 PUBLIC ${TensorFlowSource_DIR})
-target_include_directories(tensorflowlite-1.12 PUBLIC ${FlatBuffersSource_DIR}/include)
-target_compile_options(tensorflowlite-1.12 PUBLIC -Wno-ignored-attributes)
-if(COMPILER_SUPPORT_EXTERN_C_COMPAT_WARNING)
- target_compile_options(tensorflowlite-1.12 PUBLIC -Wno-extern-c-compat)
-endif(COMPILER_SUPPORT_EXTERN_C_COMPAT_WARNING)
-target_compile_definitions(tensorflowlite-1.12 PUBLIC "GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK")
-target_link_libraries(tensorflowlite-1.12 eigen-fd6845384b86 gemmlowp neon2sse farmhash abseil dl)
diff --git a/infra/cmake/packages/TensorFlowLite-1.12/TensorFlowLiteConfig.cmake b/infra/cmake/packages/TensorFlowLite-1.12/TensorFlowLiteConfig.cmake
deleted file mode 100644
index ff15d8576..000000000
--- a/infra/cmake/packages/TensorFlowLite-1.12/TensorFlowLiteConfig.cmake
+++ /dev/null
@@ -1,62 +0,0 @@
-function(_TensorFlowLite_import)
- nnas_find_package(TensorFlowSource EXACT 1.12 QUIET)
-
- if(NOT TensorFlowSource_FOUND)
- set(TensorFlowLite_FOUND FALSE PARENT_SCOPE)
- return()
- endif(NOT TensorFlowSource_FOUND)
-
- # TensorFlow 1.12 downloads FlatBuffers from https://github.com/google/flatbuffers/archive/1f5eae5d6a1.tar.gz
- #
- # Let's use 1.10 released in 2018.10 (compatible with 1f5eae5d6a1).
- nnas_find_package(FlatBuffersSource EXACT 1.10 QUIET)
-
- if(NOT FlatBuffersSource_FOUND)
- set(TensorFlowLite_FOUND FALSE PARENT_SCOPE)
- return()
- endif(NOT FlatBuffersSource_FOUND)
-
- nnas_find_package(Farmhash QUIET)
-
- if(NOT Farmhash_FOUND)
- set(TensorFlowLite_FOUND FALSE PARENT_SCOPE)
- return()
- endif(NOT Farmhash_FOUND)
-
- nnas_find_package(Eigen-fd6845384b86 QUIET)
-
- if(NOT Eigen-fd6845384b86_FOUND)
- set(TensorFlowLite_FOUND FALSE PARENT_SCOPE)
- return()
- endif(NOT Eigen-fd6845384b86_FOUND)
-
- nnas_find_package(GEMMLowp QUIET)
-
- if(NOT GEMMLowp_FOUND)
- set(TensorFlowLite_FOUND FALSE PARENT_SCOPE)
- return()
- endif(NOT GEMMLowp_FOUND)
-
- nnas_find_package(NEON2SSE QUIET)
-
- if(NOT NEON2SSE_FOUND)
- set(TensorFlowLite_FOUND FALSE PARENT_SCOPE)
- return()
- endif(NOT NEON2SSE_FOUND)
-
- nnas_find_package(Abseil QUIET)
-
- if(NOT Abseil_FOUND)
- set(TensorFlowLite_FOUND FALSE PARENT_SCOPE)
- return()
- endif(NOT Abseil_FOUND)
-
- if(NOT TARGET tensorflowlite-1.12)
- nnas_include(ExternalProjectTools)
- add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/Lite" tflite-1.12)
- endif(NOT TARGET tensorflowlite-1.12)
-
- set(TensorFlowLite_FOUND TRUE PARENT_SCOPE)
-endfunction(_TensorFlowLite_import)
-
-_TensorFlowLite_import()
diff --git a/infra/cmake/packages/TensorFlowLite-1.12/TensorFlowLiteConfigVersion.cmake b/infra/cmake/packages/TensorFlowLite-1.12/TensorFlowLiteConfigVersion.cmake
deleted file mode 100644
index 4a57b655b..000000000
--- a/infra/cmake/packages/TensorFlowLite-1.12/TensorFlowLiteConfigVersion.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-set(PACKAGE_VERSION "1.12")
-set(PACKAGE_VERSION_EXACT FALSE)
-set(PACKAGE_VERSION_COMPATIBLE FALSE)
-set(PACKAGE_VERSION_UNSUITABLE TRUE)
-
-if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
- set(PACKAGE_VERSION_EXACT TRUE)
- set(PACKAGE_VERSION_UNSUITABLE FALSE)
-endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/TensorFlowLite-1.13.1/Lite/CMakeLists.txt b/infra/cmake/packages/TensorFlowLite-1.13.1/Lite/CMakeLists.txt
index c35617497..a57d7f4cb 100644
--- a/infra/cmake/packages/TensorFlowLite-1.13.1/Lite/CMakeLists.txt
+++ b/infra/cmake/packages/TensorFlowLite-1.13.1/Lite/CMakeLists.txt
@@ -1,7 +1,9 @@
# NOTE The followings SHOULD be defined before using this CMakeLists.txt
+# NOTE TensorFlow 1.13.1 uses flatbuffers-1.10
+# but we use flatbuffers-2.0 to match the flatbuffers version used by all other modules.
#
# 'TensorFlowSource_DIR' variable
-# 'FlatBuffersSource_DIR' variable
+# 'flatbuffers-2.0' target
# 'eigen' target
# 'gemmlowp' target
# 'neon2sse' target
@@ -37,10 +39,9 @@ CHECK_CXX_COMPILER_FLAG(-Wno-extern-c-compat COMPILER_SUPPORT_EXTERN_C_COMPAT_WA
add_library(tensorflowlite-1.13.1 ${SRCS})
set_target_properties(tensorflowlite-1.13.1 PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(tensorflowlite-1.13.1 PUBLIC ${TensorFlowSource_DIR})
-target_include_directories(tensorflowlite-1.13.1 PUBLIC ${FlatBuffersSource_DIR}/include)
target_compile_options(tensorflowlite-1.13.1 PUBLIC -Wno-ignored-attributes)
if(COMPILER_SUPPORT_EXTERN_C_COMPAT_WARNING)
target_compile_options(tensorflowlite-1.13.1 PUBLIC -Wno-extern-c-compat)
endif(COMPILER_SUPPORT_EXTERN_C_COMPAT_WARNING)
target_compile_definitions(tensorflowlite-1.13.1 PUBLIC "GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK")
-target_link_libraries(tensorflowlite-1.13.1 eigen gemmlowp neon2sse farmhash abseil dl)
+target_link_libraries(tensorflowlite-1.13.1 flatbuffers-2.0 eigen gemmlowp neon2sse farmhash abseil dl)
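
Dropping the FlatBuffersSource_DIR include line works because the new flatbuffers-2.0 target carries its own usage requirements, so linking it also propagates the headers. The target's actual definition lives outside this diff; its assumed shape:

    # Assumed shape of flatbuffers-2.0: an interface target whose include
    # directories flow to every consumer via target_link_libraries.
    # FlatBuffers_INCLUDE_DIR is an assumed variable name.
    add_library(flatbuffers-2.0 INTERFACE)
    target_include_directories(flatbuffers-2.0 INTERFACE "${FlatBuffers_INCLUDE_DIR}")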
diff --git a/infra/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLiteConfig.cmake b/infra/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLiteConfig.cmake
index 2c6bd9f7a..ea2065850 100644
--- a/infra/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLiteConfig.cmake
+++ b/infra/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLiteConfig.cmake
@@ -6,12 +6,12 @@ function(_TensorFlowLite_import)
return()
endif(NOT TensorFlowSource_FOUND)
- nnas_find_package(FlatBuffersSource EXACT 1.10 QUIET)
+ nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
- if(NOT FlatBuffersSource_FOUND)
+ if(NOT FlatBuffers_FOUND)
set(TensorFlowLite_FOUND FALSE PARENT_SCOPE)
return()
- endif(NOT FlatBuffersSource_FOUND)
+ endif(NOT FlatBuffers_FOUND)
nnas_find_package(Farmhash QUIET)
diff --git a/infra/cmake/packages/TensorFlowProtoText-1.12/TensorFlowProtoTextConfig.cmake b/infra/cmake/packages/TensorFlowProtoText-1.12/TensorFlowProtoTextConfig.cmake
deleted file mode 100644
index 9c7c79679..000000000
--- a/infra/cmake/packages/TensorFlowProtoText-1.12/TensorFlowProtoTextConfig.cmake
+++ /dev/null
@@ -1,104 +0,0 @@
-function(_TensorFlowProtoText_import)
- macro(require_package PKGNAME)
- nnas_find_package(${PKGNAME} ${ARGN} QUIET)
- if(NOT ${PKGNAME}_FOUND)
- message(STATUS "Found TensorFlowProtoText: FALSE (${PKGNAME} is missing)")
- set(TensorFlowProtoText_FOUND FALSE PARENT_SCOPE)
- return()
- endif(NOT ${PKGNAME}_FOUND)
- endmacro(require_package)
-
- require_package(TensorFlowSource EXACT 1.12)
- require_package(Abseil)
- require_package(Eigen-fd6845384b86)
- require_package(Protobuf)
- require_package(GoogleDoubleConversion)
- require_package(GoogleNSync)
-
- if(NOT TARGET tensorflow-prototext-1.12)
- nnas_include(ExternalProjectTools)
- add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/build" TensorFlowProtoText-1.12)
- endif(NOT TARGET tensorflow-prototext-1.12)
-
- set(TensorFlowProtoText_FOUND TRUE PARENT_SCOPE)
-endfunction(_TensorFlowProtoText_import)
-
-_TensorFlowProtoText_import()
-
-if(TensorFlowProtoText_FOUND)
- # CMAKE_CURRENT_LIST_DIR
- #
- # ... The value has dynamic scope. ... Therefore the value of the variable inside a macro
- # or function is the directory of the file invoking the bottom-most entry on the call stack,
- # not the directory of the file containing the macro or function definition.
- #
- # Reference: https://cmake.org/cmake/help/v3.1/variable/CMAKE_CURRENT_LIST_DIR.html
- set(TENSORLFLOW_PROTO_TEXT_1_12_CMAKE_DIR
- "${CMAKE_CURRENT_LIST_DIR}" CACHE INTERNAL
- "Where to find make_directories"
- )
-
- # Comments from "gen_proto_text_functions.cc"
- # >
- # > Main program to take input protos and write output pb_text source files that
- # > contain generated proto text input and output functions.
- # >
- # > Main expects:
- # > - First argument is output path
- # > - Second argument is the relative path of the protos to the root. E.g.,
- # > for protos built by a rule in tensorflow/core, this will be
- # > tensorflow/core.
- # > - Then any number of source proto file names, plus one source name must be
- # > placeholder.txt from this gen tool's package. placeholder.txt is
- # > ignored for proto resolution, but is used to determine the root at which
- # > the build tool has placed the source proto files.
- # >
- function(ProtoText_Generate PREFIX OUTPUT_DIR)
- # THIS SHOULD SUCCEED!
- nnas_find_package(TensorFlowSource EXACT 1.12 REQUIRED)
-
- set(OUTPUT_REL "tensorflow")
- set(PROTO_DIR "${TensorFlowSource_DIR}")
-
- set(PROTO_INPUTS ${ARGN})
- list(APPEND PROTO_INPUTS "tensorflow/tools/proto_text/placeholder.txt")
-
- get_filename_component(abs_output_dir ${OUTPUT_DIR} ABSOLUTE)
- get_filename_component(abs_proto_dir ${TensorFlowSource_DIR} ABSOLUTE)
-
- # Let's reset variables before using them
- # NOTE This DOES NOT AFFECT variables in the parent scope
- unset(PROTO_FILES)
- unset(OUTPUT_DIRS)
- unset(OUTPUT_FILES)
-
- foreach(proto ${PROTO_INPUTS})
- get_filename_component(fil "${proto}" NAME)
- get_filename_component(dir "${proto}" DIRECTORY)
-
- get_filename_component(fil_we "${fil}" NAME_WE)
-
- get_filename_component(abs_fil "${abs_proto_base}/${proto}" ABSOLUTE)
- get_filename_component(abs_dir "${abs_fil}" DIRECTORY)
-
- list(APPEND PROTO_FILES "${abs_proto_dir}/${proto}")
-
- if(NOT ${fil} STREQUAL "placeholder.txt")
- list(APPEND OUTPUT_DIRS "${abs_output_dir}/${dir}")
- list(APPEND OUTPUT_FILES "${abs_output_dir}/${dir}/${fil_we}.pb_text.h")
- list(APPEND OUTPUT_FILES "${abs_output_dir}/${dir}/${fil_we}.pb_text-impl.h")
- list(APPEND OUTPUT_FILES "${abs_output_dir}/${dir}/${fil_we}.pb_text.cc")
- endif(NOT ${fil} STREQUAL "placeholder.txt")
- endforeach()
-
- add_custom_command(OUTPUT ${OUTPUT_FILES}
- # "make_directory" in CMake 3.1 cannot create multiple directories at once.
- # COMMAND ${CMAKE_COMMAND} -E make_directory ${OUTPUT_DIRS}
- COMMAND "${TENSORLFLOW_PROTO_TEXT_1_12_CMAKE_DIR}/make_directories.sh" ${OUTPUT_DIRS}
- COMMAND "$<TARGET_FILE:tensorflow-prototext-1.12>" "${abs_output_dir}/${OUTPUT_REL}" "${OUTPUT_REL}" ${PROTO_FILES}
- DEPENDS ${PROTO_FILES})
-
- set(${PREFIX}_SOURCES ${OUTPUT_FILES} PARENT_SCOPE)
- set(${PREFIX}_INCLUDE_DIRS ${abs_output_dir} PARENT_SCOPE)
- endfunction(ProtoText_Generate)
-endif(TensorFlowProtoText_FOUND)
diff --git a/infra/cmake/packages/TensorFlowProtoText-1.12/TensorFlowProtoTextConfigVersion.cmake b/infra/cmake/packages/TensorFlowProtoText-1.12/TensorFlowProtoTextConfigVersion.cmake
deleted file mode 100644
index 4a57b655b..000000000
--- a/infra/cmake/packages/TensorFlowProtoText-1.12/TensorFlowProtoTextConfigVersion.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-set(PACKAGE_VERSION "1.12")
-set(PACKAGE_VERSION_EXACT FALSE)
-set(PACKAGE_VERSION_COMPATIBLE FALSE)
-set(PACKAGE_VERSION_UNSUITABLE TRUE)
-
-if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
- set(PACKAGE_VERSION_EXACT TRUE)
- set(PACKAGE_VERSION_UNSUITABLE FALSE)
-endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/TensorFlowProtoText-1.12/build/CMakeLists.txt b/infra/cmake/packages/TensorFlowProtoText-1.12/build/CMakeLists.txt
deleted file mode 100644
index ac8e43b7a..000000000
--- a/infra/cmake/packages/TensorFlowProtoText-1.12/build/CMakeLists.txt
+++ /dev/null
@@ -1,78 +0,0 @@
-message(STATUS "Build TensorFlowProtoText from '${TensorFlowSource_DIR}'")
-
-#
-# Build "proto_text" tool
-#
-unset(SOURCE_FILES)
-
-macro(Source_Add RPATH)
- list(APPEND SOURCE_FILES "${TensorFlowSource_DIR}/${RPATH}")
-endmacro(Source_Add)
-
-# This list comes from "tensorflow/contrib/makefile/proto_text_cc_files.txt"
-Source_Add(tensorflow/core/lib/core/status.cc)
-Source_Add(tensorflow/core/lib/core/threadpool.cc)
-Source_Add(tensorflow/core/lib/hash/hash.cc)
-Source_Add(tensorflow/core/lib/io/inputstream_interface.cc)
-Source_Add(tensorflow/core/lib/io/random_inputstream.cc)
-Source_Add(tensorflow/core/lib/io/buffered_inputstream.cc)
-Source_Add(tensorflow/core/lib/io/inputbuffer.cc)
-Source_Add(tensorflow/core/lib/io/iterator.cc)
-Source_Add(tensorflow/core/lib/io/path.cc)
-Source_Add(tensorflow/core/lib/strings/numbers.cc)
-Source_Add(tensorflow/core/lib/strings/scanner.cc)
-Source_Add(tensorflow/core/lib/strings/str_util.cc)
-Source_Add(tensorflow/core/lib/strings/strcat.cc)
-Source_Add(tensorflow/core/lib/strings/stringprintf.cc)
-Source_Add(tensorflow/core/lib/strings/proto_text_util.cc)
-Source_Add(tensorflow/core/platform/cpu_info.cc)
-Source_Add(tensorflow/core/platform/denormal.cc)
-Source_Add(tensorflow/core/platform/env.cc)
-Source_Add(tensorflow/core/platform/env_time.cc)
-Source_Add(tensorflow/core/platform/file_system.cc)
-Source_Add(tensorflow/core/platform/file_system_helper.cc)
-Source_Add(tensorflow/core/platform/protobuf_util.cc)
-Source_Add(tensorflow/core/platform/setround.cc)
-Source_Add(tensorflow/core/platform/tracing.cc)
-Source_Add(tensorflow/core/platform/posix/env.cc)
-Source_Add(tensorflow/core/platform/posix/env_time.cc)
-Source_Add(tensorflow/core/platform/posix/error.cc)
-Source_Add(tensorflow/core/platform/posix/load_library.cc)
-Source_Add(tensorflow/core/platform/posix/port.cc)
-Source_Add(tensorflow/core/platform/posix/posix_file_system.cc)
-Source_Add(tensorflow/core/platform/default/logging.cc)
-Source_Add(tensorflow/core/platform/default/mutex.cc)
-Source_Add(tensorflow/core/platform/default/protobuf.cc)
-
-Source_Add(tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc)
-Source_Add(tensorflow/tools/proto_text/gen_proto_text_functions.cc)
-
-unset(PROTO_FILES)
-
-macro(Proto_Add RPATH)
- list(APPEND PROTO_FILES "${RPATH}")
-endmacro(Proto_Add)
-
-Proto_Add(tensorflow/core/lib/core/error_codes.proto)
-Proto_Add(tensorflow/core/framework/types.proto)
-Proto_Add(tensorflow/core/framework/tensor.proto)
-Proto_Add(tensorflow/core/framework/tensor_shape.proto)
-Proto_Add(tensorflow/core/framework/summary.proto)
-Proto_Add(tensorflow/core/framework/resource_handle.proto)
-
-Protobuf_Generate(PROTO_TEXT_PROTO
- "${CMAKE_CURRENT_BINARY_DIR}/generated/proto_text"
- "${TensorFlowSource_DIR}"
- ${PROTO_FILES}
-)
-
-add_executable(tensorflow-prototext-1.12 ${SOURCE_FILES} ${PROTO_TEXT_PROTO_SOURCES})
-target_include_directories(tensorflow-prototext-1.12 PRIVATE ${TensorFlowSource_DIR})
-target_include_directories(tensorflow-prototext-1.12 PRIVATE ${PROTO_TEXT_PROTO_INCLUDE_DIRS})
-
-target_link_libraries(tensorflow-prototext-1.12 PRIVATE abseil)
-target_link_libraries(tensorflow-prototext-1.12 PRIVATE eigen-fd6845384b86)
-target_link_libraries(tensorflow-prototext-1.12 PRIVATE ${PROTO_TEXT_PROTO_LIBRARIES})
-target_link_libraries(tensorflow-prototext-1.12 PRIVATE Google::DoubleConversion)
-target_link_libraries(tensorflow-prototext-1.12 PRIVATE Google::NSync)
-target_link_libraries(tensorflow-prototext-1.12 PRIVATE dl)
diff --git a/infra/cmake/packages/TensorFlowProtoText-1.12/make_directories.sh b/infra/cmake/packages/TensorFlowProtoText-1.12/make_directories.sh
deleted file mode 100755
index 1fb2ab683..000000000
--- a/infra/cmake/packages/TensorFlowProtoText-1.12/make_directories.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/bash
-
-while [[ $# -ne 0 ]]; do
- DIR=$1; shift
- mkdir -p "${DIR}"
-done
diff --git a/infra/cmake/packages/TensorFlowProtoText-1.13.1/TensorFlowProtoTextConfig.cmake b/infra/cmake/packages/TensorFlowProtoText-1.13.1/TensorFlowProtoTextConfig.cmake
deleted file mode 100644
index 738b28240..000000000
--- a/infra/cmake/packages/TensorFlowProtoText-1.13.1/TensorFlowProtoTextConfig.cmake
+++ /dev/null
@@ -1,104 +0,0 @@
-function(_TensorFlowProtoText_import)
- macro(require_package PKGNAME)
- nnas_find_package(${PKGNAME} ${ARGN} QUIET)
- if(NOT ${PKGNAME}_FOUND)
- message(STATUS "Found TensorFlowProtoText: FALSE (${PKGNAME} is missing)")
- set(TensorFlowProtoText_FOUND FALSE PARENT_SCOPE)
- return()
- endif(NOT ${PKGNAME}_FOUND)
- endmacro(require_package)
-
- require_package(TensorFlowSource EXACT 1.13.1)
- require_package(Abseil)
- require_package(Eigen)
- require_package(Protobuf)
- require_package(GoogleDoubleConversion)
- require_package(GoogleNSync)
-
- if(NOT TARGET tensorflow-prototext-1.13.1)
- nnas_include(ExternalProjectTools)
- add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/build" TensorFlowProtoText-1.13.1)
- endif(NOT TARGET tensorflow-prototext-1.13.1)
-
- set(TensorFlowProtoText_FOUND TRUE PARENT_SCOPE)
-endfunction(_TensorFlowProtoText_import)
-
-_TensorFlowProtoText_import()
-
-if(TensorFlowProtoText_FOUND)
- # CMAKE_CURRENT_LIST_DIR
- #
- # ... The value has dynamic scope. ... Therefore the value of the variable inside a macro
- # or function is the directory of the file invoking the bottom-most entry on the call stack,
- # not the directory of the file containing the macro or function definition.
- #
- # Reference: https://cmake.org/cmake/help/v3.1/variable/CMAKE_CURRENT_LIST_DIR.html
- set(TENSORLFLOW_PROTO_TEXT_1_13_1_CMAKE_DIR
- "${CMAKE_CURRENT_LIST_DIR}" CACHE INTERNAL
- "Where to find make_directories"
- )
-
- # Comments from "gen_proto_text_functions.cc"
- # >
- # > Main program to take input protos and write output pb_text source files that
- # > contain generated proto text input and output functions.
- # >
- # > Main expects:
- # > - First argument is output path
- # > - Second argument is the relative path of the protos to the root. E.g.,
- # > for protos built by a rule in tensorflow/core, this will be
- # > tensorflow/core.
- # > - Then any number of source proto file names, plus one source name must be
- # > placeholder.txt from this gen tool's package. placeholder.txt is
- # > ignored for proto resolution, but is used to determine the root at which
- # > the build tool has placed the source proto files.
- # >
- function(ProtoText_Generate PREFIX OUTPUT_DIR)
- # THIS SHOULD SUCCEED!
- nnas_find_package(TensorFlowSource EXACT 1.13.1 REQUIRED)
-
- set(OUTPUT_REL "tensorflow")
- set(PROTO_DIR "${TensorFlowSource_DIR}")
-
- set(PROTO_INPUTS ${ARGN})
- list(APPEND PROTO_INPUTS "tensorflow/tools/proto_text/placeholder.txt")
-
- get_filename_component(abs_output_dir ${OUTPUT_DIR} ABSOLUTE)
- get_filename_component(abs_proto_dir ${TensorFlowSource_DIR} ABSOLUTE)
-
- # Let's reset variables before using them
- # NOTE This DOES NOT AFFECT variables in the parent scope
- unset(PROTO_FILES)
- unset(OUTPUT_DIRS)
- unset(OUTPUT_FILES)
-
- foreach(proto ${PROTO_INPUTS})
- get_filename_component(fil "${proto}" NAME)
- get_filename_component(dir "${proto}" DIRECTORY)
-
- get_filename_component(fil_we "${fil}" NAME_WE)
-
- get_filename_component(abs_fil "${abs_proto_base}/${proto}" ABSOLUTE)
- get_filename_component(abs_dir "${abs_fil}" DIRECTORY)
-
- list(APPEND PROTO_FILES "${abs_proto_dir}/${proto}")
-
- if(NOT ${fil} STREQUAL "placeholder.txt")
- list(APPEND OUTPUT_DIRS "${abs_output_dir}/${dir}")
- list(APPEND OUTPUT_FILES "${abs_output_dir}/${dir}/${fil_we}.pb_text.h")
- list(APPEND OUTPUT_FILES "${abs_output_dir}/${dir}/${fil_we}.pb_text-impl.h")
- list(APPEND OUTPUT_FILES "${abs_output_dir}/${dir}/${fil_we}.pb_text.cc")
- endif(NOT ${fil} STREQUAL "placeholder.txt")
- endforeach()
-
- add_custom_command(OUTPUT ${OUTPUT_FILES}
- # "make_directory" in CMake 3.1 cannot create multiple directories at once.
- # COMMAND ${CMAKE_COMMAND} -E make_directory ${OUTPUT_DIRS}
- COMMAND "${TENSORLFLOW_PROTO_TEXT_1_13_1_CMAKE_DIR}/make_directories.sh" ${OUTPUT_DIRS}
- COMMAND "$<TARGET_FILE:tensorflow-prototext-1.13.1>" "${abs_output_dir}/${OUTPUT_REL}" "${OUTPUT_REL}" ${PROTO_FILES}
- DEPENDS ${PROTO_FILES})
-
- set(${PREFIX}_SOURCES ${OUTPUT_FILES} PARENT_SCOPE)
- set(${PREFIX}_INCLUDE_DIRS ${abs_output_dir} PARENT_SCOPE)
- endfunction(ProtoText_Generate)
-endif(TensorFlowProtoText_FOUND)
diff --git a/infra/cmake/packages/TensorFlowProtoText-1.13.1/TensorFlowProtoTextConfigVersion.cmake b/infra/cmake/packages/TensorFlowProtoText-1.13.1/TensorFlowProtoTextConfigVersion.cmake
deleted file mode 100644
index ed79ecd91..000000000
--- a/infra/cmake/packages/TensorFlowProtoText-1.13.1/TensorFlowProtoTextConfigVersion.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-set(PACKAGE_VERSION "1.13.1")
-set(PACKAGE_VERSION_EXACT FALSE)
-set(PACKAGE_VERSION_COMPATIBLE FALSE)
-set(PACKAGE_VERSION_UNSUITABLE TRUE)
-
-if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
- set(PACKAGE_VERSION_EXACT TRUE)
- set(PACKAGE_VERSION_UNSUITABLE FALSE)
-endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/TensorFlowProtoText-1.13.1/build/CMakeLists.txt b/infra/cmake/packages/TensorFlowProtoText-1.13.1/build/CMakeLists.txt
deleted file mode 100644
index 7079aea03..000000000
--- a/infra/cmake/packages/TensorFlowProtoText-1.13.1/build/CMakeLists.txt
+++ /dev/null
@@ -1,78 +0,0 @@
-message(STATUS "Build TensorFlowProtoText from '${TensorFlowSource_DIR}'")
-
-#
-# Build "proto_text" tool
-#
-unset(SOURCE_FILES)
-
-macro(Source_Add RPATH)
- list(APPEND SOURCE_FILES "${TensorFlowSource_DIR}/${RPATH}")
-endmacro(Source_Add)
-
-# This list comes from "tensorflow/contrib/makefile/proto_text_cc_files.txt"
-Source_Add(tensorflow/core/lib/core/status.cc)
-Source_Add(tensorflow/core/lib/core/threadpool.cc)
-Source_Add(tensorflow/core/lib/hash/hash.cc)
-Source_Add(tensorflow/core/lib/io/inputstream_interface.cc)
-Source_Add(tensorflow/core/lib/io/random_inputstream.cc)
-Source_Add(tensorflow/core/lib/io/buffered_inputstream.cc)
-Source_Add(tensorflow/core/lib/io/inputbuffer.cc)
-Source_Add(tensorflow/core/lib/io/iterator.cc)
-Source_Add(tensorflow/core/lib/io/path.cc)
-Source_Add(tensorflow/core/lib/strings/numbers.cc)
-Source_Add(tensorflow/core/lib/strings/scanner.cc)
-Source_Add(tensorflow/core/lib/strings/str_util.cc)
-Source_Add(tensorflow/core/lib/strings/strcat.cc)
-Source_Add(tensorflow/core/lib/strings/stringprintf.cc)
-Source_Add(tensorflow/core/lib/strings/proto_text_util.cc)
-Source_Add(tensorflow/core/platform/cpu_info.cc)
-Source_Add(tensorflow/core/platform/denormal.cc)
-Source_Add(tensorflow/core/platform/env.cc)
-Source_Add(tensorflow/core/platform/env_time.cc)
-Source_Add(tensorflow/core/platform/file_system.cc)
-Source_Add(tensorflow/core/platform/file_system_helper.cc)
-Source_Add(tensorflow/core/platform/protobuf_util.cc)
-Source_Add(tensorflow/core/platform/setround.cc)
-Source_Add(tensorflow/core/platform/tracing.cc)
-Source_Add(tensorflow/core/platform/posix/env.cc)
-Source_Add(tensorflow/core/platform/posix/env_time.cc)
-Source_Add(tensorflow/core/platform/posix/error.cc)
-Source_Add(tensorflow/core/platform/posix/load_library.cc)
-Source_Add(tensorflow/core/platform/posix/port.cc)
-Source_Add(tensorflow/core/platform/posix/posix_file_system.cc)
-Source_Add(tensorflow/core/platform/default/logging.cc)
-Source_Add(tensorflow/core/platform/default/mutex.cc)
-Source_Add(tensorflow/core/platform/default/protobuf.cc)
-
-Source_Add(tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc)
-Source_Add(tensorflow/tools/proto_text/gen_proto_text_functions.cc)
-
-unset(PROTO_FILES)
-
-macro(Proto_Add RPATH)
- list(APPEND PROTO_FILES "${RPATH}")
-endmacro(Proto_Add)
-
-Proto_Add(tensorflow/core/lib/core/error_codes.proto)
-Proto_Add(tensorflow/core/framework/types.proto)
-Proto_Add(tensorflow/core/framework/tensor.proto)
-Proto_Add(tensorflow/core/framework/tensor_shape.proto)
-Proto_Add(tensorflow/core/framework/summary.proto)
-Proto_Add(tensorflow/core/framework/resource_handle.proto)
-
-Protobuf_Generate(PROTO_TEXT_PROTO
- "${CMAKE_CURRENT_BINARY_DIR}/generated/proto_text"
- "${TensorFlowSource_DIR}"
- ${PROTO_FILES}
-)
-
-add_executable(tensorflow-prototext-1.13.1 ${SOURCE_FILES} ${PROTO_TEXT_PROTO_SOURCES})
-target_include_directories(tensorflow-prototext-1.13.1 PRIVATE ${TensorFlowSource_DIR})
-target_include_directories(tensorflow-prototext-1.13.1 PRIVATE ${PROTO_TEXT_PROTO_INCLUDE_DIRS})
-
-target_link_libraries(tensorflow-prototext-1.13.1 PRIVATE abseil)
-target_link_libraries(tensorflow-prototext-1.13.1 PRIVATE eigen)
-target_link_libraries(tensorflow-prototext-1.13.1 PRIVATE ${PROTO_TEXT_PROTO_LIBRARIES})
-target_link_libraries(tensorflow-prototext-1.13.1 PRIVATE Google::DoubleConversion)
-target_link_libraries(tensorflow-prototext-1.13.1 PRIVATE Google::NSync)
-target_link_libraries(tensorflow-prototext-1.13.1 PRIVATE dl)
diff --git a/infra/cmake/packages/TensorFlowProtoText-1.13.1/make_directories.sh b/infra/cmake/packages/TensorFlowProtoText-1.13.1/make_directories.sh
deleted file mode 100755
index 1fb2ab683..000000000
--- a/infra/cmake/packages/TensorFlowProtoText-1.13.1/make_directories.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/bash
-
-while [[ $# -ne 0 ]]; do
- DIR=$1; shift
- mkdir -p "${DIR}"
-done
diff --git a/infra/cmake/packages/TensorFlowRuySource-2.3.0/TensorFlowRuySourceConfig.cmake b/infra/cmake/packages/TensorFlowRuySource-2.3.0/TensorFlowRuySourceConfig.cmake
index 3dbf05ece..3a7dc893c 100644
--- a/infra/cmake/packages/TensorFlowRuySource-2.3.0/TensorFlowRuySourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowRuySource-2.3.0/TensorFlowRuySourceConfig.cmake
@@ -9,7 +9,8 @@ function(_TensorFlowRuySource_import)
# Exact version used by TensorFlow v2.3.0.
# See tensorflow/third_party/ruy/workspace.bzl
- envoption(TENSORFLOW_2_3_0_RUY_URL https://github.com/google/ruy/archive/34ea9f4993955fa1ff4eb58e504421806b7f2e8f.zip)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_3_0_RUY_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/ruy/archive/34ea9f4993955fa1ff4eb58e504421806b7f2e8f.zip)
ExternalSource_Download(RUY DIRNAME TENSORFLOW-2.3.0-RUY ${TENSORFLOW_2_3_0_RUY_URL})
diff --git a/infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfig.cmake b/infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfig.cmake
new file mode 100644
index 000000000..e4dd4f2bf
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfig.cmake
@@ -0,0 +1,21 @@
+function(_TensorFlowRuySource_import)
+ if(NOT DOWNLOAD_RUY)
+ set(TensorFlowRuySource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_RUY)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ # Exact version used by TensorFlow v2.6.0.
+ # See tensorflow/third_party/ruy/workspace.bzl
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_6_0_RUY_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/ruy/archive/e6c1b8dc8a8b00ee74e7268aac8b18d7260ab1ce.zip)
+
+ ExternalSource_Download(RUY DIRNAME TENSORFLOW-2.6.0-RUY ${TENSORFLOW_2_6_0_RUY_URL})
+
+ set(TensorFlowRuySource_DIR ${RUY_SOURCE_DIR} PARENT_SCOPE)
+ set(TensorFlowRuySource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowRuySource_import)
+
+_TensorFlowRuySource_import()
diff --git a/infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfigVersion.cmake
new file mode 100644
index 000000000..38ad0aa31
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfigVersion.cmake
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "2.6.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/TensorFlowRuySource-2.8.0/TensorFlowRuySourceConfig.cmake b/infra/cmake/packages/TensorFlowRuySource-2.8.0/TensorFlowRuySourceConfig.cmake
new file mode 100644
index 000000000..2ead7cd51
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowRuySource-2.8.0/TensorFlowRuySourceConfig.cmake
@@ -0,0 +1,21 @@
+function(_TensorFlowRuySource_import)
+ if(NOT DOWNLOAD_RUY)
+ set(TensorFlowRuySource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_RUY)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ # Exact version used by TensorFlow v2.8.0.
+ # See tensorflow/third_party/ruy/workspace.bzl
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_8_0_RUY_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/ruy/archive/e6c1b8dc8a8b00ee74e7268aac8b18d7260ab1ce.zip)
+
+ ExternalSource_Download(RUY DIRNAME TENSORFLOW-2.8.0-RUY ${TENSORFLOW_2_8_0_RUY_URL})
+
+ set(TensorFlowRuySource_DIR ${RUY_SOURCE_DIR} PARENT_SCOPE)
+ set(TensorFlowRuySource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowRuySource_import)
+
+_TensorFlowRuySource_import()
diff --git a/infra/cmake/packages/TensorFlowRuySource-2.8.0/TensorFlowRuySourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowRuySource-2.8.0/TensorFlowRuySourceConfigVersion.cmake
new file mode 100644
index 000000000..2ad2e241e
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowRuySource-2.8.0/TensorFlowRuySourceConfigVersion.cmake
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "2.8.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/TensorFlowSource-1.12/TensorFlowSourceConfig.cmake b/infra/cmake/packages/TensorFlowSource-1.12/TensorFlowSourceConfig.cmake
deleted file mode 100644
index fb9b4c789..000000000
--- a/infra/cmake/packages/TensorFlowSource-1.12/TensorFlowSourceConfig.cmake
+++ /dev/null
@@ -1,18 +0,0 @@
-function(_TensorFlowSource_import)
- if(NOT DOWNLOAD_TENSORFLOW)
- set(TensorFlowSource_FOUND FALSE PARENT_SCOPE)
- return()
- endif(NOT DOWNLOAD_TENSORFLOW)
-
- nnas_include(ExternalSourceTools)
- nnas_include(OptionTools)
-
- envoption(TENSORFLOW_1_12_URL https://github.com/tensorflow/tensorflow/archive/v1.12.0.tar.gz)
-
- ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-1.12 ${TENSORFLOW_1_12_URL})
-
- set(TensorFlowSource_DIR ${TENSORFLOW_SOURCE_DIR} PARENT_SCOPE)
- set(TensorFlowSource_FOUND TRUE PARENT_SCOPE)
-endfunction(_TensorFlowSource_import)
-
-_TensorFlowSource_import()
diff --git a/infra/cmake/packages/TensorFlowSource-1.12/TensorFlowSourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowSource-1.12/TensorFlowSourceConfigVersion.cmake
deleted file mode 100644
index 8cfdbf8e5..000000000
--- a/infra/cmake/packages/TensorFlowSource-1.12/TensorFlowSourceConfigVersion.cmake
+++ /dev/null
@@ -1,10 +0,0 @@
-set(PACKAGE_VERSION "1.12")
-set(PACKAGE_VERSION_EXACT FALSE)
-set(PACKAGE_VERSION_COMPATIBLE FALSE)
-set(PACKAGE_VERSION_UNSUITABLE TRUE)
-
-if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
- set(PACKAGE_VERSION_EXACT TRUE)
- set(PACKAGE_VERSION_COMPATIBLE TRUE)
- set(PACKAGE_VERSION_UNSUITABLE FALSE)
-endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/TensorFlowSource-1.14/TensorFlowSourceConfig.cmake b/infra/cmake/packages/TensorFlowSource-1.14/TensorFlowSourceConfig.cmake
index bcdf9f28c..33538c234 100644
--- a/infra/cmake/packages/TensorFlowSource-1.14/TensorFlowSourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowSource-1.14/TensorFlowSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_TensorFlowSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(TENSORFLOW_1_14_URL https://github.com/tensorflow/tensorflow/archive/v1.14.0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_1_14_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v1.14.0.tar.gz)
ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-1.14 ${TENSORFLOW_1_14_URL})
diff --git a/infra/cmake/packages/TensorFlowSource-2.1.0/TensorFlowSourceConfig.cmake b/infra/cmake/packages/TensorFlowSource-2.1.0/TensorFlowSourceConfig.cmake
index 0d2a95056..aabc22f72 100644
--- a/infra/cmake/packages/TensorFlowSource-2.1.0/TensorFlowSourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowSource-2.1.0/TensorFlowSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_TensorFlowSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(TENSORFLOW_2_1_0_URL https://github.com/tensorflow/tensorflow/archive/v2.1.0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_1_0_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v2.1.0.tar.gz)
ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-2.1.0 ${TENSORFLOW_2_1_0_URL})
diff --git a/infra/cmake/packages/TensorFlowSource-2.12.1/TensorFlowSourceConfig.cmake b/infra/cmake/packages/TensorFlowSource-2.12.1/TensorFlowSourceConfig.cmake
new file mode 100644
index 000000000..81fc6aecb
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowSource-2.12.1/TensorFlowSourceConfig.cmake
@@ -0,0 +1,19 @@
+function(_TensorFlowSource_import)
+ if(NOT DOWNLOAD_TENSORFLOW)
+ set(TensorFlowSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_TENSORFLOW)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_12_1_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v2.12.1.tar.gz)
+
+ ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-2.12.1 ${TENSORFLOW_2_12_1_URL})
+
+ set(TensorFlowSource_DIR ${TENSORFLOW_SOURCE_DIR} PARENT_SCOPE)
+ set(TensorFlowSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowSource_import)
+
+_TensorFlowSource_import()
diff --git a/infra/cmake/packages/TensorFlowSource-2.12.1/TensorFlowSourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowSource-2.12.1/TensorFlowSourceConfigVersion.cmake
new file mode 100644
index 000000000..8566d0816
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowSource-2.12.1/TensorFlowSourceConfigVersion.cmake
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "2.12.1")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/TensorFlowSource-2.2.0/TensorFlowSourceConfig.cmake b/infra/cmake/packages/TensorFlowSource-2.2.0/TensorFlowSourceConfig.cmake
index 71220d743..7dabf88c8 100644
--- a/infra/cmake/packages/TensorFlowSource-2.2.0/TensorFlowSourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowSource-2.2.0/TensorFlowSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_TensorFlowSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(TENSORFLOW_2_2_0_URL https://github.com/tensorflow/tensorflow/archive/v2.2.0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_2_0_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v2.2.0.tar.gz)
ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-2.2.0 ${TENSORFLOW_2_2_0_URL})
diff --git a/infra/cmake/packages/TensorFlowSource-2.3.0-rc0Config.cmake b/infra/cmake/packages/TensorFlowSource-2.3.0-rc0Config.cmake
index 82df579a1..967d49e87 100644
--- a/infra/cmake/packages/TensorFlowSource-2.3.0-rc0Config.cmake
+++ b/infra/cmake/packages/TensorFlowSource-2.3.0-rc0Config.cmake
@@ -10,7 +10,8 @@ function(_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(TENSORFLOW_2_3_0_RC0_URL https://github.com/tensorflow/tensorflow/archive/v2.3.0-rc0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_3_0_RC0_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v2.3.0-rc0.tar.gz)
ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-2.3.0-RC0 ${TENSORFLOW_2_3_0_RC0_URL})
diff --git a/infra/cmake/packages/TensorFlowSource-2.3.0/TensorFlowSourceConfig.cmake b/infra/cmake/packages/TensorFlowSource-2.3.0/TensorFlowSourceConfig.cmake
index 5c3a0f8cc..0ad0cda0b 100644
--- a/infra/cmake/packages/TensorFlowSource-2.3.0/TensorFlowSourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowSource-2.3.0/TensorFlowSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_TensorFlowSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(TENSORFLOW_2_3_0_URL https://github.com/tensorflow/tensorflow/archive/v2.3.0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_3_0_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v2.3.0.tar.gz)
ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-2.3.0 ${TENSORFLOW_2_3_0_URL})
diff --git a/infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfig.cmake b/infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfig.cmake
new file mode 100644
index 000000000..9a7af17b1
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfig.cmake
@@ -0,0 +1,19 @@
+function(_TensorFlowSource_import)
+ if(NOT DOWNLOAD_TENSORFLOW)
+ set(TensorFlowSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_TENSORFLOW)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_6_0_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v2.6.0.tar.gz)
+
+ ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-2.6.0 ${TENSORFLOW_2_6_0_URL})
+
+ set(TensorFlowSource_DIR ${TENSORFLOW_SOURCE_DIR} PARENT_SCOPE)
+ set(TensorFlowSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowSource_import)
+
+_TensorFlowSource_import()
diff --git a/infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfigVersion.cmake
new file mode 100644
index 000000000..38ad0aa31
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfigVersion.cmake
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "2.6.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/TensorFlowSource-2.8.0/TensorFlowSourceConfig.cmake b/infra/cmake/packages/TensorFlowSource-2.8.0/TensorFlowSourceConfig.cmake
new file mode 100644
index 000000000..988a0f49f
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowSource-2.8.0/TensorFlowSourceConfig.cmake
@@ -0,0 +1,19 @@
+function(_TensorFlowSource_import)
+ if(NOT DOWNLOAD_TENSORFLOW)
+ set(TensorFlowSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_TENSORFLOW)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_8_0_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v2.8.0.tar.gz)
+
+ ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-2.8.0 ${TENSORFLOW_2_8_0_URL})
+
+ set(TensorFlowSource_DIR ${TENSORFLOW_SOURCE_DIR} PARENT_SCOPE)
+ set(TensorFlowSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowSource_import)
+
+_TensorFlowSource_import()
diff --git a/infra/cmake/packages/TensorFlowSource-2.8.0/TensorFlowSourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowSource-2.8.0/TensorFlowSourceConfigVersion.cmake
new file mode 100644
index 000000000..2ad2e241e
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowSource-2.8.0/TensorFlowSourceConfigVersion.cmake
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "2.8.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/VulkanSourceConfig.cmake b/infra/cmake/packages/VulkanSourceConfig.cmake
new file mode 100644
index 000000000..76b69898e
--- /dev/null
+++ b/infra/cmake/packages/VulkanSourceConfig.cmake
@@ -0,0 +1,20 @@
+function(_VulkanSource_import)
+ if(NOT ${DOWNLOAD_VULKAN})
+ set(VulkanSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT ${DOWNLOAD_VULKAN})
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(VULKAN_URL ${EXTERNAL_DOWNLOAD_SERVER}/KhronosGroup/Vulkan-Headers/archive/ec2db85225ab410bc6829251bef6c578aaed5868.tar.gz)
+ ExternalSource_Download(VULKAN
+ DIRNAME VULKAN
+ URL ${VULKAN_URL})
+
+ set(VulkanSource_DIR ${VULKAN_SOURCE_DIR} PARENT_SCOPE)
+ set(VulkanSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_VulkanSource_import)
+
+_VulkanSource_import()
diff --git a/infra/cmake/packages/XnnpackSourceConfig.cmake b/infra/cmake/packages/XnnpackSourceConfig.cmake
new file mode 100644
index 000000000..36a920408
--- /dev/null
+++ b/infra/cmake/packages/XnnpackSourceConfig.cmake
@@ -0,0 +1,21 @@
+function(_XnnpackSource_import)
+ if(NOT ${DOWNLOAD_XNNPACK})
+ set(XnnpackSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT ${DOWNLOAD_XNNPACK})
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ # xnnpack commit in tflite v2.3
+ envoption(XNNPACK_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/XNNPACK/archive/8b283aa30a3186c6e640aed520543e9c067132d.tar.gz)
+ ExternalSource_Download(XNNPACK
+ DIRNAME XNNPACK
+ URL ${XNNPACK_URL})
+
+ set(XnnpackSource_DIR ${XNNPACK_SOURCE_DIR} PARENT_SCOPE)
+ set(XnnpackSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_XnnpackSource_import)
+
+_XnnpackSource_import()
diff --git a/infra/command/build-docker-image b/infra/command/build-docker-image
index 7653a0c88..f4e2069c0 100644
--- a/infra/command/build-docker-image
+++ b/infra/command/build-docker-image
@@ -5,15 +5,17 @@ function Usage()
echo "Usage: $0 $(basename ${BASH_SOURCE[0]}) [OPTIONS]"
echo ""
echo "Options:"
- echo " --extension dockerfile extension in infra/docker"
+ echo " --codename ubuntu codename, default image name is nnfw/one-devtools:[codename]"
echo "Options can use as docker build option:"
docker build --help
}
-DOCKER_FILE_RPATH_BASE="infra/docker/Dockerfile"
+DOCKER_FILE_RPATH_BASE="infra/docker"
DOCKER_BUILD_ARGS=()
-DOCKER_FILE_RPATH=${DOCKER_FILE_RPATH_BASE}
-DOCKER_IMAGE_NAME=${DOCKER_IMAGE_NAME:-nnas}
+
+# Default setting
+UBUNTU_CODENAME="bionic"
+DOCKER_TAG="latest"
while [[ $# -gt 0 ]]
do
@@ -25,15 +27,14 @@ do
Usage
exit 1
;;
- --extension)
- DOCKER_FILE_RPATH="${DOCKER_FILE_RPATH_BASE}.$2"
- shift
- shift
+ --codename)
+ UBUNTU_CODENAME=$2
+ DOCKER_TAG=$2
+ shift 2
;;
-t|--tag)
DOCKER_IMAGE_NAME="$2"
- shift
- shift
+ shift 2
;;
*)
DOCKER_BUILD_ARGS+=(${1})
@@ -42,6 +43,14 @@ do
esac
done
+DOCKER_IMAGE_NAME=${DOCKER_IMAGE_NAME:-nnfw/one-devtools:$DOCKER_TAG}
+DOCKER_FILE_RPATH=$DOCKER_FILE_RPATH_BASE/$UBUNTU_CODENAME/Dockerfile
+
+HOST_ARCH=$(uname -m)
+if [[ -n $HOST_ARCH && $HOST_ARCH != "x86_64" ]]; then
+ DOCKER_FILE_RPATH=$DOCKER_FILE_RPATH.$HOST_ARCH
+fi
+
DOCKER_BUILD_ARGS+=("-t ${DOCKER_IMAGE_NAME}")
docker build --build-arg http_proxy="${http_proxy}" \
diff --git a/infra/command/docker-run b/infra/command/docker-run
index 08610bff2..9a186b3d2 100644
--- a/infra/command/docker-run
+++ b/infra/command/docker-run
@@ -1,10 +1,19 @@
#!/bin/bash
import "docker.configuration"
+USER_MODE=0
+
+if [[ $1 == '--user' ]]; then
+ DOCKER_RUN_OPTS+=" -u $(stat -c "%u" $NNAS_PROJECT_PATH):$(stat -c "%g" $NNAS_PROJECT_PATH)"
+ USER_MODE=1
+ shift
+fi
docker run ${DOCKER_RUN_OPTS} ${DOCKER_ENV_VARS} ${DOCKER_VOLUMES} ${DOCKER_IMAGE_NAME} "$@"
EXITCODE=$?
-docker_cleanup
+if [ $USER_MODE -eq 0 ]; then
+ docker_cleanup
+fi
exit ${EXITCODE}
diff --git a/infra/command/format b/infra/command/format
index e34c9150b..461da6f85 100644
--- a/infra/command/format
+++ b/infra/command/format
@@ -4,6 +4,7 @@ INVALID_EXIT=0
FILES_TO_CHECK=()
DIRECTORIES_TO_BE_TESTED=()
DIRECTORIES_NOT_TO_BE_TESTED=()
+DEFAULT_CLANG_FORMAT="clang-format-8"
CLANG_FORMAT_CANDIDATES=()
PATCH_FILE=format.patch
CHECK_DIFF_ONLY="0"
@@ -16,7 +17,7 @@ function Usage()
echo "If <file>s are given, it reformats the files"
echo ""
echo "Options:"
- echo " --clang-format <TOOL> clang format bin (default: clang-format-3.9, clang-format)"
+ echo " --clang-format <TOOL> clang format bin (default: $DEFAULT_CLANG_FORMAT)"
echo " --diff-only check diff files with master"
echo " --staged-only check git staged files"
}
@@ -65,39 +66,14 @@ function command_exists() {
command -v $1 > /dev/null 2>&1
}
-function exclude_symbolic_links() {
- # Check all files (CMakeLists.txt, *.cl, ... not only for C++, Python)
- if [[ ${#FILES_TO_CHECK} -ne 0 ]]; then
- FILES_EXCLUDE_SYMLINKS=$(file ${FILES_TO_CHECK} | grep -v "symbolic link" | cut -d':' -f1)
- FILES_TO_CHECK=${FILES_EXCLUDE_SYMLINKS}
- fi
-}
-
function check_newline() {
- FILES_TO_CHECK_CR=()
- for f in ${FILES_TO_CHECK[@]}; do
- # Manually ignore style checking
- if [[ ${f} == !(*.svg|*.pdf|*.png) ]]; then
- FILES_TO_CHECK_CR+=("${f}")
- fi
- done
+ # Exclude binary files (refer to the .gitattributes file)
+ # TODO Remove the svg exclusion
+ # .svg: XML-based markup for vector graphics
+ FILES_TO_CHECK_EOF=`echo "$FILES_TO_CHECK" | tr ' ' '\n' | egrep -v '((\.caffemodel)|(\.png)|(\.pdf)|(\.h5)|(\.tar\.gz)|(\.tflite)|(\.bmp)|(\.svg))$'`
- # Check all files (CMakeLists.txt, *.cl, ... not only for C++, Python)
- if [[ ${#FILES_TO_CHECK_CR} -ne 0 ]]; then
- CRCHECK=$(file ${FILES_TO_CHECK_CR} | grep 'with CR')
- else
- return
- fi
- FILES_TO_FIX=($(echo "$CRCHECK" | grep "with CRLF line" | cut -d':' -f1))
- for f in ${FILES_TO_FIX[@]}; do
- tr -d '\r' < $f > $f.fixed && cat $f.fixed > $f && rm $f.fixed
- done
- FILES_TO_FIX=($(echo "${CRCHECK}" | grep "with CR line" | cut -d':' -f1))
- for f in ${FILES_TO_FIX[@]}; do
- tr '\r' '\n' < $f > $f.fixed && cat $f.fixed > $f && rm $f.fixed
- done
- # Check no new line at end of file
- for f in ${FILES_TO_CHECK_CR[@]}; do
+ for f in ${FILES_TO_CHECK_EOF[@]}; do
+ # Check no new line at end of file
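+ # (diff against /dev/null prints "\ No newline at end of file" when the
+ # last line lacks a trailing newline)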
if diff /dev/null "$f" | tail -1 | grep '^\\ No newline' > /dev/null; then
echo >> "$f"
fi
@@ -106,22 +82,19 @@ function check_newline() {
function check_permission() {
# Check all files except script
- FILES_TO_CHECK_PERMISSION=()
- for f in ${FILES_TO_CHECK[@]}; do
- # Manually ignore permission checking
- if [[ ${f} == !(nnas|nnfw|nncc|*.sh|*.py|*/gradlew) ]] || [[ ${f} == tests/nnapi/specs/**/*.py ]]; then
- FILES_TO_CHECK_PERMISSION+=("${f}")
- fi
- done
+ # Manually ignore permission checking
+ FILES_TO_CHECK_PERMISSION=$(git ls-files -c -s --exclude-standard ${FILES_TO_CHECK[@]} | egrep '^100755' | cut -f2)
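+ # (git ls-files -s prints "<mode> <object> <stage>\t<path>"; mode 100755
+ # marks executables and cut -f2 keeps the path)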
+ FILES_TO_CHECK_PERMISSION=`echo "$FILES_TO_CHECK_PERMISSION" | tr ' ' '\n' | egrep -v '((^nnas)|(^nnfw)|(^nncc)|(\.sh)|(\.py)|(/gradlew))$'`
+ FILES_TO_CHECK_PERMISSION=`echo "$FILES_TO_CHECK_PERMISSION" | egrep -v '((^infra/debian/compiler/rules)|(^infra/debian/runtime/rules))$'`
+ FILES_TO_CHECK_PERMISSION+=`echo && echo "$FILES_TO_CHECK" | egrep '^tests/nnapi/specs/.*\.py$'`
+ # Transform to array
+ FILES_TO_CHECK_PERMISSION=($FILES_TO_CHECK_PERMISSION)
if [[ ${#FILES_TO_CHECK_PERMISSION} -eq 0 ]]; then
return
fi
- for FILE_TO_CHECK in ${FILES_TO_CHECK_PERMISSION[@]}; do
- RESULT=$(stat -c '%A' ${FILE_TO_CHECK} | grep 'x')
- if [ "${RESULT}" != "" ]; then
- chmod a-x ${FILE_TO_CHECK}
- fi
+ for f in ${FILES_TO_CHECK_PERMISSION[@]}; do
+ chmod a-x $f
done
}
@@ -131,9 +104,7 @@ function check_cpp_files() {
return
fi
- CLANG_FORMAT_CANDIDATES+=("clang-format-3.9")
- CLANG_FORMAT_CANDIDATES+=("clang-format")
-
+ CLANG_FORMAT_CANDIDATES+=($DEFAULT_CLANG_FORMAT)
for CLANG_FORMAT_CANDIDATE in ${CLANG_FORMAT_CANDIDATES[@]}; do
if command_exists ${CLANG_FORMAT_CANDIDATE} ; then
CLANG_FORMAT="${CLANG_FORMAT_CANDIDATE}"
@@ -142,29 +113,23 @@ function check_cpp_files() {
done
if [[ -z ${CLANG_FORMAT} ]]; then
- echo "[ERROR] clang-format is unavailable"
+ echo "[ERROR] $CLANG_FORMAT is unavailable"
echo
- echo "Please install clang-format before running format check"
+ echo " Please install $DEFAULT_CLANG_FORMAT before running format check"
exit 1
fi
- # Check c++ files
- FILES_TO_CHECK_CPP=()
- for f in ${FILES_TO_CHECK[@]}; do
- # Manually ignore style checking
- if [[ ${f} == +(*/NeuralNetworks.h|*/NeuralNetworksExtensions.h) ]]; then
- continue
- fi
-
- # File extension to check
- if [[ ${f} == +(*.h|*.hpp|*.cpp|*.cc|*.c|*.cl) ]]; then
- FILES_TO_CHECK_CPP+=("${f}")
- fi
- done
+ # Check c++ files: replace ' ' with newline, check with grep
+ FILES_TO_CHECK_CPP=`echo "$FILES_TO_CHECK" | tr ' ' '\n' | egrep '((\.c[cl]?)|(\.cpp)|(\.h(pp)?))$'`
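+ # (the pattern matches .c, .cc, .cl, .cpp, .h and .hpp files)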
+ # Manually ignore style checking
+ FILES_TO_CHECK_CPP=`echo "$FILES_TO_CHECK_CPP" | egrep -v '((/NeuralNetworks\.h)|(/NeuralNetworksExtensions\.h))$'`
+ # Transform to array
+ FILES_TO_CHECK_CPP=($FILES_TO_CHECK_CPP)
# Skip by '.FORMATDENY' file
for s in ${DIRECTORIES_NOT_TO_BE_TESTED[@]}; do
FILES_TO_CHECK_CPP=(${FILES_TO_CHECK_CPP[*]/$s*/})
+ FILES_TO_CHECK_CPP_BY_CLANG_FORMAT_8=(${FILES_TO_CHECK_CPP_BY_CLANG_FORMAT_8[*]/$s*/})
done
if [[ ${#FILES_TO_CHECK_CPP} -ne 0 ]]; then
@@ -189,20 +154,21 @@ function check_python_files() {
fi
# Check python files
- FILES_TO_CHECK_PYTHON=()
- for f in ${FILES_TO_CHECK[@]}; do
- # File extension to check
- if [[ ${f} == *.py ]]; then
- FILES_TO_CHECK_PYTHON+=("${f}")
- fi
- done
+ FILES_TO_CHECK_PYTHON=(`echo "$FILES_TO_CHECK" | tr ' ' '\n' | egrep '\.py$'`)
+ # Exceptional case: one-cmds drivers have no '.py' extension; skip non-Python sources (cmake, etc.) and the shell script one-prepare-venv
+ FILES_TO_CHECK_PYTHON+=(`echo "$FILES_TO_CHECK" | tr ' ' '\n' | egrep '^compiler/one-cmds/[^(\./)]*$' | egrep -v '^compiler/one-cmds/one-prepare-venv$'`)
+ # Exceptional case: onecc-docker doesn't have a '.py' extension.
+ FILES_TO_CHECK_PYTHON+=(`echo "$FILES_TO_CHECK" | tr ' ' '\n' | egrep '^compiler/onecc-docker/onecc-docker$'`)
+ # Exceptional case: visq doesn't have a '.py' extension.
+ FILES_TO_CHECK_PYTHON+=(`echo "$FILES_TO_CHECK" | tr ' ' '\n' | egrep '^compiler/visq/visq$'`)
+
for s in ${DIRECTORIES_NOT_TO_BE_TESTED[@]}; do
skip=${s#'.'/}/
FILES_TO_CHECK_PYTHON=(${FILES_TO_CHECK_PYTHON[*]/$skip*/})
done
if [[ ${#FILES_TO_CHECK_PYTHON} -ne 0 ]]; then
- yapf -i --style='{based_on_style: pep8, column_limit: 90}' ${FILES_TO_CHECK_PYTHON[@]}
+ yapf -i ${FILES_TO_CHECK_PYTHON[@]}
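+ # NOTE Style options are no longer passed inline; yapf picks up the
+ # repository's own configuration.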
EXIT_CODE=$?
if [[ ${EXIT_CODE} -ne 0 ]]; then
INVALID_EXIT=${EXIT_CODE}
@@ -220,7 +186,13 @@ fi
__Check_CPP=${CHECK_CPP:-"1"}
__Check_PYTHON=${CHECK_PYTHON:-"1"}
-FILES_TO_CHECK=$(git ls-files -c --exclude-standard ${DIRECTORIES_TO_BE_TESTED[@]})
+# Git file mode
+# 120000: symbolic link
+# 160000: git link
+# 100755: regular file, executable
+# 100644: regular file, non-executable
+# Reference: https://github.com/git/git/blob/cd42415/Documentation/technical/index-format.txt#L72-L81
+FILES_TO_CHECK=$(git ls-files -c -s --exclude-standard ${DIRECTORIES_TO_BE_TESTED[@]} | egrep -v '^1[26]0000' | cut -f2)
if [[ "${CHECK_DIFF_ONLY}" = "1" ]]; then
MASTER_EXIST=$(git rev-parse --verify master)
CURRENT_BRANCH=$(git branch | grep \* | cut -d ' ' -f2-)
@@ -235,6 +207,7 @@ if [[ "${CHECK_DIFF_ONLY}" = "1" ]]; then
else
FILES_TO_CHECK=$(git diff --name-only --diff-filter=d HEAD~${DIFF_COMMITS})
fi
+ FILES_TO_CHECK=$(git ls-files -c -s --exclude-standard ${FILES_TO_CHECK[@]} | egrep -v '^1[26]0000' | cut -f2)
fi
fi
@@ -242,7 +215,6 @@ for DIR_NOT_TO_BE_TESTED in $(git ls-files -co --exclude-standard '*/.FORMATDENY
DIRECTORIES_NOT_TO_BE_TESTED+=($(dirname "${DIR_NOT_TO_BE_TESTED}"))
done
-exclude_symbolic_links
check_newline
check_permission
check_cpp_files
diff --git a/infra/command/gen-coverage-report b/infra/command/gen-coverage-report
index c3a8202e7..c841dc0cb 100644
--- a/infra/command/gen-coverage-report
+++ b/infra/command/gen-coverage-report
@@ -66,13 +66,10 @@ done
"${LCOV_PATH}" -e "${RAW_COVERAGE_INFO_PATH}" -o "${EXTRACTED_COVERAGE_INFO_PATH}" \
"${CANDIDATES[@]}"
-# Exclude *.test.cpp files from coverage report
-"${LCOV_PATH}" -r "${EXTRACTED_COVERAGE_INFO_PATH}" -o "${EXCLUDED_COVERAGE_INFO_PATH}" \
- '*.test.cpp'
-
+# Exclude test files from coverage report
# Exclude flatbuffer generated files from coverage report
"${LCOV_PATH}" -r "${EXTRACTED_COVERAGE_INFO_PATH}" -o "${EXCLUDED_COVERAGE_INFO_PATH}" \
- '*_schema_generated.h'
+ '*.test.cpp' '*.test.cc' '*/test/*' '*/tests/*' '*_schema_generated.h'
# Final coverage data
cp -v ${EXCLUDED_COVERAGE_INFO_PATH} ${COVERAGE_INFO_PATH}
diff --git a/infra/config/docker.configuration b/infra/config/docker.configuration
index 08931cd28..2e001373b 100644
--- a/infra/config/docker.configuration
+++ b/infra/config/docker.configuration
@@ -3,7 +3,7 @@
# Don't run this script
[[ "${BASH_SOURCE[0]}" == "${0}" ]] && echo "Please don't execute ${BASH_SOURCE[0]}" && exit 1
-DOCKER_IMAGE_NAME=${DOCKER_IMAGE_NAME:-nnas}
+DOCKER_IMAGE_NAME=${DOCKER_IMAGE_NAME:-nnfw/one-devtools}
echo "Using docker image ${DOCKER_IMAGE_NAME}"
if [ -z "`docker images ${DOCKER_IMAGE_NAME}`" ]; then
diff --git a/infra/debian/compiler/changelog b/infra/debian/compiler/changelog
new file mode 100644
index 000000000..c4d358481
--- /dev/null
+++ b/infra/debian/compiler/changelog
@@ -0,0 +1,113 @@
+one (1.24.0) bionic; urgency=medium
+
+ * Introduce _one-import-onnx_ extension interface
+ * _onecc_ supports profiling of multiple backends with a single cfg file
+ * Enable more Quantize operators: FloorMod, Squeeze
+ * _visq_ supports multi-out nodes
+ * _onecc_ introduces the `dynamic_batch_to_single_batch` option.
+
+ -- seongwoo <seongwoo@sw> Thu, 18 Jul 2023 14:10:22 +0900
+
+one (1.23.0) bionic; urgency=medium
+
+ * Support more Op(s): GeLU
+ * Support more option(s): `--fuse-gelu`
+ * Support multiple backends compilation with a single configuration file
+ * Upgrade Circle schema to 0.5
+
+ -- seongwoo <seongwoo@sw> Thu, 18 May 2023 19:10:21 +0900
+
+one (1.22.0) bionic; urgency=medium
+
+ * Introduce new optimization options: `unroll_unidirseqlstm`, `forward_transpose_op`, `fold_fully_connected`, `fuse_prelu`
+ * Support more Ops for fake quantization: `Depth2Space`, `Space2Depth`, `Pack`, `Unpack`, `Abs`
+ * Support more Ops for quantization: `Abs`, `ReduceProd`
+ * Introduce _visq_ tool for quantization error visualization
+ * Introduce _Environment_ section into configuration file
+ * Improve speed of `convert_nchw_to_nhwc` option
+ * Support `Add`, `Mul` of index-type (int32, int64) tensors in _one-quantize_
+ * Support ubuntu 20.04
+
+ -- seongwoo <mhs4670go@naver.com> Fri, 24 Mar 2023 13:58:16 +0900
+
+one (1.21.0) bionic; urgency=medium
+
+ * Support unrolling of LSTM and RNN Ops in `one-import-onnx` tool
+ * Introduce new tools `one-infer`, `circle-operator`, `circle-interpreter`
+ * Introduce `Workflow` (WIP) in `one-cmds`
+ * New option `quant_config` in `one-quantize`
+ * New option `fake_quantize` in `one-quantize`
+ * More Ops supported: Densify
+ * More Ops for quantization: ReduceMax
+ * More Ops for mixed-precision quantization (MPQ): LeakyRelu, Neg, Relu6, Squeeze
+ * More Ops for `convert_nchw_to_nhwc` option: LogSoftmax, ReduceMax, SplitV, Softmax
+ * New optimization options in `one-optimize`: `replace_non_const_fc_with_bmm`, `resolve_customop_splitv`, `fold_densify`
+ * Improved reshape elimination in `convert_nchw_to_nhwc` option.
+ * Support fusion of Channel-wise Add + Relu with TConv
+ * Support negative axis in ArgMin/Max
+ * Show errors for unrecognized options in `one-optimize`
+ * Fix shape inference for `StridedSlice`
+ * Fix FuseBatchNormWithTConvPass to support TConv with bias
+ * Deprecate `--O1` option in `circle2circle`
+ * Support gcc-11
+ * Support limited Float16 for kernel constants with dequantization to Float32
+
+ -- seongwoo <mhs4670go@naver.com> Wed, 06 Sep 2022 12:00:00 +0900
+
+one (1.20.0) bionic; urgency=medium
+
+ * luci-interpreter supports multiple kernels with PAL layer including Cortex-M
+ * luci-interpreter supports integer tensors for some kernels
+ * luci import supports constants without copying, to reduce memory for luci-interpreter
+ * Reduce duplicate code for packaging released modules
+ * Limited support for ONNX LSTM/RNN unrolling while importing
+ * Limited support for ARM32 cross build
+ * Support new operator: SVDF
+ * New virtual CircleVariable to support tensor with variable
+ * Support quantization of BatchMatMul Op
+ * Support mixed(UINT8 + INT16) quantization
+ * Support backward propagation of quantization parameters
+ * Upgrade default python to version 3.8
+ * Support TensorFlow 2.8.0, ONNX-TF 1.10.0, ONNX 1.11.0
+ * Upgrade circle schema to follow tflite schema v3b
+ * Refactor to mio-tflite280, mio-circle04 with version and helper methods
+ * Use a single flatbuffers version (2.0)
+ * Drop support for TensorFlow 1.x
+ * Fix for several bugs, performance enhancements, and typos
+
+ -- seongwoo <mhs4670go@naver.com> Tue, 26 Apr 2022 12:00:00 +0900
+
+one (1.19.0) bionic; urgency=medium
+
+ * `circle-quantizer` supports input/output type option
+ * Introduce configuration file for optimization options
+
+ -- seongwoo <mhs4670go@naver.com> Wed, 10 Nov 2021 15:53:39 +0900
+
+one (1.18.0) bionic; urgency=medium
+
+ * More optimization pass
+
+ -- seongwoo <mhs4670go@naver.com> Fri, 15 Oct 2021 15:23:20 +0900
+
+one (1.17.0) bionic; urgency=medium
+
+ * More optimization pass
+ * Add new InstanceNorm pattern in `FuseInstanceNormPass`
+ * Add verbose option
+ * Introduce `onecc` driver to `one-cmds`
+ * Introduce `one-profile` driver to `one-cmds`
+
+ -- seongwoo <mhs4670go@naver.com> Fri, 20 Aug 2021 17:50:20 +0900
+
+one (1.16.1) bionic; urgency=medium
+
+ * Extends the point where `one-codegen` finds backends.
+
+ -- seongwoo chae <mhs4670go@naver.com> Wed, 26 May 2021 18:06:53 +0900
+
+one (1.16.0) bionic; urgency=low
+
+ * Initial release.
+
+ -- seongwoo chae <mhs4670go@naver.com> Mon, 26 Apr 2021 14:34:57 +0900
diff --git a/infra/debian/compiler/compat b/infra/debian/compiler/compat
new file mode 100644
index 000000000..ec635144f
--- /dev/null
+++ b/infra/debian/compiler/compat
@@ -0,0 +1 @@
+9
diff --git a/infra/debian/compiler/control b/infra/debian/compiler/control
new file mode 100644
index 000000000..b3a3c1bf7
--- /dev/null
+++ b/infra/debian/compiler/control
@@ -0,0 +1,25 @@
+Source: one
+Section: devel
+Priority: extra
+Maintainer: Neural Network Acceleration Solution Developers <nnfw@samsung.com>
+Build-Depends: cmake, debhelper (>=9), dh-python, python3-all, python3.8, python3.8-venv
+Standards-Version: 3.9.8
+Homepage: https://github.com/Samsung/ONE
+
+Package: one-compiler
+Architecture: amd64
+Multi-Arch: foreign
+Depends: ${misc:Depends}, ${shlibs:Depends}, python3-venv, python3-pip, python3.8, python3.8-venv
+Description: On-device Neural Engine compiler package
+
+Package: one-compiler-dev
+Architecture: amd64
+Multi-Arch: same
+Depends: one-compiler, ${shlibs:Depends}, ${misc:Depends}
+Description: one-compiler development package
+
+Package: one-compiler-test
+Architecture: amd64
+Multi-Arch: same
+Depends: one-compiler, ${shlibs:Depends}, ${misc:Depends}
+Description: one-compiler test package
diff --git a/infra/debian/compiler/copyright b/infra/debian/compiler/copyright
new file mode 100644
index 000000000..bb64695a4
--- /dev/null
+++ b/infra/debian/compiler/copyright
@@ -0,0 +1,3 @@
+Files: *
+License: Proprietary
+Copyright (c) <2018> <Samsung Electronics Co.,Ltd.>
diff --git a/infra/debian/compiler/docs/one-build.1 b/infra/debian/compiler/docs/one-build.1
new file mode 100644
index 000000000..672d39f7f
--- /dev/null
+++ b/infra/debian/compiler/docs/one-build.1
@@ -0,0 +1,96 @@
+.TH ONE-BUILD "1" "August 2021" "one-build version 1.17.0" "User Commands"
+.SH NAME
+one-build \- run ONE drivers
+.SH DESCRIPTION
+usage: one\-build [\-h] [\-v] [\-V] [\-C CONFIG]
+.PP
+\fBone\-build\fR is a command line tool that runs ONE drivers in customized order.
+.SS "Configuration file:"
+\fBone\-build\fR takes input as a configuration file that supports ini format.
+A configuration file consists of sections, each led by a [section] header.
+Each section is the ONE driver you want to run, and consists of commands in a key/value combination to pass to the driver.
+.SH OPTIONS
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+show this help message and exit
+.TP
+\fB\-v\fR, \fB\-\-version\fR
+show program's version number and exit
+.TP
+\fB\-V\fR, \fB\-\-verbose\fR
+output additional information to stdout or stderr
+.TP
+\fB\-C\fR CONFIG, \fB\-\-config\fR CONFIG
+run with configuration file
+.SH EXAMPLES
+Before you run \fBone\-build\fR, you must write a configuration file.
+.PP
+$ cat one-build.template.cfg
+.PP
+[one-build]
+.br
+one-import-tf=True
+.br
+one-import-tflite=False
+.br
+one-import-bcq=False
+.br
+one-import-onnx=False
+.br
+one-optimize=True
+.br
+one-quantize=False
+.br
+one-pack=True
+.br
+one-codegen=False
+.PP
+[one-import-tf]
+.br
+input_path=/path/to/inception_v3.pb
+.br
+output_path=inception_v3.circle
+.br
+input_arrays=input
+.br
+input_shapes=1,299,299,3
+.br
+output_arrays=InceptionV3/Predictions/Reshape_1
+.br
+converter_version=v1
+.br
+model_format=graph_def
+.PP
+[one-optimize]
+.br
+input_path=inception_v3.circle
+.br
+output_path=inception_v3.opt.circle
+.br
+generate_profile_data=False
+.PP
+[one-pack]
+.br
+input_path=inception_v3.opt.circle
+.br
+output_path=inception_v3_pack
+.PP
+\fBone\-build\fR section decides whether to use each driver or not.
+If the value is False, even if the corresponding section exists, the driver won't be executed.
+.SH COPYRIGHT
+Copyright \(co 2020\-2021 Samsung Electronics Co., Ltd. All Rights Reserved
+Licensed under the Apache License, Version 2.0
+https://github.com/Samsung/ONE
+.SH "SEE ALSO"
+The full documentation for
+.B one-build
+is maintained as a Texinfo manual. If the
+.B info
+and
+.B one-build
+programs are properly installed at your site, the command
+.IP
+.B info one-build
+.PP
+should give you access to the complete manual.
+
diff --git a/infra/debian/compiler/docs/one-codegen.1 b/infra/debian/compiler/docs/one-codegen.1
new file mode 100644
index 000000000..b5296a018
--- /dev/null
+++ b/infra/debian/compiler/docs/one-codegen.1
@@ -0,0 +1,39 @@
+.TH ONE-CODEGEN "1" "August 2021" "one-codegen version 1.17.0" "User Commands"
+.SH NAME
+one-codegen \- generate code
+.SH DESCRIPTION
+usage: one\-codegen [\-h] [\-v] [\-C CONFIG] [\-b BACKEND] [\-\-] [COMMANDS FOR BACKEND]
+.PP
+\fBone\-codegen\fR is a command line tool for code generation.
+.SH OPTIONS
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+show this help message and exit
+.TP
+\fB\-v\fR, \fB\-\-version\fR
+show program's version number and exit
+.TP
+\fB\-V\fR, \fB\-\-verbose\fR
+output additional information to stdout or stderr
+.TP
+\fB\-C\fR CONFIG, \fB\-\-config\fR CONFIG
+run with configuration file
+.TP
+\fB\-b\fR BACKEND, \fB\-\-backend\fR BACKEND
+backend name to use
+.SH COPYRIGHT
+Copyright \(co 2020\-2021 Samsung Electronics Co., Ltd. All Rights Reserved
+Licensed under the Apache License, Version 2.0
+https://github.com/Samsung/ONE
+.SH "SEE ALSO"
+The full documentation for
+.B one-codegen
+is maintained as a Texinfo manual. If the
+.B info
+and
+.B one-codegen
+programs are properly installed at your site, the command
+.IP
+.B info one-codegen
+.PP
+should give you access to the complete manual.
diff --git a/infra/debian/compiler/docs/one-import-bcq.1 b/infra/debian/compiler/docs/one-import-bcq.1
new file mode 100644
index 000000000..b8a85cee4
--- /dev/null
+++ b/infra/debian/compiler/docs/one-import-bcq.1
@@ -0,0 +1,61 @@
+.TH ONE-IMPORT-BCQ "1" "August 2021" "one-import-bcq version 1.17.0" "User Commands"
+.SH NAME
+one-import-bcq \- convert TensorFlow with BCQ to circle
+.SH DESCRIPTION
+usage: one\-import\-bcq [\-h] [\-v] [\-V] [\-C CONFIG] [\-\-v1 | \-\-v2] [\-i INPUT_PATH]
+.br
+[\-o OUTPUT_PATH] [\-I INPUT_ARRAYS] [\-s INPUT_SHAPES]
+.br
+[\-O OUTPUT_ARRAYS]
+.PP
+\fBone\-import\-bcq\fR is a command line tool to convert TensorFlow with BCQ to circle.
+.SH OPTIONS
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+show this help message and exit
+.TP
+\fB\-v\fR, \fB\-\-version\fR
+show program's version number and exit
+.TP
+\fB\-V\fR, \fB\-\-verbose\fR
+output additional information to stdout or stderr
+.TP
+\fB\-C\fR CONFIG, \fB\-\-config\fR CONFIG
+run with configuration file
+.TP
+\fB\-\-v1\fR
+use TensorFlow Lite Converter 1.x
+.TP
+\fB\-\-v2\fR
+use TensorFlow Lite Converter 2.x
+.TP
+\fB\-i\fR INPUT_PATH, \fB\-\-input_path\fR INPUT_PATH
+full filepath of the input file
+.TP
+\fB\-o\fR OUTPUT_PATH, \fB\-\-output_path\fR OUTPUT_PATH
+full filepath of the output file
+.TP
+\fB\-I\fR INPUT_ARRAYS, \fB\-\-input_arrays\fR INPUT_ARRAYS
+names of the input arrays, comma\-separated
+.TP
+\fB\-s\fR INPUT_SHAPES, \fB\-\-input_shapes\fR INPUT_SHAPES
+shapes corresponding to \fB\-\-input_arrays\fR, colon\-separated (ex:"1,4,4,3:1,20,20,3")
+.TP
+\fB\-O\fR OUTPUT_ARRAYS, \fB\-\-output_arrays\fR OUTPUT_ARRAYS
+names of the output arrays, comma\-separated
+.SH COPYRIGHT
+Copyright \(co 2020\-2021 Samsung Electronics Co., Ltd. All Rights Reserved
+Licensed under the Apache License, Version 2.0
+https://github.com/Samsung/ONE
+.SH "SEE ALSO"
+The full documentation for
+.B one-import-bcq
+is maintained as a Texinfo manual. If the
+.B info
+and
+.B one-import-bcq
+programs are properly installed at your site, the command
+.IP
+.B info one-import-bcq
+.PP
+should give you access to the complete manual.
diff --git a/infra/debian/compiler/docs/one-import-onnx.1 b/infra/debian/compiler/docs/one-import-onnx.1
new file mode 100644
index 000000000..1953544dc
--- /dev/null
+++ b/infra/debian/compiler/docs/one-import-onnx.1
@@ -0,0 +1,63 @@
+.TH ONE-IMPORT-ONNX "1" "August 2021" "one-import-onnx version 1.17.0" "User Commands"
+.SH NAME
+one-import-onnx \- convert ONNX to circle
+.SH DESCRIPTION
+usage: one\-import\-onnx [\-h] [\-v] [\-V] [\-C CONFIG] [\-i INPUT_PATH]
+.br
+[\-o OUTPUT_PATH] [\-I INPUT_ARRAYS] [\-O OUTPUT_ARRAYS]
+.br
+[\-\-model_format MODEL_FORMAT]
+.br
+[\-\-converter_version CONVERTER_VERSION]
+.br
+[\-\-save_intermediate]
+.PP
+\fBone\-import\-onnx\fR is a command line tool to convert ONNX to circle.
+.SH OPTIONS
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+show this help message and exit
+.TP
+\fB\-v\fR, \fB\-\-version\fR
+show program's version number and exit
+.TP
+\fB\-V\fR, \fB\-\-verbose\fR
+output additional information to stdout or stderr
+.TP
+\fB\-C\fR CONFIG, \fB\-\-config\fR CONFIG
+run with configuration file
+.TP
+\fB\-\-save_intermediate\fR
+Save intermediate files to output folder
+.TP
+\fB\-i\fR INPUT_PATH, \fB\-\-input_path\fR INPUT_PATH
+full filepath of the input file
+.TP
+\fB\-o\fR OUTPUT_PATH, \fB\-\-output_path\fR OUTPUT_PATH
+full filepath of the output file
+.TP
+\fB\-I\fR INPUT_ARRAYS, \fB\-\-input_arrays\fR INPUT_ARRAYS
+names of the input arrays, comma\-separated
+.TP
+\fB\-O\fR OUTPUT_ARRAYS, \fB\-\-output_arrays\fR OUTPUT_ARRAYS
+names of the output arrays, comma\-separated
+.HP
+\fB\-\-model_format\fR MODEL_FORMAT
+.HP
+\fB\-\-converter_version\fR CONVERTER_VERSION
+.SH COPYRIGHT
+Copyright \(co 2020\-2021 Samsung Electronics Co., Ltd. All Rights Reserved
+Licensed under the Apache License, Version 2.0
+https://github.com/Samsung/ONE
+.SH "SEE ALSO"
+The full documentation for
+.B one-import-onnx
+is maintained as a Texinfo manual. If the
+.B info
+and
+.B one-import-onnx
+programs are properly installed at your site, the command
+.IP
+.B info one-import-onnx
+.PP
+should give you access to the complete manual.
diff --git a/infra/debian/compiler/docs/one-import-tf.1 b/infra/debian/compiler/docs/one-import-tf.1
new file mode 100644
index 000000000..9f05a888f
--- /dev/null
+++ b/infra/debian/compiler/docs/one-import-tf.1
@@ -0,0 +1,77 @@
+.TH ONE-IMPORT-TF "1" "August 2021" "one-import-tf version 1.17.0" "User Commands"
+.SH NAME
+one-import-tf \- convert TensorFlow to circle
+.SH DESCRIPTION
+usage: one\-import\-tf [\-h] [\-v] [\-V] [\-C CONFIG] [\-\-v1 | \-\-v2]
+.br
+[\-\-graph_def | \-\-saved_model | \-\-keras_model]
+.br
+[\-i INPUT_PATH] [\-o OUTPUT_PATH] [\-I INPUT_ARRAYS]
+.br
+[\-s INPUT_SHAPES] [\-O OUTPUT_ARRAYS]
+.br
+[\-\-save_intermediate]
+.PP
+\fBone\-import\-tf\fR is a command line tool to convert TensorFlow model to circle.
+.SH OPTIONS
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+show this help message and exit
+.TP
+\fB\-v\fR, \fB\-\-version\fR
+show program's version number and exit
+.TP
+\fB\-V\fR, \fB\-\-verbose\fR
+output additional information to stdout or stderr
+.TP
+\fB\-C\fR CONFIG, \fB\-\-config\fR CONFIG
+run with configuration file
+.TP
+\fB\-\-save_intermediate\fR
+Save intermediate files to output folder
+.TP
+\fB\-\-v1\fR
+use TensorFlow Lite Converter 1.x
+.TP
+\fB\-\-v2\fR
+use TensorFlow Lite Converter 2.x
+.TP
+\fB\-\-graph_def\fR
+use graph def file (default)
+.TP
+\fB\-\-saved_model\fR
+use saved model
+.TP
+\fB\-\-keras_model\fR
+use keras model
+.TP
+\fB\-i\fR INPUT_PATH, \fB\-\-input_path\fR INPUT_PATH
+full filepath of the input file
+.TP
+\fB\-o\fR OUTPUT_PATH, \fB\-\-output_path\fR OUTPUT_PATH
+full filepath of the output file
+.TP
+\fB\-I\fR INPUT_ARRAYS, \fB\-\-input_arrays\fR INPUT_ARRAYS
+names of the input arrays, comma\-separated
+.TP
+\fB\-s\fR INPUT_SHAPES, \fB\-\-input_shapes\fR INPUT_SHAPES
+shapes corresponding to \fB\-\-input_arrays\fR, colon\-separated (ex:"1,4,4,3:1,20,20,3")
+.TP
+\fB\-O\fR OUTPUT_ARRAYS, \fB\-\-output_arrays\fR OUTPUT_ARRAYS
+names of the output arrays, comma\-separated
+.SH COPYRIGHT
+Copyright \(co 2020\-2021 Samsung Electronics Co., Ltd. All Rights Reserved
+Licensed under the Apache License, Version 2.0
+https://github.com/Samsung/ONE
+.SH "SEE ALSO"
+The full documentation for
+.B one-import-tf
+is maintained as a Texinfo manual. If the
+.B info
+and
+.B one-import-tf
+programs are properly installed at your site, the command
+.IP
+.B info one-import-tf
+.PP
+should give you access to the complete manual.
diff --git a/infra/debian/compiler/docs/one-import-tflite.1 b/infra/debian/compiler/docs/one-import-tflite.1
new file mode 100644
index 000000000..ef63146ac
--- /dev/null
+++ b/infra/debian/compiler/docs/one-import-tflite.1
@@ -0,0 +1,44 @@
+.TH ONE-IMPORT-TFLITE "1" "August 2021" "one-import-tflite version 1.17.0" "User Commands"
+.SH NAME
+one-import-tflite \- convert TensorFlow Lite to circle
+.SH DESCRIPTION
+usage: one\-import\-tflite [\-h] [\-v] [\-V] [\-C CONFIG] [\-i INPUT_PATH]
+.br
+[\-o OUTPUT_PATH]
+.PP
+\fBone\-import\-tflite\fR is a command line tool to convert TensorFlow Lite to circle.
+.SH OPTIONS
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+show this help message and exit
+.TP
+\fB\-v\fR, \fB\-\-version\fR
+show program's version number and exit
+.TP
+\fB\-V\fR, \fB\-\-verbose\fR
+output additional information to stdout or stderr
+.TP
+\fB\-C\fR CONFIG, \fB\-\-config\fR CONFIG
+run with configuration file
+.TP
+\fB\-i\fR INPUT_PATH, \fB\-\-input_path\fR INPUT_PATH
+full filepath of the input file
+.TP
+\fB\-o\fR OUTPUT_PATH, \fB\-\-output_path\fR OUTPUT_PATH
+full filepath of the output file
+.SH COPYRIGHT
+Copyright \(co 2020\-2021 Samsung Electronics Co., Ltd. All Rights Reserved
+Licensed under the Apache License, Version 2.0
+https://github.com/Samsung/ONE
+.SH "SEE ALSO"
+The full documentation for
+.B one-import-tflite
+is maintained as a Texinfo manual. If the
+.B info
+and
+.B one-import-tflite
+programs are properly installed at your site, the command
+.IP
+.B info one-import-tflite
+.PP
+should give you access to the complete manual.
diff --git a/infra/debian/compiler/docs/one-import.1 b/infra/debian/compiler/docs/one-import.1
new file mode 100644
index 000000000..674e9ada3
--- /dev/null
+++ b/infra/debian/compiler/docs/one-import.1
@@ -0,0 +1,35 @@
+.TH ONE-IMPORT "1" "August 2021" "one-import version 1.17.0" "User Commands"
+.SH NAME
+one-import \- convert various formats to circle
+.SH SYNOPSIS
+usage: one\-import [\-h] [\-C CONFIG] [\-v] driver
+.SH DESCRIPTION
+\fBone\-import\fR is a command line tool to convert various formats to circle.
+.SH OPTIONS
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+show this help message and exit
+.TP
+\fB\-v\fR, \fB\-\-version\fR
+show program's version number and exit
+.TP
+\fB\-C\fR CONFIG, \fB\-\-config\fR CONFIG
+run with configuration file
+.TP
+\fBdriver\fR driver name to run (supported: tf, tflite, bcq, onnx)
+.SH COPYRIGHT
+Copyright \(co 2020\-2021 Samsung Electronics Co., Ltd. All Rights Reserved
+Licensed under the Apache License, Version 2.0
+https://github.com/Samsung/ONE
+.SH "SEE ALSO"
+The full documentation for
+.B one-import
+is maintained as a Texinfo manual. If the
+.B info
+and
+.B one-import
+programs are properly installed at your site, the command
+.IP
+.B info one-import
+.PP
+should give you access to the complete manual.
diff --git a/infra/debian/compiler/docs/one-infer.1 b/infra/debian/compiler/docs/one-infer.1
new file mode 100644
index 000000000..a1bafbb12
--- /dev/null
+++ b/infra/debian/compiler/docs/one-infer.1
@@ -0,0 +1,46 @@
+.TH ONE-INFER "1" "July 2022" "one-infer version 1.21.0" "User Commands"
+.SH NAME
+one-infer \- manual page for one-infer version 1.21.0
+.SH DESCRIPTION
+usage: one\-infer [\-h] [\-v] [\-C CONFIG] [\-d DRIVER | \fB\-b\fR BACKEND] [\-\-post\-process POST_PROCESS] [\-\-] [COMMANDS FOR BACKEND DRIVER]
+.PP
+command line tool to run inference on a model
+.SS "optional arguments:"
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+show this help message and exit
+.TP
+\fB\-v\fR, \fB\-\-version\fR
+show program's version number and exit
+.TP
+\fB\-V\fR, \fB\-\-verbose\fR
+output additional information to stdout or stderr
+.TP
+\fB\-C\fR CONFIG, \fB\-\-config\fR CONFIG
+run with configuration file
+.TP
+\fB\-d\fR DRIVER, \fB\-\-driver\fR DRIVER
+backend inference driver name to execute
+.TP
+\fB\-b\fR BACKEND, \fB\-\-backend\fR BACKEND
+backend name to use
+.TP
+\fB\-\-post\-process\fR POST_PROCESS
+post processing script to convert I/O data to standard
+format
+.SH COPYRIGHT
+Copyright \(co 2020\-2022 Samsung Electronics Co., Ltd. All Rights Reserved
+Licensed under the Apache License, Version 2.0
+https://github.com/Samsung/ONE
+.SH "SEE ALSO"
+The full documentation for
+.B one-infer
+is maintained as a Texinfo manual. If the
+.B info
+and
+.B one-infer
+programs are properly installed at your site, the command
+.IP
+.B info one-infer
+.PP
+should give you access to the complete manual.
diff --git a/infra/debian/compiler/docs/one-optimize.1 b/infra/debian/compiler/docs/one-optimize.1
new file mode 100644
index 000000000..58b2c60bd
--- /dev/null
+++ b/infra/debian/compiler/docs/one-optimize.1
@@ -0,0 +1,222 @@
+.TH ONE-OPTIMIZE "1" "August 2021" "one-optimize version 1.17.0" "User Commands"
+.SH NAME
+one-optimize \- optimize circle model
+.SH DESCRIPTION
+usage: one\-optimize [\-h] [\-v] [\-V] [\-C CONFIG] [\-p]
+.br
+[\-\-change_outputs CHANGE_OUTPUTS] [\-i INPUT_PATH]
+.br
+[\-o OUTPUT_PATH] [\-\-O1] [\-\-convert_nchw_to_nhwc]
+.br
+[\-\-nchw_to_nhwc_input_shape] [\-\-nchw_to_nhwc_output_shape]
+.br
+[\-\-fold_add_v2] [\-\-fold_cast] [\-\-fold_dequantize]
+.br
+[\-\-fold_sparse_to_dense] [\-\-forward_reshape_to_unaryop]
+.br
+[\-\-fuse_add_with_tconv] [\-\-fuse_batchnorm_with_conv]
+.br
+[\-\-fuse_batchnorm_with_dwconv]
+.br
+[\-\-fuse_batchnorm_with_tconv] [\-\-fuse_bcq]
+.br
+[\-\-fuse_preactivation_batchnorm]
+.br
+[\-\-make_batchnorm_gamma_positive]
+.br
+[\-\-fuse_activation_function] [\-\-fuse_instnorm]
+.br
+[\-\-replace_cw_mul_add_with_depthwise_conv]
+.br
+[\-\-remove_fakequant] [\-\-remove_quantdequant]
+.br
+[\-\-remove_redundant_reshape]
+.br
+[\-\-remove_redundant_transpose]
+.br
+[\-\-remove_unnecessary_reshape]
+.br
+[\-\-remove_unnecessary_slice]
+.br
+[\-\-remove_unnecessary_strided_slice]
+.br
+[\-\-remove_unnecessary_split] [\-\-resolve_customop_add]
+.br
+[\-\-resolve_customop_batchmatmul]
+.br
+[\-\-resolve_customop_matmul]
+.br
+[\-\-shuffle_weight_to_16x1float32]
+.br
+[\-\-substitute_pack_to_reshape]
+.br
+[\-\-substitute_squeeze_to_reshape]
+.br
+[\-\-substitute_transpose_to_reshape]
+.br
+[\-\-transform_min_max_to_relu6]
+.br
+[\-\-transform_min_relu_to_relu6]
+.PP
+\fBone\-optimize\fR is a command line tool to optimize circle model.
+.SH OPTIONS
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+show this help message and exit
+.TP
+\fB\-v\fR, \fB\-\-version\fR
+show program's version number and exit
+.TP
+\fB\-V\fR, \fB\-\-verbose\fR
+output additional information to stdout or stderr
+.TP
+\fB\-C\fR CONFIG, \fB\-\-config\fR CONFIG
+run with configuration file
+.SS "arguments for utility:"
+.TP
+\fB\-p\fR, \fB\-\-generate_profile_data\fR
+generate profiling data
+.TP
+\fB\-\-change_outputs\fR CHANGE_OUTPUTS
+Experimental: Change first subgraph output nodes to
+CSV names
+.SS "arguments for optimization:"
+.TP
+\fB\-i\fR INPUT_PATH, \fB\-\-input_path\fR INPUT_PATH
+full filepath of the input file
+.TP
+\fB\-o\fR OUTPUT_PATH, \fB\-\-output_path\fR OUTPUT_PATH
+full filepath of the output file
+.TP
+\fB\-\-O1\fR
+enable O1 optimization pass
+.TP
+\fB\-\-convert_nchw_to_nhwc\fR
+Experimental: This will convert NCHW operators to NHWC
+under the assumption that input model is NCHW.
+.TP
+\fB\-\-nchw_to_nhwc_input_shape\fR
+convert the input shape of the model (argument for
+convert_nchw_to_nhwc)
+.TP
+\fB\-\-nchw_to_nhwc_output_shape\fR
+convert the output shape of the model (argument for
+convert_nchw_to_nhwc)
+.TP
+\fB\-\-fold_add_v2\fR
+fold AddV2 op with constant inputs
+.TP
+\fB\-\-fold_cast\fR
+fold Cast op with constant input
+.TP
+\fB\-\-fold_dequantize\fR
+fold Dequantize op
+.TP
+\fB\-\-fold_sparse_to_dense\fR
+fold SparseToDense op
+.TP
+\fB\-\-forward_reshape_to_unaryop\fR
+Forward Reshape op
+.TP
+\fB\-\-fuse_add_with_tconv\fR
+fuse Add op to Transposed Convolution op
+.TP
+\fB\-\-fuse_batchnorm_with_conv\fR
+fuse BatchNorm op to Convolution op
+.TP
+\fB\-\-fuse_batchnorm_with_dwconv\fR
+fuse BatchNorm op to Depthwise Convolution op
+.TP
+\fB\-\-fuse_batchnorm_with_tconv\fR
+fuse BatchNorm op to Transposed Convolution op
+.TP
+\fB\-\-fuse_bcq\fR
+apply Binary Coded Quantization
+.TP
+\fB\-\-fuse_preactivation_batchnorm\fR
+fuse BatchNorm operators of pre\-activations to
+Convolution op
+.TP
+\fB\-\-make_batchnorm_gamma_positive\fR
+make negative gamma of BatchNorm a small positive
+value (1e\-10). Note that this pass can change the
+execution result of the model, so use it only when
+the impact is known to be acceptable.
+.TP
+\fB\-\-fuse_activation_function\fR
+fuse Activation function to a preceding operator
+.TP
+\fB\-\-fuse_instnorm\fR
+fuse ops to InstanceNorm operator
+.TP
+\fB\-\-replace_cw_mul_add_with_depthwise_conv\fR
+replace channel\-wise Mul/Add with DepthwiseConv2D
+.TP
+\fB\-\-remove_fakequant\fR
+remove FakeQuant ops
+.TP
+\fB\-\-remove_quantdequant\fR
+remove Quantize\-Dequantize sequence
+.TP
+\fB\-\-remove_redundant_reshape\fR
+fuse or remove subsequent Reshape ops
+.TP
+\fB\-\-remove_redundant_transpose\fR
+fuse or remove subsequent Transpose ops
+.TP
+\fB\-\-remove_unnecessary_reshape\fR
+remove unnecessary reshape ops
+.TP
+\fB\-\-remove_unnecessary_slice\fR
+remove unnecessary slice ops
+.TP
+\fB\-\-remove_unnecessary_strided_slice\fR
+remove unnecessary strided slice ops
+.TP
+\fB\-\-remove_unnecessary_split\fR
+remove unnecessary split ops
+.TP
+\fB\-\-resolve_customop_add\fR
+convert Custom(Add) op to Add op
+.TP
+\fB\-\-resolve_customop_batchmatmul\fR
+convert Custom(BatchMatmul) op to BatchMatmul op
+.TP
+\fB\-\-resolve_customop_matmul\fR
+convert Custom(Matmul) op to Matmul op
+.TP
+\fB\-\-shuffle_weight_to_16x1float32\fR
+convert weight format of FullyConnected op to
+SHUFFLED16x1FLOAT32. Note that it only converts
+weights whose row is a multiple of 16
+.TP
+\fB\-\-substitute_pack_to_reshape\fR
+convert single input Pack op to Reshape op
+.TP
+\fB\-\-substitute_squeeze_to_reshape\fR
+convert Squeeze to Reshape under certain conditions
+.TP
+\fB\-\-substitute_transpose_to_reshape\fR
+convert Transpose to Reshape under certain conditions
+.TP
+\fB\-\-transform_min_max_to_relu6\fR
+transform Minimum\-Maximum pattern to Relu6 op
+.TP
+\fB\-\-transform_min_relu_to_relu6\fR
+transform Minimum(6)\-Relu pattern to Relu6 op
+.SH COPYRIGHT
+Copyright \(co 2020\-2021 Samsung Electronics Co., Ltd. All Rights Reserved
+Licensed under the Apache License, Version 2.0
+https://github.com/Samsung/ONE
+.SH "SEE ALSO"
+The full documentation for
+.B one-optimize
+is maintained as a Texinfo manual. If the
+.B info
+and
+.B one-optimize
+programs are properly installed at your site, the command
+.IP
+.B info one-optimize
+.PP
+should give you access to the complete manual.
diff --git a/infra/debian/compiler/docs/one-pack.1 b/infra/debian/compiler/docs/one-pack.1
new file mode 100644
index 000000000..dd0422146
--- /dev/null
+++ b/infra/debian/compiler/docs/one-pack.1
@@ -0,0 +1,42 @@
+.TH ONE-PACK "1" "August 2021" "one-pack version 1.17.0" "User Commands"
+.SH NAME
+one-pack \- package circle and metadata into nnpackage
+.SH DESCRIPTION
+usage: one\-pack [\-h] [\-v] [\-V] [\-C CONFIG] [\-i INPUT_PATH] [\-o OUTPUT_PATH]
+.PP
+\fBone\-pack\fR is a command line tool to package circle and metadata into nnpackage.
+.SH OPTIONS
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+show this help message and exit
+.TP
+\fB\-v\fR, \fB\-\-version\fR
+show program's version number and exit
+.TP
+\fB\-V\fR, \fB\-\-verbose\fR
+output additional information to stdout or stderr
+.TP
+\fB\-C\fR CONFIG, \fB\-\-config\fR CONFIG
+run with configuration file
+.TP
+\fB\-i\fR INPUT_PATH, \fB\-\-input_path\fR INPUT_PATH
+full filepath of the input file
+.TP
+\fB\-o\fR OUTPUT_PATH, \fB\-\-output_path\fR OUTPUT_PATH
+full filepath of the output file
+.SH COPYRIGHT
+Copyright \(co 2020\-2021 Samsung Electronics Co., Ltd. All Rights Reserved
+Licensed under the Apache License, Version 2.0
+https://github.com/Samsung/ONE
+.SH "SEE ALSO"
+The full documentation for
+.B one-pack
+is maintained as a Texinfo manual. If the
+.B info
+and
+.B one-pack
+programs are properly installed at your site, the command
+.IP
+.B info one-pack
+.PP
+should give you access to the complete manual.
diff --git a/infra/debian/compiler/docs/one-partition.1 b/infra/debian/compiler/docs/one-partition.1
new file mode 100644
index 000000000..5b6fe933d
--- /dev/null
+++ b/infra/debian/compiler/docs/one-partition.1
@@ -0,0 +1,56 @@
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.6.
+.TH ONE-PARTITION "1" "June 2022" "one-partition version 1.21.0" "User Commands"
+.SH NAME
+one-partition \- manual page for one-partition version 1.21.0
+.SH DESCRIPTION
+usage: one\-partition [\-h] [\-v] [\-V] [\-C CONFIG] [\-\-backends BACKENDS]
+.TP
+[\-\-default DEFAULT] [\-\-part_file PART_FILE]
+[\-\-input_file INPUT_FILE] [\-\-work_path WORK_PATH]
+.PP
+command line tool to partition a circle model across multiple backends
+.SS "optional arguments:"
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+show this help message and exit
+.TP
+\fB\-v\fR, \fB\-\-version\fR
+show program's version number and exit
+.TP
+\fB\-V\fR, \fB\-\-verbose\fR
+output additional information to stdout or stderr
+.TP
+\fB\-C\fR CONFIG, \fB\-\-config\fR CONFIG
+run with configuration file
+.TP
+\fB\-\-backends\fR BACKENDS
+backends in CSV to use for partitioning
+.TP
+\fB\-\-default\fR DEFAULT
+default backend to assign
+.TP
+\fB\-\-part_file\fR PART_FILE
+partition file which provides backend to assign
+.TP
+\fB\-\-input_file\fR INPUT_FILE
+input circle model filename
+.TP
+\fB\-\-work_path\fR WORK_PATH
+work path of partitioning; input files are read from
+and output files are produced in this path
+.SH COPYRIGHT
+Copyright \(co 2020\-2022 Samsung Electronics Co., Ltd. All Rights Reserved
+Licensed under the Apache License, Version 2.0
+https://github.com/Samsung/ONE
+.SH "SEE ALSO"
+The full documentation for
+.B one-partition
+is maintained as a Texinfo manual. If the
+.B info
+and
+.B one-partition
+programs are properly installed at your site, the command
+.IP
+.B info one-partition
+.PP
+should give you access to the complete manual.
diff --git a/infra/debian/compiler/docs/one-profile.1 b/infra/debian/compiler/docs/one-profile.1
new file mode 100644
index 000000000..3952c4484
--- /dev/null
+++ b/infra/debian/compiler/docs/one-profile.1
@@ -0,0 +1,39 @@
+.TH ONE-PROFILE "1" "August 2021" "one-profile version 1.17.0" "User Commands"
+.SH NAME
+one-profile \- profile backend model file
+.SH DESCRIPTION
+usage: one\-profile [\-h] [\-v] [\-V] [\-C CONFIG] [\-b BACKEND] [\-\-] [COMMANDS FOR BACKEND]
+.PP
+\fBone\-profile\fR is a command line tool for profiling backend model.
+.SH OPTIONS
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+show this help message and exit
+.TP
+\fB\-v\fR, \fB\-\-version\fR
+show program's version number and exit
+.TP
+\fB\-V\fR, \fB\-\-verbose\fR
+output additional information to stdout or stderr
+.TP
+\fB\-C\fR CONFIG, \fB\-\-config\fR CONFIG
+run with configuration file
+.TP
+\fB\-b\fR BACKEND, \fB\-\-backend\fR BACKEND
+backend name to use
+.SH COPYRIGHT
+Copyright \(co 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+Licensed under the Apache License, Version 2.0
+https://github.com/Samsung/ONE
+.SH "SEE ALSO"
+The full documentation for
+.B one-profile
+is maintained as a Texinfo manual. If the
+.B info
+and
+.B one-profile
+programs are properly installed at your site, the command
+.IP
+.B info one-profile
+.PP
+should give you access to the complete manual.
diff --git a/infra/debian/compiler/docs/one-quantize.1 b/infra/debian/compiler/docs/one-quantize.1
new file mode 100644
index 000000000..43c4c0321
--- /dev/null
+++ b/infra/debian/compiler/docs/one-quantize.1
@@ -0,0 +1,83 @@
+.TH ONE-QUANTIZE "1" "August 2021" "one-quantize version 1.17.0" "User Commands"
+.SH NAME
+one-quantize \- quantize circle model
+.SH DESCRIPTION
+usage: one\-quantize [\-h] [\-v] [\-V] [\-C CONFIG] [\-i INPUT_PATH] [\-d INPUT_DATA]
+.br
+[\-f INPUT_DATA_FORMAT] [\-o OUTPUT_PATH] [\-p]
+.br
+[\-\-input_dtype INPUT_DTYPE]
+.br
+[\-\-quantized_dtype QUANTIZED_DTYPE]
+.br
+[\-\-granularity GRANULARITY]
+.br
+[\-\-min_percentile MIN_PERCENTILE]
+.br
+[\-\-max_percentile MAX_PERCENTILE] [\-\-mode MODE]
+.PP
+\fBone\-quantize\fR is a command line tool to quantize circle model.
+.SH OPTIONS
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+show this help message and exit
+.TP
+\fB\-v\fR, \fB\-\-version\fR
+show program's version number and exit
+.TP
+\fB\-V\fR, \fB\-\-verbose\fR
+output additional information to stdout or stderr
+.TP
+\fB\-C\fR CONFIG, \fB\-\-config\fR CONFIG
+run with configuration file
+.TP
+\fB\-i\fR INPUT_PATH, \fB\-\-input_path\fR INPUT_PATH
+full filepath of the input file
+.TP
+\fB\-d\fR INPUT_DATA, \fB\-\-input_data\fR INPUT_DATA
+full filepath of the input data file. If not
+specified, run with random input data.
+.TP
+\fB\-o\fR OUTPUT_PATH, \fB\-\-output_path\fR OUTPUT_PATH
+full filepath of the output file
+.TP
+\fB\-p\fR, \fB\-\-generate_profile_data\fR
+generate profiling data
+.SS "arguments for quantization:"
+.TP
+\fB\-\-input_dtype\fR INPUT_DTYPE
+input data type (supported: float32, default=float32)
+.TP
+\fB\-\-quantized_dtype\fR QUANTIZED_DTYPE
+output quantized data type (supported: uint8, int16,
+default=uint8)
+.TP
+\fB\-\-granularity\fR GRANULARITY
+quantize granularity (supported: layer, channel,
+default=layer)
+.TP
+\fB\-\-min_percentile\fR MIN_PERCENTILE
+minimum percentile (0.0~100.0, default=1.0)
+.TP
+\fB\-\-max_percentile\fR MAX_PERCENTILE
+maximum percentile (0.0~100.0, default=99.0)
+.TP
+\fB\-\-mode\fR MODE
+record mode (supported: percentile/moving_average,
+default=percentile)
+.SH COPYRIGHT
+Copyright \(co 2020\-2021 Samsung Electronics Co., Ltd. All Rights Reserved
+Licensed under the Apache License, Version 2.0
+https://github.com/Samsung/ONE
+.SH "SEE ALSO"
+The full documentation for
+.B one-quantize
+is maintained as a Texinfo manual. If the
+.B info
+and
+.B one-quantize
+programs are properly installed at your site, the command
+.IP
+.B info one-quantize
+.PP
+should give you access to the complete manual.
diff --git a/infra/debian/compiler/docs/onecc.1 b/infra/debian/compiler/docs/onecc.1
new file mode 100644
index 000000000..352b30a00
--- /dev/null
+++ b/infra/debian/compiler/docs/onecc.1
@@ -0,0 +1,170 @@
+.\" Manpage for onecc.
+.\" Contact nnfw@samsung.com to correct errors or typos.
+.TH ONECC "1" "August 2021" "onecc version 1.17.0" "User Commands"
+.SH NAME
+onecc \- run ONE driver via several commands or configuration file
+.SH SYNOPSIS
+\fBonecc\fR [\-h] [\-v] [\-C CONFIG] [COMMAND <args>]
+.SH DESCRIPTION
+\fBonecc\fR is a command line tool to execute ONE driver via several commands or configuration file.
+.SS "Configuration file:"
+\fBonecc\fR takes input as a configuration file that supports ini format.
+A configuration file consists of sections, each led by a [section] header.
+Each section is the ONE driver you want to run, and consists of commands in a key/value combination to pass to the driver.
+.SH OPTIONS
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+show this help message and exit
+.TP
+\fB\-v\fR, \fB\-\-version\fR
+show program's version number and exit
+.TP
+\fB\-V\fR, \fB\-\-verbose\fR
+output additional information to stdout or stderr
+.TP
+\fB\-C\fR CONFIG, \fB\-\-config\fR CONFIG
+run with configuration file
+.SS compile to circle model
+.TP
+\fBimport\fR
+Convert given model to circle. See one\-import(1) for details.
+.TP
+\fBoptimize\fR
+Optimize circle model. See one-optimize(1) for details.
+.TP
+\fBquantize\fR
+Quantize circle model. See one-quantize(1) for details.
+.SS package circle model
+.TP
+\fBpack\fR
+Package circle and metadata into nnpackage. See one-pack(1) for details.
+.SS run backend tools
+.TP
+\fBcodegen\fR
+Code generation tool. See one-codegen(1) for details.
+.TP
+\fBprofile\fR
+Profile backend model file. See one-profile(1) for details.
+.SH EXAMPLES
+.SS Use command line interface
+.TP
+\fBonecc import tf --v1 -i\fR \fIinput_path\fR \fB-o\fR \fIoutput_path\fR \fB-I\fR \fIinput_arrays\fR \fB-s\fR \fIinput_shapes\fR \fB-O\fR \fIoutput_arrays\fR
+import tf model
+.TP
+\fBonecc import tflite -i\fR \fIinput_path\fR \fB-o\fR \fIoutput_path\fR
+import tflite model
+.TP
+\fBonecc import onnx -i\fR \fIinput_path\fR \fB-o\fR \fIoutput_path\fR
+import onnx model
+.TP
+\fBonecc optimize -i\fR \fIinput_path\fR \fB-o\fR \fIoutput_path\fR \fIoptimize_arguments\fR
+optimize circle model
+.TP
+\fBonecc quantize -i\fR \fIinput_path\fR \fB-o\fR \fIoutput_path\fR \fB-d\fR \fIinput_data\fR
+quantize circle model
+.TP
+\fBonecc pack -i\fR \fIinput_path\fR \fB-o\fR \fIoutput_path\fR
+package circle and metadata into nnpackage
+.TP
+\fBonecc codegen -b\fR \fIbackend\fR \fB--\fR \fIbackends_arguments\fR
+generate backend code
+.TP
+\fBonecc profile -b\fR \fIbackend\fR \fB--\fR \fIbackends_arguments\fR
+profile backend model
+.PP
+.SS Use configuration file
+.PP
+The configuration file should be written in the following format:
+.IP
+[onecc]
+.br
+one-import-tf=True
+.br
+one-import-tflite=False
+.br
+one-import-bcq=False
+.br
+one-import-onnx=False
+.br
+one-optimize=True
+.br
+one-quantize=True
+.br
+one-pack=True
+.br
+one-codegen=True
+.br
+one-profile=True
+.IP
+[one-import-tf]
+.br
+input_path=/path/to/inception_v3.pb
+.br
+output_path=inception_v3.circle
+.br
+input_arrays=input
+.br
+input_shapes=1,299,299,3
+.br
+output_arrays=InceptionV3/Predictions/Reshape_1
+.br
+converter_version=v1
+.br
+model_format=graph_def
+.IP
+[one-optimize]
+.br
+input_path=inception_v3.circle
+.br
+output_path=inception_v3.opt.circle
+.br
+generate_profile_data=False
+.IP
+[one-quantize]
+.br
+input_path=inception_v3.opt.circle
+.br
+output_path=inception_v3.quantized.circle
+.br
+input_data=inception_v3_test_data.h5
+.IP
+[one-pack]
+.br
+input_path=inception_v3.quantized.circle
+.br
+output_path=inception_v3_pack
+.IP
+[one-codegen]
+.br
+backend=dummy
+.br
+command=-o sample.out inception_v3.quantized.circle
+.IP
+[one-profile]
+.br
+backend=dummy
+.br
+command=sample.out
+.TP
+\fBonecc -C\fR \fIconfiguration file\fR
+Run ONE drivers according to the sections of the configuration file
+.PP
+\fBonecc\fR section decides whether to use each driver or not.
+If the value is False, even if the corresponding section exists, the driver won't be executed.
+.SH COPYRIGHT
+Copyright \(co 2020\-2021 Samsung Electronics Co., Ltd. All Rights Reserved
+Licensed under the Apache License, Version 2.0
+https://github.com/Samsung/ONE
+.SH "SEE ALSO"
+The full documentation for
+.B onecc
+is maintained as a Texinfo manual. If the
+.B info
+and
+.B onecc
+programs are properly installed at your site, the command
+.IP
+.B info onecc
+.PP
+should give you access to the complete manual.
+
diff --git a/infra/debian/compiler/one-compiler-dev.install b/infra/debian/compiler/one-compiler-dev.install
new file mode 100644
index 000000000..47f53ad20
--- /dev/null
+++ b/infra/debian/compiler/one-compiler-dev.install
@@ -0,0 +1,10 @@
+# {FILES_TO_INSTALL} {DEST_DIR}
+# bin
+usr/bin/circledump usr/share/one/bin/
+usr/bin/circle-opselector usr/share/one/bin/
+usr/bin/circle-tensordump usr/share/one/bin/
+usr/bin/tflchef usr/share/one/bin/
+usr/bin/tflchef-file usr/share/one/bin/
+usr/bin/tflchef-reverse usr/share/one/bin/
+# include
+usr/include/* usr/share/one/include/
diff --git a/infra/debian/compiler/one-compiler-dev.links b/infra/debian/compiler/one-compiler-dev.links
new file mode 100644
index 000000000..89a654db9
--- /dev/null
+++ b/infra/debian/compiler/one-compiler-dev.links
@@ -0,0 +1,6 @@
+# bin
+usr/share/one/bin/circledump usr/bin/circledump
+usr/share/one/bin/circle-tensordump usr/bin/circle-tensordump
+usr/share/one/bin/tflchef usr/bin/tflchef
+usr/share/one/bin/tflchef-file usr/bin/tflchef-file
+usr/share/one/bin/tflchef-reverse usr/bin/tflchef-reverse
diff --git a/infra/debian/compiler/one-compiler-test.install b/infra/debian/compiler/one-compiler-test.install
new file mode 100644
index 000000000..fb9714da0
--- /dev/null
+++ b/infra/debian/compiler/one-compiler-test.install
@@ -0,0 +1,5 @@
+# {FILES_TO_INSTALL} {DEST_DIR}
+# bin
+usr/bin/luci_eval_driver usr/share/one/bin/
+# test
+usr/test/* usr/share/one/test/
diff --git a/infra/debian/compiler/one-compiler.install b/infra/debian/compiler/one-compiler.install
new file mode 100644
index 000000000..700cc2d0a
--- /dev/null
+++ b/infra/debian/compiler/one-compiler.install
@@ -0,0 +1,61 @@
+# {FILES_TO_INSTALL} {DEST_DIR}
+# bin
+usr/bin/circle2circle usr/share/one/bin/
+usr/bin/circle-eval-diff usr/share/one/bin/
+usr/bin/circle-interpreter usr/share/one/bin/
+usr/bin/circle-mpqsolver usr/share/one/bin/
+usr/bin/circle-operator usr/share/one/bin/
+usr/bin/circle-partitioner usr/share/one/bin/
+usr/bin/circle-quantizer usr/share/one/bin/
+usr/bin/dalgona usr/share/one/bin/
+usr/bin/generate_bcq_metadata.py usr/share/one/bin/
+usr/bin/generate_bcq_output_arrays.py usr/share/one/bin/
+usr/bin/model2nnpkg usr/share/one/bin/
+usr/bin/onecc usr/share/one/bin/
+usr/bin/onecc.template.cfg usr/share/one/bin/
+usr/bin/one-build usr/share/one/bin/
+usr/bin/one-build.template.cfg usr/share/one/bin/
+usr/bin/one-codegen usr/share/one/bin/
+usr/bin/one-import usr/share/one/bin/
+usr/bin/one-import-bcq usr/share/one/bin/
+usr/bin/one-import-onnx usr/share/one/bin/
+usr/bin/one-import-tf usr/share/one/bin/
+usr/bin/one-import-tflite usr/share/one/bin/
+usr/bin/one-infer usr/share/one/bin/
+usr/bin/one-optimize usr/share/one/bin/
+usr/bin/one-pack usr/share/one/bin/
+usr/bin/one-partition usr/share/one/bin/
+usr/bin/one-prepare-venv usr/share/one/bin/
+usr/bin/one-profile usr/share/one/bin/
+usr/bin/one-quantize usr/share/one/bin/
+usr/bin/one-version usr/share/one/bin/
+usr/bin/onelib/backends.py usr/share/one/bin/onelib/
+usr/bin/onelib/constant.py usr/share/one/bin/onelib/
+usr/bin/onelib/make_cmd.py usr/share/one/bin/onelib/
+usr/bin/onelib/CfgRunner.py usr/share/one/bin/onelib/
+usr/bin/onelib/OptionBuilder.py usr/share/one/bin/onelib/
+usr/bin/onelib/TopologicalSortHelper.py usr/share/one/bin/onelib/
+usr/bin/onelib/WorkflowRunner.py usr/share/one/bin/onelib/
+usr/bin/onelib/Command.py usr/share/one/bin/onelib/
+usr/bin/onelib/utils.py usr/share/one/bin/onelib/
+usr/bin/onelib/export_constant.py usr/share/one/bin/onelib/
+usr/bin/onnx_legalizer.py usr/share/one/bin/
+usr/bin/rawdata2hdf5 usr/share/one/bin/
+usr/bin/record-minmax usr/share/one/bin/
+usr/bin/tf2nnpkg usr/share/one/bin/
+usr/bin/tf2tfliteV2.py usr/share/one/bin/
+usr/bin/tflite2circle usr/share/one/bin/
+usr/bin/visq usr/share/one/bin/
+usr/bin/visqlib/DumpFakeQuantFM.py usr/share/one/bin/visqlib/
+usr/bin/visqlib/DumpFP32FM.py usr/share/one/bin/visqlib/
+usr/bin/visqlib/Palette.py usr/share/one/bin/visqlib/
+usr/bin/visqlib/QErrorComputer.py usr/share/one/bin/visqlib/
+usr/bin/visqlib/Util.py usr/share/one/bin/visqlib/
+usr/bin/visqlib/DotBuilder.py usr/share/one/bin/visqlib/
+usr/bin/circle/*.py usr/share/one/bin/circle/
+# lib
+usr/lib/* usr/share/one/lib/
+# doc
+usr/doc/* usr/share/one/doc/
+# optimization
+usr/optimization/* usr/share/one/optimization/
diff --git a/infra/debian/compiler/one-compiler.links b/infra/debian/compiler/one-compiler.links
new file mode 100644
index 000000000..9e464352a
--- /dev/null
+++ b/infra/debian/compiler/one-compiler.links
@@ -0,0 +1,17 @@
+# bin
+usr/share/one/bin/one-build usr/bin/one-build
+usr/share/one/bin/onecc usr/bin/onecc
+# lib
+usr/share/one/lib/libloco.so usr/lib/libloco.so
+usr/share/one/lib/libluci_env.so usr/lib/libluci_env.so
+usr/share/one/lib/libluci_export.so usr/lib/libluci_export.so
+usr/share/one/lib/libluci_import.so usr/lib/libluci_import.so
+usr/share/one/lib/libluci_interpreter.so usr/lib/libluci_interpreter.so
+usr/share/one/lib/libluci_lang.so usr/lib/libluci_lang.so
+usr/share/one/lib/libluci_logex.so usr/lib/libluci_logex.so
+usr/share/one/lib/libluci_log.so usr/lib/libluci_log.so
+usr/share/one/lib/libluci_partition.so usr/lib/libluci_partition.so
+usr/share/one/lib/libluci_pass.so usr/lib/libluci_pass.so
+usr/share/one/lib/libluci_profile.so usr/lib/libluci_profile.so
+usr/share/one/lib/libluci_plan.so usr/lib/libluci_plan.so
+usr/share/one/lib/libluci_service.so usr/lib/libluci_service.so
diff --git a/infra/debian/compiler/one-compiler.manpages b/infra/debian/compiler/one-compiler.manpages
new file mode 100644
index 000000000..e0284ae4e
--- /dev/null
+++ b/infra/debian/compiler/one-compiler.manpages
@@ -0,0 +1,14 @@
+debian/docs/one-build.1
+debian/docs/one-codegen.1
+debian/docs/one-infer.1
+debian/docs/one-import.1
+debian/docs/one-import-bcq.1
+debian/docs/one-import-onnx.1
+debian/docs/one-import-tf.1
+debian/docs/one-import-tflite.1
+debian/docs/one-optimize.1
+debian/docs/one-pack.1
+debian/docs/one-partition.1
+debian/docs/one-profile.1
+debian/docs/one-quantize.1
+debian/docs/onecc.1
diff --git a/infra/debian/compiler/postinst b/infra/debian/compiler/postinst
new file mode 100644
index 000000000..d84e8e042
--- /dev/null
+++ b/infra/debian/compiler/postinst
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+# https://www.debian.org/doc/debian-policy/ch-maintainerscripts.html
+# Broadly speaking, the `postinst` script is called after a package is unpacked.
+
+set -e
+
+# This script is invoked as root, but environment variables are inherited
+# from the caller, which can cause permission problems.
+# e.g. When `pip` installs user packages, it proceeds based on $HOME.
+# For a proper installation, $HOME should be root's.
+su - $(whoami) -p -c '/usr/share/one/bin/one-prepare-venv' # $(whoami) = root
diff --git a/infra/debian/compiler/postrm b/infra/debian/compiler/postrm
new file mode 100644
index 000000000..2972f28db
--- /dev/null
+++ b/infra/debian/compiler/postrm
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+set -e
+
+case "$1" in
+ remove|purge)
+ rm -rf /usr/share/one/
+ ;;
+ upgrade)
+ # DO NOTHING
+ ;;
+ failed-upgrade|abort-install|abort-upgrade)
+ # DO NOTHING
+ ;;
+ *)
+ # DO NOTHING
+ ;;
+esac
diff --git a/infra/debian/compiler/rules b/infra/debian/compiler/rules
new file mode 100755
index 000000000..e83680da8
--- /dev/null
+++ b/infra/debian/compiler/rules
@@ -0,0 +1,19 @@
+#!/usr/bin/make -f
+export DH_VERBOSE = 1
+export NNAS_BUILD_PREFIX = build
+export PRESET = 20230413
+export _DESTDIR = debian/tmp/usr
+
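+# Catch-all target: delegate to debhelper; specific build and install
+# steps are overridden below.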
+%:
+ dh $@
+
+override_dh_auto_build:
+ ./nnas create-package --preset $(PRESET) --prefix "$(_DESTDIR)"
+
+override_dh_auto_install:
+ cmake --build "$(NNAS_BUILD_PREFIX)/nncc" -- install
+
+override_dh_install:
+ install -T -m 755 -D "infra/packaging/res/tf2nnpkg.${PRESET}" "$(_DESTDIR)/bin/tf2nnpkg"
+ dh_install
+
diff --git a/infra/debian/compiler/source/format b/infra/debian/compiler/source/format
new file mode 100644
index 000000000..89ae9db8f
--- /dev/null
+++ b/infra/debian/compiler/source/format
@@ -0,0 +1 @@
+3.0 (native)
diff --git a/infra/debian/compiler/source/local-options b/infra/debian/compiler/source/local-options
new file mode 100644
index 000000000..296a73032
--- /dev/null
+++ b/infra/debian/compiler/source/local-options
@@ -0,0 +1,2 @@
+# This is for reproducible building. Otherwise, `debuild` recognizes build artifacts as source files.
+diff-ignore="build|externals"
diff --git a/infra/debian/runtime/changelog b/infra/debian/runtime/changelog
new file mode 100644
index 000000000..e07c50c21
--- /dev/null
+++ b/infra/debian/runtime/changelog
@@ -0,0 +1,38 @@
+one (1.21.0) bionic; urgency=low
+
+ * Runtime supports running an nnpackage with two models
+ * Conv2D and Depthwise Conv2D support per-channel quantization of the uint8 type
+ * TRIX backend supports batch execution, which runs in parallel on multiple cores
+
+ -- Chunseok Lee <chunseok.lee@samsung.com> Tue, 06 Sep 2022 12:00:00 +0900
+
+one (1.20.0) bionic; urgency=low
+
+ * Introduce TRIX backend
+ * API supports new data type NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED
+
+ -- Chunseok Lee <chunseok.lee@samsung.com> Wed, 26 Apr 2022 12:00:00 +0900
+
+one (1.19.0) bionic; urgency=low
+
+ * Sync up version with ONE Compiler
+
+ -- Chunseok Lee <chunseok.lee@samsung.com> Wed, 10 Nov 2021 14:23:00 +0900
+
+one (1.18.0) bionic; urgency=low
+
+ * Sync up version with ONE Compiler
+
+ -- Chunseok Lee <chunseok.lee@samsung.com> Fri, 15 Oct 2021 15:23:00 +0900
+
+one (1.17.0) bionic; urgency=low
+
+ * New gpu_gl backend supports the following operations: Add, Convolution, Depthwise Convolution, Pooling, Reshape, Relu, Softmax
+
+ -- Chunseok Lee <chunseok.lee@samsung.com> Fri, 20 Aug 2021 17:00:00 +0900
+
+one (1.16.0) bionic; urgency=low
+
+ * Initial release.
+
+ -- Chunseok Lee <chunseok.lee@samsung.com> Mon, 05 Jul 2021 17:11:00 +0900
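The entries above follow the standard Debian changelog format, `package (version) distribution; urgency=level`, which lets tooling extract fields mechanically; the runtime rules file further below relies on exactly that. A quick sanity check, assuming it is run from the source root:

    $ dpkg-parsechangelog -l infra/debian/runtime/changelog -SVersion
    1.21.0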
diff --git a/infra/debian/runtime/compat b/infra/debian/runtime/compat
new file mode 100644
index 000000000..ec635144f
--- /dev/null
+++ b/infra/debian/runtime/compat
@@ -0,0 +1 @@
+9
diff --git a/infra/debian/runtime/control b/infra/debian/runtime/control
new file mode 100644
index 000000000..20543baee
--- /dev/null
+++ b/infra/debian/runtime/control
@@ -0,0 +1,19 @@
+Source: one
+Section: devel
+Priority: extra
+Maintainer: Neural Network Acceleration Solution Developers <nnfw@samsung.com>
+Build-Depends: cmake, debhelper (>=9), dh-python, python3-all
+Standards-Version: 3.9.8
+Homepage: https://github.com/Samsung/ONE
+
+Package: nnfw
+Architecture: amd64
+Multi-Arch: same
+Depends: ${shlibs:Depends}, ${misc:Depends}
+Description: one-runtime package
+
+Package: nnfw-dev
+Architecture: amd64
+Multi-Arch: same
+Depends: nnfw, ${shlibs:Depends}, ${misc:Depends}
+Description: one-runtime development package
diff --git a/infra/debian/runtime/copyright b/infra/debian/runtime/copyright
new file mode 100644
index 000000000..bb64695a4
--- /dev/null
+++ b/infra/debian/runtime/copyright
@@ -0,0 +1,3 @@
+Files: *
+License: Proprietary
+Copyright (c) <2018> <Samsung Electronics Co.,Ltd.>
diff --git a/infra/debian/runtime/nnfw-dev.install b/infra/debian/runtime/nnfw-dev.install
new file mode 100644
index 000000000..f246e7c24
--- /dev/null
+++ b/infra/debian/runtime/nnfw-dev.install
@@ -0,0 +1,4 @@
+# {FILES_TO_INSTALL} {DEST_DIR}
+# include
+usr/include/nnfw usr/include/
+usr/lib/pkgconfig/*.pc usr/lib/pkgconfig/
diff --git a/infra/debian/runtime/nnfw.install b/infra/debian/runtime/nnfw.install
new file mode 100644
index 000000000..44be07c9c
--- /dev/null
+++ b/infra/debian/runtime/nnfw.install
@@ -0,0 +1,3 @@
+# {FILES_TO_INSTALL} {DEST_DIR}
+# lib
+usr/lib/*.so usr/lib/
diff --git a/infra/debian/runtime/rules b/infra/debian/runtime/rules
new file mode 100755
index 000000000..a228196e9
--- /dev/null
+++ b/infra/debian/runtime/rules
@@ -0,0 +1,22 @@
+#!/usr/bin/make -f
+DEBVER := $(shell dpkg-parsechangelog -SVersion)
+export DH_VERBOSE = 1
+export _DESTDIR = debian/tmp/
+export BUILD_TYPE=release
+export OPTIONS=-DBUILD_LOGGING=0 -DBUILD_TFLITE_COMPARATOR_TEST_TOOL=0 -DBUILD_ONERT_RUN=0 -DBUILD_TFLITE_RUN=0 -DBUILD_RUNTIME_NNAPI_TEST=0 -DBUILD_TFLITE_VANILLA_RUN=0 -DBUILD_TENSORFLOW_LITE_2_8_0=0 -DBUILD_TENSORFLOW_LITE=0
+export DEBIAN_BUILD=1
+export INSTALL_PATH=debian/tmp/usr/
+%:
+ dh $@
+
+override_dh_auto_build:
+ make -f Makefile.template
+override_dh_auto_install:
+ make -f Makefile.template install
+override_dh_install:
+ install -d debian/tmp/usr/lib/pkgconfig
+ sed -i 's:@libdir@:\/usr\/lib:g' ./packaging/nnfw.pc.in
+ sed -i 's:@includedir@:\/usr\/include:g' ./packaging/nnfw.pc.in
+ sed -i 's:@version@:${DEBVER}:g' ./packaging/nnfw.pc.in
+ install -m 0644 packaging/nnfw.pc.in -T debian/tmp/usr/lib/pkgconfig/nnfw.pc
+ dh_install
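The three sed substitutions above fill in the `@libdir@`, `@includedir@`, and `@version@` placeholders, turning the `nnfw.pc.in` template into a regular pkg-config file shipped by `nnfw-dev`. A sketch of the expected consumer usage (`my_app.c` is a placeholder):

    $ pkg-config --modversion nnfw     # version comes from the changelog
    $ gcc my_app.c $(pkg-config --cflags --libs nnfw)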
diff --git a/infra/debian/runtime/source/format b/infra/debian/runtime/source/format
new file mode 100644
index 000000000..89ae9db8f
--- /dev/null
+++ b/infra/debian/runtime/source/format
@@ -0,0 +1 @@
+3.0 (native)
diff --git a/infra/debian/runtime/source/local-options b/infra/debian/runtime/source/local-options
new file mode 100644
index 000000000..296a73032
--- /dev/null
+++ b/infra/debian/runtime/source/local-options
@@ -0,0 +1,2 @@
+# This is for reproducible builds. Otherwise, `debuild` recognizes build artifacts as source files.
+diff-ignore="build|externals"
diff --git a/infra/docker/Dockerfile b/infra/docker/Dockerfile
deleted file mode 100644
index 052cc4fb6..000000000
--- a/infra/docker/Dockerfile
+++ /dev/null
@@ -1,66 +0,0 @@
-FROM ubuntu:16.04
-
-ARG UBUNTU_MIRROR
-
-RUN if [ -n "$http_proxy" ] ; then echo "Acquire::http::proxy \"${http_proxy}\";" >> /etc/apt/apt.conf ; fi
-RUN if [ -n "$https_proxy" ] ; then echo "Acquire::https::proxy \"${https_proxy}\";" >> /etc/apt/apt.conf ; fi
-RUN if [ -n "$UBUNTU_MIRROR" ] ; then sed "s/archive.ubuntu.com/${UBUNTU_MIRROR}/g" -i /etc/apt/sources.list ; fi
-
-# Install 'add-apt-repository'
-RUN apt-get update && apt-get -qqy install software-properties-common
-
-# Build tool
-RUN apt-get update && apt-get -qqy install build-essential cmake scons git lcov
-
-# Install extra dependencies (Caffe, nnkit)
-RUN apt-get update && apt-get -qqy install libboost-all-dev libgflags-dev libgoogle-glog-dev libatlas-base-dev libhdf5-dev
-
-# Install protocol buffer
-RUN apt-get update && apt-get -qqy install libprotobuf-dev protobuf-compiler
-
-# Additonal tools
-RUN apt-get update && apt-get -qqy install doxygen graphviz wget unzip clang-format-3.9 python3 python3-pip python3-venv hdf5-tools pylint
-RUN pip3 install --upgrade pip
-RUN pip3 install yapf==0.22.0 numpy
-
-# Install google test (source)
-RUN apt-get update && apt-get -qqy install libgtest-dev
-
-###
-### NOTE: Don't add new package install using apt-get or pip below this line
-###
-
-# Install native build tool gcc version 6.x
-RUN add-apt-repository ppa:ubuntu-toolchain-r/test && apt-get update && apt-get -qqy install gcc-6 g++-6
-RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-6 60 --slave /usr/bin/g++ g++ /usr/bin/g++-6 && update-alternatives --config gcc
-
-# Install cross build tool gcc version 6.x
-RUN wget https://releases.linaro.org/components/toolchain/binaries/6.3-2017.02/arm-linux-gnueabihf/gcc-linaro-6.3.1-2017.02-x86_64_arm-linux-gnueabihf.tar.xz -O gcc-hardfp.tar.xz -nv
-RUN wget https://releases.linaro.org/components/toolchain/binaries/6.2-2016.11/arm-linux-gnueabi/gcc-linaro-6.2.1-2016.11-x86_64_arm-linux-gnueabi.tar.xz -O gcc-softfp.tar.xz -nv
-RUN wget https://releases.linaro.org/components/toolchain/binaries/6.2-2016.11/aarch64-linux-gnu/gcc-linaro-6.2.1-2016.11-x86_64_aarch64-linux-gnu.tar.xz -O gcc-aarch64.tar.xz -nv
-RUN tar -xf gcc-hardfp.tar.xz -C /opt/ && rm -rf gcc-hardfp.tar.xz
-RUN tar -xf gcc-softfp.tar.xz -C /opt/ && rm -rf gcc-softfp.tar.xz
-RUN tar -xf gcc-aarch64.tar.xz -C /opt/ && rm -rf gcc-aarch64.tar.xz
-ENV PATH "/opt/gcc-linaro-6.2.1-2016.11-x86_64_arm-linux-gnueabi/bin:/opt/gcc-linaro-6.3.1-2017.02-x86_64_arm-linux-gnueabihf/bin:/opt/gcc-linaro-6.2.1-2016.11-x86_64_aarch64-linux-gnu/bin:$PATH"
-
-###
-### NOTE: Don't add build & install process using installed buildtool above this line
-###
-
-# Build and install google test static libraries
-WORKDIR /root/gtest
-RUN cmake /usr/src/gtest
-RUN make
-RUN mv *.a /usr/lib
-WORKDIR /root
-RUN rm -rf gtest
-
-# Install gbs & sdb
-RUN echo 'deb [trusted=yes] http://download.tizen.org/tools/latest-release/Ubuntu_16.04/ /' | cat >> /etc/apt/sources.list
-RUN apt-get update && apt-get -qqy install gbs
-RUN wget http://download.tizen.org/sdk/tizenstudio/official/binary/sdb_3.1.4_ubuntu-64.zip -O sdb.zip
-RUN unzip -d tmp sdb.zip && rm sdb.zip
-RUN cp tmp/data/tools/sdb /usr/bin/. && rm -rf tmp
-
-# Clean archives (to reduce image size)
-RUN apt-get clean -y
diff --git a/infra/docker/Dockerfile.1804 b/infra/docker/Dockerfile.1804
deleted file mode 100644
index cc31bba1f..000000000
--- a/infra/docker/Dockerfile.1804
+++ /dev/null
@@ -1,41 +0,0 @@
-FROM ubuntu:18.04
-
-ARG UBUNTU_MIRROR
-
-# Install 'add-apt-repository'
-RUN apt-get update && apt-get -qqy install software-properties-common
-
-# Build tool
-RUN apt-get update && apt-get -qqy install build-essential cmake scons git lcov g++-arm-linux-gnueabihf g++-aarch64-linux-gnu
-
-# Install extra dependencies (Caffe, nnkit)
-RUN apt-get update && apt-get -qqy install libboost-all-dev libgflags-dev libgoogle-glog-dev libatlas-base-dev libhdf5-dev
-
-# Install protocol buffer
-RUN apt-get update && apt-get -qqy install libprotobuf-dev protobuf-compiler
-
-# Additonal tools
-RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -qqy install doxygen graphviz wget unzip clang-format-3.9 python3 python3-pip python3-venv hdf5-tools pylint
-RUN pip3 install --upgrade pip
-RUN pip3 install yapf==0.22.0 numpy
-
-# Install google test (source)
-RUN apt-get update && apt-get -qqy install libgtest-dev
-
-# Build and install google test static libraries
-WORKDIR /root/gtest
-RUN cmake /usr/src/gtest
-RUN make
-RUN mv *.a /usr/lib
-WORKDIR /root
-RUN rm -rf gtest
-
-# Install gbs & sdb
-RUN echo 'deb [trusted=yes] http://download.tizen.org/tools/latest-release/Ubuntu_18.04/ /' | cat >> /etc/apt/sources.list
-RUN apt-get update && apt-get -qqy install gbs
-RUN wget http://download.tizen.org/sdk/tizenstudio/official/binary/sdb_3.1.4_ubuntu-64.zip -O sdb.zip
-RUN unzip -d tmp sdb.zip && rm sdb.zip
-RUN cp tmp/data/tools/sdb /usr/bin/. && rm -rf tmp
-
-# Clean archives (to reduce image size)
-RUN apt-get clean -y
diff --git a/infra/docker/bionic/Dockerfile b/infra/docker/bionic/Dockerfile
new file mode 100644
index 000000000..383fddc2d
--- /dev/null
+++ b/infra/docker/bionic/Dockerfile
@@ -0,0 +1,145 @@
+# Copyright 2016-2020 Jing Li
+# Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FROM ubuntu:18.04
+
+ARG UBUNTU_MIRROR
+
+# Install 'add-apt-repository'
+RUN apt-get update && apt-get -qqy install software-properties-common
+
+# Git repo for latest version (github checkout@v2 action requires v2.18)
+RUN add-apt-repository ppa:git-core/ppa -y
+
+# Build tool
+RUN apt-get update && apt-get -qqy install build-essential cmake scons git g++-arm-linux-gnueabihf g++-aarch64-linux-gnu
+
+# ARM none eabi build tool
+RUN apt-get update && apt-get -qqy install gcc-arm-none-eabi
+
+# Debian build tool
+RUN apt-get update && apt-get -qqy install fakeroot devscripts debhelper python3-all
+
+# Install extra dependencies (Caffe, nnkit)
+RUN apt-get update && apt-get -qqy install libboost-all-dev libgflags-dev libgoogle-glog-dev libatlas-base-dev libhdf5-dev
+
+# Install protocol buffer
+RUN apt-get update && apt-get -qqy install libprotobuf-dev protobuf-compiler
+
+# Additional tools
+RUN apt-get update && \
+ DEBIAN_FRONTEND=noninteractive \
+ apt-get -qqy install doxygen graphviz wget zip unzip clang-format-8 python3 python3-pip python3-venv python3-dev hdf5-tools pylint curl
+RUN apt-get update && apt-get -qqy install python3.8 python3.8-venv python3.8-dev
+RUN python3 -m pip install --upgrade pip
+RUN python3 -m pip install yapf==0.22.0 numpy flatbuffers
+RUN python3.8 -m pip install --upgrade pip
+RUN python3.8 -m pip install numpy flatbuffers
+
+# Install google test (source)
+RUN apt-get update && apt-get -qqy install libgtest-dev
+
+# Install build tool gcc version 8.x and set alternative link (c++17 support)
+RUN apt-get update && apt-get -qqy install g++-8 g++-8-arm-linux-gnueabihf g++-8-aarch64-linux-gnu
+RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 80 \
+ --slave /usr/bin/g++ g++ /usr/bin/g++-8 \
+ --slave /usr/bin/gcov gcov /usr/bin/gcov-8
+RUN update-alternatives --install /usr/bin/arm-linux-gnueabihf-gcc arm-linux-gnueabihf-gcc /usr/bin/arm-linux-gnueabihf-gcc-8 80 \
+ --slave /usr/bin/arm-linux-gnueabihf-g++ arm-linux-gnueabihf-g++ /usr/bin/arm-linux-gnueabihf-g++-8 \
+ --slave /usr/bin/arm-linux-gnueabihf-gcov arm-linux-gnueabihf-gcov /usr/bin/arm-linux-gnueabihf-gcov-8
+RUN update-alternatives --install /usr/bin/aarch64-linux-gnu-gcc aarch64-linux-gnu-gcc /usr/bin/aarch64-linux-gnu-gcc-8 80 \
+ --slave /usr/bin/aarch64-linux-gnu-g++ aarch64-linux-gnu-g++ /usr/bin/aarch64-linux-gnu-g++-8 \
+ --slave /usr/bin/aarch64-linux-gnu-gcov aarch64-linux-gnu-gcov /usr/bin/aarch64-linux-gnu-gcov-8
+
+# Install lcov 1.14-2 for gcc-8 support
+# The default version, lcov 1.13-3, does not support gcc-8
+# lcov 1.13-4 with gcc-8 has a bug: it reports no coverage for class declarations
+WORKDIR /root/lcov
+RUN wget http://archive.ubuntu.com/ubuntu/pool/universe/l/lcov/lcov_1.14-2_all.deb
+RUN apt-get update && apt-get -qqy install libperlio-gzip-perl libjson-perl
+RUN dpkg -i lcov_1.14-2_all.deb
+WORKDIR /root
+RUN rm -rf /root/lcov
+
+# Build and install google test static libraries
+WORKDIR /root/gtest
+RUN cmake /usr/src/gtest
+RUN make
+RUN mv *.a /usr/lib
+WORKDIR /root
+RUN rm -rf gtest
+
+# Install gbs & sdb
+RUN echo 'deb [trusted=yes] http://download.tizen.org/tools/latest-release/Ubuntu_18.04/ /' | cat >> /etc/apt/sources.list
+RUN apt-get update && apt-get -qqy install gbs
+RUN wget http://download.tizen.org/sdk/tizenstudio/official/binary/sdb_3.1.4_ubuntu-64.zip -O sdb.zip
+RUN unzip -d tmp sdb.zip && rm sdb.zip
+RUN cp tmp/data/tools/sdb /usr/bin/. && rm -rf tmp/*
+
+# Install java
+RUN apt-get install -y --no-install-recommends openjdk-8-jdk
+
+# download and install Gradle
+# https://services.gradle.org/distributions/
+ARG GRADLE_VERSION=6.4.1
+ARG GRADLE_DIST=bin
+RUN cd /opt && \
+ wget -q https://services.gradle.org/distributions/gradle-${GRADLE_VERSION}-${GRADLE_DIST}.zip && \
+ unzip gradle*.zip && \
+ ls -d */ | sed 's/\/*$//g' | xargs -I{} mv {} gradle && \
+ rm gradle*.zip
+
+# download and install Android SDK
+# https://developer.android.com/studio#command-tools
+ARG ANDROID_SDK_VERSION=6514223
+ENV ANDROID_SDK_ROOT /opt/android-sdk
+RUN mkdir -p ${ANDROID_SDK_ROOT}/cmdline-tools && \
+ wget -q https://dl.google.com/android/repository/commandlinetools-linux-${ANDROID_SDK_VERSION}_latest.zip && \
+ unzip *tools*linux*.zip -d ${ANDROID_SDK_ROOT}/cmdline-tools && \
+ rm *tools*linux*.zip
+
+# accept the license agreements of the SDK components
+RUN mkdir -p ${ANDROID_SDK_ROOT}/licenses
+RUN echo 24333f8a63b6825ea9c5514f83c2829b004d1fee > ${ANDROID_SDK_ROOT}/licenses/android-sdk-license
+RUN echo d56f5187479451eabf01fb78af6dfcb131a6481e >> ${ANDROID_SDK_ROOT}/licenses/android-sdk-license
+
+# Env variable for gradle build
+ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64
+ENV GRADLE_HOME /opt/gradle
+ENV PATH ${PATH}:${GRADLE_HOME}/bin:${ANDROID_SDK_ROOT}/cmdline-tools/tools/bin:${ANDROID_SDK_ROOT}/platform-tools
+ENV ANDROID_HOME ${ANDROID_SDK_ROOT}
+
+# Install NDK
+RUN sdkmanager --install "ndk;20.0.5594570"
+RUN sdkmanager "platform-tools"
+
+# Env for ko encoding build
+ENV LC_ALL "C.UTF-8"
+
+# setup adb server
+EXPOSE 5037
+
+# Setup user to match host user, and give superuser permissions
+ARG USER_ID=1000
+ARG GROUP_ID=${USER_ID}
+RUN addgroup --gid ${GROUP_ID} ubuntu && adduser --disabled-password --gecos '' --uid ${USER_ID} --gid ${GROUP_ID} ubuntu && usermod -aG sudo ubuntu
+RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
+RUN echo 'ubuntu ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
+
+# Clean archives (to reduce image size)
+RUN apt-get clean -y
+
+# Set user to the one we just created
+USER ${USER_ID}
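The `USER_ID`/`GROUP_ID` build arguments above exist so that files created inside the container are owned by the host user. A typical build-and-run invocation under that assumption (the image tag and mount path are hypothetical):

    $ docker build -t one-devel-bionic \
        --build-arg USER_ID="$(id -u)" --build-arg GROUP_ID="$(id -g)" \
        -f infra/docker/bionic/Dockerfile .
    $ docker run --rm -it -v "$(pwd)":/home/ubuntu/ONE one-devel-bionic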
diff --git a/infra/docker/bionic/Dockerfile.aarch64 b/infra/docker/bionic/Dockerfile.aarch64
new file mode 100644
index 000000000..08d712c96
--- /dev/null
+++ b/infra/docker/bionic/Dockerfile.aarch64
@@ -0,0 +1,92 @@
+# Copyright 2016-2020 Jing Li
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FROM ubuntu:18.04
+
+ARG UBUNTU_MIRROR
+
+# Install 'add-apt-repository'
+RUN apt-get update && apt-get -qqy install software-properties-common
+
+# Git repo for latest version (github checkout@v2 action requires v2.18)
+RUN add-apt-repository ppa:git-core/ppa -y
+
+# Build tool
+RUN apt-get update && apt-get -qqy install build-essential cmake scons git g++-arm-linux-gnueabihf
+
+# ARM none eabi build tool
+RUN apt-get update && apt-get -qqy install gcc-arm-none-eabi
+
+# Debian build tool
+RUN apt-get update && apt-get -qqy install fakeroot devscripts debhelper python3-all
+
+# Install extra dependencies (Caffe, nnkit)
+RUN apt-get update && apt-get -qqy install libboost-all-dev libgflags-dev libgoogle-glog-dev libatlas-base-dev libhdf5-dev
+
+# Install protocol buffer
+RUN apt-get update && apt-get -qqy install libprotobuf-dev protobuf-compiler
+
+# Additional tools
+RUN apt-get update && \
+ DEBIAN_FRONTEND=noninteractive \
+ apt-get -qqy install doxygen graphviz wget zip unzip clang-format-8 python3 python3-pip python3-venv python3-dev hdf5-tools pylint curl
+RUN apt-get update && apt-get -qqy install python3.8 python3.8-venv python3.8-dev
+RUN python3 -m pip install --upgrade pip
+RUN python3 -m pip install yapf==0.22.0 numpy flatbuffers
+RUN python3.8 -m pip install --upgrade pip
+RUN python3.8 -m pip install numpy flatbuffers
+
+# Install google test (source)
+RUN apt-get update && apt-get -qqy install libgtest-dev
+
+# Install build tool gcc version 8.x and set alternative link (c++17 support)
+RUN apt-get update && apt-get -qqy install g++-8 g++-8-arm-linux-gnueabihf
+RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 80 \
+ --slave /usr/bin/g++ g++ /usr/bin/g++-8 \
+ --slave /usr/bin/gcov gcov /usr/bin/gcov-8
+RUN update-alternatives --install /usr/bin/arm-linux-gnueabihf-gcc arm-linux-gnueabihf-gcc /usr/bin/arm-linux-gnueabihf-gcc-8 80 \
+ --slave /usr/bin/arm-linux-gnueabihf-g++ arm-linux-gnueabihf-g++ /usr/bin/arm-linux-gnueabihf-g++-8 \
+ --slave /usr/bin/arm-linux-gnueabihf-gcov arm-linux-gnueabihf-gcov /usr/bin/arm-linux-gnueabihf-gcov-8
+
+# Install lcov 1.14-2 for gcc-8 support
+# The default version, lcov 1.13-3, does not support gcc-8
+# lcov 1.13-4 with gcc-8 has a bug: it reports no coverage for class declarations
+WORKDIR /root/lcov
+RUN wget http://archive.ubuntu.com/ubuntu/pool/universe/l/lcov/lcov_1.14-2_all.deb
+RUN apt-get update && apt-get -qqy install libperlio-gzip-perl libjson-perl
+RUN dpkg -i lcov_1.14-2_all.deb
+WORKDIR /root
+RUN rm -rf /root/lcov
+
+# Build and install google test static libraries
+WORKDIR /root/gtest
+RUN cmake /usr/src/gtest
+RUN make
+RUN mv *.a /usr/lib
+WORKDIR /root
+RUN rm -rf gtest
+
+# Setup user to match host user, and give superuser permissions
+ARG USER_ID=1000
+ARG GROUP_ID=${USER_ID}
+RUN addgroup --gid ${GROUP_ID} ubuntu && adduser --disabled-password --gecos '' --uid ${USER_ID} --gid ${GROUP_ID} ubuntu && usermod -aG sudo ubuntu
+RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
+RUN echo 'ubuntu ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
+
+# Clean archives (to reduce image size)
+RUN apt-get clean -y
+
+# Set user to the one we just created
+USER ${USER_ID}
diff --git a/infra/docker/focal/Dockerfile b/infra/docker/focal/Dockerfile
new file mode 100644
index 000000000..0c6c582e9
--- /dev/null
+++ b/infra/docker/focal/Dockerfile
@@ -0,0 +1,108 @@
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FROM ubuntu:20.04
+
+ARG UBUNTU_MIRROR
+
+# Install 'add-apt-repository'
+RUN apt-get update && apt-get -qqy install software-properties-common
+
+# Build tool
+RUN apt-get update && apt-get -qqy install build-essential cmake scons git lcov g++-arm-linux-gnueabihf g++-aarch64-linux-gnu
+
+# Debian build tool
+RUN apt-get update && apt-get -qqy install fakeroot devscripts debhelper python3-all dh-python
+
+# Install extra dependencies (Caffe, nnkit)
+RUN apt-get update && apt-get -qqy install libboost-all-dev libgflags-dev libgoogle-glog-dev libatlas-base-dev libhdf5-dev
+
+# Install protocol buffer
+RUN apt-get update && apt-get -qqy install libprotobuf-dev protobuf-compiler
+
+# Additional tools
+RUN apt-get update && \
+ DEBIAN_FRONTEND=noninteractive \
+ apt-get -qqy install doxygen graphviz wget zip unzip clang-format-8 python3 python3-pip python3-venv python3-dev hdf5-tools pylint curl
+RUN python3 -m pip install --upgrade pip
+RUN python3 -m pip install yapf==0.22.0 numpy flatbuffers
+
+# Install google test (source)
+RUN apt-get update && apt-get -qqy install libgtest-dev
+
+# Install gbs & sdb
+RUN echo 'deb [trusted=yes] http://download.tizen.org/tools/latest-release/Ubuntu_20.04/ /' | cat >> /etc/apt/sources.list
+RUN apt-get update && apt-get -qqy install gbs
+RUN wget http://download.tizen.org/sdk/tizenstudio/official/binary/sdb_4.2.19_ubuntu-64.zip -O sdb.zip
+RUN unzip -d tmp sdb.zip && rm sdb.zip
+RUN cp tmp/data/tools/sdb /usr/bin/. && rm -rf tmp/*
+
+# ARM none eabi build tool
+RUN apt-get update && apt-get -qqy install gcc-arm-none-eabi
+
+# Install java
+RUN apt-get install -y --no-install-recommends openjdk-8-jdk
+
+# download and install Gradle
+# https://services.gradle.org/distributions/
+ARG GRADLE_VERSION=6.4.1
+ARG GRADLE_DIST=bin
+RUN cd /opt && \
+ wget -q https://services.gradle.org/distributions/gradle-${GRADLE_VERSION}-${GRADLE_DIST}.zip && \
+ unzip gradle*.zip && \
+ ls -d */ | sed 's/\/*$//g' | xargs -I{} mv {} gradle && \
+ rm gradle*.zip
+
+# download and install Android SDK
+# https://developer.android.com/studio#command-tools
+ARG ANDROID_SDK_VERSION=6514223
+ENV ANDROID_SDK_ROOT /opt/android-sdk
+RUN mkdir -p ${ANDROID_SDK_ROOT}/cmdline-tools && \
+ wget -q https://dl.google.com/android/repository/commandlinetools-linux-${ANDROID_SDK_VERSION}_latest.zip && \
+ unzip *tools*linux*.zip -d ${ANDROID_SDK_ROOT}/cmdline-tools && \
+ rm *tools*linux*.zip
+
+# accept the license agreements of the SDK components
+RUN mkdir -p ${ANDROID_SDK_ROOT}/licenses
+RUN echo 24333f8a63b6825ea9c5514f83c2829b004d1fee > ${ANDROID_SDK_ROOT}/licenses/android-sdk-license
+RUN echo d56f5187479451eabf01fb78af6dfcb131a6481e >> ${ANDROID_SDK_ROOT}/licenses/android-sdk-license
+
+# Env variable for gradle build
+ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64
+ENV GRADLE_HOME /opt/gradle
+ENV PATH ${PATH}:${GRADLE_HOME}/bin:${ANDROID_SDK_ROOT}/cmdline-tools/tools/bin:${ANDROID_SDK_ROOT}/platform-tools
+ENV ANDROID_HOME ${ANDROID_SDK_ROOT}
+
+# Install NDK
+RUN sdkmanager --install "ndk;20.0.5594570"
+RUN sdkmanager "platform-tools"
+
+# Env for ko encoding build
+ENV LC_ALL "C.UTF-8"
+
+# setup adb server
+EXPOSE 5037
+
+# Setup user to match host user, and give superuser permissions
+ARG USER_ID=1000
+ARG GROUP_ID=${USER_ID}
+RUN addgroup --gid ${GROUP_ID} ubuntu && adduser --disabled-password --gecos '' --uid ${USER_ID} --gid ${GROUP_ID} ubuntu && usermod -aG sudo ubuntu
+RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
+RUN echo 'ubuntu ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
+
+# Clean archives (to reduce image size)
+RUN apt-get clean -y
+
+# Set user to the one we just created
+USER ${USER_ID}
diff --git a/infra/docker/focal/Dockerfile.aarch64 b/infra/docker/focal/Dockerfile.aarch64
new file mode 100644
index 000000000..b63bbb10f
--- /dev/null
+++ b/infra/docker/focal/Dockerfile.aarch64
@@ -0,0 +1,62 @@
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FROM ubuntu:20.04
+
+ARG UBUNTU_MIRROR
+
+# Install 'add-apt-repository'
+RUN apt-get update && apt-get -qqy install software-properties-common
+
+# Build tool
+RUN apt-get update && apt-get -qqy install build-essential cmake scons git lcov g++-arm-linux-gnueabihf
+
+# Debian build tool
+RUN apt-get update && apt-get -qqy install fakeroot devscripts debhelper python3-all dh-python
+
+# Install extra dependencies (Caffe, nnkit)
+RUN apt-get update && apt-get -qqy install libboost-all-dev libgflags-dev libgoogle-glog-dev libatlas-base-dev libhdf5-dev
+
+# Install protocol buffer
+RUN apt-get update && apt-get -qqy install libprotobuf-dev protobuf-compiler
+
+# Additional tools
+RUN apt-get update && \
+ DEBIAN_FRONTEND=noninteractive \
+ apt-get -qqy install doxygen graphviz wget zip unzip clang-format-8 python3 python3-pip python3-venv python3-dev hdf5-tools pylint curl
+RUN python3 -m pip install --upgrade pip
+RUN python3 -m pip install yapf==0.22.0 numpy flatbuffers
+
+# Install google test (source)
+RUN apt-get update && apt-get -qqy install libgtest-dev
+
+# Install gbs & sdb
+RUN echo 'deb [trusted=yes] http://download.tizen.org/tools/latest-release/Ubuntu_20.04/ /' | cat >> /etc/apt/sources.list
+RUN apt-get update && apt-get -qqy install gbs
+RUN wget http://download.tizen.org/sdk/tizenstudio/official/binary/sdb_4.2.19_ubuntu-64.zip -O sdb.zip
+RUN unzip -d tmp sdb.zip && rm sdb.zip
+RUN cp tmp/data/tools/sdb /usr/bin/. && rm -rf tmp/*
+
+# Setup user to match host user, and give superuser permissions
+ARG USER_ID=1000
+ARG GROUP_ID=${USER_ID}
+RUN addgroup --gid ${GROUP_ID} ubuntu && adduser --disabled-password --gecos '' --uid ${USER_ID} --gid ${GROUP_ID} ubuntu && usermod -aG sudo ubuntu
+RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
+RUN echo 'ubuntu ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
+
+# Clean archives (to reduce image size)
+RUN apt-get clean -y
+
+# Set user to the one we just created
+USER ${USER_ID}
diff --git a/infra/docker/jammy/Dockerfile b/infra/docker/jammy/Dockerfile
new file mode 100644
index 000000000..aa500b0f4
--- /dev/null
+++ b/infra/docker/jammy/Dockerfile
@@ -0,0 +1,60 @@
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FROM ubuntu:jammy
+
+ARG UBUNTU_MIRROR
+
+# Install 'add-apt-repository'
+RUN apt-get update && apt-get -qqy install software-properties-common
+
+# Build tool
+RUN apt-get update && apt-get -qqy install build-essential cmake scons git lcov g++-arm-linux-gnueabihf g++-aarch64-linux-gnu
+
+# Debian build tool
+RUN apt-get update && apt-get -qqy install fakeroot devscripts debhelper python3-all dh-python
+
+# Install extra dependencies (Caffe, nnkit)
+RUN apt-get update && apt-get -qqy install libboost-all-dev libgflags-dev libgoogle-glog-dev libatlas-base-dev libhdf5-dev
+
+# Install protocol buffer
+RUN apt-get update && apt-get -qqy install libprotobuf-dev protobuf-compiler
+
+# Additional tools
+# TODO install clang-format (No official clang-format-8 package for ubuntu jammy)
+RUN apt-get update && \
+ DEBIAN_FRONTEND=noninteractive \
+ apt-get -qqy install doxygen graphviz wget zip unzip python3 python3-pip python3-venv python3-dev hdf5-tools pylint curl
+RUN python3 -m pip install --upgrade pip
+RUN python3 -m pip install yapf==0.22.0 numpy flatbuffers
+
+# Install google test (source)
+RUN apt-get update && apt-get -qqy install libgtest-dev
+
+# TODO: Install gbs & sdb
+# gbs & sdb do not support Ubuntu jammy yet
+
+# Setup user to match host user, and give superuser permissions
+ARG USER_ID=1000
+ARG GROUP_ID=${USER_ID}
+RUN apt-get update && apt-get -qqy install sudo
+RUN addgroup --gid ${GROUP_ID} ubuntu && adduser --disabled-password --gecos '' --uid ${USER_ID} --gid ${GROUP_ID} ubuntu && usermod -aG sudo ubuntu
+RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
+RUN echo 'ubuntu ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
+
+# Clean archives (to reduce image size)
+RUN apt-get clean -y
+
+# Set user to the one we just created
+USER ${USER_ID}
diff --git a/infra/docker/jammy/Dockerfile.aarch64 b/infra/docker/jammy/Dockerfile.aarch64
new file mode 100644
index 000000000..a6a449dd6
--- /dev/null
+++ b/infra/docker/jammy/Dockerfile.aarch64
@@ -0,0 +1,60 @@
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FROM ubuntu:jammy
+
+ARG UBUNTU_MIRROR
+
+# Install 'add-apt-repository'
+RUN apt-get update && apt-get -qqy install software-properties-common
+
+# Build tool
+RUN apt-get update && apt-get -qqy install build-essential cmake scons git lcov g++-arm-linux-gnueabihf
+
+# Debian build tool
+RUN apt-get update && apt-get -qqy install fakeroot devscripts debhelper python3-all dh-python
+
+# Install extra dependencies (Caffe, nnkit)
+RUN apt-get update && apt-get -qqy install libboost-all-dev libgflags-dev libgoogle-glog-dev libatlas-base-dev libhdf5-dev
+
+# Install protocol buffer
+RUN apt-get update && apt-get -qqy install libprotobuf-dev protobuf-compiler
+
+# Additional tools
+# TODO install clang-format (No official clang-format-8 package for ubuntu jammy)
+RUN apt-get update && \
+ DEBIAN_FRONTEND=noninteractive \
+ apt-get -qqy install doxygen graphviz wget zip unzip python3 python3-pip python3-venv python3-dev hdf5-tools pylint curl
+RUN python3 -m pip install --upgrade pip
+RUN python3 -m pip install yapf==0.22.0 numpy flatbuffers
+
+# Install google test (source)
+RUN apt-get update && apt-get -qqy install libgtest-dev
+
+# TODO: Install gbs & sdb
+# gbs & sdb do not support Ubuntu jammy yet
+
+# Setup user to match host user, and give superuser permissions
+ARG USER_ID=1000
+ARG GROUP_ID=${USER_ID}
+RUN apt-get update && apt-get -qqy install sudo
+RUN addgroup --gid ${GROUP_ID} ubuntu && adduser --disabled-password --gecos '' --uid ${USER_ID} --gid ${GROUP_ID} ubuntu && usermod -aG sudo ubuntu
+RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
+RUN echo 'ubuntu ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
+
+# Clean archives (to reduce image size)
+RUN apt-get clean -y
+
+# Set user to the one we just created
+USER ${USER_ID}
diff --git a/infra/doxygen/Doxyfile b/infra/doxygen/Doxyfile
index 0dc6fdfff..af2adfcc8 100644
--- a/infra/doxygen/Doxyfile
+++ b/infra/doxygen/Doxyfile
@@ -32,7 +32,7 @@ DOXYFILE_ENCODING = UTF-8
# title of most generated pages and in a few other places.
# The default value is: My Project.
-PROJECT_NAME = nnas
+PROJECT_NAME = "ONE - On-device Neural Engine"
# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
# could be handy for archiving the generated documentation or if some version
@@ -252,7 +252,7 @@ TCL_SUBST =
# members will be omitted, etc.
# The default value is: NO.
-OPTIMIZE_OUTPUT_FOR_C = NO
+OPTIMIZE_OUTPUT_FOR_C = YES
# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or
# Python sources only. Doxygen will then generate output that is more tailored
@@ -623,13 +623,13 @@ STRICT_PROTO_MATCHING = NO
# list. This list is created by putting \todo commands in the documentation.
# The default value is: YES.
-GENERATE_TODOLIST = YES
+GENERATE_TODOLIST = NO
# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test
# list. This list is created by putting \test commands in the documentation.
# The default value is: YES.
-GENERATE_TESTLIST = YES
+GENERATE_TESTLIST = NO
# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug
# list. This list is created by putting \bug commands in the documentation.
@@ -642,7 +642,7 @@ GENERATE_BUGLIST = YES
# the documentation.
# The default value is: YES.
-GENERATE_DEPRECATEDLIST= YES
+GENERATE_DEPRECATEDLIST= NO
# The ENABLED_SECTIONS tag can be used to enable conditional documentation
# sections, marked by \if <section_label> ... \endif and \cond <section_label>
@@ -790,7 +790,14 @@ WARN_LOGFILE =
# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
# Note: If this tag is empty the current directory is searched.
-INPUT =
+INPUT = README.md \
+ docs/howto/ \
+ docs/overview/ \
+ docs/runtime/ \
+ compute/ \
+ compiler/ \
+ onert-micro/ \
+ runtime/
# This tag can be used to specify the character encoding of the source files
# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
@@ -873,23 +880,14 @@ RECURSIVE = YES
# Note that relative paths are relative to the directory from which doxygen is
# run.
-EXCLUDE = Product/ \
- build/ \
- doxygen/ \
- report/ \
- externals/ \
- packaging/ \
- runtimes/contrib/ \
- runtimes/pure_arm_compute/ \
- tests/ \
- tools/
+EXCLUDE =
# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
# directories that are symbolic links (a Unix file system feature) are excluded
# from the input.
# The default value is: NO.
-EXCLUDE_SYMLINKS = NO
+EXCLUDE_SYMLINKS = YES
# If the value of the INPUT tag contains directories, you can use the
# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
@@ -898,7 +896,17 @@ EXCLUDE_SYMLINKS = NO
# Note that the wildcards are matched against the file with absolute path, so to
# exclude all test directories for example use the pattern */test/*
-EXCLUDE_PATTERNS =
+EXCLUDE_PATTERNS = *.test.* \
+ */test/* \
+ */tests/* \
+ */unittest/* \
+ *_generated.* \
+ */3rdparty/* \
+ */contrib/* \
+ */compiler/*/*.md \
+ */compute/*/*.md \
+ */runtime/*/*.md
+
# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
# (namespaces, classes, functions, etc.) that should be excluded from the
@@ -991,7 +999,7 @@ FILTER_SOURCE_PATTERNS =
# (index.html). This can be useful if you have a project on for instance GitHub
# and want to reuse the introduction page also for the doxygen output.
-USE_MDFILE_AS_MAINPAGE = docs/nnfw/roadmap.md
+USE_MDFILE_AS_MAINPAGE = README.md
#---------------------------------------------------------------------------
# Configuration options related to source browsing
@@ -1010,7 +1018,7 @@ SOURCE_BROWSER = YES
# classes and enums directly into the documentation.
# The default value is: NO.
-INLINE_SOURCES = NO
+INLINE_SOURCES = YES
# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any
# special comment blocks from generated source code fragments. Normal C, C++ and
@@ -1023,13 +1031,13 @@ STRIP_CODE_COMMENTS = YES
# function all documented functions referencing it will be listed.
# The default value is: NO.
-REFERENCED_BY_RELATION = NO
+REFERENCED_BY_RELATION = YES
# If the REFERENCES_RELATION tag is set to YES then for each documented function
# all documented entities called/used by that function will be listed.
# The default value is: NO.
-REFERENCES_RELATION = NO
+REFERENCES_RELATION = YES
# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set
# to YES then the hyperlinks from functions in REFERENCES_RELATION and
@@ -2265,7 +2273,7 @@ DOT_FONTPATH =
# The default value is: YES.
# This tag requires that the tag HAVE_DOT is set to YES.
-CLASS_GRAPH = YES
+CLASS_GRAPH = NO
# If the COLLABORATION_GRAPH tag is set to YES then doxygen will generate a
# graph for each documented class showing the direct and indirect implementation
@@ -2310,7 +2318,7 @@ UML_LIMIT_NUM_FIELDS = 10
# The default value is: NO.
# This tag requires that the tag HAVE_DOT is set to YES.
-TEMPLATE_RELATIONS = NO
+TEMPLATE_RELATIONS = YES
# If the INCLUDE_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are set to
# YES then doxygen will generate a graph for each documented file showing the
@@ -2319,7 +2327,7 @@ TEMPLATE_RELATIONS = NO
# The default value is: YES.
# This tag requires that the tag HAVE_DOT is set to YES.
-INCLUDE_GRAPH = YES
+INCLUDE_GRAPH = NO
# If the INCLUDED_BY_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are
# set to YES then doxygen will generate a graph for each documented file showing
@@ -2328,7 +2336,7 @@ INCLUDE_GRAPH = YES
# The default value is: YES.
# This tag requires that the tag HAVE_DOT is set to YES.
-INCLUDED_BY_GRAPH = YES
+INCLUDED_BY_GRAPH = NO
# If the CALL_GRAPH tag is set to YES then doxygen will generate a call
# dependency graph for every global function or class method.
@@ -2340,7 +2348,7 @@ INCLUDED_BY_GRAPH = YES
# The default value is: NO.
# This tag requires that the tag HAVE_DOT is set to YES.
-CALL_GRAPH = YES
+CALL_GRAPH = NO
# If the CALLER_GRAPH tag is set to YES then doxygen will generate a caller
# dependency graph for every global function or class method.
@@ -2352,7 +2360,7 @@ CALL_GRAPH = YES
# The default value is: NO.
# This tag requires that the tag HAVE_DOT is set to YES.
-CALLER_GRAPH = YES
+CALLER_GRAPH = NO
# If the GRAPHICAL_HIERARCHY tag is set to YES then doxygen will graphical
# hierarchy of all classes instead of a textual one.
@@ -2401,7 +2409,7 @@ INTERACTIVE_SVG = NO
# found. If left blank, it is assumed the dot tool can be found in the path.
# This tag requires that the tag HAVE_DOT is set to YES.
-DOT_PATH = /usr/local/bin/dot
+DOT_PATH =
# The DOTFILE_DIRS tag can be used to specify one or more directories that
# contain dot files that are included in the documentation (see the \dotfile
@@ -2450,7 +2458,7 @@ PLANTUML_INCLUDE_PATH =
# Minimum value: 0, maximum value: 10000, default value: 50.
# This tag requires that the tag HAVE_DOT is set to YES.
-DOT_GRAPH_MAX_NODES = 50
+DOT_GRAPH_MAX_NODES = 500
# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the graphs
# generated by dot. A depth value of 3 means that only nodes reachable from the
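With `INPUT` now restricted to the listed directories and `README.md` promoted to the main page, regenerating the documentation should reduce to a single doxygen invocation from the repository root (a sketch; the output location depends on `OUTPUT_DIRECTORY`, which this change does not touch):

    $ doxygen infra/doxygen/Doxyfile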
diff --git a/infra/nncc/CMakeLists.txt b/infra/nncc/CMakeLists.txt
index d416db2fd..bd53c33b1 100644
--- a/infra/nncc/CMakeLists.txt
+++ b/infra/nncc/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.1)
+cmake_minimum_required(VERSION 3.10)
project(nncc)
@@ -11,11 +11,6 @@ set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE)
set(CMAKE_INSTALL_RPATH "$ORIGIN/../lib:$ORIGIN/")
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
-# This feature works with CMake 3.5.2 or later. However, using previous versions does not produce
-# an error. We are still officially using CMake 3.1.0, but put this code for the sake of semantic
-# support in various development tools.
-# Todo: Someday, CMake needs to be updated to 3.7.2 or later to take advantage of improvements
-# such as `cmake-server`.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(NNAS_PROJECT_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/../.." CACHE
@@ -40,12 +35,19 @@ macro(nnas_include PREFIX)
endmacro(nnas_include)
macro(nnas_find_package PREFIX)
- find_package(${PREFIX} CONFIG NO_DEFAULT_PATH
- PATHS ${NNAS_PROJECT_SOURCE_DIR}/infra/cmake/packages
- ${ARGN}
- )
+ find_package(${PREFIX}
+ CONFIG NO_DEFAULT_PATH
+ PATHS ${NNAS_PROJECT_SOURCE_DIR}/infra/cmake/packages
+ ${ARGN})
endmacro(nnas_find_package)
+macro(nnas_find_package_folder PREFIX FIND_FOLDER)
+ find_package(${PREFIX}
+ CONFIG NO_DEFAULT_PATH
+ PATHS ${NNAS_PROJECT_SOURCE_DIR}/infra/cmake/packages ${FIND_FOLDER}
+ ${ARGN})
+endmacro(nnas_find_package_folder)
+
# nncc_find_resource(NAME) will update the following variables
#
# NAME_FOUND
@@ -81,30 +83,12 @@ message(STATUS "Use '${CMAKE_BUILD_TYPE}' configuration")
#
set(THREADS_PREFER_PTHREAD_FLAG TRUE)
-###
-### Configuration
-###
-option(DOWNLOAD_PROTOBUF "Download Protocol Buffer source" ON)
-option(BUILD_PROTOBUF "Locally build Protocol Buffer from the downloaded source" ON)
-option(DOWNLOAD_EIGEN "Download Eigen source" ON)
-option(DOWNLOAD_FARMHASH "Download farmhash source" ON)
-option(DOWNLOAD_GEMMLOWP "Download GEMM low precesion library source" ON)
-option(DOWNLOAD_RUY "Download ruy source" ON)
-option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" ON)
-option(DOWNLOAD_GFLAGS "Download GFlags source" OFF)
-option(DOWNLOAD_FLATBUFFERS "Download FlatBuffers source" ON)
-option(BUILD_FLATBUFFERS "Locally build Flatbuffers from the downloaded source" ON)
-option(DOWNLOAD_TENSORFLOW "Download TensorFlow source" ON)
-option(DOWNLOAD_CAFFE "Download Caffe source" ON)
-option(DOWNLOAD_PYTORCH "Download Pytorch source" ON)
-option(DOWNLOAD_ONNX "Download ONNX source" ON)
-option(DOWNLOAD_ABSEIL "Download Abseil-cpp source" ON)
-option(DOWNLOAD_PYBIND11 "Download Pybind11 source" ON)
-
-option(DOWNLOAD_GTEST "Download Google Test source" ON)
-option(BUILD_GTEST "Build Google Test from the downloaded source" ON)
-option(DOWNLOAD_HDF5 "Download HDF5 source" ON)
-option(BUILD_HDF5 "Build HDF5 from the downloaded source" ON)
+# identify platform: HOST_PLATFORM, TARGET_PLATFORM and related
+# note: this should be placed before flags and options setting
+nnas_include(IdentifyPlatform)
+
+# Configuration flags
+include("cmake/CfgOptionFlags.cmake")
nnas_find_package(GTest QUIET)
@@ -123,11 +107,9 @@ if(${ENABLE_TEST})
include(CTest)
endif(${ENABLE_TEST})
-option(ENABLE_STRICT_BUILD "Treat warning as error" OFF)
-
-# This option might be turned ON for Windows native build.
-# Check our ProtobufConfig.cmake for its usage.
-option(USE_PROTOBUF_LEGACY_IMPORT "Use legacy MODULE mode import rather than CONFIG mode" OFF)
+# apply compilation flags
+# NOTE this should be after all option
+include("cmake/ApplyCompileFlags.cmake")
###
### Target
diff --git a/infra/nncc/Makefile.arm32 b/infra/nncc/Makefile.arm32
new file mode 100644
index 000000000..9ba57ddb2
--- /dev/null
+++ b/infra/nncc/Makefile.arm32
@@ -0,0 +1,152 @@
+#
+# NOTE this is provided as an experimental Makefile for ARM32 cross-building
+# of some compiler modules.
+#
+
+BUILD_TYPE?=Debug
+BUILD_JOBS?=1
+
+CURRENT_DIR=$(shell pwd)
+BUILDFOLDER=build
+ARM32_FOLDER=arm32
+ROOTFS_ARM?=$(CURRENT_DIR)/tools/cross/rootfs/arm
+NNCC_CFG_OPTION_EXTRA?=
+
+TYPE_FOLDER=$(shell echo $(BUILD_TYPE) | tr A-Z a-z)
+
+BUILD_ARM32_FOLDER=$(BUILDFOLDER)/$(ARM32_FOLDER).$(TYPE_FOLDER)
+BUILD_ARM32_HOST=$(BUILDFOLDER)/$(ARM32_FOLDER).$(TYPE_FOLDER).host
+
+ARM32_INSTALL_FOLDER=$(CURRENT_DIR)/$(BUILDFOLDER)/$(ARM32_FOLDER).$(TYPE_FOLDER).install
+ARM32_INSTALL_HOST=$(CURRENT_DIR)/$(BUILDFOLDER)/$(ARM32_FOLDER).$(TYPE_FOLDER).host.install
+
+# ARM32 build
+ARM32_BUILD_ITEMS:=angkor;cwrap;pepper-str;pepper-strcast;pp
+ARM32_BUILD_ITEMS+=;pepper-csv2vec;crew
+ARM32_BUILD_ITEMS+=;oops;pepper-assert
+ARM32_BUILD_ITEMS+=;hermes;hermes-std
+ARM32_BUILD_ITEMS+=;loco;locop;logo-core;logo
+ARM32_BUILD_ITEMS+=;safemain;mio-circle05;mio-tflite280;mio-circle06;mio-tflite2121
+ARM32_BUILD_ITEMS+=;dio-hdf5
+ARM32_BUILD_ITEMS+=;luci-compute
+ARM32_BUILD_ITEMS+=;foder;circle-verify;souschef;arser;vconone
+ARM32_BUILD_ITEMS+=;luci
+ARM32_BUILD_ITEMS+=;luci-interpreter
+ARM32_BUILD_ITEMS+=;tflite2circle
+ARM32_BUILD_ITEMS+=;tflchef;circlechef
+ARM32_BUILD_ITEMS+=;circle2circle;record-minmax;circle-quantizer
+ARM32_BUILD_ITEMS+=;luci-eval-driver;luci-value-test
+
+ARM32_TOOLCHAIN_FILE=cmake/buildtool/cross/toolchain_armv7l-linux.cmake
+
+ARM32_HOST_ITEMS:=angkor;cwrap;pepper-str;pepper-strcast;pp
+ARM32_HOST_ITEMS+=;pepper-csv2vec
+ARM32_HOST_ITEMS+=;oops
+ARM32_HOST_ITEMS+=;hermes;hermes-std
+ARM32_HOST_ITEMS+=;loco;locop;logo-core;logo
+ARM32_HOST_ITEMS+=;safemain;mio-circle05;mio-tflite280;mio-circle06;mio-tflite2121
+ARM32_HOST_ITEMS+=;luci-compute
+ARM32_HOST_ITEMS+=;foder;circle-verify;souschef;arser;vconone
+ARM32_HOST_ITEMS+=;luci
+ARM32_HOST_ITEMS+=;luci-interpreter
+ARM32_HOST_ITEMS+=;tflite2circle
+ARM32_HOST_ITEMS+=;tflchef;circlechef
+ARM32_HOST_ITEMS+=;circle-tensordump
+ARM32_HOST_ITEMS+=;circle2circle
+ARM32_HOST_ITEMS+=;common-artifacts
+ARM32_HOST_ITEMS+=;luci-eval-driver;luci-value-test
+
+
+_EMPTY_:=
+_SPACE_:=$(_EMPTY_) $(_EMPTY_)
+ARM32_BUILD_WHITELIST=$(subst $(_SPACE_),,$(ARM32_BUILD_ITEMS))
+ARM32_HOST_WHITELIST=$(subst $(_SPACE_),,$(ARM32_HOST_ITEMS))
+
+NNCC_CFG_OPTION+= -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_COVERAGE=OFF -DEXTERNALS_BUILD_THREADS=$(BUILD_JOBS)
+
+NNCC_CFG_STRICT= -DENABLE_STRICT_BUILD=ON
+
+INT_TARGETS:=int_configure_arm32 int_configure_arm32_host \
+ int_build_arm32 int_build_arm32_host int_test_arm32_host int_test
+
+NNCC_ARM32_DEBUG= -DBUILD_WHITELIST="$(ARM32_BUILD_WHITELIST)"
+NNCC_ARM32_DEBUG_HOST= -DBUILD_WHITELIST="$(ARM32_HOST_WHITELIST)"
+
+DEF_TARGETS:=all
+
+VAL_TARGETS:=cfg debug test_prep test
+
+.PHONY: $(INT_TARGETS) $(DEF_TARGETS) $(VAL_TARGETS)
+
+.DEFAULT_GOAL := help
+
+help:
+ @echo "cfg : debug configure"
+ @echo "debug : debug build"
+ @echo "test_prep: debug test preparation"
+ @echo "test : debug test in target"
+
+###############################################################################
+# do not call the int_xxxx targets directly, as they depend on environment variables
+
+#
+# configures
+#
+
+int_configure_arm32_host:
+ NNCC_WORKSPACE=$(BUILD_ARM32_HOST) ./nncc configure \
+ $(NNCC_CFG_OPTION) \
+ $(NNCC_ARM32_DEBUG_HOST) $(NNCC_CFG_STRICT) \
+ -DCMAKE_INSTALL_PREFIX="$(ARM32_INSTALL_HOST)" \
+ -DENABLE_TEST=ON
+
+int_configure_arm32:
+ ROOTFS_DIR=$(ROOTFS_ARM) TARGET_ARCH=armv7l \
+ BUILD_HOST_EXEC=$(CURRENT_DIR)/$(BUILD_ARM32_HOST) \
+ NNCC_WORKSPACE=$(BUILD_ARM32_FOLDER) ./nncc configure \
+ $(NNCC_CFG_OPTION) $(NNCC_CFG_OPTION_EXTRA) \
+ $(NNCC_ARM32_DEBUG) $(NNCC_CFG_STRICT) \
+ -DCMAKE_TOOLCHAIN_FILE=$(ARM32_TOOLCHAIN_FILE) \
+ -DCMAKE_INSTALL_PREFIX="$(ARM32_INSTALL_FOLDER)" \
+ -DBUILD_ARM32_NEON=ON \
+ -DENABLE_TEST=ON
+
+# TODO remove BUILD_ARM32_NEON=ON after a while, as the default is ON.
+# It is explicitly added here to prevent using a cached 'BUILD_ARM32_NEON=OFF'
+
+#
+# builds
+#
+int_build_arm32_host:
+ NNCC_WORKSPACE=$(BUILD_ARM32_HOST) ./nncc build -j$(BUILD_JOBS)
+
+int_build_arm32:
+ ROOTFS_DIR=$(ROOTFS_ARM) TARGET_ARCH=armv7l \
+ BUILD_HOST_EXEC=$(CURRENT_DIR)/$(BUILD_ARM32_HOST) \
+ NNCC_WORKSPACE=$(BUILD_ARM32_FOLDER) ./nncc build -j$(BUILD_JOBS)
+
+#
+# host test; run tests on the host to generate random inputs and expected outputs
+#
+int_test_arm32_host:
+ NNCC_WORKSPACE=$(BUILD_ARM32_HOST) ./nncc test
+
+#
+# tests: run on an ARM32 Ubuntu 18.04 device
+#
+int_test:
+ NNCC_WORKSPACE=$(BUILD_ARM32_FOLDER) ./nncc test
+
+################################################################################
+
+all: int_configure_arm32_host int_build_arm32_host int_configure_arm32 int_build_arm32
+
+cfg: int_configure_arm32_host int_build_arm32_host int_configure_arm32
+
+debug: int_build_arm32
+
+# NOTE before running tests on ARM32, tests must be run on the host to prepare test data
+test_prep: int_test_arm32_host
+
+# NOTE run tests on an ARM32 Ubuntu 18.04 device
+test: int_test
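Tying the targets together, a typical cross-build session under this Makefile's defaults might look as follows (a sketch; `ROOTFS_ARM` must point at a prepared ARM rootfs if the default path does not exist):

    $ make -f infra/nncc/Makefile.arm32 cfg                 # configure host and target builds
    $ make -f infra/nncc/Makefile.arm32 debug BUILD_JOBS=4  # cross-build for ARM32
    $ make -f infra/nncc/Makefile.arm32 test_prep           # on the host: generate test data
    $ make -f infra/nncc/Makefile.arm32 test                # on the ARM32 device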
diff --git a/infra/nncc/cmake/ApplyCompileFlags.cmake b/infra/nncc/cmake/ApplyCompileFlags.cmake
new file mode 100644
index 000000000..0cc5f9cd1
--- /dev/null
+++ b/infra/nncc/cmake/ApplyCompileFlags.cmake
@@ -0,0 +1,35 @@
+#
+# Platform independent compile flag setting
+#
+# flags for build type: debug, release
+set(CMAKE_C_FLAGS_DEBUG "-O0 -g -DDEBUG")
+set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -DDEBUG")
+set(CMAKE_C_FLAGS_RELEASE "-O3 -DNDEBUG")
+set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG")
+
+#
+# Platform specific compile flag setting
+#
+if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/buildtool/config/config_${TARGET_PLATFORM}.cmake")
+ include("${CMAKE_CURRENT_LIST_DIR}/buildtool/config/config_${TARGET_PLATFORM}.cmake")
+endif()
+
+#
+# Apply compile flags
+# note: this should be placed after cmake/buildtool/config/config_xxx.cmake files
+#
+# add common flags
+foreach(FLAG ${FLAGS_COMMON})
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAG}")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG}")
+endforeach()
+
+# add c flags
+foreach(FLAG ${FLAGS_CONLY})
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAG}")
+endforeach()
+
+# add cxx flags
+foreach(FLAG ${FLAGS_CXXONLY})
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG}")
+endforeach()
diff --git a/infra/nncc/cmake/CfgOptionFlags.cmake b/infra/nncc/cmake/CfgOptionFlags.cmake
new file mode 100644
index 000000000..773a1f7d0
--- /dev/null
+++ b/infra/nncc/cmake/CfgOptionFlags.cmake
@@ -0,0 +1,58 @@
+#
+# Platform specific configuration
+# note: this should be placed before the default settings for option priority
+# (platform-specific settings have higher priority)
+#
+include("cmake/options/options_${TARGET_PLATFORM}.cmake")
+
+###
+### Configuration
+###
+option(DOWNLOAD_PROTOBUF "Download Protocol Buffer source" ON)
+option(BUILD_PROTOBUF "Locally build Protocol Buffer from the downloaded source" ON)
+option(DOWNLOAD_EIGEN "Download Eigen source" ON)
+option(DOWNLOAD_FARMHASH "Download farmhash source" ON)
+option(DOWNLOAD_GEMMLOWP "Download GEMM low precision library source" ON)
+option(DOWNLOAD_RUY "Download ruy source" ON)
+option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" ON)
+option(DOWNLOAD_GFLAGS "Download GFlags source" OFF)
+option(DOWNLOAD_FLATBUFFERS "Download FlatBuffers source" ON)
+option(BUILD_FLATBUFFERS "Locally build Flatbuffers from the downloaded source" ON)
+option(DOWNLOAD_TENSORFLOW "Download TensorFlow source" ON)
+option(DOWNLOAD_CAFFE "Download Caffe source" ON)
+option(DOWNLOAD_PYTORCH "Download Pytorch source" ON)
+option(DOWNLOAD_ONNX "Download ONNX source" ON)
+option(DOWNLOAD_ABSEIL "Download Abseil-cpp source" ON)
+option(DOWNLOAD_OPENCL_HEADERS "Download OpenCL Headers source" ON)
+option(DOWNLOAD_PYBIND11 "Download Pybind11 source" ON)
+option(DOWNLOAD_JSONCPP "Download Jsoncpp source" ON)
+
+option(DOWNLOAD_GTEST "Download Google Test source" ON)
+option(BUILD_GTEST "Build Google Test from the downloaded source" ON)
+option(DOWNLOAD_HDF5 "Download HDF5 source" ON)
+option(BUILD_HDF5 "Build HDF5 from the downloaded source" ON)
+
+option(ENABLE_STRICT_BUILD "Treat warning as error" OFF)
+
+# This option might be turned ON for Windows native build.
+# Check our ProtobufConfig.cmake for its usage.
+option(USE_PROTOBUF_LEGACY_IMPORT "Use legacy MODULE mode import rather than CONFIG mode" OFF)
+
+# This option might be turned ON for MCU builds of luci-related components.
+# It specifies which library type to use for the build:
+# if set to ON, luci libraries are static; otherwise, shared.
+option(STATIC_LUCI "Build luci as static libraries" OFF)
+
+# Disable the PIC (Position-Independent Code) option for luci-interpreter related components.
+# This option might be turned ON for MCU builds.
+#
+# Enabling PIC requires additional effort for correct linkage, such as
+# implementing trampoline functions and supporting various address tables.
+# PIC is used for dynamic libraries; MCU builds of the interpreter
+# do not benefit from it, so we prefer to disable PIC.
+option(NNCC_LIBRARY_NO_PIC "Disable PIC option for libraries" OFF)
+
+# The one-cmds PyTorch importer is an experimental feature; it is not used in the default configuration.
+# This option enables installation of the one-import-pytorch utility and
+# generation of the related testsuite.
+option(ENABLE_ONE_IMPORT_PYTORCH "Enable deploy of one-cmds pytorch importer and related tests" OFF)
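Since these are ordinary CMake options, any of them can be overridden at configure time. For instance, an MCU-oriented configuration following the comments above might look like this (a sketch, reusing the `./nncc` driver seen in Makefile.arm32):

    $ NNCC_WORKSPACE=build/mcu ./nncc configure \
        -DSTATIC_LUCI=ON -DNNCC_LIBRARY_NO_PIC=ON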
diff --git a/infra/nncc/cmake/buildtool/config/arm-none-eabi-gcc.cmake b/infra/nncc/cmake/buildtool/config/arm-none-eabi-gcc.cmake
new file mode 100644
index 000000000..544be030a
--- /dev/null
+++ b/infra/nncc/cmake/buildtool/config/arm-none-eabi-gcc.cmake
@@ -0,0 +1,66 @@
+set(CMAKE_SYSTEM_NAME Generic)
+
+set(CMAKE_SYSTEM_PROCESSOR "${CPU_ARCH}")
+set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
+set(CMAKE_C_COMPILER "${C_COMPILER}")
+set(CMAKE_CXX_COMPILER "${CXX_COMPILER}")
+set(CMAKE_ASM_COMPILER "${ASM_COMPILER}")
+set(CMAKE_OBJCOPY "${OBJCOPY}")
+
+set(TARGET_CPU "cortex-m4" CACHE STRING "Target CPU")
+
+# Convert TARGET_CPU=Cortex-M33+nofp+nodsp into
+# - CMAKE_SYSTEM_PROCESSOR=cortex-m33
+# - TARGET_CPU_FEATURES=no-fp;no-dsp
+string(REPLACE "+" ";" TARGET_CPU_FEATURES ${TARGET_CPU})
+list(POP_FRONT TARGET_CPU_FEATURES CMAKE_SYSTEM_PROCESSOR)
+string(TOLOWER ${CMAKE_SYSTEM_PROCESSOR} CMAKE_SYSTEM_PROCESSOR)
+
+set(CMAKE_EXECUTABLE_SUFFIX ".elf")
+set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+
+# Select C/C++ version
+set(CMAKE_C_STANDARD 99)
+set(CMAKE_CXX_STANDARD 14)
+
+# Compile options
+add_compile_options(
+ -mcpu=${TARGET_CPU}
+ -mthumb
+ "$<$<CONFIG:DEBUG>:-gdwarf-3>"
+ "$<$<COMPILE_LANGUAGE:CXX>:-funwind-tables;-frtti;-fexceptions>")
+
+# Compile defines
+add_compile_definitions(
+ "$<$<NOT:$<CONFIG:DEBUG>>:NDEBUG>")
+
+# Link options
+add_link_options(
+ -mcpu=${TARGET_CPU}
+ -mthumb
+ --specs=nosys.specs)
+
+# Set floating point unit
+if("${TARGET_CPU}" MATCHES "\\+fp")
+ set(FLOAT hard)
+elseif("${TARGET_CPU}" MATCHES "\\+nofp")
+ set(FLOAT soft)
+elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "cortex-m33" OR
+ "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "cortex-m55")
+ set(FLOAT hard)
+else()
+ set(FLOAT soft)
+endif()
+
+if (FLOAT)
+ add_compile_options(-mfloat-abi=${FLOAT})
+ add_link_options(-mfloat-abi=${FLOAT})
+endif()
+
+# Compilation warnings
+add_compile_options(
+ -Wno-all
+)
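This toolchain file expects the compiler paths and target CPU to be injected from outside via `-D` cache variables (`C_COMPILER`, `CXX_COMPILER`, `ASM_COMPILER`, `OBJCOPY`, `CPU_ARCH`, `TARGET_CPU`). A hedged example of wiring it up for a Cortex-M4 build, with `<source-dir>` as a placeholder and the arm-none-eabi tools assumed to be on PATH:

    $ cmake -B build/mcu-m4 \
        -DCMAKE_TOOLCHAIN_FILE=infra/nncc/cmake/buildtool/config/arm-none-eabi-gcc.cmake \
        -DTARGET_CPU=cortex-m4 -DCPU_ARCH=arm \
        -DC_COMPILER=arm-none-eabi-gcc -DCXX_COMPILER=arm-none-eabi-g++ \
        -DASM_COMPILER=arm-none-eabi-gcc -DOBJCOPY=arm-none-eabi-objcopy \
        <source-dir>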
diff --git a/infra/nncc/cmake/buildtool/config/config_aarch64-linux.cmake b/infra/nncc/cmake/buildtool/config/config_aarch64-linux.cmake
new file mode 100644
index 000000000..fcae94f28
--- /dev/null
+++ b/infra/nncc/cmake/buildtool/config/config_aarch64-linux.cmake
@@ -0,0 +1,13 @@
+#
+# aarch64 linux compile options
+#
+
+message(STATUS "Building for aarch64 Linux")
+
+# include linux common
+include("${CMAKE_CURRENT_LIST_DIR}/config_linux.cmake")
+
+# addition for arm-linux
+set(FLAGS_COMMON ${FLAGS_COMMON}
+ "-march=armv8-a"
+ )
diff --git a/infra/nncc/cmake/buildtool/config/config_aarch64-tizen.cmake b/infra/nncc/cmake/buildtool/config/config_aarch64-tizen.cmake
new file mode 100644
index 000000000..0f304ecf3
--- /dev/null
+++ b/infra/nncc/cmake/buildtool/config/config_aarch64-tizen.cmake
@@ -0,0 +1,17 @@
+#
+# aarch64 tizen compile options
+#
+
+message(STATUS "Building for AARCH64 Tizen")
+
+# Build flag for tizen
+set(CMAKE_C_FLAGS_DEBUG "-O -g -DDEBUG")
+set(CMAKE_CXX_FLAGS_DEBUG "-O -g -DDEBUG")
+
+# TODO: add and use option_tizen if something uncommon comes up
+# include linux common
+include("cmake/buildtool/config/config_linux.cmake")
+
+# addition for aarch64-tizen
+set(FLAGS_COMMON ${FLAGS_COMMON}
+ )
diff --git a/infra/nncc/cmake/buildtool/config/config_armv7hl-tizen.cmake b/infra/nncc/cmake/buildtool/config/config_armv7hl-tizen.cmake
new file mode 100644
index 000000000..fc6876a23
--- /dev/null
+++ b/infra/nncc/cmake/buildtool/config/config_armv7hl-tizen.cmake
@@ -0,0 +1,29 @@
+#
+# armv7l tizen compile options
+#
+
+message(STATUS "Building for ARMv7hl(hardfp) Tizen")
+
+# Build flag for tizen
+set(CMAKE_C_FLAGS_DEBUG "-O -g -DDEBUG")
+set(CMAKE_CXX_FLAGS_DEBUG "-O -g -DDEBUG")
+
+# TODO : add and use option_tizen if something uncommon comes up
+# include linux common
+include("cmake/buildtool/config/config_linux.cmake")
+
+# addition for arm-linux
+set(FLAGS_COMMON ${FLAGS_COMMON}
+ "-mtune=cortex-a8"
+ "-mfloat-abi=hard"
+ "-funsafe-math-optimizations"
+ )
+
+if(BUILD_ARM32_NEON)
+ set(FLAGS_COMMON ${FLAGS_COMMON}
+ "-mfpu=neon-vfpv4"
+ "-ftree-vectorize"
+ )
+else(BUILD_ARM32_NEON)
+ message(STATUS "ARMv7l: NEON is disabled")
+endif(BUILD_ARM32_NEON)
diff --git a/infra/nncc/cmake/buildtool/config/config_armv7l-linux.cmake b/infra/nncc/cmake/buildtool/config/config_armv7l-linux.cmake
new file mode 100644
index 000000000..87704db33
--- /dev/null
+++ b/infra/nncc/cmake/buildtool/config/config_armv7l-linux.cmake
@@ -0,0 +1,25 @@
+#
+# armv7l linux compile options
+#
+
+message(STATUS "Building for ARMv7l Linux")
+
+# include linux common
+include("${CMAKE_CURRENT_LIST_DIR}/config_linux.cmake")
+
+# addition for arm-linux
+set(FLAGS_COMMON ${FLAGS_COMMON}
+ "-march=armv7-a"
+ "-mtune=cortex-a8"
+ "-mfloat-abi=hard"
+ "-mfp16-format=ieee"
+ )
+
+if(BUILD_ARM32_NEON)
+ set(FLAGS_COMMON ${FLAGS_COMMON}
+ "-mfpu=vfpv3-d16"
+ "-ftree-vectorize"
+ )
+else(BUILD_ARM32_NEON)
+ message(STATUS "ARMv7l: NEON is disabled")
+endif(BUILD_ARM32_NEON)
diff --git a/infra/nncc/cmake/buildtool/config/config_armv7l-tizen.cmake b/infra/nncc/cmake/buildtool/config/config_armv7l-tizen.cmake
new file mode 100644
index 000000000..b1ffe65c1
--- /dev/null
+++ b/infra/nncc/cmake/buildtool/config/config_armv7l-tizen.cmake
@@ -0,0 +1,29 @@
+#
+# armv7l tizen compile options
+#
+
+message(STATUS "Building for ARMv7l(softfp) Tizen")
+
+# Build flag for tizen
+set(CMAKE_C_FLAGS_DEBUG "-O -g -DDEBUG")
+set(CMAKE_CXX_FLAGS_DEBUG "-O -g -DDEBUG")
+
+# TODO : add and use option_tizen if something uncommon comes up
+# include linux common
+include("cmake/buildtool/config/config_linux.cmake")
+
+# addition for arm-linux
+set(FLAGS_COMMON ${FLAGS_COMMON}
+ "-mtune=cortex-a8"
+ "-mfloat-abi=softfp"
+ "-funsafe-math-optimizations"
+ )
+
+if(BUILD_ARM32_NEON)
+ set(FLAGS_COMMON ${FLAGS_COMMON}
+ "-mfpu=neon-vfpv4"
+ "-ftree-vectorize"
+ )
+else(BUILD_ARM32_NEON)
+ message(STATUS "ARMv7l: NEON is disabled")
+endif(BUILD_ARM32_NEON)
diff --git a/infra/nncc/cmake/buildtool/config/config_i686-tizen.cmake b/infra/nncc/cmake/buildtool/config/config_i686-tizen.cmake
new file mode 100644
index 000000000..3929e07fd
--- /dev/null
+++ b/infra/nncc/cmake/buildtool/config/config_i686-tizen.cmake
@@ -0,0 +1,17 @@
+#
+# i686 tizen compile options
+#
+
+message(STATUS "Building for i686 Tizen")
+
+# Build flag for tizen
+set(CMAKE_C_FLAGS_DEBUG "-O -g -DDEBUG")
+set(CMAKE_CXX_FLAGS_DEBUG "-O -g -DDEBUG")
+
+# TODO : add and use option_tizen if something uncommon comes up
+# include linux common
+include("cmake/buildtool/config/config_linux.cmake")
+
+# addition for i686-tizen
+set(FLAGS_COMMON ${FLAGS_COMMON}
+ )
diff --git a/infra/nncc/cmake/buildtool/config/config_linux.cmake b/infra/nncc/cmake/buildtool/config/config_linux.cmake
new file mode 100644
index 000000000..d7b17cfef
--- /dev/null
+++ b/infra/nncc/cmake/buildtool/config/config_linux.cmake
@@ -0,0 +1,11 @@
+#
+# linux common compile options
+#
+
+# Disable annoying ABI compatibility warning.
+if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0)
+ list(APPEND FLAGS_CXXONLY "-Wno-psabi")
+endif()
+
+# lib pthread as a variable (pthread must be disabled on android)
+set(LIB_PTHREAD pthread)
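
Consumers link the variable instead of hardcoding pthread, so a platform config can empty it out (as Android setups do) without touching every target; a hedged sketch with a hypothetical target:

    add_executable(example_tool main.cpp)
    # expands to "pthread" here; an empty LIB_PTHREAD degenerates to a no-op
    target_link_libraries(example_tool PRIVATE ${LIB_PTHREAD})
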
diff --git a/infra/nncc/cmake/buildtool/config/config_x86_64-tizen.cmake b/infra/nncc/cmake/buildtool/config/config_x86_64-tizen.cmake
new file mode 100644
index 000000000..0f304ecf3
--- /dev/null
+++ b/infra/nncc/cmake/buildtool/config/config_x86_64-tizen.cmake
@@ -0,0 +1,17 @@
+#
+# x86_64 tizen compile options
+#
+
+message(STATUS "Building for AARCH64 Tizen")
+
+# Build flag for tizen
+set(CMAKE_C_FLAGS_DEBUG "-O -g -DDEBUG")
+set(CMAKE_CXX_FLAGS_DEBUG "-O -g -DDEBUG")
+
+# TODO : add and use option_tizen if something uncommon comes up
+# include linux common
+include("cmake/buildtool/config/config_linux.cmake")
+
+# addition for x86_64-tizen
+set(FLAGS_COMMON ${FLAGS_COMMON}
+ )
diff --git a/infra/nncc/cmake/buildtool/cross/toolchain_armv7l-linux.cmake b/infra/nncc/cmake/buildtool/cross/toolchain_armv7l-linux.cmake
new file mode 100644
index 000000000..4956d91f9
--- /dev/null
+++ b/infra/nncc/cmake/buildtool/cross/toolchain_armv7l-linux.cmake
@@ -0,0 +1,38 @@
+#
+# config for arm-linux
+#
+include(CMakeForceCompiler)
+
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR armv7l)
+
+set(CMAKE_C_COMPILER arm-linux-gnueabihf-gcc)
+set(CMAKE_CXX_COMPILER arm-linux-gnueabihf-g++)
+
+# where is the target environment
+set(NNAS_PROJECT_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/../../../../..")
+set(ROOTFS_ARM "${NNAS_PROJECT_SOURCE_DIR}/tools/cross/rootfs/arm")
+include("${NNAS_PROJECT_SOURCE_DIR}/infra/cmake/modules/OptionTools.cmake")
+
+envoption(ROOTFS_DIR ${ROOTFS_ARM})
+if(NOT EXISTS "${ROOTFS_DIR}/lib/arm-linux-gnueabihf")
+ message(FATAL_ERROR "Please prepare RootFS for ARM")
+endif()
+
+set(CMAKE_SYSROOT ${ROOTFS_DIR})
+set(CMAKE_SHARED_LINKER_FLAGS
+ "${CMAKE_SHARED_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}"
+ CACHE INTERNAL "" FORCE)
+set(CMAKE_EXE_LINKER_FLAGS
+ "${CMAKE_EXE_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}"
+ CACHE INTERNAL "" FORCE)
+
+# search for programs in the build host directories
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+
+# for libraries and headers in the target directories
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+
+# Set cache variable to ignore try-run error by find_package(Threads REQUIRED) on cross build
+set(THREADS_PTHREAD_ARG "2" CACHE STRING "Result from TRY_RUN" FORCE)
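
A typical configure invocation for this toolchain file, sketched as comments because the paths are placeholders (envoption() prefers an environment value of ROOTFS_DIR over the default rootfs path):

    # Hypothetical cross-build configure step:
    #   $ export ROOTFS_DIR=$HOME/tools/cross/rootfs/arm
    #   $ cmake -S . -B build-armv7l \
    #       -DCMAKE_TOOLCHAIN_FILE=infra/nncc/cmake/buildtool/cross/toolchain_armv7l-linux.cmake
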
diff --git a/infra/nncc/cmake/options/options_aarch64-darwin.cmake b/infra/nncc/cmake/options/options_aarch64-darwin.cmake
new file mode 100644
index 000000000..89398bdf4
--- /dev/null
+++ b/infra/nncc/cmake/options/options_aarch64-darwin.cmake
@@ -0,0 +1,4 @@
+#
+# aarch64 darwin cmake options
+#
+
diff --git a/infra/nncc/cmake/options/options_aarch64-linux.cmake b/infra/nncc/cmake/options/options_aarch64-linux.cmake
new file mode 100644
index 000000000..becd574b1
--- /dev/null
+++ b/infra/nncc/cmake/options/options_aarch64-linux.cmake
@@ -0,0 +1,4 @@
+#
+# aarch64 linux cmake options
+#
+
diff --git a/infra/nncc/cmake/options/options_aarch64-tizen.cmake b/infra/nncc/cmake/options/options_aarch64-tizen.cmake
new file mode 100644
index 000000000..be97cb314
--- /dev/null
+++ b/infra/nncc/cmake/options/options_aarch64-tizen.cmake
@@ -0,0 +1,4 @@
+#
+# aarch64 tizen cmake options
+#
+
diff --git a/infra/nncc/cmake/options/options_armv7em-generic.cmake b/infra/nncc/cmake/options/options_armv7em-generic.cmake
new file mode 100644
index 000000000..d671b73f1
--- /dev/null
+++ b/infra/nncc/cmake/options/options_armv7em-generic.cmake
@@ -0,0 +1,3 @@
+#
+# armv7em generic cmake options
+#
diff --git a/infra/nncc/cmake/options/options_armv7hl-tizen.cmake b/infra/nncc/cmake/options/options_armv7hl-tizen.cmake
new file mode 100644
index 000000000..e787ecef8
--- /dev/null
+++ b/infra/nncc/cmake/options/options_armv7hl-tizen.cmake
@@ -0,0 +1,5 @@
+#
+# armv7hl tizen cmake options
+#
+
+option(BUILD_ARM32_NEON "Use NEON for ARM32 build" ON)
diff --git a/infra/nncc/cmake/options/options_armv7l-linux.cmake b/infra/nncc/cmake/options/options_armv7l-linux.cmake
new file mode 100644
index 000000000..d1cc367ee
--- /dev/null
+++ b/infra/nncc/cmake/options/options_armv7l-linux.cmake
@@ -0,0 +1,5 @@
+#
+# armv7l linux cmake options
+#
+
+option(BUILD_ARM32_NEON "Use NEON for ARM32 cross build" ON)
diff --git a/infra/nncc/cmake/options/options_armv7l-tizen.cmake b/infra/nncc/cmake/options/options_armv7l-tizen.cmake
new file mode 100644
index 000000000..9a96f403a
--- /dev/null
+++ b/infra/nncc/cmake/options/options_armv7l-tizen.cmake
@@ -0,0 +1,5 @@
+#
+# armv7l tizen cmake options
+#
+
+option(BUILD_ARM32_NEON "Use NEON for ARM32 build" ON)
diff --git a/infra/nncc/cmake/options/options_i686-tizen.cmake b/infra/nncc/cmake/options/options_i686-tizen.cmake
new file mode 100644
index 000000000..028efca97
--- /dev/null
+++ b/infra/nncc/cmake/options/options_i686-tizen.cmake
@@ -0,0 +1,3 @@
+#
+# i686 tizen cmake options
+#
diff --git a/infra/nncc/cmake/options/options_riscv64-tizen.cmake b/infra/nncc/cmake/options/options_riscv64-tizen.cmake
new file mode 100644
index 000000000..d26d03473
--- /dev/null
+++ b/infra/nncc/cmake/options/options_riscv64-tizen.cmake
@@ -0,0 +1,3 @@
+#
+# riscv64 tizen cmake options
+#
diff --git a/infra/nncc/cmake/options/options_x86_64-darwin.cmake b/infra/nncc/cmake/options/options_x86_64-darwin.cmake
new file mode 100644
index 000000000..1a29135b4
--- /dev/null
+++ b/infra/nncc/cmake/options/options_x86_64-darwin.cmake
@@ -0,0 +1,4 @@
+#
+# x86_64 darwin cmake options
+#
+
diff --git a/infra/nncc/cmake/options/options_x86_64-linux.cmake b/infra/nncc/cmake/options/options_x86_64-linux.cmake
new file mode 100644
index 000000000..0fb72f18b
--- /dev/null
+++ b/infra/nncc/cmake/options/options_x86_64-linux.cmake
@@ -0,0 +1,3 @@
+#
+# x86_64 linux cmake options
+#
diff --git a/infra/nncc/cmake/options/options_x86_64-tizen.cmake b/infra/nncc/cmake/options/options_x86_64-tizen.cmake
new file mode 100644
index 000000000..a29a0afc2
--- /dev/null
+++ b/infra/nncc/cmake/options/options_x86_64-tizen.cmake
@@ -0,0 +1,3 @@
+#
+# x86_64 tizen cmake options
+#
diff --git a/infra/nncc/command/utcount b/infra/nncc/command/utcount
index d06c5c9de..65aea8bae 100644
--- a/infra/nncc/command/utcount
+++ b/infra/nncc/command/utcount
@@ -9,15 +9,17 @@ if [[ ! -d "${BUILD_WORKSPACE_PATH}" ]]; then
exit 255
fi
-BUILD_ITEMS="angkor cwrap pepper-str pepper-strcast pp stdex \
+BUILD_ITEMS="angkor cwrap pepper-str pepper-strcast pp \
oops pepper-assert \
hermes hermes-std \
loco locop locomotiv logo-core logo \
-foder souschef arser vconone \
-safemain mio-circle mio-tflite \
+foder souschef arser vconone crew \
+safemain mio-circle mio-tflite mio-tflite260 \
tflite2circle \
luci \
luci-interpreter \
+luci-eval-driver \
+luci-pass-value-test \
luci-value-test \
record-minmax \
circle2circle circle-quantizer"
diff --git a/infra/nncc/config/docker.configuration b/infra/nncc/config/docker.configuration
index 7078585a2..2765c3642 100644
--- a/infra/nncc/config/docker.configuration
+++ b/infra/nncc/config/docker.configuration
@@ -1,4 +1,4 @@
-DOCKER_IMAGE_NAME=${DOCKER_IMAGE_NAME:-nnas}
+DOCKER_IMAGE_NAME=${DOCKER_IMAGE_NAME:-nnfw/one-devtools}
echo "Using docker image ${DOCKER_IMAGE_NAME}"
if [ -z "`docker images ${DOCKER_IMAGE_NAME}`" ]; then
@@ -11,7 +11,7 @@ DOCKER_PATH="$NNCC_PROJECT_PATH"
export GIT_SSL_NO_VERIFY=1
-DOCKER_VOLUMES=" -v $HOST_PATH:$DOCKER_PATH"
+DOCKER_VOLUMES+=" -v $HOST_PATH:$DOCKER_PATH"
DOCKER_ENV_VARS+=" -e http_proxy"
DOCKER_ENV_VARS+=" -e no_proxy"
diff --git a/infra/nnfw/CMakeLists.txt b/infra/nnfw/CMakeLists.txt
index 5caf37fbf..857c15bf5 100644
--- a/infra/nnfw/CMakeLists.txt
+++ b/infra/nnfw/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.5.1)
+cmake_minimum_required(VERSION 3.16.3)
project(nnfw)
@@ -6,9 +6,12 @@ enable_testing()
set(CMAKE_SKIP_BUILD_RPATH FALSE)
set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE)
-set(CMAKE_INSTALL_RPATH "$ORIGIN/../lib:$ORIGIN/")
+set(CMAKE_INSTALL_RPATH "$ORIGIN/../lib:$ORIGIN/../lib/nnfw/odc:$ORIGIN/")
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
+### CMAKE_BUILD_TYPE_LC: Build type lower case
+string(TOLOWER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_LC)
+
set(NNAS_PROJECT_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/../.." CACHE
INTERNAL "Where to find nnas top-level source directory"
)
@@ -52,14 +55,15 @@ macro(nnas_find_package PREFIX)
)
endmacro(nnas_find_package)
+# C++14 support requires GCC 5 or later
+# Using std::unordered_map causes build failures with GCC below 6.2
+if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6.2)
+  message(FATAL_ERROR "Runtime build requires GNU Compiler version 6.2 or later.")
+endif()
+
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_EXTENSIONS OFF)
-# This feature works with CMake 3.5.2 or later. However, using previous versions does not produce
-# an error. We are still officially using CMake 3.5.1, but put this code for the sake of semantic
-# support in various development tools.
-# Todo: Someday, CMake needs to be updated to 3.7.2 or later to take advantage of improvements
-# such as `cmake-server`.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
# identify platform: HOST_PLATFORM, TARGET_PLATFORM and related
diff --git a/infra/nnfw/cmake/ApplyCompileFlags.cmake b/infra/nnfw/cmake/ApplyCompileFlags.cmake
index b042b0c42..b1c7ff568 100644
--- a/infra/nnfw/cmake/ApplyCompileFlags.cmake
+++ b/infra/nnfw/cmake/ApplyCompileFlags.cmake
@@ -31,3 +31,13 @@ endforeach()
foreach(FLAG ${FLAGS_CXXONLY})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG}")
endforeach()
+
+# lib pthread as a variable (pthread detection must be disabled on Android)
+# Defined here so external library builds can use it
+set(LIB_PTHREAD lib_pthread)
+add_library(${LIB_PTHREAD} INTERFACE)
+if(NOT TARGET_OS STREQUAL "android")
+ # Get compile option (ex. "-pthread" on linux GNU build tool)
+ find_package(Threads)
+ target_link_libraries(${LIB_PTHREAD} INTERFACE Threads::Threads)
+endif()
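
Because LIB_PTHREAD is now an INTERFACE library rather than a raw "pthread" string, a single link line yields the right per-platform behavior; a sketch with a hypothetical target:

    add_executable(onert_example main.cc)
    # On Android the interface is empty; elsewhere Threads::Threads contributes
    # -pthread (or the platform equivalent) transitively.
    target_link_libraries(onert_example PRIVATE ${LIB_PTHREAD})
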
diff --git a/infra/nnfw/cmake/CfgOptionFlags.cmake b/infra/nnfw/cmake/CfgOptionFlags.cmake
index b3d058164..8d37cf27c 100644
--- a/infra/nnfw/cmake/CfgOptionFlags.cmake
+++ b/infra/nnfw/cmake/CfgOptionFlags.cmake
@@ -15,29 +15,28 @@ option(ENABLE_COVERAGE "Build for coverage test" OFF)
option(BUILD_EXT_MULTITHREAD "Build external build using multi thread" ON)
option(BUILD_ONERT "Build onert" ON)
option(BUILD_LOGGING "Build logging runtime" ON)
-CMAKE_DEPENDENT_OPTION(BUILD_RUNTIME_NNAPI_TEST "Build Runtime NN API Generated Test"
- # Set BUILD_RUNTIME_NNAPI_TEST as ON
- # if CMAKE_COMPILER_IS_GNUCC AND NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6.2
- ON "CMAKE_COMPILER_IS_GNUCC;NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6.2"
- # Otherwise set BUILD_RUNTIME_NNAPI_TEST as OFF
- OFF)
+option(BUILD_RUNTIME_NNAPI_TEST "Build Runtime NN API Generated Test" ON)
option(BUILD_RUNTIME_NNFW_API_TEST "Build Runtime NNFW API Tests" ON)
option(BUILD_TFLITE_RUN "Build tflite-run" ON)
option(BUILD_TFLITE_VANILLA_RUN "Build tflite-vanilla-run" OFF)
-option(BUILD_TFLITE_BENCHMARK_MODEL "Build tflite benchmark model" OFF)
-option(BUILD_NNAPI_TEST "Build nnapi_test" ON)
-option(BUILD_NNPACKAGE_RUN "Build nnpackge_run" ON)
+option(BUILD_ONERT_RUN "Build onert_run" ON)
+option(BUILD_ONERT_TRAIN "Build onert_train" ON)
option(BUILD_TFLITE_LOADER "Build TensorFlow Lite loader" ON)
option(BUILD_CIRCLE_LOADER "Build circle loader" ON)
-option(BUILD_TFLITE_LOADER_TEST_TOOL "Build tflite loader testing tool" ON)
+option(BUILD_TRIX_LOADER "Build trix loader" ON)
+option(BUILD_TFLITE_COMPARATOR_TEST_TOOL "Build tflite loader testing tool" ON)
option(BUILD_WITH_HDF5 "Build test tool with HDF5 library" ON)
option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" ON)
option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" ON)
option(INSTALL_TEST_SCRIPTS "Install test scripts" ON)
+option(BUILD_GPU_CL "Build gpu_cl backend" OFF)
+option(BUILD_NPUD "Build NPU daemon" OFF)
+option(ENVVAR_NPUD_CONFIG "Use environment variable for npud configuration" OFF)
+option(BUILD_MINMAX_H5DUMPER "Build minmax h5dumper" ON)
+option(ENABLE_ONERT_TRAIN "Enable onert training feature" ON)
#
# Default build configuration for contrib
#
-option(BUILD_ANDROID_TFLITE "Enable android support for TensorFlow Lite" OFF)
option(BUILD_ANDROID_BENCHMARK_APP "Enable Android Benchmark App" OFF)
option(BUILD_BENCHMARK_ACL "Build ARM Compute Library Benchmarks" OFF)
option(BUILD_DETECTION_APP "Build detection example app" OFF)
@@ -53,13 +52,13 @@ option(BUILD_MLAPSE "Build mlapse benchmark toolkit" OFF)
#
option(BUILD_KBENCHMARK "Build kernel benchmark tool" OFF)
option(BUILD_OPENCL_TOOL "Build OpenCL tool" OFF)
-option(BUILD_NNAPI_QUICKCHECK "Build NN API Quickcheck tools" OFF)
option(BUILD_TFLITE_ACCURACY "Build tflite accuracy tool" OFF)
#
# Default external libraries source download and build configuration
#
option(DOWNLOAD_TENSORFLOW "Download Tensorflow source" ON)
option(DOWNLOAD_ABSEIL "Download Abseil source" ON)
+option(DOWNLOAD_OPENCL_HEADERS "Download Opencl_headers source" OFF)
option(DOWNLOAD_EIGEN "Download Eigen source" ON)
option(DOWNLOAD_FARMHASH "Download farmhash source" ON)
option(DOWNLOAD_GEMMLOWP "Download GEMM low precision library source" ON)
@@ -69,13 +68,28 @@ option(DOWNLOAD_ARMCOMPUTE "Download ARM Compute source" ON)
option(DOWNLOAD_NONIUS "Download nonius source" ON)
option(DOWNLOAD_BOOST "Download boost source" OFF)
option(DOWNLOAD_RUY "Download ruy source" ON)
+option(DOWNLOAD_CPUINFO "Download cpuinfo source" ON)
+option(DOWNLOAD_OOURAFFT "Download Ooura FFT source" ON)
+option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" ON)
option(BUILD_BOOST "Build boost source" OFF)
option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" ON)
-option(BUILD_TENSORFLOW_LITE_2_3_0 "Build TensorFlow Lite 2.3.0 from the downloaded source" OFF)
-option(BUILD_GTEST "Download and build Google Test" ON)
+option(BUILD_TENSORFLOW_LITE_GPU "Build TensorFlow Lite GPU delegate from the downloaded source" OFF)
option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" ON)
+option(DEBUG_ARMCOMPUTE "Build ARM Compute as debug type" OFF)
option(BUILD_RUY "Build ruy library from the downloaded source" ON)
+option(BUILD_CPUINFO "Build cpuinfo library from the downloaded source" ON)
option(PROFILE_RUY "Enable ruy library profiling" OFF)
+option(DOWNLOAD_XNNPACK "Download xnnpack source" ON)
+option(BUILD_XNNPACK "Build xnnpack library from the downloaded source" ON)
+option(DOWNLOAD_PTHREADPOOL "Download pthreadpool source" ON)
+option(BUILD_PTHREADPOOL "Build pthreadpool library from the source" ON)
+option(DOWNLOAD_PSIMD "Download psimd source" ON)
+option(BUILD_PSIMD "Build psimd library from the source" ON)
+option(DOWNLOAD_FP16 "Download fp16 source" ON)
+option(BUILD_FP16 "Build fp16 library from the source" ON)
+option(DOWNLOAD_FXDIV "Download fxdiv source" ON)
+option(BUILD_FXDIV "Build fxdiv library from the source" ON)
+
#
## Default sample build configuration
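
Any of these option() defaults can be overridden when the cache is first populated; a sketch (the particular flags are arbitrary examples):

    # $ cmake -S . -B build -DBUILD_XNNPACK=OFF -DDOWNLOAD_EIGEN=ON -DBUILD_NPUD=ON
    # option() never overwrites an existing cache entry, so a later re-run keeps
    # these values until the entry is changed or removed:
    # $ cmake -U BUILD_XNNPACK build
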
diff --git a/infra/nnfw/cmake/buildtool/config/config_aarch64-android.cmake b/infra/nnfw/cmake/buildtool/config/config_aarch64-android.cmake
index e0c81dee7..fb63b3c47 100644
--- a/infra/nnfw/cmake/buildtool/config/config_aarch64-android.cmake
+++ b/infra/nnfw/cmake/buildtool/config/config_aarch64-android.cmake
@@ -1,8 +1,5 @@
include("cmake/buildtool/config/config_linux.cmake")
-# On Android, pthread is contained in bionic(libc)
-set(LIB_PTHREAD "")
-
# SIMD for aarch64
set(FLAGS_COMMON ${FLAGS_COMMON}
"-ftree-vectorize"
diff --git a/infra/nnfw/cmake/buildtool/config/config_armv7hl-tizen.cmake b/infra/nnfw/cmake/buildtool/config/config_armv7hl-tizen.cmake
new file mode 100644
index 000000000..dec1b4afb
--- /dev/null
+++ b/infra/nnfw/cmake/buildtool/config/config_armv7hl-tizen.cmake
@@ -0,0 +1,22 @@
+#
+# armv7hl tizen compile options
+#
+
+message(STATUS "Building for ARMv7hl(hardfp) Tizen")
+
+# Build flag for tizen
+set(CMAKE_C_FLAGS_DEBUG "-O -g -DDEBUG")
+set(CMAKE_CXX_FLAGS_DEBUG "-O -g -DDEBUG")
+
+# TODO : add and use option_tizen if something uncommon comes up
+# include linux common
+include("cmake/buildtool/config/config_linux.cmake")
+
+# addition for arm-linux
+set(FLAGS_COMMON ${FLAGS_COMMON}
+ "-mtune=cortex-a8"
+ "-mfloat-abi=hard"
+ "-mfpu=neon-vfpv4"
+ "-funsafe-math-optimizations"
+ "-ftree-vectorize"
+ )
diff --git a/infra/nnfw/cmake/buildtool/config/config_i686-tizen.cmake b/infra/nnfw/cmake/buildtool/config/config_i686-tizen.cmake
new file mode 100644
index 000000000..3929e07fd
--- /dev/null
+++ b/infra/nnfw/cmake/buildtool/config/config_i686-tizen.cmake
@@ -0,0 +1,17 @@
+#
+# i686 tizen compile options
+#
+
+message(STATUS "Building for i686 Tizen")
+
+# Build flag for tizen
+set(CMAKE_C_FLAGS_DEBUG "-O -g -DDEBUG")
+set(CMAKE_CXX_FLAGS_DEBUG "-O -g -DDEBUG")
+
+# TODO : add and use option_tizen if something uncommon comes up
+# include linux common
+include("cmake/buildtool/config/config_linux.cmake")
+
+# addition for i686-tizen
+set(FLAGS_COMMON ${FLAGS_COMMON}
+ )
diff --git a/infra/nnfw/cmake/buildtool/config/config_linux.cmake b/infra/nnfw/cmake/buildtool/config/config_linux.cmake
index 86dd0f217..681d165d2 100644
--- a/infra/nnfw/cmake/buildtool/config/config_linux.cmake
+++ b/infra/nnfw/cmake/buildtool/config/config_linux.cmake
@@ -2,20 +2,16 @@
# linux common compile options
#
-# remove warning from arm cl
+# Remove warning: ignoring attributes on template argument (ACL, Eigen, etc)
# https://github.com/ARM-software/ComputeLibrary/issues/330
-set(GCC_VERSION_DISABLE_WARNING 6.0)
-if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER GCC_VERSION_DISABLE_WARNING)
- message(STATUS "GCC version higher than ${GCC_VERSION_DISABLE_WARNING}")
- set(FLAGS_CXXONLY ${FLAGS_CXXONLY}
- "-Wno-ignored-attributes"
- )
-endif()
+set(FLAGS_CXXONLY ${FLAGS_CXXONLY} "-Wno-ignored-attributes")
# Disable annoying ABI compatibility warning.
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0)
list(APPEND FLAGS_CXXONLY "-Wno-psabi")
endif()
-# lib pthread as a variable (pthread must be disabled on android)
-set(LIB_PTHREAD pthread)
+# Build fails on memcpy (e.g. compute/cker/include/cker/Shape.h:211:16)
+if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 12.0)
+  list(APPEND FLAGS_CXXONLY "-Wno-error=stringop-overflow" "-Wno-error=array-bounds")
+endif()
diff --git a/infra/nnfw/cmake/buildtool/config/config_riscv64-tizen.cmake b/infra/nnfw/cmake/buildtool/config/config_riscv64-tizen.cmake
new file mode 100644
index 000000000..2345da47c
--- /dev/null
+++ b/infra/nnfw/cmake/buildtool/config/config_riscv64-tizen.cmake
@@ -0,0 +1,17 @@
+#
+# riscv64 tizen compile options
+#
+
+message(STATUS "Building for RISC-V64 Tizen")
+
+# Build flag for tizen
+set(CMAKE_C_FLAGS_DEBUG "-O -g -DDEBUG")
+set(CMAKE_CXX_FLAGS_DEBUG "-O -g -DDEBUG")
+
+# TODO : add and use option_tizen if something uncommon comes up
+# include linux common
+include("cmake/buildtool/config/config_linux.cmake")
+
+# addition for riscv64-tizen
+set(FLAGS_COMMON ${FLAGS_COMMON}
+ )
diff --git a/infra/nnfw/cmake/buildtool/config/config_x86_64-darwin.cmake b/infra/nnfw/cmake/buildtool/config/config_x86_64-darwin.cmake
index dbd45fc03..52d6c6b2b 100644
--- a/infra/nnfw/cmake/buildtool/config/config_x86_64-darwin.cmake
+++ b/infra/nnfw/cmake/buildtool/config/config_x86_64-darwin.cmake
@@ -7,6 +7,3 @@ message(STATUS "Building for x86-64 Darwin")
set(FLAGS_COMMON ${FLAGS_COMMON}
"-msse4"
)
-
-# lib pthread as a variable (pthread must be disabled on android)
-set(LIB_PTHREAD pthread)
diff --git a/infra/nnfw/cmake/buildtool/config/config_x86_64-tizen.cmake b/infra/nnfw/cmake/buildtool/config/config_x86_64-tizen.cmake
new file mode 100644
index 000000000..0f304ecf3
--- /dev/null
+++ b/infra/nnfw/cmake/buildtool/config/config_x86_64-tizen.cmake
@@ -0,0 +1,17 @@
+#
+# x86_64 tizen compile options
+#
+
+message(STATUS "Building for AARCH64 Tizen")
+
+# Build flag for tizen
+set(CMAKE_C_FLAGS_DEBUG "-O -g -DDEBUG")
+set(CMAKE_CXX_FLAGS_DEBUG "-O -g -DDEBUG")
+
+# TODO : add and use option_tizen if something uncommon comes up
+# include linux common
+include("cmake/buildtool/config/config_linux.cmake")
+
+# addition for x86_64-tizen
+set(FLAGS_COMMON ${FLAGS_COMMON}
+ )
diff --git a/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-linux.cmake b/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-linux.cmake
index 3356aa72d..07b26a937 100644
--- a/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-linux.cmake
+++ b/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-linux.cmake
@@ -21,12 +21,6 @@ endif()
set(CMAKE_SYSROOT ${ROOTFS_DIR})
set(CMAKE_FIND_ROOT_PATH ${ROOTFS_DIR})
-set(CMAKE_SHARED_LINKER_FLAGS
- "${CMAKE_SHARED_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}"
- CACHE INTERNAL "" FORCE)
-set(CMAKE_EXE_LINKER_FLAGS
- "${CMAKE_EXE_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}"
- CACHE INTERNAL "" FORCE)
# search for programs in the build host directories
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
diff --git a/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-tizen.cmake b/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-tizen.cmake
index 4d5d7ac56..cab7325dd 100644
--- a/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-tizen.cmake
+++ b/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-tizen.cmake
@@ -23,12 +23,6 @@ endif()
set(CMAKE_SYSROOT ${ROOTFS_DIR})
set(CMAKE_FIND_ROOT_PATH ${ROOTFS_DIR})
-set(CMAKE_SHARED_LINKER_FLAGS
- "${CMAKE_SHARED_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}"
- CACHE INTERNAL "" FORCE)
-set(CMAKE_EXE_LINKER_FLAGS
- "${CMAKE_EXE_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}"
- CACHE INTERNAL "" FORCE)
# search for programs in the build host directories
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
diff --git a/infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-linux.cmake b/infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-linux.cmake
index 8f2cb6735..c69259f85 100644
--- a/infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-linux.cmake
+++ b/infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-linux.cmake
@@ -21,12 +21,6 @@ endif()
set(CMAKE_SYSROOT ${ROOTFS_DIR})
set(CMAKE_FIND_ROOT_PATH ${ROOTFS_DIR})
-set(CMAKE_SHARED_LINKER_FLAGS
- "${CMAKE_SHARED_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}"
- CACHE INTERNAL "" FORCE)
-set(CMAKE_EXE_LINKER_FLAGS
- "${CMAKE_EXE_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}"
- CACHE INTERNAL "" FORCE)
# search for programs in the build host directories
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
diff --git a/infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-tizen.cmake b/infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-tizen.cmake
deleted file mode 100644
index 72513cdc1..000000000
--- a/infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-tizen.cmake
+++ /dev/null
@@ -1,66 +0,0 @@
-#
-# config for arm-linux
-#
-include(CMakeForceCompiler)
-
-set(CMAKE_SYSTEM_NAME Linux)
-set(CMAKE_SYSTEM_PROCESSOR armv7l)
-
-set(CMAKE_C_COMPILER arm-linux-gnueabi-gcc)
-set(CMAKE_CXX_COMPILER arm-linux-gnueabi-g++)
-
-set(TIZEN_TOOLCHAIN "armv7l-tizen-linux-gnueabi/6.2.1")
-
-# where is the target environment
-set(NNAS_PROJECT_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/../../../../..")
-set(ROOTFS_ARM "${NNAS_PROJECT_SOURCE_DIR}/tools/cross/rootfs/armel")
-include("${NNAS_PROJECT_SOURCE_DIR}/infra/cmake/modules/OptionTools.cmake")
-
-envoption(ROOTFS_DIR ${ROOTFS_ARM})
-if(NOT EXISTS "${ROOTFS_DIR}/usr/lib/gcc/${TIZEN_TOOLCHAIN}")
- message(FATAL_ERROR "Please prepare RootFS for tizen ARM softfp")
-endif()
-
-set(CMAKE_SYSROOT ${ROOTFS_DIR})
-set(CMAKE_FIND_ROOT_PATH ${ROOTFS_DIR})
-set(CMAKE_SHARED_LINKER_FLAGS
- "${CMAKE_SHARED_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}"
- CACHE INTERNAL "" FORCE)
-set(CMAKE_EXE_LINKER_FLAGS
- "${CMAKE_EXE_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}"
- CACHE INTERNAL "" FORCE)
-
-# search for programs in the build host directories
-set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
-
-# for libraries and headers in the target directories
-set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
-set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
-
-# Set cache variable to ignore try-run error by find_package(Threads REQUIRED) on cross build
-set(THREADS_PTHREAD_ARG "2" CACHE STRING "Result from TRY_RUN" FORCE)
-
-
-add_compile_options(-mthumb)
-add_compile_options(-mfpu=neon-vfpv4)
-add_compile_options(-mfloat-abi=softfp)
-add_compile_options(--sysroot=${ROOTFS_DIR})
-
-set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}")
-
-set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}")
-
-include_directories(SYSTEM ${ROOTFS_DIR}/usr/lib/gcc/${TIZEN_TOOLCHAIN}/include/c++/)
-include_directories(SYSTEM ${ROOTFS_DIR}/usr/lib/gcc/${TIZEN_TOOLCHAIN}/include/c++/armv7l-tizen-linux-gnueabi)
-add_compile_options(-Wno-deprecated-declarations) # compile-time option
-add_compile_options(-D__extern_always_inline=inline) # compile-time option
-
-set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -B${ROOTFS_DIR}/usr/lib/gcc/${TIZEN_TOOLCHAIN}")
-set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -L${ROOTFS_DIR}/lib")
-set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -L${ROOTFS_DIR}/usr/lib")
-set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -L${ROOTFS_DIR}/usr/lib/gcc/${TIZEN_TOOLCHAIN}")
-
-set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -B${ROOTFS_DIR}/usr/lib/gcc/${TIZEN_TOOLCHAIN}")
-set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -L${ROOTFS_DIR}/lib")
-set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -L${ROOTFS_DIR}/usr/lib")
-set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -L${ROOTFS_DIR}/usr/lib/gcc/${TIZEN_TOOLCHAIN}")
diff --git a/infra/nnfw/cmake/options/options_aarch64-android.cmake b/infra/nnfw/cmake/options/options_aarch64-android.cmake
index d720b202a..5de2be333 100644
--- a/infra/nnfw/cmake/options/options_aarch64-android.cmake
+++ b/infra/nnfw/cmake/options/options_aarch64-android.cmake
@@ -1,18 +1,18 @@
# aarch64 android cmake options
#
-option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
-# NOTE BUILD_ANDROID_TFLITE(JNI lib) is disabled due to BuiltinOpResolver issue.
-# tensorflow-lite does not build BuiltinOpResolver but JNI lib need it
-# Related Issue : #1403
-option(BUILD_ANDROID_TFLITE "Enable android support for TensorFlow Lite" ON)
option(BUILD_ANDROID_BENCHMARK_APP "Enable Android Benchmark App" ON)
option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
# Need boost library
option(DOWNLOAD_BOOST "Download boost source" ON)
option(BUILD_BOOST "Build boost source" ON)
-option(BUILD_RUNTIME_NNAPI_TEST "Build Runtime NN API Generated Test" OFF)
-option(BUILD_NNAPI_TEST "Build nnapi_test" OFF)
-option(BUILD_NNPACKAGE_RUN "Build nnpackge_run" ON)
-option(BUILD_TFLITE_RUN "Build tflite-run" OFF)
-option(BUILD_TFLITE_LOADER_TEST_TOOL "Build tflite loader testing tool" OFF)
option(BUILD_LOGGING "Build logging runtime" OFF)
+
+option(DOWNLOAD_OPENGL_HEADERS "Download Opengl_headers source" ON)
+option(DOWNLOAD_EGL_HEADERS "Download Egl_headers source" ON)
+option(DOWNLOAD_VULKAN "Download vulkan source" ON)
+option(DOWNLOAD_OPENCL_HEADERS "Download Opencl_headers source" ON)
+option(DOWNLOAD_PYBIND11 "Download Pybind11 source" ON)
+option(BUILD_GPU_CL "Build gpu_cl backend" ON)
+option(BUILD_TENSORFLOW_LITE_GPU "Build TensorFlow Lite GPU delegate from the downloaded source" ON)
+
+option(BUILD_MINMAX_H5DUMPER "Build minmax h5dumper" OFF)
diff --git a/infra/nnfw/cmake/options/options_aarch64-tizen.cmake b/infra/nnfw/cmake/options/options_aarch64-tizen.cmake
index 57d4c1061..cccd77f98 100644
--- a/infra/nnfw/cmake/options/options_aarch64-tizen.cmake
+++ b/infra/nnfw/cmake/options/options_aarch64-tizen.cmake
@@ -3,8 +3,17 @@
#
option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" OFF)
+option(DOWNLOAD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
+option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" OFF)
option(BUILD_LOGGING "Build logging runtime" OFF)
option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF)
option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OFF)
+
+option(BUILD_NPUD "Build NPU daemon" ON)
+# Do not allow use of the CONFIG option on Tizen
+option(ENVVAR_NPUD_CONFIG "Use environment variable for npud configuration" OFF)
+
+option(BUILD_MINMAX_H5DUMPER "Build minmax h5dumper" OFF)
+option(ENABLE_ONERT_TRAIN "Enable onert training feature" OFF)
diff --git a/infra/nnfw/cmake/options/options_armv7hl-tizen.cmake b/infra/nnfw/cmake/options/options_armv7hl-tizen.cmake
new file mode 100644
index 000000000..07dc0404e
--- /dev/null
+++ b/infra/nnfw/cmake/options/options_armv7hl-tizen.cmake
@@ -0,0 +1,27 @@
+#
+# armv7hl tizen cmake options
+#
+option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
+option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" OFF)
+option(DOWNLOAD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
+option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
+option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" OFF)
+
+option(BUILD_LOGGING "Build logging runtime" OFF)
+option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF)
+option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OFF)
+
+option(DOWNLOAD_OPENCL_HEADERS "Download Opencl_headers source" ON)
+option(DOWNLOAD_OPENGL_HEADERS "Download Opengl_headers source" ON)
+option(DOWNLOAD_EGL_HEADERS "Download Egl_headers source" ON)
+option(DOWNLOAD_VULKAN "Download vulkan source" ON)
+
+option(BUILD_GPU_CL "Build gpu_cl backend" ON)
+option(BUILD_TENSORFLOW_LITE_GPU "Build TensorFlow Lite GPU delegate from the downloaded source" ON)
+
+option(BUILD_NPUD "Build NPU daemon" ON)
+# Do not allow use of the CONFIG option on Tizen
+option(ENVVAR_NPUD_CONFIG "Use environment variable for npud configuration" OFF)
+
+option(BUILD_MINMAX_H5DUMPER "Build minmax h5dumper" OFF)
+option(ENABLE_ONERT_TRAIN "Enable onert training feature" OFF)
diff --git a/infra/nnfw/cmake/options/options_armv7l-linux.cmake b/infra/nnfw/cmake/options/options_armv7l-linux.cmake
index e10e573c4..c73a2befa 100644
--- a/infra/nnfw/cmake/options/options_armv7l-linux.cmake
+++ b/infra/nnfw/cmake/options/options_armv7l-linux.cmake
@@ -3,3 +3,10 @@
#
option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
option(BUILD_OPENCL_TOOL "Build OpenCL tool" ON)
+
+option(DOWNLOAD_OPENGL_HEADERS "Download Opengl_headers source" ON)
+option(DOWNLOAD_EGL_HEADERS "Download Egl_headers source" ON)
+option(DOWNLOAD_VULKAN "Download vulkan source" ON)
+option(DOWNLOAD_OPENCL_HEADERS "Download Opencl_headers source" ON)
+option(BUILD_GPU_CL "Build gpu_cl backend" ON)
+option(BUILD_TENSORFLOW_LITE_GPU "Build TensorFlow Lite GPU delegate from the downloaded source" ON)
diff --git a/infra/nnfw/cmake/options/options_armv7l-tizen.cmake b/infra/nnfw/cmake/options/options_armv7l-tizen.cmake
index c27a7ad01..4fdcbc33f 100644
--- a/infra/nnfw/cmake/options/options_armv7l-tizen.cmake
+++ b/infra/nnfw/cmake/options/options_armv7l-tizen.cmake
@@ -3,8 +3,25 @@
#
option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" OFF)
+option(DOWNLOAD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
+option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" OFF)
option(BUILD_LOGGING "Build logging runtime" OFF)
option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF)
option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OFF)
+
+option(DOWNLOAD_OPENCL_HEADERS "Download Opencl_headers source" ON)
+option(DOWNLOAD_OPENGL_HEADERS "Download Opengl_headers source" ON)
+option(DOWNLOAD_EGL_HEADERS "Download Egl_headers source" ON)
+option(DOWNLOAD_VULKAN "Download vulkan source" ON)
+
+option(BUILD_GPU_CL "Build gpu_cl backend" ON)
+option(BUILD_TENSORFLOW_LITE_GPU "Build TensorFlow Lite GPU delegate from the downloaded source" ON)
+
+option(BUILD_NPUD "Build NPU daemon" ON)
+# Do not allow use of the CONFIG option on Tizen
+option(ENVVAR_NPUD_CONFIG "Use environment variable for npud configuration" OFF)
+
+option(BUILD_MINMAX_H5DUMPER "Build minmax h5dumper" OFF)
+option(ENABLE_ONERT_TRAIN "Enable onert training feature" OFF)
diff --git a/infra/nnfw/cmake/options/options_i686-tizen.cmake b/infra/nnfw/cmake/options/options_i686-tizen.cmake
new file mode 100644
index 000000000..bdeb2d9ce
--- /dev/null
+++ b/infra/nnfw/cmake/options/options_i686-tizen.cmake
@@ -0,0 +1,21 @@
+#
+# i686 tizen cmake options
+#
+option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
+option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" OFF)
+option(DOWNLOAD_ARMCOMPUTE "Download ARM Compute source" OFF)
+option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" OFF)
+
+option(BUILD_LOGGING "Build logging runtime" OFF)
+option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF)
+option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OFF)
+
+option(BUILD_XNNPACK "Build XNNPACK" OFF)
+option(DOWNLOAD_OPENCL_HEADERS "Download opencl headers" OFF)
+
+option(BUILD_NPUD "Build NPU daemon" ON)
+# Do not allow use of the CONFIG option on Tizen
+option(ENVVAR_NPUD_CONFIG "Use environment variable for npud configuration" OFF)
+
+option(BUILD_MINMAX_H5DUMPER "Build minmax h5dumper" OFF)
+option(ENABLE_ONERT_TRAIN "Enable onert training feature" OFF)
diff --git a/infra/nnfw/cmake/options/options_riscv64-tizen.cmake b/infra/nnfw/cmake/options/options_riscv64-tizen.cmake
new file mode 100644
index 000000000..c2f8c79f1
--- /dev/null
+++ b/infra/nnfw/cmake/options/options_riscv64-tizen.cmake
@@ -0,0 +1,20 @@
+#
+# riscv64 tizen cmake options
+#
+option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
+option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" OFF)
+option(DOWNLOAD_ARMCOMPUTE "Download ARM Compute source" OFF)
+option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" OFF)
+
+option(BUILD_LOGGING "Build logging runtime" OFF)
+option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF)
+option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OFF)
+
+option(BUILD_XNNPACK "Build XNNPACK" OFF)
+option(DOWNLOAD_OPENCL_HEADERS "Download opencl headers" OFF)
+
+option(BUILD_NPUD "Build NPU daemon" OFF)
+# Do not allow use of the CONFIG option on Tizen
+option(ENVVAR_NPUD_CONFIG "Use environment variable for npud configuration" OFF)
+
+option(BUILD_MINMAX_H5DUMPER "Build minmax h5dumper" OFF)
diff --git a/infra/nnfw/cmake/options/options_x86_64-darwin.cmake b/infra/nnfw/cmake/options/options_x86_64-darwin.cmake
index 97642e6ce..135cfbf6e 100644
--- a/infra/nnfw/cmake/options/options_x86_64-darwin.cmake
+++ b/infra/nnfw/cmake/options/options_x86_64-darwin.cmake
@@ -3,3 +3,4 @@
#
option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
option(DOWNLOAD_ARMCOMPUTE "Download ARM Compute source" OFF)
+option(BUILD_XNNPACK "Build XNNPACK" OFF)
diff --git a/infra/nnfw/cmake/options/options_x86_64-linux.cmake b/infra/nnfw/cmake/options/options_x86_64-linux.cmake
index 97642e6ce..1cb72d593 100644
--- a/infra/nnfw/cmake/options/options_x86_64-linux.cmake
+++ b/infra/nnfw/cmake/options/options_x86_64-linux.cmake
@@ -2,4 +2,5 @@
# x86_64 linux cmake options
#
option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
+option(BUILD_XNNPACK "Build XNNPACK" OFF)
option(DOWNLOAD_ARMCOMPUTE "Download ARM Compute source" OFF)
diff --git a/infra/nnfw/cmake/options/options_x86_64-tizen.cmake b/infra/nnfw/cmake/options/options_x86_64-tizen.cmake
new file mode 100644
index 000000000..70da68c82
--- /dev/null
+++ b/infra/nnfw/cmake/options/options_x86_64-tizen.cmake
@@ -0,0 +1,21 @@
+#
+# x86_64 tizen cmake options
+#
+option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
+option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" OFF)
+option(DOWNLOAD_ARMCOMPUTE "Download ARM Compute source" OFF)
+option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" OFF)
+
+option(BUILD_LOGGING "Build logging runtime" OFF)
+option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF)
+option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OFF)
+
+option(BUILD_XNNPACK "Build XNNPACK" OFF)
+option(DOWNLOAD_OPENCL_HEADERS "Download opencl headers" OFF)
+
+option(BUILD_NPUD "Build NPU daemon" ON)
+# Do not allow use of the CONFIG option on Tizen
+option(ENVVAR_NPUD_CONFIG "Use environment variable for npud configuration" OFF)
+
+option(BUILD_MINMAX_H5DUMPER "Build minmax h5dumper" OFF)
+option(ENABLE_ONERT_TRAIN "Enable onert training feature" OFF)
diff --git a/infra/nnfw/cmake/packages/ARMComputeConfig.cmake b/infra/nnfw/cmake/packages/ARMComputeConfig.cmake
index 67f6ef8a2..acc244aa1 100644
--- a/infra/nnfw/cmake/packages/ARMComputeConfig.cmake
+++ b/infra/nnfw/cmake/packages/ARMComputeConfig.cmake
@@ -1,7 +1,7 @@
function(_ARMCompute_Import)
include(FindPackageHandleStandardArgs)
- list(APPEND ARMCompute_LIB_SEARCH_PATHS ${ARMCompute_PREFIX})
+ list(APPEND ARMCompute_LIB_SEARCH_PATHS ${ARMCompute_PREFIX}/lib)
find_path(INCLUDE_DIR NAMES arm_compute/core/ITensor.h PATHS ${ARMCompute_INCLUDE_SEARCH_PATHS})
@@ -11,14 +11,25 @@ function(_ARMCompute_Import)
message(STATUS "Search acl in ${ARMCompute_LIB_SEARCH_PATHS}")
- if(NOT INCLUDE_DIR)
+ # ARMCompute v21.02 moves some headers into "src/".
+ # And we cannot build armcompute-ex library without these headers.
+ # So we need to download and use source code if our build root doesn't have headers in "src/" (tizen's devel package includes these headers).
+ # TODO Don't use headers in "src/"
+ find_path(HEADER_SRC_DIR NAMES src/core/CL/ICLKernel.h PATHS ${ARMCompute_INCLUDE_SEARCH_PATHS})
+ if(NOT INCLUDE_DIR OR NOT HEADER_SRC_DIR)
nnas_find_package(ARMComputeSource QUIET)
if (NOT ARMComputeSource_FOUND)
set(ARMCompute_FOUND FALSE PARENT_SCOPE)
return()
endif()
- set(INCLUDE_DIR ${ARMComputeSource_DIR} ${ARMComputeSource_DIR}/include)
- endif(NOT INCLUDE_DIR)
+
+  # Clear INCLUDE_DIR if find_path left it NOT-FOUND
+ if(NOT INCLUDE_DIR)
+ unset(INCLUDE_DIR)
+ endif(NOT INCLUDE_DIR)
+
+ list(APPEND INCLUDE_DIR ${ARMComputeSource_DIR} ${ARMComputeSource_DIR}/include)
+ endif(NOT INCLUDE_DIR OR NOT HEADER_SRC_DIR)
if(NOT CORE_LIBRARY)
set(ARMCompute_FOUND FALSE PARENT_SCOPE)
@@ -62,34 +73,19 @@ function(_ARMCompute_Import)
set(ARMCompute_FOUND TRUE PARENT_SCOPE)
endfunction(_ARMCompute_Import)
-### Check whether library exists
-function(_ARMCompute_Check VAR LIBDIR)
- set(FOUND TRUE)
-
- if(NOT EXISTS "${LIBDIR}/libarm_compute_core.so")
- set(FOUND FALSE)
- endif()
-
- if(NOT EXISTS "${LIBDIR}/libarm_compute.so")
- set(FOUND FALSE)
- endif()
-
- if(NOT EXISTS "${LIBDIR}/libarm_compute_graph.so")
- set(FOUND FALSE)
- endif()
-
- set(${VAR} ${FOUND} PARENT_SCOPE)
-endfunction(_ARMCompute_Check)
-
# Let's build and install ARMCompute libraries
-# NOTE This function silently returns on error
-function(_ARMCompute_Build ARMCompute_INSTALL_PREFIX)
- ### Check whether library exists
- _ARMCompute_Check(ARMCompute_FOUND ${ARMCompute_INSTALL_PREFIX})
-
- if(ARMCompute_FOUND)
- return()
- endif(ARMCompute_FOUND)
+function(_ARMCompute_Build ARMComputeInstall_DIR)
+ set(PKG_NAME "ARMCOMPUTE")
+ set(PKG_IDENTIFIER "21.02")
+ set(INSTALL_STAMP_PATH "${ARMComputeInstall_DIR}/${PKG_NAME}.stamp")
+ set(ARMComputeBuild_DIR "${CMAKE_BINARY_DIR}/externals/armcompute")
+
+ if(EXISTS ${INSTALL_STAMP_PATH})
+ file(READ ${INSTALL_STAMP_PATH} READ_IDENTIFIER)
+ if("${READ_IDENTIFIER}" STREQUAL "${PKG_IDENTIFIER}")
+ return()
+ endif("${READ_IDENTIFIER}" STREQUAL "${PKG_IDENTIFIER}")
+ endif(EXISTS ${INSTALL_STAMP_PATH})
### Let's build with SCONS
nnas_find_package(ARMComputeSource QUIET)
@@ -105,13 +101,16 @@ function(_ARMCompute_Build ARMCompute_INSTALL_PREFIX)
return()
endif(NOT SCONS_PATH)
- if(CMAKE_BUILD_TYPE)
- string(TOLOWER "${CMAKE_BUILD_TYPE}" SCON_BUILD_TYPE)
- else(CMAKE_BUILD_TYPE)
+ if(DEBUG_ARMCOMPUTE)
+ set(SCON_BUILD_TYPE "debug")
+ else(DEBUG_ARMCOMPUTE)
set(SCON_BUILD_TYPE "release")
- endif(CMAKE_BUILD_TYPE)
+ endif(DEBUG_ARMCOMPUTE)
#### Architecture-specific configurations
+
+ #### BUILD_DIR is in source tree to reduce CI build overhead
+ #### TODO Change BUILD_DIR to ${ARMComputeBuild_DIR}
if(TARGET_ARCH STREQUAL "armv7l")
set(BUILD_ARCH "armv7a")
set(BUILD_DIR "${BUILD_ARCH}-${TARGET_OS}.${SCON_BUILD_TYPE}")
@@ -137,12 +136,19 @@ function(_ARMCompute_Build ARMCompute_INSTALL_PREFIX)
list(APPEND SCONS_OPTIONS "Werror=0")
list(APPEND SCONS_OPTIONS "os=${TARGET_OS}")
- if(DEFINED ACL_BUILD_THREADS)
- set(N ${ACL_BUILD_THREADS})
- else(DEFINED ACL_BUILD_THREADS)
+ #### Disable test build
+ list(APPEND SCONS_OPTIONS "benchmark_tests=0")
+ list(APPEND SCONS_OPTIONS "validation_tests=0")
+ list(APPEND SCONS_OPTIONS "benchmark_examples=0")
+ list(APPEND SCONS_OPTIONS "validate_examples=0")
+ list(APPEND SCONS_OPTIONS "reference_openmp=0")
+
+ if(DEFINED EXTERNALS_BUILD_THREADS)
+ set(N ${EXTERNALS_BUILD_THREADS})
+ else(DEFINED EXTERNALS_BUILD_THREADS)
include(ProcessorCount)
ProcessorCount(N)
- endif(DEFINED ACL_BUILD_THREADS)
+ endif(DEFINED EXTERNALS_BUILD_THREADS)
if((NOT N EQUAL 0) AND BUILD_EXT_MULTITHREAD)
list(APPEND SCONS_OPTIONS -j${N})
@@ -155,26 +161,34 @@ function(_ARMCompute_Build ARMCompute_INSTALL_PREFIX)
list(APPEND SCONS_OPTIONS "build_dir=${BUILD_DIR}")
endif(DEFINED BUILD_DIR)
+ list(APPEND SCONS_OPTIONS "install_dir=${ARMComputeInstall_DIR}")
+
+ set(SCONS_CC "gcc")
+ set(SCONS_CXX "g++")
+ if(ANDROID)
+ list(APPEND SCONS_OPTIONS "toolchain_prefix=${ANDROID_TOOLCHAIN_PREFIX}")
+ list(APPEND SCONS_OPTIONS "compiler_prefix=${ANDROID_TOOLCHAIN_ROOT}/bin/aarch64-linux-android${ANDROID_API_LEVEL}-")
+ set(SCONS_CC "clang")
+ set(SCONS_CXX "clang++")
+ endif(ANDROID)
+
message(STATUS "Build ARMCompute with ${SCONS_PATH} ('${SCONS_OPTIONS}'")
# Build ARMCompute libraries with SCONS
- # NOTE ARMCompute SConstruct unconditioanlly appends "arm-linux-gnueabihf-" prefix for linux
- execute_process(COMMAND /usr/bin/env CC=gcc CXX=g++ "${SCONS_PATH}" ${SCONS_OPTIONS}
+ # NOTE The ARMCompute build process does not support logging via the OUTPUT_FILE and ERROR_FILE options
+ execute_process(COMMAND ${CMAKE_COMMAND} -E make_directory "${ARMComputeInstall_DIR}")
+ execute_process(COMMAND /usr/bin/env CC=${SCONS_CC} CXX=${SCONS_CXX} "${SCONS_PATH}" ${SCONS_OPTIONS}
WORKING_DIRECTORY ${ARMComputeSource_DIR}
- RESULT_VARIABLE ARMCompute_BUILD)
+ RESULT_VARIABLE BUILD_EXITCODE)
- # Install ARMCompute libraries to overlay
- execute_process(COMMAND ${CMAKE_COMMAND} -E make_directory "${ARMCompute_INSTALL_PREFIX}"
- WORKING_DIRECTORY ${ARMComputeSource_DIR}
- RESULT_VARIABLE ARMCompute_BUILD)
- execute_process(COMMAND ${CMAKE_COMMAND} -E copy "build/${BUILD_DIR}/libarm_compute_core.so" "${ARMCompute_INSTALL_PREFIX}"
- COMMAND ${CMAKE_COMMAND} -E copy "build/${BUILD_DIR}/libarm_compute.so" "${ARMCompute_INSTALL_PREFIX}"
- COMMAND ${CMAKE_COMMAND} -E copy "build/${BUILD_DIR}/libarm_compute_graph.so" "${ARMCompute_INSTALL_PREFIX}"
- WORKING_DIRECTORY ${ARMComputeSource_DIR}
- RESULT_VARIABLE ARMCompute_BUILD)
+ if(NOT BUILD_EXITCODE EQUAL 0)
+   message(FATAL_ERROR "${PKG_NAME} Package: Build and install failed")
+ endif(NOT BUILD_EXITCODE EQUAL 0)
+
+ file(WRITE "${INSTALL_STAMP_PATH}" "${PKG_IDENTIFIER}")
endfunction(_ARMCompute_Build)
-set(ARMCompute_PREFIX ${EXT_OVERLAY_DIR}/lib)
+set(ARMCompute_PREFIX ${EXT_OVERLAY_DIR})
if(BUILD_ARMCOMPUTE)
_ARMCompute_Build("${ARMCompute_PREFIX}")
endif(BUILD_ARMCOMPUTE)
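
The stamp-file guard introduced above is a general pattern for expensive external builds: record an identifier after a successful install, and skip the rebuild while the stored identifier still matches. A condensed sketch with illustrative names:

    set(INSTALL_STAMP_PATH "${MyPkgInstall_DIR}/MYPKG.stamp")  # illustrative path
    set(PKG_IDENTIFIER "21.02")
    if(EXISTS ${INSTALL_STAMP_PATH})
      file(READ ${INSTALL_STAMP_PATH} READ_IDENTIFIER)
      if("${READ_IDENTIFIER}" STREQUAL "${PKG_IDENTIFIER}")
        return()  # this version is already installed; nothing to do
      endif()
    endif()
    # ... run the external build and install here, failing hard on error ...
    file(WRITE "${INSTALL_STAMP_PATH}" "${PKG_IDENTIFIER}")  # mark success last
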
diff --git a/infra/nnfw/cmake/packages/BoostConfig.cmake b/infra/nnfw/cmake/packages/BoostConfig.cmake
index 4f60e9107..f2759f8e1 100644
--- a/infra/nnfw/cmake/packages/BoostConfig.cmake
+++ b/infra/nnfw/cmake/packages/BoostConfig.cmake
@@ -16,6 +16,18 @@ function(_Boost_Build Boost_PREFIX)
set(BoostBuild_DIR ${CMAKE_BINARY_DIR}/externals/boost)
set(BoostInstall_DIR ${Boost_PREFIX})
+ set(INSTALL_STAMP_PATH "${BoostInstall_DIR}/BOOST.stamp")
+ set(BUILD_LOG_PATH "${BoostBuild_DIR}/BOOST.log")
+ set(PKG_NAME "BOOST")
+ set(PKG_IDENTIFIER "1.58.0")
+
+ if(EXISTS ${INSTALL_STAMP_PATH})
+ file(READ ${INSTALL_STAMP_PATH} READ_IDENTIFIER)
+ if("${READ_IDENTIFIER}" STREQUAL "${PKG_IDENTIFIER}")
+ return()
+ endif("${READ_IDENTIFIER}" STREQUAL "${PKG_IDENTIFIER}")
+ endif(EXISTS ${INSTALL_STAMP_PATH})
+
unset(Boost_Options)
list(APPEND Boost_Options --build-dir=${BoostBuild_DIR})
@@ -25,6 +37,17 @@ function(_Boost_Build Boost_PREFIX)
list(APPEND Boost_Options --with-system)
list(APPEND Boost_Options --with-filesystem)
+ if(DEFINED EXTERNALS_BUILD_THREADS)
+ set(N ${EXTERNALS_BUILD_THREADS})
+ else(DEFINED EXTERNALS_BUILD_THREADS)
+ include(ProcessorCount)
+ ProcessorCount(N)
+ endif(DEFINED EXTERNALS_BUILD_THREADS)
+
+ if((NOT N EQUAL 0) AND BUILD_EXT_MULTITHREAD)
+ list(APPEND Boost_Options -j${N})
+ endif()
+
set(JAM_FILENAME ${BoostBuild_DIR}/user-config.jam)
if(ANDROID)
@@ -41,7 +64,15 @@ function(_Boost_Build Boost_PREFIX)
# Install Boost libraries
execute_process(COMMAND ${CMAKE_COMMAND} -E make_directory "${BoostInstall_DIR}")
execute_process(COMMAND /usr/bin/env BOOST_BUILD_PATH="${BoostBuild_DIR}" ${BoostSource_DIR}/b2 install ${Boost_Options}
- WORKING_DIRECTORY ${BoostSource_DIR})
+ WORKING_DIRECTORY ${BoostSource_DIR}
+ OUTPUT_FILE ${BUILD_LOG_PATH}
+ RESULT_VARIABLE BUILD_EXITCODE)
+
+ if(NOT BUILD_EXITCODE EQUAL 0)
+ message(FATAL_ERROR "${PKG_NAME} Package: Build and install failed (check '${BUILD_LOG_PATH}' for details)")
+ endif(NOT BUILD_EXITCODE EQUAL 0)
+
+ file(WRITE "${INSTALL_STAMP_PATH}" "${PKG_IDENTIFIER}")
endfunction(_Boost_Build)
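
Both external builds now honor the same parallelism knob, falling back to ProcessorCount() when it is unset; a sketch of capping it at configure time (assuming it is passed as a cache variable):

    # $ cmake -DEXTERNALS_BUILD_THREADS=4 <source-dir>  # SCons and b2 both run with -j4
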
diff --git a/infra/nnfw/cmake/packages/CpuInfoConfig.cmake b/infra/nnfw/cmake/packages/CpuInfoConfig.cmake
new file mode 100644
index 000000000..dddec8988
--- /dev/null
+++ b/infra/nnfw/cmake/packages/CpuInfoConfig.cmake
@@ -0,0 +1,39 @@
+function(_CpuInfo_Build)
+ nnas_find_package(CpuInfoSource QUIET)
+
+ # NOTE This line prevents multiple definitions of cpuinfo target
+ if(TARGET cpuinfo)
+ set(CpuInfoSource_DIR ${CpuInfoSource_DIR} PARENT_SCOPE)
+ set(CpuInfo_FOUND TRUE PARENT_SCOPE)
+ return()
+ endif(TARGET cpuinfo)
+
+ if(NOT CpuInfoSource_FOUND)
+ message(STATUS "CPUINFO: Source not found")
+ set(CpuInfo_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT CpuInfoSource_FOUND)
+
+ nnas_include(ExternalProjectTools)
+
+ # Set build option
+ # - Static (position independent)
+ # - No logging
+ # - Library only (CPUINFO_RUNTIME_TYPE is not used)
+ set(CPUINFO_LIBRARY_TYPE "static" CACHE STRING "")
+ set(CPUINFO_LOG_LEVEL "none" CACHE STRING "")
+ set(CPUINFO_BUILD_TOOLS OFF CACHE BOOL "")
+ set(CPUINFO_BUILD_BENCHMARKS OFF CACHE BOOL "")
+ set(CPUINFO_BUILD_UNIT_TESTS OFF CACHE BOOL "")
+ set(CPUINFO_BUILD_MOCK_TESTS OFF CACHE BOOL "")
+ add_extdirectory("${CpuInfoSource_DIR}" cpuinfo EXCLUDE_FROM_ALL)
+ set_target_properties(cpuinfo PROPERTIES POSITION_INDEPENDENT_CODE ON)
+ set(CpuInfoSource_DIR ${CpuInfoSource_DIR} PARENT_SCOPE)
+ set(CpuInfo_FOUND TRUE PARENT_SCOPE)
+endfunction(_CpuInfo_Build)
+
+if(BUILD_CPUINFO)
+ _CpuInfo_Build()
+else(BUILD_CPUINFO)
+ set(CpuInfo_FOUND FALSE)
+endif(BUILD_CPUINFO)
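
Once imported, the cpuinfo target is consumed like any other; a minimal sketch (my_backend is a hypothetical target):

    nnfw_find_package(CpuInfo QUIET)
    if(CpuInfo_FOUND)
      # cpuinfo was built above as a static, position-independent library
      target_link_libraries(my_backend PRIVATE cpuinfo)
    endif()
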
diff --git a/infra/nnfw/cmake/packages/EigenConfig.cmake b/infra/nnfw/cmake/packages/EigenConfig.cmake
index e71830a16..15378827e 100644
--- a/infra/nnfw/cmake/packages/EigenConfig.cmake
+++ b/infra/nnfw/cmake/packages/EigenConfig.cmake
@@ -1,5 +1,5 @@
function(_Eigen_import)
- nnas_find_package(TensorFlowEigenSource EXACT 2.3.0 QUIET)
+ nnas_find_package(TensorFlowEigenSource EXACT 2.8.0 QUIET)
if(NOT TensorFlowEigenSource_FOUND)
set(Eigen_FOUND FALSE PARENT_SCOPE)
diff --git a/infra/nnfw/cmake/packages/FarmhashSourceConfig.cmake b/infra/nnfw/cmake/packages/FarmhashSourceConfig.cmake
deleted file mode 100644
index ab53f97b2..000000000
--- a/infra/nnfw/cmake/packages/FarmhashSourceConfig.cmake
+++ /dev/null
@@ -1,19 +0,0 @@
-function(_FarmhashSource_import)
- if(NOT ${DOWNLOAD_FARMHASH})
- set(FarmhashSource_FOUND FALSE PARENT_SCOPE)
- return()
- endif(NOT ${DOWNLOAD_FARMHASH})
-
- nnas_include(ExternalSourceTools)
- nnas_include(OptionTools)
-
- # NOTE TensorFlow 1.12 downloads farmhash from the following URL
- envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
- set(FARMHASH_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz)
- ExternalSource_Download("farmhash" ${FARMHASH_URL})
-
- set(FarmhashSource_DIR ${farmhash_SOURCE_DIR} PARENT_SCOPE)
- set(FarmhashSource_FOUND ${farmhash_SOURCE_GET} PARENT_SCOPE)
-endfunction(_FarmhashSource_import)
-
-_FarmhashSource_import()
diff --git a/infra/nnfw/cmake/packages/FlatBuffersConfig.cmake b/infra/nnfw/cmake/packages/FlatBuffersConfig.cmake
index 13ad1113a..032724ae2 100644
--- a/infra/nnfw/cmake/packages/FlatBuffersConfig.cmake
+++ b/infra/nnfw/cmake/packages/FlatBuffersConfig.cmake
@@ -6,8 +6,8 @@ function(_FlatBuffers_import)
return()
endif(Flatbuffers_FOUND)
- # NOTE Tizen uses 1.11
- nnas_find_package(FlatBuffersSource EXACT 1.11 QUIET)
+ # NOTE Tizen uses 2.0
+ nnas_find_package(FlatBuffersSource EXACT 2.0 QUIET)
if(NOT FlatBuffersSource_FOUND)
set(FlatBuffers_FOUND FALSE PARENT_SCOPE)
diff --git a/infra/nnfw/cmake/packages/Fp16Config.cmake b/infra/nnfw/cmake/packages/Fp16Config.cmake
new file mode 100644
index 000000000..6c31613c0
--- /dev/null
+++ b/infra/nnfw/cmake/packages/Fp16Config.cmake
@@ -0,0 +1,30 @@
+function(_Fp16_Build)
+ nnas_find_package(Fp16Source QUIET)
+
+ # NOTE This line prevents multiple definitions of target
+ if(TARGET fp16)
+ set(Fp16Source_DIR ${Fp16Source_DIR} PARENT_SCOPE)
+ set(Fp16_FOUND TRUE PARENT_SCOPE)
+ return()
+ endif(TARGET fp16)
+
+ if(NOT Fp16Source_FOUND)
+ message(STATUS "FP16: Source not found")
+ set(Fp16_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT Fp16Source_FOUND)
+
+ set(FP16_BUILD_TESTS OFF CACHE BOOL "Build FP16 unit tests")
+ set(FP16_BUILD_BENCHMARKS OFF CACHE BOOL "Build FP16 micro-benchmarks")
+ nnas_find_package(PsimdSource)
+  set(PSIMD_SOURCE_DIR ${PsimdSource_DIR} CACHE STRING "PSIMD source path, used to keep fp16 from downloading it")
+ add_extdirectory("${Fp16Source_DIR}" FP16 EXCLUDE_FROM_ALL)
+ set(Fp16Source_DIR ${Fp16Source_DIR} PARENT_SCOPE)
+ set(Fp16_FOUND TRUE PARENT_SCOPE)
+endfunction(_Fp16_Build)
+
+if(BUILD_FP16)
+ _Fp16_Build()
+else()
+ set(Fp16_FOUND FALSE)
+endif()
diff --git a/infra/nnfw/cmake/packages/FxdivConfig.cmake b/infra/nnfw/cmake/packages/FxdivConfig.cmake
new file mode 100644
index 000000000..6f268aec8
--- /dev/null
+++ b/infra/nnfw/cmake/packages/FxdivConfig.cmake
@@ -0,0 +1,29 @@
+function(_Fxdiv_Build)
+ nnas_find_package(FxdivSource QUIET)
+
+ # NOTE This line prevents multiple definitions of target
+ if(TARGET fxdiv)
+ set(FxdivSource_DIR ${FxdivSource_DIR} PARENT_SCOPE)
+ set(Fxdiv_FOUND TRUE PARENT_SCOPE)
+ return()
+ endif(TARGET fxdiv)
+
+ if(NOT FxdivSource_FOUND)
+ message(STATUS "FXDIV: Source not found")
+ set(Fxdiv_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT FxdivSource_FOUND)
+
+ set(FXDIV_BUILD_TESTS OFF CACHE BOOL "Build FXdiv unit tests")
+ set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "Build FXdiv micro-benchmarks")
+
+ add_extdirectory("${FxdivSource_DIR}" FXDIV EXCLUDE_FROM_ALL)
+ set(FxdivSource_DIR ${FxdivSource_DIR} PARENT_SCOPE)
+ set(Fxdiv_FOUND TRUE PARENT_SCOPE)
+endfunction(_Fxdiv_Build)
+
+if(BUILD_FXDIV)
+ _Fxdiv_Build()
+else()
+ set(Fxdiv_FOUND FALSE)
+endif()
diff --git a/infra/nnfw/cmake/packages/GEMMLowpConfig.cmake b/infra/nnfw/cmake/packages/GEMMLowpConfig.cmake
index ddfcc787e..b321961ca 100644
--- a/infra/nnfw/cmake/packages/GEMMLowpConfig.cmake
+++ b/infra/nnfw/cmake/packages/GEMMLowpConfig.cmake
@@ -1,5 +1,5 @@
function(_GEMMLowp_import)
- nnfw_find_package(GEMMLowpSource QUIET)
+ nnas_find_package(GEMMLowpSource QUIET)
if(NOT GEMMLowpSource_FOUND)
set(GEMMLowp_FOUND FALSE PARENT_SCOPE)
diff --git a/infra/nnfw/cmake/packages/GEMMLowpSourceConfig.cmake b/infra/nnfw/cmake/packages/GEMMLowpSourceConfig.cmake
deleted file mode 100644
index 97c8e0597..000000000
--- a/infra/nnfw/cmake/packages/GEMMLowpSourceConfig.cmake
+++ /dev/null
@@ -1,19 +0,0 @@
-function(_GEMMLowpSource_import)
- if(NOT ${DOWNLOAD_GEMMLOWP})
- set(GEMMLowpSource_FOUND FALSE PARENT_SCOPE)
- return()
- endif(NOT ${DOWNLOAD_GEMMLOWP})
-
- nnas_include(ExternalSourceTools)
- nnas_include(OptionTools)
-
- # NOTE TensorFlow 1.12 uses the following URL
- envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
- set(GEMMLOWP_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/gemmlowp/archive/38ebac7b059e84692f53e5938f97a9943c120d98.tar.gz)
- ExternalSource_Download("gemmlowp" ${GEMMLOWP_URL})
-
- set(GEMMLowpSource_DIR ${gemmlowp_SOURCE_DIR} PARENT_SCOPE)
- set(GEMMLowpSource_FOUND ${gemmlowp_SOURCE_GET} PARENT_SCOPE)
-endfunction(_GEMMLowpSource_import)
-
-_GEMMLowpSource_import()
diff --git a/infra/nnfw/cmake/packages/GLib2.0Config.cmake b/infra/nnfw/cmake/packages/GLib2.0Config.cmake
new file mode 100644
index 000000000..d4c6bf241
--- /dev/null
+++ b/infra/nnfw/cmake/packages/GLib2.0Config.cmake
@@ -0,0 +1,41 @@
+function(_GLIB_2_0_import)
+ find_library(GLIB_LIBRARIES
+ NAMES glib-2.0)
+
+ get_filename_component(GLIB_LIBRARY_DIR ${GLIB_LIBRARIES} DIRECTORY)
+ find_path(GLIBCONFIG_INCLUDE_DIR
+ NAMES glibconfig.h
+ PATHS ${GLIB_LIBRARY_DIR}
+ PATH_SUFFIXES glib-2.0/include
+ NO_CMAKE_FIND_ROOT_PATH)
+
+ find_path(GLIB_INCLUDE_DIR
+ NAMES glib.h
+ PATH_SUFFIXES glib-2.0)
+
+ set(GLIB_FOUND TRUE)
+
+ if(NOT GLIB_LIBRARIES)
+ set(GLIB_FOUND FALSE)
+ endif(NOT GLIB_LIBRARIES)
+
+ if(NOT GLIBCONFIG_INCLUDE_DIR)
+ set(GLIB_FOUND FALSE)
+ endif(NOT GLIBCONFIG_INCLUDE_DIR)
+
+ if(NOT GLIB_INCLUDE_DIR)
+ set(GLIB_FOUND FALSE)
+ endif(NOT GLIB_INCLUDE_DIR)
+
+ set(GLIB_INCLUDE_DIRS ${GLIB_INCLUDE_DIR} ${GLIBCONFIG_INCLUDE_DIR})
+
+ if(NOT GLIB_FOUND)
+ message(STATUS "Failed to find GLib 2.0")
+ endif(NOT GLIB_FOUND)
+
+ set(GLIB2.0_FOUND ${GLIB_FOUND} PARENT_SCOPE)
+ set(GLIB2.0_INCLUDE_DIRS ${GLIB_INCLUDE_DIRS} PARENT_SCOPE)
+ set(GLIB2.0_LIBRARIES ${GLIB_LIBRARIES} PARENT_SCOPE)
+endfunction(_GLIB_2_0_import)
+
+_GLIB_2_0_import()
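This config exports GLIB2.0_INCLUDE_DIRS and GLIB2.0_LIBRARIES rather than an imported target, so consumers wire them up manually. A minimal sketch, assuming a hypothetical my_tool target:

nnfw_find_package(GLib2.0 REQUIRED)
target_include_directories(my_tool PRIVATE ${GLIB2.0_INCLUDE_DIRS})
target_link_libraries(my_tool PRIVATE ${GLIB2.0_LIBRARIES})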
diff --git a/infra/nnfw/cmake/packages/GObject2.0Config.cmake b/infra/nnfw/cmake/packages/GObject2.0Config.cmake
new file mode 100644
index 000000000..f1bfb3aba
--- /dev/null
+++ b/infra/nnfw/cmake/packages/GObject2.0Config.cmake
@@ -0,0 +1,30 @@
+function(_GOBJECT_2_0_import)
+ nnfw_find_package(GLib2.0 REQUIRED)
+
+ find_library(GOBJECT_LIBRARIES
+ NAMES gobject-2.0)
+
+ # gobject-2.0 requires glib-2.0 and accesses its header files via the
+ # glib-2.0 include directory.
+ set(GOBJECT_INCLUDE_DIRS ${GLIB2.0_INCLUDE_DIRS})
+
+ set(GOBJECT_FOUND TRUE)
+
+ if(NOT GOBJECT_LIBRARIES)
+ set(GOBJECT_FOUND FALSE)
+ endif(NOT GOBJECT_LIBRARIES)
+
+ if(NOT GOBJECT_INCLUDE_DIRS)
+ set(GOBJECT_FOUND FALSE)
+ endif(NOT GOBJECT_INCLUDE_DIRS)
+
+ if(NOT GOBJECT_FOUND)
+ message(STATUS "Failed to find gobject-2.0")
+ endif(NOT GOBJECT_FOUND)
+
+ set(GOBJECT2.0_FOUND ${GOBJECT_FOUND} PARENT_SCOPE)
+ set(GOBJECT2.0_INCLUDE_DIRS ${GOBJECT_INCLUDE_DIRS} PARENT_SCOPE)
+ set(GOBJECT2.0_LIBRARIES ${GOBJECT_LIBRARIES} PARENT_SCOPE)
+endfunction(_GOBJECT_2_0_import)
+
+_GOBJECT_2_0_import()
diff --git a/infra/nnfw/cmake/packages/GTestConfig.cmake b/infra/nnfw/cmake/packages/GTestConfig.cmake
index f3aadf998..d0f7b1845 100644
--- a/infra/nnfw/cmake/packages/GTestConfig.cmake
+++ b/infra/nnfw/cmake/packages/GTestConfig.cmake
@@ -1,26 +1,26 @@
-if(${BUILD_GTEST})
- nnas_include(ExternalSourceTools)
- nnas_include(ExternalProjectTools)
- nnas_include(OptionTools)
+if(${DOWNLOAD_GTEST})
+ nnas_find_package(GTestSource QUIET)
- envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
- set(GTEST_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/googletest/archive/release-1.8.0.tar.gz)
- ExternalSource_Download("gtest" ${GTEST_URL})
-
- # gtest_SOURCE_DIR is used in gtest subdirectorty's cmake
- set(sourcedir_gtest ${gtest_SOURCE_DIR})
- unset(gtest_SOURCE_DIR)
+ if(NOT GTestSource_FOUND)
+ set(GTest_FOUND FALSE)
+ return()
+ endif(NOT GTestSource_FOUND)
if(NOT TARGET gtest_main)
- add_extdirectory(${sourcedir_gtest} gtest EXCLUDE_FROM_ALL)
+ nnas_include(ExternalProjectTools)
+ add_extdirectory(${GTestSource_DIR} gtest EXCLUDE_FROM_ALL)
endif(NOT TARGET gtest_main)
set(GTest_FOUND TRUE)
return()
-endif(${BUILD_GTEST})
+endif(${DOWNLOAD_GTEST})
### Find and use pre-installed Google Test
-find_package(GTest)
+if(NOT GTest_FOUND)
+ # Reset package config directory cache to prevent recursive find
+ unset(GTest_DIR CACHE)
+ find_package(GTest)
+endif(NOT GTest_FOUND)
find_package(Threads)
if(${GTEST_FOUND} AND TARGET Threads::Threads)
@@ -44,7 +44,7 @@ if(${GTEST_FOUND} AND TARGET Threads::Threads)
add_library(gmock INTERFACE)
target_include_directories(gmock INTERFACE ${GMOCK_INCLUDE_DIR})
target_link_libraries(gmock INTERFACE ${GMOCK_LIBRARIES} Threads::Threads)
- endif(GMOCK_LIBRARIES)
+ endif(GMOCK_LIBRARIES AND GMOCK_INCLUDE_DIR)
endif(NOT TARGET gmock)
if(NOT TARGET gmock_main)
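Either branch (downloaded source or pre-installed package) ends with gtest_main and gmock targets available, so test code is unaffected by the switch from BUILD_GTEST to DOWNLOAD_GTEST. A sketch, with a hypothetical test target and source file:

nnfw_find_package(GTest REQUIRED)
add_executable(my_unit_test my_unit_test.cc)  # hypothetical test source
target_link_libraries(my_unit_test gtest_main gmock)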
diff --git a/infra/nnfw/cmake/packages/Gio2.0Config.cmake b/infra/nnfw/cmake/packages/Gio2.0Config.cmake
new file mode 100644
index 000000000..26d36072f
--- /dev/null
+++ b/infra/nnfw/cmake/packages/Gio2.0Config.cmake
@@ -0,0 +1,32 @@
+function(_GIO_2_0_import)
+ nnfw_find_package(GLib2.0 REQUIRED)
+ nnfw_find_package(GObject2.0 REQUIRED)
+
+ find_library(GIO_LIBRARIES
+ NAMES gio-2.0)
+
+ # gio-2.0 requires glib-2.0 and accesses its header files via the
+ # glib-2.0 include directory.
+ set(GIO_INCLUDE_DIRS ${GLIB2.0_INCLUDE_DIRS} ${GOBJECT2.0_INCLUDE_DIRS})
+ set(GIO_LIBRARIES ${GIO_LIBRARIES} ${GOBJECT2.0_LIBRARIES})
+
+ set(GIO_FOUND TRUE)
+
+ if(NOT GIO_LIBRARIES)
+ set(GIO_FOUND FALSE)
+ endif(NOT GIO_LIBRARIES)
+
+ if(NOT GIO_INCLUDE_DIRS)
+ set(GIO_FOUND FALSE)
+ endif(NOT GIO_INCLUDE_DIRS)
+
+ if(NOT GIO_FOUND)
+ message(STATUS "Failed to find gio-2.0")
+ endif(NOT GIO_FOUND)
+
+ set(GIO2.0_FOUND ${GIO_FOUND} PARENT_SCOPE)
+ set(GIO2.0_INCLUDE_DIRS ${GIO_INCLUDE_DIRS} PARENT_SCOPE)
+ set(GIO2.0_LIBRARIES ${GIO_LIBRARIES} PARENT_SCOPE)
+endfunction(_GIO_2_0_import)
+
+_GIO_2_0_import()
diff --git a/infra/nnfw/cmake/packages/Giounix2.0Config.cmake b/infra/nnfw/cmake/packages/Giounix2.0Config.cmake
new file mode 100644
index 000000000..69f5e0e04
--- /dev/null
+++ b/infra/nnfw/cmake/packages/Giounix2.0Config.cmake
@@ -0,0 +1,30 @@
+function(_GIO_UNIX_2_0_import)
+ nnfw_find_package(Gio2.0 REQUIRED)
+
+ find_path(GIO_UNIX_INCLUDE_DIR
+ NAMES gio/gunixfdlist.h
+ PATH_SUFFIXES glib-2.0)
+
+ # gio-unix-2.0 requires gio-2.0 and links against the gio-2.0 library.
+ set(GIO_UNIX_LIBRARIES ${GIO2.0_LIBRARIES})
+
+ set(GIO_UNIX_FOUND TRUE)
+
+ if(NOT GIO_UNIX_LIBRARIES)
+ set(GIO_UNIX_FOUND FALSE)
+ endif(NOT GIO_UNIX_LIBRARIES)
+
+ if(NOT GIO_UNIX_INCLUDE_DIR)
+ set(GIO_UNIX_FOUND FALSE)
+ endif(NOT GIO_UNIX_INCLUDE_DIR)
+
+ if(NOT GIO_UNIX_FOUND)
+ message(STATUS "Failed to find gio-unix-2.0")
+ endif(NOT GIO_UNIX_FOUND)
+
+ set(GIO_UNIX_2.0_FOUND ${GIO_UNIX_FOUND} PARENT_SCOPE)
+ set(GIO_UNIX_2.0_INCLUDE_DIRS ${GIO_UNIX_INCLUDE_DIR} PARENT_SCOPE)
+ set(GIO_UNIX_2.0_LIBRARIES ${GIO_UNIX_LIBRARIES} PARENT_SCOPE)
+endfunction(_GIO_UNIX_2_0_import)
+
+_GIO_UNIX_2_0_import()
diff --git a/infra/nnfw/cmake/packages/LuciConfig.cmake b/infra/nnfw/cmake/packages/LuciConfig.cmake
new file mode 100644
index 000000000..426556b3a
--- /dev/null
+++ b/infra/nnfw/cmake/packages/LuciConfig.cmake
@@ -0,0 +1,43 @@
+# Assume that luci and related libraries and headers are installed in the overlay directory
+
+set(Luci_FOUND FALSE)
+
+find_path(LUCI_HEADERS
+ NAMES loco.h luci/IR/CircleNode.h
+ PATHS ${EXT_OVERLAY_DIR}/include)
+
+macro(_load_library LUCI_NAME)
+ add_library(luci::${LUCI_NAME} SHARED IMPORTED)
+ find_library(LUCI_LIB_PATH_${LUCI_NAME} NAMES luci_${LUCI_NAME} PATHS ${EXT_OVERLAY_DIR}/lib)
+ if (NOT LUCI_LIB_PATH_${LUCI_NAME})
+ return()
+ endif()
+ set_target_properties(luci::${LUCI_NAME} PROPERTIES
+ IMPORTED_LOCATION ${LUCI_LIB_PATH_${LUCI_NAME}}
+ INTERFACE_INCLUDE_DIRECTORIES ${LUCI_HEADERS})
+endmacro()
+
+_load_library(env)
+_load_library(export)
+_load_library(import)
+_load_library(lang)
+_load_library(logex)
+_load_library(log)
+_load_library(partition)
+_load_library(pass)
+_load_library(plan)
+_load_library(profile)
+_load_library(service)
+
+# Need luci::loco to avoid "DSO missing from command line" link error
+# TODO Find a better way to do this
+add_library(luci::loco SHARED IMPORTED)
+find_library(LOCO_LIB_PATH NAMES loco PATHS ${EXT_OVERLAY_DIR}/lib)
+if (NOT LOCO_LIB_PATH)
+ return()
+endif()
+set_target_properties(luci::loco PROPERTIES
+ IMPORTED_LOCATION ${LOCO_LIB_PATH}
+ INTERFACE_INCLUDE_DIRECTORIES ${LUCI_HEADERS})
+
+set(Luci_FOUND TRUE)
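Each _load_library call above defines an imported luci::<name> target, so downstream code links them by namespace. A minimal sketch with a hypothetical consumer target:

nnfw_find_package(Luci QUIET)
if(Luci_FOUND)
  target_link_libraries(my_importer PRIVATE luci::import luci::service luci::loco)
endif()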
diff --git a/infra/nnfw/cmake/packages/NEON2SSESourceConfig.cmake b/infra/nnfw/cmake/packages/NEON2SSESourceConfig.cmake
deleted file mode 100644
index 7bae616e7..000000000
--- a/infra/nnfw/cmake/packages/NEON2SSESourceConfig.cmake
+++ /dev/null
@@ -1,19 +0,0 @@
-function(_NEON2SSESource_import)
- if(NOT ${DOWNLOAD_NEON2SSE})
- set(NEON2SSESource_FOUND FALSE PARENT_SCOPE)
- return()
- endif(NOT ${DOWNLOAD_NEON2SSE})
-
- nnas_include(ExternalSourceTools)
- nnas_include(OptionTools)
-
- # NOTE TensorFlow 1.12 downloads NEON2SSE from the following URL
- envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
- set(NEON2SSE_URL ${EXTERNAL_DOWNLOAD_SERVER}/intel/ARM_NEON_2_x86_SSE/archive/0f77d9d182265259b135dad949230ecbf1a2633d.tar.gz)
- ExternalSource_Download("neon_2_sse" ${NEON2SSE_URL})
-
- set(NEON2SSESource_DIR ${neon_2_sse_SOURCE_DIR} PARENT_SCOPE)
- set(NEON2SSESource_FOUND ${neon_2_sse_SOURCE_GET} PARENT_SCOPE)
-endfunction(_NEON2SSESource_import)
-
-_NEON2SSESource_import()
diff --git a/infra/nnfw/cmake/packages/PsimdConfig.cmake b/infra/nnfw/cmake/packages/PsimdConfig.cmake
new file mode 100644
index 000000000..a3587b6cf
--- /dev/null
+++ b/infra/nnfw/cmake/packages/PsimdConfig.cmake
@@ -0,0 +1,26 @@
+function(_Psimd_Build)
+ nnas_find_package(PsimdSource QUIET)
+
+ # NOTE This check prevents multiple definitions of the psimd target
+ if(TARGET psimd)
+ set(PsimdSource_DIR ${PsimdSource_DIR} PARENT_SCOPE)
+ set(Psimd_FOUND TRUE PARENT_SCOPE)
+ return()
+ endif(TARGET psimd)
+
+ if(NOT PsimdSource_FOUND)
+ message(STATUS "PSIMD: Source not found")
+ set(Psimd_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT PsimdSource_FOUND)
+
+ add_extdirectory("${PsimdSource_DIR}" PSIMD EXCLUDE_FROM_ALL)
+ set(PsimdSource_DIR ${PsimdSource_DIR} PARENT_SCOPE)
+ set(Psimd_FOUND TRUE PARENT_SCOPE)
+endfunction(_Psimd_Build)
+
+if(BUILD_PSIMD)
+ _Psimd_Build()
+else()
+ set(Psimd_FOUND FALSE)
+endif()
diff --git a/infra/nnfw/cmake/packages/PthreadpoolConfig.cmake b/infra/nnfw/cmake/packages/PthreadpoolConfig.cmake
new file mode 100644
index 000000000..6283826f6
--- /dev/null
+++ b/infra/nnfw/cmake/packages/PthreadpoolConfig.cmake
@@ -0,0 +1,35 @@
+function(_Pthreadpool_Build)
+ nnas_find_package(PthreadpoolSource QUIET)
+
+ # NOTE This check prevents multiple definitions of the pthreadpool target
+ if(TARGET pthreadpool)
+ set(PthreadpoolSource_DIR ${PthreadpoolSource_DIR} PARENT_SCOPE)
+ set(Pthreadpool_FOUND TRUE PARENT_SCOPE)
+ return()
+ endif(TARGET pthreadpool)
+
+ if(NOT PthreadpoolSource_FOUND)
+ message(STATUS "PTHREADPOOL: Source not found")
+ set(Pthreadpool_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT PthreadpoolSource_FOUND)
+
+ set(PTHREADPOOL_BUILD_TESTS OFF CACHE BOOL "Build pthreadpool unit tests")
+ set(PTHREADPOOL_BUILD_BENCHMARKS OFF CACHE BOOL "Build pthreadpool micro-benchmarks")
+
+ nnas_find_package(FxdivSource)
+ set(FXDIV_SOURCE_DIR ${FxdivSource_DIR} CACHE STRING "Pre-seeded FXdiv source directory so pthreadpool does not download FXdiv")
+
+ add_extdirectory("${PthreadpoolSource_DIR}" PTHREADPOOL EXCLUDE_FROM_ALL)
+ set_target_properties(pthreadpool PROPERTIES POSITION_INDEPENDENT_CODE ON)
+ # Suppress warnings generated by pthreadpool
+ set_target_properties(pthreadpool PROPERTIES COMPILE_FLAGS "-Wno-deprecated-declarations")
+ set(PthreadpoolSource_DIR ${PthreadpoolSource_DIR} PARENT_SCOPE)
+ set(Pthreadpool_FOUND TRUE PARENT_SCOPE)
+endfunction(_Pthreadpool_Build)
+
+if(BUILD_PTHREADPOOL)
+ _Pthreadpool_Build()
+else()
+ set(Pthreadpool_FOUND FALSE)
+endif()
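The FXDIV_SOURCE_DIR cache entry above pre-seeds pthreadpool's own CMake so it reuses the already-fetched FXdiv instead of downloading it; Fp16Config.cmake earlier does the same with PSIMD_SOURCE_DIR. The pattern, reduced to a sketch with hypothetical names:

# Pre-seed the sub-project's source variable before adding it, so its
# CMake finds the dependency locally instead of downloading it.
set(DEP_SOURCE_DIR "${MyDepSource_DIR}" CACHE STRING "Pre-seeded dependency sources")  # hypothetical
add_extdirectory("${SubProjectSource_DIR}" subproject EXCLUDE_FROM_ALL)                # hypothetical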
diff --git a/infra/nnfw/cmake/packages/Ruy/CMakeLists.txt b/infra/nnfw/cmake/packages/Ruy/CMakeLists.txt
index f4d9f8881..a1c4656e3 100644
--- a/infra/nnfw/cmake/packages/Ruy/CMakeLists.txt
+++ b/infra/nnfw/cmake/packages/Ruy/CMakeLists.txt
@@ -1,4 +1,4 @@
-set(RUY_BASE ${RuySource_DIR}/ruy)
+set(RUY_BASE ${TensorFlowRuySource_DIR}/ruy)
#
# Ruy library
@@ -13,6 +13,7 @@ list(REMOVE_ITEM RUY_SRCS "${RUY_BASE}/example.cc")
list(REMOVE_ITEM RUY_SRCS "${RUY_BASE}/example_advanced.cc")
list(REMOVE_ITEM RUY_SRCS "${RUY_BASE}/tune_tool.cc")
list(REMOVE_ITEM RUY_SRCS "${RUY_BASE}/pmu.cc")
+list(REMOVE_ITEM RUY_SRCS "${RUY_BASE}/create_trmul_params.cc")
list(APPEND RUY_INSTRUMENTATION_SRCS "${RUY_BASE}/profiler/instrumentation.cc")
@@ -21,12 +22,16 @@ if(PROFILE_RUY)
list(APPEND RUY_PROFILER_SRCS "${RUY_BASE}/profiler/treeview.cc")
endif(PROFILE_RUY)
-list(APPEND RUY_INCLUDES "${RuySource_DIR}")
+list(APPEND RUY_INCLUDES "${TensorFlowRuySource_DIR}")
add_library(ruy STATIC ${RUY_SRCS})
target_include_directories(ruy SYSTEM PUBLIC ${RUY_INCLUDES})
target_compile_options(ruy PRIVATE -O3)
+target_include_directories(ruy PRIVATE ${CpuInfoSource_DIR})
+target_link_libraries(ruy PRIVATE cpuinfo)
+target_compile_definitions(ruy PRIVATE RUY_HAVE_CPUINFO)
+
add_library(ruy_instrumentation ${RUY_INSTRUMENTATION_SRCS})
target_include_directories(ruy_instrumentation SYSTEM PUBLIC ${RUY_INCLUDES})
target_compile_options(ruy_instrumentation PRIVATE -O3)
diff --git a/infra/nnfw/cmake/packages/RuyConfig.cmake b/infra/nnfw/cmake/packages/RuyConfig.cmake
index 278e33cb3..6f5f4b71e 100644
--- a/infra/nnfw/cmake/packages/RuyConfig.cmake
+++ b/infra/nnfw/cmake/packages/RuyConfig.cmake
@@ -1,22 +1,42 @@
-function(_Ruy_import)
+function(_Ruy_Build)
# NOTE This line prevents multiple definitions of ruy target
if(TARGET ruy)
- set(Ruy_FOUND TRUE)
+ set(Ruy_FOUND TRUE PARENT_SCOPE)
return()
endif(TARGET ruy)
- nnfw_find_package(RuySource QUIET)
+ nnas_find_package(TensorFlowRuySource EXACT 2.8 QUIET)
+ nnfw_find_package(CpuInfo QUIET)
- if(NOT RuySource_FOUND)
+ if(NOT TensorFlowRuySource_FOUND)
+ message(STATUS "RUY: Source not found")
set(Ruy_FOUND FALSE PARENT_SCOPE)
return()
- endif(NOT RuySource_FOUND)
+ endif(NOT TensorFlowRuySource_FOUND)
- if(BUILD_RUY)
- add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/Ruy" ruy)
- endif(BUILD_RUY)
+ if (NOT CpuInfo_FOUND)
+ message(STATUS "RUY: CPUINFO not found")
+ set(Ruy_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT CpuInfo_FOUND)
+
+ # Ruy's cmake requires cmake >= 3.14
+ # Once we require cmake >= 3.14, enable the commented-out code below
+ #if(PROFILE_RUY)
+ # # Will be used on ruy build
+ # set(RUY_PROFILER ON)
+ #endif(PROFILE_RUY)
+ #add_extdirectory("${RuySource_DIR}" Ruy)
+ #
+ ## Ignore warning from ruy
+ #target_compile_options(ruy INTERFACE -Wno-comment)
+ add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/Ruy" ruy)
set(Ruy_FOUND TRUE PARENT_SCOPE)
-endfunction(_Ruy_import)
+endfunction(_Ruy_Build)
-_Ruy_import()
+if(BUILD_RUY)
+ _Ruy_Build()
+else(BUILD_RUY)
+ set(Ruy_FOUND FALSE)
+endif(BUILD_RUY)
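With this change, ruy is built only when the caller opts in via BUILD_RUY, and the target comes from the in-tree Ruy/CMakeLists.txt wrapper rather than ruy's own cmake. A consumer sketch, assuming a hypothetical kernel target:

nnfw_find_package(Ruy QUIET)
if(Ruy_FOUND)
  target_link_libraries(my_matmul_kernel PRIVATE ruy)
endif()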
diff --git a/infra/nnfw/cmake/packages/RuySourceConfig.cmake b/infra/nnfw/cmake/packages/RuySourceConfig.cmake
deleted file mode 100644
index 08170fb4f..000000000
--- a/infra/nnfw/cmake/packages/RuySourceConfig.cmake
+++ /dev/null
@@ -1,19 +0,0 @@
-function(_RuySource_import)
- if(NOT ${DOWNLOAD_RUY})
- set(RuySource_DIR FALSE PARENT_SCOPE)
- return()
- endif(NOT ${DOWNLOAD_RUY})
-
- nnas_include(ExternalSourceTools)
- nnas_include(OptionTools)
-
- # NOTE Downloads source from latest ruy library (2020-04-10)
- envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
- set(RUY_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/ruy/archive/2e2658f964638ab7aa562d4b48b76007d44e38f0.tar.gz)
- ExternalSource_Download("ruy" ${RUY_URL})
-
- set(RuySource_DIR ${ruy_SOURCE_DIR} PARENT_SCOPE)
- set(RuySource_FOUND ${ruy_SOURCE_GET} PARENT_SCOPE)
-endfunction(_RuySource_import)
-
-_RuySource_import()
diff --git a/infra/nnfw/cmake/packages/TRIXEngineConfig.cmake b/infra/nnfw/cmake/packages/TRIXEngineConfig.cmake
new file mode 100644
index 000000000..dfc10ebf2
--- /dev/null
+++ b/infra/nnfw/cmake/packages/TRIXEngineConfig.cmake
@@ -0,0 +1,42 @@
+# Looking for pre-installed TRIX engine package
+set(TRIX_ENGINE_PREFIX "/usr" CACHE PATH "Where to find TRIX engine header and library")
+
+function(_TRIXEngine_import)
+ # Find the header & lib
+ find_library(TRIXEngine_LIB
+ NAMES npu-engine
+ PATHS "${TRIX_ENGINE_PREFIX}/lib"
+ )
+
+ find_path(TRIXEngine_INCLUDE_DIR
+ NAMES libnpuhost.h
+ PATHS "${TRIX_ENGINE_PREFIX}/include/npu-engine"
+ )
+
+ set(TRIXEngine_FOUND TRUE)
+
+ if(NOT TRIXEngine_LIB)
+ set(TRIXEngine_FOUND FALSE)
+ endif(NOT TRIXEngine_LIB)
+
+ if(NOT TRIXEngine_INCLUDE_DIR)
+ set(TRIXEngine_FOUND FALSE)
+ endif(NOT TRIXEngine_INCLUDE_DIR)
+
+ if(NOT TRIXEngine_FOUND)
+ message(STATUS "Failed to find TRIX Engine")
+ else(NOT TRIXEngine_FOUND)
+
+ # Add target
+ if(NOT TARGET trix_engine)
+ add_library(trix_engine INTERFACE)
+ target_link_libraries(trix_engine INTERFACE ${TRIXEngine_LIB})
+ target_include_directories(trix_engine INTERFACE ${TRIXEngine_INCLUDE_DIR})
+ endif(NOT TARGET trix_engine)
+ endif(NOT TRIXEngine_FOUND)
+
+ set(TRIXEngine_FOUND ${TRIXEngine_FOUND} PARENT_SCOPE)
+ set(TRIXEngine_INCLUDE_DIRS ${TRIXEngine_INCLUDE_DIR} PARENT_SCOPE)
+endfunction(_TRIXEngine_import)
+
+_TRIXEngine_import()
diff --git a/infra/nnfw/cmake/packages/TRIXEngineConfigVersion.cmake b/infra/nnfw/cmake/packages/TRIXEngineConfigVersion.cmake
new file mode 100644
index 000000000..0e0a0436e
--- /dev/null
+++ b/infra/nnfw/cmake/packages/TRIXEngineConfigVersion.cmake
@@ -0,0 +1,104 @@
+# This script needs to set:
+#
+# VARIABLE | description
+# --- | ---
+# PACKAGE_VERSION | full provided version string
+# PACKAGE_VERSION_EXACT | true if version is exact match
+# PACKAGE_VERSION_COMPATIBLE | true if version is compatible
+# PACKAGE_VERSION_UNSUITABLE | true if unsuitable as any version
+#
+# Reference: https://cmake.org/cmake/help/v3.10/command/find_package.html
+
+set(TRIX_ENGINE_PREFIX "/usr" CACHE PATH "Where to find TRIX engine header and library")
+
+if(NOT PACKAGE_FIND_VERSION)
+ message(FATAL_ERROR "Please pass version requirement to use TRIX Engine dependency")
+endif()
+
+# Find the header & lib from TRIX_ENGINE_PREFIX
+find_library(TRIXEngine_LIB
+ NAMES npu-engine
+ HINTS "${TRIX_ENGINE_PREFIX}/lib"
+)
+find_path(TRIXEngine_INCLUDE_DIR
+ NAMES libnpuhost.h
+ HINTS "${TRIX_ENGINE_PREFIX}/include/npu-engine"
+)
+
+if(NOT TRIXEngine_INCLUDE_DIR OR NOT TRIXEngine_LIB)
+ set(PACKAGE_VERSION_EXACT FALSE)
+ set(PACKAGE_VERSION_COMPATIBLE FALSE)
+ set(PACKAGE_VERSION_UNSUITABLE TRUE)
+ return()
+endif(NOT TRIXEngine_INCLUDE_DIR OR NOT TRIXEngine_LIB)
+
+# TODO Assert TRIX_ENGINE_PREFIX is directory
+
+# TODO Can we run this only once per configure?
+try_run(MAJOR_VER MAJOR_COMPILABLE "${CMAKE_BINARY_DIR}/TRIXEngineConfigVersion.major"
+ SOURCES "${CMAKE_CURRENT_LIST_DIR}/TRIXEngineConfigVersion.major.cpp"
+ CMAKE_FLAGS
+ "-DINCLUDE_DIRECTORIES=${TRIXEngine_INCLUDE_DIR}"
+ "-DLINK_LIBRARIES=${TRIXEngine_LIB}"
+)
+
+if(NOT MAJOR_COMPILABLE)
+ # This means VERSION < 2.2.7, since the
+ # `getVersion` API was introduced in TRIX Engine 2.2.7
+ if(PACKAGE_FIND_VERSION VERSION_GREATER_EQUAL 2.2.7)
+ set(PACKAGE_VERSION_EXACT FALSE)
+ set(PACKAGE_VERSION_COMPATIBLE FALSE)
+ set(PACKAGE_VERSION_UNSUITABLE TRUE)
+ return()
+ else()
+ # TODO How to support this case?
+ message(FATAL_ERROR "TRIX Engine version is too low (< 2.2.7)")
+ endif()
+endif(NOT MAJOR_COMPILABLE)
+
+try_run(MINOR_VER MINOR_COMPILABLE "${CMAKE_BINARY_DIR}/TRIXEngineConfigVersion.minor"
+ SOURCES "${CMAKE_CURRENT_LIST_DIR}/TRIXEngineConfigVersion.minor.cpp"
+ CMAKE_FLAGS
+ "-DINCLUDE_DIRECTORIES=${TRIXEngine_INCLUDE_DIR}"
+ "-DLINK_LIBRARIES=${TRIXEngine_LIB}"
+)
+
+try_run(EXTRA_VER EXTRA_COMPILABLE "${CMAKE_BINARY_DIR}/TRIXEngineConfigVersion.extra"
+ SOURCES "${CMAKE_CURRENT_LIST_DIR}/TRIXEngineConfigVersion.extra.cpp"
+ CMAKE_FLAGS
+ "-DINCLUDE_DIRECTORIES=${TRIXEngine_INCLUDE_DIR}"
+ "-DLINK_LIBRARIES=${TRIXEngine_LIB}"
+)
+
+macro(assert)
+ # if(NOT ${ARGV}) causes an error when ARGV starts with 'NOT'
+ if(${ARGV})
+ # Do nothing
+ else(${ARGV})
+ message(FATAL_ERROR "Internal error ${ARGV}")
+ endif(${ARGV})
+endmacro(assert)
+
+assert(MAJOR_COMPILABLE)
+assert(MINOR_COMPILABLE)
+assert(EXTRA_COMPILABLE)
+assert(NOT MAJOR_VER STREQUAL FAILED_TO_RUN)
+assert(NOT MINOR_VER STREQUAL FAILED_TO_RUN)
+assert(NOT EXTRA_VER STREQUAL FAILED_TO_RUN)
+
+set(PACKAGE_VERSION ${MAJOR_VER}.${MINOR_VER}.${EXTRA_VER})
+
+if(PACKAGE_VERSION VERSION_EQUAL PACKAGE_FIND_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+else()
+ set(PACKAGE_VERSION_EXACT FALSE)
+endif()
+
+# Assume TRIX Engine is backward compatible
+if(PACKAGE_VERSION VERSION_GREATER_EQUAL PACKAGE_FIND_VERSION)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+else()
+ set(PACKAGE_VERSION_COMPATIBLE FALSE)
+endif()
+
+set(PACKAGE_VERSION_UNSUITABLE FALSE)
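With this version file in place, callers can state a minimum TRIX Engine version and CMake runs the probes above to check it. A sketch with a hypothetical backend target; the trix_engine interface target is defined in TRIXEngineConfig.cmake earlier in this diff:

nnfw_find_package(TRIXEngine 2.2.7 QUIET)
if(TRIXEngine_FOUND)
  target_link_libraries(my_npu_backend PRIVATE trix_engine)
endif()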
diff --git a/infra/nnfw/cmake/packages/TRIXEngineConfigVersion.extra.cpp b/infra/nnfw/cmake/packages/TRIXEngineConfigVersion.extra.cpp
new file mode 100644
index 000000000..05fe70ddb
--- /dev/null
+++ b/infra/nnfw/cmake/packages/TRIXEngineConfigVersion.extra.cpp
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <libnpuhost.h>
+
+int main(void)
+{
+ uint32_t ret = 0;
+ getVersion(nullptr, nullptr, &ret);
+ return ret;
+}
diff --git a/infra/nnfw/cmake/packages/TRIXEngineConfigVersion.major.cpp b/infra/nnfw/cmake/packages/TRIXEngineConfigVersion.major.cpp
new file mode 100644
index 000000000..a3de06d65
--- /dev/null
+++ b/infra/nnfw/cmake/packages/TRIXEngineConfigVersion.major.cpp
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <libnpuhost.h>
+
+int main(void)
+{
+ uint32_t ret = 0;
+ getVersion(&ret, nullptr, nullptr);
+ return ret;
+}
diff --git a/infra/nnfw/cmake/packages/TRIXEngineConfigVersion.minor.cpp b/infra/nnfw/cmake/packages/TRIXEngineConfigVersion.minor.cpp
new file mode 100644
index 000000000..1193a5c18
--- /dev/null
+++ b/infra/nnfw/cmake/packages/TRIXEngineConfigVersion.minor.cpp
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <libnpuhost.h>
+
+int main(void)
+{
+ uint32_t ret = 0;
+ getVersion(nullptr, &ret, nullptr);
+ return ret;
+}
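Each probe encodes one version component in its process exit status: getVersion fills exactly one out-parameter, main returns it, and try_run stores that return value in MAJOR_VER/MINOR_VER/EXTRA_VER. A reduced sketch of the mechanism, mirroring the try_run form used above; probe.cpp is a hypothetical probe returning a version component:

try_run(PROBE_EXIT_CODE PROBE_COMPILED "${CMAKE_BINARY_DIR}/version_probe"
  SOURCES "${CMAKE_CURRENT_LIST_DIR}/probe.cpp"  # hypothetical probe source
)
# PROBE_EXIT_CODE now holds the probe's return value, i.e. the version component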
diff --git a/infra/nnfw/cmake/packages/TensorFlowEigen-1.13.1/TensorFlowEigenConfig.cmake b/infra/nnfw/cmake/packages/TensorFlowEigen-1.13.1/TensorFlowEigenConfig.cmake
deleted file mode 100644
index 253b290bd..000000000
--- a/infra/nnfw/cmake/packages/TensorFlowEigen-1.13.1/TensorFlowEigenConfig.cmake
+++ /dev/null
@@ -1,19 +0,0 @@
-function(_Eigen_import)
- nnas_find_package(EigenSource QUIET)
-
- if(NOT EigenSource_FOUND)
- set(TensorFlowEigen_1_13_1_FOUND FALSE PARENT_SCOPE)
- return()
- endif(NOT EigenSource_FOUND)
-
- if(NOT TARGET eigen-tf-1.13.1)
- add_library(eigen-tf-1.13.1 INTERFACE)
- target_include_directories(eigen-tf-1.13.1 SYSTEM INTERFACE "${EigenSource_DIR}")
- # Add EIGEN_MPL2_ONLY to remove license issue posibility
- target_compile_definitions(eigen-tf-1.13.1 INTERFACE EIGEN_MPL2_ONLY)
- endif(NOT TARGET eigen-tf-1.13.1)
-
- set(TensorFlowEigen_1_13_1_FOUND TRUE PARENT_SCOPE)
-endfunction(_Eigen_import)
-
-_Eigen_import()
diff --git a/infra/nnfw/cmake/packages/TensorFlowEigen-1.13.1/TensorFlowEigenConfigVersion.cmake b/infra/nnfw/cmake/packages/TensorFlowEigen-1.13.1/TensorFlowEigenConfigVersion.cmake
deleted file mode 100644
index ed79ecd91..000000000
--- a/infra/nnfw/cmake/packages/TensorFlowEigen-1.13.1/TensorFlowEigenConfigVersion.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-set(PACKAGE_VERSION "1.13.1")
-set(PACKAGE_VERSION_EXACT FALSE)
-set(PACKAGE_VERSION_COMPATIBLE FALSE)
-set(PACKAGE_VERSION_UNSUITABLE TRUE)
-
-if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
- set(PACKAGE_VERSION_EXACT TRUE)
- set(PACKAGE_VERSION_UNSUITABLE FALSE)
-endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/nnfw/cmake/packages/TensorFlowGpuConfig.cmake b/infra/nnfw/cmake/packages/TensorFlowGpuConfig.cmake
new file mode 100644
index 000000000..5d20dd3c4
--- /dev/null
+++ b/infra/nnfw/cmake/packages/TensorFlowGpuConfig.cmake
@@ -0,0 +1,51 @@
+# TensorFlowGpuConfig.cmake
+macro(return_unless VAR)
+if(NOT ${VAR})
+ message("TensorFlowGpu: ${VAR} NOT TRUE")
+ set(TensorFlowGpu_FOUND FALSE PARENT_SCOPE)
+ return()
+endif(NOT ${VAR})
+endmacro(return_unless)
+
+function(_Build_TfliteGpuDelegate_)
+ nnas_find_package(TensorFlowSource EXACT 2.8.0 QUIET)
+ return_unless(TensorFlowSource_FOUND)
+
+ nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.8.0 QUIET)
+ return_unless(TensorFlowGEMMLowpSource_FOUND)
+
+ nnas_find_package(TensorFlowEigenSource EXACT 2.8.0 QUIET)
+ return_unless(TensorFlowEigenSource_FOUND)
+
+ nnas_find_package(AbseilSource REQUIRED)
+ return_unless(AbseilSource_FOUND)
+
+ nnas_find_package(Farmhash REQUIRED)
+ return_unless(Farmhash_FOUND)
+
+ nnas_find_package(Fp16Source REQUIRED)
+ return_unless(Fp16Source_FOUND)
+
+ nnas_find_package(VulkanSource QUIET)
+ return_unless(VulkanSource_FOUND)
+
+ nnas_find_package(Opengl_HeadersSource QUIET)
+ return_unless(Opengl_HeadersSource_FOUND)
+
+ nnas_find_package(Egl_HeadersSource QUIET)
+ return_unless(Egl_HeadersSource_FOUND)
+
+ if(NOT TARGET TensorFlowGpu)
+ nnas_include(ExternalProjectTools)
+ add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/TensorFlowLiteGpu" TensorFlowLiteGpu)
+ endif()
+ set(TensorFlowSource_DIR ${TensorFlowSource_DIR} PARENT_SCOPE)
+ set(TensorFlowGpu_DIR ${TensorFlowGpu_DIR} PARENT_SCOPE)
+endfunction(_Build_TfliteGpuDelegate_)
+
+if(BUILD_TENSORFLOW_LITE_GPU)
+ _Build_TfliteGpuDelegate_()
+ set(TensorFlowGpu_FOUND TRUE PARENT_SCOPE)
+else(BUILD_TENSORFLOW_LITE_GPU)
+ set(TensorFlowGpu_FOUND FALSE PARENT_SCOPE)
+endif(BUILD_TENSORFLOW_LITE_GPU)
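The delegate library itself is built by the TensorFlowLiteGpu subdirectory (added later in this diff) as a static TensorFlowGpu target. A consumer sketch with a hypothetical backend target:

nnfw_find_package(TensorFlowGpu QUIET)
if(TensorFlowGpu_FOUND)
  target_link_libraries(my_gpu_cl_backend PRIVATE TensorFlowGpu)
endif()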
diff --git a/infra/nnfw/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLite/CMakeLists.txt b/infra/nnfw/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLite/CMakeLists.txt
deleted file mode 100644
index 2c9618d68..000000000
--- a/infra/nnfw/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLite/CMakeLists.txt
+++ /dev/null
@@ -1,62 +0,0 @@
-set(TENSORFLOW_LITE_BASE ${TensorFlowSource_DIR}/tensorflow/lite)
-
-#
-# Tensorflow Lite library
-#
-file(GLOB TFLITE_CORE_SRCS "${TENSORFLOW_LITE_BASE}/*.c" "${TENSORFLOW_LITE_BASE}/*.cc" "${TENSORFLOW_LITE_BASE}/core/*.cc")
-file(GLOB TFLITE_CORE_TESTS "${TENSORFLOW_LITE_BASE}/*test*.cc")
-list(REMOVE_ITEM TFLITE_CORE_SRCS ${TFLITE_CORE_TESTS})
-
-file(GLOB_RECURSE TFLITE_KERNEL_SRCS "${TENSORFLOW_LITE_BASE}/kernels/*.cc")
-file(GLOB_RECURSE TFLITE_KERNEL_TESTS "${TENSORFLOW_LITE_BASE}/kernels/*test*.cc")
-list(REMOVE_ITEM TFLITE_KERNEL_SRCS ${TFLITE_KERNEL_TESTS})
-
-file(GLOB TFLITE_LIB_SRCS "${TENSORFLOW_LITE_BASE}/c/*.c" "${TENSORFLOW_LITE_BASE}/c/*.cc")
-file(GLOB TFLITE_LIB_TESTS "${TENSORFLOW_LITE_BASE}/c/*test*.cc")
-list(REMOVE_ITEM TFLITE_LIB_SRCS ${TFLITE_LIB_TESTS})
-
-file(GLOB TFLITE_API_SRCS "${TENSORFLOW_LITE_BASE}/core/api/*.c" "${TENSORFLOW_LITE_BASE}/core/api/*.cc")
-file(GLOB TFLITE_API_TESTS "${TENSORFLOW_LITE_BASE}/core/api/*test*.cc")
-list(REMOVE_ITEM TFLITE_API_SRCS ${TFLITE_API_TESTS})
-
-file(GLOB TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/*.cc")
-file(GLOB TFLITE_PROFILING_TESTS "${TENSORFLOW_LITE_BASE}/profiling/*test*.cc")
-list(REMOVE_ITEM TFLITE_PROFILING_SRCS ${TFLITE_PROFILING_TESTS})
-
-# We will use our own BuiltinOpResolver
-list(REMOVE_ITEM TFLITE_KERNEL_SRCS "${TENSORFLOW_LITE_BASE}/kernels/register.cc")
-# We will use our own summarizer
-list(REMOVE_ITEM TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/profile_summarizer.cc")
-list(APPEND TFLITE_SRCS ${TFLITE_CORE_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_KERNEL_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_LIB_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_API_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_PROFILING_SRCS})
-
-list(APPEND TFLITE_SRCS "${FarmhashSource_DIR}/src/farmhash.cc")
-
-list(APPEND TFLITE_INCLUDES "${TensorFlowSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${AbseilSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${GEMMLowpSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${FarmhashSource_DIR}/src")
-
-if(NEON2SSESource_FOUND)
- list(APPEND TFLITE_INCLUDES "${NEON2SSESource_DIR}")
-endif(NEON2SSESource_FOUND)
-
-# This kernels are not used on nnfw
-## spectrogram
-list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/kernels/audio_spectrogram.cc")
-list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/kernels/audio_spectrogram_test.cc")
-list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/kernels/internal/spectrogram.cc")
-
-add_library(tensorflow-lite STATIC ${TFLITE_SRCS})
-target_include_directories(tensorflow-lite SYSTEM PUBLIC ${TFLITE_INCLUDES})
-target_compile_definitions(tensorflow-lite PUBLIC "GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK")
-set_property(TARGET tensorflow-lite PROPERTY POSITION_INDEPENDENT_CODE ON)
-target_link_libraries(tensorflow-lite eigen-tf-1.13.1 flatbuffers::flatbuffers ${LIB_PTHREAD} dl)
-
-if(ANDROID)
- target_link_libraries(tensorflow-lite log)
- target_include_directories(tensorflow-lite PUBLIC "${NDK_DIR}/..")
-endif()
diff --git a/infra/nnfw/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLiteConfig.cmake b/infra/nnfw/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLiteConfig.cmake
deleted file mode 100644
index 4cd7610e6..000000000
--- a/infra/nnfw/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLiteConfig.cmake
+++ /dev/null
@@ -1,73 +0,0 @@
-# NOTE This line prevents multiple definitions of tensorflow-lite target
-if(TARGET tensorflow-lite)
- set(TensorFlowLite_FOUND TRUE)
- return()
-endif(TARGET tensorflow-lite)
-
-if(BUILD_TENSORFLOW_LITE)
- macro(return_unless VAR)
- if(NOT ${VAR})
- set(TensorFlowLite_FOUND PARENT_SCOPE)
- return()
- endif(NOT ${VAR})
- endmacro(return_unless)
-
- # Required packages
- nnas_find_package(AbseilSource QUIET)
- return_unless(AbseilSource_FOUND)
- nnfw_find_package(TensorFlowEigen EXACT 1.13.1 QUIET)
- return_unless(TensorFlowEigen_1_13_1_FOUND)
- nnfw_find_package(FarmhashSource QUIET)
- return_unless(FarmhashSource_FOUND)
- nnfw_find_package(FlatBuffers QUIET)
- return_unless(FlatBuffers_FOUND)
- nnfw_find_package(GEMMLowpSource QUIET)
- return_unless(GEMMLowpSource_FOUND)
- nnas_find_package(TensorFlowSource EXACT 1.13.1 QUIET)
- return_unless(TensorFlowSource_FOUND)
-
- # Optional packages
- nnfw_find_package(NEON2SSESource QUIET)
-
- nnas_include(ExternalProjectTools)
- add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/TensorFlowLite" tflite)
-
- set(TensorFlowLite_FOUND TRUE)
- return()
-endif(BUILD_TENSORFLOW_LITE)
-
-# Use pre-built TensorFlow Lite
-find_path(TFLITE_INCLUDE_DIR NAMES tensorflow/lite/interpreter.h)
-find_library(TFLITE_LIB NAMES tensorflow-lite)
-
-if(NOT TFLITE_INCLUDE_DIR)
- set(TensorFlowLite_FOUND FALSE)
- return()
-endif(NOT TFLITE_INCLUDE_DIR)
-
-if(NOT TFLITE_LIB)
- set(TensorFlowLite_FOUND FALSE)
- return()
-endif(NOT TFLITE_LIB)
-
-message(STATUS "Found TensorFlow Lite: TRUE (include: ${TFLITE_INCLUDE_DIR}, lib: ${TFLITE_LIB}")
-
-# TODO Use IMPORTED target
-add_library(tensorflow-lite INTERFACE)
-target_include_directories(tensorflow-lite SYSTEM INTERFACE ${TFLITE_INCLUDE_DIR})
-target_link_libraries(tensorflow-lite INTERFACE ${TFLITE_LIB})
-find_package(Flatbuffers)
-if(Flatbuffers_FOUND)
- target_link_libraries(tensorflow-lite INTERFACE flatbuffers::flatbuffers)
-endif(Flatbuffers_FOUND)
-
-# Prefer -pthread to -lpthread
-set(THREADS_PREFER_PTHREAD_FLAG TRUE)
-set(CMAKE_THREAD_PREFER_PTHREAD TRUE)
-find_package(Threads QUIET)
-
-if(Threads_FOUND)
- target_link_libraries(tensorflow-lite INTERFACE ${CMAKE_THREAD_LIBS_INIT})
-endif(Threads_FOUND)
-
-set(TensorFlowLite_FOUND TRUE)
diff --git a/infra/nnfw/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLiteConfigVersion.cmake b/infra/nnfw/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLiteConfigVersion.cmake
deleted file mode 100644
index ed79ecd91..000000000
--- a/infra/nnfw/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLiteConfigVersion.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-set(PACKAGE_VERSION "1.13.1")
-set(PACKAGE_VERSION_EXACT FALSE)
-set(PACKAGE_VERSION_COMPATIBLE FALSE)
-set(PACKAGE_VERSION_UNSUITABLE TRUE)
-
-if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
- set(PACKAGE_VERSION_EXACT TRUE)
- set(PACKAGE_VERSION_UNSUITABLE FALSE)
-endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/CMakeLists.txt b/infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/CMakeLists.txt
deleted file mode 100644
index 20547b92d..000000000
--- a/infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/CMakeLists.txt
+++ /dev/null
@@ -1,123 +0,0 @@
-# Reference: https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/lite/tools/make/Makefile
-#
-# Tensorflow Lite library 2.3.0
-#
-set(TENSORFLOW_LITE_BASE ${TFLiteVanillaTensorFlowSource_DIR}/tensorflow/lite)
-
-file(GLOB TFLITE_CORE_SRCS "${TENSORFLOW_LITE_BASE}/*.c"
- "${TENSORFLOW_LITE_BASE}/*.cc"
- "${TENSORFLOW_LITE_BASE}/core/*.cc")
-
-file(GLOB_RECURSE TFLITE_KERNEL_SRCS "${TENSORFLOW_LITE_BASE}/kernels/*.cc")
-
-file(GLOB TFLITE_LIB_SRCS "${TENSORFLOW_LITE_BASE}/c/*.c" "${TENSORFLOW_LITE_BASE}/c/*.cc")
-
-file(GLOB TFLITE_API_SRCS "${TENSORFLOW_LITE_BASE}/core/api/*.c"
- "${TENSORFLOW_LITE_BASE}/core/api/*.cc")
-
-list(APPEND TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/memory_info.cc")
-list(APPEND TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/time.cc")
-
-file(GLOB TFLITE_EXPERIMENTAL_SRCS "${TENSORFLOW_LITE_BASE}/experimental/resource/*.cc")
-
-file(GLOB TFLITE_SPARSITY_SRCS "${TENSORFLOW_LITE_BASE}/tools/optimize/sparsity/*.cc")
-
-list(APPEND TFLITE_SRCS ${TFLITE_CORE_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_KERNEL_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_LIB_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_API_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_PROFILING_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_EXPERIMENTAL_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_SPARSITY_SRCS})
-
-# externals
-list(APPEND TFLITE_SRCS "${TFLiteVanillaFarmhashSource_DIR}/src/farmhash.cc")
-list(APPEND TFLITE_SRCS "${TFLiteVanillaFFT2DSource_DIR}/fftsg.c")
-list(APPEND TFLITE_SRCS "${TFLiteVanillaFFT2DSource_DIR}/fftsg2d.c")
-list(APPEND TFLITE_SRCS "${TFLiteVanillaFlatBuffersSource_DIR}/src/util.cpp")
-
-# externals - absl
-file(GLOB_RECURSE ABSL_SRCS "${TFLiteVanillaAbslSource_DIR}/absl/*.cc")
-file(GLOB_RECURSE ABSL_EXCLS "${TFLiteVanillaAbslSource_DIR}/absl/*test*.cc"
- "${TFLiteVanillaAbslSource_DIR}/absl/*benchmark*.cc"
- "${TFLiteVanillaAbslSource_DIR}/absl/synchronization/*.cc"
- "${TFLiteVanillaAbslSource_DIR}/absl/debugging/*.cc"
- "${TFLiteVanillaAbslSource_DIR}/absl/hash/*.cc"
- "${TFLiteVanillaAbslSource_DIR}/absl/flags/*.cc"
- "${TFLiteVanillaAbslSource_DIR}/absl/random/*.cc")
-list(REMOVE_ITEM ABSL_SRCS ${ABSL_EXCLS})
-list(APPEND TFLITE_SRCS ${ABSL_SRCS})
-
-# externals - ruy
-file(GLOB RUY_SRCS "${TFLiteVanillaRuySource_DIR}/ruy/*.cc")
-file(GLOB_RECURSE RUY_EXCLS "${TFLiteVanillaRuySource_DIR}/ruy/*test*.cc"
- "${TFLiteVanillaRuySource_DIR}/ruy/*benchmark*.cc"
- "${TFLiteVanillaRuySource_DIR}/ruy/*example*.cc")
-list(REMOVE_ITEM RUY_SRCS ${RUY_EXCLS})
-# Temporary fix for ruy compilation error.
-# TODO(b/158800055): Remove this hack once the ruy version is correctly bumped.
-list(REMOVE_ITEM RUY_SRCS "${TFLiteVanillaRuySource_DIR}/ruy/prepare_packed_matrices.cc")
-list(APPEND TFLITE_SRCS ${RUY_SRCS})
-
-
-# Build with mmap? true
-# caution: v2.3.0's Makefile has wrong code on this part. This is fixed on master branch.
-set(BUILD_WITH_MMAP TRUE)
-if(${BUILD_WITH_MMAP})
- list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/mmap_allocation_disabled.cc")
-else()
- list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/mmap_allocation.cc")
-endif()
-
-# Build with nnapi? true
-# caution: this nnapi delegate comes from tflite, not ours.
-set(BUILD_WITH_NNAPI TRUE)
-if(${BUILD_WITH_NNAPI})
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/nnapi_delegate.cc")
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/quant_lstm_sup.cc")
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_implementation.cc")
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_util.cc")
-else()
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/nnapi_delegate_disabled.cc")
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_implementation_disabled.cc")
-endif()
-
-# ios: we don't support ios
-list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/minimal_logging_ios.cc")
-
-# android
-if(NOT ANDROID)
- list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/minimal_logging_android.cc")
-endif()
-
-# exclude some source files
-file(GLOB_RECURSE TFLITE_EXCLS "${TENSORFLOW_LITE_BASE}/*test*.cc"
- "${TENSORFLOW_LITE_BASE}/*benchmark*.cc"
- "${TENSORFLOW_LITE_BASE}/*example*.cc"
- "${TENSORFLOW_LITE_BASE}/*tool*.cc")
-list(REMOVE_ITEM TFLITE_SRCS ${TFLITE_EXCLS})
-
-# include headers
-list(APPEND TFLITE_INCLUDES "${TFLiteVanillaTensorFlowSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${TFLiteVanillaEigenSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${TFLiteVanillaAbslSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${TFLiteVanillaGEMMLowpSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${TFLiteVanillaNEON2SSESource_DIR}")
-list(APPEND TFLITE_INCLUDES "${TFLiteVanillaFarmhashSource_DIR}/src")
-list(APPEND TFLITE_INCLUDES "${TFLiteVanillaFlatBuffersSource_DIR}/include")
-list(APPEND TFLITE_INCLUDES "${TFLiteVanillaFP16Source_DIR}/include")
-list(APPEND TFLITE_INCLUDES "${TFLiteVanillaRuySource_DIR}")
-
-add_library(tensorflow-lite-2.3.0 STATIC ${TFLITE_SRCS})
-target_include_directories(tensorflow-lite-2.3.0 SYSTEM PUBLIC ${TFLITE_INCLUDES})
-target_compile_definitions(tensorflow-lite-2.3.0 PUBLIC "GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK -DTFLITE_WITH_RUY -DTFLITE_WITH_RUY_GEMV")
-set_property(TARGET tensorflow-lite-2.3.0 PROPERTY POSITION_INDEPENDENT_CODE ON)
-target_link_libraries(tensorflow-lite-2.3.0 eigen ${LIB_PTHREAD} dl)
-if(NOT ANDROID AND ${BUILD_WITH_NNAPI})
- target_link_libraries(tensorflow-lite-2.3.0 rt)
-endif()
-
-if(ANDROID)
- target_link_libraries(tensorflow-lite-2.3.0 log)
- target_include_directories(tensorflow-lite-2.3.0 PUBLIC "${NDK_DIR}/..")
-endif()
diff --git a/infra/nnfw/cmake/packages/TensorFlowLite-2.3.0Config.cmake b/infra/nnfw/cmake/packages/TensorFlowLite-2.3.0Config.cmake
deleted file mode 100644
index d00ca96a6..000000000
--- a/infra/nnfw/cmake/packages/TensorFlowLite-2.3.0Config.cmake
+++ /dev/null
@@ -1,100 +0,0 @@
-if(BUILD_TENSORFLOW_LITE_2_3_0)
- macro(return_unless VAR)
- if(NOT ${VAR})
- message("${VAR} NOT TRUE")
- set(TensorFlowLite_2_3_0_FOUND PARENT_SCOPE)
- return()
- endif(NOT ${VAR})
- endmacro(return_unless)
-
- nnas_include(ExternalSourceTools)
- nnas_include(OptionTools)
-
- # Below urls come from https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/lite/tools/make/Makefile
-
- set(absl_url "https://github.com/abseil/abseil-cpp/archive/df3ea785d8c30a9503321a3d35ee7d35808f190d.tar.gz")
- ExternalSource_Download("TFLiteVanilla_Absl" ${absl_url})
- set(TFLiteVanillaAbslSource_DIR "${TFLiteVanilla_Absl_SOURCE_DIR}")
- if (NOT TFLiteVanillaAbslSource_DIR STREQUAL "")
- set(TFLiteVanillaAbslSource_FOUND TRUE)
- endif()
- return_unless(TFLiteVanillaAbslSource_FOUND)
-
- set(eigen_url "https://gitlab.com/libeigen/eigen/-/archive/386d809bde475c65b7940f290efe80e6a05878c4/eigen-386d809bde475c65b7940f290efe80e6a05878c4.tar.gz")
- ExternalSource_Download("TFLiteVanilla_Eigen" ${eigen_url})
- set(TFLiteVanillaEigenSource_DIR "${TFLiteVanilla_Eigen_SOURCE_DIR}")
- if (NOT TFLiteVanillaEigenSource_DIR STREQUAL "")
- set(TFLiteVanillaEigenSource_FOUND TRUE)
- endif()
- return_unless(TFLiteVanillaEigenSource_FOUND)
-
- set(farmhash_url "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz")
- ExternalSource_Download("TFLiteVanilla_Farmhash" ${farmhash_url})
- set(TFLiteVanillaFarmhashSource_DIR "${TFLiteVanilla_Farmhash_SOURCE_DIR}")
- if (NOT TFLiteVanillaFarmhashSource_DIR STREQUAL "")
- set(TFLiteVanillaFarmhashSource_FOUND TRUE)
- endif()
- return_unless(TFLiteVanillaFarmhashSource_FOUND)
-
- set(fft2d_url "https://storage.googleapis.com/mirror.tensorflow.org/github.com/petewarden/OouraFFT/archive/v1.0.tar.gz")
- ExternalSource_Download("TFLiteVanilla_FFT2D" ${fft2d_url})
- set(TFLiteVanillaFFT2DSource_DIR "${TFLiteVanilla_FFT2D_SOURCE_DIR}")
- if (NOT TFLiteVanillaFFT2DSource_DIR STREQUAL "")
- set(TFLiteVanillaFFT2DSource_FOUND TRUE)
- endif()
- return_unless(TFLiteVanillaFFT2DSource_FOUND)
-
- set(flatbuffers_url "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/flatbuffers/archive/v1.12.0.tar.gz")
- ExternalSource_Download("TFLiteVanilla_FlatBuffers" ${flatbuffers_url})
- set(TFLiteVanillaFlatBuffersSource_DIR "${TFLiteVanilla_FlatBuffers_SOURCE_DIR}")
- if (NOT TFLiteVanillaFlatBuffersSource_DIR STREQUAL "")
- set(TFLiteVanillaFlatBuffersSource_FOUND TRUE)
- endif()
- return_unless(TFLiteVanillaFlatBuffersSource_FOUND)
-
- set(fp16_url "https://github.com/Maratyszcza/FP16/archive/4dfe081cf6bcd15db339cf2680b9281b8451eeb3.zip")
- ExternalSource_Download("TFLiteVanilla_FP16" ${fp16_url})
- set(TFLiteVanillaFP16Source_DIR "${TFLiteVanilla_FP16_SOURCE_DIR}")
- if (NOT TFLiteVanillaFP16Source_DIR STREQUAL "")
- set(TFLiteVanillaFP16Source_FOUND TRUE)
- endif()
- return_unless(TFLiteVanillaFP16Source_FOUND)
-
- set(gemmlowp_url "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip")
- ExternalSource_Download("TFLiteVanilla_GEMMLowp" ${gemmlowp_url})
- set(TFLiteVanillaGEMMLowpSource_DIR "${TFLiteVanilla_GEMMLowp_SOURCE_DIR}")
- if (NOT TFLiteVanillaGEMMLowpSource_DIR STREQUAL "")
- set(TFLiteVanillaGEMMLowpSource_FOUND TRUE)
- endif()
- return_unless(TFLiteVanillaGEMMLowpSource_FOUND)
-
- set(neon2sse_url "https://github.com/intel/ARM_NEON_2_x86_SSE/archive/1200fe90bb174a6224a525ee60148671a786a71f.tar.gz")
- ExternalSource_Download("TFLiteVanilla_NEON2SSE" ${neon2sse_url})
- set(TFLiteVanillaNEON2SSESource_DIR "${TFLiteVanilla_NEON2SSE_SOURCE_DIR}")
- if (NOT TFLiteVanillaNEON2SSESource_DIR STREQUAL "")
- set(TFLiteVanillaNEON2SSESource_FOUND TRUE)
- endif()
- return_unless(TFLiteVanillaNEON2SSESource_FOUND)
-
- set(tensorflow_url "https://github.com/tensorflow/tensorflow/archive/v2.3.0.tar.gz")
- ExternalSource_Download("TFLiteVanilla_TensorFlow" ${tensorflow_url})
- set(TFLiteVanillaTensorFlowSource_DIR "${TFLiteVanilla_TensorFlow_SOURCE_DIR}")
- if (NOT TFLiteVanillaTensorFlowSource_DIR STREQUAL "")
- set(TFLiteVanillaTensorFlowSource_FOUND TRUE)
- endif()
- return_unless(TFLiteVanillaTensorFlowSource_FOUND)
-
- set(ruy_url "https://github.com/google/ruy/archive/34ea9f4993955fa1ff4eb58e504421806b7f2e8f.zip")
- ExternalSource_Download("TFLiteVanilla_Ruy" ${ruy_url})
- set(TFLiteVanillaRuySource_DIR "${TFLiteVanilla_Ruy_SOURCE_DIR}")
- if (NOT TFLiteVanillaRuySource_DIR STREQUAL "")
- set(TFLiteVanillaRuySource_FOUND TRUE)
- endif()
- return_unless(TFLiteVanillaRuySource_FOUND)
-
- nnas_include(ExternalProjectTools)
- add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/TensorFlowLite-2.3.0" tflite-2.3.0)
-
- set(TensorFlowLite_2_3_0_FOUND TRUE)
- return()
-endif()
diff --git a/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLite/CMakeLists.txt b/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLite/CMakeLists.txt
new file mode 100644
index 000000000..cbc10d279
--- /dev/null
+++ b/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLite/CMakeLists.txt
@@ -0,0 +1,185 @@
+# Reference: https://github.com/tensorflow/tensorflow/blob/v2.8.0/tensorflow/lite/CMakeLists.txt
+#
+# Tensorflow Lite library 2.8.0
+#
+set(TFLITE_SOURCE_DIR ${TensorFlowSource_DIR}/tensorflow/lite)
+
+# Generate TensorFlow Lite FlatBuffer code.
+# We used to have actual compilation logic with flatc but decided to use
+# schema_generated.h since flatc doesn't work with cross compilation.
+set(TFLITE_FLATBUFFERS_SCHEMA_DIR "${TFLITE_SOURCE_DIR}/schema")
+
+macro(populate_source_vars SOURCE_DIR SOURCES_VAR)
+ cmake_parse_arguments(ARGS "RECURSE" "" "FILTER" ${ARGN})
+ if(ARGS_RECURSE)
+ set(GLOB_OP GLOB_RECURSE)
+ else()
+ set(GLOB_OP GLOB)
+ endif()
+ set(DEFAULT_FILE_FILTER ".*(_test|test_util)\\.(c|cc|h)$")
+ file(${GLOB_OP} FOUND_SOURCES "${SOURCE_DIR}/*.*")
+ list(FILTER FOUND_SOURCES INCLUDE REGEX ".*\\.(c|cc|h)$")
+ list(FILTER FOUND_SOURCES EXCLUDE REGEX "${DEFAULT_FILE_FILTER}")
+ foreach(FILE_FILTER ${ARGS_FILTER})
+ list(FILTER FOUND_SOURCES EXCLUDE REGEX "${FILE_FILTER}")
+ endforeach()
+ list(APPEND ${SOURCES_VAR} ${FOUND_SOURCES})
+endmacro()
+# Simplifies inclusion of non-test sources and headers from a directory
+# relative to TFLITE_SOURCE_DIR. See populate_source_vars() for the
+# description of arguments including and following SOURCES_VAR.
+macro(populate_tflite_source_vars RELATIVE_DIR SOURCES_VAR)
+ populate_source_vars(
+ "${TFLITE_SOURCE_DIR}/${RELATIVE_DIR}" ${SOURCES_VAR} ${ARGN}
+ )
+endmacro()
+
+# Build a list of source files to compile into the TF Lite library.
+populate_tflite_source_vars("." TFLITE_SRCS)
+
+# This particular file is excluded because the more explicit approach to enabling
+# the XNNPACK delegate is preferred to the weak-symbol one.
+list(FILTER TFLITE_SRCS EXCLUDE REGEX ".*tflite_with_xnnpack\\.cc$")
+
+# Exclude Flex related files.
+list(FILTER TFLITE_SRCS EXCLUDE REGEX ".*with_selected_ops\\.cc$")
+
+# Use MMAP
+list(FILTER TFLITE_SRCS EXCLUDE REGEX ".*mmap_allocation_disabled\\.cc$")
+
+if(NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "Android")
+ list(FILTER TFLITE_SRCS EXCLUDE REGEX ".*minimal_logging_android\\.cc$")
+endif()
+if(NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "iOS")
+ list(FILTER TFLITE_SRCS EXCLUDE REGEX ".*minimal_logging_ios\\.cc$")
+endif()
+
+populate_tflite_source_vars("core" TFLITE_CORE_SRCS)
+populate_tflite_source_vars("core/api" TFLITE_CORE_API_SRCS)
+populate_tflite_source_vars("c" TFLITE_C_SRCS)
+populate_tflite_source_vars("delegates" TFLITE_DELEGATES_SRCS)
+
+# Enable NNAPI
+populate_tflite_source_vars("delegates/nnapi"
+  TFLITE_DELEGATES_NNAPI_SRCS
+  FILTER "(_test_list|_disabled)\\.(cc|h)$"
+)
+populate_tflite_source_vars(
+"nnapi" TFLITE_NNAPI_SRCS FILTER "(_disabled)\\.(cc|h)$"
+)
+
+# Disable XNNPack
+
+# Enable experimental support for resource (need for build success)
+populate_tflite_source_vars("experimental/resource"
+  TFLITE_EXPERIMENTAL_RESOURCE_SRCS
+)
+
+# Enable Ruy
+populate_tflite_source_vars("experimental/ruy"
+ TFLITE_EXPERIMENTAL_RUY_SRCS
+ FILTER
+ ".*(test(_fast|_slow|_special_specs))\\.(cc|h)$"
+ ".*(benchmark|tune_tool|example)\\.(cc|h)$"
+)
+populate_tflite_source_vars("experimental/ruy/profiler"
+ TFLITE_EXPERIMENTAL_RUY_PROFILER_SRCS
+ FILTER ".*(test|test_instrumented_library)\\.(cc|h)$"
+)
+list(APPEND TFLITE_TARGET_PUBLIC_OPTIONS "-DTFLITE_WITH_RUY")
+
+populate_tflite_source_vars("kernels"
+ TFLITE_KERNEL_SRCS
+ FILTER "(.*_test_util_internal|test_.*|.*_ops_wrapper)\\.(cc|h)"
+)
+populate_tflite_source_vars("kernels/internal" TFLITE_KERNEL_INTERNAL_SRCS)
+populate_tflite_source_vars("kernels/internal/optimized"
+ TFLITE_KERNEL_INTERNAL_OPT_SRCS
+)
+populate_tflite_source_vars("kernels/internal/optimized/integer_ops"
+ TFLITE_KERNEL_INTERNAL_OPT_INTEGER_OPS_SRCS
+)
+populate_tflite_source_vars("kernels/internal/optimized/sparse_ops"
+ TFLITE_KERNEL_INTERNAL_OPT_SPARSE_OPS_SRCS
+)
+populate_tflite_source_vars("kernels/internal/reference"
+ TFLITE_KERNEL_INTERNAL_REF_SRCS
+)
+populate_tflite_source_vars("kernels/internal/reference/integer_ops"
+ TFLITE_KERNEL_INTERNAL_REF_INTEGER_OPS_SRCS
+)
+populate_tflite_source_vars("kernels/internal/reference/sparse_ops"
+ TFLITE_KERNEL_INTERNAL_REF_SPARSE_OPS_SRCS
+)
+set(TFLITE_PROFILER_SRCS ${TFLITE_SOURCE_DIR}/profiling/platform_profiler.cc)
+if(CMAKE_SYSTEM_NAME MATCHES "Android")
+ list(APPEND TFLITE_PROFILER_SRCS
+ ${TFLITE_SOURCE_DIR}/profiling/atrace_profiler.cc
+ )
+endif()
+
+# Common include directories
+set(TFLITE_INCLUDE_DIRS
+ "${TENSORFLOW_SOURCE_DIR}"
+ "${TFLITE_FLATBUFFERS_SCHEMA_DIR}"
+)
+
+# include headers
+list(APPEND TFLITE_INCLUDE_DIRS "${TensorFlowSource_DIR}")
+list(APPEND TFLITE_INCLUDE_DIRS "${TensorFlowGEMMLowpSource_DIR}")
+list(APPEND TFLITE_INCLUDE_DIRS "${Fp16Source_DIR}/include")
+#list(APPEND TFLITE_INCLUDE_DIRS "${Pybind11Source_DIR}/include")
+list(APPEND TFLITE_INCLUDE_DIRS "${CpuInfoSource_DIR}")
+
+if(NEON2SSESource_FOUND)
+ list(APPEND TFLITE_INCLUDE_DIRS "${NEON2SSESource_DIR}")
+endif(NEON2SSESource_FOUND)
+
+# TFLite library
+add_library(tensorflow-lite-2.8.0 STATIC
+ ${TFLITE_CORE_API_SRCS}
+ ${TFLITE_CORE_SRCS}
+ ${TFLITE_C_SRCS}
+ ${TFLITE_DELEGATES_NNAPI_SRCS}
+ ${TFLITE_DELEGATES_SRCS}
+ ${TFLITE_EXPERIMENTAL_RESOURCE_SRCS}
+ ${TFLITE_EXPERIMENTAL_RUY_PROFILER_SRCS}
+ ${TFLITE_EXPERIMENTAL_RUY_SRCS}
+ ${TFLITE_KERNEL_INTERNAL_OPT_INTEGER_OPS_SRCS}
+ ${TFLITE_KERNEL_INTERNAL_OPT_SPARSE_OPS_SRCS}
+ ${TFLITE_KERNEL_INTERNAL_OPT_SRCS}
+ ${TFLITE_KERNEL_INTERNAL_REF_INTEGER_OPS_SRCS}
+ ${TFLITE_KERNEL_INTERNAL_REF_SPARSE_OPS_SRCS}
+ ${TFLITE_KERNEL_INTERNAL_REF_SRCS}
+ ${TFLITE_KERNEL_INTERNAL_SRCS}
+ ${TFLITE_KERNEL_SRCS}
+ ${TFLITE_NNAPI_SRCS}
+ ${TFLITE_SRCS}
+ ${TFLITE_PROFILER_SRCS}
+ ${TFLITE_SOURCE_DIR}/kernels/internal/utils/sparsity_format_converter.cc
+ ${TFLITE_SOURCE_DIR}/schema/schema_utils.cc
+ ${OouraFFTSource_DIR}/fftsg.c
+ ${OouraFFTSource_DIR}/fftsg2d.c
+)
+target_include_directories(tensorflow-lite-2.8.0
+ SYSTEM PUBLIC
+ ${TFLITE_INCLUDE_DIRS}
+)
+
+target_compile_definitions(tensorflow-lite-2.8.0 PUBLIC "GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK -DTFLITE_WITH_RUY -DTFLITE_WITH_RUY_GEMV -DRUY_HAVE_CPUINFO -DNNAPI_VERBOSE_VALIDATION")
+set_property(TARGET tensorflow-lite-2.8.0 PROPERTY POSITION_INDEPENDENT_CODE ON)
+target_link_libraries(tensorflow-lite-2.8.0 eigen flatbuffers::flatbuffers ruy abseil farmhash ${LIB_PTHREAD} dl)
+if(NOT ANDROID)
+ target_link_libraries(tensorflow-lite-2.8.0 rt)
+endif()
+
+# Define TF_LITE_DISABLE_X86_NEON for debug build
+# If we upgrade NEON2SSE version, we can remove below line
+if(NEON2SSESource_FOUND)
+ target_compile_definitions(tensorflow-lite-2.8.0 PRIVATE $<$<CONFIG:Debug>:TF_LITE_DISABLE_X86_NEON>)
+endif(NEON2SSESource_FOUND)
+
+if(ANDROID)
+ target_link_libraries(tensorflow-lite-2.8.0 log)
+ #target_include_directories(tensorflow-lite-2.8.0 PUBLIC "${NDK_DIR}/..")
+endif()
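The heavy lifting in populate_source_vars is list(FILTER ... EXCLUDE REGEX ...), which drops test and utility files after the glob. A self-contained illustration of the default filter used above:

set(SRCS add.cc add_test.cc util.h test_util.h)
list(FILTER SRCS EXCLUDE REGEX ".*(_test|test_util)\\.(c|cc|h)$")
# SRCS is now "add.cc;util.h"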
diff --git a/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLiteConfig.cmake b/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLiteConfig.cmake
new file mode 100644
index 000000000..60f7f5450
--- /dev/null
+++ b/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLiteConfig.cmake
@@ -0,0 +1,96 @@
+# NOTE This check prevents multiple definitions of the tensorflow-lite-2.8.0 target
+if(TARGET tensorflow-lite-2.8.0)
+ set(TensorFlowLite_FOUND TRUE)
+ return()
+endif(TARGET tensorflow-lite-2.8.0)
+
+if(BUILD_TENSORFLOW_LITE)
+ macro(return_unless VAR)
+ if(NOT ${VAR})
+ message("TFLite 2.8: ${VAR} NOT TRUE")
+ set(TensorFlowLite_FOUND FALSE)
+ return()
+ endif(NOT ${VAR})
+ endmacro(return_unless)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ nnas_find_package(TensorFlowSource EXACT 2.8.0 QUIET)
+ return_unless(TensorFlowSource_FOUND)
+
+ # Below urls come from https://github.com/tensorflow/tensorflow/blob/v2.8.0/tensorflow/workspace2.bzl
+ nnas_find_package(Abseil QUIET)
+ return_unless(Abseil_FOUND)
+ nnfw_find_package(Eigen QUIET)
+ return_unless(Eigen_FOUND)
+ nnas_find_package(Farmhash QUIET)
+ return_unless(Farmhash_FOUND)
+ nnfw_find_package(FlatBuffers QUIET)
+ return_unless(FlatBuffers_FOUND)
+ nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.8.0 QUIET)
+ return_unless(TensorFlowGEMMLowpSource_FOUND)
+ nnas_find_package(OouraFFTSource QUIET)
+ return_unless(OouraFFTSource_FOUND)
+ nnfw_find_package(Ruy QUIET)
+ return_unless(Ruy_FOUND)
+
+ # TensorFlow Lite requires only the FP16 library's headers
+ nnas_find_package(Fp16Source QUIET)
+ return_unless(Fp16Source_FOUND)
+
+ # TensorFlow Lite requires only Pybind11's headers,
+ # but Pybind11 itself requires the python3-dev package
+ # TODO Enable the lines below once python3-dev is installed on the build system
+ #nnas_find_package(Pybind11Source QUIET)
+ #return_unless(Pybind11Source_FOUND)
+
+ # Optional packages
+ nnas_find_package(NEON2SSESource QUIET)
+
+ nnas_include(ExternalProjectTools)
+ add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/TensorFlowLite" tflite-2.8.0)
+
+ set(TensorFlowLite_FOUND TRUE)
+ return()
+endif()
+
+# Use pre-built TensorFlow Lite
+find_path(TFLITE_INCLUDE_DIR NAMES tensorflow/lite/c/c_api.h)
+find_library(TFLITE_LIB NAMES tensorflow2-lite)
+
+if(NOT TFLITE_INCLUDE_DIR)
+ # Tizen installs TensorFlow Lite 2.8 headers in /usr/include/tensorflow2
+ find_path(TFLITE_INCLUDE_DIR NAMES tensorflow/lite/c/c_api.h PATHS "/usr/include/tensorflow2")
+ if(NOT TFLITE_INCLUDE_DIR)
+ set(TensorFlowLite_FOUND FALSE)
+ return()
+ endif(NOT TFLITE_INCLUDE_DIR)
+endif(NOT TFLITE_INCLUDE_DIR)
+
+if(NOT TFLITE_LIB)
+ set(TensorFlowLite_FOUND FALSE)
+ return()
+endif(NOT TFLITE_LIB)
+
+message(STATUS "Found TensorFlow Lite: TRUE (include: ${TFLITE_INCLUDE_DIR}, lib: ${TFLITE_LIB}")
+
+# TODO Use IMPORTED target
+add_library(tensorflow-lite-2.8.0 INTERFACE)
+target_include_directories(tensorflow-lite-2.8.0 SYSTEM INTERFACE ${TFLITE_INCLUDE_DIR})
+target_link_libraries(tensorflow-lite-2.8.0 INTERFACE ${TFLITE_LIB})
+find_package(Flatbuffers)
+if(Flatbuffers_FOUND)
+ target_link_libraries(tensorflow-lite-2.8.0 INTERFACE flatbuffers::flatbuffers)
+endif(Flatbuffers_FOUND)
+
+# Prefer -pthread to -lpthread
+set(THREADS_PREFER_PTHREAD_FLAG TRUE)
+set(CMAKE_THREAD_PREFER_PTHREAD TRUE)
+find_package(Threads QUIET)
+
+if(Threads_FOUND)
+ target_link_libraries(tensorflow-lite-2.8.0 INTERFACE ${CMAKE_THREAD_LIBS_INIT})
+endif(Threads_FOUND)
+
+set(TensorFlowLite_FOUND TRUE)
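Whichever path is taken (source build or the pre-built Tizen package), the result is a tensorflow-lite-2.8.0 target, which the version file below lets callers request exactly. A sketch with a hypothetical consumer target:

nnfw_find_package(TensorFlowLite EXACT 2.8.0 QUIET)
if(TensorFlowLite_FOUND)
  target_link_libraries(my_tflite_runner PRIVATE tensorflow-lite-2.8.0)
endif()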
diff --git a/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLiteConfigVersion.cmake b/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLiteConfigVersion.cmake
new file mode 100644
index 000000000..cd49d7b72
--- /dev/null
+++ b/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLiteConfigVersion.cmake
@@ -0,0 +1,9 @@
+set(PACKAGE_VERSION "2.8.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/nnfw/cmake/packages/TensorFlowLiteGpu/CMakeLists.txt b/infra/nnfw/cmake/packages/TensorFlowLiteGpu/CMakeLists.txt
new file mode 100644
index 000000000..73264d107
--- /dev/null
+++ b/infra/nnfw/cmake/packages/TensorFlowLiteGpu/CMakeLists.txt
@@ -0,0 +1,73 @@
+#
+# Tensorflow Lite GPU delegate library 2.8.0
+#
+
+set(LIB_TENSORFLOW_GPU_DELEGATE "TensorFlowGpu")
+
+#TENSORFLOWGPU_SOURCE_DIR
+set(TENSORFLOWSOURCE_DIR ${TensorFlowSource_DIR})
+set(TENSORFLOW_LITE_BASE ${TENSORFLOWSOURCE_DIR}/tensorflow/lite)
+set(REF_TENSORFLOW_LITE_GPU_DELEGATE_SRC_BASE "${TENSORFLOW_LITE_BASE}/delegates/gpu")
+
+set(SRC_BASE "${REF_TENSORFLOW_LITE_GPU_DELEGATE_SRC_BASE}")
+file(GLOB GPU_CL_SRC_LIST "${SRC_BASE}/cl/*.cc"
+ "${SRC_BASE}/cl/kernels/*.cc"
+ "${SRC_BASE}/common/*.cc"
+ "${SRC_BASE}/common/selectors/*.cc"
+ "${SRC_BASE}/common/selectors/default/*.cc"
+ "${SRC_BASE}/common/task/*.cc"
+ "${SRC_BASE}/common/tasks/*.cc"
+ "${SRC_BASE}/common/tasks/special/*.cc"
+ "${SRC_BASE}/common/memory_management/*.cc"
+ "${SRC_BASE}/common/transformations/*.cc"
+ )
+
+file(GLOB REMOVE_TEST_SRCS "${SRC_BASE}/cl/*_test*.cc"
+ "${SRC_BASE}/cl/testing/*.cc"
+ "${SRC_BASE}/cl/kernels/*_test*.cc"
+ "${SRC_BASE}/common/*_test*.cc"
+ "${SRC_BASE}/common/tasks/*_test*.cc"
+ "${SRC_BASE}/common/transformations/*_test*.cc"
+ )
+# Sources not available in this build
+file(GLOB REMOVE_SRCS "${SRC_BASE}/cl/*gl*.cc"
+ "${SRC_BASE}/cl/gpu_api_delegate.cc"
+ "${SRC_BASE}/cl/serialization.cc"
+ "${SRC_BASE}/common/lstm_parser.cc"
+ "${SRC_BASE}/common/model_builder.cc"
+ "${SRC_BASE}/common/model_builder_helper.cc"
+ "${SRC_BASE}/common/object_reader.cc"
+ "${SRC_BASE}/common/quantization_util.cc"
+ "${SRC_BASE}/common/memory_management/*_test.cc"
+ )
+
+list(APPEND GPU_CL_SRC_LIST "${TENSORFLOW_LITE_BASE}/experimental/acceleration/compatibility/android_info.cc")
+
+list(REMOVE_ITEM GPU_CL_SRC_LIST ${REMOVE_TEST_SRCS})
+list(REMOVE_ITEM GPU_CL_SRC_LIST ${REMOVE_SRCS})
+list(APPEND TFLITE_GPU_SRCS ${GPU_CL_SRC_LIST})
+
+add_library(${LIB_TENSORFLOW_GPU_DELEGATE} STATIC ${TFLITE_GPU_SRCS})
+
+target_include_directories(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "${Opencl_Headers_DIR}")
+target_include_directories(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "${Fp16Source_DIR}/include")
+target_include_directories(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "${TensorFlowSource_DIR}")
+target_include_directories(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "${TensorFlowGEMMLowpSource_DIR}")
+target_include_directories(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "${TensorFlowEigenSource_DIR}")
+target_include_directories(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "${VulkanSource_DIR}/include")
+target_include_directories(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "${Opengl_HeadersSource_DIR}/api")
+target_include_directories(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "${Egl_HeadersSource_DIR}/api")
+
+target_link_libraries(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE abseil farmhash fp16 flatbuffers)
+
+# GL code is not used by gpu_cl
+target_compile_options(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "-DCL_DELEGATE_NO_GL")
+target_compile_options(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "-DTFLITE_GPU_BINARY_RELEASE" "-DEGL_NO_X11")
+
+# deprecated-copy warning on header (gcc 9.4.0)
+if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 9.4)
+ target_compile_options(${LIB_TENSORFLOW_GPU_DELEGATE} PUBLIC "-Wno-deprecated-copy")
+endif()
+
+# Apply PIC; currently used by gpu_cl only
+set_target_properties(${LIB_TENSORFLOW_GPU_DELEGATE} PROPERTIES POSITION_INDEPENDENT_CODE ON)
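+
+# Usage sketch (assumption; "onert_backend_gpu_cl" is a hypothetical consumer):
+#   target_link_libraries(onert_backend_gpu_cl PRIVATE TensorFlowGpu)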
diff --git a/infra/nnfw/cmake/packages/XnnpackConfig.cmake b/infra/nnfw/cmake/packages/XnnpackConfig.cmake
new file mode 100644
index 000000000..101d757ec
--- /dev/null
+++ b/infra/nnfw/cmake/packages/XnnpackConfig.cmake
@@ -0,0 +1,41 @@
+function(_Xnnpack_Build)
+ nnas_find_package(XnnpackSource QUIET)
+ nnfw_find_package(Fxdiv QUIET)
+ nnfw_find_package(CpuInfo QUIET)
+ nnfw_find_package(Pthreadpool QUIET)
+ nnfw_find_package(Psimd QUIET)
+ nnfw_find_package(Fp16 QUIET)
+
+ # NOTE This guard prevents multiple definitions of the cpuinfo target
+ if(TARGET XNNPACK)
+ set(XnnpackSource_DIR ${XnnpackSource_DIR} PARENT_SCOPE)
+ set(Xnnpack_FOUND TRUE PARENT_SCOPE)
+ return()
+ endif(TARGET XNNPACK)
+
+ if(NOT XnnpackSource_FOUND)
+ message(STATUS "XNNPACK: Source not found")
+ set(Xnnpack_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT XnnpackSource_FOUND)
+
+ set(XNNPACK_BUILD_TESTS OFF CACHE BOOL "Build XNNPACK unit tests")
+ set(XNNPACK_BUILD_BENCHMARKS OFF CACHE BOOL "Build XNNPACK benchmarks")
+ set(XNNPACK_USE_SYSTEM_LIBS ON CACHE BOOL "Use system-provided dependency libraries")
+
+ add_extdirectory("${XnnpackSource_DIR}" XNNPACK EXCLUDE_FROM_ALL)
+ set_target_properties(XNNPACK PROPERTIES POSITION_INDEPENDENT_CODE ON)
+ # Suppress warnings generated by xnnpack
+ set_target_properties(XNNPACK PROPERTIES COMPILE_FLAGS "-Wno-deprecated-declarations")
+ set(XnnpackSource_DIR ${XnnpackSource_DIR} PARENT_SCOPE)
+ set(Xnnpack_FOUND TRUE PARENT_SCOPE)
+endfunction(_Xnnpack_Build)
+
+string(REGEX REPLACE "-flto" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
+string(REGEX REPLACE "-flto" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+
+if(BUILD_XNNPACK)
+ _Xnnpack_Build()
+else(BUILD_XNNPACK)
+ set(Xnnpack_FOUND FALSE)
+endif(BUILD_XNNPACK)
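+
+# Usage sketch (illustrative; "my_kernel_lib" is a hypothetical target):
+#   nnfw_find_package(Xnnpack QUIET)
+#   if(Xnnpack_FOUND)
+#     target_link_libraries(my_kernel_lib PRIVATE XNNPACK)
+#   endif()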
diff --git a/infra/nnfw/command/build b/infra/nnfw/command/build
index b0301d2f4..4a3601ed2 100644
--- a/infra/nnfw/command/build
+++ b/infra/nnfw/command/build
@@ -8,4 +8,4 @@ if [[ ! -d "${BUILD_PATH}" ]]; then
fi
cd ${BUILD_PATH}
-make "$@"
+cmake --build . -- "$@"
diff --git a/infra/nnfw/command/count-unittest b/infra/nnfw/command/count-unittest
index 7957f36e7..3ce7bbac3 100644
--- a/infra/nnfw/command/count-unittest
+++ b/infra/nnfw/command/count-unittest
@@ -69,6 +69,6 @@ TOTAL_NEG_TCS=$(echo "$TEST_LIST" | grep '^ neg_' | wc -l)
TOTAL_POS_TCS=$(echo "$TEST_LIST" | grep '^ neg_' -v | wc -l)
# Report stats
-echo "TOTAL NUMBER OF TEST CASES : $TOTAL_TCS"
-echo "TOTAL NUMBER OF POSTIVE TEST CASES : $TOTAL_NEG_TCS"
-echo "TOTAL NUMBER OF NEGATIVE TEST CASES : $TOTAL_POS_TCS"
+printf "TOTAL NUMBER OF TEST CASES : %5d\n" $TOTAL_TCS
+printf "TOTAL NUMBER OF POSITIVE TEST CASES : %5d\n" $TOTAL_POS_TCS
+printf "TOTAL NUMBER OF NEGATIVE TEST CASES : %5d\n" $TOTAL_NEG_TCS
diff --git a/infra/nnfw/command/prepare-model b/infra/nnfw/command/prepare-model
new file mode 100644
index 000000000..35600e152
--- /dev/null
+++ b/infra/nnfw/command/prepare-model
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+import "build.configuration"
+
+# This command is used to download test materials on host environment
+# by using test command on host
+
+# Common variables
+DRIVER_PATH=$NNFW_PROJECT_PATH/tests/scripts
+CACHE_PATH=${CACHE_PATH:-$WORKSPACE_PATH/out/test/cache}
+
+COMMAND_FILE=$DRIVER_PATH/command/prepare-model
+if [[ ! -f $COMMAND_FILE ]]; then
+ echo "ERROR: '$COMMAND' is not supported"
+ exit 255
+fi
+
+source $COMMAND_FILE "$@"
diff --git a/infra/nnfw/config/docker.configuration b/infra/nnfw/config/docker.configuration
index 962c02c7f..c61ab0ff2 100644
--- a/infra/nnfw/config/docker.configuration
+++ b/infra/nnfw/config/docker.configuration
@@ -1,6 +1,6 @@
#!/bin/bash
-DOCKER_IMAGE_NAME=${DOCKER_IMAGE_NAME:-nnas}
+DOCKER_IMAGE_NAME=${DOCKER_IMAGE_NAME:-nnfw/one-devtools}
echo "Using docker image ${DOCKER_IMAGE_NAME}"
if [ -z "`docker images ${DOCKER_IMAGE_NAME}`" ]; then
diff --git a/infra/nnfw/config/gbs.conf b/infra/nnfw/config/gbs.conf
index bad9eb204..5bb7b0ca3 100644
--- a/infra/nnfw/config/gbs.conf
+++ b/infra/nnfw/config/gbs.conf
@@ -3,20 +3,37 @@
profile = profile.tizen
[profile.tizen]
-user=obs_viewer
-obs = obs.tizen
-repos = repo.tizen_one,repo.tizen_base,repo.tizen_mobile
-buildroot = /home/GBS-ROOT/
+repos = repo.base, repo.unified
-[obs.tizen]
-url = http://api.tizen.org
+[profile.tizen_8]
+repos = repo.base_8, repo.unified_8
-[repo.tizen_mobile]
-url = http://download.tizen.org/snapshots/tizen/unified/latest/repos/standard/packages/
+[profile.tizen-dev]
+repos = repo.base-dev, repo.unified-dev
-[repo.tizen_base]
-url = http://download.tizen.org/snapshots/tizen/base/latest/repos/standard/packages/
+[profile.tizen-riscv]
+repos = repo.base-riscv, repo.unified-riscv
-[repo.tizen_one]
-url = http://nnfw.mooo.com/archive/tizen/
+[repo.unified]
+url = http://download.tizen.org/snapshots/TIZEN/Tizen-7.0/Tizen-7.0-Unified/latest/repos/standard/packages/
+[repo.base]
+url = http://download.tizen.org/snapshots/TIZEN/Tizen-7.0/Tizen-7.0-Base/latest/repos/standard/packages/
+
+[repo.unified_8]
+url = http://download.tizen.org/snapshots/TIZEN/Tizen-8.0/Tizen-8.0-Unified/latest/repos/standard/packages/
+
+[repo.base_8]
+url = http://download.tizen.org/snapshots/TIZEN/Tizen-8.0/Tizen-8.0-Base/latest/repos/standard/packages/
+
+[repo.unified-dev]
+url = http://download.tizen.org/snapshots/TIZEN/Tizen/Tizen-Unified-Dev/latest/repos/standard/packages/
+
+[repo.base-dev]
+url = http://download.tizen.org/snapshots/TIZEN/Tizen/Tizen-Base-Dev/latest/repos/standard/packages/
+
+[repo.unified-riscv]
+url = https://download.tizen.org/snapshots/TIZEN/Tizen/Tizen-Unified-RISCV/latest/repos/standard/packages/
+
+[repo.base-riscv]
+url = https://download.tizen.org/snapshots/TIZEN/Tizen/Tizen-Base-RISCV/latest/repos/standard/packages/
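+
+# Example (hypothetical invocation; adjust profile/arch to the target):
+#   gbs -c infra/nnfw/config/gbs.conf build -P profile.tizen_8 -A armv7l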
diff --git a/infra/onert-micro/CMakeLists.txt b/infra/onert-micro/CMakeLists.txt
new file mode 100644
index 000000000..21533c11f
--- /dev/null
+++ b/infra/onert-micro/CMakeLists.txt
@@ -0,0 +1,61 @@
+cmake_minimum_required(VERSION 3.15)
+
+project(onert-micro)
+
+enable_testing()
+
+set(CMAKE_CXX_STANDARD 14)
+
+set(CMAKE_SKIP_BUILD_RPATH FALSE)
+set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE)
+set(CMAKE_INSTALL_RPATH "$ORIGIN/../lib:$ORIGIN/")
+set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
+
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+
+if (NOT DEFINED TARGET_ARCH)
+ set(TARGET_ARCH "armv7em")
+endif()
+
+if (NOT DEFINED TARGET_CPU)
+ set(TARGET_CPU "cortex-m7")
+endif()
+
+if (NOT DEFINED TARGET_OS)
+ set(TARGET_OS "generic")
+endif()
+
+include(utils.cmake)
+
+nnas_find_package(GTest QUIET)
+
+option(ENABLE_TEST "Build Tests using Google Test" ${GTest_FOUND})
+
+if(${ENABLE_TEST} AND NOT ${GTest_FOUND})
+ message(FATAL_ERROR "Google Test is required to enable test")
+endif(${ENABLE_TEST} AND NOT ${GTest_FOUND})
+
+option(ENABLE_COVERAGE "Build for coverage test" OFF)
+if(${ENABLE_COVERAGE} AND NOT ${ENABLE_TEST})
+ message(FATAL_ERROR "Test should be enabled to measure test coverage")
+endif(${ENABLE_COVERAGE} AND NOT ${ENABLE_TEST})
+
+if(${ENABLE_TEST})
+ include(CTest)
+endif(${ENABLE_TEST})
+
+###
+### Target
+###
+add_library(onert_micro_common INTERFACE)
+if(ENABLE_STRICT_BUILD)
+ target_compile_options(onert_micro_common INTERFACE -Werror -Wall -Wextra -Wno-reorder)
+endif(ENABLE_STRICT_BUILD)
+
+add_library(onert_micro_coverage INTERFACE)
+if(ENABLE_COVERAGE)
+ target_compile_options(onert_micro_coverage INTERFACE -g -O0 -fprofile-arcs -ftest-coverage)
+ target_link_libraries(onert_micro_coverage INTERFACE gcov)
+endif(ENABLE_COVERAGE)
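+
+# Usage sketch (assumption; "onert_micro_example" is a hypothetical target):
+#   target_link_libraries(onert_micro_example PRIVATE onert_micro_common onert_micro_coverage)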
+
+add_subdirectory("${NNAS_PROJECT_SOURCE_DIR}/onert-micro" "${CMAKE_BINARY_DIR}/onert-micro")
diff --git a/infra/onert-micro/cmake/ApplyCompileFlags.cmake b/infra/onert-micro/cmake/ApplyCompileFlags.cmake
new file mode 100644
index 000000000..fb99fbd26
--- /dev/null
+++ b/infra/onert-micro/cmake/ApplyCompileFlags.cmake
@@ -0,0 +1,35 @@
+#
+# Platform independent compile flag setting
+#
+# flags for build type: debug, release
+set(CMAKE_C_FLAGS_DEBUG "-O0 -g -DDEBUG")
+set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -DDEBUG")
+set(CMAKE_C_FLAGS_RELEASE "-O3 -DNDEBUG")
+set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG")
+
+#
+# Platform specific compile flag setting
+#
+if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/buildtool/config/config_${TARGET_PLATFORM}.cmake")
+ include("${CMAKE_CURRENT_LIST_DIR}/buildtool/config/config_${TARGET_PLATFORM}.cmake")
+endif()
+
+#
+# Apply compile flags
+# note: this should be placed after cmake/buildtool/config/config_xxx.cmake files
+#
+# add common flags
+foreach(FLAG ${FLAGS_COMMON})
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAG}")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG}")
+endforeach()
+
+# add c flags
+foreach(FLAG ${FLAGS_CONLY})
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAG}")
+endforeach()
+
+# add cxx flags
+foreach(FLAG ${FLAGS_CXXONLY})
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG}")
+endforeach()
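+
+# Example (illustrative): a config_<platform>.cmake included above populates
+# these lists before this point, e.g.
+#   list(APPEND FLAGS_COMMON "-Os")
+#   list(APPEND FLAGS_CXXONLY "-fno-exceptions")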
diff --git a/infra/onert-micro/cmake/CfgOptionFlags.cmake b/infra/onert-micro/cmake/CfgOptionFlags.cmake
new file mode 100644
index 000000000..ffbc7b255
--- /dev/null
+++ b/infra/onert-micro/cmake/CfgOptionFlags.cmake
@@ -0,0 +1,18 @@
+# Platform specific configuration
+# note: this should be placed before default setting for option setting priority
+# (platform specific setting have higher priority)
+#
+include("${NNAS_PROJECT_SOURCE_DIR}/infra/onert-micro/cmake/options/options_${TARGET_PLATFORM}.cmake")
+
+###
+### Configuration
+###
+option(DOWNLOAD_RUY "Download ruy source" ON)
+option(DOWNLOAD_EIGEN "Download Eigen source" ON)
+option(DOWNLOAD_GEMMLOWP "Download GEMM low precision library source" ON)
+option(DOWNLOAD_FLATBUFFERS "Download FlatBuffers source" ON)
+option(BUILD_FLATBUFFERS "Locally build Flatbuffers from the downloaded source" ON)
+option(DOWNLOAD_TENSORFLOW "Download TensorFlow source" ON)
+
+option(DOWNLOAD_GTEST "Download Google Test source" ON)
+option(BUILD_GTEST "Build Google Test from the downloaded source" ON)
diff --git a/infra/onert-micro/cmake/buildtool/config/arm-none-eabi-gcc.cmake b/infra/onert-micro/cmake/buildtool/config/arm-none-eabi-gcc.cmake
new file mode 100644
index 000000000..544be030a
--- /dev/null
+++ b/infra/onert-micro/cmake/buildtool/config/arm-none-eabi-gcc.cmake
@@ -0,0 +1,66 @@
+set(CMAKE_SYSTEM_NAME Generic)
+
+set(CMAKE_SYSTEM_PROCESSOR "${CPU_ARCH}")
+set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
+set(CMAKE_C_COMPILER "${C_COMPILER}")
+set(CMAKE_CXX_COMPILER "${CXX_COMPILER}")
+set(CMAKE_ASM_COMPILER "${ASM_COMPILER}")
+set(CMAKE_OBJCOPY "${OBJCOPY}")
+
+set(TARGET_CPU "cortex-m4" CACHE STRING "Target CPU")
+
+# Convert TARGET_CPU=Cortex-M33+nofp+nodsp into
+# - CMAKE_SYSTEM_PROCESSOR=cortex-m33
+# - TARGET_CPU_FEATURES=no-fp;no-dsp
+string(REPLACE "+" ";" TARGET_CPU_FEATURES ${TARGET_CPU})
+list(POP_FRONT TARGET_CPU_FEATURES CMAKE_SYSTEM_PROCESSOR)
+string(TOLOWER ${CMAKE_SYSTEM_PROCESSOR} CMAKE_SYSTEM_PROCESSOR)
+
+set(CMAKE_EXECUTABLE_SUFFIX ".elf")
+set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+
+# Select C/C++ version
+set(CMAKE_C_STANDARD 99)
+set(CMAKE_CXX_STANDARD 14)
+
+# Compile options
+add_compile_options(
+ -mcpu=${TARGET_CPU}
+ -mthumb
+ "$<$<CONFIG:DEBUG>:-gdwarf-3>"
+ "$<$<COMPILE_LANGUAGE:CXX>:-funwind-tables;-frtti;-fexceptions>")
+
+# Compile defines
+add_compile_definitions(
+ "$<$<NOT:$<CONFIG:DEBUG>>:NDEBUG>")
+
+# Link options
+add_link_options(
+ -mcpu=${TARGET_CPU}
+ -mthumb
+ --specs=nosys.specs)
+
+# Set floating point unit
+if("${TARGET_CPU}" MATCHES "\\+fp")
+ set(FLOAT hard)
+elseif("${TARGET_CPU}" MATCHES "\\+nofp")
+ set(FLOAT soft)
+elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "cortex-m33" OR
+ "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "cortex-m55")
+ set(FLOAT hard)
+else()
+ set(FLOAT soft)
+endif()
+
+if (FLOAT)
+ add_compile_options(-mfloat-abi=${FLOAT})
+ add_link_options(-mfloat-abi=${FLOAT})
+endif()
+
+# Compilation warnings
+add_compile_options(
+ -Wno-all
+)
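+
+# Usage sketch (assumption; paths and values are illustrative):
+#   cmake -DCMAKE_TOOLCHAIN_FILE=<path>/arm-none-eabi-gcc.cmake \
+#         -DTARGET_CPU=cortex-m55+nofp \
+#         -DC_COMPILER=arm-none-eabi-gcc -DCXX_COMPILER=arm-none-eabi-g++ \
+#         -DASM_COMPILER=arm-none-eabi-gcc -DOBJCOPY=arm-none-eabi-objcopy <src>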
diff --git a/infra/onert-micro/cmake/buildtool/config/config_linux.cmake b/infra/onert-micro/cmake/buildtool/config/config_linux.cmake
new file mode 100644
index 000000000..d7b17cfef
--- /dev/null
+++ b/infra/onert-micro/cmake/buildtool/config/config_linux.cmake
@@ -0,0 +1,11 @@
+#
+# linux common compile options
+#
+
+# Disable annoying ABI compatibility warning.
+if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0)
+ list(APPEND FLAGS_CXXONLY "-Wno-psabi")
+endif()
+
+# lib pthread as a variable (pthread must be disabled on android)
+set(LIB_PTHREAD pthread)
diff --git a/infra/onert-micro/cmake/buildtool/config/config_x86_64-linux.cmake b/infra/onert-micro/cmake/buildtool/config/config_x86_64-linux.cmake
new file mode 100644
index 000000000..528e48396
--- /dev/null
+++ b/infra/onert-micro/cmake/buildtool/config/config_x86_64-linux.cmake
@@ -0,0 +1,12 @@
+#
+# x86_64 linux compile options
+#
+message(STATUS "Building for x86-64 Linux")
+
+# include linux common
+include("cmake/buildtool/config/config_linux.cmake")
+
+# SIMD for x86
+set(FLAGS_COMMON ${FLAGS_COMMON}
+ "-msse4"
+ )
diff --git a/infra/onert-micro/cmake/options/options_armv7-r-generic.cmake b/infra/onert-micro/cmake/options/options_armv7-r-generic.cmake
new file mode 100644
index 000000000..d671b73f1
--- /dev/null
+++ b/infra/onert-micro/cmake/options/options_armv7-r-generic.cmake
@@ -0,0 +1,3 @@
+#
+# armv7-r generic cmake options
+#
diff --git a/infra/onert-micro/cmake/options/options_armv7em-generic.cmake b/infra/onert-micro/cmake/options/options_armv7em-generic.cmake
new file mode 100644
index 000000000..d671b73f1
--- /dev/null
+++ b/infra/onert-micro/cmake/options/options_armv7em-generic.cmake
@@ -0,0 +1,3 @@
+#
+# armv7em generic cmake options
+#
diff --git a/infra/onert-micro/cmake/options/options_armv8-m-generic.cmake b/infra/onert-micro/cmake/options/options_armv8-m-generic.cmake
new file mode 100644
index 000000000..cbd70de7d
--- /dev/null
+++ b/infra/onert-micro/cmake/options/options_armv8-m-generic.cmake
@@ -0,0 +1,3 @@
+#
+# armv8-m generic cmake options
+#
diff --git a/infra/onert-micro/cmake/options/options_x86_64-linux.cmake b/infra/onert-micro/cmake/options/options_x86_64-linux.cmake
new file mode 100644
index 000000000..0fb72f18b
--- /dev/null
+++ b/infra/onert-micro/cmake/options/options_x86_64-linux.cmake
@@ -0,0 +1,3 @@
+#
+# x86_64 linux cmake options
+#
diff --git a/infra/onert-micro/utils.cmake b/infra/onert-micro/utils.cmake
new file mode 100644
index 000000000..4c78e2cb9
--- /dev/null
+++ b/infra/onert-micro/utils.cmake
@@ -0,0 +1,53 @@
+set(NNAS_PROJECT_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/../.." CACHE
+ INTERNAL "Where to find nnas top-level source directory"
+ )
+
+set(NNAS_EXTERNALS_DIR
+ "${NNAS_PROJECT_SOURCE_DIR}/externals" CACHE
+ INTERNAL "Where to download external dependencies"
+ )
+set(ONERT_MICRO_OVERLAY_DIR "${CMAKE_BINARY_DIR}/overlay" CACHE
+ INTERNAL "Where locally built external dependencies are installed")
+
+# Share package build script with runtime
+set(EXT_OVERLAY_DIR ${ONERT_MICRO_OVERLAY_DIR})
+
+# This allows find_package to access configurations installed inside overlay
+list(APPEND CMAKE_PREFIX_PATH "${EXT_OVERLAY_DIR}")
+
+macro(nnas_include PREFIX)
+ include("${NNAS_PROJECT_SOURCE_DIR}/infra/cmake/modules/${PREFIX}.cmake")
+endmacro(nnas_include)
+
+macro(nnas_find_package PREFIX)
+ find_package(${PREFIX}
+ CONFIG NO_DEFAULT_PATH
+ PATHS ${NNAS_PROJECT_SOURCE_DIR}/infra/cmake/packages
+ ${ARGN})
+endmacro(nnas_find_package)
+
+macro(nnas_find_package_folder PREFIX FIND_FOLDER)
+ find_package(${PREFIX}
+ CONFIG NO_DEFAULT_PATH
+ PATHS ${NNAS_PROJECT_SOURCE_DIR}/infra/cmake/packages ${FIND_FOLDER}
+ ${ARGN})
+endmacro(nnas_find_package_folder)
+
+###
+### CMake configuration
+###
+if(NOT CMAKE_BUILD_TYPE)
+ set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Type of build" FORCE)
+endif(NOT CMAKE_BUILD_TYPE)
+message(STATUS "Use '${CMAKE_BUILD_TYPE}' configuration")
+
+# identify platform: HOST_PLATFORM, TARGET_PLATFORM and related
+# note: this should be placed before flags and options setting
+nnas_include(IdentifyPlatform)
+
+# Configuration flags
+include("${NNAS_PROJECT_SOURCE_DIR}/infra/onert-micro/cmake/CfgOptionFlags.cmake")
+
+# apply compilation flags
+# NOTE this should be after all option
+include("${NNAS_PROJECT_SOURCE_DIR}/infra/onert-micro/cmake/ApplyCompileFlags.cmake")
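+
+# Usage sketch (illustrative; Fp16Source is a package referenced elsewhere in this tree):
+#   nnas_find_package(Fp16Source QUIET)
+#   if(Fp16Source_FOUND)
+#     include_directories("${Fp16Source_DIR}/include")
+#   endif()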
diff --git a/infra/packaging/build b/infra/packaging/build
index e941a724b..16bce7e0e 100644
--- a/infra/packaging/build
+++ b/infra/packaging/build
@@ -8,7 +8,10 @@ if [[ -z "${NNAS_PROJECT_PATH}" ]]; then
fi
# The default preset
-PRESET="20200630"
+PRESET="20230413"
+
+# Test is enabled by default
+DISABLE_TEST=false
EXTRA_OPTIONS=()
while [ "$#" -ne 0 ]; do
@@ -23,6 +26,10 @@ while [ "$#" -ne 0 ]; do
PRESET="$2"
shift 2
;;
+ '--notest')
+ DISABLE_TEST=true
+ shift
+ ;;
'--')
shift
while [ "$#" -ne 0 ]; do
@@ -44,6 +51,10 @@ if [[ -z "${NNAS_INSTALL_PREFIX}" ]]; then
exit 255
fi
+if [[ "${DISABLE_TEST}" == "true" ]]; then
+ EXTRA_OPTIONS+=("-DENABLE_TEST=OFF")
+fi
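+
+# Example (hypothetical invocation; '--' forwards extra CMake options):
+#   infra/packaging/build --preset 20230413 --notest -- -DDOWNLOAD_GTEST=OFF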
+
PRESET_PATH="${SCRIPT_PATH}/preset/${PRESET}"
if [[ ! -f "${PRESET_PATH}" ]]; then
diff --git a/infra/packaging/preset/20200630 b/infra/packaging/preset/20200630
index 5d1635809..a1721d941 100644
--- a/infra/packaging/preset/20200630
+++ b/infra/packaging/preset/20200630
@@ -9,7 +9,7 @@ function preset_configure()
{
REQUIRED_UNITS=()
# Common Libraries
- REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp" "stdex")
+ REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
REQUIRED_UNITS+=("oops" "pepper-assert" "foder")
REQUIRED_UNITS+=("souschef")
REQUIRED_UNITS+=("safemain")
@@ -26,7 +26,7 @@ function preset_configure()
# Tools
REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef" "circlechef")
REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter" "circle-verify")
- REQUIRED_UNITS+=("record-minmax" "circle-quantizer")
+ REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5")
REQUIRED_UNITS+=("one-cmds")
REQUIRED_UNITS+=("bcq-tools")
diff --git a/infra/packaging/preset/20200731_windows b/infra/packaging/preset/20200731_windows
index 65d179eaf..078c7db47 100644
--- a/infra/packaging/preset/20200731_windows
+++ b/infra/packaging/preset/20200731_windows
@@ -4,7 +4,7 @@ function preset_configure()
{
REQUIRED_UNITS=()
# Common Libraries
- REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp" "stdex")
+ REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
REQUIRED_UNITS+=("oops" "pepper-assert" "foder")
REQUIRED_UNITS+=("souschef")
REQUIRED_UNITS+=("safemain")
@@ -21,15 +21,15 @@ function preset_configure()
# Tools
REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef" "circlechef")
REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter" "circle-verify")
- REQUIRED_UNITS+=("record-minmax" "circle-quantizer")
+ REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5")
REQUIRED_UNITS+=("one-cmds")
+ REQUIRED_UNITS+=("bcq-tools")
NPROC=$(cat /proc/cpuinfo | grep -c processor)
# TODO Use "nncc configure" and "nncc build"
cmake \
-G "MSYS Makefiles" \
- -DTF2NNPKG_FOR_WINDOWS=ON \
-DUSE_PROTOBUF_LEGACY_IMPORT=ON \
-DCMAKE_EXE_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
-DCMAKE_SHARED_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
diff --git a/infra/packaging/preset/20210406 b/infra/packaging/preset/20210406
new file mode 100644
index 000000000..caddb0a53
--- /dev/null
+++ b/infra/packaging/preset/20210406
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+# NOTE purpose of this file is static analysis only
+# new official preset will be added when new programs are ready
+
+PRESET="20210406"
+
+function preset_configure()
+{
+ REQUIRED_UNITS=()
+ # Common Libraries
+ REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
+ REQUIRED_UNITS+=("oops" "pepper-assert" "foder" "crew")
+ REQUIRED_UNITS+=("souschef")
+ REQUIRED_UNITS+=("safemain")
+ REQUIRED_UNITS+=("arser")
+ REQUIRED_UNITS+=("vconone")
+ # Hermes Logging Framework
+ REQUIRED_UNITS+=("hermes" "hermes-std")
+ # loco IR and related utilities
+ REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+ # Flatbuffer I/O
+ REQUIRED_UNITS+=("mio-tflite" "mio-circle")
+ # Circle compiler library (.circle -> .circle)
+ REQUIRED_UNITS+=("luci")
+ # Tools
+ REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef" "circlechef")
+ REQUIRED_UNITS+=("circle-tensordump" "circledump")
+ REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter" "circle-verify")
+ REQUIRED_UNITS+=("luci-eval-driver")
+ REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5")
+ REQUIRED_UNITS+=("circle-partitioner")
+ REQUIRED_UNITS+=("one-cmds")
+ REQUIRED_UNITS+=("bcq-tools")
+
+ NPROC=${NPROC:-$(cat /proc/cpuinfo | grep -c processor)}
+
+ # TODO Use "nncc configure" and "nncc build"
+ cmake \
+ -DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
+ -DCMAKE_BUILD_TYPE=release \
+ -DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+ -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \
+ ${EXTRA_OPTIONS[@]} \
+ "${NNAS_PROJECT_PATH}/infra/nncc"
+}
+
+function preset_install()
+{
+ install -t "${NNPKG_INSTALL_PREFIX}/bin" -D \
+ "${NNAS_PROJECT_PATH}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh"
+
+ # Install tf2nnpkg
+ install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.${PRESET}" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg"
+}
diff --git a/infra/packaging/preset/20210406_windows b/infra/packaging/preset/20210406_windows
new file mode 100644
index 000000000..5d4bd8d5f
--- /dev/null
+++ b/infra/packaging/preset/20210406_windows
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+function preset_configure()
+{
+ REQUIRED_UNITS=()
+ # Common Libraries
+ REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
+ REQUIRED_UNITS+=("oops" "pepper-assert" "foder" "crew")
+ REQUIRED_UNITS+=("souschef")
+ REQUIRED_UNITS+=("safemain")
+ REQUIRED_UNITS+=("arser")
+ REQUIRED_UNITS+=("vconone")
+ # Hermes Logging Framework
+ REQUIRED_UNITS+=("hermes" "hermes-std")
+ # loco IR and related utilities
+ REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+ # Flatbuffer I/O
+ REQUIRED_UNITS+=("mio-tflite" "mio-circle")
+ # Circle compiler library (.circle -> .circle)
+ REQUIRED_UNITS+=("luci")
+ # Tools
+ REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef" "circlechef")
+ REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter" "circle-verify")
+ REQUIRED_UNITS+=("luci-eval-driver")
+ REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5")
+ REQUIRED_UNITS+=("circle-partitioner")
+ REQUIRED_UNITS+=("one-cmds")
+ REQUIRED_UNITS+=("bcq-tools")
+
+ NPROC=$(cat /proc/cpuinfo | grep -c processor)
+
+ # TODO Use "nncc configure" and "nncc build"
+ cmake \
+ -G "MSYS Makefiles" \
+ -DUSE_PROTOBUF_LEGACY_IMPORT=ON \
+ -DCMAKE_EXE_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
+ -DCMAKE_SHARED_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
+ -DENABLE_TEST=OFF \
+ -DDOWNLOAD_GTEST=OFF \
+ -DBUILD_GTEST=OFF \
+ -DCMAKE_C_COMPILER=gcc \
+ -DCMAKE_CXX_COMPILER=g++ \
+ -DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
+ -DCMAKE_BUILD_TYPE=release \
+ -DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+ -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \
+ ${EXTRA_OPTIONS[@]} \
+ "${NNAS_PROJECT_PATH}/infra/nncc"
+}
+
+function preset_install()
+{
+ # Install libraries to bin/ for Windows release
+ mv ${NNCC_INSTALL_PREFIX}/lib/*.dll ${NNCC_INSTALL_PREFIX}/bin
+ rm -rf ${NNCC_INSTALL_PREFIX}/lib
+
+ install -t "${NNPKG_INSTALL_PREFIX}/bin" -D \
+ "${NNAS_PROJECT_PATH}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh"
+
+ # Install tf2nnpkg
+ install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.20210406" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg"
+
+ # NOTE Running 'tf2tfliteV2' requires TensorFlow, but TensorFlow cannot be
+ # installed under MinGW. Install it into a Python virtual environment from a
+ # native Windows CMD (run as administrator), then copy that environment to
+ # "${NNAS_INSTALL_PREFIX}/bin/venv"
+}
diff --git a/infra/packaging/preset/20210706 b/infra/packaging/preset/20210706
new file mode 100644
index 000000000..ef6b6e521
--- /dev/null
+++ b/infra/packaging/preset/20210706
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+# NOTE purpose of this file is static analysis only
+# new official preset will be added when new programs are ready
+
+PRESET="20210706"
+
+function preset_configure()
+{
+ REQUIRED_UNITS=()
+ # Common Libraries
+ REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
+ REQUIRED_UNITS+=("oops" "pepper-assert" "pepper-csv2vec" "foder" "crew")
+ REQUIRED_UNITS+=("souschef")
+ REQUIRED_UNITS+=("safemain")
+ REQUIRED_UNITS+=("arser")
+ REQUIRED_UNITS+=("vconone")
+ # Hermes Logging Framework
+ REQUIRED_UNITS+=("hermes" "hermes-std")
+ # loco IR and related utilities
+ REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+ # Flatbuffer I/O
+ REQUIRED_UNITS+=("mio-tflite" "mio-circle")
+ # Circle compiler library (.circle -> .circle)
+ REQUIRED_UNITS+=("luci")
+ # Tools
+ REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef" "circlechef")
+ REQUIRED_UNITS+=("circle-tensordump" "circledump")
+ REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter" "circle-verify")
+ REQUIRED_UNITS+=("luci-eval-driver")
+ REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5")
+ REQUIRED_UNITS+=("circle-partitioner")
+ REQUIRED_UNITS+=("one-cmds")
+ REQUIRED_UNITS+=("bcq-tools")
+
+ NPROC=${NPROC:-$(cat /proc/cpuinfo | grep -c processor)}
+
+ # TODO Use "nncc configure" and "nncc build"
+ cmake \
+ -DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
+ -DCMAKE_BUILD_TYPE=release \
+ -DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+ -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \
+ ${EXTRA_OPTIONS[@]} \
+ "${NNAS_PROJECT_PATH}/infra/nncc"
+}
+
+function preset_install()
+{
+ install -t "${NNPKG_INSTALL_PREFIX}/bin" -D \
+ "${NNAS_PROJECT_PATH}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh"
+
+ # Install tf2nnpkg
+ install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.${PRESET}" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg"
+}
diff --git a/infra/packaging/preset/20210706_windows b/infra/packaging/preset/20210706_windows
new file mode 100644
index 000000000..857540870
--- /dev/null
+++ b/infra/packaging/preset/20210706_windows
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+function preset_configure()
+{
+ REQUIRED_UNITS=()
+ # Common Libraries
+ REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
+ REQUIRED_UNITS+=("oops" "pepper-assert" "pepper-csv2vec" "foder" "crew")
+ REQUIRED_UNITS+=("souschef")
+ REQUIRED_UNITS+=("safemain")
+ REQUIRED_UNITS+=("arser")
+ REQUIRED_UNITS+=("vconone")
+ # Hermes Logging Framework
+ REQUIRED_UNITS+=("hermes" "hermes-std")
+ # loco IR and related utilities
+ REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+ # Flatbuffer I/O
+ REQUIRED_UNITS+=("mio-tflite" "mio-circle")
+ # Circle compiler library (.circle -> .circle)
+ REQUIRED_UNITS+=("luci")
+ # Tools
+ REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef" "circlechef")
+ REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter" "circle-verify")
+ REQUIRED_UNITS+=("luci-eval-driver")
+ REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5")
+ REQUIRED_UNITS+=("circle-partitioner")
+ REQUIRED_UNITS+=("one-cmds")
+ REQUIRED_UNITS+=("bcq-tools")
+
+ NPROC=$(cat /proc/cpuinfo | grep -c processor)
+
+ # TODO Use "nncc configure" and "nncc build"
+ cmake \
+ -G "MSYS Makefiles" \
+ -DUSE_PROTOBUF_LEGACY_IMPORT=ON \
+ -DCMAKE_EXE_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
+ -DCMAKE_SHARED_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
+ -DENABLE_TEST=OFF \
+ -DDOWNLOAD_GTEST=OFF \
+ -DBUILD_GTEST=OFF \
+ -DCMAKE_C_COMPILER=gcc \
+ -DCMAKE_CXX_COMPILER=g++ \
+ -DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
+ -DCMAKE_BUILD_TYPE=release \
+ -DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+ -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \
+ ${EXTRA_OPTIONS[@]} \
+ "${NNAS_PROJECT_PATH}/infra/nncc"
+}
+
+function preset_install()
+{
+ # Install libraries to bin/ for Windows release
+ mv ${NNCC_INSTALL_PREFIX}/lib/*.dll ${NNCC_INSTALL_PREFIX}/bin
+ rm -rf ${NNCC_INSTALL_PREFIX}/lib
+
+ install -t "${NNPKG_INSTALL_PREFIX}/bin" -D \
+ "${NNAS_PROJECT_PATH}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh"
+
+ # Install tf2nnpkg
+ install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.20210706" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg"
+
+ # NOTE Running 'tf2tfliteV2' requires TensorFlow, but TensorFlow cannot be
+ # installed under MinGW. Install it into a Python virtual environment from a
+ # native Windows CMD (run as administrator), then copy that environment to
+ # "${NNAS_INSTALL_PREFIX}/bin/venv"
+}
diff --git a/infra/packaging/preset/20210910 b/infra/packaging/preset/20210910
new file mode 100644
index 000000000..d00b1ccad
--- /dev/null
+++ b/infra/packaging/preset/20210910
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+# NOTE purpose of this file is static analysis only
+# new official preset will be added when new programs are ready
+
+PRESET="20210910"
+
+function preset_configure()
+{
+ REQUIRED_UNITS=()
+ # Common Libraries
+ REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
+ REQUIRED_UNITS+=("oops" "pepper-assert" "pepper-csv2vec" "foder" "crew")
+ REQUIRED_UNITS+=("souschef")
+ REQUIRED_UNITS+=("safemain")
+ REQUIRED_UNITS+=("arser")
+ REQUIRED_UNITS+=("vconone")
+ # Hermes Logging Framework
+ REQUIRED_UNITS+=("hermes" "hermes-std")
+ # loco IR and related utilities
+ REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+ # Flatbuffer I/O
+ REQUIRED_UNITS+=("mio-tflite" "mio-tflite260" "mio-circle")
+ # Circle compiler library (.circle -> .circle)
+ REQUIRED_UNITS+=("luci")
+ # Tools
+ REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef" "circlechef")
+ REQUIRED_UNITS+=("circle-tensordump" "circledump")
+ REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter" "circle-verify")
+ REQUIRED_UNITS+=("luci-eval-driver")
+ REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5")
+ REQUIRED_UNITS+=("circle-partitioner")
+ REQUIRED_UNITS+=("one-cmds")
+ REQUIRED_UNITS+=("bcq-tools")
+
+ NPROC=${NPROC:-$(cat /proc/cpuinfo | grep -c processor)}
+
+ # TODO Use "nncc configure" and "nncc build"
+ cmake \
+ -DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
+ -DCMAKE_BUILD_TYPE=release \
+ -DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+ -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \
+ ${EXTRA_OPTIONS[@]} \
+ "${NNAS_PROJECT_PATH}/infra/nncc"
+}
+
+function preset_install()
+{
+ install -t "${NNPKG_INSTALL_PREFIX}/bin" -D \
+ "${NNAS_PROJECT_PATH}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh"
+
+ # Install tf2nnpkg
+ install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.${PRESET}" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg"
+}
diff --git a/infra/packaging/preset/20210910_windows b/infra/packaging/preset/20210910_windows
new file mode 100644
index 000000000..642bdbd76
--- /dev/null
+++ b/infra/packaging/preset/20210910_windows
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+function preset_configure()
+{
+ REQUIRED_UNITS=()
+ # Common Libraries
+ REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
+ REQUIRED_UNITS+=("oops" "pepper-assert" "pepper-csv2vec" "foder" "crew")
+ REQUIRED_UNITS+=("souschef")
+ REQUIRED_UNITS+=("safemain")
+ REQUIRED_UNITS+=("arser")
+ REQUIRED_UNITS+=("vconone")
+ # Hermes Logging Framework
+ REQUIRED_UNITS+=("hermes" "hermes-std")
+ # loco IR and related utilities
+ REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+ # Flatbuffer I/O
+ REQUIRED_UNITS+=("mio-tflite" "mio-tflite260" "mio-circle")
+ # Circle compiler library (.circle -> .circle)
+ REQUIRED_UNITS+=("luci")
+ # Tools
+ REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef" "circlechef")
+ REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter" "circle-verify")
+ REQUIRED_UNITS+=("luci-eval-driver")
+ REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5")
+ REQUIRED_UNITS+=("circle-partitioner")
+ REQUIRED_UNITS+=("one-cmds")
+ REQUIRED_UNITS+=("bcq-tools")
+
+ NPROC=$(cat /proc/cpuinfo | grep -c processor)
+
+ # TODO Use "nncc configure" and "nncc build"
+ cmake \
+ -G "MSYS Makefiles" \
+ -DUSE_PROTOBUF_LEGACY_IMPORT=ON \
+ -DCMAKE_EXE_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
+ -DCMAKE_SHARED_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
+ -DENABLE_TEST=OFF \
+ -DDOWNLOAD_GTEST=OFF \
+ -DBUILD_GTEST=OFF \
+ -DCMAKE_C_COMPILER=gcc \
+ -DCMAKE_CXX_COMPILER=g++ \
+ -DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
+ -DCMAKE_BUILD_TYPE=release \
+ -DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+ -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \
+ ${EXTRA_OPTIONS[@]} \
+ "${NNAS_PROJECT_PATH}/infra/nncc"
+}
+
+function preset_install()
+{
+ # Install libraries to bin/ for Windows release
+ mv ${NNCC_INSTALL_PREFIX}/lib/*.dll ${NNCC_INSTALL_PREFIX}/bin
+ rm -rf ${NNCC_INSTALL_PREFIX}/lib
+
+ install -t "${NNPKG_INSTALL_PREFIX}/bin" -D \
+ "${NNAS_PROJECT_PATH}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh"
+
+ # Install tf2nnpkg
+ install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.20210910" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg"
+
+ # Though you have to install tensorflow to run 'tf2tfliteV2',
+ # tensorflow can't be installed in mingw. First, You can install tensorflow
+ # from Window native CMD(run as administrator) with python virtual environment.
+ # And, you must copy it to "${NNAS_INSTALL_PREFIX}/bin/venv"
+}
diff --git a/infra/packaging/preset/20220323 b/infra/packaging/preset/20220323
new file mode 100644
index 000000000..69251d03d
--- /dev/null
+++ b/infra/packaging/preset/20220323
@@ -0,0 +1,64 @@
+#!/bin/bash
+
+# NOTE purpose of this file is static analysis only
+# new official preset will be added when new programs are ready
+
+PRESET="20220323"
+
+function preset_configure()
+{
+ REQUIRED_UNITS=()
+ # Common Libraries
+ REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
+ REQUIRED_UNITS+=("oops" "pepper-assert" "pepper-csv2vec" "foder" "crew")
+ REQUIRED_UNITS+=("souschef")
+ REQUIRED_UNITS+=("safemain")
+ REQUIRED_UNITS+=("arser")
+ REQUIRED_UNITS+=("vconone")
+ # Hermes Logging Framework
+ REQUIRED_UNITS+=("hermes" "hermes-std")
+ # loco IR and related utilities
+ REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+ # Flatbuffer I/O
+ REQUIRED_UNITS+=("mio-tflite280" "mio-circle04")
+ # Data I/O
+ REQUIRED_UNITS+=("dio-hdf5")
+ # Circle compiler library (.circle -> .circle)
+ REQUIRED_UNITS+=("luci")
+ # Tools
+ REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef")
+ REQUIRED_UNITS+=("circle-tensordump" "circledump")
+ REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter")
+ REQUIRED_UNITS+=("luci-eval-driver")
+ REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5")
+ REQUIRED_UNITS+=("circle-eval-diff" "circle-interpreter")
+ REQUIRED_UNITS+=("circle-partitioner" "circle-operator")
+ REQUIRED_UNITS+=("one-cmds")
+ REQUIRED_UNITS+=("bcq-tools")
+ REQUIRED_UNITS+=("dalgona")
+ REQUIRED_UNITS+=("visq")
+
+ # Dependent modules needed for build
+ REQUIRED_UNITS+=("circlechef")
+ REQUIRED_UNITS+=("circle-verify")
+
+ NPROC=${NPROC:-$(cat /proc/cpuinfo | grep -c processor)}
+
+ # TODO Use "nncc configure" and "nncc build"
+ cmake \
+ -DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
+ -DCMAKE_BUILD_TYPE=release \
+ -DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+ -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \
+ ${EXTRA_OPTIONS[@]} \
+ "${NNAS_PROJECT_PATH}/infra/nncc"
+}
+
+function preset_install()
+{
+ install -t "${NNPKG_INSTALL_PREFIX}/bin" -D \
+ "${NNAS_PROJECT_PATH}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh"
+
+ # Install tf2nnpkg
+ install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.${PRESET}" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg"
+}
diff --git a/infra/packaging/preset/20220323_windows b/infra/packaging/preset/20220323_windows
new file mode 100644
index 000000000..c5a3f0ef9
--- /dev/null
+++ b/infra/packaging/preset/20220323_windows
@@ -0,0 +1,77 @@
+#!/bin/bash
+
+function preset_configure()
+{
+ REQUIRED_UNITS=()
+ # Common Libraries
+ REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
+ REQUIRED_UNITS+=("oops" "pepper-assert" "pepper-csv2vec" "foder" "crew")
+ REQUIRED_UNITS+=("souschef")
+ REQUIRED_UNITS+=("safemain")
+ REQUIRED_UNITS+=("arser")
+ REQUIRED_UNITS+=("vconone")
+ # Hermes Logging Framework
+ REQUIRED_UNITS+=("hermes" "hermes-std")
+ # loco IR and related utilities
+ REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+ # Flatbuffer I/O
+ REQUIRED_UNITS+=("mio-tflite280" "mio-circle04")
+ # Data I/O
+ REQUIRED_UNITS+=("dio-hdf5")
+ # Circle compiler library (.circle -> .circle)
+ REQUIRED_UNITS+=("luci")
+ # Tools
+ REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef")
+ REQUIRED_UNITS+=("circle-tensordump" "circledump")
+ REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter")
+ REQUIRED_UNITS+=("luci-eval-driver")
+ REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5")
+ REQUIRED_UNITS+=("circle-eval-diff" "circle-interpreter")
+ REQUIRED_UNITS+=("circle-partitioner" "circle-operator")
+ REQUIRED_UNITS+=("one-cmds")
+ REQUIRED_UNITS+=("bcq-tools")
+ REQUIRED_UNITS+=("dalgona")
+ REQUIRED_UNITS+=("visq")
+
+ # Dependent modules needed for build
+ REQUIRED_UNITS+=("circlechef")
+ REQUIRED_UNITS+=("circle-verify")
+
+ NPROC=$(cat /proc/cpuinfo | grep -c processor)
+
+ # TODO Use "nncc configure" and "nncc build"
+ cmake \
+ -G "MSYS Makefiles" \
+ -DUSE_PROTOBUF_LEGACY_IMPORT=ON \
+ -DCMAKE_EXE_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
+ -DCMAKE_SHARED_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
+ -DENABLE_TEST=OFF \
+ -DDOWNLOAD_GTEST=OFF \
+ -DBUILD_GTEST=OFF \
+ -DCMAKE_C_COMPILER=gcc \
+ -DCMAKE_CXX_COMPILER=g++ \
+ -DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
+ -DCMAKE_BUILD_TYPE=release \
+ -DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+ -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \
+ ${EXTRA_OPTIONS[@]} \
+ "${NNAS_PROJECT_PATH}/infra/nncc"
+}
+
+function preset_install()
+{
+ # Install libraries to bin/ for Windows release
+ mv ${NNCC_INSTALL_PREFIX}/lib/*.dll ${NNCC_INSTALL_PREFIX}/bin
+ rm -rf ${NNCC_INSTALL_PREFIX}/lib
+
+ install -t "${NNPKG_INSTALL_PREFIX}/bin" -D \
+ "${NNAS_PROJECT_PATH}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh"
+
+ # Install tf2nnpkg
+ install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.20220323" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg"
+
+ # NOTE Running 'tf2tfliteV2' requires TensorFlow, but TensorFlow cannot be
+ # installed under MinGW. Install it into a Python virtual environment from a
+ # native Windows CMD (run as administrator), then copy that environment to
+ # "${NNAS_INSTALL_PREFIX}/bin/venv"
+}
diff --git a/infra/packaging/preset/20221125 b/infra/packaging/preset/20221125
new file mode 100644
index 000000000..d798087ec
--- /dev/null
+++ b/infra/packaging/preset/20221125
@@ -0,0 +1,66 @@
+#!/bin/bash
+
+# NOTE purpose of this file is static analysis only
+# new official preset will be added when new programs are ready
+
+PRESET="20221125"
+
+function preset_configure()
+{
+ REQUIRED_UNITS=()
+ # Common Libraries
+ REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
+ REQUIRED_UNITS+=("oops" "pepper-assert" "pepper-csv2vec" "foder" "crew")
+ REQUIRED_UNITS+=("souschef")
+ REQUIRED_UNITS+=("safemain")
+ REQUIRED_UNITS+=("arser")
+ REQUIRED_UNITS+=("vconone")
+ # Hermes Logging Framework
+ REQUIRED_UNITS+=("hermes" "hermes-std")
+ # loco IR and related utilities
+ REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+ # Flatbuffer I/O
+ REQUIRED_UNITS+=("mio-tflite280" "mio-circle04")
+ # Data I/O
+ REQUIRED_UNITS+=("dio-hdf5")
+ # Compute
+ REQUIRED_UNITS+=("luci-compute")
+ # Circle compiler library (.circle -> .circle)
+ REQUIRED_UNITS+=("luci")
+ # Python interface for circle schema
+ REQUIRED_UNITS+=("pics")
+ # Tools
+ REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef")
+ REQUIRED_UNITS+=("circle-tensordump" "circledump")
+ REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter")
+ REQUIRED_UNITS+=("luci-eval-driver")
+ REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5")
+ REQUIRED_UNITS+=("circle-eval-diff" "circle-interpreter")
+ REQUIRED_UNITS+=("circle-partitioner" "circle-operator")
+ REQUIRED_UNITS+=("one-cmds")
+ REQUIRED_UNITS+=("bcq-tools")
+ REQUIRED_UNITS+=("dalgona")
+ REQUIRED_UNITS+=("visq")
+ REQUIRED_UNITS+=("circle-opselector")
+
+ # Dependent modules needed for build
+ REQUIRED_UNITS+=("circlechef")
+ REQUIRED_UNITS+=("circle-verify")
+
+ NPROC=${NPROC:-$(cat /proc/cpuinfo | grep -c processor)}
+
+ # TODO Use "nncc configure" and "nncc build"
+ cmake \
+ -DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
+ -DCMAKE_BUILD_TYPE=release \
+ -DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+ -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \
+ ${EXTRA_OPTIONS[@]} \
+ "${NNAS_PROJECT_PATH}/infra/nncc"
+}
+
+function preset_install()
+{
+ # Install tf2nnpkg
+ install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.${PRESET}" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg"
+}
diff --git a/infra/packaging/preset/20221125_windows b/infra/packaging/preset/20221125_windows
new file mode 100644
index 000000000..75c64260a
--- /dev/null
+++ b/infra/packaging/preset/20221125_windows
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+PRESET="20221125"
+
+function preset_configure()
+{
+ REQUIRED_UNITS=()
+ # Common Libraries
+ REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
+ REQUIRED_UNITS+=("oops" "pepper-assert" "pepper-csv2vec" "foder" "crew")
+ REQUIRED_UNITS+=("souschef")
+ REQUIRED_UNITS+=("safemain")
+ REQUIRED_UNITS+=("arser")
+ REQUIRED_UNITS+=("vconone")
+ # Hermes Logging Framework
+ REQUIRED_UNITS+=("hermes" "hermes-std")
+ # loco IR and related utilities
+ REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+ # Flatbuffer I/O
+ REQUIRED_UNITS+=("mio-tflite280" "mio-circle04")
+ # Data I/O
+ REQUIRED_UNITS+=("dio-hdf5")
+ # Compute
+ REQUIRED_UNITS+=("luci-compute")
+ # Circle compiler library (.circle -> .circle)
+ REQUIRED_UNITS+=("luci")
+ # Python interface for circle schema
+ REQUIRED_UNITS+=("pics")
+ # Tools
+ REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef")
+ REQUIRED_UNITS+=("circle-tensordump" "circledump")
+ REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter")
+ REQUIRED_UNITS+=("luci-eval-driver")
+ REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5")
+ REQUIRED_UNITS+=("circle-eval-diff" "circle-interpreter")
+ REQUIRED_UNITS+=("circle-partitioner" "circle-operator")
+ REQUIRED_UNITS+=("one-cmds")
+ REQUIRED_UNITS+=("bcq-tools")
+ REQUIRED_UNITS+=("dalgona")
+ REQUIRED_UNITS+=("visq")
+
+ # Dependent modules needed for build
+ REQUIRED_UNITS+=("circlechef")
+ REQUIRED_UNITS+=("circle-verify")
+
+ NPROC=$(cat /proc/cpuinfo | grep -c processor)
+
+ # TODO Use "nncc configure" and "nncc build"
+ cmake \
+ -G "MSYS Makefiles" \
+ -DUSE_PROTOBUF_LEGACY_IMPORT=ON \
+ -DCMAKE_EXE_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
+ -DCMAKE_SHARED_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
+ -DENABLE_TEST=OFF \
+ -DDOWNLOAD_GTEST=OFF \
+ -DBUILD_GTEST=OFF \
+ -DCMAKE_C_COMPILER=gcc \
+ -DCMAKE_CXX_COMPILER=g++ \
+ -DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
+ -DCMAKE_BUILD_TYPE=release \
+ -DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+ -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \
+ ${EXTRA_OPTIONS[@]} \
+ "${NNAS_PROJECT_PATH}/infra/nncc"
+}
+
+function preset_install()
+{
+ # Install libraries to bin/ for Windows release
+ mv ${NNCC_INSTALL_PREFIX}/lib/*.dll ${NNCC_INSTALL_PREFIX}/bin
+ rm -rf ${NNCC_INSTALL_PREFIX}/lib
+
+ # Install tf2nnpkg
+ install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.${PRESET}" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg"
+
+ # NOTE Running 'tf2tfliteV2' requires TensorFlow, but TensorFlow cannot be
+ # installed under MinGW. Install it into a Python virtual environment from a
+ # native Windows CMD (run as administrator), then copy that environment to
+ # "${NNAS_INSTALL_PREFIX}/bin/venv"
+}
diff --git a/infra/packaging/preset/20230413 b/infra/packaging/preset/20230413
new file mode 100644
index 000000000..85ce6cbc6
--- /dev/null
+++ b/infra/packaging/preset/20230413
@@ -0,0 +1,66 @@
+#!/bin/bash
+
+# NOTE purpose of this file is static analysis only
+# new official preset will be added when new programs are ready
+
+PRESET="20230413"
+
+function preset_configure()
+{
+ REQUIRED_UNITS=()
+ # Common Libraries
+ REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
+ REQUIRED_UNITS+=("oops" "pepper-assert" "pepper-csv2vec" "foder" "crew")
+ REQUIRED_UNITS+=("souschef")
+ REQUIRED_UNITS+=("safemain")
+ REQUIRED_UNITS+=("arser")
+ REQUIRED_UNITS+=("vconone")
+ # Hermes Logging Framework
+ REQUIRED_UNITS+=("hermes" "hermes-std")
+ # loco IR and related utilities
+ REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+ # Flatbuffer I/O
+ REQUIRED_UNITS+=("mio-tflite280" "mio-circle05" "mio-tflite2121" "mio-circle06")
+ # Data I/O
+ REQUIRED_UNITS+=("dio-hdf5")
+ # Compute
+ REQUIRED_UNITS+=("luci-compute")
+ # Circle compiler library (.circle -> .circle)
+ REQUIRED_UNITS+=("luci")
+ # Python interface for circle schema
+ REQUIRED_UNITS+=("pics")
+ # Tools
+ REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef")
+ REQUIRED_UNITS+=("circle-tensordump" "circledump")
+ REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter")
+ REQUIRED_UNITS+=("luci-eval-driver")
+ REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5" "circle-mpqsolver")
+ REQUIRED_UNITS+=("circle-eval-diff" "circle-interpreter")
+ REQUIRED_UNITS+=("circle-partitioner" "circle-operator")
+ REQUIRED_UNITS+=("one-cmds")
+ REQUIRED_UNITS+=("bcq-tools")
+ REQUIRED_UNITS+=("dalgona")
+ REQUIRED_UNITS+=("visq")
+ REQUIRED_UNITS+=("circle-opselector")
+
+ # Dependent modules needed for build
+ REQUIRED_UNITS+=("circlechef")
+ REQUIRED_UNITS+=("circle-verify")
+
+ NPROC=${NPROC:-$(cat /proc/cpuinfo | grep -c processor)}
+
+ # TODO Use "nncc configure" and "nncc build"
+ cmake \
+ -DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
+ -DCMAKE_BUILD_TYPE=release \
+ -DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+ -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \
+ ${EXTRA_OPTIONS[@]} \
+ "${NNAS_PROJECT_PATH}/infra/nncc"
+}
+
+function preset_install()
+{
+ # Install tf2nnpkg
+ install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.${PRESET}" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg"
+}
diff --git a/infra/packaging/preset/20230413_windows b/infra/packaging/preset/20230413_windows
new file mode 100644
index 000000000..8015de86d
--- /dev/null
+++ b/infra/packaging/preset/20230413_windows
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+PRESET="20230413"
+
+function preset_configure()
+{
+ REQUIRED_UNITS=()
+ # Common Libraries
+ REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
+ REQUIRED_UNITS+=("oops" "pepper-assert" "pepper-csv2vec" "foder" "crew")
+ REQUIRED_UNITS+=("souschef")
+ REQUIRED_UNITS+=("safemain")
+ REQUIRED_UNITS+=("arser")
+ REQUIRED_UNITS+=("vconone")
+ # Hermes Logging Framework
+ REQUIRED_UNITS+=("hermes" "hermes-std")
+ # loco IR and related utilities
+ REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+ # Flatbuffer I/O
+ REQUIRED_UNITS+=("mio-tflite280" "mio-circle05" "mio-tflite2121" "mio-circle06")
+ # Data I/O
+ REQUIRED_UNITS+=("dio-hdf5")
+ # Compute
+ REQUIRED_UNITS+=("luci-compute")
+ # Circle compiler library (.circle -> .circle)
+ REQUIRED_UNITS+=("luci")
+ # Python interface for circle schema
+ REQUIRED_UNITS+=("pics")
+ # Tools
+ REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef")
+ REQUIRED_UNITS+=("circle-tensordump" "circledump")
+ REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter")
+ REQUIRED_UNITS+=("luci-eval-driver")
+ REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5" "circle-mpqsolver")
+ REQUIRED_UNITS+=("circle-eval-diff" "circle-interpreter")
+ REQUIRED_UNITS+=("circle-partitioner" "circle-operator")
+ REQUIRED_UNITS+=("one-cmds")
+ REQUIRED_UNITS+=("bcq-tools")
+ REQUIRED_UNITS+=("dalgona")
+ REQUIRED_UNITS+=("visq")
+
+ # Dependent modules needed for build
+ REQUIRED_UNITS+=("circlechef")
+ REQUIRED_UNITS+=("circle-verify")
+
+ NPROC=$(cat /proc/cpuinfo | grep -c processor)
+
+ # TODO Use "nncc configure" and "nncc build"
+ cmake \
+ -G "MSYS Makefiles" \
+ -DUSE_PROTOBUF_LEGACY_IMPORT=ON \
+ -DCMAKE_EXE_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
+ -DCMAKE_SHARED_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
+ -DENABLE_TEST=OFF \
+ -DDOWNLOAD_GTEST=OFF \
+ -DBUILD_GTEST=OFF \
+ -DCMAKE_C_COMPILER=gcc \
+ -DCMAKE_CXX_COMPILER=g++ \
+ -DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
+ -DCMAKE_BUILD_TYPE=release \
+ -DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+ -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \
+ ${EXTRA_OPTIONS[@]} \
+ "${NNAS_PROJECT_PATH}/infra/nncc"
+}
+
+function preset_install()
+{
+ # Install libraries to bin/ for Windows release
+ mv ${NNCC_INSTALL_PREFIX}/lib/*.dll ${NNCC_INSTALL_PREFIX}/bin
+ rm -rf ${NNCC_INSTALL_PREFIX}/lib
+
+ # Install tf2nnpkg
+ install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.${PRESET}" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg"
+
+ # NOTE Running 'tf2tfliteV2' requires TensorFlow, but TensorFlow cannot be
+ # installed under MinGW. Install it into a Python virtual environment from a
+ # native Windows CMD (run as administrator), then copy that environment to
+ # "${NNAS_INSTALL_PREFIX}/bin/venv"
+}
diff --git a/infra/packaging/preset/20230907 b/infra/packaging/preset/20230907
new file mode 100644
index 000000000..44bc4e00f
--- /dev/null
+++ b/infra/packaging/preset/20230907
@@ -0,0 +1,66 @@
+#!/bin/bash
+
+# NOTE purpose of this file is static analysis only
+# new official preset will be added when new programs are ready
+
+PRESET="20230907"
+
+function preset_configure()
+{
+ REQUIRED_UNITS=()
+ # Common Libraries
+ REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
+ REQUIRED_UNITS+=("oops" "pepper-assert" "pepper-csv2vec" "foder" "crew")
+ REQUIRED_UNITS+=("souschef")
+ REQUIRED_UNITS+=("safemain")
+ REQUIRED_UNITS+=("arser")
+ REQUIRED_UNITS+=("vconone")
+ # Hermes Logging Framework
+ REQUIRED_UNITS+=("hermes" "hermes-std")
+ # loco IR and related utilities
+ REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+ # Flatbuffer I/O
+ REQUIRED_UNITS+=("mio-tflite2121" "mio-circle06")
+ # Data I/O
+ REQUIRED_UNITS+=("dio-hdf5")
+ # Compute
+ REQUIRED_UNITS+=("luci-compute")
+ # Circle compiler library (.circle -> .circle)
+ REQUIRED_UNITS+=("luci")
+ # Python interface for circle schema
+ REQUIRED_UNITS+=("pics")
+ # Tools
+ REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef")
+ REQUIRED_UNITS+=("circle-tensordump" "circledump")
+ REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter")
+ REQUIRED_UNITS+=("luci-eval-driver")
+ REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5" "circle-mpqsolver")
+ REQUIRED_UNITS+=("circle-eval-diff" "circle-interpreter")
+ REQUIRED_UNITS+=("circle-partitioner" "circle-operator")
+ REQUIRED_UNITS+=("one-cmds")
+ REQUIRED_UNITS+=("bcq-tools")
+ REQUIRED_UNITS+=("dalgona")
+ REQUIRED_UNITS+=("visq")
+ REQUIRED_UNITS+=("circle-opselector")
+
+ # Dependent modules needed for build
+ REQUIRED_UNITS+=("circlechef")
+ REQUIRED_UNITS+=("circle-verify")
+
+ NPROC=${NPROC:-$(cat /proc/cpuinfo | grep -c processor)}
+
+ # TODO Use "nncc configure" and "nncc build"
+ cmake \
+ -DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
+ -DCMAKE_BUILD_TYPE=release \
+ -DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+ -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \
+ ${EXTRA_OPTIONS[@]} \
+ "${NNAS_PROJECT_PATH}/infra/nncc"
+}
+
+function preset_install()
+{
+ # Install tf2nnpkg
+ install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.${PRESET}" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg"
+}
diff --git a/infra/packaging/preset/20230907_windows b/infra/packaging/preset/20230907_windows
new file mode 100644
index 000000000..5dcb36ab0
--- /dev/null
+++ b/infra/packaging/preset/20230907_windows
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+PRESET="20230907"
+
+function preset_configure()
+{
+ REQUIRED_UNITS=()
+ # Common Libraries
+ REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
+ REQUIRED_UNITS+=("oops" "pepper-assert" "pepper-csv2vec" "foder" "crew")
+ REQUIRED_UNITS+=("souschef")
+ REQUIRED_UNITS+=("safemain")
+ REQUIRED_UNITS+=("arser")
+ REQUIRED_UNITS+=("vconone")
+ # Hermes Logging Framework
+ REQUIRED_UNITS+=("hermes" "hermes-std")
+ # loco IR and related utilities
+ REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+ # Flatbuffer I/O
+ REQUIRED_UNITS+=("mio-tflite2121" "mio-circle06")
+ # Data I/O
+ REQUIRED_UNITS+=("dio-hdf5")
+ # Compute
+ REQUIRED_UNITS+=("luci-compute")
+ # Circle compiler library (.circle -> .circle)
+ REQUIRED_UNITS+=("luci")
+ # Python interface for circle schema
+ REQUIRED_UNITS+=("pics")
+ # Tools
+ REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef")
+ REQUIRED_UNITS+=("circle-tensordump" "circledump")
+ REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter")
+ REQUIRED_UNITS+=("luci-eval-driver")
+ REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5" "circle-mpqsolver")
+ REQUIRED_UNITS+=("circle-eval-diff" "circle-interpreter")
+ REQUIRED_UNITS+=("circle-partitioner" "circle-operator")
+ REQUIRED_UNITS+=("one-cmds")
+ REQUIRED_UNITS+=("bcq-tools")
+ REQUIRED_UNITS+=("dalgona")
+ REQUIRED_UNITS+=("visq")
+
+ # Dependent modules needed for build
+ REQUIRED_UNITS+=("circlechef")
+ REQUIRED_UNITS+=("circle-verify")
+
+ NPROC=$(cat /proc/cpuinfo | grep -c processor)
+
+ # TODO Use "nncc configure" and "nncc build"
+ cmake \
+ -G "MSYS Makefiles" \
+ -DUSE_PROTOBUF_LEGACY_IMPORT=ON \
+ -DCMAKE_EXE_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
+ -DCMAKE_SHARED_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
+ -DENABLE_TEST=OFF \
+ -DDOWNLOAD_GTEST=OFF \
+ -DBUILD_GTEST=OFF \
+ -DCMAKE_C_COMPILER=gcc \
+ -DCMAKE_CXX_COMPILER=g++ \
+ -DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
+ -DCMAKE_BUILD_TYPE=release \
+ -DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+ -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \
+ ${EXTRA_OPTIONS[@]} \
+ "${NNAS_PROJECT_PATH}/infra/nncc"
+}
+
+function preset_install()
+{
+ # Install libraries to bin/ for Windows release
+ mv ${NNCC_INSTALL_PREFIX}/lib/*.dll ${NNCC_INSTALL_PREFIX}/bin
+ rm -rf ${NNCC_INSTALL_PREFIX}/lib
+
+ # Install tf2nnpkg
+ install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.${PRESET}" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg"
+
+ # 'tf2tfliteV2' requires tensorflow, but tensorflow cannot be installed
+ # under mingw. Instead, install tensorflow into a python virtual environment
+ # from a native Windows CMD (run as administrator), then copy that
+ # environment to "${NNAS_INSTALL_PREFIX}/bin/venv"
+}
diff --git a/infra/packaging/res/tf2nnpkg.20200630 b/infra/packaging/res/tf2nnpkg.20200630
index 7846fd388..b7091541a 100644
--- a/infra/packaging/res/tf2nnpkg.20200630
+++ b/infra/packaging/res/tf2nnpkg.20200630
@@ -92,16 +92,39 @@ OUTPUT=$(awk -F, '/^output/ { print $2 }' ${INFO_FILE} | cut -d: -f1 | tr -d ' '
INPUT_SHAPES=$(grep ^input ${INFO_FILE} | cut -d "[" -f2 | cut -d "]" -f1 | tr -d ' ' | xargs | tr ' ' ':')
+# Generate BCQ information metadata
+# If the model has no BCQ information, or the information is invalid, the pb file is left unchanged.
+"${ROOT}/bin/generate_bcq_metadata" \
+--input_path "${GRAPHDEF_FILE}" \
+--output_path "${TMPDIR}/${MODEL_NAME}_withmeta.pb" \
+--output_arrays "${OUTPUT}"
+
+# Generate BCQ information nodes as output_arrays
+# If the model has no BCQ information, the generated output_arrays will be empty.
+"${ROOT}/bin/generate_bcq_output_arrays" \
+--input_path "${TMPDIR}/${MODEL_NAME}_withmeta.pb" \
+--metadata_path "${TMPDIR}/${MODEL_NAME}_metadata_arrays.txt" \
+--output_arrays_path "${TMPDIR}/${MODEL_NAME}_output_arrays.txt"
+
# generate tflite file
-python "${ROOT}/bin/tf2tfliteV2.py" ${TF_INTERFACE} --input_path ${GRAPHDEF_FILE} \
---output_path "${TMPDIR}/${MODEL_NAME}.tflite" \
---input_arrays ${INPUT} --input_shapes ${INPUT_SHAPES} \
---output_arrays ${OUTPUT}
+TF2TFLITE_CONVERT_SCRIPT="python ${ROOT}/bin/tf2tfliteV2.py ${TF_INTERFACE} "
+TF2TFLITE_CONVERT_SCRIPT+="--input_path ${TMPDIR}/${MODEL_NAME}_withmeta.pb "
+TF2TFLITE_CONVERT_SCRIPT+="--input_arrays ${INPUT} "
+TF2TFLITE_CONVERT_SCRIPT+="--output_path ${TMPDIR}/${MODEL_NAME}.tflite "
+TF2TFLITE_CONVERT_SCRIPT+="--output_arrays "
+TF2TFLITE_CONVERT_SCRIPT+="$(cat ${TMPDIR}/${MODEL_NAME}_metadata_arrays.txt)"
+TF2TFLITE_CONVERT_SCRIPT+="${OUTPUT}"
+TF2TFLITE_CONVERT_SCRIPT+="$(cat ${TMPDIR}/${MODEL_NAME}_output_arrays.txt) "
+if [ ! -z ${INPUT_SHAPES} ]; then
+ TF2TFLITE_CONVERT_SCRIPT+="--input_shapes ${INPUT_SHAPES} "
+fi
+
+${TF2TFLITE_CONVERT_SCRIPT}
# convert .tflite to .circle
"${ROOT}/bin/tflite2circle" "${TMPDIR}/${MODEL_NAME}.tflite" "${TMPDIR}/${MODEL_NAME}.tmp.circle"
# optimize
-"${ROOT}/bin/circle2circle" --all "${TMPDIR}/${MODEL_NAME}.tmp.circle" "${TMPDIR}/${MODEL_NAME}.circle"
+"${ROOT}/bin/circle2circle" --O1 "${TMPDIR}/${MODEL_NAME}.tmp.circle" "${TMPDIR}/${MODEL_NAME}.circle"
"${ROOT}/bin/model2nnpkg.sh" -o "${OUTPUT_DIR}" "${TMPDIR}/${MODEL_NAME}.circle"
diff --git a/infra/packaging/res/tf2nnpkg.20210406 b/infra/packaging/res/tf2nnpkg.20210406
new file mode 100644
index 000000000..0d44818a1
--- /dev/null
+++ b/infra/packaging/res/tf2nnpkg.20210406
@@ -0,0 +1,109 @@
+#!/bin/bash
+
+set -e
+
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+
+command_exists() {
+ if [ "$#" -le 0 ]; then
+ return 1
+ fi
+ command -v "$@" > /dev/null 2>&1
+}
+
+usage()
+{
+ echo "Convert TensorFlow model to nnpackage."
+ echo "Usage: tf2nnpkg"
+ echo " --info <path/to/info>"
+ echo " --graphdef <path/to/pb>"
+ echo " -o <path/to/nnpkg/directory>"
+ echo " --v2 (optional) Use TF 2.x interface"
+ exit 255
+}
+
+TF_INTERFACE="--v1"
+
+# Parse command-line arguments
+#
+while [ "$#" -ne 0 ]; do
+ CUR="$1"
+
+ case $CUR in
+ '--help')
+ usage
+ ;;
+ '--info')
+ export INFO_FILE="$2"
+ shift 2
+ ;;
+ '--graphdef')
+ export GRAPHDEF_FILE="$2"
+ shift 2
+ ;;
+ '-o')
+ export OUTPUT_DIR="$2"
+ shift 2
+ ;;
+ '--v2')
+ TF_INTERFACE="--v2"
+ shift
+ ;;
+ *)
+ echo "${CUR}"
+ shift
+ ;;
+ esac
+done
+
+if [ -z ${GRAPHDEF_FILE} ] || [ ! -e ${GRAPHDEF_FILE} ]; then
+ echo "pb is not found. Please check --graphdef is correct."
+ exit 2
+fi
+
+if [ -z ${INFO_FILE} ] || [ ! -e ${INFO_FILE} ]; then
+ echo "info is not found. Please check --info is correct."
+ exit 2
+fi
+
+if [ -z ${OUTPUT_DIR} ]; then
+ echo "output directory is not specifed. Please check -o is correct.."
+ exit 2
+fi
+
+FILE_BASE=$(basename ${GRAPHDEF_FILE})
+MODEL_NAME="${FILE_BASE%.*}"
+TMPDIR=$(mktemp -d)
+trap "{ rm -rf $TMPDIR; }" EXIT
+
+# activate python virtual environment
+VIRTUALENV_LINUX="${ROOT}/bin/venv/bin/activate"
+VIRTUALENV_WINDOWS="${ROOT}/bin/venv/Scripts/activate"
+
+if [ -e ${VIRTUALENV_LINUX} ]; then
+ source ${VIRTUALENV_LINUX}
+elif [ -e ${VIRTUALENV_WINDOWS} ]; then
+ source ${VIRTUALENV_WINDOWS}
+fi
+
+# parse inputs, outputs from info file
+INPUT=$(awk -F, '/^input/ { print $2 }' ${INFO_FILE} | cut -d: -f1 | tr -d ' ' | paste -d, -s)
+OUTPUT=$(awk -F, '/^output/ { print $2 }' ${INFO_FILE} | cut -d: -f1 | tr -d ' ' | paste -d, -s)
+
+INPUT_SHAPES=$(grep ^input ${INFO_FILE} | cut -d "[" -f2 | cut -d "]" -f1 | tr -d ' ' | xargs | tr ' ' ':')
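+# Illustrative info-file lines this parsing assumes (names are examples):
+#   input, InputTensor:0, [1,224,224,3]
+#   output, OutputTensor:0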
+
+ONE_IMPORT_BCQ_SCRIPT="${ROOT}/bin/one-import-bcq ${TF_INTERFACE} "
+ONE_IMPORT_BCQ_SCRIPT+="-i ${GRAPHDEF_FILE} "
+ONE_IMPORT_BCQ_SCRIPT+="-o ${TMPDIR}/${MODEL_NAME}.tmp.circle "
+ONE_IMPORT_BCQ_SCRIPT+="-I ${INPUT} "
+ONE_IMPORT_BCQ_SCRIPT+="-O ${OUTPUT} "
+if [ ! -z ${INPUT_SHAPES} ]; then
+ ONE_IMPORT_BCQ_SCRIPT+="-s ${INPUT_SHAPES} "
+fi
+
+${ONE_IMPORT_BCQ_SCRIPT}
+
+# optimize
+"${ROOT}/bin/circle2circle" --O1 "${TMPDIR}/${MODEL_NAME}.tmp.circle" "${TMPDIR}/${MODEL_NAME}.circle"
+
+"${ROOT}/bin/model2nnpkg.sh" -o "${OUTPUT_DIR}" "${TMPDIR}/${MODEL_NAME}.circle"
diff --git a/infra/packaging/res/tf2nnpkg.20210706 b/infra/packaging/res/tf2nnpkg.20210706
new file mode 100644
index 000000000..0d44818a1
--- /dev/null
+++ b/infra/packaging/res/tf2nnpkg.20210706
@@ -0,0 +1,109 @@
+#!/bin/bash
+
+set -e
+
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+
+command_exists() {
+ if [ "$#" -le 0 ]; then
+ return 1
+ fi
+ command -v "$@" > /dev/null 2>&1
+}
+
+usage()
+{
+ echo "Convert TensorFlow model to nnpackage."
+ echo "Usage: tf2nnpkg"
+ echo " --info <path/to/info>"
+ echo " --graphdef <path/to/pb>"
+ echo " -o <path/to/nnpkg/directory>"
+ echo " --v2 (optional) Use TF 2.x interface"
+ exit 255
+}
+
+TF_INTERFACE="--v1"
+
+# Parse command-line arguments
+#
+while [ "$#" -ne 0 ]; do
+ CUR="$1"
+
+ case $CUR in
+ '--help')
+ usage
+ ;;
+ '--info')
+ export INFO_FILE="$2"
+ shift 2
+ ;;
+ '--graphdef')
+ export GRAPHDEF_FILE="$2"
+ shift 2
+ ;;
+ '-o')
+ export OUTPUT_DIR="$2"
+ shift 2
+ ;;
+ '--v2')
+ TF_INTERFACE="--v2"
+ shift
+ ;;
+ *)
+ echo "${CUR}"
+ shift
+ ;;
+ esac
+done
+
+if [ -z ${GRAPHDEF_FILE} ] || [ ! -e ${GRAPHDEF_FILE} ]; then
+ echo "pb is not found. Please check --graphdef is correct."
+ exit 2
+fi
+
+if [ -z ${INFO_FILE} ] || [ ! -e ${INFO_FILE} ]; then
+ echo "info is not found. Please check --info is correct."
+ exit 2
+fi
+
+if [ -z ${OUTPUT_DIR} ]; then
+ echo "output directory is not specifed. Please check -o is correct.."
+ exit 2
+fi
+
+FILE_BASE=$(basename ${GRAPHDEF_FILE})
+MODEL_NAME="${FILE_BASE%.*}"
+TMPDIR=$(mktemp -d)
+trap "{ rm -rf $TMPDIR; }" EXIT
+
+# activate python virtual environment
+VIRTUALENV_LINUX="${ROOT}/bin/venv/bin/activate"
+VIRTUALENV_WINDOWS="${ROOT}/bin/venv/Scripts/activate"
+
+if [ -e ${VIRTUALENV_LINUX} ]; then
+ source ${VIRTUALENV_LINUX}
+elif [ -e ${VIRTUALENV_WINDOWS} ]; then
+ source ${VIRTUALENV_WINDOWS}
+fi
+
+# parse inputs, outputs from info file
+INPUT=$(awk -F, '/^input/ { print $2 }' ${INFO_FILE} | cut -d: -f1 | tr -d ' ' | paste -d, -s)
+OUTPUT=$(awk -F, '/^output/ { print $2 }' ${INFO_FILE} | cut -d: -f1 | tr -d ' ' | paste -d, -s)
+
+INPUT_SHAPES=$(grep ^input ${INFO_FILE} | cut -d "[" -f2 | cut -d "]" -f1 | tr -d ' ' | xargs | tr ' ' ':')
+
+ONE_IMPORT_BCQ_SCRIPT="${ROOT}/bin/one-import-bcq ${TF_INTERFACE} "
+ONE_IMPORT_BCQ_SCRIPT+="-i ${GRAPHDEF_FILE} "
+ONE_IMPORT_BCQ_SCRIPT+="-o ${TMPDIR}/${MODEL_NAME}.tmp.circle "
+ONE_IMPORT_BCQ_SCRIPT+="-I ${INPUT} "
+ONE_IMPORT_BCQ_SCRIPT+="-O ${OUTPUT} "
+if [ ! -z ${INPUT_SHAPES} ]; then
+ ONE_IMPORT_BCQ_SCRIPT+="-s ${INPUT_SHAPES} "
+fi
+
+${ONE_IMPORT_BCQ_SCRIPT}
+
+# optimize
+"${ROOT}/bin/circle2circle" --O1 "${TMPDIR}/${MODEL_NAME}.tmp.circle" "${TMPDIR}/${MODEL_NAME}.circle"
+
+"${ROOT}/bin/model2nnpkg.sh" -o "${OUTPUT_DIR}" "${TMPDIR}/${MODEL_NAME}.circle"
diff --git a/infra/packaging/res/tf2nnpkg.20210910 b/infra/packaging/res/tf2nnpkg.20210910
new file mode 100644
index 000000000..0d44818a1
--- /dev/null
+++ b/infra/packaging/res/tf2nnpkg.20210910
@@ -0,0 +1,109 @@
+#!/bin/bash
+
+set -e
+
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+
+command_exists() {
+ if [ "$#" -le 0 ]; then
+ return 1
+ fi
+ command -v "$@" > /dev/null 2>&1
+}
+
+usage()
+{
+ echo "Convert TensorFlow model to nnpackage."
+ echo "Usage: tf2nnpkg"
+ echo " --info <path/to/info>"
+ echo " --graphdef <path/to/pb>"
+ echo " -o <path/to/nnpkg/directory>"
+ echo " --v2 (optional) Use TF 2.x interface"
+ exit 255
+}
+
+TF_INTERFACE="--v1"
+
+# Parse command-line arguments
+#
+while [ "$#" -ne 0 ]; do
+ CUR="$1"
+
+ case $CUR in
+ '--help')
+ usage
+ ;;
+ '--info')
+ export INFO_FILE="$2"
+ shift 2
+ ;;
+ '--graphdef')
+ export GRAPHDEF_FILE="$2"
+ shift 2
+ ;;
+ '-o')
+ export OUTPUT_DIR="$2"
+ shift 2
+ ;;
+ '--v2')
+ TF_INTERFACE="--v2"
+ shift
+ ;;
+ *)
+ echo "${CUR}"
+ shift
+ ;;
+ esac
+done
+
+if [ -z ${GRAPHDEF_FILE} ] || [ ! -e ${GRAPHDEF_FILE} ]; then
+ echo "pb is not found. Please check --graphdef is correct."
+ exit 2
+fi
+
+if [ -z ${INFO_FILE} ] || [ ! -e ${INFO_FILE} ]; then
+ echo "info is not found. Please check --info is correct."
+ exit 2
+fi
+
+if [ -z ${OUTPUT_DIR} ]; then
+ echo "output directory is not specifed. Please check -o is correct.."
+ exit 2
+fi
+
+FILE_BASE=$(basename ${GRAPHDEF_FILE})
+MODEL_NAME="${FILE_BASE%.*}"
+TMPDIR=$(mktemp -d)
+trap "{ rm -rf $TMPDIR; }" EXIT
+
+# activate python virtual environment
+VIRTUALENV_LINUX="${ROOT}/bin/venv/bin/activate"
+VIRTUALENV_WINDOWS="${ROOT}/bin/venv/Scripts/activate"
+
+if [ -e ${VIRTUALENV_LINUX} ]; then
+ source ${VIRTUALENV_LINUX}
+elif [ -e ${VIRTUALENV_WINDOWS} ]; then
+ source ${VIRTUALENV_WINDOWS}
+fi
+
+# parse inputs, outputs from info file
+INPUT=$(awk -F, '/^input/ { print $2 }' ${INFO_FILE} | cut -d: -f1 | tr -d ' ' | paste -d, -s)
+OUTPUT=$(awk -F, '/^output/ { print $2 }' ${INFO_FILE} | cut -d: -f1 | tr -d ' ' | paste -d, -s)
+
+INPUT_SHAPES=$(grep ^input ${INFO_FILE} | cut -d "[" -f2 | cut -d "]" -f1 | tr -d ' ' | xargs | tr ' ' ':')
+
+ONE_IMPORT_BCQ_SCRIPT="${ROOT}/bin/one-import-bcq ${TF_INTERFACE} "
+ONE_IMPORT_BCQ_SCRIPT+="-i ${GRAPHDEF_FILE} "
+ONE_IMPORT_BCQ_SCRIPT+="-o ${TMPDIR}/${MODEL_NAME}.tmp.circle "
+ONE_IMPORT_BCQ_SCRIPT+="-I ${INPUT} "
+ONE_IMPORT_BCQ_SCRIPT+="-O ${OUTPUT} "
+if [ ! -z ${INPUT_SHAPES} ]; then
+ ONE_IMPORT_BCQ_SCRIPT+="-s ${INPUT_SHAPES} "
+fi
+
+${ONE_IMPORT_BCQ_SCRIPT}
+
+# optimize
+"${ROOT}/bin/circle2circle" --O1 "${TMPDIR}/${MODEL_NAME}.tmp.circle" "${TMPDIR}/${MODEL_NAME}.circle"
+
+"${ROOT}/bin/model2nnpkg.sh" -o "${OUTPUT_DIR}" "${TMPDIR}/${MODEL_NAME}.circle"
diff --git a/infra/packaging/res/tf2nnpkg.20220323 b/infra/packaging/res/tf2nnpkg.20220323
new file mode 100644
index 000000000..5f43b2386
--- /dev/null
+++ b/infra/packaging/res/tf2nnpkg.20220323
@@ -0,0 +1,109 @@
+#!/bin/bash
+
+set -e
+
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+
+command_exists() {
+ if [ "$#" -le 0 ]; then
+ return 1
+ fi
+ command -v "$@" > /dev/null 2>&1
+}
+
+usage()
+{
+ echo "Convert TensorFlow model to nnpackage."
+ echo "Usage: tf2nnpkg"
+ echo " --info <path/to/info>"
+ echo " --graphdef <path/to/pb>"
+ echo " -o <path/to/nnpkg/directory>"
+ echo " --v2 (optional) Use TF 2.x interface"
+ exit 255
+}
+
+TF_INTERFACE="--v1"
+
+# Parse command-line arguments
+#
+while [ "$#" -ne 0 ]; do
+ CUR="$1"
+
+ case $CUR in
+ '--help')
+ usage
+ ;;
+ '--info')
+ export INFO_FILE="$2"
+ shift 2
+ ;;
+ '--graphdef')
+ export GRAPHDEF_FILE="$2"
+ shift 2
+ ;;
+ '-o')
+ export OUTPUT_DIR="$2"
+ shift 2
+ ;;
+ '--v2')
+ TF_INTERFACE="--v2"
+ shift
+ ;;
+ *)
+ echo "${CUR}"
+ shift
+ ;;
+ esac
+done
+
+if [ -z ${GRAPHDEF_FILE} ] || [ ! -e ${GRAPHDEF_FILE} ]; then
+ echo "pb is not found. Please check --graphdef is correct."
+ exit 2
+fi
+
+if [ -z ${INFO_FILE} ] || [ ! -e ${INFO_FILE} ]; then
+ echo "info is not found. Please check --info is correct."
+ exit 2
+fi
+
+if [ -z ${OUTPUT_DIR} ]; then
+ echo "output directory is not specifed. Please check -o is correct.."
+ exit 2
+fi
+
+FILE_BASE=$(basename ${GRAPHDEF_FILE})
+MODEL_NAME="${FILE_BASE%.*}"
+TMPDIR=$(mktemp -d)
+trap "{ rm -rf $TMPDIR; }" EXIT
+
+# activate python virtual environment
+VIRTUALENV_LINUX="${ROOT}/bin/venv/bin/activate"
+VIRTUALENV_WINDOWS="${ROOT}/bin/venv/Scripts/activate"
+
+if [ -e ${VIRTUALENV_LINUX} ]; then
+ source ${VIRTUALENV_LINUX}
+elif [ -e ${VIRTUALENV_WINDOWS} ]; then
+ source ${VIRTUALENV_WINDOWS}
+fi
+
+# parse inputs, outputs from info file
+INPUT=$(awk -F, '/^input/ { print $2 }' ${INFO_FILE} | cut -d: -f1 | tr -d ' ' | paste -d, -s)
+OUTPUT=$(awk -F, '/^output/ { print $2 }' ${INFO_FILE} | cut -d: -f1 | tr -d ' ' | paste -d, -s)
+
+INPUT_SHAPES=$(grep ^input ${INFO_FILE} | cut -d "[" -f2 | cut -d "]" -f1 | tr -d ' ' | xargs | tr ' ' ':')
+
+ONE_IMPORT_BCQ_SCRIPT="${ROOT}/bin/one-import-bcq ${TF_INTERFACE} "
+ONE_IMPORT_BCQ_SCRIPT+="-i ${GRAPHDEF_FILE} "
+ONE_IMPORT_BCQ_SCRIPT+="-o ${TMPDIR}/${MODEL_NAME}.tmp.circle "
+ONE_IMPORT_BCQ_SCRIPT+="-I ${INPUT} "
+ONE_IMPORT_BCQ_SCRIPT+="-O ${OUTPUT} "
+if [ ! -z ${INPUT_SHAPES} ]; then
+ ONE_IMPORT_BCQ_SCRIPT+="-s ${INPUT_SHAPES} "
+fi
+
+${ONE_IMPORT_BCQ_SCRIPT}
+
+# optimize
+"${ROOT}/bin/circle2circle" --resolve_customop_add "${TMPDIR}/${MODEL_NAME}.tmp.circle" "${TMPDIR}/${MODEL_NAME}.circle"
+
+"${ROOT}/bin/model2nnpkg.sh" -o "${OUTPUT_DIR}" "${TMPDIR}/${MODEL_NAME}.circle"
diff --git a/infra/packaging/res/tf2nnpkg.20221125 b/infra/packaging/res/tf2nnpkg.20221125
new file mode 100644
index 000000000..a7446e6fe
--- /dev/null
+++ b/infra/packaging/res/tf2nnpkg.20221125
@@ -0,0 +1,109 @@
+#!/bin/bash
+
+set -e
+
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+
+command_exists() {
+ if [ "$#" -le 0 ]; then
+ return 1
+ fi
+ command -v "$@" > /dev/null 2>&1
+}
+
+usage()
+{
+ echo "Convert TensorFlow model to nnpackage."
+ echo "Usage: tf2nnpkg"
+ echo " --info <path/to/info>"
+ echo " --graphdef <path/to/pb>"
+ echo " -o <path/to/nnpkg/directory>"
+ echo " --v2 (optional) Use TF 2.x interface"
+ exit 255
+}
+
+TF_INTERFACE="--v1"
+
+# Parse command-line arguments
+#
+while [ "$#" -ne 0 ]; do
+ CUR="$1"
+
+ case $CUR in
+ '--help')
+ usage
+ ;;
+ '--info')
+ export INFO_FILE="$2"
+ shift 2
+ ;;
+ '--graphdef')
+ export GRAPHDEF_FILE="$2"
+ shift 2
+ ;;
+ '-o')
+ export OUTPUT_DIR="$2"
+ shift 2
+ ;;
+ '--v2')
+ TF_INTERFACE="--v2"
+ shift
+ ;;
+ *)
+ echo "${CUR}"
+ shift
+ ;;
+ esac
+done
+
+if [ -z ${GRAPHDEF_FILE} ] || [ ! -e ${GRAPHDEF_FILE} ]; then
+ echo "pb is not found. Please check --graphdef is correct."
+ exit 2
+fi
+
+if [ -z ${INFO_FILE} ] || [ ! -e ${INFO_FILE} ]; then
+ echo "info is not found. Please check --info is correct."
+ exit 2
+fi
+
+if [ -z ${OUTPUT_DIR} ]; then
+ echo "output directory is not specifed. Please check -o is correct.."
+ exit 2
+fi
+
+FILE_BASE=$(basename ${GRAPHDEF_FILE})
+MODEL_NAME="${FILE_BASE%.*}"
+TMPDIR=$(mktemp -d)
+trap "{ rm -rf $TMPDIR; }" EXIT
+
+# activate python virtual environment
+VIRTUALENV_LINUX="${ROOT}/bin/venv/bin/activate"
+VIRTUALENV_WINDOWS="${ROOT}/bin/venv/Scripts/activate"
+
+if [ -e ${VIRTUALENV_LINUX} ]; then
+ source ${VIRTUALENV_LINUX}
+elif [ -e ${VIRTUALENV_WINDOWS} ]; then
+ source ${VIRTUALENV_WINDOWS}
+fi
+
+# parse inputs, outputs from info file
+INPUT=$(awk -F, '/^input/ { print $2 }' ${INFO_FILE} | cut -d: -f1 | tr -d ' ' | paste -d, -s)
+OUTPUT=$(awk -F, '/^output/ { print $2 }' ${INFO_FILE} | cut -d: -f1 | tr -d ' ' | paste -d, -s)
+
+INPUT_SHAPES=$(grep ^input ${INFO_FILE} | cut -d "[" -f2 | cut -d "]" -f1 | tr -d ' ' | xargs | tr ' ' ':')
+
+ONE_IMPORT_BCQ_SCRIPT="${ROOT}/bin/one-import-bcq ${TF_INTERFACE} "
+ONE_IMPORT_BCQ_SCRIPT+="-i ${GRAPHDEF_FILE} "
+ONE_IMPORT_BCQ_SCRIPT+="-o ${TMPDIR}/${MODEL_NAME}.tmp.circle "
+ONE_IMPORT_BCQ_SCRIPT+="-I ${INPUT} "
+ONE_IMPORT_BCQ_SCRIPT+="-O ${OUTPUT} "
+if [ ! -z ${INPUT_SHAPES} ]; then
+ ONE_IMPORT_BCQ_SCRIPT+="-s ${INPUT_SHAPES} "
+fi
+
+${ONE_IMPORT_BCQ_SCRIPT}
+
+# optimize
+"${ROOT}/bin/circle2circle" --resolve_customop_add "${TMPDIR}/${MODEL_NAME}.tmp.circle" "${TMPDIR}/${MODEL_NAME}.circle"
+
+"${ROOT}/bin/model2nnpkg" -o "${OUTPUT_DIR}" -m "${TMPDIR}/${MODEL_NAME}.circle"
diff --git a/infra/packaging/res/tf2nnpkg.20230413 b/infra/packaging/res/tf2nnpkg.20230413
new file mode 100644
index 000000000..a7446e6fe
--- /dev/null
+++ b/infra/packaging/res/tf2nnpkg.20230413
@@ -0,0 +1,109 @@
+#!/bin/bash
+
+set -e
+
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+
+command_exists() {
+ if [ "$#" -le 0 ]; then
+ return 1
+ fi
+ command -v "$@" > /dev/null 2>&1
+}
+
+usage()
+{
+ echo "Convert TensorFlow model to nnpackage."
+ echo "Usage: tf2nnpkg"
+ echo " --info <path/to/info>"
+ echo " --graphdef <path/to/pb>"
+ echo " -o <path/to/nnpkg/directory>"
+ echo " --v2 (optional) Use TF 2.x interface"
+ exit 255
+}
+
+TF_INTERFACE="--v1"
+
+# Parse command-line arguments
+#
+while [ "$#" -ne 0 ]; do
+ CUR="$1"
+
+ case $CUR in
+ '--help')
+ usage
+ ;;
+ '--info')
+ export INFO_FILE="$2"
+ shift 2
+ ;;
+ '--graphdef')
+ export GRAPHDEF_FILE="$2"
+ shift 2
+ ;;
+ '-o')
+ export OUTPUT_DIR="$2"
+ shift 2
+ ;;
+ '--v2')
+ TF_INTERFACE="--v2"
+ shift
+ ;;
+ *)
+ echo "${CUR}"
+ shift
+ ;;
+ esac
+done
+
+if [ -z ${GRAPHDEF_FILE} ] || [ ! -e ${GRAPHDEF_FILE} ]; then
+ echo "pb is not found. Please check --graphdef is correct."
+ exit 2
+fi
+
+if [ -z ${INFO_FILE} ] || [ ! -e ${INFO_FILE} ]; then
+ echo "info is not found. Please check --info is correct."
+ exit 2
+fi
+
+if [ -z ${OUTPUT_DIR} ]; then
+ echo "output directory is not specifed. Please check -o is correct.."
+ exit 2
+fi
+
+FILE_BASE=$(basename ${GRAPHDEF_FILE})
+MODEL_NAME="${FILE_BASE%.*}"
+TMPDIR=$(mktemp -d)
+trap "{ rm -rf $TMPDIR; }" EXIT
+
+# activate python virtual environment
+VIRTUALENV_LINUX="${ROOT}/bin/venv/bin/activate"
+VIRTUALENV_WINDOWS="${ROOT}/bin/venv/Scripts/activate"
+
+if [ -e ${VIRTUALENV_LINUX} ]; then
+ source ${VIRTUALENV_LINUX}
+elif [ -e ${VIRTUALENV_WINDOWS} ]; then
+ source ${VIRTUALENV_WINDOWS}
+fi
+
+# parse inputs, outputs from info file
+INPUT=$(awk -F, '/^input/ { print $2 }' ${INFO_FILE} | cut -d: -f1 | tr -d ' ' | paste -d, -s)
+OUTPUT=$(awk -F, '/^output/ { print $2 }' ${INFO_FILE} | cut -d: -f1 | tr -d ' ' | paste -d, -s)
+
+INPUT_SHAPES=$(grep ^input ${INFO_FILE} | cut -d "[" -f2 | cut -d "]" -f1 | tr -d ' ' | xargs | tr ' ' ':')
+
+ONE_IMPORT_BCQ_SCRIPT="${ROOT}/bin/one-import-bcq ${TF_INTERFACE} "
+ONE_IMPORT_BCQ_SCRIPT+="-i ${GRAPHDEF_FILE} "
+ONE_IMPORT_BCQ_SCRIPT+="-o ${TMPDIR}/${MODEL_NAME}.tmp.circle "
+ONE_IMPORT_BCQ_SCRIPT+="-I ${INPUT} "
+ONE_IMPORT_BCQ_SCRIPT+="-O ${OUTPUT} "
+if [ ! -z ${INPUT_SHAPES} ]; then
+ ONE_IMPORT_BCQ_SCRIPT+="-s ${INPUT_SHAPES} "
+fi
+
+${ONE_IMPORT_BCQ_SCRIPT}
+
+# optimize
+"${ROOT}/bin/circle2circle" --resolve_customop_add "${TMPDIR}/${MODEL_NAME}.tmp.circle" "${TMPDIR}/${MODEL_NAME}.circle"
+
+"${ROOT}/bin/model2nnpkg" -o "${OUTPUT_DIR}" -m "${TMPDIR}/${MODEL_NAME}.circle"
diff --git a/infra/packaging/res/tf2nnpkg.20230907 b/infra/packaging/res/tf2nnpkg.20230907
new file mode 100644
index 000000000..a7446e6fe
--- /dev/null
+++ b/infra/packaging/res/tf2nnpkg.20230907
@@ -0,0 +1,109 @@
+#!/bin/bash
+
+set -e
+
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+
+command_exists() {
+ if [ "$#" -le 0 ]; then
+ return 1
+ fi
+ command -v "$@" > /dev/null 2>&1
+}
+
+usage()
+{
+ echo "Convert TensorFlow model to nnpackage."
+ echo "Usage: tf2nnpkg"
+ echo " --info <path/to/info>"
+ echo " --graphdef <path/to/pb>"
+ echo " -o <path/to/nnpkg/directory>"
+ echo " --v2 (optional) Use TF 2.x interface"
+ exit 255
+}
+
+TF_INTERFACE="--v1"
+
+# Parse command-line arguments
+#
+while [ "$#" -ne 0 ]; do
+ CUR="$1"
+
+ case $CUR in
+ '--help')
+ usage
+ ;;
+ '--info')
+ export INFO_FILE="$2"
+ shift 2
+ ;;
+ '--graphdef')
+ export GRAPHDEF_FILE="$2"
+ shift 2
+ ;;
+ '-o')
+ export OUTPUT_DIR="$2"
+ shift 2
+ ;;
+ '--v2')
+ TF_INTERFACE="--v2"
+ shift
+ ;;
+ *)
+ echo "${CUR}"
+ shift
+ ;;
+ esac
+done
+
+if [ -z ${GRAPHDEF_FILE} ] || [ ! -e ${GRAPHDEF_FILE} ]; then
+ echo "pb is not found. Please check --graphdef is correct."
+ exit 2
+fi
+
+if [ -z ${INFO_FILE} ] || [ ! -e ${INFO_FILE} ]; then
+ echo "info is not found. Please check --info is correct."
+ exit 2
+fi
+
+if [ -z ${OUTPUT_DIR} ]; then
+ echo "output directory is not specifed. Please check -o is correct.."
+ exit 2
+fi
+
+FILE_BASE=$(basename ${GRAPHDEF_FILE})
+MODEL_NAME="${FILE_BASE%.*}"
+TMPDIR=$(mktemp -d)
+trap "{ rm -rf $TMPDIR; }" EXIT
+
+# activate python virtual environment
+VIRTUALENV_LINUX="${ROOT}/bin/venv/bin/activate"
+VIRTUALENV_WINDOWS="${ROOT}/bin/venv/Scripts/activate"
+
+if [ -e ${VIRTUALENV_LINUX} ]; then
+ source ${VIRTUALENV_LINUX}
+elif [ -e ${VIRTUALENV_WINDOWS} ]; then
+ source ${VIRTUALENV_WINDOWS}
+fi
+
+# parse inputs, outputs from info file
+INPUT=$(awk -F, '/^input/ { print $2 }' ${INFO_FILE} | cut -d: -f1 | tr -d ' ' | paste -d, -s)
+OUTPUT=$(awk -F, '/^output/ { print $2 }' ${INFO_FILE} | cut -d: -f1 | tr -d ' ' | paste -d, -s)
+
+INPUT_SHAPES=$(grep ^input ${INFO_FILE} | cut -d "[" -f2 | cut -d "]" -f1 | tr -d ' ' | xargs | tr ' ' ':')
+
+ONE_IMPORT_BCQ_SCRIPT="${ROOT}/bin/one-import-bcq ${TF_INTERFACE} "
+ONE_IMPORT_BCQ_SCRIPT+="-i ${GRAPHDEF_FILE} "
+ONE_IMPORT_BCQ_SCRIPT+="-o ${TMPDIR}/${MODEL_NAME}.tmp.circle "
+ONE_IMPORT_BCQ_SCRIPT+="-I ${INPUT} "
+ONE_IMPORT_BCQ_SCRIPT+="-O ${OUTPUT} "
+if [ ! -z ${INPUT_SHAPES} ]; then
+ ONE_IMPORT_BCQ_SCRIPT+="-s ${INPUT_SHAPES} "
+fi
+
+${ONE_IMPORT_BCQ_SCRIPT}
+
+# optimize
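+# NOTE unlike the older presets above, which ran the full "--O1" pipeline,
+# only the resolve_customop_add pass is applied here.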
+"${ROOT}/bin/circle2circle" --resolve_customop_add "${TMPDIR}/${MODEL_NAME}.tmp.circle" "${TMPDIR}/${MODEL_NAME}.circle"
+
+"${ROOT}/bin/model2nnpkg" -o "${OUTPUT_DIR}" -m "${TMPDIR}/${MODEL_NAME}.circle"
diff --git a/infra/scripts/build-tcm.sh b/infra/scripts/build-tcm.sh
index 38533c1f9..768cff762 100755
--- a/infra/scripts/build-tcm.sh
+++ b/infra/scripts/build-tcm.sh
@@ -2,13 +2,16 @@
#
# STEP 1
# Download latest TCM tool from
-# https://github.sec.samsung.net/RS-TCM/tca-standalone/releases/download/v0.0.8/tca-standalone-0.0.8.jar
+# https://github.sec.samsung.net/RS-TCM/tca-standalone/releases/download/1.0.2/tca-standalone-1.0.2.jar
#
# STEP 2
# Create symbolic link `./src` for source directory to be analyzed which has `.ahub` configuration.
#
# STEP 3
-# run this `build-tcm.sh` script.
+# run this script as `build-tcm.sh [test_target]`.
+# ex) $ build-tcm.sh               # to analyze both NN Runtime and NN Compiler
+# ex) $ build-tcm.sh NN_Runtime    # to analyze NN Runtime only
+# ex) $ build-tcm.sh NN_Compiler   # to analyze NN Compiler only
#
# See the following link for additional details.
# https://github.sec.samsung.net/RS-TCM/tca-standalone/wiki/Tutorials-CPP-Gtest
@@ -16,9 +19,10 @@
echo ${PROJECT_DIR:=${PWD}}
-java -jar $PROJECT_DIR/tca-standalone-0.0.8.jar \
+java -jar $PROJECT_DIR/tca-standalone-1.0.2.jar \
--outdir=$PROJECT_DIR/tcm-output \
--config=$PROJECT_DIR/src/.ahub/tcchecker-tca/config.yaml \
--local=$PROJECT_DIR/src \
--logfile=$PROJECT_DIR/tcm-output/tcm.log \
--debug
+ $@
diff --git a/infra/scripts/build_android_runtime_release.sh b/infra/scripts/build_android_runtime_release.sh
deleted file mode 100755
index fe933c648..000000000
--- a/infra/scripts/build_android_runtime_release.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-[[ "${BASH_SOURCE[0]}" != "${0}" ]] && echo "Please don't source ${BASH_SOURCE[0]}, execute it" && return
-
-CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-ROOT_PATH="$CURRENT_PATH/../../"
-
-# prepare pre-built armcompute library
-# android build requires pre-built armcompute library
-if [ ! -n "$EXT_ACL_FOLDER" ]; then
- echo "Please set EXT_ACL_FOLDER to use pre-built armcompute library"
- exit 1
-fi
-
-# prepare ndk
-if [ ! -n "$NDK_DIR" ]; then
- export NDK_DIR=$ROOT_PATH/tools/cross/ndk/r20/ndk
- echo "It will use default external path"
-fi
-
-export TARGET_OS=android
-export CROSS_BUILD=1
-make -f Makefile.template
diff --git a/infra/scripts/common.sh b/infra/scripts/common.sh
index a10aac271..0beaf6766 100755
--- a/infra/scripts/common.sh
+++ b/infra/scripts/common.sh
@@ -31,11 +31,11 @@ function CheckTestPrepared()
{
# Model download server setting
if [[ -z "${MODELFILE_SERVER}" ]]; then
- echo "[WARNING] Model file server is not set"
- echo " Try to use pre-downloaed model"
+ echo "Model file server is not set. Try to use default setting."
else
echo "Model Server: ${MODELFILE_SERVER}"
fi
+ $INSTALL_PATH/test/onert-test prepare-model
}
# $1: (required) backend
@@ -50,10 +50,10 @@ function TFLiteModelVerification()
export BACKENDS=$1
if [[ "$2" == "" ]]; then
- $INSTALL_PATH/test/onert-test verify-tflite --api=nnapi \
+ $INSTALL_PATH/test/onert-test verify-tflite \
--reportdir=$ROOT_PATH/$3
else
- $INSTALL_PATH/test/onert-test verify-tflite --api=nnapi \
+ $INSTALL_PATH/test/onert-test verify-tflite \
--list=$2 \
--reportdir=$ROOT_PATH/$3
fi
@@ -74,7 +74,7 @@ function NNAPIGTest()
# Backup original nnapi_gtest.skip
# TODO Pass skiplist to test-driver.sh
- SKIPLIST_FILE="${INSTALL_PATH}/unittest/nnapi_gtest.skip"
+ SKIPLIST_FILE="${INSTALL_PATH}/nnapi-gtest/nnapi_gtest.skip"
BACKUP_FILE="${SKIPLIST_FILE}.backup"
if [[ "$2" != "" ]]; then
cp ${SKIPLIST_FILE} ${BACKUP_FILE}
@@ -84,7 +84,7 @@ function NNAPIGTest()
export BACKENDS=$1
$INSTALL_PATH/test/onert-test unittest \
--reportdir=$ROOT_PATH/$3 \
- --unittestdir=$INSTALL_PATH/unittest
+ --unittestdir=$INSTALL_PATH/nnapi-gtest
unset BACKENDS
# TODO Pass skiplist to test-driver.sh
@@ -129,27 +129,3 @@ function NNPackageTest()
popd > /dev/null
}
-
-# $1: (required) backend
-# $2: (required) test list file relative path from nnfw root directory
-# pass empty string if there is no skiplist
-# $3: (required) relative path to report from nnfw root directory
-function TFLiteLoaderTest()
-{
- [[ $# -ne 3 ]] && echo "TFLiteLoaderTest: Invalid function argument setting" && exit 1
-
- pushd ${ROOT_PATH} > /dev/null
-
- export BACKENDS=$1
- if [[ "$2" == "" ]]; then
- $INSTALL_PATH/test/onert-test verify-tflite --api=loader \
- --reportdir=$ROOT_PATH/$3
- else
- $INSTALL_PATH/test/onert-test verify-tflite --api=loader \
- --list=$2 \
- --reportdir=$ROOT_PATH/$3
- fi
- unset BACKENDS
-
- popd > /dev/null
-}
diff --git a/infra/scripts/compiler_modules.sh b/infra/scripts/compiler_modules.sh
index a0323e0a0..8b361a7ea 100644
--- a/infra/scripts/compiler_modules.sh
+++ b/infra/scripts/compiler_modules.sh
@@ -1,23 +1,38 @@
#!/bin/bash
+# NOTE this file is sourced by other scripts, for example:
+# - configure_compiler_coverage.sh: to get test coverage for release criteria
+
# Don't run this script
[[ "${BASH_SOURCE[0]}" == "${0}" ]] && echo "Please don't execute ${BASH_SOURCE[0]}, source it" && return
-DEBUG_BUILD_ITEMS="angkor;cwrap;pepper-str;pepper-strcast;pp;stdex"
-DEBUG_BUILD_ITEMS+=";oops;pepper-assert"
+DEBUG_BUILD_ITEMS="angkor;cwrap;pepper-str;pepper-strcast;pp"
+DEBUG_BUILD_ITEMS+=";oops;pepper-assert;pepper-csv2vec"
DEBUG_BUILD_ITEMS+=";hermes;hermes-std"
DEBUG_BUILD_ITEMS+=";loco;locop;locomotiv;logo-core;logo"
-DEBUG_BUILD_ITEMS+=";foder;souschef;arser;vconone"
-DEBUG_BUILD_ITEMS+=";safemain;mio-circle;mio-tflite"
+DEBUG_BUILD_ITEMS+=";foder;crew;souschef;arser;vconone"
+DEBUG_BUILD_ITEMS+=";safemain;mio-circle05;mio-tflite280;mio-circle06;mio-tflite2121;dio-hdf5"
+DEBUG_BUILD_ITEMS+=";luci-compute"
DEBUG_BUILD_ITEMS+=";tflite2circle"
DEBUG_BUILD_ITEMS+=";luci"
DEBUG_BUILD_ITEMS+=";luci-interpreter"
-DEBUG_BUILD_ITEMS+=";luci-value-test"
+DEBUG_BUILD_ITEMS+=";luci-eval-driver;luci-pass-value-test;luci-value-test"
DEBUG_BUILD_ITEMS+=";circle2circle;record-minmax;circle-quantizer"
+DEBUG_BUILD_ITEMS+=";circle-eval-diff"
+DEBUG_BUILD_ITEMS+=";circle-partitioner;circle-part-driver;circle-operator"
DEBUG_BUILD_ITEMS+=";circle-verify"
+DEBUG_BUILD_ITEMS+=";circle-tensordump;circle-opselector"
DEBUG_BUILD_ITEMS+=";tflchef;circlechef"
DEBUG_BUILD_ITEMS+=";common-artifacts"
DEBUG_BUILD_ITEMS+=";circle2circle-dredd-recipe-test"
DEBUG_BUILD_ITEMS+=";record-minmax-conversion-test"
DEBUG_BUILD_ITEMS+=";tf2tfliteV2;tf2tfliteV2-conversion-test"
DEBUG_BUILD_ITEMS+=";tflite2circle-conversion-test"
+DEBUG_BUILD_ITEMS+=";pota-quantization-value-test;pics"
+DEBUG_BUILD_ITEMS+=";circle-part-value-test"
+DEBUG_BUILD_ITEMS+=";circle-quantizer-dredd-recipe-test"
+DEBUG_BUILD_ITEMS+=";circle-operator-test"
+DEBUG_BUILD_ITEMS+=";circle-interpreter;circle-interpreter-test"
+DEBUG_BUILD_ITEMS+=";dalgona;dalgona-test"
+DEBUG_BUILD_ITEMS+=";visq"
+DEBUG_BUILD_ITEMS+=";circle-mpqsolver"
diff --git a/infra/scripts/docker_build_cross_aarch64_runtime.sh b/infra/scripts/docker_build_cross_aarch64_runtime.sh
deleted file mode 100755
index 011d14c18..000000000
--- a/infra/scripts/docker_build_cross_aarch64_runtime.sh
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/bin/bash
-
-[[ "${BASH_SOURCE[0]}" != "${0}" ]] && echo "Please don't source ${BASH_SOURCE[0]}, execute it" && return
-
-CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-ROOT_PATH="$CURRENT_PATH/../../"
-
-# prepare rootfs
-if [ -z "$ROOTFS_DIR" ] || [ ! -d $ROOTFS_DIR ]; then
- echo "It will use default rootfs path"
-else
- DOCKER_VOLUMES+=" -v $ROOTFS_DIR:/opt/rootfs"
- DOCKER_ENV_VARS+=" -e ROOTFS_DIR=/opt/rootfs"
-fi
-
-# mount volume (or directory) for externals
-if [ -n "$EXTERNAL_VOLUME" ]; then
- DOCKER_VOLUMES+=" -v $EXTERNAL_VOLUME:/externals"
- DOCKER_ENV_VARS+=" -e EXTERNAL_VOLUME=/externals"
-else
- echo "It will use default external path"
-fi
-
-# docker image name
-if [[ -z $DOCKER_IMAGE_NAME ]]; then
- echo "It will use default docker image name"
-fi
-
-# Mirror server setting
-if [[ -z $EXTERNAL_DOWNLOAD_SERVER ]]; then
- echo "It will not use mirror server"
-fi
-
-DOCKER_ENV_VARS+=" -e TARGET_ARCH=aarch64"
-DOCKER_ENV_VARS+=" -e CROSS_BUILD=1"
-
-set -e
-
-pushd $ROOT_PATH > /dev/null
-
-# TODO use command instead of makefile
-export DOCKER_ENV_VARS
-export DOCKER_VOLUMES
-CMD="cp -nv Makefile.template Makefile && \
- make all install build_test_suite"
-./nnfw docker-run bash -c "$CMD"
-
-popd > /dev/null
diff --git a/infra/scripts/docker_build_cross_arm_runtime.sh b/infra/scripts/docker_build_cross_arm_runtime.sh
deleted file mode 100755
index 551fb5700..000000000
--- a/infra/scripts/docker_build_cross_arm_runtime.sh
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/bin/bash
-
-[[ "${BASH_SOURCE[0]}" != "${0}" ]] && echo "Please don't source ${BASH_SOURCE[0]}, execute it" && return
-
-CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-ROOT_PATH="$CURRENT_PATH/../../"
-
-# prepare rootfs
-if [ -z "$ROOTFS_DIR" ] || [ ! -d $ROOTFS_DIR ]; then
- echo "It will use default rootfs path"
-else
- DOCKER_VOLUMES+=" -v $ROOTFS_DIR:/opt/rootfs"
- DOCKER_ENV_VARS+=" -e ROOTFS_DIR=/opt/rootfs"
-fi
-
-# mount volume (or directory) for externals
-if [ -n "$EXTERNAL_VOLUME" ]; then
- DOCKER_VOLUMES+=" -v $EXTERNAL_VOLUME:/externals"
- DOCKER_ENV_VARS+=" -e EXTERNAL_VOLUME=/externals"
-else
- echo "It will use default external path"
-fi
-
-# docker image name
-if [[ -z $DOCKER_IMAGE_NAME ]]; then
- echo "It will use default docker image name"
-fi
-
-# Mirror server setting
-if [[ -z $EXTERNAL_DOWNLOAD_SERVER ]]; then
- echo "It will not use mirror server"
-fi
-
-DOCKER_ENV_VARS+=" -e TARGET_ARCH=armv7l"
-DOCKER_ENV_VARS+=" -e CROSS_BUILD=1"
-
-set -e
-
-pushd $ROOT_PATH > /dev/null
-
-# TODO use command instead of makefile
-export DOCKER_ENV_VARS
-export DOCKER_VOLUMES
-CMD="cp -nv Makefile.template Makefile && \
- make all install build_test_suite"
-./nnfw docker-run bash -c "$CMD"
-
-popd > /dev/null
diff --git a/infra/scripts/docker_build_cross_arm_runtime_release.sh b/infra/scripts/docker_build_cross_arm_runtime_release.sh
deleted file mode 100755
index 876f318f4..000000000
--- a/infra/scripts/docker_build_cross_arm_runtime_release.sh
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/bin/bash
-
-[[ "${BASH_SOURCE[0]}" != "${0}" ]] && echo "Please don't source ${BASH_SOURCE[0]}, execute it" && return
-
-CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-ROOT_PATH="$CURRENT_PATH/../../"
-
-# prepare rootfs
-if [ -z "$ROOTFS_DIR" ] || [ ! -d $ROOTFS_DIR ]; then
- echo "It will use default rootfs path"
-else
- DOCKER_VOLUMES+=" -v $ROOTFS_DIR:/opt/rootfs"
- DOCKER_ENV_VARS+=" -e ROOTFS_DIR=/opt/rootfs"
-fi
-
-# mount volume (or directory) for externals
-if [ -n "$EXTERNAL_VOLUME" ]; then
- DOCKER_VOLUMES+=" -v $EXTERNAL_VOLUME:/externals"
- DOCKER_ENV_VARS+=" -e EXTERNAL_VOLUME=/externals"
-else
- echo "It will use default external path"
-fi
-
-# docker image name
-if [[ -z $DOCKER_IMAGE_NAME ]]; then
- echo "It will use default docker image name"
-fi
-
-# Mirror server setting
-if [[ -z $EXTERNAL_DOWNLOAD_SERVER ]]; then
- echo "It will not use mirror server"
-fi
-
-DOCKER_ENV_VARS+=" -e TARGET_ARCH=armv7l"
-DOCKER_ENV_VARS+=" -e CROSS_BUILD=1"
-DOCKER_ENV_VARS+=" -e BUILD_TYPE=release"
-
-set -e
-
-pushd $ROOT_PATH > /dev/null
-
-# TODO use command instead of makefile
-export DOCKER_ENV_VARS
-export DOCKER_VOLUMES
-CMD="cp -nv Makefile.template Makefile && \
- make all install build_test_suite"
-./nnfw docker-run bash -c "$CMD"
-
-popd > /dev/null
diff --git a/infra/scripts/docker_build_cross_coverage.sh b/infra/scripts/docker_build_cross_coverage.sh
deleted file mode 100755
index f42251baa..000000000
--- a/infra/scripts/docker_build_cross_coverage.sh
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/bin/bash
-
-[[ "${BASH_SOURCE[0]}" != "${0}" ]] && echo "Please don't source ${BASH_SOURCE[0]}, execute it" && return
-
-CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-ROOT_PATH="$CURRENT_PATH/../../"
-
-# prepare rootfs
-if [ -z "$ROOTFS_DIR" ] || [ ! -d $ROOTFS_DIR ]; then
- echo "It will use default rootfs path"
-else
- DOCKER_VOLUMES+=" -v $ROOTFS_DIR:/opt/rootfs"
- DOCKER_ENV_VARS+=" -e ROOTFS_DIR=/opt/rootfs"
-fi
-
-# mount volume (or directory) for externals
-if [ -n "$EXTERNAL_VOLUME" ]; then
- DOCKER_VOLUMES+=" -v $EXTERNAL_VOLUME:/externals"
- DOCKER_ENV_VARS+=" -e EXTERNAL_VOLUME=/externals"
-else
- echo "It will use default external path"
-fi
-
-# docker image name
-if [[ -z $DOCKER_IMAGE_NAME ]]; then
- echo "It will use default docker image name"
-fi
-
-# Mirror server setting
-if [[ -z $EXTERNAL_DOWNLOAD_SERVER ]]; then
- echo "It will not use mirror server"
-fi
-
-NNAS_WORKSPACE=${NNAS_WORKSPACE:-build}
-if [[ -z "${ARCHIVE_PATH}" ]]; then
- ARCHIVE_PATH=${NNAS_WORKSPACE}/archive
-fi
-
-DOCKER_ENV_VARS+=" -e TARGET_ARCH=armv7l"
-DOCKER_ENV_VARS+=" -e CROSS_BUILD=1"
-DOCKER_ENV_VARS+=" -e COVERAGE_BUILD=1"
-
-set -e
-
-pushd $ROOT_PATH > /dev/null
-
-# TODO use command instead of makefile
-export DOCKER_ENV_VARS
-export DOCKER_VOLUMES
-CMD="cp -nv Makefile.template Makefile && \
- make all install build_coverage_suite"
-./nnfw docker-run bash -c "$CMD"
-
-mkdir -p ${ARCHIVE_PATH}
-# TODO change workspace usage in makefile
-mv Product/out/coverage-suite.tar.gz ${ARCHIVE_PATH}/
-
-popd > /dev/null
diff --git a/infra/scripts/docker_build_nncc.sh b/infra/scripts/docker_build_nncc.sh
index 6cdfdf01b..dd9d0bd9b 100755
--- a/infra/scripts/docker_build_nncc.sh
+++ b/infra/scripts/docker_build_nncc.sh
@@ -2,6 +2,10 @@
[[ "${BASH_SOURCE[0]}" != "${0}" ]] && echo "Please don't source ${BASH_SOURCE[0]}, execute it" && return
+unset RELEASE_VERSION
+# TODO need better argument parsing
+RELEASE_VERSION="$1"
+
CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT_PATH="$CURRENT_PATH/../../"
@@ -23,18 +27,20 @@ else
fi
# prepare tensorflow
-if [ -d $TENSORFLOW_PREFIX ]; then
+if [ -n "$TENSORFLOW_PREFIX" ]; then
DOCKER_OPTS+=" -v $TENSORFLOW_PREFIX:/opt/tensorflow"
CONFIG_OPTIONS+=" -DTENSORFLOW_PREFIX=/opt/tensorflow"
fi
# prepare onnx
-if [ -d $ONNXRUNTIME_PREFIX ]; then
+if [ -n "$ONNXRUNTIME_PREFIX" ]; then
DOCKER_OPTS+=" -v $ONNXRUNTIME_PREFIX:/opt/onnxruntime"
CONFIG_OPTIONS+=" -DONNXRUNTIME_PREFIX=/opt/onnxruntime"
fi
# docker image name
+# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
+# - for focal, use DOCKER_IMAGE_NAME="nnfw/one-devtools:focal"
if [[ -z $DOCKER_IMAGE_NAME ]]; then
echo "It will use default docker image name"
fi
@@ -54,20 +60,18 @@ pushd $ROOT_PATH > /dev/null
mkdir -p ${NNCC_INSTALL_PREFIX}
./nncc docker-run ./nnas create-package --prefix "${PWD}/${NNCC_INSTALL_PREFIX}" -- "${CONFIG_OPTIONS}"
-# create python virtual environment
-./nncc docker-run python3 -m venv "${NNCC_INSTALL_PREFIX}/bin/venv"
+mkdir -p ${ARCHIVE_PATH}
+tar -zcf ${ARCHIVE_PATH}/nncc-package.tar.gz -C ${NNCC_INSTALL_PREFIX} \
+ --exclude test --exclude tflchef* --exclude circle-tensordump --exclude circledump ./
+tar -zcf ${ARCHIVE_PATH}/nncc-test-package.tar.gz -C ${NNCC_INSTALL_PREFIX} ./test
-# TODO remove version number of 'pip==20.2.1 setuptools==49.3.0'
-# NOTE adding version is for temporary hotfix of setuptools 50.x.y version
-./nncc docker-run "${NNCC_INSTALL_PREFIX}/bin/venv/bin/python" \
- -m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhost.org \
- install -U pip==20.2.1 setuptools==49.3.0
-./nncc docker-run "${NNCC_INSTALL_PREFIX}/bin/venv/bin/python" \
- -m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhost.org \
- install tensorflow-cpu==2.3.0
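+# For nightly (or unversioned) builds, stamp the Debian changelog with a
+# date-suffixed version (one-version plus build date) so successive nightly
+# packages remain distinguishable.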
+if [ -z ${RELEASE_VERSION} ] || [ ${RELEASE_VERSION} == "nightly" ]; then
+ ./nncc docker-run /bin/bash -c \
+ 'dch -v $(${PWD}/${NNCC_INSTALL_PREFIX}/bin/one-version)~$(date "+%y%m%d%H") "nightly release" -D $(lsb_release --short --codename)'
+ ./nncc docker-run dch -r ''
+fi
-mkdir -p ${ARCHIVE_PATH}
-tar -zcf ${ARCHIVE_PATH}/nncc-package.tar.gz -C ${NNCC_INSTALL_PREFIX} --exclude "bin/venv" ./
-tar -zcf ${ARCHIVE_PATH}/nncc-venv-package.tar.gz -C ${NNCC_INSTALL_PREFIX} bin/venv
+./nncc docker-run debuild --preserve-env --no-lintian -us -uc \
+ -b --buildinfo-option=-ubuild --changes-option=-ubuild
popd > /dev/null
diff --git a/infra/scripts/docker_build_test_x64.sh b/infra/scripts/docker_build_test_x64.sh
deleted file mode 100755
index 16fcf3fa7..000000000
--- a/infra/scripts/docker_build_test_x64.sh
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/bin/bash
-
-[[ "${BASH_SOURCE[0]}" != "${0}" ]] && echo "Please don't source ${BASH_SOURCE[0]}, execute it" && return
-
-CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-ROOT_PATH="$CURRENT_PATH/../../"
-
-# mount volume (or directory) for externals
-if [ -n "$EXTERNAL_VOLUME" ]; then
- DOCKER_VOLUMES+=" -v $EXTERNAL_VOLUME:/externals"
- DOCKER_ENV_VARS+=" -e EXTERNAL_VOLUME=/externals"
-else
- echo "It will use default external path"
-fi
-
-# docker image name
-if [[ -z $DOCKER_IMAGE_NAME ]]; then
- echo "It will use default docker image name"
-fi
-
-# Mirror server setting
-if [[ -z $EXTERNAL_DOWNLOAD_SERVER ]]; then
- echo "It will not use mirror server"
-fi
-
-set -e
-
-pushd $ROOT_PATH > /dev/null
-
-export DOCKER_ENV_VARS
-export DOCKER_VOLUMES
-# Disable nnpackage_run build: mismatch between buildtool for CI and installed hdf5
-CMD="export OPTIONS='-DBUILD_NNPACKAGE_RUN=OFF' && \
- export BUILD_TYPE=Release && \
- cp -nv Makefile.template Makefile && \
- make all install build_test_suite"
-./nnfw docker-run bash -c "$CMD"
-
-# Model download server setting
-if [[ -z $MODELFILE_SERVER ]]; then
- echo "Need model file server setting"
- exit 1
-fi
-
-export DOCKER_ENV_VARS=" -e MODELFILE_SERVER=$MODELFILE_SERVER"
-./nnfw docker-run-user ./infra/scripts/test_ubuntu_runtime.sh --backend cpu
-./nnfw docker-run-user ./infra/scripts/test_ubuntu_runtime.sh --interp
-
-popd > /dev/null
diff --git a/infra/scripts/docker_build_tizen_cross.sh b/infra/scripts/docker_build_tizen_cross.sh
deleted file mode 100755
index ee0f183f1..000000000
--- a/infra/scripts/docker_build_tizen_cross.sh
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/bin/bash
-
-[[ "${BASH_SOURCE[0]}" != "${0}" ]] && echo "Please don't source ${BASH_SOURCE[0]}, execute it" && return
-
-CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-ROOT_PATH="$CURRENT_PATH/../../"
-
-# prepare rootfs
-if [ -z "$ROOTFS_DIR" ] || [ ! -d $ROOTFS_DIR ]; then
- echo "It will use default rootfs path"
-else
- DOCKER_VOLUMES+=" -v $ROOTFS_DIR:/opt/rootfs"
- DOCKER_ENV_VARS+=" -e ROOTFS_DIR=/opt/rootfs"
-fi
-
-# mount volume (or directory) for externals
-if [ -n "$EXTERNAL_VOLUME" ]; then
- DOCKER_VOLUMES+=" -v $EXTERNAL_VOLUME:/externals"
- DOCKER_ENV_VARS+=" -e EXTERNAL_VOLUME=/externals"
-else
- echo "It will use default external path"
-fi
-
-# docker image name
-if [[ -z $DOCKER_IMAGE_NAME ]]; then
- echo "It will use default docker image name"
-fi
-
-DOCKER_ENV_VARS+=" -e TARGET_ARCH=armv7l"
-DOCKER_ENV_VARS+=" -e CROSS_BUILD=1"
-DOCKER_ENV_VARS+=" -e TARGET_OS=tizen"
-DOCKER_ENV_VARS+=" -e BUILD_TYPE=release"
-
-# Mirror server setting
-if [[ -z $EXTERNAL_DOWNLOAD_SERVER ]]; then
- echo "It will not use mirror server"
-fi
-
-set -e
-
-pushd $ROOT_PATH > /dev/null
-
-export DOCKER_ENV_VARS
-export DOCKER_VOLUMES
-CMD="export OPTIONS+=' -DGENERATE_RUNTIME_NNAPI_TESTS=ON' && \
- cp -nv Makefile.template Makefile && \
- make all install build_test_suite"
-./nnfw docker-run bash -c "$CMD"
-
-popd > /dev/null
diff --git a/infra/scripts/docker_build_tizen_gbs.sh b/infra/scripts/docker_build_tizen_gbs.sh
deleted file mode 100755
index 2d508f4c7..000000000
--- a/infra/scripts/docker_build_tizen_gbs.sh
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/bin/bash
-
-[[ "${BASH_SOURCE[0]}" != "${0}" ]] && echo "Please don't source ${BASH_SOURCE[0]}, execute it" && return
-
-CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-ROOT_PATH="$CURRENT_PATH/../../"
-
-GBS_RPM_DIR=$ROOT_PATH/Product/out/rpm
-mkdir -p $GBS_RPM_DIR
-DOCKER_VOLUMES=" -v $GBS_RPM_DIR:/opt/rpm"
-
-if [[ -z $DOCKER_IMAGE_NAME ]]; then
- echo "It will use default docker image name for tizen gbs build"
- DOCKER_IMAGE_NAME="nnfw_docker_tizen"
-fi
-
-DOCKER_ENV_VARS=" --privileged"
-
-set -e
-
-pushd $ROOT_PATH > /dev/null
-
-CMD="gbs -c $ROOT_PATH/infra/nnfw/config/gbs.conf build \
- -A armv7l --profile=profile.tizen --clean --include-all --define '$GBS_DEFINE' && \
- cp -rf /home/GBS-ROOT/local/repos/tizen/armv7l/RPMS/*.rpm /opt/rpm/"
-
-export DOCKER_ENV_VARS
-export DOCKER_VOLUMES
-./nnfw docker-run bash -c "$CMD"
-
-popd > /dev/null
diff --git a/infra/scripts/docker_collect_nnpkg_resources.sh b/infra/scripts/docker_collect_nnpkg_resources.sh
index 55adaa15d..8a73dd380 100755
--- a/infra/scripts/docker_collect_nnpkg_resources.sh
+++ b/infra/scripts/docker_collect_nnpkg_resources.sh
@@ -28,18 +28,20 @@ else
fi
# prepare tensorflow
-if [ -d $TENSORFLOW_PREFIX ]; then
+if [ -n "$TENSORFLOW_PREFIX" ]; then
DOCKER_OPTS+=" -v $TENSORFLOW_PREFIX:/opt/tensorflow"
CONFIG_OPTIONS+=" -DTENSORFLOW_PREFIX=/opt/tensorflow"
fi
# prepare onnx
-if [ -d $ONNXRUNTIME_PREFIX ]; then
+if [ -n "$ONNXRUNTIME_PREFIX" ]; then
DOCKER_OPTS+=" -v $ONNXRUNTIME_PREFIX:/opt/onnxruntime"
CONFIG_OPTIONS+=" -DONNXRUNTIME_PREFIX=/opt/onnxruntime"
fi
# docker image name
+# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
+# - for focal, use DOCKER_IMAGE_NAME="nnfw/one-devtools:focal"
if [[ -z $DOCKER_IMAGE_NAME ]]; then
echo "It will use default docker image name"
fi
@@ -59,16 +61,19 @@ pushd $ROOT_PATH > /dev/null
REQUIRED_UNITS=()
# Common Libraries
-REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp" "stdex")
-REQUIRED_UNITS+=("oops" "safemain" "foder" "arser" "vconone")
+REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
+REQUIRED_UNITS+=("pepper-csv2vec")
+REQUIRED_UNITS+=("oops" "safemain" "foder" "crew" "arser" "vconone")
# Hermes Logging Framework
REQUIRED_UNITS+=("hermes" "hermes-std")
# loco IR and related utilities
REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+# Compute
+REQUIRED_UNITS+=("luci-compute")
# Circle compiler library (.circle -> .circle)
REQUIRED_UNITS+=("luci")
# Flatbuffer I/O
-REQUIRED_UNITS+=("mio-tflite" "mio-circle")
+REQUIRED_UNITS+=("mio-tflite280" "mio-circle05" "mio-tflite2121" "mio-circle06")
# Tools
REQUIRED_UNITS+=("tflite2circle" "circle2circle" "luci-interpreter")
REQUIRED_UNITS+=("souschef" "tflchef" "circlechef" "circle-verify")
diff --git a/infra/scripts/docker_coverage_report.sh b/infra/scripts/docker_coverage_report.sh
deleted file mode 100755
index 677462d63..000000000
--- a/infra/scripts/docker_coverage_report.sh
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/bin/bash
-
-# coverage test data: ${ARCHIVE_PATH}/coverage-data.tar.gz
-
-[[ "${BASH_SOURCE[0]}" != "${0}" ]] && echo "Please don't source ${BASH_SOURCE[0]}, execute it" && return
-
-CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-ROOT_PATH="$CURRENT_PATH/../../"
-
-# docker image name
-if [[ -z $DOCKER_IMAGE_NAME ]]; then
- echo "It will use default docker image name"
-fi
-
-NNAS_WORKSPACE=${NNAS_WORKSPACE:-build}
-if [[ -z "${ARCHIVE_PATH}" ]]; then
- ARCHIVE_PATH=${NNAS_WORKSPACE}/archive
-fi
-
-set -e
-
-pushd $ROOT_PATH > /dev/null
-
-tar -zxf ${ARCHIVE_PATH}/coverage-data.tar.gz
-
-CMD="GCOV_PATH=arm-linux-gnueabihf-gcov NNAS_WORKSPACE=Product ./nnas gen-coverage-report runtime compute &&
- tar -zcf coverage/coverage_report.tar.gz coverage/html &&
- python runtime/3rdparty/lcov-to-cobertura-xml/lcov_cobertura.py coverage/coverage.info -o coverage/nnfw_coverage.xml"
-
-./nnfw docker-run-user bash -c "$CMD"
-
-popd > /dev/null
diff --git a/infra/scripts/test_arm_nnpkg.sh b/infra/scripts/test_arm_nnpkg.sh
index d00eb730f..74fae6bd8 100755
--- a/infra/scripts/test_arm_nnpkg.sh
+++ b/infra/scripts/test_arm_nnpkg.sh
@@ -10,7 +10,4 @@ do
NNPackageTest ${BACKEND} "Product/out/test/list/nnpkg_test_list.armv7l-linux.${BACKEND}"
done
-# Interpreter test
-export DISABLE_COMPILE=1
-NNPackageTest "interp" "Product/out/test/list/nnpkg_test_list.noarch.interp"
unset DISABLE_COMPILE
diff --git a/infra/scripts/test_coverage.sh b/infra/scripts/test_coverage.sh
index 12a9942ab..97043ceed 100755
--- a/infra/scripts/test_coverage.sh
+++ b/infra/scripts/test_coverage.sh
@@ -6,7 +6,7 @@
set -eo pipefail
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
-CheckTestPrepared
+pushd $ROOT_PATH > /dev/null
NNAS_WORKSPACE=${NNAS_WORKSPACE:-build}
if [[ -z "${ARCHIVE_PATH}" ]]; then
@@ -14,30 +14,27 @@ if [[ -z "${ARCHIVE_PATH}" ]]; then
echo "Default archive directory including nncc package and resources: ${ARCHIVE_PATH}"
fi
-pushd $ROOT_PATH > /dev/null
-
tar -zxf ${ARCHIVE_PATH}/coverage-suite.tar.gz -C ./
+CheckTestPrepared
+
if [[ ! -e $ROOT_PATH/tests/scripts/build_path_depth.txt ]]; then
echo "Cannot find prefix strip file"
exit 1
fi
export GCOV_PREFIX_STRIP=`cat $ROOT_PATH/tests/scripts/build_path_depth.txt`
-./infra/scripts/test_ubuntu_runtime.sh --backend acl_cl --tflite-loader
+TENSOR_LOGGING=trace_log.txt ./infra/scripts/test_ubuntu_runtime.sh --backend acl_cl --nnapi-frontend
./infra/scripts/test_ubuntu_runtime.sh --backend acl_neon
./infra/scripts/test_ubuntu_runtime.sh --backend cpu
# Enable all logs (mixed backend)
-TENSOR_LOGGING=trace_log.txt ONERT_LOG_ENABLE=1 GRAPH_DOT_DUMP=1 ./infra/scripts/test_ubuntu_runtime_mixed.sh
+ONERT_LOG_ENABLE=1 GRAPH_DOT_DUMP=1 ./infra/scripts/test_ubuntu_runtime_mixed.sh
# Enable trace event (acl_cl default backend)
export TRACE_FILEPATH=trace.json
-TFLiteModelVerification "acl_cl" "Product/out/test/list/frameworktest_list.armv7l.acl_cl.txt" "report/acl_cl/trace"
+TFLiteModelVerification "acl_cl" "Product/out/test/list/tflite_comparator.armv7l.acl_cl.list" "report/acl_cl/trace"
unset TRACE_FILEPATH
-# Interpreter
-./infra/scripts/test_ubuntu_runtime.sh --interp
-
# nnpackage test suite
if [[ -e ${ARCHIVE_PATH}/nnpkg-test-suite.tar.gz ]]; then
tar -zxf ${ARCHIVE_PATH}/nnpkg-test-suite.tar.gz -C ./
diff --git a/infra/scripts/test_ubuntu_npud.sh b/infra/scripts/test_ubuntu_npud.sh
new file mode 100755
index 000000000..3b3304240
--- /dev/null
+++ b/infra/scripts/test_ubuntu_npud.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+set -eo pipefail
+
+CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+ROOT_PATH="$(cd ${CURRENT_PATH}/../../ && pwd)"
+
+# Install path on CI
+INSTALL_PATH="$ROOT_PATH/Product/out"
+MODEL_PATH="${INSTALL_PATH}/npud-gtest/models"
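+# MODELFILE (required) must point to a ".tar.gz" archive of npud test
+# models, e.g. MODELFILE=http://<model-server>/npud_models.tar.gz
+# (illustrative URL); TestPrepared below downloads and unpacks it.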
+
+# Install dbus configuration file
+DBUS_CONF="${INSTALL_PATH}/share/org.tizen.npud.conf"
+mkdir -p /usr/share/dbus-1/system.d/
+cp ${DBUS_CONF} /usr/share/dbus-1/system.d/
+
+service dbus restart
+
+function TestPrepared()
+{
+ if [[ -z "${MODELFILE}" ]]; then
+ echo "Model file is not set. Try to use default setting."
+ exit 1
+ fi
+
+ mkdir -p ${MODEL_PATH}
+ if [[ "${MODELFILE: -7}" == ".tar.gz" ]]; then
+ curl -o model.tar.gz -kLsSO ${MODELFILE}
+ tar -zxf model.tar.gz -C ${MODEL_PATH}
+ else
+ echo "The file format is not supported."
+ echo "Supported format: tar.gz"
+ exit 1
+ fi
+}
+
+function TestCleanUp()
+{
+ rm -rf ${MODEL_PATH}
+}
+
+function NpudTest()
+{
+ pushd ${ROOT_PATH} > /dev/null
+
+ $INSTALL_PATH/npud-gtest/npud_gtest
+ EXITCODE=$?
+ if [ ${EXITCODE} -ne 0 ]; then
+ exit ${EXITCODE}
+ fi
+
+ popd > /dev/null
+}
+
+TestPrepared
+
+DEVICE_MODULE_PATH=${INSTALL_PATH}/lib GTEST_MODEL_PATH=${MODEL_PATH} NpudTest
+
+TestCleanUp
diff --git a/infra/scripts/test_ubuntu_runtime.sh b/infra/scripts/test_ubuntu_runtime.sh
index f250df5a0..9a98e5bd3 100755
--- a/infra/scripts/test_ubuntu_runtime.sh
+++ b/infra/scripts/test_ubuntu_runtime.sh
@@ -3,11 +3,10 @@
set -eo pipefail
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
+: ${TEST_ARCH:=$(uname -m | tr '[:upper:]' '[:lower:]')}
BACKEND="cpu"
-TEST_ARCH=$(uname -m | tr '[:upper:]' '[:lower:]')
TEST_OS="linux"
TEST_PLATFORM="$TEST_ARCH-$TEST_OS"
-TFLITE_LOADER="0"
LINEAR_ONLY="0"
RUN_INTERP="0"
@@ -17,7 +16,6 @@ function Usage()
echo ""
echo "Options:"
echo " --backend <BACKEND> Runtime backend to test (default: ${BACKEND})"
- echo " --tflite-loader Enable TFLite Loader test"
echo " --linear-only Use Linear executor only"
}
@@ -37,18 +35,10 @@ do
BACKEND=$(echo ${1#*=} | tr '[:upper:]' '[:lower:]')
shift
;;
- --tflite-loader)
- TFLITE_LOADER="1"
- shift
- ;;
--linear-only)
LINEAR_ONLY="1"
shift
;;
- --interp)
- RUN_INTERP="1"
- shift;
- ;;
*)
# Ignore
shift
@@ -58,52 +48,26 @@ done
CheckTestPrepared
-if [ $RUN_INTERP = "1" ]; then
- TEST_PLATFORM="noarch"
- TEST_ARCH="noarch"
- BACKEND="interp"
- echo "[[ Interpreter test ]]"
-else
- echo "[[ ${TEST_PLATFORM}: ${BACKEND} backend test ]]"
-fi
+echo "[[ ${TEST_PLATFORM}: ${BACKEND} backend test ]]"
-UNITTEST_SKIPLIST="Product/out/unittest/nnapi_gtest.skip.${TEST_PLATFORM}.${BACKEND}"
-FRAMEWORK_TESTLIST="Product/out/test/list/frameworktest_list.${TEST_ARCH}.${BACKEND}.txt"
+UNITTEST_SKIPLIST="Product/out/nnapi-gtest/nnapi_gtest.skip.${TEST_PLATFORM}.${BACKEND}"
+TFLITE_TESTLIST="Product/out/test/list/tflite_comparator.${TEST_ARCH}.${BACKEND}.list"
REPORT_BASE="report/${BACKEND}"
EXECUTORS=("Linear" "Dataflow" "Parallel")
if [ $LINEAR_ONLY = "1" ]; then
EXECUTORS=("Linear")
fi
-if [ $RUN_INTERP = "1" ]; then
- EXECUTORS=("Interpreter")
-fi
for EXECUTOR in "${EXECUTORS[@]}";
do
echo "[EXECUTOR]: ${EXECUTOR}"
REPORT_PATH="${REPORT_BASE}/${EXECUTOR}"
- if [ $EXECUTOR = "Interpreter" ]; then
- export DISABLE_COMPILE=1
- BACKEND=""
- else
- export EXECUTOR="${EXECUTOR}"
- fi
+ export EXECUTOR="${EXECUTOR}"
NNAPIGTest "${BACKEND}" "${UNITTEST_SKIPLIST}" "${REPORT_PATH}"
- TFLiteModelVerification "${BACKEND}" "${FRAMEWORK_TESTLIST}" "${REPORT_PATH}"
+ TFLiteModelVerification "${BACKEND}" "${TFLITE_TESTLIST}" "${REPORT_PATH}"
- if [ $EXECUTOR = "Interpreter" ]; then
- unset DISABLE_COMPILE
- else
- unset EXECUTOR
- fi
+ unset EXECUTOR
done
-
-# Current support acl_cl backend testlist only
-# TODO Support more backends
-TFLITE_LOADER_TESTLIST="Product/out/test/list/tflite_loader_list.${TEST_ARCH}.txt"
-if [[ $TFLITE_LOADER = "1" ]]; then
- TFLiteLoaderTest "${BACKEND}" "${TFLITE_LOADER_TESTLIST}" "${REPORT_BASE}/loader/${EXECUTOR}"
-fi
diff --git a/infra/scripts/test_ubuntu_runtime_mixed.sh b/infra/scripts/test_ubuntu_runtime_mixed.sh
index 24fde8896..a6fd2a41d 100755
--- a/infra/scripts/test_ubuntu_runtime_mixed.sh
+++ b/infra/scripts/test_ubuntu_runtime_mixed.sh
@@ -6,7 +6,7 @@ source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
CheckTestPrepared
# TODO Get argument for mix configuration
-TEST_ARCH=$(uname -m | tr '[:upper:]' '[:lower:]')
+: ${TEST_ARCH:=$(uname -m | tr '[:upper:]' '[:lower:]')}
TEST_OS="linux"
# nnfw_api_gtest
@@ -17,8 +17,7 @@ pushd ${ROOT_PATH} > /dev/null
echo ""
echo "==== Run standalone unittest begin ===="
echo ""
-Product/out/test/onert-test prepare-model --model=nnpackage
-Product/out/test/onert-test unittest --unittestdir=Product/out/unittest_standalone
+Product/out/test/onert-test unittest --unittestdir=Product/out/unittest
echo ""
echo "==== Run standalone unittest end ===="
echo ""
@@ -33,14 +32,14 @@ popd > /dev/null
BACKENDS=(acl_cl acl_neon cpu)
# Get the intersect of framework test list files
-TESTLIST_PREFIX="Product/out/test/list/frameworktest_list.${TEST_ARCH}"
-SKIPLIST_PREFIX="Product/out/unittest/nnapi_gtest.skip.${TEST_ARCH}-${TEST_OS}"
-sort $TESTLIST_PREFIX.${BACKENDS[0]}.txt > $TESTLIST_PREFIX.intersect.txt
+TESTLIST_PREFIX="Product/out/test/list/tflite_comparator.${TEST_ARCH}"
+SKIPLIST_PREFIX="Product/out/nnapi-gtest/nnapi_gtest.skip.${TEST_ARCH}-${TEST_OS}"
+sort $TESTLIST_PREFIX.${BACKENDS[0]}.list > $TESTLIST_PREFIX.intersect.list
sort $SKIPLIST_PREFIX.${BACKENDS[0]} > $SKIPLIST_PREFIX.union
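+# comm -12 prints only lines common to both inputs, so the loop narrows the
+# testlist to the intersection across backends while the skiplist grows
+# toward their union.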
for BACKEND in "${BACKENDS[@]:1}"; do
- comm -12 <(sort $TESTLIST_PREFIX.intersect.txt) <(sort $TESTLIST_PREFIX.$BACKEND.txt) > $TESTLIST_PREFIX.intersect.next.txt
+ comm -12 <(sort $TESTLIST_PREFIX.intersect.list) <(sort $TESTLIST_PREFIX.$BACKEND.list) > $TESTLIST_PREFIX.intersect.next.list
comm <(sort $SKIPLIST_PREFIX.union) <(sort $SKIPLIST_PREFIX.$BACKEND) | tr -d "[:blank:]" > $SKIPLIST_PREFIX.union.next
- mv $TESTLIST_PREFIX.intersect.next.txt $TESTLIST_PREFIX.intersect.txt
+ mv $TESTLIST_PREFIX.intersect.next.list $TESTLIST_PREFIX.intersect.list
mv $SKIPLIST_PREFIX.union.next $SKIPLIST_PREFIX.union
done
popd > /dev/null
@@ -56,8 +55,9 @@ echo "GeneratedTests.squeeze_relaxed" >> $SKIPLIST_PREFIX.union
# Run the test
export OP_BACKEND_Conv2D="cpu"
-export OP_BACKEND_MaxPool2D="acl_cl"
-export OP_BACKEND_AvgPool2D="acl_neon"
+export OP_BACKEND_Pool2D="acl_cl"
+export OP_BACKEND_FullyConnected="acl_neon"
export ACL_LAYOUT="NCHW"
-NNAPIGTest "acl_cl;acl_neon;cpu" "Product/out/unittest/nnapi_gtest.skip.${TEST_ARCH}-${TEST_OS}.union" "report/mixed"
-TFLiteModelVerification "acl_cl;acl_neon;cpu" "${TESTLIST_PREFIX}.intersect.txt" "report/mixed"
+export RUY_THREADS=4
+NNAPIGTest "acl_cl;acl_neon;cpu" "Product/out/nnapi-gtest/nnapi_gtest.skip.${TEST_ARCH}-${TEST_OS}.union" "report/mixed"
+TFLiteModelVerification "acl_cl;acl_neon;cpu" "${TESTLIST_PREFIX}.intersect.list" "report/mixed"
diff --git a/infra/scripts/tizen_xu4_test.sh b/infra/scripts/tizen_xu4_test.sh
index 05e55848c..5610756b7 100755
--- a/infra/scripts/tizen_xu4_test.sh
+++ b/infra/scripts/tizen_xu4_test.sh
@@ -25,26 +25,18 @@ function install_model()
{
# download tflite model files
pushd $HOST_HOME
- tests/scripts/models/run_test.sh --download=on --run=off
+ TEMP_PATH=$(mktemp -d)
+ CACHE_PATH=$TEMP_PATH/cache
+ mkdir -p $CACHE_PATH
+ ./nnfw prepare-model --cachedir=$CACHE_PATH
# TODO Since this command removes model file(.zip),
# We must always download the file unlike model file(.tflite).
# Because caching applies only to tflite file.
- find tests -name "*.zip" -exec rm {} \;
- tar -zcf cache.tar.gz -C tests/scripts/models cache
- $SDB_CMD push cache.tar.gz $TEST_ROOT/.
- rm -rf cache.tar.gz
- $SDB_CMD shell tar -zxf $TEST_ROOT/cache.tar.gz -C $TEST_ROOT/Product/out/test/models
-
- # download api test model file for nnfw_api_gtest
- MODEL_CACHE_DIR=$(mktemp -d)
- tests/scripts/models/run_test.sh --download=on --run=off \
- --configdir=tests/scripts/models/nnfw_api_gtest \
- --cachedir=$MODEL_CACHE_DIR
- tar -zcf $MODEL_CACHE_DIR/api_model_test.tar.gz -C $MODEL_CACHE_DIR .
- $SDB_CMD push $MODEL_CACHE_DIR/api_model_test.tar.gz $TEST_ROOT/Product/out/unittest_standalone/nnfw_api_gtest_models/
- $SDB_CMD shell tar -zxf $TEST_ROOT/Product/out/unittest_standalone/nnfw_api_gtest_models/api_model_test.tar.gz \
- -C $TEST_ROOT/Product/out/unittest_standalone/nnfw_api_gtest_models/
- rm -rf $MODEL_CACHE_DIR
+ find $CACHE_PATH -name "*.zip" -exec rm {} \;
+ tar -zcf $TEMP_PATH/cache.tar.gz -C $TEMP_PATH cache
+ $SDB_CMD push $TEMP_PATH/cache.tar.gz $TEST_ROOT/
+ rm -rf $TEMP_PATH
+ $SDB_CMD shell tar -zxf $TEST_ROOT/cache.tar.gz -C $TEST_ROOT/Product/out/test
popd
}
@@ -160,11 +152,10 @@ if [ $RUN_TEST = "0" ]; then
fi
if [ -z "${GCOV_DIR}" ]; then
- ${SDB_CMD} shell /bin/bash -c "IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --backend acl_cl --tflite-loader"
- ${SDB_CMD} shell /bin/bash -c "IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --backend acl_neon"
- ${SDB_CMD} shell /bin/bash -c "IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --backend cpu"
- ${SDB_CMD} shell /bin/bash -c "IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime_mixed.sh"
- ${SDB_CMD} shell /bin/bash -c "IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --interp"
+ ${SDB_CMD} shell /bin/bash -c "IGNORE_MD5=1 TEST_ARCH=armv7l ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --backend acl_cl --tflite-loader"
+ ${SDB_CMD} shell /bin/bash -c "IGNORE_MD5=1 TEST_ARCH=armv7l ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --backend acl_neon"
+ ${SDB_CMD} shell /bin/bash -c "IGNORE_MD5=1 TEST_ARCH=armv7l ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --backend cpu"
+ ${SDB_CMD} shell /bin/bash -c "IGNORE_MD5=1 TEST_ARCH=armv7l ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime_mixed.sh"
else
mkdir -p ${GCOV_DIR}
rm -rf ${GCOV_DIR}/*
@@ -176,11 +167,10 @@ else
GCOV_DATA_PATH="/opt/usr/nnfw-gcov"
# TODO For coverage check, we run acl_cl and mixed test
- ${SDB_CMD} shell /bin/bash -c "GCOV_PREFIX_STRIP=${GCOV_PREFIX_STRIP} IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --backend acl_cl --tflite-loader"
- ${SDB_CMD} shell /bin/bash -c "GCOV_PREFIX_STRIP=${GCOV_PREFIX_STRIP} IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --backend acl_neon"
- ${SDB_CMD} shell /bin/bash -c "GCOV_PREFIX_STRIP=${GCOV_PREFIX_STRIP} IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --backend cpu"
- ${SDB_CMD} shell /bin/bash -c "GCOV_PREFIX_STRIP=${GCOV_PREFIX_STRIP} IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime_mixed.sh"
- ${SDB_CMD} shell /bin/bash -c "GCOV_PREFIX_STRIP=${GCOV_PREFIX_STRIP} IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --interp"
+ ${SDB_CMD} shell /bin/bash -c "GCOV_PREFIX_STRIP=${GCOV_PREFIX_STRIP} IGNORE_MD5=1 TEST_ARCH=armv7l ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --backend acl_cl --tflite-loader"
+ ${SDB_CMD} shell /bin/bash -c "GCOV_PREFIX_STRIP=${GCOV_PREFIX_STRIP} IGNORE_MD5=1 TEST_ARCH=armv7l ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --backend acl_neon"
+ ${SDB_CMD} shell /bin/bash -c "GCOV_PREFIX_STRIP=${GCOV_PREFIX_STRIP} IGNORE_MD5=1 TEST_ARCH=armv7l ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --backend cpu"
+ ${SDB_CMD} shell /bin/bash -c "GCOV_PREFIX_STRIP=${GCOV_PREFIX_STRIP} IGNORE_MD5=1 TEST_ARCH=armv7l ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime_mixed.sh"
# More test to check coverage
${SDB_CMD} shell "rm -rf ${GCOV_DATA_PATH} && mkdir -p ${GCOV_DATA_PATH}"
diff --git a/infra/scripts/unittest_compiler_xml.sh b/infra/scripts/unittest_compiler_xml.sh
index 46d3bc813..6e9e8ad7f 100755
--- a/infra/scripts/unittest_compiler_xml.sh
+++ b/infra/scripts/unittest_compiler_xml.sh
@@ -7,7 +7,9 @@ set -eo pipefail
CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT_PATH="$CURRENT_PATH/../../"
NNCC_WORKSPACE=${NNCC_WORKSPACE:-${ROOT_PATH}build}
-UNITTEST_REPORT_DIR=${NNCC_WORKSPACE}/unittest_compiler_xml
+
+# Use fixed absolute report dir for CI
+UNITTEST_REPORT_DIR=${ROOT_PATH}build/unittest_compiler_xml
for i in "$@"
do
@@ -25,5 +27,10 @@ fi
for TEST_BIN in `find ${NNCC_WORKSPACE}/compiler -type f -executable -name *_test`; do
TEST_NAME="$(basename -- $TEST_BIN)"
- LUGI_LOG=999 $TEST_BIN --gtest_output="xml:$UNITTEST_REPORT_DIR/$TEST_NAME.xml"
+ TEST_DIR="$(dirname $TEST_BIN)"
+
+  # Execute in the test directory so the test can find related files
+ pushd $TEST_DIR > /dev/null
+ LUGI_LOG=999 ./$TEST_NAME --gtest_output="xml:$UNITTEST_REPORT_DIR/$TEST_NAME.xml"
+ popd > /dev/null
done
diff --git a/nnpackage/examples/README.md b/nnpackage/examples/README.md
new file mode 100644
index 000000000..951048bec
--- /dev/null
+++ b/nnpackage/examples/README.md
@@ -0,0 +1,39 @@
+# NNPackage example
+
+## Package version 1.3.0
+
+### two_tflites
+
+- Model files: two TensorFlow Lite models
+- Contains two tflite models connected via the pkg-input, pkg-output, and model-connect fields.
+
+## Package version 1.1.0
+
+### one_op_in_tflite
+
+- Model file: TensorFlow Lite model
+- Only one `ADD` operation
+
+## Package version 1.0.0
+
+### add
+
+- Model file: TensorFlow Lite model
+- Only one `ADD` operation
+
+### add_invalid_manifest
+
+- Model file: TensorFlow Lite model
+- Only one `ADD` operation
+- Invalid manifest: invalid JSON format
+
+### if_dynamic
+
+- Model file: TensorFlow Lite model
+- `IF` operation example with sample input and output
+
+### while_dynamic
+
+- Model file: TensorFlow Lite model
+- `WHILE` operation example with sample input and output
+
diff --git a/nnpackage/examples/one_op_in_tflite/add.tflite b/nnpackage/examples/v1.0.0/add/add.tflite
index e748b6843..e748b6843 100644
--- a/nnpackage/examples/one_op_in_tflite/add.tflite
+++ b/nnpackage/examples/v1.0.0/add/add.tflite
Binary files differ
diff --git a/nnpackage/examples/one_op_in_tflite/metadata/MANIFEST b/nnpackage/examples/v1.0.0/add/metadata/MANIFEST
index 1d96cce1b..1d96cce1b 100644
--- a/nnpackage/examples/one_op_in_tflite/metadata/MANIFEST
+++ b/nnpackage/examples/v1.0.0/add/metadata/MANIFEST
diff --git a/nnpackage/examples/v1.0.0/add_invalid_manifest/add.tflite b/nnpackage/examples/v1.0.0/add_invalid_manifest/add.tflite
new file mode 100644
index 000000000..e748b6843
--- /dev/null
+++ b/nnpackage/examples/v1.0.0/add_invalid_manifest/add.tflite
Binary files differ
diff --git a/nnpackage/examples/v1.0.0/add_invalid_manifest/metadata/MANIFEST b/nnpackage/examples/v1.0.0/add_invalid_manifest/metadata/MANIFEST
new file mode 100644
index 000000000..8b18e4edd
--- /dev/null
+++ b/nnpackage/examples/v1.0.0/add_invalid_manifest/metadata/MANIFEST
@@ -0,0 +1,7 @@
+{
+ "major-version" : "1"
+ "minor-version" : "0"
+ "patch-version" : "0"
+ "models" : [ "add.tflite" ]
+ "model-types" : [ "tflite" ]
+}
diff --git a/nnpackage/examples/v1.0.0/if_dynamic/if_dynamic.tflite b/nnpackage/examples/v1.0.0/if_dynamic/if_dynamic.tflite
new file mode 100644
index 000000000..680a8b17e
--- /dev/null
+++ b/nnpackage/examples/v1.0.0/if_dynamic/if_dynamic.tflite
Binary files differ
diff --git a/nnpackage/examples/v1.0.0/if_dynamic/metadata/MANIFEST b/nnpackage/examples/v1.0.0/if_dynamic/metadata/MANIFEST
new file mode 100644
index 000000000..0fea9800f
--- /dev/null
+++ b/nnpackage/examples/v1.0.0/if_dynamic/metadata/MANIFEST
@@ -0,0 +1,7 @@
+{
+ "major-version" : "1",
+ "minor-version" : "0",
+ "patch-version" : "0",
+ "models" : [ "if_dynamic.tflite" ],
+ "model-types" : [ "tflite" ]
+}
diff --git a/nnpackage/examples/v1.0.0/if_dynamic/metadata/tc/expected.h5 b/nnpackage/examples/v1.0.0/if_dynamic/metadata/tc/expected.h5
new file mode 100644
index 000000000..d1a47b9e3
--- /dev/null
+++ b/nnpackage/examples/v1.0.0/if_dynamic/metadata/tc/expected.h5
Binary files differ
diff --git a/nnpackage/examples/v1.0.0/if_dynamic/metadata/tc/input.h5 b/nnpackage/examples/v1.0.0/if_dynamic/metadata/tc/input.h5
new file mode 100644
index 000000000..1309ed51a
--- /dev/null
+++ b/nnpackage/examples/v1.0.0/if_dynamic/metadata/tc/input.h5
Binary files differ
diff --git a/nnpackage/examples/v1.0.0/while_dynamic/metadata/MANIFEST b/nnpackage/examples/v1.0.0/while_dynamic/metadata/MANIFEST
new file mode 100644
index 000000000..cfe19ad2c
--- /dev/null
+++ b/nnpackage/examples/v1.0.0/while_dynamic/metadata/MANIFEST
@@ -0,0 +1,7 @@
+{
+ "major-version" : "1",
+ "minor-version" : "0",
+ "patch-version" : "0",
+ "models" : [ "while_dynamic.tflite" ],
+ "model-types" : [ "tflite" ]
+}
diff --git a/nnpackage/examples/v1.0.0/while_dynamic/metadata/tc/expected.h5 b/nnpackage/examples/v1.0.0/while_dynamic/metadata/tc/expected.h5
new file mode 100644
index 000000000..5d5eec6f8
--- /dev/null
+++ b/nnpackage/examples/v1.0.0/while_dynamic/metadata/tc/expected.h5
Binary files differ
diff --git a/nnpackage/examples/v1.0.0/while_dynamic/metadata/tc/input.h5 b/nnpackage/examples/v1.0.0/while_dynamic/metadata/tc/input.h5
new file mode 100644
index 000000000..75f09095c
--- /dev/null
+++ b/nnpackage/examples/v1.0.0/while_dynamic/metadata/tc/input.h5
Binary files differ
diff --git a/nnpackage/examples/v1.0.0/while_dynamic/while_dynamic.tflite b/nnpackage/examples/v1.0.0/while_dynamic/while_dynamic.tflite
new file mode 100644
index 000000000..6f201d504
--- /dev/null
+++ b/nnpackage/examples/v1.0.0/while_dynamic/while_dynamic.tflite
Binary files differ
diff --git a/nnpackage/examples/v1.1.0/one_op_in_tflite/add.tflite b/nnpackage/examples/v1.1.0/one_op_in_tflite/add.tflite
new file mode 100644
index 000000000..e748b6843
--- /dev/null
+++ b/nnpackage/examples/v1.1.0/one_op_in_tflite/add.tflite
Binary files differ
diff --git a/nnpackage/examples/v1.1.0/one_op_in_tflite/metadata/MANIFEST b/nnpackage/examples/v1.1.0/one_op_in_tflite/metadata/MANIFEST
new file mode 100644
index 000000000..3ed12f99d
--- /dev/null
+++ b/nnpackage/examples/v1.1.0/one_op_in_tflite/metadata/MANIFEST
@@ -0,0 +1,8 @@
+{
+ "major-version" : "1",
+ "minor-version" : "1",
+ "patch-version" : "0",
+ "configs" : [ "config.cfg" ],
+ "models" : [ "add.tflite" ],
+ "model-types" : [ "tflite" ]
+}
diff --git a/nnpackage/examples/v1.1.0/one_op_in_tflite/metadata/config.cfg b/nnpackage/examples/v1.1.0/one_op_in_tflite/metadata/config.cfg
new file mode 100644
index 000000000..776fa7024
--- /dev/null
+++ b/nnpackage/examples/v1.1.0/one_op_in_tflite/metadata/config.cfg
@@ -0,0 +1 @@
+BACKENDS="cpu"
diff --git a/nnpackage/examples/v1.3.0/two_tflites/README.md b/nnpackage/examples/v1.3.0/two_tflites/README.md
new file mode 100644
index 000000000..3fcbe2d90
--- /dev/null
+++ b/nnpackage/examples/v1.3.0/two_tflites/README.md
@@ -0,0 +1,28 @@
+## How to create
+
+```
+$ wget https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_1.0_224.tgz
+$ tar -zxf mobilenet_v1_1.0_224.tgz
+
+$ python tools/tflitefile_tool/select_operator.py mobilenet_v1_1.0_224.tflite <( echo 0-1 ) mv1.0_1.tflite
+$ python tools/tflitefile_tool/select_operator.py mv1.0_1.tflite <( echo 0 ) mv1.0.tflite
+$ python tools/tflitefile_tool/select_operator.py mv1.0_1.tflite <( echo 1 ) mv1.1.tflite
+
+# make sure all three tflite files are valid
+$ ./Product/out/bin/tflite_comparator mv1.0_1.tflite
+$ ./Product/out/bin/tflite_comparator mv1.0.tflite
+$ ./Product/out/bin/tflite_comparator mv1.1.tflite
+
+$ tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh -m mv1.0.tflite mv1.1.tflite -p two_tflites
+$ cat two_tflites/metadata/MANIFEST
+{
+ "major-version" : "1",
+ "minor-version" : "2",
+ "patch-version" : "0",
+ "configs" : [ ],
+ "models" : [ "mv1.0.tflite", "mv1.1.tflite" ],
+ "model-types" : [ "tflite", "tflite" ]
+}
+
+# update minor-version, and add additional fields manually
+```
diff --git a/nnpackage/examples/v1.3.0/two_tflites/metadata/MANIFEST b/nnpackage/examples/v1.3.0/two_tflites/metadata/MANIFEST
new file mode 100644
index 000000000..9d9e21ac1
--- /dev/null
+++ b/nnpackage/examples/v1.3.0/two_tflites/metadata/MANIFEST
@@ -0,0 +1,11 @@
+{
+ "major-version" : "1",
+ "minor-version" : "3",
+ "patch-version" : "0",
+ "configs" : [ ],
+ "models" : [ "mv1.0.tflite", "mv1.1.tflite" ],
+ "model-types" : [ "tflite", "tflite" ],
+ "pkg-inputs" : [ "0:0:0" ],
+ "pkg-outputs" : [ "1:0:0" ],
+ "model-connect" : [ { "from" : "0:0:0", "to" : [ "1:0:0" ] } ]
+}
diff --git a/nnpackage/examples/v1.3.0/two_tflites/metadata/tc/expected.h5 b/nnpackage/examples/v1.3.0/two_tflites/metadata/tc/expected.h5
new file mode 100644
index 000000000..59a6b9040
--- /dev/null
+++ b/nnpackage/examples/v1.3.0/two_tflites/metadata/tc/expected.h5
Binary files differ
diff --git a/nnpackage/examples/v1.3.0/two_tflites/metadata/tc/input.h5 b/nnpackage/examples/v1.3.0/two_tflites/metadata/tc/input.h5
new file mode 100644
index 000000000..2251157c7
--- /dev/null
+++ b/nnpackage/examples/v1.3.0/two_tflites/metadata/tc/input.h5
Binary files differ
diff --git a/nnpackage/examples/v1.3.0/two_tflites/mv1.0.tflite b/nnpackage/examples/v1.3.0/two_tflites/mv1.0.tflite
new file mode 100644
index 000000000..03f30c747
--- /dev/null
+++ b/nnpackage/examples/v1.3.0/two_tflites/mv1.0.tflite
Binary files differ
diff --git a/nnpackage/examples/v1.3.0/two_tflites/mv1.1.tflite b/nnpackage/examples/v1.3.0/two_tflites/mv1.1.tflite
new file mode 100644
index 000000000..e3b4f8db7
--- /dev/null
+++ b/nnpackage/examples/v1.3.0/two_tflites/mv1.1.tflite
Binary files differ
diff --git a/nnpackage/schema/circle_schema.fbs b/nnpackage/schema/circle_schema.fbs
index 89e458c4f..cdc10361b 100644
--- a/nnpackage/schema/circle_schema.fbs
+++ b/nnpackage/schema/circle_schema.fbs
@@ -1,4 +1,4 @@
-// Copyright (c) 2019~2020 Samsung Electronics Co., Ltd. All Rights Reserved
+// Copyright (c) 2019~2023 Samsung Electronics Co., Ltd. All Rights Reserved
// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
@@ -27,6 +27,10 @@
// `BATCH_MATMUL` operator, `FLOAT64` tensor type,
// `asymmetric_quantize_inputs` for several operator options
// Version 0.2: BCQ_GATHER and BCQ_FULLY_CONNECTED are added.
+// Version 0.3: SHUFFLED16x1FLOAT32 is added.
+// Version 0.4: Base up to TensorFlow Lite v2.7.0 schema.
+// Version 0.5: Base up to TensorFlow Lite v2.10.1 schema.
+// Version 0.6: Base up to TensorFlow Lite v2.13.0 schema.
namespace circle;
@@ -51,6 +55,16 @@ enum TensorType : byte {
COMPLEX64 = 8,
INT8 = 9,
FLOAT64 = 10,
+ COMPLEX128 = 11,
+ UINT64 = 12,
+  // Experimental: Resource and variant types are experimental and subject
+  // to change. Do not implement custom kernels using resource & variant types
+  // yet.
+ RESOURCE = 13,
+ VARIANT = 14,
+ UINT32 = 15,
+ UINT16 = 16,
+ INT4 = 17,
}
// Custom quantization parameters for experimenting with new quantization
@@ -190,6 +204,16 @@ table SparsityParameters {
dim_metadata:[DimensionMetadata];
}
+// The nested tensor type for VARIANT type.
+table VariantSubType {
+ // The tensor shape.
+ shape:[int];
+ type:TensorType;
+ // If false, the rank or the number of tensor dimensions is unknown.
+ // If false, "shape" must be [].
+ has_rank: bool = false;
+}
+
table Tensor {
// The tensor shape. The meaning of each entry is operator-specific but
// builtin ops use: [batch size, height, width, number of channels] (That's
@@ -217,14 +241,27 @@ table Tensor {
// Encodes `shape` with unknown dimensions. Unknown dimensions are
// represented with -1.
shape_signature:[int]; // Optional.
+
+ // If false, the rank or the number of tensor dimensions is unknown.
+ // If false, "shape" must be [].
+ has_rank: bool = false;
+
+ // The nested Tensor types for VARIANT type. This is always empty for
+ // non-VARIANT types. This is optional because the nested type can be omitted.
+ // Currently only 1 subtype is supported. The field is defined as an array for
+ // flexibility of supporting multiple subtypes in the future.
+ variant_tensors:[VariantSubType];
}
// A list of builtin operators. Builtin operators are slightly faster than custom
// ones, but not by much. Moreover, while custom operators accept an opaque
// object containing configuration parameters, builtins have a predetermined
// set of acceptable options.
-
-enum BuiltinOperator : ubyte {
+// LINT.IfChange
+enum BuiltinOperator : int32 {
+ BCQ_GATHER = -4,
+ BCQ_FULLY_CONNECTED = -3,
+ INSTANCE_NORM = -2,
ADD = 0,
AVERAGE_POOL_2D = 1,
CONCATENATION = 2,
@@ -257,7 +294,6 @@ enum BuiltinOperator : ubyte {
SPACE_TO_DEPTH = 26,
SVDF = 27,
TANH = 28,
- // TODO(aselle): Consider rename to CONCATENATE_EMBEDDINGS
CONCAT_EMBEDDINGS = 29,
SKIP_GRAM = 30,
CALL = 31,
@@ -359,10 +395,43 @@ enum BuiltinOperator : ubyte {
DENSIFY = 124,
SEGMENT_SUM = 125,
BATCH_MATMUL = 126,
- BCQ_GATHER = 252,
- BCQ_FULLY_CONNECTED = 253,
- INSTANCE_NORM = 254,
-}
+ PLACEHOLDER_FOR_GREATER_OP_CODES = 127,
+ CUMSUM = 128,
+ CALL_ONCE = 129,
+ BROADCAST_TO = 130,
+ RFFT2D = 131,
+ CONV_3D = 132,
+  IMAG = 133,
+  REAL = 134,
+  COMPLEX_ABS = 135,
+ HASHTABLE = 136,
+ HASHTABLE_FIND = 137,
+ HASHTABLE_IMPORT = 138,
+ HASHTABLE_SIZE = 139,
+ REDUCE_ALL = 140,
+ CONV_3D_TRANSPOSE = 141,
+ VAR_HANDLE = 142,
+ READ_VARIABLE = 143,
+ ASSIGN_VARIABLE = 144,
+ BROADCAST_ARGS = 145,
+ RANDOM_STANDARD_NORMAL = 146,
+ BUCKETIZE = 147,
+ RANDOM_UNIFORM = 148,
+ MULTINOMIAL = 149,
+ GELU = 150,
+ DYNAMIC_UPDATE_SLICE = 151,
+ RELU_0_TO_1 = 152,
+ UNSORTED_SEGMENT_PROD = 153,
+ UNSORTED_SEGMENT_MAX = 154,
+ UNSORTED_SEGMENT_SUM = 155,
+ ATAN2 = 156,
+ UNSORTED_SEGMENT_MIN = 157,
+ SIGN = 158,
+ BITCAST = 159,
+ BITWISE_XOR = 160,
+ RIGHT_SHIFT = 161,
+}
+// LINT.ThenChange(nnapi_linter/linter.proto)
// Options for the builtin operators.
union BuiltinOptions {
@@ -467,6 +536,31 @@ union BuiltinOptions {
DensifyOptions,
SegmentSumOptions,
BatchMatMulOptions,
+ CumsumOptions,
+ CallOnceOptions,
+ BroadcastToOptions,
+ Rfft2dOptions,
+ Conv3DOptions,
+ HashtableOptions,
+ HashtableFindOptions,
+ HashtableImportOptions,
+ HashtableSizeOptions,
+ VarHandleOptions,
+ ReadVariableOptions,
+ AssignVariableOptions,
+ RandomOptions,
+ BucketizeOptions,
+ GeluOptions,
+ DynamicUpdateSliceOptions,
+ UnsortedSegmentProdOptions,
+ UnsortedSegmentMaxOptions,
+ UnsortedSegmentMinOptions,
+ UnsortedSegmentSumOptions,
+ ATan2Options,
+ SignOptions,
+ BitcastOptions,
+ BitwiseXorOptions,
+ RightShiftOptions,
BCQGatherOptions = 252,
BCQFullyConnectedOptions = 253,
InstanceNormOptions = 254,
@@ -492,6 +586,18 @@ table Conv2DOptions {
dilation_h_factor:int = 1;
}
+// Options for both Conv3D and Conv3DTranspose.
+table Conv3DOptions {
+ padding:Padding;
+ stride_d:int;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_d_factor:int = 1;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
table Pool2DOptions {
padding:Padding;
stride_w:int;
@@ -564,6 +670,7 @@ table BidirectionalSequenceRNNOptions {
enum FullyConnectedOptionsWeightsFormat: byte {
DEFAULT = 0,
SHUFFLED4x16INT8 = 1,
+ SHUFFLED16x1FLOAT32 = 127
}
// An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
@@ -597,6 +704,8 @@ table ConcatenationOptions {
table AddOptions {
fused_activation_function:ActivationFunctionType;
+ // Parameters supported by version 3.
+ pot_scale_int16:bool = true;
}
table MulOptions {
@@ -604,6 +713,7 @@ table MulOptions {
}
table L2NormOptions {
+ // This field is currently ignored in the L2 Norm Op.
fused_activation_function:ActivationFunctionType;
}
@@ -645,8 +755,11 @@ table UnidirectionalSequenceLSTMOptions {
// If true then first dimension is sequence, otherwise batch.
time_major:bool;
- // Parameter for Unidirectional Sequence LSTM version 4.
+ // Parameter for Unidirectional Sequence LSTM version 3.
asymmetric_quantize_inputs:bool;
+
+ // Parameter for unidirectional sequence RNN version 4.
+ diagonal_recurrent_tensors:bool;
}
table BidirectionalSequenceLSTMOptions {
@@ -677,6 +790,7 @@ table ResizeBilinearOptions {
table ResizeNearestNeighborOptions {
align_corners: bool;
+ half_pixel_centers: bool;
}
// A call operation options
@@ -717,6 +831,8 @@ table DepthToSpaceOptions {
table SubOptions {
fused_activation_function:ActivationFunctionType;
+ // Parameters supported by version 5
+ pot_scale_int16:bool = true;
}
table DivOptions {
@@ -738,6 +854,8 @@ table EmbeddingLookupSparseOptions {
table GatherOptions {
axis: int;
+ // Parameters for Gather version 5 or above.
+ batch_dims: int = 0;
}
table TransposeOptions {
@@ -820,9 +938,13 @@ table SliceOptions {
}
table TransposeConvOptions {
+ // Parameters supported by version 1, 2, 3:
padding:Padding;
stride_w:int;
stride_h:int;
+
+ // Parameters supported by version 4:
+ fused_activation_function:ActivationFunctionType = NONE;
}
table ExpandDimsOptions {
@@ -960,6 +1082,10 @@ table IfOptions {
else_subgraph_index:int;
}
+table CallOnceOptions {
+ init_subgraph_index:int;
+}
+
table WhileOptions {
cond_subgraph_index:int;
body_subgraph_index:int;
@@ -986,6 +1112,92 @@ table SegmentSumOptions {
table BatchMatMulOptions {
adjoint_lhs:bool;
adjoint_rhs:bool;
+ // Parameters for BatchMatMul version 4 or above.
+ // If set to true, then weights-only op will use asymmetric quantization for
+ // inputs.
+ asymmetric_quantize_inputs: bool;
+}
+
+table CumsumOptions {
+ exclusive:bool;
+ reverse:bool;
+}
+
+table BroadcastToOptions {
+}
+
+table Rfft2dOptions {
+}
+
+table HashtableOptions {
+ // The identity of hash tables. This identity will be used across different
+ // subgraphs in the same interpreter instance.
+ table_id:int;
+ key_dtype:TensorType;
+ value_dtype:TensorType;
+}
+
+table HashtableFindOptions {
+}
+
+table HashtableImportOptions {
+}
+
+table HashtableSizeOptions {
+}
+
+table VarHandleOptions {
+ container:string;
+ shared_name:string;
+}
+
+table ReadVariableOptions {
+}
+
+table AssignVariableOptions {
+}
+
+table RandomOptions {
+ seed: long;
+ seed2: long;
+}
+
+table BucketizeOptions {
+ boundaries: [float]; // The bucket boundaries.
+}
+
+table GeluOptions {
+ approximate: bool;
+}
+
+table DynamicUpdateSliceOptions {
+}
+
+table UnsortedSegmentProdOptions {
+}
+
+table UnsortedSegmentMaxOptions {
+}
+
+table UnsortedSegmentSumOptions {
+}
+
+table ATan2Options {
+}
+
+table UnsortedSegmentMinOptions {
+}
+
+table SignOptions {
+}
+
+table BitcastOptions {
+}
+
+table BitwiseXorOptions {
+}
+
+table RightShiftOptions {
}
table BCQGatherOptions {
@@ -1006,12 +1218,21 @@ table InstanceNormOptions {
// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
// builtin, or a string if the operator is custom.
table OperatorCode {
- builtin_code:BuiltinOperator;
+  // This field is for backward compatibility. This field will be used when
+  // the value of the extended builtin_code field is less than
+  // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+ deprecated_builtin_code:byte;
custom_code:string;
// The version of the operator. The version need to be bumped whenever new
// parameters are introduced into an op.
version:int = 1;
+
+  // This field is introduced for resolving the op builtin code shortage problem
+  // (the original BuiltinOperator enum field was represented as a byte).
+  // This field will be used when the value of the extended builtin_code field
+  // is greater than BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+ builtin_code:BuiltinOperator;
}
enum CustomOptionsFormat : byte {
@@ -1102,6 +1323,35 @@ table Metadata {
buffer:uint;
}
+// Map from an alias name of tensor to tensor index in the graph.
+// This is used in Signature def.
+table TensorMap {
+ // Represents the alias to use for this tensor.
+ name:string;
+
+ // The actual tensor index in the primary graph, that 'name' corresponds to.
+ tensor_index:uint;
+}
+
+// This corresponds to SignatureDef in Tensorflow SavedModel.
+// The SignatureDef will be part of the SavedModel provided for conversion.
+table SignatureDef {
+ // Named inputs for this signature.
+ inputs:[TensorMap];
+
+ // Named outputs for this signature.
+ outputs:[TensorMap];
+
+ // Key value which was in the Tensorflow SavedModel SignatureDef map.
+ signature_key:string;
+
+ // Model tag, deprecated.
+ deprecated_tag:string (deprecated);
+
+  // Index of the subgraph that corresponds to the exported method.
+ subgraph_index:uint;
+}
+
table Model {
// Version of the schema.
version:uint;
@@ -1130,6 +1380,9 @@ table Model {
// Metadata about the model.
metadata:[Metadata];
+
+ // Optional SignatureDefs for the model.
+ signature_defs:[SignatureDef];
}
root_type Model;
diff --git a/nnpackage/spec/10_packaging_and_manifest.md b/nnpackage/spec/10_packaging_and_manifest.md
index d4e6ec8bd..6aa0db74e 100644
--- a/nnpackage/spec/10_packaging_and_manifest.md
+++ b/nnpackage/spec/10_packaging_and_manifest.md
@@ -18,11 +18,13 @@ For `model` and `custom_op`, see [20_model_and_operators.md](20_model_and_operat
nnpackage
├── custom_op
├── metadata
-│   └── MANIFEST
+│   ├── MANIFEST
+│   └── config.cfg
└── mymodel.model
```
- `mymodel.model` is a model file that has computation graph and weights.
+- `config.cfg` is a configuration file that has parameters to configure onert.
- `metadata` is a directory that contains all metadata including `MANIFEST`.
- `MANIFEST` is a collection of attributes about this package.
- `custom_op` is a directory that contains implementation objects.
@@ -61,6 +63,11 @@ For detail, see [semantic versioning 2.0.0](https://semver.org/)
`patch-version` is the patch version of `nnpackage`.
+#### configs
+
+`configs` is an array of configuration file names placed in the `metadata` folder. This can be empty, or the
+attribute itself can be omitted. As of now, only one item is supported.
+
#### models
`models` is an array of path to model files, which is relative path from top level directory of this package.
@@ -76,6 +83,7 @@ It can have the values (case-sensitive) in following table.
|--------|------------------------|
| tflite | tensorflow lite schema |
| circle | nnpackage schema |
+| tvn | trix-engine binary |
### Example
@@ -84,9 +92,25 @@ Here is an example of `MANIFEST`.
```
{
"major-version" : "1",
- "minor-version" : "0",
+ "minor-version" : "2",
"patch-version" : "0",
- "models" : [ "mymodel.model", "yourmodel.model" ],
- "model-types" : [ "tflite", "circle" ]
+ "configs" : [ "model.cfg" ],
+ "models" : [ "mymodel.model", "yourmodel.model", "binmodel.tvn" ],
+ "model-types" : [ "tflite", "circle", "tvn" ]
}
```
+
+## 5. Configuration file
+
+A configuration file is a human-readable plain text file with one `key=value` pair per line.
+- `#` starts a comment; the rest of the line after it is ignored.
+- All leading and trailing white space is ignored in both `key` and `value`.
+
+For example
+```
+BACKENDS=cpu
+# leading/trailing space is ignored
+ EXECUTOR=Linear # some comment
+```
+
+Refer to the `runtime/onert/core/include/util/Config.lst` file for more information about `key`.
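+
+For illustration, here is a minimal sketch of parsing one such line (a hypothetical helper, not the onert implementation):
+
+``` cpp
+#include <string>
+#include <utility>
+
+// Hypothetical helper illustrating the rules above; not part of onert.
+// Returns {key, value} for one line; both strings are empty if the line has no pair.
+std::pair<std::string, std::string> parseConfigLine(std::string line)
+{
+  // `#` starts a comment; drop it and everything after it.
+  const auto hash = line.find('#');
+  if (hash != std::string::npos)
+    line.erase(hash);
+
+  const auto eq = line.find('=');
+  if (eq == std::string::npos)
+    return {};
+
+  // Trim leading/trailing white space from both key and value.
+  const auto trim = [](std::string s) {
+    const auto b = s.find_first_not_of(" \t");
+    const auto e = s.find_last_not_of(" \t");
+    return b == std::string::npos ? std::string{} : s.substr(b, e - b + 1);
+  };
+  return {trim(line.substr(0, eq)), trim(line.substr(eq + 1))};
+}
+```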
diff --git a/onert-micro/CMakeLists.txt b/onert-micro/CMakeLists.txt
new file mode 100644
index 000000000..416281d0f
--- /dev/null
+++ b/onert-micro/CMakeLists.txt
@@ -0,0 +1,252 @@
+set(ARM_C_COMPILER "arm-none-eabi-gcc")
+set(ARM_ASM_COMPILER "arm-none-eabi-gcc")
+set(ARM_CXX_COMPILER "arm-none-eabi-g++")
+set(ARM_OBJCOPY "arm-none-eabi-objcopy")
+
+find_program(ARM_C_COMPILER_PATH ${ARM_C_COMPILER})
+
+if (NOT ARM_C_COMPILER_PATH)
+ message(STATUS "Build luci-micro: FALSE(ARM compiler is NOT FOUND)")
+endif ()
+
+if (NOT_BUILD_EXTERNALS)
+ message(STATUS "USE LOCAL EXTERNAL")
+
+ set(ONERT_MICRO_EXTERNAL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/externals")
+ set(Flatbuffers_DIR "${ONERT_MICRO_EXTERNAL_DIR}")
+ set(FlatBuffersSource_DIR "${Flatbuffers_DIR}")
+ set (EXT_OVERLAY_DIR "${ONERT_MICRO_EXTERNAL_DIR}")
+ set(GENERATED_INCLUDE_DIR "${ONERT_MICRO_EXTERNAL_DIR}/gen")
+
+ add_subdirectory(externals)
+else()
+ message(STATUS "DOWNLOAD AND BUILD EXTERNALS")
+
+ nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
+
+ if (NOT FlatBuffers_FOUND)
+ message(STATUS "Build luci-micro: FALSE(FlatBuffers 2.0 NOT FOUND)")
+ return()
+ endif (NOT FlatBuffers_FOUND)
+
+ message(STATUS "Build luci-micro: TRUE")
+
+ set(SCHEMA_FILE "${NNAS_PROJECT_SOURCE_DIR}/res/CircleSchema/0.4/circle_schema.fbs")
+
+  # NOTE Copy circle_schema.fbs as schema.fbs to generate "schema_generated.h" instead of "circle_schema_generated.h"
+ add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/schema.fbs"
+ COMMAND ${CMAKE_COMMAND} -E copy "${SCHEMA_FILE}" schema.fbs
+ WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
+ DEPENDS "${SCHEMA_FILE}"
+ )
+
+ FlatBuffers_Target(luci_micro_circle_schema
+ OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen/circle-generated/circle"
+ INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen"
+ SCHEMA_DIR "${CMAKE_CURRENT_BINARY_DIR}"
+ SCHEMA_FILES "schema.fbs"
+ )
+ set(Flatbuffers_DIR "${CMAKE_CURRENT_BINARY_DIR}/../../overlay/lib/cmake/flatbuffers")
+ set (EXT_OVERLAY_DIR "${CMAKE_CURRENT_BINARY_DIR}/../../overlay")
+ set(GENERATED_INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen")
+endif()
+
+set(LUCI_INTERPRETER_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/luci-interpreter/include")
+add_subdirectory(luci-interpreter/src/core/reader)
+
+# Choosing Kernel: reference mcu, optimized cmsisnn, optimized linux
+if (NOT KERNELS)
+ message(STATUS "KERNEL variable is not defined, default reference mcu kernels will be used")
+ set(LUCI_INTERPRETER_PAL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/luci-interpreter/pal/mcu")
+elseif("${KERNELS}" STREQUAL "mcu")
+ message(STATUS "ONERT_MICRO will use reference mcu kernels")
+ set(LUCI_INTERPRETER_PAL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/luci-interpreter/pal/mcu")
+elseif("${KERNELS}" STREQUAL "cmsisnn")
+ message(STATUS "ONERT_MICRO will use optimized cmsisnn kernels")
+ set(LUCI_INTERPRETER_PAL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/luci-interpreter/pal/cmsisnn")
+elseif("${KERNELS}" STREQUAL "linux")
+ message(STATUS "ONERT_MICRO will use optimized linux kernels")
+ set(LUCI_INTERPRETER_PAL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/luci-interpreter/pal/linux")
+else()
+ message(STATUS "Build onert-micro: FAILED (Non-existent kernel variable. Choose one of the following options: mcu, cmsisnn, linux)")
+ return()
+endif()
+
+if (USE_STATIC_ALLOC)
+ # TODO: enable it
+ message(STATUS "FAILED ONERT-MICRO is not support Static Memory Manager now")
+ return()
+else()
+ message(STATUS "USE_STATIC_ALLOC variable is not defined, default dynamic memory manager will be used")
+endif()
+
+set(CMAKE_ARM_OPTIONS
+ -DLUCI_INTERPRETER_STATIC=ON
+ -DLUCI_STATIC=ON
+ -DBUILD_CMSIS_NN_FUNCTIONS=ON
+ -DTARGET_CPU=${TARGET_CPU}
+ -DTARGET_ARCH=${TARGET_ARCH}
+ "-DEXT_OVERLAY_DIR=${EXT_OVERLAY_DIR}"
+ "-DFlatbuffers_DIR=${Flatbuffers_DIR}"
+ "-DFlatBuffersSource_DIR=${FlatBuffersSource_DIR}"
+ "-DCMAKE_TOOLCHAIN_FILE=${NNAS_PROJECT_SOURCE_DIR}/infra/onert-micro/cmake/buildtool/config/arm-none-eabi-gcc.cmake"
+ "-DLUCI_INTERPRETER_PAL_DIR=${LUCI_INTERPRETER_PAL_DIR}"
+ "-DNNAS_PROJECT_SOURCE_DIR=${NNAS_PROJECT_SOURCE_DIR}"
+ "-DNNAS_EXTERNALS_DIR=${NNAS_EXTERNALS_DIR}"
+ -DC_COMPILER=${ARM_C_COMPILER}
+ -DCXX_COMPILER=${ARM_CXX_COMPILER}
+ -DASM_COMPILER=${ARM_ASM_COMPILER}
+ -DOBJCOPY=${ARM_OBJCOPY}
+ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+ -DENABLE_TEST=OFF
+ -DBUILD_GTEST=OFF
+ "-DNNAS_ROOT=${NNAS_PROJECT_SOURCE_DIR}"
+ -DENABLE_STRICT_BUILD=OFF
+ "-DGENERATED_INCLUDE_DIR=${GENERATED_INCLUDE_DIR}"
+ )
+
+if (GENERATE_KERNELS_LIST_FROM)
+ set(GENERATED_KERNELS_LIST_PATH "${LUCI_INTERPRETER_PAL_DIR}/GeneratedKernelsToBuild.lst")
+ list(APPEND CMAKE_ARM_OPTIONS "-DLUCI_INTERPRETER_KERNELS_BUILD_LIST=${GENERATED_KERNELS_LIST_PATH}")
+endif ()
+
+if (NOT_BUILD_EXTERNALS)
+ list(APPEND CMAKE_ARM_OPTIONS "-DNOT_BUILD_EXTERNALS=ON")
+else()
+ list(APPEND CMAKE_ARM_OPTIONS "-DNOT_BUILD_EXTERNALS=OFF")
+endif ()
+
+if (ENABLE_ONERT_MICRO_TEST)
+ message(STATUS "Enable tests")
+ list(APPEND CMAKE_ARM_OPTIONS "-DENABLE_TEST=ON")
+endif ()
+
+if (DIS_QUANT)
+ message(STATUS "ONERT-MICRO will not use part for QUANTIZED models")
+ add_definitions(-DDIS_QUANT)
+ list(APPEND CMAKE_ARM_OPTIONS "-DDIS_QUANT=ON")
+endif()
+
+if (DIS_FLOAT)
+ message(STATUS "ONERT-MICRO will not use part for FLOAT models")
+ add_definitions(-DDIS_FLOAT)
+ list(APPEND CMAKE_ARM_OPTIONS "-DDIS_FLOAT=ON")
+endif()
+
+if (DIS_DYN_SHAPES)
+ message(STATUS "ONERT-MICRO will not use dynamic shapes")
+ add_definitions(-DDIS_DYN_SHAPES)
+endif()
+
+set(MICRO_ARM_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/standalone_arm")
+file(MAKE_DIRECTORY "${MICRO_ARM_BUILD_DIR}")
+
+set(MICRO_ARM_BUILD_DEPENDENCY "${MICRO_ARM_BUILD_DIR}/CMakeCache.txt")
+
+add_custom_command(
+ OUTPUT "${MICRO_ARM_BUILD_DEPENDENCY}"
+ COMMAND "${CMAKE_COMMAND}" "${CMAKE_CURRENT_SOURCE_DIR}/standalone" ${CMAKE_ARM_OPTIONS}
+ WORKING_DIRECTORY "${MICRO_ARM_BUILD_DIR}"
+ DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/standalone/CMakeLists.txt"
+ VERBATIM
+)
+
+add_custom_target(luci_interpreter_micro_arm_cmake DEPENDS "${MICRO_ARM_BUILD_DEPENDENCY}")
+
+# Generate KernelsToBuild list from circle model
+if (GENERATE_KERNELS_LIST_FROM)
+ add_executable(generator_kernels_list_exec helpers/GenerateKernelsListHelper.cpp)
+
+ target_link_libraries(generator_kernels_list_exec luci_micro_circle_reader)
+ target_link_libraries(generator_kernels_list_exec luci_micro_circle_schema)
+
+ target_include_directories(generator_kernels_list_exec PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/luci-interpreter/include")
+
+ add_custom_target(
+ generate_kernels_list ALL
+ COMMAND generator_kernels_list_exec ${GENERATE_KERNELS_LIST_FROM} ${GENERATED_KERNELS_LIST_PATH}
+ COMMENT "Generating KernelsToBuild list"
+ )
+ add_dependencies(generate_kernels_list luci_micro_circle_reader)
+ add_dependencies(luci_interpreter_micro_arm_cmake generate_kernels_list)
+
+endif ()
+
+# To remove GENERATE_KERNELS_LIST_FROM and KERNELS variable from cmake cache
+unset(GENERATE_KERNELS_LIST_FROM CACHE)
+unset(KERNELS CACHE)
+unset(USE_STATIC_KERNEL CACHE)
+unset(DIS_QUANT CACHE)
+unset(DIS_FLOAT CACHE)
+unset(ENABLE_ONERT_MICRO_TEST CACHE)
+unset(NOT_BUILD_EXTERNALS CACHE)
+
+set(MICRO_ARM_BINARY "${MICRO_ARM_BUILD_DIR}/luci-interpreter/src/libluci_interpreter_micro.a")
+
+add_custom_command(
+ OUTPUT "${MICRO_ARM_BINARY}"
+ COMMAND "${CMAKE_MAKE_PROGRAM}" luci_interpreter_micro -j ${CPU_COUNT}
+ WORKING_DIRECTORY "${MICRO_ARM_BUILD_DIR}"
+ DEPENDS luci_interpreter_micro_arm_cmake luci_micro_circle_schema
+ VERBATIM
+)
+
+add_custom_target(luci_interpreter_micro_arm DEPENDS "${MICRO_ARM_BINARY}")
+
+add_subdirectory(eval-driver)
+
+if (NOT DEFINED BUILD_TEST)
+ return()
+endif ()
+
+#MBED OS QEMU build
+nnas_find_package(MbedOSSource EXACT 6.15 QUIET)
+
+if (NOT MbedOSSource_FOUND)
+ message(STATUS "Skipping luci-micro: MbedOSSource not found")
+ return()
+endif ()
+
+set(MBED_OS_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/tests/mbed-os")
+file(MAKE_DIRECTORY "${MBED_OS_BUILD_DIR}")
+
+set(MBED_OS_BUILD_DEPENDENCY "${MBED_OS_BUILD_DIR}/CMakeCache.txt")
+
+set(ONERTMICRO_SRC_DIR "${NNAS_PROJECT_SOURCE_DIR}/onert-micro")
+
+add_custom_command(
+ OUTPUT "${MBED_OS_BUILD_DEPENDENCY}"
+ COMMAND "${CMAKE_COMMAND}" "${CMAKE_CURRENT_SOURCE_DIR}/tests/mbed-os"
+ -DMICRO_ARM_BUILD_DIR=${MICRO_ARM_BUILD_DIR}
+ -DMbedOSSource_DIR=${MbedOSSource_DIR}
+ -DFlatBuffersSource_DIR=${FlatBuffersSource_DIR}
+ -DONERTMICRO_SRC_DIR=${ONERTMICRO_SRC_DIR}
+ WORKING_DIRECTORY "${MBED_OS_BUILD_DIR}"
+ DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/tests/mbed-os/CMakeLists.txt"
+ VERBATIM
+)
+
+add_custom_target(mbed_os_cmake DEPENDS "${MBED_OS_BUILD_DEPENDENCY}")
+
+set(MBED_OS_BINARY "${MBED_OS_BUILD_DIR}/libmbed_os.a")
+
+add_custom_command(
+ OUTPUT "${MBED_OS_BINARY}"
+ COMMAND "${CMAKE_MAKE_PROGRAM}" mbed_os -j ${CPU_COUNT}
+ WORKING_DIRECTORY "${MBED_OS_BUILD_DIR}"
+ DEPENDS mbed_os_cmake
+ VERBATIM
+)
+
+add_custom_target(mbed_os_arm DEPENDS "${MBED_OS_BINARY}")
+
+set(BUILD_TEST_BINARY "${MBED_OS_BUILD_DIR}/build_test.bin")
+
+add_custom_command(
+ OUTPUT "${BUILD_TEST_BINARY}"
+ COMMAND "${CMAKE_MAKE_PROGRAM}" build_test -j ${CPU_COUNT}
+ WORKING_DIRECTORY "${MBED_OS_BUILD_DIR}"
+ DEPENDS mbed_os_arm "${CMAKE_CURRENT_SOURCE_DIR}/tests/mbed-os/main.cpp" ${MICRO_ARM_BINARY}
+ VERBATIM
+)
+add_custom_target(onert_micro_build_test_arm DEPENDS "${BUILD_TEST_BINARY}")
diff --git a/onert-micro/README.md b/onert-micro/README.md
new file mode 100644
index 000000000..062dbdb79
--- /dev/null
+++ b/onert-micro/README.md
@@ -0,0 +1,128 @@
+# onert-micro
+
+`onert-micro` (a.k.a. `luci-micro`) is an MCU-specialized build of luci-interpreter with several benchmark applications.
+
+## Contents
+
+onert-micro contains cmake infrastructure to build:
+- stand-alone interpreter library
+- benchmark applications using luci interpreter on arm MCUs
+
+## How to build the stand-alone library
+
+The stand-alone library is built by the `luci_interpreter_micro_arm` target.
+The resulting library will be placed in `<ONE root>/build/compiler/luci-micro/standalone_arm/luci-interpreter/src/libluci_interpreter.a`.
+
+### Prerequisites
+
+- Everything you need for ONE project: see [how-to-build-compiler.md](../../docs/howto/how-to-build-compiler.md)
+- arm-none-eabi-gcc and arm-none-eabi-g++ compilers
+
+To install the needed arm compilers on Ubuntu:
+```
+$ sudo apt-get install gcc-arm-none-eabi
+```
+
+**cmake build**
+
+``` bash
+$ cd <path to ONE>
+$ mkdir build
+$ cd build
+$ cmake ../infra/onert-micro
+$ make -j$(nproc) luci_interpreter_micro_arm
+```
+
+### Known issues
+
+The interpreter uses TensorFlow headers that produce warnings.
+
+The `Linux` x86 build uses the "-isystem" flag to suppress warnings from external sources,
+but some old arm compilers have issues with it:
+[bug](https://bugs.launchpad.net/gcc-arm-embedded/+bug/1698539)
+
+The `-isystem` hack is therefore disabled for the MCU build; as a result, the MCU build breaks if the `-Werror` flag is set.
+
+## How to use
+
+### Convert tflite model to circle model
+
+To run inference with a tflite model, you need to convert it to the circle model format (https://github.com/Samsung/ONE/blob/master/res/CircleSchema/0.4/circle_schema.fbs).
+Please refer to the `tflite2circle` tool (https://github.com/Samsung/ONE/tree/master/compiler/tflite2circle) for this purpose.
+
+### Convert to c array model
+
+Many MCU platforms lack file system support. The proper way to provide a model to onert-micro is to convert it into a C array so that it can be compiled into the MCU binary.
+
+``` bash
+xxd -i model.circle > model.h
+```
+
+Then, model.h looks like this:
+
+``` cpp
+unsigned char model_circle[] = {
+ 0x22, 0x01, 0x00, 0x00, 0xf0, 0x00, 0x0e, 0x00,
+ // .....
+};
+unsigned int model_circle_len = 1004;
+```
+
+### API
+
+Once you have a C array model, you are ready to use onert-micro.
+
+To run a model with onert-micro, follow these steps:
+
+1. Include onert-micro header
+
+``` cpp
+#include <luci_interpreter/Interpreter.h>
+```
+
+2. Create interpreter instance
+
+The onert-micro interpreter expects the model as a C array, as mentioned in the [previous section](#convert-to-c-array-model).
+
+``` cpp
+#include "model.h"
+
+luci_interpreter::Interpreter interpreter(model_circle, true);
+```
+
+3. Feed input data
+
+Feeding input data into the interpreter takes two steps: 1) allocate the input tensors and 2) copy the input data into them.
+
+``` cpp
+ for (int32_t i = 0; i < num_inputs; i++)
+ {
+ auto input_data = reinterpret_cast<char *>(interpreter.allocateInputTensor(i));
+ readDataFromFile(std::string(input_prefix) + std::to_string(i), input_data,
+ interpreter.getInputDataSizeByIndex(i));
+ }
+```
+
+4. Do inference
+
+``` cpp
+ interpreter.interpret();
+```
+
+5. Get output data
+
+``` cpp
+ auto data = interpreter.readOutputTensor(i);
+```
+
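+Putting the steps together, here is a minimal end-to-end sketch. It assumes the hypothetical `model.h` from above and a single-input, single-output model, and uses only the interpreter calls shown in the steps:
+
+``` cpp
+#include <luci_interpreter/Interpreter.h>
+
+#include <cstring>
+
+#include "model.h" // hypothetical C array model (model_circle)
+
+int main()
+{
+  luci_interpreter::Interpreter interpreter(model_circle, true);
+
+  // Allocate the input tensor and fill it (here with zeros, as dummy data).
+  auto *input = reinterpret_cast<char *>(interpreter.allocateInputTensor(0));
+  std::memset(input, 0, interpreter.getInputDataSizeByIndex(0));
+
+  // Run inference.
+  interpreter.interpret();
+
+  // Read the output; getOutputDataSizeByIndex(0) bytes are valid.
+  auto *output = interpreter.readOutputTensor(0);
+  (void)output;
+
+  return 0;
+}
+```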
+
+### Reduce Binary Size
+
+onert-micro provides compile flags to generate a reduced-size binary; the guard pattern these flags enable is sketched after the list.
+
+- `DIS_QUANT` : Flag for Disabling Quantized Type Operation
+- `DIS_FLOAT` : Flag for Disabling Float Operation
+- `DIS_DYN_SHAPES` : Flag for Disabling Dynamic Shape Support
+
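+The flags are plain compile definitions (see `add_definitions(-DDIS_FLOAT)` in the CMakeLists above), so guarded code paths are removed at compile time. A schematic sketch of the pattern with a hypothetical kernel function:
+
+``` cpp
+// Hypothetical kernel illustrating how a DIS_* flag strips a code path.
+void evalKernel(bool is_float_model)
+{
+#ifndef DIS_FLOAT
+  if (is_float_model)
+  {
+    // Float path: present only when DIS_FLOAT is not defined.
+    return;
+  }
+#endif
+  // Quantized / remaining paths.
+}
+```
+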
+Also, you can build the onert-micro library with only the kernels used in your target models.
+To do this, remove all kernels from [KernelsToBuild.lst](./luci-interpreter/pal/mcu/KernelsToBuild.lst) except those in your target model.
diff --git a/onert-micro/eval-driver/CMakeLists.txt b/onert-micro/eval-driver/CMakeLists.txt
new file mode 100644
index 000000000..2a1b73ad6
--- /dev/null
+++ b/onert-micro/eval-driver/CMakeLists.txt
@@ -0,0 +1,13 @@
+set(SRCS_EVAL_TESTER Driver.cpp)
+
+add_executable(onert_micro_eval_driver ${SRCS_EVAL_TESTER})
+
+# This variable is needed to separate standalone interpreter libraries from the libraries used in driver
+set(READER_SUFFIX "_driver")
+
+add_subdirectory(${NNAS_PROJECT_SOURCE_DIR}/onert-micro/luci-interpreter ${CMAKE_CURRENT_BINARY_DIR}/luci-interpreter)
+
+target_include_directories(onert_micro_eval_driver PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/luci-interpreter/include")
+target_link_libraries(onert_micro_eval_driver PUBLIC luci_interpreter_micro)
+
+install(TARGETS onert_micro_eval_driver DESTINATION bin)
diff --git a/onert-micro/eval-driver/Driver.cpp b/onert-micro/eval-driver/Driver.cpp
new file mode 100644
index 000000000..d29c41da3
--- /dev/null
+++ b/onert-micro/eval-driver/Driver.cpp
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <luci_interpreter/Interpreter.h>
+
+#include <stdexcept>
+#include <cstdlib>
+#include <fstream>
+#include <vector>
+#include <string>
+#include <iostream>
+
+namespace
+{
+
+using DataBuffer = std::vector<char>;
+
+void readDataFromFile(const std::string &filename, char *data, size_t data_size)
+{
+ std::ifstream fs(filename, std::ifstream::binary);
+ if (fs.fail())
+ throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
+ if (fs.read(data, data_size).fail())
+ throw std::runtime_error("Failed to read data from file \"" + filename + "\".\n");
+}
+
+void writeDataToFile(const std::string &filename, const char *data, size_t data_size)
+{
+ std::ofstream fs(filename, std::ofstream::binary);
+ if (fs.fail())
+ throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
+ if (fs.write(data, data_size).fail())
+ {
+ throw std::runtime_error("Failed to write data to file \"" + filename + "\".\n");
+ }
+}
+
+} // namespace
+
+/*
+ * @brief EvalDriver main
+ *
+ * Driver for testing luci-interpreter
+ *
+ */
+int entry(int argc, char **argv)
+{
+ if (argc != 5)
+ {
+ std::cerr
+ << "Usage: " << argv[0]
+ << " <path/to/circle/model> <num_inputs> <path/to/input/prefix> <path/to/output/file>\n";
+ return EXIT_FAILURE;
+ }
+
+ const char *filename = argv[1];
+ const int32_t num_inputs = atoi(argv[2]);
+ const char *input_prefix = argv[3];
+ const char *output_file = argv[4];
+
+ std::ifstream file(filename, std::ios::binary | std::ios::in);
+ if (!file.good())
+ {
+ std::string errmsg = "Failed to open file";
+ throw std::runtime_error(errmsg.c_str());
+ }
+
+ file.seekg(0, std::ios::end);
+ auto fileSize = file.tellg();
+ file.seekg(0, std::ios::beg);
+
+  // allocate a buffer of the file size
+ DataBuffer model_data(fileSize);
+
+ // read the data
+ file.read(model_data.data(), fileSize);
+ if (file.fail())
+ {
+ std::string errmsg = "Failed to read file";
+ throw std::runtime_error(errmsg.c_str());
+ }
+
+ // Create interpreter.
+ luci_interpreter::Interpreter interpreter(model_data.data(), true);
+
+ // Set input.
+ // Data for n'th input is read from ${input_prefix}n
+ // (ex: Add.circle.input0, Add.circle.input1 ..)
+ int num_inference = 1;
+ for (int j = 0; j < num_inference; ++j)
+ {
+ for (int32_t i = 0; i < num_inputs; i++)
+ {
+ auto input_data = reinterpret_cast<char *>(interpreter.allocateInputTensor(i));
+ readDataFromFile(std::string(input_prefix) + std::to_string(i), input_data,
+ interpreter.getInputDataSizeByIndex(i));
+ }
+
+ // Do inference.
+ interpreter.interpret();
+ }
+
+ // Get output.
+ int num_outputs = 1;
+ for (int i = 0; i < num_outputs; i++)
+ {
+ auto data = interpreter.readOutputTensor(i);
+
+ // Output data is written in ${output_file}
+ // (ex: Add.circle.output0)
+ writeDataToFile(std::string(output_file) + std::to_string(i), reinterpret_cast<char *>(data),
+ interpreter.getOutputDataSizeByIndex(i));
+ }
+ return EXIT_SUCCESS;
+}
+
+int entry(int argc, char **argv);
+
+#ifdef NDEBUG
+int main(int argc, char **argv)
+{
+ try
+ {
+ return entry(argc, argv);
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << "ERROR: " << e.what() << std::endl;
+ }
+
+ return 255;
+}
+#else // NDEBUG
+int main(int argc, char **argv)
+{
+ // NOTE main does not catch internal exceptions for debug build to make it easy to
+ // check the stacktrace with a debugger
+ return entry(argc, argv);
+}
+#endif // !NDEBUG
diff --git a/onert-micro/externals/CMakeLists.txt b/onert-micro/externals/CMakeLists.txt
new file mode 100644
index 000000000..221001cfe
--- /dev/null
+++ b/onert-micro/externals/CMakeLists.txt
@@ -0,0 +1,9 @@
+unset(OUTPUT_FILES)
+set(OUTPUT_FILES "${ONERT_MICRO_EXTERNAL_DIR}/gen/circle-generated/circle/schema_generated.h")
+set(TGT luci_micro_circle_schema)
+
+# NOTE This is a header-only library
+add_library(${TGT} STATIC ${OUTPUT_FILES})
+set_target_properties(${TGT} PROPERTIES LINKER_LANGUAGE CXX)
+target_include_directories(${TGT} PUBLIC "${ONERT_MICRO_EXTERNAL_DIR}/gen")
+target_include_directories(${TGT} PUBLIC "${ONERT_MICRO_EXTERNAL_DIR}")
diff --git a/onert-micro/externals/flatbuffers/base.h b/onert-micro/externals/flatbuffers/base.h
new file mode 100644
index 000000000..56bf9b427
--- /dev/null
+++ b/onert-micro/externals/flatbuffers/base.h
@@ -0,0 +1,453 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 Google Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef FLATBUFFERS_BASE_H_
+#define FLATBUFFERS_BASE_H_
+
+// clang-format off
+
+// If activate should be declared and included first.
+#if defined(FLATBUFFERS_MEMORY_LEAK_TRACKING) && \
+ defined(_MSC_VER) && defined(_DEBUG)
+ // The _CRTDBG_MAP_ALLOC inside <crtdbg.h> will replace
+ // calloc/free (etc) to its debug version using #define directives.
+ #define _CRTDBG_MAP_ALLOC
+ #include <stdlib.h>
+ #include <crtdbg.h>
+ // Replace operator new by trace-enabled version.
+ #define DEBUG_NEW new(_NORMAL_BLOCK, __FILE__, __LINE__)
+ #define new DEBUG_NEW
+#endif
+
+#if !defined(FLATBUFFERS_ASSERT)
+#include <assert.h>
+#define FLATBUFFERS_ASSERT assert
+#elif defined(FLATBUFFERS_ASSERT_INCLUDE)
+// Include file with forward declaration
+#include FLATBUFFERS_ASSERT_INCLUDE
+#endif
+
+#ifndef ARDUINO
+#include <cstdint>
+#endif
+
+#include <cstddef>
+#include <cstdlib>
+#include <cstring>
+
+#if defined(ARDUINO) && !defined(ARDUINOSTL_M_H)
+ #include <utility.h>
+#else
+ #include <utility>
+#endif
+
+#include <string>
+#include <type_traits>
+#include <vector>
+#include <set>
+#include <algorithm>
+#include <iterator>
+#include <memory>
+
+#if defined(__unix__) && !defined(FLATBUFFERS_LOCALE_INDEPENDENT)
+ #include <unistd.h>
+#endif
+
+#ifdef _STLPORT_VERSION
+ #define FLATBUFFERS_CPP98_STL
+#endif
+
+#ifdef __ANDROID__
+ #include <android/api-level.h>
+#endif
+
+#if defined(__ICCARM__)
+#include <intrinsics.h>
+#endif
+
+// Note the __clang__ check is needed, because clang presents itself
+// as an older GNUC compiler (4.2).
+// Clang 3.3 and later implement all of the ISO C++ 2011 standard.
+// Clang 3.4 and later implement all of the ISO C++ 2014 standard.
+// http://clang.llvm.org/cxx_status.html
+
+// Note the MSVC value '__cplusplus' may be incorrect:
+// The '__cplusplus' predefined macro in the MSVC stuck at the value 199711L,
+// indicating (erroneously!) that the compiler conformed to the C++98 Standard.
+// This value should be correct starting from MSVC2017-15.7-Preview-3.
+// The '__cplusplus' will be valid only if MSVC2017-15.7-P3 and the `/Zc:__cplusplus` switch is set.
+// Workaround (for details see MSDN):
+// Use the _MSC_VER and _MSVC_LANG definition instead of the __cplusplus for compatibility.
+// The _MSVC_LANG macro reports the Standard version regardless of the '/Zc:__cplusplus' switch.
+
+#if defined(__GNUC__) && !defined(__clang__)
+ #define FLATBUFFERS_GCC (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
+#else
+ #define FLATBUFFERS_GCC 0
+#endif
+
+#if defined(__clang__)
+ #define FLATBUFFERS_CLANG (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__)
+#else
+ #define FLATBUFFERS_CLANG 0
+#endif
+
+/// @cond FLATBUFFERS_INTERNAL
+#if __cplusplus <= 199711L && \
+ (!defined(_MSC_VER) || _MSC_VER < 1600) && \
+ (!defined(__GNUC__) || \
+ (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__ < 40400))
+ #error A C++11 compatible compiler with support for the auto typing is \
+ required for FlatBuffers.
+ #error __cplusplus _MSC_VER __GNUC__ __GNUC_MINOR__ __GNUC_PATCHLEVEL__
+#endif
+
+#if !defined(__clang__) && \
+ defined(__GNUC__) && \
+ (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__ < 40600)
+ // Backwards compatibility for g++ 4.4, and 4.5 which don't have the nullptr
+ // and constexpr keywords. Note the __clang__ check is needed, because clang
+ // presents itself as an older GNUC compiler.
+ #ifndef nullptr_t
+ const class nullptr_t {
+ public:
+ template<class T> inline operator T*() const { return 0; }
+ private:
+ void operator&() const;
+ } nullptr = {};
+ #endif
+ #ifndef constexpr
+ #define constexpr const
+ #endif
+#endif
+
+// The wire format uses a little endian encoding (since that's efficient for
+// the common platforms).
+#if defined(__s390x__)
+ #define FLATBUFFERS_LITTLEENDIAN 0
+#endif // __s390x__
+#if !defined(FLATBUFFERS_LITTLEENDIAN)
+ #if defined(__GNUC__) || defined(__clang__) || defined(__ICCARM__)
+ #if (defined(__BIG_ENDIAN__) || \
+ (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))
+ #define FLATBUFFERS_LITTLEENDIAN 0
+ #else
+ #define FLATBUFFERS_LITTLEENDIAN 1
+ #endif // __BIG_ENDIAN__
+ #elif defined(_MSC_VER)
+ #if defined(_M_PPC)
+ #define FLATBUFFERS_LITTLEENDIAN 0
+ #else
+ #define FLATBUFFERS_LITTLEENDIAN 1
+ #endif
+ #else
+ #error Unable to determine endianness, define FLATBUFFERS_LITTLEENDIAN.
+ #endif
+#endif // !defined(FLATBUFFERS_LITTLEENDIAN)
+
+#define FLATBUFFERS_VERSION_MAJOR 2
+#define FLATBUFFERS_VERSION_MINOR 0
+#define FLATBUFFERS_VERSION_REVISION 0
+#define FLATBUFFERS_STRING_EXPAND(X) #X
+#define FLATBUFFERS_STRING(X) FLATBUFFERS_STRING_EXPAND(X)
+namespace flatbuffers {
+ // Returns version as string "MAJOR.MINOR.REVISION".
+ const char* FLATBUFFERS_VERSION();
+}
+
+#if (!defined(_MSC_VER) || _MSC_VER > 1600) && \
+ (!defined(__GNUC__) || (__GNUC__ * 100 + __GNUC_MINOR__ >= 407)) || \
+ defined(__clang__)
+ #define FLATBUFFERS_FINAL_CLASS final
+ #define FLATBUFFERS_OVERRIDE override
+ #define FLATBUFFERS_EXPLICIT_CPP11 explicit
+ #define FLATBUFFERS_VTABLE_UNDERLYING_TYPE : flatbuffers::voffset_t
+#else
+ #define FLATBUFFERS_FINAL_CLASS
+ #define FLATBUFFERS_OVERRIDE
+ #define FLATBUFFERS_EXPLICIT_CPP11
+ #define FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+#endif
+
+#if (!defined(_MSC_VER) || _MSC_VER >= 1900) && \
+ (!defined(__GNUC__) || (__GNUC__ * 100 + __GNUC_MINOR__ >= 406)) || \
+ (defined(__cpp_constexpr) && __cpp_constexpr >= 200704)
+ #define FLATBUFFERS_CONSTEXPR constexpr
+ #define FLATBUFFERS_CONSTEXPR_CPP11 constexpr
+ #define FLATBUFFERS_CONSTEXPR_DEFINED
+#else
+ #define FLATBUFFERS_CONSTEXPR const
+ #define FLATBUFFERS_CONSTEXPR_CPP11
+#endif
+
+#if (defined(__cplusplus) && __cplusplus >= 201402L) || \
+ (defined(__cpp_constexpr) && __cpp_constexpr >= 201304)
+ #define FLATBUFFERS_CONSTEXPR_CPP14 FLATBUFFERS_CONSTEXPR_CPP11
+#else
+ #define FLATBUFFERS_CONSTEXPR_CPP14
+#endif
+
+#if (defined(__GXX_EXPERIMENTAL_CXX0X__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 406)) || \
+ (defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190023026)) || \
+ defined(__clang__)
+ #define FLATBUFFERS_NOEXCEPT noexcept
+#else
+ #define FLATBUFFERS_NOEXCEPT
+#endif
+
+// NOTE: the FLATBUFFERS_DELETE_FUNC macro may change the access mode to
+// private, so be sure to put it at the end or reset access mode explicitly.
+#if (!defined(_MSC_VER) || _MSC_FULL_VER >= 180020827) && \
+ (!defined(__GNUC__) || (__GNUC__ * 100 + __GNUC_MINOR__ >= 404)) || \
+ defined(__clang__)
+ #define FLATBUFFERS_DELETE_FUNC(func) func = delete
+#else
+ #define FLATBUFFERS_DELETE_FUNC(func) private: func
+#endif
+
+#if (!defined(_MSC_VER) || _MSC_VER >= 1900) && \
+ (!defined(__GNUC__) || (__GNUC__ * 100 + __GNUC_MINOR__ >= 409)) || \
+ defined(__clang__)
+ #define FLATBUFFERS_DEFAULT_DECLARATION
+#endif
+
+// Check if we can use template aliases
+// Not possible if Microsoft Compiler before 2012
+// Possible is the language feature __cpp_alias_templates is defined well
+// Or possible if the C++ std is C+11 or newer
+#if (defined(_MSC_VER) && _MSC_VER > 1700 /* MSVC2012 */) \
+ || (defined(__cpp_alias_templates) && __cpp_alias_templates >= 200704) \
+ || (defined(__cplusplus) && __cplusplus >= 201103L)
+ #define FLATBUFFERS_TEMPLATES_ALIASES
+#endif
+
+#ifndef FLATBUFFERS_HAS_STRING_VIEW
+ // Only provide flatbuffers::string_view if __has_include can be used
+ // to detect a header that provides an implementation
+ #if defined(__has_include)
+ // Check for std::string_view (in c++17)
+ #if __has_include(<string_view>) && (__cplusplus >= 201606 || (defined(_HAS_CXX17) && _HAS_CXX17))
+ #include <string_view>
+ namespace flatbuffers {
+ typedef std::string_view string_view;
+ }
+ #define FLATBUFFERS_HAS_STRING_VIEW 1
+ // Check for std::experimental::string_view (in c++14, compiler-dependent)
+ #elif __has_include(<experimental/string_view>) && (__cplusplus >= 201411)
+ #include <experimental/string_view>
+ namespace flatbuffers {
+ typedef std::experimental::string_view string_view;
+ }
+ #define FLATBUFFERS_HAS_STRING_VIEW 1
+ // Check for absl::string_view
+ #elif __has_include("absl/strings/string_view.h")
+ #include "absl/strings/string_view.h"
+ namespace flatbuffers {
+ typedef absl::string_view string_view;
+ }
+ #define FLATBUFFERS_HAS_STRING_VIEW 1
+ #endif
+ #endif // __has_include
+#endif // !FLATBUFFERS_HAS_STRING_VIEW
+
+#ifndef FLATBUFFERS_HAS_NEW_STRTOD
+ // Modern (C++11) strtod and strtof functions are available for use.
+ // 1) nan/inf strings as argument of strtod;
+ // 2) hex-float as argument of strtod/strtof.
+ #if (defined(_MSC_VER) && _MSC_VER >= 1900) || \
+ (defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 409)) || \
+ (defined(__clang__))
+ #define FLATBUFFERS_HAS_NEW_STRTOD 1
+ #endif
+#endif // !FLATBUFFERS_HAS_NEW_STRTOD
+
+#ifndef FLATBUFFERS_LOCALE_INDEPENDENT
+ // Enable locale independent functions {strtof_l, strtod_l,strtoll_l, strtoull_l}.
+ #if ((defined(_MSC_VER) && _MSC_VER >= 1800) || \
+ (defined(_XOPEN_VERSION) && (_XOPEN_VERSION>=700)) && (!defined(__ANDROID_API__) || (defined(__ANDROID_API__) && (__ANDROID_API__>=21))))
+ #define FLATBUFFERS_LOCALE_INDEPENDENT 1
+ #else
+ #define FLATBUFFERS_LOCALE_INDEPENDENT 0
+ #endif
+#endif // !FLATBUFFERS_LOCALE_INDEPENDENT
+
+// Suppress Undefined Behavior Sanitizer (recoverable only). Usage:
+// - __supress_ubsan__("undefined")
+// - __supress_ubsan__("signed-integer-overflow")
+#if defined(__clang__) && (__clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >=7))
+ #define __supress_ubsan__(type) __attribute__((no_sanitize(type)))
+#elif defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 409)
+ #define __supress_ubsan__(type) __attribute__((no_sanitize_undefined))
+#else
+ #define __supress_ubsan__(type)
+#endif
+
+// This is constexpr function used for checking compile-time constants.
+// Avoid `#pragma warning(disable: 4127) // C4127: expression is constant`.
+template<typename T> FLATBUFFERS_CONSTEXPR inline bool IsConstTrue(T t) {
+ return !!t;
+}
+
+// Enable C++ attribute [[]] if std:c++17 or higher.
+#if ((__cplusplus >= 201703L) \
+ || (defined(_MSVC_LANG) && (_MSVC_LANG >= 201703L)))
+ // All attributes unknown to an implementation are ignored without causing an error.
+ #define FLATBUFFERS_ATTRIBUTE(attr) [[attr]]
+
+ #define FLATBUFFERS_FALLTHROUGH() [[fallthrough]]
+#else
+ #define FLATBUFFERS_ATTRIBUTE(attr)
+
+ #if FLATBUFFERS_CLANG >= 30800
+ #define FLATBUFFERS_FALLTHROUGH() [[clang::fallthrough]]
+ #elif FLATBUFFERS_GCC >= 70300
+ #define FLATBUFFERS_FALLTHROUGH() [[gnu::fallthrough]]
+ #else
+ #define FLATBUFFERS_FALLTHROUGH()
+ #endif
+#endif
+
+/// @endcond
+
+/// @file
+namespace flatbuffers {
+
+/// @cond FLATBUFFERS_INTERNAL
+// Our default offset / size type, 32-bit on purpose even on 64-bit systems.
+// Also, using a consistent offset type maintains compatibility of serialized
+// offset values between 32bit and 64bit systems.
+typedef uint32_t uoffset_t;
+
+// Signed offsets for references that can go in both directions.
+typedef int32_t soffset_t;
+
+// Offset/index used in v-tables, can be changed to uint8_t in
+// format forks to save a bit of space if desired.
+typedef uint16_t voffset_t;
+
+typedef uintmax_t largest_scalar_t;
+
+// With a 32-bit soffset_t, this evaluates to 2GB - 1
+#define FLATBUFFERS_MAX_BUFFER_SIZE ((1ULL << (sizeof(::flatbuffers::soffset_t) * 8 - 1)) - 1)
+
+// We support aligning the contents of buffers up to this size.
+#define FLATBUFFERS_MAX_ALIGNMENT 16
+
+inline bool VerifyAlignmentRequirements(size_t align, size_t min_align = 1) {
+ return (min_align <= align) && (align <= (FLATBUFFERS_MAX_ALIGNMENT)) &&
+ (align & (align - 1)) == 0; // must be power of 2
+}
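+
+// A few illustrative evaluations of the check above (not part of the library):
+//   VerifyAlignmentRequirements(8)  -> true  (power of 2, within the limit)
+//   VerifyAlignmentRequirements(12) -> false (not a power of 2)
+//   VerifyAlignmentRequirements(32) -> false (exceeds FLATBUFFERS_MAX_ALIGNMENT)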
+
+#if defined(_MSC_VER)
+ #pragma warning(disable: 4351) // C4351: new behavior: elements of array ... will be default initialized
+ #pragma warning(push)
+ #pragma warning(disable: 4127) // C4127: conditional expression is constant
+#endif
+
+template<typename T> T EndianSwap(T t) {
+ #if defined(_MSC_VER)
+ #define FLATBUFFERS_BYTESWAP16 _byteswap_ushort
+ #define FLATBUFFERS_BYTESWAP32 _byteswap_ulong
+ #define FLATBUFFERS_BYTESWAP64 _byteswap_uint64
+ #elif defined(__ICCARM__)
+ #define FLATBUFFERS_BYTESWAP16 __REV16
+ #define FLATBUFFERS_BYTESWAP32 __REV
+ #define FLATBUFFERS_BYTESWAP64(x) \
+ ((__REV(static_cast<uint32_t>(x >> 32U))) | (static_cast<uint64_t>(__REV(static_cast<uint32_t>(x)))) << 32U)
+ #else
+ #if defined(__GNUC__) && __GNUC__ * 100 + __GNUC_MINOR__ < 408 && !defined(__clang__)
+ // __builtin_bswap16 was missing prior to GCC 4.8.
+ #define FLATBUFFERS_BYTESWAP16(x) \
+ static_cast<uint16_t>(__builtin_bswap32(static_cast<uint32_t>(x) << 16))
+ #else
+ #define FLATBUFFERS_BYTESWAP16 __builtin_bswap16
+ #endif
+ #define FLATBUFFERS_BYTESWAP32 __builtin_bswap32
+ #define FLATBUFFERS_BYTESWAP64 __builtin_bswap64
+ #endif
+ if (sizeof(T) == 1) { // Compile-time if-then's.
+ return t;
+ } else if (sizeof(T) == 2) {
+ union { T t; uint16_t i; } u = { t };
+ u.i = FLATBUFFERS_BYTESWAP16(u.i);
+ return u.t;
+ } else if (sizeof(T) == 4) {
+ union { T t; uint32_t i; } u = { t };
+ u.i = FLATBUFFERS_BYTESWAP32(u.i);
+ return u.t;
+ } else if (sizeof(T) == 8) {
+ union { T t; uint64_t i; } u = { t };
+ u.i = FLATBUFFERS_BYTESWAP64(u.i);
+ return u.t;
+ } else {
+ FLATBUFFERS_ASSERT(0);
+ return t;
+ }
+}
+
+#if defined(_MSC_VER)
+ #pragma warning(pop)
+#endif
+
+
+template<typename T> T EndianScalar(T t) {
+ #if FLATBUFFERS_LITTLEENDIAN
+ return t;
+ #else
+ return EndianSwap(t);
+ #endif
+}
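+
+// Illustrative round trip (assuming a big-endian host; on little-endian
+// hosts both calls are no-ops):
+//   uint32_t wire = EndianScalar(0x11223344u); // stored as 0x44332211
+//   uint32_t host = EndianScalar(wire);        // back to 0x11223344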
+
+template<typename T>
+// UBSAN: C++ aliasing type rules, see std::bit_cast<> for details.
+__supress_ubsan__("alignment")
+T ReadScalar(const void *p) {
+ return EndianScalar(*reinterpret_cast<const T *>(p));
+}
+
+// See https://github.com/google/flatbuffers/issues/5950
+
+#if (FLATBUFFERS_GCC >= 100000) && (FLATBUFFERS_GCC < 110000)
+ #pragma GCC diagnostic push
+ #pragma GCC diagnostic ignored "-Wstringop-overflow"
+#endif
+
+template<typename T>
+// UBSAN: C++ aliasing type rules, see std::bit_cast<> for details.
+__supress_ubsan__("alignment")
+void WriteScalar(void *p, T t) {
+ *reinterpret_cast<T *>(p) = EndianScalar(t);
+}
+
+template<typename T> struct Offset;
+template<typename T> __supress_ubsan__("alignment") void WriteScalar(void *p, Offset<T> t) {
+ *reinterpret_cast<uoffset_t *>(p) = EndianScalar(t.o);
+}
+
+#if (FLATBUFFERS_GCC >= 100000) && (FLATBUFFERS_GCC < 110000)
+ #pragma GCC diagnostic pop
+#endif
+
+// Computes how many bytes you'd have to pad to be able to write a
+// "scalar_size" scalar if the buffer had grown to "buf_size" (downwards in
+// memory).
+__supress_ubsan__("unsigned-integer-overflow")
+inline size_t PaddingBytes(size_t buf_size, size_t scalar_size) {
+ return ((~buf_size) + 1) & (scalar_size - 1);
+}
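+
+// Worked example (illustrative): with buf_size == 9 and scalar_size == 4,
+// ((~9) + 1) & 3 == 3, i.e. 3 padding bytes make the next downward write
+// 4-byte aligned (9 + 3 == 12).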
+
+} // namespace flatbuffers
+#endif // FLATBUFFERS_BASE_H_
diff --git a/onert-micro/externals/flatbuffers/code_generators.h b/onert-micro/externals/flatbuffers/code_generators.h
new file mode 100644
index 000000000..3908ea582
--- /dev/null
+++ b/onert-micro/externals/flatbuffers/code_generators.h
@@ -0,0 +1,234 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2014 Google Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FLATBUFFERS_CODE_GENERATORS_H_
+#define FLATBUFFERS_CODE_GENERATORS_H_
+
+#include <map>
+#include <sstream>
+
+#include "flatbuffers/idl.h"
+
+namespace flatbuffers
+{
+
+// Utility class to assist in generating code through use of text templates.
+//
+// Example code:
+// CodeWriter code("\t");
+// code.SetValue("NAME", "Foo");
+// code += "void {{NAME}}() { printf("%s", "{{NAME}}"); }";
+// code.SetValue("NAME", "Bar");
+// code += "void {{NAME}}() { printf("%s", "{{NAME}}"); }";
+// std::cout << code.ToString() << std::endl;
+//
+// Output:
+// void Foo() { printf("%s", "Foo"); }
+// void Bar() { printf("%s", "Bar"); }
+class CodeWriter
+{
+public:
+ CodeWriter(std::string pad = std::string()) : pad_(pad), cur_ident_lvl_(0), ignore_ident_(false)
+ {
+ }
+
+ // Clears the current "written" code.
+ void Clear()
+ {
+ stream_.str("");
+ stream_.clear();
+ }
+
+ // Associates a key with a value. In all subsequent calls to operator+=,
+ // any occurrence of the key wrapped in {{ and }} delimiters will be
+ // replaced by the given value.
+ void SetValue(const std::string &key, const std::string &value) { value_map_[key] = value; }
+
+ std::string GetValue(const std::string &key) const
+ {
+ const auto it = value_map_.find(key);
+ return it == value_map_.end() ? "" : it->second;
+ }
+
+ // Appends the given text to the generated code as well as a newline
+ // character. Any text within {{ and }} delimiters is replaced by values
+ // previously stored in the CodeWriter by calling SetValue above. The newline
+ // will be suppressed if the text ends with the \\ character.
+ void operator+=(std::string text);
+
+ // Returns the current contents of the CodeWriter as a std::string.
+ std::string ToString() const { return stream_.str(); }
+
+ // Increase the indent level for written code
+ void IncrementIdentLevel() { cur_ident_lvl_++; }
+ // Decrease the indent level for written code
+ void DecrementIdentLevel()
+ {
+ if (cur_ident_lvl_)
+ cur_ident_lvl_--;
+ }
+
+ void SetPadding(const std::string &padding) { pad_ = padding; }
+
+private:
+ std::map<std::string, std::string> value_map_;
+ std::stringstream stream_;
+ std::string pad_;
+ int cur_ident_lvl_;
+ bool ignore_ident_;
+
+ // Add indent padding (tab or space) based on the current indent level
+ void AppendIdent(std::stringstream &stream);
+};
+
+class BaseGenerator
+{
+public:
+ virtual bool generate() = 0;
+
+ static std::string NamespaceDir(const Parser &parser, const std::string &path,
+ const Namespace &ns, const bool dasherize = false);
+
+ static std::string ToDasherizedCase(const std::string pascal_case);
+
+ std::string GeneratedFileName(const std::string &path, const std::string &file_name,
+ const IDLOptions &options) const;
+
+protected:
+ BaseGenerator(const Parser &parser, const std::string &path, const std::string &file_name,
+ std::string qualifying_start, std::string qualifying_separator,
+ std::string default_extension)
+ : parser_(parser), path_(path), file_name_(file_name), qualifying_start_(qualifying_start),
+ qualifying_separator_(qualifying_separator), default_extension_(default_extension)
+ {
+ }
+ virtual ~BaseGenerator() {}
+
+ // No copy/assign.
+ BaseGenerator &operator=(const BaseGenerator &);
+ BaseGenerator(const BaseGenerator &);
+
+ std::string NamespaceDir(const Namespace &ns, const bool dasherize = false) const;
+
+ static const char *FlatBuffersGeneratedWarning();
+
+ static std::string FullNamespace(const char *separator, const Namespace &ns);
+
+ static std::string LastNamespacePart(const Namespace &ns);
+
+ // Tracks the current namespace for early exit in WrapInNameSpace.
+ // C++, Java and C# return a different namespace from the following
+ // default (no early exit, always fully qualify), which works for JS and PHP.
+ virtual const Namespace *CurrentNameSpace() const { return nullptr; }
+
+ // Ensure that a type is prefixed with its namespace even within
+ // its own namespace to avoid conflict between generated method
+ // names and similarly named classes or structs
+ std::string WrapInNameSpace(const Namespace *ns, const std::string &name) const;
+
+ std::string WrapInNameSpace(const Definition &def) const;
+
+ std::string GetNameSpace(const Definition &def) const;
+
+ const Parser &parser_;
+ const std::string &path_;
+ const std::string &file_name_;
+ const std::string qualifying_start_;
+ const std::string qualifying_separator_;
+ const std::string default_extension_;
+};
+
+struct CommentConfig
+{
+ const char *first_line;
+ const char *content_line_prefix;
+ const char *last_line;
+};
+
+extern void GenComment(const std::vector<std::string> &dc, std::string *code_ptr,
+ const CommentConfig *config, const char *prefix = "");
+
+class FloatConstantGenerator
+{
+public:
+ virtual ~FloatConstantGenerator() {}
+ std::string GenFloatConstant(const FieldDef &field) const;
+
+private:
+ virtual std::string Value(double v, const std::string &src) const = 0;
+ virtual std::string Inf(double v) const = 0;
+ virtual std::string NaN(double v) const = 0;
+
+ virtual std::string Value(float v, const std::string &src) const = 0;
+ virtual std::string Inf(float v) const = 0;
+ virtual std::string NaN(float v) const = 0;
+
+ template <typename T> std::string GenFloatConstantImpl(const FieldDef &field) const;
+};
+
+class SimpleFloatConstantGenerator : public FloatConstantGenerator
+{
+public:
+ SimpleFloatConstantGenerator(const char *nan_number, const char *pos_inf_number,
+ const char *neg_inf_number);
+
+private:
+ std::string Value(double v, const std::string &src) const FLATBUFFERS_OVERRIDE;
+ std::string Inf(double v) const FLATBUFFERS_OVERRIDE;
+ std::string NaN(double v) const FLATBUFFERS_OVERRIDE;
+
+ std::string Value(float v, const std::string &src) const FLATBUFFERS_OVERRIDE;
+ std::string Inf(float v) const FLATBUFFERS_OVERRIDE;
+ std::string NaN(float v) const FLATBUFFERS_OVERRIDE;
+
+ const std::string nan_number_;
+ const std::string pos_inf_number_;
+ const std::string neg_inf_number_;
+};
+
+// C++, C#, Java like generator.
+class TypedFloatConstantGenerator : public FloatConstantGenerator
+{
+public:
+ TypedFloatConstantGenerator(const char *double_prefix, const char *single_prefix,
+ const char *nan_number, const char *pos_inf_number,
+ const char *neg_inf_number = "");
+
+private:
+ std::string Value(double v, const std::string &src) const FLATBUFFERS_OVERRIDE;
+ std::string Inf(double v) const FLATBUFFERS_OVERRIDE;
+
+ std::string NaN(double v) const FLATBUFFERS_OVERRIDE;
+
+ std::string Value(float v, const std::string &src) const FLATBUFFERS_OVERRIDE;
+ std::string Inf(float v) const FLATBUFFERS_OVERRIDE;
+ std::string NaN(float v) const FLATBUFFERS_OVERRIDE;
+
+ std::string MakeNaN(const std::string &prefix) const;
+ std::string MakeInf(bool neg, const std::string &prefix) const;
+
+ const std::string double_prefix_;
+ const std::string single_prefix_;
+ const std::string nan_number_;
+ const std::string pos_inf_number_;
+ const std::string neg_inf_number_;
+};
+
+} // namespace flatbuffers
+
+#endif // FLATBUFFERS_CODE_GENERATORS_H_
diff --git a/onert-micro/externals/flatbuffers/flatbuffers.h b/onert-micro/externals/flatbuffers/flatbuffers.h
new file mode 100644
index 000000000..3005d8921
--- /dev/null
+++ b/onert-micro/externals/flatbuffers/flatbuffers.h
@@ -0,0 +1,3078 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2014 Google Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FLATBUFFERS_H_
+#define FLATBUFFERS_H_
+
+#include "flatbuffers/base.h"
+#include "flatbuffers/stl_emulation.h"
+
+#ifndef FLATBUFFERS_CPP98_STL
+#include <functional>
+#endif
+
+#if defined(FLATBUFFERS_NAN_DEFAULTS)
+#include <cmath>
+#endif
+
+namespace flatbuffers
+{
+// Generic 'operator==' with conditional specialisations.
+// T e - new value of a scalar field.
+// T def - default of scalar (is known at compile-time).
+template <typename T> inline bool IsTheSameAs(T e, T def) { return e == def; }
+
+#if defined(FLATBUFFERS_NAN_DEFAULTS) && defined(FLATBUFFERS_HAS_NEW_STRTOD) && \
+ (FLATBUFFERS_HAS_NEW_STRTOD > 0)
+// Like `operator==(e, def)` with weak NaN if T=(float|double).
+template <typename T> inline bool IsFloatTheSameAs(T e, T def)
+{
+ return (e == def) || ((def != def) && (e != e));
+}
+template <> inline bool IsTheSameAs<float>(float e, float def) { return IsFloatTheSameAs(e, def); }
+template <> inline bool IsTheSameAs<double>(double e, double def)
+{
+ return IsFloatTheSameAs(e, def);
+}
+#endif
+
+// Check 'v' is out of closed range [low; high].
+// Workaround for GCC warning [-Werror=type-limits]:
+// comparison is always true due to limited range of data type.
+template <typename T> inline bool IsOutRange(const T &v, const T &low, const T &high)
+{
+ return (v < low) || (high < v);
+}
+
+// Check 'v' is in closed range [low; high].
+template <typename T> inline bool IsInRange(const T &v, const T &low, const T &high)
+{
+ return !IsOutRange(v, low, high);
+}
+
+// Wrapper for uoffset_t to allow safe template specialization.
+// Value is allowed to be 0 to indicate a null object (see e.g. AddOffset).
+template <typename T> struct Offset
+{
+ uoffset_t o;
+ Offset() : o(0) {}
+ Offset(uoffset_t _o) : o(_o) {}
+ Offset<void> Union() const { return Offset<void>(o); }
+ bool IsNull() const { return !o; }
+};
+
+inline void EndianCheck()
+{
+ int endiantest = 1;
+ // If this fails, see FLATBUFFERS_LITTLEENDIAN above.
+ FLATBUFFERS_ASSERT(*reinterpret_cast<char *>(&endiantest) == FLATBUFFERS_LITTLEENDIAN);
+ (void)endiantest;
+}
+
+template <typename T> FLATBUFFERS_CONSTEXPR size_t AlignOf()
+{
+#ifdef _MSC_VER
+ return __alignof(T);
+#else
+#ifndef alignof
+ return __alignof__(T);
+#else
+ return alignof(T);
+#endif
+#endif
+}
+
+// When we read serialized data from memory, in the case of most scalars,
+// we want to just read T, but in the case of Offset, we want to actually
+// perform the indirection and return a pointer.
+// The template specialization below does just that.
+// It is wrapped in a struct since function templates can't overload on the
+// return type like this.
+// The typedef is for the convenience of callers of this function
+// (avoiding the need for a trailing return decltype)
+template <typename T> struct IndirectHelper
+{
+ typedef T return_type;
+ typedef T mutable_return_type;
+ static const size_t element_stride = sizeof(T);
+ static return_type Read(const uint8_t *p, uoffset_t i)
+ {
+ return EndianScalar((reinterpret_cast<const T *>(p))[i]);
+ }
+};
+template <typename T> struct IndirectHelper<Offset<T>>
+{
+ typedef const T *return_type;
+ typedef T *mutable_return_type;
+ static const size_t element_stride = sizeof(uoffset_t);
+ static return_type Read(const uint8_t *p, uoffset_t i)
+ {
+ p += i * sizeof(uoffset_t);
+ return reinterpret_cast<return_type>(p + ReadScalar<uoffset_t>(p));
+ }
+};
+template <typename T> struct IndirectHelper<const T *>
+{
+ typedef const T *return_type;
+ typedef T *mutable_return_type;
+ static const size_t element_stride = sizeof(T);
+ static return_type Read(const uint8_t *p, uoffset_t i)
+ {
+ return reinterpret_cast<const T *>(p + i * sizeof(T));
+ }
+};
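+
+// Illustrative summary of the three cases above (not part of the library):
+//   IndirectHelper<uint32_t>::Read(p, i)  -> i-th scalar value (endian-swapped)
+//   IndirectHelper<Offset<T>>::Read(p, i) -> follows the stored uoffset_t and
+//                                            returns a const T* to a table/string
+//   IndirectHelper<const T*>::Read(p, i)  -> pointer to the i-th inline struct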
+
+// An STL compatible iterator implementation for Vector below, effectively
+// calling Get() for every element.
+template <typename T, typename IT> struct VectorIterator
+{
+ typedef std::random_access_iterator_tag iterator_category;
+ typedef IT value_type;
+ typedef ptrdiff_t difference_type;
+ typedef IT *pointer;
+ typedef IT &reference;
+
+ VectorIterator(const uint8_t *data, uoffset_t i)
+ : data_(data + IndirectHelper<T>::element_stride * i)
+ {
+ }
+ VectorIterator(const VectorIterator &other) : data_(other.data_) {}
+ VectorIterator() : data_(nullptr) {}
+
+ VectorIterator &operator=(const VectorIterator &other)
+ {
+ data_ = other.data_;
+ return *this;
+ }
+
+#if !defined(FLATBUFFERS_CPP98_STL)
+ VectorIterator &operator=(VectorIterator &&other)
+ {
+ data_ = other.data_;
+ return *this;
+ }
+#endif // !defined(FLATBUFFERS_CPP98_STL)
+
+ bool operator==(const VectorIterator &other) const { return data_ == other.data_; }
+
+ bool operator<(const VectorIterator &other) const { return data_ < other.data_; }
+
+ bool operator!=(const VectorIterator &other) const { return data_ != other.data_; }
+
+ difference_type operator-(const VectorIterator &other) const
+ {
+ return (data_ - other.data_) / IndirectHelper<T>::element_stride;
+ }
+
+ // Note: return type is incompatible with the standard
+ // `reference operator*()`.
+ IT operator*() const { return IndirectHelper<T>::Read(data_, 0); }
+
+ // Note: return type is incompatible with the standard
+ // `pointer operator->()`.
+ IT operator->() const { return IndirectHelper<T>::Read(data_, 0); }
+
+ VectorIterator &operator++()
+ {
+ data_ += IndirectHelper<T>::element_stride;
+ return *this;
+ }
+
+ VectorIterator operator++(int)
+ {
+ VectorIterator temp(data_, 0);
+ data_ += IndirectHelper<T>::element_stride;
+ return temp;
+ }
+
+ VectorIterator operator+(const uoffset_t &offset) const
+ {
+ return VectorIterator(data_ + offset * IndirectHelper<T>::element_stride, 0);
+ }
+
+ VectorIterator &operator+=(const uoffset_t &offset)
+ {
+ data_ += offset * IndirectHelper<T>::element_stride;
+ return *this;
+ }
+
+ VectorIterator &operator--()
+ {
+ data_ -= IndirectHelper<T>::element_stride;
+ return *this;
+ }
+
+ VectorIterator operator--(int)
+ {
+ VectorIterator temp(data_, 0);
+ data_ -= IndirectHelper<T>::element_stride;
+ return temp;
+ }
+
+ VectorIterator operator-(const uoffset_t &offset) const
+ {
+ return VectorIterator(data_ - offset * IndirectHelper<T>::element_stride, 0);
+ }
+
+ VectorIterator &operator-=(const uoffset_t &offset)
+ {
+ data_ -= offset * IndirectHelper<T>::element_stride;
+ return *this;
+ }
+
+private:
+ const uint8_t *data_;
+};
+
+template <typename Iterator> struct VectorReverseIterator : public std::reverse_iterator<Iterator>
+{
+ explicit VectorReverseIterator(Iterator iter) : std::reverse_iterator<Iterator>(iter) {}
+
+ // Note: return type is incompatible with the standard
+ // `reference operator*()`.
+ typename Iterator::value_type operator*() const
+ {
+ auto tmp = std::reverse_iterator<Iterator>::current;
+ return *--tmp;
+ }
+
+ // Note: return type is incompatible with the standard
+ // `pointer operator->()`.
+ typename Iterator::value_type operator->() const
+ {
+ auto tmp = std::reverse_iterator<Iterator>::current;
+ return *--tmp;
+ }
+};
+
+struct String;
+
+// This is used as a helper type for accessing vectors.
+// Vector::data() assumes the vector elements start after the length field.
+template <typename T> class Vector
+{
+public:
+ typedef VectorIterator<T, typename IndirectHelper<T>::mutable_return_type> iterator;
+ typedef VectorIterator<T, typename IndirectHelper<T>::return_type> const_iterator;
+ typedef VectorReverseIterator<iterator> reverse_iterator;
+ typedef VectorReverseIterator<const_iterator> const_reverse_iterator;
+
+ uoffset_t size() const { return EndianScalar(length_); }
+
+ // Deprecated: use size(). Here for backwards compatibility.
+ FLATBUFFERS_ATTRIBUTE(deprecated("use size() instead"))
+ uoffset_t Length() const { return size(); }
+
+ typedef typename IndirectHelper<T>::return_type return_type;
+ typedef typename IndirectHelper<T>::mutable_return_type mutable_return_type;
+ typedef return_type value_type;
+
+ return_type Get(uoffset_t i) const
+ {
+ FLATBUFFERS_ASSERT(i < size());
+ return IndirectHelper<T>::Read(Data(), i);
+ }
+
+ return_type operator[](uoffset_t i) const { return Get(i); }
+
+ // If this is a Vector of enums, T will be its storage type, not the enum
+ // type. This function makes it convenient to retrieve values with enum
+ // type E.
+ template <typename E> E GetEnum(uoffset_t i) const { return static_cast<E>(Get(i)); }
+
+ // If this is a vector of unions, this does the cast for you. There's no check
+ // to make sure this is the right type!
+ template <typename U> const U *GetAs(uoffset_t i) const
+ {
+ return reinterpret_cast<const U *>(Get(i));
+ }
+
+ // If this is a vector of unions, this does the cast for you. There's no check
+ // to make sure this is actually a string!
+ const String *GetAsString(uoffset_t i) const { return reinterpret_cast<const String *>(Get(i)); }
+
+ const void *GetStructFromOffset(size_t o) const
+ {
+ return reinterpret_cast<const void *>(Data() + o);
+ }
+
+ iterator begin() { return iterator(Data(), 0); }
+ const_iterator begin() const { return const_iterator(Data(), 0); }
+
+ iterator end() { return iterator(Data(), size()); }
+ const_iterator end() const { return const_iterator(Data(), size()); }
+
+ reverse_iterator rbegin() { return reverse_iterator(end()); }
+ const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); }
+
+ reverse_iterator rend() { return reverse_iterator(begin()); }
+ const_reverse_iterator rend() const { return const_reverse_iterator(begin()); }
+
+ const_iterator cbegin() const { return begin(); }
+
+ const_iterator cend() const { return end(); }
+
+ const_reverse_iterator crbegin() const { return rbegin(); }
+
+ const_reverse_iterator crend() const { return rend(); }
+
+ // Change elements if you have a non-const pointer to this object.
+ // Scalars only. See reflection.h, and the documentation.
+ void Mutate(uoffset_t i, const T &val)
+ {
+ FLATBUFFERS_ASSERT(i < size());
+ WriteScalar(data() + i, val);
+ }
+
+ // Change an element of a vector of tables (or strings).
+ // "val" points to the new table/string, as you can obtain from
+ // e.g. reflection::AddFlatBuffer().
+ void MutateOffset(uoffset_t i, const uint8_t *val)
+ {
+ FLATBUFFERS_ASSERT(i < size());
+ static_assert(sizeof(T) == sizeof(uoffset_t), "Unrelated types");
+ WriteScalar(data() + i, static_cast<uoffset_t>(val - (Data() + i * sizeof(uoffset_t))));
+ }
+
+ // Get a mutable pointer to tables/strings inside this vector.
+ mutable_return_type GetMutableObject(uoffset_t i) const
+ {
+ FLATBUFFERS_ASSERT(i < size());
+ return const_cast<mutable_return_type>(IndirectHelper<T>::Read(Data(), i));
+ }
+
+ // The raw data in little endian format. Use with care.
+ const uint8_t *Data() const { return reinterpret_cast<const uint8_t *>(&length_ + 1); }
+
+ uint8_t *Data() { return reinterpret_cast<uint8_t *>(&length_ + 1); }
+
+ // Similarly, but typed, much like std::vector::data
+ const T *data() const { return reinterpret_cast<const T *>(Data()); }
+ T *data() { return reinterpret_cast<T *>(Data()); }
+
+ template <typename K> return_type LookupByKey(K key) const
+ {
+ void *search_result =
+ std::bsearch(&key, Data(), size(), IndirectHelper<T>::element_stride, KeyCompare<K>);
+
+ if (!search_result)
+ {
+ return nullptr; // Key not found.
+ }
+
+ const uint8_t *element = reinterpret_cast<const uint8_t *>(search_result);
+
+ return IndirectHelper<T>::Read(element, 0);
+ }
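+
+ // Usage sketch (illustrative; `Monster` stands in for a generated table with
+ // a key field, and the vector must be sorted by that key, e.g. by building
+ // it with CreateVectorOfSortedTables):
+ //   auto *monster = monsters->LookupByKey("Fred"); // nullptr if absent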
+
+protected:
+ // This class is only used to access pre-existing data. Don't ever
+ // try to construct these manually.
+ Vector();
+
+ uoffset_t length_;
+
+private:
+ // This class is a pointer. Copying will therefore create an invalid object.
+ // Private and unimplemented copy constructor.
+ Vector(const Vector &);
+ Vector &operator=(const Vector &);
+
+ template <typename K> static int KeyCompare(const void *ap, const void *bp)
+ {
+ const K *key = reinterpret_cast<const K *>(ap);
+ const uint8_t *data = reinterpret_cast<const uint8_t *>(bp);
+ auto table = IndirectHelper<T>::Read(data, 0);
+
+ // std::bsearch compares with the operands transposed, so we negate the
+ // result here.
+ return -table->KeyCompareWithValue(*key);
+ }
+};
+
+// Represent a vector much like the template above, but in this case we
+// don't know what the element types are (used with reflection.h).
+class VectorOfAny
+{
+public:
+ uoffset_t size() const { return EndianScalar(length_); }
+
+ const uint8_t *Data() const { return reinterpret_cast<const uint8_t *>(&length_ + 1); }
+ uint8_t *Data() { return reinterpret_cast<uint8_t *>(&length_ + 1); }
+
+protected:
+ VectorOfAny();
+
+ uoffset_t length_;
+
+private:
+ VectorOfAny(const VectorOfAny &);
+ VectorOfAny &operator=(const VectorOfAny &);
+};
+
+#ifndef FLATBUFFERS_CPP98_STL
+template <typename T, typename U> Vector<Offset<T>> *VectorCast(Vector<Offset<U>> *ptr)
+{
+ static_assert(std::is_base_of<T, U>::value, "Unrelated types");
+ return reinterpret_cast<Vector<Offset<T>> *>(ptr);
+}
+
+template <typename T, typename U> const Vector<Offset<T>> *VectorCast(const Vector<Offset<U>> *ptr)
+{
+ static_assert(std::is_base_of<T, U>::value, "Unrelated types");
+ return reinterpret_cast<const Vector<Offset<T>> *>(ptr);
+}
+#endif
+
+// Convenient helper function to get the length of any vector, regardless
+// of whether it is null or not (the field is not set).
+template <typename T> static inline size_t VectorLength(const Vector<T> *v)
+{
+ return v ? v->size() : 0;
+}
+
+// This is used as a helper type for accessing arrays.
+template <typename T, uint16_t length> class Array
+{
+ typedef typename flatbuffers::integral_constant<bool, flatbuffers::is_scalar<T>::value>
+ scalar_tag;
+ typedef
+ typename flatbuffers::conditional<scalar_tag::value, T, const T *>::type IndirectHelperType;
+
+public:
+ typedef uint16_t size_type;
+ typedef typename IndirectHelper<IndirectHelperType>::return_type return_type;
+ typedef VectorIterator<T, return_type> const_iterator;
+ typedef VectorReverseIterator<const_iterator> const_reverse_iterator;
+
+ FLATBUFFERS_CONSTEXPR uint16_t size() const { return length; }
+
+ return_type Get(uoffset_t i) const
+ {
+ FLATBUFFERS_ASSERT(i < size());
+ return IndirectHelper<IndirectHelperType>::Read(Data(), i);
+ }
+
+ return_type operator[](uoffset_t i) const { return Get(i); }
+
+ // If this is a Vector of enums, T will be its storage type, not the enum
+ // type. This function makes it convenient to retrieve values with enum
+ // type E.
+ template <typename E> E GetEnum(uoffset_t i) const { return static_cast<E>(Get(i)); }
+
+ const_iterator begin() const { return const_iterator(Data(), 0); }
+ const_iterator end() const { return const_iterator(Data(), size()); }
+
+ const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); }
+ const_reverse_iterator rend() const { return const_reverse_iterator(begin()); }
+
+ const_iterator cbegin() const { return begin(); }
+ const_iterator cend() const { return end(); }
+
+ const_reverse_iterator crbegin() const { return rbegin(); }
+ const_reverse_iterator crend() const { return rend(); }
+
+ // Get a mutable pointer to elements inside this array.
+ // This method is used to mutate arrays of structs, followed by a @p Mutate
+ // operation. For primitive types use @p Mutate directly.
+ // @warning Assignments and reads to/from the dereferenced pointer are not
+ // automatically converted to the correct endianness.
+ typename flatbuffers::conditional<scalar_tag::value, void, T *>::type
+ GetMutablePointer(uoffset_t i) const
+ {
+ FLATBUFFERS_ASSERT(i < size());
+ return const_cast<T *>(&data()[i]);
+ }
+
+ // Change elements if you have a non-const pointer to this object.
+ void Mutate(uoffset_t i, const T &val) { MutateImpl(scalar_tag(), i, val); }
+
+ // The raw data in little endian format. Use with care.
+ const uint8_t *Data() const { return data_; }
+
+ uint8_t *Data() { return data_; }
+
+ // Similarly, but typed, much like std::vector::data
+ const T *data() const { return reinterpret_cast<const T *>(Data()); }
+ T *data() { return reinterpret_cast<T *>(Data()); }
+
+ // Copy data from a span with endian conversion.
+ // If this Array and the span overlap, the behavior is undefined.
+ void CopyFromSpan(flatbuffers::span<const T, length> src)
+ {
+ const auto p1 = reinterpret_cast<const uint8_t *>(src.data());
+ const auto p2 = Data();
+ FLATBUFFERS_ASSERT(!(p1 >= p2 && p1 < (p2 + length)) && !(p2 >= p1 && p2 < (p1 + length)));
+ (void)p1;
+ (void)p2;
+
+ CopyFromSpanImpl(
+ flatbuffers::integral_constant<bool, !scalar_tag::value || sizeof(T) == 1 ||
+ FLATBUFFERS_LITTLEENDIAN>(),
+ src);
+ }
+
+protected:
+ void MutateImpl(flatbuffers::integral_constant<bool, true>, uoffset_t i, const T &val)
+ {
+ FLATBUFFERS_ASSERT(i < size());
+ WriteScalar(data() + i, val);
+ }
+
+ void MutateImpl(flatbuffers::integral_constant<bool, false>, uoffset_t i, const T &val)
+ {
+ *(GetMutablePointer(i)) = val;
+ }
+
+ void CopyFromSpanImpl(flatbuffers::integral_constant<bool, true>,
+ flatbuffers::span<const T, length> src)
+ {
+ // Use std::memcpy() instead of std::copy() to avoid performance degradation
+ // due to aliasing if T is char or unsigned char.
+ // The size is known at compile time, so memcpy would be inlined.
+ std::memcpy(data(), src.data(), length * sizeof(T));
+ }
+
+ // Copy data from flatbuffers::span with endian conversion.
+ void CopyFromSpanImpl(flatbuffers::integral_constant<bool, false>,
+ flatbuffers::span<const T, length> src)
+ {
+ for (size_type k = 0; k < length; k++)
+ {
+ Mutate(k, src[k]);
+ }
+ }
+
+ // This class is only used to access pre-existing data. Don't ever
+ // try to construct these manually.
+ // 'constexpr' allows us to use 'size()' at compile time.
+ // @note Must not use 'FLATBUFFERS_CONSTEXPR' here, as const is not allowed on
+ // a constructor.
+#if defined(__cpp_constexpr)
+ constexpr Array();
+#else
+ Array();
+#endif
+
+ uint8_t data_[length * sizeof(T)];
+
+private:
+ // This class is a pointer. Copying will therefore create an invalid object.
+ // Private and unimplemented copy constructor.
+ Array(const Array &);
+ Array &operator=(const Array &);
+};
+
+// Specialization for Array[struct] with access using Offset<void> pointer.
+// This specialization is used by idl_gen_text.cpp.
+template <typename T, uint16_t length> class Array<Offset<T>, length>
+{
+ static_assert(flatbuffers::is_same<T, void>::value, "unexpected type T");
+
+public:
+ typedef const void *return_type;
+
+ const uint8_t *Data() const { return data_; }
+
+ // Make idl_gen_text.cpp::PrintContainer happy.
+ return_type operator[](uoffset_t) const
+ {
+ FLATBUFFERS_ASSERT(false);
+ return nullptr;
+ }
+
+private:
+ // This class is only used to access pre-existing data.
+ Array();
+ Array(const Array &);
+ Array &operator=(const Array &);
+
+ uint8_t data_[1];
+};
+
+// Cast a raw T[length] to a raw flatbuffers::Array<T, length>
+// without endian conversion. Use with care.
+template <typename T, uint16_t length> Array<T, length> &CastToArray(T (&arr)[length])
+{
+ return *reinterpret_cast<Array<T, length> *>(arr);
+}
+
+template <typename T, uint16_t length> const Array<T, length> &CastToArray(const T (&arr)[length])
+{
+ return *reinterpret_cast<const Array<T, length> *>(arr);
+}
+
+template <typename E, typename T, uint16_t length>
+Array<E, length> &CastToArrayOfEnum(T (&arr)[length])
+{
+ static_assert(sizeof(E) == sizeof(T), "invalid enum type E");
+ return *reinterpret_cast<Array<E, length> *>(arr);
+}
+
+template <typename E, typename T, uint16_t length>
+const Array<E, length> &CastToArrayOfEnum(const T (&arr)[length])
+{
+ static_assert(sizeof(E) == sizeof(T), "invalid enum type E");
+ return *reinterpret_cast<const Array<E, length> *>(arr);
+}
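+
+// Usage sketch (illustrative): reinterpret a plain C array as a
+// flatbuffers::Array without copying:
+//   int32_t raw[4] = {1, 2, 3, 4};
+//   const auto &arr = CastToArray(raw);
+//   int32_t third = arr.Get(2); // 3 (read via EndianScalar)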
+
+// Lexicographically compare two strings (possibly containing nulls), and
+// return true if the first is less than the second.
+static inline bool StringLessThan(const char *a_data, uoffset_t a_size, const char *b_data,
+ uoffset_t b_size)
+{
+ const auto cmp = memcmp(a_data, b_data, (std::min)(a_size, b_size));
+ return cmp == 0 ? a_size < b_size : cmp < 0;
+}
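+
+// Worked example (illustrative): for "abc" (size 3) vs "abcd" (size 4),
+// memcmp over the first 3 bytes returns 0, so the result falls back to the
+// size comparison and the shorter string compares as less.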
+
+struct String : public Vector<char>
+{
+ const char *c_str() const { return reinterpret_cast<const char *>(Data()); }
+ std::string str() const { return std::string(c_str(), size()); }
+
+#ifdef FLATBUFFERS_HAS_STRING_VIEW
+ flatbuffers::string_view string_view() const { return flatbuffers::string_view(c_str(), size()); }
+#endif // FLATBUFFERS_HAS_STRING_VIEW
+
+ bool operator<(const String &o) const
+ {
+ return StringLessThan(this->data(), this->size(), o.data(), o.size());
+ }
+};
+
+// Convenience function to get std::string from a String returning an empty
+// string on null pointer.
+static inline std::string GetString(const String *str) { return str ? str->str() : ""; }
+
+// Convenience function to get char* from a String returning an empty string on
+// null pointer.
+static inline const char *GetCstring(const String *str) { return str ? str->c_str() : ""; }
+
+#ifdef FLATBUFFERS_HAS_STRING_VIEW
+// Convenience function to get string_view from a String returning an empty
+// string_view on null pointer.
+static inline flatbuffers::string_view GetStringView(const String *str)
+{
+ return str ? str->string_view() : flatbuffers::string_view();
+}
+#endif // FLATBUFFERS_HAS_STRING_VIEW
+
+// Allocator interface. This is flatbuffers-specific and meant only for
+// `vector_downward` usage.
+class Allocator
+{
+public:
+ virtual ~Allocator() {}
+
+ // Allocate `size` bytes of memory.
+ virtual uint8_t *allocate(size_t size) = 0;
+
+ // Deallocate `size` bytes of memory at `p` allocated by this allocator.
+ virtual void deallocate(uint8_t *p, size_t size) = 0;
+
+ // Reallocate `new_size` bytes of memory, replacing the old region of size
+ // `old_size` at `p`. In contrast to a normal realloc, this grows downwards,
+ // and is intended specifically for `vector_downward` use.
+ // `in_use_back` and `in_use_front` indicate how much of `old_size` is
+ // actually in use at each end, and needs to be copied.
+ virtual uint8_t *reallocate_downward(uint8_t *old_p, size_t old_size, size_t new_size,
+ size_t in_use_back, size_t in_use_front)
+ {
+ FLATBUFFERS_ASSERT(new_size > old_size); // vector_downward only grows
+ uint8_t *new_p = allocate(new_size);
+ memcpy_downward(old_p, old_size, new_p, new_size, in_use_back, in_use_front);
+ deallocate(old_p, old_size);
+ return new_p;
+ }
+
+protected:
+ // Called by `reallocate_downward` to copy memory from `old_p` of `old_size`
+ // to `new_p` of `new_size`. Only memory of size `in_use_front` and
+ // `in_use_back` will be copied from the front and back of the old memory
+ // allocation.
+ void memcpy_downward(uint8_t *old_p, size_t old_size, uint8_t *new_p, size_t new_size,
+ size_t in_use_back, size_t in_use_front)
+ {
+ memcpy(new_p + new_size - in_use_back, old_p + old_size - in_use_back, in_use_back);
+ memcpy(new_p, old_p, in_use_front);
+ }
+};
+
+// DefaultAllocator uses new/delete to allocate memory regions
+class DefaultAllocator : public Allocator
+{
+public:
+ uint8_t *allocate(size_t size) FLATBUFFERS_OVERRIDE { return new uint8_t[size]; }
+
+ void deallocate(uint8_t *p, size_t) FLATBUFFERS_OVERRIDE { delete[] p; }
+
+ static void dealloc(void *p, size_t) { delete[] static_cast<uint8_t *>(p); }
+};
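+
+// A minimal custom-allocator sketch (illustrative; `MallocAllocator` is not
+// part of this library):
+//   class MallocAllocator : public Allocator {
+//   public:
+//     uint8_t *allocate(size_t size) FLATBUFFERS_OVERRIDE {
+//       return static_cast<uint8_t *>(std::malloc(size));
+//     }
+//     void deallocate(uint8_t *p, size_t) FLATBUFFERS_OVERRIDE { std::free(p); }
+//   };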
+
+// These functions allow a null allocator to mean "use the default allocator",
+// as used by DetachedBuffer and vector_downward below.
+// This is to avoid having a statically or dynamically allocated default
+// allocator, or having to move it between the classes that may own it.
+inline uint8_t *Allocate(Allocator *allocator, size_t size)
+{
+ return allocator ? allocator->allocate(size) : DefaultAllocator().allocate(size);
+}
+
+inline void Deallocate(Allocator *allocator, uint8_t *p, size_t size)
+{
+ if (allocator)
+ allocator->deallocate(p, size);
+ else
+ DefaultAllocator().deallocate(p, size);
+}
+
+inline uint8_t *ReallocateDownward(Allocator *allocator, uint8_t *old_p, size_t old_size,
+ size_t new_size, size_t in_use_back, size_t in_use_front)
+{
+ return allocator
+ ? allocator->reallocate_downward(old_p, old_size, new_size, in_use_back, in_use_front)
+ : DefaultAllocator().reallocate_downward(old_p, old_size, new_size, in_use_back,
+ in_use_front);
+}
+
+// DetachedBuffer is a finished flatbuffer memory region, detached from its
+// builder. The original memory region and allocator are also stored so that
+// the DetachedBuffer can manage the memory lifetime.
+class DetachedBuffer
+{
+public:
+ DetachedBuffer()
+ : allocator_(nullptr), own_allocator_(false), buf_(nullptr), reserved_(0), cur_(nullptr),
+ size_(0)
+ {
+ }
+
+ DetachedBuffer(Allocator *allocator, bool own_allocator, uint8_t *buf, size_t reserved,
+ uint8_t *cur, size_t sz)
+ : allocator_(allocator), own_allocator_(own_allocator), buf_(buf), reserved_(reserved),
+ cur_(cur), size_(sz)
+ {
+ }
+
+#if !defined(FLATBUFFERS_CPP98_STL)
+ DetachedBuffer(DetachedBuffer &&other)
+ : allocator_(other.allocator_), own_allocator_(other.own_allocator_), buf_(other.buf_),
+ reserved_(other.reserved_), cur_(other.cur_), size_(other.size_)
+ {
+ other.reset();
+ }
+#endif // !defined(FLATBUFFERS_CPP98_STL)
+
+#if !defined(FLATBUFFERS_CPP98_STL)
+ DetachedBuffer &operator=(DetachedBuffer &&other)
+ {
+ if (this == &other)
+ return *this;
+
+ destroy();
+
+ allocator_ = other.allocator_;
+ own_allocator_ = other.own_allocator_;
+ buf_ = other.buf_;
+ reserved_ = other.reserved_;
+ cur_ = other.cur_;
+ size_ = other.size_;
+
+ other.reset();
+
+ return *this;
+ }
+#endif // !defined(FLATBUFFERS_CPP98_STL)
+
+ ~DetachedBuffer() { destroy(); }
+
+ const uint8_t *data() const { return cur_; }
+
+ uint8_t *data() { return cur_; }
+
+ size_t size() const { return size_; }
+
+#if 0 // disabled for now due to the ordering of classes in this header
+ template <class T>
+ bool Verify() const {
+ Verifier verifier(data(), size());
+ return verifier.Verify<T>(nullptr);
+ }
+
+ template <class T>
+ const T* GetRoot() const {
+ return flatbuffers::GetRoot<T>(data());
+ }
+
+ template <class T>
+ T* GetRoot() {
+ return flatbuffers::GetRoot<T>(data());
+ }
+#endif
+
+#if !defined(FLATBUFFERS_CPP98_STL)
+ // These may change access mode, leave these at end of public section
+ FLATBUFFERS_DELETE_FUNC(DetachedBuffer(const DetachedBuffer &other));
+ FLATBUFFERS_DELETE_FUNC(DetachedBuffer &operator=(const DetachedBuffer &other));
+#endif // !defined(FLATBUFFERS_CPP98_STL)
+
+protected:
+ Allocator *allocator_;
+ bool own_allocator_;
+ uint8_t *buf_;
+ size_t reserved_;
+ uint8_t *cur_;
+ size_t size_;
+
+ inline void destroy()
+ {
+ if (buf_)
+ Deallocate(allocator_, buf_, reserved_);
+ if (own_allocator_ && allocator_)
+ {
+ delete allocator_;
+ }
+ reset();
+ }
+
+ inline void reset()
+ {
+ allocator_ = nullptr;
+ own_allocator_ = false;
+ buf_ = nullptr;
+ reserved_ = 0;
+ cur_ = nullptr;
+ size_ = 0;
+ }
+};
+
+// This is a minimal replication of std::vector<uint8_t> functionality,
+// except growing from higher to lower addresses. i.e push_back() inserts data
+// in the lowest address in the vector.
+// Since this vector leaves the lower part unused, we support a "scratch-pad"
+// that can be stored there for temporary data, to share the allocated space.
+// Essentially, this supports 2 std::vectors in a single buffer.
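+// Illustrative layout (addresses increase to the right):
+//   buf_ ...... scratch_ ........ cur_ ...... buf_ + reserved_
+//   [ scratch data ][ free space ][ serialized data ]
+// The scratchpad grows upward from buf_, while the main data grows downward
+// from buf_ + reserved_.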
+class vector_downward
+{
+public:
+ explicit vector_downward(size_t initial_size, Allocator *allocator, bool own_allocator,
+ size_t buffer_minalign)
+ : allocator_(allocator), own_allocator_(own_allocator), initial_size_(initial_size),
+ buffer_minalign_(buffer_minalign), reserved_(0), buf_(nullptr), cur_(nullptr),
+ scratch_(nullptr)
+ {
+ }
+
+#if !defined(FLATBUFFERS_CPP98_STL)
+ vector_downward(vector_downward &&other)
+#else
+ vector_downward(vector_downward &other)
+#endif // !defined(FLATBUFFERS_CPP98_STL)
+ : allocator_(other.allocator_), own_allocator_(other.own_allocator_),
+ initial_size_(other.initial_size_), buffer_minalign_(other.buffer_minalign_),
+ reserved_(other.reserved_), buf_(other.buf_), cur_(other.cur_), scratch_(other.scratch_)
+ {
+ // No change in other.allocator_
+ // No change in other.initial_size_
+ // No change in other.buffer_minalign_
+ other.own_allocator_ = false;
+ other.reserved_ = 0;
+ other.buf_ = nullptr;
+ other.cur_ = nullptr;
+ other.scratch_ = nullptr;
+ }
+
+#if !defined(FLATBUFFERS_CPP98_STL)
+ vector_downward &operator=(vector_downward &&other)
+ {
+ // Move construct a temporary and swap idiom
+ vector_downward temp(std::move(other));
+ swap(temp);
+ return *this;
+ }
+#endif // !defined(FLATBUFFERS_CPP98_STL)
+
+ ~vector_downward()
+ {
+ clear_buffer();
+ clear_allocator();
+ }
+
+ void reset()
+ {
+ clear_buffer();
+ clear();
+ }
+
+ void clear()
+ {
+ if (buf_)
+ {
+ cur_ = buf_ + reserved_;
+ }
+ else
+ {
+ reserved_ = 0;
+ cur_ = nullptr;
+ }
+ clear_scratch();
+ }
+
+ void clear_scratch() { scratch_ = buf_; }
+
+ void clear_allocator()
+ {
+ if (own_allocator_ && allocator_)
+ {
+ delete allocator_;
+ }
+ allocator_ = nullptr;
+ own_allocator_ = false;
+ }
+
+ void clear_buffer()
+ {
+ if (buf_)
+ Deallocate(allocator_, buf_, reserved_);
+ buf_ = nullptr;
+ }
+
+ // Relinquish the pointer to the caller.
+ uint8_t *release_raw(size_t &allocated_bytes, size_t &offset)
+ {
+ auto *buf = buf_;
+ allocated_bytes = reserved_;
+ offset = static_cast<size_t>(cur_ - buf_);
+
+ // release_raw only relinquishes the buffer ownership.
+ // Does not deallocate or reset the allocator. Destructor will do that.
+ buf_ = nullptr;
+ clear();
+ return buf;
+ }
+
+ // Relinquish the pointer to the caller.
+ DetachedBuffer release()
+ {
+ // allocator ownership (if any) is transferred to DetachedBuffer.
+ DetachedBuffer fb(allocator_, own_allocator_, buf_, reserved_, cur_, size());
+ if (own_allocator_)
+ {
+ allocator_ = nullptr;
+ own_allocator_ = false;
+ }
+ buf_ = nullptr;
+ clear();
+ return fb;
+ }
+
+ size_t ensure_space(size_t len)
+ {
+ FLATBUFFERS_ASSERT(cur_ >= scratch_ && scratch_ >= buf_);
+ if (len > static_cast<size_t>(cur_ - scratch_))
+ {
+ reallocate(len);
+ }
+ // Beyond this, signed offsets may not have enough range:
+ // (FlatBuffers > 2GB not supported).
+ FLATBUFFERS_ASSERT(size() < FLATBUFFERS_MAX_BUFFER_SIZE);
+ return len;
+ }
+
+ inline uint8_t *make_space(size_t len)
+ {
+ size_t space = ensure_space(len);
+ cur_ -= space;
+ return cur_;
+ }
+
+ // Returns nullptr if using the DefaultAllocator.
+ Allocator *get_custom_allocator() { return allocator_; }
+
+ uoffset_t size() const
+ {
+ return static_cast<uoffset_t>(reserved_ - static_cast<size_t>(cur_ - buf_));
+ }
+
+ uoffset_t scratch_size() const { return static_cast<uoffset_t>(scratch_ - buf_); }
+
+ size_t capacity() const { return reserved_; }
+
+ uint8_t *data() const
+ {
+ FLATBUFFERS_ASSERT(cur_);
+ return cur_;
+ }
+
+ uint8_t *scratch_data() const
+ {
+ FLATBUFFERS_ASSERT(buf_);
+ return buf_;
+ }
+
+ uint8_t *scratch_end() const
+ {
+ FLATBUFFERS_ASSERT(scratch_);
+ return scratch_;
+ }
+
+ uint8_t *data_at(size_t offset) const { return buf_ + reserved_ - offset; }
+
+ void push(const uint8_t *bytes, size_t num)
+ {
+ if (num > 0)
+ {
+ memcpy(make_space(num), bytes, num);
+ }
+ }
+
+ // Specialized version of push() that avoids memcpy call for small data.
+ template <typename T> void push_small(const T &little_endian_t)
+ {
+ make_space(sizeof(T));
+ *reinterpret_cast<T *>(cur_) = little_endian_t;
+ }
+
+ template <typename T> void scratch_push_small(const T &t)
+ {
+ ensure_space(sizeof(T));
+ *reinterpret_cast<T *>(scratch_) = t;
+ scratch_ += sizeof(T);
+ }
+
+ // fill() is most frequently called with small byte counts (<= 4),
+ // which is why we're using loops rather than calling memset.
+ void fill(size_t zero_pad_bytes)
+ {
+ make_space(zero_pad_bytes);
+ for (size_t i = 0; i < zero_pad_bytes; i++)
+ cur_[i] = 0;
+ }
+
+ // Version for when we know the size is larger.
+ // Precondition: zero_pad_bytes > 0
+ void fill_big(size_t zero_pad_bytes) { memset(make_space(zero_pad_bytes), 0, zero_pad_bytes); }
+
+ void pop(size_t bytes_to_remove) { cur_ += bytes_to_remove; }
+ void scratch_pop(size_t bytes_to_remove) { scratch_ -= bytes_to_remove; }
+
+ void swap(vector_downward &other)
+ {
+ using std::swap;
+ swap(allocator_, other.allocator_);
+ swap(own_allocator_, other.own_allocator_);
+ swap(initial_size_, other.initial_size_);
+ swap(buffer_minalign_, other.buffer_minalign_);
+ swap(reserved_, other.reserved_);
+ swap(buf_, other.buf_);
+ swap(cur_, other.cur_);
+ swap(scratch_, other.scratch_);
+ }
+
+ void swap_allocator(vector_downward &other)
+ {
+ using std::swap;
+ swap(allocator_, other.allocator_);
+ swap(own_allocator_, other.own_allocator_);
+ }
+
+private:
+ // You shouldn't really be copying instances of this class.
+ FLATBUFFERS_DELETE_FUNC(vector_downward(const vector_downward &));
+ FLATBUFFERS_DELETE_FUNC(vector_downward &operator=(const vector_downward &));
+
+ Allocator *allocator_;
+ bool own_allocator_;
+ size_t initial_size_;
+ size_t buffer_minalign_;
+ size_t reserved_;
+ uint8_t *buf_;
+ uint8_t *cur_; // Points at location between empty (below) and used (above).
+ uint8_t *scratch_; // Points to the end of the scratchpad in use.
+
+ void reallocate(size_t len)
+ {
+ auto old_reserved = reserved_;
+ auto old_size = size();
+ auto old_scratch_size = scratch_size();
+ reserved_ += (std::max)(len, old_reserved ? old_reserved / 2 : initial_size_);
+ reserved_ = (reserved_ + buffer_minalign_ - 1) & ~(buffer_minalign_ - 1);
+ if (buf_)
+ {
+ buf_ =
+ ReallocateDownward(allocator_, buf_, old_reserved, reserved_, old_size, old_scratch_size);
+ }
+ else
+ {
+ buf_ = Allocate(allocator_, reserved_);
+ }
+ cur_ = buf_ + reserved_ - old_size;
+ scratch_ = buf_ + old_scratch_size;
+ }
+};
+
+// Converts a Field ID to a virtual table offset.
+inline voffset_t FieldIndexToOffset(voffset_t field_id)
+{
+ // Should correspond to what EndTable() below builds up.
+ const int fixed_fields = 2; // Vtable size and Object Size.
+ return static_cast<voffset_t>((field_id + fixed_fields) * sizeof(voffset_t));
+}
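+
+// Worked example (illustrative): with a 2-byte voffset_t, field_id 0 maps to
+// offset (0 + 2) * 2 == 4, i.e. the first field slot comes right after the
+// vtable-size and object-size entries.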
+
+template <typename T, typename Alloc> const T *data(const std::vector<T, Alloc> &v)
+{
+ // Eventually the returned pointer gets passed down to memcpy, so
+ // we need it to be non-null to avoid undefined behavior.
+ static uint8_t t;
+ return v.empty() ? reinterpret_cast<const T *>(&t) : &v.front();
+}
+template <typename T, typename Alloc> T *data(std::vector<T, Alloc> &v)
+{
+ // Eventually the returned pointer gets passed down to memcpy, so
+ // we need it to be non-null to avoid undefined behavior.
+ static uint8_t t;
+ return v.empty() ? reinterpret_cast<T *>(&t) : &v.front();
+}
+
+/// @endcond
+
+/// @addtogroup flatbuffers_cpp_api
+/// @{
+/// @class FlatBufferBuilder
+/// @brief Helper class to hold data needed in creation of a FlatBuffer.
+/// To serialize data, you typically call one of the `Create*()` functions in
+/// the generated code, which in turn call a sequence of `StartTable`/
+/// `PushElement`/`AddElement`/`EndTable`, or the builtin `CreateString`/
+/// `CreateVector` functions. Do this in depth-first order to build up a tree to
+/// the root. `Finish()` wraps up the buffer ready for transport.
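+/// A minimal usage sketch (illustrative; `MyTable` and `CreateMyTable` stand
+/// in for your generated schema code):
+///   flatbuffers::FlatBufferBuilder builder;
+///   auto name = builder.CreateString("example");
+///   auto root = CreateMyTable(builder, name);
+///   builder.Finish(root);
+///   const uint8_t *buf = builder.GetBufferPointer();
+///   size_t size = builder.GetSize();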
+class FlatBufferBuilder
+{
+public:
+ /// @brief Default constructor for FlatBufferBuilder.
+ /// @param[in] initial_size The initial size of the buffer, in bytes. Defaults
+ /// to `1024`.
+ /// @param[in] allocator An `Allocator` to use. If null will use
+ /// `DefaultAllocator`.
+ /// @param[in] own_allocator Whether the builder/vector should own the
+ /// allocator. Defaults to `false`.
+ /// @param[in] buffer_minalign Force the buffer to be aligned to the given
+ /// minimum alignment upon reallocation. Only needed if you intend to store
+ /// types with custom alignment AND you wish to read the buffer in-place
+ /// directly after creation.
+ explicit FlatBufferBuilder(size_t initial_size = 1024, Allocator *allocator = nullptr,
+ bool own_allocator = false,
+ size_t buffer_minalign = AlignOf<largest_scalar_t>())
+ : buf_(initial_size, allocator, own_allocator, buffer_minalign), num_field_loc(0),
+ max_voffset_(0), nested(false), finished(false), minalign_(1), force_defaults_(false),
+ dedup_vtables_(true), string_pool(nullptr)
+ {
+ EndianCheck();
+ }
+
+/// @brief Move constructor for FlatBufferBuilder.
+#if !defined(FLATBUFFERS_CPP98_STL)
+ FlatBufferBuilder(FlatBufferBuilder &&other)
+#else
+ FlatBufferBuilder(FlatBufferBuilder &other)
+#endif // !defined(FLATBUFFERS_CPP98_STL)
+ : buf_(1024, nullptr, false, AlignOf<largest_scalar_t>()), num_field_loc(0), max_voffset_(0),
+ nested(false), finished(false), minalign_(1), force_defaults_(false), dedup_vtables_(true),
+ string_pool(nullptr)
+ {
+ EndianCheck();
+ // Default construct and swap idiom.
+ // Lack of delegating constructors in vs2010 makes it more verbose than needed.
+ Swap(other);
+ }
+
+#if !defined(FLATBUFFERS_CPP98_STL)
+ /// @brief Move assignment operator for FlatBufferBuilder.
+ FlatBufferBuilder &operator=(FlatBufferBuilder &&other)
+ {
+ // Move construct a temporary and swap idiom
+ FlatBufferBuilder temp(std::move(other));
+ Swap(temp);
+ return *this;
+ }
+#endif // !defined(FLATBUFFERS_CPP98_STL)
+
+ void Swap(FlatBufferBuilder &other)
+ {
+ using std::swap;
+ buf_.swap(other.buf_);
+ swap(num_field_loc, other.num_field_loc);
+ swap(max_voffset_, other.max_voffset_);
+ swap(nested, other.nested);
+ swap(finished, other.finished);
+ swap(minalign_, other.minalign_);
+ swap(force_defaults_, other.force_defaults_);
+ swap(dedup_vtables_, other.dedup_vtables_);
+ swap(string_pool, other.string_pool);
+ }
+
+ ~FlatBufferBuilder()
+ {
+ if (string_pool)
+ delete string_pool;
+ }
+
+ void Reset()
+ {
+ Clear(); // clear builder state
+ buf_.reset(); // deallocate buffer
+ }
+
+ /// @brief Reset all the state in this FlatBufferBuilder so it can be reused
+ /// to construct another buffer.
+ void Clear()
+ {
+ ClearOffsets();
+ buf_.clear();
+ nested = false;
+ finished = false;
+ minalign_ = 1;
+ if (string_pool)
+ string_pool->clear();
+ }
+
+ /// @brief The current size of the serialized buffer, counting from the end.
+ /// @return Returns an `uoffset_t` with the current size of the buffer.
+ uoffset_t GetSize() const { return buf_.size(); }
+
+ /// @brief Get the serialized buffer (after you call `Finish()`).
+ /// @return Returns a `uint8_t` pointer to the FlatBuffer data inside the
+ /// buffer.
+ uint8_t *GetBufferPointer() const
+ {
+ Finished();
+ return buf_.data();
+ }
+
+ /// @brief Get the serialized buffer (after you call `Finish()`) as a span.
+ /// @return Returns a constructed flatbuffers::span that is a view over the
+ /// FlatBuffer data inside the buffer.
+ flatbuffers::span<uint8_t> GetBufferSpan() const
+ {
+ Finished();
+ return flatbuffers::span<uint8_t>(buf_.data(), buf_.size());
+ }
+
+ /// @brief Get a pointer to an unfinished buffer.
+ /// @return Returns a `uint8_t` pointer to the unfinished buffer.
+ uint8_t *GetCurrentBufferPointer() const { return buf_.data(); }
+
+ /// @brief Get the released pointer to the serialized buffer.
+ /// @warning Do NOT attempt to use this FlatBufferBuilder afterwards!
+ /// @return A `FlatBuffer` that owns the buffer and its allocator and
+ /// behaves similarly to a `unique_ptr` with a deleter.
+ FLATBUFFERS_ATTRIBUTE(deprecated("use Release() instead"))
+ DetachedBuffer ReleaseBufferPointer()
+ {
+ Finished();
+ return buf_.release();
+ }
+
+ /// @brief Get the released DetachedBuffer.
+ /// @return A `DetachedBuffer` that owns the buffer and its allocator.
+ DetachedBuffer Release()
+ {
+ Finished();
+ return buf_.release();
+ }
+
+ /// @brief Get the released pointer to the serialized buffer.
+ /// @param size The size of the memory block containing
+ /// the serialized `FlatBuffer`.
+ /// @param offset The offset from the released pointer where the finished
+ /// `FlatBuffer` starts.
+ /// @return A raw pointer to the start of the memory block containing
+ /// the serialized `FlatBuffer`.
+ /// @remark If the allocator is owned, it gets deleted when the destructor is
+ /// called.
+ uint8_t *ReleaseRaw(size_t &size, size_t &offset)
+ {
+ Finished();
+ return buf_.release_raw(size, offset);
+ }
+
+ /// @brief Get the minimum alignment this buffer needs to be accessed
+ /// properly. This is only known once all elements have been written (after
+ /// you call Finish()). You can use this information if you need to embed
+ /// a FlatBuffer in some other buffer, such that you can later read it
+ /// without first having to copy it into its own buffer.
+ size_t GetBufferMinAlignment() const
+ {
+ Finished();
+ return minalign_;
+ }
+
+ /// @cond FLATBUFFERS_INTERNAL
+ void Finished() const
+ {
+ // If you get this assert, you're attempting to access a buffer
+ // which hasn't been finished yet. Be sure to call
+ // FlatBufferBuilder::Finish with your root table.
+ // If you really need to access an unfinished buffer, call
+ // GetCurrentBufferPointer instead.
+ FLATBUFFERS_ASSERT(finished);
+ }
+ /// @endcond
+
+ /// @brief In order to save space, fields that are set to their default value
+ /// don't get serialized into the buffer.
+ /// @param[in] fd When set to `true`, always serializes default values that
+ /// are set. Optional fields that are not set explicitly will still not be
+ /// serialized.
+ void ForceDefaults(bool fd) { force_defaults_ = fd; }
+
+ /// @brief By default vtables are deduped in order to save space.
+ /// @param[in] dedup When set to `true`, dedup vtables.
+ void DedupVtables(bool dedup) { dedup_vtables_ = dedup; }
+
+ /// @cond FLATBUFFERS_INTERNAL
+ void Pad(size_t num_bytes) { buf_.fill(num_bytes); }
+
+ void TrackMinAlign(size_t elem_size)
+ {
+ if (elem_size > minalign_)
+ minalign_ = elem_size;
+ }
+
+ void Align(size_t elem_size)
+ {
+ TrackMinAlign(elem_size);
+ buf_.fill(PaddingBytes(buf_.size(), elem_size));
+ }
+
+ void PushFlatBuffer(const uint8_t *bytes, size_t size)
+ {
+ PushBytes(bytes, size);
+ finished = true;
+ }
+
+ void PushBytes(const uint8_t *bytes, size_t size) { buf_.push(bytes, size); }
+
+ void PopBytes(size_t amount) { buf_.pop(amount); }
+
+ template <typename T> void AssertScalarT()
+ {
+ // The code assumes power of 2 sizes and endian-swap-ability.
+ static_assert(flatbuffers::is_scalar<T>::value, "T must be a scalar type");
+ }
+
+ // Write a single aligned scalar to the buffer
+ template <typename T> uoffset_t PushElement(T element)
+ {
+ AssertScalarT<T>();
+ T little_endian_element = EndianScalar(element);
+ Align(sizeof(T));
+ buf_.push_small(little_endian_element);
+ return GetSize();
+ }
+
+ template <typename T> uoffset_t PushElement(Offset<T> off)
+ {
+ // Special case for offsets: see ReferTo below.
+ return PushElement(ReferTo(off.o));
+ }
+
+ // When writing fields, we track where they are, so we can create correct
+ // vtables later.
+ void TrackField(voffset_t field, uoffset_t off)
+ {
+ FieldLoc fl = {off, field};
+ buf_.scratch_push_small(fl);
+ num_field_loc++;
+ max_voffset_ = (std::max)(max_voffset_, field);
+ }
+
+ // Like PushElement, but additionally tracks the field this represents.
+ template <typename T> void AddElement(voffset_t field, T e, T def)
+ {
+ // We don't serialize values equal to the default.
+ if (IsTheSameAs(e, def) && !force_defaults_)
+ return;
+ auto off = PushElement(e);
+ TrackField(field, off);
+ }
+
+ template <typename T> void AddElement(voffset_t field, T e)
+ {
+ auto off = PushElement(e);
+ TrackField(field, off);
+ }
+
+ template <typename T> void AddOffset(voffset_t field, Offset<T> off)
+ {
+ if (off.IsNull())
+ return; // Don't store.
+ AddElement(field, ReferTo(off.o), static_cast<uoffset_t>(0));
+ }
+
+ template <typename T> void AddStruct(voffset_t field, const T *structptr)
+ {
+ if (!structptr)
+ return; // Default, don't store.
+ Align(AlignOf<T>());
+ buf_.push_small(*structptr);
+ TrackField(field, GetSize());
+ }
+
+ void AddStructOffset(voffset_t field, uoffset_t off) { TrackField(field, off); }
+
+ // Offsets initially are relative to the end of the buffer (downwards).
+ // This function converts them to be relative to the current location
+ // in the buffer (when stored here), pointing upwards.
+ uoffset_t ReferTo(uoffset_t off)
+ {
+ // Align to ensure GetSize() below is correct.
+ Align(sizeof(uoffset_t));
+ // Offset must refer to something already in buffer.
+ FLATBUFFERS_ASSERT(off && off <= GetSize());
+ return GetSize() - off + static_cast<uoffset_t>(sizeof(uoffset_t));
+ }
+
+ void NotNested()
+ {
+ // If you hit this, you're trying to construct a Table/Vector/String
+ // during the construction of its parent table (between the MyTableBuilder
+ // and table.Finish()).
+ // Move the creation of these sub-objects to above the MyTableBuilder to
+ // not get this assert.
+ // Ignoring this assert may appear to work in simple cases, but the reason
+ // it is here is that storing objects in-line may cause vtable offsets
+ // to not fit anymore. It also leads to vtable duplication.
+ FLATBUFFERS_ASSERT(!nested);
+ // If you hit this, fields were added outside the scope of a table.
+ FLATBUFFERS_ASSERT(!num_field_loc);
+ }
+
+ // From generated code (or from the parser), we call StartTable/EndTable
+ // with a sequence of AddElement calls in between.
+ uoffset_t StartTable()
+ {
+ NotNested();
+ nested = true;
+ return GetSize();
+ }
+
+ // This finishes one serialized object by generating the vtable if it's a
+ // table, comparing it against existing vtables, and writing the
+ // resulting vtable offset.
+ uoffset_t EndTable(uoffset_t start)
+ {
+ // If you get this assert, a corresponding StartTable wasn't called.
+ FLATBUFFERS_ASSERT(nested);
+ // Write the vtable offset, which is the start of any Table.
+ // We fill in its value later.
+ auto vtableoffsetloc = PushElement<soffset_t>(0);
+ // Write a vtable, which consists entirely of voffset_t elements.
+ // It starts with the number of offsets, followed by a type id, followed
+ // by the offsets themselves. In reverse:
+ // Include space for the last offset and ensure empty tables have a
+ // minimum size.
+ max_voffset_ =
+ (std::max)(static_cast<voffset_t>(max_voffset_ + sizeof(voffset_t)), FieldIndexToOffset(0));
+ buf_.fill_big(max_voffset_);
+ auto table_object_size = vtableoffsetloc - start;
+ // Vtables use 16-bit offsets.
+ FLATBUFFERS_ASSERT(table_object_size < 0x10000);
+ WriteScalar<voffset_t>(buf_.data() + sizeof(voffset_t),
+ static_cast<voffset_t>(table_object_size));
+ WriteScalar<voffset_t>(buf_.data(), max_voffset_);
+ // Write the offsets into the table
+ for (auto it = buf_.scratch_end() - num_field_loc * sizeof(FieldLoc); it < buf_.scratch_end();
+ it += sizeof(FieldLoc))
+ {
+ auto field_location = reinterpret_cast<FieldLoc *>(it);
+ auto pos = static_cast<voffset_t>(vtableoffsetloc - field_location->off);
+ // If this asserts, it means you've set a field twice.
+ FLATBUFFERS_ASSERT(!ReadScalar<voffset_t>(buf_.data() + field_location->id));
+ WriteScalar<voffset_t>(buf_.data() + field_location->id, pos);
+ }
+ ClearOffsets();
+ auto vt1 = reinterpret_cast<voffset_t *>(buf_.data());
+ auto vt1_size = ReadScalar<voffset_t>(vt1);
+ auto vt_use = GetSize();
+ // See if we already have generated a vtable with this exact same
+ // layout before. If so, make it point to the old one, remove this one.
+ if (dedup_vtables_)
+ {
+ for (auto it = buf_.scratch_data(); it < buf_.scratch_end(); it += sizeof(uoffset_t))
+ {
+ auto vt_offset_ptr = reinterpret_cast<uoffset_t *>(it);
+ auto vt2 = reinterpret_cast<voffset_t *>(buf_.data_at(*vt_offset_ptr));
+ auto vt2_size = ReadScalar<voffset_t>(vt2);
+ if (vt1_size != vt2_size || 0 != memcmp(vt2, vt1, vt1_size))
+ continue;
+ vt_use = *vt_offset_ptr;
+ buf_.pop(GetSize() - vtableoffsetloc);
+ break;
+ }
+ }
+ // If this is a new vtable, remember it.
+ if (vt_use == GetSize())
+ {
+ buf_.scratch_push_small(vt_use);
+ }
+ // Fill the vtable offset we created above.
+ // The offset points from the beginning of the object to where the
+ // vtable is stored.
+ // The default offset direction is downward in memory, for future format
+ // flexibility (storing all vtables at the start of the file).
+ WriteScalar(buf_.data_at(vtableoffsetloc),
+ static_cast<soffset_t>(vt_use) - static_cast<soffset_t>(vtableoffsetloc));
+
+ nested = false;
+ return vtableoffsetloc;
+ }
+
+ FLATBUFFERS_ATTRIBUTE(deprecated("call the version above instead"))
+ uoffset_t EndTable(uoffset_t start, voffset_t /*numfields*/) { return EndTable(start); }
+
+ // This checks a required field has been set in a given table that has
+ // just been constructed.
+ template <typename T> void Required(Offset<T> table, voffset_t field);
+
+ uoffset_t StartStruct(size_t alignment)
+ {
+ Align(alignment);
+ return GetSize();
+ }
+
+ uoffset_t EndStruct() { return GetSize(); }
+
+ void ClearOffsets()
+ {
+ buf_.scratch_pop(num_field_loc * sizeof(FieldLoc));
+ num_field_loc = 0;
+ max_voffset_ = 0;
+ }
+
+ // Aligns such that when "len" bytes are written, an object can be written
+ // after it with "alignment" without padding.
+ void PreAlign(size_t len, size_t alignment)
+ {
+ TrackMinAlign(alignment);
+ buf_.fill(PaddingBytes(GetSize() + len, alignment));
+ }
+ template <typename T> void PreAlign(size_t len)
+ {
+ AssertScalarT<T>();
+ PreAlign(len, sizeof(T));
+ }
+ /// @endcond
+
+ /// @brief Store a string in the buffer, which can contain any binary data.
+ /// @param[in] str A const char pointer to the data to be stored as a string.
+ /// @param[in] len The number of bytes that should be stored from `str`.
+ /// @return Returns the offset in the buffer where the string starts.
+ Offset<String> CreateString(const char *str, size_t len)
+ {
+ NotNested();
+ PreAlign<uoffset_t>(len + 1); // Always 0-terminated.
+ buf_.fill(1);
+ PushBytes(reinterpret_cast<const uint8_t *>(str), len);
+ PushElement(static_cast<uoffset_t>(len));
+ return Offset<String>(GetSize());
+ }
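+
+ // Editor's note: a minimal usage sketch (not part of the original header).
+ // CreateString copies the given bytes plus a 0 terminator and returns an
+ // offset that can later be stored as a table field:
+ //
+ //   flatbuffers::FlatBufferBuilder fbb;
+ //   const char raw[] = {'a', 'b', '\0', 'c'};  // embedded 0 bytes are fine
+ //   auto s = fbb.CreateString(raw, sizeof(raw));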
+
+ /// @brief Store a string in the buffer, which is null-terminated.
+ /// @param[in] str A const char pointer to a C-string to add to the buffer.
+ /// @return Returns the offset in the buffer where the string starts.
+ Offset<String> CreateString(const char *str) { return CreateString(str, strlen(str)); }
+
+ /// @brief Store a string in the buffer, which is null-terminated.
+ /// @param[in] str A char pointer to a C-string to add to the buffer.
+ /// @return Returns the offset in the buffer where the string starts.
+ Offset<String> CreateString(char *str) { return CreateString(str, strlen(str)); }
+
+ /// @brief Store a string in the buffer, which can contain any binary data.
+ /// @param[in] str A const reference to a std::string to store in the buffer.
+ /// @return Returns the offset in the buffer where the string starts.
+ Offset<String> CreateString(const std::string &str)
+ {
+ return CreateString(str.c_str(), str.length());
+ }
+#ifdef FLATBUFFERS_HAS_STRING_VIEW
+ /// @brief Store a string in the buffer, which can contain any binary data.
+ /// @param[in] str A const string_view to copy in to the buffer.
+ /// @return Returns the offset in the buffer where the string starts.
+ Offset<String> CreateString(flatbuffers::string_view str)
+ {
+ return CreateString(str.data(), str.size());
+ }
+#endif // FLATBUFFERS_HAS_STRING_VIEW
+
+ /// @brief Store a string in the buffer, which can contain any binary data.
+ /// @param[in] str A const pointer to a `String` struct to add to the buffer.
+ /// @return Returns the offset in the buffer where the string starts
+ Offset<String> CreateString(const String *str)
+ {
+ return str ? CreateString(str->c_str(), str->size()) : 0;
+ }
+
+ /// @brief Store a string in the buffer, which can contain any binary data.
+ /// @param[in] str A const reference to a std::string like type with support
+ /// of T::c_str() and T::length() to store in the buffer.
+ /// @return Returns the offset in the buffer where the string starts.
+ template <typename T> Offset<String> CreateString(const T &str)
+ {
+ return CreateString(str.c_str(), str.length());
+ }
+
+ /// @brief Store a string in the buffer, which can contain any binary data.
+ /// If a string with these exact contents has already been serialized before,
+ /// this simply returns the offset of the existing string instead.
+ /// @param[in] str A const char pointer to the data to be stored as a string.
+ /// @param[in] len The number of bytes that should be stored from `str`.
+ /// @return Returns the offset in the buffer where the string starts.
+ Offset<String> CreateSharedString(const char *str, size_t len)
+ {
+ if (!string_pool)
+ string_pool = new StringOffsetMap(StringOffsetCompare(buf_));
+ auto size_before_string = buf_.size();
+ // Must first serialize the string, since the set is all offsets into
+ // buffer.
+ auto off = CreateString(str, len);
+ auto it = string_pool->find(off);
+ // If it exists we reuse existing serialized data!
+ if (it != string_pool->end())
+ {
+ // We can remove the string we serialized.
+ buf_.pop(buf_.size() - size_before_string);
+ return *it;
+ }
+ // Record this string for future use.
+ string_pool->insert(off);
+ return off;
+ }
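+
+ // Editor's note: a sketch (not original code) of the dedup behavior; the
+ // second call finds the first string in the pool and reuses its offset:
+ //
+ //   flatbuffers::FlatBufferBuilder fbb;
+ //   auto a = fbb.CreateSharedString("twin", 4);
+ //   auto b = fbb.CreateSharedString("twin", 4);  // b.o == a.o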
+
+#ifdef FLATBUFFERS_HAS_STRING_VIEW
+ /// @brief Store a string in the buffer, which can contain any binary data.
+ /// If a string with these exact contents has already been serialized before,
+ /// this simply returns the offset of the existing string instead.
+ /// @param[in] str A const std::string_view to store in the buffer.
+ /// @return Returns the offset in the buffer where the string starts
+ Offset<String> CreateSharedString(const flatbuffers::string_view str)
+ {
+ return CreateSharedString(str.data(), str.size());
+ }
+#else
+ /// @brief Store a string in the buffer, which is null-terminated.
+ /// If a string with these exact contents has already been serialized before,
+ /// this simply returns the offset of the existing string instead.
+ /// @param[in] str A const char pointer to a C-string to add to the buffer.
+ /// @return Returns the offset in the buffer where the string starts.
+ Offset<String> CreateSharedString(const char *str)
+ {
+ return CreateSharedString(str, strlen(str));
+ }
+
+ /// @brief Store a string in the buffer, which can contain any binary data.
+ /// If a string with these exact contents has already been serialized before,
+ /// this simply returns the offset of the existing string instead.
+ /// @param[in] str A const reference to a std::string to store in the buffer.
+ /// @return Returns the offset in the buffer where the string starts.
+ Offset<String> CreateSharedString(const std::string &str)
+ {
+ return CreateSharedString(str.c_str(), str.length());
+ }
+#endif
+
+ /// @brief Store a string in the buffer, which can contain any binary data.
+ /// If a string with these exact contents has already been serialized before,
+ /// this simply returns the offset of the existing string instead.
+ /// @param[in] str A const pointer to a `String` struct to add to the buffer.
+ /// @return Returns the offset in the buffer where the string starts
+ Offset<String> CreateSharedString(const String *str)
+ {
+ return CreateSharedString(str->c_str(), str->size());
+ }
+
+ /// @cond FLATBUFFERS_INTERNAL
+ uoffset_t EndVector(size_t len)
+ {
+ FLATBUFFERS_ASSERT(nested); // Hit if no corresponding StartVector.
+ nested = false;
+ return PushElement(static_cast<uoffset_t>(len));
+ }
+
+ void StartVector(size_t len, size_t elemsize)
+ {
+ NotNested();
+ nested = true;
+ PreAlign<uoffset_t>(len * elemsize);
+ PreAlign(len * elemsize, elemsize); // Just in case elemsize > sizeof(uoffset_t).
+ }
+
+ // Call this right before StartVector/CreateVector if you want to force the
+ // alignment to be something different than what the element size would
+ // normally dictate.
+ // This is useful when storing a nested_flatbuffer in a vector of bytes,
+ // or when storing SIMD floats, etc.
+ void ForceVectorAlignment(size_t len, size_t elemsize, size_t alignment)
+ {
+ FLATBUFFERS_ASSERT(VerifyAlignmentRequirements(alignment));
+ PreAlign(len * elemsize, alignment);
+ }
+
+ // Similar to ForceVectorAlignment but for String fields.
+ void ForceStringAlignment(size_t len, size_t alignment)
+ {
+ FLATBUFFERS_ASSERT(VerifyAlignmentRequirements(alignment));
+ PreAlign((len + 1) * sizeof(char), alignment);
+ }
+
+ /// @endcond
+
+ /// @brief Serialize an array into a FlatBuffer `vector`.
+ /// @tparam T The data type of the array elements.
+ /// @param[in] v A pointer to the array of type `T` to serialize into the
+ /// buffer as a `vector`.
+ /// @param[in] len The number of elements to serialize.
+ /// @return Returns a typed `Offset` into the serialized data indicating
+ /// where the vector is stored.
+ template <typename T> Offset<Vector<T>> CreateVector(const T *v, size_t len)
+ {
+ // If this assert hits, you're specifying a template argument that is
+ // causing the wrong overload to be selected, remove it.
+ AssertScalarT<T>();
+ StartVector(len, sizeof(T));
+ if (len == 0)
+ {
+ return Offset<Vector<T>>(EndVector(len));
+ }
+
+#if FLATBUFFERS_LITTLEENDIAN
+ PushBytes(reinterpret_cast<const uint8_t *>(v), len * sizeof(T));
+#else
+ if (sizeof(T) == 1)
+ {
+ PushBytes(reinterpret_cast<const uint8_t *>(v), len);
+ }
+ else
+ {
+ for (auto i = len; i > 0;)
+ {
+ PushElement(v[--i]);
+ }
+ }
+#endif
+ return Offset<Vector<T>>(EndVector(len));
+ }
+
+ template <typename T> Offset<Vector<Offset<T>>> CreateVector(const Offset<T> *v, size_t len)
+ {
+ StartVector(len, sizeof(Offset<T>));
+ for (auto i = len; i > 0;)
+ {
+ PushElement(v[--i]);
+ }
+ return Offset<Vector<Offset<T>>>(EndVector(len));
+ }
+
+ /// @brief Serialize a `std::vector` into a FlatBuffer `vector`.
+ /// @tparam T The data type of the `std::vector` elements.
+ /// @param v A const reference to the `std::vector` to serialize into the
+ /// buffer as a `vector`.
+ /// @return Returns a typed `Offset` into the serialized data indicating
+ /// where the vector is stored.
+ template <typename T> Offset<Vector<T>> CreateVector(const std::vector<T> &v)
+ {
+ return CreateVector(data(v), v.size());
+ }
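+
+ // Editor's note: a minimal sketch (not original code) of serializing a
+ // scalar std::vector in one call:
+ //
+ //   flatbuffers::FlatBufferBuilder fbb;
+ //   std::vector<int32_t> nums = {1, 2, 3};
+ //   auto vec = fbb.CreateVector(nums);  // Offset<Vector<int32_t>>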
+
+ // vector<bool> may be implemented using a bit-set, so we can't access it as
+ // an array. Instead, read elements manually.
+ // Background: https://isocpp.org/blog/2012/11/on-vectorbool
+ Offset<Vector<uint8_t>> CreateVector(const std::vector<bool> &v)
+ {
+ StartVector(v.size(), sizeof(uint8_t));
+ for (auto i = v.size(); i > 0;)
+ {
+ PushElement(static_cast<uint8_t>(v[--i]));
+ }
+ return Offset<Vector<uint8_t>>(EndVector(v.size()));
+ }
+
+#ifndef FLATBUFFERS_CPP98_STL
+ /// @brief Serialize values returned by a function into a FlatBuffer `vector`.
+ /// This is a convenience function that takes care of iteration for you.
+ /// @tparam T The data type of the `std::vector` elements.
+ /// @param f A function that takes the current iteration 0..vector_size-1 and
+ /// returns any type that you can construct a FlatBuffers vector out of.
+ /// @return Returns a typed `Offset` into the serialized data indicating
+ /// where the vector is stored.
+ template <typename T>
+ Offset<Vector<T>> CreateVector(size_t vector_size, const std::function<T(size_t i)> &f)
+ {
+ std::vector<T> elems(vector_size);
+ for (size_t i = 0; i < vector_size; i++)
+ elems[i] = f(i);
+ return CreateVector(elems);
+ }
+#endif
+
+ /// @brief Serialize values returned by a function into a FlatBuffer `vector`.
+ /// This is a convenience function that takes care of iteration for you.
+ /// @tparam T The data type of the `std::vector` elements.
+ /// @param f A function that takes the current iteration 0..vector_size-1
+ /// and the state parameter, and returns any type that you can construct a
+ /// FlatBuffers vector out of.
+ /// @param state State passed to f.
+ /// @return Returns a typed `Offset` into the serialized data indicating
+ /// where the vector is stored.
+ template <typename T, typename F, typename S>
+ Offset<Vector<T>> CreateVector(size_t vector_size, F f, S *state)
+ {
+ std::vector<T> elems(vector_size);
+ for (size_t i = 0; i < vector_size; i++)
+ elems[i] = f(i, state);
+ return CreateVector(elems);
+ }
+
+ /// @brief Serialize a `std::vector<std::string>` into a FlatBuffer `vector`.
+ /// This is a convenience function for a common case.
+ /// @param v A const reference to the `std::vector` to serialize into the
+ /// buffer as a `vector`.
+ /// @return Returns a typed `Offset` into the serialized data indicating
+ /// where the vector is stored.
+ Offset<Vector<Offset<String>>> CreateVectorOfStrings(const std::vector<std::string> &v)
+ {
+ std::vector<Offset<String>> offsets(v.size());
+ for (size_t i = 0; i < v.size(); i++)
+ offsets[i] = CreateString(v[i]);
+ return CreateVector(offsets);
+ }
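+
+ // Editor's note: a sketch (not original code); each string is serialized
+ // first, then a vector of their offsets is written:
+ //
+ //   std::vector<std::string> names = {"ab", "cd"};
+ //   auto vec = fbb.CreateVectorOfStrings(names);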
+
+ /// @brief Serialize an array of structs into a FlatBuffer `vector`.
+ /// @tparam T The data type of the struct array elements.
+ /// @param[in] v A pointer to the array of type `T` to serialize into the
+ /// buffer as a `vector`.
+ /// @param[in] len The number of elements to serialize.
+ /// @return Returns a typed `Offset` into the serialized data indicating
+ /// where the vector is stored.
+ template <typename T> Offset<Vector<const T *>> CreateVectorOfStructs(const T *v, size_t len)
+ {
+ StartVector(len * sizeof(T) / AlignOf<T>(), AlignOf<T>());
+ PushBytes(reinterpret_cast<const uint8_t *>(v), sizeof(T) * len);
+ return Offset<Vector<const T *>>(EndVector(len));
+ }
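+
+ // Editor's note: a sketch assuming a hypothetical schema-generated struct
+ // `Vec3` (fixed layout, no offsets); structs are stored inline, so the
+ // whole array is pushed as raw bytes:
+ //
+ //   Vec3 points[2] = {Vec3(0, 0, 0), Vec3(1, 2, 3)};
+ //   auto vec = fbb.CreateVectorOfStructs(points, 2);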
+
+ /// @brief Serialize an array of native structs into a FlatBuffer `vector`.
+ /// @tparam T The data type of the struct array elements.
+ /// @tparam S The data type of the native struct array elements.
+ /// @param[in] v A pointer to the array of type `S` to serialize into the
+ /// buffer as a `vector`.
+ /// @param[in] len The number of elements to serialize.
+ /// @param[in] pack_func Pointer to a function to convert the native struct
+ /// to the FlatBuffer struct.
+ /// @return Returns a typed `Offset` into the serialized data indicating
+ /// where the vector is stored.
+ template <typename T, typename S>
+ Offset<Vector<const T *>> CreateVectorOfNativeStructs(const S *v, size_t len,
+ T((*const pack_func)(const S &)))
+ {
+ FLATBUFFERS_ASSERT(pack_func);
+ std::vector<T> vv(len);
+ std::transform(v, v + len, vv.begin(), pack_func);
+ return CreateVectorOfStructs<T>(data(vv), vv.size());
+ }
+
+ /// @brief Serialize an array of native structs into a FlatBuffer `vector`.
+ /// @tparam T The data type of the struct array elements.
+ /// @tparam S The data type of the native struct array elements.
+ /// @param[in] v A pointer to the array of type `S` to serialize into the
+ /// buffer as a `vector`.
+ /// @param[in] len The number of elements to serialize.
+ /// @return Returns a typed `Offset` into the serialized data indicating
+ /// where the vector is stored.
+ template <typename T, typename S>
+ Offset<Vector<const T *>> CreateVectorOfNativeStructs(const S *v, size_t len)
+ {
+ extern T Pack(const S &);
+ return CreateVectorOfNativeStructs(v, len, Pack);
+ }
+
+#ifndef FLATBUFFERS_CPP98_STL
+ /// @brief Serialize an array of structs into a FlatBuffer `vector`.
+ /// @tparam T The data type of the struct array elements.
+ /// @param[in] filler A function that takes the current iteration 0..vector_size-1
+ /// and a pointer to the struct that must be filled.
+ /// @return Returns a typed `Offset` into the serialized data indicating
+ /// where the vector is stored.
+ /// This is mostly useful when flatbuffers are generated with mutation
+ /// accessors.
+ template <typename T>
+ Offset<Vector<const T *>> CreateVectorOfStructs(size_t vector_size,
+ const std::function<void(size_t i, T *)> &filler)
+ {
+ T *structs = StartVectorOfStructs<T>(vector_size);
+ for (size_t i = 0; i < vector_size; i++)
+ {
+ filler(i, structs);
+ structs++;
+ }
+ return EndVectorOfStructs<T>(vector_size);
+ }
+#endif
+
+ /// @brief Serialize an array of structs into a FlatBuffer `vector`.
+ /// @tparam T The data type of the struct array elements.
+ /// @param[in] f A function that takes the current iteration 0..vector_size-1,
+ /// a pointer to the struct that must be filled and the state argument.
+ /// @param[in] state Arbitrary state to pass to f.
+ /// @return Returns a typed `Offset` into the serialized data indicating
+ /// where the vector is stored.
+ /// This is mostly useful when flatbuffers are generated with mutation
+ /// accessors.
+ template <typename T, typename F, typename S>
+ Offset<Vector<const T *>> CreateVectorOfStructs(size_t vector_size, F f, S *state)
+ {
+ T *structs = StartVectorOfStructs<T>(vector_size);
+ for (size_t i = 0; i < vector_size; i++)
+ {
+ f(i, structs, state);
+ structs++;
+ }
+ return EndVectorOfStructs<T>(vector_size);
+ }
+
+ /// @brief Serialize a `std::vector` of structs into a FlatBuffer `vector`.
+ /// @tparam T The data type of the `std::vector` struct elements.
+ /// @param[in] v A const reference to the `std::vector` of structs to
+ /// serialize into the buffer as a `vector`.
+ /// @return Returns a typed `Offset` into the serialized data indicating
+ /// where the vector is stored.
+ template <typename T, typename Alloc>
+ Offset<Vector<const T *>> CreateVectorOfStructs(const std::vector<T, Alloc> &v)
+ {
+ return CreateVectorOfStructs(data(v), v.size());
+ }
+
+ /// @brief Serialize a `std::vector` of native structs into a FlatBuffer
+ /// `vector`.
+ /// @tparam T The data type of the `std::vector` struct elements.
+ /// @tparam S The data type of the `std::vector` native struct elements.
+ /// @param[in] v A const reference to the `std::vector` of structs to
+ /// serialize into the buffer as a `vector`.
+ /// @param[in] pack_func Pointer to a function to convert the native struct
+ /// to the FlatBuffer struct.
+ /// @return Returns a typed `Offset` into the serialized data indicating
+ /// where the vector is stored.
+ template <typename T, typename S>
+ Offset<Vector<const T *>> CreateVectorOfNativeStructs(const std::vector<S> &v,
+ T((*const pack_func)(const S &)))
+ {
+ return CreateVectorOfNativeStructs<T, S>(data(v), v.size(), pack_func);
+ }
+
+ /// @brief Serialize a `std::vector` of native structs into a FlatBuffer
+ /// `vector`.
+ /// @tparam T The data type of the `std::vector` struct elements.
+ /// @tparam S The data type of the `std::vector` native struct elements.
+ /// @param[in] v A const reference to the `std::vector` of structs to
+ /// serialize into the buffer as a `vector`.
+ /// @return Returns a typed `Offset` into the serialized data indicating
+ /// where the vector is stored.
+ template <typename T, typename S>
+ Offset<Vector<const T *>> CreateVectorOfNativeStructs(const std::vector<S> &v)
+ {
+ return CreateVectorOfNativeStructs<T, S>(data(v), v.size());
+ }
+
+ /// @cond FLATBUFFERS_INTERNAL
+ template <typename T> struct StructKeyComparator
+ {
+ bool operator()(const T &a, const T &b) const { return a.KeyCompareLessThan(&b); }
+
+ FLATBUFFERS_DELETE_FUNC(StructKeyComparator &operator=(const StructKeyComparator &));
+ };
+ /// @endcond
+
+ /// @brief Serialize a `std::vector` of structs into a FlatBuffer `vector`
+ /// in sorted order.
+ /// @tparam T The data type of the `std::vector` struct elements.
+ /// @param[in] v A pointer to the `std::vector` of structs to
+ /// serialize into the buffer as a `vector`; the vector is sorted in place.
+ /// @return Returns a typed `Offset` into the serialized data indicating
+ /// where the vector is stored.
+ template <typename T> Offset<Vector<const T *>> CreateVectorOfSortedStructs(std::vector<T> *v)
+ {
+ return CreateVectorOfSortedStructs(data(*v), v->size());
+ }
+
+ /// @brief Serialize a `std::vector` of native structs into a FlatBuffer
+ /// `vector` in sorted order.
+ /// @tparam T The data type of the `std::vector` struct elements.
+ /// @tparam S The data type of the `std::vector` native struct elements.
+ /// @param[in] v A pointer to the `std::vector` of native structs to
+ /// serialize into the buffer as a `vector`; the vector is sorted in place.
+ /// @return Returns a typed `Offset` into the serialized data indicating
+ /// where the vector is stored.
+ template <typename T, typename S>
+ Offset<Vector<const T *>> CreateVectorOfSortedNativeStructs(std::vector<S> *v)
+ {
+ return CreateVectorOfSortedNativeStructs<T, S>(data(*v), v->size());
+ }
+
+ /// @brief Serialize an array of structs into a FlatBuffer `vector` in sorted
+ /// order.
+ /// @tparam T The data type of the struct array elements.
+ /// @param[in] v A pointer to the array of type `T` to serialize into the
+ /// buffer as a `vector`.
+ /// @param[in] len The number of elements to serialize.
+ /// @return Returns a typed `Offset` into the serialized data indicating
+ /// where the vector is stored.
+ template <typename T> Offset<Vector<const T *>> CreateVectorOfSortedStructs(T *v, size_t len)
+ {
+ std::sort(v, v + len, StructKeyComparator<T>());
+ return CreateVectorOfStructs(v, len);
+ }
+
+ /// @brief Serialize an array of native structs into a FlatBuffer `vector` in
+ /// sorted order.
+ /// @tparam T The data type of the struct array elements.
+ /// @tparam S The data type of the native struct array elements.
+ /// @param[in] v A pointer to the array of type `S` to serialize into the
+ /// buffer as a `vector`.
+ /// @param[in] len The number of elements to serialize.
+ /// @return Returns a typed `Offset` into the serialized data indicating
+ /// where the vector is stored.
+ template <typename T, typename S>
+ Offset<Vector<const T *>> CreateVectorOfSortedNativeStructs(S *v, size_t len)
+ {
+ extern T Pack(const S &);
+ std::vector<T> vv(len);
+ std::transform(v, v + len, vv.begin(), Pack);
+ return CreateVectorOfSortedStructs<T>(data(vv), vv.size());
+ }
+
+ /// @cond FLATBUFFERS_INTERNAL
+ template <typename T> struct TableKeyComparator
+ {
+ TableKeyComparator(vector_downward &buf) : buf_(buf) {}
+ TableKeyComparator(const TableKeyComparator &other) : buf_(other.buf_) {}
+ bool operator()(const Offset<T> &a, const Offset<T> &b) const
+ {
+ auto table_a = reinterpret_cast<T *>(buf_.data_at(a.o));
+ auto table_b = reinterpret_cast<T *>(buf_.data_at(b.o));
+ return table_a->KeyCompareLessThan(table_b);
+ }
+ vector_downward &buf_;
+
+ private:
+ FLATBUFFERS_DELETE_FUNC(TableKeyComparator &operator=(const TableKeyComparator &other));
+ };
+ /// @endcond
+
+ /// @brief Serialize an array of `table` offsets as a `vector` in the buffer
+ /// in sorted order.
+ /// @tparam T The data type that the offset refers to.
+ /// @param[in] v An array of type `Offset<T>` that contains the `table`
+ /// offsets to store in the buffer in sorted order.
+ /// @param[in] len The number of elements to store in the `vector`.
+ /// @return Returns a typed `Offset` into the serialized data indicating
+ /// where the vector is stored.
+ template <typename T>
+ Offset<Vector<Offset<T>>> CreateVectorOfSortedTables(Offset<T> *v, size_t len)
+ {
+ std::sort(v, v + len, TableKeyComparator<T>(buf_));
+ return CreateVector(v, len);
+ }
+
+ /// @brief Serialize an array of `table` offsets as a `vector` in the buffer
+ /// in sorted order.
+ /// @tparam T The data type that the offset refers to.
+ /// @param[in] v A pointer to a `std::vector` of `Offset<T>` `table`
+ /// offsets to store in the buffer in sorted order; sorted in place.
+ /// @return Returns a typed `Offset` into the serialized data indicating
+ /// where the vector is stored.
+ template <typename T>
+ Offset<Vector<Offset<T>>> CreateVectorOfSortedTables(std::vector<Offset<T>> *v)
+ {
+ return CreateVectorOfSortedTables(data(*v), v->size());
+ }
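+
+ // Editor's note: a sketch assuming a hypothetical generated table `Monster`
+ // whose schema marks a field as `key` (which generates KeyCompareLessThan);
+ // note that the vector is sorted in place:
+ //
+ //   std::vector<flatbuffers::Offset<Monster>> mons = /* built above */;
+ //   auto sorted = fbb.CreateVectorOfSortedTables(&mons);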
+
+ /// @brief Specialized version of `CreateVector` for non-copying use cases.
+ /// Write the data any time later to the returned buffer pointer `buf`.
+ /// @param[in] len The number of elements to store in the `vector`.
+ /// @param[in] elemsize The size of each element in the `vector`.
+ /// @param[out] buf A pointer to a `uint8_t` pointer that can be
+ /// written to at a later time to serialize the data into a `vector`
+ /// in the buffer.
+ uoffset_t CreateUninitializedVector(size_t len, size_t elemsize, uint8_t **buf)
+ {
+ NotNested();
+ StartVector(len, elemsize);
+ buf_.make_space(len * elemsize);
+ auto vec_start = GetSize();
+ auto vec_end = EndVector(len);
+ *buf = buf_.data_at(vec_start);
+ return vec_end;
+ }
+
+ /// @brief Specialized version of `CreateVector` for non-copying use cases.
+ /// Write the data any time later to the returned buffer pointer `buf`.
+ /// @tparam T The data type of the data that will be stored in the buffer
+ /// as a `vector`.
+ /// @param[in] len The number of elements to store in the `vector`.
+ /// @param[out] buf A pointer to a pointer of type `T` that can be
+ /// written to at a later time to serialize the data into a `vector`
+ /// in the buffer.
+ template <typename T> Offset<Vector<T>> CreateUninitializedVector(size_t len, T **buf)
+ {
+ AssertScalarT<T>();
+ return CreateUninitializedVector(len, sizeof(T), reinterpret_cast<uint8_t **>(buf));
+ }
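+
+ // Editor's note: a sketch (not original code); space is reserved first and
+ // filled afterwards, avoiding a copy. The returned pointer is only valid
+ // until the next allocation into the builder:
+ //
+ //   int32_t *p = nullptr;
+ //   auto vec = fbb.CreateUninitializedVector<int32_t>(4, &p);
+ //   for (int i = 0; i < 4; i++) p[i] = i;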
+
+ template <typename T>
+ Offset<Vector<const T *>> CreateUninitializedVectorOfStructs(size_t len, T **buf)
+ {
+ return CreateUninitializedVector(len, sizeof(T), reinterpret_cast<uint8_t **>(buf));
+ }
+
+ /// @brief Create a vector of scalar type T given as input a vector of scalar
+ /// type U, useful with e.g. pre "enum class" enums, or any existing scalar
+ /// data of the wrong type.
+ template <typename T, typename U> Offset<Vector<T>> CreateVectorScalarCast(const U *v, size_t len)
+ {
+ AssertScalarT<T>();
+ AssertScalarT<U>();
+ StartVector(len, sizeof(T));
+ for (auto i = len; i > 0;)
+ {
+ PushElement(static_cast<T>(v[--i]));
+ }
+ return Offset<Vector<T>>(EndVector(len));
+ }
+
+ /// @brief Write a struct by itself, typically to be part of a union.
+ template <typename T> Offset<const T *> CreateStruct(const T &structobj)
+ {
+ NotNested();
+ Align(AlignOf<T>());
+ buf_.push_small(structobj);
+ return Offset<const T *>(GetSize());
+ }
+
+ /// @brief The length of a FlatBuffer file header.
+ static const size_t kFileIdentifierLength = 4;
+
+ /// @brief Finish serializing a buffer by writing the root offset.
+ /// @param[in] file_identifier If a `file_identifier` is given, the buffer
+ /// will be prefixed with a standard FlatBuffers file header.
+ template <typename T> void Finish(Offset<T> root, const char *file_identifier = nullptr)
+ {
+ Finish(root.o, file_identifier, false);
+ }
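+
+ // Editor's note: a sketch assuming a hypothetical root table `Monster` and
+ // file identifier "MONS" (exactly kFileIdentifierLength characters):
+ //
+ //   flatbuffers::Offset<Monster> root = /* built above */;
+ //   fbb.Finish(root, "MONS");
+ //   const uint8_t *data = fbb.GetBufferPointer();  // safe only after Finish
+ //   size_t size = fbb.GetSize();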
+
+ /// @brief Finish a buffer with a 32-bit size field prefixed (the size of the
+ /// buffer following the size field). These buffers are NOT compatible
+ /// with standard buffers created by Finish, i.e. you can't call GetRoot
+ /// on them; use GetSizePrefixedRoot instead.
+ /// All >32-bit quantities in this buffer will be aligned when the whole
+ /// size-prefixed buffer is aligned.
+ /// These kinds of buffers are useful for creating a stream of FlatBuffers.
+ template <typename T>
+ void FinishSizePrefixed(Offset<T> root, const char *file_identifier = nullptr)
+ {
+ Finish(root.o, file_identifier, true);
+ }
+
+ void SwapBufAllocator(FlatBufferBuilder &other) { buf_.swap_allocator(other.buf_); }
+
+protected:
+ // You shouldn't really be copying instances of this class.
+ FlatBufferBuilder(const FlatBufferBuilder &);
+ FlatBufferBuilder &operator=(const FlatBufferBuilder &);
+
+ void Finish(uoffset_t root, const char *file_identifier, bool size_prefix)
+ {
+ NotNested();
+ buf_.clear_scratch();
+ // This will cause the whole buffer to be aligned.
+ PreAlign((size_prefix ? sizeof(uoffset_t) : 0) + sizeof(uoffset_t) +
+ (file_identifier ? kFileIdentifierLength : 0),
+ minalign_);
+ if (file_identifier)
+ {
+ FLATBUFFERS_ASSERT(strlen(file_identifier) == kFileIdentifierLength);
+ PushBytes(reinterpret_cast<const uint8_t *>(file_identifier), kFileIdentifierLength);
+ }
+ PushElement(ReferTo(root)); // Location of root.
+ if (size_prefix)
+ {
+ PushElement(GetSize());
+ }
+ finished = true;
+ }
+
+ struct FieldLoc
+ {
+ uoffset_t off;
+ voffset_t id;
+ };
+
+ vector_downward buf_;
+
+ // Accumulating offsets of table members while the table is being built.
+ // We store these in the scratch pad of buf_, after the vtable offsets.
+ uoffset_t num_field_loc;
+ // Track how much of the vtable is in use, so we can output the most compact
+ // possible vtable.
+ voffset_t max_voffset_;
+
+ // Ensure objects are not nested.
+ bool nested;
+
+ // Ensure the buffer is finished before it is accessed.
+ bool finished;
+
+ size_t minalign_;
+
+ bool force_defaults_; // Serialize values equal to their defaults anyway.
+
+ bool dedup_vtables_;
+
+ struct StringOffsetCompare
+ {
+ StringOffsetCompare(const vector_downward &buf) : buf_(&buf) {}
+ bool operator()(const Offset<String> &a, const Offset<String> &b) const
+ {
+ auto stra = reinterpret_cast<const String *>(buf_->data_at(a.o));
+ auto strb = reinterpret_cast<const String *>(buf_->data_at(b.o));
+ return StringLessThan(stra->data(), stra->size(), strb->data(), strb->size());
+ }
+ const vector_downward *buf_;
+ };
+
+ // For use with CreateSharedString. Instantiated on first use only.
+ typedef std::set<Offset<String>, StringOffsetCompare> StringOffsetMap;
+ StringOffsetMap *string_pool;
+
+private:
+ // Allocates space for a vector of structures.
+ // Must be completed with EndVectorOfStructs().
+ template <typename T> T *StartVectorOfStructs(size_t vector_size)
+ {
+ StartVector(vector_size * sizeof(T) / AlignOf<T>(), AlignOf<T>());
+ return reinterpret_cast<T *>(buf_.make_space(vector_size * sizeof(T)));
+ }
+
+ // End the vector of structures in the flatbuffers.
+ // The vector should previously have been started with StartVectorOfStructs().
+ template <typename T> Offset<Vector<const T *>> EndVectorOfStructs(size_t vector_size)
+ {
+ return Offset<Vector<const T *>>(EndVector(vector_size));
+ }
+};
+/// @}
+
+/// @cond FLATBUFFERS_INTERNAL
+// Helpers to get a typed pointer to the root object contained in the buffer.
+template <typename T> T *GetMutableRoot(void *buf)
+{
+ EndianCheck();
+ return reinterpret_cast<T *>(reinterpret_cast<uint8_t *>(buf) +
+ EndianScalar(*reinterpret_cast<uoffset_t *>(buf)));
+}
+
+template <typename T> const T *GetRoot(const void *buf)
+{
+ return GetMutableRoot<T>(const_cast<void *>(buf));
+}
+
+template <typename T> const T *GetSizePrefixedRoot(const void *buf)
+{
+ return GetRoot<T>(reinterpret_cast<const uint8_t *>(buf) + sizeof(uoffset_t));
+}
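+
+// Editor's note: a sketch (not original code) of reading back a finished
+// buffer, assuming a hypothetical generated table `Monster`:
+//
+//   auto monster = flatbuffers::GetRoot<Monster>(data);
+//   auto prefixed = flatbuffers::GetSizePrefixedRoot<Monster>(sp_data);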
+
+/// Helpers to get a typed pointer to objects that are currently being built.
+/// @warning Creating new objects will lead to reallocations and invalidates
+/// the pointer!
+template <typename T> T *GetMutableTemporaryPointer(FlatBufferBuilder &fbb, Offset<T> offset)
+{
+ return reinterpret_cast<T *>(fbb.GetCurrentBufferPointer() + fbb.GetSize() - offset.o);
+}
+
+template <typename T> const T *GetTemporaryPointer(FlatBufferBuilder &fbb, Offset<T> offset)
+{
+ return GetMutableTemporaryPointer<T>(fbb, offset);
+}
+
+/// @brief Get a pointer to the file_identifier section of the buffer.
+/// @return Returns a const char pointer to the start of the file_identifier
+/// characters in the buffer. The returned char * has length
+/// 'flatbuffers::FlatBufferBuilder::kFileIdentifierLength'.
+/// This function is UNDEFINED for FlatBuffers whose schema does not include
+/// a file_identifier (it then likely points at padding or the start of the
+/// root vtable).
+inline const char *GetBufferIdentifier(const void *buf, bool size_prefixed = false)
+{
+ return reinterpret_cast<const char *>(buf) +
+ ((size_prefixed) ? 2 * sizeof(uoffset_t) : sizeof(uoffset_t));
+}
+
+// Helper to see if the identifier in a buffer has the expected value.
+inline bool BufferHasIdentifier(const void *buf, const char *identifier, bool size_prefixed = false)
+{
+ return strncmp(GetBufferIdentifier(buf, size_prefixed), identifier,
+ FlatBufferBuilder::kFileIdentifierLength) == 0;
+}
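+
+// Editor's note: a sketch (assumption) of checking the identifier before
+// trusting an incoming buffer; "MONS" is a hypothetical identifier:
+//
+//   if (flatbuffers::BufferHasIdentifier(data, "MONS")) { /* use buffer */ }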
+
+// Helper class to verify the integrity of a FlatBuffer
+class Verifier FLATBUFFERS_FINAL_CLASS
+{
+public:
+ Verifier(const uint8_t *buf, size_t buf_len, uoffset_t _max_depth = 64,
+ uoffset_t _max_tables = 1000000, bool _check_alignment = true)
+ : buf_(buf), size_(buf_len), depth_(0), max_depth_(_max_depth), num_tables_(0),
+ max_tables_(_max_tables), upper_bound_(0), check_alignment_(_check_alignment)
+ {
+ FLATBUFFERS_ASSERT(size_ < FLATBUFFERS_MAX_BUFFER_SIZE);
+ }
+
+ // Central location where any verification failures register.
+ bool Check(bool ok) const
+ {
+#ifdef FLATBUFFERS_DEBUG_VERIFICATION_FAILURE
+ FLATBUFFERS_ASSERT(ok);
+#endif
+#ifdef FLATBUFFERS_TRACK_VERIFIER_BUFFER_SIZE
+ if (!ok)
+ upper_bound_ = 0;
+#endif
+ return ok;
+ }
+
+ // Verify any range within the buffer.
+ bool Verify(size_t elem, size_t elem_len) const
+ {
+#ifdef FLATBUFFERS_TRACK_VERIFIER_BUFFER_SIZE
+ auto upper_bound = elem + elem_len;
+ if (upper_bound_ < upper_bound)
+ upper_bound_ = upper_bound;
+#endif
+ return Check(elem_len < size_ && elem <= size_ - elem_len);
+ }
+
+ template <typename T> bool VerifyAlignment(size_t elem) const
+ {
+ return Check((elem & (sizeof(T) - 1)) == 0 || !check_alignment_);
+ }
+
+ // Verify a range indicated by sizeof(T).
+ template <typename T> bool Verify(size_t elem) const
+ {
+ return VerifyAlignment<T>(elem) && Verify(elem, sizeof(T));
+ }
+
+ bool VerifyFromPointer(const uint8_t *p, size_t len)
+ {
+ auto o = static_cast<size_t>(p - buf_);
+ return Verify(o, len);
+ }
+
+ // Verify relative to a known-good base pointer.
+ bool Verify(const uint8_t *base, voffset_t elem_off, size_t elem_len) const
+ {
+ return Verify(static_cast<size_t>(base - buf_) + elem_off, elem_len);
+ }
+
+ template <typename T> bool Verify(const uint8_t *base, voffset_t elem_off) const
+ {
+ return Verify(static_cast<size_t>(base - buf_) + elem_off, sizeof(T));
+ }
+
+ // Verify a pointer (may be NULL) of a table type.
+ template <typename T> bool VerifyTable(const T *table) { return !table || table->Verify(*this); }
+
+ // Verify a pointer (may be NULL) of any vector type.
+ template <typename T> bool VerifyVector(const Vector<T> *vec) const
+ {
+ return !vec || VerifyVectorOrString(reinterpret_cast<const uint8_t *>(vec), sizeof(T));
+ }
+
+ // Verify a pointer (may be NULL) of a vector to struct.
+ template <typename T> bool VerifyVector(const Vector<const T *> *vec) const
+ {
+ return VerifyVector(reinterpret_cast<const Vector<T> *>(vec));
+ }
+
+ // Verify a pointer (may be NULL) to string.
+ bool VerifyString(const String *str) const
+ {
+ size_t end;
+ return !str || (VerifyVectorOrString(reinterpret_cast<const uint8_t *>(str), 1, &end) &&
+ Verify(end, 1) && // Must have terminator
+ Check(buf_[end] == '\0')); // Terminating byte must be 0.
+ }
+
+ // Common code between vectors and strings.
+ bool VerifyVectorOrString(const uint8_t *vec, size_t elem_size, size_t *end = nullptr) const
+ {
+ auto veco = static_cast<size_t>(vec - buf_);
+ // Check we can read the size field.
+ if (!Verify<uoffset_t>(veco))
+ return false;
+ // Check the whole array. If this is a string, the byte past the array
+ // must be 0.
+ auto size = ReadScalar<uoffset_t>(vec);
+ auto max_elems = FLATBUFFERS_MAX_BUFFER_SIZE / elem_size;
+ if (!Check(size < max_elems))
+ return false; // Protect against byte_size overflowing.
+ auto byte_size = sizeof(size) + elem_size * size;
+ if (end)
+ *end = veco + byte_size;
+ return Verify(veco, byte_size);
+ }
+
+ // Special case for string contents, after the above has been called.
+ bool VerifyVectorOfStrings(const Vector<Offset<String>> *vec) const
+ {
+ if (vec)
+ {
+ for (uoffset_t i = 0; i < vec->size(); i++)
+ {
+ if (!VerifyString(vec->Get(i)))
+ return false;
+ }
+ }
+ return true;
+ }
+
+ // Special case for table contents, after the above has been called.
+ template <typename T> bool VerifyVectorOfTables(const Vector<Offset<T>> *vec)
+ {
+ if (vec)
+ {
+ for (uoffset_t i = 0; i < vec->size(); i++)
+ {
+ if (!vec->Get(i)->Verify(*this))
+ return false;
+ }
+ }
+ return true;
+ }
+
+ __supress_ubsan__("unsigned-integer-overflow") bool VerifyTableStart(const uint8_t *table)
+ {
+ // Check the vtable offset.
+ auto tableo = static_cast<size_t>(table - buf_);
+ if (!Verify<soffset_t>(tableo))
+ return false;
+ // This offset may be signed, but doing the subtraction unsigned always
+ // gives the result we want.
+ auto vtableo = tableo - static_cast<size_t>(ReadScalar<soffset_t>(table));
+ // Check the vtable size field, then check vtable fits in its entirety.
+ return VerifyComplexity() && Verify<voffset_t>(vtableo) &&
+ VerifyAlignment<voffset_t>(ReadScalar<voffset_t>(buf_ + vtableo)) &&
+ Verify(vtableo, ReadScalar<voffset_t>(buf_ + vtableo));
+ }
+
+ template <typename T> bool VerifyBufferFromStart(const char *identifier, size_t start)
+ {
+ if (identifier && !Check((size_ >= 2 * sizeof(flatbuffers::uoffset_t) &&
+ BufferHasIdentifier(buf_ + start, identifier))))
+ {
+ return false;
+ }
+
+ // Call T::Verify, which must be in the generated code for this type.
+ auto o = VerifyOffset(start);
+ return o && reinterpret_cast<const T *>(buf_ + start + o)->Verify(*this)
+#ifdef FLATBUFFERS_TRACK_VERIFIER_BUFFER_SIZE
+ && GetComputedSize()
+#endif
+ ;
+ }
+
+ // Verify this whole buffer, starting with root type T.
+ template <typename T> bool VerifyBuffer() { return VerifyBuffer<T>(nullptr); }
+
+ template <typename T> bool VerifyBuffer(const char *identifier)
+ {
+ return VerifyBufferFromStart<T>(identifier, 0);
+ }
+
+ template <typename T> bool VerifySizePrefixedBuffer(const char *identifier)
+ {
+ return Verify<uoffset_t>(0U) && ReadScalar<uoffset_t>(buf_) == size_ - sizeof(uoffset_t) &&
+ VerifyBufferFromStart<T>(identifier, sizeof(uoffset_t));
+ }
+
+ uoffset_t VerifyOffset(size_t start) const
+ {
+ if (!Verify<uoffset_t>(start))
+ return 0;
+ auto o = ReadScalar<uoffset_t>(buf_ + start);
+ // May not point to itself.
+ if (!Check(o != 0))
+ return 0;
+ // Can't wrap around / buffers are max 2GB.
+ if (!Check(static_cast<soffset_t>(o) >= 0))
+ return 0;
+ // Must be inside the buffer to create a pointer from it (pointer outside
+ // buffer is UB).
+ if (!Verify(start + o, 1))
+ return 0;
+ return o;
+ }
+
+ uoffset_t VerifyOffset(const uint8_t *base, voffset_t start) const
+ {
+ return VerifyOffset(static_cast<size_t>(base - buf_) + start);
+ }
+
+ // Called at the start of a table to increase counters measuring data
+ // structure depth and amount, possibly bailing out with false if limits
+ // set by the constructor have been hit. Needs to be balanced
+ // with EndTable().
+ bool VerifyComplexity()
+ {
+ depth_++;
+ num_tables_++;
+ return Check(depth_ <= max_depth_ && num_tables_ <= max_tables_);
+ }
+
+ // Called at the end of a table to pop the depth count.
+ bool EndTable()
+ {
+ depth_--;
+ return true;
+ }
+
+ // Returns the message size in bytes
+ size_t GetComputedSize() const
+ {
+#ifdef FLATBUFFERS_TRACK_VERIFIER_BUFFER_SIZE
+ uintptr_t size = upper_bound_;
+ // Align the size to uoffset_t
+ size = (size - 1 + sizeof(uoffset_t)) & ~(sizeof(uoffset_t) - 1);
+ return (size > size_) ? 0 : size;
+#else
+ // Must turn on FLATBUFFERS_TRACK_VERIFIER_BUFFER_SIZE for this to work.
+ (void)upper_bound_;
+ FLATBUFFERS_ASSERT(false);
+ return 0;
+#endif
+ }
+
+private:
+ const uint8_t *buf_;
+ size_t size_;
+ uoffset_t depth_;
+ uoffset_t max_depth_;
+ uoffset_t num_tables_;
+ uoffset_t max_tables_;
+ mutable size_t upper_bound_;
+ bool check_alignment_;
+};
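+
+// Editor's note: a sketch assuming a hypothetical generated table `Monster`
+// (generated code supplies Monster::Verify); verify untrusted bytes before
+// calling GetRoot on them:
+//
+//   flatbuffers::Verifier verifier(data, size);
+//   bool ok = verifier.VerifyBuffer<Monster>(nullptr);  // or an identifier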
+
+// Convenient way to bundle a buffer and its length, to pass it around
+// typed by its root.
+// A BufferRef does not own its buffer.
+struct BufferRefBase
+{
+}; // for std::is_base_of
+template <typename T> struct BufferRef : BufferRefBase
+{
+ BufferRef() : buf(nullptr), len(0), must_free(false) {}
+ BufferRef(uint8_t *_buf, uoffset_t _len) : buf(_buf), len(_len), must_free(false) {}
+
+ ~BufferRef()
+ {
+ if (must_free)
+ free(buf);
+ }
+
+ const T *GetRoot() const { return flatbuffers::GetRoot<T>(buf); }
+
+ bool Verify()
+ {
+ Verifier verifier(buf, len);
+ return verifier.VerifyBuffer<T>(nullptr);
+ }
+
+ uint8_t *buf;
+ uoffset_t len;
+ bool must_free;
+};
+
+// "structs" are flat structures that do not have an offset table, thus
+// always have all members present and do not support forwards/backwards
+// compatible extensions.
+
+class Struct FLATBUFFERS_FINAL_CLASS
+{
+public:
+ template <typename T> T GetField(uoffset_t o) const { return ReadScalar<T>(&data_[o]); }
+
+ template <typename T> T GetStruct(uoffset_t o) const { return reinterpret_cast<T>(&data_[o]); }
+
+ const uint8_t *GetAddressOf(uoffset_t o) const { return &data_[o]; }
+ uint8_t *GetAddressOf(uoffset_t o) { return &data_[o]; }
+
+private:
+ // private constructor & copy constructor: you obtain instances of this
+ // class by pointing to existing data only
+ Struct();
+ Struct(const Struct &);
+ Struct &operator=(const Struct &);
+
+ uint8_t data_[1];
+};
+
+// "tables" use an offset table (possibly shared) that allows fields to be
+// omitted and added at will, but uses an extra indirection to read.
+class Table
+{
+public:
+ const uint8_t *GetVTable() const { return data_ - ReadScalar<soffset_t>(data_); }
+
+ // This gets the field offset for any of the functions below it, or 0
+ // if the field was not present.
+ voffset_t GetOptionalFieldOffset(voffset_t field) const
+ {
+ // The vtable offset is always at the start.
+ auto vtable = GetVTable();
+ // The first element is the size of the vtable (fields + type id + itself).
+ auto vtsize = ReadScalar<voffset_t>(vtable);
+ // If the field we're accessing is outside the vtable, we're reading older
+ // data, so it's the same as if the offset was 0 (not present).
+ return field < vtsize ? ReadScalar<voffset_t>(vtable + field) : 0;
+ }
+
+ template <typename T> T GetField(voffset_t field, T defaultval) const
+ {
+ auto field_offset = GetOptionalFieldOffset(field);
+ return field_offset ? ReadScalar<T>(data_ + field_offset) : defaultval;
+ }
+
+ template <typename P> P GetPointer(voffset_t field)
+ {
+ auto field_offset = GetOptionalFieldOffset(field);
+ auto p = data_ + field_offset;
+ return field_offset ? reinterpret_cast<P>(p + ReadScalar<uoffset_t>(p)) : nullptr;
+ }
+ template <typename P> P GetPointer(voffset_t field) const
+ {
+ return const_cast<Table *>(this)->GetPointer<P>(field);
+ }
+
+ template <typename P> P GetStruct(voffset_t field) const
+ {
+ auto field_offset = GetOptionalFieldOffset(field);
+ auto p = const_cast<uint8_t *>(data_ + field_offset);
+ return field_offset ? reinterpret_cast<P>(p) : nullptr;
+ }
+
+ template <typename Raw, typename Face>
+ flatbuffers::Optional<Face> GetOptional(voffset_t field) const
+ {
+ auto field_offset = GetOptionalFieldOffset(field);
+ auto p = data_ + field_offset;
+ return field_offset ? Optional<Face>(static_cast<Face>(ReadScalar<Raw>(p))) : Optional<Face>();
+ }
+
+ template <typename T> bool SetField(voffset_t field, T val, T def)
+ {
+ auto field_offset = GetOptionalFieldOffset(field);
+ if (!field_offset)
+ return IsTheSameAs(val, def);
+ WriteScalar(data_ + field_offset, val);
+ return true;
+ }
+ template <typename T> bool SetField(voffset_t field, T val)
+ {
+ auto field_offset = GetOptionalFieldOffset(field);
+ if (!field_offset)
+ return false;
+ WriteScalar(data_ + field_offset, val);
+ return true;
+ }
+
+ bool SetPointer(voffset_t field, const uint8_t *val)
+ {
+ auto field_offset = GetOptionalFieldOffset(field);
+ if (!field_offset)
+ return false;
+ WriteScalar(data_ + field_offset, static_cast<uoffset_t>(val - (data_ + field_offset)));
+ return true;
+ }
+
+ uint8_t *GetAddressOf(voffset_t field)
+ {
+ auto field_offset = GetOptionalFieldOffset(field);
+ return field_offset ? data_ + field_offset : nullptr;
+ }
+ const uint8_t *GetAddressOf(voffset_t field) const
+ {
+ return const_cast<Table *>(this)->GetAddressOf(field);
+ }
+
+ bool CheckField(voffset_t field) const { return GetOptionalFieldOffset(field) != 0; }
+
+ // Verify the vtable of this table.
+ // Call this once per table, followed by VerifyField once per field.
+ bool VerifyTableStart(Verifier &verifier) const { return verifier.VerifyTableStart(data_); }
+
+ // Verify a particular field.
+ template <typename T> bool VerifyField(const Verifier &verifier, voffset_t field) const
+ {
+ // Calling GetOptionalFieldOffset should be safe now thanks to
+ // VerifyTable().
+ auto field_offset = GetOptionalFieldOffset(field);
+ // Check the actual field.
+ return !field_offset || verifier.Verify<T>(data_, field_offset);
+ }
+
+ // VerifyField for required fields.
+ template <typename T> bool VerifyFieldRequired(const Verifier &verifier, voffset_t field) const
+ {
+ auto field_offset = GetOptionalFieldOffset(field);
+ return verifier.Check(field_offset != 0) && verifier.Verify<T>(data_, field_offset);
+ }
+
+ // Versions for offsets.
+ bool VerifyOffset(const Verifier &verifier, voffset_t field) const
+ {
+ auto field_offset = GetOptionalFieldOffset(field);
+ return !field_offset || verifier.VerifyOffset(data_, field_offset);
+ }
+
+ bool VerifyOffsetRequired(const Verifier &verifier, voffset_t field) const
+ {
+ auto field_offset = GetOptionalFieldOffset(field);
+ return verifier.Check(field_offset != 0) && verifier.VerifyOffset(data_, field_offset);
+ }
+
+private:
+ // private constructor & copy constructor: you obtain instances of this
+ // class by pointing to existing data only
+ Table();
+ Table(const Table &other);
+ Table &operator=(const Table &);
+
+ uint8_t data_[1];
+};
+
+// This specialization allows avoiding warnings like:
+// MSVC C4800: type: forcing value to bool 'true' or 'false'.
+template <>
+inline flatbuffers::Optional<bool> Table::GetOptional<uint8_t, bool>(voffset_t field) const
+{
+ auto field_offset = GetOptionalFieldOffset(field);
+ auto p = data_ + field_offset;
+ return field_offset ? Optional<bool>(ReadScalar<uint8_t>(p) != 0) : Optional<bool>();
+}
+
+template <typename T> void FlatBufferBuilder::Required(Offset<T> table, voffset_t field)
+{
+ auto table_ptr = reinterpret_cast<const Table *>(buf_.data_at(table.o));
+ bool ok = table_ptr->GetOptionalFieldOffset(field) != 0;
+ // If this fails, the caller will show what field needs to be set.
+ FLATBUFFERS_ASSERT(ok);
+ (void)ok;
+}
+
+/// @brief This can compute the start of a FlatBuffer from a root pointer, i.e.
+/// it is the opposite transformation of GetRoot().
+/// This may be useful if you want to pass on a root and have the recipient
+/// delete the buffer afterwards.
+inline const uint8_t *GetBufferStartFromRootPointer(const void *root)
+{
+ auto table = reinterpret_cast<const Table *>(root);
+ auto vtable = table->GetVTable();
+ // Either the vtable is before the root or after the root.
+ auto start = (std::min)(vtable, reinterpret_cast<const uint8_t *>(root));
+ // Align to at least sizeof(uoffset_t).
+ start = reinterpret_cast<const uint8_t *>(reinterpret_cast<uintptr_t>(start) &
+ ~(sizeof(uoffset_t) - 1));
+ // Additionally, there may be a file_identifier in the buffer, and the root
+ // offset. The buffer may have been aligned to any size between
+ // sizeof(uoffset_t) and FLATBUFFERS_MAX_ALIGNMENT (see "force_align").
+ // Sadly, the exact alignment is only known when constructing the buffer,
+ // since it depends on the presence of values with said alignment properties.
+ // So instead, we simply look at the next uoffset_t values (root,
+ // file_identifier, and alignment padding) to see which points to the root.
+ // None of the other values can "impersonate" the root since they will either
+ // be 0 or four ASCII characters.
+ static_assert(FlatBufferBuilder::kFileIdentifierLength == sizeof(uoffset_t),
+ "file_identifier is assumed to be the same size as uoffset_t");
+ for (auto possible_roots = FLATBUFFERS_MAX_ALIGNMENT / sizeof(uoffset_t) + 1; possible_roots;
+ possible_roots--)
+ {
+ start -= sizeof(uoffset_t);
+ if (ReadScalar<uoffset_t>(start) + start == reinterpret_cast<const uint8_t *>(root))
+ return start;
+ }
+ // We didn't find the root, either the "root" passed isn't really a root,
+ // or the buffer is corrupt.
+ // Assert, because calling this function with bad data may cause reads
+ // outside of buffer boundaries.
+ FLATBUFFERS_ASSERT(false);
+ return nullptr;
+}
+
+/// @brief This returns the prefixed size of a FlatBuffer.
+inline uoffset_t GetPrefixedSize(const uint8_t *buf) { return ReadScalar<uoffset_t>(buf); }
+
+// Base class for native objects (FlatBuffer data de-serialized into native
+// C++ data structures).
+// Contains no functionality, purely documentative.
+struct NativeTable
+{
+};
+
+/// @brief Function types to be used with resolving hashes into objects and
+/// back again. The resolver gets a pointer to a field inside an object API
+/// object that is of the type specified in the schema using the attribute
+/// `cpp_type` (it is thus important whatever you write to this address
+/// matches that type). The value of this field is initially null, so you
+/// may choose to implement a delayed binding lookup using this function
+/// if you wish. The resolver does the opposite lookup, for when the object
+/// is being serialized again.
+typedef uint64_t hash_value_t;
+// clang-format off
+#ifdef FLATBUFFERS_CPP98_STL
+ typedef void (*resolver_function_t)(void **pointer_adr, hash_value_t hash);
+ typedef hash_value_t (*rehasher_function_t)(void *pointer);
+#else
+ typedef std::function<void (void **pointer_adr, hash_value_t hash)>
+ resolver_function_t;
+ typedef std::function<hash_value_t (void *pointer)> rehasher_function_t;
+#endif
+// clang-format on
+
+// Helper function to test if a field is present, using any of the field
+// enums in the generated code.
+// `table` must be a generated table type. Since this is a template parameter,
+// this is not typechecked to be a subclass of Table, so beware!
+// Note: this function will return false for fields equal to the default
+// value, since they're not stored in the buffer (unless force_defaults was
+// used).
+template <typename T> bool IsFieldPresent(const T *table, typename T::FlatBuffersVTableOffset field)
+{
+ // Cast, since Table is a private baseclass of any table types.
+ return reinterpret_cast<const Table *>(table)->CheckField(static_cast<voffset_t>(field));
+}
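+
+// Editor's note: a sketch assuming a hypothetical generated table `Monster`
+// with a field enum value `Monster::VT_HP`:
+//
+//   bool has_hp = flatbuffers::IsFieldPresent(monster, Monster::VT_HP);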
+
+// Utility function for reverse lookups on the EnumNames*() functions
+// (in the generated C++ code)
+// names must be NULL terminated.
+inline int LookupEnum(const char **names, const char *name)
+{
+ for (const char **p = names; *p; p++)
+ if (!strcmp(*p, name))
+ return static_cast<int>(p - names);
+ return -1;
+}
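+
+// Editor's note: a sketch (not original code); `names` mimics a generated
+// EnumNames*() table, which must be NULL-terminated:
+//
+//   static const char *names[] = {"Red", "Green", "Blue", nullptr};
+//   int idx = flatbuffers::LookupEnum(names, "Green");  // -> 1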
+
+// These macros allow us to lay out a struct with a guarantee that it will
+// end up looking the same on different compilers and platforms.
+// They do this by preventing the compiler from doing any padding, and
+// instead insert explicit padding fields that make every
+// element aligned to its own size.
+// Additionally, it manually sets the alignment of the struct as a whole,
+// which is typically its largest element, or a custom size set in the schema
+// by the force_align attribute.
+// These are used in the generated code only.
+
+// clang-format off
+#if defined(_MSC_VER)
+ #define FLATBUFFERS_MANUALLY_ALIGNED_STRUCT(alignment) \
+ __pragma(pack(1)) \
+ struct __declspec(align(alignment))
+ #define FLATBUFFERS_STRUCT_END(name, size) \
+ __pragma(pack()) \
+ static_assert(sizeof(name) == size, "compiler breaks packing rules")
+#elif defined(__GNUC__) || defined(__clang__) || defined(__ICCARM__)
+ #define FLATBUFFERS_MANUALLY_ALIGNED_STRUCT(alignment) \
+ _Pragma("pack(1)") \
+ struct __attribute__((aligned(alignment)))
+ #define FLATBUFFERS_STRUCT_END(name, size) \
+ _Pragma("pack()") \
+ static_assert(sizeof(name) == size, "compiler breaks packing rules")
+#else
+ #error Unknown compiler, please define structure alignment macros
+#endif
+// clang-format on
+
+// Minimal reflection via code generation.
+// Besides full-fat reflection (see reflection.h) and parsing/printing by
+// loading schemas (see idl.h), we can also have code generation for minimal
+// reflection data which allows pretty-printing and other uses without needing
+// a schema or a parser.
+// Generate code with --reflect-types (types only) or --reflect-names (names
+// also) to enable.
+// See minireflect.h for utilities using this functionality.
+
+// These types are organized slightly differently than the ones in idl.h.
+enum SequenceType
+{
+ ST_TABLE,
+ ST_STRUCT,
+ ST_UNION,
+ ST_ENUM
+};
+
+// Scalars have the same order as in idl.h
+// clang-format off
+#define FLATBUFFERS_GEN_ELEMENTARY_TYPES(ET) \
+ ET(ET_UTYPE) \
+ ET(ET_BOOL) \
+ ET(ET_CHAR) \
+ ET(ET_UCHAR) \
+ ET(ET_SHORT) \
+ ET(ET_USHORT) \
+ ET(ET_INT) \
+ ET(ET_UINT) \
+ ET(ET_LONG) \
+ ET(ET_ULONG) \
+ ET(ET_FLOAT) \
+ ET(ET_DOUBLE) \
+ ET(ET_STRING) \
+ ET(ET_SEQUENCE) // See SequenceType.
+
+enum ElementaryType {
+ #define FLATBUFFERS_ET(E) E,
+ FLATBUFFERS_GEN_ELEMENTARY_TYPES(FLATBUFFERS_ET)
+ #undef FLATBUFFERS_ET
+};
+
+inline const char * const *ElementaryTypeNames() {
+ static const char * const names[] = {
+ #define FLATBUFFERS_ET(E) #E,
+ FLATBUFFERS_GEN_ELEMENTARY_TYPES(FLATBUFFERS_ET)
+ #undef FLATBUFFERS_ET
+ };
+ return names;
+}
+// clang-format on
+
+// Basic type info costs just 16 bits per field!
+// We're explicitly defining the signedness since the signedness of integer
+// bitfields is otherwise implementation-defined and causes warnings on older
+// GCC compilers.
+struct TypeCode
+{
+ // ElementaryType
+ unsigned short base_type : 4;
+ // Either vector (in table) or array (in struct)
+ unsigned short is_repeating : 1;
+ // Index into type_refs below, or -1 for none.
+ signed short sequence_ref : 11;
+};
+
+static_assert(sizeof(TypeCode) == 2, "TypeCode");
+
+struct TypeTable;
+
+// Signature of the static method present in each type.
+typedef const TypeTable *(*TypeFunction)();
+
+struct TypeTable
+{
+ SequenceType st;
+ size_t num_elems; // of type_codes, values, names (but not type_refs).
+ const TypeCode *type_codes; // num_elems count
+ const TypeFunction *type_refs; // less than num_elems entries (see TypeCode).
+ const int16_t *array_sizes; // less than num_elems entries (see TypeCode).
+ const int64_t *values; // Only set for non-consecutive enum/union or structs.
+ const char *const *names; // Only set if compiled with --reflect-names.
+};
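+
+// Illustrative sketch (hand-written, not generated output): the TypeTable one
+// might get for a hypothetical table `Point { x:int; y:int; }` compiled with
+// --reflect-names:
+//
+//   inline const TypeTable *PointTypeTable() {
+//     static const TypeCode type_codes[] = { {ET_INT, 0, -1}, {ET_INT, 0, -1} };
+//     static const char *const names[] = { "x", "y" };
+//     static const TypeTable tt =
+//       { ST_TABLE, 2, type_codes, nullptr, nullptr, nullptr, names };
+//     return &tt;
+//   }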
+
+// String which identifies the current version of FlatBuffers.
+// flatbuffer_version_string is used by Google developers to identify which
+// applications uploaded to Google Play are using this library. This allows
+// the development team at Google to determine the popularity of the library.
+// How it works: Applications that are uploaded to the Google Play Store are
+// scanned for this version string. We track which applications are using it
+// to measure popularity. You are free to remove it (of course) but we would
+// appreciate if you left it in.
+
+// Weak linkage is culled by VS & doesn't work on cygwin.
+// clang-format off
+#if !defined(_WIN32) && !defined(__CYGWIN__)
+
+extern volatile __attribute__((weak)) const char *flatbuffer_version_string;
+volatile __attribute__((weak)) const char *flatbuffer_version_string =
+ "FlatBuffers "
+ FLATBUFFERS_STRING(FLATBUFFERS_VERSION_MAJOR) "."
+ FLATBUFFERS_STRING(FLATBUFFERS_VERSION_MINOR) "."
+ FLATBUFFERS_STRING(FLATBUFFERS_VERSION_REVISION);
+
+#endif // !defined(_WIN32) && !defined(__CYGWIN__)
+
+#define FLATBUFFERS_DEFINE_BITMASK_OPERATORS(E, T)\
+ inline E operator | (E lhs, E rhs){\
+ return E(T(lhs) | T(rhs));\
+ }\
+ inline E operator & (E lhs, E rhs){\
+ return E(T(lhs) & T(rhs));\
+ }\
+ inline E operator ^ (E lhs, E rhs){\
+ return E(T(lhs) ^ T(rhs));\
+ }\
+ inline E operator ~ (E lhs){\
+ return E(~T(lhs));\
+ }\
+ inline E operator |= (E &lhs, E rhs){\
+ lhs = lhs | rhs;\
+ return lhs;\
+ }\
+ inline E operator &= (E &lhs, E rhs){\
+ lhs = lhs & rhs;\
+ return lhs;\
+ }\
+ inline E operator ^= (E &lhs, E rhs){\
+ lhs = lhs ^ rhs;\
+ return lhs;\
+ }\
+ inline bool operator !(E rhs) \
+ {\
+ return !bool(T(rhs)); \
+ }
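+
+// Illustrative use on a hypothetical flags enum:
+//
+//   enum class Perm : uint8_t { kRead = 1, kWrite = 2 };
+//   FLATBUFFERS_DEFINE_BITMASK_OPERATORS(Perm, uint8_t)
+//   // Perm rw = Perm::kRead | Perm::kWrite;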
+/// @endcond
+} // namespace flatbuffers
+
+// clang-format on
+
+#endif // FLATBUFFERS_H_
diff --git a/onert-micro/externals/flatbuffers/flatc.h b/onert-micro/externals/flatbuffers/flatc.h
new file mode 100644
index 000000000..594bf792a
--- /dev/null
+++ b/onert-micro/externals/flatbuffers/flatc.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 Google Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FLATBUFFERS_FLATC_H_
+#define FLATBUFFERS_FLATC_H_
+
+#include <functional>
+#include <limits>
+#include <string>
+
+#include "flatbuffers/flatbuffers.h"
+#include "flatbuffers/idl.h"
+#include "flatbuffers/util.h"
+
+namespace flatbuffers
+{
+
+extern void LogCompilerWarn(const std::string &warn);
+extern void LogCompilerError(const std::string &err);
+
+class FlatCompiler
+{
+public:
+ // Output generator for the various programming languages and formats we
+ // support.
+ struct Generator
+ {
+ typedef bool (*GenerateFn)(const flatbuffers::Parser &parser, const std::string &path,
+ const std::string &file_name);
+ typedef std::string (*MakeRuleFn)(const flatbuffers::Parser &parser, const std::string &path,
+ const std::string &file_name);
+
+ GenerateFn generate;
+ const char *generator_opt_short;
+ const char *generator_opt_long;
+ const char *lang_name;
+ bool schema_only;
+ GenerateFn generateGRPC;
+ flatbuffers::IDLOptions::Language lang;
+ const char *generator_help;
+ MakeRuleFn make_rule;
+ };
+
+ typedef void (*WarnFn)(const FlatCompiler *flatc, const std::string &warn, bool show_exe_name);
+
+ typedef void (*ErrorFn)(const FlatCompiler *flatc, const std::string &err, bool usage,
+ bool show_exe_name);
+
+ // Parameters required to initialize the FlatCompiler.
+ struct InitParams
+ {
+ InitParams() : generators(nullptr), num_generators(0), warn_fn(nullptr), error_fn(nullptr) {}
+
+ const Generator *generators;
+ size_t num_generators;
+ WarnFn warn_fn;
+ ErrorFn error_fn;
+ };
+
+ explicit FlatCompiler(const InitParams &params) : params_(params) {}
+
+ int Compile(int argc, const char **argv);
+
+ std::string GetUsageString(const char *program_name) const;
+
+private:
+ void ParseFile(flatbuffers::Parser &parser, const std::string &filename,
+ const std::string &contents, std::vector<const char *> &include_directories) const;
+
+ void LoadBinarySchema(Parser &parser, const std::string &filename, const std::string &contents);
+
+ void Warn(const std::string &warn, bool show_exe_name = true) const;
+
+ void Error(const std::string &err, bool usage = true, bool show_exe_name = true) const;
+
+ InitParams params_;
+};
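+
+// Illustrative embedding sketch (the Generator fields shown are filled with
+// hypothetical functions; the real flatc driver registers one Generator per
+// output language):
+//
+//   flatbuffers::FlatCompiler::Generator gen = {};
+//   gen.generate = MyGenerateCpp;          // hypothetical GenerateFn
+//   gen.generator_opt_long = "--my-cpp";
+//   gen.lang_name = "MyCpp";
+//   flatbuffers::FlatCompiler::InitParams params;
+//   params.generators = &gen;
+//   params.num_generators = 1;
+//   flatbuffers::FlatCompiler flatc(params);
+//   return flatc.Compile(argc, argv);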
+
+} // namespace flatbuffers
+
+#endif // FLATBUFFERS_FLATC_H_
diff --git a/onert-micro/externals/flatbuffers/flexbuffers.h b/onert-micro/externals/flatbuffers/flexbuffers.h
new file mode 100644
index 000000000..f6fcbf34b
--- /dev/null
+++ b/onert-micro/externals/flatbuffers/flexbuffers.h
@@ -0,0 +1,1852 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 Google Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FLATBUFFERS_FLEXBUFFERS_H_
+#define FLATBUFFERS_FLEXBUFFERS_H_
+
+#include <map>
+// Used to select STL variant.
+#include "flatbuffers/base.h"
+// We use the basic binary writing functions from the regular FlatBuffers.
+#include "flatbuffers/util.h"
+
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+
+#if defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning(disable : 4127) // C4127: conditional expression is constant
+#endif
+
+namespace flexbuffers
+{
+
+class Reference;
+class Map;
+
+// These are used in the lower 2 bits of a type field to determine the size of
+// the elements (and/or the size field) of the item pointed to (e.g. vector).
+enum BitWidth
+{
+ BIT_WIDTH_8 = 0,
+ BIT_WIDTH_16 = 1,
+ BIT_WIDTH_32 = 2,
+ BIT_WIDTH_64 = 3,
+};
+
+// These are used as the upper 6 bits of a type field to indicate the actual
+// type.
+enum Type
+{
+ FBT_NULL = 0,
+ FBT_INT = 1,
+ FBT_UINT = 2,
+ FBT_FLOAT = 3,
+ // Types above stored inline, types below store an offset.
+ FBT_KEY = 4,
+ FBT_STRING = 5,
+ FBT_INDIRECT_INT = 6,
+ FBT_INDIRECT_UINT = 7,
+ FBT_INDIRECT_FLOAT = 8,
+ FBT_MAP = 9,
+ FBT_VECTOR = 10, // Untyped.
+ FBT_VECTOR_INT = 11, // Typed any size (stores no type table).
+ FBT_VECTOR_UINT = 12,
+ FBT_VECTOR_FLOAT = 13,
+ FBT_VECTOR_KEY = 14,
+ // DEPRECATED, use FBT_VECTOR or FBT_VECTOR_KEY instead.
+ // Read test.cpp/FlexBuffersDeprecatedTest() for details on why.
+ FBT_VECTOR_STRING_DEPRECATED = 15,
+ FBT_VECTOR_INT2 = 16, // Typed tuple (no type table, no size field).
+ FBT_VECTOR_UINT2 = 17,
+ FBT_VECTOR_FLOAT2 = 18,
+ FBT_VECTOR_INT3 = 19, // Typed triple (no type table, no size field).
+ FBT_VECTOR_UINT3 = 20,
+ FBT_VECTOR_FLOAT3 = 21,
+ FBT_VECTOR_INT4 = 22, // Typed quad (no type table, no size field).
+ FBT_VECTOR_UINT4 = 23,
+ FBT_VECTOR_FLOAT4 = 24,
+ FBT_BLOB = 25,
+ FBT_BOOL = 26,
+  FBT_VECTOR_BOOL = 36, // To allow the same type-to-typed-vector conversion as for the other types.
+};
+
+inline bool IsInline(Type t) { return t <= FBT_FLOAT || t == FBT_BOOL; }
+
+inline bool IsTypedVectorElementType(Type t)
+{
+ return (t >= FBT_INT && t <= FBT_STRING) || t == FBT_BOOL;
+}
+
+inline bool IsTypedVector(Type t)
+{
+ return (t >= FBT_VECTOR_INT && t <= FBT_VECTOR_STRING_DEPRECATED) || t == FBT_VECTOR_BOOL;
+}
+
+inline bool IsFixedTypedVector(Type t) { return t >= FBT_VECTOR_INT2 && t <= FBT_VECTOR_FLOAT4; }
+
+inline Type ToTypedVector(Type t, size_t fixed_len = 0)
+{
+ FLATBUFFERS_ASSERT(IsTypedVectorElementType(t));
+ switch (fixed_len)
+ {
+ case 0:
+ return static_cast<Type>(t - FBT_INT + FBT_VECTOR_INT);
+ case 2:
+ return static_cast<Type>(t - FBT_INT + FBT_VECTOR_INT2);
+ case 3:
+ return static_cast<Type>(t - FBT_INT + FBT_VECTOR_INT3);
+ case 4:
+ return static_cast<Type>(t - FBT_INT + FBT_VECTOR_INT4);
+ default:
+ FLATBUFFERS_ASSERT(0);
+ return FBT_NULL;
+ }
+}
+
+inline Type ToTypedVectorElementType(Type t)
+{
+ FLATBUFFERS_ASSERT(IsTypedVector(t));
+ return static_cast<Type>(t - FBT_VECTOR_INT + FBT_INT);
+}
+
+inline Type ToFixedTypedVectorElementType(Type t, uint8_t *len)
+{
+ FLATBUFFERS_ASSERT(IsFixedTypedVector(t));
+ auto fixed_type = t - FBT_VECTOR_INT2;
+ *len = static_cast<uint8_t>(fixed_type / 3 + 2); // 3 types each, starting from length 2.
+ return static_cast<Type>(fixed_type % 3 + FBT_INT);
+}
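+
+// Illustrative values of this encoding (derivable from the enum layout above):
+//
+//   ToTypedVector(FBT_INT)      == FBT_VECTOR_INT
+//   ToTypedVector(FBT_FLOAT, 3) == FBT_VECTOR_FLOAT3
+//   ToTypedVectorElementType(FBT_VECTOR_UINT) == FBT_UINT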
+
+// TODO: implement proper support for 8/16-bit floats, or decide not to
+// support them.
+typedef int16_t half;
+typedef int8_t quarter;
+
+// TODO: can we do this without conditionals using intrinsics or inline asm
+// on some platforms? Given branch prediction the method below should be
+// decently quick, but it is the most frequently executed function.
+// We could do an (unaligned) 64-bit read if we ifdef out the platforms for
+// which that doesn't work (or where we'd read into un-owned memory).
+template <typename R, typename T1, typename T2, typename T4, typename T8>
+R ReadSizedScalar(const uint8_t *data, uint8_t byte_width)
+{
+ return byte_width < 4 ? (byte_width < 2 ? static_cast<R>(flatbuffers::ReadScalar<T1>(data))
+ : static_cast<R>(flatbuffers::ReadScalar<T2>(data)))
+ : (byte_width < 8 ? static_cast<R>(flatbuffers::ReadScalar<T4>(data))
+ : static_cast<R>(flatbuffers::ReadScalar<T8>(data)));
+}
+
+inline int64_t ReadInt64(const uint8_t *data, uint8_t byte_width)
+{
+ return ReadSizedScalar<int64_t, int8_t, int16_t, int32_t, int64_t>(data, byte_width);
+}
+
+inline uint64_t ReadUInt64(const uint8_t *data, uint8_t byte_width)
+{
+#if defined(_MSC_VER) && ((defined(_M_X64) && !defined(_M_ARM64EC)) || defined _M_IX86)
+ uint64_t u = 0;
+ __movsb(reinterpret_cast<uint8_t *>(&u), reinterpret_cast<const uint8_t *>(data), byte_width);
+ return flatbuffers::EndianScalar(u);
+#else
+ return ReadSizedScalar<uint64_t, uint8_t, uint16_t, uint32_t, uint64_t>(data, byte_width);
+#endif
+}
+
+inline double ReadDouble(const uint8_t *data, uint8_t byte_width)
+{
+ return ReadSizedScalar<double, quarter, half, float, double>(data, byte_width);
+}
+
+inline const uint8_t *Indirect(const uint8_t *offset, uint8_t byte_width)
+{
+ return offset - ReadUInt64(offset, byte_width);
+}
+
+template <typename T> const uint8_t *Indirect(const uint8_t *offset)
+{
+ return offset - flatbuffers::ReadScalar<T>(offset);
+}
+
+inline BitWidth WidthU(uint64_t u)
+{
+#define FLATBUFFERS_GET_FIELD_BIT_WIDTH(value, width) \
+ { \
+ if (!((u) & ~((1ULL << (width)) - 1ULL))) \
+ return BIT_WIDTH_##width; \
+ }
+ FLATBUFFERS_GET_FIELD_BIT_WIDTH(u, 8);
+ FLATBUFFERS_GET_FIELD_BIT_WIDTH(u, 16);
+ FLATBUFFERS_GET_FIELD_BIT_WIDTH(u, 32);
+#undef FLATBUFFERS_GET_FIELD_BIT_WIDTH
+ return BIT_WIDTH_64;
+}
+
+inline BitWidth WidthI(int64_t i)
+{
+ auto u = static_cast<uint64_t>(i) << 1;
+ return WidthU(i >= 0 ? u : ~u);
+}
+
+inline BitWidth WidthF(double f)
+{
+ return static_cast<double>(static_cast<float>(f)) == f ? BIT_WIDTH_32 : BIT_WIDTH_64;
+}
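+
+// Illustrative results (a sketch of the bit-width selection above):
+//
+//   WidthU(255) == BIT_WIDTH_8     WidthU(256)  == BIT_WIDTH_16
+//   WidthI(-1)  == BIT_WIDTH_8     WidthI(-129) == BIT_WIDTH_16
+//   WidthF(0.5) == BIT_WIDTH_32    WidthF(0.1)  == BIT_WIDTH_64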
+
+// Base class of all types below.
+// Points into the data buffer and allows access to one type.
+class Object
+{
+public:
+ Object(const uint8_t *data, uint8_t byte_width) : data_(data), byte_width_(byte_width) {}
+
+protected:
+ const uint8_t *data_;
+ uint8_t byte_width_;
+};
+
+// Object that has a size, obtained either from size prefix, or elsewhere.
+class Sized : public Object
+{
+public:
+ // Size prefix.
+ Sized(const uint8_t *data, uint8_t byte_width) : Object(data, byte_width), size_(read_size()) {}
+ // Manual size.
+ Sized(const uint8_t *data, uint8_t byte_width, size_t sz) : Object(data, byte_width), size_(sz) {}
+ size_t size() const { return size_; }
+ // Access size stored in `byte_width_` bytes before data_ pointer.
+ size_t read_size() const
+ {
+ return static_cast<size_t>(ReadUInt64(data_ - byte_width_, byte_width_));
+ }
+
+protected:
+ size_t size_;
+};
+
+class String : public Sized
+{
+public:
+ // Size prefix.
+ String(const uint8_t *data, uint8_t byte_width) : Sized(data, byte_width) {}
+ // Manual size.
+ String(const uint8_t *data, uint8_t byte_width, size_t sz) : Sized(data, byte_width, sz) {}
+
+ size_t length() const { return size(); }
+ const char *c_str() const { return reinterpret_cast<const char *>(data_); }
+ std::string str() const { return std::string(c_str(), size()); }
+
+ static String EmptyString()
+ {
+ static const char *empty_string = "";
+ return String(reinterpret_cast<const uint8_t *>(empty_string), 1, 0);
+ }
+ bool IsTheEmptyString() const { return data_ == EmptyString().data_; }
+};
+
+class Blob : public Sized
+{
+public:
+ Blob(const uint8_t *data_buf, uint8_t byte_width) : Sized(data_buf, byte_width) {}
+
+ static Blob EmptyBlob()
+ {
+ static const uint8_t empty_blob[] = {0 /*len*/};
+ return Blob(empty_blob + 1, 1);
+ }
+ bool IsTheEmptyBlob() const { return data_ == EmptyBlob().data_; }
+ const uint8_t *data() const { return data_; }
+};
+
+class Vector : public Sized
+{
+public:
+ Vector(const uint8_t *data, uint8_t byte_width) : Sized(data, byte_width) {}
+
+ Reference operator[](size_t i) const;
+
+ static Vector EmptyVector()
+ {
+ static const uint8_t empty_vector[] = {0 /*len*/};
+ return Vector(empty_vector + 1, 1);
+ }
+ bool IsTheEmptyVector() const { return data_ == EmptyVector().data_; }
+};
+
+class TypedVector : public Sized
+{
+public:
+ TypedVector(const uint8_t *data, uint8_t byte_width, Type element_type)
+ : Sized(data, byte_width), type_(element_type)
+ {
+ }
+
+ Reference operator[](size_t i) const;
+
+ static TypedVector EmptyTypedVector()
+ {
+ static const uint8_t empty_typed_vector[] = {0 /*len*/};
+ return TypedVector(empty_typed_vector + 1, 1, FBT_INT);
+ }
+ bool IsTheEmptyVector() const { return data_ == TypedVector::EmptyTypedVector().data_; }
+
+ Type ElementType() { return type_; }
+
+ friend Reference;
+
+private:
+ Type type_;
+
+ friend Map;
+};
+
+class FixedTypedVector : public Object
+{
+public:
+ FixedTypedVector(const uint8_t *data, uint8_t byte_width, Type element_type, uint8_t len)
+ : Object(data, byte_width), type_(element_type), len_(len)
+ {
+ }
+
+ Reference operator[](size_t i) const;
+
+ static FixedTypedVector EmptyFixedTypedVector()
+ {
+ static const uint8_t fixed_empty_vector[] = {0 /* unused */};
+ return FixedTypedVector(fixed_empty_vector, 1, FBT_INT, 0);
+ }
+ bool IsTheEmptyFixedTypedVector() const
+ {
+ return data_ == FixedTypedVector::EmptyFixedTypedVector().data_;
+ }
+
+ Type ElementType() { return type_; }
+ uint8_t size() { return len_; }
+
+private:
+ Type type_;
+ uint8_t len_;
+};
+
+class Map : public Vector
+{
+public:
+ Map(const uint8_t *data, uint8_t byte_width) : Vector(data, byte_width) {}
+
+ Reference operator[](const char *key) const;
+ Reference operator[](const std::string &key) const;
+
+ Vector Values() const { return Vector(data_, byte_width_); }
+
+ TypedVector Keys() const
+ {
+ const size_t num_prefixed_fields = 3;
+ auto keys_offset = data_ - byte_width_ * num_prefixed_fields;
+ return TypedVector(Indirect(keys_offset, byte_width_),
+ static_cast<uint8_t>(ReadUInt64(keys_offset + byte_width_, byte_width_)),
+ FBT_KEY);
+ }
+
+ static Map EmptyMap()
+ {
+ static const uint8_t empty_map[] = {
+ 0 /*keys_len*/, 0 /*keys_offset*/, 1 /*keys_width*/, 0 /*len*/
+ };
+ return Map(empty_map + 4, 1);
+ }
+
+ bool IsTheEmptyMap() const { return data_ == EmptyMap().data_; }
+};
+
+template <typename T> void AppendToString(std::string &s, T &&v, bool keys_quoted)
+{
+ s += "[ ";
+ for (size_t i = 0; i < v.size(); i++)
+ {
+ if (i)
+ s += ", ";
+ v[i].ToString(true, keys_quoted, s);
+ }
+ s += " ]";
+}
+
+class Reference
+{
+public:
+ Reference() : data_(nullptr), parent_width_(0), byte_width_(BIT_WIDTH_8), type_(FBT_NULL) {}
+
+ Reference(const uint8_t *data, uint8_t parent_width, uint8_t byte_width, Type type)
+ : data_(data), parent_width_(parent_width), byte_width_(byte_width), type_(type)
+ {
+ }
+
+ Reference(const uint8_t *data, uint8_t parent_width, uint8_t packed_type)
+ : data_(data), parent_width_(parent_width)
+ {
+ byte_width_ = 1U << static_cast<BitWidth>(packed_type & 3);
+ type_ = static_cast<Type>(packed_type >> 2);
+ }
+
+ Type GetType() const { return type_; }
+
+ bool IsNull() const { return type_ == FBT_NULL; }
+ bool IsBool() const { return type_ == FBT_BOOL; }
+ bool IsInt() const { return type_ == FBT_INT || type_ == FBT_INDIRECT_INT; }
+ bool IsUInt() const { return type_ == FBT_UINT || type_ == FBT_INDIRECT_UINT; }
+ bool IsIntOrUint() const { return IsInt() || IsUInt(); }
+ bool IsFloat() const { return type_ == FBT_FLOAT || type_ == FBT_INDIRECT_FLOAT; }
+ bool IsNumeric() const { return IsIntOrUint() || IsFloat(); }
+ bool IsString() const { return type_ == FBT_STRING; }
+ bool IsKey() const { return type_ == FBT_KEY; }
+ bool IsVector() const { return type_ == FBT_VECTOR || type_ == FBT_MAP; }
+ bool IsUntypedVector() const { return type_ == FBT_VECTOR; }
+ bool IsTypedVector() const { return flexbuffers::IsTypedVector(type_); }
+ bool IsFixedTypedVector() const { return flexbuffers::IsFixedTypedVector(type_); }
+ bool IsAnyVector() const { return (IsTypedVector() || IsFixedTypedVector() || IsVector()); }
+ bool IsMap() const { return type_ == FBT_MAP; }
+ bool IsBlob() const { return type_ == FBT_BLOB; }
+ bool AsBool() const
+ {
+ return (type_ == FBT_BOOL ? ReadUInt64(data_, parent_width_) : AsUInt64()) != 0;
+ }
+
+  // Reads any type as an int64_t. Never fails, does the most sensible conversion.
+  // Floats are truncated, strings are parsed as numbers where possible, and
+  // vectors/maps return their size. Returns 0 if all else fails.
+ int64_t AsInt64() const
+ {
+ if (type_ == FBT_INT)
+ {
+ // A fast path for the common case.
+ return ReadInt64(data_, parent_width_);
+ }
+ else
+ switch (type_)
+ {
+ case FBT_INDIRECT_INT:
+ return ReadInt64(Indirect(), byte_width_);
+ case FBT_UINT:
+ return ReadUInt64(data_, parent_width_);
+ case FBT_INDIRECT_UINT:
+ return ReadUInt64(Indirect(), byte_width_);
+ case FBT_FLOAT:
+ return static_cast<int64_t>(ReadDouble(data_, parent_width_));
+ case FBT_INDIRECT_FLOAT:
+ return static_cast<int64_t>(ReadDouble(Indirect(), byte_width_));
+ case FBT_NULL:
+ return 0;
+ case FBT_STRING:
+ return flatbuffers::StringToInt(AsString().c_str());
+ case FBT_VECTOR:
+ return static_cast<int64_t>(AsVector().size());
+ case FBT_BOOL:
+ return ReadInt64(data_, parent_width_);
+ default:
+ // Convert other things to int.
+ return 0;
+ }
+ }
+
+ // TODO: could specialize these to not use AsInt64() if that saves
+ // extension ops in generated code, and use a faster op than ReadInt64.
+ int32_t AsInt32() const { return static_cast<int32_t>(AsInt64()); }
+ int16_t AsInt16() const { return static_cast<int16_t>(AsInt64()); }
+ int8_t AsInt8() const { return static_cast<int8_t>(AsInt64()); }
+
+ uint64_t AsUInt64() const
+ {
+ if (type_ == FBT_UINT)
+ {
+ // A fast path for the common case.
+ return ReadUInt64(data_, parent_width_);
+ }
+ else
+ switch (type_)
+ {
+ case FBT_INDIRECT_UINT:
+ return ReadUInt64(Indirect(), byte_width_);
+ case FBT_INT:
+ return ReadInt64(data_, parent_width_);
+ case FBT_INDIRECT_INT:
+ return ReadInt64(Indirect(), byte_width_);
+ case FBT_FLOAT:
+ return static_cast<uint64_t>(ReadDouble(data_, parent_width_));
+ case FBT_INDIRECT_FLOAT:
+ return static_cast<uint64_t>(ReadDouble(Indirect(), byte_width_));
+ case FBT_NULL:
+ return 0;
+ case FBT_STRING:
+ return flatbuffers::StringToUInt(AsString().c_str());
+ case FBT_VECTOR:
+ return static_cast<uint64_t>(AsVector().size());
+ case FBT_BOOL:
+ return ReadUInt64(data_, parent_width_);
+ default:
+ // Convert other things to uint.
+ return 0;
+ }
+ }
+
+ uint32_t AsUInt32() const { return static_cast<uint32_t>(AsUInt64()); }
+ uint16_t AsUInt16() const { return static_cast<uint16_t>(AsUInt64()); }
+ uint8_t AsUInt8() const { return static_cast<uint8_t>(AsUInt64()); }
+
+ double AsDouble() const
+ {
+ if (type_ == FBT_FLOAT)
+ {
+ // A fast path for the common case.
+ return ReadDouble(data_, parent_width_);
+ }
+ else
+ switch (type_)
+ {
+ case FBT_INDIRECT_FLOAT:
+ return ReadDouble(Indirect(), byte_width_);
+ case FBT_INT:
+ return static_cast<double>(ReadInt64(data_, parent_width_));
+ case FBT_UINT:
+ return static_cast<double>(ReadUInt64(data_, parent_width_));
+ case FBT_INDIRECT_INT:
+ return static_cast<double>(ReadInt64(Indirect(), byte_width_));
+ case FBT_INDIRECT_UINT:
+ return static_cast<double>(ReadUInt64(Indirect(), byte_width_));
+ case FBT_NULL:
+ return 0.0;
+ case FBT_STRING:
+ {
+ double d;
+ flatbuffers::StringToNumber(AsString().c_str(), &d);
+ return d;
+ }
+ case FBT_VECTOR:
+ return static_cast<double>(AsVector().size());
+ case FBT_BOOL:
+ return static_cast<double>(ReadUInt64(data_, parent_width_));
+ default:
+ // Convert strings and other things to float.
+ return 0;
+ }
+ }
+
+ float AsFloat() const { return static_cast<float>(AsDouble()); }
+
+ const char *AsKey() const
+ {
+ if (type_ == FBT_KEY || type_ == FBT_STRING)
+ {
+ return reinterpret_cast<const char *>(Indirect());
+ }
+ else
+ {
+ return "";
+ }
+ }
+
+ // This function returns the empty string if you try to read something that
+ // is not a string or key.
+ String AsString() const
+ {
+ if (type_ == FBT_STRING)
+ {
+ return String(Indirect(), byte_width_);
+ }
+ else if (type_ == FBT_KEY)
+ {
+ auto key = Indirect();
+ return String(key, byte_width_, strlen(reinterpret_cast<const char *>(key)));
+ }
+ else
+ {
+ return String::EmptyString();
+ }
+ }
+
+ // Unlike AsString(), this will convert any type to a std::string.
+ std::string ToString() const
+ {
+ std::string s;
+ ToString(false, false, s);
+ return s;
+ }
+
+ // Convert any type to a JSON-like string. strings_quoted determines if
+ // string values at the top level receive "" quotes (inside other values
+ // they always do). keys_quoted determines if keys are quoted, at any level.
+ // TODO(wvo): add further options to have indentation/newlines.
+ void ToString(bool strings_quoted, bool keys_quoted, std::string &s) const
+ {
+ if (type_ == FBT_STRING)
+ {
+ String str(Indirect(), byte_width_);
+ if (strings_quoted)
+ {
+ flatbuffers::EscapeString(str.c_str(), str.length(), &s, true, false);
+ }
+ else
+ {
+ s.append(str.c_str(), str.length());
+ }
+ }
+ else if (IsKey())
+ {
+ auto str = AsKey();
+ if (keys_quoted)
+ {
+ flatbuffers::EscapeString(str, strlen(str), &s, true, false);
+ }
+ else
+ {
+ s += str;
+ }
+ }
+ else if (IsInt())
+ {
+ s += flatbuffers::NumToString(AsInt64());
+ }
+ else if (IsUInt())
+ {
+ s += flatbuffers::NumToString(AsUInt64());
+ }
+ else if (IsFloat())
+ {
+ s += flatbuffers::NumToString(AsDouble());
+ }
+ else if (IsNull())
+ {
+ s += "null";
+ }
+ else if (IsBool())
+ {
+ s += AsBool() ? "true" : "false";
+ }
+ else if (IsMap())
+ {
+ s += "{ ";
+ auto m = AsMap();
+ auto keys = m.Keys();
+ auto vals = m.Values();
+ for (size_t i = 0; i < keys.size(); i++)
+ {
+ keys[i].ToString(true, keys_quoted, s);
+ s += ": ";
+ vals[i].ToString(true, keys_quoted, s);
+ if (i < keys.size() - 1)
+ s += ", ";
+ }
+ s += " }";
+ }
+ else if (IsVector())
+ {
+ AppendToString<Vector>(s, AsVector(), keys_quoted);
+ }
+ else if (IsTypedVector())
+ {
+ AppendToString<TypedVector>(s, AsTypedVector(), keys_quoted);
+ }
+ else if (IsFixedTypedVector())
+ {
+ AppendToString<FixedTypedVector>(s, AsFixedTypedVector(), keys_quoted);
+ }
+ else if (IsBlob())
+ {
+ auto blob = AsBlob();
+ flatbuffers::EscapeString(reinterpret_cast<const char *>(blob.data()), blob.size(), &s, true,
+ false);
+ }
+ else
+ {
+ s += "(?)";
+ }
+ }
+
+ // This function returns the empty blob if you try to read a not-blob.
+ // Strings can be viewed as blobs too.
+ Blob AsBlob() const
+ {
+ if (type_ == FBT_BLOB || type_ == FBT_STRING)
+ {
+ return Blob(Indirect(), byte_width_);
+ }
+ else
+ {
+ return Blob::EmptyBlob();
+ }
+ }
+
+ // This function returns the empty vector if you try to read a not-vector.
+ // Maps can be viewed as vectors too.
+ Vector AsVector() const
+ {
+ if (type_ == FBT_VECTOR || type_ == FBT_MAP)
+ {
+ return Vector(Indirect(), byte_width_);
+ }
+ else
+ {
+ return Vector::EmptyVector();
+ }
+ }
+
+ TypedVector AsTypedVector() const
+ {
+ if (IsTypedVector())
+ {
+ auto tv = TypedVector(Indirect(), byte_width_, ToTypedVectorElementType(type_));
+ if (tv.type_ == FBT_STRING)
+ {
+ // These can't be accessed as strings, since we don't know the bit-width
+ // of the size field, see the declaration of
+ // FBT_VECTOR_STRING_DEPRECATED above for details.
+ // We change the type here to be keys, which are a subtype of strings,
+ // and will ignore the size field. This will truncate strings with
+ // embedded nulls.
+ tv.type_ = FBT_KEY;
+ }
+ return tv;
+ }
+ else
+ {
+ return TypedVector::EmptyTypedVector();
+ }
+ }
+
+ FixedTypedVector AsFixedTypedVector() const
+ {
+ if (IsFixedTypedVector())
+ {
+ uint8_t len = 0;
+ auto vtype = ToFixedTypedVectorElementType(type_, &len);
+ return FixedTypedVector(Indirect(), byte_width_, vtype, len);
+ }
+ else
+ {
+ return FixedTypedVector::EmptyFixedTypedVector();
+ }
+ }
+
+ Map AsMap() const
+ {
+ if (type_ == FBT_MAP)
+ {
+ return Map(Indirect(), byte_width_);
+ }
+ else
+ {
+ return Map::EmptyMap();
+ }
+ }
+
+ template <typename T> T As() const;
+
+ // Experimental: Mutation functions.
+ // These allow scalars in an already created buffer to be updated in-place.
+ // Since by default scalars are stored in the smallest possible space,
+ // the new value may not fit, in which case these functions return false.
+ // To avoid this, you can construct the values you intend to mutate using
+ // Builder::ForceMinimumBitWidth.
+ bool MutateInt(int64_t i)
+ {
+ if (type_ == FBT_INT)
+ {
+ return Mutate(data_, i, parent_width_, WidthI(i));
+ }
+ else if (type_ == FBT_INDIRECT_INT)
+ {
+ return Mutate(Indirect(), i, byte_width_, WidthI(i));
+ }
+ else if (type_ == FBT_UINT)
+ {
+ auto u = static_cast<uint64_t>(i);
+ return Mutate(data_, u, parent_width_, WidthU(u));
+ }
+ else if (type_ == FBT_INDIRECT_UINT)
+ {
+ auto u = static_cast<uint64_t>(i);
+ return Mutate(Indirect(), u, byte_width_, WidthU(u));
+ }
+ else
+ {
+ return false;
+ }
+ }
+
+ bool MutateBool(bool b)
+ {
+ return type_ == FBT_BOOL && Mutate(data_, b, parent_width_, BIT_WIDTH_8);
+ }
+
+ bool MutateUInt(uint64_t u)
+ {
+ if (type_ == FBT_UINT)
+ {
+ return Mutate(data_, u, parent_width_, WidthU(u));
+ }
+ else if (type_ == FBT_INDIRECT_UINT)
+ {
+ return Mutate(Indirect(), u, byte_width_, WidthU(u));
+ }
+ else if (type_ == FBT_INT)
+ {
+ auto i = static_cast<int64_t>(u);
+ return Mutate(data_, i, parent_width_, WidthI(i));
+ }
+ else if (type_ == FBT_INDIRECT_INT)
+ {
+ auto i = static_cast<int64_t>(u);
+ return Mutate(Indirect(), i, byte_width_, WidthI(i));
+ }
+ else
+ {
+ return false;
+ }
+ }
+
+ bool MutateFloat(float f)
+ {
+ if (type_ == FBT_FLOAT)
+ {
+ return MutateF(data_, f, parent_width_, BIT_WIDTH_32);
+ }
+ else if (type_ == FBT_INDIRECT_FLOAT)
+ {
+ return MutateF(Indirect(), f, byte_width_, BIT_WIDTH_32);
+ }
+ else
+ {
+ return false;
+ }
+ }
+
+ bool MutateFloat(double d)
+ {
+ if (type_ == FBT_FLOAT)
+ {
+ return MutateF(data_, d, parent_width_, WidthF(d));
+ }
+ else if (type_ == FBT_INDIRECT_FLOAT)
+ {
+ return MutateF(Indirect(), d, byte_width_, WidthF(d));
+ }
+ else
+ {
+ return false;
+ }
+ }
+
+ bool MutateString(const char *str, size_t len)
+ {
+ auto s = AsString();
+ if (s.IsTheEmptyString())
+ return false;
+ // This is very strict, could allow shorter strings, but that creates
+ // garbage.
+ if (s.length() != len)
+ return false;
+ memcpy(const_cast<char *>(s.c_str()), str, len);
+ return true;
+ }
+ bool MutateString(const char *str) { return MutateString(str, strlen(str)); }
+ bool MutateString(const std::string &str) { return MutateString(str.data(), str.length()); }
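+
+  // Illustrative: mutation only succeeds while the new value fits in the
+  // storage originally chosen for it (see Builder::ForceMinimumBitWidth).
+  // Assuming `buf` holds a finished buffer whose root map has an int "n":
+  //
+  //   auto n = flexbuffers::GetRoot(buf).AsMap()["n"];
+  //   bool ok = n.MutateInt(43);  // false if 43 needs a wider storage slot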
+
+private:
+ const uint8_t *Indirect() const { return flexbuffers::Indirect(data_, parent_width_); }
+
+ template <typename T>
+ bool Mutate(const uint8_t *dest, T t, size_t byte_width, BitWidth value_width)
+ {
+ auto fits = static_cast<size_t>(static_cast<size_t>(1U) << value_width) <= byte_width;
+ if (fits)
+ {
+ t = flatbuffers::EndianScalar(t);
+ memcpy(const_cast<uint8_t *>(dest), &t, byte_width);
+ }
+ return fits;
+ }
+
+ template <typename T>
+ bool MutateF(const uint8_t *dest, T t, size_t byte_width, BitWidth value_width)
+ {
+ if (byte_width == sizeof(double))
+ return Mutate(dest, static_cast<double>(t), byte_width, value_width);
+ if (byte_width == sizeof(float))
+ return Mutate(dest, static_cast<float>(t), byte_width, value_width);
+ FLATBUFFERS_ASSERT(false);
+ return false;
+ }
+
+ const uint8_t *data_;
+ uint8_t parent_width_;
+ uint8_t byte_width_;
+ Type type_;
+};
+
+// Template specialization for As().
+template <> inline bool Reference::As<bool>() const { return AsBool(); }
+
+template <> inline int8_t Reference::As<int8_t>() const { return AsInt8(); }
+template <> inline int16_t Reference::As<int16_t>() const { return AsInt16(); }
+template <> inline int32_t Reference::As<int32_t>() const { return AsInt32(); }
+template <> inline int64_t Reference::As<int64_t>() const { return AsInt64(); }
+
+template <> inline uint8_t Reference::As<uint8_t>() const { return AsUInt8(); }
+template <> inline uint16_t Reference::As<uint16_t>() const { return AsUInt16(); }
+template <> inline uint32_t Reference::As<uint32_t>() const { return AsUInt32(); }
+template <> inline uint64_t Reference::As<uint64_t>() const { return AsUInt64(); }
+
+template <> inline double Reference::As<double>() const { return AsDouble(); }
+template <> inline float Reference::As<float>() const { return AsFloat(); }
+
+template <> inline String Reference::As<String>() const { return AsString(); }
+template <> inline std::string Reference::As<std::string>() const { return AsString().str(); }
+
+template <> inline Blob Reference::As<Blob>() const { return AsBlob(); }
+template <> inline Vector Reference::As<Vector>() const { return AsVector(); }
+template <> inline TypedVector Reference::As<TypedVector>() const { return AsTypedVector(); }
+template <> inline FixedTypedVector Reference::As<FixedTypedVector>() const
+{
+ return AsFixedTypedVector();
+}
+template <> inline Map Reference::As<Map>() const { return AsMap(); }
+
+inline uint8_t PackedType(BitWidth bit_width, Type type)
+{
+ return static_cast<uint8_t>(bit_width | (type << 2));
+}
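+
+// Illustrative: the packed byte is (type << 2) | bit_width, e.g.
+// PackedType(BIT_WIDTH_32, FBT_INT) == 0x06.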
+
+inline uint8_t NullPackedType() { return PackedType(BIT_WIDTH_8, FBT_NULL); }
+
+// Vector accessors.
+// Note: if you try to access outside of bounds, you get a Null value back
+// instead. Normally this would be an assert, but since this is "dynamically
+// typed" data, you may not want that (someone sends you a 2d vector and you
+// wanted 3d).
+// The Null converts seamlessly into a default value for any other type.
+// TODO(wvo): Could introduce an #ifdef that makes this into an assert?
+inline Reference Vector::operator[](size_t i) const
+{
+ auto len = size();
+ if (i >= len)
+ return Reference(nullptr, 1, NullPackedType());
+ auto packed_type = (data_ + len * byte_width_)[i];
+ auto elem = data_ + i * byte_width_;
+ return Reference(elem, byte_width_, packed_type);
+}
+
+inline Reference TypedVector::operator[](size_t i) const
+{
+ auto len = size();
+ if (i >= len)
+ return Reference(nullptr, 1, NullPackedType());
+ auto elem = data_ + i * byte_width_;
+ return Reference(elem, byte_width_, 1, type_);
+}
+
+inline Reference FixedTypedVector::operator[](size_t i) const
+{
+ if (i >= len_)
+ return Reference(nullptr, 1, NullPackedType());
+ auto elem = data_ + i * byte_width_;
+ return Reference(elem, byte_width_, 1, type_);
+}
+
+template <typename T> int KeyCompare(const void *key, const void *elem)
+{
+ auto str_elem =
+ reinterpret_cast<const char *>(Indirect<T>(reinterpret_cast<const uint8_t *>(elem)));
+ auto skey = reinterpret_cast<const char *>(key);
+ return strcmp(skey, str_elem);
+}
+
+inline Reference Map::operator[](const char *key) const
+{
+ auto keys = Keys();
+ // We can't pass keys.byte_width_ to the comparison function, so we have
+ // to pick the right one ahead of time.
+ int (*comp)(const void *, const void *) = nullptr;
+ switch (keys.byte_width_)
+ {
+ case 1:
+ comp = KeyCompare<uint8_t>;
+ break;
+ case 2:
+ comp = KeyCompare<uint16_t>;
+ break;
+ case 4:
+ comp = KeyCompare<uint32_t>;
+ break;
+ case 8:
+ comp = KeyCompare<uint64_t>;
+ break;
+ }
+ auto res = std::bsearch(key, keys.data_, keys.size(), keys.byte_width_, comp);
+ if (!res)
+ return Reference(nullptr, 1, NullPackedType());
+ auto i = (reinterpret_cast<uint8_t *>(res) - keys.data_) / keys.byte_width_;
+ return (*static_cast<const Vector *>(this))[i];
+}
+
+inline Reference Map::operator[](const std::string &key) const { return (*this)[key.c_str()]; }
+
+inline Reference GetRoot(const uint8_t *buffer, size_t size)
+{
+ // See Finish() below for the serialization counterpart of this.
+ // The root starts at the end of the buffer, so we parse backwards from there.
+ auto end = buffer + size;
+ auto byte_width = *--end;
+ auto packed_type = *--end;
+ end -= byte_width; // The root data item.
+ return Reference(end, byte_width, packed_type);
+}
+
+inline Reference GetRoot(const std::vector<uint8_t> &buffer)
+{
+ return GetRoot(flatbuffers::vector_data(buffer), buffer.size());
+}
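+
+// Illustrative read-side sketch, assuming `buf` holds a finished FlexBuffer
+// whose root is a map like { "name": "hello", "n": 42 } (see the Builder
+// write-side sketch further below):
+//
+//   auto root = flexbuffers::GetRoot(buf);
+//   auto map = root.AsMap();
+//   std::string name = map["name"].AsString().str();
+//   int32_t n = map["n"].AsInt32();  // missing keys yield a Null reference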
+
+// Flags that configure how the Builder behaves.
+// The "Share" flags determine if the Builder automatically tries to pool
+// this type. Pooling can reduce the size of serialized data if there are
+// multiple maps of the same kind, at the expense of slightly slower
+// serialization (the cost of lookups) and more memory use (std::set).
+// By default this is on for keys, but off for strings.
+// Turn keys off if you have e.g. only one map.
+// Turn strings on if you expect many non-unique string values.
+// Additionally, sharing key vectors can save space if you have maps with
+// identical field populations.
+enum BuilderFlag
+{
+ BUILDER_FLAG_NONE = 0,
+ BUILDER_FLAG_SHARE_KEYS = 1,
+ BUILDER_FLAG_SHARE_STRINGS = 2,
+ BUILDER_FLAG_SHARE_KEYS_AND_STRINGS = 3,
+ BUILDER_FLAG_SHARE_KEY_VECTORS = 4,
+ BUILDER_FLAG_SHARE_ALL = 7,
+};
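+
+// Illustrative: pooling keys and strings for data with many repeated string
+// values (slightly slower to build, smaller output):
+//
+//   flexbuffers::Builder fbb(512, flexbuffers::BUILDER_FLAG_SHARE_KEYS_AND_STRINGS);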
+
+class Builder FLATBUFFERS_FINAL_CLASS
+{
+public:
+ Builder(size_t initial_size = 256, BuilderFlag flags = BUILDER_FLAG_SHARE_KEYS)
+ : buf_(initial_size), finished_(false), has_duplicate_keys_(false), flags_(flags),
+ force_min_bit_width_(BIT_WIDTH_8), key_pool(KeyOffsetCompare(buf_)),
+ string_pool(StringOffsetCompare(buf_))
+ {
+ buf_.clear();
+ }
+
+#ifdef FLATBUFFERS_DEFAULT_DECLARATION
+ Builder(Builder &&) = default;
+ Builder &operator=(Builder &&) = default;
+#endif
+
+ /// @brief Get the serialized buffer (after you call `Finish()`).
+ /// @return Returns a vector owned by this class.
+ const std::vector<uint8_t> &GetBuffer() const
+ {
+ Finished();
+ return buf_;
+ }
+
+ // Size of the buffer. Does not include unfinished values.
+ size_t GetSize() const { return buf_.size(); }
+
+ // Reset all state so we can re-use the buffer.
+ void Clear()
+ {
+ buf_.clear();
+ stack_.clear();
+ finished_ = false;
+    // flags_ remains as-is.
+ force_min_bit_width_ = BIT_WIDTH_8;
+ key_pool.clear();
+ string_pool.clear();
+ }
+
+ // All value constructing functions below have two versions: one that
+ // takes a key (for placement inside a map) and one that doesn't (for inside
+ // vectors and elsewhere).
+
+ void Null() { stack_.push_back(Value()); }
+ void Null(const char *key)
+ {
+ Key(key);
+ Null();
+ }
+
+ void Int(int64_t i) { stack_.push_back(Value(i, FBT_INT, WidthI(i))); }
+ void Int(const char *key, int64_t i)
+ {
+ Key(key);
+ Int(i);
+ }
+
+ void UInt(uint64_t u) { stack_.push_back(Value(u, FBT_UINT, WidthU(u))); }
+ void UInt(const char *key, uint64_t u)
+ {
+ Key(key);
+ UInt(u);
+ }
+
+ void Float(float f) { stack_.push_back(Value(f)); }
+ void Float(const char *key, float f)
+ {
+ Key(key);
+ Float(f);
+ }
+
+ void Double(double f) { stack_.push_back(Value(f)); }
+ void Double(const char *key, double d)
+ {
+ Key(key);
+ Double(d);
+ }
+
+ void Bool(bool b) { stack_.push_back(Value(b)); }
+ void Bool(const char *key, bool b)
+ {
+ Key(key);
+ Bool(b);
+ }
+
+ void IndirectInt(int64_t i) { PushIndirect(i, FBT_INDIRECT_INT, WidthI(i)); }
+ void IndirectInt(const char *key, int64_t i)
+ {
+ Key(key);
+ IndirectInt(i);
+ }
+
+ void IndirectUInt(uint64_t u) { PushIndirect(u, FBT_INDIRECT_UINT, WidthU(u)); }
+ void IndirectUInt(const char *key, uint64_t u)
+ {
+ Key(key);
+ IndirectUInt(u);
+ }
+
+ void IndirectFloat(float f) { PushIndirect(f, FBT_INDIRECT_FLOAT, BIT_WIDTH_32); }
+ void IndirectFloat(const char *key, float f)
+ {
+ Key(key);
+ IndirectFloat(f);
+ }
+
+ void IndirectDouble(double f) { PushIndirect(f, FBT_INDIRECT_FLOAT, WidthF(f)); }
+ void IndirectDouble(const char *key, double d)
+ {
+ Key(key);
+ IndirectDouble(d);
+ }
+
+ size_t Key(const char *str, size_t len)
+ {
+ auto sloc = buf_.size();
+ WriteBytes(str, len + 1);
+ if (flags_ & BUILDER_FLAG_SHARE_KEYS)
+ {
+ auto it = key_pool.find(sloc);
+ if (it != key_pool.end())
+ {
+ // Already in the buffer. Remove key we just serialized, and use
+ // existing offset instead.
+ buf_.resize(sloc);
+ sloc = *it;
+ }
+ else
+ {
+ key_pool.insert(sloc);
+ }
+ }
+ stack_.push_back(Value(static_cast<uint64_t>(sloc), FBT_KEY, BIT_WIDTH_8));
+ return sloc;
+ }
+
+ size_t Key(const char *str) { return Key(str, strlen(str)); }
+ size_t Key(const std::string &str) { return Key(str.c_str(), str.size()); }
+
+ size_t String(const char *str, size_t len)
+ {
+ auto reset_to = buf_.size();
+ auto sloc = CreateBlob(str, len, 1, FBT_STRING);
+ if (flags_ & BUILDER_FLAG_SHARE_STRINGS)
+ {
+ StringOffset so(sloc, len);
+ auto it = string_pool.find(so);
+ if (it != string_pool.end())
+ {
+ // Already in the buffer. Remove string we just serialized, and use
+ // existing offset instead.
+ buf_.resize(reset_to);
+ sloc = it->first;
+ stack_.back().u_ = sloc;
+ }
+ else
+ {
+ string_pool.insert(so);
+ }
+ }
+ return sloc;
+ }
+ size_t String(const char *str) { return String(str, strlen(str)); }
+ size_t String(const std::string &str) { return String(str.c_str(), str.size()); }
+ void String(const flexbuffers::String &str) { String(str.c_str(), str.length()); }
+
+ void String(const char *key, const char *str)
+ {
+ Key(key);
+ String(str);
+ }
+ void String(const char *key, const std::string &str)
+ {
+ Key(key);
+ String(str);
+ }
+ void String(const char *key, const flexbuffers::String &str)
+ {
+ Key(key);
+ String(str);
+ }
+
+ size_t Blob(const void *data, size_t len) { return CreateBlob(data, len, 0, FBT_BLOB); }
+ size_t Blob(const std::vector<uint8_t> &v)
+ {
+ return CreateBlob(flatbuffers::vector_data(v), v.size(), 0, FBT_BLOB);
+ }
+
+ // TODO(wvo): support all the FlexBuffer types (like flexbuffers::String),
+ // e.g. Vector etc. Also in overloaded versions.
+ // Also some FlatBuffers types?
+
+ size_t StartVector() { return stack_.size(); }
+ size_t StartVector(const char *key)
+ {
+ Key(key);
+ return stack_.size();
+ }
+ size_t StartMap() { return stack_.size(); }
+ size_t StartMap(const char *key)
+ {
+ Key(key);
+ return stack_.size();
+ }
+
+  // TODO(wvo): allow this to specify an alignment greater than the natural
+  // alignment.
+ size_t EndVector(size_t start, bool typed, bool fixed)
+ {
+ auto vec = CreateVector(start, stack_.size() - start, 1, typed, fixed);
+ // Remove temp elements and return vector.
+ stack_.resize(start);
+ stack_.push_back(vec);
+ return static_cast<size_t>(vec.u_);
+ }
+
+ size_t EndMap(size_t start)
+ {
+ // We should have interleaved keys and values on the stack.
+ // Make sure it is an even number:
+ auto len = stack_.size() - start;
+ FLATBUFFERS_ASSERT(!(len & 1));
+ len /= 2;
+ // Make sure keys are all strings:
+ for (auto key = start; key < stack_.size(); key += 2)
+ {
+ FLATBUFFERS_ASSERT(stack_[key].type_ == FBT_KEY);
+ }
+ // Now sort values, so later we can do a binary search lookup.
+ // We want to sort 2 array elements at a time.
+ struct TwoValue
+ {
+ Value key;
+ Value val;
+ };
+ // TODO(wvo): strict aliasing?
+ // TODO(wvo): allow the caller to indicate the data is already sorted
+ // for maximum efficiency? With an assert to check sortedness to make sure
+ // we're not breaking binary search.
+    // Or, we can track if the map is sorted as keys are added, which would
+    // be quite cheap (cheaper than checking it here), so we can skip this
+    // step automatically when applicable, and encourage people to write in
+    // a sorted fashion.
+ // std::sort is typically already a lot faster on sorted data though.
+ auto dict = reinterpret_cast<TwoValue *>(flatbuffers::vector_data(stack_) + start);
+ std::sort(dict, dict + len, [&](const TwoValue &a, const TwoValue &b) -> bool {
+ auto as = reinterpret_cast<const char *>(flatbuffers::vector_data(buf_) + a.key.u_);
+ auto bs = reinterpret_cast<const char *>(flatbuffers::vector_data(buf_) + b.key.u_);
+ auto comp = strcmp(as, bs);
+ // We want to disallow duplicate keys, since this results in a
+ // map where values cannot be found.
+ // But we can't assert here (since we don't want to fail on
+ // random JSON input) or have an error mechanism.
+ // Instead, we set has_duplicate_keys_ in the builder to
+ // signal this.
+        // TODO: Have to check for pointer equality, as some sort
+        // implementations apparently call this function with the same
+        // element? Why?
+ if (!comp && &a != &b)
+ has_duplicate_keys_ = true;
+ return comp < 0;
+ });
+ // First create a vector out of all keys.
+ // TODO(wvo): if kBuilderFlagShareKeyVectors is true, see if we can share
+ // the first vector.
+ auto keys = CreateVector(start, len, 2, true, false);
+ auto vec = CreateVector(start + 1, len, 2, false, false, &keys);
+ // Remove temp elements and return map.
+ stack_.resize(start);
+ stack_.push_back(vec);
+ return static_cast<size_t>(vec.u_);
+ }
+
+ // Call this after EndMap to see if the map had any duplicate keys.
+ // Any map with such keys won't be able to retrieve all values.
+ bool HasDuplicateKeys() const { return has_duplicate_keys_; }
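+
+  // Illustrative write-side sketch, producing the map used in the GetRoot
+  // read example above:
+  //
+  //   flexbuffers::Builder fbb;
+  //   fbb.Map([&]() {
+  //     fbb.String("name", "hello");
+  //     fbb.Int("n", 42);
+  //   });
+  //   fbb.Finish();
+  //   const std::vector<uint8_t> &buf = fbb.GetBuffer();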
+
+ template <typename F> size_t Vector(F f)
+ {
+ auto start = StartVector();
+ f();
+ return EndVector(start, false, false);
+ }
+ template <typename F, typename T> size_t Vector(F f, T &state)
+ {
+ auto start = StartVector();
+ f(state);
+ return EndVector(start, false, false);
+ }
+ template <typename F> size_t Vector(const char *key, F f)
+ {
+ auto start = StartVector(key);
+ f();
+ return EndVector(start, false, false);
+ }
+ template <typename F, typename T> size_t Vector(const char *key, F f, T &state)
+ {
+ auto start = StartVector(key);
+ f(state);
+ return EndVector(start, false, false);
+ }
+
+ template <typename T> void Vector(const T *elems, size_t len)
+ {
+ if (flatbuffers::is_scalar<T>::value)
+ {
+ // This path should be a lot quicker and use less space.
+ ScalarVector(elems, len, false);
+ }
+ else
+ {
+ auto start = StartVector();
+ for (size_t i = 0; i < len; i++)
+ Add(elems[i]);
+ EndVector(start, false, false);
+ }
+ }
+ template <typename T> void Vector(const char *key, const T *elems, size_t len)
+ {
+ Key(key);
+ Vector(elems, len);
+ }
+ template <typename T> void Vector(const std::vector<T> &vec)
+ {
+ Vector(flatbuffers::vector_data(vec), vec.size());
+ }
+
+ template <typename F> size_t TypedVector(F f)
+ {
+ auto start = StartVector();
+ f();
+ return EndVector(start, true, false);
+ }
+ template <typename F, typename T> size_t TypedVector(F f, T &state)
+ {
+ auto start = StartVector();
+ f(state);
+ return EndVector(start, true, false);
+ }
+ template <typename F> size_t TypedVector(const char *key, F f)
+ {
+ auto start = StartVector(key);
+ f();
+ return EndVector(start, true, false);
+ }
+ template <typename F, typename T> size_t TypedVector(const char *key, F f, T &state)
+ {
+ auto start = StartVector(key);
+ f(state);
+ return EndVector(start, true, false);
+ }
+
+ template <typename T> size_t FixedTypedVector(const T *elems, size_t len)
+ {
+    // We only support a few fixed vector lengths. For anything bigger, use a
+    // regular typed vector.
+ FLATBUFFERS_ASSERT(len >= 2 && len <= 4);
+ // And only scalar values.
+ static_assert(flatbuffers::is_scalar<T>::value, "Unrelated types");
+ return ScalarVector(elems, len, true);
+ }
+
+ template <typename T> size_t FixedTypedVector(const char *key, const T *elems, size_t len)
+ {
+ Key(key);
+ return FixedTypedVector(elems, len);
+ }
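+
+  // Illustrative: a 3-element fixed vector of scalars (stored without a size
+  // field, as FBT_VECTOR_INT3 here):
+  //
+  //   int16_t xyz[3] = {1, 2, 3};
+  //   fbb.FixedTypedVector("pos", xyz, 3);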
+
+ template <typename F> size_t Map(F f)
+ {
+ auto start = StartMap();
+ f();
+ return EndMap(start);
+ }
+ template <typename F, typename T> size_t Map(F f, T &state)
+ {
+ auto start = StartMap();
+ f(state);
+ return EndMap(start);
+ }
+ template <typename F> size_t Map(const char *key, F f)
+ {
+ auto start = StartMap(key);
+ f();
+ return EndMap(start);
+ }
+ template <typename F, typename T> size_t Map(const char *key, F f, T &state)
+ {
+ auto start = StartMap(key);
+ f(state);
+ return EndMap(start);
+ }
+ template <typename T> void Map(const std::map<std::string, T> &map)
+ {
+ auto start = StartMap();
+ for (auto it = map.begin(); it != map.end(); ++it)
+ Add(it->first.c_str(), it->second);
+ EndMap(start);
+ }
+
+ // If you wish to share a value explicitly (a value not shared automatically
+ // through one of the BUILDER_FLAG_SHARE_* flags) you can do so with these
+ // functions. Or if you wish to turn those flags off for performance reasons
+ // and still do some explicit sharing. For example:
+ // builder.IndirectDouble(M_PI);
+ // auto id = builder.LastValue(); // Remember where we stored it.
+ // .. more code goes here ..
+ // builder.ReuseValue(id); // Refers to same double by offset.
+ // LastValue works regardless of whether the value has a key or not.
+ // Works on any data type.
+ struct Value;
+ Value LastValue() { return stack_.back(); }
+ void ReuseValue(Value v) { stack_.push_back(v); }
+ void ReuseValue(const char *key, Value v)
+ {
+ Key(key);
+ ReuseValue(v);
+ }
+
+ // Overloaded Add that tries to call the correct function above.
+ void Add(int8_t i) { Int(i); }
+ void Add(int16_t i) { Int(i); }
+ void Add(int32_t i) { Int(i); }
+ void Add(int64_t i) { Int(i); }
+ void Add(uint8_t u) { UInt(u); }
+ void Add(uint16_t u) { UInt(u); }
+ void Add(uint32_t u) { UInt(u); }
+ void Add(uint64_t u) { UInt(u); }
+ void Add(float f) { Float(f); }
+ void Add(double d) { Double(d); }
+ void Add(bool b) { Bool(b); }
+ void Add(const char *str) { String(str); }
+ void Add(const std::string &str) { String(str); }
+ void Add(const flexbuffers::String &str) { String(str); }
+
+ template <typename T> void Add(const std::vector<T> &vec) { Vector(vec); }
+
+ template <typename T> void Add(const char *key, const T &t)
+ {
+ Key(key);
+ Add(t);
+ }
+
+ template <typename T> void Add(const std::map<std::string, T> &map) { Map(map); }
+
+ template <typename T> void operator+=(const T &t) { Add(t); }
+
+ // This function is useful in combination with the Mutate* functions above.
+ // It forces elements of vectors and maps to have a minimum size, such that
+ // they can later be updated without failing.
+ // Call with no arguments to reset.
+ void ForceMinimumBitWidth(BitWidth bw = BIT_WIDTH_8) { force_min_bit_width_ = bw; }
+
+ void Finish()
+ {
+ // If you hit this assert, you likely have objects that were never included
+ // in a parent. You need to have exactly one root to finish a buffer.
+    // Check that your Start/End calls are matched, and that all objects are
+    // inside some other object.
+ FLATBUFFERS_ASSERT(stack_.size() == 1);
+
+ // Write root value.
+ auto byte_width = Align(stack_[0].ElemWidth(buf_.size(), 0));
+ WriteAny(stack_[0], byte_width);
+ // Write root type.
+ Write(stack_[0].StoredPackedType(), 1);
+ // Write root size. Normally determined by parent, but root has no parent :)
+ Write(byte_width, 1);
+
+ finished_ = true;
+ }
+
+private:
+ void Finished() const
+ {
+    // If you get this assert, you're attempting to access a buffer
+ // which hasn't been finished yet. Be sure to call
+ // Builder::Finish with your root object.
+ FLATBUFFERS_ASSERT(finished_);
+ }
+
+ // Align to prepare for writing a scalar with a certain size.
+ uint8_t Align(BitWidth alignment)
+ {
+ auto byte_width = 1U << alignment;
+ buf_.insert(buf_.end(), flatbuffers::PaddingBytes(buf_.size(), byte_width), 0);
+ return static_cast<uint8_t>(byte_width);
+ }
+
+ void WriteBytes(const void *val, size_t size)
+ {
+ buf_.insert(buf_.end(), reinterpret_cast<const uint8_t *>(val),
+ reinterpret_cast<const uint8_t *>(val) + size);
+ }
+
+ template <typename T> void Write(T val, size_t byte_width)
+ {
+ FLATBUFFERS_ASSERT(sizeof(T) >= byte_width);
+ val = flatbuffers::EndianScalar(val);
+ WriteBytes(&val, byte_width);
+ }
+
+ void WriteDouble(double f, uint8_t byte_width)
+ {
+ switch (byte_width)
+ {
+ case 8:
+ Write(f, byte_width);
+ break;
+ case 4:
+ Write(static_cast<float>(f), byte_width);
+ break;
+ // case 2: Write(static_cast<half>(f), byte_width); break;
+ // case 1: Write(static_cast<quarter>(f), byte_width); break;
+ default:
+ FLATBUFFERS_ASSERT(0);
+ }
+ }
+
+ void WriteOffset(uint64_t o, uint8_t byte_width)
+ {
+ auto reloff = buf_.size() - o;
+ FLATBUFFERS_ASSERT(byte_width == 8 || reloff < 1ULL << (byte_width * 8));
+ Write(reloff, byte_width);
+ }
+
+ template <typename T> void PushIndirect(T val, Type type, BitWidth bit_width)
+ {
+ auto byte_width = Align(bit_width);
+ auto iloc = buf_.size();
+ Write(val, byte_width);
+ stack_.push_back(Value(static_cast<uint64_t>(iloc), type, bit_width));
+ }
+
+ static BitWidth WidthB(size_t byte_width)
+ {
+ switch (byte_width)
+ {
+ case 1:
+ return BIT_WIDTH_8;
+ case 2:
+ return BIT_WIDTH_16;
+ case 4:
+ return BIT_WIDTH_32;
+ case 8:
+ return BIT_WIDTH_64;
+ default:
+ FLATBUFFERS_ASSERT(false);
+ return BIT_WIDTH_64;
+ }
+ }
+
+ template <typename T> static Type GetScalarType()
+ {
+ static_assert(flatbuffers::is_scalar<T>::value, "Unrelated types");
+ return flatbuffers::is_floating_point<T>::value
+ ? FBT_FLOAT
+ : flatbuffers::is_same<T, bool>::value
+ ? FBT_BOOL
+ : (flatbuffers::is_unsigned<T>::value ? FBT_UINT : FBT_INT);
+ }
+
+public:
+ // This was really intended to be private, except for LastValue/ReuseValue.
+ struct Value
+ {
+ union {
+ int64_t i_;
+ uint64_t u_;
+ double f_;
+ };
+
+ Type type_;
+
+    // For scalars: the width of the value itself; for vectors: of their
+    // elements; for strings: of the length.
+ BitWidth min_bit_width_;
+
+ Value() : i_(0), type_(FBT_NULL), min_bit_width_(BIT_WIDTH_8) {}
+
+ Value(bool b) : u_(static_cast<uint64_t>(b)), type_(FBT_BOOL), min_bit_width_(BIT_WIDTH_8) {}
+
+ Value(int64_t i, Type t, BitWidth bw) : i_(i), type_(t), min_bit_width_(bw) {}
+ Value(uint64_t u, Type t, BitWidth bw) : u_(u), type_(t), min_bit_width_(bw) {}
+
+ Value(float f) : f_(static_cast<double>(f)), type_(FBT_FLOAT), min_bit_width_(BIT_WIDTH_32) {}
+ Value(double f) : f_(f), type_(FBT_FLOAT), min_bit_width_(WidthF(f)) {}
+
+ uint8_t StoredPackedType(BitWidth parent_bit_width_ = BIT_WIDTH_8) const
+ {
+ return PackedType(StoredWidth(parent_bit_width_), type_);
+ }
+
+ BitWidth ElemWidth(size_t buf_size, size_t elem_index) const
+ {
+ if (IsInline(type_))
+ {
+ return min_bit_width_;
+ }
+ else
+ {
+ // We have an absolute offset, but want to store a relative offset
+ // elem_index elements beyond the current buffer end. Since whether
+ // the relative offset fits in a certain byte_width depends on
+ // the size of the elements before it (and their alignment), we have
+ // to test for each size in turn.
+ for (size_t byte_width = 1; byte_width <= sizeof(flatbuffers::largest_scalar_t);
+ byte_width *= 2)
+ {
+ // Where are we going to write this offset?
+ auto offset_loc =
+ buf_size + flatbuffers::PaddingBytes(buf_size, byte_width) + elem_index * byte_width;
+ // Compute relative offset.
+ auto offset = offset_loc - u_;
+ // Does it fit?
+ auto bit_width = WidthU(offset);
+ if (static_cast<size_t>(static_cast<size_t>(1U) << bit_width) == byte_width)
+ return bit_width;
+ }
+ FLATBUFFERS_ASSERT(false); // Must match one of the sizes above.
+ return BIT_WIDTH_64;
+ }
+ }
+
+ BitWidth StoredWidth(BitWidth parent_bit_width_ = BIT_WIDTH_8) const
+ {
+ if (IsInline(type_))
+ {
+ return (std::max)(min_bit_width_, parent_bit_width_);
+ }
+ else
+ {
+ return min_bit_width_;
+ }
+ }
+ };
+
+private:
+ void WriteAny(const Value &val, uint8_t byte_width)
+ {
+ switch (val.type_)
+ {
+ case FBT_NULL:
+ case FBT_INT:
+ Write(val.i_, byte_width);
+ break;
+ case FBT_BOOL:
+ case FBT_UINT:
+ Write(val.u_, byte_width);
+ break;
+ case FBT_FLOAT:
+ WriteDouble(val.f_, byte_width);
+ break;
+ default:
+ WriteOffset(val.u_, byte_width);
+ break;
+ }
+ }
+
+ size_t CreateBlob(const void *data, size_t len, size_t trailing, Type type)
+ {
+ auto bit_width = WidthU(len);
+ auto byte_width = Align(bit_width);
+ Write<uint64_t>(len, byte_width);
+ auto sloc = buf_.size();
+ WriteBytes(data, len + trailing);
+ stack_.push_back(Value(static_cast<uint64_t>(sloc), type, bit_width));
+ return sloc;
+ }
+
+ template <typename T> size_t ScalarVector(const T *elems, size_t len, bool fixed)
+ {
+ auto vector_type = GetScalarType<T>();
+ auto byte_width = sizeof(T);
+ auto bit_width = WidthB(byte_width);
+    // If you get this assert, you're trying to write a vector whose size
+    // field is wider than its scalar elements (e.g. a byte vector with > 255
+    // elements). For such types, write a "blob" instead.
+ // TODO: instead of asserting, could write vector with larger elements
+ // instead, though that would be wasteful.
+ FLATBUFFERS_ASSERT(WidthU(len) <= bit_width);
+ Align(bit_width);
+ if (!fixed)
+ Write<uint64_t>(len, byte_width);
+ auto vloc = buf_.size();
+ for (size_t i = 0; i < len; i++)
+ Write(elems[i], byte_width);
+ stack_.push_back(
+ Value(static_cast<uint64_t>(vloc), ToTypedVector(vector_type, fixed ? len : 0), bit_width));
+ return vloc;
+ }
+
+ Value CreateVector(size_t start, size_t vec_len, size_t step, bool typed, bool fixed,
+ const Value *keys = nullptr)
+ {
+ FLATBUFFERS_ASSERT(!fixed || typed); // typed=false, fixed=true combination is not supported.
+ // Figure out smallest bit width we can store this vector with.
+ auto bit_width = (std::max)(force_min_bit_width_, WidthU(vec_len));
+ auto prefix_elems = 1;
+ if (keys)
+ {
+ // If this vector is part of a map, we will pre-fix an offset to the keys
+ // to this vector.
+ bit_width = (std::max)(bit_width, keys->ElemWidth(buf_.size(), 0));
+ prefix_elems += 2;
+ }
+ Type vector_type = FBT_KEY;
+ // Check bit widths and types for all elements.
+ for (size_t i = start; i < stack_.size(); i += step)
+ {
+ auto elem_width = stack_[i].ElemWidth(buf_.size(), i - start + prefix_elems);
+ bit_width = (std::max)(bit_width, elem_width);
+ if (typed)
+ {
+ if (i == start)
+ {
+ vector_type = stack_[i].type_;
+ }
+ else
+ {
+ // If you get this assert, you are writing a typed vector with
+ // elements that are not all the same type.
+ FLATBUFFERS_ASSERT(vector_type == stack_[i].type_);
+ }
+ }
+ }
+ // If you get this assert, your fixed types are not one of:
+ // Int / UInt / Float / Key.
+ FLATBUFFERS_ASSERT(!fixed || IsTypedVectorElementType(vector_type));
+ auto byte_width = Align(bit_width);
+ // Write vector. First the keys width/offset if available, and size.
+ if (keys)
+ {
+ WriteOffset(keys->u_, byte_width);
+ Write<uint64_t>(1ULL << keys->min_bit_width_, byte_width);
+ }
+ if (!fixed)
+ Write<uint64_t>(vec_len, byte_width);
+ // Then the actual data.
+ auto vloc = buf_.size();
+ for (size_t i = start; i < stack_.size(); i += step)
+ {
+ WriteAny(stack_[i], byte_width);
+ }
+ // Then the types.
+ if (!typed)
+ {
+ for (size_t i = start; i < stack_.size(); i += step)
+ {
+ buf_.push_back(stack_[i].StoredPackedType(bit_width));
+ }
+ }
+ return Value(static_cast<uint64_t>(vloc),
+ keys ? FBT_MAP
+ : (typed ? ToTypedVector(vector_type, fixed ? vec_len : 0) : FBT_VECTOR),
+ bit_width);
+ }
+
+ // You shouldn't really be copying instances of this class.
+ Builder(const Builder &);
+ Builder &operator=(const Builder &);
+
+ std::vector<uint8_t> buf_;
+ std::vector<Value> stack_;
+
+ bool finished_;
+ bool has_duplicate_keys_;
+
+ BuilderFlag flags_;
+
+ BitWidth force_min_bit_width_;
+
+ struct KeyOffsetCompare
+ {
+ explicit KeyOffsetCompare(const std::vector<uint8_t> &buf) : buf_(&buf) {}
+ bool operator()(size_t a, size_t b) const
+ {
+ auto stra = reinterpret_cast<const char *>(flatbuffers::vector_data(*buf_) + a);
+ auto strb = reinterpret_cast<const char *>(flatbuffers::vector_data(*buf_) + b);
+ return strcmp(stra, strb) < 0;
+ }
+ const std::vector<uint8_t> *buf_;
+ };
+
+ typedef std::pair<size_t, size_t> StringOffset;
+ struct StringOffsetCompare
+ {
+ explicit StringOffsetCompare(const std::vector<uint8_t> &buf) : buf_(&buf) {}
+ bool operator()(const StringOffset &a, const StringOffset &b) const
+ {
+ auto stra = reinterpret_cast<const char *>(flatbuffers::vector_data(*buf_) + a.first);
+ auto strb = reinterpret_cast<const char *>(flatbuffers::vector_data(*buf_) + b.first);
+ return strncmp(stra, strb, (std::min)(a.second, b.second) + 1) < 0;
+ }
+ const std::vector<uint8_t> *buf_;
+ };
+
+ typedef std::set<size_t, KeyOffsetCompare> KeyOffsetMap;
+ typedef std::set<StringOffset, StringOffsetCompare> StringOffsetMap;
+
+ KeyOffsetMap key_pool;
+ StringOffsetMap string_pool;
+};
+
+} // namespace flexbuffers
+
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
+#endif // FLATBUFFERS_FLEXBUFFERS_H_
diff --git a/onert-micro/externals/flatbuffers/grpc.h b/onert-micro/externals/flatbuffers/grpc.h
new file mode 100644
index 000000000..184c89e08
--- /dev/null
+++ b/onert-micro/externals/flatbuffers/grpc.h
@@ -0,0 +1,361 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2014 Google Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FLATBUFFERS_GRPC_H_
+#define FLATBUFFERS_GRPC_H_
+
+// Helper functionality to glue FlatBuffers and GRPC.
+
+#include "flatbuffers/flatbuffers.h"
+#include "grpc/byte_buffer_reader.h"
+#include "grpcpp/support/byte_buffer.h"
+
+namespace flatbuffers
+{
+namespace grpc
+{
+
+// Message is a typed wrapper around a buffer that manages the underlying
+// `grpc_slice` and also provides flatbuffers-specific helpers such as `Verify`
+// and `GetRoot`. Since it is backed by a `grpc_slice`, the underlying buffer
+ // is refcounted and ownership is managed automatically.
+template <class T> class Message
+{
+public:
+ Message() : slice_(grpc_empty_slice()) {}
+
+ Message(grpc_slice slice, bool add_ref) : slice_(add_ref ? grpc_slice_ref(slice) : slice) {}
+
+ Message &operator=(const Message &other) = delete;
+
+ Message(Message &&other) : slice_(other.slice_) { other.slice_ = grpc_empty_slice(); }
+
+ Message(const Message &other) = delete;
+
+ Message &operator=(Message &&other)
+ {
+ grpc_slice_unref(slice_);
+ slice_ = other.slice_;
+ other.slice_ = grpc_empty_slice();
+ return *this;
+ }
+
+ ~Message() { grpc_slice_unref(slice_); }
+
+ const uint8_t *mutable_data() const { return GRPC_SLICE_START_PTR(slice_); }
+
+ const uint8_t *data() const { return GRPC_SLICE_START_PTR(slice_); }
+
+ size_t size() const { return GRPC_SLICE_LENGTH(slice_); }
+
+ bool Verify() const
+ {
+ Verifier verifier(data(), size());
+ return verifier.VerifyBuffer<T>(nullptr);
+ }
+
+ T *GetMutableRoot() { return flatbuffers::GetMutableRoot<T>(mutable_data()); }
+
+ const T *GetRoot() const { return flatbuffers::GetRoot<T>(data()); }
+
+ // This is only intended for serializer use, or if you know what you're doing.
+ const grpc_slice &BorrowSlice() const { return slice_; }
+
+private:
+ grpc_slice slice_;
+};
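+
+// Illustrative usage sketch (not part of this header): `MyTable` stands in
+// for a hypothetical flatbuffers-generated type.
+//
+//   flatbuffers::grpc::Message<MyTable> msg = ...; // e.g. received via gRPC
+//   if (msg.Verify()) {
+//     const MyTable *root = msg.GetRoot();
+//     // ... read fields from `root` ...
+//   }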
+
+class MessageBuilder;
+
+// SliceAllocator is a gRPC-specific allocator that uses the `grpc_slice`
+// refcounted slices to manage memory ownership. This makes it easy and
+// efficient to transfer buffers to gRPC.
+class SliceAllocator : public Allocator
+{
+public:
+ SliceAllocator() : slice_(grpc_empty_slice()) {}
+
+ SliceAllocator(const SliceAllocator &other) = delete;
+ SliceAllocator &operator=(const SliceAllocator &other) = delete;
+
+ SliceAllocator(SliceAllocator &&other) : slice_(grpc_empty_slice())
+ {
+ // default-construct and swap idiom
+ swap(other);
+ }
+
+ SliceAllocator &operator=(SliceAllocator &&other)
+ {
+ // move-construct and swap idiom
+ SliceAllocator temp(std::move(other));
+ swap(temp);
+ return *this;
+ }
+
+ void swap(SliceAllocator &other)
+ {
+ using std::swap;
+ swap(slice_, other.slice_);
+ }
+
+ virtual ~SliceAllocator() { grpc_slice_unref(slice_); }
+
+ virtual uint8_t *allocate(size_t size) override
+ {
+ FLATBUFFERS_ASSERT(GRPC_SLICE_IS_EMPTY(slice_));
+ slice_ = grpc_slice_malloc(size);
+ return GRPC_SLICE_START_PTR(slice_);
+ }
+
+ virtual void deallocate(uint8_t *p, size_t size) override
+ {
+ FLATBUFFERS_ASSERT(p == GRPC_SLICE_START_PTR(slice_));
+ FLATBUFFERS_ASSERT(size == GRPC_SLICE_LENGTH(slice_));
+ grpc_slice_unref(slice_);
+ slice_ = grpc_empty_slice();
+ }
+
+ virtual uint8_t *reallocate_downward(uint8_t *old_p, size_t old_size, size_t new_size,
+ size_t in_use_back, size_t in_use_front) override
+ {
+ FLATBUFFERS_ASSERT(old_p == GRPC_SLICE_START_PTR(slice_));
+ FLATBUFFERS_ASSERT(old_size == GRPC_SLICE_LENGTH(slice_));
+ FLATBUFFERS_ASSERT(new_size > old_size);
+ grpc_slice old_slice = slice_;
+ grpc_slice new_slice = grpc_slice_malloc(new_size);
+ uint8_t *new_p = GRPC_SLICE_START_PTR(new_slice);
+ memcpy_downward(old_p, old_size, new_p, new_size, in_use_back, in_use_front);
+ slice_ = new_slice;
+ grpc_slice_unref(old_slice);
+ return new_p;
+ }
+
+private:
+ grpc_slice &get_slice(uint8_t *p, size_t size)
+ {
+ FLATBUFFERS_ASSERT(p == GRPC_SLICE_START_PTR(slice_));
+ FLATBUFFERS_ASSERT(size == GRPC_SLICE_LENGTH(slice_));
+ return slice_;
+ }
+
+ grpc_slice slice_;
+
+ friend class MessageBuilder;
+};
+
+// SliceAllocatorMember is a hack to ensure that the MessageBuilder's
+// slice_allocator_ member is constructed before the FlatBufferBuilder, since
+// the allocator is used in the FlatBufferBuilder ctor.
+namespace detail
+{
+struct SliceAllocatorMember
+{
+ SliceAllocator slice_allocator_;
+};
+} // namespace detail
+
+// MessageBuilder is a gRPC-specific FlatBufferBuilder that uses SliceAllocator
+// to allocate gRPC buffers.
+class MessageBuilder : private detail::SliceAllocatorMember, public FlatBufferBuilder
+{
+public:
+ explicit MessageBuilder(uoffset_t initial_size = 1024)
+ : FlatBufferBuilder(initial_size, &slice_allocator_, false)
+ {
+ }
+
+ MessageBuilder(const MessageBuilder &other) = delete;
+ MessageBuilder &operator=(const MessageBuilder &other) = delete;
+
+ MessageBuilder(MessageBuilder &&other) : FlatBufferBuilder(1024, &slice_allocator_, false)
+ {
+ // Default construct and swap idiom.
+ Swap(other);
+ }
+
+ /// Create a MessageBuilder from a FlatBufferBuilder.
+ explicit MessageBuilder(FlatBufferBuilder &&src,
+ void (*dealloc)(void *, size_t) = &DefaultAllocator::dealloc)
+ : FlatBufferBuilder(1024, &slice_allocator_, false)
+ {
+ src.Swap(*this);
+ src.SwapBufAllocator(*this);
+ if (buf_.capacity())
+ {
+ uint8_t *buf = buf_.scratch_data(); // pointer to memory
+ size_t capacity = buf_.capacity(); // size of memory
+ slice_allocator_.slice_ = grpc_slice_new_with_len(buf, capacity, dealloc);
+ }
+ else
+ {
+ slice_allocator_.slice_ = grpc_empty_slice();
+ }
+ }
+
+ /// Move-assign a FlatBufferBuilder to a MessageBuilder.
+ /// Only FlatBufferBuilder with default allocator (basically, nullptr) is
+ /// supported.
+ MessageBuilder &operator=(FlatBufferBuilder &&src)
+ {
+ // Move construct a temporary and swap
+ MessageBuilder temp(std::move(src));
+ Swap(temp);
+ return *this;
+ }
+
+ MessageBuilder &operator=(MessageBuilder &&other)
+ {
+ // Move construct a temporary and swap
+ MessageBuilder temp(std::move(other));
+ Swap(temp);
+ return *this;
+ }
+
+ void Swap(MessageBuilder &other)
+ {
+ slice_allocator_.swap(other.slice_allocator_);
+ FlatBufferBuilder::Swap(other);
+ // After swapping the FlatBufferBuilder, we swap back the allocator, which
+ // restores the original allocator back in place. This is necessary because
+ // MessageBuilder's allocator is its own member (SliceAllocatorMember). The
+ // allocator passed to FlatBufferBuilder::vector_downward must point to this
+ // member.
+ buf_.swap_allocator(other.buf_);
+ }
+
+ // Releases the ownership of the buffer pointer.
+ // Returns the size, offset, and the original grpc_slice that
+ // allocated the buffer. Also see grpc_slice_unref().
+ uint8_t *ReleaseRaw(size_t &size, size_t &offset, grpc_slice &slice)
+ {
+ uint8_t *buf = FlatBufferBuilder::ReleaseRaw(size, offset);
+ slice = slice_allocator_.slice_;
+ slice_allocator_.slice_ = grpc_empty_slice();
+ return buf;
+ }
+
+ ~MessageBuilder() {}
+
+ // GetMessage extracts the subslice of the buffer corresponding to the
+ // flatbuffers-encoded region and wraps it in a `Message<T>` to handle buffer
+ // ownership.
+ template <class T> Message<T> GetMessage()
+ {
+ auto buf_data = buf_.scratch_data(); // pointer to memory
+ auto buf_size = buf_.capacity(); // size of memory
+ auto msg_data = buf_.data(); // pointer to msg
+ auto msg_size = buf_.size(); // size of msg
+ // Do some sanity checks on data/size
+ FLATBUFFERS_ASSERT(msg_data);
+ FLATBUFFERS_ASSERT(msg_size);
+ FLATBUFFERS_ASSERT(msg_data >= buf_data);
+ FLATBUFFERS_ASSERT(msg_data + msg_size <= buf_data + buf_size);
+ // Calculate offsets from the buffer start
+ auto begin = msg_data - buf_data;
+ auto end = begin + msg_size;
+ // Get the slice we are working with (no refcount change)
+ grpc_slice slice = slice_allocator_.get_slice(buf_data, buf_size);
+ // Extract a subslice of the existing slice (increment refcount)
+ grpc_slice subslice = grpc_slice_sub(slice, begin, end);
+ // Wrap the subslice in a `Message<T>`, but don't increment refcount
+ Message<T> msg(subslice, false);
+ return msg;
+ }
+
+ template <class T> Message<T> ReleaseMessage()
+ {
+ Message<T> msg = GetMessage<T>();
+ Reset();
+ return msg;
+ }
+
+private:
+ // SliceAllocator slice_allocator_; // part of SliceAllocatorMember
+};
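+
+// Illustrative usage sketch (not part of this header): `MyTable` and
+// `CreateMyTable` stand in for hypothetical generated code.
+//
+//   flatbuffers::grpc::MessageBuilder mb;
+//   auto root = CreateMyTable(mb, /* field values */);
+//   mb.Finish(root);
+//   auto msg = mb.ReleaseMessage<MyTable>(); // buffer handed to gRPC, no copy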
+
+} // namespace grpc
+} // namespace flatbuffers
+
+namespace grpc
+{
+
+template <class T> class SerializationTraits<flatbuffers::grpc::Message<T>>
+{
+public:
+ static grpc::Status Serialize(const flatbuffers::grpc::Message<T> &msg, grpc_byte_buffer **buffer,
+ bool *own_buffer)
+ {
+ // We are passed in a `Message<T>`, which is a wrapper around a
+ // `grpc_slice`. We extract it here using `BorrowSlice()`. The const cast
+ // is necessary because the `grpc_raw_byte_buffer_create` func expects
+ // non-const slices in order to increment their refcounts.
+ grpc_slice *slice = const_cast<grpc_slice *>(&msg.BorrowSlice());
+ // Now use `grpc_raw_byte_buffer_create` to package the single slice into a
+ // `grpc_byte_buffer`, incrementing the refcount in the process.
+ *buffer = grpc_raw_byte_buffer_create(slice, 1);
+ *own_buffer = true;
+ return grpc::Status::OK;
+ }
+
+ // Deserialize by pulling the data out of the byte buffer into a Message<T>.
+ static grpc::Status Deserialize(ByteBuffer *buf, flatbuffers::grpc::Message<T> *msg)
+ {
+ grpc_byte_buffer *buffer = *reinterpret_cast<grpc_byte_buffer **>(buf);
+ if (!buffer)
+ {
+ return ::grpc::Status(::grpc::StatusCode::INTERNAL, "No payload");
+ }
+ // Check if this is a single uncompressed slice.
+ if ((buffer->type == GRPC_BB_RAW) && (buffer->data.raw.compression == GRPC_COMPRESS_NONE) &&
+ (buffer->data.raw.slice_buffer.count == 1))
+ {
+ // If it is, then we can reference the `grpc_slice` directly.
+ grpc_slice slice = buffer->data.raw.slice_buffer.slices[0];
+ // We wrap a `Message<T>` around the slice, incrementing the refcount.
+ *msg = flatbuffers::grpc::Message<T>(slice, true);
+ }
+ else
+ {
+ // Otherwise, we need to use `grpc_byte_buffer_reader_readall` to read
+ // `buffer` into a single contiguous `grpc_slice`. The gRPC reader gives
+ // us back a new slice with the refcount already incremented.
+ grpc_byte_buffer_reader reader;
+ grpc_byte_buffer_reader_init(&reader, buffer);
+ grpc_slice slice = grpc_byte_buffer_reader_readall(&reader);
+ grpc_byte_buffer_reader_destroy(&reader);
+ // We wrap a `Message<T>` around the slice, but don't increment refcount
+ *msg = flatbuffers::grpc::Message<T>(slice, false);
+ }
+ grpc_byte_buffer_destroy(buffer);
+#if FLATBUFFERS_GRPC_DISABLE_AUTO_VERIFICATION
+ return ::grpc::Status::OK;
+#else
+ if (msg->Verify())
+ {
+ return ::grpc::Status::OK;
+ }
+ else
+ {
+ return ::grpc::Status(::grpc::StatusCode::INTERNAL, "Message verification failed");
+ }
+#endif
+ }
+};
+
+} // namespace grpc
+
+#endif // FLATBUFFERS_GRPC_H_
diff --git a/onert-micro/externals/flatbuffers/hash.h b/onert-micro/externals/flatbuffers/hash.h
new file mode 100644
index 000000000..a83c0ff20
--- /dev/null
+++ b/onert-micro/externals/flatbuffers/hash.h
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2015 Google Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FLATBUFFERS_HASH_H_
+#define FLATBUFFERS_HASH_H_
+
+#include <cstdint>
+#include <cstring>
+
+#include "flatbuffers/flatbuffers.h"
+
+namespace flatbuffers
+{
+
+template <typename T> struct FnvTraits
+{
+ static const T kFnvPrime;
+ static const T kOffsetBasis;
+};
+
+template <> struct FnvTraits<uint32_t>
+{
+ static const uint32_t kFnvPrime = 0x01000193;
+ static const uint32_t kOffsetBasis = 0x811C9DC5;
+};
+
+template <> struct FnvTraits<uint64_t>
+{
+ static const uint64_t kFnvPrime = 0x00000100000001b3ULL;
+ static const uint64_t kOffsetBasis = 0xcbf29ce484222645ULL;
+};
+
+template <typename T> T HashFnv1(const char *input)
+{
+ T hash = FnvTraits<T>::kOffsetBasis;
+ for (const char *c = input; *c; ++c)
+ {
+ hash *= FnvTraits<T>::kFnvPrime;
+ hash ^= static_cast<unsigned char>(*c);
+ }
+ return hash;
+}
+
+template <typename T> T HashFnv1a(const char *input)
+{
+ T hash = FnvTraits<T>::kOffsetBasis;
+ for (const char *c = input; *c; ++c)
+ {
+ hash ^= static_cast<unsigned char>(*c);
+ hash *= FnvTraits<T>::kFnvPrime;
+ }
+ return hash;
+}
+
+template <> inline uint16_t HashFnv1<uint16_t>(const char *input)
+{
+ uint32_t hash = HashFnv1<uint32_t>(input);
+ return (hash >> 16) ^ (hash & 0xffff);
+}
+
+template <> inline uint16_t HashFnv1a<uint16_t>(const char *input)
+{
+ uint32_t hash = HashFnv1a<uint32_t>(input);
+ return (hash >> 16) ^ (hash & 0xffff);
+}
+
+template <typename T> struct NamedHashFunction
+{
+ const char *name;
+
+ typedef T (*HashFunction)(const char *);
+ HashFunction function;
+};
+
+const NamedHashFunction<uint16_t> kHashFunctions16[] = {
+ {"fnv1_16", HashFnv1<uint16_t>},
+ {"fnv1a_16", HashFnv1a<uint16_t>},
+};
+
+const NamedHashFunction<uint32_t> kHashFunctions32[] = {
+ {"fnv1_32", HashFnv1<uint32_t>},
+ {"fnv1a_32", HashFnv1a<uint32_t>},
+};
+
+const NamedHashFunction<uint64_t> kHashFunctions64[] = {
+ {"fnv1_64", HashFnv1<uint64_t>},
+ {"fnv1a_64", HashFnv1a<uint64_t>},
+};
+
+inline NamedHashFunction<uint16_t>::HashFunction FindHashFunction16(const char *name)
+{
+ std::size_t size = sizeof(kHashFunctions16) / sizeof(kHashFunctions16[0]);
+ for (std::size_t i = 0; i < size; ++i)
+ {
+ if (std::strcmp(name, kHashFunctions16[i].name) == 0)
+ {
+ return kHashFunctions16[i].function;
+ }
+ }
+ return nullptr;
+}
+
+inline NamedHashFunction<uint32_t>::HashFunction FindHashFunction32(const char *name)
+{
+ std::size_t size = sizeof(kHashFunctions32) / sizeof(kHashFunctions32[0]);
+ for (std::size_t i = 0; i < size; ++i)
+ {
+ if (std::strcmp(name, kHashFunctions32[i].name) == 0)
+ {
+ return kHashFunctions32[i].function;
+ }
+ }
+ return nullptr;
+}
+
+inline NamedHashFunction<uint64_t>::HashFunction FindHashFunction64(const char *name)
+{
+ std::size_t size = sizeof(kHashFunctions64) / sizeof(kHashFunctions64[0]);
+ for (std::size_t i = 0; i < size; ++i)
+ {
+ if (std::strcmp(name, kHashFunctions64[i].name) == 0)
+ {
+ return kHashFunctions64[i].function;
+ }
+ }
+ return nullptr;
+}
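+
+// Illustrative usage sketch: hash a string directly, or look a function up by
+// name (as done for the `hash` schema attribute):
+//
+//   uint32_t h = flatbuffers::HashFnv1a<uint32_t>("some_key");
+//   auto fn = flatbuffers::FindHashFunction32("fnv1a_32");
+//   if (fn) h = fn("some_key");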
+
+} // namespace flatbuffers
+
+#endif // FLATBUFFERS_HASH_H_
diff --git a/onert-micro/externals/flatbuffers/idl.h b/onert-micro/externals/flatbuffers/idl.h
new file mode 100644
index 000000000..de0a22ab1
--- /dev/null
+++ b/onert-micro/externals/flatbuffers/idl.h
@@ -0,0 +1,1145 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2014 Google Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FLATBUFFERS_IDL_H_
+#define FLATBUFFERS_IDL_H_
+
+#include <map>
+#include <memory>
+#include <stack>
+
+#include "flatbuffers/base.h"
+#include "flatbuffers/flatbuffers.h"
+#include "flatbuffers/flexbuffers.h"
+#include "flatbuffers/hash.h"
+#include "flatbuffers/reflection.h"
+
+#if !defined(FLATBUFFERS_CPP98_STL)
+#include <functional>
+#endif // !defined(FLATBUFFERS_CPP98_STL)
+
+// This file defines the data types representing a parsed IDL (Interface
+// Definition Language) / schema file.
+
+// Limits maximum depth of nested objects.
+// Prevents stack overflow while parsing a schema, JSON, or a flexbuffer.
+#if !defined(FLATBUFFERS_MAX_PARSING_DEPTH)
+#define FLATBUFFERS_MAX_PARSING_DEPTH 64
+#endif
+
+namespace flatbuffers
+{
+
+// The order of these matters for Is*() functions below.
+// Additionally, Parser::ParseType assumes bool..string is a contiguous range
+// of type tokens.
+// clang-format off
+#define FLATBUFFERS_GEN_TYPES_SCALAR(TD) \
+ TD(NONE, "", uint8_t, byte, byte, byte, uint8, u8, UByte, UInt8) \
+ TD(UTYPE, "", uint8_t, byte, byte, byte, uint8, u8, UByte, UInt8) /* begin scalar/int */ \
+ TD(BOOL, "bool", uint8_t, boolean,bool, bool, bool, bool, Boolean, Bool) \
+ TD(CHAR, "byte", int8_t, byte, int8, sbyte, int8, i8, Byte, Int8) \
+ TD(UCHAR, "ubyte", uint8_t, byte, byte, byte, uint8, u8, UByte, UInt8) \
+ TD(SHORT, "short", int16_t, short, int16, short, int16, i16, Short, Int16) \
+ TD(USHORT, "ushort", uint16_t, short, uint16, ushort, uint16, u16, UShort, UInt16) \
+ TD(INT, "int", int32_t, int, int32, int, int32, i32, Int, Int32) \
+ TD(UINT, "uint", uint32_t, int, uint32, uint, uint32, u32, UInt, UInt32) \
+ TD(LONG, "long", int64_t, long, int64, long, int64, i64, Long, Int64) \
+ TD(ULONG, "ulong", uint64_t, long, uint64, ulong, uint64, u64, ULong, UInt64) /* end int */ \
+ TD(FLOAT, "float", float, float, float32, float, float32, f32, Float, Float32) /* begin float */ \
+ TD(DOUBLE, "double", double, double, float64, double, float64, f64, Double, Double) /* end float/scalar */
+#define FLATBUFFERS_GEN_TYPES_POINTER(TD) \
+ TD(STRING, "string", Offset<void>, int, int, StringOffset, int, unused, Int, Offset<String>) \
+ TD(VECTOR, "", Offset<void>, int, int, VectorOffset, int, unused, Int, Offset<UOffset>) \
+ TD(STRUCT, "", Offset<void>, int, int, int, int, unused, Int, Offset<UOffset>) \
+ TD(UNION, "", Offset<void>, int, int, int, int, unused, Int, Offset<UOffset>)
+#define FLATBUFFERS_GEN_TYPE_ARRAY(TD) \
+ TD(ARRAY, "", int, int, int, int, int, unused, Int, Offset<UOffset>)
+// The fields are:
+// - enum
+// - FlatBuffers schema type.
+// - C++ type.
+// - Java type.
+// - Go type.
+// - C# / .Net type.
+// - Python type.
+// - Rust type.
+// - Kotlin type.
+
+// using these macros, we can now write code dealing with types just once, e.g.
+
+/*
+switch (type) {
+ #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, PTYPE, \
+ RTYPE, KTYPE) \
+ case BASE_TYPE_ ## ENUM: \
+ // do something specific to CTYPE here
+ FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
+ #undef FLATBUFFERS_TD
+}
+*/
+
+// If not all FLATBUFFERS_GEN_() arguments are necessary for implementation
+// of FLATBUFFERS_TD, you can use a variadic macro (with __VA_ARGS__ if needed).
+// In the above example, only CTYPE is used to generate the code, so it can be rewritten:
+
+/*
+switch (type) {
+ #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, ...) \
+ case BASE_TYPE_ ## ENUM: \
+ // do something specific to CTYPE here
+ FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
+ #undef FLATBUFFERS_TD
+}
+*/
+
+#define FLATBUFFERS_GEN_TYPES(TD) \
+ FLATBUFFERS_GEN_TYPES_SCALAR(TD) \
+ FLATBUFFERS_GEN_TYPES_POINTER(TD) \
+ FLATBUFFERS_GEN_TYPE_ARRAY(TD)
+
+// Create an enum for all the types above.
+#ifdef __GNUC__
+__extension__ // Stop GCC complaining about trailing comma with -Wpedantic.
+#endif
+enum BaseType {
+ #define FLATBUFFERS_TD(ENUM, ...) \
+ BASE_TYPE_ ## ENUM,
+ FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
+ #undef FLATBUFFERS_TD
+};
+
+#define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, ...) \
+ static_assert(sizeof(CTYPE) <= sizeof(largest_scalar_t), \
+ "define largest_scalar_t as " #CTYPE);
+ FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
+#undef FLATBUFFERS_TD
+
+inline bool IsScalar (BaseType t) { return t >= BASE_TYPE_UTYPE &&
+ t <= BASE_TYPE_DOUBLE; }
+inline bool IsInteger(BaseType t) { return t >= BASE_TYPE_UTYPE &&
+ t <= BASE_TYPE_ULONG; }
+inline bool IsFloat (BaseType t) { return t == BASE_TYPE_FLOAT ||
+ t == BASE_TYPE_DOUBLE; }
+inline bool IsLong (BaseType t) { return t == BASE_TYPE_LONG ||
+ t == BASE_TYPE_ULONG; }
+inline bool IsBool (BaseType t) { return t == BASE_TYPE_BOOL; }
+inline bool IsOneByte(BaseType t) { return t >= BASE_TYPE_UTYPE &&
+ t <= BASE_TYPE_UCHAR; }
+
+inline bool IsUnsigned(BaseType t) {
+ return (t == BASE_TYPE_UTYPE) || (t == BASE_TYPE_UCHAR) ||
+ (t == BASE_TYPE_USHORT) || (t == BASE_TYPE_UINT) ||
+ (t == BASE_TYPE_ULONG);
+}
+
+// clang-format on
+
+extern const char *const kTypeNames[];
+extern const char kTypeSizes[];
+
+inline size_t SizeOf(BaseType t) { return kTypeSizes[t]; }
+
+struct StructDef;
+struct EnumDef;
+class Parser;
+
+// Represents any type in the IDL, which is a combination of the BaseType
+// and additional information for vectors/structs_.
+struct Type
+{
+ explicit Type(BaseType _base_type = BASE_TYPE_NONE, StructDef *_sd = nullptr,
+ EnumDef *_ed = nullptr, uint16_t _fixed_length = 0)
+ : base_type(_base_type), element(BASE_TYPE_NONE), struct_def(_sd), enum_def(_ed),
+ fixed_length(_fixed_length)
+ {
+ }
+
+ bool operator==(const Type &o)
+ {
+ return base_type == o.base_type && element == o.element && struct_def == o.struct_def &&
+ enum_def == o.enum_def;
+ }
+
+ Type VectorType() const { return Type(element, struct_def, enum_def, fixed_length); }
+
+ Offset<reflection::Type> Serialize(FlatBufferBuilder *builder) const;
+
+ bool Deserialize(const Parser &parser, const reflection::Type *type);
+
+ BaseType base_type;
+ BaseType element; // only set if t == BASE_TYPE_VECTOR
+ StructDef *struct_def; // only set if t or element == BASE_TYPE_STRUCT
+ EnumDef *enum_def; // set if t == BASE_TYPE_UNION / BASE_TYPE_UTYPE,
+ // or for an integral type derived from an enum.
+ uint16_t fixed_length; // only set if t == BASE_TYPE_ARRAY
+};
+
+// Represents a parsed scalar value, its type, and field offset.
+struct Value
+{
+ Value() : constant("0"), offset(static_cast<voffset_t>(~(static_cast<voffset_t>(0U)))) {}
+ Type type;
+ std::string constant;
+ voffset_t offset;
+};
+
+// Helper class that retains the original order of a set of identifiers and
+// also provides quick lookup.
+template <typename T> class SymbolTable
+{
+public:
+ ~SymbolTable()
+ {
+ for (auto it = vec.begin(); it != vec.end(); ++it)
+ {
+ delete *it;
+ }
+ }
+
+ bool Add(const std::string &name, T *e)
+ {
+ vector_emplace_back(&vec, e);
+ auto it = dict.find(name);
+ if (it != dict.end())
+ return true;
+ dict[name] = e;
+ return false;
+ }
+
+ void Move(const std::string &oldname, const std::string &newname)
+ {
+ auto it = dict.find(oldname);
+ if (it != dict.end())
+ {
+ auto obj = it->second;
+ dict.erase(it);
+ dict[newname] = obj;
+ }
+ else
+ {
+ FLATBUFFERS_ASSERT(false);
+ }
+ }
+
+ T *Lookup(const std::string &name) const
+ {
+ auto it = dict.find(name);
+ return it == dict.end() ? nullptr : it->second;
+ }
+
+public:
+ std::map<std::string, T *> dict; // quick lookup
+ std::vector<T *> vec; // Used to iterate in order of insertion
+};
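+
+// Illustrative usage sketch: Add() takes ownership of the pointer (it is
+// deleted in ~SymbolTable) and returns true if the name already existed.
+//
+//   SymbolTable<StructDef> table;
+//   bool existed = table.Add("MyStruct", new StructDef());
+//   StructDef *sd = table.Lookup("MyStruct"); // nullptr if not present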
+
+// A name space, as set in the schema.
+struct Namespace
+{
+ Namespace() : from_table(0) {}
+
+ // Given a (potentially unqualified) name, return the "fully qualified" name
+ // which has a full namespaced descriptor.
+ // With max_components you can request fewer than the number of components
+ // the current namespace has.
+ std::string GetFullyQualifiedName(const std::string &name, size_t max_components = 1000) const;
+
+ std::vector<std::string> components;
+ size_t from_table; // Part of the namespace corresponds to a message/table.
+};
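+
+// For example, a namespace with components {"MyGame", "Sample"} would
+// qualify the name "Monster" as "MyGame.Sample.Monster".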
+
+inline bool operator<(const Namespace &a, const Namespace &b)
+{
+ size_t min_size = std::min(a.components.size(), b.components.size());
+ for (size_t i = 0; i < min_size; ++i)
+ {
+ if (a.components[i] != b.components[i])
+ return a.components[i] < b.components[i];
+ }
+ return a.components.size() < b.components.size();
+}
+
+// Base class for all definition types (fields, structs_, enums_).
+struct Definition
+{
+ Definition()
+ : generated(false), defined_namespace(nullptr), serialized_location(0), index(-1), refcount(1)
+ {
+ }
+
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<reflection::KeyValue>>>
+ SerializeAttributes(FlatBufferBuilder *builder, const Parser &parser) const;
+
+ bool DeserializeAttributes(Parser &parser, const Vector<Offset<reflection::KeyValue>> *attrs);
+
+ std::string name;
+ std::string file;
+ std::vector<std::string> doc_comment;
+ SymbolTable<Value> attributes;
+ bool generated; // did we already output code for this definition?
+ Namespace *defined_namespace; // Where it was defined.
+
+ // For use with Serialize()
+ uoffset_t serialized_location;
+ int index; // Index of this definition in the vector it is stored in.
+ int refcount;
+};
+
+struct FieldDef : public Definition
+{
+ FieldDef()
+ : deprecated(false), key(false), shared(false), native_inline(false), flexbuffer(false),
+ presence(kDefault), nested_flatbuffer(NULL), padding(0)
+ {
+ }
+
+ Offset<reflection::Field> Serialize(FlatBufferBuilder *builder, uint16_t id,
+ const Parser &parser) const;
+
+ bool Deserialize(Parser &parser, const reflection::Field *field);
+
+ bool IsScalarOptional() const { return IsScalar(value.type.base_type) && IsOptional(); }
+ bool IsOptional() const { return presence == kOptional; }
+ bool IsRequired() const { return presence == kRequired; }
+ bool IsDefault() const { return presence == kDefault; }
+
+ Value value;
+ bool deprecated; // Field is allowed to be present in old data, but can't be
+ // written in new data nor accessed in new code.
+ bool key; // Field functions as a key for creating sorted vectors.
+ bool shared; // Field will be using string pooling (i.e. CreateSharedString)
+ // as default serialization behavior if field is a string.
+ bool native_inline; // Field will be defined inline (instead of as a pointer)
+ // for native tables if field is a struct.
+ bool flexbuffer; // This field contains FlexBuffer data.
+
+ enum Presence
+ {
+ // Field must always be present.
+ kRequired,
+ // Non-presence should be signalled to and controlled by users.
+ kOptional,
+ // Non-presence is hidden from users.
+ // Implementations may omit writing default values.
+ kDefault,
+ };
+ Presence static MakeFieldPresence(bool optional, bool required)
+ {
+ FLATBUFFERS_ASSERT(!(required && optional));
+ // clang-format off
+ return required ? FieldDef::kRequired
+ : optional ? FieldDef::kOptional
+ : FieldDef::kDefault;
+ // clang-format on
+ }
+ Presence presence;
+
+ StructDef *nested_flatbuffer; // This field contains nested FlatBuffer data.
+ size_t padding; // Bytes to always pad after this field.
+};
+
+struct StructDef : public Definition
+{
+ StructDef()
+ : fixed(false), predecl(true), sortbysize(true), has_key(false), minalign(1), bytesize(0)
+ {
+ }
+
+ void PadLastField(size_t min_align)
+ {
+ auto padding = PaddingBytes(bytesize, min_align);
+ bytesize += padding;
+ if (fields.vec.size())
+ fields.vec.back()->padding = padding;
+ }
+
+ Offset<reflection::Object> Serialize(FlatBufferBuilder *builder, const Parser &parser) const;
+
+ bool Deserialize(Parser &parser, const reflection::Object *object);
+
+ SymbolTable<FieldDef> fields;
+
+ bool fixed; // If it's struct, not a table.
+ bool predecl; // If it's used before it was defined.
+ bool sortbysize; // Whether fields come in the declaration or size order.
+ bool has_key; // It has a key field.
+ size_t minalign; // What the whole object needs to be aligned to.
+ size_t bytesize; // Size if fixed.
+
+ flatbuffers::unique_ptr<std::string> original_location;
+};
+
+struct EnumDef;
+struct EnumValBuilder;
+
+struct EnumVal
+{
+ Offset<reflection::EnumVal> Serialize(FlatBufferBuilder *builder, const Parser &parser) const;
+
+ bool Deserialize(const Parser &parser, const reflection::EnumVal *val);
+
+ uint64_t GetAsUInt64() const { return static_cast<uint64_t>(value); }
+ int64_t GetAsInt64() const { return value; }
+ bool IsZero() const { return 0 == value; }
+ bool IsNonZero() const { return !IsZero(); }
+
+ std::string name;
+ std::vector<std::string> doc_comment;
+ Type union_type;
+
+private:
+ friend EnumDef;
+ friend EnumValBuilder;
+ friend bool operator==(const EnumVal &lhs, const EnumVal &rhs);
+
+ EnumVal(const std::string &_name, int64_t _val) : name(_name), value(_val) {}
+ EnumVal() : value(0) {}
+
+ int64_t value;
+};
+
+struct EnumDef : public Definition
+{
+ EnumDef() : is_union(false), uses_multiple_type_instances(false) {}
+
+ Offset<reflection::Enum> Serialize(FlatBufferBuilder *builder, const Parser &parser) const;
+
+ bool Deserialize(Parser &parser, const reflection::Enum *values);
+
+ template <typename T> void ChangeEnumValue(EnumVal *ev, T new_val);
+ void SortByValue();
+ void RemoveDuplicates();
+
+ std::string AllFlags() const;
+ const EnumVal *MinValue() const;
+ const EnumVal *MaxValue() const;
+ // Returns the number of integer steps from v1 to v2.
+ uint64_t Distance(const EnumVal *v1, const EnumVal *v2) const;
+ // Returns the number of integer steps from Min to Max.
+ uint64_t Distance() const { return Distance(MinValue(), MaxValue()); }
+
+ EnumVal *ReverseLookup(int64_t enum_idx, bool skip_union_default = false) const;
+ EnumVal *FindByValue(const std::string &constant) const;
+
+ std::string ToString(const EnumVal &ev) const
+ {
+ return IsUInt64() ? NumToString(ev.GetAsUInt64()) : NumToString(ev.GetAsInt64());
+ }
+
+ size_t size() const { return vals.vec.size(); }
+
+ const std::vector<EnumVal *> &Vals() const { return vals.vec; }
+
+ const EnumVal *Lookup(const std::string &enum_name) const { return vals.Lookup(enum_name); }
+
+ bool is_union;
+ // Type is a union which uses type aliases where at least one type is
+ // available under two different names.
+ bool uses_multiple_type_instances;
+ Type underlying_type;
+
+private:
+ bool IsUInt64() const { return (BASE_TYPE_ULONG == underlying_type.base_type); }
+
+ friend EnumValBuilder;
+ SymbolTable<EnumVal> vals;
+};
+
+inline bool IsString(const Type &type) { return type.base_type == BASE_TYPE_STRING; }
+
+inline bool IsStruct(const Type &type)
+{
+ return type.base_type == BASE_TYPE_STRUCT && type.struct_def->fixed;
+}
+
+inline bool IsUnion(const Type &type)
+{
+ return type.enum_def != nullptr && type.enum_def->is_union;
+}
+
+inline bool IsVector(const Type &type) { return type.base_type == BASE_TYPE_VECTOR; }
+
+inline bool IsArray(const Type &type) { return type.base_type == BASE_TYPE_ARRAY; }
+
+inline bool IsSeries(const Type &type) { return IsVector(type) || IsArray(type); }
+
+inline bool IsEnum(const Type &type)
+{
+ return type.enum_def != nullptr && IsInteger(type.base_type);
+}
+
+inline size_t InlineSize(const Type &type)
+{
+ return IsStruct(type) ? type.struct_def->bytesize
+ : (IsArray(type) ? InlineSize(type.VectorType()) * type.fixed_length
+ : SizeOf(type.base_type));
+}
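+
+// For example, a fixed-length array of four int32 elements has
+// InlineSize == 4 * sizeof(int32_t) == 16 bytes.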
+
+inline size_t InlineAlignment(const Type &type)
+{
+ if (IsStruct(type))
+ {
+ return type.struct_def->minalign;
+ }
+ else if (IsArray(type))
+ {
+ return IsStruct(type.VectorType()) ? type.struct_def->minalign : SizeOf(type.element);
+ }
+ else
+ {
+ return SizeOf(type.base_type);
+ }
+}
+inline bool operator==(const EnumVal &lhs, const EnumVal &rhs) { return lhs.value == rhs.value; }
+inline bool operator!=(const EnumVal &lhs, const EnumVal &rhs) { return !(lhs == rhs); }
+
+inline bool EqualByName(const Type &a, const Type &b)
+{
+ return a.base_type == b.base_type && a.element == b.element &&
+ (a.struct_def == b.struct_def || a.struct_def->name == b.struct_def->name) &&
+ (a.enum_def == b.enum_def || a.enum_def->name == b.enum_def->name);
+}
+
+struct RPCCall : public Definition
+{
+ Offset<reflection::RPCCall> Serialize(FlatBufferBuilder *builder, const Parser &parser) const;
+
+ bool Deserialize(Parser &parser, const reflection::RPCCall *call);
+
+ StructDef *request, *response;
+};
+
+struct ServiceDef : public Definition
+{
+ Offset<reflection::Service> Serialize(FlatBufferBuilder *builder, const Parser &parser) const;
+ bool Deserialize(Parser &parser, const reflection::Service *service);
+
+ SymbolTable<RPCCall> calls;
+};
+
+// Container of options that may apply to any of the source/text generators.
+struct IDLOptions
+{
+ bool gen_jvmstatic;
+ // Use flexbuffers instead for binary and text generation
+ bool use_flexbuffers;
+ bool strict_json;
+ bool output_default_scalars_in_json;
+ int indent_step;
+ bool output_enum_identifiers;
+ bool prefixed_enums;
+ bool scoped_enums;
+ bool include_dependence_headers;
+ bool mutable_buffer;
+ bool one_file;
+ bool proto_mode;
+ bool proto_oneof_union;
+ bool generate_all;
+ bool skip_unexpected_fields_in_json;
+ bool generate_name_strings;
+ bool generate_object_based_api;
+ bool gen_compare;
+ std::string cpp_object_api_pointer_type;
+ std::string cpp_object_api_string_type;
+ bool cpp_object_api_string_flexible_constructor;
+ bool cpp_direct_copy;
+ bool gen_nullable;
+ bool java_checkerframework;
+ bool gen_generated;
+ std::string object_prefix;
+ std::string object_suffix;
+ bool union_value_namespacing;
+ bool allow_non_utf8;
+ bool natural_utf8;
+ std::string include_prefix;
+ bool keep_include_path;
+ bool binary_schema_comments;
+ bool binary_schema_builtins;
+ bool binary_schema_gen_embed;
+ std::string go_import;
+ std::string go_namespace;
+ bool protobuf_ascii_alike;
+ bool size_prefixed;
+ std::string root_type;
+ bool force_defaults;
+ bool java_primitive_has_method;
+ bool cs_gen_json_serializer;
+ std::vector<std::string> cpp_includes;
+ std::string cpp_std;
+ bool cpp_static_reflection;
+ std::string proto_namespace_suffix;
+ std::string filename_suffix;
+ std::string filename_extension;
+ bool no_warnings;
+
+ // Possible options for the more general generator below.
+ enum Language
+ {
+ kJava = 1 << 0,
+ kCSharp = 1 << 1,
+ kGo = 1 << 2,
+ kCpp = 1 << 3,
+ kPython = 1 << 5,
+ kPhp = 1 << 6,
+ kJson = 1 << 7,
+ kBinary = 1 << 8,
+ kTs = 1 << 9,
+ kJsonSchema = 1 << 10,
+ kDart = 1 << 11,
+ kLua = 1 << 12,
+ kLobster = 1 << 13,
+ kRust = 1 << 14,
+ kKotlin = 1 << 15,
+ kSwift = 1 << 16,
+ kMAX
+ };
+
+ Language lang;
+
+ enum MiniReflect
+ {
+ kNone,
+ kTypes,
+ kTypesAndNames
+ };
+
+ MiniReflect mini_reflect;
+
+ // If set, require all fields in a table to be explicitly numbered.
+ bool require_explicit_ids;
+
+ // The corresponding language bit will be set if a language is included
+ // for code generation.
+ unsigned long lang_to_generate;
+
+ // If set (default behavior), empty string fields will be set to nullptr to
+ // make the flatbuffer more compact.
+ bool set_empty_strings_to_null;
+
+ // If set (default behavior), empty vector fields will be set to nullptr to
+ // make the flatbuffer more compact.
+ bool set_empty_vectors_to_null;
+
+ IDLOptions()
+ : gen_jvmstatic(false), use_flexbuffers(false), strict_json(false),
+ output_default_scalars_in_json(false), indent_step(2), output_enum_identifiers(true),
+ prefixed_enums(true), scoped_enums(false), include_dependence_headers(true),
+ mutable_buffer(false), one_file(false), proto_mode(false), proto_oneof_union(false),
+ generate_all(false), skip_unexpected_fields_in_json(false), generate_name_strings(false),
+ generate_object_based_api(false), gen_compare(false),
+ cpp_object_api_pointer_type("std::unique_ptr"),
+ cpp_object_api_string_flexible_constructor(false), cpp_direct_copy(true), gen_nullable(false),
+ java_checkerframework(false), gen_generated(false), object_suffix("T"),
+ union_value_namespacing(true), allow_non_utf8(false), natural_utf8(false),
+ keep_include_path(false), binary_schema_comments(false), binary_schema_builtins(false),
+ binary_schema_gen_embed(false), protobuf_ascii_alike(false), size_prefixed(false),
+ force_defaults(false), java_primitive_has_method(false), cs_gen_json_serializer(false),
+ cpp_static_reflection(false), filename_suffix("_generated"), filename_extension(),
+ no_warnings(false), lang(IDLOptions::kJava), mini_reflect(IDLOptions::kNone),
+ require_explicit_ids(false), lang_to_generate(0), set_empty_strings_to_null(true),
+ set_empty_vectors_to_null(true)
+ {
+ }
+};
+
+// This encapsulates where the parser is in the current source file.
+struct ParserState
+{
+ ParserState()
+ : cursor_(nullptr), line_start_(nullptr), line_(0), token_(-1),
+ attr_is_trivial_ascii_string_(true)
+ {
+ }
+
+protected:
+ void ResetState(const char *source)
+ {
+ cursor_ = source;
+ line_ = 0;
+ MarkNewLine();
+ }
+
+ void MarkNewLine()
+ {
+ line_start_ = cursor_;
+ line_ += 1;
+ }
+
+ int64_t CursorPosition() const
+ {
+ FLATBUFFERS_ASSERT(cursor_ && line_start_ && cursor_ >= line_start_);
+ return static_cast<int64_t>(cursor_ - line_start_);
+ }
+
+ const char *cursor_;
+ const char *line_start_;
+ int line_; // the current line being parsed
+ int token_;
+
+ // Flag: the text in attribute_ is a pure ASCII string without escape
+ // sequences. Only printable ASCII (without [\t\r\n]).
+ // Used for number-in-string (and base64 strings in the future).
+ bool attr_is_trivial_ascii_string_;
+ std::string attribute_;
+ std::vector<std::string> doc_comment_;
+};
+
+// A way to make error propagation less error prone by requiring values to be
+// checked.
+// Once you create a value of this type you must either:
+// - Call Check() on it.
+// - Copy or assign it to another value.
+// Failure to do so leads to an assert.
+// This guarantees that, as a return value, it cannot be ignored.
+class CheckedError
+{
+public:
+ explicit CheckedError(bool error) : is_error_(error), has_been_checked_(false) {}
+
+ CheckedError &operator=(const CheckedError &other)
+ {
+ is_error_ = other.is_error_;
+ has_been_checked_ = false;
+ other.has_been_checked_ = true;
+ return *this;
+ }
+
+ CheckedError(const CheckedError &other)
+ {
+ *this = other; // Use assignment operator.
+ }
+
+ ~CheckedError() { FLATBUFFERS_ASSERT(has_been_checked_); }
+
+ bool Check()
+ {
+ has_been_checked_ = true;
+ return is_error_;
+ }
+
+private:
+ bool is_error_;
+ mutable bool has_been_checked_;
+};
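+
+// Illustrative usage sketch (`SomeParseStep` is a hypothetical function
+// returning CheckedError):
+//
+//   CheckedError err = SomeParseStep();
+//   if (err.Check()) { /* an error occurred; report and bail out */ }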
+
+// Additionally, in GCC we can get these errors statically, for additional
+// assurance:
+// clang-format off
+#ifdef __GNUC__
+#define FLATBUFFERS_CHECKED_ERROR CheckedError \
+ __attribute__((warn_unused_result))
+#else
+#define FLATBUFFERS_CHECKED_ERROR CheckedError
+#endif
+// clang-format on
+
+class Parser : public ParserState
+{
+public:
+ explicit Parser(const IDLOptions &options = IDLOptions())
+ : current_namespace_(nullptr), empty_namespace_(nullptr),
+ flex_builder_(256, flexbuffers::BUILDER_FLAG_SHARE_ALL), root_struct_def_(nullptr),
+ opts(options), uses_flexbuffers_(false), advanced_features_(0), source_(nullptr),
+ anonymous_counter_(0), parse_depth_counter_(0)
+ {
+ if (opts.force_defaults)
+ {
+ builder_.ForceDefaults(true);
+ }
+ // Start out with the empty namespace being current.
+ empty_namespace_ = new Namespace();
+ namespaces_.push_back(empty_namespace_);
+ current_namespace_ = empty_namespace_;
+ known_attributes_["deprecated"] = true;
+ known_attributes_["required"] = true;
+ known_attributes_["key"] = true;
+ known_attributes_["shared"] = true;
+ known_attributes_["hash"] = true;
+ known_attributes_["id"] = true;
+ known_attributes_["force_align"] = true;
+ known_attributes_["bit_flags"] = true;
+ known_attributes_["original_order"] = true;
+ known_attributes_["nested_flatbuffer"] = true;
+ known_attributes_["csharp_partial"] = true;
+ known_attributes_["streaming"] = true;
+ known_attributes_["idempotent"] = true;
+ known_attributes_["cpp_type"] = true;
+ known_attributes_["cpp_ptr_type"] = true;
+ known_attributes_["cpp_ptr_type_get"] = true;
+ known_attributes_["cpp_str_type"] = true;
+ known_attributes_["cpp_str_flex_ctor"] = true;
+ known_attributes_["native_inline"] = true;
+ known_attributes_["native_custom_alloc"] = true;
+ known_attributes_["native_type"] = true;
+ known_attributes_["native_type_pack_name"] = true;
+ known_attributes_["native_default"] = true;
+ known_attributes_["flexbuffer"] = true;
+ known_attributes_["private"] = true;
+ }
+
+ ~Parser()
+ {
+ for (auto it = namespaces_.begin(); it != namespaces_.end(); ++it)
+ {
+ delete *it;
+ }
+ }
+
+ // Parse the string containing either schema or JSON data, which will
+ // populate the SymbolTables or the FlatBufferBuilder above.
+ // include_paths is used to resolve any include statements, and typically
+ // should at least include the project path (where you loaded source_ from).
+ // include_paths must be nullptr terminated if specified.
+ // If include_paths is nullptr, it will attempt to load from the current
+ // directory.
+ // If the source was loaded from a file and isn't an include file,
+ // supply its name in source_filename.
+ // All paths specified in this call must be in POSIX format; if you accept
+ // paths from user input, please call PosixPath on them first.
+ bool Parse(const char *_source, const char **include_paths = nullptr,
+ const char *source_filename = nullptr);
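+
+  // Illustrative usage sketch: parse a schema first, then JSON against it.
+  //
+  //   flatbuffers::Parser parser;
+  //   bool ok = parser.Parse("table T { x: int; } root_type T;") &&
+  //             parser.Parse("{ \"x\": 42 }");
+  //   if (!ok) { /* see parser.error_ for a readable message */ }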
+
+ bool ParseJson(const char *json, const char *json_filename = nullptr);
+
+ // Set the root type. May override the one set in the schema.
+ bool SetRootType(const char *name);
+
+ // Mark all definitions as already having code generated.
+ void MarkGenerated();
+
+ // Get the files recursively included by the given file. The returned
+ // container will have at least the given file.
+ std::set<std::string> GetIncludedFilesRecursive(const std::string &file_name) const;
+
+ // Fills builder_ with a binary version of the schema parsed.
+ // See reflection/reflection.fbs
+ void Serialize();
+
+ // Deserialize a schema buffer
+ bool Deserialize(const uint8_t *buf, const size_t size);
+
+ // Fills internal structure as if the schema passed had been loaded by parsing
+ // with Parse except that included filenames will not be populated.
+ bool Deserialize(const reflection::Schema *schema);
+
+ Type *DeserializeType(const reflection::Type *type);
+
+ // Checks that the schema represented by this parser is a safe evolution
+ // of the schema provided. Returns non-empty error on any problems.
+ std::string ConformTo(const Parser &base);
+
+ // Similar to Parse(), but now only accepts JSON to be parsed into a
+ // FlexBuffer.
+ bool ParseFlexBuffer(const char *source, const char *source_filename,
+ flexbuffers::Builder *builder);
+
+ StructDef *LookupStruct(const std::string &id) const;
+ StructDef *LookupStructThruParentNamespaces(const std::string &id) const;
+
+ std::string UnqualifiedName(const std::string &fullQualifiedName);
+
+ FLATBUFFERS_CHECKED_ERROR Error(const std::string &msg);
+
+ // @brief Verify that any of 'opts.lang_to_generate' supports Optional scalars
+ // in a schema.
+ // @param opts Options used to parse a schema and generate code.
+ static bool SupportsOptionalScalars(const flatbuffers::IDLOptions &opts);
+
+private:
+ class ParseDepthGuard;
+
+ void Message(const std::string &msg);
+ void Warning(const std::string &msg);
+ FLATBUFFERS_CHECKED_ERROR ParseHexNum(int nibbles, uint64_t *val);
+ FLATBUFFERS_CHECKED_ERROR Next();
+ FLATBUFFERS_CHECKED_ERROR SkipByteOrderMark();
+ bool Is(int t) const;
+ bool IsIdent(const char *id) const;
+ FLATBUFFERS_CHECKED_ERROR Expect(int t);
+ std::string TokenToStringId(int t) const;
+ EnumDef *LookupEnum(const std::string &id);
+ FLATBUFFERS_CHECKED_ERROR ParseNamespacing(std::string *id, std::string *last);
+ FLATBUFFERS_CHECKED_ERROR ParseTypeIdent(Type &type);
+ FLATBUFFERS_CHECKED_ERROR ParseType(Type &type);
+ FLATBUFFERS_CHECKED_ERROR AddField(StructDef &struct_def, const std::string &name,
+ const Type &type, FieldDef **dest);
+ FLATBUFFERS_CHECKED_ERROR ParseField(StructDef &struct_def);
+ FLATBUFFERS_CHECKED_ERROR ParseString(Value &val, bool use_string_pooling);
+ FLATBUFFERS_CHECKED_ERROR ParseComma();
+ FLATBUFFERS_CHECKED_ERROR ParseAnyValue(Value &val, FieldDef *field, size_t parent_fieldn,
+ const StructDef *parent_struct_def, uoffset_t count,
+ bool inside_vector = false);
+ template <typename F>
+ FLATBUFFERS_CHECKED_ERROR ParseTableDelimiters(size_t &fieldn, const StructDef *struct_def,
+ F body);
+ FLATBUFFERS_CHECKED_ERROR ParseTable(const StructDef &struct_def, std::string *value,
+ uoffset_t *ovalue);
+ void SerializeStruct(const StructDef &struct_def, const Value &val);
+ void SerializeStruct(FlatBufferBuilder &builder, const StructDef &struct_def, const Value &val);
+ template <typename F> FLATBUFFERS_CHECKED_ERROR ParseVectorDelimiters(uoffset_t &count, F body);
+ FLATBUFFERS_CHECKED_ERROR ParseVector(const Type &type, uoffset_t *ovalue, FieldDef *field,
+ size_t fieldn);
+ FLATBUFFERS_CHECKED_ERROR ParseArray(Value &array);
+ FLATBUFFERS_CHECKED_ERROR ParseNestedFlatbuffer(Value &val, FieldDef *field, size_t fieldn,
+ const StructDef *parent_struct_def);
+ FLATBUFFERS_CHECKED_ERROR ParseMetaData(SymbolTable<Value> *attributes);
+ FLATBUFFERS_CHECKED_ERROR TryTypedValue(const std::string *name, int dtoken, bool check, Value &e,
+ BaseType req, bool *destmatch);
+ FLATBUFFERS_CHECKED_ERROR ParseHash(Value &e, FieldDef *field);
+ FLATBUFFERS_CHECKED_ERROR TokenError();
+ FLATBUFFERS_CHECKED_ERROR ParseSingleValue(const std::string *name, Value &e, bool check_now);
+ FLATBUFFERS_CHECKED_ERROR ParseFunction(const std::string *name, Value &e);
+ FLATBUFFERS_CHECKED_ERROR ParseEnumFromString(const Type &type, std::string *result);
+ StructDef *LookupCreateStruct(const std::string &name, bool create_if_new = true,
+ bool definition = false);
+ FLATBUFFERS_CHECKED_ERROR ParseEnum(bool is_union, EnumDef **dest);
+ FLATBUFFERS_CHECKED_ERROR ParseNamespace();
+ FLATBUFFERS_CHECKED_ERROR StartStruct(const std::string &name, StructDef **dest);
+ FLATBUFFERS_CHECKED_ERROR StartEnum(const std::string &name, bool is_union, EnumDef **dest);
+ FLATBUFFERS_CHECKED_ERROR ParseDecl();
+ FLATBUFFERS_CHECKED_ERROR ParseService();
+ FLATBUFFERS_CHECKED_ERROR ParseProtoFields(StructDef *struct_def, bool isextend,
+ bool inside_oneof);
+ FLATBUFFERS_CHECKED_ERROR ParseProtoOption();
+ FLATBUFFERS_CHECKED_ERROR ParseProtoKey();
+ FLATBUFFERS_CHECKED_ERROR ParseProtoDecl();
+ FLATBUFFERS_CHECKED_ERROR ParseProtoCurliesOrIdent();
+ FLATBUFFERS_CHECKED_ERROR ParseTypeFromProtoType(Type *type);
+ FLATBUFFERS_CHECKED_ERROR SkipAnyJsonValue();
+ FLATBUFFERS_CHECKED_ERROR ParseFlexBufferNumericConstant(flexbuffers::Builder *builder);
+ FLATBUFFERS_CHECKED_ERROR ParseFlexBufferValue(flexbuffers::Builder *builder);
+ FLATBUFFERS_CHECKED_ERROR StartParseFile(const char *source, const char *source_filename);
+ FLATBUFFERS_CHECKED_ERROR ParseRoot(const char *_source, const char **include_paths,
+ const char *source_filename);
+ FLATBUFFERS_CHECKED_ERROR DoParse(const char *_source, const char **include_paths,
+ const char *source_filename, const char *include_filename);
+ FLATBUFFERS_CHECKED_ERROR DoParseJson();
+ FLATBUFFERS_CHECKED_ERROR CheckClash(std::vector<FieldDef *> &fields, StructDef *struct_def,
+ const char *suffix, BaseType baseType);
+ FLATBUFFERS_CHECKED_ERROR ParseAlignAttribute(const std::string &align_constant, size_t min_align,
+ size_t *align);
+
+ bool SupportsAdvancedUnionFeatures() const;
+ bool SupportsAdvancedArrayFeatures() const;
+ bool SupportsOptionalScalars() const;
+ bool SupportsDefaultVectorsAndStrings() const;
+ Namespace *UniqueNamespace(Namespace *ns);
+
+ FLATBUFFERS_CHECKED_ERROR RecurseError();
+ template <typename F> CheckedError Recurse(F f);
+
+public:
+ SymbolTable<Type> types_;
+ SymbolTable<StructDef> structs_;
+ SymbolTable<EnumDef> enums_;
+ SymbolTable<ServiceDef> services_;
+ std::vector<Namespace *> namespaces_;
+ Namespace *current_namespace_;
+ Namespace *empty_namespace_;
+ std::string error_; // User readable error_ if Parse() == false
+
+ FlatBufferBuilder builder_; // any data contained in the file
+ flexbuffers::Builder flex_builder_;
+ flexbuffers::Reference flex_root_;
+ StructDef *root_struct_def_;
+ std::string file_identifier_;
+ std::string file_extension_;
+
+ std::map<uint64_t, std::string> included_files_;
+ std::map<std::string, std::set<std::string>> files_included_per_file_;
+ std::vector<std::string> native_included_files_;
+
+ std::map<std::string, bool> known_attributes_;
+
+ IDLOptions opts;
+ bool uses_flexbuffers_;
+
+ uint64_t advanced_features_;
+
+private:
+ const char *source_;
+
+ std::string file_being_parsed_;
+
+ std::vector<std::pair<Value, FieldDef *>> field_stack_;
+
+ int anonymous_counter_;
+ int parse_depth_counter_; // stack-overflow guard
+};
+
+// Utility functions for multiple generators:
+
+extern std::string MakeCamel(const std::string &in, bool first = true);
+
+extern std::string MakeScreamingCamel(const std::string &in);
+
+// Generate text (JSON) from a given FlatBuffer, and a given Parser
+// object that has been populated with the corresponding schema.
+// If indent_step is 0, no indentation will be generated. Additionally,
+// if it is less than 0, no linefeeds will be generated either.
+// See idl_gen_text.cpp.
+// strict_json adds "quotes" around field names if true.
+// If the flatbuffer cannot be encoded in JSON (e.g., it contains non-UTF-8
+// byte arrays in String values), returns false.
+extern bool GenerateTextFromTable(const Parser &parser, const void *table,
+ const std::string &tablename, std::string *text);
+extern bool GenerateText(const Parser &parser, const void *flatbuffer, std::string *text);
+extern bool GenerateTextFile(const Parser &parser, const std::string &path,
+ const std::string &file_name);
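+
+// Illustrative usage sketch (`buf` is a hypothetical pointer to a binary
+// flatbuffer matching the parsed schema):
+//
+//   std::string json;
+//   if (flatbuffers::GenerateText(parser, buf, &json)) { /* use json */ }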
+
+// Generate Json schema to string
+// See idl_gen_json_schema.cpp.
+extern bool GenerateJsonSchema(const Parser &parser, std::string *json);
+
+// Generate binary files from a given FlatBuffer, and a given Parser
+// object that has been populated with the corresponding schema.
+// See code_generators.cpp.
+extern bool GenerateBinary(const Parser &parser, const std::string &path,
+ const std::string &file_name);
+
+// Generate a C++ header from the definitions in the Parser object.
+// See idl_gen_cpp.
+extern bool GenerateCPP(const Parser &parser, const std::string &path,
+ const std::string &file_name);
+
+// Generate C# files from the definitions in the Parser object.
+// See idl_gen_csharp.cpp.
+extern bool GenerateCSharp(const Parser &parser, const std::string &path,
+ const std::string &file_name);
+
+extern bool GenerateDart(const Parser &parser, const std::string &path,
+ const std::string &file_name);
+
+// Generate Java files from the definitions in the Parser object.
+// See idl_gen_java.cpp.
+extern bool GenerateJava(const Parser &parser, const std::string &path,
+ const std::string &file_name);
+
+// Generate JavaScript or TypeScript code from the definitions in the Parser
+// object. See idl_gen_js.
+extern bool GenerateTS(const Parser &parser, const std::string &path, const std::string &file_name);
+
+// Generate Go files from the definitions in the Parser object.
+// See idl_gen_go.cpp.
+extern bool GenerateGo(const Parser &parser, const std::string &path, const std::string &file_name);
+
+// Generate Php code from the definitions in the Parser object.
+// See idl_gen_php.
+extern bool GeneratePhp(const Parser &parser, const std::string &path,
+ const std::string &file_name);
+
+// Generate Python files from the definitions in the Parser object.
+// See idl_gen_python.cpp.
+extern bool GeneratePython(const Parser &parser, const std::string &path,
+ const std::string &file_name);
+
+// Generate Lobster files from the definitions in the Parser object.
+// See idl_gen_lobster.cpp.
+extern bool GenerateLobster(const Parser &parser, const std::string &path,
+ const std::string &file_name);
+
+// Generate Lua files from the definitions in the Parser object.
+// See idl_gen_lua.cpp.
+extern bool GenerateLua(const Parser &parser, const std::string &path,
+ const std::string &file_name);
+
+// Generate Rust files from the definitions in the Parser object.
+// See idl_gen_rust.cpp.
+extern bool GenerateRust(const Parser &parser, const std::string &path,
+ const std::string &file_name);
+
+// Generate Json schema file
+// See idl_gen_json_schema.cpp.
+extern bool GenerateJsonSchema(const Parser &parser, const std::string &path,
+ const std::string &file_name);
+
+// Generate Kotlin files from the definitions in the Parser object.
+// See idl_gen_kotlin.cpp.
+extern bool GenerateKotlin(const Parser &parser, const std::string &path,
+ const std::string &file_name);
+
+// Generate Swift classes.
+// See idl_gen_swift.cpp.
+extern bool GenerateSwift(const Parser &parser, const std::string &path,
+ const std::string &file_name);
+
+// Generate a schema file from the internal representation, useful after
+// parsing a .proto schema.
+extern std::string GenerateFBS(const Parser &parser, const std::string &file_name);
+extern bool GenerateFBS(const Parser &parser, const std::string &path,
+ const std::string &file_name);
+
+// Generate a make rule for the generated TypeScript code.
+// See idl_gen_ts.cpp.
+extern std::string TSMakeRule(const Parser &parser, const std::string &path,
+ const std::string &file_name);
+
+// Generate a make rule for the generated C++ header.
+// See idl_gen_cpp.cpp.
+extern std::string CPPMakeRule(const Parser &parser, const std::string &path,
+ const std::string &file_name);
+
+// Generate a make rule for the generated Dart code.
+// See idl_gen_dart.cpp.
+extern std::string DartMakeRule(const Parser &parser, const std::string &path,
+ const std::string &file_name);
+
+// Generate a make rule for the generated Rust code.
+// See idl_gen_rust.cpp.
+extern std::string RustMakeRule(const Parser &parser, const std::string &path,
+ const std::string &file_name);
+
+// Generate a make rule for generated Java or C# files.
+// See code_generators.cpp.
+extern std::string JavaCSharpMakeRule(const Parser &parser, const std::string &path,
+ const std::string &file_name);
+
+// Generate a make rule for the generated text (JSON) files.
+// See idl_gen_text.cpp.
+extern std::string TextMakeRule(const Parser &parser, const std::string &path,
+ const std::string &file_names);
+
+// Generate a make rule for the generated binary files.
+// See code_generators.cpp.
+extern std::string BinaryMakeRule(const Parser &parser, const std::string &path,
+ const std::string &file_name);
+
+// Generate GRPC C++ interfaces.
+// See idl_gen_grpc.cpp.
+bool GenerateCppGRPC(const Parser &parser, const std::string &path, const std::string &file_name);
+
+// Generate GRPC Go interfaces.
+// See idl_gen_grpc.cpp.
+bool GenerateGoGRPC(const Parser &parser, const std::string &path, const std::string &file_name);
+
+// Generate GRPC Java classes.
+// See idl_gen_grpc.cpp.
+bool GenerateJavaGRPC(const Parser &parser, const std::string &path, const std::string &file_name);
+
+// Generate GRPC Python interfaces.
+// See idl_gen_grpc.cpp.
+bool GeneratePythonGRPC(const Parser &parser, const std::string &path,
+ const std::string &file_name);
+
+// Generate GRPC Swift interfaces.
+// See idl_gen_grpc.cpp.
+extern bool GenerateSwiftGRPC(const Parser &parser, const std::string &path,
+ const std::string &file_name);
+
+// Generate GRPC TypeScript interfaces.
+// See idl_gen_grpc.cpp.
+extern bool GenerateTSGRPC(const Parser &parser, const std::string &path,
+ const std::string &file_name);
+} // namespace flatbuffers
+
+#endif // FLATBUFFERS_IDL_H_
diff --git a/onert-micro/externals/flatbuffers/minireflect.h b/onert-micro/externals/flatbuffers/minireflect.h
new file mode 100644
index 000000000..8b733a4a5
--- /dev/null
+++ b/onert-micro/externals/flatbuffers/minireflect.h
@@ -0,0 +1,507 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 Google Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FLATBUFFERS_MINIREFLECT_H_
+#define FLATBUFFERS_MINIREFLECT_H_
+
+#include "flatbuffers/flatbuffers.h"
+#include "flatbuffers/util.h"
+
+namespace flatbuffers
+{
+
+// Utilities that can be used with the "mini reflection" tables present
+// in generated code with --reflect-types (only types) or --reflect-names
+// (also names).
+// This allows basic reflection functionality such as pretty-printing
+// that does not require the use of the schema parser or loading of binary
+// schema files at runtime (reflection.h).
+
+// For any of the functions below that take `const TypeTable *`, you pass
+// `FooTypeTable()` if the type of the root is `Foo`.
+
+// First, a generic iterator that can be used by multiple algorithms.
+
+struct IterationVisitor
+{
+ // These mark the scope of a table or struct.
+ virtual void StartSequence() {}
+ virtual void EndSequence() {}
+ // Called for each field regardless of whether it is present or not.
+ // If not present, val == nullptr. set_idx is the running index counting
+ // only the fields that are actually set.
+ virtual void Field(size_t /*field_idx*/, size_t /*set_idx*/, ElementaryType /*type*/,
+ bool /*is_vector*/, const TypeTable * /*type_table*/, const char * /*name*/,
+ const uint8_t * /*val*/)
+ {
+ }
+ // Called for a value that is actually present, after a field, or as part
+ // of a vector.
+ virtual void UType(uint8_t, const char *) {}
+ virtual void Bool(bool) {}
+ virtual void Char(int8_t, const char *) {}
+ virtual void UChar(uint8_t, const char *) {}
+ virtual void Short(int16_t, const char *) {}
+ virtual void UShort(uint16_t, const char *) {}
+ virtual void Int(int32_t, const char *) {}
+ virtual void UInt(uint32_t, const char *) {}
+ virtual void Long(int64_t) {}
+ virtual void ULong(uint64_t) {}
+ virtual void Float(float) {}
+ virtual void Double(double) {}
+ virtual void String(const String *) {}
+ virtual void Unknown(const uint8_t *) {} // From a future version.
+ // These mark the scope of a vector.
+ virtual void StartVector() {}
+ virtual void EndVector() {}
+ virtual void Element(size_t /*i*/, ElementaryType /*type*/, const TypeTable * /*type_table*/,
+ const uint8_t * /*val*/)
+ {
+ }
+ virtual ~IterationVisitor() {}
+};
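+
+// As an illustrative sketch, a visitor that counts the strings in a buffer;
+// `FooTypeTable()` stands in for whatever --reflect-types generated for your
+// root type:
+//
+//   struct StringCounter : public flatbuffers::IterationVisitor
+//   {
+//     size_t count = 0;
+//     void String(const flatbuffers::String *) override { count++; }
+//   };
+//   StringCounter counter;
+//   flatbuffers::IterateFlatBuffer(buf, FooTypeTable(), &counter);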
+
+inline size_t InlineSize(ElementaryType type, const TypeTable *type_table)
+{
+ switch (type)
+ {
+ case ET_UTYPE:
+ case ET_BOOL:
+ case ET_CHAR:
+ case ET_UCHAR:
+ return 1;
+ case ET_SHORT:
+ case ET_USHORT:
+ return 2;
+ case ET_INT:
+ case ET_UINT:
+ case ET_FLOAT:
+ case ET_STRING:
+ return 4;
+ case ET_LONG:
+ case ET_ULONG:
+ case ET_DOUBLE:
+ return 8;
+ case ET_SEQUENCE:
+ switch (type_table->st)
+ {
+ case ST_TABLE:
+ case ST_UNION:
+ return 4;
+ case ST_STRUCT:
+ return static_cast<size_t>(type_table->values[type_table->num_elems]);
+ default:
+ FLATBUFFERS_ASSERT(false);
+ return 1;
+ }
+ default:
+ FLATBUFFERS_ASSERT(false);
+ return 1;
+ }
+}
+
+inline int64_t LookupEnum(int64_t enum_val, const int64_t *values, size_t num_values)
+{
+ if (!values)
+ return enum_val;
+ for (size_t i = 0; i < num_values; i++)
+ {
+ if (enum_val == values[i])
+ return static_cast<int64_t>(i);
+ }
+ return -1; // Unknown enum value.
+}
+
+template <typename T> const char *EnumName(T tval, const TypeTable *type_table)
+{
+ if (!type_table || !type_table->names)
+ return nullptr;
+ auto i = LookupEnum(static_cast<int64_t>(tval), type_table->values, type_table->num_elems);
+ if (i >= 0 && i < static_cast<int64_t>(type_table->num_elems))
+ {
+ return type_table->names[i];
+ }
+ return nullptr;
+}
+
+void IterateObject(const uint8_t *obj, const TypeTable *type_table, IterationVisitor *visitor);
+
+inline void IterateValue(ElementaryType type, const uint8_t *val, const TypeTable *type_table,
+ const uint8_t *prev_val, soffset_t vector_index, IterationVisitor *visitor)
+{
+ switch (type)
+ {
+ case ET_UTYPE:
+ {
+ auto tval = ReadScalar<uint8_t>(val);
+ visitor->UType(tval, EnumName(tval, type_table));
+ break;
+ }
+ case ET_BOOL:
+ {
+ visitor->Bool(ReadScalar<uint8_t>(val) != 0);
+ break;
+ }
+ case ET_CHAR:
+ {
+ auto tval = ReadScalar<int8_t>(val);
+ visitor->Char(tval, EnumName(tval, type_table));
+ break;
+ }
+ case ET_UCHAR:
+ {
+ auto tval = ReadScalar<uint8_t>(val);
+ visitor->UChar(tval, EnumName(tval, type_table));
+ break;
+ }
+ case ET_SHORT:
+ {
+ auto tval = ReadScalar<int16_t>(val);
+ visitor->Short(tval, EnumName(tval, type_table));
+ break;
+ }
+ case ET_USHORT:
+ {
+ auto tval = ReadScalar<uint16_t>(val);
+ visitor->UShort(tval, EnumName(tval, type_table));
+ break;
+ }
+ case ET_INT:
+ {
+ auto tval = ReadScalar<int32_t>(val);
+ visitor->Int(tval, EnumName(tval, type_table));
+ break;
+ }
+ case ET_UINT:
+ {
+ auto tval = ReadScalar<uint32_t>(val);
+ visitor->UInt(tval, EnumName(tval, type_table));
+ break;
+ }
+ case ET_LONG:
+ {
+ visitor->Long(ReadScalar<int64_t>(val));
+ break;
+ }
+ case ET_ULONG:
+ {
+ visitor->ULong(ReadScalar<uint64_t>(val));
+ break;
+ }
+ case ET_FLOAT:
+ {
+ visitor->Float(ReadScalar<float>(val));
+ break;
+ }
+ case ET_DOUBLE:
+ {
+ visitor->Double(ReadScalar<double>(val));
+ break;
+ }
+ case ET_STRING:
+ {
+ val += ReadScalar<uoffset_t>(val);
+ visitor->String(reinterpret_cast<const String *>(val));
+ break;
+ }
+ case ET_SEQUENCE:
+ {
+ switch (type_table->st)
+ {
+ case ST_TABLE:
+ val += ReadScalar<uoffset_t>(val);
+ IterateObject(val, type_table, visitor);
+ break;
+ case ST_STRUCT:
+ IterateObject(val, type_table, visitor);
+ break;
+ case ST_UNION:
+ {
+ val += ReadScalar<uoffset_t>(val);
+ FLATBUFFERS_ASSERT(prev_val);
+ auto union_type = *prev_val; // Always a uint8_t.
+ if (vector_index >= 0)
+ {
+ auto type_vec = reinterpret_cast<const Vector<uint8_t> *>(prev_val);
+ union_type = type_vec->Get(static_cast<uoffset_t>(vector_index));
+ }
+ auto type_code_idx = LookupEnum(union_type, type_table->values, type_table->num_elems);
+ if (type_code_idx >= 0 && type_code_idx < static_cast<int32_t>(type_table->num_elems))
+ {
+ auto type_code = type_table->type_codes[type_code_idx];
+ switch (type_code.base_type)
+ {
+ case ET_SEQUENCE:
+ {
+ auto ref = type_table->type_refs[type_code.sequence_ref]();
+ IterateObject(val, ref, visitor);
+ break;
+ }
+ case ET_STRING:
+ visitor->String(reinterpret_cast<const String *>(val));
+ break;
+ default:
+ visitor->Unknown(val);
+ }
+ }
+ else
+ {
+ visitor->Unknown(val);
+ }
+ break;
+ }
+ case ST_ENUM:
+ FLATBUFFERS_ASSERT(false);
+ break;
+ }
+ break;
+ }
+ default:
+ {
+ visitor->Unknown(val);
+ break;
+ }
+ }
+}
+
+inline void IterateObject(const uint8_t *obj, const TypeTable *type_table,
+ IterationVisitor *visitor)
+{
+ visitor->StartSequence();
+ const uint8_t *prev_val = nullptr;
+ size_t set_idx = 0;
+ size_t array_idx = 0;
+ for (size_t i = 0; i < type_table->num_elems; i++)
+ {
+ auto type_code = type_table->type_codes[i];
+ auto type = static_cast<ElementaryType>(type_code.base_type);
+ auto is_repeating = type_code.is_repeating != 0;
+ auto ref_idx = type_code.sequence_ref;
+ const TypeTable *ref = nullptr;
+ if (ref_idx >= 0)
+ {
+ ref = type_table->type_refs[ref_idx]();
+ }
+ auto name = type_table->names ? type_table->names[i] : nullptr;
+ const uint8_t *val = nullptr;
+ if (type_table->st == ST_TABLE)
+ {
+ val = reinterpret_cast<const Table *>(obj)->GetAddressOf(
+ FieldIndexToOffset(static_cast<voffset_t>(i)));
+ }
+ else
+ {
+ val = obj + type_table->values[i];
+ }
+ visitor->Field(i, set_idx, type, is_repeating, ref, name, val);
+ if (val)
+ {
+ set_idx++;
+ if (is_repeating)
+ {
+ auto elem_ptr = val;
+ size_t size = 0;
+ if (type_table->st == ST_TABLE)
+ {
+ // variable length vector
+ val += ReadScalar<uoffset_t>(val);
+ auto vec = reinterpret_cast<const Vector<uint8_t> *>(val);
+ elem_ptr = vec->Data();
+ size = vec->size();
+ }
+ else
+ {
+ // otherwise fixed size array
+ size = type_table->array_sizes[array_idx];
+ ++array_idx;
+ }
+ visitor->StartVector();
+ for (size_t j = 0; j < size; j++)
+ {
+ visitor->Element(j, type, ref, elem_ptr);
+ IterateValue(type, elem_ptr, ref, prev_val, static_cast<soffset_t>(j), visitor);
+ elem_ptr += InlineSize(type, ref);
+ }
+ visitor->EndVector();
+ }
+ else
+ {
+ IterateValue(type, val, ref, prev_val, -1, visitor);
+ }
+ }
+ prev_val = val;
+ }
+ visitor->EndSequence();
+}
+
+inline void IterateFlatBuffer(const uint8_t *buffer, const TypeTable *type_table,
+ IterationVisitor *callback)
+{
+ IterateObject(GetRoot<uint8_t>(buffer), type_table, callback);
+}
+
+// Outputting a FlatBuffer to a string. Tries to conform as closely as
+// possible to the JSON output generated by idl_gen_text.cpp.
+
+struct ToStringVisitor : public IterationVisitor
+{
+ std::string s; // Accumulated output.
+ std::string d; // Delimiter inserted between fields / elements.
+ bool q; // Whether to quote field names (strict JSON).
+ std::string in; // Indentation unit, repeated once per nesting level.
+ size_t indent_level;
+ bool vector_delimited; // Whether vector elements get delimiter + indent.
+ ToStringVisitor(std::string delimiter, bool quotes, std::string indent, bool vdelimited = true)
+ : d(delimiter), q(quotes), in(indent), indent_level(0), vector_delimited(vdelimited)
+ {
+ }
+ ToStringVisitor(std::string delimiter)
+ : d(delimiter), q(false), in(""), indent_level(0), vector_delimited(true)
+ {
+ }
+
+ void append_indent()
+ {
+ for (size_t i = 0; i < indent_level; i++)
+ {
+ s += in;
+ }
+ }
+
+ void StartSequence()
+ {
+ s += "{";
+ s += d;
+ indent_level++;
+ }
+ void EndSequence()
+ {
+ s += d;
+ indent_level--;
+ append_indent();
+ s += "}";
+ }
+ void Field(size_t /*field_idx*/, size_t set_idx, ElementaryType /*type*/, bool /*is_vector*/,
+ const TypeTable * /*type_table*/, const char *name, const uint8_t *val)
+ {
+ if (!val)
+ return;
+ if (set_idx)
+ {
+ s += ",";
+ s += d;
+ }
+ append_indent();
+ if (name)
+ {
+ if (q)
+ s += "\"";
+ s += name;
+ if (q)
+ s += "\"";
+ s += ": ";
+ }
+ }
+ template <typename T> void Named(T x, const char *name)
+ {
+ if (name)
+ {
+ if (q)
+ s += "\"";
+ s += name;
+ if (q)
+ s += "\"";
+ }
+ else
+ {
+ s += NumToString(x);
+ }
+ }
+ void UType(uint8_t x, const char *name) { Named(x, name); }
+ void Bool(bool x) { s += x ? "true" : "false"; }
+ void Char(int8_t x, const char *name) { Named(x, name); }
+ void UChar(uint8_t x, const char *name) { Named(x, name); }
+ void Short(int16_t x, const char *name) { Named(x, name); }
+ void UShort(uint16_t x, const char *name) { Named(x, name); }
+ void Int(int32_t x, const char *name) { Named(x, name); }
+ void UInt(uint32_t x, const char *name) { Named(x, name); }
+ void Long(int64_t x) { s += NumToString(x); }
+ void ULong(uint64_t x) { s += NumToString(x); }
+ void Float(float x) { s += NumToString(x); }
+ void Double(double x) { s += NumToString(x); }
+ void String(const struct String *str)
+ {
+ EscapeString(str->c_str(), str->size(), &s, true, false);
+ }
+ void Unknown(const uint8_t *) { s += "(?)"; }
+ void StartVector()
+ {
+ s += "[";
+ if (vector_delimited)
+ {
+ s += d;
+ indent_level++;
+ append_indent();
+ }
+ else
+ {
+ s += " ";
+ }
+ }
+ void EndVector()
+ {
+ if (vector_delimited)
+ {
+ s += d;
+ indent_level--;
+ append_indent();
+ }
+ else
+ {
+ s += " ";
+ }
+ s += "]";
+ }
+ void Element(size_t i, ElementaryType /*type*/, const TypeTable * /*type_table*/,
+ const uint8_t * /*val*/)
+ {
+ if (i)
+ {
+ s += ",";
+ if (vector_delimited)
+ {
+ s += d;
+ append_indent();
+ }
+ else
+ {
+ s += " ";
+ }
+ }
+ }
+};
+
+inline std::string FlatBufferToString(const uint8_t *buffer, const TypeTable *type_table,
+ bool multi_line = false, bool vector_delimited = true)
+{
+ ToStringVisitor tostring_visitor(multi_line ? "\n" : " ", false, "", vector_delimited);
+ IterateFlatBuffer(buffer, type_table, &tostring_visitor);
+ return tostring_visitor.s;
+}
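+
+// For example (an illustrative sketch; `FooTypeTable()` is whatever was
+// generated for your root type, ideally with --reflect-names so field names
+// are available):
+//
+//   std::string pretty =
+//     flatbuffers::FlatBufferToString(buf, FooTypeTable(), /*multi_line=*/true);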
+
+} // namespace flatbuffers
+
+#endif // FLATBUFFERS_MINIREFLECT_H_
diff --git a/onert-micro/externals/flatbuffers/pch/flatc_pch.h b/onert-micro/externals/flatbuffers/pch/flatc_pch.h
new file mode 100644
index 000000000..988fcf371
--- /dev/null
+++ b/onert-micro/externals/flatbuffers/pch/flatc_pch.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 Google Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FLATBUFFERS_FLATC_PCH_H_
+#define FLATBUFFERS_FLATC_PCH_H_
+
+// stl
+#include <cmath>
+#include <sstream>
+#include <cassert>
+#include <unordered_set>
+#include <unordered_map>
+#include <iostream>
+#include <functional>
+#include <set>
+#include <iterator>
+#include <tuple>
+
+// flatbuffers
+#include "flatbuffers/pch/pch.h"
+#include "flatbuffers/code_generators.h"
+#include "flatbuffers/flatbuffers.h"
+#include "flatbuffers/flexbuffers.h"
+#include "flatbuffers/idl.h"
+
+#endif // FLATBUFFERS_FLATC_PCH_H_
diff --git a/onert-micro/externals/flatbuffers/pch/pch.h b/onert-micro/externals/flatbuffers/pch/pch.h
new file mode 100644
index 000000000..0e7886fb4
--- /dev/null
+++ b/onert-micro/externals/flatbuffers/pch/pch.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 Google Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FLATBUFFERS_PCH_H_
+#define FLATBUFFERS_PCH_H_
+
+// stl
+#include <cstdint>
+#include <cstring>
+#include <algorithm>
+#include <list>
+#include <string>
+#include <utility>
+#include <iomanip>
+#include <map>
+#include <memory>
+#include <limits>
+#include <stack>
+#include <vector>
+#include <type_traits>
+
+// flatbuffers
+#include "flatbuffers/util.h"
+
+#endif // FLATBUFFERS_PCH_H_
diff --git a/onert-micro/externals/flatbuffers/reflection.h b/onert-micro/externals/flatbuffers/reflection.h
new file mode 100644
index 000000000..8e2b155f7
--- /dev/null
+++ b/onert-micro/externals/flatbuffers/reflection.h
@@ -0,0 +1,520 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2015 Google Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FLATBUFFERS_REFLECTION_H_
+#define FLATBUFFERS_REFLECTION_H_
+
+// This is somewhat of a circular dependency because flatc (and thus this
+// file) is needed to generate this header in the first place.
+// Should normally not be a problem since it can be generated by the
+// previous version of flatc whenever this code needs to change.
+// See reflection/generate_code.sh
+#include "flatbuffers/reflection_generated.h"
+
+// Helper functionality for reflection.
+
+namespace flatbuffers
+{
+
+// ------------------------- GETTERS -------------------------
+
+inline bool IsScalar(reflection::BaseType t)
+{
+ return t >= reflection::UType && t <= reflection::Double;
+}
+inline bool IsInteger(reflection::BaseType t)
+{
+ return t >= reflection::UType && t <= reflection::ULong;
+}
+inline bool IsFloat(reflection::BaseType t)
+{
+ return t == reflection::Float || t == reflection::Double;
+}
+inline bool IsLong(reflection::BaseType t)
+{
+ return t == reflection::Long || t == reflection::ULong;
+}
+
+// Size of a basic type; don't use with structs.
+inline size_t GetTypeSize(reflection::BaseType base_type)
+{
+ // This needs to correspond to the BaseType enum.
+ static size_t sizes[] = {
+ 0, // None
+ 1, // UType
+ 1, // Bool
+ 1, // Byte
+ 1, // UByte
+ 2, // Short
+ 2, // UShort
+ 4, // Int
+ 4, // UInt
+ 8, // Long
+ 8, // ULong
+ 4, // Float
+ 8, // Double
+ 4, // String
+ 4, // Vector
+ 4, // Obj
+ 4, // Union
+ 0, // Array. Only used in structs. 0 was chosen to prevent out-of-bounds
+ // errors.
+
+ 0 // MaxBaseType. This must be kept the last entry in this array.
+ };
+ static_assert(sizeof(sizes) / sizeof(size_t) == reflection::MaxBaseType + 1,
+ "Size of sizes[] array does not match the count of BaseType "
+ "enum values.");
+ return sizes[base_type];
+}
+
+// Same as above, but now correctly returns the size of a struct if
+// the field (or vector element) is a struct.
+inline size_t GetTypeSizeInline(reflection::BaseType base_type, int type_index,
+ const reflection::Schema &schema)
+{
+ if (base_type == reflection::Obj && schema.objects()->Get(type_index)->is_struct())
+ {
+ return schema.objects()->Get(type_index)->bytesize();
+ }
+ else
+ {
+ return GetTypeSize(base_type);
+ }
+}
+
+// Get the root, regardless of what type it is.
+inline Table *GetAnyRoot(uint8_t *flatbuf) { return GetMutableRoot<Table>(flatbuf); }
+inline const Table *GetAnyRoot(const uint8_t *flatbuf) { return GetRoot<Table>(flatbuf); }
+
+// Get a field's default, if you know it's an integer, and its exact type.
+template <typename T> T GetFieldDefaultI(const reflection::Field &field)
+{
+ FLATBUFFERS_ASSERT(sizeof(T) == GetTypeSize(field.type()->base_type()));
+ return static_cast<T>(field.default_integer());
+}
+
+// Get a field's default, if you know it's floating point and its exact type.
+template <typename T> T GetFieldDefaultF(const reflection::Field &field)
+{
+ FLATBUFFERS_ASSERT(sizeof(T) == GetTypeSize(field.type()->base_type()));
+ return static_cast<T>(field.default_real());
+}
+
+// Get a field, if you know it's an integer, and its exact type.
+template <typename T> T GetFieldI(const Table &table, const reflection::Field &field)
+{
+ FLATBUFFERS_ASSERT(sizeof(T) == GetTypeSize(field.type()->base_type()));
+ return table.GetField<T>(field.offset(), static_cast<T>(field.default_integer()));
+}
+
+// Get a field, if you know it's floating point and its exact type.
+template <typename T> T GetFieldF(const Table &table, const reflection::Field &field)
+{
+ FLATBUFFERS_ASSERT(sizeof(T) == GetTypeSize(field.type()->base_type()));
+ return table.GetField<T>(field.offset(), static_cast<T>(field.default_real()));
+}
+
+// Get a field, if you know it's a string.
+inline const String *GetFieldS(const Table &table, const reflection::Field &field)
+{
+ FLATBUFFERS_ASSERT(field.type()->base_type() == reflection::String);
+ return table.GetPointer<const String *>(field.offset());
+}
+
+// Get a field, if you know it's a vector.
+template <typename T> Vector<T> *GetFieldV(const Table &table, const reflection::Field &field)
+{
+ FLATBUFFERS_ASSERT(field.type()->base_type() == reflection::Vector &&
+ sizeof(T) == GetTypeSize(field.type()->element()));
+ return table.GetPointer<Vector<T> *>(field.offset());
+}
+
+// Get a field, if you know it's a vector, generically.
+// To actually access elements, use the return value together with
+// field.type()->element() in any of GetAnyVectorElemI below etc.
+inline VectorOfAny *GetFieldAnyV(const Table &table, const reflection::Field &field)
+{
+ return table.GetPointer<VectorOfAny *>(field.offset());
+}
+
+// Get a field, if you know it's a table.
+inline Table *GetFieldT(const Table &table, const reflection::Field &field)
+{
+ FLATBUFFERS_ASSERT(field.type()->base_type() == reflection::Obj ||
+ field.type()->base_type() == reflection::Union);
+ return table.GetPointer<Table *>(field.offset());
+}
+
+// Get a field, if you know it's a struct.
+inline const Struct *GetFieldStruct(const Table &table, const reflection::Field &field)
+{
+ // TODO: This does NOT check if the field is a table or struct, but we'd need
+ // access to the schema to check the is_struct flag.
+ FLATBUFFERS_ASSERT(field.type()->base_type() == reflection::Obj);
+ return table.GetStruct<const Struct *>(field.offset());
+}
+
+// Get a structure's field, if you know it's a struct.
+inline const Struct *GetFieldStruct(const Struct &structure, const reflection::Field &field)
+{
+ FLATBUFFERS_ASSERT(field.type()->base_type() == reflection::Obj);
+ return structure.GetStruct<const Struct *>(field.offset());
+}
+
+// Raw helper functions used below: get any value in memory as a 64bit int, a
+// double or a string.
+// All scalars get static_cast to an int64_t, strings use strtoull, every other
+// data type returns 0.
+int64_t GetAnyValueI(reflection::BaseType type, const uint8_t *data);
+// All scalars static cast to double, strings use strtod, every other data
+// type is 0.0.
+double GetAnyValueF(reflection::BaseType type, const uint8_t *data);
+// All scalars are converted using stringstream, strings are returned as-is,
+// and all other data types get some level of debug pretty-printing.
+std::string GetAnyValueS(reflection::BaseType type, const uint8_t *data,
+ const reflection::Schema *schema, int type_index);
+
+// Get any table field as a 64bit int, regardless of what type it is.
+inline int64_t GetAnyFieldI(const Table &table, const reflection::Field &field)
+{
+ auto field_ptr = table.GetAddressOf(field.offset());
+ return field_ptr ? GetAnyValueI(field.type()->base_type(), field_ptr) : field.default_integer();
+}
+
+// Get any table field as a double, regardless of what type it is.
+inline double GetAnyFieldF(const Table &table, const reflection::Field &field)
+{
+ auto field_ptr = table.GetAddressOf(field.offset());
+ return field_ptr ? GetAnyValueF(field.type()->base_type(), field_ptr) : field.default_real();
+}
+
+// Get any table field as a string, regardless of what type it is.
+// You may pass nullptr for the schema if you don't care to have fields that
+// are of table type pretty-printed.
+inline std::string GetAnyFieldS(const Table &table, const reflection::Field &field,
+ const reflection::Schema *schema)
+{
+ auto field_ptr = table.GetAddressOf(field.offset());
+ return field_ptr
+ ? GetAnyValueS(field.type()->base_type(), field_ptr, schema, field.type()->index())
+ : "";
+}
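+
+// A read sketch for the getters above, assuming `bfbs` points to a binary
+// schema (.bfbs) and `flatbuf` to a matching buffer; the field name "hp" is
+// hypothetical:
+//
+//   auto &schema = *reflection::GetSchema(bfbs);
+//   auto root = flatbuffers::GetAnyRoot(flatbuf);
+//   auto field = schema.root_table()->fields()->LookupByKey("hp");
+//   int64_t hp = flatbuffers::GetAnyFieldI(*root, *field);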
+
+// Get any struct field as a 64bit int, regardless of what type it is.
+inline int64_t GetAnyFieldI(const Struct &st, const reflection::Field &field)
+{
+ return GetAnyValueI(field.type()->base_type(), st.GetAddressOf(field.offset()));
+}
+
+// Get any struct field as a double, regardless of what type it is.
+inline double GetAnyFieldF(const Struct &st, const reflection::Field &field)
+{
+ return GetAnyValueF(field.type()->base_type(), st.GetAddressOf(field.offset()));
+}
+
+// Get any struct field as a string, regardless of what type it is.
+inline std::string GetAnyFieldS(const Struct &st, const reflection::Field &field)
+{
+ return GetAnyValueS(field.type()->base_type(), st.GetAddressOf(field.offset()), nullptr, -1);
+}
+
+// Get any vector element as a 64bit int, regardless of what type it is.
+inline int64_t GetAnyVectorElemI(const VectorOfAny *vec, reflection::BaseType elem_type, size_t i)
+{
+ return GetAnyValueI(elem_type, vec->Data() + GetTypeSize(elem_type) * i);
+}
+
+// Get any vector element as a double, regardless of what type it is.
+inline double GetAnyVectorElemF(const VectorOfAny *vec, reflection::BaseType elem_type, size_t i)
+{
+ return GetAnyValueF(elem_type, vec->Data() + GetTypeSize(elem_type) * i);
+}
+
+// Get any vector element as a string, regardless of what type it is.
+inline std::string GetAnyVectorElemS(const VectorOfAny *vec, reflection::BaseType elem_type,
+ size_t i)
+{
+ return GetAnyValueS(elem_type, vec->Data() + GetTypeSize(elem_type) * i, nullptr, -1);
+}
+
+// Get a vector element that's a table/string/vector from a generic vector.
+// Pass Table/String/VectorOfAny as template parameter.
+// Warning: does no typechecking.
+template <typename T> T *GetAnyVectorElemPointer(const VectorOfAny *vec, size_t i)
+{
+ auto elem_ptr = vec->Data() + sizeof(uoffset_t) * i;
+ return reinterpret_cast<T *>(elem_ptr + ReadScalar<uoffset_t>(elem_ptr));
+}
+
+// Get the inline-address of a vector element. Useful for Structs (pass Struct
+// as template arg), or being able to address a range of scalars in-line.
+// Get elem_size from GetTypeSizeInline().
+// Note: data is little-endian on all platforms; use EndianScalar() instead of
+// raw pointer access for scalars.
+template <typename T>
+T *GetAnyVectorElemAddressOf(const VectorOfAny *vec, size_t i, size_t elem_size)
+{
+ return reinterpret_cast<T *>(vec->Data() + elem_size * i);
+}
+
+// Similarly, for elements of tables.
+template <typename T> T *GetAnyFieldAddressOf(const Table &table, const reflection::Field &field)
+{
+ return reinterpret_cast<T *>(table.GetAddressOf(field.offset()));
+}
+
+// Similarly, for elements of structs.
+template <typename T> T *GetAnyFieldAddressOf(const Struct &st, const reflection::Field &field)
+{
+ return reinterpret_cast<T *>(st.GetAddressOf(field.offset()));
+}
+
+// ------------------------- SETTERS -------------------------
+
+// Set any scalar field, if you know its exact type.
+template <typename T> bool SetField(Table *table, const reflection::Field &field, T val)
+{
+ reflection::BaseType type = field.type()->base_type();
+ if (!IsScalar(type))
+ {
+ return false;
+ }
+ FLATBUFFERS_ASSERT(sizeof(T) == GetTypeSize(type));
+ T def;
+ if (IsInteger(type))
+ {
+ def = GetFieldDefaultI<T>(field);
+ }
+ else
+ {
+ FLATBUFFERS_ASSERT(IsFloat(type));
+ def = GetFieldDefaultF<T>(field);
+ }
+ return table->SetField(field.offset(), val, def);
+}
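+
+// A mutation sketch, continuing the hypothetical "hp" field from the read
+// example above (`flatbuf` must be mutable, i.e. a uint8_t *):
+//
+//   auto root = flatbuffers::GetAnyRoot(flatbuf);
+//   flatbuffers::SetField<int16_t>(root, *field, 200); // exact type known
+//   flatbuffers::SetAnyFieldI(root, *field, 200);      // or the type-erased
+//                                                      // setter below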
+
+// Raw helper functions used below: set any value in memory as a 64bit int, a
+// double or a string.
+// These work for all scalar values, but do nothing for other data types.
+// To set a string, see SetString below.
+void SetAnyValueI(reflection::BaseType type, uint8_t *data, int64_t val);
+void SetAnyValueF(reflection::BaseType type, uint8_t *data, double val);
+void SetAnyValueS(reflection::BaseType type, uint8_t *data, const char *val);
+
+// Set any table field as a 64bit int, regardless of what type it is.
+inline bool SetAnyFieldI(Table *table, const reflection::Field &field, int64_t val)
+{
+ auto field_ptr = table->GetAddressOf(field.offset());
+ if (!field_ptr)
+ return val == GetFieldDefaultI<int64_t>(field);
+ SetAnyValueI(field.type()->base_type(), field_ptr, val);
+ return true;
+}
+
+// Set any table field as a double, regardless of what type it is.
+inline bool SetAnyFieldF(Table *table, const reflection::Field &field, double val)
+{
+ auto field_ptr = table->GetAddressOf(field.offset());
+ if (!field_ptr)
+ return val == GetFieldDefaultF<double>(field);
+ SetAnyValueF(field.type()->base_type(), field_ptr, val);
+ return true;
+}
+
+// Set any table field as a string, regardless of what type it is.
+inline bool SetAnyFieldS(Table *table, const reflection::Field &field, const char *val)
+{
+ auto field_ptr = table->GetAddressOf(field.offset());
+ if (!field_ptr)
+ return false;
+ SetAnyValueS(field.type()->base_type(), field_ptr, val);
+ return true;
+}
+
+// Set any struct field as a 64bit int, regardless of what type it is.
+inline void SetAnyFieldI(Struct *st, const reflection::Field &field, int64_t val)
+{
+ SetAnyValueI(field.type()->base_type(), st->GetAddressOf(field.offset()), val);
+}
+
+// Set any struct field as a double, regardless of what type it is.
+inline void SetAnyFieldF(Struct *st, const reflection::Field &field, double val)
+{
+ SetAnyValueF(field.type()->base_type(), st->GetAddressOf(field.offset()), val);
+}
+
+// Set any struct field as a string, regardless of what type it is.
+inline void SetAnyFieldS(Struct *st, const reflection::Field &field, const char *val)
+{
+ SetAnyValueS(field.type()->base_type(), st->GetAddressOf(field.offset()), val);
+}
+
+// Set any vector element as a 64bit int, regardless of what type it is.
+inline void SetAnyVectorElemI(VectorOfAny *vec, reflection::BaseType elem_type, size_t i,
+ int64_t val)
+{
+ SetAnyValueI(elem_type, vec->Data() + GetTypeSize(elem_type) * i, val);
+}
+
+// Set any vector element as a double, regardless of what type it is.
+inline void SetAnyVectorElemF(VectorOfAny *vec, reflection::BaseType elem_type, size_t i,
+ double val)
+{
+ SetAnyValueF(elem_type, vec->Data() + GetTypeSize(elem_type) * i, val);
+}
+
+// Set any vector element as a string, regardless of what type it is.
+inline void SetAnyVectorElemS(VectorOfAny *vec, reflection::BaseType elem_type, size_t i,
+ const char *val)
+{
+ SetAnyValueS(elem_type, vec->Data() + GetTypeSize(elem_type) * i, val);
+}
+
+// ------------------------- RESIZING SETTERS -------------------------
+
+// "smart" pointer for use with resizing vectors: turns a pointer inside
+// a vector into a relative offset, such that it is not affected by resizes.
+template <typename T, typename U> class pointer_inside_vector
+{
+public:
+ pointer_inside_vector(T *ptr, std::vector<U> &vec)
+ : offset_(reinterpret_cast<uint8_t *>(ptr) -
+ reinterpret_cast<uint8_t *>(flatbuffers::vector_data(vec))),
+ vec_(vec)
+ {
+ }
+
+ T *operator*() const
+ {
+ return reinterpret_cast<T *>(reinterpret_cast<uint8_t *>(flatbuffers::vector_data(vec_)) +
+ offset_);
+ }
+ T *operator->() const { return operator*(); }
+
+private:
+ size_t offset_;
+ std::vector<U> &vec_;
+};
+
+// Helper to create the above easily without specifying template args.
+template <typename T, typename U> pointer_inside_vector<T, U> piv(T *ptr, std::vector<U> &vec)
+{
+ return pointer_inside_vector<T, U>(ptr, vec);
+}
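+
+// A sketch of why piv matters: a raw pointer into `flatbuf` would dangle once
+// SetString/ResizeAnyVector below reallocate the vector, while the piv keeps
+// resolving (`schema` and `field` are assumed from surrounding context):
+//
+//   std::vector<uint8_t> flatbuf = ...; // holds the FlatBuffer
+//   auto str = flatbuffers::GetFieldS(*flatbuffers::GetAnyRoot(flatbuf.data()), *field);
+//   auto str_piv = flatbuffers::piv(str, flatbuf);
+//   flatbuffers::SetString(schema, "a longer value", *str_piv, &flatbuf);
+//   // *str_piv is still valid here, even if `flatbuf` was reallocated.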
+
+inline const char *UnionTypeFieldSuffix() { return "_type"; }
+
+// Helper to figure out the actual table type a union refers to.
+inline const reflection::Object &GetUnionType(const reflection::Schema &schema,
+ const reflection::Object &parent,
+ const reflection::Field &unionfield,
+ const Table &table)
+{
+ auto enumdef = schema.enums()->Get(unionfield.type()->index());
+ // TODO: this is clumsy and slow, but no other way to find it?
+ auto type_field =
+ parent.fields()->LookupByKey((unionfield.name()->str() + UnionTypeFieldSuffix()).c_str());
+ FLATBUFFERS_ASSERT(type_field);
+ auto union_type = GetFieldI<uint8_t>(table, *type_field);
+ auto enumval = enumdef->values()->LookupByKey(union_type);
+ return *enumval->object();
+}
+
+// Changes the contents of a string inside a FlatBuffer. FlatBuffer must
+// live inside a std::vector so we can resize the buffer if needed.
+// "str" must live inside "flatbuf" and may be invalidated after this call.
+// If your FlatBuffer's root table is not the schema's root table, you should
+// pass in your root_table type as well.
+void SetString(const reflection::Schema &schema, const std::string &val, const String *str,
+ std::vector<uint8_t> *flatbuf, const reflection::Object *root_table = nullptr);
+
+// Resizes a flatbuffers::Vector inside a FlatBuffer. FlatBuffer must
+// live inside a std::vector so we can resize the buffer if needed.
+// "vec" must live inside "flatbuf" and may be invalidated after this call.
+// If your FlatBuffer's root table is not the schema's root table, you should
+// pass in your root_table type as well.
+uint8_t *ResizeAnyVector(const reflection::Schema &schema, uoffset_t newsize,
+ const VectorOfAny *vec, uoffset_t num_elems, uoffset_t elem_size,
+ std::vector<uint8_t> *flatbuf,
+ const reflection::Object *root_table = nullptr);
+
+template <typename T>
+void ResizeVector(const reflection::Schema &schema, uoffset_t newsize, T val, const Vector<T> *vec,
+ std::vector<uint8_t> *flatbuf, const reflection::Object *root_table = nullptr)
+{
+ auto delta_elem = static_cast<int>(newsize) - static_cast<int>(vec->size());
+ auto newelems =
+ ResizeAnyVector(schema, newsize, reinterpret_cast<const VectorOfAny *>(vec), vec->size(),
+ static_cast<uoffset_t>(sizeof(T)), flatbuf, root_table);
+ // Set new elements to "val".
+ for (int i = 0; i < delta_elem; i++)
+ {
+ auto loc = newelems + i * sizeof(T);
+ auto is_scalar = flatbuffers::is_scalar<T>::value;
+ if (is_scalar)
+ {
+ WriteScalar(loc, val);
+ }
+ else
+ { // struct
+ *reinterpret_cast<T *>(loc) = val;
+ }
+ }
+}
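+
+// A resizing sketch (hypothetical names; `field` must describe a [ushort]
+// vector field of the schema):
+//
+//   auto vec = flatbuffers::piv(
+//     flatbuffers::GetFieldV<uint16_t>(*flatbuffers::GetAnyRoot(flatbuf.data()), *field),
+//     flatbuf);
+//   flatbuffers::ResizeVector<uint16_t>(schema, 20, 99, *vec, &flatbuf);
+//   // Any newly added elements are initialized to 99.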
+
+// Adds any new data (in the form of a new FlatBuffer) to an existing
+// FlatBuffer. This can be used when any of the above methods are not
+// sufficient, in particular for adding new tables and new fields.
+// This is potentially slightly less efficient than a FlatBuffer constructed
+// in one piece, since the new FlatBuffer doesn't share any vtables with the
+// existing one.
+// The return value can now be set using Vector::MutateOffset or SetFieldT
+// below.
+const uint8_t *AddFlatBuffer(std::vector<uint8_t> &flatbuf, const uint8_t *newbuf, size_t newlen);
+
+inline bool SetFieldT(Table *table, const reflection::Field &field, const uint8_t *val)
+{
+ FLATBUFFERS_ASSERT(sizeof(uoffset_t) == GetTypeSize(field.type()->base_type()));
+ return table->SetPointer(field.offset(), val);
+}
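+
+// A sketch of combining the two, assuming `fbb` has finished building a table
+// compatible with the (hypothetical) `field`:
+//
+//   const uint8_t *added =
+//     flatbuffers::AddFlatBuffer(flatbuf, fbb.GetBufferPointer(), fbb.GetSize());
+//   flatbuffers::SetFieldT(flatbuffers::GetAnyRoot(flatbuf.data()), *field, added);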
+
+// ------------------------- COPYING -------------------------
+
+// Generic copying of tables from a FlatBuffer into a FlatBuffer builder.
+// Can be used to do any kind of merging/selecting you may want to do out
+// of existing buffers. Also useful to reconstruct a whole buffer if the
+// above resizing functionality has introduced garbage in a buffer you want
+// to remove.
+// Note: this does not deal with DAGs correctly. If the table passed forms a
+// DAG, the copy will be a tree instead (with duplicates). Strings can,
+// however, be shared by passing true for use_string_pooling.
+
+Offset<const Table *> CopyTable(FlatBufferBuilder &fbb, const reflection::Schema &schema,
+ const reflection::Object &objectdef, const Table &table,
+ bool use_string_pooling = false);
+
+// Verifies the provided flatbuffer using reflection.
+// root should point to the root type for this flatbuffer.
+// buf should point to the start of flatbuffer data.
+// length specifies the size of the flatbuffer data.
+bool Verify(const reflection::Schema &schema, const reflection::Object &root, const uint8_t *buf,
+ size_t length, uoffset_t max_depth = 64, uoffset_t max_tables = 1000000);
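+
+// A verification sketch, assuming `bfbs` points to the binary schema and
+// `buf`/`len` describe untrusted FlatBuffer data:
+//
+//   auto &schema = *reflection::GetSchema(bfbs);
+//   bool ok = flatbuffers::Verify(schema, *schema.root_table(), buf, len);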
+
+} // namespace flatbuffers
+
+#endif // FLATBUFFERS_REFLECTION_H_
diff --git a/onert-micro/externals/flatbuffers/reflection_generated.h b/onert-micro/externals/flatbuffers/reflection_generated.h
new file mode 100644
index 000000000..9c57dd1f3
--- /dev/null
+++ b/onert-micro/externals/flatbuffers/reflection_generated.h
@@ -0,0 +1,1257 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 Google Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// automatically generated by the FlatBuffers compiler, do not modify
+
+#ifndef FLATBUFFERS_GENERATED_REFLECTION_REFLECTION_H_
+#define FLATBUFFERS_GENERATED_REFLECTION_REFLECTION_H_
+
+#include "flatbuffers/flatbuffers.h"
+
+namespace reflection
+{
+
+struct Type;
+struct TypeBuilder;
+
+struct KeyValue;
+struct KeyValueBuilder;
+
+struct EnumVal;
+struct EnumValBuilder;
+
+struct Enum;
+struct EnumBuilder;
+
+struct Field;
+struct FieldBuilder;
+
+struct Object;
+struct ObjectBuilder;
+
+struct RPCCall;
+struct RPCCallBuilder;
+
+struct Service;
+struct ServiceBuilder;
+
+struct Schema;
+struct SchemaBuilder;
+
+enum BaseType
+{
+ None = 0,
+ UType = 1,
+ Bool = 2,
+ Byte = 3,
+ UByte = 4,
+ Short = 5,
+ UShort = 6,
+ Int = 7,
+ UInt = 8,
+ Long = 9,
+ ULong = 10,
+ Float = 11,
+ Double = 12,
+ String = 13,
+ Vector = 14,
+ Obj = 15,
+ Union = 16,
+ Array = 17,
+ MaxBaseType = 18
+};
+
+inline const BaseType (&EnumValuesBaseType())[19]
+{
+ static const BaseType values[] = {None, UType, Bool, Byte, UByte, Short, UShort,
+ Int, UInt, Long, ULong, Float, Double, String,
+ Vector, Obj, Union, Array, MaxBaseType};
+ return values;
+}
+
+inline const char *const *EnumNamesBaseType()
+{
+ static const char *const names[20] = {"None", "UType", "Bool", "Byte", "UByte",
+ "Short", "UShort", "Int", "UInt", "Long",
+ "ULong", "Float", "Double", "String", "Vector",
+ "Obj", "Union", "Array", "MaxBaseType", nullptr};
+ return names;
+}
+
+inline const char *EnumNameBaseType(BaseType e)
+{
+ if (flatbuffers::IsOutRange(e, None, MaxBaseType))
+ return "";
+ const size_t index = static_cast<size_t>(e);
+ return EnumNamesBaseType()[index];
+}
+
+enum AdvancedFeatures
+{
+ AdvancedArrayFeatures = 1ULL,
+ AdvancedUnionFeatures = 2ULL,
+ OptionalScalars = 4ULL,
+ DefaultVectorsAndStrings = 8ULL
+};
+
+inline const AdvancedFeatures (&EnumValuesAdvancedFeatures())[4]
+{
+ static const AdvancedFeatures values[] = {AdvancedArrayFeatures, AdvancedUnionFeatures,
+ OptionalScalars, DefaultVectorsAndStrings};
+ return values;
+}
+
+inline const char *const *EnumNamesAdvancedFeatures()
+{
+ static const char *const names[9] = {"AdvancedArrayFeatures",
+ "AdvancedUnionFeatures",
+ "",
+ "OptionalScalars",
+ "",
+ "",
+ "",
+ "DefaultVectorsAndStrings",
+ nullptr};
+ return names;
+}
+
+inline const char *EnumNameAdvancedFeatures(AdvancedFeatures e)
+{
+ if (flatbuffers::IsOutRange(e, AdvancedArrayFeatures, DefaultVectorsAndStrings))
+ return "";
+ const size_t index = static_cast<size_t>(e) - static_cast<size_t>(AdvancedArrayFeatures);
+ return EnumNamesAdvancedFeatures()[index];
+}
+
+struct Type FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef TypeBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_BASE_TYPE = 4,
+ VT_ELEMENT = 6,
+ VT_INDEX = 8,
+ VT_FIXED_LENGTH = 10
+ };
+ reflection::BaseType base_type() const
+ {
+ return static_cast<reflection::BaseType>(GetField<int8_t>(VT_BASE_TYPE, 0));
+ }
+ reflection::BaseType element() const
+ {
+ return static_cast<reflection::BaseType>(GetField<int8_t>(VT_ELEMENT, 0));
+ }
+ int32_t index() const { return GetField<int32_t>(VT_INDEX, -1); }
+ uint16_t fixed_length() const { return GetField<uint16_t>(VT_FIXED_LENGTH, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_BASE_TYPE) &&
+ VerifyField<int8_t>(verifier, VT_ELEMENT) && VerifyField<int32_t>(verifier, VT_INDEX) &&
+ VerifyField<uint16_t>(verifier, VT_FIXED_LENGTH) && verifier.EndTable();
+ }
+};
+
+struct TypeBuilder
+{
+ typedef Type Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_base_type(reflection::BaseType base_type)
+ {
+ fbb_.AddElement<int8_t>(Type::VT_BASE_TYPE, static_cast<int8_t>(base_type), 0);
+ }
+ void add_element(reflection::BaseType element)
+ {
+ fbb_.AddElement<int8_t>(Type::VT_ELEMENT, static_cast<int8_t>(element), 0);
+ }
+ void add_index(int32_t index) { fbb_.AddElement<int32_t>(Type::VT_INDEX, index, -1); }
+ void add_fixed_length(uint16_t fixed_length)
+ {
+ fbb_.AddElement<uint16_t>(Type::VT_FIXED_LENGTH, fixed_length, 0);
+ }
+ explicit TypeBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<Type> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Type>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Type> CreateType(flatbuffers::FlatBufferBuilder &_fbb,
+ reflection::BaseType base_type = reflection::None,
+ reflection::BaseType element = reflection::None,
+ int32_t index = -1, uint16_t fixed_length = 0)
+{
+ TypeBuilder builder_(_fbb);
+ builder_.add_index(index);
+ builder_.add_fixed_length(fixed_length);
+ builder_.add_element(element);
+ builder_.add_base_type(base_type);
+ return builder_.Finish();
+}
+
+struct KeyValue FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef KeyValueBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_KEY = 4,
+ VT_VALUE = 6
+ };
+ const flatbuffers::String *key() const { return GetPointer<const flatbuffers::String *>(VT_KEY); }
+ bool KeyCompareLessThan(const KeyValue *o) const { return *key() < *o->key(); }
+ int KeyCompareWithValue(const char *val) const { return strcmp(key()->c_str(), val); }
+ const flatbuffers::String *value() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_VALUE);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffsetRequired(verifier, VT_KEY) &&
+ verifier.VerifyString(key()) && VerifyOffset(verifier, VT_VALUE) &&
+ verifier.VerifyString(value()) && verifier.EndTable();
+ }
+};
+
+struct KeyValueBuilder
+{
+ typedef KeyValue Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_key(flatbuffers::Offset<flatbuffers::String> key)
+ {
+ fbb_.AddOffset(KeyValue::VT_KEY, key);
+ }
+ void add_value(flatbuffers::Offset<flatbuffers::String> value)
+ {
+ fbb_.AddOffset(KeyValue::VT_VALUE, value);
+ }
+ explicit KeyValueBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<KeyValue> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<KeyValue>(end);
+ fbb_.Required(o, KeyValue::VT_KEY);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<KeyValue>
+CreateKeyValue(flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::String> key = 0,
+ flatbuffers::Offset<flatbuffers::String> value = 0)
+{
+ KeyValueBuilder builder_(_fbb);
+ builder_.add_value(value);
+ builder_.add_key(key);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<KeyValue> CreateKeyValueDirect(flatbuffers::FlatBufferBuilder &_fbb,
+ const char *key = nullptr,
+ const char *value = nullptr)
+{
+ auto key__ = key ? _fbb.CreateString(key) : 0;
+ auto value__ = value ? _fbb.CreateString(value) : 0;
+ return reflection::CreateKeyValue(_fbb, key__, value__);
+}
+
+struct EnumVal FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef EnumValBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_NAME = 4,
+ VT_VALUE = 6,
+ VT_OBJECT = 8,
+ VT_UNION_TYPE = 10,
+ VT_DOCUMENTATION = 12
+ };
+ const flatbuffers::String *name() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_NAME);
+ }
+ int64_t value() const { return GetField<int64_t>(VT_VALUE, 0); }
+ bool KeyCompareLessThan(const EnumVal *o) const { return value() < o->value(); }
+ int KeyCompareWithValue(int64_t val) const
+ {
+ return static_cast<int>(value() > val) - static_cast<int>(value() < val);
+ }
+ const reflection::Object *object() const
+ {
+ return GetPointer<const reflection::Object *>(VT_OBJECT);
+ }
+ const reflection::Type *union_type() const
+ {
+ return GetPointer<const reflection::Type *>(VT_UNION_TYPE);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> *documentation() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> *>(
+ VT_DOCUMENTATION);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffsetRequired(verifier, VT_NAME) &&
+ verifier.VerifyString(name()) && VerifyField<int64_t>(verifier, VT_VALUE) &&
+ VerifyOffset(verifier, VT_OBJECT) && verifier.VerifyTable(object()) &&
+ VerifyOffset(verifier, VT_UNION_TYPE) && verifier.VerifyTable(union_type()) &&
+ VerifyOffset(verifier, VT_DOCUMENTATION) && verifier.VerifyVector(documentation()) &&
+ verifier.VerifyVectorOfStrings(documentation()) && verifier.EndTable();
+ }
+};
+
+struct EnumValBuilder
+{
+ typedef EnumVal Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_name(flatbuffers::Offset<flatbuffers::String> name)
+ {
+ fbb_.AddOffset(EnumVal::VT_NAME, name);
+ }
+ void add_value(int64_t value) { fbb_.AddElement<int64_t>(EnumVal::VT_VALUE, value, 0); }
+ void add_object(flatbuffers::Offset<reflection::Object> object)
+ {
+ fbb_.AddOffset(EnumVal::VT_OBJECT, object);
+ }
+ void add_union_type(flatbuffers::Offset<reflection::Type> union_type)
+ {
+ fbb_.AddOffset(EnumVal::VT_UNION_TYPE, union_type);
+ }
+ void add_documentation(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>>
+ documentation)
+ {
+ fbb_.AddOffset(EnumVal::VT_DOCUMENTATION, documentation);
+ }
+ explicit EnumValBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<EnumVal> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<EnumVal>(end);
+ fbb_.Required(o, EnumVal::VT_NAME);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<EnumVal> CreateEnumVal(
+ flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset<flatbuffers::String> name = 0,
+ int64_t value = 0, flatbuffers::Offset<reflection::Object> object = 0,
+ flatbuffers::Offset<reflection::Type> union_type = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>> documentation =
+ 0)
+{
+ EnumValBuilder builder_(_fbb);
+ builder_.add_value(value);
+ builder_.add_documentation(documentation);
+ builder_.add_union_type(union_type);
+ builder_.add_object(object);
+ builder_.add_name(name);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<EnumVal> CreateEnumValDirect(
+ flatbuffers::FlatBufferBuilder &_fbb, const char *name = nullptr, int64_t value = 0,
+ flatbuffers::Offset<reflection::Object> object = 0,
+ flatbuffers::Offset<reflection::Type> union_type = 0,
+ const std::vector<flatbuffers::Offset<flatbuffers::String>> *documentation = nullptr)
+{
+ auto name__ = name ? _fbb.CreateString(name) : 0;
+ auto documentation__ =
+ documentation ? _fbb.CreateVector<flatbuffers::Offset<flatbuffers::String>>(*documentation) : 0;
+ return reflection::CreateEnumVal(_fbb, name__, value, object, union_type, documentation__);
+}
+
+struct Enum FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef EnumBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_NAME = 4,
+ VT_VALUES = 6,
+ VT_IS_UNION = 8,
+ VT_UNDERLYING_TYPE = 10,
+ VT_ATTRIBUTES = 12,
+ VT_DOCUMENTATION = 14
+ };
+ const flatbuffers::String *name() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_NAME);
+ }
+ bool KeyCompareLessThan(const Enum *o) const { return *name() < *o->name(); }
+ int KeyCompareWithValue(const char *val) const { return strcmp(name()->c_str(), val); }
+ const flatbuffers::Vector<flatbuffers::Offset<reflection::EnumVal>> *values() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<reflection::EnumVal>> *>(
+ VT_VALUES);
+ }
+ bool is_union() const { return GetField<uint8_t>(VT_IS_UNION, 0) != 0; }
+ const reflection::Type *underlying_type() const
+ {
+ return GetPointer<const reflection::Type *>(VT_UNDERLYING_TYPE);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<reflection::KeyValue>> *attributes() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<reflection::KeyValue>> *>(
+ VT_ATTRIBUTES);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> *documentation() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> *>(
+ VT_DOCUMENTATION);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffsetRequired(verifier, VT_NAME) &&
+ verifier.VerifyString(name()) && VerifyOffsetRequired(verifier, VT_VALUES) &&
+ verifier.VerifyVector(values()) && verifier.VerifyVectorOfTables(values()) &&
+ VerifyField<uint8_t>(verifier, VT_IS_UNION) &&
+ VerifyOffsetRequired(verifier, VT_UNDERLYING_TYPE) &&
+ verifier.VerifyTable(underlying_type()) && VerifyOffset(verifier, VT_ATTRIBUTES) &&
+ verifier.VerifyVector(attributes()) && verifier.VerifyVectorOfTables(attributes()) &&
+ VerifyOffset(verifier, VT_DOCUMENTATION) && verifier.VerifyVector(documentation()) &&
+ verifier.VerifyVectorOfStrings(documentation()) && verifier.EndTable();
+ }
+};
+
+struct EnumBuilder
+{
+ typedef Enum Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_name(flatbuffers::Offset<flatbuffers::String> name)
+ {
+ fbb_.AddOffset(Enum::VT_NAME, name);
+ }
+ void add_values(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<reflection::EnumVal>>> values)
+ {
+ fbb_.AddOffset(Enum::VT_VALUES, values);
+ }
+ void add_is_union(bool is_union)
+ {
+ fbb_.AddElement<uint8_t>(Enum::VT_IS_UNION, static_cast<uint8_t>(is_union), 0);
+ }
+ void add_underlying_type(flatbuffers::Offset<reflection::Type> underlying_type)
+ {
+ fbb_.AddOffset(Enum::VT_UNDERLYING_TYPE, underlying_type);
+ }
+ void add_attributes(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<reflection::KeyValue>>> attributes)
+ {
+ fbb_.AddOffset(Enum::VT_ATTRIBUTES, attributes);
+ }
+ void add_documentation(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>>
+ documentation)
+ {
+ fbb_.AddOffset(Enum::VT_DOCUMENTATION, documentation);
+ }
+ explicit EnumBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<Enum> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Enum>(end);
+ fbb_.Required(o, Enum::VT_NAME);
+ fbb_.Required(o, Enum::VT_VALUES);
+ fbb_.Required(o, Enum::VT_UNDERLYING_TYPE);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Enum> CreateEnum(
+ flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset<flatbuffers::String> name = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<reflection::EnumVal>>> values = 0,
+ bool is_union = false, flatbuffers::Offset<reflection::Type> underlying_type = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<reflection::KeyValue>>> attributes =
+ 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>> documentation =
+ 0)
+{
+ EnumBuilder builder_(_fbb);
+ builder_.add_documentation(documentation);
+ builder_.add_attributes(attributes);
+ builder_.add_underlying_type(underlying_type);
+ builder_.add_values(values);
+ builder_.add_name(name);
+ builder_.add_is_union(is_union);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Enum> CreateEnumDirect(
+ flatbuffers::FlatBufferBuilder &_fbb, const char *name = nullptr,
+ std::vector<flatbuffers::Offset<reflection::EnumVal>> *values = nullptr, bool is_union = false,
+ flatbuffers::Offset<reflection::Type> underlying_type = 0,
+ std::vector<flatbuffers::Offset<reflection::KeyValue>> *attributes = nullptr,
+ const std::vector<flatbuffers::Offset<flatbuffers::String>> *documentation = nullptr)
+{
+ auto name__ = name ? _fbb.CreateString(name) : 0;
+ auto values__ = values ? _fbb.CreateVectorOfSortedTables<reflection::EnumVal>(values) : 0;
+ auto attributes__ =
+ attributes ? _fbb.CreateVectorOfSortedTables<reflection::KeyValue>(attributes) : 0;
+ auto documentation__ =
+ documentation ? _fbb.CreateVector<flatbuffers::Offset<flatbuffers::String>>(*documentation) : 0;
+ return reflection::CreateEnum(_fbb, name__, values__, is_union, underlying_type, attributes__,
+ documentation__);
+}
+
+struct Field FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef FieldBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_NAME = 4,
+ VT_TYPE = 6,
+ VT_ID = 8,
+ VT_OFFSET = 10,
+ VT_DEFAULT_INTEGER = 12,
+ VT_DEFAULT_REAL = 14,
+ VT_DEPRECATED = 16,
+ VT_REQUIRED = 18,
+ VT_KEY = 20,
+ VT_ATTRIBUTES = 22,
+ VT_DOCUMENTATION = 24,
+ VT_OPTIONAL = 26
+ };
+ const flatbuffers::String *name() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_NAME);
+ }
+ bool KeyCompareLessThan(const Field *o) const { return *name() < *o->name(); }
+ int KeyCompareWithValue(const char *val) const { return strcmp(name()->c_str(), val); }
+ const reflection::Type *type() const { return GetPointer<const reflection::Type *>(VT_TYPE); }
+ uint16_t id() const { return GetField<uint16_t>(VT_ID, 0); }
+ uint16_t offset() const { return GetField<uint16_t>(VT_OFFSET, 0); }
+ int64_t default_integer() const { return GetField<int64_t>(VT_DEFAULT_INTEGER, 0); }
+ double default_real() const { return GetField<double>(VT_DEFAULT_REAL, 0.0); }
+ bool deprecated() const { return GetField<uint8_t>(VT_DEPRECATED, 0) != 0; }
+ bool required() const { return GetField<uint8_t>(VT_REQUIRED, 0) != 0; }
+ bool key() const { return GetField<uint8_t>(VT_KEY, 0) != 0; }
+ const flatbuffers::Vector<flatbuffers::Offset<reflection::KeyValue>> *attributes() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<reflection::KeyValue>> *>(
+ VT_ATTRIBUTES);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> *documentation() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> *>(
+ VT_DOCUMENTATION);
+ }
+ bool optional() const { return GetField<uint8_t>(VT_OPTIONAL, 0) != 0; }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffsetRequired(verifier, VT_NAME) &&
+ verifier.VerifyString(name()) && VerifyOffsetRequired(verifier, VT_TYPE) &&
+ verifier.VerifyTable(type()) && VerifyField<uint16_t>(verifier, VT_ID) &&
+ VerifyField<uint16_t>(verifier, VT_OFFSET) &&
+ VerifyField<int64_t>(verifier, VT_DEFAULT_INTEGER) &&
+ VerifyField<double>(verifier, VT_DEFAULT_REAL) &&
+ VerifyField<uint8_t>(verifier, VT_DEPRECATED) &&
+ VerifyField<uint8_t>(verifier, VT_REQUIRED) && VerifyField<uint8_t>(verifier, VT_KEY) &&
+ VerifyOffset(verifier, VT_ATTRIBUTES) && verifier.VerifyVector(attributes()) &&
+ verifier.VerifyVectorOfTables(attributes()) &&
+ VerifyOffset(verifier, VT_DOCUMENTATION) && verifier.VerifyVector(documentation()) &&
+ verifier.VerifyVectorOfStrings(documentation()) &&
+ VerifyField<uint8_t>(verifier, VT_OPTIONAL) && verifier.EndTable();
+ }
+};
+
+struct FieldBuilder
+{
+ typedef Field Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_name(flatbuffers::Offset<flatbuffers::String> name)
+ {
+ fbb_.AddOffset(Field::VT_NAME, name);
+ }
+ void add_type(flatbuffers::Offset<reflection::Type> type)
+ {
+ fbb_.AddOffset(Field::VT_TYPE, type);
+ }
+ void add_id(uint16_t id) { fbb_.AddElement<uint16_t>(Field::VT_ID, id, 0); }
+ void add_offset(uint16_t offset) { fbb_.AddElement<uint16_t>(Field::VT_OFFSET, offset, 0); }
+ void add_default_integer(int64_t default_integer)
+ {
+ fbb_.AddElement<int64_t>(Field::VT_DEFAULT_INTEGER, default_integer, 0);
+ }
+ void add_default_real(double default_real)
+ {
+ fbb_.AddElement<double>(Field::VT_DEFAULT_REAL, default_real, 0.0);
+ }
+ void add_deprecated(bool deprecated)
+ {
+ fbb_.AddElement<uint8_t>(Field::VT_DEPRECATED, static_cast<uint8_t>(deprecated), 0);
+ }
+ void add_required(bool required)
+ {
+ fbb_.AddElement<uint8_t>(Field::VT_REQUIRED, static_cast<uint8_t>(required), 0);
+ }
+ void add_key(bool key) { fbb_.AddElement<uint8_t>(Field::VT_KEY, static_cast<uint8_t>(key), 0); }
+ void add_attributes(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<reflection::KeyValue>>> attributes)
+ {
+ fbb_.AddOffset(Field::VT_ATTRIBUTES, attributes);
+ }
+ void add_documentation(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>>
+ documentation)
+ {
+ fbb_.AddOffset(Field::VT_DOCUMENTATION, documentation);
+ }
+ void add_optional(bool optional)
+ {
+ fbb_.AddElement<uint8_t>(Field::VT_OPTIONAL, static_cast<uint8_t>(optional), 0);
+ }
+ explicit FieldBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<Field> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Field>(end);
+ fbb_.Required(o, Field::VT_NAME);
+ fbb_.Required(o, Field::VT_TYPE);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Field> CreateField(
+ flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset<flatbuffers::String> name = 0,
+ flatbuffers::Offset<reflection::Type> type = 0, uint16_t id = 0, uint16_t offset = 0,
+ int64_t default_integer = 0, double default_real = 0.0, bool deprecated = false,
+ bool required = false, bool key = false,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<reflection::KeyValue>>> attributes =
+ 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>> documentation =
+ 0,
+ bool optional = false)
+{
+ FieldBuilder builder_(_fbb);
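+ // Elements are added in decreasing size order (8-, 4-, 2-, then 1-byte)
+ // so the table layout minimizes alignment padding.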
+ builder_.add_default_real(default_real);
+ builder_.add_default_integer(default_integer);
+ builder_.add_documentation(documentation);
+ builder_.add_attributes(attributes);
+ builder_.add_type(type);
+ builder_.add_name(name);
+ builder_.add_offset(offset);
+ builder_.add_id(id);
+ builder_.add_optional(optional);
+ builder_.add_key(key);
+ builder_.add_required(required);
+ builder_.add_deprecated(deprecated);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Field> CreateFieldDirect(
+ flatbuffers::FlatBufferBuilder &_fbb, const char *name = nullptr,
+ flatbuffers::Offset<reflection::Type> type = 0, uint16_t id = 0, uint16_t offset = 0,
+ int64_t default_integer = 0, double default_real = 0.0, bool deprecated = false,
+ bool required = false, bool key = false,
+ std::vector<flatbuffers::Offset<reflection::KeyValue>> *attributes = nullptr,
+ const std::vector<flatbuffers::Offset<flatbuffers::String>> *documentation = nullptr,
+ bool optional = false)
+{
+ auto name__ = name ? _fbb.CreateString(name) : 0;
+ auto attributes__ =
+ attributes ? _fbb.CreateVectorOfSortedTables<reflection::KeyValue>(attributes) : 0;
+ auto documentation__ =
+ documentation ? _fbb.CreateVector<flatbuffers::Offset<flatbuffers::String>>(*documentation) : 0;
+ return reflection::CreateField(_fbb, name__, type, id, offset, default_integer, default_real,
+ deprecated, required, key, attributes__, documentation__,
+ optional);
+}
+
+struct Object FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef ObjectBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_NAME = 4,
+ VT_FIELDS = 6,
+ VT_IS_STRUCT = 8,
+ VT_MINALIGN = 10,
+ VT_BYTESIZE = 12,
+ VT_ATTRIBUTES = 14,
+ VT_DOCUMENTATION = 16
+ };
+ const flatbuffers::String *name() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_NAME);
+ }
+ bool KeyCompareLessThan(const Object *o) const { return *name() < *o->name(); }
+ int KeyCompareWithValue(const char *val) const { return strcmp(name()->c_str(), val); }
+ const flatbuffers::Vector<flatbuffers::Offset<reflection::Field>> *fields() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<reflection::Field>> *>(
+ VT_FIELDS);
+ }
+ bool is_struct() const { return GetField<uint8_t>(VT_IS_STRUCT, 0) != 0; }
+ int32_t minalign() const { return GetField<int32_t>(VT_MINALIGN, 0); }
+ int32_t bytesize() const { return GetField<int32_t>(VT_BYTESIZE, 0); }
+ const flatbuffers::Vector<flatbuffers::Offset<reflection::KeyValue>> *attributes() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<reflection::KeyValue>> *>(
+ VT_ATTRIBUTES);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> *documentation() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> *>(
+ VT_DOCUMENTATION);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffsetRequired(verifier, VT_NAME) &&
+ verifier.VerifyString(name()) && VerifyOffsetRequired(verifier, VT_FIELDS) &&
+ verifier.VerifyVector(fields()) && verifier.VerifyVectorOfTables(fields()) &&
+ VerifyField<uint8_t>(verifier, VT_IS_STRUCT) &&
+ VerifyField<int32_t>(verifier, VT_MINALIGN) &&
+ VerifyField<int32_t>(verifier, VT_BYTESIZE) && VerifyOffset(verifier, VT_ATTRIBUTES) &&
+ verifier.VerifyVector(attributes()) && verifier.VerifyVectorOfTables(attributes()) &&
+ VerifyOffset(verifier, VT_DOCUMENTATION) && verifier.VerifyVector(documentation()) &&
+ verifier.VerifyVectorOfStrings(documentation()) && verifier.EndTable();
+ }
+};
+
+struct ObjectBuilder
+{
+ typedef Object Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_name(flatbuffers::Offset<flatbuffers::String> name)
+ {
+ fbb_.AddOffset(Object::VT_NAME, name);
+ }
+ void add_fields(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<reflection::Field>>> fields)
+ {
+ fbb_.AddOffset(Object::VT_FIELDS, fields);
+ }
+ void add_is_struct(bool is_struct)
+ {
+ fbb_.AddElement<uint8_t>(Object::VT_IS_STRUCT, static_cast<uint8_t>(is_struct), 0);
+ }
+ void add_minalign(int32_t minalign)
+ {
+ fbb_.AddElement<int32_t>(Object::VT_MINALIGN, minalign, 0);
+ }
+ void add_bytesize(int32_t bytesize)
+ {
+ fbb_.AddElement<int32_t>(Object::VT_BYTESIZE, bytesize, 0);
+ }
+ void add_attributes(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<reflection::KeyValue>>> attributes)
+ {
+ fbb_.AddOffset(Object::VT_ATTRIBUTES, attributes);
+ }
+ void add_documentation(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>>
+ documentation)
+ {
+ fbb_.AddOffset(Object::VT_DOCUMENTATION, documentation);
+ }
+ explicit ObjectBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<Object> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Object>(end);
+ fbb_.Required(o, Object::VT_NAME);
+ fbb_.Required(o, Object::VT_FIELDS);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Object> CreateObject(
+ flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset<flatbuffers::String> name = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<reflection::Field>>> fields = 0,
+ bool is_struct = false, int32_t minalign = 0, int32_t bytesize = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<reflection::KeyValue>>> attributes =
+ 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>> documentation =
+ 0)
+{
+ ObjectBuilder builder_(_fbb);
+ builder_.add_documentation(documentation);
+ builder_.add_attributes(attributes);
+ builder_.add_bytesize(bytesize);
+ builder_.add_minalign(minalign);
+ builder_.add_fields(fields);
+ builder_.add_name(name);
+ builder_.add_is_struct(is_struct);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Object> CreateObjectDirect(
+ flatbuffers::FlatBufferBuilder &_fbb, const char *name = nullptr,
+ std::vector<flatbuffers::Offset<reflection::Field>> *fields = nullptr, bool is_struct = false,
+ int32_t minalign = 0, int32_t bytesize = 0,
+ std::vector<flatbuffers::Offset<reflection::KeyValue>> *attributes = nullptr,
+ const std::vector<flatbuffers::Offset<flatbuffers::String>> *documentation = nullptr)
+{
+ auto name__ = name ? _fbb.CreateString(name) : 0;
+ auto fields__ = fields ? _fbb.CreateVectorOfSortedTables<reflection::Field>(fields) : 0;
+ auto attributes__ =
+ attributes ? _fbb.CreateVectorOfSortedTables<reflection::KeyValue>(attributes) : 0;
+ auto documentation__ =
+ documentation ? _fbb.CreateVector<flatbuffers::Offset<flatbuffers::String>>(*documentation) : 0;
+ return reflection::CreateObject(_fbb, name__, fields__, is_struct, minalign, bytesize,
+ attributes__, documentation__);
+}
+
+struct RPCCall FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef RPCCallBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_NAME = 4,
+ VT_REQUEST = 6,
+ VT_RESPONSE = 8,
+ VT_ATTRIBUTES = 10,
+ VT_DOCUMENTATION = 12
+ };
+ const flatbuffers::String *name() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_NAME);
+ }
+ bool KeyCompareLessThan(const RPCCall *o) const { return *name() < *o->name(); }
+ int KeyCompareWithValue(const char *val) const { return strcmp(name()->c_str(), val); }
+ const reflection::Object *request() const
+ {
+ return GetPointer<const reflection::Object *>(VT_REQUEST);
+ }
+ const reflection::Object *response() const
+ {
+ return GetPointer<const reflection::Object *>(VT_RESPONSE);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<reflection::KeyValue>> *attributes() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<reflection::KeyValue>> *>(
+ VT_ATTRIBUTES);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> *documentation() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> *>(
+ VT_DOCUMENTATION);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffsetRequired(verifier, VT_NAME) &&
+ verifier.VerifyString(name()) && VerifyOffsetRequired(verifier, VT_REQUEST) &&
+ verifier.VerifyTable(request()) && VerifyOffsetRequired(verifier, VT_RESPONSE) &&
+ verifier.VerifyTable(response()) && VerifyOffset(verifier, VT_ATTRIBUTES) &&
+ verifier.VerifyVector(attributes()) && verifier.VerifyVectorOfTables(attributes()) &&
+ VerifyOffset(verifier, VT_DOCUMENTATION) && verifier.VerifyVector(documentation()) &&
+ verifier.VerifyVectorOfStrings(documentation()) && verifier.EndTable();
+ }
+};
+
+struct RPCCallBuilder
+{
+ typedef RPCCall Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_name(flatbuffers::Offset<flatbuffers::String> name)
+ {
+ fbb_.AddOffset(RPCCall::VT_NAME, name);
+ }
+ void add_request(flatbuffers::Offset<reflection::Object> request)
+ {
+ fbb_.AddOffset(RPCCall::VT_REQUEST, request);
+ }
+ void add_response(flatbuffers::Offset<reflection::Object> response)
+ {
+ fbb_.AddOffset(RPCCall::VT_RESPONSE, response);
+ }
+ void add_attributes(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<reflection::KeyValue>>> attributes)
+ {
+ fbb_.AddOffset(RPCCall::VT_ATTRIBUTES, attributes);
+ }
+ void add_documentation(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>>
+ documentation)
+ {
+ fbb_.AddOffset(RPCCall::VT_DOCUMENTATION, documentation);
+ }
+ explicit RPCCallBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<RPCCall> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<RPCCall>(end);
+ fbb_.Required(o, RPCCall::VT_NAME);
+ fbb_.Required(o, RPCCall::VT_REQUEST);
+ fbb_.Required(o, RPCCall::VT_RESPONSE);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<RPCCall> CreateRPCCall(
+ flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset<flatbuffers::String> name = 0,
+ flatbuffers::Offset<reflection::Object> request = 0,
+ flatbuffers::Offset<reflection::Object> response = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<reflection::KeyValue>>> attributes =
+ 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>> documentation =
+ 0)
+{
+ RPCCallBuilder builder_(_fbb);
+ builder_.add_documentation(documentation);
+ builder_.add_attributes(attributes);
+ builder_.add_response(response);
+ builder_.add_request(request);
+ builder_.add_name(name);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<RPCCall> CreateRPCCallDirect(
+ flatbuffers::FlatBufferBuilder &_fbb, const char *name = nullptr,
+ flatbuffers::Offset<reflection::Object> request = 0,
+ flatbuffers::Offset<reflection::Object> response = 0,
+ std::vector<flatbuffers::Offset<reflection::KeyValue>> *attributes = nullptr,
+ const std::vector<flatbuffers::Offset<flatbuffers::String>> *documentation = nullptr)
+{
+ auto name__ = name ? _fbb.CreateString(name) : 0;
+ auto attributes__ =
+ attributes ? _fbb.CreateVectorOfSortedTables<reflection::KeyValue>(attributes) : 0;
+ auto documentation__ =
+ documentation ? _fbb.CreateVector<flatbuffers::Offset<flatbuffers::String>>(*documentation) : 0;
+ return reflection::CreateRPCCall(_fbb, name__, request, response, attributes__, documentation__);
+}
+
+struct Service FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef ServiceBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_NAME = 4,
+ VT_CALLS = 6,
+ VT_ATTRIBUTES = 8,
+ VT_DOCUMENTATION = 10
+ };
+ const flatbuffers::String *name() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_NAME);
+ }
+ bool KeyCompareLessThan(const Service *o) const { return *name() < *o->name(); }
+ int KeyCompareWithValue(const char *val) const { return strcmp(name()->c_str(), val); }
+ const flatbuffers::Vector<flatbuffers::Offset<reflection::RPCCall>> *calls() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<reflection::RPCCall>> *>(
+ VT_CALLS);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<reflection::KeyValue>> *attributes() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<reflection::KeyValue>> *>(
+ VT_ATTRIBUTES);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> *documentation() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> *>(
+ VT_DOCUMENTATION);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffsetRequired(verifier, VT_NAME) &&
+ verifier.VerifyString(name()) && VerifyOffset(verifier, VT_CALLS) &&
+ verifier.VerifyVector(calls()) && verifier.VerifyVectorOfTables(calls()) &&
+ VerifyOffset(verifier, VT_ATTRIBUTES) && verifier.VerifyVector(attributes()) &&
+ verifier.VerifyVectorOfTables(attributes()) &&
+ VerifyOffset(verifier, VT_DOCUMENTATION) && verifier.VerifyVector(documentation()) &&
+ verifier.VerifyVectorOfStrings(documentation()) && verifier.EndTable();
+ }
+};
+
+struct ServiceBuilder
+{
+ typedef Service Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_name(flatbuffers::Offset<flatbuffers::String> name)
+ {
+ fbb_.AddOffset(Service::VT_NAME, name);
+ }
+ void add_calls(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<reflection::RPCCall>>> calls)
+ {
+ fbb_.AddOffset(Service::VT_CALLS, calls);
+ }
+ void add_attributes(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<reflection::KeyValue>>> attributes)
+ {
+ fbb_.AddOffset(Service::VT_ATTRIBUTES, attributes);
+ }
+ void add_documentation(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>>
+ documentation)
+ {
+ fbb_.AddOffset(Service::VT_DOCUMENTATION, documentation);
+ }
+ explicit ServiceBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<Service> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Service>(end);
+ fbb_.Required(o, Service::VT_NAME);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Service> CreateService(
+ flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset<flatbuffers::String> name = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<reflection::RPCCall>>> calls = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<reflection::KeyValue>>> attributes =
+ 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>> documentation =
+ 0)
+{
+ ServiceBuilder builder_(_fbb);
+ builder_.add_documentation(documentation);
+ builder_.add_attributes(attributes);
+ builder_.add_calls(calls);
+ builder_.add_name(name);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Service> CreateServiceDirect(
+ flatbuffers::FlatBufferBuilder &_fbb, const char *name = nullptr,
+ std::vector<flatbuffers::Offset<reflection::RPCCall>> *calls = nullptr,
+ std::vector<flatbuffers::Offset<reflection::KeyValue>> *attributes = nullptr,
+ const std::vector<flatbuffers::Offset<flatbuffers::String>> *documentation = nullptr)
+{
+ auto name__ = name ? _fbb.CreateString(name) : 0;
+ auto calls__ = calls ? _fbb.CreateVectorOfSortedTables<reflection::RPCCall>(calls) : 0;
+ auto attributes__ =
+ attributes ? _fbb.CreateVectorOfSortedTables<reflection::KeyValue>(attributes) : 0;
+ auto documentation__ =
+ documentation ? _fbb.CreateVector<flatbuffers::Offset<flatbuffers::String>>(*documentation) : 0;
+ return reflection::CreateService(_fbb, name__, calls__, attributes__, documentation__);
+}
+
+struct Schema FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef SchemaBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_OBJECTS = 4,
+ VT_ENUMS = 6,
+ VT_FILE_IDENT = 8,
+ VT_FILE_EXT = 10,
+ VT_ROOT_TABLE = 12,
+ VT_SERVICES = 14,
+ VT_ADVANCED_FEATURES = 16
+ };
+ const flatbuffers::Vector<flatbuffers::Offset<reflection::Object>> *objects() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<reflection::Object>> *>(
+ VT_OBJECTS);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<reflection::Enum>> *enums() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<reflection::Enum>> *>(VT_ENUMS);
+ }
+ const flatbuffers::String *file_ident() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_FILE_IDENT);
+ }
+ const flatbuffers::String *file_ext() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_FILE_EXT);
+ }
+ const reflection::Object *root_table() const
+ {
+ return GetPointer<const reflection::Object *>(VT_ROOT_TABLE);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<reflection::Service>> *services() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<reflection::Service>> *>(
+ VT_SERVICES);
+ }
+ reflection::AdvancedFeatures advanced_features() const
+ {
+ return static_cast<reflection::AdvancedFeatures>(GetField<uint64_t>(VT_ADVANCED_FEATURES, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffsetRequired(verifier, VT_OBJECTS) &&
+ verifier.VerifyVector(objects()) && verifier.VerifyVectorOfTables(objects()) &&
+ VerifyOffsetRequired(verifier, VT_ENUMS) && verifier.VerifyVector(enums()) &&
+ verifier.VerifyVectorOfTables(enums()) && VerifyOffset(verifier, VT_FILE_IDENT) &&
+ verifier.VerifyString(file_ident()) && VerifyOffset(verifier, VT_FILE_EXT) &&
+ verifier.VerifyString(file_ext()) && VerifyOffset(verifier, VT_ROOT_TABLE) &&
+ verifier.VerifyTable(root_table()) && VerifyOffset(verifier, VT_SERVICES) &&
+ verifier.VerifyVector(services()) && verifier.VerifyVectorOfTables(services()) &&
+ VerifyField<uint64_t>(verifier, VT_ADVANCED_FEATURES) && verifier.EndTable();
+ }
+};
+
+struct SchemaBuilder
+{
+ typedef Schema Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_objects(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<reflection::Object>>> objects)
+ {
+ fbb_.AddOffset(Schema::VT_OBJECTS, objects);
+ }
+ void
+ add_enums(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<reflection::Enum>>> enums)
+ {
+ fbb_.AddOffset(Schema::VT_ENUMS, enums);
+ }
+ void add_file_ident(flatbuffers::Offset<flatbuffers::String> file_ident)
+ {
+ fbb_.AddOffset(Schema::VT_FILE_IDENT, file_ident);
+ }
+ void add_file_ext(flatbuffers::Offset<flatbuffers::String> file_ext)
+ {
+ fbb_.AddOffset(Schema::VT_FILE_EXT, file_ext);
+ }
+ void add_root_table(flatbuffers::Offset<reflection::Object> root_table)
+ {
+ fbb_.AddOffset(Schema::VT_ROOT_TABLE, root_table);
+ }
+ void add_services(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<reflection::Service>>> services)
+ {
+ fbb_.AddOffset(Schema::VT_SERVICES, services);
+ }
+ void add_advanced_features(reflection::AdvancedFeatures advanced_features)
+ {
+ fbb_.AddElement<uint64_t>(Schema::VT_ADVANCED_FEATURES,
+ static_cast<uint64_t>(advanced_features), 0);
+ }
+ explicit SchemaBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<Schema> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Schema>(end);
+ fbb_.Required(o, Schema::VT_OBJECTS);
+ fbb_.Required(o, Schema::VT_ENUMS);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Schema> CreateSchema(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<reflection::Object>>> objects = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<reflection::Enum>>> enums = 0,
+ flatbuffers::Offset<flatbuffers::String> file_ident = 0,
+ flatbuffers::Offset<flatbuffers::String> file_ext = 0,
+ flatbuffers::Offset<reflection::Object> root_table = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<reflection::Service>>> services = 0,
+ reflection::AdvancedFeatures advanced_features = static_cast<reflection::AdvancedFeatures>(0))
+{
+ SchemaBuilder builder_(_fbb);
+ builder_.add_advanced_features(advanced_features);
+ builder_.add_services(services);
+ builder_.add_root_table(root_table);
+ builder_.add_file_ext(file_ext);
+ builder_.add_file_ident(file_ident);
+ builder_.add_enums(enums);
+ builder_.add_objects(objects);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Schema> CreateSchemaDirect(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ std::vector<flatbuffers::Offset<reflection::Object>> *objects = nullptr,
+ std::vector<flatbuffers::Offset<reflection::Enum>> *enums = nullptr,
+ const char *file_ident = nullptr, const char *file_ext = nullptr,
+ flatbuffers::Offset<reflection::Object> root_table = 0,
+ std::vector<flatbuffers::Offset<reflection::Service>> *services = nullptr,
+ reflection::AdvancedFeatures advanced_features = static_cast<reflection::AdvancedFeatures>(0))
+{
+ auto objects__ = objects ? _fbb.CreateVectorOfSortedTables<reflection::Object>(objects) : 0;
+ auto enums__ = enums ? _fbb.CreateVectorOfSortedTables<reflection::Enum>(enums) : 0;
+ auto file_ident__ = file_ident ? _fbb.CreateString(file_ident) : 0;
+ auto file_ext__ = file_ext ? _fbb.CreateString(file_ext) : 0;
+ auto services__ = services ? _fbb.CreateVectorOfSortedTables<reflection::Service>(services) : 0;
+ return reflection::CreateSchema(_fbb, objects__, enums__, file_ident__, file_ext__, root_table,
+ services__, advanced_features);
+}
+
+inline const reflection::Schema *GetSchema(const void *buf)
+{
+ return flatbuffers::GetRoot<reflection::Schema>(buf);
+}
+
+inline const reflection::Schema *GetSizePrefixedSchema(const void *buf)
+{
+ return flatbuffers::GetSizePrefixedRoot<reflection::Schema>(buf);
+}
+
+inline const char *SchemaIdentifier() { return "BFBS"; }
+
+inline bool SchemaBufferHasIdentifier(const void *buf)
+{
+ return flatbuffers::BufferHasIdentifier(buf, SchemaIdentifier());
+}
+
+inline bool VerifySchemaBuffer(flatbuffers::Verifier &verifier)
+{
+ return verifier.VerifyBuffer<reflection::Schema>(SchemaIdentifier());
+}
+
+inline bool VerifySizePrefixedSchemaBuffer(flatbuffers::Verifier &verifier)
+{
+ return verifier.VerifySizePrefixedBuffer<reflection::Schema>(SchemaIdentifier());
+}
+
+inline const char *SchemaExtension() { return "bfbs"; }
+
+inline void FinishSchemaBuffer(flatbuffers::FlatBufferBuilder &fbb,
+ flatbuffers::Offset<reflection::Schema> root)
+{
+ fbb.Finish(root, SchemaIdentifier());
+}
+
+inline void FinishSizePrefixedSchemaBuffer(flatbuffers::FlatBufferBuilder &fbb,
+ flatbuffers::Offset<reflection::Schema> root)
+{
+ fbb.FinishSizePrefixed(root, SchemaIdentifier());
+}
+
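+// Example (sketch): given a byte buffer `buf` that holds a compiled schema
+// (.bfbs), the root Schema table can be verified and read like this:
+//
+//   flatbuffers::Verifier verifier(buf.data(), buf.size());
+//   if (reflection::VerifySchemaBuffer(verifier)) {
+//     const reflection::Schema *schema = reflection::GetSchema(buf.data());
+//     const reflection::Object *root = schema->root_table(); // may be null
+//   }
+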
+} // namespace reflection
+
+#endif // FLATBUFFERS_GENERATED_REFLECTION_REFLECTION_H_
diff --git a/onert-micro/externals/flatbuffers/registry.h b/onert-micro/externals/flatbuffers/registry.h
new file mode 100644
index 000000000..c06bd5687
--- /dev/null
+++ b/onert-micro/externals/flatbuffers/registry.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 Google Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FLATBUFFERS_REGISTRY_H_
+#define FLATBUFFERS_REGISTRY_H_
+
+#include "flatbuffers/idl.h"
+
+namespace flatbuffers
+{
+
+// Convenience class to easily parse or generate text for arbitrary FlatBuffers.
+// Simply pre-populate it with all schema filenames that may be in use, and
+// this class will look them up using the file_identifier declared in the
+// schema.
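+//
+// Example usage (sketch; the identifier function and schema path are
+// placeholders for your own schema):
+//
+//   flatbuffers::Registry registry;
+//   registry.Register(MonsterIdentifier(), "monster.fbs");
+//   std::string json;
+//   if (!registry.FlatBufferToText(flatbuf, len, &json))
+//     printf("%s\n", registry.GetLastError().c_str());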
+class Registry
+{
+public:
+ // Call this for all schemas that may be in use. The identifier is available
+ // via a function in the generated code, e.g. MonsterIdentifier().
+ void Register(const char *file_identifier, const char *schema_path)
+ {
+ Schema schema;
+ schema.path_ = schema_path;
+ schemas_[file_identifier] = schema;
+ }
+
+ // Generate text from an arbitrary FlatBuffer by looking up its
+ // file_identifier in the registry.
+ bool FlatBufferToText(const uint8_t *flatbuf, size_t len, std::string *dest)
+ {
+ // Get the identifier out of the buffer.
+ // If the buffer is truncated, exit.
+ if (len < sizeof(uoffset_t) + FlatBufferBuilder::kFileIdentifierLength)
+ {
+ lasterror_ = "buffer truncated";
+ return false;
+ }
+ std::string ident(reinterpret_cast<const char *>(flatbuf) + sizeof(uoffset_t),
+ FlatBufferBuilder::kFileIdentifierLength);
+ // Load and parse the schema.
+ Parser parser;
+ if (!LoadSchema(ident, &parser))
+ return false;
+ // Now we're ready to generate text.
+ if (!GenerateText(parser, flatbuf, dest))
+ {
+ lasterror_ = "unable to generate text for FlatBuffer binary";
+ return false;
+ }
+ return true;
+ }
+
+ // Converts text to a binary FlatBuffer using one of the schemas in the
+ // registry; use the file_identifier to indicate which schema to use.
+ // If DetachedBuffer::data() is null then parsing failed.
+ DetachedBuffer TextToFlatBuffer(const char *text, const char *file_identifier)
+ {
+ // Load and parse the schema.
+ Parser parser;
+ if (!LoadSchema(file_identifier, &parser))
+ return DetachedBuffer();
+ // Parse the text.
+ if (!parser.Parse(text))
+ {
+ lasterror_ = parser.error_;
+ return DetachedBuffer();
+ }
+ // We have a valid FlatBuffer. Detach it from the builder and return.
+ return parser.builder_.Release();
+ }
+
+ // Modify any parsing / output options used by the other functions.
+ void SetOptions(const IDLOptions &opts) { opts_ = opts; }
+
+ // If the schemas in use contain include statements, call this function for
+ // every directory the parser should search for included files.
+ void AddIncludeDirectory(const char *path) { include_paths_.push_back(path); }
+
+ // Returns a human readable error if any of the above functions fail.
+ const std::string &GetLastError() { return lasterror_; }
+
+private:
+ bool LoadSchema(const std::string &ident, Parser *parser)
+ {
+ // Find the schema; if it is not registered, exit.
+ auto it = schemas_.find(ident);
+ if (it == schemas_.end())
+ {
+ // Don't attach the identifier, since it may not be human readable.
+ lasterror_ = "identifier for this buffer not in the registry";
+ return false;
+ }
+ auto &schema = it->second;
+ // Load the schema from disk; if that fails, exit.
+ std::string schematext;
+ if (!LoadFile(schema.path_.c_str(), false, &schematext))
+ {
+ lasterror_ = "could not load schema: " + schema.path_;
+ return false;
+ }
+ // Parse schema.
+ parser->opts = opts_;
+ if (!parser->Parse(schematext.c_str(), vector_data(include_paths_), schema.path_.c_str()))
+ {
+ lasterror_ = parser->error_;
+ return false;
+ }
+ return true;
+ }
+
+ struct Schema
+ {
+ std::string path_;
+ // TODO(wvo) optionally cache schema file or parsed schema here.
+ };
+
+ std::string lasterror_;
+ IDLOptions opts_;
+ std::vector<const char *> include_paths_;
+ std::map<std::string, Schema> schemas_;
+};
+
+} // namespace flatbuffers
+
+#endif // FLATBUFFERS_REGISTRY_H_
diff --git a/onert-micro/externals/flatbuffers/stl_emulation.h b/onert-micro/externals/flatbuffers/stl_emulation.h
new file mode 100644
index 000000000..3f11fb9cb
--- /dev/null
+++ b/onert-micro/externals/flatbuffers/stl_emulation.h
@@ -0,0 +1,674 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 Google Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FLATBUFFERS_STL_EMULATION_H_
+#define FLATBUFFERS_STL_EMULATION_H_
+
+// clang-format off
+#include "flatbuffers/base.h"
+
+#include <string>
+#include <type_traits>
+#include <vector>
+#include <memory>
+#include <limits>
+
+#if defined(_STLPORT_VERSION) && !defined(FLATBUFFERS_CPP98_STL)
+ #define FLATBUFFERS_CPP98_STL
+#endif // defined(_STLPORT_VERSION) && !defined(FLATBUFFERS_CPP98_STL)
+
+#if defined(FLATBUFFERS_CPP98_STL)
+ #include <cctype>
+#endif // defined(FLATBUFFERS_CPP98_STL)
+
+// Detect C++17 compatible compiler.
+// __cplusplus >= 201703L - a compiler has support of 'static inline' variables.
+#if defined(FLATBUFFERS_USE_STD_OPTIONAL) \
+ || (defined(__cplusplus) && __cplusplus >= 201703L) \
+ || (defined(_MSVC_LANG) && (_MSVC_LANG >= 201703L))
+ #include <optional>
+ #ifndef FLATBUFFERS_USE_STD_OPTIONAL
+ #define FLATBUFFERS_USE_STD_OPTIONAL
+ #endif
+#endif // defined(FLATBUFFERS_USE_STD_OPTIONAL) ...
+
+// The __cpp_lib_span is the predefined feature macro.
+#if defined(FLATBUFFERS_USE_STD_SPAN)
+ #include <span>
+#elif defined(__cpp_lib_span) && defined(__has_include)
+ #if __has_include(<span>)
+ #include <span>
+ #define FLATBUFFERS_USE_STD_SPAN
+ #endif
+#else
+ // Disable non-trivial ctors if FLATBUFFERS_SPAN_MINIMAL defined.
+ #if !defined(FLATBUFFERS_TEMPLATES_ALIASES) || defined(FLATBUFFERS_CPP98_STL)
+ #define FLATBUFFERS_SPAN_MINIMAL
+ #else
+ // Enable implicit construction of a span<T,N> from a std::array<T,N>.
+ #include <array>
+ #endif
+#endif // defined(FLATBUFFERS_USE_STD_SPAN)
+
+// This header provides backwards compatibility for C++98 STLs like stlport.
+namespace flatbuffers {
+
+// Retrieve ::back() from a string in a way that is compatible with pre C++11
+// STLs (e.g. stlport).
+inline char& string_back(std::string &value) {
+ return value[value.length() - 1];
+}
+
+inline char string_back(const std::string &value) {
+ return value[value.length() - 1];
+}
+
+// Helper method that retrieves ::data() from a vector in a way that is
+// compatible with pre C++11 STLs (e.g. stlport).
+template <typename T> inline T *vector_data(std::vector<T> &vector) {
+ // In some debug environments, operator[] does bounds checking, so &vector[0]
+ // can't be used.
+ return vector.empty() ? nullptr : &vector[0];
+}
+
+template <typename T> inline const T *vector_data(
+ const std::vector<T> &vector) {
+ return vector.empty() ? nullptr : &vector[0];
+}
+
+template <typename T, typename V>
+inline void vector_emplace_back(std::vector<T> *vector, V &&data) {
+ #if defined(FLATBUFFERS_CPP98_STL)
+ vector->push_back(data);
+ #else
+ vector->emplace_back(std::forward<V>(data));
+ #endif // defined(FLATBUFFERS_CPP98_STL)
+}
+
+#ifndef FLATBUFFERS_CPP98_STL
+ #if defined(FLATBUFFERS_TEMPLATES_ALIASES)
+ template <typename T>
+ using numeric_limits = std::numeric_limits<T>;
+ #else
+ template <typename T> class numeric_limits :
+ public std::numeric_limits<T> {};
+ #endif // defined(FLATBUFFERS_TEMPLATES_ALIASES)
+#else
+ template <typename T> class numeric_limits :
+ public std::numeric_limits<T> {
+ public:
+ // Android NDK fix.
+ static T lowest() {
+ return std::numeric_limits<T>::min();
+ }
+ };
+
+ template <> class numeric_limits<float> :
+ public std::numeric_limits<float> {
+ public:
+ static float lowest() { return -FLT_MAX; }
+ };
+
+ template <> class numeric_limits<double> :
+ public std::numeric_limits<double> {
+ public:
+ static double lowest() { return -DBL_MAX; }
+ };
+
+ template <> class numeric_limits<unsigned long long> {
+ public:
+ static unsigned long long min() { return 0ULL; }
+ static unsigned long long max() { return ~0ULL; }
+ static unsigned long long lowest() {
+ return numeric_limits<unsigned long long>::min();
+ }
+ };
+
+ template <> class numeric_limits<long long> {
+ public:
+ static long long min() {
+ return static_cast<long long>(1ULL << ((sizeof(long long) << 3) - 1));
+ }
+ static long long max() {
+ return static_cast<long long>(
+ (1ULL << ((sizeof(long long) << 3) - 1)) - 1);
+ }
+ static long long lowest() {
+ return numeric_limits<long long>::min();
+ }
+ };
+#endif // FLATBUFFERS_CPP98_STL
+
+#if defined(FLATBUFFERS_TEMPLATES_ALIASES)
+ #ifndef FLATBUFFERS_CPP98_STL
+ template <typename T> using is_scalar = std::is_scalar<T>;
+ template <typename T, typename U> using is_same = std::is_same<T,U>;
+ template <typename T> using is_floating_point = std::is_floating_point<T>;
+ template <typename T> using is_unsigned = std::is_unsigned<T>;
+ template <typename T> using is_enum = std::is_enum<T>;
+ template <typename T> using make_unsigned = std::make_unsigned<T>;
+ template<bool B, class T, class F>
+ using conditional = std::conditional<B, T, F>;
+ template<class T, T v>
+ using integral_constant = std::integral_constant<T, v>;
+ template <bool B>
+ using bool_constant = integral_constant<bool, B>;
+ #else
+ // Map C++ TR1 templates defined by stlport.
+ template <typename T> using is_scalar = std::tr1::is_scalar<T>;
+ template <typename T, typename U> using is_same = std::tr1::is_same<T,U>;
+ template <typename T> using is_floating_point =
+ std::tr1::is_floating_point<T>;
+ template <typename T> using is_unsigned = std::tr1::is_unsigned<T>;
+ template <typename T> using is_enum = std::tr1::is_enum<T>;
+ // Android NDK doesn't have std::make_unsigned or std::tr1::make_unsigned.
+ template<typename T> struct make_unsigned {
+ static_assert(is_unsigned<T>::value, "Specialization not implemented!");
+ using type = T;
+ };
+ template<> struct make_unsigned<char> { using type = unsigned char; };
+ template<> struct make_unsigned<short> { using type = unsigned short; };
+ template<> struct make_unsigned<int> { using type = unsigned int; };
+ template<> struct make_unsigned<long> { using type = unsigned long; };
+ template<>
+ struct make_unsigned<long long> { using type = unsigned long long; };
+ template<bool B, class T, class F>
+ using conditional = std::tr1::conditional<B, T, F>;
+ template<class T, T v>
+ using integral_constant = std::tr1::integral_constant<T, v>;
+ template <bool B>
+ using bool_constant = integral_constant<bool, B>;
+ #endif // !FLATBUFFERS_CPP98_STL
+#else
+ // MSVC 2010 doesn't support C++11 aliases.
+ template <typename T> struct is_scalar : public std::is_scalar<T> {};
+ template <typename T, typename U> struct is_same : public std::is_same<T,U> {};
+ template <typename T> struct is_floating_point :
+ public std::is_floating_point<T> {};
+ template <typename T> struct is_unsigned : public std::is_unsigned<T> {};
+ template <typename T> struct is_enum : public std::is_enum<T> {};
+ template <typename T> struct make_unsigned : public std::make_unsigned<T> {};
+ template<bool B, class T, class F>
+ struct conditional : public std::conditional<B, T, F> {};
+ template<class T, T v>
+ struct integral_constant : public std::integral_constant<T, v> {};
+ template <bool B>
+ struct bool_constant : public integral_constant<bool, B> {};
+#endif // defined(FLATBUFFERS_TEMPLATES_ALIASES)
+
+#ifndef FLATBUFFERS_CPP98_STL
+ #if defined(FLATBUFFERS_TEMPLATES_ALIASES)
+ template <class T> using unique_ptr = std::unique_ptr<T>;
+ #else
+ // MSVC 2010 doesn't support C++11 aliases.
+ // We're manually "aliasing" the class here as we want to bring unique_ptr
+ // into the flatbuffers namespace. For C++98 STL implementations we provide
+ // a completely independent implementation (see below).
+ template <class T> class unique_ptr : public std::unique_ptr<T> {
+ public:
+ unique_ptr() {}
+ explicit unique_ptr(T* p) : std::unique_ptr<T>(p) {}
+ unique_ptr(std::unique_ptr<T>&& u) { *this = std::move(u); }
+ unique_ptr(unique_ptr&& u) { *this = std::move(u); }
+ unique_ptr& operator=(std::unique_ptr<T>&& u) {
+ std::unique_ptr<T>::reset(u.release());
+ return *this;
+ }
+ unique_ptr& operator=(unique_ptr&& u) {
+ std::unique_ptr<T>::reset(u.release());
+ return *this;
+ }
+ unique_ptr& operator=(T* p) {
+ return std::unique_ptr<T>::operator=(p);
+ }
+ };
+ #endif // defined(FLATBUFFERS_TEMPLATES_ALIASES)
+#else
+ // Very limited implementation of unique_ptr.
+ // This is provided simply to allow the C++ code generated from the default
+ // settings to function in C++98 environments with no modifications.
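+ // Note: as with the old std::auto_ptr, the "copy" operations below actually
+ // transfer ownership (the source object is mutated through a const_cast).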
+ template <class T> class unique_ptr {
+ public:
+ typedef T element_type;
+
+ unique_ptr() : ptr_(nullptr) {}
+ explicit unique_ptr(T* p) : ptr_(p) {}
+ unique_ptr(unique_ptr&& u) : ptr_(nullptr) { reset(u.release()); }
+ unique_ptr(const unique_ptr& u) : ptr_(nullptr) {
+ reset(const_cast<unique_ptr*>(&u)->release());
+ }
+ ~unique_ptr() { reset(); }
+
+ unique_ptr& operator=(const unique_ptr& u) {
+ reset(const_cast<unique_ptr*>(&u)->release());
+ return *this;
+ }
+
+ unique_ptr& operator=(unique_ptr&& u) {
+ reset(u.release());
+ return *this;
+ }
+
+ unique_ptr& operator=(T* p) {
+ reset(p);
+ return *this;
+ }
+
+ const T& operator*() const { return *ptr_; }
+ T* operator->() const { return ptr_; }
+ T* get() const noexcept { return ptr_; }
+ explicit operator bool() const { return ptr_ != nullptr; }
+
+ // modifiers
+ T* release() {
+ T* value = ptr_;
+ ptr_ = nullptr;
+ return value;
+ }
+
+ void reset(T* p = nullptr) {
+ T* value = ptr_;
+ ptr_ = p;
+ if (value) delete value;
+ }
+
+ void swap(unique_ptr& u) {
+ T* temp_ptr = ptr_;
+ ptr_ = u.ptr_;
+ u.ptr_ = temp_ptr;
+ }
+
+ private:
+ T* ptr_;
+ };
+
+ template <class T> bool operator==(const unique_ptr<T>& x,
+ const unique_ptr<T>& y) {
+ return x.get() == y.get();
+ }
+
+ template <class T, class D> bool operator==(const unique_ptr<T>& x,
+ const D* y) {
+ return static_cast<D*>(x.get()) == y;
+ }
+
+ template <class T> bool operator==(const unique_ptr<T>& x, intptr_t y) {
+ return reinterpret_cast<intptr_t>(x.get()) == y;
+ }
+
+ template <class T> bool operator!=(const unique_ptr<T>& x, decltype(nullptr)) {
+ return !!x;
+ }
+
+ template <class T> bool operator!=(decltype(nullptr), const unique_ptr<T>& x) {
+ return !!x;
+ }
+
+ template <class T> bool operator==(const unique_ptr<T>& x, decltype(nullptr)) {
+ return !x;
+ }
+
+ template <class T> bool operator==(decltype(nullptr), const unique_ptr<T>& x) {
+ return !x;
+ }
+
+#endif // !FLATBUFFERS_CPP98_STL
+
+#ifdef FLATBUFFERS_USE_STD_OPTIONAL
+template<class T>
+using Optional = std::optional<T>;
+using nullopt_t = std::nullopt_t;
+inline constexpr nullopt_t nullopt = std::nullopt;
+
+#else
+// Limited implementation of the Optional<T> type for a scalar T.
+// This implementation is limited to trivial types compatible with the
+// std::is_arithmetic<T> or std::is_enum<T> type traits.
+
+// A tag to indicate an empty flatbuffers::Optional<T>.
+struct nullopt_t {
+ explicit FLATBUFFERS_CONSTEXPR_CPP11 nullopt_t(int) {}
+};
+
+#if defined(FLATBUFFERS_CONSTEXPR_DEFINED)
+ namespace internal {
+ template <class> struct nullopt_holder {
+ static constexpr nullopt_t instance_ = nullopt_t(0);
+ };
+ template<class Dummy>
+ constexpr nullopt_t nullopt_holder<Dummy>::instance_;
+ }
+ static constexpr const nullopt_t &nullopt = internal::nullopt_holder<void>::instance_;
+
+#else
+ namespace internal {
+ template <class> struct nullopt_holder {
+ static const nullopt_t instance_;
+ };
+ template<class Dummy>
+ const nullopt_t nullopt_holder<Dummy>::instance_ = nullopt_t(0);
+ }
+ static const nullopt_t &nullopt = internal::nullopt_holder<void>::instance_;
+
+#endif
+
+template<class T>
+class Optional FLATBUFFERS_FINAL_CLASS {
+ // A non-scalar 'T' would make Optional<T> extremely complicated.
+ // Use the is_scalar<T> check because flatbuffers::is_arithmetic<T>
+ // isn't implemented.
+ static_assert(flatbuffers::is_scalar<T>::value, "unexpected type T");
+
+ public:
+ ~Optional() {}
+
+ FLATBUFFERS_CONSTEXPR_CPP11 Optional() FLATBUFFERS_NOEXCEPT
+ : value_(), has_value_(false) {}
+
+ FLATBUFFERS_CONSTEXPR_CPP11 Optional(nullopt_t) FLATBUFFERS_NOEXCEPT
+ : value_(), has_value_(false) {}
+
+ FLATBUFFERS_CONSTEXPR_CPP11 Optional(T val) FLATBUFFERS_NOEXCEPT
+ : value_(val), has_value_(true) {}
+
+ FLATBUFFERS_CONSTEXPR_CPP11 Optional(const Optional &other) FLATBUFFERS_NOEXCEPT
+ : value_(other.value_), has_value_(other.has_value_) {}
+
+ FLATBUFFERS_CONSTEXPR_CPP14 Optional &operator=(const Optional &other) FLATBUFFERS_NOEXCEPT {
+ value_ = other.value_;
+ has_value_ = other.has_value_;
+ return *this;
+ }
+
+ FLATBUFFERS_CONSTEXPR_CPP14 Optional &operator=(nullopt_t) FLATBUFFERS_NOEXCEPT {
+ value_ = T();
+ has_value_ = false;
+ return *this;
+ }
+
+ FLATBUFFERS_CONSTEXPR_CPP14 Optional &operator=(T val) FLATBUFFERS_NOEXCEPT {
+ value_ = val;
+ has_value_ = true;
+ return *this;
+ }
+
+ void reset() FLATBUFFERS_NOEXCEPT {
+ *this = nullopt;
+ }
+
+ void swap(Optional &other) FLATBUFFERS_NOEXCEPT {
+ std::swap(value_, other.value_);
+ std::swap(has_value_, other.has_value_);
+ }
+
+ FLATBUFFERS_CONSTEXPR_CPP11 FLATBUFFERS_EXPLICIT_CPP11 operator bool() const FLATBUFFERS_NOEXCEPT {
+ return has_value_;
+ }
+
+ FLATBUFFERS_CONSTEXPR_CPP11 bool has_value() const FLATBUFFERS_NOEXCEPT {
+ return has_value_;
+ }
+
+ FLATBUFFERS_CONSTEXPR_CPP11 const T& operator*() const FLATBUFFERS_NOEXCEPT {
+ return value_;
+ }
+
+ const T& value() const {
+ FLATBUFFERS_ASSERT(has_value());
+ return value_;
+ }
+
+ T value_or(T default_value) const FLATBUFFERS_NOEXCEPT {
+ return has_value() ? value_ : default_value;
+ }
+
+ private:
+ T value_;
+ bool has_value_;
+};
+
+template<class T>
+FLATBUFFERS_CONSTEXPR_CPP11 bool operator==(const Optional<T>& opt, nullopt_t) FLATBUFFERS_NOEXCEPT {
+ return !opt;
+}
+template<class T>
+FLATBUFFERS_CONSTEXPR_CPP11 bool operator==(nullopt_t, const Optional<T>& opt) FLATBUFFERS_NOEXCEPT {
+ return !opt;
+}
+
+template<class T, class U>
+FLATBUFFERS_CONSTEXPR_CPP11 bool operator==(const Optional<T>& lhs, const U& rhs) FLATBUFFERS_NOEXCEPT {
+ return static_cast<bool>(lhs) && (*lhs == rhs);
+}
+
+template<class T, class U>
+FLATBUFFERS_CONSTEXPR_CPP11 bool operator==(const T& lhs, const Optional<U>& rhs) FLATBUFFERS_NOEXCEPT {
+ return static_cast<bool>(rhs) && (lhs == *rhs);
+}
+
+template<class T, class U>
+FLATBUFFERS_CONSTEXPR_CPP11 bool operator==(const Optional<T>& lhs, const Optional<U>& rhs) FLATBUFFERS_NOEXCEPT {
+ // Two empty optionals compare equal, matching std::optional semantics.
+ return static_cast<bool>(lhs) != static_cast<bool>(rhs)
+ ? false
+ : !static_cast<bool>(lhs) ? true : (*lhs == *rhs);
+}
+#endif // FLATBUFFERS_USE_STD_OPTIONAL
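+
+// Example (sketch): the std::optional-style subset available in both branches:
+//
+//   flatbuffers::Optional<int32_t> maybe = flatbuffers::nullopt;
+//   maybe = 42;
+//   if (maybe.has_value()) { /* *maybe == 42 */ }
+//   int32_t v = maybe.value_or(0);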
+
+
+// Very limited and naive partial implementation of C++20 std::span<T,Extent>.
+#if defined(FLATBUFFERS_USE_STD_SPAN)
+ inline constexpr std::size_t dynamic_extent = std::dynamic_extent;
+ template<class T, std::size_t Extent = std::dynamic_extent>
+ using span = std::span<T, Extent>;
+
+#else // !defined(FLATBUFFERS_USE_STD_SPAN)
+FLATBUFFERS_CONSTEXPR std::size_t dynamic_extent = static_cast<std::size_t>(-1);
+
+// Exclude this code if MSVC2010 or non-STL Android is active.
+// The non-STL Android doesn't have `std::is_convertible` required for SFINAE.
+#if !defined(FLATBUFFERS_SPAN_MINIMAL)
+namespace internal {
+ // This is a SFINAE helper class for checking a common condition:
+ // > This overload only participates in overload resolution if
+ // > a pointer to an array of U can be converted
+ // > to a pointer to an array of E.
+ // This helper also covers the 'U -> const U' case.
+ template<class E, std::size_t Extent, class U, std::size_t N>
+ struct is_span_convertable {
+ using type =
+ typename std::conditional<std::is_convertible<U (*)[], E (*)[]>::value
+ && (Extent == dynamic_extent || N == Extent),
+ int, void>::type;
+ };
+
+} // namespace internal
+#endif // !defined(FLATBUFFERS_SPAN_MINIMAL)
+
+// T - element type; must be a complete type that is not an abstract
+// class type.
+// Extent - the number of elements in the sequence, or dynamic.
+template<class T, std::size_t Extent = dynamic_extent>
+class span FLATBUFFERS_FINAL_CLASS {
+ public:
+ typedef T element_type;
+ typedef T& reference;
+ typedef const T& const_reference;
+ typedef T* pointer;
+ typedef const T* const_pointer;
+ typedef std::size_t size_type;
+
+ static FLATBUFFERS_CONSTEXPR size_type extent = Extent;
+
+ // Returns the number of elements in the span.
+ FLATBUFFERS_CONSTEXPR_CPP11 size_type size() const FLATBUFFERS_NOEXCEPT {
+ return count_;
+ }
+
+ // Returns the size of the sequence in bytes.
+ FLATBUFFERS_CONSTEXPR_CPP11
+ size_type size_bytes() const FLATBUFFERS_NOEXCEPT {
+ return size() * sizeof(element_type);
+ }
+
+ // Checks if the span is empty.
+ FLATBUFFERS_CONSTEXPR_CPP11 bool empty() const FLATBUFFERS_NOEXCEPT {
+ return size() == 0;
+ }
+
+ // Returns a pointer to the beginning of the sequence.
+ FLATBUFFERS_CONSTEXPR_CPP11 pointer data() const FLATBUFFERS_NOEXCEPT {
+ return data_;
+ }
+
+ // Returns a reference to the idx-th element of the sequence.
+ // The behavior is undefined if the idx is greater than or equal to size().
+ FLATBUFFERS_CONSTEXPR_CPP11 reference operator[](size_type idx) const {
+ return data()[idx];
+ }
+
+ FLATBUFFERS_CONSTEXPR_CPP11 span(const span &other) FLATBUFFERS_NOEXCEPT
+ : data_(other.data_), count_(other.count_) {}
+
+ FLATBUFFERS_CONSTEXPR_CPP14 span &operator=(const span &other)
+ FLATBUFFERS_NOEXCEPT {
+ data_ = other.data_;
+ count_ = other.count_;
+ return *this;
+ }
+
+ // Limited implementation of
+ // `template <class It> constexpr std::span(It first, size_type count);`.
+ //
+ // Constructs a span that is a view over the range [first, first + count);
+ // the resulting span has: data() == first and size() == count.
+ // The behavior is undefined if [first, first + count) is not a valid range,
+ // or if (extent != flatbuffers::dynamic_extent && count != extent).
+ FLATBUFFERS_CONSTEXPR_CPP11
+ explicit span(pointer first, size_type count) FLATBUFFERS_NOEXCEPT
+ : data_ (Extent == dynamic_extent ? first : (Extent == count ? first : nullptr)),
+ count_(Extent == dynamic_extent ? count : (Extent == count ? Extent : 0)) {
+ // Make span empty if the count argument is incompatible with span<T,N>.
+ }
+
+ // Exclude this code if MSVC2010 is active. MSVC2010 isn't C++11
+ // compliant; it doesn't support default template arguments for functions.
+ #if defined(FLATBUFFERS_SPAN_MINIMAL)
+ FLATBUFFERS_CONSTEXPR_CPP11 span() FLATBUFFERS_NOEXCEPT : data_(nullptr),
+ count_(0) {
+ static_assert(extent == 0 || extent == dynamic_extent, "invalid span");
+ }
+
+ #else
+ // Constructs an empty span whose data() == nullptr and size() == 0.
+ // This overload only participates in overload resolution if
+ // extent == 0 || extent == flatbuffers::dynamic_extent.
+ // A dummy template argument N is needed as a dependency for SFINAE.
+ template<std::size_t N = 0,
+ typename internal::is_span_convertable<element_type, Extent, element_type, (N - N)>::type = 0>
+ FLATBUFFERS_CONSTEXPR_CPP11 span() FLATBUFFERS_NOEXCEPT : data_(nullptr),
+ count_(0) {
+ static_assert(extent == 0 || extent == dynamic_extent, "invalid span");
+ }
+
+ // Constructs a span that is a view over the array arr; the resulting span
+ // has size() == N and data() == std::data(arr). These overloads only
+ // participate in overload resolution if
+ // extent == std::dynamic_extent || N == extent is true and
+ // std::remove_pointer_t<decltype(std::data(arr))>(*)[]
+ // is convertible to element_type (*)[].
+ template<std::size_t N,
+ typename internal::is_span_convertable<element_type, Extent, element_type, N>::type = 0>
+ FLATBUFFERS_CONSTEXPR_CPP11 span(element_type (&arr)[N]) FLATBUFFERS_NOEXCEPT
+ : data_(arr), count_(N) {}
+
+ template<class U, std::size_t N,
+ typename internal::is_span_convertable<element_type, Extent, U, N>::type = 0>
+ FLATBUFFERS_CONSTEXPR_CPP11 span(std::array<U, N> &arr) FLATBUFFERS_NOEXCEPT
+ : data_(arr.data()), count_(N) {}
+
+ template<class U, std::size_t N,
+ typename internal::is_span_convertable<element_type, Extent, U, N>::type = 0>
+ FLATBUFFERS_CONSTEXPR_CPP11 span(const std::array<U, N> &arr) FLATBUFFERS_NOEXCEPT
+ : data_(arr.data()), count_(N) {}
+
+ // Converting constructor from another span s;
+ // the resulting span has size() == s.size() and data() == s.data().
+ // This overload only participates in overload resolution
+ // if extent == std::dynamic_extent || N == extent is true and U (*)[]
+ // is convertible to element_type (*)[].
+ template<class U, std::size_t N,
+ typename internal::is_span_convertable<element_type, Extent, U, N>::type = 0>
+ FLATBUFFERS_CONSTEXPR_CPP11 span(const flatbuffers::span<U, N> &s) FLATBUFFERS_NOEXCEPT
+ : span(s.data(), s.size()) {
+ }
+
+ #endif // !defined(FLATBUFFERS_SPAN_MINIMAL)
+
+ private:
+ // This is a naive implementation that keeps a 'count_' member even if
+ // (Extent != dynamic_extent). The members are non-const so that the copy
+ // assignment operator above is well-formed.
+ pointer data_;
+ size_type count_;
+};
+
+ #if !defined(FLATBUFFERS_SPAN_MINIMAL)
+ template<class U, std::size_t N>
+ FLATBUFFERS_CONSTEXPR_CPP11
+ flatbuffers::span<U, N> make_span(U(&arr)[N]) FLATBUFFERS_NOEXCEPT {
+ return span<U, N>(arr);
+ }
+
+ template<class U, std::size_t N>
+ FLATBUFFERS_CONSTEXPR_CPP11
+ flatbuffers::span<const U, N> make_span(const U(&arr)[N]) FLATBUFFERS_NOEXCEPT {
+ return span<const U, N>(arr);
+ }
+
+ template<class U, std::size_t N>
+ FLATBUFFERS_CONSTEXPR_CPP11
+ flatbuffers::span<U, N> make_span(std::array<U, N> &arr) FLATBUFFERS_NOEXCEPT {
+ return span<U, N>(arr);
+ }
+
+ template<class U, std::size_t N>
+ FLATBUFFERS_CONSTEXPR_CPP11
+ flatbuffers::span<const U, N> make_span(const std::array<U, N> &arr) FLATBUFFERS_NOEXCEPT {
+ return span<const U, N>(arr);
+ }
+
+ // The pointer overloads take only 'class U': a second, non-deducible
+ // std::size_t parameter would make these overloads impossible to call.
+ template<class U>
+ FLATBUFFERS_CONSTEXPR_CPP11
+ flatbuffers::span<U, dynamic_extent> make_span(U *first, std::size_t count) FLATBUFFERS_NOEXCEPT {
+ return span<U, dynamic_extent>(first, count);
+ }
+
+ template<class U>
+ FLATBUFFERS_CONSTEXPR_CPP11
+ flatbuffers::span<const U, dynamic_extent> make_span(const U *first, std::size_t count) FLATBUFFERS_NOEXCEPT {
+ return span<const U, dynamic_extent>(first, count);
+ }
+#endif
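+
+// Example (sketch; requires the non-minimal implementation above):
+//
+//   int raw[4] = {1, 2, 3, 4};
+//   auto s1 = flatbuffers::make_span(raw);    // span<int, 4>
+//   auto s2 = flatbuffers::make_span(raw, 2); // span<int, dynamic_extent>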
+
+#endif // defined(FLATBUFFERS_USE_STD_SPAN)
+
+} // namespace flatbuffers
+
+#endif // FLATBUFFERS_STL_EMULATION_H_
diff --git a/onert-micro/externals/flatbuffers/util.h b/onert-micro/externals/flatbuffers/util.h
new file mode 100644
index 000000000..e255801af
--- /dev/null
+++ b/onert-micro/externals/flatbuffers/util.h
@@ -0,0 +1,799 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2014 Google Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FLATBUFFERS_UTIL_H_
+#define FLATBUFFERS_UTIL_H_
+
+#include <errno.h>
+
+#include "flatbuffers/base.h"
+#include "flatbuffers/stl_emulation.h"
+
+#ifndef FLATBUFFERS_PREFER_PRINTF
+#include <sstream>
+#else // FLATBUFFERS_PREFER_PRINTF
+#include <float.h>
+#include <stdio.h>
+#endif // FLATBUFFERS_PREFER_PRINTF
+
+#include <iomanip>
+#include <string>
+
+namespace flatbuffers
+{
+
+// @locale-independent functions for the ASCII character set.
+
+// Fast check that a character lies in the closed range [a <= x <= b]
+// using a single compare (conditional branch) operation.
+inline bool check_ascii_range(char x, char a, char b)
+{
+ FLATBUFFERS_ASSERT(a <= b);
+ // (Hacker's Delight): `a <= x <= b` <=> `(x-a) <={u} (b-a)`.
+ // The x, a, b will be promoted to int and subtracted without overflow.
+ return static_cast<unsigned int>(x - a) <= static_cast<unsigned int>(b - a);
+}
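+
+// Worked example (illustrative): check_ascii_range('q', 'a', 'z') computes
+// ('q' - 'a') = 16u <= ('z' - 'a') = 25u and returns true with one branch;
+// for 'A', ('A' - 'a') = -32 wraps to a huge unsigned value, so the single
+// comparison fails without needing a second range check.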
+
+// Case-insensitive isalpha
+inline bool is_alpha(char c)
+{
+ // ASCII only: alpha to upper case => reset bit 0x20 (~0x20 = 0xDF).
+ return check_ascii_range(c & 0xDF, 'a' & 0xDF, 'z' & 0xDF);
+}
+
+// Check for uppercase alpha
+inline bool is_alpha_upper(char c) { return check_ascii_range(c, 'A', 'Z'); }
+
+// Check (case-insensitive) that `c` is equal to alpha.
+inline bool is_alpha_char(char c, char alpha)
+{
+ FLATBUFFERS_ASSERT(is_alpha(alpha));
+ // ASCII only: alpha to upper case => reset bit 0x20 (~0x20 = 0xDF).
+ return ((c & 0xDF) == (alpha & 0xDF));
+}
+
+// https://en.cppreference.com/w/cpp/string/byte/isxdigit
+// isdigit and isxdigit are the only standard narrow character classification
+// functions that are not affected by the currently installed C locale,
+// although some implementations (e.g. Microsoft in the 1252 codepage) may
+// classify additional single-byte characters as digits.
+inline bool is_digit(char c) { return check_ascii_range(c, '0', '9'); }
+
+inline bool is_xdigit(char c)
+{
+ // Could be replaced by a look-up table.
+ return is_digit(c) || check_ascii_range(c & 0xDF, 'a' & 0xDF, 'f' & 0xDF);
+}
+
+// Case-insensitive isalnum
+inline bool is_alnum(char c) { return is_alpha(c) || is_digit(c); }
+
+inline char CharToUpper(char c)
+{
+ return static_cast<char>(::toupper(static_cast<unsigned char>(c)));
+}
+
+inline char CharToLower(char c)
+{
+ return static_cast<char>(::tolower(static_cast<unsigned char>(c)));
+}
+
+// @end-locale-independent functions for ASCII character set
+
+#ifdef FLATBUFFERS_PREFER_PRINTF
+template <typename T> size_t IntToDigitCount(T t)
+{
+ size_t digit_count = 0;
+ // Count the sign for negative numbers
+ if (t < 0)
+ digit_count++;
+ // Count a single 0 left of the dot for fractional numbers
+ if (-1 < t && t < 1)
+ digit_count++;
+ // Count digits until fractional part
+ T eps = std::numeric_limits<float>::epsilon();
+ while (t <= (-1 + eps) || (1 - eps) <= t)
+ {
+ t /= 10;
+ digit_count++;
+ }
+ return digit_count;
+}
+
+template <typename T> size_t NumToStringWidth(T t, int precision = 0)
+{
+ size_t string_width = IntToDigitCount(t);
+ // Count the dot for floating point numbers
+ if (precision)
+ string_width += (precision + 1);
+ return string_width;
+}
+
+template <typename T> std::string NumToStringImplWrapper(T t, const char *fmt, int precision = 0)
+{
+ size_t string_width = NumToStringWidth(t, precision);
+ std::string s(string_width, 0x00);
+ // Pass size() + 1 so snprintf can write its NUL terminator into std::string's trailing null slot.
+ snprintf(const_cast<char *>(s.data()), (s.size() + 1), fmt, string_width, t);
+ return s;
+}
+#endif // FLATBUFFERS_PREFER_PRINTF
+
+// Convert an integer or floating point value to a string.
+// In contrast to std::stringstream, "char" values are
+// converted to a string of digits, and we don't use scientific notation.
+template <typename T> std::string NumToString(T t)
+{
+ // clang-format off
+
+ #ifndef FLATBUFFERS_PREFER_PRINTF
+ std::stringstream ss;
+ ss << t;
+ return ss.str();
+ #else // FLATBUFFERS_PREFER_PRINTF
+ auto v = static_cast<long long>(t);
+ return NumToStringImplWrapper(v, "%.*lld");
+ #endif // FLATBUFFERS_PREFER_PRINTF
+ // clang-format on
+}
+// Specializations so that char types are formatted as numbers, not characters.
+template <> inline std::string NumToString<signed char>(signed char t)
+{
+ return NumToString(static_cast<int>(t));
+}
+template <> inline std::string NumToString<unsigned char>(unsigned char t)
+{
+ return NumToString(static_cast<int>(t));
+}
+template <> inline std::string NumToString<char>(char t)
+{
+ return NumToString(static_cast<int>(t));
+}
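+
+// Example (illustrative): because of the specializations above, character
+// types are formatted as numbers rather than glyphs:
+//
+//   flatbuffers::NumToString('A');                     // "65", not "A"
+//   flatbuffers::NumToString(static_cast<int8_t>(-5)); // "-5"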
+#if defined(FLATBUFFERS_CPP98_STL)
+template <> inline std::string NumToString<long long>(long long t)
+{
+ char buf[21]; // 19 digits of INT64_MAX + sign + NUL terminator
+ snprintf(buf, sizeof(buf), "%lld", t);
+ return std::string(buf);
+}
+
+template <> inline std::string NumToString<unsigned long long>(unsigned long long t)
+{
+ char buf[22]; // 20 digits of UINT64_MAX + NUL terminator (one byte spare)
+ snprintf(buf, sizeof(buf), "%llu", t);
+ return std::string(buf);
+}
+#endif // defined(FLATBUFFERS_CPP98_STL)
+
+// Special versions for floats/doubles.
+template <typename T> std::string FloatToString(T t, int precision)
+{
+ // clang-format off
+
+ #ifndef FLATBUFFERS_PREFER_PRINTF
+ // to_string() prints different numbers of digits for floats depending on
+ // platform and isn't available on Android, so we use stringstream
+ std::stringstream ss;
+ // Use std::fixed to suppress scientific notation.
+ ss << std::fixed;
+ // Default precision is 6, we want that to be higher for doubles.
+ ss << std::setprecision(precision);
+ ss << t;
+ auto s = ss.str();
+ #else // FLATBUFFERS_PREFER_PRINTF
+ auto v = static_cast<double>(t);
+ auto s = NumToStringImplWrapper(v, "%0.*f", precision);
+ #endif // FLATBUFFERS_PREFER_PRINTF
+ // clang-format on
+ // Sadly, std::fixed turns "1" into "1.00000", so here we undo that.
+ auto p = s.find_last_not_of('0');
+ if (p != std::string::npos)
+ {
+ // Strip trailing zeroes. If it is a whole number, keep one zero.
+ s.resize(p + (s[p] == '.' ? 2 : 1));
+ }
+ return s;
+}
+
+template <> inline std::string NumToString<double>(double t) { return FloatToString(t, 12); }
+template <> inline std::string NumToString<float>(float t) { return FloatToString(t, 6); }
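+
+// Example (illustrative): the trailing-zero stripping above keeps exactly one
+// fractional zero for whole numbers:
+//
+//   flatbuffers::FloatToString(1.0, 6);   // "1.0", not "1.000000"
+//   flatbuffers::FloatToString(1.25, 6);  // "1.25"
+//   flatbuffers::NumToString(0.5);        // "0.5", via FloatToString(t, 12)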
+
+// Convert an integer value to a hexadecimal string.
+// The returned string is always xdigits characters long, left-padded with zeros.
+// For example, IntToStringHex(0x23, 8) returns the string "00000023".
+inline std::string IntToStringHex(int i, int xdigits)
+{
+ FLATBUFFERS_ASSERT(i >= 0);
+ // clang-format off
+
+ #ifndef FLATBUFFERS_PREFER_PRINTF
+ std::stringstream ss;
+ ss << std::setw(xdigits) << std::setfill('0') << std::hex << std::uppercase
+ << i;
+ return ss.str();
+ #else // FLATBUFFERS_PREFER_PRINTF
+ return NumToStringImplWrapper(i, "%.*X", xdigits);
+ #endif // FLATBUFFERS_PREFER_PRINTF
+ // clang-format on
+}
+
+// clang-format off
+// Use locale independent functions {strtod_l, strtof_l, strtoll_l, strtoull_l}.
+#if defined(FLATBUFFERS_LOCALE_INDEPENDENT) && (FLATBUFFERS_LOCALE_INDEPENDENT > 0)
+ class ClassicLocale {
+ #ifdef _MSC_VER
+ typedef _locale_t locale_type;
+ #else
+ typedef locale_t locale_type; // POSIX.1-2008 locale_t type
+ #endif
+ ClassicLocale();
+ ~ClassicLocale();
+ locale_type locale_;
+ static ClassicLocale instance_;
+ public:
+ static locale_type Get() { return instance_.locale_; }
+ };
+
+ #ifdef _MSC_VER
+ #define __strtoull_impl(s, pe, b) _strtoui64_l(s, pe, b, ClassicLocale::Get())
+ #define __strtoll_impl(s, pe, b) _strtoi64_l(s, pe, b, ClassicLocale::Get())
+ #define __strtod_impl(s, pe) _strtod_l(s, pe, ClassicLocale::Get())
+ #define __strtof_impl(s, pe) _strtof_l(s, pe, ClassicLocale::Get())
+ #else
+ #define __strtoull_impl(s, pe, b) strtoull_l(s, pe, b, ClassicLocale::Get())
+ #define __strtoll_impl(s, pe, b) strtoll_l(s, pe, b, ClassicLocale::Get())
+ #define __strtod_impl(s, pe) strtod_l(s, pe, ClassicLocale::Get())
+ #define __strtof_impl(s, pe) strtof_l(s, pe, ClassicLocale::Get())
+ #endif
+#else
+ #define __strtod_impl(s, pe) strtod(s, pe)
+ #define __strtof_impl(s, pe) static_cast<float>(strtod(s, pe))
+ #ifdef _MSC_VER
+ #define __strtoull_impl(s, pe, b) _strtoui64(s, pe, b)
+ #define __strtoll_impl(s, pe, b) _strtoi64(s, pe, b)
+ #else
+ #define __strtoull_impl(s, pe, b) strtoull(s, pe, b)
+ #define __strtoll_impl(s, pe, b) strtoll(s, pe, b)
+ #endif
+#endif
+
+inline void strtoval_impl(int64_t *val, const char *str, char **endptr,
+ int base) {
+ *val = __strtoll_impl(str, endptr, base);
+}
+
+inline void strtoval_impl(uint64_t *val, const char *str, char **endptr,
+ int base) {
+ *val = __strtoull_impl(str, endptr, base);
+}
+
+inline void strtoval_impl(double *val, const char *str, char **endptr) {
+ *val = __strtod_impl(str, endptr);
+}
+
+// UBSAN: double to float is safe if numeric_limits<float>::is_iec559 is true.
+__supress_ubsan__("float-cast-overflow")
+inline void strtoval_impl(float *val, const char *str, char **endptr) {
+ *val = __strtof_impl(str, endptr);
+}
+#undef __strtoull_impl
+#undef __strtoll_impl
+#undef __strtod_impl
+#undef __strtof_impl
+// clang-format on
+
+// Adaptor for strtoull()/strtoll().
+// Flatbuffers accepts numbers with any count of leading zeros (-009 is -9),
+// while strtoll with base=0 interprets first leading zero as octal prefix.
+// In the future, a 0b prefix for binary literals (e.g. 0b0101) could be added.
+// 1) Checks the errno code for an overflow condition (out of range).
+// 2) If base <= 0, the function tries to detect the number's base from its prefix.
+//
+// Return value (like strtoull and strtoll, but partial results are rejected):
+// - If successful, an integer value corresponding to the str is returned.
+// - If a full string conversion can't be performed, 0 is returned.
+// - If the converted value falls out of the range of the corresponding return
+// type, a range error occurs and the value MAX(T)/MIN(T) is returned.
+template <typename T>
+inline bool StringToIntegerImpl(T *val, const char *const str, const int base = 0,
+ const bool check_errno = true)
+{
+ // T is int64_t or uint64_t
+ FLATBUFFERS_ASSERT(str);
+ if (base <= 0)
+ {
+ auto s = str;
+ while (*s && !is_digit(*s))
+ s++;
+ if (s[0] == '0' && is_alpha_char(s[1], 'X'))
+ return StringToIntegerImpl(val, str, 16, check_errno);
+ // if the prefix doesn't match, fall back to base=10
+ return StringToIntegerImpl(val, str, 10, check_errno);
+ }
+ else
+ {
+ if (check_errno)
+ errno = 0; // clear thread-local errno
+ auto endptr = str;
+ strtoval_impl(val, str, const_cast<char **>(&endptr), base);
+ if ((*endptr != '\0') || (endptr == str))
+ {
+ *val = 0; // erase partial result
+ return false; // invalid string
+ }
+ // errno signals out-of-range; val already holds MAX/MIN, report failure
+ if (check_errno && errno)
+ return false;
+ return true;
+ }
+}
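+
+// Usage sketch (hypothetical caller code): base detection and clamping as
+// described in the comment block above:
+//
+//   int64_t v = 0;
+//   flatbuffers::StringToIntegerImpl(&v, "-009");  // true, v == -9 (base 10)
+//   flatbuffers::StringToIntegerImpl(&v, "0x1A");  // true, v == 26 (base 16)
+//   flatbuffers::StringToIntegerImpl(&v, "12ab");  // false, v erased to 0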
+
+template <typename T> inline bool StringToFloatImpl(T *val, const char *const str)
+{
+ // Type T must be either float or double.
+ FLATBUFFERS_ASSERT(str && val);
+ auto end = str;
+ strtoval_impl(val, str, const_cast<char **>(&end));
+ auto done = (end != str) && (*end == '\0');
+ if (!done)
+ *val = 0; // erase partial result
+ return done;
+}
+
+// Convert a string to an instance of T.
+// Return value (matching StringToIntegerImpl and strtod):
+// - If successful, a numeric value corresponding to the str is returned.
+// - If full string conversion can't be performed, 0 is returned.
+// - If the converted value falls out of range of corresponding return type, a
+// range error occurs. In this case value MAX(T)/MIN(T) is returned.
+template <typename T> inline bool StringToNumber(const char *s, T *val)
+{
+ // The static_assert rejects `unsigned long` and `signed long` on LP64,
+ // where they are 64-bit. If necessary, this could be relaxed with
+ // flatbuffers::enable_if<B,T>.
+ static_assert(sizeof(T) < sizeof(int64_t), "unexpected type T");
+ FLATBUFFERS_ASSERT(s && val);
+ int64_t i64;
+ // The errno check isn't needed; StringToIntegerImpl leaves MAX/MIN in i64 on overflow.
+ if (StringToIntegerImpl(&i64, s, 0, false))
+ {
+ const int64_t max = (flatbuffers::numeric_limits<T>::max)();
+ const int64_t min = flatbuffers::numeric_limits<T>::lowest();
+ if (i64 > max)
+ {
+ *val = static_cast<T>(max);
+ return false;
+ }
+ if (i64 < min)
+ {
+ // For unsigned types return max to distinguish from
+ // "no conversion can be performed" when 0 is returned.
+ *val = static_cast<T>(flatbuffers::is_unsigned<T>::value ? max : min);
+ return false;
+ }
+ *val = static_cast<T>(i64);
+ return true;
+ }
+ *val = 0;
+ return false;
+}
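+
+// Example (illustrative): out-of-range input is clamped and reported as a
+// failure, so callers can tell overflow apart from "no conversion":
+//
+//   uint8_t u = 0;
+//   flatbuffers::StringToNumber("300", &u);  // false, u == 255 (clamped)
+//   flatbuffers::StringToNumber("-1", &u);   // false, u == 255 (unsigned)
+//   flatbuffers::StringToNumber("42", &u);   // true,  u == 42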
+
+template <> inline bool StringToNumber<int64_t>(const char *str, int64_t *val)
+{
+ return StringToIntegerImpl(val, str);
+}
+
+template <> inline bool StringToNumber<uint64_t>(const char *str, uint64_t *val)
+{
+ if (!StringToIntegerImpl(val, str))
+ return false;
+ // strtoull accepts negative numbers:
+ // if the minus sign was part of the input sequence, the numeric value
+ // calculated from the sequence of digits is negated as if by unary minus
+ // in the result type, which applies unsigned integer wraparound rules.
+ // Fix this behaviour here (except for -0).
+ if (*val)
+ {
+ auto s = str;
+ while (*s && !is_digit(*s))
+ s++;
+ s = (s > str) ? (s - 1) : s; // step back one character
+ if (*s == '-')
+ {
+ // For unsigned types return the max to distinguish from
+ // "no conversion can be performed".
+ *val = (flatbuffers::numeric_limits<uint64_t>::max)();
+ return false;
+ }
+ }
+ return true;
+}
+
+template <> inline bool StringToNumber(const char *s, float *val)
+{
+ return StringToFloatImpl(val, s);
+}
+
+template <> inline bool StringToNumber(const char *s, double *val)
+{
+ return StringToFloatImpl(val, s);
+}
+
+inline int64_t StringToInt(const char *s, int base = 10)
+{
+ int64_t val;
+ return StringToIntegerImpl(&val, s, base) ? val : 0;
+}
+
+inline uint64_t StringToUInt(const char *s, int base = 10)
+{
+ uint64_t val;
+ return StringToIntegerImpl(&val, s, base) ? val : 0;
+}
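+
+// Example (illustrative): the convenience wrappers collapse failure to 0, so
+// a 0 result is ambiguous between "parsed zero" and "parse error":
+//
+//   flatbuffers::StringToInt("42");     // 42
+//   flatbuffers::StringToInt("42abc");  // 0 (partial parse rejected)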
+
+typedef bool (*LoadFileFunction)(const char *filename, bool binary, std::string *dest);
+typedef bool (*FileExistsFunction)(const char *filename);
+
+LoadFileFunction SetLoadFileFunction(LoadFileFunction load_file_function);
+
+FileExistsFunction SetFileExistsFunction(FileExistsFunction file_exists_function);
+
+// Check if file "name" exists.
+bool FileExists(const char *name);
+
+// Check if "name" exists and it is also a directory.
+bool DirExists(const char *name);
+
+// Load file "name" into "buf" returning true if successful
+// false otherwise. If "binary" is false data is read
+// using ifstream's text mode, otherwise data is read with
+// no transcoding.
+bool LoadFile(const char *name, bool binary, std::string *buf);
+
+// Save data "buf" of length "len" bytes into a file
+// "name" returning true if successful, false otherwise.
+// If "binary" is false data is written using ifstream's
+// text mode, otherwise data is written with no
+// transcoding.
+bool SaveFile(const char *name, const char *buf, size_t len, bool binary);
+
+// Save data "buf" into file "name" returning true if
+// successful, false otherwise. If "binary" is false,
+// data is written using ofstream's text mode, otherwise
+// data is written with no transcoding.
+inline bool SaveFile(const char *name, const std::string &buf, bool binary)
+{
+ return SaveFile(name, buf.c_str(), buf.size(), binary);
+}
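+
+// Usage sketch (hypothetical caller code; "model.bin" and "copy.bin" are
+// placeholder names, and the out-of-line definitions must be linked in):
+//
+//   std::string contents;
+//   if (flatbuffers::LoadFile("model.bin", /*binary=*/true, &contents)) {
+//     flatbuffers::SaveFile("copy.bin", contents, /*binary=*/true);
+//   }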
+
+// Functionality for minimalistic portable path handling.
+
+// The functions below behave correctly regardless of whether POSIX ('/') or
+// Windows ('/' or '\\') separators are used.
+
+// Any new separators inserted are always POSIX.
+FLATBUFFERS_CONSTEXPR char kPathSeparator = '/';
+
+// Returns the path with the extension, if any, removed.
+std::string StripExtension(const std::string &filepath);
+
+// Returns the extension, if any.
+std::string GetExtension(const std::string &filepath);
+
+// Return the last component of the path, after the last separator.
+std::string StripPath(const std::string &filepath);
+
+// Strip the last component of the path + separator.
+std::string StripFileName(const std::string &filepath);
+
+// Concatenates a path with a filename, regardless of whether the path
+// ends in a separator or not.
+std::string ConCatPathFileName(const std::string &path, const std::string &filename);
+
+// Replaces any '\\' separators with '/'
+std::string PosixPath(const char *path);
+
+// This function ensures a directory exists by recursively
+// creating directories for any parts of the path that don't exist yet.
+void EnsureDirExists(const std::string &filepath);
+
+// Obtains the absolute path from any other path.
+// Returns the input path if the absolute path couldn't be resolved.
+std::string AbsolutePath(const std::string &filepath);
+
+// To and from UTF-8 unicode conversion functions
+
+// Convert a unicode code point into a UTF-8 representation by appending it
+// to a string. Returns the number of bytes generated.
+inline int ToUTF8(uint32_t ucc, std::string *out)
+{
+ FLATBUFFERS_ASSERT(!(ucc & 0x80000000)); // Top bit can't be set.
+ // 6 possible encodings: http://en.wikipedia.org/wiki/UTF-8
+ for (int i = 0; i < 6; i++)
+ {
+ // Max bits this encoding can represent.
+ uint32_t max_bits = 6 + i * 5 + static_cast<int>(!i);
+ if (ucc < (1u << max_bits))
+ { // does it fit?
+ // Remaining bits not encoded in the first byte, store 6 bits each
+ uint32_t remain_bits = i * 6;
+ // Store first byte:
+ (*out) += static_cast<char>((0xFE << (max_bits - remain_bits)) | (ucc >> remain_bits));
+ // Store remaining bytes:
+ for (int j = i - 1; j >= 0; j--)
+ {
+ (*out) += static_cast<char>(((ucc >> (j * 6)) & 0x3F) | 0x80);
+ }
+ return i + 1; // Return the number of bytes added.
+ }
+ }
+ FLATBUFFERS_ASSERT(0); // Impossible to arrive here.
+ return -1;
+}
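+
+// Worked example (illustrative): U+20AC (the euro sign) fits in 16 bits, so
+// the loop selects the 3-byte encoding (i == 2): the first byte is
+// (0xFE << 4) | (0x20AC >> 12) == 0xE2, followed by continuation bytes 0x82
+// and 0xAC. ToUTF8(0x20AC, &s) therefore appends "\xE2\x82\xAC" and returns 3.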
+
+// Converts whatever prefix of the incoming string corresponds to a valid
+// UTF-8 sequence into a Unicode code point. The incoming pointer will have
+// been advanced past all bytes parsed.
+// Returns -1 upon a corrupt UTF-8 encoding (ignore the incoming pointer in
+// this case).
+inline int FromUTF8(const char **in)
+{
+ int len = 0;
+ // Count leading 1 bits.
+ for (int mask = 0x80; mask >= 0x04; mask >>= 1)
+ {
+ if (**in & mask)
+ {
+ len++;
+ }
+ else
+ {
+ break;
+ }
+ }
+ if ((static_cast<unsigned char>(**in) << len) & 0x80)
+ return -1; // Bit after leading 1's must be 0.
+ if (!len)
+ return *(*in)++;
+ // Multi-byte UTF-8 sequences are between 2 and 4 bytes long.
+ if (len < 2 || len > 4)
+ {
+ return -1;
+ }
+ // Grab initial bits of the code.
+ int ucc = *(*in)++ & ((1 << (7 - len)) - 1);
+ for (int i = 0; i < len - 1; i++)
+ {
+ if ((**in & 0xC0) != 0x80)
+ return -1; // Upper two bits must be 10 (continuation byte).
+ ucc <<= 6;
+ ucc |= *(*in)++ & 0x3F; // Grab 6 more bits of the code.
+ }
+ // UTF-8 cannot encode values between 0xD800 and 0xDFFF (reserved for
+ // UTF-16 surrogate pairs).
+ if (ucc >= 0xD800 && ucc <= 0xDFFF)
+ {
+ return -1;
+ }
+ // UTF-8 must represent code points in their shortest possible encoding.
+ switch (len)
+ {
+ case 2:
+ // Two bytes of UTF-8 can represent code points from U+0080 to U+07FF.
+ if (ucc < 0x0080 || ucc > 0x07FF)
+ {
+ return -1;
+ }
+ break;
+ case 3:
+ // Three bytes of UTF-8 can represent code points from U+0800 to U+FFFF.
+ if (ucc < 0x0800 || ucc > 0xFFFF)
+ {
+ return -1;
+ }
+ break;
+ case 4:
+ // Four bytes of UTF-8 can represent code points from U+10000 to U+10FFFF.
+ if (ucc < 0x10000 || ucc > 0x10FFFF)
+ {
+ return -1;
+ }
+ break;
+ }
+ return ucc;
+}
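+
+// Worked example (illustrative): decoding the bytes produced above:
+//
+//   const char *p = "\xE2\x82\xAC";
+//   int ucc = flatbuffers::FromUTF8(&p);  // ucc == 0x20AC, p advanced 3 bytes
+//
+// Surrogates, overlong encodings and truncated sequences all return -1.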
+
+#ifndef FLATBUFFERS_PREFER_PRINTF
+// Wraps a string to a maximum length, inserting new lines where necessary. Any
+// existing whitespace will be collapsed down to a single space. A prefix or
+// suffix can be provided, which will be inserted before or after a wrapped
+// line, respectively.
+inline std::string WordWrap(const std::string in, size_t max_length,
+ const std::string wrapped_line_prefix,
+ const std::string wrapped_line_suffix)
+{
+ std::istringstream in_stream(in);
+ std::string wrapped, line, word;
+
+ in_stream >> word;
+ line = word;
+
+ while (in_stream >> word)
+ {
+ if ((line.length() + 1 + word.length() + wrapped_line_suffix.length()) < max_length)
+ {
+ line += " " + word;
+ }
+ else
+ {
+ wrapped += line + wrapped_line_suffix + "\n";
+ line = wrapped_line_prefix + word;
+ }
+ }
+ wrapped += line;
+
+ return wrapped;
+}
+#endif // !FLATBUFFERS_PREFER_PRINTF
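+
+// Example (illustrative): WordWrap("one two three four", 10, "", "") returns
+// "one two\nthree\nfour" -- words are packed greedily while the line plus the
+// suffix stays under max_length, and runs of whitespace collapse to single
+// spaces via stream extraction.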
+
+inline bool EscapeString(const char *s, size_t length, std::string *_text, bool allow_non_utf8,
+ bool natural_utf8)
+{
+ std::string &text = *_text;
+ text += "\"";
+ for (uoffset_t i = 0; i < length; i++)
+ {
+ char c = s[i];
+ switch (c)
+ {
+ case '\n':
+ text += "\\n";
+ break;
+ case '\t':
+ text += "\\t";
+ break;
+ case '\r':
+ text += "\\r";
+ break;
+ case '\b':
+ text += "\\b";
+ break;
+ case '\f':
+ text += "\\f";
+ break;
+ case '\"':
+ text += "\\\"";
+ break;
+ case '\\':
+ text += "\\\\";
+ break;
+ default:
+ if (c >= ' ' && c <= '~')
+ {
+ text += c;
+ }
+ else
+ {
+ // Not printable ASCII data. Let's see if it's valid UTF-8 first:
+ const char *utf8 = s + i;
+ int ucc = FromUTF8(&utf8);
+ if (ucc < 0)
+ {
+ if (allow_non_utf8)
+ {
+ text += "\\x";
+ text += IntToStringHex(static_cast<uint8_t>(c), 2);
+ }
+ else
+ {
+ // There are two cases here:
+ //
+ // 1) We reached here by parsing an IDL file. In that case,
+ // we previously checked for non-UTF-8, so we shouldn't reach
+ // here.
+ //
+ // 2) We reached here by someone calling GenerateText()
+ // on a previously-serialized flatbuffer. The data might have
+ // non-UTF-8 Strings, or might be corrupt.
+ //
+ // In both cases, we have to give up and inform the caller
+ // they have no JSON.
+ return false;
+ }
+ }
+ else
+ {
+ if (natural_utf8)
+ {
+ // utf8 now points past all UTF-8 bytes parsed
+ text.append(s + i, static_cast<size_t>(utf8 - s - i));
+ }
+ else if (ucc <= 0xFFFF)
+ {
+ // Parses as Unicode within JSON's \uXXXX range, so use that.
+ text += "\\u";
+ text += IntToStringHex(ucc, 4);
+ }
+ else if (ucc <= 0x10FFFF)
+ {
+ // Encode Unicode SMP values to a surrogate pair using two \u
+ // escapes.
+ uint32_t base = ucc - 0x10000;
+ auto high_surrogate = (base >> 10) + 0xD800;
+ auto low_surrogate = (base & 0x03FF) + 0xDC00;
+ text += "\\u";
+ text += IntToStringHex(high_surrogate, 4);
+ text += "\\u";
+ text += IntToStringHex(low_surrogate, 4);
+ }
+ // Skip past characters recognized.
+ i = static_cast<uoffset_t>(utf8 - s - 1);
+ }
+ }
+ break;
+ }
+ }
+ text += "\"";
+ return true;
+}
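+
+// Example (illustrative): EscapeString produces a double-quoted JSON string:
+//
+//   std::string out;
+//   flatbuffers::EscapeString("tab\there", 8, &out, false, false);
+//   // out == "\"tab\\there\"" (the tab byte becomes the two characters \t)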
+
+inline std::string BufferToHexText(const void *buffer, size_t buffer_size, size_t max_length,
+ const std::string &wrapped_line_prefix,
+ const std::string &wrapped_line_suffix)
+{
+ std::string text = wrapped_line_prefix;
+ size_t start_offset = 0;
+ const char *s = reinterpret_cast<const char *>(buffer);
+ for (size_t i = 0; s && i < buffer_size; i++)
+ {
+ // Is this the last iteration, or do we have more to process?
+ bool have_more = i + 1 < buffer_size;
+ text += "0x";
+ text += IntToStringHex(static_cast<uint8_t>(s[i]), 2);
+ if (have_more)
+ {
+ text += ',';
+ }
+ // If we have more to process and we reached max_length
+ if (have_more && text.size() + wrapped_line_suffix.size() >= start_offset + max_length)
+ {
+ text += wrapped_line_suffix;
+ text += '\n';
+ start_offset = text.size();
+ text += wrapped_line_prefix;
+ }
+ }
+ text += wrapped_line_suffix;
+ return text;
+}
+
+// Remove paired quotes in a string: "text"|'text' -> text.
+std::string RemoveStringQuotes(const std::string &s);
+
+// Change the global C locale to the locale named <locale_name>.
+// Returns the actual locale name in <_value>, useful if locale_name is "" or
+// null.
+bool SetGlobalTestLocale(const char *locale_name, std::string *_value = nullptr);
+
+// Read (or test for) the value of an environment variable.
+bool ReadEnvironmentVariable(const char *var_name, std::string *_value = nullptr);
+
+// MSVC specific: Send all assert reports to STDOUT to prevent CI hangs.
+void SetupDefaultCRTReportMode();
+
+} // namespace flatbuffers
+
+#endif // FLATBUFFERS_UTIL_H_
diff --git a/onert-micro/externals/gen/circle-generated/circle/schema_generated.h b/onert-micro/externals/gen/circle-generated/circle/schema_generated.h
new file mode 100644
index 000000000..2531319f9
--- /dev/null
+++ b/onert-micro/externals/gen/circle-generated/circle/schema_generated.h
@@ -0,0 +1,24984 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// automatically generated by the FlatBuffers compiler, do not modify
+
+#ifndef FLATBUFFERS_GENERATED_SCHEMA_CIRCLE_H_
+#define FLATBUFFERS_GENERATED_SCHEMA_CIRCLE_H_
+
+#include "flatbuffers/flatbuffers.h"
+
+namespace circle
+{
+
+struct CustomQuantization;
+struct CustomQuantizationBuilder;
+struct CustomQuantizationT;
+
+struct QuantizationParameters;
+struct QuantizationParametersBuilder;
+struct QuantizationParametersT;
+
+struct Int32Vector;
+struct Int32VectorBuilder;
+struct Int32VectorT;
+
+struct Uint16Vector;
+struct Uint16VectorBuilder;
+struct Uint16VectorT;
+
+struct Uint8Vector;
+struct Uint8VectorBuilder;
+struct Uint8VectorT;
+
+struct DimensionMetadata;
+struct DimensionMetadataBuilder;
+struct DimensionMetadataT;
+
+struct SparsityParameters;
+struct SparsityParametersBuilder;
+struct SparsityParametersT;
+
+struct Tensor;
+struct TensorBuilder;
+struct TensorT;
+
+struct Conv2DOptions;
+struct Conv2DOptionsBuilder;
+struct Conv2DOptionsT;
+
+struct Conv3DOptions;
+struct Conv3DOptionsBuilder;
+struct Conv3DOptionsT;
+
+struct Pool2DOptions;
+struct Pool2DOptionsBuilder;
+struct Pool2DOptionsT;
+
+struct DepthwiseConv2DOptions;
+struct DepthwiseConv2DOptionsBuilder;
+struct DepthwiseConv2DOptionsT;
+
+struct ConcatEmbeddingsOptions;
+struct ConcatEmbeddingsOptionsBuilder;
+struct ConcatEmbeddingsOptionsT;
+
+struct LSHProjectionOptions;
+struct LSHProjectionOptionsBuilder;
+struct LSHProjectionOptionsT;
+
+struct SVDFOptions;
+struct SVDFOptionsBuilder;
+struct SVDFOptionsT;
+
+struct RNNOptions;
+struct RNNOptionsBuilder;
+struct RNNOptionsT;
+
+struct SequenceRNNOptions;
+struct SequenceRNNOptionsBuilder;
+struct SequenceRNNOptionsT;
+
+struct BidirectionalSequenceRNNOptions;
+struct BidirectionalSequenceRNNOptionsBuilder;
+struct BidirectionalSequenceRNNOptionsT;
+
+struct FullyConnectedOptions;
+struct FullyConnectedOptionsBuilder;
+struct FullyConnectedOptionsT;
+
+struct SoftmaxOptions;
+struct SoftmaxOptionsBuilder;
+struct SoftmaxOptionsT;
+
+struct ConcatenationOptions;
+struct ConcatenationOptionsBuilder;
+struct ConcatenationOptionsT;
+
+struct AddOptions;
+struct AddOptionsBuilder;
+struct AddOptionsT;
+
+struct MulOptions;
+struct MulOptionsBuilder;
+struct MulOptionsT;
+
+struct L2NormOptions;
+struct L2NormOptionsBuilder;
+struct L2NormOptionsT;
+
+struct LocalResponseNormalizationOptions;
+struct LocalResponseNormalizationOptionsBuilder;
+struct LocalResponseNormalizationOptionsT;
+
+struct LSTMOptions;
+struct LSTMOptionsBuilder;
+struct LSTMOptionsT;
+
+struct UnidirectionalSequenceLSTMOptions;
+struct UnidirectionalSequenceLSTMOptionsBuilder;
+struct UnidirectionalSequenceLSTMOptionsT;
+
+struct BidirectionalSequenceLSTMOptions;
+struct BidirectionalSequenceLSTMOptionsBuilder;
+struct BidirectionalSequenceLSTMOptionsT;
+
+struct ResizeBilinearOptions;
+struct ResizeBilinearOptionsBuilder;
+struct ResizeBilinearOptionsT;
+
+struct ResizeNearestNeighborOptions;
+struct ResizeNearestNeighborOptionsBuilder;
+struct ResizeNearestNeighborOptionsT;
+
+struct CallOptions;
+struct CallOptionsBuilder;
+struct CallOptionsT;
+
+struct PadOptions;
+struct PadOptionsBuilder;
+struct PadOptionsT;
+
+struct PadV2Options;
+struct PadV2OptionsBuilder;
+struct PadV2OptionsT;
+
+struct ReshapeOptions;
+struct ReshapeOptionsBuilder;
+struct ReshapeOptionsT;
+
+struct SpaceToBatchNDOptions;
+struct SpaceToBatchNDOptionsBuilder;
+struct SpaceToBatchNDOptionsT;
+
+struct BatchToSpaceNDOptions;
+struct BatchToSpaceNDOptionsBuilder;
+struct BatchToSpaceNDOptionsT;
+
+struct SkipGramOptions;
+struct SkipGramOptionsBuilder;
+struct SkipGramOptionsT;
+
+struct SpaceToDepthOptions;
+struct SpaceToDepthOptionsBuilder;
+struct SpaceToDepthOptionsT;
+
+struct DepthToSpaceOptions;
+struct DepthToSpaceOptionsBuilder;
+struct DepthToSpaceOptionsT;
+
+struct SubOptions;
+struct SubOptionsBuilder;
+struct SubOptionsT;
+
+struct DivOptions;
+struct DivOptionsBuilder;
+struct DivOptionsT;
+
+struct TopKV2Options;
+struct TopKV2OptionsBuilder;
+struct TopKV2OptionsT;
+
+struct EmbeddingLookupSparseOptions;
+struct EmbeddingLookupSparseOptionsBuilder;
+struct EmbeddingLookupSparseOptionsT;
+
+struct GatherOptions;
+struct GatherOptionsBuilder;
+struct GatherOptionsT;
+
+struct TransposeOptions;
+struct TransposeOptionsBuilder;
+struct TransposeOptionsT;
+
+struct ExpOptions;
+struct ExpOptionsBuilder;
+struct ExpOptionsT;
+
+struct CosOptions;
+struct CosOptionsBuilder;
+struct CosOptionsT;
+
+struct ReducerOptions;
+struct ReducerOptionsBuilder;
+struct ReducerOptionsT;
+
+struct SqueezeOptions;
+struct SqueezeOptionsBuilder;
+struct SqueezeOptionsT;
+
+struct SplitOptions;
+struct SplitOptionsBuilder;
+struct SplitOptionsT;
+
+struct SplitVOptions;
+struct SplitVOptionsBuilder;
+struct SplitVOptionsT;
+
+struct StridedSliceOptions;
+struct StridedSliceOptionsBuilder;
+struct StridedSliceOptionsT;
+
+struct LogSoftmaxOptions;
+struct LogSoftmaxOptionsBuilder;
+struct LogSoftmaxOptionsT;
+
+struct CastOptions;
+struct CastOptionsBuilder;
+struct CastOptionsT;
+
+struct DequantizeOptions;
+struct DequantizeOptionsBuilder;
+struct DequantizeOptionsT;
+
+struct MaximumMinimumOptions;
+struct MaximumMinimumOptionsBuilder;
+struct MaximumMinimumOptionsT;
+
+struct TileOptions;
+struct TileOptionsBuilder;
+struct TileOptionsT;
+
+struct ArgMaxOptions;
+struct ArgMaxOptionsBuilder;
+struct ArgMaxOptionsT;
+
+struct ArgMinOptions;
+struct ArgMinOptionsBuilder;
+struct ArgMinOptionsT;
+
+struct GreaterOptions;
+struct GreaterOptionsBuilder;
+struct GreaterOptionsT;
+
+struct GreaterEqualOptions;
+struct GreaterEqualOptionsBuilder;
+struct GreaterEqualOptionsT;
+
+struct LessOptions;
+struct LessOptionsBuilder;
+struct LessOptionsT;
+
+struct LessEqualOptions;
+struct LessEqualOptionsBuilder;
+struct LessEqualOptionsT;
+
+struct NegOptions;
+struct NegOptionsBuilder;
+struct NegOptionsT;
+
+struct SelectOptions;
+struct SelectOptionsBuilder;
+struct SelectOptionsT;
+
+struct SliceOptions;
+struct SliceOptionsBuilder;
+struct SliceOptionsT;
+
+struct TransposeConvOptions;
+struct TransposeConvOptionsBuilder;
+struct TransposeConvOptionsT;
+
+struct ExpandDimsOptions;
+struct ExpandDimsOptionsBuilder;
+struct ExpandDimsOptionsT;
+
+struct SparseToDenseOptions;
+struct SparseToDenseOptionsBuilder;
+struct SparseToDenseOptionsT;
+
+struct EqualOptions;
+struct EqualOptionsBuilder;
+struct EqualOptionsT;
+
+struct NotEqualOptions;
+struct NotEqualOptionsBuilder;
+struct NotEqualOptionsT;
+
+struct ShapeOptions;
+struct ShapeOptionsBuilder;
+struct ShapeOptionsT;
+
+struct RankOptions;
+struct RankOptionsBuilder;
+struct RankOptionsT;
+
+struct PowOptions;
+struct PowOptionsBuilder;
+struct PowOptionsT;
+
+struct FakeQuantOptions;
+struct FakeQuantOptionsBuilder;
+struct FakeQuantOptionsT;
+
+struct PackOptions;
+struct PackOptionsBuilder;
+struct PackOptionsT;
+
+struct LogicalOrOptions;
+struct LogicalOrOptionsBuilder;
+struct LogicalOrOptionsT;
+
+struct OneHotOptions;
+struct OneHotOptionsBuilder;
+struct OneHotOptionsT;
+
+struct AbsOptions;
+struct AbsOptionsBuilder;
+struct AbsOptionsT;
+
+struct HardSwishOptions;
+struct HardSwishOptionsBuilder;
+struct HardSwishOptionsT;
+
+struct LogicalAndOptions;
+struct LogicalAndOptionsBuilder;
+struct LogicalAndOptionsT;
+
+struct LogicalNotOptions;
+struct LogicalNotOptionsBuilder;
+struct LogicalNotOptionsT;
+
+struct UnpackOptions;
+struct UnpackOptionsBuilder;
+struct UnpackOptionsT;
+
+struct FloorDivOptions;
+struct FloorDivOptionsBuilder;
+struct FloorDivOptionsT;
+
+struct SquareOptions;
+struct SquareOptionsBuilder;
+struct SquareOptionsT;
+
+struct ZerosLikeOptions;
+struct ZerosLikeOptionsBuilder;
+struct ZerosLikeOptionsT;
+
+struct FillOptions;
+struct FillOptionsBuilder;
+struct FillOptionsT;
+
+struct FloorModOptions;
+struct FloorModOptionsBuilder;
+struct FloorModOptionsT;
+
+struct RangeOptions;
+struct RangeOptionsBuilder;
+struct RangeOptionsT;
+
+struct LeakyReluOptions;
+struct LeakyReluOptionsBuilder;
+struct LeakyReluOptionsT;
+
+struct SquaredDifferenceOptions;
+struct SquaredDifferenceOptionsBuilder;
+struct SquaredDifferenceOptionsT;
+
+struct MirrorPadOptions;
+struct MirrorPadOptionsBuilder;
+struct MirrorPadOptionsT;
+
+struct UniqueOptions;
+struct UniqueOptionsBuilder;
+struct UniqueOptionsT;
+
+struct ReverseV2Options;
+struct ReverseV2OptionsBuilder;
+struct ReverseV2OptionsT;
+
+struct AddNOptions;
+struct AddNOptionsBuilder;
+struct AddNOptionsT;
+
+struct GatherNdOptions;
+struct GatherNdOptionsBuilder;
+struct GatherNdOptionsT;
+
+struct WhereOptions;
+struct WhereOptionsBuilder;
+struct WhereOptionsT;
+
+struct ReverseSequenceOptions;
+struct ReverseSequenceOptionsBuilder;
+struct ReverseSequenceOptionsT;
+
+struct MatrixDiagOptions;
+struct MatrixDiagOptionsBuilder;
+struct MatrixDiagOptionsT;
+
+struct QuantizeOptions;
+struct QuantizeOptionsBuilder;
+struct QuantizeOptionsT;
+
+struct MatrixSetDiagOptions;
+struct MatrixSetDiagOptionsBuilder;
+struct MatrixSetDiagOptionsT;
+
+struct IfOptions;
+struct IfOptionsBuilder;
+struct IfOptionsT;
+
+struct CallOnceOptions;
+struct CallOnceOptionsBuilder;
+struct CallOnceOptionsT;
+
+struct WhileOptions;
+struct WhileOptionsBuilder;
+struct WhileOptionsT;
+
+struct NonMaxSuppressionV4Options;
+struct NonMaxSuppressionV4OptionsBuilder;
+struct NonMaxSuppressionV4OptionsT;
+
+struct NonMaxSuppressionV5Options;
+struct NonMaxSuppressionV5OptionsBuilder;
+struct NonMaxSuppressionV5OptionsT;
+
+struct ScatterNdOptions;
+struct ScatterNdOptionsBuilder;
+struct ScatterNdOptionsT;
+
+struct SelectV2Options;
+struct SelectV2OptionsBuilder;
+struct SelectV2OptionsT;
+
+struct DensifyOptions;
+struct DensifyOptionsBuilder;
+struct DensifyOptionsT;
+
+struct SegmentSumOptions;
+struct SegmentSumOptionsBuilder;
+struct SegmentSumOptionsT;
+
+struct BatchMatMulOptions;
+struct BatchMatMulOptionsBuilder;
+struct BatchMatMulOptionsT;
+
+struct CumsumOptions;
+struct CumsumOptionsBuilder;
+struct CumsumOptionsT;
+
+struct BroadcastToOptions;
+struct BroadcastToOptionsBuilder;
+struct BroadcastToOptionsT;
+
+struct Rfft2dOptions;
+struct Rfft2dOptionsBuilder;
+struct Rfft2dOptionsT;
+
+struct HashtableOptions;
+struct HashtableOptionsBuilder;
+struct HashtableOptionsT;
+
+struct HashtableFindOptions;
+struct HashtableFindOptionsBuilder;
+struct HashtableFindOptionsT;
+
+struct HashtableImportOptions;
+struct HashtableImportOptionsBuilder;
+struct HashtableImportOptionsT;
+
+struct HashtableSizeOptions;
+struct HashtableSizeOptionsBuilder;
+struct HashtableSizeOptionsT;
+
+struct VarHandleOptions;
+struct VarHandleOptionsBuilder;
+struct VarHandleOptionsT;
+
+struct ReadVariableOptions;
+struct ReadVariableOptionsBuilder;
+struct ReadVariableOptionsT;
+
+struct AssignVariableOptions;
+struct AssignVariableOptionsBuilder;
+struct AssignVariableOptionsT;
+
+struct RandomOptions;
+struct RandomOptionsBuilder;
+struct RandomOptionsT;
+
+struct BCQGatherOptions;
+struct BCQGatherOptionsBuilder;
+struct BCQGatherOptionsT;
+
+struct BCQFullyConnectedOptions;
+struct BCQFullyConnectedOptionsBuilder;
+struct BCQFullyConnectedOptionsT;
+
+struct InstanceNormOptions;
+struct InstanceNormOptionsBuilder;
+struct InstanceNormOptionsT;
+
+struct OperatorCode;
+struct OperatorCodeBuilder;
+struct OperatorCodeT;
+
+struct Operator;
+struct OperatorBuilder;
+struct OperatorT;
+
+struct SubGraph;
+struct SubGraphBuilder;
+struct SubGraphT;
+
+struct Buffer;
+struct BufferBuilder;
+struct BufferT;
+
+struct Metadata;
+struct MetadataBuilder;
+struct MetadataT;
+
+struct TensorMap;
+struct TensorMapBuilder;
+struct TensorMapT;
+
+struct SignatureDef;
+struct SignatureDefBuilder;
+struct SignatureDefT;
+
+struct Model;
+struct ModelBuilder;
+struct ModelT;
+
+enum TensorType : int8_t
+{
+ TensorType_FLOAT32 = 0,
+ TensorType_FLOAT16 = 1,
+ TensorType_INT32 = 2,
+ TensorType_UINT8 = 3,
+ TensorType_INT64 = 4,
+ TensorType_STRING = 5,
+ TensorType_BOOL = 6,
+ TensorType_INT16 = 7,
+ TensorType_COMPLEX64 = 8,
+ TensorType_INT8 = 9,
+ TensorType_FLOAT64 = 10,
+ TensorType_COMPLEX128 = 11,
+ TensorType_UINT64 = 12,
+ TensorType_RESOURCE = 13,
+ TensorType_VARIANT = 14,
+ TensorType_UINT32 = 15,
+ TensorType_MIN = TensorType_FLOAT32,
+ TensorType_MAX = TensorType_UINT32
+};
+
+inline const TensorType (&EnumValuesTensorType())[16]
+{
+ static const TensorType values[] = {
+ TensorType_FLOAT32, TensorType_FLOAT16, TensorType_INT32, TensorType_UINT8,
+ TensorType_INT64, TensorType_STRING, TensorType_BOOL, TensorType_INT16,
+ TensorType_COMPLEX64, TensorType_INT8, TensorType_FLOAT64, TensorType_COMPLEX128,
+ TensorType_UINT64, TensorType_RESOURCE, TensorType_VARIANT, TensorType_UINT32};
+ return values;
+}
+
+inline const char *const *EnumNamesTensorType()
+{
+ static const char *const names[17] = {"FLOAT32", "FLOAT16", "INT32", "UINT8", "INT64",
+ "STRING", "BOOL", "INT16", "COMPLEX64", "INT8",
+ "FLOAT64", "COMPLEX128", "UINT64", "RESOURCE", "VARIANT",
+ "UINT32", nullptr};
+ return names;
+}
+
+inline const char *EnumNameTensorType(TensorType e)
+{
+ if (flatbuffers::IsOutRange(e, TensorType_FLOAT32, TensorType_UINT32))
+ return "";
+ const size_t index = static_cast<size_t>(e);
+ return EnumNamesTensorType()[index];
+}
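+
+// Example (illustrative): the generated tables map enum values directly to
+// strings, with out-of-range values mapped to "":
+//
+//   circle::EnumNameTensorType(circle::TensorType_INT8);              // "INT8"
+//   circle::EnumNameTensorType(static_cast<circle::TensorType>(99));  // ""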
+
+enum QuantizationDetails : uint8_t
+{
+ QuantizationDetails_NONE = 0,
+ QuantizationDetails_CustomQuantization = 1,
+ QuantizationDetails_MIN = QuantizationDetails_NONE,
+ QuantizationDetails_MAX = QuantizationDetails_CustomQuantization
+};
+
+inline const QuantizationDetails (&EnumValuesQuantizationDetails())[2]
+{
+ static const QuantizationDetails values[] = {QuantizationDetails_NONE,
+ QuantizationDetails_CustomQuantization};
+ return values;
+}
+
+inline const char *const *EnumNamesQuantizationDetails()
+{
+ static const char *const names[3] = {"NONE", "CustomQuantization", nullptr};
+ return names;
+}
+
+inline const char *EnumNameQuantizationDetails(QuantizationDetails e)
+{
+ if (flatbuffers::IsOutRange(e, QuantizationDetails_NONE, QuantizationDetails_CustomQuantization))
+ return "";
+ const size_t index = static_cast<size_t>(e);
+ return EnumNamesQuantizationDetails()[index];
+}
+
+template <typename T> struct QuantizationDetailsTraits
+{
+ static const QuantizationDetails enum_value = QuantizationDetails_NONE;
+};
+
+template <> struct QuantizationDetailsTraits<circle::CustomQuantization>
+{
+ static const QuantizationDetails enum_value = QuantizationDetails_CustomQuantization;
+};
+
+struct QuantizationDetailsUnion
+{
+ QuantizationDetails type;
+ void *value;
+
+ QuantizationDetailsUnion() : type(QuantizationDetails_NONE), value(nullptr) {}
+ QuantizationDetailsUnion(QuantizationDetailsUnion &&u) FLATBUFFERS_NOEXCEPT
+ : type(QuantizationDetails_NONE),
+ value(nullptr)
+ {
+ std::swap(type, u.type);
+ std::swap(value, u.value);
+ }
+ QuantizationDetailsUnion(const QuantizationDetailsUnion &);
+ QuantizationDetailsUnion &operator=(const QuantizationDetailsUnion &u)
+ {
+ QuantizationDetailsUnion t(u);
+ std::swap(type, t.type);
+ std::swap(value, t.value);
+ return *this;
+ }
+ QuantizationDetailsUnion &operator=(QuantizationDetailsUnion &&u) FLATBUFFERS_NOEXCEPT
+ {
+ std::swap(type, u.type);
+ std::swap(value, u.value);
+ return *this;
+ }
+ ~QuantizationDetailsUnion() { Reset(); }
+
+ void Reset();
+
+#ifndef FLATBUFFERS_CPP98_STL
+ template <typename T> void Set(T &&val)
+ {
+ using RT = typename std::remove_reference<T>::type;
+ Reset();
+ type = QuantizationDetailsTraits<typename RT::TableType>::enum_value;
+ if (type != QuantizationDetails_NONE)
+ {
+ value = new RT(std::forward<T>(val));
+ }
+ }
+#endif // FLATBUFFERS_CPP98_STL
+
+ static void *UnPack(const void *obj, QuantizationDetails type,
+ const flatbuffers::resolver_function_t *resolver);
+ flatbuffers::Offset<void> Pack(flatbuffers::FlatBufferBuilder &_fbb,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr) const;
+
+ circle::CustomQuantizationT *AsCustomQuantization()
+ {
+ return type == QuantizationDetails_CustomQuantization
+ ? reinterpret_cast<circle::CustomQuantizationT *>(value)
+ : nullptr;
+ }
+ const circle::CustomQuantizationT *AsCustomQuantization() const
+ {
+ return type == QuantizationDetails_CustomQuantization
+ ? reinterpret_cast<const circle::CustomQuantizationT *>(value)
+ : nullptr;
+ }
+};
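+
+// Usage sketch (hypothetical caller code, non-FLATBUFFERS_CPP98_STL path):
+// the union owns its payload; Set() stores a typed copy and the As*()
+// accessors return it only when the tag matches:
+//
+//   circle::QuantizationDetailsUnion u;
+//   u.Set(circle::CustomQuantizationT());
+//   // u.type == circle::QuantizationDetails_CustomQuantization
+//   circle::CustomQuantizationT *cq = u.AsCustomQuantization();  // non-null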
+
+bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const void *obj,
+ QuantizationDetails type);
+bool VerifyQuantizationDetailsVector(flatbuffers::Verifier &verifier,
+ const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
+ const flatbuffers::Vector<uint8_t> *types);
+
+enum DimensionType : int8_t
+{
+ DimensionType_DENSE = 0,
+ DimensionType_SPARSE_CSR = 1,
+ DimensionType_MIN = DimensionType_DENSE,
+ DimensionType_MAX = DimensionType_SPARSE_CSR
+};
+
+inline const DimensionType (&EnumValuesDimensionType())[2]
+{
+ static const DimensionType values[] = {DimensionType_DENSE, DimensionType_SPARSE_CSR};
+ return values;
+}
+
+inline const char *const *EnumNamesDimensionType()
+{
+ static const char *const names[3] = {"DENSE", "SPARSE_CSR", nullptr};
+ return names;
+}
+
+inline const char *EnumNameDimensionType(DimensionType e)
+{
+ if (flatbuffers::IsOutRange(e, DimensionType_DENSE, DimensionType_SPARSE_CSR))
+ return "";
+ const size_t index = static_cast<size_t>(e);
+ return EnumNamesDimensionType()[index];
+}
+
+enum SparseIndexVector : uint8_t
+{
+ SparseIndexVector_NONE = 0,
+ SparseIndexVector_Int32Vector = 1,
+ SparseIndexVector_Uint16Vector = 2,
+ SparseIndexVector_Uint8Vector = 3,
+ SparseIndexVector_MIN = SparseIndexVector_NONE,
+ SparseIndexVector_MAX = SparseIndexVector_Uint8Vector
+};
+
+inline const SparseIndexVector (&EnumValuesSparseIndexVector())[4]
+{
+ static const SparseIndexVector values[] = {SparseIndexVector_NONE, SparseIndexVector_Int32Vector,
+ SparseIndexVector_Uint16Vector,
+ SparseIndexVector_Uint8Vector};
+ return values;
+}
+
+inline const char *const *EnumNamesSparseIndexVector()
+{
+ static const char *const names[5] = {"NONE", "Int32Vector", "Uint16Vector", "Uint8Vector",
+ nullptr};
+ return names;
+}
+
+inline const char *EnumNameSparseIndexVector(SparseIndexVector e)
+{
+ if (flatbuffers::IsOutRange(e, SparseIndexVector_NONE, SparseIndexVector_Uint8Vector))
+ return "";
+ const size_t index = static_cast<size_t>(e);
+ return EnumNamesSparseIndexVector()[index];
+}
+
+template <typename T> struct SparseIndexVectorTraits
+{
+ static const SparseIndexVector enum_value = SparseIndexVector_NONE;
+};
+
+template <> struct SparseIndexVectorTraits<circle::Int32Vector>
+{
+ static const SparseIndexVector enum_value = SparseIndexVector_Int32Vector;
+};
+
+template <> struct SparseIndexVectorTraits<circle::Uint16Vector>
+{
+ static const SparseIndexVector enum_value = SparseIndexVector_Uint16Vector;
+};
+
+template <> struct SparseIndexVectorTraits<circle::Uint8Vector>
+{
+ static const SparseIndexVector enum_value = SparseIndexVector_Uint8Vector;
+};
+
+struct SparseIndexVectorUnion
+{
+ SparseIndexVector type;
+ void *value;
+
+ SparseIndexVectorUnion() : type(SparseIndexVector_NONE), value(nullptr) {}
+ SparseIndexVectorUnion(SparseIndexVectorUnion &&u) FLATBUFFERS_NOEXCEPT
+ : type(SparseIndexVector_NONE),
+ value(nullptr)
+ {
+ std::swap(type, u.type);
+ std::swap(value, u.value);
+ }
+ SparseIndexVectorUnion(const SparseIndexVectorUnion &);
+ SparseIndexVectorUnion &operator=(const SparseIndexVectorUnion &u)
+ {
+ SparseIndexVectorUnion t(u);
+ std::swap(type, t.type);
+ std::swap(value, t.value);
+ return *this;
+ }
+ SparseIndexVectorUnion &operator=(SparseIndexVectorUnion &&u) FLATBUFFERS_NOEXCEPT
+ {
+ std::swap(type, u.type);
+ std::swap(value, u.value);
+ return *this;
+ }
+ ~SparseIndexVectorUnion() { Reset(); }
+
+ void Reset();
+
+#ifndef FLATBUFFERS_CPP98_STL
+ template <typename T> void Set(T &&val)
+ {
+ using RT = typename std::remove_reference<T>::type;
+ Reset();
+ type = SparseIndexVectorTraits<typename RT::TableType>::enum_value;
+ if (type != SparseIndexVector_NONE)
+ {
+ value = new RT(std::forward<T>(val));
+ }
+ }
+#endif // FLATBUFFERS_CPP98_STL
+
+ static void *UnPack(const void *obj, SparseIndexVector type,
+ const flatbuffers::resolver_function_t *resolver);
+ flatbuffers::Offset<void> Pack(flatbuffers::FlatBufferBuilder &_fbb,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr) const;
+
+ circle::Int32VectorT *AsInt32Vector()
+ {
+ return type == SparseIndexVector_Int32Vector ? reinterpret_cast<circle::Int32VectorT *>(value)
+ : nullptr;
+ }
+ const circle::Int32VectorT *AsInt32Vector() const
+ {
+ return type == SparseIndexVector_Int32Vector
+ ? reinterpret_cast<const circle::Int32VectorT *>(value)
+ : nullptr;
+ }
+ circle::Uint16VectorT *AsUint16Vector()
+ {
+ return type == SparseIndexVector_Uint16Vector ? reinterpret_cast<circle::Uint16VectorT *>(value)
+ : nullptr;
+ }
+ const circle::Uint16VectorT *AsUint16Vector() const
+ {
+ return type == SparseIndexVector_Uint16Vector
+ ? reinterpret_cast<const circle::Uint16VectorT *>(value)
+ : nullptr;
+ }
+ circle::Uint8VectorT *AsUint8Vector()
+ {
+ return type == SparseIndexVector_Uint8Vector ? reinterpret_cast<circle::Uint8VectorT *>(value)
+ : nullptr;
+ }
+ const circle::Uint8VectorT *AsUint8Vector() const
+ {
+ return type == SparseIndexVector_Uint8Vector
+ ? reinterpret_cast<const circle::Uint8VectorT *>(value)
+ : nullptr;
+ }
+};
+
+bool VerifySparseIndexVector(flatbuffers::Verifier &verifier, const void *obj,
+ SparseIndexVector type);
+bool VerifySparseIndexVectorVector(flatbuffers::Verifier &verifier,
+ const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
+ const flatbuffers::Vector<uint8_t> *types);
+
+enum BuiltinOperator : int32_t
+{
+ BuiltinOperator_BCQ_GATHER = -4,
+ BuiltinOperator_BCQ_FULLY_CONNECTED = -3,
+ BuiltinOperator_INSTANCE_NORM = -2,
+ BuiltinOperator_ADD = 0,
+ BuiltinOperator_AVERAGE_POOL_2D = 1,
+ BuiltinOperator_CONCATENATION = 2,
+ BuiltinOperator_CONV_2D = 3,
+ BuiltinOperator_DEPTHWISE_CONV_2D = 4,
+ BuiltinOperator_DEPTH_TO_SPACE = 5,
+ BuiltinOperator_DEQUANTIZE = 6,
+ BuiltinOperator_EMBEDDING_LOOKUP = 7,
+ BuiltinOperator_FLOOR = 8,
+ BuiltinOperator_FULLY_CONNECTED = 9,
+ BuiltinOperator_HASHTABLE_LOOKUP = 10,
+ BuiltinOperator_L2_NORMALIZATION = 11,
+ BuiltinOperator_L2_POOL_2D = 12,
+ BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION = 13,
+ BuiltinOperator_LOGISTIC = 14,
+ BuiltinOperator_LSH_PROJECTION = 15,
+ BuiltinOperator_LSTM = 16,
+ BuiltinOperator_MAX_POOL_2D = 17,
+ BuiltinOperator_MUL = 18,
+ BuiltinOperator_RELU = 19,
+ BuiltinOperator_RELU_N1_TO_1 = 20,
+ BuiltinOperator_RELU6 = 21,
+ BuiltinOperator_RESHAPE = 22,
+ BuiltinOperator_RESIZE_BILINEAR = 23,
+ BuiltinOperator_RNN = 24,
+ BuiltinOperator_SOFTMAX = 25,
+ BuiltinOperator_SPACE_TO_DEPTH = 26,
+ BuiltinOperator_SVDF = 27,
+ BuiltinOperator_TANH = 28,
+ BuiltinOperator_CONCAT_EMBEDDINGS = 29,
+ BuiltinOperator_SKIP_GRAM = 30,
+ BuiltinOperator_CALL = 31,
+ BuiltinOperator_CUSTOM = 32,
+ BuiltinOperator_EMBEDDING_LOOKUP_SPARSE = 33,
+ BuiltinOperator_PAD = 34,
+ BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+ BuiltinOperator_GATHER = 36,
+ BuiltinOperator_BATCH_TO_SPACE_ND = 37,
+ BuiltinOperator_SPACE_TO_BATCH_ND = 38,
+ BuiltinOperator_TRANSPOSE = 39,
+ BuiltinOperator_MEAN = 40,
+ BuiltinOperator_SUB = 41,
+ BuiltinOperator_DIV = 42,
+ BuiltinOperator_SQUEEZE = 43,
+ BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
+ BuiltinOperator_STRIDED_SLICE = 45,
+ BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN = 46,
+ BuiltinOperator_EXP = 47,
+ BuiltinOperator_TOPK_V2 = 48,
+ BuiltinOperator_SPLIT = 49,
+ BuiltinOperator_LOG_SOFTMAX = 50,
+ BuiltinOperator_DELEGATE = 51,
+ BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM = 52,
+ BuiltinOperator_CAST = 53,
+ BuiltinOperator_PRELU = 54,
+ BuiltinOperator_MAXIMUM = 55,
+ BuiltinOperator_ARG_MAX = 56,
+ BuiltinOperator_MINIMUM = 57,
+ BuiltinOperator_LESS = 58,
+ BuiltinOperator_NEG = 59,
+ BuiltinOperator_PADV2 = 60,
+ BuiltinOperator_GREATER = 61,
+ BuiltinOperator_GREATER_EQUAL = 62,
+ BuiltinOperator_LESS_EQUAL = 63,
+ BuiltinOperator_SELECT = 64,
+ BuiltinOperator_SLICE = 65,
+ BuiltinOperator_SIN = 66,
+ BuiltinOperator_TRANSPOSE_CONV = 67,
+ BuiltinOperator_SPARSE_TO_DENSE = 68,
+ BuiltinOperator_TILE = 69,
+ BuiltinOperator_EXPAND_DIMS = 70,
+ BuiltinOperator_EQUAL = 71,
+ BuiltinOperator_NOT_EQUAL = 72,
+ BuiltinOperator_LOG = 73,
+ BuiltinOperator_SUM = 74,
+ BuiltinOperator_SQRT = 75,
+ BuiltinOperator_RSQRT = 76,
+ BuiltinOperator_SHAPE = 77,
+ BuiltinOperator_POW = 78,
+ BuiltinOperator_ARG_MIN = 79,
+ BuiltinOperator_FAKE_QUANT = 80,
+ BuiltinOperator_REDUCE_PROD = 81,
+ BuiltinOperator_REDUCE_MAX = 82,
+ BuiltinOperator_PACK = 83,
+ BuiltinOperator_LOGICAL_OR = 84,
+ BuiltinOperator_ONE_HOT = 85,
+ BuiltinOperator_LOGICAL_AND = 86,
+ BuiltinOperator_LOGICAL_NOT = 87,
+ BuiltinOperator_UNPACK = 88,
+ BuiltinOperator_REDUCE_MIN = 89,
+ BuiltinOperator_FLOOR_DIV = 90,
+ BuiltinOperator_REDUCE_ANY = 91,
+ BuiltinOperator_SQUARE = 92,
+ BuiltinOperator_ZEROS_LIKE = 93,
+ BuiltinOperator_FILL = 94,
+ BuiltinOperator_FLOOR_MOD = 95,
+ BuiltinOperator_RANGE = 96,
+ BuiltinOperator_RESIZE_NEAREST_NEIGHBOR = 97,
+ BuiltinOperator_LEAKY_RELU = 98,
+ BuiltinOperator_SQUARED_DIFFERENCE = 99,
+ BuiltinOperator_MIRROR_PAD = 100,
+ BuiltinOperator_ABS = 101,
+ BuiltinOperator_SPLIT_V = 102,
+ BuiltinOperator_UNIQUE = 103,
+ BuiltinOperator_CEIL = 104,
+ BuiltinOperator_REVERSE_V2 = 105,
+ BuiltinOperator_ADD_N = 106,
+ BuiltinOperator_GATHER_ND = 107,
+ BuiltinOperator_COS = 108,
+ BuiltinOperator_WHERE = 109,
+ BuiltinOperator_RANK = 110,
+ BuiltinOperator_ELU = 111,
+ BuiltinOperator_REVERSE_SEQUENCE = 112,
+ BuiltinOperator_MATRIX_DIAG = 113,
+ BuiltinOperator_QUANTIZE = 114,
+ BuiltinOperator_MATRIX_SET_DIAG = 115,
+ BuiltinOperator_ROUND = 116,
+ BuiltinOperator_HARD_SWISH = 117,
+ BuiltinOperator_IF = 118,
+ BuiltinOperator_WHILE = 119,
+ BuiltinOperator_NON_MAX_SUPPRESSION_V4 = 120,
+ BuiltinOperator_NON_MAX_SUPPRESSION_V5 = 121,
+ BuiltinOperator_SCATTER_ND = 122,
+ BuiltinOperator_SELECT_V2 = 123,
+ BuiltinOperator_DENSIFY = 124,
+ BuiltinOperator_SEGMENT_SUM = 125,
+ BuiltinOperator_BATCH_MATMUL = 126,
+ BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES = 127,
+ BuiltinOperator_CUMSUM = 128,
+ BuiltinOperator_CALL_ONCE = 129,
+ BuiltinOperator_BROADCAST_TO = 130,
+ BuiltinOperator_RFFT2D = 131,
+ BuiltinOperator_CONV_3D = 132,
+ BuiltinOperator_IMAG = 133,
+ BuiltinOperator_REAL = 134,
+ BuiltinOperator_COMPLEX_ABS = 135,
+ BuiltinOperator_HASHTABLE = 136,
+ BuiltinOperator_HASHTABLE_FIND = 137,
+ BuiltinOperator_HASHTABLE_IMPORT = 138,
+ BuiltinOperator_HASHTABLE_SIZE = 139,
+ BuiltinOperator_REDUCE_ALL = 140,
+ BuiltinOperator_CONV_3D_TRANSPOSE = 141,
+ BuiltinOperator_VAR_HANDLE = 142,
+ BuiltinOperator_READ_VARIABLE = 143,
+ BuiltinOperator_ASSIGN_VARIABLE = 144,
+ BuiltinOperator_BROADCAST_ARGS = 145,
+ BuiltinOperator_RANDOM_STANDARD_NORMAL = 146,
+ BuiltinOperator_MIN = BuiltinOperator_BCQ_GATHER,
+ BuiltinOperator_MAX = BuiltinOperator_RANDOM_STANDARD_NORMAL
+};
+
+inline const BuiltinOperator (&EnumValuesBuiltinOperator())[150]
+{
+ static const BuiltinOperator values[] = {BuiltinOperator_BCQ_GATHER,
+ BuiltinOperator_BCQ_FULLY_CONNECTED,
+ BuiltinOperator_INSTANCE_NORM,
+ BuiltinOperator_ADD,
+ BuiltinOperator_AVERAGE_POOL_2D,
+ BuiltinOperator_CONCATENATION,
+ BuiltinOperator_CONV_2D,
+ BuiltinOperator_DEPTHWISE_CONV_2D,
+ BuiltinOperator_DEPTH_TO_SPACE,
+ BuiltinOperator_DEQUANTIZE,
+ BuiltinOperator_EMBEDDING_LOOKUP,
+ BuiltinOperator_FLOOR,
+ BuiltinOperator_FULLY_CONNECTED,
+ BuiltinOperator_HASHTABLE_LOOKUP,
+ BuiltinOperator_L2_NORMALIZATION,
+ BuiltinOperator_L2_POOL_2D,
+ BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION,
+ BuiltinOperator_LOGISTIC,
+ BuiltinOperator_LSH_PROJECTION,
+ BuiltinOperator_LSTM,
+ BuiltinOperator_MAX_POOL_2D,
+ BuiltinOperator_MUL,
+ BuiltinOperator_RELU,
+ BuiltinOperator_RELU_N1_TO_1,
+ BuiltinOperator_RELU6,
+ BuiltinOperator_RESHAPE,
+ BuiltinOperator_RESIZE_BILINEAR,
+ BuiltinOperator_RNN,
+ BuiltinOperator_SOFTMAX,
+ BuiltinOperator_SPACE_TO_DEPTH,
+ BuiltinOperator_SVDF,
+ BuiltinOperator_TANH,
+ BuiltinOperator_CONCAT_EMBEDDINGS,
+ BuiltinOperator_SKIP_GRAM,
+ BuiltinOperator_CALL,
+ BuiltinOperator_CUSTOM,
+ BuiltinOperator_EMBEDDING_LOOKUP_SPARSE,
+ BuiltinOperator_PAD,
+ BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN,
+ BuiltinOperator_GATHER,
+ BuiltinOperator_BATCH_TO_SPACE_ND,
+ BuiltinOperator_SPACE_TO_BATCH_ND,
+ BuiltinOperator_TRANSPOSE,
+ BuiltinOperator_MEAN,
+ BuiltinOperator_SUB,
+ BuiltinOperator_DIV,
+ BuiltinOperator_SQUEEZE,
+ BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM,
+ BuiltinOperator_STRIDED_SLICE,
+ BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN,
+ BuiltinOperator_EXP,
+ BuiltinOperator_TOPK_V2,
+ BuiltinOperator_SPLIT,
+ BuiltinOperator_LOG_SOFTMAX,
+ BuiltinOperator_DELEGATE,
+ BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM,
+ BuiltinOperator_CAST,
+ BuiltinOperator_PRELU,
+ BuiltinOperator_MAXIMUM,
+ BuiltinOperator_ARG_MAX,
+ BuiltinOperator_MINIMUM,
+ BuiltinOperator_LESS,
+ BuiltinOperator_NEG,
+ BuiltinOperator_PADV2,
+ BuiltinOperator_GREATER,
+ BuiltinOperator_GREATER_EQUAL,
+ BuiltinOperator_LESS_EQUAL,
+ BuiltinOperator_SELECT,
+ BuiltinOperator_SLICE,
+ BuiltinOperator_SIN,
+ BuiltinOperator_TRANSPOSE_CONV,
+ BuiltinOperator_SPARSE_TO_DENSE,
+ BuiltinOperator_TILE,
+ BuiltinOperator_EXPAND_DIMS,
+ BuiltinOperator_EQUAL,
+ BuiltinOperator_NOT_EQUAL,
+ BuiltinOperator_LOG,
+ BuiltinOperator_SUM,
+ BuiltinOperator_SQRT,
+ BuiltinOperator_RSQRT,
+ BuiltinOperator_SHAPE,
+ BuiltinOperator_POW,
+ BuiltinOperator_ARG_MIN,
+ BuiltinOperator_FAKE_QUANT,
+ BuiltinOperator_REDUCE_PROD,
+ BuiltinOperator_REDUCE_MAX,
+ BuiltinOperator_PACK,
+ BuiltinOperator_LOGICAL_OR,
+ BuiltinOperator_ONE_HOT,
+ BuiltinOperator_LOGICAL_AND,
+ BuiltinOperator_LOGICAL_NOT,
+ BuiltinOperator_UNPACK,
+ BuiltinOperator_REDUCE_MIN,
+ BuiltinOperator_FLOOR_DIV,
+ BuiltinOperator_REDUCE_ANY,
+ BuiltinOperator_SQUARE,
+ BuiltinOperator_ZEROS_LIKE,
+ BuiltinOperator_FILL,
+ BuiltinOperator_FLOOR_MOD,
+ BuiltinOperator_RANGE,
+ BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
+ BuiltinOperator_LEAKY_RELU,
+ BuiltinOperator_SQUARED_DIFFERENCE,
+ BuiltinOperator_MIRROR_PAD,
+ BuiltinOperator_ABS,
+ BuiltinOperator_SPLIT_V,
+ BuiltinOperator_UNIQUE,
+ BuiltinOperator_CEIL,
+ BuiltinOperator_REVERSE_V2,
+ BuiltinOperator_ADD_N,
+ BuiltinOperator_GATHER_ND,
+ BuiltinOperator_COS,
+ BuiltinOperator_WHERE,
+ BuiltinOperator_RANK,
+ BuiltinOperator_ELU,
+ BuiltinOperator_REVERSE_SEQUENCE,
+ BuiltinOperator_MATRIX_DIAG,
+ BuiltinOperator_QUANTIZE,
+ BuiltinOperator_MATRIX_SET_DIAG,
+ BuiltinOperator_ROUND,
+ BuiltinOperator_HARD_SWISH,
+ BuiltinOperator_IF,
+ BuiltinOperator_WHILE,
+ BuiltinOperator_NON_MAX_SUPPRESSION_V4,
+ BuiltinOperator_NON_MAX_SUPPRESSION_V5,
+ BuiltinOperator_SCATTER_ND,
+ BuiltinOperator_SELECT_V2,
+ BuiltinOperator_DENSIFY,
+ BuiltinOperator_SEGMENT_SUM,
+ BuiltinOperator_BATCH_MATMUL,
+ BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES,
+ BuiltinOperator_CUMSUM,
+ BuiltinOperator_CALL_ONCE,
+ BuiltinOperator_BROADCAST_TO,
+ BuiltinOperator_RFFT2D,
+ BuiltinOperator_CONV_3D,
+ BuiltinOperator_IMAG,
+ BuiltinOperator_REAL,
+ BuiltinOperator_COMPLEX_ABS,
+ BuiltinOperator_HASHTABLE,
+ BuiltinOperator_HASHTABLE_FIND,
+ BuiltinOperator_HASHTABLE_IMPORT,
+ BuiltinOperator_HASHTABLE_SIZE,
+ BuiltinOperator_REDUCE_ALL,
+ BuiltinOperator_CONV_3D_TRANSPOSE,
+ BuiltinOperator_VAR_HANDLE,
+ BuiltinOperator_READ_VARIABLE,
+ BuiltinOperator_ASSIGN_VARIABLE,
+ BuiltinOperator_BROADCAST_ARGS,
+ BuiltinOperator_RANDOM_STANDARD_NORMAL};
+ return values;
+}
+
+inline const char *const *EnumNamesBuiltinOperator()
+{
+ static const char *const names[152] = {"BCQ_GATHER",
+ "BCQ_FULLY_CONNECTED",
+ "INSTANCE_NORM",
+ "",
+ "ADD",
+ "AVERAGE_POOL_2D",
+ "CONCATENATION",
+ "CONV_2D",
+ "DEPTHWISE_CONV_2D",
+ "DEPTH_TO_SPACE",
+ "DEQUANTIZE",
+ "EMBEDDING_LOOKUP",
+ "FLOOR",
+ "FULLY_CONNECTED",
+ "HASHTABLE_LOOKUP",
+ "L2_NORMALIZATION",
+ "L2_POOL_2D",
+ "LOCAL_RESPONSE_NORMALIZATION",
+ "LOGISTIC",
+ "LSH_PROJECTION",
+ "LSTM",
+ "MAX_POOL_2D",
+ "MUL",
+ "RELU",
+ "RELU_N1_TO_1",
+ "RELU6",
+ "RESHAPE",
+ "RESIZE_BILINEAR",
+ "RNN",
+ "SOFTMAX",
+ "SPACE_TO_DEPTH",
+ "SVDF",
+ "TANH",
+ "CONCAT_EMBEDDINGS",
+ "SKIP_GRAM",
+ "CALL",
+ "CUSTOM",
+ "EMBEDDING_LOOKUP_SPARSE",
+ "PAD",
+ "UNIDIRECTIONAL_SEQUENCE_RNN",
+ "GATHER",
+ "BATCH_TO_SPACE_ND",
+ "SPACE_TO_BATCH_ND",
+ "TRANSPOSE",
+ "MEAN",
+ "SUB",
+ "DIV",
+ "SQUEEZE",
+ "UNIDIRECTIONAL_SEQUENCE_LSTM",
+ "STRIDED_SLICE",
+ "BIDIRECTIONAL_SEQUENCE_RNN",
+ "EXP",
+ "TOPK_V2",
+ "SPLIT",
+ "LOG_SOFTMAX",
+ "DELEGATE",
+ "BIDIRECTIONAL_SEQUENCE_LSTM",
+ "CAST",
+ "PRELU",
+ "MAXIMUM",
+ "ARG_MAX",
+ "MINIMUM",
+ "LESS",
+ "NEG",
+ "PADV2",
+ "GREATER",
+ "GREATER_EQUAL",
+ "LESS_EQUAL",
+ "SELECT",
+ "SLICE",
+ "SIN",
+ "TRANSPOSE_CONV",
+ "SPARSE_TO_DENSE",
+ "TILE",
+ "EXPAND_DIMS",
+ "EQUAL",
+ "NOT_EQUAL",
+ "LOG",
+ "SUM",
+ "SQRT",
+ "RSQRT",
+ "SHAPE",
+ "POW",
+ "ARG_MIN",
+ "FAKE_QUANT",
+ "REDUCE_PROD",
+ "REDUCE_MAX",
+ "PACK",
+ "LOGICAL_OR",
+ "ONE_HOT",
+ "LOGICAL_AND",
+ "LOGICAL_NOT",
+ "UNPACK",
+ "REDUCE_MIN",
+ "FLOOR_DIV",
+ "REDUCE_ANY",
+ "SQUARE",
+ "ZEROS_LIKE",
+ "FILL",
+ "FLOOR_MOD",
+ "RANGE",
+ "RESIZE_NEAREST_NEIGHBOR",
+ "LEAKY_RELU",
+ "SQUARED_DIFFERENCE",
+ "MIRROR_PAD",
+ "ABS",
+ "SPLIT_V",
+ "UNIQUE",
+ "CEIL",
+ "REVERSE_V2",
+ "ADD_N",
+ "GATHER_ND",
+ "COS",
+ "WHERE",
+ "RANK",
+ "ELU",
+ "REVERSE_SEQUENCE",
+ "MATRIX_DIAG",
+ "QUANTIZE",
+ "MATRIX_SET_DIAG",
+ "ROUND",
+ "HARD_SWISH",
+ "IF",
+ "WHILE",
+ "NON_MAX_SUPPRESSION_V4",
+ "NON_MAX_SUPPRESSION_V5",
+ "SCATTER_ND",
+ "SELECT_V2",
+ "DENSIFY",
+ "SEGMENT_SUM",
+ "BATCH_MATMUL",
+ "PLACEHOLDER_FOR_GREATER_OP_CODES",
+ "CUMSUM",
+ "CALL_ONCE",
+ "BROADCAST_TO",
+ "RFFT2D",
+ "CONV_3D",
+ "IMAG",
+ "REAL",
+ "COMPLEX_ABS",
+ "HASHTABLE",
+ "HASHTABLE_FIND",
+ "HASHTABLE_IMPORT",
+ "HASHTABLE_SIZE",
+ "REDUCE_ALL",
+ "CONV_3D_TRANSPOSE",
+ "VAR_HANDLE",
+ "READ_VARIABLE",
+ "ASSIGN_VARIABLE",
+ "BROADCAST_ARGS",
+ "RANDOM_STANDARD_NORMAL",
+ nullptr};
+ return names;
+}
+
+inline const char *EnumNameBuiltinOperator(BuiltinOperator e)
+{
+ if (flatbuffers::IsOutRange(e, BuiltinOperator_BCQ_GATHER,
+ BuiltinOperator_RANDOM_STANDARD_NORMAL))
+ return "";
+ const size_t index = static_cast<size_t>(e) - static_cast<size_t>(BuiltinOperator_BCQ_GATHER);
+ return EnumNamesBuiltinOperator()[index];
+}
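+
+// Name lookup is offset-indexed: the names table starts at
+// BuiltinOperator_BCQ_GATHER (the smallest enum value) and keeps one empty
+// slot for the unused value between INSTANCE_NORM and ADD, so
+// `e - BuiltinOperator_BCQ_GATHER` lands on the right string and any
+// out-of-range value maps to "". A minimal usage sketch, assuming `op` was
+// read from a decoded circle model:
+//
+//   const char *name = circle::EnumNameBuiltinOperator(op);
+//   std::cout << (name[0] ? name : "<unknown operator>") << '\n';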
+
+enum BuiltinOptions : uint8_t
+{
+ BuiltinOptions_NONE = 0,
+ BuiltinOptions_Conv2DOptions = 1,
+ BuiltinOptions_DepthwiseConv2DOptions = 2,
+ BuiltinOptions_ConcatEmbeddingsOptions = 3,
+ BuiltinOptions_LSHProjectionOptions = 4,
+ BuiltinOptions_Pool2DOptions = 5,
+ BuiltinOptions_SVDFOptions = 6,
+ BuiltinOptions_RNNOptions = 7,
+ BuiltinOptions_FullyConnectedOptions = 8,
+ BuiltinOptions_SoftmaxOptions = 9,
+ BuiltinOptions_ConcatenationOptions = 10,
+ BuiltinOptions_AddOptions = 11,
+ BuiltinOptions_L2NormOptions = 12,
+ BuiltinOptions_LocalResponseNormalizationOptions = 13,
+ BuiltinOptions_LSTMOptions = 14,
+ BuiltinOptions_ResizeBilinearOptions = 15,
+ BuiltinOptions_CallOptions = 16,
+ BuiltinOptions_ReshapeOptions = 17,
+ BuiltinOptions_SkipGramOptions = 18,
+ BuiltinOptions_SpaceToDepthOptions = 19,
+ BuiltinOptions_EmbeddingLookupSparseOptions = 20,
+ BuiltinOptions_MulOptions = 21,
+ BuiltinOptions_PadOptions = 22,
+ BuiltinOptions_GatherOptions = 23,
+ BuiltinOptions_BatchToSpaceNDOptions = 24,
+ BuiltinOptions_SpaceToBatchNDOptions = 25,
+ BuiltinOptions_TransposeOptions = 26,
+ BuiltinOptions_ReducerOptions = 27,
+ BuiltinOptions_SubOptions = 28,
+ BuiltinOptions_DivOptions = 29,
+ BuiltinOptions_SqueezeOptions = 30,
+ BuiltinOptions_SequenceRNNOptions = 31,
+ BuiltinOptions_StridedSliceOptions = 32,
+ BuiltinOptions_ExpOptions = 33,
+ BuiltinOptions_TopKV2Options = 34,
+ BuiltinOptions_SplitOptions = 35,
+ BuiltinOptions_LogSoftmaxOptions = 36,
+ BuiltinOptions_CastOptions = 37,
+ BuiltinOptions_DequantizeOptions = 38,
+ BuiltinOptions_MaximumMinimumOptions = 39,
+ BuiltinOptions_ArgMaxOptions = 40,
+ BuiltinOptions_LessOptions = 41,
+ BuiltinOptions_NegOptions = 42,
+ BuiltinOptions_PadV2Options = 43,
+ BuiltinOptions_GreaterOptions = 44,
+ BuiltinOptions_GreaterEqualOptions = 45,
+ BuiltinOptions_LessEqualOptions = 46,
+ BuiltinOptions_SelectOptions = 47,
+ BuiltinOptions_SliceOptions = 48,
+ BuiltinOptions_TransposeConvOptions = 49,
+ BuiltinOptions_SparseToDenseOptions = 50,
+ BuiltinOptions_TileOptions = 51,
+ BuiltinOptions_ExpandDimsOptions = 52,
+ BuiltinOptions_EqualOptions = 53,
+ BuiltinOptions_NotEqualOptions = 54,
+ BuiltinOptions_ShapeOptions = 55,
+ BuiltinOptions_PowOptions = 56,
+ BuiltinOptions_ArgMinOptions = 57,
+ BuiltinOptions_FakeQuantOptions = 58,
+ BuiltinOptions_PackOptions = 59,
+ BuiltinOptions_LogicalOrOptions = 60,
+ BuiltinOptions_OneHotOptions = 61,
+ BuiltinOptions_LogicalAndOptions = 62,
+ BuiltinOptions_LogicalNotOptions = 63,
+ BuiltinOptions_UnpackOptions = 64,
+ BuiltinOptions_FloorDivOptions = 65,
+ BuiltinOptions_SquareOptions = 66,
+ BuiltinOptions_ZerosLikeOptions = 67,
+ BuiltinOptions_FillOptions = 68,
+ BuiltinOptions_BidirectionalSequenceLSTMOptions = 69,
+ BuiltinOptions_BidirectionalSequenceRNNOptions = 70,
+ BuiltinOptions_UnidirectionalSequenceLSTMOptions = 71,
+ BuiltinOptions_FloorModOptions = 72,
+ BuiltinOptions_RangeOptions = 73,
+ BuiltinOptions_ResizeNearestNeighborOptions = 74,
+ BuiltinOptions_LeakyReluOptions = 75,
+ BuiltinOptions_SquaredDifferenceOptions = 76,
+ BuiltinOptions_MirrorPadOptions = 77,
+ BuiltinOptions_AbsOptions = 78,
+ BuiltinOptions_SplitVOptions = 79,
+ BuiltinOptions_UniqueOptions = 80,
+ BuiltinOptions_ReverseV2Options = 81,
+ BuiltinOptions_AddNOptions = 82,
+ BuiltinOptions_GatherNdOptions = 83,
+ BuiltinOptions_CosOptions = 84,
+ BuiltinOptions_WhereOptions = 85,
+ BuiltinOptions_RankOptions = 86,
+ BuiltinOptions_ReverseSequenceOptions = 87,
+ BuiltinOptions_MatrixDiagOptions = 88,
+ BuiltinOptions_QuantizeOptions = 89,
+ BuiltinOptions_MatrixSetDiagOptions = 90,
+ BuiltinOptions_HardSwishOptions = 91,
+ BuiltinOptions_IfOptions = 92,
+ BuiltinOptions_WhileOptions = 93,
+ BuiltinOptions_DepthToSpaceOptions = 94,
+ BuiltinOptions_NonMaxSuppressionV4Options = 95,
+ BuiltinOptions_NonMaxSuppressionV5Options = 96,
+ BuiltinOptions_ScatterNdOptions = 97,
+ BuiltinOptions_SelectV2Options = 98,
+ BuiltinOptions_DensifyOptions = 99,
+ BuiltinOptions_SegmentSumOptions = 100,
+ BuiltinOptions_BatchMatMulOptions = 101,
+ BuiltinOptions_CumsumOptions = 102,
+ BuiltinOptions_CallOnceOptions = 103,
+ BuiltinOptions_BroadcastToOptions = 104,
+ BuiltinOptions_Rfft2dOptions = 105,
+ BuiltinOptions_Conv3DOptions = 106,
+ BuiltinOptions_HashtableOptions = 107,
+ BuiltinOptions_HashtableFindOptions = 108,
+ BuiltinOptions_HashtableImportOptions = 109,
+ BuiltinOptions_HashtableSizeOptions = 110,
+ BuiltinOptions_VarHandleOptions = 111,
+ BuiltinOptions_ReadVariableOptions = 112,
+ BuiltinOptions_AssignVariableOptions = 113,
+ BuiltinOptions_RandomOptions = 114,
+ BuiltinOptions_BCQGatherOptions = 252,
+ BuiltinOptions_BCQFullyConnectedOptions = 253,
+ BuiltinOptions_InstanceNormOptions = 254,
+ BuiltinOptions_MIN = BuiltinOptions_NONE,
+ BuiltinOptions_MAX = BuiltinOptions_InstanceNormOptions
+};
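+
+// Note the gap in the assigned values: 115..251 are unused, and the
+// circle-specific option tables (BCQGatherOptions, BCQFullyConnectedOptions,
+// InstanceNormOptions) are pinned at 252..254, presumably so that option
+// types added upstream in TFLite can take new low values without colliding
+// with the circle extensions.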
+
+inline const BuiltinOptions (&EnumValuesBuiltinOptions())[118]
+{
+ static const BuiltinOptions values[] = {BuiltinOptions_NONE,
+ BuiltinOptions_Conv2DOptions,
+ BuiltinOptions_DepthwiseConv2DOptions,
+ BuiltinOptions_ConcatEmbeddingsOptions,
+ BuiltinOptions_LSHProjectionOptions,
+ BuiltinOptions_Pool2DOptions,
+ BuiltinOptions_SVDFOptions,
+ BuiltinOptions_RNNOptions,
+ BuiltinOptions_FullyConnectedOptions,
+ BuiltinOptions_SoftmaxOptions,
+ BuiltinOptions_ConcatenationOptions,
+ BuiltinOptions_AddOptions,
+ BuiltinOptions_L2NormOptions,
+ BuiltinOptions_LocalResponseNormalizationOptions,
+ BuiltinOptions_LSTMOptions,
+ BuiltinOptions_ResizeBilinearOptions,
+ BuiltinOptions_CallOptions,
+ BuiltinOptions_ReshapeOptions,
+ BuiltinOptions_SkipGramOptions,
+ BuiltinOptions_SpaceToDepthOptions,
+ BuiltinOptions_EmbeddingLookupSparseOptions,
+ BuiltinOptions_MulOptions,
+ BuiltinOptions_PadOptions,
+ BuiltinOptions_GatherOptions,
+ BuiltinOptions_BatchToSpaceNDOptions,
+ BuiltinOptions_SpaceToBatchNDOptions,
+ BuiltinOptions_TransposeOptions,
+ BuiltinOptions_ReducerOptions,
+ BuiltinOptions_SubOptions,
+ BuiltinOptions_DivOptions,
+ BuiltinOptions_SqueezeOptions,
+ BuiltinOptions_SequenceRNNOptions,
+ BuiltinOptions_StridedSliceOptions,
+ BuiltinOptions_ExpOptions,
+ BuiltinOptions_TopKV2Options,
+ BuiltinOptions_SplitOptions,
+ BuiltinOptions_LogSoftmaxOptions,
+ BuiltinOptions_CastOptions,
+ BuiltinOptions_DequantizeOptions,
+ BuiltinOptions_MaximumMinimumOptions,
+ BuiltinOptions_ArgMaxOptions,
+ BuiltinOptions_LessOptions,
+ BuiltinOptions_NegOptions,
+ BuiltinOptions_PadV2Options,
+ BuiltinOptions_GreaterOptions,
+ BuiltinOptions_GreaterEqualOptions,
+ BuiltinOptions_LessEqualOptions,
+ BuiltinOptions_SelectOptions,
+ BuiltinOptions_SliceOptions,
+ BuiltinOptions_TransposeConvOptions,
+ BuiltinOptions_SparseToDenseOptions,
+ BuiltinOptions_TileOptions,
+ BuiltinOptions_ExpandDimsOptions,
+ BuiltinOptions_EqualOptions,
+ BuiltinOptions_NotEqualOptions,
+ BuiltinOptions_ShapeOptions,
+ BuiltinOptions_PowOptions,
+ BuiltinOptions_ArgMinOptions,
+ BuiltinOptions_FakeQuantOptions,
+ BuiltinOptions_PackOptions,
+ BuiltinOptions_LogicalOrOptions,
+ BuiltinOptions_OneHotOptions,
+ BuiltinOptions_LogicalAndOptions,
+ BuiltinOptions_LogicalNotOptions,
+ BuiltinOptions_UnpackOptions,
+ BuiltinOptions_FloorDivOptions,
+ BuiltinOptions_SquareOptions,
+ BuiltinOptions_ZerosLikeOptions,
+ BuiltinOptions_FillOptions,
+ BuiltinOptions_BidirectionalSequenceLSTMOptions,
+ BuiltinOptions_BidirectionalSequenceRNNOptions,
+ BuiltinOptions_UnidirectionalSequenceLSTMOptions,
+ BuiltinOptions_FloorModOptions,
+ BuiltinOptions_RangeOptions,
+ BuiltinOptions_ResizeNearestNeighborOptions,
+ BuiltinOptions_LeakyReluOptions,
+ BuiltinOptions_SquaredDifferenceOptions,
+ BuiltinOptions_MirrorPadOptions,
+ BuiltinOptions_AbsOptions,
+ BuiltinOptions_SplitVOptions,
+ BuiltinOptions_UniqueOptions,
+ BuiltinOptions_ReverseV2Options,
+ BuiltinOptions_AddNOptions,
+ BuiltinOptions_GatherNdOptions,
+ BuiltinOptions_CosOptions,
+ BuiltinOptions_WhereOptions,
+ BuiltinOptions_RankOptions,
+ BuiltinOptions_ReverseSequenceOptions,
+ BuiltinOptions_MatrixDiagOptions,
+ BuiltinOptions_QuantizeOptions,
+ BuiltinOptions_MatrixSetDiagOptions,
+ BuiltinOptions_HardSwishOptions,
+ BuiltinOptions_IfOptions,
+ BuiltinOptions_WhileOptions,
+ BuiltinOptions_DepthToSpaceOptions,
+ BuiltinOptions_NonMaxSuppressionV4Options,
+ BuiltinOptions_NonMaxSuppressionV5Options,
+ BuiltinOptions_ScatterNdOptions,
+ BuiltinOptions_SelectV2Options,
+ BuiltinOptions_DensifyOptions,
+ BuiltinOptions_SegmentSumOptions,
+ BuiltinOptions_BatchMatMulOptions,
+ BuiltinOptions_CumsumOptions,
+ BuiltinOptions_CallOnceOptions,
+ BuiltinOptions_BroadcastToOptions,
+ BuiltinOptions_Rfft2dOptions,
+ BuiltinOptions_Conv3DOptions,
+ BuiltinOptions_HashtableOptions,
+ BuiltinOptions_HashtableFindOptions,
+ BuiltinOptions_HashtableImportOptions,
+ BuiltinOptions_HashtableSizeOptions,
+ BuiltinOptions_VarHandleOptions,
+ BuiltinOptions_ReadVariableOptions,
+ BuiltinOptions_AssignVariableOptions,
+ BuiltinOptions_RandomOptions,
+ BuiltinOptions_BCQGatherOptions,
+ BuiltinOptions_BCQFullyConnectedOptions,
+ BuiltinOptions_InstanceNormOptions};
+ return values;
+}
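+
+// Unlike the sparse numeric range, this 118-entry array is dense, so it is
+// the natural way to iterate every valid BuiltinOptions tag.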
+
+inline const char *const *EnumNamesBuiltinOptions()
+{
+ static const char *const names[256] = {"NONE",
+ "Conv2DOptions",
+ "DepthwiseConv2DOptions",
+ "ConcatEmbeddingsOptions",
+ "LSHProjectionOptions",
+ "Pool2DOptions",
+ "SVDFOptions",
+ "RNNOptions",
+ "FullyConnectedOptions",
+ "SoftmaxOptions",
+ "ConcatenationOptions",
+ "AddOptions",
+ "L2NormOptions",
+ "LocalResponseNormalizationOptions",
+ "LSTMOptions",
+ "ResizeBilinearOptions",
+ "CallOptions",
+ "ReshapeOptions",
+ "SkipGramOptions",
+ "SpaceToDepthOptions",
+ "EmbeddingLookupSparseOptions",
+ "MulOptions",
+ "PadOptions",
+ "GatherOptions",
+ "BatchToSpaceNDOptions",
+ "SpaceToBatchNDOptions",
+ "TransposeOptions",
+ "ReducerOptions",
+ "SubOptions",
+ "DivOptions",
+ "SqueezeOptions",
+ "SequenceRNNOptions",
+ "StridedSliceOptions",
+ "ExpOptions",
+ "TopKV2Options",
+ "SplitOptions",
+ "LogSoftmaxOptions",
+ "CastOptions",
+ "DequantizeOptions",
+ "MaximumMinimumOptions",
+ "ArgMaxOptions",
+ "LessOptions",
+ "NegOptions",
+ "PadV2Options",
+ "GreaterOptions",
+ "GreaterEqualOptions",
+ "LessEqualOptions",
+ "SelectOptions",
+ "SliceOptions",
+ "TransposeConvOptions",
+ "SparseToDenseOptions",
+ "TileOptions",
+ "ExpandDimsOptions",
+ "EqualOptions",
+ "NotEqualOptions",
+ "ShapeOptions",
+ "PowOptions",
+ "ArgMinOptions",
+ "FakeQuantOptions",
+ "PackOptions",
+ "LogicalOrOptions",
+ "OneHotOptions",
+ "LogicalAndOptions",
+ "LogicalNotOptions",
+ "UnpackOptions",
+ "FloorDivOptions",
+ "SquareOptions",
+ "ZerosLikeOptions",
+ "FillOptions",
+ "BidirectionalSequenceLSTMOptions",
+ "BidirectionalSequenceRNNOptions",
+ "UnidirectionalSequenceLSTMOptions",
+ "FloorModOptions",
+ "RangeOptions",
+ "ResizeNearestNeighborOptions",
+ "LeakyReluOptions",
+ "SquaredDifferenceOptions",
+ "MirrorPadOptions",
+ "AbsOptions",
+ "SplitVOptions",
+ "UniqueOptions",
+ "ReverseV2Options",
+ "AddNOptions",
+ "GatherNdOptions",
+ "CosOptions",
+ "WhereOptions",
+ "RankOptions",
+ "ReverseSequenceOptions",
+ "MatrixDiagOptions",
+ "QuantizeOptions",
+ "MatrixSetDiagOptions",
+ "HardSwishOptions",
+ "IfOptions",
+ "WhileOptions",
+ "DepthToSpaceOptions",
+ "NonMaxSuppressionV4Options",
+ "NonMaxSuppressionV5Options",
+ "ScatterNdOptions",
+ "SelectV2Options",
+ "DensifyOptions",
+ "SegmentSumOptions",
+ "BatchMatMulOptions",
+ "CumsumOptions",
+ "CallOnceOptions",
+ "BroadcastToOptions",
+ "Rfft2dOptions",
+ "Conv3DOptions",
+ "HashtableOptions",
+ "HashtableFindOptions",
+ "HashtableImportOptions",
+ "HashtableSizeOptions",
+ "VarHandleOptions",
+ "ReadVariableOptions",
+ "AssignVariableOptions",
+ "RandomOptions",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "BCQGatherOptions",
+ "BCQFullyConnectedOptions",
+ "InstanceNormOptions",
+ nullptr};
+ return names;
+}
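+
+// This table is indexed directly by enum value (no offset): 256 slots, with
+// empty strings padding the unassigned 115..251 range and a trailing nullptr
+// terminator.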
+
+inline const char *EnumNameBuiltinOptions(BuiltinOptions e)
+{
+ if (flatbuffers::IsOutRange(e, BuiltinOptions_NONE, BuiltinOptions_InstanceNormOptions))
+ return "";
+ const size_t index = static_cast<size_t>(e);
+ return EnumNamesBuiltinOptions()[index];
+}
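+
+// Minimal lookup sketch for diagnostics, assuming `op` is a
+// `const circle::Operator &` decoded from a model:
+//
+//   const char *tag = circle::EnumNameBuiltinOptions(op.builtin_options_type());
+//   // Note: "" is returned both for out-of-range values and for the
+//   // unassigned 115..251 hole.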
+
+template <typename T> struct BuiltinOptionsTraits
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_NONE;
+};
+
+template <> struct BuiltinOptionsTraits<circle::Conv2DOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_Conv2DOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::DepthwiseConv2DOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_DepthwiseConv2DOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::ConcatEmbeddingsOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ConcatEmbeddingsOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::LSHProjectionOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_LSHProjectionOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::Pool2DOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_Pool2DOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::SVDFOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SVDFOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::RNNOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_RNNOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::FullyConnectedOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_FullyConnectedOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::SoftmaxOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SoftmaxOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::ConcatenationOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ConcatenationOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::AddOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_AddOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::L2NormOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_L2NormOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::LocalResponseNormalizationOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_LocalResponseNormalizationOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::LSTMOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_LSTMOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::ResizeBilinearOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ResizeBilinearOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::CallOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_CallOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::ReshapeOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ReshapeOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::SkipGramOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SkipGramOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::SpaceToDepthOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SpaceToDepthOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::EmbeddingLookupSparseOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_EmbeddingLookupSparseOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::MulOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_MulOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::PadOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_PadOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::GatherOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_GatherOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::BatchToSpaceNDOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_BatchToSpaceNDOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::SpaceToBatchNDOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SpaceToBatchNDOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::TransposeOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_TransposeOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::ReducerOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ReducerOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::SubOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SubOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::DivOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_DivOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::SqueezeOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SqueezeOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::SequenceRNNOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SequenceRNNOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::StridedSliceOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_StridedSliceOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::ExpOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ExpOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::TopKV2Options>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_TopKV2Options;
+};
+
+template <> struct BuiltinOptionsTraits<circle::SplitOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SplitOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::LogSoftmaxOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_LogSoftmaxOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::CastOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_CastOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::DequantizeOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_DequantizeOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::MaximumMinimumOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_MaximumMinimumOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::ArgMaxOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ArgMaxOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::LessOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_LessOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::NegOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_NegOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::PadV2Options>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_PadV2Options;
+};
+
+template <> struct BuiltinOptionsTraits<circle::GreaterOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_GreaterOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::GreaterEqualOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_GreaterEqualOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::LessEqualOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_LessEqualOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::SelectOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SelectOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::SliceOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SliceOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::TransposeConvOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_TransposeConvOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::SparseToDenseOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SparseToDenseOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::TileOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_TileOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::ExpandDimsOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ExpandDimsOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::EqualOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_EqualOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::NotEqualOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_NotEqualOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::ShapeOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ShapeOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::PowOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_PowOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::ArgMinOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ArgMinOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::FakeQuantOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_FakeQuantOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::PackOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_PackOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::LogicalOrOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_LogicalOrOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::OneHotOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_OneHotOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::LogicalAndOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_LogicalAndOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::LogicalNotOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_LogicalNotOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::UnpackOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_UnpackOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::FloorDivOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_FloorDivOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::SquareOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SquareOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::ZerosLikeOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ZerosLikeOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::FillOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_FillOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::BidirectionalSequenceLSTMOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceLSTMOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::BidirectionalSequenceRNNOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceRNNOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::UnidirectionalSequenceLSTMOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_UnidirectionalSequenceLSTMOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::FloorModOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_FloorModOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::RangeOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_RangeOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::ResizeNearestNeighborOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ResizeNearestNeighborOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::LeakyReluOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_LeakyReluOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::SquaredDifferenceOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SquaredDifferenceOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::MirrorPadOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_MirrorPadOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::AbsOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_AbsOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::SplitVOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SplitVOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::UniqueOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_UniqueOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::ReverseV2Options>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ReverseV2Options;
+};
+
+template <> struct BuiltinOptionsTraits<circle::AddNOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_AddNOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::GatherNdOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_GatherNdOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::CosOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_CosOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::WhereOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_WhereOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::RankOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_RankOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::ReverseSequenceOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ReverseSequenceOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::MatrixDiagOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_MatrixDiagOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::QuantizeOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_QuantizeOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::MatrixSetDiagOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_MatrixSetDiagOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::HardSwishOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_HardSwishOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::IfOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_IfOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::WhileOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_WhileOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::DepthToSpaceOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_DepthToSpaceOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::NonMaxSuppressionV4Options>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_NonMaxSuppressionV4Options;
+};
+
+template <> struct BuiltinOptionsTraits<circle::NonMaxSuppressionV5Options>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_NonMaxSuppressionV5Options;
+};
+
+template <> struct BuiltinOptionsTraits<circle::ScatterNdOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ScatterNdOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::SelectV2Options>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SelectV2Options;
+};
+
+template <> struct BuiltinOptionsTraits<circle::DensifyOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_DensifyOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::SegmentSumOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SegmentSumOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::BatchMatMulOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_BatchMatMulOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::CumsumOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_CumsumOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::CallOnceOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_CallOnceOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::BroadcastToOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_BroadcastToOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::Rfft2dOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_Rfft2dOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::Conv3DOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_Conv3DOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::HashtableOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_HashtableOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::HashtableFindOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_HashtableFindOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::HashtableImportOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_HashtableImportOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::HashtableSizeOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_HashtableSizeOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::VarHandleOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_VarHandleOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::ReadVariableOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ReadVariableOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::AssignVariableOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_AssignVariableOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::RandomOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_RandomOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::BCQGatherOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_BCQGatherOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::BCQFullyConnectedOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_BCQFullyConnectedOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::InstanceNormOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_InstanceNormOptions;
+};
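+
+// The BuiltinOptionsTraits specializations above form a compile-time map from
+// each option table type to its union tag; BuiltinOptionsUnion::Set() below
+// relies on it to stamp the correct `type` when a native object is stored.
+// The mapping holds by construction, e.g.:
+//
+//   static_assert(circle::BuiltinOptionsTraits<circle::Conv2DOptions>::enum_value ==
+//                   circle::BuiltinOptions_Conv2DOptions,
+//                 "traits must agree with the union tag");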
+
+struct BuiltinOptionsUnion
+{
+ BuiltinOptions type;
+ void *value;
+
+ BuiltinOptionsUnion() : type(BuiltinOptions_NONE), value(nullptr) {}
+ BuiltinOptionsUnion(BuiltinOptionsUnion &&u) FLATBUFFERS_NOEXCEPT : type(BuiltinOptions_NONE),
+ value(nullptr)
+ {
+ std::swap(type, u.type);
+ std::swap(value, u.value);
+ }
+ BuiltinOptionsUnion(const BuiltinOptionsUnion &);
+ BuiltinOptionsUnion &operator=(const BuiltinOptionsUnion &u)
+ {
+ BuiltinOptionsUnion t(u);
+ std::swap(type, t.type);
+ std::swap(value, t.value);
+ return *this;
+ }
+ BuiltinOptionsUnion &operator=(BuiltinOptionsUnion &&u) FLATBUFFERS_NOEXCEPT
+ {
+ std::swap(type, u.type);
+ std::swap(value, u.value);
+ return *this;
+ }
+ ~BuiltinOptionsUnion() { Reset(); }
+
+ void Reset();
+
+#ifndef FLATBUFFERS_CPP98_STL
+ template <typename T> void Set(T &&val)
+ {
+ using RT = typename std::remove_reference<T>::type;
+ Reset();
+ type = BuiltinOptionsTraits<typename RT::TableType>::enum_value;
+ if (type != BuiltinOptions_NONE)
+ {
+ value = new RT(std::forward<T>(val));
+ }
+ }
+#endif // FLATBUFFERS_CPP98_STL
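+ // Set() above deduces the union tag from the stored object's TableType via
+ // BuiltinOptionsTraits, freeing callers from passing the tag by hand. A
+ // minimal sketch:
+ //
+ //   circle::BuiltinOptionsUnion u;
+ //   u.Set(circle::Conv2DOptionsT{});
+ //   assert(u.type == circle::BuiltinOptions_Conv2DOptions);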
+
+ static void *UnPack(const void *obj, BuiltinOptions type,
+ const flatbuffers::resolver_function_t *resolver);
+ flatbuffers::Offset<void> Pack(flatbuffers::FlatBufferBuilder &_fbb,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr) const;
+
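+ // Each As*Options() accessor below is a checked cast: it returns the stored
+ // native table only when the tag matches, and nullptr otherwise, so callers
+ // can branch on the result instead of inspecting `type` first.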
+ circle::Conv2DOptionsT *AsConv2DOptions()
+ {
+ return type == BuiltinOptions_Conv2DOptions ? reinterpret_cast<circle::Conv2DOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::Conv2DOptionsT *AsConv2DOptions() const
+ {
+ return type == BuiltinOptions_Conv2DOptions
+ ? reinterpret_cast<const circle::Conv2DOptionsT *>(value)
+ : nullptr;
+ }
+ circle::DepthwiseConv2DOptionsT *AsDepthwiseConv2DOptions()
+ {
+ return type == BuiltinOptions_DepthwiseConv2DOptions
+ ? reinterpret_cast<circle::DepthwiseConv2DOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::DepthwiseConv2DOptionsT *AsDepthwiseConv2DOptions() const
+ {
+ return type == BuiltinOptions_DepthwiseConv2DOptions
+ ? reinterpret_cast<const circle::DepthwiseConv2DOptionsT *>(value)
+ : nullptr;
+ }
+ circle::ConcatEmbeddingsOptionsT *AsConcatEmbeddingsOptions()
+ {
+ return type == BuiltinOptions_ConcatEmbeddingsOptions
+ ? reinterpret_cast<circle::ConcatEmbeddingsOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::ConcatEmbeddingsOptionsT *AsConcatEmbeddingsOptions() const
+ {
+ return type == BuiltinOptions_ConcatEmbeddingsOptions
+ ? reinterpret_cast<const circle::ConcatEmbeddingsOptionsT *>(value)
+ : nullptr;
+ }
+ circle::LSHProjectionOptionsT *AsLSHProjectionOptions()
+ {
+ return type == BuiltinOptions_LSHProjectionOptions
+ ? reinterpret_cast<circle::LSHProjectionOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::LSHProjectionOptionsT *AsLSHProjectionOptions() const
+ {
+ return type == BuiltinOptions_LSHProjectionOptions
+ ? reinterpret_cast<const circle::LSHProjectionOptionsT *>(value)
+ : nullptr;
+ }
+ circle::Pool2DOptionsT *AsPool2DOptions()
+ {
+ return type == BuiltinOptions_Pool2DOptions ? reinterpret_cast<circle::Pool2DOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::Pool2DOptionsT *AsPool2DOptions() const
+ {
+ return type == BuiltinOptions_Pool2DOptions
+ ? reinterpret_cast<const circle::Pool2DOptionsT *>(value)
+ : nullptr;
+ }
+ circle::SVDFOptionsT *AsSVDFOptions()
+ {
+ return type == BuiltinOptions_SVDFOptions ? reinterpret_cast<circle::SVDFOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::SVDFOptionsT *AsSVDFOptions() const
+ {
+ return type == BuiltinOptions_SVDFOptions
+ ? reinterpret_cast<const circle::SVDFOptionsT *>(value)
+ : nullptr;
+ }
+ circle::RNNOptionsT *AsRNNOptions()
+ {
+ return type == BuiltinOptions_RNNOptions ? reinterpret_cast<circle::RNNOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::RNNOptionsT *AsRNNOptions() const
+ {
+ return type == BuiltinOptions_RNNOptions ? reinterpret_cast<const circle::RNNOptionsT *>(value)
+ : nullptr;
+ }
+ circle::FullyConnectedOptionsT *AsFullyConnectedOptions()
+ {
+ return type == BuiltinOptions_FullyConnectedOptions
+ ? reinterpret_cast<circle::FullyConnectedOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::FullyConnectedOptionsT *AsFullyConnectedOptions() const
+ {
+ return type == BuiltinOptions_FullyConnectedOptions
+ ? reinterpret_cast<const circle::FullyConnectedOptionsT *>(value)
+ : nullptr;
+ }
+ circle::SoftmaxOptionsT *AsSoftmaxOptions()
+ {
+ return type == BuiltinOptions_SoftmaxOptions
+ ? reinterpret_cast<circle::SoftmaxOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::SoftmaxOptionsT *AsSoftmaxOptions() const
+ {
+ return type == BuiltinOptions_SoftmaxOptions
+ ? reinterpret_cast<const circle::SoftmaxOptionsT *>(value)
+ : nullptr;
+ }
+ circle::ConcatenationOptionsT *AsConcatenationOptions()
+ {
+ return type == BuiltinOptions_ConcatenationOptions
+ ? reinterpret_cast<circle::ConcatenationOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::ConcatenationOptionsT *AsConcatenationOptions() const
+ {
+ return type == BuiltinOptions_ConcatenationOptions
+ ? reinterpret_cast<const circle::ConcatenationOptionsT *>(value)
+ : nullptr;
+ }
+ circle::AddOptionsT *AsAddOptions()
+ {
+ return type == BuiltinOptions_AddOptions ? reinterpret_cast<circle::AddOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::AddOptionsT *AsAddOptions() const
+ {
+ return type == BuiltinOptions_AddOptions ? reinterpret_cast<const circle::AddOptionsT *>(value)
+ : nullptr;
+ }
+ circle::L2NormOptionsT *AsL2NormOptions()
+ {
+ return type == BuiltinOptions_L2NormOptions ? reinterpret_cast<circle::L2NormOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::L2NormOptionsT *AsL2NormOptions() const
+ {
+ return type == BuiltinOptions_L2NormOptions
+ ? reinterpret_cast<const circle::L2NormOptionsT *>(value)
+ : nullptr;
+ }
+ circle::LocalResponseNormalizationOptionsT *AsLocalResponseNormalizationOptions()
+ {
+ return type == BuiltinOptions_LocalResponseNormalizationOptions
+ ? reinterpret_cast<circle::LocalResponseNormalizationOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::LocalResponseNormalizationOptionsT *AsLocalResponseNormalizationOptions() const
+ {
+ return type == BuiltinOptions_LocalResponseNormalizationOptions
+ ? reinterpret_cast<const circle::LocalResponseNormalizationOptionsT *>(value)
+ : nullptr;
+ }
+ circle::LSTMOptionsT *AsLSTMOptions()
+ {
+ return type == BuiltinOptions_LSTMOptions ? reinterpret_cast<circle::LSTMOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::LSTMOptionsT *AsLSTMOptions() const
+ {
+ return type == BuiltinOptions_LSTMOptions
+ ? reinterpret_cast<const circle::LSTMOptionsT *>(value)
+ : nullptr;
+ }
+ circle::ResizeBilinearOptionsT *AsResizeBilinearOptions()
+ {
+ return type == BuiltinOptions_ResizeBilinearOptions
+ ? reinterpret_cast<circle::ResizeBilinearOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::ResizeBilinearOptionsT *AsResizeBilinearOptions() const
+ {
+ return type == BuiltinOptions_ResizeBilinearOptions
+ ? reinterpret_cast<const circle::ResizeBilinearOptionsT *>(value)
+ : nullptr;
+ }
+ circle::CallOptionsT *AsCallOptions()
+ {
+ return type == BuiltinOptions_CallOptions ? reinterpret_cast<circle::CallOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::CallOptionsT *AsCallOptions() const
+ {
+ return type == BuiltinOptions_CallOptions
+ ? reinterpret_cast<const circle::CallOptionsT *>(value)
+ : nullptr;
+ }
+ circle::ReshapeOptionsT *AsReshapeOptions()
+ {
+ return type == BuiltinOptions_ReshapeOptions
+ ? reinterpret_cast<circle::ReshapeOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::ReshapeOptionsT *AsReshapeOptions() const
+ {
+ return type == BuiltinOptions_ReshapeOptions
+ ? reinterpret_cast<const circle::ReshapeOptionsT *>(value)
+ : nullptr;
+ }
+ circle::SkipGramOptionsT *AsSkipGramOptions()
+ {
+ return type == BuiltinOptions_SkipGramOptions
+ ? reinterpret_cast<circle::SkipGramOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::SkipGramOptionsT *AsSkipGramOptions() const
+ {
+ return type == BuiltinOptions_SkipGramOptions
+ ? reinterpret_cast<const circle::SkipGramOptionsT *>(value)
+ : nullptr;
+ }
+ circle::SpaceToDepthOptionsT *AsSpaceToDepthOptions()
+ {
+ return type == BuiltinOptions_SpaceToDepthOptions
+ ? reinterpret_cast<circle::SpaceToDepthOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::SpaceToDepthOptionsT *AsSpaceToDepthOptions() const
+ {
+ return type == BuiltinOptions_SpaceToDepthOptions
+ ? reinterpret_cast<const circle::SpaceToDepthOptionsT *>(value)
+ : nullptr;
+ }
+ circle::EmbeddingLookupSparseOptionsT *AsEmbeddingLookupSparseOptions()
+ {
+ return type == BuiltinOptions_EmbeddingLookupSparseOptions
+ ? reinterpret_cast<circle::EmbeddingLookupSparseOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::EmbeddingLookupSparseOptionsT *AsEmbeddingLookupSparseOptions() const
+ {
+ return type == BuiltinOptions_EmbeddingLookupSparseOptions
+ ? reinterpret_cast<const circle::EmbeddingLookupSparseOptionsT *>(value)
+ : nullptr;
+ }
+ circle::MulOptionsT *AsMulOptions()
+ {
+ return type == BuiltinOptions_MulOptions ? reinterpret_cast<circle::MulOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::MulOptionsT *AsMulOptions() const
+ {
+ return type == BuiltinOptions_MulOptions ? reinterpret_cast<const circle::MulOptionsT *>(value)
+ : nullptr;
+ }
+ circle::PadOptionsT *AsPadOptions()
+ {
+ return type == BuiltinOptions_PadOptions ? reinterpret_cast<circle::PadOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::PadOptionsT *AsPadOptions() const
+ {
+ return type == BuiltinOptions_PadOptions ? reinterpret_cast<const circle::PadOptionsT *>(value)
+ : nullptr;
+ }
+ circle::GatherOptionsT *AsGatherOptions()
+ {
+ return type == BuiltinOptions_GatherOptions ? reinterpret_cast<circle::GatherOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::GatherOptionsT *AsGatherOptions() const
+ {
+ return type == BuiltinOptions_GatherOptions
+ ? reinterpret_cast<const circle::GatherOptionsT *>(value)
+ : nullptr;
+ }
+ circle::BatchToSpaceNDOptionsT *AsBatchToSpaceNDOptions()
+ {
+ return type == BuiltinOptions_BatchToSpaceNDOptions
+ ? reinterpret_cast<circle::BatchToSpaceNDOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::BatchToSpaceNDOptionsT *AsBatchToSpaceNDOptions() const
+ {
+ return type == BuiltinOptions_BatchToSpaceNDOptions
+ ? reinterpret_cast<const circle::BatchToSpaceNDOptionsT *>(value)
+ : nullptr;
+ }
+ circle::SpaceToBatchNDOptionsT *AsSpaceToBatchNDOptions()
+ {
+ return type == BuiltinOptions_SpaceToBatchNDOptions
+ ? reinterpret_cast<circle::SpaceToBatchNDOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::SpaceToBatchNDOptionsT *AsSpaceToBatchNDOptions() const
+ {
+ return type == BuiltinOptions_SpaceToBatchNDOptions
+ ? reinterpret_cast<const circle::SpaceToBatchNDOptionsT *>(value)
+ : nullptr;
+ }
+ circle::TransposeOptionsT *AsTransposeOptions()
+ {
+ return type == BuiltinOptions_TransposeOptions
+ ? reinterpret_cast<circle::TransposeOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::TransposeOptionsT *AsTransposeOptions() const
+ {
+ return type == BuiltinOptions_TransposeOptions
+ ? reinterpret_cast<const circle::TransposeOptionsT *>(value)
+ : nullptr;
+ }
+ circle::ReducerOptionsT *AsReducerOptions()
+ {
+ return type == BuiltinOptions_ReducerOptions
+ ? reinterpret_cast<circle::ReducerOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::ReducerOptionsT *AsReducerOptions() const
+ {
+ return type == BuiltinOptions_ReducerOptions
+ ? reinterpret_cast<const circle::ReducerOptionsT *>(value)
+ : nullptr;
+ }
+ circle::SubOptionsT *AsSubOptions()
+ {
+ return type == BuiltinOptions_SubOptions ? reinterpret_cast<circle::SubOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::SubOptionsT *AsSubOptions() const
+ {
+ return type == BuiltinOptions_SubOptions ? reinterpret_cast<const circle::SubOptionsT *>(value)
+ : nullptr;
+ }
+ circle::DivOptionsT *AsDivOptions()
+ {
+ return type == BuiltinOptions_DivOptions ? reinterpret_cast<circle::DivOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::DivOptionsT *AsDivOptions() const
+ {
+ return type == BuiltinOptions_DivOptions ? reinterpret_cast<const circle::DivOptionsT *>(value)
+ : nullptr;
+ }
+ circle::SqueezeOptionsT *AsSqueezeOptions()
+ {
+ return type == BuiltinOptions_SqueezeOptions
+ ? reinterpret_cast<circle::SqueezeOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::SqueezeOptionsT *AsSqueezeOptions() const
+ {
+ return type == BuiltinOptions_SqueezeOptions
+ ? reinterpret_cast<const circle::SqueezeOptionsT *>(value)
+ : nullptr;
+ }
+ circle::SequenceRNNOptionsT *AsSequenceRNNOptions()
+ {
+ return type == BuiltinOptions_SequenceRNNOptions
+ ? reinterpret_cast<circle::SequenceRNNOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::SequenceRNNOptionsT *AsSequenceRNNOptions() const
+ {
+ return type == BuiltinOptions_SequenceRNNOptions
+ ? reinterpret_cast<const circle::SequenceRNNOptionsT *>(value)
+ : nullptr;
+ }
+ circle::StridedSliceOptionsT *AsStridedSliceOptions()
+ {
+ return type == BuiltinOptions_StridedSliceOptions
+ ? reinterpret_cast<circle::StridedSliceOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::StridedSliceOptionsT *AsStridedSliceOptions() const
+ {
+ return type == BuiltinOptions_StridedSliceOptions
+ ? reinterpret_cast<const circle::StridedSliceOptionsT *>(value)
+ : nullptr;
+ }
+ circle::ExpOptionsT *AsExpOptions()
+ {
+ return type == BuiltinOptions_ExpOptions ? reinterpret_cast<circle::ExpOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::ExpOptionsT *AsExpOptions() const
+ {
+ return type == BuiltinOptions_ExpOptions ? reinterpret_cast<const circle::ExpOptionsT *>(value)
+ : nullptr;
+ }
+ circle::TopKV2OptionsT *AsTopKV2Options()
+ {
+ return type == BuiltinOptions_TopKV2Options ? reinterpret_cast<circle::TopKV2OptionsT *>(value)
+ : nullptr;
+ }
+ const circle::TopKV2OptionsT *AsTopKV2Options() const
+ {
+ return type == BuiltinOptions_TopKV2Options
+ ? reinterpret_cast<const circle::TopKV2OptionsT *>(value)
+ : nullptr;
+ }
+ circle::SplitOptionsT *AsSplitOptions()
+ {
+ return type == BuiltinOptions_SplitOptions ? reinterpret_cast<circle::SplitOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::SplitOptionsT *AsSplitOptions() const
+ {
+ return type == BuiltinOptions_SplitOptions
+ ? reinterpret_cast<const circle::SplitOptionsT *>(value)
+ : nullptr;
+ }
+ circle::LogSoftmaxOptionsT *AsLogSoftmaxOptions()
+ {
+ return type == BuiltinOptions_LogSoftmaxOptions
+ ? reinterpret_cast<circle::LogSoftmaxOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::LogSoftmaxOptionsT *AsLogSoftmaxOptions() const
+ {
+ return type == BuiltinOptions_LogSoftmaxOptions
+ ? reinterpret_cast<const circle::LogSoftmaxOptionsT *>(value)
+ : nullptr;
+ }
+ circle::CastOptionsT *AsCastOptions()
+ {
+ return type == BuiltinOptions_CastOptions ? reinterpret_cast<circle::CastOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::CastOptionsT *AsCastOptions() const
+ {
+ return type == BuiltinOptions_CastOptions
+ ? reinterpret_cast<const circle::CastOptionsT *>(value)
+ : nullptr;
+ }
+ circle::DequantizeOptionsT *AsDequantizeOptions()
+ {
+ return type == BuiltinOptions_DequantizeOptions
+ ? reinterpret_cast<circle::DequantizeOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::DequantizeOptionsT *AsDequantizeOptions() const
+ {
+ return type == BuiltinOptions_DequantizeOptions
+ ? reinterpret_cast<const circle::DequantizeOptionsT *>(value)
+ : nullptr;
+ }
+ circle::MaximumMinimumOptionsT *AsMaximumMinimumOptions()
+ {
+ return type == BuiltinOptions_MaximumMinimumOptions
+ ? reinterpret_cast<circle::MaximumMinimumOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::MaximumMinimumOptionsT *AsMaximumMinimumOptions() const
+ {
+ return type == BuiltinOptions_MaximumMinimumOptions
+ ? reinterpret_cast<const circle::MaximumMinimumOptionsT *>(value)
+ : nullptr;
+ }
+ circle::ArgMaxOptionsT *AsArgMaxOptions()
+ {
+ return type == BuiltinOptions_ArgMaxOptions ? reinterpret_cast<circle::ArgMaxOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::ArgMaxOptionsT *AsArgMaxOptions() const
+ {
+ return type == BuiltinOptions_ArgMaxOptions
+ ? reinterpret_cast<const circle::ArgMaxOptionsT *>(value)
+ : nullptr;
+ }
+ circle::LessOptionsT *AsLessOptions()
+ {
+ return type == BuiltinOptions_LessOptions ? reinterpret_cast<circle::LessOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::LessOptionsT *AsLessOptions() const
+ {
+ return type == BuiltinOptions_LessOptions
+ ? reinterpret_cast<const circle::LessOptionsT *>(value)
+ : nullptr;
+ }
+ circle::NegOptionsT *AsNegOptions()
+ {
+ return type == BuiltinOptions_NegOptions ? reinterpret_cast<circle::NegOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::NegOptionsT *AsNegOptions() const
+ {
+ return type == BuiltinOptions_NegOptions ? reinterpret_cast<const circle::NegOptionsT *>(value)
+ : nullptr;
+ }
+ circle::PadV2OptionsT *AsPadV2Options()
+ {
+ return type == BuiltinOptions_PadV2Options ? reinterpret_cast<circle::PadV2OptionsT *>(value)
+ : nullptr;
+ }
+ const circle::PadV2OptionsT *AsPadV2Options() const
+ {
+ return type == BuiltinOptions_PadV2Options
+ ? reinterpret_cast<const circle::PadV2OptionsT *>(value)
+ : nullptr;
+ }
+ circle::GreaterOptionsT *AsGreaterOptions()
+ {
+ return type == BuiltinOptions_GreaterOptions
+ ? reinterpret_cast<circle::GreaterOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::GreaterOptionsT *AsGreaterOptions() const
+ {
+ return type == BuiltinOptions_GreaterOptions
+ ? reinterpret_cast<const circle::GreaterOptionsT *>(value)
+ : nullptr;
+ }
+ circle::GreaterEqualOptionsT *AsGreaterEqualOptions()
+ {
+ return type == BuiltinOptions_GreaterEqualOptions
+ ? reinterpret_cast<circle::GreaterEqualOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::GreaterEqualOptionsT *AsGreaterEqualOptions() const
+ {
+ return type == BuiltinOptions_GreaterEqualOptions
+ ? reinterpret_cast<const circle::GreaterEqualOptionsT *>(value)
+ : nullptr;
+ }
+ circle::LessEqualOptionsT *AsLessEqualOptions()
+ {
+ return type == BuiltinOptions_LessEqualOptions
+ ? reinterpret_cast<circle::LessEqualOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::LessEqualOptionsT *AsLessEqualOptions() const
+ {
+ return type == BuiltinOptions_LessEqualOptions
+ ? reinterpret_cast<const circle::LessEqualOptionsT *>(value)
+ : nullptr;
+ }
+ circle::SelectOptionsT *AsSelectOptions()
+ {
+ return type == BuiltinOptions_SelectOptions ? reinterpret_cast<circle::SelectOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::SelectOptionsT *AsSelectOptions() const
+ {
+ return type == BuiltinOptions_SelectOptions
+ ? reinterpret_cast<const circle::SelectOptionsT *>(value)
+ : nullptr;
+ }
+ circle::SliceOptionsT *AsSliceOptions()
+ {
+ return type == BuiltinOptions_SliceOptions ? reinterpret_cast<circle::SliceOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::SliceOptionsT *AsSliceOptions() const
+ {
+ return type == BuiltinOptions_SliceOptions
+ ? reinterpret_cast<const circle::SliceOptionsT *>(value)
+ : nullptr;
+ }
+ circle::TransposeConvOptionsT *AsTransposeConvOptions()
+ {
+ return type == BuiltinOptions_TransposeConvOptions
+ ? reinterpret_cast<circle::TransposeConvOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::TransposeConvOptionsT *AsTransposeConvOptions() const
+ {
+ return type == BuiltinOptions_TransposeConvOptions
+ ? reinterpret_cast<const circle::TransposeConvOptionsT *>(value)
+ : nullptr;
+ }
+ circle::SparseToDenseOptionsT *AsSparseToDenseOptions()
+ {
+ return type == BuiltinOptions_SparseToDenseOptions
+ ? reinterpret_cast<circle::SparseToDenseOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::SparseToDenseOptionsT *AsSparseToDenseOptions() const
+ {
+ return type == BuiltinOptions_SparseToDenseOptions
+ ? reinterpret_cast<const circle::SparseToDenseOptionsT *>(value)
+ : nullptr;
+ }
+ circle::TileOptionsT *AsTileOptions()
+ {
+ return type == BuiltinOptions_TileOptions ? reinterpret_cast<circle::TileOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::TileOptionsT *AsTileOptions() const
+ {
+ return type == BuiltinOptions_TileOptions
+ ? reinterpret_cast<const circle::TileOptionsT *>(value)
+ : nullptr;
+ }
+ circle::ExpandDimsOptionsT *AsExpandDimsOptions()
+ {
+ return type == BuiltinOptions_ExpandDimsOptions
+ ? reinterpret_cast<circle::ExpandDimsOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::ExpandDimsOptionsT *AsExpandDimsOptions() const
+ {
+ return type == BuiltinOptions_ExpandDimsOptions
+ ? reinterpret_cast<const circle::ExpandDimsOptionsT *>(value)
+ : nullptr;
+ }
+ circle::EqualOptionsT *AsEqualOptions()
+ {
+ return type == BuiltinOptions_EqualOptions ? reinterpret_cast<circle::EqualOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::EqualOptionsT *AsEqualOptions() const
+ {
+ return type == BuiltinOptions_EqualOptions
+ ? reinterpret_cast<const circle::EqualOptionsT *>(value)
+ : nullptr;
+ }
+ circle::NotEqualOptionsT *AsNotEqualOptions()
+ {
+ return type == BuiltinOptions_NotEqualOptions
+ ? reinterpret_cast<circle::NotEqualOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::NotEqualOptionsT *AsNotEqualOptions() const
+ {
+ return type == BuiltinOptions_NotEqualOptions
+ ? reinterpret_cast<const circle::NotEqualOptionsT *>(value)
+ : nullptr;
+ }
+ circle::ShapeOptionsT *AsShapeOptions()
+ {
+ return type == BuiltinOptions_ShapeOptions ? reinterpret_cast<circle::ShapeOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::ShapeOptionsT *AsShapeOptions() const
+ {
+ return type == BuiltinOptions_ShapeOptions
+ ? reinterpret_cast<const circle::ShapeOptionsT *>(value)
+ : nullptr;
+ }
+ circle::PowOptionsT *AsPowOptions()
+ {
+ return type == BuiltinOptions_PowOptions ? reinterpret_cast<circle::PowOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::PowOptionsT *AsPowOptions() const
+ {
+ return type == BuiltinOptions_PowOptions ? reinterpret_cast<const circle::PowOptionsT *>(value)
+ : nullptr;
+ }
+ circle::ArgMinOptionsT *AsArgMinOptions()
+ {
+ return type == BuiltinOptions_ArgMinOptions ? reinterpret_cast<circle::ArgMinOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::ArgMinOptionsT *AsArgMinOptions() const
+ {
+ return type == BuiltinOptions_ArgMinOptions
+ ? reinterpret_cast<const circle::ArgMinOptionsT *>(value)
+ : nullptr;
+ }
+ circle::FakeQuantOptionsT *AsFakeQuantOptions()
+ {
+ return type == BuiltinOptions_FakeQuantOptions
+ ? reinterpret_cast<circle::FakeQuantOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::FakeQuantOptionsT *AsFakeQuantOptions() const
+ {
+ return type == BuiltinOptions_FakeQuantOptions
+ ? reinterpret_cast<const circle::FakeQuantOptionsT *>(value)
+ : nullptr;
+ }
+ circle::PackOptionsT *AsPackOptions()
+ {
+ return type == BuiltinOptions_PackOptions ? reinterpret_cast<circle::PackOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::PackOptionsT *AsPackOptions() const
+ {
+ return type == BuiltinOptions_PackOptions
+ ? reinterpret_cast<const circle::PackOptionsT *>(value)
+ : nullptr;
+ }
+ circle::LogicalOrOptionsT *AsLogicalOrOptions()
+ {
+ return type == BuiltinOptions_LogicalOrOptions
+ ? reinterpret_cast<circle::LogicalOrOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::LogicalOrOptionsT *AsLogicalOrOptions() const
+ {
+ return type == BuiltinOptions_LogicalOrOptions
+ ? reinterpret_cast<const circle::LogicalOrOptionsT *>(value)
+ : nullptr;
+ }
+ circle::OneHotOptionsT *AsOneHotOptions()
+ {
+ return type == BuiltinOptions_OneHotOptions ? reinterpret_cast<circle::OneHotOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::OneHotOptionsT *AsOneHotOptions() const
+ {
+ return type == BuiltinOptions_OneHotOptions
+ ? reinterpret_cast<const circle::OneHotOptionsT *>(value)
+ : nullptr;
+ }
+ circle::LogicalAndOptionsT *AsLogicalAndOptions()
+ {
+ return type == BuiltinOptions_LogicalAndOptions
+ ? reinterpret_cast<circle::LogicalAndOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::LogicalAndOptionsT *AsLogicalAndOptions() const
+ {
+ return type == BuiltinOptions_LogicalAndOptions
+ ? reinterpret_cast<const circle::LogicalAndOptionsT *>(value)
+ : nullptr;
+ }
+ circle::LogicalNotOptionsT *AsLogicalNotOptions()
+ {
+ return type == BuiltinOptions_LogicalNotOptions
+ ? reinterpret_cast<circle::LogicalNotOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::LogicalNotOptionsT *AsLogicalNotOptions() const
+ {
+ return type == BuiltinOptions_LogicalNotOptions
+ ? reinterpret_cast<const circle::LogicalNotOptionsT *>(value)
+ : nullptr;
+ }
+ circle::UnpackOptionsT *AsUnpackOptions()
+ {
+ return type == BuiltinOptions_UnpackOptions ? reinterpret_cast<circle::UnpackOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::UnpackOptionsT *AsUnpackOptions() const
+ {
+ return type == BuiltinOptions_UnpackOptions
+ ? reinterpret_cast<const circle::UnpackOptionsT *>(value)
+ : nullptr;
+ }
+ circle::FloorDivOptionsT *AsFloorDivOptions()
+ {
+ return type == BuiltinOptions_FloorDivOptions
+ ? reinterpret_cast<circle::FloorDivOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::FloorDivOptionsT *AsFloorDivOptions() const
+ {
+ return type == BuiltinOptions_FloorDivOptions
+ ? reinterpret_cast<const circle::FloorDivOptionsT *>(value)
+ : nullptr;
+ }
+ circle::SquareOptionsT *AsSquareOptions()
+ {
+ return type == BuiltinOptions_SquareOptions ? reinterpret_cast<circle::SquareOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::SquareOptionsT *AsSquareOptions() const
+ {
+ return type == BuiltinOptions_SquareOptions
+ ? reinterpret_cast<const circle::SquareOptionsT *>(value)
+ : nullptr;
+ }
+ circle::ZerosLikeOptionsT *AsZerosLikeOptions()
+ {
+ return type == BuiltinOptions_ZerosLikeOptions
+ ? reinterpret_cast<circle::ZerosLikeOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::ZerosLikeOptionsT *AsZerosLikeOptions() const
+ {
+ return type == BuiltinOptions_ZerosLikeOptions
+ ? reinterpret_cast<const circle::ZerosLikeOptionsT *>(value)
+ : nullptr;
+ }
+ circle::FillOptionsT *AsFillOptions()
+ {
+ return type == BuiltinOptions_FillOptions ? reinterpret_cast<circle::FillOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::FillOptionsT *AsFillOptions() const
+ {
+ return type == BuiltinOptions_FillOptions
+ ? reinterpret_cast<const circle::FillOptionsT *>(value)
+ : nullptr;
+ }
+ circle::BidirectionalSequenceLSTMOptionsT *AsBidirectionalSequenceLSTMOptions()
+ {
+ return type == BuiltinOptions_BidirectionalSequenceLSTMOptions
+ ? reinterpret_cast<circle::BidirectionalSequenceLSTMOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::BidirectionalSequenceLSTMOptionsT *AsBidirectionalSequenceLSTMOptions() const
+ {
+ return type == BuiltinOptions_BidirectionalSequenceLSTMOptions
+ ? reinterpret_cast<const circle::BidirectionalSequenceLSTMOptionsT *>(value)
+ : nullptr;
+ }
+ circle::BidirectionalSequenceRNNOptionsT *AsBidirectionalSequenceRNNOptions()
+ {
+ return type == BuiltinOptions_BidirectionalSequenceRNNOptions
+ ? reinterpret_cast<circle::BidirectionalSequenceRNNOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::BidirectionalSequenceRNNOptionsT *AsBidirectionalSequenceRNNOptions() const
+ {
+ return type == BuiltinOptions_BidirectionalSequenceRNNOptions
+ ? reinterpret_cast<const circle::BidirectionalSequenceRNNOptionsT *>(value)
+ : nullptr;
+ }
+ circle::UnidirectionalSequenceLSTMOptionsT *AsUnidirectionalSequenceLSTMOptions()
+ {
+ return type == BuiltinOptions_UnidirectionalSequenceLSTMOptions
+ ? reinterpret_cast<circle::UnidirectionalSequenceLSTMOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::UnidirectionalSequenceLSTMOptionsT *AsUnidirectionalSequenceLSTMOptions() const
+ {
+ return type == BuiltinOptions_UnidirectionalSequenceLSTMOptions
+ ? reinterpret_cast<const circle::UnidirectionalSequenceLSTMOptionsT *>(value)
+ : nullptr;
+ }
+ circle::FloorModOptionsT *AsFloorModOptions()
+ {
+ return type == BuiltinOptions_FloorModOptions
+ ? reinterpret_cast<circle::FloorModOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::FloorModOptionsT *AsFloorModOptions() const
+ {
+ return type == BuiltinOptions_FloorModOptions
+ ? reinterpret_cast<const circle::FloorModOptionsT *>(value)
+ : nullptr;
+ }
+ circle::RangeOptionsT *AsRangeOptions()
+ {
+ return type == BuiltinOptions_RangeOptions ? reinterpret_cast<circle::RangeOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::RangeOptionsT *AsRangeOptions() const
+ {
+ return type == BuiltinOptions_RangeOptions
+ ? reinterpret_cast<const circle::RangeOptionsT *>(value)
+ : nullptr;
+ }
+ circle::ResizeNearestNeighborOptionsT *AsResizeNearestNeighborOptions()
+ {
+ return type == BuiltinOptions_ResizeNearestNeighborOptions
+ ? reinterpret_cast<circle::ResizeNearestNeighborOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::ResizeNearestNeighborOptionsT *AsResizeNearestNeighborOptions() const
+ {
+ return type == BuiltinOptions_ResizeNearestNeighborOptions
+ ? reinterpret_cast<const circle::ResizeNearestNeighborOptionsT *>(value)
+ : nullptr;
+ }
+ circle::LeakyReluOptionsT *AsLeakyReluOptions()
+ {
+ return type == BuiltinOptions_LeakyReluOptions
+ ? reinterpret_cast<circle::LeakyReluOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::LeakyReluOptionsT *AsLeakyReluOptions() const
+ {
+ return type == BuiltinOptions_LeakyReluOptions
+ ? reinterpret_cast<const circle::LeakyReluOptionsT *>(value)
+ : nullptr;
+ }
+ circle::SquaredDifferenceOptionsT *AsSquaredDifferenceOptions()
+ {
+ return type == BuiltinOptions_SquaredDifferenceOptions
+ ? reinterpret_cast<circle::SquaredDifferenceOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::SquaredDifferenceOptionsT *AsSquaredDifferenceOptions() const
+ {
+ return type == BuiltinOptions_SquaredDifferenceOptions
+ ? reinterpret_cast<const circle::SquaredDifferenceOptionsT *>(value)
+ : nullptr;
+ }
+ circle::MirrorPadOptionsT *AsMirrorPadOptions()
+ {
+ return type == BuiltinOptions_MirrorPadOptions
+ ? reinterpret_cast<circle::MirrorPadOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::MirrorPadOptionsT *AsMirrorPadOptions() const
+ {
+ return type == BuiltinOptions_MirrorPadOptions
+ ? reinterpret_cast<const circle::MirrorPadOptionsT *>(value)
+ : nullptr;
+ }
+ circle::AbsOptionsT *AsAbsOptions()
+ {
+ return type == BuiltinOptions_AbsOptions ? reinterpret_cast<circle::AbsOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::AbsOptionsT *AsAbsOptions() const
+ {
+ return type == BuiltinOptions_AbsOptions ? reinterpret_cast<const circle::AbsOptionsT *>(value)
+ : nullptr;
+ }
+ circle::SplitVOptionsT *AsSplitVOptions()
+ {
+ return type == BuiltinOptions_SplitVOptions ? reinterpret_cast<circle::SplitVOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::SplitVOptionsT *AsSplitVOptions() const
+ {
+ return type == BuiltinOptions_SplitVOptions
+ ? reinterpret_cast<const circle::SplitVOptionsT *>(value)
+ : nullptr;
+ }
+ circle::UniqueOptionsT *AsUniqueOptions()
+ {
+ return type == BuiltinOptions_UniqueOptions ? reinterpret_cast<circle::UniqueOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::UniqueOptionsT *AsUniqueOptions() const
+ {
+ return type == BuiltinOptions_UniqueOptions
+ ? reinterpret_cast<const circle::UniqueOptionsT *>(value)
+ : nullptr;
+ }
+ circle::ReverseV2OptionsT *AsReverseV2Options()
+ {
+ return type == BuiltinOptions_ReverseV2Options
+ ? reinterpret_cast<circle::ReverseV2OptionsT *>(value)
+ : nullptr;
+ }
+ const circle::ReverseV2OptionsT *AsReverseV2Options() const
+ {
+ return type == BuiltinOptions_ReverseV2Options
+ ? reinterpret_cast<const circle::ReverseV2OptionsT *>(value)
+ : nullptr;
+ }
+ circle::AddNOptionsT *AsAddNOptions()
+ {
+ return type == BuiltinOptions_AddNOptions ? reinterpret_cast<circle::AddNOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::AddNOptionsT *AsAddNOptions() const
+ {
+ return type == BuiltinOptions_AddNOptions
+ ? reinterpret_cast<const circle::AddNOptionsT *>(value)
+ : nullptr;
+ }
+ circle::GatherNdOptionsT *AsGatherNdOptions()
+ {
+ return type == BuiltinOptions_GatherNdOptions
+ ? reinterpret_cast<circle::GatherNdOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::GatherNdOptionsT *AsGatherNdOptions() const
+ {
+ return type == BuiltinOptions_GatherNdOptions
+ ? reinterpret_cast<const circle::GatherNdOptionsT *>(value)
+ : nullptr;
+ }
+ circle::CosOptionsT *AsCosOptions()
+ {
+ return type == BuiltinOptions_CosOptions ? reinterpret_cast<circle::CosOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::CosOptionsT *AsCosOptions() const
+ {
+ return type == BuiltinOptions_CosOptions ? reinterpret_cast<const circle::CosOptionsT *>(value)
+ : nullptr;
+ }
+ circle::WhereOptionsT *AsWhereOptions()
+ {
+ return type == BuiltinOptions_WhereOptions ? reinterpret_cast<circle::WhereOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::WhereOptionsT *AsWhereOptions() const
+ {
+ return type == BuiltinOptions_WhereOptions
+ ? reinterpret_cast<const circle::WhereOptionsT *>(value)
+ : nullptr;
+ }
+ circle::RankOptionsT *AsRankOptions()
+ {
+ return type == BuiltinOptions_RankOptions ? reinterpret_cast<circle::RankOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::RankOptionsT *AsRankOptions() const
+ {
+ return type == BuiltinOptions_RankOptions
+ ? reinterpret_cast<const circle::RankOptionsT *>(value)
+ : nullptr;
+ }
+ circle::ReverseSequenceOptionsT *AsReverseSequenceOptions()
+ {
+ return type == BuiltinOptions_ReverseSequenceOptions
+ ? reinterpret_cast<circle::ReverseSequenceOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::ReverseSequenceOptionsT *AsReverseSequenceOptions() const
+ {
+ return type == BuiltinOptions_ReverseSequenceOptions
+ ? reinterpret_cast<const circle::ReverseSequenceOptionsT *>(value)
+ : nullptr;
+ }
+ circle::MatrixDiagOptionsT *AsMatrixDiagOptions()
+ {
+ return type == BuiltinOptions_MatrixDiagOptions
+ ? reinterpret_cast<circle::MatrixDiagOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::MatrixDiagOptionsT *AsMatrixDiagOptions() const
+ {
+ return type == BuiltinOptions_MatrixDiagOptions
+ ? reinterpret_cast<const circle::MatrixDiagOptionsT *>(value)
+ : nullptr;
+ }
+ circle::QuantizeOptionsT *AsQuantizeOptions()
+ {
+ return type == BuiltinOptions_QuantizeOptions
+ ? reinterpret_cast<circle::QuantizeOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::QuantizeOptionsT *AsQuantizeOptions() const
+ {
+ return type == BuiltinOptions_QuantizeOptions
+ ? reinterpret_cast<const circle::QuantizeOptionsT *>(value)
+ : nullptr;
+ }
+ circle::MatrixSetDiagOptionsT *AsMatrixSetDiagOptions()
+ {
+ return type == BuiltinOptions_MatrixSetDiagOptions
+ ? reinterpret_cast<circle::MatrixSetDiagOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::MatrixSetDiagOptionsT *AsMatrixSetDiagOptions() const
+ {
+ return type == BuiltinOptions_MatrixSetDiagOptions
+ ? reinterpret_cast<const circle::MatrixSetDiagOptionsT *>(value)
+ : nullptr;
+ }
+ circle::HardSwishOptionsT *AsHardSwishOptions()
+ {
+ return type == BuiltinOptions_HardSwishOptions
+ ? reinterpret_cast<circle::HardSwishOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::HardSwishOptionsT *AsHardSwishOptions() const
+ {
+ return type == BuiltinOptions_HardSwishOptions
+ ? reinterpret_cast<const circle::HardSwishOptionsT *>(value)
+ : nullptr;
+ }
+ circle::IfOptionsT *AsIfOptions()
+ {
+ return type == BuiltinOptions_IfOptions ? reinterpret_cast<circle::IfOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::IfOptionsT *AsIfOptions() const
+ {
+ return type == BuiltinOptions_IfOptions ? reinterpret_cast<const circle::IfOptionsT *>(value)
+ : nullptr;
+ }
+ circle::WhileOptionsT *AsWhileOptions()
+ {
+ return type == BuiltinOptions_WhileOptions ? reinterpret_cast<circle::WhileOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::WhileOptionsT *AsWhileOptions() const
+ {
+ return type == BuiltinOptions_WhileOptions
+ ? reinterpret_cast<const circle::WhileOptionsT *>(value)
+ : nullptr;
+ }
+ circle::DepthToSpaceOptionsT *AsDepthToSpaceOptions()
+ {
+ return type == BuiltinOptions_DepthToSpaceOptions
+ ? reinterpret_cast<circle::DepthToSpaceOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::DepthToSpaceOptionsT *AsDepthToSpaceOptions() const
+ {
+ return type == BuiltinOptions_DepthToSpaceOptions
+ ? reinterpret_cast<const circle::DepthToSpaceOptionsT *>(value)
+ : nullptr;
+ }
+ circle::NonMaxSuppressionV4OptionsT *AsNonMaxSuppressionV4Options()
+ {
+ return type == BuiltinOptions_NonMaxSuppressionV4Options
+ ? reinterpret_cast<circle::NonMaxSuppressionV4OptionsT *>(value)
+ : nullptr;
+ }
+ const circle::NonMaxSuppressionV4OptionsT *AsNonMaxSuppressionV4Options() const
+ {
+ return type == BuiltinOptions_NonMaxSuppressionV4Options
+ ? reinterpret_cast<const circle::NonMaxSuppressionV4OptionsT *>(value)
+ : nullptr;
+ }
+ circle::NonMaxSuppressionV5OptionsT *AsNonMaxSuppressionV5Options()
+ {
+ return type == BuiltinOptions_NonMaxSuppressionV5Options
+ ? reinterpret_cast<circle::NonMaxSuppressionV5OptionsT *>(value)
+ : nullptr;
+ }
+ const circle::NonMaxSuppressionV5OptionsT *AsNonMaxSuppressionV5Options() const
+ {
+ return type == BuiltinOptions_NonMaxSuppressionV5Options
+ ? reinterpret_cast<const circle::NonMaxSuppressionV5OptionsT *>(value)
+ : nullptr;
+ }
+ circle::ScatterNdOptionsT *AsScatterNdOptions()
+ {
+ return type == BuiltinOptions_ScatterNdOptions
+ ? reinterpret_cast<circle::ScatterNdOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::ScatterNdOptionsT *AsScatterNdOptions() const
+ {
+ return type == BuiltinOptions_ScatterNdOptions
+ ? reinterpret_cast<const circle::ScatterNdOptionsT *>(value)
+ : nullptr;
+ }
+ circle::SelectV2OptionsT *AsSelectV2Options()
+ {
+ return type == BuiltinOptions_SelectV2Options
+ ? reinterpret_cast<circle::SelectV2OptionsT *>(value)
+ : nullptr;
+ }
+ const circle::SelectV2OptionsT *AsSelectV2Options() const
+ {
+ return type == BuiltinOptions_SelectV2Options
+ ? reinterpret_cast<const circle::SelectV2OptionsT *>(value)
+ : nullptr;
+ }
+ circle::DensifyOptionsT *AsDensifyOptions()
+ {
+ return type == BuiltinOptions_DensifyOptions
+ ? reinterpret_cast<circle::DensifyOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::DensifyOptionsT *AsDensifyOptions() const
+ {
+ return type == BuiltinOptions_DensifyOptions
+ ? reinterpret_cast<const circle::DensifyOptionsT *>(value)
+ : nullptr;
+ }
+ circle::SegmentSumOptionsT *AsSegmentSumOptions()
+ {
+ return type == BuiltinOptions_SegmentSumOptions
+ ? reinterpret_cast<circle::SegmentSumOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::SegmentSumOptionsT *AsSegmentSumOptions() const
+ {
+ return type == BuiltinOptions_SegmentSumOptions
+ ? reinterpret_cast<const circle::SegmentSumOptionsT *>(value)
+ : nullptr;
+ }
+ circle::BatchMatMulOptionsT *AsBatchMatMulOptions()
+ {
+ return type == BuiltinOptions_BatchMatMulOptions
+ ? reinterpret_cast<circle::BatchMatMulOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::BatchMatMulOptionsT *AsBatchMatMulOptions() const
+ {
+ return type == BuiltinOptions_BatchMatMulOptions
+ ? reinterpret_cast<const circle::BatchMatMulOptionsT *>(value)
+ : nullptr;
+ }
+ circle::CumsumOptionsT *AsCumsumOptions()
+ {
+ return type == BuiltinOptions_CumsumOptions ? reinterpret_cast<circle::CumsumOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::CumsumOptionsT *AsCumsumOptions() const
+ {
+ return type == BuiltinOptions_CumsumOptions
+ ? reinterpret_cast<const circle::CumsumOptionsT *>(value)
+ : nullptr;
+ }
+ circle::CallOnceOptionsT *AsCallOnceOptions()
+ {
+ return type == BuiltinOptions_CallOnceOptions
+ ? reinterpret_cast<circle::CallOnceOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::CallOnceOptionsT *AsCallOnceOptions() const
+ {
+ return type == BuiltinOptions_CallOnceOptions
+ ? reinterpret_cast<const circle::CallOnceOptionsT *>(value)
+ : nullptr;
+ }
+ circle::BroadcastToOptionsT *AsBroadcastToOptions()
+ {
+ return type == BuiltinOptions_BroadcastToOptions
+ ? reinterpret_cast<circle::BroadcastToOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::BroadcastToOptionsT *AsBroadcastToOptions() const
+ {
+ return type == BuiltinOptions_BroadcastToOptions
+ ? reinterpret_cast<const circle::BroadcastToOptionsT *>(value)
+ : nullptr;
+ }
+ circle::Rfft2dOptionsT *AsRfft2dOptions()
+ {
+ return type == BuiltinOptions_Rfft2dOptions ? reinterpret_cast<circle::Rfft2dOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::Rfft2dOptionsT *AsRfft2dOptions() const
+ {
+ return type == BuiltinOptions_Rfft2dOptions
+ ? reinterpret_cast<const circle::Rfft2dOptionsT *>(value)
+ : nullptr;
+ }
+ circle::Conv3DOptionsT *AsConv3DOptions()
+ {
+ return type == BuiltinOptions_Conv3DOptions ? reinterpret_cast<circle::Conv3DOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::Conv3DOptionsT *AsConv3DOptions() const
+ {
+ return type == BuiltinOptions_Conv3DOptions
+ ? reinterpret_cast<const circle::Conv3DOptionsT *>(value)
+ : nullptr;
+ }
+ circle::HashtableOptionsT *AsHashtableOptions()
+ {
+ return type == BuiltinOptions_HashtableOptions
+ ? reinterpret_cast<circle::HashtableOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::HashtableOptionsT *AsHashtableOptions() const
+ {
+ return type == BuiltinOptions_HashtableOptions
+ ? reinterpret_cast<const circle::HashtableOptionsT *>(value)
+ : nullptr;
+ }
+ circle::HashtableFindOptionsT *AsHashtableFindOptions()
+ {
+ return type == BuiltinOptions_HashtableFindOptions
+ ? reinterpret_cast<circle::HashtableFindOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::HashtableFindOptionsT *AsHashtableFindOptions() const
+ {
+ return type == BuiltinOptions_HashtableFindOptions
+ ? reinterpret_cast<const circle::HashtableFindOptionsT *>(value)
+ : nullptr;
+ }
+ circle::HashtableImportOptionsT *AsHashtableImportOptions()
+ {
+ return type == BuiltinOptions_HashtableImportOptions
+ ? reinterpret_cast<circle::HashtableImportOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::HashtableImportOptionsT *AsHashtableImportOptions() const
+ {
+ return type == BuiltinOptions_HashtableImportOptions
+ ? reinterpret_cast<const circle::HashtableImportOptionsT *>(value)
+ : nullptr;
+ }
+ circle::HashtableSizeOptionsT *AsHashtableSizeOptions()
+ {
+ return type == BuiltinOptions_HashtableSizeOptions
+ ? reinterpret_cast<circle::HashtableSizeOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::HashtableSizeOptionsT *AsHashtableSizeOptions() const
+ {
+ return type == BuiltinOptions_HashtableSizeOptions
+ ? reinterpret_cast<const circle::HashtableSizeOptionsT *>(value)
+ : nullptr;
+ }
+ circle::VarHandleOptionsT *AsVarHandleOptions()
+ {
+ return type == BuiltinOptions_VarHandleOptions
+ ? reinterpret_cast<circle::VarHandleOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::VarHandleOptionsT *AsVarHandleOptions() const
+ {
+ return type == BuiltinOptions_VarHandleOptions
+ ? reinterpret_cast<const circle::VarHandleOptionsT *>(value)
+ : nullptr;
+ }
+ circle::ReadVariableOptionsT *AsReadVariableOptions()
+ {
+ return type == BuiltinOptions_ReadVariableOptions
+ ? reinterpret_cast<circle::ReadVariableOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::ReadVariableOptionsT *AsReadVariableOptions() const
+ {
+ return type == BuiltinOptions_ReadVariableOptions
+ ? reinterpret_cast<const circle::ReadVariableOptionsT *>(value)
+ : nullptr;
+ }
+ circle::AssignVariableOptionsT *AsAssignVariableOptions()
+ {
+ return type == BuiltinOptions_AssignVariableOptions
+ ? reinterpret_cast<circle::AssignVariableOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::AssignVariableOptionsT *AsAssignVariableOptions() const
+ {
+ return type == BuiltinOptions_AssignVariableOptions
+ ? reinterpret_cast<const circle::AssignVariableOptionsT *>(value)
+ : nullptr;
+ }
+ circle::RandomOptionsT *AsRandomOptions()
+ {
+ return type == BuiltinOptions_RandomOptions ? reinterpret_cast<circle::RandomOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::RandomOptionsT *AsRandomOptions() const
+ {
+ return type == BuiltinOptions_RandomOptions
+ ? reinterpret_cast<const circle::RandomOptionsT *>(value)
+ : nullptr;
+ }
+ circle::BCQGatherOptionsT *AsBCQGatherOptions()
+ {
+ return type == BuiltinOptions_BCQGatherOptions
+ ? reinterpret_cast<circle::BCQGatherOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::BCQGatherOptionsT *AsBCQGatherOptions() const
+ {
+ return type == BuiltinOptions_BCQGatherOptions
+ ? reinterpret_cast<const circle::BCQGatherOptionsT *>(value)
+ : nullptr;
+ }
+ circle::BCQFullyConnectedOptionsT *AsBCQFullyConnectedOptions()
+ {
+ return type == BuiltinOptions_BCQFullyConnectedOptions
+ ? reinterpret_cast<circle::BCQFullyConnectedOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::BCQFullyConnectedOptionsT *AsBCQFullyConnectedOptions() const
+ {
+ return type == BuiltinOptions_BCQFullyConnectedOptions
+ ? reinterpret_cast<const circle::BCQFullyConnectedOptionsT *>(value)
+ : nullptr;
+ }
+ circle::InstanceNormOptionsT *AsInstanceNormOptions()
+ {
+ return type == BuiltinOptions_InstanceNormOptions
+ ? reinterpret_cast<circle::InstanceNormOptionsT *>(value)
+ : nullptr;
+ }
+ const circle::InstanceNormOptionsT *AsInstanceNormOptions() const
+ {
+ return type == BuiltinOptions_InstanceNormOptions
+ ? reinterpret_cast<const circle::InstanceNormOptionsT *>(value)
+ : nullptr;
+ }
+};
+
+bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type);
+bool VerifyBuiltinOptionsVector(flatbuffers::Verifier &verifier,
+ const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
+ const flatbuffers::Vector<uint8_t> *types);
+
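+// Usage sketch (illustrative; not part of the schema): every As*Options() pair
+// above is a checked downcast -- the stored pointer comes back only when `type`
+// matches, nullptr otherwise, so a union can be probed without a prior switch:
+//
+//   circle::BuiltinOptionsUnion u = /* ... */;
+//   if (auto *pack = u.AsPackOptions())        // non-null only for PackOptions
+//     do_something(pack->axis);                // hypothetical caller code
+//
+// VerifyBuiltinOptions/VerifyBuiltinOptionsVector are the matching integrity
+// checks invoked from the generated Verify() methods.
+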
+enum Padding : int8_t
+{
+ Padding_SAME = 0,
+ Padding_VALID = 1,
+ Padding_MIN = Padding_SAME,
+ Padding_MAX = Padding_VALID
+};
+
+inline const Padding (&EnumValuesPadding())[2]
+{
+ static const Padding values[] = {Padding_SAME, Padding_VALID};
+ return values;
+}
+
+inline const char *const *EnumNamesPadding()
+{
+ static const char *const names[3] = {"SAME", "VALID", nullptr};
+ return names;
+}
+
+inline const char *EnumNamePadding(Padding e)
+{
+ if (flatbuffers::IsOutRange(e, Padding_SAME, Padding_VALID))
+ return "";
+ const size_t index = static_cast<size_t>(e);
+ return EnumNamesPadding()[index];
+}
+
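+// Every dense enum in this header ships the same helper triple: EnumValuesX()
+// returns the value array, EnumNamesX() a nullptr-terminated name table, and
+// EnumNameX() a range-checked lookup that returns "" when out of range, e.g.
+// EnumNamePadding(Padding_VALID) yields "VALID".
+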
+enum ActivationFunctionType : int8_t
+{
+ ActivationFunctionType_NONE = 0,
+ ActivationFunctionType_RELU = 1,
+ ActivationFunctionType_RELU_N1_TO_1 = 2,
+ ActivationFunctionType_RELU6 = 3,
+ ActivationFunctionType_TANH = 4,
+ ActivationFunctionType_SIGN_BIT = 5,
+ ActivationFunctionType_MIN = ActivationFunctionType_NONE,
+ ActivationFunctionType_MAX = ActivationFunctionType_SIGN_BIT
+};
+
+inline const ActivationFunctionType (&EnumValuesActivationFunctionType())[6]
+{
+ static const ActivationFunctionType values[] = {
+ ActivationFunctionType_NONE, ActivationFunctionType_RELU, ActivationFunctionType_RELU_N1_TO_1,
+ ActivationFunctionType_RELU6, ActivationFunctionType_TANH, ActivationFunctionType_SIGN_BIT};
+ return values;
+}
+
+inline const char *const *EnumNamesActivationFunctionType()
+{
+ static const char *const names[7] = {"NONE", "RELU", "RELU_N1_TO_1", "RELU6",
+ "TANH", "SIGN_BIT", nullptr};
+ return names;
+}
+
+inline const char *EnumNameActivationFunctionType(ActivationFunctionType e)
+{
+ if (flatbuffers::IsOutRange(e, ActivationFunctionType_NONE, ActivationFunctionType_SIGN_BIT))
+ return "";
+ const size_t index = static_cast<size_t>(e);
+ return EnumNamesActivationFunctionType()[index];
+}
+
+enum LSHProjectionType : int8_t
+{
+ LSHProjectionType_UNKNOWN = 0,
+ LSHProjectionType_SPARSE = 1,
+ LSHProjectionType_DENSE = 2,
+ LSHProjectionType_MIN = LSHProjectionType_UNKNOWN,
+ LSHProjectionType_MAX = LSHProjectionType_DENSE
+};
+
+inline const LSHProjectionType (&EnumValuesLSHProjectionType())[3]
+{
+ static const LSHProjectionType values[] = {LSHProjectionType_UNKNOWN, LSHProjectionType_SPARSE,
+ LSHProjectionType_DENSE};
+ return values;
+}
+
+inline const char *const *EnumNamesLSHProjectionType()
+{
+ static const char *const names[4] = {"UNKNOWN", "SPARSE", "DENSE", nullptr};
+ return names;
+}
+
+inline const char *EnumNameLSHProjectionType(LSHProjectionType e)
+{
+ if (flatbuffers::IsOutRange(e, LSHProjectionType_UNKNOWN, LSHProjectionType_DENSE))
+ return "";
+ const size_t index = static_cast<size_t>(e);
+ return EnumNamesLSHProjectionType()[index];
+}
+
+enum FullyConnectedOptionsWeightsFormat : int8_t
+{
+ FullyConnectedOptionsWeightsFormat_DEFAULT = 0,
+ FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8 = 1,
+ FullyConnectedOptionsWeightsFormat_SHUFFLED16x1FLOAT32 = 127,
+ FullyConnectedOptionsWeightsFormat_MIN = FullyConnectedOptionsWeightsFormat_DEFAULT,
+ FullyConnectedOptionsWeightsFormat_MAX = FullyConnectedOptionsWeightsFormat_SHUFFLED16x1FLOAT32
+};
+
+inline const FullyConnectedOptionsWeightsFormat (&EnumValuesFullyConnectedOptionsWeightsFormat())[3]
+{
+ static const FullyConnectedOptionsWeightsFormat values[] = {
+ FullyConnectedOptionsWeightsFormat_DEFAULT, FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8,
+ FullyConnectedOptionsWeightsFormat_SHUFFLED16x1FLOAT32};
+ return values;
+}
+
+inline const char *EnumNameFullyConnectedOptionsWeightsFormat(FullyConnectedOptionsWeightsFormat e)
+{
+ switch (e)
+ {
+ case FullyConnectedOptionsWeightsFormat_DEFAULT:
+ return "DEFAULT";
+ case FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8:
+ return "SHUFFLED4x16INT8";
+ case FullyConnectedOptionsWeightsFormat_SHUFFLED16x1FLOAT32:
+ return "SHUFFLED16x1FLOAT32";
+ default:
+ return "";
+ }
+}
+
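+// SHUFFLED16x1FLOAT32 (= 127) makes this enum non-contiguous, so flatc emits a
+// switch-based EnumName lookup here instead of the indexed name table used for
+// the dense enums elsewhere in this header.
+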
+enum LSTMKernelType : int8_t
+{
+ LSTMKernelType_FULL = 0,
+ LSTMKernelType_BASIC = 1,
+ LSTMKernelType_MIN = LSTMKernelType_FULL,
+ LSTMKernelType_MAX = LSTMKernelType_BASIC
+};
+
+inline const LSTMKernelType (&EnumValuesLSTMKernelType())[2]
+{
+ static const LSTMKernelType values[] = {LSTMKernelType_FULL, LSTMKernelType_BASIC};
+ return values;
+}
+
+inline const char *const *EnumNamesLSTMKernelType()
+{
+ static const char *const names[3] = {"FULL", "BASIC", nullptr};
+ return names;
+}
+
+inline const char *EnumNameLSTMKernelType(LSTMKernelType e)
+{
+ if (flatbuffers::IsOutRange(e, LSTMKernelType_FULL, LSTMKernelType_BASIC))
+ return "";
+ const size_t index = static_cast<size_t>(e);
+ return EnumNamesLSTMKernelType()[index];
+}
+
+enum CombinerType : int8_t
+{
+ CombinerType_SUM = 0,
+ CombinerType_MEAN = 1,
+ CombinerType_SQRTN = 2,
+ CombinerType_MIN = CombinerType_SUM,
+ CombinerType_MAX = CombinerType_SQRTN
+};
+
+inline const CombinerType (&EnumValuesCombinerType())[3]
+{
+ static const CombinerType values[] = {CombinerType_SUM, CombinerType_MEAN, CombinerType_SQRTN};
+ return values;
+}
+
+inline const char *const *EnumNamesCombinerType()
+{
+ static const char *const names[4] = {"SUM", "MEAN", "SQRTN", nullptr};
+ return names;
+}
+
+inline const char *EnumNameCombinerType(CombinerType e)
+{
+ if (flatbuffers::IsOutRange(e, CombinerType_SUM, CombinerType_SQRTN))
+ return "";
+ const size_t index = static_cast<size_t>(e);
+ return EnumNamesCombinerType()[index];
+}
+
+enum MirrorPadMode : int8_t
+{
+ MirrorPadMode_REFLECT = 0,
+ MirrorPadMode_SYMMETRIC = 1,
+ MirrorPadMode_MIN = MirrorPadMode_REFLECT,
+ MirrorPadMode_MAX = MirrorPadMode_SYMMETRIC
+};
+
+inline const MirrorPadMode (&EnumValuesMirrorPadMode())[2]
+{
+ static const MirrorPadMode values[] = {MirrorPadMode_REFLECT, MirrorPadMode_SYMMETRIC};
+ return values;
+}
+
+inline const char *const *EnumNamesMirrorPadMode()
+{
+ static const char *const names[3] = {"REFLECT", "SYMMETRIC", nullptr};
+ return names;
+}
+
+inline const char *EnumNameMirrorPadMode(MirrorPadMode e)
+{
+ if (flatbuffers::IsOutRange(e, MirrorPadMode_REFLECT, MirrorPadMode_SYMMETRIC))
+ return "";
+ const size_t index = static_cast<size_t>(e);
+ return EnumNamesMirrorPadMode()[index];
+}
+
+enum CustomOptionsFormat : int8_t
+{
+ CustomOptionsFormat_FLEXBUFFERS = 0,
+ CustomOptionsFormat_MIN = CustomOptionsFormat_FLEXBUFFERS,
+ CustomOptionsFormat_MAX = CustomOptionsFormat_FLEXBUFFERS
+};
+
+inline const CustomOptionsFormat (&EnumValuesCustomOptionsFormat())[1]
+{
+ static const CustomOptionsFormat values[] = {CustomOptionsFormat_FLEXBUFFERS};
+ return values;
+}
+
+inline const char *const *EnumNamesCustomOptionsFormat()
+{
+ static const char *const names[2] = {"FLEXBUFFERS", nullptr};
+ return names;
+}
+
+inline const char *EnumNameCustomOptionsFormat(CustomOptionsFormat e)
+{
+ if (flatbuffers::IsOutRange(e, CustomOptionsFormat_FLEXBUFFERS, CustomOptionsFormat_FLEXBUFFERS))
+ return "";
+ const size_t index = static_cast<size_t>(e);
+ return EnumNamesCustomOptionsFormat()[index];
+}
+
+enum DataFormat : int8_t
+{
+ DataFormat_CHANNELS_LAST = 0,
+ DataFormat_CHANNELS_FIRST = 1,
+ DataFormat_MIN = DataFormat_CHANNELS_LAST,
+ DataFormat_MAX = DataFormat_CHANNELS_FIRST
+};
+
+inline const DataFormat (&EnumValuesDataFormat())[2]
+{
+ static const DataFormat values[] = {DataFormat_CHANNELS_LAST, DataFormat_CHANNELS_FIRST};
+ return values;
+}
+
+inline const char *const *EnumNamesDataFormat()
+{
+ static const char *const names[3] = {"CHANNELS_LAST", "CHANNELS_FIRST", nullptr};
+ return names;
+}
+
+inline const char *EnumNameDataFormat(DataFormat e)
+{
+ if (flatbuffers::IsOutRange(e, DataFormat_CHANNELS_LAST, DataFormat_CHANNELS_FIRST))
+ return "";
+ const size_t index = static_cast<size_t>(e);
+ return EnumNamesDataFormat()[index];
+}
+
+struct CustomQuantizationT : public flatbuffers::NativeTable
+{
+ typedef CustomQuantization TableType;
+ std::vector<uint8_t> custom{};
+};
+
+struct CustomQuantization FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef CustomQuantizationT NativeTableType;
+ typedef CustomQuantizationBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_CUSTOM = 4
+ };
+ const flatbuffers::Vector<uint8_t> *custom() const
+ {
+ return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_CUSTOM) &&
+ verifier.VerifyVector(custom()) && verifier.EndTable();
+ }
+ CustomQuantizationT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(CustomQuantizationT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<CustomQuantization>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const CustomQuantizationT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct CustomQuantizationBuilder
+{
+ typedef CustomQuantization Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_custom(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom)
+ {
+ fbb_.AddOffset(CustomQuantization::VT_CUSTOM, custom);
+ }
+ explicit CustomQuantizationBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<CustomQuantization> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<CustomQuantization>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<CustomQuantization>
+CreateCustomQuantization(flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom = 0)
+{
+ CustomQuantizationBuilder builder_(_fbb);
+ builder_.add_custom(custom);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<CustomQuantization>
+CreateCustomQuantizationDirect(flatbuffers::FlatBufferBuilder &_fbb,
+ const std::vector<uint8_t> *custom = nullptr)
+{
+ if (custom)
+ {
+ _fbb.ForceVectorAlignment(custom->size(), sizeof(uint8_t), 16);
+ }
+ auto custom__ = custom ? _fbb.CreateVector<uint8_t>(*custom) : 0;
+ return circle::CreateCustomQuantization(_fbb, custom__);
+}
+
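+// The Direct variant above force-aligns the custom byte vector to 16 bytes
+// before serializing it. A minimal caller sketch, assuming a builder `fbb`:
+//
+//   std::vector<uint8_t> blob = {1, 2, 3, 4};
+//   auto cq = circle::CreateCustomQuantizationDirect(fbb, &blob);
+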
+flatbuffers::Offset<CustomQuantization>
+CreateCustomQuantization(flatbuffers::FlatBufferBuilder &_fbb, const CustomQuantizationT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct QuantizationParametersT : public flatbuffers::NativeTable
+{
+ typedef QuantizationParameters TableType;
+ std::vector<float> min{};
+ std::vector<float> max{};
+ std::vector<float> scale{};
+ std::vector<int64_t> zero_point{};
+ circle::QuantizationDetailsUnion details{};
+ int32_t quantized_dimension = 0;
+};
+
+struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef QuantizationParametersT NativeTableType;
+ typedef QuantizationParametersBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_MIN = 4,
+ VT_MAX = 6,
+ VT_SCALE = 8,
+ VT_ZERO_POINT = 10,
+ VT_DETAILS_TYPE = 12,
+ VT_DETAILS = 14,
+ VT_QUANTIZED_DIMENSION = 16
+ };
+ const flatbuffers::Vector<float> *min() const
+ {
+ return GetPointer<const flatbuffers::Vector<float> *>(VT_MIN);
+ }
+ const flatbuffers::Vector<float> *max() const
+ {
+ return GetPointer<const flatbuffers::Vector<float> *>(VT_MAX);
+ }
+ const flatbuffers::Vector<float> *scale() const
+ {
+ return GetPointer<const flatbuffers::Vector<float> *>(VT_SCALE);
+ }
+ const flatbuffers::Vector<int64_t> *zero_point() const
+ {
+ return GetPointer<const flatbuffers::Vector<int64_t> *>(VT_ZERO_POINT);
+ }
+ circle::QuantizationDetails details_type() const
+ {
+ return static_cast<circle::QuantizationDetails>(GetField<uint8_t>(VT_DETAILS_TYPE, 0));
+ }
+ const void *details() const { return GetPointer<const void *>(VT_DETAILS); }
+ template <typename T> const T *details_as() const;
+ const circle::CustomQuantization *details_as_CustomQuantization() const
+ {
+ return details_type() == circle::QuantizationDetails_CustomQuantization
+ ? static_cast<const circle::CustomQuantization *>(details())
+ : nullptr;
+ }
+ int32_t quantized_dimension() const { return GetField<int32_t>(VT_QUANTIZED_DIMENSION, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_MIN) &&
+ verifier.VerifyVector(min()) && VerifyOffset(verifier, VT_MAX) &&
+ verifier.VerifyVector(max()) && VerifyOffset(verifier, VT_SCALE) &&
+ verifier.VerifyVector(scale()) && VerifyOffset(verifier, VT_ZERO_POINT) &&
+ verifier.VerifyVector(zero_point()) && VerifyField<uint8_t>(verifier, VT_DETAILS_TYPE) &&
+ VerifyOffset(verifier, VT_DETAILS) &&
+ VerifyQuantizationDetails(verifier, details(), details_type()) &&
+ VerifyField<int32_t>(verifier, VT_QUANTIZED_DIMENSION) && verifier.EndTable();
+ }
+ QuantizationParametersT *
+ UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(QuantizationParametersT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<QuantizationParameters>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+template <>
+inline const circle::CustomQuantization *
+QuantizationParameters::details_as<circle::CustomQuantization>() const
+{
+ return details_as_CustomQuantization();
+}
+
+struct QuantizationParametersBuilder
+{
+ typedef QuantizationParameters Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_min(flatbuffers::Offset<flatbuffers::Vector<float>> min)
+ {
+ fbb_.AddOffset(QuantizationParameters::VT_MIN, min);
+ }
+ void add_max(flatbuffers::Offset<flatbuffers::Vector<float>> max)
+ {
+ fbb_.AddOffset(QuantizationParameters::VT_MAX, max);
+ }
+ void add_scale(flatbuffers::Offset<flatbuffers::Vector<float>> scale)
+ {
+ fbb_.AddOffset(QuantizationParameters::VT_SCALE, scale);
+ }
+ void add_zero_point(flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point)
+ {
+ fbb_.AddOffset(QuantizationParameters::VT_ZERO_POINT, zero_point);
+ }
+ void add_details_type(circle::QuantizationDetails details_type)
+ {
+ fbb_.AddElement<uint8_t>(QuantizationParameters::VT_DETAILS_TYPE,
+ static_cast<uint8_t>(details_type), 0);
+ }
+ void add_details(flatbuffers::Offset<void> details)
+ {
+ fbb_.AddOffset(QuantizationParameters::VT_DETAILS, details);
+ }
+ void add_quantized_dimension(int32_t quantized_dimension)
+ {
+ fbb_.AddElement<int32_t>(QuantizationParameters::VT_QUANTIZED_DIMENSION, quantized_dimension,
+ 0);
+ }
+ explicit QuantizationParametersBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<QuantizationParameters> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<QuantizationParameters>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParameters(
+ flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset<flatbuffers::Vector<float>> min = 0,
+ flatbuffers::Offset<flatbuffers::Vector<float>> max = 0,
+ flatbuffers::Offset<flatbuffers::Vector<float>> scale = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point = 0,
+ circle::QuantizationDetails details_type = circle::QuantizationDetails_NONE,
+ flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0)
+{
+ QuantizationParametersBuilder builder_(_fbb);
+ builder_.add_quantized_dimension(quantized_dimension);
+ builder_.add_details(details);
+ builder_.add_zero_point(zero_point);
+ builder_.add_scale(scale);
+ builder_.add_max(max);
+ builder_.add_min(min);
+ builder_.add_details_type(details_type);
+ return builder_.Finish();
+}
+
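+// The add_* calls above run in decreasing field-size order (offsets and int32
+// before the one-byte details_type) so inline scalars pack without padding; the
+// call order has no effect on field identity, which is fixed by vtable offset.
+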
+inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParametersDirect(
+ flatbuffers::FlatBufferBuilder &_fbb, const std::vector<float> *min = nullptr,
+ const std::vector<float> *max = nullptr, const std::vector<float> *scale = nullptr,
+ const std::vector<int64_t> *zero_point = nullptr,
+ circle::QuantizationDetails details_type = circle::QuantizationDetails_NONE,
+ flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0)
+{
+ auto min__ = min ? _fbb.CreateVector<float>(*min) : 0;
+ auto max__ = max ? _fbb.CreateVector<float>(*max) : 0;
+ auto scale__ = scale ? _fbb.CreateVector<float>(*scale) : 0;
+ auto zero_point__ = zero_point ? _fbb.CreateVector<int64_t>(*zero_point) : 0;
+ return circle::CreateQuantizationParameters(_fbb, min__, max__, scale__, zero_point__,
+ details_type, details, quantized_dimension);
+}
+
+flatbuffers::Offset<QuantizationParameters>
+CreateQuantizationParameters(flatbuffers::FlatBufferBuilder &_fbb,
+ const QuantizationParametersT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct Int32VectorT : public flatbuffers::NativeTable
+{
+ typedef Int32Vector TableType;
+ std::vector<int32_t> values{};
+};
+
+struct Int32Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef Int32VectorT NativeTableType;
+ typedef Int32VectorBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_VALUES = 4
+ };
+ const flatbuffers::Vector<int32_t> *values() const
+ {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_VALUES);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_VALUES) &&
+ verifier.VerifyVector(values()) && verifier.EndTable();
+ }
+ Int32VectorT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(Int32VectorT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<Int32Vector>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const Int32VectorT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct Int32VectorBuilder
+{
+ typedef Int32Vector Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_values(flatbuffers::Offset<flatbuffers::Vector<int32_t>> values)
+ {
+ fbb_.AddOffset(Int32Vector::VT_VALUES, values);
+ }
+ explicit Int32VectorBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<Int32Vector> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Int32Vector>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Int32Vector>
+CreateInt32Vector(flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> values = 0)
+{
+ Int32VectorBuilder builder_(_fbb);
+ builder_.add_values(values);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Int32Vector>
+CreateInt32VectorDirect(flatbuffers::FlatBufferBuilder &_fbb,
+ const std::vector<int32_t> *values = nullptr)
+{
+ auto values__ = values ? _fbb.CreateVector<int32_t>(*values) : 0;
+ return circle::CreateInt32Vector(_fbb, values__);
+}
+
+flatbuffers::Offset<Int32Vector>
+CreateInt32Vector(flatbuffers::FlatBufferBuilder &_fbb, const Int32VectorT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct Uint16VectorT : public flatbuffers::NativeTable
+{
+ typedef Uint16Vector TableType;
+ std::vector<uint16_t> values{};
+};
+
+struct Uint16Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef Uint16VectorT NativeTableType;
+ typedef Uint16VectorBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_VALUES = 4
+ };
+ const flatbuffers::Vector<uint16_t> *values() const
+ {
+ return GetPointer<const flatbuffers::Vector<uint16_t> *>(VT_VALUES);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_VALUES) &&
+ verifier.VerifyVector(values()) && verifier.EndTable();
+ }
+ Uint16VectorT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(Uint16VectorT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<Uint16Vector>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const Uint16VectorT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct Uint16VectorBuilder
+{
+ typedef Uint16Vector Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_values(flatbuffers::Offset<flatbuffers::Vector<uint16_t>> values)
+ {
+ fbb_.AddOffset(Uint16Vector::VT_VALUES, values);
+ }
+ explicit Uint16VectorBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<Uint16Vector> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Uint16Vector>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Uint16Vector>
+CreateUint16Vector(flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<uint16_t>> values = 0)
+{
+ Uint16VectorBuilder builder_(_fbb);
+ builder_.add_values(values);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Uint16Vector>
+CreateUint16VectorDirect(flatbuffers::FlatBufferBuilder &_fbb,
+ const std::vector<uint16_t> *values = nullptr)
+{
+ if (values)
+ {
+ _fbb.ForceVectorAlignment(values->size(), sizeof(uint16_t), 4);
+ }
+ auto values__ = values ? _fbb.CreateVector<uint16_t>(*values) : 0;
+ return circle::CreateUint16Vector(_fbb, values__);
+}
+
+flatbuffers::Offset<Uint16Vector>
+CreateUint16Vector(flatbuffers::FlatBufferBuilder &_fbb, const Uint16VectorT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct Uint8VectorT : public flatbuffers::NativeTable
+{
+ typedef Uint8Vector TableType;
+ std::vector<uint8_t> values{};
+};
+
+struct Uint8Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef Uint8VectorT NativeTableType;
+ typedef Uint8VectorBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_VALUES = 4
+ };
+ const flatbuffers::Vector<uint8_t> *values() const
+ {
+ return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_VALUES);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_VALUES) &&
+ verifier.VerifyVector(values()) && verifier.EndTable();
+ }
+ Uint8VectorT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(Uint8VectorT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<Uint8Vector>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const Uint8VectorT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct Uint8VectorBuilder
+{
+ typedef Uint8Vector Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_values(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> values)
+ {
+ fbb_.AddOffset(Uint8Vector::VT_VALUES, values);
+ }
+ explicit Uint8VectorBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<Uint8Vector> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Uint8Vector>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Uint8Vector>
+CreateUint8Vector(flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> values = 0)
+{
+ Uint8VectorBuilder builder_(_fbb);
+ builder_.add_values(values);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Uint8Vector>
+CreateUint8VectorDirect(flatbuffers::FlatBufferBuilder &_fbb,
+ const std::vector<uint8_t> *values = nullptr)
+{
+ if (values)
+ {
+ _fbb.ForceVectorAlignment(values->size(), sizeof(uint8_t), 4);
+ }
+ auto values__ = values ? _fbb.CreateVector<uint8_t>(*values) : 0;
+ return circle::CreateUint8Vector(_fbb, values__);
+}
+
+flatbuffers::Offset<Uint8Vector>
+CreateUint8Vector(flatbuffers::FlatBufferBuilder &_fbb, const Uint8VectorT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct DimensionMetadataT : public flatbuffers::NativeTable
+{
+ typedef DimensionMetadata TableType;
+ circle::DimensionType format = circle::DimensionType_DENSE;
+ int32_t dense_size = 0;
+ circle::SparseIndexVectorUnion array_segments{};
+ circle::SparseIndexVectorUnion array_indices{};
+};
+
+struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef DimensionMetadataT NativeTableType;
+ typedef DimensionMetadataBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_FORMAT = 4,
+ VT_DENSE_SIZE = 6,
+ VT_ARRAY_SEGMENTS_TYPE = 8,
+ VT_ARRAY_SEGMENTS = 10,
+ VT_ARRAY_INDICES_TYPE = 12,
+ VT_ARRAY_INDICES = 14
+ };
+ circle::DimensionType format() const
+ {
+ return static_cast<circle::DimensionType>(GetField<int8_t>(VT_FORMAT, 0));
+ }
+ int32_t dense_size() const { return GetField<int32_t>(VT_DENSE_SIZE, 0); }
+ circle::SparseIndexVector array_segments_type() const
+ {
+ return static_cast<circle::SparseIndexVector>(GetField<uint8_t>(VT_ARRAY_SEGMENTS_TYPE, 0));
+ }
+ const void *array_segments() const { return GetPointer<const void *>(VT_ARRAY_SEGMENTS); }
+ template <typename T> const T *array_segments_as() const;
+ const circle::Int32Vector *array_segments_as_Int32Vector() const
+ {
+ return array_segments_type() == circle::SparseIndexVector_Int32Vector
+ ? static_cast<const circle::Int32Vector *>(array_segments())
+ : nullptr;
+ }
+ const circle::Uint16Vector *array_segments_as_Uint16Vector() const
+ {
+ return array_segments_type() == circle::SparseIndexVector_Uint16Vector
+ ? static_cast<const circle::Uint16Vector *>(array_segments())
+ : nullptr;
+ }
+ const circle::Uint8Vector *array_segments_as_Uint8Vector() const
+ {
+ return array_segments_type() == circle::SparseIndexVector_Uint8Vector
+ ? static_cast<const circle::Uint8Vector *>(array_segments())
+ : nullptr;
+ }
+ circle::SparseIndexVector array_indices_type() const
+ {
+ return static_cast<circle::SparseIndexVector>(GetField<uint8_t>(VT_ARRAY_INDICES_TYPE, 0));
+ }
+ const void *array_indices() const { return GetPointer<const void *>(VT_ARRAY_INDICES); }
+ template <typename T> const T *array_indices_as() const;
+ const circle::Int32Vector *array_indices_as_Int32Vector() const
+ {
+ return array_indices_type() == circle::SparseIndexVector_Int32Vector
+ ? static_cast<const circle::Int32Vector *>(array_indices())
+ : nullptr;
+ }
+ const circle::Uint16Vector *array_indices_as_Uint16Vector() const
+ {
+ return array_indices_type() == circle::SparseIndexVector_Uint16Vector
+ ? static_cast<const circle::Uint16Vector *>(array_indices())
+ : nullptr;
+ }
+ const circle::Uint8Vector *array_indices_as_Uint8Vector() const
+ {
+ return array_indices_type() == circle::SparseIndexVector_Uint8Vector
+ ? static_cast<const circle::Uint8Vector *>(array_indices())
+ : nullptr;
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_FORMAT) &&
+ VerifyField<int32_t>(verifier, VT_DENSE_SIZE) &&
+ VerifyField<uint8_t>(verifier, VT_ARRAY_SEGMENTS_TYPE) &&
+ VerifyOffset(verifier, VT_ARRAY_SEGMENTS) &&
+ VerifySparseIndexVector(verifier, array_segments(), array_segments_type()) &&
+ VerifyField<uint8_t>(verifier, VT_ARRAY_INDICES_TYPE) &&
+ VerifyOffset(verifier, VT_ARRAY_INDICES) &&
+ VerifySparseIndexVector(verifier, array_indices(), array_indices_type()) &&
+ verifier.EndTable();
+ }
+ DimensionMetadataT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(DimensionMetadataT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<DimensionMetadata>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const DimensionMetadataT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+template <>
+inline const circle::Int32Vector *DimensionMetadata::array_segments_as<circle::Int32Vector>() const
+{
+ return array_segments_as_Int32Vector();
+}
+
+template <>
+inline const circle::Uint16Vector *
+DimensionMetadata::array_segments_as<circle::Uint16Vector>() const
+{
+ return array_segments_as_Uint16Vector();
+}
+
+template <>
+inline const circle::Uint8Vector *DimensionMetadata::array_segments_as<circle::Uint8Vector>() const
+{
+ return array_segments_as_Uint8Vector();
+}
+
+template <>
+inline const circle::Int32Vector *DimensionMetadata::array_indices_as<circle::Int32Vector>() const
+{
+ return array_indices_as_Int32Vector();
+}
+
+template <>
+inline const circle::Uint16Vector *DimensionMetadata::array_indices_as<circle::Uint16Vector>() const
+{
+ return array_indices_as_Uint16Vector();
+}
+
+template <>
+inline const circle::Uint8Vector *DimensionMetadata::array_indices_as<circle::Uint8Vector>() const
+{
+ return array_indices_as_Uint8Vector();
+}
+
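+// The array_segments_as<T>()/array_indices_as<T>() specializations above simply
+// forward to the type-checked accessors, so e.g.
+// dm->array_segments_as<circle::Uint8Vector>() behaves exactly like
+// dm->array_segments_as_Uint8Vector(): nullptr unless the stored type matches.
+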
+struct DimensionMetadataBuilder
+{
+ typedef DimensionMetadata Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_format(circle::DimensionType format)
+ {
+ fbb_.AddElement<int8_t>(DimensionMetadata::VT_FORMAT, static_cast<int8_t>(format), 0);
+ }
+ void add_dense_size(int32_t dense_size)
+ {
+ fbb_.AddElement<int32_t>(DimensionMetadata::VT_DENSE_SIZE, dense_size, 0);
+ }
+ void add_array_segments_type(circle::SparseIndexVector array_segments_type)
+ {
+ fbb_.AddElement<uint8_t>(DimensionMetadata::VT_ARRAY_SEGMENTS_TYPE,
+ static_cast<uint8_t>(array_segments_type), 0);
+ }
+ void add_array_segments(flatbuffers::Offset<void> array_segments)
+ {
+ fbb_.AddOffset(DimensionMetadata::VT_ARRAY_SEGMENTS, array_segments);
+ }
+ void add_array_indices_type(circle::SparseIndexVector array_indices_type)
+ {
+ fbb_.AddElement<uint8_t>(DimensionMetadata::VT_ARRAY_INDICES_TYPE,
+ static_cast<uint8_t>(array_indices_type), 0);
+ }
+ void add_array_indices(flatbuffers::Offset<void> array_indices)
+ {
+ fbb_.AddOffset(DimensionMetadata::VT_ARRAY_INDICES, array_indices);
+ }
+ explicit DimensionMetadataBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<DimensionMetadata> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<DimensionMetadata>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<DimensionMetadata> CreateDimensionMetadata(
+ flatbuffers::FlatBufferBuilder &_fbb, circle::DimensionType format = circle::DimensionType_DENSE,
+ int32_t dense_size = 0,
+ circle::SparseIndexVector array_segments_type = circle::SparseIndexVector_NONE,
+ flatbuffers::Offset<void> array_segments = 0,
+ circle::SparseIndexVector array_indices_type = circle::SparseIndexVector_NONE,
+ flatbuffers::Offset<void> array_indices = 0)
+{
+ DimensionMetadataBuilder builder_(_fbb);
+ builder_.add_array_indices(array_indices);
+ builder_.add_array_segments(array_segments);
+ builder_.add_dense_size(dense_size);
+ builder_.add_array_indices_type(array_indices_type);
+ builder_.add_array_segments_type(array_segments_type);
+ builder_.add_format(format);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<DimensionMetadata>
+CreateDimensionMetadata(flatbuffers::FlatBufferBuilder &_fbb, const DimensionMetadataT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct SparsityParametersT : public flatbuffers::NativeTable
+{
+ typedef SparsityParameters TableType;
+ std::vector<int32_t> traversal_order{};
+ std::vector<int32_t> block_map{};
+ std::vector<std::unique_ptr<circle::DimensionMetadataT>> dim_metadata{};
+};
+
+struct SparsityParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef SparsityParametersT NativeTableType;
+ typedef SparsityParametersBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_TRAVERSAL_ORDER = 4,
+ VT_BLOCK_MAP = 6,
+ VT_DIM_METADATA = 8
+ };
+ const flatbuffers::Vector<int32_t> *traversal_order() const
+ {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_TRAVERSAL_ORDER);
+ }
+ const flatbuffers::Vector<int32_t> *block_map() const
+ {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_BLOCK_MAP);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>> *dim_metadata() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>> *>(
+ VT_DIM_METADATA);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_TRAVERSAL_ORDER) &&
+ verifier.VerifyVector(traversal_order()) && VerifyOffset(verifier, VT_BLOCK_MAP) &&
+ verifier.VerifyVector(block_map()) && VerifyOffset(verifier, VT_DIM_METADATA) &&
+ verifier.VerifyVector(dim_metadata()) && verifier.VerifyVectorOfTables(dim_metadata()) &&
+ verifier.EndTable();
+ }
+ SparsityParametersT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(SparsityParametersT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<SparsityParameters>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const SparsityParametersT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SparsityParametersBuilder
+{
+ typedef SparsityParameters Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_traversal_order(flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order)
+ {
+ fbb_.AddOffset(SparsityParameters::VT_TRAVERSAL_ORDER, traversal_order);
+ }
+ void add_block_map(flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map)
+ {
+ fbb_.AddOffset(SparsityParameters::VT_BLOCK_MAP, block_map);
+ }
+ void add_dim_metadata(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>>>
+ dim_metadata)
+ {
+ fbb_.AddOffset(SparsityParameters::VT_DIM_METADATA, dim_metadata);
+ }
+ explicit SparsityParametersBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<SparsityParameters> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SparsityParameters>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SparsityParameters> CreateSparsityParameters(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>>>
+ dim_metadata = 0)
+{
+ SparsityParametersBuilder builder_(_fbb);
+ builder_.add_dim_metadata(dim_metadata);
+ builder_.add_block_map(block_map);
+ builder_.add_traversal_order(traversal_order);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<SparsityParameters> CreateSparsityParametersDirect(
+ flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *traversal_order = nullptr,
+ const std::vector<int32_t> *block_map = nullptr,
+ const std::vector<flatbuffers::Offset<circle::DimensionMetadata>> *dim_metadata = nullptr)
+{
+ auto traversal_order__ = traversal_order ? _fbb.CreateVector<int32_t>(*traversal_order) : 0;
+ auto block_map__ = block_map ? _fbb.CreateVector<int32_t>(*block_map) : 0;
+ auto dim_metadata__ =
+ dim_metadata ? _fbb.CreateVector<flatbuffers::Offset<circle::DimensionMetadata>>(*dim_metadata)
+ : 0;
+ return circle::CreateSparsityParameters(_fbb, traversal_order__, block_map__, dim_metadata__);
+}
+
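+// An illustrative caller sketch tying the pieces together -- one dense dimension
+// wrapped into SparsityParameters (names here are hypothetical caller code):
+//
+//   auto dim = circle::CreateDimensionMetadata(fbb, circle::DimensionType_DENSE, 4);
+//   std::vector<flatbuffers::Offset<circle::DimensionMetadata>> dims = {dim};
+//   std::vector<int32_t> order = {0};
+//   auto sp = circle::CreateSparsityParametersDirect(fbb, &order, nullptr, &dims);
+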
+flatbuffers::Offset<SparsityParameters>
+CreateSparsityParameters(flatbuffers::FlatBufferBuilder &_fbb, const SparsityParametersT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct TensorT : public flatbuffers::NativeTable
+{
+ typedef Tensor TableType;
+ std::vector<int32_t> shape{};
+ circle::TensorType type = circle::TensorType_FLOAT32;
+ uint32_t buffer = 0;
+ std::string name{};
+ std::unique_ptr<circle::QuantizationParametersT> quantization{};
+ bool is_variable = false;
+ std::unique_ptr<circle::SparsityParametersT> sparsity{};
+ std::vector<int32_t> shape_signature{};
+};
+
+struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef TensorT NativeTableType;
+ typedef TensorBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_SHAPE = 4,
+ VT_TYPE = 6,
+ VT_BUFFER = 8,
+ VT_NAME = 10,
+ VT_QUANTIZATION = 12,
+ VT_IS_VARIABLE = 14,
+ VT_SPARSITY = 16,
+ VT_SHAPE_SIGNATURE = 18
+ };
+ const flatbuffers::Vector<int32_t> *shape() const
+ {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_SHAPE);
+ }
+ circle::TensorType type() const
+ {
+ return static_cast<circle::TensorType>(GetField<int8_t>(VT_TYPE, 0));
+ }
+ uint32_t buffer() const { return GetField<uint32_t>(VT_BUFFER, 0); }
+ const flatbuffers::String *name() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_NAME);
+ }
+ const circle::QuantizationParameters *quantization() const
+ {
+ return GetPointer<const circle::QuantizationParameters *>(VT_QUANTIZATION);
+ }
+ bool is_variable() const { return GetField<uint8_t>(VT_IS_VARIABLE, 0) != 0; }
+ const circle::SparsityParameters *sparsity() const
+ {
+ return GetPointer<const circle::SparsityParameters *>(VT_SPARSITY);
+ }
+ const flatbuffers::Vector<int32_t> *shape_signature() const
+ {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_SHAPE_SIGNATURE);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_SHAPE) &&
+ verifier.VerifyVector(shape()) && VerifyField<int8_t>(verifier, VT_TYPE) &&
+ VerifyField<uint32_t>(verifier, VT_BUFFER) && VerifyOffset(verifier, VT_NAME) &&
+ verifier.VerifyString(name()) && VerifyOffset(verifier, VT_QUANTIZATION) &&
+ verifier.VerifyTable(quantization()) && VerifyField<uint8_t>(verifier, VT_IS_VARIABLE) &&
+ VerifyOffset(verifier, VT_SPARSITY) && verifier.VerifyTable(sparsity()) &&
+ VerifyOffset(verifier, VT_SHAPE_SIGNATURE) && verifier.VerifyVector(shape_signature()) &&
+ verifier.EndTable();
+ }
+ TensorT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(TensorT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<Tensor>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const TensorT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct TensorBuilder
+{
+ typedef Tensor Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_shape(flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape)
+ {
+ fbb_.AddOffset(Tensor::VT_SHAPE, shape);
+ }
+ void add_type(circle::TensorType type)
+ {
+ fbb_.AddElement<int8_t>(Tensor::VT_TYPE, static_cast<int8_t>(type), 0);
+ }
+ void add_buffer(uint32_t buffer) { fbb_.AddElement<uint32_t>(Tensor::VT_BUFFER, buffer, 0); }
+ void add_name(flatbuffers::Offset<flatbuffers::String> name)
+ {
+ fbb_.AddOffset(Tensor::VT_NAME, name);
+ }
+ void add_quantization(flatbuffers::Offset<circle::QuantizationParameters> quantization)
+ {
+ fbb_.AddOffset(Tensor::VT_QUANTIZATION, quantization);
+ }
+ void add_is_variable(bool is_variable)
+ {
+ fbb_.AddElement<uint8_t>(Tensor::VT_IS_VARIABLE, static_cast<uint8_t>(is_variable), 0);
+ }
+ void add_sparsity(flatbuffers::Offset<circle::SparsityParameters> sparsity)
+ {
+ fbb_.AddOffset(Tensor::VT_SPARSITY, sparsity);
+ }
+ void add_shape_signature(flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape_signature)
+ {
+ fbb_.AddOffset(Tensor::VT_SHAPE_SIGNATURE, shape_signature);
+ }
+ explicit TensorBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<Tensor> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Tensor>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Tensor>
+CreateTensor(flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape = 0,
+ circle::TensorType type = circle::TensorType_FLOAT32, uint32_t buffer = 0,
+ flatbuffers::Offset<flatbuffers::String> name = 0,
+ flatbuffers::Offset<circle::QuantizationParameters> quantization = 0,
+ bool is_variable = false, flatbuffers::Offset<circle::SparsityParameters> sparsity = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape_signature = 0)
+{
+ TensorBuilder builder_(_fbb);
+ builder_.add_shape_signature(shape_signature);
+ builder_.add_sparsity(sparsity);
+ builder_.add_quantization(quantization);
+ builder_.add_name(name);
+ builder_.add_buffer(buffer);
+ builder_.add_shape(shape);
+ builder_.add_is_variable(is_variable);
+ builder_.add_type(type);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Tensor> CreateTensorDirect(
+ flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *shape = nullptr,
+ circle::TensorType type = circle::TensorType_FLOAT32, uint32_t buffer = 0,
+ const char *name = nullptr, flatbuffers::Offset<circle::QuantizationParameters> quantization = 0,
+ bool is_variable = false, flatbuffers::Offset<circle::SparsityParameters> sparsity = 0,
+ const std::vector<int32_t> *shape_signature = nullptr)
+{
+ auto shape__ = shape ? _fbb.CreateVector<int32_t>(*shape) : 0;
+ auto name__ = name ? _fbb.CreateString(name) : 0;
+ auto shape_signature__ = shape_signature ? _fbb.CreateVector<int32_t>(*shape_signature) : 0;
+ return circle::CreateTensor(_fbb, shape__, type, buffer, name__, quantization, is_variable,
+ sparsity, shape_signature__);
+}
+
+flatbuffers::Offset<Tensor>
+CreateTensor(flatbuffers::FlatBufferBuilder &_fbb, const TensorT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
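+// Usage sketch (illustrative, not generated): CreateTensorDirect additionally
+// accepts a C string for `name` and copies it into the buffer. The shape and
+// name below are hypothetical.
+//
+//   flatbuffers::FlatBufferBuilder fbb;
+//   std::vector<int32_t> shape{1, 224, 224, 3};
+//   auto tensor = circle::CreateTensorDirect(fbb, &shape, circle::TensorType_FLOAT32,
+//                                            /*buffer=*/0, "input");
+//
+// Verification of an untrusted buffer normally starts from the root helper that
+// flatc generates for the schema's root type and recurses into Tensor::Verify,
+// rather than calling Verify on an individual table in isolation.
+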
+struct Conv2DOptionsT : public flatbuffers::NativeTable
+{
+ typedef Conv2DOptions TableType;
+ circle::Padding padding = circle::Padding_SAME;
+ int32_t stride_w = 0;
+ int32_t stride_h = 0;
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE;
+ int32_t dilation_w_factor = 1;
+ int32_t dilation_h_factor = 1;
+};
+
+struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef Conv2DOptionsT NativeTableType;
+ typedef Conv2DOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_PADDING = 4,
+ VT_STRIDE_W = 6,
+ VT_STRIDE_H = 8,
+ VT_FUSED_ACTIVATION_FUNCTION = 10,
+ VT_DILATION_W_FACTOR = 12,
+ VT_DILATION_H_FACTOR = 14
+ };
+ circle::Padding padding() const
+ {
+ return static_cast<circle::Padding>(GetField<int8_t>(VT_PADDING, 0));
+ }
+ int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
+ int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
+ circle::ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); }
+ int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) &&
+ VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
+ VerifyField<int32_t>(verifier, VT_STRIDE_H) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+ VerifyField<int32_t>(verifier, VT_DILATION_W_FACTOR) &&
+ VerifyField<int32_t>(verifier, VT_DILATION_H_FACTOR) && verifier.EndTable();
+ }
+ Conv2DOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(Conv2DOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<Conv2DOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct Conv2DOptionsBuilder
+{
+ typedef Conv2DOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_padding(circle::Padding padding)
+ {
+ fbb_.AddElement<int8_t>(Conv2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
+ }
+ void add_stride_w(int32_t stride_w)
+ {
+ fbb_.AddElement<int32_t>(Conv2DOptions::VT_STRIDE_W, stride_w, 0);
+ }
+ void add_stride_h(int32_t stride_h)
+ {
+ fbb_.AddElement<int32_t>(Conv2DOptions::VT_STRIDE_H, stride_h, 0);
+ }
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(Conv2DOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ void add_dilation_w_factor(int32_t dilation_w_factor)
+ {
+ fbb_.AddElement<int32_t>(Conv2DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1);
+ }
+ void add_dilation_h_factor(int32_t dilation_h_factor)
+ {
+ fbb_.AddElement<int32_t>(Conv2DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1);
+ }
+ explicit Conv2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<Conv2DOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Conv2DOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Conv2DOptions> CreateConv2DOptions(
+ flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME,
+ int32_t stride_w = 0, int32_t stride_h = 0,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
+{
+ Conv2DOptionsBuilder builder_(_fbb);
+ builder_.add_dilation_h_factor(dilation_h_factor);
+ builder_.add_dilation_w_factor(dilation_w_factor);
+ builder_.add_stride_h(stride_h);
+ builder_.add_stride_w(stride_w);
+ builder_.add_fused_activation_function(fused_activation_function);
+ builder_.add_padding(padding);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<Conv2DOptions>
+CreateConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
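+// Usage sketch (illustrative, not generated). Note the non-zero schema defaults:
+// dilation_w_factor and dilation_h_factor default to 1, and AddElement skips any
+// field whose value equals its default, so the call below serializes no dilation
+// entries at all. Parameter values are hypothetical.
+//
+//   auto conv_opts = circle::CreateConv2DOptions(
+//     fbb, circle::Padding_VALID, /*stride_w=*/1, /*stride_h=*/1,
+//     circle::ActivationFunctionType_RELU); // dilation factors left at 1
+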
+struct Conv3DOptionsT : public flatbuffers::NativeTable
+{
+ typedef Conv3DOptions TableType;
+ circle::Padding padding = circle::Padding_SAME;
+ int32_t stride_d = 0;
+ int32_t stride_w = 0;
+ int32_t stride_h = 0;
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE;
+ int32_t dilation_d_factor = 1;
+ int32_t dilation_w_factor = 1;
+ int32_t dilation_h_factor = 1;
+};
+
+struct Conv3DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef Conv3DOptionsT NativeTableType;
+ typedef Conv3DOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_PADDING = 4,
+ VT_STRIDE_D = 6,
+ VT_STRIDE_W = 8,
+ VT_STRIDE_H = 10,
+ VT_FUSED_ACTIVATION_FUNCTION = 12,
+ VT_DILATION_D_FACTOR = 14,
+ VT_DILATION_W_FACTOR = 16,
+ VT_DILATION_H_FACTOR = 18
+ };
+ circle::Padding padding() const
+ {
+ return static_cast<circle::Padding>(GetField<int8_t>(VT_PADDING, 0));
+ }
+ int32_t stride_d() const { return GetField<int32_t>(VT_STRIDE_D, 0); }
+ int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
+ int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
+ circle::ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ int32_t dilation_d_factor() const { return GetField<int32_t>(VT_DILATION_D_FACTOR, 1); }
+ int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); }
+ int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) &&
+ VerifyField<int32_t>(verifier, VT_STRIDE_D) &&
+ VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
+ VerifyField<int32_t>(verifier, VT_STRIDE_H) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+ VerifyField<int32_t>(verifier, VT_DILATION_D_FACTOR) &&
+ VerifyField<int32_t>(verifier, VT_DILATION_W_FACTOR) &&
+ VerifyField<int32_t>(verifier, VT_DILATION_H_FACTOR) && verifier.EndTable();
+ }
+ Conv3DOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(Conv3DOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<Conv3DOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const Conv3DOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct Conv3DOptionsBuilder
+{
+ typedef Conv3DOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_padding(circle::Padding padding)
+ {
+ fbb_.AddElement<int8_t>(Conv3DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
+ }
+ void add_stride_d(int32_t stride_d)
+ {
+ fbb_.AddElement<int32_t>(Conv3DOptions::VT_STRIDE_D, stride_d, 0);
+ }
+ void add_stride_w(int32_t stride_w)
+ {
+ fbb_.AddElement<int32_t>(Conv3DOptions::VT_STRIDE_W, stride_w, 0);
+ }
+ void add_stride_h(int32_t stride_h)
+ {
+ fbb_.AddElement<int32_t>(Conv3DOptions::VT_STRIDE_H, stride_h, 0);
+ }
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(Conv3DOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ void add_dilation_d_factor(int32_t dilation_d_factor)
+ {
+ fbb_.AddElement<int32_t>(Conv3DOptions::VT_DILATION_D_FACTOR, dilation_d_factor, 1);
+ }
+ void add_dilation_w_factor(int32_t dilation_w_factor)
+ {
+ fbb_.AddElement<int32_t>(Conv3DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1);
+ }
+ void add_dilation_h_factor(int32_t dilation_h_factor)
+ {
+ fbb_.AddElement<int32_t>(Conv3DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1);
+ }
+ explicit Conv3DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<Conv3DOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Conv3DOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Conv3DOptions> CreateConv3DOptions(
+ flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME,
+ int32_t stride_d = 0, int32_t stride_w = 0, int32_t stride_h = 0,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ int32_t dilation_d_factor = 1, int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
+{
+ Conv3DOptionsBuilder builder_(_fbb);
+ builder_.add_dilation_h_factor(dilation_h_factor);
+ builder_.add_dilation_w_factor(dilation_w_factor);
+ builder_.add_dilation_d_factor(dilation_d_factor);
+ builder_.add_stride_h(stride_h);
+ builder_.add_stride_w(stride_w);
+ builder_.add_stride_d(stride_d);
+ builder_.add_fused_activation_function(fused_activation_function);
+ builder_.add_padding(padding);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<Conv3DOptions>
+CreateConv3DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Conv3DOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct Pool2DOptionsT : public flatbuffers::NativeTable
+{
+ typedef Pool2DOptions TableType;
+ circle::Padding padding = circle::Padding_SAME;
+ int32_t stride_w = 0;
+ int32_t stride_h = 0;
+ int32_t filter_width = 0;
+ int32_t filter_height = 0;
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE;
+};
+
+struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef Pool2DOptionsT NativeTableType;
+ typedef Pool2DOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_PADDING = 4,
+ VT_STRIDE_W = 6,
+ VT_STRIDE_H = 8,
+ VT_FILTER_WIDTH = 10,
+ VT_FILTER_HEIGHT = 12,
+ VT_FUSED_ACTIVATION_FUNCTION = 14
+ };
+ circle::Padding padding() const
+ {
+ return static_cast<circle::Padding>(GetField<int8_t>(VT_PADDING, 0));
+ }
+ int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
+ int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
+ int32_t filter_width() const { return GetField<int32_t>(VT_FILTER_WIDTH, 0); }
+ int32_t filter_height() const { return GetField<int32_t>(VT_FILTER_HEIGHT, 0); }
+ circle::ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) &&
+ VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
+ VerifyField<int32_t>(verifier, VT_STRIDE_H) &&
+ VerifyField<int32_t>(verifier, VT_FILTER_WIDTH) &&
+ VerifyField<int32_t>(verifier, VT_FILTER_HEIGHT) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+ }
+ Pool2DOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(Pool2DOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<Pool2DOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct Pool2DOptionsBuilder
+{
+ typedef Pool2DOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_padding(circle::Padding padding)
+ {
+ fbb_.AddElement<int8_t>(Pool2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
+ }
+ void add_stride_w(int32_t stride_w)
+ {
+ fbb_.AddElement<int32_t>(Pool2DOptions::VT_STRIDE_W, stride_w, 0);
+ }
+ void add_stride_h(int32_t stride_h)
+ {
+ fbb_.AddElement<int32_t>(Pool2DOptions::VT_STRIDE_H, stride_h, 0);
+ }
+ void add_filter_width(int32_t filter_width)
+ {
+ fbb_.AddElement<int32_t>(Pool2DOptions::VT_FILTER_WIDTH, filter_width, 0);
+ }
+ void add_filter_height(int32_t filter_height)
+ {
+ fbb_.AddElement<int32_t>(Pool2DOptions::VT_FILTER_HEIGHT, filter_height, 0);
+ }
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(Pool2DOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ explicit Pool2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<Pool2DOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Pool2DOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Pool2DOptions> CreatePool2DOptions(
+ flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME,
+ int32_t stride_w = 0, int32_t stride_h = 0, int32_t filter_width = 0, int32_t filter_height = 0,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
+{
+ Pool2DOptionsBuilder builder_(_fbb);
+ builder_.add_filter_height(filter_height);
+ builder_.add_filter_width(filter_width);
+ builder_.add_stride_h(stride_h);
+ builder_.add_stride_w(stride_w);
+ builder_.add_fused_activation_function(fused_activation_function);
+ builder_.add_padding(padding);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<Pool2DOptions>
+CreatePool2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
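+// Usage sketch (illustrative, not generated): the Create* helpers are thin
+// wrappers over the *Builder types, which can also be driven by hand when only
+// a few fields are set. A hypothetical 2x2, stride-2 pooling configuration:
+//
+//   circle::Pool2DOptionsBuilder b(fbb);
+//   b.add_filter_width(2);
+//   b.add_filter_height(2);
+//   b.add_stride_w(2);
+//   b.add_stride_h(2);
+//   flatbuffers::Offset<circle::Pool2DOptions> pool_opts = b.Finish();
+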
+struct DepthwiseConv2DOptionsT : public flatbuffers::NativeTable
+{
+ typedef DepthwiseConv2DOptions TableType;
+ circle::Padding padding = circle::Padding_SAME;
+ int32_t stride_w = 0;
+ int32_t stride_h = 0;
+ int32_t depth_multiplier = 0;
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE;
+ int32_t dilation_w_factor = 1;
+ int32_t dilation_h_factor = 1;
+};
+
+struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef DepthwiseConv2DOptionsT NativeTableType;
+ typedef DepthwiseConv2DOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_PADDING = 4,
+ VT_STRIDE_W = 6,
+ VT_STRIDE_H = 8,
+ VT_DEPTH_MULTIPLIER = 10,
+ VT_FUSED_ACTIVATION_FUNCTION = 12,
+ VT_DILATION_W_FACTOR = 14,
+ VT_DILATION_H_FACTOR = 16
+ };
+ circle::Padding padding() const
+ {
+ return static_cast<circle::Padding>(GetField<int8_t>(VT_PADDING, 0));
+ }
+ int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
+ int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
+ int32_t depth_multiplier() const { return GetField<int32_t>(VT_DEPTH_MULTIPLIER, 0); }
+ circle::ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); }
+ int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) &&
+ VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
+ VerifyField<int32_t>(verifier, VT_STRIDE_H) &&
+ VerifyField<int32_t>(verifier, VT_DEPTH_MULTIPLIER) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+ VerifyField<int32_t>(verifier, VT_DILATION_W_FACTOR) &&
+ VerifyField<int32_t>(verifier, VT_DILATION_H_FACTOR) && verifier.EndTable();
+ }
+ DepthwiseConv2DOptionsT *
+ UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(DepthwiseConv2DOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<DepthwiseConv2DOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct DepthwiseConv2DOptionsBuilder
+{
+ typedef DepthwiseConv2DOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_padding(circle::Padding padding)
+ {
+ fbb_.AddElement<int8_t>(DepthwiseConv2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
+ }
+ void add_stride_w(int32_t stride_w)
+ {
+ fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_STRIDE_W, stride_w, 0);
+ }
+ void add_stride_h(int32_t stride_h)
+ {
+ fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_STRIDE_H, stride_h, 0);
+ }
+ void add_depth_multiplier(int32_t depth_multiplier)
+ {
+ fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DEPTH_MULTIPLIER, depth_multiplier, 0);
+ }
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(DepthwiseConv2DOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ void add_dilation_w_factor(int32_t dilation_w_factor)
+ {
+ fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1);
+ }
+ void add_dilation_h_factor(int32_t dilation_h_factor)
+ {
+ fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1);
+ }
+ explicit DepthwiseConv2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<DepthwiseConv2DOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<DepthwiseConv2DOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions(
+ flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME,
+ int32_t stride_w = 0, int32_t stride_h = 0, int32_t depth_multiplier = 0,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
+{
+ DepthwiseConv2DOptionsBuilder builder_(_fbb);
+ builder_.add_dilation_h_factor(dilation_h_factor);
+ builder_.add_dilation_w_factor(dilation_w_factor);
+ builder_.add_depth_multiplier(depth_multiplier);
+ builder_.add_stride_h(stride_h);
+ builder_.add_stride_w(stride_w);
+ builder_.add_fused_activation_function(fused_activation_function);
+ builder_.add_padding(padding);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<DepthwiseConv2DOptions>
+CreateDepthwiseConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ const DepthwiseConv2DOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct ConcatEmbeddingsOptionsT : public flatbuffers::NativeTable
+{
+ typedef ConcatEmbeddingsOptions TableType;
+ int32_t num_channels = 0;
+ std::vector<int32_t> num_columns_per_channel{};
+ std::vector<int32_t> embedding_dim_per_channel{};
+};
+
+struct ConcatEmbeddingsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef ConcatEmbeddingsOptionsT NativeTableType;
+ typedef ConcatEmbeddingsOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_NUM_CHANNELS = 4,
+ VT_NUM_COLUMNS_PER_CHANNEL = 6,
+ VT_EMBEDDING_DIM_PER_CHANNEL = 8
+ };
+ int32_t num_channels() const { return GetField<int32_t>(VT_NUM_CHANNELS, 0); }
+ const flatbuffers::Vector<int32_t> *num_columns_per_channel() const
+ {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_NUM_COLUMNS_PER_CHANNEL);
+ }
+ const flatbuffers::Vector<int32_t> *embedding_dim_per_channel() const
+ {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_EMBEDDING_DIM_PER_CHANNEL);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_NUM_CHANNELS) &&
+ VerifyOffset(verifier, VT_NUM_COLUMNS_PER_CHANNEL) &&
+ verifier.VerifyVector(num_columns_per_channel()) &&
+ VerifyOffset(verifier, VT_EMBEDDING_DIM_PER_CHANNEL) &&
+ verifier.VerifyVector(embedding_dim_per_channel()) && verifier.EndTable();
+ }
+ ConcatEmbeddingsOptionsT *
+ UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(ConcatEmbeddingsOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<ConcatEmbeddingsOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct ConcatEmbeddingsOptionsBuilder
+{
+ typedef ConcatEmbeddingsOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_num_channels(int32_t num_channels)
+ {
+ fbb_.AddElement<int32_t>(ConcatEmbeddingsOptions::VT_NUM_CHANNELS, num_channels, 0);
+ }
+ void add_num_columns_per_channel(
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel)
+ {
+ fbb_.AddOffset(ConcatEmbeddingsOptions::VT_NUM_COLUMNS_PER_CHANNEL, num_columns_per_channel);
+ }
+ void add_embedding_dim_per_channel(
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel)
+ {
+ fbb_.AddOffset(ConcatEmbeddingsOptions::VT_EMBEDDING_DIM_PER_CHANNEL,
+ embedding_dim_per_channel);
+ }
+ explicit ConcatEmbeddingsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<ConcatEmbeddingsOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ConcatEmbeddingsOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ConcatEmbeddingsOptions> CreateConcatEmbeddingsOptions(
+ flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel = 0)
+{
+ ConcatEmbeddingsOptionsBuilder builder_(_fbb);
+ builder_.add_embedding_dim_per_channel(embedding_dim_per_channel);
+ builder_.add_num_columns_per_channel(num_columns_per_channel);
+ builder_.add_num_channels(num_channels);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<ConcatEmbeddingsOptions>
+CreateConcatEmbeddingsOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0,
+ const std::vector<int32_t> *num_columns_per_channel = nullptr,
+ const std::vector<int32_t> *embedding_dim_per_channel = nullptr)
+{
+ auto num_columns_per_channel__ =
+ num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0;
+ auto embedding_dim_per_channel__ =
+ embedding_dim_per_channel ? _fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0;
+ return circle::CreateConcatEmbeddingsOptions(_fbb, num_channels, num_columns_per_channel__,
+ embedding_dim_per_channel__);
+}
+
+flatbuffers::Offset<ConcatEmbeddingsOptions>
+CreateConcatEmbeddingsOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ const ConcatEmbeddingsOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct LSHProjectionOptionsT : public flatbuffers::NativeTable
+{
+ typedef LSHProjectionOptions TableType;
+ circle::LSHProjectionType type = circle::LSHProjectionType_UNKNOWN;
+};
+
+struct LSHProjectionOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef LSHProjectionOptionsT NativeTableType;
+ typedef LSHProjectionOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_TYPE = 4
+ };
+ circle::LSHProjectionType type() const
+ {
+ return static_cast<circle::LSHProjectionType>(GetField<int8_t>(VT_TYPE, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_TYPE) &&
+ verifier.EndTable();
+ }
+ LSHProjectionOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(LSHProjectionOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<LSHProjectionOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct LSHProjectionOptionsBuilder
+{
+ typedef LSHProjectionOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_type(circle::LSHProjectionType type)
+ {
+ fbb_.AddElement<int8_t>(LSHProjectionOptions::VT_TYPE, static_cast<int8_t>(type), 0);
+ }
+ explicit LSHProjectionOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<LSHProjectionOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<LSHProjectionOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<LSHProjectionOptions>
+CreateLSHProjectionOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ circle::LSHProjectionType type = circle::LSHProjectionType_UNKNOWN)
+{
+ LSHProjectionOptionsBuilder builder_(_fbb);
+ builder_.add_type(type);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<LSHProjectionOptions>
+CreateLSHProjectionOptions(flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct SVDFOptionsT : public flatbuffers::NativeTable
+{
+ typedef SVDFOptions TableType;
+ int32_t rank = 0;
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE;
+ bool asymmetric_quantize_inputs = false;
+};
+
+struct SVDFOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef SVDFOptionsT NativeTableType;
+ typedef SVDFOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_RANK = 4,
+ VT_FUSED_ACTIVATION_FUNCTION = 6,
+ VT_ASYMMETRIC_QUANTIZE_INPUTS = 8
+ };
+ int32_t rank() const { return GetField<int32_t>(VT_RANK, 0); }
+ circle::ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ bool asymmetric_quantize_inputs() const
+ {
+ return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_RANK) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+ VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) && verifier.EndTable();
+ }
+ SVDFOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(SVDFOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<SVDFOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SVDFOptionsBuilder
+{
+ typedef SVDFOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_rank(int32_t rank) { fbb_.AddElement<int32_t>(SVDFOptions::VT_RANK, rank, 0); }
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(SVDFOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
+ {
+ fbb_.AddElement<uint8_t>(SVDFOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS,
+ static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
+ }
+ explicit SVDFOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<SVDFOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SVDFOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SVDFOptions> CreateSVDFOptions(
+ flatbuffers::FlatBufferBuilder &_fbb, int32_t rank = 0,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ bool asymmetric_quantize_inputs = false)
+{
+ SVDFOptionsBuilder builder_(_fbb);
+ builder_.add_rank(rank);
+ builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
+ builder_.add_fused_activation_function(fused_activation_function);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<SVDFOptions>
+CreateSVDFOptions(flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct RNNOptionsT : public flatbuffers::NativeTable
+{
+ typedef RNNOptions TableType;
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE;
+ bool asymmetric_quantize_inputs = false;
+};
+
+struct RNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef RNNOptionsT NativeTableType;
+ typedef RNNOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_FUSED_ACTIVATION_FUNCTION = 4,
+ VT_ASYMMETRIC_QUANTIZE_INPUTS = 6
+ };
+ circle::ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ bool asymmetric_quantize_inputs() const
+ {
+ return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+ VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) && verifier.EndTable();
+ }
+ RNNOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(RNNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<RNNOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct RNNOptionsBuilder
+{
+ typedef RNNOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(RNNOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
+ {
+ fbb_.AddElement<uint8_t>(RNNOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS,
+ static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
+ }
+ explicit RNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<RNNOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<RNNOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<RNNOptions> CreateRNNOptions(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ bool asymmetric_quantize_inputs = false)
+{
+ RNNOptionsBuilder builder_(_fbb);
+ builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
+ builder_.add_fused_activation_function(fused_activation_function);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<RNNOptions>
+CreateRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct SequenceRNNOptionsT : public flatbuffers::NativeTable
+{
+ typedef SequenceRNNOptions TableType;
+ bool time_major = false;
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE;
+ bool asymmetric_quantize_inputs = false;
+};
+
+struct SequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef SequenceRNNOptionsT NativeTableType;
+ typedef SequenceRNNOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_TIME_MAJOR = 4,
+ VT_FUSED_ACTIVATION_FUNCTION = 6,
+ VT_ASYMMETRIC_QUANTIZE_INPUTS = 8
+ };
+ bool time_major() const { return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0; }
+ circle::ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ bool asymmetric_quantize_inputs() const
+ {
+ return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_TIME_MAJOR) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+ VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) && verifier.EndTable();
+ }
+ SequenceRNNOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(SequenceRNNOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<SequenceRNNOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const SequenceRNNOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SequenceRNNOptionsBuilder
+{
+ typedef SequenceRNNOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_time_major(bool time_major)
+ {
+ fbb_.AddElement<uint8_t>(SequenceRNNOptions::VT_TIME_MAJOR, static_cast<uint8_t>(time_major),
+ 0);
+ }
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(SequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
+ {
+ fbb_.AddElement<uint8_t>(SequenceRNNOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS,
+ static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
+ }
+ explicit SequenceRNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<SequenceRNNOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SequenceRNNOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SequenceRNNOptions> CreateSequenceRNNOptions(
+ flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ bool asymmetric_quantize_inputs = false)
+{
+ SequenceRNNOptionsBuilder builder_(_fbb);
+ builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
+ builder_.add_fused_activation_function(fused_activation_function);
+ builder_.add_time_major(time_major);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<SequenceRNNOptions>
+CreateSequenceRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, const SequenceRNNOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct BidirectionalSequenceRNNOptionsT : public flatbuffers::NativeTable
+{
+ typedef BidirectionalSequenceRNNOptions TableType;
+ bool time_major = false;
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE;
+ bool merge_outputs = false;
+ bool asymmetric_quantize_inputs = false;
+};
+
+struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef BidirectionalSequenceRNNOptionsT NativeTableType;
+ typedef BidirectionalSequenceRNNOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_TIME_MAJOR = 4,
+ VT_FUSED_ACTIVATION_FUNCTION = 6,
+ VT_MERGE_OUTPUTS = 8,
+ VT_ASYMMETRIC_QUANTIZE_INPUTS = 10
+ };
+ bool time_major() const { return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0; }
+ circle::ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ bool merge_outputs() const { return GetField<uint8_t>(VT_MERGE_OUTPUTS, 0) != 0; }
+ bool asymmetric_quantize_inputs() const
+ {
+ return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_TIME_MAJOR) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+ VerifyField<uint8_t>(verifier, VT_MERGE_OUTPUTS) &&
+ VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) && verifier.EndTable();
+ }
+ BidirectionalSequenceRNNOptionsT *
+ UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(BidirectionalSequenceRNNOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<BidirectionalSequenceRNNOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceRNNOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct BidirectionalSequenceRNNOptionsBuilder
+{
+ typedef BidirectionalSequenceRNNOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_time_major(bool time_major)
+ {
+ fbb_.AddElement<uint8_t>(BidirectionalSequenceRNNOptions::VT_TIME_MAJOR,
+ static_cast<uint8_t>(time_major), 0);
+ }
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(BidirectionalSequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ void add_merge_outputs(bool merge_outputs)
+ {
+ fbb_.AddElement<uint8_t>(BidirectionalSequenceRNNOptions::VT_MERGE_OUTPUTS,
+ static_cast<uint8_t>(merge_outputs), 0);
+ }
+ void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
+ {
+ fbb_.AddElement<uint8_t>(BidirectionalSequenceRNNOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS,
+ static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
+ }
+ explicit BidirectionalSequenceRNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<BidirectionalSequenceRNNOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<BidirectionalSequenceRNNOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> CreateBidirectionalSequenceRNNOptions(
+ flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ bool merge_outputs = false, bool asymmetric_quantize_inputs = false)
+{
+ BidirectionalSequenceRNNOptionsBuilder builder_(_fbb);
+ builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
+ builder_.add_merge_outputs(merge_outputs);
+ builder_.add_fused_activation_function(fused_activation_function);
+ builder_.add_time_major(time_major);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<BidirectionalSequenceRNNOptions>
+CreateBidirectionalSequenceRNNOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ const BidirectionalSequenceRNNOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct FullyConnectedOptionsT : public flatbuffers::NativeTable
+{
+ typedef FullyConnectedOptions TableType;
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE;
+ circle::FullyConnectedOptionsWeightsFormat weights_format =
+ circle::FullyConnectedOptionsWeightsFormat_DEFAULT;
+ bool keep_num_dims = false;
+ bool asymmetric_quantize_inputs = false;
+};
+
+struct FullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef FullyConnectedOptionsT NativeTableType;
+ typedef FullyConnectedOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_FUSED_ACTIVATION_FUNCTION = 4,
+ VT_WEIGHTS_FORMAT = 6,
+ VT_KEEP_NUM_DIMS = 8,
+ VT_ASYMMETRIC_QUANTIZE_INPUTS = 10
+ };
+ circle::ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ circle::FullyConnectedOptionsWeightsFormat weights_format() const
+ {
+ return static_cast<circle::FullyConnectedOptionsWeightsFormat>(
+ GetField<int8_t>(VT_WEIGHTS_FORMAT, 0));
+ }
+ bool keep_num_dims() const { return GetField<uint8_t>(VT_KEEP_NUM_DIMS, 0) != 0; }
+ bool asymmetric_quantize_inputs() const
+ {
+ return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+ VerifyField<int8_t>(verifier, VT_WEIGHTS_FORMAT) &&
+ VerifyField<uint8_t>(verifier, VT_KEEP_NUM_DIMS) &&
+ VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) && verifier.EndTable();
+ }
+ FullyConnectedOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(FullyConnectedOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<FullyConnectedOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct FullyConnectedOptionsBuilder
+{
+ typedef FullyConnectedOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(FullyConnectedOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ void add_weights_format(circle::FullyConnectedOptionsWeightsFormat weights_format)
+ {
+ fbb_.AddElement<int8_t>(FullyConnectedOptions::VT_WEIGHTS_FORMAT,
+ static_cast<int8_t>(weights_format), 0);
+ }
+ void add_keep_num_dims(bool keep_num_dims)
+ {
+ fbb_.AddElement<uint8_t>(FullyConnectedOptions::VT_KEEP_NUM_DIMS,
+ static_cast<uint8_t>(keep_num_dims), 0);
+ }
+ void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
+ {
+ fbb_.AddElement<uint8_t>(FullyConnectedOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS,
+ static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
+ }
+ explicit FullyConnectedOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<FullyConnectedOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<FullyConnectedOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ circle::FullyConnectedOptionsWeightsFormat weights_format =
+ circle::FullyConnectedOptionsWeightsFormat_DEFAULT,
+ bool keep_num_dims = false, bool asymmetric_quantize_inputs = false)
+{
+ FullyConnectedOptionsBuilder builder_(_fbb);
+ builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
+ builder_.add_keep_num_dims(keep_num_dims);
+ builder_.add_weights_format(weights_format);
+ builder_.add_fused_activation_function(fused_activation_function);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<FullyConnectedOptions>
+CreateFullyConnectedOptions(flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
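+// Usage sketch (illustrative, not generated): the object ("native") API pairs
+// each table with a mutable *T struct. Pack serializes a FullyConnectedOptionsT,
+// and UnPack recovers one from a read-only table. Field values are hypothetical.
+//
+//   circle::FullyConnectedOptionsT native;
+//   native.keep_num_dims = true;
+//   auto packed = circle::FullyConnectedOptions::Pack(fbb, &native);
+//   // Later, given `const circle::FullyConnectedOptions *fc` read from a buffer:
+//   //   std::unique_ptr<circle::FullyConnectedOptionsT> copy(fc->UnPack());
+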
+struct SoftmaxOptionsT : public flatbuffers::NativeTable
+{
+ typedef SoftmaxOptions TableType;
+ float beta = 0.0f;
+};
+
+struct SoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef SoftmaxOptionsT NativeTableType;
+ typedef SoftmaxOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_BETA = 4
+ };
+ float beta() const { return GetField<float>(VT_BETA, 0.0f); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<float>(verifier, VT_BETA) &&
+ verifier.EndTable();
+ }
+ SoftmaxOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(SoftmaxOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<SoftmaxOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SoftmaxOptionsBuilder
+{
+ typedef SoftmaxOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_beta(float beta) { fbb_.AddElement<float>(SoftmaxOptions::VT_BETA, beta, 0.0f); }
+ explicit SoftmaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<SoftmaxOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SoftmaxOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SoftmaxOptions>
+CreateSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb, float beta = 0.0f)
+{
+ SoftmaxOptionsBuilder builder_(_fbb);
+ builder_.add_beta(beta);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<SoftmaxOptions>
+CreateSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct ConcatenationOptionsT : public flatbuffers::NativeTable
+{
+ typedef ConcatenationOptions TableType;
+ int32_t axis = 0;
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE;
+};
+
+struct ConcatenationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef ConcatenationOptionsT NativeTableType;
+ typedef ConcatenationOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_AXIS = 4,
+ VT_FUSED_ACTIVATION_FUNCTION = 6
+ };
+ int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
+ circle::ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_AXIS) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+ }
+ ConcatenationOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(ConcatenationOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<ConcatenationOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct ConcatenationOptionsBuilder
+{
+ typedef ConcatenationOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(ConcatenationOptions::VT_AXIS, axis, 0); }
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(ConcatenationOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ explicit ConcatenationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<ConcatenationOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ConcatenationOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions(
+ flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
+{
+ ConcatenationOptionsBuilder builder_(_fbb);
+ builder_.add_axis(axis);
+ builder_.add_fused_activation_function(fused_activation_function);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<ConcatenationOptions>
+CreateConcatenationOptions(flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct AddOptionsT : public flatbuffers::NativeTable
+{
+ typedef AddOptions TableType;
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE;
+ bool pot_scale_int16 = true;
+};
+
+struct AddOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef AddOptionsT NativeTableType;
+ typedef AddOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_FUSED_ACTIVATION_FUNCTION = 4,
+ VT_POT_SCALE_INT16 = 6
+ };
+ circle::ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ bool pot_scale_int16() const { return GetField<uint8_t>(VT_POT_SCALE_INT16, 1) != 0; }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+ VerifyField<uint8_t>(verifier, VT_POT_SCALE_INT16) && verifier.EndTable();
+ }
+ AddOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(AddOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<AddOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct AddOptionsBuilder
+{
+ typedef AddOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(AddOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ void add_pot_scale_int16(bool pot_scale_int16)
+ {
+ fbb_.AddElement<uint8_t>(AddOptions::VT_POT_SCALE_INT16, static_cast<uint8_t>(pot_scale_int16),
+ 1);
+ }
+ explicit AddOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<AddOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<AddOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<AddOptions> CreateAddOptions(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ bool pot_scale_int16 = true)
+{
+ AddOptionsBuilder builder_(_fbb);
+ builder_.add_pot_scale_int16(pot_scale_int16);
+ builder_.add_fused_activation_function(fused_activation_function);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<AddOptions>
+CreateAddOptions(flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
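+// A minimal usage sketch for the helpers above (variable names such as `fbb`,
+// `opts`, and `add_options` are assumptions, not part of this header):
+//
+//   flatbuffers::FlatBufferBuilder fbb;
+//   auto opts = circle::CreateAddOptions(fbb, circle::ActivationFunctionType_RELU,
+//                                        /*pot_scale_int16=*/false);
+//   // `opts` is only an offset into `fbb`; it becomes readable data once the
+//   // enclosing Operator table and the buffer itself are finished.
+//
+// Round-tripping through the object API works the same way for every table:
+//
+//   std::unique_ptr<circle::AddOptionsT> native(add_options->UnPack());
+//   native->pot_scale_int16 = false;
+//   auto repacked = circle::AddOptions::Pack(fbb, native.get());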
+struct MulOptionsT : public flatbuffers::NativeTable
+{
+ typedef MulOptions TableType;
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE;
+};
+
+struct MulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef MulOptionsT NativeTableType;
+ typedef MulOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_FUSED_ACTIVATION_FUNCTION = 4
+ };
+ circle::ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+ }
+ MulOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(MulOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<MulOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct MulOptionsBuilder
+{
+ typedef MulOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(MulOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ explicit MulOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<MulOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<MulOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<MulOptions> CreateMulOptions(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
+{
+ MulOptionsBuilder builder_(_fbb);
+ builder_.add_fused_activation_function(fused_activation_function);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<MulOptions>
+CreateMulOptions(flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct L2NormOptionsT : public flatbuffers::NativeTable
+{
+ typedef L2NormOptions TableType;
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE;
+};
+
+struct L2NormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef L2NormOptionsT NativeTableType;
+ typedef L2NormOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_FUSED_ACTIVATION_FUNCTION = 4
+ };
+ circle::ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+ }
+ L2NormOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(L2NormOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<L2NormOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct L2NormOptionsBuilder
+{
+ typedef L2NormOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(L2NormOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ explicit L2NormOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<L2NormOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<L2NormOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<L2NormOptions> CreateL2NormOptions(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
+{
+ L2NormOptionsBuilder builder_(_fbb);
+ builder_.add_fused_activation_function(fused_activation_function);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<L2NormOptions>
+CreateL2NormOptions(flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct LocalResponseNormalizationOptionsT : public flatbuffers::NativeTable
+{
+ typedef LocalResponseNormalizationOptions TableType;
+ int32_t radius = 0;
+ float bias = 0.0f;
+ float alpha = 0.0f;
+ float beta = 0.0f;
+};
+
+struct LocalResponseNormalizationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef LocalResponseNormalizationOptionsT NativeTableType;
+ typedef LocalResponseNormalizationOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_RADIUS = 4,
+ VT_BIAS = 6,
+ VT_ALPHA = 8,
+ VT_BETA = 10
+ };
+ int32_t radius() const { return GetField<int32_t>(VT_RADIUS, 0); }
+ float bias() const { return GetField<float>(VT_BIAS, 0.0f); }
+ float alpha() const { return GetField<float>(VT_ALPHA, 0.0f); }
+ float beta() const { return GetField<float>(VT_BETA, 0.0f); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_RADIUS) &&
+ VerifyField<float>(verifier, VT_BIAS) && VerifyField<float>(verifier, VT_ALPHA) &&
+ VerifyField<float>(verifier, VT_BETA) && verifier.EndTable();
+ }
+ LocalResponseNormalizationOptionsT *
+ UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(LocalResponseNormalizationOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<LocalResponseNormalizationOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const LocalResponseNormalizationOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct LocalResponseNormalizationOptionsBuilder
+{
+ typedef LocalResponseNormalizationOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_radius(int32_t radius)
+ {
+ fbb_.AddElement<int32_t>(LocalResponseNormalizationOptions::VT_RADIUS, radius, 0);
+ }
+ void add_bias(float bias)
+ {
+ fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_BIAS, bias, 0.0f);
+ }
+ void add_alpha(float alpha)
+ {
+ fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_ALPHA, alpha, 0.0f);
+ }
+ void add_beta(float beta)
+ {
+ fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_BETA, beta, 0.0f);
+ }
+ explicit LocalResponseNormalizationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<LocalResponseNormalizationOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<LocalResponseNormalizationOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<LocalResponseNormalizationOptions>
+CreateLocalResponseNormalizationOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t radius = 0,
+ float bias = 0.0f, float alpha = 0.0f, float beta = 0.0f)
+{
+ LocalResponseNormalizationOptionsBuilder builder_(_fbb);
+ builder_.add_beta(beta);
+ builder_.add_alpha(alpha);
+ builder_.add_bias(bias);
+ builder_.add_radius(radius);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<LocalResponseNormalizationOptions> CreateLocalResponseNormalizationOptions(
+ flatbuffers::FlatBufferBuilder &_fbb, const LocalResponseNormalizationOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct LSTMOptionsT : public flatbuffers::NativeTable
+{
+ typedef LSTMOptions TableType;
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE;
+ float cell_clip = 0.0f;
+ float proj_clip = 0.0f;
+ circle::LSTMKernelType kernel_type = circle::LSTMKernelType_FULL;
+ bool asymmetric_quantize_inputs = false;
+};
+
+struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef LSTMOptionsT NativeTableType;
+ typedef LSTMOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_FUSED_ACTIVATION_FUNCTION = 4,
+ VT_CELL_CLIP = 6,
+ VT_PROJ_CLIP = 8,
+ VT_KERNEL_TYPE = 10,
+ VT_ASYMMETRIC_QUANTIZE_INPUTS = 12
+ };
+ circle::ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); }
+ float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); }
+ circle::LSTMKernelType kernel_type() const
+ {
+ return static_cast<circle::LSTMKernelType>(GetField<int8_t>(VT_KERNEL_TYPE, 0));
+ }
+ bool asymmetric_quantize_inputs() const
+ {
+ return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+ VerifyField<float>(verifier, VT_CELL_CLIP) &&
+ VerifyField<float>(verifier, VT_PROJ_CLIP) &&
+ VerifyField<int8_t>(verifier, VT_KERNEL_TYPE) &&
+ VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) && verifier.EndTable();
+ }
+ LSTMOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(LSTMOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<LSTMOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct LSTMOptionsBuilder
+{
+ typedef LSTMOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(LSTMOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ void add_cell_clip(float cell_clip)
+ {
+ fbb_.AddElement<float>(LSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f);
+ }
+ void add_proj_clip(float proj_clip)
+ {
+ fbb_.AddElement<float>(LSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f);
+ }
+ void add_kernel_type(circle::LSTMKernelType kernel_type)
+ {
+ fbb_.AddElement<int8_t>(LSTMOptions::VT_KERNEL_TYPE, static_cast<int8_t>(kernel_type), 0);
+ }
+ void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
+ {
+ fbb_.AddElement<uint8_t>(LSTMOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS,
+ static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
+ }
+ explicit LSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<LSTMOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<LSTMOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<LSTMOptions> CreateLSTMOptions(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ float cell_clip = 0.0f, float proj_clip = 0.0f,
+ circle::LSTMKernelType kernel_type = circle::LSTMKernelType_FULL,
+ bool asymmetric_quantize_inputs = false)
+{
+ LSTMOptionsBuilder builder_(_fbb);
+ builder_.add_proj_clip(proj_clip);
+ builder_.add_cell_clip(cell_clip);
+ builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
+ builder_.add_kernel_type(kernel_type);
+ builder_.add_fused_activation_function(fused_activation_function);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<LSTMOptions>
+CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
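+// Note: the inline Create* helpers invoke add_* largest-field-first (floats
+// before single-byte enums and bools, as in CreateLSTMOptions above). This
+// ordering only affects in-buffer packing and padding; field identity is
+// carried by the vtable offsets, so the call order never changes what a
+// reader sees.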
+struct UnidirectionalSequenceLSTMOptionsT : public flatbuffers::NativeTable
+{
+ typedef UnidirectionalSequenceLSTMOptions TableType;
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE;
+ float cell_clip = 0.0f;
+ float proj_clip = 0.0f;
+ bool time_major = false;
+ bool asymmetric_quantize_inputs = false;
+};
+
+struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef UnidirectionalSequenceLSTMOptionsT NativeTableType;
+ typedef UnidirectionalSequenceLSTMOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_FUSED_ACTIVATION_FUNCTION = 4,
+ VT_CELL_CLIP = 6,
+ VT_PROJ_CLIP = 8,
+ VT_TIME_MAJOR = 10,
+ VT_ASYMMETRIC_QUANTIZE_INPUTS = 12
+ };
+ circle::ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); }
+ float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); }
+ bool time_major() const { return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0; }
+ bool asymmetric_quantize_inputs() const
+ {
+ return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+ VerifyField<float>(verifier, VT_CELL_CLIP) &&
+ VerifyField<float>(verifier, VT_PROJ_CLIP) &&
+ VerifyField<uint8_t>(verifier, VT_TIME_MAJOR) &&
+ VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) && verifier.EndTable();
+ }
+ UnidirectionalSequenceLSTMOptionsT *
+ UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(UnidirectionalSequenceLSTMOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<UnidirectionalSequenceLSTMOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnidirectionalSequenceLSTMOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct UnidirectionalSequenceLSTMOptionsBuilder
+{
+ typedef UnidirectionalSequenceLSTMOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(UnidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ void add_cell_clip(float cell_clip)
+ {
+ fbb_.AddElement<float>(UnidirectionalSequenceLSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f);
+ }
+ void add_proj_clip(float proj_clip)
+ {
+ fbb_.AddElement<float>(UnidirectionalSequenceLSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f);
+ }
+ void add_time_major(bool time_major)
+ {
+ fbb_.AddElement<uint8_t>(UnidirectionalSequenceLSTMOptions::VT_TIME_MAJOR,
+ static_cast<uint8_t>(time_major), 0);
+ }
+ void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
+ {
+ fbb_.AddElement<uint8_t>(UnidirectionalSequenceLSTMOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS,
+ static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
+ }
+ explicit UnidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<UnidirectionalSequenceLSTMOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions>
+CreateUnidirectionalSequenceLSTMOptions(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ float cell_clip = 0.0f, float proj_clip = 0.0f, bool time_major = false,
+ bool asymmetric_quantize_inputs = false)
+{
+ UnidirectionalSequenceLSTMOptionsBuilder builder_(_fbb);
+ builder_.add_proj_clip(proj_clip);
+ builder_.add_cell_clip(cell_clip);
+ builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
+ builder_.add_time_major(time_major);
+ builder_.add_fused_activation_function(fused_activation_function);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> CreateUnidirectionalSequenceLSTMOptions(
+ flatbuffers::FlatBufferBuilder &_fbb, const UnidirectionalSequenceLSTMOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct BidirectionalSequenceLSTMOptionsT : public flatbuffers::NativeTable
+{
+ typedef BidirectionalSequenceLSTMOptions TableType;
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE;
+ float cell_clip = 0.0f;
+ float proj_clip = 0.0f;
+ bool merge_outputs = false;
+ bool time_major = true;
+ bool asymmetric_quantize_inputs = false;
+};
+
+struct BidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef BidirectionalSequenceLSTMOptionsT NativeTableType;
+ typedef BidirectionalSequenceLSTMOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_FUSED_ACTIVATION_FUNCTION = 4,
+ VT_CELL_CLIP = 6,
+ VT_PROJ_CLIP = 8,
+ VT_MERGE_OUTPUTS = 10,
+ VT_TIME_MAJOR = 12,
+ VT_ASYMMETRIC_QUANTIZE_INPUTS = 14
+ };
+ circle::ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); }
+ float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); }
+ bool merge_outputs() const { return GetField<uint8_t>(VT_MERGE_OUTPUTS, 0) != 0; }
+ bool time_major() const { return GetField<uint8_t>(VT_TIME_MAJOR, 1) != 0; }
+ bool asymmetric_quantize_inputs() const
+ {
+ return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+ VerifyField<float>(verifier, VT_CELL_CLIP) &&
+ VerifyField<float>(verifier, VT_PROJ_CLIP) &&
+ VerifyField<uint8_t>(verifier, VT_MERGE_OUTPUTS) &&
+ VerifyField<uint8_t>(verifier, VT_TIME_MAJOR) &&
+ VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) && verifier.EndTable();
+ }
+ BidirectionalSequenceLSTMOptionsT *
+ UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(BidirectionalSequenceLSTMOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<BidirectionalSequenceLSTMOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceLSTMOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct BidirectionalSequenceLSTMOptionsBuilder
+{
+ typedef BidirectionalSequenceLSTMOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(BidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ void add_cell_clip(float cell_clip)
+ {
+ fbb_.AddElement<float>(BidirectionalSequenceLSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f);
+ }
+ void add_proj_clip(float proj_clip)
+ {
+ fbb_.AddElement<float>(BidirectionalSequenceLSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f);
+ }
+ void add_merge_outputs(bool merge_outputs)
+ {
+ fbb_.AddElement<uint8_t>(BidirectionalSequenceLSTMOptions::VT_MERGE_OUTPUTS,
+ static_cast<uint8_t>(merge_outputs), 0);
+ }
+ void add_time_major(bool time_major)
+ {
+ fbb_.AddElement<uint8_t>(BidirectionalSequenceLSTMOptions::VT_TIME_MAJOR,
+ static_cast<uint8_t>(time_major), 1);
+ }
+ void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
+ {
+ fbb_.AddElement<uint8_t>(BidirectionalSequenceLSTMOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS,
+ static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
+ }
+ explicit BidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+ : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<BidirectionalSequenceLSTMOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<BidirectionalSequenceLSTMOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectionalSequenceLSTMOptions(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ float cell_clip = 0.0f, float proj_clip = 0.0f, bool merge_outputs = false,
+ bool time_major = true, bool asymmetric_quantize_inputs = false)
+{
+ BidirectionalSequenceLSTMOptionsBuilder builder_(_fbb);
+ builder_.add_proj_clip(proj_clip);
+ builder_.add_cell_clip(cell_clip);
+ builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
+ builder_.add_time_major(time_major);
+ builder_.add_merge_outputs(merge_outputs);
+ builder_.add_fused_activation_function(fused_activation_function);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<BidirectionalSequenceLSTMOptions>
+CreateBidirectionalSequenceLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ const BidirectionalSequenceLSTMOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
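+// Note: time_major defaults to true in BidirectionalSequenceLSTMOptions, so
+// its accessor reads GetField<uint8_t>(VT_TIME_MAJOR, 1) and add_time_major
+// writes nothing when passed true; an absent field and an explicit true are
+// indistinguishable in the serialized buffer.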
+struct ResizeBilinearOptionsT : public flatbuffers::NativeTable
+{
+ typedef ResizeBilinearOptions TableType;
+ bool align_corners = false;
+ bool half_pixel_centers = false;
+};
+
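+// Note: VT_ALIGN_CORNERS starts at 8 rather than 4, consistent with two
+// deprecated fields (ids 0 and 1 in the original schema) whose vtable slots
+// are preserved so that previously serialized buffers keep their layout.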
+struct ResizeBilinearOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef ResizeBilinearOptionsT NativeTableType;
+ typedef ResizeBilinearOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_ALIGN_CORNERS = 8,
+ VT_HALF_PIXEL_CENTERS = 10
+ };
+ bool align_corners() const { return GetField<uint8_t>(VT_ALIGN_CORNERS, 0) != 0; }
+ bool half_pixel_centers() const { return GetField<uint8_t>(VT_HALF_PIXEL_CENTERS, 0) != 0; }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ALIGN_CORNERS) &&
+ VerifyField<uint8_t>(verifier, VT_HALF_PIXEL_CENTERS) && verifier.EndTable();
+ }
+ ResizeBilinearOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(ResizeBilinearOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<ResizeBilinearOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct ResizeBilinearOptionsBuilder
+{
+ typedef ResizeBilinearOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_align_corners(bool align_corners)
+ {
+ fbb_.AddElement<uint8_t>(ResizeBilinearOptions::VT_ALIGN_CORNERS,
+ static_cast<uint8_t>(align_corners), 0);
+ }
+ void add_half_pixel_centers(bool half_pixel_centers)
+ {
+ fbb_.AddElement<uint8_t>(ResizeBilinearOptions::VT_HALF_PIXEL_CENTERS,
+ static_cast<uint8_t>(half_pixel_centers), 0);
+ }
+ explicit ResizeBilinearOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<ResizeBilinearOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ResizeBilinearOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ResizeBilinearOptions>
+CreateResizeBilinearOptions(flatbuffers::FlatBufferBuilder &_fbb, bool align_corners = false,
+ bool half_pixel_centers = false)
+{
+ ResizeBilinearOptionsBuilder builder_(_fbb);
+ builder_.add_half_pixel_centers(half_pixel_centers);
+ builder_.add_align_corners(align_corners);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<ResizeBilinearOptions>
+CreateResizeBilinearOptions(flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct ResizeNearestNeighborOptionsT : public flatbuffers::NativeTable
+{
+ typedef ResizeNearestNeighborOptions TableType;
+ bool align_corners = false;
+ bool half_pixel_centers = false;
+};
+
+struct ResizeNearestNeighborOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef ResizeNearestNeighborOptionsT NativeTableType;
+ typedef ResizeNearestNeighborOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_ALIGN_CORNERS = 4,
+ VT_HALF_PIXEL_CENTERS = 6
+ };
+ bool align_corners() const { return GetField<uint8_t>(VT_ALIGN_CORNERS, 0) != 0; }
+ bool half_pixel_centers() const { return GetField<uint8_t>(VT_HALF_PIXEL_CENTERS, 0) != 0; }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ALIGN_CORNERS) &&
+ VerifyField<uint8_t>(verifier, VT_HALF_PIXEL_CENTERS) && verifier.EndTable();
+ }
+ ResizeNearestNeighborOptionsT *
+ UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(ResizeNearestNeighborOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<ResizeNearestNeighborOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const ResizeNearestNeighborOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct ResizeNearestNeighborOptionsBuilder
+{
+ typedef ResizeNearestNeighborOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_align_corners(bool align_corners)
+ {
+ fbb_.AddElement<uint8_t>(ResizeNearestNeighborOptions::VT_ALIGN_CORNERS,
+ static_cast<uint8_t>(align_corners), 0);
+ }
+ void add_half_pixel_centers(bool half_pixel_centers)
+ {
+ fbb_.AddElement<uint8_t>(ResizeNearestNeighborOptions::VT_HALF_PIXEL_CENTERS,
+ static_cast<uint8_t>(half_pixel_centers), 0);
+ }
+ explicit ResizeNearestNeighborOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<ResizeNearestNeighborOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ResizeNearestNeighborOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ResizeNearestNeighborOptions>
+CreateResizeNearestNeighborOptions(flatbuffers::FlatBufferBuilder &_fbb, bool align_corners = false,
+ bool half_pixel_centers = false)
+{
+ ResizeNearestNeighborOptionsBuilder builder_(_fbb);
+ builder_.add_half_pixel_centers(half_pixel_centers);
+ builder_.add_align_corners(align_corners);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<ResizeNearestNeighborOptions>
+CreateResizeNearestNeighborOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ const ResizeNearestNeighborOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct CallOptionsT : public flatbuffers::NativeTable
+{
+ typedef CallOptions TableType;
+ uint32_t subgraph = 0;
+};
+
+struct CallOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef CallOptionsT NativeTableType;
+ typedef CallOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_SUBGRAPH = 4
+ };
+ uint32_t subgraph() const { return GetField<uint32_t>(VT_SUBGRAPH, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<uint32_t>(verifier, VT_SUBGRAPH) &&
+ verifier.EndTable();
+ }
+ CallOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(CallOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<CallOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct CallOptionsBuilder
+{
+ typedef CallOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_subgraph(uint32_t subgraph)
+ {
+ fbb_.AddElement<uint32_t>(CallOptions::VT_SUBGRAPH, subgraph, 0);
+ }
+ explicit CallOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<CallOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<CallOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<CallOptions> CreateCallOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ uint32_t subgraph = 0)
+{
+ CallOptionsBuilder builder_(_fbb);
+ builder_.add_subgraph(subgraph);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<CallOptions>
+CreateCallOptions(flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct PadOptionsT : public flatbuffers::NativeTable
+{
+ typedef PadOptions TableType;
+};
+
+struct PadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef PadOptionsT NativeTableType;
+ typedef PadOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ PadOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(PadOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<PadOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const PadOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct PadOptionsBuilder
+{
+ typedef PadOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit PadOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<PadOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<PadOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<PadOptions> CreatePadOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ PadOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<PadOptions>
+CreatePadOptions(flatbuffers::FlatBufferBuilder &_fbb, const PadOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
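+// Note: option tables with no fields (PadOptions, PadV2Options, and the like)
+// still get the full builder/verify scaffolding; their Verify reduces to a
+// table-bounds check. A sketch of validating an untrusted buffer before
+// touching any of these accessors (`buf` and `size` are assumed names):
+//
+//   flatbuffers::Verifier verifier(buf, size);
+//   bool ok = circle::VerifyModelBuffer(verifier);  // root-type verifier,
+//                                                   // generated elsewhere in
+//                                                   // this header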
+struct PadV2OptionsT : public flatbuffers::NativeTable
+{
+ typedef PadV2Options TableType;
+};
+
+struct PadV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef PadV2OptionsT NativeTableType;
+ typedef PadV2OptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ PadV2OptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(PadV2OptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<PadV2Options>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const PadV2OptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct PadV2OptionsBuilder
+{
+ typedef PadV2Options Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit PadV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<PadV2Options> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<PadV2Options>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<PadV2Options> CreatePadV2Options(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ PadV2OptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<PadV2Options>
+CreatePadV2Options(flatbuffers::FlatBufferBuilder &_fbb, const PadV2OptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct ReshapeOptionsT : public flatbuffers::NativeTable
+{
+ typedef ReshapeOptions TableType;
+ std::vector<int32_t> new_shape{};
+};
+
+struct ReshapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef ReshapeOptionsT NativeTableType;
+ typedef ReshapeOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_NEW_SHAPE = 4
+ };
+ const flatbuffers::Vector<int32_t> *new_shape() const
+ {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_NEW_SHAPE);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_NEW_SHAPE) &&
+ verifier.VerifyVector(new_shape()) && verifier.EndTable();
+ }
+ ReshapeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(ReshapeOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<ReshapeOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct ReshapeOptionsBuilder
+{
+ typedef ReshapeOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_new_shape(flatbuffers::Offset<flatbuffers::Vector<int32_t>> new_shape)
+ {
+ fbb_.AddOffset(ReshapeOptions::VT_NEW_SHAPE, new_shape);
+ }
+ explicit ReshapeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<ReshapeOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ReshapeOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ReshapeOptions>
+CreateReshapeOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> new_shape = 0)
+{
+ ReshapeOptionsBuilder builder_(_fbb);
+ builder_.add_new_shape(new_shape);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<ReshapeOptions>
+CreateReshapeOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb,
+ const std::vector<int32_t> *new_shape = nullptr)
+{
+ auto new_shape__ = new_shape ? _fbb.CreateVector<int32_t>(*new_shape) : 0;
+ return circle::CreateReshapeOptions(_fbb, new_shape__);
+}
+
+flatbuffers::Offset<ReshapeOptions>
+CreateReshapeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
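+// A sketch of the Direct variant above, which copies the std::vector into the
+// buffer before building the table (`fbb` and `shape` are assumed names):
+//
+//   std::vector<int32_t> shape{1, -1};
+//   auto opts = circle::CreateReshapeOptionsDirect(fbb, &shape);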
+struct SpaceToBatchNDOptionsT : public flatbuffers::NativeTable
+{
+ typedef SpaceToBatchNDOptions TableType;
+};
+
+struct SpaceToBatchNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef SpaceToBatchNDOptionsT NativeTableType;
+ typedef SpaceToBatchNDOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ SpaceToBatchNDOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(SpaceToBatchNDOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<SpaceToBatchNDOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToBatchNDOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SpaceToBatchNDOptionsBuilder
+{
+ typedef SpaceToBatchNDOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit SpaceToBatchNDOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<SpaceToBatchNDOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SpaceToBatchNDOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SpaceToBatchNDOptions>
+CreateSpaceToBatchNDOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ SpaceToBatchNDOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<SpaceToBatchNDOptions>
+CreateSpaceToBatchNDOptions(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToBatchNDOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct BatchToSpaceNDOptionsT : public flatbuffers::NativeTable
+{
+ typedef BatchToSpaceNDOptions TableType;
+};
+
+struct BatchToSpaceNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef BatchToSpaceNDOptionsT NativeTableType;
+ typedef BatchToSpaceNDOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ BatchToSpaceNDOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(BatchToSpaceNDOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<BatchToSpaceNDOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const BatchToSpaceNDOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct BatchToSpaceNDOptionsBuilder
+{
+ typedef BatchToSpaceNDOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit BatchToSpaceNDOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<BatchToSpaceNDOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<BatchToSpaceNDOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<BatchToSpaceNDOptions>
+CreateBatchToSpaceNDOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ BatchToSpaceNDOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<BatchToSpaceNDOptions>
+CreateBatchToSpaceNDOptions(flatbuffers::FlatBufferBuilder &_fbb, const BatchToSpaceNDOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct SkipGramOptionsT : public flatbuffers::NativeTable
+{
+ typedef SkipGramOptions TableType;
+ int32_t ngram_size = 0;
+ int32_t max_skip_size = 0;
+ bool include_all_ngrams = false;
+};
+
+struct SkipGramOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef SkipGramOptionsT NativeTableType;
+ typedef SkipGramOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_NGRAM_SIZE = 4,
+ VT_MAX_SKIP_SIZE = 6,
+ VT_INCLUDE_ALL_NGRAMS = 8
+ };
+ int32_t ngram_size() const { return GetField<int32_t>(VT_NGRAM_SIZE, 0); }
+ int32_t max_skip_size() const { return GetField<int32_t>(VT_MAX_SKIP_SIZE, 0); }
+ bool include_all_ngrams() const { return GetField<uint8_t>(VT_INCLUDE_ALL_NGRAMS, 0) != 0; }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_NGRAM_SIZE) &&
+ VerifyField<int32_t>(verifier, VT_MAX_SKIP_SIZE) &&
+ VerifyField<uint8_t>(verifier, VT_INCLUDE_ALL_NGRAMS) && verifier.EndTable();
+ }
+ SkipGramOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(SkipGramOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<SkipGramOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SkipGramOptionsBuilder
+{
+ typedef SkipGramOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_ngram_size(int32_t ngram_size)
+ {
+ fbb_.AddElement<int32_t>(SkipGramOptions::VT_NGRAM_SIZE, ngram_size, 0);
+ }
+ void add_max_skip_size(int32_t max_skip_size)
+ {
+ fbb_.AddElement<int32_t>(SkipGramOptions::VT_MAX_SKIP_SIZE, max_skip_size, 0);
+ }
+ void add_include_all_ngrams(bool include_all_ngrams)
+ {
+ fbb_.AddElement<uint8_t>(SkipGramOptions::VT_INCLUDE_ALL_NGRAMS,
+ static_cast<uint8_t>(include_all_ngrams), 0);
+ }
+ explicit SkipGramOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<SkipGramOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SkipGramOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SkipGramOptions>
+CreateSkipGramOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t ngram_size = 0,
+ int32_t max_skip_size = 0, bool include_all_ngrams = false)
+{
+ SkipGramOptionsBuilder builder_(_fbb);
+ builder_.add_max_skip_size(max_skip_size);
+ builder_.add_ngram_size(ngram_size);
+ builder_.add_include_all_ngrams(include_all_ngrams);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<SkipGramOptions>
+CreateSkipGramOptions(flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct SpaceToDepthOptionsT : public flatbuffers::NativeTable
+{
+ typedef SpaceToDepthOptions TableType;
+ int32_t block_size = 0;
+};
+
+struct SpaceToDepthOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef SpaceToDepthOptionsT NativeTableType;
+ typedef SpaceToDepthOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_BLOCK_SIZE = 4
+ };
+ int32_t block_size() const { return GetField<int32_t>(VT_BLOCK_SIZE, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_BLOCK_SIZE) &&
+ verifier.EndTable();
+ }
+ SpaceToDepthOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(SpaceToDepthOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<SpaceToDepthOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SpaceToDepthOptionsBuilder
+{
+ typedef SpaceToDepthOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_block_size(int32_t block_size)
+ {
+ fbb_.AddElement<int32_t>(SpaceToDepthOptions::VT_BLOCK_SIZE, block_size, 0);
+ }
+ explicit SpaceToDepthOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<SpaceToDepthOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SpaceToDepthOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SpaceToDepthOptions>
+CreateSpaceToDepthOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t block_size = 0)
+{
+ SpaceToDepthOptionsBuilder builder_(_fbb);
+ builder_.add_block_size(block_size);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<SpaceToDepthOptions>
+CreateSpaceToDepthOptions(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct DepthToSpaceOptionsT : public flatbuffers::NativeTable
+{
+ typedef DepthToSpaceOptions TableType;
+ int32_t block_size = 0;
+};
+
+struct DepthToSpaceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef DepthToSpaceOptionsT NativeTableType;
+ typedef DepthToSpaceOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_BLOCK_SIZE = 4
+ };
+ int32_t block_size() const { return GetField<int32_t>(VT_BLOCK_SIZE, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_BLOCK_SIZE) &&
+ verifier.EndTable();
+ }
+ DepthToSpaceOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(DepthToSpaceOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<DepthToSpaceOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const DepthToSpaceOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct DepthToSpaceOptionsBuilder
+{
+ typedef DepthToSpaceOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_block_size(int32_t block_size)
+ {
+ fbb_.AddElement<int32_t>(DepthToSpaceOptions::VT_BLOCK_SIZE, block_size, 0);
+ }
+ explicit DepthToSpaceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<DepthToSpaceOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<DepthToSpaceOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<DepthToSpaceOptions>
+CreateDepthToSpaceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t block_size = 0)
+{
+ DepthToSpaceOptionsBuilder builder_(_fbb);
+ builder_.add_block_size(block_size);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<DepthToSpaceOptions>
+CreateDepthToSpaceOptions(flatbuffers::FlatBufferBuilder &_fbb, const DepthToSpaceOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct SubOptionsT : public flatbuffers::NativeTable
+{
+ typedef SubOptions TableType;
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE;
+ bool pot_scale_int16 = true;
+};
+
+struct SubOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef SubOptionsT NativeTableType;
+ typedef SubOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_FUSED_ACTIVATION_FUNCTION = 4,
+ VT_POT_SCALE_INT16 = 6
+ };
+ circle::ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ bool pot_scale_int16() const { return GetField<uint8_t>(VT_POT_SCALE_INT16, 1) != 0; }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+ VerifyField<uint8_t>(verifier, VT_POT_SCALE_INT16) && verifier.EndTable();
+ }
+ SubOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(SubOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<SubOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const SubOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SubOptionsBuilder
+{
+ typedef SubOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(SubOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ void add_pot_scale_int16(bool pot_scale_int16)
+ {
+ fbb_.AddElement<uint8_t>(SubOptions::VT_POT_SCALE_INT16, static_cast<uint8_t>(pot_scale_int16),
+ 1);
+ }
+ explicit SubOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<SubOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SubOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SubOptions> CreateSubOptions(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ bool pot_scale_int16 = true)
+{
+ SubOptionsBuilder builder_(_fbb);
+ builder_.add_pot_scale_int16(pot_scale_int16);
+ builder_.add_fused_activation_function(fused_activation_function);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<SubOptions>
+CreateSubOptions(flatbuffers::FlatBufferBuilder &_fbb, const SubOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct DivOptionsT : public flatbuffers::NativeTable
+{
+ typedef DivOptions TableType;
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE;
+};
+
+struct DivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef DivOptionsT NativeTableType;
+ typedef DivOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_FUSED_ACTIVATION_FUNCTION = 4
+ };
+ circle::ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+ }
+ DivOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(DivOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<DivOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const DivOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct DivOptionsBuilder
+{
+ typedef DivOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(DivOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ explicit DivOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<DivOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<DivOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<DivOptions> CreateDivOptions(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
+{
+ DivOptionsBuilder builder_(_fbb);
+ builder_.add_fused_activation_function(fused_activation_function);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<DivOptions>
+CreateDivOptions(flatbuffers::FlatBufferBuilder &_fbb, const DivOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct TopKV2OptionsT : public flatbuffers::NativeTable
+{
+ typedef TopKV2Options TableType;
+};
+
+struct TopKV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef TopKV2OptionsT NativeTableType;
+ typedef TopKV2OptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ TopKV2OptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(TopKV2OptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<TopKV2Options>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const TopKV2OptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct TopKV2OptionsBuilder
+{
+ typedef TopKV2Options Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit TopKV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<TopKV2Options> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<TopKV2Options>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<TopKV2Options> CreateTopKV2Options(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ TopKV2OptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<TopKV2Options>
+CreateTopKV2Options(flatbuffers::FlatBufferBuilder &_fbb, const TopKV2OptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct EmbeddingLookupSparseOptionsT : public flatbuffers::NativeTable
+{
+ typedef EmbeddingLookupSparseOptions TableType;
+ circle::CombinerType combiner = circle::CombinerType_SUM;
+};
+
+struct EmbeddingLookupSparseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef EmbeddingLookupSparseOptionsT NativeTableType;
+ typedef EmbeddingLookupSparseOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_COMBINER = 4
+ };
+ circle::CombinerType combiner() const
+ {
+ return static_cast<circle::CombinerType>(GetField<int8_t>(VT_COMBINER, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_COMBINER) &&
+ verifier.EndTable();
+ }
+ EmbeddingLookupSparseOptionsT *
+ UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(EmbeddingLookupSparseOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<EmbeddingLookupSparseOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const EmbeddingLookupSparseOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct EmbeddingLookupSparseOptionsBuilder
+{
+ typedef EmbeddingLookupSparseOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_combiner(circle::CombinerType combiner)
+ {
+ fbb_.AddElement<int8_t>(EmbeddingLookupSparseOptions::VT_COMBINER,
+ static_cast<int8_t>(combiner), 0);
+ }
+ explicit EmbeddingLookupSparseOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<EmbeddingLookupSparseOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<EmbeddingLookupSparseOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<EmbeddingLookupSparseOptions>
+CreateEmbeddingLookupSparseOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ circle::CombinerType combiner = circle::CombinerType_SUM)
+{
+ EmbeddingLookupSparseOptionsBuilder builder_(_fbb);
+ builder_.add_combiner(combiner);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<EmbeddingLookupSparseOptions>
+CreateEmbeddingLookupSparseOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ const EmbeddingLookupSparseOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct GatherOptionsT : public flatbuffers::NativeTable
+{
+ typedef GatherOptions TableType;
+ int32_t axis = 0;
+ int32_t batch_dims = 0;
+};
+
+struct GatherOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef GatherOptionsT NativeTableType;
+ typedef GatherOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_AXIS = 4,
+ VT_BATCH_DIMS = 6
+ };
+ int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
+ int32_t batch_dims() const { return GetField<int32_t>(VT_BATCH_DIMS, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_AXIS) &&
+ VerifyField<int32_t>(verifier, VT_BATCH_DIMS) && verifier.EndTable();
+ }
+ GatherOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(GatherOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<GatherOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const GatherOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct GatherOptionsBuilder
+{
+ typedef GatherOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(GatherOptions::VT_AXIS, axis, 0); }
+ void add_batch_dims(int32_t batch_dims)
+ {
+ fbb_.AddElement<int32_t>(GatherOptions::VT_BATCH_DIMS, batch_dims, 0);
+ }
+ explicit GatherOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<GatherOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<GatherOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<GatherOptions>
+CreateGatherOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0, int32_t batch_dims = 0)
+{
+ GatherOptionsBuilder builder_(_fbb);
+ builder_.add_batch_dims(batch_dims);
+ builder_.add_axis(axis);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<GatherOptions>
+CreateGatherOptions(flatbuffers::FlatBufferBuilder &_fbb, const GatherOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
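+// Example (illustrative sketch, assuming a flatbuffers::FlatBufferBuilder
+// named `fbb` is in scope): gathering along axis 1.
+//
+//   auto gather_opts = circle::CreateGatherOptions(fbb, /*axis=*/1,
+//                                                  /*batch_dims=*/0);
+//
+// The helper adds batch_dims before axis; FlatBuffers vtables record field
+// offsets, so the order of Add* calls does not affect the decoded values.
+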
+struct TransposeOptionsT : public flatbuffers::NativeTable
+{
+ typedef TransposeOptions TableType;
+};
+
+struct TransposeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef TransposeOptionsT NativeTableType;
+ typedef TransposeOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ TransposeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(TransposeOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<TransposeOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const TransposeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct TransposeOptionsBuilder
+{
+ typedef TransposeOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit TransposeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<TransposeOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<TransposeOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<TransposeOptions>
+CreateTransposeOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ TransposeOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<TransposeOptions>
+CreateTransposeOptions(flatbuffers::FlatBufferBuilder &_fbb, const TransposeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct ExpOptionsT : public flatbuffers::NativeTable
+{
+ typedef ExpOptions TableType;
+};
+
+struct ExpOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef ExpOptionsT NativeTableType;
+ typedef ExpOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ ExpOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(ExpOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<ExpOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const ExpOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct ExpOptionsBuilder
+{
+ typedef ExpOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit ExpOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<ExpOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ExpOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ExpOptions> CreateExpOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ ExpOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<ExpOptions>
+CreateExpOptions(flatbuffers::FlatBufferBuilder &_fbb, const ExpOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct CosOptionsT : public flatbuffers::NativeTable
+{
+ typedef CosOptions TableType;
+};
+
+struct CosOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef CosOptionsT NativeTableType;
+ typedef CosOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ CosOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(CosOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<CosOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const CosOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct CosOptionsBuilder
+{
+ typedef CosOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit CosOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<CosOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<CosOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<CosOptions> CreateCosOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ CosOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<CosOptions>
+CreateCosOptions(flatbuffers::FlatBufferBuilder &_fbb, const CosOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct ReducerOptionsT : public flatbuffers::NativeTable
+{
+ typedef ReducerOptions TableType;
+ bool keep_dims = false;
+};
+
+struct ReducerOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef ReducerOptionsT NativeTableType;
+ typedef ReducerOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_KEEP_DIMS = 4
+ };
+ bool keep_dims() const { return GetField<uint8_t>(VT_KEEP_DIMS, 0) != 0; }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_KEEP_DIMS) &&
+ verifier.EndTable();
+ }
+ ReducerOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(ReducerOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<ReducerOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReducerOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct ReducerOptionsBuilder
+{
+ typedef ReducerOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_keep_dims(bool keep_dims)
+ {
+ fbb_.AddElement<uint8_t>(ReducerOptions::VT_KEEP_DIMS, static_cast<uint8_t>(keep_dims), 0);
+ }
+ explicit ReducerOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<ReducerOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ReducerOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ReducerOptions>
+CreateReducerOptions(flatbuffers::FlatBufferBuilder &_fbb, bool keep_dims = false)
+{
+ ReducerOptionsBuilder builder_(_fbb);
+ builder_.add_keep_dims(keep_dims);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<ReducerOptions>
+CreateReducerOptions(flatbuffers::FlatBufferBuilder &_fbb, const ReducerOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
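+// Example (illustrative sketch): round-tripping a table through the object
+// API. UnPack() heap-allocates the mutable *T struct, so the caller owns it:
+//
+//   const circle::ReducerOptions *ro = ...; // obtained from a parsed model
+//   std::unique_ptr<circle::ReducerOptionsT> native(ro->UnPack());
+//   native->keep_dims = true;
+//   auto repacked = circle::ReducerOptions::Pack(fbb, native.get());
+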
+struct SqueezeOptionsT : public flatbuffers::NativeTable
+{
+ typedef SqueezeOptions TableType;
+ std::vector<int32_t> squeeze_dims{};
+};
+
+struct SqueezeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef SqueezeOptionsT NativeTableType;
+ typedef SqueezeOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_SQUEEZE_DIMS = 4
+ };
+ const flatbuffers::Vector<int32_t> *squeeze_dims() const
+ {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_SQUEEZE_DIMS);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_SQUEEZE_DIMS) &&
+ verifier.VerifyVector(squeeze_dims()) && verifier.EndTable();
+ }
+ SqueezeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(SqueezeOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<SqueezeOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const SqueezeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SqueezeOptionsBuilder
+{
+ typedef SqueezeOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_squeeze_dims(flatbuffers::Offset<flatbuffers::Vector<int32_t>> squeeze_dims)
+ {
+ fbb_.AddOffset(SqueezeOptions::VT_SQUEEZE_DIMS, squeeze_dims);
+ }
+ explicit SqueezeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<SqueezeOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SqueezeOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SqueezeOptions>
+CreateSqueezeOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> squeeze_dims = 0)
+{
+ SqueezeOptionsBuilder builder_(_fbb);
+ builder_.add_squeeze_dims(squeeze_dims);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<SqueezeOptions>
+CreateSqueezeOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb,
+ const std::vector<int32_t> *squeeze_dims = nullptr)
+{
+ auto squeeze_dims__ = squeeze_dims ? _fbb.CreateVector<int32_t>(*squeeze_dims) : 0;
+ return circle::CreateSqueezeOptions(_fbb, squeeze_dims__);
+}
+
+flatbuffers::Offset<SqueezeOptions>
+CreateSqueezeOptions(flatbuffers::FlatBufferBuilder &_fbb, const SqueezeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
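+// Example (illustrative sketch): the *Direct variant creates the vector for
+// the caller, so a std::vector can be passed straight in:
+//
+//   std::vector<int32_t> dims{0, 2};
+//   auto squeeze_opts = circle::CreateSqueezeOptionsDirect(fbb, &dims);
+//
+// This is equivalent to passing _fbb.CreateVector<int32_t>(dims) to
+// CreateSqueezeOptions().
+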
+struct SplitOptionsT : public flatbuffers::NativeTable
+{
+ typedef SplitOptions TableType;
+ int32_t num_splits = 0;
+};
+
+struct SplitOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef SplitOptionsT NativeTableType;
+ typedef SplitOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_NUM_SPLITS = 4
+ };
+ int32_t num_splits() const { return GetField<int32_t>(VT_NUM_SPLITS, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_NUM_SPLITS) &&
+ verifier.EndTable();
+ }
+ SplitOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(SplitOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<SplitOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const SplitOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SplitOptionsBuilder
+{
+ typedef SplitOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_num_splits(int32_t num_splits)
+ {
+ fbb_.AddElement<int32_t>(SplitOptions::VT_NUM_SPLITS, num_splits, 0);
+ }
+ explicit SplitOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<SplitOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SplitOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SplitOptions> CreateSplitOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ int32_t num_splits = 0)
+{
+ SplitOptionsBuilder builder_(_fbb);
+ builder_.add_num_splits(num_splits);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<SplitOptions>
+CreateSplitOptions(flatbuffers::FlatBufferBuilder &_fbb, const SplitOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct SplitVOptionsT : public flatbuffers::NativeTable
+{
+ typedef SplitVOptions TableType;
+ int32_t num_splits = 0;
+};
+
+struct SplitVOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef SplitVOptionsT NativeTableType;
+ typedef SplitVOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_NUM_SPLITS = 4
+ };
+ int32_t num_splits() const { return GetField<int32_t>(VT_NUM_SPLITS, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_NUM_SPLITS) &&
+ verifier.EndTable();
+ }
+ SplitVOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(SplitVOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<SplitVOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const SplitVOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SplitVOptionsBuilder
+{
+ typedef SplitVOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_num_splits(int32_t num_splits)
+ {
+ fbb_.AddElement<int32_t>(SplitVOptions::VT_NUM_SPLITS, num_splits, 0);
+ }
+ explicit SplitVOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<SplitVOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SplitVOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SplitVOptions> CreateSplitVOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ int32_t num_splits = 0)
+{
+ SplitVOptionsBuilder builder_(_fbb);
+ builder_.add_num_splits(num_splits);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<SplitVOptions>
+CreateSplitVOptions(flatbuffers::FlatBufferBuilder &_fbb, const SplitVOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct StridedSliceOptionsT : public flatbuffers::NativeTable
+{
+ typedef StridedSliceOptions TableType;
+ int32_t begin_mask = 0;
+ int32_t end_mask = 0;
+ int32_t ellipsis_mask = 0;
+ int32_t new_axis_mask = 0;
+ int32_t shrink_axis_mask = 0;
+};
+
+struct StridedSliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef StridedSliceOptionsT NativeTableType;
+ typedef StridedSliceOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_BEGIN_MASK = 4,
+ VT_END_MASK = 6,
+ VT_ELLIPSIS_MASK = 8,
+ VT_NEW_AXIS_MASK = 10,
+ VT_SHRINK_AXIS_MASK = 12
+ };
+ int32_t begin_mask() const { return GetField<int32_t>(VT_BEGIN_MASK, 0); }
+ int32_t end_mask() const { return GetField<int32_t>(VT_END_MASK, 0); }
+ int32_t ellipsis_mask() const { return GetField<int32_t>(VT_ELLIPSIS_MASK, 0); }
+ int32_t new_axis_mask() const { return GetField<int32_t>(VT_NEW_AXIS_MASK, 0); }
+ int32_t shrink_axis_mask() const { return GetField<int32_t>(VT_SHRINK_AXIS_MASK, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_BEGIN_MASK) &&
+ VerifyField<int32_t>(verifier, VT_END_MASK) &&
+ VerifyField<int32_t>(verifier, VT_ELLIPSIS_MASK) &&
+ VerifyField<int32_t>(verifier, VT_NEW_AXIS_MASK) &&
+ VerifyField<int32_t>(verifier, VT_SHRINK_AXIS_MASK) && verifier.EndTable();
+ }
+ StridedSliceOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(StridedSliceOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<StridedSliceOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const StridedSliceOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct StridedSliceOptionsBuilder
+{
+ typedef StridedSliceOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_begin_mask(int32_t begin_mask)
+ {
+ fbb_.AddElement<int32_t>(StridedSliceOptions::VT_BEGIN_MASK, begin_mask, 0);
+ }
+ void add_end_mask(int32_t end_mask)
+ {
+ fbb_.AddElement<int32_t>(StridedSliceOptions::VT_END_MASK, end_mask, 0);
+ }
+ void add_ellipsis_mask(int32_t ellipsis_mask)
+ {
+ fbb_.AddElement<int32_t>(StridedSliceOptions::VT_ELLIPSIS_MASK, ellipsis_mask, 0);
+ }
+ void add_new_axis_mask(int32_t new_axis_mask)
+ {
+ fbb_.AddElement<int32_t>(StridedSliceOptions::VT_NEW_AXIS_MASK, new_axis_mask, 0);
+ }
+ void add_shrink_axis_mask(int32_t shrink_axis_mask)
+ {
+ fbb_.AddElement<int32_t>(StridedSliceOptions::VT_SHRINK_AXIS_MASK, shrink_axis_mask, 0);
+ }
+ explicit StridedSliceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<StridedSliceOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<StridedSliceOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<StridedSliceOptions>
+CreateStridedSliceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t begin_mask = 0,
+ int32_t end_mask = 0, int32_t ellipsis_mask = 0,
+ int32_t new_axis_mask = 0, int32_t shrink_axis_mask = 0)
+{
+ StridedSliceOptionsBuilder builder_(_fbb);
+ builder_.add_shrink_axis_mask(shrink_axis_mask);
+ builder_.add_new_axis_mask(new_axis_mask);
+ builder_.add_ellipsis_mask(ellipsis_mask);
+ builder_.add_end_mask(end_mask);
+ builder_.add_begin_mask(begin_mask);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<StridedSliceOptions>
+CreateStridedSliceOptions(flatbuffers::FlatBufferBuilder &_fbb, const StridedSliceOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
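+// Example (illustrative sketch): following TensorFlow's strided-slice
+// semantics, each mask is a bit field with one bit per dimension; a set bit
+// in begin_mask means begin[i] is ignored for that dimension.
+//
+//   auto ss_opts = circle::CreateStridedSliceOptions(
+//     fbb, /*begin_mask=*/0b101, /*end_mask=*/0, /*ellipsis_mask=*/0,
+//     /*new_axis_mask=*/0, /*shrink_axis_mask=*/0);
+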
+struct LogSoftmaxOptionsT : public flatbuffers::NativeTable
+{
+ typedef LogSoftmaxOptions TableType;
+};
+
+struct LogSoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef LogSoftmaxOptionsT NativeTableType;
+ typedef LogSoftmaxOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ LogSoftmaxOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(LogSoftmaxOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<LogSoftmaxOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const LogSoftmaxOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct LogSoftmaxOptionsBuilder
+{
+ typedef LogSoftmaxOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit LogSoftmaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<LogSoftmaxOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<LogSoftmaxOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<LogSoftmaxOptions>
+CreateLogSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ LogSoftmaxOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<LogSoftmaxOptions>
+CreateLogSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const LogSoftmaxOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct CastOptionsT : public flatbuffers::NativeTable
+{
+ typedef CastOptions TableType;
+ circle::TensorType in_data_type = circle::TensorType_FLOAT32;
+ circle::TensorType out_data_type = circle::TensorType_FLOAT32;
+};
+
+struct CastOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef CastOptionsT NativeTableType;
+ typedef CastOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_IN_DATA_TYPE = 4,
+ VT_OUT_DATA_TYPE = 6
+ };
+ circle::TensorType in_data_type() const
+ {
+ return static_cast<circle::TensorType>(GetField<int8_t>(VT_IN_DATA_TYPE, 0));
+ }
+ circle::TensorType out_data_type() const
+ {
+ return static_cast<circle::TensorType>(GetField<int8_t>(VT_OUT_DATA_TYPE, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_IN_DATA_TYPE) &&
+ VerifyField<int8_t>(verifier, VT_OUT_DATA_TYPE) && verifier.EndTable();
+ }
+ CastOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(CastOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<CastOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const CastOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct CastOptionsBuilder
+{
+ typedef CastOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_in_data_type(circle::TensorType in_data_type)
+ {
+ fbb_.AddElement<int8_t>(CastOptions::VT_IN_DATA_TYPE, static_cast<int8_t>(in_data_type), 0);
+ }
+ void add_out_data_type(circle::TensorType out_data_type)
+ {
+ fbb_.AddElement<int8_t>(CastOptions::VT_OUT_DATA_TYPE, static_cast<int8_t>(out_data_type), 0);
+ }
+ explicit CastOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<CastOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<CastOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<CastOptions>
+CreateCastOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ circle::TensorType in_data_type = circle::TensorType_FLOAT32,
+ circle::TensorType out_data_type = circle::TensorType_FLOAT32)
+{
+ CastOptionsBuilder builder_(_fbb);
+ builder_.add_out_data_type(out_data_type);
+ builder_.add_in_data_type(in_data_type);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<CastOptions>
+CreateCastOptions(flatbuffers::FlatBufferBuilder &_fbb, const CastOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
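+// Example (illustrative sketch): CastOptions stores the source and target
+// element types as circle::TensorType values:
+//
+//   auto cast_opts = circle::CreateCastOptions(
+//     fbb, /*in_data_type=*/circle::TensorType_FLOAT32,
+//     /*out_data_type=*/circle::TensorType_INT32);
+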
+struct DequantizeOptionsT : public flatbuffers::NativeTable
+{
+ typedef DequantizeOptions TableType;
+};
+
+struct DequantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef DequantizeOptionsT NativeTableType;
+ typedef DequantizeOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ DequantizeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(DequantizeOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<DequantizeOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const DequantizeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct DequantizeOptionsBuilder
+{
+ typedef DequantizeOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit DequantizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<DequantizeOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<DequantizeOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<DequantizeOptions>
+CreateDequantizeOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ DequantizeOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<DequantizeOptions>
+CreateDequantizeOptions(flatbuffers::FlatBufferBuilder &_fbb, const DequantizeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct MaximumMinimumOptionsT : public flatbuffers::NativeTable
+{
+ typedef MaximumMinimumOptions TableType;
+};
+
+struct MaximumMinimumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef MaximumMinimumOptionsT NativeTableType;
+ typedef MaximumMinimumOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ MaximumMinimumOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(MaximumMinimumOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<MaximumMinimumOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const MaximumMinimumOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct MaximumMinimumOptionsBuilder
+{
+ typedef MaximumMinimumOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit MaximumMinimumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<MaximumMinimumOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<MaximumMinimumOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<MaximumMinimumOptions>
+CreateMaximumMinimumOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ MaximumMinimumOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<MaximumMinimumOptions>
+CreateMaximumMinimumOptions(flatbuffers::FlatBufferBuilder &_fbb, const MaximumMinimumOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct TileOptionsT : public flatbuffers::NativeTable
+{
+ typedef TileOptions TableType;
+};
+
+struct TileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef TileOptionsT NativeTableType;
+ typedef TileOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ TileOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(TileOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<TileOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const TileOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct TileOptionsBuilder
+{
+ typedef TileOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit TileOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<TileOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<TileOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<TileOptions> CreateTileOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ TileOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<TileOptions>
+CreateTileOptions(flatbuffers::FlatBufferBuilder &_fbb, const TileOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct ArgMaxOptionsT : public flatbuffers::NativeTable
+{
+ typedef ArgMaxOptions TableType;
+ circle::TensorType output_type = circle::TensorType_FLOAT32;
+};
+
+struct ArgMaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef ArgMaxOptionsT NativeTableType;
+ typedef ArgMaxOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_OUTPUT_TYPE = 4
+ };
+ circle::TensorType output_type() const
+ {
+ return static_cast<circle::TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_OUTPUT_TYPE) &&
+ verifier.EndTable();
+ }
+ ArgMaxOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(ArgMaxOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<ArgMaxOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const ArgMaxOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct ArgMaxOptionsBuilder
+{
+ typedef ArgMaxOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_output_type(circle::TensorType output_type)
+ {
+ fbb_.AddElement<int8_t>(ArgMaxOptions::VT_OUTPUT_TYPE, static_cast<int8_t>(output_type), 0);
+ }
+ explicit ArgMaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<ArgMaxOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ArgMaxOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ArgMaxOptions>
+CreateArgMaxOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ circle::TensorType output_type = circle::TensorType_FLOAT32)
+{
+ ArgMaxOptionsBuilder builder_(_fbb);
+ builder_.add_output_type(output_type);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<ArgMaxOptions>
+CreateArgMaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const ArgMaxOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct ArgMinOptionsT : public flatbuffers::NativeTable
+{
+ typedef ArgMinOptions TableType;
+ circle::TensorType output_type = circle::TensorType_FLOAT32;
+};
+
+struct ArgMinOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef ArgMinOptionsT NativeTableType;
+ typedef ArgMinOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_OUTPUT_TYPE = 4
+ };
+ circle::TensorType output_type() const
+ {
+ return static_cast<circle::TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_OUTPUT_TYPE) &&
+ verifier.EndTable();
+ }
+ ArgMinOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(ArgMinOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<ArgMinOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const ArgMinOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct ArgMinOptionsBuilder
+{
+ typedef ArgMinOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_output_type(circle::TensorType output_type)
+ {
+ fbb_.AddElement<int8_t>(ArgMinOptions::VT_OUTPUT_TYPE, static_cast<int8_t>(output_type), 0);
+ }
+ explicit ArgMinOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<ArgMinOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ArgMinOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ArgMinOptions>
+CreateArgMinOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ circle::TensorType output_type = circle::TensorType_FLOAT32)
+{
+ ArgMinOptionsBuilder builder_(_fbb);
+ builder_.add_output_type(output_type);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<ArgMinOptions>
+CreateArgMinOptions(flatbuffers::FlatBufferBuilder &_fbb, const ArgMinOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct GreaterOptionsT : public flatbuffers::NativeTable
+{
+ typedef GreaterOptions TableType;
+};
+
+struct GreaterOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef GreaterOptionsT NativeTableType;
+ typedef GreaterOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ GreaterOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(GreaterOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<GreaterOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const GreaterOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct GreaterOptionsBuilder
+{
+ typedef GreaterOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit GreaterOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<GreaterOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<GreaterOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<GreaterOptions>
+CreateGreaterOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ GreaterOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<GreaterOptions>
+CreateGreaterOptions(flatbuffers::FlatBufferBuilder &_fbb, const GreaterOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct GreaterEqualOptionsT : public flatbuffers::NativeTable
+{
+ typedef GreaterEqualOptions TableType;
+};
+
+struct GreaterEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef GreaterEqualOptionsT NativeTableType;
+ typedef GreaterEqualOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ GreaterEqualOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(GreaterEqualOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<GreaterEqualOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const GreaterEqualOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct GreaterEqualOptionsBuilder
+{
+ typedef GreaterEqualOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit GreaterEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<GreaterEqualOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<GreaterEqualOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<GreaterEqualOptions>
+CreateGreaterEqualOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ GreaterEqualOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<GreaterEqualOptions>
+CreateGreaterEqualOptions(flatbuffers::FlatBufferBuilder &_fbb, const GreaterEqualOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct LessOptionsT : public flatbuffers::NativeTable
+{
+ typedef LessOptions TableType;
+};
+
+struct LessOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef LessOptionsT NativeTableType;
+ typedef LessOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ LessOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(LessOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<LessOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const LessOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct LessOptionsBuilder
+{
+ typedef LessOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit LessOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<LessOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<LessOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<LessOptions> CreateLessOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ LessOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<LessOptions>
+CreateLessOptions(flatbuffers::FlatBufferBuilder &_fbb, const LessOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct LessEqualOptionsT : public flatbuffers::NativeTable
+{
+ typedef LessEqualOptions TableType;
+};
+
+struct LessEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef LessEqualOptionsT NativeTableType;
+ typedef LessEqualOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ LessEqualOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(LessEqualOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<LessEqualOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const LessEqualOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct LessEqualOptionsBuilder
+{
+ typedef LessEqualOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit LessEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<LessEqualOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<LessEqualOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<LessEqualOptions>
+CreateLessEqualOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ LessEqualOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<LessEqualOptions>
+CreateLessEqualOptions(flatbuffers::FlatBufferBuilder &_fbb, const LessEqualOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct NegOptionsT : public flatbuffers::NativeTable
+{
+ typedef NegOptions TableType;
+};
+
+struct NegOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef NegOptionsT NativeTableType;
+ typedef NegOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ NegOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(NegOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<NegOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const NegOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct NegOptionsBuilder
+{
+ typedef NegOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit NegOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<NegOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<NegOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<NegOptions> CreateNegOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ NegOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<NegOptions>
+CreateNegOptions(flatbuffers::FlatBufferBuilder &_fbb, const NegOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct SelectOptionsT : public flatbuffers::NativeTable
+{
+ typedef SelectOptions TableType;
+};
+
+struct SelectOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef SelectOptionsT NativeTableType;
+ typedef SelectOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ SelectOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(SelectOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<SelectOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const SelectOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SelectOptionsBuilder
+{
+ typedef SelectOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit SelectOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<SelectOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SelectOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SelectOptions> CreateSelectOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ SelectOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<SelectOptions>
+CreateSelectOptions(flatbuffers::FlatBufferBuilder &_fbb, const SelectOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct SliceOptionsT : public flatbuffers::NativeTable
+{
+ typedef SliceOptions TableType;
+};
+
+struct SliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef SliceOptionsT NativeTableType;
+ typedef SliceOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ SliceOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(SliceOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<SliceOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const SliceOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SliceOptionsBuilder
+{
+ typedef SliceOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit SliceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<SliceOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SliceOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SliceOptions> CreateSliceOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ SliceOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<SliceOptions>
+CreateSliceOptions(flatbuffers::FlatBufferBuilder &_fbb, const SliceOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct TransposeConvOptionsT : public flatbuffers::NativeTable
+{
+ typedef TransposeConvOptions TableType;
+ circle::Padding padding = circle::Padding_SAME;
+ int32_t stride_w = 0;
+ int32_t stride_h = 0;
+};
+
+struct TransposeConvOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef TransposeConvOptionsT NativeTableType;
+ typedef TransposeConvOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_PADDING = 4,
+ VT_STRIDE_W = 6,
+ VT_STRIDE_H = 8
+ };
+ circle::Padding padding() const
+ {
+ return static_cast<circle::Padding>(GetField<int8_t>(VT_PADDING, 0));
+ }
+ int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
+ int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) &&
+ VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
+ VerifyField<int32_t>(verifier, VT_STRIDE_H) && verifier.EndTable();
+ }
+ TransposeConvOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(TransposeConvOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<TransposeConvOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const TransposeConvOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct TransposeConvOptionsBuilder
+{
+ typedef TransposeConvOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_padding(circle::Padding padding)
+ {
+ fbb_.AddElement<int8_t>(TransposeConvOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
+ }
+ void add_stride_w(int32_t stride_w)
+ {
+ fbb_.AddElement<int32_t>(TransposeConvOptions::VT_STRIDE_W, stride_w, 0);
+ }
+ void add_stride_h(int32_t stride_h)
+ {
+ fbb_.AddElement<int32_t>(TransposeConvOptions::VT_STRIDE_H, stride_h, 0);
+ }
+ explicit TransposeConvOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<TransposeConvOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<TransposeConvOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<TransposeConvOptions>
+CreateTransposeConvOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ circle::Padding padding = circle::Padding_SAME, int32_t stride_w = 0,
+ int32_t stride_h = 0)
+{
+ TransposeConvOptionsBuilder builder_(_fbb);
+ builder_.add_stride_h(stride_h);
+ builder_.add_stride_w(stride_w);
+ builder_.add_padding(padding);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<TransposeConvOptions>
+CreateTransposeConvOptions(flatbuffers::FlatBufferBuilder &_fbb, const TransposeConvOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct ExpandDimsOptionsT : public flatbuffers::NativeTable
+{
+ typedef ExpandDimsOptions TableType;
+};
+
+struct ExpandDimsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef ExpandDimsOptionsT NativeTableType;
+ typedef ExpandDimsOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ ExpandDimsOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(ExpandDimsOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<ExpandDimsOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const ExpandDimsOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct ExpandDimsOptionsBuilder
+{
+ typedef ExpandDimsOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit ExpandDimsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<ExpandDimsOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ExpandDimsOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ExpandDimsOptions>
+CreateExpandDimsOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ ExpandDimsOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<ExpandDimsOptions>
+CreateExpandDimsOptions(flatbuffers::FlatBufferBuilder &_fbb, const ExpandDimsOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct SparseToDenseOptionsT : public flatbuffers::NativeTable
+{
+ typedef SparseToDenseOptions TableType;
+ bool validate_indices = false;
+};
+
+struct SparseToDenseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef SparseToDenseOptionsT NativeTableType;
+ typedef SparseToDenseOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_VALIDATE_INDICES = 4
+ };
+ bool validate_indices() const { return GetField<uint8_t>(VT_VALIDATE_INDICES, 0) != 0; }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_VALIDATE_INDICES) &&
+ verifier.EndTable();
+ }
+ SparseToDenseOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(SparseToDenseOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<SparseToDenseOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const SparseToDenseOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SparseToDenseOptionsBuilder
+{
+ typedef SparseToDenseOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_validate_indices(bool validate_indices)
+ {
+ fbb_.AddElement<uint8_t>(SparseToDenseOptions::VT_VALIDATE_INDICES,
+ static_cast<uint8_t>(validate_indices), 0);
+ }
+ explicit SparseToDenseOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<SparseToDenseOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SparseToDenseOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SparseToDenseOptions>
+CreateSparseToDenseOptions(flatbuffers::FlatBufferBuilder &_fbb, bool validate_indices = false)
+{
+ SparseToDenseOptionsBuilder builder_(_fbb);
+ builder_.add_validate_indices(validate_indices);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<SparseToDenseOptions>
+CreateSparseToDenseOptions(flatbuffers::FlatBufferBuilder &_fbb, const SparseToDenseOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct EqualOptionsT : public flatbuffers::NativeTable
+{
+ typedef EqualOptions TableType;
+};
+
+struct EqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef EqualOptionsT NativeTableType;
+ typedef EqualOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ EqualOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(EqualOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<EqualOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const EqualOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct EqualOptionsBuilder
+{
+ typedef EqualOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit EqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<EqualOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<EqualOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<EqualOptions> CreateEqualOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ EqualOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<EqualOptions>
+CreateEqualOptions(flatbuffers::FlatBufferBuilder &_fbb, const EqualOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct NotEqualOptionsT : public flatbuffers::NativeTable
+{
+ typedef NotEqualOptions TableType;
+};
+
+struct NotEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef NotEqualOptionsT NativeTableType;
+ typedef NotEqualOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ NotEqualOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(NotEqualOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<NotEqualOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const NotEqualOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct NotEqualOptionsBuilder
+{
+ typedef NotEqualOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit NotEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<NotEqualOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<NotEqualOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<NotEqualOptions>
+CreateNotEqualOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ NotEqualOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<NotEqualOptions>
+CreateNotEqualOptions(flatbuffers::FlatBufferBuilder &_fbb, const NotEqualOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct ShapeOptionsT : public flatbuffers::NativeTable
+{
+ typedef ShapeOptions TableType;
+ circle::TensorType out_type = circle::TensorType_FLOAT32;
+};
+
+struct ShapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef ShapeOptionsT NativeTableType;
+ typedef ShapeOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_OUT_TYPE = 4
+ };
+ circle::TensorType out_type() const
+ {
+ return static_cast<circle::TensorType>(GetField<int8_t>(VT_OUT_TYPE, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_OUT_TYPE) &&
+ verifier.EndTable();
+ }
+ ShapeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(ShapeOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<ShapeOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const ShapeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct ShapeOptionsBuilder
+{
+ typedef ShapeOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_out_type(circle::TensorType out_type)
+ {
+ fbb_.AddElement<int8_t>(ShapeOptions::VT_OUT_TYPE, static_cast<int8_t>(out_type), 0);
+ }
+ explicit ShapeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<ShapeOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ShapeOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ShapeOptions>
+CreateShapeOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ circle::TensorType out_type = circle::TensorType_FLOAT32)
+{
+ ShapeOptionsBuilder builder_(_fbb);
+ builder_.add_out_type(out_type);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<ShapeOptions>
+CreateShapeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ShapeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct RankOptionsT : public flatbuffers::NativeTable
+{
+ typedef RankOptions TableType;
+};
+
+struct RankOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef RankOptionsT NativeTableType;
+ typedef RankOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ RankOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(RankOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<RankOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const RankOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct RankOptionsBuilder
+{
+ typedef RankOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit RankOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<RankOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<RankOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<RankOptions> CreateRankOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ RankOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<RankOptions>
+CreateRankOptions(flatbuffers::FlatBufferBuilder &_fbb, const RankOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct PowOptionsT : public flatbuffers::NativeTable
+{
+ typedef PowOptions TableType;
+};
+
+struct PowOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef PowOptionsT NativeTableType;
+ typedef PowOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ PowOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(PowOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<PowOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const PowOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct PowOptionsBuilder
+{
+ typedef PowOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit PowOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<PowOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<PowOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<PowOptions> CreatePowOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ PowOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<PowOptions>
+CreatePowOptions(flatbuffers::FlatBufferBuilder &_fbb, const PowOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct FakeQuantOptionsT : public flatbuffers::NativeTable
+{
+ typedef FakeQuantOptions TableType;
+ float min = 0.0f;
+ float max = 0.0f;
+ int32_t num_bits = 0;
+ bool narrow_range = false;
+};
+
+struct FakeQuantOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef FakeQuantOptionsT NativeTableType;
+ typedef FakeQuantOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_MIN = 4,
+ VT_MAX = 6,
+ VT_NUM_BITS = 8,
+ VT_NARROW_RANGE = 10
+ };
+ float min() const { return GetField<float>(VT_MIN, 0.0f); }
+ float max() const { return GetField<float>(VT_MAX, 0.0f); }
+ int32_t num_bits() const { return GetField<int32_t>(VT_NUM_BITS, 0); }
+ bool narrow_range() const { return GetField<uint8_t>(VT_NARROW_RANGE, 0) != 0; }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<float>(verifier, VT_MIN) &&
+ VerifyField<float>(verifier, VT_MAX) && VerifyField<int32_t>(verifier, VT_NUM_BITS) &&
+ VerifyField<uint8_t>(verifier, VT_NARROW_RANGE) && verifier.EndTable();
+ }
+ FakeQuantOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(FakeQuantOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<FakeQuantOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const FakeQuantOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
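+
+// Verification sketch (illustrative): Verify() bounds-checks the table start and
+// each scalar field. Options tables are normally verified through the enclosing
+// Model, but a buffer rooted at this table could be checked standalone; `buf`
+// and `buf_len` below are assumptions supplied by the caller:
+//
+//   flatbuffers::Verifier verifier(buf, buf_len);
+//   bool ok = verifier.VerifyBuffer<FakeQuantOptions>(nullptr);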
+
+struct FakeQuantOptionsBuilder
+{
+ typedef FakeQuantOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_min(float min) { fbb_.AddElement<float>(FakeQuantOptions::VT_MIN, min, 0.0f); }
+ void add_max(float max) { fbb_.AddElement<float>(FakeQuantOptions::VT_MAX, max, 0.0f); }
+ void add_num_bits(int32_t num_bits)
+ {
+ fbb_.AddElement<int32_t>(FakeQuantOptions::VT_NUM_BITS, num_bits, 0);
+ }
+ void add_narrow_range(bool narrow_range)
+ {
+ fbb_.AddElement<uint8_t>(FakeQuantOptions::VT_NARROW_RANGE, static_cast<uint8_t>(narrow_range),
+ 0);
+ }
+ explicit FakeQuantOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<FakeQuantOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<FakeQuantOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<FakeQuantOptions>
+CreateFakeQuantOptions(flatbuffers::FlatBufferBuilder &_fbb, float min = 0.0f, float max = 0.0f,
+ int32_t num_bits = 0, bool narrow_range = false)
+{
+ FakeQuantOptionsBuilder builder_(_fbb);
+ builder_.add_num_bits(num_bits);
+ builder_.add_max(max);
+ builder_.add_min(min);
+ builder_.add_narrow_range(narrow_range);
+ return builder_.Finish();
+}
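+
+// Usage sketch (illustrative, not part of the generated header); the values are
+// hypothetical and the returned offset would feed the enclosing operator's
+// builtin_options field:
+//
+//   flatbuffers::FlatBufferBuilder fbb;
+//   auto opts = CreateFakeQuantOptions(fbb, /*min=*/-6.0f, /*max=*/6.0f,
+//                                      /*num_bits=*/8, /*narrow_range=*/false);
+//
+// Note the add_* calls above run largest-field-first (descending id within a
+// size class), the order flatc emits; the vtable makes serialized order moot.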
+
+flatbuffers::Offset<FakeQuantOptions>
+CreateFakeQuantOptions(flatbuffers::FlatBufferBuilder &_fbb, const FakeQuantOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct PackOptionsT : public flatbuffers::NativeTable
+{
+ typedef PackOptions TableType;
+ int32_t values_count = 0;
+ int32_t axis = 0;
+};
+
+struct PackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef PackOptionsT NativeTableType;
+ typedef PackOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_VALUES_COUNT = 4,
+ VT_AXIS = 6
+ };
+ int32_t values_count() const { return GetField<int32_t>(VT_VALUES_COUNT, 0); }
+ int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_VALUES_COUNT) &&
+ VerifyField<int32_t>(verifier, VT_AXIS) && verifier.EndTable();
+ }
+ PackOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(PackOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<PackOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const PackOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct PackOptionsBuilder
+{
+ typedef PackOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_values_count(int32_t values_count)
+ {
+ fbb_.AddElement<int32_t>(PackOptions::VT_VALUES_COUNT, values_count, 0);
+ }
+ void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(PackOptions::VT_AXIS, axis, 0); }
+ explicit PackOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<PackOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<PackOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<PackOptions>
+CreatePackOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t values_count = 0, int32_t axis = 0)
+{
+ PackOptionsBuilder builder_(_fbb);
+ builder_.add_axis(axis);
+ builder_.add_values_count(values_count);
+ return builder_.Finish();
+}
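+
+// Note (schema semantics, stated as an assumption from the circle/TFLite
+// schema): values_count is expected to equal the operator's input count, and
+// axis selects the dimension along which those inputs are packed.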
+
+flatbuffers::Offset<PackOptions>
+CreatePackOptions(flatbuffers::FlatBufferBuilder &_fbb, const PackOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct LogicalOrOptionsT : public flatbuffers::NativeTable
+{
+ typedef LogicalOrOptions TableType;
+};
+
+struct LogicalOrOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef LogicalOrOptionsT NativeTableType;
+ typedef LogicalOrOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ LogicalOrOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(LogicalOrOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<LogicalOrOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const LogicalOrOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct LogicalOrOptionsBuilder
+{
+ typedef LogicalOrOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit LogicalOrOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<LogicalOrOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<LogicalOrOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<LogicalOrOptions>
+CreateLogicalOrOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ LogicalOrOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<LogicalOrOptions>
+CreateLogicalOrOptions(flatbuffers::FlatBufferBuilder &_fbb, const LogicalOrOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct OneHotOptionsT : public flatbuffers::NativeTable
+{
+ typedef OneHotOptions TableType;
+ int32_t axis = 0;
+};
+
+struct OneHotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef OneHotOptionsT NativeTableType;
+ typedef OneHotOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_AXIS = 4
+ };
+ int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_AXIS) &&
+ verifier.EndTable();
+ }
+ OneHotOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(OneHotOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<OneHotOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const OneHotOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct OneHotOptionsBuilder
+{
+ typedef OneHotOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(OneHotOptions::VT_AXIS, axis, 0); }
+ explicit OneHotOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<OneHotOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<OneHotOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<OneHotOptions> CreateOneHotOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ int32_t axis = 0)
+{
+ OneHotOptionsBuilder builder_(_fbb);
+ builder_.add_axis(axis);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<OneHotOptions>
+CreateOneHotOptions(flatbuffers::FlatBufferBuilder &_fbb, const OneHotOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct AbsOptionsT : public flatbuffers::NativeTable
+{
+ typedef AbsOptions TableType;
+};
+
+struct AbsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef AbsOptionsT NativeTableType;
+ typedef AbsOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ AbsOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(AbsOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<AbsOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const AbsOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct AbsOptionsBuilder
+{
+ typedef AbsOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit AbsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<AbsOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<AbsOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<AbsOptions> CreateAbsOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ AbsOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<AbsOptions>
+CreateAbsOptions(flatbuffers::FlatBufferBuilder &_fbb, const AbsOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct HardSwishOptionsT : public flatbuffers::NativeTable
+{
+ typedef HardSwishOptions TableType;
+};
+
+struct HardSwishOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef HardSwishOptionsT NativeTableType;
+ typedef HardSwishOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ HardSwishOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(HardSwishOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<HardSwishOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const HardSwishOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct HardSwishOptionsBuilder
+{
+ typedef HardSwishOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit HardSwishOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<HardSwishOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<HardSwishOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<HardSwishOptions>
+CreateHardSwishOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ HardSwishOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<HardSwishOptions>
+CreateHardSwishOptions(flatbuffers::FlatBufferBuilder &_fbb, const HardSwishOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct LogicalAndOptionsT : public flatbuffers::NativeTable
+{
+ typedef LogicalAndOptions TableType;
+};
+
+struct LogicalAndOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef LogicalAndOptionsT NativeTableType;
+ typedef LogicalAndOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ LogicalAndOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(LogicalAndOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<LogicalAndOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const LogicalAndOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct LogicalAndOptionsBuilder
+{
+ typedef LogicalAndOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit LogicalAndOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<LogicalAndOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<LogicalAndOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<LogicalAndOptions>
+CreateLogicalAndOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ LogicalAndOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<LogicalAndOptions>
+CreateLogicalAndOptions(flatbuffers::FlatBufferBuilder &_fbb, const LogicalAndOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct LogicalNotOptionsT : public flatbuffers::NativeTable
+{
+ typedef LogicalNotOptions TableType;
+};
+
+struct LogicalNotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef LogicalNotOptionsT NativeTableType;
+ typedef LogicalNotOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ LogicalNotOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(LogicalNotOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<LogicalNotOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const LogicalNotOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct LogicalNotOptionsBuilder
+{
+ typedef LogicalNotOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit LogicalNotOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<LogicalNotOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<LogicalNotOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<LogicalNotOptions>
+CreateLogicalNotOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ LogicalNotOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<LogicalNotOptions>
+CreateLogicalNotOptions(flatbuffers::FlatBufferBuilder &_fbb, const LogicalNotOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct UnpackOptionsT : public flatbuffers::NativeTable
+{
+ typedef UnpackOptions TableType;
+ int32_t num = 0;
+ int32_t axis = 0;
+};
+
+struct UnpackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef UnpackOptionsT NativeTableType;
+ typedef UnpackOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_NUM = 4,
+ VT_AXIS = 6
+ };
+ int32_t num() const { return GetField<int32_t>(VT_NUM, 0); }
+ int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_NUM) &&
+ VerifyField<int32_t>(verifier, VT_AXIS) && verifier.EndTable();
+ }
+ UnpackOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(UnpackOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<UnpackOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnpackOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct UnpackOptionsBuilder
+{
+ typedef UnpackOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_num(int32_t num) { fbb_.AddElement<int32_t>(UnpackOptions::VT_NUM, num, 0); }
+ void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(UnpackOptions::VT_AXIS, axis, 0); }
+ explicit UnpackOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<UnpackOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<UnpackOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<UnpackOptions> CreateUnpackOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ int32_t num = 0, int32_t axis = 0)
+{
+ UnpackOptionsBuilder builder_(_fbb);
+ builder_.add_axis(axis);
+ builder_.add_num(num);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<UnpackOptions>
+CreateUnpackOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnpackOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct FloorDivOptionsT : public flatbuffers::NativeTable
+{
+ typedef FloorDivOptions TableType;
+};
+
+struct FloorDivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef FloorDivOptionsT NativeTableType;
+ typedef FloorDivOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ FloorDivOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(FloorDivOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<FloorDivOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const FloorDivOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct FloorDivOptionsBuilder
+{
+ typedef FloorDivOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit FloorDivOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<FloorDivOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<FloorDivOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<FloorDivOptions>
+CreateFloorDivOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ FloorDivOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<FloorDivOptions>
+CreateFloorDivOptions(flatbuffers::FlatBufferBuilder &_fbb, const FloorDivOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct SquareOptionsT : public flatbuffers::NativeTable
+{
+ typedef SquareOptions TableType;
+};
+
+struct SquareOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef SquareOptionsT NativeTableType;
+ typedef SquareOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ SquareOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(SquareOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<SquareOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const SquareOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SquareOptionsBuilder
+{
+ typedef SquareOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit SquareOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<SquareOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SquareOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SquareOptions> CreateSquareOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ SquareOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<SquareOptions>
+CreateSquareOptions(flatbuffers::FlatBufferBuilder &_fbb, const SquareOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct ZerosLikeOptionsT : public flatbuffers::NativeTable
+{
+ typedef ZerosLikeOptions TableType;
+};
+
+struct ZerosLikeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef ZerosLikeOptionsT NativeTableType;
+ typedef ZerosLikeOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ ZerosLikeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(ZerosLikeOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<ZerosLikeOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const ZerosLikeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct ZerosLikeOptionsBuilder
+{
+ typedef ZerosLikeOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit ZerosLikeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<ZerosLikeOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ZerosLikeOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ZerosLikeOptions>
+CreateZerosLikeOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ ZerosLikeOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<ZerosLikeOptions>
+CreateZerosLikeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ZerosLikeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct FillOptionsT : public flatbuffers::NativeTable
+{
+ typedef FillOptions TableType;
+};
+
+struct FillOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef FillOptionsT NativeTableType;
+ typedef FillOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ FillOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(FillOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<FillOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const FillOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct FillOptionsBuilder
+{
+ typedef FillOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit FillOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<FillOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<FillOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<FillOptions> CreateFillOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ FillOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<FillOptions>
+CreateFillOptions(flatbuffers::FlatBufferBuilder &_fbb, const FillOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct FloorModOptionsT : public flatbuffers::NativeTable
+{
+ typedef FloorModOptions TableType;
+};
+
+struct FloorModOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef FloorModOptionsT NativeTableType;
+ typedef FloorModOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ FloorModOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(FloorModOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<FloorModOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const FloorModOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct FloorModOptionsBuilder
+{
+ typedef FloorModOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit FloorModOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<FloorModOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<FloorModOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<FloorModOptions>
+CreateFloorModOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ FloorModOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<FloorModOptions>
+CreateFloorModOptions(flatbuffers::FlatBufferBuilder &_fbb, const FloorModOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct RangeOptionsT : public flatbuffers::NativeTable
+{
+ typedef RangeOptions TableType;
+};
+
+struct RangeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef RangeOptionsT NativeTableType;
+ typedef RangeOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ RangeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(RangeOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<RangeOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const RangeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct RangeOptionsBuilder
+{
+ typedef RangeOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit RangeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<RangeOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<RangeOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<RangeOptions> CreateRangeOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ RangeOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<RangeOptions>
+CreateRangeOptions(flatbuffers::FlatBufferBuilder &_fbb, const RangeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct LeakyReluOptionsT : public flatbuffers::NativeTable
+{
+ typedef LeakyReluOptions TableType;
+ float alpha = 0.0f;
+};
+
+struct LeakyReluOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef LeakyReluOptionsT NativeTableType;
+ typedef LeakyReluOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_ALPHA = 4
+ };
+ float alpha() const { return GetField<float>(VT_ALPHA, 0.0f); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<float>(verifier, VT_ALPHA) &&
+ verifier.EndTable();
+ }
+ LeakyReluOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(LeakyReluOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<LeakyReluOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const LeakyReluOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct LeakyReluOptionsBuilder
+{
+ typedef LeakyReluOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_alpha(float alpha) { fbb_.AddElement<float>(LeakyReluOptions::VT_ALPHA, alpha, 0.0f); }
+ explicit LeakyReluOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<LeakyReluOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<LeakyReluOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<LeakyReluOptions>
+CreateLeakyReluOptions(flatbuffers::FlatBufferBuilder &_fbb, float alpha = 0.0f)
+{
+ LeakyReluOptionsBuilder builder_(_fbb);
+ builder_.add_alpha(alpha);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<LeakyReluOptions>
+CreateLeakyReluOptions(flatbuffers::FlatBufferBuilder &_fbb, const LeakyReluOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct SquaredDifferenceOptionsT : public flatbuffers::NativeTable
+{
+ typedef SquaredDifferenceOptions TableType;
+};
+
+struct SquaredDifferenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef SquaredDifferenceOptionsT NativeTableType;
+ typedef SquaredDifferenceOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ SquaredDifferenceOptionsT *
+ UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(SquaredDifferenceOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<SquaredDifferenceOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const SquaredDifferenceOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SquaredDifferenceOptionsBuilder
+{
+ typedef SquaredDifferenceOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit SquaredDifferenceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<SquaredDifferenceOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SquaredDifferenceOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SquaredDifferenceOptions>
+CreateSquaredDifferenceOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ SquaredDifferenceOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<SquaredDifferenceOptions>
+CreateSquaredDifferenceOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ const SquaredDifferenceOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct MirrorPadOptionsT : public flatbuffers::NativeTable
+{
+ typedef MirrorPadOptions TableType;
+ circle::MirrorPadMode mode = circle::MirrorPadMode_REFLECT;
+};
+
+struct MirrorPadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef MirrorPadOptionsT NativeTableType;
+ typedef MirrorPadOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_MODE = 4
+ };
+ circle::MirrorPadMode mode() const
+ {
+ return static_cast<circle::MirrorPadMode>(GetField<int8_t>(VT_MODE, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_MODE) &&
+ verifier.EndTable();
+ }
+ MirrorPadOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(MirrorPadOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<MirrorPadOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const MirrorPadOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct MirrorPadOptionsBuilder
+{
+ typedef MirrorPadOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_mode(circle::MirrorPadMode mode)
+ {
+ fbb_.AddElement<int8_t>(MirrorPadOptions::VT_MODE, static_cast<int8_t>(mode), 0);
+ }
+ explicit MirrorPadOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<MirrorPadOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<MirrorPadOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<MirrorPadOptions>
+CreateMirrorPadOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ circle::MirrorPadMode mode = circle::MirrorPadMode_REFLECT)
+{
+ MirrorPadOptionsBuilder builder_(_fbb);
+ builder_.add_mode(mode);
+ return builder_.Finish();
+}
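+
+// Usage sketch (illustrative): the mode enum is stored as an int8_t scalar, so
+// passing the default (MirrorPadMode_REFLECT == 0) writes no field at all and
+// the accessor's default recovers it on read:
+//
+//   flatbuffers::FlatBufferBuilder fbb;
+//   auto opts = CreateMirrorPadOptions(fbb, circle::MirrorPadMode_SYMMETRIC);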
+
+flatbuffers::Offset<MirrorPadOptions>
+CreateMirrorPadOptions(flatbuffers::FlatBufferBuilder &_fbb, const MirrorPadOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct UniqueOptionsT : public flatbuffers::NativeTable
+{
+ typedef UniqueOptions TableType;
+ circle::TensorType idx_out_type = circle::TensorType_INT32;
+};
+
+struct UniqueOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef UniqueOptionsT NativeTableType;
+ typedef UniqueOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_IDX_OUT_TYPE = 4
+ };
+ circle::TensorType idx_out_type() const
+ {
+ return static_cast<circle::TensorType>(GetField<int8_t>(VT_IDX_OUT_TYPE, 2));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_IDX_OUT_TYPE) &&
+ verifier.EndTable();
+ }
+ UniqueOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(UniqueOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<UniqueOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const UniqueOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct UniqueOptionsBuilder
+{
+ typedef UniqueOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_idx_out_type(circle::TensorType idx_out_type)
+ {
+ fbb_.AddElement<int8_t>(UniqueOptions::VT_IDX_OUT_TYPE, static_cast<int8_t>(idx_out_type), 2);
+ }
+ explicit UniqueOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<UniqueOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<UniqueOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<UniqueOptions>
+CreateUniqueOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ circle::TensorType idx_out_type = circle::TensorType_INT32)
+{
+ UniqueOptionsBuilder builder_(_fbb);
+ builder_.add_idx_out_type(idx_out_type);
+ return builder_.Finish();
+}
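+
+// Note: this table has a non-zero schema default (2 == TensorType_INT32).
+// AddElement skips writing the field when the value equals that default (unless
+// ForceDefaults(true) is set on the builder), and the idx_out_type() accessor's
+// default of 2 recovers it on read, so the round trip stays lossless.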
+
+flatbuffers::Offset<UniqueOptions>
+CreateUniqueOptions(flatbuffers::FlatBufferBuilder &_fbb, const UniqueOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct ReverseV2OptionsT : public flatbuffers::NativeTable
+{
+ typedef ReverseV2Options TableType;
+};
+
+struct ReverseV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef ReverseV2OptionsT NativeTableType;
+ typedef ReverseV2OptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ ReverseV2OptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(ReverseV2OptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<ReverseV2Options>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReverseV2OptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct ReverseV2OptionsBuilder
+{
+ typedef ReverseV2Options Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit ReverseV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<ReverseV2Options> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ReverseV2Options>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ReverseV2Options>
+CreateReverseV2Options(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ ReverseV2OptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<ReverseV2Options>
+CreateReverseV2Options(flatbuffers::FlatBufferBuilder &_fbb, const ReverseV2OptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct AddNOptionsT : public flatbuffers::NativeTable
+{
+ typedef AddNOptions TableType;
+};
+
+struct AddNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef AddNOptionsT NativeTableType;
+ typedef AddNOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ AddNOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(AddNOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<AddNOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const AddNOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct AddNOptionsBuilder
+{
+ typedef AddNOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit AddNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<AddNOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<AddNOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<AddNOptions> CreateAddNOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ AddNOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<AddNOptions>
+CreateAddNOptions(flatbuffers::FlatBufferBuilder &_fbb, const AddNOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct GatherNdOptionsT : public flatbuffers::NativeTable
+{
+ typedef GatherNdOptions TableType;
+};
+
+struct GatherNdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef GatherNdOptionsT NativeTableType;
+ typedef GatherNdOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ GatherNdOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(GatherNdOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<GatherNdOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const GatherNdOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct GatherNdOptionsBuilder
+{
+ typedef GatherNdOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit GatherNdOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<GatherNdOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<GatherNdOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<GatherNdOptions>
+CreateGatherNdOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ GatherNdOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<GatherNdOptions>
+CreateGatherNdOptions(flatbuffers::FlatBufferBuilder &_fbb, const GatherNdOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct WhereOptionsT : public flatbuffers::NativeTable
+{
+ typedef WhereOptions TableType;
+};
+
+struct WhereOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef WhereOptionsT NativeTableType;
+ typedef WhereOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ WhereOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(WhereOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<WhereOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const WhereOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct WhereOptionsBuilder
+{
+ typedef WhereOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit WhereOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<WhereOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<WhereOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<WhereOptions> CreateWhereOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ WhereOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<WhereOptions>
+CreateWhereOptions(flatbuffers::FlatBufferBuilder &_fbb, const WhereOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct ReverseSequenceOptionsT : public flatbuffers::NativeTable
+{
+ typedef ReverseSequenceOptions TableType;
+ int32_t seq_dim = 0;
+ int32_t batch_dim = 0;
+};
+
+struct ReverseSequenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef ReverseSequenceOptionsT NativeTableType;
+ typedef ReverseSequenceOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_SEQ_DIM = 4,
+ VT_BATCH_DIM = 6
+ };
+ int32_t seq_dim() const { return GetField<int32_t>(VT_SEQ_DIM, 0); }
+ int32_t batch_dim() const { return GetField<int32_t>(VT_BATCH_DIM, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_SEQ_DIM) &&
+ VerifyField<int32_t>(verifier, VT_BATCH_DIM) && verifier.EndTable();
+ }
+ ReverseSequenceOptionsT *
+ UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(ReverseSequenceOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<ReverseSequenceOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReverseSequenceOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct ReverseSequenceOptionsBuilder
+{
+ typedef ReverseSequenceOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_seq_dim(int32_t seq_dim)
+ {
+ fbb_.AddElement<int32_t>(ReverseSequenceOptions::VT_SEQ_DIM, seq_dim, 0);
+ }
+ void add_batch_dim(int32_t batch_dim)
+ {
+ fbb_.AddElement<int32_t>(ReverseSequenceOptions::VT_BATCH_DIM, batch_dim, 0);
+ }
+ explicit ReverseSequenceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<ReverseSequenceOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ReverseSequenceOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ReverseSequenceOptions>
+CreateReverseSequenceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t seq_dim = 0,
+ int32_t batch_dim = 0)
+{
+ ReverseSequenceOptionsBuilder builder_(_fbb);
+ builder_.add_batch_dim(batch_dim);
+ builder_.add_seq_dim(seq_dim);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<ReverseSequenceOptions>
+CreateReverseSequenceOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ const ReverseSequenceOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct MatrixDiagOptionsT : public flatbuffers::NativeTable
+{
+ typedef MatrixDiagOptions TableType;
+};
+
+struct MatrixDiagOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef MatrixDiagOptionsT NativeTableType;
+ typedef MatrixDiagOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ MatrixDiagOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(MatrixDiagOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<MatrixDiagOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const MatrixDiagOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct MatrixDiagOptionsBuilder
+{
+ typedef MatrixDiagOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit MatrixDiagOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<MatrixDiagOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<MatrixDiagOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<MatrixDiagOptions>
+CreateMatrixDiagOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ MatrixDiagOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<MatrixDiagOptions>
+CreateMatrixDiagOptions(flatbuffers::FlatBufferBuilder &_fbb, const MatrixDiagOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct QuantizeOptionsT : public flatbuffers::NativeTable
+{
+ typedef QuantizeOptions TableType;
+};
+
+struct QuantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef QuantizeOptionsT NativeTableType;
+ typedef QuantizeOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ QuantizeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(QuantizeOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<QuantizeOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const QuantizeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct QuantizeOptionsBuilder
+{
+ typedef QuantizeOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit QuantizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<QuantizeOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<QuantizeOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<QuantizeOptions>
+CreateQuantizeOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ QuantizeOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<QuantizeOptions>
+CreateQuantizeOptions(flatbuffers::FlatBufferBuilder &_fbb, const QuantizeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct MatrixSetDiagOptionsT : public flatbuffers::NativeTable
+{
+ typedef MatrixSetDiagOptions TableType;
+};
+
+struct MatrixSetDiagOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef MatrixSetDiagOptionsT NativeTableType;
+ typedef MatrixSetDiagOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ MatrixSetDiagOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(MatrixSetDiagOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<MatrixSetDiagOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const MatrixSetDiagOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct MatrixSetDiagOptionsBuilder
+{
+ typedef MatrixSetDiagOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit MatrixSetDiagOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<MatrixSetDiagOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<MatrixSetDiagOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<MatrixSetDiagOptions>
+CreateMatrixSetDiagOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ MatrixSetDiagOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<MatrixSetDiagOptions>
+CreateMatrixSetDiagOptions(flatbuffers::FlatBufferBuilder &_fbb, const MatrixSetDiagOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct IfOptionsT : public flatbuffers::NativeTable
+{
+ typedef IfOptions TableType;
+ int32_t then_subgraph_index = 0;
+ int32_t else_subgraph_index = 0;
+};
+
+struct IfOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef IfOptionsT NativeTableType;
+ typedef IfOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_THEN_SUBGRAPH_INDEX = 4,
+ VT_ELSE_SUBGRAPH_INDEX = 6
+ };
+ int32_t then_subgraph_index() const { return GetField<int32_t>(VT_THEN_SUBGRAPH_INDEX, 0); }
+ int32_t else_subgraph_index() const { return GetField<int32_t>(VT_ELSE_SUBGRAPH_INDEX, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_THEN_SUBGRAPH_INDEX) &&
+ VerifyField<int32_t>(verifier, VT_ELSE_SUBGRAPH_INDEX) && verifier.EndTable();
+ }
+ IfOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(IfOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<IfOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const IfOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct IfOptionsBuilder
+{
+ typedef IfOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_then_subgraph_index(int32_t then_subgraph_index)
+ {
+ fbb_.AddElement<int32_t>(IfOptions::VT_THEN_SUBGRAPH_INDEX, then_subgraph_index, 0);
+ }
+ void add_else_subgraph_index(int32_t else_subgraph_index)
+ {
+ fbb_.AddElement<int32_t>(IfOptions::VT_ELSE_SUBGRAPH_INDEX, else_subgraph_index, 0);
+ }
+ explicit IfOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<IfOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<IfOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<IfOptions> CreateIfOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ int32_t then_subgraph_index = 0,
+ int32_t else_subgraph_index = 0)
+{
+ IfOptionsBuilder builder_(_fbb);
+ builder_.add_else_subgraph_index(else_subgraph_index);
+ builder_.add_then_subgraph_index(then_subgraph_index);
+ return builder_.Finish();
+}
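+
+// Usage sketch (illustrative): both indices refer to subgraphs of the enclosing
+// Model; the index values below are hypothetical:
+//
+//   flatbuffers::FlatBufferBuilder fbb;
+//   auto opts = CreateIfOptions(fbb, /*then_subgraph_index=*/1,
+//                               /*else_subgraph_index=*/2);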
+
+flatbuffers::Offset<IfOptions>
+CreateIfOptions(flatbuffers::FlatBufferBuilder &_fbb, const IfOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct CallOnceOptionsT : public flatbuffers::NativeTable
+{
+ typedef CallOnceOptions TableType;
+ int32_t init_subgraph_index = 0;
+};
+
+struct CallOnceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef CallOnceOptionsT NativeTableType;
+ typedef CallOnceOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_INIT_SUBGRAPH_INDEX = 4
+ };
+ int32_t init_subgraph_index() const { return GetField<int32_t>(VT_INIT_SUBGRAPH_INDEX, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_INIT_SUBGRAPH_INDEX) &&
+ verifier.EndTable();
+ }
+ CallOnceOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(CallOnceOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<CallOnceOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const CallOnceOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct CallOnceOptionsBuilder
+{
+ typedef CallOnceOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_init_subgraph_index(int32_t init_subgraph_index)
+ {
+ fbb_.AddElement<int32_t>(CallOnceOptions::VT_INIT_SUBGRAPH_INDEX, init_subgraph_index, 0);
+ }
+ explicit CallOnceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<CallOnceOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<CallOnceOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<CallOnceOptions>
+CreateCallOnceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t init_subgraph_index = 0)
+{
+ CallOnceOptionsBuilder builder_(_fbb);
+ builder_.add_init_subgraph_index(init_subgraph_index);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<CallOnceOptions>
+CreateCallOnceOptions(flatbuffers::FlatBufferBuilder &_fbb, const CallOnceOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct WhileOptionsT : public flatbuffers::NativeTable
+{
+ typedef WhileOptions TableType;
+ int32_t cond_subgraph_index = 0;
+ int32_t body_subgraph_index = 0;
+};
+
+struct WhileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef WhileOptionsT NativeTableType;
+ typedef WhileOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_COND_SUBGRAPH_INDEX = 4,
+ VT_BODY_SUBGRAPH_INDEX = 6
+ };
+ int32_t cond_subgraph_index() const { return GetField<int32_t>(VT_COND_SUBGRAPH_INDEX, 0); }
+ int32_t body_subgraph_index() const { return GetField<int32_t>(VT_BODY_SUBGRAPH_INDEX, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_COND_SUBGRAPH_INDEX) &&
+ VerifyField<int32_t>(verifier, VT_BODY_SUBGRAPH_INDEX) && verifier.EndTable();
+ }
+ WhileOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(WhileOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<WhileOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const WhileOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct WhileOptionsBuilder
+{
+ typedef WhileOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_cond_subgraph_index(int32_t cond_subgraph_index)
+ {
+ fbb_.AddElement<int32_t>(WhileOptions::VT_COND_SUBGRAPH_INDEX, cond_subgraph_index, 0);
+ }
+ void add_body_subgraph_index(int32_t body_subgraph_index)
+ {
+ fbb_.AddElement<int32_t>(WhileOptions::VT_BODY_SUBGRAPH_INDEX, body_subgraph_index, 0);
+ }
+ explicit WhileOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<WhileOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<WhileOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<WhileOptions> CreateWhileOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ int32_t cond_subgraph_index = 0,
+ int32_t body_subgraph_index = 0)
+{
+ WhileOptionsBuilder builder_(_fbb);
+ builder_.add_body_subgraph_index(body_subgraph_index);
+ builder_.add_cond_subgraph_index(cond_subgraph_index);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<WhileOptions>
+CreateWhileOptions(flatbuffers::FlatBufferBuilder &_fbb, const WhileOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct NonMaxSuppressionV4OptionsT : public flatbuffers::NativeTable
+{
+ typedef NonMaxSuppressionV4Options TableType;
+};
+
+struct NonMaxSuppressionV4Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef NonMaxSuppressionV4OptionsT NativeTableType;
+ typedef NonMaxSuppressionV4OptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ NonMaxSuppressionV4OptionsT *
+ UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(NonMaxSuppressionV4OptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<NonMaxSuppressionV4Options>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const NonMaxSuppressionV4OptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct NonMaxSuppressionV4OptionsBuilder
+{
+ typedef NonMaxSuppressionV4Options Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit NonMaxSuppressionV4OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<NonMaxSuppressionV4Options> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<NonMaxSuppressionV4Options>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<NonMaxSuppressionV4Options>
+CreateNonMaxSuppressionV4Options(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ NonMaxSuppressionV4OptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<NonMaxSuppressionV4Options>
+CreateNonMaxSuppressionV4Options(flatbuffers::FlatBufferBuilder &_fbb,
+ const NonMaxSuppressionV4OptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
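+// NonMaxSuppressionV4Options above carries no fields, as do several option
+// tables that follow (NonMaxSuppressionV5Options, ScatterNdOptions,
+// SelectV2Options, DensifyOptions, ...): the table exists only so the
+// operator has a slot in the BuiltinOptions union. For these, Verify reduces
+// to VerifyTableStart plus EndTable, and the Create helper takes nothing but
+// the builder.
+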
+struct NonMaxSuppressionV5OptionsT : public flatbuffers::NativeTable
+{
+ typedef NonMaxSuppressionV5Options TableType;
+};
+
+struct NonMaxSuppressionV5Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef NonMaxSuppressionV5OptionsT NativeTableType;
+ typedef NonMaxSuppressionV5OptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ NonMaxSuppressionV5OptionsT *
+ UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(NonMaxSuppressionV5OptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<NonMaxSuppressionV5Options>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const NonMaxSuppressionV5OptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct NonMaxSuppressionV5OptionsBuilder
+{
+ typedef NonMaxSuppressionV5Options Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit NonMaxSuppressionV5OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<NonMaxSuppressionV5Options> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<NonMaxSuppressionV5Options>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<NonMaxSuppressionV5Options>
+CreateNonMaxSuppressionV5Options(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ NonMaxSuppressionV5OptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<NonMaxSuppressionV5Options>
+CreateNonMaxSuppressionV5Options(flatbuffers::FlatBufferBuilder &_fbb,
+ const NonMaxSuppressionV5OptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct ScatterNdOptionsT : public flatbuffers::NativeTable
+{
+ typedef ScatterNdOptions TableType;
+};
+
+struct ScatterNdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef ScatterNdOptionsT NativeTableType;
+ typedef ScatterNdOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ ScatterNdOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(ScatterNdOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<ScatterNdOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const ScatterNdOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct ScatterNdOptionsBuilder
+{
+ typedef ScatterNdOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit ScatterNdOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<ScatterNdOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ScatterNdOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ScatterNdOptions>
+CreateScatterNdOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ ScatterNdOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<ScatterNdOptions>
+CreateScatterNdOptions(flatbuffers::FlatBufferBuilder &_fbb, const ScatterNdOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct SelectV2OptionsT : public flatbuffers::NativeTable
+{
+ typedef SelectV2Options TableType;
+};
+
+struct SelectV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef SelectV2OptionsT NativeTableType;
+ typedef SelectV2OptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ SelectV2OptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(SelectV2OptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<SelectV2Options>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const SelectV2OptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SelectV2OptionsBuilder
+{
+ typedef SelectV2Options Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit SelectV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<SelectV2Options> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SelectV2Options>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SelectV2Options>
+CreateSelectV2Options(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ SelectV2OptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<SelectV2Options>
+CreateSelectV2Options(flatbuffers::FlatBufferBuilder &_fbb, const SelectV2OptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct DensifyOptionsT : public flatbuffers::NativeTable
+{
+ typedef DensifyOptions TableType;
+};
+
+struct DensifyOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef DensifyOptionsT NativeTableType;
+ typedef DensifyOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ DensifyOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(DensifyOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<DensifyOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const DensifyOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct DensifyOptionsBuilder
+{
+ typedef DensifyOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit DensifyOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<DensifyOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<DensifyOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<DensifyOptions>
+CreateDensifyOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ DensifyOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<DensifyOptions>
+CreateDensifyOptions(flatbuffers::FlatBufferBuilder &_fbb, const DensifyOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct SegmentSumOptionsT : public flatbuffers::NativeTable
+{
+ typedef SegmentSumOptions TableType;
+};
+
+struct SegmentSumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef SegmentSumOptionsT NativeTableType;
+ typedef SegmentSumOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ SegmentSumOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(SegmentSumOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<SegmentSumOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const SegmentSumOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SegmentSumOptionsBuilder
+{
+ typedef SegmentSumOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit SegmentSumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<SegmentSumOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SegmentSumOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SegmentSumOptions>
+CreateSegmentSumOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ SegmentSumOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<SegmentSumOptions>
+CreateSegmentSumOptions(flatbuffers::FlatBufferBuilder &_fbb, const SegmentSumOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct BatchMatMulOptionsT : public flatbuffers::NativeTable
+{
+ typedef BatchMatMulOptions TableType;
+ bool adjoint_lhs = false;
+ bool adjoint_rhs = false;
+ bool asymmetric_quantize_inputs = false;
+};
+
+struct BatchMatMulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef BatchMatMulOptionsT NativeTableType;
+ typedef BatchMatMulOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_ADJOINT_LHS = 4,
+ VT_ADJOINT_RHS = 6,
+ VT_ASYMMETRIC_QUANTIZE_INPUTS = 8
+ };
+ bool adjoint_lhs() const { return GetField<uint8_t>(VT_ADJOINT_LHS, 0) != 0; }
+ bool adjoint_rhs() const { return GetField<uint8_t>(VT_ADJOINT_RHS, 0) != 0; }
+ bool asymmetric_quantize_inputs() const
+ {
+ return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ADJOINT_LHS) &&
+ VerifyField<uint8_t>(verifier, VT_ADJOINT_RHS) &&
+ VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) && verifier.EndTable();
+ }
+ BatchMatMulOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(BatchMatMulOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<BatchMatMulOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const BatchMatMulOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct BatchMatMulOptionsBuilder
+{
+ typedef BatchMatMulOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_adjoint_lhs(bool adjoint_lhs)
+ {
+ fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ADJOINT_LHS, static_cast<uint8_t>(adjoint_lhs),
+ 0);
+ }
+ void add_adjoint_rhs(bool adjoint_rhs)
+ {
+ fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ADJOINT_RHS, static_cast<uint8_t>(adjoint_rhs),
+ 0);
+ }
+ void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
+ {
+ fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS,
+ static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
+ }
+ explicit BatchMatMulOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<BatchMatMulOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<BatchMatMulOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<BatchMatMulOptions>
+CreateBatchMatMulOptions(flatbuffers::FlatBufferBuilder &_fbb, bool adjoint_lhs = false,
+ bool adjoint_rhs = false, bool asymmetric_quantize_inputs = false)
+{
+ BatchMatMulOptionsBuilder builder_(_fbb);
+ builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
+ builder_.add_adjoint_rhs(adjoint_rhs);
+ builder_.add_adjoint_lhs(adjoint_lhs);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<BatchMatMulOptions>
+CreateBatchMatMulOptions(flatbuffers::FlatBufferBuilder &_fbb, const BatchMatMulOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
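+// Usage sketch (flag values are illustrative). Booleans are serialized as
+// uint8_t, and AddElement drops any value equal to its default (the trailing
+// 0 argument), so fields left at false cost no wire bytes; because the
+// vtable records each field's offset, the order of the add_* calls in the
+// inline helper does not matter.
+//
+//   auto bmm_opts = circle::CreateBatchMatMulOptions(fbb, /*adjoint_lhs=*/true,
+//                                                    /*adjoint_rhs=*/false);
+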
+struct CumsumOptionsT : public flatbuffers::NativeTable
+{
+ typedef CumsumOptions TableType;
+ bool exclusive = false;
+ bool reverse = false;
+};
+
+struct CumsumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef CumsumOptionsT NativeTableType;
+ typedef CumsumOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_EXCLUSIVE = 4,
+ VT_REVERSE = 6
+ };
+ bool exclusive() const { return GetField<uint8_t>(VT_EXCLUSIVE, 0) != 0; }
+ bool reverse() const { return GetField<uint8_t>(VT_REVERSE, 0) != 0; }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_EXCLUSIVE) &&
+ VerifyField<uint8_t>(verifier, VT_REVERSE) && verifier.EndTable();
+ }
+ CumsumOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(CumsumOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<CumsumOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const CumsumOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct CumsumOptionsBuilder
+{
+ typedef CumsumOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_exclusive(bool exclusive)
+ {
+ fbb_.AddElement<uint8_t>(CumsumOptions::VT_EXCLUSIVE, static_cast<uint8_t>(exclusive), 0);
+ }
+ void add_reverse(bool reverse)
+ {
+ fbb_.AddElement<uint8_t>(CumsumOptions::VT_REVERSE, static_cast<uint8_t>(reverse), 0);
+ }
+ explicit CumsumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<CumsumOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<CumsumOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<CumsumOptions> CreateCumsumOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ bool exclusive = false,
+ bool reverse = false)
+{
+ CumsumOptionsBuilder builder_(_fbb);
+ builder_.add_reverse(reverse);
+ builder_.add_exclusive(exclusive);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<CumsumOptions>
+CreateCumsumOptions(flatbuffers::FlatBufferBuilder &_fbb, const CumsumOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
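+// Object-API round-trip sketch, assuming `opts` points at a CumsumOptions
+// table inside an already-parsed model (hypothetical) and <memory> is
+// available. UnPack returns an owning raw pointer to the native struct:
+//
+//   std::unique_ptr<circle::CumsumOptionsT> native(opts->UnPack());
+//   native->reverse = true;
+//   auto repacked = circle::CumsumOptions::Pack(fbb, native.get());
+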
+struct BroadcastToOptionsT : public flatbuffers::NativeTable
+{
+ typedef BroadcastToOptions TableType;
+};
+
+struct BroadcastToOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef BroadcastToOptionsT NativeTableType;
+ typedef BroadcastToOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ BroadcastToOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(BroadcastToOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<BroadcastToOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const BroadcastToOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct BroadcastToOptionsBuilder
+{
+ typedef BroadcastToOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit BroadcastToOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<BroadcastToOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<BroadcastToOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<BroadcastToOptions>
+CreateBroadcastToOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ BroadcastToOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<BroadcastToOptions>
+CreateBroadcastToOptions(flatbuffers::FlatBufferBuilder &_fbb, const BroadcastToOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct Rfft2dOptionsT : public flatbuffers::NativeTable
+{
+ typedef Rfft2dOptions TableType;
+};
+
+struct Rfft2dOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef Rfft2dOptionsT NativeTableType;
+ typedef Rfft2dOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ Rfft2dOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(Rfft2dOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<Rfft2dOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const Rfft2dOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct Rfft2dOptionsBuilder
+{
+ typedef Rfft2dOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit Rfft2dOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<Rfft2dOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Rfft2dOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Rfft2dOptions> CreateRfft2dOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ Rfft2dOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<Rfft2dOptions>
+CreateRfft2dOptions(flatbuffers::FlatBufferBuilder &_fbb, const Rfft2dOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct HashtableOptionsT : public flatbuffers::NativeTable
+{
+ typedef HashtableOptions TableType;
+ int32_t table_id = 0;
+ circle::TensorType key_dtype = circle::TensorType_FLOAT32;
+ circle::TensorType value_dtype = circle::TensorType_FLOAT32;
+};
+
+struct HashtableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef HashtableOptionsT NativeTableType;
+ typedef HashtableOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_TABLE_ID = 4,
+ VT_KEY_DTYPE = 6,
+ VT_VALUE_DTYPE = 8
+ };
+ int32_t table_id() const { return GetField<int32_t>(VT_TABLE_ID, 0); }
+ circle::TensorType key_dtype() const
+ {
+ return static_cast<circle::TensorType>(GetField<int8_t>(VT_KEY_DTYPE, 0));
+ }
+ circle::TensorType value_dtype() const
+ {
+ return static_cast<circle::TensorType>(GetField<int8_t>(VT_VALUE_DTYPE, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_TABLE_ID) &&
+ VerifyField<int8_t>(verifier, VT_KEY_DTYPE) &&
+ VerifyField<int8_t>(verifier, VT_VALUE_DTYPE) && verifier.EndTable();
+ }
+ HashtableOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(HashtableOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<HashtableOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const HashtableOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct HashtableOptionsBuilder
+{
+ typedef HashtableOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_table_id(int32_t table_id)
+ {
+ fbb_.AddElement<int32_t>(HashtableOptions::VT_TABLE_ID, table_id, 0);
+ }
+ void add_key_dtype(circle::TensorType key_dtype)
+ {
+ fbb_.AddElement<int8_t>(HashtableOptions::VT_KEY_DTYPE, static_cast<int8_t>(key_dtype), 0);
+ }
+ void add_value_dtype(circle::TensorType value_dtype)
+ {
+ fbb_.AddElement<int8_t>(HashtableOptions::VT_VALUE_DTYPE, static_cast<int8_t>(value_dtype), 0);
+ }
+ explicit HashtableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<HashtableOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<HashtableOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<HashtableOptions>
+CreateHashtableOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t table_id = 0,
+ circle::TensorType key_dtype = circle::TensorType_FLOAT32,
+ circle::TensorType value_dtype = circle::TensorType_FLOAT32)
+{
+ HashtableOptionsBuilder builder_(_fbb);
+ builder_.add_table_id(table_id);
+ builder_.add_value_dtype(value_dtype);
+ builder_.add_key_dtype(key_dtype);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<HashtableOptions>
+CreateHashtableOptions(flatbuffers::FlatBufferBuilder &_fbb, const HashtableOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
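+// Usage sketch (table id and dtypes are illustrative; the TensorType
+// enumerators are assumed from the enum declared earlier in this header).
+// The dtype fields are stored as int8_t, and the TensorType_FLOAT32 default
+// is elided from the wire:
+//
+//   auto ht_opts = circle::CreateHashtableOptions(fbb, /*table_id=*/1,
+//                                                 circle::TensorType_INT32,
+//                                                 circle::TensorType_STRING);
+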
+struct HashtableFindOptionsT : public flatbuffers::NativeTable
+{
+ typedef HashtableFindOptions TableType;
+};
+
+struct HashtableFindOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef HashtableFindOptionsT NativeTableType;
+ typedef HashtableFindOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ HashtableFindOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(HashtableFindOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<HashtableFindOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const HashtableFindOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct HashtableFindOptionsBuilder
+{
+ typedef HashtableFindOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit HashtableFindOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<HashtableFindOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<HashtableFindOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<HashtableFindOptions>
+CreateHashtableFindOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ HashtableFindOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<HashtableFindOptions>
+CreateHashtableFindOptions(flatbuffers::FlatBufferBuilder &_fbb, const HashtableFindOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct HashtableImportOptionsT : public flatbuffers::NativeTable
+{
+ typedef HashtableImportOptions TableType;
+};
+
+struct HashtableImportOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef HashtableImportOptionsT NativeTableType;
+ typedef HashtableImportOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ HashtableImportOptionsT *
+ UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(HashtableImportOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<HashtableImportOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const HashtableImportOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct HashtableImportOptionsBuilder
+{
+ typedef HashtableImportOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit HashtableImportOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<HashtableImportOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<HashtableImportOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<HashtableImportOptions>
+CreateHashtableImportOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ HashtableImportOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<HashtableImportOptions>
+CreateHashtableImportOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ const HashtableImportOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct HashtableSizeOptionsT : public flatbuffers::NativeTable
+{
+ typedef HashtableSizeOptions TableType;
+};
+
+struct HashtableSizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef HashtableSizeOptionsT NativeTableType;
+ typedef HashtableSizeOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ HashtableSizeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(HashtableSizeOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<HashtableSizeOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const HashtableSizeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct HashtableSizeOptionsBuilder
+{
+ typedef HashtableSizeOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit HashtableSizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<HashtableSizeOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<HashtableSizeOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<HashtableSizeOptions>
+CreateHashtableSizeOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ HashtableSizeOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<HashtableSizeOptions>
+CreateHashtableSizeOptions(flatbuffers::FlatBufferBuilder &_fbb, const HashtableSizeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct VarHandleOptionsT : public flatbuffers::NativeTable
+{
+ typedef VarHandleOptions TableType;
+ std::string container{};
+ std::string shared_name{};
+};
+
+struct VarHandleOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef VarHandleOptionsT NativeTableType;
+ typedef VarHandleOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_CONTAINER = 4,
+ VT_SHARED_NAME = 6
+ };
+ const flatbuffers::String *container() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_CONTAINER);
+ }
+ const flatbuffers::String *shared_name() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_SHARED_NAME);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_CONTAINER) &&
+ verifier.VerifyString(container()) && VerifyOffset(verifier, VT_SHARED_NAME) &&
+ verifier.VerifyString(shared_name()) && verifier.EndTable();
+ }
+ VarHandleOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(VarHandleOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<VarHandleOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const VarHandleOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct VarHandleOptionsBuilder
+{
+ typedef VarHandleOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_container(flatbuffers::Offset<flatbuffers::String> container)
+ {
+ fbb_.AddOffset(VarHandleOptions::VT_CONTAINER, container);
+ }
+ void add_shared_name(flatbuffers::Offset<flatbuffers::String> shared_name)
+ {
+ fbb_.AddOffset(VarHandleOptions::VT_SHARED_NAME, shared_name);
+ }
+ explicit VarHandleOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<VarHandleOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<VarHandleOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<VarHandleOptions>
+CreateVarHandleOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::String> container = 0,
+ flatbuffers::Offset<flatbuffers::String> shared_name = 0)
+{
+ VarHandleOptionsBuilder builder_(_fbb);
+ builder_.add_shared_name(shared_name);
+ builder_.add_container(container);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<VarHandleOptions>
+CreateVarHandleOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, const char *container = nullptr,
+ const char *shared_name = nullptr)
+{
+ auto container__ = container ? _fbb.CreateString(container) : 0;
+ auto shared_name__ = shared_name ? _fbb.CreateString(shared_name) : 0;
+ return circle::CreateVarHandleOptions(_fbb, container__, shared_name__);
+}
+
+flatbuffers::Offset<VarHandleOptions>
+CreateVarHandleOptions(flatbuffers::FlatBufferBuilder &_fbb, const VarHandleOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
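+// The *Direct overload above exists because strings must be serialized
+// before the table that references them (writes cannot nest), so it calls
+// CreateString for each non-null argument and then forwards the resulting
+// offsets. Usage sketch with illustrative names:
+//
+//   auto vh_opts = circle::CreateVarHandleOptionsDirect(fbb, "container_0",
+//                                                       "weights/var");
+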
+struct ReadVariableOptionsT : public flatbuffers::NativeTable
+{
+ typedef ReadVariableOptions TableType;
+};
+
+struct ReadVariableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef ReadVariableOptionsT NativeTableType;
+ typedef ReadVariableOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ ReadVariableOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(ReadVariableOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<ReadVariableOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReadVariableOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct ReadVariableOptionsBuilder
+{
+ typedef ReadVariableOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit ReadVariableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<ReadVariableOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ReadVariableOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ReadVariableOptions>
+CreateReadVariableOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ ReadVariableOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<ReadVariableOptions>
+CreateReadVariableOptions(flatbuffers::FlatBufferBuilder &_fbb, const ReadVariableOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct AssignVariableOptionsT : public flatbuffers::NativeTable
+{
+ typedef AssignVariableOptions TableType;
+};
+
+struct AssignVariableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef AssignVariableOptionsT NativeTableType;
+ typedef AssignVariableOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+ AssignVariableOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(AssignVariableOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<AssignVariableOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const AssignVariableOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct AssignVariableOptionsBuilder
+{
+ typedef AssignVariableOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit AssignVariableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<AssignVariableOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<AssignVariableOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<AssignVariableOptions>
+CreateAssignVariableOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ AssignVariableOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<AssignVariableOptions>
+CreateAssignVariableOptions(flatbuffers::FlatBufferBuilder &_fbb, const AssignVariableOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct RandomOptionsT : public flatbuffers::NativeTable
+{
+ typedef RandomOptions TableType;
+ int32_t seed = 0;
+ int32_t seed2 = 0;
+};
+
+struct RandomOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef RandomOptionsT NativeTableType;
+ typedef RandomOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_SEED = 4,
+ VT_SEED2 = 6
+ };
+ int32_t seed() const { return GetField<int32_t>(VT_SEED, 0); }
+ int32_t seed2() const { return GetField<int32_t>(VT_SEED2, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_SEED) &&
+ VerifyField<int32_t>(verifier, VT_SEED2) && verifier.EndTable();
+ }
+ RandomOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(RandomOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<RandomOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const RandomOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct RandomOptionsBuilder
+{
+ typedef RandomOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_seed(int32_t seed) { fbb_.AddElement<int32_t>(RandomOptions::VT_SEED, seed, 0); }
+ void add_seed2(int32_t seed2) { fbb_.AddElement<int32_t>(RandomOptions::VT_SEED2, seed2, 0); }
+ explicit RandomOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<RandomOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<RandomOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<RandomOptions> CreateRandomOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ int32_t seed = 0, int32_t seed2 = 0)
+{
+ RandomOptionsBuilder builder_(_fbb);
+ builder_.add_seed2(seed2);
+ builder_.add_seed(seed);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<RandomOptions>
+CreateRandomOptions(flatbuffers::FlatBufferBuilder &_fbb, const RandomOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct BCQGatherOptionsT : public flatbuffers::NativeTable
+{
+ typedef BCQGatherOptions TableType;
+ int32_t input_hidden_size = 0;
+ int32_t axis = 0;
+};
+
+struct BCQGatherOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef BCQGatherOptionsT NativeTableType;
+ typedef BCQGatherOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_INPUT_HIDDEN_SIZE = 4,
+ VT_AXIS = 6
+ };
+ int32_t input_hidden_size() const { return GetField<int32_t>(VT_INPUT_HIDDEN_SIZE, 0); }
+ int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_INPUT_HIDDEN_SIZE) &&
+ VerifyField<int32_t>(verifier, VT_AXIS) && verifier.EndTable();
+ }
+ BCQGatherOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(BCQGatherOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<BCQGatherOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const BCQGatherOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct BCQGatherOptionsBuilder
+{
+ typedef BCQGatherOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_input_hidden_size(int32_t input_hidden_size)
+ {
+ fbb_.AddElement<int32_t>(BCQGatherOptions::VT_INPUT_HIDDEN_SIZE, input_hidden_size, 0);
+ }
+ void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(BCQGatherOptions::VT_AXIS, axis, 0); }
+ explicit BCQGatherOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<BCQGatherOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<BCQGatherOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<BCQGatherOptions>
+CreateBCQGatherOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t input_hidden_size = 0,
+ int32_t axis = 0)
+{
+ BCQGatherOptionsBuilder builder_(_fbb);
+ builder_.add_axis(axis);
+ builder_.add_input_hidden_size(input_hidden_size);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<BCQGatherOptions>
+CreateBCQGatherOptions(flatbuffers::FlatBufferBuilder &_fbb, const BCQGatherOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
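+// BCQGatherOptions above (and BCQFullyConnectedOptions below) back the
+// Circle-specific BCQ (binary-coded quantization) operators that have no
+// TFLite counterpart; the generated plumbing is otherwise identical to the
+// standard option tables. Usage sketch with illustrative values:
+//
+//   auto bcq_opts = circle::CreateBCQGatherOptions(fbb,
+//                                                  /*input_hidden_size=*/32,
+//                                                  /*axis=*/0);
+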
+struct BCQFullyConnectedOptionsT : public flatbuffers::NativeTable
+{
+ typedef BCQFullyConnectedOptions TableType;
+ int32_t weights_hidden_size = 0;
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE;
+};
+
+struct BCQFullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef BCQFullyConnectedOptionsT NativeTableType;
+ typedef BCQFullyConnectedOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_WEIGHTS_HIDDEN_SIZE = 4,
+ VT_FUSED_ACTIVATION_FUNCTION = 6
+ };
+ int32_t weights_hidden_size() const { return GetField<int32_t>(VT_WEIGHTS_HIDDEN_SIZE, 0); }
+ circle::ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_WEIGHTS_HIDDEN_SIZE) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+ }
+ BCQFullyConnectedOptionsT *
+ UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(BCQFullyConnectedOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<BCQFullyConnectedOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const BCQFullyConnectedOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct BCQFullyConnectedOptionsBuilder
+{
+ typedef BCQFullyConnectedOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_weights_hidden_size(int32_t weights_hidden_size)
+ {
+ fbb_.AddElement<int32_t>(BCQFullyConnectedOptions::VT_WEIGHTS_HIDDEN_SIZE, weights_hidden_size,
+ 0);
+ }
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(BCQFullyConnectedOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ explicit BCQFullyConnectedOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<BCQFullyConnectedOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<BCQFullyConnectedOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<BCQFullyConnectedOptions> CreateBCQFullyConnectedOptions(
+ flatbuffers::FlatBufferBuilder &_fbb, int32_t weights_hidden_size = 0,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
+{
+ BCQFullyConnectedOptionsBuilder builder_(_fbb);
+ builder_.add_weights_hidden_size(weights_hidden_size);
+ builder_.add_fused_activation_function(fused_activation_function);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<BCQFullyConnectedOptions>
+CreateBCQFullyConnectedOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ const BCQFullyConnectedOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct InstanceNormOptionsT : public flatbuffers::NativeTable
+{
+ typedef InstanceNormOptions TableType;
+ float epsilon = 0.0f;
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE;
+};
+
+struct InstanceNormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef InstanceNormOptionsT NativeTableType;
+ typedef InstanceNormOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_EPSILON = 4,
+ VT_FUSED_ACTIVATION_FUNCTION = 6
+ };
+ float epsilon() const { return GetField<float>(VT_EPSILON, 0.0f); }
+ circle::ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<float>(verifier, VT_EPSILON) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+ }
+ InstanceNormOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(InstanceNormOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<InstanceNormOptions>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const InstanceNormOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct InstanceNormOptionsBuilder
+{
+ typedef InstanceNormOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_epsilon(float epsilon)
+ {
+ fbb_.AddElement<float>(InstanceNormOptions::VT_EPSILON, epsilon, 0.0f);
+ }
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(InstanceNormOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ explicit InstanceNormOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<InstanceNormOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<InstanceNormOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<InstanceNormOptions> CreateInstanceNormOptions(
+ flatbuffers::FlatBufferBuilder &_fbb, float epsilon = 0.0f,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
+{
+ InstanceNormOptionsBuilder builder_(_fbb);
+ builder_.add_epsilon(epsilon);
+ builder_.add_fused_activation_function(fused_activation_function);
+ return builder_.Finish();
+}
+
+flatbuffers::Offset<InstanceNormOptions>
+CreateInstanceNormOptions(flatbuffers::FlatBufferBuilder &_fbb, const InstanceNormOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
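+// Usage sketch for InstanceNormOptions, another Circle-specific extension
+// (epsilon and activation values are illustrative; the enumerator is assumed
+// from the ActivationFunctionType enum declared earlier in this header):
+//
+//   auto in_opts = circle::CreateInstanceNormOptions(
+//     fbb, /*epsilon=*/1e-5f, circle::ActivationFunctionType_RELU);
+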
+struct OperatorCodeT : public flatbuffers::NativeTable
+{
+ typedef OperatorCode TableType;
+ int8_t deprecated_builtin_code = 0;
+ std::string custom_code{};
+ int32_t version = 1;
+ circle::BuiltinOperator builtin_code = circle::BuiltinOperator_ADD;
+};
+
+struct OperatorCode FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef OperatorCodeT NativeTableType;
+ typedef OperatorCodeBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_DEPRECATED_BUILTIN_CODE = 4,
+ VT_CUSTOM_CODE = 6,
+ VT_VERSION = 8,
+ VT_BUILTIN_CODE = 10
+ };
+ int8_t deprecated_builtin_code() const { return GetField<int8_t>(VT_DEPRECATED_BUILTIN_CODE, 0); }
+ const flatbuffers::String *custom_code() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_CUSTOM_CODE);
+ }
+ int32_t version() const { return GetField<int32_t>(VT_VERSION, 1); }
+ circle::BuiltinOperator builtin_code() const
+ {
+ return static_cast<circle::BuiltinOperator>(GetField<int32_t>(VT_BUILTIN_CODE, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) &&
+ VerifyField<int8_t>(verifier, VT_DEPRECATED_BUILTIN_CODE) &&
+ VerifyOffset(verifier, VT_CUSTOM_CODE) && verifier.VerifyString(custom_code()) &&
+ VerifyField<int32_t>(verifier, VT_VERSION) &&
+ VerifyField<int32_t>(verifier, VT_BUILTIN_CODE) && verifier.EndTable();
+ }
+ OperatorCodeT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(OperatorCodeT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<OperatorCode>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct OperatorCodeBuilder
+{
+ typedef OperatorCode Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_deprecated_builtin_code(int8_t deprecated_builtin_code)
+ {
+ fbb_.AddElement<int8_t>(OperatorCode::VT_DEPRECATED_BUILTIN_CODE, deprecated_builtin_code, 0);
+ }
+ void add_custom_code(flatbuffers::Offset<flatbuffers::String> custom_code)
+ {
+ fbb_.AddOffset(OperatorCode::VT_CUSTOM_CODE, custom_code);
+ }
+ void add_version(int32_t version)
+ {
+ fbb_.AddElement<int32_t>(OperatorCode::VT_VERSION, version, 1);
+ }
+ void add_builtin_code(circle::BuiltinOperator builtin_code)
+ {
+ fbb_.AddElement<int32_t>(OperatorCode::VT_BUILTIN_CODE, static_cast<int32_t>(builtin_code), 0);
+ }
+ explicit OperatorCodeBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<OperatorCode> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<OperatorCode>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<OperatorCode>
+CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb, int8_t deprecated_builtin_code = 0,
+ flatbuffers::Offset<flatbuffers::String> custom_code = 0, int32_t version = 1,
+ circle::BuiltinOperator builtin_code = circle::BuiltinOperator_ADD)
+{
+ OperatorCodeBuilder builder_(_fbb);
+ builder_.add_builtin_code(builtin_code);
+ builder_.add_version(version);
+ builder_.add_custom_code(custom_code);
+ builder_.add_deprecated_builtin_code(deprecated_builtin_code);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<OperatorCode>
+CreateOperatorCodeDirect(flatbuffers::FlatBufferBuilder &_fbb, int8_t deprecated_builtin_code = 0,
+ const char *custom_code = nullptr, int32_t version = 1,
+ circle::BuiltinOperator builtin_code = circle::BuiltinOperator_ADD)
+{
+ auto custom_code__ = custom_code ? _fbb.CreateString(custom_code) : 0;
+ return circle::CreateOperatorCode(_fbb, deprecated_builtin_code, custom_code__, version,
+ builtin_code);
+}
+
+flatbuffers::Offset<OperatorCode>
+CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
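+// In OperatorCode above, deprecated_builtin_code is the original int8 field
+// kept for backward compatibility, while builtin_code (int32, a later vtable
+// slot) is what allows operator values above 127; readers typically consult
+// builtin_code and fall back to the deprecated field for old files. Custom
+// operators carry their identifier in custom_code. Illustrative sketch:
+//
+//   auto op_code = circle::CreateOperatorCodeDirect(
+//     fbb, /*deprecated_builtin_code=*/0, "MyCustomOp", /*version=*/1,
+//     circle::BuiltinOperator_CUSTOM);
+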
+struct OperatorT : public flatbuffers::NativeTable
+{
+ typedef Operator TableType;
+ uint32_t opcode_index = 0;
+ std::vector<int32_t> inputs{};
+ std::vector<int32_t> outputs{};
+ circle::BuiltinOptionsUnion builtin_options{};
+ std::vector<uint8_t> custom_options{};
+ circle::CustomOptionsFormat custom_options_format = circle::CustomOptionsFormat_FLEXBUFFERS;
+ std::vector<bool> mutating_variable_inputs{};
+ std::vector<int32_t> intermediates{};
+};
+
+struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef OperatorT NativeTableType;
+ typedef OperatorBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_OPCODE_INDEX = 4,
+ VT_INPUTS = 6,
+ VT_OUTPUTS = 8,
+ VT_BUILTIN_OPTIONS_TYPE = 10,
+ VT_BUILTIN_OPTIONS = 12,
+ VT_CUSTOM_OPTIONS = 14,
+ VT_CUSTOM_OPTIONS_FORMAT = 16,
+ VT_MUTATING_VARIABLE_INPUTS = 18,
+ VT_INTERMEDIATES = 20
+ };
+ uint32_t opcode_index() const { return GetField<uint32_t>(VT_OPCODE_INDEX, 0); }
+ const flatbuffers::Vector<int32_t> *inputs() const
+ {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_INPUTS);
+ }
+ const flatbuffers::Vector<int32_t> *outputs() const
+ {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_OUTPUTS);
+ }
+ circle::BuiltinOptions builtin_options_type() const
+ {
+ return static_cast<circle::BuiltinOptions>(GetField<uint8_t>(VT_BUILTIN_OPTIONS_TYPE, 0));
+ }
+ const void *builtin_options() const { return GetPointer<const void *>(VT_BUILTIN_OPTIONS); }
+ template <typename T> const T *builtin_options_as() const;
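+  // builtin_options is a tagged union: builtin_options_type() (a uint8_t at
+  // VT_BUILTIN_OPTIONS_TYPE) is the discriminant, and each of the
+  // builtin_options_as_X() accessors below returns a typed pointer when the
+  // tag matches, nullptr otherwise. Dispatch sketch, assuming `op` points at
+  // an Operator table and `use` is a hypothetical consumer:
+  //
+  //   if (const auto *conv = op->builtin_options_as_Conv2DOptions())
+  //     use(conv);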
+ const circle::Conv2DOptions *builtin_options_as_Conv2DOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_Conv2DOptions
+ ? static_cast<const circle::Conv2DOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_DepthwiseConv2DOptions
+ ? static_cast<const circle::DepthwiseConv2DOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_ConcatEmbeddingsOptions
+ ? static_cast<const circle::ConcatEmbeddingsOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::LSHProjectionOptions *builtin_options_as_LSHProjectionOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_LSHProjectionOptions
+ ? static_cast<const circle::LSHProjectionOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::Pool2DOptions *builtin_options_as_Pool2DOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_Pool2DOptions
+ ? static_cast<const circle::Pool2DOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::SVDFOptions *builtin_options_as_SVDFOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_SVDFOptions
+ ? static_cast<const circle::SVDFOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::RNNOptions *builtin_options_as_RNNOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_RNNOptions
+ ? static_cast<const circle::RNNOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::FullyConnectedOptions *builtin_options_as_FullyConnectedOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_FullyConnectedOptions
+ ? static_cast<const circle::FullyConnectedOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::SoftmaxOptions *builtin_options_as_SoftmaxOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_SoftmaxOptions
+ ? static_cast<const circle::SoftmaxOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::ConcatenationOptions *builtin_options_as_ConcatenationOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_ConcatenationOptions
+ ? static_cast<const circle::ConcatenationOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::AddOptions *builtin_options_as_AddOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_AddOptions
+ ? static_cast<const circle::AddOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::L2NormOptions *builtin_options_as_L2NormOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_L2NormOptions
+ ? static_cast<const circle::L2NormOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::LocalResponseNormalizationOptions *
+ builtin_options_as_LocalResponseNormalizationOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_LocalResponseNormalizationOptions
+ ? static_cast<const circle::LocalResponseNormalizationOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::LSTMOptions *builtin_options_as_LSTMOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_LSTMOptions
+ ? static_cast<const circle::LSTMOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_ResizeBilinearOptions
+ ? static_cast<const circle::ResizeBilinearOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::CallOptions *builtin_options_as_CallOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_CallOptions
+ ? static_cast<const circle::CallOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::ReshapeOptions *builtin_options_as_ReshapeOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_ReshapeOptions
+ ? static_cast<const circle::ReshapeOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::SkipGramOptions *builtin_options_as_SkipGramOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_SkipGramOptions
+ ? static_cast<const circle::SkipGramOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::SpaceToDepthOptions *builtin_options_as_SpaceToDepthOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_SpaceToDepthOptions
+ ? static_cast<const circle::SpaceToDepthOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::EmbeddingLookupSparseOptions *
+ builtin_options_as_EmbeddingLookupSparseOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_EmbeddingLookupSparseOptions
+ ? static_cast<const circle::EmbeddingLookupSparseOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::MulOptions *builtin_options_as_MulOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_MulOptions
+ ? static_cast<const circle::MulOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::PadOptions *builtin_options_as_PadOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_PadOptions
+ ? static_cast<const circle::PadOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::GatherOptions *builtin_options_as_GatherOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_GatherOptions
+ ? static_cast<const circle::GatherOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::BatchToSpaceNDOptions *builtin_options_as_BatchToSpaceNDOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_BatchToSpaceNDOptions
+ ? static_cast<const circle::BatchToSpaceNDOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::SpaceToBatchNDOptions *builtin_options_as_SpaceToBatchNDOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_SpaceToBatchNDOptions
+ ? static_cast<const circle::SpaceToBatchNDOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::TransposeOptions *builtin_options_as_TransposeOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_TransposeOptions
+ ? static_cast<const circle::TransposeOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::ReducerOptions *builtin_options_as_ReducerOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_ReducerOptions
+ ? static_cast<const circle::ReducerOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::SubOptions *builtin_options_as_SubOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_SubOptions
+ ? static_cast<const circle::SubOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::DivOptions *builtin_options_as_DivOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_DivOptions
+ ? static_cast<const circle::DivOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::SqueezeOptions *builtin_options_as_SqueezeOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_SqueezeOptions
+ ? static_cast<const circle::SqueezeOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::SequenceRNNOptions *builtin_options_as_SequenceRNNOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_SequenceRNNOptions
+ ? static_cast<const circle::SequenceRNNOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::StridedSliceOptions *builtin_options_as_StridedSliceOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_StridedSliceOptions
+ ? static_cast<const circle::StridedSliceOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::ExpOptions *builtin_options_as_ExpOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_ExpOptions
+ ? static_cast<const circle::ExpOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::TopKV2Options *builtin_options_as_TopKV2Options() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_TopKV2Options
+ ? static_cast<const circle::TopKV2Options *>(builtin_options())
+ : nullptr;
+ }
+ const circle::SplitOptions *builtin_options_as_SplitOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_SplitOptions
+ ? static_cast<const circle::SplitOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::LogSoftmaxOptions *builtin_options_as_LogSoftmaxOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_LogSoftmaxOptions
+ ? static_cast<const circle::LogSoftmaxOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::CastOptions *builtin_options_as_CastOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_CastOptions
+ ? static_cast<const circle::CastOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::DequantizeOptions *builtin_options_as_DequantizeOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_DequantizeOptions
+ ? static_cast<const circle::DequantizeOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::MaximumMinimumOptions *builtin_options_as_MaximumMinimumOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_MaximumMinimumOptions
+ ? static_cast<const circle::MaximumMinimumOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::ArgMaxOptions *builtin_options_as_ArgMaxOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_ArgMaxOptions
+ ? static_cast<const circle::ArgMaxOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::LessOptions *builtin_options_as_LessOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_LessOptions
+ ? static_cast<const circle::LessOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::NegOptions *builtin_options_as_NegOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_NegOptions
+ ? static_cast<const circle::NegOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::PadV2Options *builtin_options_as_PadV2Options() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_PadV2Options
+ ? static_cast<const circle::PadV2Options *>(builtin_options())
+ : nullptr;
+ }
+ const circle::GreaterOptions *builtin_options_as_GreaterOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_GreaterOptions
+ ? static_cast<const circle::GreaterOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::GreaterEqualOptions *builtin_options_as_GreaterEqualOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_GreaterEqualOptions
+ ? static_cast<const circle::GreaterEqualOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::LessEqualOptions *builtin_options_as_LessEqualOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_LessEqualOptions
+ ? static_cast<const circle::LessEqualOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::SelectOptions *builtin_options_as_SelectOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_SelectOptions
+ ? static_cast<const circle::SelectOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::SliceOptions *builtin_options_as_SliceOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_SliceOptions
+ ? static_cast<const circle::SliceOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::TransposeConvOptions *builtin_options_as_TransposeConvOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_TransposeConvOptions
+ ? static_cast<const circle::TransposeConvOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::SparseToDenseOptions *builtin_options_as_SparseToDenseOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_SparseToDenseOptions
+ ? static_cast<const circle::SparseToDenseOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::TileOptions *builtin_options_as_TileOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_TileOptions
+ ? static_cast<const circle::TileOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::ExpandDimsOptions *builtin_options_as_ExpandDimsOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_ExpandDimsOptions
+ ? static_cast<const circle::ExpandDimsOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::EqualOptions *builtin_options_as_EqualOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_EqualOptions
+ ? static_cast<const circle::EqualOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::NotEqualOptions *builtin_options_as_NotEqualOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_NotEqualOptions
+ ? static_cast<const circle::NotEqualOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::ShapeOptions *builtin_options_as_ShapeOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_ShapeOptions
+ ? static_cast<const circle::ShapeOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::PowOptions *builtin_options_as_PowOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_PowOptions
+ ? static_cast<const circle::PowOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::ArgMinOptions *builtin_options_as_ArgMinOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_ArgMinOptions
+ ? static_cast<const circle::ArgMinOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::FakeQuantOptions *builtin_options_as_FakeQuantOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_FakeQuantOptions
+ ? static_cast<const circle::FakeQuantOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::PackOptions *builtin_options_as_PackOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_PackOptions
+ ? static_cast<const circle::PackOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::LogicalOrOptions *builtin_options_as_LogicalOrOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_LogicalOrOptions
+ ? static_cast<const circle::LogicalOrOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::OneHotOptions *builtin_options_as_OneHotOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_OneHotOptions
+ ? static_cast<const circle::OneHotOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::LogicalAndOptions *builtin_options_as_LogicalAndOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_LogicalAndOptions
+ ? static_cast<const circle::LogicalAndOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::LogicalNotOptions *builtin_options_as_LogicalNotOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_LogicalNotOptions
+ ? static_cast<const circle::LogicalNotOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::UnpackOptions *builtin_options_as_UnpackOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_UnpackOptions
+ ? static_cast<const circle::UnpackOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::FloorDivOptions *builtin_options_as_FloorDivOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_FloorDivOptions
+ ? static_cast<const circle::FloorDivOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::SquareOptions *builtin_options_as_SquareOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_SquareOptions
+ ? static_cast<const circle::SquareOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::ZerosLikeOptions *builtin_options_as_ZerosLikeOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_ZerosLikeOptions
+ ? static_cast<const circle::ZerosLikeOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::FillOptions *builtin_options_as_FillOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_FillOptions
+ ? static_cast<const circle::FillOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::BidirectionalSequenceLSTMOptions *
+ builtin_options_as_BidirectionalSequenceLSTMOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_BidirectionalSequenceLSTMOptions
+ ? static_cast<const circle::BidirectionalSequenceLSTMOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::BidirectionalSequenceRNNOptions *
+ builtin_options_as_BidirectionalSequenceRNNOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_BidirectionalSequenceRNNOptions
+ ? static_cast<const circle::BidirectionalSequenceRNNOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::UnidirectionalSequenceLSTMOptions *
+ builtin_options_as_UnidirectionalSequenceLSTMOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_UnidirectionalSequenceLSTMOptions
+ ? static_cast<const circle::UnidirectionalSequenceLSTMOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::FloorModOptions *builtin_options_as_FloorModOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_FloorModOptions
+ ? static_cast<const circle::FloorModOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::RangeOptions *builtin_options_as_RangeOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_RangeOptions
+ ? static_cast<const circle::RangeOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::ResizeNearestNeighborOptions *
+ builtin_options_as_ResizeNearestNeighborOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_ResizeNearestNeighborOptions
+ ? static_cast<const circle::ResizeNearestNeighborOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::LeakyReluOptions *builtin_options_as_LeakyReluOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_LeakyReluOptions
+ ? static_cast<const circle::LeakyReluOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::SquaredDifferenceOptions *builtin_options_as_SquaredDifferenceOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_SquaredDifferenceOptions
+ ? static_cast<const circle::SquaredDifferenceOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::MirrorPadOptions *builtin_options_as_MirrorPadOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_MirrorPadOptions
+ ? static_cast<const circle::MirrorPadOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::AbsOptions *builtin_options_as_AbsOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_AbsOptions
+ ? static_cast<const circle::AbsOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::SplitVOptions *builtin_options_as_SplitVOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_SplitVOptions
+ ? static_cast<const circle::SplitVOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::UniqueOptions *builtin_options_as_UniqueOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_UniqueOptions
+ ? static_cast<const circle::UniqueOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::ReverseV2Options *builtin_options_as_ReverseV2Options() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_ReverseV2Options
+ ? static_cast<const circle::ReverseV2Options *>(builtin_options())
+ : nullptr;
+ }
+ const circle::AddNOptions *builtin_options_as_AddNOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_AddNOptions
+ ? static_cast<const circle::AddNOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::GatherNdOptions *builtin_options_as_GatherNdOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_GatherNdOptions
+ ? static_cast<const circle::GatherNdOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::CosOptions *builtin_options_as_CosOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_CosOptions
+ ? static_cast<const circle::CosOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::WhereOptions *builtin_options_as_WhereOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_WhereOptions
+ ? static_cast<const circle::WhereOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::RankOptions *builtin_options_as_RankOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_RankOptions
+ ? static_cast<const circle::RankOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::ReverseSequenceOptions *builtin_options_as_ReverseSequenceOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_ReverseSequenceOptions
+ ? static_cast<const circle::ReverseSequenceOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::MatrixDiagOptions *builtin_options_as_MatrixDiagOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_MatrixDiagOptions
+ ? static_cast<const circle::MatrixDiagOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::QuantizeOptions *builtin_options_as_QuantizeOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_QuantizeOptions
+ ? static_cast<const circle::QuantizeOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::MatrixSetDiagOptions *builtin_options_as_MatrixSetDiagOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_MatrixSetDiagOptions
+ ? static_cast<const circle::MatrixSetDiagOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::HardSwishOptions *builtin_options_as_HardSwishOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_HardSwishOptions
+ ? static_cast<const circle::HardSwishOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::IfOptions *builtin_options_as_IfOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_IfOptions
+ ? static_cast<const circle::IfOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::WhileOptions *builtin_options_as_WhileOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_WhileOptions
+ ? static_cast<const circle::WhileOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::DepthToSpaceOptions *builtin_options_as_DepthToSpaceOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_DepthToSpaceOptions
+ ? static_cast<const circle::DepthToSpaceOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::NonMaxSuppressionV4Options *builtin_options_as_NonMaxSuppressionV4Options() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_NonMaxSuppressionV4Options
+ ? static_cast<const circle::NonMaxSuppressionV4Options *>(builtin_options())
+ : nullptr;
+ }
+ const circle::NonMaxSuppressionV5Options *builtin_options_as_NonMaxSuppressionV5Options() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_NonMaxSuppressionV5Options
+ ? static_cast<const circle::NonMaxSuppressionV5Options *>(builtin_options())
+ : nullptr;
+ }
+ const circle::ScatterNdOptions *builtin_options_as_ScatterNdOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_ScatterNdOptions
+ ? static_cast<const circle::ScatterNdOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::SelectV2Options *builtin_options_as_SelectV2Options() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_SelectV2Options
+ ? static_cast<const circle::SelectV2Options *>(builtin_options())
+ : nullptr;
+ }
+ const circle::DensifyOptions *builtin_options_as_DensifyOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_DensifyOptions
+ ? static_cast<const circle::DensifyOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::SegmentSumOptions *builtin_options_as_SegmentSumOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_SegmentSumOptions
+ ? static_cast<const circle::SegmentSumOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::BatchMatMulOptions *builtin_options_as_BatchMatMulOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_BatchMatMulOptions
+ ? static_cast<const circle::BatchMatMulOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::CumsumOptions *builtin_options_as_CumsumOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_CumsumOptions
+ ? static_cast<const circle::CumsumOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::CallOnceOptions *builtin_options_as_CallOnceOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_CallOnceOptions
+ ? static_cast<const circle::CallOnceOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::BroadcastToOptions *builtin_options_as_BroadcastToOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_BroadcastToOptions
+ ? static_cast<const circle::BroadcastToOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::Rfft2dOptions *builtin_options_as_Rfft2dOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_Rfft2dOptions
+ ? static_cast<const circle::Rfft2dOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::Conv3DOptions *builtin_options_as_Conv3DOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_Conv3DOptions
+ ? static_cast<const circle::Conv3DOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::HashtableOptions *builtin_options_as_HashtableOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_HashtableOptions
+ ? static_cast<const circle::HashtableOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::HashtableFindOptions *builtin_options_as_HashtableFindOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_HashtableFindOptions
+ ? static_cast<const circle::HashtableFindOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::HashtableImportOptions *builtin_options_as_HashtableImportOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_HashtableImportOptions
+ ? static_cast<const circle::HashtableImportOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::HashtableSizeOptions *builtin_options_as_HashtableSizeOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_HashtableSizeOptions
+ ? static_cast<const circle::HashtableSizeOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::VarHandleOptions *builtin_options_as_VarHandleOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_VarHandleOptions
+ ? static_cast<const circle::VarHandleOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::ReadVariableOptions *builtin_options_as_ReadVariableOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_ReadVariableOptions
+ ? static_cast<const circle::ReadVariableOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::AssignVariableOptions *builtin_options_as_AssignVariableOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_AssignVariableOptions
+ ? static_cast<const circle::AssignVariableOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::RandomOptions *builtin_options_as_RandomOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_RandomOptions
+ ? static_cast<const circle::RandomOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::BCQGatherOptions *builtin_options_as_BCQGatherOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_BCQGatherOptions
+ ? static_cast<const circle::BCQGatherOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::BCQFullyConnectedOptions *builtin_options_as_BCQFullyConnectedOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_BCQFullyConnectedOptions
+ ? static_cast<const circle::BCQFullyConnectedOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::InstanceNormOptions *builtin_options_as_InstanceNormOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_InstanceNormOptions
+ ? static_cast<const circle::InstanceNormOptions *>(builtin_options())
+ : nullptr;
+ }
+ const flatbuffers::Vector<uint8_t> *custom_options() const
+ {
+ return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM_OPTIONS);
+ }
+ circle::CustomOptionsFormat custom_options_format() const
+ {
+ return static_cast<circle::CustomOptionsFormat>(GetField<int8_t>(VT_CUSTOM_OPTIONS_FORMAT, 0));
+ }
+ const flatbuffers::Vector<uint8_t> *mutating_variable_inputs() const
+ {
+ return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_MUTATING_VARIABLE_INPUTS);
+ }
+ const flatbuffers::Vector<int32_t> *intermediates() const
+ {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_INTERMEDIATES);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<uint32_t>(verifier, VT_OPCODE_INDEX) &&
+ VerifyOffset(verifier, VT_INPUTS) && verifier.VerifyVector(inputs()) &&
+ VerifyOffset(verifier, VT_OUTPUTS) && verifier.VerifyVector(outputs()) &&
+ VerifyField<uint8_t>(verifier, VT_BUILTIN_OPTIONS_TYPE) &&
+ VerifyOffset(verifier, VT_BUILTIN_OPTIONS) &&
+ VerifyBuiltinOptions(verifier, builtin_options(), builtin_options_type()) &&
+ VerifyOffset(verifier, VT_CUSTOM_OPTIONS) && verifier.VerifyVector(custom_options()) &&
+ VerifyField<int8_t>(verifier, VT_CUSTOM_OPTIONS_FORMAT) &&
+ VerifyOffset(verifier, VT_MUTATING_VARIABLE_INPUTS) &&
+ verifier.VerifyVector(mutating_variable_inputs()) &&
+ VerifyOffset(verifier, VT_INTERMEDIATES) && verifier.VerifyVector(intermediates()) &&
+ verifier.EndTable();
+ }
+ OperatorT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(OperatorT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<Operator>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const OperatorT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
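+
+// The builtin_options_as_X() accessors above implement a checked tagged-union
+// read: each one compares builtin_options_type() against the matching
+// BuiltinOptions enum value and returns nullptr on mismatch, so callers can
+// probe for an option type without switching on the tag first. A minimal,
+// hypothetical caller (the local names below are assumptions, not part of
+// this header):
+//
+//   const circle::Operator *op = ...; // from a verified circle model
+//   if (const auto *conv = op->builtin_options_as_Conv2DOptions())
+//   {
+//     // Only reached when builtin_options_type() == BuiltinOptions_Conv2DOptions.
+//     do_something(conv->stride_w(), conv->stride_h());
+//   }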
+
+template <>
+inline const circle::Conv2DOptions *Operator::builtin_options_as<circle::Conv2DOptions>() const
+{
+ return builtin_options_as_Conv2DOptions();
+}
+
+template <>
+inline const circle::DepthwiseConv2DOptions *
+Operator::builtin_options_as<circle::DepthwiseConv2DOptions>() const
+{
+ return builtin_options_as_DepthwiseConv2DOptions();
+}
+
+template <>
+inline const circle::ConcatEmbeddingsOptions *
+Operator::builtin_options_as<circle::ConcatEmbeddingsOptions>() const
+{
+ return builtin_options_as_ConcatEmbeddingsOptions();
+}
+
+template <>
+inline const circle::LSHProjectionOptions *
+Operator::builtin_options_as<circle::LSHProjectionOptions>() const
+{
+ return builtin_options_as_LSHProjectionOptions();
+}
+
+template <>
+inline const circle::Pool2DOptions *Operator::builtin_options_as<circle::Pool2DOptions>() const
+{
+ return builtin_options_as_Pool2DOptions();
+}
+
+template <>
+inline const circle::SVDFOptions *Operator::builtin_options_as<circle::SVDFOptions>() const
+{
+ return builtin_options_as_SVDFOptions();
+}
+
+template <>
+inline const circle::RNNOptions *Operator::builtin_options_as<circle::RNNOptions>() const
+{
+ return builtin_options_as_RNNOptions();
+}
+
+template <>
+inline const circle::FullyConnectedOptions *
+Operator::builtin_options_as<circle::FullyConnectedOptions>() const
+{
+ return builtin_options_as_FullyConnectedOptions();
+}
+
+template <>
+inline const circle::SoftmaxOptions *Operator::builtin_options_as<circle::SoftmaxOptions>() const
+{
+ return builtin_options_as_SoftmaxOptions();
+}
+
+template <>
+inline const circle::ConcatenationOptions *
+Operator::builtin_options_as<circle::ConcatenationOptions>() const
+{
+ return builtin_options_as_ConcatenationOptions();
+}
+
+template <>
+inline const circle::AddOptions *Operator::builtin_options_as<circle::AddOptions>() const
+{
+ return builtin_options_as_AddOptions();
+}
+
+template <>
+inline const circle::L2NormOptions *Operator::builtin_options_as<circle::L2NormOptions>() const
+{
+ return builtin_options_as_L2NormOptions();
+}
+
+template <>
+inline const circle::LocalResponseNormalizationOptions *
+Operator::builtin_options_as<circle::LocalResponseNormalizationOptions>() const
+{
+ return builtin_options_as_LocalResponseNormalizationOptions();
+}
+
+template <>
+inline const circle::LSTMOptions *Operator::builtin_options_as<circle::LSTMOptions>() const
+{
+ return builtin_options_as_LSTMOptions();
+}
+
+template <>
+inline const circle::ResizeBilinearOptions *
+Operator::builtin_options_as<circle::ResizeBilinearOptions>() const
+{
+ return builtin_options_as_ResizeBilinearOptions();
+}
+
+template <>
+inline const circle::CallOptions *Operator::builtin_options_as<circle::CallOptions>() const
+{
+ return builtin_options_as_CallOptions();
+}
+
+template <>
+inline const circle::ReshapeOptions *Operator::builtin_options_as<circle::ReshapeOptions>() const
+{
+ return builtin_options_as_ReshapeOptions();
+}
+
+template <>
+inline const circle::SkipGramOptions *Operator::builtin_options_as<circle::SkipGramOptions>() const
+{
+ return builtin_options_as_SkipGramOptions();
+}
+
+template <>
+inline const circle::SpaceToDepthOptions *
+Operator::builtin_options_as<circle::SpaceToDepthOptions>() const
+{
+ return builtin_options_as_SpaceToDepthOptions();
+}
+
+template <>
+inline const circle::EmbeddingLookupSparseOptions *
+Operator::builtin_options_as<circle::EmbeddingLookupSparseOptions>() const
+{
+ return builtin_options_as_EmbeddingLookupSparseOptions();
+}
+
+template <>
+inline const circle::MulOptions *Operator::builtin_options_as<circle::MulOptions>() const
+{
+ return builtin_options_as_MulOptions();
+}
+
+template <>
+inline const circle::PadOptions *Operator::builtin_options_as<circle::PadOptions>() const
+{
+ return builtin_options_as_PadOptions();
+}
+
+template <>
+inline const circle::GatherOptions *Operator::builtin_options_as<circle::GatherOptions>() const
+{
+ return builtin_options_as_GatherOptions();
+}
+
+template <>
+inline const circle::BatchToSpaceNDOptions *
+Operator::builtin_options_as<circle::BatchToSpaceNDOptions>() const
+{
+ return builtin_options_as_BatchToSpaceNDOptions();
+}
+
+template <>
+inline const circle::SpaceToBatchNDOptions *
+Operator::builtin_options_as<circle::SpaceToBatchNDOptions>() const
+{
+ return builtin_options_as_SpaceToBatchNDOptions();
+}
+
+template <>
+inline const circle::TransposeOptions *
+Operator::builtin_options_as<circle::TransposeOptions>() const
+{
+ return builtin_options_as_TransposeOptions();
+}
+
+template <>
+inline const circle::ReducerOptions *Operator::builtin_options_as<circle::ReducerOptions>() const
+{
+ return builtin_options_as_ReducerOptions();
+}
+
+template <>
+inline const circle::SubOptions *Operator::builtin_options_as<circle::SubOptions>() const
+{
+ return builtin_options_as_SubOptions();
+}
+
+template <>
+inline const circle::DivOptions *Operator::builtin_options_as<circle::DivOptions>() const
+{
+ return builtin_options_as_DivOptions();
+}
+
+template <>
+inline const circle::SqueezeOptions *Operator::builtin_options_as<circle::SqueezeOptions>() const
+{
+ return builtin_options_as_SqueezeOptions();
+}
+
+template <>
+inline const circle::SequenceRNNOptions *
+Operator::builtin_options_as<circle::SequenceRNNOptions>() const
+{
+ return builtin_options_as_SequenceRNNOptions();
+}
+
+template <>
+inline const circle::StridedSliceOptions *
+Operator::builtin_options_as<circle::StridedSliceOptions>() const
+{
+ return builtin_options_as_StridedSliceOptions();
+}
+
+template <>
+inline const circle::ExpOptions *Operator::builtin_options_as<circle::ExpOptions>() const
+{
+ return builtin_options_as_ExpOptions();
+}
+
+template <>
+inline const circle::TopKV2Options *Operator::builtin_options_as<circle::TopKV2Options>() const
+{
+ return builtin_options_as_TopKV2Options();
+}
+
+template <>
+inline const circle::SplitOptions *Operator::builtin_options_as<circle::SplitOptions>() const
+{
+ return builtin_options_as_SplitOptions();
+}
+
+template <>
+inline const circle::LogSoftmaxOptions *
+Operator::builtin_options_as<circle::LogSoftmaxOptions>() const
+{
+ return builtin_options_as_LogSoftmaxOptions();
+}
+
+template <>
+inline const circle::CastOptions *Operator::builtin_options_as<circle::CastOptions>() const
+{
+ return builtin_options_as_CastOptions();
+}
+
+template <>
+inline const circle::DequantizeOptions *
+Operator::builtin_options_as<circle::DequantizeOptions>() const
+{
+ return builtin_options_as_DequantizeOptions();
+}
+
+template <>
+inline const circle::MaximumMinimumOptions *
+Operator::builtin_options_as<circle::MaximumMinimumOptions>() const
+{
+ return builtin_options_as_MaximumMinimumOptions();
+}
+
+template <>
+inline const circle::ArgMaxOptions *Operator::builtin_options_as<circle::ArgMaxOptions>() const
+{
+ return builtin_options_as_ArgMaxOptions();
+}
+
+template <>
+inline const circle::LessOptions *Operator::builtin_options_as<circle::LessOptions>() const
+{
+ return builtin_options_as_LessOptions();
+}
+
+template <>
+inline const circle::NegOptions *Operator::builtin_options_as<circle::NegOptions>() const
+{
+ return builtin_options_as_NegOptions();
+}
+
+template <>
+inline const circle::PadV2Options *Operator::builtin_options_as<circle::PadV2Options>() const
+{
+ return builtin_options_as_PadV2Options();
+}
+
+template <>
+inline const circle::GreaterOptions *Operator::builtin_options_as<circle::GreaterOptions>() const
+{
+ return builtin_options_as_GreaterOptions();
+}
+
+template <>
+inline const circle::GreaterEqualOptions *
+Operator::builtin_options_as<circle::GreaterEqualOptions>() const
+{
+ return builtin_options_as_GreaterEqualOptions();
+}
+
+template <>
+inline const circle::LessEqualOptions *
+Operator::builtin_options_as<circle::LessEqualOptions>() const
+{
+ return builtin_options_as_LessEqualOptions();
+}
+
+template <>
+inline const circle::SelectOptions *Operator::builtin_options_as<circle::SelectOptions>() const
+{
+ return builtin_options_as_SelectOptions();
+}
+
+template <>
+inline const circle::SliceOptions *Operator::builtin_options_as<circle::SliceOptions>() const
+{
+ return builtin_options_as_SliceOptions();
+}
+
+template <>
+inline const circle::TransposeConvOptions *
+Operator::builtin_options_as<circle::TransposeConvOptions>() const
+{
+ return builtin_options_as_TransposeConvOptions();
+}
+
+template <>
+inline const circle::SparseToDenseOptions *
+Operator::builtin_options_as<circle::SparseToDenseOptions>() const
+{
+ return builtin_options_as_SparseToDenseOptions();
+}
+
+template <>
+inline const circle::TileOptions *Operator::builtin_options_as<circle::TileOptions>() const
+{
+ return builtin_options_as_TileOptions();
+}
+
+template <>
+inline const circle::ExpandDimsOptions *
+Operator::builtin_options_as<circle::ExpandDimsOptions>() const
+{
+ return builtin_options_as_ExpandDimsOptions();
+}
+
+template <>
+inline const circle::EqualOptions *Operator::builtin_options_as<circle::EqualOptions>() const
+{
+ return builtin_options_as_EqualOptions();
+}
+
+template <>
+inline const circle::NotEqualOptions *Operator::builtin_options_as<circle::NotEqualOptions>() const
+{
+ return builtin_options_as_NotEqualOptions();
+}
+
+template <>
+inline const circle::ShapeOptions *Operator::builtin_options_as<circle::ShapeOptions>() const
+{
+ return builtin_options_as_ShapeOptions();
+}
+
+template <>
+inline const circle::PowOptions *Operator::builtin_options_as<circle::PowOptions>() const
+{
+ return builtin_options_as_PowOptions();
+}
+
+template <>
+inline const circle::ArgMinOptions *Operator::builtin_options_as<circle::ArgMinOptions>() const
+{
+ return builtin_options_as_ArgMinOptions();
+}
+
+template <>
+inline const circle::FakeQuantOptions *
+Operator::builtin_options_as<circle::FakeQuantOptions>() const
+{
+ return builtin_options_as_FakeQuantOptions();
+}
+
+template <>
+inline const circle::PackOptions *Operator::builtin_options_as<circle::PackOptions>() const
+{
+ return builtin_options_as_PackOptions();
+}
+
+template <>
+inline const circle::LogicalOrOptions *
+Operator::builtin_options_as<circle::LogicalOrOptions>() const
+{
+ return builtin_options_as_LogicalOrOptions();
+}
+
+template <>
+inline const circle::OneHotOptions *Operator::builtin_options_as<circle::OneHotOptions>() const
+{
+ return builtin_options_as_OneHotOptions();
+}
+
+template <>
+inline const circle::LogicalAndOptions *
+Operator::builtin_options_as<circle::LogicalAndOptions>() const
+{
+ return builtin_options_as_LogicalAndOptions();
+}
+
+template <>
+inline const circle::LogicalNotOptions *
+Operator::builtin_options_as<circle::LogicalNotOptions>() const
+{
+ return builtin_options_as_LogicalNotOptions();
+}
+
+template <>
+inline const circle::UnpackOptions *Operator::builtin_options_as<circle::UnpackOptions>() const
+{
+ return builtin_options_as_UnpackOptions();
+}
+
+template <>
+inline const circle::FloorDivOptions *Operator::builtin_options_as<circle::FloorDivOptions>() const
+{
+ return builtin_options_as_FloorDivOptions();
+}
+
+template <>
+inline const circle::SquareOptions *Operator::builtin_options_as<circle::SquareOptions>() const
+{
+ return builtin_options_as_SquareOptions();
+}
+
+template <>
+inline const circle::ZerosLikeOptions *
+Operator::builtin_options_as<circle::ZerosLikeOptions>() const
+{
+ return builtin_options_as_ZerosLikeOptions();
+}
+
+template <>
+inline const circle::FillOptions *Operator::builtin_options_as<circle::FillOptions>() const
+{
+ return builtin_options_as_FillOptions();
+}
+
+template <>
+inline const circle::BidirectionalSequenceLSTMOptions *
+Operator::builtin_options_as<circle::BidirectionalSequenceLSTMOptions>() const
+{
+ return builtin_options_as_BidirectionalSequenceLSTMOptions();
+}
+
+template <>
+inline const circle::BidirectionalSequenceRNNOptions *
+Operator::builtin_options_as<circle::BidirectionalSequenceRNNOptions>() const
+{
+ return builtin_options_as_BidirectionalSequenceRNNOptions();
+}
+
+template <>
+inline const circle::UnidirectionalSequenceLSTMOptions *
+Operator::builtin_options_as<circle::UnidirectionalSequenceLSTMOptions>() const
+{
+ return builtin_options_as_UnidirectionalSequenceLSTMOptions();
+}
+
+template <>
+inline const circle::FloorModOptions *Operator::builtin_options_as<circle::FloorModOptions>() const
+{
+ return builtin_options_as_FloorModOptions();
+}
+
+template <>
+inline const circle::RangeOptions *Operator::builtin_options_as<circle::RangeOptions>() const
+{
+ return builtin_options_as_RangeOptions();
+}
+
+template <>
+inline const circle::ResizeNearestNeighborOptions *
+Operator::builtin_options_as<circle::ResizeNearestNeighborOptions>() const
+{
+ return builtin_options_as_ResizeNearestNeighborOptions();
+}
+
+template <>
+inline const circle::LeakyReluOptions *
+Operator::builtin_options_as<circle::LeakyReluOptions>() const
+{
+ return builtin_options_as_LeakyReluOptions();
+}
+
+template <>
+inline const circle::SquaredDifferenceOptions *
+Operator::builtin_options_as<circle::SquaredDifferenceOptions>() const
+{
+ return builtin_options_as_SquaredDifferenceOptions();
+}
+
+template <>
+inline const circle::MirrorPadOptions *
+Operator::builtin_options_as<circle::MirrorPadOptions>() const
+{
+ return builtin_options_as_MirrorPadOptions();
+}
+
+template <>
+inline const circle::AbsOptions *Operator::builtin_options_as<circle::AbsOptions>() const
+{
+ return builtin_options_as_AbsOptions();
+}
+
+template <>
+inline const circle::SplitVOptions *Operator::builtin_options_as<circle::SplitVOptions>() const
+{
+ return builtin_options_as_SplitVOptions();
+}
+
+template <>
+inline const circle::UniqueOptions *Operator::builtin_options_as<circle::UniqueOptions>() const
+{
+ return builtin_options_as_UniqueOptions();
+}
+
+template <>
+inline const circle::ReverseV2Options *
+Operator::builtin_options_as<circle::ReverseV2Options>() const
+{
+ return builtin_options_as_ReverseV2Options();
+}
+
+template <>
+inline const circle::AddNOptions *Operator::builtin_options_as<circle::AddNOptions>() const
+{
+ return builtin_options_as_AddNOptions();
+}
+
+template <>
+inline const circle::GatherNdOptions *Operator::builtin_options_as<circle::GatherNdOptions>() const
+{
+ return builtin_options_as_GatherNdOptions();
+}
+
+template <>
+inline const circle::CosOptions *Operator::builtin_options_as<circle::CosOptions>() const
+{
+ return builtin_options_as_CosOptions();
+}
+
+template <>
+inline const circle::WhereOptions *Operator::builtin_options_as<circle::WhereOptions>() const
+{
+ return builtin_options_as_WhereOptions();
+}
+
+template <>
+inline const circle::RankOptions *Operator::builtin_options_as<circle::RankOptions>() const
+{
+ return builtin_options_as_RankOptions();
+}
+
+template <>
+inline const circle::ReverseSequenceOptions *
+Operator::builtin_options_as<circle::ReverseSequenceOptions>() const
+{
+ return builtin_options_as_ReverseSequenceOptions();
+}
+
+template <>
+inline const circle::MatrixDiagOptions *
+Operator::builtin_options_as<circle::MatrixDiagOptions>() const
+{
+ return builtin_options_as_MatrixDiagOptions();
+}
+
+template <>
+inline const circle::QuantizeOptions *Operator::builtin_options_as<circle::QuantizeOptions>() const
+{
+ return builtin_options_as_QuantizeOptions();
+}
+
+template <>
+inline const circle::MatrixSetDiagOptions *
+Operator::builtin_options_as<circle::MatrixSetDiagOptions>() const
+{
+ return builtin_options_as_MatrixSetDiagOptions();
+}
+
+template <>
+inline const circle::HardSwishOptions *
+Operator::builtin_options_as<circle::HardSwishOptions>() const
+{
+ return builtin_options_as_HardSwishOptions();
+}
+
+template <> inline const circle::IfOptions *Operator::builtin_options_as<circle::IfOptions>() const
+{
+ return builtin_options_as_IfOptions();
+}
+
+template <>
+inline const circle::WhileOptions *Operator::builtin_options_as<circle::WhileOptions>() const
+{
+ return builtin_options_as_WhileOptions();
+}
+
+template <>
+inline const circle::DepthToSpaceOptions *
+Operator::builtin_options_as<circle::DepthToSpaceOptions>() const
+{
+ return builtin_options_as_DepthToSpaceOptions();
+}
+
+template <>
+inline const circle::NonMaxSuppressionV4Options *
+Operator::builtin_options_as<circle::NonMaxSuppressionV4Options>() const
+{
+ return builtin_options_as_NonMaxSuppressionV4Options();
+}
+
+template <>
+inline const circle::NonMaxSuppressionV5Options *
+Operator::builtin_options_as<circle::NonMaxSuppressionV5Options>() const
+{
+ return builtin_options_as_NonMaxSuppressionV5Options();
+}
+
+template <>
+inline const circle::ScatterNdOptions *
+Operator::builtin_options_as<circle::ScatterNdOptions>() const
+{
+ return builtin_options_as_ScatterNdOptions();
+}
+
+template <>
+inline const circle::SelectV2Options *Operator::builtin_options_as<circle::SelectV2Options>() const
+{
+ return builtin_options_as_SelectV2Options();
+}
+
+template <>
+inline const circle::DensifyOptions *Operator::builtin_options_as<circle::DensifyOptions>() const
+{
+ return builtin_options_as_DensifyOptions();
+}
+
+template <>
+inline const circle::SegmentSumOptions *
+Operator::builtin_options_as<circle::SegmentSumOptions>() const
+{
+ return builtin_options_as_SegmentSumOptions();
+}
+
+template <>
+inline const circle::BatchMatMulOptions *
+Operator::builtin_options_as<circle::BatchMatMulOptions>() const
+{
+ return builtin_options_as_BatchMatMulOptions();
+}
+
+template <>
+inline const circle::CumsumOptions *Operator::builtin_options_as<circle::CumsumOptions>() const
+{
+ return builtin_options_as_CumsumOptions();
+}
+
+template <>
+inline const circle::CallOnceOptions *Operator::builtin_options_as<circle::CallOnceOptions>() const
+{
+ return builtin_options_as_CallOnceOptions();
+}
+
+template <>
+inline const circle::BroadcastToOptions *
+Operator::builtin_options_as<circle::BroadcastToOptions>() const
+{
+ return builtin_options_as_BroadcastToOptions();
+}
+
+template <>
+inline const circle::Rfft2dOptions *Operator::builtin_options_as<circle::Rfft2dOptions>() const
+{
+ return builtin_options_as_Rfft2dOptions();
+}
+
+template <>
+inline const circle::Conv3DOptions *Operator::builtin_options_as<circle::Conv3DOptions>() const
+{
+ return builtin_options_as_Conv3DOptions();
+}
+
+template <>
+inline const circle::HashtableOptions *
+Operator::builtin_options_as<circle::HashtableOptions>() const
+{
+ return builtin_options_as_HashtableOptions();
+}
+
+template <>
+inline const circle::HashtableFindOptions *
+Operator::builtin_options_as<circle::HashtableFindOptions>() const
+{
+ return builtin_options_as_HashtableFindOptions();
+}
+
+template <>
+inline const circle::HashtableImportOptions *
+Operator::builtin_options_as<circle::HashtableImportOptions>() const
+{
+ return builtin_options_as_HashtableImportOptions();
+}
+
+template <>
+inline const circle::HashtableSizeOptions *
+Operator::builtin_options_as<circle::HashtableSizeOptions>() const
+{
+ return builtin_options_as_HashtableSizeOptions();
+}
+
+template <>
+inline const circle::VarHandleOptions *
+Operator::builtin_options_as<circle::VarHandleOptions>() const
+{
+ return builtin_options_as_VarHandleOptions();
+}
+
+template <>
+inline const circle::ReadVariableOptions *
+Operator::builtin_options_as<circle::ReadVariableOptions>() const
+{
+ return builtin_options_as_ReadVariableOptions();
+}
+
+template <>
+inline const circle::AssignVariableOptions *
+Operator::builtin_options_as<circle::AssignVariableOptions>() const
+{
+ return builtin_options_as_AssignVariableOptions();
+}
+
+template <>
+inline const circle::RandomOptions *Operator::builtin_options_as<circle::RandomOptions>() const
+{
+ return builtin_options_as_RandomOptions();
+}
+
+template <>
+inline const circle::BCQGatherOptions *
+Operator::builtin_options_as<circle::BCQGatherOptions>() const
+{
+ return builtin_options_as_BCQGatherOptions();
+}
+
+template <>
+inline const circle::BCQFullyConnectedOptions *
+Operator::builtin_options_as<circle::BCQFullyConnectedOptions>() const
+{
+ return builtin_options_as_BCQFullyConnectedOptions();
+}
+
+template <>
+inline const circle::InstanceNormOptions *
+Operator::builtin_options_as<circle::InstanceNormOptions>() const
+{
+ return builtin_options_as_InstanceNormOptions();
+}
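+
+// The explicit specializations above forward builtin_options_as<T>() to the
+// per-type named accessors, letting template code select an option type at
+// compile time instead of spelling out the accessor name. A short sketch,
+// with a helper name that is an assumption for illustration:
+//
+//   template <typename OptionsT>
+//   bool has_builtin_options(const circle::Operator *op)
+//   {
+//     return op->builtin_options_as<OptionsT>() != nullptr;
+//   }
+//
+//   // has_builtin_options<circle::AddOptions>(op) resolves to the
+//   // builtin_options_as_AddOptions() check at compile time.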
+
+struct OperatorBuilder
+{
+ typedef Operator Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_opcode_index(uint32_t opcode_index)
+ {
+ fbb_.AddElement<uint32_t>(Operator::VT_OPCODE_INDEX, opcode_index, 0);
+ }
+ void add_inputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs)
+ {
+ fbb_.AddOffset(Operator::VT_INPUTS, inputs);
+ }
+ void add_outputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs)
+ {
+ fbb_.AddOffset(Operator::VT_OUTPUTS, outputs);
+ }
+ void add_builtin_options_type(circle::BuiltinOptions builtin_options_type)
+ {
+ fbb_.AddElement<uint8_t>(Operator::VT_BUILTIN_OPTIONS_TYPE,
+ static_cast<uint8_t>(builtin_options_type), 0);
+ }
+ void add_builtin_options(flatbuffers::Offset<void> builtin_options)
+ {
+ fbb_.AddOffset(Operator::VT_BUILTIN_OPTIONS, builtin_options);
+ }
+ void add_custom_options(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options)
+ {
+ fbb_.AddOffset(Operator::VT_CUSTOM_OPTIONS, custom_options);
+ }
+ void add_custom_options_format(circle::CustomOptionsFormat custom_options_format)
+ {
+ fbb_.AddElement<int8_t>(Operator::VT_CUSTOM_OPTIONS_FORMAT,
+ static_cast<int8_t>(custom_options_format), 0);
+ }
+ void add_mutating_variable_inputs(
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs)
+ {
+ fbb_.AddOffset(Operator::VT_MUTATING_VARIABLE_INPUTS, mutating_variable_inputs);
+ }
+ void add_intermediates(flatbuffers::Offset<flatbuffers::Vector<int32_t>> intermediates)
+ {
+ fbb_.AddOffset(Operator::VT_INTERMEDIATES, intermediates);
+ }
+ explicit OperatorBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<Operator> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Operator>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Operator> CreateOperator(
+ flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
+ circle::BuiltinOptions builtin_options_type = circle::BuiltinOptions_NONE,
+ flatbuffers::Offset<void> builtin_options = 0,
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options = 0,
+ circle::CustomOptionsFormat custom_options_format = circle::CustomOptionsFormat_FLEXBUFFERS,
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> intermediates = 0)
+{
+ OperatorBuilder builder_(_fbb);
+ builder_.add_intermediates(intermediates);
+ builder_.add_mutating_variable_inputs(mutating_variable_inputs);
+ builder_.add_custom_options(custom_options);
+ builder_.add_builtin_options(builtin_options);
+ builder_.add_outputs(outputs);
+ builder_.add_inputs(inputs);
+ builder_.add_opcode_index(opcode_index);
+ builder_.add_custom_options_format(custom_options_format);
+ builder_.add_builtin_options_type(builtin_options_type);
+ return builder_.Finish();
+}
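+
+// Note: CreateOperator adds the 4-byte offset and scalar fields before the
+// 1-byte enum fields; flatc orders the add_* calls by decreasing field size
+// so same-sized fields pack together and alignment padding in the table is
+// minimized. Calling the builder's add_* methods in a different order would
+// still produce a valid table; the ordering is purely a size optimization.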
+
+inline flatbuffers::Offset<Operator> CreateOperatorDirect(
+ flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
+ const std::vector<int32_t> *inputs = nullptr, const std::vector<int32_t> *outputs = nullptr,
+ circle::BuiltinOptions builtin_options_type = circle::BuiltinOptions_NONE,
+ flatbuffers::Offset<void> builtin_options = 0,
+ const std::vector<uint8_t> *custom_options = nullptr,
+ circle::CustomOptionsFormat custom_options_format = circle::CustomOptionsFormat_FLEXBUFFERS,
+ const std::vector<uint8_t> *mutating_variable_inputs = nullptr,
+ const std::vector<int32_t> *intermediates = nullptr)
+{
+ auto inputs__ = inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0;
+ auto outputs__ = outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0;
+ auto custom_options__ = custom_options ? _fbb.CreateVector<uint8_t>(*custom_options) : 0;
+ auto mutating_variable_inputs__ =
+ mutating_variable_inputs ? _fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0;
+ auto intermediates__ = intermediates ? _fbb.CreateVector<int32_t>(*intermediates) : 0;
+ return circle::CreateOperator(_fbb, opcode_index, inputs__, outputs__, builtin_options_type,
+ builtin_options, custom_options__, custom_options_format,
+ mutating_variable_inputs__, intermediates__);
+}
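+
+// A minimal usage sketch for CreateOperatorDirect; the tensor indices, opcode
+// index, and option type below are assumptions for illustration:
+//
+//   flatbuffers::FlatBufferBuilder fbb;
+//   std::vector<int32_t> ins{0, 1}, outs{2};
+//   auto opts = circle::CreateAddOptions(fbb).Union(); // Offset<void> for the union field
+//   auto op = circle::CreateOperatorDirect(fbb, /*opcode_index=*/0, &ins, &outs,
+//                                          circle::BuiltinOptions_AddOptions, opts);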
+
+flatbuffers::Offset<Operator>
+CreateOperator(flatbuffers::FlatBufferBuilder &_fbb, const OperatorT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct SubGraphT : public flatbuffers::NativeTable
+{
+ typedef SubGraph TableType;
+ std::vector<std::unique_ptr<circle::TensorT>> tensors{};
+ std::vector<int32_t> inputs{};
+ std::vector<int32_t> outputs{};
+ std::vector<std::unique_ptr<circle::OperatorT>> operators{};
+ std::string name{};
+ circle::DataFormat data_format = circle::DataFormat_CHANNELS_LAST;
+};
+
+struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef SubGraphT NativeTableType;
+ typedef SubGraphBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_TENSORS = 4,
+ VT_INPUTS = 6,
+ VT_OUTPUTS = 8,
+ VT_OPERATORS = 10,
+ VT_NAME = 12,
+ VT_DATA_FORMAT = 14
+ };
+ const flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>> *tensors() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>> *>(VT_TENSORS);
+ }
+ const flatbuffers::Vector<int32_t> *inputs() const
+ {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_INPUTS);
+ }
+ const flatbuffers::Vector<int32_t> *outputs() const
+ {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_OUTPUTS);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<circle::Operator>> *operators() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::Operator>> *>(
+ VT_OPERATORS);
+ }
+ const flatbuffers::String *name() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_NAME);
+ }
+ circle::DataFormat data_format() const
+ {
+ return static_cast<circle::DataFormat>(GetField<int8_t>(VT_DATA_FORMAT, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_TENSORS) &&
+ verifier.VerifyVector(tensors()) && verifier.VerifyVectorOfTables(tensors()) &&
+ VerifyOffset(verifier, VT_INPUTS) && verifier.VerifyVector(inputs()) &&
+ VerifyOffset(verifier, VT_OUTPUTS) && verifier.VerifyVector(outputs()) &&
+ VerifyOffset(verifier, VT_OPERATORS) && verifier.VerifyVector(operators()) &&
+ verifier.VerifyVectorOfTables(operators()) && VerifyOffset(verifier, VT_NAME) &&
+ verifier.VerifyString(name()) && VerifyField<int8_t>(verifier, VT_DATA_FORMAT) &&
+ verifier.EndTable();
+ }
+ SubGraphT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(SubGraphT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<SubGraph>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SubGraphBuilder
+{
+ typedef SubGraph Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void
+ add_tensors(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>> tensors)
+ {
+ fbb_.AddOffset(SubGraph::VT_TENSORS, tensors);
+ }
+ void add_inputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs)
+ {
+ fbb_.AddOffset(SubGraph::VT_INPUTS, inputs);
+ }
+ void add_outputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs)
+ {
+ fbb_.AddOffset(SubGraph::VT_OUTPUTS, outputs);
+ }
+ void add_operators(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>> operators)
+ {
+ fbb_.AddOffset(SubGraph::VT_OPERATORS, operators);
+ }
+ void add_name(flatbuffers::Offset<flatbuffers::String> name)
+ {
+ fbb_.AddOffset(SubGraph::VT_NAME, name);
+ }
+ void add_data_format(circle::DataFormat data_format)
+ {
+ fbb_.AddElement<int8_t>(SubGraph::VT_DATA_FORMAT, static_cast<int8_t>(data_format), 0);
+ }
+ explicit SubGraphBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<SubGraph> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SubGraph>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SubGraph> CreateSubGraph(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>> tensors = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>> operators = 0,
+ flatbuffers::Offset<flatbuffers::String> name = 0,
+ circle::DataFormat data_format = circle::DataFormat_CHANNELS_LAST)
+{
+ SubGraphBuilder builder_(_fbb);
+ builder_.add_name(name);
+ builder_.add_operators(operators);
+ builder_.add_outputs(outputs);
+ builder_.add_inputs(inputs);
+ builder_.add_tensors(tensors);
+ builder_.add_data_format(data_format);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<SubGraph> CreateSubGraphDirect(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ const std::vector<flatbuffers::Offset<circle::Tensor>> *tensors = nullptr,
+ const std::vector<int32_t> *inputs = nullptr, const std::vector<int32_t> *outputs = nullptr,
+ const std::vector<flatbuffers::Offset<circle::Operator>> *operators = nullptr,
+ const char *name = nullptr, circle::DataFormat data_format = circle::DataFormat_CHANNELS_LAST)
+{
+ auto tensors__ = tensors ? _fbb.CreateVector<flatbuffers::Offset<circle::Tensor>>(*tensors) : 0;
+ auto inputs__ = inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0;
+ auto outputs__ = outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0;
+ auto operators__ =
+ operators ? _fbb.CreateVector<flatbuffers::Offset<circle::Operator>>(*operators) : 0;
+ auto name__ = name ? _fbb.CreateString(name) : 0;
+ return circle::CreateSubGraph(_fbb, tensors__, inputs__, outputs__, operators__, name__,
+ data_format);
+}
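+
+// SubGraph assembly follows the same Direct pattern; a hedged sketch (the
+// vectors below are assumed to have been populated with CreateTensor and
+// CreateOperator calls on the same builder):
+//
+//   std::vector<flatbuffers::Offset<circle::Tensor>> tensors = /* ... */;
+//   std::vector<flatbuffers::Offset<circle::Operator>> ops = /* ... */;
+//   std::vector<int32_t> graph_inputs{0}, graph_outputs{2};
+//   auto sg = circle::CreateSubGraphDirect(fbb, &tensors, &graph_inputs,
+//                                          &graph_outputs, &ops, "main");
+//
+// data_format is left at its default, circle::DataFormat_CHANNELS_LAST.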
+
+flatbuffers::Offset<SubGraph>
+CreateSubGraph(flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct BufferT : public flatbuffers::NativeTable
+{
+ typedef Buffer TableType;
+ std::vector<uint8_t> data{};
+};
+
+struct Buffer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef BufferT NativeTableType;
+ typedef BufferBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_DATA = 4
+ };
+ const flatbuffers::Vector<uint8_t> *data() const
+ {
+ return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_DATA);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_DATA) &&
+ verifier.VerifyVector(data()) && verifier.EndTable();
+ }
+ BufferT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(BufferT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<Buffer>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const BufferT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct BufferBuilder
+{
+ typedef Buffer Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_data(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data)
+ {
+ fbb_.AddOffset(Buffer::VT_DATA, data);
+ }
+ explicit BufferBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<Buffer> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Buffer>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Buffer>
+CreateBuffer(flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data = 0)
+{
+ BufferBuilder builder_(_fbb);
+ builder_.add_data(data);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Buffer> CreateBufferDirect(flatbuffers::FlatBufferBuilder &_fbb,
+ const std::vector<uint8_t> *data = nullptr)
+{
+ if (data)
+ {
+ _fbb.ForceVectorAlignment(data->size(), sizeof(uint8_t), 16);
+ }
+ auto data__ = data ? _fbb.CreateVector<uint8_t>(*data) : 0;
+ return circle::CreateBuffer(_fbb, data__);
+}
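+
+// CreateBufferDirect force-aligns the data vector to 16 bytes before writing
+// it; presumably this keeps tensor payloads suitably aligned for in-place use
+// (e.g. memory-mapped models and vectorized loads) regardless of what
+// precedes them in the serialized buffer.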
+
+flatbuffers::Offset<Buffer>
+CreateBuffer(flatbuffers::FlatBufferBuilder &_fbb, const BufferT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct MetadataT : public flatbuffers::NativeTable
+{
+ typedef Metadata TableType;
+ std::string name{};
+ uint32_t buffer = 0;
+};
+
+struct Metadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef MetadataT NativeTableType;
+ typedef MetadataBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_NAME = 4,
+ VT_BUFFER = 6
+ };
+ const flatbuffers::String *name() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_NAME);
+ }
+ uint32_t buffer() const { return GetField<uint32_t>(VT_BUFFER, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_NAME) &&
+ verifier.VerifyString(name()) && VerifyField<uint32_t>(verifier, VT_BUFFER) &&
+ verifier.EndTable();
+ }
+ MetadataT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(MetadataT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<Metadata>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const MetadataT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct MetadataBuilder
+{
+ typedef Metadata Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_name(flatbuffers::Offset<flatbuffers::String> name)
+ {
+ fbb_.AddOffset(Metadata::VT_NAME, name);
+ }
+ void add_buffer(uint32_t buffer) { fbb_.AddElement<uint32_t>(Metadata::VT_BUFFER, buffer, 0); }
+ explicit MetadataBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<Metadata> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Metadata>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Metadata>
+CreateMetadata(flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::String> name = 0, uint32_t buffer = 0)
+{
+ MetadataBuilder builder_(_fbb);
+ builder_.add_buffer(buffer);
+ builder_.add_name(name);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Metadata> CreateMetadataDirect(flatbuffers::FlatBufferBuilder &_fbb,
+ const char *name = nullptr,
+ uint32_t buffer = 0)
+{
+ auto name__ = name ? _fbb.CreateString(name) : 0;
+ return circle::CreateMetadata(_fbb, name__, buffer);
+}
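+
+// Metadata only pairs a name with an index into the model's buffers vector,
+// so arbitrary binary blobs can ride along with a model under a lookup key.
+// A hypothetical flow (the name, payload, and buf_idx below are assumptions):
+//
+//   auto blob = circle::CreateBufferDirect(fbb, &bytes); // bytes: assumed payload
+//   // ... append blob to the model's buffers vector, at index buf_idx ...
+//   auto md = circle::CreateMetadataDirect(fbb, "my_tool_info", buf_idx);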
+
+flatbuffers::Offset<Metadata>
+CreateMetadata(flatbuffers::FlatBufferBuilder &_fbb, const MetadataT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct TensorMapT : public flatbuffers::NativeTable
+{
+ typedef TensorMap TableType;
+ std::string name{};
+ uint32_t tensor_index = 0;
+};
+
+struct TensorMap FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef TensorMapT NativeTableType;
+ typedef TensorMapBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_NAME = 4,
+ VT_TENSOR_INDEX = 6
+ };
+ const flatbuffers::String *name() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_NAME);
+ }
+ uint32_t tensor_index() const { return GetField<uint32_t>(VT_TENSOR_INDEX, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_NAME) &&
+ verifier.VerifyString(name()) && VerifyField<uint32_t>(verifier, VT_TENSOR_INDEX) &&
+ verifier.EndTable();
+ }
+ TensorMapT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(TensorMapT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<TensorMap>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const TensorMapT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct TensorMapBuilder
+{
+ typedef TensorMap Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_name(flatbuffers::Offset<flatbuffers::String> name)
+ {
+ fbb_.AddOffset(TensorMap::VT_NAME, name);
+ }
+ void add_tensor_index(uint32_t tensor_index)
+ {
+ fbb_.AddElement<uint32_t>(TensorMap::VT_TENSOR_INDEX, tensor_index, 0);
+ }
+ explicit TensorMapBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<TensorMap> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<TensorMap>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<TensorMap>
+CreateTensorMap(flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::String> name = 0, uint32_t tensor_index = 0)
+{
+ TensorMapBuilder builder_(_fbb);
+ builder_.add_tensor_index(tensor_index);
+ builder_.add_name(name);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<TensorMap> CreateTensorMapDirect(flatbuffers::FlatBufferBuilder &_fbb,
+ const char *name = nullptr,
+ uint32_t tensor_index = 0)
+{
+ auto name__ = name ? _fbb.CreateString(name) : 0;
+ return circle::CreateTensorMap(_fbb, name__, tensor_index);
+}
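+
+// A TensorMap binds a signature-level name to a tensor index inside the
+// subgraph that the enclosing SignatureDef points at. Sketch with invented
+// values:
+//
+//   auto in0 = circle::CreateTensorMapDirect(fbb, "input_0", /*tensor_index=*/0);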
+
+flatbuffers::Offset<TensorMap>
+CreateTensorMap(flatbuffers::FlatBufferBuilder &_fbb, const TensorMapT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct SignatureDefT : public flatbuffers::NativeTable
+{
+ typedef SignatureDef TableType;
+ std::vector<std::unique_ptr<circle::TensorMapT>> inputs{};
+ std::vector<std::unique_ptr<circle::TensorMapT>> outputs{};
+ std::string signature_key{};
+ uint32_t subgraph_index = 0;
+};
+
+struct SignatureDef FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef SignatureDefT NativeTableType;
+ typedef SignatureDefBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_INPUTS = 4,
+ VT_OUTPUTS = 6,
+ VT_SIGNATURE_KEY = 8,
+ VT_SUBGRAPH_INDEX = 12
+ };
+ const flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>> *inputs() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>> *>(
+ VT_INPUTS);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>> *outputs() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>> *>(
+ VT_OUTPUTS);
+ }
+ const flatbuffers::String *signature_key() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_SIGNATURE_KEY);
+ }
+ uint32_t subgraph_index() const { return GetField<uint32_t>(VT_SUBGRAPH_INDEX, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_INPUTS) &&
+ verifier.VerifyVector(inputs()) && verifier.VerifyVectorOfTables(inputs()) &&
+ VerifyOffset(verifier, VT_OUTPUTS) && verifier.VerifyVector(outputs()) &&
+ verifier.VerifyVectorOfTables(outputs()) && VerifyOffset(verifier, VT_SIGNATURE_KEY) &&
+ verifier.VerifyString(signature_key()) &&
+ VerifyField<uint32_t>(verifier, VT_SUBGRAPH_INDEX) && verifier.EndTable();
+ }
+ SignatureDefT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(SignatureDefT *_o,
+ const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<SignatureDef>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const SignatureDefT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SignatureDefBuilder
+{
+ typedef SignatureDef Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_inputs(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>>> inputs)
+ {
+ fbb_.AddOffset(SignatureDef::VT_INPUTS, inputs);
+ }
+ void add_outputs(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>>> outputs)
+ {
+ fbb_.AddOffset(SignatureDef::VT_OUTPUTS, outputs);
+ }
+ void add_signature_key(flatbuffers::Offset<flatbuffers::String> signature_key)
+ {
+ fbb_.AddOffset(SignatureDef::VT_SIGNATURE_KEY, signature_key);
+ }
+ void add_subgraph_index(uint32_t subgraph_index)
+ {
+ fbb_.AddElement<uint32_t>(SignatureDef::VT_SUBGRAPH_INDEX, subgraph_index, 0);
+ }
+ explicit SignatureDefBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<SignatureDef> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SignatureDef>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SignatureDef> CreateSignatureDef(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>>> inputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>>> outputs = 0,
+ flatbuffers::Offset<flatbuffers::String> signature_key = 0, uint32_t subgraph_index = 0)
+{
+ SignatureDefBuilder builder_(_fbb);
+ builder_.add_subgraph_index(subgraph_index);
+ builder_.add_signature_key(signature_key);
+ builder_.add_outputs(outputs);
+ builder_.add_inputs(inputs);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<SignatureDef> CreateSignatureDefDirect(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ const std::vector<flatbuffers::Offset<circle::TensorMap>> *inputs = nullptr,
+ const std::vector<flatbuffers::Offset<circle::TensorMap>> *outputs = nullptr,
+ const char *signature_key = nullptr, uint32_t subgraph_index = 0)
+{
+ auto inputs__ = inputs ? _fbb.CreateVector<flatbuffers::Offset<circle::TensorMap>>(*inputs) : 0;
+ auto outputs__ =
+ outputs ? _fbb.CreateVector<flatbuffers::Offset<circle::TensorMap>>(*outputs) : 0;
+ auto signature_key__ = signature_key ? _fbb.CreateString(signature_key) : 0;
+ return circle::CreateSignatureDef(_fbb, inputs__, outputs__, signature_key__, subgraph_index);
+}
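+
+// Combining the pieces above into one SignatureDef; every name and index in
+// this sketch is illustrative:
+//
+//   std::vector<flatbuffers::Offset<circle::TensorMap>> ins{in0};
+//   std::vector<flatbuffers::Offset<circle::TensorMap>> outs{out0};
+//   auto sig = circle::CreateSignatureDefDirect(
+//     fbb, &ins, &outs, "serving_default", /*subgraph_index=*/0);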
+
+flatbuffers::Offset<SignatureDef>
+CreateSignatureDef(flatbuffers::FlatBufferBuilder &_fbb, const SignatureDefT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct ModelT : public flatbuffers::NativeTable
+{
+ typedef Model TableType;
+ uint32_t version = 0;
+ std::vector<std::unique_ptr<circle::OperatorCodeT>> operator_codes{};
+ std::vector<std::unique_ptr<circle::SubGraphT>> subgraphs{};
+ std::string description{};
+ std::vector<std::unique_ptr<circle::BufferT>> buffers{};
+ std::vector<int32_t> metadata_buffer{};
+ std::vector<std::unique_ptr<circle::MetadataT>> metadata{};
+ std::vector<std::unique_ptr<circle::SignatureDefT>> signature_defs{};
+};
+
+struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef ModelT NativeTableType;
+ typedef ModelBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_VERSION = 4,
+ VT_OPERATOR_CODES = 6,
+ VT_SUBGRAPHS = 8,
+ VT_DESCRIPTION = 10,
+ VT_BUFFERS = 12,
+ VT_METADATA_BUFFER = 14,
+ VT_METADATA = 16,
+ VT_SIGNATURE_DEFS = 18
+ };
+ uint32_t version() const { return GetField<uint32_t>(VT_VERSION, 0); }
+ const flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>> *operator_codes() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>> *>(
+ VT_OPERATOR_CODES);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>> *subgraphs() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>> *>(
+ VT_SUBGRAPHS);
+ }
+ const flatbuffers::String *description() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_DESCRIPTION);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>> *buffers() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>> *>(VT_BUFFERS);
+ }
+ const flatbuffers::Vector<int32_t> *metadata_buffer() const
+ {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_METADATA_BUFFER);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>> *metadata() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>> *>(
+ VT_METADATA);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<circle::SignatureDef>> *signature_defs() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::SignatureDef>> *>(
+ VT_SIGNATURE_DEFS);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<uint32_t>(verifier, VT_VERSION) &&
+ VerifyOffset(verifier, VT_OPERATOR_CODES) && verifier.VerifyVector(operator_codes()) &&
+ verifier.VerifyVectorOfTables(operator_codes()) &&
+ VerifyOffset(verifier, VT_SUBGRAPHS) && verifier.VerifyVector(subgraphs()) &&
+ verifier.VerifyVectorOfTables(subgraphs()) && VerifyOffset(verifier, VT_DESCRIPTION) &&
+ verifier.VerifyString(description()) && VerifyOffset(verifier, VT_BUFFERS) &&
+ verifier.VerifyVector(buffers()) && verifier.VerifyVectorOfTables(buffers()) &&
+ VerifyOffset(verifier, VT_METADATA_BUFFER) && verifier.VerifyVector(metadata_buffer()) &&
+ VerifyOffset(verifier, VT_METADATA) && verifier.VerifyVector(metadata()) &&
+ verifier.VerifyVectorOfTables(metadata()) && VerifyOffset(verifier, VT_SIGNATURE_DEFS) &&
+ verifier.VerifyVector(signature_defs()) &&
+ verifier.VerifyVectorOfTables(signature_defs()) && verifier.EndTable();
+ }
+ ModelT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ void UnPackTo(ModelT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+ static flatbuffers::Offset<Model>
+ Pack(flatbuffers::FlatBufferBuilder &_fbb, const ModelT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct ModelBuilder
+{
+ typedef Model Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_version(uint32_t version) { fbb_.AddElement<uint32_t>(Model::VT_VERSION, version, 0); }
+ void add_operator_codes(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>>>
+ operator_codes)
+ {
+ fbb_.AddOffset(Model::VT_OPERATOR_CODES, operator_codes);
+ }
+ void add_subgraphs(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>> subgraphs)
+ {
+ fbb_.AddOffset(Model::VT_SUBGRAPHS, subgraphs);
+ }
+ void add_description(flatbuffers::Offset<flatbuffers::String> description)
+ {
+ fbb_.AddOffset(Model::VT_DESCRIPTION, description);
+ }
+ void
+ add_buffers(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>>> buffers)
+ {
+ fbb_.AddOffset(Model::VT_BUFFERS, buffers);
+ }
+ void add_metadata_buffer(flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer)
+ {
+ fbb_.AddOffset(Model::VT_METADATA_BUFFER, metadata_buffer);
+ }
+ void add_metadata(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>>> metadata)
+ {
+ fbb_.AddOffset(Model::VT_METADATA, metadata);
+ }
+ void add_signature_defs(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::SignatureDef>>>
+ signature_defs)
+ {
+ fbb_.AddOffset(Model::VT_SIGNATURE_DEFS, signature_defs);
+ }
+ explicit ModelBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<Model> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Model>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Model> CreateModel(
+ flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>>>
+ operator_codes = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>> subgraphs = 0,
+ flatbuffers::Offset<flatbuffers::String> description = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>>> buffers = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>>> metadata = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::SignatureDef>>>
+ signature_defs = 0)
+{
+ ModelBuilder builder_(_fbb);
+ builder_.add_signature_defs(signature_defs);
+ builder_.add_metadata(metadata);
+ builder_.add_metadata_buffer(metadata_buffer);
+ builder_.add_buffers(buffers);
+ builder_.add_description(description);
+ builder_.add_subgraphs(subgraphs);
+ builder_.add_operator_codes(operator_codes);
+ builder_.add_version(version);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Model> CreateModelDirect(
+ flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
+ const std::vector<flatbuffers::Offset<circle::OperatorCode>> *operator_codes = nullptr,
+ const std::vector<flatbuffers::Offset<circle::SubGraph>> *subgraphs = nullptr,
+ const char *description = nullptr,
+ const std::vector<flatbuffers::Offset<circle::Buffer>> *buffers = nullptr,
+ const std::vector<int32_t> *metadata_buffer = nullptr,
+ const std::vector<flatbuffers::Offset<circle::Metadata>> *metadata = nullptr,
+ const std::vector<flatbuffers::Offset<circle::SignatureDef>> *signature_defs = nullptr)
+{
+ auto operator_codes__ =
+ operator_codes ? _fbb.CreateVector<flatbuffers::Offset<circle::OperatorCode>>(*operator_codes)
+ : 0;
+ auto subgraphs__ =
+ subgraphs ? _fbb.CreateVector<flatbuffers::Offset<circle::SubGraph>>(*subgraphs) : 0;
+ auto description__ = description ? _fbb.CreateString(description) : 0;
+ auto buffers__ = buffers ? _fbb.CreateVector<flatbuffers::Offset<circle::Buffer>>(*buffers) : 0;
+ auto metadata_buffer__ = metadata_buffer ? _fbb.CreateVector<int32_t>(*metadata_buffer) : 0;
+ auto metadata__ =
+ metadata ? _fbb.CreateVector<flatbuffers::Offset<circle::Metadata>>(*metadata) : 0;
+ auto signature_defs__ =
+ signature_defs ? _fbb.CreateVector<flatbuffers::Offset<circle::SignatureDef>>(*signature_defs)
+ : 0;
+ return circle::CreateModel(_fbb, version, operator_codes__, subgraphs__, description__, buffers__,
+ metadata_buffer__, metadata__, signature_defs__);
+}
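+
+// End-to-end sketch of assembling a minimal Model with the Direct helpers.
+// All literals are invented, and finishing the buffer assumes the root-type
+// helpers (e.g. FinishModelBuffer) that flatc emits for this schema:
+//
+//   flatbuffers::FlatBufferBuilder fbb;
+//   std::vector<flatbuffers::Offset<circle::Buffer>> bufs{
+//     circle::CreateBuffer(fbb)};  // buffer 0 is conventionally left empty
+//   auto model = circle::CreateModelDirect(fbb, /*version=*/3,
+//                                          /*operator_codes=*/nullptr,
+//                                          /*subgraphs=*/nullptr,
+//                                          /*description=*/"sketch", &bufs);
+//   circle::FinishModelBuffer(fbb, model);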
+
+flatbuffers::Offset<Model> CreateModel(flatbuffers::FlatBufferBuilder &_fbb, const ModelT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+inline CustomQuantizationT *
+CustomQuantization::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<CustomQuantizationT>(new CustomQuantizationT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void CustomQuantization::UnPackTo(CustomQuantizationT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = custom();
+ if (_e)
+ {
+ _o->custom.resize(_e->size());
+ std::copy(_e->begin(), _e->end(), _o->custom.begin());
+ }
+ }
+}
+
+inline flatbuffers::Offset<CustomQuantization>
+CustomQuantization::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CustomQuantizationT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateCustomQuantization(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<CustomQuantization>
+CreateCustomQuantization(flatbuffers::FlatBufferBuilder &_fbb, const CustomQuantizationT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const CustomQuantizationT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ _fbb.ForceVectorAlignment(_o->custom.size(), sizeof(uint8_t), 16);
+ auto _custom = _o->custom.size() ? _fbb.CreateVector(_o->custom) : 0;
+ return circle::CreateCustomQuantization(_fbb, _custom);
+}
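+
+// From here on each table gets its object-API round trip: UnPack()/UnPackTo()
+// copy a read-only flatbuffer table into the mutable *T struct, and
+// Pack()/Create*() serialize it back. A hedged round-trip sketch, assuming
+// the GetModel root accessor this header is expected to provide:
+//
+//   std::unique_ptr<circle::ModelT> m(circle::GetModel(data)->UnPack());
+//   m->description = "patched";
+//   flatbuffers::FlatBufferBuilder fbb;
+//   fbb.Finish(circle::Model::Pack(fbb, m.get()));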
+
+inline QuantizationParametersT *
+QuantizationParameters::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<QuantizationParametersT>(new QuantizationParametersT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void
+QuantizationParameters::UnPackTo(QuantizationParametersT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = min();
+ if (_e)
+ {
+ _o->min.resize(_e->size());
+ for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++)
+ {
+ _o->min[_i] = _e->Get(_i);
+ }
+ }
+ }
+ {
+ auto _e = max();
+ if (_e)
+ {
+ _o->max.resize(_e->size());
+ for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++)
+ {
+ _o->max[_i] = _e->Get(_i);
+ }
+ }
+ }
+ {
+ auto _e = scale();
+ if (_e)
+ {
+ _o->scale.resize(_e->size());
+ for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++)
+ {
+ _o->scale[_i] = _e->Get(_i);
+ }
+ }
+ }
+ {
+ auto _e = zero_point();
+ if (_e)
+ {
+ _o->zero_point.resize(_e->size());
+ for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++)
+ {
+ _o->zero_point[_i] = _e->Get(_i);
+ }
+ }
+ }
+ {
+ auto _e = details_type();
+ _o->details.type = _e;
+ }
+ {
+ auto _e = details();
+ if (_e)
+ _o->details.value = circle::QuantizationDetailsUnion::UnPack(_e, details_type(), _resolver);
+ }
+ {
+ auto _e = quantized_dimension();
+ _o->quantized_dimension = _e;
+ }
+}
+
+inline flatbuffers::Offset<QuantizationParameters>
+QuantizationParameters::Pack(flatbuffers::FlatBufferBuilder &_fbb,
+ const QuantizationParametersT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateQuantizationParameters(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<QuantizationParameters>
+CreateQuantizationParameters(flatbuffers::FlatBufferBuilder &_fbb,
+ const QuantizationParametersT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const QuantizationParametersT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _min = _o->min.size() ? _fbb.CreateVector(_o->min) : 0;
+ auto _max = _o->max.size() ? _fbb.CreateVector(_o->max) : 0;
+ auto _scale = _o->scale.size() ? _fbb.CreateVector(_o->scale) : 0;
+ auto _zero_point = _o->zero_point.size() ? _fbb.CreateVector(_o->zero_point) : 0;
+ auto _details_type = _o->details.type;
+ auto _details = _o->details.Pack(_fbb);
+ auto _quantized_dimension = _o->quantized_dimension;
+ return circle::CreateQuantizationParameters(_fbb, _min, _max, _scale, _zero_point, _details_type,
+ _details, _quantized_dimension);
+}
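+
+// Note the union plumbing above: details travels as a (type, value) pair, so
+// UnPackTo() restores both through QuantizationDetailsUnion::UnPack and
+// Pack() writes _details_type next to the packed value. Code editing details
+// through the object API must keep the two in sync, e.g. via the union's
+// Set() helper (sketch):
+//
+//   circle::QuantizationParametersT q;
+//   q.details.Set(circle::CustomQuantizationT{});  // sets type and value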
+
+inline Int32VectorT *Int32Vector::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<Int32VectorT>(new Int32VectorT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void Int32Vector::UnPackTo(Int32VectorT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = values();
+ if (_e)
+ {
+ _o->values.resize(_e->size());
+ for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++)
+ {
+ _o->values[_i] = _e->Get(_i);
+ }
+ }
+ }
+}
+
+inline flatbuffers::Offset<Int32Vector>
+Int32Vector::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Int32VectorT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateInt32Vector(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<Int32Vector>
+CreateInt32Vector(flatbuffers::FlatBufferBuilder &_fbb, const Int32VectorT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const Int32VectorT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _values = _o->values.size() ? _fbb.CreateVector(_o->values) : 0;
+ return circle::CreateInt32Vector(_fbb, _values);
+}
+
+inline Uint16VectorT *Uint16Vector::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<Uint16VectorT>(new Uint16VectorT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void Uint16Vector::UnPackTo(Uint16VectorT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = values();
+ if (_e)
+ {
+ _o->values.resize(_e->size());
+ for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++)
+ {
+ _o->values[_i] = _e->Get(_i);
+ }
+ }
+ }
+}
+
+inline flatbuffers::Offset<Uint16Vector>
+Uint16Vector::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Uint16VectorT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateUint16Vector(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<Uint16Vector>
+CreateUint16Vector(flatbuffers::FlatBufferBuilder &_fbb, const Uint16VectorT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const Uint16VectorT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ _fbb.ForceVectorAlignment(_o->values.size(), sizeof(uint16_t), 4);
+ auto _values = _o->values.size() ? _fbb.CreateVector(_o->values) : 0;
+ return circle::CreateUint16Vector(_fbb, _values);
+}
+
+inline Uint8VectorT *Uint8Vector::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<Uint8VectorT>(new Uint8VectorT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void Uint8Vector::UnPackTo(Uint8VectorT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = values();
+ if (_e)
+ {
+ _o->values.resize(_e->size());
+ std::copy(_e->begin(), _e->end(), _o->values.begin());
+ }
+ }
+}
+
+inline flatbuffers::Offset<Uint8Vector>
+Uint8Vector::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Uint8VectorT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateUint8Vector(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<Uint8Vector>
+CreateUint8Vector(flatbuffers::FlatBufferBuilder &_fbb, const Uint8VectorT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const Uint8VectorT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ _fbb.ForceVectorAlignment(_o->values.size(), sizeof(uint8_t), 4);
+ auto _values = _o->values.size() ? _fbb.CreateVector(_o->values) : 0;
+ return circle::CreateUint8Vector(_fbb, _values);
+}
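+
+// The ForceVectorAlignment calls in the two Create*Vector functions above pad
+// the uint16/uint8 sparse-index payloads to 4-byte alignment, matching the
+// force_align attribute in the schema. Object-API sketch with invented
+// indices:
+//
+//   circle::Uint8VectorT v;
+//   v.values = {0, 3, 5};
+//   auto off = circle::CreateUint8Vector(fbb, &v);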
+
+inline DimensionMetadataT *
+DimensionMetadata::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<DimensionMetadataT>(new DimensionMetadataT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void DimensionMetadata::UnPackTo(DimensionMetadataT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = format();
+ _o->format = _e;
+ }
+ {
+ auto _e = dense_size();
+ _o->dense_size = _e;
+ }
+ {
+ auto _e = array_segments_type();
+ _o->array_segments.type = _e;
+ }
+ {
+ auto _e = array_segments();
+ if (_e)
+ _o->array_segments.value =
+ circle::SparseIndexVectorUnion::UnPack(_e, array_segments_type(), _resolver);
+ }
+ {
+ auto _e = array_indices_type();
+ _o->array_indices.type = _e;
+ }
+ {
+ auto _e = array_indices();
+ if (_e)
+ _o->array_indices.value =
+ circle::SparseIndexVectorUnion::UnPack(_e, array_indices_type(), _resolver);
+ }
+}
+
+inline flatbuffers::Offset<DimensionMetadata>
+DimensionMetadata::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DimensionMetadataT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateDimensionMetadata(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<DimensionMetadata>
+CreateDimensionMetadata(flatbuffers::FlatBufferBuilder &_fbb, const DimensionMetadataT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const DimensionMetadataT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _format = _o->format;
+ auto _dense_size = _o->dense_size;
+ auto _array_segments_type = _o->array_segments.type;
+ auto _array_segments = _o->array_segments.Pack(_fbb);
+ auto _array_indices_type = _o->array_indices.type;
+ auto _array_indices = _o->array_indices.Pack(_fbb);
+ return circle::CreateDimensionMetadata(_fbb, _format, _dense_size, _array_segments_type,
+ _array_segments, _array_indices_type, _array_indices);
+}
+
+inline SparsityParametersT *
+SparsityParameters::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<SparsityParametersT>(new SparsityParametersT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void SparsityParameters::UnPackTo(SparsityParametersT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = traversal_order();
+ if (_e)
+ {
+ _o->traversal_order.resize(_e->size());
+ for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++)
+ {
+ _o->traversal_order[_i] = _e->Get(_i);
+ }
+ }
+ }
+ {
+ auto _e = block_map();
+ if (_e)
+ {
+ _o->block_map.resize(_e->size());
+ for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++)
+ {
+ _o->block_map[_i] = _e->Get(_i);
+ }
+ }
+ }
+ {
+ auto _e = dim_metadata();
+ if (_e)
+ {
+ _o->dim_metadata.resize(_e->size());
+ for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++)
+ {
+ _o->dim_metadata[_i] =
+ std::unique_ptr<circle::DimensionMetadataT>(_e->Get(_i)->UnPack(_resolver));
+ }
+ }
+ }
+}
+
+inline flatbuffers::Offset<SparsityParameters>
+SparsityParameters::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SparsityParametersT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateSparsityParameters(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<SparsityParameters>
+CreateSparsityParameters(flatbuffers::FlatBufferBuilder &_fbb, const SparsityParametersT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const SparsityParametersT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _traversal_order = _o->traversal_order.size() ? _fbb.CreateVector(_o->traversal_order) : 0;
+ auto _block_map = _o->block_map.size() ? _fbb.CreateVector(_o->block_map) : 0;
+ auto _dim_metadata = _o->dim_metadata.size()
+ ? _fbb.CreateVector<flatbuffers::Offset<circle::DimensionMetadata>>(
+ _o->dim_metadata.size(),
+ [](size_t i, _VectorArgs *__va) {
+ return CreateDimensionMetadata(
+ *__va->__fbb, __va->__o->dim_metadata[i].get(), __va->__rehasher);
+ },
+ &_va)
+ : 0;
+ return circle::CreateSparsityParameters(_fbb, _traversal_order, _block_map, _dim_metadata);
+}
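+
+// The _dim_metadata expression above is the generated pattern for vectors of
+// sub-tables: CreateVector receives a capture-less callback that packs
+// element i on demand, with _VectorArgs carrying the builder, source object
+// and rehasher into it. The same _va scaffolding appears (often unused) in
+// every Create* function in this section.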
+
+inline TensorT *Tensor::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<TensorT>(new TensorT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void Tensor::UnPackTo(TensorT *_o, const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = shape();
+ if (_e)
+ {
+ _o->shape.resize(_e->size());
+ for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++)
+ {
+ _o->shape[_i] = _e->Get(_i);
+ }
+ }
+ }
+ {
+ auto _e = type();
+ _o->type = _e;
+ }
+ {
+ auto _e = buffer();
+ _o->buffer = _e;
+ }
+ {
+ auto _e = name();
+ if (_e)
+ _o->name = _e->str();
+ }
+ {
+ auto _e = quantization();
+ if (_e)
+ _o->quantization = std::unique_ptr<circle::QuantizationParametersT>(_e->UnPack(_resolver));
+ }
+ {
+ auto _e = is_variable();
+ _o->is_variable = _e;
+ }
+ {
+ auto _e = sparsity();
+ if (_e)
+ _o->sparsity = std::unique_ptr<circle::SparsityParametersT>(_e->UnPack(_resolver));
+ }
+ {
+ auto _e = shape_signature();
+ if (_e)
+ {
+ _o->shape_signature.resize(_e->size());
+ for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++)
+ {
+ _o->shape_signature[_i] = _e->Get(_i);
+ }
+ }
+ }
+}
+
+inline flatbuffers::Offset<Tensor> Tensor::Pack(flatbuffers::FlatBufferBuilder &_fbb,
+ const TensorT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateTensor(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<Tensor> CreateTensor(flatbuffers::FlatBufferBuilder &_fbb,
+ const TensorT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const TensorT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _shape = _o->shape.size() ? _fbb.CreateVector(_o->shape) : 0;
+ auto _type = _o->type;
+ auto _buffer = _o->buffer;
+ auto _name = _o->name.empty() ? 0 : _fbb.CreateString(_o->name);
+ auto _quantization =
+ _o->quantization ? CreateQuantizationParameters(_fbb, _o->quantization.get(), _rehasher) : 0;
+ auto _is_variable = _o->is_variable;
+ auto _sparsity = _o->sparsity ? CreateSparsityParameters(_fbb, _o->sparsity.get(), _rehasher) : 0;
+ auto _shape_signature = _o->shape_signature.size() ? _fbb.CreateVector(_o->shape_signature) : 0;
+ return circle::CreateTensor(_fbb, _shape, _type, _buffer, _name, _quantization, _is_variable,
+ _sparsity, _shape_signature);
+}
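+
+// Authoring a Tensor through the object API, as a sketch (shape, name and
+// buffer index are invented):
+//
+//   circle::TensorT t;
+//   t.shape = {1, 224, 224, 3};
+//   t.type = circle::TensorType_FLOAT32;
+//   t.buffer = 0;
+//   t.name = "input";
+//   auto off = circle::CreateTensor(fbb, &t);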
+
+inline Conv2DOptionsT *
+Conv2DOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<Conv2DOptionsT>(new Conv2DOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void Conv2DOptions::UnPackTo(Conv2DOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = padding();
+ _o->padding = _e;
+ }
+ {
+ auto _e = stride_w();
+ _o->stride_w = _e;
+ }
+ {
+ auto _e = stride_h();
+ _o->stride_h = _e;
+ }
+ {
+ auto _e = fused_activation_function();
+ _o->fused_activation_function = _e;
+ }
+ {
+ auto _e = dilation_w_factor();
+ _o->dilation_w_factor = _e;
+ }
+ {
+ auto _e = dilation_h_factor();
+ _o->dilation_h_factor = _e;
+ }
+}
+
+inline flatbuffers::Offset<Conv2DOptions>
+Conv2DOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateConv2DOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<Conv2DOptions>
+CreateConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const Conv2DOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _padding = _o->padding;
+ auto _stride_w = _o->stride_w;
+ auto _stride_h = _o->stride_h;
+ auto _fused_activation_function = _o->fused_activation_function;
+ auto _dilation_w_factor = _o->dilation_w_factor;
+ auto _dilation_h_factor = _o->dilation_h_factor;
+ return circle::CreateConv2DOptions(_fbb, _padding, _stride_w, _stride_h,
+ _fused_activation_function, _dilation_w_factor,
+ _dilation_h_factor);
+}
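+
+// The operator-option tables from here on all follow the same scalar-copy
+// shape seen in Conv2DOptions: UnPackTo() reads each accessor into the *T
+// field and Create*Options() reads it back. Sketch with invented values:
+//
+//   circle::Conv2DOptionsT c;
+//   c.padding = circle::Padding_SAME;
+//   c.stride_w = 1;
+//   c.stride_h = 1;
+//   auto off = circle::CreateConv2DOptions(fbb, &c);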
+
+inline Conv3DOptionsT *
+Conv3DOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<Conv3DOptionsT>(new Conv3DOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void Conv3DOptions::UnPackTo(Conv3DOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = padding();
+ _o->padding = _e;
+ }
+ {
+ auto _e = stride_d();
+ _o->stride_d = _e;
+ }
+ {
+ auto _e = stride_w();
+ _o->stride_w = _e;
+ }
+ {
+ auto _e = stride_h();
+ _o->stride_h = _e;
+ }
+ {
+ auto _e = fused_activation_function();
+ _o->fused_activation_function = _e;
+ }
+ {
+ auto _e = dilation_d_factor();
+ _o->dilation_d_factor = _e;
+ }
+ {
+ auto _e = dilation_w_factor();
+ _o->dilation_w_factor = _e;
+ }
+ {
+ auto _e = dilation_h_factor();
+ _o->dilation_h_factor = _e;
+ }
+}
+
+inline flatbuffers::Offset<Conv3DOptions>
+Conv3DOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Conv3DOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateConv3DOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<Conv3DOptions>
+CreateConv3DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Conv3DOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const Conv3DOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _padding = _o->padding;
+ auto _stride_d = _o->stride_d;
+ auto _stride_w = _o->stride_w;
+ auto _stride_h = _o->stride_h;
+ auto _fused_activation_function = _o->fused_activation_function;
+ auto _dilation_d_factor = _o->dilation_d_factor;
+ auto _dilation_w_factor = _o->dilation_w_factor;
+ auto _dilation_h_factor = _o->dilation_h_factor;
+ return circle::CreateConv3DOptions(_fbb, _padding, _stride_d, _stride_w, _stride_h,
+ _fused_activation_function, _dilation_d_factor,
+ _dilation_w_factor, _dilation_h_factor);
+}
+
+inline Pool2DOptionsT *
+Pool2DOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<Pool2DOptionsT>(new Pool2DOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void Pool2DOptions::UnPackTo(Pool2DOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = padding();
+ _o->padding = _e;
+ }
+ {
+ auto _e = stride_w();
+ _o->stride_w = _e;
+ }
+ {
+ auto _e = stride_h();
+ _o->stride_h = _e;
+ }
+ {
+ auto _e = filter_width();
+ _o->filter_width = _e;
+ }
+ {
+ auto _e = filter_height();
+ _o->filter_height = _e;
+ }
+ {
+ auto _e = fused_activation_function();
+ _o->fused_activation_function = _e;
+ }
+}
+
+inline flatbuffers::Offset<Pool2DOptions>
+Pool2DOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreatePool2DOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<Pool2DOptions>
+CreatePool2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const Pool2DOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _padding = _o->padding;
+ auto _stride_w = _o->stride_w;
+ auto _stride_h = _o->stride_h;
+ auto _filter_width = _o->filter_width;
+ auto _filter_height = _o->filter_height;
+ auto _fused_activation_function = _o->fused_activation_function;
+ return circle::CreatePool2DOptions(_fbb, _padding, _stride_w, _stride_h, _filter_width,
+ _filter_height, _fused_activation_function);
+}
+
+inline DepthwiseConv2DOptionsT *
+DepthwiseConv2DOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<DepthwiseConv2DOptionsT>(new DepthwiseConv2DOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void
+DepthwiseConv2DOptions::UnPackTo(DepthwiseConv2DOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = padding();
+ _o->padding = _e;
+ }
+ {
+ auto _e = stride_w();
+ _o->stride_w = _e;
+ }
+ {
+ auto _e = stride_h();
+ _o->stride_h = _e;
+ }
+ {
+ auto _e = depth_multiplier();
+ _o->depth_multiplier = _e;
+ }
+ {
+ auto _e = fused_activation_function();
+ _o->fused_activation_function = _e;
+ }
+ {
+ auto _e = dilation_w_factor();
+ _o->dilation_w_factor = _e;
+ }
+ {
+ auto _e = dilation_h_factor();
+ _o->dilation_h_factor = _e;
+ }
+}
+
+inline flatbuffers::Offset<DepthwiseConv2DOptions>
+DepthwiseConv2DOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb,
+ const DepthwiseConv2DOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateDepthwiseConv2DOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<DepthwiseConv2DOptions>
+CreateDepthwiseConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ const DepthwiseConv2DOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const DepthwiseConv2DOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _padding = _o->padding;
+ auto _stride_w = _o->stride_w;
+ auto _stride_h = _o->stride_h;
+ auto _depth_multiplier = _o->depth_multiplier;
+ auto _fused_activation_function = _o->fused_activation_function;
+ auto _dilation_w_factor = _o->dilation_w_factor;
+ auto _dilation_h_factor = _o->dilation_h_factor;
+ return circle::CreateDepthwiseConv2DOptions(_fbb, _padding, _stride_w, _stride_h,
+ _depth_multiplier, _fused_activation_function,
+ _dilation_w_factor, _dilation_h_factor);
+}
+
+inline ConcatEmbeddingsOptionsT *
+ConcatEmbeddingsOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<ConcatEmbeddingsOptionsT>(new ConcatEmbeddingsOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void
+ConcatEmbeddingsOptions::UnPackTo(ConcatEmbeddingsOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = num_channels();
+ _o->num_channels = _e;
+ }
+ {
+ auto _e = num_columns_per_channel();
+ if (_e)
+ {
+ _o->num_columns_per_channel.resize(_e->size());
+ for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++)
+ {
+ _o->num_columns_per_channel[_i] = _e->Get(_i);
+ }
+ }
+ }
+ {
+ auto _e = embedding_dim_per_channel();
+ if (_e)
+ {
+ _o->embedding_dim_per_channel.resize(_e->size());
+ for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++)
+ {
+ _o->embedding_dim_per_channel[_i] = _e->Get(_i);
+ }
+ }
+ }
+}
+
+inline flatbuffers::Offset<ConcatEmbeddingsOptions>
+ConcatEmbeddingsOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb,
+ const ConcatEmbeddingsOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateConcatEmbeddingsOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<ConcatEmbeddingsOptions>
+CreateConcatEmbeddingsOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ const ConcatEmbeddingsOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const ConcatEmbeddingsOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _num_channels = _o->num_channels;
+ auto _num_columns_per_channel =
+ _o->num_columns_per_channel.size() ? _fbb.CreateVector(_o->num_columns_per_channel) : 0;
+ auto _embedding_dim_per_channel =
+ _o->embedding_dim_per_channel.size() ? _fbb.CreateVector(_o->embedding_dim_per_channel) : 0;
+ return circle::CreateConcatEmbeddingsOptions(_fbb, _num_channels, _num_columns_per_channel,
+ _embedding_dim_per_channel);
+}
+
+inline LSHProjectionOptionsT *
+LSHProjectionOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<LSHProjectionOptionsT>(new LSHProjectionOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void LSHProjectionOptions::UnPackTo(LSHProjectionOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = type();
+ _o->type = _e;
+ }
+}
+
+inline flatbuffers::Offset<LSHProjectionOptions>
+LSHProjectionOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateLSHProjectionOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<LSHProjectionOptions>
+CreateLSHProjectionOptions(flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const LSHProjectionOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _type = _o->type;
+ return circle::CreateLSHProjectionOptions(_fbb, _type);
+}
+
+inline SVDFOptionsT *SVDFOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<SVDFOptionsT>(new SVDFOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void SVDFOptions::UnPackTo(SVDFOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = rank();
+ _o->rank = _e;
+ }
+ {
+ auto _e = fused_activation_function();
+ _o->fused_activation_function = _e;
+ }
+ {
+ auto _e = asymmetric_quantize_inputs();
+ _o->asymmetric_quantize_inputs = _e;
+ }
+}
+
+inline flatbuffers::Offset<SVDFOptions>
+SVDFOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateSVDFOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<SVDFOptions>
+CreateSVDFOptions(flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const SVDFOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _rank = _o->rank;
+ auto _fused_activation_function = _o->fused_activation_function;
+ auto _asymmetric_quantize_inputs = _o->asymmetric_quantize_inputs;
+ return circle::CreateSVDFOptions(_fbb, _rank, _fused_activation_function,
+ _asymmetric_quantize_inputs);
+}
+
+inline RNNOptionsT *RNNOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<RNNOptionsT>(new RNNOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void RNNOptions::UnPackTo(RNNOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = fused_activation_function();
+ _o->fused_activation_function = _e;
+ }
+ {
+ auto _e = asymmetric_quantize_inputs();
+ _o->asymmetric_quantize_inputs = _e;
+ }
+}
+
+inline flatbuffers::Offset<RNNOptions>
+RNNOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateRNNOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<RNNOptions>
+CreateRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const RNNOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _fused_activation_function = _o->fused_activation_function;
+ auto _asymmetric_quantize_inputs = _o->asymmetric_quantize_inputs;
+ return circle::CreateRNNOptions(_fbb, _fused_activation_function, _asymmetric_quantize_inputs);
+}
+
+inline SequenceRNNOptionsT *
+SequenceRNNOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<SequenceRNNOptionsT>(new SequenceRNNOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void SequenceRNNOptions::UnPackTo(SequenceRNNOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = time_major();
+ _o->time_major = _e;
+ }
+ {
+ auto _e = fused_activation_function();
+ _o->fused_activation_function = _e;
+ }
+ {
+ auto _e = asymmetric_quantize_inputs();
+ _o->asymmetric_quantize_inputs = _e;
+ }
+}
+
+inline flatbuffers::Offset<SequenceRNNOptions>
+SequenceRNNOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SequenceRNNOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateSequenceRNNOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<SequenceRNNOptions>
+CreateSequenceRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, const SequenceRNNOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const SequenceRNNOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _time_major = _o->time_major;
+ auto _fused_activation_function = _o->fused_activation_function;
+ auto _asymmetric_quantize_inputs = _o->asymmetric_quantize_inputs;
+ return circle::CreateSequenceRNNOptions(_fbb, _time_major, _fused_activation_function,
+ _asymmetric_quantize_inputs);
+}
+
+inline BidirectionalSequenceRNNOptionsT *
+BidirectionalSequenceRNNOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o =
+ std::unique_ptr<BidirectionalSequenceRNNOptionsT>(new BidirectionalSequenceRNNOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void
+BidirectionalSequenceRNNOptions::UnPackTo(BidirectionalSequenceRNNOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = time_major();
+ _o->time_major = _e;
+ }
+ {
+ auto _e = fused_activation_function();
+ _o->fused_activation_function = _e;
+ }
+ {
+ auto _e = merge_outputs();
+ _o->merge_outputs = _e;
+ }
+ {
+ auto _e = asymmetric_quantize_inputs();
+ _o->asymmetric_quantize_inputs = _e;
+ }
+}
+
+inline flatbuffers::Offset<BidirectionalSequenceRNNOptions>
+BidirectionalSequenceRNNOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb,
+ const BidirectionalSequenceRNNOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateBidirectionalSequenceRNNOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<BidirectionalSequenceRNNOptions>
+CreateBidirectionalSequenceRNNOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ const BidirectionalSequenceRNNOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const BidirectionalSequenceRNNOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _time_major = _o->time_major;
+ auto _fused_activation_function = _o->fused_activation_function;
+ auto _merge_outputs = _o->merge_outputs;
+ auto _asymmetric_quantize_inputs = _o->asymmetric_quantize_inputs;
+ return circle::CreateBidirectionalSequenceRNNOptions(
+ _fbb, _time_major, _fused_activation_function, _merge_outputs, _asymmetric_quantize_inputs);
+}
+
+inline FullyConnectedOptionsT *
+FullyConnectedOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<FullyConnectedOptionsT>(new FullyConnectedOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void FullyConnectedOptions::UnPackTo(FullyConnectedOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = fused_activation_function();
+ _o->fused_activation_function = _e;
+ }
+ {
+ auto _e = weights_format();
+ _o->weights_format = _e;
+ }
+ {
+ auto _e = keep_num_dims();
+ _o->keep_num_dims = _e;
+ }
+ {
+ auto _e = asymmetric_quantize_inputs();
+ _o->asymmetric_quantize_inputs = _e;
+ }
+}
+
+inline flatbuffers::Offset<FullyConnectedOptions>
+FullyConnectedOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateFullyConnectedOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<FullyConnectedOptions>
+CreateFullyConnectedOptions(flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const FullyConnectedOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _fused_activation_function = _o->fused_activation_function;
+ auto _weights_format = _o->weights_format;
+ auto _keep_num_dims = _o->keep_num_dims;
+ auto _asymmetric_quantize_inputs = _o->asymmetric_quantize_inputs;
+ return circle::CreateFullyConnectedOptions(_fbb, _fused_activation_function, _weights_format,
+ _keep_num_dims, _asymmetric_quantize_inputs);
+}
+
+inline SoftmaxOptionsT *
+SoftmaxOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<SoftmaxOptionsT>(new SoftmaxOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void SoftmaxOptions::UnPackTo(SoftmaxOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = beta();
+ _o->beta = _e;
+ }
+}
+
+inline flatbuffers::Offset<SoftmaxOptions>
+SoftmaxOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateSoftmaxOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<SoftmaxOptions>
+CreateSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const SoftmaxOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _beta = _o->beta;
+ return circle::CreateSoftmaxOptions(_fbb, _beta);
+}
+
+inline ConcatenationOptionsT *
+ConcatenationOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<ConcatenationOptionsT>(new ConcatenationOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void ConcatenationOptions::UnPackTo(ConcatenationOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = axis();
+ _o->axis = _e;
+ }
+ {
+ auto _e = fused_activation_function();
+ _o->fused_activation_function = _e;
+ }
+}
+
+inline flatbuffers::Offset<ConcatenationOptions>
+ConcatenationOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateConcatenationOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<ConcatenationOptions>
+CreateConcatenationOptions(flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const ConcatenationOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _axis = _o->axis;
+ auto _fused_activation_function = _o->fused_activation_function;
+ return circle::CreateConcatenationOptions(_fbb, _axis, _fused_activation_function);
+}
+
+inline AddOptionsT *AddOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<AddOptionsT>(new AddOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void AddOptions::UnPackTo(AddOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = fused_activation_function();
+ _o->fused_activation_function = _e;
+ }
+ {
+ auto _e = pot_scale_int16();
+ _o->pot_scale_int16 = _e;
+ }
+}
+
+inline flatbuffers::Offset<AddOptions>
+AddOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateAddOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<AddOptions>
+CreateAddOptions(flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const AddOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _fused_activation_function = _o->fused_activation_function;
+ auto _pot_scale_int16 = _o->pot_scale_int16;
+ return circle::CreateAddOptions(_fbb, _fused_activation_function, _pot_scale_int16);
+}
+
+inline MulOptionsT *MulOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<MulOptionsT>(new MulOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void MulOptions::UnPackTo(MulOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = fused_activation_function();
+ _o->fused_activation_function = _e;
+ }
+}
+
+inline flatbuffers::Offset<MulOptions>
+MulOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateMulOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<MulOptions>
+CreateMulOptions(flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const MulOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _fused_activation_function = _o->fused_activation_function;
+ return circle::CreateMulOptions(_fbb, _fused_activation_function);
+}
+
+inline L2NormOptionsT *
+L2NormOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<L2NormOptionsT>(new L2NormOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void L2NormOptions::UnPackTo(L2NormOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = fused_activation_function();
+ _o->fused_activation_function = _e;
+ }
+}
+
+inline flatbuffers::Offset<L2NormOptions>
+L2NormOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateL2NormOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<L2NormOptions>
+CreateL2NormOptions(flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const L2NormOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _fused_activation_function = _o->fused_activation_function;
+ return circle::CreateL2NormOptions(_fbb, _fused_activation_function);
+}
+
+inline LocalResponseNormalizationOptionsT *
+LocalResponseNormalizationOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o =
+ std::unique_ptr<LocalResponseNormalizationOptionsT>(new LocalResponseNormalizationOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void
+LocalResponseNormalizationOptions::UnPackTo(LocalResponseNormalizationOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = radius();
+ _o->radius = _e;
+ }
+ {
+ auto _e = bias();
+ _o->bias = _e;
+ }
+ {
+ auto _e = alpha();
+ _o->alpha = _e;
+ }
+ {
+ auto _e = beta();
+ _o->beta = _e;
+ }
+}
+
+inline flatbuffers::Offset<LocalResponseNormalizationOptions>
+LocalResponseNormalizationOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb,
+ const LocalResponseNormalizationOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateLocalResponseNormalizationOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<LocalResponseNormalizationOptions>
+CreateLocalResponseNormalizationOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ const LocalResponseNormalizationOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const LocalResponseNormalizationOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _radius = _o->radius;
+ auto _bias = _o->bias;
+ auto _alpha = _o->alpha;
+ auto _beta = _o->beta;
+ return circle::CreateLocalResponseNormalizationOptions(_fbb, _radius, _bias, _alpha, _beta);
+}
+
+inline LSTMOptionsT *LSTMOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<LSTMOptionsT>(new LSTMOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void LSTMOptions::UnPackTo(LSTMOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = fused_activation_function();
+ _o->fused_activation_function = _e;
+ }
+ {
+ auto _e = cell_clip();
+ _o->cell_clip = _e;
+ }
+ {
+ auto _e = proj_clip();
+ _o->proj_clip = _e;
+ }
+ {
+ auto _e = kernel_type();
+ _o->kernel_type = _e;
+ }
+ {
+ auto _e = asymmetric_quantize_inputs();
+ _o->asymmetric_quantize_inputs = _e;
+ }
+}
+
+inline flatbuffers::Offset<LSTMOptions>
+LSTMOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateLSTMOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<LSTMOptions>
+CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const LSTMOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _fused_activation_function = _o->fused_activation_function;
+ auto _cell_clip = _o->cell_clip;
+ auto _proj_clip = _o->proj_clip;
+ auto _kernel_type = _o->kernel_type;
+ auto _asymmetric_quantize_inputs = _o->asymmetric_quantize_inputs;
+ return circle::CreateLSTMOptions(_fbb, _fused_activation_function, _cell_clip, _proj_clip,
+ _kernel_type, _asymmetric_quantize_inputs);
+}
+
+inline UnidirectionalSequenceLSTMOptionsT *
+UnidirectionalSequenceLSTMOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o =
+ std::unique_ptr<UnidirectionalSequenceLSTMOptionsT>(new UnidirectionalSequenceLSTMOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void
+UnidirectionalSequenceLSTMOptions::UnPackTo(UnidirectionalSequenceLSTMOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = fused_activation_function();
+ _o->fused_activation_function = _e;
+ }
+ {
+ auto _e = cell_clip();
+ _o->cell_clip = _e;
+ }
+ {
+ auto _e = proj_clip();
+ _o->proj_clip = _e;
+ }
+ {
+ auto _e = time_major();
+ _o->time_major = _e;
+ }
+ {
+ auto _e = asymmetric_quantize_inputs();
+ _o->asymmetric_quantize_inputs = _e;
+ }
+}
+
+inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions>
+UnidirectionalSequenceLSTMOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb,
+ const UnidirectionalSequenceLSTMOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateUnidirectionalSequenceLSTMOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions>
+CreateUnidirectionalSequenceLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ const UnidirectionalSequenceLSTMOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const UnidirectionalSequenceLSTMOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _fused_activation_function = _o->fused_activation_function;
+ auto _cell_clip = _o->cell_clip;
+ auto _proj_clip = _o->proj_clip;
+ auto _time_major = _o->time_major;
+ auto _asymmetric_quantize_inputs = _o->asymmetric_quantize_inputs;
+ return circle::CreateUnidirectionalSequenceLSTMOptions(_fbb, _fused_activation_function,
+ _cell_clip, _proj_clip, _time_major,
+ _asymmetric_quantize_inputs);
+}
+
+inline BidirectionalSequenceLSTMOptionsT *
+BidirectionalSequenceLSTMOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o =
+ std::unique_ptr<BidirectionalSequenceLSTMOptionsT>(new BidirectionalSequenceLSTMOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void
+BidirectionalSequenceLSTMOptions::UnPackTo(BidirectionalSequenceLSTMOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = fused_activation_function();
+ _o->fused_activation_function = _e;
+ }
+ {
+ auto _e = cell_clip();
+ _o->cell_clip = _e;
+ }
+ {
+ auto _e = proj_clip();
+ _o->proj_clip = _e;
+ }
+ {
+ auto _e = merge_outputs();
+ _o->merge_outputs = _e;
+ }
+ {
+ auto _e = time_major();
+ _o->time_major = _e;
+ }
+ {
+ auto _e = asymmetric_quantize_inputs();
+ _o->asymmetric_quantize_inputs = _e;
+ }
+}
+
+inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions>
+BidirectionalSequenceLSTMOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb,
+ const BidirectionalSequenceLSTMOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateBidirectionalSequenceLSTMOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions>
+CreateBidirectionalSequenceLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ const BidirectionalSequenceLSTMOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const BidirectionalSequenceLSTMOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _fused_activation_function = _o->fused_activation_function;
+ auto _cell_clip = _o->cell_clip;
+ auto _proj_clip = _o->proj_clip;
+ auto _merge_outputs = _o->merge_outputs;
+ auto _time_major = _o->time_major;
+ auto _asymmetric_quantize_inputs = _o->asymmetric_quantize_inputs;
+ return circle::CreateBidirectionalSequenceLSTMOptions(_fbb, _fused_activation_function,
+ _cell_clip, _proj_clip, _merge_outputs,
+ _time_major, _asymmetric_quantize_inputs);
+}
+
+inline ResizeBilinearOptionsT *
+ResizeBilinearOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<ResizeBilinearOptionsT>(new ResizeBilinearOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void ResizeBilinearOptions::UnPackTo(ResizeBilinearOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = align_corners();
+ _o->align_corners = _e;
+ }
+ {
+ auto _e = half_pixel_centers();
+ _o->half_pixel_centers = _e;
+ }
+}
+
+inline flatbuffers::Offset<ResizeBilinearOptions>
+ResizeBilinearOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateResizeBilinearOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<ResizeBilinearOptions>
+CreateResizeBilinearOptions(flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const ResizeBilinearOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _align_corners = _o->align_corners;
+ auto _half_pixel_centers = _o->half_pixel_centers;
+ return circle::CreateResizeBilinearOptions(_fbb, _align_corners, _half_pixel_centers);
+}
+
+inline ResizeNearestNeighborOptionsT *
+ResizeNearestNeighborOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<ResizeNearestNeighborOptionsT>(new ResizeNearestNeighborOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void
+ResizeNearestNeighborOptions::UnPackTo(ResizeNearestNeighborOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = align_corners();
+ _o->align_corners = _e;
+ }
+ {
+ auto _e = half_pixel_centers();
+ _o->half_pixel_centers = _e;
+ }
+}
+
+inline flatbuffers::Offset<ResizeNearestNeighborOptions>
+ResizeNearestNeighborOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb,
+ const ResizeNearestNeighborOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateResizeNearestNeighborOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<ResizeNearestNeighborOptions>
+CreateResizeNearestNeighborOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ const ResizeNearestNeighborOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const ResizeNearestNeighborOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _align_corners = _o->align_corners;
+ auto _half_pixel_centers = _o->half_pixel_centers;
+ return circle::CreateResizeNearestNeighborOptions(_fbb, _align_corners, _half_pixel_centers);
+}
+
+inline CallOptionsT *CallOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<CallOptionsT>(new CallOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void CallOptions::UnPackTo(CallOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = subgraph();
+ _o->subgraph = _e;
+ }
+}
+
+inline flatbuffers::Offset<CallOptions>
+CallOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateCallOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<CallOptions>
+CreateCallOptions(flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const CallOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _subgraph = _o->subgraph;
+ return circle::CreateCallOptions(_fbb, _subgraph);
+}
+
+inline PadOptionsT *PadOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<PadOptionsT>(new PadOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void PadOptions::UnPackTo(PadOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<PadOptions>
+PadOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const PadOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreatePadOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<PadOptions>
+CreatePadOptions(flatbuffers::FlatBufferBuilder &_fbb, const PadOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const PadOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreatePadOptions(_fbb);
+}
+
+inline PadV2OptionsT *PadV2Options::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<PadV2OptionsT>(new PadV2OptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void PadV2Options::UnPackTo(PadV2OptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<PadV2Options>
+PadV2Options::Pack(flatbuffers::FlatBufferBuilder &_fbb, const PadV2OptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreatePadV2Options(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<PadV2Options>
+CreatePadV2Options(flatbuffers::FlatBufferBuilder &_fbb, const PadV2OptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const PadV2OptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreatePadV2Options(_fbb);
+}
+
+inline ReshapeOptionsT *
+ReshapeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<ReshapeOptionsT>(new ReshapeOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void ReshapeOptions::UnPackTo(ReshapeOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = new_shape();
+ if (_e)
+ {
+ _o->new_shape.resize(_e->size());
+ for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++)
+ {
+ _o->new_shape[_i] = _e->Get(_i);
+ }
+ }
+ }
+}
+
+inline flatbuffers::Offset<ReshapeOptions>
+ReshapeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateReshapeOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<ReshapeOptions>
+CreateReshapeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const ReshapeOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _new_shape = _o->new_shape.size() ? _fbb.CreateVector(_o->new_shape) : 0;
+ return circle::CreateReshapeOptions(_fbb, _new_shape);
+}
+
+inline SpaceToBatchNDOptionsT *
+SpaceToBatchNDOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<SpaceToBatchNDOptionsT>(new SpaceToBatchNDOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void SpaceToBatchNDOptions::UnPackTo(SpaceToBatchNDOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<SpaceToBatchNDOptions>
+SpaceToBatchNDOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToBatchNDOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateSpaceToBatchNDOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<SpaceToBatchNDOptions>
+CreateSpaceToBatchNDOptions(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToBatchNDOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const SpaceToBatchNDOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateSpaceToBatchNDOptions(_fbb);
+}
+
+inline BatchToSpaceNDOptionsT *
+BatchToSpaceNDOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<BatchToSpaceNDOptionsT>(new BatchToSpaceNDOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void BatchToSpaceNDOptions::UnPackTo(BatchToSpaceNDOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<BatchToSpaceNDOptions>
+BatchToSpaceNDOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BatchToSpaceNDOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateBatchToSpaceNDOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<BatchToSpaceNDOptions>
+CreateBatchToSpaceNDOptions(flatbuffers::FlatBufferBuilder &_fbb, const BatchToSpaceNDOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const BatchToSpaceNDOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateBatchToSpaceNDOptions(_fbb);
+}
+
+inline SkipGramOptionsT *
+SkipGramOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<SkipGramOptionsT>(new SkipGramOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void SkipGramOptions::UnPackTo(SkipGramOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = ngram_size();
+ _o->ngram_size = _e;
+ }
+ {
+ auto _e = max_skip_size();
+ _o->max_skip_size = _e;
+ }
+ {
+ auto _e = include_all_ngrams();
+ _o->include_all_ngrams = _e;
+ }
+}
+
+inline flatbuffers::Offset<SkipGramOptions>
+SkipGramOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateSkipGramOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<SkipGramOptions>
+CreateSkipGramOptions(flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const SkipGramOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _ngram_size = _o->ngram_size;
+ auto _max_skip_size = _o->max_skip_size;
+ auto _include_all_ngrams = _o->include_all_ngrams;
+ return circle::CreateSkipGramOptions(_fbb, _ngram_size, _max_skip_size, _include_all_ngrams);
+}
+
+inline SpaceToDepthOptionsT *
+SpaceToDepthOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<SpaceToDepthOptionsT>(new SpaceToDepthOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void SpaceToDepthOptions::UnPackTo(SpaceToDepthOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = block_size();
+ _o->block_size = _e;
+ }
+}
+
+inline flatbuffers::Offset<SpaceToDepthOptions>
+SpaceToDepthOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateSpaceToDepthOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<SpaceToDepthOptions>
+CreateSpaceToDepthOptions(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const SpaceToDepthOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _block_size = _o->block_size;
+ return circle::CreateSpaceToDepthOptions(_fbb, _block_size);
+}
+
+inline DepthToSpaceOptionsT *
+DepthToSpaceOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<DepthToSpaceOptionsT>(new DepthToSpaceOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void DepthToSpaceOptions::UnPackTo(DepthToSpaceOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = block_size();
+ _o->block_size = _e;
+ }
+}
+
+inline flatbuffers::Offset<DepthToSpaceOptions>
+DepthToSpaceOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DepthToSpaceOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateDepthToSpaceOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<DepthToSpaceOptions>
+CreateDepthToSpaceOptions(flatbuffers::FlatBufferBuilder &_fbb, const DepthToSpaceOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const DepthToSpaceOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _block_size = _o->block_size;
+ return circle::CreateDepthToSpaceOptions(_fbb, _block_size);
+}
+
+inline SubOptionsT *SubOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<SubOptionsT>(new SubOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void SubOptions::UnPackTo(SubOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = fused_activation_function();
+ _o->fused_activation_function = _e;
+ }
+ {
+ auto _e = pot_scale_int16();
+ _o->pot_scale_int16 = _e;
+ }
+}
+
+inline flatbuffers::Offset<SubOptions>
+SubOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SubOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateSubOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<SubOptions>
+CreateSubOptions(flatbuffers::FlatBufferBuilder &_fbb, const SubOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const SubOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _fused_activation_function = _o->fused_activation_function;
+ auto _pot_scale_int16 = _o->pot_scale_int16;
+ return circle::CreateSubOptions(_fbb, _fused_activation_function, _pot_scale_int16);
+}
+
+inline DivOptionsT *DivOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<DivOptionsT>(new DivOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void DivOptions::UnPackTo(DivOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = fused_activation_function();
+ _o->fused_activation_function = _e;
+ }
+}
+
+inline flatbuffers::Offset<DivOptions>
+DivOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DivOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateDivOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<DivOptions>
+CreateDivOptions(flatbuffers::FlatBufferBuilder &_fbb, const DivOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const DivOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _fused_activation_function = _o->fused_activation_function;
+ return circle::CreateDivOptions(_fbb, _fused_activation_function);
+}
+
+inline TopKV2OptionsT *
+TopKV2Options::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<TopKV2OptionsT>(new TopKV2OptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void TopKV2Options::UnPackTo(TopKV2OptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<TopKV2Options>
+TopKV2Options::Pack(flatbuffers::FlatBufferBuilder &_fbb, const TopKV2OptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateTopKV2Options(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<TopKV2Options>
+CreateTopKV2Options(flatbuffers::FlatBufferBuilder &_fbb, const TopKV2OptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const TopKV2OptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateTopKV2Options(_fbb);
+}
+
+inline EmbeddingLookupSparseOptionsT *
+EmbeddingLookupSparseOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<EmbeddingLookupSparseOptionsT>(new EmbeddingLookupSparseOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void
+EmbeddingLookupSparseOptions::UnPackTo(EmbeddingLookupSparseOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = combiner();
+ _o->combiner = _e;
+ }
+}
+
+inline flatbuffers::Offset<EmbeddingLookupSparseOptions>
+EmbeddingLookupSparseOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb,
+ const EmbeddingLookupSparseOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateEmbeddingLookupSparseOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<EmbeddingLookupSparseOptions>
+CreateEmbeddingLookupSparseOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ const EmbeddingLookupSparseOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const EmbeddingLookupSparseOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _combiner = _o->combiner;
+ return circle::CreateEmbeddingLookupSparseOptions(_fbb, _combiner);
+}
+
+inline GatherOptionsT *
+GatherOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<GatherOptionsT>(new GatherOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void GatherOptions::UnPackTo(GatherOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = axis();
+ _o->axis = _e;
+ }
+ {
+ auto _e = batch_dims();
+ _o->batch_dims = _e;
+ }
+}
+
+inline flatbuffers::Offset<GatherOptions>
+GatherOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const GatherOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateGatherOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<GatherOptions>
+CreateGatherOptions(flatbuffers::FlatBufferBuilder &_fbb, const GatherOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const GatherOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _axis = _o->axis;
+ auto _batch_dims = _o->batch_dims;
+ return circle::CreateGatherOptions(_fbb, _axis, _batch_dims);
+}
+
+inline TransposeOptionsT *
+TransposeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<TransposeOptionsT>(new TransposeOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void TransposeOptions::UnPackTo(TransposeOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<TransposeOptions>
+TransposeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const TransposeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateTransposeOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<TransposeOptions>
+CreateTransposeOptions(flatbuffers::FlatBufferBuilder &_fbb, const TransposeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const TransposeOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateTransposeOptions(_fbb);
+}
+
+inline ExpOptionsT *ExpOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<ExpOptionsT>(new ExpOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void ExpOptions::UnPackTo(ExpOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<ExpOptions>
+ExpOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ExpOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateExpOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<ExpOptions>
+CreateExpOptions(flatbuffers::FlatBufferBuilder &_fbb, const ExpOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const ExpOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateExpOptions(_fbb);
+}
+
+inline CosOptionsT *CosOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<CosOptionsT>(new CosOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void CosOptions::UnPackTo(CosOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<CosOptions>
+CosOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CosOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateCosOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<CosOptions>
+CreateCosOptions(flatbuffers::FlatBufferBuilder &_fbb, const CosOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const CosOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateCosOptions(_fbb);
+}
+
+inline ReducerOptionsT *
+ReducerOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<ReducerOptionsT>(new ReducerOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void ReducerOptions::UnPackTo(ReducerOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = keep_dims();
+ _o->keep_dims = _e;
+ }
+}
+
+inline flatbuffers::Offset<ReducerOptions>
+ReducerOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReducerOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateReducerOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<ReducerOptions>
+CreateReducerOptions(flatbuffers::FlatBufferBuilder &_fbb, const ReducerOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const ReducerOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _keep_dims = _o->keep_dims;
+ return circle::CreateReducerOptions(_fbb, _keep_dims);
+}
+
+inline SqueezeOptionsT *
+SqueezeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<SqueezeOptionsT>(new SqueezeOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void SqueezeOptions::UnPackTo(SqueezeOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = squeeze_dims();
+ if (_e)
+ {
+ _o->squeeze_dims.resize(_e->size());
+ for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++)
+ {
+ _o->squeeze_dims[_i] = _e->Get(_i);
+ }
+ }
+ }
+}
+
+inline flatbuffers::Offset<SqueezeOptions>
+SqueezeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SqueezeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateSqueezeOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<SqueezeOptions>
+CreateSqueezeOptions(flatbuffers::FlatBufferBuilder &_fbb, const SqueezeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const SqueezeOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _squeeze_dims = _o->squeeze_dims.size() ? _fbb.CreateVector(_o->squeeze_dims) : 0;
+ return circle::CreateSqueezeOptions(_fbb, _squeeze_dims);
+}
+
+inline SplitOptionsT *SplitOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<SplitOptionsT>(new SplitOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void SplitOptions::UnPackTo(SplitOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = num_splits();
+ _o->num_splits = _e;
+ }
+}
+
+inline flatbuffers::Offset<SplitOptions>
+SplitOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SplitOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateSplitOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<SplitOptions>
+CreateSplitOptions(flatbuffers::FlatBufferBuilder &_fbb, const SplitOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const SplitOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _num_splits = _o->num_splits;
+ return circle::CreateSplitOptions(_fbb, _num_splits);
+}
+
+inline SplitVOptionsT *
+SplitVOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<SplitVOptionsT>(new SplitVOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void SplitVOptions::UnPackTo(SplitVOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = num_splits();
+ _o->num_splits = _e;
+ }
+}
+
+inline flatbuffers::Offset<SplitVOptions>
+SplitVOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SplitVOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateSplitVOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<SplitVOptions>
+CreateSplitVOptions(flatbuffers::FlatBufferBuilder &_fbb, const SplitVOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const SplitVOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _num_splits = _o->num_splits;
+ return circle::CreateSplitVOptions(_fbb, _num_splits);
+}
+
+inline StridedSliceOptionsT *
+StridedSliceOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<StridedSliceOptionsT>(new StridedSliceOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void StridedSliceOptions::UnPackTo(StridedSliceOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = begin_mask();
+ _o->begin_mask = _e;
+ }
+ {
+ auto _e = end_mask();
+ _o->end_mask = _e;
+ }
+ {
+ auto _e = ellipsis_mask();
+ _o->ellipsis_mask = _e;
+ }
+ {
+ auto _e = new_axis_mask();
+ _o->new_axis_mask = _e;
+ }
+ {
+ auto _e = shrink_axis_mask();
+ _o->shrink_axis_mask = _e;
+ }
+}
+
+inline flatbuffers::Offset<StridedSliceOptions>
+StridedSliceOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const StridedSliceOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateStridedSliceOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<StridedSliceOptions>
+CreateStridedSliceOptions(flatbuffers::FlatBufferBuilder &_fbb, const StridedSliceOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const StridedSliceOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _begin_mask = _o->begin_mask;
+ auto _end_mask = _o->end_mask;
+ auto _ellipsis_mask = _o->ellipsis_mask;
+ auto _new_axis_mask = _o->new_axis_mask;
+ auto _shrink_axis_mask = _o->shrink_axis_mask;
+ return circle::CreateStridedSliceOptions(_fbb, _begin_mask, _end_mask, _ellipsis_mask,
+ _new_axis_mask, _shrink_axis_mask);
+}
+
+inline LogSoftmaxOptionsT *
+LogSoftmaxOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<LogSoftmaxOptionsT>(new LogSoftmaxOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void LogSoftmaxOptions::UnPackTo(LogSoftmaxOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<LogSoftmaxOptions>
+LogSoftmaxOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LogSoftmaxOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateLogSoftmaxOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<LogSoftmaxOptions>
+CreateLogSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const LogSoftmaxOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const LogSoftmaxOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateLogSoftmaxOptions(_fbb);
+}
+
+inline CastOptionsT *CastOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<CastOptionsT>(new CastOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void CastOptions::UnPackTo(CastOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = in_data_type();
+ _o->in_data_type = _e;
+ }
+ {
+ auto _e = out_data_type();
+ _o->out_data_type = _e;
+ }
+}
+
+inline flatbuffers::Offset<CastOptions>
+CastOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CastOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateCastOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<CastOptions>
+CreateCastOptions(flatbuffers::FlatBufferBuilder &_fbb, const CastOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const CastOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _in_data_type = _o->in_data_type;
+ auto _out_data_type = _o->out_data_type;
+ return circle::CreateCastOptions(_fbb, _in_data_type, _out_data_type);
+}
+
+inline DequantizeOptionsT *
+DequantizeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<DequantizeOptionsT>(new DequantizeOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void DequantizeOptions::UnPackTo(DequantizeOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<DequantizeOptions>
+DequantizeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DequantizeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateDequantizeOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<DequantizeOptions>
+CreateDequantizeOptions(flatbuffers::FlatBufferBuilder &_fbb, const DequantizeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const DequantizeOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateDequantizeOptions(_fbb);
+}
+
+inline MaximumMinimumOptionsT *
+MaximumMinimumOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<MaximumMinimumOptionsT>(new MaximumMinimumOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void MaximumMinimumOptions::UnPackTo(MaximumMinimumOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<MaximumMinimumOptions>
+MaximumMinimumOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const MaximumMinimumOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateMaximumMinimumOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<MaximumMinimumOptions>
+CreateMaximumMinimumOptions(flatbuffers::FlatBufferBuilder &_fbb, const MaximumMinimumOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const MaximumMinimumOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateMaximumMinimumOptions(_fbb);
+}
+
+inline TileOptionsT *TileOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<TileOptionsT>(new TileOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void TileOptions::UnPackTo(TileOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<TileOptions>
+TileOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const TileOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateTileOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<TileOptions>
+CreateTileOptions(flatbuffers::FlatBufferBuilder &_fbb, const TileOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const TileOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateTileOptions(_fbb);
+}
+
+inline ArgMaxOptionsT *
+ArgMaxOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<ArgMaxOptionsT>(new ArgMaxOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void ArgMaxOptions::UnPackTo(ArgMaxOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = output_type();
+ _o->output_type = _e;
+ }
+}
+
+inline flatbuffers::Offset<ArgMaxOptions>
+ArgMaxOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ArgMaxOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateArgMaxOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<ArgMaxOptions>
+CreateArgMaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const ArgMaxOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const ArgMaxOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _output_type = _o->output_type;
+ return circle::CreateArgMaxOptions(_fbb, _output_type);
+}
+
+inline ArgMinOptionsT *
+ArgMinOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<ArgMinOptionsT>(new ArgMinOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void ArgMinOptions::UnPackTo(ArgMinOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = output_type();
+ _o->output_type = _e;
+ }
+}
+
+inline flatbuffers::Offset<ArgMinOptions>
+ArgMinOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ArgMinOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateArgMinOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<ArgMinOptions>
+CreateArgMinOptions(flatbuffers::FlatBufferBuilder &_fbb, const ArgMinOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const ArgMinOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _output_type = _o->output_type;
+ return circle::CreateArgMinOptions(_fbb, _output_type);
+}
+
+inline GreaterOptionsT *
+GreaterOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<GreaterOptionsT>(new GreaterOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void GreaterOptions::UnPackTo(GreaterOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<GreaterOptions>
+GreaterOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const GreaterOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateGreaterOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<GreaterOptions>
+CreateGreaterOptions(flatbuffers::FlatBufferBuilder &_fbb, const GreaterOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const GreaterOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateGreaterOptions(_fbb);
+}
+
+inline GreaterEqualOptionsT *
+GreaterEqualOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<GreaterEqualOptionsT>(new GreaterEqualOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void GreaterEqualOptions::UnPackTo(GreaterEqualOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<GreaterEqualOptions>
+GreaterEqualOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const GreaterEqualOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateGreaterEqualOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<GreaterEqualOptions>
+CreateGreaterEqualOptions(flatbuffers::FlatBufferBuilder &_fbb, const GreaterEqualOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const GreaterEqualOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateGreaterEqualOptions(_fbb);
+}
+
+inline LessOptionsT *LessOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<LessOptionsT>(new LessOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void LessOptions::UnPackTo(LessOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<LessOptions>
+LessOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LessOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateLessOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<LessOptions>
+CreateLessOptions(flatbuffers::FlatBufferBuilder &_fbb, const LessOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const LessOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateLessOptions(_fbb);
+}
+
+inline LessEqualOptionsT *
+LessEqualOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<LessEqualOptionsT>(new LessEqualOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void LessEqualOptions::UnPackTo(LessEqualOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<LessEqualOptions>
+LessEqualOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LessEqualOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateLessEqualOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<LessEqualOptions>
+CreateLessEqualOptions(flatbuffers::FlatBufferBuilder &_fbb, const LessEqualOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const LessEqualOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateLessEqualOptions(_fbb);
+}
+
+inline NegOptionsT *NegOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<NegOptionsT>(new NegOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void NegOptions::UnPackTo(NegOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<NegOptions>
+NegOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const NegOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateNegOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<NegOptions>
+CreateNegOptions(flatbuffers::FlatBufferBuilder &_fbb, const NegOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const NegOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateNegOptions(_fbb);
+}
+
+inline SelectOptionsT *
+SelectOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<SelectOptionsT>(new SelectOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void SelectOptions::UnPackTo(SelectOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<SelectOptions>
+SelectOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SelectOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateSelectOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<SelectOptions>
+CreateSelectOptions(flatbuffers::FlatBufferBuilder &_fbb, const SelectOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const SelectOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateSelectOptions(_fbb);
+}
+
+inline SliceOptionsT *SliceOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<SliceOptionsT>(new SliceOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void SliceOptions::UnPackTo(SliceOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<SliceOptions>
+SliceOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SliceOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateSliceOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<SliceOptions>
+CreateSliceOptions(flatbuffers::FlatBufferBuilder &_fbb, const SliceOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const SliceOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateSliceOptions(_fbb);
+}
+
+inline TransposeConvOptionsT *
+TransposeConvOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<TransposeConvOptionsT>(new TransposeConvOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void TransposeConvOptions::UnPackTo(TransposeConvOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = padding();
+ _o->padding = _e;
+ }
+ {
+ auto _e = stride_w();
+ _o->stride_w = _e;
+ }
+ {
+ auto _e = stride_h();
+ _o->stride_h = _e;
+ }
+}
+
+inline flatbuffers::Offset<TransposeConvOptions>
+TransposeConvOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const TransposeConvOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateTransposeConvOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<TransposeConvOptions>
+CreateTransposeConvOptions(flatbuffers::FlatBufferBuilder &_fbb, const TransposeConvOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const TransposeConvOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _padding = _o->padding;
+ auto _stride_w = _o->stride_w;
+ auto _stride_h = _o->stride_h;
+ return circle::CreateTransposeConvOptions(_fbb, _padding, _stride_w, _stride_h);
+}
+
+inline ExpandDimsOptionsT *
+ExpandDimsOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<ExpandDimsOptionsT>(new ExpandDimsOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void ExpandDimsOptions::UnPackTo(ExpandDimsOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<ExpandDimsOptions>
+ExpandDimsOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ExpandDimsOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateExpandDimsOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<ExpandDimsOptions>
+CreateExpandDimsOptions(flatbuffers::FlatBufferBuilder &_fbb, const ExpandDimsOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const ExpandDimsOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateExpandDimsOptions(_fbb);
+}
+
+inline SparseToDenseOptionsT *
+SparseToDenseOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<SparseToDenseOptionsT>(new SparseToDenseOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void SparseToDenseOptions::UnPackTo(SparseToDenseOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = validate_indices();
+ _o->validate_indices = _e;
+ }
+}
+
+inline flatbuffers::Offset<SparseToDenseOptions>
+SparseToDenseOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SparseToDenseOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateSparseToDenseOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<SparseToDenseOptions>
+CreateSparseToDenseOptions(flatbuffers::FlatBufferBuilder &_fbb, const SparseToDenseOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const SparseToDenseOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _validate_indices = _o->validate_indices;
+ return circle::CreateSparseToDenseOptions(_fbb, _validate_indices);
+}
+
+inline EqualOptionsT *EqualOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<EqualOptionsT>(new EqualOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void EqualOptions::UnPackTo(EqualOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<EqualOptions>
+EqualOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const EqualOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateEqualOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<EqualOptions>
+CreateEqualOptions(flatbuffers::FlatBufferBuilder &_fbb, const EqualOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const EqualOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateEqualOptions(_fbb);
+}
+
+inline NotEqualOptionsT *
+NotEqualOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<NotEqualOptionsT>(new NotEqualOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void NotEqualOptions::UnPackTo(NotEqualOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<NotEqualOptions>
+NotEqualOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const NotEqualOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateNotEqualOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<NotEqualOptions>
+CreateNotEqualOptions(flatbuffers::FlatBufferBuilder &_fbb, const NotEqualOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const NotEqualOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateNotEqualOptions(_fbb);
+}
+
+inline ShapeOptionsT *ShapeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<ShapeOptionsT>(new ShapeOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void ShapeOptions::UnPackTo(ShapeOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = out_type();
+ _o->out_type = _e;
+ }
+}
+
+inline flatbuffers::Offset<ShapeOptions>
+ShapeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ShapeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateShapeOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<ShapeOptions>
+CreateShapeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ShapeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const ShapeOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _out_type = _o->out_type;
+ return circle::CreateShapeOptions(_fbb, _out_type);
+}
+
+inline RankOptionsT *RankOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<RankOptionsT>(new RankOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void RankOptions::UnPackTo(RankOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<RankOptions>
+RankOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const RankOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateRankOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<RankOptions>
+CreateRankOptions(flatbuffers::FlatBufferBuilder &_fbb, const RankOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const RankOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateRankOptions(_fbb);
+}
+
+inline PowOptionsT *PowOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<PowOptionsT>(new PowOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void PowOptions::UnPackTo(PowOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<PowOptions>
+PowOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const PowOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreatePowOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<PowOptions>
+CreatePowOptions(flatbuffers::FlatBufferBuilder &_fbb, const PowOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const PowOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreatePowOptions(_fbb);
+}
+
+inline FakeQuantOptionsT *
+FakeQuantOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<FakeQuantOptionsT>(new FakeQuantOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void FakeQuantOptions::UnPackTo(FakeQuantOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = min();
+ _o->min = _e;
+ }
+ {
+ auto _e = max();
+ _o->max = _e;
+ }
+ {
+ auto _e = num_bits();
+ _o->num_bits = _e;
+ }
+ {
+ auto _e = narrow_range();
+ _o->narrow_range = _e;
+ }
+}
+
+inline flatbuffers::Offset<FakeQuantOptions>
+FakeQuantOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const FakeQuantOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateFakeQuantOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<FakeQuantOptions>
+CreateFakeQuantOptions(flatbuffers::FlatBufferBuilder &_fbb, const FakeQuantOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const FakeQuantOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _min = _o->min;
+ auto _max = _o->max;
+ auto _num_bits = _o->num_bits;
+ auto _narrow_range = _o->narrow_range;
+ return circle::CreateFakeQuantOptions(_fbb, _min, _max, _num_bits, _narrow_range);
+}
+
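+// Note (editorial, not flatc output): the object API is a convenience layer;
+// the direct builder function used in the return statement above can also be
+// called on its own. A sketch, with parameter types assumed from the
+// TFLite/Circle schema (float min/max, int32 num_bits, bool narrow_range):
+//
+//   flatbuffers::FlatBufferBuilder fbb;
+//   auto fq = circle::CreateFakeQuantOptions(fbb, /*min=*/-6.0f, /*max=*/6.0f,
+//                                            /*num_bits=*/8,
+//                                            /*narrow_range=*/false);
+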
+inline PackOptionsT *PackOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<PackOptionsT>(new PackOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void PackOptions::UnPackTo(PackOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = values_count();
+ _o->values_count = _e;
+ }
+ {
+ auto _e = axis();
+ _o->axis = _e;
+ }
+}
+
+inline flatbuffers::Offset<PackOptions>
+PackOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const PackOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreatePackOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<PackOptions>
+CreatePackOptions(flatbuffers::FlatBufferBuilder &_fbb, const PackOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const PackOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _values_count = _o->values_count;
+ auto _axis = _o->axis;
+ return circle::CreatePackOptions(_fbb, _values_count, _axis);
+}
+
+inline LogicalOrOptionsT *
+LogicalOrOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<LogicalOrOptionsT>(new LogicalOrOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void LogicalOrOptions::UnPackTo(LogicalOrOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<LogicalOrOptions>
+LogicalOrOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LogicalOrOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateLogicalOrOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<LogicalOrOptions>
+CreateLogicalOrOptions(flatbuffers::FlatBufferBuilder &_fbb, const LogicalOrOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const LogicalOrOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateLogicalOrOptions(_fbb);
+}
+
+inline OneHotOptionsT *
+OneHotOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<OneHotOptionsT>(new OneHotOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void OneHotOptions::UnPackTo(OneHotOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = axis();
+ _o->axis = _e;
+ }
+}
+
+inline flatbuffers::Offset<OneHotOptions>
+OneHotOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const OneHotOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateOneHotOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<OneHotOptions>
+CreateOneHotOptions(flatbuffers::FlatBufferBuilder &_fbb, const OneHotOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const OneHotOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _axis = _o->axis;
+ return circle::CreateOneHotOptions(_fbb, _axis);
+}
+
+inline AbsOptionsT *AbsOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<AbsOptionsT>(new AbsOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void AbsOptions::UnPackTo(AbsOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<AbsOptions>
+AbsOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const AbsOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateAbsOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<AbsOptions>
+CreateAbsOptions(flatbuffers::FlatBufferBuilder &_fbb, const AbsOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const AbsOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateAbsOptions(_fbb);
+}
+
+inline HardSwishOptionsT *
+HardSwishOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<HardSwishOptionsT>(new HardSwishOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void HardSwishOptions::UnPackTo(HardSwishOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<HardSwishOptions>
+HardSwishOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const HardSwishOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateHardSwishOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<HardSwishOptions>
+CreateHardSwishOptions(flatbuffers::FlatBufferBuilder &_fbb, const HardSwishOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const HardSwishOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateHardSwishOptions(_fbb);
+}
+
+inline LogicalAndOptionsT *
+LogicalAndOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<LogicalAndOptionsT>(new LogicalAndOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void LogicalAndOptions::UnPackTo(LogicalAndOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<LogicalAndOptions>
+LogicalAndOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LogicalAndOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateLogicalAndOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<LogicalAndOptions>
+CreateLogicalAndOptions(flatbuffers::FlatBufferBuilder &_fbb, const LogicalAndOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const LogicalAndOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateLogicalAndOptions(_fbb);
+}
+
+inline LogicalNotOptionsT *
+LogicalNotOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<LogicalNotOptionsT>(new LogicalNotOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void LogicalNotOptions::UnPackTo(LogicalNotOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<LogicalNotOptions>
+LogicalNotOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LogicalNotOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateLogicalNotOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<LogicalNotOptions>
+CreateLogicalNotOptions(flatbuffers::FlatBufferBuilder &_fbb, const LogicalNotOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const LogicalNotOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateLogicalNotOptions(_fbb);
+}
+
+inline UnpackOptionsT *
+UnpackOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<UnpackOptionsT>(new UnpackOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void UnpackOptions::UnPackTo(UnpackOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = num();
+ _o->num = _e;
+ }
+ {
+ auto _e = axis();
+ _o->axis = _e;
+ }
+}
+
+inline flatbuffers::Offset<UnpackOptions>
+UnpackOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnpackOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateUnpackOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<UnpackOptions>
+CreateUnpackOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnpackOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const UnpackOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _num = _o->num;
+ auto _axis = _o->axis;
+ return circle::CreateUnpackOptions(_fbb, _num, _axis);
+}
+
+inline FloorDivOptionsT *
+FloorDivOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<FloorDivOptionsT>(new FloorDivOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void FloorDivOptions::UnPackTo(FloorDivOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<FloorDivOptions>
+FloorDivOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const FloorDivOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateFloorDivOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<FloorDivOptions>
+CreateFloorDivOptions(flatbuffers::FlatBufferBuilder &_fbb, const FloorDivOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const FloorDivOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateFloorDivOptions(_fbb);
+}
+
+inline SquareOptionsT *
+SquareOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<SquareOptionsT>(new SquareOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void SquareOptions::UnPackTo(SquareOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<SquareOptions>
+SquareOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SquareOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateSquareOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<SquareOptions>
+CreateSquareOptions(flatbuffers::FlatBufferBuilder &_fbb, const SquareOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const SquareOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateSquareOptions(_fbb);
+}
+
+inline ZerosLikeOptionsT *
+ZerosLikeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<ZerosLikeOptionsT>(new ZerosLikeOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void ZerosLikeOptions::UnPackTo(ZerosLikeOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<ZerosLikeOptions>
+ZerosLikeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ZerosLikeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateZerosLikeOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<ZerosLikeOptions>
+CreateZerosLikeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ZerosLikeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const ZerosLikeOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateZerosLikeOptions(_fbb);
+}
+
+inline FillOptionsT *FillOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<FillOptionsT>(new FillOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void FillOptions::UnPackTo(FillOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<FillOptions>
+FillOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const FillOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateFillOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<FillOptions>
+CreateFillOptions(flatbuffers::FlatBufferBuilder &_fbb, const FillOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const FillOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateFillOptions(_fbb);
+}
+
+inline FloorModOptionsT *
+FloorModOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<FloorModOptionsT>(new FloorModOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void FloorModOptions::UnPackTo(FloorModOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<FloorModOptions>
+FloorModOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const FloorModOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateFloorModOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<FloorModOptions>
+CreateFloorModOptions(flatbuffers::FlatBufferBuilder &_fbb, const FloorModOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const FloorModOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateFloorModOptions(_fbb);
+}
+
+inline RangeOptionsT *RangeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<RangeOptionsT>(new RangeOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void RangeOptions::UnPackTo(RangeOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<RangeOptions>
+RangeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const RangeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateRangeOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<RangeOptions>
+CreateRangeOptions(flatbuffers::FlatBufferBuilder &_fbb, const RangeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const RangeOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateRangeOptions(_fbb);
+}
+
+inline LeakyReluOptionsT *
+LeakyReluOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<LeakyReluOptionsT>(new LeakyReluOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void LeakyReluOptions::UnPackTo(LeakyReluOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = alpha();
+ _o->alpha = _e;
+ }
+}
+
+inline flatbuffers::Offset<LeakyReluOptions>
+LeakyReluOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LeakyReluOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateLeakyReluOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<LeakyReluOptions>
+CreateLeakyReluOptions(flatbuffers::FlatBufferBuilder &_fbb, const LeakyReluOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const LeakyReluOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _alpha = _o->alpha;
+ return circle::CreateLeakyReluOptions(_fbb, _alpha);
+}
+
+inline SquaredDifferenceOptionsT *
+SquaredDifferenceOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<SquaredDifferenceOptionsT>(new SquaredDifferenceOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void
+SquaredDifferenceOptions::UnPackTo(SquaredDifferenceOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<SquaredDifferenceOptions>
+SquaredDifferenceOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb,
+ const SquaredDifferenceOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateSquaredDifferenceOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<SquaredDifferenceOptions>
+CreateSquaredDifferenceOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ const SquaredDifferenceOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const SquaredDifferenceOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateSquaredDifferenceOptions(_fbb);
+}
+
+inline MirrorPadOptionsT *
+MirrorPadOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<MirrorPadOptionsT>(new MirrorPadOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void MirrorPadOptions::UnPackTo(MirrorPadOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = mode();
+ _o->mode = _e;
+ }
+}
+
+inline flatbuffers::Offset<MirrorPadOptions>
+MirrorPadOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const MirrorPadOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateMirrorPadOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<MirrorPadOptions>
+CreateMirrorPadOptions(flatbuffers::FlatBufferBuilder &_fbb, const MirrorPadOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const MirrorPadOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _mode = _o->mode;
+ return circle::CreateMirrorPadOptions(_fbb, _mode);
+}
+
+inline UniqueOptionsT *
+UniqueOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<UniqueOptionsT>(new UniqueOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void UniqueOptions::UnPackTo(UniqueOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = idx_out_type();
+ _o->idx_out_type = _e;
+ }
+}
+
+inline flatbuffers::Offset<UniqueOptions>
+UniqueOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const UniqueOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateUniqueOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<UniqueOptions>
+CreateUniqueOptions(flatbuffers::FlatBufferBuilder &_fbb, const UniqueOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const UniqueOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _idx_out_type = _o->idx_out_type;
+ return circle::CreateUniqueOptions(_fbb, _idx_out_type);
+}
+
+inline ReverseV2OptionsT *
+ReverseV2Options::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<ReverseV2OptionsT>(new ReverseV2OptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void ReverseV2Options::UnPackTo(ReverseV2OptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<ReverseV2Options>
+ReverseV2Options::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReverseV2OptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateReverseV2Options(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<ReverseV2Options>
+CreateReverseV2Options(flatbuffers::FlatBufferBuilder &_fbb, const ReverseV2OptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const ReverseV2OptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateReverseV2Options(_fbb);
+}
+
+inline AddNOptionsT *AddNOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<AddNOptionsT>(new AddNOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void AddNOptions::UnPackTo(AddNOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<AddNOptions>
+AddNOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const AddNOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateAddNOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<AddNOptions>
+CreateAddNOptions(flatbuffers::FlatBufferBuilder &_fbb, const AddNOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const AddNOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateAddNOptions(_fbb);
+}
+
+inline GatherNdOptionsT *
+GatherNdOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<GatherNdOptionsT>(new GatherNdOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void GatherNdOptions::UnPackTo(GatherNdOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<GatherNdOptions>
+GatherNdOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const GatherNdOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateGatherNdOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<GatherNdOptions>
+CreateGatherNdOptions(flatbuffers::FlatBufferBuilder &_fbb, const GatherNdOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const GatherNdOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateGatherNdOptions(_fbb);
+}
+
+inline WhereOptionsT *WhereOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<WhereOptionsT>(new WhereOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void WhereOptions::UnPackTo(WhereOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<WhereOptions>
+WhereOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const WhereOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateWhereOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<WhereOptions>
+CreateWhereOptions(flatbuffers::FlatBufferBuilder &_fbb, const WhereOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const WhereOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateWhereOptions(_fbb);
+}
+
+inline ReverseSequenceOptionsT *
+ReverseSequenceOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<ReverseSequenceOptionsT>(new ReverseSequenceOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void
+ReverseSequenceOptions::UnPackTo(ReverseSequenceOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = seq_dim();
+ _o->seq_dim = _e;
+ }
+ {
+ auto _e = batch_dim();
+ _o->batch_dim = _e;
+ }
+}
+
+inline flatbuffers::Offset<ReverseSequenceOptions>
+ReverseSequenceOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb,
+ const ReverseSequenceOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateReverseSequenceOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<ReverseSequenceOptions>
+CreateReverseSequenceOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ const ReverseSequenceOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const ReverseSequenceOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _seq_dim = _o->seq_dim;
+ auto _batch_dim = _o->batch_dim;
+ return circle::CreateReverseSequenceOptions(_fbb, _seq_dim, _batch_dim);
+}
+
+inline MatrixDiagOptionsT *
+MatrixDiagOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<MatrixDiagOptionsT>(new MatrixDiagOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void MatrixDiagOptions::UnPackTo(MatrixDiagOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<MatrixDiagOptions>
+MatrixDiagOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const MatrixDiagOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateMatrixDiagOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<MatrixDiagOptions>
+CreateMatrixDiagOptions(flatbuffers::FlatBufferBuilder &_fbb, const MatrixDiagOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const MatrixDiagOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateMatrixDiagOptions(_fbb);
+}
+
+inline QuantizeOptionsT *
+QuantizeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<QuantizeOptionsT>(new QuantizeOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void QuantizeOptions::UnPackTo(QuantizeOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<QuantizeOptions>
+QuantizeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const QuantizeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateQuantizeOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<QuantizeOptions>
+CreateQuantizeOptions(flatbuffers::FlatBufferBuilder &_fbb, const QuantizeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const QuantizeOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateQuantizeOptions(_fbb);
+}
+
+inline MatrixSetDiagOptionsT *
+MatrixSetDiagOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<MatrixSetDiagOptionsT>(new MatrixSetDiagOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void MatrixSetDiagOptions::UnPackTo(MatrixSetDiagOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<MatrixSetDiagOptions>
+MatrixSetDiagOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const MatrixSetDiagOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateMatrixSetDiagOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<MatrixSetDiagOptions>
+CreateMatrixSetDiagOptions(flatbuffers::FlatBufferBuilder &_fbb, const MatrixSetDiagOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const MatrixSetDiagOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateMatrixSetDiagOptions(_fbb);
+}
+
+inline IfOptionsT *IfOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<IfOptionsT>(new IfOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void IfOptions::UnPackTo(IfOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = then_subgraph_index();
+ _o->then_subgraph_index = _e;
+ }
+ {
+ auto _e = else_subgraph_index();
+ _o->else_subgraph_index = _e;
+ }
+}
+
+inline flatbuffers::Offset<IfOptions>
+IfOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const IfOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateIfOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<IfOptions>
+CreateIfOptions(flatbuffers::FlatBufferBuilder &_fbb, const IfOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const IfOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _then_subgraph_index = _o->then_subgraph_index;
+ auto _else_subgraph_index = _o->else_subgraph_index;
+ return circle::CreateIfOptions(_fbb, _then_subgraph_index, _else_subgraph_index);
+}
+
+inline CallOnceOptionsT *
+CallOnceOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<CallOnceOptionsT>(new CallOnceOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void CallOnceOptions::UnPackTo(CallOnceOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = init_subgraph_index();
+ _o->init_subgraph_index = _e;
+ }
+}
+
+inline flatbuffers::Offset<CallOnceOptions>
+CallOnceOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CallOnceOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateCallOnceOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<CallOnceOptions>
+CreateCallOnceOptions(flatbuffers::FlatBufferBuilder &_fbb, const CallOnceOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const CallOnceOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _init_subgraph_index = _o->init_subgraph_index;
+ return circle::CreateCallOnceOptions(_fbb, _init_subgraph_index);
+}
+
+inline WhileOptionsT *WhileOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<WhileOptionsT>(new WhileOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void WhileOptions::UnPackTo(WhileOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = cond_subgraph_index();
+ _o->cond_subgraph_index = _e;
+ }
+ {
+ auto _e = body_subgraph_index();
+ _o->body_subgraph_index = _e;
+ }
+}
+
+inline flatbuffers::Offset<WhileOptions>
+WhileOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const WhileOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateWhileOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<WhileOptions>
+CreateWhileOptions(flatbuffers::FlatBufferBuilder &_fbb, const WhileOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const WhileOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _cond_subgraph_index = _o->cond_subgraph_index;
+ auto _body_subgraph_index = _o->body_subgraph_index;
+ return circle::CreateWhileOptions(_fbb, _cond_subgraph_index, _body_subgraph_index);
+}
+
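+// Note (editorial, not flatc output): control-flow option tables such as
+// IfOptions, CallOnceOptions, and WhileOptions carry subgraph indices rather
+// than tensor data, so a mutable WhileOptionsT can be filled in and packed
+// directly. A sketch, assuming the usual default-initialized object struct:
+//
+//   circle::WhileOptionsT w;
+//   w.cond_subgraph_index = 1;  // subgraph evaluated as the loop condition
+//   w.body_subgraph_index = 2;  // subgraph executed as the loop body
+//   flatbuffers::FlatBufferBuilder fbb;
+//   auto off = circle::WhileOptions::Pack(fbb, &w);
+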
+inline NonMaxSuppressionV4OptionsT *
+NonMaxSuppressionV4Options::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<NonMaxSuppressionV4OptionsT>(new NonMaxSuppressionV4OptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void
+NonMaxSuppressionV4Options::UnPackTo(NonMaxSuppressionV4OptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<NonMaxSuppressionV4Options>
+NonMaxSuppressionV4Options::Pack(flatbuffers::FlatBufferBuilder &_fbb,
+ const NonMaxSuppressionV4OptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateNonMaxSuppressionV4Options(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<NonMaxSuppressionV4Options>
+CreateNonMaxSuppressionV4Options(flatbuffers::FlatBufferBuilder &_fbb,
+ const NonMaxSuppressionV4OptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const NonMaxSuppressionV4OptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateNonMaxSuppressionV4Options(_fbb);
+}
+
+inline NonMaxSuppressionV5OptionsT *
+NonMaxSuppressionV5Options::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<NonMaxSuppressionV5OptionsT>(new NonMaxSuppressionV5OptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void
+NonMaxSuppressionV5Options::UnPackTo(NonMaxSuppressionV5OptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<NonMaxSuppressionV5Options>
+NonMaxSuppressionV5Options::Pack(flatbuffers::FlatBufferBuilder &_fbb,
+ const NonMaxSuppressionV5OptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateNonMaxSuppressionV5Options(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<NonMaxSuppressionV5Options>
+CreateNonMaxSuppressionV5Options(flatbuffers::FlatBufferBuilder &_fbb,
+ const NonMaxSuppressionV5OptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const NonMaxSuppressionV5OptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateNonMaxSuppressionV5Options(_fbb);
+}
+
+inline ScatterNdOptionsT *
+ScatterNdOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<ScatterNdOptionsT>(new ScatterNdOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void ScatterNdOptions::UnPackTo(ScatterNdOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<ScatterNdOptions>
+ScatterNdOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ScatterNdOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateScatterNdOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<ScatterNdOptions>
+CreateScatterNdOptions(flatbuffers::FlatBufferBuilder &_fbb, const ScatterNdOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const ScatterNdOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateScatterNdOptions(_fbb);
+}
+
+inline SelectV2OptionsT *
+SelectV2Options::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<SelectV2OptionsT>(new SelectV2OptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void SelectV2Options::UnPackTo(SelectV2OptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<SelectV2Options>
+SelectV2Options::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SelectV2OptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateSelectV2Options(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<SelectV2Options>
+CreateSelectV2Options(flatbuffers::FlatBufferBuilder &_fbb, const SelectV2OptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const SelectV2OptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateSelectV2Options(_fbb);
+}
+
+inline DensifyOptionsT *
+DensifyOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<DensifyOptionsT>(new DensifyOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void DensifyOptions::UnPackTo(DensifyOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<DensifyOptions>
+DensifyOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DensifyOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateDensifyOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<DensifyOptions>
+CreateDensifyOptions(flatbuffers::FlatBufferBuilder &_fbb, const DensifyOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const DensifyOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateDensifyOptions(_fbb);
+}
+
+inline SegmentSumOptionsT *
+SegmentSumOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<SegmentSumOptionsT>(new SegmentSumOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void SegmentSumOptions::UnPackTo(SegmentSumOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<SegmentSumOptions>
+SegmentSumOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SegmentSumOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateSegmentSumOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<SegmentSumOptions>
+CreateSegmentSumOptions(flatbuffers::FlatBufferBuilder &_fbb, const SegmentSumOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const SegmentSumOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateSegmentSumOptions(_fbb);
+}
+
+inline BatchMatMulOptionsT *
+BatchMatMulOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<BatchMatMulOptionsT>(new BatchMatMulOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void BatchMatMulOptions::UnPackTo(BatchMatMulOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = adjoint_lhs();
+ _o->adjoint_lhs = _e;
+ }
+ {
+ auto _e = adjoint_rhs();
+ _o->adjoint_rhs = _e;
+ }
+ {
+ auto _e = asymmetric_quantize_inputs();
+ _o->asymmetric_quantize_inputs = _e;
+ }
+}
+
+inline flatbuffers::Offset<BatchMatMulOptions>
+BatchMatMulOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BatchMatMulOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateBatchMatMulOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<BatchMatMulOptions>
+CreateBatchMatMulOptions(flatbuffers::FlatBufferBuilder &_fbb, const BatchMatMulOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const BatchMatMulOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _adjoint_lhs = _o->adjoint_lhs;
+ auto _adjoint_rhs = _o->adjoint_rhs;
+ auto _asymmetric_quantize_inputs = _o->asymmetric_quantize_inputs;
+ return circle::CreateBatchMatMulOptions(_fbb, _adjoint_lhs, _adjoint_rhs,
+ _asymmetric_quantize_inputs);
+}
+
+inline CumsumOptionsT *
+CumsumOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<CumsumOptionsT>(new CumsumOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void CumsumOptions::UnPackTo(CumsumOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = exclusive();
+ _o->exclusive = _e;
+ }
+ {
+ auto _e = reverse();
+ _o->reverse = _e;
+ }
+}
+
+inline flatbuffers::Offset<CumsumOptions>
+CumsumOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CumsumOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateCumsumOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<CumsumOptions>
+CreateCumsumOptions(flatbuffers::FlatBufferBuilder &_fbb, const CumsumOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const CumsumOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _exclusive = _o->exclusive;
+ auto _reverse = _o->reverse;
+ return circle::CreateCumsumOptions(_fbb, _exclusive, _reverse);
+}
+
+inline BroadcastToOptionsT *
+BroadcastToOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<BroadcastToOptionsT>(new BroadcastToOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void BroadcastToOptions::UnPackTo(BroadcastToOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<BroadcastToOptions>
+BroadcastToOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BroadcastToOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateBroadcastToOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<BroadcastToOptions>
+CreateBroadcastToOptions(flatbuffers::FlatBufferBuilder &_fbb, const BroadcastToOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const BroadcastToOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateBroadcastToOptions(_fbb);
+}
+
+inline Rfft2dOptionsT *
+Rfft2dOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<Rfft2dOptionsT>(new Rfft2dOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void Rfft2dOptions::UnPackTo(Rfft2dOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<Rfft2dOptions>
+Rfft2dOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Rfft2dOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateRfft2dOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<Rfft2dOptions>
+CreateRfft2dOptions(flatbuffers::FlatBufferBuilder &_fbb, const Rfft2dOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const Rfft2dOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateRfft2dOptions(_fbb);
+}
+
+inline HashtableOptionsT *
+HashtableOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<HashtableOptionsT>(new HashtableOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void HashtableOptions::UnPackTo(HashtableOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = table_id();
+ _o->table_id = _e;
+ }
+ {
+ auto _e = key_dtype();
+ _o->key_dtype = _e;
+ }
+ {
+ auto _e = value_dtype();
+ _o->value_dtype = _e;
+ }
+}
+
+inline flatbuffers::Offset<HashtableOptions>
+HashtableOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const HashtableOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateHashtableOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<HashtableOptions>
+CreateHashtableOptions(flatbuffers::FlatBufferBuilder &_fbb, const HashtableOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const HashtableOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _table_id = _o->table_id;
+ auto _key_dtype = _o->key_dtype;
+ auto _value_dtype = _o->value_dtype;
+ return circle::CreateHashtableOptions(_fbb, _table_id, _key_dtype, _value_dtype);
+}
+
+inline HashtableFindOptionsT *
+HashtableFindOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<HashtableFindOptionsT>(new HashtableFindOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void HashtableFindOptions::UnPackTo(HashtableFindOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<HashtableFindOptions>
+HashtableFindOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const HashtableFindOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateHashtableFindOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<HashtableFindOptions>
+CreateHashtableFindOptions(flatbuffers::FlatBufferBuilder &_fbb, const HashtableFindOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const HashtableFindOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateHashtableFindOptions(_fbb);
+}
+
+inline HashtableImportOptionsT *
+HashtableImportOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<HashtableImportOptionsT>(new HashtableImportOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void
+HashtableImportOptions::UnPackTo(HashtableImportOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<HashtableImportOptions>
+HashtableImportOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb,
+ const HashtableImportOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateHashtableImportOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<HashtableImportOptions>
+CreateHashtableImportOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ const HashtableImportOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const HashtableImportOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateHashtableImportOptions(_fbb);
+}
+
+inline HashtableSizeOptionsT *
+HashtableSizeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<HashtableSizeOptionsT>(new HashtableSizeOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void HashtableSizeOptions::UnPackTo(HashtableSizeOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<HashtableSizeOptions>
+HashtableSizeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const HashtableSizeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateHashtableSizeOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<HashtableSizeOptions>
+CreateHashtableSizeOptions(flatbuffers::FlatBufferBuilder &_fbb, const HashtableSizeOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const HashtableSizeOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateHashtableSizeOptions(_fbb);
+}
+
+inline VarHandleOptionsT *
+VarHandleOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<VarHandleOptionsT>(new VarHandleOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void VarHandleOptions::UnPackTo(VarHandleOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = container();
+ if (_e)
+ _o->container = _e->str();
+ }
+ {
+ auto _e = shared_name();
+ if (_e)
+ _o->shared_name = _e->str();
+ }
+}
+
+inline flatbuffers::Offset<VarHandleOptions>
+VarHandleOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const VarHandleOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateVarHandleOptions(_fbb, _o, _rehasher);
+}
+
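+// String fields: UnPackTo copies the flatbuffer string into std::string only
+// when present; Create* maps an empty std::string back to offset 0, i.e. the
+// field is omitted from the serialized table rather than stored as "".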
+inline flatbuffers::Offset<VarHandleOptions>
+CreateVarHandleOptions(flatbuffers::FlatBufferBuilder &_fbb, const VarHandleOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const VarHandleOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _container = _o->container.empty() ? 0 : _fbb.CreateString(_o->container);
+ auto _shared_name = _o->shared_name.empty() ? 0 : _fbb.CreateString(_o->shared_name);
+ return circle::CreateVarHandleOptions(_fbb, _container, _shared_name);
+}
+
+inline ReadVariableOptionsT *
+ReadVariableOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<ReadVariableOptionsT>(new ReadVariableOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void ReadVariableOptions::UnPackTo(ReadVariableOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<ReadVariableOptions>
+ReadVariableOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReadVariableOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateReadVariableOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<ReadVariableOptions>
+CreateReadVariableOptions(flatbuffers::FlatBufferBuilder &_fbb, const ReadVariableOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const ReadVariableOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateReadVariableOptions(_fbb);
+}
+
+inline AssignVariableOptionsT *
+AssignVariableOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<AssignVariableOptionsT>(new AssignVariableOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void AssignVariableOptions::UnPackTo(AssignVariableOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+}
+
+inline flatbuffers::Offset<AssignVariableOptions>
+AssignVariableOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const AssignVariableOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateAssignVariableOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<AssignVariableOptions>
+CreateAssignVariableOptions(flatbuffers::FlatBufferBuilder &_fbb, const AssignVariableOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const AssignVariableOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ return circle::CreateAssignVariableOptions(_fbb);
+}
+
+inline RandomOptionsT *
+RandomOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<RandomOptionsT>(new RandomOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void RandomOptions::UnPackTo(RandomOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = seed();
+ _o->seed = _e;
+ }
+ {
+ auto _e = seed2();
+ _o->seed2 = _e;
+ }
+}
+
+inline flatbuffers::Offset<RandomOptions>
+RandomOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const RandomOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateRandomOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<RandomOptions>
+CreateRandomOptions(flatbuffers::FlatBufferBuilder &_fbb, const RandomOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const RandomOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _seed = _o->seed;
+ auto _seed2 = _o->seed2;
+ return circle::CreateRandomOptions(_fbb, _seed, _seed2);
+}
+
+inline BCQGatherOptionsT *
+BCQGatherOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<BCQGatherOptionsT>(new BCQGatherOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void BCQGatherOptions::UnPackTo(BCQGatherOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = input_hidden_size();
+ _o->input_hidden_size = _e;
+ }
+ {
+ auto _e = axis();
+ _o->axis = _e;
+ }
+}
+
+inline flatbuffers::Offset<BCQGatherOptions>
+BCQGatherOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BCQGatherOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateBCQGatherOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<BCQGatherOptions>
+CreateBCQGatherOptions(flatbuffers::FlatBufferBuilder &_fbb, const BCQGatherOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const BCQGatherOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _input_hidden_size = _o->input_hidden_size;
+ auto _axis = _o->axis;
+ return circle::CreateBCQGatherOptions(_fbb, _input_hidden_size, _axis);
+}
+
+inline BCQFullyConnectedOptionsT *
+BCQFullyConnectedOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<BCQFullyConnectedOptionsT>(new BCQFullyConnectedOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void
+BCQFullyConnectedOptions::UnPackTo(BCQFullyConnectedOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = weights_hidden_size();
+ _o->weights_hidden_size = _e;
+ }
+ {
+ auto _e = fused_activation_function();
+ _o->fused_activation_function = _e;
+ }
+}
+
+inline flatbuffers::Offset<BCQFullyConnectedOptions>
+BCQFullyConnectedOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb,
+ const BCQFullyConnectedOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateBCQFullyConnectedOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<BCQFullyConnectedOptions>
+CreateBCQFullyConnectedOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ const BCQFullyConnectedOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const BCQFullyConnectedOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _weights_hidden_size = _o->weights_hidden_size;
+ auto _fused_activation_function = _o->fused_activation_function;
+ return circle::CreateBCQFullyConnectedOptions(_fbb, _weights_hidden_size,
+ _fused_activation_function);
+}
+
+inline InstanceNormOptionsT *
+InstanceNormOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<InstanceNormOptionsT>(new InstanceNormOptionsT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void InstanceNormOptions::UnPackTo(InstanceNormOptionsT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = epsilon();
+ _o->epsilon = _e;
+ }
+ {
+ auto _e = fused_activation_function();
+ _o->fused_activation_function = _e;
+ }
+}
+
+inline flatbuffers::Offset<InstanceNormOptions>
+InstanceNormOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const InstanceNormOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateInstanceNormOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<InstanceNormOptions>
+CreateInstanceNormOptions(flatbuffers::FlatBufferBuilder &_fbb, const InstanceNormOptionsT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const InstanceNormOptionsT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _epsilon = _o->epsilon;
+ auto _fused_activation_function = _o->fused_activation_function;
+ return circle::CreateInstanceNormOptions(_fbb, _epsilon, _fused_activation_function);
+}
+
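+// OperatorCode round-trips both deprecated_builtin_code and builtin_code;
+// in the TFLite-derived circle schema the original opcode field was a byte,
+// so (presumably for backward compatibility) the old int8 field is kept
+// alongside the wider builtin_code enum.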
+inline OperatorCodeT *OperatorCode::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<OperatorCodeT>(new OperatorCodeT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void OperatorCode::UnPackTo(OperatorCodeT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = deprecated_builtin_code();
+ _o->deprecated_builtin_code = _e;
+ }
+ {
+ auto _e = custom_code();
+ if (_e)
+ _o->custom_code = _e->str();
+ }
+ {
+ auto _e = version();
+ _o->version = _e;
+ }
+ {
+ auto _e = builtin_code();
+ _o->builtin_code = _e;
+ }
+}
+
+inline flatbuffers::Offset<OperatorCode>
+OperatorCode::Pack(flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateOperatorCode(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<OperatorCode>
+CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const OperatorCodeT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _deprecated_builtin_code = _o->deprecated_builtin_code;
+ auto _custom_code = _o->custom_code.empty() ? 0 : _fbb.CreateString(_o->custom_code);
+ auto _version = _o->version;
+ auto _builtin_code = _o->builtin_code;
+ return circle::CreateOperatorCode(_fbb, _deprecated_builtin_code, _custom_code, _version,
+ _builtin_code);
+}
+
+inline OperatorT *Operator::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<OperatorT>(new OperatorT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
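+// Operator::UnPackTo copies scalar vectors (inputs/outputs/intermediates)
+// element by element, converts mutating_variable_inputs to bool, and unpacks
+// the builtin_options union by dispatching on builtin_options_type.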
+inline void Operator::UnPackTo(OperatorT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = opcode_index();
+ _o->opcode_index = _e;
+ }
+ {
+ auto _e = inputs();
+ if (_e)
+ {
+ _o->inputs.resize(_e->size());
+ for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++)
+ {
+ _o->inputs[_i] = _e->Get(_i);
+ }
+ }
+ }
+ {
+ auto _e = outputs();
+ if (_e)
+ {
+ _o->outputs.resize(_e->size());
+ for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++)
+ {
+ _o->outputs[_i] = _e->Get(_i);
+ }
+ }
+ }
+ {
+ auto _e = builtin_options_type();
+ _o->builtin_options.type = _e;
+ }
+ {
+ auto _e = builtin_options();
+ if (_e)
+ _o->builtin_options.value =
+ circle::BuiltinOptionsUnion::UnPack(_e, builtin_options_type(), _resolver);
+ }
+ {
+ auto _e = custom_options();
+ if (_e)
+ {
+ _o->custom_options.resize(_e->size());
+ std::copy(_e->begin(), _e->end(), _o->custom_options.begin());
+ }
+ }
+ {
+ auto _e = custom_options_format();
+ _o->custom_options_format = _e;
+ }
+ {
+ auto _e = mutating_variable_inputs();
+ if (_e)
+ {
+ _o->mutating_variable_inputs.resize(_e->size());
+ for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++)
+ {
+ _o->mutating_variable_inputs[_i] = _e->Get(_i) != 0;
+ }
+ }
+ }
+ {
+ auto _e = intermediates();
+ if (_e)
+ {
+ _o->intermediates.resize(_e->size());
+ for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++)
+ {
+ _o->intermediates[_i] = _e->Get(_i);
+ }
+ }
+ }
+}
+
+inline flatbuffers::Offset<Operator>
+Operator::Pack(flatbuffers::FlatBufferBuilder &_fbb, const OperatorT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateOperator(_fbb, _o, _rehasher);
+}
+
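+// CreateOperator: empty vectors are written as offset 0 (field absent), and
+// the union is serialized as a (type, value) pair via BuiltinOptionsUnion::Pack.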
+inline flatbuffers::Offset<Operator>
+CreateOperator(flatbuffers::FlatBufferBuilder &_fbb, const OperatorT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const OperatorT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _opcode_index = _o->opcode_index;
+ auto _inputs = _o->inputs.size() ? _fbb.CreateVector(_o->inputs) : 0;
+ auto _outputs = _o->outputs.size() ? _fbb.CreateVector(_o->outputs) : 0;
+ auto _builtin_options_type = _o->builtin_options.type;
+ auto _builtin_options = _o->builtin_options.Pack(_fbb);
+ auto _custom_options = _o->custom_options.size() ? _fbb.CreateVector(_o->custom_options) : 0;
+ auto _custom_options_format = _o->custom_options_format;
+ auto _mutating_variable_inputs =
+ _o->mutating_variable_inputs.size() ? _fbb.CreateVector(_o->mutating_variable_inputs) : 0;
+ auto _intermediates = _o->intermediates.size() ? _fbb.CreateVector(_o->intermediates) : 0;
+ return circle::CreateOperator(_fbb, _opcode_index, _inputs, _outputs, _builtin_options_type,
+ _builtin_options, _custom_options, _custom_options_format,
+ _mutating_variable_inputs, _intermediates);
+}
+
+inline SubGraphT *SubGraph::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<SubGraphT>(new SubGraphT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void SubGraph::UnPackTo(SubGraphT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = tensors();
+ if (_e)
+ {
+ _o->tensors.resize(_e->size());
+ for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++)
+ {
+ _o->tensors[_i] = std::unique_ptr<circle::TensorT>(_e->Get(_i)->UnPack(_resolver));
+ }
+ }
+ }
+ {
+ auto _e = inputs();
+ if (_e)
+ {
+ _o->inputs.resize(_e->size());
+ for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++)
+ {
+ _o->inputs[_i] = _e->Get(_i);
+ }
+ }
+ }
+ {
+ auto _e = outputs();
+ if (_e)
+ {
+ _o->outputs.resize(_e->size());
+ for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++)
+ {
+ _o->outputs[_i] = _e->Get(_i);
+ }
+ }
+ }
+ {
+ auto _e = operators();
+ if (_e)
+ {
+ _o->operators.resize(_e->size());
+ for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++)
+ {
+ _o->operators[_i] = std::unique_ptr<circle::OperatorT>(_e->Get(_i)->UnPack(_resolver));
+ }
+ }
+ }
+ {
+ auto _e = name();
+ if (_e)
+ _o->name = _e->str();
+ }
+ {
+ auto _e = data_format();
+ _o->data_format = _e;
+ }
+}
+
+inline flatbuffers::Offset<SubGraph>
+SubGraph::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateSubGraph(_fbb, _o, _rehasher);
+}
+
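+// First place _VectorArgs is actually used: vectors of tables are built with
+// the CreateVector overload that takes a capture-less generator callback plus
+// a state pointer, so _va carries the builder, the source object and the
+// rehasher into each element callback.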
+inline flatbuffers::Offset<SubGraph>
+CreateSubGraph(flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const SubGraphT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _tensors =
+ _o->tensors.size()
+ ? _fbb.CreateVector<flatbuffers::Offset<circle::Tensor>>(
+ _o->tensors.size(),
+ [](size_t i, _VectorArgs *__va) {
+ return CreateTensor(*__va->__fbb, __va->__o->tensors[i].get(), __va->__rehasher);
+ },
+ &_va)
+ : 0;
+ auto _inputs = _o->inputs.size() ? _fbb.CreateVector(_o->inputs) : 0;
+ auto _outputs = _o->outputs.size() ? _fbb.CreateVector(_o->outputs) : 0;
+ auto _operators =
+ _o->operators.size()
+ ? _fbb.CreateVector<flatbuffers::Offset<circle::Operator>>(
+ _o->operators.size(),
+ [](size_t i, _VectorArgs *__va) {
+ return CreateOperator(*__va->__fbb, __va->__o->operators[i].get(), __va->__rehasher);
+ },
+ &_va)
+ : 0;
+ auto _name = _o->name.empty() ? 0 : _fbb.CreateString(_o->name);
+ auto _data_format = _o->data_format;
+ return circle::CreateSubGraph(_fbb, _tensors, _inputs, _outputs, _operators, _name, _data_format);
+}
+
+inline BufferT *Buffer::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<BufferT>(new BufferT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void Buffer::UnPackTo(BufferT *_o, const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = data();
+ if (_e)
+ {
+ _o->data.resize(_e->size());
+ std::copy(_e->begin(), _e->end(), _o->data.begin());
+ }
+ }
+}
+
+inline flatbuffers::Offset<Buffer> Buffer::Pack(flatbuffers::FlatBufferBuilder &_fbb,
+ const BufferT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateBuffer(_fbb, _o, _rehasher);
+}
+
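+// CreateBuffer force-aligns the data vector to 16 bytes before writing it,
+// presumably so tensor payloads can be mapped and read with aligned/SIMD
+// loads regardless of where the buffer lands in the file.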
+inline flatbuffers::Offset<Buffer> CreateBuffer(flatbuffers::FlatBufferBuilder &_fbb,
+ const BufferT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const BufferT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ _fbb.ForceVectorAlignment(_o->data.size(), sizeof(uint8_t), 16);
+ auto _data = _o->data.size() ? _fbb.CreateVector(_o->data) : 0;
+ return circle::CreateBuffer(_fbb, _data);
+}
+
+inline MetadataT *Metadata::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<MetadataT>(new MetadataT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void Metadata::UnPackTo(MetadataT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = name();
+ if (_e)
+ _o->name = _e->str();
+ }
+ {
+ auto _e = buffer();
+ _o->buffer = _e;
+ }
+}
+
+inline flatbuffers::Offset<Metadata>
+Metadata::Pack(flatbuffers::FlatBufferBuilder &_fbb, const MetadataT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateMetadata(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<Metadata>
+CreateMetadata(flatbuffers::FlatBufferBuilder &_fbb, const MetadataT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const MetadataT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _name = _o->name.empty() ? 0 : _fbb.CreateString(_o->name);
+ auto _buffer = _o->buffer;
+ return circle::CreateMetadata(_fbb, _name, _buffer);
+}
+
+inline TensorMapT *TensorMap::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<TensorMapT>(new TensorMapT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void TensorMap::UnPackTo(TensorMapT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = name();
+ if (_e)
+ _o->name = _e->str();
+ }
+ {
+ auto _e = tensor_index();
+ _o->tensor_index = _e;
+ }
+}
+
+inline flatbuffers::Offset<TensorMap>
+TensorMap::Pack(flatbuffers::FlatBufferBuilder &_fbb, const TensorMapT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateTensorMap(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<TensorMap>
+CreateTensorMap(flatbuffers::FlatBufferBuilder &_fbb, const TensorMapT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const TensorMapT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _name = _o->name.empty() ? 0 : _fbb.CreateString(_o->name);
+ auto _tensor_index = _o->tensor_index;
+ return circle::CreateTensorMap(_fbb, _name, _tensor_index);
+}
+
+inline SignatureDefT *SignatureDef::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<SignatureDefT>(new SignatureDefT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void SignatureDef::UnPackTo(SignatureDefT *_o,
+ const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = inputs();
+ if (_e)
+ {
+ _o->inputs.resize(_e->size());
+ for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++)
+ {
+ _o->inputs[_i] = std::unique_ptr<circle::TensorMapT>(_e->Get(_i)->UnPack(_resolver));
+ }
+ }
+ }
+ {
+ auto _e = outputs();
+ if (_e)
+ {
+ _o->outputs.resize(_e->size());
+ for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++)
+ {
+ _o->outputs[_i] = std::unique_ptr<circle::TensorMapT>(_e->Get(_i)->UnPack(_resolver));
+ }
+ }
+ }
+ {
+ auto _e = signature_key();
+ if (_e)
+ _o->signature_key = _e->str();
+ }
+ {
+ auto _e = subgraph_index();
+ _o->subgraph_index = _e;
+ }
+}
+
+inline flatbuffers::Offset<SignatureDef>
+SignatureDef::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SignatureDefT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateSignatureDef(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<SignatureDef>
+CreateSignatureDef(flatbuffers::FlatBufferBuilder &_fbb, const SignatureDefT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const SignatureDefT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _inputs =
+ _o->inputs.size()
+ ? _fbb.CreateVector<flatbuffers::Offset<circle::TensorMap>>(
+ _o->inputs.size(),
+ [](size_t i, _VectorArgs *__va) {
+ return CreateTensorMap(*__va->__fbb, __va->__o->inputs[i].get(), __va->__rehasher);
+ },
+ &_va)
+ : 0;
+ auto _outputs =
+ _o->outputs.size()
+ ? _fbb.CreateVector<flatbuffers::Offset<circle::TensorMap>>(
+ _o->outputs.size(),
+ [](size_t i, _VectorArgs *__va) {
+ return CreateTensorMap(*__va->__fbb, __va->__o->outputs[i].get(), __va->__rehasher);
+ },
+ &_va)
+ : 0;
+ auto _signature_key = _o->signature_key.empty() ? 0 : _fbb.CreateString(_o->signature_key);
+ auto _subgraph_index = _o->subgraph_index;
+ return circle::CreateSignatureDef(_fbb, _inputs, _outputs, _signature_key, _subgraph_index);
+}
+
+inline ModelT *Model::UnPack(const flatbuffers::resolver_function_t *_resolver) const
+{
+ auto _o = std::unique_ptr<ModelT>(new ModelT());
+ UnPackTo(_o.get(), _resolver);
+ return _o.release();
+}
+
+inline void Model::UnPackTo(ModelT *_o, const flatbuffers::resolver_function_t *_resolver) const
+{
+ (void)_o;
+ (void)_resolver;
+ {
+ auto _e = version();
+ _o->version = _e;
+ }
+ {
+ auto _e = operator_codes();
+ if (_e)
+ {
+ _o->operator_codes.resize(_e->size());
+ for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++)
+ {
+ _o->operator_codes[_i] =
+ std::unique_ptr<circle::OperatorCodeT>(_e->Get(_i)->UnPack(_resolver));
+ }
+ }
+ }
+ {
+ auto _e = subgraphs();
+ if (_e)
+ {
+ _o->subgraphs.resize(_e->size());
+ for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++)
+ {
+ _o->subgraphs[_i] = std::unique_ptr<circle::SubGraphT>(_e->Get(_i)->UnPack(_resolver));
+ }
+ }
+ }
+ {
+ auto _e = description();
+ if (_e)
+ _o->description = _e->str();
+ }
+ {
+ auto _e = buffers();
+ if (_e)
+ {
+ _o->buffers.resize(_e->size());
+ for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++)
+ {
+ _o->buffers[_i] = std::unique_ptr<circle::BufferT>(_e->Get(_i)->UnPack(_resolver));
+ }
+ }
+ }
+ {
+ auto _e = metadata_buffer();
+ if (_e)
+ {
+ _o->metadata_buffer.resize(_e->size());
+ for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++)
+ {
+ _o->metadata_buffer[_i] = _e->Get(_i);
+ }
+ }
+ }
+ {
+ auto _e = metadata();
+ if (_e)
+ {
+ _o->metadata.resize(_e->size());
+ for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++)
+ {
+ _o->metadata[_i] = std::unique_ptr<circle::MetadataT>(_e->Get(_i)->UnPack(_resolver));
+ }
+ }
+ }
+ {
+ auto _e = signature_defs();
+ if (_e)
+ {
+ _o->signature_defs.resize(_e->size());
+ for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++)
+ {
+ _o->signature_defs[_i] =
+ std::unique_ptr<circle::SignatureDefT>(_e->Get(_i)->UnPack(_resolver));
+ }
+ }
+ }
+}
+
+inline flatbuffers::Offset<Model> Model::Pack(flatbuffers::FlatBufferBuilder &_fbb,
+ const ModelT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ return CreateModel(_fbb, _o, _rehasher);
+}
+
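+// CreateModel packs every nested table vector (operator_codes, subgraphs,
+// buffers, metadata, signature_defs) with the same generator-callback
+// pattern used in CreateSubGraph above.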
+inline flatbuffers::Offset<Model> CreateModel(flatbuffers::FlatBufferBuilder &_fbb,
+ const ModelT *_o,
+ const flatbuffers::rehasher_function_t *_rehasher)
+{
+ (void)_rehasher;
+ (void)_o;
+ struct _VectorArgs
+ {
+ flatbuffers::FlatBufferBuilder *__fbb;
+ const ModelT *__o;
+ const flatbuffers::rehasher_function_t *__rehasher;
+ } _va = {&_fbb, _o, _rehasher};
+ (void)_va;
+ auto _version = _o->version;
+ auto _operator_codes =
+ _o->operator_codes.size()
+ ? _fbb.CreateVector<flatbuffers::Offset<circle::OperatorCode>>(
+ _o->operator_codes.size(),
+ [](size_t i, _VectorArgs *__va) {
+ return CreateOperatorCode(*__va->__fbb, __va->__o->operator_codes[i].get(),
+ __va->__rehasher);
+ },
+ &_va)
+ : 0;
+ auto _subgraphs =
+ _o->subgraphs.size()
+ ? _fbb.CreateVector<flatbuffers::Offset<circle::SubGraph>>(
+ _o->subgraphs.size(),
+ [](size_t i, _VectorArgs *__va) {
+ return CreateSubGraph(*__va->__fbb, __va->__o->subgraphs[i].get(), __va->__rehasher);
+ },
+ &_va)
+ : 0;
+ auto _description = _o->description.empty() ? 0 : _fbb.CreateString(_o->description);
+ auto _buffers =
+ _o->buffers.size()
+ ? _fbb.CreateVector<flatbuffers::Offset<circle::Buffer>>(
+ _o->buffers.size(),
+ [](size_t i, _VectorArgs *__va) {
+ return CreateBuffer(*__va->__fbb, __va->__o->buffers[i].get(), __va->__rehasher);
+ },
+ &_va)
+ : 0;
+ auto _metadata_buffer = _o->metadata_buffer.size() ? _fbb.CreateVector(_o->metadata_buffer) : 0;
+ auto _metadata =
+ _o->metadata.size()
+ ? _fbb.CreateVector<flatbuffers::Offset<circle::Metadata>>(
+ _o->metadata.size(),
+ [](size_t i, _VectorArgs *__va) {
+ return CreateMetadata(*__va->__fbb, __va->__o->metadata[i].get(), __va->__rehasher);
+ },
+ &_va)
+ : 0;
+ auto _signature_defs =
+ _o->signature_defs.size()
+ ? _fbb.CreateVector<flatbuffers::Offset<circle::SignatureDef>>(
+ _o->signature_defs.size(),
+ [](size_t i, _VectorArgs *__va) {
+ return CreateSignatureDef(*__va->__fbb, __va->__o->signature_defs[i].get(),
+ __va->__rehasher);
+ },
+ &_va)
+ : 0;
+ return circle::CreateModel(_fbb, _version, _operator_codes, _subgraphs, _description, _buffers,
+ _metadata_buffer, _metadata, _signature_defs);
+}
+
+inline bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const void *obj,
+ QuantizationDetails type)
+{
+ switch (type)
+ {
+ case QuantizationDetails_NONE:
+ {
+ return true;
+ }
+ case QuantizationDetails_CustomQuantization:
+ {
+ auto ptr = reinterpret_cast<const circle::CustomQuantization *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ default:
+ return true;
+ }
+}
+
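+// Union vectors are verified as two parallel vectors: one of offsets and one
+// of type tags. Both missing is legal (field absent); a size mismatch or any
+// element failing its per-type table check rejects the whole buffer.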
+inline bool
+VerifyQuantizationDetailsVector(flatbuffers::Verifier &verifier,
+ const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
+ const flatbuffers::Vector<uint8_t> *types)
+{
+ if (!values || !types)
+ return !values && !types;
+ if (values->size() != types->size())
+ return false;
+ for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i)
+ {
+ if (!VerifyQuantizationDetails(verifier, values->Get(i),
+ types->GetEnum<QuantizationDetails>(i)))
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+inline void *QuantizationDetailsUnion::UnPack(const void *obj, QuantizationDetails type,
+ const flatbuffers::resolver_function_t *resolver)
+{
+ switch (type)
+ {
+ case QuantizationDetails_CustomQuantization:
+ {
+ auto ptr = reinterpret_cast<const circle::CustomQuantization *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ default:
+ return nullptr;
+ }
+}
+
+inline flatbuffers::Offset<void>
+QuantizationDetailsUnion::Pack(flatbuffers::FlatBufferBuilder &_fbb,
+ const flatbuffers::rehasher_function_t *_rehasher) const
+{
+ switch (type)
+ {
+ case QuantizationDetails_CustomQuantization:
+ {
+ auto ptr = reinterpret_cast<const circle::CustomQuantizationT *>(value);
+ return CreateCustomQuantization(_fbb, ptr, _rehasher).Union();
+ }
+ default:
+ return 0;
+ }
+}
+
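+// The union wrapper owns its value: the copy constructor deep-copies the
+// active member and Reset deletes it through the concrete T type before
+// returning the union to the NONE state.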
+inline QuantizationDetailsUnion::QuantizationDetailsUnion(const QuantizationDetailsUnion &u)
+ : type(u.type), value(nullptr)
+{
+ switch (type)
+ {
+ case QuantizationDetails_CustomQuantization:
+ {
+ value =
+ new circle::CustomQuantizationT(*reinterpret_cast<circle::CustomQuantizationT *>(u.value));
+ break;
+ }
+ default:
+ break;
+ }
+}
+
+inline void QuantizationDetailsUnion::Reset()
+{
+ switch (type)
+ {
+ case QuantizationDetails_CustomQuantization:
+ {
+ auto ptr = reinterpret_cast<circle::CustomQuantizationT *>(value);
+ delete ptr;
+ break;
+ }
+ default:
+ break;
+ }
+ value = nullptr;
+ type = QuantizationDetails_NONE;
+}
+
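+// SparseIndexVector repeats the same verify/unpack/pack/copy/reset machinery
+// for its three alternatives (Int32/Uint16/Uint8 index vectors).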
+inline bool VerifySparseIndexVector(flatbuffers::Verifier &verifier, const void *obj,
+ SparseIndexVector type)
+{
+ switch (type)
+ {
+ case SparseIndexVector_NONE:
+ {
+ return true;
+ }
+ case SparseIndexVector_Int32Vector:
+ {
+ auto ptr = reinterpret_cast<const circle::Int32Vector *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case SparseIndexVector_Uint16Vector:
+ {
+ auto ptr = reinterpret_cast<const circle::Uint16Vector *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case SparseIndexVector_Uint8Vector:
+ {
+ auto ptr = reinterpret_cast<const circle::Uint8Vector *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ default:
+ return true;
+ }
+}
+
+inline bool
+VerifySparseIndexVectorVector(flatbuffers::Verifier &verifier,
+ const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
+ const flatbuffers::Vector<uint8_t> *types)
+{
+ if (!values || !types)
+ return !values && !types;
+ if (values->size() != types->size())
+ return false;
+ for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i)
+ {
+ if (!VerifySparseIndexVector(verifier, values->Get(i), types->GetEnum<SparseIndexVector>(i)))
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+inline void *SparseIndexVectorUnion::UnPack(const void *obj, SparseIndexVector type,
+ const flatbuffers::resolver_function_t *resolver)
+{
+ switch (type)
+ {
+ case SparseIndexVector_Int32Vector:
+ {
+ auto ptr = reinterpret_cast<const circle::Int32Vector *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case SparseIndexVector_Uint16Vector:
+ {
+ auto ptr = reinterpret_cast<const circle::Uint16Vector *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case SparseIndexVector_Uint8Vector:
+ {
+ auto ptr = reinterpret_cast<const circle::Uint8Vector *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ default:
+ return nullptr;
+ }
+}
+
+inline flatbuffers::Offset<void>
+SparseIndexVectorUnion::Pack(flatbuffers::FlatBufferBuilder &_fbb,
+ const flatbuffers::rehasher_function_t *_rehasher) const
+{
+ switch (type)
+ {
+ case SparseIndexVector_Int32Vector:
+ {
+ auto ptr = reinterpret_cast<const circle::Int32VectorT *>(value);
+ return CreateInt32Vector(_fbb, ptr, _rehasher).Union();
+ }
+ case SparseIndexVector_Uint16Vector:
+ {
+ auto ptr = reinterpret_cast<const circle::Uint16VectorT *>(value);
+ return CreateUint16Vector(_fbb, ptr, _rehasher).Union();
+ }
+ case SparseIndexVector_Uint8Vector:
+ {
+ auto ptr = reinterpret_cast<const circle::Uint8VectorT *>(value);
+ return CreateUint8Vector(_fbb, ptr, _rehasher).Union();
+ }
+ default:
+ return 0;
+ }
+}
+
+inline SparseIndexVectorUnion::SparseIndexVectorUnion(const SparseIndexVectorUnion &u)
+ : type(u.type), value(nullptr)
+{
+ switch (type)
+ {
+ case SparseIndexVector_Int32Vector:
+ {
+ value = new circle::Int32VectorT(*reinterpret_cast<circle::Int32VectorT *>(u.value));
+ break;
+ }
+ case SparseIndexVector_Uint16Vector:
+ {
+ value = new circle::Uint16VectorT(*reinterpret_cast<circle::Uint16VectorT *>(u.value));
+ break;
+ }
+ case SparseIndexVector_Uint8Vector:
+ {
+ value = new circle::Uint8VectorT(*reinterpret_cast<circle::Uint8VectorT *>(u.value));
+ break;
+ }
+ default:
+ break;
+ }
+}
+
+inline void SparseIndexVectorUnion::Reset()
+{
+ switch (type)
+ {
+ case SparseIndexVector_Int32Vector:
+ {
+ auto ptr = reinterpret_cast<circle::Int32VectorT *>(value);
+ delete ptr;
+ break;
+ }
+ case SparseIndexVector_Uint16Vector:
+ {
+ auto ptr = reinterpret_cast<circle::Uint16VectorT *>(value);
+ delete ptr;
+ break;
+ }
+ case SparseIndexVector_Uint8Vector:
+ {
+ auto ptr = reinterpret_cast<circle::Uint8VectorT *>(value);
+ delete ptr;
+ break;
+ }
+ default:
+ break;
+ }
+ value = nullptr;
+ type = SparseIndexVector_NONE;
+}
+
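+// Note the default case: an unknown BuiltinOptions tag verifies as true, so
+// a newer file carrying an option type this reader does not know still
+// passes verification (the payload is simply ignored).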
+inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj,
+ BuiltinOptions type)
+{
+ switch (type)
+ {
+ case BuiltinOptions_NONE:
+ {
+ return true;
+ }
+ case BuiltinOptions_Conv2DOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::Conv2DOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_DepthwiseConv2DOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::DepthwiseConv2DOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ConcatEmbeddingsOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ConcatEmbeddingsOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_LSHProjectionOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::LSHProjectionOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_Pool2DOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::Pool2DOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SVDFOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SVDFOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_RNNOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::RNNOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_FullyConnectedOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::FullyConnectedOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SoftmaxOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SoftmaxOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ConcatenationOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ConcatenationOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_AddOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::AddOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_L2NormOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::L2NormOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_LocalResponseNormalizationOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::LocalResponseNormalizationOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_LSTMOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::LSTMOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ResizeBilinearOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ResizeBilinearOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_CallOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::CallOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ReshapeOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ReshapeOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SkipGramOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SkipGramOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SpaceToDepthOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SpaceToDepthOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_EmbeddingLookupSparseOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::EmbeddingLookupSparseOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_MulOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::MulOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_PadOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::PadOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_GatherOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::GatherOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_BatchToSpaceNDOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::BatchToSpaceNDOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SpaceToBatchNDOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SpaceToBatchNDOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_TransposeOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::TransposeOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ReducerOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ReducerOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SubOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SubOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_DivOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::DivOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SqueezeOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SqueezeOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SequenceRNNOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SequenceRNNOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_StridedSliceOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::StridedSliceOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ExpOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ExpOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_TopKV2Options:
+ {
+ auto ptr = reinterpret_cast<const circle::TopKV2Options *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SplitOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SplitOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_LogSoftmaxOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::LogSoftmaxOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_CastOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::CastOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_DequantizeOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::DequantizeOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_MaximumMinimumOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::MaximumMinimumOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ArgMaxOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ArgMaxOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_LessOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::LessOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_NegOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::NegOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_PadV2Options:
+ {
+ auto ptr = reinterpret_cast<const circle::PadV2Options *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_GreaterOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::GreaterOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_GreaterEqualOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::GreaterEqualOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_LessEqualOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::LessEqualOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SelectOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SelectOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SliceOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SliceOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_TransposeConvOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::TransposeConvOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SparseToDenseOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SparseToDenseOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_TileOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::TileOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ExpandDimsOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ExpandDimsOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_EqualOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::EqualOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_NotEqualOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::NotEqualOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ShapeOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ShapeOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_PowOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::PowOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ArgMinOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ArgMinOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_FakeQuantOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::FakeQuantOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_PackOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::PackOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_LogicalOrOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::LogicalOrOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_OneHotOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::OneHotOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_LogicalAndOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::LogicalAndOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_LogicalNotOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::LogicalNotOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_UnpackOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::UnpackOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_FloorDivOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::FloorDivOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SquareOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SquareOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ZerosLikeOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ZerosLikeOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_FillOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::FillOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_BidirectionalSequenceLSTMOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::BidirectionalSequenceLSTMOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_BidirectionalSequenceRNNOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::BidirectionalSequenceRNNOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_UnidirectionalSequenceLSTMOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::UnidirectionalSequenceLSTMOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_FloorModOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::FloorModOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_RangeOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::RangeOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ResizeNearestNeighborOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ResizeNearestNeighborOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_LeakyReluOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::LeakyReluOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SquaredDifferenceOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SquaredDifferenceOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_MirrorPadOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::MirrorPadOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_AbsOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::AbsOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SplitVOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SplitVOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_UniqueOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::UniqueOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ReverseV2Options:
+ {
+ auto ptr = reinterpret_cast<const circle::ReverseV2Options *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_AddNOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::AddNOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_GatherNdOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::GatherNdOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_CosOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::CosOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_WhereOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::WhereOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_RankOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::RankOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ReverseSequenceOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ReverseSequenceOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_MatrixDiagOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::MatrixDiagOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_QuantizeOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::QuantizeOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_MatrixSetDiagOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::MatrixSetDiagOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_HardSwishOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::HardSwishOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_IfOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::IfOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_WhileOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::WhileOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_DepthToSpaceOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::DepthToSpaceOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_NonMaxSuppressionV4Options:
+ {
+ auto ptr = reinterpret_cast<const circle::NonMaxSuppressionV4Options *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_NonMaxSuppressionV5Options:
+ {
+ auto ptr = reinterpret_cast<const circle::NonMaxSuppressionV5Options *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ScatterNdOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ScatterNdOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SelectV2Options:
+ {
+ auto ptr = reinterpret_cast<const circle::SelectV2Options *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_DensifyOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::DensifyOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SegmentSumOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SegmentSumOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_BatchMatMulOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::BatchMatMulOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_CumsumOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::CumsumOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_CallOnceOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::CallOnceOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_BroadcastToOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::BroadcastToOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_Rfft2dOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::Rfft2dOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_Conv3DOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::Conv3DOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_HashtableOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::HashtableOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_HashtableFindOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::HashtableFindOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_HashtableImportOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::HashtableImportOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_HashtableSizeOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::HashtableSizeOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_VarHandleOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::VarHandleOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ReadVariableOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ReadVariableOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_AssignVariableOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::AssignVariableOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_RandomOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::RandomOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_BCQGatherOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::BCQGatherOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_BCQFullyConnectedOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::BCQFullyConnectedOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_InstanceNormOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::InstanceNormOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ default:
+ return true;
+ }
+}
+
+inline bool VerifyBuiltinOptionsVector(flatbuffers::Verifier &verifier,
+ const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
+ const flatbuffers::Vector<uint8_t> *types)
+{
+ if (!values || !types)
+ return !values && !types;
+ if (values->size() != types->size())
+ return false;
+ for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i)
+ {
+ if (!VerifyBuiltinOptions(verifier, values->Get(i), types->GetEnum<BuiltinOptions>(i)))
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
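+// BuiltinOptionsUnion::UnPack heap-allocates the matching object-API table
+// and returns it as void *; ownership transfers to the union value, which
+// Reset later deletes through the correct concrete type.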
+inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type,
+ const flatbuffers::resolver_function_t *resolver)
+{
+ switch (type)
+ {
+ case BuiltinOptions_Conv2DOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::Conv2DOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_DepthwiseConv2DOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::DepthwiseConv2DOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_ConcatEmbeddingsOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ConcatEmbeddingsOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_LSHProjectionOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::LSHProjectionOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_Pool2DOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::Pool2DOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_SVDFOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SVDFOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_RNNOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::RNNOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_FullyConnectedOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::FullyConnectedOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_SoftmaxOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SoftmaxOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_ConcatenationOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ConcatenationOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_AddOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::AddOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_L2NormOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::L2NormOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_LocalResponseNormalizationOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::LocalResponseNormalizationOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_LSTMOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::LSTMOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_ResizeBilinearOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ResizeBilinearOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_CallOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::CallOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_ReshapeOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ReshapeOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_SkipGramOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SkipGramOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_SpaceToDepthOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SpaceToDepthOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_EmbeddingLookupSparseOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::EmbeddingLookupSparseOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_MulOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::MulOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_PadOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::PadOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_GatherOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::GatherOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_BatchToSpaceNDOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::BatchToSpaceNDOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_SpaceToBatchNDOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SpaceToBatchNDOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_TransposeOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::TransposeOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_ReducerOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ReducerOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_SubOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SubOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_DivOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::DivOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_SqueezeOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SqueezeOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_SequenceRNNOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SequenceRNNOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_StridedSliceOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::StridedSliceOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_ExpOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ExpOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_TopKV2Options:
+ {
+ auto ptr = reinterpret_cast<const circle::TopKV2Options *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_SplitOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SplitOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_LogSoftmaxOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::LogSoftmaxOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_CastOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::CastOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_DequantizeOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::DequantizeOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_MaximumMinimumOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::MaximumMinimumOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_ArgMaxOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ArgMaxOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_LessOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::LessOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_NegOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::NegOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_PadV2Options:
+ {
+ auto ptr = reinterpret_cast<const circle::PadV2Options *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_GreaterOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::GreaterOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_GreaterEqualOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::GreaterEqualOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_LessEqualOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::LessEqualOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_SelectOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SelectOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_SliceOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SliceOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_TransposeConvOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::TransposeConvOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_SparseToDenseOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SparseToDenseOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_TileOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::TileOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_ExpandDimsOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ExpandDimsOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_EqualOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::EqualOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_NotEqualOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::NotEqualOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_ShapeOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ShapeOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_PowOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::PowOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_ArgMinOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ArgMinOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_FakeQuantOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::FakeQuantOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_PackOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::PackOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_LogicalOrOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::LogicalOrOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_OneHotOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::OneHotOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_LogicalAndOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::LogicalAndOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_LogicalNotOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::LogicalNotOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_UnpackOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::UnpackOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_FloorDivOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::FloorDivOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_SquareOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SquareOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_ZerosLikeOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ZerosLikeOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_FillOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::FillOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_BidirectionalSequenceLSTMOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::BidirectionalSequenceLSTMOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_BidirectionalSequenceRNNOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::BidirectionalSequenceRNNOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_UnidirectionalSequenceLSTMOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::UnidirectionalSequenceLSTMOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_FloorModOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::FloorModOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_RangeOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::RangeOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_ResizeNearestNeighborOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ResizeNearestNeighborOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_LeakyReluOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::LeakyReluOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_SquaredDifferenceOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SquaredDifferenceOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_MirrorPadOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::MirrorPadOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_AbsOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::AbsOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_SplitVOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SplitVOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_UniqueOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::UniqueOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_ReverseV2Options:
+ {
+ auto ptr = reinterpret_cast<const circle::ReverseV2Options *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_AddNOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::AddNOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_GatherNdOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::GatherNdOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_CosOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::CosOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_WhereOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::WhereOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_RankOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::RankOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_ReverseSequenceOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ReverseSequenceOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_MatrixDiagOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::MatrixDiagOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_QuantizeOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::QuantizeOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_MatrixSetDiagOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::MatrixSetDiagOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_HardSwishOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::HardSwishOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_IfOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::IfOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_WhileOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::WhileOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_DepthToSpaceOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::DepthToSpaceOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_NonMaxSuppressionV4Options:
+ {
+ auto ptr = reinterpret_cast<const circle::NonMaxSuppressionV4Options *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_NonMaxSuppressionV5Options:
+ {
+ auto ptr = reinterpret_cast<const circle::NonMaxSuppressionV5Options *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_ScatterNdOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ScatterNdOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_SelectV2Options:
+ {
+ auto ptr = reinterpret_cast<const circle::SelectV2Options *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_DensifyOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::DensifyOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_SegmentSumOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SegmentSumOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_BatchMatMulOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::BatchMatMulOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_CumsumOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::CumsumOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_CallOnceOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::CallOnceOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_BroadcastToOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::BroadcastToOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_Rfft2dOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::Rfft2dOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_Conv3DOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::Conv3DOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_HashtableOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::HashtableOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_HashtableFindOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::HashtableFindOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_HashtableImportOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::HashtableImportOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_HashtableSizeOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::HashtableSizeOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_VarHandleOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::VarHandleOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_ReadVariableOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ReadVariableOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_AssignVariableOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::AssignVariableOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_RandomOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::RandomOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_BCQGatherOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::BCQGatherOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_BCQFullyConnectedOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::BCQFullyConnectedOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ case BuiltinOptions_InstanceNormOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::InstanceNormOptions *>(obj);
+ return ptr->UnPack(resolver);
+ }
+ default:
+ return nullptr;
+ }
+}
+
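+// BuiltinOptionsUnion::Pack is the inverse of UnPack: it re-serializes the
+// owned native object into `_fbb` and returns the untyped union offset
+// (0 for BuiltinOptions_NONE). A minimal usage sketch, assuming a populated
+// native operator object `circle::OperatorT op`:
+//   flatbuffers::FlatBufferBuilder fbb;
+//   flatbuffers::Offset<void> opts = op.builtin_options.Pack(fbb);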
+inline flatbuffers::Offset<void>
+BuiltinOptionsUnion::Pack(flatbuffers::FlatBufferBuilder &_fbb,
+ const flatbuffers::rehasher_function_t *_rehasher) const
+{
+ switch (type)
+ {
+ case BuiltinOptions_Conv2DOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::Conv2DOptionsT *>(value);
+ return CreateConv2DOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_DepthwiseConv2DOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::DepthwiseConv2DOptionsT *>(value);
+ return CreateDepthwiseConv2DOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_ConcatEmbeddingsOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ConcatEmbeddingsOptionsT *>(value);
+ return CreateConcatEmbeddingsOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_LSHProjectionOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::LSHProjectionOptionsT *>(value);
+ return CreateLSHProjectionOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_Pool2DOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::Pool2DOptionsT *>(value);
+ return CreatePool2DOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_SVDFOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SVDFOptionsT *>(value);
+ return CreateSVDFOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_RNNOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::RNNOptionsT *>(value);
+ return CreateRNNOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_FullyConnectedOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::FullyConnectedOptionsT *>(value);
+ return CreateFullyConnectedOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_SoftmaxOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SoftmaxOptionsT *>(value);
+ return CreateSoftmaxOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_ConcatenationOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ConcatenationOptionsT *>(value);
+ return CreateConcatenationOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_AddOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::AddOptionsT *>(value);
+ return CreateAddOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_L2NormOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::L2NormOptionsT *>(value);
+ return CreateL2NormOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_LocalResponseNormalizationOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::LocalResponseNormalizationOptionsT *>(value);
+ return CreateLocalResponseNormalizationOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_LSTMOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::LSTMOptionsT *>(value);
+ return CreateLSTMOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_ResizeBilinearOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ResizeBilinearOptionsT *>(value);
+ return CreateResizeBilinearOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_CallOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::CallOptionsT *>(value);
+ return CreateCallOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_ReshapeOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ReshapeOptionsT *>(value);
+ return CreateReshapeOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_SkipGramOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SkipGramOptionsT *>(value);
+ return CreateSkipGramOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_SpaceToDepthOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SpaceToDepthOptionsT *>(value);
+ return CreateSpaceToDepthOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_EmbeddingLookupSparseOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::EmbeddingLookupSparseOptionsT *>(value);
+ return CreateEmbeddingLookupSparseOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_MulOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::MulOptionsT *>(value);
+ return CreateMulOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_PadOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::PadOptionsT *>(value);
+ return CreatePadOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_GatherOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::GatherOptionsT *>(value);
+ return CreateGatherOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_BatchToSpaceNDOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::BatchToSpaceNDOptionsT *>(value);
+ return CreateBatchToSpaceNDOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_SpaceToBatchNDOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SpaceToBatchNDOptionsT *>(value);
+ return CreateSpaceToBatchNDOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_TransposeOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::TransposeOptionsT *>(value);
+ return CreateTransposeOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_ReducerOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ReducerOptionsT *>(value);
+ return CreateReducerOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_SubOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SubOptionsT *>(value);
+ return CreateSubOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_DivOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::DivOptionsT *>(value);
+ return CreateDivOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_SqueezeOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SqueezeOptionsT *>(value);
+ return CreateSqueezeOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_SequenceRNNOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SequenceRNNOptionsT *>(value);
+ return CreateSequenceRNNOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_StridedSliceOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::StridedSliceOptionsT *>(value);
+ return CreateStridedSliceOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_ExpOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ExpOptionsT *>(value);
+ return CreateExpOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_TopKV2Options:
+ {
+ auto ptr = reinterpret_cast<const circle::TopKV2OptionsT *>(value);
+ return CreateTopKV2Options(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_SplitOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SplitOptionsT *>(value);
+ return CreateSplitOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_LogSoftmaxOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::LogSoftmaxOptionsT *>(value);
+ return CreateLogSoftmaxOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_CastOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::CastOptionsT *>(value);
+ return CreateCastOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_DequantizeOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::DequantizeOptionsT *>(value);
+ return CreateDequantizeOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_MaximumMinimumOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::MaximumMinimumOptionsT *>(value);
+ return CreateMaximumMinimumOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_ArgMaxOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ArgMaxOptionsT *>(value);
+ return CreateArgMaxOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_LessOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::LessOptionsT *>(value);
+ return CreateLessOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_NegOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::NegOptionsT *>(value);
+ return CreateNegOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_PadV2Options:
+ {
+ auto ptr = reinterpret_cast<const circle::PadV2OptionsT *>(value);
+ return CreatePadV2Options(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_GreaterOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::GreaterOptionsT *>(value);
+ return CreateGreaterOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_GreaterEqualOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::GreaterEqualOptionsT *>(value);
+ return CreateGreaterEqualOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_LessEqualOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::LessEqualOptionsT *>(value);
+ return CreateLessEqualOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_SelectOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SelectOptionsT *>(value);
+ return CreateSelectOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_SliceOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SliceOptionsT *>(value);
+ return CreateSliceOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_TransposeConvOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::TransposeConvOptionsT *>(value);
+ return CreateTransposeConvOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_SparseToDenseOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SparseToDenseOptionsT *>(value);
+ return CreateSparseToDenseOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_TileOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::TileOptionsT *>(value);
+ return CreateTileOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_ExpandDimsOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ExpandDimsOptionsT *>(value);
+ return CreateExpandDimsOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_EqualOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::EqualOptionsT *>(value);
+ return CreateEqualOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_NotEqualOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::NotEqualOptionsT *>(value);
+ return CreateNotEqualOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_ShapeOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ShapeOptionsT *>(value);
+ return CreateShapeOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_PowOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::PowOptionsT *>(value);
+ return CreatePowOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_ArgMinOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ArgMinOptionsT *>(value);
+ return CreateArgMinOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_FakeQuantOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::FakeQuantOptionsT *>(value);
+ return CreateFakeQuantOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_PackOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::PackOptionsT *>(value);
+ return CreatePackOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_LogicalOrOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::LogicalOrOptionsT *>(value);
+ return CreateLogicalOrOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_OneHotOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::OneHotOptionsT *>(value);
+ return CreateOneHotOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_LogicalAndOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::LogicalAndOptionsT *>(value);
+ return CreateLogicalAndOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_LogicalNotOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::LogicalNotOptionsT *>(value);
+ return CreateLogicalNotOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_UnpackOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::UnpackOptionsT *>(value);
+ return CreateUnpackOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_FloorDivOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::FloorDivOptionsT *>(value);
+ return CreateFloorDivOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_SquareOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SquareOptionsT *>(value);
+ return CreateSquareOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_ZerosLikeOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ZerosLikeOptionsT *>(value);
+ return CreateZerosLikeOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_FillOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::FillOptionsT *>(value);
+ return CreateFillOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_BidirectionalSequenceLSTMOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::BidirectionalSequenceLSTMOptionsT *>(value);
+ return CreateBidirectionalSequenceLSTMOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_BidirectionalSequenceRNNOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::BidirectionalSequenceRNNOptionsT *>(value);
+ return CreateBidirectionalSequenceRNNOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_UnidirectionalSequenceLSTMOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::UnidirectionalSequenceLSTMOptionsT *>(value);
+ return CreateUnidirectionalSequenceLSTMOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_FloorModOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::FloorModOptionsT *>(value);
+ return CreateFloorModOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_RangeOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::RangeOptionsT *>(value);
+ return CreateRangeOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_ResizeNearestNeighborOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ResizeNearestNeighborOptionsT *>(value);
+ return CreateResizeNearestNeighborOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_LeakyReluOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::LeakyReluOptionsT *>(value);
+ return CreateLeakyReluOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_SquaredDifferenceOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SquaredDifferenceOptionsT *>(value);
+ return CreateSquaredDifferenceOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_MirrorPadOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::MirrorPadOptionsT *>(value);
+ return CreateMirrorPadOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_AbsOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::AbsOptionsT *>(value);
+ return CreateAbsOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_SplitVOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SplitVOptionsT *>(value);
+ return CreateSplitVOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_UniqueOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::UniqueOptionsT *>(value);
+ return CreateUniqueOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_ReverseV2Options:
+ {
+ auto ptr = reinterpret_cast<const circle::ReverseV2OptionsT *>(value);
+ return CreateReverseV2Options(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_AddNOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::AddNOptionsT *>(value);
+ return CreateAddNOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_GatherNdOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::GatherNdOptionsT *>(value);
+ return CreateGatherNdOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_CosOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::CosOptionsT *>(value);
+ return CreateCosOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_WhereOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::WhereOptionsT *>(value);
+ return CreateWhereOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_RankOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::RankOptionsT *>(value);
+ return CreateRankOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_ReverseSequenceOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ReverseSequenceOptionsT *>(value);
+ return CreateReverseSequenceOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_MatrixDiagOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::MatrixDiagOptionsT *>(value);
+ return CreateMatrixDiagOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_QuantizeOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::QuantizeOptionsT *>(value);
+ return CreateQuantizeOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_MatrixSetDiagOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::MatrixSetDiagOptionsT *>(value);
+ return CreateMatrixSetDiagOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_HardSwishOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::HardSwishOptionsT *>(value);
+ return CreateHardSwishOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_IfOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::IfOptionsT *>(value);
+ return CreateIfOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_WhileOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::WhileOptionsT *>(value);
+ return CreateWhileOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_DepthToSpaceOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::DepthToSpaceOptionsT *>(value);
+ return CreateDepthToSpaceOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_NonMaxSuppressionV4Options:
+ {
+ auto ptr = reinterpret_cast<const circle::NonMaxSuppressionV4OptionsT *>(value);
+ return CreateNonMaxSuppressionV4Options(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_NonMaxSuppressionV5Options:
+ {
+ auto ptr = reinterpret_cast<const circle::NonMaxSuppressionV5OptionsT *>(value);
+ return CreateNonMaxSuppressionV5Options(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_ScatterNdOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ScatterNdOptionsT *>(value);
+ return CreateScatterNdOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_SelectV2Options:
+ {
+ auto ptr = reinterpret_cast<const circle::SelectV2OptionsT *>(value);
+ return CreateSelectV2Options(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_DensifyOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::DensifyOptionsT *>(value);
+ return CreateDensifyOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_SegmentSumOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SegmentSumOptionsT *>(value);
+ return CreateSegmentSumOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_BatchMatMulOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::BatchMatMulOptionsT *>(value);
+ return CreateBatchMatMulOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_CumsumOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::CumsumOptionsT *>(value);
+ return CreateCumsumOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_CallOnceOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::CallOnceOptionsT *>(value);
+ return CreateCallOnceOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_BroadcastToOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::BroadcastToOptionsT *>(value);
+ return CreateBroadcastToOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_Rfft2dOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::Rfft2dOptionsT *>(value);
+ return CreateRfft2dOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_Conv3DOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::Conv3DOptionsT *>(value);
+ return CreateConv3DOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_HashtableOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::HashtableOptionsT *>(value);
+ return CreateHashtableOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_HashtableFindOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::HashtableFindOptionsT *>(value);
+ return CreateHashtableFindOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_HashtableImportOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::HashtableImportOptionsT *>(value);
+ return CreateHashtableImportOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_HashtableSizeOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::HashtableSizeOptionsT *>(value);
+ return CreateHashtableSizeOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_VarHandleOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::VarHandleOptionsT *>(value);
+ return CreateVarHandleOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_ReadVariableOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ReadVariableOptionsT *>(value);
+ return CreateReadVariableOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_AssignVariableOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::AssignVariableOptionsT *>(value);
+ return CreateAssignVariableOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_RandomOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::RandomOptionsT *>(value);
+ return CreateRandomOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_BCQGatherOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::BCQGatherOptionsT *>(value);
+ return CreateBCQGatherOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_BCQFullyConnectedOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::BCQFullyConnectedOptionsT *>(value);
+ return CreateBCQFullyConnectedOptions(_fbb, ptr, _rehasher).Union();
+ }
+ case BuiltinOptions_InstanceNormOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::InstanceNormOptionsT *>(value);
+ return CreateInstanceNormOptions(_fbb, ptr, _rehasher).Union();
+ }
+ default:
+ return 0;
+ }
+}
+
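+// The copy constructor deep-copies the active member: a fresh native object of
+// the type named by `u.type` is allocated, so each BuiltinOptionsUnion owns
+// its payload independently of the source union.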
+inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u)
+ : type(u.type), value(nullptr)
+{
+ switch (type)
+ {
+ case BuiltinOptions_Conv2DOptions:
+ {
+ value = new circle::Conv2DOptionsT(*reinterpret_cast<circle::Conv2DOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_DepthwiseConv2DOptions:
+ {
+ value = new circle::DepthwiseConv2DOptionsT(
+ *reinterpret_cast<circle::DepthwiseConv2DOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_ConcatEmbeddingsOptions:
+ {
+ value = new circle::ConcatEmbeddingsOptionsT(
+ *reinterpret_cast<circle::ConcatEmbeddingsOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_LSHProjectionOptions:
+ {
+ value = new circle::LSHProjectionOptionsT(
+ *reinterpret_cast<circle::LSHProjectionOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_Pool2DOptions:
+ {
+ value = new circle::Pool2DOptionsT(*reinterpret_cast<circle::Pool2DOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_SVDFOptions:
+ {
+ value = new circle::SVDFOptionsT(*reinterpret_cast<circle::SVDFOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_RNNOptions:
+ {
+ value = new circle::RNNOptionsT(*reinterpret_cast<circle::RNNOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_FullyConnectedOptions:
+ {
+ value = new circle::FullyConnectedOptionsT(
+ *reinterpret_cast<circle::FullyConnectedOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_SoftmaxOptions:
+ {
+ value = new circle::SoftmaxOptionsT(*reinterpret_cast<circle::SoftmaxOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_ConcatenationOptions:
+ {
+ value = new circle::ConcatenationOptionsT(
+ *reinterpret_cast<circle::ConcatenationOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_AddOptions:
+ {
+ value = new circle::AddOptionsT(*reinterpret_cast<circle::AddOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_L2NormOptions:
+ {
+ value = new circle::L2NormOptionsT(*reinterpret_cast<circle::L2NormOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_LocalResponseNormalizationOptions:
+ {
+ value = new circle::LocalResponseNormalizationOptionsT(
+ *reinterpret_cast<circle::LocalResponseNormalizationOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_LSTMOptions:
+ {
+ value = new circle::LSTMOptionsT(*reinterpret_cast<circle::LSTMOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_ResizeBilinearOptions:
+ {
+ value = new circle::ResizeBilinearOptionsT(
+ *reinterpret_cast<circle::ResizeBilinearOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_CallOptions:
+ {
+ value = new circle::CallOptionsT(*reinterpret_cast<circle::CallOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_ReshapeOptions:
+ {
+ value = new circle::ReshapeOptionsT(*reinterpret_cast<circle::ReshapeOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_SkipGramOptions:
+ {
+ value = new circle::SkipGramOptionsT(*reinterpret_cast<circle::SkipGramOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_SpaceToDepthOptions:
+ {
+ value = new circle::SpaceToDepthOptionsT(
+ *reinterpret_cast<circle::SpaceToDepthOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_EmbeddingLookupSparseOptions:
+ {
+ value = new circle::EmbeddingLookupSparseOptionsT(
+ *reinterpret_cast<circle::EmbeddingLookupSparseOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_MulOptions:
+ {
+ value = new circle::MulOptionsT(*reinterpret_cast<circle::MulOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_PadOptions:
+ {
+ value = new circle::PadOptionsT(*reinterpret_cast<circle::PadOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_GatherOptions:
+ {
+ value = new circle::GatherOptionsT(*reinterpret_cast<circle::GatherOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_BatchToSpaceNDOptions:
+ {
+ value = new circle::BatchToSpaceNDOptionsT(
+ *reinterpret_cast<circle::BatchToSpaceNDOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_SpaceToBatchNDOptions:
+ {
+ value = new circle::SpaceToBatchNDOptionsT(
+ *reinterpret_cast<circle::SpaceToBatchNDOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_TransposeOptions:
+ {
+ value =
+ new circle::TransposeOptionsT(*reinterpret_cast<circle::TransposeOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_ReducerOptions:
+ {
+ value = new circle::ReducerOptionsT(*reinterpret_cast<circle::ReducerOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_SubOptions:
+ {
+ value = new circle::SubOptionsT(*reinterpret_cast<circle::SubOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_DivOptions:
+ {
+ value = new circle::DivOptionsT(*reinterpret_cast<circle::DivOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_SqueezeOptions:
+ {
+ value = new circle::SqueezeOptionsT(*reinterpret_cast<circle::SqueezeOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_SequenceRNNOptions:
+ {
+ value =
+ new circle::SequenceRNNOptionsT(*reinterpret_cast<circle::SequenceRNNOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_StridedSliceOptions:
+ {
+ value = new circle::StridedSliceOptionsT(
+ *reinterpret_cast<circle::StridedSliceOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_ExpOptions:
+ {
+ value = new circle::ExpOptionsT(*reinterpret_cast<circle::ExpOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_TopKV2Options:
+ {
+ value = new circle::TopKV2OptionsT(*reinterpret_cast<circle::TopKV2OptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_SplitOptions:
+ {
+ value = new circle::SplitOptionsT(*reinterpret_cast<circle::SplitOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_LogSoftmaxOptions:
+ {
+ value =
+ new circle::LogSoftmaxOptionsT(*reinterpret_cast<circle::LogSoftmaxOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_CastOptions:
+ {
+ value = new circle::CastOptionsT(*reinterpret_cast<circle::CastOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_DequantizeOptions:
+ {
+ value =
+ new circle::DequantizeOptionsT(*reinterpret_cast<circle::DequantizeOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_MaximumMinimumOptions:
+ {
+ value = new circle::MaximumMinimumOptionsT(
+ *reinterpret_cast<circle::MaximumMinimumOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_ArgMaxOptions:
+ {
+ value = new circle::ArgMaxOptionsT(*reinterpret_cast<circle::ArgMaxOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_LessOptions:
+ {
+ value = new circle::LessOptionsT(*reinterpret_cast<circle::LessOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_NegOptions:
+ {
+ value = new circle::NegOptionsT(*reinterpret_cast<circle::NegOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_PadV2Options:
+ {
+ value = new circle::PadV2OptionsT(*reinterpret_cast<circle::PadV2OptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_GreaterOptions:
+ {
+ value = new circle::GreaterOptionsT(*reinterpret_cast<circle::GreaterOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_GreaterEqualOptions:
+ {
+ value = new circle::GreaterEqualOptionsT(
+ *reinterpret_cast<circle::GreaterEqualOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_LessEqualOptions:
+ {
+ value =
+ new circle::LessEqualOptionsT(*reinterpret_cast<circle::LessEqualOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_SelectOptions:
+ {
+ value = new circle::SelectOptionsT(*reinterpret_cast<circle::SelectOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_SliceOptions:
+ {
+ value = new circle::SliceOptionsT(*reinterpret_cast<circle::SliceOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_TransposeConvOptions:
+ {
+ value = new circle::TransposeConvOptionsT(
+ *reinterpret_cast<circle::TransposeConvOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_SparseToDenseOptions:
+ {
+ value = new circle::SparseToDenseOptionsT(
+ *reinterpret_cast<circle::SparseToDenseOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_TileOptions:
+ {
+ value = new circle::TileOptionsT(*reinterpret_cast<circle::TileOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_ExpandDimsOptions:
+ {
+ value =
+ new circle::ExpandDimsOptionsT(*reinterpret_cast<circle::ExpandDimsOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_EqualOptions:
+ {
+ value = new circle::EqualOptionsT(*reinterpret_cast<circle::EqualOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_NotEqualOptions:
+ {
+ value = new circle::NotEqualOptionsT(*reinterpret_cast<circle::NotEqualOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_ShapeOptions:
+ {
+ value = new circle::ShapeOptionsT(*reinterpret_cast<circle::ShapeOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_PowOptions:
+ {
+ value = new circle::PowOptionsT(*reinterpret_cast<circle::PowOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_ArgMinOptions:
+ {
+ value = new circle::ArgMinOptionsT(*reinterpret_cast<circle::ArgMinOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_FakeQuantOptions:
+ {
+ value =
+ new circle::FakeQuantOptionsT(*reinterpret_cast<circle::FakeQuantOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_PackOptions:
+ {
+ value = new circle::PackOptionsT(*reinterpret_cast<circle::PackOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_LogicalOrOptions:
+ {
+ value =
+ new circle::LogicalOrOptionsT(*reinterpret_cast<circle::LogicalOrOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_OneHotOptions:
+ {
+ value = new circle::OneHotOptionsT(*reinterpret_cast<circle::OneHotOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_LogicalAndOptions:
+ {
+ value =
+ new circle::LogicalAndOptionsT(*reinterpret_cast<circle::LogicalAndOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_LogicalNotOptions:
+ {
+ value =
+ new circle::LogicalNotOptionsT(*reinterpret_cast<circle::LogicalNotOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_UnpackOptions:
+ {
+ value = new circle::UnpackOptionsT(*reinterpret_cast<circle::UnpackOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_FloorDivOptions:
+ {
+ value = new circle::FloorDivOptionsT(*reinterpret_cast<circle::FloorDivOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_SquareOptions:
+ {
+ value = new circle::SquareOptionsT(*reinterpret_cast<circle::SquareOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_ZerosLikeOptions:
+ {
+ value =
+ new circle::ZerosLikeOptionsT(*reinterpret_cast<circle::ZerosLikeOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_FillOptions:
+ {
+ value = new circle::FillOptionsT(*reinterpret_cast<circle::FillOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_BidirectionalSequenceLSTMOptions:
+ {
+ value = new circle::BidirectionalSequenceLSTMOptionsT(
+ *reinterpret_cast<circle::BidirectionalSequenceLSTMOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_BidirectionalSequenceRNNOptions:
+ {
+ value = new circle::BidirectionalSequenceRNNOptionsT(
+ *reinterpret_cast<circle::BidirectionalSequenceRNNOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_UnidirectionalSequenceLSTMOptions:
+ {
+ value = new circle::UnidirectionalSequenceLSTMOptionsT(
+ *reinterpret_cast<circle::UnidirectionalSequenceLSTMOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_FloorModOptions:
+ {
+ value = new circle::FloorModOptionsT(*reinterpret_cast<circle::FloorModOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_RangeOptions:
+ {
+ value = new circle::RangeOptionsT(*reinterpret_cast<circle::RangeOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_ResizeNearestNeighborOptions:
+ {
+ value = new circle::ResizeNearestNeighborOptionsT(
+ *reinterpret_cast<circle::ResizeNearestNeighborOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_LeakyReluOptions:
+ {
+ value =
+ new circle::LeakyReluOptionsT(*reinterpret_cast<circle::LeakyReluOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_SquaredDifferenceOptions:
+ {
+ value = new circle::SquaredDifferenceOptionsT(
+ *reinterpret_cast<circle::SquaredDifferenceOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_MirrorPadOptions:
+ {
+ value =
+ new circle::MirrorPadOptionsT(*reinterpret_cast<circle::MirrorPadOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_AbsOptions:
+ {
+ value = new circle::AbsOptionsT(*reinterpret_cast<circle::AbsOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_SplitVOptions:
+ {
+ value = new circle::SplitVOptionsT(*reinterpret_cast<circle::SplitVOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_UniqueOptions:
+ {
+ value = new circle::UniqueOptionsT(*reinterpret_cast<circle::UniqueOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_ReverseV2Options:
+ {
+ value =
+ new circle::ReverseV2OptionsT(*reinterpret_cast<circle::ReverseV2OptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_AddNOptions:
+ {
+ value = new circle::AddNOptionsT(*reinterpret_cast<circle::AddNOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_GatherNdOptions:
+ {
+ value = new circle::GatherNdOptionsT(*reinterpret_cast<circle::GatherNdOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_CosOptions:
+ {
+ value = new circle::CosOptionsT(*reinterpret_cast<circle::CosOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_WhereOptions:
+ {
+ value = new circle::WhereOptionsT(*reinterpret_cast<circle::WhereOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_RankOptions:
+ {
+ value = new circle::RankOptionsT(*reinterpret_cast<circle::RankOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_ReverseSequenceOptions:
+ {
+ value = new circle::ReverseSequenceOptionsT(
+ *reinterpret_cast<circle::ReverseSequenceOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_MatrixDiagOptions:
+ {
+ value =
+ new circle::MatrixDiagOptionsT(*reinterpret_cast<circle::MatrixDiagOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_QuantizeOptions:
+ {
+ value = new circle::QuantizeOptionsT(*reinterpret_cast<circle::QuantizeOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_MatrixSetDiagOptions:
+ {
+ value = new circle::MatrixSetDiagOptionsT(
+ *reinterpret_cast<circle::MatrixSetDiagOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_HardSwishOptions:
+ {
+ value =
+ new circle::HardSwishOptionsT(*reinterpret_cast<circle::HardSwishOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_IfOptions:
+ {
+ value = new circle::IfOptionsT(*reinterpret_cast<circle::IfOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_WhileOptions:
+ {
+ value = new circle::WhileOptionsT(*reinterpret_cast<circle::WhileOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_DepthToSpaceOptions:
+ {
+ value = new circle::DepthToSpaceOptionsT(
+ *reinterpret_cast<circle::DepthToSpaceOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_NonMaxSuppressionV4Options:
+ {
+ value = new circle::NonMaxSuppressionV4OptionsT(
+ *reinterpret_cast<circle::NonMaxSuppressionV4OptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_NonMaxSuppressionV5Options:
+ {
+ value = new circle::NonMaxSuppressionV5OptionsT(
+ *reinterpret_cast<circle::NonMaxSuppressionV5OptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_ScatterNdOptions:
+ {
+ value =
+ new circle::ScatterNdOptionsT(*reinterpret_cast<circle::ScatterNdOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_SelectV2Options:
+ {
+ value = new circle::SelectV2OptionsT(*reinterpret_cast<circle::SelectV2OptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_DensifyOptions:
+ {
+ value = new circle::DensifyOptionsT(*reinterpret_cast<circle::DensifyOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_SegmentSumOptions:
+ {
+ value =
+ new circle::SegmentSumOptionsT(*reinterpret_cast<circle::SegmentSumOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_BatchMatMulOptions:
+ {
+ value =
+ new circle::BatchMatMulOptionsT(*reinterpret_cast<circle::BatchMatMulOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_CumsumOptions:
+ {
+ value = new circle::CumsumOptionsT(*reinterpret_cast<circle::CumsumOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_CallOnceOptions:
+ {
+ value = new circle::CallOnceOptionsT(*reinterpret_cast<circle::CallOnceOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_BroadcastToOptions:
+ {
+ value =
+ new circle::BroadcastToOptionsT(*reinterpret_cast<circle::BroadcastToOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_Rfft2dOptions:
+ {
+ value = new circle::Rfft2dOptionsT(*reinterpret_cast<circle::Rfft2dOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_Conv3DOptions:
+ {
+ value = new circle::Conv3DOptionsT(*reinterpret_cast<circle::Conv3DOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_HashtableOptions:
+ {
+ value =
+ new circle::HashtableOptionsT(*reinterpret_cast<circle::HashtableOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_HashtableFindOptions:
+ {
+ value = new circle::HashtableFindOptionsT(
+ *reinterpret_cast<circle::HashtableFindOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_HashtableImportOptions:
+ {
+ value = new circle::HashtableImportOptionsT(
+ *reinterpret_cast<circle::HashtableImportOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_HashtableSizeOptions:
+ {
+ value = new circle::HashtableSizeOptionsT(
+ *reinterpret_cast<circle::HashtableSizeOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_VarHandleOptions:
+ {
+ value =
+ new circle::VarHandleOptionsT(*reinterpret_cast<circle::VarHandleOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_ReadVariableOptions:
+ {
+ value = new circle::ReadVariableOptionsT(
+ *reinterpret_cast<circle::ReadVariableOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_AssignVariableOptions:
+ {
+ value = new circle::AssignVariableOptionsT(
+ *reinterpret_cast<circle::AssignVariableOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_RandomOptions:
+ {
+ value = new circle::RandomOptionsT(*reinterpret_cast<circle::RandomOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_BCQGatherOptions:
+ {
+ value =
+ new circle::BCQGatherOptionsT(*reinterpret_cast<circle::BCQGatherOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_BCQFullyConnectedOptions:
+ {
+ value = new circle::BCQFullyConnectedOptionsT(
+ *reinterpret_cast<circle::BCQFullyConnectedOptionsT *>(u.value));
+ break;
+ }
+ case BuiltinOptions_InstanceNormOptions:
+ {
+ value = new circle::InstanceNormOptionsT(
+ *reinterpret_cast<circle::InstanceNormOptionsT *>(u.value));
+ break;
+ }
+ default:
+ break;
+ }
+}
+
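+// Reset destroys the owned payload. Because `value` is stored as an untyped
+// pointer, the switch dispatches on the tag so the object is deleted through
+// its concrete native type before the union returns to the empty state.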
+inline void BuiltinOptionsUnion::Reset()
+{
+ switch (type)
+ {
+ case BuiltinOptions_Conv2DOptions:
+ {
+ auto ptr = reinterpret_cast<circle::Conv2DOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_DepthwiseConv2DOptions:
+ {
+ auto ptr = reinterpret_cast<circle::DepthwiseConv2DOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_ConcatEmbeddingsOptions:
+ {
+ auto ptr = reinterpret_cast<circle::ConcatEmbeddingsOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_LSHProjectionOptions:
+ {
+ auto ptr = reinterpret_cast<circle::LSHProjectionOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_Pool2DOptions:
+ {
+ auto ptr = reinterpret_cast<circle::Pool2DOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_SVDFOptions:
+ {
+ auto ptr = reinterpret_cast<circle::SVDFOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_RNNOptions:
+ {
+ auto ptr = reinterpret_cast<circle::RNNOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_FullyConnectedOptions:
+ {
+ auto ptr = reinterpret_cast<circle::FullyConnectedOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_SoftmaxOptions:
+ {
+ auto ptr = reinterpret_cast<circle::SoftmaxOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_ConcatenationOptions:
+ {
+ auto ptr = reinterpret_cast<circle::ConcatenationOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_AddOptions:
+ {
+ auto ptr = reinterpret_cast<circle::AddOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_L2NormOptions:
+ {
+ auto ptr = reinterpret_cast<circle::L2NormOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_LocalResponseNormalizationOptions:
+ {
+ auto ptr = reinterpret_cast<circle::LocalResponseNormalizationOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_LSTMOptions:
+ {
+ auto ptr = reinterpret_cast<circle::LSTMOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_ResizeBilinearOptions:
+ {
+ auto ptr = reinterpret_cast<circle::ResizeBilinearOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_CallOptions:
+ {
+ auto ptr = reinterpret_cast<circle::CallOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_ReshapeOptions:
+ {
+ auto ptr = reinterpret_cast<circle::ReshapeOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_SkipGramOptions:
+ {
+ auto ptr = reinterpret_cast<circle::SkipGramOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_SpaceToDepthOptions:
+ {
+ auto ptr = reinterpret_cast<circle::SpaceToDepthOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_EmbeddingLookupSparseOptions:
+ {
+ auto ptr = reinterpret_cast<circle::EmbeddingLookupSparseOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_MulOptions:
+ {
+ auto ptr = reinterpret_cast<circle::MulOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_PadOptions:
+ {
+ auto ptr = reinterpret_cast<circle::PadOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_GatherOptions:
+ {
+ auto ptr = reinterpret_cast<circle::GatherOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_BatchToSpaceNDOptions:
+ {
+ auto ptr = reinterpret_cast<circle::BatchToSpaceNDOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_SpaceToBatchNDOptions:
+ {
+ auto ptr = reinterpret_cast<circle::SpaceToBatchNDOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_TransposeOptions:
+ {
+ auto ptr = reinterpret_cast<circle::TransposeOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_ReducerOptions:
+ {
+ auto ptr = reinterpret_cast<circle::ReducerOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_SubOptions:
+ {
+ auto ptr = reinterpret_cast<circle::SubOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_DivOptions:
+ {
+ auto ptr = reinterpret_cast<circle::DivOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_SqueezeOptions:
+ {
+ auto ptr = reinterpret_cast<circle::SqueezeOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_SequenceRNNOptions:
+ {
+ auto ptr = reinterpret_cast<circle::SequenceRNNOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_StridedSliceOptions:
+ {
+ auto ptr = reinterpret_cast<circle::StridedSliceOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_ExpOptions:
+ {
+ auto ptr = reinterpret_cast<circle::ExpOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_TopKV2Options:
+ {
+ auto ptr = reinterpret_cast<circle::TopKV2OptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_SplitOptions:
+ {
+ auto ptr = reinterpret_cast<circle::SplitOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_LogSoftmaxOptions:
+ {
+ auto ptr = reinterpret_cast<circle::LogSoftmaxOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_CastOptions:
+ {
+ auto ptr = reinterpret_cast<circle::CastOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_DequantizeOptions:
+ {
+ auto ptr = reinterpret_cast<circle::DequantizeOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_MaximumMinimumOptions:
+ {
+ auto ptr = reinterpret_cast<circle::MaximumMinimumOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_ArgMaxOptions:
+ {
+ auto ptr = reinterpret_cast<circle::ArgMaxOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_LessOptions:
+ {
+ auto ptr = reinterpret_cast<circle::LessOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_NegOptions:
+ {
+ auto ptr = reinterpret_cast<circle::NegOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_PadV2Options:
+ {
+ auto ptr = reinterpret_cast<circle::PadV2OptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_GreaterOptions:
+ {
+ auto ptr = reinterpret_cast<circle::GreaterOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_GreaterEqualOptions:
+ {
+ auto ptr = reinterpret_cast<circle::GreaterEqualOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_LessEqualOptions:
+ {
+ auto ptr = reinterpret_cast<circle::LessEqualOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_SelectOptions:
+ {
+ auto ptr = reinterpret_cast<circle::SelectOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_SliceOptions:
+ {
+ auto ptr = reinterpret_cast<circle::SliceOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_TransposeConvOptions:
+ {
+ auto ptr = reinterpret_cast<circle::TransposeConvOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_SparseToDenseOptions:
+ {
+ auto ptr = reinterpret_cast<circle::SparseToDenseOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_TileOptions:
+ {
+ auto ptr = reinterpret_cast<circle::TileOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_ExpandDimsOptions:
+ {
+ auto ptr = reinterpret_cast<circle::ExpandDimsOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_EqualOptions:
+ {
+ auto ptr = reinterpret_cast<circle::EqualOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_NotEqualOptions:
+ {
+ auto ptr = reinterpret_cast<circle::NotEqualOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_ShapeOptions:
+ {
+ auto ptr = reinterpret_cast<circle::ShapeOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_PowOptions:
+ {
+ auto ptr = reinterpret_cast<circle::PowOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_ArgMinOptions:
+ {
+ auto ptr = reinterpret_cast<circle::ArgMinOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_FakeQuantOptions:
+ {
+ auto ptr = reinterpret_cast<circle::FakeQuantOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_PackOptions:
+ {
+ auto ptr = reinterpret_cast<circle::PackOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_LogicalOrOptions:
+ {
+ auto ptr = reinterpret_cast<circle::LogicalOrOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_OneHotOptions:
+ {
+ auto ptr = reinterpret_cast<circle::OneHotOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_LogicalAndOptions:
+ {
+ auto ptr = reinterpret_cast<circle::LogicalAndOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_LogicalNotOptions:
+ {
+ auto ptr = reinterpret_cast<circle::LogicalNotOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_UnpackOptions:
+ {
+ auto ptr = reinterpret_cast<circle::UnpackOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_FloorDivOptions:
+ {
+ auto ptr = reinterpret_cast<circle::FloorDivOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_SquareOptions:
+ {
+ auto ptr = reinterpret_cast<circle::SquareOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_ZerosLikeOptions:
+ {
+ auto ptr = reinterpret_cast<circle::ZerosLikeOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_FillOptions:
+ {
+ auto ptr = reinterpret_cast<circle::FillOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_BidirectionalSequenceLSTMOptions:
+ {
+ auto ptr = reinterpret_cast<circle::BidirectionalSequenceLSTMOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_BidirectionalSequenceRNNOptions:
+ {
+ auto ptr = reinterpret_cast<circle::BidirectionalSequenceRNNOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_UnidirectionalSequenceLSTMOptions:
+ {
+ auto ptr = reinterpret_cast<circle::UnidirectionalSequenceLSTMOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_FloorModOptions:
+ {
+ auto ptr = reinterpret_cast<circle::FloorModOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_RangeOptions:
+ {
+ auto ptr = reinterpret_cast<circle::RangeOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_ResizeNearestNeighborOptions:
+ {
+ auto ptr = reinterpret_cast<circle::ResizeNearestNeighborOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_LeakyReluOptions:
+ {
+ auto ptr = reinterpret_cast<circle::LeakyReluOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_SquaredDifferenceOptions:
+ {
+ auto ptr = reinterpret_cast<circle::SquaredDifferenceOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_MirrorPadOptions:
+ {
+ auto ptr = reinterpret_cast<circle::MirrorPadOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_AbsOptions:
+ {
+ auto ptr = reinterpret_cast<circle::AbsOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_SplitVOptions:
+ {
+ auto ptr = reinterpret_cast<circle::SplitVOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_UniqueOptions:
+ {
+ auto ptr = reinterpret_cast<circle::UniqueOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_ReverseV2Options:
+ {
+ auto ptr = reinterpret_cast<circle::ReverseV2OptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_AddNOptions:
+ {
+ auto ptr = reinterpret_cast<circle::AddNOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_GatherNdOptions:
+ {
+ auto ptr = reinterpret_cast<circle::GatherNdOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_CosOptions:
+ {
+ auto ptr = reinterpret_cast<circle::CosOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_WhereOptions:
+ {
+ auto ptr = reinterpret_cast<circle::WhereOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_RankOptions:
+ {
+ auto ptr = reinterpret_cast<circle::RankOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_ReverseSequenceOptions:
+ {
+ auto ptr = reinterpret_cast<circle::ReverseSequenceOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_MatrixDiagOptions:
+ {
+ auto ptr = reinterpret_cast<circle::MatrixDiagOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_QuantizeOptions:
+ {
+ auto ptr = reinterpret_cast<circle::QuantizeOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_MatrixSetDiagOptions:
+ {
+ auto ptr = reinterpret_cast<circle::MatrixSetDiagOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_HardSwishOptions:
+ {
+ auto ptr = reinterpret_cast<circle::HardSwishOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_IfOptions:
+ {
+ auto ptr = reinterpret_cast<circle::IfOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_WhileOptions:
+ {
+ auto ptr = reinterpret_cast<circle::WhileOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_DepthToSpaceOptions:
+ {
+ auto ptr = reinterpret_cast<circle::DepthToSpaceOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_NonMaxSuppressionV4Options:
+ {
+ auto ptr = reinterpret_cast<circle::NonMaxSuppressionV4OptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_NonMaxSuppressionV5Options:
+ {
+ auto ptr = reinterpret_cast<circle::NonMaxSuppressionV5OptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_ScatterNdOptions:
+ {
+ auto ptr = reinterpret_cast<circle::ScatterNdOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_SelectV2Options:
+ {
+ auto ptr = reinterpret_cast<circle::SelectV2OptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_DensifyOptions:
+ {
+ auto ptr = reinterpret_cast<circle::DensifyOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_SegmentSumOptions:
+ {
+ auto ptr = reinterpret_cast<circle::SegmentSumOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_BatchMatMulOptions:
+ {
+ auto ptr = reinterpret_cast<circle::BatchMatMulOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_CumsumOptions:
+ {
+ auto ptr = reinterpret_cast<circle::CumsumOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_CallOnceOptions:
+ {
+ auto ptr = reinterpret_cast<circle::CallOnceOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_BroadcastToOptions:
+ {
+ auto ptr = reinterpret_cast<circle::BroadcastToOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_Rfft2dOptions:
+ {
+ auto ptr = reinterpret_cast<circle::Rfft2dOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_Conv3DOptions:
+ {
+ auto ptr = reinterpret_cast<circle::Conv3DOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_HashtableOptions:
+ {
+ auto ptr = reinterpret_cast<circle::HashtableOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_HashtableFindOptions:
+ {
+ auto ptr = reinterpret_cast<circle::HashtableFindOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_HashtableImportOptions:
+ {
+ auto ptr = reinterpret_cast<circle::HashtableImportOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_HashtableSizeOptions:
+ {
+ auto ptr = reinterpret_cast<circle::HashtableSizeOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_VarHandleOptions:
+ {
+ auto ptr = reinterpret_cast<circle::VarHandleOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_ReadVariableOptions:
+ {
+ auto ptr = reinterpret_cast<circle::ReadVariableOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_AssignVariableOptions:
+ {
+ auto ptr = reinterpret_cast<circle::AssignVariableOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_RandomOptions:
+ {
+ auto ptr = reinterpret_cast<circle::RandomOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_BCQGatherOptions:
+ {
+ auto ptr = reinterpret_cast<circle::BCQGatherOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_BCQFullyConnectedOptions:
+ {
+ auto ptr = reinterpret_cast<circle::BCQFullyConnectedOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ case BuiltinOptions_InstanceNormOptions:
+ {
+ auto ptr = reinterpret_cast<circle::InstanceNormOptionsT *>(value);
+ delete ptr;
+ break;
+ }
+ default:
+ break;
+ }
+ value = nullptr;
+ type = BuiltinOptions_NONE;
+}
+
+inline const circle::Model *GetModel(const void *buf)
+{
+ return flatbuffers::GetRoot<circle::Model>(buf);
+}
+
+inline const circle::Model *GetSizePrefixedModel(const void *buf)
+{
+ return flatbuffers::GetSizePrefixedRoot<circle::Model>(buf);
+}
+
+inline const char *ModelIdentifier() { return "CIR0"; }
+
+inline bool ModelBufferHasIdentifier(const void *buf)
+{
+ return flatbuffers::BufferHasIdentifier(buf, ModelIdentifier());
+}
+
+inline bool VerifyModelBuffer(flatbuffers::Verifier &verifier)
+{
+ return verifier.VerifyBuffer<circle::Model>(ModelIdentifier());
+}
+
+inline bool VerifySizePrefixedModelBuffer(flatbuffers::Verifier &verifier)
+{
+ return verifier.VerifySizePrefixedBuffer<circle::Model>(ModelIdentifier());
+}
+
+inline const char *ModelExtension() { return "circle"; }
+
+inline void FinishModelBuffer(flatbuffers::FlatBufferBuilder &fbb,
+ flatbuffers::Offset<circle::Model> root)
+{
+ fbb.Finish(root, ModelIdentifier());
+}
+
+inline void FinishSizePrefixedModelBuffer(flatbuffers::FlatBufferBuilder &fbb,
+ flatbuffers::Offset<circle::Model> root)
+{
+ fbb.FinishSizePrefixed(root, ModelIdentifier());
+}
+
+inline std::unique_ptr<circle::ModelT>
+UnPackModel(const void *buf, const flatbuffers::resolver_function_t *res = nullptr)
+{
+ return std::unique_ptr<circle::ModelT>(GetModel(buf)->UnPack(res));
+}
+
+inline std::unique_ptr<circle::ModelT>
+UnPackSizePrefixedModel(const void *buf, const flatbuffers::resolver_function_t *res = nullptr)
+{
+ return std::unique_ptr<circle::ModelT>(GetSizePrefixedModel(buf)->UnPack(res));
+}
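+
+// Usage sketch (assumes `buf` points to the raw bytes of a .circle file of length `size`):
+//
+//   flatbuffers::Verifier verifier(reinterpret_cast<const uint8_t *>(buf), size);
+//   if (circle::VerifyModelBuffer(verifier))
+//   {
+//     const circle::Model *model = circle::GetModel(buf);
+//     std::unique_ptr<circle::ModelT> unpacked = circle::UnPackModel(buf);
+//   }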
+
+} // namespace circle
+
+#endif // FLATBUFFERS_GENERATED_SCHEMA_CIRCLE_H_
diff --git a/onert-micro/helpers/GenerateKernelsListHelper.cpp b/onert-micro/helpers/GenerateKernelsListHelper.cpp
new file mode 100644
index 000000000..68b194f39
--- /dev/null
+++ b/onert-micro/helpers/GenerateKernelsListHelper.cpp
@@ -0,0 +1,253 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/core/reader/CircleMicroReader.h"
+
+#include <circle-generated/circle/schema_generated.h>
+
+#include <cassert>
+#include <fstream>
+#include <iostream>
+#include <set>
+#include <string>
+#include <vector>
+
+std::string get_register_kernel_str(const circle::BuiltinOperator builtin_operator)
+{
+ switch (builtin_operator)
+ {
+ case circle::BuiltinOperator_ADD:
+ return "REGISTER_KERNEL(ADD, Add)";
+ case circle::BuiltinOperator_ARG_MAX:
+ return "REGISTER_KERNEL(ARG_MAX, ArgMax)";
+ case circle::BuiltinOperator_AVERAGE_POOL_2D:
+ return "REGISTER_KERNEL(AVERAGE_POOL_2D, AveragePool2D)";
+ case circle::BuiltinOperator_BATCH_TO_SPACE_ND:
+ return "REGISTER_KERNEL(BATCH_TO_SPACE_ND, BatchToSpaceND)";
+ case circle::BuiltinOperator_CAST:
+ return "REGISTER_KERNEL(CAST, Cast)";
+ case circle::BuiltinOperator_CONCATENATION:
+ return "REGISTER_KERNEL(CONCATENATION, Concatenation)";
+ case circle::BuiltinOperator_CONV_2D:
+ return "REGISTER_KERNEL(CONV_2D, Conv2D)";
+ case circle::BuiltinOperator_DEPTH_TO_SPACE:
+ return "REGISTER_KERNEL(DEPTH_TO_SPACE, DepthToSpace)";
+ case circle::BuiltinOperator_DEPTHWISE_CONV_2D:
+ return "REGISTER_KERNEL(DEPTHWISE_CONV_2D, DepthwiseConv2D)";
+ case circle::BuiltinOperator_DEQUANTIZE:
+ return "REGISTER_KERNEL(DEQUANTIZE, Dequantize)";
+ case circle::BuiltinOperator_DIV:
+ return "REGISTER_KERNEL(DIV, Div)";
+ case circle::BuiltinOperator_ELU:
+ return "REGISTER_KERNEL(ELU, Elu)";
+ case circle::BuiltinOperator_EXP:
+ return "REGISTER_KERNEL(EXP, Exp)";
+ case circle::BuiltinOperator_EXPAND_DIMS:
+ return "REGISTER_KERNEL(EXPAND_DIMS, ExpandDims)";
+ case circle::BuiltinOperator_FILL:
+ return "REGISTER_KERNEL(FILL, Fill)";
+ case circle::BuiltinOperator_FLOOR:
+ return "REGISTER_KERNEL(FLOOR, Floor)";
+ case circle::BuiltinOperator_FLOOR_DIV:
+ return "REGISTER_KERNEL(FLOOR_DIV, FloorDiv)";
+ case circle::BuiltinOperator_EQUAL:
+ return "REGISTER_KERNEL(EQUAL, Equal)";
+ case circle::BuiltinOperator_FULLY_CONNECTED:
+ return "REGISTER_KERNEL(FULLY_CONNECTED, FullyConnected)";
+ case circle::BuiltinOperator_GREATER:
+ return "REGISTER_KERNEL(GREATER, Greater)";
+ case circle::BuiltinOperator_GREATER_EQUAL:
+ return "REGISTER_KERNEL(GREATER_EQUAL, GreaterEqual)";
+ case circle::BuiltinOperator_INSTANCE_NORM:
+ return "REGISTER_KERNEL(INSTANCE_NORM, InstanceNorm)";
+ case circle::BuiltinOperator_L2_NORMALIZATION:
+ return "REGISTER_KERNEL(L2_NORMALIZATION, L2Normalize)";
+ case circle::BuiltinOperator_L2_POOL_2D:
+ return "REGISTER_KERNEL(L2_POOL_2D, L2Pool2D)";
+ case circle::BuiltinOperator_LEAKY_RELU:
+ return "REGISTER_KERNEL(LEAKY_RELU, LeakyRelu)";
+ case circle::BuiltinOperator_LESS:
+ return "REGISTER_KERNEL(LESS, Less)";
+ case circle::BuiltinOperator_LESS_EQUAL:
+ return "REGISTER_KERNEL(LESS_EQUAL, LessEqual)";
+ case circle::BuiltinOperator_LOGICAL_AND:
+ return "REGISTER_KERNEL(LOGICAL_AND, LogicalAnd)";
+ case circle::BuiltinOperator_LOGICAL_NOT:
+ return "REGISTER_KERNEL(LOGICAL_NOT, LogicalNot)";
+ case circle::BuiltinOperator_LOGICAL_OR:
+ return "REGISTER_KERNEL(LOGICAL_OR, LogicalOr)";
+ case circle::BuiltinOperator_LOGISTIC:
+ return "REGISTER_KERNEL(LOGISTIC, Logistic)";
+ case circle::BuiltinOperator_GATHER:
+ return "REGISTER_KERNEL(GATHER, Gather)";
+ case circle::BuiltinOperator_MAXIMUM:
+ return "REGISTER_KERNEL(MAXIMUM, Maximum)";
+ case circle::BuiltinOperator_MAX_POOL_2D:
+ return "REGISTER_KERNEL(MAX_POOL_2D, MaxPool2D)";
+ case circle::BuiltinOperator_MINIMUM:
+ return "REGISTER_KERNEL(MINIMUM, Minimum)";
+ case circle::BuiltinOperator_MIRROR_PAD:
+ return "REGISTER_KERNEL(MIRROR_PAD, MirrorPad)";
+ case circle::BuiltinOperator_MUL:
+ return "REGISTER_KERNEL(MUL, Mul)";
+ case circle::BuiltinOperator_NEG:
+ return "REGISTER_KERNEL(NEG, Neg)";
+ case circle::BuiltinOperator_NOT_EQUAL:
+ return "REGISTER_KERNEL(NOT_EQUAL, NotEqual)";
+ case circle::BuiltinOperator_PAD:
+ return "REGISTER_KERNEL(PAD, Pad)";
+ case circle::BuiltinOperator_PADV2:
+ return "REGISTER_KERNEL(PADV2, PadV2)";
+ case circle::BuiltinOperator_PACK:
+ return "REGISTER_KERNEL(PACK, Pack)";
+ case circle::BuiltinOperator_PRELU:
+ return "REGISTER_KERNEL(PRELU, PRelu)";
+ case circle::BuiltinOperator_QUANTIZE:
+ return "REGISTER_KERNEL(QUANTIZE, Quantize)";
+ case circle::BuiltinOperator_REDUCE_PROD:
+ return "REGISTER_KERNEL(REDUCE_PROD, ReduceCommon)";
+ case circle::BuiltinOperator_RESHAPE:
+ return "REGISTER_KERNEL(RESHAPE, Reshape)";
+ case circle::BuiltinOperator_RESIZE_BILINEAR:
+ return "REGISTER_KERNEL(RESIZE_BILINEAR, ResizeBilinear)";
+ case circle::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR:
+ return "REGISTER_KERNEL(RESIZE_NEAREST_NEIGHBOR, ResizeNearestNeighbor)";
+ case circle::BuiltinOperator_RSQRT:
+ return "REGISTER_KERNEL(RSQRT, Rsqrt)";
+ case circle::BuiltinOperator_SHAPE:
+ return "REGISTER_KERNEL(SHAPE, Shape)";
+ case circle::BuiltinOperator_SOFTMAX:
+ return "REGISTER_KERNEL(SOFTMAX, Softmax)";
+ case circle::BuiltinOperator_SPACE_TO_BATCH_ND:
+ return "REGISTER_KERNEL(SPACE_TO_BATCH_ND, SpaceToBatchND)";
+ case circle::BuiltinOperator_SPACE_TO_DEPTH:
+ return "REGISTER_KERNEL(SPACE_TO_DEPTH, SpaceToDepth)";
+ case circle::BuiltinOperator_SLICE:
+ return "REGISTER_KERNEL(SLICE, Slice)";
+ case circle::BuiltinOperator_STRIDED_SLICE:
+ return "REGISTER_KERNEL(STRIDED_SLICE, StridedSlice)";
+ case circle::BuiltinOperator_SQRT:
+ return "REGISTER_KERNEL(SQRT, Sqrt)";
+ case circle::BuiltinOperator_SQUARE:
+ return "REGISTER_KERNEL(SQUARE, Square)";
+ case circle::BuiltinOperator_SQUARED_DIFFERENCE:
+ return "REGISTER_KERNEL(SQUARED_DIFFERENCE, SquaredDifference)";
+ case circle::BuiltinOperator_SQUEEZE:
+ return "REGISTER_KERNEL(SQUEEZE, Squeeze)";
+ case circle::BuiltinOperator_SUB:
+ return "REGISTER_KERNEL(SUB, Sub)";
+ case circle::BuiltinOperator_SVDF:
+ return "REGISTER_KERNEL(SVDF, SVDF)";
+ case circle::BuiltinOperator_SPLIT:
+ return "REGISTER_KERNEL(SPLIT, Split)";
+ case circle::BuiltinOperator_SPLIT_V:
+ return "REGISTER_KERNEL(SPLIT_V, SplitV)";
+ case circle::BuiltinOperator_TANH:
+ return "REGISTER_KERNEL(TANH, Tanh)";
+ case circle::BuiltinOperator_TRANSPOSE:
+ return "REGISTER_KERNEL(TRANSPOSE, Transpose)";
+ case circle::BuiltinOperator_TRANSPOSE_CONV:
+ return "REGISTER_KERNEL(TRANSPOSE_CONV, TransposeConv)";
+ case circle::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM:
+ return "REGISTER_KERNEL(UNIDIRECTIONAL_SEQUENCE_LSTM, UnidirectionalSequenceLSTM)";
+ case circle::BuiltinOperator_WHILE:
+ return "REGISTER_KERNEL(WHILE, While)";
+    default:
+      assert(false && "Not supported kernel");
+      return "";
+  }
+}
+
+std::vector<char> loadFile(const std::string &path)
+{
+ std::ifstream file(path, std::ios::binary | std::ios::in);
+ if (!file.good())
+ {
+ assert(false && "Failed to open file");
+ }
+
+ file.unsetf(std::ios::skipws);
+
+ file.seekg(0, std::ios::end);
+ auto fileSize = file.tellg();
+ file.seekg(0, std::ios::beg);
+
+  // allocate a buffer of the file size
+ std::vector<char> data(fileSize);
+
+ // read the data
+ file.read(data.data(), fileSize);
+ if (file.fail())
+ {
+ assert(false && "Failed to read file");
+ }
+
+ return data;
+}
+
+// Parse the model and write a REGISTER_KERNEL line for each operation it uses to the output stream
+void run(std::ofstream &os, const circle::Model *model)
+{
+ luci_interpreter::CircleReader reader;
+ reader.parse(model);
+ const uint32_t subgraph_size = reader.num_subgraph();
+
+  // Use a set to avoid duplicate entries in the generated list
+ std::set<circle::BuiltinOperator> operations_set;
+
+ for (uint32_t g = 0; g < subgraph_size; g++)
+ {
+ reader.select_subgraph(g);
+ auto ops = reader.operators();
+ for (uint32_t i = 0; i < ops.size(); ++i)
+ {
+ const auto op = ops.at(i);
+ auto op_builtin_operator = reader.builtin_code(op);
+
+ auto result = operations_set.insert(op_builtin_operator);
+ if (result.second)
+ {
+ os << get_register_kernel_str(op_builtin_operator) << std::endl;
+ }
+ }
+ }
+}
+
+int main(int argc, char **argv)
+{
+  if (argc != 3)
+  {
+    assert(false && "Expected 2 arguments: circle model path and path for the generated file");
+    return 1;
+  }
+
+ std::string model_file(argv[1]);
+ std::string generated_file_path(argv[2]);
+
+ std::vector<char> model_data = loadFile(model_file);
+ const circle::Model *circle_model = circle::GetModel(model_data.data());
+
+ if (circle_model == nullptr)
+ {
+ std::cerr << "ERROR: Failed to load circle '" << model_file << "'" << std::endl;
+ return 255;
+ }
+
+ // Open or create file
+ std::ofstream out;
+ out.open(generated_file_path);
+
+  if (!out.is_open())
+  {
+    std::cerr << "ERROR: Failed to open output file '" << generated_file_path << "'" << std::endl;
+    return 255;
+  }
+
+  run(out, circle_model);
+  return 0;
+}
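+
+// Usage sketch (the included file name below is hypothetical): the generated list
+// is meant to be consumed by defining REGISTER_KERNEL before including it.
+//
+//   #define REGISTER_KERNEL(builtin_operator, name) /* register kernel `name` */
+//   #include "GeneratedKernelsToBuild.lst"
+//   #undef REGISTER_KERNEL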
diff --git a/onert-micro/luci-interpreter/CMakeLists.txt b/onert-micro/luci-interpreter/CMakeLists.txt
new file mode 100644
index 000000000..1bdfa493f
--- /dev/null
+++ b/onert-micro/luci-interpreter/CMakeLists.txt
@@ -0,0 +1,37 @@
+set(LUCI_INTERPRETER_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include")
+set(LUCI_INTERPRETER_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src")
+set(LUCI_INTERPRETER_PAL_COMMON_DIR "${CMAKE_CURRENT_SOURCE_DIR}/pal/common")
+if (NOT LUCI_INTERPRETER_PAL_DIR)
+ set(LUCI_INTERPRETER_PAL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/pal/mcu")
+endif()
+
+if (NOT LUCI_INTERPRETER_KERNELS_BUILD_LIST)
+ set(KERNEL_REGISTER_FILE "${LUCI_INTERPRETER_PAL_DIR}/KernelsToBuild.lst")
+else()
+ set(KERNEL_REGISTER_FILE ${LUCI_INTERPRETER_KERNELS_BUILD_LIST})
+endif()
+
+if (NOT DEFINED CUSTOM_LUCI_INTERPRETER_SUFFIX)
+ set(LUCI_INTERPRETER_SUFFIX "")
+else()
+ set(LUCI_INTERPRETER_SUFFIX ${CUSTOM_LUCI_INTERPRETER_SUFFIX})
+endif()
+
+if (DIS_QUANT)
+ add_definitions(-DDIS_QUANT)
+endif()
+
+if (DIS_FLOAT)
+ add_definitions(-DDIS_FLOAT)
+endif()
+
+add_compile_options(-fno-exceptions)
+add_compile_options(-Os)
+
+# AFAIK, this will enable leak sanitizer, too
+if(ENABLE_SANITIZER)
+ add_compile_options(-fsanitize=address)
+ add_link_options(-fsanitize=address)
+endif(ENABLE_SANITIZER)
+
+add_subdirectory(src)
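+
+# Configuration sketch (assumed invocation; the PAL path and flags are examples):
+#   cmake -DLUCI_INTERPRETER_PAL_DIR=<repo>/onert-micro/luci-interpreter/pal/mcu \
+#         -DDIS_QUANT=ON <source-dir>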
diff --git a/onert-micro/luci-interpreter/README.md b/onert-micro/luci-interpreter/README.md
new file mode 100644
index 000000000..77ec5c81c
--- /dev/null
+++ b/onert-micro/luci-interpreter/README.md
@@ -0,0 +1,158 @@
+# luci-interpreter
+
+`luci-interpreter` is an inference engine for neural networks represented in luci IR.
+See the `compiler/luci/lang` directory for details about the IR.
+Useful infrastructure, such as the importer/exporter and optimizations, can be found in `compiler/luci`.
+
+`luci-interpreter` provides:
+- Basic inference functionality, input setters and output getters
+- Interface for inspecting hidden interpreter state, like activation values during inference
+- Customization mechanisms to fit the interpreter to specific platforms, like MCUs
+
+Public interface headers are placed in the `luci-interpreter/include/luci_interpreter` directory.
+
+## Basic usage
+
+Minimal usage includes:
+- Setting input data
+- Running inference
+- Fetching inference results
+
+An Interpreter object is reusable and can run multiple inferences.
+Elements in tensors (input/output/internal) are stored contiguously in C-like (row-major) layout;
+this means that for tensor t = [[0, 1], [2, 3]], t[0, 1] == 1.
+
+Input and output tensors have the same indices as in the original luci model.
+
+**Usage example:**
+``` c++
+// Note getTensorSize is a function that computes tensor size,
+// it is not part of interpreter and should be implemented by user
+
+luci_interpreter::Interpreter interpreter(module);
+
+// Set inputs
+// assuming model has only one input and one output
+const auto input_nodes = loco::input_nodes(module->graph());
+
+const auto *input_node = dynamic_cast<const luci::CircleInput *>(input_nodes[0]);
+std::vector<char> input_data(getTensorSize(input_node));
+// Initialize input data here
+
+interpreter.writeInputTensor(input_node, input_data.data(), input_data.size());
+
+// Start inference
+interpreter.interpret();
+
+// Fetch inference results
+const auto output_nodes = loco::output_nodes(module->graph());
+const auto *output_node = dynamic_cast<const luci::CircleOutput *>(output_nodes[0]);
+std::vector<char> output_data(getTensorSize(output_node));
+interpreter.readOutputTensor(output_node, output_data.data(), output_data.size());
+```
+
+## Inspecting intermediate state
+
+The interpreter provides interfaces for inspecting its internal state during inference.
+
+This is done through an "observer" mechanism:
+- The `Interpreter` class has an `attachObserver` method, which takes a pointer to an `ExecutionObserver` object
+- `ExecutionObserver` defines several callback methods the user can override to inject custom code
+
+`ExecutionObserver` provides three callbacks:
+- `postTensorWrite` allows inspecting the contents of an output tensor after an operation has executed
+- `preOperatorExecute` notifies that the interpreter is about to execute an operation
+- `postOperatorExecute` notifies that the interpreter has finished executing an operation
+
+See `luci-interpreter/include/luci_interpreter/Interpreter.h` for details of this interface.
+
+**Usage example:**
+``` c++
+class CustomExecutionObserver: public luci_interpreter::ExecutionObserver
+{
+public:
+ void postTensorWrite(const luci::CircleNode *node, const Tensor *tensor) override
+ {
+ if (tensor->element_type() != loco::DataType::FLOAT32)
+ return;
+ for (int i = 0; i < tensor->shape().num_elements(); ++i)
+      std::cout << tensor->data<float>()[i] << ", ";
+ }
+
+ // User observer can override only needed methods,
+ // others will inherit empty implementation from base observer.
+
+ // void preOperatorExecute(const luci::CircleNode *node);
+ // void postOperatorExecute(const luci::CircleNode *node);
+};
+
+luci_interpreter::Interpreter interpreter(module);
+CustomExecutionObserver observer;
+interpreter.attachObserver(&observer);
+
+// initialize input_data
+interpreter.writeInputTensor(input_node, input_data.data(), input_data.size());
+
+interpreter.interpret();
+```
+
+## Customizing inference
+
+### Memory manager
+
+The interpreter provides a hook for altering the default memory management mechanism.
+
+This is done through the `MemoryManager` interface; see `luci-interpreter/include/luci_interpreter/MemoryManager.h` for implementation details.
+
+This header contains the `IMemoryManager` abstract class, which is responsible for allocation and deallocation of tensor memory.
+
+Users can construct an interpreter with one of the predefined memory managers or with their own custom memory manager.
+Note that one memory manager can be shared between multiple interpreter instances, because an interpreter does not own the manager object.
+
+List of predefined memory managers:
+- `SimpleMemoryManager`: a simple wrapper around new/delete; this is the default.
+- `TestMemoryManager`: records all allocated memory and releases it in the manager's destructor; used in kernel unit tests.
+- `BuddyMemoryManager`: implements the buddy algorithm; uses an external buffer for tensor data allocations, so it does not need new/delete.
+- `StaticMemoryManager`: uses a precomputed memory allocation plan. Requires preparation with a MemoryPlanner, but can reduce memory consumption in restricted environments (like MCUs).
+
+**SimpleMemoryManager usage example:**
+
+No special setup is needed; this memory manager is used by default.
+``` c++
+luci_interpreter::Interpreter interpreter(module);
+```
+
+**TestMemoryManager usage example:**
+
+``` c++
+luci_interpreter::TestMemoryManager mm;
+luci_interpreter::Interpreter interpreter(module, &mm);
+```
+
+**BuddyMemoryManager usage example:**
+
+`BuddyMemoryManager` implements a classic allocation algorithm: https://en.wikipedia.org/wiki/Buddy_memory_allocation.
+
+This allocator uses an external buffer as a memory pool, which makes it possible to allocate from static memory arrays.
+
+Limitations
+- The current implementation uses only the largest power-of-two prefix of the given buffer.
+
+  For example, for a 1000-byte buffer, only the lower 512 bytes will be used.
+- The current implementation can handle a memory pool of at most 4 GB.
+
+``` c++
+ constexpr int buffer_size = 2048;
+ static uint8_t buffer[buffer_size];
+ luci_interpreter::BuddyMemoryManager memory_manager(buffer, buffer_size);
+ luci_interpreter::Interpreter interpreter(module.get(), &memory_manager);
+```
+
+**StaticMemoryManager usage example:**
+``` c++
+TBD when it is merged
+```
+
+## Further reading
+
+If you want to participate in development, please read `DEVELOPER.md` for SW architecture details.
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/Interpreter.h b/onert-micro/luci-interpreter/include/luci_interpreter/Interpreter.h
new file mode 100644
index 000000000..0d294753d
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/Interpreter.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_INTERPRETER_H
+#define LUCI_INTERPRETER_INTERPRETER_H
+
+#include "luci_interpreter/core/Tensor.h"
+
+#ifdef USE_STATIC_ALLOC
+#include "luci_interpreter/InterpreterConfigure.h"
+#include "memory_managers/StaticMemoryManager.h"
+#else
+#include "memory_managers/SimpleMemoryManager.h"
+#endif // USE_STATIC_ALLOC
+
+#include "loader/ModuleLoader.h"
+#include <memory>
+
+namespace luci_interpreter
+{
+
+class Interpreter
+{
+public:
+ // Construct default interpreter with dynamic allocations and with input allocations
+ explicit Interpreter(const char *model_data_raw, bool dealloc_input);
+
+#ifdef USE_STATIC_ALLOC
+ // Construct interpreter with configurations
+ explicit Interpreter(const char *model_data_raw, const InterpreterConfigure &configuration);
+#endif // USE_STATIC_ALLOC
+
+ ~Interpreter();
+
+ void allocateAndWriteInputTensor(int32_t input_tensor_index, const void *data, size_t data_size);
+ uint8_t *allocateInputTensor(int32_t input_tensor_index);
+
+ uint8_t *readOutputTensor(int32_t output_tensor_index);
+
+ int32_t getInputDataSizeByIndex(int32_t input_tensor_index);
+ int32_t getOutputDataSizeByIndex(int32_t output_tensor_index);
+
+ void interpret();
+
+private:
+  // _memory_manager must be declared before _runtime_module: members are
+  // destroyed in reverse declaration order, so the runtime module is destroyed
+  // while the memory manager is still alive.
+ MemoryManager _memory_manager{};
+ RuntimeModule _runtime_module{};
+};
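+
+// Usage sketch (assumes `model_data` points to a valid circle flatbuffer and the
+// model has a single input and output; the semantics of `dealloc_input` are
+// assumed from its name):
+//
+//   luci_interpreter::Interpreter interpreter(model_data, /*dealloc_input=*/true);
+//   interpreter.allocateAndWriteInputTensor(0, input.data(), input.size());
+//   interpreter.interpret();
+//   const uint8_t *output = interpreter.readOutputTensor(0);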
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_INTERPRETER_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/InterpreterConfigure.h b/onert-micro/luci-interpreter/include/luci_interpreter/InterpreterConfigure.h
new file mode 100644
index 000000000..5619c93f0
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/InterpreterConfigure.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ONERT_MICRO_INTERPRETER_CONFIGURE_H
+#define ONERT_MICRO_INTERPRETER_CONFIGURE_H
+
+#include <cassert>
+#include <cstdint>
+
+namespace luci_interpreter
+{
+#ifdef USE_STATIC_ALLOC
+
+enum MemoryManagerType
+{
+ STATIC,
+ DYNAMIC
+};
+
+class InterpreterConfigure
+{
+public:
+ void setAllocateInputValue(bool allocate_input) { _allocate_input = allocate_input; }
+ bool getAllocateInputValue() const { return _allocate_input; }
+
+ InterpreterConfigure &setMemoryManager(MemoryManagerType mm_type)
+ {
+ switch (mm_type)
+ {
+ case MemoryManagerType::STATIC:
+ _use_static_manager = true;
+ break;
+ case MemoryManagerType::DYNAMIC:
+ _use_static_manager = false;
+ break;
+ default:
+ assert(false);
+ }
+ return *this;
+ }
+
+ // TODO: remove this method
+ InterpreterConfigure &configStaticMemoryManager(uint32_t input_buf_size, uint32_t temp_buf_size,
+ uint32_t output_buf_size)
+ {
+ assert(_use_static_manager);
+ _input_buf_size = input_buf_size;
+ _temp_buf_size = temp_buf_size;
+ _output_buf_size = output_buf_size;
+ return *this;
+ }
+
+ bool isStaticManager() const { return _use_static_manager; }
+
+private:
+ bool _use_static_manager = false;
+ bool _allocate_input = true;
+
+public:
+ // TODO: remove it and read these values from circle file
+ uint32_t _input_buf_size = 0;
+ uint32_t _temp_buf_size = 0;
+ uint32_t _output_buf_size = 0;
+};
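+
+// Example (static-allocation build; buffer sizes are illustrative):
+//   InterpreterConfigure cfg;
+//   cfg.setMemoryManager(MemoryManagerType::STATIC)
+//     .configStaticMemoryManager(/*input_buf_size=*/1024, /*temp_buf_size=*/2048,
+//                                /*output_buf_size=*/1024);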
+
+#endif
+
+} // namespace luci_interpreter
+
+#endif // ONERT_MICRO_INTERPRETER_CONFIGURE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/TrainingSettings.h b/onert-micro/luci-interpreter/include/luci_interpreter/TrainingSettings.h
new file mode 100644
index 000000000..40eae7ea5
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/TrainingSettings.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef ENABLE_TRAINING
+
+#ifndef LUCI_INTERPRETER_TRAINING_SETTINGS_H
+#define LUCI_INTERPRETER_TRAINING_SETTINGS_H
+
+#include <stdint.h>
+
+namespace luci_interpreter
+{
+
+namespace training
+{
+
+enum Status
+{
+ Ok,
+ Error,
+ EnableTrainModeError,
+ DoubleTrainModeError
+};
+
+enum MetricsTypeEnum
+{
+ MSE,
+ MAE
+};
+
+enum LossTypeEnum
+{
+ MSE_Loss
+};
+
+enum OptimizerTypeEnum
+{
+ SGD
+};
+
+struct TrainingSettings
+{
+ MetricsTypeEnum metric = MSE;
+ LossTypeEnum error_type = MSE_Loss;
+ OptimizerTypeEnum optimizer_type = SGD;
+ uint32_t number_of_epochs = 1;
+ uint32_t batch_size = 1;
+  float learning_rate = 0.00001f;
+ uint32_t number_of_last_trainable_layers = 1;
+};
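+
+// Example: override a few defaults (values are illustrative):
+//   TrainingSettings settings;
+//   settings.number_of_epochs = 10;
+//   settings.learning_rate = 1e-3f;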
+
+} // namespace training
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TRAINING_SETTINGS_H
+
+#endif // ENABLE_TRAINING
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/core/DataType.h b/onert-micro/luci-interpreter/include/luci_interpreter/core/DataType.h
new file mode 100644
index 000000000..e34d22485
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/core/DataType.h
@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_DATATYPE_H
+#define LUCI_INTERPRETER_CORE_DATATYPE_H
+
+#include <cstdint>
+#include <cstddef>
+#include <string>
+#include <cassert>
+
+namespace luci_interpreter
+{
+// TODO check can we remove it
+/**
+ * @brief "scalar" value type
+ */
+enum class DataType
+{
+ Unknown, // Unknown type (serves as a default value)
+
+ U8, // 8-bit unsigned integer
+ U16, // 16-bit unsigned integer
+ U32, // 32-bit unsigned integer
+ U64, // 64-bit unsigned integer
+
+ S8, // 8-bit signed integer
+ S16, // 16-bit signed integer
+ S32, // 32-bit signed integer
+ S64, // 64-bit signed integer
+
+ FLOAT16, // IEEE 16-bit floating-point
+ FLOAT32, // IEEE 32-bit floating-point
+ FLOAT64, // IEEE 64-bit floating-point
+
+ // WARNING the size of Bool may vary for NN frameworks
+ // TODO we need to find a way to resolve this issue
+ BOOL, // Boolean
+};
+
+/**
+ * @brief C++ scalar type corresponding to each DataType
+ */
+template <DataType DT> struct DataTypeImpl
+{
+ // using Type = ...
+};
+
+// TODO Support other enum values
+template <> struct DataTypeImpl<DataType::S8>
+{
+ // Use C++ int8_t type for 8bit integer
+ using Type = int8_t;
+};
+
+template <> struct DataTypeImpl<DataType::U8>
+{
+ // Use C++ uint8_t type for unsigned 8bit integer
+ using Type = uint8_t;
+};
+
+template <> struct DataTypeImpl<DataType::S16>
+{
+ // Use C++ int16_t type for 16bit integer
+ using Type = int16_t;
+};
+
+template <> struct DataTypeImpl<DataType::U16>
+{
+ // Use C++ uint16_t type for unsigned 16bit integer
+ using Type = uint16_t;
+};
+
+template <> struct DataTypeImpl<DataType::S32>
+{
+ // Use C++ int32_t type for 32bit integer
+ using Type = int32_t;
+};
+
+template <> struct DataTypeImpl<DataType::U32>
+{
+ // Use C++ uint32_t type for unsigned 32bit integer
+ using Type = uint32_t;
+};
+
+template <> struct DataTypeImpl<DataType::S64>
+{
+ // Use C++ int64_t type for 64bit integer
+ using Type = int64_t;
+};
+
+template <> struct DataTypeImpl<DataType::U64>
+{
+ // Use C++ uint64_t type for unsigned 64bit integer
+ using Type = uint64_t;
+};
+
+template <> struct DataTypeImpl<DataType::FLOAT16>
+{
+ // float16 type with 16bit value, encoded with help of FP16 library
+ // https://github.com/Maratyszcza/FP16/
+ using Type = uint16_t;
+};
+
+template <> struct DataTypeImpl<DataType::FLOAT32>
+{
+ // Use C++ float type for IEEE 32-bit floating-point numbers
+ using Type = float;
+};
+
+template <> struct DataTypeImpl<DataType::FLOAT64>
+{
+ // Use C++ double type for IEEE 64-bit floating-point numbers
+ using Type = double;
+};
+
+// NOTE DataTypeImpl for BOOL is subject to change
+template <> struct DataTypeImpl<DataType::BOOL>
+{
+ // Use C++ uint8_t type for bool
+ using Type = uint8_t;
+};
+
+/**
+ * @brief Returns the size of the data type.
+ * @note If you need the size at compile time, use `sizeof(typename DataTypeImpl<DT>::Type)`.
+ */
+inline uint32_t size(DataType data_type)
+{
+ switch (data_type)
+ {
+ case DataType::S8:
+ return sizeof(DataTypeImpl<DataType::S8>::Type);
+ case DataType::U8:
+ return sizeof(DataTypeImpl<DataType::U8>::Type);
+ case DataType::S16:
+ return sizeof(DataTypeImpl<DataType::S16>::Type);
+ case DataType::U16:
+ return sizeof(DataTypeImpl<DataType::U16>::Type);
+ case DataType::S32:
+ return sizeof(DataTypeImpl<DataType::S32>::Type);
+ case DataType::U32:
+ return sizeof(DataTypeImpl<DataType::U32>::Type);
+ case DataType::S64:
+ return sizeof(DataTypeImpl<DataType::S64>::Type);
+ case DataType::U64:
+ return sizeof(DataTypeImpl<DataType::U64>::Type);
+ case DataType::FLOAT16:
+ return sizeof(DataTypeImpl<DataType::FLOAT16>::Type);
+ case DataType::FLOAT32:
+ return sizeof(DataTypeImpl<DataType::FLOAT32>::Type);
+ case DataType::FLOAT64:
+ return sizeof(DataTypeImpl<DataType::FLOAT64>::Type);
+ case DataType::BOOL:
+ return sizeof(DataTypeImpl<DataType::BOOL>::Type);
+ default:
+ // TODO Support remaining data types.
+ assert(false);
+ return UINT32_MAX; // Avoid compiler warning.
+ }
+}
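+
+// For example, size(DataType::FLOAT32) == 4 and size(DataType::S64) == 8.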
+
+inline size_t getDataTypeSize(DataType data_type) { return size(data_type); }
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_DATATYPE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/core/ParamsType.h b/onert-micro/luci-interpreter/include/luci_interpreter/core/ParamsType.h
new file mode 100644
index 000000000..af0687f60
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/core/ParamsType.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_MICRO_INTERPRETER_PARAMS_TYPE_H__
+#define __LUCI_MICRO_INTERPRETER_PARAMS_TYPE_H__
+
+#include <vector>
+#include <cstdint>
+#include <utility>
+
+namespace luci_interpreter
+{
+// TODO check can we remove it
+enum class FusedActFunc
+{
+ UNDEFINED, // This is not defined by TFLite or Circle. This was added to
+ // prevent programming error.
+ NONE,
+ RELU,
+ RELU_N1_TO_1,
+ RELU6,
+ TANH,
+ SIGN_BIT
+};
+
+enum class Padding
+{
+ UNDEFINED, // This is not defined by TFLite. This was added to prevent programming error.
+
+ SAME,
+ VALID,
+};
+
+enum class MirrorPadMode
+{
+ UNDEFINED, // This is not defined by Circle. This was added to prevent programming error.
+
+ REFLECT,
+ SYMMETRIC,
+};
+
+} // namespace luci_interpreter
+
+#endif // __LUCI_MICRO_INTERPRETER_PARAMS_TYPE_H__
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/core/Tensor.h b/onert-micro/luci-interpreter/include/luci_interpreter/core/Tensor.h
new file mode 100644
index 000000000..37c9ed9b4
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/core/Tensor.h
@@ -0,0 +1,249 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_TENSOR_H
+#define LUCI_INTERPRETER_CORE_TENSOR_H
+
+#include "luci_interpreter/core/DataType.h"
+#include "luci_interpreter/core/reader/CircleMicroReader.h"
+
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace luci_interpreter
+{
+
+static constexpr int kMaxSmallSize = 5;
+
+class RuntimeShape
+{
+public:
+ RuntimeShape(const RuntimeShape &other) : _size(other.dimensionsCount())
+ {
+ std::memcpy(dimsData(), other.dimsData(), sizeof(int32_t) * _size);
+ }
+
+ // Returns the total count of elements, that is the size when flattened into a
+ // vector.
+ inline int flatSize() const
+ {
+ int buffer_size = 1;
+ const int *dims_data = reinterpret_cast<const int *>(dimsData());
+ for (int i = 0; i < _size; i++)
+ {
+ buffer_size *= dims_data[i];
+ }
+ return buffer_size;
+ }
+
+ inline int32_t *dimsData() { return _dims; }
+ inline const int32_t *dimsData() const { return _dims; }
+
+ RuntimeShape() : _size(0) {}
+
+ explicit RuntimeShape(int dimensions_count) : _size(dimensions_count)
+ {
+ assert(dimensions_count <= kMaxSmallSize);
+ assert(dimensions_count >= 0);
+ }
+
+ RuntimeShape(int dimensions_count, const int32_t *dims_data) : _size(0)
+ {
+ resize(dimensions_count);
+ int32_t *dst_dims = dimsData();
+ std::memcpy(dst_dims, dims_data, dimensions_count * sizeof(int32_t));
+ }
+
+ RuntimeShape(int new_shape_size, const RuntimeShape &shape, int pad_value) : _size(0)
+ {
+ resize(new_shape_size);
+ const int size_increase = new_shape_size - shape.dimensionsCount();
+ for (int i = 0; i < size_increase; ++i)
+ {
+ setDim(i, pad_value);
+ }
+ std::memcpy(dimsData() + size_increase, shape.dimsData(),
+ sizeof(int32_t) * shape.dimensionsCount());
+ }
+
+ RuntimeShape(int shape_size, int32_t value) : _size(0)
+ {
+ resize(shape_size);
+ for (int i = 0; i < shape_size; ++i)
+ {
+ setDim(i, value);
+ }
+ }
+
+ inline static RuntimeShape extendedShape(int new_shape_size, const RuntimeShape &shape)
+ {
+ return RuntimeShape(new_shape_size, shape, 1);
+ }
+
+ bool operator==(const RuntimeShape &comp) const
+ {
+ return this->_size == comp._size &&
+ std::memcmp(dimsData(), comp.dimsData(), _size * sizeof(int32_t)) == 0;
+ }
+
+ inline int32_t dimensionsCount() const { return _size; }
+
+ inline int32_t dims(int i) const
+ {
+    assert(i < _size);
+ assert(i >= 0);
+ return _dims[i];
+ }
+ inline void setDim(int i, int32_t val)
+ {
+    assert(i < _size);
+ assert(i >= 0);
+ _dims[i] = val;
+ }
+
+ inline void resize(int dimensions_count)
+ {
+ assert(dimensions_count <= kMaxSmallSize);
+ assert(dimensions_count >= 0);
+ _size = dimensions_count;
+ }
+
+private:
+ int32_t _size;
+ int32_t _dims[kMaxSmallSize];
+};
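+
+// Example: given int32_t d[2] = {2, 3}; RuntimeShape s(2, d);
+// RuntimeShape::extendedShape(4, s) has dims [1, 1, 2, 3] (leading dims padded with 1).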
+
+class Tensor
+{
+public:
+#ifndef DIS_QUANT
+ static float scale(const circle::Tensor *circle_tensor)
+ {
+ const auto *quant_params = circle_tensor->quantization();
+ if (quant_params == nullptr)
+ {
+ assert(false && "There is no quantization params");
+ return 0;
+ }
+
+ return *quant_params->scale()->cbegin();
+ }
+
+ static int32_t zero_point(const circle::Tensor *circle_tensor)
+ {
+ const auto *quant_params = circle_tensor->quantization();
+ if (quant_params == nullptr)
+ {
+ assert(false && "There is no quantization params");
+ return 0;
+ }
+
+ return *quant_params->zero_point()->cbegin();
+ }
+
+ static const std::vector<float> scales(const circle::Tensor *circle_tensor)
+ {
+ const auto *quant_params = circle_tensor->quantization();
+ if (quant_params == nullptr)
+ {
+ assert(false && "There is no quantization params");
+ return {};
+ }
+ assert(quant_params->scale() != nullptr);
+ std::vector<float> scales(quant_params->scale()->cbegin(), quant_params->scale()->cend());
+
+ return scales;
+ }
+
+ static const std::vector<int32_t> zero_points(const circle::Tensor *circle_tensor)
+ {
+ const auto *quant_params = circle_tensor->quantization();
+ if (quant_params == nullptr)
+ {
+ assert(false && "There is no quantization params");
+ return {};
+ }
+ assert(quant_params->zero_point() != nullptr);
+ std::vector<int32_t> zero_points(quant_params->zero_point()->cbegin(),
+ quant_params->zero_point()->cend());
+
+ return zero_points;
+ }
+
+ static int32_t quantized_dimension(const circle::Tensor *circle_tensor)
+ {
+ const auto *quant_params = circle_tensor->quantization();
+ if (quant_params == nullptr)
+ {
+ assert(false && "There is no quantization params");
+ return 0;
+ }
+ return quant_params->quantized_dimension();
+ }
+#endif
+
+ static bool is_constant_tensor(const luci_interpreter::CircleReader *reader,
+ const circle::Tensor *circle_tensor)
+ {
+ return reader->buffers()[circle_tensor->buffer()]->data() != nullptr;
+ }
+
+ static DataType element_type(const circle::Tensor *circle_tensor)
+ {
+ return luci_datatype(circle_tensor->type());
+ }
+
+ static VectorWrapper<int32_t> tensor_shape(const circle::Tensor *circle_tensor)
+ {
+ return wrap(circle_tensor->shape());
+ }
+
+ static int num_dims(const circle::Tensor *circle_tensor)
+ {
+ // TODO check removing of wrap
+ auto const const_dims = wrap(circle_tensor->shape());
+ return const_dims.size();
+ }
+
+ static int32_t dim(const circle::Tensor *circle_tensor, int i)
+ {
+ // TODO check removing of wrap
+ assert(i >= 0);
+ auto const const_dims = wrap(circle_tensor->shape());
+ assert(i < const_dims.size());
+
+ return const_dims[i];
+ }
+
+ static int32_t num_elements(const circle::Tensor *circle_tensor)
+ {
+ int32_t result = 1;
+ auto const const_dims = wrap(circle_tensor->shape());
+ for (const int32_t dim : const_dims)
+ {
+ result *= dim;
+ }
+ return result;
+ }
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_TENSOR_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/core/reader/CircleMicroReader.h b/onert-micro/luci-interpreter/include/luci_interpreter/core/reader/CircleMicroReader.h
new file mode 100644
index 000000000..14021fc9d
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/core/reader/CircleMicroReader.h
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_MICRO_INTERPRETER_MICRO_READER_H__
+#define __LUCI_MICRO_INTERPRETER_MICRO_READER_H__
+
+#include "luci_interpreter/core/ParamsType.h"
+#include "luci_interpreter/core/DataType.h"
+
+#include <circle-generated/circle/schema_generated.h>
+
+#include <cassert>
+#include <map>
+#include <memory>
+#include <type_traits>
+#include <vector>
+
+namespace luci_interpreter
+{
+
+#ifdef USE_STATIC_ALLOC
+namespace
+{
+
+using ExecutionPlanTable = std::map<uint32_t, std::vector<uint32_t>>;
+
+template <typename VECTORTYPE> uint32_t read_u32(const VECTORTYPE &buffer, uint32_t idx)
+{
+ static_assert(std::is_same<typename VECTORTYPE::value_type, uint8_t>::value, "Types mismatch!");
+
+ uint32_t val = 0;
+ val += (buffer.at(idx + 0) << 0 * 8);
+ val += (buffer.at(idx + 1) << 1 * 8);
+ val += (buffer.at(idx + 2) << 2 * 8);
+ val += (buffer.at(idx + 3) << 3 * 8);
+ return val;
+}
+
+} // namespace
+
+namespace read_metadata
+{
+
+template <typename VECTORTYPE>
+ExecutionPlanTable decode_execution_plan(const VECTORTYPE &execution_plan_data)
+{
+ static_assert(std::is_same<typename VECTORTYPE::value_type, uint8_t>::value, "Types mismatch!");
+
+ ExecutionPlanTable execution_plan_table;
+ uint32_t idx = 0;
+
+ if (execution_plan_data.size() < 4)
+ assert(false && "Op table decode error : invalid entry number");
+
+ uint32_t entry_number = read_u32(execution_plan_data, idx);
+ idx += sizeof(uint32_t);
+
+ while (idx < execution_plan_data.size())
+ {
+ if (idx + 2 * sizeof(uint32_t) > execution_plan_data.size())
+ assert(false && "Op table decode error : invalid entry item");
+
+ uint32_t id = read_u32(execution_plan_data, idx);
+ idx += sizeof(uint32_t);
+
+ uint32_t size = read_u32(execution_plan_data, idx);
+
+ if (size == 0)
+ assert(false && "Op table decode error : empty execution plan entry");
+
+ idx += sizeof(uint32_t);
+
+ if (idx + sizeof(uint32_t) * size > execution_plan_data.size())
+ assert(false && "Source table decode error : invalid entry data");
+
+ std::vector<uint32_t> execution_plan_vector;
+ uint32_t position = read_u32(execution_plan_data, idx);
+ idx += sizeof(uint32_t);
+
+ for (uint32_t j = 1; j < size; ++j)
+ {
+ uint32_t execution_plan_inform = read_u32(execution_plan_data, idx);
+ idx += sizeof(uint32_t);
+
+ execution_plan_vector.push_back(execution_plan_inform);
+ }
+
+ if (!execution_plan_table.insert({position, execution_plan_vector}).second)
+ assert(false && "Op table decode error : duplicated origin ID");
+ }
+
+ if (idx != execution_plan_data.size())
+ assert(false && "Op table decode error : data size invalid");
+
+ if (execution_plan_table.size() != entry_number)
+ assert(false && "Op table decode error : entry number invalid");
+
+ return execution_plan_table;
+}
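+
+// Byte layout consumed above (a sketch reconstructed from the reads; all fields are
+// little-endian uint32): [entry_number] then, per entry, [id][size][position] followed
+// by (size - 1) plan values; `id` is read only to advance the cursor. For example,
+// the words {1, 7, 2, 0, 42} decode to the single-entry table {0: [42]}.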
+
+} // namespace read_metadata
+#endif
+
+DataType luci_datatype(circle::TensorType type);
+FusedActFunc luci_actfunc(circle::ActivationFunctionType type);
+Padding luci_padding(circle::Padding padding);
+MirrorPadMode luci_mirrorpad_mode(circle::MirrorPadMode mode);
+
+/**
+ * @brief Wrapper to use a flatbuffers::Vector pointer as a std::vector-like entity
+ */
+template <typename T> class VectorWrapper
+{
+public:
+ explicit VectorWrapper(const flatbuffers::Vector<T> *ptr);
+
+ const T *data() const;
+ uint32_t size() const;
+
+ using iterator = typename flatbuffers::Vector<T>::const_iterator;
+ iterator begin() const;
+ iterator end() const;
+
+ using value_type = typename flatbuffers::Vector<T>::return_type;
+ value_type at(uint32_t i) const;
+ value_type operator[](uint32_t i) const;
+
+ bool null() const;
+ bool empty() const;
+
+private:
+ const flatbuffers::Vector<T> *_vector;
+};
+
+template <typename T> VectorWrapper<T> wrap(const flatbuffers::Vector<T> *vec)
+{
+ return VectorWrapper<T>(vec);
+}
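+
+// Usage sketch (assuming `tensor` is a valid const circle::Tensor *): VectorWrapper
+// models begin()/end(), so range-for iterates the wrapped flatbuffer vector directly:
+//   int32_t elements = 1;
+//   for (const int32_t d : wrap(tensor->shape()))
+//     elements *= d;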
+
+/**
+ * @brief Loads Circle file and provides helpers to access attributes
+ */
+class CircleReader
+{
+public:
+ using CircleBuffers = VectorWrapper<flatbuffers::Offset<circle::Buffer>>;
+ using CircleTensors = VectorWrapper<flatbuffers::Offset<circle::Tensor>>;
+ using CircleOperators = VectorWrapper<flatbuffers::Offset<circle::Operator>>;
+ using CircleOperatorCodes = VectorWrapper<flatbuffers::Offset<circle::OperatorCode>>;
+ using CircleMetadataSet = VectorWrapper<flatbuffers::Offset<circle::Metadata>>;
+
+public:
+ CircleReader() = default;
+
+public: // direct API
+ CircleOperatorCodes opcodes() const { return wrap(_model->operator_codes()); }
+ CircleBuffers buffers() const { return wrap(_model->buffers()); }
+ CircleTensors tensors() const { return wrap(_current_subgraph->tensors()); }
+ CircleOperators operators() const { return wrap(_current_subgraph->operators()); }
+ VectorWrapper<int32_t> inputs() const { return wrap(_current_subgraph->inputs()); }
+ VectorWrapper<int32_t> outputs() const { return wrap(_current_subgraph->outputs()); }
+ circle::DataFormat data_format() const { return _current_subgraph->data_format(); }
+ CircleMetadataSet metadata() const { return wrap(_model->metadata()); }
+
+ uint32_t num_subgraph() const { return wrap(_model->subgraphs()).size(); }
+ circle::BuiltinOperator builtin_code(const circle::Operator *op) const;
+
+public:
+ bool parse(const circle::Model *model);
+ bool select_subgraph(uint32_t subgraph);
+ uint32_t get_current_subgraph_index() const { return _current_subgraph_index; }
+
+private:
+ const circle::Model *_model{nullptr};
+ const circle::SubGraph *_current_subgraph{nullptr};
+ uint32_t _current_subgraph_index{0};
+};
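+
+// Typical call sequence (an illustrative sketch; `model` is assumed to be a valid
+// circle::Model root obtained elsewhere):
+//   CircleReader reader;
+//   if (reader.parse(model) && reader.select_subgraph(0))
+//   {
+//     auto tensors = reader.tensors(); // tensors of subgraph 0
+//   }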
+
+} // namespace luci_interpreter
+
+#endif // __LUCI_MICRO_INTERPRETER_MICRO_READER_H__
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/core/reader/CircleMicroReaderHelper.h b/onert-micro/luci-interpreter/include/luci_interpreter/core/reader/CircleMicroReaderHelper.h
new file mode 100644
index 000000000..c1122cf06
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/core/reader/CircleMicroReaderHelper.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_MICRO_CIRCLE_MICRO_READER_HELPER_H__
+#define __LUCI_MICRO_CIRCLE_MICRO_READER_HELPER_H__
+
+#include <circle-generated/circle/schema_generated.h>
+
+#include <cassert>
+#include <vector>
+
+namespace circle
+{
+
+::circle::BuiltinOperator builtin_code_neutral(const ::circle::OperatorCode *opcode);
+bool is_valid(const ::circle::OperatorCode *opcode);
+bool is_custom(const ::circle::OperatorCode *opcode);
+const char *tensor_type(const ::circle::Tensor *tensor);
+
+template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T> *flat_array)
+{
+  if (flat_array == nullptr)
+  {
+    assert(false && "flat array is nullptr");
+    return {}; // avoid dereferencing a null pointer in release builds
+  }
+
+ std::vector<T> ret(flat_array->Length());
+ for (uint32_t i = 0; i < flat_array->Length(); i++)
+ {
+ ret[i] = flat_array->Get(i);
+ }
+ return ret;
+}
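+
+// Usage sketch (illustrative; `op` is assumed to be a valid const circle::Operator *):
+//   std::vector<int32_t> input_indices = as_index_vector(op->inputs());
+// copies the flatbuffer index list into a std::vector so standard algorithms apply.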
+
+} // namespace circle
+
+#endif // __LUCI_MICRO_CIRCLE_MICRO_READER_HELPER_H__
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/onert-micro-version.h b/onert-micro/luci-interpreter/include/luci_interpreter/onert-micro-version.h
new file mode 100644
index 000000000..5a3839693
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/onert-micro-version.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_MICRO_VERSION_H__
+#define __ONERT_MICRO_VERSION_H__
+
+/**
+ * ONERT_MICRO_VERSION is a uint32_t value representing the onert-micro version
+ * in the form 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch
+ */
+#define ONERT_MICRO_VERSION 0x01000000
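+
+/*
+ * Decoding sketch (an editor's illustration; only ONERT_MICRO_VERSION itself is
+ * defined by this header):
+ *   major = (ONERT_MICRO_VERSION >> 24) & 0xff;  // MM   -> 1
+ *   minor = (ONERT_MICRO_VERSION >> 8) & 0xffff; // mmmm -> 0
+ *   patch = ONERT_MICRO_VERSION & 0xff;          // PP   -> 0, i.e. version 1.0.0
+ */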
+
+#endif // __ONERT_MICRO_VERSION_H__
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/TestDataBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/TestDataBase.h
new file mode 100644
index 000000000..9cd21485c
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/TestDataBase.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_TEST_DATA_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_TEST_DATA_BASE_H
+
+#include <vector>
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T, typename U = T> class TestDataBase
+{
+public:
+ virtual ~TestDataBase() = default;
+
+ virtual const unsigned char *get_model_ptr() = 0;
+
+ virtual const std::vector<T> &get_input_data_by_index(int i) = 0;
+ virtual const std::vector<U> &get_output_data_by_index(int i) = 0;
+};
+
+class NegTestDataBase
+{
+public:
+ virtual ~NegTestDataBase() = default;
+ virtual const unsigned char *get_model_ptr() = 0;
+};
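+
+// Intended use (a sketch; the runner itself lives elsewhere): a positive test feeds
+// get_input_data_by_index(0) into an interpreter built from get_model_ptr() and
+// compares the result with get_output_data_by_index(0); a NegTestDataBase model is
+// only expected to fail import/validation, so it carries no reference data.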
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_TEST_DATA_BASE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/abs/FloatAbsKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/abs/FloatAbsKernel.h
new file mode 100644
index 000000000..a7e8ccc79
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/abs/FloatAbsKernel.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_FLOAT_ABS_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_FLOAT_ABS_KERNEL_H
+
+#include "TestDataAbsBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace abs_float
+{
+/*
+ * Abs Kernel:
+ *
+ * Input(1, 3, 3, 2)
+ * |
+ * Abs
+ * |
+ * Output(1, 3, 3, 2)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x28, 0x00, 0x00, 0x00, 0x24, 0x01, 0x00, 0x00, 0x40, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff,
+ 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00,
+ 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x4e, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xd4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x65, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x65, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input_data = {4.5279765, -6.575015, -10.009525, 3.1054533, -0.49870253,
+ -9.601274, 8.061923, 8.651763, 5.580226, -22.497627,
+ -5.331085, -0.5524021, -11.368782, -0.61816937, 11.072669,
+ -10.092069, 7.357945, 8.606385};
+
+const std::vector<float> reference_output_data = {
+ 4.5279765, 6.575015, 10.009525, 3.1054533, 0.49870253, 9.601274, 8.061923, 8.651763, 5.580226,
+ 22.497627, 5.331085, 0.5524021, 11.368782, 0.61816937, 11.072669, 10.092069, 7.357945, 8.606385};
+
+} // namespace abs_float
+
+class TestDataFloatAbs : public TestDataAbsBase<float>
+{
+public:
+ TestDataFloatAbs()
+ {
+ _input_data = abs_float::input_data;
+ _reference_output_data = abs_float::reference_output_data;
+ _test_kernel_model_circle = abs_float::test_kernel_model_circle;
+ }
+
+ ~TestDataFloatAbs() override = default;
+};
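+
+// Data sketch: reference_output_data is simply the elementwise absolute value of
+// input_data (e.g. |-6.575015| == 6.575015 at index 1), so a runner can check
+// interpreter results on input 0 against get_output_data_by_index(0).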
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_FLOAT_ABS_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/abs/NegAbsKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/abs/NegAbsKernel.h
new file mode 100644
index 000000000..aaf167f56
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/abs/NegAbsKernel.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_ABS_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_NEG_ABS_KERNEL_H
+
+#include "TestDataAbsBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace neg_abs_input_output_type_mismatch
+{
+/*
+ * Abs Kernel with input/output type mismatch (the types should be equal):
+ *
+ * Input(1, 3, 3, 2) - Float
+ * |
+ * Abs
+ * |
+ * Output(1, 3, 3, 2) - Int
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x28, 0x00, 0x00, 0x00, 0x34, 0x01, 0x00, 0x00, 0x50, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff,
+ 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00,
+ 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x4e, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x65, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x65, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+} // namespace neg_abs_input_output_type_mismatch
+
+namespace neg_abs_input_output_shape_mismatch
+{
+/*
+ * Abs Kernel with input/output shape mismatch (the shapes should be equal):
+ *
+ * Input(1, 3, 3, 2) - Float
+ * |
+ * Abs
+ * |
+ * Output(3, 3, 2) - Float
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x28, 0x00, 0x00, 0x00, 0x20, 0x01, 0x00, 0x00, 0x3c, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff,
+ 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00,
+ 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x4e, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xd8, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x65, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+ 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+} // namespace neg_abs_input_output_shape_mismatch
+
+class NegTestDataInputOutputTypeMismatchAbsKernel : public NegTestDataBase
+{
+public:
+ NegTestDataInputOutputTypeMismatchAbsKernel()
+ {
+ _test_kernel_model_circle = neg_abs_input_output_type_mismatch::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInputOutputTypeMismatchAbsKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+class NegTestDataInputOutputShapeMismatchAbsKernel : public NegTestDataBase
+{
+public:
+ NegTestDataInputOutputShapeMismatchAbsKernel()
+ {
+ _test_kernel_model_circle = neg_abs_input_output_shape_mismatch::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInputOutputShapeMismatchAbsKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_ABS_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/abs/TestDataAbsBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/abs/TestDataAbsBase.h
new file mode 100644
index 000000000..1d7fa584b
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/abs/TestDataAbsBase.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_ABS_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_ABS_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T> class TestDataAbsBase : public TestDataBase<T>
+{
+public:
+ TestDataAbsBase() = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+  const std::vector<T> &get_input_data_by_index(int i) override final
+  {
+    switch (i)
+    {
+      case 0:
+        return _input_data;
+      default:
+        assert(false && "Wrong input index");
+        return _input_data; // unreachable; avoids undefined behaviour in release builds
+    }
+  }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input_data;
+ std::vector<T> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_ABS_KERNEL_BASE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/add/FloatAddKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/add/FloatAddKernel.h
new file mode 100644
index 000000000..5f9e7429c
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/add/FloatAddKernel.h
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_ADD_KERNEL_FLOAT_H
+#define LUCI_INTERPRETER_TEST_MODELS_ADD_KERNEL_FLOAT_H
+
+#include "TestDataAddBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace add_float_with_broadcasting
+{
+
+/*
+ * Add Kernel:
+ *
+ * Input_1(2, 5) Input_2(2, 1)
+ * \ /
+ * Add(with broadcast)
+ * |
+ * Output(2, 5)
+ */
+
+const unsigned char test_add_kernel_float_with_broadcasting_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0xc0, 0x00, 0x00, 0x00, 0x08, 0x02, 0x00, 0x00, 0x14, 0x02, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0xac, 0x00, 0x00, 0x00, 0xa4, 0x00, 0x00, 0x00, 0x9c, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00,
+ 0x74, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x9a, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+ 0x58, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x07, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x04, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0a, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x32, 0x2e, 0x31, 0x31, 0x2e, 0x30, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x31, 0x2e, 0x35, 0x2e, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x9c, 0xfe, 0xff, 0xff, 0xa0, 0xfe, 0xff, 0xff, 0xa4, 0xfe, 0xff, 0xff, 0xa8, 0xfe, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x9a, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x64, 0xff, 0xff, 0xff, 0x03, 0x00, 0x00, 0x00,
+ 0x41, 0x64, 0x64, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0xc6, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x90, 0xff, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x00, 0x50, 0x6c, 0x61, 0x63,
+ 0x65, 0x68, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x5f, 0x31, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x10, 0x00,
+ 0x00, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0xd8, 0xff, 0xff, 0xff,
+ 0x0b, 0x00, 0x00, 0x00, 0x50, 0x6c, 0x61, 0x63, 0x65, 0x68, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
+ 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+ 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input1_data = {11.547888, 16.922003, 23.412094, 7.1120033, -1.9457912,
+ 26.603596, -13.668177, 16.682764, 21.436306, 11.578255};
+const std::vector<float> input2_data = {-18.080006, 4.956518};
+const std::vector<float> reference_output_data = {-6.532118, -1.1580029, 5.3320885, -10.968002,
+ -20.025797, 31.560114, -8.7116585, 21.639282,
+ 26.392824, 16.534773};
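+
+// Broadcasting sketch: input2 has shape (2, 1), so -18.080006 is added to every
+// element of row 0 of input1 and 4.956518 to every element of row 1
+// (e.g. 11.547888 + (-18.080006) = -6.532118, the first reference output).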
+} // namespace add_float_with_broadcasting
+
+namespace add_float_no_broadcasting
+{
+/*
+ * Add Kernel:
+ *
+ * Input_1(2, 5) Input_2(2, 5)
+ * \ /
+ * Add(no broadcast)
+ * |
+ * Output(2, 5)
+ */
+const unsigned char test_add_kernel_float_no_broadcasting_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0xc0, 0x00, 0x00, 0x00, 0x08, 0x02, 0x00, 0x00, 0x14, 0x02, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0xac, 0x00, 0x00, 0x00, 0xa4, 0x00, 0x00, 0x00, 0x9c, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00,
+ 0x74, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x9a, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+ 0x58, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x07, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x04, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0a, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x32, 0x2e, 0x31, 0x31, 0x2e, 0x30, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x31, 0x2e, 0x35, 0x2e, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x9c, 0xfe, 0xff, 0xff, 0xa0, 0xfe, 0xff, 0xff, 0xa4, 0xfe, 0xff, 0xff, 0xa8, 0xfe, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x9a, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x64, 0xff, 0xff, 0xff, 0x03, 0x00, 0x00, 0x00,
+ 0x41, 0x64, 0x64, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0xc6, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x90, 0xff, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x00, 0x50, 0x6c, 0x61, 0x63,
+ 0x65, 0x68, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x5f, 0x31, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x10, 0x00,
+ 0x00, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0xd8, 0xff, 0xff, 0xff,
+ 0x0b, 0x00, 0x00, 0x00, 0x50, 0x6c, 0x61, 0x63, 0x65, 0x68, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
+ 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+ 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input1_data = {-18.994303, -21.966125, 13.298149, 14.595678, 14.874681,
+                                        -7.8676147, 18.542614, 12.96068, 17.352306, 1.6868477};
+const std::vector<float> input2_data = {13.860439, 7.7335033, 8.465873, 6.483177, 12.05286,
+                                        -14.429752, 47.21386, -14.103956, 23.698446, 28.710766};
+const std::vector<float> reference_output_data = {-5.1338634, -14.232622, 21.764023, 21.078856,
+                                                  26.927542, -22.297367, 65.75647, -1.1432762,
+                                                  41.05075, 30.397614};
+} // namespace add_float_no_broadcasting
+
+class TestDataFloatAdd : public TestDataAddBase<float>
+{
+public:
+ explicit TestDataFloatAdd(bool is_with_broadcast) : TestDataAddBase<float>(is_with_broadcast)
+ {
+ if (is_with_broadcast)
+ {
+ _input1_data = add_float_with_broadcasting::input1_data;
+ _input2_data = add_float_with_broadcasting::input2_data;
+ _reference_output_data = add_float_with_broadcasting::reference_output_data;
+ _test_add_kernel_model_circle =
+ add_float_with_broadcasting::test_add_kernel_float_with_broadcasting_model_circle;
+ }
+ else
+ {
+ _input1_data = add_float_no_broadcasting::input1_data;
+ _input2_data = add_float_no_broadcasting::input2_data;
+ _reference_output_data = add_float_no_broadcasting::reference_output_data;
+ _test_add_kernel_model_circle =
+ add_float_no_broadcasting::test_add_kernel_float_no_broadcasting_model_circle;
+ }
+ }
+
+ ~TestDataFloatAdd() override = default;
+};
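+
+// Construction sketch: the constructor flag selects which embedded model and data
+// set the base class exposes, e.g.
+//   TestDataFloatAdd broadcast_case(/*is_with_broadcast=*/true);  // (2,5) + (2,1)
+//   TestDataFloatAdd plain_case(/*is_with_broadcast=*/false);     // (2,5) + (2,5)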
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_ADD_KERNEL_FLOAT_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/add/IntAddKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/add/IntAddKernel.h
new file mode 100644
index 000000000..6bc73ff1b
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/add/IntAddKernel.h
@@ -0,0 +1,285 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_ADD_KERNEL_INT_H
+#define LUCI_INTERPRETER_TEST_MODELS_ADD_KERNEL_INT_H
+
+#include "TestDataAddBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace add_int_with_broadcasting
+{
+
+/*
+ * Add Kernel:
+ *
+ * Input_1(2, 5) Input_2(2, 1)
+ * \ /
+ * Add(with broadcast)
+ * |
+ * Output(2, 5)
+ */
+const unsigned char test_add_kernel_int32_with_broadcasting_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0xc0, 0x00, 0x00, 0x00, 0x14, 0x02, 0x00, 0x00, 0x20, 0x02, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0xac, 0x00, 0x00, 0x00, 0xa4, 0x00, 0x00, 0x00, 0x9c, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00,
+ 0x74, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x9a, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+ 0x58, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x07, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x04, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0a, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x32, 0x2e, 0x31, 0x31, 0x2e, 0x30, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x31, 0x2e, 0x35, 0x2e, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x90, 0xfe, 0xff, 0xff, 0x94, 0xfe, 0xff, 0xff, 0x98, 0xfe, 0xff, 0xff, 0x9c, 0xfe, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x08, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x92, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x5c, 0xff, 0xff, 0xff,
+ 0x03, 0x00, 0x00, 0x00, 0x41, 0x64, 0x64, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0xc2, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x1c, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff,
+ 0x0d, 0x00, 0x00, 0x00, 0x50, 0x6c, 0x61, 0x63, 0x65, 0x68, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x5f,
+ 0x31, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x13, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x18, 0x00, 0x00, 0x00, 0xd8, 0xff, 0xff, 0xff, 0x0b, 0x00, 0x00, 0x00,
+ 0x50, 0x6c, 0x61, 0x63, 0x65, 0x68, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const unsigned char test_add_kernel_int64_with_broadcasting_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0xc0, 0x00, 0x00, 0x00, 0x18, 0x02, 0x00, 0x00, 0x30, 0x02, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0xac, 0x00, 0x00, 0x00, 0xa4, 0x00, 0x00, 0x00, 0x9c, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00,
+ 0x74, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x9a, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+ 0x58, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x07, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x04, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0a, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x32, 0x2e, 0x31, 0x31, 0x2e, 0x30, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x32, 0x2e, 0x36, 0x2e, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xb8, 0xfe, 0xff, 0xff, 0xbc, 0xfe, 0xff, 0xff, 0xc0, 0xfe, 0xff, 0xff, 0xc4, 0xfe, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x30, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x92, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x10, 0x00, 0x00, 0x00, 0x84, 0xff, 0xff, 0xff,
+ 0x03, 0x00, 0x00, 0x00, 0x41, 0x64, 0x64, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0xc2, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x1c, 0x00, 0x00, 0x00, 0xb4, 0xff, 0xff, 0xff,
+ 0x0d, 0x00, 0x00, 0x00, 0x50, 0x6c, 0x61, 0x63, 0x65, 0x68, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x5f,
+ 0x31, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x13, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x04, 0x1c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x50, 0x6c, 0x61, 0x63, 0x65, 0x68, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x0a, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<int64_t> input1_data = {-5, 5, 5, 5, -13, -5, -3, 21, 5, -4};
+const std::vector<int64_t> input2_data = {6, -14};
+const std::vector<int64_t> reference_output_data = {1, 11, 11, 11, -7, -19, -17, 7, -9, -18};
+
+const std::vector<int32_t> input1_data_32 = {-5, 5, 5, 5, -13, -5, -3, 21, 5, -4};
+const std::vector<int32_t> input2_data_32 = {6, -14};
+const std::vector<int32_t> reference_output_data_32 = {1, 11, 11, 11, -7, -19, -17, 7, -9, -18};
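+
+// Broadcasting sketch: input2 has shape (2, 1), so 6 is added to every element of
+// row 0 of input1 and -14 to every element of row 1 (e.g. -5 + 6 = 1 and
+// -4 + (-14) = -18, matching the reference outputs above).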
+} // namespace add_int_with_broadcasting
+
+namespace add_int_no_broadcasting
+{
+/*
+ * Add Kernel:
+ *
+ * Input_1(2, 5) Input_2(2, 5)
+ * \ /
+ * Add(no broadcast)
+ * |
+ * Output(2, 5)
+ */
+const unsigned char test_add_kernel_int32_no_broadcasting_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0xc0, 0x00, 0x00, 0x00, 0x14, 0x02, 0x00, 0x00, 0x20, 0x02, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0xac, 0x00, 0x00, 0x00, 0xa4, 0x00, 0x00, 0x00, 0x9c, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00,
+ 0x74, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x9a, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+ 0x58, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x07, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x04, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0a, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x32, 0x2e, 0x31, 0x31, 0x2e, 0x30, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x31, 0x2e, 0x35, 0x2e, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x90, 0xfe, 0xff, 0xff, 0x94, 0xfe, 0xff, 0xff, 0x98, 0xfe, 0xff, 0xff, 0x9c, 0xfe, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x08, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x92, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x5c, 0xff, 0xff, 0xff,
+ 0x03, 0x00, 0x00, 0x00, 0x41, 0x64, 0x64, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0xc2, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x1c, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff,
+ 0x0d, 0x00, 0x00, 0x00, 0x50, 0x6c, 0x61, 0x63, 0x65, 0x68, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x5f,
+ 0x31, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x13, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x18, 0x00, 0x00, 0x00, 0xd8, 0xff, 0xff, 0xff, 0x0b, 0x00, 0x00, 0x00,
+ 0x50, 0x6c, 0x61, 0x63, 0x65, 0x68, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const unsigned char test_add_kernel_int64_no_broadcasting_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0xc0, 0x00, 0x00, 0x00, 0x18, 0x02, 0x00, 0x00, 0x30, 0x02, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0xac, 0x00, 0x00, 0x00, 0xa4, 0x00, 0x00, 0x00, 0x9c, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00,
+ 0x74, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x9a, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+ 0x58, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x07, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x04, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0a, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x32, 0x2e, 0x31, 0x31, 0x2e, 0x30, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x32, 0x2e, 0x36, 0x2e, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xb8, 0xfe, 0xff, 0xff, 0xbc, 0xfe, 0xff, 0xff, 0xc0, 0xfe, 0xff, 0xff, 0xc4, 0xfe, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x30, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x92, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x10, 0x00, 0x00, 0x00, 0x84, 0xff, 0xff, 0xff,
+ 0x03, 0x00, 0x00, 0x00, 0x41, 0x64, 0x64, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0xc2, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x1c, 0x00, 0x00, 0x00, 0xb4, 0xff, 0xff, 0xff,
+ 0x0d, 0x00, 0x00, 0x00, 0x50, 0x6c, 0x61, 0x63, 0x65, 0x68, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x5f,
+ 0x31, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x13, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x04, 0x1c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x50, 0x6c, 0x61, 0x63, 0x65, 0x68, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x0a, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<int64_t> input1_data = {5, -5, -3, 15, 15, -11, 13, 6, 15, 15};
+const std::vector<int64_t> input2_data = {15, 5, 5, 15, 7, 4, 13, 5, 6, 13};
+const std::vector<int64_t> reference_output_data = {20, 0, 2, 30, 22, -7, 26, 11, 21, 28};
+
+const std::vector<int32_t> input1_data_32 = {5, -5, -3, 15, 15, -11, 13, 6, 15, 15};
+const std::vector<int32_t> input2_data_32 = {15, 5, 5, 15, 7, 4, 13, 5, 6, 13};
+const std::vector<int32_t> reference_output_data_32 = {20, 0, 2, 30, 22, -7, 26, 11, 21, 28};
+
+} // namespace add_int_no_broadcasting
+
+class TestData64IntAdd : public TestDataAddBase<int64_t>
+{
+public:
+ explicit TestData64IntAdd(bool is_with_broadcast) : TestDataAddBase<int64_t>(is_with_broadcast)
+ {
+ if (is_with_broadcast)
+ {
+ _input1_data = add_int_with_broadcasting::input1_data;
+ _input2_data = add_int_with_broadcasting::input2_data;
+ _reference_output_data = add_int_with_broadcasting::reference_output_data;
+ _test_add_kernel_model_circle =
+ add_int_with_broadcasting::test_add_kernel_int64_with_broadcasting_model_circle;
+ }
+ else
+ {
+ _input1_data = add_int_no_broadcasting::input1_data;
+ _input2_data = add_int_no_broadcasting::input2_data;
+ _reference_output_data = add_int_no_broadcasting::reference_output_data;
+ _test_add_kernel_model_circle =
+ add_int_no_broadcasting::test_add_kernel_int64_no_broadcasting_model_circle;
+ }
+ }
+
+ ~TestData64IntAdd() override = default;
+};
+
+class TestData32IntAdd : public TestDataAddBase<int32_t>
+{
+public:
+ explicit TestData32IntAdd(bool is_with_broadcast) : TestDataAddBase<int32_t>(is_with_broadcast)
+ {
+ if (is_with_broadcast)
+ {
+ _input1_data = add_int_with_broadcasting::input1_data_32;
+ _input2_data = add_int_with_broadcasting::input2_data_32;
+ _reference_output_data = add_int_with_broadcasting::reference_output_data_32;
+ _test_add_kernel_model_circle =
+ add_int_with_broadcasting::test_add_kernel_int32_with_broadcasting_model_circle;
+ }
+ else
+ {
+ _input1_data = add_int_no_broadcasting::input1_data_32;
+ _input2_data = add_int_no_broadcasting::input2_data_32;
+ _reference_output_data = add_int_no_broadcasting::reference_output_data_32;
+ _test_add_kernel_model_circle =
+ add_int_no_broadcasting::test_add_kernel_int32_no_broadcasting_model_circle;
+ }
+ }
+
+ ~TestData32IntAdd() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_ADD_KERNEL_INT_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/add/NegAddKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/add/NegAddKernel.h
new file mode 100644
index 000000000..44af3afca
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/add/NegAddKernel.h
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_ADD_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_NEG_ADD_KERNEL_H
+
+#include "TestDataAddBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace neg_add_input_type_mismatch
+{
+/*
+ * Add Kernel with mismatching input type:
+ *
+ * Input_1(1, 4, 4) - Int32 Input_2(1, 4, 4) - Float32
+ * \ /
+ * Add(no broadcast)
+ * |
+ * Output(1, 4, 4)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x78, 0x01, 0x00, 0x00, 0x84, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x9c, 0xfe, 0xff, 0xff, 0xa0, 0xfe, 0xff, 0xff, 0xa4, 0xfe, 0xff, 0xff, 0xa8, 0xfe, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xd4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
+ 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+ 0x65, 0x00, 0x00, 0x00};
+
+} // namespace neg_add_input_type_mismatch
+
+namespace neg_add_no_quant_params
+{
+/*
+ * Add Kernel in int16 type without quant params:
+ *
+ * Input_1(1, 4, 4) - Int16 Input_2(1, 4, 4) - Int16
+ * \ /
+ * Add(no broadcast, no quant params)
+ * |
+ * Output(1, 4, 4) - Int16
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x74, 0x01, 0x00, 0x00, 0x80, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xa0, 0xfe, 0xff, 0xff, 0xa4, 0xfe, 0xff, 0xff, 0xa8, 0xfe, 0xff, 0xff, 0xac, 0xfe, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x18, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x9c, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x07, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0xcc, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x07, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x32,
+ 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x07, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31,
+ 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+} // namespace neg_add_no_quant_params
+
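+// Note: the negative-test wrappers below intentionally expose only the raw
+// model pointer. No reference data is provided because the interpreter is
+// expected to reject these models during import/validation rather than
+// produce output.
+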
+class NegTestDataInputMismatchAddKernel : public NegTestDataBase
+{
+public:
+ NegTestDataInputMismatchAddKernel()
+ {
+ _test_kernel_model_circle = neg_add_input_type_mismatch::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInputMismatchAddKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+class NegTestDataNoQuantParamsS16AddKernel : public NegTestDataBase
+{
+public:
+ NegTestDataNoQuantParamsS16AddKernel()
+ {
+ _test_kernel_model_circle = neg_add_no_quant_params::test_kernel_model_circle;
+ }
+
+ ~NegTestDataNoQuantParamsS16AddKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_ADD_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/add/TestDataAddBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/add/TestDataAddBase.h
new file mode 100644
index 000000000..8ffa7fbea
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/add/TestDataAddBase.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_ADD_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_ADD_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T> class TestDataAddBase : public TestDataBase<T>
+{
+public:
+ explicit TestDataAddBase(bool)
+ {
+ // Do nothing
+ }
+
+ TestDataAddBase() = delete;
+
+ const unsigned char *get_model_ptr() override final { return _test_add_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input1_data;
+ case 1:
+ return _input2_data;
+ default:
+ assert(false && "Wrong input index");
+ }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input1_data;
+ std::vector<T> _input2_data;
+ std::vector<T> _reference_output_data;
+ const unsigned char *_test_add_kernel_model_circle;
+};
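+
+// Usage sketch (illustrative only; the surrounding checker plumbing and the
+// concrete TestData32IntAdd subclass live in the sibling Add test headers):
+//   luci_interpreter::test_kernel::TestData32IntAdd test_data(/*is_with_broadcasting=*/false);
+//   const unsigned char *model = test_data.get_model_ptr();
+//   const std::vector<int32_t> &ref = test_data.get_output_data_by_index(0);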
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_ADD_KERNEL_BASE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/add_n/FloatAddNKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/add_n/FloatAddNKernel.h
new file mode 100644
index 000000000..95fd9666a
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/add_n/FloatAddNKernel.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_ADD_N_KERNEL_FLOAT_H
+#define LUCI_INTERPRETER_TEST_MODELS_ADD_N_KERNEL_FLOAT_H
+
+#include "TestDataAddNBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace add_n_float
+{
+/*
+ * AddN Kernel:
+ *
+ * Input_1(1, 4, 4, 3)   Input_2(1, 4, 4, 3)   Input_3(1, 4, 4, 3)
+ * \ | /
+ * AddN
+ * |
+ * Output(1, 4, 4, 3)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x38, 0x00, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00, 0xcc, 0x01, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x24, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x84, 0xff, 0xff, 0xff, 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff,
+ 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00,
+ 0x68, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x52, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa8, 0x00, 0x00, 0x00,
+ 0x68, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x74, 0xff, 0xff, 0xff,
+ 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xa0, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x33,
+ 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xd0, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x32,
+ 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x6a, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x6a, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+ 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input1_data = {
+ 6.427741, -5.5336685, -6.2939873, 8.121177, -3.0810785, -10.091913, -2.4849992, 1.774254,
+ 13.800129, -22.152988, 12.696625, 0.2929567, 8.354922, 5.7603703, -5.8994074, -18.484707,
+ 8.926415, 15.341654, 4.8836274, 5.289197, -9.766411, 13.688, -2.9341066, 7.2281685,
+ -7.639269, -19.849337, -10.183603, -3.39605, 7.0884247, -4.100115, -13.114395, 20.1859,
+ 8.514902, -2.8839726, -4.8524947, -7.7189302, 12.958817, 0.7408314, -14.70222, -4.840035,
+ -5.6485643, 9.197558, 4.805386, -5.6769075, -4.0590677, -6.5562315, 7.0200677, -0.99683046};
+const std::vector<float> input2_data = {
+ -4.0646977, 3.8888195, -0.45402065, -2.352005, 4.839372, -16.821068, 6.937857, 6.233658,
+ 16.912395, -7.1494417, 6.260419, -0.9814551, 5.560984, 2.5352159, 3.3039222, -13.475629,
+ 21.22035, -14.078774, 5.582642, 4.1715817, -3.3241076, 1.4076965, -1.1146233, -5.846616,
+ -9.14507, -7.9248514, 3.61239, -4.173052, 1.4289827, 1.0473942, -8.506401, -11.117105,
+ 11.395946, -3.0757384, -13.336702, 1.6729355, 9.1125765, -5.3872676, -17.386013, 1.4701926,
+ -16.397867, 9.311203, -9.718552, -8.854298, 8.296376, 3.8650365, 8.381851, -6.6090994};
+const std::vector<float> input3_data = {
+ 11.190196, 5.642186, -7.297735, 11.227684, -1.727619, 1.9045501, -9.593952, -16.171299,
+ -6.0474806, 3.3553686, 19.021252, -3.9855165, 5.2290893, 8.515632, -8.236364, 12.097031,
+ 7.9482317, 1.4470768, -0.58474195, -2.9820383, 9.381822, 8.335634, 0.9053579, -0.120785415,
+ 7.994109, -7.4182167, 9.492107, 0.7696781, 6.868584, 6.2453837, 1.7782576, 5.5902786,
+ 9.0994215, -8.651535, -0.6730907, 1.4408729, 5.3254695, 4.124748, 8.724231, -6.1463547,
+ 2.751103, -4.8675337, -9.386753, -15.851856, 5.0927544, 0.2861572, -2.6495001, -4.626466};
+const std::vector<float> reference_output_data = {
+ 13.55324, 3.997337, -14.045743, 16.996857, 0.030674577, -25.00843, -5.141094, -8.163387,
+ 24.665043, -25.94706, 37.978294, -4.674015, 19.144997, 16.811218, -10.83185, -19.863304,
+ 38.094997, 2.7099562, 9.881528, 6.4787407, -3.7086973, 23.431332, -3.143372, 1.2607672,
+ -8.790231, -35.192406, 2.9208941, -6.7994237, 15.385992, 3.1926632, -19.842539, 14.659074,
+ 29.01027, -14.611246, -18.862288, -4.6051216, 27.396862, -0.521688, -23.364002, -9.516197,
+ -19.29533, 13.641229, -14.299919, -30.38306, 9.330063, -2.4050379, 12.7524185, -12.232395};
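+
+// Sanity check (derived from the data above): the first reference value is the
+// element-wise sum input1_data[0] + input2_data[0] + input3_data[0]
+// = 6.427741 + (-4.0646977) + 11.190196, i.e. 13.55324 up to float rounding.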
+
+} // namespace add_n_float
+
+class TestDataFloatAddN : public TestDataAddNBase<float>
+{
+public:
+ TestDataFloatAddN()
+ {
+ _input1_data = add_n_float::input1_data;
+ _input2_data = add_n_float::input2_data;
+ _input3_data = add_n_float::input3_data;
+ _reference_output_data = add_n_float::reference_output_data;
+ _test_kernel_model_circle = add_n_float::test_kernel_model_circle;
+ }
+
+ ~TestDataFloatAddN() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_ADD_N_KERNEL_FLOAT_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/add_n/NegAddNKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/add_n/NegAddNKernel.h
new file mode 100644
index 000000000..762edd456
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/add_n/NegAddNKernel.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_ADD_N_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_NEG_ADD_N_KERNEL_H
+
+#include "TestDataAddNBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace input_type_mismatch_add_n
+{
+/*
+ * AddN Kernel with input1 type != input2 type:
+ *
+ * Input_1(1, 4, 4, 3) - Float32   Input_2(1, 4, 4, 3) - Int32   Input_3(1, 4, 4, 3) - Float32
+ * \ | /
+ * AddN
+ * |
+ * Output(1, 4, 4, 3)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x38, 0x00, 0x00, 0x00, 0xc0, 0x01, 0x00, 0x00, 0xdc, 0x01, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x24, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x84, 0xff, 0xff, 0xff, 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff,
+ 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00,
+ 0x68, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x52, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xb8, 0x00, 0x00, 0x00,
+ 0x74, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x64, 0xff, 0xff, 0xff,
+ 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x90, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x33,
+ 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x32,
+ 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x6a, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x6a, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+ 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+} // namespace input_type_mismatch_add_n
+
+class TestDataInputTypeMismatchAddN : public NegTestDataBase
+{
+public:
+ TestDataInputTypeMismatchAddN()
+ {
+ _test_kernel_model_circle = input_type_mismatch_add_n::test_kernel_model_circle;
+ }
+
+ ~TestDataInputTypeMismatchAddN() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_ADD_N_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/add_n/TestDataAddNBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/add_n/TestDataAddNBase.h
new file mode 100644
index 000000000..6b0784536
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/add_n/TestDataAddNBase.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_ADD_N_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_ADD_N_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T> class TestDataAddNBase : public TestDataBase<T>
+{
+public:
+ TestDataAddNBase() = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input1_data;
+ case 1:
+ return _input2_data;
+ case 2:
+ return _input3_data;
+ default:
+ assert(false && "Wrong input index");
+ }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input1_data;
+ std::vector<T> _input2_data;
+ std::vector<T> _input3_data;
+ std::vector<T> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_ADD_N_KERNEL_BASE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/argmax/FloatArgMaxKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/argmax/FloatArgMaxKernel.h
new file mode 100644
index 000000000..9d9be08c3
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/argmax/FloatArgMaxKernel.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_ARG_MAX_KERNEL_FLOAT_H
+#define LUCI_INTERPRETER_TEST_MODELS_ARG_MAX_KERNEL_FLOAT_H
+
+#include "TestDataArgMaxBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace argmax_float
+{
+
+/*
+ * ArgMax Kernel:
+ *
+ * Input_1(4, 5) Input_2(scalar = 1)
+ * \ /
+ * ArgMax
+ * |
+ * Output(4)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x48, 0x00, 0x00, 0x00, 0x78, 0x01, 0x00, 0x00, 0x94, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x34, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff, 0xfc, 0xff, 0xff, 0xff,
+ 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
+ 0x6c, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x16, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x28, 0x14, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x07, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xd8, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x14, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x61, 0x72, 0x67, 0x6d,
+ 0x61, 0x78, 0x2f, 0x64, 0x69, 0x6d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6f, 0x66, 0x6d, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x11, 0x00, 0x00, 0x00,
+ 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+ 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input1_data = {
+ 16.19906, 15.062998, 19.666464, -20.029007, -15.745727, 0.014214589, -6.178691,
+ 7.236639, -4.1050725, 15.853367, -4.012241, -11.736127, -0.40098614, -17.605135,
+ -7.4517574, -0.11448864, 13.045483, 7.009659, 5.189774, -6.1986547};
+
+const std::vector<int32_t> reference_output_data = {2, 4, 2, 1};
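+
+// Sanity check: along axis 1, the first row of input1_data,
+// {16.19906, 15.062998, 19.666464, -20.029007, -15.745727}, attains its
+// maximum at index 2, matching reference_output_data[0].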
+
+} // namespace argmax_float
+
+class TestDataFloatArgMax : public TestDataArgMaxBase<float, int32_t>
+{
+public:
+ TestDataFloatArgMax()
+ {
+ _input1_data = argmax_float::input1_data;
+ _reference_output_data = argmax_float::reference_output_data;
+ _test_kernel_model_circle = argmax_float::test_kernel_model_circle;
+ }
+
+ ~TestDataFloatArgMax() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_ARG_MAX_KERNEL_FLOAT_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/argmax/NegArgMaxKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/argmax/NegArgMaxKernel.h
new file mode 100644
index 000000000..9d6aff6c7
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/argmax/NegArgMaxKernel.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_ARG_MAX_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_NEG_ARG_MAX_KERNEL_H
+
+#include "TestDataArgMaxBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace wrong_output_type_arg_max
+{
+
+/*
+ * ArgMax Kernel with wrong output type (Float32):
+ *
+ * Input_1(4, 5) Input_2(scalar = 1)
+ * \ /
+ * ArgMax
+ * |
+ * Output(4) - Float32
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x48, 0x00, 0x00, 0x00, 0x74, 0x01, 0x00, 0x00, 0x90, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x34, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff, 0xfc, 0xff, 0xff, 0xff,
+ 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
+ 0x6c, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x16, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x28, 0x14, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x07, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x14, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x61, 0x72, 0x67, 0x6d, 0x61, 0x78, 0x2f, 0x64,
+ 0x69, 0x6d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+} // namespace wrong_output_type_arg_max
+
+class TestDataOutputWrongOutputArgMax : public NegTestDataBase
+{
+public:
+ TestDataOutputWrongOutputArgMax()
+ {
+ _test_kernel_model_circle = wrong_output_type_arg_max::test_kernel_model_circle;
+ }
+
+ ~TestDataOutputWrongOutputArgMax() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_ARG_MAX_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/argmax/TestDataArgMaxBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/argmax/TestDataArgMaxBase.h
new file mode 100644
index 000000000..5100d49c4
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/argmax/TestDataArgMaxBase.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_ARG_MAX_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_ARG_MAX_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T, typename O> class TestDataArgMaxBase : public TestDataBase<T, O>
+{
+public:
+ TestDataArgMaxBase() = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input1_data;
+ default:
+ assert(false && "Wrong input index");
+ }
+ }
+
+ const std::vector<O> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input1_data;
+ std::vector<O> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_ARG_MAX_KERNEL_BASE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/argmin/FloatArgMinKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/argmin/FloatArgMinKernel.h
new file mode 100644
index 000000000..0b915ae28
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/argmin/FloatArgMinKernel.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_ARG_MIN_KERNEL_FLOAT_H
+#define LUCI_INTERPRETER_TEST_MODELS_ARG_MIN_KERNEL_FLOAT_H
+
+#include "TestDataArgMinBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace argmin_float
+{
+
+/*
+ * ArgMin Kernel:
+ *
+ * Input_1(4, 5) Input_2(scalar = 1)
+ * \ /
+ * ArgMin
+ * |
+ * Output(4)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x48, 0x00, 0x00, 0x00, 0x78, 0x01, 0x00, 0x00, 0x94, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x34, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff, 0xfc, 0xff, 0xff, 0xff,
+ 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
+ 0x6c, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x16, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x39, 0x14, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x07, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xd8, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x14, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x61, 0x72, 0x67, 0x6d,
+ 0x69, 0x6e, 0x2f, 0x64, 0x69, 0x6d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6f, 0x66, 0x6d, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x4f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4f, 0x11, 0x00, 0x00, 0x00,
+ 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+ 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input1_data = {
+ 16.19906, 15.062998, 19.666464, -20.029007, -15.745727, 0.014214589, -6.178691,
+ 7.236639, -4.1050725, 15.853367, -4.012241, -11.736127, -0.40098614, -17.605135,
+ -7.4517574, -0.11448864, 13.045483, 7.009659, 5.189774, -6.1986547};
+
+const std::vector<int32_t> reference_output_data = {3, 1, 3, 4};
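+
+// Sanity check: the first row of input1_data attains its minimum, -20.029007,
+// at index 3, matching reference_output_data[0].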
+
+} // namespace argmin_float
+
+class TestDataFloatArgMin : public TestDataArgMinBase<float, int32_t>
+{
+public:
+ TestDataFloatArgMin()
+ {
+ _input1_data = argmin_float::input1_data;
+ _reference_output_data = argmin_float::reference_output_data;
+ _test_kernel_model_circle = argmin_float::test_kernel_model_circle;
+ }
+
+ ~TestDataFloatArgMin() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_ARG_MIN_KERNEL_FLOAT_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/argmin/NegArgMinKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/argmin/NegArgMinKernel.h
new file mode 100644
index 000000000..0eaeb66b8
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/argmin/NegArgMinKernel.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_ARG_MIN_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_NEG_ARG_MIN_KERNEL_H
+
+#include "TestDataArgMinBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace wrong_output_type_arg_min
+{
+
+/*
+ * ArgMin Kernel with wrong output type (Float32):
+ *
+ * Input_1(4, 5) Input_2(scalar = 1)
+ * \ /
+ * ArgMin
+ * |
+ * Output(4) - Float32
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x44, 0x00, 0x00, 0x00, 0x68, 0x01, 0x00, 0x00, 0x84, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff,
+ 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00,
+ 0x68, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x39,
+ 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x3c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x14, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x61, 0x72, 0x67, 0x6d,
+ 0x69, 0x6e, 0x2f, 0x64, 0x69, 0x6d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0xff, 0xff, 0xff,
+ 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6f, 0x66, 0x6d, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x4f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4f, 0x11, 0x00, 0x00, 0x00,
+ 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+ 0x65, 0x00, 0x00, 0x00};
+} // namespace wrong_output_type_arg_min
+
+class TestDataOutputWrongOutputArgMin : public NegTestDataBase
+{
+public:
+ TestDataOutputWrongOutputArgMin()
+ {
+ _test_kernel_model_circle = wrong_output_type_arg_min::test_kernel_model_circle;
+ }
+
+ ~TestDataOutputWrongOutputArgMin() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_ARG_MIN_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/argmin/TestDataArgMinBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/argmin/TestDataArgMinBase.h
new file mode 100644
index 000000000..8ae22c4e0
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/argmin/TestDataArgMinBase.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_ARG_MIN_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_ARG_MIN_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T, typename O> class TestDataArgMinBase : public TestDataBase<T, O>
+{
+public:
+ TestDataArgMinBase() = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input1_data;
+ default:
+ assert(false && "Wrong input index");
+ }
+ }
+
+ const std::vector<O> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input1_data;
+ std::vector<O> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_ARG_MIN_KERNEL_BASE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/average_pool_2d/FloatAveragePool2DKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/average_pool_2d/FloatAveragePool2DKernel.h
new file mode 100644
index 000000000..58790e94b
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/average_pool_2d/FloatAveragePool2DKernel.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_FLOAT_AVERAGE_POOL2D_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_FLOAT_AVERAGE_POOL2D_KERNEL_H
+
+#include "TestDataAveragePool2DBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace average_pool2d_float
+{
+/*
+ * AveragePool2D Kernel:
+ *
+ * Input(1, 8, 8, 1)
+ * |
+ * AveragePool2D
+ * |
+ * Output(1, 7, 7, 1)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x2c, 0x00, 0x00, 0x00, 0x48, 0x01, 0x00, 0x00, 0x64, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x18, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff,
+ 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x7c, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x16, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x1c, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00,
+ 0x34, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x17, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x40, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xd4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x00, 0x00,
+ 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+ 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input_data = {
+ -10.829727, -5.0753784, -0.46581638, 1.1337309, -12.983028, -1.7739874, -3.8115792, -1.470794,
+ 0.8652655, 3.5751436, 3.142009, -2.881177, 0.21681504, -0.6201232, -1.168152, -5.972758,
+ 6.4394593, 0.60464424, -1.8141485, -17.769108, -13.40761, 6.139243, -2.3543136, -2.5557704,
+ 15.057343, 7.4924536, -20.035614, 4.250232, 5.9063106, 10.382995, -7.45354, 3.7568998,
+ 5.0376787, 2.825182, -7.3617344, -3.2233214, 2.2610564, -6.776909, -2.56466, 17.584259,
+ 15.771288, 5.9487047, -0.11435696, 8.510494, 9.547339, 11.753286, 12.103353, -14.300014,
+ 4.453389, 11.3001, -7.494295, 9.240987, -2.8403296, -1.9216467, 8.1578245, -7.334697,
+ 1.2287734, 0.7231084, 9.715425, -7.466359, -15.67608, 6.574766, 4.489766, -1.6495954};
+
+const std::vector<float> reference_output_data{
+ -2.866174, 0.29398948, 0.23218668, -3.6284149, -3.790081, -1.8434604, -3.1058207,
+ 2.871128, 1.376912, -4.830606, -8.46027, -1.9179188, 0.4991635, -3.0127485,
+ 7.3984747, -3.4381661, -8.842159, -5.2550435, 2.2552347, 1.678596, -2.151681,
+ 7.6031637, -4.269928, -6.5926094, 2.2985694, 2.9433632, -1.6030285, 2.83074,
+ 7.395714, 0.32444882, -0.54722965, 4.273892, 4.196193, 3.6287675, 3.2057345,
+ 9.368371, 2.4100385, 2.535707, 6.1146226, 4.134662, 7.523204, -0.3433833,
+ 4.426343, 3.5610845, 0.9989393, -4.1854453, -3.4658222, 4.3251777, 0.91582465};
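+
+// Sanity check: with the 2x2 filter and stride 1 encoded in the model above,
+// the first output value is the mean of the top-left 2x2 window,
+// (-10.829727 - 5.0753784 + 0.8652655 + 3.5751436) / 4 = -2.866174,
+// matching reference_output_data[0].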
+
+} // namespace average_pool2d_float
+
+class TestDataFloatAveragePool2D : public TestDataAveragePool2DBase<float>
+{
+public:
+ TestDataFloatAveragePool2D()
+ {
+ _input_data = average_pool2d_float::input_data;
+ _reference_output_data = average_pool2d_float::reference_output_data;
+ _test_kernel_model_circle = average_pool2d_float::test_kernel_model_circle;
+ }
+
+ ~TestDataFloatAveragePool2D() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_FLOAT_AVERAGE_POOL2D_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/average_pool_2d/NegAveragePool2DKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/average_pool_2d/NegAveragePool2DKernel.h
new file mode 100644
index 000000000..0c1c5a29b
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/average_pool_2d/NegAveragePool2DKernel.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_AVERAGE_POOL2D_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_NEG_AVERAGE_POOL2D_KERNEL_H
+
+#include "TestDataAveragePool2DBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace neg_average_pool2d_kernel
+{
+/*
+ * AveragePool2D Kernel with input_type != output_type:
+ *
+ * Input(1, 8, 8, 1) = Float32
+ * |
+ * AveragePool2D
+ * |
+ * Output(1, 7, 7, 1) = Int32
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x2c, 0x00, 0x00, 0x00, 0x58, 0x01, 0x00, 0x00, 0x74, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x18, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff,
+ 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x7c, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x16, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x1c, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00,
+ 0x34, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x17, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x50, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x00, 0x00,
+ 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+ 0x65, 0x00, 0x00, 0x00};
+
+} // namespace neg_average_pool2d_kernel
+
+class NegTestDataInputOutputTypeMismatchAveragePool2DKernel : public NegTestDataBase
+{
+public:
+ NegTestDataInputOutputTypeMismatchAveragePool2DKernel()
+ {
+ _test_kernel_model_circle = neg_average_pool2d_kernel::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInputOutputTypeMismatchAveragePool2DKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_AVERAGE_POOL2D_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/average_pool_2d/TestDataAveragePool2DBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/average_pool_2d/TestDataAveragePool2DBase.h
new file mode 100644
index 000000000..c7de059cd
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/average_pool_2d/TestDataAveragePool2DBase.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_AVERAGE_POOL_2D_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_AVERAGE_POOL_2D_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T> class TestDataAveragePool2DBase : public TestDataBase<T>
+{
+public:
+ TestDataAveragePool2DBase() = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input_data;
+ default:
+ assert(false && "Wrong input index");
+ }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input_data;
+ std::vector<T> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_AVERAGE_POOL_2D_KERNEL_BASE_H
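A concrete positive-test header plugs into this base by filling the three protected members in its constructor. A minimal sketch of that pattern follows, assuming a hypothetical average_pool2d_float data namespace with placeholder contents; a real header would hold actual circle model bytes and reference arrays, as the concatenation headers below do:

#include <vector>
#include "luci_interpreter/test_models/average_pool_2d/TestDataAveragePool2DBase.h"

namespace luci_interpreter
{
namespace test_kernel
{
// Hypothetical data namespace: a real positive-test header defines the circle
// model bytes plus input/reference arrays; these are placeholders only.
namespace average_pool2d_float
{
const unsigned char test_kernel_model_circle[] = {0x00}; // placeholder bytes
const std::vector<float> input_data = {};                // placeholder
const std::vector<float> reference_output_data = {};     // placeholder
} // namespace average_pool2d_float

class TestDataFloatAveragePool2D : public TestDataAveragePool2DBase<float>
{
public:
  TestDataFloatAveragePool2D()
  {
    _input_data = average_pool2d_float::input_data;
    _reference_output_data = average_pool2d_float::reference_output_data;
    _test_kernel_model_circle = average_pool2d_float::test_kernel_model_circle;
  }

  ~TestDataFloatAveragePool2D() override = default;
};

} // namespace test_kernel
} // namespace luci_interpreter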
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/concatenation/FloatConcatenationKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/concatenation/FloatConcatenationKernel.h
new file mode 100644
index 000000000..2908bef0c
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/concatenation/FloatConcatenationKernel.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_CONCATENATION_KERNEL_FLOAT_H
+#define LUCI_INTERPRETER_TEST_MODELS_CONCATENATION_KERNEL_FLOAT_H
+
+#include "TestDataConcatenationBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace concatenation_float
+{
+
+/*
+ * Concatenation Kernel:
+ *
+ * Input_1(1, 4, 4, 1) Input_2(1, 4, 4, 2)
+ * \ /
+ * Concatenation
+ * |
+ * Output(1, 4, 4, 3)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x34, 0x00, 0x00, 0x00, 0x78, 0x01, 0x00, 0x00, 0x94, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xf4, 0xff, 0xff, 0xff, 0xf8, 0xff, 0xff, 0xff, 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x74, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x16, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a,
+ 0x14, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xa4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0xd0, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x11, 0x00, 0x00, 0x00,
+ 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+ 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input1_data = {
+ -2.0978436, -25.836285, 7.9663463, -52.951416, -9.174321, -10.963295, 1.4337301, -39.000927,
+ -11.76153, -24.070623, -21.426125, -32.041714, -2.29292, -7.595441, -15.297459, -20.068735};
+const std::vector<float> input2_data = {
+ -23.881905, -8.470397, -52.49611, -21.756306, 1.581161, -26.177217, -39.463478, -2.172443,
+ -30.588694, -7.90017, -17.604687, -27.376356, -26.49272, -15.772057, 8.418157, -36.710365,
+ -35.77088, -27.592611, -5.0617495, -7.8632812, 10.318075, -33.451294, -53.1594, -33.312737,
+ -6.132754, -21.647987, -38.427383, -41.75349, -22.417152, -1.243468, -19.772722, -55.897236};
+const std::vector<float> reference_output_data = {
+ -2.0978436, -23.881905, -8.470397, -25.836285, -52.49611, -21.756306, 7.9663463, 1.581161,
+ -26.177217, -52.951416, -39.463478, -2.172443, -9.174321, -30.588694, -7.90017, -10.963295,
+ -17.604687, -27.376356, 1.4337301, -26.49272, -15.772057, -39.000927, 8.418157, -36.710365,
+ -11.76153, -35.77088, -27.592611, -24.070623, -5.0617495, -7.8632812, -21.426125, 10.318075,
+ -33.451294, -32.041714, -53.1594, -33.312737, -2.29292, -6.132754, -21.647987, -7.595441,
+ -38.427383, -41.75349, -15.297459, -22.417152, -1.243468, -20.068735, -19.772722, -55.897236};
+
+} // namespace concatenation_float
+
+class TestDataFloatConcatenation : public TestDataConcatenationBase<float>
+{
+public:
+ TestDataFloatConcatenation()
+ {
+ _input1_data = concatenation_float::input1_data;
+ _input2_data = concatenation_float::input2_data;
+ _reference_output_data = concatenation_float::reference_output_data;
+ _test_kernel_model_circle = concatenation_float::test_kernel_model_circle;
+ }
+
+ ~TestDataFloatConcatenation() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_CONCATENATION_KERNEL_FLOAT_H
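As a quick sanity check, the shapes in the diagram can be verified directly against the arrays above. The sketch below is self-contained apart from assuming the headers are on an include path rooted at onert-micro/luci-interpreter/include; the actual interpreter invocation is left to the real test harness, which this change does not show:

#include <cassert>
#include "luci_interpreter/test_models/concatenation/FloatConcatenationKernel.h"

int main()
{
  luci_interpreter::test_kernel::TestDataFloatConcatenation td;
  // Shapes from the diagram: (1,4,4,1) and (1,4,4,2) concatenated along the
  // channel axis give (1,4,4,3), i.e. 16 + 32 = 48 elements.
  assert(td.get_input_data_by_index(0).size() == 16);
  assert(td.get_input_data_by_index(1).size() == 32);
  assert(td.get_output_data_by_index(0).size() == 48);
  // The model itself is an in-memory circle flatbuffer starting at:
  const unsigned char *model = td.get_model_ptr();
  (void)model; // handed to the interpreter by the real test harness
  return 0;
}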
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/concatenation/IntConcatenationKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/concatenation/IntConcatenationKernel.h
new file mode 100644
index 000000000..fe0b7f511
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/concatenation/IntConcatenationKernel.h
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_CONCATENATION_KERNEL_INT_H
+#define LUCI_INTERPRETER_TEST_MODELS_CONCATENATION_KERNEL_INT_H
+
+#include "TestDataConcatenationBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace concatenation_int32
+{
+
+/*
+ * Concatenation Kernel:
+ *
+ * Input_1(1, 4, 4, 1) Input_2(1, 4, 4, 2)
+ * \ /
+ * Concatenation
+ * |
+ * Output(1, 4, 4, 3)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x34, 0x00, 0x00, 0x00, 0x84, 0x01, 0x00, 0x00, 0xa0, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xf4, 0xff, 0xff, 0xff, 0xf8, 0xff, 0xff, 0xff, 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x74, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x16, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a,
+ 0x14, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x9c, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0xcc, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x32,
+ 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31,
+ 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<int32_t> input1_data = {-9, -22, -32, 7, -23, -8, -23, -32,
+ -31, -25, -8, -22, -23, 1, -24, -32};
+const std::vector<int32_t> input2_data = {-29, -31, -8, -23, 16, -23, -38, 7, -36, -22, -32,
+ -24, -23, -18, -33, -23, -38, -24, -38, -14, -16, -13,
+ -15, -22, -38, -53, -5, -40, -23, -22, -23, -41};
+const std::vector<int32_t> reference_output_data = {
+ -9, -29, -31, -22, -8, -23, -32, 16, -23, 7, -38, 7, -23, -36, -22, -8,
+ -32, -24, -23, -23, -18, -32, -33, -23, -31, -38, -24, -25, -38, -14, -8, -16,
+ -13, -22, -15, -22, -23, -38, -53, 1, -5, -40, -24, -23, -22, -32, -23, -41};
+
+} // namespace concatenation_int32
+
+namespace concatenation_int64
+{
+
+/*
+ * Concatenation Kernel:
+ *
+ * Input_1(1, 4, 4, 1) Input_2(1, 4, 4, 2)
+ * \ /
+ * Concatenation
+ * |
+ * Output(1, 4, 4, 3)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x34, 0x00, 0x00, 0x00, 0x84, 0x01, 0x00, 0x00, 0xa0, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xf4, 0xff, 0xff, 0xff, 0xf8, 0xff, 0xff, 0xff, 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x74, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x16, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a,
+ 0x14, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x9c, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x04, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0xcc, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x04, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x32,
+ 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x04, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31,
+ 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<int64_t> input1_data = {35, 35, 35, 35, 30, 40, 45, 30,
+ 35, 35, 35, 40, 35, 30, 25, 30};
+const std::vector<int64_t> input2_data = {25, 35, 35, 30, 40, 35, 30, 35, 35, 35, 40,
+ 25, 35, 30, 40, 35, 35, 35, 30, 30, 35, 35,
+ 45, 50, 45, 35, 35, 40, 35, 35, 30, 35};
+const std::vector<int64_t> reference_output_data = {
+ 35, 25, 35, 35, 35, 30, 35, 40, 35, 35, 30, 35, 30, 35, 35, 40, 40, 25, 45, 35, 30, 30, 40, 35,
+ 35, 35, 35, 35, 30, 30, 35, 35, 35, 40, 45, 50, 35, 45, 35, 30, 35, 40, 25, 35, 35, 30, 30, 35};
+
+} // namespace concatenation_int64
+
+class TestDataS32Concatenation : public TestDataConcatenationBase<int32_t>
+{
+public:
+ TestDataS32Concatenation()
+ {
+ _input1_data = concatenation_int32::input1_data;
+ _input2_data = concatenation_int32::input2_data;
+ _reference_output_data = concatenation_int32::reference_output_data;
+ _test_kernel_model_circle = concatenation_int32::test_kernel_model_circle;
+ }
+
+ ~TestDataS32Concatenation() override = default;
+};
+
+class TestDataS64Concatenation : public TestDataConcatenationBase<int64_t>
+{
+public:
+ TestDataS64Concatenation()
+ {
+ _input1_data = concatenation_int64::input1_data;
+ _input2_data = concatenation_int64::input2_data;
+ _reference_output_data = concatenation_int64::reference_output_data;
+ _test_kernel_model_circle = concatenation_int64::test_kernel_model_circle;
+ }
+
+ ~TestDataS64Concatenation() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_CONCATENATION_KERNEL_INT_H
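Because TestDataConcatenationBase is templated on the element type, the S32 and S64 variants above can share one generic check. A minimal sketch under the same include-path assumption as before:

#include <cassert>
#include <cstdint>
#include "luci_interpreter/test_models/concatenation/IntConcatenationKernel.h"

// Works for any element type T because the base class is templated on T.
template <typename T>
void checkSizes(luci_interpreter::test_kernel::TestDataConcatenationBase<T> &td)
{
  // Concatenation output size is the sum of the two input sizes.
  assert(td.get_input_data_by_index(0).size() + td.get_input_data_by_index(1).size() ==
         td.get_output_data_by_index(0).size());
}

int main()
{
  luci_interpreter::test_kernel::TestDataS32Concatenation s32;
  luci_interpreter::test_kernel::TestDataS64Concatenation s64;
  checkSizes<int32_t>(s32);
  checkSizes<int64_t>(s64);
  return 0;
}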
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/concatenation/NegConcatenationKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/concatenation/NegConcatenationKernel.h
new file mode 100644
index 000000000..c2594d5f2
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/concatenation/NegConcatenationKernel.h
@@ -0,0 +1,215 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_CONCATENATION_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_NEG_CONCATENATION_KERNEL_H
+
+#include "TestDataConcatenationBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace input_type_mismatch_concatenation
+{
+
+/*
+ * Concatenation Kernel with input1 type != input2 type:
+ *
+ * Input_1(1, 4, 4, 1) - Int32 Input_2(1, 4, 4, 2) - Float32
+ * \ /
+ * Concatenation
+ * |
+ * Output(1, 4, 4, 3)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x34, 0x00, 0x00, 0x00, 0x88, 0x01, 0x00, 0x00, 0xa4, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xf4, 0xff, 0xff, 0xff, 0xf8, 0xff, 0xff, 0xff, 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x74, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x16, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a,
+ 0x14, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xd4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x11, 0x00, 0x00, 0x00,
+ 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+ 0x65, 0x00, 0x00, 0x00};
+} // namespace input_type_mismatch_concatenation
+
+namespace concatenation_with_relu
+{
+
+/*
+ * Concatenation Kernel with relu activation:
+ *
+ * Input_1(1, 4, 4, 1) - Float32 Input_2(1, 4, 4, 2) - Float32
+ * \ /
+ * Concatenation(ReLU)
+ * |
+ * Output(1, 4, 4, 3) - Float32
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x34, 0x00, 0x00, 0x00, 0x7c, 0x01, 0x00, 0x00, 0x98, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xf4, 0xff, 0xff, 0xff, 0xf8, 0xff, 0xff, 0xff, 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00,
+ 0x78, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a,
+ 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x07, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x03, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00,
+ 0x34, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0xd0, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+} // namespace concatenation_with_relu
+
+namespace concatenation_with_wrong_axis
+{
+
+/*
+ * Concatenation Kernel with wrong axis params:
+ *
+ * Input_1(1, 4, 4, 1) - Float32 Input_2(1, 4, 4, 2) - Float32
+ * \ /
+ * Concatenation(axis = 6, should be < 4)
+ * |
+ * Output(1, 4, 4, 3) - Float32
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x34, 0x00, 0x00, 0x00, 0x7c, 0x01, 0x00, 0x00, 0x98, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xf4, 0xff, 0xff, 0xff, 0xf8, 0xff, 0xff, 0xff, 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00,
+ 0x78, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a,
+ 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x07, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x03, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00,
+ 0x34, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0xd0, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+} // namespace concatenation_with_wrong_axis
+
+class TestDataInputTypeMismatchConcatenation : public NegTestDataBase
+{
+public:
+ TestDataInputTypeMismatchConcatenation()
+ {
+ _test_kernel_model_circle = input_type_mismatch_concatenation::test_kernel_model_circle;
+ }
+
+ ~TestDataInputTypeMismatchConcatenation() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+class TestDataReluConcatenation : public NegTestDataBase
+{
+public:
+ TestDataReluConcatenation()
+ {
+ _test_kernel_model_circle = concatenation_with_relu::test_kernel_model_circle;
+ }
+
+ ~TestDataReluConcatenation() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+class TestDataWrongAxisConcatenation : public NegTestDataBase
+{
+public:
+ TestDataWrongAxisConcatenation()
+ {
+ _test_kernel_model_circle = concatenation_with_wrong_axis::test_kernel_model_circle;
+ }
+
+ ~TestDataWrongAxisConcatenation() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_CONCATENATION_KERNEL_H
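Negative models intentionally carry only the malformed circle bytes and no reference data, since execution is never expected to start. A minimal sketch of how a test holds such a model; the import/configure call that must reject it is kept as a comment because that entry point is not shown in this change:

#include <cassert>
#include "luci_interpreter/test_models/concatenation/NegConcatenationKernel.h"

int main()
{
  // Only the malformed circle bytes are exposed; there is no reference output.
  luci_interpreter::test_kernel::TestDataWrongAxisConcatenation wrong_axis;
  const unsigned char *model = wrong_axis.get_model_ptr();
  assert(model != nullptr);
  // A real test would hand `model` to the interpreter's import/configure step
  // and require it to fail, since the encoded axis is 6 for rank-4 tensors.
  return 0;
}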
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/concatenation/TestDataConcatenationBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/concatenation/TestDataConcatenationBase.h
new file mode 100644
index 000000000..5b94eb063
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/concatenation/TestDataConcatenationBase.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_CONCATENATION_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_CONCATENATION_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T> class TestDataConcatenationBase : public TestDataBase<T>
+{
+public:
+ TestDataConcatenationBase() = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input1_data;
+ case 1:
+ return _input2_data;
+ default:
+ assert(false && "Wrong input index");
+ }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input1_data;
+ std::vector<T> _input2_data;
+ std::vector<T> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_CONCATENATION_KERNEL_BASE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/conv2d/FloatConv2DKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/conv2d/FloatConv2DKernel.h
new file mode 100644
index 000000000..eef549cbf
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/conv2d/FloatConv2DKernel.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_CONV_2D_KERNEL_FLOAT_H
+#define LUCI_INTERPRETER_TEST_MODELS_CONV_2D_KERNEL_FLOAT_H
+
+#include "TestDataConv2DBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace conv2d_float
+{
+
+/*
+ * Conv2D Kernel:
+ *
+ * Input(1, 4, 3, 2) Weight(1, 2, 2, 2) Bias(2)
+ * \ | /
+ * \ | /
+ * Conv2D
+ * |
+ * Output(1, 2, 2, 2)
+ */
+
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x9c, 0x00, 0x00, 0x00, 0x10, 0x02, 0x00, 0x00, 0x2c, 0x02, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x88, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xea, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x80, 0x3f, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f,
+ 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x40, 0xc0, 0x00, 0x00, 0x80, 0xc0, 0x00, 0x00, 0xa0, 0xc0,
+ 0x00, 0x00, 0xc0, 0x40, 0x00, 0x00, 0xe0, 0xc0, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x80, 0x40,
+ 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x40, 0x40, 0x00, 0x00, 0x80, 0xbf, 0x00, 0x00, 0x00, 0xc1,
+ 0x00, 0x00, 0xc0, 0xc0, 0x00, 0x00, 0xe0, 0x40, 0x00, 0x00, 0xa0, 0x40, 0xf8, 0xff, 0xff, 0xff,
+ 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x7c, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x18, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
+ 0x2c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x13, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x07, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x98, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x84, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0xb0, 0xff, 0xff, 0xff,
+ 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x62, 0x69, 0x61, 0x73, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0xd4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x6b, 0x65, 0x72, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x03, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+ 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input_data = {
+ 18.776451, 25.97969, -9.277071, -3.5493946, 12.334248, 5.50226, -2.224743, -7.2292213,
+ 10.259663, -1.0846977, 15.823856, 3.3193378, 4.9413986, 4.3529205, -10.353054, 3.7166824,
+ 27.324902, -6.2231064, 10.370632, 22.661959, 20.206001, 8.245907, 9.984943, 21.379955};
+
+const std::vector<float> reference_output_data = {1.0177879, 128.43202, 0.0, 55.28556,
+ 39.483513, 0.0, 0.0, 7.0231743};
+
+} // namespace conv2d_float
+
+class TestDataFloatConv2D : public TestDataConv2DBase<float>
+{
+public:
+ TestDataFloatConv2D()
+ {
+ _input_data = conv2d_float::input_data;
+ _reference_output_data = conv2d_float::reference_output_data;
+ _test_kernel_model_circle = conv2d_float::test_kernel_model_circle;
+ }
+
+ ~TestDataFloatConv2D() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_CONV_2D_KERNEL_FLOAT_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/conv2d/NegConv2DKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/conv2d/NegConv2DKernel.h
new file mode 100644
index 000000000..52baf89db
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/conv2d/NegConv2DKernel.h
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_CONV_2D_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_NEG_CONV_2D_KERNEL_H
+
+#include "TestDataConv2DBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace neg_conv2d_input_type_mismatch
+{
+
+/*
+ * Conv2D Kernel with input type mismatch (input_type should be equal to weight_type):
+ *
+ * Input(1, 3, 3, 2) - Float32 Weight(1, 1, 1, 2) - Int32 Bias(1)
+ * \ | /
+ * \ | /
+ * Conv2D
+ * |
+ * Output(1, 3, 3, 1)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x60, 0x00, 0x00, 0x00, 0xe0, 0x01, 0x00, 0x00, 0xfc, 0x01, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x4c, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xee, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0xfe, 0x4c, 0xbf, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0xf8, 0xff, 0xff, 0xff, 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x16, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x18, 0x00, 0x00, 0x00,
+ 0x24, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xa8, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x74, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xa0, 0xff, 0xff, 0xff,
+ 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x62, 0x69, 0x61, 0x73, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x6b, 0x65, 0x72, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x03, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+ 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+} // namespace neg_conv2d_input_type_mismatch
+
+namespace neg_conv2d_bias_wrong_type
+{
+/*
+ * Conv2D Kernel with wrong bias type (should be equal to input_type):
+ *
+ * Input(1, 3, 3, 2) - Float32 Weight(1, 1, 1, 2) - Float32 Bias(1) - Int32
+ * \ | /
+ * \ | /
+ * Conv2D
+ * |
+ * Output(1, 3, 3, 1)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x60, 0x00, 0x00, 0x00, 0xe0, 0x01, 0x00, 0x00, 0xfc, 0x01, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x4c, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xee, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x98, 0xb5, 0x03, 0xbe, 0x6b, 0x02, 0xeb, 0x3e,
+ 0xf8, 0xff, 0xff, 0xff, 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x16, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x18, 0x00, 0x00, 0x00,
+ 0x24, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xa8, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x74, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x62, 0x69, 0x61, 0x73, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0xd4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x6b, 0x65, 0x72, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x03, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+ 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+} // namespace neg_conv2d_bias_wrong_type
+
+namespace neg_conv2d_invalid_input_shape
+{
+/*
+ * Conv2D Kernel with invalid input shape (rank should be == 4):
+ *
+ * Input(1, 1, 3, 3, 2) - Float32 Weight(1, 1, 1, 2) - Float32 Bias(1) - Float32
+ * \ | /
+ * \ | /
+ * Conv2D
+ * |
+ * Output(1, 1, 3, 3, 1)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x60, 0x00, 0x00, 0x00, 0xd8, 0x01, 0x00, 0x00, 0xf4, 0x01, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x4c, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xee, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xcb, 0xe5, 0x9c, 0x3f, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x5b, 0xe7, 0x8c, 0xbf, 0x06, 0xc5, 0x2f, 0x3f,
+ 0xf8, 0xff, 0xff, 0xff, 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x16, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x18, 0x00, 0x00, 0x00,
+ 0x24, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x9c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x80, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0xb0, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x62, 0x69, 0x61, 0x73, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0xd4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6b, 0x65, 0x72, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x11, 0x00, 0x00, 0x00,
+ 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+ 0x65, 0x00, 0x00, 0x00};
+} // namespace neg_conv2d_invalid_input_shape
+
+class NegTestDataInputMismatchConv2DKernel : public NegTestDataBase
+{
+public:
+ NegTestDataInputMismatchConv2DKernel()
+ {
+ _test_kernel_model_circle = neg_conv2d_input_type_mismatch::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInputMismatchConv2DKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+class NegTestDataWrongBiasTypeConv2DKernel : public NegTestDataBase
+{
+public:
+ NegTestDataWrongBiasTypeConv2DKernel()
+ {
+ _test_kernel_model_circle = neg_conv2d_bias_wrong_type::test_kernel_model_circle;
+ }
+
+ ~NegTestDataWrongBiasTypeConv2DKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+class NegTestDataInvalidInputShapeConv2DKernel : public NegTestDataBase
+{
+public:
+ NegTestDataInvalidInputShapeConv2DKernel()
+ {
+ _test_kernel_model_circle = neg_conv2d_invalid_input_shape::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInvalidInputShapeConv2DKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_CONV_2D_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/conv2d/TestDataConv2DBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/conv2d/TestDataConv2DBase.h
new file mode 100644
index 000000000..ce888ac3a
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/conv2d/TestDataConv2DBase.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_CONV_2D_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_CONV_2D_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T> class TestDataConv2DBase : public TestDataBase<T>
+{
+public:
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input_data;
+ default:
+ assert(false && "Wrong input index");
+ }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input_data;
+ std::vector<T> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_CONV_2D_KERNEL_BASE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/conv2d/U8Conv2DKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/conv2d/U8Conv2DKernel.h
new file mode 100644
index 000000000..dd992be1d
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/conv2d/U8Conv2DKernel.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_CONV_2D_KERNEL_U8_H
+#define LUCI_INTERPRETER_TEST_MODELS_CONV_2D_KERNEL_U8_H
+
+#include "TestDataConv2DBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace conv2d_u8
+{
+
+/*
+ * Conv2D Kernel:
+ *
+ * Input(1, 3, 3, 2) Weight(1, 1, 1, 2) Bias(1)
+ * \ | /
+ * \ | /
+ *              Conv2D
+ * |
+ * Output(1, 3, 3, 1)
+ */
+
+const unsigned char test_kernel_model_circle[] = {
+ 0x1c, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0xe0, 0x02, 0x00, 0x00, 0xfc, 0x02, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00,
+ 0x20, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xee, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x65, 0x01, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x8d, 0x44, 0x00, 0x00,
+ 0xf8, 0xff, 0xff, 0xff, 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x16, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x18, 0x00, 0x00, 0x00,
+ 0x24, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x5c, 0x01, 0x00, 0x00, 0xdc, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xc2, 0xfe, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x03, 0x48, 0x00, 0x00, 0x00, 0xb4, 0xfe, 0xff, 0xff, 0x30, 0x00, 0x00, 0x00,
+ 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x66, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0xa1, 0xa0, 0x20, 0x3d, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x40, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x80, 0xc0, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x32, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x44, 0x00, 0x00, 0x00, 0x24, 0xff, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00,
+ 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x87, 0x7b, 0x24, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x62, 0x69, 0x61, 0x73, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x92, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x03, 0x44, 0x00, 0x00, 0x00, 0x84, 0xff, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00,
+ 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x66, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xa1, 0xa0, 0x20, 0x3d,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x40, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xc0,
+ 0x03, 0x00, 0x00, 0x00, 0x6b, 0x65, 0x72, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x13, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x20, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03,
+ 0x54, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x6f, 0x12, 0x83, 0x3b, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x80, 0x3f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<uint8_t> input_data = {5, 11, 5, 251, 5, 252, 22, 14, 251,
+ 5, 23, 5, 6, 245, 5, 33, 15, 252};
+
+const std::vector<uint8_t> reference_output_data = {103, 70, 70, 105, 142, 106, 71, 100, 71};
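+
+// Note: inputs and reference outputs are raw uint8 values; the corresponding
+// scale and zero-point quantization parameters live in the circle buffer above.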
+
+} // namespace conv2d_u8
+
+class TestDataU8Conv2D : public TestDataConv2DBase<uint8_t>
+{
+public:
+ TestDataU8Conv2D()
+ {
+ _input_data = conv2d_u8::input_data;
+ _reference_output_data = conv2d_u8::reference_output_data;
+ _test_kernel_model_circle = conv2d_u8::test_kernel_model_circle;
+ }
+
+ ~TestDataU8Conv2D() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_CONV_2D_KERNEL_U8_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/FloatDivKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/FloatDivKernel.h
new file mode 100644
index 000000000..ed9408a16
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/FloatDivKernel.h
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_DIV_KERNEL_FLOAT_H
+#define LUCI_INTERPRETER_TEST_MODELS_DIV_KERNEL_FLOAT_H
+
+#include "TestDataDivBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace div_float_with_broadcasting
+{
+
+/*
+ * Div Kernel:
+ *
+ * Input_1(2, 5) Input_2(2, 1)
+ * \ /
+ * Div(with broadcast)
+ * |
+ * Output(2, 5)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x54, 0x01, 0x00, 0x00, 0x70, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00,
+ 0x2c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xb4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0xd8, 0xff, 0xff, 0xff,
+ 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input1_data = {8.432024, 5.4664106, 16.856224, -10.004156, -14.128681,
+ 12.695552, -7.5779333, -1.1460792, 15.574873, -12.670321};
+const std::vector<float> input2_data = {-2.0361109, -9.528288};
+const std::vector<float> reference_output_data = {-4.14124, -2.6847312, -8.278638, 4.913365,
+ 6.939053, -1.3324064, 0.795309, 0.120281756,
+ -1.634593, 1.3297584};
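+
+// Input_2(2, 1) is broadcast across the last axis: the first five outputs divide
+// input1 by -2.0361109 and the last five by -9.528288; e.g.
+// 8.432024 / -2.0361109 ~ -4.14124 and 12.695552 / -9.528288 ~ -1.3324064.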
+
+} // namespace div_float_with_broadcasting
+
+namespace div_float_no_broadcasting
+{
+/*
+ * Div Kernel:
+ *
+ * Input_1(2, 5) Input_2(2, 5)
+ * \ /
+ * Div(no broadcast)
+ * |
+ * Output(2, 5)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x54, 0x01, 0x00, 0x00, 0x70, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00,
+ 0x2c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xb4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0xd8, 0xff, 0xff, 0xff,
+ 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input1_data = {3.563036, 13.645134, 0.427146, 11.032923, 0.4189046,
+                                        15.737275, 7.7726707, 0.75059056, -7.833488, 3.0679407};
+const std::vector<float> input2_data = {-0.62832826, 7.937863, -14.899745, 0.2819096, -5.8306913,
+                                        8.6010685, -10.391579, -3.312385, -11.495937, 5.5657125};
+const std::vector<float> reference_output_data = {-5.67066, 1.7189934, -0.028668007, 39.136383,
+                                                  -0.07184476, 1.8296884, -0.74797785, -0.22660124,
+                                                  0.6814136, 0.55122155};
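+
+// With no broadcasting the division is purely element-wise:
+// reference_output_data[i] = input1_data[i] / input2_data[i],
+// e.g. 3.563036 / -0.62832826 ~ -5.67066.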
+
+} // namespace div_float_no_broadcasting
+
+class TestDataFloatDiv : public TestDataDivBase<float>
+{
+public:
+ explicit TestDataFloatDiv(bool is_with_broadcast) : TestDataDivBase<float>(is_with_broadcast)
+ {
+ if (is_with_broadcast)
+ {
+ _input1_data = div_float_with_broadcasting::input1_data;
+ _input2_data = div_float_with_broadcasting::input2_data;
+ _reference_output_data = div_float_with_broadcasting::reference_output_data;
+ _test_kernel_model_circle = div_float_with_broadcasting::test_kernel_model_circle;
+ }
+ else
+ {
+ _input1_data = div_float_no_broadcasting::input1_data;
+ _input2_data = div_float_no_broadcasting::input2_data;
+ _reference_output_data = div_float_no_broadcasting::reference_output_data;
+ _test_kernel_model_circle = div_float_no_broadcasting::test_kernel_model_circle;
+ }
+ }
+
+ ~TestDataFloatDiv() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_DIV_KERNEL_FLOAT_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/NegDivKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/NegDivKernel.h
new file mode 100644
index 000000000..df42f95fe
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/NegDivKernel.h
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_DIV_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_NEG_DIV_KERNEL_H
+
+#include "TestDataDivBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace input_1_wrong_type
+{
+
+/*
+ * Div Kernel with input type mismatch:
+ *
+ * Input_1(2, 5) - Int32 Input_2(2, 1) - Float
+ * \ /
+ * Div(with broadcast)
+ * |
+ * Output(2, 5)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x64, 0x01, 0x00, 0x00, 0x80, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00,
+ 0x38, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+} // namespace input_1_wrong_type
+
+namespace input_2_wrong_type
+{
+
+/*
+ * Div Kernel with input type mismatch:
+ *
+ * Input_1(2, 5)- Float Input_2(2, 1) - Int32
+ * \ /
+ * Div(with broadcast)
+ * |
+ * Output(2, 5)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x64, 0x01, 0x00, 0x00, 0x80, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00,
+ 0x38, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+} // namespace input_2_wrong_type
+
+class NegTestDataInput1WrongTypeDiv : public NegTestDataBase
+{
+public:
+ NegTestDataInput1WrongTypeDiv()
+ {
+ _test_kernel_model_circle = input_1_wrong_type::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInput1WrongTypeDiv() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+class NegTestDataInput2WrongTypeDiv : public NegTestDataBase
+{
+public:
+ NegTestDataInput2WrongTypeDiv()
+ {
+ _test_kernel_model_circle = input_2_wrong_type::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInput2WrongTypeDiv() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
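+
+// Both negative test models above are intentionally malformed: one Div input is
+// Int32 while the other is Float32, so the import/validation stage is expected
+// to reject these graphs instead of executing them.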
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_DIV_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/TestDataDivBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/TestDataDivBase.h
new file mode 100644
index 000000000..e4894e1ae
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/div/TestDataDivBase.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_DIV_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_DIV_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T> class TestDataDivBase : public TestDataBase<T>
+{
+public:
+ explicit TestDataDivBase(bool)
+ {
+ // Do nothing
+ }
+
+ TestDataDivBase() = delete;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input1_data;
+ case 1:
+ return _input2_data;
+ default:
+        assert(false && "Wrong input index");
+        return _input1_data; // unreachable; avoids missing-return UB in NDEBUG builds
+ }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input1_data;
+ std::vector<T> _input2_data;
+ std::vector<T> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_DIV_KERNEL_BASE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/elu/FloatEluKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/elu/FloatEluKernel.h
new file mode 100644
index 000000000..688933c49
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/elu/FloatEluKernel.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_FLOAT_ELU_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_FLOAT_ELU_KERNEL_H
+
+#include "TestDataEluBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace elu_float
+{
+/*
+ * Elu Kernel:
+ *
+ * Input(1, 3, 3, 2)
+ * |
+ * Elu
+ * |
+ * Output(1, 3, 3, 2)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x2c, 0x00, 0x00, 0x00, 0x14, 0x01, 0x00, 0x00, 0x30, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x18, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff,
+ 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x48, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0a, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xd4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6f, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input_data = {-10.526339, -13.810211, -15.052563, -8.425814, 3.6157331,
+ 6.429944, -11.151951, 7.7377386, -2.4822063, 0.17121133,
+ 3.6448252, -6.6318836, 1.5882887, -4.6597095, 12.6291065,
+ -3.8438618, -2.5688074, -4.7900896};
+
+const std::vector<float> reference_output_data = {
+ -0.9999732, -0.999999, -0.9999997, -0.9997809, 3.6157331, 6.429944,
+ -0.99998564, 7.7377386, -0.9164414, 0.17121133, 3.6448252, -0.9986823,
+ 1.5882887, -0.9905308, 12.6291065, -0.97858924, -0.9233731, -0.9916883};
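+
+// The reference values follow ELU with alpha = 1: f(x) = x for x > 0 and
+// f(x) = exp(x) - 1 otherwise; e.g. f(-2.4822063) = exp(-2.4822063) - 1 ~ -0.9164414.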
+
+} // namespace elu_float
+
+class TestDataFloatElu : public TestDataEluBase<float>
+{
+public:
+ TestDataFloatElu()
+ {
+ _input_data = elu_float::input_data;
+ _reference_output_data = elu_float::reference_output_data;
+ _test_kernel_model_circle = elu_float::test_kernel_model_circle;
+ }
+
+ ~TestDataFloatElu() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_FLOAT_ELU_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/elu/NegEluKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/elu/NegEluKernel.h
new file mode 100644
index 000000000..db7ad33ac
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/elu/NegEluKernel.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_ELU_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_NEG_ELU_KERNEL_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace neg_input_output_type_mismatch_kernel
+{
+/*
+ * Elu Kernel with input/output type mismatch:
+ *
+ * Input(1, 3, 3, 2) - Float32
+ * |
+ * Elu
+ * |
+ * Output(1, 3, 3, 2) - Int32
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x2c, 0x00, 0x00, 0x00, 0x24, 0x01, 0x00, 0x00, 0x40, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x18, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff,
+ 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x48, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0a, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6f, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+} // namespace neg_input_output_type_mismatch_kernel
+
+class NegTestDataInputOutputTypeMismatchEluKernel : public NegTestDataBase
+{
+public:
+ NegTestDataInputOutputTypeMismatchEluKernel()
+ {
+ _test_kernel_model_circle = neg_input_output_type_mismatch_kernel::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInputOutputTypeMismatchEluKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_ELU_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/elu/TestDataEluBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/elu/TestDataEluBase.h
new file mode 100644
index 000000000..50422374c
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/elu/TestDataEluBase.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_ELU_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_ELU_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T> class TestDataEluBase : public TestDataBase<T>
+{
+public:
+ TestDataEluBase() = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input_data;
+ default:
+        assert(false && "Wrong input index");
+        return _input_data; // unreachable; avoids missing-return UB in NDEBUG builds
+ }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input_data;
+ std::vector<T> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_ELU_KERNEL_BASE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/equal/FloatEqualKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/equal/FloatEqualKernel.h
new file mode 100644
index 000000000..1fce14f47
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/equal/FloatEqualKernel.h
@@ -0,0 +1,281 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_EQUAL_KERNEL_FLOAT_H
+#define LUCI_INTERPRETER_TEST_MODELS_EQUAL_KERNEL_FLOAT_H
+
+#include "TestDataEqualBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+namespace neg_equal_float_with_no_broadcasting
+{
+
+/*
+ * Equal Kernel with input type mismatch:
+ *
+ * Input_1(1, 4, 4, 3)-Float32 Input_2(1, 4, 4, 3)-Int32
+ * \ /
+ * Equal(no broadcast)
+ * |
+ * Output(1, 4, 4, 3)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x80, 0x01, 0x00, 0x00, 0x9c, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x35, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00,
+ 0x44, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xd0, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x32,
+ 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x47, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+ 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input1_data = {};
+
+const std::vector<float> input2_data = {};
+
+const std::vector<bool> reference_output_data = {};
+
+} // namespace neg_equal_float_with_no_broadcasting
+
+namespace equal_float_with_no_broadcasting
+{
+
+/*
+ * Equal Kernel:
+ *
+ * Input_1(2, 5) Input_2(2, 5)
+ * \ /
+ * Equal(no broadcast)
+ * |
+ * Output(2, 5)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x64, 0x01, 0x00, 0x00, 0x80, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x35, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00,
+ 0x3c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x06, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0xd8, 0xff, 0xff, 0xff,
+ 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x47, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input1_data = {0.0, 33.11, -1.1, 0.0, 5.5, -2.0, 0.0, -1.0, -4.5, 1.1};
+
+const std::vector<float> input2_data = {0.0, 33.11, 1.2, 0.0, 5.5, -2.0, 0.01, -1.0, -4.5, 1.12};
+
+const std::vector<bool> reference_output_data = {true, true, false, true, true,
+ true, false, true, true, false};
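+
+// Equal performs an exact element-wise float comparison, so near-misses stay
+// false: 33.11 == 33.11 -> true, while -1.1 vs 1.2 and 1.1 vs 1.12 -> false.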
+
+} // namespace equal_float_with_no_broadcasting
+
+namespace equal_float_with_broadcasting
+{
+
+/*
+ * Equal Kernel:
+ *
+ * Input_1(2, 5) Input_2(1, 5)
+ * \ /
+ * Equal(with broadcast)
+ * |
+ * Output(2, 5)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x64, 0x01, 0x00, 0x00, 0x80, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x35, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00,
+ 0x3c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x06, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0xd8, 0xff, 0xff, 0xff,
+ 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x47, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input1_data = {-0.0, 0.0, -0.0, 0.0, 0.0, -0.0, 0.0, -0.0, -0.0, 0.0};
+
+const std::vector<float> input2_data = {0.0, -0.0, -0.0, 0.0, -0.0};
+
+const std::vector<bool> reference_output_data = {true, true, true, true, true,
+ true, true, true, true, true};
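+
+// Every output is true because IEEE-754 defines positive and negative zero as
+// equal: -0.0 == 0.0 holds for each broadcast pair here.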
+
+} // namespace equal_float_with_broadcasting
+
+namespace neg_equal_float_with_broadcasting
+{
+
+/*
+ * Equal Kernel with input type mismatch:
+ *
+ * Input_1(2, 5)-float Input_2(1, 5)-Int32
+ * \ /
+ * Equal(with broadcast)
+ * |
+ * Output(2, 5)
+ */
+
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x68, 0x01, 0x00, 0x00, 0x84, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x35, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00,
+ 0x3c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xd8, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6f, 0x66, 0x6d, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x11, 0x00, 0x00, 0x00,
+ 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+ 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input1_data = {};
+
+const std::vector<float> input2_data = {};
+
+const std::vector<bool> reference_output_data = {};
+
+} // namespace neg_equal_float_with_broadcasting
+
+class TestDataFloatEqual : public TestDataEqualBase<float, bool>
+{
+public:
+ explicit TestDataFloatEqual(bool is_with_broadcast, bool is_neg)
+ : TestDataEqualBase<float, bool>(is_with_broadcast)
+ {
+ if (not is_with_broadcast)
+ {
+ if (is_neg)
+ {
+ _input1_data = neg_equal_float_with_no_broadcasting::input1_data;
+ _input2_data = neg_equal_float_with_no_broadcasting::input2_data;
+ _reference_output_data = neg_equal_float_with_no_broadcasting::reference_output_data;
+ _test_kernel_model_circle = neg_equal_float_with_no_broadcasting::test_kernel_model_circle;
+ }
+ else
+ {
+ _input1_data = equal_float_with_no_broadcasting::input1_data;
+ _input2_data = equal_float_with_no_broadcasting::input2_data;
+ _reference_output_data = equal_float_with_no_broadcasting::reference_output_data;
+ _test_kernel_model_circle = equal_float_with_no_broadcasting::test_kernel_model_circle;
+ }
+ }
+ else
+ {
+ if (is_neg)
+ {
+ _input1_data = neg_equal_float_with_broadcasting::input1_data;
+ _input2_data = neg_equal_float_with_broadcasting::input2_data;
+ _reference_output_data = neg_equal_float_with_broadcasting::reference_output_data;
+ _test_kernel_model_circle = neg_equal_float_with_broadcasting::test_kernel_model_circle;
+ }
+ else
+ {
+ _input1_data = equal_float_with_broadcasting::input1_data;
+ _input2_data = equal_float_with_broadcasting::input2_data;
+ _reference_output_data = equal_float_with_broadcasting::reference_output_data;
+ _test_kernel_model_circle = equal_float_with_broadcasting::test_kernel_model_circle;
+ }
+ }
+ }
+
+ ~TestDataFloatEqual() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_EQUAL_KERNEL_FLOAT_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/equal/IntEqualKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/equal/IntEqualKernel.h
new file mode 100644
index 000000000..964ad6dc2
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/equal/IntEqualKernel.h
@@ -0,0 +1,278 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_EQUAL_KERNEL_INT_H
+#define LUCI_INTERPRETER_TEST_MODELS_EQUAL_KERNEL_INT_H
+
+#include "TestDataEqualBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace equal_int_with_no_broadcasting
+{
+
+/*
+ * Equal Kernel:
+ *
+ * Input_1(1, 5) Input_2(1, 5)
+ * \ /
+ * Equal(no broadcast)
+ * |
+ * Output(1, 5)
+ */
+
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x60, 0x01, 0x00, 0x00, 0x7c, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x35, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xac, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6f, 0x66, 0x6d, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0xd4, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x47, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+ 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<int32_t> input1_data = {22, 31, 14, 5, 3};
+
+const std::vector<int32_t> input2_data = {3, 14, 14, 5, 5};
+
+const std::vector<bool> reference_output_data = {false, false, true, true, false};
+
+} // namespace equal_int_with_no_broadcasting
+
+namespace neg_equal_int_with_no_broadcasting
+{
+
+/*
+ * Equal Kernel with input type mismatch:
+ *
+ * Input_1(1, 5)-int Input_2(1, 5)-float
+ * \ /
+ * Equal(no broadcast)
+ * |
+ * Output(1, 5)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x68, 0x01, 0x00, 0x00, 0x84, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x35, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00,
+ 0x3c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa4, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6f, 0x66, 0x6d, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x11, 0x00, 0x00, 0x00,
+ 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+ 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<int32_t> input1_data = {};
+
+const std::vector<int32_t> input2_data = {};
+
+const std::vector<bool> reference_output_data = {};
+
+} // namespace neg_equal_int_with_no_broadcasting
+
+namespace equal_int_with_broadcasting
+{
+/*
+ * Equal Kernel:
+ *
+ * Input_1(2, 5) Input_2(1, 5)
+ * \ /
+ * Equal(with broadcast)
+ * |
+ * Output(2, 5)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x60, 0x01, 0x00, 0x00, 0x7c, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x35, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xac, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6f, 0x66, 0x6d, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0xd4, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x47, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+ 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<int32_t> input1_data = {12, 0, 0, -12, 0, 0, 12, -12, 0, -12};
+
+const std::vector<int32_t> input2_data = {0, 12, 0, 0, 0};
+
+const std::vector<bool> reference_output_data = {false, false, true, false, true,
+ true, true, false, true, false};
+
+} // namespace equal_int_with_broadcasting
+
+namespace neg_equal_int_with_broadcasting
+{
+
+/*
+ * Equal Kernel with input type mismatch:
+ *
+ * Input_1(2, 5)-int   Input_2(1, 5)-float
+ * \ /
+ * Equal(with broadcast)
+ * |
+ * Output(2, 5)
+ */
+
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x68, 0x01, 0x00, 0x00, 0x84, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x35, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00,
+ 0x3c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa4, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6f, 0x66, 0x6d, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x11, 0x00, 0x00, 0x00,
+ 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+ 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<int32_t> input1_data = {};
+
+const std::vector<int32_t> input2_data = {};
+
+const std::vector<bool> reference_output_data = {};
+
+} // namespace neg_equal_int_with_broadcasting
+
+class TestDataIntEqual : public TestDataEqualBase<int32_t, bool>
+{
+public:
+ explicit TestDataIntEqual(bool is_with_broadcast, bool is_neg)
+ : TestDataEqualBase<int32_t, bool>(is_with_broadcast)
+ {
+ if (is_with_broadcast)
+ {
+ if (is_neg)
+ {
+ _input1_data = neg_equal_int_with_broadcasting::input1_data;
+ _input2_data = neg_equal_int_with_broadcasting::input2_data;
+ _reference_output_data = neg_equal_int_with_broadcasting::reference_output_data;
+ _test_kernel_model_circle = neg_equal_int_with_broadcasting::test_kernel_model_circle;
+ }
+ else
+ {
+ _input1_data = equal_int_with_broadcasting::input1_data;
+ _input2_data = equal_int_with_broadcasting::input2_data;
+ _reference_output_data = equal_int_with_broadcasting::reference_output_data;
+ _test_kernel_model_circle = equal_int_with_broadcasting::test_kernel_model_circle;
+ }
+ }
+ else
+ {
+ if (is_neg)
+ {
+ _input1_data = neg_equal_int_with_no_broadcasting::input1_data;
+ _input2_data = neg_equal_int_with_no_broadcasting::input2_data;
+ _reference_output_data = neg_equal_int_with_no_broadcasting::reference_output_data;
+ _test_kernel_model_circle = neg_equal_int_with_no_broadcasting::test_kernel_model_circle;
+ }
+ else
+ {
+ _input1_data = equal_int_with_no_broadcasting::input1_data;
+ _input2_data = equal_int_with_no_broadcasting::input2_data;
+ _reference_output_data = equal_int_with_no_broadcasting::reference_output_data;
+ _test_kernel_model_circle = equal_int_with_no_broadcasting::test_kernel_model_circle;
+ }
+ }
+ }
+
+ ~TestDataIntEqual() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_EQUAL_KERNEL_INT_H
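The headers above are pure data carriers: each bundles a serialized circle model with the input and reference output vectors that exercise it. A minimal consumption sketch, assuming a hypothetical run_equal_model() harness (only TestDataIntEqual itself comes from the header above):

    #include "luci_interpreter/test_models/equal/IntEqualKernel.h"

    using namespace luci_interpreter::test_kernel;

    void check_int_equal_no_broadcast()
    {
      // Positive variant: two Int32 inputs of shape (1, 5).
      TestDataIntEqual test_data(/*is_with_broadcast=*/false, /*is_neg=*/false);

      const unsigned char *model = test_data.get_model_ptr();
      const std::vector<int32_t> &ifm1 = test_data.get_input_data_by_index(0);
      const std::vector<int32_t> &ifm2 = test_data.get_input_data_by_index(1);
      const std::vector<bool> &expected = test_data.get_output_data_by_index(0);

      // run_equal_model is a hypothetical stand-in for the interpreter harness
      // that loads `model`, feeds both inputs and returns the output tensor:
      // const std::vector<bool> actual = run_equal_model(model, ifm1, ifm2);
      // assert(actual == expected);
      (void)model; (void)ifm1; (void)ifm2; (void)expected;
    }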
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/equal/TestDataEqualBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/equal/TestDataEqualBase.h
new file mode 100644
index 000000000..c790f271d
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/equal/TestDataEqualBase.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_EQUAL_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_EQUAL_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T, typename U> class TestDataEqualBase : public TestDataBase<T, U>
+{
+public:
+  explicit TestDataEqualBase(bool /*is_with_broadcast*/)
+ {
+ // Do nothing
+ }
+
+ TestDataEqualBase() = delete;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input1_data;
+ case 1:
+ return _input2_data;
+ default:
+ assert(false && "Wrong input index");
+ }
+ }
+
+ const std::vector<U> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input1_data;
+ std::vector<T> _input2_data;
+ std::vector<U> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_EQUAL_KERNEL_BASE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/exp/FloatExpKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/exp/FloatExpKernel.h
new file mode 100644
index 000000000..25370b3ab
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/exp/FloatExpKernel.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_FLOAT_EXP_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_FLOAT_EXP_KERNEL_H
+
+#include "TestDataExpBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace exp_float
+{
+/*
+ * Exp Kernel:
+ *
+ * Input(1, 3, 3, 2)
+ * |
+ * Exp
+ * |
+ * Output(1, 3, 3, 2)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x28, 0x00, 0x00, 0x00, 0x24, 0x01, 0x00, 0x00, 0x40, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff,
+ 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00,
+ 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x21, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xd4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2f, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input_data = {-18.223001, 5.126108, -14.195034, 14.899473, -5.9829874,
+ 4.536052, 6.039304, 6.410294, -7.008984, -16.432007,
+ 15.697407, -1.1735272, -2.2951646, -1.262989, 7.4088907,
+ 0.24651751, -10.7505045, 4.2837596};
+const std::vector<float> reference_output_data = {
+ 1.2185715e-08, 168.36061, 6.8418734e-07, 2956371.5, 0.002521283, 93.32166,
+ 419.6008, 608.07245, 0.0009037262, 7.305839e-08, 6565943.5, 0.30927414,
+ 0.10074481, 0.28280744, 1650.5944, 1.2795616, 2.1434593e-05, 72.51254};
+
+} // namespace exp_float
+
+class TestDataFloatExp : public TestDataExpBase<float>
+{
+public:
+ TestDataFloatExp()
+ {
+ _input_data = exp_float::input_data;
+ _reference_output_data = exp_float::reference_output_data;
+ _test_kernel_model_circle = exp_float::test_kernel_model_circle;
+ }
+
+ ~TestDataFloatExp() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_FLOAT_EXP_KERNEL_H
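The reference tensor above is simply the element-wise exponential of the input tensor, evaluated in float precision. A sketch that re-derives it under that assumption (matches_exp is illustrative, not part of the test suite):

    #include <cmath>
    #include <cstddef>
    #include <vector>

    // True if ref[i] ~= exp(in[i]) for all i, within a relative tolerance that
    // absorbs the float rounding baked into the stored reference values.
    bool matches_exp(const std::vector<float> &in, const std::vector<float> &ref,
                     float rel_tol = 1e-4f)
    {
      if (in.size() != ref.size())
        return false;
      for (std::size_t i = 0; i < in.size(); ++i)
      {
        const float expected = std::exp(in[i]);
        if (std::abs(expected - ref[i]) > rel_tol * std::abs(expected))
          return false;
      }
      return true;
    }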
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/exp/NegExpKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/exp/NegExpKernel.h
new file mode 100644
index 000000000..ed31853ae
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/exp/NegExpKernel.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_EXP_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_NEG_EXP_KERNEL_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace neg_input_output_type_mismatch_kernel
+{
+/*
+ * Exp Kernel with an input/output type mismatch:
+ *
+ * Input(1, 3, 3, 2) - Float32
+ * |
+ * Exp
+ * |
+ * Output(1, 3, 3, 2) - Int32
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x28, 0x00, 0x00, 0x00, 0x34, 0x01, 0x00, 0x00, 0x50, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff,
+ 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00,
+ 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x21, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2f, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+} // namespace neg_input_output_type_mismatch_kernel
+
+class NegTestDataInputOutputTypeMismatchExpKernel : public NegTestDataBase
+{
+public:
+ NegTestDataInputOutputTypeMismatchExpKernel()
+ {
+ _test_kernel_model_circle = neg_input_output_type_mismatch_kernel::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInputOutputTypeMismatchExpKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_EXP_KERNEL_H
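Unlike the positive data classes, the NegTestData* classes expose only a model pointer: each model is deliberately malformed, so a test is expected to fail during import/validation rather than during execution. A gtest-style sketch (import_model is a hypothetical stand-in for the real loader):

    NegTestDataInputOutputTypeMismatchExpKernel neg_data;
    // The loader should hit the input/output type-mismatch check and abort.
    EXPECT_DEATH(import_model(neg_data.get_model_ptr()), "");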
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/exp/TestDataExpBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/exp/TestDataExpBase.h
new file mode 100644
index 000000000..6427096e3
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/exp/TestDataExpBase.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_EXP_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_EXP_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T> class TestDataExpBase : public TestDataBase<T>
+{
+public:
+ TestDataExpBase() = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input_data;
+ default:
+        assert(false && "Wrong input index");
+        return _input_data; // unreachable; keeps every control path returning a value
+    }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input_data;
+ std::vector<T> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_EXP_KERNEL_BASE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/expand_dims/ExpandDimsKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/expand_dims/ExpandDimsKernel.h
new file mode 100644
index 000000000..010d972c5
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/expand_dims/ExpandDimsKernel.h
@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_EXPAND_DIMS_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_EXPAND_DIMS_KERNEL_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+namespace neg_test_expand_dims
+{
+
+/*
+ * ExpandDims Kernel:
+ *
+ * Input(3, 3) Const([1]) - wrong type for const
+ * | /
+ * ExpandDims
+ * |
+ * Output(3, 1, 3)
+ */
+
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x44, 0x00, 0x00, 0x00, 0x70, 0x01, 0x00, 0x00, 0x8c, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff,
+ 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00,
+ 0x68, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34,
+ 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00,
+ 0x3c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa8, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x46, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x46, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+ 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+} // namespace neg_test_expand_dims
+
+namespace expand_dims_kernel
+{
+/*
+ * ExpandDims Kernel:
+ *
+ * Input(3, 3) Const([1])
+ * | /
+ * ExpandDims
+ * |
+ * Output(3, 1, 3)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x44, 0x00, 0x00, 0x00, 0x70, 0x01, 0x00, 0x00, 0x8c, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff,
+ 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00,
+ 0x68, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34,
+ 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00,
+ 0x3c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa8, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x46, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x46, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+ 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input_data = {10.438837, -1.168417, -6.455261, -1.3638954, 31.58745,
+ 29.395872, -10.366383, 7.6131954, 9.837751};
+
+const std::vector<float> reference_output_data = {10.438837, -1.168417, -6.455261,
+ -1.3638954, 31.58745, 29.395872,
+ -10.366383, 7.6131954, 9.837751};
+
+} // namespace expand_dims_kernel
+
+template <typename T> class TestDataExpandDimsKernel : public TestDataBase<T>
+{
+public:
+ TestDataExpandDimsKernel()
+ {
+ _input_data = expand_dims_kernel::input_data;
+ _reference_output_data = expand_dims_kernel::reference_output_data;
+ _test_kernel_model_circle = expand_dims_kernel::test_kernel_model_circle;
+ }
+
+ ~TestDataExpandDimsKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input_data;
+ default:
+        assert(false && "Wrong input index");
+        return _input_data; // unreachable; keeps every control path returning a value
+    }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input_data;
+ std::vector<T> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+class NegTestDataInvalidInputTypeExpandDimsKernel : public NegTestDataBase
+{
+public:
+ NegTestDataInvalidInputTypeExpandDimsKernel()
+ {
+ _test_kernel_model_circle = neg_test_expand_dims::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInvalidInputTypeExpandDimsKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_EXPAND_DIMS_KERNEL_H
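ExpandDims only inserts a size-1 axis into the shape; the flat tensor data is untouched, which is why input_data and reference_output_data above hold the same nine floats. A shape-level sketch (expand_dims here is a local illustration, not the kernel implementation):

    #include <cstdint>
    #include <vector>

    // Insert a size-1 dimension at `axis`; only the shape metadata changes.
    std::vector<int32_t> expand_dims(std::vector<int32_t> shape, int32_t axis)
    {
      shape.insert(shape.begin() + axis, 1);
      return shape;
    }

    // expand_dims({3, 3}, 1) == {3, 1, 3}, matching Input(3, 3) -> Output(3, 1, 3)
    // in the positive model above.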
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/fill/FillKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/fill/FillKernel.h
new file mode 100644
index 000000000..e567361ba
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/fill/FillKernel.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_FILL_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_FILL_KERNEL_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace fill_kernel
+{
+/*
+ * Fill Kernel:
+ *
+ * Dims(3, 2) Input(scalar)
+ * \ /
+ * Fill
+ * |
+ * |
+ * Output(3, 2)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x48, 0x00, 0x00, 0x00, 0x70, 0x01, 0x00, 0x00, 0x8c, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x34, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff,
+ 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00,
+ 0x64, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x44, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6c, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x64, 0x69, 0x6d, 0x73, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x5e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x5e, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+ 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input_data = {1.1f};
+
+const std::vector<float> reference_output_data = {1.1f, 1.1f, 1.1f, 1.1f, 1.1f, 1.1f};
+} // namespace fill_kernel
+
+template <typename T> class TestDataFillKernel : public TestDataBase<T>
+{
+public:
+ TestDataFillKernel()
+ {
+ _input_data = fill_kernel::input_data;
+ _reference_output_data = fill_kernel::reference_output_data;
+ _test_kernel_model_circle = fill_kernel::test_kernel_model_circle;
+ }
+
+ ~TestDataFillKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input_data;
+ default:
+        assert(false && "Wrong input index");
+        return _input_data; // unreachable; keeps every control path returning a value
+    }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input_data;
+ std::vector<T> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_FILL_KERNEL_H
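Fill takes a shape tensor and a scalar value and produces a tensor of that shape with every element set to the value, which is how the single 1.1f above becomes six copies in the reference output. A minimal sketch (fill is illustrative, not the kernel implementation):

    #include <vector>

    // fill({3, 2}, 1.1f) returns a vector of 3 * 2 = 6 copies of 1.1f.
    std::vector<float> fill(const std::vector<int> &dims, float value)
    {
      int size = 1;
      for (int d : dims)
        size *= d;
      return std::vector<float>(size, value);
    }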
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/fill/NegFillKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/fill/NegFillKernel.h
new file mode 100644
index 000000000..85f0e70a5
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/fill/NegFillKernel.h
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_FILL_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_NEG_FILL_KERNEL_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace neg_input_output_type_mismatch_fill_kernel
+{
+/*
+ * Fill Kernel with an input/output type mismatch (the types must match):
+ *
+ * Dims(3, 2)   Input(scalar) - Float32
+ * \ /
+ * Fill
+ * |
+ * |
+ * Output(3, 2) - Int32
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x4c, 0x00, 0x00, 0x00, 0x7c, 0x01, 0x00, 0x00, 0x98, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x38, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+ 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xa4, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x76, 0x61, 0x6c, 0x75,
+ 0x65, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x64, 0x69, 0x6d, 0x73, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5e,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+} // namespace neg_input_output_type_mismatch_fill_kernel
+
+namespace neg_wrong_input_shape_fill_kernel
+{
+/*
+ * Fill Kernel with a wrongly shaped input (the fill value must be a scalar):
+ *
+ * Dims(3, 2)   Input(5) - Float32
+ * \ /
+ * Fill
+ * |
+ * |
+ * Output(3, 2) - Float32
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x4c, 0x00, 0x00, 0x00, 0x78, 0x01, 0x00, 0x00, 0x94, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x38, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+ 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xd8, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x64, 0x69, 0x6d, 0x73,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5e, 0x11, 0x00, 0x00, 0x00,
+ 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+ 0x65, 0x00, 0x00, 0x00};
+} // namespace neg_wrong_input_shape_fill_kernel
+
+class NegTestDataInputTypeMismatchFillKernel : public NegTestDataBase
+{
+public:
+ NegTestDataInputTypeMismatchFillKernel()
+ {
+ _test_kernel_model_circle =
+ neg_input_output_type_mismatch_fill_kernel::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInputTypeMismatchFillKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+class NegTestDataWrongInputShapeFillKernel : public NegTestDataBase
+{
+public:
+ NegTestDataWrongInputShapeFillKernel()
+ {
+ _test_kernel_model_circle = neg_wrong_input_shape_fill_kernel::test_kernel_model_circle;
+ }
+
+ ~NegTestDataWrongInputShapeFillKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_FILL_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/fully_connected/FloatFullyConnectedKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/fully_connected/FloatFullyConnectedKernel.h
new file mode 100644
index 000000000..55ee09fc3
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/fully_connected/FloatFullyConnectedKernel.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_FULLY_CONNECTED_KERNEL_FLOAT_H
+#define LUCI_INTERPRETER_TEST_MODELS_FULLY_CONNECTED_KERNEL_FLOAT_H
+
+#include "TestDataFullyConnectedBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace fully_connected_float
+{
+
+/*
+ * FullyConnected Kernel:
+ *
+ * Input(1, 16) Weight(4, 16) Bias(4)
+ * \ | /
+ * \ | /
+ * FullyConnected
+ * |
+ * Output(1, 4)
+ */
+
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x60, 0x01, 0x00, 0x00, 0xa8, 0x02, 0x00, 0x00, 0xc4, 0x02, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x4c, 0x01, 0x00, 0x00, 0x44, 0x01, 0x00, 0x00, 0x3c, 0x01, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xe2, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x80, 0x3f, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x40, 0xc0, 0x00, 0x00, 0x80, 0x40,
+ 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x40, 0xc0,
+ 0x00, 0x00, 0x80, 0xc0, 0x00, 0x00, 0xa0, 0xc0, 0x00, 0x00, 0xc0, 0x40, 0x00, 0x00, 0xe0, 0xc0,
+ 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x80, 0x40, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x40, 0x40,
+ 0x00, 0x00, 0x80, 0xbf, 0x00, 0x00, 0x00, 0xc1, 0x00, 0x00, 0xc0, 0xc0, 0x00, 0x00, 0xe0, 0x40,
+ 0x00, 0x00, 0xa0, 0x40, 0x00, 0x00, 0x80, 0x3f, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x40, 0xc0,
+ 0x00, 0x00, 0x80, 0xc0, 0x00, 0x00, 0xa0, 0xc0, 0x00, 0x00, 0xc0, 0x40, 0x00, 0x00, 0xe0, 0xc0,
+ 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x80, 0x40, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x40, 0x40,
+ 0x00, 0x00, 0x80, 0xbf, 0x00, 0x00, 0x00, 0xc1, 0x00, 0x00, 0xc0, 0xc0, 0x00, 0x00, 0xe0, 0x40,
+ 0x00, 0x00, 0xa0, 0x40, 0x00, 0x00, 0x80, 0x3f, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x40, 0xc0,
+ 0x00, 0x00, 0x80, 0xc0, 0x00, 0x00, 0xa0, 0xc0, 0x00, 0x00, 0xc0, 0x40, 0x00, 0x00, 0xe0, 0xc0,
+ 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x80, 0x40, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x40, 0x40,
+ 0x00, 0x00, 0x80, 0xbf, 0x00, 0x00, 0x00, 0xc1, 0x00, 0x00, 0xc0, 0xc0, 0x00, 0x00, 0xe0, 0x40,
+ 0x00, 0x00, 0xa0, 0x40, 0x00, 0x00, 0x80, 0x3f, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x40, 0xc0,
+ 0x00, 0x00, 0x80, 0xc0, 0x00, 0x00, 0xa0, 0xc0, 0x00, 0x00, 0xc0, 0x40, 0x00, 0x00, 0xe0, 0xc0,
+ 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x80, 0x40, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x40, 0x40,
+ 0x00, 0x00, 0x80, 0xbf, 0x00, 0x00, 0x00, 0xc1, 0x00, 0x00, 0xc0, 0xc0, 0x00, 0x00, 0xe0, 0x40,
+ 0x00, 0x00, 0xa0, 0x40, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x8c, 0x00, 0x00, 0x00,
+ 0x54, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x90, 0xff, 0xff, 0xff,
+ 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6f, 0x75, 0x74, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xb4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x62, 0x69, 0x61, 0x73, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xd8, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x69, 0x6e, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x11, 0x00, 0x00, 0x00,
+ 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+ 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input_data = {
+ 17.491695, 15.660671, 4.7347794, -15.796822, 20.4776, 18.438372, -0.7529831, 10.671711,
+ 10.699566, 3.1682281, -22.776001, 1.527811, -0.1198349, -5.748741, -5.1772327, 20.06879};
+
+const std::vector<float> reference_output_data = {263.84323, 260.84323, 259.84323, 266.84323};
+
+} // namespace fully_connected_float
+
+class TestDataFloatFullyConnected : public TestDataFullyConnectedBase<float>
+{
+public:
+ TestDataFloatFullyConnected()
+ {
+ _input_data = fully_connected_float::input_data;
+ _reference_output_data = fully_connected_float::reference_output_data;
+ _test_kernel_model_circle = fully_connected_float::test_kernel_model_circle;
+ }
+
+ ~TestDataFloatFullyConnected() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_FULLY_CONNECTED_KERNEL_FLOAT_H
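The float FullyConnected reference output follows the standard affine definition out[j] = sum_k in[k] * weight[j][k] + bias[j], with in of shape (1, 16), weight (4, 16) and bias (4) as drawn above; the concrete weight and bias values are embedded in the serialized blob. A reference sketch (fully_connected is illustrative, not the kernel implementation):

    #include <cstddef>
    #include <vector>

    // out[j] = sum_k in[k] * weight[j][k] + bias[j]
    std::vector<float> fully_connected(const std::vector<float> &in,
                                       const std::vector<std::vector<float>> &weight,
                                       const std::vector<float> &bias)
    {
      std::vector<float> out(bias); // start from the bias, then accumulate
      for (std::size_t j = 0; j < weight.size(); ++j)
        for (std::size_t k = 0; k < in.size(); ++k)
          out[j] += in[k] * weight[j][k];
      return out;
    }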
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/fully_connected/NegFullyConnectedKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/fully_connected/NegFullyConnectedKernel.h
new file mode 100644
index 000000000..785c4ce3e
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/fully_connected/NegFullyConnectedKernel.h
@@ -0,0 +1,218 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_FULLY_CONNECTED_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_NEG_FULLY_CONNECTED_KERNEL_H
+
+#include "TestDataFullyConnectedBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace neg_fully_connected_wrong_weight_type
+{
+/*
+ * FullyConnected Kernel with wrong weight type (Int16):
+ *
+ * Input(1, 64) Weight(8, 64) - Int16 Bias(8)
+ * \ | /
+ * \ | /
+ * FullyConnected
+ * |
+ * Output(1, 8)
+ */
+
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x38, 0x00, 0x00, 0x00, 0x98, 0x01, 0x00, 0x00, 0xb4, 0x01, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x24, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x84, 0xff, 0xff, 0xff, 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff,
+ 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00,
+ 0x68, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x08, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x9c, 0x00, 0x00, 0x00,
+ 0x60, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x80, 0xff, 0xff, 0xff,
+ 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6f, 0x75, 0x74, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+ 0xa4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x62, 0x69, 0x61, 0x73, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07,
+ 0x10, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x69, 0x6e, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x11, 0x00, 0x00, 0x00,
+ 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+ 0x65, 0x00, 0x00, 0x00};
+} // namespace neg_fully_connected_wrong_weight_type
+
+namespace neg_fully_connected_wrong_weight_shape
+{
+/*
+ * FullyConnected Kernel with wrong weight shape (rank should be 2):
+ *
+ * Input(1, 64) Weight(1, 8, 64) Bias(8)
+ * \ | /
+ * \ | /
+ * FullyConnected
+ * |
+ * Output(1, 8)
+ */
+
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x38, 0x00, 0x00, 0x00, 0x8c, 0x01, 0x00, 0x00, 0xa8, 0x01, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x24, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x84, 0xff, 0xff, 0xff, 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff,
+ 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00,
+ 0x68, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x08, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00,
+ 0x54, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff,
+ 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6f, 0x75, 0x74, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+ 0xb0, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x62, 0x69, 0x61, 0x73, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0xd4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x69, 0x6e, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+} // namespace neg_fully_connected_wrong_weight_shape
+
+namespace neg_fully_connected_wrong_bias_shape
+{
+/*
+ * FullyConnected Kernel with wrong bias shape (it should be equal to output.dim(1)):
+ *
+ * Input(1, 64) Weight(1, 8, 64) Bias(15)
+ * \ | /
+ * \ | /
+ * FullyConnected
+ * |
+ * Output(1, 8)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x38, 0x00, 0x00, 0x00, 0x88, 0x01, 0x00, 0x00, 0xa4, 0x01, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x24, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x84, 0xff, 0xff, 0xff, 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff,
+ 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00,
+ 0x68, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x08, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x8c, 0x00, 0x00, 0x00,
+ 0x54, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x90, 0xff, 0xff, 0xff,
+ 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6f, 0x75, 0x74, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+ 0xb4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x62, 0x69, 0x61, 0x73, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x0f, 0x00, 0x00, 0x00, 0xd8, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x69, 0x6e, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x11, 0x00, 0x00, 0x00,
+ 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+ 0x65, 0x00, 0x00, 0x00};
+} // namespace neg_fully_connected_wrong_bias_shape
+
+class NegTestDataWrongWeightTypeFullyConnectedKernel : public NegTestDataBase
+{
+public:
+ NegTestDataWrongWeightTypeFullyConnectedKernel()
+ {
+ _test_kernel_model_circle = neg_fully_connected_wrong_weight_type::test_kernel_model_circle;
+ }
+
+ ~NegTestDataWrongWeightTypeFullyConnectedKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+class NegTestDataWrongWeightShapeFullyConnectedKernel : public NegTestDataBase
+{
+public:
+ NegTestDataWrongWeightShapeFullyConnectedKernel()
+ {
+ _test_kernel_model_circle = neg_fully_connected_wrong_weight_shape::test_kernel_model_circle;
+ }
+
+ ~NegTestDataWrongWeightShapeFullyConnectedKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+class NegTestDataWrongBiasShapeFullyConnectedKernel : public NegTestDataBase
+{
+public:
+ NegTestDataWrongBiasShapeFullyConnectedKernel()
+ {
+ _test_kernel_model_circle = neg_fully_connected_wrong_bias_shape::test_kernel_model_circle;
+ }
+
+ ~NegTestDataWrongBiasShapeFullyConnectedKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
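+
+// Sketch of the intended use of these negative kernels (hedged: the import
+// step below is a placeholder for the interpreter's actual validation path):
+//
+//   test_kernel::NegTestDataWrongBiasShapeFullyConnectedKernel test_data;
+//   const unsigned char *model_ptr = test_data.get_model_ptr();
+//   // importing `model_ptr` is expected to fail validation, because the
+//   // bias shape (15) does not match output.dim(1) (8).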
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_FULLY_CONNECTED_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/fully_connected/TestDataFullyConnectedBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/fully_connected/TestDataFullyConnectedBase.h
new file mode 100644
index 000000000..582b3db40
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/fully_connected/TestDataFullyConnectedBase.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_FULLY_CONNECTED_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_FULLY_CONNECTED_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T> class TestDataFullyConnectedBase : public TestDataBase<T>
+{
+public:
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input_data;
+ default:
+ assert(false && "Wrong input index");
+ }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input_data;
+ std::vector<T> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
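+
+// Contract note: concrete subclasses populate the three protected members in
+// their constructors, and only input index 0 is valid. A minimal sketch of a
+// hypothetical subclass:
+//
+//   class TestDataMyFullyConnected : public TestDataFullyConnectedBase<float>
+//   {
+//   public:
+//     TestDataMyFullyConnected()
+//     {
+//       _input_data = /* input values */ {};
+//       _reference_output_data = /* expected values */ {};
+//       _test_kernel_model_circle = /* pointer to a circle model buffer */ nullptr;
+//     }
+//   };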
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_FULLY_CONNECTED_KERNEL_BASE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/fully_connected/U8FullyConnectedKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/fully_connected/U8FullyConnectedKernel.h
new file mode 100644
index 000000000..f5d970600
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/fully_connected/U8FullyConnectedKernel.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_FULLY_CONNECTED_KERNEL_U8_H
+#define LUCI_INTERPRETER_TEST_MODELS_FULLY_CONNECTED_KERNEL_U8_H
+
+#include "TestDataFullyConnectedBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace fully_connected_u8
+{
+
+/*
+ * FullyConnected Kernel:
+ *
+ * Input(1, 4) Weight(4, 4) Bias(4)
+ * \ | /
+ * \ | /
+ * FullyConnected
+ * |
+ * Output(1, 4)
+ */
+
+const unsigned char test_kernel_model_circle[] = {
+ 0x1c, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, 0xa0, 0x02, 0x00, 0x00, 0xbc, 0x02, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00,
+ 0x2c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xe2, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 0x03, 0x16, 0x04, 0x2d, 0x17,
+ 0x0b, 0x2b, 0x05, 0x2c, 0x06, 0x2c, 0x4e, 0x6f, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff,
+ 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
+ 0x6c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08,
+ 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x24, 0x01, 0x00, 0x00, 0xa4, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xfa, 0xfe, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x03, 0x44, 0x00, 0x00, 0x00, 0xec, 0xfe, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00,
+ 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x99, 0x96, 0xfd, 0x3b,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x6f, 0x75, 0x74, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x62, 0x69, 0x61, 0x73, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x92, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00,
+ 0x4c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x4c, 0x00, 0x00, 0x00,
+ 0x84, 0xff, 0xff, 0xff, 0x30, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xf0, 0xf1, 0xae, 0x3b, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x80, 0x3f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x13, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x54, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00,
+ 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00,
+ 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x68, 0x68, 0x2b, 0x3a, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x69, 0x6e, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<uint8_t> input_data = {5, 3, 251, 5};
+
+const std::vector<uint8_t> reference_output_data = {5, 10, 5, 13};
+
+} // namespace fully_connected_u8
+
+class TestDataU8FullyConnected : public TestDataFullyConnectedBase<uint8_t>
+{
+public:
+ TestDataU8FullyConnected()
+ {
+ _input_data = fully_connected_u8::input_data;
+ _reference_output_data = fully_connected_u8::reference_output_data;
+ _test_kernel_model_circle = fully_connected_u8::test_kernel_model_circle;
+ }
+
+ ~TestDataU8FullyConnected() override = default;
+};
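+
+// Quantization note: the input and reference vectors above hold raw uint8
+// codes, not real values. A code q maps back to a real value roughly as
+// scale * (q - zero_point), with the per-tensor scale and zero_point encoded
+// in the quantization blocks of the model buffer above; a flatbuffer/circle
+// inspector is the practical way to read those constants from the hex dump.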
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_FULLY_CONNECTED_KERNEL_U8_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/gather/FloatGatherKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/gather/FloatGatherKernel.h
new file mode 100644
index 000000000..570ebbf02
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/gather/FloatGatherKernel.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_GATHER_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_GATHER_KERNEL_H
+
+#include "TestDataGatherBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace gather_float
+{
+/*
+ * Gather Kernel:
+ *
+ * Input(1, 2, 3, 4) Indices(1, 2)
+ * \ /
+ * Gather(axis=2)
+ * |
+ * Output(1, 2, 2, 4)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x44, 0x00, 0x00, 0x00, 0x88, 0x01, 0x00, 0x00, 0xa4, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x72, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff, 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x70, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x16, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17,
+ 0x14, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xa0, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
+ 0x69, 0x6e, 0x64, 0x69, 0x63, 0x65, 0x73, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x70, 0x61, 0x72, 0x61, 0x6d, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x11, 0x00, 0x00, 0x00,
+ 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+ 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input_data = {
+ -21.338402, 27.906258, 12.78171, 9.978121, 25.450222, -11.091215, -8.654621, 1.2170105,
+ -4.5460815, 19.334154, 6.8392344, 5.622982, 2.2990818, -8.733818, 8.312399, -2.8866997,
+ 13.171104, 3.4856339, -17.577343, 20.683546, 1.0197582, 27.619759, -6.016859, -4.398407};
+
+const std::vector<float> reference_output_data = {
+ 25.450222, -11.091215, -8.654621, 1.2170105, -4.5460815, 19.334154, 6.8392344, 5.622982,
+ 13.171104, 3.4856339, -17.577343, 20.683546, 1.0197582, 27.619759, -6.016859, -4.398407};
+
+} // namespace gather_float
+
+class TestDataFloatGather : public TestDataGatherBase<float>
+{
+public:
+ TestDataFloatGather()
+ {
+ _input_data = gather_float::input_data;
+ _reference_output_data = gather_float::reference_output_data;
+ _test_kernel_model_circle = gather_float::test_kernel_model_circle;
+ }
+
+ ~TestDataFloatGather() override = default;
+};
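+
+// Worked example for gather_float: with axis=2 and indices {1, 2}, the kernel
+// selects slices 1 and 2 of dimension 2, i.e.
+// output[0][b][k][c] = input[0][b][indices[k]][c]. The first output row
+// {25.450222, -11.091215, -8.654621, 1.2170105} is exactly input[0][0][1][:].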
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_GATHER_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/gather/IntGatherKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/gather/IntGatherKernel.h
new file mode 100644
index 000000000..5c7edef11
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/gather/IntGatherKernel.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_GATHER_INT_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_GATHER_INT_KERNEL_H
+
+#include "TestDataGatherBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace gather_int
+{
+/*
+ * Gather Kernel:
+ *
+ * Input(1, 2, 3, 4) Indices(1, 2)
+ * \ /
+ * Gather(axis=2)
+ * |
+ * Output(1, 2, 2, 4)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x44, 0x00, 0x00, 0x00, 0x84, 0x01, 0x00, 0x00, 0xa0, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x72, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff, 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x70, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x16, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17,
+ 0x14, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xa8, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xd8, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x69, 0x6e, 0x64, 0x69, 0x63, 0x65, 0x73, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x70, 0x61, 0x72, 0x61,
+ 0x6d, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<int> input_data = {4, 14, 14, 22, 5, -5, -4, -3, 5, 15, 13, 5,
+ -3, 5, -13, 15, -6, -13, -4, -12, -5, 5, 5, -5};
+
+const std::vector<int> reference_output_data = {5, -5, -4, -3, 5, 15, 13, 5,
+ -6, -13, -4, -12, -5, 5, 5, -5};
+
+} // namespace gather_int
+
+class TestDataIntGather : public TestDataGatherBase<int>
+{
+public:
+ TestDataIntGather()
+ {
+ _input_data = gather_int::input_data;
+ _reference_output_data = gather_int::reference_output_data;
+ _test_kernel_model_circle = gather_int::test_kernel_model_circle;
+ }
+
+ ~TestDataIntGather() override = default;
+};
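+
+// Same selection as gather_float above (axis=2, indices {1, 2}): the first
+// output row {5, -5, -4, -3} is input[0][0][1][:].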
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_GATHER_INT_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/gather/NegGatherKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/gather/NegGatherKernel.h
new file mode 100644
index 000000000..a902b7288
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/gather/NegGatherKernel.h
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_GATHER_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_NEG_GATHER_KERNEL_H
+
+#include "TestDataGatherBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace neg_gather_mismatch_input_output_type
+{
+/*
+ * Gather Kernel with input/output type mismatch (they should be equal):
+ *
+ * Input(1, 2, 3, 4) - S16 Indices(1, 2)
+ * \ /
+ * Gather(axis=2)
+ * |
+ * Output(1, 2, 2, 4) - Float32
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x44, 0x00, 0x00, 0x00, 0x8c, 0x01, 0x00, 0x00, 0xa8, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x72, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff, 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x70, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x16, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17,
+ 0x14, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xd8, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
+ 0x69, 0x6e, 0x64, 0x69, 0x63, 0x65, 0x73, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x10, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+} // namespace neg_gather_mismatch_input_output_type
+
+namespace neg_gather_wrong_position_type
+{
+/*
+ * Gather Kernel with wrong Indices type (should be S32):
+ *
+ * Input(1, 2, 3, 4) - Float32 Indices(1, 2) - Float32
+ * \ /
+ * Gather(axis=2)
+ * |
+ * Output(1, 2, 2, 4) - Float32
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x44, 0x00, 0x00, 0x00, 0x78, 0x01, 0x00, 0x00, 0x94, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x72, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f,
+ 0x00, 0x00, 0x00, 0x40, 0xf8, 0xff, 0xff, 0xff, 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x70, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x16, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17,
+ 0x14, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xb0, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xdc, 0xff, 0xff, 0xff,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
+ 0x69, 0x6e, 0x64, 0x69, 0x63, 0x65, 0x73, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x70, 0x61, 0x72, 0x61, 0x6d, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x11, 0x00, 0x00, 0x00,
+ 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+ 0x65, 0x00, 0x00, 0x00};
+
+} // namespace neg_gather_wrong_position_type
+
+namespace neg_gather_wrong_axis
+{
+/*
+ * Gather Kernel with wrong axis value (should be < rank(input)):
+ *
+ * Input(1, 2, 3, 4) - Float32 Indices(1, 2) - S32
+ * \ /
+ * Gather(axis=10)
+ * |
+ * Output(1, 2, 2, 4) - Float32
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x44, 0x00, 0x00, 0x00, 0x88, 0x01, 0x00, 0x00, 0xa4, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x72, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff, 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x70, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x16, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17,
+ 0x14, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xa0, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
+ 0x69, 0x6e, 0x64, 0x69, 0x63, 0x65, 0x73, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x70, 0x61, 0x72, 0x61, 0x6d, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x11, 0x00, 0x00, 0x00,
+ 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+ 0x65, 0x00, 0x00, 0x00};
+
+} // namespace neg_gather_wrong_axis
+
+class NegTestDataInputOutputTypeMismatchGatherKernel : public NegTestDataBase
+{
+public:
+ NegTestDataInputOutputTypeMismatchGatherKernel()
+ {
+ _test_kernel_model_circle = neg_gather_mismatch_input_output_type::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInputOutputTypeMismatchGatherKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+class NegTestDataWrongPositionTypeGatherKernel : public NegTestDataBase
+{
+public:
+ NegTestDataWrongPositionTypeGatherKernel()
+ {
+ _test_kernel_model_circle = neg_gather_wrong_position_type::test_kernel_model_circle;
+ }
+
+ ~NegTestDataWrongPositionTypeGatherKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+class NegTestDataWrongAxisGatherKernel : public NegTestDataBase
+{
+public:
+ NegTestDataWrongAxisGatherKernel()
+ {
+ _test_kernel_model_circle = neg_gather_wrong_axis::test_kernel_model_circle;
+ }
+
+ ~NegTestDataWrongAxisGatherKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_GATHER_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/gather/TestDataGatherBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/gather/TestDataGatherBase.h
new file mode 100644
index 000000000..e38fcb86e
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/gather/TestDataGatherBase.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_GATHER_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_GATHER_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T> class TestDataGatherBase : public TestDataBase<T>
+{
+public:
+ TestDataGatherBase() = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input_data;
+ default:
+ assert(false && "Wrong input index");
+ }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input_data;
+ std::vector<T> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_GATHER_KERNEL_BASE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/greater/FloatGreaterKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/greater/FloatGreaterKernel.h
new file mode 100644
index 000000000..dcb7b6886
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/greater/FloatGreaterKernel.h
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_GREATER_KERNEL_FLOAT_H
+#define LUCI_INTERPRETER_TEST_MODELS_GREATER_KERNEL_FLOAT_H
+
+#include "TestDataGreaterBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace greater_float
+{
+
+/*
+ * Greater Kernel:
+ *
+ * Input_1(1, 4, 4, 3) Input_2(1, 4, 4, 3)
+ * \ /
+ * Greater(no broadcast)
+ * |
+ * Output(1, 4, 4, 3)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x7c, 0x01, 0x00, 0x00, 0x98, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00,
+ 0x44, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x06, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0xd0, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3d,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input1_data = {
+ -0.01787583, -0.8314556, -0.47624078, -1.9747407, -0.51676583, -0.20183715, -1.9358647,
+ 0.7616414, -0.0899037, 2.048367, -1.3174965, 1.5267943, 0.68707687, 1.3464743,
+ 0.98674047, -1.4853697, 1.9973947, 0.5170953, 0.37471953, -1.6011852, 0.32045737,
+ -0.6598305, -1.7946662, 1.2349467, 1.3320708, 0.5151753, 1.345111, -0.16560331,
+ 0.82792366, -1.734876, 0.043626763, -0.0118546495, 0.31535238, 0.1888555, -0.32523626,
+ -0.997665, 0.5819472, -2.3194845, -1.6897905, 0.9981752, -1.2897044, 0.75768864,
+ 0.56781554, -1.0565805, -1.4891449, 0.2493645, -1.1312587, 0.6837854};
+
+const std::vector<float> input2_data = {
+ 0.30809638, -0.28285328, -0.8437058, 1.7689779, 0.5182942, 0.571205, -0.89484423,
+ 0.28100377, 0.5453497, 1.3848042, -0.04359268, -1.7448778, -0.5375435, -0.85059136,
+ -0.77961826, -0.4916915, 1.3359088, -0.09580261, 0.6158275, -0.05056348, 0.90505254,
+ 0.94226706, 1.136139, -0.45077038, -0.5018571, -1.1543767, 0.85094684, -0.13731039,
+ -0.3298641, 0.9474698, -0.48497504, -0.14864737, -0.009302358, -1.1259161, 0.44226727,
+ 1.0149708, 0.36024934, 0.4969523, 0.45014778, -0.34718898, 1.2260172, 0.35304692,
+ -1.3037513, -0.2565706, 0.18085766, -0.7099202, -0.9203537, -1.2257448};
+
+const std::vector<bool> reference_output_data = {
+ false, false, true, false, false, false, false, true, false, true, false, true,
+ true, true, true, false, true, true, false, false, false, false, false, true,
+ true, true, true, false, true, false, true, true, true, true, false, false,
+ true, false, false, true, false, true, true, false, false, true, false, true};
+
+} // namespace greater_float
+
+namespace neg_greater_float_with_no_broadcasting
+{
+
+/*
+ * Greater Kernel with input type mismatch:
+ *
+ * Input_1(1, 4, 4, 3)-float Input_2(1, 4, 4, 3)-int
+ * \ /
+ * Greater(no broadcast)
+ * |
+ * Output(1, 4, 4, 3)
+ */
+
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x80, 0x01, 0x00, 0x00, 0x9c, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00,
+ 0x44, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xd0, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x32,
+ 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x3d, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+ 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input1_data = {};
+
+const std::vector<float> input2_data = {};
+
+const std::vector<bool> reference_output_data = {};
+
+} // namespace neg_greater_float_with_no_broadcasting
+
+class TestDataFloatGreater : public TestDataGreaterBase<float, bool>
+{
+public:
+ explicit TestDataFloatGreater(bool is_with_broadcast, bool is_neg)
+ : TestDataGreaterBase<float, bool>(is_with_broadcast)
+ {
+ if (is_with_broadcast)
+ {
+      assert(false && "Not implemented yet");
+ }
+ else
+ {
+ if (is_neg)
+ {
+ _input1_data = neg_greater_float_with_no_broadcasting::input1_data;
+ _input2_data = neg_greater_float_with_no_broadcasting::input2_data;
+ _reference_output_data = neg_greater_float_with_no_broadcasting::reference_output_data;
+ _test_kernel_model_circle =
+ neg_greater_float_with_no_broadcasting::test_kernel_model_circle;
+ }
+ else
+ {
+ _input1_data = greater_float::input1_data;
+ _input2_data = greater_float::input2_data;
+ _reference_output_data = greater_float::reference_output_data;
+ _test_kernel_model_circle = greater_float::test_kernel_model_circle;
+ }
+ }
+ }
+
+ ~TestDataFloatGreater() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_GREATER_KERNEL_FLOAT_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/greater/TestDataGreaterBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/greater/TestDataGreaterBase.h
new file mode 100644
index 000000000..7c2005a64
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/greater/TestDataGreaterBase.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_GREATER_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_GREATER_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T, typename U> class TestDataGreaterBase : public TestDataBase<T, U>
+{
+public:
+ explicit TestDataGreaterBase(bool)
+ {
+ // Do nothing
+ }
+
+ TestDataGreaterBase() = delete;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input1_data;
+ case 1:
+ return _input2_data;
+ default:
+ assert(false && "Wrong input index");
+ }
+ }
+
+ const std::vector<U> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input1_data;
+ std::vector<T> _input2_data;
+ std::vector<U> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_GREATER_KERNEL_BASE_H
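
As a reading aid for this patch: the base class above fixes the accessor contract every comparison-kernel test relies on (two inputs at indices 0 and 1, one output at index 0). Below is a minimal consumption sketch, not part of the patch itself; it only uses classes introduced in this diff, and the surrounding test harness is assumed.

#include <cassert>
#include "luci_interpreter/test_models/greater/FloatGreaterKernel.h"

void sketch_greater_accessors()
{
  using namespace luci_interpreter::test_kernel;

  // Positive float test data, no broadcasting
  TestDataFloatGreater data(/*is_with_broadcast=*/false, /*is_neg=*/false);

  const unsigned char *model = data.get_model_ptr(); // serialized circle model
  const std::vector<float> &ifm1 = data.get_input_data_by_index(0);
  const std::vector<float> &ifm2 = data.get_input_data_by_index(1);
  const std::vector<bool> &ofm = data.get_output_data_by_index(0);

  assert(model != nullptr);
  assert(ifm1.size() == ifm2.size() && ifm1.size() == ofm.size());
}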
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/greater_equal/FloatGreaterEqualKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/greater_equal/FloatGreaterEqualKernel.h
new file mode 100644
index 000000000..14226ab40
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/greater_equal/FloatGreaterEqualKernel.h
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_GREATER_EQUAL_KERNEL_FLOAT_H
+#define LUCI_INTERPRETER_TEST_MODELS_GREATER_EQUAL_KERNEL_FLOAT_H
+
+#include "TestDataGreaterEqualBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace greater_equal_float
+{
+
+/*
+ * GreaterEqual Kernel:
+ *
+ * Input_1(1, 4, 4, 3) Input_2(1, 4, 4, 3)
+ * \ /
+ * GreaterEqual(no broadcast)
+ * |
+ * Output(1, 4, 4, 3)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x7c, 0x01, 0x00, 0x00, 0x98, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2d, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00,
+ 0x44, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x06, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0xd0, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input1_data = {
+ -0.01787583, -0.8314556, -0.47624078, -1.9747407, -0.51676583, -0.20183715, -1.9358647,
+ 0.7616414, -0.0899037, 2.048367, -1.3174965, 1.5267943, 0.68707687, 1.3464743,
+ 0.98674047, -1.4853697, 1.9973947, 0.5170953, 0.37471953, -1.6011852, 0.32045737,
+ -0.6598305, -1.7946662, 1.2349467, 1.3320708, 0.5151753, 1.345111, -0.16560331,
+ 0.82792366, -1.734876, 0.043626763, -0.0118546495, 0.31535238, 0.1888555, -0.32523626,
+ -0.997665, 0.5819472, -2.3194845, -1.6897905, 0.9981752, -1.2897044, 0.75768864,
+ 0.56781554, -1.0565805, -1.4891449, 0.2493645, -1.1312587, 0.6837854};
+
+const std::vector<float> input2_data = {
+ 0.30809638, -0.28285328, -0.8437058, 1.7689779, 0.5182942, 0.571205, -0.89484423,
+ 0.28100377, 0.5453497, 1.3848042, -0.04359268, -1.7448778, -0.5375435, -0.85059136,
+ -0.77961826, -0.4916915, 1.3359088, -0.09580261, 0.6158275, -0.05056348, 0.90505254,
+ 0.94226706, 1.136139, -0.45077038, -0.5018571, -1.1543767, 0.85094684, -0.13731039,
+ -0.3298641, 0.9474698, -0.48497504, -0.14864737, -0.009302358, -1.1259161, 0.44226727,
+ 1.0149708, 0.36024934, 0.4969523, 0.45014778, -0.34718898, 1.2260172, 0.35304692,
+ -1.3037513, -0.2565706, 0.18085766, -0.7099202, -0.9203537, -1.2257448};
+
+const std::vector<bool> reference_output_data = {
+ false, false, true, false, false, false, false, true, false, true, false, true,
+ true, true, true, false, true, true, false, false, false, false, false, true,
+ true, true, true, false, true, false, true, true, true, true, false, false,
+ true, false, false, true, false, true, true, false, false, true, false, true};
+
+} // namespace greater_equal_float
+
+namespace neg_greater_equal_float_with_no_broadcasting
+{
+
+/*
+ * GreaterEqual Kernel with input type mismatch:
+ *
+ * Input_1(1, 4, 4, 3)-float Input_2(1, 4, 4, 3)-int
+ * \ /
+ * GreaterEqual(no broadcast)
+ * |
+ * Output(1, 4, 4, 3)
+ */
+
+const unsigned char test_kernel_model_circle[] = {
+
+};
+
+const std::vector<float> input1_data = {};
+
+const std::vector<float> input2_data = {};
+
+const std::vector<bool> reference_output_data = {};
+
+} // namespace neg_greater_equal_float_with_no_broadcasting
+
+class TestDataFloatGreaterEqual : public TestDataGreaterEqualBase<float, bool>
+{
+public:
+ explicit TestDataFloatGreaterEqual(bool is_with_broadcast, bool is_neg)
+ : TestDataGreaterEqualBase<float, bool>(is_with_broadcast)
+ {
+ if (is_with_broadcast)
+ {
+ assert(false && "Not impl yet");
+ }
+ else
+ {
+ if (is_neg)
+ {
+ _input1_data = neg_greater_equal_float_with_no_broadcasting::input1_data;
+ _input2_data = neg_greater_equal_float_with_no_broadcasting::input2_data;
+ _reference_output_data =
+ neg_greater_equal_float_with_no_broadcasting::reference_output_data;
+ _test_kernel_model_circle =
+ neg_greater_equal_float_with_no_broadcasting::test_kernel_model_circle;
+ }
+ else
+ {
+ _input1_data = greater_equal_float::input1_data;
+ _input2_data = greater_equal_float::input2_data;
+ _reference_output_data = greater_equal_float::reference_output_data;
+ _test_kernel_model_circle = greater_equal_float::test_kernel_model_circle;
+ }
+ }
+ }
+
+ ~TestDataFloatGreaterEqual() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_GREATER_EQUAL_KERNEL_FLOAT_H
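
The reference vector in greater_equal_float above is simply the elementwise `>=` of the two input vectors. A short sketch that recomputes it from the data in this file (illustrative only, not part of the patch):

#include <cassert>
#include <cstddef>
#include "luci_interpreter/test_models/greater_equal/FloatGreaterEqualKernel.h"

void sketch_recompute_greater_equal_reference()
{
  using namespace luci_interpreter::test_kernel::greater_equal_float;

  assert(input1_data.size() == input2_data.size());
  for (std::size_t i = 0; i < input1_data.size(); ++i)
  {
    // e.g. i == 0: -0.01787583 >= 0.30809638 evaluates to false, matching the table above
    assert((input1_data[i] >= input2_data[i]) == reference_output_data[i]);
  }
}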
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/greater_equal/TestDataGreaterEqualBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/greater_equal/TestDataGreaterEqualBase.h
new file mode 100644
index 000000000..9153c357a
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/greater_equal/TestDataGreaterEqualBase.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_GREATER_EQUAL_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_GREATER_EQUAL_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T, typename U> class TestDataGreaterEqualBase : public TestDataBase<T, U>
+{
+public:
+ explicit TestDataGreaterEqualBase(bool)
+ {
+ // Do nothing
+ }
+
+ TestDataGreaterEqualBase() = delete;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input1_data;
+ case 1:
+ return _input2_data;
+ default:
+ assert(false && "Wrong input index");
+ }
+ }
+
+ const std::vector<U> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input1_data;
+ std::vector<T> _input2_data;
+ std::vector<U> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_GREATER_EQUAL_KERNEL_BASE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/leaky_relu/FloatLeakyReLUKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/leaky_relu/FloatLeakyReLUKernel.h
new file mode 100644
index 000000000..6d1e81f7a
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/leaky_relu/FloatLeakyReLUKernel.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_FLOAT_LEAKY_RELU_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_FLOAT_LEAKY_RELU_KERNEL_H
+
+#include "TestDataLeakyReLUBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace leaky_relu_float
+{
+/*
+ * Leaky_ReLU Kernel:
+ *
+ * Input(1, 3, 3, 2)
+ * |
+ * Leaky_ReLU
+ * |
+ * Output(1, 3, 3, 2)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x2c, 0x00, 0x00, 0x00, 0x30, 0x01, 0x00, 0x00, 0x4c, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x18, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff,
+ 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x64, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x16, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4b, 0x14, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x40, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xd4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x62, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+ 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input_data = {-19.160503, 32.721092, 9.924562, -5.1152186, -4.792659,
+ 5.404521, 21.64431, 4.1435075, -9.379442, 10.986649,
+ -19.975468, -2.6520946, 9.306602, -12.589155, -2.9080758,
+ 21.732197, -2.6927, -2.0605793};
+
+const std::vector<float> reference_output_data = {
+ -38.321007, 32.721092, 9.924562, -10.230437, -9.585318, 5.404521,
+ 21.64431, 4.1435075, -18.758884, 10.986649, -39.950935, -5.304189,
+ 9.306602, -25.17831, -5.8161516, 21.732197, -5.3854, -4.1211586};
+
+} // namespace leaky_relu_float
+
+class TestDataFloatLeakyReLU : public TestDataLeakyReLUBase<float>
+{
+public:
+ TestDataFloatLeakyReLU()
+ {
+ _input_data = leaky_relu_float::input_data;
+ _reference_output_data = leaky_relu_float::reference_output_data;
+ _test_kernel_model_circle = leaky_relu_float::test_kernel_model_circle;
+ }
+
+ ~TestDataFloatLeakyReLU() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_FLOAT_LEAKY_RELU_KERNEL_H
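
Judging from the data above, the kernel's alpha appears to be 2.0: every negative input is doubled in the reference output, and the bytes 0x00, 0x00, 0x00, 0x40 embedded in the model are the little-endian float 2.0. A sketch that recomputes the reference values under that assumption (illustrative only, not part of the patch):

#include <cassert>
#include <cmath>
#include <cstddef>
#include "luci_interpreter/test_models/leaky_relu/FloatLeakyReLUKernel.h"

void sketch_recompute_leaky_relu_reference()
{
  using namespace luci_interpreter::test_kernel::leaky_relu_float;

  const float alpha = 2.0f; // assumption inferred from the data, not stated in the patch
  for (std::size_t i = 0; i < input_data.size(); ++i)
  {
    const float x = input_data[i];
    const float expected = x < 0.0f ? alpha * x : x; // LeakyReLU: scale negatives by alpha
    // Tolerate float rounding in the stored reference values
    assert(std::fabs(expected - reference_output_data[i]) < 1e-4f);
  }
}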
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/leaky_relu/NegLeakyReLUKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/leaky_relu/NegLeakyReLUKernel.h
new file mode 100644
index 000000000..1d6ebde76
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/leaky_relu/NegLeakyReLUKernel.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_LEAKY_RELU_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_NEG_LEAKY_RELU_KERNEL_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace neg_input_output_type_mismatch_kernel
+{
+/*
+ * LeakyReLU Kernel with input output type mismatch:
+ *
+ * Input(1, 3, 3, 2) - Float32
+ * |
+ * LeakyReLU
+ * |
+ * Output(1, 3, 3, 2) - Int32
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x2c, 0x00, 0x00, 0x00, 0x40, 0x01, 0x00, 0x00, 0x5c, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x18, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff,
+ 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x64, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x16, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4b, 0x14, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x40, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x62, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+ 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+} // namespace neg_input_output_type_mismatch_kernel
+
+class NegTestDataInputOutputTypeMismatchLeakyReLUKernel : public NegTestDataBase
+{
+public:
+ NegTestDataInputOutputTypeMismatchLeakyReLUKernel()
+ {
+ _test_kernel_model_circle = neg_input_output_type_mismatch_kernel::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInputOutputTypeMismatchLeakyReLUKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_LEAKY_RELU_KERNEL_H
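
For orientation: a "neg" model like the one above deliberately encodes an invalid graph (here, a FLOAT32 input feeding an INT32 output through LeakyReLU), so a test is expected to load it and observe the interpreter's validation reject it. Purely illustrative usage; the helper name below is hypothetical and not an API of this patch:

luci_interpreter::test_kernel::NegTestDataInputOutputTypeMismatchLeakyReLUKernel neg_data;
const unsigned char *invalid_model = neg_data.get_model_ptr(); // output dtype != input dtype
// expect_import_failure(invalid_model); // hypothetical harness helper, outside this patch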
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/leaky_relu/TestDataLeakyReLUBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/leaky_relu/TestDataLeakyReLUBase.h
new file mode 100644
index 000000000..ed8346a3b
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/leaky_relu/TestDataLeakyReLUBase.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_LEAKY_RELU_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_LEAKY_RELU_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T> class TestDataLeakyReLUBase : public TestDataBase<T>
+{
+public:
+ TestDataLeakyReLUBase() = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input_data;
+ default:
+ assert(false && "Wrong input index");
+ }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input_data;
+ std::vector<T> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_LEAKY_RELU_KERNEL_BASE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/less/FloatLessKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/less/FloatLessKernel.h
new file mode 100644
index 000000000..56556d1f4
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/less/FloatLessKernel.h
@@ -0,0 +1,313 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_LESS_KERNEL_FLOAT_H
+#define LUCI_INTERPRETER_TEST_MODELS_LESS_KERNEL_FLOAT_H
+
+#include "TestDataLessBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+namespace neg_less_float_with_no_broadcasting
+{
+
+/*
+ * Less Kernel with input type mismatch:
+ *
+ * Input_1(1, 4, 4, 3) Input_2(1, 4, 4, 3)
+ * \ /
+ * Less(no broadcast)
+ * |
+ * Output(1, 4, 4, 3)
+ */
+
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x80, 0x01, 0x00, 0x00, 0x9c, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x29, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00,
+ 0x44, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x94, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3a, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x3a, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+ 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input1_data = {};
+
+const std::vector<float> input2_data = {};
+
+const std::vector<bool> reference_output_data = {};
+
+} // namespace neg_less_float_with_no_broadcasting
+
+namespace less_float_with_no_broadcasting
+{
+
+/*
+ * Less Kernel:
+ *
+ * Input_1(1, 4, 4, 3) Input_2(1, 4, 4, 3)
+ * \ /
+ * Less(no broadcast)
+ * |
+ * Output(1, 4, 4, 3)
+ */
+
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x7c, 0x01, 0x00, 0x00, 0x98, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x29, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00,
+ 0x44, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x06, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0xd0, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3a,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input1_data = {
+ -17.109156, -14.220402, 0.6967888, 8.5724945, -9.638723, -15.630436, 5.7072678, -28.719913,
+ 26.073956, 28.855093, -15.886295, -11.779518, 7.330929, -0.13710785, 15.124775, 9.482744,
+ 1.2159233, -11.10869, -7.9041195, 32.05063, 1.4171028, -9.373051, -3.7985916, -0.6265869,
+ 1.1357956, 1.3873901, -6.6756783, 7.348675, -3.1261578, 13.6670475, 13.453075, 2.7914486,
+ 24.654053, 23.756575, 3.0251846, -6.2888947, 15.202778, -6.0607758, 1.6703491, -18.499111,
+ 10.935091, 1.846302, -16.904373, 0.9744568, -1.1621361, -2.4073143, 4.1701775, 4.268633};
+
+const std::vector<float> input2_data = {
+ 14.16371, -9.996677, 23.359705, -5.8362885, 8.50577, 3.890133, 26.986832, 11.293919,
+ -7.2040367, -5.077221, -11.096642, 20.064266, 20.187872, -2.297474, 19.889074, -24.76117,
+ -9.60951, 25.72523, 22.055315, 20.373281, -4.083912, -1.6361217, -4.452694, 22.31394,
+ 1.7857666, -3.4938774, -0.95575714, -6.792, 24.483788, 14.758501, 8.167406, -13.673744,
+ 1.8645649, -5.4475937, 11.297581, 38.236015, -4.01342, 26.875057, 0.6700249, 39.450253,
+ -11.167023, 13.393299, -0.7329292, 10.980518, -3.8029938, -16.393318, 5.341381, -40.322437};
+
+const std::vector<bool> reference_output_data = {
+ true, true, true, false, true, true, true, true, false, false, true, true,
+ true, false, true, false, false, true, true, false, false, true, false, true,
+ true, false, true, false, true, true, false, false, false, false, true, true,
+ false, true, false, true, false, true, true, true, false, false, true, false};
+
+} // namespace less_float_with_no_broadcasting
+
+namespace less_float_with_broadcasting
+{
+
+/*
+ * Less Kernel:
+ *
+ * Input_1(2, 5) Input_2(1, 5)
+ * \ /
+ * Less(with broadcast)
+ * |
+ * Output(2, 5)
+ */
+
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0xc0, 0x00, 0x00, 0x00, 0x14, 0x02, 0x00, 0x00, 0x30, 0x02, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0xac, 0x00, 0x00, 0x00, 0xa4, 0x00, 0x00, 0x00, 0x9c, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00,
+ 0x74, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x9a, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+ 0x58, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x07, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x04, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0a, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x32, 0x2e, 0x31, 0x31, 0x2e, 0x30, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x31, 0x2e, 0x31, 0x34, 0x2e, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xbc, 0xfe, 0xff, 0xff, 0xc0, 0xfe, 0xff, 0xff, 0xc4, 0xfe, 0xff, 0xff, 0xc8, 0xfe, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0a, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x13, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x14, 0x00, 0x00, 0x00, 0x88, 0xff, 0xff, 0xff,
+ 0x04, 0x00, 0x00, 0x00, 0x4c, 0x65, 0x73, 0x73, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0xc6, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0xb8, 0xff, 0xff, 0xff,
+ 0x0d, 0x00, 0x00, 0x00, 0x50, 0x6c, 0x61, 0x63, 0x65, 0x68, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x5f,
+ 0x31, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x50, 0x6c, 0x61, 0x63, 0x65, 0x68, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x3a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3a, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input1_data = {11.259954, 0.61867523, -7.2976017, 16.326784, -1.7243233,
+ -9.790066, -2.8924255, -1.1683407, -7.3359947, 22.979622};
+
+const std::vector<float> input2_data = {0.67757416, 10.977215, 6.6511993, -7.3085804, 8.511749};
+
+const std::vector<bool> reference_output_data = {false, true, true, false, true,
+ true, true, true, true, false};
+
+} // namespace less_float_with_broadcasting
+
+namespace neg_less_float_with_broadcasting
+{
+
+/*
+ * Less Kernel with input type mismatch:
+ *
+ * Input_1(2, 5) Input_2(1, 5)
+ * \ /
+ * Less(with broadcast)
+ * |
+ * Output(2, 5)
+ */
+
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x68, 0x01, 0x00, 0x00, 0x84, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x29, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00,
+ 0x3c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa4, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6f, 0x66, 0x6d, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x3a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3a, 0x11, 0x00, 0x00, 0x00,
+ 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+ 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input1_data = {};
+
+const std::vector<float> input2_data = {};
+
+const std::vector<bool> reference_output_data = {};
+
+} // namespace neg_less_float_with_broadcasting
+
+class TestDataFloatLess : public TestDataLessBase<float, bool>
+{
+public:
+ explicit TestDataFloatLess(bool is_with_broadcast, bool is_neg)
+ : TestDataLessBase<float, bool>(is_with_broadcast)
+ {
+ if (not is_with_broadcast)
+ {
+ if (is_neg)
+ {
+ _input1_data = neg_less_float_with_no_broadcasting::input1_data;
+ _input2_data = neg_less_float_with_no_broadcasting::input2_data;
+ _reference_output_data = neg_less_float_with_no_broadcasting::reference_output_data;
+ _test_kernel_model_circle = neg_less_float_with_no_broadcasting::test_kernel_model_circle;
+ }
+ else
+ {
+ _input1_data = less_float_with_no_broadcasting::input1_data;
+ _input2_data = less_float_with_no_broadcasting::input2_data;
+ _reference_output_data = less_float_with_no_broadcasting::reference_output_data;
+ _test_kernel_model_circle = less_float_with_no_broadcasting::test_kernel_model_circle;
+ }
+ }
+ else
+ {
+ if (is_neg)
+ {
+ _input1_data = neg_less_float_with_broadcasting::input1_data;
+ _input2_data = neg_less_float_with_broadcasting::input2_data;
+ _reference_output_data = neg_less_float_with_broadcasting::reference_output_data;
+ _test_kernel_model_circle = neg_less_float_with_broadcasting::test_kernel_model_circle;
+ }
+ else
+ {
+ _input1_data = less_float_with_broadcasting::input1_data;
+ _input2_data = less_float_with_broadcasting::input2_data;
+ _reference_output_data = less_float_with_broadcasting::reference_output_data;
+ _test_kernel_model_circle = less_float_with_broadcasting::test_kernel_model_circle;
+ }
+ }
+ }
+
+ ~TestDataFloatLess() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_LESS_KERNEL_FLOAT_H
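
In less_float_with_broadcasting above, the (1, 5) second input is reused for each row of the (2, 5) first input, so the reference value at flat index i compares input1[i] against input2[i % 5]. A sketch that recomputes the table (illustrative only, not part of the patch):

#include <cassert>
#include <cstddef>
#include "luci_interpreter/test_models/less/FloatLessKernel.h"

void sketch_recompute_less_broadcast_reference()
{
  using namespace luci_interpreter::test_kernel::less_float_with_broadcasting;

  for (std::size_t i = 0; i < input1_data.size(); ++i)
  {
    const float rhs = input2_data[i % input2_data.size()]; // row-wise broadcast of (1, 5)
    assert((input1_data[i] < rhs) == reference_output_data[i]);
  }
}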
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/less/IntLessKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/less/IntLessKernel.h
new file mode 100644
index 000000000..750d45d67
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/less/IntLessKernel.h
@@ -0,0 +1,301 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_LESS_KERNEL_INT_H
+#define LUCI_INTERPRETER_TEST_MODELS_LESS_KERNEL_INT_H
+
+#include "TestDataLessBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace less_int_with_no_broadcasting
+{
+
+/*
+ * Less Kernel:
+ *
+ * Input_1(1, 5) Input_2(1, 5)
+ * \ /
+ *       Less(no broadcast)
+ * |
+ * Output(1, 5)
+ */
+
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0xc0, 0x00, 0x00, 0x00, 0x0c, 0x02, 0x00, 0x00, 0x28, 0x02, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0xac, 0x00, 0x00, 0x00, 0xa4, 0x00, 0x00, 0x00, 0x9c, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00,
+ 0x74, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x9a, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+ 0x58, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x07, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x04, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0a, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x32, 0x2e, 0x31, 0x31, 0x2e, 0x30, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x31, 0x2e, 0x31, 0x34, 0x2e, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xc4, 0xfe, 0xff, 0xff, 0xc8, 0xfe, 0xff, 0xff, 0xcc, 0xfe, 0xff, 0xff, 0xd0, 0xfe, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0a, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x8c, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x8e, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x14, 0x00, 0x00, 0x00, 0x80, 0xff, 0xff, 0xff,
+ 0x04, 0x00, 0x00, 0x00, 0x4c, 0x65, 0x73, 0x73, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0xc2, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x1c, 0x00, 0x00, 0x00,
+ 0xb4, 0xff, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x00, 0x50, 0x6c, 0x61, 0x63, 0x65, 0x68, 0x6f, 0x6c,
+ 0x64, 0x65, 0x72, 0x5f, 0x31, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x13, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x1c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x50, 0x6c, 0x61, 0x63, 0x65, 0x68, 0x6f, 0x6c,
+ 0x64, 0x65, 0x72, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3a,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<int32_t> input1_data = {22, 31, 14, 5, 3};
+
+const std::vector<int32_t> input2_data = {3, 14, 5, 14, 5};
+
+const std::vector<bool> reference_output_data = {false, false, false, true, true};
+
+} // namespace less_int_with_no_broadcasting
+
+namespace neg_less_int_with_no_broadcasting
+{
+
+/*
+ * Less Kernel with input type mismatch:
+ *
+ * Input_1(1, 5) Input_2(1, 5)
+ * \ /
+ *       Less(no broadcast)
+ * |
+ * Output(1, 5)
+ */
+
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x60, 0x01, 0x00, 0x00, 0x7c, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x29, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xac, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6f, 0x66, 0x6d, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0xd4, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3a, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x3a, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+ 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<int32_t> input1_data = {};
+
+const std::vector<int32_t> input2_data = {};
+
+const std::vector<bool> reference_output_data = {};
+
+} // namespace neg_less_int_with_no_broadcasting
+
+namespace less_int_with_broadcasting
+{
+
+/*
+ * Less Kernel:
+ *
+ * Input_1(2, 5) Input_2(1, 5)
+ * \ /
+ * Less(with broadcast)
+ * |
+ * Output(2, 5)
+ */
+
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0xc0, 0x00, 0x00, 0x00, 0x0c, 0x02, 0x00, 0x00, 0x28, 0x02, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0xac, 0x00, 0x00, 0x00, 0xa4, 0x00, 0x00, 0x00, 0x9c, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00,
+ 0x74, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x9a, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+ 0x58, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x07, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x04, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0a, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x32, 0x2e, 0x31, 0x31, 0x2e, 0x30, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x31, 0x2e, 0x31, 0x34, 0x2e, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xc4, 0xfe, 0xff, 0xff, 0xc8, 0xfe, 0xff, 0xff, 0xcc, 0xfe, 0xff, 0xff, 0xd0, 0xfe, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0a, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x8c, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x8e, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x14, 0x00, 0x00, 0x00, 0x80, 0xff, 0xff, 0xff,
+ 0x04, 0x00, 0x00, 0x00, 0x4c, 0x65, 0x73, 0x73, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0xc2, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x1c, 0x00, 0x00, 0x00,
+ 0xb4, 0xff, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x00, 0x50, 0x6c, 0x61, 0x63, 0x65, 0x68, 0x6f, 0x6c,
+ 0x64, 0x65, 0x72, 0x5f, 0x31, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x13, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x1c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x50, 0x6c, 0x61, 0x63, 0x65, 0x68, 0x6f, 0x6c,
+ 0x64, 0x65, 0x72, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3a,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<int32_t> input1_data = {-4, 15, 4, 21, -3, 13, 5, -3, -5, 5};
+
+const std::vector<int32_t> input2_data = {5, 5, 6, 5, 6};
+
+const std::vector<bool> reference_output_data = {true, false, true, false, true,
+ false, false, true, true, true};
+
+} // namespace less_int_with_broadcasting
+
+namespace neg_less_int_with_broadcasting
+{
+
+/*
+ * Less Kernel with input type mismatch:
+ *
+ * Input_1(2, 5) Input_2(1, 5)
+ * \ /
+ * Less(with broadcast)
+ * |
+ * Output(2, 5)
+ */
+
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x60, 0x01, 0x00, 0x00, 0x7c, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x29, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xac, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6f, 0x66, 0x6d, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0xd4, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3a, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x3a, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+ 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<int32_t> input1_data = {};
+
+const std::vector<int32_t> input2_data = {};
+
+const std::vector<bool> reference_output_data = {};
+
+} // namespace neg_less_int_with_broadcasting
+
+class TestDataIntLess : public TestDataLessBase<int32_t, bool>
+{
+public:
+ explicit TestDataIntLess(bool is_with_broadcast, bool is_neg)
+ : TestDataLessBase<int32_t, bool>(is_with_broadcast)
+ {
+ if (is_with_broadcast)
+ {
+ if (is_neg)
+ {
+ _input1_data = neg_less_int_with_broadcasting::input1_data;
+ _input2_data = neg_less_int_with_broadcasting::input2_data;
+ _reference_output_data = neg_less_int_with_broadcasting::reference_output_data;
+ _test_kernel_model_circle = neg_less_int_with_broadcasting::test_kernel_model_circle;
+ }
+ else
+ {
+ _input1_data = less_int_with_broadcasting::input1_data;
+ _input2_data = less_int_with_broadcasting::input2_data;
+ _reference_output_data = less_int_with_broadcasting::reference_output_data;
+ _test_kernel_model_circle = less_int_with_broadcasting::test_kernel_model_circle;
+ }
+ }
+ else
+ {
+ if (is_neg)
+ {
+ _input1_data = neg_less_int_with_no_broadcasting::input1_data;
+ _input2_data = neg_less_int_with_no_broadcasting::input2_data;
+ _reference_output_data = neg_less_int_with_no_broadcasting::reference_output_data;
+ _test_kernel_model_circle = neg_less_int_with_no_broadcasting::test_kernel_model_circle;
+ }
+ else
+ {
+ _input1_data = less_int_with_no_broadcasting::input1_data;
+ _input2_data = less_int_with_no_broadcasting::input2_data;
+ _reference_output_data = less_int_with_no_broadcasting::reference_output_data;
+ _test_kernel_model_circle = less_int_with_no_broadcasting::test_kernel_model_circle;
+ }
+ }
+ }
+
+ ~TestDataIntLess() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_LESS_KERNEL_INT_H
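Editorial note: the data blocks above are self-checking in the sense that the reference vector is just elementwise input1 < input2, with input2 broadcast along the first axis. A minimal host-side sketch (assuming the header above is included; this helper is not part of the diff) that re-derives the reference:

#include <cassert>
#include <cstddef>

int main()
{
  using namespace luci_interpreter::test_kernel::less_int_with_broadcasting;
  const std::size_t rows = 2, cols = 5; // input1 is (2, 5); input2 is (1, 5)
  for (std::size_t r = 0; r < rows; ++r)
    for (std::size_t c = 0; c < cols; ++c)
      assert((input1_data[r * cols + c] < input2_data[c]) ==
             reference_output_data[r * cols + c]);
  return 0;
}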
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/less/NegTestDataLessKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/less/NegTestDataLessKernel.h
new file mode 100644
index 000000000..c81212a76
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/less/NegTestDataLessKernel.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_NEG_TEST_MODELS_LESS_KERNEL_H
+#define LUCI_INTERPRETER_NEG_TEST_MODELS_LESS_KERNEL_H
+
+#include "TestDataLessBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+namespace neg_less_kernel_wrong_output
+{
+
+/*
+ * Less Kernel with wrong output type
+ *
+ * Input_1(1, 4, 4, 3) Input_2(1, 4, 4, 3)
+ * \ /
+ * Less(no broadcast)
+ * |
+ * Output(1, 4, 4, 3)
+ */
+
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x6c, 0x01, 0x00, 0x00, 0x88, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x29, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00,
+ 0x34, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0xd0, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3a,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+} // namespace neg_less_kernel_wrong_output
+
+class NegTestDataLessKernel : public NegTestDataBase
+{
+public:
+ NegTestDataLessKernel()
+ {
+ _test_kernel_model_circle = neg_less_kernel_wrong_output::test_kernel_model_circle;
+ }
+
+ ~NegTestDataLessKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_NEG_TEST_MODELS_LESS_KERNEL_H
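Editorial note: a negative test class carries only a deliberately malformed model (here, a wrong output type), so a harness is expected to reject it at import or validation time rather than execute it. A hedged sketch, where expect_import_failure is an assumed helper rather than a real API in this diff:

void expect_import_failure(const unsigned char *model); // assumed helper, not in this diff

void check_neg_less_kernel()
{
  luci_interpreter::test_kernel::NegTestDataLessKernel test_data;
  // The only contract of a NegTestData* class is get_model_ptr(); there are no
  // input/output vectors because the model must never reach execution.
  expect_import_failure(test_data.get_model_ptr());
}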
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/less/QuantLessKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/less/QuantLessKernel.h
new file mode 100644
index 000000000..df51e057e
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/less/QuantLessKernel.h
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_LESS_KERNEL_QUANT_H
+#define LUCI_INTERPRETER_TEST_MODELS_LESS_KERNEL_QUANT_H
+
+#include "TestDataLessBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace less_uint8_with_no_broadcasting
+{
+
+/*
+ * Less Kernel:
+ *
+ * Input_1(1, 4, 4, 3) Input_2(1, 4, 4, 3)
+ * \ /
+ * Less(no broadcast)
+ * |
+ * Output(1, 4, 4, 3)
+ */
+
+const unsigned char test_kernel_model_circle[] = {
+ 0x1c, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x10, 0x02, 0x00, 0x00, 0x2c, 0x02, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff,
+ 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00,
+ 0x6c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x29,
+ 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0xcc, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x8a, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00,
+ 0x4c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x4c, 0x00, 0x00, 0x00,
+ 0x7c, 0xff, 0xff, 0xff, 0x30, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x81, 0x80, 0x00, 0x3c, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x80, 0x3f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xbf, 0x04, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x13, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x20, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03,
+ 0x54, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x81, 0x80, 0x00, 0x3c, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xbf, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31,
+ 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x3a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3a, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<uint8_t> input1_data = {
+ 14, 5, 251, 243, 14, 243, 251, 5, 34, 252, 245, 252, 11, 250, 31, 14,
+ 5, 250, 21, 5, 24, 233, 5, 235, 23, 25, 15, 6, 22, 251, 23, 252,
+ 242, 5, 14, 21, 234, 242, 5, 253, 16, 244, 5, 13, 21, 5, 15, 252};
+
+const std::vector<uint8_t> input2_data = {5, 2, 5, 14, 14, 24, 15, 5, 13, 4, 5, 251,
+ 241, 14, 253, 253, 5, 254, 245, 5, 13, 40, 253, 5,
+ 253, 235, 5, 5, 252, 252, 5, 5, 5, 5, 252, 253,
+ 252, 13, 251, 251, 7, 253, 13, 5, 5, 253, 5, 5};
+
+const std::vector<bool> reference_output_data = {
+ false, false, false, false, false, false, false, false, false, false, false, false,
+ true, false, true, true, false, true, true, false, false, false, true, false,
+ true, true, false, false, true, true, false, false, false, false, true, true,
+ true, false, true, false, false, true, true, false, false, true, false, false};
+
+} // namespace less_uint8_with_no_broadcasting
+
+namespace neg_less_uint8_with_no_broadcasting
+{
+
+/*
+ * Less Kernel with input type mismatch:
+ *
+ * Input_1(1, 4, 4, 3) Input_2(1, 4, 4, 3)
+ * \ /
+ * Less(no broadcast)
+ * |
+ * Output(1, 4, 4, 3)
+ */
+
+const unsigned char test_kernel_model_circle[] = {};
+
+const std::vector<uint8_t> input1_data = {};
+
+const std::vector<uint8_t> input2_data = {};
+
+const std::vector<bool> reference_output_data = {};
+
+} // namespace neg_less_uint8_with_no_broadcasting
+
+class TestDataQuantLess : public TestDataLessBase<uint8_t, bool>
+{
+public:
+ explicit TestDataQuantLess(bool is_with_broadcast, bool is_neg)
+ : TestDataLessBase<uint8_t, bool>(is_with_broadcast)
+ {
+ if (is_with_broadcast)
+ {
+ assert(false && "Not impl yet");
+ }
+ else
+ {
+ if (is_neg)
+ {
+ _input1_data = neg_less_uint8_with_no_broadcasting::input1_data;
+ _input2_data = neg_less_uint8_with_no_broadcasting::input2_data;
+ _reference_output_data = neg_less_uint8_with_no_broadcasting::reference_output_data;
+ _test_kernel_model_circle = neg_less_uint8_with_no_broadcasting::test_kernel_model_circle;
+ }
+ else
+ {
+ _input1_data = less_uint8_with_no_broadcasting::input1_data;
+ _input2_data = less_uint8_with_no_broadcasting::input2_data;
+ _reference_output_data = less_uint8_with_no_broadcasting::reference_output_data;
+ _test_kernel_model_circle = less_uint8_with_no_broadcasting::test_kernel_model_circle;
+ }
+ }
+ }
+
+ ~TestDataQuantLess() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_LESS_KERNEL_QUANT_H
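Editorial note: the uint8 model above embeds affine quantization parameters; decoding the little-endian float bytes 0x81 0x80 0x00 0x3c gives a scale of about 0.0078431 (roughly 2/255) with zero point 128, i.e. a [-1, 1] real range. Treating those values as illustrative, dequantization follows the usual rule real = scale * (q - zero_point):

#include <cstdint>

// Affine dequantization for a uint8 tensor; the default parameters are read
// off the byte pattern above and should be treated as illustrative.
inline float dequantize(std::uint8_t q, float scale = 2.0f / 255.0f,
                        std::int32_t zero_point = 128)
{
  return scale * (static_cast<std::int32_t>(q) - zero_point);
}
// e.g. dequantize(128) == 0.0f and dequantize(255) is roughly 0.996f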
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/less/TestDataLessBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/less/TestDataLessBase.h
new file mode 100644
index 000000000..1264ab301
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/less/TestDataLessBase.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_LESS_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_LESS_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T, typename U> class TestDataLessBase : public TestDataBase<T, U>
+{
+public:
+ explicit TestDataLessBase(bool)
+ {
+ // Do nothing
+ }
+
+ TestDataLessBase() = delete;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input1_data;
+ case 1:
+ return _input2_data;
+ default:
+ assert(false && "Wrong input index");
+ }
+ }
+
+ const std::vector<U> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input1_data;
+ std::vector<T> _input2_data;
+ std::vector<U> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_LESS_KERNEL_BASE_H
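Editorial note: TestDataLessBase is structurally identical to the LessEqual, LogicalAnd, and LogicalOr bases added later in this diff, and its switch-based accessor falls off the end of a value-returning function once asserts are compiled out. A hypothetical shared base (a sketch, not part of this diff) that folds the duplication and always returns:

#include <cassert>
#include <vector>
// assumes "luci_interpreter/test_models/TestDataBase.h" is on the include path

template <typename T, typename U = T> class TestDataTwoInputBase : public TestDataBase<T, U>
{
public:
  const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }

  const std::vector<T> &get_input_data_by_index(int i) override final
  {
    assert(i == 0 || i == 1);
    return i == 0 ? _input1_data : _input2_data; // always returns, even with NDEBUG
  }

  const std::vector<U> &get_output_data_by_index(int i) override final
  {
    assert(i == 0);
    return _reference_output_data;
  }

protected:
  std::vector<T> _input1_data;
  std::vector<T> _input2_data;
  std::vector<U> _reference_output_data;
  const unsigned char *_test_kernel_model_circle = nullptr;
};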
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/less_equal/FloatLessEqualKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/less_equal/FloatLessEqualKernel.h
new file mode 100644
index 000000000..c20966d83
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/less_equal/FloatLessEqualKernel.h
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_LESS_EQUAL_KERNEL_FLOAT_H
+#define LUCI_INTERPRETER_TEST_MODELS_LESS_EQUAL_KERNEL_FLOAT_H
+
+#include "TestDataLessEqualBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace less_equal_float
+{
+
+/*
+ * LessEqual Kernel:
+ *
+ * Input_1(1, 4, 4, 3) Input_2(1, 4, 4, 3)
+ * \ /
+ * LessEqual(no broadcast)
+ * |
+ * Output(1, 4, 4, 3)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x7c, 0x01, 0x00, 0x00, 0x98, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00,
+ 0x44, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x06, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0xd0, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input1_data = {
+ -0.01787583, -0.8314556, -0.47624078, -1.9747407, -0.51676583, -0.20183715, -1.9358647,
+ 0.7616414, -0.0899037, 2.048367, -1.3174965, 1.5267943, 0.68707687, 1.3464743,
+ 0.98674047, -1.4853697, 1.9973947, 0.5170953, 0.37471953, -1.6011852, 0.32045737,
+ -0.6598305, -1.7946662, 1.2349467, 1.3320708, 0.5151753, 1.345111, -0.16560331,
+ 0.82792366, -1.734876, 0.043626763, -0.0118546495, 0.31535238, 0.1888555, -0.32523626,
+ -0.997665, 0.5819472, -2.3194845, -1.6897905, 0.9981752, -1.2897044, 0.75768864,
+ 0.56781554, -1.0565805, -1.4891449, 0.2493645, -1.1312587, 0.6837854};
+
+const std::vector<float> input2_data = {
+ 0.30809638, -0.28285328, -0.8437058, 1.7689779, 0.5182942, 0.571205, -0.89484423,
+ 0.28100377, 0.5453497, 1.3848042, -0.04359268, -1.7448778, -0.5375435, -0.85059136,
+ -0.77961826, -0.4916915, 1.3359088, -0.09580261, 0.6158275, -0.05056348, 0.90505254,
+ 0.94226706, 1.136139, -0.45077038, -0.5018571, -1.1543767, 0.85094684, -0.13731039,
+ -0.3298641, 0.9474698, -0.48497504, -0.14864737, -0.009302358, -1.1259161, 0.44226727,
+ 1.0149708, 0.36024934, 0.4969523, 0.45014778, -0.34718898, 1.2260172, 0.35304692,
+ -1.3037513, -0.2565706, 0.18085766, -0.7099202, -0.9203537, -1.2257448};
+
+const std::vector<bool> reference_output_data = {
+ true, true, false, true, true, true, true, false, true, false, true, false,
+ false, false, false, true, false, false, true, true, true, true, true, false,
+ false, false, false, true, false, true, false, false, false, false, true, true,
+ false, true, true, false, true, false, false, true, true, false, true, false};
+
+} // namespace less_equal_float
+
+namespace neg_less_equal_float_with_no_broadcasting
+{
+
+/*
+ * LessEqual Kernel with input type mismatch:
+ *
+ * Input_1(1, 4, 4, 3)-float Input_2(1, 4, 4, 3)-int
+ * \ /
+ * LessEqual(no broadcast)
+ * |
+ * Output(1, 4, 4, 3)
+ */
+
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x80, 0x01, 0x00, 0x00, 0x9c, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00,
+ 0x44, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xd0, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x32,
+ 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3f, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x3f, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+ 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input1_data = {};
+
+const std::vector<float> input2_data = {};
+
+const std::vector<bool> reference_output_data = {};
+
+} // namespace neg_less_equal_float_with_no_broadcasting
+
+class TestDataFloatLessEqual : public TestDataLessEqualBase<float, bool>
+{
+public:
+ explicit TestDataFloatLessEqual(bool is_with_broadcast, bool is_neg)
+ : TestDataLessEqualBase<float, bool>(is_with_broadcast)
+ {
+ if (is_with_broadcast)
+ {
+ assert(false && "Not impl yet");
+ }
+ else
+ {
+ if (is_neg)
+ {
+ _input1_data = neg_less_equal_float_with_no_broadcasting::input1_data;
+ _input2_data = neg_less_equal_float_with_no_broadcasting::input2_data;
+ _reference_output_data = neg_less_equal_float_with_no_broadcasting::reference_output_data;
+ _test_kernel_model_circle =
+ neg_less_equal_float_with_no_broadcasting::test_kernel_model_circle;
+ }
+ else
+ {
+ _input1_data = less_equal_float::input1_data;
+ _input2_data = less_equal_float::input2_data;
+ _reference_output_data = less_equal_float::reference_output_data;
+ _test_kernel_model_circle = less_equal_float::test_kernel_model_circle;
+ }
+ }
+ }
+
+ ~TestDataFloatLessEqual() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_LESS_EQUAL_KERNEL_FLOAT_H
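Editorial note: the reference vector above is elementwise input1 <= input2 with no broadcasting, so it can be regenerated with a one-line std::transform. A self-contained sketch (assuming the header above is included; the helper name is illustrative):

#include <algorithm>
#include <functional>
#include <vector>

std::vector<bool> make_less_equal_reference(const std::vector<float> &a,
                                            const std::vector<float> &b)
{
  std::vector<bool> out(a.size());
  std::transform(a.begin(), a.end(), b.begin(), out.begin(), std::less_equal<float>());
  return out;
}
// make_less_equal_reference(less_equal_float::input1_data, less_equal_float::input2_data)
// reproduces less_equal_float::reference_output_data.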
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/less_equal/TestDataLessEqualBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/less_equal/TestDataLessEqualBase.h
new file mode 100644
index 000000000..460773710
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/less_equal/TestDataLessEqualBase.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_LESS_EQUAL_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_LESS_EQUAL_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T, typename U> class TestDataLessEqualBase : public TestDataBase<T, U>
+{
+public:
+ explicit TestDataLessEqualBase(bool)
+ {
+ // Do nothing
+ }
+
+ TestDataLessEqualBase() = delete;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input1_data;
+ case 1:
+ return _input2_data;
+ default:
+ assert(false && "Wrong input index");
+ }
+ }
+
+ const std::vector<U> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input1_data;
+ std::vector<T> _input2_data;
+ std::vector<U> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_LESS_EQUAL_KERNEL_BASE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/logical_and/BoolLogicalAndKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/logical_and/BoolLogicalAndKernel.h
new file mode 100644
index 000000000..8b87fa28e
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/logical_and/BoolLogicalAndKernel.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_BOOL_LOGICAL_AND_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_BOOL_LOGICAL_AND_KERNEL_H
+
+#include "TestDataLogicalAndBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace logical_and_bool
+{
+/*
+ * LogicalAnd Kernel:
+ *
+ * Input(1, 4, 4, 3) Input(1, 4, 4, 3)
+ * | |
+ * LogicalAnd
+ * |
+ * Output(1, 4, 4, 3)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x78, 0x01, 0x00, 0x00, 0x94, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x00, 0x00,
+ 0x38, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x9c, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xcc, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x56, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x56, 0x11, 0x00, 0x00, 0x00,
+ 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+ 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<bool> input1_data = {
+ true, false, true, true, true, false, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, false, false};
+const std::vector<bool> input2_data = {
+ true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, false};
+
+const std::vector<bool> reference_output_data = {
+ true, false, true, true, true, false, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, false, false, false};
+
+} // namespace logical_and_bool
+
+class TestDataBoolLogicalAnd : public TestDataLogicalAndBase<bool>
+{
+public:
+ TestDataBoolLogicalAnd()
+ {
+ _input1_data = logical_and_bool::input1_data;
+ _input2_data = logical_and_bool::input2_data;
+ _reference_output_data = logical_and_bool::reference_output_data;
+ _test_kernel_model_circle = logical_and_bool::test_kernel_model_circle;
+ }
+
+ ~TestDataBoolLogicalAnd() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_BOOL_LOGICAL_AND_KERNEL_H
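Editorial note: the inputs and reference here are std::vector<bool>, which is bit-packed, so its storage cannot be handed to a bool tensor buffer with memcpy. A sketch of the explicit widening copy a harness would need (illustrative only, not part of the diff):

#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<std::uint8_t> to_tensor_bytes(const std::vector<bool> &v)
{
  std::vector<std::uint8_t> bytes(v.size());
  for (std::size_t i = 0; i < v.size(); ++i)
    bytes[i] = v[i] ? 1 : 0; // one byte per element, as a bool tensor expects
  return bytes;
}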
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/logical_and/NegLogicalAndKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/logical_and/NegLogicalAndKernel.h
new file mode 100644
index 000000000..6624fe2a2
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/logical_and/NegLogicalAndKernel.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_LOGICAL_AND_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_NEG_LOGICAL_AND_KERNEL_H
+
+#include "TestDataLogicalAndBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace neg_logical_and_inputs_type_mismatch
+{
+/*
+ * LogicalAnd Kernel with input type mismatch:
+ *
+ * Input(1, 4, 4, 3)-Bool Input(1, 4, 4, 3)-Float32
+ * | |
+ * LogicalAnd
+ * |
+ * Output(1, 4, 4, 3)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x80, 0x01, 0x00, 0x00, 0x9c, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00,
+ 0x44, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x94, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x56, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x56, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+ 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+} // namespace neg_logical_and_inputs_type_mismatch
+
+class NegTestDataInputTypeMismatchLogicalAndKernel : public NegTestDataBase
+{
+public:
+ NegTestDataInputTypeMismatchLogicalAndKernel()
+ {
+ _test_kernel_model_circle = neg_logical_and_inputs_type_mismatch::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInputTypeMismatchLogicalAndKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_LOGICAL_AND_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/logical_and/TestDataLogicalAndBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/logical_and/TestDataLogicalAndBase.h
new file mode 100644
index 000000000..c2842a333
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/logical_and/TestDataLogicalAndBase.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_LOGICAL_AND_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_LOGICAL_AND_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T> class TestDataLogicalAndBase : public TestDataBase<T>
+{
+public:
+ TestDataLogicalAndBase() = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input1_data;
+ case 1:
+ return _input2_data;
+ default:
+ assert(false && "Wrong input index");
+ }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input1_data;
+ std::vector<T> _input2_data;
+ std::vector<T> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_LOGICAL_AND_KERNEL_BASE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/logical_or/BoolLogicalOrKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/logical_or/BoolLogicalOrKernel.h
new file mode 100644
index 000000000..90b7511db
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/logical_or/BoolLogicalOrKernel.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_BOOL_LOGICAL_OR_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_BOOL_LOGICAL_OR_KERNEL_H
+
+#include "TestDataLogicalOrBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace logical_or_bool
+{
+/*
+ * LogicalOr Kernel:
+ *
+ * Input(1, 4, 4, 3) Input(1, 4, 4, 3)
+ * | |
+ * LogicalOr
+ * |
+ * Output(1, 4, 4, 3)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x78, 0x01, 0x00, 0x00, 0x94, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x00, 0x00,
+ 0x38, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x9c, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xcc, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x54, 0x11, 0x00, 0x00, 0x00,
+ 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+ 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<bool> input1_data = {
+ true, false, true, true, true, false, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, false, false};
+const std::vector<bool> input2_data = {
+ true, false, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true, true,
+ true, false, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, false, false, false};
+
+const std::vector<bool> reference_output_data = {
+ true, false, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, false, false};
+
+} // namespace logical_or_bool
+
+class TestDataBoolLogicalOr : public TestDataLogicalOrBase<bool>
+{
+public:
+ TestDataBoolLogicalOr()
+ {
+ _input1_data = logical_or_bool::input1_data;
+ _input2_data = logical_or_bool::input2_data;
+ _reference_output_data = logical_or_bool::reference_output_data;
+ _test_kernel_model_circle = logical_or_bool::test_kernel_model_circle;
+ }
+
+ ~TestDataBoolLogicalOr() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_BOOL_LOGICAL_OR_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/logical_or/NegLogicalOrKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/logical_or/NegLogicalOrKernel.h
new file mode 100644
index 000000000..1225d98fb
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/logical_or/NegLogicalOrKernel.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_LOGICAL_OR_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_NEG_LOGICAL_OR_KERNEL_H
+
+#include "TestDataLogicalOrBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace neg_logical_or_inputs_type_mismatch
+{
+/*
+ * LogicalOr Kernel with input type mismatch:
+ *
+ * Input(1, 4, 4, 3)-Bool Input(1, 4, 4, 3)-Float32
+ * | |
+ * LogicalOr
+ * |
+ * Output(1, 4, 4, 3)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x80, 0x01, 0x00, 0x00, 0x9c, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00,
+ 0x44, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x94, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x54, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+ 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+} // namespace neg_logical_or_inputs_type_mismatch
+
+class NegTestDataInputTypeMismatchLogicalOrKernel : public NegTestDataBase
+{
+public:
+ NegTestDataInputTypeMismatchLogicalOrKernel()
+ {
+ _test_kernel_model_circle = neg_logical_or_inputs_type_mismatch::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInputTypeMismatchLogicalOrKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_LOGICAL_OR_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/logical_or/TestDataLogicalOrBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/logical_or/TestDataLogicalOrBase.h
new file mode 100644
index 000000000..af9fee2ad
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/logical_or/TestDataLogicalOrBase.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_LOGICAL_OR_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_LOGICAL_OR_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T> class TestDataLogicalOrBase : public TestDataBase<T>
+{
+public:
+ TestDataLogicalOrBase() = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input1_data;
+ case 1:
+ return _input2_data;
+ default:
+ assert(false && "Wrong input index");
+ return _input1_data; // unreachable fallback: keeps this non-void function well-formed when NDEBUG disables assert
+ }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input1_data;
+ std::vector<T> _input2_data;
+ std::vector<T> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
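+
+// Usage sketch (hypothetical driver, not part of this header): a concrete
+// subclass fills the protected members above, and a test then reads them back
+// through the base interface, e.g.
+//
+//   SomeLogicalOrTestData test_data; // hypothetical concrete subclass
+//   const unsigned char *model = test_data.get_model_ptr();
+//   const std::vector<bool> &lhs = test_data.get_input_data_by_index(0);
+//   const std::vector<bool> &rhs = test_data.get_input_data_by_index(1);
+//   const std::vector<bool> &expected = test_data.get_output_data_by_index(0);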
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_LOGICAL_OR_KERNEL_BASE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/logistic/FloatLogisticKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/logistic/FloatLogisticKernel.h
new file mode 100644
index 000000000..23f9f7b8b
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/logistic/FloatLogisticKernel.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_FLOAT_LOGISTIC_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_FLOAT_LOGISTIC_KERNEL_H
+
+#include "TestDataLogisticBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace logistic_float
+{
+/*
+ * Logistic Kernel:
+ *
+ * Input(1, 3, 3, 2)
+ * |
+ * Logistic
+ * |
+ * Output(1, 3, 3, 2)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x2c, 0x00, 0x00, 0x00, 0x14, 0x01, 0x00, 0x00, 0x30, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x18, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff,
+ 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x48, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0a, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xd4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input_data = {29.353455, 12.060211, 11.372606, -9.009369, 3.0267563,
+ 5.1447716, 21.289762, 19.976126, 8.726238, 4.8797092,
+ 3.64571, 34.80062, -6.9072685, -2.2714958, -16.44065,
+ 0.334301, -20.372694, 4.1522675};
+
+const std::vector<float> reference_output_data = {
+ 1.0, 0.99999416, 0.99998844, 0.00012225899, 0.9537683, 0.994204,
+ 1.0, 1.0, 0.99983776, 0.9924581, 0.97456115, 1.0,
+ 0.0009994869, 0.093511336, 7.2429586e-08, 0.5828055, 1.4198792e-09, 0.98451483};
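+
+// Cross-check sketch (illustrative only; assumes <cmath> is visible in the
+// including translation unit): each reference value above is the logistic
+// sigmoid of the matching input, e.g. 1 / (1 + exp(-0.334301)) ~= 0.5828055.
+inline float logistic_reference(float x) { return 1.0f / (1.0f + std::exp(-x)); } // hypothetical helper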
+
+} // namespace logistic_float
+
+class TestDataFloatLogistic : public TestDataLogisticBase<float>
+{
+public:
+ TestDataFloatLogistic()
+ {
+ _input_data = logistic_float::input_data;
+ _reference_output_data = logistic_float::reference_output_data;
+ _test_kernel_model_circle = logistic_float::test_kernel_model_circle;
+ }
+
+ ~TestDataFloatLogistic() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_FLOAT_LOGISTIC_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/logistic/NegLogisticKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/logistic/NegLogisticKernel.h
new file mode 100644
index 000000000..fec9689ba
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/logistic/NegLogisticKernel.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_LOGISTIC_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_NEG_LOGISTIC_KERNEL_H
+
+#include "TestDataLogisticBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace neg_logistic_input_output_type_mismatch
+{
+/*
+ * Logistic Kernel with input/output type mismatch (the types should be equal):
+ *
+ * Input(1, 3, 3, 2) - Float
+ * |
+ * Logistic
+ * |
+ * Output(1, 3, 3, 2) - Int
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x2c, 0x00, 0x00, 0x00, 0x24, 0x01, 0x00, 0x00, 0x40, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x18, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff,
+ 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x48, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0a, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+} // namespace neg_logistic_input_output_type_mismatch
+
+namespace neg_logistic_no_quant_params
+{
+/*
+ * Logistic Kernel with UINT8 type and without quant params:
+ *
+ * Input(1, 3, 3, 2) - UINT8
+ * |
+ * Logistic (no quant params)
+ * |
+ * Output(1, 3, 3, 2) - UINT8
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x2c, 0x00, 0x00, 0x00, 0x1c, 0x01, 0x00, 0x00, 0x38, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x18, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff,
+ 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x48, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0a, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xd0, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x03, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+} // namespace neg_logistic_no_quant_params
+
+class NegTestDataInputOutputTypeMismatchLogisticKernel : public NegTestDataBase
+{
+public:
+ NegTestDataInputOutputTypeMismatchLogisticKernel()
+ {
+ _test_kernel_model_circle = neg_logistic_input_output_type_mismatch::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInputOutputTypeMismatchLogisticKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+class NegTestDataNoQuantParamsLogisticKernel : public NegTestDataBase
+{
+public:
+ NegTestDataNoQuantParamsLogisticKernel()
+ {
+ _test_kernel_model_circle = neg_logistic_no_quant_params::test_kernel_model_circle;
+ }
+
+ ~NegTestDataNoQuantParamsLogisticKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_LOGISTIC_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/logistic/TestDataLogisticBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/logistic/TestDataLogisticBase.h
new file mode 100644
index 000000000..faffd2f76
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/logistic/TestDataLogisticBase.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_LOGISTIC_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_LOGISTIC_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T> class TestDataLogisticBase : public TestDataBase<T>
+{
+public:
+ TestDataLogisticBase() = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input_data;
+ default:
+ assert(false && "Wrong input index");
+ return _input_data; // unreachable fallback: keeps this non-void function well-formed when NDEBUG disables assert
+ }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input_data;
+ std::vector<T> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_LOGISTIC_KERNEL_BASE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/maxpool2d/FloatMaxPool2DKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/maxpool2d/FloatMaxPool2DKernel.h
new file mode 100644
index 000000000..927859fd2
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/maxpool2d/FloatMaxPool2DKernel.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_FLOAT_MAXPOOL2D_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_FLOAT_MAXPOOL2D_KERNEL_H
+
+#include "TestDataMaxPool2DBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace maxpool2d_float
+{
+/*
+ * MaxPool2D Kernel:
+ *
+ * Input(1, 3, 5, 1)
+ * |
+ * MaxPool2D
+ * |
+ * Output(1, 2, 2, 1)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0xbc, 0x00, 0x00, 0x00, 0x10, 0x02, 0x00, 0x00, 0x2c, 0x02, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0xa8, 0x00, 0x00, 0x00, 0xa0, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x96, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x08, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x0c, 0x00, 0x08, 0x00,
+ 0x00, 0x00, 0x07, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x04, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x32, 0x2e, 0x39, 0x2e, 0x31, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x31, 0x2e, 0x35, 0x2e, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xd0, 0xfe, 0xff, 0xff, 0xd4, 0xfe, 0xff, 0xff, 0xd8, 0xfe, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x7c, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x16, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x1c, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00,
+ 0x34, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x17, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x5c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xba, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0xac, 0xff, 0xff, 0xff,
+ 0x11, 0x00, 0x00, 0x00, 0x50, 0x61, 0x72, 0x74, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x65, 0x64, 0x43,
+ 0x61, 0x6c, 0x6c, 0x3a, 0x30, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x14, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x73, 0x65, 0x72, 0x76,
+ 0x69, 0x6e, 0x67, 0x5f, 0x64, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x5f, 0x78, 0x3a, 0x30, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x11, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+ 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input_data = {
+ 1, -1, 0, -2, 2, //
+ -7, -6, -5, -4, -3, //
+ 5, 4, 3, 6, 7, //
+};
+
+const std::vector<float> reference_output_data{
+ 1, 2, //
+ 5, 7, //
+};
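+
+// Pooling sketch (illustrative; the window and stride are inferred from the
+// data, not decoded from the flatbuffer above): the reference outputs match a
+// 2x3 window swept with stride (1, 2) over the 3x5 grid, VALID padding.
+inline float window_max(int row, int col) // hypothetical helper, not used by the suite
+{
+ // Max over the 2x3 window whose top-left corner is (row, col) in the 3x5 grid.
+ float best = input_data[row * 5 + col];
+ for (int r = row; r < row + 2; ++r)
+ for (int c = col; c < col + 3; ++c)
+ best = (input_data[r * 5 + c] > best) ? input_data[r * 5 + c] : best;
+ return best;
+}
+// e.g. window_max(0, 0) == 1 and window_max(1, 2) == 7, matching the reference data.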
+
+} // namespace maxpool2d_float
+
+class TestDataFloatMaxPool2D : public TestDataMaxPool2DBase<float>
+{
+public:
+ TestDataFloatMaxPool2D()
+ {
+ _input_data = maxpool2d_float::input_data;
+ _reference_output_data = maxpool2d_float::reference_output_data;
+ _test_kernel_model_circle = maxpool2d_float::test_kernel_model_circle;
+ }
+
+ ~TestDataFloatMaxPool2D() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_FLOAT_MAXPOOL2D_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/maxpool2d/NegMaxPool2DKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/maxpool2d/NegMaxPool2DKernel.h
new file mode 100644
index 000000000..dbba73663
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/maxpool2d/NegMaxPool2DKernel.h
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_MAXPOOL2D_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_NEG_MAXPOOL2D_KERNEL_H
+
+#include "TestDataMaxPool2DBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace neg_maxpool2d_kernel
+{
+/*
+ * MaxPool2D Kernel with input_type != output_type:
+ *
+ * Input(1, 8, 8, 1) = Float32
+ * |
+ * MaxPool2D
+ * |
+ * Output(1, 7, 7, 1) = Int32
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x2c, 0x00, 0x00, 0x00, 0x58, 0x01, 0x00, 0x00, 0x74, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x18, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff,
+ 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x7c, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x16, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x1c, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00,
+ 0x34, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x17, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x50, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x11, 0x00, 0x00, 0x00,
+ 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+ 0x65, 0x00, 0x00, 0x00};
+} // namespace neg_maxpool2d_kernel
+
+namespace neg_invalid_input_shape_maxpool2d_kernel
+{
+/*
+ * MaxPool2D Kernel with invalid input shape rank = 5 (rank should be 4):
+ *
+ * Input(1, 1, 8, 8, 1) = Int32
+ * |
+ * MaxPool2D
+ * |
+ * Output(1, 7, 7, 1) = Int32
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x2c, 0x00, 0x00, 0x00, 0x4c, 0x01, 0x00, 0x00, 0x68, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x18, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff,
+ 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x7c, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x16, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x1c, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00,
+ 0x34, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x17, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x40, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xd4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+} // namespace neg_invalid_input_shape_maxpool2d_kernel
+
+namespace neg_no_quant_params_maxpool2d_kernel
+{
+/*
+ * MaxPool2D Kernel with INT16 type and without quant params:
+ *
+ * Input(1, 8, 8, 1) = INT16
+ * |
+ * MaxPool2D (no quant params)
+ * |
+ * Output(1, 7, 7, 1) = INT16
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x2c, 0x00, 0x00, 0x00, 0x50, 0x01, 0x00, 0x00, 0x6c, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x18, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff,
+ 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x7c, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x16, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x1c, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00,
+ 0x34, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x17, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x44, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xd0, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
+ 0x07, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x07, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x11, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+ 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+} // namespace neg_no_quant_params_maxpool2d_kernel
+
+class NegTestDataInputOutputTypeMismatchMaxPool2DKernel : public NegTestDataBase
+{
+public:
+ NegTestDataInputOutputTypeMismatchMaxPool2DKernel()
+ {
+ _test_kernel_model_circle = neg_maxpool2d_kernel::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInputOutputTypeMismatchMaxPool2DKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+class NegTestDataInvalidInputShapeMaxPool2DKernel : public NegTestDataBase
+{
+public:
+ NegTestDataInvalidInputShapeMaxPool2DKernel()
+ {
+ _test_kernel_model_circle = neg_invalid_input_shape_maxpool2d_kernel::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInvalidInputShapeMaxPool2DKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+class NegTestDataNoQuantParamsMaxPool2DKernel : public NegTestDataBase
+{
+public:
+ NegTestDataNoQuantParamsMaxPool2DKernel()
+ {
+ _test_kernel_model_circle = neg_no_quant_params_maxpool2d_kernel::test_kernel_model_circle;
+ }
+
+ ~NegTestDataNoQuantParamsMaxPool2DKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_MAXPOOL2D_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/maxpool2d/TestDataMaxPool2DBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/maxpool2d/TestDataMaxPool2DBase.h
new file mode 100644
index 000000000..71a81cefa
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/maxpool2d/TestDataMaxPool2DBase.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_MAXPOOL2D_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_MAXPOOL2D_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T> class TestDataMaxPool2DBase : public TestDataBase<T>
+{
+public:
+ TestDataMaxPool2DBase() = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input_data;
+ default:
+ assert(false && "Wrong input index");
+ return _input_data; // unreachable fallback: keeps this non-void function well-formed when NDEBUG disables assert
+ }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input_data;
+ std::vector<T> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_MAXPOOL2D_KERNEL_BASE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/mul/FloatMulKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/mul/FloatMulKernel.h
new file mode 100644
index 000000000..444bb534b
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/mul/FloatMulKernel.h
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_MUL_KERNEL_FLOAT_H
+#define LUCI_INTERPRETER_TEST_MODELS_MUL_KERNEL_FLOAT_H
+
+#include "TestDataMulBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace mul_float_with_broadcasting
+{
+
+/*
+ * Mul Kernel:
+ *
+ * Input_1(2, 5) Input_2(2, 1)
+ * \ /
+ * Mul(with broadcast)
+ * |
+ * Output(2, 5)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0xc0, 0x00, 0x00, 0x00, 0x0c, 0x02, 0x00, 0x00, 0x28, 0x02, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0xac, 0x00, 0x00, 0x00, 0xa4, 0x00, 0x00, 0x00, 0x9c, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00,
+ 0x74, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x9a, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+ 0x58, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x07, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x04, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0a, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x32, 0x2e, 0x31, 0x31, 0x2e, 0x30, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x31, 0x2e, 0x35, 0x2e, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xc4, 0xfe, 0xff, 0xff, 0xc8, 0xfe, 0xff, 0xff, 0xcc, 0xfe, 0xff, 0xff, 0xd0, 0xfe, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x15, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x3c, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x9a, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff, 0x03, 0x00, 0x00, 0x00,
+ 0x4d, 0x75, 0x6c, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0xc6, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0xb8, 0xff, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x00, 0x50, 0x6c, 0x61, 0x63,
+ 0x65, 0x68, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x5f, 0x31, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x10, 0x00,
+ 0x00, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x50, 0x6c, 0x61, 0x63, 0x65, 0x68, 0x6f, 0x6c,
+ 0x64, 0x65, 0x72, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input1_data = {-16.302355, -4.020832, -7.797722, -10.514711, -13.203278,
+ -4.742243, 14.114815, 13.727003, 7.3895016, -2.0813313};
+const std::vector<float> input2_data = {-3.132759, 9.31464};
+const std::vector<float> reference_output_data = {51.07135, 12.596298, 24.428385, 32.940056,
+ 41.362686, -44.172283, 131.47443, 127.86209,
+ 68.83054, -19.386852};
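+
+// Broadcast cross-check sketch (illustrative only): with shapes (2, 5) * (2, 1),
+// the single input2 value of each row scales all five input1 entries of that
+// row; up to float rounding, reference_output_data[r * 5 + c] equals:
+inline float broadcast_mul_reference(int r, int c) // hypothetical helper
+{
+ return input1_data[r * 5 + c] * input2_data[r]; // r in [0, 2), c in [0, 5)
+}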
+
+} // namespace mul_float_with_broadcasting
+
+namespace mul_float_no_broadcasting
+{
+/*
+ * Mul Kernel:
+ *
+ * Input_1(2, 5) Input_2(2, 5)
+ * \ /
+ * Mul(no broadcast)
+ * |
+ * Output(2, 5)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0xc0, 0x00, 0x00, 0x00, 0x0c, 0x02, 0x00, 0x00, 0x28, 0x02, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0xac, 0x00, 0x00, 0x00, 0xa4, 0x00, 0x00, 0x00, 0x9c, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00,
+ 0x74, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x9a, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+ 0x58, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x07, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x04, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0a, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x32, 0x2e, 0x31, 0x31, 0x2e, 0x30, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x31, 0x2e, 0x35, 0x2e, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xc4, 0xfe, 0xff, 0xff, 0xc8, 0xfe, 0xff, 0xff, 0xcc, 0xfe, 0xff, 0xff, 0xd0, 0xfe, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x15, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x3c, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x9a, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff, 0x03, 0x00, 0x00, 0x00,
+ 0x4d, 0x75, 0x6c, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0xc6, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0xb8, 0xff, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x00, 0x50, 0x6c, 0x61, 0x63,
+ 0x65, 0x68, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x5f, 0x31, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x10, 0x00,
+ 0x00, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x50, 0x6c, 0x61, 0x63, 0x65, 0x68, 0x6f, 0x6c,
+ 0x64, 0x65, 0x72, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input1_data = {18.033651, -15.601158, 3.842373, 17.90259, -12.840965,
+ 19.272898, 22.070192, 12.965511, 23.20587, -7.0852413};
+const std::vector<float> input2_data = {20.24965, 4.263008, 16.145443, 25.501696, -8.905457,
+ -2.0830078, 28.85225, 24.545036, -13.7073345, 9.774281};
+const std::vector<float> reference_output_data = {365.1751, -66.507866, 62.03681, 456.5464,
+ 114.35466, -40.145596, 636.77466, 318.23895,
+ -318.0906, -69.253136};
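+
+// Elementwise check (illustrative): without broadcasting the product is taken
+// index by index, e.g. 18.033651 * 20.24965 ~= 365.1751, the first reference entry.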
+
+} // namespace mul_float_no_broadcasting
+
+class TestDataFloatMul : public TestDataMulBase<float>
+{
+public:
+ explicit TestDataFloatMul(bool is_with_broadcast) : TestDataMulBase<float>(is_with_broadcast)
+ {
+ if (is_with_broadcast)
+ {
+ _input1_data = mul_float_with_broadcasting::input1_data;
+ _input2_data = mul_float_with_broadcasting::input2_data;
+ _reference_output_data = mul_float_with_broadcasting::reference_output_data;
+ _test_kernel_model_circle = mul_float_with_broadcasting::test_kernel_model_circle;
+ }
+ else
+ {
+ _input1_data = mul_float_no_broadcasting::input1_data;
+ _input2_data = mul_float_no_broadcasting::input2_data;
+ _reference_output_data = mul_float_no_broadcasting::reference_output_data;
+ _test_kernel_model_circle = mul_float_no_broadcasting::test_kernel_model_circle;
+ }
+ }
+
+ ~TestDataFloatMul() override = default;
+};
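+
+// Usage sketch (hypothetical call site): the constructor flag picks which of
+// the two embedded models and data sets is exercised, e.g.
+//
+//   TestDataFloatMul broadcast_case(/*is_with_broadcast=*/true);
+//   TestDataFloatMul elementwise_case(/*is_with_broadcast=*/false);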
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_MUL_KERNEL_FLOAT_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/mul/IntMulKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/mul/IntMulKernel.h
new file mode 100644
index 000000000..7dc1c538b
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/mul/IntMulKernel.h
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_MUL_KERNEL_INT_H
+#define LUCI_INTERPRETER_TEST_MODELS_MUL_KERNEL_INT_H
+
+#include "TestDataMulBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace mul_int_with_broadcasting
+{
+
+/*
+ * Mul Kernel:
+ *
+ * Input_1(2, 5) Input_2(2, 1)
+ * \ /
+ * Mul(with broadcast)
+ * |
+ * Output(2, 5)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0xc0, 0x00, 0x00, 0x00, 0x18, 0x02, 0x00, 0x00, 0x34, 0x02, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0xac, 0x00, 0x00, 0x00, 0xa4, 0x00, 0x00, 0x00, 0x9c, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00,
+ 0x74, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x9a, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+ 0x58, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x07, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x04, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0a, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x32, 0x2e, 0x31, 0x31, 0x2e, 0x30, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x31, 0x2e, 0x35, 0x2e, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xb8, 0xfe, 0xff, 0xff, 0xbc, 0xfe, 0xff, 0xff, 0xc0, 0xfe, 0xff, 0xff, 0xc4, 0xfe, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x15, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x30, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x92, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x84, 0xff, 0xff, 0xff,
+ 0x03, 0x00, 0x00, 0x00, 0x4d, 0x75, 0x6c, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0xc2, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x1c, 0x00, 0x00, 0x00, 0xb4, 0xff, 0xff, 0xff,
+ 0x0d, 0x00, 0x00, 0x00, 0x50, 0x6c, 0x61, 0x63, 0x65, 0x68, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x5f,
+ 0x31, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x13, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x1c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x50, 0x6c, 0x61, 0x63, 0x65, 0x68, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x11, 0x00, 0x00, 0x00,
+ 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+ 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<int32_t> input1_data = {-5, 5, 5, -4, -4, 15, -13, 23, 5, 5};
+const std::vector<int32_t> input2_data = {-2, 14};
+const std::vector<int32_t> reference_output_data = {10, -10, -10, 8, 8, 210, -182, 322, 70, 70};
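+
+// Worked example (illustrative): input2 broadcasts across each row of the
+// (2, 5) operand, so row 0 is scaled by -2 ({-5, 5, 5, -4, -4} -> {10, -10, -10, 8, 8})
+// and row 1 by 14 ({15, -13, 23, 5, 5} -> {210, -182, 322, 70, 70}).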
+
+} // namespace mul_int_with_broadcasting
+
+namespace mul_int_no_broadcasting
+{
+/*
+ * Mul Kernel:
+ *
+ * Input_1(2, 5) Input_2(2, 5)
+ * \ /
+ * Mul(no broadcast)
+ * |
+ * Output(2, 5)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0xc0, 0x00, 0x00, 0x00, 0x18, 0x02, 0x00, 0x00, 0x34, 0x02, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0xac, 0x00, 0x00, 0x00, 0xa4, 0x00, 0x00, 0x00, 0x9c, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00,
+ 0x74, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x9a, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+ 0x58, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x07, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x04, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0a, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x32, 0x2e, 0x31, 0x31, 0x2e, 0x30, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x31, 0x2e, 0x35, 0x2e, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xb8, 0xfe, 0xff, 0xff, 0xbc, 0xfe, 0xff, 0xff, 0xc0, 0xfe, 0xff, 0xff, 0xc4, 0xfe, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x15, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x30, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x92, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x84, 0xff, 0xff, 0xff,
+ 0x03, 0x00, 0x00, 0x00, 0x4d, 0x75, 0x6c, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0xc2, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x1c, 0x00, 0x00, 0x00, 0xb4, 0xff, 0xff, 0xff,
+ 0x0d, 0x00, 0x00, 0x00, 0x50, 0x6c, 0x61, 0x63, 0x65, 0x68, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x5f,
+ 0x31, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x13, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x1c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x50, 0x6c, 0x61, 0x63, 0x65, 0x68, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x11, 0x00, 0x00, 0x00,
+ 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+ 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<int32_t> input1_data = {5, 3, -3, 5, 14, 14, -3, 33, -11, 24};
+const std::vector<int32_t> input2_data = {5, -3, 5, 5, 25, 5, -4, -2, 5, 25};
+const std::vector<int32_t> reference_output_data = {25, -9, -15, 25, 350, 70, 12, -66, -55, 600};
+
+} // namespace mul_int_no_broadcasting
+
+class TestDataIntMul : public TestDataMulBase<int32_t>
+{
+public:
+ explicit TestDataIntMul(bool is_with_broadcast) : TestDataMulBase<int32_t>(is_with_broadcast)
+ {
+ if (is_with_broadcast)
+ {
+ _input1_data = mul_int_with_broadcasting::input1_data;
+ _input2_data = mul_int_with_broadcasting::input2_data;
+ _reference_output_data = mul_int_with_broadcasting::reference_output_data;
+ _test_kernel_model_circle = mul_int_with_broadcasting::test_kernel_model_circle;
+ }
+ else
+ {
+ _input1_data = mul_int_no_broadcasting::input1_data;
+ _input2_data = mul_int_no_broadcasting::input2_data;
+ _reference_output_data = mul_int_no_broadcasting::reference_output_data;
+ _test_kernel_model_circle = mul_int_no_broadcasting::test_kernel_model_circle;
+ }
+ }
+
+ ~TestDataIntMul() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_MUL_KERNEL_INT_H
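Because the no-broadcast case above is a plain element-wise product, the int32 reference data can be sanity-checked on the host without running the interpreter. A minimal sketch follows (the standalone main() and the include path are assumptions for illustration, not part of the patch):

#include <cassert>
#include <cstddef>

#include "luci_interpreter/test_models/mul/IntMulKernel.h" // assumed include path

int main()
{
  // No-broadcast Mul: output[i] must equal input1[i] * input2[i].
  luci_interpreter::test_kernel::TestDataIntMul data(/*is_with_broadcast=*/false);
  const auto &in1 = data.get_input_data_by_index(0);
  const auto &in2 = data.get_input_data_by_index(1);
  const auto &ref = data.get_output_data_by_index(0);
  assert(in1.size() == in2.size() && in1.size() == ref.size());
  for (std::size_t i = 0; i < ref.size(); ++i)
    assert(in1[i] * in2[i] == ref[i]); // e.g. 14 * 25 == 350
  return 0;
}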
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/mul/NegMulKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/mul/NegMulKernel.h
new file mode 100644
index 000000000..cbbdbdd6e
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/mul/NegMulKernel.h
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_MUL_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_NEG_MUL_KERNEL_H
+
+#include "TestDataMulBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace input_1_wrong_type
+{
+
+/*
+ * Mul Kernel with input type mismatch:
+ *
+ * Input_1(2, 5) - Int32 Input_2(2, 1) - Float
+ * \ /
+ * Mul(with broadcast)
+ * |
+ * Output(2, 5)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x64, 0x01, 0x00, 0x00, 0x80, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x15, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00,
+ 0x38, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xdc, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x32,
+ 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+} // namespace input_1_wrong_type
+
+namespace input_2_wrong_type
+{
+
+/*
+ * Mul Kernel with input type mismatch:
+ *
+ * Input_1(2, 5) - Float Input_2(2, 1) - Int32
+ * \ /
+ * Mul(with broadcast)
+ * |
+ * Output(2, 5)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x64, 0x01, 0x00, 0x00, 0x80, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x15, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00,
+ 0x38, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+} // namespace input_2_wrong_type
+
+namespace unsupported_type
+{
+
+/*
+ * Mul Kernel with unsupported type:
+ *
+ * Input_1(2, 5) - Int16 Input_2(2, 1) - Int16
+ * \ /
+ * Mul(with broadcast)
+ * |
+ * Output(2, 5) - Int16
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x60, 0x01, 0x00, 0x00, 0x7c, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x15, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xac, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6f, 0x66, 0x6d, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0xd4, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x12, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+ 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+} // namespace unsupported_type
+
+class NegTestDataInput1WrongTypeMul : public NegTestDataBase
+{
+public:
+ NegTestDataInput1WrongTypeMul()
+ {
+ _test_kernel_model_circle = input_1_wrong_type::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInput1WrongTypeMul() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+class NegTestDataInput2WrongTypeMul : public NegTestDataBase
+{
+public:
+ NegTestDataInput2WrongTypeMul()
+ {
+ _test_kernel_model_circle = input_2_wrong_type::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInput2WrongTypeMul() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+class NegTestDataInt16TypeMul : public NegTestDataBase
+{
+public:
+ NegTestDataInt16TypeMul()
+ {
+ _test_kernel_model_circle = unsupported_type::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInt16TypeMul() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_MUL_KERNEL_H
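These NegTestData* classes deliberately expose only get_model_ptr(): a negative test hands the malformed model to the import/validation path and expects rejection rather than output data. A hypothetical sketch of that pattern (run_model_expect_failure stands in for whatever hook the actual test harness provides):

#include "luci_interpreter/test_models/mul/NegMulKernel.h" // assumed include path

// Hypothetical harness hook: runs the model and asserts that import fails.
void run_model_expect_failure(const unsigned char *model);

void check_mul_negative_models()
{
  luci_interpreter::test_kernel::NegTestDataInput1WrongTypeMul wrong_input1;
  luci_interpreter::test_kernel::NegTestDataInput2WrongTypeMul wrong_input2;
  luci_interpreter::test_kernel::NegTestDataInt16TypeMul unsupported_int16;

  // Each model encodes exactly one violation (an input type mismatch or an
  // unsupported Int16 Mul), so validation should reject all three.
  run_model_expect_failure(wrong_input1.get_model_ptr());
  run_model_expect_failure(wrong_input2.get_model_ptr());
  run_model_expect_failure(unsupported_int16.get_model_ptr());
}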
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/mul/TestDataMulBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/mul/TestDataMulBase.h
new file mode 100644
index 000000000..1b64982eb
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/mul/TestDataMulBase.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_MUL_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_MUL_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T> class TestDataMulBase : public TestDataBase<T>
+{
+public:
+ explicit TestDataMulBase(bool)
+ {
+ // Do nothing
+ }
+
+ TestDataMulBase() = delete;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input1_data;
+ case 1:
+ return _input2_data;
+ default:
+ assert(false && "Wrong input index");
+ }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input1_data;
+ std::vector<T> _input2_data;
+ std::vector<T> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_MUL_KERNEL_BASE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/neg/FloatNegKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/neg/FloatNegKernel.h
new file mode 100644
index 000000000..3e3566648
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/neg/FloatNegKernel.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_FLOAT_NEG_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_FLOAT_NEG_KERNEL_H
+
+#include "TestDataNegBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace neg_float
+{
+/*
+ * Neg Kernel:
+ *
+ * Input(1, 3, 3, 2)
+ * |
+ * Neg
+ * |
+ * Output(1, 3, 3, 2)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x28, 0x00, 0x00, 0x00, 0x24, 0x01, 0x00, 0x00, 0x40, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff,
+ 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00,
+ 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x2a, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xd4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x3b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3b, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input_data = {-2.3082886, 5.8998604, 7.297842, 30.999863, 15.692827,
+ -18.824865, 22.614136, 5.7466774, 6.65571, -1.1786385,
+ 3.8724442, 9.483013, 19.376131, -6.1562176, -5.4431114,
+ 9.304043, 22.674402, -2.3587227};
+
+const std::vector<float> reference_output_data = {
+ 2.3082886, -5.8998604, -7.297842, -30.999863, -15.692827, 18.824865,
+ -22.614136, -5.7466774, -6.65571, 1.1786385, -3.8724442, -9.483013,
+ -19.376131, 6.1562176, 5.4431114, -9.304043, -22.674402, 2.3587227};
+
+} // namespace neg_float
+
+class TestDataFloatNeg : public TestDataNegBase<float>
+{
+public:
+ TestDataFloatNeg()
+ {
+ _input_data = neg_float::input_data;
+ _reference_output_data = neg_float::reference_output_data;
+ _test_kernel_model_circle = neg_float::test_kernel_model_circle;
+ }
+
+ ~TestDataFloatNeg() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_FLOAT_NEG_KERNEL_H
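Since Neg only flips the sign bit, the float reference data above is exact rather than approximate, so a host-side check needs no tolerance. A minimal sketch (standalone main() and include path are illustrative assumptions):

#include <cassert>
#include <cstddef>

#include "luci_interpreter/test_models/neg/FloatNegKernel.h" // assumed include path

int main()
{
  using namespace luci_interpreter::test_kernel;
  // Every reference element must be the exact negation of its input element.
  assert(neg_float::input_data.size() == neg_float::reference_output_data.size());
  for (std::size_t i = 0; i < neg_float::input_data.size(); ++i)
    assert(neg_float::reference_output_data[i] == -neg_float::input_data[i]);
  return 0;
}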
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/neg/NegNegKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/neg/NegNegKernel.h
new file mode 100644
index 000000000..43f9c0996
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/neg/NegNegKernel.h
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_NEG_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_NEG_NEG_KERNEL_H
+
+#include "TestDataNegBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace neg_input_output_type_mismatch_neg_kernel
+{
+/*
+ * Negate Kernel with input/output type mismatch (types should be equal):
+ *
+ * Input(1, 3, 3, 2) - Float
+ * |
+ * Negate
+ * |
+ * Output(1, 3, 3, 2) - Int
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x28, 0x00, 0x00, 0x00, 0x84, 0x01, 0x00, 0x00, 0xa0, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff,
+ 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00,
+ 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x2a, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0xa0, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x13, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x54, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00,
+ 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00,
+ 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x80, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x80, 0x3b, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x3b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3b, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+} // namespace neg_input_output_type_mismatch_neg_kernel
+
+namespace neg_invalid_input_shape_neg_kernel
+{
+/*
+ * Negate Kernel with invalid input shape rank 5 (should be 4):
+ *
+ * Input(1, 1, 8, 8, 1) - Float32
+ * |
+ * Negate
+ * |
+ * Output(1, 7, 7, 1) - Float32
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x28, 0x00, 0x00, 0x00, 0x28, 0x01, 0x00, 0x00, 0x44, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff,
+ 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00,
+ 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x2a, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xd4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3b, 0x11, 0x00, 0x00, 0x00,
+ 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+ 0x65, 0x00, 0x00, 0x00};
+} // namespace neg_invalid_input_shape_neg_kernel
+
+class NegTestDataInputOutputTypeMismatchNegKernel : public NegTestDataBase
+{
+public:
+ NegTestDataInputOutputTypeMismatchNegKernel()
+ {
+ _test_kernel_model_circle = neg_input_output_type_mismatch_neg_kernel::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInputOutputTypeMismatchNegKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+class NegTestDataInvalidInputShapeNegKernel : public NegTestDataBase
+{
+public:
+ NegTestDataInvalidInputShapeNegKernel()
+ {
+ _test_kernel_model_circle = neg_invalid_input_shape_neg_kernel::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInvalidInputShapeNegKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_NEG_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/neg/TestDataNegBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/neg/TestDataNegBase.h
new file mode 100644
index 000000000..fcb5704ab
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/neg/TestDataNegBase.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_NEG_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T> class TestDataNegBase : public TestDataBase<T>
+{
+public:
+ TestDataNegBase() = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input_data;
+ default:
+ assert(false && "Wrong input index");
+ }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input_data;
+ std::vector<T> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_KERNEL_BASE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/notequal/FloatNotEqualKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/notequal/FloatNotEqualKernel.h
new file mode 100644
index 000000000..51fd85d75
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/notequal/FloatNotEqualKernel.h
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_NOT_EQUAL_KERNEL_FLOAT_H
+#define LUCI_INTERPRETER_TEST_MODELS_NOT_EQUAL_KERNEL_FLOAT_H
+
+#include "TestDataNotEqualBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace not_equal_float
+{
+
+/*
+ * NotEqual Kernel:
+ *
+ * Input_1(1, 4, 4, 3) Input_2(1, 4, 4, 3)
+ * \ /
+ * NotEqual(no broadcast)
+ * |
+ * Output(1, 4, 4, 3)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x7c, 0x01, 0x00, 0x00, 0x98, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00,
+ 0x44, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x06, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0xd0, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input1_data = {
+ -0.01787583, -0.8314556, -0.47624078, -1.9747407, -0.51676583, -0.20183715, -1.9358647,
+ 0.7616414, -0.0899037, 2.048367, -1.3174965, 1.5267943, 0.68707687, 1.3464743,
+ 0.98674047, -1.4853697, 1.9973947, 0.5170953, 0.37471953, -1.6011852, 0.32045737,
+ -0.6598305, -1.7946662, 1.2349467, 1.3320708, 0.5151753, 1.345111, -0.16560331,
+ 0.82792366, -1.734876, 0.043626763, -0.0118546495, 0.31535238, 0.1888555, -0.32523626,
+ -0.997665, 0.5819472, -2.3194845, -1.6897905, 0.9981752, -1.2897044, 0.75768864,
+ 0.56781554, -1.0565805, -1.4891449, 0.2493645, -1.1312587, 0.6837854};
+
+const std::vector<float> input2_data = {
+ 0.30809638, -0.28285328, -0.8437058, 1.7689779, 0.5182942, 0.571205, -0.89484423,
+ 0.28100377, 0.5453497, 1.3848042, -0.04359268, -1.7448778, -0.5375435, -0.85059136,
+ -0.77961826, -0.4916915, 1.3359088, -0.09580261, 0.6158275, -0.05056348, 0.90505254,
+ 0.94226706, 1.136139, -0.45077038, -0.5018571, -1.1543767, 0.85094684, -0.13731039,
+ -0.3298641, 0.9474698, -0.48497504, -0.14864737, -0.009302358, -1.1259161, 0.44226727,
+ 1.0149708, 0.36024934, 0.4969523, 0.45014778, -0.34718898, 1.2260172, 0.35304692,
+ -1.3037513, -0.2565706, 0.18085766, -0.7099202, -0.9203537, -1.2257448};
+
+const std::vector<bool> reference_output_data = {
+ true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true};
+
+} // namespace not_equal_float
+
+namespace neg_not_equal_float_with_no_broadcasting
+{
+
+/*
+ * NotEqual Kernel with input type mismatch:
+ *
+ * Input_1(1, 4, 4, 3) - Float Input_2(1, 4, 4, 3) - Int32
+ * \ /
+ * NotEqual(no broadcast)
+ * |
+ * Output(1, 4, 4, 3)
+ */
+
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x80, 0x01, 0x00, 0x00, 0x9c, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00,
+ 0x44, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xd0, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x32,
+ 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x48, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+ 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input1_data = {};
+
+const std::vector<float> input2_data = {};
+
+const std::vector<bool> reference_output_data = {};
+
+} // namespace neg_not_equal_float_with_no_broadcasting
+
+class TestDataFloatNotEqual : public TestDataNotEqualBase<float, bool>
+{
+public:
+ explicit TestDataFloatNotEqual(bool is_with_broadcast, bool is_neg)
+ : TestDataNotEqualBase<float, bool>(is_with_broadcast)
+ {
+ if (is_with_broadcast)
+ {
+ assert(false && "Not implemented yet");
+ }
+ else
+ {
+ if (is_neg)
+ {
+ _input1_data = neg_not_equal_float_with_no_broadcasting::input1_data;
+ _input2_data = neg_not_equal_float_with_no_broadcasting::input2_data;
+ _reference_output_data = neg_not_equal_float_with_no_broadcasting::reference_output_data;
+ _test_kernel_model_circle =
+ neg_not_equal_float_with_no_broadcasting::test_kernel_model_circle;
+ }
+ else
+ {
+ _input1_data = not_equal_float::input1_data;
+ _input2_data = not_equal_float::input2_data;
+ _reference_output_data = not_equal_float::reference_output_data;
+ _test_kernel_model_circle = not_equal_float::test_kernel_model_circle;
+ }
+ }
+ }
+
+ ~TestDataFloatNotEqual() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NOT_EQUAL_KERNEL_FLOAT_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/notequal/TestDataNotEqualBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/notequal/TestDataNotEqualBase.h
new file mode 100644
index 000000000..786be2170
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/notequal/TestDataNotEqualBase.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_NOT_EQUAL_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_NOT_EQUAL_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T, typename U> class TestDataNotEqualBase : public TestDataBase<T, U>
+{
+public:
+ explicit TestDataNotEqualBase(bool)
+ {
+ // Do nothing
+ }
+
+ TestDataNotEqualBase() = delete;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input1_data;
+ case 1:
+ return _input2_data;
+ default:
+ assert(false && "Wrong input index");
+ }
+ }
+
+ const std::vector<U> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input1_data;
+ std::vector<T> _input2_data;
+ std::vector<U> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NOT_EQUAL_KERNEL_BASE_H
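Unlike the single-type bases elsewhere in this patch, TestDataNotEqualBase takes two template parameters because comparison kernels read one type and produce another (here float inputs, bool outputs). A short usage sketch under that split, assuming the classes from the two headers above (the standalone checker function and include path are illustrative):

#include <cassert>

#include "luci_interpreter/test_models/notequal/FloatNotEqualKernel.h" // assumed path

void check_not_equal_reference()
{
  // is_with_broadcast=false, is_neg=false selects the positive float model.
  luci_interpreter::test_kernel::TestDataFloatNotEqual data(false, false);

  const std::vector<float> &in1 = data.get_input_data_by_index(0); // T = float
  const std::vector<bool> &out = data.get_output_data_by_index(0); // U = bool

  // The two random inputs never coincide element-wise, so every output is true.
  assert(in1.size() == out.size());
  for (bool v : out)
    assert(v);
}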
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/pack/PackKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/pack/PackKernel.h
new file mode 100644
index 000000000..22d9f666d
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/pack/PackKernel.h
@@ -0,0 +1,270 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_PACK_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_PACK_KERNEL_H
+
+#include "TestDataPackBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace pack_float
+{
+/*
+ * Pack Kernel:
+ *
+ * Input(2, 4, 3) Input(2, 4, 3)
+ * \ /
+ * \ /
+ * Pack
+ * |
+ * Output(2, 2, 4, 3)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x34, 0x00, 0x00, 0x00, 0x7c, 0x01, 0x00, 0x00, 0x98, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xf4, 0xff, 0xff, 0xff, 0xf8, 0xff, 0xff, 0xff, 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00,
+ 0x78, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3b,
+ 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00,
+ 0x3c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa0, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x73, 0x74, 0x61, 0x63,
+ 0x6b, 0x5f, 0x34, 0x64, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xd4, 0xff, 0xff, 0xff,
+ 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
+ 0x69, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x31, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x53,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input_data_1 = {
+ -0.17432976, -10.103649, 4.2064724, -7.185501, 7.6475716, -34.405083, 14.065273, -17.566177,
+ 16.921495, -8.886711, 16.913736, -8.991537, 18.480549, 17.71526, -3.8370514, 16.570705,
+ -14.831467, 17.709942, 0.026670456, -6.250948, 10.977406, 22.907639, -37.32604, -1.3433037};
+
+const std::vector<float> input_data_2 = {
+ -22.672482, 10.947399, -9.828194, -3.2829914, 14.490927, 24.998316, 33.86125, -17.046562,
+ 7.1629715, 21.064964, 5.813303, -16.67994, -22.828697, -7.9325237, -23.776447, -17.539246,
+ -3.8784523, 14.898129, 27.151598, -3.9495945, 21.426613, -8.786135, 0.22362137, -7.534506};
+
+const std::vector<float> reference_output_data = {
+ -1.7432976e-01, -1.0103649e+01, 4.2064724e+00, -7.1855011e+00, 7.6475716e+00, -3.4405083e+01,
+ 1.4065273e+01, -1.7566177e+01, 1.6921495e+01, -8.8867111e+00, 1.6913736e+01, -8.9915371e+00,
+ -2.2672482e+01, 1.0947399e+01, -9.8281937e+00, -3.2829914e+00, 1.4490927e+01, 2.4998316e+01,
+ 3.3861252e+01, -1.7046562e+01, 7.1629715e+00, 2.1064964e+01, 5.8133030e+00, -1.6679939e+01,
+ 1.8480549e+01, 1.7715260e+01, -3.8370514e+00, 1.6570705e+01, -1.4831467e+01, 1.7709942e+01,
+ 2.6670456e-02, -6.2509480e+00, 1.0977406e+01, 2.2907639e+01, -3.7326038e+01, -1.3433037e+00,
+ -2.2828697e+01, -7.9325237e+00, -2.3776447e+01, -1.7539246e+01, -3.8784523e+00, 1.4898129e+01,
+ 2.7151598e+01, -3.9495945e+00, 2.1426613e+01, -8.7861347e+00, 2.2362137e-01, -7.5345058e+00};
+} // namespace pack_float
+
+namespace pack_int
+{
+/*
+ * Pack Kernel:
+ *
+ * Input(2, 8) Input(2, 8)
+ * \ /
+ * \ /
+ * Pack
+ * |
+ * Output(2, 2, 8)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0xb8, 0x00, 0x00, 0x00, 0x24, 0x02, 0x00, 0x00, 0x40, 0x02, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0xa4, 0x00, 0x00, 0x00, 0x9c, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00, 0x8c, 0x00, 0x00, 0x00,
+ 0x6c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x06, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+ 0x58, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x07, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x04, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0a, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x32, 0x2e, 0x31, 0x31, 0x2e, 0x30, 0x00, 0x00, 0x6a, 0xff, 0xff, 0xff,
+ 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x31, 0x2e, 0x31, 0x31, 0x2e, 0x30, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa4, 0xfe, 0xff, 0xff, 0xa8, 0xfe, 0xff, 0xff,
+ 0xac, 0xfe, 0xff, 0xff, 0xb0, 0xfe, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
+ 0x6c, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x16, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x3b, 0x14, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00,
+ 0x40, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x8a, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x14, 0x00, 0x00, 0x00,
+ 0x7c, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+ 0xc2, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x1c, 0x00, 0x00, 0x00, 0xb4, 0xff, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x00,
+ 0x50, 0x6c, 0x61, 0x63, 0x65, 0x68, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x5f, 0x31, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x13, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x1c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x50, 0x6c, 0x61, 0x63, 0x65, 0x68, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x53, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x53, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<int32_t> input_data_1 = {-5, 5, -5, -5, 15, 13, -4, 14,
+ 5, 5, 5, 5, 12, 21, -5, 6};
+const std::vector<int32_t> input_data_2 = {4, 5, 5, 5, 5, 5, 15, 4, -3, 11, 15, 32, -13, 13, -3, 5};
+
+const std::vector<int32_t> reference_output_data = {-5, 5, -5, -5, 15, 13, -4, 14, 5, 5, 5,
+ 5, 12, 21, -5, 6, 4, 5, 5, 5, 5, 5,
+ 15, 4, -3, 11, 15, 32, -13, 13, -3, 5};
+
+} // namespace pack_int
+
+namespace pack_quant_u8
+{
+/*
+ * Pack Kernel:
+ *
+ * Input(2, 4, 3) Input(2, 4, 3)
+ * \ /
+ * \ /
+ * Pack
+ * |
+ * Output(2, 2, 4, 3)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x34, 0x00, 0x00, 0x00, 0x54, 0x02, 0x00, 0x00, 0x70, 0x02, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xf4, 0xff, 0xff, 0xff, 0xf8, 0xff, 0xff, 0xff, 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00,
+ 0x78, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3b,
+ 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00,
+ 0x80, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1a, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00,
+ 0x4c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x50, 0x00, 0x00, 0x00,
+ 0x0c, 0xff, 0xff, 0xff, 0x30, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x7f, 0x43, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+ 0x73, 0x74, 0x61, 0x63, 0x6b, 0x5f, 0x34, 0x64, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x92, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x03, 0x48, 0x00, 0x00, 0x00, 0x84, 0xff, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00,
+ 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x43, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x07, 0x00, 0x00, 0x00, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x31, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x13, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x20, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03,
+ 0x58, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x7f, 0x43, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x69, 0x6e, 0x70, 0x75, 0x74, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x53, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x53, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<uint8_t> input_data_1 = {5, 243, 251, 5, 6, 7, 13, 23, 23, 5, 13, 5,
+ 7, 244, 13, 5, 5, 244, 252, 253, 5, 5, 5, 5};
+
+const std::vector<uint8_t> input_data_2 = {15, 30, 252, 7, 252, 40, 245, 13, 13, 14, 21, 5,
+ 5, 245, 251, 5, 251, 223, 5, 251, 22, 15, 15, 15};
+
+const std::vector<uint8_t> reference_output_data = {
+ 5, 243, 251, 5, 6, 7, 13, 23, 23, 5, 13, 5, 15, 30, 252, 7,
+ 252, 40, 245, 13, 13, 14, 21, 5, 7, 244, 13, 5, 5, 244, 252, 253,
+ 5, 5, 5, 5, 5, 245, 251, 5, 251, 223, 5, 251, 22, 15, 15, 15};
+
+} // namespace pack_quant_u8
+
+class TestDataFloatPack : public TestDataPackBase<float>
+{
+public:
+ TestDataFloatPack()
+ {
+ _input_data_1 = pack_float::input_data_1;
+ _input_data_2 = pack_float::input_data_2;
+ _reference_output_data = pack_float::reference_output_data;
+ _test_kernel_model_circle = pack_float::test_kernel_model_circle;
+ }
+
+ ~TestDataFloatPack() override = default;
+};
+
+class TestDataIntPack : public TestDataPackBase<int32_t>
+{
+public:
+ TestDataIntPack()
+ {
+ _input_data_1 = pack_int::input_data_1;
+ _input_data_2 = pack_int::input_data_2;
+ _reference_output_data = pack_int::reference_output_data;
+ _test_kernel_model_circle = pack_int::test_kernel_model_circle;
+ }
+
+ ~TestDataIntPack() override = default;
+};
+
+class TestDataQuantU8Pack : public TestDataPackBase<uint8_t>
+{
+public:
+ TestDataQuantU8Pack()
+ {
+ _input_data_1 = pack_quant_u8::input_data_1;
+ _input_data_2 = pack_quant_u8::input_data_2;
+ _reference_output_data = pack_quant_u8::reference_output_data;
+ _test_kernel_model_circle = pack_quant_u8::test_kernel_model_circle;
+ }
+
+ ~TestDataQuantU8Pack() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_PACK_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/pack/TestDataPackBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/pack/TestDataPackBase.h
new file mode 100644
index 000000000..b587df311
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/pack/TestDataPackBase.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_PACK_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_PACK_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T> class TestDataPackBase : public TestDataBase<T>
+{
+public:
+ TestDataPackBase() = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input_data_1;
+ case 1:
+ return _input_data_2;
+ default:
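+        // NOTE: with NDEBUG the assert below compiles away and control would fall
+        // off the end of this non-void function; callers must pass a valid index.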
+ assert(false && "Wrong input index");
+ }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input_data_1;
+ std::vector<T> _input_data_2;
+ std::vector<T> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
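+
+// Hypothetical usage sketch (TestDataFloatPack comes from PackKernel.h; the
+// interpreter step is left abstract and is not defined in this header):
+//
+//   test_kernel::TestDataFloatPack data;
+//   const unsigned char *model = data.get_model_ptr();
+//   const std::vector<float> &input_1 = data.get_input_data_by_index(0);
+//   const std::vector<float> &input_2 = data.get_input_data_by_index(1);
+//   // ... execute `model` and compare against data.get_output_data_by_index(0)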
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_PACK_KERNEL_BASE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/pad/FloatPadKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/pad/FloatPadKernel.h
new file mode 100644
index 000000000..ddfd3cf98
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/pad/FloatPadKernel.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_FLOAT_PAD_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_FLOAT_PAD_KERNEL_H
+
+#include "TestDataPadBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace pad_float
+{
+/*
+ * Pad Kernel:
+ *
+ * Input(1, 3, 3, 2)
+ * |
+ * Pad
+ * |
+ *      Output(1, 5, 7, 2)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x60, 0x00, 0x00, 0x00, 0x9c, 0x01, 0x00, 0x00, 0xb8, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x4c, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x9c, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00,
+ 0x07, 0x00, 0x00, 0x00, 0x70, 0x61, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x22,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input_data = {
+ -0.7942257, -1.3318212, -0.7918672, -1.0024637, -0.23364098, 0.49224994,
+ -0.23747201, -0.14768714, 1.4870708, -0.79761434, -0.27848604, 1.1856802,
+ 1.1039438, -0.34465268, -1.5857629, 3.0654314, 0.13304773, 0.067413524};
+
+const std::vector<float> reference_output_data = {
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0, -0.7942257, -1.3318212, -0.7918672,
+ -1.0024637, -0.23364098, 0.49224994, 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0, -0.23747201, -0.14768714, 1.4870708,
+ -0.79761434, -0.27848604, 1.1856802, 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0, 1.1039438, -0.34465268, -1.5857629,
+ 3.0654314, 0.13304773, 0.067413524, 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
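+
+// Note on the data above: the model's paddings tensor has shape (4, 2) and,
+// judging from the reference output, holds {{0, 0}, {1, 1}, {2, 2}, {0, 0}},
+// growing the (1, 3, 3, 2) input to (1, 5, 7, 2) = 70 values; Pad fills the
+// border with its implicit constant 0.0.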
+
+} // namespace pad_float
+
+class TestDataFloatPad : public TestDataPadBase<float>
+{
+public:
+ TestDataFloatPad()
+ {
+ _input_data = pad_float::input_data;
+ _reference_output_data = pad_float::reference_output_data;
+ _test_kernel_model_circle = pad_float::test_kernel_model_circle;
+ }
+
+ ~TestDataFloatPad() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_FLOAT_PAD_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/pad/NegPadKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/pad/NegPadKernel.h
new file mode 100644
index 000000000..15936e449
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/pad/NegPadKernel.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_PAD_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_NEG_PAD_KERNEL_H
+
+#include "TestDataPadBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace neg_pad_input_output_type_mismatch
+{
+/*
+ * Pad Kernel with input/output type mismatch (the two types should be equal):
+ *
+ *      Input(1, 3, 3, 2) - Float32
+ *            |
+ *           Pad
+ *            |
+ *      Output(1, 5, 7, 2) - Int32
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x60, 0x00, 0x00, 0x00, 0xa0, 0x01, 0x00, 0x00, 0xbc, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x4c, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xd0, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x70, 0x61, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x22, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+ 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+} // namespace neg_pad_input_output_type_mismatch
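+
+// Negative fixtures like this one expose only the deliberately invalid model
+// buffer; the accompanying tests are expected to check that importing or
+// validating such a model fails rather than executing it.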
+
+class NegTestDataInputOutputTypeMismatchPadKernel : public NegTestDataBase
+{
+public:
+ NegTestDataInputOutputTypeMismatchPadKernel()
+ {
+ _test_kernel_model_circle = neg_pad_input_output_type_mismatch::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInputOutputTypeMismatchPadKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_PAD_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/pad/TestDataPadBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/pad/TestDataPadBase.h
new file mode 100644
index 000000000..e5c72def0
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/pad/TestDataPadBase.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_PAD_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_PAD_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T> class TestDataPadBase : public TestDataBase<T>
+{
+public:
+ TestDataPadBase() = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input_data;
+ default:
+ assert(false && "Wrong input index");
+ }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input_data;
+ std::vector<T> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_PAD_KERNEL_BASE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/pad_v2/FloatPadV2Kernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/pad_v2/FloatPadV2Kernel.h
new file mode 100644
index 000000000..0aa00f884
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/pad_v2/FloatPadV2Kernel.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_FLOAT_PADV2_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_FLOAT_PADV2_KERNEL_H
+
+#include "TestDataPadV2Base.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace padV2_float
+{
+/*
+ * PadV2 Kernel:
+ *
+ * Input(1, 3, 3, 2)
+ * |
+ * PadV2
+ * |
+ *      Output(1, 5, 7, 2)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x74, 0x00, 0x00, 0x00, 0xe4, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x60, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xee, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x80, 0x3f, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff,
+ 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
+ 0x6c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b,
+ 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xac, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x70, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x9c, 0xff, 0xff, 0xff,
+ 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00,
+ 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x70, 0x61, 0x64, 0x64,
+ 0x69, 0x6e, 0x67, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input_data = {
+ -0.7942257, -1.3318212, -0.7918672, -1.0024637, -0.23364098, 0.49224994,
+ -0.23747201, -0.14768714, 1.4870708, -0.79761434, -0.27848604, 1.1856802,
+ 1.1039438, -0.34465268, -1.5857629, 3.0654314, 0.13304773, 0.067413524};
+
+const std::vector<float> reference_output_data = {
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, 1.0, 1.0, -0.7942257, -1.3318212, -0.7918672,
+ -1.0024637, -0.23364098, 0.49224994, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, 1.0, 1.0, -0.23747201, -0.14768714, 1.4870708,
+ -0.79761434, -0.27848604, 1.1856802, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, 1.0, 1.0, 1.1039438, -0.34465268, -1.5857629,
+ 3.0654314, 0.13304773, 0.067413524, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
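+
+// Note on the data above: unlike Pad, PadV2 takes an explicit constant_values
+// input; here it is 1.0f (the 0x00 0x00 0x80 0x3f buffer in the model), which
+// is why the border of the (1, 5, 7, 2) output is 1.0 rather than 0.0.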
+
+} // namespace padV2_float
+
+class TestDataFloatPadV2 : public TestDataPadV2Base<float>
+{
+public:
+ TestDataFloatPadV2()
+ {
+ _input_data = padV2_float::input_data;
+ _reference_output_data = padV2_float::reference_output_data;
+ _test_kernel_model_circle = padV2_float::test_kernel_model_circle;
+ }
+
+ ~TestDataFloatPadV2() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_FLOAT_PADV2_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/pad_v2/NegPadV2Kernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/pad_v2/NegPadV2Kernel.h
new file mode 100644
index 000000000..2591a6016
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/pad_v2/NegPadV2Kernel.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_PADV2_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_NEG_PADV2_KERNEL_H
+
+#include "TestDataPadV2Base.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace neg_padV2_input_output_type_mismatch
+{
+/*
+ * PadV2 Kernel with input/output type mismatch (the two types should be equal):
+ *
+ *      Input(1, 3, 3, 2) - Float32
+ *            |
+ *          PadV2
+ *            |
+ *      Output(1, 5, 7, 2) - Int32
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x74, 0x00, 0x00, 0x00, 0xe8, 0x01, 0x00, 0x00, 0x04, 0x02, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x60, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xee, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x80, 0x3f, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff,
+ 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
+ 0x6c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b,
+ 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xb0, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xa4, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x9c, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x0f, 0x00, 0x00, 0x00, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x5f, 0x76, 0x61, 0x6c,
+ 0x75, 0x65, 0x73, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
+ 0x70, 0x61, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x11, 0x00, 0x00, 0x00,
+ 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+ 0x65, 0x00, 0x00, 0x00};
+
+} // namespace neg_padV2_input_output_type_mismatch
+
+class NegTestDataInputOutputTypeMismatchPadV2Kernel : public NegTestDataBase
+{
+public:
+ NegTestDataInputOutputTypeMismatchPadV2Kernel()
+ {
+ _test_kernel_model_circle = neg_padV2_input_output_type_mismatch::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInputOutputTypeMismatchPadV2Kernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_PADV2_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/pad_v2/TestDataPadV2Base.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/pad_v2/TestDataPadV2Base.h
new file mode 100644
index 000000000..b94eb3cfa
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/pad_v2/TestDataPadV2Base.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_PADV2_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_PADV2_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T> class TestDataPadV2Base : public TestDataBase<T>
+{
+public:
+ TestDataPadV2Base() = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input_data;
+ default:
+ assert(false && "Wrong input index");
+ }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input_data;
+ std::vector<T> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_PADV2_KERNEL_BASE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/reduce_common/NegReduceProdKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/reduce_common/NegReduceProdKernel.h
new file mode 100644
index 000000000..73d996b60
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/reduce_common/NegReduceProdKernel.h
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_REDUCE_PROD_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_NEG_REDUCE_PROD_KERNEL_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace neg_input_wrong_type_mismatch_reduce_prod_kernel
+{
+/*
+ * ReduceProd Kernel with wrong input type:
+ *
+ * Input(5, 5) - Int16 Axis(1)
+ * \ /
+ * ReduceProd(keep_dims=false)
+ * |
+ * Output(5)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x4c, 0x00, 0x00, 0x00, 0x80, 0x01, 0x00, 0x00, 0x9c, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x38, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1b, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6f, 0x66, 0x6d, 0x00, 0x00, 0x00, 0x00, 0x00, 0xcc, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x1c, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
+ 0x72, 0x65, 0x64, 0x75, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x69, 0x6e, 0x64, 0x69, 0x63, 0x65,
+ 0x73, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x51, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+ 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+} // namespace neg_input_wrong_type_mismatch_reduce_prod_kernel
+
+namespace neg_axis_wrong_type_mismatch_reduce_prod_kernel
+{
+/*
+ * ReduceProd Kernel with wrong axis type:
+ *
+ * Input(5, 5) Axis(1) - Float32
+ * \ /
+ * ReduceProd(keep_dims=false)
+ * |
+ * Output(5)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x4c, 0x00, 0x00, 0x00, 0x6c, 0x01, 0x00, 0x00, 0x88, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x38, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x00, 0x00, 0x00, 0x40,
+ 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1b, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xb4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd0, 0xff, 0xff, 0xff,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
+ 0x72, 0x65, 0x64, 0x75, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x69, 0x6e, 0x64, 0x69, 0x63, 0x65,
+ 0x73, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x51,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+} // namespace neg_axis_wrong_type_mismatch_reduce_prod_kernel
+
+class NegTestDataWrongInputTypeReduceProdKernel : public NegTestDataBase
+{
+public:
+ NegTestDataWrongInputTypeReduceProdKernel()
+ {
+ _test_kernel_model_circle =
+ neg_input_wrong_type_mismatch_reduce_prod_kernel::test_kernel_model_circle;
+ }
+
+ ~NegTestDataWrongInputTypeReduceProdKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+class NegTestDataWrongAxisTypeReduceProdKernel : public NegTestDataBase
+{
+public:
+ NegTestDataWrongAxisTypeReduceProdKernel()
+ {
+ _test_kernel_model_circle =
+ neg_axis_wrong_type_mismatch_reduce_prod_kernel::test_kernel_model_circle;
+ }
+
+ ~NegTestDataWrongAxisTypeReduceProdKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_REDUCE_PROD_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/reduce_common/ReduceProdKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/reduce_common/ReduceProdKernel.h
new file mode 100644
index 000000000..be0a19ab5
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/reduce_common/ReduceProdKernel.h
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_REDUCE_PROD_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_REDUCE_PROD_KERNEL_H
+
+#include "TestDataReduceCommonBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace reduce_prod_float
+{
+/*
+ * ReduceProd Kernel:
+ *
+ * Input(5, 5) Axis(1)
+ * \ /
+ * ReduceProd(keep_dims=false)
+ * |
+ * Output(5)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0xd0, 0x00, 0x00, 0x00, 0x20, 0x02, 0x00, 0x00, 0x3c, 0x02, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0xbc, 0x00, 0x00, 0x00, 0xb4, 0x00, 0x00, 0x00, 0xa0, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00,
+ 0x6c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x76, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+ 0x58, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x07, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x04, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0a, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x32, 0x2e, 0x31, 0x31, 0x2e, 0x30, 0x00, 0x00, 0xda, 0xff, 0xff, 0xff,
+ 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x31, 0x2e, 0x31, 0x31, 0x2e, 0x30, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc8, 0xfe, 0xff, 0xff, 0xcc, 0xfe, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1b, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x38, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x92, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x84, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x50, 0x72, 0x6f, 0x64,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x13, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x14, 0x00, 0x00, 0x00, 0xc4, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00, 0x43, 0x6f, 0x6e, 0x73,
+ 0x74, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x14, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x50, 0x6c, 0x61, 0x63,
+ 0x65, 0x68, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x51, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+ 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input_data = {11.411349, -16.127048, 2.1805973, 2.4134026, -24.384453,
+ 7.066084, -2.4375877, -6.3261166, 12.296496, -5.2269707,
+ -1.1958504, 39.85154, 11.011908, -15.1922455, -5.623905,
+ 12.9133, 17.127638, -5.8921337, 32.048306, -1.0499363,
+ 3.921646, -0.9553833, 0.16646576, -19.362396, 2.1621552};
+
+const std::vector<float> reference_output_data = {-4.8831299e+03, -2.5635121e+04, 1.4899535e+02,
+ 2.7976750e+05, 1.6272373e+03};
+} // namespace reduce_prod_float
+
+namespace reduce_prod_int
+{
+/*
+ * ReduceProd Kernel:
+ *
+ * Input(5, 5) Axis(1)
+ * \ /
+ * ReduceProd(keep_dims=false)
+ * |
+ * Output(5)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0xd0, 0x00, 0x00, 0x00, 0x18, 0x02, 0x00, 0x00, 0x34, 0x02, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0xbc, 0x00, 0x00, 0x00, 0xb4, 0x00, 0x00, 0x00, 0xa0, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00,
+ 0x6c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x76, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+ 0x58, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x07, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x04, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0a, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x32, 0x2e, 0x31, 0x31, 0x2e, 0x30, 0x00, 0x00, 0xda, 0xff, 0xff, 0xff,
+ 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x31, 0x2e, 0x31, 0x31, 0x2e, 0x30, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd0, 0xfe, 0xff, 0xff, 0xd4, 0xfe, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1b, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x40, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x9e, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x14, 0x00, 0x00, 0x00, 0x90, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+ 0x50, 0x72, 0x6f, 0x64, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0xce, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x14, 0x00, 0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00,
+ 0x43, 0x6f, 0x6e, 0x73, 0x74, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x13, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x1c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x50, 0x6c, 0x61, 0x63, 0x65, 0x68, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x51, 0x11, 0x00, 0x00, 0x00,
+ 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+ 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<int32_t> input_data = {-2, -3, -5, 15, -11, 5, 5, 5, 7, 15, -12, 5, 5,
+ -3, 15, 22, -5, 24, 5, -13, -6, -4, -5, -3, 24};
+
+const std::vector<int32_t> reference_output_data = {-15840, -1500, 15000, 4725, 772200};
+} // namespace reduce_prod_int
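+
+// Sanity check of the integer reference above: the reduction collapses the
+// first dimension of the (5, 5) input, so each output element is a column
+// product, e.g. column 0: (-2) * 5 * (-12) * 22 * (-6) = -15840.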
+
+class TestDataFloatReduceProd : public TestDataReduceCommonBase<float>
+{
+public:
+ TestDataFloatReduceProd()
+ {
+ _input_data = reduce_prod_float::input_data;
+ _reference_output_data = reduce_prod_float::reference_output_data;
+ _test_kernel_model_circle = reduce_prod_float::test_kernel_model_circle;
+ }
+
+ ~TestDataFloatReduceProd() override = default;
+};
+
+class TestDataIntReduceProd : public TestDataReduceCommonBase<int32_t>
+{
+public:
+ TestDataIntReduceProd()
+ {
+ _input_data = reduce_prod_int::input_data;
+ _reference_output_data = reduce_prod_int::reference_output_data;
+ _test_kernel_model_circle = reduce_prod_int::test_kernel_model_circle;
+ }
+
+ ~TestDataIntReduceProd() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_REDUCE_PROD_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/reduce_common/TestDataReduceCommonBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/reduce_common/TestDataReduceCommonBase.h
new file mode 100644
index 000000000..2cdddd735
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/reduce_common/TestDataReduceCommonBase.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_REDUCE_COMMON_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_REDUCE_COMMON_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T> class TestDataReduceCommonBase : public TestDataBase<T>
+{
+public:
+ TestDataReduceCommonBase() = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input_data;
+ default:
+ assert(false && "Wrong input index");
+ }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input_data;
+ std::vector<T> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_REDUCE_COMMON_KERNEL_BASE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/relu/FloatReLUKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/relu/FloatReLUKernel.h
new file mode 100644
index 000000000..39cbdab74
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/relu/FloatReLUKernel.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_FLOAT_RELU_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_FLOAT_RELU_KERNEL_H
+
+#include "TestDataReLUBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace relu_float
+{
+/*
+ * ReLU Kernel:
+ *
+ * Input(1, 3, 3, 2)
+ * |
+ * ReLU
+ * |
+ * Output(1, 3, 3, 2)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x2c, 0x00, 0x00, 0x00, 0x14, 0x01, 0x00, 0x00, 0x30, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x18, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff,
+ 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x48, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0a, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xd4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input_data = {4.5651245, -1.783557, 10.147356, 1.359064, 19.900585,
+ 31.432447, 3.4538271, -3.425167, 11.351466, -2.519806,
+ -2.702178, -15.201234, 15.547801, 10.433272, 4.301023,
+ 5.4106083, 0.14018308, -16.32785};
+const std::vector<float> reference_output_data = {
+ 4.5651245, 0.0, 10.147356, 1.359064, 19.900585, 31.432447, 3.4538271, 0.0, 11.351466,
+ 0.0, 0.0, 0.0, 15.547801, 10.433272, 4.301023, 5.4106083, 0.14018308, 0.0};
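+
+// The reference output is max(0.0f, x) applied elementwise to input_data.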
+
+} // namespace relu_float
+
+class TestDataFloatReLU : public TestDataReLUBase<float>
+{
+public:
+ TestDataFloatReLU()
+ {
+ _input_data = relu_float::input_data;
+ _reference_output_data = relu_float::reference_output_data;
+ _test_kernel_model_circle = relu_float::test_kernel_model_circle;
+ }
+
+ ~TestDataFloatReLU() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_FLOAT_RELU_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/relu/NegReLUKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/relu/NegReLUKernel.h
new file mode 100644
index 000000000..54d1cb108
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/relu/NegReLUKernel.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_RELU_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_NEG_RELU_KERNEL_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace neg_input_output_type_mismatch_kernel
+{
+/*
+ * ReLU Kernel with an input/output type mismatch:
+ *
+ * Input(1, 3, 3, 2) - Float32
+ * |
+ * ReLU
+ * |
+ * Output(1, 3, 3, 2) - Int32
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x2c, 0x00, 0x00, 0x00, 0x24, 0x01, 0x00, 0x00, 0x40, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x18, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff,
+ 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x48, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0a, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+} // namespace neg_input_output_type_mismatch_kernel
+
+class NegTestDataInputOutputTypeMismatchReLUKernel : public NegTestDataBase
+{
+public:
+ NegTestDataInputOutputTypeMismatchReLUKernel()
+ {
+ _test_kernel_model_circle = neg_input_output_type_mismatch_kernel::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInputOutputTypeMismatchReLUKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_RELU_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/relu/TestDataReLUBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/relu/TestDataReLUBase.h
new file mode 100644
index 000000000..5e76fedf6
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/relu/TestDataReLUBase.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_RELU_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_RELU_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T> class TestDataReLUBase : public TestDataBase<T>
+{
+public:
+ TestDataReLUBase() = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input_data;
+      default:
+        assert(false && "Wrong input index");
+        return _input_data; // unreachable; avoids falling off a non-void function
+    }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input_data;
+ std::vector<T> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_RELU_KERNEL_BASE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/relu6/FloatReLU6Kernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/relu6/FloatReLU6Kernel.h
new file mode 100644
index 000000000..7be41c12c
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/relu6/FloatReLU6Kernel.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_FLOAT_RELU6_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_FLOAT_RELU6_KERNEL_H
+
+#include "TestDataReLU6Base.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace relu6_float
+{
+/*
+ * ReLU6 Kernel:
+ *
+ * Input(1, 3, 3, 2)
+ * |
+ * ReLU6
+ * |
+ * Output(1, 3, 3, 2)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x2c, 0x00, 0x00, 0x00, 0x14, 0x01, 0x00, 0x00, 0x30, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x18, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff,
+ 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x48, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0a, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xd4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x15, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input_data = {4.2436867, 11.136094, -10.418385, 9.319618, -4.471156,
+ -20.418179, 18.783192, 7.1997013, -9.659637, 6.2115526,
+ -4.2794833, 10.500693, 8.646875, 3.8725555, -21.104343,
+ -7.6522045, 1.0404004, 7.109288};
+const std::vector<float> reference_output_data = {4.2436867, 6.0, 0.0, 6.0, 0.0, 0.0,
+ 6.0, 6.0, 0.0, 6.0, 0.0, 6.0,
+ 6.0, 3.8725555, 0.0, 0.0, 1.0404004, 6.0};
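+
+// A minimal sketch of how the reference values above can be regenerated: ReLU6 is
+// elementwise min(max(x, 0.0f), 6.0f). (Assumes <algorithm> and <vector>; this
+// snippet is illustrative only and not part of the test harness.)
+//
+//   std::vector<float> expected(input_data.size());
+//   std::transform(input_data.begin(), input_data.end(), expected.begin(),
+//                  [](float x) { return std::min(std::max(x, 0.0f), 6.0f); });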
+
+} // namespace relu6_float
+
+class TestDataFloatReLU6 : public TestDataReLU6Base<float>
+{
+public:
+ TestDataFloatReLU6()
+ {
+ _input_data = relu6_float::input_data;
+ _reference_output_data = relu6_float::reference_output_data;
+ _test_kernel_model_circle = relu6_float::test_kernel_model_circle;
+ }
+
+ ~TestDataFloatReLU6() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_FLOAT_RELU6_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/relu6/NegReLU6Kernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/relu6/NegReLU6Kernel.h
new file mode 100644
index 000000000..e402d2759
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/relu6/NegReLU6Kernel.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_RELU6_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_NEG_RELU6_KERNEL_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace neg_input_output_type_mismatch_kernel
+{
+/*
+ * ReLU6 Kernel with an input/output type mismatch:
+ *
+ * Input(1, 3, 3, 2) - Float32
+ * |
+ * ReLU6
+ * |
+ * Output(1, 3, 3, 2) - Int32
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x2c, 0x00, 0x00, 0x00, 0x24, 0x01, 0x00, 0x00, 0x40, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x18, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff,
+ 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x48, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0a, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x15, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+} // namespace neg_input_output_type_mismatch_kernel
+
+class NegTestDataInputOutputTypeMismatchReLU6Kernel : public NegTestDataBase
+{
+public:
+ NegTestDataInputOutputTypeMismatchReLU6Kernel()
+ {
+ _test_kernel_model_circle = neg_input_output_type_mismatch_kernel::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInputOutputTypeMismatchReLU6Kernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_RELU6_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/relu6/TestDataReLU6Base.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/relu6/TestDataReLU6Base.h
new file mode 100644
index 000000000..8e598328d
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/relu6/TestDataReLU6Base.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_RELU6_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_RELU6_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T> class TestDataReLU6Base : public TestDataBase<T>
+{
+public:
+ TestDataReLU6Base() = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input_data;
+      default:
+        assert(false && "Wrong input index");
+        return _input_data; // unreachable; avoids falling off a non-void function
+    }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input_data;
+ std::vector<T> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_RELU6_KERNEL_BASE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/reshape/ReshapeKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/reshape/ReshapeKernel.h
new file mode 100644
index 000000000..60a42ff9f
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/reshape/ReshapeKernel.h
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_RESHAPE_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_RESHAPE_KERNEL_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+namespace neg_reshape_kernel
+{
+/*
+ * Reshape Kernel with a non-constant shape parameter (negative test)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x34, 0x00, 0x00, 0x00, 0x7c, 0x01, 0x00, 0x00, 0x98, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xf4, 0xff, 0xff, 0xff, 0xf8, 0xff, 0xff, 0xff, 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00,
+ 0x80, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x16, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11,
+ 0x14, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xa8, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x0a, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x73, 0x68, 0x61, 0x70, 0x65, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input_data = {-9.297554, 6.094736, 13.846724, -30.348026, 12.606297,
+ -25.089138, -18.258347, -8.119066, 0.24100876, 6.95887};
+
+const std::vector<float> reference_output_data = {-9.297554, 6.094736, 13.846724, -30.348026,
+ 12.606297, -25.089138, -18.258347, -8.119066,
+ 0.24100876, 6.95887};
+
+} // namespace neg_reshape_kernel
+
+namespace reshape_kernel
+{
+/*
+ * Reshape Kernel:
+ *
+ * Input(1, 1, 1, 10) Const([-1, 10])
+ * \ /
+ * Reshape
+ * |
+ * Output(1, 10)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x44, 0x00, 0x00, 0x00, 0x8c, 0x01, 0x00, 0x00, 0xa8, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x72, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0x0a, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff, 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00,
+ 0x7c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x16, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11,
+ 0x14, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x74, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa4, 0xff, 0xff, 0xff,
+ 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6f, 0x66, 0x6d, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x73, 0x68, 0x61, 0x70, 0x65, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input_data = {-9.297554, 6.094736, 13.846724, -30.348026, 12.606297,
+ -25.089138, -18.258347, -8.119066, 0.24100876, 6.95887};
+
+const std::vector<float> reference_output_data = {-9.297554, 6.094736, 13.846724, -30.348026,
+ 12.606297, -25.089138, -18.258347, -8.119066,
+ 0.24100876, 6.95887};
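+
+// Reshape only rewrites shape metadata ((1, 1, 1, 10) -> (1, 10)); the flat buffer
+// is copied unchanged, which is why the reference equals the input element for
+// element. A one-line sketch (illustrative only):
+//
+//   std::vector<float> expected = input_data; // identity on the flat buffer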
+
+} // namespace reshape_kernel
+
+template <typename T> class TestDataReshapeKernel : public TestDataBase<T>
+{
+public:
+ TestDataReshapeKernel(bool is_neg)
+ {
+ if (not is_neg)
+ {
+ _input_data = reshape_kernel::input_data;
+ _reference_output_data = reshape_kernel::reference_output_data;
+ _test_kernel_model_circle = reshape_kernel::test_kernel_model_circle;
+ }
+ else
+ {
+ _input_data = neg_reshape_kernel::input_data;
+ _reference_output_data = neg_reshape_kernel::reference_output_data;
+ _test_kernel_model_circle = neg_reshape_kernel::test_kernel_model_circle;
+ }
+ }
+
+ ~TestDataReshapeKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input_data;
+      default:
+        assert(false && "Wrong input index");
+        return _input_data; // unreachable; avoids falling off a non-void function
+    }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input_data;
+ std::vector<T> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_RESHAPE_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/resize_bilinear/FloatResizeBilinearKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/resize_bilinear/FloatResizeBilinearKernel.h
new file mode 100644
index 000000000..0ada18eae
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/resize_bilinear/FloatResizeBilinearKernel.h
@@ -0,0 +1,190 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_FLOAT_RESIZE_BILINEAR_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_FLOAT_RESIZE_BILINEAR_KERNEL_H
+
+#include "TestDataResizeBilinearBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace resize_bilinear_float
+{
+/*
+ * ResizeBilinear Kernel:
+ *
+ * align_corners = false; half_pixel_centers = false;
+ *
+ * Input(2, 2, 2, 1) FLOAT
+ * |
+ * | Constant Input(2) [3,3] INT32
+ * | /
+ * ResizeBilinear
+ * |
+ * Output(2, 3, 3, 1) FLOAT
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00,
+ 0x00, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x84, 0x01, 0x00, 0x00, 0xa0, 0x01,
+ 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00,
+ 0x24, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00,
+ 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff,
+ 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00,
+ 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00,
+ 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f,
+ 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00,
+ 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x00, 0x00, 0xa0, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00,
+ 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00,
+ 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x73, 0x69,
+ 0x7a, 0x65, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00,
+ 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00,
+ 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e,
+ 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+  0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input_data = {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+};
+
+const std::vector<float> reference_output_data = {
+ 3, 5, 6, //
+ 7, 9, 10, //
+ 9, 11, 12, //
+ 4, 8, 10, //
+ 8, 12, 14, //
+ 10, 14, 16, //
+};
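+
+// With align_corners = false and half_pixel_centers = false, each output index
+// samples the source at src = out_index * (in_size / out_size), with the upper
+// neighbour clamped at the border. A minimal per-axis sketch (hypothetical helper,
+// assumes <algorithm> and <cmath>; not part of the test harness):
+//
+//   float lerp_axis(const float *line, int in_size, int out_index, float scale)
+//   {
+//     const float src = out_index * scale; // here scale = 2.0f / 3.0f
+//     const int lo = static_cast<int>(std::floor(src));
+//     const int hi = std::min(lo + 1, in_size - 1); // clamp at the border
+//     return line[lo] + (src - lo) * (line[hi] - line[lo]);
+//   }
+//
+// Applying this along H and then W reproduces e.g. the first output row above:
+// 3, 3 + (2.0f / 3.0f) * (6 - 3) = 5, 6.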
+
+} // namespace resize_bilinear_float
+
+namespace resize_bilinear_float_half_pixel_centers
+{
+/*
+ * ResizeBilinear Kernel:
+ *
+ * align_corners = false; half_pixel_centers = true;
+ *
+ * Input(2, 2, 2, 1) FLOAT
+ * |
+ * | Constant Input(2) [3,3] INT32
+ * | /
+ * ResizeBilinear
+ * |
+ * Output(2, 3, 3, 1) FLOAT
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x4c, 0x00, 0x00, 0x00, 0x94, 0x01, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x38, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff,
+ 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x6c, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x20, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x78, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa0, 0xff, 0xff, 0xff,
+ 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x73, 0x69, 0x7a, 0x65,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31,
+ 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input_data = {
+ 1, 2, //
+ 3, 4, //
+ 1, 2, //
+ 3, 4 //
+};
+
+const std::vector<float> reference_output_data = {
+ 1, 1.5, 2, //
+ 2, 2.5, 3, //
+ 3, 3.5, 4, //
+ 1, 1.5, 2, //
+ 2, 2.5, 3, //
+ 3, 3.5, 4, //
+};
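+
+// With half_pixel_centers = true the only difference from the sketch above is the
+// source coordinate, src = (out_index + 0.5f) * scale - 0.5f, clamped to be
+// non-negative before the floor/lerp. For out_index = 1 and scale = 2.0f / 3.0f
+// this yields src = 0.5, which is why every middle value is an exact midpoint
+// (e.g. 1.5 and 2.5 above).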
+
+} // namespace resize_bilinear_float_half_pixel_centers
+
+class TestDataFloatResizeBilinear : public TestDataResizeBilinearBase<float>
+{
+public:
+ TestDataFloatResizeBilinear(bool half_pixel_centers)
+ {
+ if (!half_pixel_centers)
+ {
+ _input_data = resize_bilinear_float::input_data;
+ _reference_output_data = resize_bilinear_float::reference_output_data;
+ _test_kernel_model_circle = resize_bilinear_float::test_kernel_model_circle;
+ }
+ else
+ {
+ _input_data = resize_bilinear_float_half_pixel_centers::input_data;
+ _reference_output_data = resize_bilinear_float_half_pixel_centers::reference_output_data;
+ _test_kernel_model_circle =
+ resize_bilinear_float_half_pixel_centers::test_kernel_model_circle;
+ }
+ }
+
+ ~TestDataFloatResizeBilinear() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_FLOAT_RESIZE_BILINEAR_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/resize_bilinear/NegResizeBilinearKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/resize_bilinear/NegResizeBilinearKernel.h
new file mode 100644
index 000000000..70133dcdb
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/resize_bilinear/NegResizeBilinearKernel.h
@@ -0,0 +1,461 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_RESIZE_BILINEAR_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_NEG_RESIZE_BILINEAR_KERNEL_H
+
+#include "TestDataResizeBilinearBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace neg_invalid_input_shape_float_resize_bilinear_kernel
+{
+/*
+ * ResizeBilinear Kernel (invalid input shape: rank must be 4):
+ *
+ * align_corners = false; half_pixel_centers = true;
+ *
+ * Input(2, 2, 2) FLOAT
+ * |
+ * | Constant Input(2) [3,3] INT32
+ * | /
+ * ResizeBilinear
+ * |
+ * Output(2, 3, 3, 1) FLOAT
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x4c, 0x00, 0x00, 0x00, 0x90, 0x01, 0x00, 0x00, 0xac, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x38, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff,
+ 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x6c, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x20, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x78, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa0, 0xff, 0xff, 0xff,
+ 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x73, 0x69, 0x7a, 0x65,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31,
+ 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x17, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+ 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+} // namespace neg_invalid_input_shape_float_resize_bilinear_kernel
+
+namespace neg_invalid_param_float_resize_bilinear_kernel
+{
+/*
+ * ResizeBilinear Kernel (invalid params: at most one of "align_corners" and
+ * "half_pixel_centers" may be true):
+ *
+ * align_corners = true; half_pixel_centers = true;
+ *
+ * Input(2, 2, 2, 1) FLOAT
+ * |
+ * | Constant Input(2) [3,3] INT32
+ * | /
+ * ResizeBilinear
+ * |
+ * Output(2, 3, 3, 1) FLOAT
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00,
+ 0x00, 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00,
+ 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x94, 0x01, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00, 0x04,
+ 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00,
+ 0x00, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00,
+ 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff, 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00, 0x04,
+ 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00,
+ 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x70, 0x00,
+ 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00,
+ 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x0f, 0x18, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00,
+ 0x00, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x06, 0x00, 0x0c,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00,
+ 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00,
+ 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa0,
+ 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00,
+ 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00,
+ 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10,
+ 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x73, 0x69, 0x7a, 0x65, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00,
+ 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00,
+ 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10,
+ 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00,
+ 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00,
+ 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x17,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00,
+  0x00};
+
+} // namespace neg_invalid_param_float_resize_bilinear_kernel
+
+namespace neg_invalid_size_shape_dimensions_float_resize_bilinear_kernel
+{
+/*
+ * ResizeBilinear Kernel (invalid size input shape: must hold 2 elements):
+ *
+ * align_corners = false; half_pixel_centers = false;
+ *
+ * Input(2, 2, 2, 1) FLOAT
+ * |
+ * | Constant Input(1) [3] INT32
+ * | /
+ * ResizeBilinear
+ * |
+ * Output(2, 3, 3, 1) FLOAT
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x44, 0x00, 0x00, 0x00, 0x80, 0x01, 0x00, 0x00, 0x9c, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff,
+ 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00,
+ 0x68, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f,
+ 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00,
+ 0x40, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa0, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x73, 0x69, 0x7a, 0x65, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x17, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+  0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+} // namespace neg_invalid_size_shape_dimensions_float_resize_bilinear_kernel
+
+namespace neg_invalid_input_shape_uint8_resize_bilinear_kernel
+{
+/*
+ * ResizeBilinear Kernel (invalid input shape: rank must be 4):
+ *
+ * align_corners = false; half_pixel_centers = true;
+ *
+ * Input(2, 2, 2) UINT8
+ * |
+ * | Constant Input(2) [3,3] INT32
+ * | /
+ * ResizeBilinear
+ * |
+ * Output(2, 3, 3, 1) UINT8
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x1c, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x20, 0x02, 0x00, 0x00, 0x3c, 0x02, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x5a, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x03, 0x44, 0x00, 0x00, 0x00, 0x4c, 0xff, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00,
+ 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x43, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x73, 0x69, 0x7a, 0x65, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x13, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x58, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00,
+ 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00,
+ 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x80, 0x3f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x43, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+} // namespace neg_invalid_input_shape_uint8_resize_bilinear_kernel
+
+namespace neg_invalid_param_uint8_resize_bilinear_kernel
+{
+/*
+ * ResizeBilinear Kernel (invalid params: at most one of "align_corners" and
+ * "half_pixel_centers" may be true):
+ *
+ * align_corners = true; half_pixel_centers = true;
+ *
+ * Input(2, 2, 2, 1) UINT8
+ * |
+ * | Constant Input(2) [3,3] INT32
+ * | /
+ * ResizeBilinear
+ * |
+ * Output(2, 3, 3, 1) UINT8
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x1c, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e,
+ 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00,
+ 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x20, 0x02, 0x00, 0x00, 0x3c,
+ 0x02, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00,
+ 0x24, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04,
+ 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x03, 0x00,
+ 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94,
+ 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00,
+ 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00,
+ 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00,
+ 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x0f, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02,
+ 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x03, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x00, 0x00, 0x5a, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x02,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x44, 0x00, 0x00, 0x00, 0x4c, 0xff, 0xff, 0xff,
+ 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00,
+ 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x43, 0x01,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+ 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00,
+ 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x03,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x73, 0x69, 0x7a, 0x65, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x13, 0x00, 0x0c, 0x00, 0x08, 0x00,
+ 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x01,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x58, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00,
+ 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00,
+ 0x00, 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x43,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d,
+ 0x31, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00,
+ 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01,
+ 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x17, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65,
+ 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00
+
+};
+} // namespace neg_invalid_param_uint8_resize_bilinear_kernel
+
+namespace neg_invalid_size_shape_dimensions_uint8_resize_bilinear_kernel
+{
+/*
+ * ResizeBilinear Kernel (invalid dimensions of the size shape):
+ *
+ * align_corners = false; half_pixel_centers = false;
+ *
+ * Input(2, 2, 2, 1) UINT8
+ * |
+ * | Constant Input(1) [3] INT32
+ * | /
+ * ResizeBilinear
+ * |
+ * Output(2, 3, 3, 1) UINT8
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x44, 0x00, 0x00, 0x00, 0x80, 0x01, 0x00, 0x00, 0x9c, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff,
+ 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00,
+ 0x68, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f,
+ 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00,
+ 0x40, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa0, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x73, 0x69, 0x7a, 0x65, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x17, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+ 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00
+
+};
+
+} // namespace neg_invalid_size_shape_dimensions_uint8_resize_bilinear_kernel
+
+class NegTestDataInvalidInputShapeFloatResizeBilinearKernel : public NegTestDataBase
+{
+public:
+ NegTestDataInvalidInputShapeFloatResizeBilinearKernel()
+ {
+ _test_kernel_model_circle =
+ neg_invalid_input_shape_float_resize_bilinear_kernel::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInvalidInputShapeFloatResizeBilinearKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+class NegTestDataInvalidParamFloatResizeBilinearKernel : public NegTestDataBase
+{
+public:
+ NegTestDataInvalidParamFloatResizeBilinearKernel()
+ {
+ _test_kernel_model_circle =
+ neg_invalid_param_float_resize_bilinear_kernel::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInvalidParamFloatResizeBilinearKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+class NegTestDataInvalidSizeShapeDimensionsFloatResizeBilinearKernel : public NegTestDataBase
+{
+public:
+ NegTestDataInvalidSizeShapeDimensionsFloatResizeBilinearKernel()
+ {
+ _test_kernel_model_circle =
+ neg_invalid_size_shape_dimensions_float_resize_bilinear_kernel::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInvalidSizeShapeDimensionsFloatResizeBilinearKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+class NegTestDataInvalidInputShapeUint8ResizeBilinearKernel : public NegTestDataBase
+{
+public:
+ NegTestDataInvalidInputShapeUint8ResizeBilinearKernel()
+ {
+ _test_kernel_model_circle =
+ neg_invalid_input_shape_uint8_resize_bilinear_kernel::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInvalidInputShapeUint8ResizeBilinearKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+class NegTestDataInvalidParamUint8ResizeBilinearKernel : public NegTestDataBase
+{
+public:
+ NegTestDataInvalidParamUint8ResizeBilinearKernel()
+ {
+ _test_kernel_model_circle =
+ neg_invalid_param_uint8_resize_bilinear_kernel::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInvalidParamUint8ResizeBilinearKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+class NegTestDataInvalidSizeShapeDimensionsUint8ResizeBilinearKernel : public NegTestDataBase
+{
+public:
+ NegTestDataInvalidSizeShapeDimensionsUint8ResizeBilinearKernel()
+ {
+ _test_kernel_model_circle =
+ neg_invalid_size_shape_dimensions_uint8_resize_bilinear_kernel::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInvalidSizeShapeDimensionsUint8ResizeBilinearKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_RESIZE_BILINEAR_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/resize_bilinear/TestDataResizeBilinearBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/resize_bilinear/TestDataResizeBilinearBase.h
new file mode 100644
index 000000000..218085111
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/resize_bilinear/TestDataResizeBilinearBase.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_RESIZE_BILINEAR_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_RESIZE_BILINEAR_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
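+// Shared fixture for ResizeBilinear test models. Concrete test classes (e.g.
+// TestDataUint8ResizeBilinear in U8ResizeBilinearKernel.h) are expected to fill
+// _input_data, _reference_output_data and _test_kernel_model_circle.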
+template <typename T> class TestDataResizeBilinearBase : public TestDataBase<T>
+{
+public:
+ TestDataResizeBilinearBase() = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input_data;
+      default:
+        assert(false && "Wrong input index");
+        return _input_data; // unreachable with assertions on; avoids falling off a value-returning function
+    }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input_data;
+ std::vector<T> _reference_output_data;
+
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_RESIZE_BILINEAR_KERNEL_BASE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/resize_bilinear/U8ResizeBilinearKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/resize_bilinear/U8ResizeBilinearKernel.h
new file mode 100644
index 000000000..7c422b1e4
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/resize_bilinear/U8ResizeBilinearKernel.h
@@ -0,0 +1,218 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_UINT8_RESIZE_BILINEAR_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_UINT8_RESIZE_BILINEAR_KERNEL_H
+
+#include "TestDataResizeBilinearBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+namespace resize_bilinear_uint8
+{
+/*
+ * ResizeBilinear Kernel:
+ *
+ * align_corners = false; half_pixel_centers = false;
+ *
+ * Input(2, 2, 2, 1) UINT8
+ * |
+ * | Constant Input(2) [3,3] INT32
+ * | /
+ * ResizeBilinear
+ * |
+ * Output(2, 3, 3, 1) UINT8
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x1c, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x18, 0x02, 0x00, 0x00, 0x34, 0x02, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x5a, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x03, 0x48, 0x00, 0x00, 0x00, 0x4c, 0xff, 0xff, 0xff, 0x30, 0x00, 0x00, 0x00,
+ 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x80, 0x3f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x43, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x73, 0x69, 0x7a, 0x65, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x13, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x54, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00,
+ 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00,
+ 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x43, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00
+
+};
+
+const std::vector<uint8_t> input_data = {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+};
+
+const std::vector<uint8_t> reference_output_data = {
+ 3, 5, 6, //
+ 7, 9, 10, //
+ 9, 11, 12, //
+ 4, 8, 10, //
+ 8, 12, 14, //
+ 10, 14, 16, //
+};
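+// Informal check of the reference values, assuming the usual TFLite mapping for
+// align_corners = false, half_pixel_centers = false (src = dst * in_size / out_size):
+//   out(0, 1): src_x = 2/3 -> 3 + (2/3) * (6 - 3) = 5
+//   out(1, 1): fx = fy = 2/3 -> (1*3 + 2*6 + 2*9 + 4*12) / 9 = 9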
+
+} // namespace resize_bilinear_uint8
+
+namespace resize_bilinear_uint8_half_pixel_centers
+{
+/*
+ * ResizeBilinear Kernel:
+ *
+ * align_corners = false; half_pixel_centers = true;
+ *
+ * Input(2, 2, 2, 1) UINT8
+ * |
+ * | Constant Input(2) [3,3] INT32
+ * | /
+ * ResizeBilinear
+ * |
+ * Output(2, 3, 3, 1) UINT8
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x1c, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x28, 0x02, 0x00, 0x00, 0x44, 0x02, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0xf8, 0xff, 0xff, 0xff, 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x18, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x07, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x5a, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x03, 0x48, 0x00, 0x00, 0x00, 0x4c, 0xff, 0xff, 0xff, 0x30, 0x00, 0x00, 0x00,
+ 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x80, 0x3f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x43, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x73, 0x69, 0x7a, 0x65, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x13, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x54, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00,
+ 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00,
+ 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x43, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00
+
+};
+
+const std::vector<uint8_t> input_data = {
+ 1, 2, //
+ 3, 4, //
+ 1, 2, //
+ 3, 4 //
+};
+
+const std::vector<uint8_t> reference_output_data = {
+ 1, 2, 2, //
+ 2, 3, 3, //
+ 3, 4, 4, //
+ 1, 2, 2, //
+ 2, 3, 3, //
+ 3, 4, 4, //
+};
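+// Informal check, assuming the usual half-pixel mapping
+// src = (dst + 0.5) * in_size / out_size - 0.5, clamped to >= 0:
+//   out(0, 0): src = -1/6 -> clamped to 0 -> input(0, 0) = 1
+//   out(1, 1): src_x = src_y = 0.5 -> (1 + 2 + 3 + 4) / 4 = 2.5 -> rounds to 3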
+
+} // namespace resize_bilinear_uint8_half_pixel_centers
+
+class TestDataUint8ResizeBilinear : public TestDataResizeBilinearBase<uint8_t>
+{
+public:
+ TestDataUint8ResizeBilinear(bool half_pixel_centers)
+ {
+ if (!half_pixel_centers)
+ {
+ _input_data = resize_bilinear_uint8::input_data;
+ _reference_output_data = resize_bilinear_uint8::reference_output_data;
+ _test_kernel_model_circle = resize_bilinear_uint8::test_kernel_model_circle;
+ }
+ else
+ {
+ _input_data = resize_bilinear_uint8_half_pixel_centers::input_data;
+ _reference_output_data = resize_bilinear_uint8_half_pixel_centers::reference_output_data;
+ _test_kernel_model_circle =
+ resize_bilinear_uint8_half_pixel_centers::test_kernel_model_circle;
+ }
+ }
+
+ ~TestDataUint8ResizeBilinear() override = default;
+};
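+// Usage sketch: TestDataUint8ResizeBilinear(/*half_pixel_centers=*/true) selects the
+// half-pixel-centers model and data above; false selects the default variant.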
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_UINT8_RESIZE_BILINEAR_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/shape/NegShapeKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/shape/NegShapeKernel.h
new file mode 100644
index 000000000..5c7737a4a
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/shape/NegShapeKernel.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_SHAPE_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_NEG_SHAPE_KERNEL_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace neg_output_wrong_type_shape_kernel
+{
+/*
+ * Shape Kernel with wrong output type (should be INT32):
+ *
+ * Input(2, 3, 4)
+ * |
+ * Shape
+ * |
+ * Output(3) - FLOAT32
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x2c, 0x00, 0x00, 0x00, 0x20, 0x01, 0x00, 0x00, 0x3c, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x18, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff,
+ 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x64, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x16, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x37, 0x14, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x07, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xe0, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x4d, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x4d, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+ 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+} // namespace neg_output_wrong_type_shape_kernel
+
+class NegTestDataWrongOutputTypeShapeKernel : public NegTestDataBase
+{
+public:
+ NegTestDataWrongOutputTypeShapeKernel()
+ {
+ _test_kernel_model_circle = neg_output_wrong_type_shape_kernel::test_kernel_model_circle;
+ }
+
+ ~NegTestDataWrongOutputTypeShapeKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_SHAPE_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/shape/ShapeKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/shape/ShapeKernel.h
new file mode 100644
index 000000000..4be3f56f2
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/shape/ShapeKernel.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_SHAPE_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_SHAPE_KERNEL_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+namespace shape_kernel
+{
+/*
+ * Shape Kernel:
+ *
+ * Input(2, 3, 4)
+ * |
+ * Shape
+ * |
+ * Output(3)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x2c, 0x00, 0x00, 0x00, 0x30, 0x01, 0x00, 0x00, 0x4c, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x18, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff,
+ 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x64, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x16, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x37, 0x14, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x07, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x4d, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x4d, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+ 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input_data = {
+ 9.817013, -10.169584, -11.175514, 6.3500366, -39.949837, 2.3447914, 14.254675, 20.6128,
+ 8.819141, -10.237312, -5.171467, 4.7246437, 11.657671, 20.094395, 11.213078, -13.8377495,
+ 10.846771, -15.841316, 7.4385757, -6.9196777, 12.076214, 18.011564, -14.684473, 2.7402115};
+
+const std::vector<int32_t> reference_output_data = {2, 3, 4};
+} // namespace shape_kernel
+
+template <typename T, typename U> class TestDataShapeKernel : public TestDataBase<T, U>
+{
+public:
+ TestDataShapeKernel()
+ {
+ _input_data = shape_kernel::input_data;
+ _reference_output_data = shape_kernel::reference_output_data;
+ _test_kernel_model_circle = shape_kernel::test_kernel_model_circle;
+ }
+
+ ~TestDataShapeKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input_data;
+      default:
+        assert(false && "Wrong input index");
+        return _input_data; // unreachable with assertions on; avoids falling off a value-returning function
+    }
+ }
+
+ const std::vector<U> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input_data;
+ std::vector<U> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
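+// Usage sketch (T is the input element type, U the element type of the Shape
+// output, which the model above declares as INT32):
+//   TestDataShapeKernel<float, int32_t> test_data; // input (2, 3, 4) -> output {2, 3, 4}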
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_SHAPE_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/slice/FloatSliceKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/slice/FloatSliceKernel.h
new file mode 100644
index 000000000..6586d37d0
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/slice/FloatSliceKernel.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_FLOAT_SLICE_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_FLOAT_SLICE_KERNEL_H
+
+#include "TestDataSliceBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace slice_float
+{
+/*
+ * Slice Kernel:
+ *
+ * Input(3, 2, 3)
+ * |
+ * Slice
+ * |
+ * Output(1, 1, 3)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x68, 0x00, 0x00, 0x00, 0xc8, 0x01, 0x00, 0x00, 0xe4, 0x01, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x54, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xe6, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff,
+ 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00,
+ 0x68, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x30, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xa0, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x7c, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xd8, 0xff, 0xff, 0xff,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x73, 0x69, 0x7a, 0x65, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x62, 0x65, 0x67, 0x69, 0x6e, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x41, 0x11, 0x00, 0x00, 0x00,
+ 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+ 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input_data = {-6.0019245, -25.824707, -46.067307, -17.168013, -9.692509,
+ -42.846222, -18.903988, -26.145718, -10.458343, -27.042469,
+ -34.02651, -43.133247, 0.57390976, 9.837246, -22.825436,
+ 9.404066, -16.980595, -16.267637};
+
+const std::vector<float> reference_output_data = {-18.903988, -26.145718, -10.458343};
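+// Note: the begin/size operands are constants baked into the model buffer above;
+// matching the reference output against the flattened input suggests
+// begin = {1, 0, 0} and size = {1, 1, 3} (elements 6..8 of the input).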
+
+} // namespace slice_float
+
+class TestDataFloatSlice : public TestDataSliceBase<float>
+{
+public:
+ TestDataFloatSlice()
+ {
+ _input_data = slice_float::input_data;
+ _reference_output_data = slice_float::reference_output_data;
+ _test_kernel_model_circle = slice_float::test_kernel_model_circle;
+ }
+
+ ~TestDataFloatSlice() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_FLOAT_SLICE_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/slice/NegSliceKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/slice/NegSliceKernel.h
new file mode 100644
index 000000000..2b59e8e5a
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/slice/NegSliceKernel.h
@@ -0,0 +1,292 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_SLICE_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_NEG_SLICE_KERNEL_H
+
+#include "TestDataSliceBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace slice_type_mismatch
+{
+/*
+ * Slice Kernel with input type != output type:
+ *
+ * Input(3, 2, 3) - Float32
+ * |
+ * Slice
+ * |
+ * Output(1, 1, 3) - Int32
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x68, 0x00, 0x00, 0x00, 0xcc, 0x01, 0x00, 0x00, 0xe8, 0x01, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x54, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xe6, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff,
+ 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00,
+ 0x68, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x30, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xa4, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xac, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0xd8, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x73, 0x69, 0x7a, 0x65, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x62, 0x65, 0x67, 0x69,
+ 0x6e, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x41,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+} // namespace slice_type_mismatch
+
+namespace slice_wrong_begin_type
+{
+/*
+ * Slice Kernel with wrong begin type (should be int32 or int64):
+ *
+ * Input(3, 2, 3) Begin(3) - Float32
+ * | /
+ * Slice
+ * |
+ * Output(1, 1, 3)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x68, 0x00, 0x00, 0x00, 0xc4, 0x01, 0x00, 0x00, 0xe0, 0x01, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x54, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xe6, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x80, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff,
+ 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00,
+ 0x68, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x30, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x9c, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x80, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x73, 0x69, 0x7a, 0x65, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0xdc, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x62, 0x65, 0x67, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x41, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+} // namespace slice_wrong_begin_type
+
+namespace slice_wrong_size_type
+{
+/*
+ * Slice Kernel with wrong size type (should be int32 or int64):
+ *
+ * Input(3, 2, 3) Size(3) - Float32
+ * | /
+ * Slice
+ * |
+ * Output(1, 1, 3)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x68, 0x00, 0x00, 0x00, 0xc4, 0x01, 0x00, 0x00, 0xe0, 0x01, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x54, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xe6, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x80, 0x3f, 0x00, 0x00, 0x80, 0x3f, 0x00, 0x00, 0x40, 0x40, 0x00, 0x00, 0x06, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff,
+ 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00,
+ 0x68, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x30, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x9c, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x80, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xa8, 0xff, 0xff, 0xff,
+ 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x73, 0x69, 0x7a, 0x65, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x62, 0x65, 0x67, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x41, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+} // namespace slice_wrong_size_type
+
+namespace slice_wrong_input_shape
+{
+/*
+ * Slice Kernel with wrong input shape (rank is 6 but should be <= 5):
+ *
+ * Input(3, 2, 3, 1, 1, 1)
+ * |
+ * Slice
+ * |
+ * Output(1, 1, 3, 1, 1, 1)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x68, 0x00, 0x00, 0x00, 0xe0, 0x01, 0x00, 0x00, 0xfc, 0x01, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x54, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xe6, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff,
+ 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00,
+ 0x68, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x30, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xac, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x70, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xd8, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x73, 0x69, 0x7a, 0x65, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x62, 0x65, 0x67, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x00, 0x06, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x41, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+ 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+} // namespace slice_wrong_input_shape
+
+class TestDataTypeMismatchSlice : public NegTestDataBase
+{
+public:
+ TestDataTypeMismatchSlice()
+ {
+ _test_kernel_model_circle = slice_type_mismatch::test_kernel_model_circle;
+ }
+
+ ~TestDataTypeMismatchSlice() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+class TestDataWrongBeginTypeSlice : public NegTestDataBase
+{
+public:
+ TestDataWrongBeginTypeSlice()
+ {
+ _test_kernel_model_circle = slice_wrong_begin_type::test_kernel_model_circle;
+ }
+
+ ~TestDataWrongBeginTypeSlice() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+class TestDataWrongSizeTypeSlice : public NegTestDataBase
+{
+public:
+ TestDataWrongSizeTypeSlice()
+ {
+ _test_kernel_model_circle = slice_wrong_size_type::test_kernel_model_circle;
+ }
+
+ ~TestDataWrongSizeTypeSlice() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+class TestDataWrongInputShapeSlice : public NegTestDataBase
+{
+public:
+ TestDataWrongInputShapeSlice()
+ {
+ _test_kernel_model_circle = slice_wrong_input_shape::test_kernel_model_circle;
+ }
+
+ ~TestDataWrongInputShapeSlice() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_SLICE_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/slice/QuantS16SliceKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/slice/QuantS16SliceKernel.h
new file mode 100644
index 000000000..7d3d5a77f
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/slice/QuantS16SliceKernel.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_QUANT_S16_SLICE_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_QUANT_S16_SLICE_KERNEL_H
+
+#include "TestDataSliceBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace slice_int16
+{
+/*
+ * Slice Kernel:
+ *
+ * Input(3, 2, 3)
+ * |
+ * Slice
+ * |
+ * Output(1, 1, 3)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x1c, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x58, 0x02, 0x00, 0x00, 0x74, 0x02, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00,
+ 0x28, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xe6, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x64, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xe4, 0x00, 0x00, 0x00, 0xa8, 0x00, 0x00, 0x00,
+ 0x70, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x3a, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00,
+ 0x48, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x44, 0x00, 0x00, 0x00,
+ 0x2c, 0xff, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0xba, 0xc0, 0x40, 0x3d, 0x01, 0x00, 0x00, 0x00, 0x3e, 0x3f, 0xbf, 0x40,
+ 0x01, 0x00, 0x00, 0x00, 0xc2, 0xc0, 0xc0, 0xc0, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0xd8, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x73, 0x69, 0x7a, 0x65, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x62, 0x65, 0x67, 0x69,
+ 0x6e, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x13, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x20, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07,
+ 0x50, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0xba, 0xc0, 0x40, 0x3d, 0x01, 0x00, 0x00, 0x00, 0x3e, 0x3f, 0xbf, 0x40,
+ 0x01, 0x00, 0x00, 0x00, 0xc2, 0xc0, 0xc0, 0xc0, 0x03, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x41,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<int16_t> input_data = {-24, -47, -23, -26, -23, -25, -24, -52, -14,
+ -23, -23, -23, -41, -24, -26, -22, -53, -23};
+
+const std::vector<int16_t> reference_output_data = {-24, -52, -14};
+
+} // namespace slice_int16
+
+class TestDataS16Slice : public TestDataSliceBase<int16_t>
+{
+public:
+ TestDataS16Slice()
+ {
+ _input_data = slice_int16::input_data;
+ _reference_output_data = slice_int16::reference_output_data;
+ _test_kernel_model_circle = slice_int16::test_kernel_model_circle;
+ }
+
+ ~TestDataS16Slice() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_QUANT_S16_SLICE_KERNEL_H
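
A quick sanity check on the S16 test vectors above: with the (3, 2, 3) input stored row-major, a Slice with begin [1, 0, 0] and size [1, 1, 3] (inferred from the input/output shapes and the test vectors, not decoded from the flatbuffer) selects flat elements 6..8, which is exactly reference_output_data. A standalone sketch using only standard C++:

#include <cstdint>
#include <vector>

std::vector<int16_t> sliceRef(const std::vector<int16_t> &in)
{
  // begin = [1, 0, 0], size = [1, 1, 3] on a (3, 2, 3) row-major tensor:
  // flat offset = 1 * (2 * 3) = 6, length = 3.
  return {in.begin() + 6, in.begin() + 9};
}

// sliceRef(slice_int16::input_data) yields {-24, -52, -14},
// i.e. slice_int16::reference_output_data.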
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/slice/QuantU8SliceKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/slice/QuantU8SliceKernel.h
new file mode 100644
index 000000000..1f688d2d5
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/slice/QuantU8SliceKernel.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_QUANT_U8_SLICE_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_QUANT_U8_SLICE_KERNEL_H
+
+#include "TestDataSliceBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace slice_uint8
+{
+/*
+ * Slice Kernel:
+ *
+ * Input(3, 2, 3)
+ * |
+ * Slice
+ * |
+ * Output(1, 1, 3)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x1c, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x58, 0x02, 0x00, 0x00, 0x74, 0x02, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00,
+ 0x28, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xe6, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x64, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xe4, 0x00, 0x00, 0x00, 0xa8, 0x00, 0x00, 0x00,
+ 0x70, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x3a, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00,
+ 0x48, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x44, 0x00, 0x00, 0x00,
+ 0x2c, 0xff, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0xba, 0xc0, 0x40, 0x3d, 0x01, 0x00, 0x00, 0x00, 0x3e, 0x3f, 0xbf, 0x40,
+ 0x01, 0x00, 0x00, 0x00, 0xc2, 0xc0, 0xc0, 0xc0, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0xd8, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x73, 0x69, 0x7a, 0x65, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x62, 0x65, 0x67, 0x69,
+ 0x6e, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x13, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x20, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03,
+ 0x50, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0xba, 0xc0, 0x40, 0x3d, 0x01, 0x00, 0x00, 0x00, 0x3e, 0x3f, 0xbf, 0x40,
+ 0x01, 0x00, 0x00, 0x00, 0xc2, 0xc0, 0xc0, 0xc0, 0x03, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x41,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<uint8_t> input_data = {3, 218, 233, 232, 234, 242, 238, 233, 218,
+ 240, 1, 233, 225, 242, 218, 249, 203, 225};
+
+const std::vector<uint8_t> reference_output_data = {238, 233, 218};
+
+} // namespace slice_uint8
+
+class TestDataU8Slice : public TestDataSliceBase<uint8_t>
+{
+public:
+ TestDataU8Slice()
+ {
+ _input_data = slice_uint8::input_data;
+ _reference_output_data = slice_uint8::reference_output_data;
+ _test_kernel_model_circle = slice_uint8::test_kernel_model_circle;
+ }
+
+ ~TestDataU8Slice() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_QUANT_U8_SLICE_KERNEL_H
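
Apart from the uint8 test vectors, the U8 dump above appears to differ from the S16 one only in the two serialized tensor-type fields (0x03 = UINT8 where the S16 model carries 0x07 = INT16, per the circle TensorType enum); the graph structure and the quantization-parameter bytes are otherwise identical.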
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/slice/TestDataSliceBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/slice/TestDataSliceBase.h
new file mode 100644
index 000000000..759a2488d
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/slice/TestDataSliceBase.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_SLICE_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_SLICE_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T> class TestDataSliceBase : public TestDataBase<T>
+{
+public:
+ TestDataSliceBase() = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input_data;
+ default:
+        assert(false && "Wrong input index");
+        return _input_data; // unreachable when asserts are enabled; avoids missing-return UB
+ }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input_data;
+ std::vector<T> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_SLICE_KERNEL_BASE_H
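
The TestData* classes in these headers are plain data carriers. Below is a minimal sketch of how a kernel test might consume one, assuming TestDataBase.h is included; actual_output stands in for the result of running get_model_ptr() through the interpreter with get_input_data_by_index(0) as the only input (the harness call itself is not part of this diff):

#include <cassert>
#include <vector>

template <typename T>
void checkSingleOutputKernel(luci_interpreter::test_kernel::TestDataBase<T> &td,
                             const std::vector<T> &actual_output)
{
  // Compare interpreter results against the single reference output.
  assert(actual_output == td.get_output_data_by_index(0));
}

// Usage: TestDataS16Slice td; checkSingleOutputKernel(td, actual_output);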
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/split/FloatSplitKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/split/FloatSplitKernel.h
new file mode 100644
index 000000000..7a9862780
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/split/FloatSplitKernel.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_SPLIT_KERNEL_FLOAT_H
+#define LUCI_INTERPRETER_TEST_MODELS_SPLIT_KERNEL_FLOAT_H
+
+#include "TestDataSplitBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace split_float
+{
+
+/*
+ * Split Kernel:
+ *
+ * Input(6, 1, 2) Split_dim(scalar=0)
+ * \ | /
+ * Split
+ * / \
+ * Output(3, 1, 2) Output(3, 1, 2)
+ */
+
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x48, 0x00, 0x00, 0x00, 0xbc, 0x01, 0x00, 0x00, 0xd8, 0x01, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x34, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x72, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xf4, 0xff, 0xff, 0xff, 0xf8, 0xff, 0xff, 0xff, 0xfc, 0xff, 0xff, 0xff,
+ 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x74, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x16, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x23, 0x14, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xa8, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x74, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0xa0, 0xff, 0xff, 0xff,
+ 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x6f, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x14, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x73, 0x70, 0x6c, 0x69,
+ 0x74, 0x5f, 0x64, 0x69, 0x6d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x31,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input_data = {20.07405, 30.467144, 34.943245, 38.18743,
+ 39.956573, 30.283304, 42.44296, 41.62288,
+ 16.24289, 38.450634, 27.079258, 47.636314};
+
+const std::vector<float> reference_output_data_1 = {20.07405, 30.467144, 34.943245,
+ 38.18743, 39.956573, 30.283304};
+const std::vector<float> reference_output_data_2 = {42.44296, 41.62288, 16.24289,
+ 38.450634, 27.079258, 47.636314};
+
+} // namespace split_float
+
+class TestDataFloatSplit : public TestDataSplitBase<float>
+{
+public:
+ TestDataFloatSplit()
+ {
+ _input_data = split_float::input_data;
+ _reference_output_data_1 = split_float::reference_output_data_1;
+ _reference_output_data_2 = split_float::reference_output_data_2;
+ _test_kernel_model_circle = split_float::test_kernel_model_circle;
+ }
+
+ ~TestDataFloatSplit() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_SPLIT_KERNEL_FLOAT_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/split/IntSplitKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/split/IntSplitKernel.h
new file mode 100644
index 000000000..87b898d58
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/split/IntSplitKernel.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_SPLIT_KERNEL_INT_H
+#define LUCI_INTERPRETER_TEST_MODELS_SPLIT_KERNEL_INT_H
+
+#include "TestDataSplitBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace split_int
+{
+
+/*
+ * Split Kernel:
+ *
+ * Input(6, 1, 2) Split_dim(scalar=0)
+ * \ | /
+ * Split
+ * / \
+ * Output(3, 1, 2) Output(3, 1, 2)
+ */
+
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x48, 0x00, 0x00, 0x00, 0xbc, 0x01, 0x00, 0x00, 0xd8, 0x01, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x34, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x72, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xf4, 0xff, 0xff, 0xff, 0xf8, 0xff, 0xff, 0xff, 0xfc, 0xff, 0xff, 0xff,
+ 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x74, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x16, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x23, 0x14, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xa4, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x78, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0xa8, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0xd8, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x14, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x73, 0x70, 0x6c, 0x69, 0x74, 0x5f, 0x64, 0x69,
+ 0x6d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x31,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<int32_t> input_data = {40, 40, 30, 40, 35, 25, 35, 30, 35, 30, 40, 35};
+
+const std::vector<int32_t> reference_output_data_1 = {40, 40, 30, 40, 35, 25};
+const std::vector<int32_t> reference_output_data_2 = {35, 30, 35, 30, 40, 35};
+
+} // namespace split_int
+
+class TestDataIntSplit : public TestDataSplitBase<int32_t>
+{
+public:
+ TestDataIntSplit()
+ {
+ _input_data = split_int::input_data;
+ _reference_output_data_1 = split_int::reference_output_data_1;
+ _reference_output_data_2 = split_int::reference_output_data_2;
+ _test_kernel_model_circle = split_int::test_kernel_model_circle;
+ }
+
+ ~TestDataIntSplit() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_SPLIT_KERNEL_INT_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/split/TestDataSplitBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/split/TestDataSplitBase.h
new file mode 100644
index 000000000..7ddcd75d5
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/split/TestDataSplitBase.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_SPLIT_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_SPLIT_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T> class TestDataSplitBase : public TestDataBase<T>
+{
+public:
+ TestDataSplitBase() = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input_data;
+ default:
+        assert(false && "Wrong input index");
+        return _input_data; // unreachable when asserts are enabled; avoids missing-return UB
+ }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _reference_output_data_1;
+ case 1:
+ return _reference_output_data_2;
+ default:
+        assert(false && "Wrong output index");
+        return _reference_output_data_1; // unreachable when asserts are enabled; avoids missing-return UB
+ }
+ }
+
+protected:
+ std::vector<T> _input_data;
+ std::vector<T> _reference_output_data_1;
+ std::vector<T> _reference_output_data_2;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_SPLIT_KERNEL_BASE_H
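
Unlike the single-output bases, TestDataSplitBase exposes two reference outputs through the same indexed accessor. A minimal sketch of the corresponding check, with `actual` standing in for interpreter results (hypothetical, not part of this diff):

#include <cassert>
#include <vector>

template <typename T>
void checkSplitOutputs(luci_interpreter::test_kernel::TestDataSplitBase<T> &td,
                       const std::vector<std::vector<T>> &actual)
{
  assert(actual.size() == 2); // the Split test models above have two outputs
  for (int i = 0; i < 2; ++i)
    assert(actual[i] == td.get_output_data_by_index(i));
}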
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/split_v/SplitVKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/split_v/SplitVKernel.h
new file mode 100644
index 000000000..bb87c4068
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/split_v/SplitVKernel.h
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_SPLIT_V_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_SPLIT_V_KERNEL_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+namespace split_v_kernel
+{
+/*
+ * SplitV Kernel:
+ *
+ * Input(6, 1, 2) Size_splits([1, 2, 3]) Split_dim(scalar=0)
+ * \ | /
+ * SplitV
+ * / | \
+ * Output(1, 1, 2) Output(2, 1, 2) Output(3, 1, 2)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x6c, 0x00, 0x00, 0x00, 0x4c, 0x02, 0x00, 0x00, 0x68, 0x02, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
+ 0x58, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00,
+ 0x34, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x56, 0xff, 0xff, 0xff,
+ 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0xff, 0xff, 0xff,
+ 0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0xf0, 0xff, 0xff, 0xff, 0xf4, 0xff, 0xff, 0xff, 0xf8, 0xff, 0xff, 0xff,
+ 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x74, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x16, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4f, 0x14, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0x08, 0x01, 0x00, 0x00, 0xcc, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00,
+ 0x34, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1c, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x33,
+ 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x48, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x74, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0xd8, 0xff, 0xff, 0xff,
+ 0x10, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x14, 0x00, 0x00, 0x00,
+ 0x09, 0x00, 0x00, 0x00, 0x73, 0x70, 0x6c, 0x69, 0x74, 0x5f, 0x64, 0x69, 0x6d, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x14, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x73, 0x69, 0x7a, 0x65, 0x5f, 0x73, 0x70, 0x6c,
+ 0x69, 0x74, 0x73, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input_data = {-3.9030151, -4.558613, -12.806297, -2.64188,
+ 17.035677, 26.150639, -12.618465, 0.8286438,
+ -4.850197, -0.20810127, 3.8918018, 4.1862106};
+
+const std::vector<float> reference_output_data_1 = {-3.9030151, -4.558613};
+const std::vector<float> reference_output_data_2 = {-12.806297, -2.64188, 17.035677, 26.150639};
+const std::vector<float> reference_output_data_3 = {-12.618465, 0.8286438, -4.850197,
+ -0.20810127, 3.8918018, 4.1862106};
+
+} // namespace split_v_kernel
+
+template <typename T> class TestDataSplitVKernel : public TestDataBase<T>
+{
+public:
+ TestDataSplitVKernel()
+ {
+ _input_data = split_v_kernel::input_data;
+ _reference_output_data_1 = split_v_kernel::reference_output_data_1;
+ _reference_output_data_2 = split_v_kernel::reference_output_data_2;
+ _reference_output_data_3 = split_v_kernel::reference_output_data_3;
+ _test_kernel_model_circle = split_v_kernel::test_kernel_model_circle;
+ }
+
+ ~TestDataSplitVKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input_data;
+ default:
+        assert(false && "Wrong input index");
+        return _input_data; // unreachable when asserts are enabled; avoids missing-return UB
+ }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _reference_output_data_1;
+ case 1:
+ return _reference_output_data_2;
+ case 2:
+ return _reference_output_data_3;
+ default:
+        assert(false && "Wrong output index");
+        return _reference_output_data_1; // unreachable when asserts are enabled; avoids missing-return UB
+ }
+ }
+
+protected:
+ std::vector<T> _input_data;
+ std::vector<T> _reference_output_data_1;
+ std::vector<T> _reference_output_data_2;
+ std::vector<T> _reference_output_data_3;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_SPLIT_V_KERNEL_H
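
The size_splits tensor [1, 2, 3] partitions the (6, 1, 2) input along split_dim 0, so with row-major storage and an inner size of 1 * 2 = 2, the three outputs are consecutive runs of 2, 4 and 6 elements. A standalone reference computation (standard C++ only):

#include <cstddef>
#include <vector>

std::vector<std::vector<float>> splitVRef(const std::vector<float> &in,
                                          const std::vector<int> &size_splits,
                                          std::size_t inner_size)
{
  std::vector<std::vector<float>> out;
  std::size_t offset = 0;
  for (int s : size_splits)
  {
    // Each split of size s along dim 0 covers s * inner_size flat elements.
    out.emplace_back(in.begin() + offset, in.begin() + offset + s * inner_size);
    offset += s * inner_size;
  }
  return out;
}

// splitVRef(split_v_kernel::input_data, {1, 2, 3}, 2) reproduces
// reference_output_data_1, _2 and _3.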
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/strided_slice/StridedSliceKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/strided_slice/StridedSliceKernel.h
new file mode 100644
index 000000000..4d69013ff
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/strided_slice/StridedSliceKernel.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_STRIDED_SLICE_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_STRIDED_SLICE_KERNEL_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+namespace strided_slice_kernel
+{
+/*
+ * StridedSlice Kernel:
+ *
+ * Input(3, 2, 3) Begin([1, 0, 0]) End([2, 1, 3]) Strides([1, 1, 1])
+ * | | | |
+ * ------------------- | | ------------------
+ * \ | | /
+ * StridedSlice
+ * |
+ * Output(1, 1, 3)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x84, 0x00, 0x00, 0x00, 0x10, 0x02, 0x00, 0x00, 0x2c, 0x02, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0x70, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00,
+ 0x20, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xce, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0xe6, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff,
+ 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x70, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20,
+ 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0xc8, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x58, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0xb4, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x73, 0x74, 0x72, 0x69, 0x64, 0x65, 0x73, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xdc, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x65, 0x6e, 0x64, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x62, 0x65, 0x67, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x2d, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+ 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input_data = {15.78072, -20.820415, 17.688091, 12.961567, -5.469105,
+ -7.867565, -6.989258, -9.068207, 4.974188, 6.2882156,
+ 7.269455, 6.161186, 15.821367, -17.094833, 24.529251,
+ 1.1271019, -8.563269, -7.494442};
+
+const std::vector<float> reference_output_data = {-6.989258, -9.068207, 4.974188};
+
+} // namespace strided_slice_kernel
+
+template <typename T> class TestDataStridedSliceKernel : public TestDataBase<T>
+{
+public:
+ TestDataStridedSliceKernel()
+ {
+ _input_data = strided_slice_kernel::input_data;
+ _reference_output_data = strided_slice_kernel::reference_output_data;
+ _test_kernel_model_circle = strided_slice_kernel::test_kernel_model_circle;
+ }
+
+ ~TestDataStridedSliceKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input_data;
+ default:
+        assert(false && "Wrong input index");
+        return _input_data; // unreachable when asserts are enabled; avoids missing-return UB
+ }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input_data;
+ std::vector<T> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_STRIDED_SLICE_KERNEL_H
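
For the StridedSlice data above, begin [1, 0, 0], end [2, 1, 3] and unit strides on a row-major (3, 2, 3) tensor select the flat elements i*6 + j*3 + k for i in [1, 2), j in [0, 1), k in [0, 3), i.e. input_data[6..8]. A standalone check:

#include <vector>

std::vector<float> stridedSliceRef(const std::vector<float> &in)
{
  std::vector<float> out;
  for (int i = 1; i < 2; ++i)     // begin 1, end 2, stride 1
    for (int j = 0; j < 1; ++j)   // begin 0, end 1, stride 1
      for (int k = 0; k < 3; ++k) // begin 0, end 3, stride 1
        out.push_back(in[i * 6 + j * 3 + k]);
  return out; // {-6.989258, -9.068207, 4.974188}
}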
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/sub/FloatSubKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/sub/FloatSubKernel.h
new file mode 100644
index 000000000..25f83865b
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/sub/FloatSubKernel.h
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_SUB_KERNEL_FLOAT_H
+#define LUCI_INTERPRETER_TEST_MODELS_SUB_KERNEL_FLOAT_H
+
+#include "TestDataSubBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace sub_float_with_broadcasting
+{
+
+/*
+ * Sub Kernel:
+ *
+ * Input_1(2, 5) Input_2(2, 1)
+ * \ /
+ * Sub(with broadcast)
+ * |
+ * Output(2, 5)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0xc0, 0x00, 0x00, 0x00, 0x0c, 0x02, 0x00, 0x00, 0x28, 0x02, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0xac, 0x00, 0x00, 0x00, 0xa4, 0x00, 0x00, 0x00, 0x9c, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00,
+ 0x74, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x9a, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+ 0x58, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x07, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x04, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0a, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x32, 0x2e, 0x31, 0x31, 0x2e, 0x30, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x31, 0x2e, 0x36, 0x2e, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xc4, 0xfe, 0xff, 0xff, 0xc8, 0xfe, 0xff, 0xff, 0xcc, 0xfe, 0xff, 0xff, 0xd0, 0xfe, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x3c, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x9a, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff, 0x03, 0x00, 0x00, 0x00,
+ 0x53, 0x75, 0x62, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0xc6, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0xb8, 0xff, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x00, 0x50, 0x6c, 0x61, 0x63,
+ 0x65, 0x68, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x5f, 0x31, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x10, 0x00,
+ 0x00, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x50, 0x6c, 0x61, 0x63, 0x65, 0x68, 0x6f, 0x6c,
+ 0x64, 0x65, 0x72, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x29,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input1_data = {-8.326889, -1.022953, 0.1884613, 22.581654, 40.9168,
+ 36.762, -9.715984, 4.0496464, 31.648043, 11.501019};
+const std::vector<float> input2_data = {15.127094, 3.3150635};
+const std::vector<float> reference_output_data = {-23.453983, -16.150047, -14.938633, 7.4545593,
+ 25.789707, 33.446938, -13.031048, 0.7345829,
+ 28.33298, 8.185955};
+
+} // namespace sub_float_with_broadcasting
+
+namespace sub_float_no_broadcasting
+{
+/*
+ * Sub Kernel:
+ *
+ * Input_1(2, 5) Input_2(2, 5)
+ * \ /
+ * Sub(no broadcast)
+ * |
+ * Output(2, 5)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0xc0, 0x00, 0x00, 0x00, 0x0c, 0x02, 0x00, 0x00, 0x28, 0x02, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0xac, 0x00, 0x00, 0x00, 0xa4, 0x00, 0x00, 0x00, 0x9c, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00,
+ 0x74, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x9a, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+ 0x58, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x07, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x04, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0a, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x32, 0x2e, 0x31, 0x31, 0x2e, 0x30, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x31, 0x2e, 0x36, 0x2e, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xc4, 0xfe, 0xff, 0xff, 0xc8, 0xfe, 0xff, 0xff, 0xcc, 0xfe, 0xff, 0xff, 0xd0, 0xfe, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x3c, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x9a, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff, 0x03, 0x00, 0x00, 0x00,
+ 0x53, 0x75, 0x62, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0xc6, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0xb8, 0xff, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x00, 0x50, 0x6c, 0x61, 0x63,
+ 0x65, 0x68, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x5f, 0x31, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x10, 0x00,
+ 0x00, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x50, 0x6c, 0x61, 0x63, 0x65, 0x68, 0x6f, 0x6c,
+ 0x64, 0x65, 0x72, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x29,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input1_data = {-12.091457, 7.402893, 5.7398167, 1.6491795, 12.5600815,
+                                        13.821102, 8.744585, 11.44549, -0.7579155, 4.1162605};
+const std::vector<float> input2_data = {13.1849, 12.109516, -0.026350021, -5.5562515, -3.3528423,
+                                        10.397262, 25.990755, 19.270943, -26.921743, 3.6311188};
+const std::vector<float> reference_output_data = {-25.276358, -4.706623, 5.7661667, 7.205431,
+                                                  15.912924, 3.4238405, -17.24617, -7.825453,
+                                                  26.163828, 0.48514175};
+
+} // namespace sub_float_no_broadcasting
+
+class TestDataFloatSub : public TestDataSubBase<float>
+{
+public:
+ explicit TestDataFloatSub(bool is_with_broadcast) : TestDataSubBase<float>(is_with_broadcast)
+ {
+ if (is_with_broadcast)
+ {
+ _input1_data = sub_float_with_broadcasting::input1_data;
+ _input2_data = sub_float_with_broadcasting::input2_data;
+ _reference_output_data = sub_float_with_broadcasting::reference_output_data;
+ _test_kernel_model_circle = sub_float_with_broadcasting::test_kernel_model_circle;
+ }
+ else
+ {
+ _input1_data = sub_float_no_broadcasting::input1_data;
+ _input2_data = sub_float_no_broadcasting::input2_data;
+ _reference_output_data = sub_float_no_broadcasting::reference_output_data;
+ _test_kernel_model_circle = sub_float_no_broadcasting::test_kernel_model_circle;
+ }
+ }
+
+ ~TestDataFloatSub() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_SUB_KERNEL_FLOAT_H
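
In the broadcasting case above, input 2 has shape (2, 1), so its single value per row is subtracted from every element of the matching row of the (2, 5) input 1; the no-broadcast case is plain element-wise subtraction. A standalone reference for the broadcast variant:

#include <cstddef>
#include <vector>

std::vector<float> subBroadcastRef(const std::vector<float> &a, // shape (2, 5)
                                   const std::vector<float> &b) // shape (2, 1)
{
  std::vector<float> out(a.size());
  for (std::size_t i = 0; i < a.size(); ++i)
    out[i] = a[i] - b[i / 5]; // row index i / 5 selects the broadcast value
  return out;
}

// subBroadcastRef(sub_float_with_broadcasting::input1_data,
//                 sub_float_with_broadcasting::input2_data)
// matches reference_output_data, e.g. -8.326889 - 15.127094 = -23.453983.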
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/sub/IntSubKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/sub/IntSubKernel.h
new file mode 100644
index 000000000..81d0edc87
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/sub/IntSubKernel.h
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_SUB_KERNEL_INT_H
+#define LUCI_INTERPRETER_TEST_MODELS_SUB_KERNEL_INT_H
+
+#include "TestDataSubBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace sub_int_with_broadcasting
+{
+
+/*
+ * Sub Kernel:
+ *
+ * Input_1(2, 5) Input_2(2, 1)
+ * \ /
+ * Sub(with broadcast)
+ * |
+ * Output(2, 5)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0xc0, 0x00, 0x00, 0x00, 0x18, 0x02, 0x00, 0x00, 0x34, 0x02, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0xac, 0x00, 0x00, 0x00, 0xa4, 0x00, 0x00, 0x00, 0x9c, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00,
+ 0x74, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x9a, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+ 0x58, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x07, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x04, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0a, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x32, 0x2e, 0x31, 0x31, 0x2e, 0x30, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x31, 0x2e, 0x36, 0x2e, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xb8, 0xfe, 0xff, 0xff, 0xbc, 0xfe, 0xff, 0xff, 0xc0, 0xfe, 0xff, 0xff, 0xc4, 0xfe, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x30, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x92, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x84, 0xff, 0xff, 0xff,
+ 0x03, 0x00, 0x00, 0x00, 0x53, 0x75, 0x62, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0xc2, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x1c, 0x00, 0x00, 0x00, 0xb4, 0xff, 0xff, 0xff,
+ 0x0d, 0x00, 0x00, 0x00, 0x50, 0x6c, 0x61, 0x63, 0x65, 0x68, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x5f,
+ 0x31, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x13, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x1c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x50, 0x6c, 0x61, 0x63, 0x65, 0x68, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x29, 0x11, 0x00, 0x00, 0x00,
+ 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+ 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<int32_t> input1_data = {13, 23, 5, -3, 15, 5, 5, 5, 13, 12};
+const std::vector<int32_t> input2_data = {5, 13};
+const std::vector<int32_t> reference_output_data = {8, 18, 0, -8, 10, -8, -8, -8, 0, -1};
+
+} // namespace sub_int_with_broadcasting
+
+namespace sub_int_no_broadcasting
+{
+/*
+ * Sub Kernel:
+ *
+ * Input_1(2, 5) Input_2(2, 5)
+ * \ /
+ * Sub(no broadcast)
+ * |
+ * Output(2, 5)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0xc0, 0x00, 0x00, 0x00, 0x18, 0x02, 0x00, 0x00, 0x34, 0x02, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0xac, 0x00, 0x00, 0x00, 0xa4, 0x00, 0x00, 0x00, 0x9c, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00,
+ 0x74, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x9a, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+ 0x58, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x07, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x04, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0a, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x32, 0x2e, 0x31, 0x31, 0x2e, 0x30, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x31, 0x2e, 0x36, 0x2e, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xb8, 0xfe, 0xff, 0xff, 0xbc, 0xfe, 0xff, 0xff, 0xc0, 0xfe, 0xff, 0xff, 0xc4, 0xfe, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x30, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x92, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x84, 0xff, 0xff, 0xff,
+ 0x03, 0x00, 0x00, 0x00, 0x53, 0x75, 0x62, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0xc2, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x1c, 0x00, 0x00, 0x00, 0xb4, 0xff, 0xff, 0xff,
+ 0x0d, 0x00, 0x00, 0x00, 0x50, 0x6c, 0x61, 0x63, 0x65, 0x68, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x5f,
+ 0x31, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x13, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x1c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x50, 0x6c, 0x61, 0x63, 0x65, 0x68, 0x6f, 0x6c, 0x64, 0x65, 0x72, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x29, 0x11, 0x00, 0x00, 0x00,
+ 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+ 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<int32_t> input1_data = {5, 7, 13, -3, 13, 7, -5, 5, 13, 5};
+const std::vector<int32_t> input2_data = {-5, -11, 5, 5, 14, 5, 42, 6, 5, 15};
+const std::vector<int32_t> reference_output_data = {10, 18, 8, -8, -1, 2, -47, -1, 8, -10};
+
+} // namespace sub_int_no_broadcasting
+
+class TestDataIntSub : public TestDataSubBase<int32_t>
+{
+public:
+ explicit TestDataIntSub(bool is_with_broadcast) : TestDataSubBase<int32_t>(is_with_broadcast)
+ {
+ if (is_with_broadcast)
+ {
+ _input1_data = sub_int_with_broadcasting::input1_data;
+ _input2_data = sub_int_with_broadcasting::input2_data;
+ _reference_output_data = sub_int_with_broadcasting::reference_output_data;
+ _test_kernel_model_circle = sub_int_with_broadcasting::test_kernel_model_circle;
+ }
+ else
+ {
+ _input1_data = sub_int_no_broadcasting::input1_data;
+ _input2_data = sub_int_no_broadcasting::input2_data;
+ _reference_output_data = sub_int_no_broadcasting::reference_output_data;
+ _test_kernel_model_circle = sub_int_no_broadcasting::test_kernel_model_circle;
+ }
+ }
+
+ ~TestDataIntSub() override = default;
+};
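+
+// Editor's note: an illustrative, self-contained sanity check (not part of the
+// original patch); the real suites run get_model_ptr() through the interpreter
+// and compare against get_output_data_by_index(0).
+inline bool int_sub_fixture_is_consistent(bool with_broadcast)
+{
+  TestDataIntSub data(with_broadcast);
+  // Both Sub fixtures produce one output element per element of the first input.
+  return data.get_input_data_by_index(0).size() == data.get_output_data_by_index(0).size();
+}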
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_SUB_KERNEL_INT_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/sub/NegSubKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/sub/NegSubKernel.h
new file mode 100644
index 000000000..d5ea240d4
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/sub/NegSubKernel.h
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_SUB_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_NEG_SUB_KERNEL_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace neg_inputs_type_mismatch_sub_kernel
+{
+/*
+ * Sub Kernel with mismatched input types:
+ *
+ * Input_1(1, 5, 2, 3)-Int Input_2(1, 5, 2, 3)-Float
+ * \ /
+ * Sub(no broadcast)
+ * |
+ * Output(1, 5, 2, 3)-Float
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x7c, 0x01, 0x00, 0x00, 0x98, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00,
+ 0x40, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x94, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x29,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+} // namespace neg_inputs_type_mismatch_sub_kernel
+
+namespace neg_input_output_type_mismatch_sub_kernel
+{
+/*
+ * Sub Kernel with mismatched input/output types:
+ *
+ * Input_1(1, 5, 2, 3)-Float Input_2(1, 5, 2, 3)-Float
+ * \ /
+ * Sub(no broadcast)
+ * |
+ * Output(1, 5, 2, 3)-Int
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x7c, 0x01, 0x00, 0x00, 0x98, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
+ 0x40, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xd4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x29,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+} // namespace neg_input_output_type_mismatch_sub_kernel
+
+namespace neg_no_quant_params_sub_kernel
+{
+/*
+ * Sub S16 Kernel without quant params:
+ *
+ * Input_1(1, 5, 2, 3)-Int16 Input_2(1, 5, 2, 3)-Int16
+ * \ /
+ * Sub(no broadcast, no quant params)
+ * |
+ * Output(1, 5, 2, 3)-Int16
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x78, 0x01, 0x00, 0x00, 0x94, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x88, 0xff, 0xff, 0xff, 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x10, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x00, 0x00,
+ 0x38, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x9c, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xcc, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x32, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x29, 0x11, 0x00, 0x00, 0x00,
+ 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c,
+ 0x65, 0x00, 0x00, 0x00};
+} // namespace neg_no_quant_params_sub_kernel
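+
+// Editor's note: a small helper sketch (not part of the original patch) that
+// gathers the three malformed models above, e.g. for a parameterized test that
+// expects the importer to reject each of them; the harness API is outside this
+// header, so no interpreter call is shown.
+inline std::vector<const unsigned char *> all_neg_sub_models()
+{
+  return {neg_inputs_type_mismatch_sub_kernel::test_kernel_model_circle,
+          neg_input_output_type_mismatch_sub_kernel::test_kernel_model_circle,
+          neg_no_quant_params_sub_kernel::test_kernel_model_circle};
+}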
+
+class NegTestDataInputsTypeMismatchSubKernel : public NegTestDataBase
+{
+public:
+ NegTestDataInputsTypeMismatchSubKernel()
+ {
+ _test_kernel_model_circle = neg_inputs_type_mismatch_sub_kernel::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInputsTypeMismatchSubKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+class NegTestDataInputOutputTypeMismatchSubKernel : public NegTestDataBase
+{
+public:
+ NegTestDataInputOutputTypeMismatchSubKernel()
+ {
+ _test_kernel_model_circle = neg_input_output_type_mismatch_sub_kernel::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInputOutputTypeMismatchSubKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+class NegTestDataNoQuantParamsSubKernel : public NegTestDataBase
+{
+public:
+ NegTestDataNoQuantParamsSubKernel()
+ {
+ _test_kernel_model_circle = neg_no_quant_params_sub_kernel::test_kernel_model_circle;
+ }
+
+ ~NegTestDataNoQuantParamsSubKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_SUB_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/sub/TestDataSubBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/sub/TestDataSubBase.h
new file mode 100644
index 000000000..8c0775e7b
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/sub/TestDataSubBase.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_SUB_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_SUB_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T> class TestDataSubBase : public TestDataBase<T>
+{
+public:
+ explicit TestDataSubBase(bool)
+ {
+ // Do nothing
+ }
+
+ TestDataSubBase() = delete;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input1_data;
+ case 1:
+ return _input2_data;
+      default:
+        assert(false && "Wrong input index");
+        return _input1_data; // unreachable for valid indices; avoids UB when NDEBUG disables assert
+ }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input1_data;
+ std::vector<T> _input2_data;
+ std::vector<T> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
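+
+// Editor's note: a hedged sketch (not from the original patch) of the access
+// pattern this base class supports, covering the no-broadcast fixtures only;
+// real tests execute the model bytes instead of recomputing the op on the host.
+template <typename T> std::vector<T> host_elementwise_sub(TestDataSubBase<T> &data)
+{
+  const std::vector<T> &in1 = data.get_input_data_by_index(0);
+  const std::vector<T> &in2 = data.get_input_data_by_index(1);
+  assert(in1.size() == in2.size() && "sketch handles the no-broadcast case only");
+  std::vector<T> out(in1.size());
+  for (std::size_t i = 0; i < out.size(); ++i)
+    out[i] = in1[i] - in2[i];
+  return out; // matches get_output_data_by_index(0) for the no-broadcast data
+}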
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_SUB_KERNEL_BASE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/tanh/FloatTanhKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/tanh/FloatTanhKernel.h
new file mode 100644
index 000000000..82ce797cc
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/tanh/FloatTanhKernel.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_FLOAT_TANH_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_FLOAT_TANH_KERNEL_H
+
+#include "TestDataTanhBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace tanh_float
+{
+/*
+ * Tanh Kernel:
+ *
+ * Input(1, 3, 3, 2)
+ * |
+ * Tanh
+ * |
+ * Output(1, 3, 3, 2)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x2c, 0x00, 0x00, 0x00, 0x14, 0x01, 0x00, 0x00, 0x30, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x18, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff,
+ 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x48, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0a, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xd4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input_data = {-18.051472, -25.18281, -1.566864, 10.271992, 9.358999,
+ 30.315918, 22.305614, 4.2511578, 5.960436, -8.210682,
+ 21.21092, -18.009472, -13.981232, -3.081173, 7.770035,
+ 7.670355, 21.851545, 17.201824};
+
+const std::vector<float> reference_output_data = {
+ -1.0, -1.0, -0.9165255, 1.0, 1.0, 1.0, 1.0, 0.99959403, 0.99998677,
+ -1.0, 1.0, -1.0, -1.0, -0.9957943, 0.9999997, 0.99999964, 1.0, 1.0};
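+
+// Editor's note: a minimal sketch (not part of the original patch) showing how
+// the reference values relate to the inputs, assuming <cmath> is reachable via
+// the existing includes; the values above were captured from the reference
+// pipeline, so only approximate equality should be expected.
+inline std::vector<float> host_tanh(const std::vector<float> &in)
+{
+  std::vector<float> out(in.size());
+  for (std::size_t i = 0; i < in.size(); ++i)
+    out[i] = std::tanh(in[i]); // saturates to +/-1.0f once |x| exceeds roughly 9
+  return out;
+}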
+
+} // namespace tanh_float
+
+class TestDataFloatTanh : public TestDataTanhBase<float>
+{
+public:
+ TestDataFloatTanh()
+ {
+ _input_data = tanh_float::input_data;
+ _reference_output_data = tanh_float::reference_output_data;
+ _test_kernel_model_circle = tanh_float::test_kernel_model_circle;
+ }
+
+ ~TestDataFloatTanh() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_FLOAT_TANH_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/tanh/NegTanhKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/tanh/NegTanhKernel.h
new file mode 100644
index 000000000..3ff0b6832
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/tanh/NegTanhKernel.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_TANH_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_NEG_TANH_KERNEL_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace neg_input_output_type_mismatch_kernel
+{
+/*
+ * Tanh Kernel with mismatched input/output types:
+ *
+ * Input(1, 3, 3, 2) - Float32
+ * |
+ * Tanh
+ * |
+ * Output(1, 3, 3, 2) - Int32
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x2c, 0x00, 0x00, 0x00, 0x24, 0x01, 0x00, 0x00, 0x40, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x18, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff,
+ 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x48, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0a, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x69, 0x66, 0x6d, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+} // namespace neg_input_output_type_mismatch_kernel
+
+class NegTestDataInputOutputTypeMismatchTanhKernel : public NegTestDataBase
+{
+public:
+ NegTestDataInputOutputTypeMismatchTanhKernel()
+ {
+ _test_kernel_model_circle = neg_input_output_type_mismatch_kernel::test_kernel_model_circle;
+ }
+
+ ~NegTestDataInputOutputTypeMismatchTanhKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_TANH_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/tanh/TestDataTanhBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/tanh/TestDataTanhBase.h
new file mode 100644
index 000000000..2e12dfec1
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/tanh/TestDataTanhBase.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_TANH_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_TANH_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T> class TestDataTanhBase : public TestDataBase<T>
+{
+public:
+ TestDataTanhBase() = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input_data;
+      default:
+        assert(false && "Wrong input index");
+        return _input_data; // unreachable for valid indices; avoids UB when NDEBUG disables assert
+ }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input_data;
+ std::vector<T> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_TANH_KERNEL_BASE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/transpose/TransposeKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/transpose/TransposeKernel.h
new file mode 100644
index 000000000..ebeda65c0
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/transpose/TransposeKernel.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_TRANSPOSE_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_TRANSPOSE_KERNEL_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+namespace transpose_kernel
+{
+/*
+ * Transpose Kernel:
+ *
+ * Input(3, 8, 1) Perm([1, 2, 0])
+ * \ /
+ * Transpose
+ * |
+ * Output(8, 1, 3)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x4c, 0x00, 0x00, 0x00, 0x7c, 0x01, 0x00, 0x00, 0x98, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x38, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x8c, 0xff, 0xff, 0xff, 0x90, 0xff, 0xff, 0xff, 0x94, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1a, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xa4, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00, 0x03, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x70, 0x65, 0x72, 0x6d,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x27,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input_data = {
+ 20.761623, 13.150787, 7.0800495, -5.8079357, -4.265215, -2.801073, 14.42078, 8.526264,
+ 27.862984, 1.067873, 19.894545, 0.25564194, 10.414932, 3.317482, -0.727417, -5.702162,
+ -2.8192825, 19.296608, 23.116634, 6.5216866, -7.0733185, -3.3730087, -34.845665, 28.050354};
+
+const std::vector<float> reference_output_data = {
+ 20.761623, 27.862984, -2.8192825, 13.150787, 1.067873, 19.296608, 7.0800495, 19.894545,
+ 23.116634, -5.8079357, 0.25564194, 6.5216866, -4.265215, 10.414932, -7.0733185, -2.801073,
+ 3.317482, -3.3730087, 14.42078, -0.727417, -34.845665, 8.526264, -5.702162, 28.050354};
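+
+// Editor's note: a minimal sketch (not part of the original patch) re-deriving
+// the reference above. With input shape (3, 8, 1) and perm [1, 2, 0], output
+// element (j, 0, i) reads input element (i, j, 0), i.e. a plain 3x8 -> 8x3
+// transpose of the flattened data.
+inline std::vector<float> host_transpose_3x8(const std::vector<float> &in)
+{
+  std::vector<float> out(in.size());
+  for (std::size_t j = 0; j < 8; ++j)
+    for (std::size_t i = 0; i < 3; ++i)
+      out[j * 3 + i] = in[i * 8 + j];
+  return out; // host_transpose_3x8(input_data) == reference_output_data
+}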
+} // namespace transpose_kernel
+
+template <typename T> class TestDataTransposeKernel : public TestDataBase<T>
+{
+public:
+ TestDataTransposeKernel()
+ {
+ _input_data = transpose_kernel::input_data;
+ _reference_output_data = transpose_kernel::reference_output_data;
+ _test_kernel_model_circle = transpose_kernel::test_kernel_model_circle;
+ }
+
+ ~TestDataTransposeKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input_data;
+      default:
+        assert(false && "Wrong input index");
+        return _input_data; // unreachable for valid indices; avoids UB when NDEBUG disables assert
+ }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input_data;
+ std::vector<T> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_TRANSPOSE_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/unidirectional_lstm/FloatUnidirectionalLSTMKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/unidirectional_lstm/FloatUnidirectionalLSTMKernel.h
new file mode 100644
index 000000000..ea9674875
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/unidirectional_lstm/FloatUnidirectionalLSTMKernel.h
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_FLOAT_UNIDIRECTIONAL_LSTM_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_FLOAT_UNIDIRECTIONAL_LSTM_KERNEL_H
+
+#include "TestDataUnidirectionalLSTMBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace unidir_lstm_float
+{
+/*
+ * UnidirectionalLSTM Kernel:
+ *
+ * Input(1, 4, 4)
+ * |
+ * UnidirectionalLSTM
+ * |
+ * Output(1, 4, 2)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0xb4, 0x01, 0x00, 0x00, 0xd0, 0x05, 0x00, 0x00, 0xec, 0x05, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00,
+ 0xa0, 0x01, 0x00, 0x00, 0x94, 0x01, 0x00, 0x00, 0x8c, 0x01, 0x00, 0x00, 0x84, 0x01, 0x00, 0x00,
+ 0x64, 0x01, 0x00, 0x00, 0x3c, 0x01, 0x00, 0x00, 0x1c, 0x01, 0x00, 0x00, 0xfc, 0x00, 0x00, 0x00,
+ 0xe4, 0x00, 0x00, 0x00, 0xcc, 0x00, 0x00, 0x00, 0x9c, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x3c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa0, 0xfe, 0xff, 0xff,
+ 0xd2, 0xfe, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x84, 0x9b, 0x3c, 0xbe,
+ 0x48, 0xfc, 0x22, 0xbf, 0x35, 0x43, 0xba, 0x3e, 0x18, 0x5c, 0xdb, 0x3e, 0x4b, 0x05, 0xdd, 0xbe,
+ 0xf5, 0x0f, 0x1e, 0xbf, 0x1f, 0x2e, 0x09, 0x3f, 0x9e, 0xb5, 0x2f, 0x3f, 0xfe, 0xfe, 0xff, 0xff,
+ 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0xca, 0xfe, 0x05, 0x3f, 0xea, 0x91, 0x06, 0xbe,
+ 0x77, 0xf4, 0xa7, 0xbe, 0x3f, 0x02, 0x23, 0xbf, 0xd1, 0xdc, 0x94, 0xbd, 0xc2, 0xdd, 0xb1, 0xbe,
+ 0x45, 0x13, 0xc8, 0x3e, 0x7f, 0x6b, 0xef, 0x3e, 0x2a, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+ 0x20, 0x00, 0x00, 0x00, 0xec, 0xde, 0xe2, 0xbe, 0xf6, 0x46, 0x01, 0xbf, 0xed, 0x4d, 0x97, 0xbd,
+ 0xf2, 0xed, 0x09, 0xbf, 0x88, 0x4c, 0xe1, 0x3e, 0x60, 0x74, 0x89, 0x3e, 0x29, 0x79, 0x75, 0x3c,
+ 0x9b, 0x8f, 0xdb, 0xbe, 0x56, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
+ 0xe7, 0xc8, 0x6a, 0x3e, 0x16, 0x06, 0x8b, 0xbd, 0x49, 0xf5, 0xe5, 0x3e, 0x01, 0xfb, 0xf0, 0x3e,
+ 0x7c, 0x48, 0x10, 0xbf, 0x12, 0xd8, 0x94, 0xbe, 0x9a, 0xec, 0xaf, 0x3e, 0x4c, 0x1a, 0xdb, 0xbe,
+ 0x82, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f,
+ 0x00, 0x00, 0x80, 0x3f, 0x96, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x8d, 0xd4, 0xdb, 0xbd, 0xfe, 0x63, 0xd1, 0x3e, 0x9f, 0x60, 0x1a, 0x3d,
+ 0xa1, 0x48, 0x0b, 0xbf, 0xc6, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x3a, 0xb2, 0xca, 0x3e, 0x58, 0x1a, 0x04, 0xbf, 0xe6, 0x76, 0x9f, 0x3e, 0x61, 0xa7, 0xd8, 0x3e,
+ 0xe2, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0xf1, 0x48, 0x5c, 0xbe,
+ 0xcd, 0x54, 0xad, 0x3c, 0x9f, 0x8e, 0xbf, 0x3e, 0x69, 0xac, 0xfd, 0x3d, 0x00, 0x00, 0x06, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x93, 0x70, 0x21, 0xbf, 0x76, 0x27, 0x8e, 0x3c, 0x97, 0xe3, 0xc5, 0x3e, 0xe5, 0x7d, 0x8c, 0x3e,
+ 0xf4, 0xff, 0xff, 0xff, 0xf8, 0xff, 0xff, 0xff, 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0xc4, 0x00, 0x00, 0x00, 0xc8, 0x00, 0x00, 0x00,
+ 0xcc, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47,
+ 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x41, 0x00, 0x00, 0x00, 0x04,
+ 0x01, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x06, 0x00, 0x00, 0x00,
+ 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00,
+ 0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0xe4, 0x02, 0x00, 0x00, 0x98, 0x02, 0x00, 0x00, 0x54, 0x02, 0x00, 0x00, 0x20, 0x02, 0x00, 0x00,
+ 0xec, 0x01, 0x00, 0x00, 0xb8, 0x01, 0x00, 0x00, 0x88, 0x01, 0x00, 0x00, 0x58, 0x01, 0x00, 0x00,
+ 0x24, 0x01, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0xbc, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00,
+ 0x48, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x60, 0xfd, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x53, 0x74, 0x61, 0x74,
+ 0x65, 0x66, 0x75, 0x6c, 0x50, 0x61, 0x72, 0x74, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x65, 0x64, 0x43,
+ 0x61, 0x6c, 0x6c, 0x3a, 0x30, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0xec, 0xfd, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01,
+ 0x0c, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00,
+ 0x73, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x2f, 0x6c, 0x73, 0x74, 0x6d, 0x2f,
+ 0x7a, 0x65, 0x72, 0x6f, 0x73, 0x31, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0xdc, 0xfd, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
+ 0x18, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x61, 0x72, 0x69, 0x74, 0x68, 0x2e, 0x63, 0x6f,
+ 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x39, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x0c, 0xfe, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x18, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x61, 0x72, 0x69, 0x74, 0x68, 0x2e, 0x63, 0x6f,
+ 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x38, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x3c, 0xfe, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x18, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x61, 0x72, 0x69, 0x74, 0x68, 0x2e, 0x63, 0x6f,
+ 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x37, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6c, 0xfe, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00,
+ 0x18, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x61, 0x72, 0x69, 0x74, 0x68, 0x2e, 0x63, 0x6f,
+ 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x36, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x9c, 0xfe, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,
+ 0x18, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x61, 0x72, 0x69, 0x74, 0x68, 0x2e, 0x63, 0x6f,
+ 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x35, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0xc8, 0xfe, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x0f, 0x00, 0x00, 0x00, 0x61, 0x72, 0x69, 0x74, 0x68, 0x2e, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61,
+ 0x6e, 0x74, 0x34, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0xf4, 0xfe, 0xff, 0xff,
+ 0x0c, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00,
+ 0x61, 0x72, 0x69, 0x74, 0x68, 0x2e, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x33, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x24, 0xff, 0xff, 0xff,
+ 0x0c, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00,
+ 0x61, 0x72, 0x69, 0x74, 0x68, 0x2e, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x32, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x54, 0xff, 0xff, 0xff,
+ 0x0c, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00,
+ 0x61, 0x72, 0x69, 0x74, 0x68, 0x2e, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x31, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x84, 0xff, 0xff, 0xff,
+ 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x61, 0x72, 0x69, 0x74, 0x68, 0x2e, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x14, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x00, 0x00, 0x07, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x01, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
+ 0x15, 0x00, 0x00, 0x00, 0x73, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x2f, 0x6c,
+ 0x73, 0x74, 0x6d, 0x2f, 0x7a, 0x65, 0x72, 0x6f, 0x73, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x24, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x5f,
+ 0x64, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x5f, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x31, 0x3a,
+ 0x30, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x2c, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69,
+ 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<float> input_data = {
+ -3.509163, -18.256927, 6.4799614, 10.296598, 30.371328, 18.692572, 10.12867, -26.44944,
+ 25.324795, 3.8303719, 20.93112, 22.603086, -4.308655, 2.3276749, -5.9565907, 25.611776};
+
+const std::vector<float> reference_output_data = {0.7613201, -0.7570043, 0.0480366,
+ -4.3364323e-11, -0.7613433, 1.3437739e-08,
+ -0.7613537, -7.000451e-08};
+
+} // namespace unidir_lstm_float
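+
+// Editor's note: an illustrative consistency check (not from the original
+// patch): the model consumes 4 time steps of 4 features and emits a 2-unit
+// hidden state per step, so the reference holds 4 * 2 = 8 values.
+inline bool lstm_float_shapes_consistent()
+{
+  return unidir_lstm_float::input_data.size() == 1 * 4 * 4 &&
+         unidir_lstm_float::reference_output_data.size() == 1 * 4 * 2;
+}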
+
+class TestDataFloatUnidirectionalLSTM : public TestDataUnidirectionalLSTMBase<float>
+{
+public:
+ TestDataFloatUnidirectionalLSTM()
+ {
+ _input_data = unidir_lstm_float::input_data;
+ _reference_output_data = unidir_lstm_float::reference_output_data;
+ _test_kernel_model_circle = unidir_lstm_float::test_kernel_model_circle;
+ }
+
+ ~TestDataFloatUnidirectionalLSTM() override = default;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_FLOAT_UNIDIRECTIONAL_LSTM_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/unidirectional_lstm/QuantS8UnidirectionalLSTM.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/unidirectional_lstm/QuantS8UnidirectionalLSTM.h
new file mode 100644
index 000000000..d4bd798a3
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/unidirectional_lstm/QuantS8UnidirectionalLSTM.h
@@ -0,0 +1,265 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_QUANT_S8_UNIDIRECTIONAL_LSTM_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_QUANT_S8_UNIDIRECTIONAL_LSTM_KERNEL_H
+
+#include "TestDataUnidirectionalLSTMBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+namespace unidir_lstm_int8
+{
+/*
+ * UnidirectionalLSTM Kernel:
+ *
+ * Input(1, 20, 20)
+ * |
+ * UnidirectionalLSTM
+ * |
+ * Output(1, 20, 2)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x1c, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x54, 0x02, 0x00, 0x00, 0xd8, 0x0a, 0x00, 0x00, 0xf4, 0x0a, 0x00, 0x00,
+ 0x13, 0x00, 0x00, 0x00, 0x40, 0x02, 0x00, 0x00, 0x34, 0x02, 0x00, 0x00, 0x1c, 0x02, 0x00, 0x00,
+ 0xfc, 0x01, 0x00, 0x00, 0xf4, 0x01, 0x00, 0x00, 0xec, 0x01, 0x00, 0x00, 0xd8, 0x01, 0x00, 0x00,
+ 0xc4, 0x01, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00, 0x9c, 0x01, 0x00, 0x00, 0x64, 0x01, 0x00, 0x00,
+ 0x2c, 0x01, 0x00, 0x00, 0xf4, 0x00, 0x00, 0x00, 0xbc, 0x00, 0x00, 0x00, 0xa8, 0x00, 0x00, 0x00,
+ 0x94, 0x00, 0x00, 0x00, 0x8c, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x2e, 0xfe, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x0e, 0x00, 0x08, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xeb, 0x03, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00,
+ 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x32, 0x2e, 0x31, 0x31,
+ 0x2e, 0x30, 0x00, 0x00, 0x92, 0xfe, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x31, 0x2e, 0x31, 0x33, 0x2e, 0x31, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x90, 0xfe, 0xff, 0xff, 0xb2, 0xfe, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xff, 0x7f, 0xff, 0x7f, 0xc2, 0xfe, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xd2, 0xfe, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
+ 0x7f, 0xdb, 0x47, 0xd4, 0x00, 0xba, 0x1c, 0x49, 0xd9, 0xb1, 0x98, 0xa7, 0x76, 0x4d, 0x90, 0xab,
+ 0x19, 0x5d, 0x72, 0xfa, 0xe1, 0x95, 0x07, 0x05, 0x63, 0xfb, 0x6e, 0x59, 0xd9, 0x0e, 0x94, 0x04,
+ 0x01, 0x72, 0x3b, 0x8e, 0x58, 0x4a, 0xd1, 0xd6, 0x06, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+ 0x28, 0x00, 0x00, 0x00, 0xbd, 0x21, 0x0b, 0x84, 0xf4, 0xc6, 0x67, 0x3f, 0x19, 0xc2, 0x5f, 0x92,
+ 0x19, 0x4a, 0x9a, 0xef, 0xbb, 0x79, 0x45, 0x93, 0x09, 0x17, 0x50, 0xdd, 0x2e, 0x5c, 0xe3, 0x5f,
+ 0xc9, 0x81, 0xb9, 0x1a, 0x5e, 0x45, 0x84, 0xe1, 0xb4, 0xe3, 0x58, 0x1b, 0x3a, 0xff, 0xff, 0xff,
+ 0x04, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x03, 0xa1, 0x30, 0x2e, 0x08, 0x1d, 0x39, 0xe1,
+ 0xed, 0xea, 0x19, 0xc0, 0x86, 0xe4, 0x71, 0x50, 0xc6, 0x4f, 0x05, 0xbc, 0xf7, 0xdf, 0x93, 0xea,
+ 0x94, 0xbd, 0x00, 0x49, 0x2a, 0x2a, 0xd0, 0x5e, 0x06, 0x81, 0xdd, 0xe0, 0xaf, 0xf9, 0x71, 0x24,
+ 0x6e, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x9b, 0xf8, 0x5a, 0xaa,
+ 0x6c, 0x6a, 0x2f, 0x8a, 0x9f, 0xe1, 0x7e, 0x85, 0x81, 0xe1, 0xea, 0x6d, 0x89, 0x28, 0xd1, 0x49,
+ 0x82, 0x6b, 0x25, 0x48, 0x1a, 0xc6, 0xce, 0x0b, 0xcd, 0xd2, 0x24, 0x51, 0xf3, 0x48, 0xf6, 0x6f,
+ 0xe2, 0x5c, 0x36, 0x5b, 0xa2, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x7f, 0xd4, 0xbf, 0xc1, 0xb2, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xf6, 0x10, 0x7f, 0x09, 0xc2, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xf4, 0x81, 0x11, 0x1f, 0xd2, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x7f, 0x5f, 0x7e, 0x84, 0xc4, 0xff, 0xff, 0xff, 0xc8, 0xff, 0xff, 0xff, 0xea, 0xff, 0xff, 0xff,
+ 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x2c, 0x03, 0x00, 0x00, 0x2c, 0x03, 0x00, 0x00, 0xfc, 0xff, 0xff, 0xff,
+ 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0xc4, 0x00, 0x00, 0x00,
+ 0xc8, 0x00, 0x00, 0x00, 0xcc, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x47, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x41,
+ 0x00, 0x00, 0x00, 0x04, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00,
+ 0x09, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0d, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x15, 0x00, 0x00, 0x00, 0x18, 0x07, 0x00, 0x00, 0xb0, 0x06, 0x00, 0x00, 0x54, 0x06, 0x00, 0x00,
+ 0xf8, 0x05, 0x00, 0x00, 0x9c, 0x05, 0x00, 0x00, 0x40, 0x05, 0x00, 0x00, 0xe4, 0x04, 0x00, 0x00,
+ 0x88, 0x04, 0x00, 0x00, 0x2c, 0x04, 0x00, 0x00, 0xd0, 0x03, 0x00, 0x00, 0x74, 0x03, 0x00, 0x00,
+ 0x18, 0x03, 0x00, 0x00, 0xbc, 0x02, 0x00, 0x00, 0x58, 0x02, 0x00, 0x00, 0xe4, 0x01, 0x00, 0x00,
+ 0xac, 0x01, 0x00, 0x00, 0x64, 0x01, 0x00, 0x00, 0x2c, 0x01, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00,
+ 0x84, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x4a, 0xf9, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00,
+ 0x34, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x48, 0x00, 0x00, 0x00,
+ 0x3c, 0xf9, 0xff, 0xff, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x1d, 0x87, 0xfd, 0x3b, 0x19, 0x00, 0x00, 0x00, 0x53, 0x74, 0x61, 0x74, 0x65, 0x66, 0x75, 0x6c,
+ 0x50, 0x61, 0x72, 0x74, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x65, 0x64, 0x43, 0x61, 0x6c, 0x6c, 0x3a,
+ 0x30, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x09, 0x50, 0x00, 0x00, 0x00, 0xb4, 0xf9, 0xff, 0xff, 0x18, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1d, 0x87, 0xfd, 0x3b, 0x23, 0x00, 0x00, 0x00,
+ 0x65, 0x66, 0x66, 0x65, 0x63, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x68, 0x69, 0x64, 0x64, 0x65, 0x6e,
+ 0x5f, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x6d, 0x65, 0x64, 0x69,
+ 0x61, 0x74, 0x65, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5c, 0xff, 0xff, 0xff,
+ 0x08, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x69, 0x6e, 0x70, 0x75,
+ 0x74, 0x5f, 0x74, 0x6f, 0x5f, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x5f, 0x69, 0x6e, 0x74, 0x65,
+ 0x72, 0x6d, 0x65, 0x64, 0x69, 0x61, 0x74, 0x65, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x94, 0xff, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
+ 0x1a, 0x00, 0x00, 0x00, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x74, 0x6f, 0x5f, 0x63, 0x65, 0x6c,
+ 0x6c, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x6d, 0x65, 0x64, 0x69, 0x61, 0x74, 0x65, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc8, 0xff, 0xff, 0xff, 0x08, 0x00, 0x00, 0x00,
+ 0x28, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x74, 0x6f,
+ 0x5f, 0x66, 0x6f, 0x72, 0x67, 0x65, 0x74, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x6d, 0x65, 0x64,
+ 0x69, 0x61, 0x74, 0x65, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x69, 0x6e, 0x70, 0x75,
+ 0x74, 0x5f, 0x74, 0x6f, 0x5f, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x72,
+ 0x6d, 0x65, 0x64, 0x69, 0x61, 0x74, 0x65, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xa0, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, 0x10, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x07, 0x40, 0x00, 0x00, 0x00, 0x04, 0xfb, 0xff, 0xff, 0x18, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x12, 0x00, 0x00, 0x00,
+ 0x74, 0x66, 0x6c, 0x2e, 0x70, 0x73, 0x65, 0x75, 0x64, 0x6f, 0x5f, 0x71, 0x63, 0x6f, 0x6e, 0x73,
+ 0x74, 0x31, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x18, 0x00, 0x14, 0x00, 0x13, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x07, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x10, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x09, 0x40, 0x00, 0x00, 0x00, 0x74, 0xfb, 0xff, 0xff, 0x18, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1d, 0x87, 0xfd, 0x3b, 0x11, 0x00, 0x00, 0x00,
+ 0x74, 0x66, 0x6c, 0x2e, 0x70, 0x73, 0x65, 0x75, 0x64, 0x6f, 0x5f, 0x71, 0x63, 0x6f, 0x6e, 0x73,
+ 0x74, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0xe2, 0xfb, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x09, 0x38, 0x00, 0x00, 0x00, 0xd4, 0xfb, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x67, 0x21, 0x6d, 0x3b, 0x0f, 0x00, 0x00, 0x00, 0x61, 0x72, 0x69, 0x74,
+ 0x68, 0x2e, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x39, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3a, 0xfc, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x38, 0x00, 0x00, 0x00,
+ 0x2c, 0xfc, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xec, 0x45, 0x69, 0x3b,
+ 0x0f, 0x00, 0x00, 0x00, 0x61, 0x72, 0x69, 0x74, 0x68, 0x2e, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61,
+ 0x6e, 0x74, 0x38, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x92, 0xfc, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x09, 0x38, 0x00, 0x00, 0x00, 0x84, 0xfc, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0xb9, 0xbc, 0x68, 0x3b, 0x0f, 0x00, 0x00, 0x00, 0x61, 0x72, 0x69, 0x74,
+ 0x68, 0x2e, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x37, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0xea, 0xfc, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x38, 0x00, 0x00, 0x00,
+ 0xdc, 0xfc, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x3f, 0xac, 0x6e, 0x3b,
+ 0x0f, 0x00, 0x00, 0x00, 0x61, 0x72, 0x69, 0x74, 0x68, 0x2e, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61,
+ 0x6e, 0x74, 0x36, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x42, 0xfd, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x09, 0x38, 0x00, 0x00, 0x00, 0x34, 0xfd, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x24, 0x56, 0x7e, 0x3b, 0x0f, 0x00, 0x00, 0x00, 0x61, 0x72, 0x69, 0x74,
+ 0x68, 0x2e, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x33, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x9a, 0xfd, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x38, 0x00, 0x00, 0x00,
+ 0x8c, 0xfd, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0xa6, 0x65, 0x3b,
+ 0x0f, 0x00, 0x00, 0x00, 0x61, 0x72, 0x69, 0x74, 0x68, 0x2e, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61,
+ 0x6e, 0x74, 0x32, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0xf2, 0xfd, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x09, 0x38, 0x00, 0x00, 0x00, 0xe4, 0xfd, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x38, 0x10, 0xb8, 0x3b, 0x0f, 0x00, 0x00, 0x00, 0x61, 0x72, 0x69, 0x74,
+ 0x68, 0x2e, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x31, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x4a, 0xfe, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x38, 0x00, 0x00, 0x00,
+ 0x3c, 0xfe, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x5f, 0x5f, 0x7d, 0x3b,
+ 0x0e, 0x00, 0x00, 0x00, 0x61, 0x72, 0x69, 0x74, 0x68, 0x2e, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61,
+ 0x6e, 0x74, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0xa2, 0xfe, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x3c, 0x00, 0x00, 0x00, 0x94, 0xfe, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x08, 0x38, 0xa5, 0x3a, 0x10, 0x00, 0x00, 0x00, 0x61, 0x72, 0x69, 0x74,
+ 0x68, 0x2e, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x34, 0x32, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0xfa, 0xfe, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x3c, 0x00, 0x00, 0x00,
+ 0xec, 0xfe, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x28, 0x1c, 0xa1, 0x3a,
+ 0x10, 0x00, 0x00, 0x00, 0x61, 0x72, 0x69, 0x74, 0x68, 0x2e, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61,
+ 0x6e, 0x74, 0x34, 0x31, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x52, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x3c, 0x00, 0x00, 0x00, 0x44, 0xff, 0xff, 0xff, 0x18, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xb4, 0x26, 0xa4, 0x3a, 0x0f, 0x00, 0x00, 0x00,
+ 0x61, 0x72, 0x69, 0x74, 0x68, 0x2e, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x34, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0xaa, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x38, 0x00, 0x00, 0x00,
+ 0x9c, 0xff, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x21, 0x7b, 0xa1, 0x3a,
+ 0x0f, 0x00, 0x00, 0x00, 0x61, 0x72, 0x69, 0x74, 0x68, 0x2e, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61,
+ 0x6e, 0x74, 0x35, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x13, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x20, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09,
+ 0x50, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x08, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0xe6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0xa3, 0x36, 0xb1, 0x3e,
+ 0x19, 0x00, 0x00, 0x00, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x5f, 0x64, 0x65, 0x66, 0x61,
+ 0x75, 0x6c, 0x74, 0x5f, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x31, 0x3a, 0x30, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c,
+ 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63,
+ 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<int8_t> input_data = {
+ -19, -32, -10, -8, -23, -21, -19, -8, -10, -19, -35, -28, -19, -19, -8, -17, -17, -19, 5,
+ -19, -30, -21, -30, -8, -28, -37, -30, -17, -10, -28, -30, -17, -19, -21, -19, -19, 12, -6,
+ -10, -8, -10, -8, -21, -19, -30, -19, -8, -6, -24, -17, -8, -8, -19, -10, -28, -48, -10,
+ -19, -19, -17, 1, -30, -21, -17, 12, -19, -1, -19, -17, -1, -30, -19, -19, -28, -1, -10,
+ -32, -19, -19, -21, -19, -39, -8, -6, -12, -21, -28, -19, -15, -26, -19, -17, -19, -19, -19,
+ -19, -28, -19, -17, -30, 3, -10, -28, -30, -10, -19, -19, -21, -8, -28, -19, -19, -28, -17,
+ -19, -19, -19, -17, -17, -8, -12, -19, -19, -19, -30, -19, -8, -10, -28, -19, -30, -28, -26,
+ -8, -19, -19, -10, -30, -30, -39, -21, -39, -19, 1, -8, -19, -21, -10, -8, -10, -30, -30,
+ -19, -30, -19, -21, -19, -19, -21, -41, -17, -41, -19, -10, -19, -10, -30, -19, 3, -6, -23,
+ -28, -23, -10, -19, -10, -19, 1, -19, -30, -10, -8, -17, -19, -17, -26, -19, -19, -19, -26,
+ -23, -28, -19, -28, -12, -19, -30, -19, -19, -17, -28, -19, -28, -30, -8, -15, -1, -8, -19,
+ -19, -19, -21, -17, -8, -19, -21, -28, -30, 3, -28, -19, -19, -10, -19, -6, -28, -19, -17,
+ -46, -19, -19, -19, -10, 3, -19, -19, -19, -19, -19, -17, -21, -32, -19, -30, -19, -19, -19,
+ -17, -24, -10, -28, -8, -32, -10, -26, -8, -30, -17, -19, -17, -17, -8, -19, 3, -1, -30,
+ -19, -15, -19, -37, -19, -19, -19, -19, -19, -21, -8, -21, -19, -50, -30, -10, -6, -19, -19,
+ -24, -21, -17, -10, -17, -19, -19, -19, -17, -15, -21, -50, -19, -1, -10, -19, -19, -19, -39,
+ -28, -17, 10, -19, 1, 1, -8, -15, -19, -23, -10, -8, -19, 3, -19, 3, -19, -30, -23,
+ -48, -8, -8, -28, -10, -37, -19, -15, -19, -26, -10, -1, -10, -19, -19, -32, -17, -30, -17,
+ -21, -32, -19, 5, -21, -12, -1, -19, -10, -30, -19, -41, -17, -23, -10, -26, -28, -30, -30,
+ -21, -19, -10, -10, -19, -8, -12, -19, -19, 1, -19, -1, -10, -19, -56, -21, -10, -41, -17,
+ -28, -10, -25, -30, -21, -48, -30, -30, -21, -19, -19, -19, -30, -8, 12, -10, -8, -6, -28,
+ -17};
+
+const std::vector<int8_t> reference_output_data = {
+ 0, 85, 0, 98, 0, 66, 29, -7, 13, -95, -5, -90, 0, -7, 0, -4, 4, -74, 32, -96,
+ 21, -86, 7, -98, 48, -89, 0, -98, 90, -97, 96, 82, 46, -35, 32, -1, -2, -85, -2, -97};
+
+} // namespace unidir_lstm_int8
+
+class TestDataInt8UnidirectionalLSTM : public TestDataUnidirectionalLSTMBase<int8_t>
+{
+public:
+ TestDataInt8UnidirectionalLSTM()
+ {
+ _input_data = unidir_lstm_int8::input_data;
+ _reference_output_data = unidir_lstm_int8::reference_output_data;
+ _test_kernel_model_circle = unidir_lstm_int8::test_kernel_model_circle;
+ }
+
+ ~TestDataInt8UnidirectionalLSTM() override = default;
+};
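+
+// A sketch of how this data class is typically consumed by a kernel unit test.
+// The runTestKernel() harness and EXPECT_NEAR macro are illustrative assumptions,
+// not the exact test API:
+//
+//   TestDataInt8UnidirectionalLSTM test_data;
+//   std::vector<int8_t> output = runTestKernel(test_data.get_model_ptr(),
+//                                              test_data.get_input_data_by_index(0));
+//   for (size_t k = 0; k < output.size(); ++k)
+//     EXPECT_NEAR(output[k], test_data.get_output_data_by_index(0)[k], 1);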
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_QUANT_S8_UNIDIRECTIONAL_LSTM_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/unidirectional_lstm/TestDataUnidirectionalLSTMBase.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/unidirectional_lstm/TestDataUnidirectionalLSTMBase.h
new file mode 100644
index 000000000..942265517
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/unidirectional_lstm/TestDataUnidirectionalLSTMBase.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_UNIDIRECTIONAL_LSTM_KERNEL_BASE_H
+#define LUCI_INTERPRETER_TEST_MODELS_UNIDIRECTIONAL_LSTM_KERNEL_BASE_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+template <typename T> class TestDataUnidirectionalLSTMBase : public TestDataBase<T>
+{
+public:
+ TestDataUnidirectionalLSTMBase() = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input_data;
+ default:
+        assert(false && "Wrong input index");
+        return _input_data; // unreachable with assertions on; avoids UB in release builds
+ }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input_data;
+ std::vector<T> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_UNIDIRECTIONAL_LSTM_KERNEL_BASE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/while/NegWhileKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/while/NegWhileKernel.h
new file mode 100644
index 000000000..d193fbfa8
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/while/NegWhileKernel.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_NEG_WHILE_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_NEG_WHILE_KERNEL_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+namespace neg_while_kernel
+{
+/*
+ * While Kernel with a wrong output type in the Cond graph (the output should be bool but is int32):
+ * Main graph:
+ * Input(1)
+ * |
+ * While
+ * |
+ * Output(1)
+ *
+ * Cond graph:
+ * CInput(1)
+ * |
+ * Less (Const = 10)
+ * |
+ * COutput(1) - int32, but should be bool
+ *
+ * Body Graph:
+ * BInput(1)
+ * |
+ * Add (Const = 1)
+ * |
+ * BOutput(1)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x7c, 0x00, 0x00, 0x00, 0x78, 0x03, 0x00, 0x00, 0xb0, 0x03, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,
+ 0x68, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00,
+ 0x48, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xe2, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x94, 0xfc, 0xff, 0xff, 0x04, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0xb8, 0xfc, 0xff, 0xff, 0xbc, 0xfc, 0xff, 0xff,
+ 0xc0, 0xfc, 0xff, 0xff, 0xc4, 0xfc, 0xff, 0xff, 0xc8, 0xfc, 0xff, 0xff, 0x03, 0x00, 0x00, 0x00,
+ 0x10, 0x02, 0x00, 0x00, 0x08, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0a, 0xfe, 0xff, 0xff,
+ 0x14, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00,
+ 0x68, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x57, 0x48, 0x49, 0x4c, 0x45, 0x5f, 0x42, 0x4f,
+ 0x44, 0x59, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0xfd, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xd0, 0xfd, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+ 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x62, 0x6f, 0x66, 0x6d, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0xf8, 0xfd, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x62, 0x69, 0x66, 0x6d, 0x33, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x1c, 0xfe, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x62, 0x69, 0x66, 0x6d,
+ 0x31, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0xff, 0xff, 0xff,
+ 0x14, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00,
+ 0x5c, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x57, 0x48, 0x49, 0x4c, 0x45, 0x5f, 0x43, 0x4f,
+ 0x4e, 0x44, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xfe, 0xfe, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x29, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x24, 0xfe, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x58, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xc4, 0xfe, 0xff, 0xff,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x63, 0x6f, 0x66, 0x6d, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0xec, 0xfe, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x63, 0x69, 0x66, 0x6d,
+ 0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x63, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x70, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x4d, 0x61, 0x69, 0x6e,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x5d, 0x18, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xdc, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x34, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xf4, 0xff, 0xff, 0xff,
+ 0x77, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x77, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3a,
+ 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+} // namespace neg_while_kernel
+
+class NegTestDataWhileKernel : public NegTestDataBase
+{
+public:
+ NegTestDataWhileKernel()
+ {
+ _test_kernel_model_circle = neg_while_kernel::test_kernel_model_circle;
+ }
+
+ ~NegTestDataWhileKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+protected:
+ const unsigned char *_test_kernel_model_circle;
+};
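+
+// A sketch of how this negative test data is typically used: the cond graph's
+// output type is int32 instead of bool, so model import/validation is expected
+// to reject the model. The checking macro and loader below are illustrative
+// assumptions, not the exact harness API:
+//
+//   NegTestDataWhileKernel neg_data;
+//   EXPECT_DEATH(loadAndConfigureModel(neg_data.get_model_ptr()), "");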
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_NEG_WHILE_KERNEL_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/test_models/while/WhileKernel.h b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/while/WhileKernel.h
new file mode 100644
index 000000000..73c27cde9
--- /dev/null
+++ b/onert-micro/luci-interpreter/include/luci_interpreter/test_models/while/WhileKernel.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MODELS_WHILE_KERNEL_H
+#define LUCI_INTERPRETER_TEST_MODELS_WHILE_KERNEL_H
+
+#include "luci_interpreter/test_models/TestDataBase.h"
+
+namespace luci_interpreter
+{
+namespace test_kernel
+{
+
+namespace while_kernel
+{
+/*
+ * While Kernel:
+ * Main graph:
+ * Input(1)
+ * |
+ * While
+ * |
+ * Output(1)
+ *
+ * Cond graph:
+ * CInput(1)
+ * |
+ * Less (Const = 10)
+ * |
+ * COutput(1)
+ *
+ * Body Graph:
+ * BInput(1)
+ * |
+ * Add (Const = 1)
+ * |
+ * BOutput(1)
+ */
+const unsigned char test_kernel_model_circle[] = {
+ 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x7c, 0x00, 0x00, 0x00, 0x78, 0x03, 0x00, 0x00, 0xb0, 0x03, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,
+ 0x68, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00,
+ 0x48, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xe2, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x94, 0xfc, 0xff, 0xff, 0x04, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0xb8, 0xfc, 0xff, 0xff, 0xbc, 0xfc, 0xff, 0xff,
+ 0xc0, 0xfc, 0xff, 0xff, 0xc4, 0xfc, 0xff, 0xff, 0xc8, 0xfc, 0xff, 0xff, 0x03, 0x00, 0x00, 0x00,
+ 0x10, 0x02, 0x00, 0x00, 0x08, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0a, 0xfe, 0xff, 0xff,
+ 0x14, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00,
+ 0x68, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x57, 0x48, 0x49, 0x4c, 0x45, 0x5f, 0x42, 0x4f,
+ 0x44, 0x59, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+ 0x30, 0xfd, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00,
+ 0x30, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xd0, 0xfd, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+ 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x62, 0x6f, 0x66, 0x6d, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0xf8, 0xfd, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x62, 0x69, 0x66, 0x6d, 0x33, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x1c, 0xfe, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x62, 0x69, 0x66, 0x6d,
+ 0x31, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0xff, 0xff, 0xff,
+ 0x14, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00,
+ 0x5c, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x57, 0x48, 0x49, 0x4c, 0x45, 0x5f, 0x43, 0x4f,
+ 0x4e, 0x44, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xfe, 0xfe, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x29, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x24, 0xfe, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x58, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xc4, 0xfe, 0xff, 0xff,
+ 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x10, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x63, 0x6f, 0x66, 0x6d, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0xec, 0xfe, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x63, 0x69, 0x66, 0x6d,
+ 0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x63, 0x69, 0x66, 0x6d, 0x31, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+ 0x70, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x4d, 0x61, 0x69, 0x6e,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00,
+ 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x5d, 0x18, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0xdc, 0xff, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x6f, 0x66, 0x6d, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0f, 0x00,
+ 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x69, 0x66, 0x6d, 0x31,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x34, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xf4, 0xff, 0xff, 0xff,
+ 0x77, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x77, 0x0c, 0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3a,
+ 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d,
+ 0x74, 0x66, 0x6c, 0x69, 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00};
+
+const std::vector<int32_t> input_data = {35};
+
+const std::vector<int32_t> reference_output_data = {35};
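+
+// Note: the reference output equals the input because the loop body never runs.
+// The cond graph computes Less(x, 10) and the initial input is 35, so the
+// condition is false on the first check. In plain C++ the model reduces to:
+//
+//   int32_t x = 35; // input_data
+//   while (x < 10)  // cond graph: Less(CInput, Const 10)
+//     x = x + 1;    // body graph: Add(BInput, Const 1)
+//   // x == 35, matching reference_output_data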
+} // namespace while_kernel
+
+template <typename T> class TestDataWhileKernel : public TestDataBase<T>
+{
+public:
+ TestDataWhileKernel()
+ {
+ _input_data = while_kernel::input_data;
+ _reference_output_data = while_kernel::reference_output_data;
+ _test_kernel_model_circle = while_kernel::test_kernel_model_circle;
+ }
+
+ ~TestDataWhileKernel() override = default;
+
+ const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; }
+
+ const std::vector<T> &get_input_data_by_index(int i) override final
+ {
+ switch (i)
+ {
+ case 0:
+ return _input_data;
+ default:
+ assert(false && "Wrong input index");
+ }
+ }
+
+ const std::vector<T> &get_output_data_by_index(int i) override final
+ {
+ assert(i == 0);
+ return _reference_output_data;
+ }
+
+protected:
+ std::vector<T> _input_data;
+ std::vector<T> _reference_output_data;
+ const unsigned char *_test_kernel_model_circle;
+};
+
+} // namespace test_kernel
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MODELS_WHILE_KERNEL_H
diff --git a/onert-micro/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst b/onert-micro/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst
new file mode 100644
index 000000000..353e11797
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst
@@ -0,0 +1,61 @@
+REGISTER_KERNEL(ADD, Add)
+REGISTER_KERNEL(ARG_MAX, ArgMax)
+REGISTER_KERNEL(AVERAGE_POOL_2D, AveragePool2D)
+REGISTER_KERNEL(BATCH_TO_SPACE_ND, BatchToSpaceND)
+REGISTER_KERNEL(CAST, Cast)
+REGISTER_KERNEL(CONCATENATION, Concatenation)
+REGISTER_KERNEL(CONV_2D, Conv2D)
+REGISTER_KERNEL(DEPTH_TO_SPACE, DepthToSpace)
+REGISTER_KERNEL(DEPTHWISE_CONV_2D, DepthwiseConv2D)
+REGISTER_KERNEL(DEQUANTIZE, Dequantize)
+REGISTER_KERNEL(DIV, Div)
+REGISTER_KERNEL(ELU, Elu)
+REGISTER_KERNEL(EXP, Exp)
+REGISTER_KERNEL(EXPAND_DIMS, ExpandDims)
+REGISTER_KERNEL(FILL, Fill)
+REGISTER_KERNEL(FLOOR, Floor)
+REGISTER_KERNEL(FLOOR_DIV, FloorDiv)
+REGISTER_KERNEL(EQUAL, Equal)
+REGISTER_KERNEL(FULLY_CONNECTED, FullyConnected)
+REGISTER_KERNEL(GREATER, Greater)
+REGISTER_KERNEL(GREATER_EQUAL, GreaterEqual)
+REGISTER_KERNEL(INSTANCE_NORM, InstanceNorm)
+REGISTER_KERNEL(L2_NORMALIZATION, L2Normalize)
+REGISTER_KERNEL(L2_POOL_2D, L2Pool2D)
+REGISTER_KERNEL(LEAKY_RELU, LeakyRelu)
+REGISTER_KERNEL(LESS, Less)
+REGISTER_KERNEL(LESS_EQUAL, LessEqual)
+REGISTER_KERNEL(LOGICAL_AND, LogicalAnd)
+REGISTER_KERNEL(LOGICAL_NOT, LogicalNot)
+REGISTER_KERNEL(LOGICAL_OR, LogicalOr)
+REGISTER_KERNEL(LOGISTIC, Logistic)
+REGISTER_KERNEL(MAXIMUM, Maximum)
+REGISTER_KERNEL(MAX_POOL_2D, MaxPool2D)
+REGISTER_KERNEL(MINIMUM, Minimum)
+REGISTER_KERNEL(MIRROR_PAD, MirrorPad)
+REGISTER_KERNEL(MUL, Mul)
+REGISTER_KERNEL(NEG, Neg)
+REGISTER_KERNEL(NOT_EQUAL, NotEqual)
+REGISTER_KERNEL(PAD, Pad)
+REGISTER_KERNEL(PADV2, PadV2)
+REGISTER_KERNEL(PRELU, PRelu)
+REGISTER_KERNEL(QUANTIZE, Quantize)
+REGISTER_KERNEL(RESHAPE, Reshape)
+REGISTER_KERNEL(RESIZE_BILINEAR, ResizeBilinear)
+REGISTER_KERNEL(RESIZE_NEAREST_NEIGHBOR, ResizeNearestNeighbor)
+REGISTER_KERNEL(RSQRT, Rsqrt)
+REGISTER_KERNEL(SHAPE, Shape)
+REGISTER_KERNEL(SOFTMAX, Softmax)
+REGISTER_KERNEL(SPACE_TO_BATCH_ND, SpaceToBatchND)
+REGISTER_KERNEL(SPACE_TO_DEPTH, SpaceToDepth)
+REGISTER_KERNEL(STRIDED_SLICE, StridedSlice)
+REGISTER_KERNEL(SQRT, Sqrt)
+REGISTER_KERNEL(SQUARE, Square)
+REGISTER_KERNEL(SQUARED_DIFFERENCE, SquaredDifference)
+REGISTER_KERNEL(SQUEEZE, Squeeze)
+REGISTER_KERNEL(SUB, Sub)
+REGISTER_KERNEL(SVDF, SVDF)
+REGISTER_KERNEL(TANH, Tanh)
+REGISTER_KERNEL(TRANSPOSE, Transpose)
+REGISTER_KERNEL(TRANSPOSE_CONV, TransposeConv)
+REGISTER_KERNEL(UNIDIRECTIONAL_SEQUENCE_LSTM, UnidirectionalSequenceLSTM)
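
Note: a .lst file like this is consumed with the X-macro pattern — a translation
unit defines REGISTER_KERNEL and then includes the list, so each line expands to
one kernel registration. A minimal sketch, assuming hypothetical configure and
execute helper names:

  #define REGISTER_KERNEL(builtin_operator, name)                            \
    registerKernel(circle::BuiltinOperator_##builtin_operator,               \
                   configure_kernel_Circle##name, execute_kernel_Circle##name);
  #include "KernelsToBuild.lst"
  #undef REGISTER_KERNEL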
diff --git a/onert-micro/luci-interpreter/pal/cmsisnn/PALConv2d.h b/onert-micro/luci-interpreter/pal/cmsisnn/PALConv2d.h
new file mode 100644
index 000000000..bd47a88cb
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/cmsisnn/PALConv2d.h
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_CONV2D_H
+#define LUCI_INTERPRETER_PAL_CONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/conv.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h>
+#include <arm_nn_types.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+ const float *input_data, const tflite::RuntimeShape &filter_shape,
+ const float *filter_data, const tflite::RuntimeShape &bias_shape,
+ const float *bias_data, const tflite::RuntimeShape &output_shape,
+ float *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ float *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data,
+ tflite::RuntimeShape(), nullptr);
+}
+
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+ const uint8 *input_data, const tflite::RuntimeShape &filter_shape,
+ const uint8 *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+ uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ uint8 *scratchpad_data)
+{
+ tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data, scratchpad_shape,
+ scratchpad_data, nullptr);
+}
+
+static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult,
+ const int32_t *shifts, const tflite::RuntimeShape &input_shape,
+ const int8 *input_data, const tflite::RuntimeShape &filter_shape,
+ const int8 *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+ int8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ int8 *scratchpad_data)
+{
+ if (scratchpad_data)
+ {
+ cmsis_nn_conv_params conv_params;
+ conv_params.dilation.h = params.dilation_height_factor;
+ conv_params.dilation.w = params.dilation_width_factor;
+
+ assert(conv_params.dilation.h == 1);
+ assert(conv_params.dilation.w == 1);
+
+ conv_params.input_offset = params.input_offset;
+ conv_params.output_offset = params.output_offset;
+ conv_params.stride.h = params.stride_height;
+ conv_params.stride.w = params.stride_width;
+ conv_params.padding.h = params.padding_values.height;
+ conv_params.padding.w = params.padding_values.width;
+ conv_params.activation.min = params.quantized_activation_min;
+ conv_params.activation.max = params.quantized_activation_max;
+
+ cmsis_nn_per_channel_quant_params quant_params;
+ quant_params.multiplier = const_cast<int32_t *>(mult);
+ quant_params.shift = const_cast<int32_t *>(shifts);
+
+ assert(conv_params.activation.min <= conv_params.activation.max);
+ assert(input_shape.DimensionsCount() == 4);
+ assert(filter_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+ const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
+ const int output_depth = tflite::MatchingDim(filter_shape, 0, output_shape, 3);
+ if (bias_data)
+ {
+ assert(bias_shape.FlatSize() == output_depth);
+ }
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = batch_size;
+ input_dims.h = input_shape.Dims(1);
+ input_dims.w = input_shape.Dims(2);
+ input_dims.c = input_depth;
+
+ cmsis_nn_dims filter_dims;
+ filter_dims.n = output_depth;
+ filter_dims.h = filter_shape.Dims(1);
+ filter_dims.w = filter_shape.Dims(2);
+ filter_dims.c = input_depth;
+
+ cmsis_nn_dims bias_dims;
+ bias_dims.n = 1;
+ bias_dims.h = 1;
+ bias_dims.w = 1;
+ bias_dims.c = output_depth;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = batch_size;
+ output_dims.h = output_shape.Dims(1);
+ output_dims.w = output_shape.Dims(2);
+ output_dims.c = output_depth;
+
+ cmsis_nn_context ctx;
+ ctx.buf = scratchpad_data;
+ ctx.size = scratchpad_shape.Dims(0);
+
+ auto res = arm_convolve_wrapper_s8(&ctx, &conv_params, &quant_params, &input_dims, input_data,
+ &filter_dims, filter_data, &bias_dims, bias_data,
+ &output_dims, output_data);
+ assert(res == ARM_MATH_SUCCESS);
+ }
+ else
+ {
+ tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
+ filter_shape, filter_data, bias_shape, bias_data,
+ output_shape, output_data);
+ }
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::ConvParams &params,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &filter_shape,
+ const tflite::RuntimeShape &output_shape)
+{
+ cmsis_nn_conv_params conv_params;
+ conv_params.dilation.h = params.dilation_height_factor;
+ conv_params.dilation.w = params.dilation_width_factor;
+
+ if (input_data_type == luci_interpreter::DataType::S8 && conv_params.dilation.h == 1 &&
+ conv_params.dilation.w == 1)
+ {
+ const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int32_t input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
+ const int32_t output_depth = tflite::MatchingDim(filter_shape, 0, output_shape, 3);
+ const int32_t filter_height = filter_shape.Dims(1);
+ const int32_t filter_width = filter_shape.Dims(2);
+ const int32_t output_height = output_shape.Dims(1);
+ const int32_t output_width = output_shape.Dims(2);
+
+ conv_params.input_offset = params.input_offset;
+ conv_params.output_offset = params.output_offset;
+ conv_params.stride.h = params.stride_height;
+ conv_params.stride.w = params.stride_width;
+ conv_params.padding.h = params.padding_values.height;
+ conv_params.padding.w = params.padding_values.width;
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = batches;
+ input_dims.h = input_shape.Dims(1);
+ input_dims.w = input_shape.Dims(2);
+ input_dims.c = input_depth;
+
+ cmsis_nn_dims filter_dims;
+ filter_dims.n = output_depth;
+ filter_dims.h = filter_height;
+ filter_dims.w = filter_width;
+ filter_dims.c = input_depth;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = batches;
+ output_dims.h = output_height;
+ output_dims.w = output_width;
+ output_dims.c = output_depth;
+
+ const int32_t buf_size = arm_convolve_wrapper_s8_get_buffer_size(&conv_params, &input_dims,
+ &filter_dims, &output_dims);
+
+ luci_interpreter::Shape scratchpad_shape{buf_size};
+ scratchpad->resize(scratchpad_shape);
+ }
+ else
+ {
+ scratchpad->set_allocatable(false);
+ }
+}
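+
+// The two entry points above are used in sequence: SetupScratchpadTensor runs at
+// configure time and sizes (or disables) the scratch tensor, while ConvPerChannel
+// takes the CMSIS-NN fast path at run time only when that buffer was allocated.
+// A condensed sketch of the call order (the surrounding kernel code and tensor
+// accessors are illustrative assumptions):
+//
+//   // configure(): decide whether CMSIS-NN needs a scratch buffer for this shape
+//   SetupScratchpadTensor(scratchpad, input_type, params, input_shape,
+//                         filter_shape, output_shape);
+//   // execute(): non-null scratchpad_data selects arm_convolve_wrapper_s8;
+//   // nullptr falls back to the portable reference implementation
+//   ConvPerChannel(params, mult, shifts, input_shape, input_data, filter_shape,
+//                  filter_data, bias_shape, bias_data, output_shape, output_data,
+//                  scratchpad_shape, scratchpad_data);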
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_CONV2D_H
diff --git a/onert-micro/luci-interpreter/pal/cmsisnn/PALFullyConnected.h b/onert-micro/luci-interpreter/pal/cmsisnn/PALFullyConnected.h
new file mode 100644
index 000000000..32e905761
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/cmsisnn/PALFullyConnected.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+#define LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+
+#include <tensorflow/lite/kernels/internal/reference/fully_connected.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h>
+#include <arm_nnfunctions.h>
+
+#include <memory> // std::make_unique is used by the int8_t specialization below
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void FullyConnected(const tflite::FullyConnectedParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &filter_shape, const T *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  // MARK: at this moment the generic version of this operation is not supported;
+  // only the int8_t specialization below is implemented
+  assert(false && "FullyConnected NYI");
+  (void)params;
+  (void)input_shape;
+  (void)input_data;
+  (void)filter_shape;
+  (void)filter_data;
+  (void)bias_shape;
+  (void)bias_data;
+  (void)output_shape;
+  (void)output_data;
+}
+
+template <>
+inline void
+FullyConnected<int8_t>(const tflite::FullyConnectedParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data)
+{
+ assert(output_shape.DimensionsCount() == 2);
+
+ const int batches = output_shape.Dims(0);
+ const int output_depth = output_shape.Dims(1);
+
+ const int filter_dim_count = filter_shape.DimensionsCount();
+ const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
+
+ cmsis_nn_fc_params fc_params;
+ fc_params.input_offset = params.input_offset;
+ fc_params.output_offset = params.output_offset;
+ fc_params.filter_offset = params.weights_offset;
+ fc_params.activation.min = params.quantized_activation_min;
+ fc_params.activation.max = params.quantized_activation_max;
+
+ cmsis_nn_per_tensor_quant_params quant_params;
+ quant_params.multiplier = params.output_multiplier;
+ quant_params.shift = params.output_shift;
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = batches;
+ input_dims.h = 1;
+ input_dims.w = 1;
+ input_dims.c = accum_depth;
+
+ cmsis_nn_dims filter_dims;
+ filter_dims.n = accum_depth;
+ filter_dims.h = 1;
+ filter_dims.w = 1;
+ filter_dims.c = output_depth;
+
+ cmsis_nn_dims bias_dims;
+ bias_dims.n = 1;
+ bias_dims.h = 1;
+ bias_dims.w = 1;
+ bias_dims.c = output_depth;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = batches;
+ output_dims.h = 1;
+ output_dims.w = 1;
+ output_dims.c = output_depth;
+
+ int32_t buf_size = arm_fully_connected_s8_get_buffer_size(&filter_dims);
+ auto buffer = std::make_unique<int8_t[]>(buf_size);
+ assert(buffer != nullptr);
+
+ cmsis_nn_context ctx;
+ ctx.buf = buffer.get();
+ ctx.size = buf_size;
+
+ auto res =
+ arm_fully_connected_s8(&ctx, &fc_params, &quant_params, &input_dims, input_data, &filter_dims,
+ filter_data, &bias_dims, bias_data, &output_dims, output_data);
+ assert(res == ARM_MATH_SUCCESS);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
diff --git a/onert-micro/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h b/onert-micro/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h
new file mode 100644
index 000000000..38a302fc6
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2POOL2D_H
+#define LUCI_INTERPRETER_PAL_L2POOL2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Pool(const tflite::PoolParams &params, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::reference_ops::L2Pool(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2POOL2D_H
diff --git a/onert-micro/luci-interpreter/pal/cmsisnn/PALMul.h b/onert-micro/luci-interpreter/pal/cmsisnn/PALMul.h
new file mode 100644
index 000000000..347a97a83
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/cmsisnn/PALMul.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_MUL_H
+#define LUCI_INTERPRETER_PAL_MUL_H
+
+#include <tensorflow/lite/kernels/internal/reference/mul.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data);
+}
+
+template <typename T>
+static inline void
+BroadcastMul4DSlow(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_MUL_H
diff --git a/onert-micro/luci-interpreter/pal/cmsisnn/PALSub.h b/onert-micro/luci-interpreter/pal/cmsisnn/PALSub.h
new file mode 100644
index 000000000..ea57578c6
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/cmsisnn/PALSub.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SUB_H
+#define LUCI_INTERPRETER_PAL_SUB_H
+
+#include <tensorflow/lite/kernels/internal/reference/sub.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Sub(const tflite::ArithmeticParams &params,
+ const tflite::RuntimeShape &input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &input2_shape, const T *input2_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::Sub(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SUB_H
diff --git a/onert-micro/luci-interpreter/pal/cmsisnn/PALUnidirectionalSequenceLSTM.h b/onert-micro/luci-interpreter/pal/cmsisnn/PALUnidirectionalSequenceLSTM.h
new file mode 100644
index 000000000..1a86e74ab
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/cmsisnn/PALUnidirectionalSequenceLSTM.h
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_UNIDIRECTIONAL_SEQUENCE_LSTM_H
+#define LUCI_INTERPRETER_PAL_UNIDIRECTIONAL_SEQUENCE_LSTM_H
+
+#include "arm_nnfunctions.h"
+#include "core/KernelParams.h"
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h"
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h"
+#include "fixedpoint/fixedpoint.h"
+
+namespace luci_interpreter_pal
+{
+namespace lstm
+{
+
+inline cmsis_nn_lstm_params
+convert_lstm_params(const luci_interpreter::IntegerLSTMParams &params_in, bool time_major,
+ int32_t output_zeropoint, const int32_t *input_gate_bias,
+ const int32_t *forget_gate_bias, const int32_t *cell_gate_bias,
+ const int32_t *output_gate_bias, int16_t *input_layer_norm_coefficients,
+ int16_t *forget_layer_norm_coefficients, int16_t *cell_layer_norm_coefficients,
+ int16_t *output_layer_norm_coefficients)
+{
+ cmsis_nn_lstm_params params_out;
+
+ params_out.time_major = time_major;
+
+ // Multipliers and shifts for weights
+ params_out.input_to_input_scaling.multiplier = params_in.effective_input_to_input_scale_a;
+ params_out.input_to_input_scaling.shift = params_in.effective_input_to_input_scale_b;
+ params_out.recurrent_to_input_scaling.multiplier = params_in.effective_recurrent_to_input_scale_a;
+ params_out.recurrent_to_input_scaling.shift = params_in.effective_recurrent_to_input_scale_b;
+ params_out.cell_to_input_scaling.multiplier = params_in.effective_cell_to_input_scale_a;
+ params_out.cell_to_input_scaling.shift = params_in.effective_cell_to_input_scale_b;
+ params_out.input_to_forget_scaling.multiplier = params_in.effective_input_to_forget_scale_a;
+ params_out.input_to_forget_scaling.shift = params_in.effective_input_to_forget_scale_b;
+ params_out.recurrent_to_forget_scaling.multiplier =
+ params_in.effective_recurrent_to_forget_scale_a;
+ params_out.recurrent_to_forget_scaling.shift = params_in.effective_recurrent_to_forget_scale_b;
+ params_out.cell_to_forget_scaling.multiplier = params_in.effective_cell_to_forget_scale_a;
+ params_out.cell_to_forget_scaling.shift = params_in.effective_cell_to_forget_scale_b;
+ params_out.input_to_cell_scaling.multiplier = params_in.effective_input_to_cell_scale_a;
+ params_out.input_to_cell_scaling.shift = params_in.effective_input_to_cell_scale_b;
+ params_out.recurrent_to_cell_scaling.multiplier = params_in.effective_recurrent_to_cell_scale_a;
+ params_out.recurrent_to_cell_scaling.shift = params_in.effective_recurrent_to_cell_scale_b;
+ params_out.input_to_output_scaling.multiplier = params_in.effective_input_to_output_scale_a;
+ params_out.input_to_output_scaling.shift = params_in.effective_input_to_output_scale_b;
+
+ params_out.recurrent_to_output_scaling.multiplier =
+ params_in.effective_recurrent_to_output_scale_a;
+ params_out.recurrent_to_output_scaling.shift = params_in.effective_recurrent_to_output_scale_b;
+ params_out.cell_to_output_scaling.multiplier = params_in.effective_cell_to_output_scale_a;
+ params_out.cell_to_output_scaling.shift = params_in.effective_cell_to_output_scale_b;
+ params_out.projection_scaling.multiplier = params_in.effective_proj_scale_a;
+ params_out.projection_scaling.shift = params_in.effective_proj_scale_b;
+
+ params_out.layer_norm_input_scaling.multiplier = params_in.layer_norm_input_scale_a;
+ params_out.layer_norm_input_scaling.shift = params_in.layer_norm_input_scale_b;
+ params_out.layer_norm_forget_scaling.multiplier = params_in.layer_norm_forget_scale_a;
+ params_out.layer_norm_forget_scaling.shift = params_in.layer_norm_forget_scale_b;
+ params_out.layer_norm_cell_scaling.multiplier = params_in.layer_norm_cell_scale_a;
+ params_out.layer_norm_cell_scaling.shift = params_in.layer_norm_cell_scale_b;
+ params_out.layer_norm_output_scaling.multiplier = params_in.layer_norm_output_scale_a;
+ params_out.layer_norm_output_scaling.shift = params_in.layer_norm_output_scale_b;
+
+ params_out.clip.cell = params_in.quantized_cell_clip;
+ params_out.clip.projection = params_in.quantized_proj_clip;
+
+ params_out.cell_state_shift = params_in.cell_scale;
+
+ params_out.hidden_offset = params_in.hidden_zp;
+ params_out.output_state_offset = output_zeropoint;
+
+ params_out.guard.input_variance = params_in.input_variance_guard;
+ params_out.guard.forget_variance = params_in.forget_variance_guard;
+ params_out.guard.cell_variance = params_in.cell_variance_guard;
+ params_out.guard.output_variance = params_in.output_variance_guard;
+
+ params_out.i2f_effective_bias = params_in.input_to_forget_effective_bias.data();
+ params_out.r2f_effective_bias = params_in.recurrent_to_forget_effective_bias.data();
+ params_out.i2c_effective_bias = params_in.input_to_cell_effective_bias.data();
+ params_out.r2c_effective_bias = params_in.recurrent_to_cell_effective_bias.data();
+ params_out.i2o_effective_bias = params_in.input_to_output_effective_bias.data();
+ params_out.r2o_effective_bias = params_in.recurrent_to_output_effective_bias.data();
+ params_out.i2i_effective_bias = params_in.input_to_input_effective_bias.data();
+ params_out.r2i_effective_bias = params_in.recurrent_to_input_effective_bias.data();
+ params_out.projection_effective_bias = params_in.projection_effective_bias.data();
+
+ params_out.hidden_scaling.multiplier = params_in.effective_hidden_scale_a;
+ params_out.hidden_scaling.shift = params_in.effective_hidden_scale_b;
+
+ params_out.input_gate_bias = input_gate_bias;
+ params_out.forget_gate_bias = forget_gate_bias;
+ params_out.cell_gate_bias = cell_gate_bias;
+ params_out.output_gate_bias = output_gate_bias;
+
+ params_out.layer_norm.input_weight = input_layer_norm_coefficients;
+ params_out.layer_norm.forget_weight = forget_layer_norm_coefficients;
+ params_out.layer_norm.cell_weight = cell_layer_norm_coefficients;
+ params_out.layer_norm.output_weight = output_layer_norm_coefficients;
+
+ params_out.activation.min = std::numeric_limits<int16_t>::min();
+ params_out.activation.max = std::numeric_limits<int16_t>::max();
+
+ return params_out;
+}
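+
+// Note: the multiplier/shift pairs copied above are assumed to follow the
+// usual TFLite fixed-point convention, i.e. a real-valued scale s is encoded
+// so that s ~= multiplier * 2^(shift - 31); this helper only rewires fields
+// and performs no arithmetic of its own.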
+
+} // namespace lstm
+
+void eval_integer_8x8_16_lstm(
+ const luci_interpreter::Tensor *input, const luci_interpreter::Tensor *input_to_input_weights,
+ const luci_interpreter::Tensor *input_to_forget_weights,
+ const luci_interpreter::Tensor *input_to_cell_weights,
+ const luci_interpreter::Tensor *input_to_output_weights,
+ const luci_interpreter::Tensor *recurrent_to_input_weights,
+ const luci_interpreter::Tensor *recurrent_to_forget_weights,
+ const luci_interpreter::Tensor *recurrent_to_cell_weights,
+ const luci_interpreter::Tensor *recurrent_to_output_weights,
+ const luci_interpreter::Tensor *cell_to_input_weights,
+ const luci_interpreter::Tensor *cell_to_forget_weights,
+ const luci_interpreter::Tensor *cell_to_output_weights,
+ const luci_interpreter::Tensor *input_layer_norm_coefficients,
+ const luci_interpreter::Tensor *forget_layer_norm_coefficients,
+ const luci_interpreter::Tensor *cell_layer_norm_coefficients,
+ const luci_interpreter::Tensor *output_layer_norm_coefficients,
+ const luci_interpreter::Tensor *input_gate_bias, const luci_interpreter::Tensor *forget_gate_bias,
+ const luci_interpreter::Tensor *cell_gate_bias, const luci_interpreter::Tensor *output_gate_bias,
+ const luci_interpreter::Tensor *projection_weights,
+ const luci_interpreter::Tensor *projection_bias,
+ const luci_interpreter::UnidirectionalSequenceLSTMParams &params, bool forward_sequence,
+ bool time_major, const luci_interpreter::IntegerLSTMParams &integer_lstm_param,
+ int32_t output_state_zp, luci_interpreter::Tensor *output_state,
+ luci_interpreter::Tensor *cell_state, luci_interpreter::Tensor *output, int16_t *scratch0,
+ int16_t *scratch1, int16_t *scratch2, int16_t *scratch3, int8_t *scratch4, int32_t *scratch5)
+{
+  // CMSIS-NN does not currently support these configurations.
+  // Please use the MCU kernels instead.
+ const bool use_layer_norm = (forget_layer_norm_coefficients != nullptr);
+ const bool use_peephole = (cell_to_output_weights != nullptr);
+ const bool use_projection = (projection_weights != nullptr);
+ const bool use_cifg = (input_to_input_weights == nullptr);
+ const bool unsupported_config = use_layer_norm || use_peephole || use_projection || use_cifg;
+
+ if (unsupported_config)
+ {
+ assert(false && "CMSIS-NN does not support these configurations currently");
+ return;
+ }
+
+ const auto input_shape = input->shape();
+ LUCI_INTERPRETER_CHECK(input_shape.num_dims() >= 2 && input_shape.num_dims() <= 3);
+
+ cmsis_nn_lstm_context scratch_buffers;
+ scratch_buffers.input_gate = scratch0;
+ scratch_buffers.forget_gate = scratch1;
+ scratch_buffers.cell_gate = scratch2;
+ scratch_buffers.output_gate = scratch3;
+ scratch_buffers.scratch = scratch4;
+
+ cmsis_nn_lstm_params cmsis_lstm_params = lstm::convert_lstm_params(
+ integer_lstm_param, time_major, output_state_zp,
+ luci_interpreter::kernels::getTensorData<int32_t>(input_gate_bias),
+ luci_interpreter::kernels::getTensorData<int32_t>(forget_gate_bias),
+ luci_interpreter::kernels::getTensorData<int32_t>(cell_gate_bias),
+ luci_interpreter::kernels::getTensorData<int32_t>(output_gate_bias),
+ const_cast<int16_t *>(
+ luci_interpreter::kernels::getTensorData<int16_t>(input_layer_norm_coefficients)),
+ const_cast<int16_t *>(
+ luci_interpreter::kernels::getTensorData<int16_t>(forget_layer_norm_coefficients)),
+ const_cast<int16_t *>(
+ luci_interpreter::kernels::getTensorData<int16_t>(cell_layer_norm_coefficients)),
+ const_cast<int16_t *>(
+ luci_interpreter::kernels::getTensorData<int16_t>(output_layer_norm_coefficients)));
+
+ const int n_input = input_shape.dim(input_shape.num_dims() - 1);
+ int max_time, n_batch;
+ if (input_shape.num_dims() == 2)
+ {
+ max_time = 1;
+ n_batch = input_shape.dim(0);
+ }
+ else
+ {
+ max_time = (time_major) ? input_shape.dim(0) : input_shape.dim(1);
+ n_batch = (time_major) ? input_shape.dim(1) : input_shape.dim(0);
+ }
+
+ // n_cell and n_output will be the same size when there is no projection.
+ const int n_cell = input_to_output_weights->shape().dim(0);
+ const int n_output = recurrent_to_output_weights->shape().dim(1);
+
+ cmsis_nn_lstm_dims lstm_dims;
+ lstm_dims.num_inputs = n_input;
+ lstm_dims.num_outputs = n_output;
+ lstm_dims.num_batches = n_batch;
+ lstm_dims.max_time = max_time;
+
+ arm_lstm_unidirectional_s16_s8(
+ &scratch_buffers, const_cast<int8_t *>(luci_interpreter::kernels::getTensorData<int8_t>(input)),
+ &lstm_dims,
+ const_cast<int8_t *>(luci_interpreter::kernels::getTensorData<int8_t>(input_to_input_weights)),
+ const_cast<int8_t *>(luci_interpreter::kernels::getTensorData<int8_t>(input_to_forget_weights)),
+ const_cast<int8_t *>(luci_interpreter::kernels::getTensorData<int8_t>(input_to_cell_weights)),
+ const_cast<int8_t *>(luci_interpreter::kernels::getTensorData<int8_t>(input_to_output_weights)),
+ const_cast<int8_t *>(
+ luci_interpreter::kernels::getTensorData<int8_t>(recurrent_to_input_weights)),
+ const_cast<int8_t *>(
+ luci_interpreter::kernels::getTensorData<int8_t>(recurrent_to_forget_weights)),
+ const_cast<int8_t *>(
+ luci_interpreter::kernels::getTensorData<int8_t>(recurrent_to_cell_weights)),
+ const_cast<int8_t *>(
+ luci_interpreter::kernels::getTensorData<int8_t>(recurrent_to_output_weights)),
+ const_cast<int16_t *>(luci_interpreter::kernels::getTensorData<int16_t>(cell_to_input_weights)),
+ const_cast<int16_t *>(
+ luci_interpreter::kernels::getTensorData<int16_t>(cell_to_forget_weights)),
+ const_cast<int16_t *>(
+ luci_interpreter::kernels::getTensorData<int16_t>(cell_to_output_weights)),
+ const_cast<int8_t *>(luci_interpreter::kernels::getTensorData<int8_t>(projection_weights)),
+ &cmsis_lstm_params,
+ const_cast<int8_t *>(luci_interpreter::kernels::getTensorData<int8_t>(output_state)),
+ const_cast<int16_t *>(luci_interpreter::kernels::getTensorData<int16_t>(cell_state)),
+ const_cast<int8_t *>(luci_interpreter::kernels::getTensorData<int8_t>(output)));
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_UNIDIRECTIONAL_SEQUENCE_LSTM_H
diff --git a/onert-micro/luci-interpreter/pal/cmsisnn/pal.cmake b/onert-micro/luci-interpreter/pal/cmsisnn/pal.cmake
new file mode 100644
index 000000000..511047450
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/cmsisnn/pal.cmake
@@ -0,0 +1,83 @@
+macro(initialize_pal)
+ nnas_find_package(TensorFlowSource EXACT 2.6.0 REQUIRED)
+ nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 REQUIRED)
+ nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 REQUIRED)
+ nnas_find_package(TensorFlowRuySource EXACT 2.6.0 REQUIRED)
+ nnas_find_package(CMSIS-NN EXACT 4.0.0 REQUIRED)
+
+ if (NOT TensorFlowSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: TensorFlow not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowGEMMLowpSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: gemmlowp not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowEigenSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Eigen not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowRuySource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Ruy not found")
+ return()
+ endif ()
+
+ if (NOT CMSISSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: CMSISSource not found")
+ return()
+ endif ()
+
+ if (NOT CMSIS_NNSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: CMSIS-NN not found")
+ return()
+ endif ()
+
+ set(PAL_INITIALIZED TRUE)
+endmacro()
+
+macro(add_pal_to_target TGT)
+ target_include_directories(${TGT} PRIVATE "${PAL}")
+ target_include_directories(${TGT} PRIVATE
+ "${TensorFlowRuySource_DIR}"
+ "${TensorFlowGEMMLowpSource_DIR}"
+ "${TensorFlowEigenSource_DIR}"
+ "${TensorFlowSource_DIR}")
+ target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR})
+
+ file(GLOB_RECURSE PAL_SOURCES "${CMSIS_NNSource_DIR}/Source/ActivationFunctions/*.c"
+ "${CMSIS_NNSource_DIR}/Source/BasicMathFunctions/*.c"
+ "${CMSIS_NNSource_DIR}/Source/ConcatenationFunctions/*.c"
+ "${CMSIS_NNSource_DIR}/Source/ConvolutionFunctions/*.c"
+ "${CMSIS_NNSource_DIR}/Source/FullyConnectedFunctions/*.c"
+ "${CMSIS_NNSource_DIR}/Source/LSTMFunctions/*.c"
+ "${CMSIS_NNSource_DIR}/Source/NNSupportFunctions/*.c"
+ "${CMSIS_NNSource_DIR}/Source/PoolingFunctions/*.c"
+ "${CMSIS_NNSource_DIR}/Source/ReshapeFunctions/*.c"
+ "${CMSIS_NNSource_DIR}/Source/SoftmaxFunctions/*.c")
+
+ list(APPEND PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc)
+ add_library(luci_interpreter_cmsisnn_pal STATIC ${PAL_SOURCES})
+ set_property(TARGET luci_interpreter_cmsisnn_pal PROPERTY POSITION_INDEPENDENT_CODE ON)
+ target_include_directories(luci_interpreter_cmsisnn_pal PRIVATE
+ "${TensorFlowRuySource_DIR}"
+ "${TensorFlowGEMMLowpSource_DIR}"
+ "${TensorFlowEigenSource_DIR}"
+ "${TensorFlowSource_DIR}"
+ "${CMSIS_NNSource_DIR}"
+ )
+
+ set(CMSIS_PATH ${CMSISSource_DIR} CACHE INTERNAL "CMSIS_PATH")
+ add_subdirectory(${CMSIS_NNSource_DIR} ${CMAKE_CURRENT_BINARY_DIR}/CMSISNN)
+
+ target_include_directories(luci_interpreter_cmsisnn_pal PUBLIC
+ "${CMSISSource_DIR}/CMSIS/DSP/Include"
+ "${CMSISSource_DIR}/CMSIS/Core/Include"
+ "${CMSIS_NNSource_DIR}/Include")
+
+ target_link_libraries(${TGT} PRIVATE luci_interpreter_cmsisnn_pal)
+endmacro()
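+
+# Usage sketch (illustrative; the caller and target name below are
+# assumptions, not part of this file): a CMakeLists.txt selecting this PAL
+# would call the macros roughly as
+#   initialize_pal()
+#   if (PAL_INITIALIZED)
+#     add_pal_to_target(luci_interpreter_kernels)
+#   endif ()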
diff --git a/onert-micro/luci-interpreter/pal/common/PALAbs.h b/onert-micro/luci-interpreter/pal/common/PALAbs.h
new file mode 100644
index 000000000..a1f5ae374
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/common/PALAbs.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ABS_H
+#define LUCI_INTERPRETER_PAL_ABS_H
+
+#include <cmath>
+
+namespace luci_interpreter_pal
+{
+
+inline void Abs(const int flat_size, const float *input_data, float *output_data)
+{
+ for (int i = 0; i < flat_size; ++i)
+ {
+ output_data[i] = std::abs(input_data[i]);
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ABS_H
diff --git a/onert-micro/luci-interpreter/pal/common/PALAddCommon.h b/onert-micro/luci-interpreter/pal/common/PALAddCommon.h
new file mode 100644
index 000000000..57f9b107e
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/common/PALAddCommon.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ADD_COMMON_H
+#define LUCI_INTERPRETER_PAL_ADD_COMMON_H
+
+#include "Params.h"
+#include "PALUtils.h"
+#include "ProcessBroadcastShapes.h"
+
+namespace luci_interpreter_pal
+{
+
+// TODO: check if there is a real activation value
+template <typename T>
+inline void Add(const ArithmeticParams &params, const int flat_size, const T *input1_data,
+ const T *input2_data, T *output_data)
+{
+ T activation_min, activation_max;
+ getActivationParams(params, &activation_min, &activation_max);
+
+ for (int i = 0; i < flat_size; ++i)
+ output_data[i] =
+ std::min(std::max(input1_data[i] + input2_data[i], activation_min), activation_max);
+}
+
+template <typename T>
+inline void
+BroadcastAdd4DSlow(const ArithmeticParams &params,
+ const luci_interpreter::RuntimeShape &input1_shape, const T *input1_data,
+ const luci_interpreter::RuntimeShape &input2_shape, const T *input2_data,
+ const luci_interpreter::RuntimeShape &output_shape, T *output_data)
+{
+ NdArrayDesc<4> desc1;
+ NdArrayDesc<4> desc2;
+ NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
+ const luci_interpreter::RuntimeShape extended_output_shape =
+ luci_interpreter::RuntimeShape::extendedShape(4, output_shape);
+
+ T activation_min, activation_max;
+ getActivationParams(params, &activation_min, &activation_max);
+
+  // In TensorFlow, the dimensions are canonically named (batch_number, row,
+ // col, channel), with extents (batches, height, width, depth), with the
+ // trailing dimension changing most rapidly (channels has the smallest stride,
+ // typically 1 element).
+ //
+ // In generated C code, we store arrays with the dimensions reversed. The
+ // first dimension has smallest stride.
+ //
+  // We name our variables by their TensorFlow convention, but generate C code
+ // nesting loops such that the innermost loop has the smallest stride for the
+ // best cache behavior.
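+  //
+  // Worked example (illustrative): with extended output shape (2, 3, 4, 5),
+  // element (b, y, x, c) lands at offset ((b * 3 + y) * 4 + x) * 5 + c, so c
+  // is the fastest-varying index, matching the innermost loop below.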
+ for (int b = 0; b < extended_output_shape.dims(0); ++b)
+ {
+ for (int y = 0; y < extended_output_shape.dims(1); ++y)
+ {
+ for (int x = 0; x < extended_output_shape.dims(2); ++x)
+ {
+ for (int c = 0; c < extended_output_shape.dims(3); ++c)
+ {
+ const int output_data_offset =
+ ((b * extended_output_shape.dims(1) + y) * extended_output_shape.dims(2) + x) *
+ extended_output_shape.dims(3) +
+ c;
+
+ output_data[output_data_offset] =
+ std::min(std::max(input1_data[subscriptToIndex(desc1, b, y, x, c)] +
+ input2_data[subscriptToIndex(desc2, b, y, x, c)],
+ activation_min),
+ activation_max);
+ }
+ }
+ }
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ADD_COMMON_H
diff --git a/onert-micro/luci-interpreter/pal/common/PALAddN.h b/onert-micro/luci-interpreter/pal/common/PALAddN.h
new file mode 100644
index 000000000..63fbc03c0
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/common/PALAddN.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ADD_N_H
+#define LUCI_INTERPRETER_PAL_ADD_N_H
+
+#include "Params.h"
+#include "PALUtils.h"
+
+namespace luci_interpreter_pal
+{
+
+// T is expected to be either float or int.
+template <typename T>
+inline void AddN(const size_t flat_size, const size_t num_inputs, const T *const *input_data,
+ T *output_data)
+{
+  // All inputs and the output should have the same shape; this is checked
+  // during the Prepare stage.
+ for (size_t i = 0; i < flat_size; ++i)
+ {
+ T x = 0;
+ for (size_t j = 0; j < num_inputs; ++j)
+ {
+ x += input_data[j][i];
+ }
+ output_data[i] = x;
+ }
+}
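+
+// Usage sketch (illustrative): summing two equally shaped float tensors
+// elementwise:
+//   const float *inputs[2] = {a_data, b_data};
+//   AddN<float>(flat_size, 2, inputs, out_data);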
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ADD_N_H
diff --git a/onert-micro/luci-interpreter/pal/common/PALArgMinMax.h b/onert-micro/luci-interpreter/pal/common/PALArgMinMax.h
new file mode 100644
index 000000000..58602d8b9
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/common/PALArgMinMax.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ARG_MIN_MAX_H
+#define LUCI_INTERPRETER_PAL_ARG_MIN_MAX_H
+
+#include "Params.h"
+#include "PALUtils.h"
+
+namespace luci_interpreter_pal
+{
+
+template <typename T1, typename T2, typename T3, typename Cmp>
+void ArgMinMax(const luci_interpreter::RuntimeShape &input1_shape, const T1 *input1_data,
+ const T3 *input2_data, const luci_interpreter::RuntimeShape &output_shape,
+ T2 *output_data, const Cmp &cmp)
+{
+ int axis = input2_data[0];
+ if (axis < 0)
+ {
+ axis += input1_shape.dimensionsCount();
+ }
+ const int axis_size = input1_shape.dims(axis);
+
+ int outer_size = 1;
+ for (int i = 0; i < axis; ++i)
+ {
+ outer_size *= input1_shape.dims(i);
+ }
+
+ int inner_size = 1;
+ const int dims_count = input1_shape.dimensionsCount();
+ for (int i = axis + 1; i < dims_count; ++i)
+ {
+ inner_size *= input1_shape.dims(i);
+ }
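+  // E.g. (illustrative): for an input shape (2, 3, 4) with axis = 1,
+  // outer_size = 2, axis_size = 3 and inner_size = 4; the loops below scan
+  // the 3 candidates along the axis for each of the 2 * 4 outer/inner pairs.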
+ for (int outer = 0; outer < outer_size; ++outer)
+ {
+ for (int inner = 0; inner < inner_size; ++inner)
+ {
+ auto min_max_value = input1_data[outer * axis_size * inner_size + inner];
+ T2 min_max_index = 0;
+ for (int i = 1; i < axis_size; ++i)
+ {
+ const auto &curr_value = input1_data[(outer * axis_size + i) * inner_size + inner];
+ if (cmp(curr_value, min_max_value))
+ {
+ min_max_value = curr_value;
+ min_max_index = static_cast<T2>(i);
+ }
+ }
+ output_data[outer * inner_size + inner] = min_max_index;
+ }
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ARG_MIN_MAX_H
diff --git a/onert-micro/luci-interpreter/pal/common/PALAveragePool2DCommon.h b/onert-micro/luci-interpreter/pal/common/PALAveragePool2DCommon.h
new file mode 100644
index 000000000..ec6bb55b5
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/common/PALAveragePool2DCommon.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_AVERAGE_POOL_2D_COMMON_H
+#define LUCI_INTERPRETER_PAL_AVERAGE_POOL_2D_COMMON_H
+
+#include "Params.h"
+#include "PALUtils.h"
+
+namespace luci_interpreter_pal
+{
+
+// TODO: reduce code duplication with MaxPool
+inline void AveragePool(const PoolParams &params, const luci_interpreter::RuntimeShape &input_shape,
+ const float *input_data, const luci_interpreter::RuntimeShape &output_shape,
+ float *output_data)
+{
+ const int batches = input_shape.dims(0);
+ const int depth = output_shape.dims(3);
+ const int input_height = input_shape.dims(1);
+ const int input_width = input_shape.dims(2);
+ const int output_height = output_shape.dims(1);
+ const int output_width = output_shape.dims(2);
+ const int stride_height = params.stride_height;
+ const int stride_width = params.stride_width;
+ for (int batch = 0; batch < batches; ++batch)
+ {
+ for (int out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (int channel = 0; channel < depth; ++channel)
+ {
+ const int in_x_origin = (out_x * stride_width) - params.padding_values.width;
+ const int in_y_origin = (out_y * stride_height) - params.padding_values.height;
+ // Compute the boundaries of the filter region clamped so as to
+ // ensure that the filter window fits in the input array.
+ const int filter_x_start = std::max(0, -in_x_origin);
+ const int filter_x_end = std::min(params.filter_width, input_width - in_x_origin);
+ const int filter_y_start = std::max(0, -in_y_origin);
+ const int filter_y_end = std::min(params.filter_height, input_height - in_y_origin);
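+          // E.g. (illustrative): with filter_width 3, pad 1, stride 1 and
+          // out_x 0, in_x_origin is -1, so filter_x_start becomes 1 and the
+          // window only covers input columns 0 and 1.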
+
+ float total = 0.f;
+ float filter_count = 0;
+
+ for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y)
+ {
+ for (int filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x)
+ {
+ const int in_x = in_x_origin + filter_x;
+ const int in_y = in_y_origin + filter_y;
+
+ const int input_data_offset =
+ ((batch * input_shape.dims(1) + in_y) * input_shape.dims(2) + in_x) *
+ input_shape.dims(3) +
+ channel;
+
+ total += input_data[input_data_offset];
+ filter_count++;
+ }
+ }
+ const int output_data_offset =
+ ((batch * output_shape.dims(1) + out_y) * output_shape.dims(2) + out_x) *
+ output_shape.dims(3) +
+ channel;
+
+ assert(filter_count != 0);
+ const float average = total / filter_count;
+
+ output_data[output_data_offset] =
+ std::min(std::max(average, params.float_activation_min), params.float_activation_max);
+ }
+ }
+ }
+ }
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_AVERAGE_POOL_2D_COMMON_H
diff --git a/onert-micro/luci-interpreter/pal/common/PALComparisons.h b/onert-micro/luci-interpreter/pal/common/PALComparisons.h
new file mode 100644
index 000000000..bb855a1ad
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/common/PALComparisons.h
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_COMPARISONS_H
+#define LUCI_INTERPRETER_PAL_COMPARISONS_H
+
+#include "Params.h"
+#include "ProcessBroadcastShapes.h"
+#include "PALUtils.h"
+
+namespace luci_interpreter_pal
+{
+namespace
+{
+
+struct BroadcastComparison4DSlowCommon
+{
+ const luci_interpreter::RuntimeShape output_shape;
+ NdArrayDesc<4> desc1;
+ NdArrayDesc<4> desc2;
+};
+
+inline BroadcastComparison4DSlowCommon
+BroadcastComparison4DSlowPreprocess(const luci_interpreter::RuntimeShape &unextended_input1_shape,
+ const luci_interpreter::RuntimeShape &unextended_input2_shape,
+ const luci_interpreter::RuntimeShape &unextended_output_shape)
+{
+ NdArrayDesc<4> desc1;
+ NdArrayDesc<4> desc2;
+ NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, unextended_input2_shape, &desc1,
+ &desc2);
+ return {luci_interpreter::RuntimeShape::extendedShape(4, unextended_output_shape), desc1, desc2};
+}
+
+} // namespace
+
+template <typename T> inline bool LessFn(T lhs, T rhs) { return lhs < rhs; }
+template <typename T> inline bool LessEqualFn(T lhs, T rhs) { return lhs <= rhs; }
+template <typename T> inline bool EqualFn(T lhs, T rhs) { return lhs == rhs; }
+template <typename T> inline bool GreaterFn(T lhs, T rhs) { return lhs > rhs; }
+template <typename T> inline bool GreaterEqualFn(T lhs, T rhs) { return lhs >= rhs; }
+template <typename T> inline bool NotEqualFn(T lhs, T rhs) { return lhs != rhs; }
+
+template <typename T>
+inline void ComparisonNoScaling(const int64_t flat_size, const T *input1_data, const T *input2_data,
+ bool *output_data, bool F(T, T))
+{
+ for (int64_t i = 0; i < flat_size; ++i)
+ {
+ output_data[i] = F(input1_data[i], input2_data[i]);
+ }
+}
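+
+// Usage sketch (illustrative): an elementwise Less over float tensors can be
+// written as
+//   ComparisonNoScaling<float>(flat_size, a_data, b_data, out_data, LessFn<float>);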
+
+template <typename T>
+inline void BroadcastComparison4DSlowWithScaling(
+ const ComparisonParams &op_params, const luci_interpreter::RuntimeShape &unextended_input1_shape,
+ const T *input1_data, const luci_interpreter::RuntimeShape &unextended_input2_shape,
+ const T *input2_data, const luci_interpreter::RuntimeShape &unextended_output_shape,
+ bool *output_data, bool F(T, T))
+{
+ const BroadcastComparison4DSlowCommon dims = BroadcastComparison4DSlowPreprocess(
+ unextended_input1_shape, unextended_input2_shape, unextended_output_shape);
+
+ int left_shift = op_params.left_shift;
+ int32_t input1_offset = op_params.input1_offset;
+ int32_t input1_multiplier = op_params.input1_multiplier;
+ int input1_shift = op_params.input1_shift;
+ int32_t input2_offset = op_params.input2_offset;
+ int32_t input2_multiplier = op_params.input2_multiplier;
+ int input2_shift = op_params.input2_shift;
+
+ for (int b = 0; b < dims.output_shape.dims(0); ++b)
+ {
+ for (int y = 0; y < dims.output_shape.dims(1); ++y)
+ {
+ for (int x = 0; x < dims.output_shape.dims(2); ++x)
+ {
+ for (int c = 0; c < dims.output_shape.dims(3); ++c)
+ {
+ const int32_t input1_val =
+ input1_offset + input1_data[subscriptToIndex(dims.desc1, b, y, x, c)];
+ const int32_t input2_val =
+ input2_offset + input2_data[subscriptToIndex(dims.desc2, b, y, x, c)];
+ const int32_t shifted_input1_val = input1_val * (1 << left_shift);
+ const int32_t shifted_input2_val = input2_val * (1 << left_shift);
+ const int32_t scaled_input1_val = multiplyByQuantizedMultiplierSmallerThanOneExp(
+ shifted_input1_val, input1_multiplier, input1_shift);
+ const int32_t scaled_input2_val = multiplyByQuantizedMultiplierSmallerThanOneExp(
+ shifted_input2_val, input2_multiplier, input2_shift);
+
+ const int output_data_offset =
+ ((b * dims.output_shape.dims(1) + y) * dims.output_shape.dims(2) + x) *
+ dims.output_shape.dims(3) +
+ c;
+ output_data[output_data_offset] = F(scaled_input1_val, scaled_input2_val);
+ }
+ }
+ }
+ }
+}
+
+template <typename T>
+inline void ComparisonWithScaling(const ComparisonParams &op_params, const int64_t flat_size,
+ const T *input1_data, const T *input2_data, bool *output_data,
+ bool F(T, T))
+{
+ int left_shift = op_params.left_shift;
+ int32_t input1_offset = op_params.input1_offset;
+ int32_t input1_multiplier = op_params.input1_multiplier;
+ int input1_shift = op_params.input1_shift;
+ int32_t input2_offset = op_params.input2_offset;
+ int32_t input2_multiplier = op_params.input2_multiplier;
+ int input2_shift = op_params.input2_shift;
+
+ for (int64_t i = 0; i < flat_size; ++i)
+ {
+ const int32_t input1_val = input1_offset + input1_data[i];
+ const int32_t input2_val = input2_offset + input2_data[i];
+ const int32_t shifted_input1_val = input1_val * (1 << left_shift);
+ const int32_t shifted_input2_val = input2_val * (1 << left_shift);
+ const int32_t scaled_input1_val = multiplyByQuantizedMultiplierSmallerThanOneExp(
+ shifted_input1_val, input1_multiplier, input1_shift);
+ const int32_t scaled_input2_val = multiplyByQuantizedMultiplierSmallerThanOneExp(
+ shifted_input2_val, input2_multiplier, input2_shift);
+ output_data[i] = F(scaled_input1_val, scaled_input2_val);
+ }
+}
+
+template <typename T>
+inline void BroadcastComparison4DSlowNoScaling(
+ const ComparisonParams &op_params, const luci_interpreter::RuntimeShape &unextended_input1_shape,
+ const T *input1_data, const luci_interpreter::RuntimeShape &unextended_input2_shape,
+ const T *input2_data, const luci_interpreter::RuntimeShape &unextended_output_shape,
+ bool *output_data, bool F(T, T))
+{
+ const BroadcastComparison4DSlowCommon dims = BroadcastComparison4DSlowPreprocess(
+ unextended_input1_shape, unextended_input2_shape, unextended_output_shape);
+
+ for (int b = 0; b < dims.output_shape.dims(0); ++b)
+ {
+ for (int y = 0; y < dims.output_shape.dims(1); ++y)
+ {
+ for (int x = 0; x < dims.output_shape.dims(2); ++x)
+ {
+ for (int c = 0; c < dims.output_shape.dims(3); ++c)
+ {
+ const int output_data_offset =
+ ((b * dims.output_shape.dims(1) + y) * dims.output_shape.dims(2) + x) *
+ dims.output_shape.dims(3) +
+ c;
+ output_data[output_data_offset] =
+ F(input1_data[subscriptToIndex(dims.desc1, b, y, x, c)],
+ input2_data[subscriptToIndex(dims.desc2, b, y, x, c)]);
+ }
+ }
+ }
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_COMPARISONS_H
diff --git a/onert-micro/luci-interpreter/pal/common/PALConcatenation.h b/onert-micro/luci-interpreter/pal/common/PALConcatenation.h
new file mode 100644
index 000000000..2bd385810
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/common/PALConcatenation.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_CONCATENATION_H
+#define LUCI_INTERPRETER_PAL_CONCATENATION_H
+
+#include "Params.h"
+#include "PALUtils.h"
+
+namespace luci_interpreter_pal
+{
+
+template <typename Scalar>
+inline void Concatenation(const ConcatenationParams &params,
+ const luci_interpreter::RuntimeShape *const *input_shapes,
+ const Scalar *const *input_data,
+ const luci_interpreter::RuntimeShape &output_shape, Scalar *output_data)
+{
+ int axis = params.axis;
+ int inputs_count = params.inputs_count;
+ const int concat_dimensions = output_shape.dimensionsCount();
+
+ int64_t concat_size = 0;
+ for (int i = 0; i < inputs_count; i++)
+ {
+ concat_size += input_shapes[i]->dims(axis);
+ }
+ int64_t outer_size = 1;
+ for (int i = 0; i < axis; ++i)
+ {
+ outer_size *= output_shape.dims(i);
+ }
+ // For all input arrays,
+ // FlatSize() = outer_size * Dims(axis) * base_inner_size;
+ int64_t base_inner_size = 1;
+ for (int i = axis + 1; i < concat_dimensions; ++i)
+ {
+ base_inner_size *= output_shape.dims(i);
+ }
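+  // E.g. (illustrative): concatenating shapes (2, 3, 5) and (2, 4, 5) along
+  // axis 1 gives outer_size = 2 and base_inner_size = 5, so each outer step
+  // copies 3 * 5 elements from the first input and 4 * 5 from the second.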
+
+ Scalar *output_ptr = output_data;
+ for (int k = 0; k < outer_size; k++)
+ {
+ for (int i = 0; i < inputs_count; ++i)
+ {
+ const int copy_size = input_shapes[i]->dims(axis) * base_inner_size;
+ const Scalar *input_ptr = input_data[i] + k * copy_size;
+ memcpy(output_ptr, input_ptr, copy_size * sizeof(Scalar));
+ output_ptr += copy_size;
+ }
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_CONCATENATION_H
diff --git a/onert-micro/luci-interpreter/pal/common/PALConv2DCommon.h b/onert-micro/luci-interpreter/pal/common/PALConv2DCommon.h
new file mode 100644
index 000000000..04b92cd48
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/common/PALConv2DCommon.h
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_CONV2D_COMMON_H
+#define LUCI_INTERPRETER_PAL_CONV2D_COMMON_H
+#include "Params.h"
+#include "PALUtils.h"
+
+namespace luci_interpreter_pal
+{
+static inline void Conv(const ConvParams &params, const int32_t *input_shape,
+ const float *input_data, const int32_t *filter_shape,
+ const float *filter_data, const float *bias_data,
+ const int32_t *output_shape, float *output_data)
+{
+ const int stride_width = params.stride_width;
+ const int stride_height = params.stride_height;
+ const int dilation_width_factor = params.dilation_width_factor;
+ const int dilation_height_factor = params.dilation_height_factor;
+ const int pad_width = params.padding_values.width;
+ const int pad_height = params.padding_values.height;
+ const float output_activation_min = params.float_activation_min;
+ const float output_activation_max = params.float_activation_max;
+
+ const auto batches = input_shape[0];
+ const int input_height = input_shape[1];
+ const int input_width = input_shape[2];
+ const int input_depth = input_shape[3];
+ const int output_depth = filter_shape[0];
+ const int filter_height = filter_shape[1];
+ const int filter_width = filter_shape[2];
+ const int output_height = output_shape[1];
+ const int output_width = output_shape[2];
+ for (int batch = 0; batch < batches; ++batch)
+ {
+ for (int out_y = 0; out_y < output_height; ++out_y)
+ {
+ const int in_y_origin = (out_y * stride_height) - pad_height;
+ for (int out_x = 0; out_x < output_width; ++out_x)
+ {
+ const int in_x_origin = (out_x * stride_width) - pad_width;
+ for (int out_channel = 0; out_channel < output_depth; ++out_channel)
+ {
+ float total = 0.f;
+ for (int filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ const int in_y = in_y_origin + dilation_height_factor * filter_y;
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int in_x = in_x_origin + dilation_width_factor * filter_x;
+
+ // Zero padding by omitting the areas outside the image.
+ const bool is_point_inside_image =
+ (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);
+
+ if (!is_point_inside_image)
+ {
+ continue;
+ }
+
+ for (int in_channel = 0; in_channel < input_depth; ++in_channel)
+ {
+ const int input_data_offset =
+ ((batch * input_height + in_y) * input_width + in_x) * input_depth + in_channel;
+
+ const int filter_data_offset =
+ ((out_channel * filter_height + filter_y) * filter_width + filter_x) *
+ input_depth +
+ in_channel;
+
+ const float input_value = input_data[input_data_offset];
+ const float filter_value = filter_data[filter_data_offset];
+ total += (input_value * filter_value);
+ }
+ }
+ }
+ if (bias_data)
+ {
+ total += bias_data[out_channel];
+ }
+
+ const int output_data_offset =
+ ((batch * output_height + out_y) * output_width + out_x) * output_depth + out_channel;
+
+ output_data[output_data_offset] =
+ std::min(std::max(total, output_activation_min), output_activation_max);
+ }
+ }
+ }
+ }
+}
+
+static inline void Conv(const ConvParams &params, const int32_t *input_shape,
+ const uint8_t *input_data, const int32_t *filter_shape,
+ const uint8_t *filter_data, const int32_t *bias_data,
+ const int32_t *output_shape, uint8_t *output_data)
+{
+ const int stride_width = params.stride_width;
+ const int stride_height = params.stride_height;
+ const int dilation_width_factor = params.dilation_width_factor;
+ const int dilation_height_factor = params.dilation_height_factor;
+ const int pad_width = params.padding_values.width;
+ const int pad_height = params.padding_values.height;
+ const int32_t input_offset = params.input_offset;
+ const int32_t filter_offset = params.weights_offset;
+ const int32_t output_offset = params.output_offset;
+ const int32_t output_multiplier = params.output_multiplier;
+ const int output_shift = params.output_shift;
+ const int32_t output_activation_min = params.quantized_activation_min;
+ const int32_t output_activation_max = params.quantized_activation_max;
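+  // Assumption (usual TFLite convention): input_offset and filter_offset hold
+  // the negated zero points, so (value + offset) in the inner loop recovers
+  // the zero-centred integer; the kernel does not verify this itself.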
+
+ const auto batches = input_shape[0];
+ const int input_height = input_shape[1];
+ const int input_width = input_shape[2];
+ const int input_depth = input_shape[3];
+ const int output_depth = filter_shape[0];
+ const int filter_height = filter_shape[1];
+ const int filter_width = filter_shape[2];
+ const int output_height = output_shape[1];
+ const int output_width = output_shape[2];
+
+ for (int batch = 0; batch < batches; ++batch)
+ {
+ for (int out_y = 0; out_y < output_height; ++out_y)
+ {
+ const int in_y_origin = (out_y * stride_height) - pad_height;
+ for (int out_x = 0; out_x < output_width; ++out_x)
+ {
+ const int in_x_origin = (out_x * stride_width) - pad_width;
+ for (int out_channel = 0; out_channel < output_depth; ++out_channel)
+ {
+ int32_t acc = 0;
+ for (int filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ const int in_y = in_y_origin + dilation_height_factor * filter_y;
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int in_x = in_x_origin + dilation_width_factor * filter_x;
+
+ // Zero padding by omitting the areas outside the image.
+ const bool is_point_inside_image =
+ (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);
+
+ if (!is_point_inside_image)
+ {
+ continue;
+ }
+
+ for (int in_channel = 0; in_channel < input_depth; ++in_channel)
+ {
+ const int input_data_offset =
+ ((batch * input_height + in_y) * input_width + in_x) * input_depth + in_channel;
+
+ const int filter_data_offset =
+ ((out_channel * filter_height + filter_y) * filter_width + filter_x) *
+ input_depth +
+ in_channel;
+
+ const int32_t input_val = input_data[input_data_offset];
+ const int32_t filter_val = filter_data[filter_data_offset];
+ acc += (filter_val + filter_offset) * (input_val + input_offset);
+ }
+ }
+ }
+ if (bias_data)
+ {
+ acc += bias_data[out_channel];
+ }
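+          // multiplyByQuantizedMultiplier is assumed to apply the usual
+          // TFLite fixed-point rescale, roughly acc * multiplier * 2^(shift - 31)
+          // with rounding, mapping the int32 accumulator onto the output scale.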
+ acc = multiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+ acc += output_offset;
+ acc = std::max(acc, output_activation_min);
+ acc = std::min(acc, output_activation_max);
+
+ const int output_data_offset =
+ ((batch * output_height + out_y) * output_width + out_x) * output_depth + out_channel;
+
+ output_data[output_data_offset] = static_cast<uint8_t>(acc);
+ }
+ }
+ }
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_CONV2D_COMMON_H
diff --git a/onert-micro/luci-interpreter/pal/common/PALDiv.h b/onert-micro/luci-interpreter/pal/common/PALDiv.h
new file mode 100644
index 000000000..cca85cd22
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/common/PALDiv.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DIV_COMMON_H
+#define LUCI_INTERPRETER_PAL_DIV_COMMON_H
+
+#include "Params.h"
+#include "PALUtils.h"
+#include "ProcessBroadcastShapes.h"
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+inline void Div(const ArithmeticParams &params, const int flat_size, const T *input1_data,
+ const T *input2_data, T *output_data)
+{
+ T activation_min, activation_max;
+ getActivationParams(params, &activation_min, &activation_max);
+
+ for (int i = 0; i < flat_size; ++i)
+ output_data[i] =
+ std::min(std::max(input1_data[i] / input2_data[i], activation_min), activation_max);
+}
+
+template <typename T>
+inline void DivScalar(const ArithmeticParams &params, const int flat_size, const T *input_data,
+ const T scalar_value, T *output_data)
+{
+ T activation_min, activation_max;
+ getActivationParams(params, &activation_min, &activation_max);
+
+ for (int i = 0; i < flat_size; ++i)
+ output_data[i] =
+ std::min(std::max(input_data[i] / scalar_value, activation_min), activation_max);
+}
+
+template <typename T>
+inline void
+BroadcastDiv4DSlow(const ArithmeticParams &params,
+ const luci_interpreter::RuntimeShape &input1_shape, const T *input1_data,
+ const luci_interpreter::RuntimeShape &input2_shape, const T *input2_data,
+ const luci_interpreter::RuntimeShape &output_shape, T *output_data)
+{
+ const int flat_size = input1_shape.flatSize();
+
+ if (params.broadcast_category == BroadcastableOpCategory::kScalarFirstBroadcast)
+ {
+ return DivScalar(params, flat_size, input2_data, input1_data[0], output_data);
+ }
+ else if (params.broadcast_category == BroadcastableOpCategory::kScalarSecondBroadcast)
+ {
+ return DivScalar(params, flat_size, input1_data, input2_data[0], output_data);
+ }
+
+ NdArrayDesc<4> desc1;
+ NdArrayDesc<4> desc2;
+ NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
+ const luci_interpreter::RuntimeShape extended_output_shape =
+ luci_interpreter::RuntimeShape::extendedShape(4, output_shape);
+
+ T activation_min, activation_max;
+ getActivationParams(params, &activation_min, &activation_max);
+
+  // In TensorFlow, the dimensions are canonically named (batch_number, row,
+ // col, channel), with extents (batches, height, width, depth), with the
+ // trailing dimension changing most rapidly (channels has the smallest stride,
+ // typically 1 element).
+ //
+ // In generated C code, we store arrays with the dimensions reversed. The
+ // first dimension has smallest stride.
+ //
+  // We name our variables by their TensorFlow convention, but generate C code
+ // nesting loops such that the innermost loop has the smallest stride for the
+ // best cache behavior.
+ for (int b = 0; b < extended_output_shape.dims(0); ++b)
+ {
+ for (int y = 0; y < extended_output_shape.dims(1); ++y)
+ {
+ for (int x = 0; x < extended_output_shape.dims(2); ++x)
+ {
+ for (int c = 0; c < extended_output_shape.dims(3); ++c)
+ {
+ const int output_data_offset =
+ ((b * extended_output_shape.dims(1) + y) * extended_output_shape.dims(2) + x) *
+ extended_output_shape.dims(3) +
+ c;
+
+ output_data[output_data_offset] =
+ std::min(std::max(input1_data[subscriptToIndex(desc1, b, y, x, c)] /
+ input2_data[subscriptToIndex(desc2, b, y, x, c)],
+ activation_min),
+ activation_max);
+ }
+ }
+ }
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DIV_COMMON_H
diff --git a/onert-micro/luci-interpreter/pal/common/PALElu.h b/onert-micro/luci-interpreter/pal/common/PALElu.h
new file mode 100644
index 000000000..661bd07ee
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/common/PALElu.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ELU_H
+#define LUCI_INTERPRETER_PAL_ELU_H
+
+#include "PALUtils.h"
+#include <cmath>
+
+namespace luci_interpreter_pal
+{
+
+inline void Elu(const int flat_size, const float *input_data, float *output_data)
+{
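+  // ELU with alpha == 1: f(x) = x for x >= 0 and exp(x) - 1 otherwise;
+  // std::expm1 keeps precision for inputs close to zero.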
+ for (int i = 0; i < flat_size; i++)
+ {
+ float val = input_data[i];
+ float result = val < 0.0f ? std::expm1(val) : val;
+ output_data[i] = result;
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ELU_H
diff --git a/onert-micro/luci-interpreter/pal/common/PALExp.h b/onert-micro/luci-interpreter/pal/common/PALExp.h
new file mode 100644
index 000000000..5cf1ef6ed
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/common/PALExp.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_EXP_H
+#define LUCI_INTERPRETER_PAL_EXP_H
+
+#include "PALUtils.h"
+
+namespace luci_interpreter_pal
+{
+
+inline void Exp(const int flat_size, const float *input_data, float *output_data)
+{
+ for (int i = 0; i < flat_size; i++)
+ {
+ const float val = input_data[i];
+ const float result = std::exp(val);
+ output_data[i] = result;
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_EXP_H
diff --git a/onert-micro/luci-interpreter/pal/common/PALFullyConnectedCommon.h b/onert-micro/luci-interpreter/pal/common/PALFullyConnectedCommon.h
new file mode 100644
index 000000000..14934cc72
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/common/PALFullyConnectedCommon.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_FULLY_CONNECTED_COMMON_H
+#define LUCI_INTERPRETER_PAL_FULLY_CONNECTED_COMMON_H
+
+#include "PALUtils.h"
+#include "Params.h"
+
+namespace luci_interpreter_pal
+{
+
+template <typename InputType, typename WeightType, typename OutputType, typename BiasType>
+inline void FullyConnected(const FullyConnectedParams &params, const int32_t *input_shape,
+ const InputType *input_data, const int32_t *filter_shape,
+ const WeightType *filter_data, const BiasType *bias_data,
+ const int32_t *output_shape, OutputType *output_data)
+{
+ const int32_t input_offset = params.input_offset;
+ const int32_t filter_offset = params.weights_offset;
+ const int32_t output_offset = params.output_offset;
+ const int32_t output_multiplier = params.output_multiplier;
+ const int output_shift = params.output_shift;
+ const int32_t output_activation_min = params.quantized_activation_min;
+ const int32_t output_activation_max = params.quantized_activation_max;
+
+ const int batches = input_shape[0];
+ const int output_depth = output_shape[1];
+ const int accum_depth = filter_shape[1];
+
+ for (int b = 0; b < batches; ++b)
+ {
+ for (int out_c = 0; out_c < output_depth; ++out_c)
+ {
+ BiasType acc = 0;
+ for (int d = 0; d < accum_depth; ++d)
+ {
+ int32_t input_val = input_data[b * accum_depth + d];
+ int32_t filter_val = filter_data[out_c * accum_depth + d];
+ acc += (filter_val + filter_offset) * (input_val + input_offset);
+ }
+ if (bias_data)
+ {
+ acc += bias_data[out_c];
+ }
+ int32_t acc_scaled = multiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+ acc_scaled += output_offset;
+ acc_scaled = std::max(acc_scaled, output_activation_min);
+ acc_scaled = std::min(acc_scaled, output_activation_max);
+ output_data[out_c + output_depth * b] = static_cast<OutputType>(acc_scaled);
+ }
+ }
+}
+template <>
+inline void FullyConnected(const FullyConnectedParams &params, const int32_t *input_shape,
+ const float *input_data, const int32_t *filter_shape,
+ const float *filter_data, const float *bias_data,
+ const int32_t *output_shape, float *output_data)
+{
+ const float output_activation_min = params.float_activation_min;
+ const float output_activation_max = params.float_activation_max;
+
+ const int batches = input_shape[0];
+ const int output_depth = output_shape[1];
+ const int accum_depth = filter_shape[1];
+
+ for (int b = 0; b < batches; ++b)
+ {
+ for (int out_c = 0; out_c < output_depth; ++out_c)
+ {
+ float total = 0.f;
+ for (int d = 0; d < accum_depth; ++d)
+ {
+ total += input_data[b * accum_depth + d] * filter_data[out_c * accum_depth + d];
+ }
+ float bias_value = 0.0f;
+ if (bias_data)
+ {
+ bias_value = bias_data[out_c];
+ }
+ output_data[out_c + output_depth * b] =
+ std::min(std::max(total + bias_value, output_activation_min), output_activation_max);
+ }
+ }
+}
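+
+// Shape convention used by both overloads above: input_shape = {batches,
+// accum_depth}, filter_shape = {output_depth, accum_depth} and output_shape =
+// {batches, output_depth}, so output[b, c] = dot(input[b, :], filter[c, :]) + bias[c].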
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_FULLY_CONNECTED_COMMON_H
diff --git a/onert-micro/luci-interpreter/pal/common/PALLogicalCommon.h b/onert-micro/luci-interpreter/pal/common/PALLogicalCommon.h
new file mode 100644
index 000000000..18173f583
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/common/PALLogicalCommon.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_LOGICAL_COMMON_H
+#define LUCI_INTERPRETER_PAL_LOGICAL_COMMON_H
+
+namespace luci_interpreter_pal
+{
+
+inline void LogicalCommon(const int flat_size, const bool *input1_data, const bool *input2_data,
+ bool *output_data, bool (*f)(bool, bool))
+{
+ for (int i = 0; i < flat_size; ++i)
+ {
+ output_data[i] = f(input1_data[i], input2_data[i]);
+ }
+}
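+
+// Usage sketch (illustrative): a logical AND can be written as
+//   LogicalCommon(flat_size, in1, in2, out, [](bool a, bool b) { return a && b; });
+// a captureless lambda converts implicitly to the required function pointer.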
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_LOGICAL_COMMON_H
diff --git a/onert-micro/luci-interpreter/pal/common/PALLogistic.h b/onert-micro/luci-interpreter/pal/common/PALLogistic.h
new file mode 100644
index 000000000..c0e3a3c18
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/common/PALLogistic.h
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_LOGISTIC_H
+#define LUCI_INTERPRETER_PAL_LOGISTIC_H
+
+#include "Params.h"
+#include "PALUtils.h"
+
+namespace luci_interpreter_pal
+{
+
+inline void Logistic(const int flat_size, const float *input_data, float *output_data)
+{
+ const float cutoff_upper = 16.619047164916992188f;
+ const float cutoff_lower = -9.f;
+
+  // Rationale for using an approximation in the reference kernel:
+  // 0. The approximation gives enough precision for float.
+  // 1. It works around an issue on an embedded chipset where exp() does not
+  //    return correctly as expected: exp(x) should return inf when it
+  //    overflows, not 1.701417 (IEEE 754 defines a representation for inf).
+  // 2. It speeds up the calculation and matches the behavior of the
+  //    optimized kernels (see the definition of scalar_logistic_op<float>).
+
+ for (int i = 0; i < flat_size; i++)
+ {
+ float val = input_data[i];
+ float result;
+ if (val > cutoff_upper)
+ {
+ result = 1.0f;
+ }
+ else if (val < cutoff_lower)
+ {
+ result = std::exp(val);
+ }
+ else
+ {
+ result = 1.f / (1.f + std::exp(-val));
+ }
+ output_data[i] = result;
+ }
+}
+
+inline void Logistic(const int flat_size, const int8_t *input_data, float input_scale,
+ int input_zero_point, int8_t *output_data, float output_scale,
+ int output_zero_point)
+{
+ const float cutoff_upper = 16.619047164916992188f;
+ const float cutoff_lower = -9.f;
+
+  // Rationale for using an approximation in the reference kernel:
+  // 0. The approximation gives enough precision for float.
+  // 1. It works around an issue on an embedded chipset where exp() does not
+  //    return correctly as expected: exp(x) should return inf when it
+  //    overflows, not 1.701417 (IEEE 754 defines a representation for inf).
+  // 2. It speeds up the calculation and matches the behavior of the
+  //    optimized kernels (see the definition of scalar_logistic_op<float>).
+
+ for (int i = 0; i < flat_size; i++)
+ {
+ // Dequantize.
+ float val = static_cast<float>((input_data[i] - input_zero_point) * input_scale);
+ float result;
+ if (val > cutoff_upper)
+ {
+ result = 1.0f;
+ }
+ else if (val < cutoff_lower)
+ {
+ result = std::exp(val);
+ }
+ else
+ {
+ result = 1.f / (1.f + std::exp(-val));
+ }
+ // Requantize
+ int8_t output = static_cast<int8_t>(result / output_scale + output_zero_point);
+ output_data[i] = output;
+ }
+}
+
+inline void Logistic(int32_t input_multiplier, int32_t input_left_shift, int32_t input_size,
+ const int16_t *ptr_input_data, int16_t *ptr_output_data)
+{
+  // We reuse the sigmoid LUT, taking into account that
+  // tanh(x) = 2*sigmoid(2*x) - 1.
+
+  // We scale by 3/4 to expand the range [-8,8] -> [-10.7,10.7].
+  // For a general parameter scale, the multiplier 3 is already folded
+  // into input_multiplier by the TanhPrepare function.
+ if (input_multiplier == 0)
+ { // power of two case
+ input_multiplier = 3 << input_left_shift;
+ input_left_shift = 0;
+ }
+
+ int32_t round = (input_left_shift > 0) ? 1 << (input_left_shift - 1) : 0;
+
+ for (int i = 0; i < input_size; ++i, ptr_input_data++, ptr_output_data++)
+ {
+ int32_t input_data = ((*ptr_input_data) * input_multiplier + round) >> input_left_shift;
+
+ // We do interpolation on unsigned values.
+ uint32_t abs_input_data = abs(input_data);
+
+    // We divide by 2^9 because we need to divide by 2^7 for
+    // the input conversion plus 1/4 from the scale above.
+
+    // Define uh as uint32_t so that the computation below does not overflow.
+ uint32_t uh = abs_input_data >> 9;
+ uint32_t result;
+
+ if (uh >= 255)
+ {
+ // Saturate to maximum.
+ result = 0x7FFF << 10;
+ }
+ else
+ {
+ uint32_t ua = sigmoid_table_uint16[uh];
+ uint32_t ub = sigmoid_table_uint16[uh + 1];
+ uint32_t ut = abs_input_data & 0x1ff;
+ // Interpolation is done using the fractional bit.
+ result = (ua << 9) + ut * (ub - ua);
+ }
+
+ result = (input_data >= 0) ? (result + (1 << 9)) : ((1 << (16 + 9)) - result + (1 << 9) - 1);
+
+ // Back to 16-bit.
+ result >>= 10;
+
+ *ptr_output_data = result;
+ }
+}
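+
+// Sketch of the fixed-point flow above (for intuition only): the input is
+// rescaled by input_multiplier / 2^input_left_shift, the high bits
+// (abs_input_data >> 9) index sigmoid_table_uint16, the low 9 bits linearly
+// interpolate between adjacent table entries, and the final >> 10 narrows
+// the interpolated value back to the int16 output range.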
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_LOGISTIC_H
diff --git a/onert-micro/luci-interpreter/pal/common/PALMaxPool2DCommon.h b/onert-micro/luci-interpreter/pal/common/PALMaxPool2DCommon.h
new file mode 100644
index 000000000..034319b8a
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/common/PALMaxPool2DCommon.h
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_MAX_POOL_2D_COMMON_H
+#define LUCI_INTERPRETER_PAL_MAX_POOL_2D_COMMON_H
+
+#include "Params.h"
+#include "PALUtils.h"
+
+namespace luci_interpreter_pal
+{
+
+inline void MaxPool(const PoolParams &params, const luci_interpreter::RuntimeShape &input_shape,
+ const float *input_data, const luci_interpreter::RuntimeShape &output_shape,
+ float *output_data)
+{
+ const int batches = input_shape.dims(0);
+ const int depth = output_shape.dims(3);
+ const int input_height = input_shape.dims(1);
+ const int input_width = input_shape.dims(2);
+ const int output_height = output_shape.dims(1);
+ const int output_width = output_shape.dims(2);
+ const int stride_height = params.stride_height;
+ const int stride_width = params.stride_width;
+ for (int batch = 0; batch < batches; ++batch)
+ {
+ for (int out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (int channel = 0; channel < depth; ++channel)
+ {
+ const int in_x_origin = (out_x * stride_width) - params.padding_values.width;
+ const int in_y_origin = (out_y * stride_height) - params.padding_values.height;
+ // Compute the boundaries of the filter region clamped so as to
+ // ensure that the filter window fits in the input array.
+ const int filter_x_start = std::max(0, -in_x_origin);
+ const int filter_x_end = std::min(params.filter_width, input_width - in_x_origin);
+ const int filter_y_start = std::max(0, -in_y_origin);
+ const int filter_y_end = std::min(params.filter_height, input_height - in_y_origin);
+ float max = std::numeric_limits<float>::lowest();
+ for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y)
+ {
+ for (int filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x)
+ {
+ const int in_x = in_x_origin + filter_x;
+ const int in_y = in_y_origin + filter_y;
+
+ const int input_data_offset =
+ ((batch * input_shape.dims(1) + in_y) * input_shape.dims(2) + in_x) *
+ input_shape.dims(3) +
+ channel;
+
+ max = std::max(max, input_data[input_data_offset]);
+ }
+ }
+ const int output_data_offset =
+ ((batch * output_shape.dims(1) + out_y) * output_shape.dims(2) + out_x) *
+ output_shape.dims(3) +
+ channel;
+
+ output_data[output_data_offset] =
+ std::min(std::max(max, params.float_activation_min), params.float_activation_max);
+ }
+ }
+ }
+ }
+}
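+
+// Window-clamping example (illustrative numbers): with a 3x3 filter, stride 1
+// and padding 1, the output pixel at (out_y, out_x) = (0, 0) has
+// in_y_origin = in_x_origin = -1, so filter_y_start = filter_x_start = 1 and
+// only the 2x2 part of the window that overlaps the input is scanned.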
+
+template <typename T>
+inline void MaxPool(const PoolParams &params, const luci_interpreter::RuntimeShape &input_shape,
+ const T *input_data, const luci_interpreter::RuntimeShape &output_shape,
+ T *output_data)
+{
+ const int batches = input_shape.dims(0);
+ const int depth = output_shape.dims(3);
+ const int input_height = input_shape.dims(1);
+ const int input_width = input_shape.dims(2);
+ const int output_height = output_shape.dims(1);
+ const int output_width = output_shape.dims(2);
+ const int stride_height = params.stride_height;
+ const int stride_width = params.stride_width;
+ for (int batch = 0; batch < batches; ++batch)
+ {
+ for (int out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (int channel = 0; channel < depth; ++channel)
+ {
+ const int in_x_origin = (out_x * stride_width) - params.padding_values.width;
+ const int in_y_origin = (out_y * stride_height) - params.padding_values.height;
+ // Compute the boundaries of the filter region clamped so as to
+ // ensure that the filter window fits in the input array.
+ const int filter_x_start = std::max(0, -in_x_origin);
+ const int filter_x_end = std::min(params.filter_width, input_width - in_x_origin);
+ const int filter_y_start = std::max(0, -in_y_origin);
+ const int filter_y_end = std::min(params.filter_height, input_height - in_y_origin);
+ T max = std::numeric_limits<T>::lowest();
+ for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y)
+ {
+ for (int filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x)
+ {
+ const int in_x = in_x_origin + filter_x;
+ const int in_y = in_y_origin + filter_y;
+
+ const int input_data_offset =
+ ((batch * input_shape.dims(1) + in_y) * input_shape.dims(2) + in_x) *
+ input_shape.dims(3) +
+ channel;
+
+ max = std::max(max, input_data[input_data_offset]);
+ }
+ }
+ max = std::max<T>(max, params.quantized_activation_min);
+ max = std::min<T>(max, params.quantized_activation_max);
+
+ const int output_data_offset =
+ ((batch * output_shape.dims(1) + out_y) * output_shape.dims(2) + out_x) *
+ output_shape.dims(3) +
+ channel;
+
+ output_data[output_data_offset] = static_cast<T>(max);
+ }
+ }
+ }
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_MAX_POOL_2D_COMMON_H
diff --git a/onert-micro/luci-interpreter/pal/common/PALMulCommon.h b/onert-micro/luci-interpreter/pal/common/PALMulCommon.h
new file mode 100644
index 000000000..f17104030
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/common/PALMulCommon.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_MUL_COMMON_H
+#define LUCI_INTERPRETER_PAL_MUL_COMMON_H
+
+#include "Params.h"
+#include "PALUtils.h"
+#include "ProcessBroadcastShapes.h"
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+inline void Mul(const ArithmeticParams &params, const int flat_size, const T *input1_data,
+ const T *input2_data, T *output_data)
+{
+ T activation_min, activation_max;
+ getActivationParams(params, &activation_min, &activation_max);
+
+ for (int i = 0; i < flat_size; ++i)
+ output_data[i] =
+ std::min(std::max(input1_data[i] * input2_data[i], activation_min), activation_max);
+}
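+
+// Note: the clamp implements the fused activation. With no activation the
+// range is typically the full numeric range of T, so the clamp is a no-op;
+// a fused ReLU6, for example, would set activation_min = 0 and
+// activation_max = 6.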
+
+template <typename T>
+inline void MulScalar(const ArithmeticParams &params, const int flat_size, const T *input_data,
+ const T scalar_value, T *output_data)
+{
+ T activation_min, activation_max;
+ getActivationParams(params, &activation_min, &activation_max);
+
+ for (int i = 0; i < flat_size; ++i)
+ output_data[i] =
+ std::min(std::max(input_data[i] * scalar_value, activation_min), activation_max);
+}
+
+template <typename T>
+inline void
+BroadcastMul4DSlow(const ArithmeticParams &params,
+ const luci_interpreter::RuntimeShape &input1_shape, const T *input1_data,
+ const luci_interpreter::RuntimeShape &input2_shape, const T *input2_data,
+ const luci_interpreter::RuntimeShape &output_shape, T *output_data)
+{
+ const int flat_size = input1_shape.flatSize();
+
+ if (params.broadcast_category == BroadcastableOpCategory::kScalarFirstBroadcast)
+ {
+ return MulScalar(params, flat_size, input2_data, input1_data[0], output_data);
+ }
+ else if (params.broadcast_category == BroadcastableOpCategory::kScalarSecondBroadcast)
+ {
+ return MulScalar(params, flat_size, input1_data, input2_data[0], output_data);
+ }
+
+ NdArrayDesc<4> desc1;
+ NdArrayDesc<4> desc2;
+ NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
+ const luci_interpreter::RuntimeShape extended_output_shape =
+ luci_interpreter::RuntimeShape::extendedShape(4, output_shape);
+
+ T activation_min, activation_max;
+ getActivationParams(params, &activation_min, &activation_max);
+
+  // In TensorFlow, the dimensions are canonically named (batch_number, row,
+ // col, channel), with extents (batches, height, width, depth), with the
+ // trailing dimension changing most rapidly (channels has the smallest stride,
+ // typically 1 element).
+ //
+ // In generated C code, we store arrays with the dimensions reversed. The
+ // first dimension has smallest stride.
+ //
+ // We name our variables by their Tensorflow convention, but generate C code
+ // nesting loops such that the innermost loop has the smallest stride for the
+ // best cache behavior.
+ for (int b = 0; b < extended_output_shape.dims(0); ++b)
+ {
+ for (int y = 0; y < extended_output_shape.dims(1); ++y)
+ {
+ for (int x = 0; x < extended_output_shape.dims(2); ++x)
+ {
+ for (int c = 0; c < extended_output_shape.dims(3); ++c)
+ {
+ const int output_data_offset =
+ ((b * extended_output_shape.dims(1) + y) * extended_output_shape.dims(2) + x) *
+ extended_output_shape.dims(3) +
+ c;
+
+ output_data[output_data_offset] =
+ std::min(std::max(input1_data[subscriptToIndex(desc1, b, y, x, c)] *
+ input2_data[subscriptToIndex(desc2, b, y, x, c)],
+ activation_min),
+ activation_max);
+ }
+ }
+ }
+ }
+}
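+
+// Broadcast example (hypothetical shapes): multiplying input1 of shape
+// [1, 2, 2, 3] by input2 of shape [1, 1, 1, 3] yields output shape
+// [1, 2, 2, 3]; NdArrayDescsForElementwiseBroadcast gives input2 a stride of
+// 0 along its size-1 dimensions, so subscriptToIndex(desc2, b, y, x, c)
+// reuses the same channel triple at every spatial position.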
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_MUL_COMMON_H
diff --git a/onert-micro/luci-interpreter/pal/common/PALNeg.h b/onert-micro/luci-interpreter/pal/common/PALNeg.h
new file mode 100644
index 000000000..4c9153886
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/common/PALNeg.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_NEG_H
+#define LUCI_INTERPRETER_PAL_NEG_H
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+inline void Negate(const luci_interpreter::RuntimeShape &input_shape, const T *input_data,
+ const luci_interpreter::RuntimeShape &output_shape, T *output_data)
+{
+ // check that input and output dimensions are equal
+ int N = input_shape.dimensionsCount();
+ assert(N == output_shape.dimensionsCount());
+
+ // check that sizes of all dimensions are equal
+ for (int i = 0; i < N; ++i)
+ {
+ assert(input_shape.dims(i) == output_shape.dims(i));
+ }
+
+ const int flat_size = input_shape.flatSize();
+
+ for (int i = 0; i < flat_size; ++i)
+ {
+ output_data[i] = -input_data[i];
+ }
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_NEG_H
diff --git a/onert-micro/luci-interpreter/pal/common/PALPad.h b/onert-micro/luci-interpreter/pal/common/PALPad.h
new file mode 100644
index 000000000..f9dd73f1b
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/common/PALPad.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_PAD_H
+#define LUCI_INTERPRETER_PAL_PAD_H
+
+#include "PALUtils.h"
+
+namespace luci_interpreter_pal
+{
+
+constexpr int PadKernelMaxDimensionCount() { return 5; }
+
+inline void Pad(const PadParams &op_params, const luci_interpreter::RuntimeShape &input_shape,
+ const float *input_data, const float *pad_value_ptr,
+ const luci_interpreter::RuntimeShape &output_shape, float *output_data)
+{
+ // Runtime calls are currently fixed at 5 dimensions. Copy inputs so we can
+ // pad them to 5 dims (yes, we are "padding the padding").
+ int left_padding_copy[PadKernelMaxDimensionCount()];
+ for (int i = 0; i < PadKernelMaxDimensionCount(); i++)
+ {
+ left_padding_copy[i] = 0;
+ }
+ for (int i = 0; i < op_params.left_padding_count; ++i)
+ {
+ left_padding_copy[i + PadKernelMaxDimensionCount() - op_params.left_padding_count] =
+ op_params.left_padding[i];
+ }
+ int right_padding_copy[PadKernelMaxDimensionCount()];
+ for (int i = 0; i < PadKernelMaxDimensionCount(); i++)
+ {
+ right_padding_copy[i] = 0;
+ }
+ for (int i = 0; i < op_params.right_padding_count; ++i)
+ {
+ right_padding_copy[i + PadKernelMaxDimensionCount() - op_params.right_padding_count] =
+ op_params.right_padding[i];
+ }
+ const auto extended_output =
+ luci_interpreter::RuntimeShape::extendedShape(PadKernelMaxDimensionCount(), output_shape);
+ const int output_batch = extended_output.dims(0);
+ const int output_plane = extended_output.dims(1);
+ const int output_height = extended_output.dims(2);
+ const int output_width = extended_output.dims(3);
+ const int output_depth = extended_output.dims(4);
+
+ const int left_b_padding = left_padding_copy[0];
+ const int left_p_padding = left_padding_copy[1];
+ const int left_h_padding = left_padding_copy[2];
+ const int left_w_padding = left_padding_copy[3];
+ const int left_d_padding = left_padding_copy[4];
+
+ const int right_b_padding = right_padding_copy[0];
+ const int right_p_padding = right_padding_copy[1];
+ const int right_h_padding = right_padding_copy[2];
+ const int right_w_padding = right_padding_copy[3];
+ const int right_d_padding = right_padding_copy[4];
+
+ const float pad_value = *pad_value_ptr;
+
+ const float *in_ptr = input_data;
+ float *out_ptr = output_data;
+ for (int out_b = 0; out_b < output_batch; ++out_b)
+ {
+ for (int out_p = 0; out_p < output_plane; ++out_p)
+ {
+ for (int out_h = 0; out_h < output_height; ++out_h)
+ {
+ for (int out_w = 0; out_w < output_width; ++out_w)
+ {
+ for (int out_d = 0; out_d < output_depth; ++out_d)
+ {
+ if (out_b < left_b_padding || out_b >= output_batch - right_b_padding ||
+ out_p < left_p_padding || out_p >= output_plane - right_p_padding ||
+ out_h < left_h_padding || out_h >= output_height - right_h_padding ||
+ out_w < left_w_padding || out_w >= output_width - right_w_padding ||
+ out_d < left_d_padding || out_d >= output_depth - right_d_padding)
+ {
+ *out_ptr++ = pad_value;
+ }
+ else
+ {
+ *out_ptr++ = *in_ptr++;
+ }
+ }
+ }
+ }
+ }
+ }
+}
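+
+// Example of "padding the padding" (illustrative): for a 4-D pad with
+// left_padding = {0, 1, 1, 0} and left_padding_count = 4, the copy loop
+// shifts the values into the last four slots, giving
+// left_padding_copy = {0, 0, 1, 1, 0}; the leading slot, which corresponds
+// to the dimension prepended by the 5-D extension, stays zero.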
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_PAD_H
diff --git a/onert-micro/luci-interpreter/pal/common/PALReduceCommon.h b/onert-micro/luci-interpreter/pal/common/PALReduceCommon.h
new file mode 100644
index 000000000..a5b0e10dd
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/common/PALReduceCommon.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_REDUCE_COMMON_H
+#define LUCI_INTERPRETER_PAL_REDUCE_COMMON_H
+
+#include "PALUtils.h"
+
+namespace luci_interpreter_pal
+{
+namespace
+{
+// This method parses the input 'axis' to remove duplicates and handle negative
+// values, and returns the number of resolved axes in 'out_num_axis'.
+inline bool resolveAxis(const int num_dims, const int *axis, const int64_t num_axis,
+ int *out_num_axis)
+{
+  // Track up to 5 resolved axes for duplicate detection (matches the maximum
+  // rank handled by these kernels).
+  int out_axis[5];
+ *out_num_axis = 0; // Just in case.
+ // Short-circuit axis resolution for scalars; the axis will go unused.
+ if (num_dims == 0)
+ {
+ return true;
+ }
+  // O(n^2) is fine since out_num_axis should be really small, mostly <= 4.
+ for (int64_t idx = 0; idx < num_axis; ++idx)
+ {
+    // Handle negative indices: a positive index 'p_idx' can be represented as
+    // the negative index 'n_idx' = p_idx - num_dims.
+    // E.g., for num_dims=3, [0, 1, 2] is the same as [-3, -2, -1].
+ int current = axis[idx] < 0 ? (axis[idx] + num_dims) : axis[idx];
+ if (current < 0 || current >= num_dims)
+ {
+ return false;
+ }
+ bool is_dup = false;
+ for (int j = 0; j < *out_num_axis; ++j)
+ {
+ if (out_axis[j] == current)
+ {
+ is_dup = true;
+ break;
+ }
+ }
+ if (!is_dup)
+ {
+ out_axis[*out_num_axis] = current;
+ *out_num_axis += 1;
+ }
+ }
+ return true;
+}
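+
+// Example (illustrative): for num_dims = 3 and axis = {-1, 2}, both entries
+// resolve to 2, the duplicate is dropped, and *out_num_axis becomes 1.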
+
+} // namespace
+
+// Computes the generic value (i.e., sum/max/min/prod) of elements across
+// dimensions given in axis. It needs to pass in init_value and reducer.
+template <typename T>
+inline void ReduceGeneric(const T *input_data, const int *input_dims, const int input_num_dims,
+ T *output_data, const int *axis, const int64_t num_axis_dimensions,
+ T init_value, const int output_flat_size, T reducer(const T, const T))
+{
+ // Return early when input shape has zero dim.
+ for (int i = 0; i < input_num_dims; ++i)
+ {
+ if (input_dims[i] == 0)
+ return;
+ }
+
+  for (int idx = 0; idx < output_flat_size; ++idx)
+ {
+ output_data[idx] = init_value;
+ }
+
+ // Resolve axis.
+ int num_resolved_axis = 0;
+ if (!resolveAxis(input_num_dims, axis, num_axis_dimensions, &num_resolved_axis))
+ {
+ return;
+ }
+
+ int temp_index[5];
+ // Reset input iterator.
+ for (int idx = 0; idx < input_num_dims; ++idx)
+ {
+ temp_index[idx] = 0;
+ }
+ // Iterate through input_data.
+ do
+ {
+ size_t input_offset = reducedOutputOffset(input_num_dims, input_dims, temp_index, 0, nullptr);
+ size_t output_offset =
+ reducedOutputOffset(input_num_dims, input_dims, temp_index, num_resolved_axis, axis);
+ output_data[output_offset] = reducer(output_data[output_offset], input_data[input_offset]);
+ } while (nextIndex(input_num_dims, input_dims, temp_index));
+}
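+
+// Usage sketch (hypothetical wiring): a reduce-sum over axis 1 of a [2, 3]
+// input could be expressed as
+//   const int dims[] = {2, 3};
+//   const int axis[] = {1};
+//   ReduceGeneric<float>(input, dims, 2, output, axis, 1, /*init_value=*/0.f,
+//                        /*output_flat_size=*/2,
+//                        [](const float a, const float b) { return a + b; });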
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_REDUCE_COMMON_H
diff --git a/onert-micro/luci-interpreter/pal/common/PALReluCommon.h b/onert-micro/luci-interpreter/pal/common/PALReluCommon.h
new file mode 100644
index 000000000..260586ab3
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/common/PALReluCommon.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RELU_COMMON_H
+#define LUCI_INTERPRETER_PAL_RELU_COMMON_H
+
+#include "PALUtils.h"
+
+namespace luci_interpreter_pal
+{
+
+inline void ReLUCommon(const int flat_size, const float *input_data, float *output_data,
+ const float alpha, const bool is_relu_6)
+{
+ const float relu_6_value = 6.0f;
+ for (int i = 0; i < flat_size; i++)
+ {
+ const float val = input_data[i];
+ float result = val > 0 ? val : val * alpha;
+ result = is_relu_6 ? (result > relu_6_value ? relu_6_value : result) : result;
+ output_data[i] = result;
+ }
+}
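+
+// Behavior sketch: alpha = 0 gives plain ReLU and alpha = 0.01 a leaky ReLU
+// (negative inputs scaled by alpha); is_relu_6 = true additionally caps
+// positive outputs at 6.0f, so an input of 8.5f comes out as 6.0f.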
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RELU_COMMON_H
diff --git a/onert-micro/luci-interpreter/pal/common/PALResizeBilinear.h b/onert-micro/luci-interpreter/pal/common/PALResizeBilinear.h
new file mode 100644
index 000000000..19686b702
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/common/PALResizeBilinear.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZEBILINEAR_COMMON_H
+#define LUCI_INTERPRETER_PAL_RESIZEBILINEAR_COMMON_H
+
+#include "PALUtils.h"
+
+namespace luci_interpreter_pal
+{
+
+// Offset function computing the flat index of (i0, i1, i2, i3) in a 4-D shape:
+// i0 - batch, i1 - height, i2 - width, i3 - depth
+inline int Offset(const luci_interpreter::RuntimeShape &shape, int i0, int i1, int i2, int i3)
+{
+ assert(shape.dimensionsCount() == 4);
+
+ const int32_t *dims_data = reinterpret_cast<const int32_t *>(shape.dimsData());
+ LUCI_INTERPRETER_CHECK(i0 >= 0 && i0 < dims_data[0]);
+ LUCI_INTERPRETER_CHECK(i1 >= 0 && i1 < dims_data[1]);
+ LUCI_INTERPRETER_CHECK(i2 >= 0 && i2 < dims_data[2]);
+ LUCI_INTERPRETER_CHECK(i3 >= 0 && i3 < dims_data[3]);
+ return ((i0 * dims_data[1] + i1) * dims_data[2] + i2) * dims_data[3] + i3;
+}
+
+inline void ComputeInterpolationValues(const float value, const float scale,
+ const bool half_pixel_centers, int32_t input_size,
+ float *scaled_value, int32_t *lower_bound,
+ int32_t *upper_bound)
+{
+ if (half_pixel_centers)
+ {
+ *scaled_value = (value + 0.5f) * scale - 0.5f;
+ }
+ else
+ {
+ *scaled_value = value * scale;
+ }
+ float scaled_value_floor = std::floor(*scaled_value);
+ *lower_bound = std::max(static_cast<int32_t>(scaled_value_floor), static_cast<int32_t>(0));
+ *upper_bound = std::min(static_cast<int32_t>(std::ceil(*scaled_value)), input_size - 1);
+}
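+
+// Worked example (illustrative): upscaling width 2 -> 4 without
+// half_pixel_centers gives scale = 2.f / 4 = 0.5f; for output x = 1 the
+// scaled value is 0.5f, so lower_bound = 0, upper_bound = 1, and the
+// fractional part 0.5 blends the two source pixels equally.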
+
+template <typename T>
+static inline void
+ResizeBilinear(const circle::ResizeBilinearOptions *op_params,
+ const luci_interpreter::RuntimeShape &unextended_input_shape, const T *input_data,
+ const luci_interpreter::RuntimeShape &unextended_output_size_shape,
+ const int32_t *output_size_data,
+ const luci_interpreter::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ // If half_pixel_centers is True, align_corners must be False.
+ LUCI_INTERPRETER_CHECK(!op_params->half_pixel_centers() || !op_params->align_corners());
+
+ assert(unextended_input_shape.dimensionsCount() >= 4);
+ assert(unextended_output_size_shape.dimensionsCount() >= 1);
+ assert(unextended_output_shape.dimensionsCount() >= 4);
+ const luci_interpreter::RuntimeShape input_shape =
+ luci_interpreter::RuntimeShape::extendedShape(4, unextended_input_shape);
+ const luci_interpreter::RuntimeShape output_size_shape =
+ luci_interpreter::RuntimeShape::extendedShape(4, unextended_output_size_shape);
+ const luci_interpreter::RuntimeShape output_shape =
+ luci_interpreter::RuntimeShape::extendedShape(4, unextended_output_shape);
+
+ int32_t batches = MatchingDim(input_shape, 0, output_shape, 0);
+ int32_t input_height = input_shape.dims(1);
+ int32_t input_width = input_shape.dims(2);
+ int32_t depth = MatchingDim(input_shape, 3, output_shape, 3);
+
+ assert(output_size_shape.dims(0) == 1);
+ assert(output_size_shape.dims(1) == 1);
+ assert(output_size_shape.dims(2) == 1);
+ assert(output_size_shape.dims(3) == 2);
+
+ int32_t output_height = output_size_data[Offset(output_size_shape, 0, 0, 0, 0)];
+ int32_t output_width = output_size_data[Offset(output_size_shape, 0, 0, 0, 1)];
+
+ float height_scale = static_cast<float>(input_height) / output_height;
+ float width_scale = static_cast<float>(input_width) / output_width;
+ if (op_params->align_corners() && output_height > 1)
+ {
+ height_scale = static_cast<float>(input_height - 1) / (output_height - 1);
+ }
+ if (op_params->align_corners() && output_width > 1)
+ {
+ width_scale = static_cast<float>(input_width - 1) / (output_width - 1);
+ }
+ const float rounding_offset = std::numeric_limits<T>::is_integer ? .5f : .0f;
+
+ for (int b = 0; b < batches; ++b)
+ {
+ for (int y = 0; y < output_height; ++y)
+ {
+ float input_y;
+ int32_t y0, y1;
+ ComputeInterpolationValues(y, height_scale, op_params->half_pixel_centers(), input_height,
+ &input_y, &y0, &y1);
+ for (int x = 0; x < output_width; ++x)
+ {
+ float input_x;
+ int32_t x0, x1;
+ ComputeInterpolationValues(x, width_scale, op_params->half_pixel_centers(), input_width,
+ &input_x, &x0, &x1);
+ for (int c = 0; c < depth; ++c)
+ {
+ T interpolation = static_cast<T>(
+ input_data[Offset(input_shape, b, y0, x0, c)] * (1 - (input_y - y0)) *
+ (1 - (input_x - x0)) +
+ input_data[Offset(input_shape, b, y1, x0, c)] * (input_y - y0) * (1 - (input_x - x0)) +
+ input_data[Offset(input_shape, b, y0, x1, c)] * (1 - (input_y - y0)) * (input_x - x0) +
+ input_data[Offset(input_shape, b, y1, x1, c)] * (input_y - y0) * (input_x - x0) +
+ rounding_offset);
+ output_data[Offset(output_shape, b, y, x, c)] = interpolation;
+ }
+ }
+ }
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZEBILINEAR_COMMON_H
diff --git a/onert-micro/luci-interpreter/pal/common/PALSoftmax.h b/onert-micro/luci-interpreter/pal/common/PALSoftmax.h
new file mode 100644
index 000000000..a67785675
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/common/PALSoftmax.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SOFTMAX_COMMON_H
+#define LUCI_INTERPRETER_PAL_SOFTMAX_COMMON_H
+
+namespace luci_interpreter_pal
+{
+namespace
+{
+
+inline int flatSizeSkipDim(const luci_interpreter::RuntimeShape &shape, int skip_dim)
+{
+ const int dims_count = shape.dimensionsCount();
+ const auto *dims_data = shape.dimsData();
+ int flat_size = 1;
+ for (int i = 0; i < dims_count; ++i)
+ {
+ flat_size *= (i == skip_dim) ? 1 : dims_data[i];
+ }
+ return flat_size;
+}
+
+} // namespace
+
+inline void Softmax(const double beta, const luci_interpreter::RuntimeShape &input_shape,
+ const float *input_data, float *output_data)
+{
+ const int trailing_dim = input_shape.dimensionsCount() - 1;
+ const int outer_size = flatSizeSkipDim(input_shape, trailing_dim);
+
+ const int depth = input_shape.dims(trailing_dim);
+
+ for (int i = 0; i < outer_size; ++i)
+ {
+ // Find max element value which we'll use to ensure numerical stability
+ // taking advantage of the following equality:
+ // exp(x[i])/sum(exp(x[i])) == exp(x[i]+C)/sum(exp(x[i]+C))
+ float max = std::numeric_limits<float>::lowest();
+ for (int c = 0; c < depth; ++c)
+ {
+ max = std::max(max, input_data[i * depth + c]);
+ }
+
+ // Compute sum.
+ float sum = 0.f;
+ for (int c = 0; c < depth; ++c)
+ {
+ const float exp_c = std::exp((input_data[i * depth + c] - max) * static_cast<float>(beta));
+ output_data[i * depth + c] = exp_c;
+ sum += exp_c;
+ }
+
+ // Compute result.
+ for (int c = 0; c < depth; ++c)
+ {
+ output_data[i * depth + c] = output_data[i * depth + c] / sum;
+ }
+ }
+}
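+
+// Numerical-stability sketch: for a row {1000.f, 1001.f}, evaluating
+// exp(1000.f) directly would overflow to inf, but subtracting max = 1001.f
+// first computes exp(-1.f) and exp(0.f), giving the same softmax
+// {~0.269, ~0.731} without overflow.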
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SOFTMAX_COMMON_H
diff --git a/onert-micro/luci-interpreter/pal/common/PALStridedSlice.h b/onert-micro/luci-interpreter/pal/common/PALStridedSlice.h
new file mode 100644
index 000000000..15b3209c1
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/common/PALStridedSlice.h
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_STRIDED_SLICE_H
+#define LUCI_INTERPRETER_PAL_STRIDED_SLICE_H
+
+#include "Params.h"
+
+namespace luci_interpreter_pal
+{
+
+namespace
+{
+// Use until std::clamp() is available from C++17.
+inline int clamp(const int v, const int lo, const int hi)
+{
+ if (hi < v)
+ return hi;
+ if (v < lo)
+ return lo;
+ return v;
+}
+
+inline bool loopCondition(int index, int stop, int stride)
+{
+ // True when we have reached the end of an axis and should loop.
+ return stride > 0 ? index >= stop : index <= stop;
+}
+
+// Return the "real" index for the end of iteration along that axis. This is an
+// "end" in the traditional C sense, in that it points to one past the last
+// element. E.g., if you were iterating through all elements of a 1D array of
+// size 4, this function would return 4 as the stop, because it is one past the
+// "real" indices of 0, 1, 2 & 3.
+inline int stopForAxis(const StridedSliceParams &params,
+ const luci_interpreter::RuntimeShape &input_shape, int axis,
+ int start_for_axis)
+{
+ const auto end_mask = params.end_mask;
+ const auto shrink_axis_mask = params.shrink_axis_mask;
+ const auto *stop_indices = params.stop_indices;
+ const auto *strides = params.strides;
+ const int axis_size = input_shape.dims(axis);
+ if (axis_size == 0)
+ {
+ return 0;
+ }
+
+ // Begin with the specified index
+ const bool shrink_axis = shrink_axis_mask & (1 << axis);
+ int stop = stop_indices[axis];
+
+ // When shrinking an axis, the end position does not matter (and can be
+ // incorrect when negative indexing is used, see Issue #19260). Always use
+ // start_for_axis + 1 to generate a length 1 slice, since start_for_axis has
+ // already been adjusted for negative indices.
+ if (shrink_axis)
+ {
+ return start_for_axis + 1;
+ }
+
+ // end_mask override
+ if (end_mask & (1 << axis))
+ {
+ if (strides[axis] > 0)
+ {
+ // Forward iteration - use the last element. These values will get
+ // clamped below
+ stop = std::numeric_limits<int>::max();
+ }
+ else
+ {
+ // Backward iteration - use the first element.
+ stop = std::numeric_limits<int>::lowest();
+ }
+ }
+
+ // Handle negative indices
+ if (stop < 0)
+ {
+ stop += axis_size;
+ }
+
+ // Clamping
+ // Because the end index points one past the last element, we need slightly
+ // different clamping ranges depending on the direction.
+ if (strides[axis] > 0)
+ {
+ // Forward iteration
+ stop = clamp(stop, 0, axis_size);
+ }
+ else
+ {
+ // Backward iteration
+ stop = clamp(stop, -1, axis_size - 1);
+ }
+
+ return stop;
+}
+
+// Return the index for the first element along that axis. This index will be a
+// positive integer between [0, axis_size] (or [-1, axis_size -1] if stride < 0)
+// that can be used to index directly into the data.
+inline int startForAxis(const StridedSliceParams &params,
+ const luci_interpreter::RuntimeShape &input_shape, int axis)
+{
+ const auto begin_mask = params.begin_mask;
+ const auto *start_indices = params.start_indices;
+ const auto *strides = params.strides;
+ const int axis_size = input_shape.dims(axis);
+ if (axis_size == 0)
+ {
+ return 0;
+ }
+ // Begin with the specified index.
+ int start = start_indices[axis];
+
+ // begin_mask override
+ if (begin_mask & 1 << axis)
+ {
+ if (strides[axis] > 0)
+ {
+ // Forward iteration - use the first element. These values will get
+ // clamped below (Note: We could have set them to 0 and axis_size-1, but
+ // use lowest() and max() to maintain symmetry with StopForAxis())
+ start = std::numeric_limits<int>::lowest();
+ }
+ else
+ {
+ // Backward iteration - use the last element.
+ start = std::numeric_limits<int>::max();
+ }
+ }
+
+ // Handle negative indices
+ if (start < 0)
+ {
+ start += axis_size;
+ }
+
+ // Clamping
+ if (strides[axis] > 0)
+ {
+ // Forward iteration
+ start = clamp(start, 0, axis_size);
+ }
+ else
+ {
+ // Backward iteration
+ start = clamp(start, -1, axis_size - 1);
+ }
+
+ return start;
+}
+
+inline void stridedSlicePadIndices(StridedSliceParams *p, int dim_count)
+{
+ const int pad_count = dim_count - p->start_indices_count;
+
+ // Pad indices at start, so move arrays by pad_count.
+ for (int i = p->start_indices_count - 1; i >= 0; --i)
+ {
+ p->strides[i + pad_count] = p->strides[i];
+ p->start_indices[i + pad_count] = p->start_indices[i];
+ p->stop_indices[i + pad_count] = p->stop_indices[i];
+ }
+ for (int i = 0; i < pad_count; ++i)
+ {
+ p->start_indices[i] = 0;
+ p->stop_indices[i] = 1;
+ p->strides[i] = 1;
+ }
+
+ // Pad masks with 0s or 1s as required.
+ p->shrink_axis_mask <<= pad_count;
+ p->ellipsis_mask <<= pad_count;
+ p->new_axis_mask <<= pad_count;
+ p->begin_mask <<= pad_count;
+ p->end_mask <<= pad_count;
+ p->begin_mask |= (1 << pad_count) - 1;
+ p->end_mask |= (1 << pad_count) - 1;
+
+ p->start_indices_count = dim_count;
+ p->stop_indices_count = dim_count;
+ p->strides_count = dim_count;
+}
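+
+// Example (illustrative): padding a 3-D slice spec to dim_count = 5 moves the
+// existing start/stop/stride triples into slots 2..4, fills slots 0 and 1
+// with the identity slice {start = 0, stop = 1, stride = 1}, and sets the two
+// low bits of begin_mask/end_mask so the padded axes are taken whole.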
+
+} // namespace
+
+template <typename T>
+inline void StridedSlice(StridedSliceParams &op_params,
+ const luci_interpreter::RuntimeShape &unextended_input_shape,
+ const T *input_data, T *output_data)
+{
+ const luci_interpreter::RuntimeShape input_shape =
+ luci_interpreter::RuntimeShape::extendedShape(5, unextended_input_shape);
+
+ // Reverse and pad to 5 dimensions because that is what the runtime code
+  // requires (i.e., all shapes must be 5D and are given backwards).
+ stridedSlicePadIndices(&op_params, 5);
+
+ const int start_0 = startForAxis(op_params, input_shape, 0);
+ const int stop_0 = stopForAxis(op_params, input_shape, 0, start_0);
+ const int start_1 = startForAxis(op_params, input_shape, 1);
+ const int stop_1 = stopForAxis(op_params, input_shape, 1, start_1);
+ const int start_2 = startForAxis(op_params, input_shape, 2);
+ const int stop_2 = stopForAxis(op_params, input_shape, 2, start_2);
+ const int start_3 = startForAxis(op_params, input_shape, 3);
+ const int stop_3 = stopForAxis(op_params, input_shape, 3, start_3);
+ const int start_4 = startForAxis(op_params, input_shape, 4);
+ const int stop_4 = stopForAxis(op_params, input_shape, 4, start_4);
+
+ for (int offset_0 = start_0 * input_shape.dims(1), end_0 = stop_0 * input_shape.dims(1),
+ step_0 = op_params.strides[0] * input_shape.dims(1);
+ !loopCondition(offset_0, end_0, op_params.strides[0]); offset_0 += step_0)
+ {
+ for (int offset_1 = (offset_0 + start_1) * input_shape.dims(2),
+ end_1 = (offset_0 + stop_1) * input_shape.dims(2),
+ step_1 = op_params.strides[1] * input_shape.dims(2);
+ !loopCondition(offset_1, end_1, op_params.strides[1]); offset_1 += step_1)
+ {
+ for (int offset_2 = (offset_1 + start_2) * input_shape.dims(3),
+ end_2 = (offset_1 + stop_2) * input_shape.dims(3),
+ step_2 = op_params.strides[2] * input_shape.dims(3);
+ !loopCondition(offset_2, end_2, op_params.strides[2]); offset_2 += step_2)
+ {
+ for (int offset_3 = (offset_2 + start_3) * input_shape.dims(4),
+ end_3 = (offset_2 + stop_3) * input_shape.dims(4),
+ step_3 = op_params.strides[3] * input_shape.dims(4);
+ !loopCondition(offset_3, end_3, op_params.strides[3]); offset_3 += step_3)
+ {
+ for (int offset_4 = offset_3 + start_4, end_4 = offset_3 + stop_4;
+ !loopCondition(offset_4, end_4, op_params.strides[4]);
+ offset_4 += op_params.strides[4])
+ {
+ *output_data++ = input_data[offset_4];
+ }
+ }
+ }
+ }
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_STRIDED_SLICE_H
diff --git a/onert-micro/luci-interpreter/pal/common/PALSub.h b/onert-micro/luci-interpreter/pal/common/PALSub.h
new file mode 100644
index 000000000..faa94fdd3
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/common/PALSub.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SUB_COMMON_H
+#define LUCI_INTERPRETER_PAL_SUB_COMMON_H
+
+#include "PALUtils.h"
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Sub(const ArithmeticParams &params, const int flat_size, const T *input1_data,
+ const T *input2_data, T *output_data)
+{
+ T activation_min, activation_max;
+ getActivationParams(params, &activation_min, &activation_max);
+
+ for (int i = 0; i < flat_size; ++i)
+ output_data[i] =
+ std::min(std::max(input1_data[i] - input2_data[i], activation_min), activation_max);
+}
+
+template <typename T>
+inline void
+BroadcastSub4DSlow(const ArithmeticParams &params,
+ const luci_interpreter::RuntimeShape &input1_shape, const T *input1_data,
+ const luci_interpreter::RuntimeShape &input2_shape, const T *input2_data,
+ const luci_interpreter::RuntimeShape &output_shape, T *output_data)
+{
+ NdArrayDesc<4> desc1;
+ NdArrayDesc<4> desc2;
+ NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
+ const luci_interpreter::RuntimeShape extended_output_shape =
+ luci_interpreter::RuntimeShape::extendedShape(4, output_shape);
+
+ T activation_min, activation_max;
+ getActivationParams(params, &activation_min, &activation_max);
+
+  // In TensorFlow, the dimensions are canonically named (batch_number, row,
+ // col, channel), with extents (batches, height, width, depth), with the
+ // trailing dimension changing most rapidly (channels has the smallest stride,
+ // typically 1 element).
+ //
+ // In generated C code, we store arrays with the dimensions reversed. The
+ // first dimension has smallest stride.
+ //
+ // We name our variables by their Tensorflow convention, but generate C code
+ // nesting loops such that the innermost loop has the smallest stride for the
+ // best cache behavior.
+ for (int b = 0; b < extended_output_shape.dims(0); ++b)
+ {
+ for (int y = 0; y < extended_output_shape.dims(1); ++y)
+ {
+ for (int x = 0; x < extended_output_shape.dims(2); ++x)
+ {
+ for (int c = 0; c < extended_output_shape.dims(3); ++c)
+ {
+ const int output_data_offset =
+ ((b * extended_output_shape.dims(1) + y) * extended_output_shape.dims(2) + x) *
+ extended_output_shape.dims(3) +
+ c;
+
+ output_data[output_data_offset] =
+ std::min(std::max(input1_data[subscriptToIndex(desc1, b, y, x, c)] -
+ input2_data[subscriptToIndex(desc2, b, y, x, c)],
+ activation_min),
+ activation_max);
+ }
+ }
+ }
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SUB_COMMON_H
diff --git a/onert-micro/luci-interpreter/pal/common/PALTanh.h b/onert-micro/luci-interpreter/pal/common/PALTanh.h
new file mode 100644
index 000000000..506657ebe
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/common/PALTanh.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_TANH_H
+#define LUCI_INTERPRETER_PAL_TANH_H
+
+#include "PALUtils.h"
+
+namespace luci_interpreter_pal
+{
+
+inline void Tanh(const int flat_size, const float *input_data, float *output_data)
+{
+ for (int i = 0; i < flat_size; i++)
+ {
+ float val = input_data[i];
+ float result = std::tanh(val);
+ output_data[i] = result;
+ }
+}
+
+inline void Tanh(int32_t input_multiplier, int32_t input_left_shift, const int flat_size,
+ const int16_t *ptr_input_data, int16_t *ptr_output_data)
+{
+  // We reuse the sigmoid LUT, taking into account that
+  // tanh(x) = 2*sigmoid(2*x) - 1.
+
+  // We scale by 3/4 to expand the range [-8,8] -> [-10.7,10.7].
+  // For a general parameter scale, the multiplier 3 is already folded
+  // into input_multiplier by the TanhPrepare function.
+
+ if (input_multiplier == 0)
+ { // power of two case
+ input_multiplier = 3 << input_left_shift;
+ input_left_shift = 0;
+ }
+
+ int32_t round = (input_left_shift > 0) ? 1 << (input_left_shift - 1) : 0;
+
+ for (int i = 0; i < flat_size; ++i, ptr_input_data++, ptr_output_data++)
+ {
+ int32_t input_data = ((*ptr_input_data) * input_multiplier + round) >> input_left_shift;
+
+ uint32_t abs_input_data = abs(input_data);
+ uint32_t uh = abs_input_data >> 8;
+ int32_t result;
+
+ if (uh >= 255)
+ {
+ // Saturate to maximum.
+ result = 0xFFFF << 8;
+ }
+ else
+ {
+ uint32_t ua = sigmoid_table_uint16[uh];
+ uint32_t ub = sigmoid_table_uint16[uh + 1];
+
+ uint8_t ut = abs_input_data & 0xFF;
+
+ result = (ua << 8) + ut * (ub - ua);
+ }
+
+ result = (input_data >= 0) ? (result - (1 << (14 + 9)) + (1 << (9 - 2)))
+ : (-result + (1 << (14 + 9)) + (1 << (9 - 2)) - 1);
+
+ // Convert back to 16-bit.
+ result >>= (9 - 1);
+
+ *ptr_output_data = result;
+ }
+}
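+
+// Sanity check (for intuition only): the identity
+// tanh(x) = 2*sigmoid(2*x) - 1 is what allows this kernel to reuse
+// sigmoid_table_uint16; e.g. tanh(0) = 2*0.5 - 1 = 0, matching the symmetric
+// treatment of positive and negative inputs above.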
+
+#if 0
+inline void Tanh(int32_t input_zero_point, int32_t input_range_radius,
+ int32_t input_multiplier, int32_t input_shift,
+ const int flat_size, const int8_t* input_data, int8_t* output_data) {
+ // Integer bits must be in sync with Prepare() function.
+ static constexpr int32_t kInputIntegerBits = 4;
+ static constexpr int32_t kOutputScale = 7;
+ static constexpr int32_t kMinInt8 = std::numeric_limits<int8_t>::min();
+ static constexpr int32_t kMaxInt8 = std::numeric_limits<int8_t>::max();
+
+ for (int i = 0; i < flat_size; ++i) {
+ const int32_t input =
+ static_cast<int32_t>(input_data[i]) - input_zero_point;
+ if (input <= -input_range_radius) {
+ output_data[i] = kMinInt8;
+ } else if (input >= input_range_radius) {
+ output_data[i] = kMaxInt8;
+ } else {
+ const int32_t input_in_q4 =
+ multiplyByQuantizedMultiplier(input, input_multiplier, input_shift);
+ const int32_t output_in_q0 = std::tanh(input_in_q4);
+
+ int32_t output_in_q24 =
+ roundingDivideByPOT(output_in_q0, 31 - kOutputScale);
+ output_in_q24 = std::min(std::max(output_in_q24, kMinInt8), kMaxInt8);
+ output_data[i] = static_cast<int8_t>(output_in_q24);
+ }
+ }
+}
+#endif // 0
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_TANH_H
diff --git a/onert-micro/luci-interpreter/pal/common/PALTranspose.h b/onert-micro/luci-interpreter/pal/common/PALTranspose.h
new file mode 100644
index 000000000..3381992c8
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/common/PALTranspose.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_TRANSPOSE_H
+#define LUCI_INTERPRETER_PAL_TRANSPOSE_H
+
+#include "PALUtils.h"
+#include "ProcessBroadcastShapes.h"
+
+namespace luci_interpreter_pal
+{
+template <typename T, int N>
+void TransposeImpl(const TransposeParams &params,
+ const luci_interpreter::RuntimeShape &unextended_input_shape,
+ const T *input_data,
+ const luci_interpreter::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ const int unextended_input_size = unextended_input_shape.dimensionsCount();
+ const int unextended_output_size = unextended_output_shape.dimensionsCount();
+
+ const int input_ext_size = N - unextended_input_size;
+ const int output_ext_size = N - unextended_output_size;
+ NdArrayDesc<N> input_desc;
+ NdArrayDesc<N> output_desc;
+ copyDimsToDesc(luci_interpreter::RuntimeShape::extendedShape(N, unextended_input_shape),
+ &input_desc);
+ copyDimsToDesc(luci_interpreter::RuntimeShape::extendedShape(N, unextended_output_shape),
+ &output_desc);
+
+ // The perm data is extended to match the output, each index incremented by
+ // the amount of front padding of the input shape.
+ int extended_perm[N];
+ for (int i = 0; i < N; ++i)
+ {
+ extended_perm[i] = i < output_ext_size ? i : params.perm[i - output_ext_size] + input_ext_size;
+ }
+
+ // Permutes the input shape so we don't need to permute the indexes inside
+ // the loop. Check to make sure output_dims is matching input_dims.
+ NdArrayDesc<N> perm_input_desc;
+ for (int k = 0; k < N; ++k)
+ {
+ perm_input_desc.extents[k] = input_desc.extents[extended_perm[k]];
+ perm_input_desc.strides[k] = input_desc.strides[extended_perm[k]];
+ }
+
+ // Naive transpose loop (iterate on output index and compute input index).
+  auto transpose_func = [&](int indexes[N]) {
+    output_data[subscriptToIndex(output_desc, indexes)] =
+      input_data[subscriptToIndex(perm_input_desc, indexes)];
+  };
+  NDOpsHelper<N>(output_desc, transpose_func);
+}
+
+template <typename T, int N = 5>
+void Transpose(const TransposeParams &params,
+ const luci_interpreter::RuntimeShape &unextended_input_shape, const T *input_data,
+ const luci_interpreter::RuntimeShape &unextended_output_shape, T *output_data)
+{
+  // The transpose kernel only rearranges values and performs no numeric
+  // evaluation on each cell, so it is safe to implement it per scalar size;
+  // this trick keeps the total code size in a reasonable range.
+ switch (sizeof(T))
+ {
+ case 1:
+ TransposeImpl<int8_t, N>(params, unextended_input_shape,
+ reinterpret_cast<const int8_t *>(input_data),
+ unextended_output_shape, reinterpret_cast<int8_t *>(output_data));
+ break;
+ case 2:
+ TransposeImpl<int16_t, N>(params, unextended_input_shape,
+ reinterpret_cast<const int16_t *>(input_data),
+ unextended_output_shape, reinterpret_cast<int16_t *>(output_data));
+ break;
+
+ case 4:
+ TransposeImpl<int32_t, N>(params, unextended_input_shape,
+ reinterpret_cast<const int32_t *>(input_data),
+ unextended_output_shape, reinterpret_cast<int32_t *>(output_data));
+ break;
+ case 8:
+ TransposeImpl<int64_t, N>(params, unextended_input_shape,
+ reinterpret_cast<const int64_t *>(input_data),
+ unextended_output_shape, reinterpret_cast<int64_t *>(output_data));
+ break;
+ }
+}
+} // namespace luci_interpreter_pal
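+
+// Design note: dispatching on sizeof(T) lets float share the int32_t path and
+// double the int64_t path; Transpose only moves bytes, so reinterpreting the
+// data as a same-sized integer type is safe and avoids instantiating
+// TransposeImpl for every element type.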
+
+#endif // LUCI_INTERPRETER_PAL_TRANSPOSE_H
diff --git a/onert-micro/luci-interpreter/pal/common/PALUnidirectionalSequenceLSTMCommon.h b/onert-micro/luci-interpreter/pal/common/PALUnidirectionalSequenceLSTMCommon.h
new file mode 100644
index 000000000..ad9631cf2
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/common/PALUnidirectionalSequenceLSTMCommon.h
@@ -0,0 +1,567 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_UNIDIRECTIONAL_SEQUENCE_LSTM_COMMON_H
+#define LUCI_INTERPRETER_PAL_UNIDIRECTIONAL_SEQUENCE_LSTM_COMMON_H
+
+#include "kernels/UnidirectionalSequenceLSTM.h"
+#include "PALTanh.h"
+#include "PALLogistic.h"
+#include "PALFullyConnected.h"
+#include "PALMul.h"
+#include "PALUtils.h"
+
+namespace luci_interpreter_pal
+{
+namespace lstm_internal
+{
+namespace
+{
+// Possible fused activation functions.
+typedef enum
+{
+ kTfLiteActNone = 0,
+ kTfLiteActRelu,
+ kTfLiteActReluN1To1, // min(max(-1, x), 1)
+ kTfLiteActRelu6, // min(max(0, x), 6)
+ kTfLiteActTanh,
+ kTfLiteActSignBit,
+ kTfLiteActSigmoid,
+} FusedActivation;
+
+} // namespace
+
+#ifndef DIS_QUANT
+
+template <typename InputType, typename OutputType>
+void mulElementwise(int size, const ArithmeticParams *params, const InputType *input1_data,
+ const InputType *input2_data, OutputType *output_data)
+{
+ for (int i = 0; i < size; ++i)
+ {
+ const int32_t input1_val = params->input1_offset + input1_data[i];
+ const int32_t input2_val = params->input2_offset + input2_data[i];
+ const int32_t unclamped_result =
+ params->output_offset + multiplyByQuantizedMultiplier(input1_val * input2_val,
+ params->output_multiplier,
+ params->output_shift);
+ const int32_t clamped_output =
+ std::min(params->quantized_activation_max,
+ std::max(params->quantized_activation_min, unclamped_result));
+ output_data[i] = static_cast<OutputType>(clamped_output);
+ }
+}
+
+// Input and output have the same shape in LSTM
+void mul(const luci_interpreter::RuntimeShape &shape, const ArithmeticParams *params,
+ const int16_t *input1_data, const int16_t *input2_data, int8_t *output_data)
+{
+ return mulElementwise<int16_t, int8_t>(shape.flatSize(), params, input1_data, input2_data,
+ output_data);
+}
+
+// Input and output have the same shape in LSTM
+void mul(const luci_interpreter::RuntimeShape &shape, const ArithmeticParams *params,
+ const int16_t *input1_data, const int16_t *input2_data, int16_t *output_data)
+{
+ return mulElementwise(shape.flatSize(), params, input1_data, input2_data, output_data);
+}
+
+void addElementWise(const int16_t *input_1, const int16_t *input_2, int n_batch, int n_input,
+ int16_t *output)
+{
+ for (int batch = 0; batch < n_batch; ++batch)
+ {
+ for (int i = 0; i < n_input; ++i)
+ {
+ const int index = batch * n_input + i;
+ int32_t sum = input_1[index] + input_2[index];
+ const int32_t sum_clamped =
+ std::min(static_cast<int32_t>(std::numeric_limits<int16_t>::max()),
+ std::max(static_cast<int32_t>(std::numeric_limits<int16_t>::min()), sum));
+ output[index] = static_cast<int16_t>(sum_clamped);
+ }
+ }
+}
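+
+// Saturation example (illustrative): 30000 + 10000 = 40000 exceeds
+// std::numeric_limits<int16_t>::max() = 32767, so the clamped sum stores
+// 32767 instead of wrapping around.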
+
+void tanh(int32_t cell_state_scale_power, const luci_interpreter::RuntimeShape &input_data_shape,
+ int16_t *input_data, const luci_interpreter::RuntimeShape &output_data_shape,
+ int16_t *output_data)
+{
+ int32_t tanh_input_left_shift = (15 + cell_state_scale_power) - 3;
+ int32_t input_multiplier = 0;
+ if (tanh_input_left_shift < 0) /* handling negative shift value */
+ {
+ tanh_input_left_shift = -tanh_input_left_shift;
+ input_multiplier = 3;
+ }
+ const int flat_size = input_data_shape.flatSize();
+ luci_interpreter_pal::Tanh(input_multiplier, tanh_input_left_shift, flat_size, input_data,
+ output_data);
+}
+
+void sigmoid(const luci_interpreter::RuntimeShape &data_shape, int16_t *data)
+{
+ luci_interpreter_pal::Logistic(0, 0, data_shape.flatSize(), data, data);
+}
+
+void clipping(const int v_size, const luci_interpreter::lstm::CellStateInfo *cell_state_info,
+ int16_t *vector)
+{
+ for (int i = 0; i < v_size; i++)
+ {
+ vector[i] = std::max(std::min(cell_state_info->quantized_cell_clip, vector[i]),
+ static_cast<int16_t>(-cell_state_info->quantized_cell_clip));
+ }
+}
+#endif // DIS_QUANT
+
+#ifndef DIS_FLOAT
+// Input and output have the same shape in LSTM
+void mul(const luci_interpreter::RuntimeShape &shape, const ArithmeticParams *params,
+ const float *input1_data, const float *input2_data, float *output_data)
+{
+ const int flat_size = shape.flatSize();
+ return luci_interpreter_pal::Mul(*params, flat_size, input1_data, input2_data, output_data);
+}
+
+void addElementWise(const float *input_1, const float *input_2, int n_batch, int n_input,
+ float *output)
+{
+ for (int batch = 0; batch < n_batch; ++batch)
+ {
+ for (int i = 0; i < n_input; ++i)
+ {
+ const int index = batch * n_input + i;
+ output[index] = input_1[index] + input_2[index];
+ }
+ }
+}
+
+void tanh(int32_t, const luci_interpreter::RuntimeShape &input_data_shape, float *input_data,
+ const luci_interpreter::RuntimeShape &output_data_shape, float *output_data)
+{
+ const int flat_size = input_data_shape.flatSize();
+ luci_interpreter_pal::Tanh(flat_size, input_data, output_data);
+}
+
+void sigmoid(const luci_interpreter::RuntimeShape &data_shape, float *data)
+{
+ const int flat_size = data_shape.flatSize();
+ luci_interpreter_pal::Logistic(flat_size, data, data);
+}
+
+void clipping(const int v_size, const luci_interpreter::lstm::CellStateInfo *cell_state_info,
+ float *vector)
+{
+ for (int i = 0; i < v_size; i++)
+ {
+ vector[i] =
+ std::max(std::min(cell_state_info->cell_clip, vector[i]), -cell_state_info->cell_clip);
+ }
+}
+#endif // DIS_FLOAT
+
+// Size information about the LSTM kernel, which is deduced from tensors stored
+// in the flat buffer file.
+struct LstmSizeInfo
+{
+ bool time_major;
+ int32_t batch_size;
+ int32_t time_steps;
+ int32_t input_dimension;
+ int32_t state_dimension;
+};
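+
+// Example: for a batch-major input of shape [2, 10, 8] (batch, time, feature)
+// and an output state of shape [2, 16], evalLSTM below fills this struct with
+// time_major = false, batch_size = 2, time_steps = 10, input_dimension = 8
+// and state_dimension = 16.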
+
+class LstmStepManager
+{
+public:
+ LstmStepManager() = delete;
+ // Does not take any ownership, and all pointers must refer to valid objects
+ // that outlive the constructed object.
+ explicit LstmStepManager(const LstmSizeInfo &size_info) : size_info_(size_info) {}
+
+ void updateTime()
+ {
+ current_time_ += 1;
+ // default: one batch per inference
+ int input_step = size_info_.input_dimension;
+ int output_step = size_info_.state_dimension;
+ // time major: batch inference
+ if (size_info_.time_major)
+ {
+ input_step = input_step * size_info_.batch_size;
+ output_step = output_step * size_info_.batch_size;
+ }
+
+ input_offset_ += input_step;
+ output_offset_ += output_step;
+ }
+
+ void updateBatch()
+ {
+ current_batch_ += 1;
+ // batch inference for time major: no action needed
+ if (size_info_.time_major)
+ {
+ return;
+ }
+ // otherwise: single batch inference, go to the next batch
+ hidden_state_offset_ += size_info_.state_dimension;
+ cell_state_offset_ += size_info_.state_dimension;
+ }
+
+ void resetTime() { current_time_ = 0; }
+
+ luci_interpreter::RuntimeShape inputShape() const
+ {
+ int batch_size = 1;
+ if (size_info_.time_major)
+ {
+ batch_size = size_info_.batch_size;
+ }
+ const int dims[2] = {batch_size, size_info_.input_dimension};
+ const int32_t *dims_data = reinterpret_cast<const int32_t *>(dims);
+ return luci_interpreter::RuntimeShape(2, dims_data);
+ }
+
+ luci_interpreter::RuntimeShape stateShape() const
+ {
+ int batch_size = 1;
+ if (size_info_.time_major)
+ {
+ batch_size = size_info_.batch_size;
+ }
+ const int dims[2] = {batch_size, size_info_.state_dimension};
+ const int32_t *dims_data = reinterpret_cast<const int32_t *>(dims);
+ return luci_interpreter::RuntimeShape(2, dims_data);
+ }
+
+ int inputOffset() const { return input_offset_; }
+
+ int outputOffset() const { return output_offset_; }
+
+ int hiddenStateOffset() const { return hidden_state_offset_; }
+
+ int cellStateOffset() const { return cell_state_offset_; }
+
+private:
+ int32_t current_time_ = 0;
+ int32_t current_batch_ = 0;
+ int32_t input_offset_ = 0;
+ int32_t output_offset_ = 0;
+ int32_t hidden_state_offset_ = 0;
+ int32_t cell_state_offset_ = 0;
+
+ const LstmSizeInfo &size_info_;
+};
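+
+// Usage sketch for LstmStepManager (mirroring evalLSTM below): offsets advance
+// once per time step, and once per batch only in the batch-major case.
+//   LstmStepManager step_info(size_info);
+//   for (int b = 0; b < size_info.batch_size; ++b)
+//   {
+//     for (int t = 0; t < size_info.time_steps; ++t)
+//     {
+//       // ... consume inputOffset() / hiddenStateOffset() / outputOffset() ...
+//       step_info.updateTime();
+//     }
+//     step_info.updateBatch();
+//     step_info.resetTime();
+//   }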
+
+// Calculates a single LSTM gate.
+// Implements the following formula:
+// gate = activate(FC(input) + FC(recurrent))
+// Activation is sigmoid except for the "cell" gate (configurable, usually tanh)
+template <typename ActivationType, typename WeightType, typename CellType, typename BiasType>
+void calculateLstmGate(const LstmStepManager *step_info,
+ const luci_interpreter::lstm::GateParameters *gate_params,
+ // Input FC
+ ActivationType *input_data, const circle::Tensor *input_weight,
+ const circle::Tensor *input_bias,
+ // Recurrent FC
+ ActivationType *recurrent_data, const circle::Tensor *recurrent_weight,
+ const circle::Tensor *recurrent_bias,
+ // Output
+ CellType *gate_output,
+ // Scratch arrays
+ CellType *fc_output_buffer, const FusedActivation activation,
+ luci_interpreter::BaseRuntimeGraph *runtime_graph)
+{
+ // Input FC
+ const auto gate_output_shape = step_info->stateShape();
+ {
+ FullyConnectedParams op_params{};
+ op_params.input_offset = gate_params->input_fc_params.input_offset;
+ op_params.weights_offset = gate_params->input_fc_params.weights_offset;
+ op_params.output_offset = gate_params->input_fc_params.output_offset;
+ op_params.output_multiplier = gate_params->input_fc_params.output_multiplier;
+ op_params.output_shift = gate_params->input_fc_params.output_shift;
+ op_params.quantized_activation_min = gate_params->input_fc_params.quantized_activation_min;
+ op_params.quantized_activation_max = gate_params->input_fc_params.quantized_activation_max;
+ op_params.float_activation_max = gate_params->input_fc_params.float_activation_max;
+ op_params.float_activation_min = gate_params->input_fc_params.float_activation_min;
+
+ int32_t input_weight_shape[luci_interpreter::kMaxSmallSize];
+ luci_interpreter::kernels::getTensorDims(input_weight, runtime_graph, input_weight_shape);
+
+ FullyConnected(op_params, step_info->inputShape().dimsData(),
+ input_data + step_info->inputOffset(), input_weight_shape,
+ luci_interpreter::kernels::getTensorData<WeightType>(
+ runtime_graph->getConstDataByTensor(input_weight)),
+ luci_interpreter::kernels::getTensorData<BiasType>(
+ runtime_graph->getConstDataByTensor(input_bias)),
+ gate_output_shape.dimsData(), gate_output);
+ }
+
+ // Recurrent FC
+ {
+ FullyConnectedParams op_params{};
+ op_params.input_offset = gate_params->recurrent_fc_params.input_offset;
+ op_params.weights_offset = gate_params->recurrent_fc_params.weights_offset;
+ op_params.output_offset = gate_params->recurrent_fc_params.output_offset;
+ op_params.output_multiplier = gate_params->recurrent_fc_params.output_multiplier;
+ op_params.output_shift = gate_params->recurrent_fc_params.output_shift;
+ op_params.quantized_activation_min = gate_params->recurrent_fc_params.quantized_activation_min;
+ op_params.quantized_activation_max = gate_params->recurrent_fc_params.quantized_activation_max;
+ op_params.float_activation_max = gate_params->recurrent_fc_params.float_activation_max;
+ op_params.float_activation_min = gate_params->recurrent_fc_params.float_activation_min;
+
+ int32_t recurrent_weight_shape[luci_interpreter::kMaxSmallSize];
+ luci_interpreter::kernels::getTensorDims(recurrent_weight, runtime_graph,
+ recurrent_weight_shape);
+
+ FullyConnected(op_params, step_info->stateShape().dimsData(),
+ recurrent_data + step_info->hiddenStateOffset(), recurrent_weight_shape,
+ luci_interpreter::kernels::getTensorData<WeightType>(
+ runtime_graph->getConstDataByTensor(recurrent_weight)),
+ luci_interpreter::kernels::getTensorData<BiasType>(
+ runtime_graph->getConstDataByTensor(recurrent_bias)),
+ gate_output_shape.dimsData(), fc_output_buffer);
+
+ addElementWise(gate_output, fc_output_buffer, /*n_batch=*/gate_output_shape.dimsData()[0],
+ /*n_state=*/gate_output_shape.dimsData()[1], gate_output);
+
+ switch (activation)
+ {
+ case FusedActivation::kTfLiteActSigmoid:
+ sigmoid(gate_output_shape, gate_output);
+ break;
+ case FusedActivation::kTfLiteActTanh:
+ {
+ // Use scale power -12 so that the tanh input shift becomes zero
+ tanh(/*cell_state_scale_power=*/-12, gate_output_shape, gate_output, gate_output_shape,
+ gate_output);
+ }
+ break;
+ default:
+ // Only Sigmoid or Tanh is used.
+ assert(false && "Only Sigmoid or Tanh is used");
+ }
+ }
+}
+
+// Update the hidden state of the LSTM kernel using the following formula:
+// updated_hidden_state = Tanh(updated_cell_state) * output_gate_output,
+// where * denotes element-wise multiplication
+template <typename CellType, typename ActivationType>
+void updateLstmHidden(const LstmStepManager *step_info, CellType *cell_state_data_base,
+ ActivationType *hidden_state_data, const CellType *output_gate_output,
+ const ArithmeticParams *mul_params, int32_t cell_state_scale_power,
+ CellType *buffer)
+{
+ auto cell_state_shape = step_info->stateShape();
+ CellType *cell_state_data = cell_state_data_base + step_info->cellStateOffset();
+ // Tanh(cell_state)
+ tanh(cell_state_scale_power, cell_state_shape, cell_state_data, cell_state_shape, buffer);
+ // Update the hidden state
+ mul(cell_state_shape, mul_params, buffer, output_gate_output,
+ hidden_state_data + step_info->hiddenStateOffset());
+}
+
+// Update the cell state using the outputs of the forget gate, input gate, and
+// cell gate. Formula: updated_cell_state = forget_gate_output * cell_state +
+// input_gate_output * cell_gate_output, where * denotes element-wise
+// multiplication
+template <typename CellType>
+void updateLstmCell(const LstmStepManager *step_info, CellType *cell_state_data,
+ // Gate outputs
+ CellType *forget_gate_output, const CellType *input_gate_output,
+ const CellType *cell_gate_output,
+ // Mul parameters
+ const ArithmeticParams &forget_cell_mul_params,
+ const ArithmeticParams &input_mul_params,
+ const luci_interpreter::lstm::CellStateInfo *cell_state_info, CellType *buffer)
+{
+ auto cell_state_shape = step_info->stateShape();
+ // Forget Gate x Cell State
+ mul(cell_state_shape, &forget_cell_mul_params, forget_gate_output,
+ cell_state_data + step_info->cellStateOffset(),
+ cell_state_data + step_info->cellStateOffset());
+ // Input Gate x Cell Gate
+ mul(cell_state_shape, &input_mul_params, input_gate_output, cell_gate_output, buffer);
+
+ // Update the cell state
+ addElementWise(cell_state_data + step_info->cellStateOffset(), buffer,
+ /*n_batch=*/cell_state_shape.dimsData()[0],
+ /*n_state=*/cell_state_shape.dimsData()[1],
+ cell_state_data + step_info->cellStateOffset());
+
+ if (cell_state_info->cell_clip > 0)
+ {
+ clipping(cell_state_shape.flatSize(), cell_state_info,
+ cell_state_data + step_info->cellStateOffset());
+ }
+}
+
+template <typename ActivationType, typename WeightType, typename CellType, typename BiasType>
+void lstmStep(luci_interpreter::lstm::LSTMStruct *lstm_struct,
+ luci_interpreter::lstm::LSTMParameters *lstm_params, LstmStepManager *step_info,
+ luci_interpreter::lstm::CellStateInfo *cell_state_info,
+ ActivationType *output_state_data, CellType *cell_state_data, CellType *scratch0,
+ CellType *scratch1, CellType *scratch2, CellType *scratch3,
+ luci_interpreter::BaseRuntimeGraph *runtime_graph)
+{
+ /* Step 1: calculate gate outputs to prepare the cell state update */
+ CellType *gate_internal_buffer = scratch3;
+ CellType *forget_gate_output = scratch0;
+
+ auto input_data = luci_interpreter::kernels::getTensorData<ActivationType>(
+ runtime_graph->getDataByTensor(lstm_struct->input()));
+
+ calculateLstmGate<ActivationType, WeightType, CellType, BiasType>(
+ step_info, &lstm_params->forget_gate_parameters,
+ // Input FC
+ input_data, lstm_struct->input_to_forget_weights(), lstm_struct->forget_gate_bias(),
+ // Recurrent FC
+ output_state_data, lstm_struct->recurrent_to_forget_weights(), nullptr,
+ // Output
+ forget_gate_output, gate_internal_buffer, FusedActivation::kTfLiteActSigmoid, runtime_graph);
+
+ // Input Gate calculation
+ CellType *input_gate_output = scratch1;
+ calculateLstmGate<ActivationType, WeightType, CellType, BiasType>(
+ step_info, &lstm_params->input_gate_parameters,
+ // Input FC
+ input_data, lstm_struct->input_to_input_weights(), lstm_struct->input_gate_bias(),
+ // Recurrent FC
+ output_state_data, lstm_struct->recurrent_to_input_weights(),
+ /*recurrent_bias*/ nullptr,
+ // Output
+ input_gate_output,
+ // Scratch arrays
+ gate_internal_buffer, FusedActivation::kTfLiteActSigmoid, runtime_graph);
+
+ // Cell Gate calculation
+ CellType *cell_gate_output = scratch2;
+ calculateLstmGate<ActivationType, WeightType, CellType, BiasType>(
+ step_info, &lstm_params->cell_gate_parameters,
+ // Input FC
+ input_data, lstm_struct->input_to_cell_weights(), lstm_struct->cell_gate_bias(),
+ // Recurrent FC
+ output_state_data, lstm_struct->recurrent_to_cell_weights(),
+ /*recurrent_bias*/ nullptr,
+ // Output
+ cell_gate_output,
+ // Scratch arrays
+ gate_internal_buffer, FusedActivation::kTfLiteActTanh, runtime_graph);
+
+ /* Step 2: update the cell state */
+ {
+ CellType *updated_input_buffer = scratch1; // reuse buffer
+
+ updateLstmCell<CellType>(
+ step_info, cell_state_data, forget_gate_output, input_gate_output, cell_gate_output,
+ lstm_params->inter_gate_parameters.forget_cell_mul_params,
+ lstm_params->inter_gate_parameters.input_mul_params, cell_state_info, updated_input_buffer);
+ }
+
+ {
+ /* Step 3: update the hidden state */
+ CellType *output_gate_output = scratch1; // reuse buffer
+ calculateLstmGate<ActivationType, WeightType, CellType, BiasType>(
+ step_info, &lstm_params->output_gate_parameters,
+ // Input FC
+ input_data, lstm_struct->input_to_output_weights(), lstm_struct->output_gate_bias(),
+ // Recurrent FC
+ output_state_data, lstm_struct->recurrent_to_output_weights(), nullptr,
+ // Output
+ output_gate_output,
+ // Scratch arrays
+ gate_internal_buffer, FusedActivation::kTfLiteActSigmoid, runtime_graph);
+ CellType *tanh_activated_cell_buffer = scratch0; // reuse buffer
+ updateLstmHidden<CellType, ActivationType>(
+ step_info, cell_state_data, output_state_data, output_gate_output,
+ &lstm_params->inter_gate_parameters.output_mul_params,
+ cell_state_info->cell_state_scale_power, tanh_activated_cell_buffer);
+
+ ActivationType *output_ptr = luci_interpreter::kernels::getTensorData<ActivationType>(
+ runtime_graph->getDataByTensor(lstm_struct->output()));
+ std::memcpy(output_ptr + step_info->outputOffset(),
+ output_state_data + step_info->hiddenStateOffset(),
+ step_info->stateShape().flatSize() * sizeof(ActivationType));
+ }
+}
+
+} // namespace lstm_internal
+
+// Evaluate the LSTM kernel with (potential) multi-steps and multi-batch input
+template <typename ActivationType, typename WeightType, typename CellType, typename BiasType>
+void evalLSTM(luci_interpreter::lstm::LSTMStruct *lstm_struct,
+ luci_interpreter::lstm::LSTMParameters *lstm_params,
+ luci_interpreter::lstm::CellStateInfo *cell_state_info,
+ ActivationType *output_state_data, CellType *cell_state_data, CellType *scratch0,
+ CellType *scratch1, CellType *scratch2, CellType *scratch3,
+ luci_interpreter::BaseRuntimeGraph *runtime_graph)
+{
+ lstm_internal::LstmSizeInfo size_info;
+
+ size_info.time_major = lstm_struct->options->time_major();
+ size_info.batch_size = size_info.time_major
+ ? luci_interpreter::Tensor::dim(lstm_struct->input(), 1)
+ : luci_interpreter::Tensor::dim(lstm_struct->input(), 0);
+ size_info.time_steps = size_info.time_major
+ ? luci_interpreter::Tensor::dim(lstm_struct->input(), 0)
+ : luci_interpreter::Tensor::dim(lstm_struct->input(), 1);
+ size_info.input_dimension = luci_interpreter::Tensor::dim(lstm_struct->input(), 2);
+ size_info.state_dimension = luci_interpreter::Tensor::dim(lstm_struct->output_state(), 1);
+
+ lstm_internal::LstmStepManager step_info(size_info);
+
+ // time is the first dimension, so batch computation is possible
+ if (size_info.time_major)
+ {
+ for (int t = 0; t < size_info.time_steps; t++)
+ {
+ lstm_internal::lstmStep<ActivationType, WeightType, CellType, BiasType>(
+ lstm_struct, lstm_params, &step_info, cell_state_info, output_state_data, cell_state_data,
+ scratch0, scratch1, scratch2, scratch3, runtime_graph);
+ // prepare for the next time step
+ step_info.updateTime();
+ }
+ }
+ else
+ {
+ // batch is the first dimension; the computation cannot be batched over time, so run single-batch inference
+ for (int b = 0; b < size_info.batch_size; b++)
+ {
+ for (int t = 0; t < size_info.time_steps; t++)
+ {
+ lstm_internal::lstmStep<ActivationType, WeightType, CellType, BiasType>(
+ lstm_struct, lstm_params, &step_info, cell_state_info, output_state_data, cell_state_data,
+ scratch0, scratch1, scratch2, scratch3, runtime_graph);
+ // prepare for the next time step
+ step_info.updateTime();
+ }
+ // prepare for the next batch
+ step_info.updateBatch();
+ step_info.resetTime();
+ }
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_UNIDIRECTIONAL_SEQUENCE_LSTM_COMMON_H
diff --git a/onert-micro/luci-interpreter/pal/common/PALUtils.h b/onert-micro/luci-interpreter/pal/common/PALUtils.h
new file mode 100644
index 000000000..1e05bfc7b
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/common/PALUtils.h
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_UTILS_H
+#define LUCI_INTERPRETER_PAL_UTILS_H
+
+#include <cassert>
+
+namespace luci_interpreter_pal
+{
+
+// Table of sigmoid(i/24) at 0.16 format - 256 elements.
+// We use combined sigmoid and tanh look-up table, since
+// tanh(x) = 2*sigmoid(2*x) -1.
+// Both functions are symmetric, so the LUT table is only needed
+// for the absolute value of the input.
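+// In 0.16 format an entry e encodes the value e / 65536; e.g. the first entry,
+// 32768, is sigmoid(0) = 0.5, and entries saturate toward 65535 (~1.0).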
+static const uint16_t sigmoid_table_uint16[256] = {
+ 32768, 33451, 34133, 34813, 35493, 36169, 36843, 37513, 38180, 38841, 39498, 40149, 40794, 41432,
+ 42064, 42688, 43304, 43912, 44511, 45102, 45683, 46255, 46817, 47369, 47911, 48443, 48964, 49475,
+ 49975, 50464, 50942, 51409, 51865, 52311, 52745, 53169, 53581, 53983, 54374, 54755, 55125, 55485,
+ 55834, 56174, 56503, 56823, 57133, 57433, 57724, 58007, 58280, 58544, 58800, 59048, 59288, 59519,
+ 59743, 59959, 60168, 60370, 60565, 60753, 60935, 61110, 61279, 61441, 61599, 61750, 61896, 62036,
+ 62172, 62302, 62428, 62549, 62666, 62778, 62886, 62990, 63090, 63186, 63279, 63368, 63454, 63536,
+ 63615, 63691, 63765, 63835, 63903, 63968, 64030, 64090, 64148, 64204, 64257, 64308, 64357, 64405,
+ 64450, 64494, 64536, 64576, 64614, 64652, 64687, 64721, 64754, 64786, 64816, 64845, 64873, 64900,
+ 64926, 64950, 64974, 64997, 65019, 65039, 65060, 65079, 65097, 65115, 65132, 65149, 65164, 65179,
+ 65194, 65208, 65221, 65234, 65246, 65258, 65269, 65280, 65291, 65301, 65310, 65319, 65328, 65337,
+ 65345, 65352, 65360, 65367, 65374, 65381, 65387, 65393, 65399, 65404, 65410, 65415, 65420, 65425,
+ 65429, 65433, 65438, 65442, 65445, 65449, 65453, 65456, 65459, 65462, 65465, 65468, 65471, 65474,
+ 65476, 65479, 65481, 65483, 65485, 65488, 65489, 65491, 65493, 65495, 65497, 65498, 65500, 65501,
+ 65503, 65504, 65505, 65507, 65508, 65509, 65510, 65511, 65512, 65513, 65514, 65515, 65516, 65517,
+ 65517, 65518, 65519, 65520, 65520, 65521, 65522, 65522, 65523, 65523, 65524, 65524, 65525, 65525,
+ 65526, 65526, 65526, 65527, 65527, 65528, 65528, 65528, 65529, 65529, 65529, 65529, 65530, 65530,
+ 65530, 65530, 65531, 65531, 65531, 65531, 65531, 65532, 65532, 65532, 65532, 65532, 65532, 65533,
+ 65533, 65533, 65533, 65533, 65533, 65533, 65533, 65534, 65534, 65534, 65534, 65534, 65534, 65534,
+ 65534, 65534, 65534, 65535};
+
+inline std::int32_t saturatingRoundingDoublingHighMul(std::int32_t a, std::int32_t b)
+{
+ bool overflow = a == b && a == std::numeric_limits<std::int32_t>::min();
+ std::int64_t a_64(a);
+ std::int64_t b_64(b);
+ std::int64_t ab_64 = a_64 * b_64;
+ std::int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30));
+ std::int32_t ab_x2_high32 = static_cast<std::int32_t>((ab_64 + nudge) / (1ll << 31));
+ return overflow ? std::numeric_limits<std::int32_t>::max() : ab_x2_high32;
+}
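+
+// saturatingRoundingDoublingHighMul computes round(2 * a * b / 2^32), i.e. the
+// rounded product of two Q31 fixed-point values. E.g. a = b = 1 << 30 (0.5 in
+// Q31) yields 1 << 29 (0.25 in Q31); the single overflow case
+// a = b = INT32_MIN saturates to INT32_MAX.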
+
+// Correctly-rounded-to-nearest division by a power-of-two.
+// Also known as a rounding arithmetic right shift.
+inline int32_t roundingDivideByPOT(int32_t x, int32_t exponent)
+{
+ assert(exponent >= 0);
+ assert(exponent <= 31);
+ const int32_t mask = int32_t((1ll << exponent) - 1);
+ const int32_t zero = int32_t(0);
+ const int32_t one = int32_t(1);
+ const int32_t remainder = x & mask;
+ const int32_t threshold = (mask >> 1) + ((x < zero ? one : zero) & one);
+ return (x >> exponent) + ((remainder > threshold ? one : zero) & one);
+}
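+
+// E.g. roundingDivideByPOT(5, 1): mask = 1, remainder = 1, threshold = 0, so
+// the result is (5 >> 1) + 1 == 3, i.e. 2.5 rounded to the nearest integer.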
+
+inline int32_t multiplyByQuantizedMultiplier(int32_t x, int32_t quantized_multiplier, int shift)
+{
+ int left_shift = shift > 0 ? shift : 0;
+ int right_shift = shift > 0 ? 0 : -shift;
+ return roundingDivideByPOT(
+ saturatingRoundingDoublingHighMul(x * (1 << left_shift), quantized_multiplier), right_shift);
+}
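+
+// multiplyByQuantizedMultiplier returns approximately
+// x * (quantized_multiplier / 2^31) * 2^shift, the usual requantization step.
+// E.g. x = 100, quantized_multiplier = 1 << 30 (0.5) and shift = 1 give
+// round(200 * 0.5) = 100.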
+
+inline int32_t multiplyByQuantizedMultiplierSmallerThanOneExp(int32_t x,
+ int32_t quantized_multiplier,
+ int left_shift)
+{
+ return roundingDivideByPOT(saturatingRoundingDoublingHighMul(x, quantized_multiplier),
+ -left_shift);
+}
+
+template <typename P> inline void getActivationParams(const P &params, int32_t *min, int32_t *max)
+{
+ *min = params.quantized_activation_min;
+ *max = params.quantized_activation_max;
+}
+
+template <typename P> inline void getActivationParams(const P &params, float *min, float *max)
+{
+ *min = params.float_activation_min;
+ *max = params.float_activation_max;
+}
+
+template <typename P> inline void getActivationParams(const P &params, int64_t *min, int64_t *max)
+{
+ *min = params.int64_activation_min;
+ *max = params.int64_activation_max;
+}
+
+// Gets offset of index if reducing on axis. When reducing, the flattened offset
+// will not change, if the input index changes on the given axis. For example,
+// if you have a 3D tensor and you are reducing to 2D by eliminating axis 0,
+// then index (0, 1, 2) and index (1, 1, 2) will map to the same flattened
+// offset.
+inline size_t reducedOutputOffset(const int num_dims, const int *dims, const int *index,
+ const int num_axis, const int *axis)
+{
+ if (num_dims == 0)
+ {
+ return 0;
+ }
+ size_t offset = 0;
+ for (int idx = 0; idx < num_dims; ++idx)
+ {
+ // if we need to skip this axis
+ bool is_axis = false;
+ if (axis != nullptr)
+ {
+ for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx)
+ {
+ if (idx == axis[axis_idx])
+ {
+ is_axis = true;
+ break;
+ }
+ }
+ }
+ if (!is_axis)
+ {
+ offset = offset * static_cast<size_t>(dims[idx]) + static_cast<size_t>(index[idx]);
+ }
+ }
+ return offset;
+}
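+
+// E.g. with dims = {2, 3, 4} and axis = {0}: both index (0, 1, 2) and
+// index (1, 1, 2) yield offset 1 * 4 + 2 == 6, matching the comment above.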
+
+// Gets next index to iterate through a multidimensional array.
+inline bool nextIndex(const int num_dims, const int *dims, int *current)
+{
+ if (num_dims == 0)
+ {
+ return false;
+ }
+ int carry = 1;
+ for (int idx = num_dims - 1; idx >= 0; --idx)
+ {
+ int current_val = current[idx] + carry;
+ if (dims[idx] == current_val)
+ {
+ current[idx] = 0;
+ }
+ else
+ {
+ current[idx] = current_val;
+ carry = 0;
+ break;
+ }
+ }
+ return (carry == 0);
+}
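+
+// nextIndex iterates like an odometer over the index space. E.g. with
+// dims = {2, 3}: {0, 2} advances to {1, 0} (returns true), while {1, 2} wraps
+// around to {0, 0} and returns false, signalling the end of iteration.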
+
+// Get common shape dim, assert that they all agree.
+inline int MatchingDim(const luci_interpreter::RuntimeShape &shape1, int index1,
+ const luci_interpreter::RuntimeShape &shape2, int index2)
+{
+ assert(shape1.dims(index1) == shape2.dims(index2));
+ return shape1.dims(index1);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_UTILS_H
diff --git a/onert-micro/luci-interpreter/pal/common/Params.h b/onert-micro/luci-interpreter/pal/common/Params.h
new file mode 100644
index 000000000..0dea29432
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/common/Params.h
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_PARAMS_H
+#define LUCI_INTERPRETER_PAL_PARAMS_H
+
+namespace luci_interpreter_pal
+{
+
+struct PadParams
+{
+ int8_t left_padding_count;
+ int32_t left_padding[5];
+ int8_t right_padding_count;
+ int32_t right_padding[5];
+};
+
+struct FullyConnectedParams
+{
+ int32_t input_offset;
+ int32_t weights_offset;
+ int32_t output_offset;
+ int32_t output_multiplier;
+ int output_shift;
+ // uint8_t, etc, activation params.
+ int32_t quantized_activation_min;
+ int32_t quantized_activation_max;
+ // float activation params.
+ float float_activation_min;
+ float float_activation_max;
+ // Mark the operands as cacheable if they are unchanging, e.g. weights.
+ bool lhs_cacheable;
+ bool rhs_cacheable;
+};
+
+enum class PaddingType : uint8_t
+{
+ None,
+ Same,
+ Valid
+};
+
+struct PaddingValues
+{
+ int16_t width;
+ int16_t height;
+ // offset is used for calculating "remaining" padding, for example, `width`
+ // is 1 and `width_offset` is 1, so padding_left is 1 while padding_right is
+ // 1 + 1 = 2.
+ int16_t width_offset;
+ // Same as width_offset except it's over the height dimension.
+ int16_t height_offset;
+};
+
+struct ConvParams
+{
+ PaddingType padding_type;
+ PaddingValues padding_values;
+ int16_t stride_width;
+ int16_t stride_height;
+ int16_t dilation_width_factor;
+ int16_t dilation_height_factor;
+ // uint8_t inference params.
+ int32_t input_offset;
+ int32_t weights_offset;
+ int32_t output_offset;
+ int32_t output_multiplier;
+ int output_shift;
+ // uint8_t, etc, activation params.
+ int32_t quantized_activation_min;
+ int32_t quantized_activation_max;
+ // float activation params.
+ float float_activation_min;
+ float float_activation_max;
+};
+
+enum class BroadcastableOpCategory : uint8_t
+{
+ kNone,
+ kNonBroadcast, // Matching input shapes.
+ kFirstInputBroadcastsFast, // Fivefold nested loops.
+ kSecondInputBroadcastsFast, // Fivefold nested loops.
+ kGenericBroadcast, // Fall-back.
+ kScalarFirstBroadcast, // Scalar
+ kScalarSecondBroadcast, // Scalar
+};
+
+struct ConcatenationParams
+{
+ int8_t axis;
+ const int32_t *input_zeropoint;
+ const float *input_scale;
+ uint16_t inputs_count;
+ int32_t output_zeropoint;
+ float output_scale;
+};
+
+struct TransposeParams
+{
+ int8_t perm_count;
+ int32_t perm[5];
+};
+
+struct ComparisonParams
+{
+ // uint8_t inference params.
+ int left_shift;
+ int32_t input1_offset;
+ int32_t input1_multiplier;
+ int input1_shift;
+ int32_t input2_offset;
+ int32_t input2_multiplier;
+ int input2_shift;
+ // Shape dependent / common to inference types.
+ bool is_broadcast;
+};
+
+struct StridedSliceParams
+{
+ int8_t start_indices_count;
+ int32_t start_indices[5];
+ int8_t stop_indices_count;
+ int32_t stop_indices[5];
+ int8_t strides_count;
+ int32_t strides[5];
+
+ int16_t begin_mask;
+ int16_t ellipsis_mask;
+ int16_t end_mask;
+ int16_t new_axis_mask;
+ int16_t shrink_axis_mask;
+};
+
+// For Add, Sub, Mul ops.
+struct ArithmeticParams
+{
+ // Shape dependent / common to data / op types.
+ BroadcastableOpCategory broadcast_category;
+ // uint8_t inference params.
+ int32_t input1_offset;
+ int32_t input2_offset;
+ int32_t output_offset;
+ int32_t output_multiplier;
+ int output_shift;
+ // Add / Sub, not Mul, uint8_t inference params.
+ int left_shift;
+ int32_t input1_multiplier;
+ int input1_shift;
+ int32_t input2_multiplier;
+ int input2_shift;
+ // uint8_t, etc, activation params.
+ int32_t quantized_activation_min;
+ int32_t quantized_activation_max;
+ // float activation params.
+ float float_activation_min;
+ float float_activation_max;
+ // int64_t activation params.
+ int64_t int64_activation_min;
+ int64_t int64_activation_max;
+
+ // Processed output dimensions.
+ // Let input "a" be the one that broadcasts in the faster-changing dimension.
+ // Then, after coalescing, for shapes {a0, a1, a2, a3, a4} and
+ // {b0, b1, b2, b3, b4},
+ // broadcast_shape[4] = b0 = a0.
+ // broadcast_shape[3] = b1; a1 = 1.
+ // broadcast_shape[2] = b2 = a2.
+ // broadcast_shape[1] = a3; b3 = 1.
+ // broadcast_shape[0] = b4 = a4.
+ int broadcast_shape[5];
+};
+
+enum class FusedActivationFunctionType : uint8_t
+{
+ kNone,
+ kRelu6,
+ kRelu1,
+ kRelu
+};
+
+struct PoolParams
+{
+ FusedActivationFunctionType activation;
+ PaddingType padding_type;
+ PaddingValues padding_values;
+ int stride_height;
+ int stride_width;
+ int filter_height;
+ int filter_width;
+ // uint8_t, etc, activation params.
+ int32_t quantized_activation_min;
+ int32_t quantized_activation_max;
+ // float activation params.
+ float float_activation_min;
+ float float_activation_max;
+};
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_PARAMS_H
diff --git a/onert-micro/luci-interpreter/pal/common/ProcessBroadcastShapes.h b/onert-micro/luci-interpreter/pal/common/ProcessBroadcastShapes.h
new file mode 100644
index 000000000..05ce8024e
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/common/ProcessBroadcastShapes.h
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_PROCESS_BROADCAST_SHAPES_H
+#define LUCI_INTERPRETER_PAL_PROCESS_BROADCAST_SHAPES_H
+
+namespace luci_interpreter_pal
+{
+
+// DO NOT USE THIS STRUCT FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING
+// BROADCASTING.
+//
+// NdArrayDesc<N> describes the shape and memory layout of an N-dimensional
+// rectangular array of numbers.
+//
+// NdArrayDesc<N> is basically identical to Dims<N> defined in types.h.
+// However, as Dims<N> is to be deprecated, this class exists as an adaptor
+// to enable simple unoptimized implementations of element-wise broadcasting
+// operations.
+template <int N> struct NdArrayDesc
+{
+ // The "extent" of each dimension. Indices along dimension d must be in the
+ // half-open interval [0, extents[d]).
+ int extents[N];
+
+ // The number of *elements* (not bytes) between consecutive indices of each
+ // dimension.
+ int strides[N];
+};
+
+// Copies dims to desc, calculating strides.
+template <int N>
+inline void copyDimsToDesc(const luci_interpreter::RuntimeShape &input_shape,
+ NdArrayDesc<N> *desc_out)
+{
+ int desc_stride = 1;
+ for (int i = N - 1; i >= 0; --i)
+ {
+ desc_out->extents[i] = input_shape.dims(i);
+ desc_out->strides[i] = desc_stride;
+ desc_stride *= input_shape.dims(i);
+ }
+}
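+
+// E.g. a shape of {2, 3, 4} (with N == 3) produces extents {2, 3, 4} and
+// row-major strides {12, 4, 1}.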
+
+template <int N, int DIM, typename Calc>
+typename std::enable_if<DIM == N - 1, void>::type NDOpsHelperImpl(const NdArrayDesc<N> &output,
+ const Calc &calc, int indexes[N])
+{
+ for (indexes[DIM] = 0; indexes[DIM] < output.extents[DIM]; ++indexes[DIM])
+ {
+ calc(indexes);
+ }
+}
+
+template <int N, int DIM, typename Calc>
+typename std::enable_if<DIM != N - 1, void>::type NDOpsHelperImpl(const NdArrayDesc<N> &output,
+ const Calc &calc, int indexes[N])
+{
+ for (indexes[DIM] = 0; indexes[DIM] < output.extents[DIM]; ++indexes[DIM])
+ {
+ NDOpsHelperImpl<N, DIM + 1, Calc>(output, calc, indexes);
+ }
+}
+
+// Execute the calc function in the innermost iteration based on the shape of
+// the output. The calc function should take a single argument of type int[N].
+template <int N, typename Calc>
+inline void NDOpsHelper(const NdArrayDesc<N> &output, const Calc &calc)
+{
+ int indexes[N] = {0};
+ NDOpsHelperImpl<N, 0, Calc>(output, calc, indexes);
+}
+
+template <int N>
+inline void NdArrayDescsForElementwiseBroadcast(const luci_interpreter::RuntimeShape &input0_shape,
+ const luci_interpreter::RuntimeShape &input1_shape,
+ NdArrayDesc<N> *desc0_out,
+ NdArrayDesc<N> *desc1_out)
+{
+
+ auto extended_input0_shape = luci_interpreter::RuntimeShape::extendedShape(N, input0_shape);
+ auto extended_input1_shape = luci_interpreter::RuntimeShape::extendedShape(N, input1_shape);
+
+ // Copy dims to desc, calculating strides.
+ copyDimsToDesc<N>(extended_input0_shape, desc0_out);
+ copyDimsToDesc<N>(extended_input1_shape, desc1_out);
+
+ // Walk over each dimension. If the extents are equal do nothing.
+ // Otherwise, set the desc with extent 1 to have extent equal to the other and
+ // stride 0.
+ for (int i = 0; i < N; ++i)
+ {
+ const int extent0 = extended_input0_shape.dims(i);
+ const int extent1 = extended_input1_shape.dims(i);
+ if (extent0 != extent1)
+ {
+ if (extent0 == 1)
+ {
+ desc0_out->strides[i] = 0;
+ desc0_out->extents[i] = extent1;
+ }
+ else
+ {
+ desc1_out->strides[i] = 0;
+ desc1_out->extents[i] = extent0;
+ }
+ }
+ }
+}
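+
+// E.g. broadcasting shapes {2, 1} and {2, 3} (N == 2): the second dimension
+// differs, so desc0 gets extents {2, 3} with strides[1] = 0, making every
+// index along that dimension read the same element of input0.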
+
+inline int subscriptToIndex(const NdArrayDesc<4> &desc, int i0, int i1, int i2, int i3)
+{
+ return i0 * desc.strides[0] + i1 * desc.strides[1] + i2 * desc.strides[2] + i3 * desc.strides[3];
+}
+
+inline int subscriptToIndex(const NdArrayDesc<5> &desc, int indexes[5])
+{
+ return indexes[0] * desc.strides[0] + indexes[1] * desc.strides[1] +
+ indexes[2] * desc.strides[2] + indexes[3] * desc.strides[3] + indexes[4] * desc.strides[4];
+}
+
+// Consolidates dimensions in broadcast inputs, checks for five-fold pattern.
+//
+// For example, if sequence of dimensions of one input is
+// ..., 1, 3, 1, 7, 9, 5,... and the other is ..., 2, 3, 1, 7, 1, 1, ...
+// we can consolidate these as
+// ..., 1, 3*7, 9*5, ... and 2, 3*7, 1.
+//
+// The category is updated in the less-frequent case of shapes that are
+// not suited to a fivefold-loop broadcast.
+//
+// Falls back to generic pattern when it does not know how to process properly.
+//
+// Returns true iff there is some sort of broadcast, which includes five-fold
+// patterns and falling back to generic broadcast.
+inline bool ProcessBroadcastShapes(const luci_interpreter::RuntimeShape &shape0,
+ const luci_interpreter::RuntimeShape &shape1,
+ luci_interpreter_pal::ArithmeticParams *params)
+{
+ const int dims_count = std::max(shape0.dimensionsCount(), shape1.dimensionsCount());
+
+ params->broadcast_category = BroadcastableOpCategory::kGenericBroadcast;
+
+ auto extended_shape0 = luci_interpreter::RuntimeShape::extendedShape(dims_count, shape0);
+ auto extended_shape1 = luci_interpreter::RuntimeShape::extendedShape(dims_count, shape1);
+
+ // Check for "exact" match, implicitly accepting any scalar shapes.
+ if (extended_shape0 == extended_shape1)
+ {
+ params->broadcast_category = BroadcastableOpCategory::kNonBroadcast;
+ return false;
+ }
+
+ if (shape0.flatSize() == 1)
+ {
+ params->broadcast_category = BroadcastableOpCategory::kScalarFirstBroadcast;
+ return true;
+ }
+ else if (shape1.flatSize() == 1)
+ {
+ params->broadcast_category = BroadcastableOpCategory::kScalarSecondBroadcast;
+ return true;
+ }
+
+ for (int i = dims_count - 1; i >= 0; --i)
+ {
+ if (extended_shape0.dims(i) == extended_shape1.dims(i))
+ {
+ continue;
+ }
+ else if (extended_shape0.dims(i) == 1)
+ {
+ params->broadcast_category = BroadcastableOpCategory::kFirstInputBroadcastsFast;
+ return true;
+ }
+ else if (extended_shape1.dims(i) == 1)
+ {
+ params->broadcast_category = BroadcastableOpCategory::kSecondInputBroadcastsFast;
+ return true;
+ }
+ else
+ {
+ // This case is erroneous: there is a dimension that does not match and
+ // is not a broadcast from one shape to the other.
+ params->broadcast_category = BroadcastableOpCategory::kGenericBroadcast;
+ return true;
+ }
+ }
+
+ return false;
+}
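+
+// E.g. shapes {2, 3} vs {2, 3} set kNonBroadcast and return false; {1} vs
+// {2, 3} set kScalarFirstBroadcast; and {2, 1} vs {2, 3} set
+// kFirstInputBroadcastsFast, since input0 broadcasts in the fastest-changing
+// dimension.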
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_PROCESS_BROADCAST_SHAPES_H
diff --git a/onert-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst b/onert-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst
new file mode 100644
index 000000000..42883192e
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst
@@ -0,0 +1,47 @@
+REGISTER_KERNEL(ABS, Abs)
+REGISTER_KERNEL(ADD, Add)
+REGISTER_KERNEL(AVERAGE_POOL_2D, AveragePool2D)
+REGISTER_KERNEL(ARG_MAX, ArgMax)
+REGISTER_KERNEL(ARG_MIN, ArgMin)
+REGISTER_KERNEL(DIV, Div)
+REGISTER_KERNEL(ADD_N, AddN)
+REGISTER_KERNEL(FULLY_CONNECTED, FullyConnected)
+REGISTER_KERNEL(CONV_2D, Conv2D)
+REGISTER_KERNEL(LOGISTIC, Logistic)
+REGISTER_KERNEL(GATHER, Gather)
+REGISTER_KERNEL(EXP, Exp)
+REGISTER_KERNEL(GREATER, Greater)
+REGISTER_KERNEL(GREATER_EQUAL, GreaterEqual)
+REGISTER_KERNEL(EXPAND_DIMS, ExpandDims)
+REGISTER_KERNEL(ELU, Elu)
+REGISTER_KERNEL(EQUAL, Equal)
+REGISTER_KERNEL(FILL, Fill)
+REGISTER_KERNEL(PACK, Pack)
+REGISTER_KERNEL(PAD, Pad)
+REGISTER_KERNEL(PADV2, PadV2)
+REGISTER_KERNEL(RESHAPE, Reshape)
+REGISTER_KERNEL(RELU, Relu)
+REGISTER_KERNEL(RELU6, Relu6)
+REGISTER_KERNEL(REDUCE_PROD, ReduceCommon)
+REGISTER_KERNEL(LESS, Less)
+REGISTER_KERNEL(LESS_EQUAL, LessEqual)
+REGISTER_KERNEL(LOGICAL_AND, LogicalAnd)
+REGISTER_KERNEL(LOGICAL_OR, LogicalOr)
+REGISTER_KERNEL(LEAKY_RELU, LeakyRelu)
+REGISTER_KERNEL(MUL, Mul)
+REGISTER_KERNEL(MAX_POOL_2D, MaxPool2D)
+REGISTER_KERNEL(CONCATENATION, Concatenation)
+REGISTER_KERNEL(SHAPE, Shape)
+REGISTER_KERNEL(NOT_EQUAL, NotEqual)
+REGISTER_KERNEL(SLICE, Slice)
+REGISTER_KERNEL(SUB, Sub)
+REGISTER_KERNEL(SPLIT, Split)
+REGISTER_KERNEL(STRIDED_SLICE, StridedSlice)
+REGISTER_KERNEL(SPLIT_V, SplitV)
+REGISTER_KERNEL(TANH, Tanh)
+REGISTER_KERNEL(TRANSPOSE, Transpose)
+REGISTER_KERNEL(SOFTMAX, Softmax)
+REGISTER_KERNEL(WHILE, While)
+REGISTER_KERNEL(UNIDIRECTIONAL_SEQUENCE_LSTM, UnidirectionalSequenceLSTM)
+REGISTER_KERNEL(RESIZE_BILINEAR, ResizeBilinear)
+REGISTER_KERNEL(NEG, Neg)
diff --git a/onert-micro/luci-interpreter/pal/mcu/PALAdd.h b/onert-micro/luci-interpreter/pal/mcu/PALAdd.h
new file mode 100644
index 000000000..d9d1f7865
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/mcu/PALAdd.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ADD_H
+#define LUCI_INTERPRETER_PAL_ADD_H
+
+#include "PALAddCommon.h"
+
+namespace luci_interpreter_pal
+{
+template <>
+inline void Add<int8_t>(const ArithmeticParams &, const int, const int8_t *, const int8_t *,
+ int8_t *)
+{
+ assert(false && "Not IMPL yet");
+}
+
+template <>
+inline void Add<int16_t>(const ArithmeticParams &, const int, const int16_t *, const int16_t *,
+ int16_t *)
+{
+ assert(false && "Not IMPL yet");
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ADD_H
diff --git a/onert-micro/luci-interpreter/pal/mcu/PALAveragePool2D.h b/onert-micro/luci-interpreter/pal/mcu/PALAveragePool2D.h
new file mode 100644
index 000000000..ef5fe7223
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/mcu/PALAveragePool2D.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_AVERAGE_POOL_2D_H
+#define LUCI_INTERPRETER_PAL_AVERAGE_POOL_2D_H
+
+#include "PALAveragePool2DCommon.h"
+
+namespace luci_interpreter_pal
+{
+// TODO: add S8 and S16 kernel
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_AVERAGE_POOL_2D_H
diff --git a/onert-micro/luci-interpreter/pal/mcu/PALConv2d.h b/onert-micro/luci-interpreter/pal/mcu/PALConv2d.h
new file mode 100644
index 000000000..c979f7610
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/mcu/PALConv2d.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_CONV2D_H
+#define LUCI_INTERPRETER_PAL_CONV2D_H
+#include "PALConv2DCommon.h"
+
+namespace luci_interpreter_pal
+{
+static inline void QuantizedConvPerChannel(const ConvParams &, const int32_t *, const int8_t *,
+ const int32_t *, const int8_t *, const int32_t *,
+ const int32_t *, int8_t *)
+{
+ assert(false && "Not supported yet");
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_CONV2D_H
diff --git a/onert-micro/luci-interpreter/pal/mcu/PALFullyConnected.h b/onert-micro/luci-interpreter/pal/mcu/PALFullyConnected.h
new file mode 100644
index 000000000..4a024b137
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/mcu/PALFullyConnected.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_FULLY_CONNECTED_H
+#define LUCI_INTERPRETER_PAL_FULLY_CONNECTED_H
+
+#include "PALFullyConnectedCommon.h"
+
+namespace luci_interpreter_pal
+{
+
+template <>
+inline void
+FullyConnected(const luci_interpreter_pal::FullyConnectedParams &params, const int32_t *input_shape,
+ const int8_t *input_data, const int32_t *filter_shape, const int8_t *filter_data,
+ const int32_t *bias_data, const int32_t *output_shape, int8_t *output_data)
+{
+ // MARK: this operation is not supported at the moment
+ assert(false && "FullyConnected INT8 NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)filter_shape;
+ (void)filter_data;
+ (void)bias_data;
+ (void)output_shape;
+ (void)output_data;
+}
+
+template <>
+inline void FullyConnected(const luci_interpreter_pal::FullyConnectedParams &, const int32_t *,
+ const int16_t *, const int32_t *, const int8_t *, const int64_t *,
+ const int32_t *, int16_t *)
+{
+ // MARK: this operation is not supported at the moment
+ assert(false && "FullyConnected INT16 NYI");
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_FULLY_CONNECTED_H
diff --git a/onert-micro/luci-interpreter/pal/mcu/PALL2Pool2D.h b/onert-micro/luci-interpreter/pal/mcu/PALL2Pool2D.h
new file mode 100644
index 000000000..38a302fc6
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/mcu/PALL2Pool2D.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2POOL2D_H
+#define LUCI_INTERPRETER_PAL_L2POOL2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Pool(const tflite::PoolParams &params, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::reference_ops::L2Pool(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2POOL2D_H
diff --git a/onert-micro/luci-interpreter/pal/mcu/PALMaxPool2D.h b/onert-micro/luci-interpreter/pal/mcu/PALMaxPool2D.h
new file mode 100644
index 000000000..a0fff0c6d
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/mcu/PALMaxPool2D.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_MAX_POOL_2D_H
+#define LUCI_INTERPRETER_PAL_MAX_POOL_2D_H
+
+#include "PALMaxPool2DCommon.h"
+
+namespace luci_interpreter_pal
+{
+// TODO: Add INT8, INT16 kernels
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_MAX_POOL_2D_H
diff --git a/onert-micro/luci-interpreter/pal/mcu/PALMul.h b/onert-micro/luci-interpreter/pal/mcu/PALMul.h
new file mode 100644
index 000000000..7b55cd1c8
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/mcu/PALMul.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_MUL_H
+#define LUCI_INTERPRETER_PAL_MUL_H
+
+#include "PALMulCommon.h"
+
+namespace luci_interpreter_pal
+{
+
+template <>
+inline void Mul<int8_t>(const ArithmeticParams &, const int, const int8_t *, const int8_t *,
+ int8_t *)
+{
+ assert(false && "Not IMPL yet");
+}
+
+template <>
+inline void Mul<int16_t>(const ArithmeticParams &, const int, const int16_t *, const int16_t *,
+ int16_t *)
+{
+ assert(false && "Not IMPL yet");
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_MUL_H
diff --git a/onert-micro/luci-interpreter/pal/mcu/PALUnidirectionalSequenceLSTM.h b/onert-micro/luci-interpreter/pal/mcu/PALUnidirectionalSequenceLSTM.h
new file mode 100644
index 000000000..35592ac66
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/mcu/PALUnidirectionalSequenceLSTM.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_UNIDIRECTIONAL_SEQUENCE_LSTM_H
+#define LUCI_INTERPRETER_PAL_UNIDIRECTIONAL_SEQUENCE_LSTM_H
+
+#include "PALUnidirectionalSequenceLSTMCommon.h"
+
+namespace luci_interpreter_pal
+{
+// Evaluate the LSTM kernel with (potential) multi-steps and multi-batch input
+template <>
+void evalLSTM<int8_t, int8_t, int16_t, int32_t>(
+ luci_interpreter::lstm::LSTMStruct *lstm_struct,
+ luci_interpreter::lstm::LSTMParameters *lstm_params,
+ luci_interpreter::lstm::CellStateInfo *cell_state_info, int8_t *output_state_data,
+ int16_t *cell_state_data, int16_t *scratch0, int16_t *scratch1, int16_t *scratch2,
+ int16_t *scratch3, luci_interpreter::BaseRuntimeGraph *runtime_graph)
+{
+ lstm_internal::LstmSizeInfo size_info;
+
+ size_info.time_major = lstm_struct->options->time_major();
+ size_info.batch_size = size_info.time_major
+ ? luci_interpreter::Tensor::dim(lstm_struct->input(), 1)
+ : luci_interpreter::Tensor::dim(lstm_struct->input(), 0);
+ size_info.time_steps = size_info.time_major
+ ? luci_interpreter::Tensor::dim(lstm_struct->input(), 0)
+ : luci_interpreter::Tensor::dim(lstm_struct->input(), 1);
+ size_info.input_dimension = luci_interpreter::Tensor::dim(lstm_struct->input(), 2);
+ size_info.state_dimension = luci_interpreter::Tensor::dim(lstm_struct->output_state(), 1);
+
+ lstm_internal::LstmStepManager step_info(size_info);
+
+ // time is the first dimension, so batch computation is possible
+ if (size_info.time_major)
+ {
+ for (int t = 0; t < size_info.time_steps; t++)
+ {
+ lstm_internal::lstmStep<int8_t, int8_t, int16_t, int32_t>(
+ lstm_struct, lstm_params, &step_info, cell_state_info, output_state_data, cell_state_data,
+ scratch0, scratch1, scratch2, scratch3, runtime_graph);
+ // prepare for the next time step
+ step_info.updateTime();
+ }
+ }
+ else
+ {
+ // batch is the first dimension; the computation cannot be batched over time, so run single-batch inference
+ for (int b = 0; b < size_info.batch_size; b++)
+ {
+ for (int t = 0; t < size_info.time_steps; t++)
+ {
+ lstm_internal::lstmStep<int8_t, int8_t, int16_t, int32_t>(
+ lstm_struct, lstm_params, &step_info, cell_state_info, output_state_data, cell_state_data,
+ scratch0, scratch1, scratch2, scratch3, runtime_graph);
+ // prepare for the next time step
+ step_info.updateTime();
+ }
+ // prepare for the next batch
+ step_info.updateBatch();
+ step_info.resetTime();
+ }
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_UNIDIRECTIONAL_SEQUENCE_LSTM_H
diff --git a/onert-micro/luci-interpreter/pal/mcu/pal.cmake b/onert-micro/luci-interpreter/pal/mcu/pal.cmake
new file mode 100644
index 000000000..fe528ad56
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/mcu/pal.cmake
@@ -0,0 +1,7 @@
+macro(initialize_pal)
+ set(PAL_INITIALIZED TRUE)
+endmacro()
+
+macro(add_pal_to_target TGT)
+ target_include_directories(${TGT} PUBLIC ${LUCI_INTERPRETER_PAL_DIR})
+endmacro()
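+
+# Usage sketch (target name is illustrative): src/CMakeLists.txt includes this
+# file via "${LUCI_INTERPRETER_PAL_DIR}/pal.cmake" and then calls
+#   initialize_pal()
+#   add_pal_to_target(my_kernels_target)
+# so that the PAL headers land on the target's include path.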
diff --git a/onert-micro/luci-interpreter/requires.cmake b/onert-micro/luci-interpreter/requires.cmake
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/onert-micro/luci-interpreter/requires.cmake
diff --git a/onert-micro/luci-interpreter/src/CMakeLists.txt b/onert-micro/luci-interpreter/src/CMakeLists.txt
new file mode 100644
index 000000000..75d6836df
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/CMakeLists.txt
@@ -0,0 +1,45 @@
+include("${LUCI_INTERPRETER_PAL_DIR}/pal.cmake")
+
+initialize_pal()
+
+if (NOT PAL_INITIALIZED)
+ message("PAL Failed to initialize, skip luci-interpreter")
+ return()
+endif()
+
+message(STATUS "LUCI INTERPRETER BEGIN")
+
+set(LUCI_INTERPRETER_BINARY "luci_interpreter_micro${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_MEMORY_MANAGER "luci_interpreter_micro_memory_manager${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_CORE "luci_interpreter_core_micro${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_KERNELS "luci_interpreter_kernels_micro${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_LOADER "luci_interpreter_loader_micro${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_IMPORT "luci_interpreter_import_micro${LUCI_INTERPRETER_SUFFIX}")
+
+add_subdirectory(memory_managers)
+message(STATUS "LUCI INTERPRETER MEMORY MANAGER")
+add_subdirectory(core)
+message(STATUS "LUCI INTERPRETER CORE")
+add_subdirectory(kernels)
+message(STATUS "LUCI INTERPRETER KERNELS")
+add_subdirectory(loader)
+message(STATUS "LUCI INTERPRETER LOADER")
+
+target_link_libraries(${LUCI_INTERPRETER_CORE} PUBLIC ${LUCI_INTERPRETER_KERNELS})
+target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC ${LUCI_INTERPRETER_KERNELS})
+
+message(STATUS "LUCI INTERPTER INITALIZED")
+
+set(SOURCES
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/Interpreter.h" Interpreter.cpp)
+
+add_library(${LUCI_INTERPRETER_BINARY} STATIC ${SOURCES})
+
+target_include_directories(${LUCI_INTERPRETER_BINARY} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
+target_include_directories(${LUCI_INTERPRETER_BINARY} PRIVATE "${LUCI_INTERPRETER_SOURCE_DIR}")
+target_link_libraries(${LUCI_INTERPRETER_BINARY}
+ PUBLIC ${LUCI_INTERPRETER_MEMORY_MANAGER} ${LUCI_INTERPRETER_LOADER} ${LUCI_INTERPRETER_CORE})
+
+install(TARGETS ${LUCI_INTERPRETER_BINARY} DESTINATION lib)
+install(DIRECTORY include/ DESTINATION include
+ FILES_MATCHING PATTERN "*.h")
diff --git a/onert-micro/luci-interpreter/src/Interpreter.cpp b/onert-micro/luci-interpreter/src/Interpreter.cpp
new file mode 100644
index 000000000..658b28ff7
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/Interpreter.cpp
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/Interpreter.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+
+#ifdef USE_STATIC_ALLOC
+// Construct static interpreter with configurations
+Interpreter::Interpreter(const char *model_data_raw, const InterpreterConfigure &configuration)
+{
+ _runtime_module = std::make_unique<RuntimeModule>();
+
+
+ // Note:
+ // configuration._input_buf_size, configuration._temp_buf_size, configuration._output_buf_size
+ // will be removed and instead be read from the circle file
+ if (configuration.isStaticManager())
+ {
+ _memory_manager = std::make_unique<StaticMemoryManager>(
+ configuration._input_buf_size, configuration._temp_buf_size, configuration._output_buf_size);
+ }
+ else { _memory_manager = std::make_unique<SimpleMemoryManager>(); }
+
+ _memory_manager->is_allocate_input(configuration.getAllocateInputValue());
+
+ ModuleLoader loader(_runtime_module.get(), _memory_manager.get());
+ loader.load(/* is_static_allocations */ configuration.isStaticManager(), model_data_raw);
+}
+#else
+
+// Construct default interpreter with dynamic allocations and with input allocations
+Interpreter::Interpreter(const char *model_data_raw, bool dealloc_input)
+{
+ ModuleLoader::load(&_runtime_module, &_memory_manager, model_data_raw, dealloc_input);
+}
+
+#endif // USE_STATIC_ALLOC
+
+Interpreter::~Interpreter() = default;
+
+void Interpreter::interpret() { _runtime_module.execute(); }
+
+int32_t Interpreter::getInputDataSizeByIndex(int32_t input_tensor_index)
+{
+ auto *runtime_graph = _runtime_module.getMainGraph();
+
+ return runtime_graph->getInputDataSizeByIndex(input_tensor_index);
+}
+
+int32_t Interpreter::getOutputDataSizeByIndex(int32_t output_tensor_index)
+{
+ auto *runtime_graph = _runtime_module.getMainGraph();
+
+ return runtime_graph->getOutputDataSizeByIndex(output_tensor_index);
+}
+
+void Interpreter::allocateAndWriteInputTensor(int32_t input_tensor_index, const void *data,
+ size_t data_size)
+{
+ assert(data_size > 0);
+ assert(data != nullptr);
+ assert(input_tensor_index >= 0);
+ auto *runtime_graph = _runtime_module.getMainGraph();
+ auto tensor_data = runtime_graph->configureGraphInput(input_tensor_index);
+
+ std::memcpy(tensor_data, data, data_size);
+}
+
+uint8_t *Interpreter::allocateInputTensor(int32_t input_tensor_index)
+{
+ assert(input_tensor_index >= 0);
+
+ auto *runtime_graph = _runtime_module.getMainGraph();
+
+ return runtime_graph->configureGraphInput(input_tensor_index);
+}
+
+uint8_t *Interpreter::readOutputTensor(int32_t output_tensor_index)
+{
+ auto *runtime_graph = _runtime_module.getMainGraph();
+
+ return runtime_graph->getOutputDataByIndex(output_tensor_index);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/core/CMakeLists.txt b/onert-micro/luci-interpreter/src/core/CMakeLists.txt
new file mode 100644
index 000000000..48d92aa37
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/core/CMakeLists.txt
@@ -0,0 +1,20 @@
+set(SOURCES
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/core/DataType.h"
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/core/Tensor.h"
+ RuntimeGraph.h
+ RuntimeGraph.cpp
+ RuntimeModule.h)
+
+add_library(${LUCI_INTERPRETER_CORE} STATIC ${SOURCES})
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(${LUCI_INTERPRETER_CORE} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
+
+add_subdirectory(reader)
+
+target_link_libraries(${LUCI_INTERPRETER_CORE} PUBLIC "luci_micro_circle_reader${READER_SUFFIX}")
+target_link_libraries(${LUCI_INTERPRETER_CORE} PUBLIC luci_micro_circle_schema)
+target_link_libraries(${LUCI_INTERPRETER_CORE} PUBLIC ${LUCI_INTERPRETER_MEMORY_MANAGER})
+
+target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
+target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}")
diff --git a/onert-micro/luci-interpreter/src/core/RuntimeGraph.cpp b/onert-micro/luci-interpreter/src/core/RuntimeGraph.cpp
new file mode 100644
index 000000000..917ebbbbf
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/core/RuntimeGraph.cpp
@@ -0,0 +1,443 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "core/RuntimeGraph.h"
+#include "kernels/KernelBuilder.h"
+
+#include <algorithm>
+#include <map>
+
+namespace luci_interpreter
+{
+
+// IBaseRuntimeGraph
+RuntimeGraph::RuntimeGraph(SimpleMemoryManager *memory_manager, CircleReader *circle_reader,
+ RuntimeModule *runtime_module, uint32_t subgraph_index)
+ : _memory_manager(memory_manager),
+ _tensor_to_data(std::unordered_map<const circle::Tensor *, uint8_t *>{}),
+ _runtime_module(runtime_module), _reader(circle_reader),
+ _inplace_op_indexes(std::unordered_set<const circle::Operator *>{}),
+ _subgraph_index(subgraph_index)
+{
+}
+
+RuntimeGraph::~RuntimeGraph()
+{
+ for (auto &idx_to_tensor : _tensor_to_data)
+ {
+ auto *data = idx_to_tensor.second;
+
+ _memory_manager->release_memory(data);
+ }
+}
+
+// TODO: modify this
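+// Build per-kernel allocation/deallocation plans from tensor lifetimes: each
+// tensor is allocated right before the first kernel that produces it and
+// released right after the last kernel that reads it. A start of -1 marks
+// tensors allocated outside the plan (graph inputs, outputs of in-place ops);
+// an end of -1 marks tensors whose buffer is handed over to another tensor
+// and must not be freed here.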
+void RuntimeGraph::buildAllocDeallocPlan(bool dealloc_input)
+{
+ assert(_reader->get_current_subgraph_index() == _subgraph_index);
+ invalidate();
+ using Lifetime = std::pair<int32_t, int32_t>;
+ std::map<const circle::Tensor *, Lifetime> lifetimes;
+ const size_t num_kernels = _reader->operators().size();
+
+ if (dealloc_input)
+ {
+ for (const auto input_ind : _reader->inputs())
+ {
+ const auto raw_tensor = _reader->tensors()[input_ind];
+
+ assert(lifetimes.count(raw_tensor) == 0);
+ lifetimes[raw_tensor] = Lifetime(-1, 0);
+ }
+ }
+
+ for (int32_t index = 0; index < num_kernels; ++index)
+ {
+ const auto kernel = _reader->operators().at(index);
+ assert(kernel != nullptr);
+
+ for (int32_t j = 0; j < kernel->inputs()->size(); ++j)
+ {
+ const auto input_index = kernel->inputs()->operator[](j);
+
+ if (input_index == -1)
+ continue;
+
+ const auto raw_tensor = _reader->tensors()[input_index];
+
+ // Pass constant tensors
+ if (Tensor::is_constant_tensor(_reader, raw_tensor))
+ continue;
+
+ if (lifetimes.count(raw_tensor) > 0)
+ {
+ if (_inplace_op_indexes.find(kernel) != _inplace_op_indexes.end())
+ lifetimes.at(raw_tensor).second = -1;
+ else
+ lifetimes.at(raw_tensor).second = index;
+ }
+ }
+
+ for (int32_t j = 0; j < kernel->outputs()->size(); ++j)
+ {
+ const auto output_index = kernel->outputs()->operator[](j);
+ const auto raw_tensor = _reader->tensors()[output_index];
+
+ assert(lifetimes.count(raw_tensor) == 0);
+ if (_inplace_op_indexes.find(kernel) != _inplace_op_indexes.end())
+ lifetimes[raw_tensor] = Lifetime(-1, index);
+ else
+ lifetimes[raw_tensor] = Lifetime(index, index);
+ }
+ }
+
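+  // Graph outputs must stay alive until after the last kernel has run.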
+ for (const auto output_ind : _reader->outputs())
+ {
+ const auto raw_tensor = _reader->tensors()[output_ind];
+
+ if (lifetimes.count(raw_tensor) > 0)
+ lifetimes.at(raw_tensor).second = num_kernels;
+ }
+
+ _alloc_plan.assign(num_kernels, std::vector<const circle::Tensor *>());
+ _dealloc_plan.assign(num_kernels + 1, std::vector<const circle::Tensor *>());
+ for (const auto &item : lifetimes)
+ {
+ if (item.second.first != -1)
+ _alloc_plan[item.second.first].push_back(item.first);
+ if (item.second.second != -1)
+ _dealloc_plan[item.second.second].push_back(item.first);
+ }
+ _is_valid = true;
+}
+
+void RuntimeGraph::allocate(size_t kernel_index)
+{
+ assert(_reader->get_current_subgraph_index() == _subgraph_index);
+ assert(_is_valid && kernel_index < _alloc_plan.size());
+ for (const circle::Tensor *tensor : _alloc_plan[kernel_index])
+ {
+ if (_tensor_to_data.find(tensor) != _tensor_to_data.end())
+ {
+ auto *data = _tensor_to_data.at(tensor);
+ _memory_manager->release_memory(data);
+ }
+ auto *data = _memory_manager->allocate_memory(tensor);
+ _tensor_to_data[tensor] = data;
+ }
+}
+
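+// Dynamic-shape bookkeeping: kernels that resize an output at run time record
+// the new shape here so downstream kernels can query it.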
+#ifndef DIS_DYN_SHAPES
+void RuntimeGraph::addDynamicShapeTensor(const circle::Tensor *tensor,
+ luci_interpreter::RuntimeShape &&shapes)
+{
+ assert(_reader->get_current_subgraph_index() == _subgraph_index);
+ _dynamic_tensor_shapes[tensor] = std::move(shapes);
+}
+
+luci_interpreter::RuntimeShape *RuntimeGraph::getDynamicShapeTensor(const circle::Tensor *tensor)
+{
+ assert(_reader->get_current_subgraph_index() == _subgraph_index);
+ auto it = _dynamic_tensor_shapes.find(tensor);
+
+  return it == _dynamic_tensor_shapes.end() ? nullptr : &it->second;
+}
+
+void RuntimeGraph::removeDynamicShapeTensor(const circle::Tensor *tensor)
+{
+ assert(_reader->get_current_subgraph_index() == _subgraph_index);
+ auto it = _dynamic_tensor_shapes.find(tensor);
+
+ assert(it != _dynamic_tensor_shapes.end());
+
+ _dynamic_tensor_shapes.erase(it);
+}
+
+#endif // DIS_DYN_SHAPES
+
+void RuntimeGraph::deallocate(size_t kernel_index)
+{
+ assert(_reader->get_current_subgraph_index() == _subgraph_index);
+ assert(_is_valid && kernel_index < _dealloc_plan.size());
+ for (const circle::Tensor *tensor : _dealloc_plan[kernel_index])
+ {
+ const auto it = _tensor_to_data.find(tensor);
+ assert(it != _tensor_to_data.end());
+
+ auto *data = _tensor_to_data.at(tensor);
+ _memory_manager->release_memory(data);
+
+ _tensor_to_data.erase(it);
+ }
+}
+
+void RuntimeGraph::resetTensorData(uint8_t *new_data, const circle::Tensor *tensor)
+{
+ assert(_reader->get_current_subgraph_index() == _subgraph_index);
+ auto tensor_it = _tensor_to_data.find(tensor);
+ if (tensor_it != _tensor_to_data.end())
+ {
+ auto *data = _tensor_to_data.at(tensor);
+ _memory_manager->release_memory(data);
+ }
+
+ _tensor_to_data[tensor] = new_data;
+}
+
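+// Release buffers bound to graph outputs so the next inference starts from a
+// clean state; an output that aliases a graph input keeps its buffer.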
+void RuntimeGraph::resetOutputTensorsData()
+{
+ assert(_reader->get_current_subgraph_index() == _subgraph_index);
+ const auto graph_inputs = _reader->inputs();
+ for (int i = 0; i < _reader->outputs().size(); ++i)
+ {
+ const auto tensor_index = _reader->outputs()[i];
+ assert(tensor_index != -1);
+
+    if (std::find(graph_inputs.begin(), graph_inputs.end(), tensor_index) != graph_inputs.end())
+      continue;
+
+ const auto tensor = _reader->tensors()[tensor_index];
+ assert(tensor != nullptr);
+
+ auto tensor_it = _tensor_to_data.find(tensor);
+ if (tensor_it != _tensor_to_data.end())
+ {
+ auto *data = _tensor_to_data.at(tensor);
+ _memory_manager->release_memory(data);
+ _tensor_to_data.erase(tensor_it);
+ }
+ }
+}
+
+uint8_t *RuntimeGraph::configureGraphInput(int32_t input_index)
+{
+ assert(_reader->get_current_subgraph_index() == _subgraph_index);
+
+ const auto tensor_index = _reader->inputs()[input_index];
+ assert(tensor_index != -1);
+ const auto tensor = _reader->tensors()[tensor_index];
+ assert(tensor != nullptr);
+
+ auto *data = _memory_manager->allocate_memory(tensor);
+ configureGraphInput(input_index, data);
+
+ return data;
+}
+
+void RuntimeGraph::configureGraphInput(int32_t input_index, uint8_t *data)
+{
+ assert(_reader->get_current_subgraph_index() == _subgraph_index);
+ resetOutputTensorsData();
+
+ const auto tensor_index = _reader->inputs()[input_index];
+ assert(tensor_index != -1);
+ const auto tensor = _reader->tensors()[tensor_index];
+ assert(tensor != nullptr);
+
+ if (_tensor_to_data.find(tensor) != _tensor_to_data.end())
+ {
+ auto *data_prev = _tensor_to_data.at(tensor);
+ if (data_prev != data)
+ _memory_manager->release_memory(data_prev);
+ }
+ _tensor_to_data[tensor] = data;
+}
+
+int32_t RuntimeGraph::getInputDataSizeByIndex(int32_t input_index)
+{
+ assert(_reader->get_current_subgraph_index() == _subgraph_index);
+ const auto tensor_index = _reader->inputs()[input_index];
+ assert(tensor_index != -1);
+ const auto tensor = _reader->tensors()[tensor_index];
+ assert(tensor != nullptr);
+
+ return Tensor::num_elements(tensor) * size(Tensor::element_type(tensor));
+}
+
+int32_t RuntimeGraph::getNumOfInputTensors()
+{
+ assert(_reader->get_current_subgraph_index() == _subgraph_index);
+ return _reader->inputs().size();
+}
+
+int32_t RuntimeGraph::getNumOfOutputTensors()
+{
+ assert(_reader->get_current_subgraph_index() == _subgraph_index);
+ return _reader->outputs().size();
+}
+
+const circle::Tensor *RuntimeGraph::getInputTensorByIndex(int32_t input_index)
+{
+ assert(_reader->get_current_subgraph_index() == _subgraph_index);
+
+ const auto tensor_index = _reader->inputs()[input_index];
+ const auto tensor = _reader->tensors()[tensor_index];
+ assert(tensor != nullptr);
+ return tensor;
+}
+
+const circle::Tensor *RuntimeGraph::getOutputTensorByIndex(int32_t input_index)
+{
+ assert(_reader->get_current_subgraph_index() == _subgraph_index);
+
+ const auto tensor_index = _reader->outputs()[input_index];
+ const auto tensor = _reader->tensors()[tensor_index];
+ assert(tensor != nullptr);
+ return tensor;
+}
+
+int32_t RuntimeGraph::getOutputDataSizeByIndex(int32_t output_index)
+{
+ assert(_reader->get_current_subgraph_index() == _subgraph_index);
+
+ const auto tensor_index = _reader->outputs()[output_index];
+ assert(tensor_index != -1);
+ const auto tensor = _reader->tensors()[tensor_index];
+ assert(tensor != nullptr);
+
+ return Tensor::num_elements(tensor) * size(Tensor::element_type(tensor));
+}
+
+uint8_t *RuntimeGraph::getOutputDataByIndex(int32_t output_index)
+{
+ assert(_reader->get_current_subgraph_index() == _subgraph_index);
+
+ const auto tensor_index = _reader->outputs()[output_index];
+ assert(tensor_index != -1);
+ const auto tensor = _reader->tensors()[tensor_index];
+ assert(tensor != nullptr);
+
+ assert(_tensor_to_data.find(tensor) != _tensor_to_data.end());
+
+ return _tensor_to_data[tensor];
+}
+
+uint8_t *RuntimeGraph::getDataByTensor(const circle::Tensor *raw_tensor)
+{
+ assert(_reader->get_current_subgraph_index() == _subgraph_index);
+
+ if (raw_tensor == nullptr)
+ return nullptr;
+
+ if (_tensor_to_data.find(raw_tensor) == _tensor_to_data.end())
+ {
+ return nullptr;
+ }
+
+ return _tensor_to_data.at(raw_tensor);
+}
+
+void RuntimeGraph::clearTensors() { _tensor_to_data.clear(); }
+
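+// Hand the data buffer of `removing_tensor` over to `dst_tensor` so a kernel
+// can run in place; with no destination the buffer is simply freed.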
+void RuntimeGraph::makeInplaceOperation(const circle::Tensor *removing_tensor,
+ const circle::Tensor *dst_tensor)
+{
+ assert(_reader->get_current_subgraph_index() == _subgraph_index);
+ assert(removing_tensor != nullptr);
+
+ auto src_it = _tensor_to_data.find(removing_tensor);
+
+ if (src_it == _tensor_to_data.end())
+ return;
+
+ auto *data = _tensor_to_data[removing_tensor];
+
+ _tensor_to_data.erase(src_it);
+
+ if (dst_tensor == nullptr)
+ {
+ delete[] data;
+ return;
+ }
+
+ assert(_tensor_to_data.find(dst_tensor) == _tensor_to_data.end() &&
+ "Failed makeInplaceOperation");
+ _tensor_to_data[dst_tensor] = data;
+}
+
+uint8_t *RuntimeGraph::getConstDataByTensor(const circle::Tensor *raw_tensor)
+{
+ assert(_reader->get_current_subgraph_index() == _subgraph_index);
+ if (raw_tensor == nullptr)
+ return nullptr;
+
+ auto const &buffer = wrap(_reader->buffers()[raw_tensor->buffer()]->data());
+
+ return const_cast<uint8_t *>(buffer.data());
+}
+
+const circle::Tensor *RuntimeGraph::getCircleTensorByIndex(int32_t index)
+{
+ assert(_reader->get_current_subgraph_index() == _subgraph_index);
+ if (index < 0)
+ return nullptr;
+
+ const auto raw_tensor = _reader->tensors()[index];
+
+ return raw_tensor;
+}
+
+void RuntimeGraph::configure(bool dealloc_input)
+{
+ selectOwnSubgraph();
+
+ for (uint32_t i = 0; i < _reader->operators().size(); ++i)
+ {
+ const auto op = _reader->operators().at(i);
+ assert(op != nullptr);
+
+ const auto opcode = _reader->builtin_code(op);
+
+ kernel_configure.configure_kernel(op, opcode, this);
+ }
+
+ if (not _is_valid)
+ buildAllocDeallocPlan(dealloc_input);
+
+ _is_valid = true;
+}
+
+void RuntimeGraph::setDataToTensor(const circle::Tensor *tensor, uint8_t *data)
+{
+ _tensor_to_data[tensor] = data;
+}
+
+void RuntimeGraph::execute()
+{
+ selectOwnSubgraph();
+
+ if (not _is_valid)
+ configure(true);
+
+ const auto operators_size = _reader->operators().size();
+ const auto operators = _reader->operators();
+
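+  // For every operator: materialize the output buffers planned for this step,
+  // run the kernel, then release tensors whose lifetime ends here.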
+ for (uint32_t i = 0; i < operators_size; ++i)
+ {
+ const auto op = operators.at(i);
+ assert(op != nullptr);
+
+ const auto opcode = _reader->builtin_code(op);
+
+ allocate(i);
+
+ kernel_executor.execute_kernel(op, opcode, this);
+
+ deallocate(i);
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/core/RuntimeGraph.h b/onert-micro/luci-interpreter/src/core/RuntimeGraph.h
new file mode 100644
index 000000000..baac0b1b9
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/core/RuntimeGraph.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H
+#define LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H
+
+#include "luci_interpreter/core/Tensor.h"
+#ifdef USE_STATIC_ALLOC
+#include "memory_managers/StaticMemoryManager.h"
+#else
+#include "memory_managers/SimpleMemoryManager.h"
+#endif // USE_STATIC_ALLOC
+
+#include "luci_interpreter/core/reader/CircleMicroReader.h"
+
+#include <memory>
+#include <vector>
+#include <unordered_map>
+#include <unordered_set>
+
+namespace luci_interpreter
+{
+
+class RuntimeModule;
+
+#ifdef USE_STATIC_ALLOC
+// TODO: Enable it
+#if 0
+class StaticRuntimeGraph final : public IBaseRuntimeGraph
+{
+public:
+ explicit StaticRuntimeGraph(IMemoryManager *memory_manager, CircleReader *circle_reader);
+ ~StaticRuntimeGraph() final;
+
+ void configureGraphInputs() final;
+ void execute() final;
+ void configure() final;
+
+ void configure_kernels() final;
+};
+#endif
+#else
+
+class RuntimeGraph
+{
+public:
+ RuntimeGraph() = delete;
+
+ explicit RuntimeGraph(SimpleMemoryManager *memory_manager, CircleReader *circle_reader,
+ RuntimeModule *runtime_module, uint32_t subgraph_index);
+ ~RuntimeGraph();
+
+ Tensor *addTensor(const circle::Tensor *raw_tensor, std::unique_ptr<Tensor> &&tensor);
+
+ const circle::Tensor *getCircleTensorByIndex(int32_t index);
+
+ void makeInplaceOperation(const circle::Tensor *src_tensor, const circle::Tensor *dst_tensor);
+
+ uint8_t *getDataByTensor(const circle::Tensor *raw_tensor);
+ uint8_t *getConstDataByTensor(const circle::Tensor *raw_tensor);
+
+ uint8_t *configureGraphInput(int32_t input_index);
+ void configureGraphInput(int32_t input_index, uint8_t *data);
+
+ int32_t getInputDataSizeByIndex(int32_t input_index);
+ int32_t getOutputDataSizeByIndex(int32_t output_index);
+
+ int32_t getNumOfInputTensors();
+ int32_t getNumOfOutputTensors();
+
+ const circle::Tensor *getInputTensorByIndex(int32_t input_index);
+ const circle::Tensor *getOutputTensorByIndex(int32_t input_index);
+
+ uint8_t *getOutputDataByIndex(int32_t output_index);
+
+ void addInplaceOpIndex(const circle::Operator *op) { _inplace_op_indexes.insert(op); }
+
+ void execute();
+ void configure(bool dealloc_input);
+
+ void invalidate() { _is_valid = false; }
+ bool isValid() const { return _is_valid; }
+
+  void selectOwnSubgraph() { _reader->select_subgraph(_subgraph_index); }
+ void resetOutputTensorsData();
+
+ void clearTensors();
+
+ void setDataToTensor(const circle::Tensor *tensor, uint8_t *data);
+
+ void resetTensorData(uint8_t *new_data, const circle::Tensor *tensor);
+
+  RuntimeModule *getRuntimeModule() { return _runtime_module; }
+
+ bool is_inplace_op(const circle::Operator *op)
+ {
+ return _inplace_op_indexes.find(op) != _inplace_op_indexes.end();
+ }
+
+#ifndef DIS_DYN_SHAPES
+ void addDynamicShapeTensor(const circle::Tensor *tensor, luci_interpreter::RuntimeShape &&shapes);
+
+ luci_interpreter::RuntimeShape *getDynamicShapeTensor(const circle::Tensor *tensor);
+
+ void removeDynamicShapeTensor(const circle::Tensor *tensor);
+#endif // DIS_DYN_SHAPES
+
+private:
+ void buildAllocDeallocPlan(bool dealloc_input);
+ void allocate(size_t kernel_index);
+ void deallocate(size_t kernel_index);
+
+private:
+ SimpleMemoryManager *_memory_manager;
+ CircleReader *_reader;
+ RuntimeModule *_runtime_module;
+
+ std::unordered_map<const circle::Tensor *, uint8_t *> _tensor_to_data;
+ std::unordered_set<const circle::Operator *> _inplace_op_indexes;
+
+ bool _is_valid = false;
+
+ // Tensors that are not used anymore after given op
+ std::vector<std::vector<const circle::Tensor *>> _alloc_plan;
+ std::vector<std::vector<const circle::Tensor *>> _dealloc_plan;
+
+ uint32_t _subgraph_index;
+
+#ifndef DIS_DYN_SHAPES
+ std::unordered_map<const circle::Tensor *, luci_interpreter::RuntimeShape> _dynamic_tensor_shapes;
+#endif // DIS_DYN_SHAPES
+};
+
+#endif // USE_STATIC_ALLOC
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H
diff --git a/onert-micro/luci-interpreter/src/core/RuntimeModule.h b/onert-micro/luci-interpreter/src/core/RuntimeModule.h
new file mode 100644
index 000000000..d42698277
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/core/RuntimeModule.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_RUNTIMEMODULE_H
+#define LUCI_INTERPRETER_CORE_RUNTIMEMODULE_H
+
+#include "core/RuntimeGraph.h"
+#include "luci_interpreter/core/reader/CircleMicroReader.h"
+
+#include <memory>
+#include <vector>
+
+namespace luci_interpreter
+{
+
+#ifdef USE_STATIC_ALLOC
+using BaseRuntimeGraph = StaticRuntimeGraph;
+using MemoryManager = StaticMemoryManager;
+#else
+using BaseRuntimeGraph = RuntimeGraph;
+using MemoryManager = SimpleMemoryManager;
+#endif // USE_STATIC_ALLOC
+
+class RuntimeModule
+{
+public:
+ RuntimeModule() = default;
+
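+  // Create a runtime graph bound to the subgraph currently selected in the
+  // reader.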
+ void addGraph(MemoryManager *memory_manager)
+ {
+ _graphs.emplace_back(memory_manager, &_circle_reader, this,
+ _circle_reader.get_current_subgraph_index());
+ }
+
+ BaseRuntimeGraph *getRuntimeGraphAt(uint32_t pos) { return &_graphs.at(pos); }
+
+ void execute() { getMainGraph()->execute(); }
+
+ CircleReader &getCircleReader() { return _circle_reader; }
+
+ BaseRuntimeGraph *getMainGraph() const { return const_cast<BaseRuntimeGraph *>(&_graphs[0]); }
+
+ void selectSubgraph(uint32_t index) { _circle_reader.select_subgraph(index); }
+
+private:
+ std::vector<BaseRuntimeGraph> _graphs;
+
+ CircleReader _circle_reader;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_RUNTIMEMODULE_H
diff --git a/onert-micro/luci-interpreter/src/core/reader/CMakeLists.txt b/onert-micro/luci-interpreter/src/core/reader/CMakeLists.txt
new file mode 100644
index 000000000..0de1731bb
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/core/reader/CMakeLists.txt
@@ -0,0 +1,12 @@
+set(MICRO_READER_SOURCE
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/core/reader/CircleMicroReader.h"
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/core/reader/CircleMicroReaderHelper.h"
+ "CircleMicroReader.cpp"
+ "CircleMicroReaderHelper.cpp"
+ )
+
+add_library("luci_micro_circle_reader${READER_SUFFIX}" STATIC ${MICRO_READER_SOURCE})
+target_link_libraries("luci_micro_circle_reader${READER_SUFFIX}" PUBLIC luci_micro_circle_schema)
+
+target_include_directories("luci_micro_circle_reader${READER_SUFFIX}" PUBLIC "${GENERATED_INCLUDE_DIR}")
+target_include_directories("luci_micro_circle_reader${READER_SUFFIX}" PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
diff --git a/onert-micro/luci-interpreter/src/core/reader/CircleMicroReader.cpp b/onert-micro/luci-interpreter/src/core/reader/CircleMicroReader.cpp
new file mode 100644
index 000000000..06d93f27f
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/core/reader/CircleMicroReader.cpp
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/core/reader/CircleMicroReader.h"
+#include "luci_interpreter/core/reader/CircleMicroReaderHelper.h"
+
+#include <algorithm>
+
+namespace luci_interpreter
+{
+
+// TODO check can we remove it
+DataType luci_datatype(const circle::TensorType type)
+{
+ switch (type)
+ {
+ case circle::TensorType_FLOAT32:
+ return DataType::FLOAT32;
+ case circle::TensorType_FLOAT16:
+ return DataType::FLOAT16;
+ case circle::TensorType_INT32:
+ return DataType::S32;
+ case circle::TensorType_UINT8:
+ return DataType::U8;
+ case circle::TensorType_INT64:
+ return DataType::S64;
+ case circle::TensorType_BOOL:
+ return DataType::BOOL;
+ case circle::TensorType_INT16:
+ return DataType::S16;
+ case circle::TensorType_COMPLEX64:
+ break;
+ case circle::TensorType_INT8:
+ return DataType::S8;
+ default:
+ break;
+ }
+ assert(false);
+ return DataType::Unknown;
+}
+
+FusedActFunc luci_actfunc(const circle::ActivationFunctionType type)
+{
+ switch (type)
+ {
+ case circle::ActivationFunctionType::ActivationFunctionType_NONE:
+ return FusedActFunc::NONE;
+ case circle::ActivationFunctionType::ActivationFunctionType_RELU:
+ return FusedActFunc::RELU;
+ case circle::ActivationFunctionType::ActivationFunctionType_RELU_N1_TO_1:
+ return FusedActFunc::RELU_N1_TO_1;
+ case circle::ActivationFunctionType::ActivationFunctionType_RELU6:
+ return FusedActFunc::RELU6;
+ case circle::ActivationFunctionType::ActivationFunctionType_TANH:
+ return FusedActFunc::TANH;
+ case circle::ActivationFunctionType::ActivationFunctionType_SIGN_BIT:
+ return FusedActFunc::SIGN_BIT;
+ default:
+ break;
+ }
+ assert(false);
+ return FusedActFunc::UNDEFINED;
+}
+
+Padding luci_padding(const circle::Padding padding)
+{
+ switch (padding)
+ {
+ case circle::Padding::Padding_SAME:
+ return Padding::SAME;
+ case circle::Padding::Padding_VALID:
+ return Padding::VALID;
+ }
+ assert(false);
+ return Padding::UNDEFINED;
+}
+
+MirrorPadMode luci_mirrorpad_mode(const circle::MirrorPadMode mode)
+{
+ switch (mode)
+ {
+ case circle::MirrorPadMode::MirrorPadMode_REFLECT:
+ return MirrorPadMode::REFLECT;
+ case circle::MirrorPadMode::MirrorPadMode_SYMMETRIC:
+ return MirrorPadMode::SYMMETRIC;
+ }
+ assert(false);
+ return MirrorPadMode::UNDEFINED;
+}
+
+circle::BuiltinOperator CircleReader::builtin_code(const circle::Operator *op) const
+{
+ assert(op != nullptr);
+
+ const auto op_codes = opcodes();
+ uint32_t index = op->opcode_index();
+ assert(index < op_codes.size());
+ const auto opcode = op_codes[index];
+ assert(opcode != nullptr);
+
+ return circle::builtin_code_neutral(opcode);
+}
+
+bool CircleReader::parse(const circle::Model *model)
+{
+ assert(model != nullptr);
+
+ // for direct pointer access
+ _model = model;
+
+ return true;
+}
+
+bool CircleReader::select_subgraph(uint32_t sgindex)
+{
+ if (num_subgraph() <= sgindex)
+ {
+ assert(false);
+ return false;
+ }
+
+ // for direct pointer access
+ auto subgraphs = _model->subgraphs();
+ assert(subgraphs != nullptr);
+
+ _current_subgraph = subgraphs->Get(sgindex);
+ assert(_current_subgraph != nullptr);
+
+ _current_subgraph_index = sgindex;
+
+ return true;
+}
+
+template <typename T>
+VectorWrapper<T>::VectorWrapper(const flatbuffers::Vector<T> *ptr) : _vector(ptr)
+{
+ // Do nothing
+}
+
+template <typename T> uint32_t VectorWrapper<T>::size() const
+{
+ return null() ? 0 : _vector->size();
+}
+
+template <typename T> const T *VectorWrapper<T>::data() const
+{
+ return null() ? nullptr : _vector->data();
+}
+
+template <typename T> typename VectorWrapper<T>::iterator VectorWrapper<T>::begin() const
+{
+ return null() ? iterator(nullptr, 0) : _vector->begin();
+}
+
+template <typename T> typename VectorWrapper<T>::iterator VectorWrapper<T>::end() const
+{
+ return null() ? begin() : _vector->end();
+}
+
+template <typename T> typename VectorWrapper<T>::value_type VectorWrapper<T>::at(uint32_t i) const
+{
+ if (i >= size())
+ {
+ // TODO find better error message
+ assert(false && "Access to prohibited vector element");
+ }
+
+ return _vector->Get(i);
+}
+
+template <typename T>
+typename VectorWrapper<T>::value_type VectorWrapper<T>::operator[](uint32_t i) const
+{
+ return at(i);
+}
+
+template <typename T> bool VectorWrapper<T>::null() const { return _vector == nullptr; }
+template <typename T> bool VectorWrapper<T>::empty() const { return size() == 0; }
+
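+// The member definitions above live in this translation unit, so explicitly
+// instantiate VectorWrapper for every vector type the reader exposes.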
+#define REGISTER_WRAPPER(T) template class VectorWrapper<T>
+REGISTER_WRAPPER(flatbuffers::Offset<circle::SubGraph>);
+REGISTER_WRAPPER(flatbuffers::Offset<circle::Buffer>);
+REGISTER_WRAPPER(flatbuffers::Offset<circle::Tensor>);
+REGISTER_WRAPPER(flatbuffers::Offset<circle::Operator>);
+REGISTER_WRAPPER(flatbuffers::Offset<circle::OperatorCode>);
+REGISTER_WRAPPER(flatbuffers::Offset<circle::Metadata>);
+REGISTER_WRAPPER(int32_t);
+REGISTER_WRAPPER(uint8_t);
+#undef REGISTER_WRAPPER
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/core/reader/CircleMicroReaderHelper.cpp b/onert-micro/luci-interpreter/src/core/reader/CircleMicroReaderHelper.cpp
new file mode 100644
index 000000000..8cc0058d7
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/core/reader/CircleMicroReaderHelper.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/core/reader/CircleMicroReaderHelper.h"
+
+namespace circle
+{
+
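+// Resolve the operator code across schema versions: old models store it in the
+// single-byte `deprecated_builtin_code` field, while values >= 127 only fit in
+// the 32-bit `builtin_code` field.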
+::circle::BuiltinOperator builtin_code_neutral(const ::circle::OperatorCode *opcode)
+{
+ assert(opcode != nullptr);
+ if (opcode->deprecated_builtin_code() == 127)
+ {
+ assert(opcode->builtin_code() >= 127);
+ return opcode->builtin_code();
+ }
+ // There was no 255(-1) value in v0.3
+ assert(opcode->deprecated_builtin_code() != -1);
+ return static_cast<::circle::BuiltinOperator>(opcode->deprecated_builtin_code());
+}
+
+bool is_valid(const ::circle::OperatorCode *opcode)
+{
+ ::circle::BuiltinOperator code = opcode->builtin_code();
+ return (::circle::BuiltinOperator_MIN <= code && code <= ::circle::BuiltinOperator_MAX);
+}
+
+bool is_custom(const ::circle::OperatorCode *opcode)
+{
+ ::circle::BuiltinOperator code = opcode->builtin_code();
+ return (code == ::circle::BuiltinOperator_CUSTOM);
+}
+
+const char *tensor_type(const ::circle::Tensor *tensor)
+{
+ return ::circle::EnumNameTensorType(tensor->type());
+}
+
+} // namespace circle
diff --git a/onert-micro/luci-interpreter/src/kernels/Abs.cpp b/onert-micro/luci-interpreter/src/kernels/Abs.cpp
new file mode 100644
index 000000000..98acd13e2
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Abs.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+#include "PALAbs.h"
+
+namespace luci_interpreter
+{
+
+void configure_kernel_CircleAbs(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ const auto input_index = cur_op->inputs()->operator[](0);
+ const auto output_index = cur_op->outputs()->operator[](0);
+
+ assert(input_index != -1);
+ assert(output_index != -1);
+
+ const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+ auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+ assert(input != nullptr);
+ assert(output != nullptr);
+
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(input) == Tensor::element_type(output));
+ LUCI_INTERPRETER_CHECK(Tensor::num_dims(input) == Tensor::num_dims(output));
+ LUCI_INTERPRETER_CHECK(Tensor::num_elements(input) == Tensor::num_elements(output));
+}
+
+void execute_kernel_CircleAbs(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ const auto input_index = cur_op->inputs()->operator[](0);
+ const auto output_index = cur_op->outputs()->operator[](0);
+
+ assert(input_index != -1);
+ assert(output_index != -1);
+
+ const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+ auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+ assert(input != nullptr);
+ assert(output != nullptr);
+
+ bool is_inplace = runtime_graph->is_inplace_op(cur_op);
+
+ const uint8_t *input_data = runtime_graph->getDataByTensor(input);
+ uint8_t *output_data = runtime_graph->getDataByTensor(output);
+
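+  // In-place execution writes the result directly into the input buffer;
+  // makeInplaceOperation below re-binds that buffer to the output tensor.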
+ if (is_inplace)
+ {
+ output_data = const_cast<uint8_t *>(input_data);
+ }
+
+ assert(input_data != nullptr);
+ assert(output_data != nullptr);
+
+ const int flat_size = kernels::getTensorRuntimeShape(input, runtime_graph).flatSize();
+
+ switch (Tensor::element_type(input))
+ {
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ luci_interpreter_pal::Abs(flat_size, kernels::getTensorData<float>(input_data),
+ kernels::getTensorData<float>(output_data));
+ break;
+#endif // DIS_FLOAT
+ default:
+ assert(false && "Unsupported type.");
+ }
+
+ if (is_inplace)
+ {
+ runtime_graph->makeInplaceOperation(input, output);
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Abs.test.cpp b/onert-micro/luci-interpreter/src/kernels/Abs.test.cpp
new file mode 100644
index 000000000..1e44e2f6c
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Abs.test.cpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/abs/FloatAbsKernel.h"
+#include "luci_interpreter/test_models/abs/NegAbsKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class AbsTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T> std::vector<T> checkAbsKernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 1);
+
+ // Set input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(AbsTest, Float_P)
+{
+ test_kernel::TestDataFloatAbs test_data_kernel;
+ std::vector<float> output_data_vector = checkAbsKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(AbsTest, Input_output_type_mismatch_NEG)
+{
+ test_kernel::NegTestDataInputOutputTypeMismatchAbsKernel test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+TEST_F(AbsTest, Input_output_shape_mismatch_NEG)
+{
+ test_kernel::NegTestDataInputOutputShapeMismatchAbsKernel test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Add.cpp b/onert-micro/luci-interpreter/src/kernels/Add.cpp
new file mode 100644
index 000000000..6e0398cc7
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Add.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+#include "PALAdd.h"
+
+namespace luci_interpreter
+{
+
+void configure_kernel_CircleAdd(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) ==
+ Tensor::element_type(kernel.input2()));
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) ==
+ Tensor::element_type(kernel.input2()));
+
+#ifndef DIS_QUANT
+ if (Tensor::element_type(kernel.input1()) == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(Tensor::zero_points(kernel.input1()).size() == 1 &&
+ Tensor::zero_points(kernel.input2()).size() == 1);
+ LUCI_INTERPRETER_CHECK(Tensor::zero_point(kernel.input1()) == 0 &&
+ Tensor::zero_point(kernel.input2()) == 0 &&
+ Tensor::zero_point(kernel.output()) == 0);
+ }
+#endif // DIS_QUANT
+}
+
+void execute_kernel_CircleAdd(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+
+ const auto *options = cur_op->builtin_options_as_AddOptions();
+
+ luci_interpreter::RuntimeShape input_shape1 =
+ kernels::getTensorRuntimeShape(kernel.input1(), runtime_graph);
+ luci_interpreter::RuntimeShape input_shape2 =
+ kernels::getTensorRuntimeShape(kernel.input2(), runtime_graph);
+
+ bool is_inplace = runtime_graph->is_inplace_op(cur_op);
+
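+  // Dispatch on the element type: each case selects the plain and the
+  // broadcasting PAL implementation and takes the in-place path when the
+  // loader marked this op as in-place.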
+ switch (Tensor::element_type(kernel.input1()))
+ {
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ {
+ auto tiso_func = luci_interpreter_pal::Add<float>;
+ auto broadcast_tiso_func = luci_interpreter_pal::BroadcastAdd4DSlow<float>;
+ if (is_inplace)
+ {
+ kernels::evalTISOInplaceKernel<float>(tiso_func, broadcast_tiso_func, &kernel, options,
+ std::move(input_shape1), std::move(input_shape2));
+ }
+ else
+ {
+ kernels::TISOData kernel_data = kernel.readData();
+ kernels::evalTISOKernel<float>(tiso_func, broadcast_tiso_func, &kernel, &kernel_data,
+ options, std::move(input_shape1), std::move(input_shape2));
+ }
+ }
+ break;
+#endif // DIS_FLOAT
+ case DataType::S64:
+ {
+ auto tiso_func = luci_interpreter_pal::Add<int64_t>;
+ auto broadcast_tiso_func = luci_interpreter_pal::BroadcastAdd4DSlow<int64_t>;
+ if (is_inplace)
+ {
+ kernels::evalTISOInplaceKernel<int64_t>(tiso_func, broadcast_tiso_func, &kernel, options,
+ std::move(input_shape1), std::move(input_shape2));
+ }
+ else
+ {
+ kernels::TISOData kernel_data = kernel.readData();
+ kernels::evalTISOKernel<int64_t>(tiso_func, broadcast_tiso_func, &kernel, &kernel_data,
+ options, std::move(input_shape1), std::move(input_shape2));
+ }
+ }
+ break;
+ case DataType::S32:
+ {
+ auto tiso_func = luci_interpreter_pal::Add<int32_t>;
+ auto broadcast_tiso_func = luci_interpreter_pal::BroadcastAdd4DSlow<int32_t>;
+ if (is_inplace)
+ {
+ kernels::evalTISOInplaceKernel<int32_t>(tiso_func, broadcast_tiso_func, &kernel, options,
+ std::move(input_shape1), std::move(input_shape2));
+ }
+ else
+ {
+ kernels::TISOData kernel_data = kernel.readData();
+ kernels::evalTISOKernel<int32_t>(tiso_func, broadcast_tiso_func, &kernel, &kernel_data,
+ options, std::move(input_shape1), std::move(input_shape2));
+ }
+ }
+ break;
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Add.test.cpp b/onert-micro/luci-interpreter/src/kernels/Add.test.cpp
new file mode 100644
index 000000000..6df81d3fe
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Add.test.cpp
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/add/FloatAddKernel.h"
+#include "luci_interpreter/test_models/add/IntAddKernel.h"
+#include "luci_interpreter/test_models/add/NegAddKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class AddTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T> std::vector<T> checkAddKernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 2);
+
+ // set left input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ // set right input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(1));
+ std::copy(test_data_base->get_input_data_by_index(1).begin(),
+ test_data_base->get_input_data_by_index(1).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(AddTest, Float_P)
+{
+ // No broadcast
+ {
+ const bool is_with_broadcast = false;
+ test_kernel::TestDataFloatAdd test_data_float_add_no_broadcasting(is_with_broadcast);
+ std::vector<float> output_data_vector = checkAddKernel(&test_data_float_add_no_broadcasting);
+ EXPECT_THAT(output_data_vector,
+ kernels::testing::FloatArrayNear(
+ test_data_float_add_no_broadcasting.get_output_data_by_index(0), 0.0001f));
+ }
+ // With broadcast
+ {
+ const bool is_with_broadcast = true;
+ test_kernel::TestDataFloatAdd test_data_float_add_with_broadcasting(is_with_broadcast);
+ std::vector<float> output_data_vector = checkAddKernel(&test_data_float_add_with_broadcasting);
+ EXPECT_THAT(output_data_vector,
+ kernels::testing::FloatArrayNear(
+ test_data_float_add_with_broadcasting.get_output_data_by_index(0), 0.0001f));
+ }
+}
+
+TEST_F(AddTest, INT64_P)
+{
+ // No broadcast
+ {
+ const bool is_with_broadcast = false;
+ test_kernel::TestData64IntAdd test_data_int64_add_no_broadcasting(is_with_broadcast);
+ const auto output_data_vector = checkAddKernel(&test_data_int64_add_no_broadcasting);
+ EXPECT_THAT(output_data_vector,
+ test_data_int64_add_no_broadcasting.get_output_data_by_index(0));
+ }
+ // With broadcast
+ {
+ const bool is_with_broadcast = true;
+ test_kernel::TestData64IntAdd test_data_int64_add_with_broadcasting(is_with_broadcast);
+ const auto output_data_vector = checkAddKernel(&test_data_int64_add_with_broadcasting);
+ EXPECT_THAT(output_data_vector,
+ test_data_int64_add_with_broadcasting.get_output_data_by_index(0));
+ }
+}
+
+TEST_F(AddTest, INT32_P)
+{
+ // No broadcast
+ {
+ const bool is_with_broadcast = false;
+ test_kernel::TestData32IntAdd test_data_int32_add_no_broadcasting(is_with_broadcast);
+ const auto output_data_vector = checkAddKernel<int32_t>(&test_data_int32_add_no_broadcasting);
+ EXPECT_THAT(output_data_vector,
+ test_data_int32_add_no_broadcasting.get_output_data_by_index(0));
+ }
+ // With broadcast
+ {
+ const bool is_with_broadcast = true;
+ test_kernel::TestData32IntAdd test_data_int32_add_with_broadcasting(is_with_broadcast);
+ const auto output_data_vector = checkAddKernel<int32_t>(&test_data_int32_add_with_broadcasting);
+ EXPECT_THAT(output_data_vector,
+ test_data_int32_add_with_broadcasting.get_output_data_by_index(0));
+ }
+}
+
+TEST_F(AddTest, Input_type_mismatch_NEG)
+{
+ test_kernel::NegTestDataInputMismatchAddKernel test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+TEST_F(AddTest, No_quant_params_NEG)
+{
+ test_kernel::NegTestDataNoQuantParamsS16AddKernel test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+// TODO: add tests for U8 and S16
+// TODO: add tests for inplace optimizations for all types
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/AddN.cpp b/onert-micro/luci-interpreter/src/kernels/AddN.cpp
new file mode 100644
index 000000000..0df3c448b
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/AddN.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+
+#include "PALAddN.h"
+
+namespace luci_interpreter
+{
+
+namespace
+{
+
+template <typename T>
+void evalGeneric(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ const auto output_index = cur_op->outputs()->operator[](0);
+ assert(output_index != -1);
+ const auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+ const auto input_sizes = cur_op->inputs()->size();
+
+ auto input1_index = cur_op->inputs()->operator[](0);
+ const auto *tensor1 = runtime_graph->getCircleTensorByIndex(input1_index);
+
+ const int flat_size = Tensor::num_elements(tensor1);
+
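+  // Gather the raw data pointers of all inputs; constant tensors come from the
+  // model buffer rather than from runtime allocations.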
+ std::vector<const T *> all_input_data;
+ for (int32_t i = 0; i < input_sizes; ++i)
+ {
+ auto input_index = cur_op->inputs()->operator[](i);
+ const auto *tensor = runtime_graph->getCircleTensorByIndex(input_index);
+
+ const auto *tensor_data = runtime_graph->getDataByTensor(tensor);
+ if (tensor_data == nullptr)
+ tensor_data = runtime_graph->getConstDataByTensor(tensor);
+
+ auto *data = reinterpret_cast<const T *>(tensor_data);
+ all_input_data.push_back(data);
+ }
+
+ auto *output_data = reinterpret_cast<T *>(runtime_graph->getDataByTensor(output));
+
+ luci_interpreter_pal::AddN(flat_size, input_sizes, all_input_data.data(), output_data);
+}
+
+} // namespace
+
+void configure_kernel_CircleAddN(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ const int num_inputs = cur_op->inputs()->size();
+
+ LUCI_INTERPRETER_CHECK(num_inputs >= 2);
+ LUCI_INTERPRETER_CHECK(cur_op->outputs()->size() == 1);
+
+ const auto input1_index = cur_op->inputs()->operator[](0);
+ assert(input1_index != -1);
+
+ const auto input1_tensor = runtime_graph->getCircleTensorByIndex(input1_index);
+ assert(input1_tensor != nullptr);
+
+ for (int i = 1; i < num_inputs; ++i)
+ {
+ const auto input_index = cur_op->inputs()->operator[](i);
+ assert(input_index != -1);
+
+ const auto input_tensor = runtime_graph->getCircleTensorByIndex(input_index);
+ assert(input_tensor != nullptr);
+
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(input1_tensor) ==
+ Tensor::element_type(input_tensor));
+ LUCI_INTERPRETER_CHECK(Tensor::num_dims(input1_tensor) == Tensor::num_dims(input_tensor));
+ LUCI_INTERPRETER_CHECK(Tensor::num_elements(input1_tensor) ==
+ Tensor::num_elements(input_tensor));
+ }
+}
+
+void execute_kernel_CircleAddN(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ const auto output_index = cur_op->outputs()->operator[](0);
+ assert(output_index != -1);
+ const auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+ switch (Tensor::element_type(output))
+ {
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ {
+ evalGeneric<float>(cur_op, runtime_graph);
+ }
+ break;
+#endif // DIS_FLOAT
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/AddN.test.cpp b/onert-micro/luci-interpreter/src/kernels/AddN.test.cpp
new file mode 100644
index 000000000..cee0bbc7b
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/AddN.test.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/add_n/FloatAddNKernel.h"
+#include "luci_interpreter/test_models/add_n/NegAddNKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class AddNTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T> std::vector<T> checkAddNKernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 3);
+
+ // set 1 input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ // set 2 input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(1));
+ std::copy(test_data_base->get_input_data_by_index(1).begin(),
+ test_data_base->get_input_data_by_index(1).end(), input_tensor_data);
+ }
+
+ // set 3 input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(2));
+ std::copy(test_data_base->get_input_data_by_index(2).begin(),
+ test_data_base->get_input_data_by_index(2).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(AddNTest, Float_P)
+{
+ test_kernel::TestDataFloatAddN test_data_kernel;
+ std::vector<float> output_data_vector = checkAddNKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, kernels::testing::FloatArrayNear(
+ test_data_kernel.get_output_data_by_index(0), 0.0001f));
+}
+
+TEST_F(AddNTest, InputTypeMismatch_NEG)
+{
+ test_kernel::TestDataInputTypeMismatchAddN test_data_kernel;
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/ArgMax.cpp b/onert-micro/luci-interpreter/src/kernels/ArgMax.cpp
new file mode 100644
index 000000000..be6291c99
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/ArgMax.cpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+#include "TISOKernel.h"
+
+#include "PALArgMinMax.h"
+
+namespace luci_interpreter
+{
+
+void configure_kernel_CircleArgMax(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+  // The axis ("dim") tensor must be a scalar or contain exactly one element
+ LUCI_INTERPRETER_CHECK(Tensor::num_dims(kernel.input2()) == 0 or
+ Tensor::num_elements(kernel.input2()) == 1);
+  // The output (index) type must be S32
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.output()) == DataType::S32);
+
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input2()) == DataType::S32);
+}
+
+void execute_kernel_CircleArgMax(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+
+ const circle::Tensor *input = kernel.input1();
+ const circle::Tensor *output = kernel.output();
+
+ kernels::TISOData tiso_data = kernel.readData();
+ const auto input_data = tiso_data.input1_data;
+ const auto axis_data = tiso_data.input2_data;
+ auto output_data = tiso_data.output_data;
+
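+  // ArgMinMax is shared between ArgMax and ArgMin; the std::greater comparator here
+  // selects the index of the maximum element along the given axis.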
+ switch (Tensor::element_type(input))
+ {
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ {
+ luci_interpreter_pal::ArgMinMax(
+ kernels::getTensorRuntimeShape(input, runtime_graph),
+ kernels::getTensorData<float>(input_data), kernels::getTensorData<int32_t>(axis_data),
+ kernels::getTensorRuntimeShape(output, runtime_graph),
+ kernels::getTensorData<int32_t>(output_data), std::greater<float>());
+ }
+ break;
+#endif // DIS_FLOAT
+ default:
+ assert(false && "Unsupported ArgMax input type");
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/ArgMax.test.cpp b/onert-micro/luci-interpreter/src/kernels/ArgMax.test.cpp
new file mode 100644
index 000000000..297ae2940
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/ArgMax.test.cpp
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/argmax/FloatArgMaxKernel.h"
+#include "luci_interpreter/test_models/argmax/NegArgMaxKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class ArgMaxTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T, typename O>
+std::vector<O> checkKernel(test_kernel::TestDataBase<T, O> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 1);
+
+ // Set input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ O *output_data = reinterpret_cast<O *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(O));
+ std::vector<O> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(ArgMaxTest, MainTest_P)
+{
+ test_kernel::TestDataFloatArgMax test_data_kernel;
+ std::vector<int32_t> output_data_vector = checkKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(ArgMaxTest, Input_output_type_mismatch_NEG)
+{
+ test_kernel::TestDataOutputWrongOutputArgMax test_data_kernel;
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/ArgMin.cpp b/onert-micro/luci-interpreter/src/kernels/ArgMin.cpp
new file mode 100644
index 000000000..ed1249602
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/ArgMin.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+#include "TISOKernel.h"
+
+#include "PALArgMinMax.h"
+
+namespace luci_interpreter
+{
+
+// TODO: reduce code duplication with ArgMax
+void configure_kernel_CircleArgMin(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+  // The axis ("dim") tensor must be a scalar or contain exactly one element
+ LUCI_INTERPRETER_CHECK(Tensor::num_dims(kernel.input2()) == 0 or
+ Tensor::num_elements(kernel.input2()) == 1);
+  // The output (index) type must be S32
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.output()) == DataType::S32);
+
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input2()) == DataType::S32);
+}
+
+void execute_kernel_CircleArgMin(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+
+ const circle::Tensor *input = kernel.input1();
+ const circle::Tensor *output = kernel.output();
+
+ kernels::TISOData tiso_data = kernel.readData();
+ const auto input_data = tiso_data.input1_data;
+ const auto axis_data = tiso_data.input2_data;
+ auto output_data = tiso_data.output_data;
+
+ switch (Tensor::element_type(input))
+ {
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ {
+ luci_interpreter_pal::ArgMinMax(
+ kernels::getTensorRuntimeShape(input, runtime_graph),
+ kernels::getTensorData<float>(input_data), kernels::getTensorData<int32_t>(axis_data),
+ kernels::getTensorRuntimeShape(output, runtime_graph),
+ kernels::getTensorData<int32_t>(output_data), std::less<float>());
+ }
+ break;
+#endif // DIS_FLOAT
+ default:
+      assert(false && "Unsupported ArgMin input type");
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/ArgMin.test.cpp b/onert-micro/luci-interpreter/src/kernels/ArgMin.test.cpp
new file mode 100644
index 000000000..b2756637b
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/ArgMin.test.cpp
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/argmin/FloatArgMinKernel.h"
+#include "luci_interpreter/test_models/argmin/NegArgMinKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class ArgMinTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T, typename O>
+std::vector<O> checkKernel(test_kernel::TestDataBase<T, O> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 1);
+
+ // Set input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ O *output_data = reinterpret_cast<O *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(O));
+ std::vector<O> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(ArgMinTest, MainTest_P)
+{
+ test_kernel::TestDataFloatArgMin test_data_kernel;
+ std::vector<int32_t> output_data_vector = checkKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(ArgMinTest, Input_output_type_mismatch_NEG)
+{
+ test_kernel::TestDataOutputWrongOutputArgMin test_data_kernel;
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/AveragePool2D.cpp b/onert-micro/luci-interpreter/src/kernels/AveragePool2D.cpp
new file mode 100644
index 000000000..ccdda8406
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/AveragePool2D.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Utils.h"
+#include "PALAveragePool2D.h"
+
+namespace luci_interpreter
+{
+
+// TODO: reduce code duplication with MaxPool2D
+void configure_kernel_CircleAveragePool2D(const circle::Operator *cur_op,
+ BaseRuntimeGraph *runtime_graph)
+{
+ const auto input_index = cur_op->inputs()->operator[](0);
+ const auto output_index = cur_op->outputs()->operator[](0);
+
+ assert(input_index != -1);
+ assert(output_index != -1);
+
+ const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+ const auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(input) == Tensor::element_type(output));
+ assert(Tensor::num_dims(input) == 4);
+}
+
+void execute_kernel_CircleAveragePool2D(const circle::Operator *cur_op,
+ BaseRuntimeGraph *runtime_graph)
+{
+ const auto input_index = cur_op->inputs()->operator[](0);
+ const auto output_index = cur_op->outputs()->operator[](0);
+
+ assert(input_index != -1);
+ assert(output_index != -1);
+
+ const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+ auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+ const auto *options = cur_op->builtin_options_as_Pool2DOptions();
+
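+  // Derive the output spatial size and the implicit SAME/VALID padding amounts from the
+  // filter size and strides, following the usual TFLite pooling semantics.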
+ const int32_t input_height = Tensor::dim(input, 1);
+ const int32_t input_width = Tensor::dim(input, 2);
+
+ const int32_t output_height = kernels::computeOutputSize(
+ luci_padding(options->padding()), input_height, options->filter_height(), options->stride_h());
+ const int32_t output_width = kernels::computeOutputSize(
+ luci_padding(options->padding()), input_width, options->filter_width(), options->stride_w());
+
+ const auto padding_height = kernels::computePadding(options->stride_h(), 1, input_height,
+ options->filter_height(), output_height);
+ const auto padding_width = kernels::computePadding(options->stride_w(), 1, input_width,
+ options->filter_width(), output_width);
+
+ const auto *input_data = runtime_graph->getDataByTensor(input);
+ auto *output_data = runtime_graph->getDataByTensor(output);
+
+ float activation_min{};
+ float activation_max{};
+ kernels::calculateActivationRange(luci_actfunc(options->fused_activation_function()),
+ &activation_min, &activation_max);
+ luci_interpreter_pal::PoolParams params{};
+ params.padding_values.height = padding_height;
+ params.padding_values.width = padding_width;
+ params.stride_height = options->stride_h();
+ params.stride_width = options->stride_w();
+ params.filter_height = options->filter_height();
+ params.filter_width = options->filter_width();
+ params.float_activation_min = activation_min;
+ params.float_activation_max = activation_max;
+
+ switch (Tensor::element_type(input))
+ {
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ luci_interpreter_pal::AveragePool(
+ params, kernels::getTensorShape(input), kernels::getTensorData<float>(input_data),
+ kernels::getTensorShape(output), kernels::getTensorData<float>(output_data));
+ break;
+#endif // DIS_FLOAT
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/AveragePool2D.test.cpp b/onert-micro/luci-interpreter/src/kernels/AveragePool2D.test.cpp
new file mode 100644
index 000000000..6bc326453
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/AveragePool2D.test.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/average_pool_2d/FloatAveragePool2DKernel.h"
+#include "luci_interpreter/test_models/average_pool_2d/NegAveragePool2DKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class AveragePool2DTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T>
+std::vector<T> checkAveragePool2DKernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 1);
+
+ // Set input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(AveragePool2DTest, Float_P)
+{
+ test_kernel::TestDataFloatAveragePool2D test_data_kernel;
+ std::vector<float> output_data_vector = checkAveragePool2DKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, kernels::testing::FloatArrayNear(
+ test_data_kernel.get_output_data_by_index(0), 0.001f));
+}
+
+TEST_F(AveragePool2DTest, InputOutputTypeMismatch_NEG)
+{
+ test_kernel::NegTestDataInputOutputTypeMismatchAveragePool2DKernel test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/BatchMatMul.cpp b/onert-micro/luci-interpreter/src/kernels/BatchMatMul.cpp
new file mode 100644
index 000000000..065e444f6
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/BatchMatMul.cpp
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchMatMul.h"
+#include "kernels/Utils.h"
+
+#include "PALBatchMatMul.h"
+
+#include <tensorflow/lite/kernels/internal/reference/transpose.h>
+
+namespace
+{
+
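+// Returns a copy of 'shape' with its last two (row/column) dimensions swapped.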
+tflite::RuntimeShape SwapRowColumnDims(const tflite::RuntimeShape &shape)
+{
+ tflite::RuntimeShape swapped_shape(shape);
+ const int32_t dims = shape.DimensionsCount();
+ swapped_shape.SetDim(dims - 2, shape.Dims(dims - 1));
+ swapped_shape.SetDim(dims - 1, shape.Dims(dims - 2));
+ return swapped_shape;
+}
+
+} // namespace
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+BatchMatMul::BatchMatMul(const Tensor *x, const Tensor *y, Tensor *output, Tensor *x_tmp,
+ Tensor *y_tmp, const BatchMatMulParams &params)
+ : KernelWithParams({x, y}, {output, x_tmp, y_tmp}, params)
+{
+}
+
+void BatchMatMul::configure()
+{
+ auto lhs = x();
+ auto rhs = y();
+ auto adj_x = params().adj_x;
+ auto adj_y = params().adj_y;
+
+ // TODO Support non-float types
+ if (lhs->element_type() != DataType::FLOAT32 || rhs->element_type() != DataType::FLOAT32)
+ assert(false && "Unsupported type.");
+
+ LUCI_INTERPRETER_CHECK(lhs->element_type() == rhs->element_type());
+
+ auto lhs_rank = lhs->shape().num_dims();
+ auto rhs_rank = rhs->shape().num_dims();
+ LUCI_INTERPRETER_CHECK(lhs_rank >= 2 && lhs_rank <= 4);
+ LUCI_INTERPRETER_CHECK(rhs_rank >= 2 && rhs_rank <= 4);
+
+ auto lhs_scratchpad = temp_lhs();
+ auto rhs_scratchpad = temp_rhs();
+ luci_interpreter_pal::SetupScratchpadTensor(lhs_scratchpad, rhs_scratchpad, getTensorShape(lhs),
+ getTensorShape(rhs));
+
+ auto output_rank = std::max(lhs_rank, rhs_rank);
+
+ auto extended_lhs_shape = tflite::RuntimeShape::ExtendedShape(output_rank, getTensorShape(lhs));
+ auto extended_rhs_shape = tflite::RuntimeShape::ExtendedShape(output_rank, getTensorShape(rhs));
+
+  // Ensure any batch dimensions obey broadcasting rules.
+ for (int i = 0; i < output_rank - 2; ++i)
+ {
+ const int lhs_dim = extended_lhs_shape.Dims(i);
+ const int rhs_dim = extended_rhs_shape.Dims(i);
+ if (lhs_dim != rhs_dim)
+ {
+ if (lhs_dim != 1)
+ {
+ LUCI_INTERPRETER_CHECK(rhs_dim == 1);
+ }
+ }
+ }
+
+ // Ensure other dimensions work for matrix multiplication.
+ int accum_dim_lhs =
+ adj_x ? extended_lhs_shape.Dims(output_rank - 2) : extended_lhs_shape.Dims(output_rank - 1);
+ int accum_dim_rhs =
+ adj_y ? extended_rhs_shape.Dims(output_rank - 1) : extended_rhs_shape.Dims(output_rank - 2);
+ LUCI_INTERPRETER_CHECK(accum_dim_lhs == accum_dim_rhs);
+
+ Shape output_shape(output_rank);
+ // Fill in any broadcast dimensions.
+ for (int i = 0; i < output_rank - 2; ++i)
+ {
+ const int lhs_dim = extended_lhs_shape.Dims(i);
+ const int rhs_dim = extended_rhs_shape.Dims(i);
+ int broadcast_dim = lhs_dim;
+ if ((lhs_dim != rhs_dim) && (lhs_dim == 1))
+ {
+ broadcast_dim = rhs_dim;
+ }
+ output_shape.dim(i) = broadcast_dim;
+ }
+ // Fill in the matmul dimensions.
+ int lhs_rows_index = adj_x ? output_rank - 1 : output_rank - 2;
+ int rhs_cols_index = adj_y ? output_rank - 2 : output_rank - 1;
+
+ output_shape.dim(output_rank - 2) = extended_lhs_shape.Dims(lhs_rows_index);
+ output_shape.dim(output_rank - 1) = extended_rhs_shape.Dims(rhs_cols_index);
+
+ output()->resize(output_shape);
+}
+
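+// Transposes the last two dimensions of 'tensor_in' into 'tensor_out', leaving any
+// leading batch dimensions in place. Only FLOAT32 is supported.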
+void TransposeRowsColumns(const Tensor *tensor_in, Tensor *tensor_out)
+{
+ tflite::RuntimeShape transposed_shape(getTensorShape(tensor_in));
+ tflite::RuntimeShape shape(getTensorShape(tensor_in));
+ tflite::TransposeParams params;
+ int rank = shape.DimensionsCount();
+ params.perm_count = rank;
+ for (int i = 0; i < rank - 2; ++i)
+ {
+ params.perm[i] = i;
+ }
+ // Transpose the last two dimensions.
+ params.perm[rank - 2] = rank - 1;
+ params.perm[rank - 1] = rank - 2;
+ transposed_shape.SetDim(rank - 1, shape.Dims(rank - 2));
+ transposed_shape.SetDim(rank - 2, shape.Dims(rank - 1));
+ switch (tensor_in->element_type())
+ {
+ case DataType::FLOAT32:
+ tflite::reference_ops::Transpose(params, shape, getTensorData<float>(tensor_in),
+ transposed_shape, getTensorData<float>(tensor_out));
+ break;
+ default:
+      assert(false && "Only support fp32 BatchMatMul for now.");
+ }
+}
+
+void BatchMatMul::execute() const
+{
+ auto lhs = x();
+ auto rhs = y();
+
+ bool adj_x = params().adj_x;
+ bool adj_y = params().adj_y;
+
+ auto orig_lhs_shape = getTensorShape(lhs);
+ auto orig_rhs_shape = getTensorShape(rhs);
+
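+  // The underlying PAL kernel expects the RHS in row/column-swapped layout, so the RHS is
+  // transposed unless adj_y already provides that form; the LHS is transposed only when
+  // adj_x requests it.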
+ auto rhs_tensor = adj_y ? rhs : temp_rhs();
+ auto lhs_tensor = adj_x ? temp_lhs() : lhs;
+ if (not adj_y)
+ {
+ TransposeRowsColumns(rhs, temp_rhs());
+ }
+ if (adj_x)
+ {
+ TransposeRowsColumns(lhs, temp_lhs());
+ }
+ tflite::RuntimeShape rhs_shape = adj_y ? orig_rhs_shape : SwapRowColumnDims(orig_rhs_shape);
+ tflite::RuntimeShape lhs_shape = adj_x ? orig_lhs_shape : SwapRowColumnDims(orig_lhs_shape);
+
+ switch (x()->element_type())
+ {
+ case DataType::FLOAT32:
+ luci_interpreter_pal::BatchMatMul(rhs_shape, getTensorData<float>(rhs_tensor), lhs_shape,
+ getTensorData<float>(lhs_tensor), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/BatchMatMul.h b/onert-micro/luci-interpreter/src/kernels/BatchMatMul.h
new file mode 100644
index 000000000..744f49795
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/BatchMatMul.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H
+#define LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class BatchMatMul : public KernelWithParams<BatchMatMulParams>
+{
+public:
+ BatchMatMul(const Tensor *x, const Tensor *y, Tensor *output, Tensor *x_tmp, Tensor *y_tmp,
+ const BatchMatMulParams &params);
+
+ const Tensor *x() const { return _inputs[0]; }
+ const Tensor *y() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ Tensor *temp_lhs() const { return _outputs[1]; }
+ Tensor *temp_rhs() const { return _outputs[2]; }
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H
diff --git a/onert-micro/luci-interpreter/src/kernels/BatchMatMul.test.cpp b/onert-micro/luci-interpreter/src/kernels/BatchMatMul.test.cpp
new file mode 100644
index 000000000..edfa3a685
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/BatchMatMul.test.cpp
@@ -0,0 +1,272 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchMatMul.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class BatchMatMulTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(BatchMatMulTest, Float)
+{
+ std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6};
+ std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 3}, lhs_data, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 4}, rhs_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(lhs_scratch);
+ _memory_manager->allocate_memory(rhs_scratch);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4}));
+}
+
+TEST_F(BatchMatMulTest, Float_SimpleRHSAdjoint)
+{
+ std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6};
+ std::vector<float> rhs_data = {7, 11, 15, 8, 12, 16, 9, 13, 17, 10, 14, 18};
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 3}, lhs_data, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 4, 3}, rhs_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = true;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(lhs_scratch);
+ _memory_manager->allocate_memory(rhs_scratch);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4}));
+}
+
+TEST_F(BatchMatMulTest, Float_SimpleLHSAdjoint)
+{
+ std::vector<float> lhs_data = {1, 4, 2, 5, 3, 6};
+ std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 2}, lhs_data, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 4}, rhs_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = true;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(lhs_scratch);
+ _memory_manager->allocate_memory(rhs_scratch);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4}));
+}
+
+TEST_F(BatchMatMulTest, Float_BatchSizeTwo)
+{
+ std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30};
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 2, 3}, lhs_data, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3, 4}, rhs_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(lhs_scratch);
+ _memory_manager->allocate_memory(rhs_scratch);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218., 560., 584., 608., 632.,
+ 767., 800., 833., 866.}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 2, 4}));
+}
+
+TEST_F(BatchMatMulTest, Float_DiffBatch)
+{
+ std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30};
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 1, 6}, lhs_data, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 6, 4}, rhs_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(lhs_scratch);
+ _memory_manager->allocate_memory(rhs_scratch);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({427., 448., 469., 490., 1039., 1096., 1153., 1210.}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 1, 4}));
+}
+
+TEST_F(BatchMatMulTest, Invalid_Shape_NEG)
+{
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 2}, {1, 2, 3, 4}, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 2}, {5, 6, 7, 8, 9, 10}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(BatchMatMulTest, Invalid_Batch_NEG)
+{
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 1, 3}, {1, 2, 3, 4, 5, 6}, _memory_manager.get());
+ Tensor rhs_tensor = makeInputTensor<DataType::FLOAT32>({3, 3, 1}, {5, 6, 7, 8, 9, 10, 11, 12, 13},
+ _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(BatchMatMulTest, Invalid_Rank_NEG)
+{
+ Tensor lhs_tensor = makeInputTensor<DataType::FLOAT32>({4}, {1, 2, 3, 4}, _memory_manager.get());
+ Tensor rhs_tensor = makeInputTensor<DataType::FLOAT32>({1, 4, 2}, {5, 6, 7, 8, 9, 10, 11, 12},
+ _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(BatchMatMulTest, Invalid_Rank2_NEG)
+{
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 1, 1, 1, 4}, {1, 2, 3, 4}, _memory_manager.get());
+ Tensor rhs_tensor = makeInputTensor<DataType::FLOAT32>({1, 4, 2}, {5, 6, 7, 8, 9, 10, 11, 12},
+ _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(BatchMatMulTest, TypeMisMatch_NEG)
+{
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 3}, {1, 2, 3, 4, 5, 6}, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 2}, {5, 6, 7, 8, 9, 10}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::U8, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/BatchToSpaceND.cpp b/onert-micro/luci-interpreter/src/kernels/BatchToSpaceND.cpp
new file mode 100644
index 000000000..9ebe28991
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/BatchToSpaceND.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchToSpaceND.h"
+#include "kernels/Utils.h"
+
+#include "PALBatchToSpaceND.h"
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+namespace
+{
+const int kInputMinDimensionNum = 3;
+const int kInputMaxDimensionNum = 4;
+} // namespace
+
+BatchToSpaceND::BatchToSpaceND(const Tensor *input, const Tensor *block_shape, const Tensor *crops,
+ Tensor *output)
+ : Kernel({input, block_shape, crops}, {output})
+{
+}
+
+void BatchToSpaceND::configure()
+{
+ const auto *block_shape_data = block_shape()->data<int32_t>();
+ const auto *crops_data = crops()->data<int32_t>();
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= kInputMinDimensionNum);
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= kInputMaxDimensionNum);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+ int spatial_dims_num = input()->shape().num_dims() - 2;
+
+ LUCI_INTERPRETER_CHECK(block_shape()->shape().num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(block_shape()->shape().dim(0) == spatial_dims_num);
+
+ LUCI_INTERPRETER_CHECK(crops()->shape().num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(crops()->shape().dim(0) == spatial_dims_num);
+ LUCI_INTERPRETER_CHECK(crops()->shape().dim(1) == 2);
+ for (int i = 0; i < spatial_dims_num * 2; ++i)
+ {
+ LUCI_INTERPRETER_CHECK(crops_data[i] >= 0);
+ }
+
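+  // The batch dimension shrinks by the product of the block factors, while each spatial
+  // dimension grows by its block factor and is then reduced by its leading/trailing crops.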
+ Shape output_shape = Shape(input()->shape().num_dims());
+ int output_batch_size = input()->shape().dim(0);
+ for (int i = 0; i < spatial_dims_num; ++i)
+ {
+ LUCI_INTERPRETER_CHECK(output_batch_size % block_shape_data[i] == 0);
+ output_batch_size = output_batch_size / block_shape_data[i];
+ output_shape.dim(i + 1) =
+ input()->shape().dim(i + 1) * block_shape_data[i] - crops_data[i * 2] - crops_data[i * 2 + 1];
+ }
+
+ output_shape.dim(0) = output_batch_size;
+ output_shape.dim(input()->shape().num_dims() - 1) =
+ input()->shape().dim(input()->shape().num_dims() - 1);
+
+  // TODO: enable this resize only for kernels with dynamic shapes
+ output()->resize(output_shape);
+}
+
+void BatchToSpaceND::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ luci_interpreter_pal::BatchToSpaceND(
+ getTensorShape(input()), getTensorData<float>(input()), getTensorShape(block_shape()),
+ getTensorData<int32_t>(block_shape()), getTensorShape(crops()),
+ getTensorData<int32_t>(crops()), getTensorShape(output()), getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ luci_interpreter_pal::BatchToSpaceND(
+ getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(block_shape()),
+ getTensorData<int32_t>(block_shape()), getTensorShape(crops()),
+ getTensorData<int32_t>(crops()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/BatchToSpaceND.h b/onert-micro/luci-interpreter/src/kernels/BatchToSpaceND.h
new file mode 100644
index 000000000..57703ea5d
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/BatchToSpaceND.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_BATCHTOSPACEND_H
+#define LUCI_INTERPRETER_KERNELS_BATCHTOSPACEND_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class BatchToSpaceND : public Kernel
+{
+public:
+ BatchToSpaceND(const Tensor *input, const Tensor *block_shape, const Tensor *crops,
+ Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *block_shape() const { return _inputs[1]; }
+ const Tensor *crops() const { return _inputs[2]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_BATCHTOSPACEND_H
diff --git a/onert-micro/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp b/onert-micro/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp
new file mode 100644
index 000000000..52647a763
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchToSpaceND.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> block_shape_shape,
+ std::initializer_list<int32_t> crops_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<T> input_data, std::initializer_list<int32_t> block_shape_data,
+ std::initializer_list<int32_t> crops_data, std::initializer_list<T> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor block_shape_tensor =
+ makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data, memory_manager.get());
+ Tensor crops_tensor =
+ makeInputTensor<DataType::S32>(crops_shape, crops_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+}
+
+template <typename T> class BatchToSpaceNDTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(BatchToSpaceNDTest, DataTypes);
+
+TYPED_TEST(BatchToSpaceNDTest, Simple)
+{
+ Check<TypeParam>(/*input_shape=*/{4, 2, 2, 1}, /*block_shape_shape=*/{2}, /*crops_shape=*/{2, 2},
+ /*output_shape=*/{1, 4, 4, 1},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ /*block_shape_data=*/{2, 2}, /*crops_data=*/{0, 0, 0, 0},
+ /*output_data=*/{1, 5, 2, 6, 9, 13, 10, 14, 3, 7, 4, 8, 11, 15, 12, 16});
+}
+
+TEST(BatchToSpaceNDTest, Invalid_Shape_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {3, 2, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, memory_manager.get());
+ Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2}, memory_manager.get());
+ Tensor crops_tensor = makeInputTensor<DataType::S32>({2, 2}, {0, 0, 0, 0}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(BatchToSpaceNDTest, Invalid_Crops_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {4, 2, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, memory_manager.get());
+ Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2}, memory_manager.get());
+ Tensor crops_tensor = makeInputTensor<DataType::S32>({2, 2}, {0, 0, -1, 0}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/BinaryOpCommon.h b/onert-micro/luci-interpreter/src/kernels/BinaryOpCommon.h
new file mode 100644
index 000000000..1fd27ea63
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/BinaryOpCommon.h
@@ -0,0 +1,215 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_BINARYOPUTILS_H
+#define LUCI_INTERPRETER_KERNELS_BINARYOPUTILS_H
+
+#include "TISOKernel.h"
+#include "ProcessBroadcastShapes.h"
+
+#include "Utils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+namespace
+{
+
+/**
+ * Fills the activation min/max parameters that correspond to the given data type and activation.
+ *
+ * T is a compile-time template parameter, so after optimization only the branch required
+ * for T remains in the generated code.
+ *
+ * @tparam T data type of the arithmetic operation's output tensor
+ * @param p arithmetic params to fill
+ * @param act luci_interpreter::Activation of the arithmetic operation
+ */
+template <typename T>
+void fillArithmeticActivationRange(luci_interpreter_pal::ArithmeticParams &p, Activation act)
+{
+ static_assert(one_of_types<T, float, int32_t, int64_t>(), "Unsupported dtype");
+
+  // Exactly one branch applies for a given T; an else-if chain keeps the float case from
+  // also falling through into the int64 branch.
+  if (std::is_same<T, float>::value)
+    calculateActivationRange(act, &p.float_activation_min, &p.float_activation_max);
+  else if (std::is_same<T, int32_t>::value)
+    calculateActivationRange(act, &p.quantized_activation_min, &p.quantized_activation_max);
+  else
+    calculateActivationRange(act, &p.int64_activation_min, &p.int64_activation_max);
+}
+
+} // namespace
+
+template <typename T, typename TISOFunc = nullptr_t, typename TISOBroadcastFunc = nullptr_t,
+ typename Options = nullptr_t>
+void evalTISOKernel(TISOFunc tiso_func, TISOBroadcastFunc tiso_broadcast_func,
+ kernels::TISOKernel *kernel, kernels::TISOData *kernel_data,
+ const Options *options, RuntimeShape &&input_shape_1,
+ RuntimeShape &&input_shape_2)
+{
+ const auto *output = kernel->output();
+
+ luci_interpreter_pal::ArithmeticParams params{};
+ fillArithmeticActivationRange<T>(params, luci_actfunc(options->fused_activation_function()));
+
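+  // ProcessBroadcastShapes returns true (and fills the broadcast descriptors in params)
+  // only when the two input shapes actually require broadcasting.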
+ const bool need_broadcast =
+ luci_interpreter_pal::ProcessBroadcastShapes(input_shape_1, input_shape_2, &params);
+
+ if (need_broadcast)
+ {
+ tiso_broadcast_func(params, input_shape_1, kernels::getTensorData<T>(kernel_data->input1_data),
+ input_shape_2, kernels::getTensorData<T>(kernel_data->input2_data),
+ kernels::getTensorShape(output),
+ kernels::getTensorData<T>(kernel_data->output_data));
+ }
+ else
+ {
+ const int flat_size = input_shape_1.flatSize();
+ tiso_func(params, flat_size, kernels::getTensorData<T>(kernel_data->input1_data),
+ kernels::getTensorData<T>(kernel_data->input2_data),
+ kernels::getTensorData<T>(kernel_data->output_data));
+ }
+}
+
+template <typename T, typename TISOFunc = nullptr_t, typename TISOBroadcastFunc = nullptr_t,
+ typename Options = nullptr_t>
+void evalTISOInplaceKernel(TISOFunc tiso_func, TISOBroadcastFunc tiso_broadcast_func,
+ kernels::TISOKernel *kernel, const Options *options,
+ RuntimeShape &&input_shape_1, RuntimeShape &&input_shape_2)
+{
+ uint8_t *inplace_data_ptr = nullptr;
+ circle::Tensor *input_inplace_tensor = nullptr;
+
+ kernels::TISOData kernel_data = kernel->readInplaceData(inplace_data_ptr, input_inplace_tensor);
+
+ evalTISOKernel<T, TISOFunc, TISOBroadcastFunc, Options>(
+ tiso_func, tiso_broadcast_func, kernel, &kernel_data, options, std::move(input_shape_1),
+ std::move(input_shape_2));
+
+ BaseRuntimeGraph *runtime_graph = kernel->runtime_graph();
+
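+  // Let the output alias the buffer of the input chosen for in-place execution, then
+  // detach the remaining input from the graph.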
+ runtime_graph->makeInplaceOperation(input_inplace_tensor, kernel->output());
+ if (input_inplace_tensor == kernel->input1())
+ {
+ runtime_graph->makeInplaceOperation(kernel->input2(), nullptr);
+ }
+ else
+ {
+ runtime_graph->makeInplaceOperation(kernel->input1(), nullptr);
+ }
+}
+
+#ifndef DIS_QUANT
+template <typename T, typename TISOFunc = nullptr_t, typename TISOBroadcastFunc = nullptr_t,
+ typename Options = nullptr_t>
+void evalTISOQuantizedKernel(TISOFunc tiso_func, TISOBroadcastFunc tiso_broadcast_func,
+ kernels::TISOKernel *kernel, kernels::TISOData *kernel_data,
+ const Options *options)
+{
+ const auto *input1 = kernel->input1();
+ const auto *input2 = kernel->input2();
+ const auto *output = kernel->output();
+
+ const auto input1_scale = static_cast<double>(Tensor::scale(input1));
+ const auto input2_scale = static_cast<double>(Tensor::scale(input2));
+ const auto output_scale = static_cast<double>(Tensor::scale(output));
+
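+  // TFLite-style quantized arithmetic: both inputs are rescaled relative to twice the
+  // larger input scale, and left_shift = 20 scales values up before the fixed-point
+  // multiplies to preserve precision.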
+ const int left_shift = 20;
+ const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
+ const double real_input1_multiplier = input1_scale / twice_max_input_scale;
+ const double real_input2_multiplier = input2_scale / twice_max_input_scale;
+ const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
+
+ int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
+ int input1_shift{}, input2_shift{}, output_shift{};
+ kernels::quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier,
+ &input1_shift);
+ kernels::quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier,
+ &input2_shift);
+ kernels::quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier,
+ &output_shift);
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ kernels::calculateActivationRangeQuantized(luci_actfunc(options->fused_activation_function()),
+ output, &activation_min, &activation_max);
+
+ luci_interpreter_pal::ArithmeticParams params{};
+ params.left_shift = left_shift;
+ // The kernel expects inputs' zero points to be negated.
+ params.input1_offset = -Tensor::zero_point(input1); // Note the '-'.
+ params.input1_multiplier = input1_multiplier;
+ params.input1_shift = input1_shift;
+ params.input2_offset = -Tensor::zero_point(input2); // Note the '-'.
+ params.input2_multiplier = input2_multiplier;
+ params.input2_shift = input2_shift;
+ params.output_offset = Tensor::zero_point(output);
+ params.output_multiplier = output_multiplier;
+ params.output_shift = output_shift;
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ const bool need_broadcast = luci_interpreter_pal::ProcessBroadcastShapes(
+ kernels::getTensorShape(input1), kernels::getTensorShape(input2), &params);
+
+ if (need_broadcast)
+ {
+ tiso_broadcast_func(
+ params, kernels::getTensorShape(input1), kernels::getTensorData<T>(kernel_data->input1_data),
+ kernels::getTensorShape(input2), kernels::getTensorData<T>(kernel_data->input2_data),
+ kernels::getTensorShape(output), kernels::getTensorData<T>(kernel_data->output_data));
+ }
+ else
+ {
+ tiso_func(params, kernels::getTensorShape(input1),
+              kernels::getTensorData<T>(kernel_data->input1_data),
+ kernels::getTensorShape(input2), kernels::getTensorData<T>(kernel_data->input2_data),
+ kernels::getTensorShape(output), kernels::getTensorData<T>(kernel_data->output_data));
+ }
+}
+
+template <typename T, typename TISOFunc = nullptr_t, typename TISOBroadcastFunc = nullptr_t,
+ typename Options = nullptr_t>
+void evalTISOInplaceQuantizedKernel(TISOFunc tiso_func, TISOBroadcastFunc tiso_broadcast_func,
+ kernels::TISOKernel *kernel, const Options *options)
+{
+ uint8_t *inplace_data_ptr = nullptr;
+ circle::Tensor *input_inplace_tensor = nullptr;
+
+ kernels::TISOData kernel_data = kernel->readInplaceData(inplace_data_ptr, input_inplace_tensor);
+
+ evalTISOQuantizedKernel<T, TISOFunc, TISOBroadcastFunc, Options>(tiso_func, tiso_broadcast_func,
+ kernel, &kernel_data, options);
+
+ kernel->runtime_graph()->makeInplaceOperation(input_inplace_tensor, kernel->output());
+ if (input_inplace_tensor == kernel->input1())
+ {
+ kernel->runtime_graph()->makeInplaceOperation(kernel->input2(), nullptr);
+ }
+ else
+ {
+ kernel->runtime_graph()->makeInplaceOperation(kernel->input1(), nullptr);
+ }
+}
+
+#endif // DIS_QUANT
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_BINARYOPUTILS_H
diff --git a/onert-micro/luci-interpreter/src/kernels/Builders.h b/onert-micro/luci-interpreter/src/kernels/Builders.h
new file mode 100644
index 000000000..a49f71e6c
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Builders.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_NODES_BUILDERS_H
+#define LUCI_INTERPRETER_KERNELS_NODES_BUILDERS_H
+
+#include "luci_interpreter/core/reader/CircleMicroReader.h"
+#include "core/RuntimeModule.h"
+
+namespace luci_interpreter
+{
+
+namespace
+{
+#ifdef USE_STATIC_ALLOC
+using BaseRuntimeGraph = StaticRuntimeGraph;
+#else
+using BaseRuntimeGraph = RuntimeGraph;
+#endif // USE_STATIC_ALLOC
+} // namespace
+
+#define REGISTER_KERNEL(builtin_operator, name) \
+ void configure_kernel_Circle##name(const circle::Operator *cur_op, \
+ BaseRuntimeGraph *runtime_graph); \
+ \
+ void execute_kernel_Circle##name(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph);
+
+#if USE_GENERATED_LIST
+#include "GeneratedKernelsToBuild.lst"
+#else
+#include "KernelsToBuild.lst"
+#endif
+
+#undef REGISTER_KERNEL
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_NODES_BUILDERS_H
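
REGISTER_KERNEL works as an X-macro: each entry of the included .lst file expands into a pair of forward declarations that the kernel builder dispatches to. For a hypothetical list entry REGISTER_KERNEL(ADD, Add), the preprocessor would emit roughly:

// Hypothetical expansion of REGISTER_KERNEL(ADD, Add):
void configure_kernel_CircleAdd(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph);

void execute_kernel_CircleAdd(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph);
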
diff --git a/onert-micro/luci-interpreter/src/kernels/CMakeLists.txt b/onert-micro/luci-interpreter/src/kernels/CMakeLists.txt
new file mode 100644
index 000000000..66c0a9e9d
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/CMakeLists.txt
@@ -0,0 +1,47 @@
+set(SOURCES
+ BinaryOpCommon.h
+ Utils.h
+ Utils.cpp
+ Builders.h
+ KernelBuilder.h
+ KernelBuilder.cpp
+ SISOKernel.h
+ TISOKernel.h
+ MISOKernel.h
+ PadCommon.cpp)
+
+macro(REGISTER_KERNEL OPERATOR, NODE)
+ list(APPEND SOURCES "${NODE}.cpp")
+endmacro(REGISTER_KERNEL)
+
+include(${KERNEL_REGISTER_FILE})
+
+add_library(${LUCI_INTERPRETER_KERNELS} STATIC ${SOURCES})
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(${LUCI_INTERPRETER_KERNELS} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
+target_include_directories(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_SOURCE_DIR})
+
+target_link_libraries(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_CORE})
+
+target_include_directories(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_PAL_COMMON_DIR})
+add_pal_to_target(${LUCI_INTERPRETER_KERNELS})
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+macro(REGISTER_KERNEL OPERATOR, NODE)
+ list(APPEND TEST_SOURCES "${NODE}.test.cpp")
+endmacro(REGISTER_KERNEL)
+
+include(${KERNEL_REGISTER_FILE})
+
+list(APPEND TEST_SOURCES TestUtils.h TestUtils.cpp)
+
+GTest_AddTest(${LUCI_INTERPRETER_KERNELS}_test ${TEST_SOURCES})
+target_link_libraries(${LUCI_INTERPRETER_KERNELS}_test onert_micro_coverage)
+target_link_libraries(${LUCI_INTERPRETER_KERNELS} PUBLIC onert_micro_coverage)
+target_link_libraries(${LUCI_INTERPRETER_KERNELS}_test ${LUCI_INTERPRETER_LOADER})
diff --git a/onert-micro/luci-interpreter/src/kernels/Cast.cpp b/onert-micro/luci-interpreter/src/kernels/Cast.cpp
new file mode 100644
index 000000000..82b187a1d
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Cast.cpp
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Cast.h"
+#include "kernels/Utils.h"
+
+namespace
+{
+
+using namespace luci_interpreter;
+using namespace luci_interpreter::kernels;
+
+template <typename InT, typename OutT>
+void cast_data(const InT *in_data, OutT *out_data, uint32_t elements_count)
+{
+ std::transform(in_data, in_data + elements_count, out_data,
+ [](InT a) { return static_cast<OutT>(a); });
+}
+
+template <typename InT> void cast_from_pointer_to_tensor(const InT *in_data, Tensor *out_tensor)
+{
+ auto const out_type = out_tensor->element_type();
+ auto const elements_count = out_tensor->shape().num_elements();
+
+ switch (out_type)
+ {
+ case DataType::U8:
+ cast_data(in_data, getTensorData<uint8_t>(out_tensor), elements_count);
+ break;
+ case DataType::U16:
+ cast_data(in_data, getTensorData<uint16_t>(out_tensor), elements_count);
+ break;
+ case DataType::U32:
+ cast_data(in_data, getTensorData<uint32_t>(out_tensor), elements_count);
+ break;
+ case DataType::U64:
+ cast_data(in_data, getTensorData<uint64_t>(out_tensor), elements_count);
+ break;
+ case DataType::S8:
+ cast_data(in_data, getTensorData<int8_t>(out_tensor), elements_count);
+ break;
+ case DataType::S16:
+ cast_data(in_data, getTensorData<int16_t>(out_tensor), elements_count);
+ break;
+ case DataType::S32:
+ cast_data(in_data, getTensorData<int32_t>(out_tensor), elements_count);
+ break;
+ case DataType::S64:
+ cast_data(in_data, getTensorData<int64_t>(out_tensor), elements_count);
+ break;
+ case DataType::FLOAT32:
+ cast_data(in_data, getTensorData<float>(out_tensor), elements_count);
+ break;
+ case DataType::BOOL:
+ cast_data(in_data, getTensorData<bool>(out_tensor), elements_count);
+ break;
+ default:
+ assert(false && "Unsupported output type.");
+ }
+}
+
+void cast_from_tensor_to_tensor(const Tensor *in_tensor, Tensor *out_tensor)
+{
+ auto in_type = in_tensor->element_type();
+
+ switch (in_type)
+ {
+ case DataType::U8:
+ cast_from_pointer_to_tensor(getTensorData<uint8_t>(in_tensor), out_tensor);
+ break;
+ case DataType::U16:
+ cast_from_pointer_to_tensor(getTensorData<uint16_t>(in_tensor), out_tensor);
+ break;
+ case DataType::U32:
+ cast_from_pointer_to_tensor(getTensorData<uint32_t>(in_tensor), out_tensor);
+ break;
+ case DataType::U64:
+ cast_from_pointer_to_tensor(getTensorData<uint64_t>(in_tensor), out_tensor);
+ break;
+ case DataType::S8:
+ cast_from_pointer_to_tensor(getTensorData<int8_t>(in_tensor), out_tensor);
+ break;
+ case DataType::S16:
+ cast_from_pointer_to_tensor(getTensorData<int16_t>(in_tensor), out_tensor);
+ break;
+ case DataType::S32:
+ cast_from_pointer_to_tensor(getTensorData<int32_t>(in_tensor), out_tensor);
+ break;
+ case DataType::S64:
+ cast_from_pointer_to_tensor(getTensorData<int64_t>(in_tensor), out_tensor);
+ break;
+ case DataType::FLOAT32:
+ cast_from_pointer_to_tensor(getTensorData<float>(in_tensor), out_tensor);
+ break;
+ case DataType::BOOL:
+ cast_from_pointer_to_tensor(getTensorData<bool>(in_tensor), out_tensor);
+ break;
+ default:
+ assert(false && "Unsupported input type.");
+ }
+}
+
+} // namespace
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Cast::Cast(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Cast::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() != DataType::Unknown);
+ LUCI_INTERPRETER_CHECK(output()->element_type() != DataType::Unknown);
+
+ const Shape &shape = input()->shape();
+  // TODO: enable this resize only for kernels with dynamic shapes
+ output()->resize(shape);
+}
+
+void Cast::execute() const
+{
+ assert(input()->shape().num_elements() == output()->shape().num_elements());
+
+ cast_from_tensor_to_tensor(input(), output());
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
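
The cast above resolves both runtime element types with two nested switches: the outer switch (cast_from_tensor_to_tensor) fixes the input template parameter, and the inner one (cast_from_pointer_to_tensor) fixes the output type, so every supported type pair lands in the statically typed cast_data. A stripped-down sketch of the same double-dispatch pattern, reduced to two types for brevity (names are illustrative):

#include <algorithm>
#include <cassert>
#include <cstdint>

enum class DType { S32, F32 };

template <typename InT, typename OutT>
void cast_data(const InT *in, OutT *out, int n)
{
  std::transform(in, in + n, out, [](InT v) { return static_cast<OutT>(v); });
}

// Inner dispatch: input type is fixed, output type is still dynamic.
template <typename InT> void cast_to(const InT *in, void *out, DType out_type, int n)
{
  switch (out_type)
  {
    case DType::S32: cast_data(in, static_cast<int32_t *>(out), n); break;
    case DType::F32: cast_data(in, static_cast<float *>(out), n); break;
  }
}

// Outer dispatch: fixes the input type, then forwards.
inline void cast(const void *in, DType in_type, void *out, DType out_type, int n)
{
  switch (in_type)
  {
    case DType::S32: cast_to(static_cast<const int32_t *>(in), out, out_type, n); break;
    case DType::F32: cast_to(static_cast<const float *>(in), out, out_type, n); break;
  }
}

int main()
{
  const int32_t in[3] = {1, 2, 3};
  float out[3] = {};
  cast(in, DType::S32, out, DType::F32, 3);
  assert(out[2] == 3.0f);
  return 0;
}
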
diff --git a/onert-micro/luci-interpreter/src/kernels/Cast.h b/onert-micro/luci-interpreter/src/kernels/Cast.h
new file mode 100644
index 000000000..f0bd02037
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Cast.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_CAST_H
+#define LUCI_INTERPRETER_KERNELS_CAST_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Cast : public Kernel
+{
+public:
+ Cast(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_CAST_H
diff --git a/onert-micro/luci-interpreter/src/kernels/Cast.test.cpp b/onert-micro/luci-interpreter/src/kernels/Cast.test.cpp
new file mode 100644
index 000000000..4713ad34c
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Cast.test.cpp
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Cast.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T1, typename T2>
+void Check(std::initializer_list<int32_t> shape, std::initializer_list<T1> input_data,
+ std::initializer_list<T2> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType input_type = getElementType<T1>();
+ constexpr DataType output_type = getElementType<T2>();
+
+ Tensor input_tensor = makeInputTensor<input_type>(shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(output_type);
+
+ Cast kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T2>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), shape);
+}
+
+template <typename T>
+void CheckBoolTo(std::initializer_list<int32_t> shape, std::initializer_list<bool> input_data,
+ std::initializer_list<T> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType input_type = loco::DataType::BOOL;
+ constexpr DataType output_type = getElementType<T>();
+ std::vector<typename DataTypeImpl<input_type>::Type> input_data_converted;
+ for (auto elem : input_data)
+ {
+ input_data_converted.push_back(elem);
+ }
+
+ Tensor input_tensor =
+ makeInputTensor<input_type>(shape, input_data_converted, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(output_type);
+
+ Cast kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), shape);
+}
+
+template <typename T> class CastTest : public ::testing::Test
+{
+};
+
+using IntDataTypes =
+ ::testing::Types<uint8_t, uint16_t, uint32_t, uint64_t, int8_t, int16_t, int32_t, int64_t>;
+TYPED_TEST_SUITE(CastTest, IntDataTypes);
+
+TYPED_TEST(CastTest, FloatToInt)
+{
+ Check<float, TypeParam>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ 1.0f, 9.0f, 7.0f, 3.0f, //
+ },
+ /*output_data=*/
+ {
+ 1, 9, 7, 3, //
+ });
+ SUCCEED();
+}
+
+TYPED_TEST(CastTest, IntToFloat)
+{
+ Check<TypeParam, float>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ 1, 9, 7, 3, //
+ },
+ /*output_data=*/
+ {
+ 1.0f, 9.0f, 7.0f, 3.0f, //
+ });
+ SUCCEED();
+}
+
+template <typename T1, typename T2> void check_int()
+{
+ Check<T1, T2>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ 1, 9, 7, 3, //
+ },
+ /*output_data=*/
+ {
+ 1, 9, 7, 3, //
+ });
+ SUCCEED();
+}
+
+TYPED_TEST(CastTest, IntToInt)
+{
+ check_int<TypeParam, uint8_t>();
+ check_int<TypeParam, uint16_t>();
+ check_int<TypeParam, uint32_t>();
+ check_int<TypeParam, uint64_t>();
+ check_int<TypeParam, int8_t>();
+ check_int<TypeParam, int16_t>();
+ check_int<TypeParam, int32_t>();
+ check_int<TypeParam, int64_t>();
+ SUCCEED();
+}
+
+TYPED_TEST(CastTest, IntToBool)
+{
+ Check<TypeParam, bool>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ 1, 0, 7, 0, //
+ },
+ /*output_data=*/
+ {
+ true, false, true, false, //
+ });
+ SUCCEED();
+}
+
+TYPED_TEST(CastTest, BoolToInt)
+{
+ CheckBoolTo<TypeParam>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ true, false, false, true, //
+ },
+ /*output_data=*/
+ {
+ 1, 0, 0, 1, //
+ });
+ SUCCEED();
+}
+
+TEST(CastTest, FloatToBool)
+{
+ Check<float, bool>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ 1.0f, 0.0f, 7.0f, 0.0f, //
+ },
+ /*output_data=*/
+ {
+ true, false, true, false, //
+ });
+ SUCCEED();
+}
+
+TEST(CastTest, BoolToFloat)
+{
+ CheckBoolTo<float>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ true, false, false, true, //
+ },
+ /*output_data=*/
+ {
+ 1.0f, 0.0f, 0.0f, 1.0f, //
+ });
+ SUCCEED();
+}
+
+TEST(CastTest, FloatToFloat)
+{
+ Check<float, float>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ 1.0f, 0.0f, 7.0f, 0.0f, //
+ },
+ /*output_data=*/
+ {
+ 1.0f, 0.0f, 7.0f, 0.0f, //
+ });
+ SUCCEED();
+}
+
+TEST(CastTest, BoolToBool)
+{
+ CheckBoolTo<bool>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ true, true, false, false, //
+ },
+ /*output_data=*/
+ {
+ true, true, false, false, //
+ });
+ SUCCEED();
+}
+
+TEST(CastTest, UnsupportedType_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 2, 4},
+ {
+ 1, 2, 7, 8, //
+ 1, 9, 7, 3, //
+ },
+ memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::Unknown);
+
+ Cast kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+ SUCCEED();
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Concatenation.cpp b/onert-micro/luci-interpreter/src/kernels/Concatenation.cpp
new file mode 100644
index 000000000..c8c096ef9
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Concatenation.cpp
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+
+#include "PALConcatenation.h"
+
+namespace luci_interpreter
+{
+
+namespace
+{
+
+template <typename T>
+void evalGeneric(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ const auto output_index = cur_op->outputs()->operator[](0);
+
+ assert(output_index != -1);
+
+ auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+ const auto *options = cur_op->builtin_options_as_ConcatenationOptions();
+
+ int axis = options->axis();
+ if (axis < 0)
+ axis += Tensor::num_dims(output);
+
+ const auto input_sizes = cur_op->inputs()->size();
+
+ std::vector<const T *> all_input_data;
+ std::vector<luci_interpreter::RuntimeShape> all_shape;
+ std::vector<luci_interpreter::RuntimeShape *> all_shape_ptr;
+
+ for (int32_t i = 0; i < input_sizes; ++i)
+ {
+ auto input_index = cur_op->inputs()->operator[](i);
+ const auto *tensor = runtime_graph->getCircleTensorByIndex(input_index);
+
+ const auto *tensor_data = runtime_graph->getDataByTensor(tensor);
+ if (tensor_data == nullptr)
+ tensor_data = runtime_graph->getConstDataByTensor(tensor);
+
+ auto *data = reinterpret_cast<const T *>(tensor_data);
+
+ auto runtime_shape = kernels::getTensorRuntimeShape(tensor, runtime_graph);
+
+ all_input_data.push_back(data);
+ all_shape.push_back(runtime_shape);
+ }
+
+ for (luci_interpreter::RuntimeShape &shape : all_shape)
+ {
+ all_shape_ptr.push_back(&shape);
+ }
+
+ auto *output_data = reinterpret_cast<T *>(runtime_graph->getDataByTensor(output));
+
+ luci_interpreter_pal::ConcatenationParams params{};
+ params.axis = axis;
+ params.inputs_count = all_shape.size();
+ luci_interpreter_pal::Concatenation(params, all_shape_ptr.data(), all_input_data.data(),
+ kernels::getTensorShape(output), output_data);
+}
+
+} // namespace
+
+void configure_kernel_CircleConcatenation(const circle::Operator *cur_op,
+ BaseRuntimeGraph *runtime_graph)
+{
+ const int num_inputs = cur_op->inputs()->size();
+ LUCI_INTERPRETER_CHECK(num_inputs > 0);
+
+ auto input_index = cur_op->inputs()->operator[](0);
+ auto output_index = cur_op->outputs()->operator[](0);
+
+ assert(input_index != -1);
+ assert(output_index != -1);
+
+ const auto *t0 = runtime_graph->getCircleTensorByIndex(input_index);
+ const auto *output = runtime_graph->getCircleTensorByIndex(output_index);
+
+ const auto *params = cur_op->builtin_options_as_ConcatenationOptions();
+
+ // TODO: Support concat with fused activation function
+ LUCI_INTERPRETER_CHECK(luci_actfunc(params->fused_activation_function()) == FusedActFunc::NONE);
+
+ int axis = params->axis();
+ if (axis < 0)
+ axis += Tensor::num_dims(t0);
+ LUCI_INTERPRETER_CHECK(axis >= 0 && axis < Tensor::num_dims(t0));
+
+ for (int i = 1; i < num_inputs; ++i)
+ {
+ input_index = cur_op->inputs()->operator[](i);
+ const auto *tensor = runtime_graph->getCircleTensorByIndex(input_index);
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(tensor) == Tensor::element_type(t0));
+ LUCI_INTERPRETER_CHECK(Tensor::num_dims(tensor) == Tensor::num_dims(t0));
+ }
+
+#ifndef DIS_QUANT
+  // If the input tensors are of INT8 type, the quantization parameters of all input tensors
+  // and of the output must be the same
+ for (int i = 1; i < num_inputs; ++i)
+ {
+ input_index = cur_op->inputs()->operator[](i);
+ const auto *tensor = runtime_graph->getCircleTensorByIndex(input_index);
+ if (Tensor::element_type(tensor) == DataType::S8)
+ {
+ LUCI_INTERPRETER_CHECK(Tensor::quantized_dimension(tensor) ==
+ Tensor::quantized_dimension(output));
+
+ LUCI_INTERPRETER_CHECK(Tensor::zero_points(tensor).size() == Tensor::scales(tensor).size());
+ LUCI_INTERPRETER_CHECK(Tensor::zero_points(tensor) == Tensor::zero_points(output));
+ LUCI_INTERPRETER_CHECK(Tensor::scales(tensor) == Tensor::scales(output));
+ }
+ }
+#endif // DIS_QUANT
+}
+
+void execute_kernel_CircleConcatenation(const circle::Operator *cur_op,
+ BaseRuntimeGraph *runtime_graph)
+{
+ int num_inputs = cur_op->inputs()->size();
+ LUCI_INTERPRETER_CHECK(num_inputs > 0);
+
+ const auto input_index = cur_op->inputs()->operator[](0);
+ assert(input_index != -1);
+ const auto *t0 = runtime_graph->getCircleTensorByIndex(input_index);
+
+ switch (Tensor::element_type(t0))
+ {
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ evalGeneric<float>(cur_op, runtime_graph);
+ break;
+#endif // DIS_FLOAT
+#ifndef DIS_QUANT
+ case DataType::S8:
+ evalGeneric<int8_t>(cur_op, runtime_graph);
+ break;
+#endif // DIS_QUANT
+ case DataType::S32:
+ evalGeneric<int32_t>(cur_op, runtime_graph);
+ break;
+ case DataType::S64:
+ evalGeneric<int64_t>(cur_op, runtime_graph);
+ break;
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+} // namespace luci_interpreter
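
Both configure and execute normalize a negative axis by adding the tensor rank, mirroring Python-style indexing where axis -1 denotes the innermost dimension. A standalone check of that arithmetic (illustrative helper, not interpreter code):

#include <cassert>

// Map a possibly negative axis onto [0, rank), as the kernel above does.
inline int normalize_axis(int axis, int rank)
{
  if (axis < 0)
    axis += rank;
  assert(axis >= 0 && axis < rank);
  return axis;
}

int main()
{
  assert(normalize_axis(-1, 4) == 3); // innermost dimension of a rank-4 tensor
  assert(normalize_axis(2, 4) == 2);  // non-negative axes pass through
  return 0;
}
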
diff --git a/onert-micro/luci-interpreter/src/kernels/Concatenation.test.cpp b/onert-micro/luci-interpreter/src/kernels/Concatenation.test.cpp
new file mode 100644
index 000000000..6f00941c0
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Concatenation.test.cpp
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/concatenation/FloatConcatenationKernel.h"
+#include "luci_interpreter/test_models/concatenation/IntConcatenationKernel.h"
+#include "luci_interpreter/test_models/concatenation/NegConcatenationKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class ConcatenationTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T>
+std::vector<T> checkConcatenationKernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 2);
+
+ // set left input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ // set right input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(1));
+ std::copy(test_data_base->get_input_data_by_index(1).begin(),
+ test_data_base->get_input_data_by_index(1).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(ConcatenationTest, Float_P)
+{
+ test_kernel::TestDataFloatConcatenation test_data_kernel;
+ std::vector<float> output_data_vector = checkConcatenationKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, kernels::testing::FloatArrayNear(
+ test_data_kernel.get_output_data_by_index(0), 0.0001f));
+}
+
+TEST_F(ConcatenationTest, Int32_P)
+{
+ test_kernel::TestDataS32Concatenation test_data_kernel;
+ std::vector<int32_t> output_data_vector = checkConcatenationKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(ConcatenationTest, Int64_P)
+{
+ test_kernel::TestDataS64Concatenation test_data_kernel;
+ std::vector<int64_t> output_data_vector = checkConcatenationKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(ConcatenationTest, InputTypeMismatch_NEG)
+{
+ test_kernel::TestDataInputTypeMismatchConcatenation test_data_kernel;
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+TEST_F(ConcatenationTest, InputOutputTypeMismatch_NEG)
+{
+ test_kernel::TestDataReluConcatenation test_data_kernel;
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+TEST_F(ConcatenationTest, WrongAxis_NEG)
+{
+ test_kernel::TestDataWrongAxisConcatenation test_data_kernel;
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+// TODO: add tests for S8
+
+} // namespace
+} // namespace luci_interpreter
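
The _NEG tests above rely on a failed loader-side check aborting the process; googletest runs the statement in a forked child and EXPECT_DEATH passes when that child dies. A minimal standalone illustration of the pattern (stand-in check, not the interpreter's actual macro; assumes an assert-enabled build):

#include <cassert>
#include <gtest/gtest.h>

// Stand-in for a loader-side validation that aborts on malformed input.
void load_model(int num_inputs)
{
  assert(num_inputs > 0 && "model must have at least one input");
  (void)num_inputs; // silence the unused warning in NDEBUG builds
}

TEST(LoaderDeathTest, RejectsEmptyModel)
{
  // The "" matcher accepts any output from the dying child process.
  EXPECT_DEATH(load_model(0), "");
}
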
diff --git a/onert-micro/luci-interpreter/src/kernels/Conv2D.cpp b/onert-micro/luci-interpreter/src/kernels/Conv2D.cpp
new file mode 100644
index 000000000..75bccbcb8
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Conv2D.cpp
@@ -0,0 +1,387 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+
+#include "PALConv2d.h"
+
+namespace luci_interpreter
+{
+
+namespace
+{
+
+int32_t compute_padding_h(const circle::Tensor *input, const circle::Tensor *filter,
+ const circle::Conv2DOptions *options)
+{
+ const int32_t input_height = Tensor::dim(input, 1);
+ const int32_t filter_height = Tensor::dim(filter, 1);
+ const int32_t output_height =
+ kernels::computeOutputSize(luci_padding(options->padding()), input_height, filter_height,
+ options->stride_h(), options->dilation_h_factor());
+
+ const auto padding_height = kernels::computePadding(
+ options->stride_h(), options->dilation_h_factor(), input_height, filter_height, output_height);
+ return padding_height;
+}
+
+int32_t compute_padding_w(const circle::Tensor *input, const circle::Tensor *filter,
+ const circle::Conv2DOptions *options)
+{
+ const int32_t input_width = Tensor::dim(input, 2);
+ const int32_t filter_width = Tensor::dim(filter, 2);
+ const int32_t output_width =
+ kernels::computeOutputSize(luci_padding(options->padding()), input_width, filter_width,
+ options->stride_w(), options->dilation_w_factor());
+
+ const auto padding_width = kernels::computePadding(
+ options->stride_w(), options->dilation_w_factor(), input_width, filter_width, output_width);
+
+ return padding_width;
+}
+
+#ifndef DIS_FLOAT
+
+void evalFloat(const circle::Tensor *input, const circle::Tensor *filter,
+ const circle::Tensor *bias, const circle::Tensor *output,
+ const circle::Conv2DOptions *options, BaseRuntimeGraph *runtime_graph)
+{
+ float activation_min{};
+ float activation_max{};
+ kernels::calculateActivationRange(luci_actfunc(options->fused_activation_function()),
+ &activation_min, &activation_max);
+
+ luci_interpreter_pal::ConvParams params{};
+ params.padding_values.height = compute_padding_h(input, filter, options);
+ params.padding_values.width = compute_padding_w(input, filter, options);
+ params.stride_height = options->stride_h();
+ params.stride_width = options->stride_w();
+ params.dilation_height_factor = options->dilation_h_factor();
+ params.dilation_width_factor = options->dilation_w_factor();
+ params.float_activation_min = activation_min;
+ params.float_activation_max = activation_max;
+
+ auto *input_data = runtime_graph->getDataByTensor(input);
+ auto *output_data = runtime_graph->getDataByTensor(output);
+
+ auto *filter_data = runtime_graph->getConstDataByTensor(filter);
+ auto *bias_data = runtime_graph->getConstDataByTensor(bias);
+
+ int32_t input_shape[kMaxSmallSize];
+ kernels::getTensorDims(input, runtime_graph, input_shape);
+
+ int32_t filter_shape[kMaxSmallSize];
+ kernels::getTensorDims(filter, runtime_graph, filter_shape);
+
+ int32_t output_shape[kMaxSmallSize];
+ kernels::getTensorDims(output, runtime_graph, output_shape);
+
+ luci_interpreter_pal::Conv(params, input_shape, kernels::getTensorData<float>(input_data),
+ filter_shape, kernels::getTensorData<float>(filter_data),
+ kernels::getTensorData<float>(bias_data), output_shape,
+ kernels::getTensorData<float>(output_data));
+}
+
+#endif // DIS_FLOAT
+
+#ifndef DIS_QUANT
+
+void evalQuantized(const circle::Tensor *input, const circle::Tensor *filter,
+ const circle::Tensor *bias, const circle::Tensor *output,
+ const circle::Conv2DOptions *options, BaseRuntimeGraph *runtime_graph)
+{
+ const auto input_scale = static_cast<double>(Tensor::scale(input));
+ const auto filter_scale = static_cast<double>(Tensor::scale(filter));
+ const auto output_scale = static_cast<double>(Tensor::scale(output));
+
+ const double real_multiplier = input_scale * filter_scale / output_scale;
+ int32_t output_multiplier{};
+ int output_shift{};
+ kernels::quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ kernels::calculateActivationRangeQuantized(luci_actfunc(options->fused_activation_function()),
+ output, &activation_min, &activation_max);
+
+ luci_interpreter_pal::ConvParams params{};
+ params.padding_values.height = compute_padding_h(input, filter, options);
+ params.padding_values.width = compute_padding_w(input, filter, options);
+ params.stride_height = options->stride_h();
+ params.stride_width = options->stride_w();
+ params.dilation_height_factor = options->dilation_h_factor();
+ params.dilation_width_factor = options->dilation_w_factor();
+ // The kernel expects input and filter zero points to be negated.
+ params.input_offset = -Tensor::zero_point(input); // Note the '-'.
+ params.weights_offset = -Tensor::zero_point(filter); // Note the '-'.
+ params.output_offset = Tensor::zero_point(output);
+ params.output_multiplier = output_multiplier;
+ params.output_shift = output_shift;
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ auto *input_data = runtime_graph->getDataByTensor(input);
+ auto *output_data = runtime_graph->getDataByTensor(output);
+
+ auto *filter_data = runtime_graph->getConstDataByTensor(filter);
+ auto *bias_data = runtime_graph->getConstDataByTensor(bias);
+
+ int32_t input_shape[kMaxSmallSize];
+ kernels::getTensorDims(input, runtime_graph, input_shape);
+
+ int32_t filter_shape[kMaxSmallSize];
+ kernels::getTensorDims(filter, runtime_graph, filter_shape);
+
+ int32_t output_shape[kMaxSmallSize];
+ kernels::getTensorDims(output, runtime_graph, output_shape);
+
+ luci_interpreter_pal::Conv(params, input_shape, kernels::getTensorData<uint8_t>(input_data),
+ filter_shape, kernels::getTensorData<uint8_t>(filter_data),
+ kernels::getTensorData<int32_t>(bias_data), output_shape,
+ kernels::getTensorData<uint8_t>(output_data));
+}
+
+void evalQuantizedPerChannel(const circle::Tensor *input, const circle::Tensor *filter,
+ const circle::Tensor *bias, const circle::Tensor *output,
+ const circle::Conv2DOptions *options, BaseRuntimeGraph *runtime_graph)
+{
+ auto *raw_input_data = runtime_graph->getDataByTensor(input);
+ auto *raw_output_data = runtime_graph->getDataByTensor(output);
+
+ auto *raw_filter_data = runtime_graph->getConstDataByTensor(filter);
+ auto *raw_bias_data = runtime_graph->getConstDataByTensor(bias);
+
+ const auto *input_data = kernels::getTensorData<uint8_t>(raw_input_data);
+ const auto *filter_data = kernels::getTensorData<uint8_t>(raw_filter_data);
+ const auto *bias_data = kernels::getTensorData<int32_t>(raw_bias_data);
+ auto *output_data = kernels::getTensorData<uint8_t>(raw_output_data);
+
+ const int32_t batches = Tensor::dim(input, 0);
+ const int32_t input_height = Tensor::dim(input, 1);
+ const int32_t input_width = Tensor::dim(input, 2);
+ const int32_t input_depth = Tensor::dim(input, 3);
+ const int32_t output_depth = Tensor::dim(filter, 0);
+ const int32_t filter_height = Tensor::dim(filter, 1);
+ const int32_t filter_width = Tensor::dim(filter, 2);
+ const int32_t output_height = Tensor::dim(output, 1);
+ const int32_t output_width = Tensor::dim(output, 2);
+
+ const int32_t stride_height = options->stride_h();
+ const int32_t stride_width = options->stride_w();
+ const int32_t dilation_height_factor = options->dilation_h_factor();
+ const int32_t dilation_width_factor = options->dilation_w_factor();
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ kernels::calculateActivationRangeQuantized(luci_actfunc(options->fused_activation_function()),
+ output, &activation_min, &activation_max);
+
+ const std::vector<double> effective_output_scale = kernels::getQuantizedConvolutionMultiplers(
+ Tensor::scale(input), Tensor::scales(filter), Tensor::scale(output));
+
+ const std::vector<kernels::ChannelQuantMultipliers> multipliers_raw =
+ kernels::quantizeMultipliers(effective_output_scale);
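+  // One (multiplier, shift) pair per output channel; BroadcastableWrapper repeats a
+  // single pair transparently when the filter happens to be quantized per-tensor.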
+ kernels::BroadcastableWrapper<kernels::ChannelQuantMultipliers> quant_multipliers(
+ multipliers_raw);
+
+ for (int32_t batch = 0; batch < batches; ++batch)
+ {
+ for (int32_t out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int32_t out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+ {
+ const int32_t in_y_origin =
+ out_y * stride_height - compute_padding_h(input, filter, options);
+ const int32_t in_x_origin =
+ out_x * stride_width - compute_padding_w(input, filter, options);
+ int32_t acc = 0;
+ for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
+ const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
+ if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
+ {
+ for (int32_t in_c = 0; in_c < input_depth; ++in_c)
+ {
+ const uint8_t input_val =
+ input_data[kernels::calcOffset(input, batch, in_y, in_x, in_c)];
+ const uint8_t filter_val =
+ filter_data[kernels::calcOffset(filter, out_c, filter_y, filter_x, in_c)];
+ acc += static_cast<int32_t>(input_val - Tensor::zero_point(input)) *
+ static_cast<int32_t>(filter_val - Tensor::zero_points(filter)[out_c]);
+ }
+ }
+ }
+ }
+ if (bias_data)
+ {
+ acc += bias_data[out_c];
+ }
+
+ int32_t scaled_acc = luci_interpreter_pal::multiplyByQuantizedMultiplier(
+ acc, quant_multipliers[out_c].multiplier, quant_multipliers[out_c].shift);
+
+ scaled_acc += Tensor::zero_point(output);
+ scaled_acc = std::max(scaled_acc, activation_min);
+ scaled_acc = std::min(scaled_acc, activation_max);
+ output_data[kernels::calcOffset(output, batch, out_y, out_x, out_c)] = scaled_acc;
+ }
+ }
+ }
+ }
+}
+#endif // DIS_QUANT
+
+} // namespace
+
+void configure_kernel_CircleConv2D(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ const auto input_index = cur_op->inputs()->operator[](0);
+ const auto filter_index = cur_op->inputs()->operator[](1);
+ const auto bias_index = cur_op->inputs()->operator[](2);
+ const auto output_index = cur_op->outputs()->operator[](0);
+
+ assert(input_index != -1);
+ assert(filter_index != -1);
+ assert(output_index != -1);
+
+ const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+ const auto filter = runtime_graph->getCircleTensorByIndex(filter_index);
+ const auto bias = runtime_graph->getCircleTensorByIndex(bias_index);
+ const auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+ assert(input != nullptr);
+ assert(filter != nullptr);
+
+ auto filter_data = runtime_graph->getConstDataByTensor(filter);
+
+ assert(filter_data != nullptr);
+
+ const auto *options = cur_op->builtin_options_as_Conv2DOptions();
+
+ if (Tensor::element_type(input) == DataType::FLOAT32 &&
+ Tensor::element_type(filter) == DataType::FLOAT32)
+ {
+ LUCI_INTERPRETER_CHECK(bias == nullptr || Tensor::element_type(bias) == DataType::FLOAT32);
+ }
+#ifndef DIS_QUANT
+ else if (Tensor::element_type(input) == DataType::U8 &&
+ Tensor::element_type(filter) == DataType::U8)
+ {
+ LUCI_INTERPRETER_CHECK(bias == nullptr || Tensor::element_type(bias) == DataType::S32);
+ }
+ else if (Tensor::element_type(input) == DataType::S8 &&
+ Tensor::element_type(filter) == DataType::S8)
+ {
+ LUCI_INTERPRETER_CHECK(bias == nullptr || Tensor::element_type(bias) == DataType::S32);
+ LUCI_INTERPRETER_CHECK(Tensor::num_dims(filter) == 4);
+ LUCI_INTERPRETER_CHECK(Tensor::scales(filter).size() ==
+ static_cast<size_t>(Tensor::dim(filter, 0)));
+ for (auto zerop : Tensor::zero_points(filter))
+ {
+ LUCI_INTERPRETER_CHECK(zerop == 0);
+ }
+ }
+ else if (Tensor::element_type(input) == DataType::S16 &&
+ Tensor::element_type(filter) == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(bias == nullptr || Tensor::element_type(bias) == DataType::S64);
+ }
+#endif // DIS_QUANT
+ else
+ {
+ assert(false && "Unsupported type.");
+ }
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(output) == Tensor::element_type(input));
+ LUCI_INTERPRETER_CHECK(Tensor::num_dims(input) == 4 && Tensor::num_dims(filter) == 4);
+
+ const int32_t output_depth = Tensor::dim(filter, 0);
+ LUCI_INTERPRETER_CHECK(Tensor::dim(filter, 3) == Tensor::dim(input, 3));
+
+ LUCI_INTERPRETER_CHECK(bias == nullptr ||
+ (Tensor::num_dims(bias) == 1 && Tensor::dim(bias, 0) == output_depth));
+
+ switch (options->fused_activation_function())
+ {
+ case circle::ActivationFunctionType_NONE:
+ case circle::ActivationFunctionType_RELU:
+ case circle::ActivationFunctionType_RELU6:
+ case circle::ActivationFunctionType_RELU_N1_TO_1:
+ break;
+ default:
+ assert(false && "Unsupported fused activation");
+ }
+}
+
+void execute_kernel_CircleConv2D(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ const auto input_index = cur_op->inputs()->operator[](0);
+ const auto weight_index = cur_op->inputs()->operator[](1);
+ const auto bias_index = cur_op->inputs()->operator[](2);
+ const auto output_index = cur_op->outputs()->operator[](0);
+
+ assert(input_index != -1);
+ assert(weight_index != -1);
+ assert(output_index != -1);
+
+ const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+ const auto weights = runtime_graph->getCircleTensorByIndex(weight_index);
+ const auto bias = runtime_graph->getCircleTensorByIndex(bias_index);
+ const auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+ assert(input != nullptr);
+ assert(weights != nullptr);
+ assert(output != nullptr);
+
+ const auto *options = cur_op->builtin_options_as_Conv2DOptions();
+
+ switch (Tensor::element_type(input))
+ {
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ if (Tensor::element_type(weights) == DataType::FLOAT32)
+ {
+ evalFloat(input, weights, bias, output, options, runtime_graph);
+ break;
+      }
+      assert(false && "Unsupported type.");
+#endif // DIS_FLOAT
+#ifndef DIS_QUANT
+ case DataType::U8:
+ if (Tensor::scales(weights).size() == 1)
+ {
+ evalQuantized(input, weights, bias, output, options, runtime_graph);
+ }
+ else if (Tensor::scales(weights).size() > 1)
+ {
+ LUCI_INTERPRETER_CHECK(Tensor::num_dims(weights) == 4);
+ LUCI_INTERPRETER_CHECK(Tensor::scales(weights).size() ==
+ static_cast<size_t>(Tensor::dim(weights, 0)));
+ evalQuantizedPerChannel(input, weights, bias, output, options, runtime_graph);
+ }
+ break;
+#endif // DIS_QUANT
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+} // namespace luci_interpreter
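
evalQuantized above folds the three scales into one real multiplier, input_scale * filter_scale / output_scale, and kernels::quantizeMultiplier splits it into a Q31 fixed-point mantissa plus a power-of-two shift so the inner loop stays in integer arithmetic. A simplified sketch of that decomposition (the interpreter's own quantizeMultiplier additionally handles saturation and related edge cases):

#include <cmath>
#include <cstdint>

// Split a real multiplier into q31 and shift so that real ~= (q31 / 2^31) * 2^shift.
void quantize_multiplier(double real, int32_t *q31, int *shift)
{
  if (real == 0.0)
  {
    *q31 = 0;
    *shift = 0;
    return;
  }
  const double mantissa = std::frexp(real, shift); // mantissa in [0.5, 1)
  int64_t q = static_cast<int64_t>(std::round(mantissa * (1LL << 31)));
  if (q == (1LL << 31)) // rounding pushed the mantissa to exactly 1.0
  {
    q /= 2;
    ++(*shift);
  }
  *q31 = static_cast<int32_t>(q);
}
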
diff --git a/onert-micro/luci-interpreter/src/kernels/Conv2D.test.cpp b/onert-micro/luci-interpreter/src/kernels/Conv2D.test.cpp
new file mode 100644
index 000000000..c373bd28f
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Conv2D.test.cpp
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/conv2d/FloatConv2DKernel.h"
+#include "luci_interpreter/test_models/conv2d/U8Conv2DKernel.h"
+#include "luci_interpreter/test_models/conv2d/NegConv2DKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class Conv2DTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T> std::vector<T> checkConv2DKernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 1);
+
+ // set input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(Conv2DTest, Float_P)
+{
+ test_kernel::TestDataFloatConv2D test_data_kernel;
+ std::vector<float> output_data_vector = checkConv2DKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, kernels::testing::FloatArrayNear(
+ test_data_kernel.get_output_data_by_index(0), 0.0001f));
+}
+
+TEST_F(Conv2DTest, U8_P)
+{
+ test_kernel::TestDataU8Conv2D test_data_kernel;
+ std::vector<uint8_t> output_data_vector = checkConv2DKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(Conv2DTest, Input_type_mismatch_NEG)
+{
+ test_kernel::NegTestDataInputMismatchConv2DKernel test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+TEST_F(Conv2DTest, Wrong_bias_type_NEG)
+{
+ test_kernel::NegTestDataWrongBiasTypeConv2DKernel test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+TEST_F(Conv2DTest, Invalid_input_type_NEG)
+{
+ test_kernel::NegTestDataInvalidInputTypeConv2DKernel test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/DepthToSpace.cpp b/onert-micro/luci-interpreter/src/kernels/DepthToSpace.cpp
new file mode 100644
index 000000000..937958ba8
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/DepthToSpace.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DepthToSpace.h"
+#include "Utils.h"
+#include "PALDepthToSpace.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+DepthToSpace::DepthToSpace(const Tensor *input, Tensor *output, const DepthToSpaceParams &params)
+ : KernelWithParams<DepthToSpaceParams>({input}, {output}, params)
+{
+}
+
+void DepthToSpace::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32 ||
+                         output()->element_type() == DataType::U8);
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ const int block_size = params().block_size;
+ const int32_t input_height = input()->shape().dim(1);
+ const int32_t input_width = input()->shape().dim(2);
+ const int32_t input_channels = input()->shape().dim(3);
+ int32_t output_height = input_height * block_size;
+ int32_t output_width = input_width * block_size;
+ int32_t output_channels = input_channels / block_size / block_size;
+
+ LUCI_INTERPRETER_CHECK(input_height == output_height / block_size);
+ LUCI_INTERPRETER_CHECK(input_width == output_width / block_size);
+ LUCI_INTERPRETER_CHECK(input_channels == output_channels * block_size * block_size);
+
+ Shape output_shape(4);
+ output_shape.dim(0) = input()->shape().dim(0);
+ output_shape.dim(1) = output_height;
+ output_shape.dim(2) = output_width;
+ output_shape.dim(3) = output_channels;
+
+  // TODO: enable this resize only for kernels with dynamic shapes
+ output()->resize(output_shape);
+}
+
+void DepthToSpace::execute() const
+{
+ tflite::DepthToSpaceParams op_params;
+ op_params.block_size = params().block_size;
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ luci_interpreter_pal::DepthToSpace(op_params, getTensorShape(input()),
+ getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ luci_interpreter_pal::DepthToSpace(op_params, getTensorShape(input()),
+ getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ default:
+ assert(false && "Unsupported Type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/DepthToSpace.h b/onert-micro/luci-interpreter/src/kernels/DepthToSpace.h
new file mode 100644
index 000000000..63ce37610
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/DepthToSpace.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_DEPTHTOSPACE_H
+#define LUCI_INTERPRETER_KERNELS_DEPTHTOSPACE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <vector>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class DepthToSpace : public KernelWithParams<DepthToSpaceParams>
+{
+public:
+ DepthToSpace(const Tensor *input, Tensor *output, const DepthToSpaceParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_DEPTHTOSPACE_H
diff --git a/onert-micro/luci-interpreter/src/kernels/DepthToSpace.test.cpp b/onert-micro/luci-interpreter/src/kernels/DepthToSpace.test.cpp
new file mode 100644
index 000000000..88e6e07f1
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/DepthToSpace.test.cpp
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/DepthToSpace.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T> class DepthToSpaceTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(DepthToSpaceTest, DataTypes);
+
+TYPED_TEST(DepthToSpaceTest, SimpleCase)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<TypeParam> input_data{1, 2, 3, 4, 5, 6, 7, 8};
+ Shape input_shape{1, 1, 2, 4};
+ std::vector<TypeParam> output_data{1, 2, 5, 6, 3, 4, 7, 8};
+ std::vector<int32_t> output_shape{1, 2, 4, 1};
+
+ Tensor input_tensor =
+ makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>());
+
+ DepthToSpaceParams params{};
+ params.block_size = 2;
+
+ DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<TypeParam>(output_tensor),
+ ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST(DepthToSpaceTest, InvalidInputShape_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8};
+ Shape input_shape{1, 2, 4};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ DepthToSpaceParams params{};
+ params.block_size = 2;
+
+ DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(DepthToSpaceTest, InOutTypeMismatch_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8};
+ Shape input_shape{1, 1, 2, 4};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ DepthToSpaceParams params{};
+ params.block_size = 2;
+
+ DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(DepthToSpaceTest, InvalidBlockSize_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8};
+ Shape input_shape{1, 1, 2, 4};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ DepthToSpaceParams params{};
+ params.block_size = 3;
+
+ DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
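
The SimpleCase data above follows directly from the NHWC DepthToSpace index mapping: output element (b, h, w, c) is read from input (b, h / bs, w / bs, c + (w % bs) * out_c + (h % bs) * out_c * bs). A small reference loop reproducing the test vector (illustrative code, not the PAL implementation):

#include <cassert>
#include <vector>

// Minimal NHWC DepthToSpace reference; assumes c is divisible by bs * bs.
std::vector<int> depth_to_space(const std::vector<int> &in, int n, int h, int w, int c, int bs)
{
  const int out_c = c / (bs * bs);
  std::vector<int> out(in.size());
  for (int b = 0; b < n; ++b)
    for (int oy = 0; oy < h * bs; ++oy)
      for (int ox = 0; ox < w * bs; ++ox)
        for (int oc = 0; oc < out_c; ++oc)
        {
          const int ic = oc + (ox % bs) * out_c + (oy % bs) * out_c * bs;
          const int in_idx = ((b * h + oy / bs) * w + ox / bs) * c + ic;
          const int out_idx = ((b * h * bs + oy) * w * bs + ox) * out_c + oc;
          out[out_idx] = in[in_idx];
        }
  return out;
}

int main()
{
  // {1, 1, 2, 4} input with block_size 2, as in SimpleCase above.
  auto out = depth_to_space({1, 2, 3, 4, 5, 6, 7, 8}, 1, 1, 2, 4, 2);
  assert((out == std::vector<int>{1, 2, 5, 6, 3, 4, 7, 8}));
  return 0;
}
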
diff --git a/onert-micro/luci-interpreter/src/kernels/DepthwiseConv2D.cpp b/onert-micro/luci-interpreter/src/kernels/DepthwiseConv2D.cpp
new file mode 100644
index 000000000..201eaf3ac
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/DepthwiseConv2D.cpp
@@ -0,0 +1,450 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/DepthwiseConv2D.h"
+
+#include "kernels/Utils.h"
+
+#include "PALDepthwiseConv2d.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+DepthwiseConv2D::DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias,
+ Tensor *output, Tensor *scratchpad,
+ const DepthwiseConv2DParams &params)
+ : KernelWithParams<DepthwiseConv2DParams>({input, filter, bias}, {output, scratchpad}, params)
+{
+}
+
+void DepthwiseConv2D::configure()
+{
+ // TensorFlow Lite (as of v2.2.0) supports the following combinations of types:
+ // | input filter bias output |
+ // ----+---------------------------+
+ // (1) | float float float float |
+ // (2) | float int8 float float | hybrid
+ // (3) | uint8 uint8 int32 uint8 | quantized
+ // (4) | int8 int8 int32 int8 | quantized per channel
+ // (5) | int16 int8 int64 int16 | quantized per channel 16x8
+ //
+ // We only support (1), (3) and (4) for now, and additionally the following:
+ // | input filter bias output |
+ // ----+---------------------------+
+  // (6) | int16 int16 int64 int16   |
+ //
+ if (input()->element_type() == DataType::FLOAT32 && filter()->element_type() == DataType::FLOAT32)
+ {
+ LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::FLOAT32);
+ }
+ else if (input()->element_type() == DataType::U8 && filter()->element_type() == DataType::U8)
+ {
+ LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
+ }
+ else if (input()->element_type() == DataType::S8 && filter()->element_type() == DataType::S8)
+ {
+ LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(static_cast<uint32_t>(filter()->shape().dim(3)) ==
+ filter()->scales().size());
+ for (auto zerop : filter()->zero_points())
+ {
+ LUCI_INTERPRETER_CHECK(zerop == 0);
+ }
+ LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
+ }
+ else if (input()->element_type() == DataType::S16 && filter()->element_type() == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S64);
+ }
+ else
+ {
+ assert(false && "Unsupported type.");
+ }
+ LUCI_INTERPRETER_CHECK(output()->element_type() == input()->element_type());
+
+ const Shape &input_shape = input()->shape();
+ const Shape &filter_shape = filter()->shape();
+ LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4);
+
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ // Filter format: [1, H, W, O].
+ LUCI_INTERPRETER_CHECK(filter_shape.dim(0) == 1);
+ const int32_t filter_height = filter_shape.dim(1);
+ const int32_t filter_width = filter_shape.dim(2);
+ const int32_t channels_out = filter_shape.dim(3);
+
+ LUCI_INTERPRETER_CHECK(bias() == nullptr || (bias()->shape().num_dims() == 1 &&
+ bias()->shape().dim(0) == channels_out));
+
+ const int32_t output_height =
+ computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height,
+ _params.dilation_height_factor);
+ const int32_t output_width =
+ computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width,
+ _params.dilation_width_factor);
+
+ _padding_height = computePadding(_params.stride_height, _params.dilation_height_factor,
+ input_height, filter_height, output_height);
+ _padding_width = computePadding(_params.stride_width, _params.dilation_width_factor, input_width,
+ filter_width, output_width);
+
+  // TODO: enable this resize only for kernels with dynamic shapes
+ output()->resize({batches, output_height, output_width, channels_out});
+
+ tflite::DepthwiseParams params{};
+
+ params.dilation_height_factor = _params.dilation_height_factor;
+ params.dilation_width_factor = _params.dilation_width_factor;
+
+ auto scratchpad = getOutputTensors()[1];
+ luci_interpreter_pal::SetupScratchpadTensor(scratchpad, params, input()->element_type(),
+ getTensorShape(input()), getTensorShape(filter()),
+ getTensorShape(output()));
+}
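+
+// For reference, the output-size arithmetic that computeOutputSize() above is
+// expected to implement (a sketch of the usual TFLite SAME/VALID semantics,
+// not code from this kernel):
+//
+//   effective_filter_size = (filter_size - 1) * dilation + 1
+//   VALID: output = (input + stride - effective_filter_size) / stride
+//   SAME:  output = (input + stride - 1) / stride
+//
+// e.g. input_height = 4, filter_height = 2, stride_height = 2, dilation = 1
+// with VALID padding gives (4 + 2 - 2) / 2 = 2 output rows, matching the
+// Float test in DepthwiseConv2D.test.cpp.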
+
+void DepthwiseConv2D::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ if (filter()->element_type() == DataType::FLOAT32)
+ {
+ evalFloat();
+ break;
+ }
+      assert(false && "Unsupported type.");
+      break;
+ case DataType::U8:
+ if (filter()->scales().size() == 1)
+ {
+ evalQuantized();
+ }
+ else if (filter()->scales().size() > 1)
+ {
+ LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
+ static_cast<size_t>(filter()->shape().dim(3)));
+ evalQuantizedPerChannel();
+ }
+ break;
+ case DataType::S8:
+ evalQuantizedS8PerChannel();
+ break;
+ case DataType::S16:
+ evalQuantizedS16();
+ break;
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
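+
+// Note on the U8 dispatch above: a single filter scale selects the legacy
+// per-tensor quantized path, while several scales select the per-channel
+// path; the per-channel branch re-validates that the scale count matches the
+// output-channel dimension before evaluating.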
+
+void DepthwiseConv2D::evalFloat() const
+{
+ float activation_min{};
+ float activation_max{};
+ calculateActivationRange(_params.activation, &activation_min, &activation_max);
+
+ tflite::DepthwiseParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.dilation_height_factor = _params.dilation_height_factor;
+ params.dilation_width_factor = _params.dilation_width_factor;
+ params.depth_multiplier = _params.depth_multiplier;
+ params.float_activation_min = activation_min;
+ params.float_activation_max = activation_max;
+
+ tflite::reference_ops::DepthwiseConv(
+ params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
+ getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void DepthwiseConv2D::evalQuantizedPerChannel() const
+{
+ const auto *input_data = getTensorData<uint8_t>(input());
+ const auto *filter_data = getTensorData<uint8_t>(filter());
+ const auto *bias_data = getTensorData<int32_t>(bias());
+ auto *output_data = getTensorData<uint8_t>(output());
+
+ const Shape &input_shape = input()->shape();
+ const Shape &filter_shape = filter()->shape();
+ const Shape &output_shape = output()->shape();
+
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t input_depth = input_shape.dim(3);
+ const int32_t filter_height = filter_shape.dim(1);
+ const int32_t filter_width = filter_shape.dim(2);
+ const int32_t output_height = output_shape.dim(1);
+ const int32_t output_width = output_shape.dim(2);
+
+ const int32_t stride_height = _params.stride_height;
+ const int32_t stride_width = _params.stride_width;
+ const int32_t dilation_height_factor = _params.dilation_height_factor;
+ const int32_t dilation_width_factor = _params.dilation_width_factor;
+ const int32_t depth_multiplier = _params.depth_multiplier;
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ const std::vector<double> effective_output_scales =
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+ std::vector<ChannelQuantMultipliers> quant_multipliers_raw =
+ quantizeMultipliers(effective_output_scales);
+ BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(quant_multipliers_raw);
+
+ for (int batch = 0; batch < batches; ++batch)
+ {
+ for (int out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (int in_channel = 0; in_channel < input_depth; ++in_channel)
+ {
+ for (int m = 0; m < depth_multiplier; ++m)
+ {
+ const int output_channel = m + in_channel * depth_multiplier;
+ const int in_x_origin = (out_x * stride_width) - _padding_width;
+ const int in_y_origin = (out_y * stride_height) - _padding_height;
+ int32_t acc = 0;
+ for (int filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int in_x = in_x_origin + dilation_width_factor * filter_x;
+ const int in_y = in_y_origin + dilation_height_factor * filter_y;
+ // Zero padding by omitting the areas outside the image.
+ const bool is_point_inside_image =
+ (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);
+ if (is_point_inside_image)
+ {
+ int32_t input_val =
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_channel)];
+ int32_t filter_val =
+ filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, output_channel)];
+ acc += (filter_val - filter()->zero_points()[output_channel]) *
+ (input_val - input()->zero_point());
+ }
+ }
+ }
+ if (bias_data)
+ {
+ acc += bias_data[output_channel];
+ }
+ int32_t output_multiplier = quant_multipliers[output_channel].multiplier;
+ int output_shift = quant_multipliers[output_channel].shift;
+ int32_t scaled_acc =
+ tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+ scaled_acc += output()->zero_point();
+ scaled_acc = std::max(scaled_acc, activation_min);
+ scaled_acc = std::min(scaled_acc, activation_max);
+ output_data[calcOffset(output_shape, batch, out_y, out_x, output_channel)] =
+ static_cast<uint8_t>(scaled_acc);
+ }
+ }
+ }
+ }
+ }
+}
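+
+// Worked example of the requantization step above (illustrative numbers, not
+// taken from a test): with input scale 0.5, a per-channel filter scale of
+// 0.25 and output scale 0.125, the effective scale for that channel is
+// 0.5 * 0.25 / 0.125 = 1.0. quantizeMultipliers() encodes each such scale as
+// a fixed-point multiplier/shift pair so that
+//   MultiplyByQuantizedMultiplier(acc, multiplier, shift) ~= acc * scale
+// using integer arithmetic only.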
+
+void DepthwiseConv2D::evalQuantized() const
+{
+ const auto input_scale = static_cast<double>(input()->scale());
+ const auto filter_scale = static_cast<double>(filter()->scale());
+ const auto output_scale = static_cast<double>(output()->scale());
+
+ const double real_multiplier = input_scale * filter_scale / output_scale;
+ int32_t output_multiplier{};
+ int output_shift{};
+ quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::DepthwiseParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.dilation_height_factor = _params.dilation_height_factor;
+ params.dilation_width_factor = _params.dilation_width_factor;
+ params.depth_multiplier = _params.depth_multiplier;
+ // The kernel expects input and filter zero points to be negated.
+ params.input_offset = -input()->zero_point(); // Note the '-'.
+ params.weights_offset = -filter()->zero_point(); // Note the '-'.
+ params.output_offset = output()->zero_point();
+ params.output_multiplier = output_multiplier;
+ params.output_shift = output_shift;
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ tflite::reference_ops::DepthwiseConv(
+ params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(filter()),
+ getTensorData<uint8_t>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+}
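+
+// quantizeMultiplier() above decomposes real_multiplier into a Q31 fixed-point
+// value plus a power-of-two shift. A minimal sketch of the idea (assuming the
+// usual TFLite convention; this is not the helper's actual implementation):
+//
+//   int shift = 0;
+//   const double fraction = std::frexp(real_multiplier, &shift); // [0.5, 1)
+//   const auto multiplier =
+//     static_cast<int32_t>(std::round(fraction * (1ll << 31)));
+//   // now real_multiplier ~= multiplier * 2^(shift - 31)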
+
+void DepthwiseConv2D::evalQuantizedS8PerChannel() const
+{
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::DepthwiseParams params{};
+
+ params.padding_type = tflite::PaddingType::kSame;
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.dilation_height_factor = _params.dilation_height_factor;
+ params.dilation_width_factor = _params.dilation_width_factor;
+ params.depth_multiplier = _params.depth_multiplier;
+ // The kernel expects input and filter zero points to be negated.
+ params.input_offset = -input()->zero_point(); // Note the '-'.
+ params.weights_offset = 0;
+ params.output_offset = output()->zero_point();
+ params.output_multiplier = 1; // unused in tflite code
+ params.output_shift = 0; // unused in tflite code
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ const std::vector<double> effective_output_scales =
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+ std::vector<ChannelQuantMultipliers> quant_multipliers =
+ quantizeMultipliers(effective_output_scales);
+
+ std::vector<int32_t> shifts;
+ std::transform(quant_multipliers.begin(), quant_multipliers.end(), std::back_inserter(shifts),
+ [](ChannelQuantMultipliers cm) { return cm.shift; });
+ std::vector<int32_t> multipliers;
+ std::transform(quant_multipliers.begin(), quant_multipliers.end(),
+ std::back_inserter(multipliers),
+ [](ChannelQuantMultipliers cm) { return cm.multiplier; });
+
+ auto scratchpad = getOutputTensors()[1];
+ int8_t *scratchpad_data = nullptr;
+ if (scratchpad->is_allocatable())
+ scratchpad_data = scratchpad->data<int8_t>();
+
+ luci_interpreter_pal::DepthwiseConvPerChannel<int8_t>(
+ params, multipliers.data(), shifts.data(), getTensorShape(input()),
+ getTensorData<int8_t>(input()), getTensorShape(filter()), getTensorData<int8_t>(filter()),
+ getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()),
+ getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data);
+}
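+
+// The scratchpad tensor handed to DepthwiseConvPerChannel gives PAL backends
+// working memory (CMSIS-NN targets, for example, use such buffers); when the
+// selected PAL does not request one, SetupScratchpadTensor leaves the tensor
+// non-allocatable and scratchpad_data stays null, as the is_allocatable()
+// guard above suggests.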
+
+void DepthwiseConv2D::evalQuantizedS16() const
+{
+ const auto *input_data = getTensorData<int16_t>(input());
+ const auto *filter_data = getTensorData<int16_t>(filter());
+ const auto *bias_data = getTensorData<int64_t>(bias());
+ auto *output_data = getTensorData<int16_t>(output());
+
+ const Shape &input_shape = input()->shape();
+ const Shape &filter_shape = filter()->shape();
+ const Shape &output_shape = output()->shape();
+
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t input_depth = input_shape.dim(3);
+ const int32_t filter_height = filter_shape.dim(1);
+ const int32_t filter_width = filter_shape.dim(2);
+ const int32_t output_height = output_shape.dim(1);
+ const int32_t output_width = output_shape.dim(2);
+
+ const int32_t stride_height = _params.stride_height;
+ const int32_t stride_width = _params.stride_width;
+ const int32_t dilation_height_factor = _params.dilation_height_factor;
+ const int32_t dilation_width_factor = _params.dilation_width_factor;
+ const int32_t depth_multiplier = _params.depth_multiplier;
+
+ const std::vector<double> effective_output_scales =
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+ std::vector<ChannelQuantMultipliers> quant_multipliers_raw =
+ quantizeMultipliers(effective_output_scales);
+
+ BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(quant_multipliers_raw);
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ for (int32_t batch = 0; batch < batches; ++batch)
+ {
+ for (int32_t out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int32_t out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (int32_t in_c = 0; in_c < input_depth; ++in_c)
+ {
+ for (int32_t m = 0; m < depth_multiplier; ++m)
+ {
+ const int32_t out_c = m + in_c * depth_multiplier;
+ const int32_t in_y_origin = out_y * stride_height - _padding_height;
+ const int32_t in_x_origin = out_x * stride_width - _padding_width;
+ int64_t acc = 0;
+ for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
+ const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
+ if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
+ {
+ const int16_t input_val =
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+ const int16_t filter_val =
+ filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, out_c)];
+ acc += static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
+ }
+ }
+ }
+ if (bias_data != nullptr)
+ {
+ acc += bias_data[out_c];
+ }
+
+ int32_t output_multiplier = quant_multipliers[out_c].multiplier;
+ int output_shift = quant_multipliers[out_c].shift;
+ int32_t scaled_acc =
+ tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+
+ scaled_acc = std::max(scaled_acc, activation_min);
+ scaled_acc = std::min(scaled_acc, activation_max);
+
+ output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
+ }
+ }
+ }
+ }
+ }
+}
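+
+// Note on the int64_t accumulator above: a single int16 * int16 product can
+// reach about 2^30, so even two accumulated products may overflow int32
+// (max 2^31 - 1); accumulating in 64 bits keeps the sum exact until the
+// final requantization narrows it back to int16.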
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/DepthwiseConv2D.h b/onert-micro/luci-interpreter/src/kernels/DepthwiseConv2D.h
new file mode 100644
index 000000000..3d1faf6c1
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/DepthwiseConv2D.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_DEPTHWISECONV2D_H
+#define LUCI_INTERPRETER_KERNELS_DEPTHWISECONV2D_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class DepthwiseConv2D : public KernelWithParams<DepthwiseConv2DParams>
+{
+public:
+ DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output,
+ Tensor *scratchpad, const DepthwiseConv2DParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *filter() const { return _inputs[1]; }
+ const Tensor *bias() const { return _inputs[2]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+ void evalQuantizedPerChannel() const;
+ void evalQuantizedS8PerChannel() const;
+ void evalQuantizedS16() const;
+
+private:
+ int32_t _padding_height{};
+ int32_t _padding_width{};
+};
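+
+// Typical call sequence (a sketch mirroring the kernel tests; the memory
+// manager is whatever IMemoryManager implementation the caller provides):
+//
+//   DepthwiseConv2D kernel(&input, &filter, &bias, &output, &scratchpad, params);
+//   kernel.configure();                      // validates types, resizes output
+//   memory_manager->allocate_memory(output);
+//   memory_manager->allocate_memory(scratchpad);
+//   kernel.execute();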
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_DEPTHWISECONV2D_H
diff --git a/onert-micro/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp b/onert-micro/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp
new file mode 100644
index 000000000..6b4673f3e
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp
@@ -0,0 +1,622 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/DepthwiseConv2D.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class DepthwiseConv2DTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(DepthwiseConv2DTest, Float)
+{
+ Shape input_shape{1, 4, 2, 2};
+ Shape filter_shape{1, 2, 2, 4};
+ Shape bias_shape{4};
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(scratchpad);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 71, 0, 99, 0, //
+ 167, 0, 227, 28, //
+ };
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 4}));
+}
+
+TEST_F(DepthwiseConv2DTest, Uint8)
+{
+ std::vector<float> input_data{
+ 1, 2, 7, 8, // column 1
+ 3, 4, 9, 10, // column 2
+ 5, 6, 11, 12, // column 3
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+
+ std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64);
+ std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 3, 2, 2}, input_quant_param.first, input_quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 2, 4}, input_quant_param.first, input_quant_param.second,
+ filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>(
+ {4}, input_quant_param.first * input_quant_param.first, 0, bias_data, _memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 1;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::NONE;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 71, -34, 99, -20, //
+ 91, -26, 127, -4, //
+ };
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 4}));
+}
+
+TEST_F(DepthwiseConv2DTest, SInt16)
+{
+ Shape input_shape{1, 4, 2, 2};
+ Shape filter_shape{1, 2, 2, 4};
+ Shape bias_shape{4};
+ std::vector<int32_t> ref_output_shape{1, 2, 1, 4};
+
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ std::vector<float> ref_output_data{
+ 71, 0, 99, 0, //
+ 167, 0, 227, 28, //
+ };
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>(input_shape, 0.25, 0, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::S16>(filter_shape, 0.2, 0, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::S64>(bias_shape, 0.25 * 0.2, 0, bias_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+ Tensor scratchpad(DataType::S64, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(DepthwiseConv2DTest, SInt16_CWQ_weights)
+{
+ const int output_channels = 4;
+ Shape input_shape{1, 4, 2, 2};
+ Shape filter_shape{1, 2, 2, output_channels};
+ Shape bias_shape{4};
+ std::vector<int32_t> ref_output_shape{1, 2, 1, output_channels};
+
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ std::vector<float> ref_output_data{
+ 71, 0, 99, 0, //
+ 167, 0, 227, 28, //
+ };
+
+ float input_scale = 0.25;
+ std::vector<float> filter_scales{0.2f, 1.f, 0.5f, 0.1f};
+ std::vector<float> bias_scales;
+ for (int i = 0; i < output_channels; ++i)
+ bias_scales.push_back(filter_scales[i] * input_scale);
+ std::vector<int32_t> zerop(4, 0);
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data, _memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 3,
+ filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data,
+ _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+ Tensor scratchpad(DataType::S16, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(DepthwiseConv2DTest, Uint8_CWQ_weights)
+{
+ const int output_channels = 4;
+ Shape input_shape{1, 3, 2, 2};
+ Shape filter_shape{1, 2, 2, output_channels};
+ Shape bias_shape{4};
+ std::vector<int32_t> ref_output_shape{1, 2, 1, output_channels};
+
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ std::vector<float> ref_output_data{
+ 71, -34, 99, -20, //
+ 91, -26, 127, -4, //
+ };
+
+ std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(0, 16);
+ std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
+
+ std::vector<std::pair<float, int32_t>> filter_quant_params;
+ filter_quant_params.push_back(quantizationParams<uint8_t>(-9, 13));
+ filter_quant_params.push_back(quantizationParams<uint8_t>(-14, 10));
+ filter_quant_params.push_back(quantizationParams<uint8_t>(-11, 15));
+ filter_quant_params.push_back(quantizationParams<uint8_t>(-16, 12));
+
+ std::vector<float> filter_scales;
+ std::vector<int32_t> filter_zerops;
+ for (auto iter : filter_quant_params)
+ {
+ filter_scales.push_back(iter.first);
+ filter_zerops.push_back(iter.second);
+ }
+
+ std::vector<float> bias_scales;
+ for (int i = 0; i < output_channels; ++i)
+ bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first);
+ std::vector<int32_t> zerop(output_channels, 0);
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops,
+ 3, filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_scales, zerop, 0, bias_data,
+ _memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+ Tensor scratchpad(DataType::U8, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 1;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::NONE;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, output_quant_param.first));
+}
+
+TEST_F(DepthwiseConv2DTest, SInt8_CWQ_weights)
+{
+ const int output_channels = 4;
+ Shape input_shape{1, 3, 2, 2};
+ Shape filter_shape{1, 2, 2, output_channels};
+ Shape bias_shape{4};
+ std::vector<int32_t> ref_output_shape{1, 2, 1, output_channels};
+
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ std::vector<float> ref_output_data{
+ 71, -34, 99, -20, //
+ 91, -26, 127, -4, //
+ };
+
+ std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(-128, 127);
+ std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-127, 128);
+
+ std::vector<std::pair<float, int32_t>> filter_quant_params;
+ filter_quant_params.push_back(std::pair<float, int32_t>(0.5, 0));
+ filter_quant_params.push_back(std::pair<float, int32_t>(0.25, 0));
+ filter_quant_params.push_back(std::pair<float, int32_t>(1, 0));
+ filter_quant_params.push_back(std::pair<float, int32_t>(0.125, 0));
+
+ std::vector<float> filter_scales;
+ std::vector<int32_t> filter_zerops;
+ for (auto iter : filter_quant_params)
+ {
+ filter_scales.push_back(iter.first);
+ filter_zerops.push_back(iter.second);
+ }
+
+ std::vector<float> bias_scales;
+ for (int i = 0; i < output_channels; ++i)
+ bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first);
+ std::vector<int32_t> zerop(output_channels, 0);
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::S8>(filter_shape, filter_scales, filter_zerops,
+ 3, filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_scales, zerop, 0, bias_data,
+ _memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
+ Tensor scratchpad(DataType::S8, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 1;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::NONE;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, output_quant_param.first));
+}
+
+TEST_F(DepthwiseConv2DTest, InvalidBiasType_NEG)
+{
+ Shape input_shape{1, 4, 2, 2};
+ Shape filter_shape{1, 2, 2, 4};
+ Shape bias_shape{4};
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<int32_t> bias_data{1, 2, 3, 4};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DepthwiseConv2DTest, InOutTypeMismatch_NEG)
+{
+ Shape input_shape{1, 4, 2, 2};
+ Shape filter_shape{1, 2, 2, 4};
+ Shape bias_shape{4};
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+ Tensor scratchpad(DataType::U8, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DepthwiseConv2DTest, InvalidInputShape_NEG)
+{
+ Shape input_shape{4, 2, 2};
+ Shape filter_shape{2, 2, 4};
+ Shape bias_shape{4};
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DepthwiseConv2DTest, InvalidFilterShape_NEG)
+{
+ Shape input_shape{1, 4, 2, 2};
+ Shape filter_shape{2, 1, 2, 4};
+ Shape bias_shape{4};
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DepthwiseConv2DTest, InvalidBiasDim_NEG)
+{
+ Shape input_shape{1, 4, 2, 2};
+ Shape filter_shape{1, 2, 4, 2};
+ Shape bias_shape{4};
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Dequantize.cpp b/onert-micro/luci-interpreter/src/kernels/Dequantize.cpp
new file mode 100644
index 000000000..a0973423d
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Dequantize.cpp
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Dequantize.h"
+#include "kernels/Utils.h"
+#include "PALDequantize.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Dequantize::Dequantize(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Dequantize::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::S8 ||
+ input()->element_type() == DataType::U8 ||
+ input()->element_type() == DataType::S16);
+
+ LUCI_INTERPRETER_CHECK(input()->scales().size() == 1);
+
+ if (input()->element_type() == DataType::S16)
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0);
+
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
+
+  // TODO: enable this resize only for kernels with dynamic shapes
+ output()->resize(input()->shape());
+}
+
+void Dequantize::execute() const
+{
+ tflite::DequantizationParams op_params;
+ op_params.zero_point = input()->zero_point();
+ op_params.scale = input()->scale();
+
+ switch (input()->element_type())
+ {
+ case DataType::U8:
+ {
+ luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()),
+ getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ }
+ case DataType::S8:
+ {
+ luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()),
+ getTensorData<int8_t>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ }
+ case DataType::S16:
+ {
+ luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()),
+ getTensorData<int16_t>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ }
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
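+
+// The affine dequantization applied above follows the usual convention
+//   real_value = scale * (quantized_value - zero_point)
+// e.g. a uint8 value of 0 with scale 0.5 and zero_point 127 maps to
+// 0.5 * (0 - 127) = -63.5, the first expected value in this kernel's Uint8
+// test.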
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Dequantize.h b/onert-micro/luci-interpreter/src/kernels/Dequantize.h
new file mode 100644
index 000000000..5565df0e4
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Dequantize.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H
+#define LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Dequantize : public Kernel
+{
+public:
+ Dequantize(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H
diff --git a/onert-micro/luci-interpreter/src/kernels/Dequantize.test.cpp b/onert-micro/luci-interpreter/src/kernels/Dequantize.test.cpp
new file mode 100644
index 000000000..0cab633d6
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Dequantize.test.cpp
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Dequantize.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class DequantizeTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(DequantizeTest, Uint8)
+{
+ std::vector<uint8_t> input_data{0, 1, 2, 3, 4, 251, 252, 253, 254, 255};
+
+ std::vector<float> ref_output_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64};
+
+ Tensor input_tensor(loco::DataType::U8, {2, 5}, {{0.5}, {127}}, "");
+
+ _memory_manager->allocate_memory(input_tensor);
+ input_tensor.writeData(input_data.data(), input_data.size() * sizeof(uint8_t));
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(DequantizeTest, Sint8)
+{
+ std::vector<int8_t> input_data{-128, -127, -126, -125, -124, 123, 124, 125, 126, 127};
+
+ std::vector<float> ref_output_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64};
+
+ Tensor input_tensor(loco::DataType::S8, {2, 5}, {{0.5}, {-1}}, "");
+
+ _memory_manager->allocate_memory(input_tensor);
+ input_tensor.writeData(input_data.data(), input_data.size() * sizeof(int8_t));
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(DequantizeTest, Sint16)
+{
+ std::vector<int16_t> input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131};
+
+ std::vector<float> ref_output_data{-64.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 65.5};
+
+ Tensor input_tensor(loco::DataType::S16, {2, 5}, {{0.5}, {0}}, "");
+
+ _memory_manager->allocate_memory(input_tensor);
+ input_tensor.writeData(input_data.data(), input_data.size() * sizeof(int16_t));
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(DequantizeTest, InvalidInputType_NEG)
+{
+ std::vector<float> input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DequantizeTest, InvalidOutputType_NEG)
+{
+ std::vector<int16_t> input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131};
+
+ Tensor input_tensor(loco::DataType::S16, {2, 5}, {{0.5}, {0}}, "");
+
+ _memory_manager->allocate_memory(input_tensor);
+ input_tensor.writeData(input_data.data(), input_data.size() * sizeof(int16_t));
+
+ Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DequantizeTest, InvalidInputZeroPoint_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({2, 5}, 0.5, -1, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Div.cpp b/onert-micro/luci-interpreter/src/kernels/Div.cpp
new file mode 100644
index 000000000..f8a469061
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Div.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+#include "PALDiv.h"
+
+namespace luci_interpreter
+{
+
+// TODO: reduce code duplication with Mul
+void configure_kernel_CircleDiv(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) ==
+ Tensor::element_type(kernel.input2()));
+  LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) ==
+                         Tensor::element_type(kernel.output()));
+}
+
+void execute_kernel_CircleDiv(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+
+ const auto *options = cur_op->builtin_options_as_DivOptions();
+
+ luci_interpreter::RuntimeShape input_shape1 =
+ kernels::getTensorRuntimeShape(kernel.input1(), runtime_graph);
+ luci_interpreter::RuntimeShape input_shape2 =
+ kernels::getTensorRuntimeShape(kernel.input2(), runtime_graph);
+
+ bool is_inplace = runtime_graph->is_inplace_op(cur_op);
+
+ switch (Tensor::element_type(kernel.input1()))
+ {
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ {
+ auto tiso_func = luci_interpreter_pal::Div<float>;
+ auto broadcast_tiso_func = luci_interpreter_pal::BroadcastDiv4DSlow<float>;
+ if (is_inplace)
+ {
+ kernels::evalTISOInplaceKernel<float>(tiso_func, broadcast_tiso_func, &kernel, options,
+ std::move(input_shape1), std::move(input_shape2));
+ }
+ else
+ {
+ kernels::TISOData kernel_data = kernel.readData();
+ kernels::evalTISOKernel<float>(tiso_func, broadcast_tiso_func, &kernel, &kernel_data,
+ options, std::move(input_shape1), std::move(input_shape2));
+ }
+ }
+ break;
+#endif // DIS_FLOAT
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
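+
+// Dispatch note: Div passes both a plain and a broadcast function pointer;
+// evalTISOKernel/evalTISOInplaceKernel are assumed to select the broadcast
+// variant only when input_shape1 and input_shape2 differ. Only FLOAT32 is
+// wired up here; any other element type trips the assert above.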
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Div.test.cpp b/onert-micro/luci-interpreter/src/kernels/Div.test.cpp
new file mode 100644
index 000000000..d6014397c
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Div.test.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/div/FloatDivKernel.h"
+#include "luci_interpreter/test_models/div/NegDivKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class DivTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T> std::vector<T> checkDivKernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 2);
+
+ // set left input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ // set right input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(1));
+ std::copy(test_data_base->get_input_data_by_index(1).begin(),
+ test_data_base->get_input_data_by_index(1).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(DivTest, Float_P)
+{
+ // No broadcast
+ {
+ const bool is_with_broadcast = false;
+ test_kernel::TestDataFloatDiv test_data_kernel(is_with_broadcast);
+ std::vector<float> output_data_vector = checkDivKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, kernels::testing::FloatArrayNear(
+ test_data_kernel.get_output_data_by_index(0), 0.0001f));
+ }
+ // With broadcast
+ {
+ const bool is_with_broadcast = true;
+ test_kernel::TestDataFloatDiv test_data_kernel(is_with_broadcast);
+ std::vector<float> output_data_vector = checkDivKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, kernels::testing::FloatArrayNear(
+ test_data_kernel.get_output_data_by_index(0), 0.0001f));
+ }
+}
+
+TEST_F(DivTest, Wrong_Input1_Type_NEG)
+{
+ test_kernel::NegTestDataInput1WrongTypeDiv test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+TEST_F(DivTest, Wrong_Input2_Type_NEG)
+{
+ test_kernel::NegTestDataInput2WrongTypeDiv test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+// TODO: add tests for inplace optimizations for all types
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Elu.cpp b/onert-micro/luci-interpreter/src/kernels/Elu.cpp
new file mode 100644
index 000000000..3c3459178
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Elu.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+#include "SISOKernel.h"
+
+#include "PALElu.h"
+
+namespace luci_interpreter
+{
+
+void configure_kernel_CircleElu(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::SISOKernel kernel(cur_op, runtime_graph);
+
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input()) ==
+ Tensor::element_type(kernel.output()));
+ LUCI_INTERPRETER_CHECK(Tensor::num_elements(kernel.input()) ==
+ Tensor::num_elements(kernel.output()));
+ LUCI_INTERPRETER_CHECK(Tensor::num_dims(kernel.input()) == Tensor::num_dims(kernel.output()));
+}
+
+void execute_kernel_CircleElu(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::SISOKernel kernel(cur_op, runtime_graph);
+
+ const auto *input_data = runtime_graph->getDataByTensor(kernel.input());
+ assert(input_data);
+
+ auto *output_data = runtime_graph->getDataByTensor(kernel.output());
+
+ bool is_inplace = runtime_graph->is_inplace_op(cur_op);
+
+ switch (Tensor::element_type(kernel.input()))
+ {
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ {
+ const float *input_data_float = kernels::getTensorData<float>(input_data);
+ float *output_data_float = kernels::getTensorData<float>(output_data);
+ if (is_inplace)
+ {
+ output_data_float = const_cast<float *>(input_data_float);
+ }
+
+ assert(output_data_float);
+
+ const int flat_size =
+ kernels::getTensorRuntimeShape(kernel.input(), runtime_graph).flatSize();
+
+ luci_interpreter_pal::Elu(flat_size, input_data_float, output_data_float);
+ break;
+ }
+#endif // DIS_FLOAT
+ default:
+ assert(false && "Unsupported type");
+ }
+
+ if (is_inplace)
+ runtime_graph->makeInplaceOperation(kernel.input(), kernel.output());
+}
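+
+// For reference, ELU is the standard exponential linear unit, which the PAL
+// routine above is assumed to implement:
+//   elu(x) = x            for x >= 0
+//   elu(x) = exp(x) - 1   for x <  0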
+} // namespace luci_interpreter
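
For reference, ELU with alpha fixed at 1 (the only variant this kernel exposes) maps x to x for positive inputs and to exp(x) - 1 otherwise. A minimal standalone sketch of what the float path is assumed to delegate to luci_interpreter_pal::Elu:

#include <cmath>
#include <cstdio>

// Reference ELU with alpha = 1: f(x) = x for x > 0, exp(x) - 1 otherwise.
static void elu_reference(int flat_size, const float *input, float *output)
{
  for (int i = 0; i < flat_size; ++i)
  {
    const float x = input[i];
    output[i] = x > 0.f ? x : std::expm1(x); // expm1(x) == exp(x) - 1, more accurate near 0
  }
}

int main()
{
  const float in[4] = {-2.f, -0.5f, 0.f, 3.f};
  float out[4];
  elu_reference(4, in, out);
  for (float v : out)
    std::printf("%f\n", v); // -0.864665, -0.393469, 0.000000, 3.000000
}
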
diff --git a/onert-micro/luci-interpreter/src/kernels/Elu.test.cpp b/onert-micro/luci-interpreter/src/kernels/Elu.test.cpp
new file mode 100644
index 000000000..71a9692c3
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Elu.test.cpp
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/elu/FloatEluKernel.h"
+#include "luci_interpreter/test_models/elu/NegEluKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class EluTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T> std::vector<T> checkEluKernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 1);
+
+ // Set input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(EluTest, Float_P)
+{
+ test_kernel::TestDataFloatElu test_data_kernel;
+ std::vector<float> output_data_vector = checkEluKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, kernels::testing::FloatArrayNear(
+ test_data_kernel.get_output_data_by_index(0), 0.0001f));
+}
+
+TEST_F(EluTest, Input_output_type_mismatch_NEG)
+{
+ test_kernel::NegTestDataInputOutputTypeMismatchEluKernel test_data_kernel;
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Equal.cpp b/onert-micro/luci-interpreter/src/kernels/Equal.cpp
new file mode 100644
index 000000000..76968a364
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Equal.cpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+#include "TISOKernel.h"
+
+#include "PALComparisons.h"
+
+namespace luci_interpreter
+{
+
+namespace
+{
+// TODO: reduce code duplication with the Less kernel
+template <typename T>
+void evalGeneric(const circle::Tensor *x, const circle::Tensor *y, const circle::Tensor *output,
+ BaseRuntimeGraph *runtime_graph)
+{
+ auto x_data = kernels::getTensorData<T>(runtime_graph->getDataByTensor(x));
+ if (x_data == nullptr)
+ x_data = kernels::getTensorData<T>(runtime_graph->getConstDataByTensor(x));
+
+ assert(x_data != nullptr);
+
+ auto y_data = kernels::getTensorData<T>(runtime_graph->getDataByTensor(y));
+ if (y_data == nullptr)
+ y_data = kernels::getTensorData<T>(runtime_graph->getConstDataByTensor(y));
+
+ assert(y_data != nullptr);
+
+ auto output_data = kernels::getTensorData<bool>(runtime_graph->getDataByTensor(output));
+
+ luci_interpreter_pal::ComparisonParams op_params;
+ op_params.is_broadcast = Tensor::num_elements(x) != Tensor::num_elements(y);
+
+ if (op_params.is_broadcast)
+ {
+ luci_interpreter_pal::BroadcastComparison4DSlowNoScaling<T>(
+ op_params, kernels::getTensorShape(x), x_data, kernels::getTensorShape(y), y_data,
+ kernels::getTensorShape(output), output_data, luci_interpreter_pal::EqualFn);
+ }
+ else
+ {
+ const int64_t flat_size = kernels::getTensorShape(x).flatSize();
+ luci_interpreter_pal::ComparisonNoScaling<T>(flat_size, x_data, y_data, output_data,
+ luci_interpreter_pal::EqualFn);
+ }
+}
+
+} // namespace
+
+void configure_kernel_CircleEqual(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) ==
+ Tensor::element_type(kernel.input2()));
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.output()) == DataType::BOOL);
+}
+
+void execute_kernel_CircleEqual(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+
+ switch (Tensor::element_type(kernel.input1()))
+ {
+ case DataType::S64:
+ evalGeneric<int64_t>(kernel.input1(), kernel.input2(), kernel.output(), runtime_graph);
+ break;
+ case DataType::S32:
+ evalGeneric<int32_t>(kernel.input1(), kernel.input2(), kernel.output(), runtime_graph);
+ break;
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ evalGeneric<float>(kernel.input1(), kernel.input2(), kernel.output(), runtime_graph);
+ break;
+#endif // DIS_FLOAT
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+} // namespace luci_interpreter
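
The non-broadcast path above reduces to an elementwise compare over the flattened buffers; only when the element counts differ does the slow 4-D broadcasting routine run. A self-contained sketch of the fast path (illustrative names, not the actual PAL signatures):

#include <cstdint>
#include <cstdio>

// Shape-identical comparison, analogous to ComparisonNoScaling with
// EqualFn: walk both buffers once and emit one bool per element.
template <typename T>
void equal_no_broadcast(int64_t flat_size, const T *x, const T *y, bool *out)
{
  for (int64_t i = 0; i < flat_size; ++i)
    out[i] = (x[i] == y[i]);
}

int main()
{
  const int32_t x[4] = {1, 2, 3, 4};
  const int32_t y[4] = {1, 0, 3, 7};
  bool out[4];
  equal_no_broadcast<int32_t>(4, x, y, out);
  for (bool b : out)
    std::printf("%d ", b); // 1 0 1 0
}
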
diff --git a/onert-micro/luci-interpreter/src/kernels/Equal.test.cpp b/onert-micro/luci-interpreter/src/kernels/Equal.test.cpp
new file mode 100644
index 000000000..15bcd1919
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Equal.test.cpp
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/equal/FloatEqualKernel.h"
+#include "luci_interpreter/test_models/equal/IntEqualKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class EqualTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T, typename U>
+std::vector<U> checkEqualKernel(test_kernel::TestDataBase<T, U> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 2);
+
+ // set left input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ // set right input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(1));
+ std::copy(test_data_base->get_input_data_by_index(1).begin(),
+ test_data_base->get_input_data_by_index(1).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ U *output_data = reinterpret_cast<U *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(U));
+ std::vector<U> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(EqualTest, FloatNoBroadcast_P)
+{
+ const bool is_with_broadcast = false;
+ test_kernel::TestDataFloatEqual test_data_kernel(is_with_broadcast, false);
+ std::vector<bool> output_data_vector = checkEqualKernel<float, bool>(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(EqualTest, FloatWithBroadcast_P)
+{
+ const bool is_with_broadcast = true;
+ test_kernel::TestDataFloatEqual test_data_kernel(is_with_broadcast, false);
+ std::vector<bool> output_data_vector = checkEqualKernel<float, bool>(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(EqualTest, FloatNoBroadcast_NEG)
+{
+ const bool is_with_broadcast = false;
+ test_kernel::TestDataFloatEqual test_data_kernel(is_with_broadcast, true);
+ EXPECT_DEATH(checkEqualKernel(&test_data_kernel), "");
+}
+
+TEST_F(EqualTest, FloatWithBroadcast_NEG)
+{
+ const bool is_with_broadcast = true;
+ test_kernel::TestDataFloatEqual test_data_kernel(is_with_broadcast, true);
+ EXPECT_DEATH(checkEqualKernel(&test_data_kernel), "");
+}
+
+TEST_F(EqualTest, IntWithBroadcast_P)
+{
+ const bool is_with_broadcast = true;
+ test_kernel::TestDataIntEqual test_data_kernel(is_with_broadcast, false);
+ std::vector<bool> output_data_vector = checkEqualKernel<int32_t, bool>(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(EqualTest, IntNoBroadcast_P)
+{
+ const bool is_with_broadcast = false;
+ test_kernel::TestDataIntEqual test_data_kernel(is_with_broadcast, false);
+ std::vector<bool> output_data_vector = checkEqualKernel<int32_t, bool>(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(EqualTest, IntWithBroadcast_NEG)
+{
+ const bool is_with_broadcast = true;
+ test_kernel::TestDataIntEqual test_data_kernel(is_with_broadcast, true);
+ EXPECT_DEATH(checkEqualKernel(&test_data_kernel), "");
+}
+
+TEST_F(EqualTest, IntNoBroadcast_NEG)
+{
+ const bool is_with_broadcast = false;
+ test_kernel::TestDataIntEqual test_data_kernel(is_with_broadcast, true);
+ EXPECT_DEATH(checkEqualKernel(&test_data_kernel), "");
+}
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Exp.cpp b/onert-micro/luci-interpreter/src/kernels/Exp.cpp
new file mode 100644
index 000000000..b46b354ec
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Exp.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+#include "SISOKernel.h"
+
+#include "PALExp.h"
+
+namespace luci_interpreter
+{
+
+void configure_kernel_CircleExp(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::SISOKernel kernel(cur_op, runtime_graph);
+
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input()) ==
+ Tensor::element_type(kernel.output()));
+ LUCI_INTERPRETER_CHECK(Tensor::num_elements(kernel.input()) ==
+ Tensor::num_elements(kernel.output()));
+ LUCI_INTERPRETER_CHECK(Tensor::num_dims(kernel.input()) == Tensor::num_dims(kernel.output()));
+}
+
+void execute_kernel_CircleExp(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::SISOKernel kernel(cur_op, runtime_graph);
+
+ const auto *input_data = runtime_graph->getDataByTensor(kernel.input());
+ assert(input_data);
+
+ auto *output_data = runtime_graph->getDataByTensor(kernel.output());
+
+ bool is_inplace = runtime_graph->is_inplace_op(cur_op);
+
+ switch (Tensor::element_type(kernel.input()))
+ {
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ {
+ const float *input_data_float = kernels::getTensorData<float>(input_data);
+ float *output_data_float = kernels::getTensorData<float>(output_data);
+ if (is_inplace)
+ {
+ output_data_float = const_cast<float *>(input_data_float);
+ }
+
+ assert(output_data_float);
+
+ const int flat_size =
+ kernels::getTensorRuntimeShape(kernel.input(), runtime_graph).flatSize();
+
+ luci_interpreter_pal::Exp(flat_size, input_data_float, output_data_float);
+ break;
+ }
+#endif // DIS_FLOAT
+ default:
+ assert(false && "Unsupported type");
+ }
+
+ if (is_inplace)
+ runtime_graph->makeInplaceOperation(kernel.input(), kernel.output());
+}
+
+} // namespace luci_interpreter
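
Elu and Exp share the same in-place trick: when the runtime graph marks the op in-place, the output pointer is aliased to the input buffer, saving one tensor allocation. That is safe for any pure elementwise unary op, because out[i] depends only on in[i]. A minimal sketch of the property being relied on:

#include <cmath>
#include <cstdio>

// A pure elementwise unary op can legally read and write the same
// buffer; this is the invariant the is_inplace path depends on.
static void exp_inplace(int flat_size, float *buffer)
{
  for (int i = 0; i < flat_size; ++i)
    buffer[i] = std::exp(buffer[i]);
}

int main()
{
  float data[3] = {0.f, 1.f, 2.f};
  exp_inplace(3, data); // the input storage doubles as the output
  std::printf("%f %f %f\n", data[0], data[1], data[2]); // 1.0, e, e^2
}
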
diff --git a/onert-micro/luci-interpreter/src/kernels/Exp.test.cpp b/onert-micro/luci-interpreter/src/kernels/Exp.test.cpp
new file mode 100644
index 000000000..21d21e7e2
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Exp.test.cpp
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/exp/FloatExpKernel.h"
+#include "luci_interpreter/test_models/exp/NegExpKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class ExpTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T> std::vector<T> checkExpKernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 1);
+
+ // Set input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(ExpTest, Float_P)
+{
+ test_kernel::TestDataFloatExp test_data_kernel;
+ std::vector<float> output_data_vector = checkExpKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, kernels::testing::FloatArrayNear(
+ test_data_kernel.get_output_data_by_index(0), 0.0001f));
+}
+
+TEST_F(ExpTest, Input_output_type_mismatch_NEG)
+{
+ test_kernel::NegTestDataInputOutputTypeMismatchExpKernel test_data_kernel;
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/ExpandDims.cpp b/onert-micro/luci-interpreter/src/kernels/ExpandDims.cpp
new file mode 100644
index 000000000..1035bb8f4
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/ExpandDims.cpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+
+namespace luci_interpreter
+{
+
+void configure_kernel_CircleExpandDims(const circle::Operator *cur_op,
+ BaseRuntimeGraph *runtime_graph)
+{
+ const auto input_index = cur_op->inputs()->operator[](0);
+ const auto axis_index = cur_op->inputs()->operator[](1);
+ const auto output_index = cur_op->outputs()->operator[](0);
+
+ assert(input_index != -1);
+ assert(axis_index != -1);
+ assert(output_index != -1);
+
+ const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+ const auto axis = runtime_graph->getCircleTensorByIndex(axis_index);
+ auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+ assert(input != nullptr);
+ assert(axis != nullptr);
+ assert(output != nullptr);
+
+ auto axis_data = runtime_graph->getConstDataByTensor(axis);
+ assert(axis_data != nullptr);
+
+ int32_t axis_value = 0;
+
+ switch (Tensor::element_type(axis))
+ {
+ case DataType::S32:
+ axis_value = *reinterpret_cast<int32_t *>(axis_data);
+ break;
+ case DataType::S64:
+ axis_value = static_cast<int32_t>(*reinterpret_cast<int64_t *>(axis_data));
+ break;
+ default:
+ assert(false && "Unsupported type.");
+ }
+
+ if (axis_value < 0)
+ {
+ axis_value += Tensor::num_dims(input) + 1;
+ }
+
+ LUCI_INTERPRETER_CHECK(axis_value <= Tensor::num_dims(input) and axis_value >= 0);
+}
+
+void execute_kernel_CircleExpandDims(const circle::Operator *cur_op,
+ BaseRuntimeGraph *runtime_graph)
+{
+ const auto input_index = cur_op->inputs()->operator[](0);
+ const auto output_index = cur_op->outputs()->operator[](0);
+
+ assert(input_index != -1);
+ assert(output_index != -1);
+
+ const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+ const auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+ bool is_inplace = runtime_graph->is_inplace_op(cur_op);
+
+ if (is_inplace)
+ {
+ runtime_graph->makeInplaceOperation(input, output);
+ return;
+ }
+
+ // Just copy input to output
+ const auto input_data = runtime_graph->getDataByTensor(input);
+ auto output_data = runtime_graph->getDataByTensor(output);
+
+ assert(input_data != nullptr);
+ assert(output_data != nullptr);
+
+ const size_t element_size = getDataTypeSize(Tensor::element_type(input));
+ const int32_t num_elements = Tensor::num_elements(input);
+ std::memcpy(output_data, input_data, num_elements * element_size);
+}
+
+} // namespace luci_interpreter
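
ExpandDims never touches the element data; it only inserts a 1 into the shape, and negative axes are normalized with axis += rank + 1 exactly as in the configure step above (so axis == -1 appends a trailing dimension). A small sketch of the shape computation, assuming it were done eagerly:

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <vector>

// Output shape of ExpandDims: insert a 1 at `axis`, after normalizing
// negative axes the same way configure_kernel_CircleExpandDims does.
static std::vector<int32_t> expand_dims_shape(std::vector<int32_t> shape, int32_t axis)
{
  const auto rank = static_cast<int32_t>(shape.size());
  if (axis < 0)
    axis += rank + 1;
  assert(axis >= 0 && axis <= rank);
  shape.insert(shape.begin() + axis, 1);
  return shape;
}

int main()
{
  for (int32_t d : expand_dims_shape({2, 3}, 0))
    std::printf("%d ", d); // 1 2 3
  std::printf("\n");
  for (int32_t d : expand_dims_shape({2, 3}, -1))
    std::printf("%d ", d); // 2 3 1
}
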
diff --git a/onert-micro/luci-interpreter/src/kernels/ExpandDims.test.cpp b/onert-micro/luci-interpreter/src/kernels/ExpandDims.test.cpp
new file mode 100644
index 000000000..b7448cd8b
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/ExpandDims.test.cpp
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/expand_dims/ExpandDimsKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class ExpandDimsTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T>
+std::vector<T> checkExpandDimsKernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 1);
+
+ // Set input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(ExpandDimsTest, MainTest_P)
+{
+ test_kernel::TestDataExpandDimsKernel<float> test_data_kernel;
+ std::vector<float> output_data_vector = checkExpandDimsKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(ExpandDimsTest, WrongAxisType_NEG)
+{
+ test_kernel::NegTestDataInvalidInputTypeExpandDimsKernel test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Fill.cpp b/onert-micro/luci-interpreter/src/kernels/Fill.cpp
new file mode 100644
index 000000000..8bc501463
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Fill.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "TISOKernel.h"
+#include "kernels/Utils.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+template <typename T> void fillImpl(const size_t flat_size, const T *value_data, T *output_data)
+{
+ for (size_t i = 0; i < flat_size; ++i)
+ {
+ output_data[i] = *value_data;
+ }
+}
+
+} // namespace
+
+void configure_kernel_CircleFill(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+ // value tensor must be a scalar or have a single element
+ LUCI_INTERPRETER_CHECK(Tensor::num_dims(kernel.input2()) == 0 or
+ Tensor::num_elements(kernel.input2()) == 1);
+ // value and output type must match
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input2()) ==
+ Tensor::element_type(kernel.output()));
+}
+
+void execute_kernel_CircleFill(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+
+ const circle::Tensor *value = kernel.input2();
+ const circle::Tensor *output = kernel.output();
+
+ kernels::TISOData tiso_data = kernel.readData();
+ const uint8_t *value_data = tiso_data.input2_data;
+ uint8_t *output_data = tiso_data.output_data;
+
+ const size_t flat_size = Tensor::num_elements(output);
+
+ switch (Tensor::element_type(value))
+ {
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ fillImpl<float>(flat_size, kernels::getTensorData<float>(value_data),
+ kernels::getTensorData<float>(output_data));
+ break;
+#endif // DIS_FLOAT
+ case DataType::S32:
+ fillImpl<int32_t>(flat_size, kernels::getTensorData<int32_t>(value_data),
+ kernels::getTensorData<int32_t>(output_data));
+ break;
+#ifndef DIS_QUANT
+ case DataType::U8:
+ fillImpl<uint8_t>(flat_size, kernels::getTensorData<uint8_t>(value_data),
+ kernels::getTensorData<uint8_t>(output_data));
+ break;
+#endif // DIS_QUANT
+ default:
+ assert(false && "Not impl yet");
+ }
+}
+
+} // namespace luci_interpreter
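
fillImpl above simply broadcasts one scalar across the output's flat size; the dims tensor (input1) matters only through the output shape it produced at import time. A standalone equivalent:

#include <cstddef>
#include <cstdio>
#include <vector>

// Standalone fillImpl: write the single value into every output slot.
template <typename T> void fill_impl(size_t flat_size, const T *value, T *out)
{
  for (size_t i = 0; i < flat_size; ++i)
    out[i] = *value;
}

int main()
{
  const float value = 3.f;
  std::vector<float> out(6); // e.g. a {2, 3} output tensor
  fill_impl(out.size(), &value, out.data());
  for (float v : out)
    std::printf("%g ", v); // 3 3 3 3 3 3
}
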
diff --git a/onert-micro/luci-interpreter/src/kernels/Fill.test.cpp b/onert-micro/luci-interpreter/src/kernels/Fill.test.cpp
new file mode 100644
index 000000000..a95c7a35f
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Fill.test.cpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/fill/FillKernel.h"
+#include "luci_interpreter/test_models/fill/NegFillKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class FillTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T> std::vector<T> checkFillKernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 1);
+
+ // Set input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(FillTest, MainTest_P)
+{
+ test_kernel::TestDataFillKernel<float> test_data_kernel;
+ std::vector<float> output_data_vector = checkFillKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(FillTest, Input_output_type_mismatch_NEG)
+{
+ test_kernel::NegTestDataInputTypeMismatchFillKernel test_data_kernel;
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+TEST_F(FillTest, Wrong_input_shape_NEG)
+{
+ test_kernel::NegTestDataWrongInputShapeFillKernel test_data_kernel;
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Floor.cpp b/onert-micro/luci-interpreter/src/kernels/Floor.cpp
new file mode 100644
index 000000000..f7871b5c8
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Floor.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Floor.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/floor.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Floor::Floor(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Floor::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ // TODO: enable this only for kernels with dynamic shapes
+ output()->resize(input()->shape());
+}
+
+void Floor::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+void Floor::evalFloat() const
+{
+ tflite::reference_ops::Floor(getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Floor.h b/onert-micro/luci-interpreter/src/kernels/Floor.h
new file mode 100644
index 000000000..ca3ad5997
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Floor.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_FLOOR_H
+#define LUCI_INTERPRETER_KERNELS_FLOOR_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Floor : public Kernel
+{
+public:
+ Floor(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_FLOOR_H
diff --git a/onert-micro/luci-interpreter/src/kernels/Floor.test.cpp b/onert-micro/luci-interpreter/src/kernels/Floor.test.cpp
new file mode 100644
index 000000000..30076fb54
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Floor.test.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Floor.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class FloorTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(FloorTest, SimpleFloat)
+{
+ std::initializer_list<int32_t> input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ 0.2, 8.6, 2.4, 4.3, // Row 1
+ 3, 7.1, 10.5, -0.9, // Row 2
+ };
+
+ std::initializer_list<int32_t> ref_output_shape{1, 2, 4, 1};
+ std::vector<float> ref_output_data{
+ 0, 8, 2, 4, // Row 1
+ 3, 7, 10, -1, // Row 2
+ };
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Floor kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(FloorTest, Input_Output_Type_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ Floor kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/FloorDiv.cpp b/onert-micro/luci-interpreter/src/kernels/FloorDiv.cpp
new file mode 100644
index 000000000..6a8631a16
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/FloorDiv.cpp
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/FloorDiv.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/binary_function.h>
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+FloorDiv::FloorDiv(const Tensor *x, const Tensor *y, Tensor *output)
+ : Kernel({x, y}, {output})
+{
+}
+
+void FloorDiv::configure()
+{
+ LUCI_INTERPRETER_CHECK(x()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(y()->element_type() == output()->element_type());
+
+ // TODO: enable this only for kernels with dynamic shapes
+ output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+void FloorDiv::execute() const
+{
+ switch (x()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+void FloorDiv::evalFloat() const
+{
+ auto FloorDivFunc = [](float x, float y) -> float {
+ return std::floor(static_cast<double>(x) / static_cast<double>(y));
+ };
+
+ const auto x_data = getTensorData<float>(x());
+ const auto y_data = getTensorData<float>(y());
+
+ // Check the denominator
+ for (int i = 0; i < getTensorShape(y()).FlatSize(); ++i)
+ {
+ LUCI_INTERPRETER_CHECK(y_data[i] != 0);
+ }
+
+ if (x()->shape() != y()->shape())
+ {
+ tflite::reference_ops::BroadcastBinaryFunction4DSlow<float, float, float>(
+ getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+ getTensorData<float>(output()), FloorDivFunc);
+ }
+ else
+ {
+ tflite::reference_ops::BinaryFunction<float, float, float>(
+ getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+ getTensorData<float>(output()), FloorDivFunc);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
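
FloorDivFunc floors the real quotient, which differs from C++'s truncating integer division whenever the operands have opposite signs. A tiny sketch reproducing reference values used in the tests that follow:

#include <cmath>
#include <cstdio>

// Floor division as implemented by FloorDivFunc above; truncation
// toward zero would give a different answer for -1.9 / 1.0.
static float floor_div(float x, float y)
{
  return static_cast<float>(std::floor(static_cast<double>(x) / static_cast<double>(y)));
}

int main()
{
  std::printf("%g\n", floor_div(-1.9f, 1.f));  // -2 (truncation would give -1)
  std::printf("%g\n", floor_div(2.4f, 0.5f));  // 4
  std::printf("%g\n", floor_div(-3.1f, -2.f)); // 1
}
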
diff --git a/onert-micro/luci-interpreter/src/kernels/FloorDiv.h b/onert-micro/luci-interpreter/src/kernels/FloorDiv.h
new file mode 100644
index 000000000..e9c47d81a
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/FloorDiv.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_FLOOR_DIV_H
+#define LUCI_INTERPRETER_KERNELS_FLOOR_DIV_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class FloorDiv : public Kernel
+{
+public:
+ FloorDiv(const Tensor *x, const Tensor *y, Tensor *output);
+
+ const Tensor *x() const { return _inputs[0]; }
+ const Tensor *y() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_FLOOR_DIV_H
diff --git a/onert-micro/luci-interpreter/src/kernels/FloorDiv.test.cpp b/onert-micro/luci-interpreter/src/kernels/FloorDiv.test.cpp
new file mode 100644
index 000000000..3e1b5f18e
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/FloorDiv.test.cpp
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/FloorDiv.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class FloorDivTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(FloorDivTest, FloatSimple)
+{
+ Shape x_shape{2, 3};
+ std::vector<float> x_data{
+ 0.5, 2.4, 3.1, // Row 1
+ 1.9, -1.9, -2.8, // Row 2
+ };
+
+ Shape y_shape = x_shape;
+ std::vector<float> y_data{
+ 2.0, 0.5, 3.0, // Row 1
+ 1.0, -1.0, -2.0, // Row 2
+ };
+
+ std::vector<int32_t> ref_output_shape{2, 3};
+ std::vector<float> ref_output_data{
+ 0, 4, 1, // Row 1
+ 1, 1, 1, // Row 2
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(x_shape, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(y_shape, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(FloorDivTest, FloatBroadcast)
+{
+ Shape x_shape{1, 3};
+ std::vector<float> x_data{
+ 0.5, 2.4, -3.1, // Row 1
+ };
+
+ Shape y_shape{3, 3};
+ std::vector<float> y_data{
+ 1.0, 1.0, 1.0, // Row 1
+ 2.0, -0.5, -2.0, // Row 2
+ 0.3, 0.7, 0.9, // Row 3
+ };
+
+ std::vector<int32_t> ref_output_shape{3, 3};
+ std::vector<float> ref_output_data{
+ 0, 2, -4, // Row 1
+ 0, -5, 1, // Row 2
+ 1, 3, -4, // Row 3
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(x_shape, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(y_shape, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(FloorDivTest, DivByZero_NEG)
+{
+ Shape shape{3};
+ std::vector<float> x_data{1, 0, -1};
+ std::vector<float> y_data{0, 0, 0};
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(shape, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(shape, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+TEST_F(FloorDivTest, Input_Output_Type_Mismatch_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(FloorDivTest, Input_Type_Mismatch_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/FullyConnected.cpp b/onert-micro/luci-interpreter/src/kernels/FullyConnected.cpp
new file mode 100644
index 000000000..0493eaa47
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/FullyConnected.cpp
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+
+#include "PALFullyConnected.h"
+
+namespace luci_interpreter
+{
+
+namespace
+{
+void evalFloat(const circle::Tensor *input, const circle::Tensor *weights,
+ const circle::Tensor *bias, const circle::Tensor *output,
+ const circle::FullyConnectedOptions *options, BaseRuntimeGraph *runtime_graph)
+{
+ float activation_min{};
+ float activation_max{};
+ kernels::calculateActivationRange(luci_actfunc(options->fused_activation_function()),
+ &activation_min, &activation_max);
+
+ luci_interpreter_pal::FullyConnectedParams params{};
+ params.float_activation_min = activation_min;
+ params.float_activation_max = activation_max;
+
+ auto *input_data = runtime_graph->getDataByTensor(input);
+ auto *output_data = runtime_graph->getDataByTensor(output);
+
+ auto *weights_data = runtime_graph->getConstDataByTensor(weights);
+ auto *bias_data = runtime_graph->getConstDataByTensor(bias);
+
+ assert(input_data != nullptr);
+ assert(weights_data != nullptr);
+ assert(output_data != nullptr);
+
+ int32_t input_shape[kMaxSmallSize];
+ kernels::getTensorDims(input, runtime_graph, input_shape);
+
+ int32_t weight_shape[kMaxSmallSize];
+ kernels::getTensorDims(weights, runtime_graph, weight_shape);
+
+ int32_t output_shape[kMaxSmallSize];
+ kernels::getTensorDims(output, runtime_graph, output_shape);
+
+ luci_interpreter_pal::FullyConnected(
+ params, input_shape, kernels::getTensorData<float>(input_data), weight_shape,
+ kernels::getTensorData<float>(weights_data), kernels::getTensorData<float>(bias_data),
+ output_shape, kernels::getTensorData<float>(output_data));
+}
+
+#ifndef DIS_QUANT
+void evalQuantized(const circle::Tensor *input, const circle::Tensor *weights,
+ const circle::Tensor *bias, const circle::Tensor *output,
+ const circle::FullyConnectedOptions *options, BaseRuntimeGraph *runtime_graph)
+{
+ int output_shift;
+ int32_t output_activation_min;
+ int32_t output_activation_max;
+ int32_t output_multiplier;
+ const double real_multiplier = kernels::getQuantizedConvolutionMultipler(
+ Tensor::scale(input), Tensor::scale(weights), Tensor::scale(output));
+ kernels::quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+ kernels::calculateActivationRangeQuantized(luci_actfunc(options->fused_activation_function()),
+ output, &output_activation_min,
+ &output_activation_max);
+
+ int32_t input_offset = -Tensor::zero_point(input);
+ int32_t filter_offset = -Tensor::zero_point(weights);
+ int32_t output_offset = Tensor::zero_point(output);
+
+ luci_interpreter_pal::FullyConnectedParams op_params{};
+ op_params.input_offset = input_offset;
+ op_params.weights_offset = filter_offset;
+ op_params.output_offset = output_offset;
+ op_params.output_multiplier = output_multiplier;
+ op_params.output_shift = output_shift;
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+ op_params.lhs_cacheable = false;
+ op_params.rhs_cacheable = false;
+
+ auto *input_data = runtime_graph->getDataByTensor(input);
+ auto *output_data = runtime_graph->getDataByTensor(output);
+
+ auto *weights_data = runtime_graph->getConstDataByTensor(weights);
+ auto *bias_data = runtime_graph->getConstDataByTensor(bias);
+
+ assert(input_data != nullptr);
+ assert(weights_data != nullptr);
+ assert(output_data != nullptr);
+
+ int32_t input_shape[kMaxSmallSize];
+ kernels::getTensorDims(input, runtime_graph, input_shape);
+
+ int32_t weights_shape[kMaxSmallSize];
+ kernels::getTensorDims(weights, runtime_graph, weights_shape);
+
+ int32_t output_shape[kMaxSmallSize];
+ kernels::getTensorDims(output, runtime_graph, output_shape);
+
+ luci_interpreter_pal::FullyConnected(
+ op_params, input_shape, kernels::getTensorData<uint8_t>(input_data), weights_shape,
+ kernels::getTensorData<uint8_t>(weights_data), kernels::getTensorData<int32_t>(bias_data),
+ output_shape, kernels::getTensorData<uint8_t>(output_data));
+}
+#endif
+
+} // namespace
+
+void configure_kernel_CircleFullyConnected(const circle::Operator *cur_op,
+ BaseRuntimeGraph *runtime_graph)
+{
+ const auto input_index = cur_op->inputs()->operator[](0);
+ const auto weight_index = cur_op->inputs()->operator[](1);
+ const auto bias_index = cur_op->inputs()->operator[](2);
+ const auto output_index = cur_op->outputs()->operator[](0);
+
+ assert(input_index != -1);
+ assert(weight_index != -1);
+ assert(output_index != -1);
+
+ const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+ const auto weights = runtime_graph->getCircleTensorByIndex(weight_index);
+ const auto bias = runtime_graph->getCircleTensorByIndex(bias_index);
+ const auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+ assert(input != nullptr);
+ assert(weights != nullptr);
+ assert(output != nullptr);
+
+#ifndef DIS_FLOAT
+ if (Tensor::element_type(weights) == DataType::FLOAT32)
+ {
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(input) == DataType::FLOAT32);
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(output) == DataType::FLOAT32);
+ LUCI_INTERPRETER_CHECK(!bias || Tensor::element_type(bias) == DataType::FLOAT32);
+ }
+#endif // DIS_FLOAT
+#ifndef DIS_QUANT
+ else if (Tensor::element_type(weights) == DataType::U8)
+ {
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(input) == DataType::U8);
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(output) == DataType::U8);
+ LUCI_INTERPRETER_CHECK(!bias || Tensor::element_type(bias) == DataType::S32);
+ }
+ else if (Tensor::element_type(weights) == DataType::S8)
+ {
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(input) == DataType::S8);
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(output) == DataType::S8);
+ LUCI_INTERPRETER_CHECK(!bias || Tensor::element_type(bias) == DataType::S32);
+ }
+#endif // DIS_QUANT
+ else
+ {
+ assert(false && "Unsupported type.");
+ }
+
+ LUCI_INTERPRETER_CHECK(Tensor::num_dims(weights) == 2);
+ LUCI_INTERPRETER_CHECK(bias == nullptr || Tensor::num_elements(bias) == Tensor::dim(weights, 0));
+
+#ifdef DIS_DYN_SHAPES
+ int32_t input_num_elements = Tensor::num_elements(input);
+ LUCI_INTERPRETER_CHECK(input_num_elements % Tensor::dim(weights, 1) == 0);
+#endif // DIS_DYN_SHAPES
+
+ const auto *options = cur_op->builtin_options_as_FullyConnectedOptions();
+
+ // TODO: support keep_num_dims == true
+ assert(options->keep_num_dims() == false);
+}
+
+// TODO: think about how to remove the unused parameter
+void execute_kernel_CircleFullyConnected(const circle::Operator *cur_op,
+ BaseRuntimeGraph *runtime_graph)
+{
+ const auto input_index = cur_op->inputs()->operator[](0);
+ const auto weight_index = cur_op->inputs()->operator[](1);
+ const auto bias_index = cur_op->inputs()->operator[](2);
+ const auto output_index = cur_op->outputs()->operator[](0);
+
+ assert(input_index != -1);
+ assert(weight_index != -1);
+ assert(output_index != -1);
+
+ const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+ const auto weights = runtime_graph->getCircleTensorByIndex(weight_index);
+ const auto bias = runtime_graph->getCircleTensorByIndex(bias_index);
+ const auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+ assert(input != nullptr);
+ assert(weights != nullptr);
+ assert(output != nullptr);
+
+ const auto *options = cur_op->builtin_options_as_FullyConnectedOptions();
+
+ switch (Tensor::element_type(input))
+ {
+#ifndef DIS_QUANT
+ case DataType::U8:
+ evalQuantized(input, weights, bias, output, options, runtime_graph);
+ break;
+#endif // DIS_QUANT
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ evalFloat(input, weights, bias, output, options, runtime_graph);
+ break;
+#endif // DIS_FLOAT
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+} // namespace luci_interpreter
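
The quantized path folds input_scale * filter_scale / output_scale into one integer multiplier plus a power-of-two shift. A hedged sketch of that decomposition, assuming kernels::quantizeMultiplier follows the usual TFLite convention (Q31 significand, so real ~ q / 2^31 * 2^shift); the real helper also handles zero and negative multipliers, which this sketch asserts away:

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Decompose a positive real multiplier into a Q31 value and a shift:
// real ~= (quantized / 2^31) * 2^shift
static void quantize_multiplier(double real, int32_t *quantized, int *shift)
{
  assert(real > 0.0);
  const double significand = std::frexp(real, shift); // in [0.5, 1)
  auto q = static_cast<int64_t>(std::round(significand * (1ll << 31)));
  if (q == (1ll << 31)) // rounding bumped the significand to 1.0
  {
    q /= 2;
    ++*shift;
  }
  *quantized = static_cast<int32_t>(q);
}

int main()
{
  const double real_multiplier = 0.5 * 0.25 / 0.1; // input * filter / output scale = 1.25
  int32_t quantized;
  int shift;
  quantize_multiplier(real_multiplier, &quantized, &shift);
  std::printf("q=%d shift=%d\n", static_cast<int>(quantized), shift);
  std::printf("%f\n", quantized / std::pow(2.0, 31) * std::pow(2.0, shift)); // ~1.25
}
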
diff --git a/onert-micro/luci-interpreter/src/kernels/FullyConnected.test.cpp b/onert-micro/luci-interpreter/src/kernels/FullyConnected.test.cpp
new file mode 100644
index 000000000..cbccd5d4f
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/FullyConnected.test.cpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/fully_connected/FloatFullyConnectedKernel.h"
+#include "luci_interpreter/test_models/fully_connected/U8FullyConnectedKernel.h"
+#include "luci_interpreter/test_models/fully_connected/NegFullyConnectedKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class FullyConnectedTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T>
+std::vector<T> checkFullyConnectedKernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 1);
+
+ // set input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(FullyConnectedTest, Float_P)
+{
+ test_kernel::TestDataFloatFullyConnected test_data_kernel;
+ std::vector<float> output_data_vector = checkFullyConnectedKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, kernels::testing::FloatArrayNear(
+ test_data_kernel.get_output_data_by_index(0), 0.0001f));
+}
+
+TEST_F(FullyConnectedTest, U8_P)
+{
+ test_kernel::TestDataU8FullyConnected test_data_kernel;
+ std::vector<uint8_t> output_data_vector = checkFullyConnectedKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(FullyConnectedTest, Wrong_weight_type_NEG)
+{
+ test_kernel::NegTestDataWrongWeightTypeFullyConnectedKernel test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+TEST_F(FullyConnectedTest, Wrong_weight_shape_NEG)
+{
+ test_kernel::NegTestDataWrongWeightShapeFullyConnectedKernel test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+TEST_F(FullyConnectedTest, Wrong_bias_shape_NEG)
+{
+ test_kernel::NegTestDataWrongBiasShapeFullyConnectedKernel test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+// TODO: add tests for S8
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Gather.cpp b/onert-micro/luci-interpreter/src/kernels/Gather.cpp
new file mode 100644
index 000000000..b146b8993
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Gather.cpp
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+#include "TISOKernel.h"
+
+#include <cassert>
+
+namespace luci_interpreter
+{
+namespace
+{
+
+template <typename InputT, typename CoordsT = int32_t>
+void gather(const circle::GatherOptions *options, kernels::TISOKernel *kernel)
+{
+ kernels::TISOData tiso_data = kernel->readData();
+
+ const InputT *input_data = kernels::getTensorData<InputT>(tiso_data.input1_data);
+ const CoordsT *coords_data = kernels::getTensorData<CoordsT>(tiso_data.input2_data);
+ InputT *output_data = kernels::getTensorData<InputT>(tiso_data.output_data);
+
+ const circle::Tensor *input = kernel->input1();
+ const circle::Tensor *coords = kernel->input2();
+
+ const int input_dims_size = Tensor::num_dims(input);
+ int axis = options->axis();
+ if (axis < 0)
+ {
+ axis += input_dims_size;
+ }
+
+ int batch_dims = options->batch_dims();
+ // batch_dims should be in range [-rank(coords), rank(coords)].
+ // A negative batch_dims is normalized by adding the rank of coords.
+ const int coords_dims_size = Tensor::num_dims(coords);
+ if (batch_dims < 0)
+ {
+ batch_dims += coords_dims_size;
+ }
+
+ const int axis_size = Tensor::dim(input, axis);
+
+ int batch_size = 1;
+ for (int i = 0; i < batch_dims; ++i)
+ {
+ batch_size *= Tensor::dim(input, i);
+ }
+ int outer_size = 1;
+ for (int i = batch_dims; i < axis; ++i)
+ {
+ outer_size *= Tensor::dim(input, i);
+ }
+ int inner_size = 1;
+ for (int i = axis + 1; i < input_dims_size; ++i)
+ {
+ inner_size *= Tensor::dim(input, i);
+ }
+ int coord_size = 1;
+ for (int i = batch_dims; i < coords_dims_size; ++i)
+ {
+ coord_size *= Tensor::dim(coords, i);
+ }
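+ // Worked example (illustrative, not from the source): input shape [2, 3, 4] with
+ // axis = 1 and batch_dims = 0 gives batch_size = 1, outer_size = 2, axis_size = 3,
+ // inner_size = 4; each gathered coordinate then copies one contiguous block of
+ // inner_size elements in the memcpy below.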
+
+ for (int batch = 0; batch < batch_size; ++batch)
+ {
+ for (int outer = 0; outer < outer_size; ++outer)
+ {
+ for (int coord = 0; coord < coord_size; ++coord)
+ {
+ std::memcpy(
+ output_data + (((batch * outer_size) + outer) * coord_size + coord) * inner_size,
+ input_data +
+ (((batch * outer_size) + outer) * axis_size + coords_data[batch * coord_size + coord]) *
+ inner_size,
+ sizeof(InputT) * inner_size);
+ }
+ }
+ }
+}
+
+} // namespace
+
+void configure_kernel_CircleGather(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+
+ const auto *options = cur_op->builtin_options_as_GatherOptions();
+
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input2()) == DataType::S32);
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) == DataType::FLOAT32 or
+ Tensor::element_type(kernel.input1()) == DataType::S8 or
+ Tensor::element_type(kernel.input1()) == DataType::S32);
+
+ int32_t axis = options->axis();
+ int32_t num_dims = Tensor::num_dims(kernel.input1());
+ if (axis < 0)
+ {
+ axis += num_dims;
+ }
+
+ LUCI_INTERPRETER_CHECK(axis >= 0 and axis < num_dims);
+
+ int32_t batch_dims = options->batch_dims();
+ int32_t coords_num_dims = Tensor::num_dims(kernel.input2());
+ // batch_dims should be in range [-rank(coords), rank(coords)].
+ // A negative batch_dims is normalized by adding the rank of coords.
+ if (batch_dims < 0)
+ {
+ batch_dims += coords_num_dims;
+ }
+ LUCI_INTERPRETER_CHECK(batch_dims <= axis);
+ LUCI_INTERPRETER_CHECK(batch_dims >= 0 and batch_dims < num_dims);
+ LUCI_INTERPRETER_CHECK(batch_dims <= coords_num_dims);
+ for (int i = 0; i < batch_dims; ++i)
+ {
+ LUCI_INTERPRETER_CHECK(Tensor::dim(kernel.input1(), i) == Tensor::dim(kernel.input2(), i));
+ }
+}
+
+void execute_kernel_CircleGather(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+
+ const auto *options = cur_op->builtin_options_as_GatherOptions();
+
+ switch (Tensor::element_type(kernel.input1()))
+ {
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ return gather<float, int32_t>(options, &kernel);
+#endif // DIS_FLOAT
+#ifndef DIS_QUANT
+ case DataType::S8:
+ return gather<int8_t, int32_t>(options, &kernel);
+#endif // DIS_QUANT
+ case DataType::S32:
+ return gather<int32_t, int32_t>(options, &kernel);
+ default:
+ assert(false && "Unsupported type");
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Gather.test.cpp b/onert-micro/luci-interpreter/src/kernels/Gather.test.cpp
new file mode 100644
index 000000000..c1fa5efab
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Gather.test.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/gather/FloatGatherKernel.h"
+#include "luci_interpreter/test_models/gather/IntGatherKernel.h"
+#include "luci_interpreter/test_models/gather/NegGatherKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class GatherTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T> std::vector<T> checkGatherKernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 1);
+
+ // Set input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(GatherTest, Gather_Float_P)
+{
+ test_kernel::TestDataFloatGather test_data_float_gather;
+ std::vector<float> output_data_vector = checkGatherKernel(&test_data_float_gather);
+ EXPECT_THAT(output_data_vector, kernels::testing::FloatArrayNear(
+ test_data_float_gather.get_output_data_by_index(0), 0.0001f));
+}
+
+TEST_F(GatherTest, Gather_Int_P)
+{
+ test_kernel::TestDataIntGather test_data_int_gather;
+ std::vector<int32_t> output_data_vector = checkGatherKernel(&test_data_int_gather);
+ EXPECT_THAT(output_data_vector, test_data_int_gather.get_output_data_by_index(0));
+}
+
+TEST_F(GatherTest, Input_output_type_mismatch_NEG)
+{
+ test_kernel::NegTestDataInputOutputTypeMismatchGatherKernel test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+TEST_F(GatherTest, Wrong_position_type_NEG)
+{
+ test_kernel::NegTestDataWrongPositionTypeGatherKernel test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+TEST_F(GatherTest, Wrong_axis_NEG)
+{
+ test_kernel::NegTestDataWrongAxisGatherKernel test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+// TODO: add S8 test
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Greater.cpp b/onert-micro/luci-interpreter/src/kernels/Greater.cpp
new file mode 100644
index 000000000..b073a4a67
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Greater.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+#include "TISOKernel.h"
+
+#include "PALComparisons.h"
+
+namespace luci_interpreter
+{
+
+namespace
+{
+// TODO: reduce code duplication with the Less kernel
+template <typename T>
+void evalGeneric(const circle::Tensor *x, const circle::Tensor *y, const circle::Tensor *output,
+ BaseRuntimeGraph *runtime_graph)
+{
+ auto x_data = kernels::getTensorData<T>(runtime_graph->getDataByTensor(x));
+ if (x_data == nullptr)
+ x_data = kernels::getTensorData<T>(runtime_graph->getConstDataByTensor(x));
+
+ assert(x_data != nullptr);
+
+ auto y_data = kernels::getTensorData<T>(runtime_graph->getDataByTensor(y));
+ if (y_data == nullptr)
+ y_data = kernels::getTensorData<T>(runtime_graph->getConstDataByTensor(y));
+
+ assert(y_data != nullptr);
+
+ auto output_data = kernels::getTensorData<bool>(runtime_graph->getDataByTensor(output));
+
+ luci_interpreter_pal::ComparisonParams op_params;
+ op_params.is_broadcast = Tensor::num_elements(x) != Tensor::num_elements(y);
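+ // NOTE: op_params is set up here but not passed to ComparisonNoScaling below,
+ // which compares the tensors element-wise over the flat size without broadcasting.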
+
+ const int64_t flat_size = kernels::getTensorShape(x).flatSize();
+ luci_interpreter_pal::ComparisonNoScaling<T>(flat_size, x_data, y_data, output_data,
+ luci_interpreter_pal::GreaterFn);
+}
+
+} // namespace
+
+void configure_kernel_CircleGreater(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) ==
+ Tensor::element_type(kernel.input2()));
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.output()) == DataType::BOOL);
+}
+
+void execute_kernel_CircleGreater(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+
+ switch (Tensor::element_type(kernel.input1()))
+ {
+ case DataType::S64:
+ evalGeneric<int64_t>(kernel.input1(), kernel.input2(), kernel.output(), runtime_graph);
+ break;
+ case DataType::S32:
+ evalGeneric<int32_t>(kernel.input1(), kernel.input2(), kernel.output(), runtime_graph);
+ break;
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ evalGeneric<float>(kernel.input1(), kernel.input2(), kernel.output(), runtime_graph);
+ break;
+#endif // DIS_FLOAT
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Greater.test.cpp b/onert-micro/luci-interpreter/src/kernels/Greater.test.cpp
new file mode 100644
index 000000000..cd938f00d
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Greater.test.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/greater/FloatGreaterKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class GreaterTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T, typename U>
+std::vector<U> checkGreaterKernel(test_kernel::TestDataBase<T, U> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 2);
+
+ // set left input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ // set right input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(1));
+ std::copy(test_data_base->get_input_data_by_index(1).begin(),
+ test_data_base->get_input_data_by_index(1).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ U *output_data = reinterpret_cast<U *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(U));
+ std::vector<U> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(GreaterTest, FloatNoBroadcast_P)
+{
+ const bool is_with_broadcast = false;
+ test_kernel::TestDataFloatGreater test_data_kernel(is_with_broadcast, false);
+ std::vector<bool> output_data_vector = checkGreaterKernel<float, bool>(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(GreaterTest, FloatNoBroadcast_NEG)
+{
+ const bool is_with_broadcast = false;
+ test_kernel::TestDataFloatGreater test_data_kernel(is_with_broadcast, true);
+ EXPECT_DEATH(checkGreaterKernel(&test_data_kernel), "");
+}
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/GreaterEqual.cpp b/onert-micro/luci-interpreter/src/kernels/GreaterEqual.cpp
new file mode 100644
index 000000000..dfb585d5a
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/GreaterEqual.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+#include "TISOKernel.h"
+
+#include "PALComparisons.h"
+
+namespace luci_interpreter
+{
+
+namespace
+{
+// TODO: reduce code duplication with the Less kernel
+template <typename T>
+void evalGeneric(const circle::Tensor *x, const circle::Tensor *y, const circle::Tensor *output,
+ BaseRuntimeGraph *runtime_graph)
+{
+ auto x_data = kernels::getTensorData<T>(runtime_graph->getDataByTensor(x));
+ if (x_data == nullptr)
+ x_data = kernels::getTensorData<T>(runtime_graph->getConstDataByTensor(x));
+
+ assert(x_data != nullptr);
+
+ auto y_data = kernels::getTensorData<T>(runtime_graph->getDataByTensor(y));
+ if (y_data == nullptr)
+ y_data = kernels::getTensorData<T>(runtime_graph->getConstDataByTensor(y));
+
+ assert(y_data != nullptr);
+
+ auto output_data = kernels::getTensorData<bool>(runtime_graph->getDataByTensor(output));
+
+ luci_interpreter_pal::ComparisonParams op_params;
+ op_params.is_broadcast = Tensor::num_elements(x) != Tensor::num_elements(y);
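+ // NOTE: op_params is set up here but not passed to ComparisonNoScaling below,
+ // which compares the tensors element-wise over the flat size without broadcasting.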
+
+ const int64_t flat_size = kernels::getTensorShape(x).flatSize();
+ luci_interpreter_pal::ComparisonNoScaling<T>(flat_size, x_data, y_data, output_data,
+ luci_interpreter_pal::GreaterEqualFn);
+}
+
+} // namespace
+
+void configure_kernel_CircleGreaterEqual(const circle::Operator *cur_op,
+ BaseRuntimeGraph *runtime_graph)
+{
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) ==
+ Tensor::element_type(kernel.input2()));
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.output()) == DataType::BOOL);
+}
+
+void execute_kernel_CircleGreaterEqual(const circle::Operator *cur_op,
+ BaseRuntimeGraph *runtime_graph)
+{
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+
+ switch (Tensor::element_type(kernel.input1()))
+ {
+ case DataType::S64:
+ evalGeneric<int64_t>(kernel.input1(), kernel.input2(), kernel.output(), runtime_graph);
+ break;
+ case DataType::S32:
+ evalGeneric<int32_t>(kernel.input1(), kernel.input2(), kernel.output(), runtime_graph);
+ break;
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ evalGeneric<float>(kernel.input1(), kernel.input2(), kernel.output(), runtime_graph);
+ break;
+#endif // DIS_FLOAT
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/GreaterEqual.test.cpp b/onert-micro/luci-interpreter/src/kernels/GreaterEqual.test.cpp
new file mode 100644
index 000000000..4bc2e2b36
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/GreaterEqual.test.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/greater_equal/FloatGreaterEqualKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class GreaterEqualTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T, typename U>
+std::vector<U> checkGreaterEqualKernel(test_kernel::TestDataBase<T, U> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 2);
+
+ // set left input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ // set right input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(1));
+ std::copy(test_data_base->get_input_data_by_index(1).begin(),
+ test_data_base->get_input_data_by_index(1).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ U *output_data = reinterpret_cast<U *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(U));
+ std::vector<U> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(GreaterEqualTest, FloatNoBroadcast_P)
+{
+ const bool is_with_broadcast = false;
+ test_kernel::TestDataFloatGreaterEqual test_data_kernel(is_with_broadcast, false);
+ std::vector<bool> output_data_vector = checkGreaterEqualKernel<float, bool>(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(GreaterEqualTest, FloatNoBroadcast_NEG)
+{
+ const bool is_with_broadcast = false;
+ test_kernel::TestDataFloatGreaterEqual test_data_kernel(is_with_broadcast, true);
+ EXPECT_DEATH(checkGreaterEqualKernel(&test_data_kernel), "");
+}
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/If.cpp b/onert-micro/luci-interpreter/src/kernels/If.cpp
new file mode 100644
index 000000000..971708bca
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/If.cpp
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/If.h"
+#include "kernels/Utils.h"
+
+#include <cstring>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+static std::vector<const Tensor *> joinInputs(const Tensor *cond,
+ const std::vector<const Tensor *> &inputs)
+{
+ std::vector<const Tensor *> result{cond};
+ result.insert(result.cend(), inputs.cbegin(), inputs.cend());
+ return result;
+}
+
+If::If(const Tensor *cond, const std::vector<const Tensor *> &inputs, std::vector<Tensor *> outputs,
+ RuntimeGraph *then_graph, RuntimeGraph *else_graph)
+ : Kernel(joinInputs(cond, inputs), std::move(outputs)), _then_graph(then_graph),
+ _else_graph(else_graph)
+{
+}
+
+void If::configure()
+{
+ LUCI_INTERPRETER_CHECK(cond()->element_type() == DataType::BOOL);
+ LUCI_INTERPRETER_CHECK(cond()->shape().num_elements() == 1);
+
+ for (RuntimeGraph *graph : {_then_graph, _else_graph})
+ {
+ (void)graph;
+ LUCI_INTERPRETER_CHECK(graph->getInputTensors().size() == getInputTensors().size() - 1);
+ LUCI_INTERPRETER_CHECK(graph->getOutputTensors().size() == getOutputTensors().size());
+ }
+}
+
+void If::execute() const
+{
+ const bool cond_value = cond()->data<bool>()[0];
+
+ RuntimeGraph *active_graph = cond_value ? _then_graph : _else_graph;
+ const auto &graph_inputs = active_graph->getInputTensors();
+ const auto &graph_outputs = active_graph->getOutputTensors();
+
+ // Copy kernel inputs to active graph inputs.
+ for (size_t i = 0; i < getInputTensors().size() - 1; ++i)
+ {
+ LUCI_INTERPRETER_CHECK(graph_inputs[i]->element_type() == input(i)->element_type());
+ graph_inputs[i]->resize(input(i)->shape());
+
+ const int32_t num_elements = input(i)->shape().num_elements();
+ const std::size_t element_size = getDataTypeSize(input(i)->element_type());
+ // TODO: think about how to allocate memory for the output in the main graph
+ active_graph->configureAllocations(graph_inputs[i]);
+ std::memcpy(graph_inputs[i]->data<void>(), input(i)->data<void>(), num_elements * element_size);
+ }
+
+ active_graph->execute();
+
+ // Copy graph outputs to kernel outputs.
+ for (size_t i = 0; i < getOutputTensors().size(); ++i)
+ {
+ LUCI_INTERPRETER_CHECK(graph_outputs[i]->element_type() == output(i)->element_type());
+ output(i)->resize(graph_outputs[i]->shape());
+ // TODO: think about how to allocate memory for the output in the main graph
+ active_graph->configureAllocations(output(i));
+
+ const int32_t num_elements = output(i)->shape().num_elements();
+ const std::size_t element_size = getDataTypeSize(output(i)->element_type());
+ std::memcpy(output(i)->data<void>(), graph_outputs[i]->data<void>(),
+ num_elements * element_size);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/If.h b/onert-micro/luci-interpreter/src/kernels/If.h
new file mode 100644
index 000000000..fa6ab371a
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/If.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_IF_H
+#define LUCI_INTERPRETER_KERNELS_IF_H
+
+#include "core/Kernel.h"
+#include "core/RuntimeGraph.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class If : public Kernel
+{
+public:
+ If(const Tensor *cond, const std::vector<const Tensor *> &inputs, std::vector<Tensor *> outputs,
+ RuntimeGraph *then_graph, RuntimeGraph *else_graph);
+
+ const Tensor *cond() const { return _inputs[0]; }
+ const Tensor *input(int index) const { return _inputs[1 + index]; }
+ Tensor *output(int index) const { return _outputs[index]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ RuntimeGraph *const _then_graph;
+ RuntimeGraph *const _else_graph;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_IF_H
diff --git a/onert-micro/luci-interpreter/src/kernels/If.test.cpp b/onert-micro/luci-interpreter/src/kernels/If.test.cpp
new file mode 100644
index 000000000..c5f4faf75
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/If.test.cpp
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "core/RuntimeModule.h"
+#include "kernels/Add.h"
+#include "kernels/If.h"
+#include "kernels/Mul.h"
+#include "kernels/TestUtils.h"
+
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class IfTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+RuntimeGraph *buildAddSubgraph(RuntimeModule *module, IMemoryManager *memory_manager)
+{
+ RuntimeGraph *graph = module->addGraph(memory_manager);
+ Tensor *input1 = graph->addTensor(
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+ Tensor *input2 = graph->addTensor(
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+ Tensor *output = graph->addTensor(
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+
+ memory_manager->allocate_memory(*input1);
+ memory_manager->allocate_memory(*input2);
+ memory_manager->allocate_memory(*output);
+
+ graph->setInputTensors({input1, input2});
+ graph->setOutputTensors({output});
+
+ AddParams params{};
+ params.activation = Activation::NONE;
+ graph->addKernel(std::make_unique<Add>(input1, input2, output, params));
+
+ return graph;
+}
+
+RuntimeGraph *buildMulSubgraph(RuntimeModule *module, IMemoryManager *memory_manager)
+{
+ RuntimeGraph *graph = module->addGraph(memory_manager);
+ Tensor *input1 = graph->addTensor(
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+ Tensor *input2 = graph->addTensor(
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+ Tensor *output = graph->addTensor(
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+
+ memory_manager->allocate_memory(*input1);
+ memory_manager->allocate_memory(*input2);
+ memory_manager->allocate_memory(*output);
+
+ graph->setInputTensors({input1, input2});
+ graph->setOutputTensors({output});
+
+ MulParams params{};
+ params.activation = Activation::NONE;
+ graph->addKernel(std::make_unique<Mul>(input1, input2, output, params));
+
+ return graph;
+}
+
+TEST_F(IfTest, CondTrue)
+{
+ Tensor cond = makeInputTensor<DataType::BOOL>({1}, {true}, _memory_manager.get());
+ Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get());
+ Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get());
+ Tensor output = makeOutputTensor(DataType::FLOAT32);
+
+ RuntimeModule module(nullptr);
+ RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get());
+ RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get());
+
+ If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph);
+ kernel.configure();
+ _memory_manager->allocate_memory(output);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output), FloatArrayNear({6, 9}));
+}
+
+TEST_F(IfTest, CondFalse)
+{
+ Tensor cond = makeInputTensor<DataType::BOOL>({1}, {false}, _memory_manager.get());
+ Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get());
+ Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get());
+ Tensor output = makeOutputTensor(DataType::FLOAT32);
+
+ RuntimeModule module(nullptr);
+ RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get());
+ RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get());
+
+ If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph);
+ kernel.configure();
+ _memory_manager->allocate_memory(output);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output), FloatArrayNear({5, 14}));
+}
+
+TEST_F(IfTest, InvalidCondType_NEG)
+{
+ Tensor cond = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get());
+ Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get());
+ Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get());
+ Tensor output = makeOutputTensor(DataType::FLOAT32);
+
+ RuntimeModule module(nullptr);
+ RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get());
+ RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get());
+
+ If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(IfTest, InvalidCondElementNum_NEG)
+{
+ Tensor cond = makeInputTensor<DataType::BOOL>({2}, {false, true}, _memory_manager.get());
+ Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get());
+ Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get());
+ Tensor output = makeOutputTensor(DataType::FLOAT32);
+
+ RuntimeModule module(nullptr);
+ RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get());
+ RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get());
+
+ If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/InstanceNorm.cpp b/onert-micro/luci-interpreter/src/kernels/InstanceNorm.cpp
new file mode 100644
index 000000000..577dc6454
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/InstanceNorm.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/InstanceNorm.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/common.h>
+#include <cmath>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+InstanceNorm::InstanceNorm(const Tensor *input, const Tensor *gamma, const Tensor *beta,
+ Tensor *output, const InstanceNormParams &params)
+ : KernelWithParams<InstanceNormParams>({input, gamma, beta}, {output}, params)
+{
+}
+
+void InstanceNorm::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(gamma()->element_type() == input()->element_type());
+ LUCI_INTERPRETER_CHECK(gamma()->shape().num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(gamma()->shape().dim(0) == input()->shape().dim(3) ||
+ gamma()->shape().dim(0) == 1);
+ LUCI_INTERPRETER_CHECK(beta()->element_type() == input()->element_type());
+ LUCI_INTERPRETER_CHECK(beta()->shape().num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(beta()->shape().dim(0) == input()->shape().dim(3) ||
+ beta()->shape().dim(0) == 1);
+ // TODO: enable it only if kernel with dynamic shapes
+ output()->resize(input()->shape());
+}
+
+void InstanceNorm::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+void InstanceNorm::evalFloat() const
+{
+ float activation_min, activation_max;
+ calculateActivationRange(params().activation, &activation_min, &activation_max);
+ auto input_shape = getTensorShape(input());
+ auto output_shape = getTensorShape(output());
+ const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int32_t heights = tflite::MatchingDim(input_shape, 1, output_shape, 1);
+ const int32_t widths = tflite::MatchingDim(input_shape, 2, output_shape, 2);
+ const int32_t channels = tflite::MatchingDim(input_shape, 3, output_shape, 3);
+ const float *input_data = getTensorData<float>(input());
+ const float *gamma_data = getTensorData<float>(gamma());
+ auto gamma_shape = getTensorShape(gamma());
+ bool single_gamma = gamma_shape.DimensionsCount() == 1 && gamma_shape.Dims(0) == 1;
+ const float *beta_data = getTensorData<float>(beta());
+ auto beta_shape = getTensorShape(beta());
+ bool single_beta = beta_shape.DimensionsCount() == 1 && beta_shape.Dims(0) == 1;
+ float *output_data = getTensorData<float>(output());
+ for (int32_t batch = 0; batch < batches; batch++)
+ {
+ for (int32_t channel = 0; channel < channels; channel++)
+ {
+ double sum = 0.0;
+ double square_sum = 0.0;
+ int32_t size = heights * widths;
+ for (int32_t height = 0; height < heights; height++)
+ {
+ for (int32_t width = 0; width < widths; width++)
+ {
+ double input_val = input_data[tflite::Offset(input_shape, batch, height, width, channel)];
+ sum += input_val;
+ square_sum += (input_val * input_val);
+ }
+ }
+ double mean = sum / size;
+ double var = square_sum / size - mean * mean;
+
+ double gamma = single_gamma ? gamma_data[0] : gamma_data[channel];
+ double beta = single_beta ? beta_data[0] : beta_data[channel];
+ double a = gamma / (std::sqrt(var + params().epsilon));
+ double b = -mean * a + beta;
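+ // With a = gamma / sqrt(var + eps) and b = beta - mean * a, the loop below
+ // computes gamma * (x - mean) / sqrt(var + eps) + beta as x * a + b.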
+
+ for (int32_t height = 0; height < heights; height++)
+ {
+ for (int32_t width = 0; width < widths; width++)
+ {
+ double input_value =
+ input_data[tflite::Offset(input_shape, batch, height, width, channel)];
+ double output_value = input_value * a + b;
+ output_data[tflite::Offset(output_shape, batch, height, width, channel)] =
+ tflite::ActivationFunctionWithMinMax((float)output_value, activation_min,
+ activation_max);
+ }
+ }
+ }
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/InstanceNorm.h b/onert-micro/luci-interpreter/src/kernels/InstanceNorm.h
new file mode 100644
index 000000000..a70a84e0a
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/InstanceNorm.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_INSTANCENORM_H
+#define LUCI_INTERPRETER_KERNELS_INSTANCENORM_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class InstanceNorm : public KernelWithParams<InstanceNormParams>
+{
+public:
+ InstanceNorm(const Tensor *input, const Tensor *gamma, const Tensor *beta, Tensor *output,
+ const InstanceNormParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *gamma() const { return _inputs[1]; }
+ const Tensor *beta() const { return _inputs[2]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_INSTANCENORM_H
diff --git a/onert-micro/luci-interpreter/src/kernels/InstanceNorm.test.cpp b/onert-micro/luci-interpreter/src/kernels/InstanceNorm.test.cpp
new file mode 100644
index 000000000..04400c3c0
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/InstanceNorm.test.cpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "kernels/InstanceNorm.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class InstanceNormTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(InstanceNormTest, Simple)
+{
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 2, 1}, {1, 1, 1, 1}, _memory_manager.get());
+ Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get());
+ Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ InstanceNormParams params{};
+ params.epsilon = 0.1f;
+ params.activation = Activation::NONE;
+
+ InstanceNorm kernel(&input_tensor, &gamma_tensor, &beta_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear({2, 2, 2, 2}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 1}));
+}
+
+TEST_F(InstanceNormTest, Single_gamma_beta)
+{
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 1, 2}, {1, 1, 1, 1}, _memory_manager.get());
+ Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get());
+ Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ InstanceNormParams params{};
+ params.epsilon = 0.1f;
+ params.activation = Activation::NONE;
+
+ InstanceNorm kernel(&input_tensor, &gamma_tensor, &beta_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear({2, 2, 2, 2}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 2}));
+}
+
+TEST_F(InstanceNormTest, Wrong_gamma_beta_dim_NEG)
+{
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 1, 2}, {1, 1, 1, 1}, _memory_manager.get());
+ Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1, 1, 1}, _memory_manager.get());
+ Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({3}, {2, 2, 2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ InstanceNormParams params{};
+ params.epsilon = 0.1f;
+ params.activation = Activation::NONE;
+
+ InstanceNorm kernel(&input_tensor, &gamma_tensor, &beta_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/KernelBuilder.cpp b/onert-micro/luci-interpreter/src/kernels/KernelBuilder.cpp
new file mode 100644
index 000000000..5d1e885d0
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/KernelBuilder.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "KernelBuilder.h"
+
+namespace luci_interpreter
+{
+
+void KernelConfigureRegistry::configure_kernel(const circle::Operator *cur_op,
+ circle::BuiltinOperator opcode,
+ BaseRuntimeGraph *runtime_graph) const
+{
+ auto specific_configure_func = get_kernel_configure_func(opcode);
+ if (specific_configure_func == nullptr)
+ assert(false && "Unsupported operator");
+
+ specific_configure_func(cur_op, runtime_graph);
+}
+
+void KernelExecuteRegistry::execute_kernel(const circle::Operator *cur_op,
+ circle::BuiltinOperator opcode,
+ BaseRuntimeGraph *runtime_graph) const
+{
+ auto specific_execute_func = get_kernel_execute_func(opcode);
+ if (specific_execute_func == nullptr)
+ assert(false && "Unsupported operator");
+
+ specific_execute_func(cur_op, runtime_graph);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/KernelBuilder.h b/onert-micro/luci-interpreter/src/kernels/KernelBuilder.h
new file mode 100644
index 000000000..3d10f4918
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/KernelBuilder.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNEL_KERNELBUILDER_H
+#define LUCI_INTERPRETER_KERNEL_KERNELBUILDER_H
+
+#include "core/RuntimeModule.h"
+#include "luci_interpreter/core/reader/CircleMicroReader.h"
+#include "Builders.h"
+
+#include <memory>
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+#define REGISTER_KERNEL(builtin_operator, name) BuiltinOperator_##builtin_operator,
+
+enum class BuilderID
+{
+#if USE_GENERATED_LIST
+#include "GeneratedKernelsToBuild.lst"
+#else
+#include "KernelsToBuild.lst"
+#endif
+ Size // equals the number of values in the BuilderID enum
+};
+
+#undef REGISTER_KERNEL
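+// Illustrative expansion (assuming the .lst file contains a line such as
+// REGISTER_KERNEL(FULLY_CONNECTED, FullyConnected)): the macro above turns it into
+// the enumerator BuiltinOperator_FULLY_CONNECTED, so BuilderID enumerates exactly
+// the kernels selected for the build and Size is their count.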
+
+constexpr BuilderID get_builder_id(circle::BuiltinOperator opcode)
+{
+ switch (opcode)
+ {
+#define REGISTER_KERNEL(builtin_operator, name) \
+ case circle::BuiltinOperator_##builtin_operator: \
+ return BuilderID::BuiltinOperator_##builtin_operator;
+
+#if USE_GENERATED_LIST
+#include "GeneratedKernelsToBuild.lst"
+#else
+#include "KernelsToBuild.lst"
+#endif
+
+#undef REGISTER_KERNEL
+ default:
+ assert(false && "Unsupported operation");
+ }
+}
+
+class KernelConfigureRegistry
+{
+public:
+ using KernelConfigureFunc = void(const circle::Operator *, BaseRuntimeGraph *);
+
+ constexpr KernelConfigureRegistry() : _operator_configure()
+ {
+#define REGISTER_KERNEL(builtin_operator, name) \
+ register_kernel_configure(BuilderID::BuiltinOperator_##builtin_operator, \
+ configure_kernel_Circle##name);
+
+#if USE_GENERATED_LIST
+#include "GeneratedKernelsToBuild.lst"
+#else
+#include "KernelsToBuild.lst"
+#endif
+
+#undef REGISTER_KERNEL
+ }
+
+ void configure_kernel(const circle::Operator *cur_op, circle::BuiltinOperator opcode,
+ BaseRuntimeGraph *runtime_graph) const;
+
+private:
+ constexpr KernelConfigureFunc *get_kernel_configure_func(circle::BuiltinOperator opcode) const
+ {
+ const auto builder_id_opcode = size_t(get_builder_id(opcode));
+ assert(builder_id_opcode < size_t(BuilderID::Size));
+ return _operator_configure[builder_id_opcode];
+ }
+
+ constexpr void register_kernel_configure(BuilderID id, KernelConfigureFunc *func)
+ {
+ assert(size_t(id) < size_t(BuilderID::Size));
+ _operator_configure[size_t(id)] = func;
+ }
+
+private:
+ KernelConfigureFunc *_operator_configure[size_t(BuilderID::Size)];
+};
+
+class KernelExecuteRegistry
+{
+public:
+ using KernelExecuteFunc = void(const circle::Operator *, BaseRuntimeGraph *);
+
+ constexpr KernelExecuteRegistry() : _operator_execute()
+ {
+#define REGISTER_KERNEL(builtin_operator, name) \
+ register_kernel_execute(BuilderID::BuiltinOperator_##builtin_operator, \
+ execute_kernel_Circle##name);
+
+#if USE_GENERATED_LIST
+#include "GeneratedKernelsToBuild.lst"
+#else
+#include "KernelsToBuild.lst"
+#endif
+
+#undef REGISTER_KERNEL
+ }
+
+ void execute_kernel(const circle::Operator *cur_op, circle::BuiltinOperator opcode,
+ BaseRuntimeGraph *runtime_graph) const;
+
+private:
+ constexpr KernelExecuteFunc *get_kernel_execute_func(circle::BuiltinOperator opcode) const
+ {
+ const auto tmp = size_t(get_builder_id(opcode));
+ assert(tmp < size_t(BuilderID::Size));
+ return _operator_execute[tmp];
+ }
+
+ constexpr void register_kernel_execute(BuilderID id, KernelExecuteFunc *func)
+ {
+ assert(size_t(id) < size_t(BuilderID::Size));
+ _operator_execute[size_t(id)] = func;
+ }
+
+private:
+ KernelExecuteFunc *_operator_execute[size_t(BuilderID::Size)];
+};
+
+// Global constexpr kernel configure and execute registries
+constexpr KernelConfigureRegistry kernel_configure;
+constexpr KernelExecuteRegistry kernel_executor;
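+// Usage sketch (illustrative): callers are expected to dispatch through these
+// registries, e.g.
+//   kernel_configure.configure_kernel(cur_op, opcode, runtime_graph);
+//   kernel_executor.execute_kernel(cur_op, opcode, runtime_graph);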
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNEL_KERNELBUILDER_H
diff --git a/onert-micro/luci-interpreter/src/kernels/L2Normalize.cpp b/onert-micro/luci-interpreter/src/kernels/L2Normalize.cpp
new file mode 100644
index 000000000..97c9db86b
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/L2Normalize.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/L2Normalize.h"
+#include "kernels/Utils.h"
+
+#include "PALL2Normalize.h"
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+L2Normalize::L2Normalize(const Tensor *input, Tensor *output, const L2NormParams &params)
+ : KernelWithParams<L2NormParams>({input}, {output}, params)
+{
+}
+
+void L2Normalize::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= 4);
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32 ||
+ output()->element_type() == DataType::U8);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ if (output()->element_type() == DataType::U8)
+ {
+ LUCI_INTERPRETER_CHECK(output()->scale() == (1. / 128.));
+ LUCI_INTERPRETER_CHECK(output()->zero_point() == 128);
+ }
+ LUCI_INTERPRETER_CHECK(params().activation == Activation::NONE);
+ // TODO: enable it only if kernel with dynamic shapes
+ output()->resize(input()->shape());
+}
+
+void L2Normalize::execute() const
+{
+ switch (output()->element_type())
+ {
+ case DataType::FLOAT32:
+ eval<float>(0);
+ break;
+ case DataType::U8:
+ eval<uint8_t>(input()->zero_point());
+ break;
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+template <typename T> void L2Normalize::eval(int32_t zero_point) const
+{
+ tflite::L2NormalizationParams op_params{};
+ op_params.input_zero_point = zero_point;
+ luci_interpreter_pal::L2Normalization(op_params, getTensorShape(input()),
+ getTensorData<T>(input()), getTensorShape(output()),
+ getTensorData<T>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/L2Normalize.h b/onert-micro/luci-interpreter/src/kernels/L2Normalize.h
new file mode 100644
index 000000000..6c7dac698
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/L2Normalize.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_L2NORMALIZE_H
+#define LUCI_INTERPRETER_KERNELS_L2NORMALIZE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class L2Normalize : public KernelWithParams<L2NormParams>
+{
+public:
+ L2Normalize(const Tensor *input, Tensor *output, const L2NormParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> void eval(int32_t zero_point) const;
+};
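+
+// Typical call sequence (a minimal sketch mirroring the unit tests; `mm` is
+// assumed to be an IMemoryManager owning the tensor buffers):
+//
+//   L2NormParams params{};
+//   params.activation = Activation::NONE;
+//   L2Normalize kernel(&input_tensor, &output_tensor, params);
+//   kernel.configure();                  // validates types, resizes output
+//   mm->allocate_memory(output_tensor);  // output buffer must exist
+//   kernel.execute();                    // out = in / sqrt(sum(in^2)) along the last axis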
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_L2NORMALIZE_H
diff --git a/onert-micro/luci-interpreter/src/kernels/L2Normalize.test.cpp b/onert-micro/luci-interpreter/src/kernels/L2Normalize.test.cpp
new file mode 100644
index 000000000..6f960e8b4
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/L2Normalize.test.cpp
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "kernels/L2Normalize.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ L2NormParams params{};
+ params.activation = Activation::NONE;
+
+ L2Normalize kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+template <>
+void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data,
+ std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::pair<float, int32_t> quant_param =
+ quantizationParams<uint8_t>(std::min(input_data) < 0 ? std::min(input_data) : 0.f,
+ std::max(input_data) > 0 ? std::max(input_data) : 0.f);
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ input_shape, quant_param.first, quant_param.second, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 128., 128);
+
+ L2NormParams params{};
+ params.activation = Activation::NONE;
+
+ L2Normalize kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data, output_tensor.scale()));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+template <typename T> class L2NormalizeTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(L2NormalizeTest, DataTypes);
+
+TYPED_TEST(L2NormalizeTest, Simple)
+{
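+  // Worked values: sum of squares = 1.21 + 0.36 + 0.49 + 1.44 + 0.49 + 0.01
+  // = 4.0, so every output element is input / sqrt(4.0) = input / 2.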
+ Check<TypeParam>({1, 1, 1, 6}, {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1},
+ {-0.55, 0.3, 0.35, 0.6, -0.35, 0.05});
+}
+
+TEST(L2NormalizeTest, ActivationType_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data = {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ L2NormParams params{};
+ params.activation = Activation::RELU6;
+
+ L2Normalize kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(L2NormalizeTest, InvalidOutputQuantParam_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data = {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 1, 1, 6}, 1. / 64., 127, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 64., 127);
+
+ L2NormParams params{};
+ params.activation = Activation::NONE;
+
+ L2Normalize kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/L2Pool2D.cpp b/onert-micro/luci-interpreter/src/kernels/L2Pool2D.cpp
new file mode 100644
index 000000000..e465c220a
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/L2Pool2D.cpp
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/L2Pool2D.h"
+
+#include "kernels/Utils.h"
+
+#include "PALL2Pool2D.h"
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+L2Pool2D::L2Pool2D(const Tensor *input, Tensor *output, const Pool2DParams &params)
+ : KernelWithParams<Pool2DParams>({input}, {output}, params)
+{
+}
+
+void L2Pool2D::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+ int batches = input()->shape().dim(0);
+ int height = input()->shape().dim(1);
+ int width = input()->shape().dim(2);
+ int channels_out = input()->shape().dim(3);
+
+ // Matching GetWindowedOutputSize in TensorFlow.
+ auto padding = params().padding;
+ int out_width, out_height;
+ out_width = computeOutputSize(padding, width, params().filter_width, params().stride_width, 1);
+ out_height =
+ computeOutputSize(padding, height, params().filter_height, params().stride_height, 1);
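+  // computePadding is assumed to return max(0, ((out - 1) * stride + filter - in) / 2),
+  // i.e. the leading padding under TF's SAME/VALID conventions (dilation is 1 here).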
+ _padding_width =
+ computePadding(params().stride_width, 1, width, params().filter_width, out_width);
+ _padding_height =
+ computePadding(params().stride_height, 1, height, params().filter_height, out_height);
+
+ LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::FLOAT32);
+  // TODO: enable this resize only for kernels with dynamic shapes
+ output()->resize({batches, out_height, out_width, channels_out});
+}
+
+void L2Pool2D::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
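+      // L2 pooling emits sqrt(mean(x^2)) over each filter window; the fused
+      // activation below then clamps the result.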
+ float activation_min, activation_max;
+ calculateActivationRange(params().activation, &activation_min, &activation_max);
+ tflite::PoolParams op_params;
+ op_params.stride_height = params().stride_height;
+ op_params.stride_width = params().stride_width;
+ op_params.filter_height = params().filter_height;
+ op_params.filter_width = params().filter_width;
+ op_params.padding_values.height = _padding_height;
+ op_params.padding_values.width = _padding_width;
+ op_params.float_activation_min = activation_min;
+ op_params.float_activation_max = activation_max;
+ luci_interpreter_pal::L2Pool(op_params, getTensorShape(input()),
+ getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/L2Pool2D.h b/onert-micro/luci-interpreter/src/kernels/L2Pool2D.h
new file mode 100644
index 000000000..d40f5f478
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/L2Pool2D.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_L2POOL2D_H
+#define LUCI_INTERPRETER_KERNELS_L2POOL2D_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <memory>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class L2Pool2D : public KernelWithParams<Pool2DParams>
+{
+public:
+ L2Pool2D(const Tensor *input, Tensor *output, const Pool2DParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ int32_t _padding_height = 0;
+ int32_t _padding_width = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_L2POOL2D_H
diff --git a/onert-micro/luci-interpreter/src/kernels/L2Pool2D.test.cpp b/onert-micro/luci-interpreter/src/kernels/L2Pool2D.test.cpp
new file mode 100644
index 000000000..7245456cb
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/L2Pool2D.test.cpp
@@ -0,0 +1,291 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/L2Pool2D.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class L2Pool2DTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(L2Pool2DTest, FloatNone)
+{
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.activation = Activation::NONE;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
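+  // Window 1: sqrt((0^2 + 6^2 + 3^2 + 2^2) / 4) = sqrt(12.25) = 3.5
+  // Window 2: sqrt((2^2 + 4^2 + 10^2 + 7^2) / 4) = sqrt(42.25) = 6.5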
+ std::vector<float> ref_output_data{3.5, 6.5};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  // TODO: also check the shape of output_tensor.
+}
+
+TEST_F(L2Pool2DTest, FloatRelu)
+{
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ -1, -6, 2, 4, //
+ -3, -2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.activation = Activation::RELU;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{3.53553, 6.5};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  // TODO: also check the shape of output_tensor.
+}
+
+TEST_F(L2Pool2DTest, FloatRelu1)
+{
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ -0.1, -0.6, 2, 4, //
+ -0.3, -0.2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.activation = Activation::RELU_N1_TO_1;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0.353553, 1.0};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  // TODO: also check the shape of output_tensor.
+}
+
+TEST_F(L2Pool2DTest, FloatRelu6)
+{
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ -0.1, -0.6, 2, 4, //
+ -0.3, -0.2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.activation = Activation::RELU6;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0.353553, 6.0};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  // TODO: also check the shape of output_tensor.
+}
+
+TEST_F(L2Pool2DTest, FloatPaddingSame)
+{
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::SAME;
+ params.activation = Activation::NONE;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{3.5, 6.5};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  // TODO: also check the shape of output_tensor.
+}
+
+TEST_F(L2Pool2DTest, FloatPaddingSameStride)
+{
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::SAME;
+ params.activation = Activation::NONE;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 1;
+ params.stride_width = 1;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{3.5, 6.0, 6.5, 5.70088, 2.54951, 7.2111, 8.63134, 7.0};
+ // NOTE with NEON+ruy, error is #1=-1.14441e-05, #6=-1.81198e-05
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, 1.0e-4f));
+  // TODO: also check the shape of output_tensor.
+}
+
+TEST_F(L2Pool2DTest, FloatPaddingValidStride)
+{
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.activation = Activation::NONE;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 1;
+ params.stride_width = 1;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{3.5, 6.0, 6.5};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  // TODO: also check the shape of output_tensor.
+}
+
+TEST_F(L2Pool2DTest, InvalidInputShape_NEG)
+{
+ Shape input_shape{1, 2, 4};
+ std::vector<float> input_data{
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.activation = Activation::NONE;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 1;
+ params.stride_width = 1;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(L2Pool2DTest, InvalidInputOutputType_NEG)
+{
+ Shape input_shape{1, 2, 4};
+ std::vector<float> input_data{
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.activation = Activation::NONE;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 1;
+ params.stride_width = 1;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/LeakyRelu.cpp b/onert-micro/luci-interpreter/src/kernels/LeakyRelu.cpp
new file mode 100644
index 000000000..7f032b5a0
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/LeakyRelu.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+#include "SISOKernel.h"
+
+#include "PALReluCommon.h"
+
+namespace luci_interpreter
+{
+
+void configure_kernel_CircleLeakyRelu(const circle::Operator *cur_op,
+ BaseRuntimeGraph *runtime_graph)
+{
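+  // SISOKernel is a helper that resolves the op's single input / single
+  // output tensor pair from the runtime graph.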
+ kernels::SISOKernel kernel(cur_op, runtime_graph);
+
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input()) ==
+ Tensor::element_type(kernel.output()));
+ LUCI_INTERPRETER_CHECK(Tensor::num_dims(kernel.input()) == Tensor::num_dims(kernel.output()));
+ LUCI_INTERPRETER_CHECK(Tensor::num_elements(kernel.input()) ==
+ Tensor::num_elements(kernel.output()));
+}
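+
+// LeakyReLU computes f(x) = x for x >= 0 and f(x) = alpha * x otherwise;
+// alpha comes from the operator's LeakyReluOptions and is forwarded to the
+// shared ReLUCommon PAL routine below.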
+
+void execute_kernel_CircleLeakyRelu(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::SISOKernel kernel(cur_op, runtime_graph);
+
+ const auto *input_data = runtime_graph->getDataByTensor(kernel.input());
+ assert(input_data);
+
+ auto *output_data = runtime_graph->getDataByTensor(kernel.output());
+
+ bool is_inplace = runtime_graph->is_inplace_op(cur_op);
+
+ const auto options = cur_op->builtin_options_as_LeakyReluOptions();
+
+ switch (Tensor::element_type(kernel.input()))
+ {
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ {
+ const float *input_data_float = kernels::getTensorData<float>(input_data);
+ float *output_data_float = kernels::getTensorData<float>(output_data);
+ if (is_inplace)
+ {
+ output_data_float = const_cast<float *>(input_data_float);
+ }
+
+ assert(output_data_float);
+ const int flat_size =
+ kernels::getTensorRuntimeShape(kernel.input(), runtime_graph).flatSize();
+
+ luci_interpreter_pal::ReLUCommon(flat_size, input_data_float, output_data_float,
+ options->alpha(), false);
+ break;
+ }
+#endif // DIS_FLOAT
+ default:
+ assert(false && "Unsupported type");
+ }
+
+ if (is_inplace)
+ runtime_graph->makeInplaceOperation(kernel.input(), kernel.output());
+}
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/LeakyRelu.test.cpp b/onert-micro/luci-interpreter/src/kernels/LeakyRelu.test.cpp
new file mode 100644
index 000000000..049ba21fb
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/LeakyRelu.test.cpp
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/leaky_relu/FloatLeakyReLUKernel.h"
+#include "luci_interpreter/test_models/leaky_relu/NegLeakyReLUKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class LeakyReLUTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T>
+std::vector<T> checkLeakyReLUKernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 1);
+
+ // Set input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(LeakyReLUTest, Float_P)
+{
+ test_kernel::TestDataFloatLeakyReLU test_data_kernel;
+ std::vector<float> output_data_vector = checkLeakyReLUKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, kernels::testing::FloatArrayNear(
+ test_data_kernel.get_output_data_by_index(0), 0.0001f));
+}
+
+TEST_F(LeakyReLUTest, Input_output_type_mismatch_NEG)
+{
+ test_kernel::NegTestDataInputOutputTypeMismatchLeakyReLUKernel test_data_kernel;
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Less.cpp b/onert-micro/luci-interpreter/src/kernels/Less.cpp
new file mode 100644
index 000000000..4a688e463
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Less.cpp
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+#include "TISOKernel.h"
+
+#include "PALComparisons.h"
+
+namespace luci_interpreter
+{
+
+namespace
+{
+#ifndef DIS_QUANT
+void evalQuantized(const circle::Tensor *x, const circle::Tensor *y, const circle::Tensor *output,
+ BaseRuntimeGraph *runtime_graph)
+{
+ auto x_data = kernels::getTensorData<uint8_t>(runtime_graph->getDataByTensor(x));
+ if (x_data == nullptr)
+ x_data = kernels::getTensorData<uint8_t>(runtime_graph->getConstDataByTensor(x));
+
+ assert(x_data != nullptr);
+
+ auto y_data = kernels::getTensorData<uint8_t>(runtime_graph->getDataByTensor(y));
+ if (y_data == nullptr)
+ y_data = kernels::getTensorData<uint8_t>(runtime_graph->getConstDataByTensor(y));
+
+ assert(y_data != nullptr);
+
+ auto output_data = kernels::getTensorData<bool>(runtime_graph->getDataByTensor(output));
+
+ int32_t x_multiplier;
+ int x_shift;
+
+ int32_t y_multiplier;
+ int y_shift;
+
+ kernels::quantizeMultiplierSmallerThanOneExp(Tensor::scale(x), &x_multiplier, &x_shift);
+ kernels::quantizeMultiplierSmallerThanOneExp(Tensor::scale(y), &y_multiplier, &y_shift);
+
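+  // Both operands are centered by subtracting their zero points, then brought
+  // to a common scale via the per-input multiplier/shift pairs (left_shift
+  // adds 8 bits of headroom) before the integer comparison runs.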
+ luci_interpreter_pal::ComparisonParams op_params;
+ op_params.left_shift = 8;
+ op_params.input1_offset = -Tensor::zero_point(x); // Note the '-'
+ op_params.input1_shift = x_shift;
+ op_params.input1_multiplier = x_multiplier;
+ op_params.input2_offset = -Tensor::zero_point(y); // Note the '-'
+ op_params.input2_shift = y_shift;
+ op_params.input2_multiplier = y_multiplier;
+ op_params.is_broadcast = Tensor::num_elements(x) != Tensor::num_elements(y);
+
+ if (op_params.is_broadcast)
+ {
+ luci_interpreter_pal::BroadcastComparison4DSlowWithScaling<uint8_t>(
+ op_params, kernels::getTensorShape(x), x_data, kernels::getTensorShape(y), y_data,
+ kernels::getTensorShape(output), output_data, luci_interpreter_pal::LessFn);
+ }
+ else
+ {
+ const int64_t flat_size = kernels::getTensorShape(x).flatSize();
+ luci_interpreter_pal::ComparisonWithScaling<uint8_t>(op_params, flat_size, x_data, y_data,
+ output_data, luci_interpreter_pal::LessFn);
+ }
+}
+#endif // DIS_QUANT
+
+template <typename T>
+void evalGeneric(const circle::Tensor *x, const circle::Tensor *y, const circle::Tensor *output,
+ BaseRuntimeGraph *runtime_graph)
+{
+ auto x_data = kernels::getTensorData<T>(runtime_graph->getDataByTensor(x));
+ if (x_data == nullptr)
+ x_data = kernels::getTensorData<T>(runtime_graph->getConstDataByTensor(x));
+
+ assert(x_data != nullptr);
+
+ auto y_data = kernels::getTensorData<T>(runtime_graph->getDataByTensor(y));
+ if (y_data == nullptr)
+ y_data = kernels::getTensorData<T>(runtime_graph->getConstDataByTensor(y));
+
+ assert(y_data != nullptr);
+
+ auto output_data = kernels::getTensorData<bool>(runtime_graph->getDataByTensor(output));
+
+ luci_interpreter_pal::ComparisonParams op_params;
+ op_params.is_broadcast = Tensor::num_elements(x) != Tensor::num_elements(y);
+
+ if (op_params.is_broadcast)
+ {
+ luci_interpreter_pal::BroadcastComparison4DSlowNoScaling<T>(
+ op_params, kernels::getTensorShape(x), x_data, kernels::getTensorShape(y), y_data,
+ kernels::getTensorShape(output), output_data, luci_interpreter_pal::LessFn);
+ }
+ else
+ {
+ const int64_t flat_size = kernels::getTensorShape(x).flatSize();
+ luci_interpreter_pal::ComparisonNoScaling<T>(flat_size, x_data, y_data, output_data,
+ luci_interpreter_pal::LessFn);
+ }
+}
+
+} // namespace
+
+void configure_kernel_CircleLess(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
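+  // TISOKernel resolves the op's two inputs and single output from the graph.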
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) ==
+ Tensor::element_type(kernel.input2()));
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.output()) == DataType::BOOL);
+}
+
+void execute_kernel_CircleLess(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+
+ switch (Tensor::element_type(kernel.input1()))
+ {
+ case DataType::S64:
+ evalGeneric<int64_t>(kernel.input1(), kernel.input2(), kernel.output(), runtime_graph);
+ break;
+ case DataType::S32:
+ evalGeneric<int32_t>(kernel.input1(), kernel.input2(), kernel.output(), runtime_graph);
+ break;
+#ifndef DIS_QUANT
+ case DataType::U8:
+ evalQuantized(kernel.input1(), kernel.input2(), kernel.output(), runtime_graph);
+ break;
+#endif // DIS_QUANT
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ evalGeneric<float>(kernel.input1(), kernel.input2(), kernel.output(), runtime_graph);
+ break;
+#endif // DIS_FLOAT
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Less.test.cpp b/onert-micro/luci-interpreter/src/kernels/Less.test.cpp
new file mode 100644
index 000000000..08279e9d1
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Less.test.cpp
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/less/FloatLessKernel.h"
+#include "luci_interpreter/test_models/less/IntLessKernel.h"
+#include "luci_interpreter/test_models/less/QuantLessKernel.h"
+#include "luci_interpreter/test_models/less/NegTestDataLessKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class LessTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T, typename U>
+std::vector<U> checkLessKernel(test_kernel::TestDataBase<T, U> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 2);
+
+ // set left input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ // set right input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(1));
+ std::copy(test_data_base->get_input_data_by_index(1).begin(),
+ test_data_base->get_input_data_by_index(1).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ U *output_data = reinterpret_cast<U *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(U));
+ std::vector<U> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(LessTest, FloatNoBroadcast_P)
+{
+ const bool is_with_broadcast = false;
+ test_kernel::TestDataFloatLess test_data_kernel(is_with_broadcast, false);
+ std::vector<bool> output_data_vector = checkLessKernel<float, bool>(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(LessTest, FloatWithBroadcast_P)
+{
+ const bool is_with_broadcast = true;
+ test_kernel::TestDataFloatLess test_data_kernel(is_with_broadcast, false);
+ std::vector<bool> output_data_vector = checkLessKernel<float, bool>(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(LessTest, FloatNoBroadcast_NEG)
+{
+ const bool is_with_broadcast = false;
+ test_kernel::TestDataFloatLess test_data_kernel(is_with_broadcast, true);
+ EXPECT_DEATH(checkLessKernel(&test_data_kernel), "");
+}
+
+TEST_F(LessTest, FloatWithBroadcast_NEG)
+{
+ const bool is_with_broadcast = true;
+ test_kernel::TestDataFloatLess test_data_kernel(is_with_broadcast, true);
+ EXPECT_DEATH(checkLessKernel(&test_data_kernel), "");
+}
+
+TEST_F(LessTest, IntWithBroadcast_P)
+{
+ const bool is_with_broadcast = true;
+ test_kernel::TestDataIntLess test_data_kernel(is_with_broadcast, false);
+ std::vector<bool> output_data_vector = checkLessKernel<int32_t, bool>(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(LessTest, IntNoBroadcast_P)
+{
+ const bool is_with_broadcast = false;
+ test_kernel::TestDataIntLess test_data_kernel(is_with_broadcast, false);
+ std::vector<bool> output_data_vector = checkLessKernel<int32_t, bool>(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(LessTest, IntWithBroadcast_NEG)
+{
+ const bool is_with_broadcast = true;
+ test_kernel::TestDataIntLess test_data_kernel(is_with_broadcast, true);
+ EXPECT_DEATH(checkLessKernel(&test_data_kernel), "");
+}
+
+TEST_F(LessTest, IntNoBroadcast_NEG)
+{
+ const bool is_with_broadcast = false;
+ test_kernel::TestDataIntLess test_data_kernel(is_with_broadcast, true);
+ EXPECT_DEATH(checkLessKernel(&test_data_kernel), "");
+}
+
+TEST_F(LessTest, Quant_P)
+{
+ const bool is_with_broadcast = false;
+ test_kernel::TestDataQuantLess test_data_kernel(is_with_broadcast, false);
+ std::vector<bool> output_data_vector = checkLessKernel<uint8_t, bool>(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(LessTest, Quant_NEG)
+{
+ const bool is_with_broadcast = false;
+ test_kernel::TestDataQuantLess test_data_kernel(is_with_broadcast, true);
+ EXPECT_DEATH(checkLessKernel(&test_data_kernel), "");
+}
+
+TEST_F(LessTest, Wrong_Output_Type_NEG)
+{
+ test_kernel::NegTestDataLessKernel test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/LessEqual.cpp b/onert-micro/luci-interpreter/src/kernels/LessEqual.cpp
new file mode 100644
index 000000000..8928ba4e1
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/LessEqual.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+#include "TISOKernel.h"
+
+#include "PALComparisons.h"
+
+namespace luci_interpreter
+{
+
+namespace
+{
+// TODO: reduce code duplication with Less
+template <typename T>
+void evalGeneric(const circle::Tensor *x, const circle::Tensor *y, const circle::Tensor *output,
+ BaseRuntimeGraph *runtime_graph)
+{
+ auto x_data = kernels::getTensorData<T>(runtime_graph->getDataByTensor(x));
+ if (x_data == nullptr)
+ x_data = kernels::getTensorData<T>(runtime_graph->getConstDataByTensor(x));
+
+ assert(x_data != nullptr);
+
+ auto y_data = kernels::getTensorData<T>(runtime_graph->getDataByTensor(y));
+ if (y_data == nullptr)
+ y_data = kernels::getTensorData<T>(runtime_graph->getConstDataByTensor(y));
+
+ assert(y_data != nullptr);
+
+ auto output_data = kernels::getTensorData<bool>(runtime_graph->getDataByTensor(output));
+
+  luci_interpreter_pal::ComparisonParams op_params;
+  op_params.is_broadcast = Tensor::num_elements(x) != Tensor::num_elements(y);
+
+  if (op_params.is_broadcast)
+  {
+    luci_interpreter_pal::BroadcastComparison4DSlowNoScaling<T>(
+      op_params, kernels::getTensorShape(x), x_data, kernels::getTensorShape(y), y_data,
+      kernels::getTensorShape(output), output_data, luci_interpreter_pal::LessEqualFn);
+  }
+  else
+  {
+    const int64_t flat_size = kernels::getTensorShape(x).flatSize();
+    luci_interpreter_pal::ComparisonNoScaling<T>(flat_size, x_data, y_data, output_data,
+                                                 luci_interpreter_pal::LessEqualFn);
+  }
+}
+
+} // namespace
+
+void configure_kernel_CircleLessEqual(const circle::Operator *cur_op,
+ BaseRuntimeGraph *runtime_graph)
+{
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) ==
+ Tensor::element_type(kernel.input2()));
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.output()) == DataType::BOOL);
+}
+
+void execute_kernel_CircleLessEqual(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+
+ switch (Tensor::element_type(kernel.input1()))
+ {
+ case DataType::S64:
+ evalGeneric<int64_t>(kernel.input1(), kernel.input2(), kernel.output(), runtime_graph);
+ break;
+ case DataType::S32:
+ evalGeneric<int32_t>(kernel.input1(), kernel.input2(), kernel.output(), runtime_graph);
+ break;
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ evalGeneric<float>(kernel.input1(), kernel.input2(), kernel.output(), runtime_graph);
+ break;
+#endif // DIS_FLOAT
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/LessEqual.test.cpp b/onert-micro/luci-interpreter/src/kernels/LessEqual.test.cpp
new file mode 100644
index 000000000..e53dc05a4
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/LessEqual.test.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/less_equal/FloatLessEqualKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class LessEqualTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T, typename U>
+std::vector<U> checkLessEqualKernel(test_kernel::TestDataBase<T, U> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 2);
+
+ // set left input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ // set right input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(1));
+ std::copy(test_data_base->get_input_data_by_index(1).begin(),
+ test_data_base->get_input_data_by_index(1).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ U *output_data = reinterpret_cast<U *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(U));
+ std::vector<U> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(LessEqualTest, FloatNoBroadcast_P)
+{
+ const bool is_with_broadcast = false;
+ test_kernel::TestDataFloatLessEqual test_data_kernel(is_with_broadcast, false);
+ std::vector<bool> output_data_vector = checkLessEqualKernel<float, bool>(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(LessEqualTest, FloatNoBroadcast_NEG)
+{
+ const bool is_with_broadcast = false;
+ test_kernel::TestDataFloatLessEqual test_data_kernel(is_with_broadcast, true);
+ EXPECT_DEATH(checkLessEqualKernel(&test_data_kernel), "");
+}
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/LocalResponseNormalization.cpp b/onert-micro/luci-interpreter/src/kernels/LocalResponseNormalization.cpp
new file mode 100644
index 000000000..bf08db0a3
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/LocalResponseNormalization.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LocalResponseNormalization.h"
+
+#include "kernels/Utils.h"
+
+#include "PALLocalResponseNormalization.h"
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+LocalResponseNormalization::LocalResponseNormalization(
+ const Tensor *input, Tensor *output, const LocalResponseNormalizationParams &params)
+ : KernelWithParams<LocalResponseNormalizationParams>({input}, {output}, params)
+{
+}
+
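+// Reference semantics: out[i] = in[i] / (bias + alpha * sum(in[j]^2))^beta,
+// where the sum runs over the `radius` neighbours of i along the depth axis.
+// With bias = 0, alpha = 1, beta = 0.5 this reduces to L2 normalization,
+// which the SameAsL2Norm unit test relies on.
+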
+void LocalResponseNormalization::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+  // TODO: enable this resize only for kernels with dynamic shapes
+ output()->resize(input()->shape());
+}
+
+void LocalResponseNormalization::execute() const
+{
+ switch (output()->element_type())
+ {
+ case DataType::FLOAT32:
+ tflite::LocalResponseNormalizationParams op_params;
+ op_params.range = params().radius;
+ op_params.bias = params().bias;
+ op_params.alpha = params().alpha;
+ op_params.beta = params().beta;
+ luci_interpreter_pal::LocalResponseNormalization(
+ op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/LocalResponseNormalization.h b/onert-micro/luci-interpreter/src/kernels/LocalResponseNormalization.h
new file mode 100644
index 000000000..60408a104
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/LocalResponseNormalization.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LOCALRESPONSENORMALIZATION_H
+#define LUCI_INTERPRETER_KERNELS_LOCALRESPONSENORMALIZATION_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class LocalResponseNormalization : public KernelWithParams<LocalResponseNormalizationParams>
+{
+public:
+ LocalResponseNormalization(const Tensor *input, Tensor *output,
+ const LocalResponseNormalizationParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LOCALRESPONSENORMALIZATION_H
diff --git a/onert-micro/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp b/onert-micro/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp
new file mode 100644
index 000000000..4a9d4739f
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LocalResponseNormalization.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class LocalResponseNormalizationTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LocalResponseNormalizationTest, SameAsL2Norm)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = 20;
+ params.bias = 0.0;
+ params.alpha = 1.0;
+ params.beta = 0.5;
+
+ LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({-0.55, 0.3, 0.35, 0.6, -0.35, 0.05}));
+}
+
+TEST_F(LocalResponseNormalizationTest, WithAlpha)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = 20;
+ params.bias = 0.0;
+ params.alpha = 4.0;
+ params.beta = 0.5;
+
+ LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({-0.275, 0.15, 0.175, 0.3, -0.175, 0.025}));
+}
+
+TEST_F(LocalResponseNormalizationTest, WithBias)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = 20;
+ params.bias = 9.0;
+ params.alpha = 4.0;
+ params.beta = 0.5;
+
+ LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({-0.22, 0.12, 0.14, 0.24, -0.14, 0.02}));
+}
+
+TEST_F(LocalResponseNormalizationTest, SmallRadius)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = 2;
+ params.bias = 9.0;
+ params.alpha = 4.0;
+ params.beta = 0.5;
+
+ LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({-0.264926, 0.125109, 0.140112, 0.267261, -0.161788, 0.0244266}));
+}
+
+TEST_F(LocalResponseNormalizationTest, InvalidInputDimension_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = 20;
+ params.bias = 0.0;
+ params.alpha = 1.0;
+ params.beta = 0.5;
+
+ LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LocalResponseNormalizationTest, InvalidInputOutputType_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = 20;
+ params.bias = 0.0;
+ params.alpha = 1.0;
+ params.beta = 0.5;
+
+ LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/LogSoftmax.cpp b/onert-micro/luci-interpreter/src/kernels/LogSoftmax.cpp
new file mode 100644
index 000000000..b467cb06b
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/LogSoftmax.cpp
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogSoftmax.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/log_softmax.h>
+
+#include "PALLogSoftmax.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+LogSoftmax::LogSoftmax(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void LogSoftmax::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
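+  // Log-softmax outputs lie in (-inf, 0]; the fixed quantization below
+  // (scale 16/256, zero point 255) maps uint8 values onto [-16, 0].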
+ if (input()->element_type() == DataType::U8)
+ {
+ LUCI_INTERPRETER_CHECK(output()->scale() == 16. / 256);
+ LUCI_INTERPRETER_CHECK(output()->zero_point() == 255);
+
+ tflite::SoftmaxParams params{};
+
+ params.table = _table;
+ params.beta = 1.0;
+ luci_interpreter_pal::PopulateSoftmaxLookupTable(&params, input()->scale(), params.beta);
+ }
+  // TODO: enable this resize only for kernels with dynamic shapes
+ output()->resize(input()->shape());
+}
+
+void LogSoftmax::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+void LogSoftmax::evalFloat() const
+{
+ tflite::SoftmaxParams params{};
+ tflite::reference_ops::LogSoftmax(params, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void LogSoftmax::evalQuantized() const
+{
+ const auto input_shape = getTensorShape(input());
+ const auto output_shape = getTensorShape(output());
+ const auto input_scale = input()->scale();
+ uint8_t *output_data = getTensorData<uint8_t>(output());
+ const uint8_t *input_data = getTensorData<uint8_t>(input());
+ const float beta = 1.0;
+
+ tflite::SoftmaxParams params{};
+
+ params.table = const_cast<float *>(_table);
+ params.zero_point = output()->zero_point();
+ params.scale = output()->scale();
+
+ luci_interpreter_pal::InitializeParams(&params, input_scale, beta);
+ luci_interpreter_pal::LogSoftmax(params, input_scale, input_shape, input_data, output_shape,
+ output_data);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/LogSoftmax.h b/onert-micro/luci-interpreter/src/kernels/LogSoftmax.h
new file mode 100644
index 000000000..18477fbe3
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/LogSoftmax.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LOGSOFTMAX_H
+#define LUCI_INTERPRETER_KERNELS_LOGSOFTMAX_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class LogSoftmax : public Kernel
+{
+public:
+ LogSoftmax(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+
+ float _table[256];
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LOGSOFTMAX_H
diff --git a/onert-micro/luci-interpreter/src/kernels/LogSoftmax.test.cpp b/onert-micro/luci-interpreter/src/kernels/LogSoftmax.test.cpp
new file mode 100644
index 000000000..50dcd5c28
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/LogSoftmax.test.cpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogSoftmax.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class LogSoftmaxTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LogSoftmaxTest, Float)
+{
+ Shape input_shape{2, 4};
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ LogSoftmax kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ -4.14297, -10.14297, -2.14297, -.142971, //
+ -7.00104, -12.00104, -.00104087, -9.00104, //
+ };
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(LogSoftmaxTest, Uint8)
+{
+ float kMin = -10;
+ float kMax = 10;
+ float kLogSoftmaxQuantizedTolerance = 16. / 256;
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(kMin, kMax);
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
+ };
+ Tensor input_tensor = makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 16. / 256, 255);
+
+ LogSoftmax kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ -4.14297, -10.14297, -2.14297, -.142971, //
+ -7.00104, -12.00104, -.00104087, -9.00104, //
+ };
+ std::vector<int32_t> ref_output_shape{2, 4};
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, kLogSoftmaxQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray({189, 93, 221, 253, 142, 63, 255, 111}));
+}
+
+TEST_F(LogSoftmaxTest, InvalidInputOutputType_NEG)
+{
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 4}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 16. / 256, 255);
+
+ LogSoftmax kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LogSoftmaxTest, InvalidOutputQuantParam_NEG)
+{
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-10, 10);
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
+ };
+ Tensor input_tensor = makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 20. / 256, 255);
+
+ LogSoftmax kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/LogicalAnd.cpp b/onert-micro/luci-interpreter/src/kernels/LogicalAnd.cpp
new file mode 100644
index 000000000..7e440cca4
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/LogicalAnd.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "Builders.h"
+#include "kernels/Utils.h"
+#include "TISOKernel.h"
+
+#include "PALLogicalCommon.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+bool LogicalAnd(bool x, bool y) { return x && y; }
+} // namespace
+
+void configure_kernel_CircleLogicalAnd(const circle::Operator *cur_op,
+ BaseRuntimeGraph *runtime_graph)
+{
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) ==
+ Tensor::element_type(kernel.input2()));
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) == DataType::BOOL);
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.output()) == DataType::BOOL);
+
+  // TODO: support broadcasting
+ LUCI_INTERPRETER_CHECK(Tensor::num_elements(kernel.input1()) ==
+ Tensor::num_elements(kernel.input2()));
+ LUCI_INTERPRETER_CHECK(Tensor::num_dims(kernel.input1()) == Tensor::num_dims(kernel.input2()));
+}
+
+// TODO: add in-place support
+// TODO: reduce code duplication with LogicalOr
+void execute_kernel_CircleLogicalAnd(const circle::Operator *cur_op,
+ BaseRuntimeGraph *runtime_graph)
+{
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+
+ auto x_data = kernels::getTensorData<bool>(runtime_graph->getDataByTensor(kernel.input1()));
+ if (x_data == nullptr)
+ x_data = kernels::getTensorData<bool>(runtime_graph->getConstDataByTensor(kernel.input1()));
+
+ assert(x_data != nullptr);
+
+ auto y_data = kernels::getTensorData<bool>(runtime_graph->getDataByTensor(kernel.input2()));
+ if (y_data == nullptr)
+ y_data = kernels::getTensorData<bool>(runtime_graph->getConstDataByTensor(kernel.input2()));
+
+ assert(y_data != nullptr);
+
+ auto output_data = kernels::getTensorData<bool>(runtime_graph->getDataByTensor(kernel.output()));
+
+ const int64_t flat_size = kernels::getTensorShape(kernel.input1()).flatSize();
+ luci_interpreter_pal::LogicalCommon(flat_size, x_data, y_data, output_data, LogicalAnd);
+}
+
+} // namespace luci_interpreter
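
Editorial note: the execute function above only resolves the two bool buffers (constant or runtime) and hands the `LogicalAnd` predicate to `luci_interpreter_pal::LogicalCommon`. Judging from the call site, that helper is a flat elementwise loop over a binary predicate; a standalone sketch under that assumption (the real PAL header may differ in detail), which would also serve the LogicalOr kernel below and so addresses the code-duplication TODO:

    #include <cstdint>
    #include <cstdio>

    // Assumed shape of luci_interpreter_pal::LogicalCommon: apply a binary
    // predicate elementwise over two flat bool buffers of equal size.
    void logical_common(int64_t flat_size, const bool *x, const bool *y, bool *out,
                        bool (*op)(bool, bool))
    {
      for (int64_t i = 0; i < flat_size; ++i)
        out[i] = op(x[i], y[i]);
    }

    int main()
    {
      const bool x[4] = {true, true, false, false};
      const bool y[4] = {true, false, true, false};
      bool out[4];
      logical_common(4, x, y, out, [](bool a, bool b) { return a && b; }); // AND
      for (bool b : out)
        std::printf("%d", b); // 1000
      std::printf("\n");
      logical_common(4, x, y, out, [](bool a, bool b) { return a || b; }); // OR
      for (bool b : out)
        std::printf("%d", b); // 1110
      std::printf("\n");
    }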
diff --git a/onert-micro/luci-interpreter/src/kernels/LogicalAnd.test.cpp b/onert-micro/luci-interpreter/src/kernels/LogicalAnd.test.cpp
new file mode 100644
index 000000000..2f72c0832
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/LogicalAnd.test.cpp
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/logical_and/BoolLogicalAndKernel.h"
+#include "luci_interpreter/test_models/logical_and/NegLogicalAndKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class LogicalAndTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T>
+std::vector<T> checkLogicalAndKernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 2);
+
+ // set left input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ // set right input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(1));
+ std::copy(test_data_base->get_input_data_by_index(1).begin(),
+ test_data_base->get_input_data_by_index(1).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(LogicalAndTest, Bool_P)
+{
+ test_kernel::TestDataBoolLogicalAnd test_data_kernel;
+ std::vector<bool> output_data_vector = checkLogicalAndKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(LogicalAndTest, Input_type_mismatch_NEG)
+{
+ test_kernel::NegTestDataInputTypeMismatchLogicalAndKernel test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/LogicalNot.cpp b/onert-micro/luci-interpreter/src/kernels/LogicalNot.cpp
new file mode 100644
index 000000000..4ba4499e9
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/LogicalNot.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogicalNot.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+LogicalNot::LogicalNot(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void LogicalNot::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+  // TODO: perform this resize only for kernels with dynamic shapes
+ output()->resize(input()->shape());
+}
+
+void LogicalNot::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::BOOL:
+ evalLogicalNot();
+ break;
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+inline void LogicalNot::evalLogicalNot() const
+{
+ const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
+ bool *output_data = getTensorData<bool>(output());
+ const bool *input_data = getTensorData<bool>(input());
+ for (int i = 0; i < size; ++i)
+ {
+ output_data[i] = !input_data[i];
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/LogicalNot.test.cpp b/onert-micro/luci-interpreter/src/kernels/LogicalNot.test.cpp
new file mode 100644
index 000000000..3cbf27f6b
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/LogicalNot.test.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogicalNot.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class LogicalNotTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LogicalNotTest, Basic)
+{
+ Shape input_shape{1, 1, 1, 4};
+ Tensor input_tensor =
+ makeInputTensor<DataType::BOOL>(input_shape, {true, false, false, true}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LogicalNot kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor),
+ ::testing::ElementsAre(false, true, true, false));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4));
+}
+
+TEST_F(LogicalNotTest, OutputTypeInvalid_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true},
+ _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ LogicalNot kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LogicalNotTest, InputTypeInvalid_NEG)
+{
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LogicalNot kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/LogicalOr.cpp b/onert-micro/luci-interpreter/src/kernels/LogicalOr.cpp
new file mode 100644
index 000000000..207c73964
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/LogicalOr.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "Builders.h"
+#include "kernels/Utils.h"
+#include "TISOKernel.h"
+
+#include "PALLogicalCommon.h"
+
+namespace luci_interpreter
+{
+
+namespace
+{
+bool LogicalOr(bool x, bool y) { return x || y; }
+} // namespace
+
+void configure_kernel_CircleLogicalOr(const circle::Operator *cur_op,
+ BaseRuntimeGraph *runtime_graph)
+{
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) ==
+ Tensor::element_type(kernel.input2()));
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) == DataType::BOOL);
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.output()) == DataType::BOOL);
+
+  // TODO: support broadcasting
+ LUCI_INTERPRETER_CHECK(Tensor::num_elements(kernel.input1()) ==
+ Tensor::num_elements(kernel.input2()));
+ LUCI_INTERPRETER_CHECK(Tensor::num_dims(kernel.input1()) == Tensor::num_dims(kernel.input2()));
+}
+
+// TODO: add in-place support
+// TODO: reduce code duplication with LogicalAnd
+void execute_kernel_CircleLogicalOr(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+
+ auto x_data = kernels::getTensorData<bool>(runtime_graph->getDataByTensor(kernel.input1()));
+ if (x_data == nullptr)
+ x_data = kernels::getTensorData<bool>(runtime_graph->getConstDataByTensor(kernel.input1()));
+
+ assert(x_data != nullptr);
+
+ auto y_data = kernels::getTensorData<bool>(runtime_graph->getDataByTensor(kernel.input2()));
+ if (y_data == nullptr)
+ y_data = kernels::getTensorData<bool>(runtime_graph->getConstDataByTensor(kernel.input2()));
+
+ assert(y_data != nullptr);
+
+ auto output_data = kernels::getTensorData<bool>(runtime_graph->getDataByTensor(kernel.output()));
+
+ const int64_t flat_size = kernels::getTensorShape(kernel.input1()).flatSize();
+ luci_interpreter_pal::LogicalCommon(flat_size, x_data, y_data, output_data, LogicalOr);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/LogicalOr.test.cpp b/onert-micro/luci-interpreter/src/kernels/LogicalOr.test.cpp
new file mode 100644
index 000000000..baa338d5a
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/LogicalOr.test.cpp
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/logical_or/BoolLogicalOrKernel.h"
+#include "luci_interpreter/test_models/logical_or/NegLogicalOrKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class LogicalOrTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T>
+std::vector<T> checkLogicalOrKernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 2);
+
+ // set left input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ // set right input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(1));
+ std::copy(test_data_base->get_input_data_by_index(1).begin(),
+ test_data_base->get_input_data_by_index(1).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(LogicalOrTest, Bool_P)
+{
+ test_kernel::TestDataBoolLogicalOr test_data_kernel;
+ std::vector<bool> output_data_vector = checkLogicalOrKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(LogicalOrTest, Input_type_mismatch_NEG)
+{
+ test_kernel::NegTestDataInputTypeMismatchLogicalOrKernel test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Logistic.cpp b/onert-micro/luci-interpreter/src/kernels/Logistic.cpp
new file mode 100644
index 000000000..4dbc15356
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Logistic.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+#include "PALLogistic.h"
+
+namespace luci_interpreter
+{
+
+void configure_kernel_CircleLogistic(const circle::Operator *cur_op,
+ BaseRuntimeGraph *runtime_graph)
+{
+ const auto input_index = cur_op->inputs()->operator[](0);
+ const auto output_index = cur_op->outputs()->operator[](0);
+
+ assert(input_index != -1);
+ assert(output_index != -1);
+
+ const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+ auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+ assert(input != nullptr);
+ assert(output != nullptr);
+
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(input) == Tensor::element_type(output));
+
+#ifndef DIS_QUANT
+ if (Tensor::element_type(input) == DataType::U8)
+ {
+ LUCI_INTERPRETER_CHECK(Tensor::scale(output) == 1. / 256);
+ }
+#endif // DIS_QUANT
+}
+
+void execute_kernel_CircleLogistic(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ const auto input_index = cur_op->inputs()->operator[](0);
+ const auto output_index = cur_op->outputs()->operator[](0);
+
+ assert(input_index != -1);
+ assert(output_index != -1);
+
+ const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+ auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+ assert(input != nullptr);
+ assert(output != nullptr);
+
+ bool is_inplace = runtime_graph->is_inplace_op(cur_op);
+
+ const uint8_t *input_data = runtime_graph->getDataByTensor(input);
+ uint8_t *output_data = runtime_graph->getDataByTensor(output);
+
+ if (is_inplace)
+ {
+ output_data = const_cast<uint8_t *>(input_data);
+ }
+
+ assert(input_data != nullptr);
+ assert(output_data != nullptr);
+
+ const int flat_size = kernels::getTensorRuntimeShape(input, runtime_graph).flatSize();
+
+ switch (Tensor::element_type(input))
+ {
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ luci_interpreter_pal::Logistic(flat_size, kernels::getTensorData<float>(input_data),
+ kernels::getTensorData<float>(output_data));
+ break;
+#endif // DIS_FLOAT
+#ifndef DIS_QUANT
+ case DataType::S8:
+ luci_interpreter_pal::Logistic(flat_size, kernels::getTensorData<int8_t>(input_data),
+ Tensor::scale(input), Tensor::zero_point(input),
+ kernels::getTensorData<int8_t>(output_data),
+ Tensor::scale(output), Tensor::zero_point(output));
+ break;
+#endif // DIS_QUANT
+ default:
+ assert(false && "Unsupported type.");
+ }
+
+ if (is_inplace)
+ {
+ runtime_graph->makeInplaceOperation(input, output);
+ }
+}
+
+} // namespace luci_interpreter
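
Editorial note: the float branch delegates to `luci_interpreter_pal::Logistic`, i.e. the standard sigmoid 1/(1 + e^-x); its (0, 1) output range is also why configure() can demand an output scale of exactly 1/256 for U8 tensors. A minimal float reference (a sketch, not the PAL implementation):

    #include <cmath>
    #include <cstdio>

    // Reference float logistic (sigmoid) over a flat buffer.
    void logistic(int flat_size, const float *input, float *output)
    {
      for (int i = 0; i < flat_size; ++i)
        output[i] = 1.f / (1.f + std::exp(-input[i]));
    }

    int main()
    {
      const float in[3] = {-1.f, 0.f, 1.f};
      float out[3];
      logistic(3, in, out);
      for (float v : out)
        std::printf("%f\n", v); // ~0.268941, 0.500000, ~0.731059
    }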
diff --git a/onert-micro/luci-interpreter/src/kernels/Logistic.test.cpp b/onert-micro/luci-interpreter/src/kernels/Logistic.test.cpp
new file mode 100644
index 000000000..36e347038
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Logistic.test.cpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/logistic/FloatLogisticKernel.h"
+#include "luci_interpreter/test_models/logistic/NegLogisticKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class LogisticTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T>
+std::vector<T> checkLogisticKernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 1);
+
+ // Set input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(LogisticTest, Float_P)
+{
+ test_kernel::TestDataFloatLogistic test_data_kernel;
+ std::vector<float> output_data_vector = checkLogisticKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(LogisticTest, Input_output_type_mismatch_NEG)
+{
+ test_kernel::NegTestDataInputOutputTypeMismatchLogisticKernel test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+TEST_F(LogisticTest, No_quant_params_NEG)
+{
+ test_kernel::NegTestDataNoQuantParamsLogisticKernel test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+// TODO: add S8 test
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/MISOKernel.h b/onert-micro/luci-interpreter/src/kernels/MISOKernel.h
new file mode 100644
index 000000000..96d20af3f
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/MISOKernel.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_MISO_KERNEL_H
+#define LUCI_INTERPRETER_KERNELS_MISO_KERNEL_H
+
+#include "Builders.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Multiple input single output kernel
+class MISOKernel
+{
+public:
+ MISOKernel(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+ {
+ const auto input1_index = cur_op->inputs()->operator[](0);
+ const auto input2_index = cur_op->inputs()->operator[](1);
+ const auto input3_index = cur_op->inputs()->operator[](2);
+ const auto input4_index =
+ cur_op->inputs()->size() == 4 ? cur_op->inputs()->operator[](3) : -1; // optional
+
+ const auto output_index = cur_op->outputs()->operator[](0);
+
+ assert(input1_index != -1);
+ assert(input2_index != -1);
+ assert(input3_index != -1);
+
+ assert(output_index != -1);
+
+ _input1_tensor = runtime_graph->getCircleTensorByIndex(input1_index);
+ _input2_tensor = runtime_graph->getCircleTensorByIndex(input2_index);
+ _input3_tensor = runtime_graph->getCircleTensorByIndex(input3_index);
+ _output_tensor = runtime_graph->getCircleTensorByIndex(output_index);
+
+ // optional
+ if (input4_index != -1)
+ _input4_tensor = runtime_graph->getCircleTensorByIndex(input4_index);
+ else
+ _input4_tensor = nullptr;
+
+ assert(_input1_tensor != nullptr);
+ assert(_input2_tensor != nullptr);
+ assert(_input3_tensor != nullptr);
+ assert(_output_tensor != nullptr);
+ }
+
+ const circle::Tensor *input1() const { return _input1_tensor; }
+ const circle::Tensor *input2() const { return _input2_tensor; }
+ const circle::Tensor *input3() const { return _input3_tensor; }
+
+ const circle::Tensor *input4() const
+ {
+ assert(_input4_tensor != nullptr);
+ return _input4_tensor;
+ }
+
+ const circle::Tensor *output() const { return _output_tensor; }
+
+private:
+ const circle::Tensor *_input1_tensor;
+ const circle::Tensor *_input2_tensor;
+ const circle::Tensor *_input3_tensor;
+ const circle::Tensor *_input4_tensor; // optional
+ const circle::Tensor *_output_tensor;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_MISO_KERNEL_H
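
Editorial note: MISOKernel packages the three-or-four-input, single-output access pattern the same way TISOKernel does for two inputs. A hypothetical configure function using it (the op name and checks are illustrative, not from this patch; the snippet assumes the interpreter's usual headers and compiles only inside this source tree):

    // Hypothetical kernel with an optional fourth input.
    void configure_kernel_CircleSomeMISOOp(const circle::Operator *cur_op,
                                           BaseRuntimeGraph *runtime_graph)
    {
      kernels::MISOKernel kernel(cur_op, runtime_graph);

      LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) ==
                             Tensor::element_type(kernel.output()));

      // input4() asserts on a null tensor, so gate it on the operator's
      // actual input count before dereferencing.
      if (cur_op->inputs()->size() == 4)
        LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input4()) == DataType::S32);
    }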
diff --git a/onert-micro/luci-interpreter/src/kernels/MaxPool2D.cpp b/onert-micro/luci-interpreter/src/kernels/MaxPool2D.cpp
new file mode 100644
index 000000000..d85e8a980
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/MaxPool2D.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Utils.h"
+#include "PALMaxPool2D.h"
+
+namespace luci_interpreter
+{
+void configure_kernel_CircleMaxPool2D(const circle::Operator *cur_op,
+ BaseRuntimeGraph *runtime_graph)
+{
+ const auto input_index = cur_op->inputs()->operator[](0);
+ const auto output_index = cur_op->outputs()->operator[](0);
+
+ assert(input_index != -1);
+ assert(output_index != -1);
+
+ const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+ const auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(input) == Tensor::element_type(output));
+ assert(Tensor::num_dims(input) == 4);
+
+#ifndef DIS_QUANT
+ if (Tensor::element_type(input) == DataType::U8)
+ {
+ LUCI_INTERPRETER_CHECK(std::abs(Tensor::scale(output) - Tensor::scale(input)) <= 1.0e-6);
+ LUCI_INTERPRETER_CHECK(Tensor::zero_point(output) == Tensor::zero_point(input));
+ }
+ else if (Tensor::element_type(input) == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(std::abs(Tensor::scale(output) - Tensor::scale(input)) <= 1.0e-6);
+ LUCI_INTERPRETER_CHECK(Tensor::zero_point(input) == 0 && Tensor::zero_point(output) == 0);
+ }
+#endif // DIS_QUANT
+}
+
+void execute_kernel_CircleMaxPool2D(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ const auto input_index = cur_op->inputs()->operator[](0);
+ const auto output_index = cur_op->outputs()->operator[](0);
+
+ assert(input_index != -1);
+ assert(output_index != -1);
+
+ const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+ auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+ const auto *options = cur_op->builtin_options_as_Pool2DOptions();
+
+ const int32_t input_height = Tensor::dim(input, 1);
+ const int32_t input_width = Tensor::dim(input, 2);
+
+ const int32_t output_height = kernels::computeOutputSize(
+ luci_padding(options->padding()), input_height, options->filter_height(), options->stride_h());
+ const int32_t output_width = kernels::computeOutputSize(
+ luci_padding(options->padding()), input_width, options->filter_width(), options->stride_w());
+
+ const auto padding_height = kernels::computePadding(options->stride_h(), 1, input_height,
+ options->filter_height(), output_height);
+ const auto padding_width = kernels::computePadding(options->stride_w(), 1, input_width,
+ options->filter_width(), output_width);
+
+ const auto *input_data = runtime_graph->getDataByTensor(input);
+ auto *output_data = runtime_graph->getDataByTensor(output);
+
+ float activation_min{};
+ float activation_max{};
+ kernels::calculateActivationRange(luci_actfunc(options->fused_activation_function()),
+ &activation_min, &activation_max);
+ luci_interpreter_pal::PoolParams params{};
+ params.padding_values.height = padding_height;
+ params.padding_values.width = padding_width;
+ params.stride_height = options->stride_h();
+ params.stride_width = options->stride_w();
+ params.filter_height = options->filter_height();
+ params.filter_width = options->filter_width();
+ params.float_activation_min = activation_min;
+ params.float_activation_max = activation_max;
+
+ switch (Tensor::element_type(input))
+ {
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ luci_interpreter_pal::MaxPool(
+ params, kernels::getTensorShape(input), kernels::getTensorData<float>(input_data),
+ kernels::getTensorShape(output), kernels::getTensorData<float>(output_data));
+ break;
+#endif // DIS_FLOAT
+#ifndef DIS_QUANT
+ case DataType::U8:
+ luci_interpreter_pal::MaxPool(
+ params, kernels::getTensorShape(input), kernels::getTensorData<uint8_t>(input_data),
+ kernels::getTensorShape(output), kernels::getTensorData<uint8_t>(output_data));
+ break;
+ case DataType::S16:
+ luci_interpreter_pal::MaxPool(
+ params, kernels::getTensorShape(input), kernels::getTensorData<int16_t>(input_data),
+ kernels::getTensorShape(output), kernels::getTensorData<int16_t>(output_data));
+ break;
+#endif // DIS_QUANT
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+} // namespace luci_interpreter
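
Editorial note: the output size and padding computed above follow the usual TFLite conventions. The sketch below spells out the per-dimension formulas this code is assumed to rely on; `computeOutputSize`/`computePadding` live in kernels/Utils.h and may differ in detail. Dilation is a parameter here to mirror the literal 1 passed above:

    #include <algorithm>
    #include <cstdio>

    // SAME keeps ceil(in / stride) positions; VALID only keeps fully covered ones.
    int output_size(bool same_padding, int in, int filter, int stride, int dilation = 1)
    {
      const int effective_filter = (filter - 1) * dilation + 1;
      return same_padding ? (in + stride - 1) / stride
                          : (in + stride - effective_filter) / stride;
    }

    // Total padding split evenly between both sides; clamped at zero.
    int padding(int stride, int dilation, int in, int filter, int out)
    {
      const int effective_filter = (filter - 1) * dilation + 1;
      return std::max(((out - 1) * stride + effective_filter - in) / 2, 0);
    }

    int main()
    {
      // A 5-wide input, 3-wide filter, stride 2, SAME: 3 outputs, 1 pixel of padding.
      const int out = output_size(true, 5, 3, 2);
      std::printf("out=%d pad=%d\n", out, padding(2, 1, 5, 3, out)); // out=3 pad=1
    }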
diff --git a/onert-micro/luci-interpreter/src/kernels/MaxPool2D.test.cpp b/onert-micro/luci-interpreter/src/kernels/MaxPool2D.test.cpp
new file mode 100644
index 000000000..d7af7d7c5
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/MaxPool2D.test.cpp
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/maxpool2d/FloatMaxPool2DKernel.h"
+#include "luci_interpreter/test_models/maxpool2d/NegMaxPool2DKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class MaxPool2DTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T>
+std::vector<T> checkMaxPool2DKernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 1);
+
+ // Set input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(MaxPool2DTest, Float_P)
+{
+ test_kernel::TestDataFloatMaxPool2D test_data_kernel;
+ std::vector<float> output_data_vector = checkMaxPool2DKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, kernels::testing::FloatArrayNear(
+ test_data_kernel.get_output_data_by_index(0), 0.01f));
+}
+
+TEST_F(MaxPool2DTest, InputOutputTypeMismatch_NEG)
+{
+ test_kernel::NegTestDataInputOutputTypeMismatchMaxPool2DKernel test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+TEST_F(MaxPool2DTest, Invalid_input_shape_NEG)
+{
+ test_kernel::NegTestDataInvalidInputShapeMaxPool2DKernel test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+TEST_F(MaxPool2DTest, No_quant_params_NEG)
+{
+ test_kernel::NegTestDataNoQuantParamsMaxPool2DKernel test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+// TODO: add S16 test
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Maximum.cpp b/onert-micro/luci-interpreter/src/kernels/Maximum.cpp
new file mode 100644
index 000000000..1a7ee4c72
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Maximum.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Maximum.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Maximum::Maximum(const Tensor *input1, const Tensor *input2, Tensor *output)
+ : Kernel({input1, input2}, {output})
+{
+}
+
+void Maximum::configure()
+{
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+  // TODO: perform this resize only for kernels with dynamic shapes
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Maximum::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalMaximum<float>();
+ break;
+ case DataType::U8:
+ evalMaximum<uint8_t>();
+ break;
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+template <typename T> inline void Maximum::evalMaximum() const
+{
+ BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()),
+ [](T x, T y) { return std::max(x, y); });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
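
Editorial note: `calculateShapeForBroadcast` above is assumed to implement NumPy-style broadcasting: align the two shapes from the trailing dimension and require each pair of dimensions to match or be 1. A standalone sketch of that rule:

    #include <algorithm>
    #include <cassert>
    #include <cstdio>
    #include <vector>

    // NumPy-style broadcast of two shapes, trailing dimensions aligned.
    std::vector<int> broadcast_shape(std::vector<int> a, const std::vector<int> &b)
    {
      if (a.size() < b.size())
        a.insert(a.begin(), b.size() - a.size(), 1); // left-pad the shorter shape with 1s
      for (size_t i = 0; i < b.size(); ++i)
      {
        int &da = a[a.size() - 1 - i];
        const int db = b[b.size() - 1 - i];
        assert(da == db || da == 1 || db == 1); // otherwise the shapes are incompatible
        da = std::max(da, db);
      }
      return a;
    }

    int main()
    {
      for (int d : broadcast_shape({3, 1, 2}, {5, 2})) // -> 3 5 2
        std::printf("%d ", d);
      std::printf("\n");
    }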
diff --git a/onert-micro/luci-interpreter/src/kernels/Maximum.h b/onert-micro/luci-interpreter/src/kernels/Maximum.h
new file mode 100644
index 000000000..3c99e69c7
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Maximum.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_MAXIMUM_H
+#define LUCI_INTERPRETER_KERNELS_MAXIMUM_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Maximum : public Kernel
+{
+public:
+ Maximum(const Tensor *input1, const Tensor *input2, Tensor *output);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> inline void evalMaximum() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_MAXIMUM_H
diff --git a/onert-micro/luci-interpreter/src/kernels/Maximum.test.cpp b/onert-micro/luci-interpreter/src/kernels/Maximum.test.cpp
new file mode 100644
index 000000000..e4a505b03
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Maximum.test.cpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Maximum.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class MaximumTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MaximumTest, Float)
+{
+ Shape input_shape{3, 1, 2};
+ std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
+ std::vector<float> input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43};
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, _memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data2, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Maximum kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{1.0, 0.0, 1.0, 12.0, -2.0, -1.43};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(MaximumTest, Uint8)
+{
+ Shape input_shape{3, 1, 2};
+ std::vector<uint8_t> input_data1{1, 0, 2, 11, 2, 23};
+ std::vector<uint8_t> input_data2{0, 0, 1, 12, 255, 1};
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::U8>(input_shape, input_data1, _memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::U8>(input_shape, input_data2, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ Maximum kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+  EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+              ::testing::ElementsAreArray({1, 0, 2, 12, 255, 23}));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(3, 1, 2));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Mean.cpp b/onert-micro/luci-interpreter/src/kernels/Mean.cpp
new file mode 100644
index 000000000..4128aa68d
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Mean.cpp
@@ -0,0 +1,344 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Mean.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reduce.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+static void resolveAxes(const int32_t *axes_data, int num_axes, tflite::MeanParams *params)
+{
+ params->axis_count = num_axes;
+ for (int i = 0; i < num_axes; ++i)
+ {
+ params->axis[i] = static_cast<int16>(axes_data[i]);
+ }
+ for (int i = num_axes; i < 4; ++i)
+ {
+ params->axis[i] = 1;
+ }
+}
+
+// Returns the number of axes that will be reduced. Removes duplicates.
+static int getAxisReductionCount(const int32_t *axes_data, int num_axes, int input_num_dims)
+{
+ int reduction_count = num_axes;
+ for (int i = 0; i < num_axes; ++i)
+ {
+ int current = axes_data[i] >= 0 ? axes_data[i] : axes_data[i] + input_num_dims;
+ assert(current >= 0 && current < input_num_dims);
+ for (int j = 0; j < i; j++)
+ {
+ int previous = axes_data[j] >= 0 ? axes_data[j] : axes_data[j] + input_num_dims;
+ // This checks for duplicate axis
+ if (current == previous)
+ {
+ --reduction_count;
+ break;
+ }
+ }
+ }
+ return reduction_count;
+}
+
+static Shape getOutputShape(const Shape &input_shape, const int32_t *axes_data, int num_axes,
+ bool keep_dims)
+{
+ int input_num_dims = input_shape.num_dims();
+ if (input_num_dims == 0)
+ {
+ return Shape(0);
+ }
+
+ if (keep_dims)
+ {
+ Shape output_shape(input_num_dims);
+ for (int idx = 0; idx < input_num_dims; ++idx)
+ {
+ bool is_axis = false;
+ for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
+ {
+ if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx)
+ {
+ is_axis = true;
+ break;
+ }
+ }
+ if (is_axis)
+ {
+ output_shape.dim(idx) = 1;
+ }
+ else
+ {
+ output_shape.dim(idx) = input_shape.dim(idx);
+ }
+ }
+ return output_shape;
+ }
+ else
+ {
+ int num_reduce_axes = getAxisReductionCount(axes_data, num_axes, input_num_dims);
+ Shape output_shape(input_num_dims - num_reduce_axes);
+ int num_skip_axes = 0;
+ for (int idx = 0; idx < input_num_dims; ++idx)
+ {
+ bool is_axis = false;
+ for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
+ {
+ if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx)
+ {
+ ++num_skip_axes;
+ is_axis = true;
+ break;
+ }
+ }
+ if (!is_axis)
+ {
+ output_shape.dim(idx - num_skip_axes) = input_shape.dim(idx);
+ }
+ }
+ return output_shape;
+ }
+}
+
+Mean::Mean(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
+ Tensor *resolved_axes, Tensor *temp_sum, const ReducerParams &params)
+ : KernelWithParams<ReducerParams>({input, axes}, {output, temp_index, resolved_axes, temp_sum},
+ params)
+{
+}
+
+void Mean::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(axes()->element_type() == DataType::S32);
+ if (input()->element_type() == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
+ }
+
+ const Shape &input_shape = input()->shape();
+ int input_num_dims = input_shape.num_dims();
+
+ const auto *axes_data = getTensorData<int32_t>(axes());
+ int num_axes = axes()->shape().num_elements();
+ assert(num_axes <= 4);
+  // TODO: perform this resize only for kernels with dynamic shapes
+ Shape output_shape = getOutputShape(input_shape, axes_data, num_axes, _params.keep_dims);
+ output()->resize(output_shape);
+
+ tflite::MeanParams params{};
+ resolveAxes(axes_data, num_axes, &params);
+ _need_temporaries = !(
+ _params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
+ ((params.axis[0] == 1 && params.axis[1] == 2) || (params.axis[0] == 2 && params.axis[1] == 1)));
+ if (_need_temporaries)
+ {
+ auto temp_index = getOutputTensors()[1];
+ auto resolved_axes = getOutputTensors()[2];
+ auto temp_sum = getOutputTensors()[3];
+
+ temp_index->resize(Shape(input_num_dims));
+ resolved_axes->resize(Shape(num_axes));
+ temp_sum->resize(output()->shape());
+ }
+ else
+ {
+ auto temp_index = getOutputTensors()[1];
+ auto resolved_axes = getOutputTensors()[2];
+ auto temp_sum = getOutputTensors()[3];
+
+ temp_index->set_allocatable(false);
+ resolved_axes->set_allocatable(false);
+ temp_sum->set_allocatable(false);
+ }
+}
+
+void Mean::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ case DataType::S16:
+ evalQuantizedS16();
+ break;
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+void Mean::evalFloat() const
+{
+ const Shape &input_shape = input()->shape();
+ int input_num_dims = input_shape.num_dims();
+ const auto *axes_data = getTensorData<int32_t>(axes());
+ int num_axes = axes()->shape().num_elements();
+
+ tflite::MeanParams params{};
+ resolveAxes(axes_data, num_axes, &params);
+
+ auto temp_index = getOutputTensors()[1];
+ auto resolved_axes = getOutputTensors()[2];
+ auto temp_sum = getOutputTensors()[3];
+
+ // Defer to specialized implementation for 4D Mean across axes 1 & 2.
+ if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
+ ((params.axis[0] == 1 && params.axis[1] == 2) ||
+ (params.axis[0] == 2 && params.axis[1] == 1)))
+ {
+ tflite::reference_ops::Mean(params, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Mean(getTensorData<float>(input()), getTensorShape(input()).DimsData(),
+ input()->shape().num_dims(), getTensorData<float>(output()),
+ getTensorShape(output()).DimsData(), output()->shape().num_dims(),
+ axes_data, num_axes, _params.keep_dims,
+ getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
+ getTensorData<float>(temp_sum));
+ }
+}
+
+void Mean::evalQuantized() const
+{
+ const Shape &input_shape = input()->shape();
+ int input_num_dims = input_shape.num_dims();
+ const auto *axes_data = getTensorData<int32_t>(axes());
+ int num_axes = axes()->shape().num_elements();
+
+ tflite::MeanParams params{};
+ resolveAxes(axes_data, num_axes, &params);
+
+ auto temp_index = getOutputTensors()[1];
+ auto resolved_axes = getOutputTensors()[2];
+ auto temp_sum = getOutputTensors()[3];
+
+ // Defer to specialized implementation for 4D Mean across axes 1 & 2.
+ if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
+ ((params.axis[0] == 1 && params.axis[1] == 2) ||
+ (params.axis[0] == 2 && params.axis[1] == 1)))
+ {
+ tflite::reference_ops::Mean(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ input()->zero_point(), input()->scale(), getTensorShape(output()),
+ getTensorData<uint8_t>(output()), output()->zero_point(),
+ output()->scale());
+ }
+ else if (input()->zero_point() == output()->zero_point() && input()->scale() == output()->scale())
+ {
+ tflite::reference_ops::Mean(getTensorData<uint8_t>(input()), getTensorShape(input()).DimsData(),
+ input()->shape().num_dims(), getTensorData<uint8_t>(output()),
+ getTensorShape(output()).DimsData(), output()->shape().num_dims(),
+ axes_data, num_axes, _params.keep_dims,
+ getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
+ getTensorData<int>(temp_sum));
+ }
+ else
+ {
+ tflite::reference_ops::QuantizedMeanOrSum<>(
+ getTensorData<uint8_t>(input()), input()->zero_point(), input()->scale(),
+ getTensorShape(input()).DimsData(), input()->shape().num_dims(),
+ getTensorData<uint8_t>(output()), output()->zero_point(), output()->scale(),
+ getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
+ _params.keep_dims, getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
+ getTensorData<int>(temp_sum),
+ /*compute_sum=*/false);
+ }
+}
+
+void Mean::evalQuantizedS16() const
+{
+ const auto *input_data = getTensorData<int16_t>(input());
+ auto *output_data = getTensorData<int16_t>(output());
+
+ const Shape &input_shape = input()->shape();
+ const Shape &output_shape = output()->shape();
+
+ const auto *axes_data = getTensorData<int32_t>(axes());
+ const int num_axes = axes()->shape().num_elements();
+
+ constexpr int32_t output_min = -std::numeric_limits<int16_t>::max();
+ constexpr int32_t output_max = std::numeric_limits<int16_t>::max();
+
+ // Defer to specialized implementation for 4D Mean across axes 1 & 2.
+ if (_params.keep_dims && input_shape.num_dims() == 4 && num_axes == 2 &&
+ ((axes_data[0] == 1 && axes_data[1] == 2) || (axes_data[0] == 2 && axes_data[1] == 1)))
+ {
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t depth = input_shape.dim(3);
+ assert(output_shape.num_dims() == 4);
+ assert(output_shape.dim(0) == batches);
+ assert(output_shape.dim(1) == 1);
+ assert(output_shape.dim(2) == 1);
+ assert(output_shape.dim(3) == depth);
+
+ const double real_multiplier =
+ static_cast<double>(input()->scale()) / static_cast<double>(output()->scale());
+
+ int32_t output_multiplier{};
+ int output_shift{};
+ quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+
+ const int32_t num_elements_in_axes = input_height * input_width;
+
+ for (int32_t batch = 0; batch < batches; ++batch)
+ {
+ for (int32_t c = 0; c < depth; ++c)
+ {
+ int32_t acc = 0;
+ for (int32_t in_y = 0; in_y < input_height; ++in_y)
+ {
+ for (int32_t in_x = 0; in_x < input_width; ++in_x)
+ {
+ acc += input_data[calcOffset(input_shape, batch, in_y, in_x, c)];
+ }
+ }
+ int32_t scaled_acc =
+ tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+        // Divide by the number of elements, rounding to the nearest integer.
+ scaled_acc = scaled_acc > 0
+ ? (scaled_acc + num_elements_in_axes / 2) / num_elements_in_axes
+ : (scaled_acc - num_elements_in_axes / 2) / num_elements_in_axes;
+
+ scaled_acc = std::max(scaled_acc, output_min);
+ scaled_acc = std::min(scaled_acc, output_max);
+
+ output_data[calcOffset(output_shape, batch, 0, 0, c)] = scaled_acc;
+ }
+ }
+ }
+ else
+ {
+ assert(false && "Unsupported configuration.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
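
The S16 path above chains three integer steps: raw accumulation over the
reduced plane, a fixed-point rescale by input_scale / output_scale, and a
round-to-nearest division by the reduced element count. Below is a minimal
standalone sketch of the same arithmetic — with the fixed-point helpers
(quantizeMultiplier / MultiplyByQuantizedMultiplier) approximated by plain
double math, and values borrowed from the SInt16KeepDims4D test — not the
kernel itself:

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
  const double input_scale = 0.25, output_scale = 0.2;
  // One 2x3 plane of the test input, already quantized: 1.0 .. 11.0, step 2.
  const std::vector<int16_t> plane = {4, 12, 20, 28, 36, 44};
  const int32_t num_elements = static_cast<int32_t>(plane.size());

  int32_t acc = 0;
  for (int16_t v : plane)
    acc += v; // raw accumulation over the reduced axes

  // Double-precision stand-in for MultiplyByQuantizedMultiplier(acc, m, shift).
  const double real_multiplier = input_scale / output_scale;
  int32_t scaled_acc = static_cast<int32_t>(acc * real_multiplier);

  // Round-to-nearest division, exactly as in the kernel.
  scaled_acc = scaled_acc > 0 ? (scaled_acc + num_elements / 2) / num_elements
                              : (scaled_acc - num_elements / 2) / num_elements;

  // Clamp to the symmetric int16 range the kernel uses.
  scaled_acc = std::max<int32_t>(scaled_acc, -32767);
  scaled_acc = std::min<int32_t>(scaled_acc, 32767);

  std::cout << scaled_acc << " -> " << scaled_acc * output_scale << "\n"; // 30 -> 6
}

The mean of the plane is 6.0 in real units; at output scale 0.2 that is the
quantized value 30, which is what the integer pipeline reproduces.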
diff --git a/onert-micro/luci-interpreter/src/kernels/Mean.h b/onert-micro/luci-interpreter/src/kernels/Mean.h
new file mode 100644
index 000000000..ed07ae561
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Mean.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_MEAN_H
+#define LUCI_INTERPRETER_KERNELS_MEAN_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <memory>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Mean : public KernelWithParams<ReducerParams>
+{
+public:
+ Mean(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
+ Tensor *resolved_axes, Tensor *temp_sum, const ReducerParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *axes() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+ void evalQuantizedS16() const;
+
+private:
+ bool _need_temporaries = false;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_MEAN_H
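
Note that the kernel is constructed with three scratch tensors in addition to
its real output, and configure() sizes them only when the generic reduction
path runs. A toy sketch of those sizes, assuming the 4x3x2 input with axes
{0, 2} from the first test below:

#include <iostream>

int main()
{
  const int input_num_dims = 3; // 4x3x2 input
  const int num_axes = 2;       // axes {0, 2}
  const int output_elems = 3;   // keep_dims output 1x3x1

  // Mirrors the resize calls in Mean::configure() when _need_temporaries:
  std::cout << "temp_index:    " << input_num_dims << " ints\n"
            << "resolved_axes: " << num_axes << " ints\n"
            << "temp_sum:      " << output_elems << " accumulators\n";
}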
diff --git a/onert-micro/luci-interpreter/src/kernels/Mean.test.cpp b/onert-micro/luci-interpreter/src/kernels/Mean.test.cpp
new file mode 100644
index 000000000..d2c00935a
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Mean.test.cpp
@@ -0,0 +1,240 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Mean.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class MeanTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MeanTest, FloatKeepDims)
+{
+ std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
+ 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+
+ std::vector<int32_t> axis_data{0, 2};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ReducerParams params{};
+ params.keep_dims = true;
+
+ Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(temp_sum);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{10.5, 12.5, 14.5};
+ std::initializer_list<int32_t> ref_output_shape{1, 3, 1};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MeanTest, FloatKeepDims4DMean)
+{
+ std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
+ 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+
+ std::vector<int32_t> axis_data{1, 2};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 2, 3, 2}, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ReducerParams params{};
+ params.keep_dims = true;
+
+ Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(temp_sum);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{6, 7, 18, 19};
+ std::initializer_list<int32_t> ref_output_shape{2, 1, 1, 2};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MeanTest, FloatNotKeepDims)
+{
+ std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
+ 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+
+ std::vector<int32_t> axis_data{1, 0, -3, -3};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({4}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ReducerParams params{};
+ params.keep_dims = false;
+
+ Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(temp_sum);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{12, 13};
+ std::initializer_list<int32_t> ref_output_shape{2};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MeanTest, Uint8KeepDims)
+{
+ float kQuantizedTolerance = getTolerance(-1.0, 1.0, 255);
+ std::vector<float> input_data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6};
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);
+
+ std::vector<int32_t> axis_data{1};
+ Tensor input_tensor = makeInputTensor<DataType::U8>({3, 2}, quant_param.first, quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor temp_sum(DataType::U8, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ ReducerParams params{};
+ params.keep_dims = true;
+
+ Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(temp_sum);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0.3, 0.35, 0.55};
+ std::initializer_list<int32_t> ref_output_shape{3, 1};
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, kQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MeanTest, Uint8NotKeepDims)
+{
+ float kQuantizedTolerance = getTolerance(-1.0, 1.0, 255);
+ std::vector<float> input_data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6};
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);
+
+ std::vector<int32_t> axis_data{1};
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 3, 2}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ ReducerParams params{};
+ params.keep_dims = false;
+
+ Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(temp_sum);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0.4, 0.4};
+ std::initializer_list<int32_t> ref_output_shape{1, 2};
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, kQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MeanTest, SInt16KeepDims4D)
+{
+ std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
+ 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+ std::vector<int32_t> axes_data{1, 2};
+ std::vector<float> ref_output_data{6, 7, 18, 19};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({2, 2, 3, 2}, 0.25, 0, input_data, _memory_manager.get());
+ Tensor axes_tensor = makeInputTensor<DataType::S32>({2}, axes_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.2, 0);
+
+ ReducerParams params{};
+ params.keep_dims = true;
+
+ Mean kernel(&input_tensor, &axes_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(temp_sum);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 1, 1, 2}));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
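
A worked check of the FloatNotKeepDims expectations, assuming resolveAxes
folds negative and duplicate axes the way TFLite does: {1, 0, -3, -3} on a
rank-3 tensor resolves to {0, 1}, so the mean runs over the first two
dimensions of the 4x3x2 input:

#include <iostream>

int main()
{
  int sum[2] = {0, 0};
  for (int i = 0; i < 4; ++i)
    for (int j = 0; j < 3; ++j)
      for (int k = 0; k < 2; ++k)
        sum[k] += 1 + k + 2 * j + 6 * i; // input(i, j, k) for the 1..24 test data
  // 12 = 4 * 3 reduced elements per output slot
  std::cout << sum[0] / 12.0f << " " << sum[1] / 12.0f << "\n"; // 12 13
}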
diff --git a/onert-micro/luci-interpreter/src/kernels/Minimum.cpp b/onert-micro/luci-interpreter/src/kernels/Minimum.cpp
new file mode 100644
index 000000000..f74e6c039
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Minimum.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Minimum.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Minimum::Minimum(const Tensor *input1, const Tensor *input2, Tensor *output)
+ : Kernel({input1, input2}, {output})
+{
+}
+
+void Minimum::configure()
+{
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+  // TODO: enable this resize only for kernels with dynamic shapes
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Minimum::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalMinimum<float>();
+ break;
+ case DataType::U8:
+ evalMinimum<uint8_t>();
+ break;
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+template <typename T> inline void Minimum::evalMinimum() const
+{
+ BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()),
+ [](T x, T y) { return std::min(x, y); });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
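
BinaryOpBroadcastSlow (from kernels/BinaryOpCommon.h) takes the element-wise
functor as its last argument. The sketch below is a simplification that only
handles a scalar broadcast against a flat array, but it shows the shape of
that contract:

#include <algorithm>
#include <cstddef>
#include <iostream>

template <typename T, typename Fn>
void broadcast_scalar_op(const T *input, std::size_t n, T scalar, T *output, Fn fn)
{
  for (std::size_t i = 0; i < n; ++i)
    output[i] = fn(input[i], scalar); // same element-wise functor as the kernel
}

int main()
{
  const float in[4] = {1.0f, -2.0f, 3.5f, 0.0f};
  float out[4];
  broadcast_scalar_op<float>(in, 4, 1.5f, out,
                             [](float x, float y) { return std::min(x, y); });
  for (float v : out)
    std::cout << v << " "; // 1 -2 1.5 0
  std::cout << "\n";
}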
diff --git a/onert-micro/luci-interpreter/src/kernels/Minimum.h b/onert-micro/luci-interpreter/src/kernels/Minimum.h
new file mode 100644
index 000000000..5ff4035b4
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Minimum.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_MINIMUM_H
+#define LUCI_INTERPRETER_KERNELS_MINIMUM_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Minimum : public Kernel
+{
+public:
+ Minimum(const Tensor *input1, const Tensor *input2, Tensor *output);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> inline void evalMinimum() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_MINIMUM_H
diff --git a/onert-micro/luci-interpreter/src/kernels/Minimum.test.cpp b/onert-micro/luci-interpreter/src/kernels/Minimum.test.cpp
new file mode 100644
index 000000000..9a143643f
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Minimum.test.cpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Minimum.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class MinimumTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MinimumTest, Float)
+{
+ Shape input_shape{3, 1, 2};
+ std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
+ std::vector<float> input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43};
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, _memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data2, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Minimum kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{-1.0, 0.0, -1.0, 11.0, -3.0, -1.44};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(MinimumTest, Uint8)
+{
+ Shape input_shape{3, 1, 2};
+ std::vector<uint8_t> input_data1{1, 0, 2, 11, 2, 23};
+ std::vector<uint8_t> input_data2{0, 0, 1, 12, 255, 1};
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::U8>(input_shape, input_data1, _memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::U8>(input_shape, input_data2, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ Minimum kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+  std::vector<int32_t> ref_output_shape{3, 1, 2};
+  EXPECT_THAT(extractTensorData<uint8_t>(output_tensor), ElementsAreArray({0, 0, 1, 11, 2, 1}));
+  EXPECT_THAT(extractTensorShape(output_tensor), ElementsAreArray(ref_output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/MirrorPad.cpp b/onert-micro/luci-interpreter/src/kernels/MirrorPad.cpp
new file mode 100644
index 000000000..d9e60b060
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/MirrorPad.cpp
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/MirrorPad.h"
+
+#include "kernels/Utils.h"
+
+#include <limits>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+MirrorPad::MirrorPad(const Tensor *input, const Tensor *paddings, Tensor *output,
+ const MirrorPadParams &params)
+ : KernelWithParams<MirrorPadParams>({input, paddings}, {output}, params)
+{
+}
+
+void MirrorPad::configure()
+{
+ const Shape &input_shape = input()->shape();
+ const int num_dims = input_shape.num_dims();
+
+ if (num_dims > 4)
+ assert(false && "Unsupported number of dimensions.");
+
+ assert(output()->element_type() == input()->element_type());
+ assert(paddings()->element_type() == DataType::S32);
+ // Paddings shape should be [N, 2].
+ assert(paddings()->shape().num_dims() == 2);
+ assert(paddings()->shape().dim(0) == num_dims);
+ assert(paddings()->shape().dim(1) == 2);
+
+ Shape output_shape(num_dims);
+ const auto *paddings_data = getTensorData<int32_t>(paddings());
+ for (int i = 0; i < num_dims; ++i)
+ {
+ const int32_t padding_before = paddings_data[i * 2];
+ const int32_t padding_after = paddings_data[i * 2 + 1];
+ assert(padding_before >= 0 && padding_after >= 0);
+ output_shape.dim(i) = input_shape.dim(i) + padding_before + padding_after;
+ }
+  // TODO: enable this resize only for kernels with dynamic shapes
+ output()->resize(output_shape);
+}
+
+template <typename T>
+inline void MirrorPadImpl(const Tensor &input, const Tensor &paddings, MirrorPadMode mode,
+ Tensor &output);
+
+void MirrorPad::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ {
+ MirrorPadImpl<float>(*input(), *paddings(), params().mode, *output());
+ break;
+ }
+ case DataType::U8:
+ {
+ assert(output()->zero_point() >= std::numeric_limits<uint8_t>::min());
+ assert(output()->zero_point() <= std::numeric_limits<uint8_t>::max());
+
+ MirrorPadImpl<uint8_t>(*input(), *paddings(), params().mode, *output());
+ break;
+ }
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+template <typename T>
+inline void MirrorPadImpl(const Tensor &input, const Tensor &paddings, MirrorPadMode mode,
+ Tensor &output)
+{
+ auto const input_dims = input.shape().num_dims();
+ auto const input_data = input.data<T>();
+ auto const paddings_data = paddings.data<int32_t>();
+ auto const output_data = output.data<T>();
+
+ auto const input_b = input_dims > 3 ? input.shape().dim(input_dims - 4) : 1;
+ auto const input_h = input_dims > 2 ? input.shape().dim(input_dims - 3) : 1;
+ auto const input_w = input_dims > 1 ? input.shape().dim(input_dims - 2) : 1;
+ auto const input_d = input.shape().dim(input_dims - 1);
+
+ auto const input_h_offset = input_d * input_w;
+ auto const input_b_offset = input_h_offset * input_h;
+
+ auto const output_b = input_dims > 3 ? output.shape().dim(input_dims - 4) : 1;
+ auto const output_h = input_dims > 2 ? output.shape().dim(input_dims - 3) : 1;
+ auto const output_w = input_dims > 1 ? output.shape().dim(input_dims - 2) : 1;
+ auto const output_d = output.shape().dim(input_dims - 1);
+
+  auto const left_b_pad = input_dims > 3 ? paddings_data[2 * (input_dims - 4)] : 0;
+  auto const left_h_pad = input_dims > 2 ? paddings_data[2 * (input_dims - 3)] : 0;
+  auto const left_w_pad = input_dims > 1 ? paddings_data[2 * (input_dims - 2)] : 0;
+  auto const left_d_pad = paddings_data[2 * (input_dims - 1)];
+
+  auto const right_b_pad = input_dims > 3 ? paddings_data[2 * (input_dims - 4) + 1] : 0;
+  auto const right_h_pad = input_dims > 2 ? paddings_data[2 * (input_dims - 3) + 1] : 0;
+  auto const right_w_pad = input_dims > 1 ? paddings_data[2 * (input_dims - 2) + 1] : 0;
+  auto const right_d_pad = paddings_data[2 * (input_dims - 1) + 1];
+
+ const auto positive_mod = [](auto a, auto b) { return (a % b + b) % b; };
+ const auto offset_index = [input_d, input_h_offset, input_b_offset](auto d, auto w, auto h,
+ auto b) {
+ return d + w * input_d + h * input_h_offset + b * input_b_offset;
+ };
+
+ const auto symmetric_dim = [&positive_mod](auto i, auto left_pad, auto input) {
+ bool reflected = (((i < left_pad ? i + 1 - input : i) - left_pad) / input & 1) == 1;
+ return positive_mod(reflected ? input + left_pad - i - 1 : i - left_pad, input);
+ };
+
+ const T *in_ptr = input_data;
+ T *out_ptr = output_data;
+
+ for (int32_t b = 0; b < output_b; ++b)
+ {
+ for (int32_t h = 0; h < output_h; ++h)
+ {
+ for (int32_t w = 0; w < output_w; ++w)
+ {
+ for (int32_t d = 0; d < output_d; ++d)
+ {
+ if (b < left_b_pad || b >= output_b - right_b_pad || //
+ h < left_h_pad || h >= output_h - right_h_pad || //
+ w < left_w_pad || w >= output_w - right_w_pad || //
+ d < left_d_pad || d >= output_d - right_d_pad)
+ {
+ if (mode == MirrorPadMode::REFLECT)
+ {
+ *out_ptr++ = input_data[offset_index(
+ positive_mod(d - left_d_pad, input_d), positive_mod(w - left_w_pad, input_w),
+ positive_mod(h - left_h_pad, input_h), positive_mod(b - left_b_pad, input_b))];
+ }
+ else
+ {
+ *out_ptr++ = input_data[offset_index(
+ symmetric_dim(d, left_d_pad, input_d), symmetric_dim(w, left_w_pad, input_w),
+ symmetric_dim(h, left_h_pad, input_h), symmetric_dim(b, left_b_pad, input_b))];
+ }
+ }
+ else
+ {
+ *out_ptr++ = *in_ptr++;
+ }
+ }
+ }
+ }
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
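
The two lambdas above are the heart of the kernel: positive_mod gives REFLECT
a wrap-around source index (which coincides with true reflection for the
size-2 dimensions the tests exercise), while symmetric_dim also mirrors the
edge element itself. A standalone sketch printing both mappings for a
length-3 row padded by 2 on each side:

#include <iostream>

int main()
{
  auto positive_mod = [](int a, int b) { return (a % b + b) % b; };
  auto symmetric_dim = [&](int i, int lp, int len) {
    bool reflected = (((i < lp ? i + 1 - len : i) - lp) / len & 1) == 1;
    return positive_mod(reflected ? len + lp - i - 1 : i - lp, len);
  };

  const int n = 3, left_pad = 2; // 2 left pads, 3 core elements, 2 right pads
  std::cout << "SYMMETRIC source indices:      ";
  for (int i = 0; i < left_pad + n + 2; ++i)
    std::cout << symmetric_dim(i, left_pad, n) << " "; // 1 0 0 1 2 2 1
  std::cout << "\nREFLECT (wrap) source indices: ";
  for (int i = 0; i < left_pad + n + 2; ++i)
    std::cout << positive_mod(i - left_pad, n) << " "; // 1 2 0 1 2 0 1
  std::cout << "\n";
}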
diff --git a/onert-micro/luci-interpreter/src/kernels/MirrorPad.h b/onert-micro/luci-interpreter/src/kernels/MirrorPad.h
new file mode 100644
index 000000000..d3e6e858a
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/MirrorPad.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_MIRROR_PAD_H
+#define LUCI_INTERPRETER_KERNELS_MIRROR_PAD_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class MirrorPad : public KernelWithParams<MirrorPadParams>
+{
+public:
+ MirrorPad(const Tensor *input, const Tensor *paddings, Tensor *output,
+ const MirrorPadParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *paddings() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_MIRROR_PAD_H
diff --git a/onert-micro/luci-interpreter/src/kernels/MirrorPad.test.cpp b/onert-micro/luci-interpreter/src/kernels/MirrorPad.test.cpp
new file mode 100644
index 000000000..740d8cb22
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/MirrorPad.test.cpp
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/MirrorPad.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class MirrorPadTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ void Execute(const Tensor &input, const Tensor &padding, Tensor &output, MirrorPadMode mode)
+ {
+ MirrorPadParams params{};
+ params.mode = mode;
+
+ MirrorPad kernel(&input, &padding, &output, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output);
+ kernel.execute();
+ }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MirrorPadTest, FloatReflect)
+{
+ Shape input_shape = {1, 2, 2, 1};
+ Shape padding_shape = {4, 2};
+
+ std::vector<float> input_data{1.0f, 2.0f, //
+ 3.0f, 4.0f}; //
+ std::vector<int> padding_data{0, 0, 2, 1, 1, 2, 0, 0};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT);
+
+ std::vector<float> ref_output_data{2.0f, 1.0f, 2.0f, 1.0f, 2.0f, //
+ 4.0f, 3.0f, 4.0f, 3.0f, 4.0f, //
+ 2.0f, 1.0f, 2.0f, 1.0f, 2.0f, //
+ 4.0f, 3.0f, 4.0f, 3.0f, 4.0f, //
+ 2.0f, 1.0f, 2.0f, 1.0f, 2.0f}; //
+ std::initializer_list<int32_t> ref_output_shape{1, 5, 5, 1};
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MirrorPadTest, FloatSymmetric)
+{
+ Shape input_shape = {1, 2, 2, 1};
+ Shape padding_shape = {4, 2};
+
+ std::vector<float> input_data{1.0f, 2.0f, //
+ 3.0f, 4.0f}; //
+ std::vector<int> padding_data{0, 0, 2, 1, 1, 2, 0, 0};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC);
+
+ std::vector<float> ref_output_data{3.0, 3.0, 4.0, 4.0, 3.0, //
+ 1.0, 1.0, 2.0, 2.0, 1.0, //
+ 1.0, 1.0, 2.0, 2.0, 1.0, //
+ 3.0, 3.0, 4.0, 4.0, 3.0, //
+ 3.0, 3.0, 4.0, 4.0, 3.0}; //
+ std::initializer_list<int32_t> ref_output_shape{1, 5, 5, 1};
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MirrorPadTest, FloatSymmetric2Dim)
+{
+ Shape input_shape = {3, 1};
+ Shape padding_shape = {2, 2};
+
+ std::vector<float> input_data{1.0f, 2.0f, 3.0f};
+ std::vector<int> padding_data{1, 2, 0, 0};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC);
+
+ std::vector<float> ref_output_data{1.0, 1.0, 2.0, 3.0, 3.0, 2.0};
+ std::initializer_list<int32_t> ref_output_shape{6, 1};
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MirrorPadTest, Uint8Reflect)
+{
+ Shape input_shape = {1, 2, 3, 1};
+ Shape padding_shape = {4, 2};
+
+ float quant_tolerance = getTolerance(0.0f, 6.0f, 255);
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(0.0f, 6.0f);
+
+ std::vector<float> input_data{1.0f, 2.0f, 3.0f, //
+ 4.0f, 5.0f, 6.0f}; //
+ std::vector<int> padding_data{0, 0, 2, 1, 1, 3, 0, 0};
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ input_shape, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT);
+
+ std::vector<float> ref_output_data{
+ 3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, //
+ 6.0f, 4.0f, 5.0f, 6.0f, 4.0f, 5.0f, 6.0f, //
+ 3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, //
+ 6.0f, 4.0f, 5.0f, 6.0f, 4.0f, 5.0f, 6.0f, //
+ 3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, //
+ };
+ std::initializer_list<int32_t> ref_output_shape{1, 5, 7, 1};
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, quant_tolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MirrorPadTest, Uint8Symmetric)
+{
+ Shape input_shape = {1, 2, 3, 1};
+ Shape padding_shape = {4, 2};
+
+ float quant_tolerance = getTolerance(0.0f, 6.0f, 255);
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(0.0f, 6.0f);
+
+ std::vector<float> input_data{1.0f, 2.0f, 3.0f, //
+ 4.0f, 5.0f, 6.0f}; //
+ std::vector<int> padding_data{0, 0, 2, 1, 1, 3, 0, 0};
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ input_shape, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC);
+
+ std::vector<float> ref_output_data{
+ 4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, //
+ 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 2.0f, 1.0f, //
+ 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 2.0f, 1.0f, //
+ 4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, //
+ 4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, //
+ };
+ std::initializer_list<int32_t> ref_output_shape{1, 5, 7, 1};
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, quant_tolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MirrorPadTest, UnsupportedDim_NEG)
+{
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 1, 1, 1, 1}, {1.0f}, _memory_manager.get());
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>({5, 2}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ EXPECT_ANY_THROW(Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT));
+}
+
+TEST_F(MirrorPadTest, InvalidInputType_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor padding_tensor = makeInputTensor<DataType::S32>({1, 2}, {0, 0}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ EXPECT_ANY_THROW(Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Mul.cpp b/onert-micro/luci-interpreter/src/kernels/Mul.cpp
new file mode 100644
index 000000000..75f9b9047
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Mul.cpp
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+#include "PALMul.h"
+
+namespace luci_interpreter
+{
+
+void configure_kernel_CircleMul(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) ==
+ Tensor::element_type(kernel.input2()));
+  LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) ==
+                         Tensor::element_type(kernel.output()));
+#ifndef DIS_QUANT
+ if (Tensor::element_type(kernel.input1()) == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(Tensor::zero_points(kernel.input1()).size() == 1 &&
+ Tensor::zero_points(kernel.input2()).size() == 1);
+ LUCI_INTERPRETER_CHECK(Tensor::zero_point(kernel.input1()) == 0 &&
+ Tensor::zero_point(kernel.input2()) == 0 &&
+ Tensor::zero_point(kernel.output()) == 0);
+ }
+#endif // DIS_QUANT
+}
+
+void execute_kernel_CircleMul(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+
+ const auto *options = cur_op->builtin_options_as_MulOptions();
+
+ luci_interpreter::RuntimeShape input_shape1 =
+ kernels::getTensorRuntimeShape(kernel.input1(), runtime_graph);
+ luci_interpreter::RuntimeShape input_shape2 =
+ kernels::getTensorRuntimeShape(kernel.input2(), runtime_graph);
+
+ bool is_inplace = runtime_graph->is_inplace_op(cur_op);
+
+ switch (Tensor::element_type(kernel.input1()))
+ {
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ {
+ auto tiso_func = luci_interpreter_pal::Mul<float>;
+ auto broadcast_tiso_func = luci_interpreter_pal::BroadcastMul4DSlow<float>;
+ if (is_inplace)
+ {
+ kernels::evalTISOInplaceKernel<float>(tiso_func, broadcast_tiso_func, &kernel, options,
+ std::move(input_shape1), std::move(input_shape2));
+ }
+ else
+ {
+ kernels::TISOData kernel_data = kernel.readData();
+ kernels::evalTISOKernel<float>(tiso_func, broadcast_tiso_func, &kernel, &kernel_data,
+ options, std::move(input_shape1), std::move(input_shape2));
+ }
+ }
+ break;
+#endif // DIS_FLOAT
+ case DataType::S64:
+ {
+ auto tiso_func = luci_interpreter_pal::Mul<int64_t>;
+ auto broadcast_tiso_func = luci_interpreter_pal::BroadcastMul4DSlow<int64_t>;
+ if (is_inplace)
+ {
+ kernels::evalTISOInplaceKernel<int64_t>(tiso_func, broadcast_tiso_func, &kernel, options,
+ std::move(input_shape1), std::move(input_shape2));
+ }
+ else
+ {
+ kernels::TISOData kernel_data = kernel.readData();
+ kernels::evalTISOKernel<int64_t>(tiso_func, broadcast_tiso_func, &kernel, &kernel_data,
+ options, std::move(input_shape1), std::move(input_shape2));
+ }
+ }
+ break;
+ case DataType::S32:
+ {
+ auto tiso_func = luci_interpreter_pal::Mul<int32_t>;
+ auto broadcast_tiso_func = luci_interpreter_pal::BroadcastMul4DSlow<int32_t>;
+ if (is_inplace)
+ {
+ kernels::evalTISOInplaceKernel<int32_t>(tiso_func, broadcast_tiso_func, &kernel, options,
+ std::move(input_shape1), std::move(input_shape2));
+ }
+ else
+ {
+ kernels::TISOData kernel_data = kernel.readData();
+ kernels::evalTISOKernel<int32_t>(tiso_func, broadcast_tiso_func, &kernel, &kernel_data,
+ options, std::move(input_shape1), std::move(input_shape2));
+ }
+ }
+ break;
+#if 0
+#ifndef DIS_QUANT
+  // TODO: check quantized Mul
+ case DataType::U8:
+ {
+ auto tiso_func = [](const luci_interpreter_pal::ArithmeticParams &params,
+ const luci_interpreter::RuntimeShape &input1_shape, const uint8_t *input1_data,
+ const luci_interpreter::RuntimeShape &input2_shape, const uint8_t *input2_data,
+ const luci_interpreter::RuntimeShape &output_shape, uint8_t *output_data) {
+ luci_interpreter_pal::Mul(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data);
+ };
+ auto broadcast_tiso_func =
+ [](const luci_interpreter_pal::ArithmeticParams &params, const luci_interpreter::RuntimeShape &input1_shape,
+ const uint8_t *input1_data, const luci_interpreter::RuntimeShape &input2_shape,
+ const uint8_t *input2_data, const luci_interpreter::RuntimeShape &output_shape,
+ uint8_t *output_data) {
+ luci_interpreter_pal::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data);
+ };
+ if (is_inplace)
+ {
+ kernels::evalTISOInplaceQuantizedKernel<uint8_t>(tiso_func, broadcast_tiso_func, &kernel,
+ options);
+ }
+ else
+ {
+ kernels::TISOData kernel_data = kernel.readData();
+ kernels::evalTISOQuantizedKernel<uint8_t>(tiso_func, broadcast_tiso_func, &kernel,
+ &kernel_data, options);
+ }
+ }
+ break;
+#endif // DIS_QUANT
+#endif // 0
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+} // namespace luci_interpreter
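
Each case above wires two function pointers — a flat path and a 4-D broadcast
path — into a shared evaluator. The following is a simplified stand-in (not
the real evalTISOKernel, and with broadcasting reduced to the scalar case)
that illustrates the dispatch:

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

template <typename T>
std::vector<T> mul_dispatch(const std::vector<T> &a, const std::vector<T> &b)
{
  std::vector<T> out(a.size());
  if (a.size() == b.size()) // matching shapes: fast flat element-wise path
  {
    for (std::size_t i = 0; i < a.size(); ++i)
      out[i] = a[i] * b[i];
    return out;
  }
  // Broadcast fallback, reduced here to "b is a scalar".
  for (std::size_t i = 0; i < a.size(); ++i)
    out[i] = a[i] * b[0];
  return out;
}

int main()
{
  const auto flat = mul_dispatch<int32_t>({1, 2, 3}, {4, 5, 6});
  const auto bcast = mul_dispatch<int32_t>({1, 2, 3}, {10});
  for (auto v : flat)
    std::cout << v << " "; // 4 10 18
  std::cout << "| ";
  for (auto v : bcast)
    std::cout << v << " "; // 10 20 30
  std::cout << "\n";
}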
diff --git a/onert-micro/luci-interpreter/src/kernels/Mul.test.cpp b/onert-micro/luci-interpreter/src/kernels/Mul.test.cpp
new file mode 100644
index 000000000..391c783a1
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Mul.test.cpp
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/mul/FloatMulKernel.h"
+#include "luci_interpreter/test_models/mul/IntMulKernel.h"
+#include "luci_interpreter/test_models/mul/NegMulKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class MulTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T> std::vector<T> checkMulKernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 2);
+
+ // set left input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ // set right input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(1));
+ std::copy(test_data_base->get_input_data_by_index(1).begin(),
+ test_data_base->get_input_data_by_index(1).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(MulTest, Float_P)
+{
+ // No broadcast
+ {
+ const bool is_with_broadcast = false;
+ test_kernel::TestDataFloatMul test_data_kernel(is_with_broadcast);
+ std::vector<float> output_data_vector = checkMulKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, kernels::testing::FloatArrayNear(
+ test_data_kernel.get_output_data_by_index(0), 0.0001f));
+ }
+ // With broadcast
+ {
+ const bool is_with_broadcast = true;
+ test_kernel::TestDataFloatMul test_data_kernel(is_with_broadcast);
+ std::vector<float> output_data_vector = checkMulKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, kernels::testing::FloatArrayNear(
+ test_data_kernel.get_output_data_by_index(0), 0.0001f));
+ }
+}
+
+TEST_F(MulTest, INT_P)
+{
+ // No broadcast
+ {
+ const bool is_with_broadcast = false;
+ test_kernel::TestDataIntMul test_data_kernel(is_with_broadcast);
+ const auto output_data_vector = checkMulKernel<int32_t>(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+ }
+ // With broadcast
+ {
+ const bool is_with_broadcast = true;
+ test_kernel::TestDataIntMul test_data_kernel(is_with_broadcast);
+ const auto output_data_vector = checkMulKernel<int32_t>(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+ }
+}
+
+TEST_F(MulTest, Wrong_Input1_Type_NEG)
+{
+ test_kernel::NegTestDataInput1WrongTypeMul test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+TEST_F(MulTest, Wrong_Input2_Type_NEG)
+{
+ test_kernel::NegTestDataInput2WrongTypeMul test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+TEST_F(MulTest, Wrong_Output_Type_NEG)
+{
+ test_kernel::NegTestDataInt16TypeMul test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+// TODO: add tests for U8 and S16
+// TODO: add tests for inplace optimizations for all types
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Neg.cpp b/onert-micro/luci-interpreter/src/kernels/Neg.cpp
new file mode 100644
index 000000000..3f08df319
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Neg.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+#include "PALNeg.h"
+
+namespace luci_interpreter
+{
+
+void configure_kernel_CircleNeg(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ const auto input_index = cur_op->inputs()->operator[](0);
+ const auto output_index = cur_op->outputs()->operator[](0);
+
+ assert(input_index != -1);
+ assert(output_index != -1);
+
+ const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+ const auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(input) == Tensor::element_type(output));
+
+ assert(Tensor::num_dims(input) == 4);
+
+  // TODO: enable this resize only for kernels with dynamic shapes
+  // output->resize(input->shape());
+}
+
+void execute_kernel_CircleNeg(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ const auto input_index = cur_op->inputs()->operator[](0);
+ const auto output_index = cur_op->outputs()->operator[](0);
+
+ assert(input_index != -1);
+ assert(output_index != -1);
+
+ const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+ const auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+ const uint8_t *input_data = runtime_graph->getDataByTensor(input);
+ uint8_t *output_data = runtime_graph->getDataByTensor(output);
+
+ assert(input_data != nullptr);
+ assert(output_data != nullptr);
+
+ switch (Tensor::element_type(input))
+ {
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+
+ luci_interpreter_pal::Negate(
+ kernels::getTensorShape(input), kernels::getTensorData<float>(input_data),
+ kernels::getTensorShape(output), kernels::getTensorData<float>(output_data));
+
+ break;
+#endif // DIS_FLOAT
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+} // namespace luci_interpreter
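
luci_interpreter_pal::Negate is supplied by the platform abstraction layer;
assuming matching input and output shapes it reduces to a flat sign flip,
roughly as below (a sketch, not the actual PAL source):

#include <cstdint>
#include <iostream>

// Hypothetical stand-in for the inner loop of luci_interpreter_pal::Negate.
void negate_flat(const float *input, int32_t flat_size, float *output)
{
  for (int32_t i = 0; i < flat_size; ++i)
    output[i] = -input[i];
}

int main()
{
  const float in[4] = {1.5f, -2.0f, 0.0f, 7.25f};
  float out[4];
  negate_flat(in, 4, out);
  for (float v : out)
    std::cout << v << " "; // -1.5 2 -0 -7.25
  std::cout << "\n";
}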
diff --git a/onert-micro/luci-interpreter/src/kernels/Neg.test.cpp b/onert-micro/luci-interpreter/src/kernels/Neg.test.cpp
new file mode 100644
index 000000000..0cb0a272b
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Neg.test.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/neg/FloatNegKernel.h"
+#include "luci_interpreter/test_models/neg/NegNegKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class NegTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T> std::vector<T> checkNegKernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 1);
+
+ // Set input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(NegTest, Float_P)
+{
+ test_kernel::TestDataFloatNeg test_data_kernel;
+ std::vector<float> output_data_vector = checkNegKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(NegTest, Input_output_type_mismatch_NEG)
+{
+ test_kernel::NegTestDataInputOutputTypeMismatchNegKernel test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+TEST_F(NegTest, Invalid_input_shape_NEG)
+{
+ test_kernel::NegTestDataInvalidInputShapeNegKernel test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/NotEqual.cpp b/onert-micro/luci-interpreter/src/kernels/NotEqual.cpp
new file mode 100644
index 000000000..92f646f95
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/NotEqual.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+#include "TISOKernel.h"
+
+#include "PALComparisons.h"
+
+namespace luci_interpreter
+{
+
+namespace
+{
+// TODO: reduce code duplication with the Less kernel
+template <typename T>
+void evalGeneric(const circle::Tensor *x, const circle::Tensor *y, const circle::Tensor *output,
+ BaseRuntimeGraph *runtime_graph)
+{
+ auto x_data = kernels::getTensorData<T>(runtime_graph->getDataByTensor(x));
+ if (x_data == nullptr)
+ x_data = kernels::getTensorData<T>(runtime_graph->getConstDataByTensor(x));
+
+ assert(x_data != nullptr);
+
+ auto y_data = kernels::getTensorData<T>(runtime_graph->getDataByTensor(y));
+ if (y_data == nullptr)
+ y_data = kernels::getTensorData<T>(runtime_graph->getConstDataByTensor(y));
+
+ assert(y_data != nullptr);
+
+ auto output_data = kernels::getTensorData<bool>(runtime_graph->getDataByTensor(output));
+
+ luci_interpreter_pal::ComparisonParams op_params;
+ op_params.is_broadcast = Tensor::num_elements(x) != Tensor::num_elements(y);
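+ // The broadcast flag only records an element-count mismatch; the call below
+ // compares both inputs element-wise over a single flat size.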
+
+ const int64_t flat_size = kernels::getTensorShape(x).flatSize();
+ luci_interpreter_pal::ComparisonNoScaling<T>(flat_size, x_data, y_data, output_data,
+ luci_interpreter_pal::NotEqualFn);
+}
+
+} // namespace
+
+void configure_kernel_CircleNotEqual(const circle::Operator *cur_op,
+ BaseRuntimeGraph *runtime_graph)
+{
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) ==
+ Tensor::element_type(kernel.input2()));
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.output()) == DataType::BOOL);
+}
+
+void execute_kernel_CircleNotEqual(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+
+ switch (Tensor::element_type(kernel.input1()))
+ {
+ case DataType::S64:
+ evalGeneric<int64_t>(kernel.input1(), kernel.input2(), kernel.output(), runtime_graph);
+ break;
+ case DataType::S32:
+ evalGeneric<int32_t>(kernel.input1(), kernel.input2(), kernel.output(), runtime_graph);
+ break;
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ evalGeneric<float>(kernel.input1(), kernel.input2(), kernel.output(), runtime_graph);
+ break;
+#endif // DIS_FLOAT
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/NotEqual.test.cpp b/onert-micro/luci-interpreter/src/kernels/NotEqual.test.cpp
new file mode 100644
index 000000000..520a02ea5
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/NotEqual.test.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/notequal/FloatNotEqualKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class NotEqualTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T, typename U>
+std::vector<U> checkNotEqualKernel(test_kernel::TestDataBase<T, U> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 2);
+
+ // set left input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ // set right input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(1));
+ std::copy(test_data_base->get_input_data_by_index(1).begin(),
+ test_data_base->get_input_data_by_index(1).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ U *output_data = reinterpret_cast<U *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(U));
+ std::vector<U> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(NotEqualTest, FloatNoBroadcast_P)
+{
+ const bool is_with_broadcast = false;
+ test_kernel::TestDataFloatNotEqual test_data_kernel(is_with_broadcast, false);
+ std::vector<bool> output_data_vector = checkNotEqualKernel<float, bool>(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(NotEqualTest, FloatNoBroadcast_NEG)
+{
+ const bool is_with_broadcast = false;
+ test_kernel::TestDataFloatNotEqual test_data_kernel(is_with_broadcast, true);
+ EXPECT_DEATH(checkNotEqualKernel(&test_data_kernel), "");
+}
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/OneHot.cpp b/onert-micro/luci-interpreter/src/kernels/OneHot.cpp
new file mode 100644
index 000000000..fa99895fe
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/OneHot.cpp
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/OneHot.h"
+#include "kernels/Utils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+namespace
+{
+
+template <typename T>
+void OneHotComputeImpl(const Tensor *indices_tensor, const Tensor *on_value_tensor,
+ const Tensor *off_value_tensor, int32_t depth, int32_t axis,
+ Tensor *output_tensor)
+{
+ // define input shape and resolve axis == -1 to the last (new) axis
+ auto const &input_shape = indices_tensor->shape();
+ axis = axis == -1 ? input_shape.num_dims() : axis;
+
+ // TODO support other integer input types
+ auto const *indices = getTensorData<int32_t>(indices_tensor);
+ auto const on_value = getTensorData<T>(on_value_tensor)[0];
+ auto const off_value = getTensorData<T>(off_value_tensor)[0];
+ auto *output = getTensorData<T>(output_tensor);
+
+ // prefix_dim_size == # of elements before the axis
+ // depth == # of elements per axis
+ // suffix_dim_size == # of elements after the axis
+ auto prefix_dim_size = 1;
+ for (int32_t i = 0; i < axis; ++i)
+ {
+ prefix_dim_size *= input_shape.dim(i);
+ }
+ assert(prefix_dim_size > 0);
+ auto const suffix_dim_size = input_shape.num_elements() / prefix_dim_size;
+
+ // View the indices as a matrix of size:
+ // prefix_dim_size x suffix_dim_size
+ // View the output as a matrix of size:
+ // prefix_dim_size x depth x suffix_dim_size
+ // Then the output is:
+ // output(i, j, k) == (indices(i, k) == j) ? on : off
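+ // Worked example: indices = {0, 2} (shape {2}), depth = 3, axis = -1 resolves
+ // to 1, so prefix_dim_size = 2, suffix_dim_size = 1 and the two output rows
+ // are {on, off, off} and {off, off, on}.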
+ for (int32_t i = 0; i < prefix_dim_size; ++i)
+ for (int32_t j = 0; j < depth; ++j)
+ for (int32_t k = 0; k < suffix_dim_size; ++k, ++output)
+ *output = indices[i * suffix_dim_size + k] == j ? on_value : off_value;
+}
+
+} // namespace
+
+OneHot::OneHot(const Tensor *indices, const Tensor *depth, const Tensor *on_value,
+ const Tensor *off_value, Tensor *output, const OneHotParams &params)
+ : KernelWithParams<OneHotParams>({indices, depth, on_value, off_value}, {output}, params)
+{
+ // Do nothing
+}
+
+void OneHot::configure()
+{
+ // check types
+ LUCI_INTERPRETER_CHECK(indices()->element_type() == DataType::S32);
+ LUCI_INTERPRETER_CHECK(depth()->element_type() == DataType::S32);
+ LUCI_INTERPRETER_CHECK(on_value()->element_type() == off_value()->element_type());
+ LUCI_INTERPRETER_CHECK(output()->element_type() == on_value()->element_type());
+
+ // check shape dependent parameters
+ LUCI_INTERPRETER_CHECK(on_value()->shape().num_elements() == 1);
+ LUCI_INTERPRETER_CHECK(off_value()->shape().num_elements() == 1);
+ LUCI_INTERPRETER_CHECK(depth()->shape().num_elements() == 1);
+ LUCI_INTERPRETER_CHECK(params().axis >= -1 && params().axis <= indices()->shape().num_dims());
+
+ // define parameters that affect the output shape
+ auto const depth_value = getTensorData<int32_t>(depth())[0];
+ auto const &input_shape = indices()->shape();
+ auto const input_dims = input_shape.num_dims();
+ auto const axis = params().axis == -1 ? input_dims : params().axis;
+
+ // define output shape
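+ // (e.g. indices of shape {2, 3} with depth 4 and axis 1 yield output {2, 4, 3})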
+ Shape output_shape(input_shape.num_dims() + 1);
+ {
+ for (int32_t d = 0; d < axis; ++d)
+ output_shape.dim(d) = input_shape.dim(d);
+
+ output_shape.dim(axis) = depth_value;
+
+ for (int32_t d = axis + 1; d < output_shape.num_dims(); ++d)
+ output_shape.dim(d) = input_shape.dim(d - 1);
+ }
+ // TODO: enable this only for kernels with dynamic shapes
+ // reshape output
+ output()->resize(output_shape);
+}
+
+void OneHot::execute() const
+{
+ auto const depth_value = getTensorData<int32_t>(depth())[0];
+ auto const axis = params().axis;
+
+ switch (output()->element_type())
+ {
+ case DataType::FLOAT32:
+ OneHotComputeImpl<float>(indices(), on_value(), off_value(), depth_value, axis, output());
+ break;
+ case DataType::U8:
+ OneHotComputeImpl<uint8_t>(indices(), on_value(), off_value(), depth_value, axis, output());
+ break;
+ case DataType::S16:
+ OneHotComputeImpl<int16_t>(indices(), on_value(), off_value(), depth_value, axis, output());
+ break;
+ default:
+ // TODO Support other data types
+ assert(false && "Not supported, yet!");
+ break;
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/OneHot.h b/onert-micro/luci-interpreter/src/kernels/OneHot.h
new file mode 100644
index 000000000..572f857ae
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/OneHot.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_ONEHOT_H
+#define LUCI_INTERPRETER_KERNELS_ONEHOT_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class OneHot : public KernelWithParams<OneHotParams>
+{
+public:
+ OneHot(const Tensor *indices, const Tensor *depth, const Tensor *on_value,
+ const Tensor *off_value, Tensor *output, const OneHotParams &params);
+
+ const Tensor *indices() const { return _inputs[0]; }
+ const Tensor *depth() const { return _inputs[1]; }
+ const Tensor *on_value() const { return _inputs[2]; }
+ const Tensor *off_value() const { return _inputs[3]; }
+
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_ONEHOT_H
diff --git a/onert-micro/luci-interpreter/src/kernels/OneHot.test.cpp b/onert-micro/luci-interpreter/src/kernels/OneHot.test.cpp
new file mode 100644
index 000000000..45b6968fa
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/OneHot.test.cpp
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/OneHot.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T1, typename T2>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<T1> input_data, std::initializer_list<int32_t> depth_data,
+ std::initializer_list<T2> on_value_data, std::initializer_list<T2> off_value_data,
+ int32_t axis, std::initializer_list<T2> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ constexpr auto input_type = getElementType<T1>();
+ constexpr auto output_type = getElementType<T2>();
+
+ Tensor input_tensor = makeInputTensor<input_type>(input_shape, input_data, memory_manager.get());
+ Tensor depth_tensor = makeInputTensor<DataType::S32>({}, depth_data, memory_manager.get());
+ Tensor on_value_tensor = makeInputTensor<output_type>({}, on_value_data, memory_manager.get());
+ Tensor off_value_tensor = makeInputTensor<output_type>({}, off_value_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(output_type);
+
+ OneHotParams params{};
+ params.axis = axis;
+
+ OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
+ params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+ EXPECT_THAT(extractTensorData<T2>(output_tensor), ::testing::ElementsAreArray(output_data));
+}
+
+template <typename T> class OneHotTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t, int16_t>;
+TYPED_TEST_SUITE(OneHotTest, DataTypes);
+
+TYPED_TEST(OneHotTest, BasicPattern)
+{
+ // axis 0
+ Check<int32_t, TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{4, 2, 3},
+ /*input_data=*/
+ {
+ 0, 3, 5, //
+ 7, 3, 0, //
+ },
+ /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0},
+ /*axis=*/0,
+ /*output_data=*/
+ {
+ 1, 0, 0, //
+ 0, 0, 1, //
+
+ 0, 0, 0, //
+ 0, 0, 0, //
+
+ 0, 0, 0, //
+ 0, 0, 0, //
+
+ 0, 1, 0, //
+ 0, 1, 0, //
+ });
+ // axis 1
+ Check<int32_t, TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 4, 3},
+ /*input_data=*/
+ {
+ 0, 3, 5, //
+ 7, 3, 0, //
+ },
+ /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0},
+ /*axis=*/1,
+ /*output_data=*/
+ {
+ 1, 0, 0, //
+ 0, 0, 0, //
+ 0, 0, 0, //
+ 0, 1, 0, //
+
+ 0, 0, 1, //
+ 0, 0, 0, //
+ 0, 0, 0, //
+ 0, 1, 0, //
+ });
+ // axis -1
+ Check<int32_t, TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 3, 4},
+ /*input_data=*/
+ {
+ 0, 3, 5, //
+ 7, 3, 0, //
+ },
+ /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0},
+ /*axis=*/-1,
+ /*output_data=*/
+ {
+ 1, 0, 0, 0, //
+ 0, 0, 0, 1, //
+ 0, 0, 0, 0, //
+
+ 0, 0, 0, 0, //
+ 0, 0, 0, 1, //
+ 1, 0, 0, 0, //
+ });
+}
+
+TEST(OneHotTest, UnsupportedInputType_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ // input type should be integer
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {0}, memory_manager.get());
+
+ Tensor depth_tensor = makeInputTensor<DataType::S32>({}, {1}, memory_manager.get());
+ Tensor on_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {1.0}, memory_manager.get());
+ Tensor off_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {0.0}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ OneHotParams params = {-1};
+
+ OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(OneHotTest, OutputTypeMismatch_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::S32>({1}, {0}, memory_manager.get());
+ Tensor depth_tensor = makeInputTensor<DataType::S32>({}, {1}, memory_manager.get());
+
+ // types of on_value, off_value, and output_tensor should be the same
+ Tensor on_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {1.0}, memory_manager.get());
+ Tensor off_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {0.0}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16);
+
+ OneHotParams params = {-1};
+
+ OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(OneHotTest, InvalidAxis_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::S32>({1}, {0}, memory_manager.get());
+ Tensor depth_tensor = makeInputTensor<DataType::S32>({}, {1}, memory_manager.get());
+ Tensor on_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {1.0}, memory_manager.get());
+ Tensor off_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {0.0}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ // axis should be in [-1, input_shape.rank]
+ OneHotParams params = {-2};
+
+ OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/PRelu.cpp b/onert-micro/luci-interpreter/src/kernels/PRelu.cpp
new file mode 100644
index 000000000..3d64215c5
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/PRelu.cpp
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/PRelu.h"
+
+#include "kernels/BinaryOpCommon.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/binary_function.h>
+#include <tensorflow/lite/kernels/internal/reference/prelu.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+PRelu::PRelu(const Tensor *input, const Tensor *alpha, Tensor *output)
+ : Kernel({input, alpha}, {output})
+{
+}
+
+PRelu::~PRelu()
+{
+ // Out-of-line destructor: ChannelQuantMultipliers is only forward-declared in
+ // the header, so the vector of alpha multipliers must be destroyed here, where
+ // the type is complete
+}
+
+void PRelu::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(alpha()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(input()->scales().size() <= 1);
+ LUCI_INTERPRETER_CHECK(output()->scales().size() <= 1);
+
+ if (input()->element_type() == DataType::U8)
+ {
+ LUCI_INTERPRETER_CHECK(alpha()->scales().size() <= 1); // remove when a CWQ (channel-wise quantization) kernel arrives
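+ // Requantization: for x < 0, y = alpha * x; ignoring zero points (handled via
+ // the offsets in PreluParams), s_out * q_y = (s_in * q_x) * (s_alpha * q_a),
+ // so the negative branch rescales by s_in * s_alpha / s_out and the positive
+ // (identity) branch by s_in / s_out, each as a fixed-point multiplier/shift.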
+ _alpha_multipliers.resize(1);
+ double alpha_multiplier = input()->scale() * alpha()->scale() / output()->scale();
+ quantizeMultiplier(alpha_multiplier, &_alpha_multipliers[0].multiplier,
+ &_alpha_multipliers[0].shift);
+ double identity_multiplier = input()->scale() / output()->scale();
+ quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity);
+ }
+ else if (input()->element_type() == DataType::S16)
+ {
+ // Common check for correctness of quant params
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
+ for (size_t channel = 0; channel < alpha()->zero_points().size(); ++channel)
+ {
+ LUCI_INTERPRETER_CHECK(alpha()->zero_points()[channel] == 0);
+ }
+ // PRelu-specific checks for CWQ
+ LUCI_INTERPRETER_CHECK(alpha()->quantized_dimension() == alpha()->shape().num_dims() - 1);
+ LUCI_INTERPRETER_CHECK(static_cast<int32_t>(alpha()->scales().size()) ==
+ alpha()->shape().dim(alpha()->quantized_dimension()));
+ LUCI_INTERPRETER_CHECK(alpha()->shape().num_elements() ==
+ input()->shape().dim(input()->shape().num_dims() - 1));
+
+ // all dimensions of alpha except the last one should be of size 1
+ for (int dim = 0; dim < alpha()->shape().num_dims() - 1; ++dim)
+ {
+ LUCI_INTERPRETER_CHECK(alpha()->shape().dim(dim) == 1);
+ }
+
+ std::vector<double> real_multipliers =
+ getQuantizedConvolutionMultiplers(input()->scale(), alpha()->scales(), output()->scale());
+
+ _alpha_multipliers = quantizeMultipliers(real_multipliers);
+
+ double identity_multiplier = input()->scale() / output()->scale();
+ quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity);
+ }
+ // TODO: enable this only for kernels with dynamic shapes
+ output()->resize(calculateShapeForBroadcast(input()->shape(), alpha()->shape()));
+}
+
+void PRelu::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ case DataType::S16:
+ evalQuantizedS16();
+ break;
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+void PRelu::evalFloat() const
+{
+ const auto input_data = getTensorData<float>(input());
+ const auto alpha_data = getTensorData<float>(alpha());
+ const auto size = getTensorShape(input()).FlatSize();
+ auto output_data = getTensorData<float>(output());
+
+ auto PReluFunc = [](float input, float alpha) { return input >= 0.0 ? input : input * alpha; };
+
+ if (input()->shape() != alpha()->shape())
+ {
+ tflite::reference_ops::BroadcastBinaryFunction4DSlow<float, float, float>(
+ getTensorShape(input()), getTensorData<float>(input()), getTensorShape(alpha()),
+ getTensorData<float>(alpha()), getTensorShape(output()), getTensorData<float>(output()),
+ PReluFunc);
+ }
+ else
+ {
+ for (auto i = decltype(size){0}; i < size; ++i)
+ {
+ if (input_data[i] >= 0)
+ output_data[i] = input_data[i];
+ else
+ output_data[i] = input_data[i] * alpha_data[i];
+ }
+ }
+}
+
+void PRelu::evalQuantized() const
+{
+ tflite::PreluParams op_params{};
+
+ op_params.input_offset = -input()->zero_point(); // Note the '-'.
+ op_params.alpha_offset = -alpha()->zero_point(); // Note the '-'.
+ op_params.output_offset = output()->zero_point();
+ op_params.output_shift_1 = _output_shift_identity;
+ op_params.output_multiplier_1 = _output_multiplier_identity;
+ op_params.output_shift_2 = _alpha_multipliers[0].shift;
+ op_params.output_multiplier_2 = _alpha_multipliers[0].multiplier;
+
+ if (input()->shape() != alpha()->shape())
+ {
+ tflite::reference_ops::BroadcastPrelu4DSlow(
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(alpha()),
+ getTensorData<uint8_t>(alpha()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Prelu<uint8_t>(
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(alpha()),
+ getTensorData<uint8_t>(alpha()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+ }
+}
+
+static inline int16_t evalElemS16PRelu(int16_t input_val, int16_t alpha_val,
+ const ChannelQuantMultipliers &identity_mult,
+ const ChannelQuantMultipliers &alpha_mult)
+{
+ constexpr int32_t quantized_min = std::numeric_limits<int16_t>::min();
+ constexpr int32_t quantized_max = std::numeric_limits<int16_t>::max();
+
+ const int32_t output_val =
+ input_val >= 0
+ ? tflite::MultiplyByQuantizedMultiplier(static_cast<int32_t>(input_val),
+ identity_mult.multiplier, identity_mult.shift)
+ : tflite::MultiplyByQuantizedMultiplier(static_cast<int32_t>(input_val * alpha_val),
+ alpha_mult.multiplier, alpha_mult.shift);
+ const int32_t clamped_output = std::min(quantized_max, std::max(quantized_min, output_val));
+ return clamped_output;
+}
+
+void PRelu::evalQuantizedS16() const
+{
+ // Note that this kernel assumes alpha is CWQ
+ tflite::RuntimeShape input_shape = getTensorShape(input());
+ const int16_t *input_data = input()->data<int16_t>();
+ const int16_t *alpha_data = alpha()->data<int16_t>();
+ int16_t *output_data = output()->data<int16_t>();
+
+ const ChannelQuantMultipliers pos_mult{_output_shift_identity, _output_multiplier_identity};
+
+ const int last_dim = input()->shape().num_dims() - 1;
+
+ int32_t outer_dims_size = 1;
+ for (int i = 0; i < last_dim; ++i)
+ outer_dims_size *= input_shape.Dims(i);
+ int32_t quant_dim_size = input_shape.Dims(last_dim);
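+ // Channels vary fastest: element (outer, channel) sits at offset
+ // outer * quant_dim_size + channel, and each channel applies its own
+ // negative-slope multiplier from _alpha_multipliers.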
+
+ for (int32_t outer_dims = 0; outer_dims < outer_dims_size; ++outer_dims)
+ for (int32_t quant_channel = 0; quant_channel < quant_dim_size; ++quant_channel)
+ {
+ const ChannelQuantMultipliers &neg_mult = _alpha_multipliers[quant_channel];
+ size_t offset = static_cast<size_t>(outer_dims) * static_cast<size_t>(quant_dim_size);
+ offset += quant_channel;
+
+ output_data[offset] =
+ evalElemS16PRelu(input_data[offset], alpha_data[quant_channel], pos_mult, neg_mult);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/PRelu.h b/onert-micro/luci-interpreter/src/kernels/PRelu.h
new file mode 100644
index 000000000..f7735d418
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/PRelu.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_PRELU_H
+#define LUCI_INTERPRETER_KERNELS_PRELU_H
+
+#include "core/Kernel.h"
+#include <vector>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ChannelQuantMultipliers;
+
+class PRelu : public Kernel
+{
+public:
+ PRelu(const Tensor *input, const Tensor *alpha, Tensor *output);
+
+ ~PRelu();
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *alpha() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+ void evalQuantizedS16() const;
+
+private:
+ std::vector<ChannelQuantMultipliers> _alpha_multipliers;
+ // TODO merge these into one ChannelQuantMultipliers object
+ int32_t _output_multiplier_identity = 0;
+ int _output_shift_identity = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_PRELU_H
diff --git a/onert-micro/luci-interpreter/src/kernels/PRelu.test.cpp b/onert-micro/luci-interpreter/src/kernels/PRelu.test.cpp
new file mode 100644
index 000000000..6d97382de
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/PRelu.test.cpp
@@ -0,0 +1,397 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/PRelu.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> alpha_shape,
+ std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data,
+ std::initializer_list<T> alpha_data, std::initializer_list<T> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor alpha_tensor =
+ makeInputTensor<element_type>(alpha_shape, alpha_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST(PReluTest, FloatSimple)
+{
+ Check<float>(/*input_shape=*/{2, 3}, /*alpha_shape=*/{2, 3},
+ /*output_shape=*/{2, 3},
+ /*input_data=*/
+ {
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -1.0f, -2.0f, // Row 2
+ },
+ /*alpha_data=*/
+ {
+ 0.0f, 0.5f, 0.1f, // Row 1
+ 0.0f, 0.5f, 0.1f, // Row 2
+ },
+ /*output_data=*/
+ {
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -0.5f, -0.2f, // Row 2
+ });
+
+ SUCCEED();
+}
+
+TEST(PReluTest, FloatBroadcast)
+{
+ Check<float>(/*input_shape=*/{1, 2, 2, 3}, /*alpha_shape=*/{1, 1, 3},
+ /*output_shape=*/{1, 2, 2, 3},
+ /*input_data=*/
+ {
+ 0.0f, 0.0f, 0.0f, // Row 1, Column 1
+ 1.0f, 1.0f, 1.0f, // Row 1, Column 2
+ -1.0f, -1.0f, -1.0f, // Row 2, Column 1
+ -2.0f, -2.0f, -2.0f, // Row 2, Column 2
+ },
+ /*alpha_data=*/
+ {0.0f, 1.0f, 2.0f},
+ /*output_data=*/
+ {
+ 0.0f, 0.0f, 0.0f, // Row 1, Column 1
+ 1.0f, 1.0f, 1.0f, // Row 1, Column 2
+ 0.0f, -1.0f, -2.0f, // Row 2, Column 1
+ 0.0f, -2.0f, -4.0f, // Row 2, Column 2
+ });
+
+ SUCCEED();
+}
+
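+// One quantization step over the dequantized range: uint8 maps [min, max] onto
+// 256 levels, i.e. 255 steps.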
+float GetTolerance(float min, float max) { return (max - min) / 255.0f; }
+
+TEST(PReluTest, Uint8Simple)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data{-0.8f, 0.2f, 0.9f, 0.7f, 0.1f, -0.4f};
+ std::vector<float> alpha_data{0.5f, 0.5f, 0.5f, 0.25f, 1.0f, 0.25f};
+ std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, 0.7f, 0.1f, -0.1f};
+
+ float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 3, 1}, quant_param.first, quant_param.second, alpha_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, kQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 3, 1}));
+
+ SUCCEED();
+}
+
+TEST(PReluTest, Uint8Broadcast)
+{
+ std::vector<float> input_data{
+ 0.0f, 0.0f, 0.0f, // Row 1, Column 1
+ 0.5f, 0.5f, 0.5f, // Row 1, Column 2
+ -1.0f, -1.0f, -1.0f, // Row 2, Column 1
+ -0.25f, -0.25f, -0.25f, // Row 2, Column 2
+ };
+ std::vector<float> alpha_data{0.0f, 0.5f, -0.5f};
+ std::vector<float> ref_output_data{
+ 0.0f, 0.0f, 0.0f, // Row 1, Column 1
+ 0.5f, 0.5f, 0.5f, // Row 1, Column 2
+ 0.0f, -0.5f, 0.5f, // Row 2, Column 1
+ 0.0f, -0.125f, 0.125f // Row 2, Column 2
+ };
+ std::vector<float> ref_quant_output_data{
+ 128, 128, 128, // Row 1, Column 1
+ 192, 192, 192, // Row 1, Column 2
+ 128, 64, 192, // Row 2, Column 1
+ 128, 112, 144 // Row 2, Column 2
+ };
+ float kQuantizedTolerance = 2 * (1. / 256);
+ const float kMin = -1;
+ const float kMax = 127.f / 128.f;
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(kMin, kMax);
+
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 2, 3}, quant_param.first, quant_param.second, input_data, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::U8>(
+ {1, 1, 3}, quant_param.first, quant_param.second, alpha_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, kQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 3}));
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_quant_output_data));
+}
+
+TEST(PReluTest, SInt16_LWQ_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ // Rewrite this test if layer-wise quantization for sint16 becomes supported
+ std::vector<float> input_data(6); // data is not important
+ std::vector<float> alpha_data(6);
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 2, 3, 1}, 0.1, 0, input_data, memory_manager.get());
+ Tensor alpha_tensor =
+ makeInputTensor<DataType::S16>({1, 2, 3, 1}, 0.1, 0, alpha_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(PReluTest, SInt16_CWQ_Simple)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data{-0.8f, 0.2f, 0.9f, -0.7f, 0.1f, -0.4f};
+ std::vector<float> alpha_data{0.5f, 0.25f};
+ std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, -0.175f, 0.1f, -0.1f};
+
+ std::vector<float> alpha_scales{0.05f, 0.025f};
+ std::vector<int32_t> zerop{0, 0};
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get());
+ Tensor alpha_tensor =
+ makeInputTensor<DataType::S16>({2}, alpha_scales, zerop, 0, alpha_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.025, 0);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 3, 2}));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST(PReluTest, SInt16_CWQ_spatial_alpha_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data(6); // data is not important
+ std::vector<float> alpha_data(6);
+
+ std::vector<float> alpha_scales{0.25f, 0.05f};
+ std::vector<int32_t> zerop{0, 0};
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 3, 2}, alpha_scales, zerop, 3,
+ alpha_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(PReluTest, SInt16_CWQ_wrong_dim_quant_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data(6); // data is not important
+ std::vector<float> alpha_data(6);
+
+ std::vector<float> alpha_scales{0.25f};
+ std::vector<int32_t> zerop{0};
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 1, 2}, alpha_scales, zerop, 1,
+ alpha_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(PReluTest, SInt16_CWQ_uneven_shape1)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data{-0.8f, 0.2f, 0.9f, -0.7f, 0.1f, -0.4f};
+ std::vector<float> alpha_data{0.5f, 0.25f};
+ std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, -0.175f, 0.1f, -0.1f};
+
+ std::vector<float> alpha_scales{0.05f, 0.025f};
+ std::vector<int32_t> zerop{0, 0};
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 2}, alpha_scales, zerop, 2,
+ alpha_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.025, 0);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 3, 2}));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST(PReluTest, SInt16_CWQ_uneven_shape2)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data{
+ 0.0f, 0.0f, 0.0f, // Row 1, Column 1
+ 0.5f, 0.5f, 0.5f, // Row 1, Column 2
+ -1.0f, -1.0f, -1.0f, // Row 2, Column 1
+ -0.25f, -0.25f, -0.25f, // Row 2, Column 2
+ };
+ std::vector<float> alpha_data{0.0f, 0.5f, -0.5f};
+ std::vector<float> ref_output_data{
+ 0.0f, 0.0f, 0.0f, // Row 1, Column 1
+ 0.5f, 0.5f, 0.5f, // Row 1, Column 2
+ 0.0f, -0.5f, 0.5f, // Row 2, Column 1
+ 0.0f, -0.125f, 0.125f // Row 2, Column 2
+ };
+
+ std::vector<float> alpha_scales{1.f, 0.05f, 0.1f};
+ std::vector<int32_t> zerop{0, 0, 0};
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 2, 2, 3}, 0.01, 0, input_data, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 1, 3}, alpha_scales, zerop, 3,
+ alpha_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.001, 0);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 3}));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST(PReluTest, Input_Output_Type_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(PReluTest, Input_Alpha_Type_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::U8>({1}, {1}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(PReluTest, Invalid_Input_Type_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+TEST(PReluTest, Input_Output_U8_CWQ_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> scales{1.f, 1.f};
+ std::vector<int32_t> zerop{0, 0};
+ std::vector<float> dummy_data(4, 0.f);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
+ Tensor alpha_tensor =
+ makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
+ Tensor output_tensor =
+ makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(PReluTest, Input_Output_S16_CWQ_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> scales{1.f, 1.f};
+ std::vector<int32_t> zerop{0, 0};
+ std::vector<float> dummy_data(4, 0.f);
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
+ Tensor alpha_tensor =
+ makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
+ Tensor output_tensor =
+ makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(PReluTest, Mixing_U8_S16_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> dummy_data(4, 0.f);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({2, 2}, 1.f, 0, dummy_data, memory_manager.get());
+ Tensor alpha_tensor =
+ makeInputTensor<DataType::S16>({2, 2}, 1.f, 0, dummy_data, memory_manager.get());
+ Tensor output_tensor =
+ makeInputTensor<DataType::U8>({2, 2}, 1.f, 0, dummy_data, memory_manager.get());
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Pack.cpp b/onert-micro/luci-interpreter/src/kernels/Pack.cpp
new file mode 100644
index 000000000..7277f2293
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Pack.cpp
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "Utils.h"
+
+#include <cassert>
+
+namespace luci_interpreter
+{
+namespace
+{
+
+template <typename T>
+void packImpl(const circle::Tensor *input0, const circle::Tensor *output,
+ const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph,
+ uint8_t *output_data_raw)
+{
+ const auto *options = cur_op->builtin_options_as_PackOptions();
+
+ const int values_count = options->values_count();
+ int axis = options->axis();
+ const int dimensions = Tensor::num_dims(output);
+
+ const auto input_dims = wrap(input0->shape());
+ const auto output_dims = wrap(output->shape());
+
+ if (axis < 0)
+ {
+ axis += dimensions;
+ }
+
+ int outer_size = 1;
+ for (int i = 0; i < axis; ++i)
+ outer_size *= output_dims[i];
+
+ int copy_size = 1;
+ for (int i = axis + 1; i < dimensions; ++i)
+ copy_size *= output_dims[i];
+
+ int input_size = 1;
+ for (int i = 0; i < input_dims.size(); ++i)
+ input_size *= input_dims[i];
+
+ assert(input_size == copy_size * outer_size);
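+ // e.g. packing two {2, 4} inputs at axis 1 gives an output of shape {2, 2, 4}:
+ // outer_size = 2 (dims before axis), copy_size = 4 (dims after axis), and
+ // input_size = 8 == outer_size * copy_size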
+
+ T *output_data = kernels::getTensorData<T>(output_data_raw);
+ assert(output_data != nullptr);
+
+ for (int i = 0; i < values_count; ++i)
+ {
+ const auto input_index = cur_op->inputs()->operator[](i);
+ assert(input_index != -1);
+ const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+
+ auto input_data = kernels::getTensorData<T>(runtime_graph->getDataByTensor(input));
+ assert(input_data != nullptr);
+ for (int k = 0; k < outer_size; ++k)
+ {
+ const T *input_ptr = input_data + copy_size * k;
+ int loc = k * values_count * copy_size + i * copy_size;
+ T *output_ptr = output_data + loc;
+ for (int j = 0; j < copy_size; ++j)
+ output_ptr[j] = input_ptr[j];
+ }
+ }
+}
+
+} // namespace
+
+void configure_kernel_CirclePack(const circle::Operator *, BaseRuntimeGraph *)
+{
+ // Do nothing
+}
+
+void execute_kernel_CirclePack(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ const auto input_index = cur_op->inputs()->operator[](0);
+ const auto output_index = cur_op->outputs()->operator[](0);
+ assert(output_index != -1);
+ assert(input_index != -1);
+ const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+ const auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+ auto output_data = runtime_graph->getDataByTensor(output);
+ assert(output_data != nullptr);
+
+ switch (Tensor::element_type(output))
+ {
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ packImpl<float>(input, output, cur_op, runtime_graph, output_data);
+ break;
+#endif // DIS_FLOAT
+#ifndef DIS_QUANT
+ case DataType::S8:
+ packImpl<int8_t>(input, output, cur_op, runtime_graph, output_data);
+ break;
+ case DataType::U8:
+ packImpl<uint8_t>(input, output, cur_op, runtime_graph, output_data);
+ break;
+#endif // DIS_QUANT
+ case DataType::S32:
+ packImpl<int32_t>(input, output, cur_op, runtime_graph, output_data);
+ break;
+ case DataType::S64:
+ packImpl<int64_t>(input, output, cur_op, runtime_graph, output_data);
+ break;
+ default:
+ assert(false && "Unsupported types");
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Pack.test.cpp b/onert-micro/luci-interpreter/src/kernels/Pack.test.cpp
new file mode 100644
index 000000000..db820b5e8
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Pack.test.cpp
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/pack/PackKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class PackTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T> std::vector<T> checkPackKernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 2);
+
+ // Set input 1 data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ // Set input 2 data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(1));
+ std::copy(test_data_base->get_input_data_by_index(1).begin(),
+ test_data_base->get_input_data_by_index(1).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(PackTest, Float_P)
+{
+ test_kernel::TestDataFloatPack test_data_pack_kernel;
+ std::vector<float> output_data_vector = checkPackKernel(&test_data_pack_kernel);
+ EXPECT_THAT(output_data_vector, test_data_pack_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(PackTest, Int_P)
+{
+ test_kernel::TestDataIntPack test_data_pack_kernel;
+ std::vector<int32_t> output_data_vector = checkPackKernel(&test_data_pack_kernel);
+ EXPECT_THAT(output_data_vector, test_data_pack_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(PackTest, QuantU8_P)
+{
+ test_kernel::TestDataQuantU8Pack test_data_pack_kernel;
+ std::vector<uint8_t> output_data_vector = checkPackKernel(&test_data_pack_kernel);
+ EXPECT_THAT(output_data_vector, test_data_pack_kernel.get_output_data_by_index(0));
+}
+
+// TODO: add negative tests
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Pad.cpp b/onert-micro/luci-interpreter/src/kernels/Pad.cpp
new file mode 100644
index 000000000..18af756a6
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Pad.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "PadCommon.h"
+
+namespace luci_interpreter
+{
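+// Thin wrappers: the shared PadCommon implementation validates and executes
+// both the two-input form and the three-input form with a scalar pad value.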
+void configure_kernel_CirclePad(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ configure_kernel_CirclePadCommon(cur_op, runtime_graph);
+}
+
+void execute_kernel_CirclePad(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ execute_kernel_CirclePadCommon(cur_op, runtime_graph);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Pad.test.cpp b/onert-micro/luci-interpreter/src/kernels/Pad.test.cpp
new file mode 100644
index 000000000..3c835cbc0
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Pad.test.cpp
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/pad/FloatPadKernel.h"
+#include "luci_interpreter/test_models/pad/NegPadKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class PadTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T> std::vector<T> checkPadKernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 1);
+
+ // Set input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(PadTest, Float_P)
+{
+ test_kernel::TestDataFloatPad test_data_kernel;
+ std::vector<float> output_data_vector = checkPadKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(PadTest, Input_output_type_mismatch_NEG)
+{
+ test_kernel::NegTestDataInputOutputTypeMismatchPadKernel test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/PadCommon.cpp b/onert-micro/luci-interpreter/src/kernels/PadCommon.cpp
new file mode 100644
index 000000000..92cd11758
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/PadCommon.cpp
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+
+#include "PadCommon.h"
+#include "PALPad.h"
+
+namespace luci_interpreter
+{
+void configure_kernel_CirclePadCommon(const circle::Operator *cur_op,
+ BaseRuntimeGraph *runtime_graph)
+{
+ const auto num_inputs = cur_op->inputs()->size();
+
+ const auto input1_index = cur_op->inputs()->operator[](0);
+ const auto input2_index = cur_op->inputs()->operator[](1);
+ const auto input3_index = num_inputs == 3 ? cur_op->inputs()->operator[](2) : -1;
+ const auto output_index = cur_op->outputs()->operator[](0);
+
+ assert(input1_index != -1);
+ assert(input2_index != -1);
+ assert(input3_index != -1 or num_inputs == 2);
+ assert(output_index != -1);
+
+ const auto input1_tensor = runtime_graph->getCircleTensorByIndex(input1_index);
+ const auto input2_tensor = runtime_graph->getCircleTensorByIndex(input2_index);
+ const auto input3_tensor =
+ num_inputs == 3 ? runtime_graph->getCircleTensorByIndex(input3_index) : nullptr;
+ const auto output_tensor = runtime_graph->getCircleTensorByIndex(output_index);
+
+ assert(input1_tensor != nullptr);
+ assert(input2_tensor != nullptr);
+ assert(input3_tensor != nullptr or num_inputs == 2);
+ assert(output_tensor != nullptr);
+
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(input2_tensor) == DataType::S32);
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(input1_tensor) ==
+ Tensor::element_type(output_tensor));
+ if (input3_tensor != nullptr)
+ {
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(input3_tensor) ==
+ Tensor::element_type(input1_tensor));
+    // The pad value input must be a scalar
+ LUCI_INTERPRETER_CHECK(Tensor::num_elements(input3_tensor) == 1);
+ }
+
+ // Check shapes
+ const int32_t *paddings_data =
+ kernels::getTensorData<int32_t>(runtime_graph->getConstDataByTensor(input2_tensor));
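+  // paddings is a [rank, 2] tensor: row i holds the (before, after) pad counts for dimension i.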
+ for (int i = 0; i < Tensor::num_dims(output_tensor); i++)
+ {
+ int output_dim = Tensor::dim(output_tensor, i);
+ int expected_dim =
+ Tensor::dim(input1_tensor, i) + paddings_data[i * 2] + paddings_data[i * 2 + 1];
+ LUCI_INTERPRETER_CHECK(output_dim == expected_dim);
+ }
+}
+
+void execute_kernel_CirclePadCommon(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ const auto num_inputs = cur_op->inputs()->size();
+
+ const auto input1_index = cur_op->inputs()->operator[](0);
+ const auto input2_index = cur_op->inputs()->operator[](1);
+ const auto input3_index = num_inputs == 3 ? cur_op->inputs()->operator[](2) : -1;
+ const auto output_index = cur_op->outputs()->operator[](0);
+
+ assert(input1_index != -1);
+ assert(input2_index != -1);
+ assert(input3_index != -1 or num_inputs == 2);
+ assert(output_index != -1);
+
+ const auto input1_tensor = runtime_graph->getCircleTensorByIndex(input1_index);
+ const auto input2_tensor = runtime_graph->getCircleTensorByIndex(input2_index);
+ const auto input3_tensor =
+ num_inputs == 3 ? runtime_graph->getCircleTensorByIndex(input3_index) : nullptr;
+ const auto output_tensor = runtime_graph->getCircleTensorByIndex(output_index);
+
+ assert(input1_tensor != nullptr);
+ assert(input2_tensor != nullptr);
+ assert(input3_tensor != nullptr or num_inputs == 2);
+ assert(output_tensor != nullptr);
+
+ luci_interpreter_pal::PadParams pad_params;
+ const int num_input_dimensions = Tensor::num_dims(input1_tensor);
+ pad_params.left_padding_count = num_input_dimensions;
+ pad_params.right_padding_count = num_input_dimensions;
+
+ const int32_t *paddings_data =
+ kernels::getTensorData<int32_t>(runtime_graph->getConstDataByTensor(input2_tensor));
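+  // Unpack the (before, after) pad counts of each dimension into the PAL parameters.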
+ for (int idx = num_input_dimensions - 1; idx >= 0; --idx)
+ {
+ pad_params.left_padding[idx] = paddings_data[idx * 2];
+ pad_params.right_padding[idx] = paddings_data[idx * 2 + 1];
+ }
+
+ auto *input1_data = runtime_graph->getDataByTensor(input1_tensor);
+ if (input1_data == nullptr)
+ input1_data = runtime_graph->getConstDataByTensor(input1_tensor);
+ assert(input1_data);
+
+ auto *input2_data = runtime_graph->getConstDataByTensor(input2_tensor);
+ assert(input2_data);
+
+ auto *output_data = runtime_graph->getDataByTensor(output_tensor);
+ assert(output_data);
+
+ switch (Tensor::element_type(input1_tensor))
+ {
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ {
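+      // PadV2 carries the pad value as a scalar third input; plain Pad pads with zero.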
+ float pad_value =
+ input3_tensor == nullptr
+ ? 0.f
+ : *kernels::getTensorData<float>(runtime_graph->getConstDataByTensor(input3_tensor));
+ luci_interpreter_pal::Pad(pad_params, kernels::getTensorShape(input1_tensor),
+ kernels::getTensorData<float>(input1_data), &pad_value,
+ kernels::getTensorShape(output_tensor),
+ kernels::getTensorData<float>(output_data));
+ }
+ break;
+#endif // DIS_FLOAT
+ default:
+ assert(false && "Unsupported type");
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/PadCommon.h b/onert-micro/luci-interpreter/src/kernels/PadCommon.h
new file mode 100644
index 000000000..d8fa4bed3
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/PadCommon.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_PAD_COMMON_H
+#define LUCI_INTERPRETER_KERNELS_PAD_COMMON_H
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+
+namespace luci_interpreter
+{
+void configure_kernel_CirclePadCommon(const circle::Operator *cur_op,
+ BaseRuntimeGraph *runtime_graph);
+
+void execute_kernel_CirclePadCommon(const circle::Operator *cur_op,
+ BaseRuntimeGraph *runtime_graph);
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_PAD_COMMON_H
diff --git a/onert-micro/luci-interpreter/src/kernels/PadV2.cpp b/onert-micro/luci-interpreter/src/kernels/PadV2.cpp
new file mode 100644
index 000000000..7e0f61b8d
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/PadV2.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "PadCommon.h"
+
+namespace luci_interpreter
+{
+void configure_kernel_CirclePadV2(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ configure_kernel_CirclePadCommon(cur_op, runtime_graph);
+}
+
+void execute_kernel_CirclePadV2(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ execute_kernel_CirclePadCommon(cur_op, runtime_graph);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/PadV2.test.cpp b/onert-micro/luci-interpreter/src/kernels/PadV2.test.cpp
new file mode 100644
index 000000000..8be5b0252
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/PadV2.test.cpp
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/pad_v2/FloatPadV2Kernel.h"
+#include "luci_interpreter/test_models/pad_v2/NegPadV2Kernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class PadV2Test : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T> std::vector<T> checkPadV2Kernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 1);
+
+ // Set input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(PadV2Test, Float_P)
+{
+ test_kernel::TestDataFloatPadV2 test_data_kernel;
+ std::vector<float> output_data_vector = checkPadV2Kernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(PadV2Test, Input_output_type_mismatch_NEG)
+{
+ test_kernel::NegTestDataInputOutputTypeMismatchPadV2Kernel test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Pow.cpp b/onert-micro/luci-interpreter/src/kernels/Pow.cpp
new file mode 100644
index 000000000..6345bf324
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Pow.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Pow.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Pow::Pow(const Tensor *input1, const Tensor *input2, Tensor *output)
+ : Kernel({input1, input2}, {output})
+{
+}
+
+void Pow::configure()
+{
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
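+  // The output shape is the broadcast of the two input shapes.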
+  // TODO: enable this resize only for kernels with dynamic shapes
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Pow::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ eval<float>();
+ break;
+ case DataType::S32:
+ eval<int32_t>();
+ break;
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+template <typename T> void Pow::eval() const
+{
+ tflite::ArithmeticParams params{};
+
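+  // ProcessBroadcastShapes returns true when the shapes differ and broadcasting is needed.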
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastPow4DSlow(getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Pow(getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()));
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Pow.h b/onert-micro/luci-interpreter/src/kernels/Pow.h
new file mode 100644
index 000000000..8ff865e40
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Pow.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_POW_H
+#define LUCI_INTERPRETER_KERNELS_POW_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Pow : public Kernel
+{
+public:
+ Pow(const Tensor *input1, const Tensor *input2, Tensor *output);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> void eval() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_POW_H
diff --git a/onert-micro/luci-interpreter/src/kernels/Pow.test.cpp b/onert-micro/luci-interpreter/src/kernels/Pow.test.cpp
new file mode 100644
index 000000000..0e858115d
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Pow.test.cpp
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Pow.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class PowTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(PowTest, SimplePow)
+{
+ std::initializer_list<int32_t> base_shape = {1, 1, 3, 2};
+
+ std::vector<float> input1_data{0.3f, 2.3f, 0.9f, 0.5f, 0.8f, 1.1f};
+ std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
+ std::vector<float> test_outputs{0.786f, 1.2838f, 1.043f, 0.7071f, 0.8f, 1.08956f};
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(base_shape));
+}
+
+TEST_F(PowTest, FloatBroadcastPow)
+{
+ std::initializer_list<int32_t> input1_shape = {1, 3};
+ std::initializer_list<int32_t> input2_shape = {3, 1};
+
+ std::vector<float> input1_data{0.3f, 2.3f, 0.9f};
+ std::vector<float> input2_data{0.2f, 0.3f, 0.4f};
+ std::vector<float> test_outputs{0.786f, 1.18126f, 0.9791f, 0.6968f, 1.28386f,
+ 0.96888f, 0.6178f, 1.3953f, 0.9587f};
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(input1_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(input2_shape, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f));
+}
+
+TEST_F(PowTest, IntPow)
+{
+ std::initializer_list<int32_t> base_shape = {1, 3};
+
+ std::vector<int32_t> input_data{2, 3, 4};
+ std::vector<int32_t> test_outputs{4, 27, 256};
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S32>(base_shape, input_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::S32>(base_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(test_outputs));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(base_shape));
+}
+
+TEST_F(PowTest, Input_Output_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(PowTest, Input_Type_Mismatch_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {4}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(PowTest, Invalid_Input_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Quantize.cpp b/onert-micro/luci-interpreter/src/kernels/Quantize.cpp
new file mode 100644
index 000000000..9f622d077
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Quantize.cpp
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Quantize.h"
+#include "kernels/Utils.h"
+#include "PALQuantize.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+namespace
+{
+
+template <typename input_dtype> void call_requantize(const Tensor *input, Tensor *output)
+{
+ int32_t multiplier;
+ int shift;
+
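+  // Express input_scale / output_scale as a fixed-point multiplier plus shift.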
+ const double effective_output_scale = input->scale() / output->scale();
+ quantizeMultiplier(effective_output_scale, &multiplier, &shift);
+
+ const auto input_shape = getTensorShape(input);
+ const auto output_shape = getTensorShape(output);
+ const auto size = tflite::MatchingFlatSize(input_shape, output_shape);
+
+ const auto input_data = getTensorData<input_dtype>(input);
+
+ switch (output->element_type())
+ {
+ case DataType::S8:
+ luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
+ output->zero_point(), getTensorData<int8_t>(output));
+ break;
+ case DataType::U8:
+ luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
+ output->zero_point(), getTensorData<uint8_t>(output));
+ break;
+ case DataType::S16:
+ luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
+ output->zero_point(), getTensorData<int16_t>(output));
+ break;
+ default:
+ assert(false && "Unsupported quantized type, yet!");
+ }
+}
+
+} // namespace
+
+Quantize::Quantize(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Quantize::configure()
+{
+
+ if (input()->element_type() == DataType::S16)
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0);
+
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ {
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::U8 ||
+ output()->element_type() == DataType::S8 ||
+ output()->element_type() == DataType::S16);
+ break;
+ }
+ case DataType::S16:
+ case DataType::S8:
+ case DataType::U8:
+ {
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::S8 ||
+ output()->element_type() == DataType::U8 ||
+ output()->element_type() == DataType::S16);
+ if (output()->element_type() == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(output()->zero_point() == 0);
+ }
+ break;
+ }
+ default:
+ assert(false && "Unsupported type");
+ }
+  // TODO: enable this resize only for kernels with dynamic shapes
+ output()->resize(input()->shape());
+}
+
+void Quantize::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ {
+ tflite::QuantizationParams op_params;
+ op_params.zero_point = output()->zero_point();
+ op_params.scale = output()->scale();
+ const auto input_data = getTensorData<float>(input());
+
+ switch (output()->element_type())
+ {
+ case DataType::S8:
+ {
+ luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
+ getTensorShape(output()), getTensorData<int8_t>(output()));
+ break;
+ }
+ case DataType::U8:
+ {
+ luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
+ getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ }
+ case DataType::S16:
+ {
+ luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
+ getTensorShape(output()),
+ getTensorData<int16_t>(output()));
+ break;
+ }
+ default:
+ assert(false && "Unsupported type.");
+ }
+ break;
+ }
+ case DataType::S16:
+ {
+ call_requantize<int16_t>(input(), output());
+ break;
+ }
+ case DataType::S8:
+ {
+ call_requantize<int8_t>(input(), output());
+ break;
+ }
+ case DataType::U8:
+ {
+ call_requantize<uint8_t>(input(), output());
+ break;
+ }
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Quantize.h b/onert-micro/luci-interpreter/src/kernels/Quantize.h
new file mode 100644
index 000000000..006c5366f
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Quantize.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_QUANTIZE_H
+#define LUCI_INTERPRETER_KERNELS_QUANTIZE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Quantize : public Kernel
+{
+public:
+ Quantize(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_QUANTIZE_H
diff --git a/onert-micro/luci-interpreter/src/kernels/Quantize.test.cpp b/onert-micro/luci-interpreter/src/kernels/Quantize.test.cpp
new file mode 100644
index 000000000..22e67fe3f
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Quantize.test.cpp
@@ -0,0 +1,254 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Quantize.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class QuantizeTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(QuantizeTest, FloatUint8)
+{
+ std::vector<float> input_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64};
+
+ std::vector<uint8_t> ref_output_data{0, 1, 2, 3, 4, 251, 252, 253, 254, 255};
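+  // ref = clamp(round(x / scale) + zero_point, 0, 255) with scale 0.5 and zero_point 127.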
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, /*scale*/ 0.5, /*zero_point*/ 127);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(QuantizeTest, FloatInt8)
+{
+ std::vector<float> input_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64};
+
+ std::vector<int8_t> ref_output_data{-128, -127, -126, -125, -124, 123, 124, 125, 126, 127};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(QuantizeTest, FloatInt16)
+{
+ std::vector<float> input_data{-63.5, -63, -3, -2, -1, 1, 2, 3, 63.5, 64};
+
+ std::vector<int16_t> ref_output_data{-12700, -12600, -600, -400, -200,
+ 200, 400, 600, 12700, 12800};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, /*scale*/ 0.005, /*zero_point*/ 0);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int16_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(QuantizeTest, Int16Int16)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ std::vector<int16_t> ref_output_data{2, 4, 6, 8, 10, 12, 14, 16, 18, 20};
+
+ Tensor input_tensor = makeInputTensor<DataType::S16>(
+ {1, 1, 2, 5}, /*scale*/ 1.0, /*zero_point*/ 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, /*scale*/ 0.5, /*zero_point*/ 0);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int16_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
+}
+
+TEST_F(QuantizeTest, Int8Int8)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ std::vector<int8_t> ref_output_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 19};
+
+ Tensor input_tensor = makeInputTensor<DataType::S8>(
+ {1, 1, 2, 5}, /*scale*/ 0.5, /*zero_point*/ -1, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
+}
+
+TEST_F(QuantizeTest, Uint8Uint8)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ std::vector<uint8_t> ref_output_data{129, 131, 133, 135, 137, 139, 141, 143, 145, 147};
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 1, 2, 5}, /*scale*/ 0.5, /*zero_point*/ 127, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, /*scale*/ 0.5, /*zero_point*/ 127);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
+}
+
+TEST_F(QuantizeTest, Int16Int8)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ std::vector<int8_t> ref_output_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 19};
+
+ Tensor input_tensor = makeInputTensor<DataType::S16>(
+ {1, 1, 2, 5}, /*scale*/ 1.0, /*zero_point*/ 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
+}
+
+TEST_F(QuantizeTest, InvalidInputType_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(QuantizeTest, InvalidOutputTypeForFloatInput_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 1, 2, 5}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(QuantizeTest, InvalidOutputTypeForInt16Input_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(QuantizeTest, InvalidOutputTypeForInt8Input_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S8>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(QuantizeTest, InvalidOutputTypeForUint8Input_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(QuantizeTest, InvalidInputZeroPoint_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 2, 5}, 0.5, -1, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/ReduceCommon.cpp b/onert-micro/luci-interpreter/src/kernels/ReduceCommon.cpp
new file mode 100644
index 000000000..c93329cd8
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/ReduceCommon.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+#include "TISOKernel.h"
+
+#include "PALReduceCommon.h"
+
+#include <cassert>
+
+namespace luci_interpreter
+{
+namespace
+{
+
+template <typename T>
+void reduceProdGeneric(kernels::TISOData *tiso_data, const circle::Tensor *input,
+ const circle::Tensor *axis, const circle::Tensor *output, bool keep_dims)
+{
+ const int input_rank = Tensor::num_dims(input);
+ const int num_axis = Tensor::num_elements(axis);
+
+ auto const input_dims = wrap(input->shape());
+ const auto output_shape = kernels::getTensorShape(output);
+
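+  // Multiply-reduce over the requested axes; the identity element of the product is 1.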
+ luci_interpreter_pal::ReduceGeneric<T>(
+ kernels::getTensorData<T>(tiso_data->input1_data),
+ reinterpret_cast<const int *>(input_dims.data()), input_rank,
+ kernels::getTensorData<T>(tiso_data->output_data),
+ kernels::getTensorData<int>(tiso_data->input2_data), num_axis,
+ /*init_value=*/T(1), output_shape.flatSize(),
+ [](const T current, const T in) -> T { return in * current; });
+}
+
+} // namespace
+
+void configure_kernel_CircleReduceCommon(const circle::Operator *cur_op,
+ BaseRuntimeGraph *runtime_graph)
+{
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) == DataType::S32 or
+ Tensor::element_type(kernel.input1()) == DataType::FLOAT32 or
+ Tensor::element_type(kernel.input1()) == DataType::S64);
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input2()) == DataType::S32);
+}
+
+void execute_kernel_CircleReduceCommon(const circle::Operator *cur_op,
+ BaseRuntimeGraph *runtime_graph)
+{
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+ kernels::TISOData tiso_data = kernel.readData();
+
+ const auto *input = kernel.input1();
+ const auto *axis = kernel.input2();
+ const auto *output = kernel.output();
+
+ const auto *options = cur_op->builtin_options_as_ReducerOptions();
+
+ switch (Tensor::element_type(kernel.input1()))
+ {
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ reduceProdGeneric<float>(&tiso_data, input, axis, output, options->keep_dims());
+ break;
+#endif // DIS_FLOAT
+ case DataType::S32:
+ reduceProdGeneric<int32_t>(&tiso_data, input, axis, output, options->keep_dims());
+ break;
+ case DataType::S64:
+ reduceProdGeneric<int64_t>(&tiso_data, input, axis, output, options->keep_dims());
+ break;
+ default:
+ assert(false && "Unsupported type");
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/ReduceCommon.test.cpp b/onert-micro/luci-interpreter/src/kernels/ReduceCommon.test.cpp
new file mode 100644
index 000000000..33574600d
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/ReduceCommon.test.cpp
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/reduce_common/ReduceProdKernel.h"
+#include "luci_interpreter/test_models/reduce_common/NegReduceProdKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class ReduceCommonTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T>
+std::vector<T> checkReduceCommonKernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 1);
+
+ // Set input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(ReduceCommonTest, Reduce_Prod_Float_P)
+{
+ test_kernel::TestDataFloatReduceProd test_data_float_reduce_prod;
+ std::vector<float> output_data_vector = checkReduceCommonKernel(&test_data_float_reduce_prod);
+ EXPECT_THAT(output_data_vector,
+ kernels::testing::FloatArrayNear(
+ test_data_float_reduce_prod.get_output_data_by_index(0), 0.0001f));
+}
+
+TEST_F(ReduceCommonTest, Reduce_Prod_Int_P)
+{
+ test_kernel::TestDataIntReduceProd test_data_int_reduce_prod;
+ std::vector<int32_t> output_data_vector = checkReduceCommonKernel(&test_data_int_reduce_prod);
+ EXPECT_THAT(output_data_vector, test_data_int_reduce_prod.get_output_data_by_index(0));
+}
+
+TEST_F(ReduceCommonTest, Wrong_input_type_NEG)
+{
+ test_kernel::NegTestDataWrongInputTypeReduceProdKernel test_data_kernel;
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+TEST_F(ReduceCommonTest, Wrong_axis_type_NEG)
+{
+ test_kernel::NegTestDataWrongAxisTypeReduceProdKernel test_data_kernel;
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Relu.cpp b/onert-micro/luci-interpreter/src/kernels/Relu.cpp
new file mode 100644
index 000000000..c38f33238
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Relu.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+#include "SISOKernel.h"
+
+#include "PALReluCommon.h"
+
+namespace luci_interpreter
+{
+
+void configure_kernel_CircleRelu(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::SISOKernel kernel(cur_op, runtime_graph);
+
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input()) ==
+ Tensor::element_type(kernel.output()));
+ LUCI_INTERPRETER_CHECK(Tensor::num_dims(kernel.input()) == Tensor::num_dims(kernel.output()));
+ LUCI_INTERPRETER_CHECK(Tensor::num_elements(kernel.input()) ==
+ Tensor::num_elements(kernel.output()));
+}
+
+void execute_kernel_CircleRelu(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::SISOKernel kernel(cur_op, runtime_graph);
+
+ const auto *input_data = runtime_graph->getDataByTensor(kernel.input());
+ assert(input_data);
+
+ auto *output_data = runtime_graph->getDataByTensor(kernel.output());
+
+ bool is_inplace = runtime_graph->is_inplace_op(cur_op);
+
+ switch (Tensor::element_type(kernel.input()))
+ {
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ {
+ const float *input_data_float = kernels::getTensorData<float>(input_data);
+ float *output_data_float = kernels::getTensorData<float>(output_data);
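+      // In-place execution reuses the input buffer as the output buffer.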
+ if (is_inplace)
+ {
+ output_data_float = const_cast<float *>(input_data_float);
+ }
+
+ assert(output_data_float);
+ const int flat_size =
+ kernels::getTensorRuntimeShape(kernel.input(), runtime_graph).flatSize();
+
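+      // The trailing flag selects the ReLU6 upper clamp; false keeps plain ReLU.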
+ luci_interpreter_pal::ReLUCommon(flat_size, input_data_float, output_data_float, 0.0f, false);
+ break;
+ }
+#endif // DIS_FLOAT
+ default:
+ assert(false && "Unsupported type");
+ }
+
+ if (is_inplace)
+ runtime_graph->makeInplaceOperation(kernel.input(), kernel.output());
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Relu.test.cpp b/onert-micro/luci-interpreter/src/kernels/Relu.test.cpp
new file mode 100644
index 000000000..b0cb220b9
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Relu.test.cpp
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/relu/FloatReLUKernel.h"
+#include "luci_interpreter/test_models/relu/NegReLUKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class ReLUTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T> std::vector<T> checkReLUKernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 1);
+
+ // Set input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(ReLUTest, Float_P)
+{
+ test_kernel::TestDataFloatReLU test_data_kernel;
+ std::vector<float> output_data_vector = checkReLUKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, kernels::testing::FloatArrayNear(
+ test_data_kernel.get_output_data_by_index(0), 0.0001f));
+}
+
+TEST_F(ReLUTest, Input_output_type_mismatch_NEG)
+{
+ test_kernel::NegTestDataInputOutputTypeMismatchReLUKernel test_data_kernel;
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Relu6.cpp b/onert-micro/luci-interpreter/src/kernels/Relu6.cpp
new file mode 100644
index 000000000..718622858
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Relu6.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+#include "SISOKernel.h"
+
+#include "PALReluCommon.h"
+
+namespace luci_interpreter
+{
+
+void configure_kernel_CircleRelu6(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::SISOKernel kernel(cur_op, runtime_graph);
+
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input()) ==
+ Tensor::element_type(kernel.output()));
+ LUCI_INTERPRETER_CHECK(Tensor::num_dims(kernel.input()) == Tensor::num_dims(kernel.output()));
+ LUCI_INTERPRETER_CHECK(Tensor::num_elements(kernel.input()) ==
+ Tensor::num_elements(kernel.output()));
+}
+
+void execute_kernel_CircleRelu6(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::SISOKernel kernel(cur_op, runtime_graph);
+
+ const auto *input_data = runtime_graph->getDataByTensor(kernel.input());
+ assert(input_data);
+
+ auto *output_data = runtime_graph->getDataByTensor(kernel.output());
+
+ bool is_inplace = runtime_graph->is_inplace_op(cur_op);
+
+ switch (Tensor::element_type(kernel.input()))
+ {
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ {
+ const float *input_data_float = kernels::getTensorData<float>(input_data);
+ float *output_data_float = kernels::getTensorData<float>(output_data);
+ if (is_inplace)
+ {
+ output_data_float = const_cast<float *>(input_data_float);
+ }
+
+ assert(output_data_float);
+ const int flat_size =
+ kernels::getTensorRuntimeShape(kernel.input(), runtime_graph).flatSize();
+
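+      // Passing true enables the upper clamp at 6 on top of the ReLU lower bound.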
+ luci_interpreter_pal::ReLUCommon(flat_size, input_data_float, output_data_float, 0.0f, true);
+ break;
+ }
+#endif // DIS_FLOAT
+ default:
+ assert(false && "Unsupported type");
+ }
+
+ if (is_inplace)
+ runtime_graph->makeInplaceOperation(kernel.input(), kernel.output());
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Relu6.test.cpp b/onert-micro/luci-interpreter/src/kernels/Relu6.test.cpp
new file mode 100644
index 000000000..1b784f0d3
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Relu6.test.cpp
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/relu6/FloatReLU6Kernel.h"
+#include "luci_interpreter/test_models/relu6/NegReLU6Kernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class ReLU6Test : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T> std::vector<T> checkReLU6Kernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 1);
+
+ // Set input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(ReLU6Test, Float_P)
+{
+ test_kernel::TestDataFloatReLU6 test_data_kernel;
+ std::vector<float> output_data_vector = checkReLU6Kernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, kernels::testing::FloatArrayNear(
+ test_data_kernel.get_output_data_by_index(0), 0.0001f));
+}
+
+TEST_F(ReLU6Test, Input_output_type_mismatch_NEG)
+{
+ test_kernel::NegTestDataInputOutputTypeMismatchReLU6Kernel test_data_kernel;
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Reshape.cpp b/onert-micro/luci-interpreter/src/kernels/Reshape.cpp
new file mode 100644
index 000000000..7fe3e5636
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Reshape.cpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "Utils.h"
+
+#include <cassert>
+#include <cstring>
+
+namespace luci_interpreter
+{
+
+void configure_kernel_CircleReshape(const circle::Operator *, BaseRuntimeGraph *)
+{
+ // Do nothing
+}
+
+// TODO: reduce code duplication with ExpandDims
+void execute_kernel_CircleReshape(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ const auto input_index = cur_op->inputs()->operator[](0);
+ const auto shape_index = cur_op->inputs()->operator[](1);
+ const auto output_index = cur_op->outputs()->operator[](0);
+
+ assert(input_index != -1);
+ assert(shape_index != -1);
+ assert(output_index != -1);
+
+ const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+ const auto shape = runtime_graph->getCircleTensorByIndex(shape_index);
+ const auto output = runtime_graph->getCircleTensorByIndex(output_index);
+ bool is_inplace = runtime_graph->is_inplace_op(cur_op);
+ if (is_inplace)
+ {
+ runtime_graph->makeInplaceOperation(input, output);
+ return;
+ }
+
+ const auto input_data = runtime_graph->getDataByTensor(input);
+ auto shape_data = runtime_graph->getConstDataByTensor(shape);
+ auto output_data = runtime_graph->getDataByTensor(output);
+
+ assert(input_data != nullptr);
+ assert(output_data != nullptr);
+
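+ // Dynamic-shape path (compiled out when DIS_DYN_SHAPES is defined): when the
+ // shape tensor is not a constant, read it at runtime, register the resulting
+ // dynamic shape for the output, and reallocate the output buffer to match.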
+#ifndef DIS_DYN_SHAPES
+ if (shape_data == nullptr)
+ {
+ shape_data = runtime_graph->getDataByTensor(shape);
+ assert(shape_data != nullptr);
+
+ assert(Tensor::element_type(shape) == DataType::S32);
+
+ const int32_t *shape_data_int = kernels::getTensorData<int32_t>(shape_data);
+ const auto num_elements = Tensor::num_elements(shape);
+
+ luci_interpreter::RuntimeShape dynamic_shape(num_elements);
+ int32_t data_size = 1;
+ for (int i = 0; i < num_elements; ++i)
+ {
+ dynamic_shape.setDim(i, shape_data_int[i]);
+ data_size *= shape_data_int[i];
+ }
+ data_size *= size(Tensor::element_type(output));
+
+ runtime_graph->addDynamicShapeTensor(output, std::move(dynamic_shape));
+
+ if (data_size == 0)
+ {
+ runtime_graph->resetTensorData(nullptr, output);
+ return;
+ }
+
+ auto new_output_data = new uint8_t[data_size];
+ output_data = new_output_data;
+ runtime_graph->resetTensorData(new_output_data, output);
+ }
+#else
+ assert(shape_data != nullptr);
+#endif // DIS_DYN_SHAPES
+
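+ // Reshape never reorders the underlying data, so execution is a flat byte
+ // copy of the input buffer into the output buffer.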
+ const size_t element_size = getDataTypeSize(Tensor::element_type(input));
+ const int32_t num_elements = Tensor::num_elements(input);
+ std::memcpy(output_data, input_data, num_elements * element_size);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Reshape.test.cpp b/onert-micro/luci-interpreter/src/kernels/Reshape.test.cpp
new file mode 100644
index 000000000..ac1acfb1d
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Reshape.test.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/reshape/ReshapeKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class ReshapeTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T>
+std::vector<T> checkReshapeKernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 1);
+
+ // Set input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(ReshapeTest, MainTest_P)
+{
+ test_kernel::TestDataReshapeKernel<float> test_data_kernel(false);
+ std::vector<float> output_data_vector = checkReshapeKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(ReshapeTest, MainTest_NEG)
+{
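+ // The malformed model should trip a runtime check inside the interpreter and
+ // abort; the empty matcher accepts any death message.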
+ test_kernel::TestDataReshapeKernel<float> test_data_kernel(true);
+ EXPECT_DEATH(checkReshapeKernel(&test_data_kernel), "");
+}
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/ResizeBilinear.cpp b/onert-micro/luci-interpreter/src/kernels/ResizeBilinear.cpp
new file mode 100644
index 000000000..7ce3833b3
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/ResizeBilinear.cpp
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+#include "PALResizeBilinear.h"
+
+namespace luci_interpreter
+{
+
+/*
+ * ResizeBilinear Kernel:
+ * Description: resizes the input tensor to the size given by the constant input,
+ *              using bilinear interpolation.
+ * 2 Inputs: Input tensor (shape dimensions count = 4);
+ *           Input constant (shape dimensions count = 1, num elements = 2)
+ * Parameters: align_corners; half_pixel_centers
+ *
+ * Example:
+ * Input(2, 2, 2, 1)
+ * |
+ * | Constant Input(2) [3,3] INT32
+ * | /
+ * ResizeBilinear
+ * |
+ * Output(2, 3, 3, 1) UINT8
+ */
+
+void configure_kernel_CircleResizeBilinear(const circle::Operator *cur_op,
+ BaseRuntimeGraph *runtime_graph)
+{
+ // Check the number of inputs: should be 2
+ assert(cur_op->inputs()->size() == 2);
+ const auto input_index = cur_op->inputs()->operator[](0);
+ const auto size_index = cur_op->inputs()->operator[](1);
+ const auto output_index = cur_op->outputs()->operator[](0);
+
+ assert(input_index != -1);
+ assert(size_index != -1);
+ assert(output_index != -1);
+ // Get tensors
+ const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+ const auto size = runtime_graph->getCircleTensorByIndex(size_index);
+ const auto output = runtime_graph->getCircleTensorByIndex(output_index);
+ // Check the input shape
+ assert(kernels::getTensorShape(input).dimensionsCount() == 4);
+ // Check the shape of the constant size input
+ assert(kernels::getTensorShape(size).dimensionsCount() == 1);
+ assert(Tensor::element_type(size) == DataType::S32);
+ assert(kernels::getTensorShape(size).dims(0) == 2);
+
+ const auto *params = cur_op->builtin_options_as_ResizeBilinearOptions();
+ if (params->half_pixel_centers() && params->align_corners())
+ assert(false && "If half_pixel_centers is True, align_corners must be False.");
+}
+
+void execute_kernel_CircleResizeBilinear(const circle::Operator *cur_op,
+ BaseRuntimeGraph *runtime_graph)
+{
+ assert(cur_op->inputs()->size() == 2);
+ const auto input_index = cur_op->inputs()->operator[](0);
+ const auto size_index = cur_op->inputs()->operator[](1);
+ const auto output_index = cur_op->outputs()->operator[](0);
+
+ assert(input_index != -1);
+ assert(size_index != -1);
+ assert(output_index != -1);
+
+ const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+ const auto size = runtime_graph->getCircleTensorByIndex(size_index);
+ const auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+ const uint8_t *input_data = runtime_graph->getDataByTensor(input);
+ const uint8_t *size_data = runtime_graph->getConstDataByTensor(size);
+ uint8_t *output_data = runtime_graph->getDataByTensor(output);
+
+ assert(input_data != nullptr);
+ assert(size_data != nullptr);
+ assert(output_data != nullptr);
+
+ // Get parameters
+ const auto *op_params = cur_op->builtin_options_as_ResizeBilinearOptions();
+
+ switch (Tensor::element_type(output))
+ {
+ case DataType::FLOAT32:
+ luci_interpreter_pal::ResizeBilinear(
+ op_params, kernels::getTensorShape(input), kernels::getTensorData<float>(input_data),
+ kernels::getTensorShape(size), kernels::getTensorData<int32_t>(size_data),
+ kernels::getTensorShape(output), kernels::getTensorData<float>(output_data));
+ break;
+ case DataType::U8:
+ luci_interpreter_pal::ResizeBilinear(
+ op_params, kernels::getTensorShape(input), kernels::getTensorData<uint8_t>(input_data),
+ kernels::getTensorShape(size), kernels::getTensorData<int32_t>(size_data),
+ kernels::getTensorShape(output), kernels::getTensorData<uint8_t>(output_data));
+ break;
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/ResizeBilinear.h b/onert-micro/luci-interpreter/src/kernels/ResizeBilinear.h
new file mode 100644
index 000000000..b7bdc2ab7
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/ResizeBilinear.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_RESIZEBILINEAR_H
+#define LUCI_INTERPRETER_KERNELS_RESIZEBILINEAR_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ResizeBilinear : public KernelWithParams<ResizeBilinearParams>
+{
+public:
+ ResizeBilinear(const Tensor *input, const Tensor *size, Tensor *output,
+ const ResizeBilinearParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *size() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_RESIZEBILINEAR_H
diff --git a/onert-micro/luci-interpreter/src/kernels/ResizeBilinear.test.cpp b/onert-micro/luci-interpreter/src/kernels/ResizeBilinear.test.cpp
new file mode 100644
index 000000000..d8ecae5a0
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/ResizeBilinear.test.cpp
@@ -0,0 +1,190 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "loader/ModuleLoader.h"
+#include "luci_interpreter/test_models/resize_bilinear/FloatResizeBilinearKernel.h"
+#include "luci_interpreter/test_models/resize_bilinear/U8ResizeBilinearKernel.h"
+#include "luci_interpreter/test_models/resize_bilinear/NegResizeBilinearKernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class ResizeBilinearTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T>
+std::vector<T> checkResizeBilinearKernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 1);
+
+ // Set input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(ResizeBilinearTest, Float_P)
+{
+ test_kernel::TestDataFloatResizeBilinear test_data_kernel(false);
+ std::vector<float> output_data_vector = checkResizeBilinearKernel(&test_data_kernel);
+
+ EXPECT_THAT(output_data_vector,
+ FloatArrayNear(test_data_kernel.get_output_data_by_index(0), 0.0001f));
+}
+
+TEST_F(ResizeBilinearTest, HalfPixelCenter_Float_P)
+{
+
+ test_kernel::TestDataFloatResizeBilinear test_data_kernel(true);
+ std::vector<float> output_data_vector = checkResizeBilinearKernel(&test_data_kernel);
+
+ EXPECT_THAT(output_data_vector,
+ FloatArrayNear(test_data_kernel.get_output_data_by_index(0), 0.0001f));
+}
+
+TEST_F(ResizeBilinearTest, Uint8_P)
+{
+ test_kernel::TestDataUint8ResizeBilinear test_data_kernel(false);
+ std::vector<uint8_t> output_data_vector = checkResizeBilinearKernel<uint8_t>(&test_data_kernel);
+
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(ResizeBilinearTest, HalfPixelCenter_Uint8_P)
+{
+ test_kernel::TestDataUint8ResizeBilinear test_data_kernel(true);
+ std::vector<uint8_t> output_data_vector = checkResizeBilinearKernel<uint8_t>(&test_data_kernel);
+
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(ResizeBilinearTest, InvalidInputShape_Float_NEG)
+{
+
+ test_kernel::NegTestDataInvalidInputShapeFloatResizeBilinearKernel test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+TEST_F(ResizeBilinearTest, InvalidParams_Float_NEG)
+{
+
+ test_kernel::NegTestDataInvalidParamFloatResizeBilinearKernel test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+TEST_F(ResizeBilinearTest, InvalidSizeShape_Float_NEG)
+{
+
+ test_kernel::NegTestDataInvalidSizeShapeDimensionsFloatResizeBilinearKernel test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+TEST_F(ResizeBilinearTest, InvalidInputShape_uint8_NEG)
+{
+
+ test_kernel::NegTestDataInvalidInputShapeUint8ResizeBilinearKernel test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+TEST_F(ResizeBilinearTest, InvalidParams_uint8_NEG)
+{
+
+ test_kernel::NegTestDataInvalidParamUint8ResizeBilinearKernel test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+TEST_F(ResizeBilinearTest, InvalidSizeShape_uint8_NEG)
+{
+
+ test_kernel::NegTestDataInvalidSizeShapeDimensionsUint8ResizeBilinearKernel test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp b/onert-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp
new file mode 100644
index 000000000..57ca4d535
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ResizeNearestNeighbor.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h>
+#include "PALResizeNearestNeighbor.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+ResizeNearestNeighbor::ResizeNearestNeighbor(const Tensor *input, const Tensor *size,
+ Tensor *output,
+ const ResizeNearestNeighborParams &params)
+ : KernelWithParams<ResizeNearestNeighborParams>({input, size}, {output}, params)
+{
+}
+
+void ResizeNearestNeighbor::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(size()->shape().num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(size()->element_type() == DataType::S32);
+ LUCI_INTERPRETER_CHECK(size()->shape().dim(0) == 2);
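+ // Output is NHWC: batch and channels follow the input, spatial dims come
+ // from the two elements of the size tensor.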
+ Shape output_shape(4);
+ output_shape.dim(0) = input()->shape().dim(0);
+ output_shape.dim(1) = getTensorData<int32_t>(size())[0];
+ output_shape.dim(2) = getTensorData<int32_t>(size())[1];
+ output_shape.dim(3) = input()->shape().dim(3);
+ // TODO: enable this only for kernels with dynamic shapes
+ output()->resize(output_shape);
+}
+
+void ResizeNearestNeighbor::execute() const
+{
+ tflite::ResizeNearestNeighborParams op_params{};
+ op_params.align_corners = params().align_corners;
+ op_params.half_pixel_centers = params().half_pixel_centers;
+ switch (output()->element_type())
+ {
+ case DataType::FLOAT32:
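+ // Nearest-neighbor resize only copies elements, so the float data can be
+ // viewed through a same-width integer type; no arithmetic touches the values.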
+ tflite::reference_ops::ResizeNearestNeighbor(
+ op_params, getTensorShape(input()), getTensorData<int32_t>(input()), getTensorShape(size()),
+ getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<int32_t>(output()));
+ break;
+ case DataType::U8:
+ luci_interpreter_pal::ResizeNearestNeighbor(
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(size()),
+ getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+ break;
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.h b/onert-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.h
new file mode 100644
index 000000000..137d031cf
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_RESIZENEARESTNEIGHBOR_H
+#define LUCI_INTERPRETER_KERNELS_RESIZENEARESTNEIGHBOR_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ResizeNearestNeighbor : public KernelWithParams<ResizeNearestNeighborParams>
+{
+public:
+ ResizeNearestNeighbor(const Tensor *input, const Tensor *size, Tensor *output,
+ const ResizeNearestNeighborParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *size() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_RESIZENEARESTNEIGHBOR_H
diff --git a/onert-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp b/onert-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp
new file mode 100644
index 000000000..7ade02a6f
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ResizeNearestNeighbor.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> size_shape,
+ std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data,
+ std::initializer_list<int32_t> size_data, std::initializer_list<float> output_data,
+ bool align_corners, bool half_pixel_centers)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeNearestNeighborParams params{};
+ params.align_corners = align_corners;
+ params.half_pixel_centers = half_pixel_centers;
+
+ ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data));
+}
+
+template <>
+void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> size_shape,
+ std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data,
+ std::initializer_list<int32_t> size_data,
+ std::initializer_list<float> output_data, bool align_corners,
+ bool half_pixel_centers)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ std::pair<float, int32_t> quant_param =
+ quantizationParams<uint8_t>(std::min(input_data) < 0 ? std::min(input_data) : 0.f,
+ std::max(input_data) > 0 ? std::max(input_data) : 0.f);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ input_shape, quant_param.first, quant_param.second, input_data, memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ ResizeNearestNeighborParams params{};
+ params.align_corners = align_corners;
+ params.half_pixel_centers = half_pixel_centers;
+
+ ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data, output_tensor.scale()));
+}
+
+template <typename T> class ResizeNearestNeighborTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(ResizeNearestNeighborTest, DataTypes);
+
+TYPED_TEST(ResizeNearestNeighborTest, SimpleTest)
+{
+ Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ {3, 3},
+ {
+ 3, 3, 6, //
+ 3, 3, 6, //
+ 9, 9, 12, //
+ 4, 4, 10, //
+ 4, 4, 10, //
+ 10, 10, 16, //
+ },
+ false, false);
+}
+
+TYPED_TEST(ResizeNearestNeighborTest, AlignCenterTest)
+{
+ Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ {3, 3},
+ {
+ 3, 6, 6, //
+ 9, 12, 12, //
+ 9, 12, 12, //
+ 4, 10, 10, //
+ 10, 16, 16, //
+ 10, 16, 16, //
+ },
+ true, false);
+}
+
+TYPED_TEST(ResizeNearestNeighborTest, HalfPixelCenterTest)
+{
+ Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ {3, 3},
+ {
+ 3, 6, 6, //
+ 9, 12, 12, //
+ 9, 12, 12, //
+ 4, 10, 10, //
+ 10, 16, 16, //
+ 10, 16, 16, //
+ },
+ false, true);
+}
+
+TEST(ResizeNearestNeighborTest, InputShapeInvalid_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeNearestNeighborParams params{};
+ params.align_corners = false;
+ params.half_pixel_centers = false;
+
+ ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(ResizeNearestNeighborTest, SizeShapeInvalid_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({2, 1}, {3, 3}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeNearestNeighborParams params{};
+ params.align_corners = false;
+ params.half_pixel_centers = false;
+
+ ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(ResizeNearestNeighborTest, SizeDimInvalid_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({3}, {3, 3, 1}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeNearestNeighborParams params{};
+ params.align_corners = false;
+ params.half_pixel_centers = false;
+
+ ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/ReverseV2.cpp b/onert-micro/luci-interpreter/src/kernels/ReverseV2.cpp
new file mode 100644
index 000000000..76eadbdc8
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/ReverseV2.cpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ReverseV2.h"
+#include "kernels/Utils.h"
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+ReverseV2::ReverseV2(const Tensor *input, const Tensor *axes, Tensor *output)
+ : Kernel({input, axes}, {output})
+{
+}
+
+void ReverseV2::configure()
+{
+ assert(axes()->shape().num_dims() == 1);
+ assert(input()->shape().num_dims() >= axes()->shape().num_elements());
+ if (input()->element_type() != DataType::S32 && input()->element_type() != DataType::FLOAT32 &&
+ input()->element_type() != DataType::U8 && input()->element_type() != DataType::S16 &&
+ input()->element_type() != DataType::S64)
+ {
+ assert(false && "Unsupported input type.");
+ }
+ if (axes()->element_type() != DataType::S32)
+ {
+ assert(false && "Unsupported axes type.");
+ }
+ if (axes()->shape().num_elements() > 1)
+ {
+ assert(false && "Current implementation does not support more than 1 axis.");
+ }
+ int axis_value = getTensorData<int32_t>(axes())[0];
+ if (axis_value < 0 || axis_value >= input()->shape().num_dims())
+ {
+ assert(false && "Invalid axes value");
+ }
+ assert(input()->element_type() == output()->element_type());
+
+ // TODO: enable this only for kernels with dynamic shapes
+ output()->resize(input()->shape());
+}
+
+void ReverseV2::execute() const
+{
+ int axis_value = getTensorData<int32_t>(axes())[0];
+ switch (output()->element_type())
+ {
+ case DataType::FLOAT32:
+ tflite::reference_ops::Reverse<float>(axis_value, getTensorShape(input()),
+ getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ tflite::reference_ops::Reverse<uint8_t>(
+ axis_value, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+ break;
+ default:
+ assert(false && "Unsupported output type");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/ReverseV2.h b/onert-micro/luci-interpreter/src/kernels/ReverseV2.h
new file mode 100644
index 000000000..51211c703
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/ReverseV2.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_REVERSE_H
+#define LUCI_INTERPRETER_KERNELS_REVERSE_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ReverseV2 : public Kernel
+{
+public:
+ ReverseV2(const Tensor *input, const Tensor *axes, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *axes() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_REVERSE_H
diff --git a/onert-micro/luci-interpreter/src/kernels/ReverseV2.test.cpp b/onert-micro/luci-interpreter/src/kernels/ReverseV2.test.cpp
new file mode 100644
index 000000000..c0025faca
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/ReverseV2.test.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ReverseV2.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T> class ReverseV2Test : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(ReverseV2Test, DataTypes);
+
+TYPED_TEST(ReverseV2Test, MultiDimensions)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ std::vector<TypeParam> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+ 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24};
+ Shape input_shape{4, 3, 2};
+ std::vector<int32_t> axis_data{1};
+ Shape axis_shape{1};
+
+ std::vector<TypeParam> output_data{5, 6, 3, 4, 1, 2, 11, 12, 9, 10, 7, 8,
+ 17, 18, 15, 16, 13, 14, 23, 24, 21, 22, 19, 20};
+ std::vector<int32_t> output_shape{4, 3, 2};
+
+ Tensor input_tensor =
+ makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data, memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>(axis_shape, axis_data, memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>());
+
+ ReverseV2 kernel = ReverseV2(&input_tensor, &axis_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<TypeParam>(output_tensor),
+ ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Rsqrt.cpp b/onert-micro/luci-interpreter/src/kernels/Rsqrt.cpp
new file mode 100644
index 000000000..c45c3e4ca
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Rsqrt.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Rsqrt.h"
+#include "kernels/Utils.h"
+
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Rsqrt::Rsqrt(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Rsqrt::configure()
+{
+ if (input()->element_type() != output()->element_type())
+ {
+ assert(false && "Input/output tensor data type mismatch.");
+ }
+ // TODO: enable this only for kernels with dynamic shapes
+ output()->resize(input()->shape());
+}
+
+void Rsqrt::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+void Rsqrt::evalFloat() const
+{
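+ // Elementwise reciprocal square root: out[i] = 1 / sqrt(in[i]) over the flat buffer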
+ auto in = getTensorData<float>(input());
+ auto out = getTensorData<float>(output());
+ auto size = getTensorShape(input()).FlatSize();
+ for (auto i = in; i != in + size; ++i)
+ {
+ *out = 1.f / std::sqrt(*i);
+ ++out;
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Rsqrt.h b/onert-micro/luci-interpreter/src/kernels/Rsqrt.h
new file mode 100644
index 000000000..adc5bcfa2
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Rsqrt.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_RSQRT_H
+#define LUCI_INTERPRETER_KERNELS_RSQRT_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Rsqrt : public Kernel
+{
+public:
+ Rsqrt(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_RSQRT_H
diff --git a/onert-micro/luci-interpreter/src/kernels/Rsqrt.test.cpp b/onert-micro/luci-interpreter/src/kernels/Rsqrt.test.cpp
new file mode 100644
index 000000000..3c6494232
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Rsqrt.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Rsqrt.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Rsqrt kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST(RsqrtTest, SimpleRsqrt)
+{
+ Check(
+ /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
+ /*input_data=*/
+ {
+ 5, 4, 8, 2, //
+ 6, 7.5, 9, 0.3, //
+ },
+ /*output_data=*/
+ {
+ 0.44721360, 0.5, 0.35355339, 0.70710678, //
+ 0.40824829, 0.36514837, 0.33333333, 1.8257419, //
+ });
+}
+
+TEST(RsqrtTest, Input_Output_Type_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ Rsqrt kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(RsqrtTest, Invalid_Input_Type_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ Rsqrt kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/SISOKernel.h b/onert-micro/luci-interpreter/src/kernels/SISOKernel.h
new file mode 100644
index 000000000..98e352ab6
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/SISOKernel.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SISO_KERNEL_H
+#define LUCI_INTERPRETER_KERNELS_SISO_KERNEL_H
+
+#include "Builders.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Single input single output kernel
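+// Resolves and validates an operator's single input and single output tensors,
+// removing the index-lookup boilerplate from kernel implementations. A typical
+// use inside a kernel body (sketch):
+//   kernels::SISOKernel siso(cur_op, runtime_graph);
+//   const circle::Tensor *input = siso.input();
+//   const circle::Tensor *output = siso.output();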
+class SISOKernel
+{
+public:
+ SISOKernel(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+ {
+ const auto input_index = cur_op->inputs()->operator[](0);
+ const auto output_index = cur_op->outputs()->operator[](0);
+
+ assert(input_index != -1);
+ assert(output_index != -1);
+
+ _input_tensor = runtime_graph->getCircleTensorByIndex(input_index);
+ _output_tensor = runtime_graph->getCircleTensorByIndex(output_index);
+
+ assert(_input_tensor != nullptr);
+ assert(_output_tensor != nullptr);
+ }
+
+ const circle::Tensor *input() const { return _input_tensor; }
+ const circle::Tensor *output() const { return _output_tensor; }
+
+private:
+ const circle::Tensor *_input_tensor;
+ const circle::Tensor *_output_tensor;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SISO_KERNEL_H
diff --git a/onert-micro/luci-interpreter/src/kernels/SVDF.cpp b/onert-micro/luci-interpreter/src/kernels/SVDF.cpp
new file mode 100644
index 000000000..a0ff30255
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/SVDF.cpp
@@ -0,0 +1,242 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SVDF.h"
+#include "kernels/Utils.h"
+#include "PALSVDF.h"
+
+#include <tensorflow/lite/kernels/internal/quantization_util.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+namespace
+{
+TfLiteFusedActivation get_tflite_activation(Activation activation)
+{
+ switch (activation)
+ {
+ case FusedActFunc::RELU:
+ return kTfLiteActRelu;
+ case FusedActFunc::RELU6:
+ return kTfLiteActRelu6;
+ case FusedActFunc::RELU_N1_TO_1:
+ return kTfLiteActReluN1To1;
+ case FusedActFunc::TANH:
+ return kTfLiteActTanh;
+ case FusedActFunc::SIGN_BIT:
+ return kTfLiteActSignBit;
+ case FusedActFunc::NONE:
+ return kTfLiteActNone;
+ default:
+ assert(false && "Unsupported activation type");
+ return kTfLiteActNone; // defined fallback for release builds where assert is a no-op
+ }
+}
+} // namespace
+
+SVDF::SVDF(const Tensor *input, const Tensor *weight_feature, const Tensor *weight_time,
+ const Tensor *bias, const Tensor *input_activation_state, Tensor *output,
+ Tensor *scratchpad_activation_state, Tensor *scratchpad_1, Tensor *scratchpad_2,
+ Tensor *scratchpad_3, Tensor *scratchpad_4, Tensor *scratchpad_5, Tensor *scratchpad_6,
+ const SVDFParams &params)
+ : KernelWithParams<SVDFParams>({input, weight_feature, weight_time, bias, input_activation_state},
+ {output, scratchpad_activation_state, scratchpad_1, scratchpad_2,
+ scratchpad_3, scratchpad_4, scratchpad_5, scratchpad_6},
+ params)
+{
+ // Do nothing
+}
+
+void SVDF::configure()
+{
+ const Shape &input_shape = input()->shape();
+ const Shape &weight_features_shape = weight_feature()->shape();
+ const Shape &weight_time_shape = weight_time()->shape();
+
+ // Validate Input Tensor:
+ LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::FLOAT32 ||
+ input()->element_type() == DataType::S8);
+ LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 2);
+
+ // Validate inputs and output types
+ if (input()->element_type() == DataType::S8)
+ {
+ LUCI_INTERPRETER_CHECK(weight_feature()->element_type() == DataType::S8);
+ LUCI_INTERPRETER_CHECK(weight_time()->element_type() == DataType::S16 ||
+ weight_time()->element_type() == DataType::S8);
+ if (bias())
+ LUCI_INTERPRETER_CHECK(bias()->element_type() == DataType::S32);
+
+ LUCI_INTERPRETER_CHECK(input_activation_state()->element_type() == DataType::S16 ||
+ input_activation_state()->element_type() == DataType::S8);
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::S8);
+
+ // Note: TFLite currently supports only the ReLU activation for integer SVDF
+ LUCI_INTERPRETER_CHECK(params().activation == FusedActFunc::RELU);
+ }
+ else if (weight_feature()->element_type() == DataType::FLOAT32)
+ {
+ LUCI_INTERPRETER_CHECK(weight_feature()->element_type() == DataType::FLOAT32);
+ LUCI_INTERPRETER_CHECK(weight_time()->element_type() == DataType::FLOAT32);
+ LUCI_INTERPRETER_CHECK(input_activation_state()->element_type() == DataType::FLOAT32);
+ if (bias())
+ LUCI_INTERPRETER_CHECK(bias()->element_type() == DataType::FLOAT32);
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
+ }
+ else if ((weight_feature()->element_type() == DataType::U8 ||
+ weight_feature()->element_type() == DataType::S8) &&
+ input()->element_type() == DataType::FLOAT32)
+ {
+ // TODO: support hybrid SVDF op
+ assert(false && "Hybrid type is not currently supported");
+ }
+ else
+ {
+ assert(false && "Unsupported type.");
+ }
+
+ // Check that all tensor parameters are consistent with one another and with
+ // the input configuration.
+ const int rank = params().svdf_rank;
+ const int batch_size = input_shape.dim(0);
+ const int num_filters = weight_features_shape.dim(0);
+ LUCI_INTERPRETER_CHECK(rank != 0);
+ LUCI_INTERPRETER_CHECK(num_filters % rank == 0);
+
+ const int num_units = num_filters / rank;
+ const int memory_size = weight_time_shape.dim(1);
+
+ // Validate Weight_Feature Input Tensor:
+ LUCI_INTERPRETER_CHECK(weight_features_shape.num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(weight_features_shape.dim(1) == input_shape.dim(1));
+
+ // Validate Weight_Time Input Tensor:
+ LUCI_INTERPRETER_CHECK(weight_time_shape.num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(weight_time_shape.dim(0) == num_filters);
+
+ // Validate Bias
+ if (bias())
+ LUCI_INTERPRETER_CHECK(bias()->shape().dim(0) == num_units);
+
+ // Validate Input Activation State
+ LUCI_INTERPRETER_CHECK(input_activation_state()->shape().num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(input_activation_state()->shape().dim(0) == batch_size);
+ LUCI_INTERPRETER_CHECK(input_activation_state()->shape().dim(1) == memory_size * num_filters);
+
+ // Resize scratchpad_state to input_activation_state
+ auto scratchpad_activation_state = getOutputTensors()[1];
+ scratchpad_activation_state->resize({batch_size, memory_size * num_filters});
+
+ // TODO: enable this only for kernels with dynamic shapes
+ // Resize output tensor
+ output()->resize({batch_size, num_units});
+
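+ // The platform layer (PAL) sizes whichever of the scratchpad tensors the
+ // chosen float/integer path needs; unused ones are left empty.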
+ luci_interpreter_pal::SetupScratchpadTensor(
+ input()->element_type(), weight_feature()->element_type(), getOutputTensors()[2],
+ getOutputTensors()[3], getOutputTensors()[4], getOutputTensors()[5], getOutputTensors()[6],
+ getOutputTensors()[7], input_shape, weight_time_shape, batch_size, num_filters, num_units);
+}
+
+void SVDF::execute() const
+{
+ switch (weight_feature()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::S8:
+ {
+ if (input()->element_type() == DataType::S8)
+ evalInteger();
+ else
+ // TODO: support hybrid SVDF op
+ assert(false && "Hybrid type is not currently supported");
+ break;
+ }
+ default:
+ assert(false && "Unsupported type");
+ }
+}
+
+void SVDF::evalInteger() const
+{
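+ // Fold the input/weight/state/output scales into two effective multipliers:
+ // scale_1 covers the feature matmul (input x weight_feature -> state) and
+ // scale_2 covers the time matmul (state x weight_time -> output). Each is
+ // decomposed by QuantizeMultiplier into a 32-bit mantissa and a shift.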
+ const auto effective_scale_1 = static_cast<double>(input()->scale() * weight_feature()->scale() /
+ input_activation_state()->scale());
+ const auto effective_scale_2 = static_cast<double>(input_activation_state()->scale() *
+ weight_time()->scale() / output()->scale());
+
+ int32_t effective_scale_1_a;
+ int effective_scale_1_b;
+ int32_t effective_scale_2_a;
+ int effective_scale_2_b;
+
+ tflite::QuantizeMultiplier(effective_scale_1, &effective_scale_1_a, &effective_scale_1_b);
+ tflite::QuantizeMultiplier(effective_scale_2, &effective_scale_2_a, &effective_scale_2_b);
+
+ TfLiteSVDFParams params_svdf{};
+ params_svdf.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs;
+ params_svdf.rank = params().svdf_rank;
+ params_svdf.activation = get_tflite_activation(params().activation);
+
+ auto scratchpad_activation_state = getOutputTensors()[1];
+ // Note: the activation_state input is a variable tensor that is expected to
+ // be reset to zero and to have no backing buffer of its own
+ auto scratchpad_data = getTensorData<int16_t>(scratchpad_activation_state);
+ std::fill_n(scratchpad_data, scratchpad_activation_state->shape().num_elements(), 0);
+
+ auto scratchpad = getOutputTensors()[2];
+ auto output_temp = getOutputTensors()[3];
+
+ int32_t input_zp = input()->zero_point();
+ int32_t output_zp = output()->zero_point();
+ luci_interpreter_pal::IntegerSVDF(
+ params_svdf, getTensorShape(input()), getTensorData<int8_t>(input()),
+ getTensorShape(weight_feature()), getTensorData<int8_t>(weight_feature()),
+ getTensorShape(weight_time()), getTensorData<int16_t>(weight_time()), getTensorShape(bias()),
+ getTensorData<int32_t>(bias()), scratchpad_data, getTensorShape(output()),
+ getTensorData<int8_t>(output()), getTensorData<int32_t>(scratchpad),
+ getTensorData<int32_t>(output_temp), effective_scale_1_a, effective_scale_1_b,
+ effective_scale_2_a, effective_scale_2_b, input_zp, output_zp);
+}
+
+void SVDF::evalFloat() const
+{
+ TfLiteSVDFParams params_svdf{};
+ params_svdf.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs;
+ params_svdf.rank = params().svdf_rank;
+ params_svdf.activation = get_tflite_activation(params().activation);
+
+ auto scratchpad_activation_state = getOutputTensors()[1];
+ // Note: the activation_state input is a variable tensor that is expected to
+ // be reset to zero and to have no backing buffer of its own
+ auto scratchpad_data = getTensorData<float>(scratchpad_activation_state);
+ std::fill_n(scratchpad_data, scratchpad_activation_state->shape().num_elements(), 0);
+
+ auto scratchpad_1 = getOutputTensors()[2];
+
+ luci_interpreter_pal::FloatSVDF(
+ params_svdf, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(weight_feature()), getTensorData<float>(weight_feature()),
+ getTensorShape(weight_time()), getTensorData<float>(weight_time()), getTensorShape(bias()),
+ getTensorData<float>(bias()), getTensorData<float>(scratchpad_1), scratchpad_data,
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/SVDF.h b/onert-micro/luci-interpreter/src/kernels/SVDF.h
new file mode 100644
index 000000000..335a6cd8f
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/SVDF.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SVDF_H
+#define LUCI_INTERPRETER_KERNELS_SVDF_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class SVDF : public KernelWithParams<SVDFParams>
+{
+public:
+ SVDF(const Tensor *input, const Tensor *weight_feature, const Tensor *weight_time,
+ const Tensor *bias, const Tensor *input_activation_state, Tensor *output,
+ Tensor *scratchpad_activation_state, Tensor *scratchpad_1, Tensor *scratchpad_2,
+ Tensor *scratchpad_3, Tensor *scratchpad_4, Tensor *scratchpad_5, Tensor *scratchpad_6,
+ const SVDFParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *weight_feature() const { return _inputs[1]; }
+ const Tensor *weight_time() const { return _inputs[2]; }
+ const Tensor *bias() const { return _inputs[3]; }
+ const Tensor *input_activation_state() const { return _inputs[4]; }
+
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalInteger() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SVDF_H
diff --git a/onert-micro/luci-interpreter/src/kernels/SVDF.test.cpp b/onert-micro/luci-interpreter/src/kernels/SVDF.test.cpp
new file mode 100644
index 000000000..82bd9b009
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/SVDF.test.cpp
@@ -0,0 +1,341 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SVDF.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class SVDFTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(SVDFTest, FullIntegerTest)
+{
+ const int32_t batches = 2;
+ const int32_t input_size = 3;
+ const int32_t units = 4;
+ const int32_t memory_size = 10;
+ const int32_t rank = 1;
+ const int32_t num_filters = units * rank;
+
+ Shape input_shape{batches, input_size};
+ Shape weight_feature_shape{num_filters, input_size};
+ Shape weight_time_shape{num_filters, memory_size};
+ Shape bias_shape{units};
+ Shape activation_state_shape{batches, memory_size * num_filters};
+
+ std::vector<float> input_data{0.49837467, 0.19278903, 0.26584083,
+ 0.17660543, 0.52949083, -0.77931279};
+
+ std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347,
+ 0.22197971, 0.12416199, 0.27901134, 0.27557442,
+ 0.3905206, -0.36137494, -0.06634006, -0.10640851};
+
+ std::vector<float> weight_time_data{
+ -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156,
+ 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199,
+
+ 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518,
+ -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296,
+
+ -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236,
+ 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846,
+
+ -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166,
+ -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657};
+
+ std::vector<float> bias_data{-0.0976817, 0.15294972, 0.39635518, -0.02702999};
+
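+  // quantizationParams<T> (TestUtils helper) derives an asymmetric
+  // (scale, zero_point) pair from the given float range; e.g. int8 over
+  // [-1, 1] yields a scale of roughly 2 / 255.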
+ std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(-1, 1);
+ std::pair<float, int32_t> weight_feature_quant_param = quantizationParams<int8_t>(-0.5, 0.5);
+ std::pair<float, int32_t> weight_time_quant_param = quantizationParams<int16_t>(-1, 1);
+ std::pair<float, int32_t> bias_quant_param = quantizationParams<int32_t>(-512, 512);
+ std::pair<float, int32_t> activation_state_quant_param = quantizationParams<int16_t>(-16, 16);
+
+ std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-0.5, 0.5);
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor weight_feature_tensor = makeInputTensor<DataType::S8>(
+ weight_feature_shape, weight_feature_quant_param.first, weight_feature_quant_param.second,
+ weight_feature_data, _memory_manager.get());
+ Tensor weight_time_tensor = makeInputTensor<DataType::S16>(
+ weight_time_shape, weight_time_quant_param.first, weight_time_quant_param.second,
+ weight_time_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>(
+ bias_shape, bias_quant_param.first, bias_quant_param.second, bias_data, _memory_manager.get());
+ Tensor activation_state_tensor = makeOutputTensor(
+ DataType::S16, activation_state_quant_param.first, activation_state_quant_param.second);
+ activation_state_tensor.resize(activation_state_shape);
+ Tensor output_tensor =
+ makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
+
+ Tensor scratchpad_activation_state(DataType::S16, Shape({}), {}, "");
+ Tensor scratchpad_1(DataType::S32, Shape({}), {}, "");
+ Tensor scratchpad_2(DataType::S32, Shape({}), {}, "");
+ Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+ SVDFParams params{};
+ params.activation = Activation::RELU;
+ params.asymmetric_quantize_inputs = false;
+ params.svdf_rank = rank;
+
+ SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, &bias_tensor,
+ &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+ &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad_activation_state);
+ _memory_manager->allocate_memory(scratchpad_1);
+ _memory_manager->allocate_memory(scratchpad_2);
+ _memory_manager->allocate_memory(scratchpad_3);
+ _memory_manager->allocate_memory(scratchpad_4);
+ _memory_manager->allocate_memory(scratchpad_5);
+ _memory_manager->allocate_memory(scratchpad_6);
+ kernel.execute();
+
+ std::vector<int8_t> ref_output_data{-9, 24, 31, 1, -10, 10, -3, 0};
+
+ std::vector<int32_t> ref_output_shape{batches, units};
+ EXPECT_THAT(extractTensorData<int8_t>(output_tensor), ref_output_data);
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(SVDFTest, FloatTest)
+{
+ const int32_t batches = 2;
+ const int32_t input_size = 3;
+ const int32_t units = 4;
+ const int32_t memory_size = 10;
+ const int32_t rank = 1;
+ const int32_t num_filters = units * rank;
+
+ Shape input_shape{batches, input_size};
+ Shape weight_feature_shape{num_filters, input_size};
+ Shape weight_time_shape{num_filters, memory_size};
+ Shape activation_state_shape{batches, memory_size * num_filters};
+
+ std::vector<float> input_data{0.12609188, -0.46347019, -0.89598465,
+ 0.35867718, 0.36897406, 0.73463392};
+
+ std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347,
+ 0.22197971, 0.12416199, 0.27901134, 0.27557442,
+ 0.3905206, -0.36137494, -0.06634006, -0.10640851};
+
+ std::vector<float> weight_time_data{
+ -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156,
+ 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199,
+
+ 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518,
+ -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296,
+
+ -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236,
+ 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846,
+
+ -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166,
+ -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor weight_feature_tensor = makeInputTensor<DataType::FLOAT32>(
+ weight_feature_shape, weight_feature_data, _memory_manager.get());
+ Tensor weight_time_tensor =
+ makeInputTensor<DataType::FLOAT32>(weight_time_shape, weight_time_data, _memory_manager.get());
+ Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32);
+ activation_state_tensor.resize(activation_state_shape);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+ SVDFParams params{};
+ params.activation = Activation::NONE;
+ params.asymmetric_quantize_inputs = false;
+ params.svdf_rank = rank;
+
+ SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr,
+ &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+ &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad_activation_state);
+ _memory_manager->allocate_memory(scratchpad_1);
+ _memory_manager->allocate_memory(scratchpad_2);
+ _memory_manager->allocate_memory(scratchpad_3);
+ _memory_manager->allocate_memory(scratchpad_4);
+ _memory_manager->allocate_memory(scratchpad_5);
+ _memory_manager->allocate_memory(scratchpad_6);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0.014899, -0.0517661, -0.143725, -0.00271883,
+ -0.03004015, 0.09565311, 0.1587342, 0.00784263};
+
+  std::vector<int32_t> ref_output_shape{batches, units};
+ const float tolerance = 1e-5;
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, tolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(SVDFTest, Unsupported_Type_Configure_NEG)
+{
+ const int32_t batches = 2;
+ const int32_t input_size = 3;
+ const int32_t units = 4;
+ const int32_t memory_size = 10;
+ const int32_t rank = 1;
+ const int32_t num_filters = units * rank;
+
+ Shape input_shape{batches, input_size};
+ Shape weight_feature_shape{num_filters, input_size};
+ Shape weight_time_shape{num_filters, memory_size};
+ Shape activation_state_shape{batches, memory_size * num_filters};
+
+ std::vector<int32_t> input_data{0, 1, 3, 4, 4, -2};
+
+ std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347,
+ 0.22197971, 0.12416199, 0.27901134, 0.27557442,
+ 0.3905206, -0.36137494, -0.06634006, -0.10640851};
+
+ std::vector<float> weight_time_data{
+ -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156,
+ 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199,
+
+ 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518,
+ -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296,
+
+ -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236,
+ 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846,
+
+ -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166,
+ -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+ Tensor weight_feature_tensor = makeInputTensor<DataType::FLOAT32>(
+ weight_feature_shape, weight_feature_data, _memory_manager.get());
+ Tensor weight_time_tensor =
+ makeInputTensor<DataType::FLOAT32>(weight_time_shape, weight_time_data, _memory_manager.get());
+ Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32);
+ activation_state_tensor.resize(activation_state_shape);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+ SVDFParams params{};
+ params.activation = Activation::NONE;
+ params.asymmetric_quantize_inputs = false;
+ params.svdf_rank = rank;
+
+ SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr,
+ &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+ &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(SVDFTest, Invalid_Input_Shape_NEG)
+{
+ const int32_t batches = 2;
+ const int32_t right_input_size = 3;
+ const int32_t wrong_input_size = 4;
+ const int32_t units = 4;
+ const int32_t memory_size = 10;
+ const int32_t rank = 1;
+ const int32_t num_filters = units * rank;
+
+ Shape input_shape{batches, wrong_input_size};
+ Shape weight_feature_shape{num_filters, right_input_size};
+ Shape weight_time_shape{num_filters, memory_size};
+ Shape activation_state_shape{batches, memory_size * num_filters};
+
+ std::vector<float> input_data{0, 1, 3, 2, 4, 4, -2, 1};
+
+ std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347,
+ 0.22197971, 0.12416199, 0.27901134, 0.27557442,
+ 0.3905206, -0.36137494, -0.06634006, -0.10640851};
+
+ std::vector<float> weight_time_data{
+ -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156,
+ 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199,
+
+ 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518,
+ -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296,
+
+ -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236,
+ 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846,
+
+ -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166,
+ -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor weight_feature_tensor = makeInputTensor<DataType::FLOAT32>(
+ weight_feature_shape, weight_feature_data, _memory_manager.get());
+ Tensor weight_time_tensor =
+ makeInputTensor<DataType::FLOAT32>(weight_time_shape, weight_time_data, _memory_manager.get());
+ Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32);
+ activation_state_tensor.resize(activation_state_shape);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+ SVDFParams params{};
+ params.activation = Activation::NONE;
+ params.asymmetric_quantize_inputs = false;
+ params.svdf_rank = rank;
+
+ SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr,
+ &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+ &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Shape.cpp b/onert-micro/luci-interpreter/src/kernels/Shape.cpp
new file mode 100644
index 000000000..31a0b62bc
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Shape.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "SISOKernel.h"
+#include "kernels/Utils.h"
+
+namespace luci_interpreter
+{
+void configure_kernel_CircleShape(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::SISOKernel kernel(cur_op, runtime_graph);
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.output()) == DataType::S32);
+}
+
+void execute_kernel_CircleShape(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::SISOKernel kernel(cur_op, runtime_graph);
+
+ const circle::Tensor *input = kernel.input();
+ const circle::Tensor *output = kernel.output();
+
+ assert(Tensor::element_type(output) == DataType::S32);
+ int32_t *output_data = kernels::getTensorData<int32_t>(runtime_graph->getDataByTensor(output));
+
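+  // e.g. for an input of shape {2, 3, 4} the output data is {2, 3, 4}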
+ const int rank = Tensor::num_dims(input);
+ for (int i = 0; i < rank; ++i)
+ {
+ output_data[i] = Tensor::dim(input, i);
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Shape.test.cpp b/onert-micro/luci-interpreter/src/kernels/Shape.test.cpp
new file mode 100644
index 000000000..f20eb784a
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Shape.test.cpp
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/shape/ShapeKernel.h"
+#include "luci_interpreter/test_models/shape/NegShapeKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class ShapeTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T, typename U>
+std::vector<U> checkShapeKernel(test_kernel::TestDataBase<T, U> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 1);
+
+ // Set input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ U *output_data = reinterpret_cast<U *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(U));
+ std::vector<U> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(ShapeTest, MainTest_P)
+{
+ test_kernel::TestDataShapeKernel<float, int32_t> test_data_shape_kernel;
+ std::vector<int32_t> output_data_vector = checkShapeKernel(&test_data_shape_kernel);
+ EXPECT_THAT(output_data_vector, test_data_shape_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(ShapeTest, Wrong_output_type_NEG)
+{
+ test_kernel::NegTestDataWrongOutputTypeShapeKernel test_data_kernel;
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Slice.cpp b/onert-micro/luci-interpreter/src/kernels/Slice.cpp
new file mode 100644
index 000000000..34e549820
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Slice.cpp
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+#include "MISOKernel.h"
+
+#include <cassert>
+
+namespace luci_interpreter
+{
+
+namespace
+{
+const int max_dim = 5;
+
+struct SliceParams
+{
+ int8_t begin_count;
+ int32_t begin[5];
+ int8_t size_count;
+ int32_t size[5];
+};
+
+template <typename T>
+inline void slice(const luci_interpreter::SliceParams &op_params,
+ const luci_interpreter::RuntimeShape &input_shape, const T *input_data,
+ const luci_interpreter::RuntimeShape &output_shape, T *output_data)
+{
+ const luci_interpreter::RuntimeShape ext_shape =
+ luci_interpreter::RuntimeShape::extendedShape(5, input_shape);
+ const int begin_count = op_params.begin_count;
+ const int size_count = op_params.size_count;
+ // We front-pad the begin and size vectors.
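+  // For example (illustrative): with a 3-D input and begin = {b0, b1, b2},
+  // the padded start is {0, 0, b0, b1, b2}, so the original dimensions map
+  // to the trailing positions of the fixed 5-D loop nest below.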
+ int start[5];
+ int stop[5];
+ for (int i = 0; i < 5; ++i)
+ {
+ int padded_i = 5 - i;
+ start[i] = begin_count < padded_i ? 0 : op_params.begin[begin_count - padded_i];
+ stop[i] = (size_count < padded_i || op_params.size[size_count - padded_i] == -1)
+ ? ext_shape.dims(i)
+ : start[i] + op_params.size[size_count - padded_i];
+ }
+
+ for (int i0 = start[0]; i0 < stop[0]; ++i0)
+ {
+ for (int i1 = start[1]; i1 < stop[1]; ++i1)
+ {
+ for (int i2 = start[2]; i2 < stop[2]; ++i2)
+ {
+ for (int i3 = start[3]; i3 < stop[3]; ++i3)
+ {
+ for (int i4 = start[4]; i4 < stop[4]; ++i4)
+ {
+ auto position =
+ (((i0 * ext_shape.dims(1) + i1) * ext_shape.dims(2) + i2) * ext_shape.dims(3) + i3) *
+ ext_shape.dims(4) +
+ i4;
+ *output_data++ = input_data[position];
+ }
+ }
+ }
+ }
+ }
+}
+
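+// Copies the begin/size tensors into the trailing `dimensions` slots of the
+// fixed 5-element arrays, leaving the front-padded defaults untouched.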
+template <typename T>
+void getBeginAndSizeVectors(int dimensions, const uint8_t *begin_data, const uint8_t *size_data,
+ int32_t *begins, int32_t *sizes)
+{
+ int offset = max_dim - dimensions;
+ for (int idx = 0; idx < dimensions; ++idx)
+ {
+ begins[offset + idx] = kernels::getTensorData<T>(begin_data)[idx];
+ sizes[offset + idx] = kernels::getTensorData<T>(size_data)[idx];
+ }
+}
+} // namespace
+
+void configure_kernel_CircleSlice(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::MISOKernel kernel(cur_op, runtime_graph);
+
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) ==
+ Tensor::element_type(kernel.output()));
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input2()) == DataType::S32 ||
+ Tensor::element_type(kernel.input2()) == DataType::S64);
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input3()) == DataType::S32 ||
+ Tensor::element_type(kernel.input3()) == DataType::S64);
+ LUCI_INTERPRETER_CHECK(Tensor::num_dims(kernel.input2()) == 1);
+ LUCI_INTERPRETER_CHECK(Tensor::num_dims(kernel.input3()) == 1);
+ LUCI_INTERPRETER_CHECK(Tensor::num_dims(kernel.input1()) <= max_dim);
+}
+
+void execute_kernel_CircleSlice(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::MISOKernel kernel(cur_op, runtime_graph);
+
+ bool is_dynamic_shapes = false;
+
+ const circle::Tensor *input = kernel.input1();
+ const circle::Tensor *begin = kernel.input2();
+ const circle::Tensor *size_tensor = kernel.input3();
+ const circle::Tensor *output = kernel.output();
+
+ const auto *input_data = runtime_graph->getDataByTensor(input);
+ if (input_data == nullptr)
+ input_data = runtime_graph->getConstDataByTensor(input);
+ assert(input_data);
+
+ const auto *begin_data = runtime_graph->getDataByTensor(begin);
+ if (begin_data == nullptr)
+ {
+ begin_data = runtime_graph->getConstDataByTensor(begin);
+ is_dynamic_shapes = true;
+ }
+ assert(begin_data);
+
+ const auto *size_data = runtime_graph->getDataByTensor(size_tensor);
+ if (size_data == nullptr)
+ {
+ size_data = runtime_graph->getConstDataByTensor(size_tensor);
+ is_dynamic_shapes = true;
+ }
+ assert(size_data);
+
+ auto *output_data = runtime_graph->getDataByTensor(output);
+ assert(output_data);
+
+ SliceParams op_params{};
+ op_params.begin_count = max_dim;
+ op_params.size_count = max_dim;
+ for (int i = 0; i < max_dim; i++)
+ {
+ op_params.begin[i] = 0;
+ op_params.size[i] = 1;
+ }
+ auto num_dim = Tensor::num_dims(input);
+
+ if (Tensor::element_type(begin) == DataType::S32)
+ {
+ getBeginAndSizeVectors<int32_t>(num_dim, begin_data, size_data, op_params.begin,
+ op_params.size);
+ }
+ else if (Tensor::element_type(begin) == DataType::S64)
+ {
+ getBeginAndSizeVectors<int64_t>(num_dim, begin_data, size_data, op_params.begin,
+ op_params.size);
+ }
+ else
+ {
+ assert(false && "Unsupported type");
+ }
+
+#ifndef DIS_DYN_SHAPES
+ if (is_dynamic_shapes)
+ {
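+    // Recompute the output shape from begin/size (a size of -1 means
+    // "to the end of the dimension") and reallocate the output buffer.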
+ int32_t data_size = 1;
+ luci_interpreter::RuntimeShape dynamic_shapes(max_dim - num_dim + 1);
+ int offset = max_dim - Tensor::num_dims(input);
+ for (int i = 0; i <= max_dim - num_dim; ++i)
+ {
+ if (i + offset > 4)
+ return;
+ auto cur_size = op_params.size[i + offset] != -1
+ ? op_params.size[i + offset]
+ : Tensor::dim(input, i) - op_params.begin[i + offset];
+ data_size *= cur_size;
+
+ dynamic_shapes.setDim(i, cur_size);
+ }
+ data_size *= size(Tensor::element_type(output));
+
+ runtime_graph->addDynamicShapeTensor(output, std::move(dynamic_shapes));
+
+ if (data_size == 0)
+ {
+ runtime_graph->resetTensorData(nullptr, output);
+ return;
+ }
+
+ auto new_output_data = new uint8_t[data_size];
+ output_data = new_output_data;
+ runtime_graph->resetTensorData(new_output_data, output);
+ }
+#else
+ assert(is_dynamic_shapes == false);
+#endif // DIS_DYN_SHAPES
+
+ switch (Tensor::element_type(input))
+ {
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ slice<float>(op_params, kernels::getTensorShape(input),
+ kernels::getTensorData<float>(input_data), kernels::getTensorShape(output),
+ kernels::getTensorData<float>(output_data));
+ break;
+#endif // DIS_FLOAT
+#ifndef DIS_QUANT
+ case DataType::U8:
+ slice<uint8_t>(op_params, kernels::getTensorShape(input),
+ kernels::getTensorData<uint8_t>(input_data), kernels::getTensorShape(output),
+ kernels::getTensorData<uint8_t>(output_data));
+ break;
+ case DataType::S8:
+ slice<int8_t>(op_params, kernels::getTensorShape(input),
+ kernels::getTensorData<int8_t>(input_data), kernels::getTensorShape(output),
+ kernels::getTensorData<int8_t>(output_data));
+ break;
+ case DataType::S16:
+ slice<int16_t>(op_params, kernels::getTensorShape(input),
+ kernels::getTensorData<int16_t>(input_data), kernels::getTensorShape(output),
+ kernels::getTensorData<int16_t>(output_data));
+ break;
+#endif // DIS_QUANT
+ default:
+ assert(false && "Unsupported input type.");
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Slice.test.cpp b/onert-micro/luci-interpreter/src/kernels/Slice.test.cpp
new file mode 100644
index 000000000..0bd13d748
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Slice.test.cpp
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/slice/FloatSliceKernel.h"
+#include "luci_interpreter/test_models/slice/QuantU8SliceKernel.h"
+#include "luci_interpreter/test_models/slice/QuantS16SliceKernel.h"
+#include "luci_interpreter/test_models/slice/NegSliceKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class SliceTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T> std::vector<T> checkSliceKernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 1);
+
+ // Set input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(SliceTest, Float_P)
+{
+ test_kernel::TestDataFloatSlice test_data_kernel;
+ std::vector<float> output_data_vector = checkSliceKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(SliceTest, U8_P)
+{
+ test_kernel::TestDataU8Slice test_data_kernel;
+ std::vector<uint8_t> output_data_vector = checkSliceKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(SliceTest, INT16_P)
+{
+ test_kernel::TestDataS16Slice test_data_kernel;
+ std::vector<int16_t> output_data_vector = checkSliceKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(SliceTest, TypeMismatch_NEG)
+{
+ test_kernel::TestDataTypeMismatchSlice test_data_kernel;
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+TEST_F(SliceTest, WrongBeginType_NEG)
+{
+ test_kernel::TestDataWrongBeginTypeSlice test_data_kernel;
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+TEST_F(SliceTest, WrongSizeType_NEG)
+{
+ test_kernel::TestDataWrongSizeTypeSlice test_data_kernel;
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+TEST_F(SliceTest, WrongInputShape_NEG)
+{
+ test_kernel::TestDataWrongInputShapeSlice test_data_kernel;
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+// TODO: add S8 test
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Softmax.cpp b/onert-micro/luci-interpreter/src/kernels/Softmax.cpp
new file mode 100644
index 000000000..4647cc94e
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Softmax.cpp
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+#include "SISOKernel.h"
+
+#include "PALSoftmax.h"
+
+namespace luci_interpreter
+{
+
+namespace
+{
+
+#ifndef DIS_FLOAT
+void evalFloat(const circle::Tensor *input, const circle::Tensor *output,
+ const circle::SoftmaxOptions *options, BaseRuntimeGraph *runtime_graph)
+{
+ const auto *input_data = runtime_graph->getDataByTensor(input);
+ auto *output_data = runtime_graph->getDataByTensor(output);
+
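+  // beta scales the logits before exponentiation:
+  // softmax(x)_i = exp(beta * x_i) / sum_j exp(beta * x_j)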
+ luci_interpreter_pal::Softmax(options->beta(), kernels::getTensorShape(input),
+ kernels::getTensorData<float>(input_data),
+ kernels::getTensorData<float>(output_data));
+}
+#endif // DIS_FLOAT
+
+} // namespace
+
+void configure_kernel_CircleSoftmax(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::SISOKernel kernel(cur_op, runtime_graph);
+
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input()) ==
+ Tensor::element_type(kernel.output()));
+ LUCI_INTERPRETER_CHECK(Tensor::num_dims(kernel.input()) >= 1);
+
+#ifndef DIS_QUANT
+ if (Tensor::element_type(kernel.input()) == DataType::U8 ||
+ Tensor::element_type(kernel.input()) == DataType::S8)
+ {
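+    // For U8 the output zero point must be 0; for S8 it must be
+    // std::numeric_limits<int8_t>::min(). Each disjunct below is trivially
+    // satisfied for the other type, so one pair of checks covers both.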
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input()) == DataType::S8 ||
+ Tensor::zero_point(kernel.output()) == 0);
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input()) == DataType::U8 ||
+ Tensor::zero_point(kernel.output()) ==
+ std::numeric_limits<int8_t>::min());
+ }
+#endif
+}
+
+void execute_kernel_CircleSoftmax(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::SISOKernel kernel(cur_op, runtime_graph);
+
+ const auto *options = cur_op->builtin_options_as_SoftmaxOptions();
+
+ switch (Tensor::element_type(kernel.input()))
+ {
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ evalFloat(kernel.input(), kernel.output(), options, runtime_graph);
+ break;
+#endif // DIS_FLOAT
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Softmax.test.cpp b/onert-micro/luci-interpreter/src/kernels/Softmax.test.cpp
new file mode 100644
index 000000000..f026e89c4
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Softmax.test.cpp
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// TODO: enable these tests
+#if 0
+#include "kernels/Softmax.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T> constexpr loco::DataType toLocoDataType();
+
+template <> constexpr loco::DataType toLocoDataType<float>() { return loco::DataType::FLOAT32; }
+
+template <> constexpr loco::DataType toLocoDataType<uint8_t>() { return loco::DataType::U8; }
+
+template <> constexpr loco::DataType toLocoDataType<int8_t>() { return loco::DataType::S8; }
+
+template <typename T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<toLocoDataType<T>()>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(toLocoDataType<T>());
+
+ SoftmaxParams params{};
+ params.beta = 0.1;
+
+ Softmax kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+}
+
+template <typename T, std::enable_if_t<std::is_integral<T>::value, bool> = true>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ std::pair<float, int32_t> input_quant_param =
+ quantizationParams<T>(std::min<float>(std::min<float>(input_data), 0.f),
+ std::max<float>(std::max<float>(input_data), 0.f));
+ std::pair<float, int32_t> output_quant_param =
+ quantizationParams<T>(std::min<float>(std::min<float>(output_data), 0.f),
+ std::max<float>(std::max<float>(output_data), 0.f));
+ Tensor input_tensor = makeInputTensor<toLocoDataType<T>()>(input_shape, input_quant_param.first,
+ input_quant_param.second, input_data,
+ memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(toLocoDataType<T>(), output_quant_param.first, output_quant_param.second);
+
+ SoftmaxParams params{};
+ params.beta = 0.1;
+
+ Softmax kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data, output_tensor.scale()));
+}
+
+template <typename T> class SoftmaxTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
+TYPED_TEST_SUITE(SoftmaxTest, DataTypes);
+
+TYPED_TEST(SoftmaxTest, Simple)
+{
+ Check<TypeParam>({2, 1, 2, 3}, {2, 1, 2, 3},
+ {
+ 5, -9, 8, //
+ -7, 2, -4, //
+ 1, -2, 9, //
+ 3, -6, -1, //
+ },
+ {
+ 0.38514, 0.09497, 0.51989, //
+ 0.20792, 0.51141, 0.28067, //
+ 0.25212, 0.18678, 0.56110, //
+ 0.48149, 0.19576, 0.32275, //
+ });
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
+#endif
diff --git a/onert-micro/luci-interpreter/src/kernels/SpaceToBatchND.cpp b/onert-micro/luci-interpreter/src/kernels/SpaceToBatchND.cpp
new file mode 100644
index 000000000..0a5b44725
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/SpaceToBatchND.cpp
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SpaceToBatchND.h"
+#include "kernels/Utils.h"
+
+#include "PALSpaceToBatchND.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+const int kInputMinDimensionNum = 3;
+const int kInputMaxDimensionNum = 4;
+
+} // namespace
+
+SpaceToBatchND::SpaceToBatchND(const Tensor *input, const Tensor *block_shape,
+ const Tensor *paddings, Tensor *output)
+ : Kernel({input, block_shape, paddings}, {output})
+{
+}
+
+void SpaceToBatchND::configure()
+{
+ const auto *block_shape_data = block_shape()->data<int32_t>();
+ const auto *paddings_data = paddings()->data<int32_t>();
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= kInputMinDimensionNum);
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= kInputMaxDimensionNum);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+ int spatial_dims_num = input()->shape().num_dims() - 2;
+
+ LUCI_INTERPRETER_CHECK(block_shape()->shape().num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(block_shape()->shape().dim(0) == spatial_dims_num);
+
+ LUCI_INTERPRETER_CHECK(paddings()->shape().num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(paddings()->shape().dim(0) == spatial_dims_num);
+ LUCI_INTERPRETER_CHECK(paddings()->shape().dim(1) == 2);
+
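+  // Example (mirrors the test data): input {1, 5, 2, 1}, block_shape {3, 2},
+  // paddings {{1, 0}, {2, 0}}: padded spatial dims are {6, 4}, so the output
+  // shape is {1 * 3 * 2, 6 / 3, 4 / 2, 1} = {6, 2, 2, 1}.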
+ Shape output_shape = Shape(input()->shape().num_dims());
+ int output_batch_size = input()->shape().dim(0);
+ for (int i = 0; i < spatial_dims_num; ++i)
+ {
+ int final_dim_size =
+ (input()->shape().dim(i + 1) + paddings_data[i * 2] + paddings_data[i * 2 + 1]);
+ LUCI_INTERPRETER_CHECK(final_dim_size % block_shape_data[i] == 0);
+ output_shape.dim(i + 1) = final_dim_size / block_shape_data[i];
+ output_batch_size = output_batch_size * block_shape_data[i];
+ }
+ output_shape.dim(0) = output_batch_size;
+ output_shape.dim(input()->shape().num_dims() - 1) =
+ input()->shape().dim(input()->shape().num_dims() - 1);
+  // TODO: enable this only for kernels with dynamic shapes
+ output()->resize(output_shape);
+}
+
+void SpaceToBatchND::execute() const
+{
+  tflite::SpaceToBatchParams op_params{};
+  switch (input()->element_type())
+  {
+ case DataType::FLOAT32:
+ op_params.output_offset = 0;
+ luci_interpreter_pal::SpaceToBatchND(
+ op_params, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(block_shape()), getTensorData<int32_t>(block_shape()),
+ getTensorShape(paddings()), getTensorData<int32_t>(paddings()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ op_params.output_offset = output()->zero_point();
+ luci_interpreter_pal::SpaceToBatchND(
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ getTensorShape(block_shape()), getTensorData<int32_t>(block_shape()),
+ getTensorShape(paddings()), getTensorData<int32_t>(paddings()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/SpaceToBatchND.h b/onert-micro/luci-interpreter/src/kernels/SpaceToBatchND.h
new file mode 100644
index 000000000..0893003bb
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/SpaceToBatchND.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SPACETOBATCHND_H
+#define LUCI_INTERPRETER_KERNELS_SPACETOBATCHND_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class SpaceToBatchND : public Kernel
+{
+public:
+ SpaceToBatchND(const Tensor *input, const Tensor *block_shape, const Tensor *paddings,
+ Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *block_shape() const { return _inputs[1]; }
+ const Tensor *paddings() const { return _inputs[2]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SPACETOBATCHND_H
diff --git a/onert-micro/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp b/onert-micro/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp
new file mode 100644
index 000000000..3a8b0a812
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SpaceToBatchND.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> block_shape_shape,
+ std::initializer_list<int32_t> paddings_shape,
+ std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data,
+ std::initializer_list<int32_t> block_shape_data,
+ std::initializer_list<int32_t> paddings_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor block_shape_tensor =
+ makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data, memory_manager.get());
+ Tensor paddings_tensor =
+ makeInputTensor<DataType::S32>(paddings_shape, paddings_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+}
+
+template <>
+void Check<uint8_t>(
+ std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> block_shape_shape,
+ std::initializer_list<int32_t> paddings_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<int32_t> block_shape_data,
+ std::initializer_list<int32_t> paddings_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ std::pair<float, int32_t> input_quant_param =
+ quantizationParams<uint8_t>(std::min(input_data), std::max(input_data));
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, memory_manager.get());
+ Tensor block_shape_tensor =
+ makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data, memory_manager.get());
+ Tensor paddings_tensor =
+ makeInputTensor<DataType::S32>(paddings_shape, paddings_data, memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(DataType::U8, input_quant_param.first, input_quant_param.second);
+
+ SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data, output_tensor.scale()));
+ EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+}
+
+template <typename T> class SpaceToBatchNDTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(SpaceToBatchNDTest, DataTypes);
+
+TYPED_TEST(SpaceToBatchNDTest, Simple)
+{
+ Check<TypeParam>(/*input_shape=*/{1, 5, 2, 1}, /*block_shape_shape=*/{2},
+ /*paddings_shape=*/{2, 2},
+ /*output_shape=*/{6, 2, 2, 1},
+ /*input_data=*/{-1.0, 0.2, -0.3, 0.4, -0.5, 0.6, -0.7, 0.8, -0.9, 1.0},
+ /*block_shape_data=*/{3, 2}, /*paddings_data=*/{1, 0, 2, 0},
+ /*output_data=*/{0, 0, 0, -0.5, 0, 0, 0, 0.6, 0, -1.0, 0, -0.7,
+ 0, 0.2, 0, 0.8, 0, -0.3, 0, -0.9, 0, 0.4, 0, 1.0});
+}
+
+TEST(SpaceToBatchNDTest, Invalid_Shape_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, memory_manager.get());
+ Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2}, memory_manager.get());
+ Tensor paddings_tensor =
+ makeInputTensor<DataType::S32>({2, 2}, {0, 0, 0, 0}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/SpaceToDepth.cpp b/onert-micro/luci-interpreter/src/kernels/SpaceToDepth.cpp
new file mode 100644
index 000000000..06cc5faae
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/SpaceToDepth.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SpaceToDepth.h"
+#include "Utils.h"
+#include "PALSpaceToDepth.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+SpaceToDepth::SpaceToDepth(const Tensor *input, Tensor *output, const SpaceToDepthParams &params)
+ : KernelWithParams<SpaceToDepthParams>({input}, {output}, params)
+{
+}
+
+void SpaceToDepth::configure()
+{
+ assert(input()->shape().num_dims() == 4);
+ assert(output()->element_type() == DataType::FLOAT32 ||
+ output()->element_type() == DataType::U8 || output()->element_type() == DataType::S8 ||
+ output()->element_type() == DataType::S32 || output()->element_type() == DataType::S64);
+ assert(input()->element_type() == output()->element_type());
+
+ const int block_size = params().block_size;
+ const int32_t input_height = input()->shape().dim(1);
+ const int32_t input_width = input()->shape().dim(2);
+ int32_t output_height = input_height / block_size;
+ int32_t output_width = input_width / block_size;
+
+ assert(input_height == output_height * block_size);
+ assert(input_width == output_width * block_size);
+
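+  // Example (mirrors the test data): input {1, 2, 2, 2} with block_size 2
+  // folds each 2x2 spatial block into channels, giving output {1, 1, 1, 8}.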
+ Shape output_shape(4);
+ output_shape.dim(0) = input()->shape().dim(0);
+ output_shape.dim(1) = output_height;
+ output_shape.dim(2) = output_width;
+ output_shape.dim(3) = input()->shape().dim(3) * block_size * block_size;
+  // TODO: enable this only for kernels with dynamic shapes
+ output()->resize(output_shape);
+}
+
+void SpaceToDepth::execute() const
+{
+ tflite::SpaceToDepthParams op_params{};
+ op_params.block_size = params().block_size;
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ luci_interpreter_pal::SpaceToDepth(op_params, getTensorShape(input()),
+ getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ luci_interpreter_pal::SpaceToDepth(op_params, getTensorShape(input()),
+ getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/SpaceToDepth.h b/onert-micro/luci-interpreter/src/kernels/SpaceToDepth.h
new file mode 100644
index 000000000..e66316b11
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/SpaceToDepth.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SPACETODEPTH_H
+#define LUCI_INTERPRETER_KERNELS_SPACETODEPTH_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <vector>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class SpaceToDepth : public KernelWithParams<SpaceToDepthParams>
+{
+public:
+ SpaceToDepth(const Tensor *input, Tensor *output, const SpaceToDepthParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SPACETODEPTH_H
diff --git a/onert-micro/luci-interpreter/src/kernels/SpaceToDepth.test.cpp b/onert-micro/luci-interpreter/src/kernels/SpaceToDepth.test.cpp
new file mode 100644
index 000000000..4af488618
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/SpaceToDepth.test.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SpaceToDepth.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T> class SpaceToDepthTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(SpaceToDepthTest, DataTypes);
+
+TYPED_TEST(SpaceToDepthTest, SimpleCase)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ constexpr DataType element_type = getElementType<TypeParam>();
+ std::vector<TypeParam> input_data{1, 5, 6, 7, 2, 3, 4, 8};
+ Shape input_shape{1, 2, 2, 2};
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ std::vector<TypeParam> output_data{1, 5, 6, 7, 2, 3, 4, 8};
+ std::vector<int32_t> output_shape{1, 1, 1, 8};
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ SpaceToDepthParams params{};
+ params.block_size = 2;
+
+ SpaceToDepth kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<TypeParam>(output_tensor),
+ ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Split.cpp b/onert-micro/luci-interpreter/src/kernels/Split.cpp
new file mode 100644
index 000000000..70513a5cf
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Split.cpp
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "Utils.h"
+#include "Split.h"
+
+namespace luci_interpreter
+{
+
+void configure_kernel_CircleSplit(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
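+ // For Split the operator's inputs are (axis, input): input 0 is the split
+ // axis and input 1 is the tensor to split (consumed in execute below).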
+ const auto axis_index = cur_op->inputs()->operator[](0);
+ const auto input_index = cur_op->inputs()->operator[](1);
+
+ LUCI_INTERPRETER_CHECK(input_index != -1);
+ LUCI_INTERPRETER_CHECK(axis_index != -1);
+
+ const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+ const auto axis = runtime_graph->getCircleTensorByIndex(axis_index);
+
+ LUCI_INTERPRETER_CHECK(input != nullptr);
+ LUCI_INTERPRETER_CHECK(axis != nullptr);
+}
+
+void execute_kernel_CircleSplit(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ const auto input_index = cur_op->inputs()->operator[](1);
+ const auto axis_index = cur_op->inputs()->operator[](0);
+
+ assert(input_index != -1);
+ assert(axis_index != -1);
+
+ const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+ const auto axis = runtime_graph->getCircleTensorByIndex(axis_index);
+
+ assert(input != nullptr);
+ assert(axis != nullptr);
+
+ const auto *axis_data = runtime_graph->getDataByTensor(axis);
+ if (axis_data == nullptr)
+ axis_data = runtime_graph->getConstDataByTensor(axis);
+
+ assert(axis_data);
+
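+ // A negative axis counts from the end; normalize it into [0, num_dims)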
+ int32_t axis_value = (kernels::getTensorData<int32_t>(axis_data))[0];
+ if (axis_value < 0)
+ axis_value += Tensor::num_dims(input);
+
+ assert(axis_value >= 0);
+ assert(axis_value < Tensor::num_dims(input));
+
+ switch (Tensor::element_type(input))
+ {
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ {
+ return splitImpl<float>(cur_op, input, axis_value, runtime_graph);
+ }
+#endif // DIS_FLOAT
+#ifndef DIS_QUANT
+ case DataType::S8:
+ {
+ return splitImpl<int8_t>(cur_op, input, axis_value, runtime_graph);
+ }
+ case DataType::S16:
+ {
+ return splitImpl<int16_t>(cur_op, input, axis_value, runtime_graph);
+ }
+#endif // DIS_QUANT
+ case DataType::S32:
+ {
+ return splitImpl<int32_t>(cur_op, input, axis_value, runtime_graph);
+ }
+ default:
+ assert(false && "Unsupported type");
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Split.h b/onert-micro/luci-interpreter/src/kernels/Split.h
new file mode 100644
index 000000000..99ffae866
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Split.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SPLIT_IMPL_H
+#define LUCI_INTERPRETER_KERNELS_SPLIT_IMPL_H
+
+#include "Builders.h"
+#include "Utils.h"
+
+namespace luci_interpreter
+{
+
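+// Shared implementation for Split and SplitV. The input is viewed as
+// (outer_size, axis, base_inner_size): dimensions before the split axis are
+// flattened into outer_size and dimensions after it into base_inner_size. For
+// each outer step every output receives one contiguous run of copy_size
+// elements, where copy_size is that output's extent along the axis times
+// base_inner_size.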
+template <typename T>
+void splitImpl(const circle::Operator *cur_op, const circle::Tensor *input, int axis_value,
+ BaseRuntimeGraph *runtime_graph)
+{
+ const int output_count = cur_op->outputs()->size();
+
+ const auto output0_index = cur_op->outputs()->operator[](0);
+ assert(output0_index != -1);
+
+ const auto output0 = runtime_graph->getCircleTensorByIndex(output0_index);
+ assert(output0 != nullptr);
+
+ const int split_dimensions = Tensor::num_dims(input);
+
+ assert(axis_value < split_dimensions);
+ assert(Tensor::num_dims(output0) == split_dimensions);
+
+ int64_t outer_size = 1;
+ for (int i = 0; i < axis_value; ++i)
+ {
+ outer_size *= Tensor::dim(input, i);
+ }
+
+ int64_t base_inner_size = 1;
+ for (int i = axis_value + 1; i < split_dimensions; ++i)
+ {
+ base_inner_size *= Tensor::dim(input, i);
+ }
+
+ const T *input_ptr = kernels::getTensorData<T>(runtime_graph->getDataByTensor(input));
+ assert(input_ptr != nullptr);
+ for (int k = 0; k < outer_size; ++k)
+ {
+ for (int i = 0; i < output_count; ++i)
+ {
+ const auto output_index = cur_op->outputs()->operator[](i);
+ assert(output_index != -1);
+
+ const auto output = runtime_graph->getCircleTensorByIndex(output_index);
+ assert(output != nullptr);
+
+ T *output_data = kernels::getTensorData<T>(runtime_graph->getDataByTensor(output));
+ assert(output_data != nullptr);
+ const int copy_size = Tensor::dim(output, axis_value) * base_inner_size;
+ T *output_ptr = output_data + k * copy_size;
+ assert(output_ptr != nullptr);
+ for (int j = 0; j < copy_size; ++j)
+ output_ptr[j] = input_ptr[j];
+ input_ptr += copy_size;
+ }
+ }
+}
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SPLIT_IMPL_H
diff --git a/onert-micro/luci-interpreter/src/kernels/Split.test.cpp b/onert-micro/luci-interpreter/src/kernels/Split.test.cpp
new file mode 100644
index 000000000..c091903d1
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Split.test.cpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/split/FloatSplitKernel.h"
+#include "luci_interpreter/test_models/split/IntSplitKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class SplitTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T>
+std::vector<std::vector<T>> checkSplitKernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 1);
+
+ // Set input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 2);
+
+ std::vector<std::vector<T>> result;
+
+ for (int i = 0; i < 2; ++i)
+ {
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(i));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(i) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ result.push_back(output_data_vector);
+ }
+
+ return result;
+}
+
+TEST_F(SplitTest, Float_P)
+{
+ test_kernel::TestDataFloatSplit test_data_kernel;
+ const auto output_data_vector = checkSplitKernel(&test_data_kernel);
+
+ for (int i = 0; i < 2; ++i)
+ {
+ EXPECT_THAT(output_data_vector[i], test_data_kernel.get_output_data_by_index(i));
+ }
+}
+
+TEST_F(SplitTest, Int_P)
+{
+ test_kernel::TestDataIntSplit test_data_kernel;
+ const auto output_data_vector = checkSplitKernel(&test_data_kernel);
+
+ for (int i = 0; i < 2; ++i)
+ {
+ EXPECT_THAT(output_data_vector[i], test_data_kernel.get_output_data_by_index(i));
+ }
+}
+
+// TODO: add negative tests?
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/SplitV.cpp b/onert-micro/luci-interpreter/src/kernels/SplitV.cpp
new file mode 100644
index 000000000..b78a394e4
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/SplitV.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2023 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "Utils.h"
+#include "Split.h"
+
+namespace luci_interpreter
+{
+
+void configure_kernel_CircleSplitV(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ const auto axis_index = cur_op->inputs()->operator[](2);
+ LUCI_INTERPRETER_CHECK(axis_index != -1);
+
+ const auto axis = runtime_graph->getCircleTensorByIndex(axis_index);
+ LUCI_INTERPRETER_CHECK(axis != nullptr);
+
+ // Dynamic output tensors would be needed if the axis tensor were not
+ // constant, and SplitV does not support dynamic shapes yet
+ LUCI_INTERPRETER_CHECK(runtime_graph->getConstDataByTensor(axis) != nullptr);
+}
+
+void execute_kernel_CircleSplitV(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ const auto input_index = cur_op->inputs()->operator[](0);
+ const auto axis_index = cur_op->inputs()->operator[](2);
+
+ assert(input_index != -1);
+ assert(axis_index != -1);
+
+ const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+ const auto axis = runtime_graph->getCircleTensorByIndex(axis_index);
+
+ assert(input != nullptr);
+ assert(axis != nullptr);
+
+ const auto *axis_data = runtime_graph->getDataByTensor(axis);
+ if (axis_data == nullptr)
+ axis_data = runtime_graph->getConstDataByTensor(axis);
+
+ assert(axis_data);
+
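+ // A negative axis counts from the end; normalize it into [0, num_dims)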
+ int32_t axis_value = (kernels::getTensorData<int32_t>(axis_data))[0];
+ if (axis_value < 0)
+ axis_value += Tensor::num_dims(input);
+
+ assert(axis_value >= 0);
+ assert(axis_value < Tensor::num_dims(input));
+
+ switch (Tensor::element_type(input))
+ {
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ {
+ return splitImpl<float>(cur_op, input, axis_value, runtime_graph);
+ }
+#endif // DIS_FLOAT
+#ifndef DIS_QUANT
+ case DataType::S8:
+ {
+ return splitImpl<int8_t>(cur_op, input, axis_value, runtime_graph);
+ }
+ case DataType::S16:
+ {
+ return splitImpl<int16_t>(cur_op, input, axis_value, runtime_graph);
+ }
+#endif // DIS_QUANT
+ case DataType::S32:
+ {
+ return splitImpl<int32_t>(cur_op, input, axis_value, runtime_graph);
+ }
+ default:
+ assert(false && "Unsupported type");
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/SplitV.test.cpp b/onert-micro/luci-interpreter/src/kernels/SplitV.test.cpp
new file mode 100644
index 000000000..d2e7d19a2
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/SplitV.test.cpp
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/split_v/SplitVKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class SplitVTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T>
+std::vector<std::vector<T>> checkSplitKernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 1);
+
+ // Set input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 3);
+
+ std::vector<std::vector<T>> result;
+
+ for (int i = 0; i < 3; ++i)
+ {
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(i));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(i) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ result.push_back(output_data_vector);
+ }
+
+ return result;
+}
+
+TEST_F(SplitVTest, MainTest_P)
+{
+ test_kernel::TestDataSplitVKernel<float> test_data_kernel;
+ const auto output_data_vector = checkSplitKernel(&test_data_kernel);
+
+ for (int i = 0; i < 3; ++i)
+ {
+ EXPECT_THAT(output_data_vector[i], test_data_kernel.get_output_data_by_index(i));
+ }
+}
+
+// TODO: add negative tests?
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Sqrt.cpp b/onert-micro/luci-interpreter/src/kernels/Sqrt.cpp
new file mode 100644
index 000000000..eed50dfaf
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Sqrt.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Sqrt.h"
+#include "kernels/Utils.h"
+
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Sqrt::Sqrt(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Sqrt::configure()
+{
+ if (input()->element_type() != output()->element_type())
+ {
+ assert(false && "Input/output tensor data type mismatch.");
+ }
+ // TODO: enable this only for kernels with dynamic shapes
+ output()->resize(input()->shape());
+}
+
+void Sqrt::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
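+// Single pass over the flattened input, writing std::sqrt of each element.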
+void Sqrt::evalFloat() const
+{
+ auto in = getTensorData<float>(input());
+ auto out = getTensorData<float>(output());
+ auto size = getTensorShape(input()).FlatSize();
+ for (auto i = in; i != in + size; ++i)
+ {
+ *out = std::sqrt(*i);
+ ++out;
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Sqrt.h b/onert-micro/luci-interpreter/src/kernels/Sqrt.h
new file mode 100644
index 000000000..4034655ed
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Sqrt.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SQRT_H
+#define LUCI_INTERPRETER_KERNELS_SQRT_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Sqrt : public Kernel
+{
+public:
+ Sqrt(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SQRT_H
diff --git a/onert-micro/luci-interpreter/src/kernels/Sqrt.test.cpp b/onert-micro/luci-interpreter/src/kernels/Sqrt.test.cpp
new file mode 100644
index 000000000..96835fbfc
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Sqrt.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Sqrt.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Sqrt kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST(SqrtTest, SimpleSqrt)
+{
+ Check(
+ /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
+ /*input_data=*/
+ {
+ 0, 8, 2, 4, //
+ 3, 7, 10, 0.3, //
+ },
+ /*output_data=*/
+ {
+ 0.0, 2.8284271, 1.4142136, 2, //
+ 1.7320508, 2.6457513, 3.1622777, 0.54772256, //
+ });
+}
+
+TEST(SqrtTest, Input_Output_Type_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ Sqrt kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(SqrtTest, Invalid_Input_Type_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ Sqrt kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Square.cpp b/onert-micro/luci-interpreter/src/kernels/Square.cpp
new file mode 100644
index 000000000..6386b91f2
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Square.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Square.h"
+#include "kernels/Utils.h"
+
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Square::Square(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Square::configure()
+{
+ if (input()->element_type() != output()->element_type())
+ {
+ assert(false && "Input/output tensor data type mismatch.");
+ }
+ // TODO: enable this only for kernels with dynamic shapes
+ output()->resize(input()->shape());
+}
+
+void Square::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
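+// Single pass over the flattened input, squaring each element.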
+void Square::evalFloat() const
+{
+ auto in = getTensorData<float>(input());
+ auto out = getTensorData<float>(output());
+ auto size = getTensorShape(input()).FlatSize();
+ for (auto i = in; i != in + size; ++i)
+ {
+ *out = (*i) * (*i);
+ ++out;
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Square.h b/onert-micro/luci-interpreter/src/kernels/Square.h
new file mode 100644
index 000000000..73ed5a707
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Square.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SQUARE_H
+#define LUCI_INTERPRETER_KERNELS_SQUARE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Square : public Kernel
+{
+public:
+ Square(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SQUARE_H
diff --git a/onert-micro/luci-interpreter/src/kernels/Square.test.cpp b/onert-micro/luci-interpreter/src/kernels/Square.test.cpp
new file mode 100644
index 000000000..51662dea7
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Square.test.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Square.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(SquareTest, Float)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Shape input_shape{3, 1, 2};
+ std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Square kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{1.0, 0.0, 1.0, 121.0, 4.0, 2.0736};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/SquaredDifference.cpp b/onert-micro/luci-interpreter/src/kernels/SquaredDifference.cpp
new file mode 100644
index 000000000..27f395a95
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/SquaredDifference.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SquaredDifference.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+SquaredDifference::SquaredDifference(const Tensor *input1, const Tensor *input2, Tensor *output)
+ : Kernel({input1, input2}, {output})
+{
+}
+
+void SquaredDifference::configure()
+{
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+ // TODO: enable this only for kernels with dynamic shapes
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void SquaredDifference::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalSquaredDifference<float>();
+ break;
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
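+// Computes (x - y)^2 elementwise through the generic slow broadcast helper,
+// so the two input shapes only need to be broadcast-compatible.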
+template <typename T> inline void SquaredDifference::evalSquaredDifference() const
+{
+ BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()), [](T x, T y) {
+ const T difference = x - y;
+ return difference * difference;
+ });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/SquaredDifference.h b/onert-micro/luci-interpreter/src/kernels/SquaredDifference.h
new file mode 100644
index 000000000..9327caf93
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/SquaredDifference.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SQUAREDDIFFERENCE_H
+#define LUCI_INTERPRETER_KERNELS_SQUAREDDIFFERENCE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class SquaredDifference : public Kernel
+{
+public:
+ SquaredDifference(const Tensor *input1, const Tensor *input2, Tensor *output);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> inline void evalSquaredDifference() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SQUAREDDIFFERENCE_H
diff --git a/onert-micro/luci-interpreter/src/kernels/SquaredDifference.test.cpp b/onert-micro/luci-interpreter/src/kernels/SquaredDifference.test.cpp
new file mode 100644
index 000000000..2819c01e2
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/SquaredDifference.test.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SquaredDifference.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(SquaredDifferenceTest, Float)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Shape input_shape{3, 1, 2};
+ std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
+ std::vector<float> input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43};
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data2, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ SquaredDifference kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{4.0, 0.0, 4.0, 1.0, 1.0, 0.0001};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST(SquaredDifferenceTest, FloatBroadcast)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Shape input_shape1{3, 1, 2};
+ Shape input_shape2{1};
+ std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
+ std::vector<float> input_data2{1.0};
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::FLOAT32>(input_shape1, input_data1, memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::FLOAT32>(input_shape2, input_data2, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ SquaredDifference kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0.0, 1.0, 4.0, 100.0, 9.0, 5.9536};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Squeeze.cpp b/onert-micro/luci-interpreter/src/kernels/Squeeze.cpp
new file mode 100644
index 000000000..9736dce3a
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Squeeze.cpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Squeeze.h"
+
+#include "kernels/Utils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Squeeze::Squeeze(const Tensor *input, Tensor *output, const SqueezeParams &params)
+ : KernelWithParams<SqueezeParams>({input}, {output}, params)
+{
+}
+
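+// Resolves the output shape: with an empty squeeze_dims list, every dimension
+// of size 1 is removed; otherwise only the listed dimensions are removed
+// (negative indices wrap around), each asserted to have size 1.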
+void Squeeze::configure()
+{
+ int input_num_dims = input()->shape().num_dims();
+ int num_squeeze_dims = params().squeeze_dims.size();
+ assert(input_num_dims <= 8);
+ bool should_squeeze[8] = {false};
+ int num_squeezed_dims = 0;
+ if (num_squeeze_dims == 0)
+ {
+ for (int idx = 0; idx < input_num_dims; ++idx)
+ {
+ if (input()->shape().dim(idx) == 1)
+ {
+ should_squeeze[idx] = true;
+ ++num_squeezed_dims;
+ }
+ }
+ }
+ else
+ {
+ for (int idx = 0; idx < num_squeeze_dims; ++idx)
+ {
+ int current = params().squeeze_dims[idx] < 0 ? params().squeeze_dims[idx] + input_num_dims
+ : params().squeeze_dims[idx];
+ assert(current >= 0 && current < input_num_dims && input()->shape().dim(current) == 1);
+ if (!should_squeeze[current])
+ ++num_squeezed_dims;
+ should_squeeze[current] = true;
+ }
+ }
+ // TODO: enable this only for kernels with dynamic shapes
+ Shape output_shape(input_num_dims - num_squeezed_dims);
+ for (int in_idx = 0, out_idx = 0; in_idx < input_num_dims; ++in_idx)
+ {
+ if (!should_squeeze[in_idx])
+ {
+ output_shape.dim(out_idx++) = input()->shape().dim(in_idx);
+ }
+ }
+ output()->resize(output_shape);
+}
+
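+// Squeeze changes only shape metadata, so execution is a flat byte copy.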
+void Squeeze::execute() const
+{
+ assert(input()->shape().num_elements() == output()->shape().num_elements());
+
+ const auto *input_data = input()->data<void>();
+ auto *output_data = output()->data<void>();
+ std::memcpy(output_data, input_data,
+ getDataTypeSize(input()->element_type()) * input()->shape().num_elements());
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Squeeze.h b/onert-micro/luci-interpreter/src/kernels/Squeeze.h
new file mode 100644
index 000000000..687af5158
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Squeeze.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SQUEEZE_H
+#define LUCI_INTERPRETER_KERNELS_SQUEEZE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Squeeze : public KernelWithParams<SqueezeParams>
+{
+public:
+ Squeeze(const Tensor *input, Tensor *output, const SqueezeParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SQUEEZE_H
diff --git a/onert-micro/luci-interpreter/src/kernels/Squeeze.test.cpp b/onert-micro/luci-interpreter/src/kernels/Squeeze.test.cpp
new file mode 100644
index 000000000..1bc0b6459
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Squeeze.test.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Squeeze.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<T> input_data, std::initializer_list<T> output_data,
+ std::initializer_list<int32_t> squeeze_dims)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ constexpr DataType element_type = getElementType<T>();
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ SqueezeParams params{};
+ params.squeeze_dims = squeeze_dims;
+
+ Squeeze kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+template <typename T> class SqueezeTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(SqueezeTest, DataTypes);
+
+TYPED_TEST(SqueezeTest, TotalTest)
+{
+ Check<TypeParam>(
+ /*input_shape=*/{1, 24, 1}, /*output_shape=*/{24},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+ 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
+ /*output_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+ 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
+ {-1, 0});
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/StridedSlice.cpp b/onert-micro/luci-interpreter/src/kernels/StridedSlice.cpp
new file mode 100644
index 000000000..3968fb972
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/StridedSlice.cpp
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+#include "MISOKernel.h"
+
+#include "PALStridedSlice.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
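+// Packs the begin/end/stride arrays and the option masks into PAL parameters.
+// ellipsis_mask and new_axis_mask are hard-coded to 0: those StridedSlice
+// features are not supported by this kernel.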
+luci_interpreter_pal::StridedSliceParams
+buildStridedSliceParams(int32_t dims, const int32_t *begin, const int32_t *end,
+ const int32_t *strides, const circle::StridedSliceOptions *options)
+{
+ luci_interpreter_pal::StridedSliceParams op_params;
+ op_params.start_indices_count = dims;
+ op_params.stop_indices_count = dims;
+ op_params.strides_count = dims;
+
+ for (int i = 0; i < dims; ++i)
+ {
+ op_params.start_indices[i] = begin[i];
+ op_params.stop_indices[i] = end[i];
+ op_params.strides[i] = strides[i];
+ }
+
+ op_params.begin_mask = options->begin_mask();
+ op_params.ellipsis_mask = 0;
+ op_params.end_mask = options->end_mask();
+ op_params.new_axis_mask = 0;
+ op_params.shrink_axis_mask = options->shrink_axis_mask();
+ return op_params;
+}
+
+} // namespace
+
+void configure_kernel_CircleStridedSlice(const circle::Operator *cur_op,
+ BaseRuntimeGraph *runtime_graph)
+{
+ kernels::MISOKernel miso_kernel(cur_op, runtime_graph);
+
+ const circle::Tensor *input = miso_kernel.input1();
+ const circle::Tensor *begin = miso_kernel.input2();
+ const circle::Tensor *end = miso_kernel.input3();
+ const circle::Tensor *strides = miso_kernel.input4();
+
+ LUCI_INTERPRETER_CHECK(strides != nullptr);
+
+ const circle::Tensor *output = miso_kernel.output();
+
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(begin) == DataType::S32);
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(end) == DataType::S32);
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(strides) == DataType::S32);
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(input) == Tensor::element_type(output));
+}
+
+void execute_kernel_CircleStridedSlice(const circle::Operator *cur_op,
+ BaseRuntimeGraph *runtime_graph)
+{
+ kernels::MISOKernel miso_kernel(cur_op, runtime_graph);
+
+ const circle::Tensor *input = miso_kernel.input1();
+ const circle::Tensor *begin = miso_kernel.input2();
+ const circle::Tensor *end = miso_kernel.input3();
+ const circle::Tensor *strides = miso_kernel.input4();
+ const circle::Tensor *output = miso_kernel.output();
+
+ const int32_t dims = Tensor::num_dims(input);
+
+ const uint8_t *input_data = runtime_graph->getDataByTensor(input);
+ const int32_t *begin_data =
+ kernels::getTensorData<int32_t>(runtime_graph->getConstDataByTensor(begin));
+ const int32_t *end_data =
+ kernels::getTensorData<int32_t>(runtime_graph->getConstDataByTensor(end));
+ const int32_t *strides_data =
+ kernels::getTensorData<int32_t>(runtime_graph->getConstDataByTensor(strides));
+ uint8_t *output_data = runtime_graph->getDataByTensor(output);
+
+ LUCI_INTERPRETER_CHECK(input_data != nullptr);
+ LUCI_INTERPRETER_CHECK(begin_data != nullptr);
+ LUCI_INTERPRETER_CHECK(end_data != nullptr);
+ LUCI_INTERPRETER_CHECK(strides_data != nullptr);
+ LUCI_INTERPRETER_CHECK(output_data != nullptr);
+
+ const auto *options = cur_op->builtin_options_as_StridedSliceOptions();
+
+ auto op_params = buildStridedSliceParams(dims, begin_data, end_data, strides_data, options);
+
+ switch (Tensor::element_type(input))
+ {
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ luci_interpreter_pal::StridedSlice(op_params, kernels::getTensorShape(input),
+ kernels::getTensorData<float>(input_data),
+ kernels::getTensorData<float>(output_data));
+ break;
+#endif // DIS_FLOAT
+#ifndef DIS_QUANT
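+ // U8/S8 elements are one byte each, so the raw byte buffers are passed
+ // through without reinterpreting the element type.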
+ case DataType::U8:
+ luci_interpreter_pal::StridedSlice(op_params, kernels::getTensorShape(input), input_data,
+ output_data);
+ break;
+ case DataType::S8:
+ luci_interpreter_pal::StridedSlice(op_params, kernels::getTensorShape(input), input_data,
+ output_data);
+ break;
+#endif // DIS_QUANT
+ case DataType::S32:
+ luci_interpreter_pal::StridedSlice(op_params, kernels::getTensorShape(input),
+ kernels::getTensorData<int32_t>(input_data),
+ kernels::getTensorData<int32_t>(output_data));
+ break;
+ default:
+ assert(false && "Unsupported type");
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/StridedSlice.test.cpp b/onert-micro/luci-interpreter/src/kernels/StridedSlice.test.cpp
new file mode 100644
index 000000000..3aa2285c5
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/StridedSlice.test.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/strided_slice/StridedSliceKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class StridedSliceTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T>
+std::vector<T> checkStridedSliceKernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 1);
+
+ // Set input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(StridedSliceTest, MainTest_P)
+{
+ test_kernel::TestDataStridedSliceKernel<float> test_data_kernel;
+ std::vector<float> output_data_vector = checkStridedSliceKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+// TODO: add negative tests?
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Sub.cpp b/onert-micro/luci-interpreter/src/kernels/Sub.cpp
new file mode 100644
index 000000000..5eaed325a
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Sub.cpp
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "Builders.h"
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+#include "PALSub.h"
+
+namespace luci_interpreter
+{
+
+void configure_kernel_CircleSub(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) ==
+ Tensor::element_type(kernel.input2()));
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) ==
+ Tensor::element_type(kernel.output()));
+#ifndef DIS_QUANT
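+ // S16 tensors must be symmetrically quantized: a single zero point, equal
+ // to 0, on both inputs and the output.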
+ if (Tensor::element_type(kernel.input1()) == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(Tensor::zero_points(kernel.input1()).size() == 1 &&
+ Tensor::zero_points(kernel.input2()).size() == 1);
+ LUCI_INTERPRETER_CHECK(Tensor::zero_point(kernel.input1()) == 0 &&
+ Tensor::zero_point(kernel.input2()) == 0 &&
+ Tensor::zero_point(kernel.output()) == 0);
+ }
+#endif // DIS_QUANT
+}
+
+void execute_kernel_CircleSub(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+
+ const auto *options = cur_op->builtin_options_as_SubOptions();
+
+ luci_interpreter::RuntimeShape input_shape1 =
+ kernels::getTensorRuntimeShape(kernel.input1(), runtime_graph);
+ luci_interpreter::RuntimeShape input_shape2 =
+ kernels::getTensorRuntimeShape(kernel.input2(), runtime_graph);
+
+ bool is_inplace = runtime_graph->is_inplace_op(cur_op);
+
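+ // Each case pairs an elementwise Sub with a broadcasting fallback; the
+ // in-place variant reuses the input buffer when the graph marked this op
+ // as in-place.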
+ switch (Tensor::element_type(kernel.input1()))
+ {
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ {
+ auto tiso_func = luci_interpreter_pal::Sub<float>;
+
+ auto broadcast_tiso_func = luci_interpreter_pal::BroadcastSub4DSlow<float>;
+ if (is_inplace)
+ {
+ kernels::evalTISOInplaceKernel<float>(tiso_func, broadcast_tiso_func, &kernel, options,
+ std::move(input_shape1), std::move(input_shape2));
+ }
+ else
+ {
+ kernels::TISOData kernel_data = kernel.readData();
+ kernels::evalTISOKernel<float>(tiso_func, broadcast_tiso_func, &kernel, &kernel_data,
+ options, std::move(input_shape1), std::move(input_shape2));
+ }
+ }
+ break;
+#endif // DIS_FLOAT
+ case DataType::S64:
+ {
+ auto tiso_func = luci_interpreter_pal::Sub<int64_t>;
+
+ auto broadcast_tiso_func = luci_interpreter_pal::BroadcastSub4DSlow<int64_t>;
+
+ if (is_inplace)
+ {
+ kernels::evalTISOInplaceKernel<int64_t>(tiso_func, broadcast_tiso_func, &kernel, options,
+ std::move(input_shape1), std::move(input_shape2));
+ }
+ else
+ {
+ kernels::TISOData kernel_data = kernel.readData();
+ kernels::evalTISOKernel<int64_t>(tiso_func, broadcast_tiso_func, &kernel, &kernel_data,
+ options, std::move(input_shape1), std::move(input_shape2));
+ }
+ }
+ break;
+ case DataType::S32:
+ {
+ auto tiso_func = luci_interpreter_pal::Sub<int32_t>;
+
+ auto broadcast_tiso_func = luci_interpreter_pal::BroadcastSub4DSlow<int32_t>;
+
+ if (is_inplace)
+ {
+ kernels::evalTISOInplaceKernel<int32_t>(tiso_func, broadcast_tiso_func, &kernel, options,
+ std::move(input_shape1), std::move(input_shape2));
+ }
+ else
+ {
+ kernels::TISOData kernel_data = kernel.readData();
+ kernels::evalTISOKernel<int32_t>(tiso_func, broadcast_tiso_func, &kernel, &kernel_data,
+ options, std::move(input_shape1), std::move(input_shape2));
+ }
+ }
+ break;
+// TODO: fix and re-enable the quantized U8 path below
+#if 0
+#ifndef DIS_QUANT
+ case DataType::U8:
+ {
+ auto tiso_func = [](const tflite::ArithmeticParams &params,
+ const tflite::RuntimeShape &input1_shape, const uint8_t *input1_data,
+ const tflite::RuntimeShape &input2_shape, const uint8_t *input2_data,
+ const tflite::RuntimeShape &output_shape, uint8_t *output_data) {
+ tflite::reference_ops::Sub(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data);
+ };
+ auto broadcast_tiso_func =
+ [](const tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const uint8_t *input1_data, const tflite::RuntimeShape &input2_shape,
+ const uint8_t *input2_data, const tflite::RuntimeShape &output_shape,
+ uint8_t *output_data) {
+ tflite::reference_ops::BroadcastSubSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data);
+ };
+ if (is_inplace)
+ {
+ kernels::evalTISOInplaceQuantizedKernel<uint8_t>(tiso_func, broadcast_tiso_func, &kernel,
+ options);
+ }
+ else
+ {
+ kernels::TISOData kernel_data = kernel.readData();
+ kernels::evalTISOQuantizedKernel<uint8_t>(tiso_func, broadcast_tiso_func, &kernel,
+ &kernel_data, options);
+ }
+ }
+ break;
+#endif // DIS_QUANT
+#endif // 0
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Sub.test.cpp b/onert-micro/luci-interpreter/src/kernels/Sub.test.cpp
new file mode 100644
index 000000000..ff267b12c
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Sub.test.cpp
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/sub/FloatSubKernel.h"
+#include "luci_interpreter/test_models/sub/IntSubKernel.h"
+#include "luci_interpreter/test_models/sub/NegSubKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class SubTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T> std::vector<T> checkSubKernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 2);
+
+ // set left input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ // set right input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(1));
+ std::copy(test_data_base->get_input_data_by_index(1).begin(),
+ test_data_base->get_input_data_by_index(1).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(SubTest, Float_P)
+{
+ // No broadcast
+ {
+ const bool is_with_broadcast = false;
+ test_kernel::TestDataFloatSub test_data_float_kernel(is_with_broadcast);
+ std::vector<float> output_data_vector = checkSubKernel(&test_data_float_kernel);
+ EXPECT_THAT(output_data_vector, kernels::testing::FloatArrayNear(
+ test_data_float_kernel.get_output_data_by_index(0), 0.0001f));
+ }
+ // With broadcast
+ {
+ const bool is_with_broadcast = true;
+ test_kernel::TestDataFloatSub test_data_float_kernel(is_with_broadcast);
+ std::vector<float> output_data_vector = checkSubKernel(&test_data_float_kernel);
+ EXPECT_THAT(output_data_vector, kernels::testing::FloatArrayNear(
+ test_data_float_kernel.get_output_data_by_index(0), 0.0001f));
+ }
+}
+
+TEST_F(SubTest, INT_P)
+{
+ // No broadcast
+ {
+ const bool is_with_broadcast = false;
+ test_kernel::TestDataIntSub test_data_kernel(is_with_broadcast);
+ const auto output_data_vector = checkSubKernel<int32_t>(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+ }
+ // With broadcast
+ {
+ const bool is_with_broadcast = true;
+ test_kernel::TestDataIntSub test_data_kernel(is_with_broadcast);
+ const auto output_data_vector = checkSubKernel<int32_t>(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+ }
+}
+
+TEST_F(SubTest, Inputs_type_mismatch_NEG)
+{
+ test_kernel::NegTestDataInputsTypeMismatchSubKernel test_data_kernel;
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+TEST_F(SubTest, Input_output_type_mismatch_NEG)
+{
+ test_kernel::NegTestDataInputOutputTypeMismatchSubKernel test_data_kernel;
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+TEST_F(SubTest, No_quant_params_NEG)
+{
+ test_kernel::NegTestDataNoQuantParamsSubKernel test_data_kernel;
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+// TODO: add tests for U8 and S16
+// TODO: add tests for inplace optimizations for all types
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/TISOKernel.h b/onert-micro/luci-interpreter/src/kernels/TISOKernel.h
new file mode 100644
index 000000000..48dec74e3
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/TISOKernel.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_TISO_KERNEL_H
+#define LUCI_INTERPRETER_KERNELS_TISO_KERNEL_H
+
+#include "Builders.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+struct TISOData
+{
+ uint8_t *input1_data = nullptr;
+ uint8_t *input2_data = nullptr;
+ uint8_t *output_data = nullptr;
+};
+
+// Two input single output kernel
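+// Resolves the two input tensors and the single output tensor of the current
+// operator. Typical use: construct it from the operator, then call readData()
+// for out-of-place execution, or readInplaceData() when the output may alias
+// one of the inputs.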
+class TISOKernel
+{
+public:
+ TISOKernel() = delete;
+
+ explicit TISOKernel(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+ : _runtime_graph(runtime_graph)
+ {
+ const auto input1_index = cur_op->inputs()->operator[](0);
+ const auto input2_index = cur_op->inputs()->operator[](1);
+ const auto output_index = cur_op->outputs()->operator[](0);
+
+ assert(input1_index != -1);
+ assert(input2_index != -1);
+ assert(output_index != -1);
+
+ _input1_tensor = _runtime_graph->getCircleTensorByIndex(input1_index);
+ _input2_tensor = _runtime_graph->getCircleTensorByIndex(input2_index);
+ _output_tensor = _runtime_graph->getCircleTensorByIndex(output_index);
+
+ assert(_input1_tensor != nullptr);
+ assert(_input2_tensor != nullptr);
+ assert(_output_tensor != nullptr);
+ }
+
+ const circle::Tensor *input1() const { return _input1_tensor; }
+ const circle::Tensor *input2() const { return _input2_tensor; }
+ const circle::Tensor *output() const { return _output_tensor; }
+
+ BaseRuntimeGraph *runtime_graph() const { return _runtime_graph; }
+
+ TISOData readData()
+ {
+ auto *input1_data = _runtime_graph->getDataByTensor(_input1_tensor);
+ if (input1_data == nullptr)
+ input1_data = _runtime_graph->getConstDataByTensor(_input1_tensor);
+ assert(input1_data);
+
+ auto *input2_data = _runtime_graph->getDataByTensor(_input2_tensor);
+ if (input2_data == nullptr)
+ input2_data = _runtime_graph->getConstDataByTensor(_input2_tensor);
+ assert(input2_data);
+
+ auto *output_data = _runtime_graph->getDataByTensor(_output_tensor);
+ assert(output_data);
+
+ return {input1_data, input2_data, output_data};
+ }
+
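+ // Reads data for in-place execution: the output reuses the runtime buffer of
+ // whichever input is non-constant (input1 is preferred; input2 is used when
+ // input1 is a constant tensor).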
+ TISOData readInplaceData(uint8_t *&inplace_data_ptr, circle::Tensor *&input_inplace_tensor)
+ {
+ auto *input1_data = _runtime_graph->getDataByTensor(_input1_tensor);
+ if (input1_data != nullptr)
+ {
+ inplace_data_ptr = const_cast<uint8_t *>(input1_data);
+ input_inplace_tensor = const_cast<circle::Tensor *>(_input1_tensor);
+ }
+ if (input1_data == nullptr)
+ input1_data = _runtime_graph->getConstDataByTensor(_input1_tensor);
+
+ assert(input1_data);
+
+ auto *input2_data = _runtime_graph->getDataByTensor(_input2_tensor);
+ if (inplace_data_ptr == nullptr)
+ {
+ assert(input2_data != nullptr);
+ inplace_data_ptr = const_cast<uint8_t *>(input2_data);
+ input_inplace_tensor = const_cast<circle::Tensor *>(_input2_tensor);
+ }
+ if (input2_data == nullptr)
+ input2_data = _runtime_graph->getConstDataByTensor(_input2_tensor);
+ assert(input2_data);
+
+ assert(_runtime_graph->getDataByTensor(_output_tensor) == nullptr);
+
+ auto *output_data = inplace_data_ptr;
+ assert(output_data);
+
+ return {input1_data, input2_data, output_data};
+ }
+
+private:
+ const circle::Tensor *_input1_tensor;
+ const circle::Tensor *_input2_tensor;
+ const circle::Tensor *_output_tensor;
+
+ BaseRuntimeGraph *_runtime_graph;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_TISO_KERNEL_H
diff --git a/onert-micro/luci-interpreter/src/kernels/Tanh.cpp b/onert-micro/luci-interpreter/src/kernels/Tanh.cpp
new file mode 100644
index 000000000..809a91457
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Tanh.cpp
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+#include "SISOKernel.h"
+
+#include "PALTanh.h"
+
+namespace luci_interpreter
+{
+
+#ifndef DIS_QUANT
+
+namespace
+{
+void calculateArithmeticData(const circle::Tensor *input, const circle::Tensor *output,
+ int32_t &input_zero_point, int32_t &input_range_radius,
+ int32_t &input_multiplier, int &input_left_shift)
+{
+ const auto input_dtype = Tensor::element_type(input);
+ switch (input_dtype)
+ {
+ // TODO: enable the S8 path
+#if 0
+ case DataType::S8:
+ {
+ static constexpr int input_integer_bits = 4;
+ const double input_real_multiplier = static_cast<double>(Tensor::scale(input)) *
+ static_cast<double>(1 << (31 - input_integer_bits));
+
+ const double q = std::frexp(input_real_multiplier, &input_left_shift);
+ input_multiplier = static_cast<int32_t>(std::round(q * (1ll << 31)));
+ input_range_radius = kernels::calculateInputRadius(input_integer_bits, input_left_shift, 31);
+ }
+ break;
+#endif
+ case DataType::S16:
+ {
+ static constexpr int input_integer_bits = 3;
+ static constexpr int output_fractional_bits = 15;
+
+ // These operators are implemented in fixed-point arithmetic,
+ // which intrinsically wants symmetric ranges (zero_point==0)
+ // and power-of-two scales (power-of-two is abbreviated below as POT).
+ // While more general support would be possible by means of rescaling,
+ // that would add some overhead and some loss of accuracy and wouldn't
+ // be used at the moment as current quantized LSTM applications are
+ // happy with symmetric, power-of-two-scales quantization. So we just
+ // implement that narrow case only for now.
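+ // For illustration: with input_integer_bits == 3, an input scale of 2^-12
+ // gives input_scale_log2_rounded == -12 and input_left_shift ==
+ // (15 - 3) + (-12) == 0, so the power-of-two fast path below applies.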
+
+ int input_scale_log2_rounded;
+ bool param_scale_pot = kernels::checkedLog2(Tensor::scale(input), &input_scale_log2_rounded);
+
+ input_left_shift = (15 - input_integer_bits) + input_scale_log2_rounded;
+ param_scale_pot &= (input_left_shift == 0 || input_left_shift == 1);
+
+ if (param_scale_pot)
+ {
+ input_multiplier = 0;
+ }
+ else
+ {
+ // Calculate multiplier to change input scale to 1/(3*4096)
+ // as required by the table lookup.
+ // The number 3.0 in the multiplier comes from here,
+ // because the interval is [-10.7, 10.7] instead of [-8, 8].
+ // So, in this scaling +/-2^17 represents +/-10.7.
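+ // For illustration: an input scale of 1/4096 starts the loop below at
+ // multiplier == 3.0; it is doubled 13 times to 24576.0 (> 32767.0 / 2.0),
+ // leaving input_multiplier == 24576 and input_left_shift == 13.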
+
+ double multiplier = static_cast<double>(Tensor::scale(input)) * 4096.0 * 3.0;
+ input_left_shift = 0;
+
+ while (multiplier <= 32767.0 / 2.0 && input_left_shift <= 30)
+ {
+ input_left_shift++;
+ multiplier = multiplier * 2.0;
+ }
+
+ input_multiplier = static_cast<int32_t>(multiplier);
+ }
+
+ int output_scale_log2_rounded;
+ kernels::checkedLog2(Tensor::scale(output), &output_scale_log2_rounded);
+ assert(output_scale_log2_rounded == -output_fractional_bits);
+ }
+ break;
+ default:
+ assert(false && "Unsupported type");
+ }
+}
+
+} // namespace
+
+void evalInteger(const circle::Tensor *input, const circle::Tensor *output,
+ BaseRuntimeGraph *runtime_graph)
+{
+ int32_t input_zero_point = 0;
+ int32_t input_range_radius = 0;
+ int32_t input_multiplier = 0;
+ int input_left_shift = 0;
+
+ calculateArithmeticData(input, output, input_zero_point, input_range_radius, input_multiplier,
+ input_left_shift);
+
+ const auto *input_data = runtime_graph->getDataByTensor(input);
+ assert(input_data);
+
+ auto *output_data = runtime_graph->getDataByTensor(output);
+ assert(output_data);
+
+ const int flat_size = kernels::getTensorRuntimeShape(input, runtime_graph).flatSize();
+
+ const auto input_dtype = Tensor::element_type(input);
+ switch (input_dtype)
+ {
+ // TODO: enable the S8 path
+#if 0
+ case DataType::S8:
+ luci_interpreter_pal::Tanh(
+ input_zero_point, input_range_radius, input_multiplier, input_left_shift,
+ flat_size, kernels::getTensorData<int8_t>(input_data), kernels::getTensorData<int8_t>(output_data));
+ break;
+#endif // 0
+ case DataType::S16:
+ luci_interpreter_pal::Tanh(input_multiplier, input_left_shift, flat_size,
+ kernels::getTensorData<int16_t>(input_data),
+ kernels::getTensorData<int16_t>(output_data));
+ break;
+ default:
+ assert(false && "Not support yet");
+ }
+}
+#endif // DIS_QUANT
+
+void configure_kernel_CircleTanh(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::SISOKernel kernel(cur_op, runtime_graph);
+
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input()) ==
+ Tensor::element_type(kernel.output()));
+}
+
+void execute_kernel_CircleTanh(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::SISOKernel kernel(cur_op, runtime_graph);
+
+ const auto *input_data = runtime_graph->getDataByTensor(kernel.input());
+ assert(input_data);
+
+ auto *output_data = runtime_graph->getDataByTensor(kernel.output());
+
+ bool is_inplace = runtime_graph->is_inplace_op(cur_op);
+
+ switch (Tensor::element_type(kernel.input()))
+ {
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ {
+ const float *input_data_float = kernels::getTensorData<float>(input_data);
+ float *output_data_float = kernels::getTensorData<float>(output_data);
+ if (is_inplace)
+ {
+ output_data_float = const_cast<float *>(input_data_float);
+ }
+
+ assert(output_data_float);
+
+ const int flat_size =
+ kernels::getTensorRuntimeShape(kernel.input(), runtime_graph).flatSize();
+
+ luci_interpreter_pal::Tanh(flat_size, input_data_float, output_data_float);
+ break;
+ }
+#endif // DIS_FLOAT
+#ifndef DIS_QUANT
+ case DataType::S16:
+ // TODO: enable the S8 path
+#if 0
+ case DataType::S8:
+#endif
+ evalInteger(kernel.input(), kernel.output(), runtime_graph);
+ break;
+#endif // DIS_QUANT
+ default:
+ assert(false && "Unsupported type");
+ }
+
+ if (is_inplace)
+ runtime_graph->makeInplaceOperation(kernel.input(), kernel.output());
+}
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Tanh.test.cpp b/onert-micro/luci-interpreter/src/kernels/Tanh.test.cpp
new file mode 100644
index 000000000..ff55a95a1
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Tanh.test.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/tanh/FloatTanhKernel.h"
+#include "luci_interpreter/test_models/tanh/NegTanhKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class TanhTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T> std::vector<T> checkTanhKernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 1);
+
+ // Set input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(TanhTest, Float_P)
+{
+ test_kernel::TestDataFloatTanh test_data_kernel;
+ std::vector<float> output_data_vector = checkTanhKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, kernels::testing::FloatArrayNear(
+ test_data_kernel.get_output_data_by_index(0), 0.0001f));
+}
+
+TEST_F(TanhTest, Input_output_type_mismatch_NEG)
+{
+ test_kernel::NegTestDataInputOutputTypeMismatchTanhKernel test_data_kernel;
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+// TODO: add S16 test
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/TestUtils.cpp b/onert-micro/luci-interpreter/src/kernels/TestUtils.cpp
new file mode 100644
index 000000000..7e4bf7dce
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/TestUtils.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace testing
+{
+
+using ::testing::FloatNear;
+using ::testing::Matcher;
+
+Matcher<std::vector<float>> FloatArrayNear(const std::vector<float> &values, float max_abs_error)
+{
+ std::vector<Matcher<float>> matchers;
+ matchers.reserve(values.size());
+ for (const float v : values)
+ {
+ matchers.emplace_back(FloatNear(v, max_abs_error));
+ }
+ return ElementsAreArray(matchers);
+}
+
+} // namespace testing
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/TestUtils.h b/onert-micro/luci-interpreter/src/kernels/TestUtils.h
new file mode 100644
index 000000000..492044c89
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/TestUtils.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_TESTUTILS_H
+#define LUCI_INTERPRETER_KERNELS_TESTUTILS_H
+
+#include "luci_interpreter/core/Tensor.h"
+
+#include <type_traits>
+
+#include <gtest/gtest.h>
+#include <gmock/gmock.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace testing
+{
+
+// Array version of `::testing::FloatNear` matcher.
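+// Example: EXPECT_THAT(output_vector, FloatArrayNear({0.5f, -0.5f}, 1.0e-4f));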
+::testing::Matcher<std::vector<float>> FloatArrayNear(const std::vector<float> &values,
+ float max_abs_error = 1.0e-5f);
+
+} // namespace testing
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_TESTUTILS_H
diff --git a/onert-micro/luci-interpreter/src/kernels/Transpose.cpp b/onert-micro/luci-interpreter/src/kernels/Transpose.cpp
new file mode 100644
index 000000000..a6018c4ff
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Transpose.cpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "TISOKernel.h"
+#include "kernels/Utils.h"
+
+#include "PALTranspose.h"
+
+namespace luci_interpreter
+{
+void configure_kernel_CircleTranspose(const circle::Operator *cur_op,
+ BaseRuntimeGraph *runtime_graph)
+{
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input2()) == DataType::S32);
+
+ const int32_t dims = Tensor::num_dims(kernel.input1());
+ const int32_t *perm_data =
+ kernels::getTensorData<int32_t>(runtime_graph->getConstDataByTensor(kernel.input2()));
+
+ // Ensure validity of the permutations tensor as a 1D tensor
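+ // (e.g. an NHWC -> NCHW transpose of a 4-D tensor uses perm == {0, 3, 1, 2})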
+ LUCI_INTERPRETER_CHECK(Tensor::num_dims(kernel.input2()) == 1);
+ LUCI_INTERPRETER_CHECK(Tensor::dim(kernel.input2(), 0) == dims);
+
+ for (int idx = 0; idx < dims; ++idx)
+ LUCI_INTERPRETER_CHECK(perm_data[idx] >= 0 && perm_data[idx] < dims);
+}
+
+void execute_kernel_CircleTranspose(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ kernels::TISOKernel kernel(cur_op, runtime_graph);
+
+ const circle::Tensor *input = kernel.input1();
+ const circle::Tensor *perm = kernel.input2();
+ const circle::Tensor *output = kernel.output();
+
+ kernels::TISOData tiso_data = kernel.readData();
+ const int32_t *perm_data = kernels::getTensorData<int32_t>(tiso_data.input2_data);
+
+ const int32_t size = Tensor::dim(perm, 0);
+ luci_interpreter_pal::TransposeParams params;
+ params.perm_count = size;
+ for (int i = 0; i < size; ++i)
+ params.perm[i] = perm_data[i];
+
+ switch (Tensor::element_type(input))
+ {
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ luci_interpreter_pal::Transpose(params, kernels::getTensorShape(input),
+ kernels::getTensorData<float>(tiso_data.input1_data),
+ kernels::getTensorShape(output),
+ kernels::getTensorData<float>(tiso_data.output_data));
+ break;
+#endif // DIS_FLOAT
+#ifndef DIS_QUANT
+ case DataType::U8:
+ luci_interpreter_pal::Transpose(params, kernels::getTensorShape(input),
+ kernels::getTensorData<uint8_t>(tiso_data.input1_data),
+ kernels::getTensorShape(output),
+ kernels::getTensorData<uint8_t>(tiso_data.output_data));
+ break;
+#endif // DIS_QUANT
+ default:
+ assert(false && "Unsupported type");
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Transpose.test.cpp b/onert-micro/luci-interpreter/src/kernels/Transpose.test.cpp
new file mode 100644
index 000000000..dda521243
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Transpose.test.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/transpose/TransposeKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class TransposeTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T>
+std::vector<T> checkTransposeKernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 1);
+
+ // Set input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(TransposeTest, MainTest_P)
+{
+ test_kernel::TestDataTransposeKernel<float> test_data_kernel;
+ std::vector<float> output_data_vector = checkTransposeKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+// TODO: add negative tests?
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/TransposeConv.cpp b/onert-micro/luci-interpreter/src/kernels/TransposeConv.cpp
new file mode 100644
index 000000000..f8483ea36
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/TransposeConv.cpp
@@ -0,0 +1,351 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TransposeConv.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/transpose_conv.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+TransposeConv::TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input,
+ const Tensor *bias, Tensor *output, Tensor *scratch_tensor,
+ const TransposeConvParams &params)
+ : KernelWithParams<TransposeConvParams>({output_shape, filter, input, bias},
+ {output, scratch_tensor}, params)
+{
+}
+
+TransposeConv::~TransposeConv()
+{
+ // Define the destructor here so the vector of quantized multipliers is
+ // destroyed where ChannelQuantMultipliers is a complete type (it is only
+ // forward-declared in the header).
+}
+
+void TransposeConv::configure()
+{
+ assert(output_shape()->shape().num_dims() == 1);
+ assert(input()->shape().num_dims() == 4);
+ assert(filter()->shape().num_dims() == 4);
+ assert(input()->element_type() == DataType::FLOAT32 || input()->element_type() == DataType::U8 ||
+ input()->element_type() == DataType::S16);
+ assert(input()->element_type() == output()->element_type());
+ assert(input()->shape().dim(3) == filter()->shape().dim(3));
+
+ const int num_dims = output_shape()->shape().dim(0);
+ Shape out_shape(num_dims);
+ const auto *shape_data = getTensorData<int32_t>(output_shape());
+ for (int i = 0; i < num_dims; i++)
+ out_shape.dim(i) = shape_data[i];
+ // TODO: resize the output only for kernels with dynamic shapes
+ output()->resize(out_shape);
+
+ const int32_t filter_height = filter()->shape().dim(1);
+ const int32_t filter_width = filter()->shape().dim(2);
+ const int32_t output_height = out_shape.dim(1);
+ const int32_t output_width = out_shape.dim(2);
+
+ const int32_t unused_output_height =
+ computeOutputSize(params().padding, output_height, filter_height, params().stride_height, 1);
+ const int32_t unused_output_width =
+ computeOutputSize(params().padding, output_width, filter_width, params().stride_width, 1);
+
+ _padding_height =
+ computePadding(params().stride_height, 1, output_height, filter_height, unused_output_height);
+ _padding_width =
+ computePadding(params().stride_width, 1, output_width, filter_width, unused_output_width);
+
+ if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S16)
+ {
+ auto scratch_tensor = getOutputTensors()[1];
+ scratch_tensor->resize(output()->shape());
+ const std::vector<double> real_multipliers =
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+ _quant_multipliers = quantizeMultipliers(real_multipliers);
+ }
+ else
+ {
+ auto scratch_tensor = getOutputTensors()[1];
+ scratch_tensor->set_allocatable(false);
+ }
+}
+
+void TransposeConv::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ if (filter()->scales().size() == 1)
+ {
+ evalQuantized();
+ }
+ else if (filter()->scales().size() > 1)
+ {
+ LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
+ static_cast<size_t>(filter()->shape().dim(0)));
+ evalQuantizedPerChannel();
+ }
+ break;
+ case DataType::S16:
+ evalQuantizedS16();
+ break;
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+void TransposeConv::evalFloat() const
+{
+ tflite::ConvParams op_params{};
+ op_params.padding_type = tflite::PaddingType::kSame;
+ op_params.padding_values.height = _padding_height;
+ op_params.padding_values.width = _padding_width;
+ op_params.stride_height = params().stride_height;
+ op_params.stride_width = params().stride_width;
+ tflite::reference_ops::TransposeConv(op_params, //
+ getTensorShape(input()), getTensorData<float>(input()), //
+ getTensorShape(filter()), getTensorData<float>(filter()), //
+ getTensorShape(bias()), getTensorData<float>(bias()), //
+ getTensorShape(output()), getTensorData<float>(output()), //
+ tflite::RuntimeShape(), nullptr);
+}
+
+void TransposeConv::evalQuantized() const
+{
+ tflite::ConvParams op_params{};
+ op_params.padding_type = tflite::PaddingType::kSame;
+ op_params.padding_values.height = _padding_height;
+ op_params.padding_values.width = _padding_width;
+ op_params.stride_height = params().stride_height;
+ op_params.stride_width = params().stride_width;
+ // The kernel expects input and filter zero points to be negated.
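+ // (the reference kernel adds these offsets to the raw quantized values, so
+ // passing the negated zero points yields input_val - zero_point)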
+ op_params.input_offset = -input()->zero_point(); // Note the '-'.
+ op_params.weights_offset = -filter()->zero_point(); // Note the '-'.
+ op_params.output_offset = output()->zero_point();
+ op_params.output_multiplier = _quant_multipliers[0].multiplier;
+ op_params.output_shift = _quant_multipliers[0].shift;
+ op_params.quantized_activation_min = std::numeric_limits<uint8_t>::min();
+ op_params.quantized_activation_max = std::numeric_limits<uint8_t>::max();
+
+ auto scratch_tensor = getOutputTensors()[1];
+
+ tflite::reference_ops::TransposeConv(
+ op_params, //
+ getTensorShape(input()), getTensorData<uint8_t>(input()), //
+ getTensorShape(filter()), getTensorData<uint8_t>(filter()), //
+ getTensorShape(bias()), getTensorData<int32_t>(bias()), //
+ getTensorShape(output()), getTensorData<uint8_t>(output()), //
+ tflite::RuntimeShape(), nullptr, //
+ getTensorData<int32_t>(scratch_tensor));
+}
+
+void TransposeConv::evalQuantizedPerChannel() const
+{
+ const auto *input_data = getTensorData<uint8_t>(input());
+ const auto *filter_data = getTensorData<uint8_t>(filter());
+ const auto *bias_data = getTensorData<int32_t>(bias());
+ auto *output_data = getTensorData<uint8_t>(output());
+
+ auto scratch_tensor = getOutputTensors()[1];
+ auto *scratch_data = getTensorData<int32_t>(scratch_tensor);
+
+ const Shape &input_shape = input()->shape();
+ const Shape &filter_shape = filter()->shape();
+ const Shape &output_shape = output()->shape();
+
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t input_depth = input_shape.dim(3);
+ const int32_t output_depth = filter_shape.dim(0);
+ const int32_t filter_height = filter_shape.dim(1);
+ const int32_t filter_width = filter_shape.dim(2);
+ const int32_t output_height = output_shape.dim(1);
+ const int32_t output_width = output_shape.dim(2);
+
+ const int32_t stride_height = _params.stride_height;
+ const int32_t stride_width = _params.stride_width;
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max);
+
+ std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() * sizeof(int32_t));
+
+ BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers);
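+ // First pass: scatter-accumulate the raw integer products of this batch into
+ // the int32 scratch buffer; the second pass below adds the bias and
+ // requantizes each output channel with its own multiplier.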
+ for (int32_t batch = 0; batch < batches; ++batch)
+ {
+ for (int32_t in_y = 0; in_y < input_height; ++in_y)
+ {
+ for (int32_t in_x = 0; in_x < input_width; ++in_x)
+ {
+ for (int32_t in_c = 0; in_c < input_depth; ++in_c)
+ {
+ const int32_t out_y_origin = in_y * stride_height - _padding_height;
+ const int32_t out_x_origin = in_x * stride_width - _padding_width;
+ for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int32_t out_x = out_x_origin + filter_x;
+ const int32_t out_y = out_y_origin + filter_y;
+ if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width))
+ {
+ for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+ {
+ const uint8_t input_val =
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+ const uint8_t filter_val =
+ filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
+ scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] +=
+ static_cast<int32_t>(input_val - input()->zero_point()) *
+ static_cast<int32_t>(filter_val - filter()->zero_points()[out_c]);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ for (int32_t out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int32_t out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+ {
+ int32_t acc = scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)];
+ if (bias_data)
+ {
+ acc += bias_data[out_c];
+ }
+
+ int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
+ acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);
+
+ scaled_acc += output()->zero_point();
+ scaled_acc = std::max(scaled_acc, activation_min);
+ scaled_acc = std::min(scaled_acc, activation_max);
+
+ output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
+ }
+ }
+ }
+ }
+}
+
+void TransposeConv::evalQuantizedS16() const
+{
+ const auto *input_data = getTensorData<int16_t>(input());
+ const auto *filter_data = getTensorData<int16_t>(filter());
+ const auto *bias_data = getTensorData<int64_t>(bias());
+ auto *output_data = getTensorData<int16_t>(output());
+
+ auto scratch_tensor = getOutputTensors()[1];
+ auto *scratch_data = getTensorData<int64_t>(scratch_tensor);
+
+ const Shape &input_shape = input()->shape();
+ const Shape &filter_shape = filter()->shape();
+ const Shape &output_shape = output()->shape();
+
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t input_depth = input_shape.dim(3);
+ const int32_t output_depth = filter_shape.dim(0);
+ const int32_t filter_height = filter_shape.dim(1);
+ const int32_t filter_width = filter_shape.dim(2);
+ const int32_t output_height = output_shape.dim(1);
+ const int32_t output_width = output_shape.dim(2);
+
+ const int32_t stride_height = _params.stride_height;
+ const int32_t stride_width = _params.stride_width;
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max);
+
+ std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() * sizeof(int64_t));
+
+ BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers);
+ for (int32_t batch = 0; batch < batches; ++batch)
+ {
+ for (int32_t in_y = 0; in_y < input_height; ++in_y)
+ {
+ for (int32_t in_x = 0; in_x < input_width; ++in_x)
+ {
+ for (int32_t in_c = 0; in_c < input_depth; ++in_c)
+ {
+ const int32_t out_y_origin = in_y * stride_height - _padding_height;
+ const int32_t out_x_origin = in_x * stride_width - _padding_width;
+ for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int32_t out_x = out_x_origin + filter_x;
+ const int32_t out_y = out_y_origin + filter_y;
+ if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width))
+ {
+ for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+ {
+ const int16_t input_val =
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+ const int16_t filter_val =
+ filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
+ scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] +=
+ static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ for (int32_t out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int32_t out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+ {
+ int64_t acc = scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)];
+ if (bias_data)
+ {
+ acc += bias_data[out_c];
+ }
+ int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
+ acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);
+
+ scaled_acc = std::max(scaled_acc, activation_min);
+ scaled_acc = std::min(scaled_acc, activation_max);
+
+ output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
+ }
+ }
+ }
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/TransposeConv.h b/onert-micro/luci-interpreter/src/kernels/TransposeConv.h
new file mode 100644
index 000000000..cea0cf3c7
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/TransposeConv.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_TRANSPOSECONV_H
+#define LUCI_INTERPRETER_KERNELS_TRANSPOSECONV_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ChannelQuantMultipliers;
+
+class TransposeConv : public KernelWithParams<TransposeConvParams>
+{
+public:
+ TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input,
+ const Tensor *bias, Tensor *output, Tensor *scratch_tensor,
+ const TransposeConvParams &params);
+
+ ~TransposeConv();
+
+ const Tensor *output_shape() const { return _inputs[0]; }
+ const Tensor *filter() const { return _inputs[1]; }
+ const Tensor *input() const { return _inputs[2]; }
+ const Tensor *bias() const { return _inputs[3]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+ void evalQuantizedPerChannel() const;
+ void evalQuantizedS16() const;
+
+private:
+ int32_t _padding_height{};
+ int32_t _padding_width{};
+ // The scaling factor from input to output (aka the 'real multiplier') can
+ // be represented as a fixed point multiplier plus a left shift.
+ std::vector<ChannelQuantMultipliers> _quant_multipliers;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_TRANSPOSECONV_H
diff --git a/onert-micro/luci-interpreter/src/kernels/TransposeConv.test.cpp b/onert-micro/luci-interpreter/src/kernels/TransposeConv.test.cpp
new file mode 100644
index 000000000..4856e1b87
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/TransposeConv.test.cpp
@@ -0,0 +1,353 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TransposeConv.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T, typename B>
+void Check(std::initializer_list<int32_t> output_shape_shape,
+ std::initializer_list<int32_t> weight_shape, std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<int32_t> output_shape_data, std::initializer_list<T> weight_data,
+ std::initializer_list<T> input_data, std::initializer_list<B> bias_data,
+ std::initializer_list<T> output_data, luci::Padding padding, int32_t stride_height,
+ int32_t stride_width)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ constexpr DataType element_type = getElementType<T>();
+ Tensor output_shape_tensor =
+ makeInputTensor<DataType::S32>(output_shape_shape, output_shape_data, memory_manager.get());
+ Tensor weight_tensor =
+ makeInputTensor<element_type>(weight_shape, weight_data, memory_manager.get());
+ Tensor input_data_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+
+ DataType scratch_data_type = element_type == DataType::S16 ? DataType::S64 : DataType::S32;
+ Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ TransposeConvParams params{};
+ params.padding = padding;
+ params.stride_height = stride_height;
+ params.stride_width = stride_width;
+
+ if (bias_data.size() != 0)
+ {
+ Tensor bias_tensor =
+ makeInputTensor<getElementType<B>()>(bias_shape, bias_data, memory_manager.get());
+ TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, &bias_tensor,
+ &output_tensor, &scratch_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
+ kernel.execute();
+ }
+ else
+ {
+ TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, nullptr,
+ &output_tensor, &scratch_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
+ kernel.execute();
+ }
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+}
+
+TEST(TransposeConvTest, FloatSimple)
+{
+ Check<float, float>(
+ /*output_shape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 1}, /*input_shape=*/{1, 4, 4, 1},
+ /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 4, 4, 1},
+ /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ /*bias_data=*/{},
+ /*output_data=*/{29, 62, 83, 75, 99, 192, 237, 198, 207, 372, 417, 330, 263, 446, 485, 365},
+ /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1);
+
+ SUCCEED();
+}
+
+TEST(TransposeConvTest, FloatTwoFiltersTest)
+{
+ Check<float, float>(
+ /*output_shape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 2}, /*input_shape=*/{1, 4, 4, 2},
+ /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 4, 4, 1},
+ /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32},
+ /*bias_data=*/{},
+ /*output_data=*/
+ {184, 412, 568, 528, 678, 1347, 1689, 1434, 1494, 2715, 3057, 2442, 1968, 3352, 3652, 2760},
+ /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1);
+
+ SUCCEED();
+}
+
+TEST(TransposeConvTest, SimpleBiasTest)
+{
+ Check<float, float>(
+ /*output_shape_shape=*/{4}, /*weight_shape=*/{2, 3, 3, 1},
+ /*input_shape=*/{1, 2, 2, 1},
+ /*bias_shape=*/{2}, /*output_shape=*/{1, 5, 5, 2}, /*output_shape_data=*/{1, 5, 5, 2},
+ /*weight_data=*/{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18},
+ /*input_data=*/{1, 2, 3, 4},
+ /*bias_data=*/{3, 4},
+ /*output_data=*/{4, 6, 6, 8, 10, 14, 9, 12, 13, 16, 10, 12, 12, 14, 28, 32, 21,
+ 24, 25, 28, 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, 24, 28, 30, 34,
+ 64, 72, 39, 44, 47, 52, 42, 46, 48, 52, 106, 114, 63, 68, 71, 76},
+ /*params.padding=*/luci::Padding::VALID, /*stride_height=*/2, /*stride_width=*/2);
+
+ SUCCEED();
+}
+
+TEST(TransposeConvTest, UInt8)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ std::vector<float> input_data{1, 2, 3, 4};
+ std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18};
+ std::vector<float> bias_data{3, 4};
+ std::vector<int32_t> output_shape_data{1, 5, 5, 2};
+ std::vector<float> ref_output_data{
+ 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
+ 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
+ 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
+ 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
+ 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
+ };
+
+ // Choose quantization parameters carefully.
+ auto input_quant = quantizationParams<uint8_t>(-8.0, 7.9375); // s = 1 / 16, zp = 128
+ auto filter_quant = quantizationParams<uint8_t>(-24.0, 39.75); // s = 1 / 4, zp = 96
+ auto output_quant = quantizationParams<uint8_t>(-64.0, 191.0); // s = 1, zp = 64
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 2, 1}, input_quant.first, input_quant.second, input_data, memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::U8>(
+ {2, 3, 3, 1}, filter_quant.first, filter_quant.second, filter_data, memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>({2}, input_quant.first * filter_quant.first,
+ 0, bias_data, memory_manager.get());
+ Tensor output_shape_tensor =
+ makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant.first, output_quant.second);
+
+ DataType scratch_data_type =
+ input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+ Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");
+
+ TransposeConvParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor,
+ &output_tensor, &scratch_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST(TransposeConvTest, UInt8_CWQ)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ const int32_t output_channels = 2;
+ std::vector<float> input_data{1, 2, 3, 4};
+ std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18};
+ std::vector<float> bias_data{3, 4};
+ std::vector<int32_t> output_shape_data{1, 5, 5, 2};
+ std::vector<float> ref_output_data{
+ 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
+ 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
+ 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
+ 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
+ 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
+ };
+
+ // Choose quantization parameters carefully.
+ auto input_quant = quantizationParams<uint8_t>(-8.0, 7.9375); // s = 1 / 16, zp = 128
+ auto output_quant = quantizationParams<uint8_t>(-64.0, 191.0); // s = 1, zp = 64
+
+ std::vector<std::pair<float, int32_t>> filter_quant_params;
+ filter_quant_params.push_back(quantizationParams<uint8_t>(0, 17));
+ filter_quant_params.push_back(quantizationParams<uint8_t>(0, 18));
+
+ std::vector<float> filter_scales;
+ std::vector<int32_t> filter_zerops;
+ for (auto iter : filter_quant_params)
+ {
+ filter_scales.push_back(iter.first);
+ filter_zerops.push_back(iter.second);
+ }
+
+ std::vector<float> bias_scales;
+ for (int i = 0; i < output_channels; ++i)
+ bias_scales.push_back(filter_quant_params[i].first * input_quant.first);
+ std::vector<int32_t> zerop(output_channels, 0);
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 2, 1}, input_quant.first, input_quant.second, input_data, memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::U8>(
+ {output_channels, 3, 3, 1}, filter_scales, filter_zerops, 0, filter_data, memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0,
+ bias_data, memory_manager.get());
+ Tensor output_shape_tensor =
+ makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant.first, output_quant.second);
+
+ DataType scratch_data_type =
+ input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+ Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");
+
+ TransposeConvParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor,
+ &output_tensor, &scratch_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST(TransposeConvTest, SInt16)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ std::vector<float> input_data{1, 2, 3, 4};
+ std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18};
+ std::vector<float> bias_data{3, 4};
+ std::vector<int32_t> output_shape_data{1, 5, 5, 2};
+ std::vector<float> ref_output_data{
+ 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
+ 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
+ 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
+ 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
+ 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
+ };
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 2, 2, 1}, 0.25, 0, input_data, memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::S16>({2, 3, 3, 1}, 0.2, 0, filter_data, memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::S64>({2}, 0.25 * 0.2, 0, bias_data, memory_manager.get());
+ Tensor output_shape_tensor =
+ makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+
+ DataType scratch_data_type =
+ input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+ Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");
+
+ TransposeConvParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor,
+ &output_tensor, &scratch_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST(TransposeConvTest, SInt16_CWQ_weights)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ const int output_channels = 2;
+ const Shape input_shape{1, 2, 2, 1};
+ const Shape filter_shape{output_channels, 3, 3, 1};
+ const Shape bias_shape{output_channels};
+ std::vector<int32_t> output_shape_data{1, 5, 5, output_channels};
+
+ std::vector<float> input_data{1, 2, 3, 4};
+ std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18};
+ std::vector<float> bias_data{3, 4};
+
+ std::vector<float> ref_output_data{
+ 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
+ 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
+ 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
+ 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
+ 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
+ };
+
+ const float input_scale = 0.25;
+ const float output_scale = 0.5;
+ const std::vector<float> filter_scales{0.2f, 0.5f};
+ std::vector<float> bias_scales{filter_scales[0] * input_scale, filter_scales[1] * input_scale};
+ const std::vector<int32_t> zerop(2, 0);
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data, memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0,
+ filter_data, memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data,
+ memory_manager.get());
+ Tensor output_shape_tensor =
+ makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, output_scale, 0);
+
+ DataType scratch_data_type =
+ input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+ Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");
+
+ TransposeConvParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor,
+ &output_tensor, &scratch_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.cpp b/onert-micro/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.cpp
new file mode 100644
index 000000000..63ef363ac
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.cpp
@@ -0,0 +1,446 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+
+#include "PALUnidirectionalSequenceLSTM.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+#ifndef DIS_QUANT
+
+bool checkedLog2(const float x, int *log2_result)
+{
+  // Use std::log instead of std::log2 to work around std::log2 being
+  // missing in some toolchains (as of May 2018 in some TensorFlow test
+  // toolchains); std::round is available here, so it is used directly.
+ const float x_log2 = std::log(x) * (1.0f / std::log(2.0f));
+ const float x_log2_rounded = std::round(x_log2);
+ const float x_log2_fracpart = x_log2 - x_log2_rounded;
+
+ *log2_result = static_cast<int>(x_log2_rounded);
+ return std::abs(x_log2_fracpart) < 1e-3f;
+}
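+
+// Worked example (illustrative, not part of the kernel): checkedLog2 only
+// succeeds for scales that are (near) powers of two. For instance:
+//   int p;
+//   checkedLog2(0.0009765625f, &p); // 2^-10 -> returns true,  p == -10
+//   checkedLog2(0.3f, &p);          // log2(0.3) ~ -1.74 -> returns false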
+
+// Create parameters for the element-wise multiplications that happen in
+// a) the cell state update and b) the hidden state update.
+// Note that all gate outputs are symmetrically quantized, so only scales
+// are required for the inputs. However, during the hidden state update
+// phase, the output is the updated hidden state, which is asymmetrically
+// quantized; thus the output may require a zero point.
+luci_interpreter_pal::ArithmeticParams
+createInterGateParams(const float input1_scale, const float input2_scale, const float output_scale,
+ const DataType output_type, const int output_zp)
+{
+ luci_interpreter_pal::ArithmeticParams op_params;
+ if (output_type == DataType::S16)
+ {
+ op_params.quantized_activation_min = std::numeric_limits<int16_t>::min();
+ op_params.quantized_activation_max = std::numeric_limits<int16_t>::max();
+ }
+ else if (output_type == DataType::S8)
+ {
+ op_params.quantized_activation_min = std::numeric_limits<int8_t>::min();
+ op_params.quantized_activation_max = std::numeric_limits<int8_t>::max();
+ }
+
+ op_params.input1_offset = 0; // symmetric
+ op_params.input2_offset = 0; // symmetric
+ op_params.output_offset = output_zp;
+
+ const double input_product_scale =
+ static_cast<double>(input1_scale) * static_cast<double>(input2_scale);
+  double effective_scale = input_product_scale / static_cast<double>(output_scale);
+  int output_shift = 0; // always overwritten by quantizeMultiplier below
+  kernels::quantizeMultiplier(effective_scale, &op_params.output_multiplier, &output_shift);
+  op_params.output_shift = output_shift;
+ return op_params;
+}
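+
+// Worked example (illustrative): for the forget-gate x cell-state product
+// with input1_scale = 2^-15 (gate output), input2_scale = output_scale =
+// 2^-11 (cell state), the effective scale is 2^-15 * 2^-11 / 2^-11 = 2^-15.
+// quantizeMultiplier decomposes this into output_multiplier = 1 << 30
+// (0.5 in Q0.31) and output_shift = -14, since 2^-15 == 0.5 * 2^-14.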
+
+void createGateParams(const circle::Tensor *input, const circle::Tensor *input_weight,
+ const circle::Tensor *input_bias, const circle::Tensor *hidden_state,
+ const circle::Tensor *hidden_state_weight,
+ const float nonlinear_activation_input_scale, const DataType cell_type,
+ lstm::GateParameters *gate_params)
+{
+ // Input CalculateOpDataFullyConnected
+ {
+ luci_interpreter_pal::FullyConnectedParams input_gate_params;
+ double real_multiplier = 0.0;
+ int output_shift;
+ int32_t output_activation_min;
+ int32_t output_activation_max;
+ int32_t output_multiplier;
+ real_multiplier = kernels::getQuantizedConvolutionMultipler(
+ Tensor::scale(input), Tensor::scale(input_weight), nonlinear_activation_input_scale);
+ kernels::quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+ kernels::calculateActivationRangeQuantized(FusedActFunc::NONE, 0,
+ nonlinear_activation_input_scale, cell_type,
+ &output_activation_min, &output_activation_max);
+
+ input_gate_params.output_shift = output_shift;
+ input_gate_params.output_multiplier = output_multiplier;
+ input_gate_params.quantized_activation_max = output_activation_max;
+ input_gate_params.quantized_activation_min = output_activation_min;
+ input_gate_params.input_offset = -Tensor::zero_point(input);
+ input_gate_params.weights_offset = -Tensor::zero_point(input_weight);
+ input_gate_params.output_offset = 0;
+
+ gate_params->input_fc_params = input_gate_params;
+ }
+
+ // Recurrent CalculateOpDataFullyConnected
+ {
+ luci_interpreter_pal::FullyConnectedParams recurrent_gate_params;
+ double real_multiplier = 0.0;
+ int output_shift;
+ int32_t output_activation_min;
+ int32_t output_activation_max;
+ int32_t output_multiplier;
+ real_multiplier = kernels::getQuantizedConvolutionMultipler(Tensor::scale(hidden_state),
+ Tensor::scale(hidden_state_weight),
+ nonlinear_activation_input_scale);
+ kernels::quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+ kernels::calculateActivationRangeQuantized(FusedActFunc::NONE, 0,
+ nonlinear_activation_input_scale, cell_type,
+ &output_activation_min, &output_activation_max);
+
+ recurrent_gate_params.output_shift = output_shift;
+ recurrent_gate_params.output_multiplier = output_multiplier;
+ recurrent_gate_params.quantized_activation_max = output_activation_max;
+ recurrent_gate_params.quantized_activation_min = output_activation_min;
+ recurrent_gate_params.input_offset = -Tensor::zero_point(hidden_state);
+ recurrent_gate_params.weights_offset = -Tensor::zero_point(hidden_state_weight);
+ recurrent_gate_params.output_offset = 0;
+
+ gate_params->recurrent_fc_params = recurrent_gate_params;
+ }
+}
+
+void prepareGateParamsInteger(lstm::LSTMStruct *lstm_struct,
+ lstm::LSTMParameters *quant_lstm_params)
+{
+ float nonlinear_input_scale = 0.00024414062; // 2^-12 Q3.12 -> Q0.15
+
+ createGateParams(lstm_struct->input(), lstm_struct->input_to_forget_weights(),
+ lstm_struct->forget_gate_bias(), lstm_struct->output_state(),
+ lstm_struct->recurrent_to_forget_weights(), nonlinear_input_scale, DataType::S16,
+ &quant_lstm_params->forget_gate_parameters);
+
+ createGateParams(lstm_struct->input(), lstm_struct->input_to_input_weights(),
+ lstm_struct->input_gate_bias(), lstm_struct->output_state(),
+ lstm_struct->recurrent_to_input_weights(), nonlinear_input_scale, DataType::S16,
+ &quant_lstm_params->input_gate_parameters);
+
+ createGateParams(lstm_struct->input(), lstm_struct->input_to_cell_weights(),
+ lstm_struct->cell_gate_bias(), lstm_struct->output_state(),
+ lstm_struct->recurrent_to_cell_weights(), nonlinear_input_scale, DataType::S16,
+ &quant_lstm_params->cell_gate_parameters);
+
+ createGateParams(lstm_struct->input(), lstm_struct->input_to_output_weights(),
+ lstm_struct->output_gate_bias(), lstm_struct->output_state(),
+ lstm_struct->recurrent_to_output_weights(), nonlinear_input_scale, DataType::S16,
+ &quant_lstm_params->output_gate_parameters);
+
+ // Inter gate multiplication parameters
+ float nonlinear_output_scale = 0.00003051757; // 2^-15 Q3.12 -> Q0.15
+  float cell_state_scale = Tensor::scale(lstm_struct->cell_state());
+ // forget gate output (nonlinear output) x cell state -> cell state
+ quant_lstm_params->inter_gate_parameters.forget_cell_mul_params = createInterGateParams(
+ nonlinear_output_scale, cell_state_scale, cell_state_scale, DataType::S16, 0);
+
+ // input gate output x cell gate output -> cell state
+ quant_lstm_params->inter_gate_parameters.input_mul_params = createInterGateParams(
+ nonlinear_output_scale, nonlinear_output_scale, cell_state_scale, DataType::S16, 0);
+
+ // tanh output x output gate output -> hidden state (potentially asymmetric)
+ quant_lstm_params->inter_gate_parameters.output_mul_params = createInterGateParams(
+ nonlinear_output_scale, nonlinear_output_scale, Tensor::scale(lstm_struct->output_state()),
+ Tensor::element_type(lstm_struct->output_state()),
+ Tensor::zero_point(lstm_struct->output_state()));
+}
+
+// Create the additional information about the cell state, which includes:
+// cell_state_scale_power: used in the integer nonlinear function (e.g., tanh)
+// quantized_cell_clip: quantized cell clip range
+lstm::CellStateInfo createLstmCellStateInfo(const float cell_state_scale, const float cell_clip)
+{
+ lstm::CellStateInfo cell_state_info;
+ // cell_state_scale_power: 2^-cell_state_scale_power = cell state scale
+  int buffer;
+  const bool scale_is_pow2 = checkedLog2(cell_state_scale, &buffer);
+  // The cell state scale is expected to be a power of two; otherwise the
+  // derived scale power would be inaccurate.
+  assert(scale_is_pow2);
+  (void)scale_is_pow2;
+  cell_state_info.cell_state_scale_power = buffer;
+ // Cell state specifics
+ cell_state_info.cell_clip = cell_clip;
+ cell_state_info.quantized_cell_clip = static_cast<int16_t>(std::min(
+ std::max(static_cast<double>(cell_clip) / static_cast<double>(cell_state_scale), -32768.0),
+ 32767.0));
+ return cell_state_info;
+}
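+
+// Worked example (illustrative): with cell_state_scale = 2^-11 and
+// cell_clip = 8.0, checkedLog2 yields cell_state_scale_power = -11 and
+// quantized_cell_clip = clamp(8.0 / 2^-11) = 16384, which fits in int16_t.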
+
+void evalInt8(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph,
+              bool /*is_inplace*/)
+{
+ lstm::LSTMStruct lstm_struct(cur_op, runtime_graph);
+
+ lstm::LSTMParameters quant_lstm_params;
+ prepareGateParamsInteger(&lstm_struct, &quant_lstm_params);
+
+ lstm::CellStateInfo cell_state_info = createLstmCellStateInfo(
+ luci_interpreter::Tensor::scale(lstm_struct.cell_state()), lstm_struct.options->cell_clip());
+
+ const bool time_major = lstm_struct.options->time_major();
+ const auto batch_size =
+ time_major ? Tensor::dim(lstm_struct.input(), 1) : Tensor::dim(lstm_struct.input(), 0);
+ const auto state_dimension = Tensor::dim(lstm_struct.output_state(), 1);
+ const auto cell_state_type_size = getDataTypeSize(Tensor::element_type(lstm_struct.cell_state()));
+
+ auto scratch_0_data =
+ std::make_unique<uint8_t[]>(batch_size * state_dimension * cell_state_type_size);
+ auto scratch_1_data =
+ std::make_unique<uint8_t[]>(batch_size * state_dimension * cell_state_type_size);
+ auto scratch_2_data =
+ std::make_unique<uint8_t[]>(batch_size * state_dimension * cell_state_type_size);
+ auto scratch_3_data =
+ std::make_unique<uint8_t[]>(batch_size * state_dimension * cell_state_type_size);
+
+  // Create the output state tensor and fill it with zeros
+ auto output_state_data =
+ std::make_unique<int8_t[]>(Tensor::num_elements(lstm_struct.output_state()));
+ std::fill_n(output_state_data.get(), Tensor::num_elements(lstm_struct.output_state()), 0);
+
+  // Create the cell state tensor and fill it with zeros
+ auto cell_state_data =
+ std::make_unique<int16_t[]>(Tensor::num_elements(lstm_struct.cell_state()));
+ std::fill_n(cell_state_data.get(), Tensor::num_elements(lstm_struct.cell_state()), 0);
+
+ luci_interpreter_pal::evalLSTM<int8_t, int8_t, int16_t, int32_t>(
+ &lstm_struct, &quant_lstm_params, &cell_state_info, output_state_data.get(),
+ cell_state_data.get(), kernels::getTensorData<int16_t>(scratch_0_data.get()),
+ kernels::getTensorData<int16_t>(scratch_1_data.get()),
+ kernels::getTensorData<int16_t>(scratch_2_data.get()),
+ kernels::getTensorData<int16_t>(scratch_3_data.get()), runtime_graph);
+}
+
+#endif // DIS_QUANT
+
+#ifndef DIS_FLOAT
+luci_interpreter_pal::FullyConnectedParams createFcParamsFloat()
+{
+ luci_interpreter_pal::FullyConnectedParams op_params;
+ kernels::calculateActivationRange(FusedActFunc::NONE, &op_params.float_activation_min,
+ &op_params.float_activation_max);
+ op_params.quantized_activation_max = op_params.float_activation_max;
+ op_params.quantized_activation_min = op_params.float_activation_min;
+ return op_params;
+}
+
+lstm::GateParameters createGateParamsFloat()
+{
+ lstm::GateParameters gate_params;
+
+ gate_params.input_fc_params = createFcParamsFloat();
+ gate_params.recurrent_fc_params = createFcParamsFloat();
+
+ return gate_params;
+}
+
+lstm::CellStateInfo createLstmCellStateInfoFloat(const float cell_clip)
+{
+ lstm::CellStateInfo cell_state_info;
+ cell_state_info.cell_clip = cell_clip;
+ cell_state_info.cell_state_scale_power = 0; // no quantization
+ cell_state_info.quantized_cell_clip = 0; // no quantization
+ return cell_state_info;
+}
+
+void prepareGateParamsFloat(lstm::LSTMParameters *float_lstm_params)
+{
+ // Gate Parameters
+ float_lstm_params->forget_gate_parameters = createGateParamsFloat();
+ float_lstm_params->input_gate_parameters = createGateParamsFloat();
+ float_lstm_params->cell_gate_parameters = createGateParamsFloat();
+ float_lstm_params->output_gate_parameters = createGateParamsFloat();
+
+ // Inter gate multiplication parameters
+ luci_interpreter_pal::ArithmeticParams op_params;
+ kernels::calculateActivationRange(FusedActFunc::NONE, &op_params.float_activation_min,
+ &op_params.float_activation_max);
+ op_params.quantized_activation_max = op_params.float_activation_max;
+ op_params.quantized_activation_min = op_params.float_activation_min;
+ float_lstm_params->inter_gate_parameters.forget_cell_mul_params = op_params;
+ float_lstm_params->inter_gate_parameters.input_mul_params = op_params;
+ float_lstm_params->inter_gate_parameters.output_mul_params = op_params;
+}
+
+void evalFloat(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph,
+               bool /*is_inplace*/)
+{
+ lstm::LSTMStruct lstm_struct(cur_op, runtime_graph);
+
+ lstm::CellStateInfo cell_state_info =
+ createLstmCellStateInfoFloat(lstm_struct.options->cell_clip());
+
+ lstm::LSTMParameters lstm_params;
+ prepareGateParamsFloat(&lstm_params);
+
+ const bool time_major = lstm_struct.options->time_major();
+ const auto batch_size =
+ time_major ? Tensor::dim(lstm_struct.input(), 1) : Tensor::dim(lstm_struct.input(), 0);
+ const auto state_dimension = Tensor::dim(lstm_struct.output_state(), 1);
+ const auto cell_state_type_size = getDataTypeSize(Tensor::element_type(lstm_struct.cell_state()));
+
+ auto scratch_0_data =
+ std::make_unique<uint8_t[]>(batch_size * state_dimension * cell_state_type_size);
+ auto scratch_1_data =
+ std::make_unique<uint8_t[]>(batch_size * state_dimension * cell_state_type_size);
+ auto scratch_2_data =
+ std::make_unique<uint8_t[]>(batch_size * state_dimension * cell_state_type_size);
+ auto scratch_3_data =
+ std::make_unique<uint8_t[]>(batch_size * state_dimension * cell_state_type_size);
+
+  // Create the output state tensor and fill it with zeros
+ auto output_state_data =
+ std::make_unique<float[]>(Tensor::num_elements(lstm_struct.output_state()));
+ std::fill_n(output_state_data.get(), Tensor::num_elements(lstm_struct.output_state()), 0);
+
+  // Create the cell state tensor and fill it with zeros
+ auto cell_state_data = std::make_unique<float[]>(Tensor::num_elements(lstm_struct.cell_state()));
+ std::fill_n(cell_state_data.get(), Tensor::num_elements(lstm_struct.cell_state()), 0);
+
+ luci_interpreter_pal::evalLSTM<float, float, float, float>(
+ &lstm_struct, &lstm_params, &cell_state_info, output_state_data.get(), cell_state_data.get(),
+ kernels::getTensorData<float>(scratch_0_data.get()),
+ kernels::getTensorData<float>(scratch_1_data.get()),
+ kernels::getTensorData<float>(scratch_2_data.get()),
+ kernels::getTensorData<float>(scratch_3_data.get()), runtime_graph);
+}
+#endif // DIS_FLOAT
+
+void validateWeightTensorSize(const circle::Tensor *weight_tensor, int dim1_size, int dim2_size)
+{
+ LUCI_INTERPRETER_CHECK(Tensor::num_dims(weight_tensor) == 2);
+ LUCI_INTERPRETER_CHECK(Tensor::dim(weight_tensor, 0) == dim1_size);
+ LUCI_INTERPRETER_CHECK(Tensor::dim(weight_tensor, 1) == dim2_size);
+}
+
+void validateTensorsSize(lstm::LSTMStruct *lstm_struct, const bool time_major)
+{
+ const auto batch_size =
+ time_major ? Tensor::dim(lstm_struct->input(), 1) : Tensor::dim(lstm_struct->input(), 0);
+
+ const auto input_dimension = Tensor::dim(lstm_struct->input(), 2);
+ const auto state_dimension = Tensor::dim(lstm_struct->output_state(), 1);
+
+ // Input FC weights
+ for (int32_t i = 1; i < 5; i++)
+ {
+ validateWeightTensorSize(lstm_struct->get_internal_tensor(i), state_dimension, input_dimension);
+ }
+
+ // Recurrent FC weights
+ for (int32_t i = 5; i < 9; i++)
+ {
+ validateWeightTensorSize(lstm_struct->get_internal_tensor(i), state_dimension, state_dimension);
+ }
+
+ // Biases
+ for (int32_t i = 12; i < 16; i++)
+ {
+ LUCI_INTERPRETER_CHECK(Tensor::num_dims(lstm_struct->get_internal_tensor(i)) == 1);
+ LUCI_INTERPRETER_CHECK(Tensor::dim(lstm_struct->get_internal_tensor(i), 0) == state_dimension);
+ }
+
+  // Check the shape of the input state tensors.
+  // These tensors may be 1-D or 2-D; any layout is fine as long as the total
+  // size is correct.
+ LUCI_INTERPRETER_CHECK(Tensor::num_elements(lstm_struct->output_state()) ==
+ batch_size * state_dimension);
+ LUCI_INTERPRETER_CHECK(Tensor::num_elements(lstm_struct->cell_state()) ==
+ batch_size * state_dimension);
+
+ // Check the shape of output tensor against that of input tensor
+ LUCI_INTERPRETER_CHECK(Tensor::num_dims(lstm_struct->output()) == 3);
+ LUCI_INTERPRETER_CHECK(Tensor::dim(lstm_struct->input(), 0) ==
+ Tensor::dim(lstm_struct->output(), 0));
+ LUCI_INTERPRETER_CHECK(Tensor::dim(lstm_struct->input(), 1) ==
+ Tensor::dim(lstm_struct->output(), 1));
+ LUCI_INTERPRETER_CHECK(Tensor::dim(lstm_struct->output(), 2) == state_dimension);
+}
+
+} // namespace
+
+void configure_kernel_CircleUnidirectionalSequenceLSTM(const circle::Operator *cur_op,
+ BaseRuntimeGraph *runtime_graph)
+{
+ lstm::LSTMStruct lstm_struct(cur_op, runtime_graph);
+
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(lstm_struct.input()) == DataType::FLOAT32 or
+ Tensor::element_type(lstm_struct.input()) == DataType::S8);
+
+ lstm_struct.validateTensorTypes();
+
+ const bool time_major = lstm_struct.options->time_major();
+
+ validateTensorsSize(&lstm_struct, time_major);
+
+ // No peephole
+ for (int32_t i = 9; i < 12; ++i)
+ LUCI_INTERPRETER_CHECK(lstm_struct.get_internal_tensor(i) == nullptr);
+
+ // No projection
+ for (int32_t i = 16; i < 18; ++i)
+ LUCI_INTERPRETER_CHECK(lstm_struct.get_internal_tensor(i) == nullptr);
+
+ // No internal layer norm
+ for (int32_t i = 20; i < 24; ++i)
+ LUCI_INTERPRETER_CHECK(lstm_struct.get_internal_tensor(i) == nullptr);
+}
+
+void execute_kernel_CircleUnidirectionalSequenceLSTM(const circle::Operator *cur_op,
+ BaseRuntimeGraph *runtime_graph)
+{
+ const auto input_index = cur_op->inputs()->operator[](0);
+ assert(input_index != -1);
+
+ bool is_inplace = runtime_graph->is_inplace_op(cur_op);
+
+ const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+
+ switch (Tensor::element_type(input))
+ {
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ evalFloat(cur_op, runtime_graph, is_inplace);
+ break;
+#endif // DIS_FLOAT
+#ifndef DIS_QUANT
+ case DataType::S8:
+ evalInt8(cur_op, runtime_graph, is_inplace);
+ break;
+#endif // DIS_QUANT
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.h b/onert-micro/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.h
new file mode 100644
index 000000000..38c1212c1
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.h
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_UNIDIRECTIONAL_SEQUENCE_LSTM_H
+#define LUCI_INTERPRETER_KERNELS_UNIDIRECTIONAL_SEQUENCE_LSTM_H
+
+#include "PALUtils.h"
+
+namespace luci_interpreter
+{
+namespace lstm
+{
+
+struct LSTMStruct
+{
+ LSTMStruct() = delete;
+ LSTMStruct(const LSTMStruct &) = delete;
+
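+  // Operator input index layout, as read by the constructor below:
+  //   0        input
+  //   1 .. 4   input-to-{input,forget,cell,output} weights
+  //   5 .. 8   recurrent-to-{input,forget,cell,output} weights
+  //   9 .. 11  cell-to-{input,forget,output} (peephole) weights
+  //   12 .. 15 {input,forget,cell,output} gate biases
+  //   16, 17   projection weights / projection bias
+  //   18, 19   output state / cell state
+  //   20 .. 23 {input,forget,cell,output} layer norm coefficients
+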
+ explicit LSTMStruct(const circle::Operator *cur_op,
+ luci_interpreter::BaseRuntimeGraph *runtime_graph)
+ {
+ const auto input_index = cur_op->inputs()->operator[](0);
+ const auto input_to_input_weights_index = cur_op->inputs()->operator[](1);
+ const auto input_to_forget_weights_index = cur_op->inputs()->operator[](2);
+ const auto input_to_cell_weights_index = cur_op->inputs()->operator[](3);
+ const auto input_to_output_weights_index = cur_op->inputs()->operator[](4);
+ assert(input_index != -1);
+ // input_to_input_weights_index - optional
+ assert(input_to_forget_weights_index != -1);
+ assert(input_to_cell_weights_index != -1);
+ assert(input_to_output_weights_index != -1);
+ internal_tensors[0] = runtime_graph->getCircleTensorByIndex(input_index);
+ internal_tensors[1] = runtime_graph->getCircleTensorByIndex(input_to_input_weights_index);
+ internal_tensors[2] = runtime_graph->getCircleTensorByIndex(input_to_forget_weights_index);
+ internal_tensors[3] = runtime_graph->getCircleTensorByIndex(input_to_cell_weights_index);
+ internal_tensors[4] = runtime_graph->getCircleTensorByIndex(input_to_output_weights_index);
+
+ const auto recurrent_to_input_weights_index = cur_op->inputs()->operator[](5);
+ const auto recurrent_to_forget_weights_index = cur_op->inputs()->operator[](6);
+ const auto recurrent_to_cell_weights_index = cur_op->inputs()->operator[](7);
+ const auto recurrent_to_output_weights_index = cur_op->inputs()->operator[](8);
+ // recurrent_to_input_weights_index - optional
+ assert(recurrent_to_forget_weights_index != -1);
+ assert(recurrent_to_cell_weights_index != -1);
+ assert(recurrent_to_output_weights_index != -1);
+ internal_tensors[5] = runtime_graph->getCircleTensorByIndex(recurrent_to_input_weights_index);
+ internal_tensors[6] = runtime_graph->getCircleTensorByIndex(recurrent_to_forget_weights_index);
+ internal_tensors[7] = runtime_graph->getCircleTensorByIndex(recurrent_to_cell_weights_index);
+ internal_tensors[8] = runtime_graph->getCircleTensorByIndex(recurrent_to_output_weights_index);
+
+ const auto cell_to_input_weights_index = cur_op->inputs()->operator[](9);
+ const auto cell_to_forget_weights_index = cur_op->inputs()->operator[](10);
+ const auto cell_to_output_weights_index = cur_op->inputs()->operator[](11);
+ // optional cell_to_input_weights_index
+ // optional cell_to_forget_weights_index
+ // optional cell_to_output_weights_index
+ internal_tensors[9] = runtime_graph->getCircleTensorByIndex(cell_to_input_weights_index);
+ internal_tensors[10] = runtime_graph->getCircleTensorByIndex(cell_to_forget_weights_index);
+ internal_tensors[11] = runtime_graph->getCircleTensorByIndex(cell_to_output_weights_index);
+
+ const auto input_gate_bias_index = cur_op->inputs()->operator[](12);
+ const auto forget_gate_bias_index = cur_op->inputs()->operator[](13);
+ const auto cell_gate_bias_index = cur_op->inputs()->operator[](14);
+ const auto output_gate_bias_index = cur_op->inputs()->operator[](15);
+ // optional input_gate_bias_index
+ assert(forget_gate_bias_index != -1);
+ assert(cell_gate_bias_index != -1);
+ assert(output_gate_bias_index != -1);
+ internal_tensors[12] = runtime_graph->getCircleTensorByIndex(input_gate_bias_index);
+ internal_tensors[13] = runtime_graph->getCircleTensorByIndex(forget_gate_bias_index);
+ internal_tensors[14] = runtime_graph->getCircleTensorByIndex(cell_gate_bias_index);
+ internal_tensors[15] = runtime_graph->getCircleTensorByIndex(output_gate_bias_index);
+
+ const auto projection_weights_index = cur_op->inputs()->operator[](16);
+ const auto projection_bias_index = cur_op->inputs()->operator[](17);
+ // optional projection_weights_index
+ // optional projection_bias_index
+ internal_tensors[16] = runtime_graph->getCircleTensorByIndex(projection_weights_index);
+ internal_tensors[17] = runtime_graph->getCircleTensorByIndex(projection_bias_index);
+
+ const auto output_state_index = cur_op->inputs()->operator[](18);
+ const auto cell_state_index = cur_op->inputs()->operator[](19);
+ assert(output_state_index != -1);
+ assert(cell_state_index != -1);
+ internal_tensors[18] = runtime_graph->getCircleTensorByIndex(output_state_index);
+ internal_tensors[19] = runtime_graph->getCircleTensorByIndex(cell_state_index);
+
+ const auto input_layer_norm_coefficients_index = cur_op->inputs()->operator[](20);
+ const auto forget_layer_norm_coefficients_index = cur_op->inputs()->operator[](21);
+ const auto cell_layer_norm_coefficients_index = cur_op->inputs()->operator[](22);
+ const auto output_layer_norm_coefficients_index = cur_op->inputs()->operator[](23);
+ // optional input_layer_norm_coefficients_index
+ // optional forget_layer_norm_coefficients_index
+ // optional cell_layer_norm_coefficients_index
+ // optional output_layer_norm_coefficients_index
+ internal_tensors[20] =
+ runtime_graph->getCircleTensorByIndex(input_layer_norm_coefficients_index);
+ internal_tensors[21] =
+ runtime_graph->getCircleTensorByIndex(forget_layer_norm_coefficients_index);
+ internal_tensors[22] =
+ runtime_graph->getCircleTensorByIndex(cell_layer_norm_coefficients_index);
+ internal_tensors[23] =
+ runtime_graph->getCircleTensorByIndex(output_layer_norm_coefficients_index);
+
+ const auto output_index = cur_op->outputs()->operator[](0);
+ assert(output_index != -1);
+ output_internal = runtime_graph->getCircleTensorByIndex(output_index);
+
+ options = cur_op->builtin_options_as_UnidirectionalSequenceLSTMOptions();
+ }
+
+ void validateTensorTypes()
+ {
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(input()) == Tensor::element_type(output_state()));
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(output()) == Tensor::element_type(input()));
+
+ for (int32_t i = 1; i < 9; ++i)
+ {
+ LUCI_INTERPRETER_CHECK(internal_tensors[i] == nullptr or
+ Tensor::element_type(input_to_forget_weights()) ==
+ Tensor::element_type(internal_tensors[i]));
+ }
+
+ for (int32_t i = 12; i < 16; ++i)
+ {
+ LUCI_INTERPRETER_CHECK(internal_tensors[i] == nullptr or
+ Tensor::element_type(forget_gate_bias()) ==
+ Tensor::element_type(internal_tensors[i]));
+ }
+ }
+
+ const circle::Tensor *input() { return internal_tensors[0]; };
+
+ const circle::Tensor *input_to_input_weights() { return internal_tensors[1]; };
+ const circle::Tensor *input_to_forget_weights() { return internal_tensors[2]; };
+ const circle::Tensor *input_to_cell_weights() { return internal_tensors[3]; };
+ const circle::Tensor *input_to_output_weights() { return internal_tensors[4]; };
+
+ const circle::Tensor *recurrent_to_input_weights() { return internal_tensors[5]; };
+ const circle::Tensor *recurrent_to_forget_weights() { return internal_tensors[6]; };
+ const circle::Tensor *recurrent_to_cell_weights() { return internal_tensors[7]; };
+ const circle::Tensor *recurrent_to_output_weights() { return internal_tensors[8]; };
+
+ const circle::Tensor *cell_to_input_weights() { return internal_tensors[9]; };
+ const circle::Tensor *cell_to_forget_weights() { return internal_tensors[10]; };
+ const circle::Tensor *cell_to_output_weights() { return internal_tensors[11]; };
+
+ const circle::Tensor *input_gate_bias() { return internal_tensors[12]; };
+ const circle::Tensor *forget_gate_bias() { return internal_tensors[13]; };
+ const circle::Tensor *cell_gate_bias() { return internal_tensors[14]; };
+ const circle::Tensor *output_gate_bias() { return internal_tensors[15]; };
+
+ const circle::Tensor *projection_weights() { return internal_tensors[16]; };
+ const circle::Tensor *projection_bias() { return internal_tensors[17]; };
+
+ const circle::Tensor *output_state() { return internal_tensors[18]; };
+ const circle::Tensor *cell_state() { return internal_tensors[19]; };
+
+ const circle::Tensor *input_layer_norm_coefficients() { return internal_tensors[20]; };
+ const circle::Tensor *forget_layer_norm_coefficients() { return internal_tensors[21]; };
+ const circle::Tensor *cell_layer_norm_coefficients() { return internal_tensors[22]; };
+ const circle::Tensor *output_layer_norm_coefficients() { return internal_tensors[23]; };
+ const circle::Tensor *output() { return output_internal; };
+
+ const circle::UnidirectionalSequenceLSTMOptions *options;
+
+ const circle::Tensor *get_internal_tensor(int i) { return internal_tensors[i]; }
+
+private:
+ const circle::Tensor *output_internal;
+ const circle::Tensor *internal_tensors[24];
+};
+
+struct GateParameters
+{
+ luci_interpreter_pal::FullyConnectedParams input_fc_params;
+ luci_interpreter_pal::FullyConnectedParams recurrent_fc_params;
+};
+
+struct InterGateParameters
+{
+ luci_interpreter_pal::ArithmeticParams forget_cell_mul_params;
+ luci_interpreter_pal::ArithmeticParams input_mul_params;
+ luci_interpreter_pal::ArithmeticParams output_mul_params;
+};
+
+struct CellStateInfo
+{
+ float cell_clip;
+  // clipping range for the cell state; only a 16-bit cell state is supported
+  // (could be generalized through templatization)
+ int16_t quantized_cell_clip;
+ // 2^-cell_state_scale_power = cell state scale, required by integer tanh
+ // computation
+ int32_t cell_state_scale_power;
+};
+
+struct LSTMParameters
+{
+ GateParameters forget_gate_parameters;
+ GateParameters input_gate_parameters;
+ GateParameters cell_gate_parameters;
+ GateParameters output_gate_parameters;
+ InterGateParameters inter_gate_parameters;
+};
+
+} // namespace lstm
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_UNIDIRECTIONAL_SEQUENCE_LSTM_H
diff --git a/onert-micro/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.test.cpp b/onert-micro/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.test.cpp
new file mode 100644
index 000000000..1a094ee88
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.test.cpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/unidirectional_lstm/FloatUnidirectionalLSTMKernel.h"
+#include "luci_interpreter/test_models/unidirectional_lstm/QuantS8UnidirectionalLSTM.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class UnidirectionalLSTMTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T>
+std::vector<T> checkUnidirectionalSequenceLSTMKernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load model with single op
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 1);
+
+ // Set input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(UnidirectionalLSTMTest, Float_P)
+{
+ test_kernel::TestDataFloatUnidirectionalLSTM test_data_kernel;
+ std::vector<float> output_data_vector = checkUnidirectionalSequenceLSTMKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, kernels::testing::FloatArrayNear(
+ test_data_kernel.get_output_data_by_index(0), 0.0001f));
+}
+
+TEST_F(UnidirectionalLSTMTest, Int8_P)
+{
+ test_kernel::TestDataInt8UnidirectionalLSTM test_data_kernel;
+ std::vector<int8_t> output_data_vector = checkUnidirectionalSequenceLSTMKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+// TODO: add negative tests?
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Unpack.cpp b/onert-micro/luci-interpreter/src/kernels/Unpack.cpp
new file mode 100644
index 000000000..80f4d1f28
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Unpack.cpp
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Unpack.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Unpack::Unpack(const Tensor *input, std::vector<Tensor *> outputs, const UnpackParams &params)
+ : KernelWithParams<UnpackParams>({input}, std::move(outputs), params)
+{
+}
+
+void Unpack::configure()
+{
+ const Shape &input_shape = input()->shape();
+
+ int axis = _params.axis;
+ if (axis < 0)
+ axis += input()->shape().num_dims();
+ assert(axis >= 0 && axis < input_shape.num_dims());
+
+ Shape output_shape(input_shape.num_dims() - 1);
+ int out_index = 0;
+ for (int in_index = 0; in_index < input_shape.num_dims(); ++in_index)
+ {
+ if (in_index != axis)
+ output_shape.dim(out_index++) = input_shape.dim(in_index);
+ }
+
+  // TODO: enable this only for kernels with dynamic shapes
+ for (Tensor *output : _outputs)
+ {
+ assert(output->element_type() == input()->element_type());
+ output->resize(output_shape);
+ }
+}
+
+template <typename T> void Unpack::executeImpl() const
+{
+ tflite::UnpackParams params{};
+ params.axis = _params.axis;
+ params.num_split = _outputs.size();
+ VectorOfTensors<T, false> all_outputs(_outputs);
+ tflite::reference_ops::Unpack<T>(params, getTensorShape(input()), getTensorData<T>(input()),
+ **all_outputs.shapes(), all_outputs.data());
+}
+
+void Unpack::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ return executeImpl<float>();
+ case DataType::U8:
+ return executeImpl<uint8_t>();
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Unpack.h b/onert-micro/luci-interpreter/src/kernels/Unpack.h
new file mode 100644
index 000000000..f4a44ecad
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Unpack.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_UNPACK_H
+#define LUCI_INTERPRETER_KERNELS_UNPACK_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Unpack : public KernelWithParams<UnpackParams>
+{
+public:
+ Unpack(const Tensor *input, std::vector<Tensor *> outputs, const UnpackParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output(int index) const { return _outputs[index]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> void executeImpl() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_UNPACK_H
diff --git a/onert-micro/luci-interpreter/src/kernels/Unpack.test.cpp b/onert-micro/luci-interpreter/src/kernels/Unpack.test.cpp
new file mode 100644
index 000000000..9384ddc83
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Unpack.test.cpp
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Unpack.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(int axis, Shape input_shape, std::initializer_list<T> input_data,
+ const std::vector<std::initializer_list<int32_t>> &exp_output_shape,
+ std::vector<std::initializer_list<T>> exp_output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+ const int num_outputs = input_shape.dim(axis < 0 ? axis + input_shape.num_dims() : axis);
+
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ std::vector<Tensor> output_tensors;
+ output_tensors.reserve(num_outputs);
+ for (int i = 0; i < num_outputs; ++i)
+ {
+ output_tensors.push_back(makeOutputTensor(element_type));
+ }
+
+ std::vector<Tensor *> output_tensor_ptrs(num_outputs);
+ for (int i = 0; i < num_outputs; ++i)
+ {
+ output_tensor_ptrs[i] = &output_tensors[i];
+ }
+
+ UnpackParams params{};
+ params.axis = axis;
+
+ Unpack kernel(&input_tensor, std::move(output_tensor_ptrs), params);
+ kernel.configure();
+ for (int i = 0; i < num_outputs; i++)
+ {
+ memory_manager->allocate_memory(output_tensors[i]);
+ }
+ kernel.execute();
+
+  for (int i = 0; i < num_outputs; ++i)
+  {
+    // Verify both the shape and the data of each unpacked output; the
+    // expected shapes were previously passed in but never checked.
+    EXPECT_THAT(extractTensorShape(output_tensors[i]),
+                ::testing::ElementsAreArray(exp_output_shape[i]));
+    EXPECT_THAT(extractTensorData<T>(output_tensors[i]),
+                ::testing::ElementsAreArray(exp_output_data[i]));
+  }
+}
+
+template <typename T> class UnpackTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(UnpackTest, DataTypes);
+
+TYPED_TEST(UnpackTest, ThreeOutputs)
+{
+ Check<TypeParam>(/*axis=*/0, /*input_shape=*/{3, 2},
+ /*input_data=*/{1, 2, 3, 4, 5, 6},
+ /*exp_output_shape=*/{{2}, {2}, {2}},
+ /*exp_output_data=*/{{1, 2}, {3, 4}, {5, 6}});
+}
+
+TYPED_TEST(UnpackTest, ThreeOutputsAxisOne)
+{
+ Check<TypeParam>(/*axis=*/1, /*input_shape=*/{3, 2},
+ /*input_data=*/{1, 2, 3, 4, 5, 6},
+ /*exp_output_shape=*/{{3}, {3}},
+ /*exp_output_data=*/{{1, 3, 5}, {2, 4, 6}});
+}
+
+TYPED_TEST(UnpackTest, ThreeOutputsNegativeAxisOne)
+{
+ Check<TypeParam>(/*axis=*/-1, /*input_shape=*/{3, 2},
+ /*input_data=*/{1, 2, 3, 4, 5, 6},
+ /*exp_output_shape=*/{{3}, {3}},
+ /*exp_output_data=*/{{1, 3, 5}, {2, 4, 6}});
+}
+
+TYPED_TEST(UnpackTest, ThreeOutputsNegativeAxisTwo)
+{
+ Check<TypeParam>(/*axis=*/-2, /*input_shape=*/{3, 2},
+ /*input_data=*/{1, 2, 3, 4, 5, 6},
+ /*exp_output_shape=*/{{2}, {2}, {2}},
+ /*exp_output_data=*/{{1, 2}, {3, 4}, {5, 6}});
+}
+
+TYPED_TEST(UnpackTest, OneOutput)
+{
+ Check<TypeParam>(/*axis=*/0, /*input_shape=*/{1, 6},
+ /*input_data=*/{1, 2, 3, 4, 5, 6},
+ /*exp_output_shape=*/{{6}},
+ /*exp_output_data=*/{{1, 2, 3, 4, 5, 6}});
+}
+
+TYPED_TEST(UnpackTest, ThreeDimensionsTwoOutputs)
+{
+ Check<TypeParam>(/*axis=*/2, /*input_shape=*/{2, 2, 2},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8},
+ /*exp_output_shape=*/{{2, 2}, {2, 2}},
+ /*exp_output_data=*/{{1, 3, 5, 7}, {2, 4, 6, 8}});
+}
+
+TYPED_TEST(UnpackTest, FiveDimensionsTwoOutputs)
+{
+ Check<TypeParam>(
+ /*axis=*/2, /*input_shape=*/{2, 2, 2, 2, 1},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ /*exp_output_shape=*/{{2, 2, 2, 1}, {2, 2, 2, 1}},
+ /*exp_output_data=*/
+ {{1, 2, 5, 6, 9, 10, 13, 14}, {3, 4, 7, 8, 11, 12, 15, 16}});
+}
+
+TYPED_TEST(UnpackTest, VectorToScalar)
+{
+ Check<TypeParam>(/*axis=*/0, /*input_shape=*/{5},
+ /*input_data=*/{1, 2, 3, 4, 5},
+ /*exp_output_shape=*/{{}, {}, {}, {}, {}},
+ /*exp_output_data=*/{{1}, {2}, {3}, {4}, {5}});
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Utils.cpp b/onert-micro/luci-interpreter/src/kernels/Utils.cpp
new file mode 100644
index 000000000..35ab82180
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Utils.cpp
@@ -0,0 +1,275 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Utils.h"
+
+#include <cassert>
+#include <cmath>
+#include <limits>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+luci_interpreter::RuntimeShape getTensorRuntimeShape(const circle::Tensor *circle_tensor,
+ BaseRuntimeGraph *runtime_graph)
+{
+ luci_interpreter::RuntimeShape input_shape = getTensorShape(circle_tensor);
+
+#ifndef DIS_DYN_SHAPES
+ auto *dynamic_shape_vector = runtime_graph->getDynamicShapeTensor(circle_tensor);
+ if (dynamic_shape_vector != nullptr)
+ {
+ input_shape.resize(dynamic_shape_vector->dimensionsCount());
+
+ for (int n = 0; n < dynamic_shape_vector->dimensionsCount(); ++n)
+ {
+ input_shape.setDim(n, dynamic_shape_vector->dims(n));
+ }
+ }
+#endif // DIS_DYN_SHAPES
+ return input_shape;
+}
+
+template <typename T>
+void calculateActivationRange(Activation activation, T *activation_min, T *activation_max)
+{
+ switch (activation)
+ {
+ case Activation::NONE:
+ *activation_min = std::numeric_limits<T>::lowest();
+ *activation_max = std::numeric_limits<T>::max();
+ break;
+ case Activation::RELU:
+ *activation_min = 0;
+ *activation_max = std::numeric_limits<T>::max();
+ break;
+ case Activation::RELU_N1_TO_1:
+ *activation_min = -1;
+ *activation_max = 1;
+ break;
+ case Activation::RELU6:
+ *activation_min = 0;
+ *activation_max = 6;
+ break;
+ default:
+ assert(false && "Unsupported activation.");
+ }
+}
+
+void matrixScalarMultiplyAccumulate(const int8_t *matrix, int32_t scalar, int32_t n_row,
+ int32_t n_col, int32_t *output)
+{
+ for (int i = 0; i < n_row; ++i)
+ {
+ int32_t row_sum = 0;
+ for (int j = 0; j < n_col; ++j)
+ {
+ row_sum += *matrix++;
+ }
+ output[i] += row_sum * scalar;
+ }
+}
+
+template void calculateActivationRange(Activation activation, float *activation_min,
+ float *activation_max);
+template void calculateActivationRange(Activation activation, int32_t *activation_min,
+ int32_t *activation_max);
+template void calculateActivationRange(Activation activation, int64_t *activation_min,
+ int64_t *activation_max);
+
+#ifndef DIS_QUANT
+bool checkedLog2(const float x, int *log2_result)
+{
+ const float x_log2 = std::log(x) * (1.0f / std::log(2.0f));
+ const float x_log2_rounded = std::round(x_log2);
+ const float x_log2_fracpart = x_log2 - x_log2_rounded;
+
+ *log2_result = static_cast<int>(x_log2_rounded);
+ return std::abs(x_log2_fracpart) < 1e-3f;
+}
+
+int calculateInputRadius(int input_integer_bits, int input_left_shift, int total_signed_bits)
+{
+ const double max_input_rescaled = 1.0 * ((1 << input_integer_bits) - 1) *
+ (1LL << (total_signed_bits - input_integer_bits)) /
+ (1LL << input_left_shift);
+ // Tighten bound using floor. Suppose that we could use the exact value.
+ // After scaling the difference, the result would be at the maximum. Thus we
+ // must ensure that our value has lower magnitude.
+ return static_cast<int>(std::floor(max_input_rescaled));
+}
+
+static void calculateActivationRangeQuantizedImpl(Activation activation, int32_t qmin, int32_t qmax,
+ int32_t zero_point, float scale,
+ int32_t *activation_min, int32_t *activation_max)
+{
+ auto quantize = [scale, zero_point](float x) {
+ return zero_point + static_cast<int32_t>(std::round(x / scale));
+ };
+
+ switch (activation)
+ {
+ case Activation::NONE:
+ case Activation::TANH:
+ *activation_min = qmin;
+ *activation_max = qmax;
+ break;
+ case Activation::RELU:
+ *activation_min = std::max(qmin, quantize(0.0f));
+ *activation_max = qmax;
+ break;
+ case Activation::RELU_N1_TO_1:
+ *activation_min = std::max(qmin, quantize(-1.0f));
+ *activation_max = std::min(qmax, quantize(1.0f));
+ break;
+ case Activation::RELU6:
+ *activation_min = std::max(qmin, quantize(0.0f));
+ *activation_max = std::min(qmax, quantize(6.0f));
+ break;
+ default:
+ assert(false && "Unsupported activation.");
+ }
+}
+
+static void calculateActivationRangeQuantizedImpl(Activation activation, int32_t qmin, int32_t qmax,
+ const circle::Tensor *output,
+ int32_t *activation_min, int32_t *activation_max)
+{
+ const float scale = Tensor::scale(output);
+ const int32_t zero_point = Tensor::zero_point(output);
+
+  calculateActivationRangeQuantizedImpl(activation, qmin, qmax, zero_point, scale,
+                                        activation_min, activation_max);
+}
+
+void calculateActivationRangeQuantized(Activation activation, int32_t output_zero_point,
+ float output_scale, DataType data_type,
+ int32_t *activation_min, int32_t *activation_max)
+{
+ int32_t qmin{};
+ int32_t qmax{};
+ switch (data_type)
+ {
+ case DataType::U8:
+ qmin = 0;
+ qmax = std::numeric_limits<uint8_t>::max();
+ break;
+ case DataType::S8:
+ qmin = -std::numeric_limits<int8_t>::max();
+ qmax = std::numeric_limits<int8_t>::max();
+ break;
+ case DataType::S16:
+ // For now, assume that signed int16 type implies signed symmetric quantization.
+ assert(output_zero_point == 0);
+ qmin = -std::numeric_limits<int16_t>::max();
+ qmax = std::numeric_limits<int16_t>::max();
+ break;
+ default:
+ assert(false && "Unsupported type.");
+ }
+
+ calculateActivationRangeQuantizedImpl(activation, qmin, qmax, output_zero_point, output_scale,
+ activation_min, activation_max);
+}
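+
+// Worked example (illustrative): for RELU6 on a U8 output with
+// output_scale = 0.5 and output_zero_point = 0, quantize(0.0) == 0 and
+// quantize(6.0) == 12, so the clamped range becomes [0, 12] out of the
+// full U8 range [0, 255].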
+
+void calculateActivationRangeQuantized(Activation activation, const circle::Tensor *output,
+ int32_t *activation_min, int32_t *activation_max)
+{
+ assert(Tensor::zero_points(output).size() == 1);
+ const float scale = Tensor::scale(output);
+ const int32_t zero_point = Tensor::zero_point(output);
+ calculateActivationRangeQuantized(activation, zero_point, scale, Tensor::element_type(output),
+ activation_min, activation_max);
+}
+
+void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
+{
+ if (double_multiplier == 0.0)
+ {
+ *quantized_multiplier = 0;
+ *shift = 0;
+ return;
+ }
+
+ const double q = std::frexp(double_multiplier, shift);
+ auto q_fixed = static_cast<int64_t>(std::round(q * (int64_t(1) << 31)));
+
+ if (q_fixed == (int64_t(1) << 31))
+ {
+ q_fixed /= 2;
+ ++*shift;
+ }
+ assert(q_fixed <= std::numeric_limits<int32_t>::max());
+ // A shift amount smaller than -31 would cause all bits to be shifted out
+ // and thus all results would be zero. We implement that instead with
+ // q_fixed==0, so as to avoid hitting issues with right-shift
+ // operations with shift amounts greater than 31. Note that this happens
+ // roughly when abs(double_multiplier) < 2^-31 and the present handling means
+ // that we're effectively flushing tiny double_multiplier's to zero.
+  // We could conceivably handle values in the range (roughly) [32, 63]
+  // as 'denormals' i.e. (shift==0, q_fixed < 2^30). From that point of view
+  // the present handling is just doing 'flush denormals to zero'. We could
+  // reconsider and actually generate nonzero denormals if a need arises.
+ if (*shift < -31)
+ {
+ *shift = 0;
+ q_fixed = 0;
+ }
+ *quantized_multiplier = static_cast<int32_t>(q_fixed);
+}
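+
+// Worked example (illustrative): quantizeMultiplier(1.5, &m, &s) computes
+// frexp(1.5) = 0.75 * 2^1, so s == 1 and m == round(0.75 * 2^31) ==
+// 1610612736; downstream fixed-point code applies this as a Q0.31 multiply
+// by m followed by a shift by s.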
+
+void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier,
+ int *left_shift)
+{
+ assert(double_multiplier < 1.0);
+ assert(double_multiplier > 0.0);
+ int shift;
+ quantizeMultiplier(double_multiplier, quantized_multiplier, &shift);
+ assert(shift <= 0);
+ *left_shift = shift;
+}
+#endif
+
+luci_interpreter::RuntimeShape calculateShapeForBroadcast(const circle::Tensor *input1,
+ const circle::Tensor *input2)
+{
+ const int num_input1_dims = Tensor::num_dims(input1);
+ const int num_input2_dims = Tensor::num_dims(input2);
+ const int num_out_dims = std::max(num_input1_dims, num_input2_dims);
+ luci_interpreter::RuntimeShape output_shape(num_out_dims);
+
+ for (int i = 0; i < num_out_dims; ++i)
+ {
+ const int32_t input1_dim =
+ i < num_input1_dims ? Tensor::dim(input1, num_input1_dims - i - 1) : 1;
+ const int32_t input2_dim =
+ i < num_input2_dims ? Tensor::dim(input2, num_input2_dims - i - 1) : 1;
+
+ bool need_broadcast = input1_dim != input2_dim;
+ bool can_broadcast = input1_dim == 1 || input2_dim == 1;
+ LUCI_INTERPRETER_CHECK(!need_broadcast || can_broadcast);
+
+ output_shape.setDim(num_out_dims - i - 1, std::max(input1_dim, input2_dim));
+ }
+
+ return output_shape;
+}
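+
+// Worked example (illustrative): broadcasting shapes {2, 1, 3} and {4, 3}
+// aligns trailing dimensions, so the result is {2, 4, 3}; shapes such as
+// {2, 3} and {4, 3} would fail the LUCI_INTERPRETER_CHECK above because
+// neither mismatched dimension equals 1.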
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Utils.h b/onert-micro/luci-interpreter/src/kernels/Utils.h
new file mode 100644
index 000000000..a01d72dfa
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/Utils.h
@@ -0,0 +1,334 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_UTILS_H
+#define LUCI_INTERPRETER_KERNELS_UTILS_H
+
+#include "luci_interpreter/core/Tensor.h"
+#include "Builders.h"
+#include "Params.h"
+#include <cassert>
+#include <cstdint>
+
+#include <cmath>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+using Activation = luci_interpreter::FusedActFunc;
+
+// Wrapped in do/while so the macro behaves as a single statement (safe in
+// if/else without braces).
+#define LUCI_INTERPRETER_CHECK(cond)                   \
+  do                                                   \
+  {                                                    \
+    if (!(cond))                                       \
+    {                                                  \
+      assert(false && "LUCI_INTERPRETER_CHECK fails"); \
+    }                                                  \
+  } while (false)
+
+inline int32_t computePadding(int32_t stride, int32_t dilation_rate, int32_t in_size,
+ int32_t filter_size, int32_t out_size)
+{
+ const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
+ const int32_t padding = ((out_size - 1) * stride + effective_filter_size - in_size) / 2;
+ return padding > 0 ? padding : 0;
+}
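+
+// Worked example (illustrative): for stride 2, dilation 1, input size 5,
+// filter size 3 and output size 3, the padding is
+// ((3 - 1) * 2 + 3 - 5) / 2 = 1 pixel on each side.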
+
+inline int32_t computePaddingWithOffset(int32_t stride, int32_t dilation_rate, int32_t in_size,
+ int32_t filter_size, int32_t out_size, int32_t *offset)
+{
+ int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
+ int32_t total_padding = ((out_size - 1) * stride + effective_filter_size - in_size);
+ total_padding = total_padding > 0 ? total_padding : 0;
+ *offset = total_padding % 2;
+ return total_padding / 2;
+}
+
+inline int32_t computeOutputSize(Padding padding, int32_t image_size, int32_t filter_size,
+ int32_t stride, int32_t dilation_rate = 1)
+{
+ const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
+ switch (padding)
+ {
+ case Padding::SAME:
+ assert(stride != 0);
+ return (image_size + stride - 1) / stride;
+ case Padding::VALID:
+ assert(stride != 0);
+ return (image_size + stride - effective_filter_size) / stride;
+ default:
+ assert(false);
+ return 0;
+ }
+}
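+
+// Worked example (illustrative): with stride 2 and a 3x3 filter on a
+// 5-pixel dimension, SAME padding gives (5 + 2 - 1) / 2 = 3 outputs, while
+// VALID padding gives (5 + 2 - 3) / 2 = 2 outputs.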
+
+inline int32_t calcOffset(const circle::Tensor *tensor, int32_t d0, int32_t d1, int32_t d2,
+ int32_t d3)
+{
+
+ return ((d0 * Tensor::dim(tensor, 1) + d1) * Tensor::dim(tensor, 2) + d2) *
+ Tensor::dim(tensor, 3) +
+ d3;
+}
+
+template <typename T>
+void calculateActivationRange(Activation activation, T *activation_min, T *activation_max);
+
+luci_interpreter::RuntimeShape calculateShapeForBroadcast(const circle::Tensor *input1,
+ const circle::Tensor *input2);
+
+// Helper wrapper to hide broadcast logic
+template <typename T> class BroadcastableWrapper
+{
+public:
+ BroadcastableWrapper(const std::vector<T> &v) : _v(v), _stride(v.size() == 1 ? 0 : 1) {}
+
+ T operator[](int idx) { return _v[idx * _stride]; }
+
+private:
+ const std::vector<T> &_v;
+ int _stride;
+};
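+
+// Usage sketch (illustrative): wrapping a per-tensor scale vector of size 1
+// lets per-channel code index it uniformly:
+//   std::vector<float> scales{0.5f};
+//   BroadcastableWrapper<float> w(scales);
+//   // w[0] == w[7] == 0.5f; stride 0 broadcasts the single element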
+
+inline luci_interpreter::RuntimeShape getTensorShape(const circle::Tensor *tensor)
+{
+ if (tensor == nullptr)
+ return luci_interpreter::RuntimeShape();
+
+ auto const tensor_shape = Tensor::tensor_shape(tensor);
+
+ luci_interpreter::RuntimeShape runtime_shape(tensor_shape.size());
+ for (int i = 0; i < tensor_shape.size(); ++i)
+ {
+ runtime_shape.setDim(i, tensor_shape[i]);
+ }
+ return runtime_shape;
+}
+
+inline void getTensorDims(const circle::Tensor *tensor, BaseRuntimeGraph *runtime_graph,
+ int32_t *dims)
+{
+  if (tensor == nullptr)
+  {
+    // Assigning nullptr to the local pointer copy would have no effect on the
+    // caller, so simply leave the output buffer untouched.
+    return;
+  }
+
+#ifndef DIS_DYN_SHAPES
+ auto *dynamic_shape_vector = runtime_graph->getDynamicShapeTensor(tensor);
+ if (dynamic_shape_vector != nullptr)
+ {
+ for (int n = 0; n < dynamic_shape_vector->dimensionsCount(); ++n)
+ {
+ dims[n] = dynamic_shape_vector->dims(n);
+ }
+ }
+ else
+ {
+ auto const tensor_shape = Tensor::tensor_shape(tensor);
+ assert(tensor_shape.size() <= kMaxSmallSize);
+ for (int i = 0; i < tensor_shape.size(); ++i)
+ {
+ dims[i] = tensor_shape[i];
+ }
+ }
+#else
+ auto const tensor_shape = Tensor::tensor_shape(tensor);
+ assert(tensor_shape.size() <= kMaxSmallSize);
+ for (int i = 0; i < tensor_shape.size(); ++i)
+ {
+ dims[i] = tensor_shape[i];
+ }
+#endif // DIS_DYN_SHAPES
+}
+
+template <typename T> const T *getTensorData(const uint8_t *tensor_data)
+{
+ return tensor_data != nullptr ? reinterpret_cast<const T *>(tensor_data) : nullptr;
+}
+
+template <typename T> inline T *getTensorData(uint8_t *tensor_data)
+{
+ return tensor_data != nullptr ? reinterpret_cast<T *>(tensor_data) : nullptr;
+}
+
+luci_interpreter::RuntimeShape getTensorRuntimeShape(const circle::Tensor *circle_tensor,
+ BaseRuntimeGraph *runtime_graph);
+
+// A list of tensors in a format that can be used by kernels like split and
+// concatenation.
+template <typename T, bool is_const> class VectorOfTensors
+{
+public:
+ using ElementT = typename std::conditional<is_const, const T, T>::type;
+ using TensorT = typename std::conditional<is_const, const Tensor, Tensor>::type;
+
+ // Build with the tensors in 'tensor_list'.
+ explicit VectorOfTensors(const std::vector<TensorT *> &tensor_list)
+ {
+ const int num_tensors = tensor_list.size();
+
+ all_data_.reserve(num_tensors);
+ all_shape_.reserve(num_tensors);
+ all_shape_ptr_.reserve(num_tensors);
+
+ for (TensorT *tensor : tensor_list)
+ {
+ all_data_.push_back(getTensorData<T>(tensor));
+ all_shape_.push_back(getTensorShape(tensor));
+ }
+
+ // Taking the pointer from inside a std::vector is only OK if the vector is
+ // never modified, so we populate all_shape in the previous loop and then we
+ // are free to grab iterators here.
+ for (luci_interpreter::RuntimeShape &shape : all_shape_)
+ {
+ all_shape_ptr_.push_back(&shape);
+ }
+ }
+ // Return a pointer to the data pointers of all tensors in the list. For
+ // example:
+ // float* const* f = v.data();
+ // f[0][1] is the second element of the first tensor.
+ ElementT *const *data() const { return all_data_.data(); }
+
+ // Return a pointer to the shape pointers of all tensors in the list. For
+ // example:
+ //   const RuntimeShape* const* d = v.shapes();
+ //   d[1] are the dimensions of the second tensor in the list.
+ const luci_interpreter::RuntimeShape *const *shapes() const { return all_shape_ptr_.data(); }
+
+private:
+ std::vector<ElementT *> all_data_;
+ std::vector<luci_interpreter::RuntimeShape> all_shape_;
+ std::vector<luci_interpreter::RuntimeShape *> all_shape_ptr_;
+};
+
+template <typename T> constexpr bool one_of_types() { return false; }
+
+// Checks if T is the same type as one of the {U, Other...} types
+template <typename T, typename U, typename... Other> constexpr bool one_of_types()
+{
+ return std::is_same<T, U>::value || one_of_types<T, Other...>();
+}
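+// e.g. one_of_types<int32_t, int8_t, int16_t, int32_t>() is true and
+// one_of_types<float, int8_t, int16_t>() is false; this is handy in a static_assert that
+// constrains kernel template instantiations.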
+
+void matrixScalarMultiplyAccumulate(const int8_t *matrix, int32_t scalar, int32_t n_row,
+ int32_t n_col, int32_t *output);
+
+#ifndef DIS_QUANT
+bool checkedLog2(const float x, int *log2_result);
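+// e.g. checkedLog2(8.0f, &result) is expected to return true with result == 3, and to
+// return false for inputs that are not exact powers of two (assuming it mirrors the
+// corresponding TFLite helper).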
+
+int calculateInputRadius(int input_integer_bits, int input_left_shift, int total_signed_bits);
+
+void calculateActivationRangeQuantized(Activation activation, const circle::Tensor *output,
+ int32_t *activation_min, int32_t *activation_max);
+
+void calculateActivationRangeQuantized(Activation activation, int32_t output_zero_point,
+ float output_scale, DataType data_type,
+ int32_t *activation_min, int32_t *activation_max);
+
+// Decompose a double multiplier into a Q0.31 int32 representation of its
+// significand, and shift representation of its exponent.
+//
+// Handles an arbitrary positive multiplier. The 'shift' output-value is
+// basically the 'floating-point exponent' of the multiplier:
+// Negative for a right-shift (when the multiplier is <1), positive for a
+// left-shift (when the multiplier is >1)
+void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift);
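+// For example, 1.5 == 0.75 * 2^1 decomposes into quantized_multiplier ==
+// round(0.75 * 2^31) == 1610612736 with shift == 1, while 0.25 == 0.5 * 2^-1 decomposes
+// into quantized_multiplier == 1073741824 with shift == -1.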
+
+// Decompose a double multiplier into a Q0.31 int32 representation of its
+// significand, and a shift representation of the NEGATIVE of its exponent ---
+// this is intended as a RIGHT-shift.
+//
+// Restricted to the case where the multiplier < 1 (and non-negative).
+void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier,
+ int *left_shift);
+
+inline double getQuantizedConvolutionMultipler(float input_scale, float filter_scale,
+ float output_scale)
+{
+ const double input_product_scale = static_cast<double>(input_scale * filter_scale);
+ LUCI_INTERPRETER_CHECK(input_product_scale >= 0);
+ return input_product_scale / static_cast<double>(output_scale);
+}
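+// e.g. with input_scale == 0.5f, filter_scale == 0.25f and output_scale == 0.125f, the
+// effective multiplier is (0.5 * 0.25) / 0.125 == 1.0.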
+
+// TODO Rename getQuantizedConvolutionMultiplers to something more general;
+// it is used for non-conv operators too
+inline std::vector<double> getQuantizedConvolutionMultiplers(float input_scale,
+ const std::vector<float> &filter_scale,
+ float output_scale)
+{
+ std::vector<double> effective_output_scales;
+ size_t n = filter_scale.size();
+ effective_output_scales.reserve(n);
+ for (size_t i = 0; i < n; ++i)
+ {
+ effective_output_scales.push_back(
+ getQuantizedConvolutionMultipler(input_scale, filter_scale[i], output_scale));
+ }
+ return effective_output_scales;
+}
+
+struct ChannelQuantMultipliers
+{
+ int shift;
+ int32_t multiplier;
+ ChannelQuantMultipliers() = default;
+};
+
+inline std::vector<ChannelQuantMultipliers>
+quantizeMultipliers(const std::vector<double> &effective_scale)
+{
+ size_t n = effective_scale.size();
+ std::vector<ChannelQuantMultipliers> params(n);
+ for (size_t i = 0; i < n; ++i)
+ {
+ quantizeMultiplier(effective_scale[i], &params[i].multiplier, &params[i].shift);
+ }
+ return params;
+}
+
+// A list of quantized tensors in a format that can be used by kernels like
+// split and concatenation.
+template <bool is_const> class VectorOfQuantizedTensors : public VectorOfTensors<uint8_t, is_const>
+{
+public:
+ using typename VectorOfTensors<uint8_t, is_const>::TensorT;
+
+ // Build with the tensors in 'tensor_list'.
+ explicit VectorOfQuantizedTensors(const std::vector<TensorT *> &tensor_list)
+ : VectorOfTensors<uint8_t, is_const>(tensor_list)
+ {
+ for (TensorT *tensor : tensor_list)
+ {
+ zero_point_.push_back(tensor->zero_point());
+ scale_.push_back(tensor->scale());
+ }
+ }
+
+ const float *scale() const { return scale_.data(); }
+ const int32_t *zero_point() const { return zero_point_.data(); }
+
+private:
+ std::vector<int32_t> zero_point_;
+ std::vector<float> scale_;
+};
+#endif // DIS_QUANT
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_UTILS_H
diff --git a/onert-micro/luci-interpreter/src/kernels/While.cpp b/onert-micro/luci-interpreter/src/kernels/While.cpp
new file mode 100644
index 000000000..6826c2b26
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/While.cpp
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+
+#include <cstring>
+
+namespace luci_interpreter
+{
+
+void configure_kernel_CircleWhile(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ auto *main_runtime_graph = runtime_graph;
+
+ auto *runtime_module = runtime_graph->getRuntimeModule();
+
+ const auto *options = cur_op->builtin_options_as_WhileOptions();
+ const auto body_subgraph_index = options->body_subgraph_index();
+ const auto cond_subgraph_index = options->cond_subgraph_index();
+
+ auto *cond_runtime_graph = runtime_module->getRuntimeGraphAt(cond_subgraph_index);
+ auto *body_runtime_graph = runtime_module->getRuntimeGraphAt(body_subgraph_index);
+
+ body_runtime_graph->selectOwnSubgraph();
+ const auto body_input_size = body_runtime_graph->getNumOfInputTensors();
+ const auto body_output_size = body_runtime_graph->getNumOfOutputTensors();
+ LUCI_INTERPRETER_CHECK(body_input_size == cur_op->inputs()->size());
+ LUCI_INTERPRETER_CHECK(body_output_size == cur_op->outputs()->size());
+ LUCI_INTERPRETER_CHECK(body_output_size == cur_op->inputs()->size());
+ body_runtime_graph->invalidate();
+ body_runtime_graph->configure(false);
+
+ cond_runtime_graph->selectOwnSubgraph();
+ const auto cond_input_size = cond_runtime_graph->getNumOfInputTensors();
+ const auto cond_output_size = cond_runtime_graph->getNumOfOutputTensors();
+ LUCI_INTERPRETER_CHECK(cond_input_size == cur_op->inputs()->size());
+ LUCI_INTERPRETER_CHECK(cond_output_size == 1);
+ const circle::Tensor *cond_output_tensor = cond_runtime_graph->getOutputTensorByIndex(0);
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(cond_output_tensor) == DataType::BOOL);
+ cond_runtime_graph->invalidate();
+ cond_runtime_graph->configure(false);
+
+ main_runtime_graph->selectOwnSubgraph();
+}
+
+void execute_kernel_CircleWhile(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ auto *main_runtime_graph = runtime_graph;
+ auto *runtime_module = runtime_graph->getRuntimeModule();
+
+ const auto input_size = cur_op->inputs()->size();
+
+ std::vector<uint8_t *> operation_inputs_data(input_size);
+ std::vector<uint8_t *> operation_outputs_data;
+
+ std::vector<int32_t> input_sizes(input_size);
+
+ bool is_inplace = runtime_graph->is_inplace_op(cur_op);
+
+ for (int32_t i = 0; i < input_size; ++i)
+ {
+ const auto op_input_index = cur_op->inputs()->operator[](i);
+ const auto op_output_index = cur_op->outputs()->operator[](i);
+ assert(op_input_index != -1);
+ assert(op_output_index != -1);
+ const auto input = main_runtime_graph->getCircleTensorByIndex(op_input_index);
+ const auto output = main_runtime_graph->getCircleTensorByIndex(op_output_index);
+
+ input_sizes[i] = Tensor::num_elements(input) * size(Tensor::element_type(input));
+
+ auto *input_data = main_runtime_graph->getDataByTensor(input);
+
+ uint8_t *tensor_data = nullptr;
+ if (is_inplace)
+ {
+ if (input_data == nullptr)
+ {
+ tensor_data = new uint8_t[input_sizes[i]];
+ input_data = main_runtime_graph->getConstDataByTensor(input);
+ assert(input_data != nullptr);
+ std::memcpy(tensor_data, input_data, input_sizes[i]);
+ }
+ else
+ {
+ tensor_data = input_data;
+ }
+ }
+ else
+ {
+ if (input_data == nullptr)
+ input_data = main_runtime_graph->getConstDataByTensor(input);
+ assert(input_data != nullptr);
+ tensor_data = main_runtime_graph->getDataByTensor(output);
+ assert(tensor_data != nullptr);
+ std::memcpy(tensor_data, input_data, input_sizes[i]);
+ }
+ assert(tensor_data != nullptr);
+
+ operation_inputs_data[i] = tensor_data;
+ }
+
+ const auto *options = cur_op->builtin_options_as_WhileOptions();
+ const auto body_subgraph_index = options->body_subgraph_index();
+ const auto cond_subgraph_index = options->cond_subgraph_index();
+
+ auto *cond_runtime_graph = runtime_module->getRuntimeGraphAt(cond_subgraph_index);
+ auto *body_runtime_graph = runtime_module->getRuntimeGraphAt(body_subgraph_index);
+
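+ // The loop below implements the usual WHILE semantics, roughly:
+ //   args = inputs; while (cond(args)) args = body(args); outputs = args;
+ // cond and body run as separate subgraphs over the shared argument buffers.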
+ do
+ {
+ cond_runtime_graph->selectOwnSubgraph();
+
+ for (int32_t i = 0; i < input_size; ++i)
+ cond_runtime_graph->configureGraphInput(i, operation_inputs_data[i]);
+
+ cond_runtime_graph->execute();
+
+ bool cond_value = (cond_runtime_graph->getOutputDataByIndex(0))[0];
+ if (!cond_value)
+ break;
+
+ body_runtime_graph->selectOwnSubgraph();
+ for (int32_t i = 0; i < input_size; ++i)
+ body_runtime_graph->configureGraphInput(i, operation_inputs_data[i]);
+
+ body_runtime_graph->execute();
+
+ for (int32_t i = 0; i < input_size; ++i)
+ {
+ auto cur_output_body_data = body_runtime_graph->getOutputDataByIndex(i);
+ if (cur_output_body_data == nullptr)
+ continue;
+ std::memcpy(operation_inputs_data[i], cur_output_body_data, input_sizes[i]);
+ }
+ } while (true);
+
+ cond_runtime_graph->resetOutputTensorsData();
+ cond_runtime_graph->clearTensors();
+
+ body_runtime_graph->selectOwnSubgraph();
+ body_runtime_graph->resetOutputTensorsData();
+ body_runtime_graph->clearTensors();
+
+ main_runtime_graph->selectOwnSubgraph();
+
+ if (is_inplace)
+ {
+ for (int32_t i = 0; i < input_size; ++i)
+ {
+ const auto op_input_index = cur_op->inputs()->operator[](i);
+ const auto op_output_index = cur_op->outputs()->operator[](i);
+ assert(op_input_index != -1);
+ assert(op_output_index != -1);
+ const auto input = main_runtime_graph->getCircleTensorByIndex(op_input_index);
+ const auto output = main_runtime_graph->getCircleTensorByIndex(op_output_index);
+
+ if (main_runtime_graph->getDataByTensor(input))
+ {
+ main_runtime_graph->makeInplaceOperation(input, output);
+ }
+ else
+ {
+ main_runtime_graph->setDataToTensor(output, operation_inputs_data[i]);
+ }
+ }
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/While.test.cpp b/onert-micro/luci-interpreter/src/kernels/While.test.cpp
new file mode 100644
index 000000000..4873b9c04
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/kernels/While.test.cpp
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/test_models/while/WhileKernel.h"
+#include "luci_interpreter/test_models/while/NegWhileKernel.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class WhileTest : public ::testing::Test
+{
+ // Do nothing
+};
+
+template <typename T> std::vector<T> checkWhileKernel(test_kernel::TestDataBase<T> *test_data_base)
+{
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+
+ // Load the test model
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_base->get_model_ptr());
+ ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input);
+
+ auto *main_runtime_graph = runtime_module.getMainGraph();
+ assert(main_runtime_graph->getNumOfInputTensors() == 1);
+
+ // Set input data
+ {
+ auto *input_tensor_data = reinterpret_cast<T *>(main_runtime_graph->configureGraphInput(0));
+ std::copy(test_data_base->get_input_data_by_index(0).begin(),
+ test_data_base->get_input_data_by_index(0).end(), input_tensor_data);
+ }
+
+ runtime_module.execute();
+
+ assert(main_runtime_graph->getNumOfOutputTensors() == 1);
+
+ T *output_data = reinterpret_cast<T *>(main_runtime_graph->getOutputDataByIndex(0));
+ const size_t num_elements = (main_runtime_graph->getOutputDataSizeByIndex(0) / sizeof(T));
+ std::vector<T> output_data_vector(output_data, output_data + num_elements);
+ return output_data_vector;
+}
+
+TEST_F(WhileTest, MainTest_P)
+{
+ test_kernel::TestDataWhileKernel<int32_t> test_data_kernel;
+ std::vector<int32_t> output_data_vector = checkWhileKernel(&test_data_kernel);
+ EXPECT_THAT(output_data_vector, test_data_kernel.get_output_data_by_index(0));
+}
+
+TEST_F(WhileTest, MainTest_NEG)
+{
+ test_kernel::NegTestDataWhileKernel test_data_kernel;
+
+ MemoryManager memory_manager{};
+ RuntimeModule runtime_module{};
+ bool dealloc_input = true;
+ // Load the test model
+ auto *model_data_raw = reinterpret_cast<const char *>(test_data_kernel.get_model_ptr());
+ EXPECT_DEATH(ModuleLoader::load(&runtime_module, &memory_manager, model_data_raw, dealloc_input),
+ "");
+}
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/CMakeLists.txt b/onert-micro/luci-interpreter/src/loader/CMakeLists.txt
new file mode 100644
index 000000000..0ef63e296
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/loader/CMakeLists.txt
@@ -0,0 +1,15 @@
+set(SOURCES
+ GraphLoader.h
+ GraphLoader.cpp
+ ModuleLoader.h
+ ModuleLoader.cpp)
+
+add_library(${LUCI_INTERPRETER_LOADER} STATIC ${SOURCES})
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(${LUCI_INTERPRETER_LOADER} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
+target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}")
+
+target_link_libraries(${LUCI_INTERPRETER_LOADER}
+ PUBLIC ${LUCI_INTERPRETER_MEMORY_MANAGER} ${LUCI_INTERPRETER_CORE}
+ PRIVATE ${LUCI_INTERPRETER_KERNELS})
diff --git a/onert-micro/luci-interpreter/src/loader/GraphLoader.cpp b/onert-micro/luci-interpreter/src/loader/GraphLoader.cpp
new file mode 100644
index 000000000..26e207ac0
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/loader/GraphLoader.cpp
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loader/GraphLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+bool isInplaceOperation(const circle::BuiltinOperator &op)
+{
+ switch (op)
+ {
+ case circle::BuiltinOperator_ABS:
+ case circle::BuiltinOperator_LOGISTIC:
+ case circle::BuiltinOperator_RESHAPE:
+ case circle::BuiltinOperator_ELU:
+ case circle::BuiltinOperator_EXPAND_DIMS:
+ case circle::BuiltinOperator_EXP:
+ case circle::BuiltinOperator_TANH:
+ case circle::BuiltinOperator_LEAKY_RELU:
+ case circle::BuiltinOperator_RELU:
+ case circle::BuiltinOperator_RELU6:
+ case circle::BuiltinOperator_ADD:
+ case circle::BuiltinOperator_MUL:
+ case circle::BuiltinOperator_SUB:
+ case circle::BuiltinOperator_WHILE:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool isSingleUsageOfTensor(CircleReader *reader, const int32_t tensor_index)
+{
+ uint32_t usage_count = 0;
+
+ const auto operators = reader->operators();
+ for (uint32_t i = 0; i < operators.size(); ++i)
+ {
+ const auto *op = operators.at(i);
+ assert(op != nullptr);
+
+ const auto *op_inputs = op->inputs();
+ for (int32_t j = 0; j < op_inputs->size(); ++j)
+ {
+ const auto input_index = op_inputs->operator[](j);
+ if (input_index == tensor_index)
+ {
+ if (++usage_count > 1)
+ return false;
+ }
+ }
+ }
+
+ // Check that the tensor is not a graph output
+ if (usage_count == 1)
+ {
+ const auto &outputs_indexes = reader->outputs();
+ bool is_graph_output = (std::find(outputs_indexes.begin(), outputs_indexes.end(),
+ tensor_index) != outputs_indexes.end());
+ if (is_graph_output)
+ return false;
+ }
+
+ return true;
+}
+
+} // namespace
+
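+// Marks an operator for the inplace optimization when every non-constant input is used by
+// exactly one operator, the matching output is likewise used only once, the input and output
+// element counts match, and the output is not a graph output (see the checks below).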
+void GraphLoader::checkInplaceOps(CircleReader *reader, RuntimeGraph *runtime_graph)
+{
+ const auto operators = reader->operators();
+ const auto graph_outputs = reader->outputs();
+ for (uint32_t i = 0; i < operators.size(); ++i)
+ {
+ const auto *op = operators.at(i);
+ assert(op != nullptr);
+
+ // Check the inplace optimization for operations with a single input and a single output
+ if (isInplaceOperation(reader->builtin_code(op)))
+ {
+ const auto *op_inputs = op->inputs();
+ const auto *op_outputs = op->outputs();
+
+ bool is_inplace = true;
+ auto non_const_input_it = op_inputs->begin();
+ while (true)
+ {
+ non_const_input_it =
+ std::find_if(non_const_input_it, op_inputs->end(), [&reader](const auto input_idx) {
+ if (input_idx == -1)
+ return false;
+
+ return not Tensor::is_constant_tensor(reader, reader->tensors()[input_idx]);
+ });
+
+ if (non_const_input_it == op_inputs->end())
+ break;
+
+ auto dist = std::distance(op_inputs->begin(), non_const_input_it);
+
+ const auto non_const_input_idx = *non_const_input_it;
+
+ // Check single usage of input tensor
+ if (not isSingleUsageOfTensor(reader, non_const_input_idx))
+ {
+ is_inplace = false;
+ break;
+ }
+
+ // Check single usage of the output tensor
+ if (dist >= op_outputs->size() and op_outputs->size() == 1)
+ dist = 0;
+ assert(dist < op_outputs->size());
+ const auto output_index = op_outputs->operator[](dist);
+ if (not isSingleUsageOfTensor(reader, output_index))
+ {
+ is_inplace = false;
+ break;
+ }
+
+ // Check that num elements are equal
+ {
+ const auto *input_non_const_tensor = reader->tensors().at(non_const_input_idx);
+ const auto *output_tensor = reader->tensors().at(output_index);
+ if (Tensor::num_elements(input_non_const_tensor) != Tensor::num_elements(output_tensor))
+ {
+ is_inplace = false;
+ break;
+ }
+ }
+
+ // Check that the output is not a graph output tensor
+ // TODO: check this statement
+ {
+ if (std::find(graph_outputs.begin(), graph_outputs.end(), output_index) !=
+ graph_outputs.end())
+ {
+ is_inplace = false;
+ break;
+ }
+ }
+
+ non_const_input_it++;
+ }
+
+ if (is_inplace)
+ runtime_graph->addInplaceOpIndex(op);
+ }
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/GraphLoader.h b/onert-micro/luci-interpreter/src/loader/GraphLoader.h
new file mode 100644
index 000000000..3265f110e
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/loader/GraphLoader.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_LOADER_GRAPHLOADER_H
+#define LUCI_INTERPRETER_LOADER_GRAPHLOADER_H
+
+#include "core/RuntimeGraph.h"
+#include "luci_interpreter/core/reader/CircleMicroReader.h"
+
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+
+class GraphLoader
+{
+public:
+ static void checkInplaceOps(CircleReader *reader, RuntimeGraph *runtime_graph);
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_LOADER_GRAPHLOADER_H
diff --git a/onert-micro/luci-interpreter/src/loader/ModuleLoader.cpp b/onert-micro/luci-interpreter/src/loader/ModuleLoader.cpp
new file mode 100644
index 000000000..dcfa32983
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/loader/ModuleLoader.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ModuleLoader.h"
+
+#include "GraphLoader.h"
+
+namespace luci_interpreter
+{
+
+void ModuleLoader::load(RuntimeModule *runtime_module, SimpleMemoryManager *memory_manager,
+ const char *model_data_raw, bool dealloc_input)
+{
+ const circle::Model *model = circle::GetModel(model_data_raw);
+
+ CircleReader &reader = runtime_module->getCircleReader();
+ if (!reader.parse(model))
+ assert(false && "Error during parse");
+
+ for (size_t i = 0; i < reader.num_subgraph(); ++i)
+ {
+ if (!reader.select_subgraph(i))
+ assert(false && "Error during select subgraph");
+ runtime_module->addGraph(memory_manager);
+
+#ifndef USE_STATIC_ALLOC
+ auto *runtime_graph = runtime_module->getRuntimeGraphAt(i);
+ // For the dynamic memory manager we can use the inplace optimization
+ GraphLoader::checkInplaceOps(&reader, runtime_graph);
+#endif // USE_STATIC_ALLOC
+ }
+
+ // For the dynamic memory manager we build a memory allocate/deallocate plan and then
+ // configure kernels. For the static memory manager we only configure kernels.
+ for (size_t i = 0; i < reader.num_subgraph(); ++i)
+ {
+ auto *runtime_graph = runtime_module->getRuntimeGraphAt(i);
+#ifdef USE_STATIC_ALLOC
+ runtime_graph->configure_kernels();
+#else
+ runtime_graph->configure(dealloc_input);
+#endif // USE_STATIC_ALLOC
+ }
+
+ // Select main subgraph
+ reader.select_subgraph(0);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/ModuleLoader.h b/onert-micro/luci-interpreter/src/loader/ModuleLoader.h
new file mode 100644
index 000000000..bfe3e7076
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/loader/ModuleLoader.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_LOADER_MODULELOADER_H
+#define LUCI_INTERPRETER_LOADER_MODULELOADER_H
+
+#include "core/RuntimeModule.h"
+#include "luci_interpreter/core/reader/CircleMicroReader.h"
+
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+
+class ModuleLoader
+{
+public:
+ static void load(RuntimeModule *runtime_module, MemoryManager *memory_manager,
+ const char *model_data_raw, bool dealloc_input);
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_LOADER_MODULELOADER_H
diff --git a/onert-micro/luci-interpreter/src/memory_managers/BuddyMemoryManager.cpp b/onert-micro/luci-interpreter/src/memory_managers/BuddyMemoryManager.cpp
new file mode 100644
index 000000000..ca1c92bac
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/memory_managers/BuddyMemoryManager.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if 0
+
+#include "BuddyMemoryManager.h"
+
+namespace luci_interpreter
+{
+
+BuddyMemoryManager::BuddyMemoryManager(uint8_t *memory_start, int32_t memSize)
+{
+ int32_t p = lowerLog2(memSize);
+
+ // We assume that the requested size of memory does not exceed 4 GB
+ assert(p < 32);
+ memSize = 1 << p;
+
+ _start_block = reinterpret_cast<Block *>(memory_start);
+ _start_block->size = memSize - sizeof(Block);
+ _start_block->is_free = true;
+ _start_block->self = _start_block;
+ _num_blocks = 0;
+ _size = _start_block->size;
+
+ for (auto &_free_block : _free_blocks)
+ _free_block = nullptr;
+
+ addToBlocks(_start_block, p);
+}
+
+void BuddyMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
+{
+ const size_t element_size = getDataTypeSize(tensor.element_type());
+ const int32_t num_elements = tensor.shape().num_elements();
+ auto size = num_elements * element_size;
+ auto footprint = size + sizeof(Block);
+ auto l = (footprint & (footprint - 1)) == 0
+ ? lowerLog2(footprint)
+ : lowerLog2(footprint) + 1; // round footprint up to the next power of two
+
+ while (l < 32 && !_free_blocks[l])
+ l++;
+
+ assert(l < 32);
+
+ Block *tmp;
+ tmp = _free_blocks[l];
+ removeFromBlocks(tmp, l);
+
+ while ((tmp->size + sizeof(Block)) / 2 >= size + sizeof(Block))
+ {
+ divideBlock(tmp, l);
+ l--;
+ }
+
+ tmp->is_free = false;
+ tmp->self = tmp;
+ _num_blocks++;
+
+ auto *data = (uint8_t *)(tmp + 1);
+ tensor.set_data_buffer(data);
+}
+
+void BuddyMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
+{
+ auto data = tensor.data<void>();
+ auto *tmp = (Block *)((uint8_t *)data - sizeof(Block));
+
+ assert(tmp->self == tmp);
+
+ tmp->is_free = true;
+ addToBlocks(tmp, lowerLog2(tmp->size + sizeof(Block)));
+
+ while (tmp)
+ if (tmp->size == _size)
+ break;
+ else
+ tmp = mergeBlock(tmp);
+
+ _num_blocks--;
+ tensor.set_data_buffer(nullptr);
+}
+
+} // namespace luci_interpreter
+
+#endif
diff --git a/onert-micro/luci-interpreter/src/memory_managers/BuddyMemoryManager.h b/onert-micro/luci-interpreter/src/memory_managers/BuddyMemoryManager.h
new file mode 100644
index 000000000..ea56e97af
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/memory_managers/BuddyMemoryManager.h
@@ -0,0 +1,148 @@
+/* Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if 0
+
+#include "MemoryManager.h"
+
+#ifndef LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H
+
+namespace luci_interpreter
+{
+
+class BuddyMemoryManager : public IMemoryManager
+{
+public:
+ BuddyMemoryManager(uint8_t *memory_start, int32_t memSize);
+
+ void allocate_memory(luci_interpreter::Tensor &tensor) final;
+ void release_memory(luci_interpreter::Tensor &tensor) final;
+
+private:
+ struct Block
+ {
+ Block *next_free;
+ bool is_free;
+ uint32_t size;
+ // debug field
+ Block *self;
+ };
+
+ Block *_start_block;
+ int32_t _num_blocks;
+ uint32_t _size;
+ Block *_free_blocks[32]{};
+
+ static int32_t lowerLog2(uint32_t val)
+ {
+ int32_t i = 0;
+ while (val >>= 1)
+ i++;
+
+ return i;
+ }
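+ // e.g. lowerLog2(130) == 7 (the floor of log2(130)), so a 130-byte pool is rounded down
+ // to a 128-byte buddy arena by the BuddyMemoryManager constructor.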
+
+ void addToBlocks(Block *block, int32_t l)
+ {
+ if (!block)
+ return;
+
+ block->next_free = _free_blocks[l];
+ _free_blocks[l] = block;
+ }
+
+ void removeFromBlocks(const Block *block, int32_t l)
+ {
+ if (!block)
+ return;
+
+ Block *tmp = _free_blocks[l];
+
+ if (block == tmp)
+ {
+ _free_blocks[l] = block->next_free;
+ return;
+ }
+
+ while (tmp)
+ {
+ if (tmp->next_free == block)
+ {
+ tmp->next_free = block->next_free;
+ return;
+ }
+
+ tmp = tmp->next_free;
+ }
+ }
+
+ void divideBlock(Block *block, int32_t l)
+ {
+ int32_t size = ((block->size + sizeof(Block)) / 2) - sizeof(Block);
+
+ removeFromBlocks(block, l);
+
+ // there is no need to add to the free_blocks list here
+ block->is_free = true;
+ block->size = size;
+ block->self = block;
+
+ Block *buddy;
+ buddy = (Block *)((uint8_t *)block + sizeof(Block) + size);
+ buddy->is_free = true;
+ buddy->size = size;
+ buddy->self = buddy;
+
+ addToBlocks(buddy, l - 1);
+ }
+
+ Block *mergeBlock(Block *block)
+ {
+ Block *buddy;
+
+ const int32_t l = lowerLog2(block->size + sizeof(Block));
+
+ const int64_t address = ((uint8_t *)block - (uint8_t *)_start_block);
+ buddy = (Block *)((address ^ (1 << l)) + (uint8_t *)_start_block);
+
+ if (!buddy->is_free || buddy->size != block->size)
+ return nullptr;
+
+ if (block > buddy)
+ {
+ Block *x = block;
+ block = buddy;
+ buddy = x;
+ }
+
+ removeFromBlocks(block, l);
+ removeFromBlocks(buddy, l);
+
+ block->size = block->size * 2 + sizeof(Block);
+ block->is_free = true;
+ block->self = block;
+
+ addToBlocks(block, l + 1);
+
+ return block;
+ }
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H
+
+#endif
diff --git a/onert-micro/luci-interpreter/src/memory_managers/BuddyMemoryManager.test.cpp b/onert-micro/luci-interpreter/src/memory_managers/BuddyMemoryManager.test.cpp
new file mode 100644
index 000000000..996b36d26
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/memory_managers/BuddyMemoryManager.test.cpp
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if 0
+
+#include "BuddyMemoryManager.h"
+#include <gtest/gtest.h>
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(BuddyMemoryManager, basic)
+{
+ auto mem_pool = std::make_unique<uint8_t[]>(200);
+ auto buddy_memory_manager = std::make_unique<BuddyMemoryManager>(mem_pool.get(), 130);
+ Tensor first_tensor(DataType::U8, Shape({8}), AffineQuantization{}, "first_tensor");
+
+ buddy_memory_manager->allocate_memory(first_tensor);
+
+ uint8_t data_1[] = {1, 2, 3, 4, 5, 6, 7, 8};
+
+ first_tensor.writeData(data_1, 8);
+ uint8_t array_1[8];
+ first_tensor.readData(array_1, 8);
+ for (int i = 0; i < 8; i++)
+ {
+ EXPECT_EQ(data_1[i], array_1[i]);
+ }
+
+ Tensor second_tensor(DataType::U8, Shape({2, 5}), AffineQuantization{}, "second_tensor");
+ buddy_memory_manager->allocate_memory(second_tensor);
+
+ uint8_t data_2[2][5] = {{11, 22, 33, 44, 55}, {12, 23, 34, 45, 56}};
+ second_tensor.writeData(data_2, 10);
+
+ uint8_t array_2[2][5];
+ second_tensor.readData(array_2, 10);
+ for (int i = 0; i < 2; i++)
+ {
+ for (int j = 0; j < 5; j++)
+ {
+ EXPECT_EQ(data_2[i][j], array_2[i][j]);
+ }
+ }
+
+ buddy_memory_manager->release_memory(first_tensor);
+ EXPECT_EQ(first_tensor.data<void>(), nullptr);
+
+ buddy_memory_manager->release_memory(second_tensor);
+ EXPECT_EQ(second_tensor.data<void>(), nullptr);
+}
+
+} // namespace
+} // namespace luci_interpreter
+
+#endif
diff --git a/onert-micro/luci-interpreter/src/memory_managers/CMakeLists.txt b/onert-micro/luci-interpreter/src/memory_managers/CMakeLists.txt
new file mode 100644
index 000000000..e783d8d4f
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/memory_managers/CMakeLists.txt
@@ -0,0 +1,21 @@
+set(SOURCES
+ "SimpleMemoryManager.h" SimpleMemoryManager.cpp
+ "TestMemoryManager.h" TestMemoryManager.cpp
+ "BuddyMemoryManager.h" BuddyMemoryManager.cpp
+ "StaticMemoryManager.h" StaticMemoryManager.cpp)
+
+add_library(${LUCI_INTERPRETER_MEMORY_MANAGER} STATIC ${SOURCES})
+target_include_directories(${LUCI_INTERPRETER_MEMORY_MANAGER} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
+target_link_libraries(${LUCI_INTERPRETER_MEMORY_MANAGER} PUBLIC "luci_micro_circle_reader${READER_SUFFIX}")
+target_link_libraries(${LUCI_INTERPRETER_MEMORY_MANAGER} PUBLIC luci_micro_circle_schema)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+set(TEST_SOURCES BuddyMemoryManager.test.cpp)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(buddy_manager_test_micro ${TEST_SOURCES})
+target_link_libraries(buddy_manager_test_micro ${LUCI_INTERPRETER_BINARY})
diff --git a/onert-micro/luci-interpreter/src/memory_managers/SimpleMemoryManager.cpp b/onert-micro/luci-interpreter/src/memory_managers/SimpleMemoryManager.cpp
new file mode 100644
index 000000000..e8caa7c2a
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/memory_managers/SimpleMemoryManager.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef USE_STATIC_ALLOC
+
+#include "SimpleMemoryManager.h"
+
+namespace luci_interpreter
+{
+
+uint8_t *SimpleMemoryManager::allocate_memory(const circle::Tensor *tensor)
+{
+ const auto element_size = getDataTypeSize(Tensor::element_type(tensor));
+ const auto num_elements = Tensor::num_elements(tensor);
+
+ assert(element_size * num_elements > 0);
+
+ return new uint8_t[num_elements * element_size];
+}
+
+void SimpleMemoryManager::release_memory(uint8_t *data)
+{
+ if (data == nullptr)
+ return;
+
+ delete[] data;
+}
+
+} // namespace luci_interpreter
+
+#endif // USE_STATIC_ALLOC
diff --git a/onert-micro/luci-interpreter/src/memory_managers/SimpleMemoryManager.h b/onert-micro/luci-interpreter/src/memory_managers/SimpleMemoryManager.h
new file mode 100644
index 000000000..0817fb2cd
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/memory_managers/SimpleMemoryManager.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef USE_STATIC_ALLOC
+#ifndef LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H
+
+#include "luci_interpreter/core/DataType.h"
+#include "luci_interpreter/core/Tensor.h"
+
+#include <cassert>
+
+namespace luci_interpreter
+{
+
+class SimpleMemoryManager
+{
+public:
+ uint8_t *allocate_memory(const circle::Tensor *tensor);
+ void release_memory(uint8_t *data);
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H
+#endif // USE_STATIC_ALLOC
diff --git a/onert-micro/luci-interpreter/src/memory_managers/StaticMemoryManager.cpp b/onert-micro/luci-interpreter/src/memory_managers/StaticMemoryManager.cpp
new file mode 100644
index 000000000..08d085063
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/memory_managers/StaticMemoryManager.cpp
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef USE_STATIC_ALLOC
+
+#include "StaticMemoryManager.h"
+
+namespace luci_interpreter
+{
+
+uint8_t *StaticMemoryManager::allocate_memory(int32_t offset)
+{
+ assert(_buffer_ptr != nullptr);
+ return _buffer_ptr + offset;
+}
+
+uint8_t *StaticMemoryManager::allocate_memory_for_input(int32_t offset)
+{
+ assert(_input_buffer_ptr != nullptr);
+ return _input_buffer_ptr + offset;
+}
+
+uint8_t *StaticMemoryManager::allocate_memory_for_output(int32_t offset)
+{
+ assert(_output_buffer_ptr != nullptr);
+ return _output_buffer_ptr + offset;
+}
+
+void StaticMemoryManager::allocate_input_buf()
+{
+ assert(_input_req_size > 0);
+ if (_input_buffer_ptr == nullptr)
+ _input_buffer_ptr = new uint8_t[_input_req_size];
+}
+
+void StaticMemoryManager::allocate_output_buf()
+{
+ assert(_output_req_size > 0);
+ if (_output_buffer_ptr == nullptr)
+ _output_buffer_ptr = new uint8_t[_output_req_size];
+}
+
+void StaticMemoryManager::allocate_computing_buf()
+{
+ assert(_buffer_req_size > 0);
+ if (_buffer_ptr == nullptr)
+ _buffer_ptr = new uint8_t[_buffer_req_size];
+}
+
+void StaticMemoryManager::release_computing_buf()
+{
+ delete[] _buffer_ptr;
+ _buffer_ptr = nullptr;
+}
+
+void StaticMemoryManager::release_input_buf()
+{
+ delete[] _input_buffer_ptr;
+ _input_buffer_ptr = nullptr;
+}
+
+void StaticMemoryManager::release_output_buf()
+{
+ delete[] _output_buffer_ptr;
+ _output_buffer_ptr = nullptr;
+}
+
+} // namespace luci_interpreter
+
+#endif // USE_STATIC_ALLOC
diff --git a/onert-micro/luci-interpreter/src/memory_managers/StaticMemoryManager.h b/onert-micro/luci-interpreter/src/memory_managers/StaticMemoryManager.h
new file mode 100644
index 000000000..2971e3866
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/memory_managers/StaticMemoryManager.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef USE_STATIC_ALLOC
+
+#ifndef LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H
+
+#include "luci_interpreter/core/DataType.h"
+#include "luci_interpreter/core/Tensor.h"
+
+#include <cassert>
+
+namespace luci_interpreter
+{
+
+// Used for allocations in a static buffer, using offsets defined in the luci model.
+class StaticMemoryManager
+{
+public:
+ StaticMemoryManager() = delete;
+
+ // Initializes the static memory manager with precalculated required buffer sizes for the
+ // input, output, and intermediate computation buffers.
+ // The static memory manager uses a common buffer for the input, the output, and the
+ // intermediate computations.
+ // TODO Remove these *_req_size parameters and read them from the circle file
+ explicit StaticMemoryManager(int32_t input_req_size, int32_t buffer_req_size,
+ int32_t output_req_size)
+ : _input_buffer_ptr(nullptr), _buffer_ptr(nullptr), _output_buffer_ptr(nullptr),
+ _input_req_size(input_req_size), _buffer_req_size(buffer_req_size),
+ _output_req_size(output_req_size)
+ { /* Do nothing */
+ }
+
+ // Returns a pointer into _buffer_ptr at the given offset
+ uint8_t *allocate_memory(int32_t offset);
+ // Returns a pointer into the input buffer at the given offset
+ uint8_t *allocate_memory_for_input(int32_t offset);
+ // Returns a pointer into the output buffer at the given offset
+ uint8_t *allocate_memory_for_output(int32_t offset);
+
+ // Methods to allocate and release the data buffers
+ // Allocates the input memory buffer of _input_req_size bytes. The resulting pointer is
+ // _input_buffer_ptr
+ void allocate_input_buf();
+ // Allocates the output memory buffer of _output_req_size bytes. The resulting pointer is
+ // _output_buffer_ptr
+ void allocate_output_buf();
+ // Allocates the intermediate computation memory buffer of _buffer_req_size bytes. The
+ // resulting pointer is _buffer_ptr
+ void allocate_computing_buf();
+
+ // Releases the memory of the intermediate computation buffer
+ void release_computing_buf();
+ // Releases the memory of the input buffer
+ void release_input_buf();
+ // Releases the memory of the output buffer
+ void release_output_buf();
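+
+ // A typical lifecycle sketch (illustrative): construct the manager with the three buffer
+ // sizes, call allocate_input_buf() / allocate_computing_buf() / allocate_output_buf() before
+ // inference, hand out tensor pointers via the allocate_memory* methods, and release the
+ // buffers with the matching release_* methods once inference is done.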
+
+private:
+ // Stores a pointer to the beginning of the allocated memory buffer.
+ uint8_t *_buffer_ptr;
+ uint8_t *_input_buffer_ptr;
+ uint8_t *_output_buffer_ptr;
+
+ // TODO Remove these fields and read them from the circle file
+ int32_t _input_req_size{};
+ int32_t _buffer_req_size{};
+ int32_t _output_req_size{};
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H
+
+#endif // USE_STATIC_ALLOC
diff --git a/onert-micro/luci-interpreter/src/memory_managers/TestMemoryManager.cpp b/onert-micro/luci-interpreter/src/memory_managers/TestMemoryManager.cpp
new file mode 100644
index 000000000..803e038d2
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/memory_managers/TestMemoryManager.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// TODO Enable it
+
+#if 0
+
+#include "TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+
+void TestMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
+{
+ if (!tensor.is_allocatable())
+ {
+ return;
+ }
+ if (tensor.is_data_allocated())
+ {
+ release_memory(tensor);
+ }
+ const auto element_size = getDataTypeSize(tensor.element_type());
+ const auto num_elements = tensor.shape().num_elements();
+
+ auto *data = new uint8_t[num_elements * element_size];
+ allocations.push_back(data);
+ tensor.set_data_buffer(data);
+}
+
+void TestMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
+{
+ tensor.set_data_buffer(nullptr);
+}
+
+} // namespace luci_interpreter
+
+#endif
diff --git a/onert-micro/luci-interpreter/src/memory_managers/TestMemoryManager.h b/onert-micro/luci-interpreter/src/memory_managers/TestMemoryManager.h
new file mode 100644
index 000000000..25ee38dd8
--- /dev/null
+++ b/onert-micro/luci-interpreter/src/memory_managers/TestMemoryManager.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// TODO Enable it
+
+#if 0
+
+#ifndef LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H
+
+#include "MemoryManager.h"
+
+namespace luci_interpreter
+{
+// Memory manager for use in kernel tests. It eliminates the need to manually delete the
+// allocated memory in tests: the manager remembers all of its allocations and deletes them
+// all in its destructor.
+class TestMemoryManager : public IMemoryManager
+{
+public:
+ void allocate_memory(luci_interpreter::Tensor &tensor) final;
+ void release_memory(luci_interpreter::Tensor &tensor) final;
+
+ ~TestMemoryManager() override
+ {
+ for (auto allocation : allocations)
+ {
+ delete[] allocation;
+ }
+ }
+
+private:
+ std::vector<uint8_t *> allocations;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H
+
+#endif
diff --git a/onert-micro/requires.cmake b/onert-micro/requires.cmake
new file mode 100644
index 000000000..8b1378917
--- /dev/null
+++ b/onert-micro/requires.cmake
@@ -0,0 +1 @@
+
diff --git a/onert-micro/standalone/CMakeLists.txt b/onert-micro/standalone/CMakeLists.txt
new file mode 100644
index 000000000..e8495974c
--- /dev/null
+++ b/onert-micro/standalone/CMakeLists.txt
@@ -0,0 +1,14 @@
+cmake_minimum_required(VERSION 3.15)
+project(luci_interpreter_micro_standalone)
+
+include(${NNAS_ROOT}/infra/onert-micro/utils.cmake)
+
+if (NOT ${NOT_BUILD_EXTERNALS})
+ nnas_find_package(FlatBuffersSource EXACT 2.0 QUIET)
+ include_directories(${FlatBuffersSource_DIR}/include)
+else()
+ include_directories(${FlatBuffersSource_DIR})
+endif()
+
+# TODO: fix luci/plan for new luci-micro without luci/IR
+add_subdirectory(${NNAS_PROJECT_SOURCE_DIR}/onert-micro/luci-interpreter ${CMAKE_CURRENT_BINARY_DIR}/luci-interpreter)
diff --git a/onert-micro/tests/mbed-os/CMakeLists.txt b/onert-micro/tests/mbed-os/CMakeLists.txt
new file mode 100644
index 000000000..8785ddf39
--- /dev/null
+++ b/onert-micro/tests/mbed-os/CMakeLists.txt
@@ -0,0 +1,194 @@
+CMAKE_MINIMUM_REQUIRED(VERSION 3.9)
+SET(CMAKE_SYSTEM_NAME Generic)
+SET(CMAKE_CROSSCOMPILING TRUE)
+
+# force compiler settings
+SET(CMAKE_C_COMPILER_WORKS TRUE)
+SET(CMAKE_CXX_COMPILER_WORKS TRUE)
+
+# force cmake compilers
+SET(CMAKE_ASM_COMPILER "arm-none-eabi-gcc")
+SET(CMAKE_C_COMPILER "arm-none-eabi-gcc")
+SET(CMAKE_CXX_COMPILER "arm-none-eabi-g++")
+SET(ELF2BIN "arm-none-eabi-objcopy")
+
+
+# if the environment does not specify build type, set to Debug
+IF (NOT CMAKE_BUILD_TYPE)
+ set(CMAKE_BUILD_TYPE "Debug"
+ CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel."
+ FORCE)
+ENDIF ()
+
+# here starts the project
+PROJECT(mbed-os-example-onert-micro C CXX ASM)
+
+# uncomment below to have a verbose build process
+#SET(CMAKE_VERBOSE_MAKEFILE ON)
+
+SET(LD_SYS_LIBS "-Wl,--start-group -lstdc++ -lsupc++ -lm -lc -lgcc -lnosys -Wl,--end-group")
+
+
+SET(CMAKE_C_FLAGS "-g3 -std=gnu11 -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers")
+SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fmessage-length=0 -fexceptions -ffunction-sections -fdata-sections")
+SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -funsigned-char -MMD -fomit-frame-pointer -Og -DMBED_DEBUG")
+SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMBED_TRAP_ERRORS_ENABLED=1 -DMBED_MINIMAL_PRINTF -mcpu=cortex-m7 -mthumb")
+SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=fpv5-d16 -mfloat-abi=softfp -DMBED_ROM_START=0x8000000")
+SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMBED_ROM_SIZE=0x200000 -DMBED_RAM_START=0x20000000 -DMBED_RAM_SIZE=0x20000")
+SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMBED_RAM1_START=0x24000000 -DMBED_RAM1_SIZE=0x80000")
+SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -include ${CMAKE_CURRENT_SOURCE_DIR}/mbed_config.h")
+
+SET(CMAKE_CXX_FLAGS "-g3 -std=gnu++14 -frtti -Wvla -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers")
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fmessage-length=0 -fexceptions -ffunction-sections -fdata-sections")
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -funsigned-char -MMD -fomit-frame-pointer -Og -DMBED_DEBUG")
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMBED_TRAP_ERRORS_ENABLED=1 -DMBED_MINIMAL_PRINTF -mcpu=cortex-m7")
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mthumb -mfpu=fpv5-d16 -mfloat-abi=softfp -DMBED_ROM_START=0x8000000")
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMBED_ROM_SIZE=0x200000 -DMBED_RAM_START=0x20000000 -DMBED_RAM_SIZE=0x20000")
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMBED_RAM1_START=0x24000000 -DMBED_RAM1_SIZE=0x80000")
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -include ${CMAKE_CURRENT_SOURCE_DIR}/mbed_config.h")
+
+SET(CMAKE_ASM_FLAGS "-g3 -x assembler-with-cpp -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers")
+SET(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -fmessage-length=0 -fexceptions -ffunction-sections -fdata-sections")
+SET(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -funsigned-char -MMD -fomit-frame-pointer -Og -DMBED_DEBUG")
+SET(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -DMBED_TRAP_ERRORS_ENABLED=1 -DMBED_MINIMAL_PRINTF -mcpu=cortex-m7")
+SET(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -mthumb -mfpu=fpv5-d16 -mfloat-abi=softfp ")
+SET(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -include ${CMAKE_CURRENT_SOURCE_DIR}/mbed_config.h")
+
+SET(CMAKE_CXX_LINK_FLAGS "-Wl,--gc-sections -Wl,--wrap,main -Wl,--wrap,_malloc_r -Wl,--wrap,_free_r")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -Wl,--wrap,_realloc_r -Wl,--wrap,__memalign_r -Wl,--wrap,__calloc_r")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -Wl,--wrap,exit -Wl,--wrap,atexit -Wl,-n -Wl,--wrap,printf")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -Wl,--wrap,sprintf -Wl,--wrap,snprintf -Wl,--wrap,vprintf")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -Wl,--wrap,vsprintf -Wl,--wrap,vsnprintf -Wl,--wrap,fprintf")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -Wl,--wrap,vfprintf -mcpu=cortex-m7 -mthumb -mfpu=fpv5-d16")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -mfloat-abi=softfp -Wall -Wextra -Wno-unused-parameter")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -Wno-missing-field-initializers -fmessage-length=0 -fexceptions")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -ffunction-sections -fdata-sections -funsigned-char -MMD")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -fomit-frame-pointer -Og -DMBED_DEBUG -DMBED_TRAP_ERRORS_ENABLED=1")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -DMBED_MINIMAL_PRINTF -mcpu=cortex-m7 -mthumb -mfpu=fpv5-d16")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -mfloat-abi=softfp -DMBED_ROM_START=0x8000000")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -DMBED_ROM_SIZE=0x200000 -DMBED_RAM_START=0x20000400")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -DMBED_RAM_SIZE=0x1FC00 -DMBED_RAM1_START=0x24000000")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -DMBED_RAM1_SIZE=0x80000 -DMBED_BOOT_STACK_SIZE=1024 -DXIP_ENABLE=0")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} ${LD_SYS_LIBS} -T ${CMAKE_BINARY_DIR}/build_test_pp.link_script.ld")
+
+ADD_DEFINITIONS(
+ -DARM_MATH_CM7
+ -DCOMPONENT_FLASHIAP=1
+ -DDEVICE_ANALOGIN=1
+ -DDEVICE_ANALOGOUT=1
+ -DDEVICE_CAN=1
+ -DDEVICE_CRC=1
+ -DDEVICE_EMAC=1
+ -DDEVICE_FLASH=1
+ -DDEVICE_I2C=1
+ -DDEVICE_I2CSLAVE=1
+ -DDEVICE_I2C_ASYNCH=1
+ -DDEVICE_INTERRUPTIN=1
+ -DDEVICE_LPTICKER=1
+ -DDEVICE_MPU=1
+ -DDEVICE_PORTIN=1
+ -DDEVICE_PORTINOUT=1
+ -DDEVICE_PORTOUT=1
+ -DDEVICE_PWMOUT=1
+ -DDEVICE_RESET_REASON=1
+ -DDEVICE_RTC=1
+ -DDEVICE_SERIAL=1
+ -DDEVICE_SERIAL_FC=1
+ -DDEVICE_SLEEP=1
+ -DDEVICE_SPI=1
+ -DDEVICE_SPISLAVE=1
+ -DDEVICE_SPI_ASYNCH=1
+ -DDEVICE_STDIO_MESSAGES=1
+ -DDEVICE_TRNG=1
+ -DDEVICE_USBDEVICE=1
+ -DDEVICE_USTICKER=1
+ -DDEVICE_WATCHDOG=1
+ -DEXTRA_IDLE_STACK_REQUIRED
+ -DMBED_BUILD_TIMESTAMP=1640167847.81
+ -DMBED_TICKLESS
+ -DSTM32H743xx
+ -DTARGET_CORTEX
+ -DTARGET_CORTEX_M
+ -DTARGET_FF_ARDUINO_UNO
+ -DTARGET_LIKE_CORTEX_M7
+ -DTARGET_LIKE_MBED
+ -DTARGET_M7
+ -DTARGET_MCU_STM32
+ -DTARGET_MCU_STM32H7
+ -DTARGET_MCU_STM32H743xI
+ -DTARGET_NAME=NUCLEO_H743ZI2
+ -DTARGET_NUCLEO_H743ZI2
+ -DTARGET_RELEASE
+ -DTARGET_RTOS_M4_M7
+ -DTARGET_STM
+ -DTARGET_STM32H7
+ -DTARGET_STM32H743xI
+ -DTOOLCHAIN_GCC
+ -DTOOLCHAIN_GCC_ARM
+ -DTRANSACTION_QUEUE_SIZE_SPI=2
+ -DUSE_FULL_LL_DRIVER
+ -DUSE_HAL_DRIVER
+ -D__CMSIS_RTOS
+ -D__CORTEX_M7
+ -D__FPU_PRESENT=1
+ -D__MBED_CMSIS_RTOS_CM
+ -D__MBED__=1
+ -DMBED_MEM_TRACING_ENABLED=0
+)
+
+include(mbed-sources.cmake)
+
+set_sources_mbed(${MbedOSSource_DIR})
+list(APPEND SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/startup_stm32h743xx.S")
+
+
+add_library(mbed_os STATIC ${SOURCES})
+
+target_include_directories_mbed(mbed_os ${MbedOSSource_DIR})
+
+
+SET_TARGET_PROPERTIES(mbed_os PROPERTIES ENABLE_EXPORTS 1)
+# add syslibs dependencies to create the correct linker order
+TARGET_LINK_LIBRARIES(mbed_os -lstdc++ -lsupc++ -lm -lc -lgcc -lnosys)
+
+add_executable(build_test main.cpp)
+
+target_link_libraries(build_test mbed_os)
+target_include_directories_mbed(build_test ${MbedOSSource_DIR})
+
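+# Link the prebuilt luci-interpreter static libraries from the ARM onert-micro
+# build; MICRO_ARM_BUILD_DIR is expected to be supplied by the surrounding
+# project configuration.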
+target_link_libraries(mbed_os "${MICRO_ARM_BUILD_DIR}/luci-interpreter/src/core/reader/libluci_micro_circle_reader.a")
+target_link_libraries(mbed_os "${MICRO_ARM_BUILD_DIR}/luci-interpreter/src/core/libluci_interpreter_core_micro.a")
+target_link_libraries(mbed_os "${MICRO_ARM_BUILD_DIR}/luci-interpreter/src/kernels/libluci_interpreter_kernels_micro.a")
+target_link_libraries(mbed_os "${MICRO_ARM_BUILD_DIR}/luci-interpreter/src/kernels/libluci_interpreter_mcu_pal.a")
+target_link_libraries(mbed_os "${MICRO_ARM_BUILD_DIR}/luci-interpreter/src/loader/libluci_interpreter_loader_micro.a")
+target_link_libraries(mbed_os "${MICRO_ARM_BUILD_DIR}/luci-interpreter/src/libluci_interpreter_micro.a")
+
+target_include_directories(build_test PRIVATE
+ ${ONERTMICRO_SRC_DIR}/luci-interpreter/include
+ ${CMAKE_CURRENT_SOURCE_DIR}
+ ${FlatBuffersSource_DIR}/include
+ )
+
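+# Run the target linker script through arm-none-eabi-cpp so the
+# MBED_ROM_*/MBED_RAM_* memory-layout macros are expanded; the preprocessed
+# build_test_pp.link_script.ld is the -T script referenced in
+# CMAKE_CXX_LINK_FLAGS above.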
+add_custom_command(TARGET build_test PRE_LINK
+ COMMAND "arm-none-eabi-cpp" -E -P -Wl,--gc-sections -Wl,--wrap,main -Wl,--wrap,_malloc_r
+ -Wl,--wrap,_free_r -Wl,--wrap,_realloc_r -Wl,--wrap,_memalign_r -Wl,--wrap,_calloc_r
+ -Wl,--wrap,exit -Wl,--wrap,atexit -Wl,-n -Wl,--wrap,printf -Wl,--wrap,sprintf
+ -Wl,--wrap,snprintf -Wl,--wrap,vprintf -Wl,--wrap,vsprintf -Wl,--wrap,vsnprintf
+ -Wl,--wrap,fprintf -Wl,--wrap,vfprintf -mcpu=cortex-m7 -mthumb -mfpu=fpv5-d16 -mfloat-abi=softfp
+ -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -fmessage-length=0
+ -fexceptions -ffunction-sections -fdata-sections -funsigned-char -MMD -fomit-frame-pointer
+ -Og -DMBED_DEBUG -DMBED_TRAP_ERRORS_ENABLED=1 -DMBED_MINIMAL_PRINTF -mcpu=cortex-m7 -mthumb
+ -mfpu=fpv5-d16 -mfloat-abi=softfp -DMBED_ROM_START=0x8000000 -DMBED_ROM_SIZE=0x200000
+ -DMBED_RAM_START=0x20000400 -DMBED_RAM_SIZE=0x1FC00 -DMBED_RAM1_START=0x24000000 -DMBED_RAM1_SIZE=0x80000
+ -DMBED_BOOT_STACK_SIZE=1024 -DXIP_ENABLE=0
+ ${MbedOSSource_DIR}/targets/TARGET_STM/TARGET_STM32H7/TARGET_STM32H743xI/TOOLCHAIN_GCC_ARM/STM32H743xI.ld
+ -o ${CMAKE_CURRENT_BINARY_DIR}/build_test_pp.link_script.ld
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+ BYPRODUCTS "${CMAKE_CURRENT_BINARY_DIR}/build_test_pp.link_script.ld"
+ )
+
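+# After linking, convert the ELF into a raw binary image; ELF2BIN is expected
+# to name an objcopy-compatible tool (e.g. arm-none-eabi-objcopy).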
+add_custom_command(TARGET build_test POST_BUILD
+ COMMAND ${ELF2BIN} -O binary $<TARGET_FILE:build_test> $<TARGET_FILE:build_test>.bin
+ COMMAND ${CMAKE_COMMAND} -E echo "-- built: $<TARGET_FILE:build_test>.bin"
+ )
diff --git a/onert-micro/tests/mbed-os/main.cpp b/onert-micro/tests/mbed-os/main.cpp
new file mode 100644
index 000000000..cdbe14b52
--- /dev/null
+++ b/onert-micro/tests/mbed-os/main.cpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// Minimal bare-metal "Hello, World" used to smoke-test this build under QEMU.
+static volatile unsigned int *const UART_DR = (unsigned int *)0x40011004; // memory-mapped UART data register
+
+void uart_print(const char *s)
+{
+ while (*s != '\0')
+ {
+    *UART_DR = *s; // writing the data register transmits one byte on the emulated UART
+ s++;
+ }
+}
+
+int main() { uart_print("Hello, World!\n"); }
diff --git a/onert-micro/tests/mbed-os/mbed-sources.cmake b/onert-micro/tests/mbed-os/mbed-sources.cmake
new file mode 100644
index 000000000..25f9e31e8
--- /dev/null
+++ b/onert-micro/tests/mbed-os/mbed-sources.cmake
@@ -0,0 +1,1589 @@
+macro(set_sources_mbed) # ARGV0: path to the mbed-os source tree
+ set(SOURCES
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/Include/cmsis_os2.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/Include/os_tick.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Config/RTX_Config.c
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Config/RTX_Config.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Include/rtx_def.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Include/rtx_evr.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Include/rtx_os.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Include1/cmsis_os.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Library/cmsis_os1.c
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/TOOLCHAIN_GCC/TARGET_RTOS_M4_M7/irq_cm4f.S
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_core_c.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_core_ca.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_core_cm.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_delay.c
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_evflags.c
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_evr.c
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_kernel.c
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_lib.c
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_lib.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_memory.c
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_mempool.c
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_msgqueue.c
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_mutex.c
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_semaphore.c
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_system.c
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_thread.c
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_timer.c
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/Source/os_systick.c
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/Source/os_tick_ptim.c
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/cachel1_armv7.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/cmsis_armcc.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/cmsis_armclang.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/cmsis_armclang_ltm.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/cmsis_compiler.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/cmsis_gcc.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/cmsis_iccarm.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/cmsis_version.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_armv81mml.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_armv8mbl.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_armv8mml.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_cm0.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_cm0plus.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_cm1.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_cm23.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_cm3.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_cm33.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_cm35p.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_cm4.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_cm55.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_cm7.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_sc000.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_sc300.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/mpu_armv7.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/mpu_armv8.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/pmu_armv8.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/tz_context.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Source/mbed_tz_context.c
+ ${ARGV0}/cmsis/device/RTE/include/RTE_Components.h
+ ${ARGV0}/cmsis/device/mbed_cmsis_conf.h
+ ${ARGV0}/cmsis/device/rtos/TOOLCHAIN_GCC_ARM/mbed_boot_gcc_arm.c
+ ${ARGV0}/cmsis/device/rtos/include/mbed_boot.h
+ ${ARGV0}/cmsis/device/rtos/include/mbed_rtx_conf.h
+ ${ARGV0}/cmsis/device/rtos/include/mbed_rtx_storage.h
+ ${ARGV0}/cmsis/device/rtos/source/mbed_boot.c
+ ${ARGV0}/cmsis/device/rtos/source/mbed_rtos_rtx.c
+ ${ARGV0}/cmsis/device/rtos/source/mbed_rtx_handlers.c
+ ${ARGV0}/cmsis/device/rtos/source/mbed_rtx_idle.cpp
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/API/ATHandler.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/API/CellularContext.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/API/CellularDevice.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/API/CellularInformation.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/API/CellularNetwork.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/API/CellularSMS.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/AT/AT_CellularContext.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/AT/AT_CellularDevice.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/AT/AT_CellularInformation.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/AT/AT_CellularNetwork.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/AT/AT_CellularSMS.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/AT/AT_CellularStack.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/AT/AT_ControlPlane_netif.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/common/APN_db.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/common/CellularCommon.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/common/CellularList.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/common/CellularLog.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/common/CellularUtil.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/device/CellularStateMachine.h
+ ${ARGV0}/connectivity/cellular/source/framework/AT/AT_CellularContext.cpp
+ ${ARGV0}/connectivity/cellular/source/framework/AT/AT_CellularDevice.cpp
+ ${ARGV0}/connectivity/cellular/source/framework/AT/AT_CellularInformation.cpp
+ ${ARGV0}/connectivity/cellular/source/framework/AT/AT_CellularNetwork.cpp
+ ${ARGV0}/connectivity/cellular/source/framework/AT/AT_CellularSMS.cpp
+ ${ARGV0}/connectivity/cellular/source/framework/AT/AT_CellularStack.cpp
+ ${ARGV0}/connectivity/cellular/source/framework/AT/AT_ControlPlane_netif.cpp
+ ${ARGV0}/connectivity/cellular/source/framework/common/APN_db.cpp
+ ${ARGV0}/connectivity/cellular/source/framework/common/CellularLog.cpp
+ ${ARGV0}/connectivity/cellular/source/framework/common/CellularUtil.cpp
+ ${ARGV0}/connectivity/cellular/source/framework/device/ATHandler.cpp
+ ${ARGV0}/connectivity/cellular/source/framework/device/CellularContext.cpp
+ ${ARGV0}/connectivity/cellular/source/framework/device/CellularDevice.cpp
+ ${ARGV0}/connectivity/cellular/source/framework/device/CellularStateMachine.cpp
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/atmel-rf-driver/atmel-rf-driver/NanostackRfPhyAtmel.h
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/atmel-rf-driver/source/AT86RF215Reg.h
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/atmel-rf-driver/source/AT86RFReg.h
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/atmel-rf-driver/source/NanostackRfPhyAT86RF215.cpp
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/atmel-rf-driver/source/NanostackRfPhyAtmel.cpp
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/atmel-rf-driver/source/at24mac.cpp
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/atmel-rf-driver/source/at24mac.h
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/atmel-rf-driver/source/rfbits.h
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/mcr20a-rf-driver/mcr20a-rf-driver/NanostackRfPhyMcr20a.h
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/mcr20a-rf-driver/source/MCR20Drv.c
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/mcr20a-rf-driver/source/MCR20Drv.h
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/mcr20a-rf-driver/source/MCR20Overwrites.h
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/mcr20a-rf-driver/source/MCR20Reg.h
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/mcr20a-rf-driver/source/NanostackRfPhyMcr20a.cpp
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/mcr20a-rf-driver/source/XcvrSpi.h
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/stm-s2lp-rf-driver/source/NanostackRfPhys2lp.cpp
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/stm-s2lp-rf-driver/source/at24mac_s2lp.cpp
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/stm-s2lp-rf-driver/source/at24mac_s2lp.h
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/stm-s2lp-rf-driver/source/rf_configuration.c
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/stm-s2lp-rf-driver/source/rf_configuration.h
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/stm-s2lp-rf-driver/source/s2lpReg.h
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/stm-s2lp-rf-driver/stm-s2lp-rf-driver/NanostackRfPhys2lp.h
+ ${ARGV0}/connectivity/drivers/cellular/Altair/ALT1250/PPP/ALT1250_PPP.cpp
+ ${ARGV0}/connectivity/drivers/cellular/Altair/ALT1250/PPP/ALT1250_PPP.h
+ ${ARGV0}/connectivity/drivers/cellular/Altair/ALT1250/PPP/ALT1250_PPP_CellularContext.cpp
+ ${ARGV0}/connectivity/drivers/cellular/Altair/ALT1250/PPP/ALT1250_PPP_CellularContext.h
+ ${ARGV0}/connectivity/drivers/cellular/Altair/ALT1250/PPP/ALT1250_PPP_CellularNetwork.cpp
+ ${ARGV0}/connectivity/drivers/cellular/Altair/ALT1250/PPP/ALT1250_PPP_CellularNetwork.h
+ ${ARGV0}/connectivity/drivers/cellular/GEMALTO/CINTERION/GEMALTO_CINTERION.cpp
+ ${ARGV0}/connectivity/drivers/cellular/GEMALTO/CINTERION/GEMALTO_CINTERION.h
+ ${ARGV0}/connectivity/drivers/cellular/GEMALTO/CINTERION/GEMALTO_CINTERION_CellularContext.cpp
+ ${ARGV0}/connectivity/drivers/cellular/GEMALTO/CINTERION/GEMALTO_CINTERION_CellularContext.h
+ ${ARGV0}/connectivity/drivers/cellular/GEMALTO/CINTERION/GEMALTO_CINTERION_CellularInformation.cpp
+ ${ARGV0}/connectivity/drivers/cellular/GEMALTO/CINTERION/GEMALTO_CINTERION_CellularInformation.h
+ ${ARGV0}/connectivity/drivers/cellular/GEMALTO/CINTERION/GEMALTO_CINTERION_CellularStack.cpp
+ ${ARGV0}/connectivity/drivers/cellular/GEMALTO/CINTERION/GEMALTO_CINTERION_CellularStack.h
+ ${ARGV0}/connectivity/drivers/cellular/GENERIC/GENERIC_AT3GPP/GENERIC_AT3GPP.cpp
+ ${ARGV0}/connectivity/drivers/cellular/GENERIC/GENERIC_AT3GPP/GENERIC_AT3GPP.h
+ ${ARGV0}/connectivity/drivers/cellular/MultiTech/DragonflyNano/PPP/SARA4_PPP.cpp
+ ${ARGV0}/connectivity/drivers/cellular/MultiTech/DragonflyNano/PPP/SARA4_PPP.h
+ ${ARGV0}/connectivity/drivers/cellular/MultiTech/DragonflyNano/PPP/SARA4_PPP_CellularNetwork.cpp
+ ${ARGV0}/connectivity/drivers/cellular/MultiTech/DragonflyNano/PPP/SARA4_PPP_CellularNetwork.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BC95/QUECTEL_BC95.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BC95/QUECTEL_BC95.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BC95/QUECTEL_BC95_CellularContext.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BC95/QUECTEL_BC95_CellularContext.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BC95/QUECTEL_BC95_CellularInformation.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BC95/QUECTEL_BC95_CellularInformation.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BC95/QUECTEL_BC95_CellularNetwork.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BC95/QUECTEL_BC95_CellularNetwork.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BC95/QUECTEL_BC95_CellularStack.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BC95/QUECTEL_BC95_CellularStack.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96_CellularContext.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96_CellularContext.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96_CellularInformation.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96_CellularInformation.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96_CellularNetwork.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96_CellularNetwork.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96_CellularStack.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96_CellularStack.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96_ControlPlane_netif.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96_ControlPlane_netif.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/EC2X/QUECTEL_EC2X.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/EC2X/QUECTEL_EC2X.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/M26/QUECTEL_M26.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/M26/QUECTEL_M26.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/M26/QUECTEL_M26_CellularContext.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/M26/QUECTEL_M26_CellularContext.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/M26/QUECTEL_M26_CellularInformation.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/M26/QUECTEL_M26_CellularInformation.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/M26/QUECTEL_M26_CellularStack.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/M26/QUECTEL_M26_CellularStack.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/UG96/QUECTEL_UG96.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/UG96/QUECTEL_UG96.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/UG96/QUECTEL_UG96_CellularContext.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/UG96/QUECTEL_UG96_CellularContext.h
+ ${ARGV0}/connectivity/drivers/cellular/RiotMicro/AT/RM1000_AT.cpp
+ ${ARGV0}/connectivity/drivers/cellular/RiotMicro/AT/RM1000_AT.h
+ ${ARGV0}/connectivity/drivers/cellular/RiotMicro/AT/RM1000_AT_CellularContext.cpp
+ ${ARGV0}/connectivity/drivers/cellular/RiotMicro/AT/RM1000_AT_CellularContext.h
+ ${ARGV0}/connectivity/drivers/cellular/RiotMicro/AT/RM1000_AT_CellularNetwork.cpp
+ ${ARGV0}/connectivity/drivers/cellular/RiotMicro/AT/RM1000_AT_CellularNetwork.h
+ ${ARGV0}/connectivity/drivers/cellular/RiotMicro/AT/RM1000_AT_CellularStack.cpp
+ ${ARGV0}/connectivity/drivers/cellular/RiotMicro/AT/RM1000_AT_CellularStack.h
+ ${ARGV0}/connectivity/drivers/cellular/TELIT/HE910/TELIT_HE910.cpp
+ ${ARGV0}/connectivity/drivers/cellular/TELIT/HE910/TELIT_HE910.h
+ ${ARGV0}/connectivity/drivers/cellular/TELIT/ME310/TELIT_ME310.cpp
+ ${ARGV0}/connectivity/drivers/cellular/TELIT/ME310/TELIT_ME310.h
+ ${ARGV0}/connectivity/drivers/cellular/TELIT/ME310/TELIT_ME310_CellularContext.cpp
+ ${ARGV0}/connectivity/drivers/cellular/TELIT/ME310/TELIT_ME310_CellularContext.h
+ ${ARGV0}/connectivity/drivers/cellular/TELIT/ME310/TELIT_ME310_CellularNetwork.cpp
+ ${ARGV0}/connectivity/drivers/cellular/TELIT/ME310/TELIT_ME310_CellularNetwork.h
+ ${ARGV0}/connectivity/drivers/cellular/TELIT/ME310/TELIT_ME310_CellularStack.cpp
+ ${ARGV0}/connectivity/drivers/cellular/TELIT/ME310/TELIT_ME310_CellularStack.h
+ ${ARGV0}/connectivity/drivers/cellular/TELIT/ME910/TELIT_ME910.cpp
+ ${ARGV0}/connectivity/drivers/cellular/TELIT/ME910/TELIT_ME910.h
+ ${ARGV0}/connectivity/drivers/cellular/TELIT/ME910/TELIT_ME910_CellularContext.cpp
+ ${ARGV0}/connectivity/drivers/cellular/TELIT/ME910/TELIT_ME910_CellularContext.h
+ ${ARGV0}/connectivity/drivers/cellular/TELIT/ME910/TELIT_ME910_CellularNetwork.cpp
+ ${ARGV0}/connectivity/drivers/cellular/TELIT/ME910/TELIT_ME910_CellularNetwork.h
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/AT/UBLOX_AT.cpp
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/AT/UBLOX_AT.h
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/AT/UBLOX_AT_CellularContext.cpp
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/AT/UBLOX_AT_CellularContext.h
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/AT/UBLOX_AT_CellularNetwork.cpp
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/AT/UBLOX_AT_CellularNetwork.h
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/AT/UBLOX_AT_CellularStack.cpp
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/AT/UBLOX_AT_CellularStack.h
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/N2XX/UBLOX_N2XX.cpp
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/N2XX/UBLOX_N2XX.h
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/N2XX/UBLOX_N2XX_CellularContext.cpp
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/N2XX/UBLOX_N2XX_CellularContext.h
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/N2XX/UBLOX_N2XX_CellularNetwork.cpp
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/N2XX/UBLOX_N2XX_CellularNetwork.h
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/N2XX/UBLOX_N2XX_CellularSMS.cpp
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/N2XX/UBLOX_N2XX_CellularSMS.h
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/N2XX/UBLOX_N2XX_CellularStack.cpp
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/N2XX/UBLOX_N2XX_CellularStack.h
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/PPP/UBLOX_PPP.cpp
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/PPP/UBLOX_PPP.h
+ ${ARGV0}/connectivity/drivers/emac/TARGET_STM/TARGET_STM32H7/TARGET_NUCLEO_H743ZI2/stm32h7_eth_init.c
+ ${ARGV0}/connectivity/drivers/emac/TARGET_STM/TARGET_STM32H7/lan8742/lan8742.c
+ ${ARGV0}/connectivity/drivers/emac/TARGET_STM/TARGET_STM32H7/lan8742/lan8742.h
+ ${ARGV0}/connectivity/drivers/emac/TARGET_STM/TARGET_STM32H7/stm32xx_emac_config.h
+ ${ARGV0}/connectivity/drivers/emac/TARGET_STM/stm32xx_emac.cpp
+ ${ARGV0}/connectivity/drivers/emac/TARGET_STM/stm32xx_emac.h
+ ${ARGV0}/connectivity/drivers/emac/TARGET_STM/stm32xx_eth_irq_callback.cpp
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/aes_alt.cpp
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/aes_alt.h
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/aes_alt_stm32l4.c
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/aes_alt_stm32l4.h
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/ccm_alt.cpp
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/ccm_alt.h
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/cryp_stm32.c
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/cryp_stm32.h
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/gcm_alt.cpp
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/gcm_alt.h
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/hash_stm32.c
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/hash_stm32.h
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/md5_alt.cpp
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/md5_alt.h
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/sha1_alt.cpp
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/sha1_alt.h
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/sha256_alt.cpp
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/sha256_alt.h
+ ${ARGV0}/connectivity/drivers/nfc/PN512/include/nfc/controllers/PN512Driver.h
+ ${ARGV0}/connectivity/drivers/nfc/PN512/include/nfc/controllers/PN512SPITransportDriver.h
+ ${ARGV0}/connectivity/drivers/nfc/PN512/include/nfc/controllers/PN512TransportDriver.h
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/PN512Driver.cpp
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/PN512SPITransportDriver.cpp
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/PN512TransportDriver.cpp
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512.c
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512.h
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_callback.h
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_cmd.c
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_cmd.h
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_hw.c
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_hw.h
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_internal.h
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_irq.c
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_irq.h
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_poll.c
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_poll.h
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_registers.c
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_registers.h
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_rf.c
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_rf.h
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_timer.c
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_timer.h
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_transceive.c
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_transceive.h
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_types.h
+ ${ARGV0}/connectivity/drivers/wifi/esp8266-driver/ESP8266/ESP8266.cpp
+ ${ARGV0}/connectivity/drivers/wifi/esp8266-driver/ESP8266/ESP8266.h
+ ${ARGV0}/connectivity/drivers/wifi/esp8266-driver/ESP8266Interface.cpp
+ ${ARGV0}/connectivity/drivers/wifi/esp8266-driver/ESP8266Interface.h
+ ${ARGV0}/connectivity/libraries/mbed-coap/mbed-coap/sn_coap_header.h
+ ${ARGV0}/connectivity/libraries/mbed-coap/mbed-coap/sn_coap_protocol.h
+ ${ARGV0}/connectivity/libraries/mbed-coap/mbed-coap/sn_config.h
+ ${ARGV0}/connectivity/libraries/mbed-coap/source/include/sn_coap_header_internal.h
+ ${ARGV0}/connectivity/libraries/mbed-coap/source/include/sn_coap_protocol_internal.h
+ ${ARGV0}/connectivity/libraries/mbed-coap/source/sn_coap_builder.c
+ ${ARGV0}/connectivity/libraries/mbed-coap/source/sn_coap_header_check.c
+ ${ARGV0}/connectivity/libraries/mbed-coap/source/sn_coap_parser.c
+ ${ARGV0}/connectivity/libraries/mbed-coap/source/sn_coap_protocol.c
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/common_functions.h
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/ip4string.h
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/ip6string.h
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/ip_fsc.h
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/ns_list.h
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/ns_nvm_helper.h
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/ns_types.h
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/nsdynmemLIB.h
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/nsdynmem_tracker.h
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/nsdynmem_tracker_lib.h
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/platform/arm_hal_interrupt.h
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/platform/arm_hal_nvm.h
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/source/IPv6_fcf_lib/ip_fsc.c
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/source/libBits/common_functions.c
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/source/libList/ns_list.c
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/source/libip4string/ip4tos.c
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/source/libip4string/stoip4.c
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/source/libip6string/ip6tos.c
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/source/libip6string/stoip6.c
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/source/nsdynmemLIB/nsdynmemLIB.c
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/source/nsdynmemtracker/nsdynmem_tracker_lib.c
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/source/nvmHelper/ns_nvm_helper.c
+ ${ARGV0}/connectivity/libraries/ppp/include/polarssl/arc4.h
+ ${ARGV0}/connectivity/libraries/ppp/include/polarssl/des.h
+ ${ARGV0}/connectivity/libraries/ppp/include/polarssl/md4.h
+ ${ARGV0}/connectivity/libraries/ppp/include/polarssl/md5.h
+ ${ARGV0}/connectivity/libraries/ppp/include/polarssl/sha1.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/ccp.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/chap-md5.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/chap-new.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/chap_ms.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/eap.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/ecp.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/eui64.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/fsm.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/ipcp.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/ipv6cp.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/lcp.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/magic.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/mppe.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/ppp.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/ppp_impl.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/ppp_opts.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/ppp_service.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/ppp_service_if.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/pppapi.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/pppcrypt.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/pppdebug.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/pppoe.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/pppol2tp.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/pppos.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/upap.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/vj.h
+ ${ARGV0}/connectivity/libraries/ppp/source/auth.c
+ ${ARGV0}/connectivity/libraries/ppp/source/ccp.c
+ ${ARGV0}/connectivity/libraries/ppp/source/chap-md5.c
+ ${ARGV0}/connectivity/libraries/ppp/source/chap-new.c
+ ${ARGV0}/connectivity/libraries/ppp/source/chap_ms.c
+ ${ARGV0}/connectivity/libraries/ppp/source/demand.c
+ ${ARGV0}/connectivity/libraries/ppp/source/eap.c
+ ${ARGV0}/connectivity/libraries/ppp/source/eui64.c
+ ${ARGV0}/connectivity/libraries/ppp/source/fsm.c
+ ${ARGV0}/connectivity/libraries/ppp/source/ipcp.c
+ ${ARGV0}/connectivity/libraries/ppp/source/ipv6cp.c
+ ${ARGV0}/connectivity/libraries/ppp/source/lcp.c
+ ${ARGV0}/connectivity/libraries/ppp/source/magic.c
+ ${ARGV0}/connectivity/libraries/ppp/source/mppe.c
+ ${ARGV0}/connectivity/libraries/ppp/source/multilink.c
+ ${ARGV0}/connectivity/libraries/ppp/source/polarssl/ppp_arc4.c
+ ${ARGV0}/connectivity/libraries/ppp/source/polarssl/ppp_des.c
+ ${ARGV0}/connectivity/libraries/ppp/source/polarssl/ppp_md4.c
+ ${ARGV0}/connectivity/libraries/ppp/source/polarssl/ppp_md5.c
+ ${ARGV0}/connectivity/libraries/ppp/source/polarssl/ppp_sha1.c
+ ${ARGV0}/connectivity/libraries/ppp/source/ppp.c
+ ${ARGV0}/connectivity/libraries/ppp/source/ppp_ecp.c
+ ${ARGV0}/connectivity/libraries/ppp/source/ppp_service.cpp
+ ${ARGV0}/connectivity/libraries/ppp/source/ppp_service_if.cpp
+ ${ARGV0}/connectivity/libraries/ppp/source/pppapi.c
+ ${ARGV0}/connectivity/libraries/ppp/source/pppcrypt.c
+ ${ARGV0}/connectivity/libraries/ppp/source/pppoe.c
+ ${ARGV0}/connectivity/libraries/ppp/source/pppol2tp.c
+ ${ARGV0}/connectivity/libraries/ppp/source/pppos.cpp
+ ${ARGV0}/connectivity/libraries/ppp/source/upap.c
+ ${ARGV0}/connectivity/libraries/ppp/source/utils.c
+ ${ARGV0}/connectivity/libraries/ppp/source/vj.c
+ ${ARGV0}/connectivity/lorawan/include/lorawan/LoRaRadio.h
+ ${ARGV0}/connectivity/lorawan/include/lorawan/LoRaWANBase.h
+ ${ARGV0}/connectivity/lorawan/include/lorawan/LoRaWANInterface.h
+ ${ARGV0}/connectivity/lorawan/include/lorawan/LoRaWANStack.h
+ ${ARGV0}/connectivity/lorawan/include/lorawan/lorawan_types.h
+ ${ARGV0}/connectivity/lorawan/lorastack/mac/LoRaMac.cpp
+ ${ARGV0}/connectivity/lorawan/lorastack/mac/LoRaMac.h
+ ${ARGV0}/connectivity/lorawan/lorastack/mac/LoRaMacChannelPlan.cpp
+ ${ARGV0}/connectivity/lorawan/lorastack/mac/LoRaMacChannelPlan.h
+ ${ARGV0}/connectivity/lorawan/lorastack/mac/LoRaMacCommand.cpp
+ ${ARGV0}/connectivity/lorawan/lorastack/mac/LoRaMacCommand.h
+ ${ARGV0}/connectivity/lorawan/lorastack/mac/LoRaMacCrypto.cpp
+ ${ARGV0}/connectivity/lorawan/lorastack/mac/LoRaMacCrypto.h
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHY.cpp
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHY.h
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYAS923.cpp
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYAS923.h
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYAU915.cpp
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYAU915.h
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYCN470.cpp
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYCN470.h
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYCN779.cpp
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYCN779.h
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYEU433.cpp
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYEU433.h
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYEU868.cpp
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYEU868.h
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYIN865.cpp
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYIN865.h
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYKR920.cpp
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYKR920.h
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYUS915.cpp
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYUS915.h
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/lora_phy_ds.h
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/loraphy_target.h
+ ${ARGV0}/connectivity/lorawan/source/LoRaWANInterface.cpp
+ ${ARGV0}/connectivity/lorawan/source/LoRaWANStack.cpp
+ ${ARGV0}/connectivity/lorawan/system/LoRaWANTimer.cpp
+ ${ARGV0}/connectivity/lorawan/system/LoRaWANTimer.h
+ ${ARGV0}/connectivity/lorawan/system/lorawan_data_structures.h
+ ${ARGV0}/connectivity/nfc/include/nfc/NFC.h
+ ${ARGV0}/connectivity/nfc/include/nfc/NFCController.h
+ ${ARGV0}/connectivity/nfc/include/nfc/NFCControllerDriver.h
+ ${ARGV0}/connectivity/nfc/include/nfc/NFCDefinitions.h
+ ${ARGV0}/connectivity/nfc/include/nfc/NFCEEPROM.h
+ ${ARGV0}/connectivity/nfc/include/nfc/NFCEEPROMDriver.h
+ ${ARGV0}/connectivity/nfc/include/nfc/NFCNDEFCapable.h
+ ${ARGV0}/connectivity/nfc/include/nfc/NFCRemoteEndpoint.h
+ ${ARGV0}/connectivity/nfc/include/nfc/NFCRemoteInitiator.h
+ ${ARGV0}/connectivity/nfc/include/nfc/NFCTarget.h
+ ${ARGV0}/connectivity/nfc/include/nfc/Type4RemoteInitiator.h
+ ${ARGV0}/connectivity/nfc/include/nfc/ndef/MessageBuilder.h
+ ${ARGV0}/connectivity/nfc/include/nfc/ndef/MessageParser.h
+ ${ARGV0}/connectivity/nfc/include/nfc/ndef/Record.h
+ ${ARGV0}/connectivity/nfc/include/nfc/ndef/RecordParser.h
+ ${ARGV0}/connectivity/nfc/include/nfc/ndef/common/Mime.h
+ ${ARGV0}/connectivity/nfc/include/nfc/ndef/common/SimpleMessageParser.h
+ ${ARGV0}/connectivity/nfc/include/nfc/ndef/common/Text.h
+ ${ARGV0}/connectivity/nfc/include/nfc/ndef/common/URI.h
+ ${ARGV0}/connectivity/nfc/include/nfc/ndef/common/util.h
+ ${ARGV0}/connectivity/nfc/libraries/acore/acore/ac_buffer.h
+ ${ARGV0}/connectivity/nfc/libraries/acore/acore/ac_buffer_builder.h
+ ${ARGV0}/connectivity/nfc/libraries/acore/acore/ac_buffer_reader.h
+ ${ARGV0}/connectivity/nfc/libraries/acore/acore/ac_debug.h
+ ${ARGV0}/connectivity/nfc/libraries/acore/acore/ac_macros.h
+ ${ARGV0}/connectivity/nfc/libraries/acore/acore/ac_stream.h
+ ${ARGV0}/connectivity/nfc/libraries/acore/source/ac_buffer.c
+ ${ARGV0}/connectivity/nfc/libraries/acore/source/ac_buffer_builder.c
+ ${ARGV0}/connectivity/nfc/libraries/acore/source/ac_buffer_reader.c
+ ${ARGV0}/connectivity/nfc/libraries/acore/source/ac_stream.c
+ ${ARGV0}/connectivity/nfc/libraries/stack/ndef/ndef.c
+ ${ARGV0}/connectivity/nfc/libraries/stack/ndef/ndef.h
+ ${ARGV0}/connectivity/nfc/libraries/stack/nfc_common.h
+ ${ARGV0}/connectivity/nfc/libraries/stack/nfc_errors.h
+ ${ARGV0}/connectivity/nfc/libraries/stack/platform/nfc_debug.h
+ ${ARGV0}/connectivity/nfc/libraries/stack/platform/nfc_scheduler.c
+ ${ARGV0}/connectivity/nfc/libraries/stack/platform/nfc_scheduler.h
+ ${ARGV0}/connectivity/nfc/libraries/stack/platform/nfc_transport.c
+ ${ARGV0}/connectivity/nfc/libraries/stack/platform/nfc_transport.h
+ ${ARGV0}/connectivity/nfc/libraries/stack/tech/iso7816/iso7816.c
+ ${ARGV0}/connectivity/nfc/libraries/stack/tech/iso7816/iso7816.h
+ ${ARGV0}/connectivity/nfc/libraries/stack/tech/iso7816/iso7816_app.c
+ ${ARGV0}/connectivity/nfc/libraries/stack/tech/iso7816/iso7816_app.h
+ ${ARGV0}/connectivity/nfc/libraries/stack/tech/iso7816/iso7816_defs.h
+ ${ARGV0}/connectivity/nfc/libraries/stack/tech/isodep/isodep.h
+ ${ARGV0}/connectivity/nfc/libraries/stack/tech/isodep/isodep_target.c
+ ${ARGV0}/connectivity/nfc/libraries/stack/tech/isodep/isodep_target.h
+ ${ARGV0}/connectivity/nfc/libraries/stack/tech/type4/type4_target.c
+ ${ARGV0}/connectivity/nfc/libraries/stack/tech/type4/type4_target.h
+ ${ARGV0}/connectivity/nfc/libraries/stack/transceiver/protocols.h
+ ${ARGV0}/connectivity/nfc/libraries/stack/transceiver/transceiver.c
+ ${ARGV0}/connectivity/nfc/libraries/stack/transceiver/transceiver.h
+ ${ARGV0}/connectivity/nfc/libraries/stack/transceiver/transceiver_internal.h
+ ${ARGV0}/connectivity/nfc/source/NFCController.cpp
+ ${ARGV0}/connectivity/nfc/source/NFCControllerDriver.cpp
+ ${ARGV0}/connectivity/nfc/source/NFCEEPROM.cpp
+ ${ARGV0}/connectivity/nfc/source/NFCEEPROMDriver.cpp
+ ${ARGV0}/connectivity/nfc/source/NFCNDEFCapable.cpp
+ ${ARGV0}/connectivity/nfc/source/NFCRemoteEndpoint.cpp
+ ${ARGV0}/connectivity/nfc/source/NFCRemoteInitiator.cpp
+ ${ARGV0}/connectivity/nfc/source/NFCTarget.cpp
+ ${ARGV0}/connectivity/nfc/source/Type4RemoteInitiator.cpp
+ ${ARGV0}/connectivity/nfc/source/ndef/MessageBuilder.cpp
+ ${ARGV0}/connectivity/nfc/source/ndef/MessageParser.cpp
+ ${ARGV0}/connectivity/nfc/source/ndef/RecordParser.cpp
+ ${ARGV0}/connectivity/nfc/source/ndef/common/Mime.cpp
+ ${ARGV0}/connectivity/nfc/source/ndef/common/SimpleMessageParser.cpp
+ ${ARGV0}/connectivity/nfc/source/ndef/common/Text.cpp
+ ${ARGV0}/connectivity/nfc/source/ndef/common/URI.cpp
+ ${ARGV0}/connectivity/nfc/source/ndef/common/util.cpp
+ ${ARGV0}/drivers/device_key/include/device_key/DeviceKey.h
+ ${ARGV0}/drivers/device_key/source/DeviceKey.cpp
+ ${ARGV0}/drivers/include/drivers/AnalogIn.h
+ ${ARGV0}/drivers/include/drivers/AnalogOut.h
+ ${ARGV0}/drivers/include/drivers/BufferedSerial.h
+ ${ARGV0}/drivers/include/drivers/BusIn.h
+ ${ARGV0}/drivers/include/drivers/BusInOut.h
+ ${ARGV0}/drivers/include/drivers/BusOut.h
+ ${ARGV0}/drivers/include/drivers/CAN.h
+ ${ARGV0}/drivers/include/drivers/DigitalIn.h
+ ${ARGV0}/drivers/include/drivers/DigitalInOut.h
+ ${ARGV0}/drivers/include/drivers/DigitalOut.h
+ ${ARGV0}/drivers/include/drivers/FlashIAP.h
+ ${ARGV0}/drivers/include/drivers/HighResClock.h
+ ${ARGV0}/drivers/include/drivers/I2C.h
+ ${ARGV0}/drivers/include/drivers/I2CSlave.h
+ ${ARGV0}/drivers/include/drivers/InterruptIn.h
+ ${ARGV0}/drivers/include/drivers/LowPowerClock.h
+ ${ARGV0}/drivers/include/drivers/LowPowerTicker.h
+ ${ARGV0}/drivers/include/drivers/LowPowerTimeout.h
+ ${ARGV0}/drivers/include/drivers/LowPowerTimer.h
+ ${ARGV0}/drivers/include/drivers/MbedCRC.h
+ ${ARGV0}/drivers/include/drivers/OSPI.h
+ ${ARGV0}/drivers/include/drivers/PortIn.h
+ ${ARGV0}/drivers/include/drivers/PortInOut.h
+ ${ARGV0}/drivers/include/drivers/PortOut.h
+ ${ARGV0}/drivers/include/drivers/PwmOut.h
+ ${ARGV0}/drivers/include/drivers/QSPI.h
+ ${ARGV0}/drivers/include/drivers/RawCAN.h
+ ${ARGV0}/drivers/include/drivers/RealTimeClock.h
+ ${ARGV0}/drivers/include/drivers/ResetReason.h
+ ${ARGV0}/drivers/include/drivers/SPI.h
+ ${ARGV0}/drivers/include/drivers/SPISlave.h
+ ${ARGV0}/drivers/include/drivers/SerialBase.h
+ ${ARGV0}/drivers/include/drivers/SerialWireOutput.h
+ ${ARGV0}/drivers/include/drivers/Ticker.h
+ ${ARGV0}/drivers/include/drivers/TickerDataClock.h
+ ${ARGV0}/drivers/include/drivers/Timeout.h
+ ${ARGV0}/drivers/include/drivers/Timer.h
+ ${ARGV0}/drivers/include/drivers/TimerEvent.h
+ ${ARGV0}/drivers/include/drivers/UnbufferedSerial.h
+ ${ARGV0}/drivers/include/drivers/Watchdog.h
+ ${ARGV0}/drivers/include/drivers/interfaces/InterfaceCAN.h
+ ${ARGV0}/drivers/include/drivers/interfaces/InterfaceDigitalIn.h
+ ${ARGV0}/drivers/include/drivers/interfaces/InterfaceDigitalInOut.h
+ ${ARGV0}/drivers/include/drivers/interfaces/InterfaceDigitalOut.h
+ ${ARGV0}/drivers/source/AnalogIn.cpp
+ ${ARGV0}/drivers/source/AnalogOut.cpp
+ ${ARGV0}/drivers/source/BufferedSerial.cpp
+ ${ARGV0}/drivers/source/BusIn.cpp
+ ${ARGV0}/drivers/source/BusInOut.cpp
+ ${ARGV0}/drivers/source/BusOut.cpp
+ ${ARGV0}/drivers/source/CAN.cpp
+ ${ARGV0}/drivers/source/DigitalIn.cpp
+ ${ARGV0}/drivers/source/DigitalInOut.cpp
+ ${ARGV0}/drivers/source/DigitalOut.cpp
+ ${ARGV0}/drivers/source/FlashIAP.cpp
+ ${ARGV0}/drivers/source/I2C.cpp
+ ${ARGV0}/drivers/source/I2CSlave.cpp
+ ${ARGV0}/drivers/source/InterruptIn.cpp
+ ${ARGV0}/drivers/source/MbedCRC.cpp
+ ${ARGV0}/drivers/source/OSPI.cpp
+ ${ARGV0}/drivers/source/PortIn.cpp
+ ${ARGV0}/drivers/source/PortInOut.cpp
+ ${ARGV0}/drivers/source/PortOut.cpp
+ ${ARGV0}/drivers/source/PwmOut.cpp
+ ${ARGV0}/drivers/source/QSPI.cpp
+ ${ARGV0}/drivers/source/ResetReason.cpp
+ ${ARGV0}/drivers/source/SPI.cpp
+ ${ARGV0}/drivers/source/SPISlave.cpp
+ ${ARGV0}/drivers/source/SerialBase.cpp
+ ${ARGV0}/drivers/source/SerialWireOutput.cpp
+ ${ARGV0}/drivers/source/Ticker.cpp
+ ${ARGV0}/drivers/source/Timeout.cpp
+ ${ARGV0}/drivers/source/Timer.cpp
+ ${ARGV0}/drivers/source/TimerEvent.cpp
+ ${ARGV0}/drivers/source/UnbufferedSerial.cpp
+ ${ARGV0}/drivers/source/Watchdog.cpp
+ ${ARGV0}/drivers/usb/include/usb/USBAudio.h
+ ${ARGV0}/drivers/usb/include/usb/USBCDC.h
+ ${ARGV0}/drivers/usb/include/usb/USBCDC_ECM.h
+ ${ARGV0}/drivers/usb/include/usb/USBHID.h
+ ${ARGV0}/drivers/usb/include/usb/USBKeyboard.h
+ ${ARGV0}/drivers/usb/include/usb/USBMIDI.h
+ ${ARGV0}/drivers/usb/include/usb/USBMSD.h
+ ${ARGV0}/drivers/usb/include/usb/USBMouse.h
+ ${ARGV0}/drivers/usb/include/usb/USBMouseKeyboard.h
+ ${ARGV0}/drivers/usb/include/usb/USBSerial.h
+ ${ARGV0}/drivers/usb/include/usb/internal/AsyncOp.h
+ ${ARGV0}/drivers/usb/include/usb/internal/ByteBuffer.h
+ ${ARGV0}/drivers/usb/include/usb/internal/EndpointResolver.h
+ ${ARGV0}/drivers/usb/include/usb/internal/LinkEntry.h
+ ${ARGV0}/drivers/usb/include/usb/internal/LinkedList.h
+ ${ARGV0}/drivers/usb/include/usb/internal/LinkedListBase.h
+ ${ARGV0}/drivers/usb/include/usb/internal/MIDIMessage.h
+ ${ARGV0}/drivers/usb/include/usb/internal/OperationList.h
+ ${ARGV0}/drivers/usb/include/usb/internal/OperationListBase.h
+ ${ARGV0}/drivers/usb/include/usb/internal/PolledQueue.h
+ ${ARGV0}/drivers/usb/include/usb/internal/Task.h
+ ${ARGV0}/drivers/usb/include/usb/internal/TaskBase.h
+ ${ARGV0}/drivers/usb/include/usb/internal/TaskQueue.h
+ ${ARGV0}/drivers/usb/include/usb/internal/USBAudio_Types.h
+ ${ARGV0}/drivers/usb/include/usb/internal/USBDescriptor.h
+ ${ARGV0}/drivers/usb/include/usb/internal/USBDevice.h
+ ${ARGV0}/drivers/usb/include/usb/internal/USBDevice_Types.h
+ ${ARGV0}/drivers/usb/include/usb/internal/USBHID_Types.h
+ ${ARGV0}/drivers/usb/source/AsyncOp.cpp
+ ${ARGV0}/drivers/usb/source/ByteBuffer.cpp
+ ${ARGV0}/drivers/usb/source/EndpointResolver.cpp
+ ${ARGV0}/drivers/usb/source/LinkedListBase.cpp
+ ${ARGV0}/drivers/usb/source/OperationListBase.cpp
+ ${ARGV0}/drivers/usb/source/PolledQueue.cpp
+ ${ARGV0}/drivers/usb/source/TaskBase.cpp
+ ${ARGV0}/drivers/usb/source/USBAudio.cpp
+ ${ARGV0}/drivers/usb/source/USBCDC.cpp
+ ${ARGV0}/drivers/usb/source/USBCDC_ECM.cpp
+ ${ARGV0}/drivers/usb/source/USBDevice.cpp
+ ${ARGV0}/drivers/usb/source/USBHID.cpp
+ ${ARGV0}/drivers/usb/source/USBKeyboard.cpp
+ ${ARGV0}/drivers/usb/source/USBMIDI.cpp
+ ${ARGV0}/drivers/usb/source/USBMSD.cpp
+ ${ARGV0}/drivers/usb/source/USBMouse.cpp
+ ${ARGV0}/drivers/usb/source/USBMouseKeyboard.cpp
+ ${ARGV0}/drivers/usb/source/USBSerial.cpp
+ ${ARGV0}/events/include/events/Event.h
+ ${ARGV0}/events/include/events/EventQueue.h
+ ${ARGV0}/events/include/events/UserAllocatedEvent.h
+ ${ARGV0}/events/include/events/equeue.h
+ ${ARGV0}/events/include/events/internal/equeue_platform.h
+ ${ARGV0}/events/include/events/mbed_events.h
+ ${ARGV0}/events/include/events/mbed_shared_queues.h
+ ${ARGV0}/events/source/EventQueue.cpp
+ ${ARGV0}/events/source/equeue.c
+ ${ARGV0}/events/source/equeue_mbed.cpp
+ ${ARGV0}/events/source/equeue_posix.c
+ ${ARGV0}/events/source/mbed_shared_queues.cpp
+ ${ARGV0}/features/frameworks/greentea-client/greentea-client/greentea_metrics.h
+ ${ARGV0}/features/frameworks/greentea-client/greentea-client/test_env.h
+ ${ARGV0}/features/frameworks/greentea-client/source/greentea_metrics.cpp
+ ${ARGV0}/features/frameworks/greentea-client/source/greentea_test_env.cpp
+ ${ARGV0}/features/frameworks/mbed-client-cli/mbed-client-cli/ns_cmdline.h
+ ${ARGV0}/features/frameworks/mbed-client-cli/source/ns_cmdline.c
+ ${ARGV0}/features/frameworks/mbed-greentea-io/mbed_io.cpp
+ ${ARGV0}/features/frameworks/unity/source/unity.c
+ ${ARGV0}/features/frameworks/unity/unity/unity.h
+ ${ARGV0}/features/frameworks/unity/unity/unity_config.h
+ ${ARGV0}/features/frameworks/unity/unity/unity_internals.h
+ ${ARGV0}/features/frameworks/utest/mbed-utest-shim.cpp
+ ${ARGV0}/features/frameworks/utest/source/unity_handler.cpp
+ ${ARGV0}/features/frameworks/utest/source/utest_case.cpp
+ ${ARGV0}/features/frameworks/utest/source/utest_default_handlers.cpp
+ ${ARGV0}/features/frameworks/utest/source/utest_greentea_handlers.cpp
+ ${ARGV0}/features/frameworks/utest/source/utest_harness.cpp
+ ${ARGV0}/features/frameworks/utest/source/utest_print.cpp
+ ${ARGV0}/features/frameworks/utest/source/utest_shim.cpp
+ ${ARGV0}/features/frameworks/utest/source/utest_stack_trace.cpp
+ ${ARGV0}/features/frameworks/utest/source/utest_types.cpp
+ ${ARGV0}/features/frameworks/utest/utest/unity_handler.h
+ ${ARGV0}/features/frameworks/utest/utest/utest.h
+ ${ARGV0}/features/frameworks/utest/utest/utest_case.h
+ ${ARGV0}/features/frameworks/utest/utest/utest_default_handlers.h
+ ${ARGV0}/features/frameworks/utest/utest/utest_harness.h
+ ${ARGV0}/features/frameworks/utest/utest/utest_print.h
+ ${ARGV0}/features/frameworks/utest/utest/utest_scheduler.h
+ ${ARGV0}/features/frameworks/utest/utest/utest_shim.h
+ ${ARGV0}/features/frameworks/utest/utest/utest_specification.h
+ ${ARGV0}/features/frameworks/utest/utest/utest_stack_trace.h
+ ${ARGV0}/features/frameworks/utest/utest/utest_types.h
+ ${ARGV0}/hal/include/hal/LowPowerTickerWrapper.h
+ ${ARGV0}/hal/include/hal/PinNameAliases.h
+ ${ARGV0}/hal/include/hal/analogin_api.h
+ ${ARGV0}/hal/include/hal/analogout_api.h
+ ${ARGV0}/hal/include/hal/buffer.h
+ ${ARGV0}/hal/include/hal/can_api.h
+ ${ARGV0}/hal/include/hal/can_helper.h
+ ${ARGV0}/hal/include/hal/crc_api.h
+ ${ARGV0}/hal/include/hal/critical_section_api.h
+ ${ARGV0}/hal/include/hal/dma_api.h
+ ${ARGV0}/hal/include/hal/flash_api.h
+ ${ARGV0}/hal/include/hal/gpio_api.h
+ ${ARGV0}/hal/include/hal/gpio_irq_api.h
+ ${ARGV0}/hal/include/hal/i2c_api.h
+ ${ARGV0}/hal/include/hal/itm_api.h
+ ${ARGV0}/hal/include/hal/lp_ticker_api.h
+ ${ARGV0}/hal/include/hal/mbed_lp_ticker_wrapper.h
+ ${ARGV0}/hal/include/hal/mpu_api.h
+ ${ARGV0}/hal/include/hal/ospi_api.h
+ ${ARGV0}/hal/include/hal/pinmap.h
+ ${ARGV0}/hal/include/hal/port_api.h
+ ${ARGV0}/hal/include/hal/pwmout_api.h
+ ${ARGV0}/hal/include/hal/qspi_api.h
+ ${ARGV0}/hal/include/hal/reset_reason_api.h
+ ${ARGV0}/hal/include/hal/rtc_api.h
+ ${ARGV0}/hal/include/hal/serial_api.h
+ ${ARGV0}/hal/include/hal/sleep_api.h
+ ${ARGV0}/hal/include/hal/spi_api.h
+ ${ARGV0}/hal/include/hal/static_pinmap.h
+ ${ARGV0}/hal/include/hal/ticker_api.h
+ ${ARGV0}/hal/include/hal/trng_api.h
+ ${ARGV0}/hal/include/hal/us_ticker_api.h
+ ${ARGV0}/hal/include/hal/watchdog_api.h
+ ${ARGV0}/hal/source/LowPowerTickerWrapper.cpp
+ ${ARGV0}/hal/source/mbed_compat.c
+ ${ARGV0}/hal/source/mbed_critical_section_api.c
+ ${ARGV0}/hal/source/mbed_flash_api.c
+ ${ARGV0}/hal/source/mbed_gpio.c
+ ${ARGV0}/hal/source/mbed_gpio_irq.c
+ ${ARGV0}/hal/source/mbed_itm_api.c
+ ${ARGV0}/hal/source/mbed_lp_ticker_api.c
+ ${ARGV0}/hal/source/mbed_lp_ticker_wrapper.cpp
+ ${ARGV0}/hal/source/mbed_pinmap_common.c
+ ${ARGV0}/hal/source/mbed_pinmap_default.cpp
+ ${ARGV0}/hal/source/mbed_ticker_api.c
+ ${ARGV0}/hal/source/mbed_us_ticker_api.c
+ ${ARGV0}/hal/source/mpu/mbed_mpu_v7m.c
+ ${ARGV0}/hal/source/mpu/mbed_mpu_v8m.c
+ ${ARGV0}/hal/source/static_pinmap.cpp
+ ${ARGV0}/hal/usb/include/usb/USBPhy.h
+ ${ARGV0}/hal/usb/include/usb/USBPhyEvents.h
+ ${ARGV0}/hal/usb/include/usb/USBPhyTypes.h
+ ${ARGV0}/hal/usb/include/usb/usb_phy_api.h
+ ${ARGV0}/hal/usb/source/mbed_usb_phy.cpp
+ ${ARGV0}/mbed.h
+ ${ARGV0}/platform/cxxsupport/mstd_algorithm
+ ${ARGV0}/platform/cxxsupport/mstd_atomic
+ ${ARGV0}/platform/cxxsupport/mstd_cstddef
+ ${ARGV0}/platform/cxxsupport/mstd_functional
+ ${ARGV0}/platform/cxxsupport/mstd_iterator
+ ${ARGV0}/platform/cxxsupport/mstd_memory
+ ${ARGV0}/platform/cxxsupport/mstd_mutex
+ ${ARGV0}/platform/cxxsupport/mstd_mutex.cpp
+ ${ARGV0}/platform/cxxsupport/mstd_new
+ ${ARGV0}/platform/cxxsupport/mstd_span
+ ${ARGV0}/platform/cxxsupport/mstd_tuple
+ ${ARGV0}/platform/cxxsupport/mstd_type_traits
+ ${ARGV0}/platform/cxxsupport/mstd_utility
+ ${ARGV0}/platform/include/platform/ATCmdParser.h
+ ${ARGV0}/platform/include/platform/CThunk.h
+ ${ARGV0}/platform/include/platform/Callback.h
+ ${ARGV0}/platform/include/platform/CircularBuffer.h
+ ${ARGV0}/platform/include/platform/CriticalSectionLock.h
+ ${ARGV0}/platform/include/platform/DeepSleepLock.h
+ ${ARGV0}/platform/include/platform/DirHandle.h
+ ${ARGV0}/platform/include/platform/FileBase.h
+ ${ARGV0}/platform/include/platform/FileHandle.h
+ ${ARGV0}/platform/include/platform/FileLike.h
+ ${ARGV0}/platform/include/platform/FilePath.h
+ ${ARGV0}/platform/include/platform/FileSystemHandle.h
+ ${ARGV0}/platform/include/platform/FileSystemLike.h
+ ${ARGV0}/platform/include/platform/LocalFileSystem.h
+ ${ARGV0}/platform/include/platform/NonCopyable.h
+ ${ARGV0}/platform/include/platform/PlatformMutex.h
+ ${ARGV0}/platform/include/platform/ScopedLock.h
+ ${ARGV0}/platform/include/platform/ScopedRamExecutionLock.h
+ ${ARGV0}/platform/include/platform/ScopedRomWriteLock.h
+ ${ARGV0}/platform/include/platform/SharedPtr.h
+ ${ARGV0}/platform/include/platform/SingletonPtr.h
+ ${ARGV0}/platform/include/platform/Span.h
+ ${ARGV0}/platform/include/platform/Stream.h
+ ${ARGV0}/platform/include/platform/Transaction.h
+ ${ARGV0}/platform/include/platform/internal/CThunkBase.h
+ ${ARGV0}/platform/include/platform/internal/SysTimer.h
+ ${ARGV0}/platform/include/platform/internal/mbed_atomic_impl.h
+ ${ARGV0}/platform/include/platform/internal/mbed_error_hist.h
+ ${ARGV0}/platform/include/platform/internal/mbed_fault_handler.h
+ ${ARGV0}/platform/include/platform/internal/mbed_os_timer.h
+ ${ARGV0}/platform/include/platform/mbed_application.h
+ ${ARGV0}/platform/include/platform/mbed_assert.h
+ ${ARGV0}/platform/include/platform/mbed_atomic.h
+ ${ARGV0}/platform/include/platform/mbed_chrono.h
+ ${ARGV0}/platform/include/platform/mbed_critical.h
+ ${ARGV0}/platform/include/platform/mbed_debug.h
+ ${ARGV0}/platform/include/platform/mbed_enum_flags.h
+ ${ARGV0}/platform/include/platform/mbed_error.h
+ ${ARGV0}/platform/include/platform/mbed_interface.h
+ ${ARGV0}/platform/include/platform/mbed_mem_trace.h
+ ${ARGV0}/platform/include/platform/mbed_mktime.h
+ ${ARGV0}/platform/include/platform/mbed_mpu_mgmt.h
+ ${ARGV0}/platform/include/platform/mbed_poll.h
+ ${ARGV0}/platform/include/platform/mbed_power_mgmt.h
+ ${ARGV0}/platform/include/platform/mbed_preprocessor.h
+ ${ARGV0}/platform/include/platform/mbed_retarget.h
+ ${ARGV0}/platform/include/platform/mbed_rtc_time.h
+ ${ARGV0}/platform/include/platform/mbed_semihost_api.h
+ ${ARGV0}/platform/include/platform/mbed_stats.h
+ ${ARGV0}/platform/include/platform/mbed_thread.h
+ ${ARGV0}/platform/include/platform/mbed_toolchain.h
+ ${ARGV0}/platform/include/platform/mbed_version.h
+ ${ARGV0}/platform/include/platform/mbed_wait_api.h
+ ${ARGV0}/platform/include/platform/platform.h
+ ${ARGV0}/platform/mbed-trace/include/mbed-trace/mbed_trace.h
+ ${ARGV0}/platform/mbed-trace/include/mbed-trace/ns_trace.h
+ ${ARGV0}/platform/mbed-trace/source/mbed_trace.c
+ ${ARGV0}/platform/randlib/include/mbed-client-randlib/platform/arm_hal_random.h
+ ${ARGV0}/platform/randlib/include/mbed-client-randlib/randLIB.h
+ ${ARGV0}/platform/randlib/source/randLIB.c
+ ${ARGV0}/platform/source/ATCmdParser.cpp
+ ${ARGV0}/platform/source/CThunkBase.cpp
+ ${ARGV0}/platform/source/CriticalSectionLock.cpp
+ ${ARGV0}/platform/source/DeepSleepLock.cpp
+ ${ARGV0}/platform/source/FileBase.cpp
+ ${ARGV0}/platform/source/FileHandle.cpp
+ ${ARGV0}/platform/source/FilePath.cpp
+ ${ARGV0}/platform/source/FileSystemHandle.cpp
+ ${ARGV0}/platform/source/LocalFileSystem.cpp
+ ${ARGV0}/platform/source/Stream.cpp
+ ${ARGV0}/platform/source/SysTimer.cpp
+ ${ARGV0}/platform/source/TARGET_CORTEX_M/TOOLCHAIN_GCC/except.S
+ ${ARGV0}/platform/source/TARGET_CORTEX_M/mbed_fault_handler.c
+ ${ARGV0}/platform/source/mbed_alloc_wrappers.cpp
+ ${ARGV0}/platform/source/mbed_application.c
+ ${ARGV0}/platform/source/mbed_assert.c
+ ${ARGV0}/platform/source/mbed_atomic_impl.c
+ ${ARGV0}/platform/source/mbed_board.c
+ ${ARGV0}/platform/source/mbed_crash_data_offsets.h
+ ${ARGV0}/platform/source/mbed_critical.c
+ ${ARGV0}/platform/source/mbed_error.c
+ ${ARGV0}/platform/source/mbed_error_hist.c
+ ${ARGV0}/platform/source/mbed_interface.c
+ ${ARGV0}/platform/source/mbed_mem_trace.cpp
+ ${ARGV0}/platform/source/mbed_mktime.c
+ ${ARGV0}/platform/source/mbed_mpu_mgmt.c
+ ${ARGV0}/platform/source/mbed_os_timer.cpp
+ ${ARGV0}/platform/source/mbed_poll.cpp
+ ${ARGV0}/platform/source/mbed_power_mgmt.c
+ ${ARGV0}/platform/source/mbed_retarget.cpp
+ ${ARGV0}/platform/source/mbed_rtc_time.cpp
+ ${ARGV0}/platform/source/mbed_sdk_boot.c
+ ${ARGV0}/platform/source/mbed_semihost_api.c
+ ${ARGV0}/platform/source/mbed_stats.c
+ ${ARGV0}/platform/source/mbed_thread.cpp
+ ${ARGV0}/platform/source/mbed_wait_api_no_rtos.c
+ ${ARGV0}/platform/source/minimal-printf/mbed_printf_armlink_overrides.c
+ ${ARGV0}/platform/source/minimal-printf/mbed_printf_implementation.c
+ ${ARGV0}/platform/source/minimal-printf/mbed_printf_implementation.h
+ ${ARGV0}/platform/source/minimal-printf/mbed_printf_wrapper.c
+ ${ARGV0}/platform/source/newlib_nano_malloc_workaround.c
+ ${ARGV0}/rtos/include/rtos/ConditionVariable.h
+ ${ARGV0}/rtos/include/rtos/EventFlags.h
+ ${ARGV0}/rtos/include/rtos/Kernel.h
+ ${ARGV0}/rtos/include/rtos/Mail.h
+ ${ARGV0}/rtos/include/rtos/MemoryPool.h
+ ${ARGV0}/rtos/include/rtos/Mutex.h
+ ${ARGV0}/rtos/include/rtos/Queue.h
+ ${ARGV0}/rtos/include/rtos/Semaphore.h
+ ${ARGV0}/rtos/include/rtos/ThisThread.h
+ ${ARGV0}/rtos/include/rtos/Thread.h
+ ${ARGV0}/rtos/include/rtos/internal/mbed_rtos1_types.h
+ ${ARGV0}/rtos/include/rtos/internal/mbed_rtos_storage.h
+ ${ARGV0}/rtos/include/rtos/mbed_rtos_types.h
+ ${ARGV0}/rtos/include/rtos/rtos.h
+ ${ARGV0}/rtos/source/ConditionVariable.cpp
+ ${ARGV0}/rtos/source/EventFlags.cpp
+ ${ARGV0}/rtos/source/Kernel.cpp
+ ${ARGV0}/rtos/source/Mutex.cpp
+ ${ARGV0}/rtos/source/Semaphore.cpp
+ ${ARGV0}/rtos/source/ThisThread.cpp
+ ${ARGV0}/rtos/source/Thread.cpp
+ ${ARGV0}/rtos/source/rtos_handlers.h
+ ${ARGV0}/rtos/source/rtos_idle.h
+ ${ARGV0}/storage/blockdevice/COMPONENT_FLASHIAP/include/FlashIAP/FlashIAPBlockDevice.h
+ ${ARGV0}/storage/blockdevice/COMPONENT_FLASHIAP/source/FlashIAPBlockDevice.cpp
+ ${ARGV0}/storage/blockdevice/include/blockdevice/BlockDevice.h
+ ${ARGV0}/storage/blockdevice/include/blockdevice/BufferedBlockDevice.h
+ ${ARGV0}/storage/blockdevice/include/blockdevice/ChainingBlockDevice.h
+ ${ARGV0}/storage/blockdevice/include/blockdevice/ExhaustibleBlockDevice.h
+ ${ARGV0}/storage/blockdevice/include/blockdevice/FlashSimBlockDevice.h
+ ${ARGV0}/storage/blockdevice/include/blockdevice/HeapBlockDevice.h
+ ${ARGV0}/storage/blockdevice/include/blockdevice/MBRBlockDevice.h
+ ${ARGV0}/storage/blockdevice/include/blockdevice/ObservingBlockDevice.h
+ ${ARGV0}/storage/blockdevice/include/blockdevice/ProfilingBlockDevice.h
+ ${ARGV0}/storage/blockdevice/include/blockdevice/ReadOnlyBlockDevice.h
+ ${ARGV0}/storage/blockdevice/include/blockdevice/SlicingBlockDevice.h
+ ${ARGV0}/storage/blockdevice/include/blockdevice/internal/SFDP.h
+ ${ARGV0}/storage/blockdevice/source/BufferedBlockDevice.cpp
+ ${ARGV0}/storage/blockdevice/source/ChainingBlockDevice.cpp
+ ${ARGV0}/storage/blockdevice/source/ExhaustibleBlockDevice.cpp
+ ${ARGV0}/storage/blockdevice/source/FlashSimBlockDevice.cpp
+ ${ARGV0}/storage/blockdevice/source/HeapBlockDevice.cpp
+ ${ARGV0}/storage/blockdevice/source/MBRBlockDevice.cpp
+ ${ARGV0}/storage/blockdevice/source/ObservingBlockDevice.cpp
+ ${ARGV0}/storage/blockdevice/source/ProfilingBlockDevice.cpp
+ ${ARGV0}/storage/blockdevice/source/ReadOnlyBlockDevice.cpp
+ ${ARGV0}/storage/blockdevice/source/SFDP.cpp
+ ${ARGV0}/storage/blockdevice/source/SlicingBlockDevice.cpp
+ ${ARGV0}/storage/filesystem/fat/ChaN/diskio.h
+ ${ARGV0}/storage/filesystem/fat/ChaN/ff.cpp
+ ${ARGV0}/storage/filesystem/fat/ChaN/ff.h
+ ${ARGV0}/storage/filesystem/fat/ChaN/ffconf.h
+ ${ARGV0}/storage/filesystem/fat/ChaN/ffunicode.cpp
+ ${ARGV0}/storage/filesystem/fat/ChaN/integer.h
+ ${ARGV0}/storage/filesystem/fat/include/fat/FATFileSystem.h
+ ${ARGV0}/storage/filesystem/fat/source/FATFileSystem.cpp
+ ${ARGV0}/storage/filesystem/include/filesystem/Dir.h
+ ${ARGV0}/storage/filesystem/include/filesystem/File.h
+ ${ARGV0}/storage/filesystem/include/filesystem/FileSystem.h
+ ${ARGV0}/storage/filesystem/include/filesystem/mbed_filesystem.h
+ ${ARGV0}/storage/filesystem/littlefs/include/littlefs/LittleFileSystem.h
+ ${ARGV0}/storage/filesystem/littlefs/littlefs/lfs.c
+ ${ARGV0}/storage/filesystem/littlefs/littlefs/lfs.h
+ ${ARGV0}/storage/filesystem/littlefs/littlefs/lfs_util.c
+ ${ARGV0}/storage/filesystem/littlefs/littlefs/lfs_util.h
+ ${ARGV0}/storage/filesystem/littlefs/source/LittleFileSystem.cpp
+ ${ARGV0}/storage/filesystem/littlefsv2/include/littlefsv2/LittleFileSystem2.h
+ ${ARGV0}/storage/filesystem/littlefsv2/littlefs/lfs2.c
+ ${ARGV0}/storage/filesystem/littlefsv2/littlefs/lfs2.h
+ ${ARGV0}/storage/filesystem/littlefsv2/littlefs/lfs2_util.c
+ ${ARGV0}/storage/filesystem/littlefsv2/littlefs/lfs2_util.h
+ ${ARGV0}/storage/filesystem/littlefsv2/source/LittleFileSystem2.cpp
+ ${ARGV0}/storage/filesystem/source/Dir.cpp
+ ${ARGV0}/storage/filesystem/source/File.cpp
+ ${ARGV0}/storage/filesystem/source/FileSystem.cpp
+ ${ARGV0}/storage/kvstore/direct_access_devicekey/include/direct_access_devicekey/DirectAccessDevicekey.h
+ ${ARGV0}/storage/kvstore/direct_access_devicekey/source/DirectAccessDevicekey.cpp
+ ${ARGV0}/storage/kvstore/filesystemstore/include/filesystemstore/FileSystemStore.h
+ ${ARGV0}/storage/kvstore/filesystemstore/source/FileSystemStore.cpp
+ ${ARGV0}/storage/kvstore/include/kvstore/KVStore.h
+ ${ARGV0}/storage/kvstore/kv_config/include/kv_config/kv_config.h
+ ${ARGV0}/storage/kvstore/kv_config/source/kv_config.cpp
+ ${ARGV0}/storage/kvstore/kvstore_global_api/include/kvstore_global_api/KVMap.h
+ ${ARGV0}/storage/kvstore/kvstore_global_api/include/kvstore_global_api/kvstore_global_api.h
+ ${ARGV0}/storage/kvstore/kvstore_global_api/source/KVMap.cpp
+ ${ARGV0}/storage/kvstore/kvstore_global_api/source/kvstore_global_api.cpp
+ ${ARGV0}/storage/kvstore/securestore/include/securestore/SecureStore.h
+ ${ARGV0}/storage/kvstore/securestore/source/SecureStore.cpp
+ ${ARGV0}/storage/kvstore/tdbstore/include/tdbstore/TDBStore.h
+ ${ARGV0}/storage/kvstore/tdbstore/source/TDBStore.cpp
+ ${ARGV0}/storage/platform/source/PlatformStorage.cpp
+ ${ARGV0}/targets/TARGET_STM/PeripheralPins.h
+ ${ARGV0}/targets/TARGET_STM/PinNamesTypes.h
+ ${ARGV0}/targets/TARGET_STM/PortNames.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/PeripheralNames.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h723xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h725xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h730xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h730xxq.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h733xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h735xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h742xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h743xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h745xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h747xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h750xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h753xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h755xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h757xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h7a3xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h7a3xxq.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h7b0xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h7b0xxq.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h7b3xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h7b3xxq.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h7xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/system_stm32h7xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/Legacy/stm32_hal_legacy.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_adc.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_adc.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_adc_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_adc_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_cec.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_cec.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_comp.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_comp.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_cordic.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_cordic.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_cortex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_cortex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_crc.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_crc.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_crc_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_crc_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_cryp.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_cryp.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_cryp_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_cryp_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dac.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dac.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dac_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dac_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dcmi.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dcmi.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_def.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dfsdm.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dfsdm.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dfsdm_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dfsdm_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dma.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dma.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dma2d.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dma2d.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dma_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dma_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dsi.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dsi.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dts.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dts.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_eth.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_eth.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_eth_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_eth_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_exti.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_exti.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_fdcan.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_fdcan.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_flash.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_flash.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_flash_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_flash_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_fmac.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_fmac.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_gfxmmu.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_gfxmmu.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_gpio.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_gpio.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_gpio_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_hash.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_hash.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_hash_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_hash_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_hcd.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_hcd.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_hrtim.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_hrtim.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_hsem.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_hsem.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_i2c.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_i2c.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_i2c_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_i2c_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_i2s.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_i2s.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_i2s_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_i2s_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_irda.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_irda.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_irda_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_iwdg.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_iwdg.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_jpeg.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_jpeg.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_lptim.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_lptim.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_ltdc.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_ltdc.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_ltdc_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_ltdc_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_mdios.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_mdios.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_mdma.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_mdma.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_mmc.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_mmc.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_mmc_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_mmc_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_nand.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_nand.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_nor.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_nor.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_opamp.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_opamp.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_opamp_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_opamp_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_ospi.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_ospi.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_otfdec.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_otfdec.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_pcd.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_pcd.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_pcd_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_pcd_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_pssi.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_pssi.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_pwr.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_pwr.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_pwr_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_pwr_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_qspi.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_qspi.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_ramecc.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_ramecc.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rcc.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rcc.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rcc_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rcc_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rng.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rng.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rng_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rng_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rtc.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rtc.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rtc_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rtc_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sai.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sai.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sai_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sai_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sd.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sd.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sd_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sd_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sdram.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sdram.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_smartcard.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_smartcard.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_smartcard_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_smartcard_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_smbus.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_smbus.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_spdifrx.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_spdifrx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_spi.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_spi.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_spi_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_spi_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sram.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sram.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_swpmi.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_swpmi.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_tim.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_tim.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_tim_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_tim_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_uart.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_uart.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_uart_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_uart_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_usart.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_usart.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_usart_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_usart_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_wwdg.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_wwdg.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_adc.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_adc.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_bdma.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_bdma.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_bus.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_comp.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_comp.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_cordic.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_cordic.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_cortex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_crc.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_crc.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_crs.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_crs.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_dac.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_dac.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_delayblock.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_delayblock.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_dma.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_dma.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_dma2d.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_dma2d.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_dmamux.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_exti.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_exti.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_fmac.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_fmac.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_fmc.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_fmc.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_gpio.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_gpio.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_hrtim.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_hrtim.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_hsem.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_i2c.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_i2c.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_iwdg.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_lptim.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_lptim.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_lpuart.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_lpuart.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_mdma.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_mdma.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_opamp.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_opamp.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_pwr.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_pwr.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_rcc.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_rcc.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_rng.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_rng.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_rtc.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_rtc.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_sdmmc.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_sdmmc.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_spi.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_spi.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_swpmi.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_swpmi.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_system.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_tim.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_tim.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_usart.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_usart.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_usb.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_usb.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_utils.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_utils.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_wwdg.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/stm32h7xx_hal_conf.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/system_stm32h7xx_dualcore_boot_cm4_cm7.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/system_stm32h7xx_singlecore.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/TARGET_STM32H743xI/TARGET_NUCLEO_H743ZI2/PeripheralPins.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/TARGET_STM32H743xI/TARGET_NUCLEO_H743ZI2/PinNames.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/TARGET_STM32H743xI/TARGET_NUCLEO_H743ZI2/system_clock.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/TARGET_STM32H743xI/cmsis_nvic.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/analogin_device.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/analogout_device.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/cmsis.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/flash_api.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/gpio_irq_device.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/gpio_irq_device.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/i2c_device.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/i2c_device.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/objects.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/pin_device.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/pwmout_device.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/pwmout_device.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/serial_device.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/spi_api.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/spi_device.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/us_ticker_data.h
+ ${ARGV0}/targets/TARGET_STM/USBPhyHw.h
+ ${ARGV0}/targets/TARGET_STM/USBPhy_STM32.cpp
+ ${ARGV0}/targets/TARGET_STM/analogin_api.c
+ ${ARGV0}/targets/TARGET_STM/analogout_api.c
+ ${ARGV0}/targets/TARGET_STM/can_api.c
+ ${ARGV0}/targets/TARGET_STM/device.h
+ ${ARGV0}/targets/TARGET_STM/gpio_api.c
+ ${ARGV0}/targets/TARGET_STM/gpio_irq_api.c
+ ${ARGV0}/targets/TARGET_STM/gpio_object.h
+ ${ARGV0}/targets/TARGET_STM/hal_tick_overrides.c
+ ${ARGV0}/targets/TARGET_STM/i2c_api.c
+ ${ARGV0}/targets/TARGET_STM/lp_ticker.c
+ ${ARGV0}/targets/TARGET_STM/lp_ticker_defines.h
+ ${ARGV0}/targets/TARGET_STM/mbed_crc_api.c
+ ${ARGV0}/targets/TARGET_STM/mbed_overrides.c
+ ${ARGV0}/targets/TARGET_STM/mbed_rtx.h
+ ${ARGV0}/targets/TARGET_STM/nvic_addr.h
+ ${ARGV0}/targets/TARGET_STM/ospi_api.c
+ ${ARGV0}/targets/TARGET_STM/pinmap.c
+ ${ARGV0}/targets/TARGET_STM/port_api.c
+ ${ARGV0}/targets/TARGET_STM/pwmout_api.c
+ ${ARGV0}/targets/TARGET_STM/qspi_api.c
+ ${ARGV0}/targets/TARGET_STM/reset_reason.c
+ ${ARGV0}/targets/TARGET_STM/rtc_api.c
+ ${ARGV0}/targets/TARGET_STM/rtc_api_hal.h
+ ${ARGV0}/targets/TARGET_STM/serial_api.c
+ ${ARGV0}/targets/TARGET_STM/serial_api_hal.h
+ ${ARGV0}/targets/TARGET_STM/sleep.c
+ ${ARGV0}/targets/TARGET_STM/stm32_assert.h
+ ${ARGV0}/targets/TARGET_STM/stm_spi_api.c
+ ${ARGV0}/targets/TARGET_STM/trng_api.c
+ ${ARGV0}/targets/TARGET_STM/us_ticker.c
+ ${ARGV0}/targets/TARGET_STM/us_ticker_defines.h
+ ${ARGV0}/targets/TARGET_STM/watchdog_api.c
+ mbed_config.h
+ )
+endmacro()
+
+macro(target_include_directories_mbed)
+ target_include_directories(${ARGV0} PRIVATE
+ ${ARGV1}/targets/TARGET_STM/TARGET_STM32H7/TARGET_STM32H743xI/TARGET_NUCLEO_H743ZI2
+ ${ARGV1}/targets/TARGET_STM/TARGET_STM32H7/TARGET_STM32H743xI
+ ${ARGV1}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/Legacy
+ ${ARGV1}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver
+ ${ARGV1}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS
+ ${ARGV1}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW
+ ${ARGV1}/targets/TARGET_STM/TARGET_STM32H7
+ ${ARGV1}/targets/TARGET_STM
+ ${ARGV1}/storage/kvstore/tdbstore/include/tdbstore
+ ${ARGV1}/storage/kvstore/tdbstore/include
+ ${ARGV1}/storage/kvstore/tdbstore
+ ${ARGV1}/storage/kvstore/securestore/include/securestore
+ ${ARGV1}/storage/kvstore/securestore/include
+ ${ARGV1}/storage/kvstore/securestore
+ ${ARGV1}/storage/kvstore/kvstore_global_api/include/kvstore_global_api
+ ${ARGV1}/storage/kvstore/kvstore_global_api/include
+ ${ARGV1}/storage/kvstore/kvstore_global_api
+ ${ARGV1}/storage/kvstore/kv_config/include/kv_config
+ ${ARGV1}/storage/kvstore/kv_config/include
+ ${ARGV1}/storage/kvstore/kv_config
+ ${ARGV1}/storage/kvstore/include/kvstore
+ ${ARGV1}/storage/kvstore/include
+ ${ARGV1}/storage/kvstore/filesystemstore/include/filesystemstore
+ ${ARGV1}/storage/kvstore/filesystemstore/include
+ ${ARGV1}/storage/kvstore/filesystemstore
+ ${ARGV1}/storage/kvstore/direct_access_devicekey/include/direct_access_devicekey
+ ${ARGV1}/storage/kvstore/direct_access_devicekey/include
+ ${ARGV1}/storage/kvstore/direct_access_devicekey
+ ${ARGV1}/storage/kvstore
+ ${ARGV1}/storage/filesystem/littlefsv2/littlefs
+ ${ARGV1}/storage/filesystem/littlefsv2/include/littlefsv2
+ ${ARGV1}/storage/filesystem/littlefsv2/include
+ ${ARGV1}/storage/filesystem/littlefsv2
+ ${ARGV1}/storage/filesystem/littlefs/littlefs
+ ${ARGV1}/storage/filesystem/littlefs/include/littlefs
+ ${ARGV1}/storage/filesystem/littlefs/include
+ ${ARGV1}/storage/filesystem/littlefs
+ ${ARGV1}/storage/filesystem/include/filesystem
+ ${ARGV1}/storage/filesystem/include
+ ${ARGV1}/storage/filesystem/fat/include/fat
+ ${ARGV1}/storage/filesystem/fat/include
+ ${ARGV1}/storage/filesystem/fat/ChaN
+ ${ARGV1}/storage/filesystem/fat
+ ${ARGV1}/storage/filesystem
+ ${ARGV1}/storage/blockdevice/include/blockdevice/internal
+ ${ARGV1}/storage/blockdevice/include/blockdevice
+ ${ARGV1}/storage/blockdevice/include
+ ${ARGV1}/storage/blockdevice/COMPONENT_FLASHIAP/include/FlashIAP
+ ${ARGV1}/storage/blockdevice/COMPONENT_FLASHIAP/include
+ ${ARGV1}/storage/blockdevice/COMPONENT_FLASHIAP
+ ${ARGV1}/storage/blockdevice
+ ${ARGV1}/storage
+ ${ARGV1}/rtos/source
+ ${ARGV1}/rtos/include/rtos/internal
+ ${ARGV1}/rtos/include/rtos
+ ${ARGV1}/rtos/include
+ ${ARGV1}/rtos
+ ${ARGV1}/platform/source/minimal-printf
+ ${ARGV1}/platform/source
+ ${ARGV1}/platform/randlib/include/mbed-client-randlib/platform
+ ${ARGV1}/platform/randlib/include/mbed-client-randlib
+ ${ARGV1}/platform/randlib/include
+ ${ARGV1}/platform/randlib
+ ${ARGV1}/platform/mbed-trace/include/mbed-trace
+ ${ARGV1}/platform/mbed-trace/include
+ ${ARGV1}/platform/mbed-trace
+ ${ARGV1}/platform/include/platform/internal
+ ${ARGV1}/platform/include/platform
+ ${ARGV1}/platform/include
+ ${ARGV1}/platform/cxxsupport
+ ${ARGV1}/platform
+ ${ARGV1}/hal/usb/include/usb
+ ${ARGV1}/hal/usb/include
+ ${ARGV1}/hal/usb
+ ${ARGV1}/hal/include/hal
+ ${ARGV1}/hal/include
+ ${ARGV1}/hal
+ ${ARGV1}/features/frameworks/utest/utest
+ ${ARGV1}/features/frameworks/utest
+ ${ARGV1}/features/frameworks/unity/unity
+ ${ARGV1}/features/frameworks/unity
+ ${ARGV1}/features/frameworks/mbed-client-cli/mbed-client-cli
+ ${ARGV1}/features/frameworks/mbed-client-cli
+ ${ARGV1}/features/frameworks/greentea-client/greentea-client
+ ${ARGV1}/features/frameworks/greentea-client
+ ${ARGV1}/features/frameworks
+ ${ARGV1}/features
+ ${ARGV1}/events/include/events/internal
+ ${ARGV1}/events/include/events
+ ${ARGV1}/events/include
+ ${ARGV1}/events
+ ${ARGV1}/drivers/usb/include/usb/internal
+ ${ARGV1}/drivers/usb/include/usb
+ ${ARGV1}/drivers/usb/include
+ ${ARGV1}/drivers/usb
+ ${ARGV1}/drivers/include/drivers/interfaces
+ ${ARGV1}/drivers/include/drivers
+ ${ARGV1}/drivers/include
+ ${ARGV1}/drivers/device_key/include/device_key
+ ${ARGV1}/drivers/device_key/include
+ ${ARGV1}/drivers/device_key
+ ${ARGV1}/drivers
+ ${ARGV1}/connectivity/nfc/libraries/stack/transceiver
+ ${ARGV1}/connectivity/nfc/libraries/stack/tech/type4
+ ${ARGV1}/connectivity/nfc/libraries/stack/tech/isodep
+ ${ARGV1}/connectivity/nfc/libraries/stack/tech/iso7816
+ ${ARGV1}/connectivity/nfc/libraries/stack/tech
+ ${ARGV1}/connectivity/nfc/libraries/stack/platform
+ ${ARGV1}/connectivity/nfc/libraries/stack/ndef
+ ${ARGV1}/connectivity/nfc/libraries/stack
+ ${ARGV1}/connectivity/nfc/libraries/acore/acore
+ ${ARGV1}/connectivity/nfc/libraries/acore
+ ${ARGV1}/connectivity/nfc/libraries
+ ${ARGV1}/connectivity/nfc/include/nfc/ndef/common
+ ${ARGV1}/connectivity/nfc/include/nfc/ndef
+ ${ARGV1}/connectivity/nfc/include/nfc
+ ${ARGV1}/connectivity/nfc/include
+ ${ARGV1}/connectivity/nfc
+ ${ARGV1}/connectivity/netsocket/include/netsocket
+ ${ARGV1}/connectivity/netsocket/include
+ ${ARGV1}/connectivity/netsocket
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/libNET/src
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/libNET
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/libDHCPv6
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/ipv6_stack
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/configs/base
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/configs
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/whiteboard
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/utils
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/random_early_detection
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/pan_blacklist
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/nist_aes_kw
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/nd_proxy
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mle_service
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns/fnet/fnet_stack/stack
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns/fnet/fnet_stack/services/serial
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns/fnet/fnet_stack/services/poll
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns/fnet/fnet_stack/services/mdns
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns/fnet/fnet_stack/services/dns
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns/fnet/fnet_stack/services
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns/fnet/fnet_stack/port/cpu
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns/fnet/fnet_stack/port/compiler
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns/fnet/fnet_stack/port
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns/fnet/fnet_stack
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns/fnet
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mac_neighbor_table
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/load_balance
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/ieee_802_11
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/hmac
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/fnv_hash
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/fhss
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/etx
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/blacklist
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/Trickle
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/Neighbor_cache
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/protocols/tls_sec_prot
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/protocols/radius_sec_prot
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/protocols/msg_sec_prot
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/protocols/key_sec_prot
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/protocols/gkh_sec_prot
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/protocols/fwh_sec_prot
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/protocols/eap_tls_sec_prot
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/protocols
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/kmp
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/eapol
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/TLS
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/PANA
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/Common
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/RPL
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/NWK_INTERFACE/Include
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/NWK_INTERFACE
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/MPL
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/MLE
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/MAC/virtual_rf
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/MAC/IEEE802_15_4
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/MAC
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/DHCPv6_client
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/DHCPv6_Server
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Core/include
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Core
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Common_Protocols
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/BorderRouter
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/6LoWPAN/ws
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/6LoWPAN/Thread
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/6LoWPAN/NVM
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/6LoWPAN/ND
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/6LoWPAN/Mesh
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/6LoWPAN/MAC
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/6LoWPAN/IPHC_Decode
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/6LoWPAN/Fragmentation
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/6LoWPAN/Bootstraps
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/6LoWPAN
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/nanostack/platform
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/nanostack
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack-eventloop/source
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack-eventloop/nanostack-event-loop/platform
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack-eventloop/nanostack-event-loop
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack-eventloop
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack
+ ${ARGV1}/connectivity/nanostack/nanostack-hal-mbed-cmsis-rtos
+ ${ARGV1}/connectivity/nanostack/mbed-mesh-api/source/include
+ ${ARGV1}/connectivity/nanostack/mbed-mesh-api/source
+ ${ARGV1}/connectivity/nanostack/mbed-mesh-api/mbed-mesh-api
+ ${ARGV1}/connectivity/nanostack/mbed-mesh-api
+ ${ARGV1}/connectivity/nanostack/include/nanostack-interface
+ ${ARGV1}/connectivity/nanostack/include
+ ${ARGV1}/connectivity/nanostack/coap-service/source/include
+ ${ARGV1}/connectivity/nanostack/coap-service/source
+ ${ARGV1}/connectivity/nanostack/coap-service/coap-service
+ ${ARGV1}/connectivity/nanostack/coap-service
+ ${ARGV1}/connectivity/nanostack
+ ${ARGV1}/connectivity/mbedtls/source
+ ${ARGV1}/connectivity/mbedtls/platform/inc
+ ${ARGV1}/connectivity/mbedtls/platform
+ ${ARGV1}/connectivity/mbedtls/include/mbedtls
+ ${ARGV1}/connectivity/mbedtls/include
+ ${ARGV1}/connectivity/mbedtls
+ ${ARGV1}/connectivity/lwipstack/lwip/src/include/netif
+ ${ARGV1}/connectivity/lwipstack/lwip/src/include/lwip/prot
+ ${ARGV1}/connectivity/lwipstack/lwip/src/include/lwip/priv
+ ${ARGV1}/connectivity/lwipstack/lwip/src/include/lwip
+ ${ARGV1}/connectivity/lwipstack/lwip/src/include/compat/posix/sys
+ ${ARGV1}/connectivity/lwipstack/lwip/src/include/compat/posix/net
+ ${ARGV1}/connectivity/lwipstack/lwip/src/include/compat/posix/arpa
+ ${ARGV1}/connectivity/lwipstack/lwip/src/include/compat/posix
+ ${ARGV1}/connectivity/lwipstack/lwip/src/include/compat
+ ${ARGV1}/connectivity/lwipstack/lwip/src/include
+ ${ARGV1}/connectivity/lwipstack/lwip/src
+ ${ARGV1}/connectivity/lwipstack/lwip-sys/arch
+ ${ARGV1}/connectivity/lwipstack/lwip-sys
+ ${ARGV1}/connectivity/lwipstack/lwip
+ ${ARGV1}/connectivity/lwipstack/include/lwipstack
+ ${ARGV1}/connectivity/lwipstack/include
+ ${ARGV1}/connectivity/lwipstack
+ ${ARGV1}/connectivity/lorawan/system
+ ${ARGV1}/connectivity/lorawan/lorastack/phy
+ ${ARGV1}/connectivity/lorawan/lorastack/mac
+ ${ARGV1}/connectivity/lorawan/lorastack
+ ${ARGV1}/connectivity/lorawan/include/lorawan
+ ${ARGV1}/connectivity/lorawan/include
+ ${ARGV1}/connectivity/lorawan
+ ${ARGV1}/connectivity/libraries/ppp/include/ppp
+ ${ARGV1}/connectivity/libraries/ppp/include/polarssl
+ ${ARGV1}/connectivity/libraries/ppp/include
+ ${ARGV1}/connectivity/libraries/ppp
+ ${ARGV1}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/platform
+ ${ARGV1}/connectivity/libraries/nanostack-libservice/mbed-client-libservice
+ ${ARGV1}/connectivity/libraries/nanostack-libservice
+ ${ARGV1}/connectivity/libraries/mbed-coap/source/include
+ ${ARGV1}/connectivity/libraries/mbed-coap/source
+ ${ARGV1}/connectivity/libraries/mbed-coap/mbed-coap
+ ${ARGV1}/connectivity/libraries/mbed-coap
+ ${ARGV1}/connectivity/libraries
+ ${ARGV1}/connectivity/drivers/wifi/esp8266-driver/ESP8266
+ ${ARGV1}/connectivity/drivers/wifi/esp8266-driver
+ ${ARGV1}/connectivity/drivers/wifi
+ ${ARGV1}/connectivity/drivers/nfc/PN512/source/transceiver
+ ${ARGV1}/connectivity/drivers/nfc/PN512/source
+ ${ARGV1}/connectivity/drivers/nfc/PN512/include/nfc/controllers
+ ${ARGV1}/connectivity/drivers/nfc/PN512/include/nfc
+ ${ARGV1}/connectivity/drivers/nfc/PN512/include
+ ${ARGV1}/connectivity/drivers/nfc/PN512
+ ${ARGV1}/connectivity/drivers/nfc
+ ${ARGV1}/connectivity/drivers/mbedtls/TARGET_STM
+ ${ARGV1}/connectivity/drivers/emac/TARGET_STM/TARGET_STM32H7/lan8742
+ ${ARGV1}/connectivity/drivers/emac/TARGET_STM/TARGET_STM32H7
+ ${ARGV1}/connectivity/drivers/emac/TARGET_STM
+ ${ARGV1}/connectivity/drivers/cellular/UBLOX/PPP
+ ${ARGV1}/connectivity/drivers/cellular/UBLOX/N2XX
+ ${ARGV1}/connectivity/drivers/cellular/UBLOX/AT
+ ${ARGV1}/connectivity/drivers/cellular/UBLOX
+ ${ARGV1}/connectivity/drivers/cellular/TELIT/ME910
+ ${ARGV1}/connectivity/drivers/cellular/TELIT/ME310
+ ${ARGV1}/connectivity/drivers/cellular/TELIT/HE910
+ ${ARGV1}/connectivity/drivers/cellular/TELIT
+ ${ARGV1}/connectivity/drivers/cellular/RiotMicro/AT
+ ${ARGV1}/connectivity/drivers/cellular/RiotMicro
+ ${ARGV1}/connectivity/drivers/cellular/QUECTEL/UG96
+ ${ARGV1}/connectivity/drivers/cellular/QUECTEL/M26
+ ${ARGV1}/connectivity/drivers/cellular/QUECTEL/EC2X
+ ${ARGV1}/connectivity/drivers/cellular/QUECTEL/BG96
+ ${ARGV1}/connectivity/drivers/cellular/QUECTEL/BC95
+ ${ARGV1}/connectivity/drivers/cellular/QUECTEL
+ ${ARGV1}/connectivity/drivers/cellular/MultiTech/DragonflyNano/PPP
+ ${ARGV1}/connectivity/drivers/cellular/MultiTech/DragonflyNano
+ ${ARGV1}/connectivity/drivers/cellular/MultiTech
+ ${ARGV1}/connectivity/drivers/cellular/GENERIC/GENERIC_AT3GPP
+ ${ARGV1}/connectivity/drivers/cellular/GENERIC
+ ${ARGV1}/connectivity/drivers/cellular/GEMALTO/CINTERION
+ ${ARGV1}/connectivity/drivers/cellular/GEMALTO
+ ${ARGV1}/connectivity/drivers/cellular/Altair/ALT1250/PPP
+ ${ARGV1}/connectivity/drivers/cellular/Altair/ALT1250
+ ${ARGV1}/connectivity/drivers/cellular/Altair
+ ${ARGV1}/connectivity/drivers/cellular
+ ${ARGV1}/connectivity/drivers/802.15.4_RF/stm-s2lp-rf-driver/stm-s2lp-rf-driver
+ ${ARGV1}/connectivity/drivers/802.15.4_RF/stm-s2lp-rf-driver/source
+ ${ARGV1}/connectivity/drivers/802.15.4_RF/stm-s2lp-rf-driver
+ ${ARGV1}/connectivity/drivers/802.15.4_RF/mcr20a-rf-driver/source
+ ${ARGV1}/connectivity/drivers/802.15.4_RF/mcr20a-rf-driver/mcr20a-rf-driver
+ ${ARGV1}/connectivity/drivers/802.15.4_RF/mcr20a-rf-driver
+ ${ARGV1}/connectivity/drivers/802.15.4_RF/atmel-rf-driver/source
+ ${ARGV1}/connectivity/drivers/802.15.4_RF/atmel-rf-driver/atmel-rf-driver
+ ${ARGV1}/connectivity/drivers/802.15.4_RF/atmel-rf-driver
+ ${ARGV1}/connectivity/drivers/802.15.4_RF
+ ${ARGV1}/connectivity/drivers
+ ${ARGV1}/connectivity/cellular/include/cellular/framework/device
+ ${ARGV1}/connectivity/cellular/include/cellular/framework/common
+ ${ARGV1}/connectivity/cellular/include/cellular/framework/AT
+ ${ARGV1}/connectivity/cellular/include/cellular/framework/API
+ ${ARGV1}/connectivity/cellular/include/cellular/framework
+ ${ARGV1}/connectivity/cellular/include/cellular
+ ${ARGV1}/connectivity/cellular/include
+ ${ARGV1}/connectivity/cellular
+ ${ARGV1}/connectivity
+ ${ARGV1}/cmsis/device/rtos/include
+ ${ARGV1}/cmsis/device/rtos
+ ${ARGV1}/cmsis/device/RTE/include
+ ${ARGV1}/cmsis/device/RTE
+ ${ARGV1}/cmsis/device
+ ${ARGV1}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include
+ ${ARGV1}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M
+ ${ARGV1}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source
+ ${ARGV1}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Include1
+ ${ARGV1}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Include
+ ${ARGV1}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Config
+ ${ARGV1}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX
+ ${ARGV1}/cmsis/CMSIS_5/CMSIS/RTOS2/Include
+ ${ARGV1}/cmsis/CMSIS_5/CMSIS/RTOS2
+ ${ARGV1}/cmsis/CMSIS_5/CMSIS
+ ${ARGV1}/cmsis/CMSIS_5
+ ${ARGV1}/cmsis
+ ${ARGV1}
+ )
+
+endmacro()
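+
+# Usage sketch (editorial illustration, not generated output): assuming the
+# application target is called `app` and MBED_PATH points at the mbed-os
+# checkout, the macro above would be invoked as
+#
+#   target_include_directories_mbed(app ${MBED_PATH})
+#
+# i.e. ARGV0 names the target and ARGV1 the mbed-os root directory; the
+# companion sources macro earlier in this file uses its first argument as
+# the same mbed-os root prefix for its file list.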
diff --git a/onert-micro/tests/mbed-os/mbed_config.h b/onert-micro/tests/mbed-os/mbed_config.h
new file mode 100644
index 000000000..5649f461f
--- /dev/null
+++ b/onert-micro/tests/mbed-os/mbed_config.h
@@ -0,0 +1,488 @@
+/*
+ * mbed SDK
+ * Copyright (c) 2017 ARM Limited
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Automatically generated configuration file.
+// DO NOT EDIT, content will be overwritten.
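+// (Such headers are normally emitted by the Mbed OS build tools from the
+// project's mbed_app.json and target definitions; this copy is checked in
+// so the CMake macros above can reference it without running those tools.)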
+
+#ifndef __MBED_CONFIG_DATA__
+#define __MBED_CONFIG_DATA__
+
+// Configuration parameters
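+// Each MBED_CONF_* value below is a compile-time option consumed via the
+// preprocessor; illustrative use only, e.g.:
+//   #if MBED_CONF_CELLULAR_PRESENT
+//   /* cellular-dependent code */
+//   #endif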
+#define CLOCK_SOURCE USE_PLL_HSE_EXTC | USE_PLL_HSI // set by target:MCU_STM32H7
+#define HSE_VALUE 8000000 // set by target:NUCLEO_H743ZI2
+#define LPTICKER_DELAY_TICKS 0 // set by target:MCU_STM32H7
+#define MBED_CONF_ALT1250_PPP_BAUDRATE 115200 // set by library:ALT1250_PPP
+#define MBED_CONF_ALT1250_PPP_PROVIDE_DEFAULT 0 // set by library:ALT1250_PPP
+#define MBED_CONF_ATMEL_RF_ASSUME_SPACED_SPI 1 // set by library:atmel-rf[STM]
+#define MBED_CONF_ATMEL_RF_FULL_SPI_SPEED 7500000 // set by library:atmel-rf
+#define MBED_CONF_ATMEL_RF_FULL_SPI_SPEED_BYTE_SPACING 250 // set by library:atmel-rf
+#define MBED_CONF_ATMEL_RF_IRQ_THREAD_STACK_SIZE 1024 // set by library:atmel-rf
+#define MBED_CONF_ATMEL_RF_LOW_SPI_SPEED 3750000 // set by library:atmel-rf
+#define MBED_CONF_ATMEL_RF_PROVIDE_DEFAULT 0 // set by library:atmel-rf
+#define MBED_CONF_ATMEL_RF_USE_SPI_SPACING_API 0 // set by library:atmel-rf
+#define MBED_CONF_CELLULAR_AT_HANDLER_BUFFER_SIZE 32 // set by library:cellular
+#define MBED_CONF_CELLULAR_CONTROL_PLANE_OPT 0 // set by library:cellular
+#define MBED_CONF_CELLULAR_DEBUG_AT 0 // set by library:cellular
+#define MBED_CONF_CELLULAR_MAX_CP_DATA_RECV_LEN 1358 // set by library:cellular
+#define MBED_CONF_CELLULAR_PRESENT 1 // set by library:cellular
+#define MBED_CONF_CELLULAR_RANDOM_MAX_START_DELAY 0 // set by library:cellular
+#define MBED_CONF_CELLULAR_USE_APN_LOOKUP 0 // set by library:cellular
+#define MBED_CONF_CELLULAR_USE_SMS 0 // set by library:cellular
+#define MBED_CONF_DRIVERS_OSPI_CSN OSPI_FLASH1_CSN // set by library:drivers
+#define MBED_CONF_DRIVERS_OSPI_DQS OSPI_FLASH1_DQS // set by library:drivers
+#define MBED_CONF_DRIVERS_OSPI_IO0 OSPI_FLASH1_IO0 // set by library:drivers
+#define MBED_CONF_DRIVERS_OSPI_IO1 OSPI_FLASH1_IO1 // set by library:drivers
+#define MBED_CONF_DRIVERS_OSPI_IO2 OSPI_FLASH1_IO2 // set by library:drivers
+#define MBED_CONF_DRIVERS_OSPI_IO3 OSPI_FLASH1_IO3 // set by library:drivers
+#define MBED_CONF_DRIVERS_OSPI_IO4 OSPI_FLASH1_IO4 // set by library:drivers
+#define MBED_CONF_DRIVERS_OSPI_IO5 OSPI_FLASH1_IO5 // set by library:drivers
+#define MBED_CONF_DRIVERS_OSPI_IO6 OSPI_FLASH1_IO6 // set by library:drivers
+#define MBED_CONF_DRIVERS_OSPI_IO7 OSPI_FLASH1_IO7 // set by library:drivers
+#define MBED_CONF_DRIVERS_OSPI_SCK OSPI_FLASH1_SCK // set by library:drivers
+#define MBED_CONF_DRIVERS_QSPI_CSN QSPI_FLASH1_CSN // set by library:drivers
+#define MBED_CONF_DRIVERS_QSPI_IO0 QSPI_FLASH1_IO0 // set by library:drivers
+#define MBED_CONF_DRIVERS_QSPI_IO1 QSPI_FLASH1_IO1 // set by library:drivers
+#define MBED_CONF_DRIVERS_QSPI_IO2 QSPI_FLASH1_IO2 // set by library:drivers
+#define MBED_CONF_DRIVERS_QSPI_IO3 QSPI_FLASH1_IO3 // set by library:drivers
+#define MBED_CONF_DRIVERS_QSPI_SCK QSPI_FLASH1_SCK // set by library:drivers
+#define MBED_CONF_DRIVERS_UART_SERIAL_RXBUF_SIZE 256 // set by library:drivers
+#define MBED_CONF_DRIVERS_UART_SERIAL_TXBUF_SIZE 256 // set by library:drivers
+#define MBED_CONF_ESP8266_BUILT_IN_DNS 0 // set by library:esp8266
+#define MBED_CONF_ESP8266_DEBUG 0 // set by library:esp8266
+#define MBED_CONF_ESP8266_POWER_OFF_TIME_MS 3 // set by library:esp8266
+#define MBED_CONF_ESP8266_POWER_ON_POLARITY 0 // set by library:esp8266
+#define MBED_CONF_ESP8266_POWER_ON_TIME_MS 3 // set by library:esp8266
+#define MBED_CONF_ESP8266_PROVIDE_DEFAULT 0 // set by library:esp8266
+#define MBED_CONF_ESP8266_SERIAL_BAUDRATE 115200 // set by library:esp8266
+#define MBED_CONF_ESP8266_SNTP_ENABLE 0 // set by library:esp8266
+#define MBED_CONF_ESP8266_SNTP_SERVER0 "" // set by library:esp8266
+#define MBED_CONF_ESP8266_SNTP_SERVER1 "" // set by library:esp8266
+#define MBED_CONF_ESP8266_SNTP_SERVER2 "" // set by library:esp8266
+#define MBED_CONF_ESP8266_SNTP_TIMEZONE 0 // set by library:esp8266
+#define MBED_CONF_ESP8266_SOCKET_BUFSIZE 8192 // set by library:esp8266
+#define MBED_CONF_EVENTS_PRESENT 1 // set by library:events
+#define MBED_CONF_EVENTS_SHARED_DISPATCH_FROM_APPLICATION 0 // set by library:events
+#define MBED_CONF_EVENTS_SHARED_EVENTSIZE 768 // set by library:events
+#define MBED_CONF_EVENTS_SHARED_HIGHPRIO_EVENTSIZE 256 // set by library:events
+#define MBED_CONF_EVENTS_SHARED_HIGHPRIO_STACKSIZE 1024 // set by library:events
+#define MBED_CONF_EVENTS_SHARED_STACKSIZE 2048 // set by library:events
+#define MBED_CONF_EVENTS_USE_LOWPOWER_TIMER_TICKER 0 // set by library:events
+#define MBED_CONF_FAT_CHAN_FFS_DBG 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_CODE_PAGE 437 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_FS_EXFAT 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_FS_HEAPBUF 1 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_FS_LOCK 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_FS_MINIMIZE 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_FS_NOFSINFO 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_FS_NORTC 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_FS_READONLY 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_FS_REENTRANT 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_FS_RPATH 1 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_FS_TIMEOUT 1000 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_FS_TINY 1 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_LFN_BUF 255 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_LFN_UNICODE 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_MAX_LFN 255 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_MAX_SS 4096 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_MIN_SS 512 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_MULTI_PARTITION 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_NORTC_MDAY 1 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_NORTC_MON 1 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_NORTC_YEAR 2017 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_PRINT_FLOAT 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_PRINT_LLI 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_SFN_BUF 12 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_STRF_ENCODE 3 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_STR_VOLUME_ID 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_SYNC_T HANDLE // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_USE_CHMOD 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_USE_EXPAND 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_USE_FASTSEEK 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_USE_FIND 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_USE_FORWARD 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_USE_LABEL 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_USE_LFN 3 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_USE_MKFS 1 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_USE_STRFUNC 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_USE_TRIM 1 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_VOLUMES 4 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_VOLUME_STRS \
+ "RAM", "NAND", "CF", "SD", "SD2", "USB", "USB2", "USB3" // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FLUSH_ON_NEW_CLUSTER 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FLUSH_ON_NEW_SECTOR 1 // set by library:fat_chan
+#define MBED_CONF_FILESYSTEM_PRESENT 1 // set by library:filesystem
+#define MBED_CONF_FLASHIAP_BLOCK_DEVICE_BASE_ADDRESS \
+ 0xFFFFFFFF // set by library:flashiap-block-device
+#define MBED_CONF_FLASHIAP_BLOCK_DEVICE_SIZE 0 // set by library:flashiap-block-device
+#define MBED_CONF_GEMALTO_CINTERION_BAUDRATE 115200 // set by library:GEMALTO_CINTERION
+#define MBED_CONF_GEMALTO_CINTERION_PROVIDE_DEFAULT 0 // set by library:GEMALTO_CINTERION
+#define MBED_CONF_GENERIC_AT3GPP_BAUDRATE 115200 // set by library:GENERIC_AT3GPP
+#define MBED_CONF_GENERIC_AT3GPP_PROVIDE_DEFAULT 0 // set by library:GENERIC_AT3GPP
+#define MBED_CONF_LORA_ADR_ON 1 // set by library:lora
+#define MBED_CONF_LORA_APPLICATION_EUI \
+ { \
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 \
+ } // set by library:lora
+#define MBED_CONF_LORA_APPLICATION_KEY \
+ { \
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 \
+ } // set by library:lora
+#define MBED_CONF_LORA_APPSKEY \
+ { \
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 \
+ } // set by library:lora
+#define MBED_CONF_LORA_APP_PORT 15 // set by library:lora
+#define MBED_CONF_LORA_AUTOMATIC_UPLINK_MESSAGE 1 // set by library:lora
+#define MBED_CONF_LORA_DEVICE_ADDRESS 0x00000000 // set by library:lora
+#define MBED_CONF_LORA_DEVICE_EUI \
+ { \
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 \
+ } // set by library:lora
+#define MBED_CONF_LORA_DOWNLINK_PREAMBLE_LENGTH 5 // set by library:lora
+#define MBED_CONF_LORA_DUTY_CYCLE_ON 1 // set by library:lora
+#define MBED_CONF_LORA_DUTY_CYCLE_ON_JOIN 1 // set by library:lora
+#define MBED_CONF_LORA_FSB_MASK \
+ { \
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x00FF \
+ } // set by library:lora
+#define MBED_CONF_LORA_FSB_MASK_CHINA \
+ { \
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF \
+ } // set by library:lora
+#define MBED_CONF_LORA_LBT_ON 0 // set by library:lora
+#define MBED_CONF_LORA_MAX_SYS_RX_ERROR 5 // set by library:lora
+#define MBED_CONF_LORA_NB_TRIALS 12 // set by library:lora
+#define MBED_CONF_LORA_NWKSKEY \
+ { \
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 \
+ } // set by library:lora
+#define MBED_CONF_LORA_OVER_THE_AIR_ACTIVATION 1 // set by library:lora
+#define MBED_CONF_LORA_PHY EU868 // set by library:lora
+#define MBED_CONF_LORA_PUBLIC_NETWORK 1 // set by library:lora
+#define MBED_CONF_LORA_TX_MAX_SIZE 64 // set by library:lora
+#define MBED_CONF_LORA_UPLINK_PREAMBLE_LENGTH 8 // set by library:lora
+#define MBED_CONF_LORA_WAKEUP_TIME 5 // set by library:lora
+#define MBED_CONF_LWIP_ADDR_TIMEOUT 5 // set by library:lwip
+#define MBED_CONF_LWIP_ADDR_TIMEOUT_MODE 1 // set by library:lwip
+#define MBED_CONF_LWIP_DEBUG_ENABLED 0 // set by library:lwip
+#define MBED_CONF_LWIP_DEFAULT_THREAD_STACKSIZE 512 // set by library:lwip
+#define MBED_CONF_LWIP_DHCP_TIMEOUT 60 // set by library:lwip
+#define MBED_CONF_LWIP_ENABLE_PPP_TRACE 0 // set by library:lwip
+#define MBED_CONF_LWIP_ETHERNET_ENABLED 1 // set by library:lwip
+#define MBED_CONF_LWIP_IPV4_ENABLED 1 // set by library:lwip
+#define MBED_CONF_LWIP_IPV6_ENABLED 0 // set by library:lwip
+#define MBED_CONF_LWIP_IP_VER_PREF 4 // set by library:lwip
+#define MBED_CONF_LWIP_L3IP_ENABLED 0 // set by library:lwip
+#define MBED_CONF_LWIP_MBOX_SIZE 8 // set by library:lwip
+#define MBED_CONF_LWIP_MEMP_NUM_TCPIP_MSG_INPKT 8 // set by library:lwip
+#define MBED_CONF_LWIP_MEMP_NUM_TCP_SEG 16 // set by library:lwip
+#define MBED_CONF_LWIP_MEM_SIZE 2310 // set by library:lwip[STM]
+#define MBED_CONF_LWIP_ND6_QUEUEING 0 // set by library:lwip
+#define MBED_CONF_LWIP_ND6_RDNSS_MAX_DNS_SERVERS 0 // set by library:lwip
+#define MBED_CONF_LWIP_NUM_NETBUF 8 // set by library:lwip
+#define MBED_CONF_LWIP_NUM_PBUF 8 // set by library:lwip
+#define MBED_CONF_LWIP_PBUF_POOL_SIZE 5 // set by library:lwip
+#define MBED_CONF_LWIP_PPP_ENABLED 0 // set by library:lwip
+#define MBED_CONF_LWIP_PPP_IPV4_ENABLED 0 // set by library:lwip
+#define MBED_CONF_LWIP_PPP_IPV6_ENABLED 0 // set by library:lwip
+#define MBED_CONF_LWIP_PPP_THREAD_STACKSIZE 768 // set by library:lwip
+#define MBED_CONF_LWIP_PRESENT 1 // set by library:lwip
+#define MBED_CONF_LWIP_RAW_SOCKET_ENABLED 0 // set by library:lwip
+#define MBED_CONF_LWIP_SOCKET_MAX 4 // set by library:lwip
+#define MBED_CONF_LWIP_TCPIP_THREAD_PRIORITY osPriorityNormal // set by library:lwip
+#define MBED_CONF_LWIP_TCPIP_THREAD_STACKSIZE 1200 // set by library:lwip
+#define MBED_CONF_LWIP_TCP_CLOSE_TIMEOUT 1000 // set by library:lwip
+#define MBED_CONF_LWIP_TCP_ENABLED 1 // set by library:lwip
+#define MBED_CONF_LWIP_TCP_MAXRTX 6 // set by library:lwip
+#define MBED_CONF_LWIP_TCP_MSS 536 // set by library:lwip
+#define MBED_CONF_LWIP_TCP_SERVER_MAX 4 // set by library:lwip
+#define MBED_CONF_LWIP_TCP_SND_BUF (2 * TCP_MSS) // set by library:lwip
+#define MBED_CONF_LWIP_TCP_SOCKET_MAX 4 // set by library:lwip
+#define MBED_CONF_LWIP_TCP_SYNMAXRTX 6 // set by library:lwip
+#define MBED_CONF_LWIP_TCP_WND (4 * TCP_MSS) // set by library:lwip
+#define MBED_CONF_LWIP_UDP_SOCKET_MAX 4 // set by library:lwip
+#define MBED_CONF_LWIP_USE_MBED_TRACE 0 // set by library:lwip
+#define MBED_CONF_MBED_MESH_API_6LOWPAN_ND_CHANNEL 0 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_6LOWPAN_ND_CHANNEL_MASK 0x7fff800 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_6LOWPAN_ND_CHANNEL_PAGE 0 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_6LOWPAN_ND_DEVICE_TYPE \
+ NET_6LOWPAN_ROUTER // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_6LOWPAN_ND_PANID_FILTER 0xffff // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_6LOWPAN_ND_PSK_KEY \
+ { \
+ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf \
+ } // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_6LOWPAN_ND_PSK_KEY_ID 1 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_6LOWPAN_ND_SECURITY_MODE NONE // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_6LOWPAN_ND_SEC_LEVEL 5 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_HEAP_SIZE 32500 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_HEAP_STAT_INFO NULL // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_MAC_NEIGH_TABLE_SIZE 32 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_RADIUS_RETRY_COUNT 3 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_RADIUS_RETRY_IMAX 30 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_RADIUS_RETRY_IMIN 20 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_SYSTEM_TIME_UPDATE_FROM_NANOSTACK 1 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_CONFIG_CHANNEL 22 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_CONFIG_CHANNEL_MASK 0x7fff800 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_CONFIG_CHANNEL_PAGE 0 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_CONFIG_COMMISSIONING_DATASET_TIMESTAMP \
+ 0x10000 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_CONFIG_EXTENDED_PANID \
+ { \
+ 0xf1, 0xb5, 0xa1, 0xb2, 0xc4, 0xd5, 0xa1, 0xbd \
+ } // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_CONFIG_ML_PREFIX \
+ { \
+ 0xfd, 0x0, 0x0d, 0xb8, 0x0, 0x0, 0x0, 0x0 \
+ } // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_CONFIG_NETWORK_NAME \
+ "Thread Network" // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_CONFIG_PANID 0x0700 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_CONFIG_PSKC \
+ { \
+ 0xc8, 0xa6, 0x2e, 0xae, 0xf3, 0x68, 0xf3, 0x46, 0xa9, 0x9e, 0x57, 0x85, 0x98, 0x9d, 0x1c, 0xd0 \
+ } // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_DEVICE_TYPE \
+ MESH_DEVICE_TYPE_THREAD_ROUTER // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_MASTER_KEY \
+ { \
+ 0x10, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff \
+ } // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_PSKD "ABCDEFGH" // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_SECURITY_POLICY 255 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_USE_STATIC_LINK_CONFIG 1 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_USE_MALLOC_FOR_HEAP 0 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_BC_CHANNEL_FUNCTION 255 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_BC_DWELL_INTERVAL 0 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_BC_FIXED_CHANNEL 65535 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_BC_INTERVAL 0 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_CHANNEL_PLAN_ID 255 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_DEVICE_TYPE \
+ MESH_DEVICE_TYPE_WISUN_ROUTER // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_NETWORK_NAME "Wi-SUN Network" // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_OPERATING_CLASS 255 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_OPERATING_MODE 255 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_PHY_MODE_ID 255 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_REGULATORY_DOMAIN 3 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_UC_CHANNEL_FUNCTION 255 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_UC_DWELL_INTERVAL 255 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_UC_FIXED_CHANNEL 65535 // set by library:mbed-mesh-api
+#define MBED_CONF_MCR20A_PROVIDE_DEFAULT 0 // set by library:mcr20a
+#define MBED_CONF_NANOSTACK_CONFIGURATION nanostack_full // set by library:nanostack
+#define MBED_CONF_NANOSTACK_HAL_CRITICAL_SECTION_USABLE_FROM_INTERRUPT \
+ 0 // set by library:nanostack-hal
+#define MBED_CONF_NANOSTACK_HAL_EVENT_LOOP_DISPATCH_FROM_APPLICATION \
+ 0 // set by library:nanostack-hal
+#define MBED_CONF_NANOSTACK_HAL_EVENT_LOOP_THREAD_STACK_SIZE 6144 // set by library:nanostack-hal
+#define MBED_CONF_NANOSTACK_HAL_EVENT_LOOP_USE_MBED_EVENTS 0 // set by library:nanostack-hal
+#define MBED_CONF_NANOSTACK_HAL_KVSTORE_PATH "/kv/" // set by library:nanostack-hal
+#define MBED_CONF_NANOSTACK_HAL_USE_KVSTORE 0 // set by library:nanostack-hal
+#define MBED_CONF_NANOSTACK_LIBSERVICE_NSDYNMEM_TRACKER_ENABLED \
+ 0 // set by library:nanostack-libservice
+#define MBED_CONF_NANOSTACK_LIBSERVICE_PRESENT 1 // set by library:nanostack-libservice
+#define MBED_CONF_NSAPI_ADD_EVENT_LISTENER_RETURN_CHANGE 0 // set by library:nsapi
+#define MBED_CONF_NSAPI_DEFAULT_MESH_TYPE THREAD // set by library:nsapi
+#define MBED_CONF_NSAPI_DEFAULT_STACK LWIP // set by library:nsapi
+#define MBED_CONF_NSAPI_DEFAULT_WIFI_SECURITY NONE // set by library:nsapi
+#define MBED_CONF_NSAPI_DNS_ADDRESSES_LIMIT 10 // set by library:nsapi
+#define MBED_CONF_NSAPI_DNS_CACHE_SIZE 3 // set by library:nsapi
+#define MBED_CONF_NSAPI_DNS_RESPONSE_WAIT_TIME 10000 // set by library:nsapi
+#define MBED_CONF_NSAPI_DNS_RETRIES 1 // set by library:nsapi
+#define MBED_CONF_NSAPI_DNS_TOTAL_ATTEMPTS 10 // set by library:nsapi
+#define MBED_CONF_NSAPI_PRESENT 1 // set by library:nsapi
+#define MBED_CONF_NSAPI_SOCKET_STATS_ENABLED 0 // set by library:nsapi
+#define MBED_CONF_NSAPI_SOCKET_STATS_MAX_COUNT 10 // set by library:nsapi
+#define MBED_CONF_PLATFORM_CALLBACK_COMPARABLE 1 // set by library:platform
+#define MBED_CONF_PLATFORM_CALLBACK_NONTRIVIAL 0 // set by library:platform
+#define MBED_CONF_PLATFORM_CRASH_CAPTURE_ENABLED 0 // set by library:platform
+#define MBED_CONF_PLATFORM_CTHUNK_COUNT_MAX 8 // set by library:platform
+#define MBED_CONF_PLATFORM_DEEPSLEEP_STATS_VERBOSE 0 // set by library:platform[STM]
+#define MBED_CONF_PLATFORM_DEFAULT_SERIAL_BAUD_RATE 9600 // set by library:platform
+#define MBED_CONF_PLATFORM_ERROR_ALL_THREADS_INFO 0 // set by library:platform
+#define MBED_CONF_PLATFORM_ERROR_FILENAME_CAPTURE_ENABLED 0 // set by library:platform
+#define MBED_CONF_PLATFORM_ERROR_HIST_ENABLED 0 // set by library:platform
+#define MBED_CONF_PLATFORM_ERROR_HIST_SIZE 4 // set by library:platform
+#define MBED_CONF_PLATFORM_ERROR_REBOOT_MAX 1 // set by library:platform
+#define MBED_CONF_PLATFORM_FATAL_ERROR_AUTO_REBOOT_ENABLED 0 // set by library:platform
+#define MBED_CONF_PLATFORM_MAX_ERROR_FILENAME_LEN 16 // set by library:platform
+#define MBED_CONF_PLATFORM_MINIMAL_PRINTF_ENABLE_64_BIT 1 // set by library:platform
+#define MBED_CONF_PLATFORM_MINIMAL_PRINTF_ENABLE_FLOATING_POINT 0 // set by library:platform
+#define MBED_CONF_PLATFORM_MINIMAL_PRINTF_SET_FLOATING_POINT_MAX_DECIMALS \
+ 6 // set by library:platform
+#define MBED_CONF_PLATFORM_POLL_USE_LOWPOWER_TIMER 0 // set by library:platform
+#define MBED_CONF_PLATFORM_STDIO_BAUD_RATE 9600 // set by library:platform
+#define MBED_CONF_PLATFORM_STDIO_BUFFERED_SERIAL 0 // set by library:platform
+#define MBED_CONF_PLATFORM_STDIO_CONVERT_NEWLINES 1 // set by library:platform
+#define MBED_CONF_PLATFORM_STDIO_CONVERT_TTY_NEWLINES 1 // set by library:platform
+#define MBED_CONF_PLATFORM_STDIO_FLUSH_AT_EXIT 1 // set by library:platform
+#define MBED_CONF_PLATFORM_STDIO_MINIMAL_CONSOLE_ONLY 0 // set by library:platform
+#define MBED_CONF_PLATFORM_USE_MPU 1 // set by library:platform
+#define MBED_CONF_PPP_ENABLED 0 // set by library:ppp
+#define MBED_CONF_PPP_ENABLE_TRACE 0 // set by library:ppp
+#define MBED_CONF_PPP_IPV4_ENABLED 1 // set by library:ppp
+#define MBED_CONF_PPP_IPV6_ENABLED 0 // set by library:ppp
+#define MBED_CONF_PPP_MBED_EVENT_QUEUE 0 // set by library:ppp
+#define MBED_CONF_PPP_THREAD_STACKSIZE 816 // set by library:ppp
+#define MBED_CONF_QUECTEL_BC95_BAUDRATE 9600 // set by library:QUECTEL_BC95
+#define MBED_CONF_QUECTEL_BC95_PROVIDE_DEFAULT 0 // set by library:QUECTEL_BC95
+#define MBED_CONF_QUECTEL_BG96_BAUDRATE 115200 // set by library:QUECTEL_BG96
+#define MBED_CONF_QUECTEL_BG96_PROVIDE_DEFAULT 0 // set by library:QUECTEL_BG96
+#define MBED_CONF_QUECTEL_EC2X_BAUDRATE 115200 // set by library:QUECTEL_EC2X
+#define MBED_CONF_QUECTEL_EC2X_PROVIDE_DEFAULT 0 // set by library:QUECTEL_EC2X
+#define MBED_CONF_QUECTEL_EC2X_START_TIMEOUT 15000 // set by library:QUECTEL_EC2X
+#define MBED_CONF_QUECTEL_M26_BAUDRATE 115200 // set by library:QUECTEL_M26
+#define MBED_CONF_QUECTEL_M26_PROVIDE_DEFAULT 0 // set by library:QUECTEL_M26
+#define MBED_CONF_QUECTEL_UG96_BAUDRATE 115200 // set by library:QUECTEL_UG96
+#define MBED_CONF_QUECTEL_UG96_PROVIDE_DEFAULT 0 // set by library:QUECTEL_UG96
+#define MBED_CONF_RM1000_AT_BAUDRATE 230400 // set by library:RM1000_AT
+#define MBED_CONF_RM1000_AT_PROVIDE_DEFAULT 0 // set by library:RM1000_AT
+#define MBED_CONF_RTOS_API_PRESENT 1 // set by library:rtos-api
+#define MBED_CONF_RTOS_ENABLE_ALL_RTX_EVENTS 0 // set by library:rtos
+#define MBED_CONF_RTOS_EVFLAGS_NUM 0 // set by library:rtos
+#define MBED_CONF_RTOS_IDLE_THREAD_STACK_SIZE 512 // set by library:rtos
+#define MBED_CONF_RTOS_IDLE_THREAD_STACK_SIZE_DEBUG_EXTRA 128 // set by library:rtos[STM]
+#define MBED_CONF_RTOS_IDLE_THREAD_STACK_SIZE_TICKLESS_EXTRA 256 // set by library:rtos
+#define MBED_CONF_RTOS_MAIN_THREAD_STACK_SIZE 4096 // set by library:rtos
+#define MBED_CONF_RTOS_MSGQUEUE_DATA_SIZE 0 // set by library:rtos
+#define MBED_CONF_RTOS_MSGQUEUE_NUM 0 // set by library:rtos
+#define MBED_CONF_RTOS_MUTEX_NUM 0 // set by library:rtos
+#define MBED_CONF_RTOS_PRESENT 1 // set by library:rtos
+#define MBED_CONF_RTOS_SEMAPHORE_NUM 0 // set by library:rtos
+#define MBED_CONF_RTOS_THREAD_NUM 0 // set by library:rtos
+#define MBED_CONF_RTOS_THREAD_STACK_SIZE 4096 // set by library:rtos
+#define MBED_CONF_RTOS_THREAD_USER_STACK_SIZE 0 // set by library:rtos
+#define MBED_CONF_RTOS_TIMER_NUM 0 // set by library:rtos
+#define MBED_CONF_RTOS_TIMER_THREAD_STACK_SIZE 768 // set by library:rtos
+#define MBED_CONF_S2LP_PROVIDE_DEFAULT 0 // set by library:s2lp
+#define MBED_CONF_SARA4_PPP_BAUDRATE 115200 // set by library:SARA4_PPP
+#define MBED_CONF_SARA4_PPP_PROVIDE_DEFAULT 0 // set by library:SARA4_PPP
+#define MBED_CONF_STM32_EMAC_ETH_PHY_ADDRESS 0 // set by library:stm32-emac
+#define MBED_CONF_STM32_EMAC_ETH_PHY_AUTONEGOTIATION \
+ ETH_AUTONEGOTIATION_ENABLE // set by library:stm32-emac
+#define MBED_CONF_STM32_EMAC_ETH_PHY_DUPLEXMODE ETH_MODE_FULLDUPLEX // set by library:stm32-emac
+#define MBED_CONF_STM32_EMAC_ETH_PHY_DUPLEX_STATUS 0x0010 // set by library:stm32-emac
+#define MBED_CONF_STM32_EMAC_ETH_PHY_MEDIA_INTERFACE \
+ ETH_MEDIA_INTERFACE_RMII // set by library:stm32-emac
+#define MBED_CONF_STM32_EMAC_ETH_PHY_RESET_DELAY 500 // set by library:stm32-emac
+#define MBED_CONF_STM32_EMAC_ETH_PHY_SPEED ETH_SPEED_100M // set by library:stm32-emac
+#define MBED_CONF_STM32_EMAC_ETH_PHY_SPEED_STATUS 0x0004 // set by library:stm32-emac
+#define MBED_CONF_STM32_EMAC_ETH_PHY_STATUS_REGISTER 31 // set by library:stm32-emac
+#define MBED_CONF_STM32_EMAC_ETH_RXBUFNB 4 // set by library:stm32-emac
+#define MBED_CONF_STM32_EMAC_ETH_TXBUFNB 10 // set by library:stm32-emac[STM32H7]
+#define MBED_CONF_STM32_EMAC_THREAD_STACKSIZE 1024 // set by library:stm32-emac
+#define MBED_CONF_STORAGE_DEFAULT_KV kv // set by library:storage
+#define MBED_CONF_STORAGE_FILESYSTEM_BLOCKDEVICE default // set by library:storage_filesystem
+#define MBED_CONF_STORAGE_FILESYSTEM_EXTERNAL_BASE_ADDRESS 0 // set by library:storage_filesystem
+#define MBED_CONF_STORAGE_FILESYSTEM_EXTERNAL_SIZE 0 // set by library:storage_filesystem
+#define MBED_CONF_STORAGE_FILESYSTEM_FILESYSTEM default // set by library:storage_filesystem
+#define MBED_CONF_STORAGE_FILESYSTEM_FOLDER_PATH kvstore // set by library:storage_filesystem
+#define MBED_CONF_STORAGE_FILESYSTEM_INTERNAL_BASE_ADDRESS 0 // set by library:storage_filesystem
+#define MBED_CONF_STORAGE_FILESYSTEM_MOUNT_POINT kv // set by library:storage_filesystem
+#define MBED_CONF_STORAGE_FILESYSTEM_NO_RBP_BLOCKDEVICE \
+ default // set by library:storage_filesystem_no_rbp
+#define MBED_CONF_STORAGE_FILESYSTEM_NO_RBP_EXTERNAL_BASE_ADDRESS \
+ 0 // set by library:storage_filesystem_no_rbp
+#define MBED_CONF_STORAGE_FILESYSTEM_NO_RBP_EXTERNAL_SIZE \
+ 0 // set by library:storage_filesystem_no_rbp
+#define MBED_CONF_STORAGE_FILESYSTEM_NO_RBP_FILESYSTEM \
+ default // set by library:storage_filesystem_no_rbp
+#define MBED_CONF_STORAGE_FILESYSTEM_NO_RBP_FOLDER_PATH \
+ kvstore // set by library:storage_filesystem_no_rbp
+#define MBED_CONF_STORAGE_FILESYSTEM_NO_RBP_MOUNT_POINT \
+ kv // set by library:storage_filesystem_no_rbp
+#define MBED_CONF_STORAGE_FILESYSTEM_RBP_INTERNAL_SIZE 0 // set by library:storage_filesystem
+#define MBED_CONF_STORAGE_STORAGE_TYPE TDB_INTERNAL // set by library:storage[NUCLEO_H743ZI2]
+#define MBED_CONF_STORAGE_TDB_EXTERNAL_BLOCKDEVICE default // set by library:storage_tdb_external
+#define MBED_CONF_STORAGE_TDB_EXTERNAL_EXTERNAL_BASE_ADDRESS \
+ 0 // set by library:storage_tdb_external
+#define MBED_CONF_STORAGE_TDB_EXTERNAL_EXTERNAL_SIZE 0 // set by library:storage_tdb_external
+#define MBED_CONF_STORAGE_TDB_EXTERNAL_INTERNAL_BASE_ADDRESS \
+ 0 // set by library:storage_tdb_external
+#define MBED_CONF_STORAGE_TDB_EXTERNAL_NO_RBP_BLOCKDEVICE \
+ default // set by library:storage_tdb_external_no_rbp
+#define MBED_CONF_STORAGE_TDB_EXTERNAL_NO_RBP_EXTERNAL_BASE_ADDRESS \
+ 0 // set by library:storage_tdb_external_no_rbp
+#define MBED_CONF_STORAGE_TDB_EXTERNAL_NO_RBP_EXTERNAL_SIZE \
+ 0 // set by library:storage_tdb_external_no_rbp
+#define MBED_CONF_STORAGE_TDB_EXTERNAL_RBP_INTERNAL_SIZE 0 // set by library:storage_tdb_external
+#define MBED_CONF_STORAGE_TDB_INTERNAL_INTERNAL_BASE_ADDRESS \
+ 0 // set by library:storage_tdb_internal
+#define MBED_CONF_STORAGE_TDB_INTERNAL_INTERNAL_SIZE 0 // set by library:storage_tdb_internal
+#define MBED_CONF_TARGET_BOOT_STACK_SIZE 0x400 // set by library:rtos[*]
+#define MBED_CONF_TARGET_CONSOLE_UART 1 // set by target:Target
+#define MBED_CONF_TARGET_CUSTOM_TICKERS 1 // set by target:Target
+#define MBED_CONF_TARGET_DEEP_SLEEP_LATENCY 4 // set by target:MCU_STM32
+#define MBED_CONF_TARGET_DEFAULT_ADC_VREF NAN // set by target:Target
+#define MBED_CONF_TARGET_GPIO_RESET_AT_INIT 0 // set by target:MCU_STM32
+#define MBED_CONF_TARGET_I2C_TIMING_VALUE_ALGO 0 // set by target:MCU_STM32H7
+#define MBED_CONF_TARGET_INIT_US_TICKER_AT_BOOT 1 // set by target:MCU_STM32
+#define MBED_CONF_TARGET_INTERNAL_FLASH_UNIFORM_SECTORS 1 // set by target:Target
+#define MBED_CONF_TARGET_LPTICKER_LPTIM 1 // set by target:MCU_STM32H7
+#define MBED_CONF_TARGET_LPTICKER_LPTIM_CLOCK 1 // set by target:MCU_STM32
+#define MBED_CONF_TARGET_LPUART_CLOCK_SOURCE \
+ USE_LPUART_CLK_LSE | USE_LPUART_CLK_PCLK1 | USE_LPUART_CLK_PCLK3 // set by target:MCU_STM32
+#define MBED_CONF_TARGET_LSE_AVAILABLE 1 // set by target:MCU_STM32
+#define MBED_CONF_TARGET_LSE_DRIVE_LOAD_LEVEL RCC_LSEDRIVE_LOW // set by target:MCU_STM32H7
+#define MBED_CONF_TARGET_MPU_ROM_END 0x0fffffff // set by target:Target
+#define MBED_CONF_TARGET_NETWORK_DEFAULT_INTERFACE_TYPE ETHERNET // set by target:NUCLEO_H743ZI2
+#define MBED_CONF_TARGET_RTC_CLOCK_SOURCE USE_RTC_CLK_LSE_OR_LSI // set by target:MCU_STM32
+#define MBED_CONF_TARGET_SYSTEM_POWER_SUPPLY PWR_LDO_SUPPLY // set by target:MCU_STM32H743xI
+#define MBED_CONF_TARGET_TICKLESS_FROM_US_TICKER 0 // set by target:Target
+#define MBED_CONF_TARGET_XIP_ENABLE 0 // set by target:Target
+#define MBED_CONF_TELIT_HE910_BAUDRATE 115200 // set by library:TELIT_HE910
+#define MBED_CONF_TELIT_HE910_PROVIDE_DEFAULT 0 // set by library:TELIT_HE910
+#define MBED_CONF_TELIT_ME310_BAUDRATE 115200 // set by library:TELIT_ME310
+#define MBED_CONF_TELIT_ME310_PROVIDE_DEFAULT 0 // set by library:TELIT_ME310
+#define MBED_CONF_TELIT_ME910_BAUDRATE 115200 // set by library:TELIT_ME910
+#define MBED_CONF_TELIT_ME910_PROVIDE_DEFAULT 0 // set by library:TELIT_ME910
+#define MBED_CONF_UBLOX_AT_BAUDRATE 115200 // set by library:UBLOX_AT
+#define MBED_CONF_UBLOX_AT_PROVIDE_DEFAULT 0 // set by library:UBLOX_AT
+#define MBED_CONF_UBLOX_N2XX_BAUDRATE 9600 // set by library:UBLOX_N2XX
+#define MBED_CONF_UBLOX_N2XX_PROVIDE_DEFAULT 0 // set by library:UBLOX_N2XX
+#define MBED_CONF_UBLOX_PPP_BAUDRATE 115200 // set by library:UBLOX_PPP
+#define MBED_CONF_UBLOX_PPP_PROVIDE_DEFAULT 0 // set by library:UBLOX_PPP
+#define MBED_CRC_TABLE_SIZE 16 // set by library:drivers
+#define MBED_LFS2_BLOCK_CYCLES 1024 // set by library:littlefs2
+#define MBED_LFS2_BLOCK_SIZE 512 // set by library:littlefs2
+#define MBED_LFS2_CACHE_SIZE 64 // set by library:littlefs2
+#define MBED_LFS2_ENABLE_INFO 0 // set by library:littlefs2
+#define MBED_LFS2_INTRINSICS 1 // set by library:littlefs2
+#define MBED_LFS2_LOOKAHEAD_SIZE 64 // set by library:littlefs2
+#define MBED_LFS_BLOCK_SIZE 512 // set by library:littlefs
+#define MBED_LFS_ENABLE_INFO 0 // set by library:littlefs
+#define MBED_LFS_INTRINSICS 1 // set by library:littlefs
+#define MBED_LFS_LOOKAHEAD 512 // set by library:littlefs
+#define MBED_LFS_PROG_SIZE 64 // set by library:littlefs
+#define MBED_LFS_READ_SIZE 64 // set by library:littlefs
+#define MBED_STACK_DUMP_ENABLED 0 // set by library:platform
+#define MBED_TRACE_COLOR_THEME 0 // set by library:mbed-trace
+#define MEM_ALLOC malloc // set by library:mbed-trace
+#define MEM_FREE free // set by library:mbed-trace
+#define PPP_DEBUG 0 // set by library:ppp
+#define STM32_D11_SPI_ETHERNET_PIN PB_5 // set by target:NUCLEO_H743ZI2
+// Macros
+#define MBEDTLS_CIPHER_MODE_CTR // defined by library:SecureStore
+#define NSAPI_PPP_AVAILABLE \
+ (MBED_CONF_PPP_ENABLED || MBED_CONF_LWIP_PPP_ENABLED) // defined by library:ppp
+#define NSDYNMEM_TRACKER_ENABLED \
+ MBED_CONF_NANOSTACK_LIBSERVICE_NSDYNMEM_TRACKER_ENABLED // defined by library:nanostack-libservice
+#define NS_USE_EXTERNAL_MBED_TLS // defined by library:nanostack
+#define UNITY_INCLUDE_CONFIG_H // defined by library:utest
+#define _RTE_ // defined by library:rtos
+
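+// Note: the values above are generated build-time defaults; application and library
+// code consume them via the preprocessor, e.g. "#if MBED_CONF_LWIP_IPV4_ENABLED" to
+// gate IPv4 code paths, or MBED_CONF_PLATFORM_STDIO_BAUD_RATE as an integer constant.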
+#endif
diff --git a/onert-micro/tests/mbed-os/startup_stm32h743xx.S b/onert-micro/tests/mbed-os/startup_stm32h743xx.S
new file mode 100644
index 000000000..b978ae1f0
--- /dev/null
+++ b/onert-micro/tests/mbed-os/startup_stm32h743xx.S
@@ -0,0 +1,675 @@
+.syntax unified
+.cpu cortex-m7
+.fpu softvfp
+.thumb
+
+.global g_pfnVectors
+.global Default_Handler
+
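+/* Linker-script symbols: _sidata is the load address of the .data initializers in
+   flash; _sdata/_edata bound .data in SRAM; _sbss/_ebss bound the .bss segment */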
+.word _sidata
+.word _sdata
+.word _edata
+.word _sbss
+.word _ebss
+.section .text.Reset_Handler
+.weak Reset_Handler
+.type Reset_Handler, %function
+
+Reset_Handler:
+ ldr sp, =_estack /* set stack pointer */
+ movs r1, #0
+ b LoopCopyDataInit
+
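+/* Copy the .data initializers from flash (_sidata) to SRAM (_sdata.._edata) */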
+CopyDataInit:
+ ldr r3, =_sidata
+ ldr r3, [r3, r1]
+ str r3, [r0, r1]
+ adds r1, r1, #4
+
+LoopCopyDataInit:
+ ldr r0, =_sdata
+ ldr r3, =_edata
+ adds r2, r0, r1
+ cmp r2, r3
+ bcc CopyDataInit
+ ldr r2, =_sbss
+ b LoopFillZerobss
+
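+/* Zero fill the .bss segment (_sbss.._ebss) */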
+FillZerobss:
+ movs r3, #0
+ str r3, [r2], #4
+
+LoopFillZerobss:
+ ldr r3, =_ebss
+ cmp r2, r3
+ bcc FillZerobss
+
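+/* Hand off to the C runtime entry point; _start performs libc initialization
+   (static constructors, retargeting) and then calls main */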
+ bl _start
+ bx lr
+
+.size Reset_Handler, .-Reset_Handler
+
+.section .text.Default_Handler,"ax",%progbits
+
+Default_Handler:
+Infinite_Loop:
+ b Infinite_Loop
+ .size Default_Handler, .-Default_Handler
+ .section .isr_vector,"a",%progbits
+ .type g_pfnVectors, %object
+ .size g_pfnVectors, .-g_pfnVectors
+
+
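+/* Vector table: initial stack pointer and reset vector, followed by the Cortex-M
+   system exceptions and the STM32H743 peripheral interrupt vectors */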
+g_pfnVectors:
+ .word _estack
+ .word Reset_Handler
+
+ .word NMI_Handler
+ .word HardFault_Handler
+ .word MemManage_Handler
+ .word BusFault_Handler
+ .word UsageFault_Handler
+ .word 0
+ .word 0
+ .word 0
+ .word 0
+ .word SVC_Handler
+ .word DebugMon_Handler
+ .word 0
+ .word PendSV_Handler
+ .word SysTick_Handler
+
+ /* External Interrupts */
+ .word WWDG_IRQHandler /* Window WatchDog */
+ .word PVD_AVD_IRQHandler /* PVD/AVD through EXTI Line detection */
+ .word TAMP_STAMP_IRQHandler /* Tamper and TimeStamps through the EXTI line */
+ .word RTC_WKUP_IRQHandler /* RTC Wakeup through the EXTI line */
+ .word FLASH_IRQHandler /* FLASH */
+ .word RCC_IRQHandler /* RCC */
+ .word EXTI0_IRQHandler /* EXTI Line0 */
+ .word EXTI1_IRQHandler /* EXTI Line1 */
+ .word EXTI2_IRQHandler /* EXTI Line2 */
+ .word EXTI3_IRQHandler /* EXTI Line3 */
+ .word EXTI4_IRQHandler /* EXTI Line4 */
+ .word DMA1_Stream0_IRQHandler /* DMA1 Stream 0 */
+ .word DMA1_Stream1_IRQHandler /* DMA1 Stream 1 */
+ .word DMA1_Stream2_IRQHandler /* DMA1 Stream 2 */
+ .word DMA1_Stream3_IRQHandler /* DMA1 Stream 3 */
+ .word DMA1_Stream4_IRQHandler /* DMA1 Stream 4 */
+ .word DMA1_Stream5_IRQHandler /* DMA1 Stream 5 */
+ .word DMA1_Stream6_IRQHandler /* DMA1 Stream 6 */
+ .word ADC_IRQHandler /* ADC1, ADC2 and ADC3s */
+ .word FDCAN1_IT0_IRQHandler /* FDCAN1 interrupt line 0 */
+ .word FDCAN2_IT0_IRQHandler /* FDCAN2 interrupt line 0 */
+ .word FDCAN1_IT1_IRQHandler /* FDCAN1 interrupt line 1 */
+ .word FDCAN2_IT1_IRQHandler /* FDCAN2 interrupt line 1 */
+ .word EXTI9_5_IRQHandler /* External Line[9:5]s */
+ .word TIM1_BRK_IRQHandler /* TIM1 Break interrupt */
+ .word TIM1_UP_IRQHandler /* TIM1 Update interrupt */
+ .word TIM1_TRG_COM_IRQHandler /* TIM1 Trigger and Commutation interrupt */
+ .word TIM1_CC_IRQHandler /* TIM1 Capture Compare */
+ .word TIM2_IRQHandler /* TIM2 */
+ .word TIM3_IRQHandler /* TIM3 */
+ .word TIM4_IRQHandler /* TIM4 */
+ .word I2C1_EV_IRQHandler /* I2C1 Event */
+ .word I2C1_ER_IRQHandler /* I2C1 Error */
+ .word I2C2_EV_IRQHandler /* I2C2 Event */
+ .word I2C2_ER_IRQHandler /* I2C2 Error */
+ .word SPI1_IRQHandler /* SPI1 */
+ .word SPI2_IRQHandler /* SPI2 */
+ .word USART1_IRQHandler /* USART1 */
+ .word USART2_IRQHandler /* USART2 */
+ .word USART3_IRQHandler /* USART3 */
+ .word EXTI15_10_IRQHandler /* External Line[15:10]s */
+ .word RTC_Alarm_IRQHandler /* RTC Alarm (A and B) through EXTI Line */
+ .word 0 /* Reserved */
+ .word TIM8_BRK_TIM12_IRQHandler /* TIM8 Break and TIM12 */
+ .word TIM8_UP_TIM13_IRQHandler /* TIM8 Update and TIM13 */
+ .word TIM8_TRG_COM_TIM14_IRQHandler /* TIM8 Trigger and Commutation and TIM14 */
+ .word TIM8_CC_IRQHandler /* TIM8 Capture Compare */
+ .word DMA1_Stream7_IRQHandler /* DMA1 Stream7 */
+ .word FMC_IRQHandler /* FMC */
+ .word SDMMC1_IRQHandler /* SDMMC1 */
+ .word TIM5_IRQHandler /* TIM5 */
+ .word SPI3_IRQHandler /* SPI3 */
+ .word UART4_IRQHandler /* UART4 */
+ .word UART5_IRQHandler /* UART5 */
+ .word TIM6_DAC_IRQHandler /* TIM6 and DAC1&2 underrun errors */
+ .word TIM7_IRQHandler /* TIM7 */
+ .word DMA2_Stream0_IRQHandler /* DMA2 Stream 0 */
+ .word DMA2_Stream1_IRQHandler /* DMA2 Stream 1 */
+ .word DMA2_Stream2_IRQHandler /* DMA2 Stream 2 */
+ .word DMA2_Stream3_IRQHandler /* DMA2 Stream 3 */
+ .word DMA2_Stream4_IRQHandler /* DMA2 Stream 4 */
+ .word ETH_IRQHandler /* Ethernet */
+ .word ETH_WKUP_IRQHandler /* Ethernet Wakeup through EXTI line */
+ .word FDCAN_CAL_IRQHandler /* FDCAN calibration unit interrupt*/
+ .word 0 /* Reserved */
+ .word 0 /* Reserved */
+ .word 0 /* Reserved */
+ .word 0 /* Reserved */
+ .word DMA2_Stream5_IRQHandler /* DMA2 Stream 5 */
+ .word DMA2_Stream6_IRQHandler /* DMA2 Stream 6 */
+ .word DMA2_Stream7_IRQHandler /* DMA2 Stream 7 */
+ .word USART6_IRQHandler /* USART6 */
+ .word I2C3_EV_IRQHandler /* I2C3 event */
+ .word I2C3_ER_IRQHandler /* I2C3 error */
+ .word OTG_HS_EP1_OUT_IRQHandler /* USB OTG HS End Point 1 Out */
+ .word OTG_HS_EP1_IN_IRQHandler /* USB OTG HS End Point 1 In */
+ .word OTG_HS_WKUP_IRQHandler /* USB OTG HS Wakeup through EXTI */
+ .word OTG_HS_IRQHandler /* USB OTG HS */
+ .word DCMI_IRQHandler /* DCMI */
+ .word 0 /* Reserved */
+ .word RNG_IRQHandler /* Rng */
+ .word FPU_IRQHandler /* FPU */
+ .word UART7_IRQHandler /* UART7 */
+ .word UART8_IRQHandler /* UART8 */
+ .word SPI4_IRQHandler /* SPI4 */
+ .word SPI5_IRQHandler /* SPI5 */
+ .word SPI6_IRQHandler /* SPI6 */
+ .word SAI1_IRQHandler /* SAI1 */
+ .word LTDC_IRQHandler /* LTDC */
+ .word LTDC_ER_IRQHandler /* LTDC error */
+ .word DMA2D_IRQHandler /* DMA2D */
+ .word SAI2_IRQHandler /* SAI2 */
+ .word QUADSPI_IRQHandler /* QUADSPI */
+ .word LPTIM1_IRQHandler /* LPTIM1 */
+ .word CEC_IRQHandler /* HDMI_CEC */
+ .word I2C4_EV_IRQHandler /* I2C4 Event */
+ .word I2C4_ER_IRQHandler /* I2C4 Error */
+ .word SPDIF_RX_IRQHandler /* SPDIF_RX */
+ .word OTG_FS_EP1_OUT_IRQHandler /* USB OTG FS End Point 1 Out */
+ .word OTG_FS_EP1_IN_IRQHandler /* USB OTG FS End Point 1 In */
+ .word OTG_FS_WKUP_IRQHandler /* USB OTG FS Wakeup through EXTI */
+ .word OTG_FS_IRQHandler /* USB OTG FS */
+ .word DMAMUX1_OVR_IRQHandler /* DMAMUX1 Overrun interrupt */
+ .word HRTIM1_Master_IRQHandler /* HRTIM Master Timer global Interrupt */
+ .word HRTIM1_TIMA_IRQHandler /* HRTIM Timer A global Interrupt */
+ .word HRTIM1_TIMB_IRQHandler /* HRTIM Timer B global Interrupt */
+ .word HRTIM1_TIMC_IRQHandler /* HRTIM Timer C global Interrupt */
+ .word HRTIM1_TIMD_IRQHandler /* HRTIM Timer D global Interrupt */
+ .word HRTIM1_TIME_IRQHandler /* HRTIM Timer E global Interrupt */
+ .word HRTIM1_FLT_IRQHandler /* HRTIM Fault global Interrupt */
+ .word DFSDM1_FLT0_IRQHandler /* DFSDM Filter0 Interrupt */
+ .word DFSDM1_FLT1_IRQHandler /* DFSDM Filter1 Interrupt */
+ .word DFSDM1_FLT2_IRQHandler /* DFSDM Filter2 Interrupt */
+ .word DFSDM1_FLT3_IRQHandler /* DFSDM Filter3 Interrupt */
+ .word SAI3_IRQHandler /* SAI3 global Interrupt */
+ .word SWPMI1_IRQHandler /* Serial Wire Interface 1 global interrupt */
+ .word TIM15_IRQHandler /* TIM15 global Interrupt */
+ .word TIM16_IRQHandler /* TIM16 global Interrupt */
+ .word TIM17_IRQHandler /* TIM17 global Interrupt */
+ .word MDIOS_WKUP_IRQHandler /* MDIOS Wakeup Interrupt */
+ .word MDIOS_IRQHandler /* MDIOS global Interrupt */
+ .word JPEG_IRQHandler /* JPEG global Interrupt */
+ .word MDMA_IRQHandler /* MDMA global Interrupt */
+ .word 0 /* Reserved */
+ .word SDMMC2_IRQHandler /* SDMMC2 global Interrupt */
+ .word HSEM1_IRQHandler /* HSEM1 global Interrupt */
+ .word 0 /* Reserved */
+ .word ADC3_IRQHandler /* ADC3 global Interrupt */
+ .word DMAMUX2_OVR_IRQHandler /* DMAMUX Overrun interrupt */
+ .word BDMA_Channel0_IRQHandler /* BDMA Channel 0 global Interrupt */
+ .word BDMA_Channel1_IRQHandler /* BDMA Channel 1 global Interrupt */
+ .word BDMA_Channel2_IRQHandler /* BDMA Channel 2 global Interrupt */
+ .word BDMA_Channel3_IRQHandler /* BDMA Channel 3 global Interrupt */
+ .word BDMA_Channel4_IRQHandler /* BDMA Channel 4 global Interrupt */
+ .word BDMA_Channel5_IRQHandler /* BDMA Channel 5 global Interrupt */
+ .word BDMA_Channel6_IRQHandler /* BDMA Channel 6 global Interrupt */
+ .word BDMA_Channel7_IRQHandler /* BDMA Channel 7 global Interrupt */
+ .word COMP1_IRQHandler /* COMP1 global Interrupt */
+ .word LPTIM2_IRQHandler /* LP TIM2 global interrupt */
+ .word LPTIM3_IRQHandler /* LP TIM3 global interrupt */
+ .word LPTIM4_IRQHandler /* LP TIM4 global interrupt */
+ .word LPTIM5_IRQHandler /* LP TIM5 global interrupt */
+ .word LPUART1_IRQHandler /* LP UART1 interrupt */
+ .word 0 /* Reserved */
+ .word CRS_IRQHandler /* Clock Recovery Global Interrupt */
+ .word ECC_IRQHandler /* ECC diagnostic Global Interrupt */
+ .word SAI4_IRQHandler /* SAI4 global interrupt */
+ .word 0 /* Reserved */
+ .word 0 /* Reserved */
+ .word WAKEUP_PIN_IRQHandler /* Interrupt for all 6 wake-up pins */
+
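+/* Every handler below is a weak alias for Default_Handler; defining a function
+   with the same name anywhere in the application overrides it at link time */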
+ .weak NMI_Handler
+ .thumb_set NMI_Handler,Default_Handler
+
+ .weak HardFault_Handler
+ .thumb_set HardFault_Handler,Default_Handler
+
+ .weak MemManage_Handler
+ .thumb_set MemManage_Handler,Default_Handler
+
+ .weak BusFault_Handler
+ .thumb_set BusFault_Handler,Default_Handler
+
+ .weak UsageFault_Handler
+ .thumb_set UsageFault_Handler,Default_Handler
+
+ .weak SVC_Handler
+ .thumb_set SVC_Handler,Default_Handler
+
+ .weak DebugMon_Handler
+ .thumb_set DebugMon_Handler,Default_Handler
+
+ .weak PendSV_Handler
+ .thumb_set PendSV_Handler,Default_Handler
+
+ .weak SysTick_Handler
+ .thumb_set SysTick_Handler,Default_Handler
+
+ .weak WWDG_IRQHandler
+ .thumb_set WWDG_IRQHandler,Default_Handler
+
+ .weak PVD_AVD_IRQHandler
+ .thumb_set PVD_AVD_IRQHandler,Default_Handler
+
+ .weak TAMP_STAMP_IRQHandler
+ .thumb_set TAMP_STAMP_IRQHandler,Default_Handler
+
+ .weak RTC_WKUP_IRQHandler
+ .thumb_set RTC_WKUP_IRQHandler,Default_Handler
+
+ .weak FLASH_IRQHandler
+ .thumb_set FLASH_IRQHandler,Default_Handler
+
+ .weak RCC_IRQHandler
+ .thumb_set RCC_IRQHandler,Default_Handler
+
+ .weak EXTI0_IRQHandler
+ .thumb_set EXTI0_IRQHandler,Default_Handler
+
+ .weak EXTI1_IRQHandler
+ .thumb_set EXTI1_IRQHandler,Default_Handler
+
+ .weak EXTI2_IRQHandler
+ .thumb_set EXTI2_IRQHandler,Default_Handler
+
+ .weak EXTI3_IRQHandler
+ .thumb_set EXTI3_IRQHandler,Default_Handler
+
+ .weak EXTI4_IRQHandler
+ .thumb_set EXTI4_IRQHandler,Default_Handler
+
+ .weak DMA1_Stream0_IRQHandler
+ .thumb_set DMA1_Stream0_IRQHandler,Default_Handler
+
+ .weak DMA1_Stream1_IRQHandler
+ .thumb_set DMA1_Stream1_IRQHandler,Default_Handler
+
+ .weak DMA1_Stream2_IRQHandler
+ .thumb_set DMA1_Stream2_IRQHandler,Default_Handler
+
+ .weak DMA1_Stream3_IRQHandler
+ .thumb_set DMA1_Stream3_IRQHandler,Default_Handler
+
+ .weak DMA1_Stream4_IRQHandler
+ .thumb_set DMA1_Stream4_IRQHandler,Default_Handler
+
+ .weak DMA1_Stream5_IRQHandler
+ .thumb_set DMA1_Stream5_IRQHandler,Default_Handler
+
+ .weak DMA1_Stream6_IRQHandler
+ .thumb_set DMA1_Stream6_IRQHandler,Default_Handler
+
+ .weak ADC_IRQHandler
+ .thumb_set ADC_IRQHandler,Default_Handler
+
+ .weak FDCAN1_IT0_IRQHandler
+ .thumb_set FDCAN1_IT0_IRQHandler,Default_Handler
+
+ .weak FDCAN2_IT0_IRQHandler
+ .thumb_set FDCAN2_IT0_IRQHandler,Default_Handler
+
+ .weak FDCAN1_IT1_IRQHandler
+ .thumb_set FDCAN1_IT1_IRQHandler,Default_Handler
+
+ .weak FDCAN2_IT1_IRQHandler
+ .thumb_set FDCAN2_IT1_IRQHandler,Default_Handler
+
+ .weak EXTI9_5_IRQHandler
+ .thumb_set EXTI9_5_IRQHandler,Default_Handler
+
+ .weak TIM1_BRK_IRQHandler
+ .thumb_set TIM1_BRK_IRQHandler,Default_Handler
+
+ .weak TIM1_UP_IRQHandler
+ .thumb_set TIM1_UP_IRQHandler,Default_Handler
+
+ .weak TIM1_TRG_COM_IRQHandler
+ .thumb_set TIM1_TRG_COM_IRQHandler,Default_Handler
+
+ .weak TIM1_CC_IRQHandler
+ .thumb_set TIM1_CC_IRQHandler,Default_Handler
+
+ .weak TIM2_IRQHandler
+ .thumb_set TIM2_IRQHandler,Default_Handler
+
+ .weak TIM3_IRQHandler
+ .thumb_set TIM3_IRQHandler,Default_Handler
+
+ .weak TIM4_IRQHandler
+ .thumb_set TIM4_IRQHandler,Default_Handler
+
+ .weak I2C1_EV_IRQHandler
+ .thumb_set I2C1_EV_IRQHandler,Default_Handler
+
+ .weak I2C1_ER_IRQHandler
+ .thumb_set I2C1_ER_IRQHandler,Default_Handler
+
+ .weak I2C2_EV_IRQHandler
+ .thumb_set I2C2_EV_IRQHandler,Default_Handler
+
+ .weak I2C2_ER_IRQHandler
+ .thumb_set I2C2_ER_IRQHandler,Default_Handler
+
+ .weak SPI1_IRQHandler
+ .thumb_set SPI1_IRQHandler,Default_Handler
+
+ .weak SPI2_IRQHandler
+ .thumb_set SPI2_IRQHandler,Default_Handler
+
+ .weak USART1_IRQHandler
+ .thumb_set USART1_IRQHandler,Default_Handler
+
+ .weak USART2_IRQHandler
+ .thumb_set USART2_IRQHandler,Default_Handler
+
+ .weak USART3_IRQHandler
+ .thumb_set USART3_IRQHandler,Default_Handler
+
+ .weak EXTI15_10_IRQHandler
+ .thumb_set EXTI15_10_IRQHandler,Default_Handler
+
+ .weak RTC_Alarm_IRQHandler
+ .thumb_set RTC_Alarm_IRQHandler,Default_Handler
+
+ .weak TIM8_BRK_TIM12_IRQHandler
+ .thumb_set TIM8_BRK_TIM12_IRQHandler,Default_Handler
+
+ .weak TIM8_UP_TIM13_IRQHandler
+ .thumb_set TIM8_UP_TIM13_IRQHandler,Default_Handler
+
+ .weak TIM8_TRG_COM_TIM14_IRQHandler
+ .thumb_set TIM8_TRG_COM_TIM14_IRQHandler,Default_Handler
+
+ .weak TIM8_CC_IRQHandler
+ .thumb_set TIM8_CC_IRQHandler,Default_Handler
+
+ .weak DMA1_Stream7_IRQHandler
+ .thumb_set DMA1_Stream7_IRQHandler,Default_Handler
+
+ .weak FMC_IRQHandler
+ .thumb_set FMC_IRQHandler,Default_Handler
+
+ .weak SDMMC1_IRQHandler
+ .thumb_set SDMMC1_IRQHandler,Default_Handler
+
+ .weak TIM5_IRQHandler
+ .thumb_set TIM5_IRQHandler,Default_Handler
+
+ .weak SPI3_IRQHandler
+ .thumb_set SPI3_IRQHandler,Default_Handler
+
+ .weak UART4_IRQHandler
+ .thumb_set UART4_IRQHandler,Default_Handler
+
+ .weak UART5_IRQHandler
+ .thumb_set UART5_IRQHandler,Default_Handler
+
+ .weak TIM6_DAC_IRQHandler
+ .thumb_set TIM6_DAC_IRQHandler,Default_Handler
+
+ .weak TIM7_IRQHandler
+ .thumb_set TIM7_IRQHandler,Default_Handler
+
+ .weak DMA2_Stream0_IRQHandler
+ .thumb_set DMA2_Stream0_IRQHandler,Default_Handler
+
+ .weak DMA2_Stream1_IRQHandler
+ .thumb_set DMA2_Stream1_IRQHandler,Default_Handler
+
+ .weak DMA2_Stream2_IRQHandler
+ .thumb_set DMA2_Stream2_IRQHandler,Default_Handler
+
+ .weak DMA2_Stream3_IRQHandler
+ .thumb_set DMA2_Stream3_IRQHandler,Default_Handler
+
+ .weak DMA2_Stream4_IRQHandler
+ .thumb_set DMA2_Stream4_IRQHandler,Default_Handler
+
+ .weak ETH_IRQHandler
+ .thumb_set ETH_IRQHandler,Default_Handler
+
+ .weak ETH_WKUP_IRQHandler
+ .thumb_set ETH_WKUP_IRQHandler,Default_Handler
+
+ .weak FDCAN_CAL_IRQHandler
+ .thumb_set FDCAN_CAL_IRQHandler,Default_Handler
+
+ .weak DMA2_Stream5_IRQHandler
+ .thumb_set DMA2_Stream5_IRQHandler,Default_Handler
+
+ .weak DMA2_Stream6_IRQHandler
+ .thumb_set DMA2_Stream6_IRQHandler,Default_Handler
+
+ .weak DMA2_Stream7_IRQHandler
+ .thumb_set DMA2_Stream7_IRQHandler,Default_Handler
+
+ .weak USART6_IRQHandler
+ .thumb_set USART6_IRQHandler,Default_Handler
+
+ .weak I2C3_EV_IRQHandler
+ .thumb_set I2C3_EV_IRQHandler,Default_Handler
+
+ .weak I2C3_ER_IRQHandler
+ .thumb_set I2C3_ER_IRQHandler,Default_Handler
+
+ .weak OTG_HS_EP1_OUT_IRQHandler
+ .thumb_set OTG_HS_EP1_OUT_IRQHandler,Default_Handler
+
+ .weak OTG_HS_EP1_IN_IRQHandler
+ .thumb_set OTG_HS_EP1_IN_IRQHandler,Default_Handler
+
+ .weak OTG_HS_WKUP_IRQHandler
+ .thumb_set OTG_HS_WKUP_IRQHandler,Default_Handler
+
+ .weak OTG_HS_IRQHandler
+ .thumb_set OTG_HS_IRQHandler,Default_Handler
+
+ .weak DCMI_IRQHandler
+ .thumb_set DCMI_IRQHandler,Default_Handler
+
+ .weak RNG_IRQHandler
+ .thumb_set RNG_IRQHandler,Default_Handler
+
+ .weak FPU_IRQHandler
+ .thumb_set FPU_IRQHandler,Default_Handler
+
+ .weak UART7_IRQHandler
+ .thumb_set UART7_IRQHandler,Default_Handler
+
+ .weak UART8_IRQHandler
+ .thumb_set UART8_IRQHandler,Default_Handler
+
+ .weak SPI4_IRQHandler
+ .thumb_set SPI4_IRQHandler,Default_Handler
+
+ .weak SPI5_IRQHandler
+ .thumb_set SPI5_IRQHandler,Default_Handler
+
+ .weak SPI6_IRQHandler
+ .thumb_set SPI6_IRQHandler,Default_Handler
+
+ .weak SAI1_IRQHandler
+ .thumb_set SAI1_IRQHandler,Default_Handler
+
+ .weak LTDC_IRQHandler
+ .thumb_set LTDC_IRQHandler,Default_Handler
+
+ .weak LTDC_ER_IRQHandler
+ .thumb_set LTDC_ER_IRQHandler,Default_Handler
+
+ .weak DMA2D_IRQHandler
+ .thumb_set DMA2D_IRQHandler,Default_Handler
+
+ .weak SAI2_IRQHandler
+ .thumb_set SAI2_IRQHandler,Default_Handler
+
+ .weak QUADSPI_IRQHandler
+ .thumb_set QUADSPI_IRQHandler,Default_Handler
+
+ .weak LPTIM1_IRQHandler
+ .thumb_set LPTIM1_IRQHandler,Default_Handler
+
+ .weak CEC_IRQHandler
+ .thumb_set CEC_IRQHandler,Default_Handler
+
+ .weak I2C4_EV_IRQHandler
+ .thumb_set I2C4_EV_IRQHandler,Default_Handler
+
+ .weak I2C4_ER_IRQHandler
+ .thumb_set I2C4_ER_IRQHandler,Default_Handler
+
+ .weak SPDIF_RX_IRQHandler
+ .thumb_set SPDIF_RX_IRQHandler,Default_Handler
+
+ .weak OTG_FS_EP1_OUT_IRQHandler
+ .thumb_set OTG_FS_EP1_OUT_IRQHandler,Default_Handler
+
+ .weak OTG_FS_EP1_IN_IRQHandler
+ .thumb_set OTG_FS_EP1_IN_IRQHandler,Default_Handler
+
+ .weak OTG_FS_WKUP_IRQHandler
+ .thumb_set OTG_FS_WKUP_IRQHandler,Default_Handler
+
+ .weak OTG_FS_IRQHandler
+ .thumb_set OTG_FS_IRQHandler,Default_Handler
+
+ .weak DMAMUX1_OVR_IRQHandler
+ .thumb_set DMAMUX1_OVR_IRQHandler,Default_Handler
+
+ .weak HRTIM1_Master_IRQHandler
+ .thumb_set HRTIM1_Master_IRQHandler,Default_Handler
+
+ .weak HRTIM1_TIMA_IRQHandler
+ .thumb_set HRTIM1_TIMA_IRQHandler,Default_Handler
+
+ .weak HRTIM1_TIMB_IRQHandler
+ .thumb_set HRTIM1_TIMB_IRQHandler,Default_Handler
+
+ .weak HRTIM1_TIMC_IRQHandler
+ .thumb_set HRTIM1_TIMC_IRQHandler,Default_Handler
+
+ .weak HRTIM1_TIMD_IRQHandler
+ .thumb_set HRTIM1_TIMD_IRQHandler,Default_Handler
+
+ .weak HRTIM1_TIME_IRQHandler
+ .thumb_set HRTIM1_TIME_IRQHandler,Default_Handler
+
+ .weak HRTIM1_FLT_IRQHandler
+ .thumb_set HRTIM1_FLT_IRQHandler,Default_Handler
+
+ .weak DFSDM1_FLT0_IRQHandler
+ .thumb_set DFSDM1_FLT0_IRQHandler,Default_Handler
+
+ .weak DFSDM1_FLT1_IRQHandler
+ .thumb_set DFSDM1_FLT1_IRQHandler,Default_Handler
+
+ .weak DFSDM1_FLT2_IRQHandler
+ .thumb_set DFSDM1_FLT2_IRQHandler,Default_Handler
+
+ .weak DFSDM1_FLT3_IRQHandler
+ .thumb_set DFSDM1_FLT3_IRQHandler,Default_Handler
+
+ .weak SAI3_IRQHandler
+ .thumb_set SAI3_IRQHandler,Default_Handler
+
+ .weak SWPMI1_IRQHandler
+ .thumb_set SWPMI1_IRQHandler,Default_Handler
+
+ .weak TIM15_IRQHandler
+ .thumb_set TIM15_IRQHandler,Default_Handler
+
+ .weak TIM16_IRQHandler
+ .thumb_set TIM16_IRQHandler,Default_Handler
+
+ .weak TIM17_IRQHandler
+ .thumb_set TIM17_IRQHandler,Default_Handler
+
+ .weak MDIOS_WKUP_IRQHandler
+ .thumb_set MDIOS_WKUP_IRQHandler,Default_Handler
+
+ .weak MDIOS_IRQHandler
+ .thumb_set MDIOS_IRQHandler,Default_Handler
+
+ .weak JPEG_IRQHandler
+ .thumb_set JPEG_IRQHandler,Default_Handler
+
+ .weak MDMA_IRQHandler
+ .thumb_set MDMA_IRQHandler,Default_Handler
+
+ .weak SDMMC2_IRQHandler
+ .thumb_set SDMMC2_IRQHandler,Default_Handler
+
+ .weak HSEM1_IRQHandler
+ .thumb_set HSEM1_IRQHandler,Default_Handler
+
+ .weak ADC3_IRQHandler
+ .thumb_set ADC3_IRQHandler,Default_Handler
+
+ .weak DMAMUX2_OVR_IRQHandler
+ .thumb_set DMAMUX2_OVR_IRQHandler,Default_Handler
+
+ .weak BDMA_Channel0_IRQHandler
+ .thumb_set BDMA_Channel0_IRQHandler,Default_Handler
+
+ .weak BDMA_Channel1_IRQHandler
+ .thumb_set BDMA_Channel1_IRQHandler,Default_Handler
+
+ .weak BDMA_Channel2_IRQHandler
+ .thumb_set BDMA_Channel2_IRQHandler,Default_Handler
+
+ .weak BDMA_Channel3_IRQHandler
+ .thumb_set BDMA_Channel3_IRQHandler,Default_Handler
+
+ .weak BDMA_Channel4_IRQHandler
+ .thumb_set BDMA_Channel4_IRQHandler,Default_Handler
+
+ .weak BDMA_Channel5_IRQHandler
+ .thumb_set BDMA_Channel5_IRQHandler,Default_Handler
+
+ .weak BDMA_Channel6_IRQHandler
+ .thumb_set BDMA_Channel6_IRQHandler,Default_Handler
+
+ .weak BDMA_Channel7_IRQHandler
+ .thumb_set BDMA_Channel7_IRQHandler,Default_Handler
+
+ .weak COMP1_IRQHandler
+ .thumb_set COMP1_IRQHandler,Default_Handler
+
+ .weak LPTIM2_IRQHandler
+ .thumb_set LPTIM2_IRQHandler,Default_Handler
+
+ .weak LPTIM3_IRQHandler
+ .thumb_set LPTIM3_IRQHandler,Default_Handler
+
+ .weak LPTIM4_IRQHandler
+ .thumb_set LPTIM4_IRQHandler,Default_Handler
+
+ .weak LPTIM5_IRQHandler
+ .thumb_set LPTIM5_IRQHandler,Default_Handler
+
+ .weak LPUART1_IRQHandler
+ .thumb_set LPUART1_IRQHandler,Default_Handler
+
+ .weak CRS_IRQHandler
+ .thumb_set CRS_IRQHandler,Default_Handler
+
+ .weak ECC_IRQHandler
+ .thumb_set ECC_IRQHandler,Default_Handler
+
+ .weak SAI4_IRQHandler
+ .thumb_set SAI4_IRQHandler,Default_Handler
+
+ .weak WAKEUP_PIN_IRQHandler
+ .thumb_set WAKEUP_PIN_IRQHandler,Default_Handler
diff --git a/packaging/ABSEIL.tar.gz b/packaging/ABSEIL.tar.gz
new file mode 100644
index 000000000..1d92825b5
--- /dev/null
+++ b/packaging/ABSEIL.tar.gz
Binary files differ
diff --git a/packaging/CPUINFO.tar.gz b/packaging/CPUINFO.tar.gz
new file mode 100644
index 000000000..a74fe355a
--- /dev/null
+++ b/packaging/CPUINFO.tar.gz
Binary files differ
diff --git a/packaging/EGL_HEADERS.tar.gz b/packaging/EGL_HEADERS.tar.gz
new file mode 100644
index 000000000..80222056c
--- /dev/null
+++ b/packaging/EGL_HEADERS.tar.gz
Binary files differ
diff --git a/packaging/FARMHASH.tar.gz b/packaging/FARMHASH.tar.gz
new file mode 100644
index 000000000..4bf98d891
--- /dev/null
+++ b/packaging/FARMHASH.tar.gz
Binary files differ
diff --git a/packaging/FLATBUFFERS-2.0.tar.gz b/packaging/FLATBUFFERS-2.0.tar.gz
new file mode 100644
index 000000000..809aca01a
--- /dev/null
+++ b/packaging/FLATBUFFERS-2.0.tar.gz
Binary files differ
diff --git a/packaging/FP16.tar.gz b/packaging/FP16.tar.gz
new file mode 100644
index 000000000..78c787673
--- /dev/null
+++ b/packaging/FP16.tar.gz
Binary files differ
diff --git a/packaging/FXDIV.tar.gz b/packaging/FXDIV.tar.gz
new file mode 100644
index 000000000..7c1b82526
--- /dev/null
+++ b/packaging/FXDIV.tar.gz
Binary files differ
diff --git a/packaging/GEMMLOWP.tar.gz b/packaging/GEMMLOWP.tar.gz
new file mode 100644
index 000000000..198dc1414
--- /dev/null
+++ b/packaging/GEMMLOWP.tar.gz
Binary files differ
diff --git a/packaging/NEON2SSE.tar.gz b/packaging/NEON2SSE.tar.gz
new file mode 100644
index 000000000..86410f830
--- /dev/null
+++ b/packaging/NEON2SSE.tar.gz
Binary files differ
diff --git a/packaging/OOURAFFT.tar.gz b/packaging/OOURAFFT.tar.gz
new file mode 100644
index 000000000..85cf7fd6b
--- /dev/null
+++ b/packaging/OOURAFFT.tar.gz
Binary files differ
diff --git a/packaging/OPENCL_HEADERS.tar.gz b/packaging/OPENCL_HEADERS.tar.gz
new file mode 100644
index 000000000..7bc3656e8
--- /dev/null
+++ b/packaging/OPENCL_HEADERS.tar.gz
Binary files differ
diff --git a/packaging/OPENGL_HEADERS.tar.gz b/packaging/OPENGL_HEADERS.tar.gz
new file mode 100644
index 000000000..53a395a94
--- /dev/null
+++ b/packaging/OPENGL_HEADERS.tar.gz
Binary files differ
diff --git a/packaging/PSIMD.tar.gz b/packaging/PSIMD.tar.gz
new file mode 100644
index 000000000..3ae892489
--- /dev/null
+++ b/packaging/PSIMD.tar.gz
Binary files differ
diff --git a/packaging/PTHREADPOOL.tar.gz b/packaging/PTHREADPOOL.tar.gz
new file mode 100644
index 000000000..6cf42c0be
--- /dev/null
+++ b/packaging/PTHREADPOOL.tar.gz
Binary files differ
diff --git a/packaging/TENSORFLOW-2.8.0-EIGEN.tar.gz b/packaging/TENSORFLOW-2.8.0-EIGEN.tar.gz
new file mode 100644
index 000000000..94a307f82
--- /dev/null
+++ b/packaging/TENSORFLOW-2.8.0-EIGEN.tar.gz
Binary files differ
diff --git a/packaging/TENSORFLOW-2.8.0-GEMMLOWP.tar.gz b/packaging/TENSORFLOW-2.8.0-GEMMLOWP.tar.gz
new file mode 100644
index 000000000..c76e088ac
--- /dev/null
+++ b/packaging/TENSORFLOW-2.8.0-GEMMLOWP.tar.gz
Binary files differ
diff --git a/packaging/TENSORFLOW-2.8.0-RUY.tar.gz b/packaging/TENSORFLOW-2.8.0-RUY.tar.gz
new file mode 100644
index 000000000..8e6734718
--- /dev/null
+++ b/packaging/TENSORFLOW-2.8.0-RUY.tar.gz
Binary files differ
diff --git a/packaging/TENSORFLOW-2.8.0.tar.gz b/packaging/TENSORFLOW-2.8.0.tar.gz
new file mode 100644
index 000000000..f0f742511
--- /dev/null
+++ b/packaging/TENSORFLOW-2.8.0.tar.gz
Binary files differ
diff --git a/packaging/VULKAN.tar.gz b/packaging/VULKAN.tar.gz
new file mode 100644
index 000000000..64ae0bdc4
--- /dev/null
+++ b/packaging/VULKAN.tar.gz
Binary files differ
diff --git a/packaging/XNNPACK.tar.gz b/packaging/XNNPACK.tar.gz
new file mode 100644
index 000000000..2a1ce8e50
--- /dev/null
+++ b/packaging/XNNPACK.tar.gz
Binary files differ
diff --git a/packaging/eigen.tar.gz b/packaging/eigen.tar.gz
deleted file mode 100644
index 396d12fa7..000000000
--- a/packaging/eigen.tar.gz
+++ /dev/null
Binary files differ
diff --git a/packaging/gemmlowp.tar.gz b/packaging/gemmlowp.tar.gz
deleted file mode 100644
index 68339cdb1..000000000
--- a/packaging/gemmlowp.tar.gz
+++ /dev/null
Binary files differ
diff --git a/packaging/gtest.tar.gz b/packaging/gtest.tar.gz
deleted file mode 100644
index 52cbbcdfe..000000000
--- a/packaging/gtest.tar.gz
+++ /dev/null
Binary files differ
diff --git a/packaging/nnapi_test_generated.tar.gz b/packaging/nnapi_test_generated.tar.gz
index 504dbf9a6..446bd22c3 100644
--- a/packaging/nnapi_test_generated.tar.gz
+++ b/packaging/nnapi_test_generated.tar.gz
Binary files differ
diff --git a/packaging/nnfw.spec b/packaging/nnfw.spec
index 1b8c5fb31..522d7779a 100644
--- a/packaging/nnfw.spec
+++ b/packaging/nnfw.spec
@@ -1,48 +1,82 @@
Name: nnfw
Summary: nnfw
-Version: 1.9.0
+Version: 1.25.0
Release: 1
Group: Development
-License: Apache-2.0 and MIT and BSD-2-Clause
+License: Apache-2.0 and MIT and BSD-2-Clause and MPL-2.0
Source0: %{name}-%{version}.tar.gz
Source1: %{name}.manifest
Source1001: nnapi_test_generated.tar.gz
-Source1002: gtest.tar.gz
-Source1003: eigen.tar.gz
-Source1004: gemmlowp.tar.gz
-Source1005: ruy.tar.gz
Source2001: nnfw.pc.in
Source2002: nnfw-plugin.pc.in
+Source3001: ABSEIL.tar.gz
+Source3002: CPUINFO.tar.gz
+Source3003: EGL_HEADERS.tar.gz
+Source3004: FARMHASH.tar.gz
+Source3005: FP16.tar.gz
+Source3006: FXDIV.tar.gz
+Source3007: GEMMLOWP.tar.gz
+Source3008: OOURAFFT.tar.gz
+Source3009: OPENCL_HEADERS.tar.gz
+Source3010: OPENGL_HEADERS.tar.gz
+Source3011: PSIMD.tar.gz
+Source3012: PTHREADPOOL.tar.gz
+Source3013: TENSORFLOW-2.8.0-EIGEN.tar.gz
+Source3014: TENSORFLOW-2.8.0-GEMMLOWP.tar.gz
+Source3015: TENSORFLOW-2.8.0-RUY.tar.gz
+Source3016: TENSORFLOW-2.8.0.tar.gz
+Source3017: VULKAN.tar.gz
+Source3018: XNNPACK.tar.gz
+Source3019: FLATBUFFERS-2.0.tar.gz
+Source3020: NEON2SSE.tar.gz
%{!?build_type: %define build_type Release}
+%{!?npud_build: %define npud_build 1}
+%{!?trix_support: %define trix_support 1}
+%{!?odc_build: %define odc_build 1}
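+# The three toggles above default optional features to on; override at build time,
+# e.g. gbs build --define "npud_build 0"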
%{!?coverage_build: %define coverage_build 0}
%{!?test_build: %define test_build 0}
%{!?extra_option: %define extra_option %{nil}}
+%{!?config_support: %define config_support 1}
+# Define nproc as a gbs build option if you want to set the number of build threads manually (e.g. CI/CD infra)
+%define build_jobs %{?!nproc:%{?_smp_mflags}%{?!_smp_mflags:-j4}}%{?nproc:-j%nproc}
+%{!?nproc: %define nproc %{?!jobs:4}%{?jobs}}
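+# build_jobs expands to the make parallelism flag: -j%nproc when nproc is defined,
+# otherwise %{_smp_mflags}, or -j4 when neither is set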
+
%if %{coverage_build} == 1
+# Coverage test requires debug build runtime
+%define build_type Debug
%define test_build 1
%endif
-BuildRequires: cmake
-# Require flatbuffers-devel for onert frontend (model loading)
-BuildRequires: flatbuffers-devel
-
-%ifarch %{arm} aarch64
-# Require python for acl-ex library build pre-process
-BuildRequires: python
-BuildRequires: libarmcl-devel >= v20.05
+%ifarch riscv64
+# Disable npud on risc-v
+# TODO Enable on risc-v
+%define npud_build 0
%endif
+BuildRequires: cmake
+BuildRequires: python3-setuptools
+
Requires(post): /sbin/ldconfig
Requires(postun): /sbin/ldconfig
%if %{test_build} == 1
-BuildRequires: boost-devel
-BuildRequires: tensorflow-lite-devel
+BuildRequires: pkgconfig(boost)
+BuildRequires: pkgconfig(tensorflow2-lite)
BuildRequires: hdf5-devel
BuildRequires: libaec-devel
-BuildRequires: zlib-devel
-BuildRequires: libjpeg-devel
+BuildRequires: pkgconfig(zlib)
+BuildRequires: pkgconfig(libjpeg)
+BuildRequires: gtest-devel
+%endif
+
+%if %{npud_build} == 1
+BuildRequires: pkgconfig(glib-2.0)
+%endif
+
+%if %{trix_support} == 1
+BuildRequires: pkgconfig(npu-engine)
%endif
%description
@@ -62,6 +96,14 @@ Requires: %{name}-devel = %{version}-%{release}
%description plugin-devel
NNFW development package for backend plugin developer
+%if %{odc_build} == 1
+%package odc
+Summary: NNFW On-Device Compilation Package
+
+%description odc
+NNFW package for on-device compilation
+%endif # odc_build
+
%package minimal-app
Summary: Minimal test binary for VD manual test
@@ -73,71 +115,140 @@ Minimal test binary for VD manual test
Summary: NNFW Test
%description test
-NNFW test rpm. It does not depends on nnfw rpm since it contains nnfw runtime.
+NNFW test rpm.
+If you want to use the test package, install a runtime package built with the test build option
+If you want to get coverage info, install a runtime package built with the coverage build option
+# TODO Use release runtime package for test
%endif
-%ifarch %{arm}
+%if %{npud_build} == 1
+%package npud
+Summary: NPU daemon
+
+%description npud
+NPU daemon for optimal management of NPU hardware
+%endif
+
+%ifarch armv7l
%define target_arch armv7l
%endif
+%ifarch armv7hl
+%define target_arch armv7hl
+%endif
%ifarch x86_64
%define target_arch x86_64
%endif
%ifarch aarch64
%define target_arch aarch64
%endif
+%ifarch %ix86
+%define target_arch i686
+%endif
+%ifarch riscv64
+%define target_arch riscv64
+%endif
%define install_dir %{_prefix}
%define install_path %{buildroot}%{install_dir}
-%define build_env NNFW_WORKSPACE=build
-%define build_options -DCMAKE_BUILD_TYPE=%{build_type} -DTARGET_ARCH=%{target_arch} -DTARGET_OS=tizen -DENABLE_TEST=off -DBUILD_MINIMAL_SAMPLE=on
-
-# Set option for test build (and coverage test build)
+%define nnfw_workspace build
+%define build_env NNFW_WORKSPACE=%{nnfw_workspace}
+%define nncc_workspace build/nncc
+%define nncc_env NNCC_WORKSPACE=%{nncc_workspace}
+%define overlay_path %{nnfw_workspace}/overlay
+
+# Path to install test binaries and scripts (the test scripts assume the path Product/out)
+# TODO Share path with release package
%define test_install_home /opt/usr/nnfw-test
%define test_install_dir %{test_install_home}/Product/out
-%define test_install_path %{buildroot}%{test_install_dir}
-%define coverage_option %{nil}
+%define test_install_path %{buildroot}/%{test_install_dir}
+
+# Set option for test build (and coverage test build)
+%define option_test -DENABLE_TEST=OFF
+%define option_coverage %{nil}
%define test_suite_list infra/scripts tests/scripts
-%define test_build_type %{build_type}
+
+%if %{test_build} == 1
+# ENVVAR_ONERT_CONFIG: Use environment variables for runtime core configuration and debugging
+%define option_test -DENABLE_TEST=ON -DENVVAR_ONERT_CONFIG=ON
+%endif # test_build
+
+# Set option for configuration
+%define option_config %{nil}
+%if %{config_support} == 1
+%if %{npud_build} == 1
+# ENVVAR_NPUD_CONFIG: Use environment variables for npud configuration and debugging
+%define option_config -DENVVAR_NPUD_CONFIG=ON
+%endif # npud_build
+%endif # config_support
+
%if %{coverage_build} == 1
-%define coverage_option -DENABLE_COVERAGE=ON
-%define test_build_type Debug
-%endif
-%define test_build_env NNFW_INSTALL_PREFIX=%{test_install_path} NNFW_WORKSPACE=build_for_test
-%define test_build_options %{coverage_option} -DCMAKE_BUILD_TYPE=%{test_build_type} -DTARGET_ARCH=%{target_arch} -DTARGET_OS=tizen -DENVVAR_ONERT_CONFIG=ON
+%define option_coverage -DENABLE_COVERAGE=ON
+%endif # coverage_build
+
+%define build_options -DCMAKE_BUILD_TYPE=%{build_type} -DTARGET_ARCH=%{target_arch} -DTARGET_OS=tizen \\\
+ -DEXTERNALS_BUILD_THREAD=%{nproc} -DBUILD_MINIMAL_SAMPLE=ON -DNNFW_OVERLAY_DIR=$(pwd)/%{overlay_path} \\\
+ %{option_test} %{option_coverage} %{option_config} %{extra_option}
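+# Illustrative: extra_option appends arbitrary CMake flags at build time, e.g.
+# (assuming the gbs CLI) gbs build --define "extra_option -DBUILD_MINIMAL_SAMPLE=OFF"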
%prep
%setup -q
cp %{SOURCE1} .
mkdir ./externals
tar -xf %{SOURCE1001} -C ./tests/nnapi/src/
-tar -xf %{SOURCE1002} -C ./externals
-tar -xf %{SOURCE1003} -C ./externals
-tar -xf %{SOURCE1004} -C ./externals
-tar -xf %{SOURCE1005} -C ./externals
+tar -xf %{SOURCE3001} -C ./externals
+tar -xf %{SOURCE3002} -C ./externals
+tar -xf %{SOURCE3003} -C ./externals
+tar -xf %{SOURCE3004} -C ./externals
+tar -xf %{SOURCE3005} -C ./externals
+tar -xf %{SOURCE3006} -C ./externals
+tar -xf %{SOURCE3007} -C ./externals
+tar -xf %{SOURCE3008} -C ./externals
+tar -xf %{SOURCE3009} -C ./externals
+tar -xf %{SOURCE3010} -C ./externals
+tar -xf %{SOURCE3011} -C ./externals
+tar -xf %{SOURCE3012} -C ./externals
+tar -xf %{SOURCE3013} -C ./externals
+tar -xf %{SOURCE3014} -C ./externals
+tar -xf %{SOURCE3015} -C ./externals
+tar -xf %{SOURCE3016} -C ./externals
+tar -xf %{SOURCE3017} -C ./externals
+tar -xf %{SOURCE3018} -C ./externals
+tar -xf %{SOURCE3019} -C ./externals
+tar -xf %{SOURCE3020} -C ./externals
%build
-%ifarch arm armv7l aarch64
+%ifarch arm armv7l armv7hl aarch64 x86_64 %ix86 riscv64
+# nncc build
+%if %{odc_build} == 1
+%{nncc_env} ./nncc configure -DBUILD_GTEST=OFF -DENABLE_TEST=OFF -DEXTERNALS_BUILD_THREADS=%{nproc} -DCMAKE_BUILD_TYPE=%{build_type} -DTARGET_ARCH=%{target_arch} -DTARGET_OS=tizen \
+ -DCMAKE_INSTALL_PREFIX=$(pwd)/%{overlay_path} \
+ -DBUILD_WHITELIST="luci;foder;pepper-csv2vec;loco;locop;logo;logo-core;mio-circle06;luci-compute;oops;hermes;hermes-std;angkor;pp;pepper-strcast;pepper-str"
+%{nncc_env} ./nncc build %{build_jobs}
+cmake --install %{nncc_workspace}
+%endif # odc_build
+
+# install angkor TensorIndex and oops InternalExn headers (TODO: Remove this)
+mkdir -p %{overlay_path}/include/nncc/core/ADT/tensor
+mkdir -p %{overlay_path}/include/oops
+cp compiler/angkor/include/nncc/core/ADT/tensor/Index.h %{overlay_path}/include/nncc/core/ADT/tensor
+cp compiler/oops/include/oops/InternalExn.h %{overlay_path}/include/oops
+
# runtime build
-%{build_env} ./nnfw configure %{build_options} %{extra_option}
-%{build_env} ./nnfw build -j4
+%{build_env} ./nnfw configure %{build_options}
+%{build_env} ./nnfw build %{build_jobs}
# install in workspace
# TODO Set install path
%{build_env} ./nnfw install
%if %{test_build} == 1
-# test runtime
-# TODO remove duplicated build process
-%{test_build_env} ./nnfw configure %{test_build_options} %{extra_option}
-%{test_build_env} ./nnfw build -j4
%if %{coverage_build} == 1
pwd > tests/scripts/build_path.txt
%endif # coverage_build
tar -zcf test-suite.tar.gz infra/scripts
%endif # test_build
-%endif # arm armv7l aarch64
+%endif # arm armv7l armv7hl aarch64 x86_64 %ix86 riscv64
%install
-%ifarch arm armv7l aarch64
+%ifarch arm armv7l armv7hl aarch64 x86_64 %ix86 riscv64
mkdir -p %{buildroot}%{_libdir}
mkdir -p %{buildroot}%{_bindir}
@@ -160,19 +271,49 @@ install -m 0644 ./nnfw.pc.in %{buildroot}%{_libdir}/pkgconfig/nnfw.pc
install -m 0644 ./nnfw-plugin.pc.in %{buildroot}%{_libdir}/pkgconfig/nnfw-plugin.pc
%if %{test_build} == 1
-%{test_build_env} ./nnfw install
+mkdir -p %{test_install_path}/bin
+mkdir -p %{test_install_path}/nnapi-gtest
+mkdir -p %{test_install_path}/unittest
+mkdir -p %{test_install_path}/test
+
+install -m 755 build/out/bin/onert_run %{test_install_path}/bin
+install -m 755 build/out/bin/tflite_comparator %{test_install_path}/bin
+install -m 755 build/out/bin/tflite_run %{test_install_path}/bin
+install -m 755 build/out/nnapi-gtest/* %{test_install_path}/nnapi-gtest
+install -m 755 build/out/unittest/*_test %{test_install_path}/unittest
+install -m 755 build/out/unittest/test_* %{test_install_path}/unittest
+cp -r build/out/test/* %{test_install_path}/test
+cp -r build/out/unittest/nnfw_api_gtest_models %{test_install_path}/unittest
+
# Share test script with ubuntu (ignore error if there is no list for target)
-cp tests/nnapi/nnapi_gtest.skip.%{target_arch}-* %{buildroot}%{test_install_dir}/unittest/.
-cp %{buildroot}%{test_install_dir}/unittest/nnapi_gtest.skip.%{target_arch}-linux.cpu %{buildroot}%{test_install_dir}/unittest/nnapi_gtest.skip
+cp tests/nnapi/nnapi_gtest.skip.%{target_arch}-* %{test_install_path}/nnapi-gtest/.
+cp %{test_install_path}/nnapi-gtest/nnapi_gtest.skip.%{target_arch}-linux.cpu %{test_install_path}/nnapi-gtest/nnapi_gtest.skip
tar -zxf test-suite.tar.gz -C %{buildroot}%{test_install_home}
%if %{coverage_build} == 1
mkdir -p %{buildroot}%{test_install_home}/gcov
-find . -name "*.gcno" -exec xargs cp {} %{buildroot}%{test_install_home}/gcov/. \;
+find %{nnfw_workspace} -name "*.gcno" -exec xargs cp {} %{buildroot}%{test_install_home}/gcov/. \;
install -m 0644 ./tests/scripts/build_path.txt %{buildroot}%{test_install_dir}/test/build_path.txt
%endif # coverage_build
%endif # test_build
+%if %{odc_build} == 1
+mkdir -p %{buildroot}%{_libdir}/nnfw/odc
+install -m 644 %{overlay_path}/lib/libluci*.so %{buildroot}%{_libdir}/nnfw/odc
+install -m 644 %{overlay_path}/lib/libloco*.so %{buildroot}%{_libdir}/nnfw/odc
+install -m 644 build/out/lib/nnfw/odc/*.so %{buildroot}%{_libdir}/nnfw/odc
+%endif # odc_build
+
+%if %{npud_build} == 1
+install -m 755 build/out/bin/npud %{buildroot}%{_bindir}
+
+%if %{test_build} == 1
+mkdir -p %{test_install_path}/npud-gtest
+install -m 755 build/out/npud-gtest/* %{test_install_path}/npud-gtest
+%endif # test_build
+
+%endif # npud_build
+
%endif
%post -p /sbin/ldconfig
@@ -181,14 +322,15 @@ install -m 0644 ./tests/scripts/build_path.txt %{buildroot}%{test_install_dir}/t
%files
%manifest %{name}.manifest
%defattr(-,root,root,-)
-%ifarch arm armv7l aarch64
+%ifarch arm armv7l armv7hl aarch64 x86_64 %ix86 riscv64
%{_libdir}/*.so
+%exclude %{_includedir}/CL/*
%endif
%files devel
%manifest %{name}.manifest
%defattr(-,root,root,-)
-%ifarch arm armv7l aarch64
+%ifarch arm armv7l armv7hl aarch64 x86_64 %ix86 riscv64
%dir %{_includedir}/nnfw
%{_includedir}/nnfw/*
%{_libdir}/pkgconfig/nnfw.pc
@@ -197,13 +339,13 @@ install -m 0644 ./tests/scripts/build_path.txt %{buildroot}%{test_install_dir}/t
%files plugin-devel
%manifest %{name}.manifest
%defattr(-,root,root,-)
-%ifarch arm armv7l aarch64
+%ifarch arm armv7l armv7hl aarch64 x86_64 %ix86 riscv64
%dir %{_includedir}/onert
%{_includedir}/onert/*
%{_libdir}/pkgconfig/nnfw-plugin.pc
%endif
-%ifarch arm armv7l aarch64
+%ifarch arm armv7l armv7hl aarch64 x86_64 %ix86 riscv64
%files minimal-app
%manifest %{name}.manifest
%defattr(-,root,root,-)
@@ -214,12 +356,31 @@ install -m 0644 ./tests/scripts/build_path.txt %{buildroot}%{test_install_dir}/t
%files test
%manifest %{name}.manifest
%defattr(-,root,root,-)
-%ifarch arm armv7l aarch64
+%ifarch arm armv7l armv7hl aarch64 x86_64
%dir %{test_install_home}
%{test_install_home}/*
-%endif # arm armv7l aarch64
+%endif # arm armv7l armv7hl aarch64 x86_64
%endif # test_build
+%if %{npud_build} == 1
+%files npud
+%manifest %{name}.manifest
+%defattr(-,root,root,-)
+%ifarch arm armv7l armv7hl aarch64 x86_64 %ix86 riscv64
+%{_bindir}/npud
+%endif # arm armv7l armv7hl aarch64 x86_64 %ix86 riscv64
+%endif # npud_build
+
+%if %{odc_build} == 1
+%files odc
+%manifest %{name}.manifest
+%defattr(-,root,root,-)
+%ifarch arm armv7l armv7hl aarch64 x86_64 %ix86 riscv64
+%dir %{_libdir}/nnfw/odc
+%{_libdir}/nnfw/odc/*
+%endif # arm armv7l armv7hl aarch64 x86_64 %ix86 riscv64
+%endif # odc_build
+
%changelog
* Thu Mar 15 2018 Chunseok Lee <chunseok.lee@samsung.com>
- Initial spec file for nnfw
diff --git a/packaging/ruy.tar.gz b/packaging/ruy.tar.gz
deleted file mode 100644
index 98d1a1e33..000000000
--- a/packaging/ruy.tar.gz
+++ /dev/null
Binary files differ
diff --git a/res/CircleRecipes/InstanceNorm_001/test.recipe b/res/CircleRecipes/InstanceNorm_001/test.recipe
new file mode 100644
index 000000000..ec647c36d
--- /dev/null
+++ b/res/CircleRecipes/InstanceNorm_001/test.recipe
@@ -0,0 +1,47 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 4 }
+}
+operand {
+ name: "gamma"
+ type: FLOAT32
+ shape { dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "0.0123"
+ arg: "-0.3324"
+ arg: "0.2324"
+ arg: "-3.3360"
+ }
+}
+operand {
+ name: "beta"
+ type: FLOAT32
+ shape { dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "0.7023"
+ arg: "-0.3092"
+ arg: "0.7552"
+ arg: "0.2729"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 4 }
+}
+operation {
+ type: "InstanceNorm"
+ input: "ifm"
+ input: "gamma"
+ input: "beta"
+ output: "ofm"
+ instance_norm_options {
+ epsilon: 0.001
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/CircleRecipes/InstanceNorm_001/test.reverse b/res/CircleRecipes/InstanceNorm_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/CircleRecipes/InstanceNorm_001/test.reverse
diff --git a/res/CircleRecipes/Quant_InstanceNorm_000/test.qconf.json b/res/CircleRecipes/Quant_InstanceNorm_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/CircleRecipes/Quant_InstanceNorm_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/CircleRecipes/Quant_InstanceNorm_000/test.recipe b/res/CircleRecipes/Quant_InstanceNorm_000/test.recipe
new file mode 100644
index 000000000..b9c2ab8c9
--- /dev/null
+++ b/res/CircleRecipes/Quant_InstanceNorm_000/test.recipe
@@ -0,0 +1,43 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 6 dim: 12 }
+}
+operand {
+ name: "gamma"
+ type: FLOAT32
+ shape { dim: 12 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "beta"
+ type: FLOAT32
+ shape { dim: 12 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 6 dim: 12 }
+}
+operation {
+ type: "InstanceNorm"
+ input: "ifm"
+ input: "gamma"
+ input: "beta"
+ output: "ofm"
+ instance_norm_options {
+ epsilon: 0.00001
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/CircleRecipes/Quant_InstanceNorm_000/test.reverse b/res/CircleRecipes/Quant_InstanceNorm_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/CircleRecipes/Quant_InstanceNorm_000/test.reverse
diff --git a/res/CircleRecipes/Quant_InstanceNorm_000/test.rule b/res/CircleRecipes/Quant_InstanceNorm_000/test.rule
new file mode 100644
index 000000000..a17692d05
--- /dev/null
+++ b/res/CircleRecipes/Quant_InstanceNorm_000/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "GAMMA_S16" $(tensor_dtype gamma) '=' INT16
+RULE "BETA_S16" $(tensor_dtype beta) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/CircleRecipes/Quant_InstanceNorm_001/test.qconf.json b/res/CircleRecipes/Quant_InstanceNorm_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/CircleRecipes/Quant_InstanceNorm_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/CircleRecipes/Quant_InstanceNorm_001/test.recipe b/res/CircleRecipes/Quant_InstanceNorm_001/test.recipe
new file mode 100644
index 000000000..b9c2ab8c9
--- /dev/null
+++ b/res/CircleRecipes/Quant_InstanceNorm_001/test.recipe
@@ -0,0 +1,43 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 6 dim: 12 }
+}
+operand {
+ name: "gamma"
+ type: FLOAT32
+ shape { dim: 12 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "beta"
+ type: FLOAT32
+ shape { dim: 12 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 6 dim: 12 }
+}
+operation {
+ type: "InstanceNorm"
+ input: "ifm"
+ input: "gamma"
+ input: "beta"
+ output: "ofm"
+ instance_norm_options {
+ epsilon: 0.00001
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/CircleRecipes/Quant_InstanceNorm_001/test.reverse b/res/CircleRecipes/Quant_InstanceNorm_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/CircleRecipes/Quant_InstanceNorm_001/test.reverse
diff --git a/res/CircleRecipes/Quant_InstanceNorm_001/test.rule b/res/CircleRecipes/Quant_InstanceNorm_001/test.rule
new file mode 100644
index 000000000..e62dd4839
--- /dev/null
+++ b/res/CircleRecipes/Quant_InstanceNorm_001/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "GAMMA_U8" $(tensor_dtype gamma) '=' UINT8
+RULE "BETA_U8" $(tensor_dtype beta) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/CircleSchema/0.3/circle_schema.fbs b/res/CircleSchema/0.3/circle_schema.fbs
new file mode 100644
index 000000000..3972056f9
--- /dev/null
+++ b/res/CircleSchema/0.3/circle_schema.fbs
@@ -0,0 +1,1137 @@
+// Copyright (c) 2019~2020 Samsung Electronics Co., Ltd. All Rights Reserved
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Revision History
+//
+// Version Major.Minor
+//
+// The major version is the schema version;
+// it is kept as long as the schema stays compatible.
+// The minor version is for human communication only
+// and is not stored in the circle model.
+//
+// Version 0.0: Initial version. Based on TensorFlow Lite v1.13.1 schema.
+// Version 0.1: Based on TF v2.2-rc2 + more (from TensorFlow `56d281c`)
+// `BATCH_MATMUL` operator, `FLOAT64` tensor type,
+// `asymmetric_quantize_inputs` for several operator options
+// Version 0.2: BCQ_GATHER and BCQ_FULLY_CONNECTED are added.
+// Version 0.3: SHUFFLED16x1FLOAT32 is added.
+
+namespace circle;
+
+// This corresponds to the version.
+file_identifier "CIR0";
+// File extension of any written files.
+file_extension "circle";
+
+// IMPORTANT: All new members of tables, enums and unions must be added at the
+// end to ensure backwards compatibility.
+
+// The type of data stored in a tensor.
+enum TensorType : byte {
+ FLOAT32 = 0,
+ FLOAT16 = 1,
+ INT32 = 2,
+ UINT8 = 3,
+ INT64 = 4,
+ STRING = 5,
+ BOOL = 6,
+ INT16 = 7,
+ COMPLEX64 = 8,
+ INT8 = 9,
+ FLOAT64 = 10,
+}
+
+// Custom quantization parameters for experimenting with new quantization
+// techniques.
+table CustomQuantization {
+ custom:[ubyte] (force_align: 16);
+}
+
+// Represents a specific quantization technique's parameters.
+union QuantizationDetails {
+ CustomQuantization,
+}
+
+// Parameters for converting a quantized tensor back to float.
+table QuantizationParameters {
+ // These four parameters are the asymmetric linear quantization parameters.
+ // Given a quantized value q, the corresponding float value f should be:
+ // f = scale * (q - zero_point)
+ // For other quantization types, the QuantizationDetails below is used.
+ min:[float]; // For importing back into tensorflow.
+ max:[float]; // For importing back into tensorflow.
+ scale:[float]; // For dequantizing the tensor's values.
+ zero_point:[long];
+
+ // If this is not none, the other quantization parameters (i.e. min, max,
+ // scale, zero_point fields above) are ignored and the value of the
+ // QuantizationDetails union should be used.
+ details:QuantizationDetails;
+
+ // Specifies the dimension of the Tensor's shape that the scales and
+ // zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1]
+ // with quantization params:
+ // scale=[1.0, 2.0, 3.0], zero_point=[1, 2, 3], quantization_dimension=1
+ // will be quantized across the second dimension of t.
+ // t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1
+ // t[:, 1, :, :] will have scale[1]=2.0, zero_point[1]=2
+ // t[:, 2, :, :] will have scale[2]=3.0, zero_point[2]=3
+ quantized_dimension:int;
+}
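+
+// Worked example (illustrative, not part of the schema): with scale = [0.5]
+// and zero_point = [128], a stored uint8 value q = 130 dequantizes to
+// f = 0.5 * (130 - 128) = 1.0.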
+
+// Sparse tensors.
+// We use a modification of the TACO format.
+// Reference: http://tensor-compiler.org/kjolstad-oopsla17-tensor-compiler.pdf
+//
+// To encode a conceptual n-dimensional dense tensor with dims (d0, ..., dn-1),
+// potentially with a k-dimensional block (0 <= k <= n) with dims
+// (dn, ..., dn+k-1), the format needs to specify:
+// 1. In what order to traverse these dimensions. For example, to store a 2-D
+// matrix in row major order, the traversal order would be (d0, d1),
+// whereas to store it in column major order, the traversal order would be
+// (d1, d0). If the 2-D matrix has a 2-D inner block, the traversal order
+// could be (d0, d1, d2, d3).
+// 2. How each block dimension in (dn, ..., dn+k-1) maps to the original
+// tensor dimension in (d0, ..., dn-1).
+// 3. In the traversal order defined above, the format (dense vs. sparse) and
+// index metadata for each dimension. For a dense dimension, this is just
+// the size of that dimension. For a sparse dimension, it's the same as
+// the compressed index defined in the Compressed Sparse Row (CSR) format.
+// (http://scipy-lectures.org/advanced/scipy_sparse/csr_matrix.html)
+
+// The storage type for a dimension. Currently we support:
+// 1. DENSE: each coordinate in this dimension is stored implicitly.
+// 2. SPARSE_CSR: only the coordinates with non-zero elements are stored. The
+// compression technique is the same as what CSR uses.
+// More types like a sparse dimension with a different compression technique
+// could be added to the list in the future.
+enum DimensionType : byte {
+ DENSE = 0,
+ SPARSE_CSR = 1,
+}
+
+table Int32Vector {
+ values:[int];
+}
+
+table Uint16Vector {
+ values:[ushort] (force_align: 4);
+}
+
+table Uint8Vector {
+ values:[ubyte] (force_align: 4);
+}
+
+// Variable-typed buffer to store the index metadata for a sparse dimension.
+// The widest type is Int32 instead of UInt32 because a tensor's shape is an int32
+// vector. We don't want the per-dimensional index to overflow that range.
+union SparseIndexVector {
+ Int32Vector,
+ Uint16Vector,
+ Uint8Vector
+}
+
+table DimensionMetadata {
+ // Whether a dimension is dense or sparse.
+ format:DimensionType;
+ // Index metadata used for a dimension.
+ // - If format is DimensionType.DENSE then we use the dense_size field to
+ // store the size of that dimension. Each index in that dimension is
+ // stored implicitly.
+ // - If format is DimensionType.SPARSE_CSR then we use array_segments and
+ // array_indices to encode that dimension. array_segments represents how
+ // to segment the indices array, each segment corresponds to one element
+ // in the previous dimension. array_indices represents the index of the
+ // non-zero elements within this dimension (as those in the CSR matrix
+ // format, where the first array is row pointers and the second array is
+ // column indices).
+ dense_size:int;
+ array_segments:SparseIndexVector;
+ array_indices:SparseIndexVector;
+}
+
+// Parameters to encode a sparse TfLite tensor.
+table SparsityParameters {
+ // The traversal order of the dimensions defined in the `shape` field of the
+ // conceptual dense tensor. For an n-dimensional tensor with dims (d0, d1,
+ // ..., dn-1),
+ // - if not block sparse, the traversal_order is just a permutation of (d0,
+ // ..., dn-1). For example, a 2-D matrix stored in row-major order would
+ // have traversal_order = (d0, d1).
+ // - if block sparse with a k-dimensional block (0 <= k <= n), the
+ // traversal_order has n + k elements. The first n elements are still a
+ // permutation of (d0, ..., dn-1). The last k elements are a permutation
+ // of (dn, ..., dn+k-1), defining how to traverse a block internally. For
+ // example, a 2-D matrix with 2-D blocks, both stored in row-major order
+ // would have traversal_order = (d0, d1, d2, d3).
+ traversal_order:[int];
+ // For an n-dimensional tensor with a k-dimensional block (0 <= k <= n),
+ // stores how a block dimension in (dn, ..., dn+k-1) maps to the original
+ // tensor dimension in (d0, ..., dn-1).
+ // It's stored in the order of (dn, ..., dn+k-1).
+ // If not block-sparse, this field is NULL.
+ block_map:[int];
+ // In the traversal order defined above, the metadata needed for
+ // each dimension to locate the non-zero values in the original dense tensor.
+ // The size of the dim_metadata array = the size of the traversal_order array
+ // = n + k.
+ dim_metadata:[DimensionMetadata];
+}
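+
+// Worked example (illustrative): the 2x3 matrix [[1, 0, 2], [0, 0, 3]] with
+// traversal_order = (d0, d1), d0 DENSE (dense_size = 2) and d1 SPARSE_CSR is
+// encoded as array_segments = [0, 2, 3] and array_indices = [0, 2, 2], with
+// value buffer [1, 2, 3].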
+
+table Tensor {
+ // The tensor shape. The meaning of each entry is operator-specific but
+ // builtin ops use: [batch size, height, width, number of channels] (That's
+ // Tensorflow's NHWC).
+ shape:[int];
+ type:TensorType;
+ // An index that refers to the buffers table at the root of the model. Or,
+ // if there is no data buffer associated (i.e. intermediate results), then
+ // this is 0 (which refers to an always existent empty buffer).
+ //
+ // The data_buffer itself is an opaque container, with the assumption that the
+ // target device is little-endian. In addition, all builtin operators assume
+ // the memory is ordered such that if `shape` is [4, 3, 2], then index
+ // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k].
+ buffer:uint;
+ name:string; // For debugging and importing back into tensorflow.
+ quantization:QuantizationParameters; // Optional.
+
+ is_variable:bool = false;
+
+ // Parameters to encode a sparse tensor. See the example in
+ // tensorflow/lite/testdata/sparse_tensor.json.
+ sparsity:SparsityParameters; // Optional.
+
+ // Encodes `shape` with unknown dimensions. Unknown dimensions are
+ // represented with -1.
+ shape_signature:[int]; // Optional.
+}
+
+// A list of builtin operators. Builtin operators are slightly faster than custom
+// ones, but not by much. Moreover, while custom operators accept an opaque
+// object containing configuration parameters, builtins have a predetermined
+// set of acceptable options.
+
+enum BuiltinOperator : ubyte {
+ ADD = 0,
+ AVERAGE_POOL_2D = 1,
+ CONCATENATION = 2,
+ CONV_2D = 3,
+ DEPTHWISE_CONV_2D = 4,
+ DEPTH_TO_SPACE = 5,
+ DEQUANTIZE = 6,
+ EMBEDDING_LOOKUP = 7,
+ FLOOR = 8,
+ FULLY_CONNECTED = 9,
+ HASHTABLE_LOOKUP = 10,
+ L2_NORMALIZATION = 11,
+ L2_POOL_2D = 12,
+ LOCAL_RESPONSE_NORMALIZATION = 13,
+ LOGISTIC = 14,
+ LSH_PROJECTION = 15,
+ LSTM = 16,
+ MAX_POOL_2D = 17,
+ MUL = 18,
+ RELU = 19,
+ // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed
+ // since different model developers use RELU1 in different ways. Never
+ // create another op called RELU1.
+ RELU_N1_TO_1 = 20,
+ RELU6 = 21,
+ RESHAPE = 22,
+ RESIZE_BILINEAR = 23,
+ RNN = 24,
+ SOFTMAX = 25,
+ SPACE_TO_DEPTH = 26,
+ SVDF = 27,
+ TANH = 28,
+ // TODO(aselle): Consider rename to CONCATENATE_EMBEDDINGS
+ CONCAT_EMBEDDINGS = 29,
+ SKIP_GRAM = 30,
+ CALL = 31,
+ CUSTOM = 32,
+ EMBEDDING_LOOKUP_SPARSE = 33,
+ PAD = 34,
+ UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+ GATHER = 36,
+ BATCH_TO_SPACE_ND = 37,
+ SPACE_TO_BATCH_ND = 38,
+ TRANSPOSE = 39,
+ MEAN = 40,
+ SUB = 41,
+ DIV = 42,
+ SQUEEZE = 43,
+ UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
+ STRIDED_SLICE = 45,
+ BIDIRECTIONAL_SEQUENCE_RNN = 46,
+ EXP = 47,
+ TOPK_V2 = 48,
+ SPLIT = 49,
+ LOG_SOFTMAX = 50,
+ // DELEGATE is a special op type for the operations which are delegated to
+ // other backends.
+ // WARNING: Experimental interface, subject to change
+ DELEGATE = 51,
+ BIDIRECTIONAL_SEQUENCE_LSTM = 52,
+ CAST = 53,
+ PRELU = 54,
+ MAXIMUM = 55,
+ ARG_MAX = 56,
+ MINIMUM = 57,
+ LESS = 58,
+ NEG = 59,
+ PADV2 = 60,
+ GREATER = 61,
+ GREATER_EQUAL = 62,
+ LESS_EQUAL = 63,
+ SELECT = 64,
+ SLICE = 65,
+ SIN = 66,
+ TRANSPOSE_CONV = 67,
+ SPARSE_TO_DENSE = 68,
+ TILE = 69,
+ EXPAND_DIMS = 70,
+ EQUAL = 71,
+ NOT_EQUAL = 72,
+ LOG = 73,
+ SUM = 74,
+ SQRT = 75,
+ RSQRT = 76,
+ SHAPE = 77,
+ POW = 78,
+ ARG_MIN = 79,
+ FAKE_QUANT = 80,
+ REDUCE_PROD = 81,
+ REDUCE_MAX = 82,
+ PACK = 83,
+ LOGICAL_OR = 84,
+ ONE_HOT = 85,
+ LOGICAL_AND = 86,
+ LOGICAL_NOT = 87,
+ UNPACK = 88,
+ REDUCE_MIN = 89,
+ FLOOR_DIV = 90,
+ REDUCE_ANY = 91,
+ SQUARE = 92,
+ ZEROS_LIKE = 93,
+ FILL = 94,
+ FLOOR_MOD = 95,
+ RANGE = 96,
+ RESIZE_NEAREST_NEIGHBOR = 97,
+ LEAKY_RELU = 98,
+ SQUARED_DIFFERENCE = 99,
+ MIRROR_PAD = 100,
+ ABS = 101,
+ SPLIT_V = 102,
+ UNIQUE = 103,
+ CEIL = 104,
+ REVERSE_V2 = 105,
+ ADD_N = 106,
+ GATHER_ND = 107,
+ COS = 108,
+ WHERE = 109,
+ RANK = 110,
+ ELU = 111,
+ REVERSE_SEQUENCE = 112,
+ MATRIX_DIAG = 113,
+ QUANTIZE = 114,
+ MATRIX_SET_DIAG = 115,
+ ROUND = 116,
+ HARD_SWISH = 117,
+ IF = 118,
+ WHILE = 119,
+ NON_MAX_SUPPRESSION_V4 = 120,
+ NON_MAX_SUPPRESSION_V5 = 121,
+ SCATTER_ND = 122,
+ SELECT_V2 = 123,
+ DENSIFY = 124,
+ SEGMENT_SUM = 125,
+ BATCH_MATMUL = 126,
+ BCQ_GATHER = 252,
+ BCQ_FULLY_CONNECTED = 253,
+ INSTANCE_NORM = 254,
+}
+
+// Options for the builtin operators.
+union BuiltinOptions {
+ Conv2DOptions,
+ DepthwiseConv2DOptions,
+ ConcatEmbeddingsOptions,
+ LSHProjectionOptions,
+ Pool2DOptions,
+ SVDFOptions,
+ RNNOptions,
+ FullyConnectedOptions,
+ SoftmaxOptions,
+ ConcatenationOptions,
+ AddOptions,
+ L2NormOptions,
+ LocalResponseNormalizationOptions,
+ LSTMOptions,
+ ResizeBilinearOptions,
+ CallOptions,
+ ReshapeOptions,
+ SkipGramOptions,
+ SpaceToDepthOptions,
+ EmbeddingLookupSparseOptions,
+ MulOptions,
+ PadOptions,
+ GatherOptions,
+ BatchToSpaceNDOptions,
+ SpaceToBatchNDOptions,
+ TransposeOptions,
+ ReducerOptions,
+ SubOptions,
+ DivOptions,
+ SqueezeOptions,
+ SequenceRNNOptions,
+ StridedSliceOptions,
+ ExpOptions,
+ TopKV2Options,
+ SplitOptions,
+ LogSoftmaxOptions,
+ CastOptions,
+ DequantizeOptions,
+ MaximumMinimumOptions,
+ ArgMaxOptions,
+ LessOptions,
+ NegOptions,
+ PadV2Options,
+ GreaterOptions,
+ GreaterEqualOptions,
+ LessEqualOptions,
+ SelectOptions,
+ SliceOptions,
+ TransposeConvOptions,
+ SparseToDenseOptions,
+ TileOptions,
+ ExpandDimsOptions,
+ EqualOptions,
+ NotEqualOptions,
+ ShapeOptions,
+ PowOptions,
+ ArgMinOptions,
+ FakeQuantOptions,
+ PackOptions,
+ LogicalOrOptions,
+ OneHotOptions,
+ LogicalAndOptions,
+ LogicalNotOptions,
+ UnpackOptions,
+ FloorDivOptions,
+ SquareOptions,
+ ZerosLikeOptions,
+ FillOptions,
+ BidirectionalSequenceLSTMOptions,
+ BidirectionalSequenceRNNOptions,
+ UnidirectionalSequenceLSTMOptions,
+ FloorModOptions,
+ RangeOptions,
+ ResizeNearestNeighborOptions,
+ LeakyReluOptions,
+ SquaredDifferenceOptions,
+ MirrorPadOptions,
+ AbsOptions,
+ SplitVOptions,
+ UniqueOptions,
+ ReverseV2Options,
+ AddNOptions,
+ GatherNdOptions,
+ CosOptions,
+ WhereOptions,
+ RankOptions,
+ ReverseSequenceOptions,
+ MatrixDiagOptions,
+ QuantizeOptions,
+ MatrixSetDiagOptions,
+ HardSwishOptions,
+ IfOptions,
+ WhileOptions,
+ DepthToSpaceOptions,
+ NonMaxSuppressionV4Options,
+ NonMaxSuppressionV5Options,
+ ScatterNdOptions,
+ SelectV2Options,
+ DensifyOptions,
+ SegmentSumOptions,
+ BatchMatMulOptions,
+ BCQGatherOptions = 252,
+ BCQFullyConnectedOptions = 253,
+ InstanceNormOptions = 254,
+}
+
+enum Padding : byte { SAME, VALID }
+
+enum ActivationFunctionType : byte {
+ NONE = 0,
+ RELU = 1,
+ RELU_N1_TO_1 = 2,
+ RELU6 = 3,
+ TANH = 4,
+ SIGN_BIT = 5,
+}
+
+table Conv2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table Pool2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ filter_width:int;
+ filter_height:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DepthwiseConv2DOptions {
+ // Parameters for DepthwiseConv version 1 or above.
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ // `depth_multiplier` is redundant. It's used by CPU kernels in
+ // TensorFlow 2.0 or below, but ignored in versions above.
+ // See comments in lite/c/builtin_op_data.h for more details.
+ depth_multiplier:int;
+ fused_activation_function:ActivationFunctionType;
+ // Parameters for DepthwiseConv version 2 or above.
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
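+
+// Illustrative: an input with C channels yields C * depth_multiplier output
+// channels; the factor is recoverable from the filter shape, which is why
+// newer kernels treat the field as redundant.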
+
+table ConcatEmbeddingsOptions {
+ num_channels:int;
+ num_columns_per_channel:[int];
+ embedding_dim_per_channel:[int]; // This could be inferred from parameters.
+}
+
+enum LSHProjectionType: byte {
+ UNKNOWN = 0,
+ SPARSE = 1,
+ DENSE = 2,
+}
+
+table LSHProjectionOptions {
+ type: LSHProjectionType;
+}
+
+table SVDFOptions {
+ rank:int;
+ fused_activation_function:ActivationFunctionType;
+ // For weights-only quantization, use asymmetric quantization for non
+ // constant inputs at evaluation time.
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow RNNCell.
+table RNNOptions {
+ fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with RNNCell.
+table SequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow bidirectional_dynamic_rnn with RNNCell.
+table BidirectionalSequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ merge_outputs: bool;
+ asymmetric_quantize_inputs:bool;
+}
+
+enum FullyConnectedOptionsWeightsFormat: byte {
+ DEFAULT = 0,
+ SHUFFLED4x16INT8 = 1,
+ SHUFFLED16x1FLOAT32 = 127
+}
+
+// An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
+table FullyConnectedOptions {
+ // Parameters for FullyConnected version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+
+ // Parameters for FullyConnected version 2 or above.
+ weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT;
+
+ // Parameters for FullyConnected version 5 or above.
+ // If set to true, then the number of dimensions is preserved. Furthermore,
+ // all but the last dimension of the input and output shapes will be equal.
+ keep_num_dims: bool;
+
+ // Parameters for FullyConnected version 7 or above.
+ // If set to true, then weights-only op will use asymmetric quantization for
+ // inputs.
+ asymmetric_quantize_inputs: bool;
+}
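+
+// Illustrative: with a [2, 3, 4] input and 5 output units,
+// keep_num_dims = true yields a [2, 3, 5] output, while
+// keep_num_dims = false flattens the batch dims to give [6, 5].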
+
+table SoftmaxOptions {
+ beta: float;
+}
+
+// An implementation of TensorFlow concat.
+table ConcatenationOptions {
+ axis:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table AddOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table MulOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table L2NormOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table LocalResponseNormalizationOptions {
+ radius:int;
+ bias:float;
+ alpha:float;
+ beta:float;
+}
+
+enum LSTMKernelType : byte {
+ // Full LSTM kernel which supports peephole and projection.
+ FULL = 0,
+ // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
+ BASIC = 1,
+}
+
+// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
+table LSTMOptions {
+ // Parameters for LSTM version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // Parameters for LSTM version 2 or above.
+ // Basic kernel is only supported in version 2 or above.
+ kernel_type: LSTMKernelType = FULL;
+
+ // Parameters for LSTM version 4 or above.
+ asymmetric_quantize_inputs: bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with LSTMCell.
+table UnidirectionalSequenceLSTMOptions {
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true then first dimension is sequence, otherwise batch.
+ time_major:bool;
+
+ // Parameter for Unidirectional Sequence LSTM version 4.
+ asymmetric_quantize_inputs:bool;
+}
+
+table BidirectionalSequenceLSTMOptions {
+ // Parameters supported by version 1:
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true, store the outputs of both directions into the first output.
+ merge_outputs: bool;
+
+ // Parameters supported by version 2:
+ // If true then first dimension is sequence, otherwise batch.
+ // Version 1 implementations assumed time_major to be true, so this default
+ // value should never change.
+ time_major: bool = true;
+
+ // Parameters for version 3 or above.
+ asymmetric_quantize_inputs:bool;
+}
+
+table ResizeBilinearOptions {
+ new_height: int (deprecated);
+ new_width: int (deprecated);
+ align_corners: bool;
+ half_pixel_centers: bool;
+}
+
+table ResizeNearestNeighborOptions {
+ align_corners: bool;
+}
+
+// A call operation options
+table CallOptions {
+ // The subgraph index that needs to be called.
+ subgraph:uint;
+}
+
+table PadOptions {
+}
+
+table PadV2Options {
+}
+
+table ReshapeOptions {
+ new_shape:[int];
+}
+
+table SpaceToBatchNDOptions {
+}
+
+table BatchToSpaceNDOptions {
+}
+
+table SkipGramOptions {
+ ngram_size: int;
+ max_skip_size: int;
+ include_all_ngrams: bool;
+}
+
+table SpaceToDepthOptions {
+ block_size: int;
+}
+
+table DepthToSpaceOptions {
+ block_size: int;
+}
+
+table SubOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DivOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table TopKV2Options {
+}
+
+enum CombinerType : byte {
+ SUM = 0,
+ MEAN = 1,
+ SQRTN = 2,
+}
+
+table EmbeddingLookupSparseOptions {
+ combiner:CombinerType;
+}
+
+table GatherOptions {
+ axis: int;
+}
+
+table TransposeOptions {
+}
+
+table ExpOptions {
+}
+
+table CosOptions {
+}
+
+table ReducerOptions {
+ keep_dims: bool;
+}
+
+table SqueezeOptions {
+ squeeze_dims:[int];
+}
+
+table SplitOptions {
+ num_splits: int;
+}
+
+table SplitVOptions {
+ num_splits: int;
+}
+
+table StridedSliceOptions {
+ begin_mask: int;
+ end_mask: int;
+ ellipsis_mask: int;
+ new_axis_mask: int;
+ shrink_axis_mask: int;
+}
+
+table LogSoftmaxOptions {
+}
+
+table CastOptions {
+ in_data_type: TensorType;
+ out_data_type: TensorType;
+}
+
+table DequantizeOptions {
+}
+
+table MaximumMinimumOptions {
+}
+
+table TileOptions {
+}
+
+table ArgMaxOptions {
+ output_type : TensorType;
+}
+
+table ArgMinOptions {
+ output_type : TensorType;
+}
+
+table GreaterOptions {
+}
+
+table GreaterEqualOptions {
+}
+
+table LessOptions {
+}
+
+table LessEqualOptions {
+}
+
+table NegOptions {
+}
+
+table SelectOptions {
+}
+
+table SliceOptions {
+}
+
+table TransposeConvOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+}
+
+table ExpandDimsOptions {
+}
+
+table SparseToDenseOptions {
+ validate_indices:bool;
+}
+
+table EqualOptions {
+}
+
+table NotEqualOptions {
+}
+
+table ShapeOptions {
+ // Optional output type of the operation (int32 or int64). Defaults to int32.
+ out_type : TensorType;
+}
+
+table RankOptions {
+}
+
+table PowOptions {
+}
+
+table FakeQuantOptions {
+ // Parameters supported by version 1:
+ min:float;
+ max:float;
+ num_bits:int;
+
+ // Parameters supported by version 2:
+ narrow_range:bool;
+}
+
+table PackOptions {
+ values_count:int;
+ axis:int;
+}
+
+table LogicalOrOptions {
+}
+
+table OneHotOptions {
+ axis:int;
+}
+
+table AbsOptions {
+}
+
+
+table HardSwishOptions {
+}
+
+table LogicalAndOptions {
+}
+
+table LogicalNotOptions {
+}
+
+table UnpackOptions {
+ num:int;
+ axis:int;
+}
+
+table FloorDivOptions {
+}
+
+table SquareOptions {
+}
+
+table ZerosLikeOptions {
+}
+
+table FillOptions {
+}
+
+table FloorModOptions {
+}
+
+table RangeOptions {
+}
+
+table LeakyReluOptions {
+ alpha:float;
+}
+
+table SquaredDifferenceOptions {
+}
+
+enum MirrorPadMode : byte {
+ // Doesn't include borders.
+ REFLECT = 0,
+ // Includes borders.
+ SYMMETRIC = 1,
+}
+
+table MirrorPadOptions {
+ mode:MirrorPadMode;
+}
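+
+// Illustrative: padding [1, 2, 3] by one element on each side yields
+// [2, 1, 2, 3, 2] with REFLECT and [1, 1, 2, 3, 3] with SYMMETRIC.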
+
+table UniqueOptions {
+ idx_out_type:TensorType = INT32;
+}
+
+table ReverseV2Options {
+}
+
+table AddNOptions {
+}
+
+table GatherNdOptions {
+}
+
+table WhereOptions {
+}
+
+table ReverseSequenceOptions {
+ seq_dim:int;
+ batch_dim:int = 0;
+}
+
+table MatrixDiagOptions {
+}
+
+table QuantizeOptions {
+}
+
+table MatrixSetDiagOptions {
+}
+
+table IfOptions {
+ then_subgraph_index:int;
+ else_subgraph_index:int;
+}
+
+table WhileOptions {
+ cond_subgraph_index:int;
+ body_subgraph_index:int;
+}
+
+table NonMaxSuppressionV4Options {
+}
+
+table NonMaxSuppressionV5Options {
+}
+
+table ScatterNdOptions {
+}
+
+table SelectV2Options {
+}
+
+table DensifyOptions {
+}
+
+table SegmentSumOptions {
+}
+
+table BatchMatMulOptions {
+ adjoint_lhs:bool;
+ adjoint_rhs:bool;
+}
+
+table BCQGatherOptions {
+ input_hidden_size: int;
+ axis: int;
+}
+
+table BCQFullyConnectedOptions {
+ weights_hidden_size: int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table InstanceNormOptions {
+ epsilon:float;
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
+// builtin, or a string if the operator is custom.
+table OperatorCode {
+ builtin_code:BuiltinOperator;
+ custom_code:string;
+
+ // The version of the operator. The version need to be bumped whenever new
+ // parameters are introduced into an op.
+ version:int = 1;
+}
+
+enum CustomOptionsFormat : byte {
+ FLEXBUFFERS = 0,
+}
+
+enum DataFormat : byte {
+ // For 2D data, NHWC(batch, height, width, channels)
+ // For 3D data, NDHWC(batch, depth, height, width, channels)
+ CHANNELS_LAST = 0,
+ // For 2D data, NCHW(batch, channels, height, width)
+ // For 3D data, NCDHW(batch, channels, depth, height, width)
+ CHANNELS_FIRST = 1,
+}
+
+// An operator takes tensors as inputs and outputs. The type of operation being
+// performed is determined by an index into the list of valid OperatorCodes,
+// while the specifics of each operation are configured using builtin_options
+// or custom_options.
+table Operator {
+ // Index into the operator_codes array. Using an integer here avoids
+ // complicated map lookups.
+ opcode_index:uint;
+
+ // Optional input are indicated by -1.
+ inputs:[int];
+ outputs:[int];
+
+ builtin_options:BuiltinOptions;
+ custom_options:[ubyte];
+ custom_options_format:CustomOptionsFormat;
+
+ // A list of booleans indicating the input tensors which are being mutated by
+ // this operator (e.g. used by RNN and LSTM).
+ // For example, if the "inputs" array refers to 5 tensors and the second and
+ // fifth are mutable variables, then this list will contain
+ // [false, true, false, false, true].
+ //
+ // If the list is empty, no variable is mutated in this operator.
+ // The list either has the same length as `inputs`, or is empty.
+ mutating_variable_inputs:[bool];
+
+ // A list of indices to the subgraph's "tensors" that are internal to an Op.
+ // Internal tensors are those that do not flow in or out of the operation,
+ // but instead are part of internal computation. As such, the operation's
+ // implementation may manage its memory more efficiently. They are needed
+ // however (i.e. not just an implementation detail) since they are part of the
+ // computation, which may require relevant metadata such as quantization
+ // parameters.
+ intermediates:[int];
+}
+
+// The root type, defining a subgraph, which typically represents an entire
+// model.
+table SubGraph {
+ // A list of all tensors used in this subgraph.
+ tensors:[Tensor];
+
+ // Indices of the tensors that are inputs into this subgraph. Note this is
+ // the list of non-static tensors that feed into the subgraph for inference.
+ inputs:[int];
+
+ // Indices of the tensors that are outputs out of this subgraph. Note this is
+ // the list of output tensors that are considered the product of the
+ // subgraph's inference.
+ outputs:[int];
+
+ // All operators, in execution order.
+ operators:[Operator];
+
+ // Name of this subgraph (used for debugging).
+ name:string;
+
+ // Data format for input/output of SubGraph
+ data_format: DataFormat;
+}
+
+// Table of raw data buffers (used for constant tensors). Referenced by tensors
+// by index. The generous alignment accommodates mmap-friendly data structures.
+table Buffer {
+ data:[ubyte] (force_align: 16);
+}
+
+table Metadata {
+ // A human readable string to uniquely identify a Metadata.
+ name:string;
+ // An index to the buffers table.
+ buffer:uint;
+}
+
+table Model {
+ // Version of the schema.
+ version:uint;
+
+ // A list of all operator codes used in this model. This is
+ // kept in order because operators carry an index into this
+ // vector.
+ operator_codes:[OperatorCode];
+
+ // All the subgraphs of the model. The 0th is assumed to be the main
+ // model.
+ subgraphs:[SubGraph];
+
+ // A description of the model.
+ description:string;
+
+ // Buffers of the model.
+ // Note the 0th entry of this array must be an empty buffer (sentinel).
+ // This is a convention so that tensors without a buffer can provide 0 as
+ // their buffer.
+ buffers:[Buffer];
+
+ // Metadata about the model. Indirects into the existing buffers list.
+ // Deprecated, prefer to use metadata field.
+ metadata_buffer:[int];
+
+ // Metadata about the model.
+ metadata:[Metadata];
+}
+
+root_type Model;
diff --git a/res/CircleSchema/0.4/circle_schema.fbs b/res/CircleSchema/0.4/circle_schema.fbs
new file mode 100644
index 000000000..8ad444d95
--- /dev/null
+++ b/res/CircleSchema/0.4/circle_schema.fbs
@@ -0,0 +1,1292 @@
+// Copyright (c) 2019~2022 Samsung Electronics Co., Ltd. All Rights Reserved
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Revision History
+//
+// Version Major.Minor
+//
+// The major version is the schema version;
+// it is kept as long as the schema stays compatible.
+// The minor version is for human communication only
+// and is not stored in the circle model.
+//
+// Version 0.0: Initial version. Based on TensorFlow Lite v1.13.1 schema.
+// Version 0.1: Based on TF v2.2-rc2 + more (from TensorFlow `56d281c`)
+// `BATCH_MATMUL` operator, `FLOAT64` tensor type,
+// `asymmetric_quantize_inputs` for several operator options
+// Version 0.2: BCQ_GATHER and BCQ_FULLY_CONNECTED are added.
+// Version 0.3: SHUFFLED16x1FLOAT32 is added.
+// Version 0.4: Based on TensorFlow Lite v2.7.0 schema.
+
+namespace circle;
+
+// This corresponds to the version.
+file_identifier "CIR0";
+// File extension of any written files.
+file_extension "circle";
+
+// IMPORTANT: All new members of tables, enums and unions must be added at the
+// end to ensure backwards compatibility.
+
+// The type of data stored in a tensor.
+enum TensorType : byte {
+ FLOAT32 = 0,
+ FLOAT16 = 1,
+ INT32 = 2,
+ UINT8 = 3,
+ INT64 = 4,
+ STRING = 5,
+ BOOL = 6,
+ INT16 = 7,
+ COMPLEX64 = 8,
+ INT8 = 9,
+ FLOAT64 = 10,
+ COMPLEX128 = 11,
+ UINT64 = 12,
+ // Experimental: Resource and variant types are experimental and subject
+ // to change. Do not implement custom kernels using resource & variant types
+ // now.
+ RESOURCE = 13,
+ VARIANT = 14,
+ UINT32 = 15,
+}
+
+// Custom quantization parameters for experimenting with new quantization
+// techniques.
+table CustomQuantization {
+ custom:[ubyte] (force_align: 16);
+}
+
+// Represents a specific quantization technique's parameters.
+union QuantizationDetails {
+ CustomQuantization,
+}
+
+// Parameters for converting a quantized tensor back to float.
+table QuantizationParameters {
+ // These four parameters are the asymmetric linear quantization parameters.
+ // Given a quantized value q, the corresponding float value f should be:
+ // f = scale * (q - zero_point)
+ // For other quantization types, the QuantizationDetails below is used.
+ min:[float]; // For importing back into tensorflow.
+ max:[float]; // For importing back into tensorflow.
+ scale:[float]; // For dequantizing the tensor's values.
+ zero_point:[long];
+
+ // If this is not none, the other quantization parameters (i.e. min, max,
+ // scale, zero_point fields above) are ignored and the value of the
+ // QuantizationDetails union should be used.
+ details:QuantizationDetails;
+
+ // Specifies the dimension of the Tensor's shape that the scales and
+ // zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1]
+ // with quantization params:
+ // scale=[1.0, 2.0, 3.0], zero_point=[1, 2, 3], quantization_dimension=1
+ // will be quantized across the second dimension of t.
+ // t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1
+ // t[:, 1, :, :] will have scale[1]=2.0, zero_point[1]=2
+ // t[:, 2, :, :] will have scale[2]=3.0, zero_point[2]=3
+ quantized_dimension:int;
+}
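+
+// Worked example (illustrative): the forward (quantize) direction is
+// q = round(f / scale) + zero_point, ignoring clamping; with scale = [0.5]
+// and zero_point = [128], f = 1.0 is stored as q = round(1.0 / 0.5) + 128 = 130.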
+
+// Sparse tensors.
+// We use a modification of the TACO format.
+// Reference: http://tensor-compiler.org/kjolstad-oopsla17-tensor-compiler.pdf
+//
+// To encode a conceptual n-dimensional dense tensor with dims (d0, ..., dn-1),
+// potentially with a k-dimensional block (0 <= k <= n) with dims
+// (dn, ..., dn+k-1), the format needs to specify:
+// 1. In what order to traverse these dimensions. For example, to store a 2-D
+// matrix in row major order, the traversal order would be (d0, d1),
+// whereas to store it in column major order, the traversal order would be
+// (d1, d0). If the 2-D matrix has a 2-D inner block, the traversal order
+// could be (d0, d1, d2, d3).
+// 2. How each block dimension in (dn, ..., dn+k-1) maps to the original
+// tensor dimension in (d0, ..., dn-1).
+// 3. In the traversal order defined above, the format (dense vs. sparse) and
+// index metadata for each dimension. For a dense dimension, this is just
+// the size of that dimension. For a sparse dimension, it's the same as
+// the compressed index defined in the Compressed Sparse Row (CSR) format.
+// (http://scipy-lectures.org/advanced/scipy_sparse/csr_matrix.html)
+
+// The storage type for a dimension. Currently we support:
+// 1. DENSE: each coordinate in this dimension is stored implicitly.
+// 2. SPARSE_CSR: only the coordinates with non-zero elements are stored. The
+// compression technique is the same as what CSR uses.
+// More types like a sparse dimension with a different compression technique
+// could be added to the list in the future.
+enum DimensionType : byte {
+ DENSE = 0,
+ SPARSE_CSR = 1,
+}
+
+table Int32Vector {
+ values:[int];
+}
+
+table Uint16Vector {
+ values:[ushort] (force_align: 4);
+}
+
+table Uint8Vector {
+ values:[ubyte] (force_align: 4);
+}
+
+// Variable-typed buffer to store the index metadata for a sparse dimension.
+// The widest type is Int32 instead of UInt32 because a tensor's shape is an int32
+// vector. We don't want the per-dimensional index to overflow that range.
+union SparseIndexVector {
+ Int32Vector,
+ Uint16Vector,
+ Uint8Vector
+}
+
+table DimensionMetadata {
+ // Whether a dimension is dense or sparse.
+ format:DimensionType;
+ // Index metadata used for a dimension.
+ // - If format is DimensionType.DENSE then we use the dense_size field to
+ // store the size of that dimension. Each index in that dimension is
+ // stored implicitly.
+ // - If format is DimensionType.SPARSE_CSR then we use array_segments and
+ // array_indices to encode that dimension. array_segments represents how
+ // to segment the indices array, each segment corresponds to one element
+ // in the previous dimension. array_indices represents the index of the
+ // non-zero elements within this dimension (as those in the CSR matrix
+ // format, where the first array is row pointers and the second array is
+ // column indices).
+ dense_size:int;
+ array_segments:SparseIndexVector;
+ array_indices:SparseIndexVector;
+}
+
+// Parameters to encode a sparse TfLite tensor.
+table SparsityParameters {
+ // The traversal order of the dimensions defined in the `shape` field of the
+ // conceptual dense tensor. For an n-dimensional tensor with dims (d0, d1,
+ // ..., dn-1),
+ // - if not block sparse, the traversal_order is just a permutation of (d0,
+ // ..., dn-1). For example, a 2-D matrix stored in row-major order would
+ // have traversal_order = (d0, d1).
+ // - if block sparse with a k-dimensional block (0 <= k <= n), the
+ // traversal_order has n + k elements. The first n elements are still a
+ // permutation of (d0, ..., dn-1). The last k elements are a permutation
+ // of (dn, ..., dn+k-1), defining how to traverse a block internally. For
+ // example, a 2-D matrix with 2-D blocks, both stored in row-major order
+ // would have traversal_order = (d0, d1, d2, d3).
+ traversal_order:[int];
+ // For an n-dimensional tensor with a k-dimensional block (0 <= k <= n),
+ // stores how a block dimension in (dn, ..., dn+k-1) maps to the original
+ // tensor dimension in (d0, ..., dn-1).
+ // It's stored in the order of (dn, ..., dn+k-1).
+ // If not block-sparse, this field is NULL.
+ block_map:[int];
+ // In the traversal order defined above, the metadata needed for
+ // each dimension to locate the non-zero values in the original dense tensor.
+ // The size of the dim_metadata array = the size of the traversal_order array
+ // = n + k.
+ dim_metadata:[DimensionMetadata];
+}
+
+table Tensor {
+ // The tensor shape. The meaning of each entry is operator-specific but
+ // builtin ops use: [batch size, height, width, number of channels] (That's
+ // Tensorflow's NHWC).
+ shape:[int];
+ type:TensorType;
+ // An index that refers to the buffers table at the root of the model. Or,
+ // if there is no data buffer associated (i.e. intermediate results), then
+ // this is 0 (which refers to an always existent empty buffer).
+ //
+ // The data_buffer itself is an opaque container, with the assumption that the
+ // target device is little-endian. In addition, all builtin operators assume
+ // the memory is ordered such that if `shape` is [4, 3, 2], then index
+ // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k].
+ buffer:uint;
+ name:string; // For debugging and importing back into tensorflow.
+ quantization:QuantizationParameters; // Optional.
+
+ is_variable:bool = false;
+
+ // Parameters to encode a sparse tensor. See the example in
+ // tensorflow/lite/testdata/sparse_tensor.json.
+ sparsity:SparsityParameters; // Optional.
+
+ // Encodes `shape` with unknown dimensions. Unknown dimensions are
+ // represented with -1.
+ shape_signature:[int]; // Optional.
+}
+
+// A list of builtin operators. Builtin operators are slightly faster than custom
+// ones, but not by much. Moreover, while custom operators accept an opaque
+// object containing configuration parameters, builtins have a predetermined
+// set of acceptable options.
+// LINT.IfChange
+enum BuiltinOperator : int32 {
+ BCQ_GATHER = -4,
+ BCQ_FULLY_CONNECTED = -3,
+ INSTANCE_NORM = -2,
+ ADD = 0,
+ AVERAGE_POOL_2D = 1,
+ CONCATENATION = 2,
+ CONV_2D = 3,
+ DEPTHWISE_CONV_2D = 4,
+ DEPTH_TO_SPACE = 5,
+ DEQUANTIZE = 6,
+ EMBEDDING_LOOKUP = 7,
+ FLOOR = 8,
+ FULLY_CONNECTED = 9,
+ HASHTABLE_LOOKUP = 10,
+ L2_NORMALIZATION = 11,
+ L2_POOL_2D = 12,
+ LOCAL_RESPONSE_NORMALIZATION = 13,
+ LOGISTIC = 14,
+ LSH_PROJECTION = 15,
+ LSTM = 16,
+ MAX_POOL_2D = 17,
+ MUL = 18,
+ RELU = 19,
+ // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed
+ // since different model developers use RELU1 in different ways. Never
+ // create another op called RELU1.
+ RELU_N1_TO_1 = 20,
+ RELU6 = 21,
+ RESHAPE = 22,
+ RESIZE_BILINEAR = 23,
+ RNN = 24,
+ SOFTMAX = 25,
+ SPACE_TO_DEPTH = 26,
+ SVDF = 27,
+ TANH = 28,
+ CONCAT_EMBEDDINGS = 29,
+ SKIP_GRAM = 30,
+ CALL = 31,
+ CUSTOM = 32,
+ EMBEDDING_LOOKUP_SPARSE = 33,
+ PAD = 34,
+ UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+ GATHER = 36,
+ BATCH_TO_SPACE_ND = 37,
+ SPACE_TO_BATCH_ND = 38,
+ TRANSPOSE = 39,
+ MEAN = 40,
+ SUB = 41,
+ DIV = 42,
+ SQUEEZE = 43,
+ UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
+ STRIDED_SLICE = 45,
+ BIDIRECTIONAL_SEQUENCE_RNN = 46,
+ EXP = 47,
+ TOPK_V2 = 48,
+ SPLIT = 49,
+ LOG_SOFTMAX = 50,
+ // DELEGATE is a special op type for the operations which are delegated to
+ // other backends.
+ // WARNING: Experimental interface, subject to change
+ DELEGATE = 51,
+ BIDIRECTIONAL_SEQUENCE_LSTM = 52,
+ CAST = 53,
+ PRELU = 54,
+ MAXIMUM = 55,
+ ARG_MAX = 56,
+ MINIMUM = 57,
+ LESS = 58,
+ NEG = 59,
+ PADV2 = 60,
+ GREATER = 61,
+ GREATER_EQUAL = 62,
+ LESS_EQUAL = 63,
+ SELECT = 64,
+ SLICE = 65,
+ SIN = 66,
+ TRANSPOSE_CONV = 67,
+ SPARSE_TO_DENSE = 68,
+ TILE = 69,
+ EXPAND_DIMS = 70,
+ EQUAL = 71,
+ NOT_EQUAL = 72,
+ LOG = 73,
+ SUM = 74,
+ SQRT = 75,
+ RSQRT = 76,
+ SHAPE = 77,
+ POW = 78,
+ ARG_MIN = 79,
+ FAKE_QUANT = 80,
+ REDUCE_PROD = 81,
+ REDUCE_MAX = 82,
+ PACK = 83,
+ LOGICAL_OR = 84,
+ ONE_HOT = 85,
+ LOGICAL_AND = 86,
+ LOGICAL_NOT = 87,
+ UNPACK = 88,
+ REDUCE_MIN = 89,
+ FLOOR_DIV = 90,
+ REDUCE_ANY = 91,
+ SQUARE = 92,
+ ZEROS_LIKE = 93,
+ FILL = 94,
+ FLOOR_MOD = 95,
+ RANGE = 96,
+ RESIZE_NEAREST_NEIGHBOR = 97,
+ LEAKY_RELU = 98,
+ SQUARED_DIFFERENCE = 99,
+ MIRROR_PAD = 100,
+ ABS = 101,
+ SPLIT_V = 102,
+ UNIQUE = 103,
+ CEIL = 104,
+ REVERSE_V2 = 105,
+ ADD_N = 106,
+ GATHER_ND = 107,
+ COS = 108,
+ WHERE = 109,
+ RANK = 110,
+ ELU = 111,
+ REVERSE_SEQUENCE = 112,
+ MATRIX_DIAG = 113,
+ QUANTIZE = 114,
+ MATRIX_SET_DIAG = 115,
+ ROUND = 116,
+ HARD_SWISH = 117,
+ IF = 118,
+ WHILE = 119,
+ NON_MAX_SUPPRESSION_V4 = 120,
+ NON_MAX_SUPPRESSION_V5 = 121,
+ SCATTER_ND = 122,
+ SELECT_V2 = 123,
+ DENSIFY = 124,
+ SEGMENT_SUM = 125,
+ BATCH_MATMUL = 126,
+ PLACEHOLDER_FOR_GREATER_OP_CODES = 127,
+ CUMSUM = 128,
+ CALL_ONCE = 129,
+ BROADCAST_TO = 130,
+ RFFT2D = 131,
+ CONV_3D = 132,
+ IMAG = 133,
+ REAL = 134,
+ COMPLEX_ABS = 135,
+ HASHTABLE = 136,
+ HASHTABLE_FIND = 137,
+ HASHTABLE_IMPORT = 138,
+ HASHTABLE_SIZE = 139,
+ REDUCE_ALL = 140,
+ CONV_3D_TRANSPOSE = 141,
+ VAR_HANDLE = 142,
+ READ_VARIABLE = 143,
+ ASSIGN_VARIABLE = 144,
+ BROADCAST_ARGS = 145,
+ RANDOM_STANDARD_NORMAL = 146,
+}
+// LINT.ThenChange(nnapi_linter/linter.proto)
+
+// Options for the builtin operators.
+union BuiltinOptions {
+ Conv2DOptions,
+ DepthwiseConv2DOptions,
+ ConcatEmbeddingsOptions,
+ LSHProjectionOptions,
+ Pool2DOptions,
+ SVDFOptions,
+ RNNOptions,
+ FullyConnectedOptions,
+ SoftmaxOptions,
+ ConcatenationOptions,
+ AddOptions,
+ L2NormOptions,
+ LocalResponseNormalizationOptions,
+ LSTMOptions,
+ ResizeBilinearOptions,
+ CallOptions,
+ ReshapeOptions,
+ SkipGramOptions,
+ SpaceToDepthOptions,
+ EmbeddingLookupSparseOptions,
+ MulOptions,
+ PadOptions,
+ GatherOptions,
+ BatchToSpaceNDOptions,
+ SpaceToBatchNDOptions,
+ TransposeOptions,
+ ReducerOptions,
+ SubOptions,
+ DivOptions,
+ SqueezeOptions,
+ SequenceRNNOptions,
+ StridedSliceOptions,
+ ExpOptions,
+ TopKV2Options,
+ SplitOptions,
+ LogSoftmaxOptions,
+ CastOptions,
+ DequantizeOptions,
+ MaximumMinimumOptions,
+ ArgMaxOptions,
+ LessOptions,
+ NegOptions,
+ PadV2Options,
+ GreaterOptions,
+ GreaterEqualOptions,
+ LessEqualOptions,
+ SelectOptions,
+ SliceOptions,
+ TransposeConvOptions,
+ SparseToDenseOptions,
+ TileOptions,
+ ExpandDimsOptions,
+ EqualOptions,
+ NotEqualOptions,
+ ShapeOptions,
+ PowOptions,
+ ArgMinOptions,
+ FakeQuantOptions,
+ PackOptions,
+ LogicalOrOptions,
+ OneHotOptions,
+ LogicalAndOptions,
+ LogicalNotOptions,
+ UnpackOptions,
+ FloorDivOptions,
+ SquareOptions,
+ ZerosLikeOptions,
+ FillOptions,
+ BidirectionalSequenceLSTMOptions,
+ BidirectionalSequenceRNNOptions,
+ UnidirectionalSequenceLSTMOptions,
+ FloorModOptions,
+ RangeOptions,
+ ResizeNearestNeighborOptions,
+ LeakyReluOptions,
+ SquaredDifferenceOptions,
+ MirrorPadOptions,
+ AbsOptions,
+ SplitVOptions,
+ UniqueOptions,
+ ReverseV2Options,
+ AddNOptions,
+ GatherNdOptions,
+ CosOptions,
+ WhereOptions,
+ RankOptions,
+ ReverseSequenceOptions,
+ MatrixDiagOptions,
+ QuantizeOptions,
+ MatrixSetDiagOptions,
+ HardSwishOptions,
+ IfOptions,
+ WhileOptions,
+ DepthToSpaceOptions,
+ NonMaxSuppressionV4Options,
+ NonMaxSuppressionV5Options,
+ ScatterNdOptions,
+ SelectV2Options,
+ DensifyOptions,
+ SegmentSumOptions,
+ BatchMatMulOptions,
+ CumsumOptions,
+ CallOnceOptions,
+ BroadcastToOptions,
+ Rfft2dOptions,
+ Conv3DOptions,
+ HashtableOptions,
+ HashtableFindOptions,
+ HashtableImportOptions,
+ HashtableSizeOptions,
+ VarHandleOptions,
+ ReadVariableOptions,
+ AssignVariableOptions,
+ RandomOptions,
+ BCQGatherOptions = 252,
+ BCQFullyConnectedOptions = 253,
+ InstanceNormOptions = 254,
+}
+
+enum Padding : byte { SAME, VALID }
+
+enum ActivationFunctionType : byte {
+ NONE = 0,
+ RELU = 1,
+ RELU_N1_TO_1 = 2,
+ RELU6 = 3,
+ TANH = 4,
+ SIGN_BIT = 5,
+}
+
+table Conv2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+// Options for both Conv3D and Conv3DTranspose.
+table Conv3DOptions {
+ padding:Padding;
+ stride_d:int;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_d_factor:int = 1;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table Pool2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ filter_width:int;
+ filter_height:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DepthwiseConv2DOptions {
+ // Parameters for DepthwiseConv version 1 or above.
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ // `depth_multiplier` is redundant. It's used by CPU kernels in
+ // TensorFlow 2.0 or below, but ignored in versions above.
+ // See comments in lite/c/builtin_op_data.h for more details.
+ depth_multiplier:int;
+ fused_activation_function:ActivationFunctionType;
+ // Parameters for DepthwiseConv version 2 or above.
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table ConcatEmbeddingsOptions {
+ num_channels:int;
+ num_columns_per_channel:[int];
+ embedding_dim_per_channel:[int]; // This could be inferred from parameters.
+}
+
+enum LSHProjectionType: byte {
+ UNKNOWN = 0,
+ SPARSE = 1,
+ DENSE = 2,
+}
+
+table LSHProjectionOptions {
+ type: LSHProjectionType;
+}
+
+table SVDFOptions {
+ rank:int;
+ fused_activation_function:ActivationFunctionType;
+ // For weights-only quantization, use asymmetric quantization for non
+ // constant inputs at evaluation time.
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow RNNCell.
+table RNNOptions {
+ fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with RNNCell.
+table SequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow bidirectional_dynamic_rnn with RNNCell.
+table BidirectionalSequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ merge_outputs: bool;
+ asymmetric_quantize_inputs:bool;
+}
+
+enum FullyConnectedOptionsWeightsFormat: byte {
+ DEFAULT = 0,
+ SHUFFLED4x16INT8 = 1,
+ SHUFFLED16x1FLOAT32 = 127
+}
+
+// An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
+table FullyConnectedOptions {
+ // Parameters for FullyConnected version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+
+ // Parameters for FullyConnected version 2 or above.
+ weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT;
+
+ // Parameters for FullyConnected version 5 or above.
+ // If set to true, then the number of dimensions is preserved. Furthermore,
+ // all but the last dimension of the input and output shapes will be equal.
+ keep_num_dims: bool;
+
+ // Parameters for FullyConnected version 7 or above.
+ // If set to true, then weights-only op will use asymmetric quantization for
+ // inputs.
+ asymmetric_quantize_inputs: bool;
+}
+
+table SoftmaxOptions {
+ beta: float;
+}
+
+// An implementation of TensorFlow concat.
+table ConcatenationOptions {
+ axis:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table AddOptions {
+ fused_activation_function:ActivationFunctionType;
+ // Parameters supported by version 3.
+ pot_scale_int16:bool = true;
+}
+
+table MulOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table L2NormOptions {
+ // This field is currently ignored in the L2 Norm Op.
+ fused_activation_function:ActivationFunctionType;
+}
+
+table LocalResponseNormalizationOptions {
+ radius:int;
+ bias:float;
+ alpha:float;
+ beta:float;
+}
+
+enum LSTMKernelType : byte {
+ // Full LSTM kernel which supports peephole and projection.
+ FULL = 0,
+ // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
+ BASIC = 1,
+}
+
+// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
+table LSTMOptions {
+ // Parameters for LSTM version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // Parameters for LSTM version 2 or above.
+ // Basic kernel is only supported in version 2 or above.
+ kernel_type: LSTMKernelType = FULL;
+
+ // Parameters for LSTM version 4 or above.
+ asymmetric_quantize_inputs: bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with LSTMCell.
+table UnidirectionalSequenceLSTMOptions {
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true then first dimension is sequence, otherwise batch.
+ time_major:bool;
+
+ // Parameter for Unidirectional Sequence LSTM version 4.
+ asymmetric_quantize_inputs:bool;
+}
+
+table BidirectionalSequenceLSTMOptions {
+ // Parameters supported by version 1:
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true, store the outputs of both directions into the first output.
+ merge_outputs: bool;
+
+ // Parameters supported by version 2:
+ // If true then first dimension is sequence, otherwise batch.
+ // Version 1 implementations assumed time_major to be true, so this default
+ // value should never change.
+ time_major: bool = true;
+
+ // Parameters for version 3 or above.
+ asymmetric_quantize_inputs:bool;
+}
+
+table ResizeBilinearOptions {
+ new_height: int (deprecated);
+ new_width: int (deprecated);
+ align_corners: bool;
+ half_pixel_centers: bool;
+}
+
+table ResizeNearestNeighborOptions {
+ align_corners: bool;
+ half_pixel_centers: bool;
+}
+
+// Options for a call operation.
+table CallOptions {
+ // The subgraph index that needs to be called.
+ subgraph:uint;
+}
+
+table PadOptions {
+}
+
+table PadV2Options {
+}
+
+table ReshapeOptions {
+ new_shape:[int];
+}
+
+table SpaceToBatchNDOptions {
+}
+
+table BatchToSpaceNDOptions {
+}
+
+table SkipGramOptions {
+ ngram_size: int;
+ max_skip_size: int;
+ include_all_ngrams: bool;
+}
+
+table SpaceToDepthOptions {
+ block_size: int;
+}
+
+table DepthToSpaceOptions {
+ block_size: int;
+}
+
+table SubOptions {
+ fused_activation_function:ActivationFunctionType;
+ // Parameters supported by version 5
+ pot_scale_int16:bool = true;
+}
+
+table DivOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table TopKV2Options {
+}
+
+enum CombinerType : byte {
+ SUM = 0,
+ MEAN = 1,
+ SQRTN = 2,
+}
+
+table EmbeddingLookupSparseOptions {
+ combiner:CombinerType;
+}
+
+table GatherOptions {
+ axis: int;
+ // Parameters for Gather version 5 or above.
+ batch_dims: int = 0;
+}
+
+table TransposeOptions {
+}
+
+table ExpOptions {
+}
+
+table CosOptions {
+}
+
+table ReducerOptions {
+ keep_dims: bool;
+}
+
+table SqueezeOptions {
+ squeeze_dims:[int];
+}
+
+table SplitOptions {
+ num_splits: int;
+}
+
+table SplitVOptions {
+ num_splits: int;
+}
+
+table StridedSliceOptions {
+ begin_mask: int;
+ end_mask: int;
+ ellipsis_mask: int;
+ new_axis_mask: int;
+ shrink_axis_mask: int;
+}
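+
+// The masks above are per-dimension bit fields following TensorFlow's
+// strided_slice semantics. An illustrative (non-normative) example: for an
+// input of shape [5, 6] with begin=[1, 0], end=[3, 6], strides=[1, 1],
+//   begin_mask = 0b01       -> dimension 0 ignores begin[0] and starts at 0;
+//   shrink_axis_mask = 0b10 -> dimension 1 collapses to index begin[1] and
+//                              is dropped from the output shape.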
+
+table LogSoftmaxOptions {
+}
+
+table CastOptions {
+ in_data_type: TensorType;
+ out_data_type: TensorType;
+}
+
+table DequantizeOptions {
+}
+
+table MaximumMinimumOptions {
+}
+
+table TileOptions {
+}
+
+table ArgMaxOptions {
+ output_type : TensorType;
+}
+
+table ArgMinOptions {
+ output_type : TensorType;
+}
+
+table GreaterOptions {
+}
+
+table GreaterEqualOptions {
+}
+
+table LessOptions {
+}
+
+table LessEqualOptions {
+}
+
+table NegOptions {
+}
+
+table SelectOptions {
+}
+
+table SliceOptions {
+}
+
+table TransposeConvOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+}
+
+table ExpandDimsOptions {
+}
+
+table SparseToDenseOptions {
+ validate_indices:bool;
+}
+
+table EqualOptions {
+}
+
+table NotEqualOptions {
+}
+
+table ShapeOptions {
+ // Optional output type of the operation (int32 or int64). Defaults to int32.
+ out_type : TensorType;
+}
+
+table RankOptions {
+}
+
+table PowOptions {
+}
+
+table FakeQuantOptions {
+ // Parameters supported by version 1:
+ min:float;
+ max:float;
+ num_bits:int;
+
+ // Parameters supported by version 2:
+ narrow_range:bool;
+}
+
+table PackOptions {
+ values_count:int;
+ axis:int;
+}
+
+table LogicalOrOptions {
+}
+
+table OneHotOptions {
+ axis:int;
+}
+
+table AbsOptions {
+}
+
+table HardSwishOptions {
+}
+
+table LogicalAndOptions {
+}
+
+table LogicalNotOptions {
+}
+
+table UnpackOptions {
+ num:int;
+ axis:int;
+}
+
+table FloorDivOptions {
+}
+
+table SquareOptions {
+}
+
+table ZerosLikeOptions {
+}
+
+table FillOptions {
+}
+
+table FloorModOptions {
+}
+
+table RangeOptions {
+}
+
+table LeakyReluOptions {
+ alpha:float;
+}
+
+table SquaredDifferenceOptions {
+}
+
+enum MirrorPadMode : byte {
+ // Doesn't include borders.
+ REFLECT = 0,
+ // Includes borders.
+ SYMMETRIC = 1,
+}
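+
+// An illustrative example (following TensorFlow's MirrorPad): padding
+// [1, 2, 3] with one element on each side gives
+//   REFLECT   -> [2, 1, 2, 3, 2]  (the border element is not repeated)
+//   SYMMETRIC -> [1, 1, 2, 3, 3]  (the border element is repeated)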
+
+table MirrorPadOptions {
+ mode:MirrorPadMode;
+}
+
+table UniqueOptions {
+ idx_out_type:TensorType = INT32;
+}
+
+table ReverseV2Options {
+}
+
+table AddNOptions {
+}
+
+table GatherNdOptions {
+}
+
+table WhereOptions {
+}
+
+table ReverseSequenceOptions {
+ seq_dim:int;
+ batch_dim:int = 0;
+}
+
+table MatrixDiagOptions {
+}
+
+table QuantizeOptions {
+}
+
+table MatrixSetDiagOptions {
+}
+
+table IfOptions {
+ then_subgraph_index:int;
+ else_subgraph_index:int;
+}
+
+table CallOnceOptions {
+ init_subgraph_index:int;
+}
+
+table WhileOptions {
+ cond_subgraph_index:int;
+ body_subgraph_index:int;
+}
+
+table NonMaxSuppressionV4Options {
+}
+
+table NonMaxSuppressionV5Options {
+}
+
+table ScatterNdOptions {
+}
+
+table SelectV2Options {
+}
+
+table DensifyOptions {
+}
+
+table SegmentSumOptions {
+}
+
+table BatchMatMulOptions {
+ adjoint_lhs:bool;
+ adjoint_rhs:bool;
+ // Parameters for BatchMatMul version 4 or above.
+ // If set to true, then weights-only op will use asymmetric quantization for
+ // inputs.
+ asymmetric_quantize_inputs: bool;
+}
+
+table CumsumOptions {
+ exclusive:bool;
+ reverse:bool;
+}
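+
+// An illustrative example (following TensorFlow's cumsum): for input [1, 2, 3],
+//   exclusive=false, reverse=false -> [1, 3, 6]
+//   exclusive=true,  reverse=false -> [0, 1, 3]
+//   exclusive=false, reverse=true  -> [6, 5, 3]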
+
+table BroadcastToOptions {
+}
+
+table Rfft2dOptions {
+}
+
+table HashtableOptions {
+ // The identity of hash tables. This identity will be used across different
+ // subgraphs in the same interpreter instance.
+ table_id:int;
+ key_dtype:TensorType;
+ value_dtype:TensorType;
+}
+
+table HashtableFindOptions {
+}
+
+table HashtableImportOptions {
+}
+
+table HashtableSizeOptions {
+}
+
+table VarHandleOptions {
+ container:string;
+ shared_name:string;
+}
+
+table ReadVariableOptions {
+}
+
+table AssignVariableOptions {
+}
+
+table RandomOptions {
+ seed: int;
+ seed2: int;
+}
+
+table BCQGatherOptions {
+ input_hidden_size: int;
+ axis: int;
+}
+
+table BCQFullyConnectedOptions {
+ weights_hidden_size: int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table InstanceNormOptions {
+ epsilon:float;
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
+// builtin, or a string if the operator is custom.
+table OperatorCode {
+ // This field is for backward compatibility. This field will be used when
+ // the value of the extended builtin_code field is less than
+ // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+ deprecated_builtin_code:byte;
+ custom_code:string;
+
+ // The version of the operator. The version needs to be bumped whenever new
+ // parameters are introduced into an op.
+ version:int = 1;
+
+ // This field is introduced to resolve the op builtin code shortage problem
+ // (the original BuiltinOperator enum field was represented as a byte).
+ // This field will be used when the value of the extended builtin_code field
+ // is greater than BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+ builtin_code:BuiltinOperator;
+}
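+
+// Readers resolve the effective opcode from the two fields above. A minimal
+// Python sketch of the usual rule (mirroring TFLite's schema_utils helper;
+// circle's negative extension codes such as BCQ_GATHER may need dedicated
+// handling):
+//
+//   def builtin_code_of(op_code):
+//       # Older files leave builtin_code at its default (ADD = 0) and carry
+//       # the opcode only in the deprecated byte-sized field.
+//       return max(op_code.builtin_code, op_code.deprecated_builtin_code)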
+
+enum CustomOptionsFormat : byte {
+ FLEXBUFFERS = 0,
+}
+
+enum DataFormat : byte {
+ // For 2D data, NHWC(batch, height, width, channels)
+ // For 3D data, NDHWC(batch, depth, height, width, channels)
+ CHANNELS_LAST = 0,
+ // For 2D data, NCHW(batch, channels, height, width)
+ // For 3D data, NCDHW(batch, channels, depth, height, width)
+ CHANNELS_FIRST = 1,
+}
+
+// An operator takes tensors as inputs and produces tensors as outputs. The
+// type of operation being performed is determined by an index into the list
+// of valid OperatorCodes, while the specifics of each operation are
+// configured using builtin_options or custom_options.
+table Operator {
+ // Index into the operator_codes array. Using an integer here avoids
+ // complicated map lookups.
+ opcode_index:uint;
+
+ // Optional inputs are indicated by -1.
+ inputs:[int];
+ outputs:[int];
+
+ builtin_options:BuiltinOptions;
+ custom_options:[ubyte];
+ custom_options_format:CustomOptionsFormat;
+
+ // A list of booleans indicating the input tensors which are being mutated by
+ // this operator (e.g. used by RNN and LSTM).
+ // For example, if the "inputs" array refers to 5 tensors and the second and
+ // fifth are mutable variables, then this list will contain
+ // [false, true, false, false, true].
+ //
+ // If the list is empty, no variable is mutated in this operator.
+ // The list either has the same length as `inputs`, or is empty.
+ mutating_variable_inputs:[bool];
+
+ // A list of indices to the subgraph's "tensors" that are internal to an Op.
+ // Internal tensors are those that do not flow in or out of the operation,
+ // but instead are part of internal computation. As such, the operation's
+ // implementation may manage its memory more efficiently. They are needed
+ // however (i.e. not just an implementation detail) since they are part of the
+ // computation, which may require relevant metadata such as quantization
+ // parameters.
+ intermediates:[int];
+}
+
+// The root type, defining a subgraph, which typically represents an entire
+// model.
+table SubGraph {
+ // A list of all tensors used in this subgraph.
+ tensors:[Tensor];
+
+ // Indices of the tensors that are inputs into this subgraph. Note this is
+ // the list of non-static tensors that feed into the subgraph for inference.
+ inputs:[int];
+
+ // Indices of the tensors that are outputs out of this subgraph. Note this is
+ // the list of output tensors that are considered the product of the
+ // subgraph's inference.
+ outputs:[int];
+
+ // All operators, in execution order.
+ operators:[Operator];
+
+ // Name of this subgraph (used for debugging).
+ name:string;
+
+ // Data format for input/output of SubGraph
+ data_format: DataFormat;
+}
+
+// Table of raw data buffers (used for constant tensors). Referenced by tensors
+// by index. The generous alignment accommodates mmap-friendly data structures.
+table Buffer {
+ data:[ubyte] (force_align: 16);
+}
+
+table Metadata {
+ // A human readable string to uniquely identify a Metadata.
+ name:string;
+ // An index to the buffers table.
+ buffer:uint;
+}
+
+// Map from an alias name of a tensor to the tensor index in the graph.
+// This is used in SignatureDef.
+table TensorMap {
+ // Represents the alias to use for this tensor.
+ name:string;
+
+ // The actual tensor index in the primary graph that 'name' corresponds to.
+ tensor_index:uint;
+}
+
+// This corresponds to SignatureDef in Tensorflow SavedModel.
+// The SignatureDef will be part of the SavedModel provided for conversion.
+table SignatureDef {
+ // Named inputs for this signature.
+ inputs:[TensorMap];
+
+ // Named outputs for this signature.
+ outputs:[TensorMap];
+
+ // Key value which was in the Tensorflow SavedModel SignatureDef map.
+ signature_key:string;
+
+ // Model tag, deprecated.
+ deprecated_tag:string (deprecated);
+
+ // Index of the subgraph that corresponds to the exported method.
+ subgraph_index:uint;
+}
+
+table Model {
+ // Version of the schema.
+ version:uint;
+
+ // A list of all operator codes used in this model. This is
+ // kept in order because operators carry an index into this
+ // vector.
+ operator_codes:[OperatorCode];
+
+ // All the subgraphs of the model. The 0th is assumed to be the main
+ // model.
+ subgraphs:[SubGraph];
+
+ // A description of the model.
+ description:string;
+
+ // Buffers of the model.
+ // Note the 0th entry of this array must be an empty buffer (sentinel).
+ // This is a convention so that tensors without a buffer can provide 0 as
+ // their buffer.
+ buffers:[Buffer];
+
+ // Metadata about the model. Indirects into the existing buffers list.
+ // Deprecated, prefer to use metadata field.
+ metadata_buffer:[int];
+
+ // Metadata about the model.
+ metadata:[Metadata];
+
+ // Optional SignatureDefs for the model.
+ signature_defs:[SignatureDef];
+}
+
+root_type Model;
diff --git a/res/CircleSchema/0.5/circle_schema.fbs b/res/CircleSchema/0.5/circle_schema.fbs
new file mode 100644
index 000000000..e0a0dc476
--- /dev/null
+++ b/res/CircleSchema/0.5/circle_schema.fbs
@@ -0,0 +1,1338 @@
+// Copyright (c) 2019~2023 Samsung Electronics Co., Ltd. All Rights Reserved
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Revision History
+//
+// Version Major.Minor
+//
+// Major version is the schema version.
+// The schema version is kept unchanged as long as changes remain compatible.
+// Minor version is for human communication only;
+// it is not stored in the circle model.
+//
+// Version 0.0: Initial version. Based on TensorFlow Lite v1.13.1 schema.
+// Version 0.1: Based on TF v2.2-rc2 + more (from TensorFlow `56d281c`)
+// `BATCH_MATMUL` operator, `FLOAT64` tensor type,
+// `asymmetric_quantize_inputs` for several operator options
+// Version 0.2: BCQ_GATHER and BCQ_FULLY_CONNECTED are added.
+// Version 0.3: SHUFFLED16x1FLOAT32 is added.
+// Version 0.4: Base up to TensorFlow Lite v2.7.0 schema.
+// Version 0.5: Base up to TensorFlow Lite v2.10.1 schema.
+
+namespace circle;
+
+// This corresponds to the version.
+file_identifier "CIR0";
+// File extension of any written files.
+file_extension "circle";
+
+// IMPORTANT: All new members of tables, enums and unions must be added at the
+// end to ensure backwards compatibility.
+
+// The type of data stored in a tensor.
+enum TensorType : byte {
+ FLOAT32 = 0,
+ FLOAT16 = 1,
+ INT32 = 2,
+ UINT8 = 3,
+ INT64 = 4,
+ STRING = 5,
+ BOOL = 6,
+ INT16 = 7,
+ COMPLEX64 = 8,
+ INT8 = 9,
+ FLOAT64 = 10,
+ COMPLEX128 = 11,
+ UINT64 = 12,
+ // Experimental: Resource and variant types are experimental and subject
+ // to change. Do not implement custom kernels using resource & variant types
+ // yet.
+ RESOURCE = 13,
+ VARIANT = 14,
+ UINT32 = 15,
+ UINT16 = 16
+}
+
+// Custom quantization parameters for experimenting with new quantization
+// techniques.
+table CustomQuantization {
+ custom:[ubyte] (force_align: 16);
+}
+
+// Represents a specific quantization technique's parameters.
+union QuantizationDetails {
+ CustomQuantization,
+}
+
+// Parameters for converting a quantized tensor back to float.
+table QuantizationParameters {
+ // These four parameters are the asymmetric linear quantization parameters.
+ // Given a quantized value q, the corresponding float value f should be:
+ // f = scale * (q - zero_point)
+ // For other quantization types, the QuantizationDetails below is used.
+ min:[float]; // For importing back into tensorflow.
+ max:[float]; // For importing back into tensorflow.
+ scale:[float]; // For dequantizing the tensor's values.
+ zero_point:[long];
+
+ // If this is not none, the other quantization parameters (i.e. min, max,
+ // scale, zero_point fields above) are ignored and the value of the
+ // QuantizationDetails union should be used.
+ details:QuantizationDetails;
+
+ // Specifies the dimension of the Tensor's shape that the scales and
+ // zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1]
+ // with quantization params:
+ // scale=[1.0, 2.0, 3.0], zero_point=[1, 2, 3], quantization_dimension=1
+ // will be quantized across the second dimension of t.
+ // t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1
+ // t[:, 1, :, :] will have scale[1]=2.0, zero_point[1]=2
+ // t[:, 2, :, :] will have scale[2]=3.0, zero_point[2]=3
+ quantized_dimension:int;
+}
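+
+// A minimal NumPy sketch of the per-channel dequantization described above
+// (`dequantize` is an illustrative name; per-tensor quantization is the
+// special case of length-1 scale/zero_point vectors):
+//
+//   import numpy as np
+//
+//   def dequantize(q, scale, zero_point, quantized_dimension):
+//       # q is an integer ndarray; broadcast scale/zero_point along the
+//       # quantized dimension.
+//       shape = [1] * q.ndim
+//       shape[quantized_dimension] = -1
+//       s = np.asarray(scale, dtype=np.float32).reshape(shape)
+//       z = np.asarray(zero_point, dtype=np.float32).reshape(shape)
+//       return s * (q.astype(np.float32) - z)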
+
+// Sparse tensors.
+// We use a modification of the TACO format.
+// Reference: http://tensor-compiler.org/kjolstad-oopsla17-tensor-compiler.pdf
+//
+// To encode a conceptual n-dimensional dense tensor with dims (d0, ..., dn-1),
+// potentially with a k-dimensional block (0 <= k <= n) with dims
+// (dn, ..., dn+k-1), the format needs to specify:
+// 1. In what order to traverse these dimensions. For example, to store a 2-D
+// matrix in row major order, the traversal order would be (d0, d1),
+// whereas to store it in column major order, the traversal order would be
+// (d1, d0). If the 2-D matrix has a 2-D inner block, the traversal order
+// could be (d0, d1, d2, d3).
+// 2. How each block dimension in (dn, ..., dn+k-1) maps to the original
+// tensor dimension in (d0, ..., dn-1).
+// 3. In the traversal order defined above, the format (dense vs. sparse) and
+// index metadata for each dimension. For a dense dimension, this is just
+// the size of that dimension. For a sparse dimension, it's the same as
+// the compressed index defined in the Compressed Sparse Row (CSR) format.
+// (http://scipy-lectures.org/advanced/scipy_sparse/csr_matrix.html)
+
+// The storage type for a dimension. Currently we support:
+// 1. DENSE: each coordinate in this dimension is stored implicitly.
+// 2. SPARSE_CSR: only the coordinates with non-zero elements are stored. The
+// compression technique is the same as the one CSR uses.
+// More types like a sparse dimension with a different compression technique
+// could be added to the list in the future.
+enum DimensionType : byte {
+ DENSE = 0,
+ SPARSE_CSR = 1,
+}
+
+table Int32Vector {
+ values:[int];
+}
+
+table Uint16Vector {
+ values:[ushort] (force_align: 4);
+}
+
+table Uint8Vector {
+ values:[ubyte] (force_align: 4);
+}
+
+// Variable-typed buffer to store the index metadata for a sparse dimension.
+// The widest type is Int32 instead of UInt32 because a tensor's shape is an
+// int32 vector. We don't want the per-dimensional index to overflow that range.
+union SparseIndexVector {
+ Int32Vector,
+ Uint16Vector,
+ Uint8Vector
+}
+
+table DimensionMetadata {
+ // Whether a dimension is dense or sparse.
+ format:DimensionType;
+ // Index metadata used for a dimension.
+ // - If format is DimensionType.DENSE then we use the dense_size field to
+ // store the size of that dimension. Each index in that dimension is
+ // stored implicitly.
+ // - If format is DimensionType.SPARSE_CSR then we use array_segments and
+ // array_indices to encode that dimension. array_segments represents how
+ // to segment the indices array, each segment corresponds to one element
+ // in the previous dimension. array_indices represents the index of the
+ // non-zero elements within this dimension (as those in the CSR matrix
+ // format, where the first array is row pointers and the second array is
+ // column indices).
+ dense_size:int;
+ array_segments:SparseIndexVector;
+ array_indices:SparseIndexVector;
+}
+
+// Parameters to encode a sparse TfLite tensor.
+table SparsityParameters {
+ // The traversal order of the dimensions defined in the `shape` field of the
+ // conceptual dense tensor. For an n-dimensional tensor with dims (d0, d1,
+ // ..., dn-1),
+ // - if not block sparse, the traversal_order is just a permutation of (d0,
+ // ..., dn-1). For example, a 2-D matrix stored in row-major order would
+ // have traversal_order = (d0, d1).
+ // - if block sparse with a k-dimensional block (0 <= k <= n), the
+ // traversal_order has n + k elements. The first n elements are still a
+ // permutation of (d0, ..., dn-1). The last k elements are a permutation
+ // of (dn, ..., dn+k-1), defining how to traverse a block internally. For
+ // example, a 2-D matrix with 2-D blocks, both stored in row-major order
+ // would have traversal_order = (d0, d1, d2, d3).
+ traversal_order:[int];
+ // For an n-dimensional tensor with a k-dimensional block (0 <= k <= n),
+ // stores how a block dimension in (dn, ..., dn+k-1) maps to the original
+ // tensor dimension in (d0, ..., dn).
+ // It's stored in the order of (dn, ..., dn+k-1).
+ // If not block-sparse, this field is NULL.
+ block_map:[int];
+ // In the traversal order defined above, the metadata needed for
+ // each dimension to locate the non-zero values in the original dense tensor.
+ // The size of the dim_metadata array = the size of the traversal_order array
+ // = n + k.
+ dim_metadata:[DimensionMetadata];
+}
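+
+// A worked example in the spirit of the CSR description above. The 4x4 dense
+// matrix
+//   [[1 0 0 0]
+//    [0 2 0 0]
+//    [0 0 0 0]
+//    [0 0 3 4]]
+// stored row-major with a sparse inner dimension would use
+//   traversal_order = [0, 1], block_map = [] (not block-sparse),
+//   dim_metadata[0]: DENSE with dense_size = 4,
+//   dim_metadata[1]: SPARSE_CSR with array_segments = [0, 1, 2, 2, 4]
+//                    and array_indices = [0, 1, 2, 3],
+// and the non-zero values [1, 2, 3, 4] in the tensor's buffer.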
+
+table Tensor {
+ // The tensor shape. The meaning of each entry is operator-specific but
+ // builtin ops use: [batch size, height, width, number of channels] (That's
+ // Tensorflow's NHWC).
+ shape:[int];
+ type:TensorType;
+ // An index that refers to the buffers table at the root of the model. Or,
+ // if there is no data buffer associated (i.e. intermediate results), then
+ // this is 0 (which refers to an always existent empty buffer).
+ //
+ // The data_buffer itself is an opaque container, with the assumption that the
+ // target device is little-endian. In addition, all builtin operators assume
+ // the memory is ordered such that if `shape` is [4, 3, 2], then index
+ // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k].
+ buffer:uint;
+ name:string; // For debugging and importing back into tensorflow.
+ quantization:QuantizationParameters; // Optional.
+
+ is_variable:bool = false;
+
+ // Parameters to encode a sparse tensor. See the example in
+ // tensorflow/lite/testdata/sparse_tensor.json.
+ sparsity:SparsityParameters; // Optional.
+
+ // Encodes `shape` with unknown dimensions. Unknown dimensions are
+ // represented with -1.
+ shape_signature:[int]; // Optional.
+
+ // If false, the rank (i.e. the number of tensor dimensions) is unknown,
+ // and "shape" must be [].
+ has_rank: bool = false;
+}
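+
+// An illustrative (non-normative) example: a tensor whose batch dimension is
+// only known at run time could carry shape = [1, 5, 768] for a concrete
+// instance and shape_signature = [1, -1, 768], with has_rank = true. A tensor
+// of wholly unknown rank would carry has_rank = false and shape = [].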
+
+// A list of builtin operators. Builtin operators are slightly faster than custom
+// ones, but not by much. Moreover, while custom operators accept an opaque
+// object containing configuration parameters, builtins have a predetermined
+// set of acceptable options.
+// LINT.IfChange
+enum BuiltinOperator : int32 {
+ BCQ_GATHER = -4,
+ BCQ_FULLY_CONNECTED = -3,
+ INSTANCE_NORM = -2,
+ ADD = 0,
+ AVERAGE_POOL_2D = 1,
+ CONCATENATION = 2,
+ CONV_2D = 3,
+ DEPTHWISE_CONV_2D = 4,
+ DEPTH_TO_SPACE = 5,
+ DEQUANTIZE = 6,
+ EMBEDDING_LOOKUP = 7,
+ FLOOR = 8,
+ FULLY_CONNECTED = 9,
+ HASHTABLE_LOOKUP = 10,
+ L2_NORMALIZATION = 11,
+ L2_POOL_2D = 12,
+ LOCAL_RESPONSE_NORMALIZATION = 13,
+ LOGISTIC = 14,
+ LSH_PROJECTION = 15,
+ LSTM = 16,
+ MAX_POOL_2D = 17,
+ MUL = 18,
+ RELU = 19,
+ // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed
+ // since different model developers use RELU1 in different ways. Never
+ // create another op called RELU1.
+ RELU_N1_TO_1 = 20,
+ RELU6 = 21,
+ RESHAPE = 22,
+ RESIZE_BILINEAR = 23,
+ RNN = 24,
+ SOFTMAX = 25,
+ SPACE_TO_DEPTH = 26,
+ SVDF = 27,
+ TANH = 28,
+ CONCAT_EMBEDDINGS = 29,
+ SKIP_GRAM = 30,
+ CALL = 31,
+ CUSTOM = 32,
+ EMBEDDING_LOOKUP_SPARSE = 33,
+ PAD = 34,
+ UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+ GATHER = 36,
+ BATCH_TO_SPACE_ND = 37,
+ SPACE_TO_BATCH_ND = 38,
+ TRANSPOSE = 39,
+ MEAN = 40,
+ SUB = 41,
+ DIV = 42,
+ SQUEEZE = 43,
+ UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
+ STRIDED_SLICE = 45,
+ BIDIRECTIONAL_SEQUENCE_RNN = 46,
+ EXP = 47,
+ TOPK_V2 = 48,
+ SPLIT = 49,
+ LOG_SOFTMAX = 50,
+ // DELEGATE is a special op type for the operations which are delegated to
+ // other backends.
+ // WARNING: Experimental interface, subject to change
+ DELEGATE = 51,
+ BIDIRECTIONAL_SEQUENCE_LSTM = 52,
+ CAST = 53,
+ PRELU = 54,
+ MAXIMUM = 55,
+ ARG_MAX = 56,
+ MINIMUM = 57,
+ LESS = 58,
+ NEG = 59,
+ PADV2 = 60,
+ GREATER = 61,
+ GREATER_EQUAL = 62,
+ LESS_EQUAL = 63,
+ SELECT = 64,
+ SLICE = 65,
+ SIN = 66,
+ TRANSPOSE_CONV = 67,
+ SPARSE_TO_DENSE = 68,
+ TILE = 69,
+ EXPAND_DIMS = 70,
+ EQUAL = 71,
+ NOT_EQUAL = 72,
+ LOG = 73,
+ SUM = 74,
+ SQRT = 75,
+ RSQRT = 76,
+ SHAPE = 77,
+ POW = 78,
+ ARG_MIN = 79,
+ FAKE_QUANT = 80,
+ REDUCE_PROD = 81,
+ REDUCE_MAX = 82,
+ PACK = 83,
+ LOGICAL_OR = 84,
+ ONE_HOT = 85,
+ LOGICAL_AND = 86,
+ LOGICAL_NOT = 87,
+ UNPACK = 88,
+ REDUCE_MIN = 89,
+ FLOOR_DIV = 90,
+ REDUCE_ANY = 91,
+ SQUARE = 92,
+ ZEROS_LIKE = 93,
+ FILL = 94,
+ FLOOR_MOD = 95,
+ RANGE = 96,
+ RESIZE_NEAREST_NEIGHBOR = 97,
+ LEAKY_RELU = 98,
+ SQUARED_DIFFERENCE = 99,
+ MIRROR_PAD = 100,
+ ABS = 101,
+ SPLIT_V = 102,
+ UNIQUE = 103,
+ CEIL = 104,
+ REVERSE_V2 = 105,
+ ADD_N = 106,
+ GATHER_ND = 107,
+ COS = 108,
+ WHERE = 109,
+ RANK = 110,
+ ELU = 111,
+ REVERSE_SEQUENCE = 112,
+ MATRIX_DIAG = 113,
+ QUANTIZE = 114,
+ MATRIX_SET_DIAG = 115,
+ ROUND = 116,
+ HARD_SWISH = 117,
+ IF = 118,
+ WHILE = 119,
+ NON_MAX_SUPPRESSION_V4 = 120,
+ NON_MAX_SUPPRESSION_V5 = 121,
+ SCATTER_ND = 122,
+ SELECT_V2 = 123,
+ DENSIFY = 124,
+ SEGMENT_SUM = 125,
+ BATCH_MATMUL = 126,
+ PLACEHOLDER_FOR_GREATER_OP_CODES = 127,
+ CUMSUM = 128,
+ CALL_ONCE = 129,
+ BROADCAST_TO = 130,
+ RFFT2D = 131,
+ CONV_3D = 132,
+ IMAG = 133,
+ REAL = 134,
+ COMPLEX_ABS = 135,
+ HASHTABLE = 136,
+ HASHTABLE_FIND = 137,
+ HASHTABLE_IMPORT = 138,
+ HASHTABLE_SIZE = 139,
+ REDUCE_ALL = 140,
+ CONV_3D_TRANSPOSE = 141,
+ VAR_HANDLE = 142,
+ READ_VARIABLE = 143,
+ ASSIGN_VARIABLE = 144,
+ BROADCAST_ARGS = 145,
+ RANDOM_STANDARD_NORMAL = 146,
+ BUCKETIZE = 147,
+ RANDOM_UNIFORM = 148,
+ MULTINOMIAL = 149,
+ GELU = 150,
+ DYNAMIC_UPDATE_SLICE = 151,
+ RELU_0_TO_1 = 152,
+ UNSORTED_SEGMENT_PROD = 153,
+ UNSORTED_SEGMENT_MAX = 154,
+ UNSORTED_SEGMENT_SUM = 155,
+ ATAN2 = 156
+}
+// LINT.ThenChange(nnapi_linter/linter.proto)
+
+// Options for the builtin operators.
+union BuiltinOptions {
+ Conv2DOptions,
+ DepthwiseConv2DOptions,
+ ConcatEmbeddingsOptions,
+ LSHProjectionOptions,
+ Pool2DOptions,
+ SVDFOptions,
+ RNNOptions,
+ FullyConnectedOptions,
+ SoftmaxOptions,
+ ConcatenationOptions,
+ AddOptions,
+ L2NormOptions,
+ LocalResponseNormalizationOptions,
+ LSTMOptions,
+ ResizeBilinearOptions,
+ CallOptions,
+ ReshapeOptions,
+ SkipGramOptions,
+ SpaceToDepthOptions,
+ EmbeddingLookupSparseOptions,
+ MulOptions,
+ PadOptions,
+ GatherOptions,
+ BatchToSpaceNDOptions,
+ SpaceToBatchNDOptions,
+ TransposeOptions,
+ ReducerOptions,
+ SubOptions,
+ DivOptions,
+ SqueezeOptions,
+ SequenceRNNOptions,
+ StridedSliceOptions,
+ ExpOptions,
+ TopKV2Options,
+ SplitOptions,
+ LogSoftmaxOptions,
+ CastOptions,
+ DequantizeOptions,
+ MaximumMinimumOptions,
+ ArgMaxOptions,
+ LessOptions,
+ NegOptions,
+ PadV2Options,
+ GreaterOptions,
+ GreaterEqualOptions,
+ LessEqualOptions,
+ SelectOptions,
+ SliceOptions,
+ TransposeConvOptions,
+ SparseToDenseOptions,
+ TileOptions,
+ ExpandDimsOptions,
+ EqualOptions,
+ NotEqualOptions,
+ ShapeOptions,
+ PowOptions,
+ ArgMinOptions,
+ FakeQuantOptions,
+ PackOptions,
+ LogicalOrOptions,
+ OneHotOptions,
+ LogicalAndOptions,
+ LogicalNotOptions,
+ UnpackOptions,
+ FloorDivOptions,
+ SquareOptions,
+ ZerosLikeOptions,
+ FillOptions,
+ BidirectionalSequenceLSTMOptions,
+ BidirectionalSequenceRNNOptions,
+ UnidirectionalSequenceLSTMOptions,
+ FloorModOptions,
+ RangeOptions,
+ ResizeNearestNeighborOptions,
+ LeakyReluOptions,
+ SquaredDifferenceOptions,
+ MirrorPadOptions,
+ AbsOptions,
+ SplitVOptions,
+ UniqueOptions,
+ ReverseV2Options,
+ AddNOptions,
+ GatherNdOptions,
+ CosOptions,
+ WhereOptions,
+ RankOptions,
+ ReverseSequenceOptions,
+ MatrixDiagOptions,
+ QuantizeOptions,
+ MatrixSetDiagOptions,
+ HardSwishOptions,
+ IfOptions,
+ WhileOptions,
+ DepthToSpaceOptions,
+ NonMaxSuppressionV4Options,
+ NonMaxSuppressionV5Options,
+ ScatterNdOptions,
+ SelectV2Options,
+ DensifyOptions,
+ SegmentSumOptions,
+ BatchMatMulOptions,
+ CumsumOptions,
+ CallOnceOptions,
+ BroadcastToOptions,
+ Rfft2dOptions,
+ Conv3DOptions,
+ HashtableOptions,
+ HashtableFindOptions,
+ HashtableImportOptions,
+ HashtableSizeOptions,
+ VarHandleOptions,
+ ReadVariableOptions,
+ AssignVariableOptions,
+ RandomOptions,
+ BucketizeOptions,
+ GeluOptions,
+ DynamicUpdateSliceOptions,
+ UnsortedSegmentProdOptions,
+ UnsortedSegmentMaxOptions,
+ UnsortedSegmentSumOptions,
+ ATan2Options,
+ BCQGatherOptions = 252,
+ BCQFullyConnectedOptions = 253,
+ InstanceNormOptions = 254,
+}
+
+enum Padding : byte { SAME, VALID }
+
+enum ActivationFunctionType : byte {
+ NONE = 0,
+ RELU = 1,
+ RELU_N1_TO_1 = 2,
+ RELU6 = 3,
+ TANH = 4,
+ SIGN_BIT = 5,
+}
+
+table Conv2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+// Options for both Conv3D and Conv3DTranspose.
+table Conv3DOptions {
+ padding:Padding;
+ stride_d:int;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_d_factor:int = 1;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table Pool2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ filter_width:int;
+ filter_height:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DepthwiseConv2DOptions {
+ // Parameters for DepthwiseConv version 1 or above.
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ // `depth_multiplier` is redundant. It's used by CPU kernels in
+ // TensorFlow 2.0 or below, but ignored in versions above.
+ // See comments in lite/c/builtin_op_data.h for more details.
+ depth_multiplier:int;
+ fused_activation_function:ActivationFunctionType;
+ // Parameters for DepthwiseConv version 2 or above.
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table ConcatEmbeddingsOptions {
+ num_channels:int;
+ num_columns_per_channel:[int];
+ embedding_dim_per_channel:[int]; // This could be inferred from parameters.
+}
+
+enum LSHProjectionType: byte {
+ UNKNOWN = 0,
+ SPARSE = 1,
+ DENSE = 2,
+}
+
+table LSHProjectionOptions {
+ type: LSHProjectionType;
+}
+
+table SVDFOptions {
+ rank:int;
+ fused_activation_function:ActivationFunctionType;
+ // For weights-only quantization, use asymmetric quantization for non
+ // constant inputs at evaluation time.
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow RNNCell.
+table RNNOptions {
+ fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with RNNCell.
+table SequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow bidirectional_dynamic_rnn with RNNCell.
+table BidirectionalSequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ merge_outputs: bool;
+ asymmetric_quantize_inputs:bool;
+}
+
+enum FullyConnectedOptionsWeightsFormat: byte {
+ DEFAULT = 0,
+ SHUFFLED4x16INT8 = 1,
+ SHUFFLED16x1FLOAT32 = 127
+}
+
+// An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
+table FullyConnectedOptions {
+ // Parameters for FullyConnected version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+
+ // Parameters for FullyConnected version 2 or above.
+ weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT;
+
+ // Parameters for FullyConnected version 5 or above.
+ // If set to true, then the number of dimensions is preserved. Furthermore,
+ // all but the last dimension of the input and output shapes will be equal.
+ keep_num_dims: bool;
+
+ // Parameters for FullyConnected version 7 or above.
+ // If set to true, then weights-only op will use asymmetric quantization for
+ // inputs.
+ asymmetric_quantize_inputs: bool;
+}
+
+table SoftmaxOptions {
+ beta: float;
+}
+
+// An implementation of TensorFlow concat.
+table ConcatenationOptions {
+ axis:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table AddOptions {
+ fused_activation_function:ActivationFunctionType;
+ // Parameters supported by version 3.
+ pot_scale_int16:bool = true;
+}
+
+table MulOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table L2NormOptions {
+ // This field is currently ignored in the L2 Norm Op.
+ fused_activation_function:ActivationFunctionType;
+}
+
+table LocalResponseNormalizationOptions {
+ radius:int;
+ bias:float;
+ alpha:float;
+ beta:float;
+}
+
+enum LSTMKernelType : byte {
+ // Full LSTM kernel which supports peephole and projection.
+ FULL = 0,
+ // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
+ BASIC = 1,
+}
+
+// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
+table LSTMOptions {
+ // Parameters for LSTM version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // Parameters for LSTM version 2 or above.
+ // Basic kernel is only supported in version 2 or above.
+ kernel_type: LSTMKernelType = FULL;
+
+ // Parameters for LSTM version 4 or above.
+ asymmetric_quantize_inputs: bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with LSTMCell.
+table UnidirectionalSequenceLSTMOptions {
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true then first dimension is sequence, otherwise batch.
+ time_major:bool;
+
+ // Parameter for Unidirectional Sequence LSTM version 4.
+ asymmetric_quantize_inputs:bool;
+}
+
+table BidirectionalSequenceLSTMOptions {
+ // Parameters supported by version 1:
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true, store the outputs of both directions into the first output.
+ merge_outputs: bool;
+
+ // Parameters supported by version 2:
+ // If true then first dimension is sequence, otherwise batch.
+ // Version 1 implementations assumed time_major to be true, so this default
+ // value should never change.
+ time_major: bool = true;
+
+ // Parameters for version 3 or above.
+ asymmetric_quantize_inputs:bool;
+}
+
+table ResizeBilinearOptions {
+ new_height: int (deprecated);
+ new_width: int (deprecated);
+ align_corners: bool;
+ half_pixel_centers: bool;
+}
+
+table ResizeNearestNeighborOptions {
+ align_corners: bool;
+ half_pixel_centers: bool;
+}
+
+// Options for a call operation.
+table CallOptions {
+ // The subgraph index that needs to be called.
+ subgraph:uint;
+}
+
+table PadOptions {
+}
+
+table PadV2Options {
+}
+
+table ReshapeOptions {
+ new_shape:[int];
+}
+
+table SpaceToBatchNDOptions {
+}
+
+table BatchToSpaceNDOptions {
+}
+
+table SkipGramOptions {
+ ngram_size: int;
+ max_skip_size: int;
+ include_all_ngrams: bool;
+}
+
+table SpaceToDepthOptions {
+ block_size: int;
+}
+
+table DepthToSpaceOptions {
+ block_size: int;
+}
+
+table SubOptions {
+ fused_activation_function:ActivationFunctionType;
+ // Parameters supported by version 5
+ pot_scale_int16:bool = true;
+}
+
+table DivOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table TopKV2Options {
+}
+
+enum CombinerType : byte {
+ SUM = 0,
+ MEAN = 1,
+ SQRTN = 2,
+}
+
+table EmbeddingLookupSparseOptions {
+ combiner:CombinerType;
+}
+
+table GatherOptions {
+ axis: int;
+ // Parameters for Gather version 5 or above.
+ batch_dims: int = 0;
+}
+
+table TransposeOptions {
+}
+
+table ExpOptions {
+}
+
+table CosOptions {
+}
+
+table ReducerOptions {
+ keep_dims: bool;
+}
+
+table SqueezeOptions {
+ squeeze_dims:[int];
+}
+
+table SplitOptions {
+ num_splits: int;
+}
+
+table SplitVOptions {
+ num_splits: int;
+}
+
+table StridedSliceOptions {
+ begin_mask: int;
+ end_mask: int;
+ ellipsis_mask: int;
+ new_axis_mask: int;
+ shrink_axis_mask: int;
+}
+
+table LogSoftmaxOptions {
+}
+
+table CastOptions {
+ in_data_type: TensorType;
+ out_data_type: TensorType;
+}
+
+table DequantizeOptions {
+}
+
+table MaximumMinimumOptions {
+}
+
+table TileOptions {
+}
+
+table ArgMaxOptions {
+ output_type : TensorType;
+}
+
+table ArgMinOptions {
+ output_type : TensorType;
+}
+
+table GreaterOptions {
+}
+
+table GreaterEqualOptions {
+}
+
+table LessOptions {
+}
+
+table LessEqualOptions {
+}
+
+table NegOptions {
+}
+
+table SelectOptions {
+}
+
+table SliceOptions {
+}
+
+table TransposeConvOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+}
+
+table ExpandDimsOptions {
+}
+
+table SparseToDenseOptions {
+ validate_indices:bool;
+}
+
+table EqualOptions {
+}
+
+table NotEqualOptions {
+}
+
+table ShapeOptions {
+ // Optional output type of the operation (int32 or int64). Defaults to int32.
+ out_type : TensorType;
+}
+
+table RankOptions {
+}
+
+table PowOptions {
+}
+
+table FakeQuantOptions {
+ // Parameters supported by version 1:
+ min:float;
+ max:float;
+ num_bits:int;
+
+ // Parameters supported by version 2:
+ narrow_range:bool;
+}
+
+table PackOptions {
+ values_count:int;
+ axis:int;
+}
+
+table LogicalOrOptions {
+}
+
+table OneHotOptions {
+ axis:int;
+}
+
+table AbsOptions {
+}
+
+table HardSwishOptions {
+}
+
+table LogicalAndOptions {
+}
+
+table LogicalNotOptions {
+}
+
+table UnpackOptions {
+ num:int;
+ axis:int;
+}
+
+table FloorDivOptions {
+}
+
+table SquareOptions {
+}
+
+table ZerosLikeOptions {
+}
+
+table FillOptions {
+}
+
+table FloorModOptions {
+}
+
+table RangeOptions {
+}
+
+table LeakyReluOptions {
+ alpha:float;
+}
+
+table SquaredDifferenceOptions {
+}
+
+enum MirrorPadMode : byte {
+ // Doesn't include borders.
+ REFLECT = 0,
+ // Includes borders.
+ SYMMETRIC = 1,
+}
+
+table MirrorPadOptions {
+ mode:MirrorPadMode;
+}
+
+table UniqueOptions {
+ idx_out_type:TensorType = INT32;
+}
+
+table ReverseV2Options {
+}
+
+table AddNOptions {
+}
+
+table GatherNdOptions {
+}
+
+table WhereOptions {
+}
+
+table ReverseSequenceOptions {
+ seq_dim:int;
+ batch_dim:int = 0;
+}
+
+table MatrixDiagOptions {
+}
+
+table QuantizeOptions {
+}
+
+table MatrixSetDiagOptions {
+}
+
+table IfOptions {
+ then_subgraph_index:int;
+ else_subgraph_index:int;
+}
+
+table CallOnceOptions {
+ init_subgraph_index:int;
+}
+
+table WhileOptions {
+ cond_subgraph_index:int;
+ body_subgraph_index:int;
+}
+
+table NonMaxSuppressionV4Options {
+}
+
+table NonMaxSuppressionV5Options {
+}
+
+table ScatterNdOptions {
+}
+
+table SelectV2Options {
+}
+
+table DensifyOptions {
+}
+
+table SegmentSumOptions {
+}
+
+table BatchMatMulOptions {
+ adjoint_lhs:bool;
+ adjoint_rhs:bool;
+ // Parameters for BatchMatMul version 4 or above.
+ // If set to true, then weights-only op will use asymmetric quantization for
+ // inputs.
+ asymmetric_quantize_inputs: bool;
+}
+
+table CumsumOptions {
+ exclusive:bool;
+ reverse:bool;
+}
+
+table BroadcastToOptions {
+}
+
+table Rfft2dOptions {
+}
+
+table HashtableOptions {
+ // The identity of hash tables. This identity will be used across different
+ // subgraphs in the same interpreter instance.
+ table_id:int;
+ key_dtype:TensorType;
+ value_dtype:TensorType;
+}
+
+table HashtableFindOptions {
+}
+
+table HashtableImportOptions {
+}
+
+table HashtableSizeOptions {
+}
+
+table VarHandleOptions {
+ container:string;
+ shared_name:string;
+}
+
+table ReadVariableOptions {
+}
+
+table AssignVariableOptions {
+}
+
+table RandomOptions {
+ seed: long;
+ seed2: long;
+}
+
+table BucketizeOptions {
+ boundaries: [float]; // The bucket boundaries.
+}
+
+table GeluOptions {
+ approximate: bool;
+}
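+
+// For reference: GELU(x) = x * Phi(x), where Phi is the standard normal CDF.
+// When `approximate` is true, the usual tanh approximation is meant:
+//   0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))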
+
+table DynamicUpdateSliceOptions {
+}
+
+table UnsortedSegmentProdOptions {
+}
+
+table UnsortedSegmentMaxOptions {
+}
+
+table UnsortedSegmentSumOptions {
+}
+
+table ATan2Options {
+}
+
+table BCQGatherOptions {
+ input_hidden_size: int;
+ axis: int;
+}
+
+table BCQFullyConnectedOptions {
+ weights_hidden_size: int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table InstanceNormOptions {
+ epsilon:float;
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
+// builtin, or a string if the operator is custom.
+table OperatorCode {
+ // This field is for backward compatibility. This field will be used when
+ // the value of the extended builtin_code field is less than
+ // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+ deprecated_builtin_code:byte;
+ custom_code:string;
+
+ // The version of the operator. The version needs to be bumped whenever new
+ // parameters are introduced into an op.
+ version:int = 1;
+
+ // This field is introduced to resolve the op builtin code shortage problem
+ // (the original BuiltinOperator enum field was represented as a byte).
+ // This field will be used when the value of the extended builtin_code field
+ // is greater than BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+ builtin_code:BuiltinOperator;
+}
+
+enum CustomOptionsFormat : byte {
+ FLEXBUFFERS = 0,
+}
+
+enum DataFormat : byte {
+ // For 2D data, NHWC(batch, height, width, channels)
+ // For 3D data, NDHWC(batch, depth, height, width, channels)
+ CHANNELS_LAST = 0,
+ // For 2D data, NCHW(batch, channels, height, width)
+ // For 3D data, NCDHW(batch, channels, depth, height, width)
+ CHANNELS_FIRST = 1,
+}
+
+// An operator takes tensors as inputs and produces tensors as outputs. The
+// type of operation being performed is determined by an index into the list
+// of valid OperatorCodes, while the specifics of each operation are
+// configured using builtin_options or custom_options.
+table Operator {
+ // Index into the operator_codes array. Using an integer here avoids
+ // complicated map lookups.
+ opcode_index:uint;
+
+ // Optional inputs are indicated by -1.
+ inputs:[int];
+ outputs:[int];
+
+ builtin_options:BuiltinOptions;
+ custom_options:[ubyte];
+ custom_options_format:CustomOptionsFormat;
+
+ // A list of booleans indicating the input tensors which are being mutated by
+ // this operator (e.g. used by RNN and LSTM).
+ // For example, if the "inputs" array refers to 5 tensors and the second and
+ // fifth are mutable variables, then this list will contain
+ // [false, true, false, false, true].
+ //
+ // If the list is empty, no variable is mutated in this operator.
+ // The list either has the same length as `inputs`, or is empty.
+ mutating_variable_inputs:[bool];
+
+ // A list of indices to the subgraph's "tensors" that are internal to an Op.
+ // Internal tensors are those that do not flow in or out of the operation,
+ // but instead are part of internal computation. As such, the operation's
+ // implementation may manage its memory more efficiently. They are needed
+ // however (i.e. not just an implementation detail) since they are part of the
+ // computation, which may require relevant metadata such as quantization
+ // parameters.
+ intermediates:[int];
+}
+
+// The root type, defining a subgraph, which typically represents an entire
+// model.
+table SubGraph {
+ // A list of all tensors used in this subgraph.
+ tensors:[Tensor];
+
+ // Indices of the tensors that are inputs into this subgraph. Note this is
+ // the list of non-static tensors that feed into the subgraph for inference.
+ inputs:[int];
+
+ // Indices of the tensors that are outputs out of this subgraph. Note this is
+ // the list of output tensors that are considered the product of the
+ // subgraph's inference.
+ outputs:[int];
+
+ // All operators, in execution order.
+ operators:[Operator];
+
+ // Name of this subgraph (used for debugging).
+ name:string;
+
+ // Data format for input/output of SubGraph
+ data_format: DataFormat;
+}
+
+// Table of raw data buffers (used for constant tensors). Referenced by tensors
+// by index. The generous alignment accommodates mmap-friendly data structures.
+table Buffer {
+ data:[ubyte] (force_align: 16);
+}
+
+table Metadata {
+ // A human readable string to uniquely identify a Metadata.
+ name:string;
+ // An index to the buffers table.
+ buffer:uint;
+}
+
+// Map from an alias name of a tensor to the tensor index in the graph.
+// This is used in SignatureDef.
+table TensorMap {
+ // Represents the alias to use for this tensor.
+ name:string;
+
+ // The actual tensor index in the primary graph that 'name' corresponds to.
+ tensor_index:uint;
+}
+
+// This corresponds to SignatureDef in Tensorflow SavedModel.
+// The SignatureDef will be part of the SavedModel provided for conversion.
+table SignatureDef {
+ // Named inputs for this signature.
+ inputs:[TensorMap];
+
+ // Named outputs for this signature.
+ outputs:[TensorMap];
+
+ // Key value which was in the Tensorflow SavedModel SignatureDef map.
+ signature_key:string;
+
+ // Model tag, deprecated.
+ deprecated_tag:string (deprecated);
+
+ // Index of the subgraph that corresponds to the exported method.
+ subgraph_index:uint;
+}
+
+table Model {
+ // Version of the schema.
+ version:uint;
+
+ // A list of all operator codes used in this model. This is
+ // kept in order because operators carry an index into this
+ // vector.
+ operator_codes:[OperatorCode];
+
+ // All the subgraphs of the model. The 0th is assumed to be the main
+ // model.
+ subgraphs:[SubGraph];
+
+ // A description of the model.
+ description:string;
+
+ // Buffers of the model.
+ // Note the 0th entry of this array must be an empty buffer (sentinel).
+ // This is a convention so that tensors without a buffer can provide 0 as
+ // their buffer.
+ buffers:[Buffer];
+
+ // Metadata about the model. Indirects into the existing buffers list.
+ // Deprecated, prefer to use metadata field.
+ metadata_buffer:[int];
+
+ // Metadata about the model.
+ metadata:[Metadata];
+
+ // Optional SignatureDefs for the model.
+ signature_defs:[SignatureDef];
+}
+
+root_type Model;
diff --git a/res/CircleSchema/0.6/circle_schema.fbs b/res/CircleSchema/0.6/circle_schema.fbs
new file mode 100644
index 000000000..cdc10361b
--- /dev/null
+++ b/res/CircleSchema/0.6/circle_schema.fbs
@@ -0,0 +1,1388 @@
+// Copyright (c) 2019~2023 Samsung Electronics Co., Ltd. All Rights Reserved
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Revision History
+//
+// Version Major.Minor
+//
+// Major version is the schema version.
+// The schema version is kept unchanged as long as changes remain compatible.
+// Minor version is for human communication only;
+// it is not stored in the circle model.
+//
+// Version 0.0: Initial version. Based on TensorFlow Lite v1.13.1 schema.
+// Version 0.1: Based on TF v2.2-rc2 + more (from TensorFlow `56d281c`)
+// `BATCH_MATMUL` operator, `FLOAT64` tensor type,
+// `asymmetric_quantize_inputs` for several operator options
+// Version 0.2: BCQ_GATHER and BCQ_FULLY_CONNECTED are added.
+// Version 0.3: SHUFFLED16x1FLOAT32 is added.
+// Version 0.4: Base up to TensorFlow Lite v2.7.0 schema.
+// Version 0.5: Base up to TensorFlow Lite v2.10.1 schema.
+// Version 0.6: Base up to TensorFlow Lite v2.13.0 schema.
+
+namespace circle;
+
+// This corresponds to the version.
+file_identifier "CIR0";
+// File extension of any written files.
+file_extension "circle";
+
+// IMPORTANT: All new members of tables, enums and unions must be added at the
+// end to ensure backwards compatibility.
+
+// The type of data stored in a tensor.
+enum TensorType : byte {
+ FLOAT32 = 0,
+ FLOAT16 = 1,
+ INT32 = 2,
+ UINT8 = 3,
+ INT64 = 4,
+ STRING = 5,
+ BOOL = 6,
+ INT16 = 7,
+ COMPLEX64 = 8,
+ INT8 = 9,
+ FLOAT64 = 10,
+ COMPLEX128 = 11,
+ UINT64 = 12,
+ // Experimental: Resource and variant types are experimental and subject
+ // to change. Do not implement custom kernels using resource & variant types
+ // yet.
+ RESOURCE = 13,
+ VARIANT = 14,
+ UINT32 = 15,
+ UINT16 = 16,
+ INT4 = 17,
+}
+
+// Custom quantization parameters for experimenting with new quantization
+// techniques.
+table CustomQuantization {
+ custom:[ubyte] (force_align: 16);
+}
+
+// Represents a specific quantization technique's parameters.
+union QuantizationDetails {
+ CustomQuantization,
+}
+
+// Parameters for converting a quantized tensor back to float.
+table QuantizationParameters {
+ // These four parameters are the asymmetric linear quantization parameters.
+ // Given a quantized value q, the corresponding float value f should be:
+ // f = scale * (q - zero_point)
+ // For other quantization types, the QuantizationDetails below is used.
+ min:[float]; // For importing back into tensorflow.
+ max:[float]; // For importing back into tensorflow.
+ scale:[float]; // For dequantizing the tensor's values.
+ zero_point:[long];
+
+ // If this is not none, the other quantization parameters (i.e. min, max,
+ // scale, zero_point fields above) are ignored and the value of the
+ // QuantizationDetails union should be used.
+ details:QuantizationDetails;
+
+ // Specifies the dimension of the Tensor's shape that the scales and
+ // zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1]
+ // with quantization params:
+ // scale=[1.0, 2.0, 3.0], zero_point=[1, 2, 3], quantized_dimension=1
+ // will be quantized across the second dimension of t.
+ // t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1
+ // t[:, 1, :, :] will have scale[1]=2.0, zero_point[1]=2
+ // t[:, 2, :, :] will have scale[2]=3.0, zero_point[2]=3
+ quantized_dimension:int;
+}
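
The mapping above is easy to exercise directly. A minimal NumPy sketch of dequantization following `f = scale * (q - zero_point)`, covering both the per-tensor and per-axis cases; the helper name and shapes are illustrative, not part of the schema:

```python
import numpy as np

def dequantize(q, scale, zero_point, quantized_dimension=0):
    """f = scale * (q - zero_point); sketch only, names are illustrative."""
    q = q.astype(np.float32)
    scale = np.asarray(scale, dtype=np.float32)
    zero_point = np.asarray(zero_point, dtype=np.float32)
    if scale.size > 1:  # per-axis: broadcast along quantized_dimension
        shape = [1] * q.ndim
        shape[quantized_dimension] = scale.size
        scale = scale.reshape(shape)
        zero_point = zero_point.reshape(shape)
    return scale * (q - zero_point)

# The per-axis example from the comment above: dims [4, 3, 2, 1], axis 1.
q = np.ones((4, 3, 2, 1), dtype=np.int8)
f = dequantize(q, [1.0, 2.0, 3.0], [1, 2, 3], quantized_dimension=1)
```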
+
+// Sparse tensors.
+// We use a modification of the TACO format.
+// Reference: http://tensor-compiler.org/kjolstad-oopsla17-tensor-compiler.pdf
+//
+// To encode a conceptual n-dimensional dense tensor with dims (d0, ..., dn-1),
+// potentially with a k-dimensional block (0 <= k <= n) with dims
+// (dn, ..., dn+k-1), the format needs to specify:
+// 1. In what order to traverse these dimensions. For example, to store a 2-D
+// matrix in row major order, the traversal order would be (d0, d1),
+// whereas to store it in column major order, the traversal order would be
+// (d1, d0). If the 2-D matrix has a 2-D inner block, the traversal order
+// could be (d0, d1, d2, d3).
+// 2. How each block dimension in (dn, ..., dn+k-1) maps to the original
+// tensor dimension in (d0, ..., dn-1).
+// 3. In the traversal order defined above, the format (dense vs. sparse) and
+// index metadata for each dimension. For a dense dimension, this is just
+// the size of that dimension. For a sparse dimension, it's the same as
+// the compressed index defined in the Compressed Sparse Row (CSR) format.
+// (http://scipy-lectures.org/advanced/scipy_sparse/csr_matrix.html)
+
+// The storage type for a dimension. Currently we support:
+// 1. DENSE: each coordinate in this dimension is stored implicitly.
+// 2. SPARSE_CSR: only the coordinates with non-zero elements are stored. The
+ // compression technique is the same as the one CSR uses.
+// More types like a sparse dimension with a different compression technique
+// could be added to the list in the future.
+enum DimensionType : byte {
+ DENSE = 0,
+ SPARSE_CSR = 1,
+}
+
+table Int32Vector {
+ values:[int];
+}
+
+table Uint16Vector {
+ values:[ushort] (force_align: 4);
+}
+
+table Uint8Vector {
+ values:[ubyte] (force_align: 4);
+}
+
+// Variable-typed buffer to store the index metadata for a sparse dimension.
+// The widest type is Int32 instead of UInt32 because a tensor's shape is an int32
+// vector. We don't want the per-dimensional index to overflow that range.
+union SparseIndexVector {
+ Int32Vector,
+ Uint16Vector,
+ Uint8Vector
+}
+
+table DimensionMetadata {
+ // Whether a dimension is dense or sparse.
+ format:DimensionType;
+ // Index metadata used for a dimension.
+ // - If format is DimensionType.DENSE then we use the dense_size field to
+ // store the size of that dimension. Each index in that dimension is
+ // stored implicitly.
+ // - If format is DimensionType.SPARSE_CSR then we use array_segments and
+ // array_indices to encode that dimension. array_segments represents how
+ // to segment the indices array; each segment corresponds to one element
+ // in the previous dimension. array_indices represents the index of the
+ // non-zero elements within this dimension (as those in the CSR matrix
+ // format, where the first array is row pointers and the second array is
+ // column indices).
+ dense_size:int;
+ array_segments:SparseIndexVector;
+ array_indices:SparseIndexVector;
+}
+
+// Parameters to encode a sparse TfLite tensor.
+table SparsityParameters {
+ // The traversal order of the dimensions defined in the `shape` field of the
+ // conceptual dense tensor. For an n-dimensional tensor with dims (d0, d1,
+ // ..., dn-1),
+ // - if not block sparse, the traversal_order is just a permutation of (d0,
+ // ..., dn-1). For example, a 2-D matrix stored in row-major order would
+ // have traversal_order = (d0, d1).
+ // - if block sparse with a k-dimensional block (0 <= k <= n), the
+ // traversal_order has n + k elements. The first n elements are still a
+ // permutation of (d0, ..., dn-1). The last k elements are a permutation
+ // of (dn, ..., dn+k-1), defining how to traverse a block internally. For
+ // example, a 2-D matrix with 2-D blocks, both stored in row-major order
+ // would have traversal_order = (d0, d1, d2, d3).
+ traversal_order:[int];
+ // For an n-dimensional tensor with a k-dimensional block (0 <= k <= n),
+ // stores how a block dimension in (dn, ..., dn+k-1) maps to the original
+ // tensor dimension in (d0, ..., dn-1).
+ // It's stored in the order of (dn, ..., dn+k-1).
+ // If not block-sparse, this field is NULL.
+ block_map:[int];
+ // In the traversal order defined above, the metadata needed for
+ // each dimension to locate the non-zero values in the original dense tensor.
+ // The size of the dim_metadata array = the size of the traversal_order array
+ // = n + k.
+ dim_metadata:[DimensionMetadata];
+}
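
To make the DENSE/SPARSE_CSR encoding concrete, here is a small sketch that derives `array_segments` (the CSR "row pointers") and `array_indices` (the column indices) for a 3x4 matrix with traversal_order (d0, d1), where d0 is DENSE and d1 is SPARSE_CSR. The helper code is illustrative, not a schema API:

```python
import numpy as np

# Dense 3x4 matrix; dim_metadata would be [DENSE(size=3), SPARSE_CSR].
dense = np.array([[1, 0, 0, 2],
                  [0, 0, 0, 0],
                  [0, 3, 0, 0]])

array_segments = [0]  # one entry per d0 element, plus the leading 0
array_indices = []    # d1 coordinate of each non-zero element
values = []           # non-zero values, in traversal order
for row in dense:
    cols = np.nonzero(row)[0]
    array_indices.extend(int(c) for c in cols)
    values.extend(int(row[c]) for c in cols)
    array_segments.append(len(array_indices))

assert array_segments == [0, 2, 2, 3]
assert array_indices == [0, 3, 1]
assert values == [1, 2, 3]
```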
+
+// The nested tensor type for VARIANT type.
+table VariantSubType {
+ // The tensor shape.
+ shape:[int];
+ type:TensorType;
+ // If false, the rank or the number of tensor dimensions is unknown.
+ // If false, "shape" must be [].
+ has_rank: bool = false;
+}
+
+table Tensor {
+ // The tensor shape. The meaning of each entry is operator-specific but
+ // builtin ops use: [batch size, height, width, number of channels] (That's
+ // Tensorflow's NHWC).
+ shape:[int];
+ type:TensorType;
+ // An index that refers to the buffers table at the root of the model. Or,
+ // if there is no data buffer associated (i.e. intermediate results), then
+ // this is 0 (which refers to an always existent empty buffer).
+ //
+ // The data_buffer itself is an opaque container, with the assumption that the
+ // target device is little-endian. In addition, all builtin operators assume
+ // the memory is ordered such that if `shape` is [4, 3, 2], then index
+ // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k].
+ buffer:uint;
+ name:string; // For debugging and importing back into tensorflow.
+ quantization:QuantizationParameters; // Optional.
+
+ is_variable:bool = false;
+
+ // Parameters to encode a sparse tensor. See the example in
+ // tensorflow/lite/testdata/sparse_tensor.json.
+ sparsity:SparsityParameters; // Optional.
+
+ // Encodes `shape` with unknown dimensions. Unknown dimensions are
+ // represented with -1.
+ shape_signature:[int]; // Optional.
+
+ // If false, the rank or the number of tensor dimensions is unknown.
+ // If false, "shape" must be [].
+ has_rank: bool = false;
+
+ // The nested Tensor types for VARIANT type. This is always empty for
+ // non-VARIANT types. This is optional because the nested type can be omitted.
+ // Currently only 1 subtype is supported. The field is defined as an array for
+ // flexibility of supporting multiple subtypes in the future.
+ variant_tensors:[VariantSubType];
+}
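
The `buffer` comment above fixes a row-major layout; a tiny sketch of the index arithmetic it describes (the helper name is illustrative):

```python
def flat_index(shape, idx):
    """Row-major offset, matching the data_buffer layout described above."""
    offset = 0
    for dim, i in zip(shape, idx):
        offset = offset * dim + i
    return offset

# For shape [4, 3, 2], index [i, j, k] maps to i*3*2 + j*2 + k.
assert flat_index([4, 3, 2], [1, 2, 1]) == 1 * 3 * 2 + 2 * 2 + 1  # == 11
```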
+
+// A list of builtin operators. Builtin operators are slightly faster than custom
+// ones, but not by much. Moreover, while custom operators accept an opaque
+// object containing configuration parameters, builtins have a predetermined
+// set of acceptable options.
+// LINT.IfChange
+enum BuiltinOperator : int32 {
+ BCQ_GATHER = -4,
+ BCQ_FULLY_CONNECTED = -3,
+ INSTANCE_NORM = -2,
+ ADD = 0,
+ AVERAGE_POOL_2D = 1,
+ CONCATENATION = 2,
+ CONV_2D = 3,
+ DEPTHWISE_CONV_2D = 4,
+ DEPTH_TO_SPACE = 5,
+ DEQUANTIZE = 6,
+ EMBEDDING_LOOKUP = 7,
+ FLOOR = 8,
+ FULLY_CONNECTED = 9,
+ HASHTABLE_LOOKUP = 10,
+ L2_NORMALIZATION = 11,
+ L2_POOL_2D = 12,
+ LOCAL_RESPONSE_NORMALIZATION = 13,
+ LOGISTIC = 14,
+ LSH_PROJECTION = 15,
+ LSTM = 16,
+ MAX_POOL_2D = 17,
+ MUL = 18,
+ RELU = 19,
+ // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed
+ // since different model developers use RELU1 in different ways. Never
+ // create another op called RELU1.
+ RELU_N1_TO_1 = 20,
+ RELU6 = 21,
+ RESHAPE = 22,
+ RESIZE_BILINEAR = 23,
+ RNN = 24,
+ SOFTMAX = 25,
+ SPACE_TO_DEPTH = 26,
+ SVDF = 27,
+ TANH = 28,
+ CONCAT_EMBEDDINGS = 29,
+ SKIP_GRAM = 30,
+ CALL = 31,
+ CUSTOM = 32,
+ EMBEDDING_LOOKUP_SPARSE = 33,
+ PAD = 34,
+ UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+ GATHER = 36,
+ BATCH_TO_SPACE_ND = 37,
+ SPACE_TO_BATCH_ND = 38,
+ TRANSPOSE = 39,
+ MEAN = 40,
+ SUB = 41,
+ DIV = 42,
+ SQUEEZE = 43,
+ UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
+ STRIDED_SLICE = 45,
+ BIDIRECTIONAL_SEQUENCE_RNN = 46,
+ EXP = 47,
+ TOPK_V2 = 48,
+ SPLIT = 49,
+ LOG_SOFTMAX = 50,
+ // DELEGATE is a special op type for the operations which are delegated to
+ // other backends.
+ // WARNING: Experimental interface, subject to change
+ DELEGATE = 51,
+ BIDIRECTIONAL_SEQUENCE_LSTM = 52,
+ CAST = 53,
+ PRELU = 54,
+ MAXIMUM = 55,
+ ARG_MAX = 56,
+ MINIMUM = 57,
+ LESS = 58,
+ NEG = 59,
+ PADV2 = 60,
+ GREATER = 61,
+ GREATER_EQUAL = 62,
+ LESS_EQUAL = 63,
+ SELECT = 64,
+ SLICE = 65,
+ SIN = 66,
+ TRANSPOSE_CONV = 67,
+ SPARSE_TO_DENSE = 68,
+ TILE = 69,
+ EXPAND_DIMS = 70,
+ EQUAL = 71,
+ NOT_EQUAL = 72,
+ LOG = 73,
+ SUM = 74,
+ SQRT = 75,
+ RSQRT = 76,
+ SHAPE = 77,
+ POW = 78,
+ ARG_MIN = 79,
+ FAKE_QUANT = 80,
+ REDUCE_PROD = 81,
+ REDUCE_MAX = 82,
+ PACK = 83,
+ LOGICAL_OR = 84,
+ ONE_HOT = 85,
+ LOGICAL_AND = 86,
+ LOGICAL_NOT = 87,
+ UNPACK = 88,
+ REDUCE_MIN = 89,
+ FLOOR_DIV = 90,
+ REDUCE_ANY = 91,
+ SQUARE = 92,
+ ZEROS_LIKE = 93,
+ FILL = 94,
+ FLOOR_MOD = 95,
+ RANGE = 96,
+ RESIZE_NEAREST_NEIGHBOR = 97,
+ LEAKY_RELU = 98,
+ SQUARED_DIFFERENCE = 99,
+ MIRROR_PAD = 100,
+ ABS = 101,
+ SPLIT_V = 102,
+ UNIQUE = 103,
+ CEIL = 104,
+ REVERSE_V2 = 105,
+ ADD_N = 106,
+ GATHER_ND = 107,
+ COS = 108,
+ WHERE = 109,
+ RANK = 110,
+ ELU = 111,
+ REVERSE_SEQUENCE = 112,
+ MATRIX_DIAG = 113,
+ QUANTIZE = 114,
+ MATRIX_SET_DIAG = 115,
+ ROUND = 116,
+ HARD_SWISH = 117,
+ IF = 118,
+ WHILE = 119,
+ NON_MAX_SUPPRESSION_V4 = 120,
+ NON_MAX_SUPPRESSION_V5 = 121,
+ SCATTER_ND = 122,
+ SELECT_V2 = 123,
+ DENSIFY = 124,
+ SEGMENT_SUM = 125,
+ BATCH_MATMUL = 126,
+ PLACEHOLDER_FOR_GREATER_OP_CODES = 127,
+ CUMSUM = 128,
+ CALL_ONCE = 129,
+ BROADCAST_TO = 130,
+ RFFT2D = 131,
+ CONV_3D = 132,
+ IMAG = 133,
+ REAL = 134,
+ COMPLEX_ABS = 135,
+ HASHTABLE = 136,
+ HASHTABLE_FIND = 137,
+ HASHTABLE_IMPORT = 138,
+ HASHTABLE_SIZE = 139,
+ REDUCE_ALL = 140,
+ CONV_3D_TRANSPOSE = 141,
+ VAR_HANDLE = 142,
+ READ_VARIABLE = 143,
+ ASSIGN_VARIABLE = 144,
+ BROADCAST_ARGS = 145,
+ RANDOM_STANDARD_NORMAL = 146,
+ BUCKETIZE = 147,
+ RANDOM_UNIFORM = 148,
+ MULTINOMIAL = 149,
+ GELU = 150,
+ DYNAMIC_UPDATE_SLICE = 151,
+ RELU_0_TO_1 = 152,
+ UNSORTED_SEGMENT_PROD = 153,
+ UNSORTED_SEGMENT_MAX = 154,
+ UNSORTED_SEGMENT_SUM = 155,
+ ATAN2 = 156,
+ UNSORTED_SEGMENT_MIN = 157,
+ SIGN = 158,
+ BITCAST = 159,
+ BITWISE_XOR = 160,
+ RIGHT_SHIFT = 161,
+}
+// LINT.ThenChange(nnapi_linter/linter.proto)
+
+// Options for the builtin operators.
+union BuiltinOptions {
+ Conv2DOptions,
+ DepthwiseConv2DOptions,
+ ConcatEmbeddingsOptions,
+ LSHProjectionOptions,
+ Pool2DOptions,
+ SVDFOptions,
+ RNNOptions,
+ FullyConnectedOptions,
+ SoftmaxOptions,
+ ConcatenationOptions,
+ AddOptions,
+ L2NormOptions,
+ LocalResponseNormalizationOptions,
+ LSTMOptions,
+ ResizeBilinearOptions,
+ CallOptions,
+ ReshapeOptions,
+ SkipGramOptions,
+ SpaceToDepthOptions,
+ EmbeddingLookupSparseOptions,
+ MulOptions,
+ PadOptions,
+ GatherOptions,
+ BatchToSpaceNDOptions,
+ SpaceToBatchNDOptions,
+ TransposeOptions,
+ ReducerOptions,
+ SubOptions,
+ DivOptions,
+ SqueezeOptions,
+ SequenceRNNOptions,
+ StridedSliceOptions,
+ ExpOptions,
+ TopKV2Options,
+ SplitOptions,
+ LogSoftmaxOptions,
+ CastOptions,
+ DequantizeOptions,
+ MaximumMinimumOptions,
+ ArgMaxOptions,
+ LessOptions,
+ NegOptions,
+ PadV2Options,
+ GreaterOptions,
+ GreaterEqualOptions,
+ LessEqualOptions,
+ SelectOptions,
+ SliceOptions,
+ TransposeConvOptions,
+ SparseToDenseOptions,
+ TileOptions,
+ ExpandDimsOptions,
+ EqualOptions,
+ NotEqualOptions,
+ ShapeOptions,
+ PowOptions,
+ ArgMinOptions,
+ FakeQuantOptions,
+ PackOptions,
+ LogicalOrOptions,
+ OneHotOptions,
+ LogicalAndOptions,
+ LogicalNotOptions,
+ UnpackOptions,
+ FloorDivOptions,
+ SquareOptions,
+ ZerosLikeOptions,
+ FillOptions,
+ BidirectionalSequenceLSTMOptions,
+ BidirectionalSequenceRNNOptions,
+ UnidirectionalSequenceLSTMOptions,
+ FloorModOptions,
+ RangeOptions,
+ ResizeNearestNeighborOptions,
+ LeakyReluOptions,
+ SquaredDifferenceOptions,
+ MirrorPadOptions,
+ AbsOptions,
+ SplitVOptions,
+ UniqueOptions,
+ ReverseV2Options,
+ AddNOptions,
+ GatherNdOptions,
+ CosOptions,
+ WhereOptions,
+ RankOptions,
+ ReverseSequenceOptions,
+ MatrixDiagOptions,
+ QuantizeOptions,
+ MatrixSetDiagOptions,
+ HardSwishOptions,
+ IfOptions,
+ WhileOptions,
+ DepthToSpaceOptions,
+ NonMaxSuppressionV4Options,
+ NonMaxSuppressionV5Options,
+ ScatterNdOptions,
+ SelectV2Options,
+ DensifyOptions,
+ SegmentSumOptions,
+ BatchMatMulOptions,
+ CumsumOptions,
+ CallOnceOptions,
+ BroadcastToOptions,
+ Rfft2dOptions,
+ Conv3DOptions,
+ HashtableOptions,
+ HashtableFindOptions,
+ HashtableImportOptions,
+ HashtableSizeOptions,
+ VarHandleOptions,
+ ReadVariableOptions,
+ AssignVariableOptions,
+ RandomOptions,
+ BucketizeOptions,
+ GeluOptions,
+ DynamicUpdateSliceOptions,
+ UnsortedSegmentProdOptions,
+ UnsortedSegmentMaxOptions,
+ UnsortedSegmentMinOptions,
+ UnsortedSegmentSumOptions,
+ ATan2Options,
+ SignOptions,
+ BitcastOptions,
+ BitwiseXorOptions,
+ RightShiftOptions,
+ BCQGatherOptions = 252,
+ BCQFullyConnectedOptions = 253,
+ InstanceNormOptions = 254,
+}
+
+enum Padding : byte { SAME, VALID }
+
+enum ActivationFunctionType : byte {
+ NONE = 0,
+ RELU = 1,
+ RELU_N1_TO_1 = 2,
+ RELU6 = 3,
+ TANH = 4,
+ SIGN_BIT = 5,
+}
+
+table Conv2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+// Options for both Conv3D and Conv3DTranspose.
+table Conv3DOptions {
+ padding:Padding;
+ stride_d:int;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_d_factor:int = 1;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table Pool2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ filter_width:int;
+ filter_height:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DepthwiseConv2DOptions {
+ // Parameters for DepthwiseConv version 1 or above.
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ // `depth_multiplier` is redundant. It's used by CPU kernels in
+ // TensorFlow 2.0 or below, but ignored in versions above.
+ // See comments in lite/c/builtin_op_data.h for more details.
+ depth_multiplier:int;
+ fused_activation_function:ActivationFunctionType;
+ // Parameters for DepthwiseConv version 2 or above.
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table ConcatEmbeddingsOptions {
+ num_channels:int;
+ num_columns_per_channel:[int];
+ embedding_dim_per_channel:[int]; // This could be inferred from parameters.
+}
+
+enum LSHProjectionType: byte {
+ UNKNOWN = 0,
+ SPARSE = 1,
+ DENSE = 2,
+}
+
+table LSHProjectionOptions {
+ type: LSHProjectionType;
+}
+
+table SVDFOptions {
+ rank:int;
+ fused_activation_function:ActivationFunctionType;
+ // For weights-only quantization, use asymmetric quantization for non
+ // constant inputs at evaluation time.
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow RNNCell.
+table RNNOptions {
+ fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with RNNCell.
+table SequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow bidirectional_dynamic_rnn with RNNCell.
+table BidirectionalSequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ merge_outputs: bool;
+ asymmetric_quantize_inputs:bool;
+}
+
+enum FullyConnectedOptionsWeightsFormat: byte {
+ DEFAULT = 0,
+ SHUFFLED4x16INT8 = 1,
+ SHUFFLED16x1FLOAT32 = 127
+}
+
+// An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
+table FullyConnectedOptions {
+ // Parameters for FullyConnected version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+
+ // Parameters for FullyConnected version 2 or above.
+ weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT;
+
+ // Parameters for FullyConnected version 5 or above.
+ // If set to true, then the number of dimensions is preserved. Furthermore,
+ // all but the last dimension of the input and output shapes will be equal.
+ keep_num_dims: bool;
+
+ // Parameters for FullyConnected version 7 or above.
+ // If set to true, then weights-only op will use asymmetric quantization for
+ // inputs.
+ asymmetric_quantize_inputs: bool;
+}
+
+table SoftmaxOptions {
+ beta: float;
+}
+
+// An implementation of TensorFlow concat.
+table ConcatenationOptions {
+ axis:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table AddOptions {
+ fused_activation_function:ActivationFunctionType;
+ // Parameters supported by version 3.
+ pot_scale_int16:bool = true;
+}
+
+table MulOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table L2NormOptions {
+ // This field is currently ignored in the L2 Norm Op.
+ fused_activation_function:ActivationFunctionType;
+}
+
+table LocalResponseNormalizationOptions {
+ radius:int;
+ bias:float;
+ alpha:float;
+ beta:float;
+}
+
+enum LSTMKernelType : byte {
+ // Full LSTM kernel which supports peephole and projection.
+ FULL = 0,
+ // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
+ BASIC = 1,
+}
+
+// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
+table LSTMOptions {
+ // Parameters for LSTM version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // Parameters for LSTM version 2 or above.
+ // Basic kernel is only supported in version 2 or above.
+ kernel_type: LSTMKernelType = FULL;
+
+ // Parameters for LSTM version 4 or above.
+ asymmetric_quantize_inputs: bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with LSTMCell.
+table UnidirectionalSequenceLSTMOptions {
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true then first dimension is sequence, otherwise batch.
+ time_major:bool;
+
+ // Parameter for Unidirectional Sequence LSTM version 3.
+ asymmetric_quantize_inputs:bool;
+
+ // Parameter for unidirectional sequence RNN version 4.
+ diagonal_recurrent_tensors:bool;
+}
+
+table BidirectionalSequenceLSTMOptions {
+ // Parameters supported by version 1:
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true, store the outputs of both directions into the first output.
+ merge_outputs: bool;
+
+ // Parameters supported by version 2:
+ // If true then first dimension is sequence, otherwise batch.
+ // Version 1 implementations assumed time_major to be true, so this default
+ // value should never change.
+ time_major: bool = true;
+
+ // Parameters for version 3 or above.
+ asymmetric_quantize_inputs:bool;
+}
+
+table ResizeBilinearOptions {
+ new_height: int (deprecated);
+ new_width: int (deprecated);
+ align_corners: bool;
+ half_pixel_centers: bool;
+}
+
+table ResizeNearestNeighborOptions {
+ align_corners: bool;
+ half_pixel_centers: bool;
+}
+
+// Options for a call operation.
+table CallOptions {
+ // The subgraph index that needs to be called.
+ subgraph:uint;
+}
+
+table PadOptions {
+}
+
+table PadV2Options {
+}
+
+table ReshapeOptions {
+ new_shape:[int];
+}
+
+table SpaceToBatchNDOptions {
+}
+
+table BatchToSpaceNDOptions {
+}
+
+table SkipGramOptions {
+ ngram_size: int;
+ max_skip_size: int;
+ include_all_ngrams: bool;
+}
+
+table SpaceToDepthOptions {
+ block_size: int;
+}
+
+table DepthToSpaceOptions {
+ block_size: int;
+}
+
+table SubOptions {
+ fused_activation_function:ActivationFunctionType;
+ // Parameters supported by version 5
+ pot_scale_int16:bool = true;
+}
+
+table DivOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table TopKV2Options {
+}
+
+enum CombinerType : byte {
+ SUM = 0,
+ MEAN = 1,
+ SQRTN = 2,
+}
+
+table EmbeddingLookupSparseOptions {
+ combiner:CombinerType;
+}
+
+table GatherOptions {
+ axis: int;
+ // Parameters for Gather version 5 or above.
+ batch_dims: int = 0;
+}
+
+table TransposeOptions {
+}
+
+table ExpOptions {
+}
+
+table CosOptions {
+}
+
+table ReducerOptions {
+ keep_dims: bool;
+}
+
+table SqueezeOptions {
+ squeeze_dims:[int];
+}
+
+table SplitOptions {
+ num_splits: int;
+}
+
+table SplitVOptions {
+ num_splits: int;
+}
+
+table StridedSliceOptions {
+ begin_mask: int;
+ end_mask: int;
+ ellipsis_mask: int;
+ new_axis_mask: int;
+ shrink_axis_mask: int;
+}
+
+table LogSoftmaxOptions {
+}
+
+table CastOptions {
+ in_data_type: TensorType;
+ out_data_type: TensorType;
+}
+
+table DequantizeOptions {
+}
+
+table MaximumMinimumOptions {
+}
+
+table TileOptions {
+}
+
+table ArgMaxOptions {
+ output_type : TensorType;
+}
+
+table ArgMinOptions {
+ output_type : TensorType;
+}
+
+table GreaterOptions {
+}
+
+table GreaterEqualOptions {
+}
+
+table LessOptions {
+}
+
+table LessEqualOptions {
+}
+
+table NegOptions {
+}
+
+table SelectOptions {
+}
+
+table SliceOptions {
+}
+
+table TransposeConvOptions {
+ // Parameters supported by version 1, 2, 3:
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+
+ // Parameters supported by version 4:
+ fused_activation_function:ActivationFunctionType = NONE;
+}
+
+table ExpandDimsOptions {
+}
+
+table SparseToDenseOptions {
+ validate_indices:bool;
+}
+
+table EqualOptions {
+}
+
+table NotEqualOptions {
+}
+
+table ShapeOptions {
+ // Optional output type of the operation (int32 or int64). Defaults to int32.
+ out_type : TensorType;
+}
+
+table RankOptions {
+}
+
+table PowOptions {
+}
+
+table FakeQuantOptions {
+ // Parameters supported by version 1:
+ min:float;
+ max:float;
+ num_bits:int;
+
+ // Parameters supported by version 2:
+ narrow_range:bool;
+}
+
+table PackOptions {
+ values_count:int;
+ axis:int;
+}
+
+table LogicalOrOptions {
+}
+
+table OneHotOptions {
+ axis:int;
+}
+
+table AbsOptions {
+}
+
+table HardSwishOptions {
+}
+
+table LogicalAndOptions {
+}
+
+table LogicalNotOptions {
+}
+
+table UnpackOptions {
+ num:int;
+ axis:int;
+}
+
+table FloorDivOptions {
+}
+
+table SquareOptions {
+}
+
+table ZerosLikeOptions {
+}
+
+table FillOptions {
+}
+
+table FloorModOptions {
+}
+
+table RangeOptions {
+}
+
+table LeakyReluOptions {
+ alpha:float;
+}
+
+table SquaredDifferenceOptions {
+}
+
+enum MirrorPadMode : byte {
+ // Doesn't include borders.
+ REFLECT = 0,
+ // Includes borders.
+ SYMMETRIC = 1,
+}
+
+table MirrorPadOptions {
+ mode:MirrorPadMode;
+}
+
+table UniqueOptions {
+ idx_out_type:TensorType = INT32;
+}
+
+table ReverseV2Options {
+}
+
+table AddNOptions {
+}
+
+table GatherNdOptions {
+}
+
+table WhereOptions {
+}
+
+table ReverseSequenceOptions {
+ seq_dim:int;
+ batch_dim:int = 0;
+}
+
+table MatrixDiagOptions {
+}
+
+table QuantizeOptions {
+}
+
+table MatrixSetDiagOptions {
+}
+
+table IfOptions {
+ then_subgraph_index:int;
+ else_subgraph_index:int;
+}
+
+table CallOnceOptions {
+ init_subgraph_index:int;
+}
+
+table WhileOptions {
+ cond_subgraph_index:int;
+ body_subgraph_index:int;
+}
+
+table NonMaxSuppressionV4Options {
+}
+
+table NonMaxSuppressionV5Options {
+}
+
+table ScatterNdOptions {
+}
+
+table SelectV2Options {
+}
+
+table DensifyOptions {
+}
+
+table SegmentSumOptions {
+}
+
+table BatchMatMulOptions {
+ adjoint_lhs:bool;
+ adjoint_rhs:bool;
+ // Parameters for BatchMatMul version 4 or above.
+ // If set to true, then weights-only op will use asymmetric quantization for
+ // inputs.
+ asymmetric_quantize_inputs: bool;
+}
+
+table CumsumOptions {
+ exclusive:bool;
+ reverse:bool;
+}
+
+table BroadcastToOptions {
+}
+
+table Rfft2dOptions {
+}
+
+table HashtableOptions {
+ // The identity of hash tables. This identity will be used across different
+ // subgraphs in the same interpreter instance.
+ table_id:int;
+ key_dtype:TensorType;
+ value_dtype:TensorType;
+}
+
+table HashtableFindOptions {
+}
+
+table HashtableImportOptions {
+}
+
+table HashtableSizeOptions {
+}
+
+table VarHandleOptions {
+ container:string;
+ shared_name:string;
+}
+
+table ReadVariableOptions {
+}
+
+table AssignVariableOptions {
+}
+
+table RandomOptions {
+ seed: long;
+ seed2: long;
+}
+
+table BucketizeOptions {
+ boundaries: [float]; // The bucket boundaries.
+}
+
+table GeluOptions {
+ approximate: bool;
+}
+
+table DynamicUpdateSliceOptions {
+}
+
+table UnsortedSegmentProdOptions {
+}
+
+table UnsortedSegmentMaxOptions {
+}
+
+table UnsortedSegmentSumOptions {
+}
+
+table ATan2Options {
+}
+
+table UnsortedSegmentMinOptions {
+}
+
+table SignOptions {
+}
+
+table BitcastOptions {
+}
+
+table BitwiseXorOptions {
+}
+
+table RightShiftOptions {
+}
+
+table BCQGatherOptions {
+ input_hidden_size: int;
+ axis: int;
+}
+
+table BCQFullyConnectedOptions {
+ weights_hidden_size: int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table InstanceNormOptions {
+ epsilon:float;
+ fused_activation_function:ActivationFunctionType;
+}
+
+// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
+// builtin, or a string if the operator is custom.
+table OperatorCode {
+ // This field is for backward compatibility. This field will be used when
+ // the value of the extended builtin_code field is less than
+ // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+ deprecated_builtin_code:byte;
+ custom_code:string;
+
+ // The version of the operator. The version needs to be bumped whenever new
+ // parameters are introduced into an op.
+ version:int = 1;
+
+ // This field is introduced to resolve the op builtin-code shortage problem
+ // (the original BuiltinOperator enum field was represented as a byte).
+ // This field will be used when the value of the extended builtin_code field
+ // is greater than BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+ builtin_code:BuiltinOperator;
+}
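
The comments above encode a small resolution rule. A minimal sketch of it in Python; the constant value 127 comes from PLACEHOLDER_FOR_GREATER_OP_CODES in the enum above, and the function name is illustrative:

```python
PLACEHOLDER_FOR_GREATER_OP_CODES = 127

def resolve_op_code(deprecated_builtin_code: int, builtin_code: int) -> int:
    # Codes that fit below the placeholder still live in the old byte field;
    # anything at or above it must be read from the extended int32 field.
    if deprecated_builtin_code < PLACEHOLDER_FOR_GREATER_OP_CODES:
        return deprecated_builtin_code
    return builtin_code
```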
+
+enum CustomOptionsFormat : byte {
+ FLEXBUFFERS = 0,
+}
+
+enum DataFormat : byte {
+ // For 2D data, NHWC(batch, height, width, channels)
+ // For 3D data, NDHWC(batch, depth, height, width, channels)
+ CHANNELS_LAST = 0,
+ // For 2D data, NCHW(batch, channels, height, width)
+ // For 3D data, NCDHW(batch, channels, depth, height, width)
+ CHANNELS_FIRST = 1,
+}
+
+// An operator takes tensors as inputs and outputs. The type of operation being
+// performed is determined by an index into the list of valid OperatorCodes,
+// while the specifics of each operation are configured using builtin_options
+// or custom_options.
+table Operator {
+ // Index into the operator_codes array. Using an integer here avoids
+ // complicated map lookups.
+ opcode_index:uint;
+
+ // Optional inputs are indicated by -1.
+ inputs:[int];
+ outputs:[int];
+
+ builtin_options:BuiltinOptions;
+ custom_options:[ubyte];
+ custom_options_format:CustomOptionsFormat;
+
+ // A list of booleans indicating the input tensors which are being mutated by
+ // this operator (e.g. used by RNN and LSTM).
+ // For example, if the "inputs" array refers to 5 tensors and the second and
+ // fifth are mutable variables, then this list will contain
+ // [false, true, false, false, true].
+ //
+ // If the list is empty, no variable is mutated in this operator.
+ // The list either has the same length as `inputs`, or is empty.
+ mutating_variable_inputs:[bool];
+
+ // A list of indices to the subgraph's "tensors" that are internal to an Op.
+ // Internal tensors are those that do not flow in or out of the operation,
+ // but instead are part of internal computation. As such, the operation's
+ // implementation may manage its memory more efficiently. They are needed
+ // however (i.e. not just an implementation detail) since they are part of the
+ // computation, which may require relevant metadata such as quantization
+ // parameters.
+ intermediates:[int];
+}
+
+// The root type, defining a subgraph, which typically represents an entire
+// model.
+table SubGraph {
+ // A list of all tensors used in this subgraph.
+ tensors:[Tensor];
+
+ // Indices of the tensors that are inputs into this subgraph. Note this is
+ // the list of non-static tensors that feed into the subgraph for inference.
+ inputs:[int];
+
+ // Indices of the tensors that are outputs out of this subgraph. Note this is
+ // the list of output tensors that are considered the product of the
+ // subgraph's inference.
+ outputs:[int];
+
+ // All operators, in execution order.
+ operators:[Operator];
+
+ // Name of this subgraph (used for debugging).
+ name:string;
+
+ // Data format for input/output of SubGraph
+ data_format: DataFormat;
+}
+
+// Table of raw data buffers (used for constant tensors). Referenced by tensors
+// by index. The generous alignment accommodates mmap-friendly data structures.
+table Buffer {
+ data:[ubyte] (force_align: 16);
+}
+
+table Metadata {
+ // A human readable string to uniquely identify a Metadata.
+ name:string;
+ // An index to the buffers table.
+ buffer:uint;
+}
+
+// Map from an alias name of tensor to tensor index in the graph.
+// This is used in Signature def.
+table TensorMap {
+ // Represents the alias to use for this tensor.
+ name:string;
+
+ // The actual tensor index in the primary graph that 'name' corresponds to.
+ tensor_index:uint;
+}
+
+// This corresponds to SignatureDef in Tensorflow SavedModel.
+// The SignatureDef will be part of the SavedModel provided for conversion.
+table SignatureDef {
+ // Named inputs for this signature.
+ inputs:[TensorMap];
+
+ // Named outputs for this signature.
+ outputs:[TensorMap];
+
+ // Key value which was in the Tensorflow SavedModel SignatureDef map.
+ signature_key:string;
+
+ // Model tag, deprecated.
+ deprecated_tag:string (deprecated);
+
+ // Index of subgraphs that corresponds to the exported method.
+ subgraph_index:uint;
+}
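
As a rough illustration only, assuming deserialized Python objects that mirror the Model, SignatureDef, and TensorMap tables above (no particular API is implied to provide them), resolving a SignatureDef to concrete tensors might look like:

```python
def resolve_signature(model, sig):
    # `model` / `sig` are hypothetical objects with the field names above.
    graph = model.subgraphs[sig.subgraph_index]
    inputs = {t.name: graph.tensors[t.tensor_index] for t in sig.inputs}
    outputs = {t.name: graph.tensors[t.tensor_index] for t in sig.outputs}
    return inputs, outputs
```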
+
+table Model {
+ // Version of the schema.
+ version:uint;
+
+ // A list of all operator codes used in this model. This is
+ // kept in order because operators carry an index into this
+ // vector.
+ operator_codes:[OperatorCode];
+
+ // All the subgraphs of the model. The 0th is assumed to be the main
+ // model.
+ subgraphs:[SubGraph];
+
+ // A description of the model.
+ description:string;
+
+ // Buffers of the model.
+ // Note the 0th entry of this array must be an empty buffer (sentinel).
+ // This is a convention so that tensors without a buffer can provide 0 as
+ // their buffer.
+ buffers:[Buffer];
+
+ // Metadata about the model. Indirects into the existing buffers list.
+ // Deprecated, prefer to use metadata field.
+ metadata_buffer:[int];
+
+ // Metadata about the model.
+ metadata:[Metadata];
+
+ // Optional SignatureDefs for the model.
+ signature_defs:[SignatureDef];
+}
+
+root_type Model;
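
A tiny sanity check one can run against a serialized file: a FlatBuffer's file identifier occupies bytes 4..8 of the buffer, so the `file_identifier "CIR0"` declared above can be verified without any generated code. A sketch using only the standard library; the function name is illustrative:

```python
def has_circle_identifier(path: str) -> bool:
    # Bytes 0..4 hold the root table offset; bytes 4..8 hold the identifier.
    with open(path, "rb") as f:
        header = f.read(8)
    return len(header) == 8 and header[4:8] == b"CIR0"
```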
diff --git a/res/PyTorchExamples/.gitignore b/res/PyTorchExamples/.gitignore
new file mode 100644
index 000000000..77d26fcd8
--- /dev/null
+++ b/res/PyTorchExamples/.gitignore
@@ -0,0 +1,2 @@
+output/
+__pycache__
diff --git a/res/PyTorchExamples/README.md b/res/PyTorchExamples/README.md
new file mode 100644
index 000000000..4eb2520bb
--- /dev/null
+++ b/res/PyTorchExamples/README.md
@@ -0,0 +1,41 @@
+# PyTorch examples
+
+Python examples for converting PyTorch models to ONNX/TF/tflite models
+
+## Package versions
+
+- Python 3.X
+- torch==1.7.0
+- onnx==1.7.0
+- onnx-tf==1.6.0 (see note)
+- tensorflow-cpu==2.3.0
+- tensorflow-addons
+
+Note: to use TensorFlow 2.x, please install `onnx-tf` from the master branch
+of https://github.com/onnx/onnx-tensorflow
+
+## Directory Layout
+
+```
+ptem.py <- PyTorch Example Manager
+examples/
+ [EXAMPLE NAME]/
+ __init__.py
+```
+
+## Folder naming convention
+
+Each folder is named after the Python API it exercises.
+
+## HOWTO: Generate a tflite from examples
+
+```
+$ python3 ptem.py [EXAMPLE NAME 1] [EXAMPLE NAME 2] ...
+```
+
+## HOWTO: Add a new example
+
+- create a folder with the same name as the Python API
+- add an `__init__.py` file
+- set the `_model_` variable to a model of the network containing the operator
+- set the `_dummy_` variable to a dummy input for generating the ONNX file,
+  as in the sketch below
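
For instance, a minimal `examples/Sigmoid/__init__.py` following this convention might look like the following (Sigmoid is a hypothetical example name, chosen only for illustration):

```python
import torch
import torch.nn as nn


# model
class net_Sigmoid(nn.Module):
    def __init__(self):
        super().__init__()
        self.op = nn.Sigmoid()

    def forward(self, input):
        return self.op(input)


_model_ = net_Sigmoid()

# dummy input for onnx generation
_dummy_ = torch.randn(1, 2, 3, 3)
```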
diff --git a/res/PyTorchExamples/examples/AdaptiveAvgPool2d/__init__.py b/res/PyTorchExamples/examples/AdaptiveAvgPool2d/__init__.py
new file mode 100644
index 000000000..6741e5e80
--- /dev/null
+++ b/res/PyTorchExamples/examples/AdaptiveAvgPool2d/__init__.py
@@ -0,0 +1,18 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_AdaptiveAvgPool2d(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.AdaptiveAvgPool2d(1)
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_AdaptiveAvgPool2d()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/AdaptiveMaxPool2d/__init__.py b/res/PyTorchExamples/examples/AdaptiveMaxPool2d/__init__.py
new file mode 100644
index 000000000..58ccaf93d
--- /dev/null
+++ b/res/PyTorchExamples/examples/AdaptiveMaxPool2d/__init__.py
@@ -0,0 +1,18 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_AdaptiveMaxPool2d(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.AdaptiveMaxPool2d(1)
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_AdaptiveMaxPool2d()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/AvgPool2d-1/__init__.py b/res/PyTorchExamples/examples/AvgPool2d-1/__init__.py
new file mode 100644
index 000000000..c3e5bb668
--- /dev/null
+++ b/res/PyTorchExamples/examples/AvgPool2d-1/__init__.py
@@ -0,0 +1,18 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_AvgPool2d(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.AvgPool2d(kernel_size=2)
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_AvgPool2d()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/AvgPool2d/__init__.py b/res/PyTorchExamples/examples/AvgPool2d/__init__.py
new file mode 100644
index 000000000..f26521f21
--- /dev/null
+++ b/res/PyTorchExamples/examples/AvgPool2d/__init__.py
@@ -0,0 +1,18 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_AvgPool2d(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.AvgPool2d(1)
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_AvgPool2d()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/BatchNorm2d/__init__.py b/res/PyTorchExamples/examples/BatchNorm2d/__init__.py
new file mode 100644
index 000000000..e078e63f6
--- /dev/null
+++ b/res/PyTorchExamples/examples/BatchNorm2d/__init__.py
@@ -0,0 +1,18 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_BatchNorm2d(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.BatchNorm2d(2)
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_BatchNorm2d()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/BatchToSpaceND/__init__.py b/res/PyTorchExamples/examples/BatchToSpaceND/__init__.py
new file mode 100644
index 000000000..cecc9c88a
--- /dev/null
+++ b/res/PyTorchExamples/examples/BatchToSpaceND/__init__.py
@@ -0,0 +1,49 @@
+import torch
+import torch.nn as nn
+import numpy as np
+
+
+# model equivalent to tensorflow batch_to_space, but with channels-first layout
+class net_BatchToSpaceND(nn.Module):
+ def __init__(self, block_shape, crop):
+ super().__init__()
+ self.block_shape = block_shape
+ self.crop = crop
+
+ def forward(self, input):
+ # Prepare attributes
+ input_shape = list(map(int, list(input.shape)))
+ block_shape = self.block_shape
+ crop = self.crop
+
+ # number of spatial dimensions
+ m = len(block_shape)
+ # rest of dimensions
+ n = len(input.shape) - m
+ # output batch size
+ batch_size = input_shape[0] // np.prod(block_shape)
+
+ unfolded_shape = list(block_shape) + [batch_size] + input_shape[1:]
+ fold_shape = [batch_size] + input_shape[1:n] + [
+ input_shape[i + n] * block_shape[i] for i in range(m)
+ ]
+ permute_dims = list(range(
+ m, m + n)) + [i + mod for i in range(m) for mod in [n + m, 0]]
+
+ # Actual model starts here
+ unfolded_input = input.reshape(unfolded_shape)
+ permuted = unfolded_input.permute(*permute_dims)
+ full_output = permuted.reshape(fold_shape)
+ # crop output tensor
+ crop_output = full_output
+ for i in range(m):
+ crop_size = sum(crop[i])
+ crop_output = crop_output.narrow(i + n, crop[i][0],
+ fold_shape[i + n] - crop_size)
+ return crop_output
+
+
+_model_ = net_BatchToSpaceND([2, 2], [[1, 0], [0, 1]])
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(8, 4, 3, 3)
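
A quick shape check of the example above (illustrative only, not part of the file): with batch 8, block shape [2, 2], and crops [[1, 0], [0, 1]], the batch shrinks by a factor of 4 while each 3x3 spatial plane grows to 3*2 minus its crops:

```python
out = _model_(_dummy_)
assert tuple(out.shape) == (2, 4, 5, 5)  # 8/4 batch, 4 channels, 5x5 spatial
```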
diff --git a/res/PyTorchExamples/examples/Bilinear/__init__.py b/res/PyTorchExamples/examples/Bilinear/__init__.py
new file mode 100644
index 000000000..cd5c30a73
--- /dev/null
+++ b/res/PyTorchExamples/examples/Bilinear/__init__.py
@@ -0,0 +1,20 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_Bilinear(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.Bilinear(20, 30, 40)
+
+ def forward(self, inputs):
+ return self.op(inputs[0], inputs[1])
+
+
+_model_ = net_Bilinear()
+
+# dummy input for onnx generation
+_dummy_ = [torch.randn(128, 20), torch.randn(128, 30)]
+
+# Note: this model has a problem when exporting to ONNX
diff --git a/res/PyTorchExamples/examples/ConstantPad2d-1/__init__.py b/res/PyTorchExamples/examples/ConstantPad2d-1/__init__.py
new file mode 100644
index 000000000..397bdecb0
--- /dev/null
+++ b/res/PyTorchExamples/examples/ConstantPad2d-1/__init__.py
@@ -0,0 +1,18 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_ConstantPad2d(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.ConstantPad2d(0, 1.5)
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_ConstantPad2d()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/ConstantPad2d-2/__init__.py b/res/PyTorchExamples/examples/ConstantPad2d-2/__init__.py
new file mode 100644
index 000000000..d76b8ea80
--- /dev/null
+++ b/res/PyTorchExamples/examples/ConstantPad2d-2/__init__.py
@@ -0,0 +1,18 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_ConstantPad2d(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.ConstantPad2d((0, 0, 0, 0), 0.0)
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_ConstantPad2d()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/ConstantPad2d-3/__init__.py b/res/PyTorchExamples/examples/ConstantPad2d-3/__init__.py
new file mode 100644
index 000000000..901d875b7
--- /dev/null
+++ b/res/PyTorchExamples/examples/ConstantPad2d-3/__init__.py
@@ -0,0 +1,18 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_ConstantPad2d(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.ConstantPad2d((1, 1, 1, 1), 0.0)
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_ConstantPad2d()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/ConstantPad2d/__init__.py b/res/PyTorchExamples/examples/ConstantPad2d/__init__.py
new file mode 100644
index 000000000..9e9840aba
--- /dev/null
+++ b/res/PyTorchExamples/examples/ConstantPad2d/__init__.py
@@ -0,0 +1,18 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_ConstantPad2d(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.ConstantPad2d(0, 0.0)
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_ConstantPad2d()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/Conv2d-dil/__init__.py b/res/PyTorchExamples/examples/Conv2d-dil/__init__.py
new file mode 100644
index 000000000..266ccfa93
--- /dev/null
+++ b/res/PyTorchExamples/examples/Conv2d-dil/__init__.py
@@ -0,0 +1,18 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_Conv2dDil(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.Conv2d(2, 2, 1, dilation=2)
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_Conv2dDil()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/Conv2d-dw/__init__.py b/res/PyTorchExamples/examples/Conv2d-dw/__init__.py
new file mode 100644
index 000000000..c31dbbdfc
--- /dev/null
+++ b/res/PyTorchExamples/examples/Conv2d-dw/__init__.py
@@ -0,0 +1,18 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_Conv2dDW(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.Conv2d(2, 2, 1, groups=2)
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_Conv2dDW()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/Conv2d-pad/__init__.py b/res/PyTorchExamples/examples/Conv2d-pad/__init__.py
new file mode 100644
index 000000000..4c2b45e18
--- /dev/null
+++ b/res/PyTorchExamples/examples/Conv2d-pad/__init__.py
@@ -0,0 +1,18 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_Conv2d(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.Conv2d(1, 1, 1, padding=(1, 0))
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_Conv2d()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 1, 5, 17)
diff --git a/res/PyTorchExamples/examples/Conv2d-yuv2rgb/__init__.py b/res/PyTorchExamples/examples/Conv2d-yuv2rgb/__init__.py
new file mode 100644
index 000000000..235015ce2
--- /dev/null
+++ b/res/PyTorchExamples/examples/Conv2d-yuv2rgb/__init__.py
@@ -0,0 +1,24 @@
+import torch
+import torch.nn as nn
+
+
+# model representing YUV-to-RGB conversion
+# for details see https://en.wikipedia.org/wiki/YUV#Conversion_to.2Ffrom_RGB
+class net_Conv2dYUVtoRGB(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.Conv2d(3, 3, 1, bias=False)
+ raw_weights = [[1.0, 0.0, 1.13983], \
+ [1.0, -0.39465, -0.58060], \
+ [1.0, 2.03211, 0.0]]
+ weights = torch.Tensor(raw_weights).reshape(3, 3, 1, 1)
+ self.op.weight = torch.nn.Parameter(weights, requires_grad=False)
+
+ def forward(self, input):
+ return torch.clamp(self.op(input), 0.0, 1.0)
+
+
+_model_ = net_Conv2dYUVtoRGB()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 3, 4, 4)
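
Since the 1x1 convolution simply applies the 3x3 matrix to every pixel, its output can be cross-checked against an explicit matrix multiply. An illustrative sketch to run after the example above, not part of the file:

```python
# The conv weight (3, 3, 1, 1) collapses to the 3x3 conversion matrix.
W = _model_.op.weight.reshape(3, 3)
expected = torch.clamp(torch.einsum('oc,bchw->bohw', W, _dummy_), 0.0, 1.0)
assert torch.allclose(_model_(_dummy_), expected)
```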
diff --git a/res/PyTorchExamples/examples/Conv2d/__init__.py b/res/PyTorchExamples/examples/Conv2d/__init__.py
new file mode 100644
index 000000000..20516a35a
--- /dev/null
+++ b/res/PyTorchExamples/examples/Conv2d/__init__.py
@@ -0,0 +1,18 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_Conv2d(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.Conv2d(2, 2, 1)
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_Conv2d()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/ConvTranspose2d/__init__.py b/res/PyTorchExamples/examples/ConvTranspose2d/__init__.py
new file mode 100644
index 000000000..17b1b4214
--- /dev/null
+++ b/res/PyTorchExamples/examples/ConvTranspose2d/__init__.py
@@ -0,0 +1,18 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_ConvTranspose2d(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.ConvTranspose2d(2, 2, 1)
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_ConvTranspose2d()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/ELU/__init__.py b/res/PyTorchExamples/examples/ELU/__init__.py
new file mode 100644
index 000000000..3cc9c9616
--- /dev/null
+++ b/res/PyTorchExamples/examples/ELU/__init__.py
@@ -0,0 +1,18 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_ELU(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.ELU()
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_ELU()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/Flatten/__init__.py b/res/PyTorchExamples/examples/Flatten/__init__.py
new file mode 100644
index 000000000..92bd2d3a7
--- /dev/null
+++ b/res/PyTorchExamples/examples/Flatten/__init__.py
@@ -0,0 +1,18 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_Flatten(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.Flatten()
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_Flatten()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/InstanceNorm2d/__init__.py b/res/PyTorchExamples/examples/InstanceNorm2d/__init__.py
new file mode 100644
index 000000000..29e8fe703
--- /dev/null
+++ b/res/PyTorchExamples/examples/InstanceNorm2d/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_InstanceNorm2d(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.InstanceNorm2d(2)
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_InstanceNorm2d()
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/LPPool2d/__init__.py b/res/PyTorchExamples/examples/LPPool2d/__init__.py
new file mode 100644
index 000000000..1e3d3f4a8
--- /dev/null
+++ b/res/PyTorchExamples/examples/LPPool2d/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_LPPool2d(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.LPPool2d(norm_type=2, kernel_size=1, stride=1)
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_LPPool2d()
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/LSTM-bi/__init__.py b/res/PyTorchExamples/examples/LSTM-bi/__init__.py
new file mode 100644
index 000000000..6f5cea469
--- /dev/null
+++ b/res/PyTorchExamples/examples/LSTM-bi/__init__.py
@@ -0,0 +1,28 @@
+import torch
+import torch.nn as nn
+
+_seq_length = 5
+_batch_size = 3
+_input_size = 10
+_hidden_size = 20
+_number_layers = 1
+
+
+# model
+class net_LSTM(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.LSTM(_input_size, _hidden_size, _number_layers, bidirectional=True)
+
+ def forward(self, inputs):
+ return self.op(inputs[0], (inputs[1], inputs[2]))
+
+
+_model_ = net_LSTM()
+
+# dummy input for onnx generation
+_dummy_ = [
+ torch.randn(_seq_length, _batch_size, _input_size),
+ torch.randn(_number_layers * 2, _batch_size, _hidden_size),
+ torch.randn(_number_layers * 2, _batch_size, _hidden_size)
+]
diff --git a/res/PyTorchExamples/examples/LSTM-nobias/__init__.py b/res/PyTorchExamples/examples/LSTM-nobias/__init__.py
new file mode 100644
index 000000000..d64704ae4
--- /dev/null
+++ b/res/PyTorchExamples/examples/LSTM-nobias/__init__.py
@@ -0,0 +1,28 @@
+import torch
+import torch.nn as nn
+
+_seq_length = 2
+_batch_size = 5
+_input_size = 15
+_hidden_size = 10
+_number_layers = 1
+
+
+# model
+class net_LSTM(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.LSTM(_input_size, _hidden_size, _number_layers, bias=False)
+
+ def forward(self, inputs):
+ return self.op(inputs[0], (inputs[1], inputs[2]))
+
+
+_model_ = net_LSTM()
+
+# dummy input for onnx generation
+_dummy_ = [
+ torch.randn(_seq_length, _batch_size, _input_size),
+ torch.randn(_number_layers, _batch_size, _hidden_size),
+ torch.randn(_number_layers, _batch_size, _hidden_size)
+]
diff --git a/res/PyTorchExamples/examples/LSTM-noinit/__init__.py b/res/PyTorchExamples/examples/LSTM-noinit/__init__.py
new file mode 100644
index 000000000..7aa79d6d6
--- /dev/null
+++ b/res/PyTorchExamples/examples/LSTM-noinit/__init__.py
@@ -0,0 +1,24 @@
+import torch
+import torch.nn as nn
+
+_seq_length = 1
+_batch_size = 5
+_input_size = 8
+_hidden_size = 10
+_number_layers = 1
+
+
+# model
+class net_LSTM(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.LSTM(_input_size, _hidden_size, _number_layers)
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_LSTM()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(_seq_length, _batch_size, _input_size)
diff --git a/res/PyTorchExamples/examples/LSTM/__init__.py b/res/PyTorchExamples/examples/LSTM/__init__.py
new file mode 100644
index 000000000..69a80e6dd
--- /dev/null
+++ b/res/PyTorchExamples/examples/LSTM/__init__.py
@@ -0,0 +1,20 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_LSTM(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.LSTM(10, 20, 1)
+
+ def forward(self, inputs):
+ return self.op(inputs[0], (inputs[1], inputs[2]))
+
+
+_model_ = net_LSTM()
+
+# dummy input for onnx generation
+_dummy_ = [torch.randn(5, 3, 10), torch.randn(1, 3, 20), torch.randn(1, 3, 20)]
+
+# Note: this model has a problem when converting ONNX to TensorFlow
diff --git a/res/PyTorchExamples/examples/LeakyReLU/__init__.py b/res/PyTorchExamples/examples/LeakyReLU/__init__.py
new file mode 100644
index 000000000..c7a7dd82d
--- /dev/null
+++ b/res/PyTorchExamples/examples/LeakyReLU/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_LeakyReLU(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.LeakyReLU()
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_LeakyReLU()
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/Linear/__init__.py b/res/PyTorchExamples/examples/Linear/__init__.py
new file mode 100644
index 000000000..b6f4553bd
--- /dev/null
+++ b/res/PyTorchExamples/examples/Linear/__init__.py
@@ -0,0 +1,18 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_Linear(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.Linear(3, 6)
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_Linear()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/LocalResponseNorm/__init__.py b/res/PyTorchExamples/examples/LocalResponseNorm/__init__.py
new file mode 100644
index 000000000..2eea39e5e
--- /dev/null
+++ b/res/PyTorchExamples/examples/LocalResponseNorm/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_LocalResponseNorm(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.LocalResponseNorm(1)
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_LocalResponseNorm()
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 4, 4)
diff --git a/res/PyTorchExamples/examples/LogSoftmax/__init__.py b/res/PyTorchExamples/examples/LogSoftmax/__init__.py
new file mode 100644
index 000000000..2aa6f3341
--- /dev/null
+++ b/res/PyTorchExamples/examples/LogSoftmax/__init__.py
@@ -0,0 +1,18 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_LogSoftmax(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.LogSoftmax()
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_LogSoftmax()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/MaxPool2d-am/__init__.py b/res/PyTorchExamples/examples/MaxPool2d-am/__init__.py
new file mode 100644
index 000000000..4225cd6c1
--- /dev/null
+++ b/res/PyTorchExamples/examples/MaxPool2d-am/__init__.py
@@ -0,0 +1,18 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_MaxPool2d(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.MaxPool2d(3, stride=1, return_indices=True)
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_MaxPool2d()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 4, 4)
diff --git a/res/PyTorchExamples/examples/MaxPool2d/__init__.py b/res/PyTorchExamples/examples/MaxPool2d/__init__.py
new file mode 100644
index 000000000..d5a9cfa82
--- /dev/null
+++ b/res/PyTorchExamples/examples/MaxPool2d/__init__.py
@@ -0,0 +1,18 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_MaxPool2d(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.MaxPool2d(1)
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_MaxPool2d()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/PReLU/__init__.py b/res/PyTorchExamples/examples/PReLU/__init__.py
new file mode 100644
index 000000000..6c0f7a2d7
--- /dev/null
+++ b/res/PyTorchExamples/examples/PReLU/__init__.py
@@ -0,0 +1,18 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_PReLU(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.PReLU()
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_PReLU()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/PReLUwConv1d/__init__.py b/res/PyTorchExamples/examples/PReLUwConv1d/__init__.py
new file mode 100644
index 000000000..b2aed98b8
--- /dev/null
+++ b/res/PyTorchExamples/examples/PReLUwConv1d/__init__.py
@@ -0,0 +1,19 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_Conv1dPReLU(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op1 = nn.Conv1d(1, 1, 1)
+ self.op2 = nn.PReLU()
+
+ def forward(self, input):
+ return self.op2(self.op1(input))
+
+
+_model_ = net_Conv1dPReLU()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 1, 5)
diff --git a/res/PyTorchExamples/examples/PReLUwConv2d/__init__.py b/res/PyTorchExamples/examples/PReLUwConv2d/__init__.py
new file mode 100644
index 000000000..c53807115
--- /dev/null
+++ b/res/PyTorchExamples/examples/PReLUwConv2d/__init__.py
@@ -0,0 +1,19 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_Conv2dPReLU(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op1 = nn.Conv2d(1, 1, 1)
+ self.op2 = nn.PReLU()
+
+ def forward(self, input):
+ return self.op2(self.op1(input))
+
+
+_model_ = net_Conv2dPReLU()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 1, 5, 5)
diff --git a/res/PyTorchExamples/examples/PixelShuffle/__init__.py b/res/PyTorchExamples/examples/PixelShuffle/__init__.py
new file mode 100644
index 000000000..14374ce11
--- /dev/null
+++ b/res/PyTorchExamples/examples/PixelShuffle/__init__.py
@@ -0,0 +1,18 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_PixelShuffle(nn.Module):
+ def __init__(self, upscale_factor):
+ super().__init__()
+ self.op = torch.nn.PixelShuffle(upscale_factor)
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_PixelShuffle(2)
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 8, 3, 3)
diff --git a/res/PyTorchExamples/examples/RNN-bi/__init__.py b/res/PyTorchExamples/examples/RNN-bi/__init__.py
new file mode 100644
index 000000000..86f6e4fc0
--- /dev/null
+++ b/res/PyTorchExamples/examples/RNN-bi/__init__.py
@@ -0,0 +1,27 @@
+import torch
+import torch.nn as nn
+
+_input_size = 3
+_seq_len = 2
+_batch = 2
+_hidden_size = 5
+_num_layers = 2
+
+
+# model
+class net_RNN(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.RNN(_input_size, _hidden_size, _num_layers, bidirectional=True)
+
+ def forward(self, inputs):
+ return self.op(inputs[0], inputs[1])
+
+
+_model_ = net_RNN()
+
+# dummy input for onnx generation
+_dummy_ = [
+ torch.randn(_seq_len, _batch, _input_size),
+ torch.randn(2 * _num_layers, _batch, _hidden_size)
+]
diff --git a/res/PyTorchExamples/examples/RNN-nobias/__init__.py b/res/PyTorchExamples/examples/RNN-nobias/__init__.py
new file mode 100644
index 000000000..a6a314877
--- /dev/null
+++ b/res/PyTorchExamples/examples/RNN-nobias/__init__.py
@@ -0,0 +1,26 @@
+import torch
+import torch.nn as nn
+
+_input_size = 4
+_seq_len = 2
+_batch = 3
+_hidden_size = 3
+
+
+# model
+class net_RNN(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.RNN(_input_size, _hidden_size, 1, bias=False)
+
+ def forward(self, inputs):
+ return self.op(inputs[0], inputs[1])
+
+
+_model_ = net_RNN()
+
+# dummy input for onnx generation
+_dummy_ = [
+ torch.randn(_seq_len, _batch, _input_size),
+ torch.randn(1, _batch, _hidden_size)
+]
diff --git a/res/PyTorchExamples/examples/RNN-noinit/__init__.py b/res/PyTorchExamples/examples/RNN-noinit/__init__.py
new file mode 100644
index 000000000..492c2d0ed
--- /dev/null
+++ b/res/PyTorchExamples/examples/RNN-noinit/__init__.py
@@ -0,0 +1,23 @@
+import torch
+import torch.nn as nn
+
+_input_size = 4
+_seq_len = 2
+_batch = 3
+_hidden_size = 3
+
+
+# model
+class net_RNN(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.RNN(_input_size, _hidden_size, 1)
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_RNN()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(_seq_len, _batch, _input_size)
diff --git a/res/PyTorchExamples/examples/RNN-relu/__init__.py b/res/PyTorchExamples/examples/RNN-relu/__init__.py
new file mode 100644
index 000000000..c59c42192
--- /dev/null
+++ b/res/PyTorchExamples/examples/RNN-relu/__init__.py
@@ -0,0 +1,26 @@
+import torch
+import torch.nn as nn
+
+_input_size = 4
+_seq_len = 2
+_batch = 3
+_hidden_size = 3
+
+
+# model
+class net_RNN(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.RNN(_input_size, _hidden_size, 1, nonlinearity='relu')
+
+ def forward(self, inputs):
+ return self.op(inputs[0], inputs[1])
+
+
+_model_ = net_RNN()
+
+# dummy input for onnx generation
+_dummy_ = [
+ torch.randn(_seq_len, _batch, _input_size),
+ torch.randn(1, _batch, _hidden_size)
+]
diff --git a/res/PyTorchExamples/examples/RNN/__init__.py b/res/PyTorchExamples/examples/RNN/__init__.py
new file mode 100644
index 000000000..ed6e5e4a8
--- /dev/null
+++ b/res/PyTorchExamples/examples/RNN/__init__.py
@@ -0,0 +1,20 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_RNN(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.RNN(2, 2, 1)
+
+ def forward(self, inputs):
+ return self.op(inputs[0], inputs[1])
+
+
+_model_ = net_RNN()
+
+# dummy input for onnx generation
+_dummy_ = [torch.randn(2, 2, 2), torch.randn(1, 2, 2)]
+
+# Note: this model has a problem when converting from ONNX to TensorFlow
diff --git a/res/PyTorchExamples/examples/ReLU/__init__.py b/res/PyTorchExamples/examples/ReLU/__init__.py
new file mode 100644
index 000000000..0278cad2d
--- /dev/null
+++ b/res/PyTorchExamples/examples/ReLU/__init__.py
@@ -0,0 +1,18 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_ReLU(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.ReLU()
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_ReLU()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/ReLU6/__init__.py b/res/PyTorchExamples/examples/ReLU6/__init__.py
new file mode 100644
index 000000000..28dd47377
--- /dev/null
+++ b/res/PyTorchExamples/examples/ReLU6/__init__.py
@@ -0,0 +1,18 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_ReLU6(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.ReLU6()
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_ReLU6()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/Sigmoid/__init__.py b/res/PyTorchExamples/examples/Sigmoid/__init__.py
new file mode 100644
index 000000000..a34705e03
--- /dev/null
+++ b/res/PyTorchExamples/examples/Sigmoid/__init__.py
@@ -0,0 +1,18 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_Sigmoid(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.Sigmoid()
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_Sigmoid()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/Softmax/__init__.py b/res/PyTorchExamples/examples/Softmax/__init__.py
new file mode 100644
index 000000000..fec589d46
--- /dev/null
+++ b/res/PyTorchExamples/examples/Softmax/__init__.py
@@ -0,0 +1,18 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_Softmax(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.Softmax()
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_Softmax()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/SpaceToBatchND/__init__.py b/res/PyTorchExamples/examples/SpaceToBatchND/__init__.py
new file mode 100644
index 000000000..78d57fd66
--- /dev/null
+++ b/res/PyTorchExamples/examples/SpaceToBatchND/__init__.py
@@ -0,0 +1,49 @@
+import torch
+import torch.nn as nn
+import numpy as np
+
+
+# model equivalent to TensorFlow space_to_batch, but with channels-first layout
+class net_SpaceToBatchND(nn.Module):
+ def __init__(self, block_shape, pad):
+ super().__init__()
+ self.block_shape = block_shape
+ self.pad = pad
+
+ def forward(self, input):
+ # Prepare attributes
+ input_shape = list(map(int, list(input.shape)))
+ block_shape = self.block_shape
+ pad = self.pad
+
+ # number of spatial dimensions
+ m = len(block_shape)
+ # rest of dimensions
+ n = len(input.shape) - m
+ # output batch size
+ batch_size = input_shape[0]
+
+ out_spatial_dim = [
+ (input_shape[i + n] + pad[i * 2] + pad[i * 2 + 1]) // block_shape[i]
+ for i in range(m)
+ ]
+ unfolded_shape = [batch_size] + input_shape[1:n] + [
+ dim for i in range(m) for dim in [out_spatial_dim[i], block_shape[i]]
+ ]
+ fold_shape = [batch_size * np.prod(block_shape)
+ ] + input_shape[1:n] + out_spatial_dim
+ permute_dims = list(range(n + 1, n + 2 * m, 2)) + list(range(n)) + list(
+ range(n, n + 2 * m, 2))
+
+ # Actual model starts here
+ padded_input = torch.nn.functional.pad(input, pad)
+ unfolded_input = padded_input.reshape(unfolded_shape)
+ permuted = torch.permute(unfolded_input, permute_dims)
+ output = permuted.reshape(fold_shape)
+ return output
+
+
+_model_ = net_SpaceToBatchND([2, 2], [1, 0, 0, 1])
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(2, 4, 5, 5)
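A shape check for the parameters used above, assuming the class is importable: padding grows each 5x5 spatial dim to 6, each block axis divides it by 2, and the batch multiplies by prod(block_shape) = 4.

import torch

out = net_SpaceToBatchND([2, 2], [1, 0, 0, 1])(torch.randn(2, 4, 5, 5))
assert out.shape == (8, 4, 3, 3)  # batch 2 * 4, channels 4, spatial (5 + 1) // 2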
diff --git a/res/PyTorchExamples/examples/SpaceToDepth/__init__.py b/res/PyTorchExamples/examples/SpaceToDepth/__init__.py
new file mode 100644
index 000000000..62b225ddb
--- /dev/null
+++ b/res/PyTorchExamples/examples/SpaceToDepth/__init__.py
@@ -0,0 +1,30 @@
+import torch
+import torch.nn as nn
+import numpy as np
+
+
+# model, equivalent to torch.pixel_unshuffle from torch 1.9+
+class net_SpaceToDepth(nn.Module):
+ def __init__(self, block_size):
+ super().__init__()
+ self.block_size = block_size
+
+ def forward(self, input):
+ # Prepare attributes
+ b_size = self.block_size
+ batch, input_c, input_h, input_w = list(map(int, list(input.shape)))
+ out_c = input_c * b_size * b_size
+ out_h = input_h // b_size
+ out_w = input_w // b_size
+
+ # Actual model starts here
+ x = input.reshape(batch, input_c, out_h, b_size, out_w, b_size)
+ x = x.permute([0, 1, 3, 5, 2, 4])
+ x = x.reshape([batch, out_c, out_h, out_w])
+ return x
+
+
+_model_ = net_SpaceToDepth(2)
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 6, 6)
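An equivalence check against the built-in mentioned in the comment above (assuming a torch version that provides pixel_unshuffle, and that the class is importable):

import torch

x = torch.randn(1, 2, 6, 6)
assert torch.equal(net_SpaceToDepth(2)(x), torch.nn.functional.pixel_unshuffle(x, 2))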
diff --git a/res/PyTorchExamples/examples/Tanh/__init__.py b/res/PyTorchExamples/examples/Tanh/__init__.py
new file mode 100644
index 000000000..76b46298a
--- /dev/null
+++ b/res/PyTorchExamples/examples/Tanh/__init__.py
@@ -0,0 +1,18 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_Tanh(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.Tanh()
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_Tanh()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/UpsamplingNearest2d/__init__.py b/res/PyTorchExamples/examples/UpsamplingNearest2d/__init__.py
new file mode 100644
index 000000000..fe065c60d
--- /dev/null
+++ b/res/PyTorchExamples/examples/UpsamplingNearest2d/__init__.py
@@ -0,0 +1,18 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_UpsamplingNearest2d(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op = nn.UpsamplingNearest2d(2)
+
+ def forward(self, input):
+ return self.op(input)
+
+
+_model_ = net_UpsamplingNearest2d()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 4, 4)
diff --git a/res/PyTorchExamples/examples/abs/__init__.py b/res/PyTorchExamples/examples/abs/__init__.py
new file mode 100644
index 000000000..498d85133
--- /dev/null
+++ b/res/PyTorchExamples/examples/abs/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_abs(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, input):
+ return torch.abs(input)
+
+
+_model_ = net_abs()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/add/__init__.py b/res/PyTorchExamples/examples/add/__init__.py
new file mode 100644
index 000000000..25f4592cd
--- /dev/null
+++ b/res/PyTorchExamples/examples/add/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_add(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, inputs):
+ return torch.add(inputs[0], inputs[1])
+
+
+_model_ = net_add()
+
+# dummy input for onnx generation
+_dummy_ = [torch.randn(1, 2, 3, 3), torch.randn(1, 2, 3, 3)]
diff --git a/res/PyTorchExamples/examples/argmax/__init__.py b/res/PyTorchExamples/examples/argmax/__init__.py
new file mode 100644
index 000000000..47eabe54b
--- /dev/null
+++ b/res/PyTorchExamples/examples/argmax/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_argmax(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, input):
+ return torch.argmax(input)
+
+
+_model_ = net_argmax()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/argmin/__init__.py b/res/PyTorchExamples/examples/argmin/__init__.py
new file mode 100644
index 000000000..d2ce2a139
--- /dev/null
+++ b/res/PyTorchExamples/examples/argmin/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_argmin(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, input):
+ return torch.argmin(input)
+
+
+_model_ = net_argmin()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/cat-1/__init__.py b/res/PyTorchExamples/examples/cat-1/__init__.py
new file mode 100644
index 000000000..f6a29b921
--- /dev/null
+++ b/res/PyTorchExamples/examples/cat-1/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_cat(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, inputs):
+ return torch.cat(inputs, dim=1)
+
+
+_model_ = net_cat()
+
+# dummy input for onnx generation
+_dummy_ = [torch.randn(1, 2, 3, 3), torch.randn(1, 2, 3, 3)]
diff --git a/res/PyTorchExamples/examples/cat/__init__.py b/res/PyTorchExamples/examples/cat/__init__.py
new file mode 100644
index 000000000..beed6d062
--- /dev/null
+++ b/res/PyTorchExamples/examples/cat/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_cat(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, inputs):
+ return torch.cat(inputs)
+
+
+_model_ = net_cat()
+
+# dummy input for onnx generation
+_dummy_ = [torch.randn(1, 2, 3, 3), torch.randn(1, 2, 3, 3)]
diff --git a/res/PyTorchExamples/examples/clamp/__init__.py b/res/PyTorchExamples/examples/clamp/__init__.py
new file mode 100644
index 000000000..92b72864f
--- /dev/null
+++ b/res/PyTorchExamples/examples/clamp/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_clamp(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, input):
+ return torch.clamp(input, 0, 10)
+
+
+_model_ = net_clamp()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/cos/__init__.py b/res/PyTorchExamples/examples/cos/__init__.py
new file mode 100644
index 000000000..4afac71af
--- /dev/null
+++ b/res/PyTorchExamples/examples/cos/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_cos(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, input):
+ return torch.cos(input)
+
+
+_model_ = net_cos()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/dist/__init__.py b/res/PyTorchExamples/examples/dist/__init__.py
new file mode 100644
index 000000000..8f1d4fd3c
--- /dev/null
+++ b/res/PyTorchExamples/examples/dist/__init__.py
@@ -0,0 +1,16 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_linalg_norm(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, inputs):
+ return torch.dist(inputs[0], inputs[1])
+
+
+_model_ = net_linalg_norm()
+# dummy input for onnx generation
+_dummy_ = [torch.randn(1, 2, 3, 3), torch.randn(1, 2, 3, 3)]
diff --git a/res/PyTorchExamples/examples/div/__init__.py b/res/PyTorchExamples/examples/div/__init__.py
new file mode 100644
index 000000000..b94a5d9ab
--- /dev/null
+++ b/res/PyTorchExamples/examples/div/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_div(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, inputs):
+ return torch.div(inputs[0], inputs[1])
+
+
+_model_ = net_div()
+
+# dummy input for onnx generation
+_dummy_ = [torch.randn(1, 2, 3, 3), torch.randn(1, 2, 3, 3)]
diff --git a/res/PyTorchExamples/examples/floor/__init__.py b/res/PyTorchExamples/examples/floor/__init__.py
new file mode 100644
index 000000000..1932a5eba
--- /dev/null
+++ b/res/PyTorchExamples/examples/floor/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_floor(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, input):
+ return torch.floor(input)
+
+
+_model_ = net_floor()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/floor_divide/__init__.py b/res/PyTorchExamples/examples/floor_divide/__init__.py
new file mode 100644
index 000000000..71adba1e0
--- /dev/null
+++ b/res/PyTorchExamples/examples/floor_divide/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_floor_divide(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, inputs):
+ return torch.floor_divide(inputs[0], inputs[1])
+
+
+_model_ = net_floor_divide()
+
+# dummy input for onnx generation
+_dummy_ = [torch.randn(1, 2, 3, 3), torch.randn(1, 2, 3, 3)]
diff --git a/res/PyTorchExamples/examples/ge/__init__.py b/res/PyTorchExamples/examples/ge/__init__.py
new file mode 100644
index 000000000..609463a65
--- /dev/null
+++ b/res/PyTorchExamples/examples/ge/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_ge(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, inputs):
+ return torch.ge(inputs[0], inputs[1])
+
+
+_model_ = net_ge()
+
+# dummy input for onnx generation
+_dummy_ = [torch.randn(1, 2, 3, 3), torch.randn(1, 2, 3, 3)]
diff --git a/res/PyTorchExamples/examples/gt/__init__.py b/res/PyTorchExamples/examples/gt/__init__.py
new file mode 100644
index 000000000..594f09e81
--- /dev/null
+++ b/res/PyTorchExamples/examples/gt/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_gt(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, inputs):
+ return torch.gt(inputs[0], inputs[1])
+
+
+_model_ = net_gt()
+
+# dummy input for onnx generation
+_dummy_ = [torch.randn(1, 2, 3, 3), torch.randn(1, 2, 3, 3)]
diff --git a/res/PyTorchExamples/examples/interpolate/__init__.py b/res/PyTorchExamples/examples/interpolate/__init__.py
new file mode 100644
index 000000000..ba0da42f4
--- /dev/null
+++ b/res/PyTorchExamples/examples/interpolate/__init__.py
@@ -0,0 +1,30 @@
+import torch
+import torch.nn as nn
+
+
+# model
+#
+# Notes:
+# - This operation requires ONNX opset version 11 or higher
+# - onnx-tf 1.9 fails to convert this model with opset version 13+, because the Unsqueeze operation is not yet supported
+class net_interpolate(nn.Module):
+ def __init__(self, scale_factor):
+ super().__init__()
+ self.scale_factor = scale_factor
+
+ def forward(self, input):
+ return torch.nn.functional.interpolate(
+ input,
+ scale_factor=self.scale_factor,
+ mode='bilinear',
+ align_corners=True,
+ recompute_scale_factor=True)
+
+ def onnx_opset_version(self):
+ return 11
+
+
+_model_ = net_interpolate([2, 2])
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
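The onnx_opset_version method above is the hook the export driver (ptem.py, later in this diff) uses to override its default opset of 9; a sketch of the consuming side, with the output filename chosen here only for illustration:

import torch

opset = _model_.onnx_opset_version() if hasattr(_model_, 'onnx_opset_version') else 9
torch.onnx.export(_model_, _dummy_, "interpolate.onnx", opset_version=opset)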
diff --git a/res/PyTorchExamples/examples/le/__init__.py b/res/PyTorchExamples/examples/le/__init__.py
new file mode 100644
index 000000000..abe5c0007
--- /dev/null
+++ b/res/PyTorchExamples/examples/le/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_le(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, inputs):
+ return torch.le(inputs[0], inputs[1])
+
+
+_model_ = net_le()
+
+# dummy input for onnx generation
+_dummy_ = [torch.randn(1, 2, 3, 3), torch.randn(1, 2, 3, 3)]
diff --git a/res/PyTorchExamples/examples/log/__init__.py b/res/PyTorchExamples/examples/log/__init__.py
new file mode 100644
index 000000000..da071959f
--- /dev/null
+++ b/res/PyTorchExamples/examples/log/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_log(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, input):
+ return torch.log(input)
+
+
+_model_ = net_log()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/logical_and/__init__.py b/res/PyTorchExamples/examples/logical_and/__init__.py
new file mode 100644
index 000000000..9e7d85848
--- /dev/null
+++ b/res/PyTorchExamples/examples/logical_and/__init__.py
@@ -0,0 +1,19 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_logical_and(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, inputs):
+ return torch.logical_and(inputs[0], inputs[1])
+
+
+_model_ = net_logical_and()
+
+# dummy input for onnx generation
+_dummy_ = [torch.randn(1, 2, 3, 3).bool(), torch.randn(1, 2, 3, 3).bool()]
+
+# Note: this model has a problem when exporting to ONNX
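One caveat about the dummy inputs here and in the other logical_* examples: torch.randn(...).bool() maps every nonzero float to True, so these tensors are almost surely all-True. If genuinely mixed booleans were wanted, a sketch like this would do:

import torch

mixed = torch.randint(0, 2, (1, 2, 3, 3)).bool()  # roughly half True, half False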
diff --git a/res/PyTorchExamples/examples/logical_or/__init__.py b/res/PyTorchExamples/examples/logical_or/__init__.py
new file mode 100644
index 000000000..fd242df90
--- /dev/null
+++ b/res/PyTorchExamples/examples/logical_or/__init__.py
@@ -0,0 +1,19 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_logical_or(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, inputs):
+ return torch.logical_or(inputs[0], inputs[1])
+
+
+_model_ = net_logical_or()
+
+# dummy input for onnx generation
+_dummy_ = [torch.randn(1, 2, 3, 3).bool(), torch.randn(1, 2, 3, 3).bool()]
+
+# Note: this model has a problem when exporting to ONNX
diff --git a/res/PyTorchExamples/examples/logical_xor/__init__.py b/res/PyTorchExamples/examples/logical_xor/__init__.py
new file mode 100644
index 000000000..ee4bbc7bf
--- /dev/null
+++ b/res/PyTorchExamples/examples/logical_xor/__init__.py
@@ -0,0 +1,19 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_logical_xor(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, inputs):
+ return torch.logical_xor(inputs[0], inputs[1])
+
+
+_model_ = net_logical_xor()
+
+# dummy input for onnx generation
+_dummy_ = [torch.randn(1, 2, 3, 3).bool(), torch.randn(1, 2, 3, 3).bool()]
+
+# Note: this model has a problem when exporting to ONNX
diff --git a/res/PyTorchExamples/examples/lt/__init__.py b/res/PyTorchExamples/examples/lt/__init__.py
new file mode 100644
index 000000000..e6dd4222e
--- /dev/null
+++ b/res/PyTorchExamples/examples/lt/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_lt(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, inputs):
+ return torch.lt(inputs[0], inputs[1])
+
+
+_model_ = net_lt()
+
+# dummy input for onnx generation
+_dummy_ = [torch.randn(1, 2, 3, 3), torch.randn(1, 2, 3, 3)]
diff --git a/res/PyTorchExamples/examples/matmul/__init__.py b/res/PyTorchExamples/examples/matmul/__init__.py
new file mode 100644
index 000000000..0a41d3da0
--- /dev/null
+++ b/res/PyTorchExamples/examples/matmul/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_matmul(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, inputs):
+ return torch.matmul(inputs[0], inputs[1])
+
+
+_model_ = net_matmul()
+
+# dummy input for onnx generation
+_dummy_ = [torch.randn(1, 2, 3, 3), torch.randn(1, 2, 3, 3)]
diff --git a/res/PyTorchExamples/examples/min-1/__init__.py b/res/PyTorchExamples/examples/min-1/__init__.py
new file mode 100644
index 000000000..1be38fd8e
--- /dev/null
+++ b/res/PyTorchExamples/examples/min-1/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_min(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, input):
+ return torch.min(input, 0, True)
+
+
+_model_ = net_min()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
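Note that torch.min(input, 0, True) above is the (input, dim, keepdim) overload, which returns a (values, indices) pair rather than a single tensor:

import torch

values, indices = torch.min(torch.randn(1, 2, 3, 3), 0, True)
assert values.shape == (1, 2, 3, 3)  # keepdim=True keeps the reduced dim as size 1
assert indices.dtype == torch.int64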
diff --git a/res/PyTorchExamples/examples/min/__init__.py b/res/PyTorchExamples/examples/min/__init__.py
new file mode 100644
index 000000000..21f75fedc
--- /dev/null
+++ b/res/PyTorchExamples/examples/min/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_min(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, input):
+ return torch.min(input)
+
+
+_model_ = net_min()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/mul/__init__.py b/res/PyTorchExamples/examples/mul/__init__.py
new file mode 100644
index 000000000..a7a2db92f
--- /dev/null
+++ b/res/PyTorchExamples/examples/mul/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_mul(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, inputs):
+ return torch.mul(inputs[0], inputs[1])
+
+
+_model_ = net_mul()
+
+# dummy input for onnx generation
+_dummy_ = [torch.randn(1, 2, 3, 3), torch.randn(1, 2, 3, 3)]
diff --git a/res/PyTorchExamples/examples/ne/__init__.py b/res/PyTorchExamples/examples/ne/__init__.py
new file mode 100644
index 000000000..0c339cbad
--- /dev/null
+++ b/res/PyTorchExamples/examples/ne/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_ne(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, inputs):
+ return torch.ne(inputs[0], inputs[1])
+
+
+_model_ = net_ne()
+
+# dummy input for onnx generation
+_dummy_ = [torch.randn(1, 2, 3, 3), torch.randn(1, 2, 3, 3)]
diff --git a/res/PyTorchExamples/examples/neg/__init__.py b/res/PyTorchExamples/examples/neg/__init__.py
new file mode 100644
index 000000000..bde1c0628
--- /dev/null
+++ b/res/PyTorchExamples/examples/neg/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_neg(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, input):
+ return torch.neg(input)
+
+
+_model_ = net_neg()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/normalize/__init__.py b/res/PyTorchExamples/examples/normalize/__init__.py
new file mode 100644
index 000000000..288353ab4
--- /dev/null
+++ b/res/PyTorchExamples/examples/normalize/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_normalize(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, input):
+ return torch.nn.functional.normalize(input, p=2.0, dim=3, eps=1e-12)
+
+
+_model_ = net_normalize()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
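For reference, F.normalize with p=2.0 divides each slice along dim by max(||x||_2, eps); a manual equivalent:

import torch

x = torch.randn(1, 2, 3, 3)
manual = x / x.norm(p=2.0, dim=3, keepdim=True).clamp(min=1e-12)
assert torch.allclose(manual, torch.nn.functional.normalize(x, p=2.0, dim=3, eps=1e-12))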
diff --git a/res/PyTorchExamples/examples/permute/__init__.py b/res/PyTorchExamples/examples/permute/__init__.py
new file mode 100644
index 000000000..e8e6abe51
--- /dev/null
+++ b/res/PyTorchExamples/examples/permute/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_permute(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, input):
+ return input.permute(3, 0, 2, 1)
+
+
+_model_ = net_permute()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/pow/__init__.py b/res/PyTorchExamples/examples/pow/__init__.py
new file mode 100644
index 000000000..6781ba997
--- /dev/null
+++ b/res/PyTorchExamples/examples/pow/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_pow(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, input):
+ return torch.pow(input, 5)
+
+
+_model_ = net_pow()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/reshape/__init__.py b/res/PyTorchExamples/examples/reshape/__init__.py
new file mode 100644
index 000000000..bf083a498
--- /dev/null
+++ b/res/PyTorchExamples/examples/reshape/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_reshape(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, input):
+ return torch.reshape(input, (2, 9))
+
+
+_model_ = net_reshape()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/rsqrt/__init__.py b/res/PyTorchExamples/examples/rsqrt/__init__.py
new file mode 100644
index 000000000..63a233330
--- /dev/null
+++ b/res/PyTorchExamples/examples/rsqrt/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_rsqrt(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, input):
+ return torch.rsqrt(input)
+
+
+_model_ = net_rsqrt()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/sin/__init__.py b/res/PyTorchExamples/examples/sin/__init__.py
new file mode 100644
index 000000000..7aed83378
--- /dev/null
+++ b/res/PyTorchExamples/examples/sin/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_sin(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, input):
+ return torch.sin(input)
+
+
+_model_ = net_sin()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/slice/__init__.py b/res/PyTorchExamples/examples/slice/__init__.py
new file mode 100644
index 000000000..f00428d06
--- /dev/null
+++ b/res/PyTorchExamples/examples/slice/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_slice(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, input):
+ return input[:, :, :, 0:1]
+
+
+_model_ = net_slice()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/split-1/__init__.py b/res/PyTorchExamples/examples/split-1/__init__.py
new file mode 100644
index 000000000..33811fd44
--- /dev/null
+++ b/res/PyTorchExamples/examples/split-1/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_split(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, input):
+ return torch.split(input, (1, 3))
+
+
+_model_ = net_split()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(4, 2, 3, 3)
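With section sizes (1, 3) and the default dim=0, the split above yields chunks of sizes 1 and 3 along the batch dim:

import torch

a, b = torch.split(torch.randn(4, 2, 3, 3), (1, 3))
assert a.shape == (1, 2, 3, 3) and b.shape == (3, 2, 3, 3)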
diff --git a/res/PyTorchExamples/examples/split-2/__init__.py b/res/PyTorchExamples/examples/split-2/__init__.py
new file mode 100644
index 000000000..9475aebe5
--- /dev/null
+++ b/res/PyTorchExamples/examples/split-2/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_split(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, input):
+ return torch.split(input, (1, 3), dim=1)
+
+
+_model_ = net_split()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 4, 3, 3)
diff --git a/res/PyTorchExamples/examples/split/__init__.py b/res/PyTorchExamples/examples/split/__init__.py
new file mode 100644
index 000000000..3a323670e
--- /dev/null
+++ b/res/PyTorchExamples/examples/split/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_split(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, input):
+ return torch.split(input, 2)
+
+
+_model_ = net_split()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(2, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/sqrt/__init__.py b/res/PyTorchExamples/examples/sqrt/__init__.py
new file mode 100644
index 000000000..ceba05107
--- /dev/null
+++ b/res/PyTorchExamples/examples/sqrt/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_sqrt(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, input):
+ return torch.sqrt(input)
+
+
+_model_ = net_sqrt()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/squeeze-1/__init__.py b/res/PyTorchExamples/examples/squeeze-1/__init__.py
new file mode 100644
index 000000000..9a485be7c
--- /dev/null
+++ b/res/PyTorchExamples/examples/squeeze-1/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_squeeze(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, input):
+ return torch.squeeze(input, 2)
+
+
+_model_ = net_squeeze()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(2, 1, 1, 3)
diff --git a/res/PyTorchExamples/examples/squeeze/__init__.py b/res/PyTorchExamples/examples/squeeze/__init__.py
new file mode 100644
index 000000000..98e8c5753
--- /dev/null
+++ b/res/PyTorchExamples/examples/squeeze/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_squeeze(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, input):
+ return torch.squeeze(input)
+
+
+_model_ = net_squeeze()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(2, 1, 3, 3)
diff --git a/res/PyTorchExamples/examples/strided_slice/__init__.py b/res/PyTorchExamples/examples/strided_slice/__init__.py
new file mode 100644
index 000000000..7277da873
--- /dev/null
+++ b/res/PyTorchExamples/examples/strided_slice/__init__.py
@@ -0,0 +1,25 @@
+import torch
+import torch.nn as nn
+
+
+# model
+#
+# Notes:
+# - This model requires opset version 10+. Earlier versions do not support strides.
+class net_strided_slice(nn.Module):
+ def __init__(self, begin, end, stride):
+ super().__init__()
+ self.key = [slice(begin[i], end[i], stride[i]) for i in range(len(begin))]
+
+ def forward(self, input):
+        # this is the general way to express input[:, :, 1:5:2, 0:5:2]
+ return input[self.key]
+
+ def onnx_opset_version(self):
+ return 10
+
+
+_model_ = net_strided_slice([0, 0, 1, 0], [1, 3, 5, 5], [1, 1, 2, 2])
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 3, 5, 5)
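A check that the slice-key construction above matches plain subscript syntax, assuming the class is importable:

import torch

x = torch.randn(1, 3, 5, 5)
m = net_strided_slice([0, 0, 1, 0], [1, 3, 5, 5], [1, 1, 2, 2])
assert torch.equal(m(x), x[0:1, 0:3, 1:5:2, 0:5:2])  # -> shape (1, 3, 2, 3)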
diff --git a/res/PyTorchExamples/examples/sub/__init__.py b/res/PyTorchExamples/examples/sub/__init__.py
new file mode 100644
index 000000000..2dc4a5ee0
--- /dev/null
+++ b/res/PyTorchExamples/examples/sub/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_sub(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, inputs):
+ return torch.sub(inputs[0], inputs[1])
+
+
+_model_ = net_sub()
+
+# dummy input for onnx generation
+_dummy_ = [torch.randn(1, 2, 3, 3), torch.randn(1, 2, 3, 3)]
diff --git a/res/PyTorchExamples/examples/sum/__init__.py b/res/PyTorchExamples/examples/sum/__init__.py
new file mode 100644
index 000000000..a5e93e683
--- /dev/null
+++ b/res/PyTorchExamples/examples/sum/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_sum(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, input):
+ return torch.sum(input)
+
+
+_model_ = net_sum()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/where/__init__.py b/res/PyTorchExamples/examples/where/__init__.py
new file mode 100644
index 000000000..0cc7a12c2
--- /dev/null
+++ b/res/PyTorchExamples/examples/where/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_where(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, inputs):
+ return torch.where(inputs[0] > 0, inputs[0], inputs[1])
+
+
+_model_ = net_where()
+
+# dummy input for onnx generation
+_dummy_ = [torch.randn(1, 2, 3, 3), torch.ones(1, 2, 3, 3)]
diff --git a/res/PyTorchExamples/examples/zeros_like/__init__.py b/res/PyTorchExamples/examples/zeros_like/__init__.py
new file mode 100644
index 000000000..0841a5ac3
--- /dev/null
+++ b/res/PyTorchExamples/examples/zeros_like/__init__.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_zeros_like(nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, input):
+ return torch.zeros_like(input)
+
+
+_model_ = net_zeros_like()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/ptem.py b/res/PyTorchExamples/ptem.py
new file mode 100755
index 000000000..6cc28b66a
--- /dev/null
+++ b/res/PyTorchExamples/ptem.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# PyTorch Example manager
+
+import torch
+import onnx
+import onnx_tf
+import tensorflow as tf
+import importlib
+import argparse
+
+from pathlib import Path
+
+print("PyTorch version=", torch.__version__)
+print("ONNX version=", onnx.__version__)
+print("ONNX-TF version=", onnx_tf.__version__)
+print("TF version=", tf.__version__)
+
+parser = argparse.ArgumentParser(description='Process PyTorch python examples')
+
+parser.add_argument('examples', metavar='EXAMPLES', nargs='+')
+
+args = parser.parse_args()
+
+output_folder = "./output/"
+
+Path(output_folder).mkdir(parents=True, exist_ok=True)
+
+for example in args.examples:
+ # load example code
+ module = importlib.import_module("examples." + example)
+
+ # save .pth
+ torch.save(module._model_, output_folder + example + ".pth")
+ print("Generate '" + example + ".pth' - Done")
+
+ opset_version = 9
+ if hasattr(module._model_, 'onnx_opset_version'):
+ opset_version = module._model_.onnx_opset_version()
+
+ onnx_model_path = output_folder + example + ".onnx"
+
+ torch.onnx.export(
+ module._model_, module._dummy_, onnx_model_path, opset_version=opset_version)
+ print("Generate '" + example + ".onnx' - Done")
+
+ onnx_model = onnx.load(onnx_model_path)
+ onnx.checker.check_model(onnx_model)
+
+ inferred_model = onnx.shape_inference.infer_shapes(onnx_model)
+ onnx.checker.check_model(inferred_model)
+ onnx.save(inferred_model, onnx_model_path)
+
+ tf_prep = onnx_tf.backend.prepare(inferred_model)
+ tf_prep.export_graph(path=output_folder + example + ".TF")
+ print("Generate '" + example + " TF' - Done")
+
+ # for testing...
+ converter = tf.lite.TFLiteConverter.from_saved_model(output_folder + example + ".TF")
+ converter.allow_custom_ops = True
+ converter.experimental_new_converter = True
+ converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]
+
+ tflite_model = converter.convert()
+ open(output_folder + example + ".tflite", "wb").write(tflite_model)
+ print("Generate '" + example + ".tflite' - Done")
diff --git a/res/TensorFlowLiteRecipes/Add_002/test.recipe b/res/TensorFlowLiteRecipes/Add_002/test.recipe
new file mode 100644
index 000000000..12ba8000b
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Add_002/test.recipe
@@ -0,0 +1,32 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 2 dim: 3 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 2 dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "2" arg: "-3" arg: "-4"
+ arg: "-5" arg: "6" arg: "-7" arg: "8"
+ arg: "4" arg: "-2" arg: "3" arg: "-1"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 2 dim: 3 }
+}
+operation {
+ type: "Add"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm1"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Add_002/test.reverse b/res/TensorFlowLiteRecipes/Add_002/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Add_002/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Add_STR_000/test.recipe b/res/TensorFlowLiteRecipes/Add_STR_000/test.recipe
new file mode 100644
index 000000000..32f77dd36
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Add_STR_000/test.recipe
@@ -0,0 +1,33 @@
+# This is a test for import/export of the STRING tensor type
+# An interpreter or runtime may fail, as Add does not support this type
+
+operand {
+ name: "ifm"
+ type: STRING
+ shape { }
+}
+operand {
+ name: "suffix"
+ type: STRING
+ shape { }
+ filler {
+ tag: "explicit"
+ arg: "Hello"
+ }
+}
+operand {
+ name: "ofm"
+ type: STRING
+ shape { }
+}
+operation {
+ type: "Add"
+ input: "ifm"
+ input: "suffix"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Add_STR_001/test.recipe b/res/TensorFlowLiteRecipes/Add_STR_001/test.recipe
new file mode 100644
index 000000000..b3310650c
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Add_STR_001/test.recipe
@@ -0,0 +1,34 @@
+# This is a test for import/export of the STRING tensor type
+# An interpreter or runtime may fail, as Add does not support this type
+
+operand {
+ name: "ifm"
+ type: STRING
+ shape { }
+}
+operand {
+ name: "suffix"
+ type: STRING
+ shape { dim: 2 }
+ filler {
+ tag: "explicit"
+ arg: "Hello"
+ arg: "World"
+ }
+}
+operand {
+ name: "ofm"
+ type: STRING
+ shape { }
+}
+operation {
+ type: "Add"
+ input: "ifm"
+ input: "suffix"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/ArgMax_004/test.recipe b/res/TensorFlowLiteRecipes/ArgMax_004/test.recipe
new file mode 100644
index 000000000..b31e16043
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ArgMax_004/test.recipe
@@ -0,0 +1,30 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: INT64
+ shape { dim: 1 dim: 4 }
+}
+operand {
+ name: "argmax/dim"
+ type: INT32
+ shape { }
+ filler {
+ tag: "explicit"
+ arg: "-1"
+ }
+}
+operation {
+ type: "ArgMax"
+ argmax_options {
+ output_type: INT64
+ }
+ input: "ifm"
+ input: "argmax/dim"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/ArgMax_004/test.reverse b/res/TensorFlowLiteRecipes/ArgMax_004/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ArgMax_004/test.reverse
diff --git a/res/TensorFlowLiteRecipes/BroadcastTo_000/test.recipe b/res/TensorFlowLiteRecipes/BroadcastTo_000/test.recipe
new file mode 100644
index 000000000..015e40bc4
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/BroadcastTo_000/test.recipe
@@ -0,0 +1,24 @@
+operand {
+ name: "bc_input"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+}
+operand {
+ name: "bc_shape"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "1" arg: "2" arg: "3" }
+}
+operand {
+ name: "bc_ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 3 }
+}
+operation {
+ type: "BroadcastTo"
+ input: "bc_input"
+ input: "bc_shape"
+ output: "bc_ofm"
+}
+input: "bc_input"
+output: "bc_ofm"
diff --git a/res/TensorFlowLiteRecipes/Concatenation_001/test.recipe b/res/TensorFlowLiteRecipes/Concatenation_001/test.recipe
new file mode 100644
index 000000000..211976c8c
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Concatenation_001/test.recipe
@@ -0,0 +1,32 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 2 dim: 1 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 2 dim: 2 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "2" arg: "-3" arg: "-4"
+ arg: "-5" arg: "6" arg: "-7" arg: "8"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 2 dim: 3 }
+}
+operation {
+ type: "Concatenation"
+ concatenation_options {
+ axis: 3
+ activation: NONE
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+}
+input: "ifm1"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Concatenation_001/test.reverse b/res/TensorFlowLiteRecipes/Concatenation_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Concatenation_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Conv2D_005/test.recipe b/res/TensorFlowLiteRecipes/Conv2D_005/test.recipe
new file mode 100644
index 000000000..2cd7b9065
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Conv2D_005/test.recipe
@@ -0,0 +1,34 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 2 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 1 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+ input: "ifm"
+ input: "ker"
+ input: ""
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Densify_000/test.recipe b/res/TensorFlowLiteRecipes/Densify_000/test.recipe
new file mode 100644
index 000000000..480c52f15
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Densify_000/test.recipe
@@ -0,0 +1,44 @@
+operand {
+ name: "in"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "sparse"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "0" arg: "0" arg: "0"
+ arg: "0" arg: "2" arg: "0" arg: "0"
+ arg: "0" arg: "0" arg: "3" arg: "0"
+ arg: "0" arg: "0" arg: "0" arg: "4"
+ }
+ make_sparse: true
+}
+operand {
+ name: "dense"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "out"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operation {
+ type: "Densify"
+ input: "sparse"
+ output: "dense"
+}
+operation {
+ type: "Add"
+ input: "in"
+ input: "dense"
+ output: "out"
+ add_options {
+ activation: NONE
+ }
+}
+input: "in"
+output: "out"
diff --git a/res/TensorFlowLiteRecipes/Dequantize_000/test.recipe b/res/TensorFlowLiteRecipes/Dequantize_000/test.recipe
new file mode 100644
index 000000000..bbd3220c9
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Dequantize_000/test.recipe
@@ -0,0 +1,18 @@
+operand {
+ name: "ifm"
+ type: UINT8
+ shape { dim: 4 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 4 }
+}
+operation {
+ type: "Dequantize"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Dequantize_000/test.reverse b/res/TensorFlowLiteRecipes/Dequantize_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Dequantize_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Equal_U8_000/test.recipe b/res/TensorFlowLiteRecipes/Equal_U8_000/test.recipe
new file mode 100644
index 000000000..f1fdec6b6
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Equal_U8_000/test.recipe
@@ -0,0 +1,28 @@
+operand {
+ name: "ifm1"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: -1 max: 1 scale: 0.0078431373 zero_point: 128 }
+}
+operand {
+ name: "ifm2"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: -1 max: 1 scale: 0.0078431373 zero_point: 128 }
+}
+operand {
+ name: "ofm"
+ type: BOOL
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "Equal"
+ equal_options {
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Equal_U8_000/test.reverse b/res/TensorFlowLiteRecipes/Equal_U8_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Equal_U8_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/ExpandDims_004/test.recipe b/res/TensorFlowLiteRecipes/ExpandDims_004/test.recipe
new file mode 100644
index 000000000..20e6555f7
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ExpandDims_004/test.recipe
@@ -0,0 +1,30 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 3 dim: 3 }
+}
+
+operand {
+ name: "ifm2"
+ type: INT32
+ shape { }
+ filler {
+ tag: "constant"
+ arg: "-1"
+ }
+}
+
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 3 dim: 3 dim: 1 }
+}
+
+operation {
+ type: "ExpandDims"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+}
+input: "ifm1"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/FakeQuant_000/test.recipe b/res/TensorFlowLiteRecipes/FakeQuant_000/test.recipe
new file mode 100644
index 000000000..c96466f83
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/FakeQuant_000/test.recipe
@@ -0,0 +1,25 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+
+operation {
+ type: "FakeQuant"
+ fakequant_options {
+ min: 0.0
+ max: 1.0
+ num_bits: 8
+ narrow_range: false
+ }
+ input: "ifm"
+ output: "ofm"
+}
+
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/FakeQuant_000/test.reverse b/res/TensorFlowLiteRecipes/FakeQuant_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/FakeQuant_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/FullyConnected_004/test.recipe b/res/TensorFlowLiteRecipes/FullyConnected_004/test.recipe
new file mode 100644
index 000000000..b89eabeeb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/FullyConnected_004/test.recipe
@@ -0,0 +1,69 @@
+operand {
+ name: "in"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 }
+}
+operand {
+ name: "weight"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "0" arg: "0" arg: "4"
+ arg: "2" arg: "3" arg: "0" arg: "0"
+ arg: "5" arg: "0" arg: "0" arg: "6"
+ }
+ sparsity {
+ traversal_order { dim: 0 dim: 1 dim: 2 dim: 3 }
+ block_map { dim: 0 dim: 1 }
+ dim_metadata {
+ format: DENSE
+ dense_size: 2
+ }
+ dim_metadata {
+ format: SPARSE_CSR
+ array_segments {
+ dim: 0 dim: 2 dim: 3
+ type: UINT8VEC
+ }
+ array_indices {
+ dim: 0 dim: 1 dim: 1
+ type: UINT8VEC
+ }
+ }
+ dim_metadata {
+ format: DENSE
+ dense_size: 2
+ }
+ dim_metadata {
+ format: DENSE
+ dense_size: 2
+ }
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "-2" arg: "-3" arg: "4"
+ }
+}
+operand {
+ name: "out"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 }
+}
+operation {
+ type: "FullyConnected"
+ fullyconnected_options {
+ activation: NONE
+ }
+ input: "in"
+ input: "weight"
+ input: "bias"
+ output: "out"
+}
+input: "in"
+output: "out"
diff --git a/res/TensorFlowLiteRecipes/FullyConnected_004/test.reverse b/res/TensorFlowLiteRecipes/FullyConnected_004/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/FullyConnected_004/test.reverse
diff --git a/res/TensorFlowLiteRecipes/FullyConnected_005/test.recipe b/res/TensorFlowLiteRecipes/FullyConnected_005/test.recipe
new file mode 100644
index 000000000..0aa1dfa77
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/FullyConnected_005/test.recipe
@@ -0,0 +1,43 @@
+operand {
+ name: "in"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 }
+}
+operand {
+ name: "weight"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "0" arg: "2" arg: "3"
+ arg: "0" arg: "4" arg: "0" arg: "0"
+ arg: "0" arg: "0" arg: "5" arg: "0"
+ arg: "0" arg: "0" arg: "0" arg: "6"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "-2" arg: "-3" arg: "4"
+ }
+}
+operand {
+ name: "out"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 }
+}
+operation {
+ type: "FullyConnected"
+ fullyconnected_options {
+ activation: NONE
+ }
+ input: "in"
+ input: "weight"
+ input: "bias"
+ output: "out"
+}
+input: "in"
+output: "out"
diff --git a/res/TensorFlowLiteRecipes/FullyConnected_006/test.recipe b/res/TensorFlowLiteRecipes/FullyConnected_006/test.recipe
new file mode 100644
index 000000000..b5f329b57
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/FullyConnected_006/test.recipe
@@ -0,0 +1,29 @@
+operand {
+ name: "in"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 4 }
+}
+operand {
+ name: "weight"
+ type: FLOAT32
+ shape { dim: 2 dim: 4 }
+}
+operand {
+ name: "out"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 2 }
+}
+operation {
+ type: "FullyConnected"
+ fullyconnected_options {
+ activation: NONE
+ keep_num_dims: true
+ }
+ input: "in"
+ input: "weight"
+ input: ""
+ output: "out"
+}
+input: "in"
+input: "weight"
+output: "out"
diff --git a/res/TensorFlowLiteRecipes/FullyConnected_006/test.reverse b/res/TensorFlowLiteRecipes/FullyConnected_006/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/FullyConnected_006/test.reverse
diff --git a/res/TensorFlowLiteRecipes/FullyConnected_007/test.recipe b/res/TensorFlowLiteRecipes/FullyConnected_007/test.recipe
new file mode 100644
index 000000000..572badfbb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/FullyConnected_007/test.recipe
@@ -0,0 +1,29 @@
+operand {
+ name: "x"
+ type: FLOAT32
+ shape { dim: 2 dim: 4 }
+}
+operand {
+ name: "y"
+ type: FLOAT32
+ shape { dim: 2 dim: 4 }
+}
+operand {
+ name: "out"
+ type: FLOAT32
+ shape { dim: 2 dim: 2 }
+}
+operation {
+ type: "FullyConnected"
+ fullyconnected_options {
+ activation: NONE
+ keep_num_dims: true
+ }
+ input: "x"
+ input: "y"
+ input: ""
+ output: "out"
+}
+input: "x"
+input: "y"
+output: "out"
diff --git a/res/TensorFlowLiteRecipes/FullyConnected_007/test.reverse b/res/TensorFlowLiteRecipes/FullyConnected_007/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/FullyConnected_007/test.reverse
diff --git a/res/TensorFlowLiteRecipes/FullyConnected_007/test.rule b/res/TensorFlowLiteRecipes/FullyConnected_007/test.rule
new file mode 100644
index 000000000..01518e575
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/FullyConnected_007/test.rule
@@ -0,0 +1,7 @@
+# To check if FullyConnected with a non-const weight is replaced by BatchMatMul
+# by the replace_non_const_fc_with_batch_matmul pass
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "BATCH_MATMUL_EXIST" $(op_count BATCH_MATMUL) '=' 1
+RULE "NO_FULLY_CONNECTED" $(op_count FULLY_CONNECTED) '=' 0
diff --git a/res/TensorFlowLiteRecipes/FullyConnected_008/test.recipe b/res/TensorFlowLiteRecipes/FullyConnected_008/test.recipe
new file mode 100644
index 000000000..89b5c9916
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/FullyConnected_008/test.recipe
@@ -0,0 +1,29 @@
+operand {
+ name: "x"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 4 }
+}
+operand {
+ name: "y"
+ type: FLOAT32
+ shape { dim: 2 dim: 4 }
+}
+operand {
+ name: "out"
+ type: FLOAT32
+ shape { dim: 2 dim: 2 }
+}
+operation {
+ type: "FullyConnected"
+ fullyconnected_options {
+ activation: NONE
+ keep_num_dims: false
+ }
+ input: "x"
+ input: "y"
+ input: ""
+ output: "out"
+}
+input: "x"
+input: "y"
+output: "out"
diff --git a/res/TensorFlowLiteRecipes/FullyConnected_008/test.reverse b/res/TensorFlowLiteRecipes/FullyConnected_008/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/FullyConnected_008/test.reverse
diff --git a/res/TensorFlowLiteRecipes/FullyConnected_008/test.rule b/res/TensorFlowLiteRecipes/FullyConnected_008/test.rule
new file mode 100644
index 000000000..3cf4c2616
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/FullyConnected_008/test.rule
@@ -0,0 +1,8 @@
+# To check if FullyConnected with a non-const weight is replaced by BatchMatMul
+# by the replace_non_const_fc_with_batch_matmul pass
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "BATCH_MATMUL_EXIST" $(op_count BATCH_MATMUL) '=' 1
+RULE "RESHAPE_EXIST" $(op_count RESHAPE) '=' 1
+RULE "NO_FULLY_CONNECTED" $(op_count FULLY_CONNECTED) '=' 0
diff --git a/res/TensorFlowLiteRecipes/FullyConnected_009/test.recipe b/res/TensorFlowLiteRecipes/FullyConnected_009/test.recipe
new file mode 100644
index 000000000..4c02ffc15
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/FullyConnected_009/test.recipe
@@ -0,0 +1,43 @@
+operand {
+ name: "in"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 3 }
+}
+operand {
+ name: "weight"
+ type: FLOAT32
+ shape { dim: 4 dim: 3 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 4 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "out"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 4 }
+}
+operation {
+ type: "FullyConnected"
+ fullyconnected_options {
+ activation: NONE
+ keep_num_dims: true
+ }
+ input: "in"
+ input: "weight"
+ input: "bias"
+ output: "out"
+}
+input: "in"
+output: "out"
diff --git a/res/TensorFlowLiteRecipes/FullyConnected_009/test.reverse b/res/TensorFlowLiteRecipes/FullyConnected_009/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/FullyConnected_009/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Gather_000/test.recipe b/res/TensorFlowLiteRecipes/Gather_000/test.recipe
index 4c6c99da6..b9b2412cf 100644
--- a/res/TensorFlowLiteRecipes/Gather_000/test.recipe
+++ b/res/TensorFlowLiteRecipes/Gather_000/test.recipe
@@ -24,5 +24,4 @@ operation {
output: "ofm"
}
input: "param"
-input: "indices"
output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Gather_001/test.recipe b/res/TensorFlowLiteRecipes/Gather_001/test.recipe
new file mode 100644
index 000000000..cc23cf11d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Gather_001/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "param"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 3 dim: 4 }
+}
+operand {
+ name: "indices"
+ type: INT32
+ shape { dim: 4 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 3 dim: 4 }
+}
+operation {
+ type: "Gather"
+ gather_options {
+ axis: 3
+ }
+ input: "param"
+ input: "indices"
+ output: "ofm"
+}
+input: "param"
+input: "indices"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Gather_001/test.reverse b/res/TensorFlowLiteRecipes/Gather_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Gather_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Gelu_000/test.recipe b/res/TensorFlowLiteRecipes/Gelu_000/test.recipe
new file mode 100644
index 000000000..1c4da104c
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Gelu_000/test.recipe
@@ -0,0 +1,20 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Gelu"
+ gelu_options {
+ approximate: false
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Gelu_000/test.reverse b/res/TensorFlowLiteRecipes/Gelu_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Gelu_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/GreaterEqual_U8_000/test.recipe b/res/TensorFlowLiteRecipes/GreaterEqual_U8_000/test.recipe
new file mode 100644
index 000000000..031073b2f
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/GreaterEqual_U8_000/test.recipe
@@ -0,0 +1,28 @@
+operand {
+ name: "ifm1"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: -1 max: 1 scale: 0.0078431373 zero_point: 128 }
+}
+operand {
+ name: "ifm2"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: -1 max: 1 scale: 0.0078431373 zero_point: 128 }
+}
+operand {
+ name: "ofm"
+ type: BOOL
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "GreaterEqual"
+ greaterequal_options {
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/GreaterEqual_U8_000/test.reverse b/res/TensorFlowLiteRecipes/GreaterEqual_U8_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/GreaterEqual_U8_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Greater_U8_000/test.recipe b/res/TensorFlowLiteRecipes/Greater_U8_000/test.recipe
new file mode 100644
index 000000000..41e89c860
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Greater_U8_000/test.recipe
@@ -0,0 +1,28 @@
+operand {
+ name: "ifm1"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: -1 max: 1 scale: 0.0078431373 zero_point: 128 }
+}
+operand {
+ name: "ifm2"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: -1 max: 1 scale: 0.0078431373 zero_point: 128 }
+}
+operand {
+ name: "ofm"
+ type: BOOL
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "Greater"
+ greater_options {
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Greater_U8_000/test.reverse b/res/TensorFlowLiteRecipes/Greater_U8_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Greater_U8_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/HardSwish_000/test.recipe b/res/TensorFlowLiteRecipes/HardSwish_000/test.recipe
new file mode 100644
index 000000000..9dab36e20
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/HardSwish_000/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "HardSwish"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/HardSwish_000/test.reverse b/res/TensorFlowLiteRecipes/HardSwish_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/HardSwish_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/HardSwish_001/test.recipe b/res/TensorFlowLiteRecipes/HardSwish_001/test.recipe
new file mode 100644
index 000000000..a712e236c
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/HardSwish_001/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 128 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 128 }
+}
+operation {
+ type: "HardSwish"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/HardSwish_001/test.rule b/res/TensorFlowLiteRecipes/HardSwish_001/test.rule
new file mode 100644
index 000000000..6970e2628
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/HardSwish_001/test.rule
@@ -0,0 +1,8 @@
+# To check if HardSwish is converted to Add, Mul and Relu6 operations
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "ADD_EXIST" $(op_count ADD) '=' 1
+RULE "MUL_EXIST" $(op_count MUL) '=' 2
+RULE "RELU6_EXIST" $(op_count RELU6) '=' 1
+RULE "HARDSWISH_NOT_EXIST" $(op_count HARDSWISH) '=' 0
diff --git a/res/TensorFlowLiteRecipes/LessEqual_U8_000/test.recipe b/res/TensorFlowLiteRecipes/LessEqual_U8_000/test.recipe
new file mode 100644
index 000000000..c83f4d4b4
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/LessEqual_U8_000/test.recipe
@@ -0,0 +1,28 @@
+operand {
+ name: "ifm1"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: -1 max: 1 scale: 0.0078431373 zero_point: 128 }
+}
+operand {
+ name: "ifm2"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: -1 max: 1 scale: 0.0078431373 zero_point: 128 }
+}
+operand {
+ name: "ofm"
+ type: BOOL
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "LessEqual"
+ lessequal_options {
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/LessEqual_U8_000/test.reverse b/res/TensorFlowLiteRecipes/LessEqual_U8_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/LessEqual_U8_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Less_U8_000/test.recipe b/res/TensorFlowLiteRecipes/Less_U8_000/test.recipe
new file mode 100644
index 000000000..a6ee99ce5
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Less_U8_000/test.recipe
@@ -0,0 +1,28 @@
+operand {
+ name: "ifm1"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: -1 max: 1 scale: 0.0078431373 zero_point: 128 }
+}
+operand {
+ name: "ifm2"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: -1 max: 1 scale: 0.0078431373 zero_point: 128 }
+}
+operand {
+ name: "ofm"
+ type: BOOL
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "Less"
+ less_options {
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Less_U8_000/test.reverse b/res/TensorFlowLiteRecipes/Less_U8_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Less_U8_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/LogSoftmax_U8_000/test.recipe b/res/TensorFlowLiteRecipes/LogSoftmax_U8_000/test.recipe
new file mode 100644
index 000000000..d960567e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/LogSoftmax_U8_000/test.recipe
@@ -0,0 +1,21 @@
+operand {
+ name: "ifm"
+ type: UINT8
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ quant { min: -4.952 max: 4.939 scale: 0.0388 zero_point: 128 }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ quant { min: -15.9375 max: 0 scale: 0.0625 zero_point: 255 }
+}
+operation {
+ type: "LogSoftmax"
+ log_softmax_options {
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/LogSoftmax_U8_000/test.reverse b/res/TensorFlowLiteRecipes/LogSoftmax_U8_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/LogSoftmax_U8_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/MaxPoolWithArgmax_000/test.recipe b/res/TensorFlowLiteRecipes/MaxPoolWithArgmax_000/test.recipe
new file mode 100644
index 000000000..9218c2010
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/MaxPoolWithArgmax_000/test.recipe
@@ -0,0 +1,33 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 18 dim: 18 dim: 1 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 9 dim: 9 dim: 1 }
+}
+operand {
+ name: "argmax"
+ type: INT64
+ shape { dim: 1 dim: 9 dim: 9 dim: 1 }
+}
+operation {
+ type: "MaxPoolWithArgmax"
+ input: "ifm"
+ output: "ofm"
+ output: "argmax"
+ max_pool_with_argmax_options {
+ padding: SAME
+ filter_width: 1
+ filter_height: 1
+ stride_w: 2
+ stride_h: 2
+ output_type: INT64
+ include_batch_in_index: false
+ }
+}
+input: "ifm"
+output: "ofm"
+output: "argmax"
diff --git a/res/TensorFlowLiteRecipes/MaxPoolWithArgmax_000/test.rule b/res/TensorFlowLiteRecipes/MaxPoolWithArgmax_000/test.rule
new file mode 100644
index 000000000..05aa5f7ac
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/MaxPoolWithArgmax_000/test.rule
@@ -0,0 +1,17 @@
+# To check if MaxPoolWithArgmax is transformed into MaxPool, ArgMax, and an index-computation network
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "ARG_MAX_COUNT" $(op_count ARG_MAX) '=' 1
+RULE "ARG_MAX_COUNT" $(op_count MAX_POOL_2D) '=' 1
+RULE "CONV_COUNT" $(op_count CONV_2D) '=' 1
+RULE "SPLIT_COUNT" $(op_count SPLIT) '=' 1
+RULE "RESHAPE_COUNT" $(op_count RESHAPE) '=' 1
+RULE "CAST_COUNT" $(op_count CAST) '=' 2
+RULE "ADD_COUNT" $(op_count ADD) '=' 3
+RULE "MUL_COUNT" $(op_count MUL) '=' 5
+RULE "FLOOR_COUNT" $(op_count FLOOR) '=' 1
+RULE "NEG_COUNT" $(op_count NEG) '=' 1
+RULE "CONCATENATION_COUNT" $(op_count CONCATENATION) '=' 1
+RULE "PADV2_COUNT" $(op_count PADV2) '=' 1
+RULE "CUSTOM_COUNT" $(op_count 'CUSTOM(MaxPoolWithArgmax)') '=' 0
diff --git a/res/TensorFlowLiteRecipes/MaxPoolWithArgmax_001/test.recipe b/res/TensorFlowLiteRecipes/MaxPoolWithArgmax_001/test.recipe
new file mode 100644
index 000000000..9c15a7c63
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/MaxPoolWithArgmax_001/test.recipe
@@ -0,0 +1,33 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 18 dim: 18 dim: 1 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 9 dim: 9 dim: 1 }
+}
+operand {
+ name: "argmax"
+ type: INT32
+ shape { dim: 1 dim: 9 dim: 9 dim: 1 }
+}
+operation {
+ type: "MaxPoolWithArgmax"
+ input: "ifm"
+ output: "ofm"
+ output: "argmax"
+ max_pool_with_argmax_options {
+ padding: SAME
+ filter_width: 4
+ filter_height: 4
+ stride_w: 2
+ stride_h: 2
+ output_type: INT32
+ include_batch_in_index: false
+ }
+}
+input: "ifm"
+output: "ofm"
+output: "argmax"
diff --git a/res/TensorFlowLiteRecipes/MaxPoolWithArgmax_001/test.rule b/res/TensorFlowLiteRecipes/MaxPoolWithArgmax_001/test.rule
new file mode 100644
index 000000000..05aa5f7ac
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/MaxPoolWithArgmax_001/test.rule
@@ -0,0 +1,17 @@
+# To check if MaxPoolWithArgmax is transformed into MaxPool, ArgMax, and an index-computation network
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "ARG_MAX_COUNT" $(op_count ARG_MAX) '=' 1
+RULE "ARG_MAX_COUNT" $(op_count MAX_POOL_2D) '=' 1
+RULE "CONV_COUNT" $(op_count CONV_2D) '=' 1
+RULE "SPLIT_COUNT" $(op_count SPLIT) '=' 1
+RULE "RESHAPE_COUNT" $(op_count RESHAPE) '=' 1
+RULE "CAST_COUNT" $(op_count CAST) '=' 2
+RULE "ADD_COUNT" $(op_count ADD) '=' 3
+RULE "MUL_COUNT" $(op_count MUL) '=' 5
+RULE "FLOOR_COUNT" $(op_count FLOOR) '=' 1
+RULE "NEG_COUNT" $(op_count NEG) '=' 1
+RULE "CONCATENATION_COUNT" $(op_count CONCATENATION) '=' 1
+RULE "PADV2_COUNT" $(op_count PADV2) '=' 1
+RULE "CUSTOM_COUNT" $(op_count 'CUSTOM(MaxPoolWithArgmax)') '=' 0
diff --git a/res/TensorFlowLiteRecipes/MaxPoolWithArgmax_002/test.recipe b/res/TensorFlowLiteRecipes/MaxPoolWithArgmax_002/test.recipe
new file mode 100644
index 000000000..702e01634
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/MaxPoolWithArgmax_002/test.recipe
@@ -0,0 +1,33 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 18 dim: 18 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 2 }
+}
+operand {
+ name: "argmax"
+ type: INT64
+ shape { dim: 1 dim: 8 dim: 8 dim: 2 }
+}
+operation {
+ type: "MaxPoolWithArgmax"
+ input: "ifm"
+ output: "ofm"
+ output: "argmax"
+ max_pool_with_argmax_options {
+ padding: VALID
+ filter_width: 4
+ filter_height: 4
+ stride_w: 2
+ stride_h: 2
+ output_type: INT64
+ include_batch_in_index: false
+ }
+}
+input: "ifm"
+output: "ofm"
+output: "argmax"
diff --git a/res/TensorFlowLiteRecipes/MaxPoolWithArgmax_002/test.rule b/res/TensorFlowLiteRecipes/MaxPoolWithArgmax_002/test.rule
new file mode 100644
index 000000000..a388374c2
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/MaxPoolWithArgmax_002/test.rule
@@ -0,0 +1,16 @@
+# To check if MaxPoolWithArgmax is transformed into MaxPool, ArgMax, and an index-computation network
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "ARG_MAX_COUNT" $(op_count ARG_MAX) '=' 2
+RULE "ARG_MAX_COUNT" $(op_count MAX_POOL_2D) '=' 1
+RULE "CONV_COUNT" $(op_count CONV_2D) '=' 2
+RULE "SPLIT_COUNT" $(op_count SPLIT) '=' 1
+RULE "RESHAPE_COUNT" $(op_count RESHAPE) '=' 2
+RULE "CAST_COUNT" $(op_count CAST) '=' 3
+RULE "ADD_COUNT" $(op_count ADD) '=' 7
+RULE "MUL_COUNT" $(op_count MUL) '=' 8
+RULE "FLOOR_COUNT" $(op_count FLOOR) '=' 2
+RULE "NEG_COUNT" $(op_count NEG) '=' 2
+RULE "CONCATENATION_COUNT" $(op_count CONCATENATION) '=' 1
+RULE "CUSTOM_COUNT" $(op_count 'CUSTOM(MaxPoolWithArgmax)') '=' 0
diff --git a/res/TensorFlowLiteRecipes/Maximum_U8_000/test.recipe b/res/TensorFlowLiteRecipes/Maximum_U8_000/test.recipe
new file mode 100644
index 000000000..4e8324ac9
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Maximum_U8_000/test.recipe
@@ -0,0 +1,29 @@
+operand {
+ name: "ifm1"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: -1 max: 1 scale: 0.0078431373 zero_point: 128 }
+}
+operand {
+ name: "ifm2"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: -1 max: 1 scale: 0.0078431373 zero_point: 128 }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: -1 max: 1 scale: 0.0078431373 zero_point: 128 }
+}
+operation {
+ type: "Maximum"
+ maximum_options {
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Maximum_U8_000/test.reverse b/res/TensorFlowLiteRecipes/Maximum_U8_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Maximum_U8_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Mean_U8_dynamic_000/test.recipe b/res/TensorFlowLiteRecipes/Mean_U8_dynamic_000/test.recipe
new file mode 100644
index 000000000..bed256328
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Mean_U8_dynamic_000/test.recipe
@@ -0,0 +1,31 @@
+operand {
+ name: "ifm"
+ type: UINT8
+ shape { dim: 1 dim: 8 dim: 8 dim: 4 }
+ quant { min: -128 max: 127 scale: 1 zero_point: 128 }
+ shape_signature { dim: -1 dim: 8 dim: 8 dim: 4 }
+}
+operand {
+ name: "reduction_indices"
+ type: INT32
+ shape { dim: 2 }
+ filler { tag: "explicit" arg: "1" arg: "2" }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 1 dim: 1 dim: 1 dim: 4 }
+ quant { min: -256 max: 254 scale: 2 zero_point: 128 }
+ shape_signature { dim: -1 dim: 1 dim: 1 dim: 4 }
+}
+operation {
+ type: "Mean"
+ mean_options {
+ keep_dims: true
+ }
+ input: "ifm"
+ input: "reduction_indices"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Mean_U8_dynamic_000/test.reverse b/res/TensorFlowLiteRecipes/Mean_U8_dynamic_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Mean_U8_dynamic_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Mean_dynamic_000/test.recipe b/res/TensorFlowLiteRecipes/Mean_dynamic_000/test.recipe
new file mode 100644
index 000000000..a098c628a
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Mean_dynamic_000/test.recipe
@@ -0,0 +1,29 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 4 }
+ shape_signature { dim: -1 dim: 8 dim: 8 dim: 4 }
+}
+operand {
+ name: "reduction_indices"
+ type: INT32
+ shape { dim: 1 }
+ filler { tag: "explicit" arg: "-1" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+ shape_signature { dim: -1 dim: 8 dim: 8 dim: 1 }
+}
+operation {
+ type: "Mean"
+ mean_options {
+ keep_dims: true
+ }
+ input: "ifm"
+ input: "reduction_indices"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Mean_dynamic_000/test.reverse b/res/TensorFlowLiteRecipes/Mean_dynamic_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Mean_dynamic_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Mean_dynamic_001/test.recipe b/res/TensorFlowLiteRecipes/Mean_dynamic_001/test.recipe
new file mode 100644
index 000000000..bd1a46293
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Mean_dynamic_001/test.recipe
@@ -0,0 +1,29 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 4 }
+ shape_signature { dim: -1 dim: 3 dim: 4 }
+}
+operand {
+ name: "reduction_indices"
+ type: INT32
+ shape { dim: 1 }
+ filler { tag: "explicit" arg: "1" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 }
+ shape_signature { dim: -1 dim: 4 }
+}
+operation {
+ type: "Mean"
+ mean_options {
+ keep_dims: false
+ }
+ input: "ifm"
+ input: "reduction_indices"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Mean_dynamic_001/test.reverse b/res/TensorFlowLiteRecipes/Mean_dynamic_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Mean_dynamic_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Minimum_U8_000/test.recipe b/res/TensorFlowLiteRecipes/Minimum_U8_000/test.recipe
new file mode 100644
index 000000000..a502e3834
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Minimum_U8_000/test.recipe
@@ -0,0 +1,29 @@
+operand {
+ name: "ifm1"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "ifm2"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operation {
+ type: "Minimum"
+ minimum_options {
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Minimum_U8_000/test.reverse b/res/TensorFlowLiteRecipes/Minimum_U8_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Minimum_U8_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Mul_001/test.recipe b/res/TensorFlowLiteRecipes/Mul_001/test.recipe
new file mode 100644
index 000000000..18c19ff19
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Mul_001/test.recipe
@@ -0,0 +1,32 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 2 dim: 3 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 2 dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "2" arg: "-3" arg: "-4"
+ arg: "-5" arg: "6" arg: "-7" arg: "8"
+ arg: "4" arg: "-2" arg: "3" arg: "-1"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 2 dim: 3 }
+}
+operation {
+ type: "Mul"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ mul_options {
+ activation: NONE
+ }
+}
+input: "ifm1"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Mul_001/test.reverse b/res/TensorFlowLiteRecipes/Mul_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Mul_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Net_BroadcastTo_AddV2_000/test.recipe b/res/TensorFlowLiteRecipes/Net_BroadcastTo_AddV2_000/test.recipe
new file mode 100644
index 000000000..5069aac09
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_BroadcastTo_AddV2_000/test.recipe
@@ -0,0 +1,63 @@
+operand {
+ name: "bc_input"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+}
+operand {
+ name: "bc_shape"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "1" arg: "2" arg: "3" }
+}
+operand {
+ name: "bc_ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 3 }
+}
+operation {
+ type: "BroadcastTo"
+ input: "bc_input"
+ input: "bc_shape"
+ output: "bc_ofm"
+}
+operand {
+ name: "reshape_data"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+}
+operand {
+ name: "reshape_shape"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "1" arg: "2" arg: "3" }
+}
+operand {
+ name: "reshape_ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 3 }
+}
+operation {
+ type: "Reshape"
+ reshape_options {
+ new_shape: 1
+ new_shape: 2
+ new_shape: 3
+ }
+ input: "reshape_data"
+ input: "reshape_shape"
+ output: "reshape_ofm"
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 3 }
+}
+operation {
+ type: "AddV2"
+ input: "bc_ofm"
+ input: "reshape_ofm"
+ output: "ofm"
+}
+input: "bc_input"
+input: "reshape_data"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Net_BroadcastTo_AddV2_000/test.rule b/res/TensorFlowLiteRecipes/Net_BroadcastTo_AddV2_000/test.rule
new file mode 100644
index 000000000..fdaa7904a
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_BroadcastTo_AddV2_000/test.rule
@@ -0,0 +1,7 @@
+# To check if BroadcastTo and AddV2 are fused to Add op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "ADD_EXIST" $(op_count ADD) '=' 1
+RULE "NO_BroadcastTo" $(op_count 'CUSTOM(BroadcastTo)') '=' 0
+RULE "NO_AddV2" $(op_count 'CUSTOM(AddV2)') '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_BroadcastTo_AddV2_001/test.recipe b/res/TensorFlowLiteRecipes/Net_BroadcastTo_AddV2_001/test.recipe
new file mode 100644
index 000000000..ca0ad8e03
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_BroadcastTo_AddV2_001/test.recipe
@@ -0,0 +1,63 @@
+operand {
+ name: "bc_input"
+ type: INT64
+ shape { dim: 2 dim: 3 }
+}
+operand {
+ name: "bc_shape"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "1" arg: "2" arg: "3" }
+}
+operand {
+ name: "bc_ofm"
+ type: INT64
+ shape { dim: 1 dim: 2 dim: 3 }
+}
+operation {
+ type: "BroadcastTo"
+ input: "bc_input"
+ input: "bc_shape"
+ output: "bc_ofm"
+}
+operand {
+ name: "reshape_data"
+ type: INT64
+ shape { dim: 2 dim: 3 }
+}
+operand {
+ name: "reshape_shape"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "1" arg: "2" arg: "3" }
+}
+operand {
+ name: "reshape_ofm"
+ type: INT64
+ shape { dim: 1 dim: 2 dim: 3 }
+}
+operation {
+ type: "Reshape"
+ reshape_options {
+ new_shape: 1
+ new_shape: 2
+ new_shape: 3
+ }
+ input: "reshape_data"
+ input: "reshape_shape"
+ output: "reshape_ofm"
+}
+operand {
+ name: "ofm"
+ type: INT64
+ shape { dim: 1 dim: 2 dim: 3 }
+}
+operation {
+ type: "AddV2"
+ input: "bc_ofm"
+ input: "reshape_ofm"
+ output: "ofm"
+}
+input: "bc_input"
+input: "reshape_data"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Net_BroadcastTo_AddV2_001/test.rule b/res/TensorFlowLiteRecipes/Net_BroadcastTo_AddV2_001/test.rule
new file mode 100644
index 000000000..d34458999
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_BroadcastTo_AddV2_001/test.rule
@@ -0,0 +1,7 @@
+# To check if BroadcastTo and AddV2 are not fused to Add op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "BroadcastTo_EXIST" $(op_count 'CUSTOM(BroadcastTo)') '=' 1
+RULE "AddV2_EXIST" $(op_count 'CUSTOM(AddV2)') '=' 1
+RULE "NO_ADD" $(op_count ADD) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_Conv_Add_Mul_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Conv_Add_Mul_000/test.recipe
new file mode 100644
index 000000000..5ee07b456
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Conv_Add_Mul_000/test.recipe
@@ -0,0 +1,92 @@
+operand {
+ name: "ifm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "mul_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "add_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_mul"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "ofm_add"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ }
+ input: "ifm_conv"
+ input: "filter"
+ input: "bias"
+ output: "ofm_conv"
+}
+operation {
+ type: "Mul"
+ input: "ofm_conv"
+ input: "mul_const"
+ output: "ofm_mul"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "ofm_mul"
+ input: "add_const"
+ output: "ofm_add"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm_conv"
+output: "ofm_add"
diff --git a/res/TensorFlowLiteRecipes/Net_Conv_Add_Mul_000/test.rule b/res/TensorFlowLiteRecipes/Net_Conv_Add_Mul_000/test.rule
new file mode 100644
index 000000000..00a25dfd6
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Conv_Add_Mul_000/test.rule
@@ -0,0 +1,7 @@
+# To check if Add and Mul are fused to Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "CONV_EXIST" $(op_count CONV_2D) '=' 1
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_ADD" $(op_count ADD) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_Conv_Add_Mul_001/test.recipe b/res/TensorFlowLiteRecipes/Net_Conv_Add_Mul_001/test.recipe
new file mode 100644
index 000000000..04bdd5ae0
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Conv_Add_Mul_001/test.recipe
@@ -0,0 +1,92 @@
+operand {
+ name: "ifm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "mul_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "add_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_mul"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "ofm_add"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ }
+ input: "ifm_conv"
+ input: "filter"
+ input: "bias"
+ output: "ofm_conv"
+}
+operation {
+ type: "Mul"
+ input: "ofm_conv"
+ input: "mul_const"
+ output: "ofm_mul"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "ofm_mul"
+ input: "add_const"
+ output: "ofm_add"
+ add_options {
+ activation: RELU
+ }
+}
+input: "ifm_conv"
+output: "ofm_add"
diff --git a/res/TensorFlowLiteRecipes/Net_Conv_Add_Mul_001/test.rule b/res/TensorFlowLiteRecipes/Net_Conv_Add_Mul_001/test.rule
new file mode 100644
index 000000000..7f3511a35
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Conv_Add_Mul_001/test.rule
@@ -0,0 +1,7 @@
+# To check if Add (with RELU) and Mul are fused to Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "CONV_EXIST" $(op_count CONV_2D) '=' 1
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_ADD" $(op_count ADD) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_Conv_Add_Mul_002/test.recipe b/res/TensorFlowLiteRecipes/Net_Conv_Add_Mul_002/test.recipe
new file mode 100644
index 000000000..e3fe1e315
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Conv_Add_Mul_002/test.recipe
@@ -0,0 +1,92 @@
+operand {
+ name: "ifm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "mul_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "add_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_mul"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "ofm_add"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ }
+ input: "ifm_conv"
+ input: "filter"
+ input: "bias"
+ output: "ofm_conv"
+}
+operation {
+ type: "Mul"
+ input: "mul_const"
+ input: "ofm_conv"
+ output: "ofm_mul"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "add_const"
+ input: "ofm_mul"
+ output: "ofm_add"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm_conv"
+output: "ofm_add"
diff --git a/res/TensorFlowLiteRecipes/Net_Conv_Add_Mul_002/test.rule b/res/TensorFlowLiteRecipes/Net_Conv_Add_Mul_002/test.rule
new file mode 100644
index 000000000..329d1752c
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Conv_Add_Mul_002/test.rule
@@ -0,0 +1,7 @@
+# To check if Add and Mul with reversed input order are fused to Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "CONV_EXIST" $(op_count CONV_2D) '=' 1
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_ADD" $(op_count ADD) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_Conv_Add_Mul_003/test.recipe b/res/TensorFlowLiteRecipes/Net_Conv_Add_Mul_003/test.recipe
new file mode 100644
index 000000000..d7673169e
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Conv_Add_Mul_003/test.recipe
@@ -0,0 +1,92 @@
+operand {
+ name: "ifm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "mul_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "add_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_mul"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "ofm_add"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ }
+ input: "ifm_conv"
+ input: "filter"
+ input: "bias"
+ output: "ofm_conv"
+}
+operation {
+ type: "Mul"
+ input: "ofm_conv"
+ input: "mul_const"
+ output: "ofm_mul"
+ mul_options {
+ activation: RELU
+ }
+}
+operation {
+ type: "Add"
+ input: "ofm_mul"
+ input: "add_const"
+ output: "ofm_add"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm_conv"
+output: "ofm_add"
diff --git a/res/TensorFlowLiteRecipes/Net_Conv_Add_Mul_003/test.rule b/res/TensorFlowLiteRecipes/Net_Conv_Add_Mul_003/test.rule
new file mode 100644
index 000000000..9e158e3d6
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Conv_Add_Mul_003/test.rule
@@ -0,0 +1,7 @@
+# To check if Add and Mul are not fused to Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "CONV_EXIST" $(op_count CONV_2D) '=' 1
+RULE "MUL_EXIST" $(op_count MUL) '=' 1
+RULE "ADD_EXIST" $(op_count ADD) '=' 1
diff --git a/res/TensorFlowLiteRecipes/Net_Conv_FakeQuant_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Conv_FakeQuant_000/test.recipe
new file mode 100644
index 000000000..e666e95e7
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Conv_FakeQuant_000/test.recipe
@@ -0,0 +1,61 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 8 dim: 1 dim: 1 dim: 3 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 8 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 8 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 8 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+ input: "ifm"
+ input: "filter"
+ input: "bias"
+ output: "conv"
+}
+operation {
+ type: "FakeQuant"
+ fakequant_options {
+ min: 0.0
+ max: 1.0
+ num_bits: 8
+ narrow_range: false
+ }
+ input: "conv"
+ output: "ofm"
+}
+
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Net_Conv_FakeQuant_000/test.rule b/res/TensorFlowLiteRecipes/Net_Conv_FakeQuant_000/test.rule
new file mode 100644
index 000000000..7fd354611
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Conv_FakeQuant_000/test.rule
@@ -0,0 +1,7 @@
+# To check if FakeQuant is removed by remove_fakequant
+#
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "CONV_EXIST" $(op_count CONV_2D) '=' 1
+RULE "NO_FAKE_QUANT" $(op_count FAKE_QUANT) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_Conv_Min_Max_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Conv_Min_Max_000/test.recipe
new file mode 100644
index 000000000..6d166f0bf
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Conv_Min_Max_000/test.recipe
@@ -0,0 +1,121 @@
+operand {
+ name: "Placeholder"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "Const_4"
+ type: FLOAT32
+ shape { }
+ filler { tag: "explicit" arg: "6" }
+}
+operand {
+ name: "Const_5"
+ type: FLOAT32
+ shape { }
+ filler { tag: "explicit" arg: "0" }
+}
+operand {
+ name: "Conv2D_1"
+ type: FLOAT32
+ shape { dim: 3 dim: 3 dim: 3 dim: 3 }
+ filler { tag: "gaussian" arg: "0.0" arg: "0.1" }
+}
+operand {
+ name: "Conv2D_2"
+ type: FLOAT32
+ shape { dim: 3 }
+ filler { tag: "gaussian" arg: "0.0" arg: "0.1" }
+}
+operand {
+ name: "Conv2D_21"
+ type: FLOAT32
+ shape { dim: 3 dim: 3 dim: 3 dim: 3 }
+ filler { tag: "gaussian" arg: "0.0" arg: "0.1" }
+}
+operand {
+ name: "Conv2D_11"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "Minimum"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "Maximum"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "Conv2D_22"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "Minimum_1"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "Maximum_1"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operation {
+ type: "Conv2D"
+ input: "Placeholder"
+ input: "Conv2D_1"
+ input: "Conv2D_2"
+ output: "Conv2D_11"
+ conv2d_options {
+ padding: SAME
+ stride_w: 1
+ stride_h: 1
+ activation: NONE
+ dilation_w_factor: 1
+ dilation_h_factor: 1
+ }
+}
+operation {
+ type: "Minimum"
+ input: "Conv2D_11"
+ input: "Const_4"
+ output: "Minimum"
+}
+operation {
+ type: "Maximum"
+ input: "Minimum"
+ input: "Const_5"
+ output: "Maximum"
+}
+operation {
+ type: "Conv2D"
+ input: "Maximum"
+ input: "Conv2D_21"
+ input: "Conv2D_2"
+ output: "Conv2D_22"
+ conv2d_options {
+ padding: SAME
+ stride_w: 1
+ stride_h: 1
+ activation: NONE
+ dilation_w_factor: 1
+ dilation_h_factor: 1
+ }
+}
+operation {
+ type: "Minimum"
+ input: "Conv2D_22"
+ input: "Const_4"
+ output: "Minimum_1"
+}
+operation {
+ type: "Maximum"
+ input: "Minimum_1"
+ input: "Const_5"
+ output: "Maximum_1"
+}
+input: "Placeholder"
+output: "Maximum_1"
diff --git a/res/TensorFlowLiteRecipes/Net_Conv_Min_Max_000/test.rule b/res/TensorFlowLiteRecipes/Net_Conv_Min_Max_000/test.rule
new file mode 100644
index 000000000..a67530afd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Conv_Min_Max_000/test.rule
@@ -0,0 +1,8 @@
+# To check if Minimum and Maximum are converted to Relu6 op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "CONV_EXIST" $(op_count CONV_2D) '=' 2
+RULE "RELU6_EXIST" $(op_count RELU6) '=' 2
+RULE "MIN_NOT_EXIST" $(op_count MINUMUM) '=' 0
+RULE "MAX_NOT_EXIST" $(op_count MAXIMUM) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_Conv_Min_Relu_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Conv_Min_Relu_000/test.recipe
new file mode 100644
index 000000000..18c12472e
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Conv_Min_Relu_000/test.recipe
@@ -0,0 +1,113 @@
+operand {
+ name: "Placeholder"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "Const_4"
+ type: FLOAT32
+ shape { }
+ filler { tag: "explicit" arg: "6" }
+}
+operand {
+ name: "Conv2D_1"
+ type: FLOAT32
+ shape { dim: 3 dim: 3 dim: 3 dim: 3 }
+ filler { tag: "gaussian" arg: "0.0" arg: "0.1" }
+}
+operand {
+ name: "Conv2D_2"
+ type: FLOAT32
+ shape { dim: 3 }
+ filler { tag: "gaussian" arg: "0.0" arg: "0.1" }
+}
+operand {
+ name: "Conv2D_21"
+ type: FLOAT32
+ shape { dim: 3 dim: 3 dim: 3 dim: 3 }
+ filler { tag: "gaussian" arg: "0.0" arg: "0.1" }
+}
+operand {
+ name: "Conv2D_11"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "Minimum"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "Relu"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "Conv2D_22"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "Minimum_1"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "Relu_1"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operation {
+ type: "Conv2D"
+ input: "Placeholder"
+ input: "Conv2D_1"
+ input: "Conv2D_2"
+ output: "Conv2D_11"
+ conv2d_options {
+ padding: SAME
+ stride_w: 1
+ stride_h: 1
+ activation: NONE
+ dilation_w_factor: 1
+ dilation_h_factor: 1
+ }
+}
+operation {
+ type: "Minimum"
+ input: "Conv2D_11"
+ input: "Const_4"
+ output: "Minimum"
+}
+operation {
+ type: "ReLU"
+ input: "Minimum"
+ output: "Relu"
+}
+operation {
+ type: "Conv2D"
+ input: "Relu"
+ input: "Conv2D_21"
+ input: "Conv2D_2"
+ output: "Conv2D_22"
+ conv2d_options {
+ padding: SAME
+ stride_w: 1
+ stride_h: 1
+ activation: NONE
+ dilation_w_factor: 1
+ dilation_h_factor: 1
+ }
+}
+operation {
+ type: "Minimum"
+ input: "Conv2D_22"
+ input: "Const_4"
+ output: "Minimum_1"
+}
+operation {
+ type: "ReLU"
+ input: "Minimum_1"
+ output: "Relu_1"
+}
+input: "Placeholder"
+output: "Relu_1"
diff --git a/res/TensorFlowLiteRecipes/Net_Conv_Min_Relu_000/test.rule b/res/TensorFlowLiteRecipes/Net_Conv_Min_Relu_000/test.rule
new file mode 100644
index 000000000..1256227ac
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Conv_Min_Relu_000/test.rule
@@ -0,0 +1,8 @@
+# To check if Minimum and ReLU are converted to Relu6 op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "CONV_EXIST" $(op_count CONV_2D) '=' 2
+RULE "RELU6_EXIST" $(op_count RELU6) '=' 2
+RULE "MIN_NOT_EXIST" $(op_count MINUMUM) '=' 0
+RULE "RELU_NOT_EXIST" $(op_count RELU) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_Conv_PReluGraph_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Conv_PReluGraph_000/test.recipe
new file mode 100644
index 000000000..19559c082
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Conv_PReluGraph_000/test.recipe
@@ -0,0 +1,145 @@
+#
+# generated with tflchef-reverse from PReLUwConv2d ONNX model
+#
+operand {
+ name: "input"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 5 dim: 1 }
+}
+operand {
+ name: "mul_1/y"
+ type: FLOAT32
+ shape {
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.5"
+ }
+}
+operand {
+ name: "Const_2"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 }
+ filler {
+ tag: "explicit"
+ arg: "0.25"
+ }
+}
+operand {
+ name: "ConvWeight"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 1 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operand {
+ name: "ConvBias"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operand {
+ name: "ConvOut"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 5 dim: 1 }
+}
+operand {
+ name: "Abs"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 5 dim: 1 }
+}
+operand {
+ name: "Relu1"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 5 dim: 1 }
+}
+operand {
+ name: "sub"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 5 dim: 1 }
+}
+operand {
+ name: "mul"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 5 dim: 1 }
+}
+operand {
+ name: "mul_1"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 5 dim: 1 }
+}
+operand {
+ name: "output"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 5 dim: 1 }
+}
+operation {
+ type: "Conv2D"
+ input: "input"
+ input: "ConvWeight"
+ input: "ConvBias"
+ output: "ConvOut"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ activation: NONE
+ dilation_w_factor: 1
+ dilation_h_factor: 1
+ }
+}
+operation {
+ type: "Abs"
+ input: "ConvOut"
+ output: "Abs"
+}
+operation {
+ type: "ReLU"
+ input: "ConvOut"
+ output: "Relu1"
+}
+operation {
+ type: "Sub"
+ input: "ConvOut"
+ input: "Abs"
+ output: "sub"
+ sub_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Mul"
+ input: "sub"
+ input: "Const_2"
+ output: "mul"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Mul"
+ input: "mul"
+ input: "mul_1/y"
+ output: "mul_1"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "Relu1"
+ input: "mul_1"
+ output: "output"
+ add_options {
+ activation: NONE
+ }
+}
+input: "input"
+output: "output"
diff --git a/res/TensorFlowLiteRecipes/Net_Conv_PReluGraph_000/test.rule b/res/TensorFlowLiteRecipes/Net_Conv_PReluGraph_000/test.rule
new file mode 100644
index 000000000..f70180add
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Conv_PReluGraph_000/test.rule
@@ -0,0 +1,10 @@
+# To check if Sub-Graph can be converted to PReLU
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "ABS_NOT_EXIST" $(op_count ABS) '=' 0
+RULE "ADD_NOT_EXIST" $(op_count ADD) '=' 0
+RULE "MUL_NOT_EXIST" $(op_count MUL) '=' 0
+RULE "RELU_NOT_EXIST" $(op_count RELU) '=' 0
+RULE "SUB_NOT_EXIST" $(op_count SUB) '=' 0
+RULE "PRELU_EXIST" $(op_count PRELU) '=' 1
diff --git a/res/TensorFlowLiteRecipes/Net_Conv_QuantDequant_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Conv_QuantDequant_000/test.recipe
new file mode 100644
index 000000000..c5d387293
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Conv_QuantDequant_000/test.recipe
@@ -0,0 +1,66 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 8 dim: 1 dim: 1 dim: 3 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 8 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 8 }
+}
+operand {
+ name: "quantize"
+ type: UINT8
+ shape { dim: 1 dim: 16 dim: 16 dim: 8 }
+ quant { scale: 1 zero_point: 128 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 8 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+ input: "ifm"
+ input: "filter"
+ input: "bias"
+ output: "conv"
+}
+operation {
+ type: "Quantize"
+ input: "conv"
+ output: "quantize"
+}
+operation {
+ type: "Dequantize"
+ input: "quantize"
+ output: "ofm"
+}
+
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Net_Conv_QuantDequant_000/test.rule b/res/TensorFlowLiteRecipes/Net_Conv_QuantDequant_000/test.rule
new file mode 100644
index 000000000..4bf309491
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Conv_QuantDequant_000/test.rule
@@ -0,0 +1,8 @@
+# To check if Quantize-Dequantize is removed by remove_quantdequant
+#
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "CONV_EXIST" $(op_count CONV_2D) '=' 1
+RULE "NO_QUANTIZE" $(op_count QUANTIZE) '=' 0
+RULE "NO_DEQUANTIZE" $(op_count DEQUANTIZE) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_Conv_Relu6_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Conv_Relu6_000/test.recipe
new file mode 100644
index 000000000..f6be63f84
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Conv_Relu6_000/test.recipe
@@ -0,0 +1,85 @@
+operand {
+ name: "Placeholder"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "Conv2D_1"
+ type: FLOAT32
+ shape { dim: 3 dim: 3 dim: 3 dim: 3 }
+ filler { tag: "gaussian" arg: "0.0" arg: "0.1" }
+}
+operand {
+ name: "Conv2D_2"
+ type: FLOAT32
+ shape { dim: 3 }
+ filler { tag: "gaussian" arg: "0.0" arg: "0.1" }
+}
+operand {
+ name: "Conv2D_21"
+ type: FLOAT32
+ shape { dim: 3 dim: 3 dim: 3 dim: 3 }
+ filler { tag: "gaussian" arg: "0.0" arg: "0.1" }
+}
+operand {
+ name: "Conv2D_11"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "ReLU6"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "Conv2D_22"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operand {
+ name: "ReLU6_1"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operation {
+ type: "Conv2D"
+ input: "Placeholder"
+ input: "Conv2D_1"
+ input: "Conv2D_2"
+ output: "Conv2D_11"
+ conv2d_options {
+ padding: SAME
+ stride_w: 1
+ stride_h: 1
+ activation: NONE
+ dilation_w_factor: 1
+ dilation_h_factor: 1
+ }
+}
+operation {
+ type: "ReLU6"
+ input: "Conv2D_11"
+ output: "ReLU6"
+}
+operation {
+ type: "Conv2D"
+ input: "ReLU6"
+ input: "Conv2D_21"
+ input: "Conv2D_2"
+ output: "Conv2D_22"
+ conv2d_options {
+ padding: SAME
+ stride_w: 1
+ stride_h: 1
+ activation: NONE
+ dilation_w_factor: 1
+ dilation_h_factor: 1
+ }
+}
+operation {
+ type: "ReLU6"
+ input: "Conv2D_22"
+ output: "ReLU6_1"
+}
+input: "Placeholder"
+output: "ReLU6_1"
diff --git a/res/TensorFlowLiteRecipes/Net_Conv_Relu6_000/test.rule b/res/TensorFlowLiteRecipes/Net_Conv_Relu6_000/test.rule
new file mode 100644
index 000000000..34d5d663d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Conv_Relu6_000/test.rule
@@ -0,0 +1,6 @@
+# To check if ReLU6 is fused to Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "CONV_EXIST" $(op_count CONV_2D) '=' 2
+RULE "RELU6_NOT_EXIST" $(op_count RELU6) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_Densify_Add_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Densify_Add_000/test.recipe
new file mode 100644
index 000000000..ea604b20f
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Densify_Add_000/test.recipe
@@ -0,0 +1,44 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "sparse"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "0" arg: "0" arg: "0"
+ arg: "0" arg: "2" arg: "0" arg: "0"
+ arg: "0" arg: "0" arg: "3" arg: "0"
+ arg: "0" arg: "0" arg: "0" arg: "4"
+ }
+ make_sparse: true
+}
+operand {
+ name: "dense"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operation {
+ type: "Densify"
+ input: "sparse"
+ output: "dense"
+}
+operation {
+ type: "Add"
+ input: "ifm"
+ input: "dense"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Net_Densify_Dequantize_Add_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Densify_Dequantize_Add_000/test.recipe
new file mode 100644
index 000000000..6e1083fae
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Densify_Dequantize_Add_000/test.recipe
@@ -0,0 +1,54 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "sparse16"
+ type: FLOAT16
+ shape { dim: 4 dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "0" arg: "0" arg: "0"
+ arg: "0" arg: "2" arg: "0" arg: "0"
+ arg: "0" arg: "0" arg: "3" arg: "0"
+ arg: "0" arg: "0" arg: "0" arg: "4"
+ }
+ make_sparse: true
+}
+operand {
+ name: "dense16"
+ type: FLOAT16
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "dense32"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operation {
+ type: "Densify"
+ input: "sparse16"
+ output: "dense16"
+}
+operation {
+ type: "Dequantize"
+ input: "dense16"
+ output: "dense32"
+}
+operation {
+ type: "Add"
+ input: "ifm"
+ input: "dense32"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Net_Dequantize_Add_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Dequantize_Add_000/test.recipe
new file mode 100644
index 000000000..5f212a7a6
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Dequantize_Add_000/test.recipe
@@ -0,0 +1,41 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "float16"
+ type: FLOAT16
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "dequantized"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "Dequantize"
+ input: "float16"
+ output: "dequantized"
+}
+operation {
+ type: "Add"
+ input: "ifm"
+ input: "dequantized"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Net_Duplicate_Weights_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Duplicate_Weights_000/test.recipe
new file mode 100644
index 000000000..c9c66d221
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Duplicate_Weights_000/test.recipe
@@ -0,0 +1,77 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 1 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 2 dim: 1 }
+ filler {
+ tag: "constant"
+ arg: "1"
+ }
+}
+operand {
+ name: "filter_1"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 2 dim: 1 }
+ filler {
+ tag: "constant"
+ arg: "1"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "constant"
+ arg: "1"
+ }
+}
+operand {
+ name: "bias_1"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "constant"
+ arg: "1"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 31 dim: 31 dim: 1 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+ input: "ifm"
+ input: "filter"
+ input: "bias"
+ output: "ofm"
+}
+operand {
+ name: "output"
+ type: FLOAT32
+ shape { dim: 1 dim: 30 dim: 30 dim: 1 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+ input: "ofm"
+ input: "filter_1"
+ input: "bias_1"
+ output: "output"
+}
+input: "ifm"
+output: "output"
diff --git a/res/TensorFlowLiteRecipes/Net_Duplicate_Weights_000/test.rule b/res/TensorFlowLiteRecipes/Net_Duplicate_Weights_000/test.rule
new file mode 100644
index 000000000..4469dab9e
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Duplicate_Weights_000/test.rule
@@ -0,0 +1,8 @@
+# To check if RemoveDuplicateConstPass removes all duplicate consts
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "FILTER_COUNT" $(const_count filter) '=' 1
+RULE "DUPLICATE_FILTER_COUNT" $(const_count filter_1) '=' 0
+RULE "BIAS_COUNT" $(const_count bias) '=' 1
+RULE "DUPLICATE_BIAS_COUNT" $(const_count bias_1) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_DwConv_BN_000/test.recipe b/res/TensorFlowLiteRecipes/Net_DwConv_BN_000/test.recipe
new file mode 100644
index 000000000..f9769273f
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_DwConv_BN_000/test.recipe
@@ -0,0 +1,91 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 8 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 8 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 8 }
+ filler {
+ tag: "constant"
+ arg: "1.1"
+ }
+}
+operand {
+ name: "scale"
+ type: FLOAT32
+ shape { dim: 8 }
+ filler {
+ tag: "constant"
+ arg: "1.1"
+ }
+}
+operand {
+ name: "shift"
+ type: FLOAT32
+ shape { dim: 8 }
+ filler {
+ tag: "constant"
+ arg: "1.1"
+ }
+}
+operand {
+ name: "dwout"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 8 }
+}
+operand {
+ name: "mulout"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 8 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 8 }
+}
+operation {
+ type: "DepthwiseConv2D"
+ depthwiseconv2d_options {
+ padding: SAME
+ stride_w: 1
+ stride_h: 1
+ depth_multiplier: 1
+ activation : NONE
+ }
+ input: "ifm"
+ input: "filter"
+ input: "bias"
+ output: "dwout"
+}
+operation {
+ type: "Mul"
+ input: "dwout"
+ input: "scale"
+ output: "mulout"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "mulout"
+ input: "shift"
+ output: "ofm"
+ add_options {
+ activation: RELU6
+ }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Net_DwConv_BN_000/test.rule b/res/TensorFlowLiteRecipes/Net_DwConv_BN_000/test.rule
new file mode 100644
index 000000000..eb0cba835
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_DwConv_BN_000/test.rule
@@ -0,0 +1,7 @@
+# To check if BatchNorm op (Mul + Add) is fused to Depthwise Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "DWCONV_EXIST" $(op_count DEPTHWISE_CONV_2D) '=' 1
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_ADD" $(op_count ADD) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_DwConv_BN_001/test.recipe b/res/TensorFlowLiteRecipes/Net_DwConv_BN_001/test.recipe
new file mode 100644
index 000000000..4bbfd841c
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_DwConv_BN_001/test.recipe
@@ -0,0 +1,91 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 8 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 8 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 8 }
+ filler {
+ tag: "constant"
+ arg: "1.1"
+ }
+}
+operand {
+ name: "scale"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 8 }
+ filler {
+ tag: "constant"
+ arg: "1.1"
+ }
+}
+operand {
+ name: "shift"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 8 }
+ filler {
+ tag: "constant"
+ arg: "1.1"
+ }
+}
+operand {
+ name: "dwout"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 8 }
+}
+operand {
+ name: "mulout"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 8 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 8 }
+}
+operation {
+ type: "DepthwiseConv2D"
+ depthwiseconv2d_options {
+ padding: SAME
+ stride_w: 1
+ stride_h: 1
+ depth_multiplier: 1
+ activation : NONE
+ }
+ input: "ifm"
+ input: "filter"
+ input: "bias"
+ output: "dwout"
+}
+operation {
+ type: "Mul"
+ input: "dwout"
+ input: "scale"
+ output: "mulout"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "mulout"
+ input: "shift"
+ output: "ofm"
+ add_options {
+ activation: RELU6
+ }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Net_DwConv_BN_001/test.rule b/res/TensorFlowLiteRecipes/Net_DwConv_BN_001/test.rule
new file mode 100644
index 000000000..eb0cba835
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_DwConv_BN_001/test.rule
@@ -0,0 +1,7 @@
+# To check if BatchNorm op (Mul + Add) is fused to Depthwise Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "DWCONV_EXIST" $(op_count DEPTHWISE_CONV_2D) '=' 1
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_ADD" $(op_count ADD) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_FullyConnected_Add_000/test.recipe b/res/TensorFlowLiteRecipes/Net_FullyConnected_Add_000/test.recipe
new file mode 100644
index 000000000..a4d9f16b8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_FullyConnected_Add_000/test.recipe
@@ -0,0 +1,66 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 }
+}
+operand {
+ name: "fc_ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "fc_wgt"
+ type: FLOAT32
+ shape { dim: 8 dim: 16 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "fc_bias"
+ type: FLOAT32
+ shape { dim: 8 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "fc"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 }
+}
+operation {
+ type: "FullyConnected"
+ fullyconnected_options {
+ activation: NONE
+ }
+ input: "fc_ifm"
+ input: "fc_wgt"
+ input: "fc_bias"
+ output: "fc"
+}
+operation {
+ type: "Add"
+ input: "ifm"
+ input: "fc"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Net_FullyConnected_Add_000/test.rule b/res/TensorFlowLiteRecipes/Net_FullyConnected_Add_000/test.rule
new file mode 100644
index 000000000..a70aa928c
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_FullyConnected_Add_000/test.rule
@@ -0,0 +1,6 @@
+# To check if FullyConnected with constant inputs is folded to a constant
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "ADD_EXIST" $(op_count ADD) '=' 1
+RULE "NO_FC" $(op_count FULLY_CONNECTED) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_Gather_SparseToDense_AddV2_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Gather_SparseToDense_AddV2_000/test.recipe
new file mode 100644
index 000000000..804d293fc
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Gather_SparseToDense_AddV2_000/test.recipe
@@ -0,0 +1,131 @@
+operand {
+ name: "param_gather"
+ type: INT64
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "1" arg: "2" }
+}
+operand {
+ name: "indices_gather"
+ type: INT64
+ shape { dim: 1 }
+ filler { tag: "explicit" arg: "1" arg: "2" }
+}
+operand {
+ name: "ofm_gather"
+ type: INT64
+ shape { dim: 1 }
+}
+operand {
+ name: "shape_sparse"
+ type: INT64
+ shape { dim: 1 dim: 1 }
+ filler {
+ tag: "explicit"
+ arg: "3" arg: "5"
+ }
+}
+operand {
+ name: "values_sparse"
+ type: INT64
+ shape { dim: 1 }
+ filler { tag: "explicit" arg: "1" arg: "2" }
+}
+operand {
+ name: "defalut_value_sparse"
+ type: INT64
+ shape { }
+ filler { tag: "explicit" arg: "1" arg: "2" }
+}
+operand {
+ name: "ofm_sparse"
+ type: INT64
+ shape { dim: 3 }
+}
+operand {
+ name: "add_v2_2"
+ type: INT64
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "1" arg: "2" }
+}
+operand {
+ name: "ofm_add_v2"
+ type: INT64
+ shape { dim: 3 }
+}
+operand {
+ name: "ofm_cast"
+ type: INT32
+ shape { dim: 3 }
+}
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 5 }
+}
+operand {
+ name: "perm"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "0" arg: "2" arg: "1" }
+}
+operand {
+ name: "ofm_trans"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 10 }
+}
+operation {
+ type: "Gather"
+ gather_options {
+ axis: 0
+ }
+ input: "param_gather"
+ input: "indices_gather"
+ output: "ofm_gather"
+}
+operation {
+ type: "SparseToDense"
+ sparse_to_dense_options {
+ validate_indices: false
+ }
+ input: "shape_sparse"
+ input: "values_sparse"
+ input: "ofm_gather"
+ input: "defalut_value_sparse"
+ output: "ofm_sparse"
+}
+operation {
+ type: "AddV2"
+ input: "ofm_sparse"
+ input: "add_v2_2"
+ output: "ofm_add_v2"
+}
+operation {
+ type: "Cast"
+ cast_options {
+ in_data_type: INT64
+ out_data_type: INT32
+ }
+ input: "ofm_add_v2"
+ output: "ofm_cast"
+}
+operation {
+ type: "Transpose"
+ transpose_options {
+ }
+ input: "ifm"
+ input: "perm"
+ output: "ofm_trans"
+}
+operation {
+ type: "Reshape"
+ input: "ofm_trans"
+ input: "ofm_cast"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Net_Gelu_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Gelu_000/test.recipe
new file mode 100644
index 000000000..ae7f823e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Gelu_000/test.recipe
@@ -0,0 +1,100 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 5 dim: 3 }
+}
+operand {
+ name: "mul_sqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 5 dim: 3 }
+}
+operand {
+ name: "sqrt_2_inv"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "explicit"
+ arg: "0.7071067690849304"
+ }
+}
+operand {
+ name: "erf"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 5 dim: 3 }
+}
+operand {
+ name: "add_one"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 5 dim: 3 }
+}
+operand {
+ name: "one"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "explicit"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "mul"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 5 dim: 3 }
+}
+operand {
+ name: "mul_half"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 5 dim: 3 }
+}
+operand {
+ name: "half"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "explicit"
+ arg: "0.5"
+ }
+}
+operation {
+ type: "Mul"
+ input: "ifm"
+ input: "sqrt_2_inv"
+ output: "mul_sqrt"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Erf"
+ input: "mul_sqrt"
+ output: "erf"
+}
+operation {
+ type: "Add"
+ input: "erf"
+ input: "one"
+ output: "add_one"
+ add_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Mul"
+ input: "ifm"
+ input: "add_one"
+ output: "mul"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Mul"
+ input: "mul"
+ input: "half"
+ output: "mul_half"
+ mul_options {
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "mul_half"
diff --git a/res/TensorFlowLiteRecipes/Net_Gelu_000/test.rule b/res/TensorFlowLiteRecipes/Net_Gelu_000/test.rule
new file mode 100644
index 000000000..de5a76972
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Gelu_000/test.rule
@@ -0,0 +1,8 @@
+# To check if Gelu fusion works
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "GELU_EXIST" $(op_count GELU) '=' 1
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_ADD" $(op_count ADD) '=' 0
+RULE "NO_CUSTOM" $(op_count 'CUSTOM(Erf)') '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_Gelu_001/test.recipe b/res/TensorFlowLiteRecipes/Net_Gelu_001/test.recipe
new file mode 100644
index 000000000..76337293a
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Gelu_001/test.recipe
@@ -0,0 +1,100 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 5 dim: 3 }
+}
+operand {
+ name: "mul_sqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 5 dim: 3 }
+}
+operand {
+ name: "sqrt_2_inv"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "explicit"
+ arg: "0.7071067690849304"
+ }
+}
+operand {
+ name: "erf"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 5 dim: 3 }
+}
+operand {
+ name: "add_one"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 5 dim: 3 }
+}
+operand {
+ name: "one"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "explicit"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "mul"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 5 dim: 3 }
+}
+operand {
+ name: "mul_half"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 5 dim: 3 }
+}
+operand {
+ name: "half"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "explicit"
+ arg: "0.5"
+ }
+}
+operation {
+ type: "Mul"
+ input: "ifm"
+ input: "sqrt_2_inv"
+ output: "mul_sqrt"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Erf"
+ input: "mul_sqrt"
+ output: "erf"
+}
+operation {
+ type: "Add"
+ input: "erf"
+ input: "one"
+ output: "add_one"
+ add_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Mul"
+ input: "ifm"
+ input: "half"
+ output: "mul_half"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Mul"
+ input: "mul_half"
+ input: "add_one"
+ output: "mul"
+ mul_options {
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "mul"
diff --git a/res/TensorFlowLiteRecipes/Net_Gelu_001/test.rule b/res/TensorFlowLiteRecipes/Net_Gelu_001/test.rule
new file mode 100644
index 000000000..de5a76972
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Gelu_001/test.rule
@@ -0,0 +1,8 @@
+# To check if Gelu fusion works
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "GELU_EXIST" $(op_count GELU) '=' 1
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_ADD" $(op_count ADD) '=' 0
+RULE "NO_CUSTOM" $(op_count 'CUSTOM(Erf)') '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_InstanceNorm_001/test.rule b/res/TensorFlowLiteRecipes/Net_InstanceNorm_001/test.rule
index c31145aa6..85b4aee47 100644
--- a/res/TensorFlowLiteRecipes/Net_InstanceNorm_001/test.rule
+++ b/res/TensorFlowLiteRecipes/Net_InstanceNorm_001/test.rule
@@ -1,4 +1,4 @@
-# To check if custom op BatchMatMulV2 is converted to circle builtin op
+# To check if this network is converted to circle InstanceNorm op
RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
diff --git a/res/TensorFlowLiteRecipes/Net_InstanceNorm_002/test.recipe b/res/TensorFlowLiteRecipes/Net_InstanceNorm_002/test.recipe
index 92087829c..a79517484 100644
--- a/res/TensorFlowLiteRecipes/Net_InstanceNorm_002/test.recipe
+++ b/res/TensorFlowLiteRecipes/Net_InstanceNorm_002/test.recipe
@@ -18,7 +18,7 @@ operand {
name: "sequential/instance_normalization/stack"
type: INT32
shape {
- dim: 5
+ dim: 4
}
filler {
tag: "explicit"
@@ -26,7 +26,6 @@ operand {
arg: "32"
arg: "32"
arg: "8"
- arg: "1"
}
}
operand {
@@ -51,7 +50,6 @@ operand {
dim: 1
dim: 1
dim: 8
- dim: 1
}
filler {
tag: "explicit"
@@ -73,7 +71,6 @@ operand {
dim: 1
dim: 1
dim: 8
- dim: 1
}
filler {
tag: "explicit"
@@ -101,13 +98,12 @@ operand {
name: "sequential/instance_normalization/moments/variance/reduction_indices"
type: INT32
shape {
- dim: 3
+ dim: 2
}
filler {
tag: "explicit"
arg: "1"
arg: "2"
- arg: "4"
}
}
operand {
@@ -118,7 +114,6 @@ operand {
dim: 32
dim: 32
dim: 8
- dim: 1
}
}
operand {
@@ -129,7 +124,6 @@ operand {
dim: 1
dim: 1
dim: 8
- dim: 1
}
}
operand {
@@ -140,7 +134,6 @@ operand {
dim: 32
dim: 32
dim: 8
- dim: 1
}
}
operand {
@@ -151,7 +144,6 @@ operand {
dim: 1
dim: 1
dim: 8
- dim: 1
}
}
operand {
@@ -162,7 +154,6 @@ operand {
dim: 1
dim: 1
dim: 8
- dim: 1
}
}
operand {
@@ -173,7 +164,6 @@ operand {
dim: 1
dim: 1
dim: 8
- dim: 1
}
}
operand {
@@ -184,7 +174,6 @@ operand {
dim: 1
dim: 1
dim: 8
- dim: 1
}
}
operand {
@@ -195,7 +184,6 @@ operand {
dim: 32
dim: 32
dim: 8
- dim: 1
}
}
operand {
@@ -206,7 +194,6 @@ operand {
dim: 1
dim: 1
dim: 8
- dim: 1
}
}
operand {
@@ -217,7 +204,6 @@ operand {
dim: 1
dim: 1
dim: 8
- dim: 1
}
}
operand {
@@ -228,7 +214,6 @@ operand {
dim: 32
dim: 32
dim: 8
- dim: 1
}
}
operand {
@@ -242,14 +227,8 @@ operand {
}
}
operation {
- type: "Reshape"
- input: "input_layer"
- input: "sequential/instance_normalization/stack"
- output: "sequential/instance_normalization/Reshape"
-}
-operation {
type: "Mean"
- input: "sequential/instance_normalization/Reshape"
+ input: "input_layer"
input: "sequential/instance_normalization/moments/variance/reduction_indices"
output: "sequential/instance_normalization/moments/mean"
mean_options {
@@ -258,7 +237,7 @@ operation {
}
operation {
type: "SquaredDifference"
- input: "sequential/instance_normalization/Reshape"
+ input: "input_layer"
input: "sequential/instance_normalization/moments/mean"
output: "sequential/instance_normalization/moments/SquaredDifference"
}
@@ -296,7 +275,7 @@ operation {
}
operation {
type: "Mul"
- input: "sequential/instance_normalization/Reshape"
+ input: "input_layer"
input: "sequential/instance_normalization/batchnorm/mul"
output: "sequential/instance_normalization/batchnorm/mul_1"
mul_options {
@@ -330,11 +309,5 @@ operation {
activation: NONE
}
}
-operation {
- type: "Reshape"
- input: "sequential/instance_normalization/batchnorm/add_1"
- input: "sequential/instance_normalization/Shape"
- output: "Identity"
-}
input: "input_layer"
-output: "Identity"
+output: "sequential/instance_normalization/batchnorm/add_1"
diff --git a/res/TensorFlowLiteRecipes/Net_InstanceNorm_002/test.rule b/res/TensorFlowLiteRecipes/Net_InstanceNorm_002/test.rule
index 650827f4e..f926e75dd 100644
--- a/res/TensorFlowLiteRecipes/Net_InstanceNorm_002/test.rule
+++ b/res/TensorFlowLiteRecipes/Net_InstanceNorm_002/test.rule
@@ -1,8 +1,8 @@
-# To check if custom op InstanceNorm is converted to circle builtin op
+# To check if this network is converted to circle InstanceNorm op
RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
RULE "INSTANCE_NORM_EXIST" $(op_count INSTANCE_NORM) '=' 1
-RULE "RESHAPE_EXIST" $(op_count RESHAPE) '=' 3
+RULE "RESHAPE_EXIST" $(op_count RESHAPE) '<=' 3
RULE "NO_ADD" $(op_count ADD) '=' 0
RULE "NO_MUL" $(op_count MUL) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_InstanceNorm_003/test.recipe b/res/TensorFlowLiteRecipes/Net_InstanceNorm_003/test.recipe
new file mode 100644
index 000000000..11abfdaf2
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_InstanceNorm_003/test.recipe
@@ -0,0 +1,253 @@
+operand {
+ name: "Input"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 8
+ dim: 6
+ dim: 12
+ }
+}
+operand {
+ name: "Div"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 8
+ dim: 6
+ dim: 12
+ }
+}
+operand {
+ name: "Beta"
+ type: FLOAT32
+ shape {
+ dim: 12
+ }
+ filler {
+ tag: "explicit"
+ arg: "1.9714"
+ arg: "1.4517"
+ arg: "1.20315"
+ arg: "0.287979"
+ arg: "0.161815"
+ arg: "-0.281398"
+ arg: "2.70276"
+ arg: "-0.166962"
+ arg: "0.266389"
+ arg: "0.890943"
+ arg: "-0.279833"
+ arg: "1.82808"
+ }
+}
+operand {
+ name: "Gamma"
+ type: FLOAT32
+ shape {
+ dim: 12
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.574708"
+ arg: "0.387735"
+ arg: "0.8995"
+ arg: "0.484296"
+ arg: "2.35851"
+ arg: "1.06661"
+ arg: "0.343602"
+ arg: "2.27583"
+ arg: "1.14559"
+ arg: "0.690169"
+ arg: "1.2044"
+ arg: "0.350952"
+ }
+}
+operand {
+ name: "Pow"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 12
+ }
+}
+operand {
+ name: "Add_as_terminal"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 8
+ dim: 6
+ dim: 12
+ }
+}
+operand {
+ name: "Epsilon"
+ type: FLOAT32
+ shape {
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.001"
+ }
+}
+operand {
+ name: "Zero_point_five"
+ type: FLOAT32
+ shape {
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.5"
+ }
+}
+operand {
+ name: "Mul_gamma"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 8
+ dim: 6
+ dim: 12
+ }
+}
+operand {
+ name: "Sub"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 8
+ dim: 6
+ dim: 12
+ }
+}
+operand {
+ name: "SquaredDifference"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 8
+ dim: 6
+ dim: 12
+ }
+}
+operand {
+ name: "Mean_of_ifm"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 12
+ }
+}
+operand {
+ name: "Reduction_indices"
+ type: INT32
+ shape {
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "2"
+ }
+}
+operand {
+ name: "Mean_as_variance"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 12
+ }
+}
+operand {
+ name: "Add_as_variance"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 12
+ }
+}
+operation {
+ type: "Mean"
+ input: "Input"
+ input: "Reduction_indices"
+ output: "Mean_of_ifm"
+ mean_options {
+ keep_dims: true
+ }
+}
+operation {
+ type: "SquaredDifference"
+ input: "Input"
+ input: "Mean_of_ifm"
+ output: "SquaredDifference"
+}
+operation {
+ type: "Mean"
+ input: "SquaredDifference"
+ input: "Reduction_indices"
+ output: "Mean_as_variance"
+ mean_options {
+ keep_dims: true
+ }
+}
+operation {
+ type: "Add"
+ input: "Mean_as_variance"
+ input: "Epsilon"
+ output: "Add_as_variance"
+ add_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Pow"
+ input: "Add_as_variance"
+ input: "Zero_point_five"
+ output: "Pow"
+}
+operation {
+ type: "Sub"
+ input: "Input"
+ input: "Mean_of_ifm"
+ output: "Sub"
+ sub_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Div"
+ input: "Sub"
+ input: "Pow"
+ output: "Div"
+ div_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Mul"
+ input: "Div"
+ input: "Gamma"
+ output: "Mul_gamma"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "Mul_gamma"
+ input: "Beta"
+ output: "Add_as_terminal"
+ add_options {
+ activation: RELU
+ }
+}
+input: "Input"
+output: "Add_as_terminal"
diff --git a/res/TensorFlowLiteRecipes/Net_InstanceNorm_003/test.rule b/res/TensorFlowLiteRecipes/Net_InstanceNorm_003/test.rule
new file mode 100644
index 000000000..ccdc9e6da
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_InstanceNorm_003/test.rule
@@ -0,0 +1,11 @@
+# To check if this network is converted to circle InstanceNorm op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "INSTANCE_NORM_EXIST" $(op_count INSTANCE_NORM) '=' 1
+RULE "NO_ADD" $(op_count ADD) '=' 0
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_POW" $(op_count POW) '=' 0
+RULE "NO_DIV" $(op_count DIV) '=' 0
+RULE "NO_SQUARED_DIFF" $(op_count SQUARED_DIFFERENCE) '=' 0
+RULE "NO_MEAN" $(op_count MEAN) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_InstanceNorm_004/test.recipe b/res/TensorFlowLiteRecipes/Net_InstanceNorm_004/test.recipe
new file mode 100644
index 000000000..0892f3e77
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_InstanceNorm_004/test.recipe
@@ -0,0 +1,294 @@
+# generated using tflchef-reverse
+# with tflite from https://github.com/Samsung/ONE/issues/7067#issuecomment-867203553
+
+operand {
+ name: "input_1"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 16
+ dim: 16
+ dim: 3
+ }
+}
+operand {
+ name: "instance_normalization/Mean"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 3
+ }
+}
+operand {
+ name: "instance_normalization/Mean/reduction_indices"
+ type: INT32
+ shape {
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "2"
+ }
+}
+operand {
+ name: "instance_normalization/Reshape"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 3
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "1"
+ arg: "1"
+ }
+}
+operand {
+ name: "instance_normalization/Reshape_1"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 3
+ }
+ filler {
+ tag: "explicit"
+ arg: "0"
+ arg: "0"
+ arg: "0"
+ }
+}
+operand {
+ name: "instance_normalization/add"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 3
+ }
+}
+operand {
+ name: "instance_normalization/add/y"
+ type: FLOAT32
+ shape {
+ }
+ filler {
+ tag: "explicit"
+ arg: "1e-09"
+ }
+}
+operand {
+ name: "instance_normalization/add_1"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 16
+ dim: 16
+ dim: 3
+ }
+}
+operand {
+ name: "instance_normalization/mul"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 16
+ dim: 16
+ dim: 3
+ }
+}
+operand {
+ name: "instance_normalization/reduce_std/Sqrt"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 3
+ }
+}
+operand {
+ name: "instance_normalization/reduce_std/reduce_variance/Mean"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 3
+ }
+}
+operand {
+ name: "instance_normalization/reduce_std/reduce_variance/Mean/reduction_indices"
+ type: INT32
+ shape {
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "2"
+ }
+}
+operand {
+ name: "instance_normalization/reduce_std/reduce_variance/Mean_1"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 3
+ }
+}
+operand {
+ name: "instance_normalization/reduce_std/reduce_variance/Mean_1/reduction_indices"
+ type: INT32
+ shape {
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "2"
+ }
+}
+operand {
+ name: "instance_normalization/reduce_std/reduce_variance/Square"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 16
+ dim: 16
+ dim: 3
+ }
+}
+operand {
+ name: "instance_normalization/reduce_std/reduce_variance/sub"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 16
+ dim: 16
+ dim: 3
+ }
+}
+operand {
+ name: "instance_normalization/sub"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 16
+ dim: 16
+ dim: 3
+ }
+}
+operand {
+ name: "instance_normalization/truediv"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 16
+ dim: 16
+ dim: 3
+ }
+}
+operation {
+ type: "Mean"
+ input: "input_1"
+ input: "instance_normalization/Mean/reduction_indices"
+ output: "instance_normalization/Mean"
+ mean_options {
+ keep_dims: true
+ }
+}
+operation {
+ type: "Mean"
+ input: "input_1"
+ input: "instance_normalization/reduce_std/reduce_variance/Mean/reduction_indices"
+ output: "instance_normalization/reduce_std/reduce_variance/Mean"
+ mean_options {
+ keep_dims: true
+ }
+}
+operation {
+ type: "Sub"
+ input: "input_1"
+ input: "instance_normalization/Mean"
+ output: "instance_normalization/sub"
+ sub_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Sub"
+ input: "input_1"
+ input: "instance_normalization/reduce_std/reduce_variance/Mean"
+ output: "instance_normalization/reduce_std/reduce_variance/sub"
+ sub_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Square"
+ input: "instance_normalization/reduce_std/reduce_variance/sub"
+ output: "instance_normalization/reduce_std/reduce_variance/Square"
+}
+operation {
+ type: "Mean"
+ input: "instance_normalization/reduce_std/reduce_variance/Square"
+ input: "instance_normalization/reduce_std/reduce_variance/Mean_1/reduction_indices"
+ output: "instance_normalization/reduce_std/reduce_variance/Mean_1"
+ mean_options {
+ keep_dims: true
+ }
+}
+operation {
+ type: "Sqrt"
+ input: "instance_normalization/reduce_std/reduce_variance/Mean_1"
+ output: "instance_normalization/reduce_std/Sqrt"
+}
+operation {
+ type: "Add"
+ input: "instance_normalization/reduce_std/Sqrt"
+ input: "instance_normalization/add/y"
+ output: "instance_normalization/add"
+ add_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Div"
+ input: "instance_normalization/sub"
+ input: "instance_normalization/add"
+ output: "instance_normalization/truediv"
+ div_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Mul"
+ input: "instance_normalization/truediv"
+ input: "instance_normalization/Reshape"
+ output: "instance_normalization/mul"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "instance_normalization/mul"
+ input: "instance_normalization/Reshape_1"
+ output: "instance_normalization/add_1"
+ add_options {
+ activation: NONE
+ }
+}
+input: "input_1"
+output: "instance_normalization/add_1"
diff --git a/res/TensorFlowLiteRecipes/Net_InstanceNorm_004/test.rule b/res/TensorFlowLiteRecipes/Net_InstanceNorm_004/test.rule
new file mode 100644
index 000000000..1a65b1af6
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_InstanceNorm_004/test.rule
@@ -0,0 +1,12 @@
+# To check if this network is converted to circle InstanceNorm op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "INSTANCE_NORM_EXIST" $(op_count INSTANCE_NORM) '=' 1
+RULE "NO_ADD" $(op_count ADD) '=' 0
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_SQRT" $(op_count SQRT) '=' 0
+RULE "NO_DIV" $(op_count DIV) '=' 0
+RULE "NO_SUB" $(op_count SUB) '=' 0
+RULE "NO_SQUARE" $(op_count SQUARE) '=' 0
+RULE "NO_MEAN" $(op_count MEAN) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_InstanceNorm_005/test.recipe b/res/TensorFlowLiteRecipes/Net_InstanceNorm_005/test.recipe
new file mode 100644
index 000000000..70952f232
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_InstanceNorm_005/test.recipe
@@ -0,0 +1,224 @@
+# generated using tflchef-reverse
+# with tflite from https://github.com/Samsung/ONE/issues/7067#issuecomment-867203553
+
+operand {
+ name: "input_1"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 16
+ dim: 16
+ dim: 3
+ }
+}
+operand {
+ name: "instance_normalization/Mean"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 3
+ }
+}
+operand {
+ name: "instance_normalization/Mean/reduction_indices"
+ type: INT32
+ shape {
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "2"
+ }
+}
+operand {
+ name: "instance_normalization/add"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 3
+ }
+}
+operand {
+ name: "instance_normalization/add/y"
+ type: FLOAT32
+ shape {
+ }
+ filler {
+ tag: "explicit"
+ arg: "1e-09"
+ }
+}
+operand {
+ name: "instance_normalization/reduce_std/Sqrt"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 3
+ }
+}
+operand {
+ name: "instance_normalization/reduce_std/reduce_variance/Mean"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 3
+ }
+}
+operand {
+ name: "instance_normalization/reduce_std/reduce_variance/Mean/reduction_indices"
+ type: INT32
+ shape {
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "2"
+ }
+}
+operand {
+ name: "instance_normalization/reduce_std/reduce_variance/Mean_1"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 3
+ }
+}
+operand {
+ name: "instance_normalization/reduce_std/reduce_variance/Mean_1/reduction_indices"
+ type: INT32
+ shape {
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "2"
+ }
+}
+operand {
+ name: "instance_normalization/reduce_std/reduce_variance/Square"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 16
+ dim: 16
+ dim: 3
+ }
+}
+operand {
+ name: "instance_normalization/reduce_std/reduce_variance/sub"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 16
+ dim: 16
+ dim: 3
+ }
+}
+operand {
+ name: "instance_normalization/sub"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 16
+ dim: 16
+ dim: 3
+ }
+}
+operand {
+ name: "instance_normalization/truediv"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 16
+ dim: 16
+ dim: 3
+ }
+}
+operation {
+ type: "Mean"
+ input: "input_1"
+ input: "instance_normalization/Mean/reduction_indices"
+ output: "instance_normalization/Mean"
+ mean_options {
+ keep_dims: true
+ }
+}
+operation {
+ type: "Mean"
+ input: "input_1"
+ input: "instance_normalization/reduce_std/reduce_variance/Mean/reduction_indices"
+ output: "instance_normalization/reduce_std/reduce_variance/Mean"
+ mean_options {
+ keep_dims: true
+ }
+}
+operation {
+ type: "Sub"
+ input: "input_1"
+ input: "instance_normalization/Mean"
+ output: "instance_normalization/sub"
+ sub_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Sub"
+ input: "input_1"
+ input: "instance_normalization/reduce_std/reduce_variance/Mean"
+ output: "instance_normalization/reduce_std/reduce_variance/sub"
+ sub_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Square"
+ input: "instance_normalization/reduce_std/reduce_variance/sub"
+ output: "instance_normalization/reduce_std/reduce_variance/Square"
+}
+operation {
+ type: "Mean"
+ input: "instance_normalization/reduce_std/reduce_variance/Square"
+ input: "instance_normalization/reduce_std/reduce_variance/Mean_1/reduction_indices"
+ output: "instance_normalization/reduce_std/reduce_variance/Mean_1"
+ mean_options {
+ keep_dims: true
+ }
+}
+operation {
+ type: "Sqrt"
+ input: "instance_normalization/reduce_std/reduce_variance/Mean_1"
+ output: "instance_normalization/reduce_std/Sqrt"
+}
+operation {
+ type: "Add"
+ input: "instance_normalization/reduce_std/Sqrt"
+ input: "instance_normalization/add/y"
+ output: "instance_normalization/add"
+ add_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Div"
+ input: "instance_normalization/sub"
+ input: "instance_normalization/add"
+ output: "instance_normalization/truediv"
+ div_options {
+ activation: NONE
+ }
+}
+input: "input_1"
+output: "instance_normalization/truediv"
diff --git a/res/TensorFlowLiteRecipes/Net_InstanceNorm_005/test.rule b/res/TensorFlowLiteRecipes/Net_InstanceNorm_005/test.rule
new file mode 100644
index 000000000..219fbce88
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_InstanceNorm_005/test.rule
@@ -0,0 +1,11 @@
+# To check if this network is converted to circle InstanceNorm op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "INSTANCE_NORM_EXIST" $(op_count INSTANCE_NORM) '=' 1
+RULE "NO_ADD" $(op_count ADD) '=' 0
+RULE "NO_SQRT" $(op_count SQRT) '=' 0
+RULE "NO_DIV" $(op_count DIV) '=' 0
+RULE "NO_SUB" $(op_count SUB) '=' 0
+RULE "NO_SQUARE" $(op_count SQUARE) '=' 0
+RULE "NO_MEAN" $(op_count MEAN) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_InstanceNorm_006/test.recipe b/res/TensorFlowLiteRecipes/Net_InstanceNorm_006/test.recipe
new file mode 100644
index 000000000..b0cafd4d5
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_InstanceNorm_006/test.recipe
@@ -0,0 +1,283 @@
+# InstanceNorm network with one element for gamma, beta
+
+operand {
+ name: "input_1"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 16
+ dim: 16
+ dim: 3
+ }
+}
+operand {
+ name: "instance_normalization/Mean"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 3
+ }
+}
+operand {
+ name: "instance_normalization/Mean/reduction_indices"
+ type: INT32
+ shape {
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "2"
+ }
+}
+operand {
+ name: "instance_normalization/Reshape"
+ type: FLOAT32
+ shape {
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ }
+}
+operand {
+ name: "instance_normalization/Reshape_1"
+ type: FLOAT32
+ shape {
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "0"
+ }
+}
+operand {
+ name: "instance_normalization/add"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 3
+ }
+}
+operand {
+ name: "instance_normalization/add/y"
+ type: FLOAT32
+ shape {
+ }
+ filler {
+ tag: "explicit"
+ arg: "1e-09"
+ }
+}
+operand {
+ name: "instance_normalization/add_1"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 16
+ dim: 16
+ dim: 3
+ }
+}
+operand {
+ name: "instance_normalization/mul"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 16
+ dim: 16
+ dim: 3
+ }
+}
+operand {
+ name: "instance_normalization/reduce_std/Sqrt"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 3
+ }
+}
+operand {
+ name: "instance_normalization/reduce_std/reduce_variance/Mean"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 3
+ }
+}
+operand {
+ name: "instance_normalization/reduce_std/reduce_variance/Mean/reduction_indices"
+ type: INT32
+ shape {
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "2"
+ }
+}
+operand {
+ name: "instance_normalization/reduce_std/reduce_variance/Mean_1"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 3
+ }
+}
+operand {
+ name: "instance_normalization/reduce_std/reduce_variance/Mean_1/reduction_indices"
+ type: INT32
+ shape {
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "2"
+ }
+}
+operand {
+ name: "instance_normalization/reduce_std/reduce_variance/Square"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 16
+ dim: 16
+ dim: 3
+ }
+}
+operand {
+ name: "instance_normalization/reduce_std/reduce_variance/sub"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 16
+ dim: 16
+ dim: 3
+ }
+}
+operand {
+ name: "instance_normalization/sub"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 16
+ dim: 16
+ dim: 3
+ }
+}
+operand {
+ name: "instance_normalization/truediv"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 16
+ dim: 16
+ dim: 3
+ }
+}
+operation {
+ type: "Mean"
+ input: "input_1"
+ input: "instance_normalization/Mean/reduction_indices"
+ output: "instance_normalization/Mean"
+ mean_options {
+ keep_dims: true
+ }
+}
+operation {
+ type: "Mean"
+ input: "input_1"
+ input: "instance_normalization/reduce_std/reduce_variance/Mean/reduction_indices"
+ output: "instance_normalization/reduce_std/reduce_variance/Mean"
+ mean_options {
+ keep_dims: true
+ }
+}
+operation {
+ type: "Sub"
+ input: "input_1"
+ input: "instance_normalization/Mean"
+ output: "instance_normalization/sub"
+ sub_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Sub"
+ input: "input_1"
+ input: "instance_normalization/reduce_std/reduce_variance/Mean"
+ output: "instance_normalization/reduce_std/reduce_variance/sub"
+ sub_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Square"
+ input: "instance_normalization/reduce_std/reduce_variance/sub"
+ output: "instance_normalization/reduce_std/reduce_variance/Square"
+}
+operation {
+ type: "Mean"
+ input: "instance_normalization/reduce_std/reduce_variance/Square"
+ input: "instance_normalization/reduce_std/reduce_variance/Mean_1/reduction_indices"
+ output: "instance_normalization/reduce_std/reduce_variance/Mean_1"
+ mean_options {
+ keep_dims: true
+ }
+}
+operation {
+ type: "Sqrt"
+ input: "instance_normalization/reduce_std/reduce_variance/Mean_1"
+ output: "instance_normalization/reduce_std/Sqrt"
+}
+operation {
+ type: "Add"
+ input: "instance_normalization/reduce_std/Sqrt"
+ input: "instance_normalization/add/y"
+ output: "instance_normalization/add"
+ add_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Div"
+ input: "instance_normalization/sub"
+ input: "instance_normalization/add"
+ output: "instance_normalization/truediv"
+ div_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Mul"
+ input: "instance_normalization/truediv"
+ input: "instance_normalization/Reshape"
+ output: "instance_normalization/mul"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "instance_normalization/mul"
+ input: "instance_normalization/Reshape_1"
+ output: "instance_normalization/add_1"
+ add_options {
+ activation: NONE
+ }
+}
+input: "input_1"
+output: "instance_normalization/add_1"
diff --git a/res/TensorFlowLiteRecipes/Net_InstanceNorm_006/test.rule b/res/TensorFlowLiteRecipes/Net_InstanceNorm_006/test.rule
new file mode 100644
index 000000000..1a65b1af6
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_InstanceNorm_006/test.rule
@@ -0,0 +1,12 @@
+# To check if this network is converted to circle InstanceNorm op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "INSTANCE_NORM_EXIST" $(op_count INSTANCE_NORM) '=' 1
+RULE "NO_ADD" $(op_count ADD) '=' 0
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_SQRT" $(op_count SQRT) '=' 0
+RULE "NO_DIV" $(op_count DIV) '=' 0
+RULE "NO_SUB" $(op_count SUB) '=' 0
+RULE "NO_SQUARE" $(op_count SQUARE) '=' 0
+RULE "NO_MEAN" $(op_count MEAN) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_InstanceNorm_007/test.recipe b/res/TensorFlowLiteRecipes/Net_InstanceNorm_007/test.recipe
new file mode 100644
index 000000000..b8e3924ed
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_InstanceNorm_007/test.recipe
@@ -0,0 +1,184 @@
+#
+# This was generated from https://github.com/Samsung/ONE/issues/7032#issuecomment-862238083
+# and some modifications
+#
+
+operand {
+ name: "Hole"
+ type: FLOAT32
+ shape {
+ dim: 1 dim: 1 dim: 1 dim: 32
+ }
+}
+operand {
+ name: "InstanceNorm/beta"
+ type: FLOAT32
+ shape {
+ dim: 32
+ }
+ filler {
+ tag: "constant"
+ arg: "0"
+ }
+}
+operand {
+ name: "InstanceNorm/instancenorm/add/y"
+ type: FLOAT32
+ shape {
+ }
+ filler {
+ tag: "explicit"
+ arg: "1e-06"
+ }
+}
+operand {
+ name: "InstanceNorm/moments/variance/reduction_indices"
+ type: INT32
+ shape {
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "2"
+ }
+}
+operand {
+ name: "InstanceNorm/moments/mean"
+ type: FLOAT32
+ shape {
+ dim: 1 dim: 1 dim: 1 dim: 32
+ }
+}
+operand {
+ name: "InstanceNorm/moments/SquaredDifference"
+ type: FLOAT32
+ shape {
+ dim: 1 dim: 1 dim: 1 dim: 32
+ }
+}
+operand {
+ name: "InstanceNorm/moments/variance"
+ type: FLOAT32
+ shape {
+ dim: 1 dim: 1 dim: 1 dim: 32
+ }
+}
+operand {
+ name: "InstanceNorm/instancenorm/add"
+ type: FLOAT32
+ shape {
+ dim: 1 dim: 1 dim: 1 dim: 32
+ }
+}
+operand {
+ name: "InstanceNorm/instancenorm/Rsqrt"
+ type: FLOAT32
+ shape {
+ dim: 1 dim: 1 dim: 1 dim: 32
+ }
+}
+operand {
+ name: "InstanceNorm/instancenorm/mul_1"
+ type: FLOAT32
+ shape {
+ dim: 1 dim: 1 dim: 1 dim: 32
+ }
+}
+operand {
+ name: "InstanceNorm/instancenorm/mul_2"
+ type: FLOAT32
+ shape {
+ dim: 1 dim: 1 dim: 1 dim: 32
+ }
+}
+operand {
+ name: "InstanceNorm/instancenorm/sub"
+ type: FLOAT32
+ shape {
+ dim: 1 dim: 1 dim: 1 dim: 32
+ }
+}
+operand {
+ name: "InstanceNorm/instancenorm/add_1"
+ type: FLOAT32
+ shape {
+ dim: 1 dim: 1 dim: 1 dim: 32
+ }
+}
+operation {
+ type: "Mean"
+ input: "Hole"
+ input: "InstanceNorm/moments/variance/reduction_indices"
+ output: "InstanceNorm/moments/mean"
+ mean_options {
+ keep_dims: true
+ }
+}
+operation {
+ type: "SquaredDifference"
+ input: "Hole"
+ input: "InstanceNorm/moments/mean"
+ output: "InstanceNorm/moments/SquaredDifference"
+}
+operation {
+ type: "Mean"
+ input: "InstanceNorm/moments/SquaredDifference"
+ input: "InstanceNorm/moments/variance/reduction_indices"
+ output: "InstanceNorm/moments/variance"
+ mean_options {
+ keep_dims: true
+ }
+}
+operation {
+ type: "Add"
+ input: "InstanceNorm/moments/variance"
+ input: "InstanceNorm/instancenorm/add/y"
+ output: "InstanceNorm/instancenorm/add"
+ add_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Rsqrt"
+ input: "InstanceNorm/instancenorm/add"
+ output: "InstanceNorm/instancenorm/Rsqrt"
+}
+operation {
+ type: "Mul"
+ input: "Hole"
+ input: "InstanceNorm/instancenorm/Rsqrt"
+ output: "InstanceNorm/instancenorm/mul_1"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Mul"
+ input: "InstanceNorm/moments/mean"
+ input: "InstanceNorm/instancenorm/Rsqrt"
+ output: "InstanceNorm/instancenorm/mul_2"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Sub"
+ input: "InstanceNorm/beta"
+ input: "InstanceNorm/instancenorm/mul_2"
+ output: "InstanceNorm/instancenorm/sub"
+ sub_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "InstanceNorm/instancenorm/mul_1"
+ input: "InstanceNorm/instancenorm/sub"
+ output: "InstanceNorm/instancenorm/add_1"
+ add_options {
+ activation: NONE
+ }
+}
+input: "Hole"
+output: "InstanceNorm/instancenorm/add_1"
diff --git a/res/TensorFlowLiteRecipes/Net_InstanceNorm_007/test.rule b/res/TensorFlowLiteRecipes/Net_InstanceNorm_007/test.rule
new file mode 100644
index 000000000..e8af35f05
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_InstanceNorm_007/test.rule
@@ -0,0 +1,13 @@
+# To check if this network is converted to circle InstanceNorm op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "INSTANCE_NORM_EXIST" $(op_count INSTANCE_NORM) '=' 1
+RULE "NO_ADD" $(op_count ADD) '=' 0
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_POW" $(op_count POW) '=' 0
+RULE "NO_DIV" $(op_count DIV) '=' 0
+RULE "NO_SQUARED_DIFF" $(op_count SQUARED_DIFFERENCE) '=' 0
+RULE "NO_MEAN" $(op_count MEAN) '=' 0
+RULE "NO_RSQRT" $(op_count RSQRT) '=' 0
+RULE "NO_SUB" $(op_count SUB) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_Maximum_Minimum_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Maximum_Minimum_000/test.recipe
new file mode 100644
index 000000000..e1d3c0a09
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Maximum_Minimum_000/test.recipe
@@ -0,0 +1,86 @@
+operand {
+ name: "Const"
+ type: FLOAT32
+ shape {
+ }
+ filler {
+ tag: "explicit"
+ arg: "6"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "Const_1"
+ type: FLOAT32
+ shape {
+ }
+ filler {
+ tag: "explicit"
+ arg: "0"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "Hole"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 3
+ dim: 3
+ dim: 4
+ }
+ quant {
+ min: 0
+ max: 255
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "Maximum"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 3
+ dim: 3
+ dim: 4
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "Minimum"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 3
+ dim: 3
+ dim: 4
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operation {
+ type: "Minimum"
+ input: "Hole"
+ input: "Const"
+ output: "Minimum"
+}
+operation {
+ type: "Maximum"
+ input: "Minimum"
+ input: "Const_1"
+ output: "Maximum"
+}
+input: "Hole"
+output: "Maximum"
diff --git a/res/TensorFlowLiteRecipes/Net_Maximum_Minimum_000/test.rule b/res/TensorFlowLiteRecipes/Net_Maximum_Minimum_000/test.rule
new file mode 100644
index 000000000..9d6340727
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Maximum_Minimum_000/test.rule
@@ -0,0 +1,7 @@
+# To check if Maximum and Minimum are fused to Relu6.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "RELU6_EXIST" $(op_count RELU6) '=' 1
+RULE "NO_MAXIMUM" $(op_count MAXIMUM) '=' 0
+RULE "NO_MINIMUM" $(op_count MINIMUM) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_Mean_Mean_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Mean_Mean_000/test.recipe
new file mode 100644
index 000000000..b2cd68f9f
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Mean_Mean_000/test.recipe
@@ -0,0 +1,47 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 3 dim: 8 dim: 8 dim: 4 }
+}
+operand {
+ name: "inner"
+ type: FLOAT32
+ shape { dim: 3 dim: 4 }
+}
+operand {
+ name: "reduction_indices1"
+ type: INT32
+ shape { dim: 2 }
+ filler { tag: "explicit" arg: "1" arg: "2" }
+}
+operand {
+ name: "reduction_indices2"
+ type: INT32
+ shape { dim: 1 }
+ filler { tag: "explicit" arg: "1" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 3 }
+}
+operation {
+ type: "Mean"
+ mean_options {
+ keep_dims: false
+ }
+ input: "ifm"
+ input: "reduction_indices1"
+ output: "inner"
+}
+operation {
+ type: "Mean"
+ mean_options {
+ keep_dims: false
+ }
+ input: "inner"
+ input: "reduction_indices2"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Net_Mean_Mean_000/test.rule b/res/TensorFlowLiteRecipes/Net_Mean_Mean_000/test.rule
new file mode 100644
index 000000000..36520da1b
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Mean_Mean_000/test.rule
@@ -0,0 +1,5 @@
+# To check if consecutive Mean ops are fused into a single Mean op.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "MEAN_SINGLE" $(op_count MEAN) '=' 1
diff --git a/res/TensorFlowLiteRecipes/Net_Mean_Mean_001/test.recipe b/res/TensorFlowLiteRecipes/Net_Mean_Mean_001/test.recipe
new file mode 100644
index 000000000..2dfc07107
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Mean_Mean_001/test.recipe
@@ -0,0 +1,47 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 3 dim: 8 dim: 8 dim: 4 }
+}
+operand {
+ name: "inner"
+ type: FLOAT32
+ shape { dim: 3 dim: 8 dim: 1 dim: 4 }
+}
+operand {
+ name: "reduction_indices1"
+ type: INT32
+ shape { dim: 1 }
+ filler { tag: "explicit" arg: "2" }
+}
+operand {
+ name: "reduction_indices2"
+ type: INT32
+ shape { dim: 1 }
+ filler { tag: "explicit" arg: "1" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 3 dim: 1 dim: 1 dim: 4 }
+}
+operation {
+ type: "Mean"
+ mean_options {
+ keep_dims: true
+ }
+ input: "ifm"
+ input: "reduction_indices1"
+ output: "inner"
+}
+operation {
+ type: "Mean"
+ mean_options {
+ keep_dims: true
+ }
+ input: "inner"
+ input: "reduction_indices2"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Net_Mean_Mean_001/test.rule b/res/TensorFlowLiteRecipes/Net_Mean_Mean_001/test.rule
new file mode 100644
index 000000000..36520da1b
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Mean_Mean_001/test.rule
@@ -0,0 +1,5 @@
+# To check if consecutive Mean ops are fused into a single Mean op.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "MEAN_SINGLE" $(op_count MEAN) '=' 1
diff --git a/res/TensorFlowLiteRecipes/Net_Mean_Transpose_Mean_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Mean_Transpose_Mean_000/test.recipe
new file mode 100644
index 000000000..b4bec1486
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Mean_Transpose_Mean_000/test.recipe
@@ -0,0 +1,66 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 3 dim: 8 dim: 8 dim: 4 }
+}
+operand {
+ name: "inner1"
+ type: FLOAT32
+ shape { dim: 3 dim: 8 dim: 1 dim: 4 }
+}
+operand {
+ name: "inner2"
+ type: FLOAT32
+ shape { dim: 3 dim: 1 dim: 4 dim: 8 }
+}
+operand {
+ name: "reduction_indices1"
+ type: INT32
+ shape { dim: 1 }
+ filler { tag: "explicit" arg: "2" }
+}
+operand {
+ name: "reduction_indices2"
+ type: INT32
+ shape { dim: 1 }
+ filler { tag: "explicit" arg: "3" }
+}
+operand {
+ name: "perm"
+ type: INT32
+ shape { dim: 4 }
+ filler { tag: "explicit" arg: "0" arg: "2" arg: "3" arg: "1" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 3 dim: 1 dim: 4 dim: 1 }
+}
+operation {
+ type: "Mean"
+ mean_options {
+ keep_dims: true
+ }
+ input: "ifm"
+ input: "reduction_indices1"
+ output: "inner1"
+}
+operation {
+ type: "Transpose"
+ transpose_options {
+ }
+ input: "inner1"
+ input: "perm"
+ output: "inner2"
+}
+operation {
+ type: "Mean"
+ mean_options {
+ keep_dims: true
+ }
+ input: "inner2"
+ input: "reduction_indices2"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Net_Mean_Transpose_Mean_000/test.rule b/res/TensorFlowLiteRecipes/Net_Mean_Transpose_Mean_000/test.rule
new file mode 100644
index 000000000..36520da1b
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Mean_Transpose_Mean_000/test.rule
@@ -0,0 +1,5 @@
+# To check if Mean-Transpose-Mean is fused into a single Mean op.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "MEAN_SINGLE" $(op_count MEAN) '=' 1
diff --git a/res/TensorFlowLiteRecipes/Net_Preactivation_BN_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Preactivation_BN_000/test.recipe
new file mode 100644
index 000000000..3658a2bff
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Preactivation_BN_000/test.recipe
@@ -0,0 +1,171 @@
+operand {
+ name: "Hole"
+ type: FLOAT32
+ shape {
+ dim: 16
+ dim: 4
+ dim: 4
+ dim: 16
+ }
+}
+operand {
+ name: "Weights1"
+ type: FLOAT32
+ shape {
+ dim: 16
+ dim: 1
+ dim: 1
+ dim: 16
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operand {
+ name: "Bias1"
+ type: FLOAT32
+ shape {
+ dim: 16
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operand {
+ name: "Conv1"
+ type: FLOAT32
+ shape {
+ dim: 16
+ dim: 4
+ dim: 4
+ dim: 16
+ }
+}
+operand {
+ name: "Gamma"
+ type: FLOAT32
+ shape {
+ dim: 16
+ }
+ filler {
+ tag: "explicit"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "Mul"
+ type: FLOAT32
+ shape {
+ dim: 16
+ dim: 4
+ dim: 4
+ dim: 16
+ }
+}
+operand {
+ name: "Beta"
+ type: FLOAT32
+ shape {
+ dim: 16
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operand {
+ name: "Add"
+ type: FLOAT32
+ shape {
+ dim: 16
+ dim: 4
+ dim: 4
+ dim: 16
+ }
+}
+operand {
+ name: "Weights2"
+ type: FLOAT32
+ shape {
+ dim: 16
+ dim: 1
+ dim: 1
+ dim: 16
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operand {
+ name: "Bias2"
+ type: FLOAT32
+ shape {
+ dim: 16
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operand {
+ name: "Conv2"
+ type: FLOAT32
+ shape {
+ dim: 16
+ dim: 4
+ dim: 4
+ dim: 16
+ }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+ input: "Hole"
+ input: "Weights1"
+ input: "Bias1"
+ output: "Conv1"
+}
+operation {
+ type: "Mul"
+ input: "Conv1"
+ input: "Gamma"
+ output: "Mul"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "Mul"
+ input: "Beta"
+ output: "Add"
+ add_options {
+ activation: RELU
+ }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+ input: "Add"
+ input: "Weights2"
+ input: "Bias2"
+ output: "Conv2"
+}
+input: "Hole"
+output: "Conv2"
diff --git a/res/TensorFlowLiteRecipes/Net_Preactivation_BN_000/test.rule b/res/TensorFlowLiteRecipes/Net_Preactivation_BN_000/test.rule
new file mode 100644
index 000000000..25be16fc0
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Preactivation_BN_000/test.rule
@@ -0,0 +1,8 @@
+# To check if Preactivation BN is fused to Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "CONV_COUNT" $(op_count CONV_2D) '=' 2
+RULE "RELU_EXIST" $(op_count RELU) '=' 1
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_ADD" $(op_count ADD) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_Reshape_Neg_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Reshape_Neg_000/test.recipe
new file mode 100644
index 000000000..51cf3b4ca
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Reshape_Neg_000/test.recipe
@@ -0,0 +1,35 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 dim: 6 }
+}
+operand {
+ name: "shape1"
+ type: INT32
+ shape { dim: 2 }
+ filler { tag: "explicit" arg: "6" arg: "6" }
+}
+operand {
+ name: "reshape_out"
+ type: FLOAT32
+ shape { dim: 6 dim: 6 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 6 dim: 6 }
+}
+operation {
+ type: "Reshape"
+ input: "ifm"
+ input: "shape1"
+ output: "reshape_out"
+}
+operation {
+ type: "Neg"
+ input: "reshape_out"
+ output: "ofm"
+}
+
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Net_Reshape_Reshape_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Reshape_Reshape_000/test.recipe
new file mode 100644
index 000000000..2acb2e71b
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Reshape_Reshape_000/test.recipe
@@ -0,0 +1,42 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 dim: 6 }
+}
+operand {
+ name: "shape1"
+ type: INT32
+ shape { dim: 2 }
+ filler { tag: "explicit" arg: "6" arg: "6" }
+}
+operand {
+ name: "shape2"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "6" arg: "2" arg: "3" }
+}
+operand {
+ name: "reshape_out"
+ type: FLOAT32
+ shape { dim: 6 dim: 6 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 6 dim: 2 dim: 3 }
+}
+operation {
+ type: "Reshape"
+ input: "ifm"
+ input: "shape1"
+ output: "reshape_out"
+}
+operation {
+ type: "Reshape"
+ input: "reshape_out"
+ input: "shape2"
+ output: "ofm"
+}
+
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Net_Reshape_Reshape_000/test.rule b/res/TensorFlowLiteRecipes/Net_Reshape_Reshape_000/test.rule
new file mode 100644
index 000000000..9a70601c8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Reshape_Reshape_000/test.rule
@@ -0,0 +1,5 @@
+# To check if redundant Reshape is removed.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "RESHAPE_EXIST" $(op_count RESHAPE) '=' 1
diff --git a/res/TensorFlowLiteRecipes/Net_Squeeze_Squeeze_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Squeeze_Squeeze_000/test.recipe
new file mode 100644
index 000000000..b84058b0e
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Squeeze_Squeeze_000/test.recipe
@@ -0,0 +1,29 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 1 dim: 1 }
+}
+operand {
+ name: "t1"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 1 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 }
+}
+operation {
+ type: "Squeeze"
+ squeeze_options { squeeze_dim: 3 }
+ input: "ifm"
+ output: "t1"
+}
+operation {
+ type: "Squeeze"
+ squeeze_options { squeeze_dim: 2 }
+ input: "t1"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Net_Squeeze_Squeeze_000/test.rule b/res/TensorFlowLiteRecipes/Net_Squeeze_Squeeze_000/test.rule
new file mode 100644
index 000000000..66a105a73
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Squeeze_Squeeze_000/test.rule
@@ -0,0 +1,6 @@
+# To check if Squeeze is substituted to Reshape op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "SQUEEZE_COUNT" $(op_count SQUEEZE) '=' 0
+RULE "RESHAPE_COUNT" $(op_count RESHAPE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Net_StridedSlice_StridedSlice_000/test.recipe b/res/TensorFlowLiteRecipes/Net_StridedSlice_StridedSlice_000/test.recipe
new file mode 100644
index 000000000..04c0e9084
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_StridedSlice_StridedSlice_000/test.recipe
@@ -0,0 +1,77 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 4 }
+}
+operand {
+ name: "begin"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "0" arg: "0" arg: "0" }
+}
+operand {
+ name: "end"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "1" arg: "2" arg: "4" }
+}
+operand {
+ name: "strides"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "1" arg: "1" arg: "1" }
+}
+operand {
+ name: "output_1"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 4 }
+}
+operation {
+ type: "StridedSlice"
+ strided_slice_options {
+ begin_mask: 0
+ end_mask: 0
+ ellipsis_mask: 0
+ new_axis_mask: 0
+ shrink_axis_mask: 0
+ }
+ input: "ifm"
+ input: "begin"
+ input: "end"
+ input: "strides"
+ output: "output_1"
+}
+operand {
+ name: "begin_2"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "0" arg: "0" arg: "0" }
+}
+operand {
+ name: "end_2"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "0" arg: "1" arg: "0" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 }
+}
+operation {
+ type: "StridedSlice"
+ strided_slice_options {
+ begin_mask: 5
+ end_mask: 5
+ ellipsis_mask: 0
+ new_axis_mask: 0
+ shrink_axis_mask: 2
+ }
+ input: "output_1"
+ input: "begin_2"
+ input: "end_2"
+ input: "strides"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Net_StridedSlice_StridedSlice_000/test.rule b/res/TensorFlowLiteRecipes/Net_StridedSlice_StridedSlice_000/test.rule
new file mode 100644
index 000000000..f1a660d19
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_StridedSlice_StridedSlice_000/test.rule
@@ -0,0 +1,5 @@
+# To check if unnecessary StridedSlice is removed.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "STRIDEDSLICE_EXIST" $(op_count STRIDEDSLICE) '=' 1
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_Add_000/test.recipe b/res/TensorFlowLiteRecipes/Net_TConv_Add_000/test.recipe
new file mode 100644
index 000000000..dd2ab9d80
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_TConv_Add_000/test.recipe
@@ -0,0 +1,94 @@
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 3
+ dim: 3
+ dim: 2
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operand {
+ name: "Addition"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ dim: 1
+ }
+}
+operand {
+ name: "Addition_add_param"
+ type: FLOAT32
+ shape {
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "-2.04724"
+ }
+}
+operand {
+ name: "Hole"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 2
+ dim: 2
+ dim: 2
+ }
+}
+operand {
+ name: "conv2d_transpose"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ dim: 1
+ }
+}
+operand {
+ name: "input_size"
+ type: INT32
+ shape {
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "4"
+ arg: "4"
+ arg: "1"
+ }
+}
+operation {
+ type: "TransposeConv"
+ input: "input_size"
+ input: "filter"
+ input: "Hole"
+ output: "conv2d_transpose"
+ transpose_conv_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "conv2d_transpose"
+ input: "Addition_add_param"
+ output: "Addition"
+ add_options {
+ activation: NONE
+ }
+}
+input: "Hole"
+output: "Addition"
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_Add_000/test.rule b/res/TensorFlowLiteRecipes/Net_TConv_Add_000/test.rule
new file mode 100644
index 000000000..894d642a3
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_TConv_Add_000/test.rule
@@ -0,0 +1,6 @@
+# To check if Add op is fused to Transposed Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "TCONV_EXIST" $(op_count TRANSPOSE_CONV) '=' 1
+RULE "NO_ADD" $(op_count ADD) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_Add_001/test.recipe b/res/TensorFlowLiteRecipes/Net_TConv_Add_001/test.recipe
new file mode 100644
index 000000000..67cce945f
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_TConv_Add_001/test.recipe
@@ -0,0 +1,100 @@
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 3
+ dim: 3
+ dim: 2
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operand {
+ name: "Addition"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ dim: 1
+ }
+}
+operand {
+ name: "Addition_add_param"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "2" arg: "3" arg: "4"
+ arg: "-1" arg: "-2" arg: "-3" arg: "-4"
+ arg: "1" arg: "2" arg: "3" arg: "4"
+ arg: "-1" arg: "-2" arg: "-3" arg: "-4"
+ }
+}
+operand {
+ name: "Hole"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 2
+ dim: 2
+ dim: 2
+ }
+}
+operand {
+ name: "conv2d_transpose"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ dim: 1
+ }
+}
+operand {
+ name: "input_size"
+ type: INT32
+ shape {
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "4"
+ arg: "4"
+ arg: "1"
+ }
+}
+operation {
+ type: "TransposeConv"
+ input: "input_size"
+ input: "filter"
+ input: "Hole"
+ output: "conv2d_transpose"
+ transpose_conv_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "conv2d_transpose"
+ input: "Addition_add_param"
+ output: "Addition"
+ add_options {
+ activation: NONE
+ }
+}
+input: "Hole"
+output: "Addition"
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_Add_001/test.rule b/res/TensorFlowLiteRecipes/Net_TConv_Add_001/test.rule
new file mode 100644
index 000000000..86afc47f6
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_TConv_Add_001/test.rule
@@ -0,0 +1,6 @@
+# To check if Add op is not fused to Transposed Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "TCONV_EXIST" $(op_count TRANSPOSE_CONV) '=' 1
+RULE "NO_FUSION" $(op_count ADD) '=' 1
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_Add_002/test.recipe b/res/TensorFlowLiteRecipes/Net_TConv_Add_002/test.recipe
new file mode 100644
index 000000000..9021911be
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_TConv_Add_002/test.recipe
@@ -0,0 +1,94 @@
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 3
+ dim: 3
+ dim: 2
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operand {
+ name: "Addition"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ dim: 1
+ }
+}
+operand {
+ name: "Addition_add_param"
+ type: FLOAT32
+ shape {
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "-2.04724"
+ }
+}
+operand {
+ name: "Hole"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 2
+ dim: 2
+ dim: 2
+ }
+}
+operand {
+ name: "conv2d_transpose"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ dim: 1
+ }
+}
+operand {
+ name: "input_size"
+ type: INT32
+ shape {
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "4"
+ arg: "4"
+ arg: "1"
+ }
+}
+operation {
+ type: "TransposeConv"
+ input: "input_size"
+ input: "filter"
+ input: "Hole"
+ output: "conv2d_transpose"
+ transpose_conv_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "Addition_add_param"
+ input: "conv2d_transpose"
+ output: "Addition"
+ add_options {
+ activation: NONE
+ }
+}
+input: "Hole"
+output: "Addition"
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_Add_002/test.rule b/res/TensorFlowLiteRecipes/Net_TConv_Add_002/test.rule
new file mode 100644
index 000000000..894d642a3
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_TConv_Add_002/test.rule
@@ -0,0 +1,6 @@
+# To check if Add op is fused to Transposed Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "TCONV_EXIST" $(op_count TRANSPOSE_CONV) '=' 1
+RULE "NO_ADD" $(op_count ADD) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_000/test.recipe b/res/TensorFlowLiteRecipes/Net_TConv_BN_000/test.recipe
index 65248f23b..065946d37 100644
--- a/res/TensorFlowLiteRecipes/Net_TConv_BN_000/test.recipe
+++ b/res/TensorFlowLiteRecipes/Net_TConv_BN_000/test.recipe
@@ -12,9 +12,6 @@ operand {
arg: "0.0"
arg: "0.1"
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "FusedBatchNormV3"
@@ -25,9 +22,6 @@ operand {
dim: 4
dim: 1
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "FusedBatchNormV3_add_param"
@@ -39,9 +33,6 @@ operand {
tag: "explicit"
arg: "-2.04724"
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "FusedBatchNormV3_mul_0"
@@ -52,9 +43,6 @@ operand {
dim: 4
dim: 1
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "FusedBatchNormV3_mul_0_param"
@@ -66,9 +54,6 @@ operand {
tag: "explicit"
arg: "2.00834"
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "Hole"
@@ -79,11 +64,6 @@ operand {
dim: 2
dim: 1
}
- quant {
- min: 0
- max: 255
- quantized_dimension: 0
- }
}
operand {
name: "conv2d_transpose"
@@ -94,9 +74,6 @@ operand {
dim: 4
dim: 1
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "conv2d_transpose/input_sizes"
@@ -111,9 +88,6 @@ operand {
arg: "4"
arg: "1"
}
- quant {
- quantized_dimension: 0
- }
}
operation {
type: "TransposeConv"
@@ -125,6 +99,7 @@ operation {
padding: VALID
stride_w: 1
stride_h: 1
+ activation: NONE
}
}
operation {
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_001/test.recipe b/res/TensorFlowLiteRecipes/Net_TConv_BN_001/test.recipe
new file mode 100644
index 000000000..25557ab21
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_TConv_BN_001/test.recipe
@@ -0,0 +1,124 @@
+operand {
+ name: "Const_transposed"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 3
+ dim: 3
+ dim: 2
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operand {
+ name: "FusedBatchNormV3"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ dim: 1
+ }
+}
+operand {
+ name: "FusedBatchNormV3_add_param"
+ type: FLOAT32
+ shape {
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "-2.04724"
+ }
+}
+operand {
+ name: "FusedBatchNormV3_mul_0"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ dim: 1
+ }
+}
+operand {
+ name: "FusedBatchNormV3_mul_0_param"
+ type: FLOAT32
+ shape {
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "2.00834"
+ }
+}
+operand {
+ name: "Hole"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 2
+ dim: 2
+ dim: 2
+ }
+}
+operand {
+ name: "conv2d_transpose"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ dim: 1
+ }
+}
+operand {
+ name: "conv2d_transpose/input_sizes"
+ type: INT32
+ shape {
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "4"
+ arg: "4"
+ arg: "1"
+ }
+}
+operation {
+ type: "TransposeConv"
+ input: "conv2d_transpose/input_sizes"
+ input: "Const_transposed"
+ input: "Hole"
+ output: "conv2d_transpose"
+ transpose_conv_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ activation: NONE
+ }
+}
+operation {
+ type: "Mul"
+ input: "conv2d_transpose"
+ input: "FusedBatchNormV3_mul_0_param"
+ output: "FusedBatchNormV3_mul_0"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "FusedBatchNormV3_mul_0"
+ input: "FusedBatchNormV3_add_param"
+ output: "FusedBatchNormV3"
+ add_options {
+ activation: NONE
+ }
+}
+input: "Hole"
+output: "FusedBatchNormV3"
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_001/test.rule b/res/TensorFlowLiteRecipes/Net_TConv_BN_001/test.rule
new file mode 100644
index 000000000..0988ecf28
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_TConv_BN_001/test.rule
@@ -0,0 +1,7 @@
+# To check if BatchNorm op(mul + add) is fused to Transposed Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "TCONV_EXIST" $(op_count TRANSPOSE_CONV) '=' 1
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_ADD" $(op_count ADD) '=' 0
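
The fusion these rules verify rests on TConv being linear in its filter: TConv(x, W) * gamma + beta equals TConv(x, W * gamma) + beta for per-output-channel gamma and beta. A sketch with a plain matmul standing in for the linear TConv (illustrative, not the pass implementation):

```python
import numpy as np

rng = np.random.default_rng(0)
x = rng.standard_normal((8, 2)).astype(np.float32)       # flattened inputs
W = rng.standard_normal((2, 1)).astype(np.float32)       # linear-op filter
gamma, beta = np.float32(2.00834), np.float32(-2.04724)  # BN constants above

unfused = (x @ W) * gamma + beta   # TConv -> Mul -> Add, as in the recipe
fused = x @ (W * gamma) + beta     # folded filter; beta becomes the bias

# Linearity makes both graphs numerically identical, which is what the
# NO_MUL / NO_ADD checks rely on.
assert np.allclose(unfused, fused, atol=1e-5)
```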
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_002/test.recipe b/res/TensorFlowLiteRecipes/Net_TConv_BN_002/test.recipe
new file mode 100644
index 000000000..48068e971
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_TConv_BN_002/test.recipe
@@ -0,0 +1,127 @@
+# TConv with asymmetric filter + BN + Relu6
+operand {
+ name: "Hole"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 2
+ }
+}
+operand {
+ name: "conv2d_transpose/input_sizes"
+ type: INT32
+ shape {
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "5"
+ arg: "1"
+ arg: "2"
+ }
+}
+operand {
+ name: "FusedBatchNormV3"
+ type: FLOAT32
+ shape {
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "-2.04724"
+ arg: "-7.80109"
+ }
+}
+operand {
+ name: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes"
+ type: FLOAT32
+ shape {
+ dim: 2
+ dim: 5
+ dim: 1
+ dim: 2
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operand {
+ name: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes2"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 5
+ dim: 1
+ dim: 2
+ }
+}
+operand {
+ name: "FusedBatchNormV3_mul_0"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 5
+ dim: 1
+ dim: 2
+ }
+}
+operand {
+ name: "FusedBatchNormV3_mul_0_param"
+ type: FLOAT32
+ shape {
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "2.00834"
+ arg: "1.00344"
+ }
+}
+operand {
+ name: "Relu6"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 5
+ dim: 1
+ dim: 2
+ }
+}
+operation {
+ type: "TransposeConv"
+ input: "conv2d_transpose/input_sizes"
+ input: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes"
+ input: "Hole"
+ output: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes2"
+ transpose_conv_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ activation: NONE
+ }
+}
+operation {
+ type: "Mul"
+ input: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes2"
+ input: "FusedBatchNormV3_mul_0_param"
+ output: "FusedBatchNormV3_mul_0"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "FusedBatchNormV3_mul_0"
+ input: "FusedBatchNormV3"
+ output: "Relu6"
+ add_options {
+ activation: RELU6
+ }
+}
+input: "Hole"
+output: "Relu6"
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_002/test.rule b/res/TensorFlowLiteRecipes/Net_TConv_BN_002/test.rule
new file mode 100644
index 000000000..dfc392758
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_TConv_BN_002/test.rule
@@ -0,0 +1,8 @@
+# To check if BatchNorm op(mul + add) is fused to Transposed Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "TCONV_EXIST" $(op_count TRANSPOSE_CONV) '=' 1
+RULE "RELU6_EXIST" $(op_count RELU6) '=' 1
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_ADD" $(op_count ADD) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_003/test.recipe b/res/TensorFlowLiteRecipes/Net_TConv_BN_003/test.recipe
new file mode 100644
index 000000000..cb1959a58
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_TConv_BN_003/test.recipe
@@ -0,0 +1,136 @@
+operand {
+ name: "Const_transposed"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 3
+ dim: 3
+ dim: 2
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operand {
+ name: "Output"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ dim: 1
+ }
+}
+operand {
+ name: "FusedBatchNormV3_add_param"
+ type: FLOAT32
+ shape {
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "-2.04724"
+ }
+}
+operand {
+ name: "FusedBatchNormV3_mul_0"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ dim: 1
+ }
+}
+operand {
+ name: "FusedBatchNormV3_mul_0_param"
+ type: FLOAT32
+ shape {
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "2.00834"
+ }
+}
+operand {
+ name: "Input"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 2
+ dim: 2
+ dim: 2
+ }
+}
+operand {
+ name: "conv2d_transpose"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ dim: 1
+ }
+}
+operand {
+ name: "conv2d_transpose/input_sizes"
+ type: INT32
+ shape {
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "4"
+ arg: "4"
+ arg: "1"
+ }
+}
+operand {
+ name: "conv2d_transpose/bias"
+ type: FLOAT32
+ shape {
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "1.03"
+ }
+}
+operation {
+ type: "TransposeConv"
+ input: "conv2d_transpose/input_sizes"
+ input: "Const_transposed"
+ input: "Input"
+ input: "conv2d_transpose/bias"
+ output: "conv2d_transpose"
+ transpose_conv_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ activation: NONE
+ }
+}
+operation {
+ type: "Mul"
+ input: "conv2d_transpose"
+ input: "FusedBatchNormV3_mul_0_param"
+ output: "FusedBatchNormV3_mul_0"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "FusedBatchNormV3_mul_0"
+ input: "FusedBatchNormV3_add_param"
+ output: "Output"
+ add_options {
+ activation: NONE
+ }
+}
+input: "Input"
+output: "Output"
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_003/test.rule b/res/TensorFlowLiteRecipes/Net_TConv_BN_003/test.rule
new file mode 100644
index 000000000..0988ecf28
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_TConv_BN_003/test.rule
@@ -0,0 +1,7 @@
+# To check if BatchNorm op(mul + add) is fused to Transposed Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "TCONV_EXIST" $(op_count TRANSPOSE_CONV) '=' 1
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_ADD" $(op_count ADD) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_004/test.recipe b/res/TensorFlowLiteRecipes/Net_TConv_BN_004/test.recipe
new file mode 100644
index 000000000..5bc06dd1d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_TConv_BN_004/test.recipe
@@ -0,0 +1,150 @@
+operand {
+ name: "conv2d_transpose/input_sizes"
+ type: INT32
+ shape {
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "4"
+ arg: "4"
+ arg: "16"
+ }
+}
+operand {
+ name: "Const_transposed"
+ type: FLOAT32
+ shape {
+ dim: 16
+ dim: 3
+ dim: 3
+ dim: 2
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operand {
+ name: "Input"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 2
+ dim: 2
+ dim: 2
+ }
+}
+operand {
+ name: "conv2d_transpose/bias"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 16
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operand {
+ name: "conv2d_transpose"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ dim: 16
+ }
+}
+operation {
+ type: "TransposeConv"
+ input: "conv2d_transpose/input_sizes"
+ input: "Const_transposed"
+ input: "Input"
+ input: "conv2d_transpose/bias"
+ output: "conv2d_transpose"
+ transpose_conv_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ activation: NONE
+ }
+}
+
+operand {
+ name: "FusedBatchNormV3_mul_0"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ dim: 16
+ }
+}
+operand {
+ name: "FusedBatchNormV3_mul_0_param"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 16
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operation {
+ type: "Mul"
+ input: "conv2d_transpose"
+ input: "FusedBatchNormV3_mul_0_param"
+ output: "FusedBatchNormV3_mul_0"
+ mul_options {
+ activation: NONE
+ }
+}
+
+operand {
+ name: "FusedBatchNormV3_add_param"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 16
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operand {
+ name: "Output"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ dim: 16
+ }
+}
+operation {
+ type: "Add"
+ input: "FusedBatchNormV3_mul_0"
+ input: "FusedBatchNormV3_add_param"
+ output: "Output"
+ add_options {
+ activation: NONE
+ }
+}
+input: "Input"
+output: "Output"
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_004/test.rule b/res/TensorFlowLiteRecipes/Net_TConv_BN_004/test.rule
new file mode 100644
index 000000000..0988ecf28
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_TConv_BN_004/test.rule
@@ -0,0 +1,7 @@
+# To check if BatchNorm op(mul + add) is fused to Transposed Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "TCONV_EXIST" $(op_count TRANSPOSE_CONV) '=' 1
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_ADD" $(op_count ADD) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_005/test.recipe b/res/TensorFlowLiteRecipes/Net_TConv_BN_005/test.recipe
new file mode 100644
index 000000000..3b227dfb1
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_TConv_BN_005/test.recipe
@@ -0,0 +1,127 @@
+# Tconv with asymmetric filter + BN + ReLU
+operand {
+ name: "Hole"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 2
+ }
+}
+operand {
+ name: "conv2d_transpose/input_sizes"
+ type: INT32
+ shape {
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "5"
+ arg: "1"
+ arg: "2"
+ }
+}
+operand {
+ name: "FusedBatchNormV3"
+ type: FLOAT32
+ shape {
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "-2.04724"
+ arg: "-7.80109"
+ }
+}
+operand {
+ name: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes"
+ type: FLOAT32
+ shape {
+ dim: 2
+ dim: 5
+ dim: 1
+ dim: 2
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operand {
+ name: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes2"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 5
+ dim: 1
+ dim: 2
+ }
+}
+operand {
+ name: "FusedBatchNormV3_mul_0"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 5
+ dim: 1
+ dim: 2
+ }
+}
+operand {
+ name: "FusedBatchNormV3_mul_0_param"
+ type: FLOAT32
+ shape {
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "2.00834"
+ arg: "1.00344"
+ }
+}
+operand {
+ name: "Relu"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 5
+ dim: 1
+ dim: 2
+ }
+}
+operation {
+ type: "TransposeConv"
+ input: "conv2d_transpose/input_sizes"
+ input: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes"
+ input: "Hole"
+ output: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes2"
+ transpose_conv_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ activation: NONE
+ }
+}
+operation {
+ type: "Mul"
+ input: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes2"
+ input: "FusedBatchNormV3_mul_0_param"
+ output: "FusedBatchNormV3_mul_0"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "FusedBatchNormV3_mul_0"
+ input: "FusedBatchNormV3"
+ output: "Relu"
+ add_options {
+ activation: RELU
+ }
+}
+input: "Hole"
+output: "Relu"
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_005/test.rule b/res/TensorFlowLiteRecipes/Net_TConv_BN_005/test.rule
new file mode 100644
index 000000000..241bda988
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_TConv_BN_005/test.rule
@@ -0,0 +1,8 @@
+# To check if BatchNorm op(mul + add) is fused to Transposed Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "TCONV_EXIST" $(op_count TRANSPOSE_CONV) '=' 1
+RULE "RELU_EXIST" $(op_count RELU) '=' 1
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_ADD" $(op_count ADD) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_Transpose_Abs_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Transpose_Abs_000/test.recipe
new file mode 100644
index 000000000..1ce7c0d62
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Transpose_Abs_000/test.recipe
@@ -0,0 +1,34 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 6 dim: 5 dim: 1 }
+}
+operand {
+ name: "perm"
+ type: INT32
+ shape { dim: 4 }
+ filler { tag: "explicit" arg: "0" arg: "3" arg: "2" arg: "1"}
+}
+operand {
+ name: "transpose"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 5 dim: 6 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 5 dim: 6 }
+}
+operation {
+ type: "Transpose"
+ input: "ifm"
+ input: "perm"
+ output: "transpose"
+}
+operation {
+ type: "Abs"
+ input: "transpose"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Net_Transpose_Abs_000/test.reverse b/res/TensorFlowLiteRecipes/Net_Transpose_Abs_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Transpose_Abs_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Net_Transpose_Add_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Transpose_Add_000/test.recipe
new file mode 100644
index 000000000..daf14d502
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Transpose_Add_000/test.recipe
@@ -0,0 +1,48 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 6 dim: 5 dim: 1 }
+}
+operand {
+ name: "perm"
+ type: INT32
+ shape { dim: 4 }
+ filler { tag: "explicit" arg: "0" arg: "3" arg: "2" arg: "1"}
+}
+operand {
+ name: "transpose"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 5 dim: 6 }
+}
+operand {
+ name: "add_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 6 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 5 dim: 6 }
+}
+operation {
+ type: "Transpose"
+ input: "ifm"
+ input: "perm"
+ output: "transpose"
+}
+operation {
+ type: "Add"
+ input: "transpose"
+ input: "add_const"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Net_Transpose_Add_000/test.reverse b/res/TensorFlowLiteRecipes/Net_Transpose_Add_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Transpose_Add_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/NotEqual_U8_000/test.recipe b/res/TensorFlowLiteRecipes/NotEqual_U8_000/test.recipe
new file mode 100644
index 000000000..50670171a
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/NotEqual_U8_000/test.recipe
@@ -0,0 +1,28 @@
+operand {
+ name: "ifm1"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: -1 max: 1 scale: 0.0078431373 zero_point: 128 }
+}
+operand {
+ name: "ifm2"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: -1 max: 1 scale: 0.0078431373 zero_point: 128 }
+}
+operand {
+ name: "ofm"
+ type: BOOL
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "NotEqual"
+ notequal_options {
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/NotEqual_U8_000/test.reverse b/res/TensorFlowLiteRecipes/NotEqual_U8_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/NotEqual_U8_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/PRelu_001/test.recipe b/res/TensorFlowLiteRecipes/PRelu_001/test.recipe
new file mode 100644
index 000000000..c18acdbbc
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/PRelu_001/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "alpha"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "0.1" arg: "0.3" arg: "0.5"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "PRelu"
+ input: "ifm"
+ input: "alpha"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/PRelu_001/test.reverse b/res/TensorFlowLiteRecipes/PRelu_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/PRelu_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/PadV2_001/test.recipe b/res/TensorFlowLiteRecipes/PadV2_001/test.recipe
new file mode 100644
index 000000000..0eafec931
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/PadV2_001/test.recipe
@@ -0,0 +1,68 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "relu"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "padding"
+ type: INT32
+ shape { dim: 4 dim: 2 }
+ filler {
+ tag: "explicit"
+ arg: "0" arg: "0"
+ arg: "1" arg: "1"
+ arg: "1" arg: "1"
+ arg: "0" arg: "0"
+ }
+}
+operand {
+ name: "constant_values"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "explicit"
+ arg: "-100.00"
+ }
+}
+operand {
+ name: "padv2"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 5 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "ReLU"
+ input: "ifm"
+ output: "relu"
+}
+operation {
+ type: "PadV2"
+ input: "relu"
+ input: "padding"
+ input: "constant_values"
+ output: "padv2"
+}
+operation {
+ type: "MaxPool2D"
+ maxpool2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ filter_height: 3
+ filter_width: 3
+ }
+ input: "padv2"
+ output: "ofm"
+}
+
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/PadV2_001/test.rule b/res/TensorFlowLiteRecipes/PadV2_001/test.rule
new file mode 100644
index 000000000..29b080b1e
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/PadV2_001/test.rule
@@ -0,0 +1,8 @@
+# To check if PadV2 is converted to Pad
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "PAD_EXIST" $(op_count PAD) '=' 1
+RULE "MAXPOOL2D_EXIST" $(op_count MAX_POOL_2D) '=' 1
+RULE "RELU_EXIST" $(op_count RELU) '=' 1
+RULE "NO_PADV2" $(op_count PADV2) '=' 0
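
The substitution looks safe here specifically because the PadV2 input comes from a ReLU: with non-negative values, zero padding (plain Pad) and -100 padding pool to the same maxima. A quick numpy check of that rationale (my reading of why the test is built this way, not the pass's stated precondition):

```python
import numpy as np

x = np.abs(np.random.default_rng(0).standard_normal((3, 3))).astype(np.float32)

pad_v2 = np.pad(x, 1, constant_values=-100.0)  # PadV2 with the recipe's constant
pad_0 = np.pad(x, 1, constant_values=0.0)      # plain Pad (zeros)

def maxpool3x3(a):
    # VALID 3x3, stride-1 max pool: 5x5 in, 3x3 out.
    return np.array([[a[i:i + 3, j:j + 3].max() for j in range(3)]
                     for i in range(3)], dtype=np.float32)

# Every window overlaps real (>= 0) data, so neither padding value can
# change the maximum; both graphs produce identical outputs.
assert np.array_equal(maxpool3x3(pad_v2), maxpool3x3(pad_0))
```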
diff --git a/res/TensorFlowLiteRecipes/Part_Add_SVDF_000/test.recipe b/res/TensorFlowLiteRecipes/Part_Add_SVDF_000/test.recipe
new file mode 100644
index 000000000..d357a059f
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Part_Add_SVDF_000/test.recipe
@@ -0,0 +1,82 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 }
+}
+operand {
+ name: "weight_feature"
+ type: FLOAT32
+ shape { dim: 64 dim: 16 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "weight_time"
+ type: FLOAT32
+ shape { dim: 64 dim: 8 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "input_activation_state"
+ type: FLOAT32
+ is_variable: true
+ shape { dim: 1 dim: 512 }
+}
+operand {
+ name: "svdf"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 }
+}
+operation {
+ type: "SVDF"
+ svdf_options {
+ rank: 1
+ activation: RELU
+ asymmetric_quantize_inputs: false
+ }
+ input: "ifm1"
+ input: "weight_feature"
+ input: "weight_time"
+ input: "bias"
+ input: "input_activation_state"
+ output: "svdf"
+}
+operation {
+ type: "Add"
+ add_options {
+ activation: NONE
+ }
+ input: "svdf"
+ input: "ifm2"
+ output: "ofm"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
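
One non-obvious number in this recipe is the 1x512 activation state. Under the TFLite SVDF convention (as I understand it), the state stores memory_size past frames for each of num_units * rank filters:

```python
# SVDF state size = batch * (num_units * rank) * memory_size (TFLite layout).
batch, num_units, rank = 1, 64, 1   # rank from svdf_options, units from "bias"
memory_size = 8                     # trailing dim of "weight_time" (64 x 8)
assert batch * num_units * rank * memory_size == 512   # matches the recipe
```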
diff --git a/res/TensorFlowLiteRecipes/Part_Add_Sqrt_000/test.recipe b/res/TensorFlowLiteRecipes/Part_Add_Sqrt_000/test.recipe
new file mode 100644
index 000000000..1125246d1
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Part_Add_Sqrt_000/test.recipe
@@ -0,0 +1,48 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "add"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Add"
+ add_options {
+ activation: NONE
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "add"
+}
+operation {
+ type: "Sqrt"
+ input: "add"
+ output: "ofm1"
+}
+operation {
+ type: "Sqrt"
+ input: "add"
+ output: "ofm2"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm1"
+output: "ofm2"
diff --git a/res/TensorFlowLiteRecipes/Part_Add_Sqrt_Rsqrt_000/test.recipe b/res/TensorFlowLiteRecipes/Part_Add_Sqrt_Rsqrt_000/test.recipe
new file mode 100644
index 000000000..c9cee9960
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Part_Add_Sqrt_Rsqrt_000/test.recipe
@@ -0,0 +1,68 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "add"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Add"
+ add_options {
+ activation: NONE
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "add"
+}
+operation {
+ type: "Sqrt"
+ input: "add"
+ output: "sqrt1"
+}
+operation {
+ type: "Sqrt"
+ input: "add"
+ output: "sqrt2"
+}
+operation {
+ type: "Rsqrt"
+ input: "sqrt1"
+ output: "ofm1"
+}
+operation {
+ type: "Rsqrt"
+ input: "sqrt2"
+ output: "ofm2"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm1"
+output: "ofm2"
diff --git a/res/TensorFlowLiteRecipes/Part_Add_Sub_000/test.recipe b/res/TensorFlowLiteRecipes/Part_Add_Sub_000/test.recipe
new file mode 100644
index 000000000..8cd878ac3
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Part_Add_Sub_000/test.recipe
@@ -0,0 +1,67 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ifm3"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ifm4"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "add1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "add2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Add"
+ add_options {
+ activation: NONE
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "add1"
+}
+operation {
+ type: "Add"
+ add_options {
+ activation: NONE
+ }
+ input: "add1"
+ input: "ifm3"
+ output: "add2"
+}
+operation {
+ type: "Sub"
+ sub_options {
+ activation: NONE
+ }
+ input: "add2"
+ input: "ifm4"
+ output: "ofm"
+}
+input: "ifm1"
+input: "ifm2"
+input: "ifm3"
+input: "ifm4"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Part_Add_Sub_001/test.recipe b/res/TensorFlowLiteRecipes/Part_Add_Sub_001/test.recipe
new file mode 100644
index 000000000..3d765bfaf
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Part_Add_Sub_001/test.recipe
@@ -0,0 +1,67 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ifm3"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ifm4"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "some/node/add1;and/another"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "some/node/add2;and/another"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Add"
+ add_options {
+ activation: NONE
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "some/node/add1;and/another"
+}
+operation {
+ type: "Add"
+ add_options {
+ activation: NONE
+ }
+ input: "some/node/add1;and/another"
+ input: "ifm3"
+ output: "some/node/add2;and/another"
+}
+operation {
+ type: "Sub"
+ sub_options {
+ activation: NONE
+ }
+ input: "some/node/add2;and/another"
+ input: "ifm4"
+ output: "ofm"
+}
+input: "ifm1"
+input: "ifm2"
+input: "ifm3"
+input: "ifm4"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Part_Add_Sub_002/test.recipe b/res/TensorFlowLiteRecipes/Part_Add_Sub_002/test.recipe
new file mode 100644
index 000000000..25cce49f3
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Part_Add_Sub_002/test.recipe
@@ -0,0 +1,59 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "const"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "add1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "add2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Add"
+ add_options {
+ activation: NONE
+ }
+ input: "ifm"
+ input: "const"
+ output: "add1"
+}
+operation {
+ type: "Add"
+ add_options {
+ activation: NONE
+ }
+ input: "add1"
+ input: "const"
+ output: "add2"
+}
+operation {
+ type: "Sub"
+ sub_options {
+ activation: NONE
+ }
+ input: "add2"
+ input: "const"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Part_If_Add_Sub_000/test.recipe b/res/TensorFlowLiteRecipes/Part_If_Add_Sub_000/test.recipe
new file mode 100644
index 000000000..e946d6509
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Part_If_Add_Sub_000/test.recipe
@@ -0,0 +1,128 @@
+version: 1
+
+graph {
+ operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+ }
+ operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+ }
+ operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+ }
+ operation {
+ type: "Add"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ name: "IF_ELSE"
+}
+
+graph {
+ operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+ }
+ operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+ }
+ operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+ }
+ operation {
+ type: "Mul"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ mul_options {
+ activation: NONE
+ }
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ name: "IF_THEN"
+}
+
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+}
+operand {
+ name: "cond"
+ type: BOOL
+ shape { }
+}
+operand {
+ name: "add"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+}
+operand {
+ name: "sub"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+}
+operation {
+ type: "Add"
+ input: "ifm1"
+ input: "ifm2"
+ output: "add"
+ add_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Sub"
+ input: "ifm1"
+ input: "ifm2"
+ output: "sub"
+ sub_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "If"
+ input: "cond"
+ input: "add"
+ input: "sub"
+ output: "ofm"
+ if_options {
+ then_subgraph_index: 2
+ else_subgraph_index: 1
+ }
+}
+input: "cond"
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
+name: "Main"
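
The then/else indices read backwards at first glance. tflchef appears to number the main graph as subgraph 0 and the explicit graph blocks 1..N in order of appearance, so IF_ELSE (the first block) is 1 and IF_THEN (the second) is 2, matching if_options above. A one-liner restating that assumed mapping:

```python
# Assumed tflchef numbering: main graph = 0, graph blocks = 1..N in order.
index = {name: i + 1 for i, name in enumerate(["IF_ELSE", "IF_THEN"])}
assert index == {"IF_ELSE": 1, "IF_THEN": 2}   # then=2, else=1 as above
```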
diff --git a/res/TensorFlowLiteRecipes/Part_If_Add_Sub_001/test.recipe b/res/TensorFlowLiteRecipes/Part_If_Add_Sub_001/test.recipe
new file mode 100644
index 000000000..6a3bfab75
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Part_If_Add_Sub_001/test.recipe
@@ -0,0 +1,204 @@
+version: 1
+
+graph {
+ operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+ }
+ operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+ }
+ operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+ }
+ operation {
+ type: "Add"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ name: "IF_THEN_THEN"
+}
+
+graph {
+ operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+ }
+ operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+ }
+ operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+ }
+ operation {
+ type: "Mul"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ mul_options {
+ activation: NONE
+ }
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ name: "IF_THEN_ELSE"
+}
+
+graph {
+ operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+ }
+ operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+ }
+ operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+ }
+ operation {
+ type: "Add"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ name: "IF_ELSE"
+}
+
+graph {
+ operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+ }
+ operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+ }
+ operand {
+ name: "cond"
+ type: BOOL
+ shape { dim: 1 }
+ filler {
+ tag: "explicit"
+ arg: "T"
+ }
+ }
+ operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+ }
+ operation {
+ type: "If"
+ input: "cond"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ if_options {
+ then_subgraph_index: 1
+ else_subgraph_index: 2
+ }
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ name: "IF_THEN"
+}
+
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+}
+operand {
+ name: "cond"
+ type: BOOL
+ shape { dim: 1 }
+ filler {
+ tag: "explicit"
+ arg: "T"
+ }
+}
+operand {
+ name: "add"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+}
+operand {
+ name: "sub"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 2 dim: 3 }
+}
+operation {
+ type: "Add"
+ input: "ifm1"
+ input: "ifm2"
+ output: "add"
+ add_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Sub"
+ input: "ifm1"
+ input: "ifm2"
+ output: "sub"
+ sub_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "If"
+ input: "cond"
+ input: "add"
+ input: "sub"
+ output: "ofm"
+ if_options {
+ then_subgraph_index: 4
+ else_subgraph_index: 3
+ }
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
+name: "Main"
diff --git a/res/TensorFlowLiteRecipes/Part_Mul_Sqrt_FC_nobias_000/test.recipe b/res/TensorFlowLiteRecipes/Part_Mul_Sqrt_FC_nobias_000/test.recipe
new file mode 100644
index 000000000..a712d2ac3
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Part_Mul_Sqrt_FC_nobias_000/test.recipe
@@ -0,0 +1,63 @@
+operand {
+ name: "in1"
+ type: FLOAT32
+ shape { dim: 2 dim: 4 }
+}
+operand {
+ name: "in2"
+ type: FLOAT32
+ shape { dim: 2 dim: 4 }
+}
+operand {
+ name: "mul"
+ type: FLOAT32
+ shape { dim: 2 dim: 4 }
+}
+operand {
+ name: "weight"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "sqrtout"
+ type: FLOAT32
+ shape { dim: 2 dim: 4 }
+}
+operand {
+ name: "fcout"
+ type: FLOAT32
+ shape { dim: 2 dim: 4 }
+}
+operation {
+ type: "Mul"
+ input: "in1"
+ input: "in2"
+ output: "mul"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Sqrt"
+ input: "mul"
+ output: "sqrtout"
+}
+operation {
+ type: "FullyConnected"
+ fullyconnected_options {
+ activation: NONE
+ }
+ input: "mul"
+ input: "weight"
+ input: ""
+ output: "fcout"
+}
+input: "in1"
+input: "in2"
+output: "fcout"
+output: "sqrtout"
diff --git a/res/TensorFlowLiteRecipes/Part_Split_Add_000/test.recipe b/res/TensorFlowLiteRecipes/Part_Split_Add_000/test.recipe
new file mode 100644
index 000000000..1d20443c8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Part_Split_Add_000/test.recipe
@@ -0,0 +1,47 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 6 dim: 1 dim: 2 }
+}
+operand {
+ name: "split_dim"
+ type: INT32
+ shape { }
+ filler { tag: "explicit" arg: "0" }
+}
+operand {
+ name: "split1"
+ type: FLOAT32
+ shape { dim: 3 dim: 1 dim: 2 }
+}
+operand {
+ name: "split2"
+ type: FLOAT32
+ shape { dim: 3 dim: 1 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 3 dim: 1 dim: 2 }
+}
+operation {
+ type: "Split"
+ split_options {
+ num_splits: 2
+ }
+ input: "split_dim"
+ input: "ifm"
+ output: "split1"
+ output: "split2"
+}
+operation {
+ type: "Add"
+ input: "split1"
+ input: "split2"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_000/test.recipe b/res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_000/test.recipe
new file mode 100644
index 000000000..e0a6fe2aa
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_000/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Sqrt"
+ input: "ifm"
+ output: "sqrt"
+}
+operation {
+ type: "Rsqrt"
+ input: "sqrt"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_001/test.recipe b/res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_001/test.recipe
new file mode 100644
index 000000000..89f74772e
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_001/test.recipe
@@ -0,0 +1,47 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Sqrt"
+ input: "ifm"
+ output: "sqrt"
+}
+operation {
+ type: "Sqrt"
+ input: "sqrt"
+ output: "sqrt2"
+}
+operation {
+ type: "Rsqrt"
+ input: "sqrt2"
+ output: "rsqrt"
+}
+operation {
+ type: "Rsqrt"
+ input: "rsqrt"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_002/test.recipe b/res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_002/test.recipe
new file mode 100644
index 000000000..2e7e13240
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_002/test.recipe
@@ -0,0 +1,47 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Sqrt"
+ input: "ifm"
+ output: "sqrt"
+}
+operation {
+ type: "Rsqrt"
+ input: "sqrt"
+ output: "rsqrt"
+}
+operation {
+ type: "Sqrt"
+ input: "rsqrt"
+ output: "sqrt2"
+}
+operation {
+ type: "Rsqrt"
+ input: "sqrt2"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_003/test.recipe b/res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_003/test.recipe
new file mode 100644
index 000000000..1cd57ae12
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_003/test.recipe
@@ -0,0 +1,47 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Sqrt"
+ input: "ifm"
+ output: "sqrt"
+}
+operation {
+ type: "Rsqrt"
+ input: "sqrt"
+ output: "rsqrt"
+}
+operation {
+ type: "Rsqrt"
+ input: "rsqrt"
+ output: "rsqrt2"
+}
+operation {
+ type: "Sqrt"
+ input: "rsqrt2"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_004/test.recipe b/res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_004/test.recipe
new file mode 100644
index 000000000..3b4458480
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_004/test.recipe
@@ -0,0 +1,38 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Sqrt"
+ input: "ifm"
+ output: "sqrt"
+}
+operation {
+ type: "Rsqrt"
+ input: "sqrt"
+ output: "ofm1"
+}
+operation {
+ type: "Rsqrt"
+ input: "sqrt"
+ output: "ofm2"
+}
+input: "ifm"
+output: "ofm1"
+output: "ofm2"
diff --git a/res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_Add_000/test.recipe b/res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_Add_000/test.recipe
new file mode 100644
index 000000000..6618fff22
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_Add_000/test.recipe
@@ -0,0 +1,56 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "add"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Rsqrt"
+ input: "ifm"
+ output: "rsqrt"
+}
+operation {
+ type: "Sqrt"
+ input: "rsqrt"
+ output: "sqrt"
+}
+operation {
+ type: "Rsqrt"
+ input: "rsqrt"
+ output: "rsqrt2"
+}
+operation {
+ type: "Add"
+ add_options {
+ activation: NONE
+ }
+ input: "sqrt"
+ input: "rsqrt2"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_Add_001/test.recipe b/res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_Add_001/test.recipe
new file mode 100644
index 000000000..dd3f69bea
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_Add_001/test.recipe
@@ -0,0 +1,61 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt3"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Rsqrt"
+ input: "ifm"
+ output: "rsqrt"
+}
+operation {
+ type: "Sqrt"
+ input: "rsqrt"
+ output: "sqrt"
+}
+operation {
+ type: "Rsqrt"
+ input: "rsqrt"
+ output: "rsqrt2"
+}
+operation {
+ type: "Rsqrt"
+ input: "rsqrt2"
+ output: "rsqrt3"
+}
+operation {
+ type: "Add"
+ add_options {
+ activation: NONE
+ }
+ input: "sqrt"
+ input: "rsqrt3"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_Add_002/test.recipe b/res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_Add_002/test.recipe
new file mode 100644
index 000000000..23b7458c9
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_Add_002/test.recipe
@@ -0,0 +1,71 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt3"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt4"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Rsqrt"
+ input: "ifm"
+ output: "rsqrt"
+}
+operation {
+ type: "Sqrt"
+ input: "rsqrt"
+ output: "sqrt"
+}
+operation {
+ type: "Rsqrt"
+ input: "rsqrt"
+ output: "rsqrt2"
+}
+operation {
+ type: "Rsqrt"
+ input: "sqrt"
+ output: "rsqrt3"
+}
+operation {
+ type: "Rsqrt"
+ input: "rsqrt2"
+ output: "rsqrt4"
+}
+operation {
+ type: "Add"
+ add_options {
+ activation: NONE
+ }
+ input: "rsqrt3"
+ input: "rsqrt4"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_Add_003/test.recipe b/res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_Add_003/test.recipe
new file mode 100644
index 000000000..c2dae2e86
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_Add_003/test.recipe
@@ -0,0 +1,47 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Rsqrt"
+ input: "ifm1"
+ output: "rsqrt"
+}
+operation {
+ type: "Sqrt"
+ input: "ifm2"
+ output: "sqrt"
+}
+operation {
+ type: "Add"
+ add_options {
+ activation: NONE
+ }
+ input: "rsqrt"
+ input: "sqrt"
+ output: "ofm"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_Add_004/test.recipe b/res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_Add_004/test.recipe
new file mode 100644
index 000000000..c1693f72e
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Part_Sqrt_Rsqrt_Add_004/test.recipe
@@ -0,0 +1,41 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "rsqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "sqrt"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Rsqrt"
+ input: "ifm"
+ output: "rsqrt"
+}
+operation {
+ type: "Sqrt"
+ input: "rsqrt"
+ output: "sqrt"
+}
+operation {
+ type: "Add"
+ add_options {
+ activation: NONE
+ }
+ input: "rsqrt"
+ input: "sqrt"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Part_Tanh_FC_nobias/test.recipe b/res/TensorFlowLiteRecipes/Part_Tanh_FC_nobias/test.recipe
new file mode 100644
index 000000000..ead0c33ad
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Part_Tanh_FC_nobias/test.recipe
@@ -0,0 +1,42 @@
+operand {
+ name: "in"
+ type: FLOAT32
+ shape { dim: 2 dim: 4 }
+}
+operand {
+ name: "Tanh"
+ type: FLOAT32
+ shape { dim: 2 dim: 4 }
+}
+operand {
+ name: "weight"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "out"
+ type: FLOAT32
+ shape { dim: 2 dim: 4 }
+}
+operation {
+ type: "Tanh"
+ input: "in"
+ output: "Tanh"
+}
+operation {
+ type: "FullyConnected"
+ fullyconnected_options {
+ activation: NONE
+ }
+ input: "Tanh"
+ input: "weight"
+ input: ""
+ output: "out"
+}
+input: "in"
+output: "out"
diff --git a/res/TensorFlowLiteRecipes/Part_While_000/test.readme b/res/TensorFlowLiteRecipes/Part_While_000/test.readme
new file mode 100644
index 000000000..7e2684c38
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Part_While_000/test.readme
@@ -0,0 +1,4 @@
+test.readme of Part_While_000
+
+MAXIMUM and MINIMUM Ops exist to clamp the random input to 0
+so that this model loops from 0 to 10.
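
Concretely, Minimum(x, 0) followed by Maximum(·, 0) pins the counter's start value to exactly 0 for any input, so the Add-by-1 body runs a fixed 10 iterations before Less(x, 10) fails. A plain-Python restatement of the control flow:

```python
def clamp_to_zero(x: float) -> float:
    # Minimum(x, 0) then Maximum(result, 0) is 0 for every real x.
    return max(min(x, 0.0), 0.0)

x, steps = clamp_to_zero(123.456), 0
while x < 10.0:   # WHILE_COND: Less(x, 10)
    x += 1.0      # WHILE_BODY: Add(x, 1)
    steps += 1
assert (x, steps) == (10.0, 10)
```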
diff --git a/res/TensorFlowLiteRecipes/Part_While_000/test.recipe b/res/TensorFlowLiteRecipes/Part_While_000/test.recipe
new file mode 100644
index 000000000..02324dfdc
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Part_While_000/test.recipe
@@ -0,0 +1,124 @@
+version: 1
+
+graph {
+ operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim:1 }
+ }
+ operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim:1 }
+ filler {
+ tag: "explicit"
+ arg: "10"
+ }
+ }
+ operand {
+ name: "ofm"
+ type: BOOL
+ shape { dim:1 }
+ }
+ operation {
+ type: "Less"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ }
+ input: "ifm1"
+ output: "ofm"
+ name: "WHILE_COND"
+}
+
+graph {
+ operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim:1 }
+ }
+ operand {
+ name: "ifm3"
+ type: FLOAT32
+ shape { dim:1 }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ }
+ }
+ operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim:1 }
+ }
+ operation {
+ type: "Add"
+ input: "ifm1"
+ input: "ifm3"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+ }
+ input: "ifm1"
+ output: "ofm"
+ name: "WHILE_BODY"
+}
+
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim:1 }
+}
+operand {
+ name: "zero"
+ type: FLOAT32
+ shape { dim:1 }
+ filler {
+ tag: "explicit"
+ arg: "0"
+ }
+}
+operand {
+ name: "min"
+ type: FLOAT32
+ shape { dim:1 }
+}
+operand {
+ name: "max"
+ type: FLOAT32
+ shape { dim:1 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim:1 }
+}
+operation {
+ type: "Minimum"
+ maximum_options {
+ }
+ input: "ifm1"
+ input: "zero"
+ output: "min"
+}
+operation {
+ type: "Maximum"
+ maximum_options {
+ }
+ input: "min"
+ input: "zero"
+ output: "max"
+}
+operation {
+ type: "While"
+ input: "max"
+ output: "ofm"
+ while_options {
+ body_subgraph_index: 2
+ cond_subgraph_index: 1
+ }
+}
+input: "ifm1"
+output: "ofm"
+name: "Main"
diff --git a/res/TensorFlowLiteRecipes/Part_While_000/test.rule b/res/TensorFlowLiteRecipes/Part_While_000/test.rule
new file mode 100644
index 000000000..dee60deab
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Part_While_000/test.rule
@@ -0,0 +1,5 @@
+# To check if the While op is preserved in the circle model
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "WHILE_EXIST" $(op_count WHILE) '=' 1
diff --git a/res/TensorFlowLiteRecipes/Part_While_001/test.readme b/res/TensorFlowLiteRecipes/Part_While_001/test.readme
new file mode 100644
index 000000000..304c49483
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Part_While_001/test.readme
@@ -0,0 +1,5 @@
+test.readme of Part_While_001
+
+This model has a WHILE Op inside the WHILE_BODY subgraph.
+MAXIMUM and MINIMUM Ops exist to clamp the random input to 0
+so that this model loops from 0 to 10.
diff --git a/res/TensorFlowLiteRecipes/Part_While_001/test.recipe b/res/TensorFlowLiteRecipes/Part_While_001/test.recipe
new file mode 100644
index 000000000..c088053b3
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Part_While_001/test.recipe
@@ -0,0 +1,203 @@
+version: 1
+
+graph {
+ operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { }
+ }
+ operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { }
+ filler {
+ tag: "explicit"
+ arg: "10"
+ }
+ }
+ operand {
+ name: "ofm"
+ type: BOOL
+ shape { }
+ }
+ operation {
+ type: "Less"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ }
+ input: "ifm1"
+ output: "ofm"
+ name: "WHILE_WHILE_COND"
+}
+
+graph {
+ operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { }
+ }
+ operand {
+ name: "ifm3"
+ type: FLOAT32
+ shape { }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ }
+ }
+ operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { }
+ }
+ operation {
+ type: "Add"
+ input: "ifm1"
+ input: "ifm3"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+ }
+ input: "ifm1"
+ output: "ofm"
+ name: "WHILE_WHILE_BODY"
+}
+
+graph {
+ operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { }
+ }
+ operand {
+ name: "ifm3"
+ type: FLOAT32
+ shape { }
+ filler {
+ tag: "explicit"
+ arg: "10"
+ }
+ }
+ operand {
+ name: "ofm"
+ type: BOOL
+ shape { }
+ }
+ operation {
+ type: "Less"
+ input: "ifm1"
+ input: "ifm3"
+ output: "ofm"
+ }
+ input: "ifm1"
+ output: "ofm"
+ name: "WHILE_COND"
+}
+
+graph {
+ operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { }
+ }
+ operand {
+ name: "ifm3"
+ type: FLOAT32
+ shape { }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ }
+ }
+ operand {
+ name: "add"
+ type: FLOAT32
+ shape { }
+ }
+ operand {
+ name: "ofm1"
+ type: FLOAT32
+ shape { }
+ }
+ operation {
+ type: "Add"
+ input: "ifm1"
+ input: "ifm3"
+ output: "add"
+ add_options {
+ activation: NONE
+ }
+ }
+ operation {
+ type: "While"
+ input: "add"
+ output: "ofm1"
+ while_options {
+ cond_subgraph_index: 1
+ body_subgraph_index: 2
+ }
+ }
+ input: "ifm1"
+ output: "ofm1"
+ name: "WHILE_BODY"
+}
+
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { }
+}
+operand {
+ name: "zero"
+ type: FLOAT32
+ shape { }
+ filler {
+ tag: "explicit"
+ arg: "0"
+ }
+}
+operand {
+ name: "min"
+ type: FLOAT32
+ shape { }
+}
+operand {
+ name: "max"
+ type: FLOAT32
+ shape { }
+}
+operand {
+ name: "ofm1"
+ type: FLOAT32
+ shape { }
+}
+operation {
+ type: "Minimum"
+ maximum_options {
+ }
+ input: "ifm1"
+ input: "zero"
+ output: "min"
+}
+operation {
+ type: "Maximum"
+ maximum_options {
+ }
+ input: "min"
+ input: "zero"
+ output: "max"
+}
+operation {
+ type: "While"
+ input: "max"
+ output: "ofm1"
+ while_options {
+ cond_subgraph_index: 3
+ body_subgraph_index: 4
+ }
+}
+input: "ifm1"
+output: "ofm1"
+name: "Main"
diff --git a/res/TensorFlowLiteRecipes/Quant_Add_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Add_000/test.recipe
new file mode 100644
index 000000000..5c150922e
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Add_000/test.recipe
@@ -0,0 +1,36 @@
+operand {
+ name: "ifm"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 4 }
+ quant { scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "add_const"
+ type: UINT8
+ shape { dim: 1 dim: 1 dim: 1 dim: 4 }
+ quant { scale: 1.0 zero_point: 0 }
+ filler {
+ tag: "explicit"
+ arg: "0"
+ arg: "1"
+ arg: "2"
+ arg: "3"
+ }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 4 }
+ quant { scale: 1.0 zero_point: 0 }
+}
+operation {
+ type: "Add"
+ input: "ifm"
+ input: "add_const"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Add_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Add_000/test.rule
new file mode 100644
index 000000000..7bde66240
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Add_000/test.rule
@@ -0,0 +1,10 @@
+# To check fake quantization.
+# All Ops are float32. Quantize/Dequantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_FP32" $(tensor_dtype ifm) '=' FLOAT32
+RULE "ADD_CONST_FP32" $(tensor_dtype add_const_DQ) '=' FLOAT32
+RULE "ADD_FP32" $(tensor_dtype ofm) '=' FLOAT32
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
+RULE "DEQUANTIZE_OP" $(op_count DEQUANTIZE) '=' 2
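
Fake quantization, as these rules check it, keeps all arithmetic in FP32 and only simulates the rounding error through quantize/dequantize pairs at each boundary. A minimal sketch of one round trip, using the recipe's scale 1.0 and zero point 0:

```python
import numpy as np

def fake_quant_u8(x, scale=1.0, zero_point=0):
    # Quantize to the uint8 grid, then immediately dequantize to float32.
    q = np.clip(np.round(x / scale) + zero_point, 0, 255)
    return (q - zero_point) * np.float32(scale)

x = np.array([0.4, 1.6, 300.0], dtype=np.float32)
print(fake_quant_u8(x))   # [0. 2. 255.] -- values snap to the grid
```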
diff --git a/res/TensorFlowLiteRecipes/Quant_Add_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Add_001/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Add_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Add_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Add_001/test.recipe
new file mode 100644
index 000000000..0ae4862d1
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Add_001/test.recipe
@@ -0,0 +1,31 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "Add"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm1"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Add_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Add_001/test.rule
new file mode 100644
index 000000000..b51f4ebbb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Add_001/test.rule
@@ -0,0 +1,12 @@
+# To check mixed quantization.
+# Default dtype: U8, Add dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM1_U8" $(tensor_dtype ifm1) '=' UINT8
+RULE "IFM1_QUANTIZE_S16" $(tensor_dtype ifm1_Quantize) '=' INT16
+RULE "IFM2_S16" $(tensor_dtype ifm2) '=' INT16
+RULE "ADD_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
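
The two Quantize ops counted here sit at the dtype boundaries: one rescales the uint8 graph input for the int16 Add, the other rescales the int16 result back to uint8 at the output. A sketch of such a requantizing boundary, with made-up scales (the real ones come from recorded min/max):

```python
import numpy as np

def requantize(q, in_scale, in_zp, out_scale, out_zp, lo, hi, dtype):
    # Dequantize with the producer's params, re-quantize with the consumer's.
    real = (q.astype(np.float32) - in_zp) * in_scale
    return np.clip(np.round(real / out_scale) + out_zp, lo, hi).astype(dtype)

u8 = np.array([0, 128, 255], dtype=np.uint8)
s16 = requantize(u8, 0.05, 128, 0.0002, 0, -32767, 32767, np.int16)
print(s16)   # the same real values, re-expressed on the int16 grid
```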
diff --git a/res/TensorFlowLiteRecipes/Quant_Add_002/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Add_002/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Add_002/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Add_002/test.recipe b/res/TensorFlowLiteRecipes/Quant_Add_002/test.recipe
new file mode 100644
index 000000000..0ae4862d1
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Add_002/test.recipe
@@ -0,0 +1,31 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "Add"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm1"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Add_002/test.rule b/res/TensorFlowLiteRecipes/Quant_Add_002/test.rule
new file mode 100644
index 000000000..96a2535ef
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Add_002/test.rule
@@ -0,0 +1,12 @@
+# To check mixed quantization.
+# Default dtype: S16, Add dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM1_S16" $(tensor_dtype ifm1) '=' INT16
+RULE "IFM1_QUANTIZE_U8" $(tensor_dtype ifm1_Quantize) '=' UINT8
+RULE "IFM2_U8" $(tensor_dtype ifm2) '=' UINT8
+RULE "ADD_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Add_I8_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Add_I8_000/test.recipe
new file mode 100644
index 000000000..80fb0d1af
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Add_I8_000/test.recipe
@@ -0,0 +1,66 @@
+operand {
+ name: "x"
+ type: INT8
+ shape {
+ dim: 1
+ dim: 28
+ dim: 28
+ dim: 64
+ }
+ quant {
+ min: -4.73560715
+ max: 5.34916592
+ scale: 0.0395481288
+ zero_point: -8
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "x_1"
+ type: INT8
+ shape {
+ dim: 1
+ dim: 28
+ dim: 28
+ dim: 64
+ }
+ quant {
+ min: -5.03376198
+ max: 5.14654779
+ scale: 0.0399227813
+ zero_point: -2
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "Identity"
+ type: INT8
+ shape {
+ dim: 1
+ dim: 28
+ dim: 28
+ dim: 64
+ }
+ quant {
+ min: -6.4396615
+ max: 6.40962505
+ scale: 0.0503893606
+ zero_point: 0
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operation {
+ type: "Add"
+ input: "x"
+ input: "x_1"
+ output: "Identity"
+ add_options {
+ activation: NONE
+ }
+}
+input: "x"
+input: "x_1"
+output: "Identity"
diff --git a/res/TensorFlowLiteRecipes/Quant_Add_I8_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Add_I8_000/test.rule
new file mode 100644
index 000000000..8c39b94c4
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Add_I8_000/test.rule
@@ -0,0 +1,7 @@
+# To check requantization.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM1_U8" $(tensor_dtype x) '=' UINT8
+RULE "IFM2_U8" $(tensor_dtype x_1) '=' UINT8
+RULE "OFM_U8" $(tensor_dtype Identity) '=' UINT8
diff --git a/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.recipe
new file mode 100644
index 000000000..746c34334
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.recipe
@@ -0,0 +1,24 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 7 dim: 7 dim: 1 }
+}
+operation {
+ type: "AveragePool2D"
+ averagepool2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ filter_width: 2
+ filter_height: 2
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.rule b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.recipe
new file mode 100644
index 000000000..746c34334
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.recipe
@@ -0,0 +1,24 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 7 dim: 7 dim: 1 }
+}
+operation {
+ type: "AveragePool2D"
+ averagepool2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ filter_width: 2
+ filter_height: 2
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.rule b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_AveragePool2D_I8_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_I8_000/test.recipe
new file mode 100644
index 000000000..cc5095342
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_I8_000/test.recipe
@@ -0,0 +1,51 @@
+operand {
+ name: "x"
+ type: INT8
+ shape {
+ dim: 1
+ dim: 28
+ dim: 28
+ dim: 64
+ }
+ quant {
+ min: -4.91805935
+ max: 4.73869658
+ scale: 0.0378696285
+ zero_point: 2
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "Identity"
+ type: INT8
+ shape {
+ dim: 1
+ dim: 14
+ dim: 14
+ dim: 64
+ }
+ quant {
+ min: -4.91805935
+ max: 4.73869658
+ scale: 0.0378696285
+ zero_point: 2
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operation {
+ type: "AveragePool2D"
+ input: "x"
+ output: "Identity"
+ averagepool2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ filter_width: 2
+ filter_height: 2
+ activation: NONE
+ }
+}
+input: "x"
+output: "Identity"
diff --git a/res/TensorFlowLiteRecipes/Quant_AveragePool2D_I8_000/test.rule b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_I8_000/test.rule
new file mode 100644
index 000000000..35eaa9a53
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_I8_000/test.rule
@@ -0,0 +1,6 @@
+# To check requantization.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype x) '=' UINT8
+RULE "OFM_U8" $(tensor_dtype Identity) '=' UINT8
diff --git a/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.recipe
new file mode 100644
index 000000000..2f2e91a9e
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.recipe
@@ -0,0 +1,28 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 3 dim: 4 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 4 }
+}
+operation {
+ type: "BatchMatMul"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ batch_matmul_options {
+ adj_x: false
+ adj_y: false
+ }
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.rule b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.rule
new file mode 100644
index 000000000..e832ac526
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM1_U8" $(tensor_dtype ifm1) '=' UINT8
+RULE "IFM1_QUANTIZE_S16" $(tensor_dtype ifm1_Quantize) '=' INT16
+RULE "IFM2_U8" $(tensor_dtype ifm2) '=' UINT8
+RULE "IFM2_QUANTIZE_S16" $(tensor_dtype ifm2_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 3
diff --git a/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.recipe
new file mode 100644
index 000000000..2f2e91a9e
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.recipe
@@ -0,0 +1,28 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 3 dim: 4 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 4 }
+}
+operation {
+ type: "BatchMatMul"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ batch_matmul_options {
+ adj_x: false
+ adj_y: false
+ }
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.rule b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.rule
new file mode 100644
index 000000000..248337716
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM1_S16" $(tensor_dtype ifm1) '=' INT16
+RULE "IFM1_QUANTIZE_U8" $(tensor_dtype ifm1_Quantize) '=' UINT8
+RULE "IFM2_S16" $(tensor_dtype ifm2) '=' INT16
+RULE "IFM2_QUANTIZE_U8" $(tensor_dtype ifm2_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 3
diff --git a/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.recipe
new file mode 100644
index 000000000..35641bd07
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.recipe
@@ -0,0 +1,28 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 1 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "Concatenation"
+ concatenation_options {
+ axis: 3
+ activation: NONE
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.rule
new file mode 100644
index 000000000..e832ac526
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM1_U8" $(tensor_dtype ifm1) '=' UINT8
+RULE "IFM1_QUANTIZE_S16" $(tensor_dtype ifm1_Quantize) '=' INT16
+RULE "IFM2_U8" $(tensor_dtype ifm2) '=' UINT8
+RULE "IFM2_QUANTIZE_S16" $(tensor_dtype ifm2_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 3
diff --git a/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.recipe
new file mode 100644
index 000000000..35641bd07
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.recipe
@@ -0,0 +1,28 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 1 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "Concatenation"
+ concatenation_options {
+ axis: 3
+ activation: NONE
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.rule
new file mode 100644
index 000000000..248337716
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM1_S16" $(tensor_dtype ifm1) '=' INT16
+RULE "IFM1_QUANTIZE_U8" $(tensor_dtype ifm1_Quantize) '=' UINT8
+RULE "IFM2_S16" $(tensor_dtype ifm2) '=' INT16
+RULE "IFM2_QUANTIZE_U8" $(tensor_dtype ifm2_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 3
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Conv_000/test.recipe
new file mode 100644
index 000000000..8a9328be1
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_000/test.recipe
@@ -0,0 +1,44 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ }
+ input: "ifm"
+ input: "filter"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Conv_000/test.rule
new file mode 100644
index 000000000..f7af083da
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_000/test.rule
@@ -0,0 +1,10 @@
+# To check float32 input.
+# Input is float32, Conv is uint8. Quantize Op is inserted at the beginning.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "INPUT_FLOAT32" $(tensor_dtype ifm) '=' FLOAT32
+RULE "CONV_UINT8" $(tensor_dtype ofm) '=' UINT8
+RULE "WEIGHTS_UINT8" $(tensor_dtype filter) '=' UINT8
+RULE "BIAS_INT32" $(tensor_dtype bias) '=' INT32
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 1
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Conv_001/test.recipe
new file mode 100644
index 000000000..8a9328be1
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_001/test.recipe
@@ -0,0 +1,44 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ }
+ input: "ifm"
+ input: "filter"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Conv_001/test.rule
new file mode 100644
index 000000000..a3f52f26d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_001/test.rule
@@ -0,0 +1,11 @@
+# To check float32 output.
+# Output is float32, Conv is uint8. Dequantize Op is inserted at the end.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+# Update tensor name (ofm_Dequantize) if 'create_dequantize' function is changed.
+RULE "OUTPUT_FLOAT32" $(tensor_dtype ofm_Dequantize) '=' FLOAT32
+RULE "CONV_UINT8" $(tensor_dtype ofm) '=' UINT8
+RULE "WEIGHTS_UINT8" $(tensor_dtype filter) '=' UINT8
+RULE "BIAS_INT32" $(tensor_dtype bias) '=' INT32
+RULE "DEQUANTIZE_OP" $(op_count DEQUANTIZE) '=' 1
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_002/test.recipe b/res/TensorFlowLiteRecipes/Quant_Conv_002/test.recipe
new file mode 100644
index 000000000..8a9328be1
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_002/test.recipe
@@ -0,0 +1,44 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ }
+ input: "ifm"
+ input: "filter"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_002/test.rule b/res/TensorFlowLiteRecipes/Quant_Conv_002/test.rule
new file mode 100644
index 000000000..2187895f8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_002/test.rule
@@ -0,0 +1,13 @@
+# To check float32 input/output.
+# Input/Output is float32, Conv is uint8.
+# Quantize Op is inserted at the beginning, Dequantize Op is inserted at the end.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "INPUT_FLOAT32" $(tensor_dtype ifm) '=' FLOAT32
+RULE "OUTPUT_FLOAT32" $(tensor_dtype ofm_Dequantize) '=' FLOAT32
+RULE "CONV_UINT8" $(tensor_dtype ofm) '=' UINT8
+RULE "WEIGHTS_UINT8" $(tensor_dtype filter) '=' UINT8
+RULE "BIAS_INT32" $(tensor_dtype bias) '=' INT32
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 1
+RULE "DEQUANTIZE_OP" $(op_count DEQUANTIZE) '=' 1
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_003/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Conv_003/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_003/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_003/test.recipe b/res/TensorFlowLiteRecipes/Quant_Conv_003/test.recipe
new file mode 100644
index 000000000..9cf8a0f69
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_003/test.recipe
@@ -0,0 +1,44 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 2 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 1 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_003/test.reverse b/res/TensorFlowLiteRecipes/Quant_Conv_003/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_003/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_003/test.rule b/res/TensorFlowLiteRecipes/Quant_Conv_003/test.rule
new file mode 100644
index 000000000..50f235a55
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_003/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "KER_S16" $(tensor_dtype ker) '=' INT16
+RULE "BIAS_S64" $(tensor_dtype bias) '=' INT64
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_004/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Conv_004/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_004/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_004/test.recipe b/res/TensorFlowLiteRecipes/Quant_Conv_004/test.recipe
new file mode 100644
index 000000000..9cf8a0f69
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_004/test.recipe
@@ -0,0 +1,44 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 2 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 1 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_004/test.reverse b/res/TensorFlowLiteRecipes/Quant_Conv_004/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_004/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_004/test.rule b/res/TensorFlowLiteRecipes/Quant_Conv_004/test.rule
new file mode 100644
index 000000000..ffa3bc906
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_004/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "KER_U8" $(tensor_dtype ker) '=' UINT8
+RULE "BIAS_S32" $(tensor_dtype bias) '=' INT32
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_005/test.recipe b/res/TensorFlowLiteRecipes/Quant_Conv_005/test.recipe
new file mode 100644
index 000000000..8a9328be1
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_005/test.recipe
@@ -0,0 +1,44 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ }
+ input: "ifm"
+ input: "filter"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_005/test.rule b/res/TensorFlowLiteRecipes/Quant_Conv_005/test.rule
new file mode 100644
index 000000000..09931d4ed
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_005/test.rule
@@ -0,0 +1,8 @@
+# To check that the model can be quantized without QuantizeDequantizeWeights.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "INPUT_UINT8" $(tensor_dtype ifm) '=' UINT8
+RULE "CONV_UINT8" $(tensor_dtype ofm) '=' UINT8
+RULE "WEIGHTS_UINT8" $(tensor_dtype filter) '=' UINT8
+RULE "BIAS_INT32" $(tensor_dtype bias) '=' INT32
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_006/test.recipe b/res/TensorFlowLiteRecipes/Quant_Conv_006/test.recipe
new file mode 100644
index 000000000..8a9328be1
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_006/test.recipe
@@ -0,0 +1,44 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ }
+ input: "ifm"
+ input: "filter"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_006/test.rule b/res/TensorFlowLiteRecipes/Quant_Conv_006/test.rule
new file mode 100644
index 000000000..81b13a60a
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_006/test.rule
@@ -0,0 +1,8 @@
+# To check that the model can be quantized without QuantizeDequantizeWeights.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "INPUT_INT16" $(tensor_dtype ifm) '=' INT16
+RULE "CONV_INT16" $(tensor_dtype ofm) '=' INT16
+RULE "WEIGHTS_INT16" $(tensor_dtype filter) '=' INT16
+RULE "BIAS_INT64" $(tensor_dtype bias) '=' INT64
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_I8_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Conv_I8_000/test.recipe
new file mode 100644
index 000000000..44ad8ad23
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_I8_000/test.recipe
@@ -0,0 +1,472 @@
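+# Pre-quantized INT8 Conv2D with per-channel weights: quantized_dimension 0
+# gives each of the 64 output channels its own min/max/scale below; e.g. for
+# channel 0, max(|min|, |max|) / 127 = 0.0944232419 / 127 ~= 0.00074349,
+# matching the recorded scale (assuming symmetric int8 quantization).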
+operand {
+ name: "x"
+ type: INT8
+ shape {
+ dim: 1
+ dim: 28
+ dim: 28
+ dim: 64
+ }
+ quant {
+ min: -4.60775042
+ max: 4.72273636
+ scale: 0.036590144
+ zero_point: -2
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "conv2d/Conv2D"
+ type: INT8
+ shape {
+ dim: 64
+ dim: 3
+ dim: 3
+ dim: 64
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+ quant {
+ min: -0.0944232419
+ min: -0.0943553224
+ min: -0.0940016955
+ min: -0.0902028382
+ min: -0.0940822735
+ min: -0.0943455622
+ min: -0.0938868895
+ min: -0.0932510793
+ min: -0.0943445265
+ min: -0.0938660875
+ min: -0.0932407603
+ min: -0.0922792107
+ min: -0.090897508
+ min: -0.094613649
+ min: -0.0946459174
+ min: -0.0942868292
+ min: -0.0938613489
+ min: -0.0930051133
+ min: -0.0935952961
+ min: -0.0912109241
+ min: -0.0931911319
+ min: -0.0945508927
+ min: -0.0934816301
+ min: -0.0920401588
+ min: -0.0945732072
+ min: -0.0935128182
+ min: -0.0945002586
+ min: -0.0941293538
+ min: -0.0943162367
+ min: -0.0935421884
+ min: -0.0925261453
+ min: -0.0925333053
+ min: -0.0947286934
+ min: -0.0900891423
+ min: -0.0901428238
+ min: -0.0939482
+ min: -0.0921313837
+ min: -0.0932294279
+ min: -0.0916569456
+ min: -0.0946567059
+ min: -0.0945841148
+ min: -0.0947021097
+ min: -0.092875883
+ min: -0.0915970951
+ min: -0.0944385231
+ min: -0.0938618854
+ min: -0.0934935212
+ min: -0.0932230875
+ min: -0.0930254608
+ min: -0.0945126489
+ min: -0.0945077688
+ min: -0.0938009396
+ min: -0.0940574557
+ min: -0.0939351916
+ min: -0.0946185738
+ min: -0.0913222954
+ min: -0.0927926674
+ min: -0.0934722796
+ min: -0.0944210738
+ min: -0.093838416
+ min: -0.0937703848
+ min: -0.0931927413
+ min: -0.0942146331
+ min: -0.0941971242
+ max: 0.0917563215
+ max: 0.0933615
+ max: 0.0939033106
+ max: 0.0915425941
+ max: 0.0902856588
+ max: 0.0946686193
+ max: 0.0945754126
+ max: 0.09072759
+ max: 0.0944143608
+ max: 0.0940838605
+ max: 0.0927160084
+ max: 0.0920422375
+ max: 0.0943431184
+ max: 0.0938885286
+ max: 0.0936606303
+ max: 0.0936031714
+ max: 0.0919222832
+ max: 0.0942450911
+ max: 0.0936903879
+ max: 0.0942620337
+ max: 0.0930888131
+ max: 0.094434835
+ max: 0.0916963741
+ max: 0.0938658938
+ max: 0.0945749655
+ max: 0.092634663
+ max: 0.0944076553
+ max: 0.0946160257
+ max: 0.0937980711
+ max: 0.093268238
+ max: 0.0944246724
+ max: 0.0931886435
+ max: 0.0947005823
+ max: 0.0921203196
+ max: 0.0944035128
+ max: 0.0937120244
+ max: 0.0928558633
+ max: 0.0943659618
+ max: 0.0924747065
+ max: 0.0925901
+ max: 0.0911629498
+ max: 0.0941480845
+ max: 0.0917054042
+ max: 0.0926568806
+ max: 0.0941145867
+ max: 0.092836
+ max: 0.093148917
+ max: 0.0946854576
+ max: 0.0938850194
+ max: 0.0942198783
+ max: 0.0939895958
+ max: 0.0937283114
+ max: 0.0943206325
+ max: 0.0944009274
+ max: 0.0923624262
+ max: 0.0929848477
+ max: 0.094728522
+ max: 0.0939471
+ max: 0.0935599357
+ max: 0.0942030475
+ max: 0.0937476754
+ max: 0.093198292
+ max: 0.0930417553
+ max: 0.0856469646
+ scale: 0.00074349012
+ scale: 0.000742955308
+ scale: 0.000740170828
+ scale: 0.000720807817
+ scale: 0.000740805292
+ scale: 0.000745422207
+ scale: 0.000744688266
+ scale: 0.00073426048
+ scale: 0.000743420154
+ scale: 0.000740817806
+ scale: 0.000734179222
+ scale: 0.000726608
+ scale: 0.000742859207
+ scale: 0.000744989375
+ scale: 0.000745243451
+ scale: 0.000742415956
+ scale: 0.000739065756
+ scale: 0.000742087315
+ scale: 0.000737719587
+ scale: 0.000742220727
+ scale: 0.000733788416
+ scale: 0.000744495192
+ scale: 0.000736075803
+ scale: 0.000739101553
+ scale: 0.000744684774
+ scale: 0.000736321381
+ scale: 0.000744096527
+ scale: 0.000745008059
+ scale: 0.000742647506
+ scale: 0.00073655264
+ scale: 0.000743501354
+ scale: 0.000733768858
+ scale: 0.000745895202
+ scale: 0.000725356862
+ scale: 0.000743334764
+ scale: 0.000739749637
+ scale: 0.000731148524
+ scale: 0.000743039069
+ scale: 0.000728147279
+ scale: 0.000745328376
+ scale: 0.000744756835
+ scale: 0.000745685888
+ scale: 0.000731306151
+ scale: 0.000729581749
+ scale: 0.000743610435
+ scale: 0.000739069947
+ scale: 0.000736169459
+ scale: 0.000745554804
+ scale: 0.000739252137
+ scale: 0.000744194083
+ scale: 0.000744155666
+ scale: 0.000738590083
+ scale: 0.000742682139
+ scale: 0.000743314391
+ scale: 0.000745028141
+ scale: 0.000732164131
+ scale: 0.000745893863
+ scale: 0.000739740906
+ scale: 0.000743473
+ scale: 0.000741756288
+ scale: 0.00073834951
+ scale: 0.000733844819
+ scale: 0.0007418475
+ scale: 0.000741709664
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "conv2d/BiasAdd;conv2d/Conv2D;conv2d/BiasAdd/ReadVariableOp/resource"
+ type: INT32
+ shape {
+ dim: 64
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+ quant {
+ scale: 2.72044108e-05
+ scale: 2.71848421e-05
+ scale: 2.70829569e-05
+ scale: 2.63744623e-05
+ scale: 2.71061726e-05
+ scale: 2.72751058e-05
+ scale: 2.72482503e-05
+ scale: 2.68666972e-05
+ scale: 2.72018497e-05
+ scale: 2.7106631e-05
+ scale: 2.68637232e-05
+ scale: 2.65866911e-05
+ scale: 2.7181326e-05
+ scale: 2.72592679e-05
+ scale: 2.72685647e-05
+ scale: 2.71651061e-05
+ scale: 2.70425226e-05
+ scale: 2.71530826e-05
+ scale: 2.69932661e-05
+ scale: 2.71579629e-05
+ scale: 2.68494241e-05
+ scale: 2.72411871e-05
+ scale: 2.69331194e-05
+ scale: 2.70438322e-05
+ scale: 2.72481229e-05
+ scale: 2.69421053e-05
+ scale: 2.72265988e-05
+ scale: 2.72599518e-05
+ scale: 2.71735789e-05
+ scale: 2.69505672e-05
+ scale: 2.72048219e-05
+ scale: 2.68487074e-05
+ scale: 2.72924135e-05
+ scale: 2.65409126e-05
+ scale: 2.71987265e-05
+ scale: 2.70675464e-05
+ scale: 2.67528303e-05
+ scale: 2.71879071e-05
+ scale: 2.66430143e-05
+ scale: 2.72716734e-05
+ scale: 2.72507605e-05
+ scale: 2.72847537e-05
+ scale: 2.67585965e-05
+ scale: 2.66955012e-05
+ scale: 2.72088128e-05
+ scale: 2.70426754e-05
+ scale: 2.69365464e-05
+ scale: 2.72799571e-05
+ scale: 2.7049342e-05
+ scale: 2.72301695e-05
+ scale: 2.72287634e-05
+ scale: 2.70251167e-05
+ scale: 2.71748468e-05
+ scale: 2.71979807e-05
+ scale: 2.72606867e-05
+ scale: 2.67899904e-05
+ scale: 2.72923644e-05
+ scale: 2.70672263e-05
+ scale: 2.72037851e-05
+ scale: 2.71409699e-05
+ scale: 2.70163146e-05
+ scale: 2.68514868e-05
+ scale: 2.71443078e-05
+ scale: 2.71392637e-05
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "Identity"
+ type: INT8
+ shape {
+ dim: 1
+ dim: 26
+ dim: 26
+ dim: 64
+ }
+ quant {
+ min: -4.71095943
+ max: 4.83075953
+ scale: 0.037418507
+ zero_point: -2
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operation {
+ type: "Conv2D"
+ input: "x"
+ input: "conv2d/Conv2D"
+ input: "conv2d/BiasAdd;conv2d/Conv2D;conv2d/BiasAdd/ReadVariableOp/resource"
+ output: "Identity"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ activation: NONE
+ dilation_w_factor: 1
+ dilation_h_factor: 1
+ }
+}
+input: "x"
+output: "Identity"
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_I8_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Conv_I8_000/test.rule
new file mode 100644
index 000000000..fa7b908f4
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_I8_000/test.rule
@@ -0,0 +1,8 @@
+# To check requantization.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype x) '=' UINT8
+RULE "WGT_U8" $(tensor_dtype conv2d/Conv2D) '=' UINT8
+RULE "BIAS_S32" $(tensor_dtype conv2d/BiasAdd\;conv2d/Conv2D\;conv2d/BiasAdd/ReadVariableOp/resource) '=' INT32
+RULE "OFM_U8" $(tensor_dtype Identity) '=' UINT8
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000/test.qconf.json
new file mode 100644
index 000000000..536fef232
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm_conv",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000/test.recipe
new file mode 100644
index 000000000..3a3dba47f
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000/test.recipe
@@ -0,0 +1,92 @@
+operand {
+ name: "ifm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "mul_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "add_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_mul"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "ofm_add"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ }
+ input: "ifm_conv"
+ input: "filter"
+ input: "bias"
+ output: "ofm_conv"
+}
+operation {
+ type: "Mul"
+ input: "ofm_conv"
+ input: "mul_const"
+ output: "ofm_mul"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "ofm_mul"
+ input: "add_const"
+ output: "ofm_add"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm_conv"
+output: "ofm_add"
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000/test.rule
new file mode 100644
index 000000000..912405507
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed-precision quantization.
+# Conv is int16; the others are u8. Quantize Ops are inserted before/after Conv.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "CONV_INT16" $(tensor_dtype ofm_conv) '=' INT16
+RULE "WEIGHTS_INT16" $(tensor_dtype filter) '=' INT16
+RULE "BIAS_INT32" $(tensor_dtype bias) '=' INT64
+RULE "MUL_U8" $(tensor_dtype ofm_mul) '=' UINT8
+RULE "ADD_U8" $(tensor_dtype ofm_add) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_001/test.qconf.json
new file mode 100644
index 000000000..824f0791d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_001/test.qconf.json
@@ -0,0 +1,16 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm_conv",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ },
+ {
+ "name" : "ofm_mul",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_001/test.recipe
new file mode 100644
index 000000000..3a3dba47f
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_001/test.recipe
@@ -0,0 +1,92 @@
+operand {
+ name: "ifm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "mul_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "add_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_mul"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "ofm_add"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ }
+ input: "ifm_conv"
+ input: "filter"
+ input: "bias"
+ output: "ofm_conv"
+}
+operation {
+ type: "Mul"
+ input: "ofm_conv"
+ input: "mul_const"
+ output: "ofm_mul"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "ofm_mul"
+ input: "add_const"
+ output: "ofm_add"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm_conv"
+output: "ofm_add"
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_001/test.rule
new file mode 100644
index 000000000..7df910a40
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_001/test.rule
@@ -0,0 +1,14 @@
+# To check mixed-precision quantization.
+# Conv, Mul: int16, Add: u8
+# Quantize Ops are inserted before Conv and after Mul.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "CONV_INT16" $(tensor_dtype ofm_conv) '=' INT16
+RULE "WEIGHTS_INT16" $(tensor_dtype filter) '=' INT16
+RULE "BIAS_INT64" $(tensor_dtype bias) '=' INT64
+RULE "MUL_INT16" $(tensor_dtype ofm_mul) '=' INT16
+RULE "MUL_CONST_INT16" $(tensor_dtype mul_const) '=' INT16
+RULE "ADD_UINT8" $(tensor_dtype ofm_add) '=' UINT8
+RULE "ADD_CONST_UINT8" $(tensor_dtype add_const) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_002/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_002/test.qconf.json
new file mode 100644
index 000000000..824f0791d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_002/test.qconf.json
@@ -0,0 +1,16 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm_conv",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ },
+ {
+ "name" : "ofm_mul",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_002/test.recipe b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_002/test.recipe
new file mode 100644
index 000000000..9e114b33a
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_002/test.recipe
@@ -0,0 +1,88 @@
+operand {
+ name: "ifm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_conv"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "mul_non_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+}
+operand {
+ name: "add_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_mul"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+ name: "ofm_add"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ }
+ input: "ifm_conv"
+ input: "filter"
+ input: "bias"
+ output: "ofm_conv"
+}
+operation {
+ type: "Mul"
+ input: "ofm_conv"
+ input: "mul_non_const"
+ output: "ofm_mul"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "ofm_mul"
+ input: "add_const"
+ output: "ofm_add"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm_conv"
+input: "mul_non_const"
+output: "ofm_add"
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_002/test.rule b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_002/test.rule
new file mode 100644
index 000000000..b539872fc
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_002/test.rule
@@ -0,0 +1,14 @@
+# To check mixed-precision quantization.
+# Conv, Mul: int16, Add: u8
+# Quantize Ops are inserted before Conv, after Mul, and before Mul's non-const input.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "CONV_INT16" $(tensor_dtype ofm_conv) '=' INT16
+RULE "WEIGHTS_INT16" $(tensor_dtype filter) '=' INT16
+RULE "BIAS_INT64" $(tensor_dtype bias) '=' INT64
+RULE "MUL_INT16" $(tensor_dtype ofm_mul) '=' INT16
+RULE "MUL_NON_CONST_UINT8" $(tensor_dtype mul_non_const) '=' UINT8
+RULE "ADD_UINT8" $(tensor_dtype ofm_add) '=' UINT8
+RULE "ADD_CONST_UINT8" $(tensor_dtype add_const) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 3
diff --git a/res/TensorFlowLiteRecipes/Quant_DepthToSpace_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_DepthToSpace_000/test.recipe
new file mode 100644
index 000000000..c0d47bfe5
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_DepthToSpace_000/test.recipe
@@ -0,0 +1,22 @@
+operand {
+ name: "ifm"
+ type: UINT8
+ shape { dim: 1 dim: 2 dim: 2 dim: 4 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 1 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operation {
+ type: "DepthToSpace"
+ depth_to_space_options {
+ block_size: 2
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_DepthToSpace_000/test.rule b/res/TensorFlowLiteRecipes/Quant_DepthToSpace_000/test.rule
new file mode 100644
index 000000000..d2052e78d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_DepthToSpace_000/test.rule
@@ -0,0 +1,12 @@
+# To check fake quantization of DepthToSpace (D2S).
+# 1. ifm is float32.
+# 2. D2S is float32.
+# 3. Q/DQ is inserted at the beginning of the model (from ifm).
+# 4. Q/DQ is not inserted after D2S, because D2S does not change the values of its input.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_FP32" $(tensor_dtype ifm) '=' FLOAT32
+RULE "D2S_FP32" $(tensor_dtype ofm) '=' FLOAT32
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 1
+RULE "DEQUANTIZE_OP" $(op_count DEQUANTIZE) '=' 1
diff --git a/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.recipe
new file mode 100644
index 000000000..148256aa2
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.recipe
@@ -0,0 +1,49 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 2 dim: 2 }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 2 dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "2" arg: "3" arg: "4"
+ arg: "-9" arg: "10" arg: "-11" arg: "12"
+ arg: "5" arg: "6" arg: "7" arg: "8"
+ arg: "13" arg: "-14" arg: "15" arg: "-16"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "2" arg: "3" arg: "4"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 1 dim: 4 }
+}
+operation {
+ type: "DepthwiseConv2D"
+ depthwiseconv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 2
+ dilation_w_factor: 1
+ dilation_h_factor: 1
+ depth_multiplier: 2
+ activation : RELU
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.rule b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.rule
new file mode 100644
index 000000000..50f235a55
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "KER_S16" $(tensor_dtype ker) '=' INT16
+RULE "BIAS_S64" $(tensor_dtype bias) '=' INT64
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.recipe
new file mode 100644
index 000000000..148256aa2
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.recipe
@@ -0,0 +1,49 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 2 dim: 2 }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 2 dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "2" arg: "3" arg: "4"
+ arg: "-9" arg: "10" arg: "-11" arg: "12"
+ arg: "5" arg: "6" arg: "7" arg: "8"
+ arg: "13" arg: "-14" arg: "15" arg: "-16"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "2" arg: "3" arg: "4"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 1 dim: 4 }
+}
+operation {
+ type: "DepthwiseConv2D"
+ depthwiseconv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 2
+ dilation_w_factor: 1
+ dilation_h_factor: 1
+ depth_multiplier: 2
+ activation : RELU
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.rule b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.rule
new file mode 100644
index 000000000..ffa3bc906
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "KER_U8" $(tensor_dtype ker) '=' UINT8
+RULE "BIAS_S32" $(tensor_dtype bias) '=' INT32
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_I8_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_I8_000/test.recipe
new file mode 100644
index 000000000..6fa6e98b1
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_I8_000/test.recipe
@@ -0,0 +1,473 @@
+operand {
+ name: "x"
+ type: INT8
+ shape {
+ dim: 1
+ dim: 28
+ dim: 28
+ dim: 64
+ }
+ quant {
+ min: -4.64391708
+ max: 4.84164238
+ scale: 0.0371982716
+ zero_point: -3
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "depthwise_conv2d/depthwise"
+ type: INT8
+ shape {
+ dim: 1
+ dim: 3
+ dim: 3
+ dim: 64
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+ quant {
+ min: -0.063326925
+ min: -0.0634795
+ min: -0.0753154755
+ min: -0.0825628936
+ min: -0.100263909
+ min: -0.0518069938
+ min: -0.0903094783
+ min: -0.092050083
+ min: -0.0861446783
+ min: -0.0868664607
+ min: -0.0805943534
+ min: -0.0630288422
+ min: -0.0603802204
+ min: -0.0894295871
+ min: -0.0806457847
+ min: -0.08705163
+ min: -0.0955024436
+ min: -0.099676609
+ min: -0.0426661745
+ min: -0.100863345
+ min: -0.0732110441
+ min: -0.0957087874
+ min: -0.0984382555
+ min: -0.0870531276
+ min: -0.0588906072
+ min: -0.0985565707
+ min: -0.0667493939
+ min: -0.0315393284
+ min: -0.0872938633
+ min: -0.0947047919
+ min: -0.0988249257
+ min: -0.0945888236
+ min: -0.0884424895
+ min: -0.0674908608
+ min: -0.0691238195
+ min: -0.0973802432
+ min: -0.0970137119
+ min: -0.0648144335
+ min: -0.0792933181
+ min: -0.0928001404
+ min: -0.0429098979
+ min: -0.0946487263
+ min: -0.0911387801
+ min: -0.0989532098
+ min: -0.041106537
+ min: -0.0955499113
+ min: -0.0979051739
+ min: -0.0843721703
+ min: -0.0769490227
+ min: -0.0908443704
+ min: -0.0900151655
+ min: -0.100871772
+ min: -0.0811569467
+ min: -0.0867079645
+ min: -0.0810551718
+ min: -0.0915141478
+ min: -0.0988522843
+ min: -0.0506756492
+ min: -0.0828623697
+ min: -0.0969022587
+ min: -0.0862472728
+ min: -0.0536583066
+ min: -0.0739349052
+ min: 0.0297371298
+ max: 0.0629539043
+ max: 0.081167981
+ max: 0.0732295811
+ max: 0.0943298787
+ max: 0.0894318074
+ max: 0.0948470086
+ max: 0.0968847275
+ max: 0.0914845169
+ max: 0.0937000513
+ max: 0.059604913
+ max: 0.0977034122
+ max: 0.0998425633
+ max: 0.0968779624
+ max: 0.0188128203
+ max: 0.0933915377
+ max: 0.058697015
+ max: 0.0965367705
+ max: 0.0620942265
+ max: 0.098770529
+ max: 0.0851371
+ max: 0.0981955677
+ max: 0.059944734
+ max: 0.0692644
+ max: 0.0846596062
+ max: 0.0562474579
+ max: 0.0516051352
+ max: 0.0457287
+ max: 0.088056
+ max: 0.0738085508
+ max: 0.0320325792
+ max: 0.0874649584
+ max: 0.0846493095
+ max: 0.06501019
+ max: 0.0962834805
+ max: 0.0976130068
+ max: 0.0987761915
+ max: 0.0971763879
+ max: 0.0984461755
+ max: 0.0951509
+ max: 0.097969681
+ max: 0.0811607093
+ max: 0.0462665409
+ max: 0.100696355
+ max: 0.0861465335
+ max: 0.0922105
+ max: 0.0879219174
+ max: 0.0897164643
+ max: 0.0870749801
+ max: 0.0918415785
+ max: 0.0730088204
+ max: 0.0392846316
+ max: 0.0920975059
+ max: 0.042890057
+ max: 0.0462249964
+ max: 0.0784825087
+ max: 0.0663191676
+ max: 0.0893450826
+ max: 0.0812896937
+ max: 0.0611626357
+ max: 0.0788473934
+ max: 0.0721678585
+ max: 0.0912485719
+ max: 0.0942054689
+ max: 0.0807732195
+ scale: 0.000498637208
+ scale: 0.000639117963
+ scale: 0.000593035249
+ scale: 0.000742754957
+ scale: 0.000789479585
+ scale: 0.000746826816
+ scale: 0.000762871874
+ scale: 0.000724803831
+ scale: 0.000737795664
+ scale: 0.000683987862
+ scale: 0.000769318198
+ scale: 0.000786161923
+ scale: 0.000762818614
+ scale: 0.00070417
+ scale: 0.000735366426
+ scale: 0.000685445906
+ scale: 0.000760132039
+ scale: 0.000784855161
+ scale: 0.000777720707
+ scale: 0.000794199586
+ scale: 0.000773193431
+ scale: 0.00075361249
+ scale: 0.000775104389
+ scale: 0.000685457722
+ scale: 0.000463705568
+ scale: 0.000776036
+ scale: 0.000525585783
+ scale: 0.00069335429
+ scale: 0.000687353255
+ scale: 0.000745707
+ scale: 0.000778149
+ scale: 0.000744793913
+ scale: 0.000696397561
+ scale: 0.00075813767
+ scale: 0.000768606376
+ scale: 0.000777765294
+ scale: 0.000765168399
+ scale: 0.000775166729
+ scale: 0.000749219733
+ scale: 0.000771414838
+ scale: 0.000639060687
+ scale: 0.00074526557
+ scale: 0.000792884675
+ scale: 0.000779159134
+ scale: 0.000726066937
+ scale: 0.000752361491
+ scale: 0.00077090686
+ scale: 0.000685629784
+ scale: 0.000723162
+ scale: 0.00071531
+ scale: 0.000708780834
+ scale: 0.000794265943
+ scale: 0.000639031059
+ scale: 0.00068273989
+ scale: 0.000638229714
+ scale: 0.000720583834
+ scale: 0.000778364425
+ scale: 0.000640076352
+ scale: 0.000652459625
+ scale: 0.000763009884
+ scale: 0.000679112389
+ scale: 0.000718492665
+ scale: 0.000741775322
+ scale: 0.000636009616
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ quantized_dimension: 3
+ }
+ is_variable: false
+}
+operand {
+ name: "depthwise_conv2d/BiasAdd;depthwise_conv2d/depthwise;depthwise_conv2d/BiasAdd/ReadVariableOp/resource"
+ type: INT32
+ shape {
+ dim: 64
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+ quant {
+ scale: 1.85484423e-05
+ scale: 2.37740842e-05
+ scale: 2.20598868e-05
+ scale: 2.76292012e-05
+ scale: 2.93672765e-05
+ scale: 2.77806666e-05
+ scale: 2.83775153e-05
+ scale: 2.69614502e-05
+ scale: 2.74447229e-05
+ scale: 2.5443167e-05
+ scale: 2.86173072e-05
+ scale: 2.92438654e-05
+ scale: 2.83755344e-05
+ scale: 2.61939058e-05
+ scale: 2.73543592e-05
+ scale: 2.54974038e-05
+ scale: 2.82755973e-05
+ scale: 2.91952547e-05
+ scale: 2.89298659e-05
+ scale: 2.95428526e-05
+ scale: 2.87614585e-05
+ scale: 2.80330823e-05
+ scale: 2.88325427e-05
+ scale: 2.54978422e-05
+ scale: 1.72490454e-05
+ scale: 2.88671981e-05
+ scale: 1.95508819e-05
+ scale: 2.57915817e-05
+ scale: 2.55683535e-05
+ scale: 2.77390118e-05
+ scale: 2.89457985e-05
+ scale: 2.77050458e-05
+ scale: 2.59047865e-05
+ scale: 2.82014116e-05
+ scale: 2.85908282e-05
+ scale: 2.89315249e-05
+ scale: 2.84629423e-05
+ scale: 2.8834862e-05
+ scale: 2.78696789e-05
+ scale: 2.86952982e-05
+ scale: 2.37719523e-05
+ scale: 2.77225918e-05
+ scale: 2.949394e-05
+ scale: 2.89833733e-05
+ scale: 2.70084347e-05
+ scale: 2.79865471e-05
+ scale: 2.86764025e-05
+ scale: 2.55042432e-05
+ scale: 2.69003776e-05
+ scale: 2.66082952e-05
+ scale: 2.63654219e-05
+ scale: 2.9545321e-05
+ scale: 2.377085e-05
+ scale: 2.53967446e-05
+ scale: 2.37410422e-05
+ scale: 2.68044732e-05
+ scale: 2.89538111e-05
+ scale: 2.38097346e-05
+ scale: 2.427037e-05
+ scale: 2.83826485e-05
+ scale: 2.52618065e-05
+ scale: 2.6726686e-05
+ scale: 2.75927596e-05
+ scale: 2.36584583e-05
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "Identity"
+ type: INT8
+ shape {
+ dim: 1
+ dim: 26
+ dim: 26
+ dim: 64
+ }
+ quant {
+ min: -1.07687819
+ max: 1.02835441
+ scale: 0.0082558142
+ zero_point: 2
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operation {
+ type: "DepthwiseConv2D"
+ input: "x"
+ input: "depthwise_conv2d/depthwise"
+ input: "depthwise_conv2d/BiasAdd;depthwise_conv2d/depthwise;depthwise_conv2d/BiasAdd/ReadVariableOp/resource"
+ output: "Identity"
+ depthwiseconv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ depth_multiplier: 1
+ activation: NONE
+ dilation_w_factor: 1
+ dilation_h_factor: 1
+ }
+}
+input: "x"
+output: "Identity"
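The per-channel quant parameters in the recipe above are internally consistent with the usual TFLite convention bias_scale[c] = input_scale * weight_scale[c]. A quick numeric check against the first two channels, with values copied verbatim from the recipe and a tolerance chosen for float32 round-off:

# Bias scales in Quant_DepthwiseConv2D_I8_000 are input_scale * weight_scale[c].
input_scale = 0.0371982716                          # quant of operand "x"
weight_scales = [0.000498637208, 0.000639117963]    # kernel channels 0 and 1
bias_scales = [1.85484423e-05, 2.37740842e-05]      # bias channels 0 and 1

for w, b in zip(weight_scales, bias_scales):
    assert abs(input_scale * w - b) / b < 1e-4      # holds to float32 precision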
diff --git a/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_I8_000/test.rule b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_I8_000/test.rule
new file mode 100644
index 000000000..2c2a2a762
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_I8_000/test.rule
@@ -0,0 +1,8 @@
+# To check requantization.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype x) '=' UINT8
+RULE "WGT_U8" $(tensor_dtype depthwise_conv2d/depthwise) '=' UINT8
+RULE "BIAS_S32" $(tensor_dtype depthwise_conv2d/BiasAdd\;depthwise_conv2d/depthwise\;depthwise_conv2d/BiasAdd/ReadVariableOp/resource) '=' INT32
+RULE "OFM_U8" $(tensor_dtype Identity) '=' UINT8
diff --git a/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.qconf.json
new file mode 100644
index 000000000..ad2bad697
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "out",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.recipe
new file mode 100644
index 000000000..0ecb5618b
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.recipe
@@ -0,0 +1,55 @@
+operand {
+ name: "in"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 }
+}
+operand {
+ name: "weight"
+ type: FLOAT32
+ shape { dim: 4 dim: 16 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "2" arg: "-3" arg: "-4"
+ arg: "-5" arg: "6" arg: "-7" arg: "8"
+ arg: "4" arg: "-2" arg: "3" arg: "-1"
+ arg: "-8" arg: "-6" arg: "7" arg: "5"
+ arg: "1" arg: "2" arg: "-3" arg: "-4"
+ arg: "-5" arg: "6" arg: "-7" arg: "8"
+ arg: "4" arg: "-2" arg: "3" arg: "-1"
+ arg: "-8" arg: "-6" arg: "7" arg: "5"
+ arg: "1" arg: "2" arg: "-3" arg: "-4"
+ arg: "-5" arg: "6" arg: "-7" arg: "8"
+ arg: "4" arg: "-2" arg: "3" arg: "-1"
+ arg: "-8" arg: "-6" arg: "7" arg: "5"
+ arg: "1" arg: "2" arg: "-3" arg: "-4"
+ arg: "-5" arg: "6" arg: "-7" arg: "8"
+ arg: "4" arg: "-2" arg: "3" arg: "-1"
+ arg: "-8" arg: "-6" arg: "7" arg: "5"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "-2" arg: "-3" arg: "4"
+ }
+}
+operand {
+ name: "out"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 }
+}
+operation {
+ type: "FullyConnected"
+ fullyconnected_options {
+ activation: NONE
+ }
+ input: "in"
+ input: "weight"
+ input: "bias"
+ output: "out"
+}
+input: "in"
+output: "out"
diff --git a/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.rule b/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.rule
new file mode 100644
index 000000000..f54256084
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IN_U8" $(tensor_dtype in) '=' UINT8
+RULE "IN_QUANTIZE_S16" $(tensor_dtype in_Quantize) '=' INT16
+RULE "WEIGHT_S16" $(tensor_dtype weight) '=' INT16
+RULE "BIAS_S64" $(tensor_dtype bias) '=' INT64
+RULE "TARGET_S16" $(tensor_dtype out) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype out_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.qconf.json
new file mode 100644
index 000000000..ff3eb9791
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "out",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.recipe
new file mode 100644
index 000000000..0ecb5618b
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.recipe
@@ -0,0 +1,55 @@
+operand {
+ name: "in"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 }
+}
+operand {
+ name: "weight"
+ type: FLOAT32
+ shape { dim: 4 dim: 16 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "2" arg: "-3" arg: "-4"
+ arg: "-5" arg: "6" arg: "-7" arg: "8"
+ arg: "4" arg: "-2" arg: "3" arg: "-1"
+ arg: "-8" arg: "-6" arg: "7" arg: "5"
+ arg: "1" arg: "2" arg: "-3" arg: "-4"
+ arg: "-5" arg: "6" arg: "-7" arg: "8"
+ arg: "4" arg: "-2" arg: "3" arg: "-1"
+ arg: "-8" arg: "-6" arg: "7" arg: "5"
+ arg: "1" arg: "2" arg: "-3" arg: "-4"
+ arg: "-5" arg: "6" arg: "-7" arg: "8"
+ arg: "4" arg: "-2" arg: "3" arg: "-1"
+ arg: "-8" arg: "-6" arg: "7" arg: "5"
+ arg: "1" arg: "2" arg: "-3" arg: "-4"
+ arg: "-5" arg: "6" arg: "-7" arg: "8"
+ arg: "4" arg: "-2" arg: "3" arg: "-1"
+ arg: "-8" arg: "-6" arg: "7" arg: "5"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "-2" arg: "-3" arg: "4"
+ }
+}
+operand {
+ name: "out"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 }
+}
+operation {
+ type: "FullyConnected"
+ fullyconnected_options {
+ activation: NONE
+ }
+ input: "in"
+ input: "weight"
+ input: "bias"
+ output: "out"
+}
+input: "in"
+output: "out"
diff --git a/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.rule b/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.rule
new file mode 100644
index 000000000..4acd22946
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IN_S16" $(tensor_dtype in) '=' INT16
+RULE "IN_QUANTIZE_U8" $(tensor_dtype in_Quantize) '=' UINT8
+RULE "WEIGHT_U8" $(tensor_dtype weight) '=' UINT8
+RULE "BIAS_S32" $(tensor_dtype bias) '=' INT32
+RULE "TARGET_U8" $(tensor_dtype out) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype out_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.recipe
new file mode 100644
index 000000000..836a37305
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.recipe
@@ -0,0 +1,20 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "LeakyRelu"
+ leaky_relu_options {
+ alpha: 2.0
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.rule b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.recipe
new file mode 100644
index 000000000..836a37305
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.recipe
@@ -0,0 +1,20 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "LeakyRelu"
+ leaky_relu_options {
+ alpha: 2.0
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.rule b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.recipe
new file mode 100644
index 000000000..dca24da4c
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Logistic"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.recipe
new file mode 100644
index 000000000..dca24da4c
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Logistic"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.recipe
new file mode 100644
index 000000000..718630f08
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.recipe
@@ -0,0 +1,24 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 7 dim: 7 dim: 1 }
+}
+operation {
+ type: "MaxPool2D"
+ maxpool2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ filter_width: 2
+ filter_height: 2
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.rule b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.recipe
new file mode 100644
index 000000000..718630f08
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.recipe
@@ -0,0 +1,24 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 7 dim: 7 dim: 1 }
+}
+operation {
+ type: "MaxPool2D"
+ maxpool2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ filter_width: 2
+ filter_height: 2
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.rule b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_MaxPool2D_I8_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_I8_000/test.recipe
new file mode 100644
index 000000000..23717132e
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_I8_000/test.recipe
@@ -0,0 +1,51 @@
+operand {
+ name: "x"
+ type: INT8
+ shape {
+ dim: 1
+ dim: 28
+ dim: 28
+ dim: 128
+ }
+ quant {
+ min: -4.71396
+ max: 4.72839499
+ scale: 0.0370288454
+ zero_point: -1
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "Identity"
+ type: INT8
+ shape {
+ dim: 1
+ dim: 14
+ dim: 14
+ dim: 128
+ }
+ quant {
+ min: -4.71396
+ max: 4.72839499
+ scale: 0.0370288454
+ zero_point: -1
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operation {
+ type: "MaxPool2D"
+ input: "x"
+ output: "Identity"
+ maxpool2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ filter_width: 2
+ filter_height: 2
+ activation: NONE
+ }
+}
+input: "x"
+output: "Identity"
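Note that in the recipe above the input "x" and the output "Identity" carry identical quant parameters (same min/max, scale 0.0370288454, zero_point -1). That is the expected invariant for MaxPool2D: the op only selects among existing values, so it can run directly on the integer grid without rescaling. A one-line check with values copied from the recipe:

ifm = {"scale": 0.0370288454, "zero_point": -1}
ofm = {"scale": 0.0370288454, "zero_point": -1}
assert ifm == ofm   # MaxPool2D preserves the quantization grid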
diff --git a/res/TensorFlowLiteRecipes/Quant_MaxPool2D_I8_000/test.rule b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_I8_000/test.rule
new file mode 100644
index 000000000..35eaa9a53
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_I8_000/test.rule
@@ -0,0 +1,6 @@
+# To check requantization.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype x) '=' UINT8
+RULE "OFM_U8" $(tensor_dtype Identity) '=' UINT8
diff --git a/res/TensorFlowLiteRecipes/Quant_Mean_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Mean_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mean_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Mean_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Mean_000/test.recipe
new file mode 100644
index 000000000..d383997d3
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mean_000/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 4 }
+}
+operand {
+ name: "reduction_indices"
+ type: INT32
+ shape { dim: 1 }
+ filler { tag: "explicit" arg: "-1" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+}
+operation {
+ type: "Mean"
+ mean_options {
+ keep_dims: true
+ }
+ input: "ifm"
+ input: "reduction_indices"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Mean_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Mean_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mean_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Mean_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Mean_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mean_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Mean_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Mean_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mean_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Mean_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Mean_001/test.recipe
new file mode 100644
index 000000000..d383997d3
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mean_001/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 4 }
+}
+operand {
+ name: "reduction_indices"
+ type: INT32
+ shape { dim: 1 }
+ filler { tag: "explicit" arg: "-1" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+}
+operation {
+ type: "Mean"
+ mean_options {
+ keep_dims: true
+ }
+ input: "ifm"
+ input: "reduction_indices"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Mean_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Mean_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mean_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Mean_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Mean_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mean_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Mean_I8_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Mean_I8_000/test.recipe
new file mode 100644
index 000000000..bccdff277
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mean_I8_000/test.recipe
@@ -0,0 +1,63 @@
+operand {
+ name: "x"
+ type: INT8
+ shape {
+ dim: 1
+ dim: 7
+ dim: 7
+ dim: 128
+ }
+ quant {
+ min: -4.52155876
+ max: 4.65688181
+ scale: 0.0359938815
+ zero_point: -2
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "Mean/reduction_indices"
+ type: INT32
+ shape {
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "2"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "Identity"
+ type: INT8
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 128
+ }
+ quant {
+ min: -0.548125625
+ max: 0.485593677
+ scale: 0.0040538013
+ zero_point: 7
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operation {
+ type: "Mean"
+ input: "x"
+ input: "Mean/reduction_indices"
+ output: "Identity"
+ mean_options {
+ keep_dims: true
+ }
+}
+input: "x"
+output: "Identity"
diff --git a/res/TensorFlowLiteRecipes/Quant_Mean_I8_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Mean_I8_000/test.rule
new file mode 100644
index 000000000..89610199f
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mean_I8_000/test.rule
@@ -0,0 +1,7 @@
+# To check requantization.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype x) '=' UINT8
+RULE "AXIS_S32" $(tensor_dtype Mean/reduction_indices) '=' INT32
+RULE "OFM_U8" $(tensor_dtype Identity) '=' UINT8
diff --git a/res/TensorFlowLiteRecipes/Quant_Mul_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Mul_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mul_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Mul_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Mul_000/test.recipe
new file mode 100644
index 000000000..43ca30dec
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mul_000/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "Mul"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ mul_options {
+ activation: NONE
+ }
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Mul_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Mul_000/test.rule
new file mode 100644
index 000000000..e832ac526
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mul_000/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM1_U8" $(tensor_dtype ifm1) '=' UINT8
+RULE "IFM1_QUANTIZE_S16" $(tensor_dtype ifm1_Quantize) '=' INT16
+RULE "IFM2_U8" $(tensor_dtype ifm2) '=' UINT8
+RULE "IFM2_QUANTIZE_S16" $(tensor_dtype ifm2_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 3
diff --git a/res/TensorFlowLiteRecipes/Quant_Mul_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Mul_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mul_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Mul_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Mul_001/test.recipe
new file mode 100644
index 000000000..43ca30dec
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mul_001/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "Mul"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ mul_options {
+ activation: NONE
+ }
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Mul_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Mul_001/test.rule
new file mode 100644
index 000000000..248337716
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mul_001/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM1_S16" $(tensor_dtype ifm1) '=' INT16
+RULE "IFM1_QUANTIZE_U8" $(tensor_dtype ifm1_Quantize) '=' UINT8
+RULE "IFM2_S16" $(tensor_dtype ifm2) '=' INT16
+RULE "IFM2_QUANTIZE_U8" $(tensor_dtype ifm2_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 3
diff --git a/res/TensorFlowLiteRecipes/Quant_Mul_I8_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Mul_I8_000/test.recipe
new file mode 100644
index 000000000..af37f4a5e
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mul_I8_000/test.recipe
@@ -0,0 +1,66 @@
+operand {
+ name: "x"
+ type: INT8
+ shape {
+ dim: 1
+ dim: 28
+ dim: 28
+ dim: 64
+ }
+ quant {
+ min: -4.91511106
+ max: 4.63654947
+ scale: 0.0374574922
+ zero_point: 3
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "x_1"
+ type: INT8
+ shape {
+ dim: 1
+ dim: 28
+ dim: 28
+ dim: 64
+ }
+ quant {
+ min: -4.70411062
+ max: 4.48266
+ scale: 0.0360265523
+ zero_point: 3
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "Identity"
+ type: INT8
+ shape {
+ dim: 1
+ dim: 28
+ dim: 28
+ dim: 64
+ }
+ quant {
+ min: -10.2211161
+ max: 10.4154072
+ scale: 0.0809275433
+ zero_point: -2
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operation {
+ type: "Mul"
+ input: "x"
+ input: "x_1"
+ output: "Identity"
+ mul_options {
+ activation: NONE
+ }
+}
+input: "x"
+input: "x_1"
+output: "Identity"
diff --git a/res/TensorFlowLiteRecipes/Quant_Mul_I8_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Mul_I8_000/test.rule
new file mode 100644
index 000000000..8c39b94c4
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mul_I8_000/test.rule
@@ -0,0 +1,7 @@
+# To check requantization.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM1_U8" $(tensor_dtype x) '=' UINT8
+RULE "IFM2_U8" $(tensor_dtype x_1) '=' UINT8
+RULE "OFM_U8" $(tensor_dtype Identity) '=' UINT8
diff --git a/res/TensorFlowLiteRecipes/Quant_Neg_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Neg_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Neg_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Neg_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Neg_000/test.recipe
new file mode 100644
index 000000000..447e4a1ab
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Neg_000/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Neg"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Neg_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Neg_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Neg_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Neg_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Neg_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Neg_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Neg_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Neg_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Neg_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Neg_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Neg_001/test.recipe
new file mode 100644
index 000000000..447e4a1ab
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Neg_001/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Neg"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Neg_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Neg_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Neg_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Neg_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Neg_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Neg_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.recipe
new file mode 100644
index 000000000..c18acdbbc
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "alpha"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "0.1" arg: "0.3" arg: "0.5"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "PRelu"
+ input: "ifm"
+ input: "alpha"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.rule b/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.rule
new file mode 100644
index 000000000..81436146c
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.rule
@@ -0,0 +1,12 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "ALPHA_S16" $(tensor_dtype alpha) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.recipe
new file mode 100644
index 000000000..c18acdbbc
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "alpha"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "0.1" arg: "0.3" arg: "0.5"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "PRelu"
+ input: "ifm"
+ input: "alpha"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.rule b/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.rule
new file mode 100644
index 000000000..5b9416017
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.rule
@@ -0,0 +1,12 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "ALPHA_U8" $(tensor_dtype alpha) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_PRelu_I8_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_PRelu_I8_000/test.recipe
new file mode 100644
index 000000000..2b8caa930
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_PRelu_I8_000/test.recipe
@@ -0,0 +1,66 @@
+operand {
+ name: "x"
+ type: INT8
+ shape {
+ dim: 1
+ dim: 28
+ dim: 28
+ dim: 64
+ }
+ quant {
+ min: -4.600914
+ max: 4.44562244
+ scale: 0.0354766138
+ zero_point: 2
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "p_re_lu/add;p_re_lu/Relu;p_re_lu/Neg_1;p_re_lu/Relu_1;p_re_lu/mul"
+ type: INT8
+ shape {
+ dim: 1
+ dim: 1
+ dim: 64
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+ quant {
+ min: -0.391680807
+ max: 0.391578436
+ scale: 0.00308410078
+ zero_point: 0
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "Identity"
+ type: INT8
+ shape {
+ dim: 1
+ dim: 28
+ dim: 28
+ dim: 64
+ }
+ quant {
+ min: -1.45854628
+ max: 4.44562244
+ scale: 0.0231536031
+ zero_point: -65
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operation {
+ type: "PRelu"
+ input: "x"
+ input: "p_re_lu/add;p_re_lu/Relu;p_re_lu/Neg_1;p_re_lu/Relu_1;p_re_lu/mul"
+ output: "Identity"
+}
+input: "x"
+output: "Identity"
diff --git a/res/TensorFlowLiteRecipes/Quant_PRelu_I8_000/test.rule b/res/TensorFlowLiteRecipes/Quant_PRelu_I8_000/test.rule
new file mode 100644
index 000000000..e55dfba9c
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_PRelu_I8_000/test.rule
@@ -0,0 +1,7 @@
+# To check requantization.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype x) '=' UINT8
+RULE "ALPHA_U8" $(tensor_dtype p_re_lu/add\;p_re_lu/Relu\;p_re_lu/Neg_1\;p_re_lu/Relu_1\;p_re_lu/mul) '=' UINT8
+RULE "OFM_U8" $(tensor_dtype Identity) '=' UINT8
diff --git a/res/TensorFlowLiteRecipes/Quant_Pad_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Pad_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Pad_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Pad_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Pad_000/test.recipe
new file mode 100644
index 000000000..2cc980b9c
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Pad_000/test.recipe
@@ -0,0 +1,30 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "padding"
+ type: INT32
+ shape { dim: 4 dim: 2 }
+ filler {
+ tag: "explicit"
+ arg: "0" arg: "0"
+ arg: "1" arg: "1"
+ arg: "2" arg: "2"
+ arg: "0" arg: "0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 7 dim: 2 }
+}
+operation {
+ type: "Pad"
+ input: "ifm"
+ input: "padding"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Pad_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Pad_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Pad_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Pad_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Pad_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Pad_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Pad_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Pad_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Pad_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Pad_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Pad_001/test.recipe
new file mode 100644
index 000000000..2cc980b9c
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Pad_001/test.recipe
@@ -0,0 +1,30 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "padding"
+ type: INT32
+ shape { dim: 4 dim: 2 }
+ filler {
+ tag: "explicit"
+ arg: "0" arg: "0"
+ arg: "1" arg: "1"
+ arg: "2" arg: "2"
+ arg: "0" arg: "0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 7 dim: 2 }
+}
+operation {
+ type: "Pad"
+ input: "ifm"
+ input: "padding"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Pad_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Pad_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Pad_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Pad_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Pad_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Pad_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.recipe
new file mode 100644
index 000000000..226593593
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "ReLU6"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.rule b/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.recipe
new file mode 100644
index 000000000..226593593
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "ReLU6"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.rule b/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.recipe
new file mode 100644
index 000000000..8eaa3602f
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "ReLU"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.rule b/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.recipe
new file mode 100644
index 000000000..8eaa3602f
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "ReLU"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.rule b/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU_I8_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_ReLU_I8_000/test.recipe
new file mode 100644
index 000000000..0cce090c9
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU_I8_000/test.recipe
@@ -0,0 +1,43 @@
+operand {
+ name: "x"
+ type: INT8
+ shape {
+ dim: 1
+ dim: 28
+ dim: 28
+ dim: 64
+ }
+ quant {
+ min: -4.62792873
+ max: 4.3443079
+ scale: 0.0351852402
+ zero_point: 4
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "Identity"
+ type: INT8
+ shape {
+ dim: 1
+ dim: 28
+ dim: 28
+ dim: 64
+ }
+ quant {
+ min: 0
+ max: 4.3443079
+ scale: 0.0170365013
+ zero_point: -128
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operation {
+ type: "ReLU"
+ input: "x"
+ output: "Identity"
+}
+input: "x"
+output: "Identity"
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU_I8_000/test.rule b/res/TensorFlowLiteRecipes/Quant_ReLU_I8_000/test.rule
new file mode 100644
index 000000000..35eaa9a53
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU_I8_000/test.rule
@@ -0,0 +1,6 @@
+# To check requantization.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype x) '=' UINT8
+RULE "OFM_U8" $(tensor_dtype Identity) '=' UINT8
diff --git a/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.recipe
new file mode 100644
index 000000000..cdca58980
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.recipe
@@ -0,0 +1,20 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 10 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 10 }
+}
+operation {
+ type: "Reshape"
+ reshape_options {
+ new_shape: 10
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.recipe
new file mode 100644
index 000000000..cdca58980
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.recipe
@@ -0,0 +1,20 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 10 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 10 }
+}
+operation {
+ type: "Reshape"
+ reshape_options {
+ new_shape: 10
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.recipe
new file mode 100644
index 000000000..3dd4c761c
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.recipe
@@ -0,0 +1,30 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "size"
+ type: INT32
+ shape { dim: 2 }
+ filler {
+ tag: "constant" arg: "16" arg: "16"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operation {
+ type: "ResizeBilinear"
+ input: "ifm1"
+ input: "size"
+ output: "ofm"
+ resize_bilinear_options {
+ align_corners: false
+ half_pixel_centers: false
+ }
+}
+input: "ifm1"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.rule b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.rule
new file mode 100644
index 000000000..3a3429d41
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm1) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm1_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.recipe
new file mode 100644
index 000000000..3dd4c761c
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.recipe
@@ -0,0 +1,30 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "size"
+ type: INT32
+ shape { dim: 2 }
+ filler {
+ tag: "constant" arg: "16" arg: "16"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operation {
+ type: "ResizeBilinear"
+ input: "ifm1"
+ input: "size"
+ output: "ofm"
+ resize_bilinear_options {
+ align_corners: false
+ half_pixel_centers: false
+ }
+}
+input: "ifm1"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.rule b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.rule
new file mode 100644
index 000000000..2c5fcd5a3
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm1) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm1_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.recipe
new file mode 100644
index 000000000..ef6b964c9
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 8 }
+}
+operand {
+ name: "size"
+ type: INT32
+ shape { dim: 2 }
+ filler { tag: "explicit" arg: "16" arg: "16" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 8 }
+}
+operation {
+ type: "ResizeNearestNeighbor"
+ resize_nearest_neighbor_options {
+ align_corners: true
+ }
+ input: "ifm"
+ input: "size"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.rule b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.recipe
new file mode 100644
index 000000000..ef6b964c9
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 8 }
+}
+operand {
+ name: "size"
+ type: INT32
+ shape { dim: 2 }
+ filler { tag: "explicit" arg: "16" arg: "16" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 8 }
+}
+operation {
+ type: "ResizeNearestNeighbor"
+ resize_nearest_neighbor_options {
+ align_corners: true
+ }
+ input: "ifm"
+ input: "size"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.rule b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Slice_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Slice_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Slice_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Slice_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Slice_000/test.recipe
new file mode 100644
index 000000000..2f9ccddfa
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Slice_000/test.recipe
@@ -0,0 +1,37 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 3 dim: 2 dim: 3 }
+}
+operand {
+ name: "begin"
+ type: INT32
+ shape { dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "0" arg: "0"
+ }
+}
+operand {
+ name: "size"
+ type: INT32
+ shape { dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "1" arg: "3"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 3 }
+}
+operation {
+ type: "Slice"
+ input: "ifm"
+ input: "begin"
+ input: "size"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Slice_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Slice_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Slice_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Slice_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Slice_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Slice_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Slice_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Slice_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Slice_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Slice_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Slice_001/test.recipe
new file mode 100644
index 000000000..2f9ccddfa
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Slice_001/test.recipe
@@ -0,0 +1,37 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 3 dim: 2 dim: 3 }
+}
+operand {
+ name: "begin"
+ type: INT32
+ shape { dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "0" arg: "0"
+ }
+}
+operand {
+ name: "size"
+ type: INT32
+ shape { dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "1" arg: "3"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 3 }
+}
+operation {
+ type: "Slice"
+ input: "ifm"
+ input: "begin"
+ input: "size"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Slice_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Slice_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Slice_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Slice_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Slice_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Slice_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.recipe
new file mode 100644
index 000000000..ce9abf555
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.recipe
@@ -0,0 +1,20 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Softmax"
+ softmax_options {
+ beta: 0.0
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.recipe
new file mode 100644
index 000000000..ce9abf555
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.recipe
@@ -0,0 +1,20 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Softmax"
+ softmax_options {
+ beta: 0.0
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_SpaceToDepth_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_SpaceToDepth_000/test.recipe
new file mode 100644
index 000000000..ec403dd86
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_SpaceToDepth_000/test.recipe
@@ -0,0 +1,22 @@
+operand {
+ name: "ifm"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 1 dim: 2 dim: 2 dim: 12 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operation {
+ type: "SpaceToDepth"
+ space_to_depth_options {
+ block_size: 2
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_SpaceToDepth_000/test.rule b/res/TensorFlowLiteRecipes/Quant_SpaceToDepth_000/test.rule
new file mode 100644
index 000000000..bb0c8cf73
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_SpaceToDepth_000/test.rule
@@ -0,0 +1,12 @@
+# To check fake quantization of SpaceToDepth (S2D).
+# 1. ifm is float32.
+# 2. S2D is float32.
+# 3. Q/DQ is inserted at the beginning of the model (from ifm).
+# 4. Q/DQ is not inserted after S2D, because S2D does not change the values of its input.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_FP32" $(tensor_dtype ifm) '=' FLOAT32
+RULE "S2D_FP32" $(tensor_dtype ofm) '=' FLOAT32
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 1
+RULE "DEQUANTIZE_OP" $(op_count DEQUANTIZE) '=' 1
diff --git a/res/TensorFlowLiteRecipes/Quant_Split_Add_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Split_Add_000/test.qconf.json
new file mode 100644
index 000000000..102e05fc7
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Split_Add_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm1",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Split_Add_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Split_Add_000/test.recipe
new file mode 100644
index 000000000..ef7908979
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Split_Add_000/test.recipe
@@ -0,0 +1,47 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 6 dim: 1 dim: 2 }
+}
+operand {
+ name: "split_dim"
+ type: INT32
+ shape { }
+ filler { tag: "explicit" arg: "0" }
+}
+operand {
+ name: "ofm1"
+ type: FLOAT32
+ shape { dim: 3 dim: 1 dim: 2 }
+}
+operand {
+ name: "ofm2"
+ type: FLOAT32
+ shape { dim: 3 dim: 1 dim: 2 }
+}
+operation {
+ type: "Split"
+ split_options {
+ num_splits: 2
+ }
+ input: "split_dim"
+ input: "ifm"
+ output: "ofm1"
+ output: "ofm2"
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 3 dim: 1 dim: 2 }
+}
+operation {
+ type: "Add"
+ input: "ofm1"
+ input: "ofm2"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Split_Add_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Split_Add_000/test.rule
new file mode 100644
index 000000000..dc1ed874e
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Split_Add_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed-precision quantization for a multiple-output node.
+# Split: int16, Add: uint8
+# Quantize Ops are inserted before Split and after each Split output.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "INPUT_UINT8" $(tensor_dtype ifm) '=' UINT8
+RULE "SPLIT_OUT_1_INT16" $(tensor_dtype ofm1) '=' INT16
+RULE "SPLIT_OUT_2_INT16" $(tensor_dtype ofm2) '=' INT16
+RULE "ADD_UINT8" $(tensor_dtype ofm) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 3
diff --git a/res/TensorFlowLiteRecipes/Quant_Split_Add_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Split_Add_001/test.qconf.json
new file mode 100644
index 000000000..272081b27
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Split_Add_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm2",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Split_Add_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Split_Add_001/test.recipe
new file mode 100644
index 000000000..ef7908979
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Split_Add_001/test.recipe
@@ -0,0 +1,47 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 6 dim: 1 dim: 2 }
+}
+operand {
+ name: "split_dim"
+ type: INT32
+ shape { }
+ filler { tag: "explicit" arg: "0" }
+}
+operand {
+ name: "ofm1"
+ type: FLOAT32
+ shape { dim: 3 dim: 1 dim: 2 }
+}
+operand {
+ name: "ofm2"
+ type: FLOAT32
+ shape { dim: 3 dim: 1 dim: 2 }
+}
+operation {
+ type: "Split"
+ split_options {
+ num_splits: 2
+ }
+ input: "split_dim"
+ input: "ifm"
+ output: "ofm1"
+ output: "ofm2"
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 3 dim: 1 dim: 2 }
+}
+operation {
+ type: "Add"
+ input: "ofm1"
+ input: "ofm2"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Split_Add_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Split_Add_001/test.rule
new file mode 100644
index 000000000..dc1ed874e
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Split_Add_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed-precision quantization for a multiple-output node.
+# Split: int16, Add: uint8
+# Quantize Ops are inserted before Split and after each Split output.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "INPUT_UINT8" $(tensor_dtype ifm) '=' UINT8
+RULE "SPLIT_OUT_1_INT16" $(tensor_dtype ofm1) '=' INT16
+RULE "SPLIT_OUT_2_INT16" $(tensor_dtype ofm2) '=' INT16
+RULE "ADD_UINT8" $(tensor_dtype ofm) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 3
diff --git a/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.recipe
new file mode 100644
index 000000000..7bdf87d47
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Tanh"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.recipe
new file mode 100644
index 000000000..7bdf87d47
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Tanh"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.recipe
new file mode 100644
index 000000000..9462e1351
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.recipe
@@ -0,0 +1,55 @@
+operand {
+ name: "out_shape"
+ type: INT32
+ shape { dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "4" arg: "4" arg: "3"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "2" arg: "3"
+ }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 3 dim: 1 dim: 1 dim: 3 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+
+operation {
+ type: "TransposeConv"
+ transpose_conv_options {
+ padding: SAME
+ stride_w: 1
+ stride_h: 1
+ activation: NONE
+ }
+ input: "out_shape"
+ input: "ker"
+ input: "ifm"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.rule b/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.rule
new file mode 100644
index 000000000..50f235a55
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "KER_S16" $(tensor_dtype ker) '=' INT16
+RULE "BIAS_S64" $(tensor_dtype bias) '=' INT64
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.recipe
new file mode 100644
index 000000000..9462e1351
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.recipe
@@ -0,0 +1,55 @@
+operand {
+ name: "out_shape"
+ type: INT32
+ shape { dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "4" arg: "4" arg: "3"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "2" arg: "3"
+ }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 3 dim: 1 dim: 1 dim: 3 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+
+operation {
+ type: "TransposeConv"
+ transpose_conv_options {
+ padding: SAME
+ stride_w: 1
+ stride_h: 1
+ activation: NONE
+ }
+ input: "out_shape"
+ input: "ker"
+ input: "ifm"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.rule b/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.rule
new file mode 100644
index 000000000..ffa3bc906
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "KER_U8" $(tensor_dtype ker) '=' UINT8
+RULE "BIAS_S32" $(tensor_dtype bias) '=' INT32
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_TransposeConv_I8_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_TransposeConv_I8_000/test.recipe
new file mode 100644
index 000000000..12a5a34be
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_TransposeConv_I8_000/test.recipe
@@ -0,0 +1,344 @@
+operand {
+ name: "x"
+ type: INT8
+ shape {
+ dim: 1
+ dim: 28
+ dim: 28
+ dim: 64
+ }
+ quant {
+ min: -4.60648251
+ max: 4.56543779
+ scale: 0.0359683186
+ zero_point: 0
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "conv2d_transpose/stack"
+ type: INT32
+ shape {
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "30"
+ arg: "30"
+ arg: "64"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "conv2d_transpose/conv2d_transpose"
+ type: INT8
+ shape {
+ dim: 64
+ dim: 3
+ dim: 3
+ dim: 64
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+ quant {
+ min: -0.0946341679
+ min: -0.0944282487
+ min: -0.0918895602
+ min: -0.0925355926
+ min: -0.0947051644
+ min: -0.0941944346
+ min: -0.0941459388
+ min: -0.0919710547
+ min: -0.0916045085
+ min: -0.093310751
+ min: -0.0928586572
+ min: -0.0935951397
+ min: -0.0942160562
+ min: -0.0933434889
+ min: -0.0935866162
+ min: -0.0925118327
+ min: -0.0942456797
+ min: -0.0939968601
+ min: -0.0924739465
+ min: -0.0941662267
+ min: -0.087930195
+ min: -0.0947148651
+ min: -0.0943328366
+ min: -0.0936802849
+ min: -0.0942436531
+ min: -0.0941923857
+ min: -0.0940833613
+ min: -0.0932623446
+ min: -0.0930994451
+ min: -0.0943730548
+ min: -0.0884840712
+ min: -0.0939643234
+ min: -0.0940853506
+ min: -0.0935710147
+ min: -0.0936997607
+ min: -0.0912955627
+ min: -0.0921618789
+ min: -0.0947207361
+ min: -0.0931588039
+ min: -0.0941777378
+ min: -0.0935421512
+ min: -0.09219338
+ min: -0.0944892615
+ min: -0.0936101824
+ min: -0.0937219635
+ min: -0.0916510522
+ min: -0.0941754729
+ min: -0.0943513513
+ min: -0.0933411643
+ min: -0.0934003219
+ min: -0.0936945155
+ min: -0.0943120122
+ min: -0.0936696753
+ min: -0.0941288397
+ min: -0.0931984335
+ min: -0.0946868435
+ min: -0.0931827351
+ min: -0.0937825665
+ min: -0.0920807496
+ min: -0.0920289457
+ min: -0.0932372659
+ min: -0.0933243334
+ min: -0.091677241
+ min: -0.0938237533
+ max: 0.0929114074
+ max: 0.0939644054
+ max: 0.0936235785
+ max: 0.0945259407
+ max: 0.091929324
+ max: 0.0927724242
+ max: 0.0939807221
+ max: 0.0932361633
+ max: 0.0926983431
+ max: 0.0941318944
+ max: 0.0939000174
+ max: 0.0943209827
+ max: 0.0936219618
+ max: 0.0933490321
+ max: 0.0942894742
+ max: 0.0940931812
+ max: 0.0910203531
+ max: 0.0894439071
+ max: 0.0919351205
+ max: 0.0924752355
+ max: 0.09339872
+ max: 0.0942701921
+ max: 0.0933261067
+ max: 0.0920859501
+ max: 0.0946150199
+ max: 0.0933252722
+ max: 0.0941924453
+ max: 0.0936042666
+ max: 0.0931428894
+ max: 0.0926085934
+ max: 0.0915477723
+ max: 0.0934655741
+ max: 0.0947353616
+ max: 0.0938620269
+ max: 0.0939659476
+ max: 0.0943774134
+ max: 0.0933601633
+ max: 0.0943998545
+ max: 0.0934786722
+ max: 0.0935663804
+ max: 0.0902687311
+ max: 0.0915351138
+ max: 0.0941802114
+ max: 0.0939741656
+ max: 0.0947180837
+ max: 0.0943731889
+ max: 0.0931445807
+ max: 0.0917655453
+ max: 0.0940534249
+ max: 0.0941507071
+ max: 0.093019031
+ max: 0.0945132151
+ max: 0.0944205299
+ max: 0.0932926387
+ max: 0.0919418409
+ max: 0.0931737125
+ max: 0.0918817222
+ max: 0.0942185298
+ max: 0.094377391
+ max: 0.0927698091
+ max: 0.0935381278
+ max: 0.0942255259
+ max: 0.0910803
+ max: 0.0942931473
+ scale: 0.000745150901
+ scale: 0.000743529527
+ scale: 0.000737193506
+ scale: 0.000744298741
+ scale: 0.000745709927
+ scale: 0.000741688476
+ scale: 0.000741306576
+ scale: 0.000734143
+ scale: 0.000729908235
+ scale: 0.00074119604
+ scale: 0.00073937024
+ scale: 0.000742684875
+ scale: 0.000741858734
+ scale: 0.000735031732
+ scale: 0.000742436794
+ scale: 0.000740891206
+ scale: 0.000742092
+ scale: 0.00074013276
+ scale: 0.000728141284
+ scale: 0.000741466356
+ scale: 0.000735423
+ scale: 0.000745786354
+ scale: 0.00074277824
+ scale: 0.00073764
+ scale: 0.000745000143
+ scale: 0.000741672353
+ scale: 0.000741672819
+ scale: 0.000737041468
+ scale: 0.000733408553
+ scale: 0.000743094948
+ scale: 0.00072084862
+ scale: 0.000739876588
+ scale: 0.000745947706
+ scale: 0.000739071053
+ scale: 0.000739889336
+ scale: 0.000743129232
+ scale: 0.000735119393
+ scale: 0.000745832571
+ scale: 0.00073605252
+ scale: 0.000741557
+ scale: 0.000736552349
+ scale: 0.000725932128
+ scale: 0.000744009914
+ scale: 0.000739954063
+ scale: 0.000745811674
+ scale: 0.000743096
+ scale: 0.000741539174
+ scale: 0.00074292405
+ scale: 0.000740578165
+ scale: 0.000741344178
+ scale: 0.000737752067
+ scale: 0.000744198565
+ scale: 0.000743468758
+ scale: 0.000741171942
+ scale: 0.000733845925
+ scale: 0.000745565689
+ scale: 0.000733722351
+ scale: 0.000741878175
+ scale: 0.000743129058
+ scale: 0.000730470929
+ scale: 0.000736520684
+ scale: 0.000741933298
+ scale: 0.000721868
+ scale: 0.000742465723
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ zero_point: 0
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "Identity"
+ type: INT8
+ shape {
+ dim: 1
+ dim: 30
+ dim: 30
+ dim: 64
+ }
+ quant {
+ min: -4.55380917
+ max: 4.73831749
+ scale: 0.0364397131
+ zero_point: -3
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operation {
+ type: "TransposeConv"
+ input: "conv2d_transpose/stack"
+ input: "conv2d_transpose/conv2d_transpose"
+ input: "x"
+ output: "Identity"
+ transpose_conv_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ activation: NONE
+ }
+}
+input: "x"
+output: "Identity"
diff --git a/res/TensorFlowLiteRecipes/Quant_TransposeConv_I8_000/test.rule b/res/TensorFlowLiteRecipes/Quant_TransposeConv_I8_000/test.rule
new file mode 100644
index 000000000..a3ac17450
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_TransposeConv_I8_000/test.rule
@@ -0,0 +1,8 @@
+# To check requantization.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype x) '=' UINT8
+RULE "INPUT_SIZE_S32" $(tensor_dtype conv2d_transpose/stack) '=' INT32
+RULE "WGT_U8" $(tensor_dtype conv2d_transpose/conv2d_transpose) '=' UINT8
+RULE "OFM_U8" $(tensor_dtype Identity) '=' UINT8
diff --git a/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.recipe
new file mode 100644
index 000000000..82a85c13b
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 3 dim: 8 dim: 1 }
+}
+operand {
+ name: "perm"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "1" arg: "2" arg: "0" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 8 dim: 1 dim: 3 }
+}
+
+operation {
+ type: "Transpose"
+ transpose_options {
+ }
+ input: "ifm"
+ input: "perm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.recipe
new file mode 100644
index 000000000..82a85c13b
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 3 dim: 8 dim: 1 }
+}
+operand {
+ name: "perm"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "1" arg: "2" arg: "0" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 8 dim: 1 dim: 3 }
+}
+
+operation {
+ type: "Transpose"
+ transpose_options {
+ }
+ input: "ifm"
+ input: "perm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quantize_000/test.recipe b/res/TensorFlowLiteRecipes/Quantize_000/test.recipe
new file mode 100644
index 000000000..061551a87
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quantize_000/test.recipe
@@ -0,0 +1,18 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 4 }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 4 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operation {
+ type: "Quantize"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quantize_000/test.reverse b/res/TensorFlowLiteRecipes/Quantize_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quantize_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quantize_001/test.recipe b/res/TensorFlowLiteRecipes/Quantize_001/test.recipe
new file mode 100644
index 000000000..943341be1
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quantize_001/test.recipe
@@ -0,0 +1,66 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 2 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm_c"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 1 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm_c"
+}
+operand {
+ name: "ofm_q"
+ type: UINT8
+ shape { dim: 1 dim: 3 dim: 3 dim: 1 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operation {
+ type: "Quantize"
+ input: "ofm_c"
+ output: "ofm_q"
+}
+operand {
+ name: "ofm"
+ type: INT16
+ shape { dim: 1 dim: 3 dim: 3 dim: 1 }
+ quant { min: -255 max: 255 scale: 1.0 zero_point: 0 }
+}
+operation {
+ type: "Quantize"
+ input: "ofm_q"
+ output: "ofm"
+}
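+# The two Quantize ops above form a requantize chain:
+# FLOAT32 Conv2D output -> UINT8 -> INT16.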
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quantize_001/test.reverse b/res/TensorFlowLiteRecipes/Quantize_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quantize_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/REGRESS_ONNX_Conv_BN_001/test.recipe b/res/TensorFlowLiteRecipes/REGRESS_ONNX_Conv_BN_001/test.recipe
new file mode 100644
index 000000000..1c6e8000c
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/REGRESS_ONNX_Conv_BN_001/test.recipe
@@ -0,0 +1,421 @@
+operand {
+ name: "Const_8"
+ type: INT32
+ shape {
+ dim: 4
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "0"
+ arg: "0"
+ arg: "0"
+ arg: "0"
+ arg: "1"
+ arg: "1"
+ arg: "1"
+ arg: "1"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "transpose_8/perm"
+ type: INT32
+ shape {
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "0"
+ arg: "3"
+ arg: "1"
+ arg: "2"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "transpose_9/perm"
+ type: INT32
+ shape {
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "0"
+ arg: "2"
+ arg: "3"
+ arg: "1"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "batchnorm/mul"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.00498116"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "batchnorm/sub"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.0332279"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "convolution_1"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 3
+ dim: 3
+ dim: 16
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "convolution_2"
+ type: FLOAT32
+ shape {
+ dim: 8
+ dim: 3
+ dim: 3
+ dim: 1
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "convolution_1_weight"
+ type: FLOAT32
+ shape {
+ dim: 1
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "convolution_2_weight"
+ type: FLOAT32
+ shape {
+ dim: 8
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "transpose_5;PartitionedCall/transpose_5"
+ type: INT32
+ shape {
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "1"
+ arg: "128"
+ arg: "128"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "transpose_7"
+ type: INT32
+ shape {
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "130"
+ arg: "130"
+ arg: "1"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "Pad_1"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 16
+ dim: 130
+ dim: 130
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "transpose_4"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 130
+ dim: 130
+ dim: 16
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "convolution_1_out"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 128
+ dim: 128
+ dim: 1
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "transpose_51"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 128
+ dim: 128
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "batchnorm/mul_1"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 128
+ dim: 128
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "batchnorm/add_1"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 128
+ dim: 128
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "Pad_2"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 130
+ dim: 130
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "transpose_71"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 130
+ dim: 130
+ dim: 1
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "convolution_2_out"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 128
+ dim: 128
+ dim: 8
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "transpose_8"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 8
+ dim: 128
+ dim: 128
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operation {
+ type: "Transpose"
+ input: "Pad_1"
+ input: "transpose_9/perm"
+ output: "transpose_4"
+}
+operation {
+ type: "Conv2D"
+ input: "transpose_4"
+ input: "convolution_1"
+ input: "convolution_1_weight"
+ output: "convolution_1_out"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ activation: NONE
+ dilation_w_factor: 1
+ dilation_h_factor: 1
+ }
+}
+operation {
+ type: "Reshape"
+ input: "convolution_1_out"
+ input: "transpose_5;PartitionedCall/transpose_5"
+ output: "transpose_51"
+}
+operation {
+ type: "Mul"
+ input: "transpose_51"
+ input: "batchnorm/mul"
+ output: "batchnorm/mul_1"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "batchnorm/mul_1"
+ input: "batchnorm/sub"
+ output: "batchnorm/add_1"
+ add_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Pad"
+ input: "batchnorm/add_1"
+ input: "Const_8"
+ output: "Pad_2"
+}
+operation {
+ type: "Reshape"
+ input: "Pad_2"
+ input: "transpose_7"
+ output: "transpose_71"
+}
+operation {
+ type: "Conv2D"
+ input: "transpose_71"
+ input: "convolution_2"
+ input: "convolution_2_weight"
+ output: "convolution_2_out"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ activation: NONE
+ dilation_w_factor: 1
+ dilation_h_factor: 1
+ }
+}
+operation {
+ type: "Transpose"
+ input: "convolution_2_out"
+ input: "transpose_8/perm"
+ output: "transpose_8"
+}
+input: "Pad_1"
+output: "transpose_8"
diff --git a/res/TensorFlowLiteRecipes/REGRESS_ONNX_Conv_BN_001/test.rule b/res/TensorFlowLiteRecipes/REGRESS_ONNX_Conv_BN_001/test.rule
new file mode 100644
index 000000000..5aa380447
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/REGRESS_ONNX_Conv_BN_001/test.rule
@@ -0,0 +1,9 @@
+# To check ONNX conversion is OK
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "CONV_EXIST" $(op_count CONV_2D) '=' 2
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_ADD" $(op_count ADD) '=' 0
+RULE "NO_RESHAPE" $(op_count RESHAPE) '=' 0
+RULE "NO_TRANSPOSE" $(op_count TRANSPOSE) '=' 0
diff --git a/res/TensorFlowLiteRecipes/REGRESS_ONNX_Conv_BN_MeanMean_001/test.recipe b/res/TensorFlowLiteRecipes/REGRESS_ONNX_Conv_BN_MeanMean_001/test.recipe
new file mode 100644
index 000000000..8acba41bf
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/REGRESS_ONNX_Conv_BN_MeanMean_001/test.recipe
@@ -0,0 +1,309 @@
+operand {
+ name: "Mean_4/reduction_indices"
+ type: INT32
+ shape {
+ }
+ filler {
+ tag: "explicit"
+ arg: "3"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "Mean_5/reduction_indices"
+ type: INT32
+ shape {
+ }
+ filler {
+ tag: "explicit"
+ arg: "2"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "transpose_73/perm"
+ type: INT32
+ shape {
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "0"
+ arg: "2"
+ arg: "3"
+ arg: "1"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "transpose_8/perm"
+ type: INT32
+ shape {
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "0"
+ arg: "3"
+ arg: "1"
+ arg: "2"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "batchnorm_24/mul"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 256
+ dim: 1
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "1.0"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "batchnorm_24/sub"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 256
+ dim: 1
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.0"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "convolution_12"
+ type: FLOAT32
+ shape {
+ dim: 256
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "convolution_121"
+ type: FLOAT32
+ shape {
+ dim: 256
+ dim: 1
+ dim: 1
+ dim: 256
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "Relu_23"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 256
+ dim: 5
+ dim: 5
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "transpose_73"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 5
+ dim: 5
+ dim: 256
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "convolution_122"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 5
+ dim: 5
+ dim: 256
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "transpose_74"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 256
+ dim: 5
+ dim: 5
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "batchnorm_24/mul_1"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 256
+ dim: 5
+ dim: 5
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "Relu_24"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 256
+ dim: 5
+ dim: 5
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "Mean_4"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 256
+ dim: 5
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "Mean_5"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 256
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operation {
+ type: "Transpose"
+ input: "Relu_23"
+ input: "transpose_73/perm"
+ output: "transpose_73"
+}
+operation {
+ type: "Conv2D"
+ input: "transpose_73"
+ input: "convolution_121"
+ input: "convolution_12"
+ output: "convolution_122"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ activation: NONE
+ dilation_w_factor: 1
+ dilation_h_factor: 1
+ }
+}
+operation {
+ type: "Transpose"
+ input: "convolution_122"
+ input: "transpose_8/perm"
+ output: "transpose_74"
+}
+operation {
+ type: "Mul"
+ input: "transpose_74"
+ input: "batchnorm_24/mul"
+ output: "batchnorm_24/mul_1"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "batchnorm_24/mul_1"
+ input: "batchnorm_24/sub"
+ output: "Relu_24"
+ add_options {
+ activation: RELU
+ }
+}
+operation {
+ type: "Mean"
+ input: "Relu_24"
+ input: "Mean_4/reduction_indices"
+ output: "Mean_4"
+ mean_options {
+ keep_dims: false
+ }
+}
+operation {
+ type: "Mean"
+ input: "Mean_4"
+ input: "Mean_5/reduction_indices"
+ output: "Mean_5"
+ mean_options {
+ keep_dims: false
+ }
+}
+input: "Relu_23"
+output: "Mean_5"
diff --git a/res/TensorFlowLiteRecipes/REGRESS_ONNX_Conv_BN_MeanMean_001/test.rule b/res/TensorFlowLiteRecipes/REGRESS_ONNX_Conv_BN_MeanMean_001/test.rule
new file mode 100644
index 000000000..f1ac77cfb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/REGRESS_ONNX_Conv_BN_MeanMean_001/test.rule
@@ -0,0 +1,9 @@
+# To check ONNX conversion is OK
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "CONV_EXIST" $(op_count CONV_2D) '=' 1
+RULE "ONE_MEAN" $(op_count MEAN) '=' 1
+RULE "NO_TRANSPOSE" $(op_count TRANSPOSE) '=' 0
+RULE "NO_ADD" $(op_count ADD) '=' 0
+RULE "NO_MUL" $(op_count MUL) '=' 0
diff --git a/res/TensorFlowLiteRecipes/REGRESS_ONNX_Conv_BN_Relu6_001/test.recipe b/res/TensorFlowLiteRecipes/REGRESS_ONNX_Conv_BN_Relu6_001/test.recipe
new file mode 100644
index 000000000..e36e4e806
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/REGRESS_ONNX_Conv_BN_Relu6_001/test.recipe
@@ -0,0 +1,277 @@
+operand {
+ name: "input0"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 3
+ dim: 32
+ dim: 32
+ }
+}
+operand {
+ name: "Const_95"
+ type: INT32
+ shape {
+ dim: 4
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "0"
+ arg: "0"
+ arg: "0"
+ arg: "0"
+ arg: "1"
+ arg: "1"
+ arg: "1"
+ arg: "1"
+ }
+}
+operand {
+ name: "Pad"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 3
+ dim: 34
+ dim: 34
+ }
+}
+operand {
+ name: "transpose_158/perm"
+ type: INT32
+ shape {
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "0"
+ arg: "2"
+ arg: "3"
+ arg: "1"
+ }
+}
+operand {
+ name: "transpose_1"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 34
+ dim: 34
+ dim: 3
+ }
+}
+operand {
+ name: "convolution"
+ type: FLOAT32
+ shape {
+ dim: 16
+ dim: 3
+ dim: 3
+ dim: 3
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operand {
+ name: "convolution_41"
+ type: FLOAT32
+ shape {
+ dim: 16
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operand {
+ name: "convolution1"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 16
+ dim: 16
+ dim: 16
+ }
+}
+operand {
+ name: "transpose_159/perm"
+ type: INT32
+ shape {
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "0"
+ arg: "3"
+ arg: "1"
+ arg: "2"
+ }
+}
+operand {
+ name: "transpose_2"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 16
+ dim: 16
+ dim: 16
+ }
+}
+operand {
+ name: "batchnorm/mul"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 16
+ dim: 1
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.001"
+ }
+}
+operand {
+ name: "batchnorm/mul_1"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 16
+ dim: 16
+ dim: 16
+ }
+}
+operand {
+ name: "batchnorm/sub"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 16
+ dim: 1
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.0"
+ }
+}
+operand {
+ name: "batchnorm/add_1"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 16
+ dim: 16
+ dim: 16
+ }
+}
+operand {
+ name: "clip_by_value_9/Minimum/y"
+ type: FLOAT32
+ shape {
+ }
+ filler {
+ tag: "explicit"
+ arg: "6"
+ }
+}
+operand {
+ name: "clip_by_value/Minimum"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 16
+ dim: 16
+ dim: 16
+ }
+}
+operand {
+ name: "clip_by_value_9/y"
+ type: FLOAT32
+ shape {
+ }
+ filler {
+ tag: "explicit"
+ arg: "0"
+ }
+}
+operand {
+ name: "clip_by_value"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 16
+ dim: 16
+ dim: 16
+ }
+}
+operation {
+ type: "Pad"
+ input: "input0"
+ input: "Const_95"
+ output: "Pad"
+}
+operation {
+ type: "Transpose"
+ input: "Pad"
+ input: "transpose_158/perm"
+ output: "transpose_1"
+}
+operation {
+ type: "Conv2D"
+ input: "transpose_1"
+ input: "convolution"
+ input: "convolution_41"
+ output: "convolution1"
+ conv2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ activation: NONE
+ dilation_w_factor: 1
+ dilation_h_factor: 1
+ }
+}
+operation {
+ type: "Transpose"
+ input: "convolution1"
+ input: "transpose_159/perm"
+ output: "transpose_2"
+}
+operation {
+ type: "Mul"
+ input: "transpose_2"
+ input: "batchnorm/mul"
+ output: "batchnorm/mul_1"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "batchnorm/mul_1"
+ input: "batchnorm/sub"
+ output: "batchnorm/add_1"
+ add_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Minimum"
+ input: "batchnorm/add_1"
+ input: "clip_by_value_9/Minimum/y"
+ output: "clip_by_value/Minimum"
+}
+operation {
+ type: "Maximum"
+ input: "clip_by_value/Minimum"
+ input: "clip_by_value_9/y"
+ output: "clip_by_value"
+}
+input: "input0"
+output: "clip_by_value"
diff --git a/res/TensorFlowLiteRecipes/REGRESS_ONNX_Conv_BN_Relu6_001/test.rule b/res/TensorFlowLiteRecipes/REGRESS_ONNX_Conv_BN_Relu6_001/test.rule
new file mode 100644
index 000000000..ac81f4326
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/REGRESS_ONNX_Conv_BN_Relu6_001/test.rule
@@ -0,0 +1,11 @@
+# To check ONNX conversion is OK
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "CONV_EXIST" $(op_count CONV_2D) '=' 1
+RULE "NO_TRANSPOSE" $(op_count TRANSPOSE) '=' 0
+RULE "NO_ADD" $(op_count ADD) '=' 0
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_RELU6" $(op_count RELU6) '=' 0
+RULE "NO_MINIMUM" $(op_count MINIMUM) '=' 0
+RULE "NO_MAXIMUM" $(op_count MAXIMUM) '=' 0
diff --git a/res/TensorFlowLiteRecipes/REGRESS_ONNX_Mul_Mul_000/test.recipe b/res/TensorFlowLiteRecipes/REGRESS_ONNX_Mul_Mul_000/test.recipe
new file mode 100644
index 000000000..a3a8a2672
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/REGRESS_ONNX_Mul_Mul_000/test.recipe
@@ -0,0 +1,88 @@
+operand {
+ name: "Input"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 3
+ dim: 32
+ dim: 32
+ }
+}
+operand {
+ name: "Const"
+ type: FLOAT32
+ shape {
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ }
+}
+operand {
+ name: "Mul1"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 3
+ dim: 32
+ dim: 32
+ }
+}
+operand {
+ name: "ShapeConst"
+ type: INT32
+ shape {
+ dim: 3
+ }
+ filler {
+ tag: "explicit"
+ arg: "3"
+ arg: "32"
+ arg: "32"
+ }
+}
+operand {
+ name: "Reshape"
+ type: FLOAT32
+ shape {
+ dim: 3
+ dim: 32
+ dim: 32
+ }
+}
+operand {
+ name: "Mul2"
+ type: FLOAT32
+ shape {
+ dim: 3
+ dim: 32
+ dim: 32
+ }
+}
+operation {
+ type: "Mul"
+ input: "Input"
+ input: "Const"
+ output: "Mul1"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Reshape"
+ input: "Mul1"
+ input: "ShapeConst"
+ output: "Reshape"
+}
+operation {
+ type: "Mul"
+ input: "Reshape"
+ input: "Const"
+ output: "Mul2"
+ mul_options {
+ activation: NONE
+ }
+}
+input: "Input"
+output: "Mul2"
diff --git a/res/TensorFlowLiteRecipes/REGRESS_ONNX_Mul_Mul_000/test.rule b/res/TensorFlowLiteRecipes/REGRESS_ONNX_Mul_Mul_000/test.rule
new file mode 100644
index 000000000..9ad8e9fb3
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/REGRESS_ONNX_Mul_Mul_000/test.rule
@@ -0,0 +1,7 @@
+# To check NCHW to NHWC conversion is OK
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "MUL_EXIST" $(op_count MUL) '=' 2
+RULE "RESHAPE_EXIST" $(op_count RESHAPE) '=' 1
+RULE "TRANSPOSE" $(op_count TRANSPOSE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/ReLU6_dynamic_000/test.recipe b/res/TensorFlowLiteRecipes/ReLU6_dynamic_000/test.recipe
new file mode 100644
index 000000000..e6dee0e7d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ReLU6_dynamic_000/test.recipe
@@ -0,0 +1,19 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ shape_signature { dim: -1 dim: 3 dim: 3 dim: 2 }
+}
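+# A dim of -1 in shape_signature marks that dimension as dynamic
+# (unknown until runtime); "shape" holds the concrete example shape.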
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ shape_signature { dim: -1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "ReLU6"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/ReLU6_dynamic_000/test.reverse b/res/TensorFlowLiteRecipes/ReLU6_dynamic_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ReLU6_dynamic_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/ReLUN1To1_dynamic_000/test.recipe b/res/TensorFlowLiteRecipes/ReLUN1To1_dynamic_000/test.recipe
new file mode 100644
index 000000000..21c237ff8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ReLUN1To1_dynamic_000/test.recipe
@@ -0,0 +1,19 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ shape_signature { dim: -1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ shape_signature { dim: -1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "ReLUN1To1"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/ReLUN1To1_dynamic_000/test.reverse b/res/TensorFlowLiteRecipes/ReLUN1To1_dynamic_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ReLUN1To1_dynamic_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/ReLU_dynamic_000/test.recipe b/res/TensorFlowLiteRecipes/ReLU_dynamic_000/test.recipe
new file mode 100644
index 000000000..fa4293e35
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ReLU_dynamic_000/test.recipe
@@ -0,0 +1,19 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ shape_signature { dim: -1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ shape_signature { dim: -1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "ReLU"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/ReLU_dynamic_000/test.reverse b/res/TensorFlowLiteRecipes/ReLU_dynamic_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ReLU_dynamic_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/ReduceAny_dynamic_000/test.recipe b/res/TensorFlowLiteRecipes/ReduceAny_dynamic_000/test.recipe
new file mode 100644
index 000000000..427bd05f1
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ReduceAny_dynamic_000/test.recipe
@@ -0,0 +1,31 @@
+operand {
+ name: "ifm"
+ type: BOOL
+ shape { dim: 1 dim: 3 dim: 4 }
+ shape_signature { dim: -1 dim: 3 dim: 4 }
+}
+operand {
+ name: "reduction_indices"
+ type: INT32
+ shape { dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "0" arg: "1" arg: "2"
+ }
+}
+operand {
+ name: "ofm"
+ type: BOOL
+ shape { }
+}
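+# An empty shape block denotes a rank-0 (scalar) result: all axes are
+# reduced and keep_dims is false.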
+operation {
+ type: "ReduceAny"
+ reduce_any_options {
+ keep_dims: false
+ }
+ input: "ifm"
+ input: "reduction_indices"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/ReduceAny_dynamic_000/test.reverse b/res/TensorFlowLiteRecipes/ReduceAny_dynamic_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ReduceAny_dynamic_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/ReduceAny_dynamic_001/test.recipe b/res/TensorFlowLiteRecipes/ReduceAny_dynamic_001/test.recipe
new file mode 100644
index 000000000..9c3a5e877
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ReduceAny_dynamic_001/test.recipe
@@ -0,0 +1,32 @@
+operand {
+ name: "ifm"
+ type: BOOL
+ shape { dim: 1 dim: 3 dim: 4 }
+ shape_signature { dim: -1 dim: 3 dim: 4 }
+}
+operand {
+ name: "reduction_indices"
+ type: INT32
+ shape { dim: 1 }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ }
+}
+operand {
+ name: "ofm"
+ type: BOOL
+ shape { dim: 1 dim: 4 }
+ shape_signature { dim: -1 dim: 4 }
+}
+operation {
+ type: "ReduceAny"
+ reduce_any_options {
+ keep_dims: false
+ }
+ input: "ifm"
+ input: "reduction_indices"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/ReduceAny_dynamic_001/test.reverse b/res/TensorFlowLiteRecipes/ReduceAny_dynamic_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ReduceAny_dynamic_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/ReduceAny_dynamic_002/test.recipe b/res/TensorFlowLiteRecipes/ReduceAny_dynamic_002/test.recipe
new file mode 100644
index 000000000..109a3cbac
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ReduceAny_dynamic_002/test.recipe
@@ -0,0 +1,31 @@
+operand {
+ name: "ifm"
+ type: BOOL
+ shape { dim: 1 dim: 3 dim: 4 }
+ shape_signature { dim: -1 dim: 3 dim: 4 }
+}
+operand {
+ name: "reduction_indices"
+ type: INT32
+ shape { dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "0" arg: "1" arg: "2"
+ }
+}
+operand {
+ name: "ofm"
+ type: BOOL
+ shape { dim: 1 dim: 1 dim: 1 }
+}
+operation {
+ type: "ReduceAny"
+ reduce_any_options {
+ keep_dims: true
+ }
+ input: "ifm"
+ input: "reduction_indices"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/ReduceAny_dynamic_002/test.reverse b/res/TensorFlowLiteRecipes/ReduceAny_dynamic_002/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ReduceAny_dynamic_002/test.reverse
diff --git a/res/TensorFlowLiteRecipes/ReduceAny_dynamic_003/test.recipe b/res/TensorFlowLiteRecipes/ReduceAny_dynamic_003/test.recipe
new file mode 100644
index 000000000..1355f2b33
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ReduceAny_dynamic_003/test.recipe
@@ -0,0 +1,31 @@
+operand {
+ name: "ifm"
+ type: BOOL
+ shape { dim: 2 dim: 1 dim: 4 }
+ shape_signature { dim: 2 dim: -1 dim: 4 }
+}
+operand {
+ name: "reduction_indices"
+ type: INT32
+ shape { dim: 1 }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ }
+}
+operand {
+ name: "ofm"
+ type: BOOL
+ shape { dim: 2 dim: 1 dim: 4 }
+}
+operation {
+ type: "ReduceAny"
+ reduce_any_options {
+ keep_dims: true
+ }
+ input: "ifm"
+ input: "reduction_indices"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/ReduceAny_dynamic_003/test.reverse b/res/TensorFlowLiteRecipes/ReduceAny_dynamic_003/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ReduceAny_dynamic_003/test.reverse
diff --git a/res/TensorFlowLiteRecipes/ReduceMax_dynamic_000/test.recipe b/res/TensorFlowLiteRecipes/ReduceMax_dynamic_000/test.recipe
new file mode 100644
index 000000000..01669bee2
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ReduceMax_dynamic_000/test.recipe
@@ -0,0 +1,29 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 4 }
+ shape_signature { dim: -1 dim: 8 dim: 8 dim: 4 }
+}
+operand {
+ name: "axis"
+ type: INT32
+ shape { dim: 1 }
+ filler { tag: "explicit" arg: "-1" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+ shape_signature { dim: -1 dim: 8 dim: 8 dim: 1 }
+}
+operation {
+ type: "ReduceMax"
+ reduce_max_options {
+ keep_dims: true
+ }
+ input: "ifm"
+ input: "axis"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/ReduceMax_dynamic_000/test.reverse b/res/TensorFlowLiteRecipes/ReduceMax_dynamic_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ReduceMax_dynamic_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/ReduceMin_dynamic_000/test.recipe b/res/TensorFlowLiteRecipes/ReduceMin_dynamic_000/test.recipe
new file mode 100644
index 000000000..50603ba5f
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ReduceMin_dynamic_000/test.recipe
@@ -0,0 +1,29 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 4 }
+ shape_signature { dim: -1 dim: 8 dim: 8 dim: 4 }
+}
+operand {
+ name: "axis"
+ type: INT32
+ shape { dim: 1 }
+ filler { tag: "explicit" arg: "-1" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+ shape_signature { dim: -1 dim: 8 dim: 8 dim: 1 }
+}
+operation {
+ type: "ReduceMin"
+ reduce_min_options {
+ keep_dims: true
+ }
+ input: "ifm"
+ input: "axis"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/ReduceMin_dynamic_000/test.reverse b/res/TensorFlowLiteRecipes/ReduceMin_dynamic_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ReduceMin_dynamic_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/ReduceProd_dynamic_000/test.recipe b/res/TensorFlowLiteRecipes/ReduceProd_dynamic_000/test.recipe
new file mode 100644
index 000000000..e81db67df
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ReduceProd_dynamic_000/test.recipe
@@ -0,0 +1,31 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 4 }
+ shape_signature { dim: -1 dim: 3 dim: 4 }
+}
+operand {
+ name: "reduction_indices"
+ type: INT32
+ shape { dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "0" arg: "1" arg: "2"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { }
+}
+operation {
+ type: "ReduceProd"
+ reduce_prod_options {
+ keep_dims: false
+ }
+ input: "ifm"
+ input: "reduction_indices"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/ReduceProd_dynamic_000/test.reverse b/res/TensorFlowLiteRecipes/ReduceProd_dynamic_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ReduceProd_dynamic_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/ReduceProd_dynamic_001/test.recipe b/res/TensorFlowLiteRecipes/ReduceProd_dynamic_001/test.recipe
new file mode 100644
index 000000000..f2811b373
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ReduceProd_dynamic_001/test.recipe
@@ -0,0 +1,32 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 4 }
+ shape_signature { dim: -1 dim: 3 dim: 4 }
+}
+operand {
+ name: "reduction_indices"
+ type: INT32
+ shape { dim: 1 }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 }
+ shape_signature { dim: -1 dim: 4 }
+}
+operation {
+ type: "ReduceProd"
+ reduce_prod_options {
+ keep_dims: false
+ }
+ input: "ifm"
+ input: "reduction_indices"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/ReduceProd_dynamic_001/test.reverse b/res/TensorFlowLiteRecipes/ReduceProd_dynamic_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ReduceProd_dynamic_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/ReduceProd_dynamic_002/test.recipe b/res/TensorFlowLiteRecipes/ReduceProd_dynamic_002/test.recipe
new file mode 100644
index 000000000..c1e14c511
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ReduceProd_dynamic_002/test.recipe
@@ -0,0 +1,31 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 4 }
+ shape_signature { dim: -1 dim: 3 dim: 4 }
+}
+operand {
+ name: "reduction_indices"
+ type: INT32
+ shape { dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "0" arg: "1" arg: "2"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 }
+}
+operation {
+ type: "ReduceProd"
+ reduce_prod_options {
+ keep_dims: true
+ }
+ input: "ifm"
+ input: "reduction_indices"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/ReduceProd_dynamic_002/test.reverse b/res/TensorFlowLiteRecipes/ReduceProd_dynamic_002/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ReduceProd_dynamic_002/test.reverse
diff --git a/res/TensorFlowLiteRecipes/ReduceProd_dynamic_003/test.recipe b/res/TensorFlowLiteRecipes/ReduceProd_dynamic_003/test.recipe
new file mode 100644
index 000000000..4e4633f6f
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ReduceProd_dynamic_003/test.recipe
@@ -0,0 +1,31 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 2 dim: 1 dim: 4 }
+ shape_signature { dim: 2 dim: -1 dim: 4 }
+}
+operand {
+ name: "reduction_indices"
+ type: INT32
+ shape { dim: 1 }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 2 dim: 1 dim: 4 }
+}
+operation {
+ type: "ReduceProd"
+ reduce_prod_options {
+ keep_dims: true
+ }
+ input: "ifm"
+ input: "reduction_indices"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/ReduceProd_dynamic_003/test.reverse b/res/TensorFlowLiteRecipes/ReduceProd_dynamic_003/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ReduceProd_dynamic_003/test.reverse
diff --git a/res/TensorFlowLiteRecipes/SVDF_000/test.recipe b/res/TensorFlowLiteRecipes/SVDF_000/test.recipe
new file mode 100644
index 000000000..cd45f1b56
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/SVDF_000/test.recipe
@@ -0,0 +1,62 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 }
+}
+operand {
+ name: "weight_feature"
+ type: FLOAT32
+ shape { dim: 64 dim: 16 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "weight_time"
+ type: FLOAT32
+ shape { dim: 64 dim: 8 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "input_activation_state"
+ type: FLOAT32
+ is_variable: true
+ shape { dim: 1 dim: 512 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 }
+}
+operation {
+ type: "SVDF"
+ svdf_options {
+ rank: 1
+ activation: RELU
+ asymmetric_quantize_inputs: false
+ }
+ input: "ifm"
+ input: "weight_feature"
+ input: "weight_time"
+ input: "bias"
+ input: "input_activation_state"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/SVDF_000/test.reverse b/res/TensorFlowLiteRecipes/SVDF_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/SVDF_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/SVDF_001/test.recipe b/res/TensorFlowLiteRecipes/SVDF_001/test.recipe
new file mode 100644
index 000000000..38b76c2a4
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/SVDF_001/test.recipe
@@ -0,0 +1,52 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 }
+}
+operand {
+ name: "weight_feature"
+ type: FLOAT32
+ shape { dim: 64 dim: 16 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "weight_time"
+ type: FLOAT32
+ shape { dim: 64 dim: 8 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "input_activation_state"
+ type: FLOAT32
+ is_variable: true
+ shape { dim: 1 dim: 512 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 }
+}
+operation {
+ type: "SVDF"
+ svdf_options {
+ rank: 1
+ activation: RELU
+ asymmetric_quantize_inputs: false
+ }
+ input: "ifm"
+ input: "weight_feature"
+ input: "weight_time"
+ input: ""
+ input: "input_activation_state"
+ output: "ofm"
+}
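+# The empty input name ("") above stands for the omitted optional
+# bias operand.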
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/SVDF_001/test.reverse b/res/TensorFlowLiteRecipes/SVDF_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/SVDF_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/SignatureDef_MultiOut_000/test.recipe b/res/TensorFlowLiteRecipes/SignatureDef_MultiOut_000/test.recipe
new file mode 100644
index 000000000..81e1e56e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/SignatureDef_MultiOut_000/test.recipe
@@ -0,0 +1,81 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm3"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Add"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm1"
+ add_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Mul"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm2"
+ mul_options {
+    activation: NONE
+ }
+}
+operation {
+ type: "Sub"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm3"
+ sub_options {
+    activation: NONE
+ }
+}
+signature_def {
+ inputs: {
+ name: "ifm1"
+ tensor_index: 0
+ }
+ inputs: {
+ name: "ifm2"
+ tensor_index: 1
+ }
+ outputs {
+ name: "ofm2"
+ tensor_index: 3
+ }
+ outputs {
+ name: "ofm3"
+ tensor_index: 4
+ }
+ outputs {
+ name: "ofm1"
+ tensor_index: 2
+ }
+ signature_key: "serving_default"
+ subgraph_index: 0
+}
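+# tensor_index refers to operands in declaration order (ifm1=0, ifm2=1,
+# ofm1=2, ofm2=3, ofm3=4); note the signature output order differs
+# from the graph output order below.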
+input: "ifm1"
+input: "ifm2"
+output: "ofm3"
+output: "ofm1"
+output: "ofm2"
diff --git a/res/TensorFlowLiteRecipes/SignatureDef_MultiOut_001/test.recipe b/res/TensorFlowLiteRecipes/SignatureDef_MultiOut_001/test.recipe
new file mode 100644
index 000000000..a1731f99e
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/SignatureDef_MultiOut_001/test.recipe
@@ -0,0 +1,81 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm3"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Add"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm1"
+ add_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Mul"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm2"
+ mul_options {
+    activation: NONE
+ }
+}
+operation {
+ type: "Sub"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm3"
+ sub_options {
+    activation: NONE
+ }
+}
+signature_def {
+ inputs: {
+ name: "ifm1"
+ tensor_index: 0
+ }
+ inputs: {
+ name: "ifm2"
+ tensor_index: 1
+ }
+ outputs {
+ name: "out3"
+ tensor_index: 3
+ }
+ outputs {
+ name: "out2"
+ tensor_index: 4
+ }
+ outputs {
+ name: "out1"
+ tensor_index: 2
+ }
+ signature_key: "serving_default"
+ subgraph_index: 0
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm3"
+output: "ofm1"
+output: "ofm2"
diff --git a/res/TensorFlowLiteRecipes/Slice_001/test.recipe b/res/TensorFlowLiteRecipes/Slice_001/test.recipe
new file mode 100644
index 000000000..20f1baab3
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Slice_001/test.recipe
@@ -0,0 +1,37 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 3 dim: 2 dim: 3 }
+}
+operand {
+ name: "begin"
+ type: INT32
+ shape { dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "-1" arg: "0" arg: "0"
+ }
+}
+operand {
+ name: "size"
+ type: INT32
+ shape { dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "1" arg: "3"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 3 }
+}
+operation {
+ type: "Slice"
+ input: "ifm"
+ input: "begin"
+ input: "size"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Slice_001/test.reverse b/res/TensorFlowLiteRecipes/Slice_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Slice_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Sqrt_000/test.recipe b/res/TensorFlowLiteRecipes/Sqrt_000/test.recipe
index 1754f9a58..6d258e73f 100644
--- a/res/TensorFlowLiteRecipes/Sqrt_000/test.recipe
+++ b/res/TensorFlowLiteRecipes/Sqrt_000/test.recipe
@@ -2,7 +2,6 @@ operand {
name: "ifm"
type: FLOAT32
shape { dim: 1 dim: 3 dim: 3 dim: 2 }
- filler { tag: "constant" arg: "3.5" }
}
operand {
name: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Squeeze_001/test.recipe b/res/TensorFlowLiteRecipes/Squeeze_001/test.recipe
new file mode 100644
index 000000000..9ac441574
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Squeeze_001/test.recipe
@@ -0,0 +1,18 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 5 dim: 1 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 4 dim: 5 }
+}
+operation {
+ type: "Squeeze"
+ squeeze_options { }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Squeeze_001/test.reverse b/res/TensorFlowLiteRecipes/Squeeze_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Squeeze_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/StridedSlice_003/test.recipe b/res/TensorFlowLiteRecipes/StridedSlice_003/test.recipe
new file mode 100644
index 000000000..c5e408f1f
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/StridedSlice_003/test.recipe
@@ -0,0 +1,58 @@
+# Recipe for StridedSlice that will be converted to Reshape by SubstituteStridedSliceToReshapePass
+#
+# shrink_axis_mask will remove axis 0
+
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 10 dim: 1 dim: 4 }
+}
+operand {
+ name: "begin"
+ type: INT32
+ shape { dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "0" arg: "0" arg: "0" arg: "0"
+ }
+}
+operand {
+ name: "end"
+ type: INT32
+ shape { dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "10" arg: "1" arg: "100"
+ }
+}
+operand {
+ name: "strides"
+ type: INT32
+ shape { dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "1" arg: "1" arg: "1"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+  shape { dim: 10 dim: 1 dim: 4 }
+}
+operation {
+ type: "StridedSlice"
+ strided_slice_options {
+ begin_mask: 0
+ end_mask: 0
+ ellipsis_mask: 0
+ new_axis_mask: 0
+ shrink_axis_mask: 1
+ }
+ input: "ifm"
+ input: "begin"
+ input: "end"
+ input: "strides"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/StridedSlice_003/test.rule b/res/TensorFlowLiteRecipes/StridedSlice_003/test.rule
new file mode 100644
index 000000000..d65d43c9c
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/StridedSlice_003/test.rule
@@ -0,0 +1,6 @@
+# To check if StridedSlice is substituted with Reshape op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "RESHAPE_EXIST" $(op_count RESHAPE) '=' 1
+RULE "NO_STRIDEDSLICE" $(op_count STRIDEDSLICE) '=' 0
diff --git a/res/TensorFlowLiteRecipes/StridedSlice_004/test.recipe b/res/TensorFlowLiteRecipes/StridedSlice_004/test.recipe
new file mode 100644
index 000000000..edc8efd9d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/StridedSlice_004/test.recipe
@@ -0,0 +1,46 @@
+#
+# Failed case from https://github.com/Samsung/ONE/issues/9439
+#
+operand {
+ name: "Placeholder"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 32 }
+ is_variable: false
+}
+operand {
+ name: "strided_slice/stack_2"
+ type: INT32
+ shape { dim: 4 }
+ filler { tag: "explicit" arg: "1" arg: "-1" arg: "1" arg: "1" }
+ is_variable: false
+}
+operand {
+ name: "strided_slice/stack"
+ type: INT32
+ shape { dim: 4 }
+ filler { tag: "explicit" arg: "0" arg: "0" arg: "0" arg: "0" }
+ is_variable: false
+}
+operand {
+ name: "strided_slice"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 32 }
+ is_variable: false
+}
+operation {
+ type: "StridedSlice"
+ input: "Placeholder"
+ input: "strided_slice/stack"
+ input: "strided_slice/stack"
+ input: "strided_slice/stack_2"
+ output: "strided_slice"
+ strided_slice_options {
+ begin_mask: 15
+ end_mask: 15
+ ellipsis_mask: 0
+ new_axis_mask: 0
+ shrink_axis_mask: 0
+ }
+}
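+# begin_mask/end_mask of 15 (0b1111) make the op ignore begin/end on
+# all four axes; with stride -1 on axis 1, the whole tensor is sliced
+# with that axis reversed, so the output shape is unchanged.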
+input: "Placeholder"
+output: "strided_slice"
diff --git a/res/TensorFlowLiteRecipes/StridedSlice_004/test.reverse b/res/TensorFlowLiteRecipes/StridedSlice_004/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/StridedSlice_004/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Sum_dynamic_000/test.recipe b/res/TensorFlowLiteRecipes/Sum_dynamic_000/test.recipe
new file mode 100644
index 000000000..99b089e4d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Sum_dynamic_000/test.recipe
@@ -0,0 +1,29 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 4 }
+ shape_signature { dim: -1 dim: 8 dim: 8 dim: 4 }
+}
+operand {
+ name: "reduction_indices"
+ type: INT32
+ shape { dim: 1 }
+ filler { tag: "explicit" arg: "-1" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+ shape_signature { dim: -1 dim: 8 dim: 8 dim: 1 }
+}
+operation {
+ type: "Sum"
+ sum_options {
+ keep_dims: true
+ }
+ input: "ifm"
+ input: "reduction_indices"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Sum_dynamic_000/test.reverse b/res/TensorFlowLiteRecipes/Sum_dynamic_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Sum_dynamic_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Sum_dynamic_001/test.recipe b/res/TensorFlowLiteRecipes/Sum_dynamic_001/test.recipe
new file mode 100644
index 000000000..46fac492f
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Sum_dynamic_001/test.recipe
@@ -0,0 +1,29 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 4 }
+ shape_signature { dim: -1 dim: 3 dim: 4 }
+}
+operand {
+ name: "reduction_indices"
+ type: INT32
+ shape { dim: 1 }
+ filler { tag: "explicit" arg: "1" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 }
+ shape_signature { dim: -1 dim: 4 }
+}
+operation {
+ type: "Sum"
+ sum_options {
+ keep_dims: false
+ }
+ input: "ifm"
+ input: "reduction_indices"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/TransposeConv_000/test.recipe b/res/TensorFlowLiteRecipes/TransposeConv_000/test.recipe
index 1313e2683..4e49b2dfd 100644
--- a/res/TensorFlowLiteRecipes/TransposeConv_000/test.recipe
+++ b/res/TensorFlowLiteRecipes/TransposeConv_000/test.recipe
@@ -34,6 +34,7 @@ operation {
padding: SAME
stride_w: 1
stride_h: 1
+ activation: NONE
}
input: "out_shape"
input: "ker"
diff --git a/res/TensorFlowLiteRecipes/TransposeConv_001/test.recipe b/res/TensorFlowLiteRecipes/TransposeConv_001/test.recipe
index ad76100d2..2871df203 100644
--- a/res/TensorFlowLiteRecipes/TransposeConv_001/test.recipe
+++ b/res/TensorFlowLiteRecipes/TransposeConv_001/test.recipe
@@ -35,6 +35,7 @@ operation {
padding: SAME
stride_w: 1
stride_h: 1
+ activation: NONE
}
input: "out_shape"
input: "ker"
diff --git a/res/TensorFlowLiteRecipes/Transpose_U8_000/test.recipe b/res/TensorFlowLiteRecipes/Transpose_U8_000/test.recipe
new file mode 100644
index 000000000..db9e25f62
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Transpose_U8_000/test.recipe
@@ -0,0 +1,29 @@
+operand {
+ name: "ifm"
+ type: UINT8
+ shape { dim: 3 dim: 8 dim: 1 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "perm"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "1" arg: "2" arg: "0" }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 8 dim: 1 dim: 3 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+
+operation {
+ type: "Transpose"
+ transpose_options {
+ }
+ input: "ifm"
+ input: "perm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Transpose_U8_000/test.reverse b/res/TensorFlowLiteRecipes/Transpose_U8_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Transpose_U8_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_000/test.recipe b/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_000/test.recipe
new file mode 100644
index 000000000..773d44343
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_000/test.recipe
@@ -0,0 +1,185 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 3 dim: 1 dim: 2 }
+}
+operand {
+ name: "input_to_input_weights"
+ type: FLOAT32
+ shape { dim: 4 dim: 2 }
+}
+operand {
+ name: "input_to_forget_weights"
+ type: FLOAT32
+ shape { dim: 4 dim: 2 }
+}
+operand {
+ name: "input_to_cell_weights"
+ type: FLOAT32
+ shape { dim: 4 dim: 2 }
+}
+operand {
+ name: "input_to_output_weights"
+ type: FLOAT32
+ shape { dim: 4 dim: 2 }
+}
+operand {
+ name: "recurrent_to_input_weights"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "recurrent_to_forget_weights"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "recurrent_to_cell_weights"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "recurrent_to_output_weights"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "cell_to_input_weights"
+ type: FLOAT32
+ shape { dim: 4 }
+}
+operand {
+ name: "cell_to_forget_weights"
+ type: FLOAT32
+ shape { dim: 4 }
+}
+operand {
+ name: "cell_to_output_weights"
+ type: FLOAT32
+ shape { dim: 4 }
+}
+operand {
+ name: "input_gate_bias"
+ type: FLOAT32
+ shape { dim: 4 }
+}
+operand {
+ name: "forget_gate_bias"
+ type: FLOAT32
+ shape { dim: 4 }
+}
+operand {
+ name: "cell_gate_bias"
+ type: FLOAT32
+ shape { dim: 4 }
+}
+operand {
+ name: "output_gate_bias"
+ type: FLOAT32
+ shape { dim: 4 }
+}
+operand {
+ name: "projection_weight"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "projection_bias"
+ type: FLOAT32
+ shape { dim: 4 }
+}
+operand {
+ name: "activation_state"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 }
+}
+operand {
+ name: "cell_state"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 }
+}
+operand {
+ name: "input_layer_norm_coefficients"
+ type: FLOAT32
+ shape { dim: 4 }
+}
+operand {
+ name: "forget_layer_norm_coefficients"
+ type: FLOAT32
+ shape { dim: 4 }
+}
+operand {
+ name: "cell_layer_norm_coefficients"
+ type: FLOAT32
+ shape { dim: 4 }
+}
+operand {
+ name: "output_layer_norm_coefficients"
+ type: FLOAT32
+ shape { dim: 4 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 3 dim: 1 dim: 4 }
+}
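+# The 24 inputs below follow the TFLite operand order for
+# UnidirectionalSequenceLSTM: input, 4 input weights, 4 recurrent
+# weights, 3 peephole weights, 4 gate biases, projection weight and
+# bias, 2 state tensors, and 4 layer-norm coefficients.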
+operation {
+ type: "UnidirectionalSequenceLSTM"
+ unidirectional_sequence_lstm_options {
+ activation: NONE
+ cell_clip: 0.0
+ proj_clip: 0.0
+ time_major: false
+ asymmetric_quantize_inputs: false
+ }
+ input: "ifm"
+ input: "input_to_input_weights"
+ input: "input_to_forget_weights"
+ input: "input_to_cell_weights"
+ input: "input_to_output_weights"
+ input: "recurrent_to_input_weights"
+ input: "recurrent_to_forget_weights"
+ input: "recurrent_to_cell_weights"
+ input: "recurrent_to_output_weights"
+ input: "cell_to_input_weights"
+ input: "cell_to_forget_weights"
+ input: "cell_to_output_weights"
+ input: "input_gate_bias"
+ input: "forget_gate_bias"
+ input: "cell_gate_bias"
+ input: "output_gate_bias"
+ input: "projection_weight"
+ input: "projection_bias"
+ input: "activation_state"
+ input: "cell_state"
+ input: "input_layer_norm_coefficients"
+ input: "forget_layer_norm_coefficients"
+ input: "cell_layer_norm_coefficients"
+ input: "output_layer_norm_coefficients"
+ output: "ofm"
+}
+input: "ifm"
+input: "input_to_input_weights"
+input: "input_to_forget_weights"
+input: "input_to_cell_weights"
+input: "input_to_output_weights"
+input: "recurrent_to_input_weights"
+input: "recurrent_to_forget_weights"
+input: "recurrent_to_cell_weights"
+input: "recurrent_to_output_weights"
+input: "cell_to_input_weights"
+input: "cell_to_forget_weights"
+input: "cell_to_output_weights"
+input: "input_gate_bias"
+input: "forget_gate_bias"
+input: "cell_gate_bias"
+input: "output_gate_bias"
+input: "projection_weight"
+input: "projection_bias"
+input: "activation_state"
+input: "cell_state"
+input: "input_layer_norm_coefficients"
+input: "forget_layer_norm_coefficients"
+input: "cell_layer_norm_coefficients"
+input: "output_layer_norm_coefficients"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_000/test.reverse b/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_001/test.recipe b/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_001/test.recipe
new file mode 100644
index 000000000..5938cc115
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_001/test.recipe
@@ -0,0 +1,323 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 28 dim: 28 }
+}
+operand {
+ name: "input_to_input_weights"
+ type: FLOAT32
+ shape { dim: 20 dim: 28 }
+ filler {
+ tag: "explicit"
+ arg: "0.1687648445367813" arg: "0.04799923673272133" arg: "0.195631742477417" arg: "0.10485544055700302" arg: "0.018675213679671288" arg: "0.13739116489887238" arg: "0.0898093432188034" arg: "-0.28823068737983704" arg: "-0.02585843950510025" arg: "0.05994327738881111" arg: "0.07523486018180847" arg: "0.0797467827796936" arg: "0.3736445903778076" arg: "0.6627118587493896" arg: "0.3780449628829956" arg: "0.36176905035972595" arg: "-0.2041059285402298" arg: "0.1464163213968277" arg: "0.4136067032814026" arg: "0.1049080342054367" arg: "0.11873452365398407" arg: "-0.05727154389023781" arg: "-0.04963447153568268" arg: "-0.332282155752182" arg: "0.07995595782995224" arg: "-0.20255199074745178" arg: "-0.05633578822016716" arg: "0.11420387774705887"
+ arg: "-0.029032165184617043" arg: "0.0007604139973409474" arg: "-0.31187641620635986" arg: "0.1813918948173523" arg: "-0.31930115818977356" arg: "0.05086275562644005" arg: "-0.029340023174881935" arg: "0.039944298565387726" arg: "0.002410847693681717" arg: "-0.32042407989501953" arg: "0.22277581691741943" arg: "0.44808948040008545" arg: "0.439309686422348" arg: "0.3975866138935089" arg: "0.30034393072128296" arg: "0.1280607134103775" arg: "-0.06160789355635643" arg: "-0.09105906635522842" arg: "-0.23636692762374878" arg: "-0.3308735489845276" arg: "-0.572564423084259" arg: "-0.6935749053955078" arg: "-0.5299585461616516" arg: "-0.27702340483665466" arg: "-0.7308681011199951" arg: "-0.6580930352210999" arg: "-0.4219287633895874" arg: "0.20912277698516846"
+ arg: "0.060638219118118286" arg: "-0.12734581530094147" arg: "0.14102879166603088" arg: "-0.2901698052883148" arg: "-0.24771066009998322" arg: "-0.3001070022583008" arg: "-0.22395247220993042" arg: "-0.4311608672142029" arg: "-0.17570453882217407" arg: "0.08624797314405441" arg: "0.018354324623942375" arg: "0.5221205949783325" arg: "0.14471562206745148" arg: "0.03486153483390808" arg: "0.16570599377155304" arg: "0.16822484135627747" arg: "0.7482292056083679" arg: "0.7351004481315613" arg: "0.27890756726264954" arg: "0.5524212121963501" arg: "0.23095451295375824" arg: "0.4314809739589691" arg: "0.3363182246685028" arg: "0.6496651768684387" arg: "0.5523284077644348" arg: "0.4055400490760803" arg: "0.4644913971424103" arg: "0.09175155311822891"
+ arg: "0.21605326235294342" arg: "0.08236871659755707" arg: "-0.17103230953216553" arg: "-0.18498405814170837" arg: "-0.5800216794013977" arg: "-0.487117737531662" arg: "-0.7577510476112366" arg: "-0.4389793574810028" arg: "-0.6259695291519165" arg: "-0.6731855273246765" arg: "-0.5440476536750793" arg: "-0.4302021861076355" arg: "-0.28402388095855713" arg: "-0.22358544170856476" arg: "-0.30889278650283813" arg: "0.12872418761253357" arg: "0.03388393670320511" arg: "0.034014951437711716" arg: "0.2464621216058731" arg: "-0.08322205394506454" arg: "0.20342972874641418" arg: "-0.14718593657016754" arg: "0.2358621209859848" arg: "0.2880614697933197" arg: "0.08535374701023102" arg: "-0.09865032881498337" arg: "-0.44678133726119995" arg: "-0.11892740428447723"
+ arg: "0.1754414290189743" arg: "0.24821344017982483" arg: "0.10287455469369888" arg: "0.2701801657676697" arg: "0.41744735836982727" arg: "0.19831198453903198" arg: "0.46229296922683716" arg: "0.16203390061855316" arg: "0.12368197739124298" arg: "-0.27287790179252625" arg: "0.014193423092365265" arg: "0.11754778772592545" arg: "-0.05268847197294235" arg: "0.017977338284254074" arg: "0.381906121969223" arg: "0.17318789660930634" arg: "0.1804313063621521" arg: "-0.021003693342208862" arg: "0.5185583829879761" arg: "0.5880140662193298" arg: "0.4118947982788086" arg: "0.0662691667675972" arg: "0.08239153027534485" arg: "0.04901377111673355" arg: "0.12195281684398651" arg: "0.46632856130599976" arg: "0.18819667398929596" arg: "0.18338985741138458"
+ arg: "0.1751364767551422" arg: "0.04932933673262596" arg: "0.10589705407619476" arg: "0.02743552438914776" arg: "0.15879607200622559" arg: "0.3464271128177643" arg: "-0.08866819739341736" arg: "0.10017956793308258" arg: "0.14074550569057465" arg: "0.07490764558315277" arg: "0.34844619035720825" arg: "0.27873194217681885" arg: "0.4934106469154358" arg: "0.6267357468605042" arg: "0.653296709060669" arg: "0.6742697358131409" arg: "0.09091877192258835" arg: "0.08090687543153763" arg: "0.007206875365227461" arg: "0.06743039935827255" arg: "-0.12605983018875122" arg: "0.15661095082759857" arg: "0.24821621179580688" arg: "0.12353820353746414" arg: "0.1812787503004074" arg: "-0.32736217975616455" arg: "-0.0818324014544487" arg: "-0.15149752795696259"
+ arg: "-0.24534951150417328" arg: "0.3147708773612976" arg: "-0.008836879394948483" arg: "-0.1994941234588623" arg: "-0.049111880362033844" arg: "-0.20707322657108307" arg: "-0.1682814508676529" arg: "-0.010347025468945503" arg: "-0.5228656530380249" arg: "-0.5135005712509155" arg: "-0.3480781614780426" arg: "-0.38772332668304443" arg: "-0.4124152958393097" arg: "-0.39354074001312256" arg: "-0.4979104995727539" arg: "-0.19147707521915436" arg: "0.0627184733748436" arg: "0.46287989616394043" arg: "0.07998469471931458" arg: "0.0790494829416275" arg: "0.09622958302497864" arg: "0.014114361256361008" arg: "-0.1053328886628151" arg: "0.03503574803471565" arg: "0.16327714920043945" arg: "0.4359706938266754" arg: "0.05958576500415802" arg: "-0.6027227640151978"
+ arg: "0.057672709226608276" arg: "-0.036423951387405396" arg: "-0.10990197956562042" arg: "0.0010471289278939366" arg: "0.16711515188217163" arg: "0.32950177788734436" arg: "0.15729546546936035" arg: "0.10592831671237946" arg: "-0.3053370416164398" arg: "-0.06242264807224274" arg: "-0.005419928580522537" arg: "0.2704862654209137" arg: "0.4702851474285126" arg: "-0.07878115773200989" arg: "-0.23939359188079834" arg: "-0.046783994883298874" arg: "0.2417677342891693" arg: "0.07949794828891754" arg: "-0.01079419907182455" arg: "-0.20259179174900055" arg: "-0.3341178894042969" arg: "-0.06668252497911453" arg: "-0.4118092954158783" arg: "-0.25006234645843506" arg: "-0.2406432181596756" arg: "-0.2872427701950073" arg: "-0.33487430214881897" arg: "0.2951969802379608"
+ arg: "0.25783705711364746" arg: "0.03347025439143181" arg: "0.03592922165989876" arg: "-0.14409473538398743" arg: "-0.2313976287841797" arg: "-0.05575039982795715" arg: "-0.3067474663257599" arg: "-0.3740054965019226" arg: "-0.173713818192482" arg: "-0.5401638746261597" arg: "-0.26812097430229187" arg: "-0.2669502794742584" arg: "0.0020368772093206644" arg: "0.5259052515029907" arg: "0.12416208535432816" arg: "0.295955628156662" arg: "0.17083768546581268" arg: "0.3295002579689026" arg: "0.1629410982131958" arg: "0.6651543378829956" arg: "0.3124505877494812" arg: "0.20944754779338837" arg: "0.22139877080917358" arg: "0.09963700920343399" arg: "0.14622727036476135" arg: "0.24759505689144135" arg: "0.29243841767311096" arg: "0.3123530149459839"
+ arg: "-0.14453744888305664" arg: "-0.22069278359413147" arg: "0.2322058528661728" arg: "0.03341517224907875" arg: "0.1755218654870987" arg: "-0.00033268495462834835" arg: "-0.17320746183395386" arg: "-0.013570177368819714" arg: "-0.20040804147720337" arg: "-0.22750437259674072" arg: "-0.22218504548072815" arg: "-0.20970220863819122" arg: "-0.1999887377023697" arg: "-0.18349596858024597" arg: "-0.12978042662143707" arg: "0.34552574157714844" arg: "0.10098802298307419" arg: "-0.1387794464826584" arg: "0.01755024679005146" arg: "0.037359848618507385" arg: "0.1385539323091507" arg: "0.5478288531303406" arg: "0.4302785396575928" arg: "0.31248337030410767" arg: "0.4476741850376129" arg: "0.4958922266960144" arg: "0.3047107756137848" arg: "-0.1087426096200943"
+ arg: "-0.2650264799594879" arg: "0.001212756964378059" arg: "-0.1455630362033844" arg: "-0.06888622045516968" arg: "-0.39034101366996765" arg: "-0.15759187936782837" arg: "-0.0332697331905365" arg: "-0.20074871182441711" arg: "0.04032357037067413" arg: "-0.15028415620326996" arg: "-0.094477079808712" arg: "-0.08076327294111252" arg: "0.2434137463569641" arg: "0.6036377549171448" arg: "0.2083856165409088" arg: "0.421690434217453" arg: "0.31900280714035034" arg: "0.4197741746902466" arg: "0.168703094124794" arg: "-0.04694703593850136" arg: "0.016685811802744865" arg: "-0.24497397243976593" arg: "-0.12784908711910248" arg: "0.04392942041158676" arg: "0.43009427189826965" arg: "0.1300940066576004" arg: "-0.006342500913888216" arg: "0.28684887290000916"
+ arg: "0.2645731270313263" arg: "-0.06917668879032135" arg: "0.04553110525012016" arg: "0.0005655331769958138" arg: "0.02004513144493103" arg: "-0.12128561735153198" arg: "0.23945191502571106" arg: "0.011923068203032017" arg: "0.049406301230192184" arg: "0.10682254284620285" arg: "-0.15404820442199707" arg: "0.04071756824851036" arg: "-0.04888581857085228" arg: "0.03098251111805439" arg: "-0.11950475722551346" arg: "-0.47736215591430664" arg: "-0.5907397270202637" arg: "-0.5694231390953064" arg: "-0.3115452826023102" arg: "-0.3244591951370239" arg: "-0.44562792778015137" arg: "-0.2824826240539551" arg: "-0.608927845954895" arg: "-0.5112527012825012" arg: "-0.5793758630752563" arg: "-0.7528161406517029" arg: "-0.10694423317909241" arg: "0.03750178962945938"
+ arg: "-0.09262563288211823" arg: "0.33829057216644287" arg: "0.19356343150138855" arg: "-0.32529792189598083" arg: "-0.11209913343191147" arg: "-0.17982369661331177" arg: "-0.02031439170241356" arg: "-0.04840415343642235" arg: "0.2679027318954468" arg: "-0.0351799800992012" arg: "0.22578150033950806" arg: "0.024941330775618553" arg: "-0.22684139013290405" arg: "0.0645766407251358" arg: "0.45471301674842834" arg: "0.006177396513521671" arg: "-0.07253464311361313" arg: "-0.031976472586393356" arg: "-0.1588464081287384" arg: "-0.35738927125930786" arg: "-0.2679489850997925" arg: "0.13583332300186157" arg: "0.6051817536354065" arg: "0.7277238368988037" arg: "0.7886832356452942" arg: "0.30304884910583496" arg: "0.23822274804115295" arg: "-0.21040984988212585"
+ arg: "0.13347174227237701" arg: "-0.018098508939146996" arg: "0.3020147383213043" arg: "0.36663228273391724" arg: "0.19701610505580902" arg: "0.32974785566329956" arg: "0.4301891624927521" arg: "0.07097901403903961" arg: "0.3667917549610138" arg: "0.3058207929134369" arg: "0.047205567359924316" arg: "0.19449062645435333" arg: "0.40699872374534607" arg: "0.04719206318259239" arg: "0.13794705271720886" arg: "-0.12094765901565552" arg: "-0.11291661113500595" arg: "0.0622805655002594" arg: "0.202839195728302" arg: "0.3466202914714813" arg: "0.2170470952987671" arg: "0.3939531445503235" arg: "0.5407551527023315" arg: "0.43948447704315186" arg: "0.3990897834300995" arg: "0.0408027209341526" arg: "-0.1760343313217163" arg: "0.014698908664286137"
+ arg: "-0.04886699095368385" arg: "0.021053045988082886" arg: "0.5374348759651184" arg: "0.08056395500898361" arg: "-0.22925740480422974" arg: "-0.3281041383743286" arg: "-0.07999913394451141" arg: "-0.021695485338568687" arg: "0.011688797734677792" arg: "0.03478331118822098" arg: "0.2215108573436737" arg: "0.20613346993923187" arg: "0.06630691885948181" arg: "0.1218089833855629" arg: "0.11808548867702484" arg: "0.45628872513771057" arg: "0.2919962406158447" arg: "0.14256659150123596" arg: "0.175963893532753" arg: "0.04495575651526451" arg: "0.2504587471485138" arg: "0.026869049295783043" arg: "0.47956186532974243" arg: "0.02193787880241871" arg: "-0.4510112404823303" arg: "-0.313761830329895" arg: "-0.1106211319565773" arg: "0.2789842486381531"
+ arg: "0.06086614355444908" arg: "0.258357435464859" arg: "-0.21907173097133636" arg: "0.3981928527355194" arg: "0.4303799867630005" arg: "0.3879024386405945" arg: "0.43367868661880493" arg: "0.4597713351249695" arg: "0.45437106490135193" arg: "-0.02416928857564926" arg: "-0.05651269108057022" arg: "0.28281864523887634" arg: "0.0635884702205658" arg: "0.2554715573787689" arg: "0.242105171084404" arg: "0.4487742483615875" arg: "0.31888487935066223" arg: "-0.004678715486079454" arg: "-0.2514997124671936" arg: "-0.0040764473378658295" arg: "-0.32342639565467834" arg: "-0.05584603548049927" arg: "-0.06270022690296173" arg: "-0.036144010722637177" arg: "-0.15446072816848755" arg: "0.23341621458530426" arg: "0.33822396397590637" arg: "0.05738767236471176"
+ arg: "0.10699111223220825" arg: "-0.03672357276082039" arg: "0.05017268285155296" arg: "0.0031773506198078394" arg: "0.21564021706581116" arg: "-0.016393177211284637" arg: "0.04495749995112419" arg: "0.08746970444917679" arg: "0.2627675533294678" arg: "-0.06982193887233734" arg: "0.16397050023078918" arg: "0.12711408734321594" arg: "0.1515779048204422" arg: "0.5583046674728394" arg: "0.6618807911872864" arg: "0.6948606967926025" arg: "0.7728397846221924" arg: "0.5394765734672546" arg: "0.21606062352657318" arg: "-0.045014407485723495" arg: "-0.00790402665734291" arg: "0.05512310191988945" arg: "-0.10717213153839111" arg: "0.12510299682617188" arg: "-0.2419642060995102" arg: "-0.5152121782302856" arg: "-0.22660385072231293" arg: "-0.2283792793750763"
+ arg: "-0.16621996462345123" arg: "-0.15625415742397308" arg: "0.4551774561405182" arg: "-0.03949269279837608" arg: "-0.012321516871452332" arg: "-0.0024175785947591066" arg: "-0.2590198516845703" arg: "-0.05285951495170593" arg: "-0.2730681002140045" arg: "-0.39435532689094543" arg: "-0.23795633018016815" arg: "-0.1415511518716812" arg: "0.3277718722820282" arg: "0.18961961567401886" arg: "0.4701646864414215" arg: "0.7281239032745361" arg: "0.25955408811569214" arg: "0.18941733241081238" arg: "0.06163005158305168" arg: "0.008561286143958569" arg: "0.03527892380952835" arg: "0.04559394717216492" arg: "0.12878121435642242" arg: "0.453266441822052" arg: "0.2222425490617752" arg: "-0.10644187033176422" arg: "-0.2403181493282318" arg: "-0.3840152323246002"
+ arg: "-0.10421296954154968" arg: "0.0513484813272953" arg: "0.043097011744976044" arg: "-0.22645732760429382" arg: "-0.13321255147457123" arg: "0.16552734375" arg: "0.3591425120830536" arg: "0.27922341227531433" arg: "0.02332492358982563" arg: "-0.1489277184009552" arg: "0.03313438966870308" arg: "0.6452564001083374" arg: "0.7131020426750183" arg: "0.3456207513809204" arg: "0.08521326631307602" arg: "0.3773330748081207" arg: "-0.06791231781244278" arg: "-0.042457882314920425" arg: "-0.2748822271823883" arg: "-0.45328542590141296" arg: "-0.13675789535045624" arg: "-0.35200127959251404" arg: "-0.3977891802787781" arg: "-0.22524717450141907" arg: "-0.5592325329780579" arg: "-0.6521109342575073" arg: "-0.4787251651287079" arg: "0.4251031279563904"
+ arg: "-0.10961339622735977" arg: "0.08067493140697479" arg: "0.19314810633659363" arg: "0.15304067730903625" arg: "0.08039616048336029" arg: "-0.12978368997573853" arg: "0.23539943993091583" arg: "-0.0029274635016918182" arg: "0.19411355257034302" arg: "0.019054999575018883" arg: "0.19173188507556915" arg: "-0.09392274171113968" arg: "0.38477426767349243" arg: "0.04760168865323067" arg: "0.3185006082057953" arg: "0.5135385394096375" arg: "0.10360299050807953" arg: "0.23731642961502075" arg: "0.30989235639572144" arg: "0.22052974998950958" arg: "0.3194608986377716" arg: "0.5341771245002747" arg: "0.518899142742157" arg: "0.23889583349227905" arg: "0.30273208022117615" arg: "-0.019300086423754692" arg: "0.039211057126522064" arg: "-0.10289957374334335"
+ }
+}
+operand {
+ name: "input_to_forget_weights"
+ type: FLOAT32
+ shape { dim: 20 dim: 28 }
+ filler {
+ tag: "explicit"
+ arg: "-0.07628004252910614" arg: "-0.10215829312801361" arg: "-0.05716871842741966" arg: "0.10802043229341507" arg: "0.17770273983478546" arg: "0.2961052358150482" arg: "0.20247122645378113" arg: "0.2446555495262146" arg: "0.040852244943380356" arg: "-0.11335061490535736" arg: "0.10778427124023438" arg: "0.02703988179564476" arg: "0.10350820422172546" arg: "0.022522294893860817" arg: "-0.2625943720340729" arg: "-0.4493379592895508" arg: "-0.3724125623703003" arg: "-0.0114969527348876" arg: "0.3713493049144745" arg: "0.15514639019966125" arg: "0.02222340926527977" arg: "-0.1641593724489212" arg: "-0.1006893664598465" arg: "-0.36185842752456665" arg: "-0.29800334572792053" arg: "-0.1352705955505371" arg: "-0.1678757518529892" arg: "-0.04674749821424484"
+ arg: "0.1935243457555771" arg: "-0.3558758497238159" arg: "-0.01225559413433075" arg: "0.17600563168525696" arg: "-0.2398902177810669" arg: "0.021266596391797066" arg: "-0.012361799366772175" arg: "-0.07168065011501312" arg: "-0.046216897666454315" arg: "0.2807506322860718" arg: "0.31839627027511597" arg: "0.22673124074935913" arg: "-0.1492728590965271" arg: "-0.22067101299762726" arg: "-0.13988566398620605" arg: "-0.26826149225234985" arg: "-0.12911173701286316" arg: "-0.25909167528152466" arg: "-0.09522391110658646" arg: "-0.2712244987487793" arg: "-0.0641704648733139" arg: "-0.06269702315330505" arg: "0.05739550665020943" arg: "-0.024462971836328506" arg: "-0.07414258271455765" arg: "-0.1316741555929184" arg: "0.2740451693534851" arg: "0.008587119169533253"
+ arg: "0.09579362720251083" arg: "-0.10534976422786713" arg: "-0.20309729874134064" arg: "0.05385243892669678" arg: "-0.12469331920146942" arg: "0.19276906549930573" arg: "-0.14719951152801514" arg: "0.04517911374568939" arg: "0.17648524045944214" arg: "-0.07139024138450623" arg: "0.3134388327598572" arg: "0.30480578541755676" arg: "-0.20624959468841553" arg: "-0.28569987416267395" arg: "-0.463299423456192" arg: "-0.22555772960186005" arg: "0.08225655555725098" arg: "0.22282175719738007" arg: "0.2178572118282318" arg: "0.1534086912870407" arg: "0.0010919382330030203" arg: "-0.049574099481105804" arg: "-0.14441633224487305" arg: "0.10646001249551773" arg: "-0.25545066595077515" arg: "-0.1575625091791153" arg: "0.022244272753596306" arg: "-0.13968679308891296"
+ arg: "-0.021634351462125778" arg: "0.08071571588516235" arg: "0.1013035699725151" arg: "0.016050167381763458" arg: "0.2625211775302887" arg: "0.4876948893070221" arg: "0.43568745255470276" arg: "0.565092146396637" arg: "0.29359373450279236" arg: "0.21053338050842285" arg: "0.3244520127773285" arg: "0.3750203549861908" arg: "0.21750782430171967" arg: "0.0762089267373085" arg: "-0.08389374613761902" arg: "-0.1069231778383255" arg: "-0.03870454430580139" arg: "-0.05467657372355461" arg: "-0.1801743060350418" arg: "-0.16087990999221802" arg: "-0.142457976937294" arg: "-0.17574858665466309" arg: "-0.052057504653930664" arg: "-0.022876683622598648" arg: "0.11125413328409195" arg: "0.04914820194244385" arg: "-0.46803581714630127" arg: "0.06290580332279205"
+ arg: "-0.07477306574583054" arg: "0.21766617894172668" arg: "-0.22997407615184784" arg: "0.034543294459581375" arg: "-0.013903528451919556" arg: "-0.18765689432621002" arg: "-0.0831596627831459" arg: "-0.24837662279605865" arg: "-0.3817770779132843" arg: "-0.27927181124687195" arg: "-0.1098317876458168" arg: "-0.10399161279201508" arg: "0.07339660823345184" arg: "0.24020932614803314" arg: "0.054254304617643356" arg: "-0.29698851704597473" arg: "-0.8958835601806641" arg: "-0.5978922247886658" arg: "-0.25239694118499756" arg: "0.3084123134613037" arg: "0.7305506467819214" arg: "0.4846576452255249" arg: "0.2881616950035095" arg: "0.41025322675704956" arg: "0.5327757596969604" arg: "0.32260239124298096" arg: "0.31244078278541565" arg: "0.19641001522541046"
+ arg: "0.4518318772315979" arg: "0.0918312519788742" arg: "0.21931017935276031" arg: "-0.2809772789478302" arg: "0.04512198641896248" arg: "-0.12032749503850937" arg: "0.13106974959373474" arg: "0.16737745702266693" arg: "-0.06265480816364288" arg: "-0.12420768290758133" arg: "0.026834065094590187" arg: "0.057427167892456055" arg: "0.13727468252182007" arg: "0.17098888754844666" arg: "0.1360159069299698" arg: "-0.29676762223243713" arg: "0.02863573096692562" arg: "0.11650068312883377" arg: "0.08228091150522232" arg: "0.0004863425565417856" arg: "0.014332028105854988" arg: "0.21943029761314392" arg: "0.2125539928674698" arg: "0.07996927946805954" arg: "0.39755111932754517" arg: "0.24480314552783966" arg: "-0.03607768565416336" arg: "-0.04457511007785797"
+ arg: "-0.1347951889038086" arg: "-0.12047966569662094" arg: "0.10575897246599197" arg: "0.05170439928770065" arg: "-0.1254684031009674" arg: "-0.014801939949393272" arg: "0.21512190997600555" arg: "-0.10265995562076569" arg: "0.27830609679222107" arg: "0.023503802716732025" arg: "-0.028669001534581184" arg: "0.39614439010620117" arg: "0.6066746711730957" arg: "0.2178766429424286" arg: "-0.011598336510360241" arg: "0.3191794753074646" arg: "0.21298977732658386" arg: "-0.10889667272567749" arg: "-0.019525714218616486" arg: "0.030179403722286224" arg: "0.15849147737026215" arg: "0.22630034387111664" arg: "0.009056099690496922" arg: "-0.05425706133246422" arg: "-0.0458354689180851" arg: "-0.21161434054374695" arg: "-0.1947891265153885" arg: "-0.263959139585495"
+ arg: "0.027783045545220375" arg: "-0.04745020717382431" arg: "0.3300989270210266" arg: "0.28470176458358765" arg: "-0.17005865275859833" arg: "-0.07439357787370682" arg: "0.1667899340391159" arg: "0.45858997106552124" arg: "0.37785887718200684" arg: "0.6112445592880249" arg: "0.3187272250652313" arg: "-0.06941720843315125" arg: "-0.36996451020240784" arg: "-0.31387877464294434" arg: "-0.639223575592041" arg: "-0.44944101572036743" arg: "-0.06550875306129456" arg: "0.06676022708415985" arg: "0.007136007770895958" arg: "-0.21558785438537598" arg: "-0.15798640251159668" arg: "0.01950899139046669" arg: "-0.2211693674325943" arg: "0.013605713844299316" arg: "-0.19591258466243744" arg: "-0.38659483194351196" arg: "0.013284237124025822" arg: "-0.22611309587955475"
+ arg: "-0.15597084164619446" arg: "0.17743876576423645" arg: "-0.037310726940631866" arg: "-0.024244168773293495" arg: "0.2377604991197586" arg: "0.44537392258644104" arg: "0.31278473138809204" arg: "0.1272803544998169" arg: "0.013553287833929062" arg: "0.254456490278244" arg: "0.08175382763147354" arg: "-0.20459230244159698" arg: "-0.2673284709453583" arg: "-0.2404957413673401" arg: "-0.14864800870418549" arg: "0.20921550691127777" arg: "0.29682257771492004" arg: "0.21920187771320343" arg: "0.2311340570449829" arg: "0.011728049255907536" arg: "-0.14417873322963715" arg: "-0.18177829682826996" arg: "-0.05804318189620972" arg: "-0.4260285198688507" arg: "-0.23524904251098633" arg: "0.12287767231464386" arg: "0.2705034911632538" arg: "0.07427411526441574"
+ arg: "-0.0016438784077763557" arg: "0.24241822957992554" arg: "0.48539218306541443" arg: "-0.04733000695705414" arg: "0.26057326793670654" arg: "0.1521688848733902" arg: "0.051105279475450516" arg: "0.30896538496017456" arg: "0.2954115569591522" arg: "0.14518126845359802" arg: "-0.07679435610771179" arg: "-0.17924435436725616" arg: "-0.3208692669868469" arg: "-0.7328397631645203" arg: "-0.02372976578772068" arg: "0.1274699866771698" arg: "-0.06885138899087906" arg: "-0.35549283027648926" arg: "-0.4305097162723541" arg: "-0.3341798782348633" arg: "-0.22487765550613403" arg: "-0.0018067393684759736" arg: "0.30461153388023376" arg: "-0.04262997582554817" arg: "0.18400070071220398" arg: "0.16000767052173615" arg: "0.5017845034599304" arg: "-0.26547643542289734"
+ arg: "-0.22279107570648193" arg: "-0.37462612986564636" arg: "-0.11375132948160172" arg: "-0.318785697221756" arg: "-0.04256489500403404" arg: "0.023857025429606438" arg: "0.24660463631153107" arg: "0.2548283040523529" arg: "0.1715206801891327" arg: "-0.19845722615718842" arg: "-0.5138258934020996" arg: "-0.20912210643291473" arg: "-0.13793939352035522" arg: "0.12139105796813965" arg: "-0.08564136922359467" arg: "-0.11496538668870926" arg: "0.06404484808444977" arg: "0.06541986018419266" arg: "-0.05833537131547928" arg: "0.33602604269981384" arg: "0.24190761148929596" arg: "0.23126089572906494" arg: "0.18084779381752014" arg: "0.10192841291427612" arg: "-0.19622290134429932" arg: "0.2051597237586975" arg: "0.37465494871139526" arg: "0.2135562300682068"
+ arg: "0.20038118958473206" arg: "-0.02999887615442276" arg: "0.27408668398857117" arg: "0.0680413618683815" arg: "0.521243155002594" arg: "0.33456870913505554" arg: "0.11001615226268768" arg: "0.4307146668434143" arg: "0.3331093490123749" arg: "0.3356601595878601" arg: "0.05947132036089897" arg: "0.26261594891548157" arg: "0.15773697197437286" arg: "0.19511407613754272" arg: "-0.14718492329120636" arg: "-0.3149983882904053" arg: "-0.3379979133605957" arg: "-0.2451634705066681" arg: "-0.03271918371319771" arg: "0.2850451171398163" arg: "0.041262079030275345" arg: "0.05697742477059364" arg: "0.10827737301588058" arg: "-0.10368494689464569" arg: "-0.6196221113204956" arg: "-0.44554245471954346" arg: "-0.18561461567878723" arg: "0.07922625541687012"
+ arg: "0.020840616896748543" arg: "0.08754434436559677" arg: "-0.07456904649734497" arg: "0.0035276953130960464" arg: "0.27878332138061523" arg: "0.11924267560243607" arg: "-0.023688653483986855" arg: "0.049631692469120026" arg: "-0.18165265023708344" arg: "0.13382099568843842" arg: "-0.14947009086608887" arg: "-0.27999353408813477" arg: "0.11579195410013199" arg: "0.23342294991016388" arg: "0.26458871364593506" arg: "0.16960042715072632" arg: "0.2244962602853775" arg: "0.251582533121109" arg: "0.047410279512405396" arg: "-0.3055225610733032" arg: "-0.0922807902097702" arg: "-0.0008149942150339484" arg: "-0.0030961039010435343" arg: "0.3732677698135376" arg: "0.358204185962677" arg: "0.15451878309249878" arg: "0.28581294417381287" arg: "-0.008904639631509781"
+ arg: "0.01313134003430605" arg: "0.10318631678819656" arg: "0.12054811418056488" arg: "0.2703510820865631" arg: "-0.012742577120661736" arg: "-0.062323760241270065" arg: "0.09344484657049179" arg: "0.022521527484059334" arg: "0.15448175370693207" arg: "0.14388494193553925" arg: "-0.23548845946788788" arg: "-0.10205905884504318" arg: "-0.28026899695396423" arg: "-0.5158746838569641" arg: "-0.2526220381259918" arg: "-0.018526393920183182" arg: "-0.2256275862455368" arg: "-0.1908768117427826" arg: "-0.013978122733533382" arg: "-0.0744546428322792" arg: "-0.14520809054374695" arg: "-0.05685105547308922" arg: "0.02905760332942009" arg: "0.08797142654657364" arg: "-0.17073869705200195" arg: "0.1176731064915657" arg: "-0.07420363277196884" arg: "0.05669660493731499"
+ arg: "-0.2321733981370926" arg: "0.15404652059078217" arg: "-0.2614485025405884" arg: "-0.19935357570648193" arg: "-0.12706783413887024" arg: "0.13061459362506866" arg: "-0.04816088452935219" arg: "-0.06196342036128044" arg: "0.09632396697998047" arg: "0.5340875387191772" arg: "0.12526774406433105" arg: "0.018420275300741196" arg: "-0.012295903638005257" arg: "-0.07571853697299957" arg: "0.003750501200556755" arg: "0.21794241666793823" arg: "0.2857806086540222" arg: "-0.11748607456684113" arg: "0.010374456644058228" arg: "-0.1973239928483963" arg: "0.056574393063783646" arg: "-0.16627934575080872" arg: "-0.08241312205791473" arg: "-0.28860169649124146" arg: "-0.6715773940086365" arg: "-0.4119777977466583" arg: "-0.05268547311425209" arg: "0.09599226713180542"
+ arg: "-0.23489901423454285" arg: "-0.210902139544487" arg: "-0.4165542423725128" arg: "-0.1252453476190567" arg: "0.0586412139236927" arg: "-0.32756853103637695" arg: "-0.03619222715497017" arg: "-0.14618682861328125" arg: "-0.15533234179019928" arg: "-0.015258180908858776" arg: "0.23733921349048615" arg: "0.4892650842666626" arg: "0.9553998708724976" arg: "0.443565309047699" arg: "-0.022181924432516098" arg: "-0.09372548013925552" arg: "-0.04591056704521179" arg: "-0.03852088004350662" arg: "-0.18001845479011536" arg: "0.13030503690242767" arg: "0.33781635761260986" arg: "-0.00877282302826643" arg: "-0.11648697406053543" arg: "0.17854802310466766" arg: "0.33019575476646423" arg: "0.110745869576931" arg: "0.16787387430667877" arg: "-0.003876873990520835"
+ arg: "-0.23130182921886444" arg: "0.15203757584095" arg: "-0.1979227513074875" arg: "-0.27961796522140503" arg: "-0.16501222550868988" arg: "-0.1727743148803711" arg: "-0.08420325070619583" arg: "-0.03661131486296654" arg: "0.022991381585597992" arg: "0.3267931640148163" arg: "0.17916983366012573" arg: "-0.21305087208747864" arg: "-0.4732128381729126" arg: "-0.14580094814300537" arg: "0.21602611243724823" arg: "0.4339281916618347" arg: "0.021353665739297867" arg: "0.1897697150707245" arg: "-0.18888473510742188" arg: "0.10367560386657715" arg: "-0.17847439646720886" arg: "0.10388167202472687" arg: "-0.04788142070174217" arg: "-0.05652203410863876" arg: "-0.23099040985107422" arg: "-0.16391621530056" arg: "0.06451118737459183" arg: "0.13220994174480438"
+ arg: "0.2434273064136505" arg: "-0.11095169931650162" arg: "0.13803741335868835" arg: "0.21899642050266266" arg: "-0.05742649734020233" arg: "0.2324332445859909" arg: "0.3781294524669647" arg: "0.0380890890955925" arg: "0.34526804089546204" arg: "0.31572604179382324" arg: "0.18964345753192902" arg: "0.21782329678535461" arg: "0.23522818088531494" arg: "-0.3473344147205353" arg: "0.1344681978225708" arg: "-0.11885730922222137" arg: "-0.1880505084991455" arg: "-0.1195577010512352" arg: "-0.060020171105861664" arg: "0.289211243391037" arg: "0.012796066701412201" arg: "-0.03657015040516853" arg: "0.09980439394712448" arg: "0.3341149389743805" arg: "0.15382571518421173" arg: "-0.09110640734434128" arg: "-0.2671661674976349" arg: "-0.3361131548881531"
+ arg: "0.04869367554783821" arg: "-0.29407668113708496" arg: "-0.21786509454250336" arg: "-0.021602999418973923" arg: "0.055243782699108124" arg: "0.3378455638885498" arg: "0.22096234560012817" arg: "0.4419727325439453" arg: "0.21845094859600067" arg: "0.3761645555496216" arg: "-0.047762319445610046" arg: "-0.21078196167945862" arg: "-0.32595154643058777" arg: "-0.3116377592086792" arg: "-0.2809823453426361" arg: "-0.36722978949546814" arg: "-0.721940279006958" arg: "-0.4297282099723816" arg: "-0.48180773854255676" arg: "-0.4869074821472168" arg: "-0.5374395847320557" arg: "-0.28183409571647644" arg: "-0.18897175788879395" arg: "-0.2543872594833374" arg: "-0.27631592750549316" arg: "0.06477966159582138" arg: "0.2254650592803955" arg: "0.3261754810810089"
+ arg: "-0.18023546040058136" arg: "-0.3768996596336365" arg: "-0.22269578278064728" arg: "0.018682828173041344" arg: "-0.11290131509304047" arg: "-0.39474353194236755" arg: "-0.03385370224714279" arg: "0.21576857566833496" arg: "0.177229642868042" arg: "0.042106978595256805" arg: "-0.24501168727874756" arg: "0.08456140011548996" arg: "-0.2069004774093628" arg: "0.09091164171695709" arg: "0.10230324417352676" arg: "0.12411151826381683" arg: "-0.04576163366436958" arg: "-0.09247612208127975" arg: "-0.2126251608133316" arg: "0.17474356293678284" arg: "0.024457167834043503" arg: "0.044812608510255814" arg: "-0.24063478410243988" arg: "-0.18941839039325714" arg: "-0.060114409774541855" arg: "0.05715743824839592" arg: "-0.04718327894806862" arg: "-0.1155826672911644"
+ }
+}
+operand {
+ name: "input_to_cell_weights"
+ type: FLOAT32
+ shape { dim: 20 dim: 28 }
+ filler {
+ tag: "explicit"
+ arg: "-0.14322419464588165" arg: "-0.03543514385819435" arg: "-0.37075191736221313" arg: "-0.11541029065847397" arg: "-0.16138359904289246" arg: "0.12561601400375366" arg: "-0.02921752631664276" arg: "0.03920969367027283" arg: "-0.06703276187181473" arg: "0.1305120289325714" arg: "0.05381251871585846" arg: "0.004025470931082964" arg: "-0.23657721281051636" arg: "-0.18540850281715393" arg: "0.1416579633951187" arg: "0.5400514602661133" arg: "0.3134595453739166" arg: "-0.08414574712514877" arg: "-0.25919538736343384" arg: "-0.2152969390153885" arg: "-0.13050059974193573" arg: "-0.08802143484354019" arg: "-0.01510115247219801" arg: "0.008360753767192364" arg: "0.2349795401096344" arg: "0.1903218775987625" arg: "-0.06257583945989609" arg: "-0.28226447105407715"
+ arg: "0.10948476195335388" arg: "0.1869562417268753" arg: "-0.022029563784599304" arg: "-0.05470750108361244" arg: "0.26676875352859497" arg: "0.0009490635129623115" arg: "0.04640969634056091" arg: "0.17441663146018982" arg: "-0.15971945226192474" arg: "0.09322939813137054" arg: "0.12405950576066971" arg: "-0.07882469892501831" arg: "-0.03132086992263794" arg: "-0.3313288986682892" arg: "-0.26879292726516724" arg: "-0.22513961791992188" arg: "-0.17825880646705627" arg: "-0.28025585412979126" arg: "-0.026966359466314316" arg: "0.23260623216629028" arg: "-0.20367872714996338" arg: "0.04657059162855148" arg: "-0.1342175006866455" arg: "-0.039766665548086166" arg: "0.055720679461956024" arg: "-0.08040647208690643" arg: "-0.2785663306713104" arg: "-0.20658431947231293"
+ arg: "-0.2845764458179474" arg: "-0.12577861547470093" arg: "0.27772387862205505" arg: "0.31503239274024963" arg: "0.18268036842346191" arg: "0.08590365201234818" arg: "0.2304043024778366" arg: "-0.1624426394701004" arg: "-0.06931187212467194" arg: "0.06844695657491684" arg: "-0.21547579765319824" arg: "-0.25547099113464355" arg: "-0.17882278561592102" arg: "-0.05633649230003357" arg: "0.40184134244918823" arg: "0.22536933422088623" arg: "0.25438448786735535" arg: "0.0452042818069458" arg: "0.18264786899089813" arg: "0.08751898258924484" arg: "0.2497790902853012" arg: "0.10087688267230988" arg: "0.20765537023544312" arg: "0.14631463587284088" arg: "0.21937739849090576" arg: "0.473107248544693" arg: "0.4417702257633209" arg: "0.11503241211175919"
+ arg: "0.0008742575882934034" arg: "0.06207135319709778" arg: "-0.08988912403583527" arg: "0.05438712611794472" arg: "0.5871970653533936" arg: "0.48566651344299316" arg: "0.5400002002716064" arg: "0.6258884072303772" arg: "0.7265797853469849" arg: "0.7820643782615662" arg: "0.7500579953193665" arg: "0.711792528629303" arg: "0.5214825868606567" arg: "0.38864561915397644" arg: "0.14345985651016235" arg: "0.13323532044887543" arg: "-0.05931813269853592" arg: "0.08922331035137177" arg: "0.0952056497335434" arg: "0.06592926383018494" arg: "0.016503572463989258" arg: "0.0451623909175396" arg: "0.02558143064379692" arg: "-0.07171337306499481" arg: "0.016712144017219543" arg: "-0.2027517557144165" arg: "-0.2736990451812744" arg: "-0.47692665457725525"
+ arg: "-0.048994168639183044" arg: "-0.060884907841682434" arg: "0.05837605521082878" arg: "0.13532890379428864" arg: "0.08775680512189865" arg: "0.20183657109737396" arg: "0.20148716866970062" arg: "0.18878254294395447" arg: "0.053453002125024796" arg: "0.24967534840106964" arg: "0.05353411287069321" arg: "0.03676244989037514" arg: "0.02308550290763378" arg: "0.03319866210222244" arg: "0.26316580176353455" arg: "-0.08851990103721619" arg: "-0.02426389791071415" arg: "-0.5592305064201355" arg: "-0.6976458430290222" arg: "-0.30749642848968506" arg: "0.17886534333229065" arg: "-0.08636200428009033" arg: "0.3144587576389313" arg: "0.04458259046077728" arg: "0.03932805731892586" arg: "0.2133030742406845" arg: "0.46437597274780273" arg: "0.3068107068538666"
+ arg: "0.02069045975804329" arg: "0.09891658276319504" arg: "0.14216284453868866" arg: "-0.17771059274673462" arg: "0.017059292644262314" arg: "0.14904333651065826" arg: "-0.012604858726263046" arg: "0.20474286377429962" arg: "0.03844248130917549" arg: "0.03829401358962059" arg: "-0.08334273099899292" arg: "-0.3701476454734802" arg: "-0.4113644063472748" arg: "-0.142000213265419" arg: "0.1457018107175827" arg: "-0.13357846438884735" arg: "0.0584041103720665" arg: "-0.12736332416534424" arg: "-0.10378235578536987" arg: "-0.1306902915239334" arg: "-0.13070425391197205" arg: "-0.07379347831010818" arg: "-0.1626625508069992" arg: "-0.268823504447937" arg: "-0.11865845322608948" arg: "-0.17326758801937103" arg: "-0.538648784160614" arg: "-0.3999563753604889"
+ arg: "-0.0882401168346405" arg: "0.08313216269016266" arg: "0.2604030966758728" arg: "0.12791591882705688" arg: "-0.23485024273395538" arg: "-0.20249411463737488" arg: "-0.07451540231704712" arg: "0.10412992537021637" arg: "-0.0015763905830681324" arg: "0.1795114427804947" arg: "0.18463212251663208" arg: "0.23361526429653168" arg: "0.4148138165473938" arg: "0.4245856702327728" arg: "0.26099058985710144" arg: "-0.21013644337654114" arg: "-0.07617087662220001" arg: "-0.08196636289358139" arg: "-0.1636158674955368" arg: "-0.08406206220388412" arg: "-0.1299818903207779" arg: "-0.05562596768140793" arg: "0.11821522563695908" arg: "0.14262455701828003" arg: "0.1502220183610916" arg: "0.5341688394546509" arg: "0.21171703934669495" arg: "-0.07061432301998138"
+ arg: "0.25366824865341187" arg: "0.24823081493377686" arg: "0.16083313524723053" arg: "0.2527167499065399" arg: "0.23111572861671448" arg: "0.058159034699201584" arg: "-0.09976936876773834" arg: "-0.05263379588723183" arg: "0.03194894641637802" arg: "0.0328059084713459" arg: "-0.02465866319835186" arg: "-0.31380391120910645" arg: "-0.2766170799732208" arg: "-0.3121233880519867" arg: "-0.12022770196199417" arg: "-0.08422422409057617" arg: "0.09094454348087311" arg: "0.03537796065211296" arg: "-0.003948139026761055" arg: "0.08172310143709183" arg: "-0.027632299810647964" arg: "-0.08769393712282181" arg: "0.33617734909057617" arg: "0.3525821268558502" arg: "-0.089286670088768" arg: "-0.11947164684534073" arg: "0.05121488869190216" arg: "0.0871497243642807"
+ arg: "0.13053961098194122" arg: "0.02525678649544716" arg: "0.2623661160469055" arg: "-0.031171713024377823" arg: "0.03309366852045059" arg: "0.26168009638786316" arg: "0.2352420836687088" arg: "0.20169517397880554" arg: "0.04541448503732681" arg: "0.23629099130630493" arg: "0.21913982927799225" arg: "0.10123161971569061" arg: "0.021161029115319252" arg: "-0.0648307353258133" arg: "0.059982750564813614" arg: "0.05455026403069496" arg: "-0.031897980719804764" arg: "0.17262086272239685" arg: "0.018163474276661873" arg: "-0.1253158450126648" arg: "0.09696071594953537" arg: "0.08505688607692719" arg: "-0.07572611421346664" arg: "0.1470259726047516" arg: "0.38780710101127625" arg: "0.24831682443618774" arg: "0.2748945355415344" arg: "-0.05742323026061058"
+ arg: "-0.12352827936410904" arg: "-0.05199163407087326" arg: "0.32428112626075745" arg: "0.05729059875011444" arg: "-0.008717969991266727" arg: "-0.05560842528939247" arg: "0.15452425181865692" arg: "0.19150300323963165" arg: "0.27473723888397217" arg: "0.21984589099884033" arg: "0.17149868607521057" arg: "0.10660523176193237" arg: "-0.14198485016822815" arg: "-0.20040656626224518" arg: "-0.2936631441116333" arg: "-0.2659694254398346" arg: "-0.10012303292751312" arg: "0.18225309252738953" arg: "0.301752507686615" arg: "0.07418902218341827" arg: "0.415781170129776" arg: "0.20212115347385406" arg: "0.40611782670021057" arg: "0.4555768370628357" arg: "0.5562334656715393" arg: "0.28083983063697815" arg: "-0.0601402223110199" arg: "-0.006858934182673693"
+ arg: "-0.1211753711104393" arg: "-0.2732102572917938" arg: "0.05482182651758194" arg: "-0.21921852231025696" arg: "0.05309062823653221" arg: "-0.1784476339817047" arg: "-0.2689800560474396" arg: "-0.3562871515750885" arg: "-0.18823957443237305" arg: "-0.29951784014701843" arg: "-0.13354501128196716" arg: "0.11218584328889847" arg: "0.3258461654186249" arg: "0.22131332755088806" arg: "0.40942832827568054" arg: "0.21726171672344208" arg: "-0.026453329250216484" arg: "0.2588849663734436" arg: "-0.011599023826420307" arg: "0.13138829171657562" arg: "-0.02606634423136711" arg: "-0.022416120395064354" arg: "-0.21375882625579834" arg: "-0.07201182097196579" arg: "-0.07097262144088745" arg: "-0.21650902926921844" arg: "0.10855670273303986" arg: "-0.22552341222763062"
+ arg: "-0.018311869353055954" arg: "-0.01195024698972702" arg: "-0.18364953994750977" arg: "-0.11891163140535355" arg: "-0.08975464850664139" arg: "-0.03872677683830261" arg: "-0.1561228632926941" arg: "0.08454786241054535" arg: "0.07633733749389648" arg: "-0.0810701847076416" arg: "0.07330472767353058" arg: "0.08883491903543472" arg: "0.10723859816789627" arg: "0.13773062825202942" arg: "-0.022353384643793106" arg: "-0.06568673998117447" arg: "-0.10638830810785294" arg: "-0.4936034679412842" arg: "-0.3799048960208893" arg: "-0.34529590606689453" arg: "-0.5483863353729248" arg: "-0.21261106431484222" arg: "-0.525896430015564" arg: "-0.27971628308296204" arg: "-0.45189738273620605" arg: "-0.11105624586343765" arg: "-0.13174773752689362" arg: "-0.2162051945924759"
+ arg: "-0.05546094477176666" arg: "0.07048869132995605" arg: "-0.009296463802456856" arg: "0.17465157806873322" arg: "-0.13712915778160095" arg: "-0.18633928894996643" arg: "-0.09989194571971893" arg: "-0.1971769630908966" arg: "-0.0062749506905674934" arg: "-0.06607092171907425" arg: "-0.051594603806734085" arg: "0.09821145981550217" arg: "0.12090478837490082" arg: "0.2579136788845062" arg: "-0.11981549859046936" arg: "-0.16096201539039612" arg: "-0.20961809158325195" arg: "-0.3041784167289734" arg: "-0.12349500507116318" arg: "0.012003187090158463" arg: "-0.008235737681388855" arg: "0.157791867852211" arg: "0.034064337611198425" arg: "0.37696516513824463" arg: "0.41845211386680603" arg: "0.34168341755867004" arg: "0.1698485016822815" arg: "0.12188931554555893"
+ arg: "0.2781696915626526" arg: "-0.16692659258842468" arg: "0.19220511615276337" arg: "0.2646377682685852" arg: "0.430692583322525" arg: "0.1798837035894394" arg: "0.304645836353302" arg: "0.2644904553890228" arg: "0.4548005759716034" arg: "0.11960816383361816" arg: "0.28225284814834595" arg: "0.2256070226430893" arg: "0.11082617938518524" arg: "0.010454414412379265" arg: "0.029175851494073868" arg: "-0.07078122347593307" arg: "0.1820518672466278" arg: "-0.044396884739398956" arg: "0.19291731715202332" arg: "0.16714687645435333" arg: "-0.07964447140693665" arg: "0.17365328967571259" arg: "0.04092591255903244" arg: "0.11975318193435669" arg: "0.295398473739624" arg: "0.2982410788536072" arg: "-0.02866576984524727" arg: "0.016789700835943222"
+ arg: "-0.012119884602725506" arg: "0.05438081920146942" arg: "-0.22899110615253448" arg: "-0.2591288983821869" arg: "-0.054602570831775665" arg: "-0.14894776046276093" arg: "0.1144491583108902" arg: "-0.12802022695541382" arg: "0.2082153707742691" arg: "0.2738417685031891" arg: "0.2696736752986908" arg: "0.20811103284358978" arg: "-0.23155954480171204" arg: "-0.16073471307754517" arg: "0.19584868848323822" arg: "0.33939072489738464" arg: "0.3128233253955841" arg: "0.235674649477005" arg: "0.2830577492713928" arg: "0.006479979958385229" arg: "0.044619105756282806" arg: "-0.1676308512687683" arg: "-0.2677950859069824" arg: "-0.43867629766464233" arg: "-0.011492089368402958" arg: "0.15210457146167755" arg: "0.07373664528131485" arg: "-0.1156914010643959"
+ arg: "-0.013776483945548534" arg: "-0.018127629533410072" arg: "0.05377393960952759" arg: "0.29386937618255615" arg: "0.2213279902935028" arg: "0.07069018483161926" arg: "0.21774591505527496" arg: "0.2090938538312912" arg: "0.24711604416370392" arg: "0.6695317625999451" arg: "0.46329638361930847" arg: "0.09031569212675095" arg: "0.18645460903644562" arg: "-0.30952781438827515" arg: "0.03084215708076954" arg: "-0.07405883073806763" arg: "0.23570561408996582" arg: "-0.042296942323446274" arg: "0.06679017841815948" arg: "0.04472753405570984" arg: "0.10317760705947876" arg: "0.02574877440929413" arg: "0.16524934768676758" arg: "-0.18901412189006805" arg: "-0.002970139030367136" arg: "0.1698213666677475" arg: "-0.058196987956762314" arg: "-0.16562488675117493"
+ arg: "-0.05622567981481552" arg: "-0.15602625906467438" arg: "0.03411481902003288" arg: "0.05091657117009163" arg: "-0.06706684082746506" arg: "0.2647324204444885" arg: "-0.1097978800535202" arg: "-0.01716734655201435" arg: "0.07656659185886383" arg: "0.08834546059370041" arg: "0.10541308671236038" arg: "0.09015700966119766" arg: "-0.05200522020459175" arg: "-0.5801278948783875" arg: "-0.28525060415267944" arg: "0.23951329290866852" arg: "0.15975099802017212" arg: "0.07012801617383957" arg: "-0.008377078920602798" arg: "-0.05613056570291519" arg: "0.017154719680547714" arg: "-0.11925014108419418" arg: "-0.1528388410806656" arg: "-0.08968795090913773" arg: "0.10360284149646759" arg: "-0.0059541938826441765" arg: "-0.15356747806072235" arg: "-0.06125378981232643"
+ arg: "-0.2849915325641632" arg: "0.04452868923544884" arg: "-0.1001860573887825" arg: "-0.051032423973083496" arg: "-0.3187020719051361" arg: "-0.4221560060977936" arg: "-0.026225173845887184" arg: "-0.17684528231620789" arg: "-0.23219993710517883" arg: "-0.31752654910087585" arg: "-0.16388170421123505" arg: "-0.408907026052475" arg: "-0.22536908090114594" arg: "-0.06049299240112305" arg: "0.16050300002098083" arg: "-0.0070655131712555885" arg: "-0.04127126559615135" arg: "0.025422271341085434" arg: "0.14560039341449738" arg: "0.016476156190037727" arg: "-0.11462834477424622" arg: "0.04149504005908966" arg: "-0.028964219614863396" arg: "-0.17375178635120392" arg: "0.2279641181230545" arg: "0.16986951231956482" arg: "-0.021325843408703804" arg: "0.07627731561660767"
+ arg: "0.26788678765296936" arg: "0.18480995297431946" arg: "0.27279922366142273" arg: "0.09604113548994064" arg: "0.11791739612817764" arg: "0.158638134598732" arg: "-0.06370259821414948" arg: "0.012692139483988285" arg: "-0.12293782830238342" arg: "0.15729208290576935" arg: "-0.03052298165857792" arg: "0.011496515944600105" arg: "-0.31932440400123596" arg: "-0.400392085313797" arg: "-0.3828366696834564" arg: "-0.03249126672744751" arg: "0.19932818412780762" arg: "0.15364520251750946" arg: "0.04440109804272652" arg: "0.08241084218025208" arg: "0.3154240846633911" arg: "0.30371105670928955" arg: "0.2279568612575531" arg: "0.15496046841144562" arg: "0.20785588026046753" arg: "0.061398304998874664" arg: "-0.4503801465034485" arg: "-0.29032525420188904"
+ arg: "-0.14293214678764343" arg: "-0.07592987269163132" arg: "-0.13592901825904846" arg: "0.0590178519487381" arg: "0.05973733589053154" arg: "0.16063377261161804" arg: "0.0970887765288353" arg: "0.005558301229029894" arg: "-0.06746333837509155" arg: "-0.08073955029249191" arg: "-0.11676499992609024" arg: "-0.030398180708289146" arg: "0.020192358642816544" arg: "-0.20045405626296997" arg: "-0.33653098344802856" arg: "-0.009017355740070343" arg: "-0.20934724807739258" arg: "0.1834343820810318" arg: "-0.2903430759906769" arg: "-0.12888988852500916" arg: "-0.39840051531791687" arg: "-0.19070746004581451" arg: "-0.40943092107772827" arg: "-0.2717587947845459" arg: "0.20603778958320618" arg: "-0.3883904814720154" arg: "-0.039434246718883514" arg: "-0.082768514752388"
+ }
+}
+operand {
+ name: "input_to_output_weights"
+ type: FLOAT32
+ shape { dim: 20 dim: 28 }
+ filler {
+ tag: "explicit"
+ arg: "-0.13361161947250366" arg: "0.2535432279109955" arg: "-0.12359361350536346" arg: "-0.01544901356101036" arg: "-0.1801277995109558" arg: "-0.17746007442474365" arg: "-0.15477000176906586" arg: "0.05789067968726158" arg: "-0.05010955408215523" arg: "0.16625314950942993" arg: "-0.05529220774769783" arg: "0.13654044270515442" arg: "0.08228389918804169" arg: "-0.2581821084022522" arg: "-0.2971959412097931" arg: "0.019851312041282654" arg: "0.06781011819839478" arg: "0.256449818611145" arg: "0.9392919540405273" arg: "0.6971920132637024" arg: "0.9516055583953857" arg: "0.564976692199707" arg: "0.4223553538322449" arg: "-0.4222773611545563" arg: "-0.08974764496088028" arg: "-0.24621441960334778" arg: "-0.39981308579444885" arg: "-0.22260607779026031"
+ arg: "-0.08982681483030319" arg: "0.031929973512887955" arg: "0.07003000378608704" arg: "0.1441173106431961" arg: "-0.3362192213535309" arg: "0.002978335367515683" arg: "-0.17029152810573578" arg: "0.09627201408147812" arg: "0.08026549220085144" arg: "-0.0912403017282486" arg: "0.2660815119743347" arg: "0.759759247303009" arg: "0.5078319311141968" arg: "0.5035433173179626" arg: "0.3447149693965912" arg: "-0.003492701565846801" arg: "0.31759369373321533" arg: "0.009249270893633366" arg: "0.1448427438735962" arg: "-0.2437012791633606" arg: "-0.23719677329063416" arg: "-0.4108390510082245" arg: "-0.021009216085076332" arg: "-0.06669881194829941" arg: "-0.27399614453315735" arg: "-0.3611904978752136" arg: "-0.10668569058179855" arg: "0.04301835596561432"
+ arg: "0.14136508107185364" arg: "-0.20753467082977295" arg: "-0.12640978395938873" arg: "-0.0813758373260498" arg: "0.061406463384628296" arg: "0.22040338814258575" arg: "-0.10975504666566849" arg: "0.2034149467945099" arg: "0.045333147048950195" arg: "0.2408442199230194" arg: "-0.07402602583169937" arg: "-0.05966627970337868" arg: "0.222041517496109" arg: "-0.0912318304181099" arg: "0.5839067697525024" arg: "0.638180136680603" arg: "1.0717300176620483" arg: "0.8844493627548218" arg: "0.9932028651237488" arg: "0.7096611857414246" arg: "0.6521549224853516" arg: "0.2312447875738144" arg: "0.3880407214164734" arg: "0.48241227865219116" arg: "0.5459808111190796" arg: "0.21763679385185242" arg: "0.10172371566295624" arg: "0.02386627160012722"
+ arg: "0.22363805770874023" arg: "0.055896203964948654" arg: "0.061871618032455444" arg: "-0.17662213742733002" arg: "0.26071617007255554" arg: "0.47994768619537354" arg: "0.4157676696777344" arg: "0.3473230302333832" arg: "0.16535861790180206" arg: "0.3827962279319763" arg: "0.0706290602684021" arg: "0.38254064321517944" arg: "0.28826048970222473" arg: "0.3427451252937317" arg: "0.6095309257507324" arg: "0.3840809166431427" arg: "0.42494848370552063" arg: "0.5521677732467651" arg: "0.36389851570129395" arg: "0.32664236426353455" arg: "0.5916943550109863" arg: "0.2420167326927185" arg: "0.5305866599082947" arg: "0.2550817131996155" arg: "-0.2333785742521286" arg: "-0.472649484872818" arg: "-0.3964008092880249" arg: "-0.1272299885749817"
+ arg: "0.03291049599647522" arg: "0.0799076184630394" arg: "-0.14570799469947815" arg: "0.45667150616645813" arg: "0.2630525827407837" arg: "0.10890132933855057" arg: "-0.054551925510168076" arg: "0.0046729231253266335" arg: "0.21604961156845093" arg: "0.2429116815328598" arg: "-0.04739723354578018" arg: "-0.01825188286602497" arg: "-0.08609837293624878" arg: "0.20959249138832092" arg: "0.06693773716688156" arg: "-0.2576264441013336" arg: "-0.524071216583252" arg: "-0.23866695165634155" arg: "-0.110318124294281" arg: "0.21306151151657104" arg: "0.22546638548374176" arg: "0.04744942858815193" arg: "0.02165571227669716" arg: "-0.06357958167791367" arg: "-0.29460445046424866" arg: "0.09107953310012817" arg: "0.3577098548412323" arg: "0.2221963107585907"
+ arg: "0.01575949229300022" arg: "0.14965087175369263" arg: "0.1605227142572403" arg: "-0.1556941121816635" arg: "-0.16914859414100647" arg: "-0.01264619454741478" arg: "0.08487699925899506" arg: "-0.02691168338060379" arg: "-0.13130810856819153" arg: "0.1136908307671547" arg: "0.20643149316310883" arg: "0.5454611778259277" arg: "0.7623800039291382" arg: "0.7830140590667725" arg: "0.7355128526687622" arg: "0.6468384265899658" arg: "-0.00944082997739315" arg: "0.05574082210659981" arg: "0.02945263683795929" arg: "0.1267295628786087" arg: "0.20847344398498535" arg: "0.0877644270658493" arg: "0.5400432348251343" arg: "0.523216187953949" arg: "0.24031612277030945" arg: "-0.0941137745976448" arg: "-0.226273775100708" arg: "0.17517033219337463"
+ arg: "0.22727088630199432" arg: "-0.04092717543244362" arg: "-0.14521321654319763" arg: "-0.0876765251159668" arg: "-0.07828030735254288" arg: "-0.239556223154068" arg: "-0.28606486320495605" arg: "0.0778198391199112" arg: "-0.07059259712696075" arg: "0.01951042376458645" arg: "0.23620696365833282" arg: "0.1878870725631714" arg: "0.33765503764152527" arg: "0.476667582988739" arg: "-0.06353191286325455" arg: "0.017065072432160378" arg: "0.1915859431028366" arg: "0.5179688334465027" arg: "0.11117789149284363" arg: "0.20390978455543518" arg: "0.20907467603683472" arg: "0.21745766699314117" arg: "-0.33880436420440674" arg: "-0.39202067255973816" arg: "-0.6140245795249939" arg: "-0.015891058370471" arg: "-0.37634962797164917" arg: "-0.14018163084983826"
+ arg: "0.08723758161067963" arg: "-0.18214963376522064" arg: "0.04190217703580856" arg: "-0.05375084653496742" arg: "-0.09821449220180511" arg: "0.17345662415027618" arg: "-0.043591149151325226" arg: "0.2588083744049072" arg: "0.15015676617622375" arg: "0.39709022641181946" arg: "0.5637708902359009" arg: "0.44008538126945496" arg: "0.12622207403182983" arg: "-0.11229363828897476" arg: "-0.3005681335926056" arg: "-0.19021284580230713" arg: "-0.01430613361299038" arg: "0.08379453420639038" arg: "0.3318374454975128" arg: "-0.28072816133499146" arg: "-0.48265987634658813" arg: "-0.024674715474247932" arg: "0.04502909258008003" arg: "-0.17233917117118835" arg: "-0.11240006238222122" arg: "-0.2408943474292755" arg: "-0.3078864514827728" arg: "-0.16831211745738983"
+ arg: "0.12704502046108246" arg: "0.00693404208868742" arg: "0.3220159411430359" arg: "0.5156370997428894" arg: "0.3838233947753906" arg: "0.1953398436307907" arg: "0.17885588109493256" arg: "-0.09935329854488373" arg: "0.22020603716373444" arg: "0.2726762294769287" arg: "0.6566031575202942" arg: "0.4547414183616638" arg: "0.774775505065918" arg: "0.1015879288315773" arg: "0.11886601150035858" arg: "-0.05910573527216911" arg: "0.19138272106647491" arg: "0.33189404010772705" arg: "0.46459195017814636" arg: "0.5477021336555481" arg: "-0.03558523580431938" arg: "-0.26250338554382324" arg: "-0.18095482885837555" arg: "-0.6443997621536255" arg: "-0.5638570785522461" arg: "0.0682566836476326" arg: "0.1611909568309784" arg: "0.05711650475859642"
+ arg: "0.19428735971450806" arg: "0.18257075548171997" arg: "0.19051998853683472" arg: "0.0003557652235031128" arg: "0.23444350063800812" arg: "0.269832044839859" arg: "0.3485797345638275" arg: "0.4130585491657257" arg: "-0.15780729055404663" arg: "-0.33516737818717957" arg: "0.0878380611538887" arg: "0.01651749014854431" arg: "-0.32947224378585815" arg: "-0.09176459908485413" arg: "0.5332760214805603" arg: "0.2153072953224182" arg: "0.17049799859523773" arg: "0.1438971608877182" arg: "0.41453301906585693" arg: "0.03268708288669586" arg: "-0.10276871919631958" arg: "-0.051143575459718704" arg: "0.5329627394676208" arg: "0.626736044883728" arg: "0.6361001133918762" arg: "0.2502281665802002" arg: "0.07913760840892792" arg: "0.007263735868036747"
+ arg: "0.24060982465744019" arg: "-0.05091336369514465" arg: "0.035534802824258804" arg: "-0.07280046492815018" arg: "-0.1301843523979187" arg: "0.11763674765825272" arg: "0.10463829338550568" arg: "0.27148929238319397" arg: "-0.09400584548711777" arg: "-0.1651712954044342" arg: "0.17628471553325653" arg: "-0.08193076401948929" arg: "-0.15348084270954132" arg: "0.36272093653678894" arg: "0.34180593490600586" arg: "0.22390322387218475" arg: "0.824752151966095" arg: "0.778559148311615" arg: "0.8345740437507629" arg: "0.6250406503677368" arg: "0.8019649982452393" arg: "0.4851066470146179" arg: "0.5757401585578918" arg: "0.2889276444911957" arg: "0.2547096610069275" arg: "0.3385365605354309" arg: "0.1420331448316574" arg: "0.36819931864738464"
+ arg: "0.2515231668949127" arg: "0.030855854973196983" arg: "-0.10836786776781082" arg: "0.10643685609102249" arg: "-0.09548310190439224" arg: "0.04187479987740517" arg: "0.31773850321769714" arg: "0.3311438262462616" arg: "0.5351244211196899" arg: "0.1878986656665802" arg: "0.30104926228523254" arg: "0.4463382959365845" arg: "0.281780868768692" arg: "-0.1471078246831894" arg: "0.1783357411623001" arg: "0.07812053710222244" arg: "0.18911871314048767" arg: "0.4206305146217346" arg: "0.6459701657295227" arg: "0.4032178223133087" arg: "0.5664452314376831" arg: "0.33140894770622253" arg: "-0.025404682382941246" arg: "-0.3444240987300873" arg: "-0.2518601715564728" arg: "-0.5755497813224792" arg: "-0.42604678869247437" arg: "-0.09392133355140686"
+ arg: "0.2687152326107025" arg: "0.4339213967323303" arg: "0.2603331208229065" arg: "-0.025584589689970016" arg: "0.02291446179151535" arg: "0.21942569315433502" arg: "0.5105418562889099" arg: "0.24948522448539734" arg: "0.34695175290107727" arg: "0.32354483008384705" arg: "0.038374610245227814" arg: "-0.015621446073055267" arg: "0.41838541626930237" arg: "0.5760942697525024" arg: "0.5837766528129578" arg: "0.6488270163536072" arg: "0.03806944563984871" arg: "-0.30252325534820557" arg: "-0.5109604001045227" arg: "-0.6022301912307739" arg: "-0.4811290502548218" arg: "-0.23938359320163727" arg: "0.15395738184452057" arg: "0.31103089451789856" arg: "0.33056965470314026" arg: "0.3005286157131195" arg: "0.5069742798805237" arg: "-0.21318034827709198"
+ arg: "0.12214536964893341" arg: "-0.15474587678909302" arg: "0.3912317454814911" arg: "0.5372982621192932" arg: "0.4572385549545288" arg: "0.16643570363521576" arg: "0.0814824178814888" arg: "0.3027104139328003" arg: "0.23147137463092804" arg: "0.3813971281051636" arg: "0.15540477633476257" arg: "0.08324414491653442" arg: "0.519047200679779" arg: "0.37160855531692505" arg: "0.6911864280700684" arg: "0.21933679282665253" arg: "-0.009083807468414307" arg: "0.4009028375148773" arg: "0.5206535458564758" arg: "0.2980058789253235" arg: "0.4483773112297058" arg: "0.5295672416687012" arg: "0.6978735327720642" arg: "0.33932334184646606" arg: "0.3536893129348755" arg: "0.4484431743621826" arg: "-0.09894105792045593" arg: "-0.017690571025013924"
+ arg: "-0.1264471709728241" arg: "0.004865952301770449" arg: "-0.020671315491199493" arg: "-0.3312399685382843" arg: "-0.1591784507036209" arg: "0.22974173724651337" arg: "0.11263400316238403" arg: "-0.058258578181266785" arg: "-0.17727409303188324" arg: "0.22839052975177765" arg: "0.14666402339935303" arg: "-0.11730131506919861" arg: "0.05218665674328804" arg: "0.1637987196445465" arg: "0.13374973833560944" arg: "0.35711121559143066" arg: "0.5225153565406799" arg: "0.29755473136901855" arg: "0.2492614984512329" arg: "-0.18074239790439606" arg: "-0.14335356652736664" arg: "0.0032210154458880424" arg: "0.1897306889295578" arg: "0.22594426572322845" arg: "-0.5188416838645935" arg: "-0.35428524017333984" arg: "-0.030699916183948517" arg: "-0.08329521119594574"
+ arg: "0.0687800794839859" arg: "0.14417889714241028" arg: "0.08634546399116516" arg: "0.4291350245475769" arg: "0.2597505748271942" arg: "0.17120565474033356" arg: "0.006312726065516472" arg: "-0.08433850854635239" arg: "0.024435490369796753" arg: "0.01323175523430109" arg: "0.442842036485672" arg: "0.3250333070755005" arg: "0.36304062604904175" arg: "0.27663564682006836" arg: "0.554750382900238" arg: "0.40236881375312805" arg: "0.19184589385986328" arg: "0.4051419496536255" arg: "0.3190324902534485" arg: "0.202935591340065" arg: "0.23588521778583527" arg: "-0.060444705188274384" arg: "-0.1226918026804924" arg: "0.1633310168981552" arg: "0.13458260893821716" arg: "0.11727706342935562" arg: "0.3460041284561157" arg: "0.06878886371850967"
+ arg: "0.04063422232866287" arg: "-0.006821052171289921" arg: "0.3323805630207062" arg: "0.26635101437568665" arg: "0.3779240548610687" arg: "0.11733505874872208" arg: "-0.10527531802654266" arg: "-0.07571443170309067" arg: "-0.08959870040416718" arg: "0.34649038314819336" arg: "0.11251195520162582" arg: "-0.004889118485152721" arg: "-0.12246599048376083" arg: "-0.007686110679060221" arg: "-0.15067224204540253" arg: "-0.1337168663740158" arg: "-0.13265375792980194" arg: "-0.26213783025741577" arg: "0.011739661917090416" arg: "-0.2025691419839859" arg: "-0.09236078709363937" arg: "0.1839291900396347" arg: "0.039318203926086426" arg: "0.15644147992134094" arg: "0.08048530668020248" arg: "0.027235517278313637" arg: "-0.07634953409433365" arg: "-0.010376683436334133"
+ arg: "0.08199792355298996" arg: "-0.1779499351978302" arg: "0.07380770146846771" arg: "-0.0894157737493515" arg: "0.05990520119667053" arg: "0.2665881812572479" arg: "0.03642373904585838" arg: "-0.03619125485420227" arg: "0.011516132391989231" arg: "-0.213419109582901" arg: "0.01569538190960884" arg: "-0.1833057552576065" arg: "0.2472868114709854" arg: "0.8453921675682068" arg: "0.9291183948516846" arg: "0.9661735892295837" arg: "0.6590875387191772" arg: "0.6245248913764954" arg: "0.2572375535964966" arg: "-0.24237875640392303" arg: "0.010541471652686596" arg: "-0.22616639733314514" arg: "-0.027475513517856598" arg: "0.17520084977149963" arg: "-0.2040407955646515" arg: "0.046643588691949844" arg: "-0.263759046792984" arg: "-0.187980055809021"
+ arg: "-0.1663954257965088" arg: "0.13280753791332245" arg: "0.24432727694511414" arg: "-0.1797582507133484" arg: "-0.1539366990327835" arg: "0.4611888825893402" arg: "0.20097453892230988" arg: "0.32330435514450073" arg: "0.2807258665561676" arg: "0.31781595945358276" arg: "0.5915217995643616" arg: "0.3510398864746094" arg: "0.32014137506484985" arg: "0.355925977230072" arg: "0.20221084356307983" arg: "-0.06906703114509583" arg: "-0.1754205822944641" arg: "-0.04989638179540634" arg: "-0.363330215215683" arg: "0.0022096107713878155" arg: "-0.35765916109085083" arg: "0.12441515922546387" arg: "0.2386118769645691" arg: "0.14590708911418915" arg: "-0.20669437944889069" arg: "-0.13032864034175873" arg: "0.15810780227184296" arg: "0.10193713754415512"
+ arg: "0.3005771040916443" arg: "0.3376172184944153" arg: "0.15595310926437378" arg: "0.03881113976240158" arg: "0.049130022525787354" arg: "0.0412493497133255" arg: "0.5658847093582153" arg: "0.7288451194763184" arg: "0.08432513475418091" arg: "0.20872049033641815" arg: "0.2760712504386902" arg: "0.3288831114768982" arg: "0.29803207516670227" arg: "-0.14779740571975708" arg: "-0.04237861558794975" arg: "0.2661236822605133" arg: "0.5379334688186646" arg: "0.5947390198707581" arg: "-0.0807252898812294" arg: "-0.28580325841903687" arg: "-0.5205297470092773" arg: "-0.4381696879863739" arg: "-0.15092800557613373" arg: "0.048043206334114075" arg: "0.09882169216871262" arg: "0.17096076905727386" arg: "-0.2508130967617035" arg: "0.05531834065914154"
+ }
+}
+operand {
+ name: "recurrent_to_input_weights"
+ type: FLOAT32
+ shape { dim: 20 dim: 20 }
+ filler {
+ tag: "explicit"
+ arg: "-0.4770118296146393" arg: "0.21366995573043823" arg: "0.46016961336135864" arg: "0.3251325190067291" arg: "0.13871631026268005" arg: "0.21228066086769104" arg: "-0.23697999119758606" arg: "-0.1725820153951645" arg: "0.16972437500953674" arg: "0.011942930519580841" arg: "0.2916385233402252" arg: "0.2334175705909729" arg: "0.8671041131019592" arg: "0.2224881947040558" arg: "-0.15734145045280457" arg: "-0.06410238891839981" arg: "-0.2018616646528244" arg: "0.2584409713745117" arg: "-0.3724243938922882" arg: "-0.3660326302051544"
+ arg: "-0.3494759500026703" arg: "0.4290483593940735" arg: "-0.11313329637050629" arg: "-0.059981122612953186" arg: "-0.14534664154052734" arg: "0.13317358493804932" arg: "-0.31460973620414734" arg: "0.4654754102230072" arg: "0.5217755436897278" arg: "0.36345264315605164" arg: "-0.06475342065095901" arg: "-0.611393392086029" arg: "-0.35462483763694763" arg: "0.21069355309009552" arg: "0.05305428430438042" arg: "-0.08702543377876282" arg: "-0.0562891848385334" arg: "-0.13899517059326172" arg: "0.3408608138561249" arg: "0.09720013290643692"
+ arg: "-0.4325777590274811" arg: "0.09612088650465012" arg: "0.11070075631141663" arg: "0.14977702498435974" arg: "-0.282085120677948" arg: "0.5554621815681458" arg: "-0.09023251384496689" arg: "0.12265370041131973" arg: "-0.04457980766892433" arg: "0.2098589986562729" arg: "0.21766719222068787" arg: "-0.19030693173408508" arg: "0.1261812448501587" arg: "-0.04023653268814087" arg: "0.09346041828393936" arg: "-0.24297039210796356" arg: "0.2858717441558838" arg: "-0.07445301115512848" arg: "-0.26624348759651184" arg: "-0.013775470666587353"
+ arg: "-0.06638309359550476" arg: "0.037332624197006226" arg: "-0.5706251263618469" arg: "0.6612618565559387" arg: "-0.21594902873039246" arg: "0.09637858718633652" arg: "0.4018155038356781" arg: "0.1754107028245926" arg: "0.18305723369121552" arg: "0.409424751996994" arg: "0.13018175959587097" arg: "0.45919686555862427" arg: "-0.3296961486339569" arg: "-0.27102723717689514" arg: "0.15050001442432404" arg: "-0.06254072487354279" arg: "-0.36290204524993896" arg: "-0.3503246605396271" arg: "0.40385496616363525" arg: "-0.32791852951049805"
+ arg: "0.009967965073883533" arg: "0.11481080204248428" arg: "0.25612783432006836" arg: "0.12932859361171722" arg: "0.2397402822971344" arg: "0.10742141306400299" arg: "0.505053699016571" arg: "-0.2892862856388092" arg: "0.4383614659309387" arg: "0.025106344372034073" arg: "0.43115267157554626" arg: "0.3953210115432739" arg: "0.13628928363323212" arg: "-0.1098363846540451" arg: "-0.11229805648326874" arg: "-0.09960231184959412" arg: "-0.0775841549038887" arg: "0.08288741856813431" arg: "0.21580594778060913" arg: "-0.07877210527658463"
+ arg: "-0.2979702353477478" arg: "-0.051671307533979416" arg: "0.05603557080030441" arg: "-0.07683657109737396" arg: "0.05431858450174332" arg: "0.5497500896453857" arg: "-0.3987792432308197" arg: "-0.18461892008781433" arg: "-0.021399449557065964" arg: "-0.08834270387887955" arg: "-0.38572776317596436" arg: "0.2081238031387329" arg: "0.11147842556238174" arg: "-0.18445543944835663" arg: "-0.14360877871513367" arg: "-0.02369718812406063" arg: "0.13527068495750427" arg: "0.15338219702243805" arg: "-0.33769914507865906" arg: "0.12053044140338898"
+ arg: "-0.31656137108802795" arg: "0.022205566987395287" arg: "-1.0957515239715576" arg: "-0.07192184031009674" arg: "-0.3568742573261261" arg: "0.08484519273042679" arg: "0.3926958739757538" arg: "0.15565809607505798" arg: "-0.08836834877729416" arg: "0.27079248428344727" arg: "0.038257431238889694" arg: "-0.08028512448072433" arg: "0.29435107111930847" arg: "-0.07749350368976593" arg: "-0.34698745608329773" arg: "-0.2787376940250397" arg: "0.1498851329088211" arg: "-0.20681093633174896" arg: "0.20965063571929932" arg: "0.14793993532657623"
+ arg: "-0.45168495178222656" arg: "-0.19468553364276886" arg: "0.004608047194778919" arg: "0.3809444308280945" arg: "-0.20797111093997955" arg: "-0.120585598051548" arg: "0.2809143364429474" arg: "0.06513983756303787" arg: "0.18850207328796387" arg: "0.2409287691116333" arg: "0.2608538866043091" arg: "-0.37541523575782776" arg: "-0.023761505261063576" arg: "0.1088205948472023" arg: "0.24460943043231964" arg: "0.3784101605415344" arg: "-0.1123291477560997" arg: "-0.1368710845708847" arg: "-0.5494782328605652" arg: "-0.18547306954860687"
+ arg: "0.3815309405326843" arg: "0.1690528243780136" arg: "-0.35149693489074707" arg: "-0.02372279018163681" arg: "0.10354622453451157" arg: "0.549765408039093" arg: "0.019203156232833862" arg: "-0.3717760443687439" arg: "0.0764758288860321" arg: "-0.2072433978319168" arg: "0.1771903783082962" arg: "0.44540902972221375" arg: "-0.32312753796577454" arg: "-0.2570071518421173" arg: "0.3598842918872833" arg: "0.01568111963570118" arg: "-0.10515885800123215" arg: "-0.0006535121938213706" arg: "-0.4027051329612732" arg: "-0.08736834675073624"
+ arg: "0.4984769821166992" arg: "-0.0011503007262945175" arg: "-0.535097062587738" arg: "0.23247945308685303" arg: "0.10292237997055054" arg: "-0.2671816647052765" arg: "0.46480339765548706" arg: "-0.3894844353199005" arg: "0.3963298201560974" arg: "0.14017800986766815" arg: "0.3199640214443207" arg: "0.4258514642715454" arg: "-0.16700509190559387" arg: "0.09393472969532013" arg: "0.010149846784770489" arg: "0.2868942320346832" arg: "-0.3524361252784729" arg: "-0.6936826705932617" arg: "0.003629873273894191" arg: "-0.09144237637519836"
+ arg: "-0.34517648816108704" arg: "0.1484774649143219" arg: "-0.24635784327983856" arg: "0.01039072871208191" arg: "0.38751891255378723" arg: "0.2944512963294983" arg: "-0.2962084412574768" arg: "0.08631572872400284" arg: "0.03221822530031204" arg: "-0.08134875446557999" arg: "-0.5519762635231018" arg: "0.06120099127292633" arg: "-0.049402546137571335" arg: "-0.4067457318305969" arg: "0.4055297076702118" arg: "0.06430382281541824" arg: "0.11064586043357849" arg: "0.06960950791835785" arg: "-0.31485283374786377" arg: "0.14682182669639587"
+ arg: "-0.19570083916187286" arg: "0.10931383073329926" arg: "-0.007622078992426395" arg: "-0.05675305798649788" arg: "-0.8250163793563843" arg: "-0.11235163360834122" arg: "-0.029461843892931938" arg: "0.7492899298667908" arg: "-0.27355697751045227" arg: "0.3595489263534546" arg: "0.23662255704402924" arg: "-0.3644302189350128" arg: "0.6238518357276917" arg: "-0.3704565465450287" arg: "0.19363875687122345" arg: "0.3572763204574585" arg: "-0.23370115458965302" arg: "0.42023247480392456" arg: "0.15355002880096436" arg: "-0.39801692962646484"
+ arg: "0.2795780897140503" arg: "0.019917918369174004" arg: "-0.14301127195358276" arg: "0.4369097948074341" arg: "-0.33128276467323303" arg: "-0.12198600172996521" arg: "0.6699290871620178" arg: "0.27996954321861267" arg: "-0.04728970676660538" arg: "-0.0063692545518279076" arg: "0.33021485805511475" arg: "0.048498980700969696" arg: "0.10616268217563629" arg: "0.21613231301307678" arg: "0.5218581557273865" arg: "-0.4211953282356262" arg: "0.10934742540121078" arg: "-0.3572162687778473" arg: "-0.04984986037015915" arg: "-0.3323499262332916"
+ arg: "0.05725480243563652" arg: "-0.6297563314437866" arg: "0.24617090821266174" arg: "0.016658928245306015" arg: "-0.3822592496871948" arg: "0.16846376657485962" arg: "-0.02593623474240303" arg: "0.5345171689987183" arg: "0.00866254698485136" arg: "-0.4684853255748749" arg: "0.5099982619285583" arg: "-0.1846589744091034" arg: "-0.31118252873420715" arg: "0.30074822902679443" arg: "0.4734266400337219" arg: "0.446226567029953" arg: "-0.2197871059179306" arg: "0.13501974940299988" arg: "-0.3300747275352478" arg: "-0.35672369599342346"
+ arg: "0.09107185155153275" arg: "0.15899214148521423" arg: "0.3112131953239441" arg: "-0.016727039590477943" arg: "-0.051409196108579636" arg: "0.1564004272222519" arg: "-0.4456101953983307" arg: "0.27580249309539795" arg: "0.0816519483923912" arg: "0.18884021043777466" arg: "-0.35784390568733215" arg: "0.15665903687477112" arg: "0.5751363635063171" arg: "-0.08250349014997482" arg: "-0.052263204008340836" arg: "-0.40821653604507446" arg: "0.14680282771587372" arg: "-0.1555611938238144" arg: "-0.02860925905406475" arg: "-0.03125927224755287"
+ arg: "0.13248908519744873" arg: "-0.8433090448379517" arg: "0.6264944076538086" arg: "-0.013466065749526024" arg: "-0.480976939201355" arg: "0.4193423092365265" arg: "-0.744616687297821" arg: "0.8992355465888977" arg: "-0.3339115083217621" arg: "-0.22039049863815308" arg: "0.18902333080768585" arg: "-0.39615511894226074" arg: "-0.010607750155031681" arg: "0.20654703676700592" arg: "0.25115591287612915" arg: "-0.2122495174407959" arg: "0.025297891348600388" arg: "0.35302531719207764" arg: "-0.0321347676217556" arg: "-0.3839147686958313"
+ arg: "-0.10077176988124847" arg: "-0.45120227336883545" arg: "0.3416426479816437" arg: "0.07022065669298172" arg: "-0.6492688059806824" arg: "-0.010763137601315975" arg: "-0.041585713624954224" arg: "0.45695760846138" arg: "0.4389442801475525" arg: "0.07174579054117203" arg: "0.19659492373466492" arg: "-0.2505846619606018" arg: "-0.5589239001274109" arg: "0.4465855360031128" arg: "0.8945375680923462" arg: "0.47595348954200745" arg: "0.01687660627067089" arg: "0.05361022800207138" arg: "-0.4034039378166199" arg: "-0.15716853737831116"
+ arg: "0.37725311517715454" arg: "-0.21682827174663544" arg: "-0.3331523537635803" arg: "0.4478318691253662" arg: "0.04937843605875969" arg: "0.11571618914604187" arg: "-0.31114915013313293" arg: "0.5024285316467285" arg: "-0.045635756105184555" arg: "0.0683443546295166" arg: "0.3868362605571747" arg: "0.020012596622109413" arg: "-0.17692965269088745" arg: "-0.09838074445724487" arg: "0.4211101830005646" arg: "0.04282836988568306" arg: "-0.002688082167878747" arg: "-0.04299991950392723" arg: "-0.4223831295967102" arg: "-0.4769083559513092"
+ arg: "-0.06549199670553207" arg: "0.3752592206001282" arg: "0.03317650780081749" arg: "0.5872426629066467" arg: "-0.1182107925415039" arg: "-0.10290710628032684" arg: "0.10983741283416748" arg: "0.1915282905101776" arg: "0.043863292783498764" arg: "0.2396492213010788" arg: "0.02648579329252243" arg: "-0.5836915969848633" arg: "0.04271770641207695" arg: "-0.07343849539756775" arg: "0.1856769174337387" arg: "-0.1381441354751587" arg: "0.006854575593024492" arg: "-0.12527717649936676" arg: "0.5925910472869873" arg: "-0.1231672465801239"
+ arg: "-0.06842511147260666" arg: "0.35675689578056335" arg: "-0.07970980554819107" arg: "0.09421294182538986" arg: "0.0697932317852974" arg: "0.23825913667678833" arg: "0.2665153443813324" arg: "-1.03485107421875" arg: "0.06772775202989578" arg: "0.06777352839708328" arg: "0.046700987964868546" arg: "0.2833155393600464" arg: "0.41568082571029663" arg: "-0.3085348904132843" arg: "-0.10529476404190063" arg: "-0.15378959476947784" arg: "0.1103232204914093" arg: "-0.10541176050901413" arg: "0.41803064942359924" arg: "0.3228841722011566"
+ }
+}
+operand {
+ name: "recurrent_to_forget_weights"
+ type: FLOAT32
+ shape { dim: 20 dim: 20 }
+ filler {
+ tag: "explicit"
+ arg: "0.06594990938901901" arg: "-0.06807135045528412" arg: "-0.4602802097797394" arg: "0.35252904891967773" arg: "-0.008028666488826275" arg: "-0.03798177093267441" arg: "0.1020055040717125" arg: "-0.3012649416923523" arg: "-0.4212363660335541" arg: "-0.03982044756412506" arg: "-0.050789862871170044" arg: "0.27053192257881165" arg: "-0.013990324921905994" arg: "-0.0896860808134079" arg: "0.1068492904305458" arg: "0.015795979648828506" arg: "-0.1659027636051178" arg: "-0.15145719051361084" arg: "-0.2084323912858963" arg: "0.057955797761678696"
+ arg: "-0.23830100893974304" arg: "-0.003962437156587839" arg: "-0.45242246985435486" arg: "0.09086526930332184" arg: "0.19679458439350128" arg: "-0.1694013625383377" arg: "0.009338092990219593" arg: "0.15751825273036957" arg: "-0.015515184961259365" arg: "0.20204924046993256" arg: "-0.09540849179029465" arg: "-0.04590551182627678" arg: "-0.24671320617198944" arg: "-0.27835050225257874" arg: "0.13569574058055878" arg: "0.40812498331069946" arg: "-0.1699746996164322" arg: "-0.1630825698375702" arg: "-0.07755500078201294" arg: "0.06263996660709381"
+ arg: "0.23931466042995453" arg: "-0.029721643775701523" arg: "-0.2644506096839905" arg: "-0.33931082487106323" arg: "0.19949766993522644" arg: "0.22016771137714386" arg: "0.2121492326259613" arg: "-0.15880468487739563" arg: "0.24859464168548584" arg: "-0.2104686051607132" arg: "-0.23938705027103424" arg: "0.0718555748462677" arg: "-0.349223256111145" arg: "-0.09349290281534195" arg: "-0.0252213254570961" arg: "0.06639551371335983" arg: "-0.046982403844594955" arg: "0.118290975689888" arg: "-0.054646264761686325" arg: "0.32363349199295044"
+ arg: "0.034989047795534134" arg: "-0.08760252594947815" arg: "0.21847350895404816" arg: "-0.4630540907382965" arg: "0.39738836884498596" arg: "-0.040741026401519775" arg: "-0.43425223231315613" arg: "-0.19687709212303162" arg: "-0.14320023357868195" arg: "-0.2363135814666748" arg: "-0.1969219595193863" arg: "-0.20639191567897797" arg: "0.12733085453510284" arg: "-0.3910582959651947" arg: "-0.44535571336746216" arg: "-0.1720532774925232" arg: "0.013997661881148815" arg: "0.3502558469772339" arg: "0.2705589830875397" arg: "0.23238658905029297"
+ arg: "-0.03908773139119148" arg: "0.011227560229599476" arg: "0.4438604414463043" arg: "0.044431619346141815" arg: "-0.14284561574459076" arg: "0.17171142995357513" arg: "0.39687684178352356" arg: "-0.04913221672177315" arg: "0.1656711995601654" arg: "0.06323841214179993" arg: "0.07228634506464005" arg: "-0.045438170433044434" arg: "0.03332178294658661" arg: "0.020702671259641647" arg: "-0.02522851713001728" arg: "-0.014919421635568142" arg: "-0.17055924236774445" arg: "0.027965081855654716" arg: "-0.2815409302711487" arg: "-0.24543267488479614"
+ arg: "-0.09287944436073303" arg: "0.019474849104881287" arg: "0.09233348816633224" arg: "-0.055800918489694595" arg: "0.2498578131198883" arg: "0.042439963668584824" arg: "-0.13415414094924927" arg: "0.5308498740196228" arg: "0.3631361126899719" arg: "-0.19753535091876984" arg: "-0.32000917196273804" arg: "0.01533063966780901" arg: "-0.46180611848831177" arg: "0.042543888092041016" arg: "-0.14118818938732147" arg: "0.03781934827566147" arg: "0.26562583446502686" arg: "0.22630012035369873" arg: "-0.2770325839519501" arg: "-0.16677603125572205"
+ arg: "-0.2678513824939728" arg: "-0.10348694026470184" arg: "-0.0867864191532135" arg: "0.06361433863639832" arg: "0.014271223917603493" arg: "-0.12187133729457855" arg: "0.03993409126996994" arg: "0.028004109859466553" arg: "-0.2755664885044098" arg: "-0.04777361452579498" arg: "0.04847913607954979" arg: "-0.1460455060005188" arg: "0.23377567529678345" arg: "-0.22863848507404327" arg: "-0.14881330728530884" arg: "-0.132281094789505" arg: "0.13625505566596985" arg: "0.18803201615810394" arg: "-0.06886259466409683" arg: "0.006628264673054218"
+ arg: "-0.18729551136493683" arg: "-0.45488521456718445" arg: "0.24042674899101257" arg: "-0.26106804609298706" arg: "0.08683270215988159" arg: "-0.10342814028263092" arg: "-0.07613679021596909" arg: "0.21038036048412323" arg: "-0.2822016179561615" arg: "-0.008857419714331627" arg: "-0.22856365144252777" arg: "0.25842568278312683" arg: "0.03938071057200432" arg: "0.27398109436035156" arg: "0.00563707435503602" arg: "0.04277018457651138" arg: "0.008633948862552643" arg: "0.18542668223381042" arg: "-0.0034568854607641697" arg: "-0.05330372974276543"
+ arg: "0.234075129032135" arg: "-0.0890774056315422" arg: "0.5264164805412292" arg: "-0.27344802021980286" arg: "-0.27058038115501404" arg: "0.01804373227059841" arg: "-0.30603522062301636" arg: "0.24496032297611237" arg: "0.013210487551987171" arg: "-0.07897075265645981" arg: "0.022406281903386116" arg: "0.0693880245089531" arg: "0.015409570187330246" arg: "-0.0077253966592252254" arg: "-0.16814486682415009" arg: "0.13719123601913452" arg: "0.1842775046825409" arg: "0.23263384401798248" arg: "-0.2894793450832367" arg: "-0.0586865171790123"
+ arg: "0.16423609852790833" arg: "0.21610338985919952" arg: "-0.004741444252431393" arg: "-0.06914521008729935" arg: "0.01680983044207096" arg: "-0.15836265683174133" arg: "0.22845181822776794" arg: "0.05326155573129654" arg: "-0.1423141062259674" arg: "0.3005701005458832" arg: "-0.4672607183456421" arg: "-0.023611126467585564" arg: "0.0609925203025341" arg: "0.10335300117731094" arg: "-0.06812556833028793" arg: "-0.07960234582424164" arg: "-0.09673355519771576" arg: "-0.002028367016464472" arg: "0.2790486216545105" arg: "0.16641387343406677"
+ arg: "0.17312130331993103" arg: "-0.12452541291713715" arg: "0.32220134139060974" arg: "-0.22103938460350037" arg: "0.030806513503193855" arg: "-0.1734783947467804" arg: "-0.15974026918411255" arg: "-0.14125876128673553" arg: "0.2410593330860138" arg: "-0.5250580310821533" arg: "-0.04526519402861595" arg: "0.32462355494499207" arg: "0.06616294384002686" arg: "-0.11993109434843063" arg: "0.048295579850673676" arg: "0.042772307991981506" arg: "-0.03536328673362732" arg: "0.3457142114639282" arg: "0.12001463770866394" arg: "0.22245542705059052"
+ arg: "0.02782665565609932" arg: "0.07261228561401367" arg: "-0.33622199296951294" arg: "0.23261497914791107" arg: "0.04636847600340843" arg: "-0.027293216437101364" arg: "0.17709527909755707" arg: "0.018871014937758446" arg: "-0.4241866171360016" arg: "-0.07660052180290222" arg: "-0.3715123236179352" arg: "0.21518565714359283" arg: "0.3018551170825958" arg: "-0.2709880769252777" arg: "-0.1473710685968399" arg: "-0.2565970718860626" arg: "-0.2993161678314209" arg: "0.1733904629945755" arg: "-0.2439367175102234" arg: "0.26016315817832947"
+ arg: "0.21543648838996887" arg: "0.02984066680073738" arg: "0.2857840359210968" arg: "-0.09354538470506668" arg: "0.3686164617538452" arg: "-0.17137302458286285" arg: "-0.13334709405899048" arg: "0.15443699061870575" arg: "-0.3447284698486328" arg: "-0.0766822099685669" arg: "-0.18963581323623657" arg: "-0.07595658302307129" arg: "0.04707604646682739" arg: "-0.23405563831329346" arg: "0.05423225834965706" arg: "-0.23418886959552765" arg: "-0.03189626708626747" arg: "0.2605202794075012" arg: "0.05496497079730034" arg: "0.173336461186409"
+ arg: "0.27640455961227417" arg: "-0.24286918342113495" arg: "-0.24134227633476257" arg: "-0.15636584162712097" arg: "0.2677306830883026" arg: "-0.2062496393918991" arg: "0.32234105467796326" arg: "-0.24469925463199615" arg: "-0.3751060664653778" arg: "-0.23786574602127075" arg: "0.03635139390826225" arg: "0.12451396137475967" arg: "0.26129764318466187" arg: "-0.12637533247470856" arg: "-0.0780411958694458" arg: "0.06617061048746109" arg: "-0.25668978691101074" arg: "0.23007889091968536" arg: "-0.08478987962007523" arg: "0.220413938164711"
+ arg: "-0.10401985049247742" arg: "0.0647420585155487" arg: "0.09111618995666504" arg: "-0.04593143239617348" arg: "0.08350320905447006" arg: "0.023905832320451736" arg: "0.16202807426452637" arg: "0.25432881712913513" arg: "0.17261511087417603" arg: "-0.011524937115609646" arg: "0.07423079758882523" arg: "0.033635564148426056" arg: "0.014234645292162895" arg: "-0.2424505203962326" arg: "0.14718832075595856" arg: "0.14837898313999176" arg: "0.04802917316555977" arg: "0.059234943240880966" arg: "0.2068481594324112" arg: "-0.09739648550748825"
+ arg: "-0.026647251099348068" arg: "-0.32062458992004395" arg: "0.16718854010105133" arg: "-0.32557788491249084" arg: "0.3088855445384979" arg: "-0.1289512664079666" arg: "-0.042579133063554764" arg: "-0.27093860507011414" arg: "-0.2899383306503296" arg: "-0.3538142442703247" arg: "0.4299084544181824" arg: "0.0619647242128849" arg: "0.5301066637039185" arg: "0.08283061534166336" arg: "-0.043685220181941986" arg: "-0.2241324931383133" arg: "-0.3621082305908203" arg: "0.014637312851846218" arg: "-0.6699166893959045" arg: "0.1908542513847351"
+ arg: "0.09265121817588806" arg: "-0.1539815366268158" arg: "0.07899756729602814" arg: "-0.04436815530061722" arg: "-0.2454068809747696" arg: "-0.18386529386043549" arg: "-0.14677776396274567" arg: "0.6323122978210449" arg: "0.39544856548309326" arg: "0.15971237421035767" arg: "0.10913989692926407" arg: "-0.042497217655181885" arg: "-0.23099665343761444" arg: "0.04052138328552246" arg: "0.34883034229278564" arg: "0.0847955048084259" arg: "-0.08281111717224121" arg: "-0.061811413615942" arg: "-0.21920911967754364" arg: "-0.08061020076274872"
+ arg: "0.1676846295595169" arg: "-0.18172425031661987" arg: "0.2923012375831604" arg: "-0.14758338034152985" arg: "0.40604183077812195" arg: "0.1405867487192154" arg: "-0.23895759880542755" arg: "-0.12314226478338242" arg: "-0.25169745087623596" arg: "-0.31885266304016113" arg: "-0.07341024279594421" arg: "0.0072786142118275166" arg: "-0.0969509556889534" arg: "-0.2571040391921997" arg: "-0.33312639594078064" arg: "-0.2451372891664505" arg: "0.1654350608587265" arg: "0.033568061888217926" arg: "0.014660677872598171" arg: "0.377450555562973"
+ arg: "0.11867852509021759" arg: "0.0411519892513752" arg: "-0.4462774097919464" arg: "0.1362692266702652" arg: "0.14434905350208282" arg: "0.045803915709257126" arg: "-0.07773952186107635" arg: "0.27392011880874634" arg: "-0.14940162003040314" arg: "0.055528268218040466" arg: "-0.5712833404541016" arg: "-0.12384487688541412" arg: "-0.16526257991790771" arg: "-0.14264139533042908" arg: "-0.144387885928154" arg: "0.26574134826660156" arg: "-0.17008106410503387" arg: "0.18583066761493683" arg: "-0.17407818138599396" arg: "0.0841611996293068"
+ arg: "0.07560589909553528" arg: "0.10793375223875046" arg: "0.1858903020620346" arg: "0.17929036915302277" arg: "-0.014105351641774178" arg: "0.07215336710214615" arg: "0.034808021038770676" arg: "-0.023832565173506737" arg: "-0.005115351639688015" arg: "0.03793272748589516" arg: "-0.06749884784221649" arg: "-0.2857394814491272" arg: "-0.22204333543777466" arg: "0.07521218806505203" arg: "-0.22578758001327515" arg: "0.10578799247741699" arg: "-0.0599808394908905" arg: "-0.03470684587955475" arg: "0.04690929129719734" arg: "0.009294633753597736"
+ }
+}
+operand {
+ name: "recurrent_to_cell_weights"
+ type: FLOAT32
+ shape { dim: 20 dim: 20 }
+ filler {
+ tag: "explicit"
+ arg: "0.15393993258476257" arg: "-0.1119932010769844" arg: "0.18920856714248657" arg: "0.1281609982252121" arg: "-0.12339968234300613" arg: "-0.08566756546497345" arg: "-0.13214115798473358" arg: "-0.0587150976061821" arg: "-0.1808837354183197" arg: "-0.08994600921869278" arg: "0.18792179226875305" arg: "0.31002986431121826" arg: "-0.23494939506053925" arg: "-0.13358882069587708" arg: "0.21817533671855927" arg: "-0.1144614890217781" arg: "-0.032731179147958755" arg: "0.1316293478012085" arg: "-0.21290943026542664" arg: "0.033711988478899"
+ arg: "0.2093488723039627" arg: "0.009070725180208683" arg: "-0.34243470430374146" arg: "0.2588043808937073" arg: "-0.12327506393194199" arg: "-0.06977886706590652" arg: "0.18403127789497375" arg: "-0.037799157202243805" arg: "-0.10396076738834381" arg: "0.4311140179634094" arg: "-0.16276852786540985" arg: "0.3511127233505249" arg: "0.1728871464729309" arg: "-0.3596697151660919" arg: "-0.029892150312662125" arg: "-0.13553234934806824" arg: "0.03372793272137642" arg: "-0.3119524121284485" arg: "0.04722945764660835" arg: "-0.0335264652967453"
+ arg: "-0.19715268909931183" arg: "0.18051989376544952" arg: "-0.25875094532966614" arg: "-0.2308073490858078" arg: "0.12977799773216248" arg: "0.11133529990911484" arg: "-0.10884438455104828" arg: "-0.006393382791429758" arg: "-0.0046616485342383385" arg: "0.07372598350048065" arg: "-0.17514462769031525" arg: "-0.06907986104488373" arg: "-0.1077096164226532" arg: "-0.2481498420238495" arg: "-0.14520783722400665" arg: "-0.06911041587591171" arg: "-0.02821161597967148" arg: "0.14755520224571228" arg: "0.3900660574436188" arg: "0.1893186867237091"
+ arg: "0.09458756446838379" arg: "0.054943062365055084" arg: "0.4107792377471924" arg: "0.05842319130897522" arg: "0.09731859713792801" arg: "-0.06020563840866089" arg: "0.2529062032699585" arg: "-0.2720320224761963" arg: "-0.10796058923006058" arg: "-0.08254134654998779" arg: "-0.07210174947977066" arg: "-0.36896562576293945" arg: "0.08478402346372604" arg: "0.15760378539562225" arg: "-0.12006833404302597" arg: "0.008080476894974709" arg: "0.3506588339805603" arg: "0.25160735845565796" arg: "0.08777479827404022" arg: "0.273798406124115"
+ arg: "0.01723896898329258" arg: "0.11717648804187775" arg: "-0.2846356928348541" arg: "-0.07879329472780228" arg: "0.2186465710401535" arg: "0.09551840275526047" arg: "0.04083137586712837" arg: "-0.0763259083032608" arg: "-0.17741726338863373" arg: "-0.22416481375694275" arg: "0.08309032022953033" arg: "-0.07316568493843079" arg: "-0.004594864323735237" arg: "0.04726291820406914" arg: "-0.060947902500629425" arg: "-0.02379523031413555" arg: "0.1387377828359604" arg: "0.0520065538585186" arg: "-0.009234771132469177" arg: "0.20820368826389313"
+ arg: "0.2090202122926712" arg: "0.09088768064975739" arg: "0.0712779089808464" arg: "0.4721727669239044" arg: "-0.2852536737918854" arg: "-0.030319523066282272" arg: "-0.15199345350265503" arg: "0.03313468396663666" arg: "0.130229651927948" arg: "0.11190740019083023" arg: "-0.033711377531290054" arg: "0.26203152537345886" arg: "0.1747232973575592" arg: "0.06380274146795273" arg: "0.10935788601636887" arg: "0.03934641182422638" arg: "-0.24481335282325745" arg: "-0.2071755826473236" arg: "0.21853256225585938" arg: "-0.05010126531124115"
+ arg: "-0.08034007996320724" arg: "0.06709744036197662" arg: "0.15941183269023895" arg: "0.3035742938518524" arg: "0.06431770324707031" arg: "0.22864562273025513" arg: "0.2153673619031906" arg: "-0.16501116752624512" arg: "0.08141324669122696" arg: "-0.1909857988357544" arg: "0.09936768561601639" arg: "0.05850536748766899" arg: "-0.007407554890960455" arg: "-0.0750204399228096" arg: "-0.011765131726861" arg: "-0.1525736153125763" arg: "0.2009558528661728" arg: "0.057866111397743225" arg: "-0.3028014302253723" arg: "0.0406017005443573"
+ arg: "0.3206914961338043" arg: "0.013377382420003414" arg: "0.08953910320997238" arg: "0.1381121575832367" arg: "-0.19908195734024048" arg: "-0.20327427983283997" arg: "0.10023070126771927" arg: "0.0965537428855896" arg: "0.09599238634109497" arg: "0.012801108881831169" arg: "-0.008848292753100395" arg: "-0.0921083316206932" arg: "-0.2611875534057617" arg: "0.20738714933395386" arg: "0.18287070095539093" arg: "0.06643958389759064" arg: "0.08770095556974411" arg: "-0.20850636065006256" arg: "-0.037345774471759796" arg: "-0.3267252743244171"
+ arg: "0.04550359770655632" arg: "0.1751193106174469" arg: "0.0021270427387207747" arg: "-0.12569129467010498" arg: "0.09540387243032455" arg: "0.2459857165813446" arg: "0.24060799181461334" arg: "-0.00685726385563612" arg: "-0.07234424352645874" arg: "0.09571491926908493" arg: "-0.3154931366443634" arg: "0.13833951950073242" arg: "0.17639833688735962" arg: "0.3401899039745331" arg: "-0.25327083468437195" arg: "0.003526201006025076" arg: "0.30618157982826233" arg: "-0.31580427289009094" arg: "0.03264538198709488" arg: "-0.102194644510746"
+ arg: "0.14761067926883698" arg: "0.02882370911538601" arg: "0.08630412817001343" arg: "0.18815916776657104" arg: "-0.17160621285438538" arg: "-0.3442608118057251" arg: "-0.12482235580682755" arg: "-0.3455544710159302" arg: "-0.0847967267036438" arg: "0.16379626095294952" arg: "0.11732957512140274" arg: "0.18391959369182587" arg: "0.2112390100955963" arg: "0.15884174406528473" arg: "-0.1210162416100502" arg: "0.03699047863483429" arg: "0.07807657867670059" arg: "-0.14232687652111053" arg: "0.007268161047250032" arg: "0.05068487673997879"
+ arg: "0.04104536026716232" arg: "0.201126366853714" arg: "0.19676734507083893" arg: "-0.03136518597602844" arg: "0.057088401168584824" arg: "0.217696413397789" arg: "0.0404636487364769" arg: "-0.16933280229568481" arg: "0.11017945408821106" arg: "0.16551776230335236" arg: "0.15519888699054718" arg: "0.20411789417266846" arg: "0.17852722108364105" arg: "0.24985377490520477" arg: "0.03789833188056946" arg: "-0.19242724776268005" arg: "0.0679841935634613" arg: "-0.08297871053218842" arg: "-0.017301911488175392" arg: "-0.07224911451339722"
+ arg: "-0.15029805898666382" arg: "-0.022065505385398865" arg: "0.3310281038284302" arg: "-0.0074359094724059105" arg: "0.1291237622499466" arg: "-0.3258497416973114" arg: "-0.020609457045793533" arg: "0.07960690557956696" arg: "0.18422964215278625" arg: "-0.015700064599514008" arg: "-0.0406377948820591" arg: "0.07060065865516663" arg: "-0.05204642191529274" arg: "-0.0752851590514183" arg: "0.29478275775909424" arg: "0.09939233958721161" arg: "-0.1349070519208908" arg: "0.08028685301542282" arg: "-0.1612706184387207" arg: "-0.3498779833316803"
+ arg: "0.09452734142541885" arg: "-0.14093227684497833" arg: "0.1231885701417923" arg: "-0.17776770889759064" arg: "-0.21792110800743103" arg: "0.008278626017272472" arg: "-0.11924610286951065" arg: "0.12319722771644592" arg: "0.09757496416568756" arg: "0.3845261037349701" arg: "0.06491772085428238" arg: "0.02287365309894085" arg: "0.10664971172809601" arg: "-0.2075091153383255" arg: "0.16306491196155548" arg: "-0.09945328533649445" arg: "0.03647858276963234" arg: "0.31740331649780273" arg: "-0.0575806088745594" arg: "-0.15998433530330658"
+ arg: "0.05049542337656021" arg: "0.10578017681837082" arg: "0.4129166305065155" arg: "-0.26145657896995544" arg: "0.09070956707000732" arg: "0.13368085026741028" arg: "0.047888197004795074" arg: "-0.26703593134880066" arg: "0.03738849610090256" arg: "0.0096968412399292" arg: "-0.02515929564833641" arg: "-0.08761339634656906" arg: "-0.08801304548978806" arg: "0.11130105704069138" arg: "-0.1670377254486084" arg: "-0.38100311160087585" arg: "0.08594627678394318" arg: "0.15826018154621124" arg: "0.15142755210399628" arg: "0.3665761351585388"
+ arg: "-0.2884967029094696" arg: "0.034480463713407516" arg: "0.080620676279068" arg: "0.0942501500248909" arg: "-0.17697006464004517" arg: "-0.04557788744568825" arg: "0.12102261930704117" arg: "-0.08095056563615799" arg: "0.20667794346809387" arg: "0.0005014429334551096" arg: "-0.24826794862747192" arg: "-0.06553015112876892" arg: "-0.24456636607646942" arg: "-0.004251034930348396" arg: "-0.02371463179588318" arg: "0.13635343313217163" arg: "0.1759263128042221" arg: "-0.2496115118265152" arg: "0.3222438097000122" arg: "-0.06805617362260818"
+ arg: "0.3151903748512268" arg: "-0.07969710230827332" arg: "-0.338588684797287" arg: "0.09406647831201553" arg: "-0.22847072780132294" arg: "-0.13792040944099426" arg: "0.2320234179496765" arg: "-0.10531327873468399" arg: "-0.21394342184066772" arg: "-0.05493509769439697" arg: "0.0776442140340805" arg: "-0.07514091581106186" arg: "0.23195593059062958" arg: "-0.016244227066636086" arg: "-0.12812721729278564" arg: "0.1941227912902832" arg: "-0.08763367682695389" arg: "-0.17611214518547058" arg: "0.05966200307011604" arg: "0.03175244480371475"
+ arg: "-0.3181533217430115" arg: "0.14994001388549805" arg: "-0.0519041046500206" arg: "0.10318135470151901" arg: "0.12232168763875961" arg: "0.12503929436206818" arg: "-0.042770031839609146" arg: "0.028692282736301422" arg: "0.041286103427410126" arg: "0.008777778595685959" arg: "-0.15041261911392212" arg: "-0.3636454939842224" arg: "-0.2648666799068451" arg: "0.12697425484657288" arg: "-0.04115947335958481" arg: "-0.1794285625219345" arg: "0.3467434346675873" arg: "0.09371137619018555" arg: "0.39284154772758484" arg: "0.10154542326927185"
+ arg: "0.03654832765460014" arg: "-0.0001514707983005792" arg: "0.05111170932650566" arg: "-0.43743401765823364" arg: "0.19728508591651917" arg: "0.10978388041257858" arg: "-0.2930853068828583" arg: "0.31976282596588135" arg: "0.07013546675443649" arg: "0.045205868780612946" arg: "0.12697115540504456" arg: "-0.17158143222332" arg: "-0.4531923532485962" arg: "-0.03989870846271515" arg: "0.028936142101883888" arg: "0.16511154174804688" arg: "-0.10384245961904526" arg: "-0.14950263500213623" arg: "0.10117360949516296" arg: "-0.1518079936504364"
+ arg: "0.19067397713661194" arg: "-0.011113183572888374" arg: "-0.3417884409427643" arg: "0.005517064593732357" arg: "-0.041157711297273636" arg: "-0.3314608931541443" arg: "0.012132381089031696" arg: "-0.034892488270998" arg: "-0.07512284815311432" arg: "0.1292932778596878" arg: "-0.22919918596744537" arg: "0.2461051344871521" arg: "0.22426217794418335" arg: "0.006595896556973457" arg: "-0.1439153254032135" arg: "-0.26290032267570496" arg: "-0.1798022985458374" arg: "-0.12984399497509003" arg: "0.065561443567276" arg: "-0.06624792516231537"
+ arg: "-0.19407491385936737" arg: "-0.0409831777215004" arg: "-0.008038614876568317" arg: "-0.03238639608025551" arg: "-0.04466156288981438" arg: "0.1601162552833557" arg: "-0.052234116941690445" arg: "0.15304076671600342" arg: "0.000367005035514012" arg: "0.022618206217885017" arg: "0.15538087487220764" arg: "0.25994208455085754" arg: "0.12255910038948059" arg: "-0.13710808753967285" arg: "-0.016998453065752983" arg: "0.2590976059436798" arg: "-0.011389931663870811" arg: "-0.034473538398742676" arg: "0.05619646608829498" arg: "0.08633460104465485"
+ }
+}
+operand {
+ name: "recurrent_to_output_weights"
+ type: FLOAT32
+ shape { dim: 20 dim: 20 }
+ filler {
+ tag: "explicit"
+ arg: "-0.35909947752952576" arg: "0.12187856435775757" arg: "0.14746889472007751" arg: "0.3129103481769562" arg: "-0.5547925233840942" arg: "-0.26812028884887695" arg: "0.2384958267211914" arg: "-0.47153329849243164" arg: "0.07567869871854782" arg: "0.28245386481285095" arg: "0.4810175597667694" arg: "0.031078442931175232" arg: "0.39412668347358704" arg: "0.001231769216246903" arg: "-0.0179451797157526" arg: "-0.3484187126159668" arg: "-0.1315481960773468" arg: "-0.19073595106601715" arg: "0.2959749102592468" arg: "-0.15430164337158203"
+ arg: "0.16478729248046875" arg: "-0.18185187876224518" arg: "-0.42523953318595886" arg: "0.28228330612182617" arg: "-0.5983712077140808" arg: "-0.31367194652557373" arg: "-0.3297293186187744" arg: "0.1790262758731842" arg: "-0.06721899658441544" arg: "0.27287885546684265" arg: "0.1248977854847908" arg: "0.529021680355072" arg: "-0.3688035011291504" arg: "-0.2915802299976349" arg: "0.5874091982841492" arg: "0.6444711685180664" arg: "-0.5507888793945312" arg: "-0.4991227984428406" arg: "-0.5332760810852051" arg: "-0.17446967959403992"
+ arg: "-0.10843317955732346" arg: "-0.2629949450492859" arg: "0.21471929550170898" arg: "0.09879318624734879" arg: "-0.0769701600074768" arg: "-0.23829951882362366" arg: "-0.14967726171016693" arg: "0.1522980034351349" arg: "0.0016457909950986505" arg: "0.07313574850559235" arg: "0.359075129032135" arg: "-0.29160916805267334" arg: "-0.1623256802558899" arg: "0.3452284634113312" arg: "0.11389480531215668" arg: "0.056126005947589874" arg: "0.1680738776922226" arg: "0.054511312395334244" arg: "-0.3061401844024658" arg: "-0.37002867460250854"
+ arg: "0.8169177174568176" arg: "-0.10186938941478729" arg: "0.035952117294073105" arg: "0.31021371483802795" arg: "-0.3045564293861389" arg: "0.16454839706420898" arg: "-0.007755322381854057" arg: "0.3747217655181885" arg: "0.028079498559236526" arg: "0.6176130771636963" arg: "0.3060242235660553" arg: "-0.1116616427898407" arg: "-0.0698426142334938" arg: "0.02596282958984375" arg: "0.40301159024238586" arg: "0.22842562198638916" arg: "-0.7979361414909363" arg: "-0.21555794775485992" arg: "0.22447574138641357" arg: "-0.09180140495300293"
+ arg: "0.6331534385681152" arg: "-0.07374905049800873" arg: "0.0910644456744194" arg: "0.7608184218406677" arg: "-0.05133755877614021" arg: "-0.2353716641664505" arg: "0.7358492612838745" arg: "-0.2672101557254791" arg: "0.23726515471935272" arg: "0.21083518862724304" arg: "0.39143991470336914" arg: "0.4840562045574188" arg: "0.42181020975112915" arg: "-0.02092970348894596" arg: "-0.00017688501975499094" arg: "-0.02578321099281311" arg: "-0.33251720666885376" arg: "-0.2569231390953064" arg: "-0.27487626671791077" arg: "-0.30479907989501953"
+ arg: "0.12656816840171814" arg: "-0.35632675886154175" arg: "0.05882206931710243" arg: "0.1031598150730133" arg: "-0.07713407278060913" arg: "0.06100684776902199" arg: "-0.1301981508731842" arg: "0.18870045244693756" arg: "0.34141841530799866" arg: "-0.5403046011924744" arg: "0.36341556906700134" arg: "0.4443875551223755" arg: "0.11223088949918747" arg: "0.07594747096300125" arg: "0.17846737802028656" arg: "0.30533328652381897" arg: "-0.22884678840637207" arg: "-0.2669167220592499" arg: "-0.7768117189407349" arg: "-0.37001490592956543"
+ arg: "0.1587837189435959" arg: "-0.191584974527359" arg: "-0.6631219387054443" arg: "0.06661315262317657" arg: "-0.4396548271179199" arg: "-0.4596345126628876" arg: "0.1870720386505127" arg: "0.1951659470796585" arg: "-0.08539465069770813" arg: "0.3470593988895416" arg: "0.19268564879894257" arg: "-0.046538181602954865" arg: "0.01603168435394764" arg: "-0.07825833559036255" arg: "0.09992441534996033" arg: "0.08686434477567673" arg: "-0.38704144954681396" arg: "-0.47036734223365784" arg: "0.0524212010204792" arg: "-0.372523695230484"
+ arg: "0.006748664658516645" arg: "-0.08730413764715195" arg: "-0.16790643334388733" arg: "0.31276655197143555" arg: "-0.44439390301704407" arg: "-0.23598124086856842" arg: "0.0694812685251236" arg: "0.38802817463874817" arg: "-0.012220374308526516" arg: "0.38149744272232056" arg: "-0.03641294315457344" arg: "0.0744020864367485" arg: "-0.08323682844638824" arg: "0.11382298171520233" arg: "0.2919921278953552" arg: "0.31642037630081177" arg: "-0.401195764541626" arg: "0.09580203890800476" arg: "-0.1458958089351654" arg: "-0.3990739583969116"
+ arg: "-0.017351288348436356" arg: "-0.15279312431812286" arg: "0.21107575297355652" arg: "0.23132845759391785" arg: "0.12567712366580963" arg: "0.0009088824735954404" arg: "-0.5392304062843323" arg: "-0.503669023513794" arg: "0.1523285210132599" arg: "0.2695973813533783" arg: "0.2366502732038498" arg: "0.3115360140800476" arg: "-0.3943549692630768" arg: "0.6869263648986816" arg: "0.20123623311519623" arg: "-0.003731918754056096" arg: "0.2607108950614929" arg: "-0.3499254584312439" arg: "-0.004152949899435043" arg: "-0.1376078873872757"
+ arg: "0.4573622941970825" arg: "0.008549842983484268" arg: "0.1646938920021057" arg: "-0.15896114706993103" arg: "-0.4295574128627777" arg: "0.06403962522745132" arg: "-0.012177926488220692" arg: "0.5018934607505798" arg: "0.0375320166349411" arg: "0.43595317006111145" arg: "-0.05773438140749931" arg: "0.13049593567848206" arg: "-0.1468954086303711" arg: "-0.4093998372554779" arg: "0.4959154427051544" arg: "0.7173134684562683" arg: "-0.5174667239189148" arg: "-0.16707409918308258" arg: "-0.06118558719754219" arg: "-0.11275004595518112"
+ arg: "0.08968205004930496" arg: "0.3198257088661194" arg: "0.07224604487419128" arg: "0.5600743889808655" arg: "0.024834752082824707" arg: "-0.02439100854098797" arg: "-0.1513833850622177" arg: "0.13906888663768768" arg: "0.06407716870307922" arg: "0.5332576036453247" arg: "0.24956916272640228" arg: "-0.044385701417922974" arg: "-0.4433465301990509" arg: "-0.19094131886959076" arg: "0.4768398106098175" arg: "0.21503591537475586" arg: "-0.218861386179924" arg: "-0.4321509003639221" arg: "-0.24130387604236603" arg: "-0.07977084070444107"
+ arg: "0.002716424874961376" arg: "-0.1713045984506607" arg: "-0.12604790925979614" arg: "-0.03560760244727135" arg: "-0.5757992267608643" arg: "-0.1557251513004303" arg: "-0.05827505886554718" arg: "0.3337538540363312" arg: "-0.4115898907184601" arg: "0.5126633048057556" arg: "0.14806263148784637" arg: "0.40081098675727844" arg: "0.8833869695663452" arg: "-0.19723086059093475" arg: "0.09533816576004028" arg: "0.03869156911969185" arg: "-0.2973725199699402" arg: "0.022853707894682884" arg: "-0.0228166151791811" arg: "-0.4052131772041321"
+ arg: "0.12930545210838318" arg: "0.01575206033885479" arg: "-0.21314911544322968" arg: "0.5510196685791016" arg: "0.06540991365909576" arg: "-0.07084762305021286" arg: "0.3234975337982178" arg: "0.19345852732658386" arg: "0.16359369456768036" arg: "-0.02992691472172737" arg: "0.07857825607061386" arg: "0.3506908714771271" arg: "0.16494658589363098" arg: "0.07570064812898636" arg: "0.32486459612846375" arg: "-0.14951008558273315" arg: "-0.022363830357789993" arg: "-0.42179420590400696" arg: "-0.24661937355995178" arg: "-0.08302409946918488"
+ arg: "0.2494393140077591" arg: "-0.12944501638412476" arg: "-0.010796070098876953" arg: "0.15976394712924957" arg: "-0.01106332242488861" arg: "0.25831347703933716" arg: "0.18664048612117767" arg: "-0.03495928645133972" arg: "0.01873226836323738" arg: "0.02704462595283985" arg: "0.1773315966129303" arg: "0.09905895590782166" arg: "0.137725368142128" arg: "-0.4195314347743988" arg: "0.20205777883529663" arg: "0.25744083523750305" arg: "-0.4343162178993225" arg: "0.08337675034999847" arg: "-0.24768808484077454" arg: "0.05348324030637741"
+ arg: "-0.07421242445707321" arg: "0.08401253819465637" arg: "0.24182510375976562" arg: "-0.19996227324008942" arg: "-0.26596978306770325" arg: "-0.10460428893566132" arg: "-0.09030365198850632" arg: "0.3622499406337738" arg: "0.32519716024398804" arg: "0.3067288398742676" arg: "-0.0695832222700119" arg: "-0.10316962748765945" arg: "-0.09733156114816666" arg: "0.4681766629219055" arg: "0.3733525574207306" arg: "-0.013295430690050125" arg: "-0.11883660405874252" arg: "-0.10412082821130753" arg: "0.05678151175379753" arg: "-0.11783196032047272"
+ arg: "0.048583026975393295" arg: "-0.9528340101242065" arg: "0.10752814263105392" arg: "0.273784339427948" arg: "0.23048622906208038" arg: "-0.2551514804363251" arg: "-0.21344983577728271" arg: "0.2589189112186432" arg: "-0.1326867789030075" arg: "-0.14273332059383392" arg: "0.11125936359167099" arg: "0.10763772577047348" arg: "-0.3638816177845001" arg: "0.6586386561393738" arg: "0.6191070675849915" arg: "0.2745305895805359" arg: "-0.21111124753952026" arg: "0.23943224549293518" arg: "-0.5838378667831421" arg: "-0.7447165250778198"
+ arg: "0.27415889501571655" arg: "-0.10696078091859818" arg: "-0.1905016303062439" arg: "0.17716637253761292" arg: "-0.17008160054683685" arg: "-0.38646024465560913" arg: "0.17075011134147644" arg: "-0.0971580222249031" arg: "0.36582818627357483" arg: "0.3553922176361084" arg: "0.3533395528793335" arg: "0.46518567204475403" arg: "0.12306690216064453" arg: "0.3765827715396881" arg: "0.27485108375549316" arg: "0.026894697919487953" arg: "-0.13947726786136627" arg: "-0.4675980508327484" arg: "0.000053708172345068306" arg: "-0.1514354646205902"
+ arg: "0.034218866378068924" arg: "-0.3962448537349701" arg: "-0.08128349483013153" arg: "0.10788826644420624" arg: "-0.3110845983028412" arg: "0.25610488653182983" arg: "-0.5693814754486084" arg: "0.6281890273094177" arg: "0.0010718648554757237" arg: "-0.21038493514060974" arg: "0.18425892293453217" arg: "-0.35341814160346985" arg: "-0.2984526455402374" arg: "0.29100173711776733" arg: "0.4346262514591217" arg: "-0.02309197559952736" arg: "0.06577077507972717" arg: "-0.24334858357906342" arg: "-0.34281492233276367" arg: "-0.4032599627971649"
+ arg: "0.34936246275901794" arg: "0.3322518467903137" arg: "-0.2656654119491577" arg: "0.22830642759799957" arg: "-0.11201204359531403" arg: "-0.1707642823457718" arg: "0.007749658077955246" arg: "0.43952593207359314" arg: "0.14750634133815765" arg: "0.42360368371009827" arg: "0.1105399876832962" arg: "-0.06718066334724426" arg: "-0.175845667719841" arg: "0.023229194805026054" arg: "0.35441142320632935" arg: "0.35180309414863586" arg: "-0.561530351638794" arg: "-0.1788090020418167" arg: "0.05351807549595833" arg: "-0.3240300118923187"
+ arg: "0.2829385995864868" arg: "0.09240324050188065" arg: "0.10970980674028397" arg: "1.01627779006958" arg: "-0.3717207908630371" arg: "-0.2776918113231659" arg: "0.6677582263946533" arg: "-0.2235853224992752" arg: "-0.06214175000786781" arg: "0.23073340952396393" arg: "0.3371483087539673" arg: "-0.029265087097883224" arg: "0.25156235694885254" arg: "0.43319517374038696" arg: "0.035503044724464417" arg: "0.12156634777784348" arg: "-0.24198615550994873" arg: "-0.42002007365226746" arg: "-0.11373946070671082" arg: "-0.28098201751708984"
+ }
+}
+operand {
+ name: "input_gate_bias"
+ type: FLOAT32
+ shape { dim: 20 }
+ filler {
+ tag: "explicit"
+ arg: "0.39238446950912476" arg: "-0.040046464651823044" arg: "0.13657712936401367" arg: "0.35934528708457947" arg: "0.321681946516037" arg: "0.0616583526134491" arg: "0.11477429419755936" arg: "0.20044274628162384" arg: "0.011154969222843647" arg: "0.24244074523448944" arg: "0.27598848938941956" arg: "0.4028998911380768" arg: "0.21931242942810059" arg: "0.3108941316604614" arg: "0.1841004192829132" arg: "0.14638805389404297" arg: "0.46200960874557495" arg: "0.24594353139400482" arg: "0.07526364177465439" arg: "-0.22416549921035767"
+ }
+}
+operand {
+ name: "forget_gate_bias"
+ type: FLOAT32
+ shape { dim: 20 }
+ filler {
+ tag: "explicit"
+ arg: "1.2047474384307861" arg: "1.2191035747528076" arg: "0.871356725692749" arg: "1.0395587682724" arg: "1.150162935256958" arg: "1.0623992681503296" arg: "1.0699368715286255" arg: "1.0769526958465576" arg: "1.1270850896835327" arg: "1.151424527168274" arg: "1.1118133068084717" arg: "1.150691032409668" arg: "0.9700227975845337" arg: "1.0458472967147827" arg: "1.0566719770431519" arg: "1.036710262298584" arg: "1.1118052005767822" arg: "0.9024409651756287" arg: "0.968490481376648" arg: "1.0276471376419067"
+ }
+}
+operand {
+ name: "cell_gate_bias"
+ type: FLOAT32
+ shape { dim: 20 }
+ filler {
+ tag: "explicit"
+ arg: "0.027094807475805283" arg: "0.08994408696889877" arg: "0.048134010285139084" arg: "-0.24551978707313538" arg: "0.016918446868658066" arg: "0.0765792727470398" arg: "-0.0031757261604070663" arg: "0.1118675172328949" arg: "-0.0806640088558197" arg: "0.003836719784885645" arg: "-0.02241756208240986" arg: "0.1585727483034134" arg: "0.07568418234586716" arg: "-0.008664635010063648" arg: "-0.0036717928014695644" arg: "-0.036391645669937134" arg: "-0.012257440015673637" arg: "0.05013420805335045" arg: "-0.014501656405627728" arg: "0.22225865721702576"
+ }
+}
+operand {
+ name: "output_gate_bias"
+ type: FLOAT32
+ shape { dim: 20 }
+ filler {
+ tag: "explicit"
+ arg: "0.2127157747745514" arg: "0.3538936972618103" arg: "0.283548504114151" arg: "1.0181398391723633" arg: "0.40145981311798096" arg: "0.27438417077064514" arg: "0.2998640537261963" arg: "0.5031589865684509" arg: "0.0011858611833304167" arg: "0.5359497666358948" arg: "0.5380197763442993" arg: "0.7726592421531677" arg: "0.27104392647743225" arg: "0.4670105576515198" arg: "0.47913044691085815" arg: "0.4600663185119629" arg: "0.3923473060131073" arg: "-0.03211608901619911" arg: "0.6604049205780029" arg: "0.2065485268831253"
+ }
+}
+operand {
+ name: "activation_state"
+ type: FLOAT32
+ shape { dim: 1 dim: 20 }
+ filler {
+ tag: "explicit"
+ }
+}
+operand {
+ name: "cell_state"
+ type: FLOAT32
+ shape { dim: 1 dim: 20 }
+ filler {
+ tag: "explicit"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 28 dim: 20 }
+}
+operation {
+ type: "UnidirectionalSequenceLSTM"
+ unidirectional_sequence_lstm_options {
+ activation: TANH
+ cell_clip: 10.0
+ proj_clip: 0.0
+ time_major: false
+ asymmetric_quantize_inputs: false
+ }
+ input: "ifm"
+ input: "input_to_input_weights"
+ input: "input_to_forget_weights"
+ input: "input_to_cell_weights"
+ input: "input_to_output_weights"
+ input: "recurrent_to_input_weights"
+ input: "recurrent_to_forget_weights"
+ input: "recurrent_to_cell_weights"
+ input: "recurrent_to_output_weights"
+ input: ""
+ input: ""
+ input: ""
+ input: "input_gate_bias"
+ input: "forget_gate_bias"
+ input: "cell_gate_bias"
+ input: "output_gate_bias"
+ input: ""
+ input: ""
+ input: "activation_state"
+ input: "cell_state"
+ input: ""
+ input: ""
+ input: ""
+ input: ""
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_001/test.reverse b/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_002/test.recipe b/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_002/test.recipe
new file mode 100644
index 000000000..687f4f5ac
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_002/test.recipe
@@ -0,0 +1,236 @@
+#
+# NOTE generated by tflchef-reverse with res/TensorFlowPythonExamples/examples/LSTM_retseq
+#
+operand {
+ name: "serving_default_input_1:0"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ }
+}
+operand {
+ name: "sequential/lstm/zeros"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 2
+ }
+ is_variable: true
+}
+operand {
+ name: "arith.constant"
+ type: FLOAT32
+ shape {
+ dim: 2
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "-0.630624"
+ arg: "0.0173528"
+ arg: "0.386502"
+ arg: "0.274398"
+ }
+}
+operand {
+ name: "arith.constant1"
+ type: FLOAT32
+ shape {
+ dim: 2
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "-0.215122"
+ arg: "0.0211586"
+ arg: "0.374135"
+ arg: "0.123864"
+ }
+}
+operand {
+ name: "arith.constant2"
+ type: FLOAT32
+ shape {
+ dim: 2
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.395891"
+ arg: "-0.516027"
+ arg: "0.311454"
+ arg: "0.423152"
+ }
+}
+operand {
+ name: "arith.constant3"
+ type: FLOAT32
+ shape {
+ dim: 2
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "-0.107339"
+ arg: "0.408966"
+ arg: "0.0376898"
+ arg: "-0.544077"
+ }
+}
+operand {
+ name: "arith.constant4"
+ type: FLOAT32
+ shape {
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "0"
+ arg: "0"
+ }
+}
+operand {
+ name: "arith.constant5"
+ type: FLOAT32
+ shape {
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "1"
+ }
+}
+operand {
+ name: "arith.constant6"
+ type: FLOAT32
+ shape {
+ dim: 2
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.229282"
+ arg: "-0.0678827"
+ arg: "0.449137"
+ arg: "0.470665"
+ arg: "-0.563606"
+ arg: "-0.290711"
+ arg: "0.343602"
+ arg: "-0.427935"
+ }
+}
+operand {
+ name: "arith.constant7"
+ type: FLOAT32
+ shape {
+ dim: 2
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "-0.443107"
+ arg: "-0.504989"
+ arg: "-0.0738791"
+ arg: "-0.538787"
+ arg: "0.440037"
+ arg: "0.268466"
+ arg: "0.0149825"
+ arg: "-0.42883"
+ }
+}
+operand {
+ name: "arith.constant8"
+ type: FLOAT32
+ shape {
+ dim: 2
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.523419"
+ arg: "-0.131416"
+ arg: "-0.328037"
+ arg: "-0.636753"
+ arg: "-0.0726868"
+ arg: "-0.347395"
+ arg: "0.390772"
+ arg: "0.467617"
+ }
+}
+operand {
+ name: "arith.constant9"
+ type: FLOAT32
+ shape {
+ dim: 2
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "-0.184187"
+ arg: "-0.636662"
+ arg: "0.363794"
+ arg: "0.428437"
+ arg: "-0.431681"
+ arg: "-0.617431"
+ arg: "0.53586"
+ arg: "0.686365"
+ }
+}
+operand {
+ name: "sequential/lstm/zeros1"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 2
+ }
+ is_variable: true
+}
+operand {
+ name: "StatefulPartitionedCall:0"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 2
+ }
+}
+operation {
+ type: "UnidirectionalSequenceLSTM"
+ input: "serving_default_input_1:0"
+ input: "arith.constant9"
+ input: "arith.constant8"
+ input: "arith.constant7"
+ input: "arith.constant6"
+ input: "arith.constant3"
+ input: "arith.constant2"
+ input: "arith.constant1"
+ input: "arith.constant"
+ input: ""
+ input: ""
+ input: ""
+ input: "arith.constant4"
+ input: "arith.constant5"
+ input: "arith.constant4"
+ input: "arith.constant4"
+ input: ""
+ input: ""
+ input: "sequential/lstm/zeros"
+ input: "sequential/lstm/zeros1"
+ input: ""
+ input: ""
+ input: ""
+ input: ""
+ output: "StatefulPartitionedCall:0"
+ unidirectional_sequence_lstm_options {
+ activation: TANH
+ cell_clip: 10
+ proj_clip: 0
+ time_major: false
+ asymmetric_quantize_inputs: false
+ }
+}
+input: "serving_default_input_1:0"
+output: "StatefulPartitionedCall:0"
diff --git a/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_002/test.reverse b/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_002/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_002/test.reverse
diff --git a/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_003/test.recipe b/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_003/test.recipe
new file mode 100644
index 000000000..3e4f6f1b5
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_003/test.recipe
@@ -0,0 +1,193 @@
+operand {
+ name: "serving_default_input_16:0"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ }
+ is_variable: false
+}
+operand {
+ name: "sequential_15/lstm_15/zeros"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ }
+ is_variable: true
+}
+operand {
+ name: "arith.constant"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "-0.960517"
+ }
+ is_variable: false
+}
+operand {
+ name: "arith.constant1"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "-0.182756"
+ }
+}
+operand {
+ name: "arith.constant2"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.028718"
+ }
+}
+operand {
+ name: "arith.constant3"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.207806"
+ }
+}
+operand {
+ name: "arith.constant4"
+ type: FLOAT32
+ shape {
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "0"
+ }
+}
+operand {
+ name: "arith.constant5"
+ type: FLOAT32
+ shape {
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ }
+}
+operand {
+ name: "arith.constant6"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "-0.476033"
+ }
+}
+operand {
+ name: "arith.constant7"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.152916"
+ }
+}
+operand {
+ name: "arith.constant8"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.308059"
+ }
+}
+operand {
+ name: "arith.constant9"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.329067"
+ }
+}
+operand {
+ name: "sequential_15/lstm_15/zeros1"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ }
+ is_variable: true
+}
+operand {
+ name: "StatefulPartitionedCall:0"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ }
+}
+operation {
+ type: "UnidirectionalSequenceLSTM"
+ input: "serving_default_input_16:0"
+ input: "arith.constant9"
+ input: "arith.constant8"
+ input: "arith.constant7"
+ input: "arith.constant6"
+ input: "arith.constant3"
+ input: "arith.constant2"
+ input: "arith.constant1"
+ input: "arith.constant"
+ input: ""
+ input: ""
+ input: ""
+ input: "arith.constant4"
+ input: "arith.constant5"
+ input: "arith.constant4"
+ input: "arith.constant4"
+ input: ""
+ input: ""
+ input: "sequential_15/lstm_15/zeros"
+ input: "sequential_15/lstm_15/zeros1"
+ input: ""
+ input: ""
+ input: ""
+ input: ""
+ output: "StatefulPartitionedCall:0"
+ unidirectional_sequence_lstm_options {
+ activation: TANH
+ cell_clip: 10
+ proj_clip: 0
+ time_major: false
+ asymmetric_quantize_inputs: false
+ }
+}
+input: "serving_default_input_16:0"
+output: "StatefulPartitionedCall:0"
diff --git a/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_003/test.rule b/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_003/test.rule
new file mode 100644
index 000000000..2bda75c30
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_003/test.rule
@@ -0,0 +1,7 @@
+# To check if Unroll of UnidirectionalSequenceLSTM works
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "NO_UNIDIRSEQLSTM" $(op_count UNIDIRECTIONAL_SEQUENCE_LSTM) '=' 0
+RULE "YES_LOGISTICS" $(op_count LOGISTICS) '=' 3
+RULE "YES_MUL" $(op_count MUL) '=' 3
diff --git a/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_004/test.recipe b/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_004/test.recipe
new file mode 100644
index 000000000..47e6437df
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_004/test.recipe
@@ -0,0 +1,425 @@
+operand {
+ name: "serving_default_input_48:0"
+ type: FLOAT32
+ shape {
+ dim: 2
+ dim: 3
+ dim: 4
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "sequential_47/lstm_46/zeros"
+ type: FLOAT32
+ shape {
+ dim: 2
+ dim: 5
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: true
+}
+operand {
+ name: "arith.constant"
+ type: FLOAT32
+ shape {
+ dim: 5
+ dim: 5
+ }
+ filler {
+ tag: "explicit"
+ arg: "-0.182069"
+ arg: "0.158518"
+ arg: "-0.249876"
+ arg: "-0.223681"
+ arg: "-0.0251322"
+ arg: "-0.234799"
+ arg: "0.0315703"
+ arg: "0.0713779"
+ arg: "-0.398819"
+ arg: "-0.331811"
+ arg: "-0.24586"
+ arg: "-0.034448"
+ arg: "-0.187116"
+ arg: "-0.224618"
+ arg: "0.280953"
+ arg: "-0.0503904"
+ arg: "0.0335912"
+ arg: "0.34419"
+ arg: "0.0784627"
+ arg: "0.246556"
+ arg: "-0.446514"
+ arg: "0.175145"
+ arg: "0.494241"
+ arg: "0.120458"
+ arg: "0.106793"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "arith.constant1"
+ type: FLOAT32
+ shape {
+ dim: 5
+ dim: 5
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.409107"
+ arg: "-0.406787"
+ arg: "0.111563"
+ arg: "-0.194133"
+ arg: "-0.229023"
+ arg: "0.287904"
+ arg: "-0.344601"
+ arg: "0.0946776"
+ arg: "-0.198879"
+ arg: "0.532953"
+ arg: "0.105883"
+ arg: "0.113309"
+ arg: "-0.100015"
+ arg: "0.262142"
+ arg: "-0.223262"
+ arg: "-0.00894637"
+ arg: "-0.0819539"
+ arg: "0.195495"
+ arg: "-0.291116"
+ arg: "-0.0707405"
+ arg: "0.274591"
+ arg: "0.313034"
+ arg: "0.396099"
+ arg: "-0.186455"
+ arg: "0.0721643"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "arith.constant2"
+ type: FLOAT32
+ shape {
+ dim: 5
+ dim: 5
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.0879868"
+ arg: "0.20888"
+ arg: "0.0121427"
+ arg: "-0.537515"
+ arg: "-0.20519"
+ arg: "-0.0189587"
+ arg: "0.269877"
+ arg: "-0.182624"
+ arg: "-0.0591339"
+ arg: "0.0318922"
+ arg: "-0.227111"
+ arg: "-0.149458"
+ arg: "-0.172937"
+ arg: "0.0187907"
+ arg: "0.0670664"
+ arg: "-0.121135"
+ arg: "-0.058337"
+ arg: "-0.0598793"
+ arg: "-0.362267"
+ arg: "0.0774832"
+ arg: "0.199173"
+ arg: "-0.0380472"
+ arg: "0.107854"
+ arg: "0.0658764"
+ arg: "0.0537086"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "arith.constant3"
+ type: FLOAT32
+ shape {
+ dim: 5
+ dim: 5
+ }
+ filler {
+ tag: "explicit"
+ arg: "-0.017441"
+ arg: "0.272052"
+ arg: "-0.00516871"
+ arg: "-0.0291451"
+ arg: "0.0884765"
+ arg: "0.0531231"
+ arg: "0.0352237"
+ arg: "-0.00947183"
+ arg: "0.00681541"
+ arg: "-0.000782808"
+ arg: "0.201295"
+ arg: "0.26533"
+ arg: "-0.436603"
+ arg: "-0.0725246"
+ arg: "0.390646"
+ arg: "-0.393321"
+ arg: "-0.447548"
+ arg: "-0.021616"
+ arg: "-0.0852413"
+ arg: "0.143229"
+ arg: "0.0062271"
+ arg: "0.222503"
+ arg: "0.195852"
+ arg: "-0.112013"
+ arg: "0.322707"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "arith.constant4"
+ type: FLOAT32
+ shape {
+ dim: 5
+ }
+ filler {
+ tag: "explicit"
+ arg: "0"
+ arg: "0"
+ arg: "0"
+ arg: "0"
+ arg: "0"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "arith.constant5"
+ type: FLOAT32
+ shape {
+ dim: 5
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "1"
+ arg: "1"
+ arg: "1"
+ arg: "1"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "arith.constant6"
+ type: FLOAT32
+ shape {
+ dim: 5
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.110842"
+ arg: "0.362487"
+ arg: "-0.193648"
+ arg: "0.0795254"
+ arg: "-0.154508"
+ arg: "0.0420029"
+ arg: "-0.320009"
+ arg: "-0.299519"
+ arg: "0.0381875"
+ arg: "-0.439949"
+ arg: "-0.290634"
+ arg: "0.0254151"
+ arg: "-0.138734"
+ arg: "0.328987"
+ arg: "0.449845"
+ arg: "0.0656276"
+ arg: "0.0410624"
+ arg: "-0.35757"
+ arg: "0.234629"
+ arg: "-0.310387"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "arith.constant7"
+ type: FLOAT32
+ shape {
+ dim: 5
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.418478"
+ arg: "-0.197303"
+ arg: "-0.0769891"
+ arg: "-0.352671"
+ arg: "-0.27675"
+ arg: "-0.221081"
+ arg: "-0.238606"
+ arg: "-0.0518556"
+ arg: "-0.470707"
+ arg: "0.162187"
+ arg: "-0.0575043"
+ arg: "-0.194339"
+ arg: "0.0110147"
+ arg: "-0.0778302"
+ arg: "0.0032438"
+ arg: "0.305049"
+ arg: "0.353269"
+ arg: "-0.257547"
+ arg: "-0.472484"
+ arg: "-0.0296589"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "arith.constant8"
+ type: FLOAT32
+ shape {
+ dim: 5
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.233865"
+ arg: "0.260391"
+ arg: "0.343597"
+ arg: "0.403272"
+ arg: "-0.0299743"
+ arg: "-0.137641"
+ arg: "0.13583"
+ arg: "0.212403"
+ arg: "0.0147645"
+ arg: "-0.382367"
+ arg: "-0.368439"
+ arg: "0.260765"
+ arg: "-0.0455869"
+ arg: "0.329342"
+ arg: "-0.216915"
+ arg: "-0.441979"
+ arg: "0.147086"
+ arg: "0.131922"
+ arg: "-0.44475"
+ arg: "0.0715657"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "arith.constant9"
+ type: FLOAT32
+ shape {
+ dim: 5
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "-0.0868829"
+ arg: "0.127576"
+ arg: "-0.48598"
+ arg: "0.32627"
+ arg: "0.360762"
+ arg: "-0.235853"
+ arg: "-0.223454"
+ arg: "0.265532"
+ arg: "-0.163921"
+ arg: "0.130234"
+ arg: "0.411861"
+ arg: "-0.0193611"
+ arg: "0.165723"
+ arg: "0.326238"
+ arg: "0.119351"
+ arg: "-0.0257632"
+ arg: "0.455063"
+ arg: "-0.0131663"
+ arg: "-0.157016"
+ arg: "0.482517"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "sequential_47/lstm_46/zeros1"
+ type: FLOAT32
+ shape {
+ dim: 2
+ dim: 5
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: true
+}
+operand {
+ name: "StatefulPartitionedCall:0"
+ type: FLOAT32
+ shape {
+ dim: 2
+ dim: 3
+ dim: 5
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operation {
+ type: "UnidirectionalSequenceLSTM"
+ input: "serving_default_input_48:0"
+ input: "arith.constant9"
+ input: "arith.constant8"
+ input: "arith.constant7"
+ input: "arith.constant6"
+ input: "arith.constant3"
+ input: "arith.constant2"
+ input: "arith.constant1"
+ input: "arith.constant"
+ input: ""
+ input: ""
+ input: ""
+ input: "arith.constant4"
+ input: "arith.constant5"
+ input: "arith.constant4"
+ input: "arith.constant4"
+ input: ""
+ input: ""
+ input: "sequential_47/lstm_46/zeros"
+ input: "sequential_47/lstm_46/zeros1"
+ input: ""
+ input: ""
+ input: ""
+ input: ""
+ output: "StatefulPartitionedCall:0"
+ unidirectional_sequence_lstm_options {
+ activation: TANH
+ cell_clip: 10
+ proj_clip: 0
+ time_major: false
+ asymmetric_quantize_inputs: false
+ }
+}
+input: "serving_default_input_48:0"
+output: "StatefulPartitionedCall:0"
diff --git a/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_004/test.rule b/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_004/test.rule
new file mode 100644
index 000000000..ccc7febf9
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_004/test.rule
@@ -0,0 +1,6 @@
+# To check if Unroll of UnidirectionalSequenceLSTM works
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "NO_UNIDIRSEQLSTM" $(op_count UNIDIRECTIONAL_SEQUENCE_LSTM) '=' 0
+RULE "YES_FC" $(op_count FULLY_CONNECTED) '=' 5
diff --git a/res/TensorFlowLiteRecipes/Unique_000/test.recipe b/res/TensorFlowLiteRecipes/Unique_000/test.recipe
index 3110b5ed9..887380c48 100644
--- a/res/TensorFlowLiteRecipes/Unique_000/test.recipe
+++ b/res/TensorFlowLiteRecipes/Unique_000/test.recipe
@@ -6,7 +6,7 @@ operand {
operand {
name: "ofm"
type: FLOAT32
- shape { dim: 0 }
+ shape { }
}
operand {
name: "ofm_idx"
diff --git a/res/TensorFlowLiteRecipes/Unique_001/test.recipe b/res/TensorFlowLiteRecipes/Unique_001/test.recipe
index d654f79b9..9beb51690 100644
--- a/res/TensorFlowLiteRecipes/Unique_001/test.recipe
+++ b/res/TensorFlowLiteRecipes/Unique_001/test.recipe
@@ -6,7 +6,7 @@ operand {
operand {
name: "ofm"
type: FLOAT32
- shape { dim: 0 }
+ shape { }
}
operand {
name: "ofm_idx"
diff --git a/res/TensorFlowLiteRecipes/Unique_002/test.recipe b/res/TensorFlowLiteRecipes/Unique_002/test.recipe
index d9f2393b8..67b947ff8 100644
--- a/res/TensorFlowLiteRecipes/Unique_002/test.recipe
+++ b/res/TensorFlowLiteRecipes/Unique_002/test.recipe
@@ -6,7 +6,7 @@ operand {
operand {
name: "ofm"
type: INT32
- shape { dim: 0 }
+ shape { }
}
operand {
name: "ofm_idx"
diff --git a/res/TensorFlowLiteRecipes/Unique_003/test.recipe b/res/TensorFlowLiteRecipes/Unique_003/test.recipe
index de9e87af9..375db66e8 100644
--- a/res/TensorFlowLiteRecipes/Unique_003/test.recipe
+++ b/res/TensorFlowLiteRecipes/Unique_003/test.recipe
@@ -6,7 +6,7 @@ operand {
operand {
name: "ofm"
type: INT32
- shape { dim: 0 }
+ shape { }
}
operand {
name: "ofm_idx"
diff --git a/res/TensorFlowLiteRecipes/Unique_U8_000/test.recipe b/res/TensorFlowLiteRecipes/Unique_U8_000/test.recipe
index 3906d2c5e..d3985e401 100644
--- a/res/TensorFlowLiteRecipes/Unique_U8_000/test.recipe
+++ b/res/TensorFlowLiteRecipes/Unique_U8_000/test.recipe
@@ -7,7 +7,7 @@ operand {
operand {
name: "ofm"
type: UINT8
- shape { dim: 0 }
+ shape { }
}
operand {
name: "ofm_idx"
diff --git a/res/TensorFlowLiteRecipes/Unique_U8_001/test.recipe b/res/TensorFlowLiteRecipes/Unique_U8_001/test.recipe
index 2bac10ae7..b08dd85cc 100644
--- a/res/TensorFlowLiteRecipes/Unique_U8_001/test.recipe
+++ b/res/TensorFlowLiteRecipes/Unique_U8_001/test.recipe
@@ -7,7 +7,7 @@ operand {
operand {
name: "ofm"
type: UINT8
- shape { dim: 0 }
+ shape { }
}
operand {
name: "ofm_idx"
diff --git a/res/TensorFlowLiteSchema/2.10.1/schema.fbs b/res/TensorFlowLiteSchema/2.10.1/schema.fbs
new file mode 100644
index 000000000..2c9b0a84d
--- /dev/null
+++ b/res/TensorFlowLiteSchema/2.10.1/schema.fbs
@@ -0,0 +1,1306 @@
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Revision History
+// Version 0: Initial version.
+// Version 1: Add subgraphs to schema.
+// Version 2: Rename operators to conform to NN API.
+// Version 3: Move buffer data from Model.Subgraph.Tensors to Model.Buffers.
+// Version 3a: Add new builtin op code field. Has backward compatibility with
+// version 3.
+// Version 3b: Rename fields in SignatureDef. Has backward compatibility with
+// version 3 and 3a.
+
+namespace tflite;
+
+// This corresponds to the version.
+file_identifier "TFL3";
+// File extension of any written files.
+file_extension "tflite";
+
+// IMPORTANT: All new members of tables, enums and unions must be added at the
+// end to ensure backwards compatibility.
+
+// The type of data stored in a tensor.
+enum TensorType : byte {
+ FLOAT32 = 0,
+ FLOAT16 = 1,
+ INT32 = 2,
+ UINT8 = 3,
+ INT64 = 4,
+ STRING = 5,
+ BOOL = 6,
+ INT16 = 7,
+ COMPLEX64 = 8,
+ INT8 = 9,
+ FLOAT64 = 10,
+ COMPLEX128 = 11,
+ UINT64 = 12,
+ // Experimental: Resource and variant types are experimental and subject
+ // to change. Do not implement custom kernels using resource & variant types
+ // for now.
+ RESOURCE = 13,
+ VARIANT = 14,
+ UINT32 = 15,
+ UINT16 = 16
+}
+
+// Custom quantization parameters for experimenting with new quantization
+// techniques.
+table CustomQuantization {
+ custom:[ubyte] (force_align: 16);
+}
+
+// Represents a specific quantization technique's parameters.
+union QuantizationDetails {
+ CustomQuantization,
+}
+
+// Parameters for converting a quantized tensor back to float.
+table QuantizationParameters {
+ // These four parameters are the asymmetric linear quantization parameters.
+ // Given a quantized value q, the corresponding float value f should be:
+ // f = scale * (q - zero_point)
+ // For other quantization types, the QuantizationDetails below is used.
+ min:[float]; // For importing back into tensorflow.
+ max:[float]; // For importing back into tensorflow.
+ scale:[float]; // For dequantizing the tensor's values.
+ zero_point:[long];
+
+ // If this is not none, the other quantization parameters (i.e. min, max,
+ // scale, zero_point fields above) are ignored and the value of the
+ // QuantizationDetails union should be used.
+ details:QuantizationDetails;
+
+ // Specifies the dimension of the Tensor's shape that the scales and
+ // zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1]
+ // with quantization params:
+ // scale=[1.0, 2.0, 3.0], zero_point=[1, 2, 3], quantization_dimension=1
+ // will be quantized across the second dimension of t.
+ // t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1
+ // t[:, 1, :, :] will have scale[1]=2.0, zero_point[1]=2
+ // t[:, 2, :, :] will have scale[2]=3.0, zero_point[2]=3
+ quantized_dimension:int;
+}
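The formula above fully determines dequantization. As a minimal sketch of that rule (NumPy-based; the helper name and broadcasting approach are illustrative, not code from this patch), per-tensor and per-channel parameters both reduce to f = scale * (q - zero_point), with per-channel arrays broadcast along quantized_dimension:

import numpy as np

def dequantize(q, scale, zero_point, quantized_dimension=0):
    """Apply f = scale * (q - zero_point) along `quantized_dimension`."""
    q = np.asarray(q, dtype=np.float32)
    scale = np.asarray(scale, dtype=np.float32)
    zero_point = np.asarray(zero_point, dtype=np.float32)
    if scale.size > 1:
        # Per-channel: reshape the parameter vectors so they broadcast
        # along the quantized dimension.
        shape = [1] * q.ndim
        shape[quantized_dimension] = scale.size
        scale = scale.reshape(shape)
        zero_point = zero_point.reshape(shape)
    return scale * (q - zero_point)

# Per-tensor: scale=0.5, zero_point=10, so q=12 maps to 0.5 * (12 - 10) = 1.0.
assert dequantize(12, 0.5, 10) == 1.0
# Per-channel along axis 0: each row uses its own scale/zero_point pair.
f = dequantize([[10, 12], [20, 24]], [0.5, 0.25], [10, 20], quantized_dimension=0)
assert f.tolist() == [[0.0, 1.0], [0.0, 1.0]]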
+
+// Sparse tensors.
+// We use a modification of the TACO format.
+// Reference: http://tensor-compiler.org/kjolstad-oopsla17-tensor-compiler.pdf
+//
+// To encode a conceptual n-dimensional dense tensor with dims (d0, ..., dn-1),
+// potentially with a k-dimensional block (0 <= k <= n) with dims
+// (dn, ..., dn+k-1), the format needs to specify:
+// 1. In what order to traverse these dimensions. For example, to store a 2-D
+// matrix in row major order, the traversal order would be (d0, d1),
+// whereas to store it in column major order, the traversal order would be
+// (d1, d0). If the 2-D matrix has a 2-D inner block, the traversal order
+// could be (d0, d1, d2, d3).
+// 2. How each block dimension in (dn, ..., dn+k-1) maps to the original
+// tensor dimension in (d0, ..., dn-1).
+// 3. In the traversal order defined above, the format (dense vs. sparse) and
+// index metadata for each dimension. For a dense dimension, this is just
+// the size of that dimension. For a sparse dimension, it's the same as
+// the compressed index defined in the Compressed Sparse Row (CSR) format.
+// (http://scipy-lectures.org/advanced/scipy_sparse/csr_matrix.html)
+
+// The storage type for a dimension. Currently we support:
+// 1. DENSE: each coordinate in this dimension is stored implicitly.
+// 2. SPARSE_CSR: only the coordinates with non-zero elements are stored. The
+ // compression technique is the same as the one CSR uses.
+// More types like a sparse dimension with a different compression technique
+// could be added to the list in the future.
+enum DimensionType : byte {
+ DENSE = 0,
+ SPARSE_CSR = 1,
+}
+
+table Int32Vector {
+ values:[int];
+}
+
+table Uint16Vector {
+ values:[ushort] (force_align: 4);
+}
+
+table Uint8Vector {
+ values:[ubyte] (force_align: 4);
+}
+
+// Variable-typed buffer to store the index metadata for a sparse dimension.
+ // The widest type is Int32 instead of UInt32 because a tensor's shape is an
+ // int32 vector. We don't want the per-dimensional index to overflow that range.
+union SparseIndexVector {
+ Int32Vector,
+ Uint16Vector,
+ Uint8Vector
+}
+
+table DimensionMetadata {
+ // Whether a dimension is dense or sparse.
+ format:DimensionType;
+ // Index metadata used for a dimension.
+ // - If format is DimensionType.DENSE then we use the dense_size field to
+ // store the size of that dimension. Each index in that dimension is
+ // stored implicitly.
+ // - If format is DimensionType.SPARSE_CSR then we use array_segments and
+ // array_indices to encode that dimension. array_segments represents how
+ // to segment the indices array, each segment corresponds to one element
+ // in the previous dimension. array_indices represents the index of the
+ // non-zero elements within this dimension (as those in the CSR matrix
+ // format, where the first array is row pointers and the second array is
+ // column indices).
+ dense_size:int;
+ array_segments:SparseIndexVector;
+ array_indices:SparseIndexVector;
+}
+
+// Parameters to encode a sparse TfLite tensor.
+table SparsityParameters {
+ // The traversal order of the dimensions defined in the `shape` field of the
+ // conceptual dense tensor. For an n-dimensional tensor with dims (d0, d1,
+ // ..., dn-1),
+ // - if not block sparse, the traversal_order is just a permutation of (d0,
+ // ..., dn-1). For example, a 2-D matrix stored in row-major order would
+ // have traversal_order = (d0, d1).
+ // - if block sparse with a k-dimensional block (0 <= k <= n), the
+ // traversal_order has n + k elements. The first n elements are still a
+ // permutation of (d0, ..., dn-1). The last k elements are a permutation
+ // of (dn, ..., dn+k-1), defining how to traverse a block internally. For
+ // example, a 2-D matrix with 2-D blocks, both stored in row-major order
+ // would have traversal_order = (d0, d1, d2, d3).
+ traversal_order:[int];
+ // For an n-dimensional tensor with a k-dimensional block (0 <= k <= n),
+ // stores how a block dimension in (dn, ..., dn+k-1) maps to the original
+ // tensor dimension in (d0, ..., dn-1).
+ // It's stored in the order of (dn, ..., dn+k-1).
+ // If not block-sparse, this field is NULL.
+ block_map:[int];
+ // In the traversal order defined above, the metadata needed for
+ // each dimension to locate the non-zero values in the original dense tensor.
+ // The size of the dim_metadata array = the size of the traversal_order array
+ // = n + k.
+ dim_metadata:[DimensionMetadata];
+}
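To make the DENSE / SPARSE_CSR encoding above concrete, here is a small hand-worked sketch (plain Python; all variable names are illustrative, not part of the schema) encoding the 2x3 matrix [[1, 0, 2], [0, 0, 3]] with traversal_order = (d0, d1), where d0 is DENSE and d1 is SPARSE_CSR, then decoding it back:

dense_size_d0 = 2            # d0 is DENSE: just store its size
array_segments = [0, 2, 3]   # d1 is SPARSE_CSR: row i spans [seg[i], seg[i+1])
array_indices = [0, 2, 2]    # column index of each stored non-zero
values = [1, 2, 3]           # non-zero values, in traversal order

# Decode back to a dense matrix to check that the encoding round-trips.
decoded = [[0] * 3 for _ in range(dense_size_d0)]
for row in range(dense_size_d0):
    for k in range(array_segments[row], array_segments[row + 1]):
        decoded[row][array_indices[k]] = values[k]
assert decoded == [[1, 0, 2], [0, 0, 3]]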
+
+table Tensor {
+ // The tensor shape. The meaning of each entry is operator-specific but
+ // builtin ops use: [batch size, height, width, number of channels] (That's
+ // Tensorflow's NHWC).
+ shape:[int];
+ type:TensorType;
+ // An index that refers to the buffers table at the root of the model. Or,
+ // if there is no data buffer associated (i.e. intermediate results), then
+ // this is 0 (which refers to an always existent empty buffer).
+ //
+ // The data_buffer itself is an opaque container, with the assumption that the
+ // target device is little-endian. In addition, all builtin operators assume
+ // the memory is ordered such that if `shape` is [4, 3, 2], then index
+ // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k].
+ buffer:uint;
+ name:string; // For debugging and importing back into tensorflow.
+ quantization:QuantizationParameters; // Optional.
+
+ is_variable:bool = false;
+
+ // Parameters to encode a sparse tensor. See the example in
+ // tensorflow/lite/testdata/sparse_tensor.json.
+ sparsity:SparsityParameters; // Optional.
+
+ // Encodes `shape` with unknown dimensions. Unknown dimensions are
+ // represented with -1.
+ shape_signature:[int]; // Optional.
+
+ // If false, the rank or the number of tensor dimensions is unknown.
+ // If false, "shape" must be [].
+ has_rank: bool = false;
+}
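The row-major layout rule quoted in the buffer comment above can be verified in a few lines; this is an illustrative sketch, not code from this patch:

def flat_index(index, shape):
    """Row-major (C-order) offset of a multi-dimensional index."""
    offset = 0
    for i, dim in zip(index, shape):
        offset = offset * dim + i
    return offset

# For shape [4, 3, 2], index [i, j, k] maps to i*3*2 + j*2 + k.
assert flat_index([1, 2, 1], [4, 3, 2]) == 1 * 3 * 2 + 2 * 2 + 1  # == 11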
+
+// A list of builtin operators. Builtin operators are slightly faster than custom
+// ones, but not by much. Moreover, while custom operators accept an opaque
+// object containing configuration parameters, builtins have a predetermined
+// set of acceptable options.
+// LINT.IfChange
+enum BuiltinOperator : int32 {
+ ADD = 0,
+ AVERAGE_POOL_2D = 1,
+ CONCATENATION = 2,
+ CONV_2D = 3,
+ DEPTHWISE_CONV_2D = 4,
+ DEPTH_TO_SPACE = 5,
+ DEQUANTIZE = 6,
+ EMBEDDING_LOOKUP = 7,
+ FLOOR = 8,
+ FULLY_CONNECTED = 9,
+ HASHTABLE_LOOKUP = 10,
+ L2_NORMALIZATION = 11,
+ L2_POOL_2D = 12,
+ LOCAL_RESPONSE_NORMALIZATION = 13,
+ LOGISTIC = 14,
+ LSH_PROJECTION = 15,
+ LSTM = 16,
+ MAX_POOL_2D = 17,
+ MUL = 18,
+ RELU = 19,
+ // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed
+ // since different model developers use RELU1 in different ways. Never
+ // create another op called RELU1.
+ RELU_N1_TO_1 = 20,
+ RELU6 = 21,
+ RESHAPE = 22,
+ RESIZE_BILINEAR = 23,
+ RNN = 24,
+ SOFTMAX = 25,
+ SPACE_TO_DEPTH = 26,
+ SVDF = 27,
+ TANH = 28,
+ CONCAT_EMBEDDINGS = 29,
+ SKIP_GRAM = 30,
+ CALL = 31,
+ CUSTOM = 32,
+ EMBEDDING_LOOKUP_SPARSE = 33,
+ PAD = 34,
+ UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+ GATHER = 36,
+ BATCH_TO_SPACE_ND = 37,
+ SPACE_TO_BATCH_ND = 38,
+ TRANSPOSE = 39,
+ MEAN = 40,
+ SUB = 41,
+ DIV = 42,
+ SQUEEZE = 43,
+ UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
+ STRIDED_SLICE = 45,
+ BIDIRECTIONAL_SEQUENCE_RNN = 46,
+ EXP = 47,
+ TOPK_V2 = 48,
+ SPLIT = 49,
+ LOG_SOFTMAX = 50,
+ // DELEGATE is a special op type for the operations which are delegated to
+ // other backends.
+ // WARNING: Experimental interface, subject to change
+ DELEGATE = 51,
+ BIDIRECTIONAL_SEQUENCE_LSTM = 52,
+ CAST = 53,
+ PRELU = 54,
+ MAXIMUM = 55,
+ ARG_MAX = 56,
+ MINIMUM = 57,
+ LESS = 58,
+ NEG = 59,
+ PADV2 = 60,
+ GREATER = 61,
+ GREATER_EQUAL = 62,
+ LESS_EQUAL = 63,
+ SELECT = 64,
+ SLICE = 65,
+ SIN = 66,
+ TRANSPOSE_CONV = 67,
+ SPARSE_TO_DENSE = 68,
+ TILE = 69,
+ EXPAND_DIMS = 70,
+ EQUAL = 71,
+ NOT_EQUAL = 72,
+ LOG = 73,
+ SUM = 74,
+ SQRT = 75,
+ RSQRT = 76,
+ SHAPE = 77,
+ POW = 78,
+ ARG_MIN = 79,
+ FAKE_QUANT = 80,
+ REDUCE_PROD = 81,
+ REDUCE_MAX = 82,
+ PACK = 83,
+ LOGICAL_OR = 84,
+ ONE_HOT = 85,
+ LOGICAL_AND = 86,
+ LOGICAL_NOT = 87,
+ UNPACK = 88,
+ REDUCE_MIN = 89,
+ FLOOR_DIV = 90,
+ REDUCE_ANY = 91,
+ SQUARE = 92,
+ ZEROS_LIKE = 93,
+ FILL = 94,
+ FLOOR_MOD = 95,
+ RANGE = 96,
+ RESIZE_NEAREST_NEIGHBOR = 97,
+ LEAKY_RELU = 98,
+ SQUARED_DIFFERENCE = 99,
+ MIRROR_PAD = 100,
+ ABS = 101,
+ SPLIT_V = 102,
+ UNIQUE = 103,
+ CEIL = 104,
+ REVERSE_V2 = 105,
+ ADD_N = 106,
+ GATHER_ND = 107,
+ COS = 108,
+ WHERE = 109,
+ RANK = 110,
+ ELU = 111,
+ REVERSE_SEQUENCE = 112,
+ MATRIX_DIAG = 113,
+ QUANTIZE = 114,
+ MATRIX_SET_DIAG = 115,
+ ROUND = 116,
+ HARD_SWISH = 117,
+ IF = 118,
+ WHILE = 119,
+ NON_MAX_SUPPRESSION_V4 = 120,
+ NON_MAX_SUPPRESSION_V5 = 121,
+ SCATTER_ND = 122,
+ SELECT_V2 = 123,
+ DENSIFY = 124,
+ SEGMENT_SUM = 125,
+ BATCH_MATMUL = 126,
+ PLACEHOLDER_FOR_GREATER_OP_CODES = 127,
+ CUMSUM = 128,
+ CALL_ONCE = 129,
+ BROADCAST_TO = 130,
+ RFFT2D = 131,
+ CONV_3D = 132,
+ IMAG=133,
+ REAL=134,
+ COMPLEX_ABS=135,
+ HASHTABLE = 136,
+ HASHTABLE_FIND = 137,
+ HASHTABLE_IMPORT = 138,
+ HASHTABLE_SIZE = 139,
+ REDUCE_ALL = 140,
+ CONV_3D_TRANSPOSE = 141,
+ VAR_HANDLE = 142,
+ READ_VARIABLE = 143,
+ ASSIGN_VARIABLE = 144,
+ BROADCAST_ARGS = 145,
+ RANDOM_STANDARD_NORMAL = 146,
+ BUCKETIZE = 147,
+ RANDOM_UNIFORM = 148,
+ MULTINOMIAL = 149,
+ GELU = 150,
+ DYNAMIC_UPDATE_SLICE = 151,
+ RELU_0_TO_1 = 152,
+ UNSORTED_SEGMENT_PROD = 153,
+ UNSORTED_SEGMENT_MAX = 154,
+ UNSORTED_SEGMENT_SUM = 155,
+ ATAN2 = 156
+}
+// LINT.ThenChange(nnapi_linter/linter.proto)
+
+// Options for the builtin operators.
+union BuiltinOptions {
+ Conv2DOptions,
+ DepthwiseConv2DOptions,
+ ConcatEmbeddingsOptions,
+ LSHProjectionOptions,
+ Pool2DOptions,
+ SVDFOptions,
+ RNNOptions,
+ FullyConnectedOptions,
+ SoftmaxOptions,
+ ConcatenationOptions,
+ AddOptions,
+ L2NormOptions,
+ LocalResponseNormalizationOptions,
+ LSTMOptions,
+ ResizeBilinearOptions,
+ CallOptions,
+ ReshapeOptions,
+ SkipGramOptions,
+ SpaceToDepthOptions,
+ EmbeddingLookupSparseOptions,
+ MulOptions,
+ PadOptions,
+ GatherOptions,
+ BatchToSpaceNDOptions,
+ SpaceToBatchNDOptions,
+ TransposeOptions,
+ ReducerOptions,
+ SubOptions,
+ DivOptions,
+ SqueezeOptions,
+ SequenceRNNOptions,
+ StridedSliceOptions,
+ ExpOptions,
+ TopKV2Options,
+ SplitOptions,
+ LogSoftmaxOptions,
+ CastOptions,
+ DequantizeOptions,
+ MaximumMinimumOptions,
+ ArgMaxOptions,
+ LessOptions,
+ NegOptions,
+ PadV2Options,
+ GreaterOptions,
+ GreaterEqualOptions,
+ LessEqualOptions,
+ SelectOptions,
+ SliceOptions,
+ TransposeConvOptions,
+ SparseToDenseOptions,
+ TileOptions,
+ ExpandDimsOptions,
+ EqualOptions,
+ NotEqualOptions,
+ ShapeOptions,
+ PowOptions,
+ ArgMinOptions,
+ FakeQuantOptions,
+ PackOptions,
+ LogicalOrOptions,
+ OneHotOptions,
+ LogicalAndOptions,
+ LogicalNotOptions,
+ UnpackOptions,
+ FloorDivOptions,
+ SquareOptions,
+ ZerosLikeOptions,
+ FillOptions,
+ BidirectionalSequenceLSTMOptions,
+ BidirectionalSequenceRNNOptions,
+ UnidirectionalSequenceLSTMOptions,
+ FloorModOptions,
+ RangeOptions,
+ ResizeNearestNeighborOptions,
+ LeakyReluOptions,
+ SquaredDifferenceOptions,
+ MirrorPadOptions,
+ AbsOptions,
+ SplitVOptions,
+ UniqueOptions,
+ ReverseV2Options,
+ AddNOptions,
+ GatherNdOptions,
+ CosOptions,
+ WhereOptions,
+ RankOptions,
+ ReverseSequenceOptions,
+ MatrixDiagOptions,
+ QuantizeOptions,
+ MatrixSetDiagOptions,
+ HardSwishOptions,
+ IfOptions,
+ WhileOptions,
+ DepthToSpaceOptions,
+ NonMaxSuppressionV4Options,
+ NonMaxSuppressionV5Options,
+ ScatterNdOptions,
+ SelectV2Options,
+ DensifyOptions,
+ SegmentSumOptions,
+ BatchMatMulOptions,
+ CumsumOptions,
+ CallOnceOptions,
+ BroadcastToOptions,
+ Rfft2dOptions,
+ Conv3DOptions,
+ HashtableOptions,
+ HashtableFindOptions,
+ HashtableImportOptions,
+ HashtableSizeOptions,
+ VarHandleOptions,
+ ReadVariableOptions,
+ AssignVariableOptions,
+ RandomOptions,
+ BucketizeOptions,
+ GeluOptions,
+ DynamicUpdateSliceOptions,
+ UnsortedSegmentProdOptions,
+ UnsortedSegmentMaxOptions,
+ UnsortedSegmentSumOptions,
+ ATan2Options
+}
+
+// LINT.IfChange
+enum Padding : byte { SAME, VALID }
+// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td)
+
+// LINT.IfChange
+enum ActivationFunctionType : byte {
+ NONE = 0,
+ RELU = 1,
+ RELU_N1_TO_1 = 2,
+ RELU6 = 3,
+ TANH = 4,
+ SIGN_BIT = 5,
+}
+// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td)
+
+table Conv2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+// Options for both Conv3D and Conv3DTranspose.
+table Conv3DOptions {
+ padding:Padding;
+ stride_d:int;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_d_factor:int = 1;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table Pool2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ filter_width:int;
+ filter_height:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DepthwiseConv2DOptions {
+ // Parameters for DepthwiseConv version 1 or above.
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ // `depth_multiplier` is redundant. It's used by CPU kernels in
+ // TensorFlow 2.0 or below, but ignored in versions above.
+ // See comments in lite/c/builtin_op_data.h for more details.
+ depth_multiplier:int;
+ fused_activation_function:ActivationFunctionType;
+ // Parameters for DepthwiseConv version 2 or above.
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table ConcatEmbeddingsOptions {
+ num_channels:int;
+ num_columns_per_channel:[int];
+ embedding_dim_per_channel:[int]; // This could be inferred from parameters.
+}
+
+enum LSHProjectionType: byte {
+ UNKNOWN = 0,
+ SPARSE = 1,
+ DENSE = 2,
+}
+
+table LSHProjectionOptions {
+ type: LSHProjectionType;
+}
+
+table SVDFOptions {
+ rank:int;
+ fused_activation_function:ActivationFunctionType;
+ // For weights-only quantization, use asymmetric quantization for
+ // non-constant inputs at evaluation time.
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow RNNCell.
+table RNNOptions {
+ fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with RNNCell.
+table SequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow bidirectional_dynamic_rnn with RNNCell.
+table BidirectionalSequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ merge_outputs: bool;
+ asymmetric_quantize_inputs:bool;
+}
+
+// LINT.IfChange
+enum FullyConnectedOptionsWeightsFormat: byte {
+ DEFAULT = 0,
+ SHUFFLED4x16INT8 = 1,
+}
+// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td)
+
+// An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
+table FullyConnectedOptions {
+ // Parameters for FullyConnected version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+
+ // Parameters for FullyConnected version 2 or above.
+ weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT;
+
+ // Parameters for FullyConnected version 5 or above.
+ // If set to true, then the number of dimensions is preserved. Furthermore,
+ // all but the last dimension of the input and output shapes will be equal.
+ keep_num_dims: bool;
+
+ // Parameters for FullyConnected version 7 or above.
+ // If set to true, then weights-only op will use asymmetric quantization for
+ // inputs.
+ asymmetric_quantize_inputs: bool;
+}
+
+table SoftmaxOptions {
+ beta: float;
+}
+
+// An implementation of TensorFlow concat.
+table ConcatenationOptions {
+ axis:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table AddOptions {
+ fused_activation_function:ActivationFunctionType;
+ // Parameters supported by version 3.
+ pot_scale_int16:bool = true;
+}
+
+table MulOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table L2NormOptions {
+ // This field is currently ignored in the L2 Norm Op.
+ fused_activation_function:ActivationFunctionType;
+}
+
+table LocalResponseNormalizationOptions {
+ radius:int;
+ bias:float;
+ alpha:float;
+ beta:float;
+}
+
+// LINT.IfChange
+enum LSTMKernelType : byte {
+ // Full LSTM kernel which supports peephole and projection.
+ FULL = 0,
+ // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
+ BASIC = 1,
+}
+// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td)
+
+// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
+table LSTMOptions {
+ // Parameters for LSTM version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // Parameters for LSTM version 2 or above.
+ // Basic kernel is only supported in version 2 or above.
+ kernel_type: LSTMKernelType = FULL;
+
+ // Parameters for LSTM version 4 or above.
+ asymmetric_quantize_inputs: bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with LSTMCell.
+table UnidirectionalSequenceLSTMOptions {
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true then first dimension is sequence, otherwise batch.
+ time_major:bool;
+
+ // Parameter for Unidirectional Sequence LSTM version 4.
+ asymmetric_quantize_inputs:bool;
+}
+
+table BidirectionalSequenceLSTMOptions {
+ // Parameters supported by version 1:
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true, store the outputs of both directions into the first output.
+ merge_outputs: bool;
+
+ // Parameters supported by version 2:
+ // If true then first dimension is sequence, otherwise batch.
+ // Version 1 implementations assumed time_major to be true, so this default
+ // value should never change.
+ time_major: bool = true;
+
+ // Parameters for version 3 or above.
+ asymmetric_quantize_inputs:bool;
+}
+
+table ResizeBilinearOptions {
+ new_height: int (deprecated);
+ new_width: int (deprecated);
+ align_corners: bool;
+ half_pixel_centers: bool;
+}
+
+table ResizeNearestNeighborOptions {
+ align_corners: bool;
+ half_pixel_centers: bool;
+}
+
+// Options for a call operation.
+table CallOptions {
+ // The subgraph index that needs to be called.
+ subgraph:uint;
+}
+
+table PadOptions {
+}
+
+table PadV2Options {
+}
+
+table ReshapeOptions {
+ new_shape:[int];
+}
+
+table SpaceToBatchNDOptions {
+}
+
+table BatchToSpaceNDOptions {
+}
+
+table SkipGramOptions {
+ ngram_size: int;
+ max_skip_size: int;
+ include_all_ngrams: bool;
+}
+
+table SpaceToDepthOptions {
+ block_size: int;
+}
+
+table DepthToSpaceOptions {
+ block_size: int;
+}
+
+table SubOptions {
+ fused_activation_function:ActivationFunctionType;
+ // Parameters supported by version 5
+ pot_scale_int16:bool = true;
+}
+
+table DivOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table TopKV2Options {
+}
+
+enum CombinerType : byte {
+ SUM = 0,
+ MEAN = 1,
+ SQRTN = 2,
+}
+
+table EmbeddingLookupSparseOptions {
+ combiner:CombinerType;
+}
+
+table GatherOptions {
+ axis: int;
+ // Parameters for Gather version 5 or above.
+ batch_dims: int = 0;
+}
+
+table TransposeOptions {
+}
+
+table ExpOptions {
+}
+
+table CosOptions {
+}
+
+table ReducerOptions {
+ keep_dims: bool;
+}
+
+table SqueezeOptions {
+ squeeze_dims:[int];
+}
+
+table SplitOptions {
+ num_splits: int;
+}
+
+table SplitVOptions {
+ num_splits: int;
+}
+
+table StridedSliceOptions {
+ begin_mask: int;
+ end_mask: int;
+ ellipsis_mask: int;
+ new_axis_mask: int;
+ shrink_axis_mask: int;
+}
+
+table LogSoftmaxOptions {
+}
+
+table CastOptions {
+ in_data_type: TensorType;
+ out_data_type: TensorType;
+}
+
+table DequantizeOptions {
+}
+
+table MaximumMinimumOptions {
+}
+
+table TileOptions {
+}
+
+table ArgMaxOptions {
+ output_type : TensorType;
+}
+
+table ArgMinOptions {
+ output_type : TensorType;
+}
+
+table GreaterOptions {
+}
+
+table GreaterEqualOptions {
+}
+
+table LessOptions {
+}
+
+table LessEqualOptions {
+}
+
+table NegOptions {
+}
+
+table SelectOptions {
+}
+
+table SliceOptions {
+}
+
+table TransposeConvOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+}
+
+table ExpandDimsOptions {
+}
+
+table SparseToDenseOptions {
+ validate_indices:bool;
+}
+
+table EqualOptions {
+}
+
+table NotEqualOptions {
+}
+
+table ShapeOptions {
+ // Optional output type of the operation (int32 or int64). Defaults to int32.
+ out_type : TensorType;
+}
+
+table RankOptions {
+}
+
+table PowOptions {
+}
+
+table FakeQuantOptions {
+ // Parameters supported by version 1:
+ min:float;
+ max:float;
+ num_bits:int;
+
+ // Parameters supported by version 2:
+ narrow_range:bool;
+}
+
+table PackOptions {
+ values_count:int;
+ axis:int;
+}
+
+table LogicalOrOptions {
+}
+
+table OneHotOptions {
+ axis:int;
+}
+
+table AbsOptions {
+}
+
+
+table HardSwishOptions {
+}
+
+table LogicalAndOptions {
+}
+
+table LogicalNotOptions {
+}
+
+table UnpackOptions {
+ num:int;
+ axis:int;
+}
+
+table FloorDivOptions {
+}
+
+table SquareOptions {
+}
+
+table ZerosLikeOptions {
+}
+
+table FillOptions {
+}
+
+table FloorModOptions {
+}
+
+table RangeOptions {
+}
+
+table LeakyReluOptions {
+ alpha:float;
+}
+
+table SquaredDifferenceOptions {
+}
+
+// LINT.IfChange
+enum MirrorPadMode : byte {
+ // Doesn't include borders.
+ REFLECT = 0,
+ // Includes borders.
+ SYMMETRIC = 1,
+}
+// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td)
+
+table MirrorPadOptions {
+ mode:MirrorPadMode;
+}
+
+table UniqueOptions {
+ idx_out_type:TensorType = INT32;
+}
+
+table ReverseV2Options {
+}
+
+table AddNOptions {
+}
+
+table GatherNdOptions {
+}
+
+table WhereOptions {
+}
+
+table ReverseSequenceOptions {
+ seq_dim:int;
+ batch_dim:int = 0;
+}
+
+table MatrixDiagOptions {
+}
+
+table QuantizeOptions {
+}
+
+table MatrixSetDiagOptions {
+}
+
+table IfOptions {
+ then_subgraph_index:int;
+ else_subgraph_index:int;
+}
+
+table CallOnceOptions {
+ init_subgraph_index:int;
+}
+
+table WhileOptions {
+ cond_subgraph_index:int;
+ body_subgraph_index:int;
+}
+
+table NonMaxSuppressionV4Options {
+}
+
+table NonMaxSuppressionV5Options {
+}
+
+table ScatterNdOptions {
+}
+
+table SelectV2Options {
+}
+
+table DensifyOptions {
+}
+
+table SegmentSumOptions {
+}
+
+table BatchMatMulOptions {
+ adj_x:bool;
+ adj_y:bool;
+ // Parameters for BatchMatMul version 4 or above.
+ // If set to true, then weights-only op will use asymmetric quantization for
+ // inputs.
+ asymmetric_quantize_inputs: bool;
+}
+
+table CumsumOptions {
+ exclusive:bool;
+ reverse:bool;
+}
+
+table BroadcastToOptions {
+}
+
+table Rfft2dOptions {
+}
+
+table HashtableOptions {
+ // The identity of hash tables. This identity will be used across different
+ // subgraphs in the same interpreter instance.
+ table_id:int;
+ key_dtype:TensorType;
+ value_dtype:TensorType;
+}
+
+table HashtableFindOptions {
+}
+
+table HashtableImportOptions {
+}
+
+table HashtableSizeOptions {
+}
+
+table VarHandleOptions {
+ container:string;
+ shared_name:string;
+}
+
+table ReadVariableOptions {
+}
+
+table AssignVariableOptions {
+}
+
+table RandomOptions {
+ seed: long;
+ seed2: long;
+}
+
+table BucketizeOptions {
+ boundaries: [float]; // The bucket boundaries.
+}
+
+table GeluOptions {
+ approximate: bool;
+}
+
+table DynamicUpdateSliceOptions {
+}
+
+table UnsortedSegmentProdOptions {
+}
+
+table UnsortedSegmentMaxOptions {
+}
+
+table UnsortedSegmentSumOptions {
+}
+
+table ATan2Options {
+}
+
+
+// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
+// builtin, or a string if the operator is custom.
+table OperatorCode {
+ // This field is for backward compatibility. This field will be used when
+ // the value of the extended builtin_code field is less than
+ // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+ deprecated_builtin_code:byte;
+ custom_code:string;
+
+ // The version of the operator. The version needs to be bumped whenever new
+ // parameters are introduced into an op.
+ version:int = 1;
+
+ // This field was introduced to resolve the op builtin code shortage problem
+ // (the original BuiltinOperator enum field was represented as a byte).
+ // This field will be used when the value of the extended builtin_code field
+ // is greater than BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+ builtin_code:BuiltinOperator;
+}
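A reader resolving the effective operator code from the two fields above might proceed as in the following sketch (hypothetical helper; TensorFlow Lite ships its own resolution logic, which this does not claim to reproduce exactly):

PLACEHOLDER_FOR_GREATER_OP_CODES = 127

def resolve_builtin_code(deprecated_builtin_code, builtin_code):
    """Prefer the extended field for ops beyond the old byte-sized range."""
    if builtin_code >= PLACEHOLDER_FOR_GREATER_OP_CODES:
        return builtin_code
    return deprecated_builtin_code

# ADD (= 0) still resolves through the backward-compatible byte field,
# while GELU (= 150) only fits in the extended builtin_code field.
assert resolve_builtin_code(0, 0) == 0
assert resolve_builtin_code(127, 150) == 150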
+
+enum CustomOptionsFormat : byte {
+ FLEXBUFFERS = 0,
+}
+
+// An operator takes tensors as inputs and outputs. The type of operation being
+// performed is determined by an index into the list of valid OperatorCodes,
+// while the specifics of each operation are configured using builtin_options
+// or custom_options.
+table Operator {
+ // Index into the operator_codes array. Using an integer here avoids
+ // complicated map lookups.
+ opcode_index:uint;
+
+ // Optional input are indicated by -1.
+ inputs:[int];
+ outputs:[int];
+
+ builtin_options:BuiltinOptions;
+ custom_options:[ubyte];
+ custom_options_format:CustomOptionsFormat;
+
+ // A list of booleans indicating the input tensors which are being mutated by
+ // this operator (e.g. used by RNN and LSTM).
+ // For example, if the "inputs" array refers to 5 tensors and the second and
+ // fifth are mutable variables, then this list will contain
+ // [false, true, false, false, true].
+ //
+ // If the list is empty, no variable is mutated in this operator.
+ // The list either has the same length as `inputs`, or is empty.
+ mutating_variable_inputs:[bool];
+
+ // A list of indices to the subgraph's "tensors" that are internal to an Op.
+ // Internal tensors are those that do not flow in or out of the operation,
+ // but instead are part of internal computation. As such, the operation's
+ // implementation may manage its memory more efficiently. They are needed
+ // however (i.e. not just an implementation detail) since they are part of the
+ // computation, which may require relevant metadata such as quantization
+ // parameters.
+ intermediates:[int];
+}
+
+// The root type, defining a subgraph, which typically represents an entire
+// model.
+table SubGraph {
+ // A list of all tensors used in this subgraph.
+ tensors:[Tensor];
+
+ // Indices of the tensors that are inputs into this subgraph. Note this is
+ // the list of non-static tensors that feed into the subgraph for inference.
+ inputs:[int];
+
+ // Indices of the tensors that are outputs out of this subgraph. Note this is
+ // the list of output tensors that are considered the product of the
+ // subgraph's inference.
+ outputs:[int];
+
+ // All operators, in execution order.
+ operators:[Operator];
+
+ // Name of this subgraph (used for debugging).
+ name:string;
+}
+
+// Table of raw data buffers (used for constant tensors). Referenced by tensors
+// by index. The generous alignment accommodates mmap-friendly data structures.
+table Buffer {
+ data:[ubyte] (force_align: 16);
+}
+
+table Metadata {
+ // A human readable string to uniquely identify a Metadata.
+ name:string;
+ // An index to the buffers table.
+ buffer:uint;
+}
+
+// Map from an alias name of tensor to tensor index in the graph.
+// This is used in Signature def.
+table TensorMap {
+ // Represents the alias to use for this tensor.
+ name:string;
+
+ // The actual tensor index in the primary graph that 'name' corresponds to.
+ tensor_index:uint;
+}
+
+// This corresponds to SignatureDef in Tensorflow SavedModel.
+// The SignatureDef will be part of the SavedModel provided for conversion.
+table SignatureDef {
+ // Named inputs for this signature.
+ inputs:[TensorMap];
+
+ // Named outputs for this signature.
+ outputs:[TensorMap];
+
+ // Key value which was in the Tensorflow SavedModel SignatureDef map.
+ signature_key:string;
+
+ // Model tag, deprecated.
+ deprecated_tag:string (deprecated);
+
+ // Index of the subgraph that corresponds to the exported method.
+ subgraph_index:uint;
+}
+
+table Model {
+ // Version of the schema.
+ version:uint;
+
+ // A list of all operator codes used in this model. This is
+ // kept in order because operators carry an index into this
+ // vector.
+ operator_codes:[OperatorCode];
+
+ // All the subgraphs of the model. The 0th is assumed to be the main
+ // model.
+ subgraphs:[SubGraph];
+
+ // A description of the model.
+ description:string;
+
+ // Buffers of the model.
+ // Note the 0th entry of this array must be an empty buffer (sentinel).
+ // This is a convention so that tensors without a buffer can provide 0 as
+ // their buffer.
+ buffers:[Buffer];
+
+ // Metadata about the model. Indirects into the existing buffers list.
+ // Deprecated, prefer to use metadata field.
+ metadata_buffer:[int];
+
+ // Metadata about the model.
+ metadata:[Metadata];
+
+ // Optional SignatureDefs for the model.
+ signature_defs:[SignatureDef];
+}
+
+root_type Model;
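For orientation, reading a model against this schema could look like the sketch below, assuming Python bindings generated with "flatc --python schema.fbs"; the file name and accessor calls follow the generated-code conventions and are illustrative, not tooling added by this patch:

from tflite.Model import Model  # generated by flatc from this schema

with open("model.tflite", "rb") as f:
    buf = f.read()

model = Model.GetRootAsModel(buf, 0)
print("schema version:", model.Version())
subgraph = model.Subgraphs(0)  # the 0th subgraph is the main model
print("tensors:", subgraph.TensorsLength())
print("operators:", subgraph.OperatorsLength())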
diff --git a/res/TensorFlowLiteSchema/2.12.1/schema.fbs b/res/TensorFlowLiteSchema/2.12.1/schema.fbs
new file mode 100644
index 000000000..05a906db0
--- /dev/null
+++ b/res/TensorFlowLiteSchema/2.12.1/schema.fbs
@@ -0,0 +1,1340 @@
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Revision History
+// Version 0: Initial version.
+// Version 1: Add subgraphs to schema.
+// Version 2: Rename operators to conform to NN API.
+// Version 3: Move buffer data from Model.Subgraph.Tensors to Model.Buffers.
+// Version 3a: Add new builtin op code field. Has backward compatibility with
+// version 3.
+// Version 3b: Rename fields in SignatureDef. Has backward compatibility with
+// version 3 and 3a.
+
+namespace tflite;
+
+// This corresponds to the version.
+file_identifier "TFL3";
+// File extension of any written files.
+file_extension "tflite";
+
+// IMPORTANT: All new members of tables, enums and unions must be added at the
+// end to ensure backwards compatibility.
+
+// The type of data stored in a tensor.
+enum TensorType : byte {
+ FLOAT32 = 0,
+ FLOAT16 = 1,
+ INT32 = 2,
+ UINT8 = 3,
+ INT64 = 4,
+ STRING = 5,
+ BOOL = 6,
+ INT16 = 7,
+ COMPLEX64 = 8,
+ INT8 = 9,
+ FLOAT64 = 10,
+ COMPLEX128 = 11,
+ UINT64 = 12,
+ // Experimental: Resource and variant types are experimental and subject
+ // to change. Do not implement custom kernels using resource & variant types
+ // for now.
+ RESOURCE = 13,
+ VARIANT = 14,
+ UINT32 = 15,
+ UINT16 = 16,
+ INT4 = 17,
+}
+
+// Custom quantization parameters for experimenting with new quantization
+// techniques.
+table CustomQuantization {
+ custom:[ubyte] (force_align: 16);
+}
+
+// Represents a specific quantization technique's parameters.
+union QuantizationDetails {
+ CustomQuantization,
+}
+
+// Parameters for converting a quantized tensor back to float.
+table QuantizationParameters {
+ // These four parameters are the asymmetric linear quantization parameters.
+ // Given a quantized value q, the corresponding float value f should be:
+ // f = scale * (q - zero_point)
+ // For other quantization types, the QuantizationDetails below is used.
+ min:[float]; // For importing back into tensorflow.
+ max:[float]; // For importing back into tensorflow.
+ scale:[float]; // For dequantizing the tensor's values.
+ zero_point:[long];
+
+ // If this is not none, the other quantization parameters (i.e. min, max,
+ // scale, zero_point fields above) are ignored and the value of the
+ // QuantizationDetails union should be used.
+ details:QuantizationDetails;
+
+ // Specifies the dimension of the Tensor's shape that the scales and
+ // zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1]
+ // with quantization params:
+ // scale=[1.0, 2.0, 3.0], zero_point=[1, 2, 3], quantization_dimension=1
+ // will be quantized across the second dimension of t.
+ // t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1
+ // t[:, 1, :, :] will have scale[1]=2.0, zero_point[1]=2
+ // t[:, 2, :, :] will have scale[2]=3.0, zero_point[2]=3
+ quantized_dimension:int;
+}
+
+// Sparse tensors.
+// We use a modification of the TACO format.
+// Reference: http://tensor-compiler.org/kjolstad-oopsla17-tensor-compiler.pdf
+//
+// To encode a conceptual n-dimensional dense tensor with dims (d0, ..., dn-1),
+// potentially with a k-dimensional block (0 <= k <= n) with dims
+// (dn, ..., dn+k-1), the format needs to specify:
+// 1. In what order to traverse these dimensions. For example, to store a 2-D
+// matrix in row major order, the traversal order would be (d0, d1),
+// whereas to store it in column major order, the traversal order would be
+// (d1, d0). If the 2-D matrix has a 2-D inner block, the traversal order
+// could be (d0, d1, d2, d3).
+// 2. How each block dimension in (dn, ..., dn+k-1) maps to the original
+// tensor dimension in (d0, ..., dn-1).
+// 3. In the traversal order defined above, the format (dense vs. sparse) and
+// index metadata for each dimension. For a dense dimension, this is just
+// the size of that dimension. For a sparse dimension, it's the same as
+// the compressed index defined in the Compressed Sparse Row (CSR) format.
+// (http://scipy-lectures.org/advanced/scipy_sparse/csr_matrix.html)
+
+// The storage type for a dimension. Currently we support:
+// 1. DENSE: each coordinate in this dimension is stored implicitly.
+// 2. SPARSE_CSR: only the coordinates with non-zero elements are stored. The
+ // compression technique is the same as the one CSR uses.
+// More types like a sparse dimension with a different compression technique
+// could be added to the list in the future.
+enum DimensionType : byte {
+ DENSE = 0,
+ SPARSE_CSR = 1,
+}
+
+table Int32Vector {
+ values:[int];
+}
+
+table Uint16Vector {
+ values:[ushort] (force_align: 4);
+}
+
+table Uint8Vector {
+ values:[ubyte] (force_align: 4);
+}
+
+// Variable-typed buffer to store the index metadata for a sparse dimension.
+ // The widest type is Int32 instead of UInt32 because a tensor's shape is an
+ // int32 vector. We don't want the per-dimensional index to overflow that range.
+union SparseIndexVector {
+ Int32Vector,
+ Uint16Vector,
+ Uint8Vector
+}
+
+table DimensionMetadata {
+ // Whether a dimension is dense or sparse.
+ format:DimensionType;
+ // Index metadata used for a dimension.
+ // - If format is DimensionType.DENSE then we use the dense_size field to
+ // store the size of that dimension. Each index in that dimension is
+ // stored implicitly.
+ // - If format is DimensionType.SPARSE_CSR then we use array_segments and
+ // array_indices to encode that dimension. array_segments represents how
+ // to segment the indices array, each segment corresponds to one element
+ // in the previous dimension. array_indices represents the index of the
+ // non-zero elements within this dimension (as those in the CSR matrix
+ // format, where the first array is row pointers and the second array is
+ // column indices).
+ dense_size:int;
+ array_segments:SparseIndexVector;
+ array_indices:SparseIndexVector;
+}
+
+// Parameters to encode a sparse TfLite tensor.
+table SparsityParameters {
+ // The traversal order of the dimensions defined in the `shape` field of the
+ // conceptual dense tensor. For an n-dimensional tensor with dims (d0, d1,
+ // ..., dn-1),
+ // - if not block sparse, the traversal_order is just a permutation of (d0,
+ // ..., dn-1). For example, a 2-D matrix stored in row-major order would
+ // have traversal_order = (d0, d1).
+ // - if block sparse with a k-dimensional block (0 <= k <= n), the
+ // traversal_order has n + k elements. The first n elements are still a
+ // permutation of (d0, ..., dn-1). The last k elements are a permutation
+ // of (dn, ..., dn+k-1), defining how to traverse a block internally. For
+ // example, a 2-D matrix with 2-D blocks, both stored in row-major order
+ // would have traversal_order = (d0, d1, d2, d3).
+ traversal_order:[int];
+ // For an n-dimensional tensor with a k-dimensional block (0 <= k <= n),
+ // stores how a block dimension in (dn, ..., dn+k-1) maps to the original
+ // tensor dimension in (d0, ..., dn-1).
+ // It's stored in the order of (dn, ..., dn+k-1).
+ // If not block-sparse, this field is NULL.
+ block_map:[int];
+ // In the traversal order defined above, the metadata needed for
+ // each dimension to locate the non-zero values in the original dense tensor.
+ // The size of the dim_metadata array = the size of the traversal_order array
+ // = n + k.
+ dim_metadata:[DimensionMetadata];
+}
+
+// The nested tensor type for VARIANT type.
+table VariantSubType {
+ // The tensor shape.
+ shape:[int];
+ type:TensorType;
+ // If false, the rank or the number of tensor dimensions is unknown.
+ // If false, "shape" must be [].
+ has_rank: bool = false;
+}
+
+table Tensor {
+ // The tensor shape. The meaning of each entry is operator-specific but
+ // builtin ops use: [batch size, height, width, number of channels] (That's
+ // Tensorflow's NHWC).
+ shape:[int];
+ type:TensorType;
+ // An index that refers to the buffers table at the root of the model. Or,
+ // if there is no data buffer associated (i.e. intermediate results), then
+ // this is 0 (which refers to an always existent empty buffer).
+ //
+ // The data_buffer itself is an opaque container, with the assumption that the
+ // target device is little-endian. In addition, all builtin operators assume
+ // the memory is ordered such that if `shape` is [4, 3, 2], then index
+ // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k].
+ buffer:uint;
+ name:string; // For debugging and importing back into tensorflow.
+ quantization:QuantizationParameters; // Optional.
+
+ is_variable:bool = false;
+
+ // Parameters to encode a sparse tensor. See the example in
+ // tensorflow/lite/testdata/sparse_tensor.json.
+ sparsity:SparsityParameters; // Optional.
+
+ // Encodes `shape` with unknown dimensions. Unknown dimensions are
+ // represented with -1.
+ shape_signature:[int]; // Optional.
+
+ // If false, the rank or the number of tensor dimensions is unknown.
+ // If false, "shape" must be [].
+ has_rank: bool = false;
+
+ // The nested Tensor types for VARIANT type. This is always empty for
+ // non-VARIANT types. This is optional because the nested type can be omitted.
+ // Currently only 1 subtype is supported. The field is defined as an array for
+ // flexibility of supporting multiple subtypes in the future.
+ variant_tensors:[VariantSubType];
+}
+
+// A list of builtin operators. Builtin operators are slightly faster than custom
+// ones, but not by much. Moreover, while custom operators accept an opaque
+// object containing configuration parameters, builtins have a predetermined
+// set of acceptable options.
+// LINT.IfChange
+enum BuiltinOperator : int32 {
+ ADD = 0,
+ AVERAGE_POOL_2D = 1,
+ CONCATENATION = 2,
+ CONV_2D = 3,
+ DEPTHWISE_CONV_2D = 4,
+ DEPTH_TO_SPACE = 5,
+ DEQUANTIZE = 6,
+ EMBEDDING_LOOKUP = 7,
+ FLOOR = 8,
+ FULLY_CONNECTED = 9,
+ HASHTABLE_LOOKUP = 10,
+ L2_NORMALIZATION = 11,
+ L2_POOL_2D = 12,
+ LOCAL_RESPONSE_NORMALIZATION = 13,
+ LOGISTIC = 14,
+ LSH_PROJECTION = 15,
+ LSTM = 16,
+ MAX_POOL_2D = 17,
+ MUL = 18,
+ RELU = 19,
+ // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed
+ // since different model developers use RELU1 in different ways. Never
+ // create another op called RELU1.
+ RELU_N1_TO_1 = 20,
+ RELU6 = 21,
+ RESHAPE = 22,
+ RESIZE_BILINEAR = 23,
+ RNN = 24,
+ SOFTMAX = 25,
+ SPACE_TO_DEPTH = 26,
+ SVDF = 27,
+ TANH = 28,
+ CONCAT_EMBEDDINGS = 29,
+ SKIP_GRAM = 30,
+ CALL = 31,
+ CUSTOM = 32,
+ EMBEDDING_LOOKUP_SPARSE = 33,
+ PAD = 34,
+ UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+ GATHER = 36,
+ BATCH_TO_SPACE_ND = 37,
+ SPACE_TO_BATCH_ND = 38,
+ TRANSPOSE = 39,
+ MEAN = 40,
+ SUB = 41,
+ DIV = 42,
+ SQUEEZE = 43,
+ UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
+ STRIDED_SLICE = 45,
+ BIDIRECTIONAL_SEQUENCE_RNN = 46,
+ EXP = 47,
+ TOPK_V2 = 48,
+ SPLIT = 49,
+ LOG_SOFTMAX = 50,
+ // DELEGATE is a special op type for the operations which are delegated to
+ // other backends.
+ // WARNING: Experimental interface, subject to change
+ DELEGATE = 51,
+ BIDIRECTIONAL_SEQUENCE_LSTM = 52,
+ CAST = 53,
+ PRELU = 54,
+ MAXIMUM = 55,
+ ARG_MAX = 56,
+ MINIMUM = 57,
+ LESS = 58,
+ NEG = 59,
+ PADV2 = 60,
+ GREATER = 61,
+ GREATER_EQUAL = 62,
+ LESS_EQUAL = 63,
+ SELECT = 64,
+ SLICE = 65,
+ SIN = 66,
+ TRANSPOSE_CONV = 67,
+ SPARSE_TO_DENSE = 68,
+ TILE = 69,
+ EXPAND_DIMS = 70,
+ EQUAL = 71,
+ NOT_EQUAL = 72,
+ LOG = 73,
+ SUM = 74,
+ SQRT = 75,
+ RSQRT = 76,
+ SHAPE = 77,
+ POW = 78,
+ ARG_MIN = 79,
+ FAKE_QUANT = 80,
+ REDUCE_PROD = 81,
+ REDUCE_MAX = 82,
+ PACK = 83,
+ LOGICAL_OR = 84,
+ ONE_HOT = 85,
+ LOGICAL_AND = 86,
+ LOGICAL_NOT = 87,
+ UNPACK = 88,
+ REDUCE_MIN = 89,
+ FLOOR_DIV = 90,
+ REDUCE_ANY = 91,
+ SQUARE = 92,
+ ZEROS_LIKE = 93,
+ FILL = 94,
+ FLOOR_MOD = 95,
+ RANGE = 96,
+ RESIZE_NEAREST_NEIGHBOR = 97,
+ LEAKY_RELU = 98,
+ SQUARED_DIFFERENCE = 99,
+ MIRROR_PAD = 100,
+ ABS = 101,
+ SPLIT_V = 102,
+ UNIQUE = 103,
+ CEIL = 104,
+ REVERSE_V2 = 105,
+ ADD_N = 106,
+ GATHER_ND = 107,
+ COS = 108,
+ WHERE = 109,
+ RANK = 110,
+ ELU = 111,
+ REVERSE_SEQUENCE = 112,
+ MATRIX_DIAG = 113,
+ QUANTIZE = 114,
+ MATRIX_SET_DIAG = 115,
+ ROUND = 116,
+ HARD_SWISH = 117,
+ IF = 118,
+ WHILE = 119,
+ NON_MAX_SUPPRESSION_V4 = 120,
+ NON_MAX_SUPPRESSION_V5 = 121,
+ SCATTER_ND = 122,
+ SELECT_V2 = 123,
+ DENSIFY = 124,
+ SEGMENT_SUM = 125,
+ BATCH_MATMUL = 126,
+ PLACEHOLDER_FOR_GREATER_OP_CODES = 127,
+ CUMSUM = 128,
+ CALL_ONCE = 129,
+ BROADCAST_TO = 130,
+ RFFT2D = 131,
+ CONV_3D = 132,
+ IMAG=133,
+ REAL=134,
+ COMPLEX_ABS=135,
+ HASHTABLE = 136,
+ HASHTABLE_FIND = 137,
+ HASHTABLE_IMPORT = 138,
+ HASHTABLE_SIZE = 139,
+ REDUCE_ALL = 140,
+ CONV_3D_TRANSPOSE = 141,
+ VAR_HANDLE = 142,
+ READ_VARIABLE = 143,
+ ASSIGN_VARIABLE = 144,
+ BROADCAST_ARGS = 145,
+ RANDOM_STANDARD_NORMAL = 146,
+ BUCKETIZE = 147,
+ RANDOM_UNIFORM = 148,
+ MULTINOMIAL = 149,
+ GELU = 150,
+ DYNAMIC_UPDATE_SLICE = 151,
+ RELU_0_TO_1 = 152,
+ UNSORTED_SEGMENT_PROD = 153,
+ UNSORTED_SEGMENT_MAX = 154,
+ UNSORTED_SEGMENT_SUM = 155,
+ ATAN2 = 156,
+ UNSORTED_SEGMENT_MIN = 157,
+ SIGN = 158
+}
+// LINT.ThenChange(nnapi_linter/linter.proto)
+
+// Options for the builtin operators.
+union BuiltinOptions {
+ Conv2DOptions,
+ DepthwiseConv2DOptions,
+ ConcatEmbeddingsOptions,
+ LSHProjectionOptions,
+ Pool2DOptions,
+ SVDFOptions,
+ RNNOptions,
+ FullyConnectedOptions,
+ SoftmaxOptions,
+ ConcatenationOptions,
+ AddOptions,
+ L2NormOptions,
+ LocalResponseNormalizationOptions,
+ LSTMOptions,
+ ResizeBilinearOptions,
+ CallOptions,
+ ReshapeOptions,
+ SkipGramOptions,
+ SpaceToDepthOptions,
+ EmbeddingLookupSparseOptions,
+ MulOptions,
+ PadOptions,
+ GatherOptions,
+ BatchToSpaceNDOptions,
+ SpaceToBatchNDOptions,
+ TransposeOptions,
+ ReducerOptions,
+ SubOptions,
+ DivOptions,
+ SqueezeOptions,
+ SequenceRNNOptions,
+ StridedSliceOptions,
+ ExpOptions,
+ TopKV2Options,
+ SplitOptions,
+ LogSoftmaxOptions,
+ CastOptions,
+ DequantizeOptions,
+ MaximumMinimumOptions,
+ ArgMaxOptions,
+ LessOptions,
+ NegOptions,
+ PadV2Options,
+ GreaterOptions,
+ GreaterEqualOptions,
+ LessEqualOptions,
+ SelectOptions,
+ SliceOptions,
+ TransposeConvOptions,
+ SparseToDenseOptions,
+ TileOptions,
+ ExpandDimsOptions,
+ EqualOptions,
+ NotEqualOptions,
+ ShapeOptions,
+ PowOptions,
+ ArgMinOptions,
+ FakeQuantOptions,
+ PackOptions,
+ LogicalOrOptions,
+ OneHotOptions,
+ LogicalAndOptions,
+ LogicalNotOptions,
+ UnpackOptions,
+ FloorDivOptions,
+ SquareOptions,
+ ZerosLikeOptions,
+ FillOptions,
+ BidirectionalSequenceLSTMOptions,
+ BidirectionalSequenceRNNOptions,
+ UnidirectionalSequenceLSTMOptions,
+ FloorModOptions,
+ RangeOptions,
+ ResizeNearestNeighborOptions,
+ LeakyReluOptions,
+ SquaredDifferenceOptions,
+ MirrorPadOptions,
+ AbsOptions,
+ SplitVOptions,
+ UniqueOptions,
+ ReverseV2Options,
+ AddNOptions,
+ GatherNdOptions,
+ CosOptions,
+ WhereOptions,
+ RankOptions,
+ ReverseSequenceOptions,
+ MatrixDiagOptions,
+ QuantizeOptions,
+ MatrixSetDiagOptions,
+ HardSwishOptions,
+ IfOptions,
+ WhileOptions,
+ DepthToSpaceOptions,
+ NonMaxSuppressionV4Options,
+ NonMaxSuppressionV5Options,
+ ScatterNdOptions,
+ SelectV2Options,
+ DensifyOptions,
+ SegmentSumOptions,
+ BatchMatMulOptions,
+ CumsumOptions,
+ CallOnceOptions,
+ BroadcastToOptions,
+ Rfft2dOptions,
+ Conv3DOptions,
+ HashtableOptions,
+ HashtableFindOptions,
+ HashtableImportOptions,
+ HashtableSizeOptions,
+ VarHandleOptions,
+ ReadVariableOptions,
+ AssignVariableOptions,
+ RandomOptions,
+ BucketizeOptions,
+ GeluOptions,
+ DynamicUpdateSliceOptions,
+ UnsortedSegmentProdOptions,
+ UnsortedSegmentMaxOptions,
+ UnsortedSegmentMinOptions,
+ UnsortedSegmentSumOptions,
+ ATan2Options,
+ SignOptions
+}
+
+// LINT.IfChange
+enum Padding : byte { SAME, VALID }
+// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td)
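+// Illustrative note (not part of the upstream schema): following the usual
+// TensorFlow convention, SAME pads so that each output spatial size is
+// ceil(input / stride), while VALID uses no padding and yields
+// ceil((input - filter + 1) / stride).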
+
+// LINT.IfChange
+enum ActivationFunctionType : byte {
+ NONE = 0,
+ RELU = 1,
+ RELU_N1_TO_1 = 2,
+ RELU6 = 3,
+ TANH = 4,
+ SIGN_BIT = 5,
+}
+// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td)
+
+table Conv2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+// Options for both Conv3D and Conv3DTranspose.
+table Conv3DOptions {
+ padding:Padding;
+ stride_d:int;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_d_factor:int = 1;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table Pool2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ filter_width:int;
+ filter_height:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DepthwiseConv2DOptions {
+ // Parameters for DepthwiseConv version 1 or above.
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ // `depth_multiplier` is redundant. It's used by CPU kernels in
+ // TensorFlow 2.0 or below, but ignored in versions above.
+ // See comments in lite/c/builtin_op_data.h for more details.
+ depth_multiplier:int;
+ fused_activation_function:ActivationFunctionType;
+ // Parameters for DepthwiseConv version 2 or above.
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table ConcatEmbeddingsOptions {
+ num_channels:int;
+ num_columns_per_channel:[int];
+ embedding_dim_per_channel:[int]; // This could be inferred from parameters.
+}
+
+enum LSHProjectionType: byte {
+ UNKNOWN = 0,
+ SPARSE = 1,
+ DENSE = 2,
+}
+
+table LSHProjectionOptions {
+ type: LSHProjectionType;
+}
+
+table SVDFOptions {
+ rank:int;
+ fused_activation_function:ActivationFunctionType;
+ // For weights-only quantization, use asymmetric quantization for
+ // non-constant inputs at evaluation time.
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow RNNCell.
+table RNNOptions {
+ fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with RNNCell.
+table SequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow bidirectional_dynamic_rnn with RNNCell.
+table BidirectionalSequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ merge_outputs: bool;
+ asymmetric_quantize_inputs:bool;
+}
+
+// LINT.IfChange
+enum FullyConnectedOptionsWeightsFormat: byte {
+ DEFAULT = 0,
+ SHUFFLED4x16INT8 = 1,
+}
+// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td)
+
+// An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
+table FullyConnectedOptions {
+ // Parameters for FullyConnected version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+
+ // Parameters for FullyConnected version 2 or above.
+ weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT;
+
+ // Parameters for FullyConnected version 5 or above.
+ // If set to true, then the number of dimensions is preserved. Furthermore,
+ // all but the last dimension of the input and output shapes will be equal.
+ keep_num_dims: bool;
+
+ // Parameters for FullyConnected version 7 or above.
+ // If set to true, then weights-only op will use asymmetric quantization for
+ // inputs.
+ asymmetric_quantize_inputs: bool;
+}
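+// Illustrative example (not part of the upstream schema): for an input of
+// shape [2, 3, 16] and 8 output units, keep_num_dims = false flattens the
+// result to [6, 8], whereas keep_num_dims = true preserves the rank and
+// yields [2, 3, 8].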
+
+table SoftmaxOptions {
+ beta: float;
+}
+
+// An implementation of TensorFlow concat.
+table ConcatenationOptions {
+ axis:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table AddOptions {
+ fused_activation_function:ActivationFunctionType;
+ // Parameters supported by version 3.
+ pot_scale_int16:bool = true;
+}
+
+table MulOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table L2NormOptions {
+ // This field is currently ignored in the L2 Norm Op.
+ fused_activation_function:ActivationFunctionType;
+}
+
+table LocalResponseNormalizationOptions {
+ radius:int;
+ bias:float;
+ alpha:float;
+ beta:float;
+}
+
+// LINT.IfChange
+enum LSTMKernelType : byte {
+ // Full LSTM kernel which supports peephole and projection.
+ FULL = 0,
+ // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
+ BASIC = 1,
+}
+// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td)
+
+// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
+table LSTMOptions {
+ // Parameters for LSTM version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // Parameters for LSTM version 2 or above.
+ // Basic kernel is only supported in version 2 or above.
+ kernel_type: LSTMKernelType = FULL;
+
+ // Parameters for LSTM version 4 or above.
+ asymmetric_quantize_inputs: bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with LSTMCell.
+table UnidirectionalSequenceLSTMOptions {
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true then first dimension is sequence, otherwise batch.
+ time_major:bool;
+
+ // Parameter for Unidirectional Sequence LSTM version 3.
+ asymmetric_quantize_inputs:bool;
+
+ // Parameter for unidirectional sequence RNN version 4.
+ diagonal_recurrent_tensors:bool;
+}
+
+table BidirectionalSequenceLSTMOptions {
+ // Parameters supported by version 1:
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true, store the outputs of both directions into the first output.
+ merge_outputs: bool;
+
+ // Parameters supported by version 2:
+ // If true then first dimension is sequence, otherwise batch.
+ // Version 1 implementations assumed time_major to be true, so this default
+ // value should never change.
+ time_major: bool = true;
+
+ // Parameters for version 3 or above.
+ asymmetric_quantize_inputs:bool;
+}
+
+table ResizeBilinearOptions {
+ new_height: int (deprecated);
+ new_width: int (deprecated);
+ align_corners: bool;
+ half_pixel_centers: bool;
+}
+
+table ResizeNearestNeighborOptions {
+ align_corners: bool;
+ half_pixel_centers: bool;
+}
+
+// Options for a call operation.
+table CallOptions {
+ // The subgraph index that needs to be called.
+ subgraph:uint;
+}
+
+table PadOptions {
+}
+
+table PadV2Options {
+}
+
+table ReshapeOptions {
+ new_shape:[int];
+}
+
+table SpaceToBatchNDOptions {
+}
+
+table BatchToSpaceNDOptions {
+}
+
+table SkipGramOptions {
+ ngram_size: int;
+ max_skip_size: int;
+ include_all_ngrams: bool;
+}
+
+table SpaceToDepthOptions {
+ block_size: int;
+}
+
+table DepthToSpaceOptions {
+ block_size: int;
+}
+
+table SubOptions {
+ fused_activation_function:ActivationFunctionType;
+ // Parameters supported by version 5
+ pot_scale_int16:bool = true;
+}
+
+table DivOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table TopKV2Options {
+}
+
+enum CombinerType : byte {
+ SUM = 0,
+ MEAN = 1,
+ SQRTN = 2,
+}
+
+table EmbeddingLookupSparseOptions {
+ combiner:CombinerType;
+}
+
+table GatherOptions {
+ axis: int;
+ // Parameters for Gather version 5 or above.
+ batch_dims: int = 0;
+}
+
+table TransposeOptions {
+}
+
+table ExpOptions {
+}
+
+table CosOptions {
+}
+
+table ReducerOptions {
+ keep_dims: bool;
+}
+
+table SqueezeOptions {
+ squeeze_dims:[int];
+}
+
+table SplitOptions {
+ num_splits: int;
+}
+
+table SplitVOptions {
+ num_splits: int;
+}
+
+table StridedSliceOptions {
+ begin_mask: int;
+ end_mask: int;
+ ellipsis_mask: int;
+ new_axis_mask: int;
+ shrink_axis_mask: int;
+}
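+// Illustrative note (not part of the upstream schema): as in
+// tf.strided_slice, each mask is a bitfield over dimensions. If bit i of
+// begin_mask (or end_mask) is set, begin[i] (or end[i]) is ignored and the
+// widest possible range applies, and a set bit i in shrink_axis_mask drops
+// dimension i from the result.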
+
+table LogSoftmaxOptions {
+}
+
+table CastOptions {
+ in_data_type: TensorType;
+ out_data_type: TensorType;
+}
+
+table DequantizeOptions {
+}
+
+table MaximumMinimumOptions {
+}
+
+table TileOptions {
+}
+
+table ArgMaxOptions {
+ output_type : TensorType;
+}
+
+table ArgMinOptions {
+ output_type : TensorType;
+}
+
+table GreaterOptions {
+}
+
+table GreaterEqualOptions {
+}
+
+table LessOptions {
+}
+
+table LessEqualOptions {
+}
+
+table NegOptions {
+}
+
+table SelectOptions {
+}
+
+table SliceOptions {
+}
+
+table TransposeConvOptions {
+ // Parameters supported by version 1, 2, 3:
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+
+ // Parameters supported by version 4:
+ fused_activation_function:ActivationFunctionType = NONE;
+}
+
+table ExpandDimsOptions {
+}
+
+table SparseToDenseOptions {
+ validate_indices:bool;
+}
+
+table EqualOptions {
+}
+
+table NotEqualOptions {
+}
+
+table ShapeOptions {
+ // Optional output type of the operation (int32 or int64). Defaults to int32.
+ out_type : TensorType;
+}
+
+table RankOptions {
+}
+
+table PowOptions {
+}
+
+table FakeQuantOptions {
+ // Parameters supported by version 1:
+ min:float;
+ max:float;
+ num_bits:int;
+
+ // Parameters supported by version 2:
+ narrow_range:bool;
+}
+
+table PackOptions {
+ values_count:int;
+ axis:int;
+}
+
+table LogicalOrOptions {
+}
+
+table OneHotOptions {
+ axis:int;
+}
+
+table AbsOptions {
+}
+
+table HardSwishOptions {
+}
+
+table LogicalAndOptions {
+}
+
+table LogicalNotOptions {
+}
+
+table UnpackOptions {
+ num:int;
+ axis:int;
+}
+
+table FloorDivOptions {
+}
+
+table SquareOptions {
+}
+
+table ZerosLikeOptions {
+}
+
+table FillOptions {
+}
+
+table FloorModOptions {
+}
+
+table RangeOptions {
+}
+
+table LeakyReluOptions {
+ alpha:float;
+}
+
+table SquaredDifferenceOptions {
+}
+
+// LINT.IfChange
+enum MirrorPadMode : byte {
+ // Doesn't include borders.
+ REFLECT = 0,
+ // Includes borders.
+ SYMMETRIC = 1,
+}
+// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td)
+
+table MirrorPadOptions {
+ mode:MirrorPadMode;
+}
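+// Illustrative example (not part of the upstream schema): padding [1, 2, 3]
+// with two values on the left yields [3, 2, 1, 2, 3] in REFLECT mode (the
+// border element is not repeated) and [2, 1, 1, 2, 3] in SYMMETRIC mode
+// (the border element is repeated).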
+
+table UniqueOptions {
+ idx_out_type:TensorType = INT32;
+}
+
+table ReverseV2Options {
+}
+
+table AddNOptions {
+}
+
+table GatherNdOptions {
+}
+
+table WhereOptions {
+}
+
+table ReverseSequenceOptions {
+ seq_dim:int;
+ batch_dim:int = 0;
+}
+
+table MatrixDiagOptions {
+}
+
+table QuantizeOptions {
+}
+
+table MatrixSetDiagOptions {
+}
+
+table IfOptions {
+ then_subgraph_index:int;
+ else_subgraph_index:int;
+}
+
+table CallOnceOptions {
+ init_subgraph_index:int;
+}
+
+table WhileOptions {
+ cond_subgraph_index:int;
+ body_subgraph_index:int;
+}
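+// Illustrative note (not part of the upstream schema): IF runs exactly one
+// of the two referenced subgraphs depending on its boolean condition input,
+// while WHILE evaluates cond_subgraph_index on the loop-carried values and,
+// as long as it returns true, runs body_subgraph_index, feeding the body's
+// outputs back in as the next iteration's inputs.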
+
+table NonMaxSuppressionV4Options {
+}
+
+table NonMaxSuppressionV5Options {
+}
+
+table ScatterNdOptions {
+}
+
+table SelectV2Options {
+}
+
+table DensifyOptions {
+}
+
+table SegmentSumOptions {
+}
+
+table BatchMatMulOptions {
+ adj_x:bool;
+ adj_y:bool;
+ // Parameters for BatchMatMul version 4 or above.
+ // If set to true, then weights-only op will use asymmetric quantization for
+ // inputs.
+ asymmetric_quantize_inputs: bool;
+}
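+// Illustrative note (not part of the upstream schema): adj_x / adj_y mean
+// the last two dimensions of the corresponding operand are transposed
+// before multiplication, so x of shape [..., m, k] pairs with y of shape
+// [..., n, k] when adj_y = true.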
+
+table CumsumOptions {
+ exclusive:bool;
+ reverse:bool;
+}
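+// Illustrative example (not part of the upstream schema): for input
+// [1, 2, 3], the default cumulative sum is [1, 3, 6]; exclusive = true
+// gives [0, 1, 3] and reverse = true gives [6, 5, 3].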
+
+table BroadcastToOptions {
+}
+
+table Rfft2dOptions {
+}
+
+table HashtableOptions {
+ // The identity of hash tables. This identity will be used across different
+ // subgraphs in the same interpreter instance.
+ table_id:int;
+ key_dtype:TensorType;
+ value_dtype:TensorType;
+}
+
+table HashtableFindOptions {
+}
+
+table HashtableImportOptions {
+}
+
+table HashtableSizeOptions {
+}
+
+table VarHandleOptions {
+ container:string;
+ shared_name:string;
+}
+
+table ReadVariableOptions {
+}
+
+table AssignVariableOptions {
+}
+
+table RandomOptions {
+ seed: long;
+ seed2: long;
+}
+
+table BucketizeOptions {
+ boundaries: [float]; // The bucket boundaries.
+}
+
+table GeluOptions {
+ approximate: bool;
+}
+
+table DynamicUpdateSliceOptions {
+}
+
+table UnsortedSegmentProdOptions {
+}
+
+table UnsortedSegmentMaxOptions {
+}
+
+table UnsortedSegmentSumOptions {
+}
+
+table ATan2Options {
+}
+
+table UnsortedSegmentMinOptions {
+}
+
+table SignOptions {
+}
+
+// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
+// builtin, or a string if the operator is custom.
+table OperatorCode {
+ // This field is for backward compatibility. This field will be used when
+ // the value of the extended builtin_code field is less than
+ // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+ deprecated_builtin_code:byte;
+ custom_code:string;
+
+ // The version of the operator. The version needs to be bumped whenever new
+ // parameters are introduced into an op.
+ version:int = 1;
+
+ // This field is introduced to resolve the op builtin-code shortage problem
+ // (the original BuiltinOperator enum field was represented as a byte).
+ // This field will be used when the value of the extended builtin_code field
+ // is greater than BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+ builtin_code:BuiltinOperator;
+}
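+// Illustrative example (not part of the upstream schema), in FlatBuffers
+// JSON form: CONV_2D (3) fits in a byte, so an entry may look like
+//   { "deprecated_builtin_code": 3, "builtin_code": "CONV_2D", "version": 1 }
+// whereas CUMSUM (128) does not, so the deprecated field is clamped:
+//   { "deprecated_builtin_code": 127, "builtin_code": "CUMSUM", "version": 1 }
+// and readers resolve the op code as the larger of the two fields.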
+
+enum CustomOptionsFormat : byte {
+ FLEXBUFFERS = 0,
+}
+
+// An operator takes tensors as inputs and outputs. The type of operation being
+// performed is determined by an index into the list of valid OperatorCodes,
+// while the specifics of each operation are configured using builtin_options
+// or custom_options.
+table Operator {
+ // Index into the operator_codes array. Using an integer here avoids
+ // complicated map lookups.
+ opcode_index:uint;
+
+ // Optional inputs are indicated by -1.
+ inputs:[int];
+ outputs:[int];
+
+ builtin_options:BuiltinOptions;
+ custom_options:[ubyte];
+ custom_options_format:CustomOptionsFormat;
+
+ // A list of booleans indicating the input tensors which are being mutated by
+ // this operator (e.g. used by RNN and LSTM).
+ // For example, if the "inputs" array refers to 5 tensors and the second and
+ // fifth are mutable variables, then this list will contain
+ // [false, true, false, false, true].
+ //
+ // If the list is empty, no variable is mutated in this operator.
+ // The list either has the same length as `inputs`, or is empty.
+ mutating_variable_inputs:[bool];
+
+ // A list of indices to the subgraph's "tensors" that are internal to an Op.
+ // Internal tensors are those that do not flow in or out of the operation,
+ // but instead are part of internal computation. As such, the operation's
+ // implementation may manage its memory more efficiently. They are needed
+ // however (i.e. not just an implementation detail) since they are part of the
+ // computation, which may require relevant metadata such as quantization
+ // parameters.
+ intermediates:[int];
+}
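+// Illustrative example (not part of the upstream schema), in FlatBuffers
+// JSON form, for an ADD node reading tensors 0 and 1 and writing tensor 2:
+//   { "opcode_index": 0, "inputs": [0, 1], "outputs": [2],
+//     "builtin_options_type": "AddOptions", "builtin_options": {} }
+// All indices refer to the enclosing subgraph's tensors array, and
+// custom_options stays empty for builtin operators.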
+
+// The root type, defining a subgraph, which typically represents an entire
+// model.
+table SubGraph {
+ // A list of all tensors used in this subgraph.
+ tensors:[Tensor];
+
+ // Indices of the tensors that are inputs into this subgraph. Note this is
+ // the list of non-static tensors that feed into the subgraph for inference.
+ inputs:[int];
+
+ // Indices of the tensors that are outputs out of this subgraph. Note this is
+ // the list of output tensors that are considered the product of the
+ // subgraph's inference.
+ outputs:[int];
+
+ // All operators, in execution order.
+ operators:[Operator];
+
+ // Name of this subgraph (used for debugging).
+ name:string;
+}
+
+// Table of raw data buffers (used for constant tensors). Referenced by tensors
+// by index. The generous alignment accommodates mmap-friendly data structures.
+table Buffer {
+ data:[ubyte] (force_align: 16);
+}
+
+table Metadata {
+ // A human readable string to uniquely identify a Metadata.
+ name:string;
+ // An index to the buffers table.
+ buffer:uint;
+}
+
+// Map from an alias name of a tensor to its tensor index in the graph.
+// This is used in SignatureDef.
+table TensorMap {
+ // Represents the alias to use for this tensor.
+ name:string;
+
+ // The actual tensor index in the primary graph that 'name' corresponds to.
+ tensor_index:uint;
+}
+
+// This corresponds to SignatureDef in Tensorflow SavedModel.
+// The SignatureDef will be part of the SavedModel provided for conversion.
+table SignatureDef {
+ // Named inputs for this signature.
+ inputs:[TensorMap];
+
+ // Named outputs for this signature.
+ outputs:[TensorMap];
+
+ // Key value which was in the Tensorflow SavedModel SignatureDef map.
+ signature_key:string;
+
+ // Model tag, deprecated.
+ deprecated_tag:string (deprecated);
+
+ // Index of the subgraph that corresponds to the exported method.
+ subgraph_index:uint;
+}
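+// Illustrative example (not part of the upstream schema), in FlatBuffers
+// JSON form:
+//   { "inputs": [ { "name": "input_ids", "tensor_index": 0 } ],
+//     "outputs": [ { "name": "logits", "tensor_index": 5 } ],
+//     "signature_key": "serving_default", "subgraph_index": 0 }
+// The aliases come from the original SavedModel SignatureDef map.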
+
+table Model {
+ // Version of the schema.
+ version:uint;
+
+ // A list of all operator codes used in this model. This is
+ // kept in order because operators carry an index into this
+ // vector.
+ operator_codes:[OperatorCode];
+
+ // All the subgraphs of the model. The 0th is assumed to be the main
+ // model.
+ subgraphs:[SubGraph];
+
+ // A description of the model.
+ description:string;
+
+ // Buffers of the model.
+ // Note the 0th entry of this array must be an empty buffer (sentinel).
+ // This is a convention so that tensors without a buffer can provide 0 as
+ // their buffer.
+ buffers:[Buffer];
+
+ // Metadata about the model. Indirects into the existing buffers list.
+ // Deprecated, prefer to use metadata field.
+ metadata_buffer:[int];
+
+ // Metadata about the model.
+ metadata:[Metadata];
+
+ // Optional SignatureDefs for the model.
+ signature_defs:[SignatureDef];
+}
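+// Illustrative note (not part of the upstream schema): because buffers[0]
+// must be the empty sentinel, constant tensors reference some buffer index
+// >= 1, while activation tensors with no stored data simply use buffer = 0.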
+
+root_type Model;
diff --git a/res/TensorFlowLiteSchema/2.6.0/schema.fbs b/res/TensorFlowLiteSchema/2.6.0/schema.fbs
new file mode 100644
index 000000000..6fc51f838
--- /dev/null
+++ b/res/TensorFlowLiteSchema/2.6.0/schema.fbs
@@ -0,0 +1,1240 @@
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Revision History
+// Version 0: Initial version.
+// Version 1: Add subgraphs to schema.
+// Version 2: Rename operators to conform to NN API.
+// Version 3: Move buffer data from Model.Subgraph.Tensors to Model.Buffers.
+// Version 3a: Add new builtin op code field. Has backward compatibility with
+// version 3.
+
+namespace tflite;
+
+// This corresponds to the version.
+file_identifier "TFL3";
+// File extension of any written files.
+file_extension "tflite";
+
+// IMPORTANT: All new members of tables, enums and unions must be added at the
+// end to ensure backwards compatibility.
+
+// The type of data stored in a tensor.
+enum TensorType : byte {
+ FLOAT32 = 0,
+ FLOAT16 = 1,
+ INT32 = 2,
+ UINT8 = 3,
+ INT64 = 4,
+ STRING = 5,
+ BOOL = 6,
+ INT16 = 7,
+ COMPLEX64 = 8,
+ INT8 = 9,
+ FLOAT64 = 10,
+ COMPLEX128 = 11,
+ UINT64 = 12,
+ // Experimental: Resource and variant types are experimental and subject
+ // to change. Do not implement custom kernels using resource & variant types
+ // now.
+ RESOURCE = 13,
+ VARIANT = 14,
+ UINT32 = 15,
+}
+
+// Custom quantization parameters for experimenting with new quantization
+// techniques.
+table CustomQuantization {
+ custom:[ubyte] (force_align: 16);
+}
+
+// Represents a specific quantization technique's parameters.
+union QuantizationDetails {
+ CustomQuantization,
+}
+
+// Parameters for converting a quantized tensor back to float.
+table QuantizationParameters {
+ // These four parameters are the asymmetric linear quantization parameters.
+ // Given a quantized value q, the corresponding float value f should be:
+ // f = scale * (q - zero_point)
+ // For other quantization types, the QuantizationDetails below is used.
+ min:[float]; // For importing back into tensorflow.
+ max:[float]; // For importing back into tensorflow.
+ scale:[float]; // For dequantizing the tensor's values.
+ zero_point:[long];
+
+ // If this is not none, the other quantization parameters (i.e. min, max,
+ // scale, zero_point fields above) are ignored and the value of the
+ // QuantizationDetails union should be used.
+ details:QuantizationDetails;
+
+ // Specifies the dimension of the Tensor's shape that the scales and
+ // zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1]
+ // with quantization params:
+ // scale=[1.0, 2.0, 3.0], zero_point=[1, 2, 3], quantization_dimension=1
+ // will be quantized across the second dimension of t.
+ // t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1
+ // t[:, 1, :, :] will have scale[1]=2.0, zero_point[0]=2
+ // t[:, 2, :, :] will have scale[2]=3.0, zero_point[0]=3
+ quantized_dimension:int;
+}
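+// Illustrative example (not part of the upstream schema): for a per-tensor
+// quantized int8 tensor with scale = [0.5] and zero_point = [-128], the
+// stored value q = -124 dequantizes to f = 0.5 * (-124 - (-128)) = 2.0.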
+
+// Sparse tensors.
+// We use a modification of the TACO format.
+// Reference: http://tensor-compiler.org/kjolstad-oopsla17-tensor-compiler.pdf
+//
+// To encode a conceptual n-dimensional dense tensor with dims (d0, ..., dn-1),
+// potentially with a k-dimensional block (0 <= k <= n) with dims
+// (dn, ..., dn+k-1), the format needs to specify:
+// 1. In what order to traverse these dimensions. For example, to store a 2-D
+// matrix in row major order, the traversal order would be (d0, d1),
+// whereas to store it in column major order, the traversal order would be
+// (d1, d0). If the 2-D matrix has a 2-D inner block, the traversal order
+// could be (d0, d1, d2, d3).
+// 2. How each block dimension in (dn, ..., dn+k-1) maps to the original
+// tensor dimension in (d0, ..., dn-1).
+// 3. In the traversal order defined above, the format (dense vs. sparse) and
+// index metadata for each dimension. For a dense dimension, this is just
+// the size of that dimension. For a sparse dimension, it's the same as
+// the compressed index defined in the Compressed Sparse Row (CSR) format.
+// (http://scipy-lectures.org/advanced/scipy_sparse/csr_matrix.html)
+
+// The storage type for a dimension. Currently we support:
+// 1. DENSE: each coordinate in this dimension is stored implicitly.
+// 2. SPARSE_CSR: only the coordinates with non-zero elements are stored. The
+// compression technique is the same as what CSR uses.
+// More types like a sparse dimension with a different compression technique
+// could be added to the list in the future.
+enum DimensionType : byte {
+ DENSE = 0,
+ SPARSE_CSR = 1,
+}
+
+table Int32Vector {
+ values:[int];
+}
+
+table Uint16Vector {
+ values:[ushort] (force_align: 4);
+}
+
+table Uint8Vector {
+ values:[ubyte] (force_align: 4);
+}
+
+// Variable-typed buffer to store the index metadata for a sparse dimension.
+// The widest type is Int32 instead of UInt32 because a tensor's shape is an int32
+// vector. We don't want the per-dimensional index to overflow that range.
+union SparseIndexVector {
+ Int32Vector,
+ Uint16Vector,
+ Uint8Vector
+}
+
+table DimensionMetadata {
+ // Whether a dimension is dense or sparse.
+ format:DimensionType;
+ // Index metadata used for a dimension.
+ // - If format is DimensionType.DENSE then we use the dense_size field to
+ // store the size of that dimension. Each index in that dimension is
+ // stored implicitly.
+ // - If format is DimensionType.SPARSE_CSR then we use array_segments and
+ // array_indices to encode that dimension. array_segments represents how
+ // to segment the indices array, each segment corresponds to one element
+ // in the previous dimension. array_indices represents the index of the
+ // non-zero elements within this dimension (as those in the CSR matrix
+ // format, where the first array is row pointers and the second array is
+ // column indices).
+ dense_size:int;
+ array_segments:SparseIndexVector;
+ array_indices:SparseIndexVector;
+}
+
+// Parameters to encode a sparse TfLite tensor.
+table SparsityParameters {
+ // The traversal order of the dimensions defined in the `shape` field of the
+ // conceptual dense tensor. For an n-dimensional tensor with dims (d0, d1,
+ // ..., dn-1),
+ // - if not block sparse, the traversal_order is just a permutation of (d0,
+ // ..., dn-1). For example, a 2-D matrix stored in row-major order would
+ // have traversal_order = (d0, d1).
+ // - if block sparse with a k-dimensional block (0 <= k <= n), the
+ // traversal_order has n + k elements. The first n elements are still a
+ // permutation of (d0, ..., dn-1). The last k elements are a permutation
+ // of (dn, ..., dn+k-1), defining how to traverse a block internally. For
+ // example, a 2-D matrix with 2-D blocks, both stored in row-major order
+ // would have traversal_order = (d0, d1, d2, d3).
+ traversal_order:[int];
+ // For an n-dimensional tensor with a k-dimensional block (0 <= k <= n),
+ // stores how a block dimension in (dn, ..., dn+k-1) maps to the original
+ // tensor dimension in (d0, ..., dn-1).
+ // It's stored in the order of (dn, ..., dn+k-1).
+ // If not block-sparse, this field is NULL.
+ block_map:[int];
+ // In the traversal order defined above, the metadata needed for
+ // each dimension to locate the non-zero values in the original dense tensor.
+ // The size of the dim_metadata array = the size of the traversal_order array
+ // = n + k.
+ dim_metadata:[DimensionMetadata];
+}
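+// Illustrative example (not part of the upstream schema): a 4x4 matrix whose
+// only non-zeros sit at [0, 1] and [2, 3] could be encoded with
+// traversal_order = [0, 1], no block_map, d0 DENSE (dense_size = 4), and
+// d1 SPARSE_CSR with array_segments = [0, 1, 1, 2, 2] (row pointers) and
+// array_indices = [1, 3] (column indices), so only two values are stored.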
+
+table Tensor {
+ // The tensor shape. The meaning of each entry is operator-specific but
+ // builtin ops use: [batch size, height, width, number of channels] (That's
+ // Tensorflow's NHWC).
+ shape:[int];
+ type:TensorType;
+ // An index that refers to the buffers table at the root of the model. Or,
+ // if there is no data buffer associated (i.e. intermediate results), then
+ // this is 0 (which refers to an always existent empty buffer).
+ //
+ // The data_buffer itself is an opaque container, with the assumption that the
+ // target device is little-endian. In addition, all builtin operators assume
+ // the memory is ordered such that if `shape` is [4, 3, 2], then index
+ // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k].
+ buffer:uint;
+ name:string; // For debugging and importing back into tensorflow.
+ quantization:QuantizationParameters; // Optional.
+
+ is_variable:bool = false;
+
+ // Parameters to encode a sparse tensor. See the example in
+ // tensorflow/lite/testdata/sparse_tensor.json.
+ sparsity:SparsityParameters; // Optional.
+
+ // Encodes `shape` with unknown dimensions. Unknown dimensions are
+ // represented with -1.
+ shape_signature:[int]; // Optional.
+}
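+// Illustrative example (not part of the upstream schema): with
+// shape = [4, 3, 2], the element at [i, j, k] = [1, 2, 1] lives at
+// data_buffer[1*3*2 + 2*2 + 1] = data_buffer[11].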
+
+// A list of builtin operators. Builtin operators are slightly faster than custom
+// ones, but not by much. Moreover, while custom operators accept an opaque
+// object containing configuration parameters, builtins have a predetermined
+// set of acceptable options.
+// LINT.IfChange
+enum BuiltinOperator : int32 {
+ ADD = 0,
+ AVERAGE_POOL_2D = 1,
+ CONCATENATION = 2,
+ CONV_2D = 3,
+ DEPTHWISE_CONV_2D = 4,
+ DEPTH_TO_SPACE = 5,
+ DEQUANTIZE = 6,
+ EMBEDDING_LOOKUP = 7,
+ FLOOR = 8,
+ FULLY_CONNECTED = 9,
+ HASHTABLE_LOOKUP = 10,
+ L2_NORMALIZATION = 11,
+ L2_POOL_2D = 12,
+ LOCAL_RESPONSE_NORMALIZATION = 13,
+ LOGISTIC = 14,
+ LSH_PROJECTION = 15,
+ LSTM = 16,
+ MAX_POOL_2D = 17,
+ MUL = 18,
+ RELU = 19,
+ // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed
+ // since different model developers use RELU1 in different ways. Never
+ // create another op called RELU1.
+ RELU_N1_TO_1 = 20,
+ RELU6 = 21,
+ RESHAPE = 22,
+ RESIZE_BILINEAR = 23,
+ RNN = 24,
+ SOFTMAX = 25,
+ SPACE_TO_DEPTH = 26,
+ SVDF = 27,
+ TANH = 28,
+ CONCAT_EMBEDDINGS = 29,
+ SKIP_GRAM = 30,
+ CALL = 31,
+ CUSTOM = 32,
+ EMBEDDING_LOOKUP_SPARSE = 33,
+ PAD = 34,
+ UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+ GATHER = 36,
+ BATCH_TO_SPACE_ND = 37,
+ SPACE_TO_BATCH_ND = 38,
+ TRANSPOSE = 39,
+ MEAN = 40,
+ SUB = 41,
+ DIV = 42,
+ SQUEEZE = 43,
+ UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
+ STRIDED_SLICE = 45,
+ BIDIRECTIONAL_SEQUENCE_RNN = 46,
+ EXP = 47,
+ TOPK_V2 = 48,
+ SPLIT = 49,
+ LOG_SOFTMAX = 50,
+ // DELEGATE is a special op type for the operations which are delegated to
+ // other backends.
+ // WARNING: Experimental interface, subject to change
+ DELEGATE = 51,
+ BIDIRECTIONAL_SEQUENCE_LSTM = 52,
+ CAST = 53,
+ PRELU = 54,
+ MAXIMUM = 55,
+ ARG_MAX = 56,
+ MINIMUM = 57,
+ LESS = 58,
+ NEG = 59,
+ PADV2 = 60,
+ GREATER = 61,
+ GREATER_EQUAL = 62,
+ LESS_EQUAL = 63,
+ SELECT = 64,
+ SLICE = 65,
+ SIN = 66,
+ TRANSPOSE_CONV = 67,
+ SPARSE_TO_DENSE = 68,
+ TILE = 69,
+ EXPAND_DIMS = 70,
+ EQUAL = 71,
+ NOT_EQUAL = 72,
+ LOG = 73,
+ SUM = 74,
+ SQRT = 75,
+ RSQRT = 76,
+ SHAPE = 77,
+ POW = 78,
+ ARG_MIN = 79,
+ FAKE_QUANT = 80,
+ REDUCE_PROD = 81,
+ REDUCE_MAX = 82,
+ PACK = 83,
+ LOGICAL_OR = 84,
+ ONE_HOT = 85,
+ LOGICAL_AND = 86,
+ LOGICAL_NOT = 87,
+ UNPACK = 88,
+ REDUCE_MIN = 89,
+ FLOOR_DIV = 90,
+ REDUCE_ANY = 91,
+ SQUARE = 92,
+ ZEROS_LIKE = 93,
+ FILL = 94,
+ FLOOR_MOD = 95,
+ RANGE = 96,
+ RESIZE_NEAREST_NEIGHBOR = 97,
+ LEAKY_RELU = 98,
+ SQUARED_DIFFERENCE = 99,
+ MIRROR_PAD = 100,
+ ABS = 101,
+ SPLIT_V = 102,
+ UNIQUE = 103,
+ CEIL = 104,
+ REVERSE_V2 = 105,
+ ADD_N = 106,
+ GATHER_ND = 107,
+ COS = 108,
+ WHERE = 109,
+ RANK = 110,
+ ELU = 111,
+ REVERSE_SEQUENCE = 112,
+ MATRIX_DIAG = 113,
+ QUANTIZE = 114,
+ MATRIX_SET_DIAG = 115,
+ ROUND = 116,
+ HARD_SWISH = 117,
+ IF = 118,
+ WHILE = 119,
+ NON_MAX_SUPPRESSION_V4 = 120,
+ NON_MAX_SUPPRESSION_V5 = 121,
+ SCATTER_ND = 122,
+ SELECT_V2 = 123,
+ DENSIFY = 124,
+ SEGMENT_SUM = 125,
+ BATCH_MATMUL = 126,
+ PLACEHOLDER_FOR_GREATER_OP_CODES = 127,
+ CUMSUM = 128,
+ CALL_ONCE = 129,
+ BROADCAST_TO = 130,
+ RFFT2D = 131,
+ CONV_3D = 132,
+ IMAG = 133,
+ REAL = 134,
+ COMPLEX_ABS = 135,
+ HASHTABLE = 136,
+ HASHTABLE_FIND = 137,
+ HASHTABLE_IMPORT = 138,
+ HASHTABLE_SIZE = 139,
+ REDUCE_ALL = 140,
+ CONV_3D_TRANSPOSE = 141,
+ VAR_HANDLE = 142,
+ READ_VARIABLE = 143,
+ ASSIGN_VARIABLE = 144,
+}
+// LINT.ThenChange(nnapi_linter/linter.proto)
+
+// Options for the builtin operators.
+union BuiltinOptions {
+ Conv2DOptions,
+ DepthwiseConv2DOptions,
+ ConcatEmbeddingsOptions,
+ LSHProjectionOptions,
+ Pool2DOptions,
+ SVDFOptions,
+ RNNOptions,
+ FullyConnectedOptions,
+ SoftmaxOptions,
+ ConcatenationOptions,
+ AddOptions,
+ L2NormOptions,
+ LocalResponseNormalizationOptions,
+ LSTMOptions,
+ ResizeBilinearOptions,
+ CallOptions,
+ ReshapeOptions,
+ SkipGramOptions,
+ SpaceToDepthOptions,
+ EmbeddingLookupSparseOptions,
+ MulOptions,
+ PadOptions,
+ GatherOptions,
+ BatchToSpaceNDOptions,
+ SpaceToBatchNDOptions,
+ TransposeOptions,
+ ReducerOptions,
+ SubOptions,
+ DivOptions,
+ SqueezeOptions,
+ SequenceRNNOptions,
+ StridedSliceOptions,
+ ExpOptions,
+ TopKV2Options,
+ SplitOptions,
+ LogSoftmaxOptions,
+ CastOptions,
+ DequantizeOptions,
+ MaximumMinimumOptions,
+ ArgMaxOptions,
+ LessOptions,
+ NegOptions,
+ PadV2Options,
+ GreaterOptions,
+ GreaterEqualOptions,
+ LessEqualOptions,
+ SelectOptions,
+ SliceOptions,
+ TransposeConvOptions,
+ SparseToDenseOptions,
+ TileOptions,
+ ExpandDimsOptions,
+ EqualOptions,
+ NotEqualOptions,
+ ShapeOptions,
+ PowOptions,
+ ArgMinOptions,
+ FakeQuantOptions,
+ PackOptions,
+ LogicalOrOptions,
+ OneHotOptions,
+ LogicalAndOptions,
+ LogicalNotOptions,
+ UnpackOptions,
+ FloorDivOptions,
+ SquareOptions,
+ ZerosLikeOptions,
+ FillOptions,
+ BidirectionalSequenceLSTMOptions,
+ BidirectionalSequenceRNNOptions,
+ UnidirectionalSequenceLSTMOptions,
+ FloorModOptions,
+ RangeOptions,
+ ResizeNearestNeighborOptions,
+ LeakyReluOptions,
+ SquaredDifferenceOptions,
+ MirrorPadOptions,
+ AbsOptions,
+ SplitVOptions,
+ UniqueOptions,
+ ReverseV2Options,
+ AddNOptions,
+ GatherNdOptions,
+ CosOptions,
+ WhereOptions,
+ RankOptions,
+ ReverseSequenceOptions,
+ MatrixDiagOptions,
+ QuantizeOptions,
+ MatrixSetDiagOptions,
+ HardSwishOptions,
+ IfOptions,
+ WhileOptions,
+ DepthToSpaceOptions,
+ NonMaxSuppressionV4Options,
+ NonMaxSuppressionV5Options,
+ ScatterNdOptions,
+ SelectV2Options,
+ DensifyOptions,
+ SegmentSumOptions,
+ BatchMatMulOptions,
+ CumsumOptions,
+ CallOnceOptions,
+ BroadcastToOptions,
+ Rfft2dOptions,
+ Conv3DOptions,
+ HashtableOptions,
+ HashtableFindOptions,
+ HashtableImportOptions,
+ HashtableSizeOptions,
+ VarHandleOptions,
+ ReadVariableOptions,
+ AssignVariableOptions,
+}
+
+enum Padding : byte { SAME, VALID }
+
+enum ActivationFunctionType : byte {
+ NONE = 0,
+ RELU = 1,
+ RELU_N1_TO_1 = 2,
+ RELU6 = 3,
+ TANH = 4,
+ SIGN_BIT = 5,
+}
+
+table Conv2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+// Options for both Conv3D and Conv3DTranspose.
+table Conv3DOptions {
+ padding:Padding;
+ stride_d:int;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_d_factor:int = 1;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table Pool2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ filter_width:int;
+ filter_height:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DepthwiseConv2DOptions {
+ // Parameters for DepthwiseConv version 1 or above.
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ // `depth_multiplier` is redundant. It's used by CPU kernels in
+ // TensorFlow 2.0 or below, but ignored in versions above.
+ // See comments in lite/c/builtin_op_data.h for more details.
+ depth_multiplier:int;
+ fused_activation_function:ActivationFunctionType;
+ // Parameters for DepthwiseConv version 2 or above.
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table ConcatEmbeddingsOptions {
+ num_channels:int;
+ num_columns_per_channel:[int];
+ embedding_dim_per_channel:[int]; // This could be inferred from parameters.
+}
+
+enum LSHProjectionType: byte {
+ UNKNOWN = 0,
+ SPARSE = 1,
+ DENSE = 2,
+}
+
+table LSHProjectionOptions {
+ type: LSHProjectionType;
+}
+
+table SVDFOptions {
+ rank:int;
+ fused_activation_function:ActivationFunctionType;
+ // For weights-only quantization, use asymmetric quantization for
+ // non-constant inputs at evaluation time.
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow RNNCell.
+table RNNOptions {
+ fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with RNNCell.
+table SequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow bidirectional_dynamic_rnn with RNNCell.
+table BidirectionalSequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ merge_outputs: bool;
+ asymmetric_quantize_inputs:bool;
+}
+
+enum FullyConnectedOptionsWeightsFormat: byte {
+ DEFAULT = 0,
+ SHUFFLED4x16INT8 = 1,
+}
+
+// An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
+table FullyConnectedOptions {
+ // Parameters for FullyConnected version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+
+ // Parameters for FullyConnected version 2 or above.
+ weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT;
+
+ // Parameters for FullyConnected version 5 or above.
+ // If set to true, then the number of dimensions is preserved. Furthermore,
+ // all but the last dimension of the input and output shapes will be equal.
+ keep_num_dims: bool;
+
+ // Parameters for FullyConnected version 7 or above.
+ // If set to true, then weights-only op will use asymmetric quantization for
+ // inputs.
+ asymmetric_quantize_inputs: bool;
+}
+
+table SoftmaxOptions {
+ beta: float;
+}
+
+// An implementation of TensorFlow concat.
+table ConcatenationOptions {
+ axis:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table AddOptions {
+ fused_activation_function:ActivationFunctionType;
+ // Parameters supported by version 3.
+ pot_scale_int16:bool = true;
+}
+
+table MulOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table L2NormOptions {
+ // This field is currently ignored in the L2 Norm Op.
+ fused_activation_function:ActivationFunctionType;
+}
+
+table LocalResponseNormalizationOptions {
+ radius:int;
+ bias:float;
+ alpha:float;
+ beta:float;
+}
+
+enum LSTMKernelType : byte {
+ // Full LSTM kernel which supports peephole and projection.
+ FULL = 0,
+ // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
+ BASIC = 1,
+}
+
+// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
+table LSTMOptions {
+ // Parameters for LSTM version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // Parameters for LSTM version 2 or above.
+ // Basic kernel is only supported in version 2 or above.
+ kernel_type: LSTMKernelType = FULL;
+
+ // Parameters for LSTM version 4 or above.
+ asymmetric_quantize_inputs: bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with LSTMCell.
+table UnidirectionalSequenceLSTMOptions {
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true then first dimension is sequence, otherwise batch.
+ time_major:bool;
+
+ // Parameter for Unidirectional Sequence LSTM version 4.
+ asymmetric_quantize_inputs:bool;
+}
+
+table BidirectionalSequenceLSTMOptions {
+ // Parameters supported by version 1:
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true, store the outputs of both directions into the first output.
+ merge_outputs: bool;
+
+ // Parameters supported by version 2:
+ // If true then first dimension is sequence, otherwise batch.
+ // Version 1 implementations assumed time_major to be true, so this default
+ // value should never change.
+ time_major: bool = true;
+
+ // Parameters for version 3 or above.
+ asymmetric_quantize_inputs:bool;
+}
+
+table ResizeBilinearOptions {
+ new_height: int (deprecated);
+ new_width: int (deprecated);
+ align_corners: bool;
+ half_pixel_centers: bool;
+}
+
+table ResizeNearestNeighborOptions {
+ align_corners: bool;
+ half_pixel_centers: bool;
+}
+
+// Options for a call operation.
+table CallOptions {
+ // The subgraph index that needs to be called.
+ subgraph:uint;
+}
+
+table PadOptions {
+}
+
+table PadV2Options {
+}
+
+table ReshapeOptions {
+ new_shape:[int];
+}
+
+table SpaceToBatchNDOptions {
+}
+
+table BatchToSpaceNDOptions {
+}
+
+table SkipGramOptions {
+ ngram_size: int;
+ max_skip_size: int;
+ include_all_ngrams: bool;
+}
+
+table SpaceToDepthOptions {
+ block_size: int;
+}
+
+table DepthToSpaceOptions {
+ block_size: int;
+}
+
+table SubOptions {
+ fused_activation_function:ActivationFunctionType;
+ // Parameters supported by version 5
+ pot_scale_int16:bool = true;
+}
+
+table DivOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table TopKV2Options {
+}
+
+enum CombinerType : byte {
+ SUM = 0,
+ MEAN = 1,
+ SQRTN = 2,
+}
+
+table EmbeddingLookupSparseOptions {
+ combiner:CombinerType;
+}
+
+table GatherOptions {
+ axis: int;
+ // Parameters for Gather version 5 or above.
+ batch_dims: int = 0;
+}
+
+table TransposeOptions {
+}
+
+table ExpOptions {
+}
+
+table CosOptions {
+}
+
+table ReducerOptions {
+ keep_dims: bool;
+}
+
+table SqueezeOptions {
+ squeeze_dims:[int];
+}
+
+table SplitOptions {
+ num_splits: int;
+}
+
+table SplitVOptions {
+ num_splits: int;
+}
+
+table StridedSliceOptions {
+ begin_mask: int;
+ end_mask: int;
+ ellipsis_mask: int;
+ new_axis_mask: int;
+ shrink_axis_mask: int;
+}
+
+table LogSoftmaxOptions {
+}
+
+table CastOptions {
+ in_data_type: TensorType;
+ out_data_type: TensorType;
+}
+
+table DequantizeOptions {
+}
+
+table MaximumMinimumOptions {
+}
+
+table TileOptions {
+}
+
+table ArgMaxOptions {
+ output_type : TensorType;
+}
+
+table ArgMinOptions {
+ output_type : TensorType;
+}
+
+table GreaterOptions {
+}
+
+table GreaterEqualOptions {
+}
+
+table LessOptions {
+}
+
+table LessEqualOptions {
+}
+
+table NegOptions {
+}
+
+table SelectOptions {
+}
+
+table SliceOptions {
+}
+
+table TransposeConvOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+}
+
+table ExpandDimsOptions {
+}
+
+table SparseToDenseOptions {
+ validate_indices:bool;
+}
+
+table EqualOptions {
+}
+
+table NotEqualOptions {
+}
+
+table ShapeOptions {
+ // Optional output type of the operation (int32 or int64). Defaults to int32.
+ out_type : TensorType;
+}
+
+table RankOptions {
+}
+
+table PowOptions {
+}
+
+table FakeQuantOptions {
+ // Parameters supported by version 1:
+ min:float;
+ max:float;
+ num_bits:int;
+
+ // Parameters supported by version 2:
+ narrow_range:bool;
+}
+
+table PackOptions {
+ values_count:int;
+ axis:int;
+}
+
+table LogicalOrOptions {
+}
+
+table OneHotOptions {
+ axis:int;
+}
+
+table AbsOptions {
+}
+
+table HardSwishOptions {
+}
+
+table LogicalAndOptions {
+}
+
+table LogicalNotOptions {
+}
+
+table UnpackOptions {
+ num:int;
+ axis:int;
+}
+
+table FloorDivOptions {
+}
+
+table SquareOptions {
+}
+
+table ZerosLikeOptions {
+}
+
+table FillOptions {
+}
+
+table FloorModOptions {
+}
+
+table RangeOptions {
+}
+
+table LeakyReluOptions {
+ alpha:float;
+}
+
+table SquaredDifferenceOptions {
+}
+
+enum MirrorPadMode : byte {
+ // Doesn't include borders.
+ REFLECT = 0,
+ // Includes borders.
+ SYMMETRIC = 1,
+}
+
+table MirrorPadOptions {
+ mode:MirrorPadMode;
+}
+
+table UniqueOptions {
+ idx_out_type:TensorType = INT32;
+}
+
+table ReverseV2Options {
+}
+
+table AddNOptions {
+}
+
+table GatherNdOptions {
+}
+
+table WhereOptions {
+}
+
+table ReverseSequenceOptions {
+ seq_dim:int;
+ batch_dim:int = 0;
+}
+
+table MatrixDiagOptions {
+}
+
+table QuantizeOptions {
+}
+
+table MatrixSetDiagOptions {
+}
+
+table IfOptions {
+ then_subgraph_index:int;
+ else_subgraph_index:int;
+}
+
+table CallOnceOptions {
+ init_subgraph_index:int;
+}
+
+table WhileOptions {
+ cond_subgraph_index:int;
+ body_subgraph_index:int;
+}
+
+table NonMaxSuppressionV4Options {
+}
+
+table NonMaxSuppressionV5Options {
+}
+
+table ScatterNdOptions {
+}
+
+table SelectV2Options {
+}
+
+table DensifyOptions {
+}
+
+table SegmentSumOptions {
+}
+
+table BatchMatMulOptions {
+ adj_x:bool;
+ adj_y:bool;
+ // Parameters for BatchMatMul version 4 or above.
+ // If set to true, then weights-only op will use asymmetric quantization for
+ // inputs.
+ asymmetric_quantize_inputs: bool;
+}
+
+table CumsumOptions {
+ exclusive:bool;
+ reverse:bool;
+}
+
+table BroadcastToOptions {
+}
+
+table Rfft2dOptions {
+}
+
+table HashtableOptions {
+ // The identity of hash tables. This identity will be used across different
+ // subgraphs in the same interpreter instance.
+ table_id:int;
+ key_dtype:TensorType;
+ value_dtype:TensorType;
+}
+
+table HashtableFindOptions {
+}
+
+table HashtableImportOptions {
+}
+
+table HashtableSizeOptions {
+}
+
+table VarHandleOptions {
+ container:string;
+ shared_name:string;
+}
+
+table ReadVariableOptions {
+}
+
+table AssignVariableOptions {
+}
+
+// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
+// builtin, or a string if the operator is custom.
+table OperatorCode {
+ // This field is for backward compatibility. This field will be used when
+ // the value of the extended builtin_code field is less than
+ // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+ deprecated_builtin_code:byte;
+ custom_code:string;
+
+ // The version of the operator. The version needs to be bumped whenever new
+ // parameters are introduced into an op.
+ version:int = 1;
+
+ // This field is introduced to resolve the op builtin-code shortage problem
+ // (the original BuiltinOperator enum field was represented as a byte).
+ // This field will be used when the value of the extended builtin_code field
+ // is greater than BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+ builtin_code:BuiltinOperator;
+}
+
+enum CustomOptionsFormat : byte {
+ FLEXBUFFERS = 0,
+}
+
+// An operator takes tensors as inputs and outputs. The type of operation being
+// performed is determined by an index into the list of valid OperatorCodes,
+// while the specifics of each operation are configured using builtin_options
+// or custom_options.
+table Operator {
+ // Index into the operator_codes array. Using an integer here avoids
+ // complicated map lookups.
+ opcode_index:uint;
+
+ // Optional inputs are indicated by -1.
+ inputs:[int];
+ outputs:[int];
+
+ builtin_options:BuiltinOptions;
+ custom_options:[ubyte];
+ custom_options_format:CustomOptionsFormat;
+
+ // A list of booleans indicating the input tensors which are being mutated by
+ // this operator (e.g. used by RNN and LSTM).
+ // For example, if the "inputs" array refers to 5 tensors and the second and
+ // fifth are mutable variables, then this list will contain
+ // [false, true, false, false, true].
+ //
+ // If the list is empty, no variable is mutated in this operator.
+ // The list either has the same length as `inputs`, or is empty.
+ mutating_variable_inputs:[bool];
+
+ // A list of indices to the subgraph's "tensors" that are internal to an Op.
+ // Internal tensors are those that do not flow in or out of the operation,
+ // but instead are part of internal computation. As such, the operation's
+ // implementation may manage its memory more efficiently. They are needed
+ // however (i.e. not just an implementation detail) since they are part of the
+ // computation, which may require relevant metadata such as quantization
+ // parameters.
+ intermediates:[int];
+}
+
+// The root type, defining a subgraph, which typically represents an entire
+// model.
+table SubGraph {
+ // A list of all tensors used in this subgraph.
+ tensors:[Tensor];
+
+ // Indices of the tensors that are inputs into this subgraph. Note this is
+ // the list of non-static tensors that feed into the subgraph for inference.
+ inputs:[int];
+
+ // Indices of the tensors that are outputs out of this subgraph. Note this is
+ // the list of output tensors that are considered the product of the
+ // subgraph's inference.
+ outputs:[int];
+
+ // All operators, in execution order.
+ operators:[Operator];
+
+ // Name of this subgraph (used for debugging).
+ name:string;
+}
+
+// Table of raw data buffers (used for constant tensors). Referenced by tensors
+// by index. The generous alignment accommodates mmap-friendly data structures.
+table Buffer {
+ data:[ubyte] (force_align: 16);
+}
+
+table Metadata {
+ // A human readable string to uniquely identify a Metadata.
+ name:string;
+ // An index to the buffers table.
+ buffer:uint;
+}
+
+// Map from an alias name of a tensor to its tensor index in the graph.
+// This is used in SignatureDef.
+table TensorMap {
+ // Represents the alias to use for this tensor.
+ name:string;
+
+ // The actual tensor index in the primary graph that 'name' corresponds to.
+ tensor_index:uint;
+}
+
+// This corresponds to SignatureDef in Tensorflow SavedModel.
+// The SignatureDef will be part of the SavedModel provided for conversion.
+table SignatureDef {
+ // Named inputs for this signature.
+ inputs:[TensorMap];
+
+ // Named outputs for this signature.
+ outputs:[TensorMap];
+
+ // Exported method name for this signature.
+ method_name:string;
+
+ // Key value which was in the Tensorflow SavedModel SignatureDef map.
+ key:string;
+
+ // Subgraph index of the exported method.
+ subgraph_index:uint;
+}
+
+table Model {
+ // Version of the schema.
+ version:uint;
+
+ // A list of all operator codes used in this model. This is
+ // kept in order because operators carry an index into this
+ // vector.
+ operator_codes:[OperatorCode];
+
+ // All the subgraphs of the model. The 0th is assumed to be the main
+ // model.
+ subgraphs:[SubGraph];
+
+ // A description of the model.
+ description:string;
+
+ // Buffers of the model.
+ // Note the 0th entry of this array must be an empty buffer (sentinel).
+ // This is a convention so that tensors without a buffer can provide 0 as
+ // their buffer.
+ buffers:[Buffer];
+
+ // Metadata about the model. Indirects into the existing buffers list.
+ // Deprecated, prefer to use metadata field.
+ metadata_buffer:[int];
+
+ // Metadata about the model.
+ metadata:[Metadata];
+
+ // Optional SignatureDefs for the model.
+ signature_defs:[SignatureDef];
+}
+
+root_type Model;
diff --git a/res/TensorFlowLiteSchema/2.7.0/schema.fbs b/res/TensorFlowLiteSchema/2.7.0/schema.fbs
new file mode 100644
index 000000000..3e0b999f5
--- /dev/null
+++ b/res/TensorFlowLiteSchema/2.7.0/schema.fbs
@@ -0,0 +1,1250 @@
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Revision History
+// Version 0: Initial version.
+// Version 1: Add subgraphs to schema.
+// Version 2: Rename operators to conform to NN API.
+// Version 3: Move buffer data from Model.Subgraph.Tensors to Model.Buffers.
+// Version 3a: Add new builtin op code field. Has backward compatibility with
+// version 3.
+// Version 3b: Rename fields in SignatureDef. Has backward compatibility with
+// version 3 and 3a.
+
+namespace tflite;
+
+// This corresponds to the version.
+file_identifier "TFL3";
+// File extension of any written files.
+file_extension "tflite";
+
+// IMPORTANT: All new members of tables, enums and unions must be added at the
+// end to ensure backwards compatibility.
+
+// The type of data stored in a tensor.
+enum TensorType : byte {
+ FLOAT32 = 0,
+ FLOAT16 = 1,
+ INT32 = 2,
+ UINT8 = 3,
+ INT64 = 4,
+ STRING = 5,
+ BOOL = 6,
+ INT16 = 7,
+ COMPLEX64 = 8,
+ INT8 = 9,
+ FLOAT64 = 10,
+ COMPLEX128 = 11,
+ UINT64 = 12,
+ // Experimental: Resource and variant types are experimental and subject
+ // to change. Do not implement custom kernels using resource & variant types
+ // now.
+ RESOURCE = 13,
+ VARIANT = 14,
+ UINT32 = 15,
+}
+
+// Custom quantization parameters for experimenting with new quantization
+// techniques.
+table CustomQuantization {
+ custom:[ubyte] (force_align: 16);
+}
+
+// Represents a specific quantization technique's parameters.
+union QuantizationDetails {
+ CustomQuantization,
+}
+
+// Parameters for converting a quantized tensor back to float.
+table QuantizationParameters {
+ // These four parameters are the asymmetric linear quantization parameters.
+ // Given a quantized value q, the corresponding float value f should be:
+ // f = scale * (q - zero_point)
+ // For other quantization types, the QuantizationDetails below is used.
+ min:[float]; // For importing back into tensorflow.
+ max:[float]; // For importing back into tensorflow.
+ scale:[float]; // For dequantizing the tensor's values.
+ zero_point:[long];
+
+ // If this is not none, the other quantization parameters (i.e. min, max,
+ // scale, zero_point fields above) are ignored and the value of the
+ // QuantizationDetails union should be used.
+ details:QuantizationDetails;
+
+ // Specifies the dimension of the Tensor's shape that the scales and
+ // zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1]
+ // with quantization params:
+ // scale=[1.0, 2.0, 3.0], zero_point=[1, 2, 3], quantization_dimension=1
+ // will be quantized across the second dimension of t.
+ // t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1
+ // t[:, 1, :, :] will have scale[1]=2.0, zero_point[0]=2
+ // t[:, 2, :, :] will have scale[2]=3.0, zero_point[0]=3
+ quantized_dimension:int;
+}
+
+// Sparse tensors.
+// We use a modification of the TACO format.
+// Reference: http://tensor-compiler.org/kjolstad-oopsla17-tensor-compiler.pdf
+//
+// To encode a conceptual n-dimensional dense tensor with dims (d0, ..., dn-1),
+// potentially with a k-dimensional block (0 <= k <= n) with dims
+// (dn, ..., dn+k-1), the format needs to specify:
+// 1. In what order to traverse these dimensions. For example, to store a 2-D
+// matrix in row major order, the traversal order would be (d0, d1),
+// whereas to store it in column major order, the traversal order would be
+// (d1, d0). If the 2-D matrix has a 2-D inner block, the traversal order
+// could be (d0, d1, d2, d3).
+// 2. How each block dimension in (dn, ..., dn+k-1) maps to the original
+// tensor dimension in (d0, ..., dn-1).
+// 3. In the traversal order defined above, the format (dense vs. sparse) and
+// index metadata for each dimension. For a dense dimension, this is just
+// the size of that dimension. For a sparse dimension, it's the same as
+// the compressed index defined in the Compressed Sparse Row (CSR) format.
+// (http://scipy-lectures.org/advanced/scipy_sparse/csr_matrix.html)
+
+// The storage type for a dimension. Currently we support:
+// 1. DENSE: each coordinate in this dimension is stored implicitly.
+// 2. SPARSE_CSR: only the coordinates with non-zero elements are stored. The
+// compression technique is the same as what CSR uses.
+// More types like a sparse dimension with a different compression technique
+// could be added to the list in the future.
+enum DimensionType : byte {
+ DENSE = 0,
+ SPARSE_CSR = 1,
+}
+
+table Int32Vector {
+ values:[int];
+}
+
+table Uint16Vector {
+ values:[ushort] (force_align: 4);
+}
+
+table Uint8Vector {
+ values:[ubyte] (force_align: 4);
+}
+
+// Variable-typed buffer to store the index metadata for a sparse dimension.
+// The widest type is Int32 instead of UInt32 because a tensor's shape is an int32
+// vector. We don't want the per-dimensional index to overflow that range.
+union SparseIndexVector {
+ Int32Vector,
+ Uint16Vector,
+ Uint8Vector
+}
+
+table DimensionMetadata {
+ // Whether a dimension is dense or sparse.
+ format:DimensionType;
+ // Index metadata used for a dimension.
+ // - If format is DimensionType.DENSE then we use the dense_size field to
+ // store the size of that dimension. Each index in that dimension is
+ // stored implicitly.
+ // - If format is DimensionType.SPARSE_CSR then we use array_segments and
+ // array_indices to encode that dimension. array_segments represents how
+ // to segment the indices array, each segment corresponds to one element
+ // in the previous dimension. array_indices represents the index of the
+ // non-zero elements within this dimension (as those in the CSR matrix
+ // format, where the first array is row pointers and the second array is
+ // column indices).
+ dense_size:int;
+ array_segments:SparseIndexVector;
+ array_indices:SparseIndexVector;
+}
+
+// Parameters to encode a sparse TfLite tensor.
+table SparsityParameters {
+ // The traversal order of the dimensions defined in the `shape` field of the
+ // conceptual dense tensor. For an n-dimensional tensor with dims (d0, d1,
+ // ..., dn-1),
+ // - if not block sparse, the traversal_order is just a permutation of (d0,
+ // ..., dn-1). For example, a 2-D matrix stored in row-major order would
+ // have traversal_order = (d0, d1).
+ // - if block sparse with a k-dimensional block (0 <= k <= n), the
+ // traversal_order has n + k elements. The first n elements are still a
+ // permutation of (d0, ..., dn-1). The last k elements are a permutation
+ // of (dn, ..., dn+k-1), defining how to traverse a block internally. For
+ // example, a 2-D matrix with 2-D blocks, both stored in row-major order
+ // would have traversal_order = (d0, d1, d2, d3).
+ traversal_order:[int];
+ // For an n-dimensional tensor with a k-dimensional block (0 <= k <= n),
+ // stores how a block dimension in (dn, ..., dn+k-1) maps to the original
+ // tensor dimension in (d0, ..., dn-1).
+ // It's stored in the order of (dn, ..., dn+k-1).
+ // If not block-sparse, this field is NULL.
+ block_map:[int];
+ // In the traversal order defined above, the metadata needed for
+ // each dimension to locate the non-zero values in the original dense tensor.
+ // The size of the dim_metadata array = the size of the traversal_order array
+ // = n + k.
+ dim_metadata:[DimensionMetadata];
+}
+
+table Tensor {
+ // The tensor shape. The meaning of each entry is operator-specific but
+ // builtin ops use: [batch size, height, width, number of channels] (That's
+ // Tensorflow's NHWC).
+ shape:[int];
+ type:TensorType;
+ // An index that refers to the buffers table at the root of the model. Or,
+ // if there is no data buffer associated (i.e. intermediate results), then
+ // this is 0 (which refers to an always existent empty buffer).
+ //
+ // The data_buffer itself is an opaque container, with the assumption that the
+ // target device is little-endian. In addition, all builtin operators assume
+ // the memory is ordered such that if `shape` is [4, 3, 2], then index
+ // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k].
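+  // For example, with that shape, index [2, 1, 0] maps to
+  // data_buffer[2*3*2 + 1*2 + 0] = data_buffer[14].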
+ buffer:uint;
+ name:string; // For debugging and importing back into tensorflow.
+ quantization:QuantizationParameters; // Optional.
+
+ is_variable:bool = false;
+
+ // Parameters to encode a sparse tensor. See the example in
+ // tensorflow/lite/testdata/sparse_tensor.json.
+ sparsity:SparsityParameters; // Optional.
+
+ // Encodes `shape` with unknown dimensions. Unknown dimensions are
+ // represented with -1.
+ shape_signature:[int]; // Optional.
+}
+
+// A list of builtin operators. Builtin operators are slightly faster than custom
+// ones, but not by much. Moreover, while custom operators accept an opaque
+// object containing configuration parameters, builtins have a predetermined
+// set of acceptable options.
+// LINT.IfChange
+enum BuiltinOperator : int32 {
+ ADD = 0,
+ AVERAGE_POOL_2D = 1,
+ CONCATENATION = 2,
+ CONV_2D = 3,
+ DEPTHWISE_CONV_2D = 4,
+ DEPTH_TO_SPACE = 5,
+ DEQUANTIZE = 6,
+ EMBEDDING_LOOKUP = 7,
+ FLOOR = 8,
+ FULLY_CONNECTED = 9,
+ HASHTABLE_LOOKUP = 10,
+ L2_NORMALIZATION = 11,
+ L2_POOL_2D = 12,
+ LOCAL_RESPONSE_NORMALIZATION = 13,
+ LOGISTIC = 14,
+ LSH_PROJECTION = 15,
+ LSTM = 16,
+ MAX_POOL_2D = 17,
+ MUL = 18,
+ RELU = 19,
+ // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed
+ // since different model developers use RELU1 in different ways. Never
+ // create another op called RELU1.
+ RELU_N1_TO_1 = 20,
+ RELU6 = 21,
+ RESHAPE = 22,
+ RESIZE_BILINEAR = 23,
+ RNN = 24,
+ SOFTMAX = 25,
+ SPACE_TO_DEPTH = 26,
+ SVDF = 27,
+ TANH = 28,
+ CONCAT_EMBEDDINGS = 29,
+ SKIP_GRAM = 30,
+ CALL = 31,
+ CUSTOM = 32,
+ EMBEDDING_LOOKUP_SPARSE = 33,
+ PAD = 34,
+ UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+ GATHER = 36,
+ BATCH_TO_SPACE_ND = 37,
+ SPACE_TO_BATCH_ND = 38,
+ TRANSPOSE = 39,
+ MEAN = 40,
+ SUB = 41,
+ DIV = 42,
+ SQUEEZE = 43,
+ UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
+ STRIDED_SLICE = 45,
+ BIDIRECTIONAL_SEQUENCE_RNN = 46,
+ EXP = 47,
+ TOPK_V2 = 48,
+ SPLIT = 49,
+ LOG_SOFTMAX = 50,
+ // DELEGATE is a special op type for the operations which are delegated to
+ // other backends.
+ // WARNING: Experimental interface, subject to change
+ DELEGATE = 51,
+ BIDIRECTIONAL_SEQUENCE_LSTM = 52,
+ CAST = 53,
+ PRELU = 54,
+ MAXIMUM = 55,
+ ARG_MAX = 56,
+ MINIMUM = 57,
+ LESS = 58,
+ NEG = 59,
+ PADV2 = 60,
+ GREATER = 61,
+ GREATER_EQUAL = 62,
+ LESS_EQUAL = 63,
+ SELECT = 64,
+ SLICE = 65,
+ SIN = 66,
+ TRANSPOSE_CONV = 67,
+ SPARSE_TO_DENSE = 68,
+ TILE = 69,
+ EXPAND_DIMS = 70,
+ EQUAL = 71,
+ NOT_EQUAL = 72,
+ LOG = 73,
+ SUM = 74,
+ SQRT = 75,
+ RSQRT = 76,
+ SHAPE = 77,
+ POW = 78,
+ ARG_MIN = 79,
+ FAKE_QUANT = 80,
+ REDUCE_PROD = 81,
+ REDUCE_MAX = 82,
+ PACK = 83,
+ LOGICAL_OR = 84,
+ ONE_HOT = 85,
+ LOGICAL_AND = 86,
+ LOGICAL_NOT = 87,
+ UNPACK = 88,
+ REDUCE_MIN = 89,
+ FLOOR_DIV = 90,
+ REDUCE_ANY = 91,
+ SQUARE = 92,
+ ZEROS_LIKE = 93,
+ FILL = 94,
+ FLOOR_MOD = 95,
+ RANGE = 96,
+ RESIZE_NEAREST_NEIGHBOR = 97,
+ LEAKY_RELU = 98,
+ SQUARED_DIFFERENCE = 99,
+ MIRROR_PAD = 100,
+ ABS = 101,
+ SPLIT_V = 102,
+ UNIQUE = 103,
+ CEIL = 104,
+ REVERSE_V2 = 105,
+ ADD_N = 106,
+ GATHER_ND = 107,
+ COS = 108,
+ WHERE = 109,
+ RANK = 110,
+ ELU = 111,
+ REVERSE_SEQUENCE = 112,
+ MATRIX_DIAG = 113,
+ QUANTIZE = 114,
+ MATRIX_SET_DIAG = 115,
+ ROUND = 116,
+ HARD_SWISH = 117,
+ IF = 118,
+ WHILE = 119,
+ NON_MAX_SUPPRESSION_V4 = 120,
+ NON_MAX_SUPPRESSION_V5 = 121,
+ SCATTER_ND = 122,
+ SELECT_V2 = 123,
+ DENSIFY = 124,
+ SEGMENT_SUM = 125,
+ BATCH_MATMUL = 126,
+ PLACEHOLDER_FOR_GREATER_OP_CODES = 127,
+ CUMSUM = 128,
+ CALL_ONCE = 129,
+ BROADCAST_TO = 130,
+ RFFT2D = 131,
+ CONV_3D = 132,
+ IMAG=133,
+ REAL=134,
+ COMPLEX_ABS=135,
+ HASHTABLE = 136,
+ HASHTABLE_FIND = 137,
+ HASHTABLE_IMPORT = 138,
+ HASHTABLE_SIZE = 139,
+ REDUCE_ALL = 140,
+ CONV_3D_TRANSPOSE = 141,
+ VAR_HANDLE = 142,
+ READ_VARIABLE = 143,
+ ASSIGN_VARIABLE = 144,
+ BROADCAST_ARGS = 145,
+ RANDOM_STANDARD_NORMAL = 146,
+}
+// LINT.ThenChange(nnapi_linter/linter.proto)
+
+// Options for the builtin operators.
+union BuiltinOptions {
+ Conv2DOptions,
+ DepthwiseConv2DOptions,
+ ConcatEmbeddingsOptions,
+ LSHProjectionOptions,
+ Pool2DOptions,
+ SVDFOptions,
+ RNNOptions,
+ FullyConnectedOptions,
+ SoftmaxOptions,
+ ConcatenationOptions,
+ AddOptions,
+ L2NormOptions,
+ LocalResponseNormalizationOptions,
+ LSTMOptions,
+ ResizeBilinearOptions,
+ CallOptions,
+ ReshapeOptions,
+ SkipGramOptions,
+ SpaceToDepthOptions,
+ EmbeddingLookupSparseOptions,
+ MulOptions,
+ PadOptions,
+ GatherOptions,
+ BatchToSpaceNDOptions,
+ SpaceToBatchNDOptions,
+ TransposeOptions,
+ ReducerOptions,
+ SubOptions,
+ DivOptions,
+ SqueezeOptions,
+ SequenceRNNOptions,
+ StridedSliceOptions,
+ ExpOptions,
+ TopKV2Options,
+ SplitOptions,
+ LogSoftmaxOptions,
+ CastOptions,
+ DequantizeOptions,
+ MaximumMinimumOptions,
+ ArgMaxOptions,
+ LessOptions,
+ NegOptions,
+ PadV2Options,
+ GreaterOptions,
+ GreaterEqualOptions,
+ LessEqualOptions,
+ SelectOptions,
+ SliceOptions,
+ TransposeConvOptions,
+ SparseToDenseOptions,
+ TileOptions,
+ ExpandDimsOptions,
+ EqualOptions,
+ NotEqualOptions,
+ ShapeOptions,
+ PowOptions,
+ ArgMinOptions,
+ FakeQuantOptions,
+ PackOptions,
+ LogicalOrOptions,
+ OneHotOptions,
+ LogicalAndOptions,
+ LogicalNotOptions,
+ UnpackOptions,
+ FloorDivOptions,
+ SquareOptions,
+ ZerosLikeOptions,
+ FillOptions,
+ BidirectionalSequenceLSTMOptions,
+ BidirectionalSequenceRNNOptions,
+ UnidirectionalSequenceLSTMOptions,
+ FloorModOptions,
+ RangeOptions,
+ ResizeNearestNeighborOptions,
+ LeakyReluOptions,
+ SquaredDifferenceOptions,
+ MirrorPadOptions,
+ AbsOptions,
+ SplitVOptions,
+ UniqueOptions,
+ ReverseV2Options,
+ AddNOptions,
+ GatherNdOptions,
+ CosOptions,
+ WhereOptions,
+ RankOptions,
+ ReverseSequenceOptions,
+ MatrixDiagOptions,
+ QuantizeOptions,
+ MatrixSetDiagOptions,
+ HardSwishOptions,
+ IfOptions,
+ WhileOptions,
+ DepthToSpaceOptions,
+ NonMaxSuppressionV4Options,
+ NonMaxSuppressionV5Options,
+ ScatterNdOptions,
+ SelectV2Options,
+ DensifyOptions,
+ SegmentSumOptions,
+ BatchMatMulOptions,
+ CumsumOptions,
+ CallOnceOptions,
+ BroadcastToOptions,
+ Rfft2dOptions,
+ Conv3DOptions,
+ HashtableOptions,
+ HashtableFindOptions,
+ HashtableImportOptions,
+ HashtableSizeOptions,
+ VarHandleOptions,
+ ReadVariableOptions,
+ AssignVariableOptions,
+ RandomOptions,
+}
+
+enum Padding : byte { SAME, VALID }
+
+enum ActivationFunctionType : byte {
+ NONE = 0,
+ RELU = 1,
+ RELU_N1_TO_1 = 2,
+ RELU6 = 3,
+ TANH = 4,
+ SIGN_BIT = 5,
+}
+
+table Conv2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+// Options for both Conv3D and Conv3DTranspose.
+table Conv3DOptions {
+ padding:Padding;
+ stride_d:int;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_d_factor:int = 1;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table Pool2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ filter_width:int;
+ filter_height:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DepthwiseConv2DOptions {
+ // Parameters for DepthwiseConv version 1 or above.
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ // `depth_multiplier` is redundant. It's used by CPU kernels in
+ // TensorFlow 2.0 or below, but ignored in versions above.
+ // See comments in lite/c/builtin_op_data.h for more details.
+ depth_multiplier:int;
+ fused_activation_function:ActivationFunctionType;
+ // Parameters for DepthwiseConv version 2 or above.
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table ConcatEmbeddingsOptions {
+ num_channels:int;
+ num_columns_per_channel:[int];
+ embedding_dim_per_channel:[int]; // This could be inferred from parameters.
+}
+
+enum LSHProjectionType: byte {
+ UNKNOWN = 0,
+ SPARSE = 1,
+ DENSE = 2,
+}
+
+table LSHProjectionOptions {
+ type: LSHProjectionType;
+}
+
+table SVDFOptions {
+ rank:int;
+ fused_activation_function:ActivationFunctionType;
+  // For weights-only quantization, use asymmetric quantization for
+  // non-constant inputs at evaluation time.
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow RNNCell.
+table RNNOptions {
+ fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with RNNCell.
+table SequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow bidirectional_dynamic_rnn with RNNCell.
+table BidirectionalSequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ merge_outputs: bool;
+ asymmetric_quantize_inputs:bool;
+}
+
+enum FullyConnectedOptionsWeightsFormat: byte {
+ DEFAULT = 0,
+ SHUFFLED4x16INT8 = 1,
+}
+
+// An implementation of TensorFlow fully_connected (a.k.a. Dense) layer.
+table FullyConnectedOptions {
+ // Parameters for FullyConnected version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+
+ // Parameters for FullyConnected version 2 or above.
+ weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT;
+
+ // Parameters for FullyConnected version 5 or above.
+  // If set to true, then the number of dimensions is preserved. Furthermore,
+ // all but the last dimension of the input and output shapes will be equal.
+ keep_num_dims: bool;
+
+ // Parameters for FullyConnected version 7 or above.
+ // If set to true, then weights-only op will use asymmetric quantization for
+ // inputs.
+ asymmetric_quantize_inputs: bool;
+}
+
+table SoftmaxOptions {
+ beta: float;
+}
+
+// An implementation of TensorFlow concat.
+table ConcatenationOptions {
+ axis:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table AddOptions {
+ fused_activation_function:ActivationFunctionType;
+ // Parameters supported by version 3.
+ pot_scale_int16:bool = true;
+}
+
+table MulOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table L2NormOptions {
+ // This field is currently ignored in the L2 Norm Op.
+ fused_activation_function:ActivationFunctionType;
+}
+
+table LocalResponseNormalizationOptions {
+ radius:int;
+ bias:float;
+ alpha:float;
+ beta:float;
+}
+
+enum LSTMKernelType : byte {
+ // Full LSTM kernel which supports peephole and projection.
+ FULL = 0,
+ // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
+ BASIC = 1,
+}
+
+// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
+table LSTMOptions {
+ // Parameters for LSTM version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // Parameters for LSTM version 2 or above.
+ // Basic kernel is only supported in version 2 or above.
+ kernel_type: LSTMKernelType = FULL;
+
+ // Parameters for LSTM version 4 or above.
+ asymmetric_quantize_inputs: bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with LSTMCell.
+table UnidirectionalSequenceLSTMOptions {
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true then first dimension is sequence, otherwise batch.
+ time_major:bool;
+
+ // Parameter for Unidirectional Sequence LSTM version 4.
+ asymmetric_quantize_inputs:bool;
+}
+
+table BidirectionalSequenceLSTMOptions {
+ // Parameters supported by version 1:
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true, store the outputs of both directions into the first output.
+ merge_outputs: bool;
+
+ // Parameters supported by version 2:
+ // If true then first dimension is sequence, otherwise batch.
+ // Version 1 implementations assumed time_major to be true, so this default
+ // value should never change.
+ time_major: bool = true;
+
+ // Parameters for version 3 or above.
+ asymmetric_quantize_inputs:bool;
+}
+
+table ResizeBilinearOptions {
+ new_height: int (deprecated);
+ new_width: int (deprecated);
+ align_corners: bool;
+ half_pixel_centers: bool;
+}
+
+table ResizeNearestNeighborOptions {
+ align_corners: bool;
+ half_pixel_centers: bool;
+}
+
+// Options for a call operation.
+table CallOptions {
+ // The subgraph index that needs to be called.
+ subgraph:uint;
+}
+
+table PadOptions {
+}
+
+table PadV2Options {
+}
+
+table ReshapeOptions {
+ new_shape:[int];
+}
+
+table SpaceToBatchNDOptions {
+}
+
+table BatchToSpaceNDOptions {
+}
+
+table SkipGramOptions {
+ ngram_size: int;
+ max_skip_size: int;
+ include_all_ngrams: bool;
+}
+
+table SpaceToDepthOptions {
+ block_size: int;
+}
+
+table DepthToSpaceOptions {
+ block_size: int;
+}
+
+table SubOptions {
+ fused_activation_function:ActivationFunctionType;
+ // Parameters supported by version 5
+ pot_scale_int16:bool = true;
+}
+
+table DivOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table TopKV2Options {
+}
+
+enum CombinerType : byte {
+ SUM = 0,
+ MEAN = 1,
+ SQRTN = 2,
+}
+
+table EmbeddingLookupSparseOptions {
+ combiner:CombinerType;
+}
+
+table GatherOptions {
+ axis: int;
+ // Parameters for Gather version 5 or above.
+ batch_dims: int = 0;
+}
+
+table TransposeOptions {
+}
+
+table ExpOptions {
+}
+
+table CosOptions {
+}
+
+table ReducerOptions {
+ keep_dims: bool;
+}
+
+table SqueezeOptions {
+ squeeze_dims:[int];
+}
+
+table SplitOptions {
+ num_splits: int;
+}
+
+table SplitVOptions {
+ num_splits: int;
+}
+
+table StridedSliceOptions {
+ begin_mask: int;
+ end_mask: int;
+ ellipsis_mask: int;
+ new_axis_mask: int;
+ shrink_axis_mask: int;
+}
+
+table LogSoftmaxOptions {
+}
+
+table CastOptions {
+ in_data_type: TensorType;
+ out_data_type: TensorType;
+}
+
+table DequantizeOptions {
+}
+
+table MaximumMinimumOptions {
+}
+
+table TileOptions {
+}
+
+table ArgMaxOptions {
+ output_type : TensorType;
+}
+
+table ArgMinOptions {
+ output_type : TensorType;
+}
+
+table GreaterOptions {
+}
+
+table GreaterEqualOptions {
+}
+
+table LessOptions {
+}
+
+table LessEqualOptions {
+}
+
+table NegOptions {
+}
+
+table SelectOptions {
+}
+
+table SliceOptions {
+}
+
+table TransposeConvOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+}
+
+table ExpandDimsOptions {
+}
+
+table SparseToDenseOptions {
+ validate_indices:bool;
+}
+
+table EqualOptions {
+}
+
+table NotEqualOptions {
+}
+
+table ShapeOptions {
+ // Optional output type of the operation (int32 or int64). Defaults to int32.
+ out_type : TensorType;
+}
+
+table RankOptions {
+}
+
+table PowOptions {
+}
+
+table FakeQuantOptions {
+ // Parameters supported by version 1:
+ min:float;
+ max:float;
+ num_bits:int;
+
+ // Parameters supported by version 2:
+ narrow_range:bool;
+}
+
+table PackOptions {
+ values_count:int;
+ axis:int;
+}
+
+table LogicalOrOptions {
+}
+
+table OneHotOptions {
+ axis:int;
+}
+
+table AbsOptions {
+}
+
+
+table HardSwishOptions {
+}
+
+table LogicalAndOptions {
+}
+
+table LogicalNotOptions {
+}
+
+table UnpackOptions {
+ num:int;
+ axis:int;
+}
+
+table FloorDivOptions {
+}
+
+table SquareOptions {
+}
+
+table ZerosLikeOptions {
+}
+
+table FillOptions {
+}
+
+table FloorModOptions {
+}
+
+table RangeOptions {
+}
+
+table LeakyReluOptions {
+ alpha:float;
+}
+
+table SquaredDifferenceOptions {
+}
+
+enum MirrorPadMode : byte {
+ // Doesn't include borders.
+ REFLECT = 0,
+ // Includes borders.
+ SYMMETRIC = 1,
+}
+
+table MirrorPadOptions {
+ mode:MirrorPadMode;
+}
+
+table UniqueOptions {
+ idx_out_type:TensorType = INT32;
+}
+
+table ReverseV2Options {
+}
+
+table AddNOptions {
+}
+
+table GatherNdOptions {
+}
+
+table WhereOptions {
+}
+
+table ReverseSequenceOptions {
+ seq_dim:int;
+ batch_dim:int = 0;
+}
+
+table MatrixDiagOptions {
+}
+
+table QuantizeOptions {
+}
+
+table MatrixSetDiagOptions {
+}
+
+table IfOptions {
+ then_subgraph_index:int;
+ else_subgraph_index:int;
+}
+
+table CallOnceOptions {
+ init_subgraph_index:int;
+}
+
+table WhileOptions {
+ cond_subgraph_index:int;
+ body_subgraph_index:int;
+}
+
+table NonMaxSuppressionV4Options {
+}
+
+table NonMaxSuppressionV5Options {
+}
+
+table ScatterNdOptions {
+}
+
+table SelectV2Options {
+}
+
+table DensifyOptions {
+}
+
+table SegmentSumOptions {
+}
+
+table BatchMatMulOptions {
+ adj_x:bool;
+ adj_y:bool;
+ // Parameters for BatchMatMul version 4 or above.
+ // If set to true, then weights-only op will use asymmetric quantization for
+ // inputs.
+ asymmetric_quantize_inputs: bool;
+}
+
+table CumsumOptions {
+ exclusive:bool;
+ reverse:bool;
+}
+
+table BroadcastToOptions {
+}
+
+table Rfft2dOptions {
+}
+
+table HashtableOptions {
+ // The identity of hash tables. This identity will be used across different
+ // subgraphs in the same interpreter instance.
+ table_id:int;
+ key_dtype:TensorType;
+ value_dtype:TensorType;
+}
+
+table HashtableFindOptions {
+}
+
+table HashtableImportOptions {
+}
+
+table HashtableSizeOptions {
+}
+
+table VarHandleOptions {
+ container:string;
+ shared_name:string;
+}
+
+table ReadVariableOptions {
+}
+
+table AssignVariableOptions {
+}
+
+table RandomOptions {
+ seed: int;
+ seed2: int;
+}
+
+// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
+// builtin, or a string if the operator is custom.
+table OperatorCode {
+  // This field is for backward compatibility. This field will be used when
+  // the value of the extended builtin_code field is less than
+  // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+ deprecated_builtin_code:byte;
+ custom_code:string;
+
+  // The version of the operator. The version needs to be bumped whenever new
+ // parameters are introduced into an op.
+ version:int = 1;
+
+  // This field is introduced for resolving the op builtin code shortage problem
+  // (the original BuiltinOperator enum field was represented as a byte).
+  // This field will be used when the value of the extended builtin_code field
+  // is greater than BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+ builtin_code:BuiltinOperator;
+}
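+
+// A reader-side sketch (illustrative, not normative): the effective builtin
+// code can be resolved from the two fields above as
+//   code = (deprecated_builtin_code < PLACEHOLDER_FOR_GREATER_OP_CODES)
+//            ? deprecated_builtin_code : builtin_code;
+// i.e. the extended field only takes effect once the byte-sized field
+// saturates at the placeholder value.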
+
+enum CustomOptionsFormat : byte {
+ FLEXBUFFERS = 0,
+}
+
+// An operator takes tensors as inputs and outputs. The type of operation being
+// performed is determined by an index into the list of valid OperatorCodes,
+// while the specifics of each operation are configured using builtin_options
+// or custom_options.
+table Operator {
+ // Index into the operator_codes array. Using an integer here avoids
+  // complicated map lookups.
+ opcode_index:uint;
+
+  // Optional inputs are indicated by -1.
+ inputs:[int];
+ outputs:[int];
+
+ builtin_options:BuiltinOptions;
+ custom_options:[ubyte];
+ custom_options_format:CustomOptionsFormat;
+
+ // A list of booleans indicating the input tensors which are being mutated by
+  // this operator (e.g. used by RNN and LSTM).
+ // For example, if the "inputs" array refers to 5 tensors and the second and
+ // fifth are mutable variables, then this list will contain
+ // [false, true, false, false, true].
+ //
+ // If the list is empty, no variable is mutated in this operator.
+ // The list either has the same length as `inputs`, or is empty.
+ mutating_variable_inputs:[bool];
+
+ // A list of indices to the subgraph's "tensors" that are internal to an Op.
+ // Internal tensors are those that do not flow in or out of the operation,
+ // but instead are part of internal computation. As such, the operation's
+ // implementation may manage its memory more efficiently. They are needed
+ // however (i.e. not just an implementation detail) since they are part of the
+ // computation, which may require relevant metadata such as quantization
+ // parameters.
+ intermediates:[int];
+}
+
+// The root type, defining a subgraph, which typically represents an entire
+// model.
+table SubGraph {
+ // A list of all tensors used in this subgraph.
+ tensors:[Tensor];
+
+ // Indices of the tensors that are inputs into this subgraph. Note this is
+ // the list of non-static tensors that feed into the subgraph for inference.
+ inputs:[int];
+
+ // Indices of the tensors that are outputs out of this subgraph. Note this is
+ // the list of output tensors that are considered the product of the
+ // subgraph's inference.
+ outputs:[int];
+
+ // All operators, in execution order.
+ operators:[Operator];
+
+ // Name of this subgraph (used for debugging).
+ name:string;
+}
+
+// Table of raw data buffers (used for constant tensors). Referenced by tensors
+// by index. The generous alignment accommodates mmap-friendly data structures.
+table Buffer {
+ data:[ubyte] (force_align: 16);
+}
+
+table Metadata {
+ // A human readable string to uniquely identify a Metadata.
+ name:string;
+ // An index to the buffers table.
+ buffer:uint;
+}
+
+// Map from an alias name of tensor to tensor index in the graph.
+// This is used in Signature def.
+table TensorMap {
+ // Represents the alias to use for this tensor.
+ name:string;
+
+ // The actual tensor index in the primary graph, that 'name' corresponds to.
+ tensor_index:uint;
+}
+
+// This corresponds to SignatureDef in Tensorflow SavedModel.
+// The SignatureDef will be part of the SavedModel provided for conversion.
+table SignatureDef {
+ // Named inputs for this signature.
+ inputs:[TensorMap];
+
+ // Named outputs for this signature.
+ outputs:[TensorMap];
+
+ // Key value which was in the Tensorflow SavedModel SignatureDef map.
+ signature_key:string;
+
+ // Model tag, deprecated.
+ deprecated_tag:string (deprecated);
+
+ // Index of subgraphs that corresponds to the exported method.
+ subgraph_index:uint;
+}
+
+table Model {
+ // Version of the schema.
+ version:uint;
+
+ // A list of all operator codes used in this model. This is
+ // kept in order because operators carry an index into this
+ // vector.
+ operator_codes:[OperatorCode];
+
+ // All the subgraphs of the model. The 0th is assumed to be the main
+ // model.
+ subgraphs:[SubGraph];
+
+ // A description of the model.
+ description:string;
+
+ // Buffers of the model.
+ // Note the 0th entry of this array must be an empty buffer (sentinel).
+ // This is a convention so that tensors without a buffer can provide 0 as
+ // their buffer.
+ buffers:[Buffer];
+
+  // Metadata about the model. Indirects into the existing buffers list.
+ // Deprecated, prefer to use metadata field.
+ metadata_buffer:[int];
+
+ // Metadata about the model.
+ metadata:[Metadata];
+
+ // Optional SignatureDefs for the model.
+ signature_defs:[SignatureDef];
+}
+
+root_type Model;
diff --git a/res/TensorFlowLiteSchema/2.8.0/schema.fbs b/res/TensorFlowLiteSchema/2.8.0/schema.fbs
new file mode 100644
index 000000000..af55a262f
--- /dev/null
+++ b/res/TensorFlowLiteSchema/2.8.0/schema.fbs
@@ -0,0 +1,1264 @@
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Revision History
+// Version 0: Initial version.
+// Version 1: Add subgraphs to schema.
+// Version 2: Rename operators to conform to NN API.
+// Version 3: Move buffer data from Model.Subgraph.Tensors to Model.Buffers.
+// Version 3a: Add new builtin op code field. Has backward compatibility with
+// version 3.
+// Version 3b: Rename fields in SignatureDef. Has backward compatibility with
+// version 3 and 3a.
+
+namespace tflite;
+
+// This identifier corresponds to version 3 of the schema.
+file_identifier "TFL3";
+// File extension of any written files.
+file_extension "tflite";
+
+// IMPORTANT: All new members of tables, enums and unions must be added at the
+// end to ensure backwards compatibility.
+
+// The type of data stored in a tensor.
+enum TensorType : byte {
+ FLOAT32 = 0,
+ FLOAT16 = 1,
+ INT32 = 2,
+ UINT8 = 3,
+ INT64 = 4,
+ STRING = 5,
+ BOOL = 6,
+ INT16 = 7,
+ COMPLEX64 = 8,
+ INT8 = 9,
+ FLOAT64 = 10,
+ COMPLEX128 = 11,
+ UINT64 = 12,
+  // Experimental: Resource and variant types are experimental and subject
+  // to change. Do not implement custom kernels using resource & variant types
+  // now.
+ RESOURCE = 13,
+ VARIANT = 14,
+ UINT32 = 15,
+}
+
+// Custom quantization parameters for experimenting with new quantization
+// techniques.
+table CustomQuantization {
+ custom:[ubyte] (force_align: 16);
+}
+
+// Represents a specific quantization technique's parameters.
+union QuantizationDetails {
+ CustomQuantization,
+}
+
+// Parameters for converting a quantized tensor back to float.
+table QuantizationParameters {
+ // These four parameters are the asymmetric linear quantization parameters.
+ // Given a quantized value q, the corresponding float value f should be:
+ // f = scale * (q - zero_point)
+ // For other quantization types, the QuantizationDetails below is used.
+ min:[float]; // For importing back into tensorflow.
+ max:[float]; // For importing back into tensorflow.
+ scale:[float]; // For dequantizing the tensor's values.
+ zero_point:[long];
+
+ // If this is not none, the other quantization parameters (i.e. min, max,
+ // scale, zero_point fields above) are ignored and the value of the
+ // QuantizationDetails union should be used.
+ details:QuantizationDetails;
+
+ // Specifies the dimension of the Tensor's shape that the scales and
+ // zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1]
+ // with quantization params:
+  //   scale=[1.0, 2.0, 3.0], zero_point=[1, 2, 3], quantized_dimension=1
+ // will be quantized across the second dimension of t.
+ // t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1
+  //   t[:, 1, :, :] will have scale[1]=2.0, zero_point[1]=2
+  //   t[:, 2, :, :] will have scale[2]=3.0, zero_point[2]=3
+ quantized_dimension:int;
+}
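+
+// For example (an illustration): with per-tensor parameters scale=[0.5] and
+// zero_point=[10], a stored quantized value q=14 dequantizes to
+// f = 0.5 * (14 - 10) = 2.0.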
+
+// Sparse tensors.
+// We use a modification of the TACO format.
+// Reference: http://tensor-compiler.org/kjolstad-oopsla17-tensor-compiler.pdf
+//
+// To encode a conceptual n-dimensional dense tensor with dims (d0, ..., dn-1),
+// potentially with a k-dimensional block (0 <= k <= n) with dims
+// (dn, ..., dn+k-1), the format needs to specify:
+// 1. In what order to traverse these dimensions. For example, to store a 2-D
+// matrix in row major order, the traversal order would be (d0, d1),
+// whereas to store it in column major order, the traversal order would be
+// (d1, d0). If the 2-D matrix has a 2-D inner block, the traversal order
+// could be (d0, d1, d2, d3).
+// 2. How each block dimension in (dn, ..., dn+k-1) maps to the original
+// tensor dimension in (d0, ..., dn-1).
+// 3. In the traversal order defined above, the format (dense vs. sparse) and
+// index metadata for each dimension. For a dense dimension, this is just
+// the size of that dimension. For a sparse dimension, it's the same as
+// the compressed index defined in the Compressed Sparse Row (CSR) format.
+// (http://scipy-lectures.org/advanced/scipy_sparse/csr_matrix.html)
+
+// The storage type for a dimension. Currently we support:
+// 1. DENSE: each coordinate in this dimension is stored implicitly.
+// 2. SPARSE_CSR: only the coordinates with non-zero elements are stored. The
+//    compression technique is the same as what CSR uses.
+// More types like a sparse dimension with a different compression technique
+// could be added to the list in the future.
+enum DimensionType : byte {
+ DENSE = 0,
+ SPARSE_CSR = 1,
+}
+
+table Int32Vector {
+ values:[int];
+}
+
+table Uint16Vector {
+ values:[ushort] (force_align: 4);
+}
+
+table Uint8Vector {
+ values:[ubyte] (force_align: 4);
+}
+
+// Variable-typed buffer to store the index metadata for a sparse dimension.
+// The widest type is Int32 instead of UInt32 because a tensor's shape is an
+// int32 vector. We don't want the per-dimensional index to overflow that range.
+union SparseIndexVector {
+ Int32Vector,
+ Uint16Vector,
+ Uint8Vector
+}
+
+table DimensionMetadata {
+ // Whether a dimension is dense or sparse.
+ format:DimensionType;
+ // Index metadata used for a dimension.
+ // - If format is DimensionType.DENSE then we use the dense_size field to
+ // store the size of that dimension. Each index in that dimension is
+ // stored implicitly.
+ // - If format is DimensionType.SPARSE_CSR then we use array_segments and
+ // array_indices to encode that dimension. array_segments represents how
+  //   to segment the indices array; each segment corresponds to one element
+ // in the previous dimension. array_indices represents the index of the
+ // non-zero elements within this dimension (as those in the CSR matrix
+ // format, where the first array is row pointers and the second array is
+ // column indices).
+ dense_size:int;
+ array_segments:SparseIndexVector;
+ array_indices:SparseIndexVector;
+}
+
+// Parameters to encode a sparse TfLite tensor.
+table SparsityParameters {
+ // The traversal order of the dimensions defined in the `shape` field of the
+  // conceptual dense tensor. For an n-dimensional tensor with dims (d0, d1,
+ // ..., dn-1),
+ // - if not block sparse, the traversal_order is just a permutation of (d0,
+ // ..., dn-1). For example, a 2-D matrix stored in row-major order would
+ // have traversal_order = (d0, d1).
+ // - if block sparse with a k-dimensional block (0 <= k <= n), the
+ // traversal_order has n + k elements. The first n elements are still a
+  //   permutation of (d0, ..., dn-1). The last k elements are a permutation
+ // of (dn, ..., dn+k-1), defining how to traverse a block internally. For
+ // example, a 2-D matrix with 2-D blocks, both stored in row-major order
+ // would have traversal_order = (d0, d1, d2, d3).
+ traversal_order:[int];
+ // For an n-dimensional tensor with a k-dimensional block (0 <= k <= n),
+ // stores how a block dimension in (dn, ..., dn+k-1) maps to the original
+  // tensor dimension in (d0, ..., dn-1).
+ // It's stored in the order of (dn, ..., dn+k-1).
+ // If not block-sparse, this field is NULL.
+ block_map:[int];
+ // In the traversal order defined above, the metadata needed for
+ // each dimension to locate the non-zero values in the original dense tensor.
+ // The size of the dim_metadata array = the size of the traversal_order array
+ // = n + k.
+ dim_metadata:[DimensionMetadata];
+}
+
+table Tensor {
+ // The tensor shape. The meaning of each entry is operator-specific but
+ // builtin ops use: [batch size, height, width, number of channels] (That's
+ // Tensorflow's NHWC).
+ shape:[int];
+ type:TensorType;
+ // An index that refers to the buffers table at the root of the model. Or,
+ // if there is no data buffer associated (i.e. intermediate results), then
+ // this is 0 (which refers to an always existent empty buffer).
+ //
+ // The data_buffer itself is an opaque container, with the assumption that the
+ // target device is little-endian. In addition, all builtin operators assume
+ // the memory is ordered such that if `shape` is [4, 3, 2], then index
+ // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k].
+ buffer:uint;
+ name:string; // For debugging and importing back into tensorflow.
+ quantization:QuantizationParameters; // Optional.
+
+ is_variable:bool = false;
+
+ // Parameters to encode a sparse tensor. See the example in
+ // tensorflow/lite/testdata/sparse_tensor.json.
+ sparsity:SparsityParameters; // Optional.
+
+ // Encodes `shape` with unknown dimensions. Unknown dimensions are
+ // represented with -1.
+ shape_signature:[int]; // Optional.
+}
+
+// A list of builtin operators. Builtin operators are slightly faster than custom
+// ones, but not by much. Moreover, while custom operators accept an opaque
+// object containing configuration parameters, builtins have a predetermined
+// set of acceptable options.
+// LINT.IfChange
+enum BuiltinOperator : int32 {
+ ADD = 0,
+ AVERAGE_POOL_2D = 1,
+ CONCATENATION = 2,
+ CONV_2D = 3,
+ DEPTHWISE_CONV_2D = 4,
+ DEPTH_TO_SPACE = 5,
+ DEQUANTIZE = 6,
+ EMBEDDING_LOOKUP = 7,
+ FLOOR = 8,
+ FULLY_CONNECTED = 9,
+ HASHTABLE_LOOKUP = 10,
+ L2_NORMALIZATION = 11,
+ L2_POOL_2D = 12,
+ LOCAL_RESPONSE_NORMALIZATION = 13,
+ LOGISTIC = 14,
+ LSH_PROJECTION = 15,
+ LSTM = 16,
+ MAX_POOL_2D = 17,
+ MUL = 18,
+ RELU = 19,
+ // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed
+ // since different model developers use RELU1 in different ways. Never
+ // create another op called RELU1.
+ RELU_N1_TO_1 = 20,
+ RELU6 = 21,
+ RESHAPE = 22,
+ RESIZE_BILINEAR = 23,
+ RNN = 24,
+ SOFTMAX = 25,
+ SPACE_TO_DEPTH = 26,
+ SVDF = 27,
+ TANH = 28,
+ CONCAT_EMBEDDINGS = 29,
+ SKIP_GRAM = 30,
+ CALL = 31,
+ CUSTOM = 32,
+ EMBEDDING_LOOKUP_SPARSE = 33,
+ PAD = 34,
+ UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+ GATHER = 36,
+ BATCH_TO_SPACE_ND = 37,
+ SPACE_TO_BATCH_ND = 38,
+ TRANSPOSE = 39,
+ MEAN = 40,
+ SUB = 41,
+ DIV = 42,
+ SQUEEZE = 43,
+ UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
+ STRIDED_SLICE = 45,
+ BIDIRECTIONAL_SEQUENCE_RNN = 46,
+ EXP = 47,
+ TOPK_V2 = 48,
+ SPLIT = 49,
+ LOG_SOFTMAX = 50,
+ // DELEGATE is a special op type for the operations which are delegated to
+ // other backends.
+ // WARNING: Experimental interface, subject to change
+ DELEGATE = 51,
+ BIDIRECTIONAL_SEQUENCE_LSTM = 52,
+ CAST = 53,
+ PRELU = 54,
+ MAXIMUM = 55,
+ ARG_MAX = 56,
+ MINIMUM = 57,
+ LESS = 58,
+ NEG = 59,
+ PADV2 = 60,
+ GREATER = 61,
+ GREATER_EQUAL = 62,
+ LESS_EQUAL = 63,
+ SELECT = 64,
+ SLICE = 65,
+ SIN = 66,
+ TRANSPOSE_CONV = 67,
+ SPARSE_TO_DENSE = 68,
+ TILE = 69,
+ EXPAND_DIMS = 70,
+ EQUAL = 71,
+ NOT_EQUAL = 72,
+ LOG = 73,
+ SUM = 74,
+ SQRT = 75,
+ RSQRT = 76,
+ SHAPE = 77,
+ POW = 78,
+ ARG_MIN = 79,
+ FAKE_QUANT = 80,
+ REDUCE_PROD = 81,
+ REDUCE_MAX = 82,
+ PACK = 83,
+ LOGICAL_OR = 84,
+ ONE_HOT = 85,
+ LOGICAL_AND = 86,
+ LOGICAL_NOT = 87,
+ UNPACK = 88,
+ REDUCE_MIN = 89,
+ FLOOR_DIV = 90,
+ REDUCE_ANY = 91,
+ SQUARE = 92,
+ ZEROS_LIKE = 93,
+ FILL = 94,
+ FLOOR_MOD = 95,
+ RANGE = 96,
+ RESIZE_NEAREST_NEIGHBOR = 97,
+ LEAKY_RELU = 98,
+ SQUARED_DIFFERENCE = 99,
+ MIRROR_PAD = 100,
+ ABS = 101,
+ SPLIT_V = 102,
+ UNIQUE = 103,
+ CEIL = 104,
+ REVERSE_V2 = 105,
+ ADD_N = 106,
+ GATHER_ND = 107,
+ COS = 108,
+ WHERE = 109,
+ RANK = 110,
+ ELU = 111,
+ REVERSE_SEQUENCE = 112,
+ MATRIX_DIAG = 113,
+ QUANTIZE = 114,
+ MATRIX_SET_DIAG = 115,
+ ROUND = 116,
+ HARD_SWISH = 117,
+ IF = 118,
+ WHILE = 119,
+ NON_MAX_SUPPRESSION_V4 = 120,
+ NON_MAX_SUPPRESSION_V5 = 121,
+ SCATTER_ND = 122,
+ SELECT_V2 = 123,
+ DENSIFY = 124,
+ SEGMENT_SUM = 125,
+ BATCH_MATMUL = 126,
+ PLACEHOLDER_FOR_GREATER_OP_CODES = 127,
+ CUMSUM = 128,
+ CALL_ONCE = 129,
+ BROADCAST_TO = 130,
+ RFFT2D = 131,
+ CONV_3D = 132,
+ IMAG=133,
+ REAL=134,
+ COMPLEX_ABS=135,
+ HASHTABLE = 136,
+ HASHTABLE_FIND = 137,
+ HASHTABLE_IMPORT = 138,
+ HASHTABLE_SIZE = 139,
+ REDUCE_ALL = 140,
+ CONV_3D_TRANSPOSE = 141,
+ VAR_HANDLE = 142,
+ READ_VARIABLE = 143,
+ ASSIGN_VARIABLE = 144,
+ BROADCAST_ARGS = 145,
+ RANDOM_STANDARD_NORMAL = 146,
+ BUCKETIZE = 147,
+ RANDOM_UNIFORM = 148,
+ MULTINOMIAL = 149,
+ GELU = 150,
+}
+// LINT.ThenChange(nnapi_linter/linter.proto)
+
+// Options for the builtin operators.
+union BuiltinOptions {
+ Conv2DOptions,
+ DepthwiseConv2DOptions,
+ ConcatEmbeddingsOptions,
+ LSHProjectionOptions,
+ Pool2DOptions,
+ SVDFOptions,
+ RNNOptions,
+ FullyConnectedOptions,
+ SoftmaxOptions,
+ ConcatenationOptions,
+ AddOptions,
+ L2NormOptions,
+ LocalResponseNormalizationOptions,
+ LSTMOptions,
+ ResizeBilinearOptions,
+ CallOptions,
+ ReshapeOptions,
+ SkipGramOptions,
+ SpaceToDepthOptions,
+ EmbeddingLookupSparseOptions,
+ MulOptions,
+ PadOptions,
+ GatherOptions,
+ BatchToSpaceNDOptions,
+ SpaceToBatchNDOptions,
+ TransposeOptions,
+ ReducerOptions,
+ SubOptions,
+ DivOptions,
+ SqueezeOptions,
+ SequenceRNNOptions,
+ StridedSliceOptions,
+ ExpOptions,
+ TopKV2Options,
+ SplitOptions,
+ LogSoftmaxOptions,
+ CastOptions,
+ DequantizeOptions,
+ MaximumMinimumOptions,
+ ArgMaxOptions,
+ LessOptions,
+ NegOptions,
+ PadV2Options,
+ GreaterOptions,
+ GreaterEqualOptions,
+ LessEqualOptions,
+ SelectOptions,
+ SliceOptions,
+ TransposeConvOptions,
+ SparseToDenseOptions,
+ TileOptions,
+ ExpandDimsOptions,
+ EqualOptions,
+ NotEqualOptions,
+ ShapeOptions,
+ PowOptions,
+ ArgMinOptions,
+ FakeQuantOptions,
+ PackOptions,
+ LogicalOrOptions,
+ OneHotOptions,
+ LogicalAndOptions,
+ LogicalNotOptions,
+ UnpackOptions,
+ FloorDivOptions,
+ SquareOptions,
+ ZerosLikeOptions,
+ FillOptions,
+ BidirectionalSequenceLSTMOptions,
+ BidirectionalSequenceRNNOptions,
+ UnidirectionalSequenceLSTMOptions,
+ FloorModOptions,
+ RangeOptions,
+ ResizeNearestNeighborOptions,
+ LeakyReluOptions,
+ SquaredDifferenceOptions,
+ MirrorPadOptions,
+ AbsOptions,
+ SplitVOptions,
+ UniqueOptions,
+ ReverseV2Options,
+ AddNOptions,
+ GatherNdOptions,
+ CosOptions,
+ WhereOptions,
+ RankOptions,
+ ReverseSequenceOptions,
+ MatrixDiagOptions,
+ QuantizeOptions,
+ MatrixSetDiagOptions,
+ HardSwishOptions,
+ IfOptions,
+ WhileOptions,
+ DepthToSpaceOptions,
+ NonMaxSuppressionV4Options,
+ NonMaxSuppressionV5Options,
+ ScatterNdOptions,
+ SelectV2Options,
+ DensifyOptions,
+ SegmentSumOptions,
+ BatchMatMulOptions,
+ CumsumOptions,
+ CallOnceOptions,
+ BroadcastToOptions,
+ Rfft2dOptions,
+ Conv3DOptions,
+ HashtableOptions,
+ HashtableFindOptions,
+ HashtableImportOptions,
+ HashtableSizeOptions,
+ VarHandleOptions,
+ ReadVariableOptions,
+ AssignVariableOptions,
+ RandomOptions,
+ BucketizeOptions,
+ GeluOptions,
+}
+
+enum Padding : byte { SAME, VALID }
+
+enum ActivationFunctionType : byte {
+ NONE = 0,
+ RELU = 1,
+ RELU_N1_TO_1 = 2,
+ RELU6 = 3,
+ TANH = 4,
+ SIGN_BIT = 5,
+}
+
+table Conv2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+// Options for both Conv3D and Conv3DTranspose.
+table Conv3DOptions {
+ padding:Padding;
+ stride_d:int;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_d_factor:int = 1;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table Pool2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ filter_width:int;
+ filter_height:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DepthwiseConv2DOptions {
+ // Parameters for DepthwiseConv version 1 or above.
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ // `depth_multiplier` is redundant. It's used by CPU kernels in
+ // TensorFlow 2.0 or below, but ignored in versions above.
+ // See comments in lite/c/builtin_op_data.h for more details.
+ depth_multiplier:int;
+ fused_activation_function:ActivationFunctionType;
+ // Parameters for DepthwiseConv version 2 or above.
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table ConcatEmbeddingsOptions {
+ num_channels:int;
+ num_columns_per_channel:[int];
+ embedding_dim_per_channel:[int]; // This could be inferred from parameters.
+}
+
+enum LSHProjectionType: byte {
+ UNKNOWN = 0,
+ SPARSE = 1,
+ DENSE = 2,
+}
+
+table LSHProjectionOptions {
+ type: LSHProjectionType;
+}
+
+table SVDFOptions {
+ rank:int;
+ fused_activation_function:ActivationFunctionType;
+  // For weights-only quantization, use asymmetric quantization for
+  // non-constant inputs at evaluation time.
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow RNNCell.
+table RNNOptions {
+ fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with RNNCell.
+table SequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow bidirectional_dynamic_rnn with RNNCell.
+table BidirectionalSequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ merge_outputs: bool;
+ asymmetric_quantize_inputs:bool;
+}
+
+enum FullyConnectedOptionsWeightsFormat: byte {
+ DEFAULT = 0,
+ SHUFFLED4x16INT8 = 1,
+}
+
+// An implementation of TensorFlow fully_connected (a.k.a. Dense) layer.
+table FullyConnectedOptions {
+ // Parameters for FullyConnected version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+
+ // Parameters for FullyConnected version 2 or above.
+ weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT;
+
+ // Parameters for FullyConnected version 5 or above.
+  // If set to true, then the number of dimensions is preserved. Furthermore,
+ // all but the last dimension of the input and output shapes will be equal.
+ keep_num_dims: bool;
+
+ // Parameters for FullyConnected version 7 or above.
+ // If set to true, then weights-only op will use asymmetric quantization for
+ // inputs.
+ asymmetric_quantize_inputs: bool;
+}
+
+table SoftmaxOptions {
+ beta: float;
+}
+
+// An implementation of TensorFlow concat.
+table ConcatenationOptions {
+ axis:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table AddOptions {
+ fused_activation_function:ActivationFunctionType;
+ // Parameters supported by version 3.
+ pot_scale_int16:bool = true;
+}
+
+table MulOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table L2NormOptions {
+ // This field is currently ignored in the L2 Norm Op.
+ fused_activation_function:ActivationFunctionType;
+}
+
+table LocalResponseNormalizationOptions {
+ radius:int;
+ bias:float;
+ alpha:float;
+ beta:float;
+}
+
+enum LSTMKernelType : byte {
+ // Full LSTM kernel which supports peephole and projection.
+ FULL = 0,
+ // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
+ BASIC = 1,
+}
+
+// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
+table LSTMOptions {
+ // Parameters for LSTM version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // Parameters for LSTM version 2 or above.
+ // Basic kernel is only supported in version 2 or above.
+ kernel_type: LSTMKernelType = FULL;
+
+ // Parameters for LSTM version 4 or above.
+ asymmetric_quantize_inputs: bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with LSTMCell.
+table UnidirectionalSequenceLSTMOptions {
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true then first dimension is sequence, otherwise batch.
+ time_major:bool;
+
+ // Parameter for Unidirectional Sequence LSTM version 4.
+ asymmetric_quantize_inputs:bool;
+}
+
+table BidirectionalSequenceLSTMOptions {
+ // Parameters supported by version 1:
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true, store the outputs of both directions into the first output.
+ merge_outputs: bool;
+
+ // Parameters supported by version 2:
+ // If true then first dimension is sequence, otherwise batch.
+ // Version 1 implementations assumed time_major to be true, so this default
+ // value should never change.
+ time_major: bool = true;
+
+ // Parameters for version 3 or above.
+ asymmetric_quantize_inputs:bool;
+}
+
+table ResizeBilinearOptions {
+ new_height: int (deprecated);
+ new_width: int (deprecated);
+ align_corners: bool;
+ half_pixel_centers: bool;
+}
+
+table ResizeNearestNeighborOptions {
+ align_corners: bool;
+ half_pixel_centers: bool;
+}
+
+// Options for a call operation.
+table CallOptions {
+ // The subgraph index that needs to be called.
+ subgraph:uint;
+}
+
+table PadOptions {
+}
+
+table PadV2Options {
+}
+
+table ReshapeOptions {
+ new_shape:[int];
+}
+
+table SpaceToBatchNDOptions {
+}
+
+table BatchToSpaceNDOptions {
+}
+
+table SkipGramOptions {
+ ngram_size: int;
+ max_skip_size: int;
+ include_all_ngrams: bool;
+}
+
+table SpaceToDepthOptions {
+ block_size: int;
+}
+
+table DepthToSpaceOptions {
+ block_size: int;
+}
+
+table SubOptions {
+ fused_activation_function:ActivationFunctionType;
+ // Parameters supported by version 5
+ pot_scale_int16:bool = true;
+}
+
+table DivOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table TopKV2Options {
+}
+
+enum CombinerType : byte {
+ SUM = 0,
+ MEAN = 1,
+ SQRTN = 2,
+}
+
+table EmbeddingLookupSparseOptions {
+ combiner:CombinerType;
+}
+
+table GatherOptions {
+ axis: int;
+ // Parameters for Gather version 5 or above.
+ batch_dims: int = 0;
+}
+
+table TransposeOptions {
+}
+
+table ExpOptions {
+}
+
+table CosOptions {
+}
+
+table ReducerOptions {
+ keep_dims: bool;
+}
+
+table SqueezeOptions {
+ squeeze_dims:[int];
+}
+
+table SplitOptions {
+ num_splits: int;
+}
+
+table SplitVOptions {
+ num_splits: int;
+}
+
+table StridedSliceOptions {
+ begin_mask: int;
+ end_mask: int;
+ ellipsis_mask: int;
+ new_axis_mask: int;
+ shrink_axis_mask: int;
+}
+
+table LogSoftmaxOptions {
+}
+
+table CastOptions {
+ in_data_type: TensorType;
+ out_data_type: TensorType;
+}
+
+table DequantizeOptions {
+}
+
+table MaximumMinimumOptions {
+}
+
+table TileOptions {
+}
+
+table ArgMaxOptions {
+ output_type : TensorType;
+}
+
+table ArgMinOptions {
+ output_type : TensorType;
+}
+
+table GreaterOptions {
+}
+
+table GreaterEqualOptions {
+}
+
+table LessOptions {
+}
+
+table LessEqualOptions {
+}
+
+table NegOptions {
+}
+
+table SelectOptions {
+}
+
+table SliceOptions {
+}
+
+table TransposeConvOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+}
+
+table ExpandDimsOptions {
+}
+
+table SparseToDenseOptions {
+ validate_indices:bool;
+}
+
+table EqualOptions {
+}
+
+table NotEqualOptions {
+}
+
+table ShapeOptions {
+ // Optional output type of the operation (int32 or int64). Defaults to int32.
+ out_type : TensorType;
+}
+
+table RankOptions {
+}
+
+table PowOptions {
+}
+
+table FakeQuantOptions {
+ // Parameters supported by version 1:
+ min:float;
+ max:float;
+ num_bits:int;
+
+ // Parameters supported by version 2:
+ narrow_range:bool;
+}
+
+table PackOptions {
+ values_count:int;
+ axis:int;
+}
+
+table LogicalOrOptions {
+}
+
+table OneHotOptions {
+ axis:int;
+}
+
+table AbsOptions {
+}
+
+
+table HardSwishOptions {
+}
+
+table LogicalAndOptions {
+}
+
+table LogicalNotOptions {
+}
+
+table UnpackOptions {
+ num:int;
+ axis:int;
+}
+
+table FloorDivOptions {
+}
+
+table SquareOptions {
+}
+
+table ZerosLikeOptions {
+}
+
+table FillOptions {
+}
+
+table FloorModOptions {
+}
+
+table RangeOptions {
+}
+
+table LeakyReluOptions {
+ alpha:float;
+}
+
+table SquaredDifferenceOptions {
+}
+
+enum MirrorPadMode : byte {
+ // Doesn't include borders.
+ REFLECT = 0,
+ // Includes borders.
+ SYMMETRIC = 1,
+}
+
+table MirrorPadOptions {
+ mode:MirrorPadMode;
+}
+
+table UniqueOptions {
+ idx_out_type:TensorType = INT32;
+}
+
+table ReverseV2Options {
+}
+
+table AddNOptions {
+}
+
+table GatherNdOptions {
+}
+
+table WhereOptions {
+}
+
+table ReverseSequenceOptions {
+ seq_dim:int;
+ batch_dim:int = 0;
+}
+
+table MatrixDiagOptions {
+}
+
+table QuantizeOptions {
+}
+
+table MatrixSetDiagOptions {
+}
+
+table IfOptions {
+ then_subgraph_index:int;
+ else_subgraph_index:int;
+}
+
+table CallOnceOptions {
+ init_subgraph_index:int;
+}
+
+table WhileOptions {
+ cond_subgraph_index:int;
+ body_subgraph_index:int;
+}
+
+table NonMaxSuppressionV4Options {
+}
+
+table NonMaxSuppressionV5Options {
+}
+
+table ScatterNdOptions {
+}
+
+table SelectV2Options {
+}
+
+table DensifyOptions {
+}
+
+table SegmentSumOptions {
+}
+
+table BatchMatMulOptions {
+ adj_x:bool;
+ adj_y:bool;
+ // Parameters for BatchMatMul version 4 or above.
+ // If set to true, then weights-only op will use asymmetric quantization for
+ // inputs.
+ asymmetric_quantize_inputs: bool;
+}
+
+table CumsumOptions {
+ exclusive:bool;
+ reverse:bool;
+}
+
+table BroadcastToOptions {
+}
+
+table Rfft2dOptions {
+}
+
+table HashtableOptions {
+ // The identity of hash tables. This identity will be used across different
+ // subgraphs in the same interpreter instance.
+ table_id:int;
+ key_dtype:TensorType;
+ value_dtype:TensorType;
+}
+
+table HashtableFindOptions {
+}
+
+table HashtableImportOptions {
+}
+
+table HashtableSizeOptions {
+}
+
+table VarHandleOptions {
+ container:string;
+ shared_name:string;
+}
+
+table ReadVariableOptions {
+}
+
+table AssignVariableOptions {
+}
+
+table RandomOptions {
+ seed: long;
+ seed2: long;
+}
+
+table BucketizeOptions {
+ boundaries: [float]; // The bucket boundaries.
+}
+
+table GeluOptions {
+ approximate: bool;
+}
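+
+// NOTE (an illustration, assuming TF semantics): when `approximate` is true,
+// GELU is typically computed with the tanh approximation
+//   0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x^3))),
+// mirroring tf.nn.gelu(approximate=True).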
+
+// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
+// builtin, or a string if the operator is custom.
+table OperatorCode {
+  // This field is for backward compatibility. This field will be used when
+  // the value of the extended builtin_code field is less than
+  // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+ deprecated_builtin_code:byte;
+ custom_code:string;
+
+  // The version of the operator. The version needs to be bumped whenever new
+ // parameters are introduced into an op.
+ version:int = 1;
+
+  // This field is introduced for resolving the op builtin code shortage problem
+  // (the original BuiltinOperator enum field was represented as a byte).
+  // This field will be used when the value of the extended builtin_code field
+  // is greater than BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+ builtin_code:BuiltinOperator;
+}
+
+enum CustomOptionsFormat : byte {
+ FLEXBUFFERS = 0,
+}
+
+// An operator takes tensors as inputs and outputs. The type of operation being
+// performed is determined by an index into the list of valid OperatorCodes,
+// while the specifics of each operation are configured using builtin_options
+// or custom_options.
+table Operator {
+ // Index into the operator_codes array. Using an integer here avoids
+  // complicated map lookups.
+ opcode_index:uint;
+
+  // Optional inputs are indicated by -1.
+ inputs:[int];
+ outputs:[int];
+
+ builtin_options:BuiltinOptions;
+ custom_options:[ubyte];
+ custom_options_format:CustomOptionsFormat;
+
+ // A list of booleans indicating the input tensors which are being mutated by
+  // this operator (e.g. used by RNN and LSTM).
+ // For example, if the "inputs" array refers to 5 tensors and the second and
+ // fifth are mutable variables, then this list will contain
+ // [false, true, false, false, true].
+ //
+ // If the list is empty, no variable is mutated in this operator.
+ // The list either has the same length as `inputs`, or is empty.
+ mutating_variable_inputs:[bool];
+
+ // A list of indices to the subgraph's "tensors" that are internal to an Op.
+ // Internal tensors are those that do not flow in or out of the operation,
+ // but instead are part of internal computation. As such, the operation's
+ // implementation may manage its memory more efficiently. They are needed
+ // however (i.e. not just an implementation detail) since they are part of the
+ // computation, which may require relevant metadata such as quantization
+ // parameters.
+ intermediates:[int];
+}
+
+// The root type, defining a subgraph, which typically represents an entire
+// model.
+table SubGraph {
+ // A list of all tensors used in this subgraph.
+ tensors:[Tensor];
+
+ // Indices of the tensors that are inputs into this subgraph. Note this is
+ // the list of non-static tensors that feed into the subgraph for inference.
+ inputs:[int];
+
+ // Indices of the tensors that are outputs out of this subgraph. Note this is
+ // the list of output tensors that are considered the product of the
+ // subgraph's inference.
+ outputs:[int];
+
+ // All operators, in execution order.
+ operators:[Operator];
+
+ // Name of this subgraph (used for debugging).
+ name:string;
+}
+
+// Table of raw data buffers (used for constant tensors). Referenced by tensors
+// by index. The generous alignment accommodates mmap-friendly data structures.
+table Buffer {
+ data:[ubyte] (force_align: 16);
+}
+
+table Metadata {
+ // A human readable string to uniquely identify a Metadata.
+ name:string;
+ // An index to the buffers table.
+ buffer:uint;
+}
+
+// Map from an alias name of tensor to tensor index in the graph.
+// This is used in Signature def.
+table TensorMap {
+ // Represents the alias to use for this tensor.
+ name:string;
+
+ // The actual tensor index in the primary graph, that 'name' corresponds to.
+ tensor_index:uint;
+}
+
+// This corresponds to SignatureDef in Tensorflow SavedModel.
+// The SignatureDef will be part of the SavedModel provided for conversion.
+table SignatureDef {
+ // Named inputs for this signature.
+ inputs:[TensorMap];
+
+ // Named outputs for this signature.
+ outputs:[TensorMap];
+
+ // Key value which was in the Tensorflow SavedModel SignatureDef map.
+ signature_key:string;
+
+ // Model tag, deprecated.
+ deprecated_tag:string (deprecated);
+
+ // Index of subgraphs that corresponds to the exported method.
+ subgraph_index:uint;
+}
+
+table Model {
+ // Version of the schema.
+ version:uint;
+
+ // A list of all operator codes used in this model. This is
+ // kept in order because operators carry an index into this
+ // vector.
+ operator_codes:[OperatorCode];
+
+ // All the subgraphs of the model. The 0th is assumed to be the main
+ // model.
+ subgraphs:[SubGraph];
+
+ // A description of the model.
+ description:string;
+
+ // Buffers of the model.
+ // Note the 0th entry of this array must be an empty buffer (sentinel).
+ // This is a convention so that tensors without a buffer can provide 0 as
+ // their buffer.
+ buffers:[Buffer];
+
+  // Metadata about the model. Indirects into the existing buffers list.
+ // Deprecated, prefer to use metadata field.
+ metadata_buffer:[int];
+
+ // Metadata about the model.
+ metadata:[Metadata];
+
+ // Optional SignatureDefs for the model.
+ signature_defs:[SignatureDef];
+}
+
+root_type Model;
diff --git a/res/TensorFlowLiteSchema/SCHEMA.lst b/res/TensorFlowLiteSchema/SCHEMA.lst
index 73dfacd7b..d7308eeea 100644
--- a/res/TensorFlowLiteSchema/SCHEMA.lst
+++ b/res/TensorFlowLiteSchema/SCHEMA.lst
@@ -6,3 +6,5 @@ VERSION,URL
2.2.0,https://raw.githubusercontent.com/tensorflow/tensorflow/v2.2.0/tensorflow/lite/schema/schema.fbs
2.3.0-rc0,https://raw.githubusercontent.com/tensorflow/tensorflow/v2.3.0-rc0/tensorflow/lite/schema/schema.fbs
2.3.0,https://raw.githubusercontent.com/tensorflow/tensorflow/v2.3.0/tensorflow/lite/schema/schema.fbs
+2.6.0,https://raw.githubusercontent.com/tensorflow/tensorflow/v2.6.0/tensorflow/lite/schema/schema.fbs
+2.10.1,https://raw.githubusercontent.com/tensorflow/tensorflow/v2.10.1/tensorflow/lite/schema/schema.fbs
diff --git a/res/TensorFlowPythonExamples/.gitignore b/res/TensorFlowPythonExamples/.gitignore
index bee8a64b7..9410ad2e0 100644
--- a/res/TensorFlowPythonExamples/.gitignore
+++ b/res/TensorFlowPythonExamples/.gitignore
@@ -1 +1,2 @@
__pycache__
+output/
diff --git a/res/TensorFlowPythonExamples/README.md b/res/TensorFlowPythonExamples/README.md
index faa0d53ba..eb0395d10 100644
--- a/res/TensorFlowPythonExamples/README.md
+++ b/res/TensorFlowPythonExamples/README.md
@@ -2,8 +2,9 @@
## Prerequisite
-- Python 3.X
-- TensorFlow 1.15
+- Python 3.8
+- TensorFlow 2.8.0
+- NOTE: some examples may use older versions
## Directory Layout
@@ -20,7 +21,14 @@ Follow python API name
## HOWTO: Create a Python environment
-TBA
+Install the release Debian packages from https://github.com/Samsung/ONE/releases
+and enter the virtual environment.
+```
+source /usr/share/one/bin/venv/bin/activate
+```
+You may have to prepare the environment for the first time. Read
+[how-to-prepare-virtualenv.txt](https://github.com/Samsung/ONE/blob/master/compiler/one-cmds/how-to-prepare-virtualenv.txt)
+for more information.
## HOWTO: Generate a pbtxt from examples
diff --git a/res/TensorFlowPythonExamples/examples/AddV2/__init__.py b/res/TensorFlowPythonExamples/examples/AddV2/__init__.py
index 8114c50b1..0cfa27e43 100644
--- a/res/TensorFlowPythonExamples/examples/AddV2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/AddV2/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.raw_ops.AddV2(x=lhs_, y=rhs_)
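The `tf.compat.v1.disable_eager_execution()` call added throughout these examples is what allows `tf.compat.v1.placeholder` to work under TF 2.x, which executes eagerly by default and rejects placeholders. A minimal sketch of how one of these example graphs can then be dumped to a pbtxt (the output file name and driver code here are illustrative, not the repository's actual tooling):

```
import tensorflow as tf

tf.compat.v1.disable_eager_execution()  # placeholders require graph mode

lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.raw_ops.AddV2(x=lhs_, y=rhs_)

# Serialize the default graph as a text GraphDef.
graph_def = tf.compat.v1.get_default_graph().as_graph_def()
tf.io.write_graph(graph_def, ".", "add_v2.pbtxt", as_text=True)
```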
diff --git a/res/TensorFlowPythonExamples/examples/BatchMatMulV2/__init__.py b/res/TensorFlowPythonExamples/examples/BatchMatMulV2/__init__.py
index b9f7a1cc1..5f851cb21 100644
--- a/res/TensorFlowPythonExamples/examples/BatchMatMulV2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/BatchMatMulV2/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 5, 4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.raw_ops.BatchMatMulV2(x=lhs_, y=rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/Bidirectional_LSTM/__init__.py b/res/TensorFlowPythonExamples/examples/Bidirectional_LSTM/__init__.py
new file mode 100644
index 000000000..b4f0297a3
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/Bidirectional_LSTM/__init__.py
@@ -0,0 +1,8 @@
+import tensorflow as tf
+
+tf.compat.v1.disable_eager_execution()
+
+in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[28, 28, 3], name="Hole")
+
+op_uni_ = tf.compat.v1.keras.layers.LSTM(1, time_major=False, return_sequences=True)
+op_bidi_ = tf.compat.v1.keras.layers.Bidirectional(op_uni_)(in_)
diff --git a/res/TensorFlowPythonExamples/examples/GRU_unroll/__init__.py b/res/TensorFlowPythonExamples/examples/GRU_unroll/__init__.py
new file mode 100644
index 000000000..51cb893dc
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/GRU_unroll/__init__.py
@@ -0,0 +1,8 @@
+# NOTE tested with TF 2.8.0
+from tensorflow import keras
+
+model = keras.Sequential()
+shape = (4, 4)
+
+model.add(keras.layers.InputLayer(input_shape=shape, batch_size=1))
+model.add(keras.layers.GRU(2, input_shape=shape, unroll=True))
diff --git a/res/TensorFlowPythonExamples/examples/LSTM_batsize/__init__.py b/res/TensorFlowPythonExamples/examples/LSTM_batsize/__init__.py
new file mode 100644
index 000000000..dfcd46919
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/LSTM_batsize/__init__.py
@@ -0,0 +1,10 @@
+# NOTE tested with TF 2.8.0
+from tensorflow import keras
+
+model = keras.Sequential()
+shape = (4, 4)
+
+model.add(keras.layers.InputLayer(input_shape=shape, batch_size=1))
+model.add(keras.layers.LSTM(2, input_shape=shape))
+
+# NOTE refer https://github.com/Samsung/ONE/issues/9895#issuecomment-1289766546
diff --git a/res/TensorFlowPythonExamples/examples/LSTM_retseq/__init__.py b/res/TensorFlowPythonExamples/examples/LSTM_retseq/__init__.py
new file mode 100644
index 000000000..2748c8096
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/LSTM_retseq/__init__.py
@@ -0,0 +1,10 @@
+# NOTE tested with TF 2.8.0
+from tensorflow import keras
+
+model = keras.Sequential()
+shape = (4, 4)
+
+model.add(keras.layers.InputLayer(input_shape=shape, batch_size=1))
+model.add(keras.layers.LSTM(2, input_shape=shape, return_sequences=True))
+
+# NOTE refer https://github.com/Samsung/ONE/issues/9895#issuecomment-1289768739
diff --git a/res/TensorFlowPythonExamples/examples/LSTM_unroll/__init__.py b/res/TensorFlowPythonExamples/examples/LSTM_unroll/__init__.py
new file mode 100644
index 000000000..d21c2b54c
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/LSTM_unroll/__init__.py
@@ -0,0 +1,10 @@
+# NOTE tested with TF 2.8.0
+from tensorflow import keras
+
+model = keras.Sequential()
+shape = (4, 4)
+
+model.add(keras.layers.InputLayer(input_shape=shape, batch_size=1))
+model.add(keras.layers.LSTM(2, input_shape=shape, unroll=True))
+
+# NOTE refer https://github.com/Samsung/ONE/issues/9895#issuecomment-1288436802
diff --git a/res/TensorFlowPythonExamples/examples/PadV2/__init__.py b/res/TensorFlowPythonExamples/examples/PadV2/__init__.py
index 99940bf85..995efd5ee 100644
--- a/res/TensorFlowPythonExamples/examples/PadV2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/PadV2/__init__.py
@@ -1,6 +1,8 @@
import tensorflow as tf
import numpy as np
+tf.compat.v1.disable_eager_execution()
+
input_ = tf.compat.v1.placeholder(shape=[1, 1, 1, 1], dtype=tf.float32)
paddings_ = tf.compat.v1.constant(
np.array([[1, 1], [2, 2], [3, 3], [4, 4]], dtype=np.int32))
diff --git a/res/TensorFlowPythonExamples/examples/RNN_GRUCell_unroll/__init__.py b/res/TensorFlowPythonExamples/examples/RNN_GRUCell_unroll/__init__.py
new file mode 100644
index 000000000..f2d503693
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/RNN_GRUCell_unroll/__init__.py
@@ -0,0 +1,9 @@
+# NOTE tested with TF 2.8.0
+from tensorflow import keras
+
+model = keras.Sequential()
+shape = (4, 4)
+
+model.add(keras.layers.InputLayer(input_shape=shape, batch_size=1))
+grucell = keras.layers.GRUCell(2)
+model.add(keras.layers.RNN(grucell, input_shape=shape, unroll=True))
diff --git a/res/TensorFlowPythonExamples/examples/RNN_LSTMCell_unroll/__init__.py b/res/TensorFlowPythonExamples/examples/RNN_LSTMCell_unroll/__init__.py
new file mode 100644
index 000000000..927f6b644
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/RNN_LSTMCell_unroll/__init__.py
@@ -0,0 +1,11 @@
+# NOTE tested with TF 2.8.0
+from tensorflow import keras
+
+model = keras.Sequential()
+shape = (4, 4)
+
+model.add(keras.layers.InputLayer(input_shape=shape, batch_size=1))
+lstmcell = keras.layers.LSTMCell(2)
+model.add(keras.layers.RNN(lstmcell, input_shape=shape, unroll=True))
+
+# NOTE refer https://github.com/Samsung/ONE/issues/9895#issuecomment-1289820894
diff --git a/res/TensorFlowPythonExamples/examples/SimpleRNN_unroll/__init__.py b/res/TensorFlowPythonExamples/examples/SimpleRNN_unroll/__init__.py
new file mode 100644
index 000000000..8219e6be5
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/SimpleRNN_unroll/__init__.py
@@ -0,0 +1,10 @@
+# NOTE tested with TF 2.8.0
+from tensorflow import keras
+
+model = keras.Sequential()
+shape = (4, 4)
+
+model.add(keras.layers.InputLayer(input_shape=shape, batch_size=1))
+model.add(keras.layers.SimpleRNN(2, input_shape=shape, unroll=True))
+
+# NOTE refer https://github.com/Samsung/ONE/issues/9895#issuecomment-1289811569
diff --git a/res/TensorFlowPythonExamples/examples/abs/__init__.py b/res/TensorFlowPythonExamples/examples/abs/__init__.py
index fd5515595..83ac3cb33 100755
--- a/res/TensorFlowPythonExamples/examples/abs/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/abs/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
abs_ = tf.compat.v1.abs(in_)
diff --git a/res/TensorFlowPythonExamples/examples/add/__init__.py b/res/TensorFlowPythonExamples/examples/add/__init__.py
index 7e283f35f..39790a0e5 100755
--- a/res/TensorFlowPythonExamples/examples/add/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/add/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.add(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/add_n/__init__.py b/res/TensorFlowPythonExamples/examples/add_n/__init__.py
index afd068d0d..c8e23c940 100644
--- a/res/TensorFlowPythonExamples/examples/add_n/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/add_n/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in1_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
in2_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
in3_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
diff --git a/res/TensorFlowPythonExamples/examples/argmax/__init__.py b/res/TensorFlowPythonExamples/examples/argmax/__init__.py
index 059df97f9..b8791b46e 100755
--- a/res/TensorFlowPythonExamples/examples/argmax/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/argmax/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.math.argmax(in_)
diff --git a/res/TensorFlowPythonExamples/examples/argmin/__init__.py b/res/TensorFlowPythonExamples/examples/argmin/__init__.py
index f9a54627f..39f3278a5 100644
--- a/res/TensorFlowPythonExamples/examples/argmin/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/argmin/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.math.argmin(in_)
diff --git a/res/TensorFlowPythonExamples/examples/atrous_conv2d/__init__.py b/res/TensorFlowPythonExamples/examples/atrous_conv2d/__init__.py
new file mode 100644
index 000000000..c430749f3
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/atrous_conv2d/__init__.py
@@ -0,0 +1,10 @@
+import tensorflow as tf
+import numpy as np
+
+tf.compat.v1.disable_eager_execution()
+
+in_ = tf.compat.v1.placeholder(tf.float32, shape=(1, 32, 32, 3), name="Hole")
+
+filters = np.random.uniform(low=-1., high=1, size=[5, 5, 3, 32]).astype(np.float32)
+
+op_ = tf.compat.v1.nn.atrous_conv2d(in_, filters, 2, "VALID")
diff --git a/res/TensorFlowPythonExamples/examples/average_pool_2d/__init__.py b/res/TensorFlowPythonExamples/examples/average_pool_2d/__init__.py
index a8ab0ddc4..814cf5787 100644
--- a/res/TensorFlowPythonExamples/examples/average_pool_2d/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/average_pool_2d/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(tf.float32, shape=(1, 8, 8, 1), name="Hole")
op_ = tf.compat.v1.nn.avg_pool2d(in_, (2, 2), 1, "VALID")
diff --git a/res/TensorFlowPythonExamples/examples/batch_normalization/__init__.py b/res/TensorFlowPythonExamples/examples/batch_normalization/__init__.py
index e86555220..4a7787073 100644
--- a/res/TensorFlowPythonExamples/examples/batch_normalization/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/batch_normalization/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
mean = tf.compat.v1.constant([1., 2., 3.])
variance = tf.compat.v1.constant([4., 5., 6.])
offset = tf.compat.v1.constant([7., 8., 9.])
diff --git a/res/TensorFlowPythonExamples/examples/batch_to_space/__init__.py b/res/TensorFlowPythonExamples/examples/batch_to_space/__init__.py
index 1dd08b0ee..9efa85c2d 100644
--- a/res/TensorFlowPythonExamples/examples/batch_to_space/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/batch_to_space/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(tf.float32, shape=[4, 1, 1, 1], name="Hole")
cr_ = tf.constant([[0, 0], [0, 0]], name="Hole")
op_ = tf.batch_to_space(in_, cr_, 2)
diff --git a/res/TensorFlowPythonExamples/examples/biasadd/__init__.py b/res/TensorFlowPythonExamples/examples/biasadd/__init__.py
index eb8a69bc3..72ffe10ae 100755
--- a/res/TensorFlowPythonExamples/examples/biasadd/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/biasadd/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1, 2, 3), name="Hole")
op_ = tf.nn.bias_add(in_, bias=[1.0, 1.0, -1.0], data_format="NHWC")
diff --git a/res/TensorFlowPythonExamples/examples/cast/__init__.py b/res/TensorFlowPythonExamples/examples/cast/__init__.py
index 4c0adc09f..5919e0de2 100644
--- a/res/TensorFlowPythonExamples/examples/cast/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/cast/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
cast_ = tf.cast(in_, tf.int32)
diff --git a/res/TensorFlowPythonExamples/examples/ceil/__init__.py b/res/TensorFlowPythonExamples/examples/ceil/__init__.py
index 5178f8fe8..79737c8ab 100755
--- a/res/TensorFlowPythonExamples/examples/ceil/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/ceil/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.ceil(in_)
diff --git a/res/TensorFlowPythonExamples/examples/concat/__init__.py b/res/TensorFlowPythonExamples/examples/concat/__init__.py
index ec59b242f..c1c7b1aeb 100644
--- a/res/TensorFlowPythonExamples/examples/concat/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/concat/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in1_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3, 4), name="Hole1")
in2_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 2, 4), name="Hole2")
concat_ = tf.compat.v1.concat([in1_, in2_], axis=-2)
diff --git a/res/TensorFlowPythonExamples/examples/cond/__init__.py b/res/TensorFlowPythonExamples/examples/cond/__init__.py
index deafbb162..660ec9b84 100644
--- a/res/TensorFlowPythonExamples/examples/cond/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/cond/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
x_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[], name="HoleX")
y_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[], name="HoleY")
z_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[], name="HoleZ")
diff --git a/res/TensorFlowPythonExamples/examples/cond_1/__init__.py b/res/TensorFlowPythonExamples/examples/cond_1/__init__.py
new file mode 100644
index 000000000..da8809482
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/cond_1/__init__.py
@@ -0,0 +1,28 @@
+import tensorflow as tf
+
+tf.compat.v1.disable_eager_execution()
+
+x_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[], name="HoleX")
+y_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[], name="HoleY")
+z_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[], name="HoleZ")
+
+
+def fn01(a, b):
+ return tf.math.multiply(a, b, name="Hole0M")
+
+
+def fn02(a, b):
+ return tf.math.add(a, b, name="Hole0A")
+
+
+def fn1(c, x, y, z):
+ return tf.cond(c, lambda: fn01(x, y), lambda: fn02(y, z), name="Cond0")
+
+
+def fn2(a, b):
+ return tf.math.add(a, b, name="HoleA")
+
+
+pr_ = tf.compat.v1.placeholder(tf.bool, shape=[], name="HoleC")
+op_ = tf.cond(pr_, lambda: fn1(pr_, x_, y_, z_), lambda: fn2(y_, z_), name="Cond")
+re_ = tf.identity(op_, name="HoleR")
diff --git a/res/TensorFlowPythonExamples/examples/conv2d_1/__init__.py b/res/TensorFlowPythonExamples/examples/conv2d_1/__init__.py
index fa4f72f99..7cf8dee52 100644
--- a/res/TensorFlowPythonExamples/examples/conv2d_1/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/conv2d_1/__init__.py
@@ -1,6 +1,8 @@
import tensorflow as tf
import numpy as np
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(tf.float32, shape=(1, 32, 32, 3), name="Hole")
filters = np.random.uniform(low=-1., high=1, size=[5, 5, 3, 32]).astype(np.float32)
diff --git a/res/TensorFlowPythonExamples/examples/conv2d_2/__init__.py b/res/TensorFlowPythonExamples/examples/conv2d_2/__init__.py
index 680bb36fd..812fef12b 100644
--- a/res/TensorFlowPythonExamples/examples/conv2d_2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/conv2d_2/__init__.py
@@ -1,6 +1,8 @@
import tensorflow as tf
import numpy as np
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(tf.float32, shape=(1, 32, 32, 3), name="Hole")
filters = np.random.uniform(low=-1., high=1, size=[5, 5, 3, 32]).astype(np.float32)
diff --git a/res/TensorFlowPythonExamples/examples/conv2d_transpose/__init__.py b/res/TensorFlowPythonExamples/examples/conv2d_transpose/__init__.py
index 17fd6e20a..cd317cee9 100644
--- a/res/TensorFlowPythonExamples/examples/conv2d_transpose/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/conv2d_transpose/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
input_ = tf.compat.v1.placeholder(tf.float32, shape=(1, 8, 8, 1), name="Hole")
kernel_ = tf.compat.v1.placeholder(tf.float32, shape=(3, 3, 1, 1), name="Hole")
op_ = tf.compat.v1.nn.conv2d_transpose(
diff --git a/res/TensorFlowPythonExamples/examples/cos/__init__.py b/res/TensorFlowPythonExamples/examples/cos/__init__.py
index cfce5d830..3271ddb96 100755
--- a/res/TensorFlowPythonExamples/examples/cos/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/cos/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.cos(in_)
diff --git a/res/TensorFlowPythonExamples/examples/depth_to_space/__init__.py b/res/TensorFlowPythonExamples/examples/depth_to_space/__init__.py
index 0cbc304fa..c11766ed0 100644
--- a/res/TensorFlowPythonExamples/examples/depth_to_space/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/depth_to_space/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(tf.float32, shape=[1, 1, 1, 4], name="Hole")
op_ = tf.nn.depth_to_space(in_, 2)
diff --git a/res/TensorFlowPythonExamples/examples/depthwise_conv2d_1/__init__.py b/res/TensorFlowPythonExamples/examples/depthwise_conv2d_1/__init__.py
index 7df1938cc..a9c8b33eb 100644
--- a/res/TensorFlowPythonExamples/examples/depthwise_conv2d_1/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/depthwise_conv2d_1/__init__.py
@@ -1,6 +1,8 @@
import tensorflow as tf
import numpy as np
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(tf.float32, shape=(1, 32, 32, 4), name="Hole")
filters = np.array(
diff --git a/res/TensorFlowPythonExamples/examples/depthwise_conv2d_2/__init__.py b/res/TensorFlowPythonExamples/examples/depthwise_conv2d_2/__init__.py
index 4800ebd82..8fbd0da49 100644
--- a/res/TensorFlowPythonExamples/examples/depthwise_conv2d_2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/depthwise_conv2d_2/__init__.py
@@ -1,6 +1,8 @@
import tensorflow as tf
import numpy as np
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(tf.float32, shape=(1, 32, 32, 4), name="Hole")
filters = np.array(
diff --git a/res/TensorFlowPythonExamples/examples/div/__init__.py b/res/TensorFlowPythonExamples/examples/div/__init__.py
index 2887771ff..9acf9166b 100755
--- a/res/TensorFlowPythonExamples/examples/div/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/div/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.div(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/elu/__init__.py b/res/TensorFlowPythonExamples/examples/elu/__init__.py
index b41f65111..91c620927 100755
--- a/res/TensorFlowPythonExamples/examples/elu/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/elu/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
elu_ = tf.compat.v1.nn.elu(in_)
diff --git a/res/TensorFlowPythonExamples/examples/exp/__init__.py b/res/TensorFlowPythonExamples/examples/exp/__init__.py
index e83638436..5a7c88d8c 100644
--- a/res/TensorFlowPythonExamples/examples/exp/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/exp/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.exp(in_)
diff --git a/res/TensorFlowPythonExamples/examples/expand_dims_00/__init__.py b/res/TensorFlowPythonExamples/examples/expand_dims_00/__init__.py
index ab6a87fc7..1f99c1107 100644
--- a/res/TensorFlowPythonExamples/examples/expand_dims_00/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/expand_dims_00/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
# example 1 where input has all known dims and axis is const
in_ = tf.compat.v1.placeholder(dtype=tf.int32, shape=(2, 3), name="Hole")
diff --git a/res/TensorFlowPythonExamples/examples/expand_dims_01/__init__.py b/res/TensorFlowPythonExamples/examples/expand_dims_01/__init__.py
index 36c54753b..1b1626a32 100644
--- a/res/TensorFlowPythonExamples/examples/expand_dims_01/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/expand_dims_01/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
# example 2 where input has unknown dim and axis is const
in_ = tf.compat.v1.placeholder(dtype=tf.int32, shape=(None, None), name="Hole")
diff --git a/res/TensorFlowPythonExamples/examples/expand_dims_02/__init__.py b/res/TensorFlowPythonExamples/examples/expand_dims_02/__init__.py
index 6304c2344..c73b0ba2f 100644
--- a/res/TensorFlowPythonExamples/examples/expand_dims_02/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/expand_dims_02/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
# example 3 where input has all known dim and axis is not const
in_ = tf.compat.v1.placeholder(dtype=tf.int32, shape=(2, 3), name="Hole")
diff --git a/res/TensorFlowPythonExamples/examples/fake_quant_with_min_max_vars/__init__.py b/res/TensorFlowPythonExamples/examples/fake_quant_with_min_max_vars/__init__.py
new file mode 100644
index 000000000..c4c928466
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/fake_quant_with_min_max_vars/__init__.py
@@ -0,0 +1,27 @@
+import tensorflow as tf
+import numpy as np
+
+tf.compat.v1.disable_eager_execution()
+
+in_ = tf.compat.v1.placeholder(tf.float32, shape=(1, 32, 32, 3), name="Hole")
+
+filters = np.random.uniform(low=-1., high=1, size=[5, 5, 3, 32]).astype(np.float32)
+strides = (1, 2, 2, 1)
+cv_ = tf.compat.v1.nn.conv2d(in_, filters, strides, "VALID", data_format="NHWC")
+
+op_ = tf.compat.v1.fake_quant_with_min_max_vars(cv_, 0.0, 1.0, 8, False)
+'''
+NOTE:
+'fake_quant_with_min_max_vars' is converted to a QUANTIZE-DEQUANTIZE pair in tflite.
+To produce a tflite file with the FAKE_QUANT Op, modify tf2tfliteV2.py to set
+
+converter.experimental_new_converter = False
+
+and then run
+
+python3 ../../compiler/tf2tfliteV2/tf2tfliteV2.py --v2 --graph_def \
+-i ./fake_quant_with_min_max_vars.pbtxt \
+-o ./fake_quant_with_min_max_vars.tflite \
+-I Hole \
+-O FakeQuantWithMinMaxVars
+'''
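For context, `experimental_new_converter` is an attribute on the converter object that tf2tfliteV2.py constructs; a hedged sketch of where the flag would be set when converting the frozen graph directly (file and tensor names mirror the command above):

```
import tensorflow as tf

# Build a converter from the dumped graph; names mirror the command above.
converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(
    "fake_quant_with_min_max_vars.pbtxt",
    input_arrays=["Hole"],
    output_arrays=["FakeQuantWithMinMaxVars"])
converter.experimental_new_converter = False  # fall back to the old converter

with open("fake_quant_with_min_max_vars.tflite", "wb") as f:
    f.write(converter.convert())
```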
diff --git a/res/TensorFlowPythonExamples/examples/fill/__init__.py b/res/TensorFlowPythonExamples/examples/fill/__init__.py
index f8413bb36..1c9d20476 100644
--- a/res/TensorFlowPythonExamples/examples/fill/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/fill/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.int32, shape=(), name="Hole")
op_ = tf.compat.v1.fill((3, 4), in_)
diff --git a/res/TensorFlowPythonExamples/examples/flatten/__init__.py b/res/TensorFlowPythonExamples/examples/flatten/__init__.py
new file mode 100644
index 000000000..3f135688e
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/flatten/__init__.py
@@ -0,0 +1,7 @@
+import tensorflow as tf
+
+tf.compat.v1.disable_eager_execution()
+
+in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(3, 3), name="Hole")
+
+op_ = tf.compat.v1.layers.flatten(in_)
diff --git a/res/TensorFlowPythonExamples/examples/floor/__init__.py b/res/TensorFlowPythonExamples/examples/floor/__init__.py
index 3b3f5bfc3..0357cee3b 100755
--- a/res/TensorFlowPythonExamples/examples/floor/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/floor/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.floor(in_)
diff --git a/res/TensorFlowPythonExamples/examples/floordiv/__init__.py b/res/TensorFlowPythonExamples/examples/floordiv/__init__.py
index 34f413f2b..5714bf563 100755
--- a/res/TensorFlowPythonExamples/examples/floordiv/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/floordiv/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.floordiv(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/floormod/__init__.py b/res/TensorFlowPythonExamples/examples/floormod/__init__.py
index c06e2a9ed..f4e1a5f33 100644
--- a/res/TensorFlowPythonExamples/examples/floormod/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/floormod/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.floormod(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/fused_batch_norm/__init__.py b/res/TensorFlowPythonExamples/examples/fused_batch_norm/__init__.py
index 5e13b0d82..628420c3b 100644
--- a/res/TensorFlowPythonExamples/examples/fused_batch_norm/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/fused_batch_norm/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
scale = tf.compat.v1.constant([1., 2., 3.])
offset = tf.compat.v1.constant([4., 5., 6.])
mean = tf.constant([1., 2., 3.])
diff --git a/res/TensorFlowPythonExamples/examples/gather/__init__.py b/res/TensorFlowPythonExamples/examples/gather/__init__.py
index 173be4a97..67b4d07fc 100644
--- a/res/TensorFlowPythonExamples/examples/gather/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/gather/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
param_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 2, 3, 4), name="Hole")
indices_ = tf.constant([1, 2])
op_ = tf.gather(param_, indices_, axis=2)
diff --git a/res/TensorFlowPythonExamples/examples/gather_nd/__init__.py b/res/TensorFlowPythonExamples/examples/gather_nd/__init__.py
index 1ff11d568..8c0df3629 100644
--- a/res/TensorFlowPythonExamples/examples/gather_nd/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/gather_nd/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
param_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 2, 2), name="Hole")
indices_ = tf.constant([[0, 1], [1, 0]])
op_ = tf.gather_nd(param_, indices_)
diff --git a/res/TensorFlowPythonExamples/examples/greater/__init__.py b/res/TensorFlowPythonExamples/examples/greater/__init__.py
index e88f57471..b8578e3b2 100755
--- a/res/TensorFlowPythonExamples/examples/greater/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/greater/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.greater(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/greater_equal/__init__.py b/res/TensorFlowPythonExamples/examples/greater_equal/__init__.py
index b15fbd324..cf10e4d4e 100755
--- a/res/TensorFlowPythonExamples/examples/greater_equal/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/greater_equal/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.greater_equal(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/gru/__init__.py b/res/TensorFlowPythonExamples/examples/gru/__init__.py
new file mode 100755
index 000000000..0d4718937
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/gru/__init__.py
@@ -0,0 +1,14 @@
+import tensorflow as tf
+from tensorflow import keras
+
+tf.compat.v1.disable_eager_execution()
+
+model = keras.Sequential()
+shape = (4, 4)
+model.add(keras.layers.GRU(2, input_shape=shape))
+
+# NOTE this code generates a pb model only with TF 1.x
+#
+# To save the model with TF 2.x, use one of:
+# - to dump a Keras model: model.save("gru.h5")
+# - to dump a SavedModel: tf.saved_model.save(model, "gru")
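Following the note above, under TF 2.x the model is saved instead of dumped to pb; a sketch of carrying that SavedModel on to a tflite file (the paths are placeholders):

```
import tensorflow as tf
from tensorflow import keras

model = keras.Sequential()
model.add(keras.layers.GRU(2, input_shape=(4, 4)))

# Dump as SavedModel, then convert it; "gru" and "gru.tflite" are placeholder paths.
tf.saved_model.save(model, "gru")
converter = tf.lite.TFLiteConverter.from_saved_model("gru")
with open("gru.tflite", "wb") as f:
    f.write(converter.convert())
```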
diff --git a/res/TensorFlowPythonExamples/examples/instance_norm/__init__.py b/res/TensorFlowPythonExamples/examples/instance_norm/__init__.py
new file mode 100644
index 000000000..62a774e4a
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/instance_norm/__init__.py
@@ -0,0 +1,24 @@
+import tensorflow as tf
+
+tf.compat.v1.disable_eager_execution()
+
+sess = tf.Session()
+
+in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(3, 3), name="Hole")
+norm_ = tf.contrib.layers.instance_norm(in_)
+
+# we need to save a checkpoint to freeze the dumped model
+init = tf.initialize_all_variables()
+sess.run(init)
+
+saver = tf.train.Saver()
+saver.save(sess, './ckpt/instance_norm.ckpt')
+
+# use the command below to freeze this model after running tfpem.py
+'''
+freeze_graph --input_graph instance_norm.pbtxt \
+--input_binary=false \
+--input_checkpoint=./ckpt/instance_norm.ckpt \
+--output_node_names=InstanceNorm/instancenorm/add_1 \
+--output_graph instance_norm_fr.pbtxt
+'''
diff --git a/res/TensorFlowPythonExamples/examples/l2_normalize/__init__.py b/res/TensorFlowPythonExamples/examples/l2_normalize/__init__.py
index 0dda6bfc8..fe26e0684 100644
--- a/res/TensorFlowPythonExamples/examples/l2_normalize/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/l2_normalize/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
arg = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.math.l2_normalize(arg)
diff --git a/res/TensorFlowPythonExamples/examples/leaky_relu/__init__.py b/res/TensorFlowPythonExamples/examples/leaky_relu/__init__.py
index d595edbd0..c1899de56 100755
--- a/res/TensorFlowPythonExamples/examples/leaky_relu/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/leaky_relu/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.nn.leaky_relu(in_)
diff --git a/res/TensorFlowPythonExamples/examples/less/__init__.py b/res/TensorFlowPythonExamples/examples/less/__init__.py
index 41ba18c62..6fee74aa5 100755
--- a/res/TensorFlowPythonExamples/examples/less/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/less/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.less(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/less_equal/__init__.py b/res/TensorFlowPythonExamples/examples/less_equal/__init__.py
index d60bf2a73..fdca6490a 100755
--- a/res/TensorFlowPythonExamples/examples/less_equal/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/less_equal/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.less_equal(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/local_response_normalization/__init__.py b/res/TensorFlowPythonExamples/examples/local_response_normalization/__init__.py
index eca6b2267..c358bd06e 100644
--- a/res/TensorFlowPythonExamples/examples/local_response_normalization/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/local_response_normalization/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
x_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 4, 4, 20), name="Hole")
op_ = tf.compat.v1.nn.lrn(x_, 5, 1.0, 1.0, 0.5)
diff --git a/res/TensorFlowPythonExamples/examples/log/__init__.py b/res/TensorFlowPythonExamples/examples/log/__init__.py
index cb206c058..d8787ef7d 100644
--- a/res/TensorFlowPythonExamples/examples/log/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/log/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.math.log(in_)
diff --git a/res/TensorFlowPythonExamples/examples/log_softmax/__init__.py b/res/TensorFlowPythonExamples/examples/log_softmax/__init__.py
index 651888c71..a13f211c7 100644
--- a/res/TensorFlowPythonExamples/examples/log_softmax/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/log_softmax/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.nn.log_softmax(in_)
diff --git a/res/TensorFlowPythonExamples/examples/log_softmax_2/__init__.py b/res/TensorFlowPythonExamples/examples/log_softmax_2/__init__.py
index c3d458942..856ebd968 100644
--- a/res/TensorFlowPythonExamples/examples/log_softmax_2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/log_softmax_2/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3, 4, 5), name="Hole")
op_ = tf.compat.v1.nn.log_softmax(in_, axis=1)
diff --git a/res/TensorFlowPythonExamples/examples/logical_and/__init__.py b/res/TensorFlowPythonExamples/examples/logical_and/__init__.py
index f546fae9f..d0c4ea2ac 100755
--- a/res/TensorFlowPythonExamples/examples/logical_and/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/logical_and/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.logical_and(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/logical_not/__init__.py b/res/TensorFlowPythonExamples/examples/logical_not/__init__.py
index f1bcc2c8f..532d5ff1f 100755
--- a/res/TensorFlowPythonExamples/examples/logical_not/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/logical_not/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.logical_not(in_)
diff --git a/res/TensorFlowPythonExamples/examples/logical_or/__init__.py b/res/TensorFlowPythonExamples/examples/logical_or/__init__.py
index 991d61ab9..ce584eaf4 100755
--- a/res/TensorFlowPythonExamples/examples/logical_or/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/logical_or/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.logical_or(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/lstm/__init__.py b/res/TensorFlowPythonExamples/examples/lstm/__init__.py
new file mode 100755
index 000000000..99ef3c27f
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/lstm/__init__.py
@@ -0,0 +1,14 @@
+import tensorflow as tf
+from tensorflow import keras
+
+tf.compat.v1.disable_eager_execution()
+
+model = keras.Sequential()
+shape = (4, 4)
+model.add(keras.layers.LSTM(2, input_shape=shape))
+
+# NOTE this code generates a pb model only with TF 1.x
+#
+# To save the model with TF 2.x, use one of:
+# - to dump a Keras model: model.save("lstm.h5")
+# - to dump a SavedModel: tf.saved_model.save(model, "lstm")
diff --git a/res/TensorFlowPythonExamples/examples/matmul/__init__.py b/res/TensorFlowPythonExamples/examples/matmul/__init__.py
index 760241de7..6f049e50a 100755
--- a/res/TensorFlowPythonExamples/examples/matmul/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/matmul/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(3, 4), name="Hole")
rhs_ = tf.compat.v1.constant(dtype=tf.float32, shape=(4, 4), name="Hole", value=1.0)
op_ = tf.compat.v1.matmul(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/matrix_band_part/__init__.py b/res/TensorFlowPythonExamples/examples/matrix_band_part/__init__.py
index 43d4d8754..a708f35c4 100644
--- a/res/TensorFlowPythonExamples/examples/matrix_band_part/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/matrix_band_part/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.matrix_band_part(in_, 1, -1)
diff --git a/res/TensorFlowPythonExamples/examples/matrix_diag/__init__.py b/res/TensorFlowPythonExamples/examples/matrix_diag/__init__.py
index 384a29853..cd789eaca 100644
--- a/res/TensorFlowPythonExamples/examples/matrix_diag/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/matrix_diag/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.matrix_diag(in_)
diff --git a/res/TensorFlowPythonExamples/examples/matrix_set_diag/__init__.py b/res/TensorFlowPythonExamples/examples/matrix_set_diag/__init__.py
index e8878f02f..55b869037 100644
--- a/res/TensorFlowPythonExamples/examples/matrix_set_diag/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/matrix_set_diag/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3, 4), name="Hole")
diag_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3), name="Hole")
op_ = tf.compat.v1.matrix_set_diag(in_, diag_)
diff --git a/res/TensorFlowPythonExamples/examples/max_pool_with_argmax/__init__.py b/res/TensorFlowPythonExamples/examples/max_pool_with_argmax/__init__.py
index 487858cc5..78daa034c 100755
--- a/res/TensorFlowPythonExamples/examples/max_pool_with_argmax/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/max_pool_with_argmax/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 4, 4, 1), name="Hole")
op_ = tf.compat.v1.nn.max_pool_with_argmax(
in_, ksize=[1, 2, 2, 1], strides=[1, 1, 1, 1], padding="VALID")
diff --git a/res/TensorFlowPythonExamples/examples/maximum/__init__.py b/res/TensorFlowPythonExamples/examples/maximum/__init__.py
index a96fe03a7..0656ba4e6 100755
--- a/res/TensorFlowPythonExamples/examples/maximum/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/maximum/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.maximum(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/minimum/__init__.py b/res/TensorFlowPythonExamples/examples/minimum/__init__.py
index ef664dbf6..ebd795e38 100755
--- a/res/TensorFlowPythonExamples/examples/minimum/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/minimum/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.minimum(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/multiply/__init__.py b/res/TensorFlowPythonExamples/examples/multiply/__init__.py
index da8885660..68dff1e61 100755
--- a/res/TensorFlowPythonExamples/examples/multiply/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/multiply/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.multiply(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/negative/__init__.py b/res/TensorFlowPythonExamples/examples/negative/__init__.py
index 86713da7b..473dc9b97 100644
--- a/res/TensorFlowPythonExamples/examples/negative/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/negative/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
neg_ = tf.math.negative(in_)
diff --git a/res/TensorFlowPythonExamples/examples/non_max_suppression_padded/__init__.py b/res/TensorFlowPythonExamples/examples/non_max_suppression_padded/__init__.py
index b8f010c67..2598b531b 100644
--- a/res/TensorFlowPythonExamples/examples/non_max_suppression_padded/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/non_max_suppression_padded/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
max_output_size = tf.compat.v1.constant(4)
in_boxes_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(8, 4), name="Hole")
diff --git a/res/TensorFlowPythonExamples/examples/non_max_suppression_padded_2/__init__.py b/res/TensorFlowPythonExamples/examples/non_max_suppression_padded_2/__init__.py
index 42e7bf06c..932ad3534 100644
--- a/res/TensorFlowPythonExamples/examples/non_max_suppression_padded_2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/non_max_suppression_padded_2/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
max_output_size = tf.compat.v1.constant(6)
iou_threshold = tf.compat.v1.constant(0.5)
score_threshold = tf.compat.v1.constant(0.6)
diff --git a/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores/__init__.py b/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores/__init__.py
index 32c6173b0..c251b9271 100644
--- a/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
max_output_size = tf.compat.v1.constant(4)
in_boxes_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(8, 4), name="Hole")
diff --git a/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores_2/__init__.py b/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores_2/__init__.py
index 415f9209f..a7185c3ee 100644
--- a/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores_2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores_2/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
max_output_size = tf.compat.v1.constant(6)
iou_threshold = tf.compat.v1.constant(0.5)
score_threshold = tf.compat.v1.constant(0.6)
diff --git a/res/TensorFlowPythonExamples/examples/not_equal/__init__.py b/res/TensorFlowPythonExamples/examples/not_equal/__init__.py
index 95073fe4a..955eb1f9f 100755
--- a/res/TensorFlowPythonExamples/examples/not_equal/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/not_equal/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.not_equal(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/one_hot/__init__.py b/res/TensorFlowPythonExamples/examples/one_hot/__init__.py
index 49e0346d3..b99bb9ca0 100644
--- a/res/TensorFlowPythonExamples/examples/one_hot/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/one_hot/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
indice_ = tf.compat.v1.placeholder(tf.int32, shape=(1, 2, 3, 4), name='Hole')
depth_ = tf.compat.v1.placeholder(tf.int32, shape=(), name='Hole')
on_value_ = tf.compat.v1.placeholder(tf.int32, shape=(), name='Hole')
diff --git a/res/TensorFlowPythonExamples/examples/pack/__init__.py b/res/TensorFlowPythonExamples/examples/pack/__init__.py
index 609bc9b76..4f1c46baa 100755
--- a/res/TensorFlowPythonExamples/examples/pack/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/pack/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_1 = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3, 4), name="Hole")
in_2 = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3, 4), name="Hole")
op_ = tf.compat.v1.stack([in_1, in_2])
diff --git a/res/TensorFlowPythonExamples/examples/pad-reflect/__init__.py b/res/TensorFlowPythonExamples/examples/pad-reflect/__init__.py
index dc877f119..a78e21571 100644
--- a/res/TensorFlowPythonExamples/examples/pad-reflect/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/pad-reflect/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
tensor_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3), name="Hole")
paddings_ = tf.constant([[1, 1], [2, 2]], name="Hole")
op_ = tf.pad(tensor_, paddings_, "REFLECT")
diff --git a/res/TensorFlowPythonExamples/examples/pad/__init__.py b/res/TensorFlowPythonExamples/examples/pad/__init__.py
index ac5cf81fa..7097b7592 100755
--- a/res/TensorFlowPythonExamples/examples/pad/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/pad/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
tensor_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3), name="Hole")
paddings_ = tf.compat.v1.constant([[1, 1], [2, 2]], name="Hole")
op_ = tf.compat.v1.pad(tensor_, paddings_)
diff --git a/res/TensorFlowPythonExamples/examples/pow/__init__.py b/res/TensorFlowPythonExamples/examples/pow/__init__.py
index 960032a84..12a19f2b0 100755
--- a/res/TensorFlowPythonExamples/examples/pow/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/pow/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.pow(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/prelu/__init__.py b/res/TensorFlowPythonExamples/examples/prelu/__init__.py
index b0e7c7b9d..7e43f5101 100644
--- a/res/TensorFlowPythonExamples/examples/prelu/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/prelu/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
input_tensor = tf.compat.v1.placeholder(
dtype=tf.float32, name="input", shape=[1, 4, 4, 3])
prelu = tf.keras.layers.PReLU(shared_axes=[1, 2])
diff --git a/res/TensorFlowPythonExamples/examples/range/__init__.py b/res/TensorFlowPythonExamples/examples/range/__init__.py
index 0f032e9d1..9b57167b0 100644
--- a/res/TensorFlowPythonExamples/examples/range/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/range/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
# this modified example comes from TF API reference
start = 1
limit = 10
diff --git a/res/TensorFlowPythonExamples/examples/rank/__init__.py b/res/TensorFlowPythonExamples/examples/rank/__init__.py
index c9b970718..ab2bc79dc 100644
--- a/res/TensorFlowPythonExamples/examples/rank/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/rank/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4, 3, 3), name="Hole")
rank_ = tf.compat.v1.rank(in_)
diff --git a/res/TensorFlowPythonExamples/examples/reduce_all/__init__.py b/res/TensorFlowPythonExamples/examples/reduce_all/__init__.py
index eb9167f72..2fee752d4 100644
--- a/res/TensorFlowPythonExamples/examples/reduce_all/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/reduce_all/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
input_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=(2, 4), name="Hole")
op_ = tf.compat.v1.reduce_all(input_, axis=1, keepdims=False)
diff --git a/res/TensorFlowPythonExamples/examples/reduce_any/__init__.py b/res/TensorFlowPythonExamples/examples/reduce_any/__init__.py
index f87c25166..0e87a0c6e 100644
--- a/res/TensorFlowPythonExamples/examples/reduce_any/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/reduce_any/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=(2, 2), name="Hole")
op_ = tf.compat.v1.math.reduce_any(in_)
diff --git a/res/TensorFlowPythonExamples/examples/reduce_max/__init__.py b/res/TensorFlowPythonExamples/examples/reduce_max/__init__.py
index 27e48df72..dc5e0d648 100644
--- a/res/TensorFlowPythonExamples/examples/reduce_max/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/reduce_max/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 2), name="Hole")
op_ = tf.compat.v1.math.reduce_max(in_)
diff --git a/res/TensorFlowPythonExamples/examples/reduce_min/__init__.py b/res/TensorFlowPythonExamples/examples/reduce_min/__init__.py
index b3cf0346a..fe81336d4 100644
--- a/res/TensorFlowPythonExamples/examples/reduce_min/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/reduce_min/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 2), name="Hole")
op_ = tf.compat.v1.math.reduce_min(in_)
diff --git a/res/TensorFlowPythonExamples/examples/reduce_prod/__init__.py b/res/TensorFlowPythonExamples/examples/reduce_prod/__init__.py
index 4d134ae32..9fe2ee295 100644
--- a/res/TensorFlowPythonExamples/examples/reduce_prod/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/reduce_prod/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 2), name="Hole")
op_ = tf.compat.v1.math.reduce_prod(in_)
diff --git a/res/TensorFlowPythonExamples/examples/relu/__init__.py b/res/TensorFlowPythonExamples/examples/relu/__init__.py
index a144a1212..69e075332 100755
--- a/res/TensorFlowPythonExamples/examples/relu/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/relu/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.nn.relu(in_)
diff --git a/res/TensorFlowPythonExamples/examples/relu6/__init__.py b/res/TensorFlowPythonExamples/examples/relu6/__init__.py
index f58ae7c2c..d581d3936 100755
--- a/res/TensorFlowPythonExamples/examples/relu6/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/relu6/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.nn.relu6(in_)
diff --git a/res/TensorFlowPythonExamples/examples/reshape/__init__.py b/res/TensorFlowPythonExamples/examples/reshape/__init__.py
index f451bacb9..c60c0a6d8 100644
--- a/res/TensorFlowPythonExamples/examples/reshape/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/reshape/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.reshape(in_, shape=[2, 2, 2, 2])
diff --git a/res/TensorFlowPythonExamples/examples/resize_bilinear/__init__.py b/res/TensorFlowPythonExamples/examples/resize_bilinear/__init__.py
index 422bf1db5..773fc07c9 100755
--- a/res/TensorFlowPythonExamples/examples/resize_bilinear/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/resize_bilinear/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 8, 8, 3), name="Hole")
op_ = tf.compat.v1.image.resize_bilinear(in_, [16, 16])
diff --git a/res/TensorFlowPythonExamples/examples/resize_nearest_neighbor/__init__.py b/res/TensorFlowPythonExamples/examples/resize_nearest_neighbor/__init__.py
index a14022948..3e688d328 100755
--- a/res/TensorFlowPythonExamples/examples/resize_nearest_neighbor/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/resize_nearest_neighbor/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 8, 8, 3), name="Hole")
op_ = tf.compat.v1.image.resize_nearest_neighbor(in_, [16, 16])
diff --git a/res/TensorFlowPythonExamples/examples/reverse_sequence/__init__.py b/res/TensorFlowPythonExamples/examples/reverse_sequence/__init__.py
index aebd4fc50..4b7a9cf26 100755
--- a/res/TensorFlowPythonExamples/examples/reverse_sequence/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/reverse_sequence/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 8), name="Hole")
op_ = tf.compat.v1.reverse_sequence(in_, [7, 2, 3, 5], seq_axis=1, batch_axis=0)
diff --git a/res/TensorFlowPythonExamples/examples/reverse_v2/__init__.py b/res/TensorFlowPythonExamples/examples/reverse_v2/__init__.py
index e6afc995c..0404cd660 100755
--- a/res/TensorFlowPythonExamples/examples/reverse_v2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/reverse_v2/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3, 4, 5), name="Hole")
op_ = tf.compat.v1.reverse_v2(in_, [3, 2])
diff --git a/res/TensorFlowPythonExamples/examples/rnn/__init__.py b/res/TensorFlowPythonExamples/examples/rnn/__init__.py
new file mode 100755
index 000000000..9c1e69c2e
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/rnn/__init__.py
@@ -0,0 +1,14 @@
+import tensorflow as tf
+from tensorflow import keras
+
+tf.compat.v1.disable_eager_execution()
+
+model = keras.Sequential()
+shape = (4, 4)
+model.add(keras.layers.SimpleRNN(2, input_shape=shape))
+
+# NOTE this code generates a pb model only with TF 1.x
+#
+# To save the model with TF 2.x, use one of:
+# - to dump a Keras model: model.save("rnn.h5")
+# - to dump a SavedModel: tf.saved_model.save(model, "rnn")
diff --git a/res/TensorFlowPythonExamples/examples/round/__init__.py b/res/TensorFlowPythonExamples/examples/round/__init__.py
index 9a00ad558..6cda033e2 100755
--- a/res/TensorFlowPythonExamples/examples/round/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/round/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.round(in_)
diff --git a/res/TensorFlowPythonExamples/examples/rsqrt/__init__.py b/res/TensorFlowPythonExamples/examples/rsqrt/__init__.py
index 90500bd11..dc81e48aa 100755
--- a/res/TensorFlowPythonExamples/examples/rsqrt/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/rsqrt/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.rsqrt(in_)
diff --git a/res/TensorFlowPythonExamples/examples/scatter_nd/__init__.py b/res/TensorFlowPythonExamples/examples/scatter_nd/__init__.py
index e094b5705..0158e3ca6 100644
--- a/res/TensorFlowPythonExamples/examples/scatter_nd/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/scatter_nd/__init__.py
@@ -2,6 +2,8 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
indices = tf.compat.v1.constant([[0], [2]])
updates = tf.compat.v1.constant([[[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]],
[[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8,
diff --git a/res/TensorFlowPythonExamples/examples/segment_sum/__init__.py b/res/TensorFlowPythonExamples/examples/segment_sum/__init__.py
index 24d15bb8b..c15746a66 100755
--- a/res/TensorFlowPythonExamples/examples/segment_sum/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/segment_sum/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4, 4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.int32, shape=(4, ), name="Hole")
op_ = tf.compat.v1.math.segment_sum(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/shape/__init__.py b/res/TensorFlowPythonExamples/examples/shape/__init__.py
index 4c13a338f..b719eb9fc 100644
--- a/res/TensorFlowPythonExamples/examples/shape/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/shape/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(None, 2, 3), name="Hole")
op_ = tf.compat.v1.shape(in_)
diff --git a/res/TensorFlowPythonExamples/examples/sigmoid/__init__.py b/res/TensorFlowPythonExamples/examples/sigmoid/__init__.py
index 43328f2cb..1749071f0 100755
--- a/res/TensorFlowPythonExamples/examples/sigmoid/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/sigmoid/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.nn.sigmoid(in_)
diff --git a/res/TensorFlowPythonExamples/examples/sin/__init__.py b/res/TensorFlowPythonExamples/examples/sin/__init__.py
index 0bfdcffed..75ea73b85 100644
--- a/res/TensorFlowPythonExamples/examples/sin/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/sin/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.sin(in_)
diff --git a/res/TensorFlowPythonExamples/examples/slice/__init__.py b/res/TensorFlowPythonExamples/examples/slice/__init__.py
index 45f9044d1..b734dc22b 100644
--- a/res/TensorFlowPythonExamples/examples/slice/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/slice/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(3, 2, 3), name="Hole")
op_ = tf.compat.v1.slice(in_, [1, 0, 0], [1, 1, 3])
diff --git a/res/TensorFlowPythonExamples/examples/softmax/__init__.py b/res/TensorFlowPythonExamples/examples/softmax/__init__.py
index 5b8d1cdfb..3c93e8a2b 100755
--- a/res/TensorFlowPythonExamples/examples/softmax/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/softmax/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.nn.softmax(in_)
diff --git a/res/TensorFlowPythonExamples/examples/space_to_batch/__init__.py b/res/TensorFlowPythonExamples/examples/space_to_batch/__init__.py
index e088012e9..b0e3d85ab 100644
--- a/res/TensorFlowPythonExamples/examples/space_to_batch/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/space_to_batch/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(tf.float32, shape=[1, 2, 2, 1], name="Hole")
pd_ = tf.constant([[0, 0], [0, 0]], name="Hole")
op_ = tf.space_to_batch(in_, pd_, 2)
diff --git a/res/TensorFlowPythonExamples/examples/space_to_batch_nd/__init__.py b/res/TensorFlowPythonExamples/examples/space_to_batch_nd/__init__.py
index 760195063..892796b12 100644
--- a/res/TensorFlowPythonExamples/examples/space_to_batch_nd/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/space_to_batch_nd/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(tf.float32, shape=[1, 2, 2, 1], name="Hole")
bs_ = tf.constant([2, 2], name="Hole")
pd_ = tf.constant([[0, 0], [0, 0]], name="Hole")
diff --git a/res/TensorFlowPythonExamples/examples/space_to_depth/__init__.py b/res/TensorFlowPythonExamples/examples/space_to_depth/__init__.py
index e9bc945bb..e146f6aa3 100644
--- a/res/TensorFlowPythonExamples/examples/space_to_depth/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/space_to_depth/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(tf.float32, shape=[1, 2, 2, 1], name="Hole")
op_ = tf.nn.space_to_depth(in_, 2)
diff --git a/res/TensorFlowPythonExamples/examples/sparse_to_dense/__init__.py b/res/TensorFlowPythonExamples/examples/sparse_to_dense/__init__.py
index 5fe0bc4d0..0ce8f0bdd 100644
--- a/res/TensorFlowPythonExamples/examples/sparse_to_dense/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/sparse_to_dense/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.sparse_placeholder(tf.float32, name="Hole")
op_ = tf.compat.v1.sparse_tensor_to_dense(in_)
diff --git a/res/TensorFlowPythonExamples/examples/split/__init__.py b/res/TensorFlowPythonExamples/examples/split/__init__.py
index 4226f30de..11f542751 100644
--- a/res/TensorFlowPythonExamples/examples/split/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/split/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 3), name="Hole")
op_ = tf.compat.v1.split(in_, 2)
diff --git a/res/TensorFlowPythonExamples/examples/split_2/__init__.py b/res/TensorFlowPythonExamples/examples/split_2/__init__.py
index 03777df15..6212c6e81 100644
--- a/res/TensorFlowPythonExamples/examples/split_2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/split_2/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 3), name="Hole")
op_ = tf.compat.v1.split(in_, [1, 2, 1])
diff --git a/res/TensorFlowPythonExamples/examples/sqrt/__init__.py b/res/TensorFlowPythonExamples/examples/sqrt/__init__.py
index 4aab5da9c..8e304e80c 100755
--- a/res/TensorFlowPythonExamples/examples/sqrt/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/sqrt/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.sqrt(in_)
diff --git a/res/TensorFlowPythonExamples/examples/square/__init__.py b/res/TensorFlowPythonExamples/examples/square/__init__.py
index 2d03e9b89..f0c3e4410 100644
--- a/res/TensorFlowPythonExamples/examples/square/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/square/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.math.square(in_)
diff --git a/res/TensorFlowPythonExamples/examples/squared_difference/__init__.py b/res/TensorFlowPythonExamples/examples/squared_difference/__init__.py
index baacf5622..6e86f843d 100755
--- a/res/TensorFlowPythonExamples/examples/squared_difference/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/squared_difference/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.squared_difference(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/squeeze_1/__init__.py b/res/TensorFlowPythonExamples/examples/squeeze_1/__init__.py
index d054f01a2..ba2348c1e 100755
--- a/res/TensorFlowPythonExamples/examples/squeeze_1/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/squeeze_1/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 1, 4), name="Hole")
op_ = tf.compat.v1.squeeze(in_)
diff --git a/res/TensorFlowPythonExamples/examples/squeeze_2/__init__.py b/res/TensorFlowPythonExamples/examples/squeeze_2/__init__.py
index 5715bed0e..d6134589a 100755
--- a/res/TensorFlowPythonExamples/examples/squeeze_2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/squeeze_2/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 4, 1, 1), name="Hole")
op_ = tf.compat.v1.squeeze(in_, (0, 2))
diff --git a/res/TensorFlowPythonExamples/examples/strided_slice/__init__.py b/res/TensorFlowPythonExamples/examples/strided_slice/__init__.py
index 2d7234df2..a6fa99a75 100644
--- a/res/TensorFlowPythonExamples/examples/strided_slice/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/strided_slice/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(3, 2, 3), name="Hole")
op_ = tf.compat.v1.strided_slice(in_, [1, 0, 0], [2, 1, 3], [1, 1, 1])
diff --git a/res/TensorFlowPythonExamples/examples/subtract/__init__.py b/res/TensorFlowPythonExamples/examples/subtract/__init__.py
index feb11b12e..39cdbc3a2 100755
--- a/res/TensorFlowPythonExamples/examples/subtract/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/subtract/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.subtract(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/sum/__init__.py b/res/TensorFlowPythonExamples/examples/sum/__init__.py
index 69297d6a0..14e408ca0 100644
--- a/res/TensorFlowPythonExamples/examples/sum/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/sum/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 8, 8, 4), name="Hole")
op_ = tf.compat.v1.reduce_sum(in_, -1, True)
diff --git a/res/TensorFlowPythonExamples/examples/tanh/__init__.py b/res/TensorFlowPythonExamples/examples/tanh/__init__.py
index dd202a78d..ccd37579a 100755
--- a/res/TensorFlowPythonExamples/examples/tanh/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/tanh/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.tanh(in_)
diff --git a/res/TensorFlowPythonExamples/examples/tile/__init__.py b/res/TensorFlowPythonExamples/examples/tile/__init__.py
index aad4e73dd..f5d4ef8e4 100755
--- a/res/TensorFlowPythonExamples/examples/tile/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/tile/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3), name="Hole")
multiples_ = tf.compat.v1.constant([1, 2], name="Hole")
op_ = tf.compat.v1.tile(in_, multiples_)
diff --git a/res/TensorFlowPythonExamples/examples/top_k/__init__.py b/res/TensorFlowPythonExamples/examples/top_k/__init__.py
index e7b823400..05c330630 100644
--- a/res/TensorFlowPythonExamples/examples/top_k/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/top_k/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[4], name="Hole")
op_ = tf.compat.v1.math.top_k(in_, k=1)
diff --git a/res/TensorFlowPythonExamples/examples/unidirectional_sequence_LSTM/__init__.py b/res/TensorFlowPythonExamples/examples/unidirectional_sequence_LSTM/__init__.py
new file mode 100644
index 000000000..3dde2b9c9
--- /dev/null
+++ b/res/TensorFlowPythonExamples/examples/unidirectional_sequence_LSTM/__init__.py
@@ -0,0 +1,6 @@
+import tensorflow as tf
+
+tf.compat.v1.disable_eager_execution()
+
+in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[28, 28, 3], name="Hole")
+op_ = tf.compat.v1.keras.layers.LSTM(1, time_major=False, return_sequences=True)(in_)
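With time_major=False, the Keras LSTM reads the rank-3 placeholder as (batch, time, features), here (28, 28, 3), and return_sequences=True keeps the time axis in the output. A shape check under that reading (illustrative sketch, mirroring the new example):

    import tensorflow as tf

    tf.compat.v1.disable_eager_execution()

    in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[28, 28, 3], name="Hole")
    op_ = tf.compat.v1.keras.layers.LSTM(1, time_major=False, return_sequences=True)(in_)

    # One LSTM unit per time step: (batch, time, units)
    print(op_.shape)  # expected: (28, 28, 1)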
diff --git a/res/TensorFlowPythonExamples/examples/unique/__init__.py b/res/TensorFlowPythonExamples/examples/unique/__init__.py
index ad65757d0..00e4f3caf 100644
--- a/res/TensorFlowPythonExamples/examples/unique/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/unique/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(6), name="Hole")
op_ = tf.compat.v1.unique(in_)
diff --git a/res/TensorFlowPythonExamples/examples/unstack/__init__.py b/res/TensorFlowPythonExamples/examples/unstack/__init__.py
index e4ffa2119..2a178569f 100644
--- a/res/TensorFlowPythonExamples/examples/unstack/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/unstack/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[4, 2, 3, 4], name="Hole")
unpack_ = tf.compat.v1.unstack(in_, axis=0)
diff --git a/res/TensorFlowPythonExamples/examples/where/__init__.py b/res/TensorFlowPythonExamples/examples/where/__init__.py
index 69c89c8db..94b747259 100644
--- a/res/TensorFlowPythonExamples/examples/where/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/where/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=[2], name="Hole")
where_ = tf.compat.v1.where(in_)
diff --git a/res/TensorFlowPythonExamples/examples/where_2/__init__.py b/res/TensorFlowPythonExamples/examples/where_2/__init__.py
index 78c50e0fe..19ad0f2f0 100644
--- a/res/TensorFlowPythonExamples/examples/where_2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/where_2/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_b_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=[2], name="Hole")
in_x_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[2, 3], name="Hole")
in_y_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[2, 3], name="Hole")
diff --git a/res/TensorFlowPythonExamples/examples/where_v2/__init__.py b/res/TensorFlowPythonExamples/examples/where_v2/__init__.py
index de87af72a..b6cc7de9e 100644
--- a/res/TensorFlowPythonExamples/examples/where_v2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/where_v2/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=[2], name="Hole")
where_v2_ = tf.compat.v1.where_v2(in_)
diff --git a/res/TensorFlowPythonExamples/examples/where_v2_2/__init__.py b/res/TensorFlowPythonExamples/examples/where_v2_2/__init__.py
index 4ce17ca11..e3ffe03b7 100644
--- a/res/TensorFlowPythonExamples/examples/where_v2_2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/where_v2_2/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_b_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=[3], name="Hole")
in_x_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[2, 1], name="Hole")
in_y_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[1, 3], name="Hole")
diff --git a/res/TensorFlowPythonExamples/examples/while/__init__.py b/res/TensorFlowPythonExamples/examples/while/__init__.py
index fadaa73e2..15ff4eb65 100644
--- a/res/TensorFlowPythonExamples/examples/while/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/while/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
i = tf.compat.v1.constant(0, name="Hole")
c = lambda i: tf.compat.v1.less(i, 10)
diff --git a/res/TensorFlowPythonExamples/examples/while_2/__init__.py b/res/TensorFlowPythonExamples/examples/while_2/__init__.py
index af1c74582..9e26639bf 100644
--- a/res/TensorFlowPythonExamples/examples/while_2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/while_2/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
i = tf.constant(0, shape=[1, 0], dtype=tf.int32, name='i')
x = tf.compat.v1.placeholder(shape=[1, 1], dtype=tf.int32, name='Hole')
diff --git a/res/TensorFlowPythonExamples/examples/while_3/__init__.py b/res/TensorFlowPythonExamples/examples/while_3/__init__.py
index 840846e7e..30ce15a1e 100644
--- a/res/TensorFlowPythonExamples/examples/while_3/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/while_3/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
x = tf.compat.v1.placeholder(shape=[1, None], dtype=tf.int32, name='Hole')
i = tf.compat.v1.placeholder(shape=[1, None], dtype=tf.int32, name='Hole_2')
diff --git a/res/TensorFlowPythonExamples/examples/yuv_to_rgb/__init__.py b/res/TensorFlowPythonExamples/examples/yuv_to_rgb/__init__.py
index 5230bbac6..16414cea2 100755
--- a/res/TensorFlowPythonExamples/examples/yuv_to_rgb/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/yuv_to_rgb/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 16, 16, 3), name="Hole")
op_ = tf.compat.v1.image.yuv_to_rgb(in_)
diff --git a/res/TensorFlowPythonExamples/examples/zeros_like/__init__.py b/res/TensorFlowPythonExamples/examples/zeros_like/__init__.py
index 7daf85e84..d4080ec43 100644
--- a/res/TensorFlowPythonExamples/examples/zeros_like/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/zeros_like/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.zeros_like(in_)
diff --git a/res/TensorFlowPythonExamples/tfpem.py b/res/TensorFlowPythonExamples/tfpem.py
index dfcc20bd3..e8279f9ce 100755
--- a/res/TensorFlowPythonExamples/tfpem.py
+++ b/res/TensorFlowPythonExamples/tfpem.py
@@ -6,22 +6,45 @@ import tensorflow as tf
import importlib
import argparse
+from pathlib import Path
+from tensorflow import keras
+
parser = argparse.ArgumentParser(description='Process TensorFlow Python Examples')
-parser.add_argument('--mode', metavar='MODE', choices=['pbtxt'], default='pbtxt')
parser.add_argument('examples', metavar='EXAMPLES', nargs='+')
args = parser.parse_args()
-if args.mode == 'pbtxt':
- for example in args.examples:
- print("Generate '" + example + ".pbtxt'")
+output_folder = "./output/"
+
+Path(output_folder).mkdir(parents=True, exist_ok=True)
+
+for example in args.examples:
+ print("Generate '" + example + ".pbtxt'")
+
+ tf.compat.v1.reset_default_graph()
+ # https://stackoverflow.com/questions/37808866/proper-way-to-dynamically-import-a-module-with-relative-imports
+ m = importlib.import_module("examples." + example)
+
+ with open(output_folder + example + ".pbtxt", "w") as f:
+ f.write(str(tf.compat.v1.get_default_graph().as_graph_def(add_shapes=True)))
+
+ print("Generate '" + example + ".pbtxt' - Done")
- tf.compat.v1.reset_default_graph()
- # https://stackoverflow.com/questions/37808866/proper-way-to-dynamically-import-a-module-with-relative-imports
- importlib.import_module("examples." + example)
+ # If the example defines a Keras Sequential model, export it as well
+ if hasattr(m, 'model') and isinstance(m.model, keras.Sequential):
+ print("Generate '" + example + ".h5'")
+ m.model.save(output_folder + example + ".h5")
+ print("Generate '" + example + ".h5' - Done")
- with open(example + ".pbtxt", "w") as f:
- f.write(str(tf.compat.v1.get_default_graph().as_graph_def(add_shapes=True)))
+ # tflite export for experiments
+ converter = tf.lite.TFLiteConverter.from_keras_model(m.model)
+ converter.allow_custom_ops = True
+ converter.experimental_new_converter = True
+ converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]
+ converter._experimental_lower_tensor_list_ops = False
- print("Generate '" + example + ".pbtxt' - Done")
+ tflite_model = converter.convert()
+ with open(output_folder + example + ".tflite", "wb") as f:
+ f.write(tflite_model)
+ print("Generate '" + example + ".tflite' - Done")
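Condensed, the rewritten tfpem.py now performs up to three exports per example. A sketch of the flow with argument parsing and logging stripped (names as in the diff; error handling omitted):

    import importlib
    import tensorflow as tf
    from pathlib import Path
    from tensorflow import keras

    def process_example(example, output_folder="./output/"):
        Path(output_folder).mkdir(parents=True, exist_ok=True)
        tf.compat.v1.reset_default_graph()
        m = importlib.import_module("examples." + example)

        # 1. Every example gets a pbtxt dump of the default graph
        with open(output_folder + example + ".pbtxt", "w") as f:
            f.write(str(tf.compat.v1.get_default_graph().as_graph_def(add_shapes=True)))

        # 2./3. Keras Sequential examples additionally get .h5 and .tflite dumps
        if hasattr(m, 'model') and isinstance(m.model, keras.Sequential):
            m.model.save(output_folder + example + ".h5")
            converter = tf.lite.TFLiteConverter.from_keras_model(m.model)
            converter.allow_custom_ops = True
            converter.experimental_new_converter = True
            converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]
            with open(output_folder + example + ".tflite", "wb") as f:
                f.write(converter.convert())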
diff --git a/res/TensorFlowPythonModels/examples/minimum-maximum/__init__.py b/res/TensorFlowPythonModels/examples/minimum-maximum/__init__.py
new file mode 100644
index 000000000..fe074b49c
--- /dev/null
+++ b/res/TensorFlowPythonModels/examples/minimum-maximum/__init__.py
@@ -0,0 +1,15 @@
+import tensorflow as tf
+
+in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 16, 160, 160), name="Hole")
+
+upper_ = tf.compat.v1.constant(6.)
+lower_ = tf.compat.v1.constant(0.)
+
+min_ = tf.compat.v1.minimum(in_, upper_)
+max_ = tf.compat.v1.maximum(min_, lower_)
+'''
+python ../../compiler/tf2tfliteV2/tf2tfliteV2.py --v1 \
+-i minimum-maximum.pbtxt \
+-o minimum-maximum.tflite \
+-I Hole -O Maximum
+'''
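The minimum/maximum pair above clamps the input to [0, 6], i.e. it expresses ReLU6 as two elementwise ops. A quick numeric check of that equivalence (illustrative, run in eager mode, not part of the example):

    import numpy as np
    import tensorflow as tf

    x = np.array([-1.0, 3.0, 7.0], dtype=np.float32)

    # maximum(minimum(x, 6), 0) clamps to [0, 6], matching relu6/clip_by_value
    clamped = np.maximum(np.minimum(x, 6.0), 0.0)
    assert np.allclose(clamped, tf.clip_by_value(x, 0.0, 6.0).numpy())
    assert np.allclose(clamped, tf.nn.relu6(x).numpy())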
diff --git a/res/TensorFlowPythonModels/tfpem.py b/res/TensorFlowPythonModels/tfpem.py
index 01627eb99..542085bb6 100644..100755
--- a/res/TensorFlowPythonModels/tfpem.py
+++ b/res/TensorFlowPythonModels/tfpem.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
# TensorFlow Python Example Manager
import tensorflow as tf
diff --git a/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_log.h b/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_log.h
index 69dfcc7b2..3d71f89aa 100644
--- a/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_log.h
+++ b/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_log.h
@@ -47,12 +47,11 @@ extern "C" {
} \
} while (0)
#else // __TIZEN__
-#define LEVEL_TO_STR(level) \
- (((level) == ERROR) \
- ? "ERROR" \
- : ((level) == WARNING) \
- ? "WARNING" \
- : ((level) == INFO) ? "INFO" : ((level) == DEBUG) ? "DEBUG" : "DEFAULT")
+#define LEVEL_TO_STR(level) \
+ (((level) == ERROR) ? "ERROR" \
+ : ((level) == WARNING) \
+ ? "WARNING" \
+ : ((level) == INFO) ? "INFO" : ((level) == DEBUG) ? "DEBUG" : "DEFAULT")
#define TFLITE_NATIVE_LOG(log_level, format, args...) \
do \
{ \
diff --git a/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_nativewrapper.h b/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_nativewrapper.h
index b099ba9ba..2fb98cc93 100644
--- a/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_nativewrapper.h
+++ b/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_nativewrapper.h
@@ -26,7 +26,8 @@
extern "C" {
#endif /*__cplusplus*/
-typedef enum {
+typedef enum
+{
/** 32-bit signed integer. */
INT32 = 1,
diff --git a/runtime/contrib/android/api/Android.mk b/runtime/contrib/android/api/Android.mk
index a056eff9d..3c768cca5 100644
--- a/runtime/contrib/android/api/Android.mk
+++ b/runtime/contrib/android/api/Android.mk
@@ -4,7 +4,5 @@ include $(CLEAR_VARS)
API_ROOT_PATH := $(LOCAL_PATH)
PREBUILT_LIB :=
-include $(API_ROOT_PATH)/prebuilt/Android.mk
+include $(API_ROOT_PATH)/Prebuilt.mk
include $(API_ROOT_PATH)/src/main/native/Android.mk
-
-#$(warning $(PREBUILT_LIB))
diff --git a/runtime/contrib/android/api/Prebuilt.mk b/runtime/contrib/android/api/Prebuilt.mk
new file mode 100644
index 000000000..63cf2bc7e
--- /dev/null
+++ b/runtime/contrib/android/api/Prebuilt.mk
@@ -0,0 +1,46 @@
+LOCAL_PATH := $(call my-dir)
+include $(CLEAR_VARS)
+
+ifndef ONERT_PREBUILT_LIB_DIR
+$(error ONERT_PREBUILT_LIB_DIR is not set)
+endif
+
+# libnnfw
+include $(CLEAR_VARS)
+LOCAL_MODULE := nnfw-dev
+PREBUILT_LIB += nnfw-dev
+LOCAL_SRC_FILES := \
+ $(ONERT_PREBUILT_LIB_DIR)/libnnfw-dev.so
+include $(PREBUILT_SHARED_LIBRARY)
+
+# libonert_core
+include $(CLEAR_VARS)
+LOCAL_MODULE := onert_core
+PREBUILT_LIB += onert_core
+LOCAL_SRC_FILES := \
+ $(ONERT_PREBUILT_LIB_DIR)/libonert_core.so
+include $(PREBUILT_SHARED_LIBRARY)
+
+# backend_cpu
+include $(CLEAR_VARS)
+LOCAL_MODULE := backend_cpu
+PREBUILT_LIB += backend_cpu
+LOCAL_SRC_FILES := \
+ $(ONERT_PREBUILT_LIB_DIR)/libbackend_cpu.so
+include $(PREBUILT_SHARED_LIBRARY)
+
+# TODO Support backend acl
+# backend_acl
+ifeq ($(ONERT_CONTAINS_ACL), 1)
+  $(error ACL backend is not supported yet)
+endif
+
+# backend_ext
+ifneq ($(ONERT_EXT_PREBUILT_LIB), )
+include $(CLEAR_VARS)
+LOCAL_MODULE := backend_ext
+PREBUILT_LIB += backend_ext
+LOCAL_SRC_FILES := \
+ $(ONERT_EXT_PREBUILT_LIB)
+include $(PREBUILT_SHARED_LIBRARY)
+endif
diff --git a/runtime/contrib/android/api/build.gradle b/runtime/contrib/android/api/build.gradle
index def89eeac..eed8f274e 100644
--- a/runtime/contrib/android/api/build.gradle
+++ b/runtime/contrib/android/api/build.gradle
@@ -8,11 +8,39 @@ android {
minSdkVersion 26
targetSdkVersion 29
versionCode 1
- versionName "1.9.0"
+ versionName "1.25.0"
externalNativeBuild {
ndkBuild {
- arguments "ONERT_API_INC_DIR=${project.projectDir}/../../../onert/api/include"
+ def onert_header_dir
+ if (project.hasProperty('onertHeaderDir'))
+ onert_header_dir = project.onertHeaderDir
+ else
+ onert_header_dir = "${project.projectDir}/../../../onert/api/include"
+
+ def onert_lib_dir
+ if (project.hasProperty('onertLibDir'))
+ onert_lib_dir = project.onertLibDir
+ else
+ onert_lib_dir = "${project.projectDir}/../../../../Product/out/lib"
+
+ def onert_contains_acl
+ if (project.hasProperty('onertContainsAcl'))
+ onert_contains_acl = 1
+ else
+ onert_contains_acl = 0
+
+ def onert_ext_lib
+ if (project.hasProperty('onertExtLib'))
+ onert_ext_lib = project.onertExtLib
+ else
+ onert_ext_lib = ""
+
+ arguments "ONERT_API_INC_DIR=$onert_header_dir",
+ "ONERT_PREBUILT_LIB_DIR=$onert_lib_dir",
+ "ONERT_CONTAINS_ACL=$onert_contains_acl",
+ "ONERT_EXT_PREBUILT_LIB=$onert_ext_lib"
+
abiFilters 'arm64-v8a'
}
}
diff --git a/runtime/contrib/android/api/prebuilt/Android.mk b/runtime/contrib/android/api/prebuilt/Android.mk
deleted file mode 100644
index e8a9f0755..000000000
--- a/runtime/contrib/android/api/prebuilt/Android.mk
+++ /dev/null
@@ -1,9 +0,0 @@
-LOCAL_PATH := $(call my-dir)
-include $(CLEAR_VARS)
-PREBUILT_PATH := $(LOCAL_PATH)
-include $(PREBUILT_PATH)/backend_cpu/Android.mk
-include $(PREBUILT_PATH)/circle_loader/Android.mk
-include $(PREBUILT_PATH)/nnfw-dev/Android.mk
-include $(PREBUILT_PATH)/onert_core/Android.mk
-include $(PREBUILT_PATH)/tensorflowlite_jni/Android.mk
-include $(PREBUILT_PATH)/tflite_loader/Android.mk
diff --git a/runtime/contrib/android/api/prebuilt/backend_cpu/Android.mk b/runtime/contrib/android/api/prebuilt/backend_cpu/Android.mk
deleted file mode 100644
index ccda9ea90..000000000
--- a/runtime/contrib/android/api/prebuilt/backend_cpu/Android.mk
+++ /dev/null
@@ -1,7 +0,0 @@
-LOCAL_PATH := $(call my-dir)
-include $(CLEAR_VARS)
-LOCAL_MODULE := backend_cpu
-PREBUILT_LIB += backend_cpu
-LOCAL_SRC_FILES := \
- libbackend_cpu.so
-include $(PREBUILT_SHARED_LIBRARY)
diff --git a/runtime/contrib/android/api/prebuilt/backend_cpu/libbackend_cpu.so b/runtime/contrib/android/api/prebuilt/backend_cpu/libbackend_cpu.so
deleted file mode 120000
index 3d577cf5c..000000000
--- a/runtime/contrib/android/api/prebuilt/backend_cpu/libbackend_cpu.so
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../../Product/out/lib/libbackend_cpu.so \ No newline at end of file
diff --git a/runtime/contrib/android/api/prebuilt/circle_loader/Android.mk b/runtime/contrib/android/api/prebuilt/circle_loader/Android.mk
deleted file mode 100644
index 2e481e93e..000000000
--- a/runtime/contrib/android/api/prebuilt/circle_loader/Android.mk
+++ /dev/null
@@ -1,7 +0,0 @@
-LOCAL_PATH := $(call my-dir)
-include $(CLEAR_VARS)
-LOCAL_MODULE := circle_loader
-PREBUILT_LIB += circle_loader
-LOCAL_SRC_FILES := \
- libcircle_loader.so
-include $(PREBUILT_SHARED_LIBRARY)
diff --git a/runtime/contrib/android/api/prebuilt/circle_loader/libcircle_loader.so b/runtime/contrib/android/api/prebuilt/circle_loader/libcircle_loader.so
deleted file mode 120000
index 528d7017f..000000000
--- a/runtime/contrib/android/api/prebuilt/circle_loader/libcircle_loader.so
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../../Product/out/lib/libcircle_loader.so \ No newline at end of file
diff --git a/runtime/contrib/android/api/prebuilt/nnfw-dev/Android.mk b/runtime/contrib/android/api/prebuilt/nnfw-dev/Android.mk
deleted file mode 100644
index 10cb8f6f4..000000000
--- a/runtime/contrib/android/api/prebuilt/nnfw-dev/Android.mk
+++ /dev/null
@@ -1,7 +0,0 @@
-LOCAL_PATH := $(call my-dir)
-include $(CLEAR_VARS)
-LOCAL_MODULE := nnfw-dev
-PREBUILT_LIB += nnfw-dev
-LOCAL_SRC_FILES := \
- libnnfw-dev.so
-include $(PREBUILT_SHARED_LIBRARY)
diff --git a/runtime/contrib/android/api/prebuilt/nnfw-dev/libnnfw-dev.so b/runtime/contrib/android/api/prebuilt/nnfw-dev/libnnfw-dev.so
deleted file mode 120000
index 1913db8d7..000000000
--- a/runtime/contrib/android/api/prebuilt/nnfw-dev/libnnfw-dev.so
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../../Product/out/lib/libnnfw-dev.so \ No newline at end of file
diff --git a/runtime/contrib/android/api/prebuilt/onert_core/Android.mk b/runtime/contrib/android/api/prebuilt/onert_core/Android.mk
deleted file mode 100644
index a6682a24f..000000000
--- a/runtime/contrib/android/api/prebuilt/onert_core/Android.mk
+++ /dev/null
@@ -1,7 +0,0 @@
-LOCAL_PATH := $(call my-dir)
-include $(CLEAR_VARS)
-LOCAL_MODULE := onert_core
-PREBUILT_LIB += onert_core
-LOCAL_SRC_FILES := \
- libonert_core.so
-include $(PREBUILT_SHARED_LIBRARY)
diff --git a/runtime/contrib/android/api/prebuilt/onert_core/libonert_core.so b/runtime/contrib/android/api/prebuilt/onert_core/libonert_core.so
deleted file mode 120000
index bafe11cb9..000000000
--- a/runtime/contrib/android/api/prebuilt/onert_core/libonert_core.so
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../../Product/out/lib/libonert_core.so \ No newline at end of file
diff --git a/runtime/contrib/android/api/prebuilt/tensorflowlite_jni/Android.mk b/runtime/contrib/android/api/prebuilt/tensorflowlite_jni/Android.mk
deleted file mode 100644
index 823cf0747..000000000
--- a/runtime/contrib/android/api/prebuilt/tensorflowlite_jni/Android.mk
+++ /dev/null
@@ -1,7 +0,0 @@
-LOCAL_PATH := $(call my-dir)
-include $(CLEAR_VARS)
-LOCAL_MODULE := tensorflowlite_jni
-PREBUILT_LIB += tensorflowlite_jni
-LOCAL_SRC_FILES := \
- libtensorflowlite_jni.so
-include $(PREBUILT_SHARED_LIBRARY)
diff --git a/runtime/contrib/android/api/prebuilt/tensorflowlite_jni/libtensorflowlite_jni.so b/runtime/contrib/android/api/prebuilt/tensorflowlite_jni/libtensorflowlite_jni.so
deleted file mode 120000
index d3d72a5a7..000000000
--- a/runtime/contrib/android/api/prebuilt/tensorflowlite_jni/libtensorflowlite_jni.so
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../../Product/out/lib/libtensorflowlite_jni.so \ No newline at end of file
diff --git a/runtime/contrib/android/api/prebuilt/tflite_loader/Android.mk b/runtime/contrib/android/api/prebuilt/tflite_loader/Android.mk
deleted file mode 100644
index 135ac1dad..000000000
--- a/runtime/contrib/android/api/prebuilt/tflite_loader/Android.mk
+++ /dev/null
@@ -1,7 +0,0 @@
-LOCAL_PATH := $(call my-dir)
-include $(CLEAR_VARS)
-LOCAL_MODULE := tflite_loader
-PREBUILT_LIB += tflite_loader
-LOCAL_SRC_FILES := \
- libtflite_loader.so
-include $(PREBUILT_SHARED_LIBRARY)
diff --git a/runtime/contrib/android/api/prebuilt/tflite_loader/libtflite_loader.so b/runtime/contrib/android/api/prebuilt/tflite_loader/libtflite_loader.so
deleted file mode 120000
index 4c001aec0..000000000
--- a/runtime/contrib/android/api/prebuilt/tflite_loader/libtflite_loader.so
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../../Product/out/lib/libtflite_loader.so \ No newline at end of file
diff --git a/runtime/contrib/android/api/src/main/native/onert-native-api.cpp b/runtime/contrib/android/api/src/main/native/onert-native-api.cpp
index 1644e0f7f..72e73bee6 100644
--- a/runtime/contrib/android/api/src/main/native/onert-native-api.cpp
+++ b/runtime/contrib/android/api/src/main/native/onert-native-api.cpp
@@ -52,7 +52,7 @@ JNIEXPORT void JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeCloseSe
}
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeLoadModelFromFile(
- JNIEnv *env, jobject, jlong handle, jstring jnnpkg_path)
+ JNIEnv *env, jobject, jlong handle, jstring jnnpkg_path)
{
if (jni_helper::verifyHandle(handle) == JNI_FALSE)
return JNI_FALSE;
@@ -103,7 +103,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeRun
}
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetInput(
- JNIEnv *env, jobject, jlong handle, jint jindex, jint jtype, jobject jbuf, jint jbufsize)
+ JNIEnv *env, jobject, jlong handle, jint jindex, jint jtype, jobject jbuf, jint jbufsize)
{
if (jni_helper::verifyHandle(handle) == JNI_FALSE)
return JNI_FALSE;
@@ -121,8 +121,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSet
if (jni::setInput(handle, params) == false)
{
- __android_log_print(ANDROID_LOG_ERROR, JTAG, "%s] failed native setOutput",
- __PRETTY_FUNCTION__);
+ __android_log_print(ANDROID_LOG_ERROR, JTAG, "%s] failed native setInput", __PRETTY_FUNCTION__);
return JNI_FALSE;
}
@@ -130,7 +129,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSet
}
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetOutput(
- JNIEnv *env, jobject, jlong handle, jint jindex, jint jtype, jobject jbuf, jint jbufsize)
+ JNIEnv *env, jobject, jlong handle, jint jindex, jint jtype, jobject jbuf, jint jbufsize)
{
if (jni_helper::verifyHandle(handle) == JNI_FALSE)
return JNI_FALSE;
@@ -157,7 +156,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSet
}
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetInputLayout(
- JNIEnv *, jobject, jlong handle, jint jindex, jint jlayout)
+ JNIEnv *, jobject, jlong handle, jint jindex, jint jlayout)
{
if (jni_helper::verifyHandle(handle) == JNI_FALSE)
return JNI_FALSE;
@@ -179,7 +178,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSet
}
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetOutputLayout(
- JNIEnv *, jobject, jlong handle, jint jindex, jint jlayout)
+ JNIEnv *, jobject, jlong handle, jint jindex, jint jlayout)
{
if (jni_helper::verifyHandle(handle) == JNI_FALSE)
return JNI_FALSE;
@@ -235,7 +234,7 @@ JNIEXPORT jint JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeGetOutp
}
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetAvailableBackends(
- JNIEnv *env, jobject, jlong handle, jstring jbackends)
+ JNIEnv *env, jobject, jlong handle, jstring jbackends)
{
if (jni_helper::verifyHandle(handle) == JNI_FALSE)
return JNI_FALSE;
@@ -256,7 +255,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSet
}
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeGetInputTensorInfo(
- JNIEnv *env, jobject, jlong handle, jint jindex, jobject jinfo)
+ JNIEnv *env, jobject, jlong handle, jint jindex, jobject jinfo)
{
if (jni_helper::verifyHandle(handle) == JNI_FALSE)
return JNI_FALSE;
@@ -278,7 +277,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeGet
}
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeGetOutputTensorInfo(
- JNIEnv *env, jobject, jlong handle, jint jindex, jobject jinfo)
+ JNIEnv *env, jobject, jlong handle, jint jindex, jobject jinfo)
{
if (jni_helper::verifyHandle(handle) == JNI_FALSE)
return JNI_FALSE;
diff --git a/runtime/contrib/android/api/src/main/native/onert-native-api.h b/runtime/contrib/android/api/src/main/native/onert-native-api.h
index 13768d470..7997530ac 100644
--- a/runtime/contrib/android/api/src/main/native/onert-native-api.h
+++ b/runtime/contrib/android/api/src/main/native/onert-native-api.h
@@ -46,7 +46,7 @@ JNIEXPORT void JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeCloseSe
* Signature: (JLjava/lang/String;)Z
*/
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeLoadModelFromFile(
- JNIEnv *, jobject, jlong, jstring);
+ JNIEnv *, jobject, jlong, jstring);
/*
* Class: com_samsung_onert_NativeSessionWrapper
@@ -71,7 +71,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeRun
* Signature: (JIILjava/nio/ByteBuffer;I)Z
*/
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetInput(
- JNIEnv *, jobject, jlong, jint, jint, jobject, jint);
+ JNIEnv *, jobject, jlong, jint, jint, jobject, jint);
/*
* Class: com_samsung_onert_NativeSessionWrapper
@@ -79,7 +79,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSet
* Signature: (JIILjava/nio/ByteBuffer;I)Z
*/
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetOutput(
- JNIEnv *, jobject, jlong, jint, jint, jobject, jint);
+ JNIEnv *, jobject, jlong, jint, jint, jobject, jint);
/*
* Class: com_samsung_onert_NativeSessionWrapper
@@ -87,7 +87,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSet
* Signature: (JII)Z
*/
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetInputLayout(
- JNIEnv *, jobject, jlong, jint, jint);
+ JNIEnv *, jobject, jlong, jint, jint);
/*
* Class: com_samsung_onert_NativeSessionWrapper
@@ -95,7 +95,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSet
* Signature: (JII)Z
*/
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetOutputLayout(
- JNIEnv *, jobject, jlong, jint, jint);
+ JNIEnv *, jobject, jlong, jint, jint);
/*
* Class: com_samsung_onert_NativeSessionWrapper
@@ -121,7 +121,7 @@ JNIEXPORT jint JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeGetOutp
* Signature: (JILcom/samsung/onert/NativeSessionWrapper/InternalTensorInfo;)Z
*/
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeGetInputTensorInfo(
- JNIEnv *, jobject, jlong, jint, jobject);
+ JNIEnv *, jobject, jlong, jint, jobject);
/*
* Class: com_samsung_onert_NativeSessionWrapper
@@ -129,7 +129,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeGet
* Signature: (JILcom/samsung/onert/NativeSessionWrapper/InternalTensorInfo;)Z
*/
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeGetOutputTensorInfo(
- JNIEnv *, jobject, jlong, jint, jobject);
+ JNIEnv *, jobject, jlong, jint, jobject);
/*
* Class: com_samsung_onert_NativeSessionWrapper
@@ -137,7 +137,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeGet
* Signature: (JLjava/lang/String;)Z
*/
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetAvailableBackends(
- JNIEnv *, jobject, jlong, jstring);
+ JNIEnv *, jobject, jlong, jstring);
#ifdef __cplusplus
}
diff --git a/runtime/contrib/android_benchmark_app/CMakeLists.txt b/runtime/contrib/android_benchmark_app/CMakeLists.txt
index 55dbf0024..63e4fc545 100644
--- a/runtime/contrib/android_benchmark_app/CMakeLists.txt
+++ b/runtime/contrib/android_benchmark_app/CMakeLists.txt
@@ -55,7 +55,7 @@ target_link_libraries(android_benchmark_native nnfw_lib_tflite)
target_link_libraries(android_benchmark_native nnfw_lib_misc)
target_link_libraries(android_benchmark_native log)
-nnas_find_package(FlatBuffersSource EXACT 1.11 REQUIRED)
+nnas_find_package(FlatBuffersSource EXACT 2.0 REQUIRED)
target_include_directories(android_benchmark_native PUBLIC ${FlatBuffersSource_DIR}/include .)
add_custom_target(android-benchmark-apk ALL
diff --git a/runtime/contrib/android_benchmark_app/cpp/ndk_main.cpp b/runtime/contrib/android_benchmark_app/cpp/ndk_main.cpp
index 4b0e4395f..8df179a3d 100644
--- a/runtime/contrib/android_benchmark_app/cpp/ndk_main.cpp
+++ b/runtime/contrib/android_benchmark_app/cpp/ndk_main.cpp
@@ -173,7 +173,7 @@ inline void runBenchmark(JNIEnv *env, jobject thisObj, Activity &act)
}
JNIEXPORT void JNICALL Java_com_ndk_tflbench_MainActivity_runInterpreterBenchmark(
- JNIEnv *env, jobject thisObj, jobject model_buffer)
+ JNIEnv *env, jobject thisObj, jobject model_buffer)
{
setTitle(env, thisObj, "Running Interpreter Benchmark");
diff --git a/runtime/contrib/android_tflite/CMakeLists.txt b/runtime/contrib/android_tflite/CMakeLists.txt
deleted file mode 100644
index c035cedf5..000000000
--- a/runtime/contrib/android_tflite/CMakeLists.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-if(NOT BUILD_ANDROID_TFLITE)
- return()
-endif(NOT BUILD_ANDROID_TFLITE)
-
-nnfw_find_package(TensorFlowLite EXACT 1.13.1 REQUIRED)
-
-if(NOT DEFINED NDK_DIR)
- message(FATAL_ERROR "NDK_DIR should be specified via environment variable")
-endif()
-message(STATUS "Found NDK: ${NDK_DIR}")
-
-#
-# Tensorflow Lite JNI library
-#
-set(TENSORFLOW_LITE_BASE "${TensorFlowSource_DIR}/tensorflow/lite")
-set(TFLITE_JNI_BASE ${TENSORFLOW_LITE_BASE}/java/src/main/native)
-set(TFLITE_JNI_SRCS ${TFLITE_JNI_BASE}/exception_jni.cc
- ${TFLITE_JNI_BASE}/nativeinterpreterwrapper_jni.cc
- ${TFLITE_JNI_BASE}/tensor_jni.cc
- ${TFLITE_JNI_BASE}/tensorflow_lite_jni.cc
- ${CMAKE_CURRENT_SOURCE_DIR}/builtin_ops_jni.cc # Use nnfw's OpResolver
- )
-set(TFLITE_JNI_INCLUDES ${TENSORFLOW_LITE_BASE}/java/src/native)
-
-# TODO use tensorflow-lite static library instead of compiling all the sources again
-add_library(tensorflowlite_jni SHARED ${TFLITE_JNI_SRCS} ${TFLITE_SRCS})
-target_include_directories(tensorflowlite_jni PUBLIC ${TFLITE_JNI_INCLUDES} ${TFLITE_INCLUDES})
-target_link_libraries(tensorflowlite_jni eigen ${LIB_PTHREAD} dl)
-target_link_libraries(tensorflowlite_jni log)
-target_link_libraries(tensorflowlite_jni nnfw_lib_tflite)
-install(TARGETS tensorflowlite_jni DESTINATION lib)
diff --git a/runtime/contrib/android_tflite/builtin_ops_jni.cc b/runtime/contrib/android_tflite/builtin_ops_jni.cc
deleted file mode 100644
index 5770701ea..000000000
--- a/runtime/contrib/android_tflite/builtin_ops_jni.cc
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the License);
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tensorflow/lite/kernels/register.h"
-#include "tflite/ext/kernels/register.h"
-
-namespace tflite
-{
-
-std::unique_ptr<OpResolver> CreateOpResolver()
-{
- return std::unique_ptr<::nnfw::tflite::BuiltinOpResolver>(
- new ::nnfw::tflite::BuiltinOpResolver());
-}
-
-} // namespace tflite
diff --git a/runtime/contrib/heap_trace/src/cl_create_buffer_stub.cc b/runtime/contrib/heap_trace/src/cl_create_buffer_stub.cc
index d9d2700ee..2affbe066 100644
--- a/runtime/contrib/heap_trace/src/cl_create_buffer_stub.cc
+++ b/runtime/contrib/heap_trace/src/cl_create_buffer_stub.cc
@@ -31,8 +31,8 @@ cl_mem clCreateBuffer(cl_context context, cl_mem_flags flags, size_t size, void
static auto isOriginalFunctionCallSuccessful = [](cl_mem result) -> bool { return result; };
static auto originalFunction =
- findFunctionByName<cl_mem, cl_context, cl_mem_flags, size_t, void *, cl_int *>(
- "clCreateBuffer");
+ findFunctionByName<cl_mem, cl_context, cl_mem_flags, size_t, void *, cl_int *>(
+ "clCreateBuffer");
cl_mem result = originalFunction(context, flags, size, host_ptr, errcode_ret);
if (isOriginalFunctionCallSuccessful(result) && !Trace::Guard{}.isActive())
{
diff --git a/runtime/contrib/heap_trace/src/memory_pool_for_symbol_searcher_internals.h b/runtime/contrib/heap_trace/src/memory_pool_for_symbol_searcher_internals.h
index 89797ad50..3186c7ffb 100644
--- a/runtime/contrib/heap_trace/src/memory_pool_for_symbol_searcher_internals.h
+++ b/runtime/contrib/heap_trace/src/memory_pool_for_symbol_searcher_internals.h
@@ -60,7 +60,7 @@ private:
{
uint8_t *ptr_to_the_free_space_after_allocation = _ptr_to_free_space_start + size;
size_t size_of_reserved_space_after_allocation =
- ptr_to_the_free_space_after_allocation - _buffer;
+ ptr_to_the_free_space_after_allocation - _buffer;
if (size_of_reserved_space_after_allocation >= MAX_SIZE)
{
return false;
diff --git a/runtime/contrib/heap_trace/src/trace.cc b/runtime/contrib/heap_trace/src/trace.cc
index 020aeb90e..39a0c465b 100644
--- a/runtime/contrib/heap_trace/src/trace.cc
+++ b/runtime/contrib/heap_trace/src/trace.cc
@@ -72,7 +72,7 @@ void Trace::logAllocationEvent(cl_mem memory_ptr, size_t size_of_allocated_space
if (found_memory_space_description == _memory_in_use_on_gpu.end())
{
_memory_in_use_on_gpu.insert(
- std::make_pair(memory_ptr, MemoryTraits(1, size_of_allocated_space_in_bytes)));
+ std::make_pair(memory_ptr, MemoryTraits(1, size_of_allocated_space_in_bytes)));
_total_allocated_bytes_on_gpu += size_of_allocated_space_in_bytes;
if (_peak_heap_usage_on_gpu < _total_allocated_bytes_on_gpu - _total_deallocated_bytes_on_gpu)
{
diff --git a/runtime/contrib/heap_trace/src/trace.h b/runtime/contrib/heap_trace/src/trace.h
index 647c51d54..33e67e58d 100644
--- a/runtime/contrib/heap_trace/src/trace.h
+++ b/runtime/contrib/heap_trace/src/trace.h
@@ -31,7 +31,7 @@ class Trace
size_t size;
MemoryTraits(size_t init_counter_value, size_t size_of_allocated_memory)
- : ref_counter(init_counter_value), size(size_of_allocated_memory)
+ : ref_counter(init_counter_value), size(size_of_allocated_memory)
{
}
};
diff --git a/runtime/contrib/heap_trace/tests/src/cl_release_mem_object_interception_test.cc b/runtime/contrib/heap_trace/tests/src/cl_release_mem_object_interception_test.cc
index 49b8fd994..a5700b28d 100644
--- a/runtime/contrib/heap_trace/tests/src/cl_release_mem_object_interception_test.cc
+++ b/runtime/contrib/heap_trace/tests/src/cl_release_mem_object_interception_test.cc
@@ -94,9 +94,9 @@ TEST_F(ClReleaseMemObjectStub, must_log_deallocation_event_only_if_reference_cou
clReleaseMemObject(mem);
GlobalTrace.reset();
ASSERT_STREQ(
- getContentOfFile("./cl_release_mem_object_interception_test.log").c_str(),
- "On CPU - Peak heap usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\nOn "
- "GPU - Peak mem usage: 1024 B, Total allocated: 1024 B, Total deallocated: 1024 B\n");
+ getContentOfFile("./cl_release_mem_object_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\nOn "
+ "GPU - Peak mem usage: 1024 B, Total allocated: 1024 B, Total deallocated: 1024 B\n");
}
TEST_F(ClReleaseMemObjectStub, must_not_log_deallocation_event_if_original_function_failed)
diff --git a/runtime/contrib/heap_trace/tests/src/malloc_interception_test.cc b/runtime/contrib/heap_trace/tests/src/malloc_interception_test.cc
index ea3eb8256..182f52c21 100644
--- a/runtime/contrib/heap_trace/tests/src/malloc_interception_test.cc
+++ b/runtime/contrib/heap_trace/tests/src/malloc_interception_test.cc
@@ -87,8 +87,8 @@ TEST_F(MallocStub, should_allocate_memory_from_pool_for_symbol_searcher_internal
}
TEST_F(
- MallocStub,
- should_not_influence_on_trace_results_even_if_orignal_function_return_any_not_null_ptr_when_incoming_size_is_zero)
+ MallocStub,
+ should_not_influence_on_trace_results_even_if_orignal_function_return_any_not_null_ptr_when_incoming_size_is_zero)
{
void *p = malloc(0);
free(p);
diff --git a/runtime/contrib/heap_trace/tests/src/realloc_interception_test.cc b/runtime/contrib/heap_trace/tests/src/realloc_interception_test.cc
index 59660fad4..e81c5dc22 100644
--- a/runtime/contrib/heap_trace/tests/src/realloc_interception_test.cc
+++ b/runtime/contrib/heap_trace/tests/src/realloc_interception_test.cc
@@ -86,16 +86,16 @@ TEST_F(ReallocStub, should_work_as_malloc_when_incoming_ptr_is_equal_to_nullptr)
ASSERT_TRUE(p);
ASSERT_STREQ(
- getContentOfFile("./realloc_interception_test.log").c_str(),
- "On CPU - Peak heap usage: 1024 B, Total allocated: 1024 B, Total deallocated: 0 B\nOn "
- "GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
+ getContentOfFile("./realloc_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 1024 B, Total allocated: 1024 B, Total deallocated: 0 B\nOn "
+ "GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
free(p);
}
TEST_F(
- ReallocStub,
- should_not_influence_on_trace_results_even_if_orignal_function_return_any_not_null_ptr_when_incoming_size_is_zero_and_ptr_is_null)
+ ReallocStub,
+ should_not_influence_on_trace_results_even_if_orignal_function_return_any_not_null_ptr_when_incoming_size_is_zero_and_ptr_is_null)
{
void *p = realloc(nullptr, 0);
free(p);
diff --git a/runtime/contrib/heap_trace/tests/src/symbol_searcher_test.cc b/runtime/contrib/heap_trace/tests/src/symbol_searcher_test.cc
index 59fdeedc9..9ed933119 100644
--- a/runtime/contrib/heap_trace/tests/src/symbol_searcher_test.cc
+++ b/runtime/contrib/heap_trace/tests/src/symbol_searcher_test.cc
@@ -70,7 +70,7 @@ TEST_F(SymbolSearcher,
fs::path pathToTestSample2 = exePath() / "libtest_sample2.so";
void *test_sample2_handle = dlopen(pathToTestSample2.c_str(), RTLD_NOW);
void *func_addr_in_test_sample2 =
- dlsym(test_sample2_handle, "funcWhichCallFuncDefinedInTestSample3");
+ dlsym(test_sample2_handle, "funcWhichCallFuncDefinedInTestSample3");
ASSERT_TRUE(test_sample2_handle);
ASSERT_TRUE((void *)funcDefinedInTestSample3_ButWrappedInTestSample1 !=
diff --git a/runtime/contrib/heap_trace/tests/src/trace_test.cc b/runtime/contrib/heap_trace/tests/src/trace_test.cc
index 1cf4c530b..4f359bb6d 100644
--- a/runtime/contrib/heap_trace/tests/src/trace_test.cc
+++ b/runtime/contrib/heap_trace/tests/src/trace_test.cc
@@ -114,15 +114,15 @@ TEST_F(Trace, should_work_correctly_in_multithreaded_environment)
GlobalTrace.reset();
string thisShouldBeInLogFile =
- "Total allocated: " +
- to_string(numberOfThreads / 2 * numberOfEmulations * numberOfBytesPerOneEmulation) +
- " B, Total deallocated: " +
- to_string(numberOfThreads / 2 * numberOfEmulations * numberOfBytesPerOneEmulation) + " B\n";
+ "Total allocated: " +
+ to_string(numberOfThreads / 2 * numberOfEmulations * numberOfBytesPerOneEmulation) +
+ " B, Total deallocated: " +
+ to_string(numberOfThreads / 2 * numberOfEmulations * numberOfBytesPerOneEmulation) + " B\n";
string andThisToo =
- "Total allocated: " +
- to_string(numberOfThreads / 2 * numberOfEmulations * numberOfBytesPerOneEmulation) +
- " B, Total deallocated: " +
- to_string(numberOfThreads / 2 * numberOfEmulations * numberOfBytesPerOneEmulation) + " B\n";
+ "Total allocated: " +
+ to_string(numberOfThreads / 2 * numberOfEmulations * numberOfBytesPerOneEmulation) +
+ " B, Total deallocated: " +
+ to_string(numberOfThreads / 2 * numberOfEmulations * numberOfBytesPerOneEmulation) + " B\n";
ASSERT_TRUE(getContentOfFile("./trace_test.log").find(thisShouldBeInLogFile) != string::npos);
ASSERT_TRUE(getContentOfFile("./trace_test.log").find(andThisToo) != string::npos);
}
diff --git a/runtime/contrib/labs/jniacl/src/jniacl_main.cc b/runtime/contrib/labs/jniacl/src/jniacl_main.cc
index 01b928981..1a34aa70e 100644
--- a/runtime/contrib/labs/jniacl/src/jniacl_main.cc
+++ b/runtime/contrib/labs/jniacl/src/jniacl_main.cc
@@ -36,12 +36,13 @@ Java_com_samsung_testaclexec_ActivityMain_RunACLJNI(JNIEnv *env, jobject)
TargetHint target_hint = TargetHint::OPENCL;
bool autoinc = true;
- graph << target_hint << Tensor(TensorInfo(TensorShape(3U, 3U, 1U, 1U), 1, DataType::F32),
- std::unique_ptr<InputAccessor>(new InputAccessor(autoinc)))
+ graph << target_hint
+ << Tensor(TensorInfo(TensorShape(3U, 3U, 1U, 1U), 1, DataType::F32),
+ std::unique_ptr<InputAccessor>(new InputAccessor(autoinc)))
<< arm_compute::graph::ConvolutionLayer(
- 3U, 3U, 1U, std::unique_ptr<WeightAccessor>(new WeightAccessor(autoinc)),
- std::unique_ptr<BiasAccessor>(new BiasAccessor()),
- arm_compute::PadStrideInfo(1, 1, 0, 0))
+ 3U, 3U, 1U, std::unique_ptr<WeightAccessor>(new WeightAccessor(autoinc)),
+ std::unique_ptr<BiasAccessor>(new BiasAccessor()),
+ arm_compute::PadStrideInfo(1, 1, 0, 0))
<< Tensor(std::unique_ptr<OutputAccessor>(new OutputAccessor()));
graph.run();
diff --git a/runtime/contrib/labs/opencl_test/src/opencl_test.cc b/runtime/contrib/labs/opencl_test/src/opencl_test.cc
index 1faa91478..8c1eb25a6 100644
--- a/runtime/contrib/labs/opencl_test/src/opencl_test.cc
+++ b/runtime/contrib/labs/opencl_test/src/opencl_test.cc
@@ -126,7 +126,7 @@ public:
{
cl_int buildErr = CL_SUCCESS;
auto buildInfo = program_.getBuildInfo<CL_PROGRAM_BUILD_LOG>(&buildErr);
- for (auto &pair : buildInfo)
+ for (const auto &pair : buildInfo)
{
std::cerr << pair.second << std::endl << std::endl;
}
@@ -199,7 +199,7 @@ void checkContextMem()
try
{
auto kernel_functor = cl::KernelFunctor<cl_int, cl::Buffer, cl::Buffer, cl_int, cl_int>(
- gpu.program_, "memory_test"); // name should be same as cl function name
+ gpu.program_, "memory_test"); // name should be same as cl function name
// create a queue per device and queue a kernel job
@@ -256,7 +256,7 @@ void printHelp()
std::cout << "opencl information: \n\n";
std::cout << "\t -h : help\n";
std::cout
- << "\t -g : print if memory map is shared among devices in GPU (in default platform)\n\n";
+ << "\t -g : print if memory map is shared among devices in GPU (in default platform)\n\n";
std::cout << "\t -s : test for synchronized work by two devices in a GPU\n\n";
}
@@ -270,7 +270,7 @@ void printHelp()
int kernel_idx[MAX_DEVICE_NUM];
unsigned char kernel_completed = 0x00; // bit 0 = 1 means kernel by device[0] was completed.
unsigned char
- kernel_completed_flag; // if comparing kernel_completed with this var, all kernels are completed
+ kernel_completed_flag; // if comparing kernel_completed with this var, all kernels are completed
int device_num;
std::mutex kernel_complete_handler_mutex;
@@ -319,7 +319,7 @@ void testSync()
try
{
auto kernel_functor = cl::KernelFunctor<cl::Buffer, cl_int>(
- gpu.program_, "test"); // name should be same as cl function name
+ gpu.program_, "test"); // name should be same as cl function name
// variable init
cl::Event ev[MAX_DEVICE_NUM];
diff --git a/runtime/contrib/labs/tflite_examples/src/conv.cpp b/runtime/contrib/labs/tflite_examples/src/conv.cpp
index e8542c3f5..0b5f946bc 100644
--- a/runtime/contrib/labs/tflite_examples/src/conv.cpp
+++ b/runtime/contrib/labs/tflite_examples/src/conv.cpp
@@ -217,7 +217,7 @@ int main(int argc, char **argv)
// Configure Filter
const uint32_t kernel_size = KER_N * KER_C * KER_H * KER_W;
float kernel_data[kernel_size] = {
- 0.0f,
+ 0.0f,
};
// Fill kernel data in NHWC order
@@ -243,13 +243,13 @@ int main(int argc, char **argv)
}
interp.SetTensorParametersReadOnly(
- 2, kTfLiteFloat32 /* type */, "filter" /* name */, {KER_N, KER_H, KER_W, KER_C} /* dims */,
- quantization, reinterpret_cast<const char *>(kernel_data), sizeof(kernel_data));
+ 2, kTfLiteFloat32 /* type */, "filter" /* name */, {KER_N, KER_H, KER_W, KER_C} /* dims */,
+ quantization, reinterpret_cast<const char *>(kernel_data), sizeof(kernel_data));
// Configure Bias
const uint32_t bias_size = bias.size();
float bias_data[bias_size] = {
- 0.0f,
+ 0.0f,
};
// Fill bias data
diff --git a/runtime/contrib/style_transfer_app/CMakeLists.txt b/runtime/contrib/style_transfer_app/CMakeLists.txt
index b137231ea..9ffbeaec7 100644
--- a/runtime/contrib/style_transfer_app/CMakeLists.txt
+++ b/runtime/contrib/style_transfer_app/CMakeLists.txt
@@ -32,7 +32,7 @@ endif(JPEG_FOUND)
target_link_libraries(style_transfer_app onert_core onert tflite_loader)
target_link_libraries(style_transfer_app tensorflow-lite ${LIB_PTHREAD} dl nnfw_lib_tflite)
target_link_libraries(style_transfer_app nnfw-dev)
-target_link_libraries(tflite_loader_test_tool ${Boost_PROGRAM_OPTIONS_LIBRARY} ${Boost_SYSTEM_LIBRARY} ${Boost_FILESYSTEM_LIBRARY})
+target_link_libraries(tflite_comparator ${Boost_PROGRAM_OPTIONS_LIBRARY} ${Boost_SYSTEM_LIBRARY} ${Boost_FILESYSTEM_LIBRARY})
if(JPEG_FOUND)
target_link_libraries(style_transfer_app ${JPEG_LIBRARIES})
endif(JPEG_FOUND)
diff --git a/runtime/contrib/style_transfer_app/src/bitmap_helper.cc b/runtime/contrib/style_transfer_app/src/bitmap_helper.cc
index 6211ea476..0f687b2ee 100644
--- a/runtime/contrib/style_transfer_app/src/bitmap_helper.cc
+++ b/runtime/contrib/style_transfer_app/src/bitmap_helper.cc
@@ -49,10 +49,10 @@ unsigned char *BitmapHelper::createBitmapFileHeader(int height, int width, int p
int fileSize = fileHeaderSize + infoHeaderSize + (bytesPerPixel * width + paddingSize) * height;
static unsigned char fileHeader[] = {
- 0, 0, /// signature
- 0, 0, 0, 0, /// image file size in bytes
- 0, 0, 0, 0, /// reserved
- 0, 0, 0, 0, /// start of pixel array
+ 0, 0, /// signature
+ 0, 0, 0, 0, /// image file size in bytes
+ 0, 0, 0, 0, /// reserved
+ 0, 0, 0, 0, /// start of pixel array
};
fileHeader[0] = (unsigned char)('B');
@@ -69,17 +69,17 @@ unsigned char *BitmapHelper::createBitmapFileHeader(int height, int width, int p
unsigned char *BitmapHelper::createBitmapInfoHeader(int height, int width)
{
static unsigned char infoHeader[] = {
- 0, 0, 0, 0, /// header size
- 0, 0, 0, 0, /// image width
- 0, 0, 0, 0, /// image height
- 0, 0, /// number of color planes
- 0, 0, /// bits per pixel
- 0, 0, 0, 0, /// compression
- 0, 0, 0, 0, /// image size
- 0, 0, 0, 0, /// horizontal resolution
- 0, 0, 0, 0, /// vertical resolution
- 0, 0, 0, 0, /// colors in color table
- 0, 0, 0, 0, /// important color count
+ 0, 0, 0, 0, /// header size
+ 0, 0, 0, 0, /// image width
+ 0, 0, 0, 0, /// image height
+ 0, 0, /// number of color planes
+ 0, 0, /// bits per pixel
+ 0, 0, 0, 0, /// compression
+ 0, 0, 0, 0, /// image size
+ 0, 0, 0, 0, /// horizontal resolution
+ 0, 0, 0, 0, /// vertical resolution
+ 0, 0, 0, 0, /// colors in color table
+ 0, 0, 0, 0, /// important color count
};
// Minus height means top to bottom write
@@ -191,7 +191,7 @@ int BitmapHelper::read_bmp(const std::string &input_bmp_name, std::vector<float>
// Decode image, allocating tensor once the image size is known
const uint8_t *bmp_pixels = &img_bytes[header_size];
std::vector<uint8_t> bmp =
- decode_bmp(bmp_pixels, row_size, width, abs(height), channels, top_down);
+ decode_bmp(bmp_pixels, row_size, width, abs(height), channels, top_down);
for (uint32_t j = 0; j < bmp.size(); j++)
{
input.push_back(static_cast<float>(bmp[j]));
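
The re-indented initializers above are the 14-byte BMP file header and the 40-byte info header; each multi-byte field is little-endian and gets patched byte-by-byte after the array is zeroed. A hedged sketch of that packing (helper names are hypothetical; field offsets follow the comments in the diff and the BMP format):

```cpp
#include <cstdint>

// Store a 32-bit value little-endian at p, the same packing
// BitmapHelper applies to its static header arrays.
static void put_u32_le(unsigned char *p, uint32_t v)
{
  p[0] = static_cast<unsigned char>(v);
  p[1] = static_cast<unsigned char>(v >> 8);
  p[2] = static_cast<unsigned char>(v >> 16);
  p[3] = static_cast<unsigned char>(v >> 24);
}

void fill_file_header(unsigned char header[14], uint32_t file_size)
{
  header[0] = 'B'; // signature
  header[1] = 'M';
  put_u32_le(header + 2, file_size); // image file size in bytes
  put_u32_le(header + 6, 0);         // reserved
  put_u32_le(header + 10, 54);       // start of pixel array (14 + 40)
}
```
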
diff --git a/runtime/contrib/style_transfer_app/src/jpeg_helper.cc b/runtime/contrib/style_transfer_app/src/jpeg_helper.cc
index ed5ae25a1..1554524f8 100644
--- a/runtime/contrib/style_transfer_app/src/jpeg_helper.cc
+++ b/runtime/contrib/style_transfer_app/src/jpeg_helper.cc
@@ -26,7 +26,7 @@ namespace StyleTransferApp
{
JpegHelper::JpegHelper(int bytes_per_pixel, J_COLOR_SPACE color_space)
- : _bytes_per_pixel(bytes_per_pixel), _color_space(color_space)
+ : _bytes_per_pixel(bytes_per_pixel), _color_space(color_space)
{
// DO NOTHING
}
diff --git a/runtime/contrib/style_transfer_app/src/style_transfer_app.cc b/runtime/contrib/style_transfer_app/src/style_transfer_app.cc
index eed0c4288..ab8735d43 100644
--- a/runtime/contrib/style_transfer_app/src/style_transfer_app.cc
+++ b/runtime/contrib/style_transfer_app/src/style_transfer_app.cc
@@ -68,10 +68,10 @@ uint64_t num_elems(const nnfw_tensorinfo *ti)
NNFW_STATUS resolve_op_backend(nnfw_session *session)
{
static std::unordered_map<std::string, std::string> operation_map = {
- {"TRANSPOSE_CONV", "OP_BACKEND_TransposeConv"}, {"CONV_2D", "OP_BACKEND_Conv2D"},
- {"DEPTHWISE_CONV_2D", "OP_BACKEND_DepthwiseConv2D"}, {"MEAN", "OP_BACKEND_Mean"},
- {"AVERAGE_POOL_2D", "OP_BACKEND_AvgPool2D"}, {"MAX_POOL_2D", "OP_BACKEND_MaxPool2D"},
- {"INSTANCE_NORM", "OP_BACKEND_InstanceNorm"}, {"ADD", "OP_BACKEND_Add"}};
+ {"TRANSPOSE_CONV", "OP_BACKEND_TransposeConv"}, {"CONV_2D", "OP_BACKEND_Conv2D"},
+ {"DEPTHWISE_CONV_2D", "OP_BACKEND_DepthwiseConv2D"}, {"MEAN", "OP_BACKEND_Mean"},
+ {"AVERAGE_POOL_2D", "OP_BACKEND_AvgPool2D"}, {"MAX_POOL_2D", "OP_BACKEND_MaxPool2D"},
+ {"INSTANCE_NORM", "OP_BACKEND_InstanceNorm"}, {"ADD", "OP_BACKEND_Add"}};
for (auto i : operation_map)
{
diff --git a/runtime/contrib/tflite_classify/src/ImageClassifier.cc b/runtime/contrib/tflite_classify/src/ImageClassifier.cc
index fae4f066c..1d92d6c86 100644
--- a/runtime/contrib/tflite_classify/src/ImageClassifier.cc
+++ b/runtime/contrib/tflite_classify/src/ImageClassifier.cc
@@ -24,9 +24,9 @@ ImageClassifier::ImageClassifier(const std::string &model_file, const std::strin
const int input_size, const int image_mean, const int image_std,
const std::string &input_name, const std::string &output_name,
const bool use_nnapi)
- : _inference(new InferenceInterface(model_file, use_nnapi)), _input_size(input_size),
- _image_mean(image_mean), _image_std(image_std), _input_name(input_name),
- _output_name(output_name)
+ : _inference(new InferenceInterface(model_file, use_nnapi)), _input_size(input_size),
+ _image_mean(image_mean), _image_std(image_std), _input_name(input_name),
+ _output_name(output_name)
{
// Load label
std::ifstream label_stream(label_file.c_str());
diff --git a/runtime/contrib/tflite_classify/src/InferenceInterface.cc b/runtime/contrib/tflite_classify/src/InferenceInterface.cc
index 160943477..562ff2ad6 100644
--- a/runtime/contrib/tflite_classify/src/InferenceInterface.cc
+++ b/runtime/contrib/tflite_classify/src/InferenceInterface.cc
@@ -20,7 +20,7 @@ using namespace tflite;
using namespace tflite::ops::builtin;
InferenceInterface::InferenceInterface(const std::string &model_file, const bool use_nnapi)
- : _interpreter(nullptr), _model(nullptr), _sess(nullptr)
+ : _interpreter(nullptr), _model(nullptr), _sess(nullptr)
{
// Load model
StderrReporter error_reporter;
diff --git a/runtime/contrib/tflite_classify/src/tflite_classify.cc b/runtime/contrib/tflite_classify/src/tflite_classify.cc
index 51758e2a6..7bed77875 100644
--- a/runtime/contrib/tflite_classify/src/tflite_classify.cc
+++ b/runtime/contrib/tflite_classify/src/tflite_classify.cc
@@ -60,9 +60,8 @@ int main(const int argc, char **argv)
}
// Create ImageClassifier
- std::unique_ptr<ImageClassifier> classifier(
- new ImageClassifier(MODEL_FILE, LABEL_FILE, INPUT_SIZE, IMAGE_MEAN, IMAGE_STD, INPUT_NAME,
- OUTPUT_NAME, use_nnapi));
+ std::unique_ptr<ImageClassifier> classifier(new ImageClassifier(
+ MODEL_FILE, LABEL_FILE, INPUT_SIZE, IMAGE_MEAN, IMAGE_STD, INPUT_NAME, OUTPUT_NAME, use_nnapi));
// Cam setting
cv::VideoCapture cap(0);
diff --git a/runtime/libs/benchmark/include/benchmark/CsvWriter.h b/runtime/libs/benchmark/include/benchmark/CsvWriter.h
index 5c259d7ed..d926bad8f 100644
--- a/runtime/libs/benchmark/include/benchmark/CsvWriter.h
+++ b/runtime/libs/benchmark/include/benchmark/CsvWriter.h
@@ -17,6 +17,7 @@
#ifndef __NNFW_BENCHMARK_CSV_WRITER_H__
#define __NNFW_BENCHMARK_CSV_WRITER_H__
+#include <cstdint>
#include <vector>
#include <string>
#include <fstream>
diff --git a/runtime/libs/benchmark/include/benchmark/MemoryInfo.h b/runtime/libs/benchmark/include/benchmark/MemoryInfo.h
new file mode 100644
index 000000000..6e8e12ba4
--- /dev/null
+++ b/runtime/libs/benchmark/include/benchmark/MemoryInfo.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_BENCHMARK_MEMORY_INFO_H__
+#define __NNFW_BENCHMARK_MEMORY_INFO_H__
+
+#include <cstdint>
+#include <string>
+
+namespace benchmark
+{
+
+bool prepareVmRSS();
+bool prepareVmHWM();
+bool prepareGpuMemory();
+bool preparePssSum();
+
+uint32_t getVmRSS();
+uint32_t getVmHWM();
+uint32_t getGpuMemory(const std::string &process_name);
+uint32_t getPssSum();
+
+std::string getProcessName();
+
+} // namespace benchmark
+
+#endif // __NNFW_BENCHMARK_MEMORY_INFO_H__
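
This new header extracts the memory probes that MemoryPoller previously kept as private methods into free functions, so other tools can query them directly. A short usage sketch against the API exactly as declared above; per /proc conventions, the returned values are in kB:

```cpp
#include "benchmark/MemoryInfo.h"

#include <iostream>

int main()
{
  using namespace benchmark;

  // Each prepare* call only checks that the backing /proc or debugfs
  // file can be opened; the matching get* call does the actual read.
  if (prepareVmRSS())
    std::cout << "VmRSS: " << getVmRSS() << " kB\n";

  if (prepareGpuMemory())
    std::cout << "GPU:   " << getGpuMemory(getProcessName()) << " kB\n";

  if (preparePssSum())
    std::cout << "Pss:   " << getPssSum() << " kB\n";
  return 0;
}
```
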
diff --git a/runtime/libs/benchmark/include/benchmark/MemoryPoller.h b/runtime/libs/benchmark/include/benchmark/MemoryPoller.h
index 48caa3b3a..47db3fd77 100644
--- a/runtime/libs/benchmark/include/benchmark/MemoryPoller.h
+++ b/runtime/libs/benchmark/include/benchmark/MemoryPoller.h
@@ -57,10 +57,6 @@ public:
private:
void process();
bool prepareMemoryPolling();
- uint32_t getVmRSS();
- uint32_t getVmHWM();
- uint32_t getGpuMemory();
- uint32_t getPssSum();
private:
std::chrono::milliseconds _duration;
diff --git a/runtime/libs/benchmark/include/benchmark/Phase.h b/runtime/libs/benchmark/include/benchmark/Phase.h
index 5eceb04c5..9b91a4391 100644
--- a/runtime/libs/benchmark/include/benchmark/Phase.h
+++ b/runtime/libs/benchmark/include/benchmark/Phase.h
@@ -19,6 +19,7 @@
#include "Types.h"
+#include <cstdint>
#include <string>
#include <vector>
diff --git a/runtime/libs/benchmark/include/benchmark/Phases.h b/runtime/libs/benchmark/include/benchmark/Phases.h
index 936a89742..7d642782a 100644
--- a/runtime/libs/benchmark/include/benchmark/Phases.h
+++ b/runtime/libs/benchmark/include/benchmark/Phases.h
@@ -50,6 +50,9 @@ public:
const MemoryPoller &mem_poll() const { return *_mem_poll; }
const Phase &at(const std::string &tag) const { return _phases.at(tag); }
+ uint32_t mem_before_init() const { return _mem_before_init; }
+ uint32_t mem_after_run() const { return _mem_after_run; }
+
private:
void run(const std::string &tag, const PhaseFunc &exec, const PhaseFunc *post, uint32_t loop_num,
bool option_disable);
@@ -58,6 +61,8 @@ private:
const PhaseOption _option;
std::unordered_map<std::string, Phase> _phases;
std::unique_ptr<MemoryPoller> _mem_poll;
+ uint32_t _mem_before_init;
+ uint32_t _mem_after_run;
};
} // namespace benchmark
diff --git a/runtime/libs/benchmark/include/benchmark/Result.h b/runtime/libs/benchmark/include/benchmark/Result.h
index 69084b300..4046d7c07 100644
--- a/runtime/libs/benchmark/include/benchmark/Result.h
+++ b/runtime/libs/benchmark/include/benchmark/Result.h
@@ -25,7 +25,7 @@
namespace benchmark
{
-// Data class between runner(nnpackage_run and tflite_run) and libbenchmark
+// Data class between runner(onert_run and tflite_run) and libbenchmark
class Result
{
public:
@@ -34,6 +34,8 @@ public:
double time[PhaseEnum::END_OF_PHASE][FigureType::END_OF_FIG_TYPE];
uint32_t memory[PhaseEnum::END_OF_PHASE][MemoryType::END_OF_MEM_TYPE];
bool print_memory = false;
+ uint32_t init_memory = 0;
+ uint32_t peak_memory = 0;
};
// TODO Support not only stdout but also ostream
diff --git a/runtime/libs/benchmark/src/CsvWriter.cpp b/runtime/libs/benchmark/src/CsvWriter.cpp
index 5f47c6511..6233129e7 100644
--- a/runtime/libs/benchmark/src/CsvWriter.cpp
+++ b/runtime/libs/benchmark/src/CsvWriter.cpp
@@ -35,7 +35,7 @@ CsvWriter::CsvWriter(const std::string &csv_filename) : CsvWriter(csv_filename,
}
CsvWriter::CsvWriter(const std::string &csv_filename, const std::vector<std::string> &header)
- : _ofs(csv_filename), _header_size(header.size()), _col_idx(0), _row_idx(0)
+ : _ofs(csv_filename), _header_size(header.size()), _col_idx(0), _row_idx(0)
{
assert(csv_filename.empty() == false);
assert(header.size() != 0);
diff --git a/runtime/libs/benchmark/src/MemoryInfo.cpp b/runtime/libs/benchmark/src/MemoryInfo.cpp
new file mode 100644
index 000000000..20d262961
--- /dev/null
+++ b/runtime/libs/benchmark/src/MemoryInfo.cpp
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "benchmark/MemoryInfo.h"
+
+#include <vector>
+#include <algorithm>
+#include <fstream>
+#include <sstream>
+#include <cassert>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+namespace
+{
+
+const std::string proc_status_path("/proc/self/status");
+const std::string gpu_memory_path("/sys/kernel/debug/mali0/gpu_memory");
+const std::string proc_smaps_path("/proc/self/smaps");
+
+bool isStrNumber(const std::string &s)
+{
+ return !s.empty() &&
+ std::find_if(s.begin(), s.end(), [](char c) { return !std::isdigit(c); }) == s.end();
+}
+
+std::vector<std::string> splitLine(std::string line, std::string delimiters = " \n\t")
+{
+ std::vector<std::string> words;
+ size_t prev = 0, pos;
+
+ while ((pos = line.find_first_of(delimiters, prev)) != std::string::npos)
+ {
+ if (pos > prev)
+ words.emplace_back(line.substr(prev, pos - prev));
+ prev = pos + 1;
+ }
+
+ if (prev < line.length())
+ words.emplace_back(line.substr(prev, std::string::npos));
+
+ return words;
+}
+
+std::vector<std::string> getValueFromFileStatus(const std::string &file, const std::string &key)
+{
+ std::ifstream ifs(file);
+ assert(ifs.is_open());
+
+ std::string line;
+ std::vector<std::string> val;
+
+ bool found = false;
+ while (std::getline(ifs, line))
+ {
+ if (line.find(key) != std::string::npos)
+ {
+ found = true;
+ break;
+ }
+ }
+ ifs.close();
+
+ if (!found)
+ {
+ // NOTE A process that uses GPU resources may not exist yet at the model-load phase.
+ // In that case, just return an empty result.
+ return val;
+ }
+
+ val = splitLine(line);
+ return val;
+}
+
+// Because of the structure of smaps, the summed value is returned as uint32_t
+uint32_t getSumValueFromFileSmaps(const std::string &file, const std::string &key)
+{
+ std::ifstream ifs(file);
+ assert(ifs.is_open());
+
+ std::string line;
+ uint32_t sum = 0;
+ while (std::getline(ifs, line))
+ {
+ if (line.find(key) != std::string::npos)
+ {
+ // Example splitLine() output for the line
+ // `Pss: 0 kB`:
+ // val[0]: "Pss:", val[1]: "0", val[2]: "kB"
+ auto val = splitLine(line);
+ assert(val.size() != 0);
+ // A SwapPss line may also appear, so check that "Pss" is at the beginning
+ if (val[0].find("Pss") != 0)
+ {
+ continue;
+ }
+ sum += std::stoul(val[1]);
+ }
+ }
+
+ return sum;
+}
+
+} // namespace
+
+namespace benchmark
+{
+
+bool prepareVmRSS() { return std::ifstream(proc_status_path).is_open(); }
+
+bool prepareVmHWM() { return std::ifstream(proc_status_path).is_open(); }
+
+bool prepareGpuMemory() { return std::ifstream(gpu_memory_path).is_open(); }
+
+bool preparePssSum() { return std::ifstream(proc_smaps_path).is_open(); }
+
+uint32_t getVmRSS()
+{
+ auto val = getValueFromFileStatus(proc_status_path, "VmRSS");
+ if (val.size() == 0)
+ return 0;
+ assert(isStrNumber(val[1]));
+ return std::stoul(val[1]);
+}
+
+uint32_t getVmHWM()
+{
+ auto val = getValueFromFileStatus(proc_status_path, "VmHWM");
+ if (val.size() == 0)
+ return 0;
+ // key: value
+ assert(isStrNumber(val[1]));
+ return std::stoul(val[1]);
+}
+
+uint32_t getGpuMemory(const std::string &process_name)
+{
+ assert(!process_name.empty());
+ auto val = getValueFromFileStatus(gpu_memory_path, process_name);
+ if (val.size() == 0)
+ return 0;
+ // process_name -> pid -> gpu_mem -> max_gpu_mem
+ assert(isStrNumber(val[2]));
+ return std::stoul(val[2]);
+}
+
+uint32_t getPssSum() { return getSumValueFromFileSmaps(proc_smaps_path, "Pss"); }
+
+std::string getProcessName()
+{
+ auto val = getValueFromFileStatus(proc_status_path, "Name");
+ assert(val.size() >= 2);
+ return val[1];
+}
+
+} // namespace benchmark
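
The smaps accumulator above has to skip SwapPss: lines, which also contain the substring "Pss". A standalone sketch of that filter against a canned smaps fragment:

```cpp
#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>

// Sum "Pss:" entries (in kB) while skipping "SwapPss:", mirroring
// the starts-with check in getSumValueFromFileSmaps.
int main()
{
  std::istringstream smaps("Pss:            12 kB\n"
                           "SwapPss:         4 kB\n"
                           "Pss:             8 kB\n");
  uint32_t sum = 0;
  std::string line;
  while (std::getline(smaps, line))
  {
    // Keep lines that *start* with "Pss"; "SwapPss" merely contains it.
    if (line.rfind("Pss", 0) != 0)
      continue;
    sum += std::stoul(line.substr(line.find(':') + 1)); // stoul skips blanks
  }
  std::cout << sum << " kB\n"; // prints "20 kB"
  return 0;
}
```
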
diff --git a/runtime/libs/benchmark/src/MemoryPoller.cpp b/runtime/libs/benchmark/src/MemoryPoller.cpp
index 61fdecd46..62339306e 100644
--- a/runtime/libs/benchmark/src/MemoryPoller.cpp
+++ b/runtime/libs/benchmark/src/MemoryPoller.cpp
@@ -16,111 +16,18 @@
#include "benchmark/MemoryPoller.h"
#include "benchmark/Types.h"
+#include "benchmark/MemoryInfo.h"
#include <vector>
-#include <fstream>
-#include <sstream>
#include <stdexcept>
#include <cassert>
#include <iostream>
-namespace
-{
-
-const std::string proc_status_path("/proc/self/status");
-const std::string gpu_memory_path("/sys/kernel/debug/mali0/gpu_memory");
-const std::string proc_smaps_path("/proc/self/smaps");
-
-bool isStrNumber(const std::string &s)
-{
- return !s.empty() &&
- std::find_if(s.begin(), s.end(), [](char c) { return !std::isdigit(c); }) == s.end();
-}
-
-std::vector<std::string> splitLine(std::string line, std::string delimiters = " \n\t")
-{
- std::vector<std::string> words;
- size_t prev = 0, pos;
-
- while ((pos = line.find_first_of(delimiters, prev)) != std::string::npos)
- {
- if (pos > prev)
- words.emplace_back(line.substr(prev, pos - prev));
- prev = pos + 1;
- }
-
- if (prev < line.length())
- words.emplace_back(line.substr(prev, std::string::npos));
-
- return words;
-}
-
-std::vector<std::string> getValueFromFileStatus(const std::string &file, const std::string &key)
-{
- std::ifstream ifs(file);
- assert(ifs.is_open());
-
- std::string line;
- std::vector<std::string> val;
-
- bool found = false;
- while (std::getline(ifs, line))
- {
- if (line.find(key) != std::string::npos)
- {
- found = true;
- break;
- }
- }
- ifs.close();
-
- if (!found)
- {
- // NOTE. the process which uses gpu resources cannot be there yet at the model-load phase.
- // At that time, just return empty.
- return val;
- }
-
- val = splitLine(line);
- return val;
-}
-
-// Because of smaps' structure, returns sum value as uint32_t
-uint32_t getSumValueFromFileSmaps(const std::string &file, const std::string &key)
-{
- std::ifstream ifs(file);
- assert(ifs.is_open());
-
- std::string line;
- uint32_t sum = 0;
- while (std::getline(ifs, line))
- {
- if (line.find(key) != std::string::npos)
- {
- // an example by splitLine()
- // `Pss: 0 kB`
- // val[0]: "Pss:", val[1]: "0" val[2]: "kB"
- auto val = splitLine(line);
- assert(val.size() != 0);
- // SwapPss could show so that check where Pss is at the beginning
- if (val[0].find("Pss") != 0)
- {
- continue;
- }
- sum += std::stoul(val[1]);
- }
- }
-
- return sum;
-}
-
-} // namespace
-
namespace benchmark
{
MemoryPoller::MemoryPoller(std::chrono::milliseconds duration, bool gpu_poll)
- : _duration(duration), _run(false), _term(false), _gpu_poll(gpu_poll)
+ : _duration(duration), _run(false), _term(false), _gpu_poll(gpu_poll)
{
if (prepareMemoryPolling() == false)
throw std::runtime_error("failed to prepare memory polling");
@@ -168,7 +75,7 @@ bool MemoryPoller::end(PhaseEnum phase)
mem = getVmRSS();
if (_gpu_poll)
{
- mem += getGpuMemory();
+ mem += getGpuMemory(_process_name);
}
if (mem > _rss_map[phase])
_rss_map[phase] = mem;
@@ -176,7 +83,7 @@ bool MemoryPoller::end(PhaseEnum phase)
mem = getVmHWM();
if (_gpu_poll)
{
- mem += getGpuMemory();
+ mem += getGpuMemory(_process_name);
}
_hwm_map[phase] = mem;
@@ -208,13 +115,13 @@ void MemoryPoller::process()
uint32_t cur_hwm = getVmHWM();
if (_gpu_poll)
{
- auto gpu_mem = getGpuMemory();
+ auto gpu_mem = getGpuMemory(_process_name);
cur_rss += gpu_mem;
cur_hwm += gpu_mem;
}
uint32_t cur_pss = getPssSum();
- for (auto &phase : _phases)
+ for (const auto &phase : _phases)
{
auto &rss = _rss_map.at(phase);
if (rss < cur_rss)
@@ -236,77 +143,33 @@ void MemoryPoller::process()
bool MemoryPoller::prepareMemoryPolling()
{
// VmRSS
+ if (!prepareVmRSS())
{
- std::ifstream ifs(proc_status_path);
- if (!ifs.is_open())
- {
- std::cerr << "failed to open " << proc_status_path << std::endl;
- return false;
- }
- ifs.close();
+ std::cerr << "failed to prepare parsing vmrss" << std::endl;
+ return false;
}
// (Additionally) GpuMemory
if (_gpu_poll)
{
- std::ifstream ifs(gpu_memory_path);
- if (!ifs.is_open())
+ if (!prepareGpuMemory())
{
- std::cerr << "failed to open " << gpu_memory_path << std::endl;
+ std::cerr << "failed to prepare parsing gpu memory" << std::endl;
return false;
}
- ifs.close();
// Needs process name
- auto val = getValueFromFileStatus(proc_status_path, "Name");
- assert(val.size() != 0);
- _process_name = val[1];
+ _process_name = getProcessName();
}
// PSS
+ if (!preparePssSum())
{
- std::ifstream ifs(proc_smaps_path);
- if (!ifs.is_open())
- {
- std::cerr << "failed to open " << proc_smaps_path << std::endl;
- return false;
- }
- ifs.close();
+ std::cerr << "failed to prepare parsing pss sum" << std::endl;
+ return false;
}
return true;
}
-uint32_t MemoryPoller::getVmRSS()
-{
- auto val = getValueFromFileStatus(proc_status_path, "VmRSS");
- if (val.size() == 0)
- return 0;
- assert(isStrNumber(val[1]));
- return std::stoul(val[1]);
-}
-
-uint32_t MemoryPoller::getVmHWM()
-{
- auto val = getValueFromFileStatus(proc_status_path, "VmHWM");
- if (val.size() == 0)
- return 0;
- // key: value
- assert(isStrNumber(val[1]));
- return std::stoul(val[1]);
-}
-
-uint32_t MemoryPoller::getGpuMemory()
-{
- assert(!_process_name.empty());
- auto val = getValueFromFileStatus(gpu_memory_path, _process_name);
- if (val.size() == 0)
- return 0;
- // process_name -> pid -> gpu_mem -> max_gpu_mem
- assert(isStrNumber(val[2]));
- return std::stoul(val[2]);
-}
-
-uint32_t MemoryPoller::getPssSum() { return getSumValueFromFileSmaps(proc_smaps_path, "Pss"); }
-
} // namespace benchmark
diff --git a/runtime/libs/benchmark/src/Phases.cpp b/runtime/libs/benchmark/src/Phases.cpp
index 9ab67cfd9..d8d9c3cb7 100644
--- a/runtime/libs/benchmark/src/Phases.cpp
+++ b/runtime/libs/benchmark/src/Phases.cpp
@@ -17,20 +17,21 @@
#include "benchmark/Phases.h"
#include "benchmark/Types.h"
+#include "benchmark/MemoryInfo.h"
#include <cassert>
#include <chrono>
#include <iostream>
-#include <sys/time.h>
+#include <time.h>
namespace
{
uint64_t nowMicros()
{
- struct timeval tv;
- gettimeofday(&tv, nullptr);
- return static_cast<uint64_t>(tv.tv_sec) * 1e6 + tv.tv_usec;
+ struct timespec ts;
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ return static_cast<uint64_t>(ts.tv_nsec) / 1e3 + static_cast<uint64_t>(ts.tv_sec) * 1e6;
}
void SleepForMicros(uint64_t micros)
@@ -41,13 +42,16 @@ void SleepForMicros(uint64_t micros)
sleep_time.tv_nsec = micros * 1e3;
nanosleep(&sleep_time, nullptr);
}
-}
+} // namespace
namespace benchmark
{
-Phases::Phases(const PhaseOption &option) : _option(option)
+Phases::Phases(const PhaseOption &option) : _option(option), _mem_before_init(0), _mem_after_run(0)
{
+ assert(prepareVmRSS());
+ _mem_before_init = getVmHWM();
+
if (_option.memory)
{
_mem_poll = std::make_unique<MemoryPoller>(std::chrono::milliseconds(option.memory_interval),
@@ -93,6 +97,8 @@ void Phases::run(const std::string &tag, const PhaseFunc &exec, const PhaseFunc
}
}
+ _mem_after_run = getVmHWM();
+
if (p == PhaseEnum::END_OF_PHASE)
{
return;
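
Beyond wiring in MemoryInfo, this diff switches nowMicros from gettimeofday to clock_gettime(CLOCK_MONOTONIC), so phase timings no longer jump if the wall clock is adjusted mid-benchmark. The same computation as a self-contained sketch, written with integer arithmetic for clarity (the diff uses floating-point literals):

```cpp
#include <cstdint>
#include <ctime>

// Microseconds from a monotonic clock, immune to wall-clock steps.
uint64_t nowMicros()
{
  struct timespec ts;
  clock_gettime(CLOCK_MONOTONIC, &ts);
  return static_cast<uint64_t>(ts.tv_sec) * 1000000ull + ts.tv_nsec / 1000;
}
```
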
diff --git a/runtime/libs/benchmark/src/Result.cpp b/runtime/libs/benchmark/src/Result.cpp
index df573da92..8c1e2d2ea 100644
--- a/runtime/libs/benchmark/src/Result.cpp
+++ b/runtime/libs/benchmark/src/Result.cpp
@@ -57,7 +57,7 @@ double minTimeMs(const benchmark::Phase &phase)
double geomeanTimeMs(const benchmark::Phase &phase)
{
double log_sum = 0.0;
- for (auto t_us : phase.time)
+ for (auto &&t_us : phase.time)
{
log_sum += std::log(t_us / 1e3);
}
@@ -77,9 +77,9 @@ uint32_t averageMemoryKb(const benchmark::Phase &phase, int type)
return average<uint32_t, uint32_t>(phase.memory[type]);
}
-uint32_t peakMemory(const uint32_t memory[benchmark::PhaseEnum::END_OF_PHASE]
- [benchmark::MemoryType::END_OF_MEM_TYPE],
- int type)
+uint32_t peakMemory(
+ const uint32_t memory[benchmark::PhaseEnum::END_OF_PHASE][benchmark::MemoryType::END_OF_MEM_TYPE],
+ int type)
{
using namespace benchmark;
// tricky. handle WARMUP as EXECUTE
@@ -88,7 +88,7 @@ uint32_t peakMemory(const uint32_t memory[benchmark::PhaseEnum::END_OF_PHASE]
}
void printResultTime(
- const double time[benchmark::PhaseEnum::END_OF_PHASE][benchmark::FigureType::END_OF_FIG_TYPE])
+ const double time[benchmark::PhaseEnum::END_OF_PHASE][benchmark::FigureType::END_OF_FIG_TYPE])
{
using namespace benchmark;
@@ -119,8 +119,8 @@ void printResultTime(
std::cout << "===================================" << std::endl;
}
-void printResultMemory(const uint32_t memory[benchmark::PhaseEnum::END_OF_PHASE]
- [benchmark::MemoryType::END_OF_MEM_TYPE])
+void printResultMemory(
+ const uint32_t memory[benchmark::PhaseEnum::END_OF_PHASE][benchmark::MemoryType::END_OF_MEM_TYPE])
{
using namespace benchmark;
@@ -141,6 +141,15 @@ void printResultMemory(const uint32_t memory[benchmark::PhaseEnum::END_OF_PHASE]
}
}
+void printUsedPeakMemory(uint32_t init_memory, uint32_t peak_memory)
+{
+ uint32_t used_peak_memory = peak_memory - init_memory;
+ std::cout << "Used Peak Memory : " << used_peak_memory << " kb" << std::endl;
+ std::cout << "- HWM after run : " << peak_memory << " kb" << std::endl;
+ std::cout << "- HWM before init: " << init_memory << " kb" << std::endl;
+ std::cout << "===================================" << std::endl;
+}
+
} // namespace
namespace benchmark
@@ -148,16 +157,16 @@ namespace benchmark
Result::Result(const Phases &phases)
{
- const auto option = phases.option();
+ const auto &option = phases.option();
{
for (int i = PhaseEnum::MODEL_LOAD; i <= PhaseEnum::PREPARE; ++i)
{
- auto phase = phases.at(gPhaseStrings[i]);
+ const auto &phase = phases.at(gPhaseStrings[i]);
time[i][FigureType::MEAN] = averageTimeMs(phase);
}
int i = PhaseEnum::EXECUTE;
- auto exec_phase = phases.at(gPhaseStrings[i]);
+ const auto &exec_phase = phases.at(gPhaseStrings[i]);
time[i][FigureType::MEAN] = averageTimeMs(exec_phase);
time[i][FigureType::MAX] = maxTimeMs(exec_phase);
time[i][FigureType::MIN] = minTimeMs(exec_phase);
@@ -175,6 +184,8 @@ Result::Result(const Phases &phases)
}
}
}
+ init_memory = phases.mem_before_init();
+ peak_memory = phases.mem_after_run();
}
void printResult(const Result &result)
@@ -185,6 +196,7 @@ void printResult(const Result &result)
return;
printResultMemory(result.memory);
+ printUsedPeakMemory(result.init_memory, result.peak_memory);
}
// TODO There are necessary for a kind of output data file so that it doesn't have to be csv file
diff --git a/runtime/libs/misc/CMakeLists.txt b/runtime/libs/misc/CMakeLists.txt
index 557d403ec..3e02adbc3 100644
--- a/runtime/libs/misc/CMakeLists.txt
+++ b/runtime/libs/misc/CMakeLists.txt
@@ -1,11 +1,22 @@
# Library `nnfw_lib_misc`
-file(GLOB_RECURSE NNFW_UTILITY_SRCS "src/*.cpp")
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
-add_library(nnfw_lib_misc STATIC ${NNFW_UTILITY_SRCS})
+add_library(nnfw_lib_misc STATIC ${SOURCES})
target_include_directories(nnfw_lib_misc PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
set_target_properties(nnfw_lib_misc PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_link_libraries(nnfw_lib_misc PRIVATE nnfw_common)
target_link_libraries(nnfw_lib_misc PRIVATE nnfw_coverage)
-add_executable(nnfw_tensor_index_iterator "examples/tensor_index_iterator.cpp")
-target_link_libraries(nnfw_tensor_index_iterator nnfw_lib_misc)
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+add_executable(nnfw_lib_misc_test ${TESTS})
+target_link_libraries(nnfw_lib_misc_test PRIVATE nnfw_lib_misc)
+target_link_libraries(nnfw_lib_misc_test PRIVATE nnfw_coverage)
+target_link_libraries(nnfw_lib_misc_test PUBLIC gtest gtest_main ${LIB_PTHREAD})
+
+add_test(nnfw_lib_misc_test nnfw_lib_misc_test)
+install(TARGETS nnfw_lib_misc_test DESTINATION unittest)
diff --git a/runtime/libs/misc/examples/tensor_index_iterator.cpp b/runtime/libs/misc/examples/tensor_index_iterator.cpp
deleted file mode 100644
index 590b433df..000000000
--- a/runtime/libs/misc/examples/tensor_index_iterator.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "misc/tensor/IndexIterator.h"
-
-#include <array>
-
-#include <iostream>
-#include <algorithm>
-
-#include <cassert>
-
-void test_iterate(void)
-{
- const nnfw::misc::tensor::Shape shape{3, 4, 7};
-
- std::array<int, 3 * 4 * 7> array;
-
- array.fill(0);
-
- using nnfw::misc::tensor::Index;
- using nnfw::misc::tensor::iterate;
-
- iterate(shape) << [&](const Index &index) {
- assert(index.rank() == shape.rank());
-
- const uint32_t rank = index.rank();
-
- uint32_t offset = index.at(0);
-
- for (uint32_t axis = 1; axis < rank; ++axis)
- {
- offset *= shape.dim(axis);
- offset += index.at(axis);
- }
-
- array[offset] += 1;
- };
-
- assert(std::all_of(array.begin(), array.end(), [](int num) { return num == 1; }));
-}
-
-int main(int argc, char **argv)
-{
- test_iterate();
-
- nnfw::misc::tensor::Shape shape{3, 4, 3, 4};
-
- std::cout << "Iterate over tensor{3, 4, 3, 4}" << std::endl;
-
- nnfw::misc::tensor::iterate(shape) << [](const nnfw::misc::tensor::Index &index) {
- std::cout << "rank: " << index.rank() << std::endl;
-
- for (uint32_t d = 0; d < index.rank(); ++d)
- {
- std::cout << " offset(" << d << ") = " << index.at(d) << std::endl;
- }
- };
-
- return 0;
-}
diff --git a/runtime/libs/misc/include/misc/EnvConfigSource.h b/runtime/libs/misc/include/misc/EnvConfigSource.h
new file mode 100644
index 000000000..63c8ae9c0
--- /dev/null
+++ b/runtime/libs/misc/include/misc/EnvConfigSource.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_MISC_ENV_CONFIG_SOURCE_H__
+#define __NNFW_MISC_ENV_CONFIG_SOURCE_H__
+
+#include "GeneralConfigSource.h"
+
+#include <unordered_map>
+
+namespace nnfw
+{
+namespace misc
+{
+
+class EnvConfigSource final : public GeneralConfigSource
+{
+public:
+ std::string get(const std::string &key) const override;
+
+private:
+ std::unordered_map<std::string, std::string> _default_attributes;
+};
+
+} // namespace misc
+} // namespace nnfw
+
+#endif // __NNFW_MISC_ENV_CONFIG_SOURCE_H__
diff --git a/runtime/libs/misc/include/misc/GeneralConfigSource.h b/runtime/libs/misc/include/misc/GeneralConfigSource.h
new file mode 100644
index 000000000..a3de66e81
--- /dev/null
+++ b/runtime/libs/misc/include/misc/GeneralConfigSource.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_MISC_GENERAL_CONFIG_SOURCE_H__
+#define __NNFW_MISC_GENERAL_CONFIG_SOURCE_H__
+
+#include "IConfigSource.h"
+
+#include <unordered_map>
+
+namespace nnfw
+{
+namespace misc
+{
+
+class GeneralConfigSource : public IConfigSource
+{
+public:
+ GeneralConfigSource() = default;
+
+ std::string get(const std::string &key) const override;
+ void set(const std::string &key, const std::string &val);
+
+private:
+ std::unordered_map<std::string, std::string> _map;
+};
+
+} // namespace misc
+} // namespace nnfw
+
+#endif // __NNFW_MISC_GENERAL_CONFIG_SOURCE_H__
diff --git a/runtime/libs/misc/include/misc/IConfigSource.h b/runtime/libs/misc/include/misc/IConfigSource.h
new file mode 100644
index 000000000..fe2c48ecf
--- /dev/null
+++ b/runtime/libs/misc/include/misc/IConfigSource.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_MISC_I_CONFIG_SOURCE_H__
+#define __NNFW_MISC_I_CONFIG_SOURCE_H__
+
+#include <string>
+
+namespace nnfw
+{
+namespace misc
+{
+
+struct IConfigSource
+{
+ /**
+ * @brief Destroy the IConfigSource object
+ */
+ virtual ~IConfigSource() = default;
+
+ /**
+ * @brief get the value for the matching key
+ *
+ * @param key string key to search
+ * @return string value associated with the key
+ */
+ virtual std::string get(const std::string &key) const = 0;
+};
+
+} // namespace misc
+} // namespace nnfw
+
+#endif // __NNFW_MISC_I_CONFIG_SOURCE_H__
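
The three new headers form a small chain: IConfigSource is the interface, GeneralConfigSource backs it with a string map, and EnvConfigSource (whose definition appears later in this diff) consults getenv before falling back to that map. A usage sketch under those assumptions:

```cpp
#include "misc/EnvConfigSource.h"

#include <iostream>

int main()
{
  nnfw::misc::EnvConfigSource config;
  // set() is inherited from the GeneralConfigSource base class.
  config.set("BACKENDS", "cpu");

  // If the BACKENDS environment variable is exported, it wins;
  // otherwise the stored default "cpu" is returned.
  std::cout << config.get("BACKENDS") << std::endl;
  return 0;
}
```
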
diff --git a/runtime/libs/misc/include/misc/RandomGenerator.h b/runtime/libs/misc/include/misc/RandomGenerator.h
index 8d26b8c74..8da4f7f20 100644
--- a/runtime/libs/misc/include/misc/RandomGenerator.h
+++ b/runtime/libs/misc/include/misc/RandomGenerator.h
@@ -76,6 +76,7 @@ private:
std::normal_distribution<float> _dist;
};
+template <> int8_t RandomGenerator::generate<int8_t>(void);
template <> uint8_t RandomGenerator::generate<uint8_t>(void);
template <> bool RandomGenerator::generate<bool>(void);
template <> int32_t RandomGenerator::generate<int32_t>(void);
diff --git a/runtime/libs/misc/include/misc/feature/Index.h b/runtime/libs/misc/include/misc/feature/Index.h
index a361d8dd2..09d65a59a 100644
--- a/runtime/libs/misc/include/misc/feature/Index.h
+++ b/runtime/libs/misc/include/misc/feature/Index.h
@@ -62,7 +62,7 @@ public:
* @param[in] col The width index
*/
Index(int32_t batch, int32_t ch, int32_t row, int32_t col)
- : _batch{batch}, _ch{ch}, _row{row}, _col{col}
+ : _batch{batch}, _ch{ch}, _row{row}, _col{col}
{
// DO NOTHING
}
diff --git a/runtime/libs/misc/include/misc/feature/Shape.h b/runtime/libs/misc/include/misc/feature/Shape.h
index 09881f58b..2c31b457c 100644
--- a/runtime/libs/misc/include/misc/feature/Shape.h
+++ b/runtime/libs/misc/include/misc/feature/Shape.h
@@ -64,7 +64,7 @@ struct Shape
* @param[in] width The width value
*/
Shape(int32_t batch, int32_t depth, int32_t height, int32_t width)
- : N{batch}, C{depth}, H{height}, W{width}
+ : N{batch}, C{depth}, H{height}, W{width}
{
// DO NOTHING
}
diff --git a/runtime/libs/misc/include/misc/kernel/Shape.h b/runtime/libs/misc/include/misc/kernel/Shape.h
index 27d6a8bf0..176db0a11 100644
--- a/runtime/libs/misc/include/misc/kernel/Shape.h
+++ b/runtime/libs/misc/include/misc/kernel/Shape.h
@@ -55,7 +55,7 @@ struct Shape
* @param[in] width The width index
*/
Shape(int32_t count, int32_t depth, int32_t height, int32_t width)
- : N{count}, C{depth}, H{height}, W{width}
+ : N{count}, C{depth}, H{height}, W{width}
{
// DO NOTHING
}
diff --git a/runtime/libs/misc/include/misc/polymorphic_downcast.h b/runtime/libs/misc/include/misc/polymorphic_downcast.h
index 412b864e6..ee885eb70 100644
--- a/runtime/libs/misc/include/misc/polymorphic_downcast.h
+++ b/runtime/libs/misc/include/misc/polymorphic_downcast.h
@@ -27,9 +27,7 @@ namespace misc
template <typename DstType, typename SrcType> inline DstType polymorphic_downcast(SrcType *x)
{
-#ifndef __ANDROID__
assert(dynamic_cast<DstType>(x) == x);
-#endif
return static_cast<DstType>(x);
}
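
Removing the __ANDROID__ guard means the dynamic_cast sanity check now runs in debug builds on every platform. A small sketch of what that assert catches; Base, Derived, and Other are hypothetical types:

```cpp
#include "misc/polymorphic_downcast.h"

struct Base { virtual ~Base() = default; };
struct Derived : Base {};
struct Other : Base {};

int main()
{
  Derived d;
  Base *b = &d;

  // OK: b really points at a Derived, so the debug assert passes.
  Derived *ok = nnfw::misc::polymorphic_downcast<Derived *>(b);
  (void)ok;

  // Would fire the assert in a debug build: b does not point at an Other.
  // Other *bad = nnfw::misc::polymorphic_downcast<Other *>(b);
  return 0;
}
```
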
diff --git a/runtime/libs/misc/include/misc/string_helpers.h b/runtime/libs/misc/include/misc/string_helpers.h
index 46fecca71..c9d72034f 100644
--- a/runtime/libs/misc/include/misc/string_helpers.h
+++ b/runtime/libs/misc/include/misc/string_helpers.h
@@ -50,7 +50,7 @@ inline std::vector<std::string> split(const std::string &s, char delim)
std::vector<std::string> elems;
while (std::getline(ss, item, delim))
{
- elems.push_back(std::move(item));
+ elems.push_back(item);
}
return elems;
}
diff --git a/runtime/libs/misc/include/misc/tensor/Object.h b/runtime/libs/misc/include/misc/tensor/Object.h
index cba4f1baf..15ad6da4f 100644
--- a/runtime/libs/misc/include/misc/tensor/Object.h
+++ b/runtime/libs/misc/include/misc/tensor/Object.h
@@ -74,9 +74,8 @@ public:
_values.resize(_shape.dim(0) * _stride.at(0));
// Set 'value'
- iterate(_shape) << [this, &fn](const Index &index) {
- _values.at(_stride.offset(index)) = fn(_shape, index);
- };
+ iterate(_shape) <<
+ [this, &fn](const Index &index) { _values.at(_stride.offset(index)) = fn(_shape, index); };
}
}
diff --git a/runtime/libs/misc/include/misc/tensor/Zipper.h b/runtime/libs/misc/include/misc/tensor/Zipper.h
index 8f0ec4ab6..b1ca3d003 100644
--- a/runtime/libs/misc/include/misc/tensor/Zipper.h
+++ b/runtime/libs/misc/include/misc/tensor/Zipper.h
@@ -48,7 +48,7 @@ public:
* @param[in] rhs @c Reader object of a tensor
*/
Zipper(const Shape &shape, const Reader<T> &lhs, const Reader<T> &rhs)
- : _shape{shape}, _lhs{lhs}, _rhs{rhs}
+ : _shape{shape}, _lhs{lhs}, _rhs{rhs}
{
// DO NOTHING
}
@@ -63,7 +63,7 @@ public:
template <typename Callable> void zip(Callable cb) const
{
iterate(_shape) <<
- [this, &cb](const Index &index) { cb(index, _lhs.at(index), _rhs.at(index)); };
+ [this, &cb](const Index &index) { cb(index, _lhs.at(index), _rhs.at(index)); };
}
private:
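
The Zipper hunks are formatting-only, but the idiom deserves a note: zip(shape, lhs, rhs) << callback visits every index in the shape and passes both readers' values to the callback, which is the same mechanism Comparator::compare uses later in this diff. A hedged sketch with a minimal rank-1 reader (FlatReader is illustrative, not part of the library):

```cpp
#include "misc/tensor/Zipper.h"
#include "misc/tensor/Reader.h"

#include <iostream>

using namespace nnfw::misc::tensor;

// Minimal Reader<float> over a flat buffer, rank-1 only, for illustration.
class FlatReader final : public Reader<float>
{
public:
  FlatReader(const float *data) : _data(data) {}
  float at(const Index &index) const override { return _data[index.at(0)]; }

private:
  const float *_data;
};

int main()
{
  const float a[3] = {1.f, 2.f, 3.f};
  const float b[3] = {1.f, 5.f, 3.f};
  FlatReader lhs{a}, rhs{b};

  zip(Shape{3}, lhs, rhs) << [](const Index &index, float l, float r) {
    if (l != r)
      std::cout << "mismatch at " << index.at(0) << ": " << l << " vs " << r << "\n";
  };
  return 0;
}
```
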
diff --git a/runtime/libs/misc/src/EnvConfigSource.cpp b/runtime/libs/misc/src/EnvConfigSource.cpp
new file mode 100644
index 000000000..3abc9d196
--- /dev/null
+++ b/runtime/libs/misc/src/EnvConfigSource.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "misc/EnvConfigSource.h"
+
+#include <cstdlib>
+
+namespace nnfw
+{
+namespace misc
+{
+
+std::string EnvConfigSource::get(const std::string &key) const
+{
+ const char *value = std::getenv(key.c_str());
+ if (value != nullptr)
+ {
+ return value;
+ }
+ else
+ {
+ return GeneralConfigSource::get(key);
+ }
+}
+
+} // namespace misc
+} // namespace nnfw
diff --git a/runtime/libs/misc/src/GeneralConfigSource.cpp b/runtime/libs/misc/src/GeneralConfigSource.cpp
new file mode 100644
index 000000000..298c1663e
--- /dev/null
+++ b/runtime/libs/misc/src/GeneralConfigSource.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "misc/GeneralConfigSource.h"
+
+namespace nnfw
+{
+namespace misc
+{
+
+std::string GeneralConfigSource::get(const std::string &key) const
+{
+ auto itr = _map.find(key);
+ if (itr == _map.end())
+ {
+ return "";
+ }
+ else
+ {
+ return itr->second;
+ }
+}
+
+void GeneralConfigSource::set(const std::string &key, const std::string &val) { _map[key] = val; }
+
+} // namespace misc
+} // namespace nnfw
diff --git a/runtime/libs/misc/src/RandomGenerator.cpp b/runtime/libs/misc/src/RandomGenerator.cpp
index e7fbc10ca..af072326b 100644
--- a/runtime/libs/misc/src/RandomGenerator.cpp
+++ b/runtime/libs/misc/src/RandomGenerator.cpp
@@ -21,6 +21,34 @@ namespace nnfw
namespace misc
{
+template <> int8_t RandomGenerator::generate<int8_t>(void)
+{
+ // The value of type_range is 255.
+ float type_range = static_cast<float>(std::numeric_limits<int8_t>::max()) -
+ static_cast<float>(std::numeric_limits<int8_t>::min());
+ // Most _dist values range from -5.0 to 5.0.
+ float min_range = -5.0f;
+ float max_range = 5.0f;
+ // NOTE shifted_relative_val follows a Gaussian distribution whose original mean was 0 and
+ // standard deviation was 2. Its values are then scaled and shifted so that the mean becomes
+ // 127.5 and the range is about [0, 255].
+ float shifted_relative_val = (_dist(_rand) - min_range) * type_range / (max_range - min_range);
+
+ // If shifted_relative_val is out of range, it is clamped to the end points of the range.
+ if (shifted_relative_val < -128.0f)
+ {
+ return -128;
+ }
+ else if (shifted_relative_val > type_range)
+ {
+ return 127;
+ }
+
+ // Convert shifted_relative_val from float to int8
+ return static_cast<int8_t>(shifted_relative_val);
+}
+
template <> uint8_t RandomGenerator::generate<uint8_t>(void)
{
// The value of type_range is 255.
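
The new int8_t specialization linearly maps a Gaussian sample (mean 0, stddev 2, so effectively within about [-5, 5]) onto [0, 255] around a mean of 127.5, then clamps stragglers before the final cast. A standalone sketch of just the mapping step with the same constants; the clamping is omitted here for brevity:

```cpp
#include <iostream>
#include <random>

int main()
{
  std::mt19937 rng{std::random_device{}()};
  std::normal_distribution<float> dist{0.0f, 2.0f};

  const float min_range = -5.0f, max_range = 5.0f;
  const float type_range = 127.0f - (-128.0f); // 255

  for (int i = 0; i < 5; ++i)
  {
    // (sample - min) * range / (max - min): mean 0 lands on 127.5.
    float v = (dist(rng) - min_range) * type_range / (max_range - min_range);
    std::cout << v << '\n'; // mostly within [0, 255]
  }
  return 0;
}
```
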
diff --git a/runtime/libs/misc/src/string_helpers.test.cpp b/runtime/libs/misc/src/string_helpers.test.cpp
new file mode 100644
index 000000000..1111425d0
--- /dev/null
+++ b/runtime/libs/misc/src/string_helpers.test.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "misc/string_helpers.h"
+
+#include <gtest/gtest.h>
+
+TEST(StringHelpersTest, split)
+{
+ const std::string example = "abc;def;ghi";
+
+ auto str_vector = nnfw::misc::split(example, ';');
+
+ ASSERT_EQ(str_vector.size(), 3);
+ EXPECT_STREQ(str_vector[0].c_str(), "abc");
+ EXPECT_STREQ(str_vector[1].c_str(), "def");
+ EXPECT_STREQ(str_vector[2].c_str(), "ghi");
+}
+
+TEST(StringHelpersTest, neg_split_empty)
+{
+ const std::string example = "";
+
+ auto str_vector = nnfw::misc::split(example, ';');
+
+ ASSERT_EQ(str_vector.size(), 0);
+}
+
+TEST(StringHelpersTest, neg_nonsplit)
+{
+ const std::string example = "abc;def;ghi";
+
+ auto str_vector = nnfw::misc::split(example, ':');
+
+ ASSERT_EQ(str_vector.size(), 1);
+ EXPECT_STREQ(str_vector[0].c_str(), example.c_str());
+}
+
+TEST(StringHelpersTest, append)
+{
+ auto append_str = nnfw::misc::str("abc", "-", 1);
+
+ EXPECT_STREQ(append_str.c_str(), "abc-1");
+}
+
+TEST(StringHelpersTest, neg_append_nullstr)
+{
+ const char *null_str = nullptr;
+ auto append_str = nnfw::misc::str(null_str, null_str);
+
+ ASSERT_EQ(append_str.size(), 0);
+}
+
+TEST(StringHelpersTest, join)
+{
+ const std::vector<std::string> example = {"abc", "def", "ghi"};
+
+ auto join_str = nnfw::misc::join(example.begin(), example.end(), ";");
+ EXPECT_STREQ(join_str.c_str(), "abc;def;ghi");
+}
+
+TEST(StringHelpersTest, neg_join_empty)
+{
+ const std::vector<std::string> example = {};
+
+ auto join_str = nnfw::misc::join(example.begin(), example.end(), ";");
+ ASSERT_EQ(join_str.size(), 0);
+}
diff --git a/runtime/libs/misc/src/tensor/Comparator.cpp b/runtime/libs/misc/src/tensor/Comparator.cpp
index 80a18c11a..5fcf38cc8 100644
--- a/runtime/libs/misc/src/tensor/Comparator.cpp
+++ b/runtime/libs/misc/src/tensor/Comparator.cpp
@@ -33,18 +33,18 @@ std::vector<Diff<float>> Comparator::compare(const Shape &shape, const Reader<fl
std::vector<Diff<float>> res;
zip(shape, expected, obtained) <<
- [&](const Index &index, float expected_value, float obtained_value) {
- if (!_compare_fn(expected_value, obtained_value))
- {
- res.emplace_back(index, expected_value, obtained_value);
- }
-
- // Update max_diff_index, if necessary
- if (observer != nullptr)
- {
- observer->notify(index, expected_value, obtained_value);
- }
- };
+ [&](const Index &index, float expected_value, float obtained_value) {
+ if (!_compare_fn(expected_value, obtained_value))
+ {
+ res.emplace_back(index, expected_value, obtained_value);
+ }
+
+ // Update max_diff_index, if necessary
+ if (observer != nullptr)
+ {
+ observer->notify(index, expected_value, obtained_value);
+ }
+ };
return res;
}
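
Comparator::compare, reformatted above, records every index at which the tolerance predicate fails. A hedged usage sketch, assuming the Comparator constructor takes that predicate and that the observer argument of compare defaults to null; FlatReader repeats the illustrative rank-1 reader from the Zipper sketch earlier:

```cpp
#include "misc/tensor/Comparator.h"
#include "misc/tensor/Reader.h"

#include <cmath>
#include <iostream>

using namespace nnfw::misc::tensor;

// Rank-1 reader over a flat buffer, as in the Zipper sketch above.
struct FlatReader final : Reader<float>
{
  const float *data;
  FlatReader(const float *d) : data(d) {}
  float at(const Index &index) const override { return data[index.at(0)]; }
};

int main()
{
  const float expected_buf[3] = {1.0f, 2.0f, 3.0f};
  const float obtained_buf[3] = {1.0f, 2.5f, 3.0f};
  FlatReader expected{expected_buf}, obtained{obtained_buf};

  // Assumed ctor: a predicate deciding whether two values "match".
  Comparator comparator{[](float e, float o) { return std::fabs(e - o) < 1e-3f; }};

  auto diffs = comparator.compare(Shape{3}, expected, obtained);
  std::cout << diffs.size() << " mismatching element(s)\n"; // prints 1
  return 0;
}
```
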
diff --git a/runtime/libs/misc/src/tensor/IndexEnumerator.test.cpp b/runtime/libs/misc/src/tensor/IndexEnumerator.test.cpp
new file mode 100644
index 000000000..4cff6067f
--- /dev/null
+++ b/runtime/libs/misc/src/tensor/IndexEnumerator.test.cpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "misc/tensor/IndexEnumerator.h"
+
+#include <vector>
+#include <algorithm>
+
+#include <gtest/gtest.h>
+
+using nnfw::misc::tensor::Shape;
+using nnfw::misc::tensor::Index;
+using nnfw::misc::tensor::IndexEnumerator;
+
+TEST(MiscIndexEnumeratorTest, iterate_full_range)
+{
+ const uint32_t H = 3;
+ const uint32_t W = 4;
+
+ const Shape shape{H, W};
+
+ std::vector<uint32_t> count;
+
+ count.resize(H * W, 0);
+
+ for (IndexEnumerator e{shape}; e.valid(); e.advance())
+ {
+ const auto &ind = e.curr();
+
+ ASSERT_EQ(2, ind.rank());
+ count.at(ind.at(0) * W + ind.at(1)) += 1;
+ }
+
+ ASSERT_TRUE(std::all_of(count.begin(), count.end(), [](uint32_t n) { return n == 1; }));
+}
+
+TEST(MiscIndexEnumeratorTest, neg_zero_rank_shape)
+{
+ // Test the abnormal case of an empty shape.
+ // It is expected not to throw any exception and to do nothing.
+ const Shape shape{};
+ IndexEnumerator e{shape};
+ ASSERT_NO_THROW(e.valid());
+ ASSERT_NO_THROW(e.advance());
+ SUCCEED();
+}
diff --git a/runtime/libs/misc/src/tensor/IndexIterator.test.cpp b/runtime/libs/misc/src/tensor/IndexIterator.test.cpp
new file mode 100644
index 000000000..875786bdd
--- /dev/null
+++ b/runtime/libs/misc/src/tensor/IndexIterator.test.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "misc/tensor/IndexIterator.h"
+
+#include <gtest/gtest.h>
+
+#include <algorithm>
+#include <array>
+
+using namespace nnfw::misc::tensor;
+
+TEST(MiscIndexIteratorTest, iterate)
+{
+ const Shape shape{3, 4, 7};
+
+ std::array<int, 3 * 4 * 7> array;
+
+ array.fill(0);
+
+ iterate(shape) << [&](const Index &index) {
+ assert(index.rank() == shape.rank());
+
+ const uint32_t rank = index.rank();
+
+ uint32_t offset = index.at(0);
+
+ for (uint32_t axis = 1; axis < rank; ++axis)
+ {
+ offset *= shape.dim(axis);
+ offset += index.at(axis);
+ }
+
+ array[offset] += 1;
+ };
+
+ ASSERT_TRUE(std::all_of(array.begin(), array.end(), [](int num) { return num == 1; }));
+}
+
+TEST(MiscIndexIteratorTest, neg_zero_rank_shape)
+{
+ // Test the abnormal case of an empty shape.
+ // It is expected not to throw any exception and to do nothing.
+ const Shape shape{};
+
+ ASSERT_NO_THROW(iterate(shape) << ([](const Index &index) {}));
+ SUCCEED();
+}
diff --git a/runtime/libs/ndarray/CMakeLists.txt b/runtime/libs/ndarray/CMakeLists.txt
index b040f5115..8d0ba0487 100644
--- a/runtime/libs/ndarray/CMakeLists.txt
+++ b/runtime/libs/ndarray/CMakeLists.txt
@@ -3,8 +3,6 @@ add_library(ndarray STATIC src/Array.cpp src/ContiguousSpan.cpp)
set_target_properties(ndarray PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(ndarray PUBLIC include)
-#can't make this private because of c++ templates
-target_include_directories(ndarray PUBLIC src)
option(NDARRAY_INLINE_TEMPLATES "Set to ON to disable extern declarations for common types")
@@ -15,5 +13,16 @@ endif()
target_link_libraries(ndarray PRIVATE nnfw_common)
target_link_libraries(ndarray PRIVATE nnfw_coverage)
-add_subdirectory(test)
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+add_executable(ndarray_test src/Array.test.cpp src/ContiguousSpan.test.cpp)
+target_link_libraries(ndarray_test PRIVATE ndarray)
+target_link_libraries(ndarray_test PRIVATE nnfw_coverage)
+target_link_libraries(ndarray_test PUBLIC gtest gtest_main ${LIB_PTHREAD})
+
+add_test(ndarray_test ndarray_test)
+install(TARGETS ndarray_test DESTINATION unittest)
+
add_subdirectory(example)
diff --git a/runtime/libs/ndarray/include/ndarray/Array.h b/runtime/libs/ndarray/include/ndarray/Array.h
index 3890cc26b..568fe1c77 100644
--- a/runtime/libs/ndarray/include/ndarray/Array.h
+++ b/runtime/libs/ndarray/include/ndarray/Array.h
@@ -22,37 +22,21 @@
#include "ContiguousSpan.h"
#include "Shape.h"
-#if __cplusplus < 201402L
-#include "detail/cxx14.h" //integer_sequence and make_index_dequence definitions
-#else
-#include <utility>
-#endif
-
#include <algorithm>
-#include <cassert>
-#include <type_traits>
#include <array>
-#include <tuple>
+#include <cassert>
#include <cstddef>
+#include <tuple>
+#include <type_traits>
+#include <utility>
namespace ndarray
{
-// there is no index_sequence before c++14
-#if __cplusplus < 201402L
-
-template <size_t... Nums> using index_sequence = cxx14::index_sequence<Nums...>;
-
-template <size_t Num> using make_index_sequence = cxx14::make_index_sequence<Num>;
-
-#else
-
template <size_t... Nums> using index_sequence = std::index_sequence<Nums...>;
template <size_t _Num> using make_index_sequence = std::make_index_sequence<_Num>;
-#endif //__cplusplus < 201402L
-
struct Strides
{
explicit Strides(Shape s) : _strides{} { fillStrides(s); }
@@ -157,8 +141,8 @@ private:
size_t offset(index_sequence<Nums...> seq, Ts... x) const noexcept
{
static_assert(
- sizeof...(Ts) == sizeof...(Nums),
- "Sanity check failed. Generated index sequence size is not equal to argument count");
+ sizeof...(Ts) == sizeof...(Nums),
+ "Sanity check failed. Generated index sequence size is not equal to argument count");
return _strides.offset(seq, x...);
}
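
With the pre-C++14 shim deleted, Array expands its index parameter pack against per-dimension strides via std::index_sequence directly. A reduced sketch of that offset computation; the strides are hard-coded here, whereas the real class derives them from the Shape in Strides::fillStrides:

```cpp
#include <array>
#include <cstddef>
#include <iostream>
#include <utility>

// Expand N indices against N strides with std::index_sequence,
// the same pattern Array/Strides use after dropping the C++11 shim.
template <size_t N> struct FlatOffset
{
  std::array<size_t, N> strides;

  template <typename... Ts> size_t operator()(Ts... xs) const noexcept
  {
    static_assert(sizeof...(Ts) == N, "index count must match rank");
    return offset(std::make_index_sequence<N>{}, xs...);
  }

private:
  template <size_t... Nums, typename... Ts>
  size_t offset(std::index_sequence<Nums...>, Ts... xs) const noexcept
  {
    size_t sum = 0;
    // Pairwise expansion: sum += x_i * strides[i] for each dimension.
    size_t dummy[] = {(sum += xs * strides[Nums])...};
    (void)dummy;
    return sum;
  }
};

int main()
{
  FlatOffset<2> off{{4, 1}};      // 2-D array with row stride 4
  std::cout << off(1, 2) << '\n'; // prints 6
  return 0;
}
```
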
diff --git a/runtime/libs/ndarray/include/ndarray/ContiguousSpan.h b/runtime/libs/ndarray/include/ndarray/ContiguousSpan.h
index 8caa6a686..b322b77db 100644
--- a/runtime/libs/ndarray/include/ndarray/ContiguousSpan.h
+++ b/runtime/libs/ndarray/include/ndarray/ContiguousSpan.h
@@ -37,7 +37,7 @@ public:
template <typename It>
explicit ContiguousSpan(It first, It last) noexcept
- : _data(&*first), _len(std::distance(first, last))
+ : _data(&*first), _len(std::distance(first, last))
{
}
diff --git a/runtime/libs/ndarray/src/Array.test.cpp b/runtime/libs/ndarray/src/Array.test.cpp
new file mode 100644
index 000000000..15e67600d
--- /dev/null
+++ b/runtime/libs/ndarray/src/Array.test.cpp
@@ -0,0 +1,452 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ndarray/Array.h"
+
+#include <gtest/gtest.h>
+
+using namespace ndarray;
+
+TEST(NDArrayArrayTests, basic_data_test)
+{
+ float raw_data[] = {1, 2, 3, 4};
+ int32_t raw_data_int[] = {1, 2, 3, 4};
+ uint32_t raw_data_uint[] = {1, 2, 3, 4};
+ int8_t raw_data_int8[] = {1, 2, 3, 4};
+
+ Array<float> data22{raw_data, {2, 2}};
+ Array<int32_t> data22_int{raw_data_int, {2, 2}};
+ Array<uint32_t> data22_uint{raw_data_uint, {2, 2}};
+ Array<int8_t> data22_int8{raw_data_int8, {2, 2}};
+
+ ASSERT_FLOAT_EQ(data22.at(0, 0), 1);
+ ASSERT_FLOAT_EQ(data22.at(0, 1), 2);
+ ASSERT_FLOAT_EQ(data22.at(1, 0), 3);
+ ASSERT_FLOAT_EQ(data22.at(1, 1), 4);
+ ASSERT_EQ(data22.shape().rank(), 2);
+ ASSERT_EQ(data22.shape().dim(0), 2);
+ ASSERT_EQ(data22.shape().dim(1), 2);
+
+ Array<float> data14{raw_data, {1, 4}};
+ ASSERT_FLOAT_EQ(data14.at(0, 0), 1);
+ ASSERT_FLOAT_EQ(data14.at(0, 1), 2);
+ ASSERT_FLOAT_EQ(data14.at(0, 2), 3);
+ ASSERT_FLOAT_EQ(data14.at(0, 3), 4);
+ ASSERT_EQ(data14.shape().rank(), 2);
+ ASSERT_EQ(data14.shape().dim(0), 1);
+ ASSERT_EQ(data14.shape().dim(1), 4);
+
+ // <float, false>
+ {
+ ContiguousSpan<float> cs = data22.flat();
+ ASSERT_EQ(cs.size(), 4);
+ ASSERT_FLOAT_EQ(cs.at(3), 4);
+
+ ContiguousSpan<float> cs2 = std::move(cs);
+ ASSERT_EQ(cs2.size(), 4);
+ ASSERT_FLOAT_EQ(cs2.at(3), 4);
+
+ float sum = 0;
+ for (auto it = cs2.begin(); it < cs2.end(); it++)
+ {
+ sum += *it;
+ }
+ ASSERT_EQ(sum, 10);
+
+ std::vector<float> array_data{1, 2, 3, 4};
+ auto cs3 = std::make_unique<ContiguousSpan<float>>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs3->size(), 4);
+ ASSERT_FLOAT_EQ(cs3->at(3), 4);
+
+ auto cs4 = std::move(cs3);
+ ASSERT_EQ(cs3, nullptr);
+ ASSERT_EQ(cs4->size(), 4);
+ ASSERT_FLOAT_EQ(cs4->at(3), 4);
+ }
+
+ // <float, true>
+ {
+ ContiguousSpan<float, true> cs = data22.flat();
+ ASSERT_EQ(cs.size(), 4);
+ ASSERT_FLOAT_EQ(cs.at(3), 4);
+
+ ContiguousSpan<float, true> cs2 = std::move(cs);
+ ASSERT_EQ(cs2.size(), 4);
+ ASSERT_FLOAT_EQ(cs2.at(3), 4);
+
+ float sum = 0;
+ for (auto it = cs2.begin(); it < cs2.end(); it++)
+ {
+ sum += *it;
+ }
+ ASSERT_FLOAT_EQ(sum, 10);
+
+ std::vector<float> array_data{1, 2, 3, 4};
+ auto cs3 = std::make_unique<ContiguousSpan<float, true>>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs3->size(), 4);
+ ASSERT_FLOAT_EQ(cs3->at(3), 4);
+
+ auto cs4 = std::move(cs3);
+ ASSERT_EQ(cs3, nullptr);
+ ASSERT_EQ(cs4->size(), 4);
+ ASSERT_FLOAT_EQ(cs4->at(3), 4);
+ }
+
+ // <int32_t, false>
+ {
+ ContiguousSpan<int32_t> cs = data22_int.flat();
+ ASSERT_EQ(cs.size(), 4);
+ ASSERT_EQ(cs.at(3), 4);
+
+ ContiguousSpan<int32_t> cs2 = std::move(cs);
+ ASSERT_EQ(cs2.size(), 4);
+ ASSERT_EQ(cs2.at(3), 4);
+
+ int32_t sum = 0;
+ for (auto it = cs2.begin(); it < cs2.end(); it++)
+ {
+ sum += *it;
+ }
+ ASSERT_EQ(sum, 10);
+
+ std::vector<int32_t> array_data{1, 2, 3, 4};
+ auto cs3 = std::make_unique<ContiguousSpan<int32_t>>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs3->size(), 4);
+ ASSERT_EQ(cs3->at(3), 4);
+
+ auto cs4 = std::move(cs3);
+ ASSERT_EQ(cs3, nullptr);
+ ASSERT_EQ(cs4->size(), 4);
+ ASSERT_EQ(cs4->at(3), 4);
+ }
+
+ // <int32_t, true>
+ {
+ ContiguousSpan<int32_t, true> cs = data22_int.flat();
+ ASSERT_EQ(cs.size(), 4);
+ ASSERT_EQ(cs.at(3), 4);
+
+ ContiguousSpan<int32_t, true> cs2 = std::move(cs);
+ ASSERT_EQ(cs2.size(), 4);
+ ASSERT_EQ(cs2.at(3), 4);
+
+ int32_t sum = 0;
+ for (auto it = cs2.begin(); it < cs2.end(); it++)
+ {
+ sum += *it;
+ }
+ ASSERT_EQ(sum, 10);
+
+ std::vector<int32_t> array_data{1, 2, 3, 4};
+ auto cs3 =
+ std::make_unique<ContiguousSpan<int32_t, true>>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs3->size(), 4);
+ ASSERT_EQ(cs3->at(3), 4);
+
+ auto cs4 = std::move(cs3);
+ ASSERT_EQ(cs3, nullptr);
+ ASSERT_EQ(cs4->size(), 4);
+ ASSERT_EQ(cs4->at(3), 4);
+ }
+
+ // <uint32_t, false>
+ {
+ ContiguousSpan<uint32_t> cs = data22_uint.flat();
+ ASSERT_EQ(cs.size(), 4);
+ ASSERT_EQ(cs.at(3), 4);
+
+ ContiguousSpan<uint32_t> cs2 = std::move(cs);
+ ASSERT_EQ(cs2.size(), 4);
+ ASSERT_EQ(cs2.at(3), 4);
+
+ uint32_t sum = 0;
+ for (auto it = cs2.begin(); it < cs2.end(); it++)
+ {
+ sum += *it;
+ }
+ ASSERT_EQ(sum, 10);
+
+ std::vector<uint32_t> array_data{1, 2, 3, 4};
+ auto cs3 = std::make_unique<ContiguousSpan<uint32_t>>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs3->size(), 4);
+ ASSERT_EQ(cs3->at(3), 4);
+
+ auto cs4 = std::move(cs3);
+ ASSERT_EQ(cs3, nullptr);
+ ASSERT_EQ(cs4->size(), 4);
+ ASSERT_EQ(cs4->at(3), 4);
+ }
+
+ // <uint32_t, true>
+ {
+ ContiguousSpan<uint32_t, true> cs = data22_uint.flat();
+ ASSERT_EQ(cs.size(), 4);
+ ASSERT_EQ(cs.at(3), 4);
+
+ ContiguousSpan<uint32_t, true> cs2 = std::move(cs);
+ ASSERT_EQ(cs2.size(), 4);
+ ASSERT_EQ(cs2.at(3), 4);
+
+ uint32_t sum = 0;
+ for (auto it = cs2.begin(); it < cs2.end(); it++)
+ {
+ sum += *it;
+ }
+ ASSERT_EQ(sum, 10);
+
+ std::vector<uint32_t> array_data{1, 2, 3, 4};
+ auto cs3 =
+ std::make_unique<ContiguousSpan<uint32_t, true>>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs3->size(), 4);
+ ASSERT_EQ(cs3->at(3), 4);
+
+ auto cs4 = std::move(cs3);
+ ASSERT_EQ(cs3, nullptr);
+ ASSERT_EQ(cs4->size(), 4);
+ ASSERT_EQ(cs4->at(3), 4);
+ }
+
+ // <int8_t, false>
+ {
+ ContiguousSpan<int8_t> cs = data22_int8.flat();
+ ASSERT_EQ(cs.size(), 4);
+ ASSERT_EQ(cs.at(3), 4);
+
+ ContiguousSpan<int8_t> cs2 = std::move(cs);
+ ASSERT_EQ(cs2.size(), 4);
+ ASSERT_EQ(cs2.at(3), 4);
+
+ int8_t sum = 0;
+ for (auto it = cs2.begin(); it < cs2.end(); it++)
+ {
+ sum += *it;
+ }
+ ASSERT_EQ(sum, 10);
+
+ std::vector<int8_t> array_data{1, 2, 3, 4};
+ auto cs3 = std::make_unique<ContiguousSpan<int8_t>>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs3->size(), 4);
+ ASSERT_EQ(cs3->at(3), 4);
+
+ auto cs4 = std::move(cs3);
+ ASSERT_EQ(cs3, nullptr);
+ ASSERT_EQ(cs4->size(), 4);
+ ASSERT_EQ(cs4->at(3), 4);
+
+ auto cs5 = ContiguousSpan<int8_t>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs5.size(), 4);
+ ASSERT_EQ(cs5.at(3), 4);
+ }
+
+ // <int8_t, true>
+ {
+ ContiguousSpan<int8_t, true> cs = data22_int8.flat();
+ ASSERT_EQ(cs.size(), 4);
+ ASSERT_EQ(cs.at(3), 4);
+
+ ContiguousSpan<int8_t, true> cs2 = std::move(cs);
+ ASSERT_EQ(cs2.size(), 4);
+ ASSERT_EQ(cs2.at(3), 4);
+
+ int8_t sum = 0;
+ for (auto it = cs2.begin(); it < cs2.end(); it++)
+ {
+ sum += *it;
+ }
+ ASSERT_EQ(sum, 10);
+
+ std::vector<int8_t> array_data{1, 2, 3, 4};
+ auto cs3 = std::make_unique<ContiguousSpan<int8_t, true>>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs3->size(), 4);
+ ASSERT_EQ(cs3->at(3), 4);
+
+ auto cs4 = std::move(cs3);
+ ASSERT_EQ(cs3, nullptr);
+ ASSERT_EQ(cs4->size(), 4);
+ ASSERT_EQ(cs4->at(3), 4);
+
+ auto cs5 = ContiguousSpan<int8_t, true>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs5.size(), 4);
+ ASSERT_EQ(cs5.at(3), 4);
+ }
+
+ Array<float> lv = std::move(data14);
+ ASSERT_FLOAT_EQ(lv.at(0, 0), 1);
+ ASSERT_FLOAT_EQ(lv.at(0, 1), 2);
+ ASSERT_FLOAT_EQ(lv.at(0, 2), 3);
+ ASSERT_FLOAT_EQ(lv.at(0, 3), 4);
+}
+
+TEST(NDArrayArrayTests, slice_write_test)
+{
+ // float
+ {
+ float raw_data[4] = {0};
+
+ Array<float> data22{raw_data, {2, 2}};
+
+ data22.slice(1) = {1, 2};
+
+ ASSERT_FLOAT_EQ(data22.at(0, 0), 0);
+ ASSERT_FLOAT_EQ(data22.at(0, 1), 0);
+ ASSERT_FLOAT_EQ(data22.at(1, 0), 1);
+ ASSERT_FLOAT_EQ(data22.at(1, 1), 2);
+ }
+
+ // int32_t
+ {
+ int32_t raw_data[4] = {0};
+ Array<int32_t> data22{raw_data, {2, 2}};
+
+ data22.slice(1) = {1, 2};
+
+ ASSERT_EQ(data22.at(0, 0), 0);
+ ASSERT_EQ(data22.at(0, 1), 0);
+ ASSERT_EQ(data22.at(1, 0), 1);
+ ASSERT_EQ(data22.at(1, 1), 2);
+ }
+
+ // uint32_t
+ {
+ uint32_t raw_data[4] = {0};
+ Array<uint32_t> data22{raw_data, {2, 2}};
+
+ data22.slice(1) = {1, 2};
+
+ ASSERT_EQ(data22.at(0, 0), 0);
+ ASSERT_EQ(data22.at(0, 1), 0);
+ ASSERT_EQ(data22.at(1, 0), 1);
+ ASSERT_EQ(data22.at(1, 1), 2);
+ }
+
+ // int8_t
+ {
+ int8_t raw_data[4] = {0};
+ Array<int8_t> data22{raw_data, {2, 2}};
+
+ data22.slice(1) = {1, 2};
+
+ ASSERT_EQ(data22.at(0, 0), 0);
+ ASSERT_EQ(data22.at(0, 1), 0);
+ ASSERT_EQ(data22.at(1, 0), 1);
+ ASSERT_EQ(data22.at(1, 1), 2);
+ }
+}
+
+TEST(NDArrayArrayTests, slice_read_test)
+{
+ // float
+ {
+ float raw_data[4] = {1, 2, 3, 4};
+
+ Array<float> data22{raw_data, {2, 2}};
+
+ auto slice = data22.slice(1);
+
+ ASSERT_FLOAT_EQ(slice[0], 3);
+ ASSERT_FLOAT_EQ(slice[1], 4);
+ }
+
+ // int32_t
+ {
+ int32_t raw_data[4] = {1, 2, 3, 4};
+
+ Array<int32_t> data22{raw_data, {2, 2}};
+
+ auto slice = data22.slice(1);
+
+ ASSERT_EQ(slice[0], 3);
+ ASSERT_EQ(slice[1], 4);
+ }
+
+ // uint32_t
+ {
+ uint32_t raw_data[4] = {1, 2, 3, 4};
+
+ Array<uint32_t> data22{raw_data, {2, 2}};
+
+ auto slice = data22.slice(1);
+
+ ASSERT_EQ(slice[0], 3);
+ ASSERT_EQ(slice[1], 4);
+ }
+
+ // int8_t
+ {
+ int8_t raw_data[4] = {1, 2, 3, 4};
+
+ Array<int8_t> data22{raw_data, {2, 2}};
+
+ auto slice = data22.slice(1);
+
+ ASSERT_EQ(slice[0], 3);
+ ASSERT_EQ(slice[1], 4);
+ }
+}
+
+TEST(NDArrayArrayTests, multidim_test)
+{
+ // float
+ {
+ float raw_data[5] = {0, 1, 2, 3, 4};
+
+ Array<float> data22{raw_data, {1, 1, 1, 1, 5}};
+
+ ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 0), 0);
+ ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 1), 1);
+ ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 2), 2);
+ ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 3), 3);
+ ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 4), 4);
+ }
+
+ // int32_t
+ {
+ int32_t raw_data[5] = {0, 1, 2, 3, 4};
+
+ Array<int32_t> data22{raw_data, {1, 1, 1, 1, 5}};
+
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 0), 0);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 1), 1);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 2), 2);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 3), 3);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 4), 4);
+ }
+
+ // uint32_t
+ {
+ uint32_t raw_data[5] = {0, 1, 2, 3, 4};
+
+ Array<uint32_t> data22{raw_data, {1, 1, 1, 1, 5}};
+
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 0), 0);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 1), 1);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 2), 2);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 3), 3);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 4), 4);
+ }
+
+ // int8_t
+ {
+ int8_t raw_data[5] = {0, 1, 2, 3, 4};
+
+ Array<int8_t> data22{raw_data, {1, 1, 1, 1, 5}};
+
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 0), 0);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 1), 1);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 2), 2);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 3), 3);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 4), 4);
+ }
+}
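+
+// Editor's note: a minimal usage sketch of the Array API exercised by the
+// tests above, kept as a comment for quick reference. Signatures are inferred
+// from this file, not taken from ndarray/Array.h itself.
+//
+//   float buf[6] = {1, 2, 3, 4, 5, 6};
+//   Array<float> arr{buf, {2, 3}};            // non-owning 2x3 view over buf
+//   float v = arr.at(1, 2);                   // v == 6 (row-major indexing)
+//   arr.slice(0) = {7, 8, 9};                 // overwrite the first row
+//   ContiguousSpan<float> flat = arr.flat();  // flat view over all 6 elements
+//   float first = flat.at(0);                 // first == 7 after the slice write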
diff --git a/runtime/libs/ndarray/src/ContiguousSpan.test.cpp b/runtime/libs/ndarray/src/ContiguousSpan.test.cpp
new file mode 100644
index 000000000..26efcc645
--- /dev/null
+++ b/runtime/libs/ndarray/src/ContiguousSpan.test.cpp
@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ndarray/ContiguousSpan.h"
+
+#include <gtest/gtest.h>
+
+using namespace ndarray;
+
+TEST(NDArrayContiguousSpanTests, slice_assign_test)
+{
+ // float
+ {
+ std::vector<float> v1{1, 2, 3, 4, 5};
+ std::vector<float> v2(5);
+
+ ContiguousSpan<float> span1(v1.begin(), v1.end());
+ ContiguousSpan<float> span2(v2.begin(), v2.end());
+
+ span2.assign(span1);
+
+ ASSERT_EQ(v1, v2);
+ ASSERT_EQ(span1.size(), 5);
+ ASSERT_EQ(span2.size(), 5);
+
+ ASSERT_EQ(span2.at(2), 3);
+ ASSERT_EQ(span2.at(4), 5);
+
+ ASSERT_EQ(*(span1.data() + 2), *(span2.data() + 2));
+
+ ContiguousSpan<float> span3(span2.offset(1));
+ ASSERT_EQ(span3.size(), 4);
+ ASSERT_EQ(span3.at(0), 2);
+ ASSERT_EQ(span3.at(1), 3);
+ ASSERT_EQ(span3[2], 4);
+ ASSERT_EQ(span3[3], 5);
+
+ // const
+ ContiguousSpan<float, true> span4(v1.begin(), v1.end());
+ ASSERT_EQ(span4.size(), 5);
+ ASSERT_EQ(span4.at(0), 1);
+ ASSERT_EQ(span4.at(1), 2);
+ ASSERT_EQ(span4.at(2), 3);
+ ASSERT_EQ(span4[3], 4);
+ ASSERT_EQ(span4[4], 5);
+
+ ContiguousSpan<float, true> span5(span4.offset(1));
+ ASSERT_EQ(span5.size(), 4);
+ ASSERT_EQ(span5.at(0), 2);
+ ASSERT_EQ(span5.at(1), 3);
+ ASSERT_EQ(span5[2], 4);
+ ASSERT_EQ(span5[3], 5);
+ }
+
+ // int32_t
+ {
+ std::vector<int32_t> v1{1, 2, 3, 4, 5};
+ std::vector<int32_t> v2(5);
+
+ ContiguousSpan<int32_t> span1(v1.begin(), v1.end());
+ ContiguousSpan<int32_t> span2(v2.begin(), v2.end());
+
+ span2.assign(span1);
+
+ ASSERT_EQ(v1, v2);
+ ASSERT_EQ(span1.size(), 5);
+ ASSERT_EQ(span2.size(), 5);
+
+ ASSERT_EQ(span2.at(2), 3);
+ ASSERT_EQ(span2.at(4), 5);
+
+ ASSERT_EQ(*(span1.data() + 2), *(span2.data() + 2));
+
+ ContiguousSpan<int32_t> span3(span2.offset(1));
+ ASSERT_EQ(span3.size(), 4);
+ ASSERT_EQ(span3.at(0), 2);
+ ASSERT_EQ(span3.at(1), 3);
+ ASSERT_EQ(span3[2], 4);
+ ASSERT_EQ(span3[3], 5);
+
+ // const
+ ContiguousSpan<int32_t, true> span4(v1.begin(), v1.end());
+ ASSERT_EQ(span4.size(), 5);
+ ASSERT_EQ(span4.at(0), 1);
+ ASSERT_EQ(span4.at(1), 2);
+ ASSERT_EQ(span4.at(2), 3);
+ ASSERT_EQ(span4[3], 4);
+ ASSERT_EQ(span4[4], 5);
+
+ ContiguousSpan<int32_t, true> span5(span4.offset(1));
+ ASSERT_EQ(span5.size(), 4);
+ ASSERT_EQ(span5.at(0), 2);
+ ASSERT_EQ(span5.at(1), 3);
+ ASSERT_EQ(span5[2], 4);
+ ASSERT_EQ(span5[3], 5);
+ }
+
+ // uint32_t
+ {
+ std::vector<uint32_t> v1{1, 2, 3, 4, 5};
+ std::vector<uint32_t> v2(5);
+
+ ContiguousSpan<uint32_t> span1(v1.begin(), v1.end());
+ ContiguousSpan<uint32_t> span2(v2.begin(), v2.end());
+
+ span2.assign(span1);
+
+ ASSERT_EQ(v1, v2);
+ ASSERT_EQ(span1.size(), 5);
+ ASSERT_EQ(span2.size(), 5);
+
+ ASSERT_EQ(span2.at(2), 3);
+ ASSERT_EQ(span2.at(4), 5);
+
+ ASSERT_EQ(*(span1.data() + 2), *(span2.data() + 2));
+
+ ContiguousSpan<uint32_t> span3(span2.offset(1));
+ ASSERT_EQ(span3.size(), 4);
+ ASSERT_EQ(span3.at(0), 2);
+ ASSERT_EQ(span3.at(1), 3);
+ ASSERT_EQ(span3[2], 4);
+ ASSERT_EQ(span3[3], 5);
+
+ // const
+ ContiguousSpan<uint32_t, true> span4(v1.begin(), v1.end());
+ ASSERT_EQ(span4.size(), 5);
+ ASSERT_EQ(span4.at(0), 1);
+ ASSERT_EQ(span4.at(1), 2);
+ ASSERT_EQ(span4.at(2), 3);
+ ASSERT_EQ(span4[3], 4);
+ ASSERT_EQ(span4[4], 5);
+
+ ContiguousSpan<uint32_t, true> span5(span4.offset(1));
+ ASSERT_EQ(span5.size(), 4);
+ ASSERT_EQ(span5.at(0), 2);
+ ASSERT_EQ(span5.at(1), 3);
+ ASSERT_EQ(span5[2], 4);
+ ASSERT_EQ(span5[3], 5);
+ }
+
+ // int8_t
+ {
+ std::vector<int8_t> v1{1, 2, 3, 4, 5};
+ std::vector<int8_t> v2(5);
+
+ ContiguousSpan<int8_t> span1(v1.begin(), v1.end());
+ ContiguousSpan<int8_t> span2(v2.begin(), v2.end());
+
+ span2.assign(span1);
+
+ ASSERT_EQ(v1, v2);
+ ASSERT_EQ(span1.size(), 5);
+ ASSERT_EQ(span2.size(), 5);
+
+ ASSERT_EQ(span2.at(2), 3);
+ ASSERT_EQ(span2.at(4), 5);
+
+ ASSERT_EQ(*(span1.data() + 2), *(span2.data() + 2));
+
+ ContiguousSpan<int8_t> span3(span2.offset(1));
+ ASSERT_EQ(span3.size(), 4);
+ ASSERT_EQ(span3.at(0), 2);
+ ASSERT_EQ(span3.at(1), 3);
+ ASSERT_EQ(span3[2], 4);
+ ASSERT_EQ(span3[3], 5);
+
+ // const
+ ContiguousSpan<int8_t, true> span4(v1.begin(), v1.end());
+ ASSERT_EQ(span4.size(), 5);
+ ASSERT_EQ(span4.at(0), 1);
+ ASSERT_EQ(span4.at(1), 2);
+ ASSERT_EQ(span4.at(2), 3);
+ ASSERT_EQ(span4[3], 4);
+ ASSERT_EQ(span4[4], 5);
+
+ ContiguousSpan<int8_t, true> span5(span4.offset(1));
+ ASSERT_EQ(span5.size(), 4);
+ ASSERT_EQ(span5.at(0), 2);
+ ASSERT_EQ(span5.at(1), 3);
+ ASSERT_EQ(span5[2], 4);
+ ASSERT_EQ(span5[3], 5);
+ }
+}
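+
+// Editor's note: a minimal usage sketch of ContiguousSpan, inferred from the
+// tests above (not an authoritative reference for ndarray/ContiguousSpan.h).
+//
+//   std::vector<float> src{1, 2, 3, 4, 5};
+//   std::vector<float> dst(5);
+//   ContiguousSpan<float> in(src.begin(), src.end());
+//   ContiguousSpan<float> out(dst.begin(), dst.end());
+//   out.assign(in);                               // element-wise copy, dst == src
+//   ContiguousSpan<float> tail(out.offset(1));    // view of the last 4 elements
+//   float x = tail[0];                            // x == 2
+//   ContiguousSpan<float, true> ro(src.begin(), src.end());  // read-only span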
diff --git a/runtime/libs/ndarray/src/detail/cxx14.h b/runtime/libs/ndarray/src/detail/cxx14.h
deleted file mode 100644
index 81135b3f2..000000000
--- a/runtime/libs/ndarray/src/detail/cxx14.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef _NDARRAY_CXX14_H_
-#define _NDARRAY_CXX14_H_
-
-namespace ndarray
-{
-
-namespace cxx14
-{
-
-template <size_t... Nums> struct index_sequence
-{
- using value_type = size_t;
-
- static constexpr std::size_t size() noexcept { return sizeof...(Nums); }
-};
-
-namespace detail
-{
-
-template <size_t v, typename Seq> struct _append;
-
-template <size_t v, size_t... Nums> struct _append<v, index_sequence<Nums...>>
-{
- using result = index_sequence<Nums..., v>;
-};
-
-template <size_t Len> struct make_index_sequence
-{
- using result =
- typename detail::_append<Len - 1, typename make_index_sequence<Len - 1>::result>::result;
-};
-
-template <> struct make_index_sequence<1>
-{
- using result = index_sequence<0>;
-};
-
-template <> struct make_index_sequence<0>
-{
- using result = index_sequence<>;
-};
-
-} // namespace detail
-
-template <size_t Num> using make_index_sequence = typename detail::make_index_sequence<Num>::result;
-
-} // namespace cxx14
-
-} // namespace ndarray
-
-#endif //_NDARRAY_CXX14_H_
diff --git a/runtime/libs/ndarray/test/CMakeLists.txt b/runtime/libs/ndarray/test/CMakeLists.txt
deleted file mode 100644
index 16f8779ee..000000000
--- a/runtime/libs/ndarray/test/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-if(NOT BUILD_NDARRAY_TEST)
- return()
-endif()
-
-add_executable(ndarray_test ndarray_test.cpp)
-
-target_link_libraries(ndarray_test PRIVATE ndarray)
-
-nnfw_find_package(GTest)
-if(NOT GTest_FOUND)
- message(STATUS "GTest not avaialble. Skipping NDArray test build")
- return()
-endif(NOT GTest_FOUND)
-
-target_link_libraries(ndarray_test PUBLIC gtest gtest_main ${LIB_PTHREAD})
-
-add_test(ndarray_test ndarray_test)
diff --git a/runtime/libs/ndarray/test/ndarray_test.cpp b/runtime/libs/ndarray/test/ndarray_test.cpp
deleted file mode 100644
index 0aa948c72..000000000
--- a/runtime/libs/ndarray/test/ndarray_test.cpp
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "ndarray/Array.h"
-
-using namespace ndarray;
-
-TEST(NDArray_tests, basic_data_test)
-{
-
- float raw_data[] = {1, 2, 3, 4};
-
- Array<float> data22{raw_data, {2, 2}};
-
- ASSERT_FLOAT_EQ(data22.at(0, 0), 1);
- ASSERT_FLOAT_EQ(data22.at(0, 1), 2);
- ASSERT_FLOAT_EQ(data22.at(1, 0), 3);
- ASSERT_FLOAT_EQ(data22.at(1, 1), 4);
-
- Array<float> data14{raw_data, {1, 4}};
- ASSERT_FLOAT_EQ(data22.at(0, 0), 1);
- ASSERT_FLOAT_EQ(data22.at(0, 1), 2);
- ASSERT_FLOAT_EQ(data22.at(0, 2), 3);
- ASSERT_FLOAT_EQ(data22.at(0, 3), 4);
-}
-
-TEST(NDArray_tests, slice_write_test)
-{
- float raw_data[4] = {0};
-
- Array<float> data22{raw_data, {2, 2}};
-
- data22.slice(1) = {1, 2};
-
- ASSERT_FLOAT_EQ(data22.at(0, 0), 0);
- ASSERT_FLOAT_EQ(data22.at(0, 1), 0);
- ASSERT_FLOAT_EQ(data22.at(1, 0), 1);
- ASSERT_FLOAT_EQ(data22.at(1, 1), 2);
-}
-
-TEST(NDArray_tests, slice_read_test)
-{
- float raw_data[4] = {1, 2, 3, 4};
-
- Array<float> data22{raw_data, {2, 2}};
-
- auto slice = data22.slice(1);
-
- ASSERT_FLOAT_EQ(slice[0], 3);
- ASSERT_FLOAT_EQ(slice[1], 4);
-}
-
-TEST(NDArray_tests, multidim_test)
-{
- float raw_data[5] = {0, 1, 2, 3, 4};
-
- Array<float> data22{raw_data, {1, 1, 1, 1, 5}};
-
- ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 0), 0);
- ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 1), 1);
- ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 2), 2);
- ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 3), 3);
- ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 4), 4);
-}
-
-TEST(NDArray_tests, slice_assign_test)
-{
- std::vector<float> v1{1, 2, 3, 4, 5};
- std::vector<float> v2(5);
-
- ContiguousSpan<float> span1(v1.begin(), v1.end());
- ContiguousSpan<float> span2(v2.begin(), v2.end());
-
- span2.assign(span1);
-
- ASSERT_EQ(v1, v2);
-}
diff --git a/runtime/libs/nnapi/CMakeLists.txt b/runtime/libs/nnapi/CMakeLists.txt
index a5d9490d1..73f82b909 100644
--- a/runtime/libs/nnapi/CMakeLists.txt
+++ b/runtime/libs/nnapi/CMakeLists.txt
@@ -1,3 +1,4 @@
-add_subdirectories()
-add_library(nnfw_lib_nnapi ALIAS nnfw_lib_nnapi_1_2)
+add_library(nnfw_lib_nnapi INTERFACE)
+target_include_directories(nnfw_lib_nnapi INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/include)
+target_link_libraries(nnfw_lib_nnapi INTERFACE nnfw-nnapi-header)
diff --git a/runtime/libs/nnapi/v1.2/include/NeuralNetworksExShim.h b/runtime/libs/nnapi/include/NeuralNetworksExShim.h
index 855613241..855613241 100644
--- a/runtime/libs/nnapi/v1.2/include/NeuralNetworksExShim.h
+++ b/runtime/libs/nnapi/include/NeuralNetworksExShim.h
diff --git a/runtime/libs/nnapi/v1.2/include/NeuralNetworksLoadHelpers.h b/runtime/libs/nnapi/include/NeuralNetworksLoadHelpers.h
index 1c482b54c..1c482b54c 100644
--- a/runtime/libs/nnapi/v1.2/include/NeuralNetworksLoadHelpers.h
+++ b/runtime/libs/nnapi/include/NeuralNetworksLoadHelpers.h
diff --git a/runtime/libs/nnapi/include/NeuralNetworksShim.h b/runtime/libs/nnapi/include/NeuralNetworksShim.h
new file mode 100644
index 000000000..2e8ccdb76
--- /dev/null
+++ b/runtime/libs/nnapi/include/NeuralNetworksShim.h
@@ -0,0 +1,1554 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// NOTE This header is derived from part of the following file
+// https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/lite/nnapi/NeuralNetworksShim.h
+
+#ifndef __NEURAL_NETWORKS_SHIM_H__
+#define __NEURAL_NETWORKS_SHIM_H__
+
+#include "NeuralNetworksTypes.h"
+#include "NeuralNetworksLoadHelpers.h"
+
+// This interface is now deprecated. Use nnapi_implementation instead.
+
+// TODO(b/123017568): Update all current usages of this file.
+
+// NN api types based on NNAPI header file
+// https://developer.android.com/ndk/reference/group/neural-networks
+
+/**
+ * Creates a shared memory object from a file descriptor.
+ *
+ * The shared memory is backed by a file descriptor via mmap.
+ * See {@link ANeuralNetworksMemory} for a description on how to use
+ * this shared memory.
+ *
+ * @param size The requested size in bytes.
+ * Must not be larger than the file size.
+ * @param protect The desired memory protection for the mapping.
+ * It is either PROT_NONE or the bitwise OR of one or
+ * more of the following flags: PROT_READ, PROT_WRITE.
+ * @param fd The requested file descriptor.
+ * The file descriptor has to be mmap-able. The file
+ * descriptor will be duplicated.
+ * @param offset The offset to the beginning of the file of the area to map.
+ * The offset has to be aligned to a page size.
+ * @param memory The memory object to be created.
+ * Set to NULL if unsuccessful.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if the request completed normally.
+ */
+inline int ANeuralNetworksMemory_createFromFd(size_t size, int protect, int fd, size_t offset,
+ ANeuralNetworksMemory **memory)
+{
+ LOAD_FUNCTION(ANeuralNetworksMemory_createFromFd);
+ EXECUTE_FUNCTION_RETURN(size, protect, fd, offset, memory);
+}
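+
+// Editor's sketch (illustrative, not part of the upstream header): mapping a
+// read-only file into an ANeuralNetworksMemory. The file name and size are
+// hypothetical and error handling is elided; open(), close(), and PROT_READ
+// come from <fcntl.h>, <unistd.h>, and <sys/mman.h>.
+//
+//   int fd = open("weights.bin", O_RDONLY);
+//   ANeuralNetworksMemory *memory = nullptr;
+//   ANeuralNetworksMemory_createFromFd(4096 /* size */, PROT_READ, fd, 0, &memory);
+//   // ... use with e.g. ANeuralNetworksModel_setOperandValueFromMemory ...
+//   ANeuralNetworksMemory_free(memory);
+//   close(fd);  // the descriptor is duplicated, so it is safe to close here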
+
+/**
+ * Delete a memory object.
+ *
+ * Destroys the object used by the run time to keep track of the memory.
+ * This will free the underlying actual memory if no other code has open
+ * handles to this memory.
+ *
+ * @param memory The memory object to be freed.
+ */
+inline void ANeuralNetworksMemory_free(ANeuralNetworksMemory *memory)
+{
+ LOAD_FUNCTION(ANeuralNetworksMemory_free);
+ EXECUTE_FUNCTION(memory);
+}
+
+/**
+ * Create an empty {@link ANeuralNetworksModel}.
+ *
+ * <p>This only creates the object. Computation is performed once
+ * {@link ANeuralNetworksExecution_startCompute} is invoked.
+ *
+ * The model should be constructed with calls to
+ * {@link ANeuralNetworksModel_addOperation} and
+ * {@link ANeuralNetworksModel_addOperand}
+ *
+ * <p>{@link ANeuralNetworksModel_finish} should be called once the model
+ * has been fully constructed.</p>
+ *
+ * <p>{@link ANeuralNetworksModel_free} should be called once the model
+ * is no longer needed.</p>
+ *
+ * @param model The {@link ANeuralNetworksModel} to be created.
+ * Set to NULL if unsuccessful.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+inline int ANeuralNetworksModel_create(ANeuralNetworksModel **model)
+{
+ LOAD_FUNCTION(ANeuralNetworksModel_create);
+ EXECUTE_FUNCTION_RETURN(model);
+}
+
+/**
+ * Destroy a model.
+ *
+ * The model need not have been finished by a call to
+ * {@link ANeuralNetworksModel_finish}.
+ *
+ * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+ *
+ * @param model The model to be destroyed. Passing NULL is acceptable and
+ * results in no operation.
+ */
+inline void ANeuralNetworksModel_free(ANeuralNetworksModel *model)
+{
+ LOAD_FUNCTION(ANeuralNetworksModel_free);
+ EXECUTE_FUNCTION(model);
+}
+
+/**
+ * Indicate that we have finished modifying a model. Required before
+ * calling {@link ANeuralNetworksCompilation_compile}.
+ *
+ * An application is responsible for making sure that no other thread uses
+ * the model at the same time.
+ *
+ * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+ *
+ * @param model The model to be finished.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+inline int ANeuralNetworksModel_finish(ANeuralNetworksModel *model)
+{
+ LOAD_FUNCTION(ANeuralNetworksModel_finish);
+ EXECUTE_FUNCTION_RETURN(model);
+}
+
+/**
+ * Add an operand to a model.
+ *
+ * The order in which the operands are added is important. The first one added
+ * to a model will have the index value 0, the second 1, etc. These indexes are
+ * used as operand identifiers in {@link ANeuralNetworksModel_addOperation},
+ * {@link ANeuralNetworksExecution_setInput},
+ * {@link ANeuralNetworksExecution_setInputFromMemory},
+ * {@link ANeuralNetworksExecution_setOutput},
+ * {@link ANeuralNetworksExecution_setOutputFromMemory} and
+ * {@link ANeuralNetworksExecution_setOperandValue}.
+ *
+ * To build a model that can accommodate inputs of various sizes, as you may
+ * want to do for a CNN, set the size of the dimensions that will vary at run
+ * time to 0. If you do so, provide the full dimensions when calling
+ * {@link ANeuralNetworksExecution_setInput} or {@link
+ * ANeuralNetworksExecution_setInputFromMemory}.
+ *
+ * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
+ * been called will return an error.
+ *
+ * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+ *
+ * @param model The model to be modified.
+ * @param type The {@link ANeuralNetworksOperandType} that describes the shape
+ * of the operand.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+inline int ANeuralNetworksModel_addOperand(ANeuralNetworksModel *model,
+ const ANeuralNetworksOperandType *type)
+{
+ LOAD_FUNCTION(ANeuralNetworksModel_addOperand);
+ EXECUTE_FUNCTION_RETURN(model, type);
+}
+
+/**
+ * Sets an operand to a constant value.
+ *
+ * For scalar values, the content of buffer is copied into the model.
+ *
+ * For tensor values, a pointer to the buffer is stored within the model.
+ * The application is responsible for not changing the content of this region
+ * until all executions using this model have completed. As the data may
+ * be copied during processing, modifying the data after this call yields
+ * undefined results.
+ *
+ * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
+ * been called will return an error.
+ *
+ * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+ *
+ * @param model The model to be modified.
+ * @param index The index of the model operand we're setting.
+ * @param buffer A pointer to the data to use.
+ * @param length The size in bytes of the data value.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+inline int ANeuralNetworksModel_setOperandValue(ANeuralNetworksModel *model, int32_t index,
+ const void *buffer, size_t length)
+{
+ LOAD_FUNCTION(ANeuralNetworksModel_setOperandValue);
+ EXECUTE_FUNCTION_RETURN(model, index, buffer, length);
+}
+
+/**
+ * Sets an operand's per channel quantization parameters.
+ *
+ * Sets parameters required by a tensor of type
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL}.
+ * This function must be called for every tensor of type
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL} before
+ * calling {@link ANeuralNetworksModel_finish}.
+ *
+ * Available since API level 29.
+ *
+ * @param model The model to be modified.
+ * @param index The index of the model operand we're setting.
+ * @param channelQuant The per channel quantization parameters for the operand.
+ * No memory in this struct needs to outlive the call to
+ * this function.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+inline int ANeuralNetworksModel_setOperandSymmPerChannelQuantParams(
+ ANeuralNetworksModel *model, int32_t index,
+ const ANeuralNetworksSymmPerChannelQuantParams *channelQuant)
+{
+ LOAD_FUNCTION(ANeuralNetworksModel_setOperandSymmPerChannelQuantParams);
+ EXECUTE_FUNCTION_RETURN(model, index, channelQuant);
+}
+
+/**
+ * Sets an operand to a value stored in a memory object.
+ *
+ * The content of the memory is not copied. A reference to that memory is stored
+ * inside the model. The application is responsible for not changing the content
+ * of the memory region until all executions using this model have completed.
+ * As the data may be copied during processing, modifying the data after this
+ * call yields undefined results.
+ *
+ * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
+ * been called will return an error.
+ *
+ * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+ *
+ * @param model The model to be modified.
+ * @param index The index of the model operand we're setting.
+ * @param buffer A pointer to the data to use.
+ * @param memory The memory containing the data.
+ * @param offset This specifies the location of the data within the memory.
+ * The offset is in bytes from the start of memory.
+ * @param length The size in bytes of the data value.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+inline int ANeuralNetworksModel_setOperandValueFromMemory(ANeuralNetworksModel *model,
+ int32_t index,
+ const ANeuralNetworksMemory *memory,
+ size_t offset, size_t length)
+{
+ LOAD_FUNCTION(ANeuralNetworksModel_setOperandValueFromMemory);
+ EXECUTE_FUNCTION_RETURN(model, index, memory, offset, length);
+}
+
+/**
+ * Add an operation to a model.
+ *
+ * @param model The model to be modified.
+ * @param type The type of the operation.
+ * @param inputCount The number of entries in the inputs array.
+ * @param inputs An array of indexes identifying each operand.
+ * @param outputCount The number of entries in the outputs array.
+ * @param outputs An array of indexes identifying each operand.
+ *
+ * The operands specified by inputs and outputs must have been
+ * previously added by calls to {@link ANeuralNetworksModel_addOperand}.
+ *
+ * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
+ * been called will return an error.
+ *
+ * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+inline int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
+ ANeuralNetworksOperationType type, uint32_t inputCount,
+ const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ LOAD_FUNCTION(ANeuralNetworksModel_addOperation);
+ EXECUTE_FUNCTION_RETURN(model, type, inputCount, inputs, outputCount, outputs);
+}
+
+/**
+ * Specifies which operands will be the model's inputs and outputs.
+ *
+ * An operand cannot be used for both input and output. Doing so will
+ * return an error.
+ *
+ * @param model The model to be modified.
+ * @param inputCount The number of entries in the inputs array.
+ * @param inputs An array of indexes identifying the input operands.
+ * @param outputCount The number of entries in the outputs array.
+ * @param outputs An array of indexes identifying the output operands.
+ *
+ * The operands specified by inputs and outputs must have been
+ * previously added by calls to {@link ANeuralNetworksModel_addOperand}.
+ *
+ * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
+ * been called will return an error.
+ *
+ * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+ *
+ */
+inline int ANeuralNetworksModel_identifyInputsAndOutputs(ANeuralNetworksModel *model,
+ uint32_t inputCount,
+ const uint32_t *inputs,
+ uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ LOAD_FUNCTION(ANeuralNetworksModel_identifyInputsAndOutputs);
+ EXECUTE_FUNCTION_RETURN(model, inputCount, inputs, outputCount, outputs);
+}
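+
+// Editor's sketch (illustrative, not part of the upstream header): building a
+// minimal model with the functions above -- a single ADD of two float vectors.
+// The operand/operation constants and the ANeuralNetworksOperandType layout
+// are assumed from NeuralNetworksTypes.h; error handling is elided.
+//
+//   ANeuralNetworksModel *model = nullptr;
+//   ANeuralNetworksModel_create(&model);
+//
+//   uint32_t dims[1] = {2};
+//   ANeuralNetworksOperandType tensor{ANEURALNETWORKS_TENSOR_FLOAT32, 1, dims, 0.0f, 0};
+//   ANeuralNetworksOperandType scalar{ANEURALNETWORKS_INT32, 0, nullptr, 0.0f, 0};
+//   ANeuralNetworksModel_addOperand(model, &tensor);  // index 0: lhs input
+//   ANeuralNetworksModel_addOperand(model, &tensor);  // index 1: rhs input
+//   ANeuralNetworksModel_addOperand(model, &scalar);  // index 2: fused activation
+//   ANeuralNetworksModel_addOperand(model, &tensor);  // index 3: output
+//
+//   int32_t act = ANEURALNETWORKS_FUSED_NONE;
+//   ANeuralNetworksModel_setOperandValue(model, 2, &act, sizeof(act));
+//
+//   uint32_t ins[] = {0, 1, 2}, outs[] = {3};
+//   ANeuralNetworksModel_addOperation(model, ANEURALNETWORKS_ADD, 3, ins, 1, outs);
+//   uint32_t mins[] = {0, 1}, mouts[] = {3};
+//   ANeuralNetworksModel_identifyInputsAndOutputs(model, 2, mins, 1, mouts);
+//   ANeuralNetworksModel_finish(model);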
+
+/**
+ * Specifies whether {@link ANEURALNETWORKS_TENSOR_FLOAT32} is allowed to be
+ * calculated with range and/or precision as low as that of the IEEE 754 16-bit
+ * floating-point format. By default, {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * must be calculated using at least the range and precision of the IEEE 754
+ * 32-bit floating-point format.
+ *
+ * @param model The model to be modified.
+ * @param allow 'true' indicates {@link ANEURALNETWORKS_TENSOR_FLOAT32} may be
+ * calculated with range and/or precision as low as that of the
+ * IEEE 754 16-bit floating point format. 'false' indicates
+ * {@link ANEURALNETWORKS_TENSOR_FLOAT32} must be calculated using
+ * at least the range and precision of the IEEE 754 32-bit floating
+ * point format.
+ *
+ * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
+ * been called will return an error.
+ *
+ * Available since API level 28.
+ *
+ * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+ */
+inline int ANeuralNetworksModel_relaxComputationFloat32toFloat16(ANeuralNetworksModel *model,
+ bool allow)
+{
+ LOAD_FUNCTION(ANeuralNetworksModel_relaxComputationFloat32toFloat16);
+ EXECUTE_FUNCTION_RETURN(model, allow);
+}
+
+/**
+ * Create a {@link ANeuralNetworksCompilation} to compile the given model.
+ * This only creates the object. Compilation is only performed once
+ * {@link ANeuralNetworksCompilation_finish} is invoked.
+ *
+ * <p>The provided model must outlive the compilation.</p>
+ *
+ * The model must already have been finished by a call to
+ * {@link ANeuralNetworksModel_finish}.
+ *
+ * See {@link ANeuralNetworksCompilation} for information on multithreaded
+ * usage.
+ *
+ * @param model The {@link ANeuralNetworksModel} to be compiled.
+ * @param compilation The newly created object or NULL if unsuccessful.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA
+ * if the model is invalid.
+ */
+inline int ANeuralNetworksCompilation_create(ANeuralNetworksModel *model,
+ ANeuralNetworksCompilation **compilation)
+{
+ LOAD_FUNCTION(ANeuralNetworksCompilation_create);
+ EXECUTE_FUNCTION_RETURN(model, compilation);
+}
+
+/**
+ * Destroy a compilation.
+ *
+ * <p>The compilation need not have been finished by a call to
+ * {@link ANeuralNetworksCompilation_finish}; the underlying resources are
+ * released either way.</p>
+ *
+ * See {@link ANeuralNetworksCompilation} for information on multithreaded
+ * usage.
+ *
+ * @param compilation The compilation to be destroyed. Passing NULL is
+ * acceptable and results in no operation.
+ */
+inline void ANeuralNetworksCompilation_free(ANeuralNetworksCompilation *compilation)
+{
+ LOAD_FUNCTION(ANeuralNetworksCompilation_free);
+ EXECUTE_FUNCTION(compilation);
+}
+
+/**
+ * Sets the execution preference.
+ *
+ * <p>Provides guidance to the runtime when trade-offs are possible.</p>
+ *
+ * See {@link ANeuralNetworksCompilation} for information on multithreaded
+ * usage.
+ *
+ * @param compilation The compilation to be modified.
+ * @param preference Either {@link PREFER_LOW_POWER},
+ * {@link PREFER_SINGLE_FAST_ANSWER}, or
+ * {@link PREFER_SUSTAINED_SPEED}.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+inline int ANeuralNetworksCompilation_setPreference(ANeuralNetworksCompilation *compilation,
+ int32_t preference)
+{
+ LOAD_FUNCTION(ANeuralNetworksCompilation_setPreference);
+ EXECUTE_FUNCTION_RETURN(compilation, preference);
+}
+
+/**
+ * Waits until the compilation completes.
+ *
+ * More than one thread can wait on a compilation. When the compilation
+ * completes, all threads will be released.
+ *
+ * See {@link ANeuralNetworksCompilation} for information on multithreaded
+ * usage.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if the compilation completed normally.
+ */
+inline int ANeuralNetworksCompilation_finish(ANeuralNetworksCompilation *compilation)
+{
+ LOAD_FUNCTION(ANeuralNetworksCompilation_finish);
+ EXECUTE_FUNCTION_RETURN(compilation);
+}
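+
+// Editor's sketch (illustrative, not part of the upstream header): compiling
+// the model built in the earlier sketch. ANEURALNETWORKS_PREFER_SUSTAINED_SPEED
+// is assumed from NeuralNetworksTypes.h; error handling is elided.
+//
+//   ANeuralNetworksCompilation *compilation = nullptr;
+//   ANeuralNetworksCompilation_create(model, &compilation);
+//   ANeuralNetworksCompilation_setPreference(compilation, ANEURALNETWORKS_PREFER_SUSTAINED_SPEED);
+//   ANeuralNetworksCompilation_finish(compilation);
+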
+/**
+ * Create a {@link ANeuralNetworksExecution} to apply the given compilation.
+ * This only creates the object. Computation is only performed once
+ * {@link ANeuralNetworksExecution_startCompute} is invoked.
+ *
+ * <p>The provided compilation must outlive the execution.</p>
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ *
+ * @param compilation The {@link ANeuralNetworksCompilation} to be evaluated.
+ * @param execution The newly created object or NULL if unsuccessful.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA
+ * if the compilation is invalid.
+ */
+inline int ANeuralNetworksExecution_create(ANeuralNetworksCompilation *compilation,
+ ANeuralNetworksExecution **execution)
+{
+ LOAD_FUNCTION(ANeuralNetworksExecution_create);
+ EXECUTE_FUNCTION_RETURN(compilation, execution);
+}
+
+/**
+ * Destroy an execution.
+ *
+ * <p>If called on an execution for which
+ * {@link ANeuralNetworksExecution_startCompute} has been called, the
+ * function will return immediately but will mark the execution to be deleted
+ * once the computation completes. {@link ANeuralNetworksEvent_wait}
+ * will then return ANEURALNETWORKS_ERROR_DELETED.
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ *
+ * @param execution The execution to be destroyed. Passing NULL is acceptable
+ * and results in no operation.
+ */
+inline void ANeuralNetworksExecution_free(ANeuralNetworksExecution *execution)
+{
+ LOAD_FUNCTION(ANeuralNetworksExecution_free);
+ EXECUTE_FUNCTION(execution);
+}
+
+/**
+ * Associate a user buffer with an input of the model of the
+ * {@link ANeuralNetworksExecution}.
+ *
+ * <p>The provided buffer must outlive the execution.</p>
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ *
+ * @param execution The execution to be modified.
+ * @param index The index of the input argument we are setting. It is
+ * an index into the lists passed to
+ * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
+ * the index associated with {@link
+ * ANeuralNetworksModel_addOperand}.
+ * @param type The type of the operand. This should be used to specify the
+ * dimensions that were set to 0 when the operand was added to the
+ * model. All other properties of the type must be the same as
+ * specified in the model. If the type is the same as specified
+ * when the model was built, NULL can be passed.
+ * @param buffer The buffer containing the data.
+ * @param length The length in bytes of the buffer.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if
+ * the name is not recognized or the buffer is too small for the input.
+ */
+inline int ANeuralNetworksExecution_setInput(ANeuralNetworksExecution *execution, int32_t index,
+ const ANeuralNetworksOperandType *type,
+ const void *buffer, size_t length)
+{
+ LOAD_FUNCTION(ANeuralNetworksExecution_setInput);
+ EXECUTE_FUNCTION_RETURN(execution, index, type, buffer, length);
+}
+
+/**
+ * Associate part of a memory object with an input of the model of the
+ * {@link ANeuralNetworksExecution}.
+ *
+ * <p>The provided memory must outlive the execution.</p>
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ *
+ * @param execution The execution to be modified.
+ * @param index The index of the input argument we are setting. It is
+ * an index into the lists passed to
+ * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
+ * the index associated with {@link
+ * ANeuralNetworksModel_addOperand}.
+ * @param type The type of the operand. This can be used to specify the
+ * dimensions that were set to 0 when the operand was added to the
+ * model. All other values must be the same as specified in the
+ * model. If the type is the same as specified when the model
+ * was built, NULL can be passed.
+ * @param memory The memory containing the data.
+ * @param offset This specifies the location of the data within the memory.
+ * The offset is in bytes from the start of memory.
+ * @param length The size in bytes of the data value.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if
+ * the name is not recognized or the buffer is too small for the input.
+ */
+inline int ANeuralNetworksExecution_setInputFromMemory(ANeuralNetworksExecution *execution,
+ int32_t index,
+ const ANeuralNetworksOperandType *type,
+ const ANeuralNetworksMemory *memory,
+ size_t offset, size_t length)
+{
+ LOAD_FUNCTION(ANeuralNetworksExecution_setInputFromMemory);
+ EXECUTE_FUNCTION_RETURN(execution, index, type, memory, offset, length);
+}
+
+/**
+ * Associate a user buffer with an output of the model of the
+ * {@link ANeuralNetworksExecution}.
+ *
+ * <p>The provided buffer must outlive the execution.</p>
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ *
+ * @param execution The execution to be modified.
+ * @param index The index of the output argument we are setting. It is
+ * an index into the lists passed to
+ * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
+ * the index associated with {@link
+ * ANeuralNetworksModel_addOperand}.
+ * @param type The type of the operand. This can be used to specify the
+ * dimensions that were set to 0 when the operand was added to the
+ * model. All other values must be the same as specified in the
+ * model. If the type is the same as specified when the model
+ * was built, NULL can be passed.
+ * @param buffer The buffer where the data is to be written.
+ * @param length The length in bytes of the buffer.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if
+ * the name is not recognized or the buffer is too small for the output.
+ */
+inline int ANeuralNetworksExecution_setOutput(ANeuralNetworksExecution *execution, int32_t index,
+ const ANeuralNetworksOperandType *type, void *buffer,
+ size_t length)
+{
+ LOAD_FUNCTION(ANeuralNetworksExecution_setOutput);
+ EXECUTE_FUNCTION_RETURN(execution, index, type, buffer, length);
+}
+
+/**
+ * Associate part of a memory object with an output of the model of the
+ * {@link ANeuralNetworksExecution}.
+ *
+ * <p>The provided memory must outlive the execution.</p>
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ *
+ * @param execution The execution to be modified.
+ * @param index The index of the output argument we are setting. It is
+ * an index into the lists passed to
+ * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
+ * the index associated with {@link
+ * ANeuralNetworksModel_addOperand}.
+ * @param type The type of the operand. This can be used to specify the
+ * dimensions that were set to 0 when the operand was added to the
+ * model. All other values must be the same as specified in the
+ * model. If the type is the same as specified when the model
+ * was built, NULL can be passed.
+ * @param memory The memory where the data is to be stored.
+ * @param offset This specifies the location of the data within the memory.
+ * The offset is in bytes from the start of memory.
+ * @param length The length in bytes of the data value.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if
+ * the name is not recognized or the buffer is too small for the output.
+ */
+inline int ANeuralNetworksExecution_setOutputFromMemory(ANeuralNetworksExecution *execution,
+ int32_t index,
+ const ANeuralNetworksOperandType *type,
+ const ANeuralNetworksMemory *memory,
+ size_t offset, size_t length)
+{
+ LOAD_FUNCTION(ANeuralNetworksExecution_setOutputFromMemory);
+ EXECUTE_FUNCTION_RETURN(execution, index, type, memory, offset, length);
+}
+
+/**
+ * Schedule evaluation of the execution.
+ *
+ * <p>Schedules evaluation of the execution. Once the model has been
+ * applied and the outputs are ready to be consumed, the execution will be
+ * signaled. Use {@link ANeuralNetworksEvent_wait} to wait for that signal.
+ * </p>
+ *
+ * Multiple executions can be scheduled and evaluated concurrently, and
+ * compilations can be performed concurrently with executions. The runtime makes
+ * no guarantee on the ordering of the completion of compilations and
+ * executions. If it's important to the application, the application should
+ * enforce the ordering by using {@link ANeuralNetworksEvent_wait}.
+ *
+ * {@link ANeuralNetworksEvent_wait} must be called to recuperate the resources
+ * used by the execution.
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ *
+ * @param execution The execution to be scheduled and executed.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+inline int ANeuralNetworksExecution_startCompute(ANeuralNetworksExecution *execution,
+ ANeuralNetworksEvent **event)
+{
+ LOAD_FUNCTION(ANeuralNetworksExecution_startCompute);
+ EXECUTE_FUNCTION_RETURN(execution, event);
+}
+
+/**
+ * Waits until the execution completes.
+ *
+ * More than one thread can wait on an event. When the execution completes,
+ * all threads will be released.
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if the execution completed normally.
+ */
+inline int ANeuralNetworksEvent_wait(ANeuralNetworksEvent *event)
+{
+ LOAD_FUNCTION(ANeuralNetworksEvent_wait);
+ EXECUTE_FUNCTION_RETURN(event);
+}
+
+/**
+ * Destroys the event.
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ */
+inline void ANeuralNetworksEvent_free(ANeuralNetworksEvent *event)
+{
+ LOAD_FUNCTION(ANeuralNetworksEvent_free);
+ EXECUTE_FUNCTION(event);
+}
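+
+// Editor's sketch (illustrative, not part of the upstream header): one
+// asynchronous run of the compiled ADD model from the earlier sketches.
+// Passing NULL for the type keeps the operand shapes from the model;
+// error handling is elided.
+//
+//   float lhs[2] = {1, 2}, rhs[2] = {3, 4}, sum[2] = {};
+//   ANeuralNetworksExecution *execution = nullptr;
+//   ANeuralNetworksExecution_create(compilation, &execution);
+//   ANeuralNetworksExecution_setInput(execution, 0, nullptr, lhs, sizeof(lhs));
+//   ANeuralNetworksExecution_setInput(execution, 1, nullptr, rhs, sizeof(rhs));
+//   ANeuralNetworksExecution_setOutput(execution, 0, nullptr, sum, sizeof(sum));
+//
+//   ANeuralNetworksEvent *event = nullptr;
+//   ANeuralNetworksExecution_startCompute(execution, &event);
+//   ANeuralNetworksEvent_wait(event);   // blocks until sum == {4, 6}
+//   ANeuralNetworksEvent_free(event);
+//   ANeuralNetworksExecution_free(execution);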
+
+/**
+ * Get the number of available devices.
+ *
+ * @param numDevices Used to return the number of devices.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ *
+ * Available since API level 29.
+ */
+inline int ANeuralNetworks_getDeviceCount(uint32_t *numDevices)
+{
+ LOAD_FUNCTION(ANeuralNetworks_getDeviceCount);
+ EXECUTE_FUNCTION_RETURN(numDevices);
+}
+
+/**
+ * Get the representation of the specified device.
+ *
+ * @param devIndex The index of the specified device. Must be less than the
+ * number of available devices.
+ * @param device The representation of the specified device.
+ * The same representation will always be returned for the
+ * specified device.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ *
+ * Available since API level 29.
+ */
+
+inline int ANeuralNetworks_getDevice(uint32_t devIndex, ANeuralNetworksDevice **device)
+{
+ LOAD_FUNCTION(ANeuralNetworks_getDevice);
+ EXECUTE_FUNCTION_RETURN(devIndex, device);
+}
+
+/**
+ * Get the name of the specified device.
+ *
+ * @param device The representation of the specified device.
+ * @param name The returned name of the specified device. The name will be in
+ * UTF-8 and will be null-terminated. It will be recognizable as a
+ * known device name rather than a cryptic string. For devices
+ * with API level 29 and above, the format of the name is
+ * {VENDOR}-{DEVICE}, e.g. “google-ipu”. For devices with feature
+ * level 28 or lower, the name will always be “unknown-device”.
+ * The name will remain valid for the duration of the application.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ *
+ * Available since API level 29.
+ */
+inline int ANeuralNetworksDevice_getName(const ANeuralNetworksDevice *device, const char **name)
+{
+ LOAD_FUNCTION(ANeuralNetworksDevice_getName);
+ EXECUTE_FUNCTION_RETURN(device, name);
+}
+
+/**
+ * Get the version of the driver implementation of the specified device.
+ *
+ * It’s the responsibility of the driver implementor to ensure that this version
+ * string uniquely distinguishes this implementation from all previous
+ * implementations.
+ *
+ * This version string must not be confused with the feature level which is
+ * solely defined by {@link ANeuralNetworksDevice_getFeatureLevel}. There is no
+ * implicit ordering of the versions. For example, it is not possible to filter
+ * all drivers older than a certain version.
+ *
+ * Application developers may use this version string to avoid or prefer
+ * specific driver implementations. For example, an application may want to do
+ * so because:
+ * - A specific version of the driver does not provide the required
+ * performance, perhaps because of a performance regression.
+ * - A specific version of the driver has a bug or returns results that
+ * don’t match the minimum precision requirement for the application.
+ *
+ * @param device The representation of the specified device.
+ * @param version The returned version string of the driver for the specified
+ * device. The string will be in UTF-8 and will be
+ * null-terminated. For devices with feature level 28 or lower,
+ * "UNKNOWN" will be returned. The version string will remain
+ * valid for the duration of the application.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ *
+ * Available since API level 29.
+ */
+inline int ANeuralNetworksDevice_getVersion(const ANeuralNetworksDevice *device,
+ const char **version)
+{
+ LOAD_FUNCTION(ANeuralNetworksDevice_getVersion);
+ EXECUTE_FUNCTION_RETURN(device, version);
+}
+
+/**
+ * Get the supported NNAPI version of the specified device.
+ *
+ * Each device has a supported feature level, which is the most advanced feature
+ * this driver implements. For example, if the driver implements the features
+ * introduced in Android P, but does not implement the features introduced after
+ * Android P, the value would be 28. Developers could decide whether or not the
+ * specified device should be used for a Model that has certain feature
+ * requirements.
+ *
+ * @param device The representation of the specified device.
+ * @param featureLevel The API level of the most advanced feature this driver
+ * implements.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ *
+ * Available since API level 29.
+ */
+inline int ANeuralNetworksDevice_getFeatureLevel(const ANeuralNetworksDevice *device,
+ int64_t *featureLevel)
+{
+ LOAD_FUNCTION(ANeuralNetworksDevice_getFeatureLevel);
+ EXECUTE_FUNCTION_RETURN(device, featureLevel);
+}
+
+/**
+ * Get the supported operations for a specified set of devices. If multiple
+ * devices are selected, the supported operation list is a union of supported
+ * operations of all selected devices.
+ *
+ * @param model The model to be queried.
+ * @param devices The set of devices. Must not contain duplicates.
+ * @param numDevices The number of devices in the set.
+ * @param supportedOps The boolean array to be filled. True means supported. The
+ * size of the boolean array must be at least as large as
+ * the number of operations in the model. The order of
+ * elements in the supportedOps array matches the order in
+ * which the corresponding operations were added to the
+ * model.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ *
+ * Available since API level 29.
+ */
+inline int
+ANeuralNetworksModel_getSupportedOperationsForDevices(const ANeuralNetworksModel *model,
+ const ANeuralNetworksDevice *const *devices,
+ uint32_t numDevices, bool *supportedOps)
+{
+ LOAD_FUNCTION(ANeuralNetworksModel_getSupportedOperationsForDevices);
+ EXECUTE_FUNCTION_RETURN(model, devices, numDevices, supportedOps);
+}
+
+/**
+ * Create a {@link ANeuralNetworksCompilation} to compile the given model for a
+ * specified set of devices. If more than one device is specified, the
+ * compilation will distribute the workload automatically across the devices.
+ * The model must be fully supported by the specified set of devices. This means
+ * that ANeuralNetworksModel_getSupportedOperationsForDevices() must have
+ * returned true for every operation for that model/devices pair.
+ *
+ * @param model The {@link ANeuralNetworksModel} to be compiled.
+ * @param devices The set of devices. Must not contain duplicates.
+ * @param numDevices The number of devices in the set.
+ * @param compilation The newly created object or NULL if unsuccessful.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA
+ * if the model is invalid.
+ *
+ * Available since API level 29.
+ */
+inline int ANeuralNetworksCompilation_createForDevices(ANeuralNetworksModel *model,
+ const ANeuralNetworksDevice *const *devices,
+ uint32_t numDevices,
+ ANeuralNetworksCompilation **compilation)
+{
+ LOAD_FUNCTION(ANeuralNetworksCompilation_createForDevices);
+ EXECUTE_FUNCTION_RETURN(model, devices, numDevices, compilation);
+}
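+
+// Editor's sketch (illustrative, not part of the upstream header): enumerating
+// devices and compiling for an explicit device set (API level 29+). Real code
+// should check deviceCount and every entry of supportedOps; that is elided.
+//
+//   uint32_t deviceCount = 0;
+//   ANeuralNetworks_getDeviceCount(&deviceCount);
+//   ANeuralNetworksDevice *device = nullptr;
+//   ANeuralNetworks_getDevice(0, &device);  // assumes deviceCount > 0
+//
+//   const ANeuralNetworksDevice *devices[] = {device};
+//   bool supportedOps[1] = {};              // one entry per operation in the model
+//   ANeuralNetworksModel_getSupportedOperationsForDevices(model, devices, 1, supportedOps);
+//
+//   ANeuralNetworksCompilation *deviceCompilation = nullptr;
+//   ANeuralNetworksCompilation_createForDevices(model, devices, 1, &deviceCompilation);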
+
+/**
+ * Sets the compilation caching signature and the cache directory.
+ *
+ * Provides optional caching information to the runtime for faster repeated
+ * compilation.
+ *
+ * See {@link ANeuralNetworksCompilation} for information on multithreaded
+ * usage.
+ *
+ * @param compilation The compilation to be modified.
+ * @param cacheDir The cache directory to store and retrieve caching data. It is
+ * recommended to use the code_cache provided by the Android
+ * runtime. If not using the code_cache, the user should choose
+ * a directory local to the application, and is responsible to
+ * manage and clean the cache entries.
+ * @param token The token provided by the user to specify a model, must be of
+ * length ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN. The user should
+ * ensure that the token is unique to a model within the
+ * application. The NNAPI runtime will not detect token
+ * collisions. If there is a collision, the compilation outcome may
+ * be incorrect without any error being reported.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ *
+ * Available since API level 29.
+ */
+inline int ANeuralNetworksCompilation_setCaching(ANeuralNetworksCompilation *compilation,
+ const char *cacheDir, const uint8_t *token)
+{
+ LOAD_FUNCTION(ANeuralNetworksCompilation_setCaching);
+ EXECUTE_FUNCTION_RETURN(compilation, cacheDir, token);
+}
+
+/**
+ * Schedule synchronous evaluation of the execution.
+ *
+ * <p>Schedules synchronous evaluation of the execution. Returns once the
+ * execution has completed and the outputs are ready to be consumed.
+ * </p>
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ *
+ * See {@link ANeuralNetworksExecution_startCompute} for asynchronous execution.
+ * Synchronous execution incurs lower overhead than asynchronous execution.
+ *
+ * Available since API level 29.
+ *
+ * @param execution The execution to be scheduled and executed.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if the execution completed normally.
+ * ANEURALNETWORKS_UNMAPPABLE if the execution input or output memory
+ * cannot be properly mapped.
+ */
+inline int ANeuralNetworksExecution_compute(ANeuralNetworksExecution *execution)
+{
+ LOAD_FUNCTION(ANeuralNetworksExecution_compute);
+ EXECUTE_FUNCTION_RETURN(execution);
+}
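+
+// Editor's sketch (illustrative, not part of the upstream header): the
+// synchronous variant of the asynchronous flow shown earlier -- one blocking
+// call replaces the startCompute/wait pair, with lower overhead per run.
+//
+//   ANeuralNetworksExecution_compute(execution);  // returns when outputs are ready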
+
+/**
+ * Get the dimensional information of the specified output operand of the model
+ * of the
+ * {@link ANeuralNetworksExecution}.
+ *
+ * On asynchronous execution initiated by {@link
+ * ANeuralNetworksExecution_startCompute},
+ * {@link ANeuralNetworksEvent_wait} must be called prior to this function to
+ * recuperate the resources used by the execution.
+ *
+ * @param execution The execution to be queried.
+ * @param index The index of the output argument we are querying. It is
+ * an index into the lists passed to
+ * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
+ * the index associated with {@link
+ * ANeuralNetworksModel_addOperand}.
+ * @param rank The rank of the output operand.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful,
+ * ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE if the target output is provided an
+ * insufficient buffer at execution time, ANEURALNETWORKS_BAD_DATA if the index
+ * is invalid.
+ *
+ * Available since API level 29.
+ */
+inline int ANeuralNetworksExecution_getOutputOperandRank(ANeuralNetworksExecution *execution,
+ int32_t index, uint32_t *rank)
+{
+ LOAD_FUNCTION(ANeuralNetworksExecution_getOutputOperandRank);
+ EXECUTE_FUNCTION_RETURN(execution, index, rank);
+}
+
+/**
+ * Get the dimensional information of the specified output operand of the model
+ * of the
+ * {@link ANeuralNetworksExecution}. The target output operand cannot be a
+ * scalar.
+ *
+ * On asynchronous execution initiated by
+ * {@link ANeuralNetworksExecution_startCompute},
+ * {@link ANeuralNetworksEvent_wait} must be called prior to this function to
+ * recuperate the resources used by the execution.
+ *
+ * @param execution The execution to be queried.
+ * @param index The index of the output argument we are querying. It is an index
+ * into the lists passed to
+ * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
+ * the index associated with
+ * {@link ANeuralNetworksModel_addOperand}.
+ * @param dimensions The dimension array to be filled. The size of the array
+ * must be exactly as large as the rank of the output operand
+ * to be queried in the model.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful,
+ * ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE if the target output is provided an
+ * insufficient buffer at execution time, ANEURALNETWORKS_BAD_DATA if the index
+ * is invalid or if the target is a scalar.
+ *
+ * Available since API level 29.
+ */
+inline int ANeuralNetworksExecution_getOutputOperandDimensions(ANeuralNetworksExecution *execution,
+ int32_t index, uint32_t *dimensions)
+{
+ LOAD_FUNCTION(ANeuralNetworksExecution_getOutputOperandDimensions);
+ EXECUTE_FUNCTION_RETURN(execution, index, dimensions);
+}
+
+/**
+ * Create a {@link ANeuralNetworksBurst} to apply the given compilation.
+ * This only creates the burst object. Computation is only performed once
+ * {@link ANeuralNetworksExecution_burstCompute} is invoked with a valid
+ * {@link ANeuralNetworksExecution} and {@link ANeuralNetworksBurst}.
+ *
+ * <p>The provided compilation must outlive the burst object.</p>
+ *
+ * Available since API level 29.
+ *
+ * @param compilation The {@link ANeuralNetworksCompilation} to be evaluated.
+ * @param burst The newly created object or NULL if unsuccessful.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA
+ * if the compilation is invalid.
+ */
+inline int ANeuralNetworksBurst_create(ANeuralNetworksCompilation *compilation,
+ ANeuralNetworksBurst **burst)
+{
+ LOAD_FUNCTION(ANeuralNetworksBurst_create);
+ EXECUTE_FUNCTION_RETURN(compilation, burst);
+}
+
+/**
+ * Destroys the burst object.
+ *
+ * Available since API level 29.
+ *
+ * @param burst The burst object to be destroyed. Passing NULL is acceptable and
+ * results in no operation.
+ */
+inline void ANeuralNetworksBurst_free(ANeuralNetworksBurst *burst)
+{
+ LOAD_FUNCTION(ANeuralNetworksBurst_free);
+ EXECUTE_FUNCTION(burst);
+}
+
+/**
+ * Schedule synchronous evaluation of the execution on a burst object.
+ *
+ * <p>Schedules synchronous evaluation of the execution. Returns once the
+ * execution has completed and the outputs are ready to be consumed.</p>
+ *
+ * <p>There must be at most one {@link ANeuralNetworksExecution} processing at
+ * any given time for any given burst object. Any
+ * {@link ANeuralNetworksExecution} launched before the previous has finished
+ * will result in ANEURALNETWORKS_BAD_STATE.</p>
+ *
+ * Available since API level 29.
+ *
+ * @param burst The burst object to execute on.
+ * @param execution The execution to be scheduled and executed. The execution
+ * must be created from the same {@link
+ * ANeuralNetworksCompilation} as the burst object.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if the execution completed normally.
+ */
+inline int ANeuralNetworksExecution_burstCompute(ANeuralNetworksExecution *execution,
+ ANeuralNetworksBurst *burst)
+{
+ LOAD_FUNCTION(ANeuralNetworksExecution_burstCompute);
+ EXECUTE_FUNCTION_RETURN(execution, burst);
+}
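A burst amortizes per-inference overhead across repeated runs on the same compilation. A sketch of the intended pattern (`kNumFrames` and the per-frame input/output setup are hypothetical):

  ANeuralNetworksBurst *burst = nullptr;
  ANeuralNetworksBurst_create(compilation, &burst);
  for (int i = 0; i < kNumFrames; ++i)
  {
    ANeuralNetworksExecution *execution = nullptr;
    ANeuralNetworksExecution_create(compilation, &execution);
    // ... set inputs/outputs for this frame ...
    ANeuralNetworksExecution_burstCompute(execution, burst); // one execution at a time
    ANeuralNetworksExecution_free(execution);
  }
  ANeuralNetworksBurst_free(burst);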
+
+/**
+ * Creates a shared memory object from an AHardwareBuffer handle.
+ *
+ * If the shared memory is backed by an AHardwareBuffer of
+ * AHARDWAREBUFFER_FORMAT_BLOB format, it can be used the same way as shared
+ * memory created from a file handle. See
+ * {@link ANeuralNetworksMemory} for a description on how to use this shared
+ * memory.
+ *
+ * If the shared memory is backed by an AHardwareBuffer of a format other than
+ * AHARDWAREBUFFER_FORMAT_BLOB, it can only be used for Model inputs and
+ * outputs. When calling {@link ANeuralNetworksExecution_setInputFromMemory} or
+ * {@link ANeuralNetworksExecution_setOutputFromMemory} with the shared memory,
+ * both offset and length must be set to zero and the entire memory region will
+ * be associated with the specified input or output operand. There is no
+ * guarantee that an arbitrary AHardwareBuffer_Format and
+ * AHardwareBuffer_UsageFlags combination can be used by arbitrary devices. The
+ * execution will fail if the selected set of devices cannot consume the buffer.
+ *
+ * Calling {@link ANeuralNetworksModel_setOperandValueFromMemory} with shared
+ * memory backed by an AHardwareBuffer of a format other than
+ * AHARDWAREBUFFER_FORMAT_BLOB is disallowed.
+ *
+ * TODO(miaowang): add documentation about intended usage with introspection
+ * API.
+ *
+ * Available since API level 29.
+ *
+ * @param ahwb The AHardwareBuffer handle.
+ * @param memory The memory object to be created.
+ * Set to NULL if unsuccessful.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if the request completed normally.
+ *
+ * @see AHardwareBuffer
+ */
+inline int ANeuralNetworksMemory_createFromAHardwareBuffer(const AHardwareBuffer *ahwb,
+ ANeuralNetworksMemory **memory)
+{
+ LOAD_FUNCTION(ANeuralNetworksMemory_createFromAHardwareBuffer);
+ EXECUTE_FUNCTION_RETURN(ahwb, memory);
+}
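For the BLOB case described above, a sketch of wrapping a freshly allocated buffer (requires <android/hardware_buffer.h>; `bufferSize` is a hypothetical byte count):

  AHardwareBuffer_Desc ahwbDesc = {};
  ahwbDesc.width = bufferSize; // for BLOB format, width is the size in bytes
  ahwbDesc.height = 1;
  ahwbDesc.layers = 1;
  ahwbDesc.format = AHARDWAREBUFFER_FORMAT_BLOB;
  ahwbDesc.usage =
    AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN | AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN;
  AHardwareBuffer *ahwb = nullptr;
  AHardwareBuffer_allocate(&ahwbDesc, &ahwb);
  ANeuralNetworksMemory *memory = nullptr;
  ANeuralNetworksMemory_createFromAHardwareBuffer(ahwb, &memory);
  // `memory` can now be used like fd-backed shared memory.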
+
+/**
+ * Specifies whether duration of the {@link ANeuralNetworksExecution} is to be
+ * measured. By default, duration is not measured.
+ *
+ * The {@link ANeuralNetworksExecution} must have been created from an
+ * {@link ANeuralNetworksCompilation} which in turn was created from
+ * {@link ANeuralNetworksCompilation_createForDevices} with numDevices = 1.
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ *
+ * Available since API level 29.
+ *
+ * @param execution The execution to be modified.
+ * @param measure 'true' if duration is to be measured, 'false' if not.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+inline int ANeuralNetworksExecution_setMeasureTiming(ANeuralNetworksExecution *execution,
+ bool measure)
+{
+ LOAD_FUNCTION(ANeuralNetworksExecution_setMeasureTiming);
+ EXECUTE_FUNCTION_RETURN(execution, measure);
+}
+
+/**
+ * Get the time spent in the specified {@link ANeuralNetworksExecution}, in
+ * nanoseconds. The execution must have completed.
+ *
+ * @param execution The execution to be queried.
+ * @param durationCode The measurement to be queried, specified by {@link
+ * DurationCode}.
+ * @param duration The returned duration. If no measurement was requested by
+ * {@link ANeuralNetworksExecution_setMeasureTiming}, or for
+ * some other reason the duration is not available, UINT64_MAX will be returned.
+ * A particular device need not support any given measurement.
+ *
+ * Available since API level 29.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+inline int ANeuralNetworksExecution_getDuration(const ANeuralNetworksExecution *execution,
+ int32_t durationCode, uint64_t *duration)
+{
+ LOAD_FUNCTION(ANeuralNetworksExecution_getDuration);
+ EXECUTE_FUNCTION_RETURN(execution, durationCode, duration);
+}
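A sketch of the measure-then-query flow (ANEURALNETWORKS_DURATION_ON_HARDWARE is one of the standard DurationCode values from NeuralNetworks.h; `execution` must come from a single-device compilation as noted above):

  ANeuralNetworksExecution_setMeasureTiming(execution, true); // before computing
  ANeuralNetworksExecution_compute(execution);
  uint64_t ns = UINT64_MAX;
  ANeuralNetworksExecution_getDuration(execution, ANEURALNETWORKS_DURATION_ON_HARDWARE, &ns);
  if (ns != UINT64_MAX)
  {
    // ns holds the measured on-hardware time in nanoseconds
  }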
+
+/**
+ * Queries whether an extension is supported by the driver implementation of
+ * the specified device.
+ *
+ * @param device The representation of the specified device.
+ * @param extensionName The extension name.
+ * @param isExtensionSupported The boolean value indicating whether the
+ * extension is supported.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ *
+ * Available since API level 29.
+ */
+inline int ANeuralNetworksDevice_getExtensionSupport(const ANeuralNetworksDevice *device,
+ const char *extensionName,
+ bool *isExtensionSupported)
+{
+ LOAD_FUNCTION(ANeuralNetworksDevice_getExtensionSupport);
+ EXECUTE_FUNCTION_RETURN(device, extensionName, isExtensionSupported);
+}
+
+/**
+ * Creates an operand type from an extension name and an extension operand code.
+ *
+ * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+ *
+ * Available since API level 29.
+ *
+ * @param model The model to contain the operand.
+ * @param extensionName The extension name.
+ * @param operandCodeWithinExtension The extension operand code.
+ * @param type The operand type.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+inline int ANeuralNetworksModel_getExtensionOperandType(ANeuralNetworksModel *model,
+ const char *extensionName,
+ uint16_t operandCodeWithinExtension,
+ int32_t *type)
+{
+ LOAD_FUNCTION(ANeuralNetworksModel_getExtensionOperandType);
+ EXECUTE_FUNCTION_RETURN(model, extensionName, operandCodeWithinExtension, type);
+}
+
+/**
+ * Creates an operation type from an extension name and an extension operation
+ * code.
+ *
+ * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+ *
+ * Available since API level 29.
+ *
+ * @param model The model to contain the operation.
+ * @param extensionName The extension name.
+ * @param operationCodeWithinExtension The extension operation code.
+ * @param type The operation type.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+inline int ANeuralNetworksModel_getExtensionOperationType(ANeuralNetworksModel *model,
+ const char *extensionName,
+ uint16_t operationCodeWithinExtension,
+ ANeuralNetworksOperationType *type)
+{
+ LOAD_FUNCTION(ANeuralNetworksModel_getExtensionOperationType);
+ EXECUTE_FUNCTION_RETURN(model, extensionName, operationCodeWithinExtension, type);
+}
+
+/**
+ * Sets extension operand parameters.
+ *
+ * Available since API level 29.
+ *
+ * @param model The model to be modified.
+ * @param index The index of the model operand we're setting.
+ * @param data A pointer to the extension operand data.
+ * The data does not have to outlive the call to this function.
+ * @param length The size in bytes of the data value.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+inline int ANeuralNetworksModel_setOperandExtensionData(ANeuralNetworksModel *model, int32_t index,
+ const void *data, size_t length)
+{
+ LOAD_FUNCTION(ANeuralNetworksModel_setOperandExtensionData);
+ EXECUTE_FUNCTION_RETURN(model, index, data, length);
+}
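The four extension entry points above compose as follows; a sketch with a hypothetical extension name, operand code, and params struct (all illustrative only):

  bool supported = false;
  ANeuralNetworksDevice_getExtensionSupport(device, "com.example.my_extension", &supported);
  if (supported)
  {
    int32_t extType = 0;
    ANeuralNetworksModel_getExtensionOperandType(model, "com.example.my_extension",
                                                 /*operandCodeWithinExtension=*/1, &extType);
    // Use extType as the type field of an ANeuralNetworksOperandType when adding
    // the operand, then attach extension-specific parameters to it:
    //   ANeuralNetworksModel_setOperandExtensionData(model, operandIndex,
    //                                                &params, sizeof(params));
  }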
+#if __ANDROID_API__ >= 30
+/**
+ * Create a {@link ANeuralNetworksMemoryDesc} with no properties.
+ *
+ * This only creates the memory descriptor. Its properties should be set with
+ * calls to {@link ANeuralNetworksMemoryDesc_addInputRole},
+ * {@link ANeuralNetworksMemoryDesc_addOutputRole}, and
+ * {@link ANeuralNetworksMemoryDesc_setDimensions}.
+ *
+ * {@link ANeuralNetworksMemoryDesc_finish} must be called once all properties
+ * have been set.
+ *
+ * {@link ANeuralNetworksMemoryDesc_free} must be called once the memory
+ * descriptor is no longer needed.
+ *
+ * Available since API level 30.
+ *
+ * @param desc The {@link ANeuralNetworksMemoryDesc} to be created.
+ * Set to NULL if unsuccessful.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+inline int ANeuralNetworksMemoryDesc_create(ANeuralNetworksMemoryDesc **desc)
+{
+ LOAD_FUNCTION(ANeuralNetworksMemoryDesc_create);
+ EXECUTE_FUNCTION_RETURN(desc);
+}
+
+/**
+ * Destroy a memory descriptor.
+ *
+ * The memory descriptor need not have been finished by a call to
+ * {@link ANeuralNetworksMemoryDesc_finish}.
+ *
+ * See {@link ANeuralNetworksMemoryDesc} for information on multithreaded usage.
+ *
+ * Available since API level 30.
+ *
+ * @param desc The memory descriptor to be destroyed. Passing NULL is acceptable
+ * and results in no operation.
+ */
+inline void ANeuralNetworksMemoryDesc_free(ANeuralNetworksMemoryDesc *desc)
+{
+ LOAD_FUNCTION(ANeuralNetworksMemoryDesc_free);
+ EXECUTE_FUNCTION(desc);
+}
+
+/**
+ * Specify that a memory object will be playing the role of an output to an
+ * execution created from a particular compilation.
+ *
+ * The compilation and the output index fully specify an output operand. This
+ * function may be invoked multiple times on the same memory descriptor with
+ * different output operands, and the same output operand may be specified on
+ * multiple memory descriptors. However, specifying the same output operand on
+ * the same memory descriptor object more than once will return an error.
+ *
+ * The dimensions of the corresponding model operands of all the roles
+ * specified by {@link ANeuralNetworksMemoryDesc_addInputRole} and
+ * {@link ANeuralNetworksMemoryDesc_addOutputRole} must be compatible with each
+ * other. Two dimensions are incompatible if both ranks are fully specified but
+ * have different values, or if there is at least one axis that is fully
+ * specified in both but has different values.
+ *
+ * At least one of {@link ANeuralNetworksMemoryDesc_addInputRole} and
+ * {@link ANeuralNetworksMemoryDesc_addOutputRole} must be called on the memory
+ * descriptor before invoking {@link ANeuralNetworksMemoryDesc_finish}.
+ *
+ * Attempting to modify a memory descriptor once
+ * {@link ANeuralNetworksMemoryDesc_finish} has been called will return an
+ * error.
+ *
+ * See {@link ANeuralNetworksMemoryDesc} for information on multithreaded usage.
+ *
+ * Available since API level 30.
+ *
+ * @param desc The memory descriptor to be modified.
+ * @param compilation The compilation object. It must already have been finished
+ * by calling {@link ANeuralNetworksCompilation_finish}, and must outlive the
+ * memory descriptor.
+ * @param index The index of the output argument we are referencing from the
+ * compilation. It is an index into the outputs list passed to
+ * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
+ * the index associated with {@link
+ * ANeuralNetworksModel_addOperand}.
+ * @param frequency A floating-point value within the range (0.0, 1.0].
+ * Describes how likely the memory is to be used in the specified role. This is
+ * provided as a hint to optimize the case when different roles
+ * prefer different memory locations or data layouts.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+inline int ANeuralNetworksMemoryDesc_addOutputRole(ANeuralNetworksMemoryDesc *desc,
+                                                   const ANeuralNetworksCompilation *compilation,
+                                                   uint32_t index, float frequency)
+{
+ LOAD_FUNCTION(ANeuralNetworksMemoryDesc_addOutputRole);
+ EXECUTE_FUNCTION_RETURN(desc, compilation, index, frequency);
+}
+
+/**
+ * Specify that a memory object will be playing the role of an input to an
+ * execution created from a particular compilation.
+ *
+ * The compilation and the input index fully specify an input operand. This
+ * function may be invoked multiple times on the same memory descriptor with
+ * different input operands, and the same input operand may be specified on
+ * multiple memory descriptors. However, specifying the same input operand on
+ * the same memory descriptor more than once will return an error.
+ *
+ * The dimensions of the corresponding model operands of all the roles
+ * specified by {@link ANeuralNetworksMemoryDesc_addInputRole} and
+ * {@link ANeuralNetworksMemoryDesc_addOutputRole} must be compatible with each
+ * other. Two dimensions are incompatible if both ranks are fully specified but
+ * have different values, or if there is at least one axis that is fully
+ * specified in both but has different values.
+ *
+ * At least one of {@link ANeuralNetworksMemoryDesc_addInputRole} and
+ * {@link ANeuralNetworksMemoryDesc_addOutputRole} must be called on a memory
+ * descriptor before invoking {@link ANeuralNetworksMemoryDesc_finish}.
+ *
+ * Attempting to modify a memory descriptor once
+ * {@link ANeuralNetworksMemoryDesc_finish} has been called will return an
+ * error.
+ *
+ * See {@link ANeuralNetworksMemoryDesc} for information on multithreaded usage.
+ *
+ * Available since API level 30.
+ *
+ * @param desc The memory descriptor to be modified.
+ * @param compilation The compilation object. It must already have been finished
+ * by calling {@link ANeuralNetworksCompilation_finish}, and must outlive the
+ * memory descriptor.
+ * @param index The index of the input argument we are referencing from the
+ * compilation. It is an index into the inputs list passed to
+ * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
+ * the index associated with {@link
+ * ANeuralNetworksModel_addOperand}.
+ * @param frequency A floating-point value within the range (0.0, 1.0].
+ * Describes how likely the memory is to be used in the specified role. This is
+ * provided as a hint to optimize the case when different roles
+ * prefer different memory locations or data layouts.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+inline int ANeuralNetworksMemoryDesc_addInputRole(ANeuralNetworksMemoryDesc *desc,
+ const ANeuralNetworksCompilation *compilation,
+ uint32_t index, float frequency)
+{
+ LOAD_FUNCTION(ANeuralNetworksMemoryDesc_addInputRole);
+ EXECUTE_FUNCTION_RETURN(desc, compilation, index, frequency);
+}
+
+/**
+ * Set the dimensional information of the memory descriptor.
+ *
+ * The specified dimensions must be compatible with the dimensions of the
+ * corresponding model operands of all the roles specified by
+ * {@link ANeuralNetworksMemoryDesc_addInputRole} and
+ * {@link ANeuralNetworksMemoryDesc_addOutputRole}. Two dimensions are
+ * incompatible if both ranks are fully specified but have different values, or
+ * if there is at least one axis that is fully specified in both but has
+ * different values.
+ *
+ * Attempting to modify a memory descriptor once
+ * {@link ANeuralNetworksMemoryDesc_finish} has been called will return an
+ * error.
+ *
+ * See {@link ANeuralNetworksMemoryDesc} for information on multithreaded usage.
+ *
+ * Available since API level 30.
+ *
+ * @param desc The memory descriptor to be modified.
+ * @param rank The number of dimensions. Must be 0 for scalars.
+ * @param dimensions An array of dimensions. An entry with the value 0 indicates
+ * that the corresponding axis has an unknown size.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+inline int ANeuralNetworksMemoryDesc_setDimensions(ANeuralNetworksMemoryDesc *desc, uint32_t rank,
+ const uint32_t *dimensions)
+{
+ LOAD_FUNCTION(ANeuralNetworksMemoryDesc_setDimensions);
+ EXECUTE_FUNCTION_RETURN(desc, rank, dimensions);
+}
+
+/**
+ * Indicate that we have finished modifying a memory descriptor. Required
+ * before calling {@link ANeuralNetworksMemory_createFromDesc}.
+ *
+ * This function must only be called once for a given memory descriptor.
+ *
+ * See {@link ANeuralNetworksMemoryDesc} for information on multithreaded usage.
+ *
+ * Available since API level 30.
+ *
+ * @param desc The memory descriptor to be finished.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+inline int ANeuralNetworksMemoryDesc_finish(ANeuralNetworksMemoryDesc *desc)
+{
+ LOAD_FUNCTION(ANeuralNetworksMemoryDesc_finish);
+ EXECUTE_FUNCTION_RETURN(desc);
+}
+
+/**
+ * Creates a memory object from a memory descriptor.
+ *
+ * The memory object is created with an uninitialized buffer. A memory object
+ * with an uninitialized buffer may only be used according to the roles
+ * specified by {@link ANeuralNetworksMemoryDesc_addOutputRole}, or as the
+ * destination memory in {@link ANeuralNetworksMemory_copy}. The buffer of a
+ * memory object is
+ * initialized after the memory object is used as an output in a successful
+ * execution, or used as the destination memory in a successful {@link
+ * ANeuralNetworksMemory_copy}. A memory object with an initialized buffer may
+ * be used according to all roles specified in
+ * {@link ANeuralNetworksMemoryDesc}, or as the source or destination memory in
+ * {@link ANeuralNetworksMemory_copy}. The buffer of a memory object will return
+ * to the uninitialized state if the memory object is used as an output in a
+ * failed execution, or used as the destination memory in a failed {@link
+ * ANeuralNetworksMemory_copy}.
+ *
+ * The dimensions of the memory descriptor are deduced from the dimensions of
+ * the corresponding model operands of all the roles specified by
+ * {@link ANeuralNetworksMemoryDesc_addInputRole} and
+ * {@link ANeuralNetworksMemoryDesc_addOutputRole}, as well as the dimensions
+ * set by the call to {@link ANeuralNetworksMemoryDesc_setDimensions}, if any.
+ * The memory descriptor may have unspecified dimensions or rank. In such a
+ * case, the same memory object may be used with different shapes of outputs in
+ * different executions. When the memory is used as an input, the input shape
+ * must be the same as the output shape from the last execution using this
+ * memory object as an output, or the last
+ * {@link ANeuralNetworksMemory_copy} using this memory object as the destination
+ * memory. Creating a memory object with unspecified dimensions or rank may fail
+ * for certain sets of roles.
+ *
+ * Using the memory in roles or shapes that are not compatible with the rules
+ * specified above will return an error.
+ *
+ * When calling {@link ANeuralNetworksExecution_setInputFromMemory} or
+ * {@link ANeuralNetworksExecution_setOutputFromMemory} with the memory object,
+ * both offset and length must be set to zero and the entire memory region will
+ * be associated with the specified input or output operand.
+ *
+ * Calling {@link ANeuralNetworksModel_setOperandValueFromMemory} with the
+ * memory created from this function will return an error.
+ *
+ * {@link ANeuralNetworksMemory_free} must be called once the memory is no
+ * longer needed.
+ *
+ * Attempting to create memory from an unfinished memory descriptor will return
+ * an error.
+ *
+ * The provided {@link ANeuralNetworksMemoryDesc} need not outlive the
+ * {@link ANeuralNetworksMemory} object.
+ *
+ * Available since API level 30.
+ *
+ * @param desc The memory descriptor.
+ * @param memory The memory object to be created.
+ * Set to NULL if unsuccessful.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful; ANEURALNETWORKS_OP_FAILED if
+ * the memory is created with unspecified dimensions or rank and it is not
+ * supported for this set of roles.
+ */
+inline int ANeuralNetworksMemory_createFromDesc(const ANeuralNetworksMemoryDesc *desc,
+ ANeuralNetworksMemory **memory)
+{
+ LOAD_FUNCTION(ANeuralNetworksMemory_createFromDesc);
+ EXECUTE_FUNCTION_RETURN(desc, memory);
+}
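A sketch of the whole descriptor workflow, wiring one compilation's output directly into another's input via a device-preferred memory (`compilationA`/`compilationB` are hypothetical and both already finished):

  ANeuralNetworksMemoryDesc *desc = nullptr;
  ANeuralNetworksMemoryDesc_create(&desc);
  ANeuralNetworksMemoryDesc_addOutputRole(desc, compilationA, 0, 1.0f);
  ANeuralNetworksMemoryDesc_addInputRole(desc, compilationB, 0, 1.0f);
  ANeuralNetworksMemoryDesc_finish(desc);
  ANeuralNetworksMemory *deviceMemory = nullptr;
  ANeuralNetworksMemory_createFromDesc(desc, &deviceMemory);
  ANeuralNetworksMemoryDesc_free(desc); // the descriptor need not outlive the memory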
+
+/**
+ * Copies data from one memory object to another.
+ *
+ * If at most one of the src and dst is created from
+ * {@link ANeuralNetworksMemory_createFromDesc}, the src and dst must have the
+ * same logical size:
+ * - If the memory is created from {@link ANeuralNetworksMemory_createFromFd},
+ * or if it is created from {@link
+ * ANeuralNetworksMemory_createFromAHardwareBuffer} with format of
+ * AHARDWAREBUFFER_FORMAT_BLOB, the logical size equals the size of the memory.
+ * - If the memory is created from
+ * {@link ANeuralNetworksMemory_createFromAHardwareBuffer} with a format other
+ * than AHARDWAREBUFFER_FORMAT_BLOB, the logical size equals the size when there
+ * is no padding and the data is tightly packed. This function may fail if the
+ * AHardwareBuffer cannot be accessed.
+ * - If the memory is created from {@link ANeuralNetworksMemory_createFromDesc},
+ * the logical size equals the size indicated by the {@link OperandCode}
+ * multiplied by the number of elements. This function will fail if the number
+ * of elements is unknown.
+ *
+ * If both src and dst are created from {@link
+ * ANeuralNetworksMemory_createFromDesc}, they must have compatible dimensions.
+ * Two dimensions are incompatible if both ranks are fully specified but have
+ * different values, or if there is at least one axis that is fully specified in
+ * both but has different values. The dst may have unspecified dimensions or
+ * rank. In such a case, the dimensions of dst will get updated according to the
+ * dimensions of the src.
+ *
+ * In both cases, if the src is created from
+ * {@link ANeuralNetworksMemory_createFromDesc}, it must have been used as an
+ * output in a successful execution, or used as the destination memory in a
+ * successful
+ * {@link ANeuralNetworksMemory_copy}.
+ *
+ * The src and dst may have different data layout, in which case the data
+ * copying is performed logically with data layout transformation.
+ *
+ * Available since API level 30.
+ *
+ * @param src The source memory object.
+ * @param dst The destination memory object.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+inline int ANeuralNetworksMemory_copy(const ANeuralNetworksMemory *src,
+ const ANeuralNetworksMemory *dst)
+{
+ LOAD_FUNCTION(ANeuralNetworksMemory_copy);
+ EXECUTE_FUNCTION_RETURN(src, dst);
+}
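Because desc-created memories start uninitialized and disallow nonzero offsets, a common pattern is staging through ordinary memories of the same logical size (`stagingIn`/`stagingOut` are hypothetical fd- or BLOB-backed memories):

  ANeuralNetworksMemory_copy(stagingIn, deviceMemory);  // initialize the device memory
  // ... executions that use deviceMemory via setInputFromMemory /
  //     setOutputFromMemory, with offset and length both zero ...
  ANeuralNetworksMemory_copy(deviceMemory, stagingOut); // read the results back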
+#endif // __ANDROID_API__ >= 30
+/**/
+
+#endif // __NEURAL_NETWORKS_SHIM_H__
diff --git a/runtime/libs/nnapi/include/NeuralNetworksTypes.h b/runtime/libs/nnapi/include/NeuralNetworksTypes.h
new file mode 100644
index 000000000..35c7a5802
--- /dev/null
+++ b/runtime/libs/nnapi/include/NeuralNetworksTypes.h
@@ -0,0 +1,221 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// NOTE This header is derived from part of the following file
+// https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/lite/nnapi/NeuralNetworksTypes.h
+
+#ifndef __NEURAL_NETWORKS_TYPES_H__
+#define __NEURAL_NETWORKS_TYPES_H__
+
+#include "NeuralNetworks.h"
+
+// NN api types based on NNAPI header file
+// https://developer.android.com/ndk/reference/group/neural-networks
+
+// nn api function types
+
+typedef int (*ANeuralNetworksMemory_createFromFd_fn)(size_t size, int protect, int fd,
+ size_t offset, ANeuralNetworksMemory **memory);
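Each typedef in this header names the signature of one NNAPI entry point so the shim can cast the result of a runtime symbol lookup. Assuming the LOAD_FUNCTION / EXECUTE_FUNCTION_RETURN helpers keep the shape shown in the removed v1.1 load helpers later in this patch, a shim body expands roughly to:

  // Inside ANeuralNetworksMemory_createFromFd(...), approximately:
  static ANeuralNetworksMemory_createFromFd_fn fn =
    reinterpret_cast<ANeuralNetworksMemory_createFromFd_fn>(
      nnfw::loadFunction("ANeuralNetworksMemory_createFromFd"));
  return fn != nullptr ? fn(size, protect, fd, offset, memory) : 0;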
+
+typedef void (*ANeuralNetworksMemory_free_fn)(ANeuralNetworksMemory *memory);
+
+typedef int (*ANeuralNetworksModel_create_fn)(ANeuralNetworksModel **model);
+
+typedef int (*ANeuralNetworksModel_finish_fn)(ANeuralNetworksModel *model);
+
+typedef void (*ANeuralNetworksModel_free_fn)(ANeuralNetworksModel *model);
+
+typedef int (*ANeuralNetworksCompilation_create_fn)(ANeuralNetworksModel *model,
+ ANeuralNetworksCompilation **compilation);
+
+typedef void (*ANeuralNetworksCompilation_free_fn)(ANeuralNetworksCompilation *compilation);
+
+typedef int (*ANeuralNetworksCompilation_setPreference_fn)(ANeuralNetworksCompilation *compilation,
+ int32_t preference);
+
+typedef int (*ANeuralNetworksCompilation_finish_fn)(ANeuralNetworksCompilation *compilation);
+
+typedef int (*ANeuralNetworksModel_addOperand_fn)(ANeuralNetworksModel *model,
+ const ANeuralNetworksOperandType *type);
+
+typedef int (*ANeuralNetworksModel_setOperandValue_fn)(ANeuralNetworksModel *model, int32_t index,
+ const void *buffer, size_t length);
+
+typedef int (*ANeuralNetworksModel_setOperandSymmPerChannelQuantParams_fn)(
+ ANeuralNetworksModel *model, int32_t index,
+ const ANeuralNetworksSymmPerChannelQuantParams *channelQuant);
+
+typedef int (*ANeuralNetworksModel_setOperandValueFromMemory_fn)(
+ ANeuralNetworksModel *model, int32_t index, const ANeuralNetworksMemory *memory, size_t offset,
+ size_t length);
+
+typedef int (*ANeuralNetworksModel_addOperation_fn)(ANeuralNetworksModel *model,
+ ANeuralNetworksOperationType type,
+ uint32_t inputCount, const uint32_t *inputs,
+ uint32_t outputCount, const uint32_t *outputs);
+
+typedef int (*ANeuralNetworksModel_identifyInputsAndOutputs_fn)(ANeuralNetworksModel *model,
+ uint32_t inputCount,
+ const uint32_t *inputs,
+ uint32_t outputCount,
+ const uint32_t *outputs);
+
+typedef int (*ANeuralNetworksModel_relaxComputationFloat32toFloat16_fn)(ANeuralNetworksModel *model,
+ bool allow);
+
+typedef int (*ANeuralNetworksExecution_create_fn)(ANeuralNetworksCompilation *compilation,
+ ANeuralNetworksExecution **execution);
+
+typedef void (*ANeuralNetworksExecution_free_fn)(ANeuralNetworksExecution *execution);
+
+typedef int (*ANeuralNetworksExecution_setInput_fn)(ANeuralNetworksExecution *execution,
+ int32_t index,
+ const ANeuralNetworksOperandType *type,
+ const void *buffer, size_t length);
+
+typedef int (*ANeuralNetworksExecution_setInputFromMemory_fn)(
+ ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type,
+ const ANeuralNetworksMemory *memory, size_t offset, size_t length);
+
+typedef int (*ANeuralNetworksExecution_setOutput_fn)(ANeuralNetworksExecution *execution,
+ int32_t index,
+ const ANeuralNetworksOperandType *type,
+ void *buffer, size_t length);
+
+typedef int (*ANeuralNetworksExecution_setOutputFromMemory_fn)(
+ ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type,
+ const ANeuralNetworksMemory *memory, size_t offset, size_t length);
+
+typedef int (*ANeuralNetworksExecution_startCompute_fn)(ANeuralNetworksExecution *execution,
+ ANeuralNetworksEvent **event);
+
+typedef int (*ANeuralNetworksEvent_wait_fn)(ANeuralNetworksEvent *event);
+
+typedef void (*ANeuralNetworksEvent_free_fn)(ANeuralNetworksEvent *event);
+
+typedef int (*ASharedMemory_create_fn)(const char *name, size_t size);
+
+typedef int (*ANeuralNetworks_getDeviceCount_fn)(uint32_t *numDevices);
+
+typedef int (*ANeuralNetworks_getDevice_fn)(uint32_t devIndex, ANeuralNetworksDevice **device);
+
+typedef int (*ANeuralNetworksDevice_getName_fn)(const ANeuralNetworksDevice *device,
+ const char **name);
+
+typedef int (*ANeuralNetworksDevice_getType_fn)(const ANeuralNetworksDevice *device, int32_t *type);
+
+typedef int (*ANeuralNetworksDevice_getVersion_fn)(const ANeuralNetworksDevice *device,
+ const char **version);
+
+typedef int (*ANeuralNetworksDevice_getFeatureLevel_fn)(const ANeuralNetworksDevice *device,
+ int64_t *featureLevel);
+
+typedef int (*ANeuralNetworksModel_getSupportedOperationsForDevices_fn)(
+ const ANeuralNetworksModel *model, const ANeuralNetworksDevice *const *devices,
+ uint32_t numDevices, bool *supportedOps);
+
+typedef int (*ANeuralNetworksCompilation_createForDevices_fn)(
+ ANeuralNetworksModel *model, const ANeuralNetworksDevice *const *devices, uint32_t numDevices,
+ ANeuralNetworksCompilation **compilation);
+
+typedef int (*ANeuralNetworksCompilation_setCaching_fn)(ANeuralNetworksCompilation *compilation,
+ const char *cacheDir, const uint8_t *token);
+
+#if __ANDROID_API__ >= 30
+typedef int (*ANeuralNetworksCompilation_setTimeout_fn)(ANeuralNetworksCompilation *compilation,
+ uint64_t duration);
+
+typedef int (*ANeuralNetworksCompilation_setPriority_fn)(ANeuralNetworksCompilation *compilation,
+ int priority);
+#endif // __ANDROID_API__ >= 30
+
+typedef int (*ANeuralNetworksExecution_compute_fn)(ANeuralNetworksExecution *execution);
+
+#if __ANDROID_API__ >= 30
+typedef int (*ANeuralNetworksExecution_setTimeout_fn)(ANeuralNetworksExecution *execution,
+ uint64_t duration);
+
+typedef int (*ANeuralNetworksExecution_setLoopTimeout_fn)(ANeuralNetworksExecution *execution,
+ uint64_t duration);
+#endif // __ANDROID_API__ >= 30
+
+typedef int (*ANeuralNetworksExecution_getOutputOperandRank_fn)(ANeuralNetworksExecution *execution,
+ int32_t index, uint32_t *rank);
+
+typedef int (*ANeuralNetworksExecution_getOutputOperandDimensions_fn)(
+ ANeuralNetworksExecution *execution, int32_t index, uint32_t *dimensions);
+
+typedef int (*ANeuralNetworksBurst_create_fn)(ANeuralNetworksCompilation *compilation,
+ ANeuralNetworksBurst **burst);
+
+typedef void (*ANeuralNetworksBurst_free_fn)(ANeuralNetworksBurst *burst);
+
+typedef int (*ANeuralNetworksExecution_burstCompute_fn)(ANeuralNetworksExecution *execution,
+ ANeuralNetworksBurst *burst);
+
+typedef int (*ANeuralNetworksMemory_createFromAHardwareBuffer_fn)(const AHardwareBuffer *ahwb,
+ ANeuralNetworksMemory **memory);
+
+typedef int (*ANeuralNetworksExecution_setMeasureTiming_fn)(ANeuralNetworksExecution *execution,
+ bool measure);
+
+typedef int (*ANeuralNetworksExecution_getDuration_fn)(const ANeuralNetworksExecution *execution,
+ int32_t durationCode, uint64_t *duration);
+
+typedef int (*ANeuralNetworksDevice_getExtensionSupport_fn)(const ANeuralNetworksDevice *device,
+ const char *extensionName,
+ bool *isExtensionSupported);
+
+typedef int (*ANeuralNetworksModel_getExtensionOperandType_fn)(ANeuralNetworksModel *model,
+ const char *extensionName,
+ uint16_t operandCodeWithinExtension,
+ int32_t *type);
+
+typedef int (*ANeuralNetworksModel_getExtensionOperationType_fn)(
+ ANeuralNetworksModel *model, const char *extensionName, uint16_t operationCodeWithinExtension,
+ ANeuralNetworksOperationType *type);
+
+typedef int (*ANeuralNetworksModel_setOperandExtensionData_fn)(ANeuralNetworksModel *model,
+ int32_t index, const void *data,
+ size_t length);
+
+#if __ANDROID_API__ >= 30
+typedef int (*ANeuralNetworksMemoryDesc_create_fn)(ANeuralNetworksMemoryDesc **desc);
+
+typedef void (*ANeuralNetworksMemoryDesc_free_fn)(ANeuralNetworksMemoryDesc *desc);
+
+typedef int (*ANeuralNetworksMemoryDesc_addInputRole_fn)(
+  ANeuralNetworksMemoryDesc *desc, const ANeuralNetworksCompilation *compilation, uint32_t index,
+  float frequency);
+
+typedef int (*ANeuralNetworksMemoryDesc_addOutputRole_fn)(
+ ANeuralNetworksMemoryDesc *desc, const ANeuralNetworksCompilation *compilation, uint32_t index,
+ float frequency);
+
+typedef int (*ANeuralNetworksMemoryDesc_setDimensions_fn)(ANeuralNetworksMemoryDesc *desc,
+ uint32_t rank,
+ const uint32_t *dimensions);
+
+typedef int (*ANeuralNetworksMemoryDesc_finish_fn)(ANeuralNetworksMemoryDesc *desc);
+
+typedef int (*ANeuralNetworksMemory_createFromDesc_fn)(const ANeuralNetworksMemoryDesc *desc,
+ ANeuralNetworksMemory **memory);
+
+typedef int (*ANeuralNetworksMemory_copy_fn)(const ANeuralNetworksMemory *src,
+ const ANeuralNetworksMemory *dst);
+#endif // __ANDROID_API__ >= 30
+#endif // __NEURAL_NETWORKS_TYPES_H__
diff --git a/runtime/libs/nnapi/v1.1/CMakeLists.txt b/runtime/libs/nnapi/v1.1/CMakeLists.txt
deleted file mode 100644
index dc018c60f..000000000
--- a/runtime/libs/nnapi/v1.1/CMakeLists.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-add_library(nnfw_lib_nnapi_1_1 INTERFACE)
-
-target_include_directories(nnfw_lib_nnapi_1_1 INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/include)
-target_link_libraries(nnfw_lib_nnapi_1_1 INTERFACE nnfw-nnapi-header)
diff --git a/runtime/libs/nnapi/v1.1/include/NeuralNetworksExShim.h b/runtime/libs/nnapi/v1.1/include/NeuralNetworksExShim.h
deleted file mode 100644
index f684dab90..000000000
--- a/runtime/libs/nnapi/v1.1/include/NeuralNetworksExShim.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-/**
- * @file NeuralNetworksExShim.h
- * @brief This file contains an actual implementation of
- * ANeuralNetworksModel_addOperationEx function
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef NN_API_EX_SHIM_H
-#define NN_API_EX_SHIM_H
-
-#include "NeuralNetworksEx.h"
-#include "NeuralNetworksLoadHelpers.h"
-
-typedef int (*ANeuralNetworksModel_addOperationEx_fn)(ANeuralNetworksModel *model,
- ANeuralNetworksOperationTypeEx type,
- uint32_t inputCount, const uint32_t *inputs,
- uint32_t outputCount,
- const uint32_t *outputs);
-
-/**
- * @brief Add an extended operation to a model.
- *
- * @param[in] model The model to be modified.
- * @param[in] type The type of extended operation.
- * @param[in] inputCount The number of entries in the inputs array.
- * @param[in] inputs An array of indexes identifying each operand.
- * @param[in] outputCount The number of entries in the outputs array.
- * @param[in] outputs An array of indexes identifying each operand.
- *
- * @note The operands specified by inputs and outputs must have been
- * previously added by calls to {@link ANeuralNetworksModel_addOperand}.\n
- * Attempting to modify a model once {@link ANeuralNetworksModel_finish}
- * has been called will return an error.\n
- * See {@link ANeuralNetworksModel} for information on multithreaded usage.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-
-inline int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model,
- ANeuralNetworksOperationTypeEx type,
- uint32_t inputCount, const uint32_t *inputs,
- uint32_t outputCount, const uint32_t *outputs)
-{
- LOAD_FUNCTION(ANeuralNetworksModel_addOperationEx);
- EXECUTE_FUNCTION_RETURN(model, type, inputCount, inputs, outputCount, outputs);
-}
-
-#endif // NN_API_EX_SHIM_H
diff --git a/runtime/libs/nnapi/v1.1/include/NeuralNetworksLoadHelpers.h b/runtime/libs/nnapi/v1.1/include/NeuralNetworksLoadHelpers.h
deleted file mode 100644
index 201465f9c..000000000
--- a/runtime/libs/nnapi/v1.1/include/NeuralNetworksLoadHelpers.h
+++ /dev/null
@@ -1,141 +0,0 @@
-/* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-// NOTE To minimize diff with upstream tensorflow, disable clang-format
-// clang-format off
-
-// NOTE This header is derived from part of the following file (in TensorFlow v1.12)
-// 'externals/tensorflow/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h'
-
-/**
- * @file NeuralNetworksLoadHelpers.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains functions to load NN API runtime library
- */
-
-#ifndef __NEURAL_NETWORKS_LOAD_HELPER_H__
-#define __NEURAL_NETWORKS_LOAD_HELPER_H__
-
-#include <dlfcn.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-/**
- * @brief Print log data
- * @param[in] format Format string of @c printf
- * @param[in] args Argument after format string. (Same with @c printf)
- */
-#define NNAPI_LOG(format, ...) printf(format "\n", __VA_ARGS__);
-
-/**
- * @brief Create a function pointer named @c fn after loading NN API library
- * @param[in] name Name of a function
- */
-#define LOAD_FUNCTION(name) \
- static name##_fn fn = reinterpret_cast<name##_fn>(nnfw::loadFunction(#name));
-
-/**
- * @brief Run @c fn function. @c fn is created by @ref LOAD_FUNCTION
- * @param[in] args List of arguments for the function @c fn
- */
-#define EXECUTE_FUNCTION(...) \
- if (fn != nullptr) { \
- fn(__VA_ARGS__); \
- }
-
-/**
- * @brief Run @c fn function. @c fn is created by @ref LOAD_FUNCTION
- * @param[in] args List of arguments for the function @c fn
- * @return the return value of @c fn
- */
-#define EXECUTE_FUNCTION_RETURN(...) return fn != nullptr ? fn(__VA_ARGS__) : 0;
-
-namespace nnfw
-{
-
-/**
- * @brief Load NN API library
- * @param[in] name path of NN API library
- * @return a symbol table handle of NN API library
- */
-inline void* loadLibrary(const char* name) {
- // TODO: change RTLD_LOCAL? Assumes there can be multiple instances of nn
- // api RT
- void* handle = nullptr;
-#if 1 //#ifdef __ANDROID__
- handle = dlopen(name, RTLD_LAZY | RTLD_LOCAL);
- if (handle == nullptr) {
- NNAPI_LOG("nnapi error: unable to open library %s", name);
- NNAPI_LOG(" %s", dlerror());
- }
-#endif
- return handle;
-}
-
-/**
- * @brief Load libneuralnetworks.so and return handle of library
- * @return a symbol table handle of NN API library
- */
-inline void* getLibraryHandle() {
- static void* handle = loadLibrary("libneuralnetworks.so");
- return handle;
-}
-
-/**
- * @brief Return function ptr in libneuralnetworks.so
- * @param[in] name Name of function
- * @return function pointer
- */
-inline void* loadFunction(const char* name) {
- void* fn = nullptr;
- if (getLibraryHandle() != nullptr) {
- fn = dlsym(getLibraryHandle(), name);
- }
- if (fn == nullptr) {
- NNAPI_LOG("nnapi error: unable to open function %s", name);
- NNAPI_LOG(" %s", dlerror());
- abort();
- }
- else {
-#ifdef _GNU_SOURCE
- Dl_info info;
- if (dladdr(fn, &info))
- {
- NNAPI_LOG("nnapi function '%s' is loaded from '%s' ", name, info.dli_fname);
- }
- else
- {
- NNAPI_LOG("nnapi function '%s' is failed to load", name);
- }
-
-#endif // _GNU_SOURCE
- }
- return fn;
-}
-
-/**
- * @brief Check if libneuralnetworks.so can be loaded
- * @return @c true if loading is successful, otherwise @c false.
- */
-inline bool NNAPIExists() {
- static bool nnapi_is_available = getLibraryHandle();
- return nnapi_is_available;
-}
-
-} // namespace nnfw
-
-#endif // __NEURAL_NETWORKS_LOAD_HELPER_H__
diff --git a/runtime/libs/nnapi/v1.1/include/NeuralNetworksShim.h b/runtime/libs/nnapi/v1.1/include/NeuralNetworksShim.h
deleted file mode 100644
index 60b16f766..000000000
--- a/runtime/libs/nnapi/v1.1/include/NeuralNetworksShim.h
+++ /dev/null
@@ -1,709 +0,0 @@
-/* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-// NOTE To minimize diff with upstream tensorflow, disable clang-format
-// clang-format off
-
-// NOTE This header is derived from part of the following file (in TensorFlow v1.12)
-// 'externals/tensorflow/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h'
-#ifndef __NEURAL_NETWORKS_SHIM__
-#define __NEURAL_NETWORKS_SHIM__
-
-#include "NeuralNetworks.h"
-#include "NeuralNetworksLoadHelpers.h"
-
-// nn api function types
-
-typedef int (*ANeuralNetworksMemory_createFromFd_fn)(
- size_t size, int protect, int fd, size_t offset,
- ANeuralNetworksMemory** memory);
-
-typedef void (*ANeuralNetworksMemory_free_fn)(ANeuralNetworksMemory* memory);
-
-typedef int (*ANeuralNetworksModel_create_fn)(ANeuralNetworksModel** model);
-
-typedef int (*ANeuralNetworksModel_finish_fn)(ANeuralNetworksModel* model);
-
-typedef void (*ANeuralNetworksModel_free_fn)(ANeuralNetworksModel* model);
-
-typedef int (*ANeuralNetworksCompilation_create_fn)(
- ANeuralNetworksModel* model, ANeuralNetworksCompilation** compilation);
-
-typedef void (*ANeuralNetworksCompilation_free_fn)(
- ANeuralNetworksCompilation* compilation);
-
-typedef int (*ANeuralNetworksCompilation_setPreference_fn)(
- ANeuralNetworksCompilation* compilation, int32_t preference);
-
-typedef int (*ANeuralNetworksCompilation_finish_fn)(
- ANeuralNetworksCompilation* compilation);
-
-typedef int (*ANeuralNetworksModel_addOperand_fn)(
- ANeuralNetworksModel* model, const ANeuralNetworksOperandType* type);
-
-typedef int (*ANeuralNetworksModel_setOperandValue_fn)(
- ANeuralNetworksModel* model, int32_t index, const void* buffer,
- size_t length);
-
-typedef int (*ANeuralNetworksModel_setOperandValueFromMemory_fn)(
- ANeuralNetworksModel* model, int32_t index,
- const ANeuralNetworksMemory* memory, size_t offset, size_t length);
-
-typedef int (*ANeuralNetworksModel_addOperation_fn)(
- ANeuralNetworksModel* model, ANeuralNetworksOperationType type,
- uint32_t inputCount, const uint32_t* inputs, uint32_t outputCount,
- const uint32_t* outputs);
-
-typedef int (*ANeuralNetworksModel_identifyInputsAndOutputs_fn)(
- ANeuralNetworksModel* model, uint32_t inputCount, const uint32_t* inputs,
- uint32_t outputCount, const uint32_t* outputs);
-
-typedef int (*ANeuralNetworksModel_relaxComputationFloat32toFloat16_fn)(
- ANeuralNetworksModel* model, bool allow);
-
-typedef int (*ANeuralNetworksExecution_create_fn)(
- ANeuralNetworksCompilation* compilation,
- ANeuralNetworksExecution** execution);
-
-typedef void (*ANeuralNetworksExecution_free_fn)(
- ANeuralNetworksExecution* execution);
-
-typedef int (*ANeuralNetworksExecution_setInput_fn)(
- ANeuralNetworksExecution* execution, int32_t index,
- const ANeuralNetworksOperandType* type, const void* buffer, size_t length);
-
-typedef int (*ANeuralNetworksExecution_setInputFromMemory_fn)(
- ANeuralNetworksExecution* execution, int32_t index,
- const ANeuralNetworksOperandType* type, const ANeuralNetworksMemory* memory,
- size_t offset, size_t length);
-
-typedef int (*ANeuralNetworksExecution_setOutput_fn)(
- ANeuralNetworksExecution* execution, int32_t index,
- const ANeuralNetworksOperandType* type, void* buffer, size_t length);
-
-typedef int (*ANeuralNetworksExecution_setOutputFromMemory_fn)(
- ANeuralNetworksExecution* execution, int32_t index,
- const ANeuralNetworksOperandType* type, const ANeuralNetworksMemory* memory,
- size_t offset, size_t length);
-
-typedef int (*ANeuralNetworksExecution_startCompute_fn)(
- ANeuralNetworksExecution* execution, ANeuralNetworksEvent** event);
-
-typedef int (*ANeuralNetworksEvent_wait_fn)(ANeuralNetworksEvent* event);
-
-typedef void (*ANeuralNetworksEvent_free_fn)(ANeuralNetworksEvent* event);
-
-/**
- * Creates a shared memory object from a file descriptor.
- *
- * The shared memory is backed by a file descriptor via mmap.
- * See {@link ANeuralNetworksMemory} for a description on how to use
- * this shared memory.
- *
- * @param size The requested size in bytes.
- * Must not be larger than the file size.
- * @param prot The desired memory protection for the mapping.
- * It is either PROT_NONE or the bitwise OR of one or
- * more of the following flags: PROT_READ, PROT_WRITE.
- * @param fd The requested file descriptor.
- * The file descriptor has to be mmap-able. The file
- * descriptor will be duplicated.
- * @param offset The offset to the beginning of the file of the area to map.
- * The offset has to be aligned to a page size.
- * @param memory The memory object to be created.
- * Set to NULL if unsuccessful.
- *
- * @return ANEURALNETWORKS_NO_ERROR if the request completed normally.
- */
-inline int ANeuralNetworksMemory_createFromFd(size_t size, int protect, int fd,
- size_t offset,
- ANeuralNetworksMemory** memory) {
- LOAD_FUNCTION(ANeuralNetworksMemory_createFromFd);
- EXECUTE_FUNCTION_RETURN(size, protect, fd, offset, memory);
-}
-
-/**
- * Delete a memory object.
- *
- * Destroys the object used by the run time to keep track of the memory.
- * This will free the underlying actual memory if no other code has open
- * handles to this memory.
- *
- * @param memory The memory object to be freed.
- */
-inline void ANeuralNetworksMemory_free(ANeuralNetworksMemory* memory) {
- LOAD_FUNCTION(ANeuralNetworksMemory_free);
- EXECUTE_FUNCTION(memory);
-}
-
-/**
- * Create an empty {@link ANeuralNetworksModel}.
- *
- * <p>This only creates the object. Computation is performed once
- * {@link ANeuralNetworksExecution_startCompute} is invoked.
- *
- * The model should be constructed with calls to
- * {@link ANeuralNetworksModel_addOperation} and
- * {@link ANeuralNetworksModel_addOperand}
- *
- * <p>{@link ANeuralNetworksModel_finish} should be called once the model
- * has been fully constructed.</p>
- *
- * <p>{@link ANeuralNetworksModel_free} should be called once the model
- * is no longer needed.</p>
- *
- * @param model The {@link ANeuralNetworksModel} to be created.
- * Set to NULL if unsuccessful.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksModel_create(ANeuralNetworksModel** model) {
- LOAD_FUNCTION(ANeuralNetworksModel_create);
- EXECUTE_FUNCTION_RETURN(model);
-}
-
-/**
- * Destroy a model.
- *
- * The model need not have been finished by a call to
- * {@link ANeuralNetworksModel_finish}.
- *
- * See {@link ANeuralNetworksModel} for information on multithreaded usage.
- *
- * @param model The model to be destroyed. Passing NULL is acceptable and
- * results in no operation.
- */
-inline void ANeuralNetworksModel_free(ANeuralNetworksModel* model) {
- LOAD_FUNCTION(ANeuralNetworksModel_free);
- EXECUTE_FUNCTION(model);
-}
-
-/**
- * Indicate that we have finished modifying a model. Required before
- * calling {@link ANeuralNetworksCompilation_compile}.
- *
- * An application is responsible to make sure that no other thread uses
- * the model at the same time.
- *
- * See {@link ANeuralNetworksModel} for information on multithreaded usage.
- *
- * @param model The model to be finished.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksModel_finish(ANeuralNetworksModel* model) {
- LOAD_FUNCTION(ANeuralNetworksModel_finish);
- EXECUTE_FUNCTION_RETURN(model);
-}
-
-/**
- * Add an operand to a model.
- *
- * The order in which the operands are added is important. The first one added
- * to a model will have the index value 0, the second 1, etc. These indexes are
- * used as operand identifiers in {@link ANeuralNetworksModel_addOperation},
- * {@link ANeuralNetworksExecution_setInput},
- * {@link ANeuralNetworksExecution_setInputFromMemory},
- * {@link ANeuralNetworksExecution_setOutput},
- * {@link ANeuralNetworksExecution_setOutputFromMemory} and
- * {@link ANeuralNetworksExecution_setOperandValue}.
- *
- * To build a model that can accommodate inputs of various sizes, as you may
- * want to do for a CNN, set the size of the dimensions that will vary at run
- * time to 0. If you do so, provide the full dimensions when calling
- * {@link ANeuralNetworksExecution_setInput} or {@link
- * ANeuralNetworksExecution_setInputFromMemory}.
- *
- * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
- * been called will return an error.
- *
- * See {@link ANeuralNetworksModel} for information on multithreaded usage.
- *
- * @param model The model to be modified.
- * @param type The {@link ANeuralNetworksOperandType} that describes the shape
- * of the operand.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksModel_addOperand(
- ANeuralNetworksModel* model, const ANeuralNetworksOperandType* type) {
- LOAD_FUNCTION(ANeuralNetworksModel_addOperand);
- EXECUTE_FUNCTION_RETURN(model, type);
-}
-
-/**
- * Sets an operand to a constant value.
- *
- * For scalar values, the content of buffer is copied into the model.
- *
- * For tensor values, a pointer to the buffer is stored within the model.
- * The application is responsible for not changing the content of this region
- * until all executions using this model have completed. As the data may
- * be copied during processing, modifying the data after this call yields
- * undefined results.
- *
- * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
- * been called will return an error.
- *
- * See {@link ANeuralNetworksModel} for information on multithreaded usage.
- *
- * @param model The model to be modified.
- * @param index The index of the model operand we're setting.
- * @param buffer A pointer to the data to use.
- * @param length The size in bytes of the data value.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksModel_setOperandValue(ANeuralNetworksModel* model,
- int32_t index,
- const void* buffer,
- size_t length) {
- LOAD_FUNCTION(ANeuralNetworksModel_setOperandValue);
- EXECUTE_FUNCTION_RETURN(model, index, buffer, length);
-}
-
-/**
- * Sets an operand to a value stored in a memory object.
- *
- * The content of the memory is not copied. A reference to that memory is stored
- * inside the model. The application is responsible for not changing the content
- * of the memory region until all executions using this model have completed.
- * As the data may be copied during processing, modifying the data after this
- * call yields undefined results.
- *
- * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
- * been called will return an error.
- *
- * See {@link ANeuralNetworksModel} for information on multithreaded usage.
- *
- * @param model The model to be modified.
- * @param index The index of the model operand we're setting.
- * @param buffer A pointer to the data to use.
- * @param memory The memory containing the data.
- * @param offset This specifies the location of the data within the memory.
- * The offset is in bytes from the start of memory.
- * @param length The size in bytes of the data value.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksModel_setOperandValueFromMemory(
- ANeuralNetworksModel* model, int32_t index,
- const ANeuralNetworksMemory* memory, size_t offset, size_t length) {
- LOAD_FUNCTION(ANeuralNetworksModel_setOperandValueFromMemory);
- EXECUTE_FUNCTION_RETURN(model, index, memory, offset, length);
-}
-
-/**
- * Add an operation to a model.
- *
- * @param model The model to be modified.
- * @param type The type of the operation.
- * @param inputCount The number of entries in the inputs array.
- * @param inputs An array of indexes identifying each operand.
- * @param outputCount The number of entries in the outputs array.
- * @param outputs An array of indexes identifying each operand.
- *
- * The operands specified by inputs and outputs must have been
- * previously added by calls to {@link ANeuralNetworksModel_addOperand}.
- *
- * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
- * been called will return an error.
- *
- * See {@link ANeuralNetworksModel} for information on multithreaded usage.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksModel_addOperation(ANeuralNetworksModel* model,
- ANeuralNetworksOperationType type,
- uint32_t inputCount,
- const uint32_t* inputs,
- uint32_t outputCount,
- const uint32_t* outputs) {
- LOAD_FUNCTION(ANeuralNetworksModel_addOperation);
- EXECUTE_FUNCTION_RETURN(model, type, inputCount, inputs, outputCount,
- outputs);
-}
-
-/**
- * Specifies which operands will be the model's inputs and outputs.
- *
- * An operand cannot be used for both input and output. Doing so will
- * return an error.
- *
- * @param model The model to be modified.
- * @param inputCount The number of entries in the inputs array.
- * @param inputs An array of indexes identifying the input operands.
- * @param outputCount The number of entries in the outputs array.
- * @param outputs An array of indexes identifying the output operands.
- *
- * The operands specified by inputs and outputs must have been
- * previously added by calls to {@link ANeuralNetworksModel_addOperand}.
- *
- * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
- * been called will return an error.
- *
- * See {@link ANeuralNetworksModel} for information on multithreaded usage.
- *
- */
-inline int ANeuralNetworksModel_identifyInputsAndOutputs(
- ANeuralNetworksModel* model, uint32_t inputCount, const uint32_t* inputs,
- uint32_t outputCount, const uint32_t* outputs) {
- LOAD_FUNCTION(ANeuralNetworksModel_identifyInputsAndOutputs);
- EXECUTE_FUNCTION_RETURN(model, inputCount, inputs, outputCount, outputs);
-}
-
-/**
- * Specifies whether {@link ANEURALNETWORKS_TENSOR_FLOAT32} is allowed to be
- * calculated with range and/or precision as low as that of the IEEE 754 16-bit
- * floating-point format. By default, {@link ANEURALNETWORKS_TENSOR_FLOAT32}
- * must be calculated using at least the range and precision of the IEEE 754
- * 32-bit floating-point format.
- *
- * @param model The model to be modified.
- * @param allow 'true' indicates {@link ANEURALNETWORKS_TENSOR_FLOAT32} may be
- * calculated with range and/or precision as low as that of the
- * IEEE 754 16-bit floating point format. 'false' indicates
- * {@link ANEURALNETWORKS_TENSOR_FLOAT32} must be calculated using
- * at least the range and precision of the IEEE 754 32-bit floating
- * point format.
- *
- * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
- * been called will return an error.
- *
- * Available since API level 28.
- *
- * See {@link ANeuralNetworksModel} for information on multithreaded usage.
- */
-inline int ANeuralNetworksModel_relaxComputationFloat32toFloat16(
- ANeuralNetworksModel* model, bool allow) {
- LOAD_FUNCTION(ANeuralNetworksModel_relaxComputationFloat32toFloat16);
- EXECUTE_FUNCTION_RETURN(model, allow);
-}
-
-/**
- * Create a {@link ANeuralNetworksCompilation} to compile the given model.
- * This only creates the object. Compilation is only performed once
- * {@link ANeuralNetworksCompilation_start} is invoked.
- *
- * <p>The provided model must outlive the compilation.</p>
- *
- * The model must already have been finished by a call to
- * {@link ANeuralNetworksModel_finish}.
- *
- * See {@link ANeuralNetworksCompilation} for information on multithreaded
- * usage.
- *
- * @param model The {@link ANeuralNetworksModel} to be compiled.
- * @param compilation The newly created object or NULL if unsuccessful.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA
- * if the model is invalid.
- */
-inline int ANeuralNetworksCompilation_create(
- ANeuralNetworksModel* model, ANeuralNetworksCompilation** compilation) {
- LOAD_FUNCTION(ANeuralNetworksCompilation_create);
- EXECUTE_FUNCTION_RETURN(model, compilation);
-}
-
-/**
- * Destroy a compilation.
- *
- * <p>If called on a compilation for which
- * {@link ANeuralNetworksCompilation_finish} has been called, the
- * function will return immediately but will mark the compilation to be deleted
- * once the compilation completes. {@link ANeuralNetworksCompilation_wait}
- * will then return ERROR_DELETED.
- *
- * See {@link ANeuralNetworksCompilation} for information on multithreaded
- * usage.
- *
- * @param compilation The compilation to be destroyed. Passing NULL is
- * acceptable and results in no operation.
- */
-inline void ANeuralNetworksCompilation_free(
- ANeuralNetworksCompilation* compilation) {
- LOAD_FUNCTION(ANeuralNetworksCompilation_free);
- EXECUTE_FUNCTION(compilation);
-}
-
-/**
- * Sets the execution preference.
- *
- * <p>Provides guidance to the runtime when trade-offs are possible.</p>
- *
- * See {@link ANeuralNetworksCompilation} for information on multithreaded
- * usage.
- *
- * @param compilation The compilation to be modified.
- * @param preference Either {@link PREFER_LOW_POWER},
- * {@link PREFER_SINGLE_FAST_ANSWER}, or
- * {@link PREFER_SUSTAINED_SPEED}.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksCompilation_setPreference(
- ANeuralNetworksCompilation* compilation, int32_t preference) {
- LOAD_FUNCTION(ANeuralNetworksCompilation_setPreference);
- EXECUTE_FUNCTION_RETURN(compilation, preference);
-}
-
-/**
- * Indicate that we have finished modifying a compilation. Required before
- * calling {@link ANeuralNetworksExecution_create}.
- *
- * An application must ensure that no other thread uses the compilation at the
- * same time.
- *
- * See {@link ANeuralNetworksCompilation} for information on multithreaded
- * usage.
- *
- * @param compilation The compilation to be finished.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksCompilation_finish(
- ANeuralNetworksCompilation* compilation) {
- LOAD_FUNCTION(ANeuralNetworksCompilation_finish);
- EXECUTE_FUNCTION_RETURN(compilation);
-}
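-
-/* Usage sketch (illustrative, assuming `model` was already finished with
- * ANeuralNetworksModel_finish):
- *
- *   ANeuralNetworksCompilation *compilation = NULL;
- *   if (ANeuralNetworksCompilation_create(model, &compilation) ==
- *       ANEURALNETWORKS_NO_ERROR) {
- *     ANeuralNetworksCompilation_setPreference(
- *       compilation, ANEURALNETWORKS_PREFER_SUSTAINED_SPEED);
- *     ANeuralNetworksCompilation_finish(compilation);
- *   }
- */
-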
-/**
- * Create a {@link ANeuralNetworksExecution} to apply the given compilation.
- * This only creates the object. Computation is only performed once
- * {@link ANeuralNetworksExecution_startCompute} is invoked.
- *
- * <p>The provided compilation must outlive the execution.</p>
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- *
- * @param compilation The {@link ANeuralNetworksCompilation} to be evaluated.
- * @param execution The newly created object or NULL if unsuccessful.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA
- * if the compilation is invalid.
- */
-inline int ANeuralNetworksExecution_create(
- ANeuralNetworksCompilation* compilation,
- ANeuralNetworksExecution** execution) {
- LOAD_FUNCTION(ANeuralNetworksExecution_create);
- EXECUTE_FUNCTION_RETURN(compilation, execution);
-}
-
-/**
- * Destroy an execution.
- *
- * <p>If called on an execution for which
- * {@link ANeuralNetworksExecution_startCompute} has been called, the
- * function will return immediately but will mark the execution to be deleted
- * once the computation completes. A subsequent {@link ANeuralNetworksEvent_wait}
- * will return ANEURALNETWORKS_ERROR_DELETED.
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- *
- * @param execution The execution to be destroyed. Passing NULL is acceptable
- * and results in no operation.
- */
-inline void ANeuralNetworksExecution_free(ANeuralNetworksExecution* execution) {
- LOAD_FUNCTION(ANeuralNetworksExecution_free);
- EXECUTE_FUNCTION(execution);
-}
-
-/**
- * Associate a user buffer with an input of the model of the
- * {@link ANeuralNetworksExecution}.
- *
- * <p>The provided buffer must outlive the execution.</p>
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- *
- * @param execution The execution to be modified.
- * @param index The index of the input argument we are setting. It is
- * an index into the lists passed to
- * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
- * the index associated with {@link
- * ANeuralNetworksModel_addOperand}.
- * @param type The type of the operand. This should be used to specify the
- * dimensions that were set to 0 when the operand was added to the
- * model. All other properties of the type must be the same as
- * specified in the model. If the type is the same as specified
- * when the model was built, NULL can be passed.
- * @param buffer The buffer containing the data.
- * @param length The length in bytes of the buffer.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if
- * the name is not recognized or the buffer is too small for the input.
- */
-inline int ANeuralNetworksExecution_setInput(
- ANeuralNetworksExecution* execution, int32_t index,
- const ANeuralNetworksOperandType* type, const void* buffer, size_t length) {
- LOAD_FUNCTION(ANeuralNetworksExecution_setInput);
- EXECUTE_FUNCTION_RETURN(execution, index, type, buffer, length);
-}
-
-/**
- * Associate part of a memory object with an input of the model of the
- * {@link ANeuralNetworksExecution}.
- *
- * <p>The provided memory must outlive the execution.</p>
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- *
- * @param execution The execution to be modified.
- * @param index The index of the input argument we are setting. It is
- * an index into the lists passed to
- * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
- * the index associated with {@link
- * ANeuralNetworksModel_addOperand}.
- * @param type The type of the operand. This can be used to specify the
- * dimensions that were set to 0 when the operand was added to the
- * model. All other values must be the same as specified in the
- * model. If the type is the same as specified when the model
- * was built, NULL can be passed.
- * @param memory The memory containing the data.
- * @param offset This specifies the location of the data within the memory.
- * The offset is in bytes from the start of memory.
- * @param length The size in bytes of the data value.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if
- * the name is not recognized or the buffer is too small for the input.
- */
-inline int ANeuralNetworksExecution_setInputFromMemory(
- ANeuralNetworksExecution* execution, int32_t index,
- const ANeuralNetworksOperandType* type, const ANeuralNetworksMemory* memory,
- size_t offset, size_t length) {
- LOAD_FUNCTION(ANeuralNetworksExecution_setInputFromMemory);
- EXECUTE_FUNCTION_RETURN(execution, index, type, memory, offset, length);
-}
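-
-/* Usage sketch (illustrative, assuming `fd` refers to an mmap-able file whose
- * first `inputSize` bytes hold the data for input 0):
- *
- *   ANeuralNetworksMemory *mem = NULL;
- *   ANeuralNetworksMemory_createFromFd(inputSize, PROT_READ, fd, 0, &mem);
- *   ANeuralNetworksExecution_setInputFromMemory(execution, 0, NULL, mem, 0,
- *                                               inputSize);
- */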
-
-/**
- * Associate a user buffer with an output of the model of the
- * {@link ANeuralNetworksExecution}.
- *
- * <p>The provided buffer must outlive the execution.</p>
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- *
- * @param execution The execution to be modified.
- * @param index The index of the output argument we are setting. It is
- * an index into the lists passed to
- * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
- * the index associated with {@link
- * ANeuralNetworksModel_addOperand}.
- * @param type The type of the operand. This can be used to specify the
- * dimensions that were set to 0 when the operand was added to the
- * model. All other values must be the same as specified in the
- * model. If the type is the same as specified when the model
- * was built, NULL can be passed.
- * @param buffer The buffer where the data is to be written.
- * @param length The length in bytes of the buffer.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if
- * the name is not recognized or the buffer is too small for the output.
- */
-inline int ANeuralNetworksExecution_setOutput(
- ANeuralNetworksExecution* execution, int32_t index,
- const ANeuralNetworksOperandType* type, void* buffer, size_t length) {
- LOAD_FUNCTION(ANeuralNetworksExecution_setOutput);
- EXECUTE_FUNCTION_RETURN(execution, index, type, buffer, length);
-}
-
-/**
- * Associate part of a memory object with an output of the model of the
- * {@link ANeuralNetworksExecution}.
- *
- * <p>The provided memory must outlive the execution.</p>
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- *
- * @param execution The execution to be modified.
- * @param index The index of the output argument we are setting. It is
- * an index into the lists passed to
- * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
- * the index associated with {@link
- * ANeuralNetworksModel_addOperand}.
- * @param type The type of the operand. This can be used to specify the
- * dimensions that were set to 0 when the operand was added to the
- * model. All other values must be the same as specified in the
- * model. If the type is the same as specified when the model
- * was built, NULL can be passed.
- * @param memory The memory where the data is to be stored.
- * @param offset This specifies the location of the data within the memory.
- * The offset is in bytes from the start of memory.
- * @param length The length in bytes of the data value.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if
- * the name is not recognized or the buffer is too small for the output.
- */
-inline int ANeuralNetworksExecution_setOutputFromMemory(
- ANeuralNetworksExecution* execution, int32_t index,
- const ANeuralNetworksOperandType* type, const ANeuralNetworksMemory* memory,
- size_t offset, size_t length) {
- LOAD_FUNCTION(ANeuralNetworksExecution_setOutputFromMemory);
- EXECUTE_FUNCTION_RETURN(execution, index, type, memory, offset, length);
-}
-
-/**
- * Schedule evaluation of the execution.
- *
- * <p>Schedules evaluation of the execution. Once the model has been
- * applied and the outputs are ready to be consumed, the execution will be
- * signaled. Use {@link ANeuralNetworksEvent_wait} to wait for that signal.
- * </p>
- *
- * Multiple executions can be scheduled and evaluated concurrently, and
- * compilations can be performed concurrently with executions. The runtime makes
- * no guarantee on the ordering of the completion of compilations and
- * executions. If it's important to the application, the application should
- * enforce the ordering by waiting on the returned
- * {@link ANeuralNetworksEvent}.
- *
- * {@link ANeuralNetworksEvent_wait} must be called to recuperate the resources
- * used by the execution.
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- *
- * @param execution The execution to be scheduled and executed.
- * @param event The event that will be signaled on completion, or set to NULL
- *              if there is an error.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksExecution_startCompute(
- ANeuralNetworksExecution* execution, ANeuralNetworksEvent** event) {
- LOAD_FUNCTION(ANeuralNetworksExecution_startCompute);
- EXECUTE_FUNCTION_RETURN(execution, event);
-}
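-
-/* Usage sketch (illustrative): schedule the execution, then block on the
- * returned event and release it.
- *
- *   ANeuralNetworksEvent *event = NULL;
- *   if (ANeuralNetworksExecution_startCompute(execution, &event) ==
- *       ANEURALNETWORKS_NO_ERROR) {
- *     ANeuralNetworksEvent_wait(event); // outputs are ready after this
- *     ANeuralNetworksEvent_free(event);
- *   }
- */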
-
-/**
- * Waits until the execution completes.
- *
- * More than one thread can wait on an event. When the execution completes,
- * all threads will be released.
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- *
- * @return ANEURALNETWORKS_NO_ERROR if the execution completed normally.
- */
-inline int ANeuralNetworksEvent_wait(ANeuralNetworksEvent* event) {
- LOAD_FUNCTION(ANeuralNetworksEvent_wait);
- EXECUTE_FUNCTION_RETURN(event);
-}
-
-/**
- * Destroys the event.
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- */
-inline void ANeuralNetworksEvent_free(ANeuralNetworksEvent* event) {
- LOAD_FUNCTION(ANeuralNetworksEvent_free);
- EXECUTE_FUNCTION(event);
-}
-
-#endif // __NEURAL_NETWORKS_SHIM__
diff --git a/runtime/libs/nnapi/v1.2/CMakeLists.txt b/runtime/libs/nnapi/v1.2/CMakeLists.txt
deleted file mode 100644
index 21ec3015f..000000000
--- a/runtime/libs/nnapi/v1.2/CMakeLists.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-add_library(nnfw_lib_nnapi_1_2 INTERFACE)
-
-target_include_directories(nnfw_lib_nnapi_1_2 INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/include)
-target_link_libraries(nnfw_lib_nnapi_1_2 INTERFACE nnfw-nnapi-header)
diff --git a/runtime/libs/nnapi/v1.2/include/NeuralNetworksShim.h b/runtime/libs/nnapi/v1.2/include/NeuralNetworksShim.h
deleted file mode 100644
index 80082383f..000000000
--- a/runtime/libs/nnapi/v1.2/include/NeuralNetworksShim.h
+++ /dev/null
@@ -1,1136 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// NOTE This header is derived from part of the following file
-// https://github.com/tensorflow/tensorflow/blob/a59ad83d06abd38b5e142c41043db8886a92fca8/tensorflow/lite/nnapi/NeuralNetworksShim.h
-
-#ifndef __NEURAL_NETWORKS_SHIM_H__
-#define __NEURAL_NETWORKS_SHIM_H__
-
-#include "NeuralNetworksTypes.h"
-#include "NeuralNetworksLoadHelpers.h"
-
-// This interface is now deprecated. You should use nnapi_implementation
-// instead.
-
-// TODO(b/123017568): Update all current usages of this file.
-
-// NN api types based on NNAPI header file
-// https://developer.android.com/ndk/reference/group/neural-networks
-
-/**
- * Creates a shared memory object from a file descriptor.
- *
- * The shared memory is backed by a file descriptor via mmap.
- * See {@link ANeuralNetworksMemory} for a description on how to use
- * this shared memory.
- *
- * @param size The requested size in bytes.
- * Must not be larger than the file size.
- * @param protect The desired memory protection for the mapping.
- * It is either PROT_NONE or the bitwise OR of one or
- * more of the following flags: PROT_READ, PROT_WRITE.
- * @param fd The requested file descriptor.
- * The file descriptor has to be mmap-able. The file
- * descriptor will be duplicated.
- * @param offset The offset to the beginning of the file of the area to map.
- * The offset has to be aligned to a page size.
- * @param memory The memory object to be created.
- * Set to NULL if unsuccessful.
- *
- * @return ANEURALNETWORKS_NO_ERROR if the request completed normally.
- */
-inline int ANeuralNetworksMemory_createFromFd(size_t size, int protect, int fd, size_t offset,
- ANeuralNetworksMemory **memory)
-{
- LOAD_FUNCTION(ANeuralNetworksMemory_createFromFd);
- EXECUTE_FUNCTION_RETURN(size, protect, fd, offset, memory);
-}
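-
-/* Usage sketch (illustrative; "weights.bin" and `fileSize` are hypothetical):
- *
- *   int fd = open("weights.bin", O_RDONLY); // <fcntl.h>
- *   ANeuralNetworksMemory *mem = NULL;
- *   ANeuralNetworksMemory_createFromFd(fileSize, PROT_READ, fd, 0, &mem);
- *   // The file descriptor is duplicated internally, so `fd` may be closed
- *   // once the call returns.
- */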
-
-/**
- * Delete a memory object.
- *
- * Destroys the object used by the run time to keep track of the memory.
- * This will free the underlying actual memory if no other code has open
- * handles to this memory.
- *
- * @param memory The memory object to be freed.
- */
-inline void ANeuralNetworksMemory_free(ANeuralNetworksMemory *memory)
-{
- LOAD_FUNCTION(ANeuralNetworksMemory_free);
- EXECUTE_FUNCTION(memory);
-}
-
-/**
- * Create an empty {@link ANeuralNetworksModel}.
- *
- * <p>This only creates the object. Computation is performed once
- * {@link ANeuralNetworksExecution_startCompute} is invoked.
- *
- * The model should be constructed with calls to
- * {@link ANeuralNetworksModel_addOperation} and
- * {@link ANeuralNetworksModel_addOperand}.
- *
- * <p>{@link ANeuralNetworksModel_finish} should be called once the model
- * has been fully constructed.</p>
- *
- * <p>{@link ANeuralNetworksModel_free} should be called once the model
- * is no longer needed.</p>
- *
- * @param model The {@link ANeuralNetworksModel} to be created.
- * Set to NULL if unsuccessful.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksModel_create(ANeuralNetworksModel **model)
-{
- LOAD_FUNCTION(ANeuralNetworksModel_create);
- EXECUTE_FUNCTION_RETURN(model);
-}
-
-/**
- * Destroy a model.
- *
- * The model need not have been finished by a call to
- * {@link ANeuralNetworksModel_finish}.
- *
- * See {@link ANeuralNetworksModel} for information on multithreaded usage.
- *
- * @param model The model to be destroyed. Passing NULL is acceptable and
- * results in no operation.
- */
-inline void ANeuralNetworksModel_free(ANeuralNetworksModel *model)
-{
- LOAD_FUNCTION(ANeuralNetworksModel_free);
- EXECUTE_FUNCTION(model);
-}
-
-/**
- * Indicate that we have finished modifying a model. Required before
- * calling {@link ANeuralNetworksCompilation_create}.
- *
- * An application must ensure that no other thread uses the model at the same
- * time.
- *
- * See {@link ANeuralNetworksModel} for information on multithreaded usage.
- *
- * @param model The model to be finished.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksModel_finish(ANeuralNetworksModel *model)
-{
- LOAD_FUNCTION(ANeuralNetworksModel_finish);
- EXECUTE_FUNCTION_RETURN(model);
-}
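-
-/* Usage sketch (illustrative) of the overall model lifecycle:
- *
- *   ANeuralNetworksModel *model = NULL;
- *   ANeuralNetworksModel_create(&model);
- *   // ... addOperand / setOperandValue / addOperation /
- *   //     identifyInputsAndOutputs ...
- *   ANeuralNetworksModel_finish(model);
- *   // ... compile and execute ...
- *   ANeuralNetworksModel_free(model);
- */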
-
-/**
- * Add an operand to a model.
- *
- * The order in which the operands are added is important. The first one added
- * to a model will have the index value 0, the second 1, etc. These indexes are
- * used as operand identifiers in {@link ANeuralNetworksModel_addOperation},
- * {@link ANeuralNetworksExecution_setInput},
- * {@link ANeuralNetworksExecution_setInputFromMemory},
- * {@link ANeuralNetworksExecution_setOutput},
- * {@link ANeuralNetworksExecution_setOutputFromMemory} and
- * {@link ANeuralNetworksModel_setOperandValue}.
- *
- * To build a model that can accommodate inputs of various sizes, as you may
- * want to do for a CNN, set the size of the dimensions that will vary at run
- * time to 0. If you do so, provide the full dimensions when calling
- * {@link ANeuralNetworksExecution_setInput} or {@link
- * ANeuralNetworksExecution_setInputFromMemory}.
- *
- * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
- * been called will return an error.
- *
- * See {@link ANeuralNetworksModel} for information on multithreaded usage.
- *
- * @param model The model to be modified.
- * @param type The {@link ANeuralNetworksOperandType} that describes the shape
- * of the operand.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksModel_addOperand(ANeuralNetworksModel *model,
- const ANeuralNetworksOperandType *type)
-{
- LOAD_FUNCTION(ANeuralNetworksModel_addOperand);
- EXECUTE_FUNCTION_RETURN(model, type);
-}
-
-/**
- * Sets an operand to a constant value.
- *
- * For scalar values, the content of buffer is copied into the model.
- *
- * For tensor values, a pointer to the buffer is stored within the model.
- * The application is responsible for not changing the content of this region
- * until all executions using this model have completed. As the data may
- * be copied during processing, modifying the data after this call yields
- * undefined results.
- *
- * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
- * been called will return an error.
- *
- * See {@link ANeuralNetworksModel} for information on multithreaded usage.
- *
- * @param model The model to be modified.
- * @param index The index of the model operand we're setting.
- * @param buffer A pointer to the data to use.
- * @param length The size in bytes of the data value.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksModel_setOperandValue(ANeuralNetworksModel *model, int32_t index,
- const void *buffer, size_t length)
-{
- LOAD_FUNCTION(ANeuralNetworksModel_setOperandValue);
- EXECUTE_FUNCTION_RETURN(model, index, buffer, length);
-}
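-
-/* Usage sketch (illustrative; `actIndex` is a hypothetical index of a scalar
- * ANEURALNETWORKS_INT32 activation operand):
- *
- *   const int32_t act = ANEURALNETWORKS_FUSED_NONE;
- *   ANeuralNetworksModel_setOperandValue(model, actIndex, &act, sizeof(act));
- *   // Scalar values are copied into the model, so `act` need not outlive
- *   // the call.
- */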
-
-/**
- * Sets an operand's per channel quantization parameters.
- *
- * Sets parameters required by a tensor of type
- * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL}.
- * This function must be called for every tensor of type
- * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL} before
- * calling {@link ANeuralNetworksModel_finish}.
- *
- * Available since API level 29.
- *
- * @param model The model to be modified.
- * @param index The index of the model operand we're setting.
- * @param channelQuant The per channel quantization parameters for the operand.
- * No memory in this struct needs to outlive the call to
- * this function.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksModel_setOperandSymmPerChannelQuantParams(
- ANeuralNetworksModel *model, int32_t index,
- const ANeuralNetworksSymmPerChannelQuantParams *channelQuant)
-{
- LOAD_FUNCTION(ANeuralNetworksModel_setOperandSymmPerChannelQuantParams);
- EXECUTE_FUNCTION_RETURN(model, index, channelQuant);
-}
-
-/**
- * Sets an operand to a value stored in a memory object.
- *
- * The content of the memory is not copied. A reference to that memory is stored
- * inside the model. The application is responsible for not changing the content
- * of the memory region until all executions using this model have completed.
- * As the data may be copied during processing, modifying the data after this
- * call yields undefined results.
- *
- * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
- * been called will return an error.
- *
- * See {@link ANeuralNetworksModel} for information on multithreaded usage.
- *
- * @param model The model to be modified.
- * @param index The index of the model operand we're setting.
- * @param memory The memory containing the data.
- * @param offset This specifies the location of the data within the memory.
- * The offset is in bytes from the start of memory.
- * @param length The size in bytes of the data value.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksModel_setOperandValueFromMemory(ANeuralNetworksModel *model,
- int32_t index,
- const ANeuralNetworksMemory *memory,
- size_t offset, size_t length)
-{
- LOAD_FUNCTION(ANeuralNetworksModel_setOperandValueFromMemory);
- EXECUTE_FUNCTION_RETURN(model, index, memory, offset, length);
-}
-
-/**
- * Add an operation to a model.
- *
- * @param model The model to be modified.
- * @param type The type of the operation.
- * @param inputCount The number of entries in the inputs array.
- * @param inputs An array of indexes identifying the input operands.
- * @param outputCount The number of entries in the outputs array.
- * @param outputs An array of indexes identifying the output operands.
- *
- * The operands specified by inputs and outputs must have been
- * previously added by calls to {@link ANeuralNetworksModel_addOperand}.
- *
- * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
- * been called will return an error.
- *
- * See {@link ANeuralNetworksModel} for information on multithreaded usage.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
- ANeuralNetworksOperationType type, uint32_t inputCount,
- const uint32_t *inputs, uint32_t outputCount,
- const uint32_t *outputs)
-{
- LOAD_FUNCTION(ANeuralNetworksModel_addOperation);
- EXECUTE_FUNCTION_RETURN(model, type, inputCount, inputs, outputCount, outputs);
-}
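-
-/* Usage sketch (illustrative, assuming operands 0 and 1 are the addends,
- * operand 2 holds a fuse code and operand 3 receives the sum):
- *
- *   const uint32_t addInputs[] = {0, 1, 2};
- *   const uint32_t addOutputs[] = {3};
- *   ANeuralNetworksModel_addOperation(model, ANEURALNETWORKS_ADD, 3,
- *                                     addInputs, 1, addOutputs);
- */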
-
-/**
- * Specifies which operands will be the model's inputs and outputs.
- *
- * An operand cannot be used for both input and output. Doing so will
- * return an error.
- *
- * @param model The model to be modified.
- * @param inputCount The number of entries in the inputs array.
- * @param inputs An array of indexes identifying the input operands.
- * @param outputCount The number of entries in the outputs array.
- * @param outputs An array of indexes identifying the output operands.
- *
- * The operands specified by inputs and outputs must have been
- * previously added by calls to {@link ANeuralNetworksModel_addOperand}.
- *
- * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
- * been called will return an error.
- *
- * See {@link ANeuralNetworksModel} for information on multithreaded usage.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksModel_identifyInputsAndOutputs(ANeuralNetworksModel *model,
- uint32_t inputCount,
- const uint32_t *inputs,
- uint32_t outputCount,
- const uint32_t *outputs)
-{
- LOAD_FUNCTION(ANeuralNetworksModel_identifyInputsAndOutputs);
- EXECUTE_FUNCTION_RETURN(model, inputCount, inputs, outputCount, outputs);
-}
-
-/**
- * Specifies whether {@link ANEURALNETWORKS_TENSOR_FLOAT32} is allowed to be
- * calculated with range and/or precision as low as that of the IEEE 754 16-bit
- * floating-point format. By default, {@link ANEURALNETWORKS_TENSOR_FLOAT32}
- * must be calculated using at least the range and precision of the IEEE 754
- * 32-bit floating-point format.
- *
- * @param model The model to be modified.
- * @param allow 'true' indicates {@link ANEURALNETWORKS_TENSOR_FLOAT32} may be
- * calculated with range and/or precision as low as that of the
- * IEEE 754 16-bit floating point format. 'false' indicates
- * {@link ANEURALNETWORKS_TENSOR_FLOAT32} must be calculated using
- * at least the range and precision of the IEEE 754 32-bit floating
- * point format.
- *
- * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
- * been called will return an error.
- *
- * Available since API level 28.
- *
- * See {@link ANeuralNetworksModel} for information on multithreaded usage.
- */
-inline int ANeuralNetworksModel_relaxComputationFloat32toFloat16(ANeuralNetworksModel *model,
- bool allow)
-{
- LOAD_FUNCTION(ANeuralNetworksModel_relaxComputationFloat32toFloat16);
- EXECUTE_FUNCTION_RETURN(model, allow);
-}
-
-/**
- * Create a {@link ANeuralNetworksCompilation} to compile the given model.
- * This only creates the object. Compilation is only performed once
- * {@link ANeuralNetworksCompilation_finish} is invoked.
- *
- * <p>The provided model must outlive the compilation.</p>
- *
- * The model must already have been finished by a call to
- * {@link ANeuralNetworksModel_finish}.
- *
- * See {@link ANeuralNetworksCompilation} for information on multithreaded
- * usage.
- *
- * @param model The {@link ANeuralNetworksModel} to be compiled.
- * @param compilation The newly created object or NULL if unsuccessful.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA
- * if the model is invalid.
- */
-inline int ANeuralNetworksCompilation_create(ANeuralNetworksModel *model,
- ANeuralNetworksCompilation **compilation)
-{
- LOAD_FUNCTION(ANeuralNetworksCompilation_create);
- EXECUTE_FUNCTION_RETURN(model, compilation);
-}
-
-/**
- * Destroy a compilation.
- *
- * <p>If called on a compilation for which
- * {@link ANeuralNetworksCompilation_finish} has been called, the
- * function will return immediately but will mark the compilation to be deleted
- * once the compilation completes. {@link ANeuralNetworksCompilation_wait}
- * will then return ERROR_DELETED.
- *
- * See {@link ANeuralNetworksCompilation} for information on multithreaded
- * usage.
- *
- * @param compilation The compilation to be destroyed. Passing NULL is
- * acceptable and results in no operation.
- */
-inline void ANeuralNetworksCompilation_free(ANeuralNetworksCompilation *compilation)
-{
- LOAD_FUNCTION(ANeuralNetworksCompilation_free);
- EXECUTE_FUNCTION(compilation);
-}
-
-/**
- * Sets the execution preference.
- *
- * <p>Provides guidance to the runtime when trade-offs are possible.</p>
- *
- * See {@link ANeuralNetworksCompilation} for information on multithreaded
- * usage.
- *
- * @param compilation The compilation to be modified.
- * @param preference Either {@link PREFER_LOW_POWER},
- * {@link PREFER_SINGLE_FAST_ANSWER}, or
- * {@link PREFER_SUSTAINED_SPEED}.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksCompilation_setPreference(ANeuralNetworksCompilation *compilation,
- int32_t preference)
-{
- LOAD_FUNCTION(ANeuralNetworksCompilation_setPreference);
- EXECUTE_FUNCTION_RETURN(compilation, preference);
-}
-
-/**
- * Indicate that we have finished modifying a compilation. Required before
- * calling {@link ANeuralNetworksExecution_create}.
- *
- * An application must ensure that no other thread uses the compilation at the
- * same time.
- *
- * See {@link ANeuralNetworksCompilation} for information on multithreaded
- * usage.
- *
- * @param compilation The compilation to be finished.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksCompilation_finish(ANeuralNetworksCompilation *compilation)
-{
- LOAD_FUNCTION(ANeuralNetworksCompilation_finish);
- EXECUTE_FUNCTION_RETURN(compilation);
-}
-
-/**
- * Create a {@link ANeuralNetworksExecution} to apply the given compilation.
- * This only creates the object. Computation is only performed once
- * {@link ANeuralNetworksExecution_startCompute} is invoked.
- *
- * <p>The provided compilation must outlive the execution.</p>
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- *
- * @param compilation The {@link ANeuralNetworksCompilation} to be evaluated.
- * @param execution The newly created object or NULL if unsuccessful.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA
- * if the compilation is invalid.
- */
-inline int ANeuralNetworksExecution_create(ANeuralNetworksCompilation *compilation,
- ANeuralNetworksExecution **execution)
-{
- LOAD_FUNCTION(ANeuralNetworksExecution_create);
- EXECUTE_FUNCTION_RETURN(compilation, execution);
-}
-
-/**
- * Destroy an execution.
- *
- * <p>If called on an execution for which
- * {@link ANeuralNetworksExecution_startCompute} has been called, the
- * function will return immediately but will mark the execution to be deleted
- * once the computation completes. A subsequent {@link ANeuralNetworksEvent_wait}
- * will return ANEURALNETWORKS_ERROR_DELETED.
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- *
- * @param execution The execution to be destroyed. Passing NULL is acceptable
- * and results in no operation.
- */
-inline void ANeuralNetworksExecution_free(ANeuralNetworksExecution *execution)
-{
- LOAD_FUNCTION(ANeuralNetworksExecution_free);
- EXECUTE_FUNCTION(execution);
-}
-
-/**
- * Associate a user buffer with an input of the model of the
- * {@link ANeuralNetworksExecution}.
- *
- * <p>The provided buffer must outlive the execution.</p>
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- *
- * @param execution The execution to be modified.
- * @param index The index of the input argument we are setting. It is
- * an index into the lists passed to
- * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
- * the index associated with {@link
- * ANeuralNetworksModel_addOperand}.
- * @param type The type of the operand. This should be used to specify the
- * dimensions that were set to 0 when the operand was added to the
- * model. All other properties of the type must be the same as
- * specified in the model. If the type is the same as specified
- * when the model was built, NULL can be passed.
- * @param buffer The buffer containing the data.
- * @param length The length in bytes of the buffer.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if
- * the name is not recognized or the buffer is too small for the input.
- */
-inline int ANeuralNetworksExecution_setInput(ANeuralNetworksExecution *execution, int32_t index,
- const ANeuralNetworksOperandType *type,
- const void *buffer, size_t length)
-{
- LOAD_FUNCTION(ANeuralNetworksExecution_setInput);
- EXECUTE_FUNCTION_RETURN(execution, index, type, buffer, length);
-}
-
-/**
- * Associate part of a memory object with an input of the model of the
- * {@link ANeuralNetworksExecution}.
- *
- * <p>The provided memory must outlive the execution.</p>
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- *
- * @param execution The execution to be modified.
- * @param index The index of the input argument we are setting. It is
- * an index into the lists passed to
- * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
- * the index associated with {@link
- * ANeuralNetworksModel_addOperand}.
- * @param type The type of the operand. This can be used to specify the
- * dimensions that were set to 0 when the operand was added to the
- * model. All other values must be the same as specified in the
- * model. If the type is the same as specified when the model
- * was built, NULL can be passed.
- * @param memory The memory containing the data.
- * @param offset This specifies the location of the data within the memory.
- * The offset is in bytes from the start of memory.
- * @param length The size in bytes of the data value.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if
- * the name is not recognized or the buffer is too small for the input.
- */
-inline int ANeuralNetworksExecution_setInputFromMemory(ANeuralNetworksExecution *execution,
- int32_t index,
- const ANeuralNetworksOperandType *type,
- const ANeuralNetworksMemory *memory,
- size_t offset, size_t length)
-{
- LOAD_FUNCTION(ANeuralNetworksExecution_setInputFromMemory);
- EXECUTE_FUNCTION_RETURN(execution, index, type, memory, offset, length);
-}
-
-/**
- * Associate a user buffer with an output of the model of the
- * {@link ANeuralNetworksExecution}.
- *
- * <p>The provided buffer must outlive the execution.</p>
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- *
- * @param execution The execution to be modified.
- * @param index The index of the output argument we are setting. It is
- * an index into the lists passed to
- * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
- * the index associated with {@link
- * ANeuralNetworksModel_addOperand}.
- * @param type The type of the operand. This can be used to specify the
- * dimensions that were set to 0 when the operand was added to the
- * model. All other values must be the same as specified in the
- * model. If the type is the same as specified when the model
- * was built, NULL can be passed.
- * @param buffer The buffer where the data is to be written.
- * @param length The length in bytes of the buffer.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if
- * the name is not recognized or the buffer is too small for the output.
- */
-inline int ANeuralNetworksExecution_setOutput(ANeuralNetworksExecution *execution, int32_t index,
- const ANeuralNetworksOperandType *type, void *buffer,
- size_t length)
-{
- LOAD_FUNCTION(ANeuralNetworksExecution_setOutput);
- EXECUTE_FUNCTION_RETURN(execution, index, type, buffer, length);
-}
-
-/**
- * Associate part of a memory object with an output of the model of the
- * {@link ANeuralNetworksExecution}.
- *
- * <p>The provided memory must outlive the execution.</p>
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- *
- * @param execution The execution to be modified.
- * @param index The index of the output argument we are setting. It is
- * an index into the lists passed to
- * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
- * the index associated with {@link
- * ANeuralNetworksModel_addOperand}.
- * @param type The type of the operand. This can be used to specify the
- * dimensions that were set to 0 when the operand was added to the
- * model. All other values must be the same as specified in the
- * model. If the type is the same as specified when the model
- * was built, NULL can be passed.
- * @param memory The memory where the data is to be stored.
- * @param offset This specifies the location of the data within the memory.
- * The offset is in bytes from the start of memory.
- * @param length The length in bytes of the data value.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if
- * the name is not recognized or the buffer is too small for the output.
- */
-inline int ANeuralNetworksExecution_setOutputFromMemory(ANeuralNetworksExecution *execution,
- int32_t index,
- const ANeuralNetworksOperandType *type,
- const ANeuralNetworksMemory *memory,
- size_t offset, size_t length)
-{
- LOAD_FUNCTION(ANeuralNetworksExecution_setOutputFromMemory);
- EXECUTE_FUNCTION_RETURN(execution, index, type, memory, offset, length);
-}
-
-/**
- * Schedule evaluation of the execution.
- *
- * <p>Schedules evaluation of the execution. Once the model has been
- * applied and the outputs are ready to be consumed, the execution will be
- * signaled. Use {@link ANeuralNetworksEvent_wait} to wait for that signal.
- * </p>
- *
- * Multiple executions can be scheduled and evaluated concurrently, and
- * compilations can be performed concurrently with executions. The runtime makes
- * no guarantee on the ordering of the completion of compilations and
- * executions. If it's important to the application, the application should
- * enforce the ordering by waiting on the returned
- * {@link ANeuralNetworksEvent}.
- *
- * {@link ANeuralNetworksEvent_wait} must be called to recuperate the resources
- * used by the execution.
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- *
- * @param execution The execution to be scheduled and executed.
- * @param event The event that will be signaled on completion, or set to NULL
- *              if there is an error.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksExecution_startCompute(ANeuralNetworksExecution *execution,
- ANeuralNetworksEvent **event)
-{
- LOAD_FUNCTION(ANeuralNetworksExecution_startCompute);
- EXECUTE_FUNCTION_RETURN(execution, event);
-}
-
-/**
- * Waits until the execution completes.
- *
- * More than one thread can wait on an event. When the execution completes,
- * all threads will be released.
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- *
- * @return ANEURALNETWORKS_NO_ERROR if the execution completed normally.
- */
-inline int ANeuralNetworksEvent_wait(ANeuralNetworksEvent *event)
-{
- LOAD_FUNCTION(ANeuralNetworksEvent_wait);
- EXECUTE_FUNCTION_RETURN(event);
-}
-
-/**
- * Destroys the event.
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- */
-inline void ANeuralNetworksEvent_free(ANeuralNetworksEvent *event)
-{
- LOAD_FUNCTION(ANeuralNetworksEvent_free);
- EXECUTE_FUNCTION(event);
-}
-
-/**
- * Get the number of available devices.
- *
- * @param numDevices Used to return the number of devices.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- *
- * Available since API level 29.
- */
-inline int ANeuralNetworks_getDeviceCount(uint32_t *numDevices)
-{
- LOAD_FUNCTION(ANeuralNetworks_getDeviceCount);
- EXECUTE_FUNCTION_RETURN(numDevices);
-}
-
-/**
- * Get the representation of the specified device.
- *
- * @param devIndex The index of the specified device. Must be less than the
- * number of available devices.
- * @param device The representation of the specified device.
- * The same representation will always be returned for the
- * specified device.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- *
- * Available since API level 29.
- */
-inline int ANeuralNetworks_getDevice(uint32_t devIndex, ANeuralNetworksDevice **device)
-{
- LOAD_FUNCTION(ANeuralNetworks_getDevice);
- EXECUTE_FUNCTION_RETURN(devIndex, device);
-}
-
-/**
- * Get the name of the specified device.
- *
- * @param device The representation of the specified device.
- * @param name The returned name of the specified device. The name will be in
- * UTF-8 and will be null-terminated. It will be recognizable as a
- * known device name rather than a cryptic string. For devices
- * with API level 29 and above, the format of the name is
- * {VENDOR}-{DEVICE}, e.g. “google-ipu”. For devices with feature
- * level 28 or lower, the name will always be “unknown-device”.
- * The name will remain valid for the duration of the application.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- *
- * Available since API level 29.
- */
-inline int ANeuralNetworksDevice_getName(const ANeuralNetworksDevice *device, const char **name)
-{
- LOAD_FUNCTION(ANeuralNetworksDevice_getName);
- EXECUTE_FUNCTION_RETURN(device, name);
-}
-
-/**
- * Get the version of the driver implementation of the specified device.
- *
- * It’s the responsibility of the driver implementor to ensure that this version
- * string uniquely distinguishes this implementation from all previous
- * implementations.
- *
- * This version string must not be confused with the feature level which is
- * solely defined by {@link ANeuralNetworksDevice_getFeatureLevel}. There is no
- * implicit ordering of the versions. For example, it is not possible to filter
- * all drivers older than a certain version.
- *
- * Application developers may use this version string to avoid or prefer
- * specific driver implementations. For example, an application may want to do
- * so because:
- * - A specific version of the driver does not provide the required
- * performance, perhaps because of a performance regression.
- * - A specific version of the driver has a bug or returns results that
- * don’t match the minimum precision requirement for the application.
- *
- * @param device The representation of the specified device.
- * @param version The returned version string of the driver for the specified
- * device. The string will be in UTF-8 and will be
- * null-terminated. For devices with feature level 28 or lower,
- * "UNKNOWN" will be returned. The version string will remain
- * valid for the duration of the application.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- *
- * Available since API level 29.
- */
-inline int ANeuralNetworksDevice_getVersion(const ANeuralNetworksDevice *device,
- const char **version)
-{
- LOAD_FUNCTION(ANeuralNetworksDevice_getVersion);
- EXECUTE_FUNCTION_RETURN(device, version);
-}
-
-/**
- * Get the supported NNAPI version of the specified device.
- *
- * Each device has a supported feature level, which is the most advanced feature
- * this driver implements. For example, if the driver implements the features
- * introduced in Android P, but does not implement the features introduced after
- * Android P, the value would be 28. Developers could decide whether or not the
- * specified device should be used for a Model that has certain feature
- * requirements.
- *
- * @param device The representation of the specified device.
- * @param featureLevel The API level of the most advanced feature this driver
- * implements.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- *
- * Available since API level 29.
- */
-inline int ANeuralNetworksDevice_getFeatureLevel(const ANeuralNetworksDevice *device,
- int64_t *featureLevel)
-{
- LOAD_FUNCTION(ANeuralNetworksDevice_getFeatureLevel);
- EXECUTE_FUNCTION_RETURN(device, featureLevel);
-}
-
-/**
- * Get the supported operations for a specified set of devices. If multiple
- * devices are selected, the supported operation list is a union of supported
- * operations of all selected devices.
- *
- * @param model The model to be queried.
- * @param devices The set of devices. Must not contain duplicates.
- * @param numDevices The number of devices in the set.
- * @param supportedOps The boolean array to be filled. True means supported. The
- * size of the boolean array must be at least as large as
- * the number of operations in the model. The order of
- * elements in the supportedOps array matches the order in
- * which the corresponding operations were added to the
- * model.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- *
- * Available since API level 29.
- */
-inline int
-ANeuralNetworksModel_getSupportedOperationsForDevices(const ANeuralNetworksModel *model,
- const ANeuralNetworksDevice *const *devices,
- uint32_t numDevices, bool *supportedOps)
-{
- LOAD_FUNCTION(ANeuralNetworksModel_getSupportedOperationsForDevices);
- EXECUTE_FUNCTION_RETURN(model, devices, numDevices, supportedOps);
-}
-
-/**
- * Create a {@link ANeuralNetworksCompilation} to compile the given model for a
- * specified set of devices. If more than one device is specified, the
- * compilation will distribute the workload automatically across the devices.
- * The model must be fully supported by the specified set of devices. This means
- * that ANeuralNetworksModel_getSupportedOperationsForDevices() must have
- * returned true for every operation for that model/devices pair.
- *
- * @param model The {@link ANeuralNetworksModel} to be compiled.
- * @param devices The set of devices. Must not contain duplicates.
- * @param numDevices The number of devices in the set.
- * @param compilation The newly created object or NULL if unsuccessful.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA
- * if the model is invalid.
- *
- * Available since API level 29.
- */
-inline int ANeuralNetworksCompilation_createForDevices(ANeuralNetworksModel *model,
- const ANeuralNetworksDevice *const *devices,
- uint32_t numDevices,
- ANeuralNetworksCompilation **compilation)
-{
- LOAD_FUNCTION(ANeuralNetworksCompilation_createForDevices);
- EXECUTE_FUNCTION_RETURN(model, devices, numDevices, compilation);
-}
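-
-/* Usage sketch (illustrative; `kMaxOps` is a hypothetical upper bound on the
- * number of operations in the model): compile for the first device that
- * fully supports the model.
- *
- *   uint32_t count = 0;
- *   ANeuralNetworks_getDeviceCount(&count);
- *   for (uint32_t i = 0; i < count; ++i) {
- *     ANeuralNetworksDevice *dev = NULL;
- *     ANeuralNetworks_getDevice(i, &dev);
- *     bool supported[kMaxOps] = {false};
- *     ANeuralNetworksModel_getSupportedOperationsForDevices(model, &dev, 1,
- *                                                           supported);
- *     // If every entry is true:
- *     //   ANeuralNetworksCompilation_createForDevices(model, &dev, 1, &c);
- *   }
- */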
-
-/**
- * Sets the compilation caching signature and the cache directory.
- *
- * Provides optional caching information to the runtime for faster repeated
- * compilation.
- *
- * See {@link ANeuralNetworksCompilation} for information on multithreaded
- * usage.
- *
- * @param compilation The compilation to be modified.
- * @param cacheDir The cache directory to store and retrieve caching data. It is
- * recommended to use the code_cache provided by the Android
- * runtime. If not using the code_cache, the user should choose
- *                 a directory local to the application, and is responsible for
- *                 managing and cleaning the cache entries.
- * @param token The token provided by the user to specify a model, must be of
- * length ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN. The user should
- * ensure that the token is unique to a model within the
- *              application. The NNAPI runtime will not detect token
- *              collisions. If there is a collision, the compilation outcome may
- *              be incorrect without any error being reported.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- *
- * Available since API level 29.
- */
-inline int ANeuralNetworksCompilation_setCaching(ANeuralNetworksCompilation *compilation,
- const char *cacheDir, const uint8_t *token)
-{
- LOAD_FUNCTION(ANeuralNetworksCompilation_setCaching);
- EXECUTE_FUNCTION_RETURN(compilation, cacheDir, token);
-}
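-
-/* Usage sketch (illustrative; `cacheDirPath` and the `token` bytes are
- * hypothetical, application-chosen values):
- *
- *   uint8_t token[ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN] = {0};
- *   // ... fill `token` with a value unique to this model ...
- *   ANeuralNetworksCompilation_setCaching(compilation, cacheDirPath, token);
- */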
-
-/**
- * Schedule synchronous evaluation of the execution.
- *
- * <p>Schedules synchronous evaluation of the execution. Returns once the
- * execution has completed and the outputs are ready to be consumed.
- * </p>
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- *
- * See {@link ANeuralNetworksExecution_startCompute} for asynchronous execution.
- * Synchronous execution incurs lower overhead than asynchronous execution.
- *
- * Available since API level 29.
- *
- * @param execution The execution to be scheduled and executed.
- *
- * @return ANEURALNETWORKS_NO_ERROR if the execution completed normally.
- * ANEURALNETWORKS_UNMAPPABLE if the execution input or output memory
- * cannot be properly mapped.
- */
-inline int ANeuralNetworksExecution_compute(ANeuralNetworksExecution *execution)
-{
- LOAD_FUNCTION(ANeuralNetworksExecution_compute);
- EXECUTE_FUNCTION_RETURN(execution);
-}
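-
-/* Usage sketch (illustrative): synchronous evaluation; no event object is
- * involved.
- *
- *   if (ANeuralNetworksExecution_compute(execution) ==
- *       ANEURALNETWORKS_NO_ERROR) {
- *     // Buffers set via ANeuralNetworksExecution_setOutput* now hold the
- *     // results.
- *   }
- */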
-
-/**
- * Get the dimensional information of the specified output operand of the
- * model of the {@link ANeuralNetworksExecution}.
- *
- * On asynchronous execution initiated by {@link
- * ANeuralNetworksExecution_startCompute},
- * {@link ANeuralNetworksEvent_wait} must be called prior to this function to
- * recuperate the resources used by the execution.
- *
- * @param execution The execution to be queried.
- * @param index The index of the output argument we are querying. It is
- * an index into the lists passed to
- * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
- * the index associated with {@link
- * ANeuralNetworksModel_addOperand}.
- * @param rank The rank of the output operand.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful,
- * ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE if the target output is provided an
- * insufficient buffer at execution time, ANEURALNETWORKS_BAD_DATA if the index
- * is invalid.
- *
- * Available since API level 29.
- */
-inline int ANeuralNetworksExecution_getOutputOperandRank(ANeuralNetworksExecution *execution,
- int32_t index, uint32_t *rank)
-{
- LOAD_FUNCTION(ANeuralNetworksExecution_getOutputOperandRank);
- EXECUTE_FUNCTION_RETURN(execution, index, rank);
-}
-
-/**
- * Get the dimensional information of the specified output operand of the
- * model of the {@link ANeuralNetworksExecution}. The target output operand
- * cannot be a scalar.
- *
- * On asynchronous execution initiated by
- * {@link ANeuralNetworksExecution_startCompute},
- * {@link ANeuralNetworksEvent_wait} must be called prior to this function to
- * recuperate the resources used by the execution.
- *
- * @param execution The execution to be queried.
- * @param index The index of the output argument we are querying. It is an index
- * into the lists passed to
- * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
- * the index associated with
- * {@link ANeuralNetworksModel_addOperand}.
- * @param dimensions The dimension array to be filled. The size of the array
- * must be exactly as large as the rank of the output operand
- * to be queried in the model.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful,
- * ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE if the target output is provided an
- * insufficient buffer at execution time, ANEURALNETWORKS_BAD_DATA if the index
- * is invalid or if the target is a scalar.
- *
- * Available since API level 29.
- */
-inline int ANeuralNetworksExecution_getOutputOperandDimensions(ANeuralNetworksExecution *execution,
- int32_t index, uint32_t *dimensions)
-{
- LOAD_FUNCTION(ANeuralNetworksExecution_getOutputOperandDimensions);
- EXECUTE_FUNCTION_RETURN(execution, index, dimensions);
-}
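-
-/* Usage sketch (illustrative): query the shape of output 0 once the
- * execution has completed; assumes a rank of at most 8.
- *
- *   uint32_t rank = 0;
- *   ANeuralNetworksExecution_getOutputOperandRank(execution, 0, &rank);
- *   uint32_t dims[8];
- *   if (rank <= 8)
- *     ANeuralNetworksExecution_getOutputOperandDimensions(execution, 0, dims);
- */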
-
-/**
- * Create a {@link ANeuralNetworksBurst} to apply the given compilation.
- * This only creates the burst object. Computation is only performed once
- * {@link ANeuralNetworksExecution_burstCompute} is invoked with a valid
- * {@link ANeuralNetworksExecution} and {@link ANeuralNetworksBurst}.
- *
- * <p>The provided compilation must outlive the burst object.</p>
- *
- * Available since API level 29.
- *
- * @param compilation The {@link ANeuralNetworksCompilation} to be evaluated.
- * @param burst The newly created object or NULL if unsuccessful.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA
- * if the compilation is invalid.
- */
-inline int ANeuralNetworksBurst_create(ANeuralNetworksCompilation *compilation,
- ANeuralNetworksBurst **burst)
-{
- LOAD_FUNCTION(ANeuralNetworksBurst_create);
- EXECUTE_FUNCTION_RETURN(compilation, burst);
-}
-
-/**
- * Destroys the burst object.
- *
- * Available since API level 29.
- *
- * @param burst The burst object to be destroyed. Passing NULL is acceptable and
- * results in no operation.
- */
-inline void ANeuralNetworksBurst_free(ANeuralNetworksBurst *burst)
-{
- LOAD_FUNCTION(ANeuralNetworksBurst_free);
- EXECUTE_FUNCTION(burst);
-}
-
-/**
- * Schedule synchronous evaluation of the execution on a burst object.
- *
- * <p>Schedules synchronous evaluation of the execution. Returns once the
- * execution has completed and the outputs are ready to be consumed.</p>
- *
- * <p>There must be at most one {@link ANeuralNetworksExecution} processing at
- * any given time for any given burst object. Any
- * {@link ANeuralNetworksExecution} launched before the previous has finished
- * will result in ANEURALNETWORKS_BAD_STATE.</p>
- *
- * Available since API level 29.
- *
- * @param burst The burst object to execute on.
- * @param execution The execution to be scheduled and executed. The execution
- * must be created from the same {@link
- * ANeuralNetworksCompilation} as the burst object.
- *
- * @return ANEURALNETWORKS_NO_ERROR if the execution completed normally.
- */
-inline int ANeuralNetworksExecution_burstCompute(ANeuralNetworksExecution *execution,
- ANeuralNetworksBurst *burst)
-{
- LOAD_FUNCTION(ANeuralNetworksExecution_burstCompute);
- EXECUTE_FUNCTION_RETURN(execution, burst);
-}
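-
-/* Usage sketch (illustrative; `numFrames` is hypothetical): reuse one burst
- * object across repeated executions created from the same compilation.
- *
- *   ANeuralNetworksBurst *burst = NULL;
- *   ANeuralNetworksBurst_create(compilation, &burst);
- *   for (int i = 0; i < numFrames; ++i) {
- *     // ... create `execution`, set its inputs and outputs ...
- *     ANeuralNetworksExecution_burstCompute(execution, burst);
- *     ANeuralNetworksExecution_free(execution);
- *   }
- *   ANeuralNetworksBurst_free(burst);
- */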
-
-/**
- * Creates a shared memory object from an AHardwareBuffer handle.
- *
- * If the shared memory is backed by an AHardwareBuffer of
- * AHARDWAREBUFFER_FORMAT_BLOB format, it can be used the same way as shared
- * memory created from a file handle. See
- * {@link ANeuralNetworksMemory} for a description on how to use this shared
- * memory.
- *
- * If the shared memory is backed by an AHardwareBuffer of a format other than
- * AHARDWAREBUFFER_FORMAT_BLOB, it can only be used for Model inputs and
- * outputs. When calling {@link ANeuralNetworksExecution_setInputFromMemory} or
- * {@link ANeuralNetworksExecution_setOutputFromMemory} with the shared memory,
- * both offset and length must be set to zero and the entire memory region will
- * be associated with the specified input or output operand. There is no
- * guarantee that an arbitrary AHardwareBuffer_Format and
- * AHardwareBuffer_UsageFlags combination can be used by arbitrary devices. The
- * execution will fail if the selected set of devices cannot consume the buffer.
- *
- * Calling {@link ANeuralNetworksModel_setOperandValueFromMemory} with shared
- * memory backed by an AHardwareBuffer of a format other than
- * AHARDWAREBUFFER_FORMAT_BLOB is disallowed.
- *
- * TODO(miaowang): add documentation about intended usage with introspection
- * API.
- *
- * Available since API level 29.
- *
- * @param ahwb The AHardwareBuffer handle.
- * @param memory The memory object to be created.
- * Set to NULL if unsuccessful.
- *
- * @return ANEURALNETWORKS_NO_ERROR if the request completed normally.
- *
- * @see AHardwareBuffer
- */
-inline int ANeuralNetworksMemory_createFromAHardwareBuffer(const AHardwareBuffer *ahwb,
- ANeuralNetworksMemory **memory)
-{
- LOAD_FUNCTION(ANeuralNetworksMemory_createFromAHardwareBuffer);
- EXECUTE_FUNCTION_RETURN(ahwb, memory);
-}
-
-/**
- * Specifies whether duration of the {@link ANeuralNetworksExecution} is to be
- * measured. By default, duration is not measured.
- *
- * The {@link ANeuralNetworksExecution} must have been created with
- * {@link ANeuralNetworksCompilation_createForDevices} with numDevices = 1.
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- *
- * Available since API level 29.
- *
- * @param execution The execution to be modified.
- * @param measure 'true' if duration is to be measured, 'false' if not.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksExecution_setMeasureTiming(ANeuralNetworksExecution *execution,
- bool measure)
-{
- LOAD_FUNCTION(ANeuralNetworksExecution_setMeasureTiming);
- EXECUTE_FUNCTION_RETURN(execution, measure);
-}
-
-/**
- * Get the time spent in the specified {@link ANeuralNetworksExecution}, in
- * nanoseconds. The execution must have completed.
- *
- * @param execution The execution to be queried.
- * @param durationCode The measurement to be queried, specified by {@link
- * DurationCode}.
- * @param duration The returned duration. If no measurement was requested by
- *                 {@link ANeuralNetworksExecution_setMeasureTiming}, or for
- *                 some other reason the duration is not available, UINT64_MAX
- *                 will be returned. A particular device need not support any
- *                 given measurement.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksExecution_getDuration(const ANeuralNetworksExecution *execution,
- int32_t durationCode, uint64_t *duration)
-{
- LOAD_FUNCTION(ANeuralNetworksExecution_getDuration);
- EXECUTE_FUNCTION_RETURN(execution, durationCode, duration);
-}
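-
-/* Usage sketch (illustrative, assuming the execution was created from a
- * single-device compilation):
- *
- *   ANeuralNetworksExecution_setMeasureTiming(execution, true);
- *   ANeuralNetworksExecution_compute(execution);
- *   uint64_t ns = UINT64_MAX;
- *   ANeuralNetworksExecution_getDuration(
- *     execution, ANEURALNETWORKS_DURATION_ON_HARDWARE, &ns);
- */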
-
-
-#endif // __NEURAL_NETWORKS_SHIM_H__
diff --git a/runtime/libs/nnapi/v1.2/include/NeuralNetworksTypes.h b/runtime/libs/nnapi/v1.2/include/NeuralNetworksTypes.h
deleted file mode 100644
index d74402749..000000000
--- a/runtime/libs/nnapi/v1.2/include/NeuralNetworksTypes.h
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// NOTE This header is derived from part of the following file
-// https://github.com/tensorflow/tensorflow/blob/a59ad83d06abd38b5e142c41043db8886a92fca8/tensorflow/lite/nnapi/NeuralNetworksTypes.h
-
-#ifndef __NEURAL_NETWORKS_TYPES_H__
-#define __NEURAL_NETWORKS_TYPES_H__
-
-#include "NeuralNetworks.h"
-
-// NN api types based on NNAPI header file
-// https://developer.android.com/ndk/reference/group/neural-networks
-
-// nn api function types
-
-typedef int (*ANeuralNetworksMemory_createFromFd_fn)(size_t size, int protect, int fd,
- size_t offset, ANeuralNetworksMemory **memory);
-
-typedef void (*ANeuralNetworksMemory_free_fn)(ANeuralNetworksMemory *memory);
-
-typedef int (*ANeuralNetworksModel_create_fn)(ANeuralNetworksModel **model);
-
-typedef int (*ANeuralNetworksModel_finish_fn)(ANeuralNetworksModel *model);
-
-typedef void (*ANeuralNetworksModel_free_fn)(ANeuralNetworksModel *model);
-
-typedef int (*ANeuralNetworksCompilation_create_fn)(ANeuralNetworksModel *model,
- ANeuralNetworksCompilation **compilation);
-
-typedef void (*ANeuralNetworksCompilation_free_fn)(ANeuralNetworksCompilation *compilation);
-
-typedef int (*ANeuralNetworksCompilation_setPreference_fn)(ANeuralNetworksCompilation *compilation,
- int32_t preference);
-
-typedef int (*ANeuralNetworksCompilation_finish_fn)(ANeuralNetworksCompilation *compilation);
-
-typedef int (*ANeuralNetworksModel_addOperand_fn)(ANeuralNetworksModel *model,
- const ANeuralNetworksOperandType *type);
-
-typedef int (*ANeuralNetworksModel_setOperandValue_fn)(ANeuralNetworksModel *model, int32_t index,
- const void *buffer, size_t length);
-
-typedef int (*ANeuralNetworksModel_setOperandSymmPerChannelQuantParams_fn)(
- ANeuralNetworksModel *model, int32_t index,
- const ANeuralNetworksSymmPerChannelQuantParams *channelQuant);
-
-typedef int (*ANeuralNetworksModel_setOperandValueFromMemory_fn)(
- ANeuralNetworksModel *model, int32_t index, const ANeuralNetworksMemory *memory, size_t offset,
- size_t length);
-
-typedef int (*ANeuralNetworksModel_addOperation_fn)(ANeuralNetworksModel *model,
- ANeuralNetworksOperationType type,
- uint32_t inputCount, const uint32_t *inputs,
- uint32_t outputCount, const uint32_t *outputs);
-
-typedef int (*ANeuralNetworksModel_identifyInputsAndOutputs_fn)(ANeuralNetworksModel *model,
- uint32_t inputCount,
- const uint32_t *inputs,
- uint32_t outputCount,
- const uint32_t *outputs);
-
-typedef int (*ANeuralNetworksModel_relaxComputationFloat32toFloat16_fn)(ANeuralNetworksModel *model,
- bool allow);
-
-typedef int (*ANeuralNetworksExecution_create_fn)(ANeuralNetworksCompilation *compilation,
- ANeuralNetworksExecution **execution);
-
-typedef void (*ANeuralNetworksExecution_free_fn)(ANeuralNetworksExecution *execution);
-
-typedef int (*ANeuralNetworksExecution_setInput_fn)(ANeuralNetworksExecution *execution,
- int32_t index,
- const ANeuralNetworksOperandType *type,
- const void *buffer, size_t length);
-
-typedef int (*ANeuralNetworksExecution_setInputFromMemory_fn)(
- ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type,
- const ANeuralNetworksMemory *memory, size_t offset, size_t length);
-
-typedef int (*ANeuralNetworksExecution_setOutput_fn)(ANeuralNetworksExecution *execution,
- int32_t index,
- const ANeuralNetworksOperandType *type,
- void *buffer, size_t length);
-
-typedef int (*ANeuralNetworksExecution_setOutputFromMemory_fn)(
- ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type,
- const ANeuralNetworksMemory *memory, size_t offset, size_t length);
-
-typedef int (*ANeuralNetworksExecution_startCompute_fn)(ANeuralNetworksExecution *execution,
- ANeuralNetworksEvent **event);
-
-typedef int (*ANeuralNetworksEvent_wait_fn)(ANeuralNetworksEvent *event);
-
-typedef void (*ANeuralNetworksEvent_free_fn)(ANeuralNetworksEvent *event);
-
-typedef int (*ASharedMemory_create_fn)(const char *name, size_t size);
-
-typedef int (*ANeuralNetworks_getDeviceCount_fn)(uint32_t *numDevices);
-
-typedef int (*ANeuralNetworks_getDevice_fn)(uint32_t devIndex, ANeuralNetworksDevice **device);
-
-typedef int (*ANeuralNetworksDevice_getName_fn)(const ANeuralNetworksDevice *device,
- const char **name);
-
-typedef int (*ANeuralNetworksDevice_getType_fn)(const ANeuralNetworksDevice *device, int32_t *type);
-
-typedef int (*ANeuralNetworksDevice_getVersion_fn)(const ANeuralNetworksDevice *device,
- const char **version);
-
-typedef int (*ANeuralNetworksDevice_getFeatureLevel_fn)(const ANeuralNetworksDevice *device,
- int64_t *featureLevel);
-
-typedef int (*ANeuralNetworksModel_getSupportedOperationsForDevices_fn)(
- const ANeuralNetworksModel *model, const ANeuralNetworksDevice *const *devices,
- uint32_t numDevices, bool *supportedOps);
-
-typedef int (*ANeuralNetworksCompilation_createForDevices_fn)(
- ANeuralNetworksModel *model, const ANeuralNetworksDevice *const *devices, uint32_t numDevices,
- ANeuralNetworksCompilation **compilation);
-
-typedef int (*ANeuralNetworksCompilation_setCaching_fn)(ANeuralNetworksCompilation *compilation,
- const char *cacheDir, const uint8_t *token);
-
-typedef int (*ANeuralNetworksExecution_compute_fn)(ANeuralNetworksExecution *execution);
-
-typedef int (*ANeuralNetworksExecution_getOutputOperandRank_fn)(ANeuralNetworksExecution *execution,
- int32_t index, uint32_t *rank);
-
-typedef int (*ANeuralNetworksExecution_getOutputOperandDimensions_fn)(
- ANeuralNetworksExecution *execution, int32_t index, uint32_t *dimensions);
-
-typedef int (*ANeuralNetworksBurst_create_fn)(ANeuralNetworksCompilation *compilation,
- ANeuralNetworksBurst **burst);
-
-typedef void (*ANeuralNetworksBurst_free_fn)(ANeuralNetworksBurst *burst);
-
-typedef int (*ANeuralNetworksExecution_burstCompute_fn)(ANeuralNetworksExecution *execution,
- ANeuralNetworksBurst *burst);
-
-typedef int (*ANeuralNetworksMemory_createFromAHardwareBuffer_fn)(const AHardwareBuffer *ahwb,
- ANeuralNetworksMemory **memory);
-
-typedef int (*ANeuralNetworksExecution_setMeasureTiming_fn)(ANeuralNetworksExecution *execution,
- bool measure);
-
-typedef int (*ANeuralNetworksExecution_getDuration_fn)(const ANeuralNetworksExecution *execution,
- int32_t durationCode, uint64_t *duration);
-
-#endif // __NEURAL_NETWORKS_TYPES_H__
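These *_fn typedefs exist so that NNAPI entry points looked up by name at runtime get a correctly typed signature. A minimal sketch of that pattern (the shim's LOAD_FUNCTION macro does essentially this; the helper name here is hypothetical):

#include <dlfcn.h>

inline ANeuralNetworksMemory_createFromFd_fn loadMemoryCreateFromFd()
{
  // Open the NNAPI library once and resolve the symbol by name.
  static void *handle = dlopen("libneuralnetworks.so", RTLD_LAZY | RTLD_LOCAL);
  if (handle == nullptr)
    return nullptr;
  return reinterpret_cast<ANeuralNetworksMemory_createFromFd_fn>(
    dlsym(handle, "ANeuralNetworksMemory_createFromFd"));
}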
diff --git a/runtime/libs/profiling/CMakeLists.txt b/runtime/libs/profiling/CMakeLists.txt
index e0398ce93..b115cc1c6 100644
--- a/runtime/libs/profiling/CMakeLists.txt
+++ b/runtime/libs/profiling/CMakeLists.txt
@@ -4,4 +4,3 @@ add_library(nnfw_lib_profiling STATIC ${SOURCES})
set_property(TARGET nnfw_lib_profiling PROPERTY POSITION_INDEPENDENT_CODE ON)
target_include_directories(nnfw_lib_profiling PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
target_link_libraries(nnfw_lib_profiling PRIVATE nnfw_common)
-target_link_libraries(nnfw_lib_profiling PRIVATE nnfw_coverage)
diff --git a/runtime/libs/profiling/src/profiling/time.cpp b/runtime/libs/profiling/src/profiling/time.cpp
index 4e045556e..5f6f6657e 100644
--- a/runtime/libs/profiling/src/profiling/time.cpp
+++ b/runtime/libs/profiling/src/profiling/time.cpp
@@ -25,7 +25,7 @@
#if defined(_MSC_VER)
#include <chrono> // NOLINT(build/c++11)
#else
-#include <sys/time.h>
+#include <time.h>
#endif
namespace tflite {
@@ -43,9 +43,9 @@ uint64_t NowMicros() {
#else
uint64_t NowMicros() {
- struct timeval tv;
- gettimeofday(&tv, nullptr);
- return static_cast<uint64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
+ struct timespec ts;
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+  return static_cast<uint64_t>(ts.tv_sec) * 1000000 + static_cast<uint64_t>(ts.tv_nsec) / 1000;
}
#endif // defined(_MSC_VER)
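The hunk above swaps wall-clock time (gettimeofday) for CLOCK_MONOTONIC, which cannot jump when the system clock is adjusted, so elapsed-time differences stay non-negative; the return is also kept in integer arithmetic rather than the double-valued 1e3/1e6 constants. A small interval-measurement sketch built on it, assuming NowMicros() is exposed as tflite::profiling::time::NowMicros by the matching header:

#include <cstdint>

template <typename Fn> uint64_t elapsed_micros(Fn &&work)
{
  const uint64_t begin = tflite::profiling::time::NowMicros();
  work();
  // Safe: the monotonic clock guarantees the difference is non-negative.
  return tflite::profiling::time::NowMicros() - begin;
}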
diff --git a/runtime/libs/rua/anchor/CMakeLists.txt b/runtime/libs/rua/anchor/CMakeLists.txt
index 6e65641f4..fb41c47ea 100644
--- a/runtime/libs/rua/anchor/CMakeLists.txt
+++ b/runtime/libs/rua/anchor/CMakeLists.txt
@@ -6,4 +6,3 @@ target_include_directories(nnfw_lib_rua_anchor PUBLIC include)
target_link_libraries(nnfw_lib_rua_anchor PUBLIC nnfw_lib_rua_core)
target_link_libraries(nnfw_lib_rua_anchor PRIVATE nnfw_lib_rua_dyn)
target_link_libraries(nnfw_lib_rua_anchor PRIVATE nnfw_common)
-target_link_libraries(nnfw_lib_rua_anchor PRIVATE nnfw_coverage)
diff --git a/runtime/libs/rua/dyn/CMakeLists.txt b/runtime/libs/rua/dyn/CMakeLists.txt
index 3f9ac8928..01d8a7c02 100644
--- a/runtime/libs/rua/dyn/CMakeLists.txt
+++ b/runtime/libs/rua/dyn/CMakeLists.txt
@@ -5,4 +5,3 @@ set_target_properties(nnfw_lib_rua_dyn PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(nnfw_lib_rua_dyn PUBLIC include)
target_link_libraries(nnfw_lib_rua_dyn PUBLIC nnfw_lib_rua_core)
target_link_libraries(nnfw_lib_rua_dyn PRIVATE nnfw_common)
-target_link_libraries(nnfw_lib_rua_dyn PRIVATE nnfw_coverage)
diff --git a/runtime/libs/rua/dyn/src/DynamicBinder.cpp b/runtime/libs/rua/dyn/src/DynamicBinder.cpp
index fa3f0bb1e..f49892de1 100644
--- a/runtime/libs/rua/dyn/src/DynamicBinder.cpp
+++ b/runtime/libs/rua/dyn/src/DynamicBinder.cpp
@@ -97,8 +97,8 @@ typedef int (*ANeuralNetworksModel_setOperandValue_fn)(ANeuralNetworksModel *mod
const void *buffer, size_t length);
typedef int (*ANeuralNetworksModel_setOperandValueFromMemory_fn)(
- ANeuralNetworksModel *model, int32_t index, const ANeuralNetworksMemory *memory, size_t offset,
- size_t length);
+ ANeuralNetworksModel *model, int32_t index, const ANeuralNetworksMemory *memory, size_t offset,
+ size_t length);
typedef int (*ANeuralNetworksModel_addOperation_fn)(ANeuralNetworksModel *model,
ANeuralNetworksOperationType type,
@@ -242,8 +242,8 @@ typedef int (*ANeuralNetworksExecution_setInput_fn)(ANeuralNetworksExecution *ex
const void *buffer, size_t length);
typedef int (*ANeuralNetworksExecution_setInputFromMemory_fn)(
- ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type,
- const ANeuralNetworksMemory *memory, size_t offset, size_t length);
+ ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type,
+ const ANeuralNetworksMemory *memory, size_t offset, size_t length);
typedef int (*ANeuralNetworksExecution_setOutput_fn)(ANeuralNetworksExecution *execution,
int32_t index,
@@ -251,8 +251,8 @@ typedef int (*ANeuralNetworksExecution_setOutput_fn)(ANeuralNetworksExecution *e
void *buffer, size_t length);
typedef int (*ANeuralNetworksExecution_setOutputFromMemory_fn)(
- ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type,
- const ANeuralNetworksMemory *memory, size_t offset, size_t length);
+ ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type,
+ const ANeuralNetworksMemory *memory, size_t offset, size_t length);
typedef int (*ANeuralNetworksExecution_startCompute_fn)(ANeuralNetworksExecution *execution,
ANeuralNetworksEvent **event);
diff --git a/runtime/libs/tflite/CMakeLists.txt b/runtime/libs/tflite/CMakeLists.txt
index 93a3c9789..3c5779099 100644
--- a/runtime/libs/tflite/CMakeLists.txt
+++ b/runtime/libs/tflite/CMakeLists.txt
@@ -1,11 +1,9 @@
-nnfw_find_package(TensorFlowLite EXACT 1.13.1 QUIET)
+nnfw_find_package(TensorFlowLite EXACT 2.8.0 QUIET)
if(NOT TensorFlowLite_FOUND)
message(STATUS "Check tensorflow lite library extension build: need tensorflow lite library")
return()
endif(NOT TensorFlowLite_FOUND)
-add_subdirectory(port)
-
file(GLOB_RECURSE SOURCES "src/*.cpp")
file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
@@ -13,11 +11,10 @@ list(REMOVE_ITEM SOURCES ${TESTS})
add_library(nnfw_lib_tflite STATIC ${SOURCES})
set_target_properties(nnfw_lib_tflite PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(nnfw_lib_tflite PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
-target_link_libraries(nnfw_lib_tflite PUBLIC tensorflow-lite-ex)
+target_link_libraries(nnfw_lib_tflite PUBLIC tensorflow-lite-2.8.0)
target_link_libraries(nnfw_lib_tflite PUBLIC nnfw_lib_misc)
target_link_libraries(nnfw_lib_tflite PRIVATE ${LIB_PTHREAD} dl)
target_link_libraries(nnfw_lib_tflite PRIVATE nnfw_common)
-target_link_libraries(nnfw_lib_tflite PRIVATE nnfw_coverage)
if(NOT ENABLE_TEST)
return()
diff --git a/runtime/libs/tflite/include/tflite/Diff.h b/runtime/libs/tflite/include/tflite/Diff.h
index fdc1a310b..2d30d4135 100644
--- a/runtime/libs/tflite/include/tflite/Diff.h
+++ b/runtime/libs/tflite/include/tflite/Diff.h
@@ -23,7 +23,7 @@
#ifndef __NNFW_TFLITE_DIFF_H__
#define __NNFW_TFLITE_DIFF_H__
-#include "tensorflow/lite/interpreter.h"
+#include "tflite/TensorView.h"
#include "misc/RandomGenerator.h"
#include "misc/tensor/Index.h"
@@ -31,7 +31,7 @@
#include "misc/tensor/Shape.h"
#include "misc/tensor/Comparator.h"
-#include "tflite/TensorView.h"
+#include <tensorflow/lite/c/c_api.h>
#include <functional>
#include <vector>
@@ -47,7 +47,7 @@ public:
 * @param[in] comparator Comparator object for tensor comparison
*/
TfLiteInterpMatchApp(const nnfw::misc::tensor::Comparator &comparator)
- : _verbose{false}, _comparator(comparator)
+ : _verbose{false}, _comparator(comparator)
{
// DO NOTHING
}
@@ -65,11 +65,11 @@ private:
public:
/**
 * @brief Run two interpreters and return whether their outputs match
- * @param[in] pure Interpreter object of expected(with TfLite)
- * @param[in] nnapi Interpreter object of obtained(through NNAPI)
+ * @param[in] expected Interpreter producing the expected results
+ * @param[in] obtained Interpreter producing the obtained results
* @return @c true if two Interpreter results are same, otherwise @c false
*/
- bool run(::tflite::Interpreter &pure, ::tflite::Interpreter &nnapi) const;
+ bool run(TfLiteInterpreter &expected, TfLiteInterpreter &obtained) const;
/**
* @brief Compare two TensorView values and return the match result
* @param[in] expected TensorView object to read expected values
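With run() now taking TfLiteInterpreter handles from the 2.x C API, an output comparison looks roughly like the sketch below (bitwise equality stands in for the tolerance-based comparator this class actually uses; both interpreters are assumed to have been invoked already):

#include <cstdint>
#include <cstring>
#include <tensorflow/lite/c/c_api.h>

bool outputs_bitwise_equal(const TfLiteInterpreter &expected, const TfLiteInterpreter &obtained)
{
  const int32_t count = TfLiteInterpreterGetOutputTensorCount(&expected);
  if (count != TfLiteInterpreterGetOutputTensorCount(&obtained))
    return false;

  for (int32_t i = 0; i < count; ++i)
  {
    const TfLiteTensor *lhs = TfLiteInterpreterGetOutputTensor(&expected, i);
    const TfLiteTensor *rhs = TfLiteInterpreterGetOutputTensor(&obtained, i);
    if (TfLiteTensorByteSize(lhs) != TfLiteTensorByteSize(rhs))
      return false;
    // The real matcher compares element-wise with a tolerance; bitwise
    // equality is the simplest possible stand-in.
    if (std::memcmp(TfLiteTensorData(lhs), TfLiteTensorData(rhs), TfLiteTensorByteSize(lhs)) != 0)
      return false;
  }
  return true;
}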
diff --git a/runtime/libs/tflite/include/tflite/FeatureView.h b/runtime/libs/tflite/include/tflite/FeatureView.h
deleted file mode 100644
index a8f069c40..000000000
--- a/runtime/libs/tflite/include/tflite/FeatureView.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file FeatureView.h
- * @brief This file contains FeatureView class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_FEATURE_VIEW_H__
-#define __NNFW_TFLITE_FEATURE_VIEW_H__
-
-#include "tensorflow/lite/interpreter.h"
-
-#include "tflite/InputIndex.h"
-#include "tflite/OutputIndex.h"
-
-#include "misc/feature/Shape.h"
-#include "misc/feature/Reader.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-
-template <typename T> class FeatureView;
-
-/**
- * @brief Class to support reading element of float type feature
- */
-template <> class FeatureView<float> : public nnfw::misc::feature::Reader<float>
-{
-public:
- /**
- * @brief Construct a new FeatureView object
- * @param[in] interp Interpreter to read from
- * @param[in] index InputIndex index of input
- */
- FeatureView(::tflite::Interpreter &interp, const InputIndex &index);
- /**
- * @brief Construct a new FeatureView object
- * @param[in] interp Interpreter to read from
- * @param[in] index OutputIndex index of output
- */
- FeatureView(::tflite::Interpreter &interp, const OutputIndex &index);
-
-public:
- /**
- * @brief Get value of element using channel, row and column index
- * @param[in] ch Channel index
- * @param[in] row Row index
- * @param[in] col Column index
- * @return Value of element
- */
- float at(uint32_t ch, uint32_t row, uint32_t col) const;
- /**
- * @brief Get reference of element using channel, row and column index
- * @param[in] ch Channel index
- * @param[in] row Row index
- * @param[in] col Column index
- * @return Reference of element
- */
- float &at(uint32_t ch, uint32_t row, uint32_t col);
-
- float at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const = 0;
-
-private:
- /**
- * @brief Get offset of element from channel, row and column index
- * @param[in] ch Channel index
- * @param[in] row Row index
- * @param[in] col Column index
- * @return Offset of element
- */
- uint32_t getElementOffset(uint32_t ch, uint32_t row, uint32_t col) const
- {
- uint32_t res = 0;
-
-    // TensorFlow Lite assumes NHWC ordering for tensors
- res += row * _shape.W * _shape.C;
- res += col * _shape.C;
- res += ch;
-
- return res;
- }
-
-private:
- nnfw::misc::feature::Shape _shape;
- float *_base;
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_FEATURE_VIEW_H__
diff --git a/runtime/libs/tflite/include/tflite/InterpreterSession.h b/runtime/libs/tflite/include/tflite/InterpreterSession.h
index deaf05a7f..8fc19494a 100644
--- a/runtime/libs/tflite/include/tflite/InterpreterSession.h
+++ b/runtime/libs/tflite/include/tflite/InterpreterSession.h
@@ -40,7 +40,7 @@ public:
* @brief Construct a InterpreterSession object with interpreter of TfLite
* @param[in] interp The TfLite interpreter pointer
*/
- InterpreterSession(::tflite::Interpreter *interp) : _interp{interp}
+ InterpreterSession(TfLiteInterpreter *interp) : _interp{interp}
{
// DO NOTHING
}
@@ -50,7 +50,7 @@ public:
* @brief Get TfLite interpreter pointer
* @return The TfLite interpreter
*/
- ::tflite::Interpreter *interp(void) override { return _interp; }
+ TfLiteInterpreter *interp(void) override { return _interp; }
public:
/**
@@ -59,9 +59,7 @@ public:
*/
bool prepare(void) override
{
- _interp->UseNNAPI(false);
-
- if (kTfLiteOk != _interp->AllocateTensors())
+ if (kTfLiteOk != TfLiteInterpreterAllocateTensors(_interp))
{
return false;
}
@@ -76,7 +74,7 @@ public:
bool run(void) override
{
// Return true if Invoke returns kTfLiteOk
- return kTfLiteOk == _interp->Invoke();
+ return kTfLiteOk == TfLiteInterpreterInvoke(_interp);
}
/**
@@ -90,7 +88,7 @@ public:
}
private:
- ::tflite::Interpreter *const _interp;
+ TfLiteInterpreter *const _interp;
};
} // namespace tflite
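A usage sketch for the migrated session (not from this patch): the TfLiteInterpreter it wraps is now built through the C API factory functions, and prepare()/run() forward to TfLiteInterpreterAllocateTensors/TfLiteInterpreterInvoke as shown in the hunks above.

#include <tensorflow/lite/c/c_api.h>

#include "tflite/InterpreterSession.h"

void run_model_once(const char *model_path)
{
  TfLiteModel *model = TfLiteModelCreateFromFile(model_path);
  TfLiteInterpreterOptions *options = TfLiteInterpreterOptionsCreate();
  TfLiteInterpreter *interp = TfLiteInterpreterCreate(model, options);

  nnfw::tflite::InterpreterSession session{interp};
  if (session.prepare() && session.run()) // AllocateTensors, then Invoke
    session.teardown();                   // teardown() per the Session interface

  TfLiteInterpreterDelete(interp);
  TfLiteInterpreterOptionsDelete(options);
  TfLiteModelDelete(model);
}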
diff --git a/runtime/libs/tflite/include/tflite/NNAPISession.h b/runtime/libs/tflite/include/tflite/NNAPISession.h
deleted file mode 100644
index f430e86d3..000000000
--- a/runtime/libs/tflite/include/tflite/NNAPISession.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file NNAPISession.h
- * @brief This file contains NNAPISession class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_NNAPI_SESSION_H__
-#define __NNFW_TFLITE_NNAPI_SESSION_H__
-
-#include "Session.h"
-#include "tflite/ext/nnapi_delegate.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Class to define NNAPI interpreter session which is inherited from Session class
- */
-class NNAPISession final : public Session
-{
-public:
- /**
- * @brief Construct a NNAPISession object with interpreter of TfLite
- * @param[in] interp The TfLite interpreter pointer
- * @note Invoke BuildGraph() of NNAPI delegate from Interpreter
- */
- NNAPISession(::tflite::Interpreter *interp) : _interp{interp}
- {
- // Construct Graph from Interpreter
-    // primary_subgraph: Experimental interface. Returns the 1st subgraph
- _delegate.BuildGraph(&interp->primary_subgraph());
- }
-
-public:
- /**
- * @brief Get TfLite interpreter pointer
- * @return The TfLite interpreter
- */
- ::tflite::Interpreter *interp(void) override { return _interp; }
-
-public:
- /**
- * @brief Prepare the TfLite interpreter session
- * @return @c true if tensor preparation is successful, otherwise @c false
- */
- bool prepare(void) override
- {
- // Explicitly turn off T/F lite internal NNAPI delegation in order to use locally defined
- // NNAPI delegation.
- _interp->UseNNAPI(false);
-
- if (kTfLiteOk != _interp->AllocateTensors())
- {
- return false;
- }
-
- return true;
- }
-
- /**
- * @brief Run the Invoke function of NNAPI delegate
- * @return @c true if Invoke() is successful, otherwise @c false
- */
- bool run(void) override { return kTfLiteOk == _delegate.Invoke(&_interp->primary_subgraph()); }
-
- /**
- * @brief Tear down TfLite interpreter session
- * @return @c true always
- */
- bool teardown(void) override
- {
- // DO NOTHING
- return true;
- }
-
-private:
- ::tflite::Interpreter *const _interp;
- nnfw::tflite::NNAPIDelegate _delegate;
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_NNAPI_SESSION_H__
diff --git a/runtime/libs/tflite/include/tflite/OutputIndex.h b/runtime/libs/tflite/include/tflite/OutputIndex.h
deleted file mode 100644
index dd1ca8d44..000000000
--- a/runtime/libs/tflite/include/tflite/OutputIndex.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file OutputIndex.h
- * @brief This file contains OutputIndex class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_OUTPUT_INDEX_H__
-#define __NNFW_TFLITE_OUTPUT_INDEX_H__
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Class to define OutputIndex
- */
-class OutputIndex
-{
-public:
- /**
- * @brief Construct a OutputIndex object with index value
- * @param[in] index The value of index
- */
- OutputIndex(int index) : _index(index)
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Get index value as int
-   * @return Index value as int
- */
- int asInt(void) const { return _index; }
-
-private:
- int _index;
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_OUTPUT_INDEX_H__
diff --git a/runtime/libs/tflite/include/tflite/Quantization.h b/runtime/libs/tflite/include/tflite/Quantization.h
deleted file mode 100644
index 8272bcdc0..000000000
--- a/runtime/libs/tflite/include/tflite/Quantization.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Quantization.h
- * @brief This file contains BitwiseIntToFloat union and quantization related
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_QUANTIZATION_H__
-#define __NNFW_TFLITE_QUANTIZATION_H__
-
-/**
- * @brief Union to provide bitwise conversion of integer and float
- */
-union BitwiseIntToFloat {
- int i;
- float f;
-};
-
-static const float FLOAT_NEAREST_TO_1 = BitwiseIntToFloat{0x3f7fffff}.f;
-
-#include "tensorflow/lite/context.h"
-
-/**
- * @brief Get TfLiteQuantizationParams object with default values
- * @return TfLiteQuantizationParams object
- */
-TfLiteQuantizationParams make_default_quantization(void);
-
-#endif // __NNFW_TFLITE_QUANTIZATION_H__
diff --git a/runtime/libs/tflite/include/tflite/RandomInputInitializer.h b/runtime/libs/tflite/include/tflite/RandomInputInitializer.h
new file mode 100644
index 000000000..7dac3a827
--- /dev/null
+++ b/runtime/libs/tflite/include/tflite/RandomInputInitializer.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_TFLITE_RANDOM_INPUT_INITIALIZER_H__
+#define __NNFW_TFLITE_RANDOM_INPUT_INITIALIZER_H__
+
+#include <misc/RandomGenerator.h>
+
+#include <tensorflow/lite/c/c_api.h>
+
+namespace nnfw
+{
+namespace tflite
+{
+
+class RandomInputInitializer
+{
+public:
+ RandomInputInitializer(misc::RandomGenerator &randgen) : _randgen{randgen}
+ {
+ // DO NOTHING
+ }
+
+ void run(TfLiteInterpreter &interp);
+
+private:
+ nnfw::misc::RandomGenerator &_randgen;
+};
+
+} // namespace tflite
+} // namespace nnfw
+
+#endif // __NNFW_TFLITE_RANDOM_INPUT_INITIALIZER_H__
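The run() definition lives in a .cpp this patch does not show; the following is only a plausible sketch of its shape, assuming nnfw::misc::RandomGenerator provides a generate<T>() member as used elsewhere in this runtime:

#include <tensorflow/lite/c/c_api.h>

#include "tflite/RandomInputInitializer.h"

void fill_float_inputs(nnfw::misc::RandomGenerator &randgen, TfLiteInterpreter &interp)
{
  const int32_t count = TfLiteInterpreterGetInputTensorCount(&interp);
  for (int32_t i = 0; i < count; ++i)
  {
    TfLiteTensor *tensor = TfLiteInterpreterGetInputTensor(&interp, i);
    if (TfLiteTensorType(tensor) != kTfLiteFloat32)
      continue; // sketch: float inputs only
    auto *data = reinterpret_cast<float *>(TfLiteTensorData(tensor));
    const size_t elements = TfLiteTensorByteSize(tensor) / sizeof(float);
    for (size_t e = 0; e < elements; ++e)
      data[e] = randgen.generate<float>(); // assumes RandomGenerator::generate<T>()
  }
}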
diff --git a/runtime/libs/tflite/include/tflite/RandomTestRunner.h b/runtime/libs/tflite/include/tflite/RandomTestRunner.h
deleted file mode 100644
index c0b304c74..000000000
--- a/runtime/libs/tflite/include/tflite/RandomTestRunner.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file RandomTestRunner.h
- * @brief This file contains class for random input testing
- */
-
-#ifndef __NNFW_TFLITE_RANDOM_TEST_RUNNER_H__
-#define __NNFW_TFLITE_RANDOM_TEST_RUNNER_H__
-
-#include "tflite/interp/Builder.h"
-
-#include <misc/RandomGenerator.h>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Structure for NNAPI correctness test
- */
-struct RandomTestParam
-{
- int verbose; //!< Verbosity of debug information
-  int tolerance;          //!< Tolerance of value difference
- int tensor_logging = 0; //!< Save logging to a file if not 0
- std::string log_path = ""; //!< Path of log file, meaningful only when tensor_logging is 1
-};
-
-/**
- * @brief Class to define Random test runner
- */
-class RandomTestRunner
-{
-public:
- /**
- * @brief Construct a new RandomTestRunner object
- * @param[in] seed Random seed value
- * @param[in] param RandomTestParam object for test runner
- */
- RandomTestRunner(uint32_t seed, const RandomTestParam &param)
- : _randgen{seed, 0.0f, 2.0f}, _param{param}
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Run the random test runner
-   * @param[in] running_count Number of times to run the tflite interpreter with NNAPI
- * @return 0 if test succeeds, otherwise failure
- */
- int run(size_t running_count);
-
-public:
- /**
- * @brief Get RandomGenerator reference
- * @return RandomGenerator reference
- */
- nnfw::misc::RandomGenerator &generator() { return _randgen; };
-
-public:
- /**
- * @brief Compile the random test runner
- * @param[in] builder Interpreter Builder used to run
- */
- void compile(const nnfw::tflite::Builder &builder);
-
-private:
- nnfw::misc::RandomGenerator _randgen;
- const RandomTestParam _param;
- std::unique_ptr<::tflite::Interpreter> _tfl_interp;
- std::unique_ptr<::tflite::Interpreter> _nnapi;
-
-public:
- /**
- * @brief Create a RandomTestRunner object
- * @param[in] seed Random seed value
- * @return RandomGenerator object
- */
- static RandomTestRunner make(uint32_t seed);
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_RANDOM_TEST_RUNNER_H__
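For reference, a short sketch of how the removed runner was driven (names taken from the header above; the builder argument is whatever Builder implementation the caller had at hand):

#include "tflite/RandomTestRunner.h"

int run_correctness_test(const nnfw::tflite::Builder &builder)
{
  auto runner = nnfw::tflite::RandomTestRunner::make(/*seed=*/0);
  runner.compile(builder);          // builds both interpreters
  return runner.run(/*running_count=*/1); // 0 on success
}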
diff --git a/runtime/libs/tflite/include/tflite/Session.h b/runtime/libs/tflite/include/tflite/Session.h
index b653acf61..0aa2ce7fb 100644
--- a/runtime/libs/tflite/include/tflite/Session.h
+++ b/runtime/libs/tflite/include/tflite/Session.h
@@ -23,7 +23,7 @@
#ifndef __NNFW_TFLITE_SESSION_H__
#define __NNFW_TFLITE_SESSION_H__
-#include <tensorflow/lite/interpreter.h>
+#include <tensorflow/lite/c/c_api.h>
namespace nnfw
{
@@ -44,7 +44,7 @@ struct Session
* @brief Get the Interpreter object pointer
* @return The Interpreter object pointer
*/
- virtual ::tflite::Interpreter *interp(void) = 0;
+ virtual TfLiteInterpreter *interp(void) = 0;
/**
* @brief Prepare the session
diff --git a/runtime/libs/tflite/include/tflite/TensorLogger.h b/runtime/libs/tflite/include/tflite/TensorLogger.h
deleted file mode 100644
index a824c3411..000000000
--- a/runtime/libs/tflite/include/tflite/TensorLogger.h
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file TensorLogger.h
- * @brief This file contains TensorLogger class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_TENSOR_LOGGER_H__
-#define __NNFW_TFLITE_TENSOR_LOGGER_H__
-
-#include "misc/tensor/IndexIterator.h"
-#include "tflite/TensorView.h"
-
-#include <tensorflow/lite/interpreter.h>
-#include <tensorflow/lite/context.h>
-#include <fstream>
-#include <iomanip>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Class to write input and output value / shape into a file in python form
- * @note This is a utility to write input and output value / shape into a file in python form.\n
- * any python app can load this value by running the python code below:\n
- * exec(open(filename).read())\n
- * generated python code looks like the following: \n
- * tensor_shape_gen = []\n
- * tensor_value_gen = []\n\n
- * tensor_shape_gen.append("{2, 1, 2}")\n
- * tensor_value_gen.append([1, 2, 3, 4])\n\n
- * tensor_shape_gen.append("{2}")\n
- * tensor_value_gen.append([1, 2])\n\n
- * tensor_shape_gen.append("{2, 1, 2}")\n
- * tensor_value_gen.append([1, 4, 3, 8])\n
- */
-class TensorLogger
-{
-private:
- std::ofstream _outfile;
-
-public:
- /**
- * @brief Get TensorLogger instance
- * @return The TensorLogger instance
- */
- static TensorLogger &get()
- {
- static TensorLogger instance;
- return instance;
- }
-
- /**
- * @brief Save the tensor details to file from interpreter
- * @param[in] path The file path to save
- * @param[in] interp The TfLite interpreter
- */
- void save(const std::string &path, ::tflite::Interpreter &interp)
- {
- open(path);
-
- int log_index = 0;
- for (const auto id : interp.inputs())
- {
- _outfile << "# input tensors" << std::endl;
- printTensor(interp, id, log_index++);
- }
- for (const auto id : interp.outputs())
- {
- _outfile << "# output tensors" << std::endl;
- printTensor(interp, id, log_index++);
- }
- close();
- }
-
-private:
- void open(const std::string &path)
- {
- if (!_outfile.is_open())
- _outfile.open(path, std::ios_base::out);
-
- _outfile << "# ------ file: " << path << " ------" << std::endl
- << "tensor_shape_gen = []" << std::endl
- << "tensor_value_gen = []" << std::endl
- << std::endl;
- }
-
- void printTensor(::tflite::Interpreter &interp, const int id, const int log_index)
- {
- const TfLiteTensor *tensor = interp.tensor(id);
-
- _outfile << "# tensor name: " << tensor->name << std::endl;
- _outfile << "# tflite::interpreter.tensor(" << id << ") -> "
- "tensor_value_gen["
- << log_index << "]" << std::endl;
-
- if (tensor->type == kTfLiteInt32)
- {
- printTensorShape(tensor);
- printTensorValue<int32_t>(tensor, tensor->data.i32);
- }
-    else if (tensor->type == kTfLiteUInt8)
- {
- printTensorShape(tensor);
- printTensorValue<uint8_t>(tensor, tensor->data.uint8);
- }
- else if (tensor->type == kTfLiteFloat32)
- {
- printTensorShape(tensor);
- printTensorValue<float>(tensor, tensor->data.f);
- }
- }
-
- void printTensorShape(const TfLiteTensor *tensor)
- {
- _outfile << "tensor_shape_gen.append('{";
-
- int r = 0;
- for (; r < tensor->dims->size - 1; r++)
- {
- _outfile << tensor->dims->data[r] << ", ";
- }
- _outfile << tensor->dims->data[r];
-
- _outfile << "}')" << std::endl;
- }
-
- template <typename T> void printTensorValue(const TfLiteTensor *tensor, T *tensor_data_ptr)
- {
- _outfile << "tensor_value_gen.append([";
-
- _outfile << std::fixed << std::setprecision(10);
-
- const T *end = reinterpret_cast<const T *>(tensor->data.raw_const + tensor->bytes);
- for (T *ptr = tensor_data_ptr; ptr < end; ptr++)
- _outfile << *ptr << ", ";
-
- _outfile << "])" << std::endl << std::endl;
- }
-
- void close()
- {
- _outfile << "# --------- tensor shape and value defined above ---------" << std::endl;
- _outfile.close();
- }
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_TENSOR_LOGGER_H__
diff --git a/runtime/libs/tflite/include/tflite/TensorShapeUtils.h b/runtime/libs/tflite/include/tflite/TensorShapeUtils.h
deleted file mode 100644
index ba8687413..000000000
--- a/runtime/libs/tflite/include/tflite/TensorShapeUtils.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file TensorShapeUtils.h
- * @brief This file contains utilities function of tensor shape
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_TENSOR_SHAPE_UTILS_H__
-#define __NNFW_TFLITE_TENSOR_SHAPE_UTILS_H__
-
-#include "misc/tensor/Shape.h"
-
-#include <vector>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Converts tensor::Shape into a vector
- * @param[in] shape The tensor shape to be converted
- * @return vector value of given shape object
- */
-static inline std::vector<int32_t> as_dims(const nnfw::misc::tensor::Shape &shape)
-{
- std::vector<int32_t> dims;
-
- for (uint32_t axis = 0; axis < shape.rank(); ++axis)
- {
- dims.emplace_back(shape.dim(axis));
- }
-
- return dims;
-}
-
-/**
- * @brief Broadcasts between two given shapes
- * @param[in] lhs_shape The left hand side shape
- * @param[in] rhs_shape The right hand side shape
- * @return The broadcasted shape
- */
-nnfw::misc::tensor::Shape broadcast(const nnfw::misc::tensor::Shape &lhs_shape,
- const nnfw::misc::tensor::Shape &rhs_shape);
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_TENSOR_SHAPE_UTILS_H__
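A sketch of how the two removed helpers were typically combined (assumes only the declarations above): resolve the broadcast shape, then flatten it into the int32 dimension vector TFLite resize calls expect.

#include <cstdint>
#include <vector>

#include "misc/tensor/Shape.h"
#include "tflite/TensorShapeUtils.h"

std::vector<int32_t> broadcast_dims(const nnfw::misc::tensor::Shape &lhs,
                                    const nnfw::misc::tensor::Shape &rhs)
{
  return nnfw::tflite::as_dims(nnfw::tflite::broadcast(lhs, rhs));
}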
diff --git a/runtime/libs/tflite/include/tflite/TensorUtils.h b/runtime/libs/tflite/include/tflite/TensorUtils.h
deleted file mode 100644
index 08af1468b..000000000
--- a/runtime/libs/tflite/include/tflite/TensorUtils.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file TensorUtils.h
- * @brief This file contains utilities function
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_TENSOR_UTILS_H__
-#define __NNFW_TFLITE_TENSOR_UTILS_H__
-
-#include <tensorflow/lite/context.h>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Get @c true if tensor type is kTfLiteFloat32, otherwise @c false
- * @param[in] tensor The tensor object to be compared
- * @return @c true if tensor type is kTfLiteFloat32, otherwise @c false
- */
-inline bool isFloatTensor(const TfLiteTensor *tensor) { return tensor->type == kTfLiteFloat32; }
-
-/**
- * @brief Get @c true if tensor is 4-D tensor and the first dimension length is 1,
- * otherwise @c false
- * @param[in] tensor The tensor object to be compared
- * @return @c true if tensor is 4-D tensor and the first dimension length is 1, otherwise @c false
- */
-inline bool isFeatureTensor(const TfLiteTensor *tensor)
-{
- return (tensor->dims->size == 4) && (tensor->dims->data[0] == 1);
-}
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_TENSOR_UTILS_H__
diff --git a/runtime/libs/tflite/include/tflite/TensorView.h b/runtime/libs/tflite/include/tflite/TensorView.h
index ce791a73f..956fce43f 100644
--- a/runtime/libs/tflite/include/tflite/TensorView.h
+++ b/runtime/libs/tflite/include/tflite/TensorView.h
@@ -23,13 +23,13 @@
#ifndef __NNFW_TFLITE_TENSOR_VIEW_H__
#define __NNFW_TFLITE_TENSOR_VIEW_H__
-#include "tensorflow/lite/interpreter.h"
-
#include "misc/tensor/Shape.h"
#include "misc/tensor/Index.h"
#include "misc/tensor/Reader.h"
#include "misc/tensor/NonIncreasingStride.h"
+#include <tensorflow/lite/c/c_api.h>
+
namespace nnfw
{
namespace tflite
@@ -98,19 +98,17 @@ public:
* @param[in] tensor_index The tensor index
* @return The new TensorView<T> object
*/
- static TensorView<T> make(::tflite::Interpreter &interp, int tensor_index)
+ static TensorView<T> make(const TfLiteTensor *tensor)
{
- auto tensor_ptr = interp.tensor(tensor_index);
-
// Set 'shape'
- nnfw::misc::tensor::Shape shape(tensor_ptr->dims->size);
+ nnfw::misc::tensor::Shape shape(TfLiteTensorNumDims(tensor));
for (uint32_t axis = 0; axis < shape.rank(); ++axis)
{
- shape.dim(axis) = tensor_ptr->dims->data[axis];
+ shape.dim(axis) = TfLiteTensorDim(tensor, axis);
}
- return TensorView<T>(shape, interp.typed_tensor<T>(tensor_index));
+ return TensorView<T>(shape, reinterpret_cast<T *>(TfLiteTensorData(tensor)));
}
};
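A usage sketch for the reworked factory: views are now built directly from a TfLiteTensor handle obtained through the C API. The brace-initialized Index below assumes the initializer-list constructor from nnfw_lib_misc.

#include <tensorflow/lite/c/c_api.h>

#include "misc/tensor/Index.h"
#include "tflite/TensorView.h"

float first_output_element(const TfLiteInterpreter &interp)
{
  const TfLiteTensor *tensor = TfLiteInterpreterGetOutputTensor(&interp, 0);
  auto view = nnfw::tflite::TensorView<float>::make(tensor);
  return view.at(nnfw::misc::tensor::Index{0}); // rank-1 index for brevity
}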
diff --git a/runtime/libs/tflite/include/tflite/interp/Builder.h b/runtime/libs/tflite/include/tflite/interp/Builder.h
deleted file mode 100644
index 0f54e1779..000000000
--- a/runtime/libs/tflite/include/tflite/interp/Builder.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Builder.h
- * @brief This file contains Builder structure
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_INTERP_BUILDER_H__
-#define __NNFW_TFLITE_INTERP_BUILDER_H__
-
-#include <tensorflow/lite/interpreter.h>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Structure to Builder
- */
-struct Builder
-{
- /**
- * @brief Destroy the Builder object
- */
- virtual ~Builder() = default;
-
- /**
- * @brief Build a FlatBuffer model
- * @return The TfLite interpreter object
- */
- virtual std::unique_ptr<::tflite::Interpreter> build(void) const = 0;
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_INTERP_BUILDER_H__
diff --git a/runtime/libs/tflite/include/tflite/interp/FlatBufferBuilder.h b/runtime/libs/tflite/include/tflite/interp/FlatBufferBuilder.h
deleted file mode 100644
index 2d96af50b..000000000
--- a/runtime/libs/tflite/include/tflite/interp/FlatBufferBuilder.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file FlatBufferBuilder.h
- * @brief This file contains FlatBufferBuilder class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_INTERP_FLAT_BUFFER_BUILDER_H__
-#define __NNFW_TFLITE_INTERP_FLAT_BUFFER_BUILDER_H__
-
-#include <tensorflow/lite/model.h>
-
-#include "tflite/interp/Builder.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Class to define FlatBufferBuilder which is inherited from Builder
- */
-class FlatBufferBuilder final : public Builder
-{
-public:
- /**
- * @brief Construct a FlatBufferBuilder object with FlatBufferModel of TfLite
- * @param[in] model The TfLite Flatbuffer model
- */
- FlatBufferBuilder(const ::tflite::FlatBufferModel &model) : _model{model}
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Build a FlatBuffer model
- * @return The TfLite interpreter pointer address
- */
- std::unique_ptr<::tflite::Interpreter> build(void) const override;
-
-private:
- const ::tflite::FlatBufferModel &_model;
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_INTERP_FLAT_BUFFER_BUILDER_H__
diff --git a/runtime/libs/tflite/include/tflite/interp/FunctionBuilder.h b/runtime/libs/tflite/include/tflite/interp/FunctionBuilder.h
deleted file mode 100644
index 7bfb8db2d..000000000
--- a/runtime/libs/tflite/include/tflite/interp/FunctionBuilder.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file FunctionBuilder.h
- * @brief This file contains FunctionBuilder class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_INTERP_FUNCTION_BUILDER_H__
-#define __NNFW_TFLITE_INTERP_FUNCTION_BUILDER_H__
-
-#include <tensorflow/lite/model.h>
-
-#include "tflite/interp/Builder.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Class to define FunctionBuilder which is inherited from Builder
- */
-class FunctionBuilder final : public Builder
-{
-public:
- using SetupFunc = std::function<void(::tflite::Interpreter &)>;
-
-public:
- /**
- * @brief Construct a FunctionBuilder object with SetupFunction
- * @param[in] fn The SetupFunc object
- */
- FunctionBuilder(const SetupFunc &fn) : _fn{fn}
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Build a SetupFunc
- * @return The TfLite interpreter pointer address
- */
- std::unique_ptr<::tflite::Interpreter> build(void) const override;
-
-private:
- SetupFunc _fn;
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_INTERP_FUNCTION_BUILDER_H__
diff --git a/runtime/libs/tflite/port/1.13.1/CMakeLists.txt b/runtime/libs/tflite/port/1.13.1/CMakeLists.txt
deleted file mode 100644
index e3cf97569..000000000
--- a/runtime/libs/tflite/port/1.13.1/CMakeLists.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-if(NOT SUPPORT_TFLITE_VERSION VERSION_EQUAL 1.13.1)
- return()
-endif(NOT SUPPORT_TFLITE_VERSION VERSION_EQUAL 1.13.1)
-
-file(GLOB_RECURSE SOURCES "src/*.cpp")
-
-add_library(tensorflow-lite-ex STATIC ${SOURCES})
-set_target_properties(tensorflow-lite-ex PROPERTIES POSITION_INDEPENDENT_CODE ON)
-target_include_directories(tensorflow-lite-ex PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
-target_link_libraries(tensorflow-lite-ex PUBLIC tensorflow-lite)
-target_link_libraries(tensorflow-lite-ex PUBLIC nnfw_lib_misc nnfw_lib_rua_shim)
-target_link_libraries(tensorflow-lite-ex PRIVATE ${LIB_PTHREAD} dl)
-target_link_libraries(tensorflow-lite-ex PRIVATE nnfw_common)
-target_link_libraries(tensorflow-lite-ex PRIVATE nnfw_coverage)
diff --git a/runtime/libs/tflite/port/1.13.1/include/tflite/ext/kernels/CustomOps.h b/runtime/libs/tflite/port/1.13.1/include/tflite/ext/kernels/CustomOps.h
deleted file mode 100644
index c073ad58e..000000000
--- a/runtime/libs/tflite/port/1.13.1/include/tflite/ext/kernels/CustomOps.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file CustomOps.h
- * @brief This file contains registration of custom operations
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_EXT_KERNELS_CUSTOM_OP_H__
-#define __NNFW_TFLITE_EXT_KERNELS_CUSTOM_OP_H__
-
-#include "tensorflow/lite/context.h"
-#include "tflite/ext/kernels/SquaredDifference.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-namespace custom
-{
-
-#define REGISTER_FUNCTION(Name) \
- TfLiteRegistration *Register_##Name(void) \
- { \
- static TfLiteRegistration r = {}; \
- r.init = Name::Init##Name; \
- r.free = Name::Free##Name; \
- r.prepare = Name::Prepare##Name; \
- r.invoke = Name::Eval##Name; \
- r.custom_name = #Name; \
- return &r; \
- }
-
-REGISTER_FUNCTION(SquaredDifference)
-
-#undef REGISTER_FUNCTION
-
-} // namespace custom
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_EXT_KERNELS_CUSTOM_OP_H__
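For readability, REGISTER_FUNCTION(SquaredDifference) above expands to:

TfLiteRegistration *Register_SquaredDifference(void)
{
  static TfLiteRegistration r = {};
  r.init = SquaredDifference::InitSquaredDifference;
  r.free = SquaredDifference::FreeSquaredDifference;
  r.prepare = SquaredDifference::PrepareSquaredDifference;
  r.invoke = SquaredDifference::EvalSquaredDifference;
  r.custom_name = "SquaredDifference";
  return &r;
}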
diff --git a/runtime/libs/tflite/port/1.13.1/include/tflite/ext/kernels/SquaredDifference.h b/runtime/libs/tflite/port/1.13.1/include/tflite/ext/kernels/SquaredDifference.h
deleted file mode 100644
index 5512ead78..000000000
--- a/runtime/libs/tflite/port/1.13.1/include/tflite/ext/kernels/SquaredDifference.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file SquaredDifference.h
- * @brief This file contains SquaredDifference namespace and SquaredDifference function
- * definitions
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_EXT_KERNELS_SQUARED_DIFFERENCE_H__
-#define __NNFW_TFLITE_EXT_KERNELS_SQUARED_DIFFERENCE_H__
-
-#include "tensorflow/lite/context.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-namespace custom
-{
-namespace SquaredDifference
-{
-
-/**
- * @brief Initialize the SquaredDifference operation using the contents of buffer
- * @param[in] context The TfLite context
- * @param[in] buffer The buffer with contents
- * @param[in] length The buffer length
- * @return The void pointer for user data
- */
-void *InitSquaredDifference(TfLiteContext *context, const char *buffer, size_t length);
-
-/**
- * @brief Release any memory it might have allocated via 'InitSquaredDifference'
- * @param[in] context The TfLite context
- * @param[in] buffer The buffer with contents
- * @return N/A
- */
-void FreeSquaredDifference(TfLiteContext *context, void *buffer);
-
-/**
- * @brief Prepare the SquaredDifference operation for execution
- * @param[in] context The TfLite context
- * @param[in] node The operand node
- * @return The TfLite status
- */
-TfLiteStatus PrepareSquaredDifference(TfLiteContext *context, TfLiteNode *node);
-
-/**
- * @brief Evaluate the SquaredDifference operation
- * @param[in] context The TfLite context
- * @param[in] node The operand node
- * @return The TfLite status
- */
-TfLiteStatus EvalSquaredDifference(TfLiteContext *context, TfLiteNode *node);
-
-} // namespace SquaredDifference
-} // namespace custom
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_EXT_KERNELS_SQUARED_DIFFERENCE_H__
diff --git a/runtime/libs/tflite/port/1.13.1/include/tflite/ext/kernels/register.h b/runtime/libs/tflite/port/1.13.1/include/tflite/ext/kernels/register.h
deleted file mode 100644
index 6e32b35fb..000000000
--- a/runtime/libs/tflite/port/1.13.1/include/tflite/ext/kernels/register.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-// NOTE To minimize diff with upstream tensorflow, disable clang-format
-// clang-format off
-
-// NOTE This header is derived from the following file (in TensorFlow v1.13.1)
-// 'externals/tensorflow/tensorflow/lite/kernels/register.h'
-#ifndef __NNFW_TFLITE_EXT_KERNELS_REGISTER_H__
-#define __NNFW_TFLITE_EXT_KERNELS_REGISTER_H__
-
-#include <unordered_map>
-#include "tensorflow/lite/context.h"
-#include "tensorflow/lite/model.h"
-
-namespace nnfw {
-namespace tflite {
-
-class BuiltinOpResolver : public ::tflite::MutableOpResolver {
- public:
- BuiltinOpResolver();
-
- const TfLiteRegistration* FindOp(::tflite::BuiltinOperator op,
- int version) const override;
- const TfLiteRegistration* FindOp(const char* op, int version) const override;
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_EXT_KERNELS_REGISTER_H__
-
-// clang-format on
diff --git a/runtime/libs/tflite/port/1.13.1/include/tflite/ext/nnapi_delegate.h b/runtime/libs/tflite/port/1.13.1/include/tflite/ext/nnapi_delegate.h
deleted file mode 100644
index 231baa25c..000000000
--- a/runtime/libs/tflite/port/1.13.1/include/tflite/ext/nnapi_delegate.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-// NOTE To minimize diff with upstream tensorflow, disable clang-format
-// clang-format off
-
-// NOTE This header is derived from the following file (in TensorFlow v1.13.1)
-// 'externals/tensorflow/tensorflow/lite/nnapi_delegate.h'
-#ifndef __NNFW_TFLITE_EXT_NNAPI_DELEGATE_H__
-#define __NNFW_TFLITE_EXT_NNAPI_DELEGATE_H__
-
-#include "tensorflow/lite/allocation.h"
-#include "tensorflow/lite/c/c_api_internal.h"
-#include "tensorflow/lite/core/api/error_reporter.h"
-#include "tensorflow/lite/core/subgraph.h"
-#include "tensorflow/lite/interpreter.h"
-
-struct ANeuralNetworksModel;
-struct ANeuralNetworksMemory;
-struct ANeuralNetworksCompilation;
-
-namespace nnfw {
-namespace tflite {
-
-class NNAPIAllocation : public ::tflite::MMAPAllocation {
- public:
- NNAPIAllocation(const char* filename, ::tflite::ErrorReporter* error_reporter);
- ~NNAPIAllocation();
-
- size_t offset(const void* ptr) const {
- auto signed_offset = reinterpret_cast<const uint8_t*>(ptr) -
- reinterpret_cast<const uint8_t*>(mmapped_buffer_);
-
- return static_cast<size_t>(signed_offset);
- }
-
- ANeuralNetworksMemory* memory() const { return handle_; }
- bool valid() const override { return handle_ != nullptr; }
-
- private:
- mutable ANeuralNetworksMemory* handle_ = nullptr;
-};
-
-class NNAPIDelegate {
- public:
- ~NNAPIDelegate();
-
- // Convert a tflite graph to NNAPI
- TfLiteStatus BuildGraph(::tflite::Subgraph* subgraph);
-
- // Run
- TfLiteStatus Invoke(::tflite::Subgraph* subgraph);
-
- // Whether the current platform supports NNAPI delegation.
- static bool IsSupported();
-
- private:
- // The NN API model handle
- ANeuralNetworksModel* nn_model_ = nullptr;
- // The NN API compilation handle
- ANeuralNetworksCompilation* nn_compiled_model_ = nullptr;
- // Model status
- TfLiteStatus model_status_ = kTfLiteOk;
-
- // List of state tensors for LSTM, RNN, SVDF.
- // NN API does not allow ops to maintain states across multiple
- // invocations. We need to manually create state input tensors from
- // corresponding state output tensors of TFLite operations, and map them
- // correctly.
- std::vector<int> model_states_inputs_; // holds NNAPI operand ids
- std::vector<int> model_states_outputs_; // holds TFLite tensor ids
-};
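-
-// Typical usage (a minimal sketch; assumes a prepared ::tflite::Subgraph*
-// named 'subgraph' and omits error handling):
-//
-//   nnfw::tflite::NNAPIDelegate delegate;
-//   if (nnfw::tflite::NNAPIDelegate::IsSupported()) {
-//     if (delegate.BuildGraph(subgraph) == kTfLiteOk)  // convert the graph
-//       delegate.Invoke(subgraph);                     // run via NNAPI
-//   }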
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_EXT_NNAPI_DELEGATE_H__
-
-// clang-format on
diff --git a/runtime/libs/tflite/port/1.13.1/src/kernels/SquaredDifference.cpp b/runtime/libs/tflite/port/1.13.1/src/kernels/SquaredDifference.cpp
deleted file mode 100644
index 615878513..000000000
--- a/runtime/libs/tflite/port/1.13.1/src/kernels/SquaredDifference.cpp
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/ext/kernels/SquaredDifference.h"
-#include "tensorflow/lite/kernels/kernel_util.h"
-
-#include <iostream>
-
-namespace nnfw
-{
-namespace tflite
-{
-namespace custom
-{
-namespace SquaredDifference
-{
-
-void *InitSquaredDifference(TfLiteContext *, const char *, size_t) { return nullptr; }
-
-void FreeSquaredDifference(TfLiteContext *, void *) {}
-
-TfLiteStatus PrepareSquaredDifference(TfLiteContext *context, TfLiteNode *node)
-{
- TF_LITE_ENSURE_EQ(context, ::tflite::NumInputs(node), 2);
- TF_LITE_ENSURE_EQ(context, ::tflite::NumOutputs(node), 1);
-
- const TfLiteTensor *input1 = ::tflite::GetInput(context, node, 0);
- const TfLiteTensor *input2 = ::tflite::GetInput(context, node, 1);
- TfLiteTensor *output = ::tflite::GetOutput(context, node, 0);
-
- TF_LITE_ENSURE_EQ(context, input1->type, input2->type);
- TF_LITE_ENSURE_EQ(context, input1->type, output->type);
-
- return context->ResizeTensor(context, output, TfLiteIntArrayCopy(input1->dims));
-}
-
-TfLiteStatus EvalSquaredDifference(TfLiteContext *context, TfLiteNode *node)
-{
- const TfLiteTensor *input1 = ::tflite::GetInput(context, node, 0);
- const TfLiteTensor *input2 = ::tflite::GetInput(context, node, 1);
-
- TfLiteTensor *output = ::tflite::GetOutput(context, node, 0);
-
- size_t elements = ::tflite::NumElements(input1);
-
- switch (input1->type)
- {
- case kTfLiteFloat32:
- {
- const float *in1 = input1->data.f;
- const float *in2 = input2->data.f;
- const float *in_end1 = in1 + elements;
- float *out = output->data.f;
-
- for (; in1 < in_end1; in1++, in2++, out++)
- *out = ((*in1 - *in2) * (*in1 - *in2));
-
- return kTfLiteOk;
- }
- case kTfLiteInt32:
- {
- const int *in1 = input1->data.i32;
- const int *in2 = input2->data.i32;
- const int *in_end1 = in1 + elements;
- int *out = output->data.i32;
-
- for (; in1 < in_end1; in1++, in2++, out++)
- *out = ((*in1 - *in2) * (*in1 - *in2));
-
- return kTfLiteOk;
- }
- case kTfLiteInt64:
- {
- const int64_t *in1 = input1->data.i64;
-      const int64_t *in2 = input2->data.i64;
- const int64_t *in_end1 = in1 + elements;
- int64_t *out = output->data.i64;
-
- for (; in1 < in_end1; in1++, in2++, out++)
- *out = ((*in1 - *in2) * (*in1 - *in2));
-
- return kTfLiteOk;
- }
- default:
- {
-      context->ReportError(context, "Unsupported input type: %d", input1->type);
- return kTfLiteError;
- }
- }
-}
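-
-// Registration sketch: this kernel is exposed to the interpreter via the
-// custom-op resolver in src/kernels/register.cpp of this port, roughly:
-//   AddCustom("SquaredDifference",
-//             nnfw::tflite::custom::Register_SquaredDifference());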
-
-} // namespace SquaredDifference
-} // namespace custom
-} // namespace tflite
-} // namespace nnfw
diff --git a/runtime/libs/tflite/port/1.13.1/src/kernels/register.cpp b/runtime/libs/tflite/port/1.13.1/src/kernels/register.cpp
deleted file mode 100644
index 89f81b612..000000000
--- a/runtime/libs/tflite/port/1.13.1/src/kernels/register.cpp
+++ /dev/null
@@ -1,314 +0,0 @@
-/* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-// NOTE To minimize diff with upstream tensorflow, disable clang-format
-// clang-format off
-
-// NOTE This code is derived from the following file (in TensorFlow v1.13.1)
-// 'externals/tensorflow/tensorflow/lite/kernels/register.cc'
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/util.h"
-#include "tflite/ext/kernels/CustomOps.h"
-
-namespace tflite {
-namespace ops {
-
-namespace custom {
-
-// Need additional external library for AUDIO_SPECTROGRAM
-//TfLiteRegistration* Register_AUDIO_SPECTROGRAM();
-TfLiteRegistration* Register_LAYER_NORM_LSTM();
-TfLiteRegistration* Register_MFCC();
-TfLiteRegistration* Register_DETECTION_POSTPROCESS();
-TfLiteRegistration* Register_RELU_1();
-
-} // namespace custom
-} // namespace ops
-} // namespace tflite
-
-namespace tflite {
-namespace ops {
-namespace builtin {
-
-TfLiteRegistration* Register_ABS();
-TfLiteRegistration* Register_RELU();
-TfLiteRegistration* Register_RELU_N1_TO_1();
-TfLiteRegistration* Register_RELU6();
-TfLiteRegistration* Register_TANH();
-TfLiteRegistration* Register_LOGISTIC();
-TfLiteRegistration* Register_AVERAGE_POOL_2D();
-TfLiteRegistration* Register_MAX_POOL_2D();
-TfLiteRegistration* Register_L2_POOL_2D();
-TfLiteRegistration* Register_CONV_2D();
-TfLiteRegistration* Register_DEPTHWISE_CONV_2D();
-TfLiteRegistration* Register_SVDF();
-TfLiteRegistration* Register_RNN();
-TfLiteRegistration* Register_BIDIRECTIONAL_SEQUENCE_RNN();
-TfLiteRegistration* Register_UNIDIRECTIONAL_SEQUENCE_RNN();
-TfLiteRegistration* Register_EMBEDDING_LOOKUP();
-TfLiteRegistration* Register_EMBEDDING_LOOKUP_SPARSE();
-TfLiteRegistration* Register_FULLY_CONNECTED();
-TfLiteRegistration* Register_LSH_PROJECTION();
-TfLiteRegistration* Register_HASHTABLE_LOOKUP();
-TfLiteRegistration* Register_SOFTMAX();
-TfLiteRegistration* Register_CONCATENATION();
-TfLiteRegistration* Register_ADD();
-TfLiteRegistration* Register_SPACE_TO_BATCH_ND();
-TfLiteRegistration* Register_DIV();
-TfLiteRegistration* Register_SUB();
-TfLiteRegistration* Register_BATCH_TO_SPACE_ND();
-TfLiteRegistration* Register_MUL();
-TfLiteRegistration* Register_L2_NORMALIZATION();
-TfLiteRegistration* Register_LOCAL_RESPONSE_NORMALIZATION();
-TfLiteRegistration* Register_LSTM();
-TfLiteRegistration* Register_BIDIRECTIONAL_SEQUENCE_LSTM();
-TfLiteRegistration* Register_UNIDIRECTIONAL_SEQUENCE_LSTM();
-TfLiteRegistration* Register_PAD();
-TfLiteRegistration* Register_PADV2();
-TfLiteRegistration* Register_RESHAPE();
-TfLiteRegistration* Register_RESIZE_BILINEAR();
-TfLiteRegistration* Register_RESIZE_NEAREST_NEIGHBOR();
-TfLiteRegistration* Register_SKIP_GRAM();
-TfLiteRegistration* Register_SPACE_TO_DEPTH();
-TfLiteRegistration* Register_GATHER();
-TfLiteRegistration* Register_TRANSPOSE();
-TfLiteRegistration* Register_MEAN();
-TfLiteRegistration* Register_SPLIT();
-TfLiteRegistration* Register_SPLIT_V();
-TfLiteRegistration* Register_SQUEEZE();
-TfLiteRegistration* Register_STRIDED_SLICE();
-TfLiteRegistration* Register_EXP();
-TfLiteRegistration* Register_TOPK_V2();
-TfLiteRegistration* Register_LOG();
-TfLiteRegistration* Register_LOG_SOFTMAX();
-TfLiteRegistration* Register_CAST();
-TfLiteRegistration* Register_DEQUANTIZE();
-TfLiteRegistration* Register_PRELU();
-TfLiteRegistration* Register_MAXIMUM();
-TfLiteRegistration* Register_MINIMUM();
-TfLiteRegistration* Register_ARG_MAX();
-TfLiteRegistration* Register_ARG_MIN();
-TfLiteRegistration* Register_GREATER();
-TfLiteRegistration* Register_GREATER_EQUAL();
-TfLiteRegistration* Register_LESS();
-TfLiteRegistration* Register_LESS_EQUAL();
-TfLiteRegistration* Register_FLOOR();
-TfLiteRegistration* Register_TILE();
-TfLiteRegistration* Register_NEG();
-TfLiteRegistration* Register_SUM();
-TfLiteRegistration* Register_REDUCE_PROD();
-TfLiteRegistration* Register_REDUCE_MAX();
-TfLiteRegistration* Register_REDUCE_MIN();
-TfLiteRegistration* Register_REDUCE_ANY();
-TfLiteRegistration* Register_SELECT();
-TfLiteRegistration* Register_SLICE();
-TfLiteRegistration* Register_SIN();
-TfLiteRegistration* Register_TRANSPOSE_CONV();
-TfLiteRegistration* Register_EXPAND_DIMS();
-TfLiteRegistration* Register_SPARSE_TO_DENSE();
-TfLiteRegistration* Register_EQUAL();
-TfLiteRegistration* Register_NOT_EQUAL();
-TfLiteRegistration* Register_SQRT();
-TfLiteRegistration* Register_RSQRT();
-TfLiteRegistration* Register_SHAPE();
-TfLiteRegistration* Register_POW();
-TfLiteRegistration* Register_FAKE_QUANT();
-TfLiteRegistration* Register_PACK();
-TfLiteRegistration* Register_ONE_HOT();
-TfLiteRegistration* Register_LOGICAL_OR();
-TfLiteRegistration* Register_LOGICAL_AND();
-TfLiteRegistration* Register_LOGICAL_NOT();
-TfLiteRegistration* Register_UNPACK();
-TfLiteRegistration* Register_FLOOR_DIV();
-TfLiteRegistration* Register_SQUARE();
-TfLiteRegistration* Register_ZEROS_LIKE();
-TfLiteRegistration* Register_FLOOR_MOD();
-TfLiteRegistration* Register_RANGE();
-TfLiteRegistration* Register_LEAKY_RELU();
-TfLiteRegistration* Register_SQUARED_DIFFERENCE();
-TfLiteRegistration* Register_FILL();
-TfLiteRegistration* Register_MIRROR_PAD();
-
-} // namespace builtin
-} // namespace ops
-} // namespace tflite
-
-namespace nnfw {
-namespace tflite {
-
-// Using namespace directive to minimize diff with upstream tensorflow
-using namespace ::tflite::ops::custom;
-using namespace ::tflite::ops::builtin;
-using namespace ::tflite;
-
-// Fix to use strict build option
-TfLiteStatus UnsupportedTensorFlowOp(TfLiteContext* context, TfLiteNode* /*node*/) {
- context->ReportError(
- context,
- "Regular TensorFlow ops are not supported by this interpreter. Make sure "
- "you invoke the Flex delegate before inference.");
- return kTfLiteError;
-}
-
-const TfLiteRegistration* BuiltinOpResolver::FindOp(tflite::BuiltinOperator op,
- int version) const {
- return MutableOpResolver::FindOp(op, version);
-}
-
-const TfLiteRegistration* BuiltinOpResolver::FindOp(const char* op,
- int version) const {
-  // Return the NULL Op for all ops whose names start with "Flex", allowing
- // the interpreter to delegate their execution.
- if (IsFlexOp(op)) {
- static TfLiteRegistration null_op{
- nullptr, nullptr, &UnsupportedTensorFlowOp,
- nullptr, nullptr, BuiltinOperator_CUSTOM,
- "Flex", 1};
- return &null_op;
- }
- return MutableOpResolver::FindOp(op, version);
-}
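-
-// Example: FindOp("FlexAddN", 1) returns the stub registration above, letting
-// the interpreter treat the node as delegatable; without a Flex delegate,
-// preparing such a node reports UnsupportedTensorFlowOp's error. Any other
-// name, e.g. "Mfcc", falls through to MutableOpResolver::FindOp.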
-
-BuiltinOpResolver::BuiltinOpResolver() {
- AddBuiltin(BuiltinOperator_ABS, Register_ABS());
- AddBuiltin(BuiltinOperator_RELU, Register_RELU());
- AddBuiltin(BuiltinOperator_RELU_N1_TO_1, Register_RELU_N1_TO_1());
- AddBuiltin(BuiltinOperator_RELU6, Register_RELU6());
- AddBuiltin(BuiltinOperator_TANH, Register_TANH());
- AddBuiltin(BuiltinOperator_LOGISTIC, Register_LOGISTIC());
- AddBuiltin(BuiltinOperator_AVERAGE_POOL_2D, Register_AVERAGE_POOL_2D());
- AddBuiltin(BuiltinOperator_MAX_POOL_2D, Register_MAX_POOL_2D());
- AddBuiltin(BuiltinOperator_L2_POOL_2D, Register_L2_POOL_2D());
- AddBuiltin(BuiltinOperator_CONV_2D, Register_CONV_2D());
- AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D, Register_DEPTHWISE_CONV_2D(),
- /* min_version */ 1,
- /* max_version */ 2);
- AddBuiltin(BuiltinOperator_SVDF, Register_SVDF());
- AddBuiltin(BuiltinOperator_RNN, Register_RNN());
- AddBuiltin(BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN,
- Register_BIDIRECTIONAL_SEQUENCE_RNN());
- AddBuiltin(BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN,
- Register_UNIDIRECTIONAL_SEQUENCE_RNN());
- AddBuiltin(BuiltinOperator_EMBEDDING_LOOKUP, Register_EMBEDDING_LOOKUP());
- AddBuiltin(BuiltinOperator_EMBEDDING_LOOKUP_SPARSE,
- Register_EMBEDDING_LOOKUP_SPARSE());
- AddBuiltin(BuiltinOperator_FULLY_CONNECTED, Register_FULLY_CONNECTED(),
- /* min_version */ 1,
- /* max_version */ 2);
- AddBuiltin(BuiltinOperator_LSH_PROJECTION, Register_LSH_PROJECTION());
- AddBuiltin(BuiltinOperator_HASHTABLE_LOOKUP, Register_HASHTABLE_LOOKUP());
- AddBuiltin(BuiltinOperator_SOFTMAX, Register_SOFTMAX());
- AddBuiltin(BuiltinOperator_CONCATENATION, Register_CONCATENATION());
- AddBuiltin(BuiltinOperator_ADD, Register_ADD());
- AddBuiltin(BuiltinOperator_SPACE_TO_BATCH_ND, Register_SPACE_TO_BATCH_ND());
- AddBuiltin(BuiltinOperator_BATCH_TO_SPACE_ND, Register_BATCH_TO_SPACE_ND());
- AddBuiltin(BuiltinOperator_MUL, Register_MUL());
- AddBuiltin(BuiltinOperator_L2_NORMALIZATION, Register_L2_NORMALIZATION());
- AddBuiltin(BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION,
- Register_LOCAL_RESPONSE_NORMALIZATION());
- AddBuiltin(BuiltinOperator_LSTM, Register_LSTM(), /* min_version */ 1,
- /* max_version */ 2);
- AddBuiltin(BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM,
- Register_BIDIRECTIONAL_SEQUENCE_LSTM());
- AddBuiltin(BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM,
- Register_UNIDIRECTIONAL_SEQUENCE_LSTM());
- AddBuiltin(BuiltinOperator_PAD, Register_PAD());
- AddBuiltin(BuiltinOperator_PADV2, Register_PADV2());
- AddBuiltin(BuiltinOperator_RESHAPE, Register_RESHAPE());
- AddBuiltin(BuiltinOperator_RESIZE_BILINEAR, Register_RESIZE_BILINEAR());
- AddBuiltin(BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
- Register_RESIZE_NEAREST_NEIGHBOR());
- AddBuiltin(BuiltinOperator_SKIP_GRAM, Register_SKIP_GRAM());
- AddBuiltin(BuiltinOperator_SPACE_TO_DEPTH, Register_SPACE_TO_DEPTH());
- AddBuiltin(BuiltinOperator_GATHER, Register_GATHER());
- AddBuiltin(BuiltinOperator_TRANSPOSE, Register_TRANSPOSE());
- AddBuiltin(BuiltinOperator_MEAN, Register_MEAN());
- AddBuiltin(BuiltinOperator_DIV, Register_DIV());
- AddBuiltin(BuiltinOperator_SUB, Register_SUB());
- AddBuiltin(BuiltinOperator_SPLIT, Register_SPLIT());
- AddBuiltin(BuiltinOperator_SPLIT_V, Register_SPLIT_V());
- AddBuiltin(BuiltinOperator_SQUEEZE, Register_SQUEEZE());
- AddBuiltin(BuiltinOperator_STRIDED_SLICE, Register_STRIDED_SLICE());
- AddBuiltin(BuiltinOperator_EXP, Register_EXP());
- AddBuiltin(BuiltinOperator_TOPK_V2, Register_TOPK_V2());
- AddBuiltin(BuiltinOperator_LOG, Register_LOG());
- AddBuiltin(BuiltinOperator_LOG_SOFTMAX, Register_LOG_SOFTMAX());
- AddBuiltin(BuiltinOperator_CAST, Register_CAST());
- AddBuiltin(BuiltinOperator_DEQUANTIZE, Register_DEQUANTIZE(),
- /* min_version */ 1,
- /* max_version */ 2);
- AddBuiltin(BuiltinOperator_PRELU, Register_PRELU());
- AddBuiltin(BuiltinOperator_MAXIMUM, Register_MAXIMUM());
- AddBuiltin(BuiltinOperator_MINIMUM, Register_MINIMUM());
- AddBuiltin(BuiltinOperator_ARG_MAX, Register_ARG_MAX());
- AddBuiltin(BuiltinOperator_ARG_MIN, Register_ARG_MIN());
- AddBuiltin(BuiltinOperator_GREATER, Register_GREATER());
- AddBuiltin(BuiltinOperator_GREATER_EQUAL, Register_GREATER_EQUAL());
- AddBuiltin(BuiltinOperator_LESS, Register_LESS());
- AddBuiltin(BuiltinOperator_LESS_EQUAL, Register_LESS_EQUAL());
- AddBuiltin(BuiltinOperator_FLOOR, Register_FLOOR());
- AddBuiltin(BuiltinOperator_NEG, Register_NEG());
- AddBuiltin(BuiltinOperator_SELECT, Register_SELECT());
- AddBuiltin(BuiltinOperator_SLICE, Register_SLICE());
- AddBuiltin(BuiltinOperator_SIN, Register_SIN());
- AddBuiltin(BuiltinOperator_TRANSPOSE_CONV, Register_TRANSPOSE_CONV());
- AddBuiltin(BuiltinOperator_TILE, Register_TILE());
- AddBuiltin(BuiltinOperator_SUM, Register_SUM());
- AddBuiltin(BuiltinOperator_REDUCE_PROD, Register_REDUCE_PROD());
- AddBuiltin(BuiltinOperator_REDUCE_MAX, Register_REDUCE_MAX());
- AddBuiltin(BuiltinOperator_REDUCE_MIN, Register_REDUCE_MIN());
- AddBuiltin(BuiltinOperator_REDUCE_ANY, Register_REDUCE_ANY());
- AddBuiltin(BuiltinOperator_EXPAND_DIMS, Register_EXPAND_DIMS());
- AddBuiltin(BuiltinOperator_SPARSE_TO_DENSE, Register_SPARSE_TO_DENSE());
- AddBuiltin(BuiltinOperator_EQUAL, Register_EQUAL());
- AddBuiltin(BuiltinOperator_NOT_EQUAL, Register_NOT_EQUAL());
- AddBuiltin(BuiltinOperator_SQRT, Register_SQRT());
- AddBuiltin(BuiltinOperator_RSQRT, Register_RSQRT());
- AddBuiltin(BuiltinOperator_SHAPE, Register_SHAPE());
- AddBuiltin(BuiltinOperator_POW, Register_POW());
- AddBuiltin(BuiltinOperator_FAKE_QUANT, Register_FAKE_QUANT(), 1, 2);
- AddBuiltin(BuiltinOperator_PACK, Register_PACK());
- AddBuiltin(BuiltinOperator_ONE_HOT, Register_ONE_HOT());
- AddBuiltin(BuiltinOperator_LOGICAL_OR, Register_LOGICAL_OR());
- AddBuiltin(BuiltinOperator_LOGICAL_AND, Register_LOGICAL_AND());
- AddBuiltin(BuiltinOperator_LOGICAL_NOT, Register_LOGICAL_NOT());
- AddBuiltin(BuiltinOperator_UNPACK, Register_UNPACK());
- AddBuiltin(BuiltinOperator_FLOOR_DIV, Register_FLOOR_DIV());
- AddBuiltin(BuiltinOperator_SQUARE, Register_SQUARE());
- AddBuiltin(BuiltinOperator_ZEROS_LIKE, Register_ZEROS_LIKE());
- AddBuiltin(BuiltinOperator_FLOOR_MOD, Register_FLOOR_MOD());
- AddBuiltin(BuiltinOperator_RANGE, Register_RANGE());
- AddBuiltin(BuiltinOperator_LEAKY_RELU, Register_LEAKY_RELU());
- AddBuiltin(BuiltinOperator_SQUARED_DIFFERENCE, Register_SQUARED_DIFFERENCE());
- AddBuiltin(BuiltinOperator_FILL, Register_FILL());
- AddBuiltin(BuiltinOperator_MIRROR_PAD, Register_MIRROR_PAD());
-
- AddCustom("SquaredDifference", nnfw::tflite::custom::Register_SquaredDifference());
-
- // TODO(andrewharp, ahentz): Move these somewhere more appropriate so that
- // custom ops aren't always included by default.
- AddCustom("Mfcc", tflite::ops::custom::Register_MFCC());
- // Need additional external library for audio spectrogram
- //AddCustom("AudioSpectrogram",
- // tflite::ops::custom::Register_AUDIO_SPECTROGRAM());
- AddCustom("LayerNormLstm", tflite::ops::custom::Register_LAYER_NORM_LSTM());
- AddCustom("Relu1", tflite::ops::custom::Register_RELU_1());
- AddCustom("TFLite_Detection_PostProcess",
- tflite::ops::custom::Register_DETECTION_POSTPROCESS());
-}
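-
-// Note: the optional min/max version arguments to AddBuiltin (used above for
-// DEPTHWISE_CONV_2D, FULLY_CONNECTED, LSTM, DEQUANTIZE and FAKE_QUANT)
-// register the kernel for that whole range of operator versions, so
-// FindOp(op, version) succeeds only for versions inside a registered range.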
-
-} // namespace tflite
-} // namespace nnfw
diff --git a/runtime/libs/tflite/port/1.13.1/src/nnapi_delegate.cpp b/runtime/libs/tflite/port/1.13.1/src/nnapi_delegate.cpp
deleted file mode 100644
index 9675570ad..000000000
--- a/runtime/libs/tflite/port/1.13.1/src/nnapi_delegate.cpp
+++ /dev/null
@@ -1,1262 +0,0 @@
-/* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-// NOTE To minimize diff with upstream tensorflow, disable clang-format
-// clang-format off
-
-// NOTE This code is derived from the following file (in TensorFlow v1.13.1)
-// 'externals/tensorflow/tensorflow/lite/nnapi_delegate.cc'
-#include "tflite/ext/nnapi_delegate.h"
-#include <fcntl.h>
-#include <sys/mman.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include "tensorflow/lite/c/builtin_op_data.h"
-#include "tensorflow/lite/core/api/error_reporter.h"
-#include "tensorflow/lite/model.h"
-#include <rua/Shim.h>
-#include "NeuralNetworksExShim.h"
-
-#ifdef __ANDROID__
-#include <android/log.h>
-#include <sys/system_properties.h>
-#endif
-
-#include <memory>
-#include <unordered_map>
-
-namespace nnfw {
-namespace tflite {
-
-void logError(const char* format, ...) {
- // stderr is convenient for native tests, but is not captured for apps
- va_list args_for_stderr;
- va_start(args_for_stderr, format);
- vfprintf(stderr, format, args_for_stderr);
- va_end(args_for_stderr);
- fprintf(stderr, "\n");
- fflush(stderr);
-#ifdef __ANDROID__
- // produce logcat output for general consumption
- va_list args_for_log;
- va_start(args_for_log, format);
- __android_log_vprint(ANDROID_LOG_ERROR, "tflite", format, args_for_log);
- va_end(args_for_log);
-#endif
-}
-
-#define FATAL(...)          \
-  do {                      \
-    logError(__VA_ARGS__);  \
-    exit(1);                \
-  } while (0)
-
-// TODO(aselle): Change the error model to use status codes.
-#define CHECK_TFLITE_SUCCESS(x) \
- if (x != kTfLiteOk) { \
- FATAL("Aborting since tflite returned failure nnapi_delegate.cc:%d.", \
- __LINE__); \
- }
-
-#define CHECK_NN(x) \
- if (x != ANEURALNETWORKS_NO_ERROR) { \
- FATAL("Aborting since NNAPI returned failure nnapi_delegate.cc:%d", \
- __LINE__); \
- }
-
-#define RETURN_ERROR_IF_TFLITE_FAILED(x) \
- if (x != kTfLiteOk) { \
- logError( \
- "Returning error since TFLite returned failure nnapi_delegate.cc:%d.", \
- __LINE__); \
- return kTfLiteError; \
- }
-
-#define RETURN_ERROR_IF_NN_FAILED(x) \
- if (x != ANEURALNETWORKS_NO_ERROR) { \
- logError( \
- "Returning error since NNAPI returned failure nnapi_delegate.cc:%d.", \
- __LINE__); \
- return kTfLiteError; \
- }
-
-// Tracking of NNAPI operand ids
-static const int64_t kOperandIdNotSet = -1;
-static const int64_t kOperandNotNeeded = -2;
-
-namespace {
-
-int32_t GetAndroidSdkVersion() {
-#ifdef __ANDROID__
- const char* sdkProp = "ro.build.version.sdk";
- char sdkVersion[PROP_VALUE_MAX];
- int length = __system_property_get(sdkProp, sdkVersion);
- if (length != 0) {
- for (int i = 0; i < length; ++i) {
- int digit = sdkVersion[i] - '0';
- if (digit < 0 || digit > 9) {
-        // Non-numeric SDK version, assume it's higher than expected.
- return 0xFFFF;
- }
- }
-    // NOTE Use std::strtol instead of atoi, which has undefined behavior on out-of-range input
- return std::strtol(sdkVersion, NULL, 0);
- }
- FATAL("No %s prop", sdkProp);
-#endif // __ANDROID__
- return 0;
-}
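-
-// Example: a property value of "28" parses to 28 (Android P); a non-numeric
-// value such as "QP1A" takes the 0xFFFF branch above and is treated as newer
-// than any version this code checks against.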
-
-int32_t GetAndroidSdkVersionCached() {
- static int32_t androidSdkVersion = GetAndroidSdkVersion();
- return androidSdkVersion;
-}
-
-// WORKAROUND Some models have tensors of rank zero;
-// treat such a scalar as a vector of size 1.
-static const uint32_t dimension_for_scalar[1] = {1};
-
-} // namespace
-
-NNAPIAllocation::NNAPIAllocation(const char* filename,
- ::tflite::ErrorReporter* error_reporter)
- : MMAPAllocation(filename, error_reporter) {
- if (mmapped_buffer_ != MAP_FAILED)
- CHECK_NN(ANeuralNetworksMemory_createFromFd(buffer_size_bytes_, PROT_READ,
- mmap_fd_, 0, &handle_));
-}
-
-NNAPIAllocation::~NNAPIAllocation() {
- if (handle_) {
- ANeuralNetworksMemory_free(handle_);
- }
-}
-
-NNAPIDelegate::~NNAPIDelegate() {
- if (nn_compiled_model_) {
- ANeuralNetworksCompilation_free(nn_compiled_model_);
- nn_compiled_model_ = nullptr;
- }
- if (nn_model_) {
- ANeuralNetworksModel_free(nn_model_);
- nn_model_ = nullptr;
- // TODO(aselle): Is this thread-safe and callable multiple times?
- }
- // ANeuralNetworksShutdown();
-}
-
-// Adds the tensors of the subgraph to the NN API model.
-TfLiteStatus addTensorOperands(::tflite::Subgraph* subgraph,
- ANeuralNetworksModel* nn_model,
- uint32_t* no_of_operands_added,
- std::vector<int64_t>* nnapi_ids) {
- uint32_t next_id = 0;
- // Allocate temporary buffer to save casted boolean tensor
- std::unordered_map<size_t, std::unique_ptr<uint8_t[]>> const_boolean_tensors;
-
- for (size_t i = 0; i < subgraph->tensors_size(); i++) {
- // Skip temporaries and RNN back-edges.
- if ((*nnapi_ids)[i] == kOperandNotNeeded) continue;
-
- (*nnapi_ids)[i] = int64_t(next_id);
-
- int32_t nn_type = 0;
-    // NNAPI requires the scale of 32-bit float tensors to be zero; TFLite doesn't care
- float scale = 0.0f;
- int32_t zeroPoint = 0;
- TfLiteTensor* tensor = subgraph->tensor(i);
- switch (tensor->type) {
- case kTfLiteNoType:
- // Tensors added during initialization of Ops don't have a type yet and
- // should not be registered with the NNAPI.
- continue;
- case kTfLiteFloat32:
- nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
- break;
- case kTfLiteUInt8:
- // NNAPI uses ANEURALNETWORKS_TENSOR_QUANT8_ASYMM to represent uint8 type
- // ex. ANEURALNETWORKS_CAST
- nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
- scale = tensor->params.scale;
- // ANEURALNETWORKS_TENSOR_QUANT8_ASYMM type requires scale > 0,
- // zeroPoint >= 0 and zeroPoint <= 255
- scale = (scale == 0.0f) ? 1.0f : scale;
- zeroPoint = tensor->params.zero_point;
- break;
- case kTfLiteInt32:
- nn_type = ANEURALNETWORKS_TENSOR_INT32;
- scale = tensor->params.scale;
- zeroPoint = tensor->params.zero_point;
- break;
- case kTfLiteBool:
-        // Workaround to pass bool tensors through NNAPI:
-        // represent them as ANEURALNETWORKS_TENSOR_BOOL8 (one byte per element)
- nn_type = ANEURALNETWORKS_TENSOR_BOOL8;
- break;
- default:
- logError("Unsupported tensor type %d", tensor->type);
- return kTfLiteError;
- }
- if (tensor->dims->size == 0) {
-      // WORKAROUND Some models have tensors of rank zero
- switch (tensor->type) {
- case kTfLiteFloat32:
- nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
- break;
- case kTfLiteInt32:
- nn_type = ANEURALNETWORKS_TENSOR_INT32;
- break;
- default:
- logError("NNAPI doesn't support tensors with rank 0 (index %d name %s)",
- i, tensor->name);
- return kTfLiteError;
- }
- }
- if (tensor->dims->size > 4) {
- logError("NNAPI doesn't support tensors with rank > 4 (index %d name %s)",
- i, tensor->name);
- return kTfLiteError;
- }
-    // TODO(aselle): Note, many of these are intermediate results. Do I ever
-    // need to specify these sizes? setValue is currently called on all of
-    // them below, but it shouldn't be in the future.
-    // Answer(jeanluc): If all the operators can set the dimensions correctly,
-    // you won't need to.
- ANeuralNetworksOperandType operand_type{
- nn_type, static_cast<uint32_t>(tensor->dims->size),
- reinterpret_cast<uint32_t*>(tensor->dims->data), scale, zeroPoint};
- if (tensor->dims->size == 0) {
-      // WORKAROUND Some models have tensors of rank zero;
-      // treat such a scalar as a vector of size 1.
- operand_type.dimensions = dimension_for_scalar;
- operand_type.dimensionCount = 1;
- }
- RETURN_ERROR_IF_NN_FAILED(
- ANeuralNetworksModel_addOperand(nn_model, &operand_type));
- // TODO(aselle): Based on Michael's suggestion, limiting this to read
- // only memory
- if (tensor->allocation_type == kTfLiteMmapRo) {
- if (tensor->type == kTfLiteBool)
- {
- // ANEURALNETWORKS_TENSOR_BOOL8 tensor element size is 8 bits
- size_t elements = tensor->bytes / sizeof(bool);
- const_boolean_tensors[i] = std::make_unique<uint8_t[]>(elements);
- for (size_t idx = 0; idx < elements; idx++)
- {
- const_boolean_tensors[i].get()[idx] = (tensor->data.b[idx] ? 0x00 : 0xff);
- }
- RETURN_ERROR_IF_NN_FAILED(ANeuralNetworksModel_setOperandValue(
- nn_model, next_id, const_boolean_tensors[i].get(), tensor->bytes));
- }
- else if (const NNAPIAllocation* alloc = dynamic_cast<const NNAPIAllocation*>(
- static_cast<const ::tflite::Allocation*>(tensor->allocation))) {
- RETURN_ERROR_IF_NN_FAILED(
- ANeuralNetworksModel_setOperandValueFromMemory(
- nn_model, next_id, alloc->memory(),
- alloc->offset(tensor->data.raw), tensor->bytes));
- } else {
- RETURN_ERROR_IF_NN_FAILED(ANeuralNetworksModel_setOperandValue(
- nn_model, next_id, tensor->data.raw, tensor->bytes));
- }
- } else if (tensor->bytes == 0) {
-      // Size-0 tensors are reserved as optional (omitted) operands.
- RETURN_ERROR_IF_NN_FAILED(
- ANeuralNetworksModel_setOperandValue(nn_model, next_id, nullptr, 0));
- }
-
- ++next_id;
- }
- *no_of_operands_added = next_id;
- return kTfLiteOk;
-}
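-
-// Summary of the tensor-type mapping implemented above:
-//   kTfLiteFloat32 -> ANEURALNETWORKS_TENSOR_FLOAT32
-//   kTfLiteUInt8   -> ANEURALNETWORKS_TENSOR_QUANT8_ASYMM (scale forced > 0)
-//   kTfLiteInt32   -> ANEURALNETWORKS_TENSOR_INT32
-//   kTfLiteBool    -> ANEURALNETWORKS_TENSOR_BOOL8 (constant bool tensors are
-//                     staged in a temporary byte buffer)
-//   rank 0         -> registered as a 1-element vector (dimension_for_scalar)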
-
-void MapAndAddTensorIds(const int* from_ids_buf, size_t from_ids_count,
- std::vector<uint32_t>* into,
- const std::vector<int64_t>& map) {
- for (size_t i = 0; i < from_ids_count; i++) {
- int from_id = from_ids_buf[i];
- if (from_id == kOptionalTensor) {
- into->push_back(from_id);
- } else {
- into->push_back(map[from_id]);
- }
- }
-}
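-
-// Example: with map = {3, -1, 7} (TFLite tensor id -> NNAPI operand id),
-// from_ids {0, 2} append {3, 7} to 'into'; a kOptionalTensor id is pushed
-// through unchanged so add_optional_tensors can patch it later.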
-
-// Adds the operations and their parameters to the NN API model.
-// 'next_id' is the ID that will be assigned to the next operand added to the model.
-TfLiteStatus AddOpsAndParams(
- ::tflite::Subgraph* subgraph, ANeuralNetworksModel* nn_model,
- uint32_t next_id, std::vector<int>* model_state_inputs,
- std::vector<int>* model_state_outputs,
- const std::vector<int64_t>& tensor_id_to_nnapi_id) {
- for (size_t i = 0; i < subgraph->nodes_size(); i++) {
- const auto* node_and_registration = subgraph->node_and_registration(i);
- const TfLiteNode& node = node_and_registration->first;
- const TfLiteRegistration& registration = node_and_registration->second;
- ::tflite::BuiltinOperator builtin =
- static_cast<::tflite::BuiltinOperator>(registration.builtin_code);
-
- // Add the parameters.
- std::vector<uint32_t> augmented_inputs, augmented_outputs;
- MapAndAddTensorIds(node.inputs->data, node.inputs->size, &augmented_inputs,
- tensor_id_to_nnapi_id);
- MapAndAddTensorIds(node.outputs->data, node.outputs->size,
- &augmented_outputs, tensor_id_to_nnapi_id);
-
- auto add_scalar_int32 = [&nn_model, &augmented_inputs,
- &next_id](int value) {
- // Fix to use strict build option
- ANeuralNetworksOperandType operand_type{}; operand_type.type = ANEURALNETWORKS_INT32;
- CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type))
- CHECK_NN(ANeuralNetworksModel_setOperandValue(nn_model, next_id, &value,
- sizeof(int32_t)))
- augmented_inputs.push_back(next_id++);
- };
-
- auto add_scalar_float32 = [&nn_model, &augmented_inputs,
- &next_id](float value) {
- // Fix to use strict build option
- ANeuralNetworksOperandType operand_type{}; operand_type.type = ANEURALNETWORKS_FLOAT32;
- CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type))
- CHECK_NN(ANeuralNetworksModel_setOperandValue(nn_model, next_id, &value,
- sizeof(float)))
- augmented_inputs.push_back(next_id++);
- };
-
- auto add_vector_int32 = [&](const int* values, uint32_t num_values) {
- // Fix to use strict build option
- ANeuralNetworksOperandType operand_type{};
- operand_type.type = ANEURALNETWORKS_TENSOR_INT32;
- operand_type.dimensionCount = 1;
- operand_type.dimensions = &num_values;
- CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type))
- CHECK_NN(ANeuralNetworksModel_setOperandValue(
- nn_model, next_id, values, sizeof(int32_t) * num_values));
- augmented_inputs.push_back(next_id++);
- };
-
- // Handle state tensors of RNN, LSTM, SVDF.
- // For each state_out tensor, a corresponding state_in operand needs to be
- // created for NNAPI.
- auto duplicate_state_tensor_float32 =
- [subgraph, &nn_model, &next_id, &augmented_inputs, &model_state_inputs,
- &model_state_outputs](int tensor_id) {
- const TfLiteTensor* tensor = subgraph->tensor(tensor_id);
- ANeuralNetworksOperandType operand_type{
- ANEURALNETWORKS_TENSOR_FLOAT32,
- static_cast<uint32_t>(tensor->dims->size),
- reinterpret_cast<uint32_t*>(tensor->dims->data),
- tensor->params.scale, tensor->params.zero_point};
- CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type));
- augmented_inputs.push_back(next_id);
- model_state_inputs->push_back(next_id);
- model_state_outputs->push_back(tensor_id);
- next_id++;
- };
- auto check_and_add_activation = [&add_scalar_int32](int activation) {
- if (activation > kTfLiteActRelu6) {
- logError("NNAPI only supports RELU, RELU1 and RELU6 activations");
- return kTfLiteError;
- }
- add_scalar_int32(activation);
- return kTfLiteOk;
- };
-
- auto add_add_params = [&add_scalar_int32](void* data) {
- auto* builtin = reinterpret_cast<TfLiteAddParams*>(data);
- if (builtin->activation > kTfLiteActRelu6) {
- logError("NNAPI only supports RELU, RELU1 and RELU6 activations");
- return kTfLiteError;
- }
- add_scalar_int32(builtin->activation);
- return kTfLiteOk;
- };
-
- auto add_pooling_params = [&add_scalar_int32,
- &check_and_add_activation](void* data) {
- auto builtin = reinterpret_cast<TfLitePoolParams*>(data);
- add_scalar_int32(builtin->padding);
- add_scalar_int32(builtin->stride_width);
- add_scalar_int32(builtin->stride_height);
- add_scalar_int32(builtin->filter_width);
- add_scalar_int32(builtin->filter_height);
- return check_and_add_activation(builtin->activation);
- };
-
- auto add_convolution_params = [&add_scalar_int32,
- &check_and_add_activation](void* data) {
- auto builtin = reinterpret_cast<TfLiteConvParams*>(data);
- add_scalar_int32(builtin->padding);
- add_scalar_int32(builtin->stride_width);
- add_scalar_int32(builtin->stride_height);
- return check_and_add_activation(builtin->activation);
- };
-
- auto add_depthwise_conv_params = [&add_scalar_int32,
- &check_and_add_activation](void* data) {
- auto builtin = reinterpret_cast<TfLiteDepthwiseConvParams*>(data);
- add_scalar_int32(builtin->padding);
- add_scalar_int32(builtin->stride_width);
- add_scalar_int32(builtin->stride_height);
- add_scalar_int32(builtin->depth_multiplier);
- return check_and_add_activation(builtin->activation);
- };
-
- auto add_fully_connected_params = [&check_and_add_activation](void* data) {
- auto builtin = reinterpret_cast<TfLiteFullyConnectedParams*>(data);
- return check_and_add_activation(builtin->activation);
- };
-
- auto add_concatenation_params = [&add_scalar_int32](void* data) {
- auto builtin = reinterpret_cast<TfLiteConcatenationParams*>(data);
- add_scalar_int32(builtin->axis);
- if (builtin->activation != kTfLiteActNone) {
- logError("Concatenation does not support fused activation in NNAPI");
- return kTfLiteError;
- }
- return kTfLiteOk;
- };
-
- auto add_softmax_params = [&add_scalar_float32](void* data) {
- auto builtin = reinterpret_cast<TfLiteSoftmaxParams*>(data);
- add_scalar_float32(builtin->beta);
- };
-
- auto add_space_to_depth_params = [&add_scalar_int32](void* data) {
- auto builtin = reinterpret_cast<TfLiteSpaceToDepthParams*>(data);
- add_scalar_int32(builtin->block_size);
- };
-
- auto add_lstm_params = [&add_scalar_int32,
- &add_scalar_float32](void* data) {
- auto builtin = reinterpret_cast<TfLiteLSTMParams*>(data);
- add_scalar_int32(builtin->activation);
- add_scalar_float32(builtin->cell_clip);
- add_scalar_float32(builtin->proj_clip);
- };
-
-    // LSTM in NNAPI requires a scratch tensor as an output operand.
- auto add_lstm_scratch_tensor_float32 = [subgraph, &node, &nn_model,
- &next_id, &augmented_outputs]() {
- if (node.temporaries->size == 0) return;
- int scratch_buffer_index = node.temporaries->data[0];
- const TfLiteTensor* tensor = subgraph->tensor(scratch_buffer_index);
- ANeuralNetworksOperandType operand_type{
- ANEURALNETWORKS_TENSOR_FLOAT32,
- static_cast<uint32_t>(tensor->dims->size),
- reinterpret_cast<uint32_t*>(tensor->dims->data), tensor->params.scale,
- tensor->params.zero_point};
- CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type));
- augmented_outputs.insert(augmented_outputs.begin(), next_id++);
- };
-
- auto add_mean_params = [&add_scalar_int32](void* data) {
- auto builtin = reinterpret_cast<TfLiteReducerParams*>(data);
- add_scalar_int32(builtin->keep_dims);
- };
-
- auto add_svdf_params = [&add_scalar_int32](void* data) {
- auto builtin = reinterpret_cast<TfLiteSVDFParams*>(data);
- add_scalar_int32(builtin->rank);
- add_scalar_int32(builtin->activation);
- };
-
- auto add_rnn_params = [&add_scalar_int32](void* data) {
- auto builtin = reinterpret_cast<TfLiteRNNParams*>(data);
- add_scalar_int32(builtin->activation);
- };
-
- auto add_squeeze_params = [&](void* data) {
- const auto* builtin = reinterpret_cast<TfLiteSqueezeParams*>(data);
- // Note that we add the squeeze dimensions even if the dimensions were
- // unspecified (empty), as NNAPI requires the operand.
- add_vector_int32(builtin->squeeze_dims,
- static_cast<uint32_t>(builtin->num_squeeze_dims));
- };
-
- // Handle optional input tensors.
- auto add_optional_tensors = [&nn_model, &augmented_inputs,
- &next_id](int nn_type) {
- for (size_t idx = 0; idx < augmented_inputs.size(); idx++) {
- // Fix to use strict build option
- if (augmented_inputs[idx] == static_cast<uint32_t>(kOptionalTensor)) {
- const std::vector<uint32_t> dim = {0, 0};
- ANeuralNetworksOperandType operand_type{nn_type, 2, dim.data(), 0, 0};
- CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type))
- CHECK_NN(ANeuralNetworksModel_setOperandValue(nn_model, next_id,
- nullptr, 0))
- augmented_inputs[idx] = next_id++;
- }
- }
- };
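-
-    // The helper lambdas above append parameter operands in the order NNAPI
-    // expects; e.g. add_pooling_params emits padding, stride_w, stride_h,
-    // filter_w, filter_h and then the (checked) activation, immediately after
-    // the node's data inputs.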
-
- int nnapi_version = 10;
-#include "nnapi_delegate_ex_AddOpsAndParams_lambda.inc"
-
- // Fix to use strict build option
- ANeuralNetworksOperationType nn_op_type = -1;
-
- // Using namespace directive to minimize diff with upstream tensorflow
- namespace tflite = ::tflite;
-
- switch (builtin) {
- case tflite::BuiltinOperator_ADD:
- nn_op_type = ANEURALNETWORKS_ADD;
- RETURN_ERROR_IF_TFLITE_FAILED(add_add_params(node.builtin_data));
- break;
- case tflite::BuiltinOperator_MUL:
- nn_op_type = ANEURALNETWORKS_MUL;
- RETURN_ERROR_IF_TFLITE_FAILED(add_add_params(node.builtin_data));
- break;
- case tflite::BuiltinOperator_AVERAGE_POOL_2D:
- RETURN_ERROR_IF_TFLITE_FAILED(add_pooling_params(node.builtin_data));
- nn_op_type = ANEURALNETWORKS_AVERAGE_POOL_2D;
- break;
- case tflite::BuiltinOperator_MAX_POOL_2D:
- RETURN_ERROR_IF_TFLITE_FAILED(add_pooling_params(node.builtin_data));
- nn_op_type = ANEURALNETWORKS_MAX_POOL_2D;
- break;
- case tflite::BuiltinOperator_L2_POOL_2D:
- RETURN_ERROR_IF_TFLITE_FAILED(add_pooling_params(node.builtin_data));
- nn_op_type = ANEURALNETWORKS_L2_POOL_2D;
- break;
- case tflite::BuiltinOperator_CONV_2D: {
- auto builtin = reinterpret_cast<TfLiteConvParams*>(node.builtin_data);
- if (builtin->dilation_width_factor != 1 ||
- builtin->dilation_height_factor != 1 || node.inputs->size != 3) {
- logError("NNAPI does not support dilated Conv2D.");
- return kTfLiteError;
- }
- }
- RETURN_ERROR_IF_TFLITE_FAILED(
- add_convolution_params(node.builtin_data));
- nn_op_type = ANEURALNETWORKS_CONV_2D;
- break;
- case tflite::BuiltinOperator_RELU:
- nn_op_type = ANEURALNETWORKS_RELU;
- break;
- case tflite::BuiltinOperator_RELU_N1_TO_1:
- nn_op_type = ANEURALNETWORKS_RELU1;
- break;
- case tflite::BuiltinOperator_RELU6:
- nn_op_type = ANEURALNETWORKS_RELU6;
- break;
- case tflite::BuiltinOperator_TANH:
- nn_op_type = ANEURALNETWORKS_TANH;
- break;
- case tflite::BuiltinOperator_FLOOR:
- nn_op_type = ANEURALNETWORKS_FLOOR;
- break;
- case tflite::BuiltinOperator_LOGISTIC:
- nn_op_type = ANEURALNETWORKS_LOGISTIC;
- break;
- case tflite::BuiltinOperator_DEPTHWISE_CONV_2D:
- RETURN_ERROR_IF_TFLITE_FAILED(
- add_depthwise_conv_params(node.builtin_data));
- nn_op_type = ANEURALNETWORKS_DEPTHWISE_CONV_2D;
- break;
- case tflite::BuiltinOperator_CONCATENATION:
- RETURN_ERROR_IF_TFLITE_FAILED(
- add_concatenation_params(node.builtin_data));
- nn_op_type = ANEURALNETWORKS_CONCATENATION;
- break;
- case tflite::BuiltinOperator_SOFTMAX:
- add_softmax_params(node.builtin_data);
- nn_op_type = ANEURALNETWORKS_SOFTMAX;
- break;
- case tflite::BuiltinOperator_FULLY_CONNECTED:
- RETURN_ERROR_IF_TFLITE_FAILED(
- add_fully_connected_params(node.builtin_data));
- nn_op_type = ANEURALNETWORKS_FULLY_CONNECTED;
- break;
- case tflite::BuiltinOperator_RESHAPE:
- if (node.inputs->size != 2) {
- logError("NNAPI only supports 2-input RESHAPE");
- return kTfLiteError;
- }
- nn_op_type = ANEURALNETWORKS_RESHAPE;
- // add_reshape_params(node.builtin_data);
- break;
- case tflite::BuiltinOperator_RESIZE_BILINEAR:
- add_resize_bilinear_params(node.builtin_data);
- nn_op_type = ANEURALNETWORKS_RESIZE_BILINEAR;
- break;
- case tflite::BuiltinOperator_SPACE_TO_DEPTH:
- add_space_to_depth_params(node.builtin_data);
- nn_op_type = ANEURALNETWORKS_SPACE_TO_DEPTH;
- break;
- case tflite::BuiltinOperator_LSTM: {
- if (node.inputs->size + /* no of params */ 3 != 21) {
- logError("NNAPI only supports 21-input LSTMs");
- return kTfLiteError;
- }
- duplicate_state_tensor_float32(
- node.outputs->data[/*kOutputStateTensor*/ 0]);
- duplicate_state_tensor_float32(
- node.outputs->data[/*kCellStateTensor*/ 1]);
- add_lstm_params(node.builtin_data);
- add_lstm_scratch_tensor_float32();
- add_optional_tensors(ANEURALNETWORKS_TENSOR_FLOAT32);
- nn_op_type = ANEURALNETWORKS_LSTM;
- break;
- }
- case tflite::BuiltinOperator_DEQUANTIZE:
- nn_op_type = ANEURALNETWORKS_DEQUANTIZE;
- break;
- case tflite::BuiltinOperator_SVDF: {
- duplicate_state_tensor_float32(node.outputs->data[/*kStateTensor*/ 0]);
- add_svdf_params(node.builtin_data);
- nn_op_type = ANEURALNETWORKS_SVDF;
- break;
- }
- case tflite::BuiltinOperator_RNN: {
- duplicate_state_tensor_float32(
- node.outputs->data[/*kHiddenStateTensor*/ 0]);
- add_rnn_params(node.builtin_data);
- nn_op_type = ANEURALNETWORKS_RNN;
- break;
- }
- case tflite::BuiltinOperator_EMBEDDING_LOOKUP:
- nn_op_type = ANEURALNETWORKS_EMBEDDING_LOOKUP;
- break;
- case tflite::BuiltinOperator_PAD:
- nnapi_version = 11; // require NNAPI 1.1
- nn_op_type = ANEURALNETWORKS_PAD;
- break;
- case tflite::BuiltinOperator_MEAN:
- nnapi_version = 11; // require NNAPI 1.1
- add_mean_params(node.builtin_data);
- nn_op_type = ANEURALNETWORKS_MEAN;
- break;
- case tflite::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION:
- nn_op_type = ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION;
- add_lrn_params(node.builtin_data);
- break;
- case tflite::BuiltinOperator_DIV:
- nnapi_version = 11; // require NNAPI 1.1
- nn_op_type = ANEURALNETWORKS_DIV;
- RETURN_ERROR_IF_TFLITE_FAILED(check_and_add_activation(
- reinterpret_cast<TfLiteDivParams*>(node.builtin_data)->activation));
- break;
- case tflite::BuiltinOperator_SUB:
- nnapi_version = 11; // require NNAPI 1.1
- nn_op_type = ANEURALNETWORKS_SUB;
- RETURN_ERROR_IF_TFLITE_FAILED(check_and_add_activation(
- reinterpret_cast<TfLiteSubParams*>(node.builtin_data)->activation));
- break;
- case tflite::BuiltinOperator_SQUEEZE:
- nnapi_version = 11; // requires NNAPI 1.1
- add_squeeze_params(node.builtin_data);
- nn_op_type = ANEURALNETWORKS_SQUEEZE;
- break;
- case tflite::BuiltinOperator_TRANSPOSE:
- // The permutation input tensor value dictates the output dimensions.
- // TODO(b/110888333): Support dynamically-sized tensors in delegates.
- if ((node.inputs->size > 1) &&
- (subgraph->tensor(node.inputs->data[1])->allocation_type !=
- kTfLiteMmapRo)) {
- logError("NNAPI does not yet support dynamic tensors.");
- return kTfLiteError;
- }
- nnapi_version = 11; // require NNAPI 1.1
- nn_op_type = ANEURALNETWORKS_TRANSPOSE;
- break;
- case tflite::BuiltinOperator_L2_NORMALIZATION:
- nn_op_type = ANEURALNETWORKS_L2_NORMALIZATION;
- if (reinterpret_cast<TfLiteL2NormParams*>(node.builtin_data)
- ->activation != kTfLiteActNone) {
- logError(
- "NNAPI does not support L2Normalization with fused activations");
- return kTfLiteError;
- }
- if ((node.inputs->size > 0) &&
- (subgraph->tensor(node.inputs->data[0])->dims->size != 4)) {
- logError("NNAPI only supports input rank 4 for L2Normalization");
- return kTfLiteError;
- }
- break;
- case tflite::BuiltinOperator_HASHTABLE_LOOKUP:
- if (subgraph->tensor(node.outputs->data[0])->type != kTfLiteFloat32) {
-        logError("NNAPI only supports HASHTABLE_LOOKUP with float32 output");
- return kTfLiteError;
- }
- nn_op_type = ANEURALNETWORKS_HASHTABLE_LOOKUP;
- break;
- case tflite::BuiltinOperator_SLICE:
- nn_op_type = ANEURALNETWORKS_SLICE;
- break;
- case tflite::BuiltinOperator_STRIDED_SLICE:
- add_strided_slice_params(node.builtin_data);
- nn_op_type = ANEURALNETWORKS_STRIDED_SLICE;
- break;
- case tflite::BuiltinOperator_SPACE_TO_BATCH_ND:
- nnapi_version = 11; // require NNAPI 1.1
- nn_op_type = ANEURALNETWORKS_SPACE_TO_BATCH_ND;
- break;
- case tflite::BuiltinOperator_BATCH_TO_SPACE_ND:
- nnapi_version = 11; // require NNAPI 1.1
- nn_op_type = ANEURALNETWORKS_BATCH_TO_SPACE_ND;
- check_batch_to_space_params();
- break;
- case tflite::BuiltinOperator_CAST:
- nnapi_version = 12; // require NNAPI 1.2
- nn_op_type = ANEURALNETWORKS_CAST;
- break;
- case tflite::BuiltinOperator_TOPK_V2:
- nnapi_version = 12; // require NNAPI 1.2
- nn_op_type = ANEURALNETWORKS_TOPK_V2;
- break;
- case tflite::BuiltinOperator_GREATER:
- nnapi_version = 12; // require NNAPI 1.2
- nn_op_type = ANEURALNETWORKS_GREATER;
- break;
- case tflite::BuiltinOperator_GREATER_EQUAL:
- nnapi_version = 12; // require NNAPI 1.2
- nn_op_type = ANEURALNETWORKS_GREATER_EQUAL;
- break;
- case tflite::BuiltinOperator_LESS:
- nnapi_version = 12; // require NNAPI 1.2
- nn_op_type = ANEURALNETWORKS_LESS;
- break;
- case tflite::BuiltinOperator_LESS_EQUAL:
- nnapi_version = 12; // require NNAPI 1.2
- nn_op_type = ANEURALNETWORKS_LESS_EQUAL;
- break;
- case tflite::BuiltinOperator_GATHER:
- nnapi_version = 12; // require NNAPI 1.2
- nn_op_type = ANEURALNETWORKS_GATHER;
- add_gather_params(node.builtin_data);
- break;
- case tflite::BuiltinOperator_SPLIT:
- nnapi_version = 12; // require NNAPI 1.2
- nn_op_type = ANEURALNETWORKS_SPLIT;
- add_split_params(node.builtin_data);
- break;
- case tflite::BuiltinOperator_NEG:
- nnapi_version = 12; // require NNAPI 1.2
- nn_op_type = ANEURALNETWORKS_NEG;
- break;
- case tflite::BuiltinOperator_EXP:
- nnapi_version = 12; // require NNAPI 1.2
- nn_op_type = ANEURALNETWORKS_EXP;
- break;
- case tflite::BuiltinOperator_TRANSPOSE_CONV:
- add_transpose_conv_params(node.builtin_data);
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_TRANSPOSE_CONV_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
- case tflite::BuiltinOperator_PRELU:
- nnapi_version = 12; // require NNAPI 1.2
- nn_op_type = ANEURALNETWORKS_PRELU;
- break;
- case tflite::BuiltinOperator_ARG_MAX:
- check_arg_max_input(node.builtin_data);
- nnapi_version = 12; // require NNAPI 1.2
- nn_op_type = ANEURALNETWORKS_ARGMAX;
- break;
- case tflite::BuiltinOperator_PACK:
- add_pack_ex_params(node.builtin_data);
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_PACK_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
- case tflite::BuiltinOperator_UNPACK:
- add_unpack_ex_params(node.builtin_data);
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_UNPACK_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
- case tflite::BuiltinOperator_SQRT:
- nnapi_version = 12; // require NNAPI 1.2
- nn_op_type = ANEURALNETWORKS_SQRT;
- break;
- case tflite::BuiltinOperator_RSQRT:
- nnapi_version = 12; // require NNAPI 1.2
- nn_op_type = ANEURALNETWORKS_RSQRT;
- break;
- case tflite::BuiltinOperator_EQUAL:
- nnapi_version = 12; // require NNAPI 1.2
- nn_op_type = ANEURALNETWORKS_EQUAL;
- break;
- case tflite::BuiltinOperator_NOT_EQUAL:
- nnapi_version = 12; // require NNAPI 1.2
- nn_op_type = ANEURALNETWORKS_NOT_EQUAL;
- break;
- case tflite::BuiltinOperator_SUM:
- nnapi_version = 12; // require NNAPI 1.2
- nn_op_type = ANEURALNETWORKS_REDUCE_SUM;
- add_reducer_params(node.builtin_data);
- break;
- case tflite::BuiltinOperator_REDUCE_ANY:
- add_reducer_params(node.builtin_data);
- nnapi_version = 12; // require NNAPI 1.2
- nn_op_type = ANEURALNETWORKS_REDUCE_ANY;
- break;
- case tflite::BuiltinOperator_REDUCE_MAX:
- add_reducer_params(node.builtin_data);
- nnapi_version = 12; // require NNAPI 1.2
- nn_op_type = ANEURALNETWORKS_REDUCE_MAX;
- break;
- case tflite::BuiltinOperator_REDUCE_MIN:
- nnapi_version = 12; // require NNAPI 1.2
- nn_op_type = ANEURALNETWORKS_REDUCE_MIN;
- add_reducer_params(node.builtin_data);
- break;
- case tflite::BuiltinOperator_LOG:
- nnapi_version = 12; // require NNAPI 1.2
- nn_op_type = ANEURALNETWORKS_LOG;
- break;
- case tflite::BuiltinOperator_LOGICAL_AND:
- nnapi_version = 12; // require NNAPI 1.2
- nn_op_type = ANEURALNETWORKS_LOGICAL_AND;
- break;
- case tflite::BuiltinOperator_LOGICAL_OR:
- nnapi_version = 12; // require NNAPI 1.2
- nn_op_type = ANEURALNETWORKS_LOGICAL_OR;
- break;
- case tflite::BuiltinOperator_LOGICAL_NOT:
- nnapi_version = 12; // require NNAPI 1.2
- nn_op_type = ANEURALNETWORKS_LOGICAL_NOT;
- break;
- case tflite::BuiltinOperator_SQUARED_DIFFERENCE:
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_SQUARED_DIFFERENCE_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(),
- static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
- case tflite::BuiltinOperator_MAXIMUM:
- nn_op_type = ANEURALNETWORKS_MAXIMUM;
- break;
- case tflite::BuiltinOperator_MINIMUM:
- nn_op_type = ANEURALNETWORKS_MINIMUM;
- break;
- case tflite::BuiltinOperator_ABS:
- nnapi_version = 12; // require NNAPI 1.2
- nn_op_type = ANEURALNETWORKS_ABS;
- break;
- case tflite::BuiltinOperator_ONE_HOT:
- add_one_hot_params(node.builtin_data);
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_ONE_HOT_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
-        continue; // _EX operator should use `continue` to skip addOperation.
- case tflite::BuiltinOperator_SIN:
- nnapi_version = 12; // require NNAPI 1.2
- nn_op_type = ANEURALNETWORKS_SIN;
- break;
- case tflite::BuiltinOperator_SHAPE:
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_SHAPE_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
-        continue; // _EX operator should use `continue` to skip addOperation.
- case tflite::BuiltinOperator_REDUCE_PROD:
- add_reducer_params(node.builtin_data);
- nnapi_version = 12; // require NNAPI 1.2
- nn_op_type = ANEURALNETWORKS_REDUCE_PROD;
- break;
- case tflite::BuiltinOperator_EXPAND_DIMS:
- nnapi_version = 12; // require NNAPI 1.2
- nn_op_type = ANEURALNETWORKS_EXPAND_DIMS;
- break;
- case tflite::BuiltinOperator_POW:
- if (!(subgraph->tensor(node.inputs->data[0])->type == kTfLiteFloat32 &&
- subgraph->tensor(node.inputs->data[1])->type == kTfLiteFloat32)) {
-          logError("NNAPI delegate for Pow supports only float32.");
- return kTfLiteError;
- }
- nn_op_type = ANEURALNETWORKS_POW;
- break;
- case tflite::BuiltinOperator_SELECT:
- nnapi_version = 12; // require NNAPI 1.2
- nn_op_type = ANEURALNETWORKS_SELECT;
- break;
- case tflite::BuiltinOperator_ZEROS_LIKE:
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_ZEROS_LIKE_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
-        continue; // _EX operator should use `continue` to skip addOperation.
- case tflite::BuiltinOperator_TILE:
- nnapi_version = 12; // require NNAPI 1.2
- nn_op_type = ANEURALNETWORKS_TILE;
- break;
- case tflite::BuiltinOperator_CONCAT_EMBEDDINGS:
- case tflite::BuiltinOperator_LSH_PROJECTION:
- case tflite::BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN:
- case tflite::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN:
- case tflite::BuiltinOperator_EMBEDDING_LOOKUP_SPARSE:
- case tflite::BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM:
- case tflite::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM:
- //case tflite::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION:
- case tflite::BuiltinOperator_PADV2:
- //case tflite::BuiltinOperator_RESIZE_BILINEAR:
- case tflite::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR:
- case tflite::BuiltinOperator_CALL:
- case tflite::BuiltinOperator_SKIP_GRAM:
- //case tflite::BuiltinOperator_RELU_N1_TO_1:
- //case tflite::BuiltinOperator_GATHER:
- //case tflite::BuiltinOperator_SPACE_TO_BATCH_ND:
- //case tflite::BuiltinOperator_BATCH_TO_SPACE_ND:
- //case tflite::BuiltinOperator_TOPK_V2:
- //case tflite::BuiltinOperator_SPLIT:
- //case tflite::BuiltinOperator_STRIDED_SLICE:
- //case tflite::BuiltinOperator_EXP:
- case tflite::BuiltinOperator_LOG_SOFTMAX:
- //case tflite::BuiltinOperator_DEQUANTIZE:
- case tflite::BuiltinOperator_DELEGATE:
- //case tflite::BuiltinOperator_CAST:
- //case tflite::BuiltinOperator_PRELU:
- //case tflite::BuiltinOperator_MAXIMUM:
- //case tflite::BuiltinOperator_MINIMUM:
- //case tflite::BuiltinOperator_ARG_MAX:
- case tflite::BuiltinOperator_ARG_MIN:
- //case tflite::BuiltinOperator_GREATER:
- //case tflite::BuiltinOperator_GREATER_EQUAL:
- //case tflite::BuiltinOperator_LESS:
- //case tflite::BuiltinOperator_LESS_EQUAL:
- //case tflite::BuiltinOperator_NEG:
- //case tflite::BuiltinOperator_SELECT:
- // case tflite::BuiltinOperator_SLICE:
- //case tflite::BuiltinOperator_SIN:
- //case tflite::BuiltinOperator_LOG:
- //case tflite::BuiltinOperator_TRANSPOSE_CONV:
- //case tflite::BuiltinOperator_TILE:
- //case tflite::BuiltinOperator_EXPAND_DIMS:
- case tflite::BuiltinOperator_SPARSE_TO_DENSE:
- //case tflite::BuiltinOperator_EQUAL:
- //case tflite::BuiltinOperator_NOT_EQUAL:
- //case tflite::BuiltinOperator_SUM:
- //case tflite::BuiltinOperator_REDUCE_MAX:
- //case tflite::BuiltinOperator_REDUCE_MIN:
- //case tflite::BuiltinOperator_REDUCE_PROD:
- //case tflite::BuiltinOperator_SQRT:
- //case tflite::BuiltinOperator_RSQRT:
- //case tflite::BuiltinOperator_SHAPE:
- //case tflite::BuiltinOperator_POW:
- case tflite::BuiltinOperator_FAKE_QUANT:
- //case tflite::BuiltinOperator_PACK:
- //case tflite::BuiltinOperator_LOGICAL_OR:
- //case tflite::BuiltinOperator_ONE_HOT:
- //case tflite::BuiltinOperator_LOGICAL_AND:
- //case tflite::BuiltinOperator_LOGICAL_NOT:
- //case tflite::BuiltinOperator_UNPACK:
- case tflite::BuiltinOperator_FLOOR_DIV:
- //case tflite::BuiltinOperator_REDUCE_ANY:
- case tflite::BuiltinOperator_SQUARE:
- //case tflite::BuiltinOperator_ZEROS_LIKE:
- case tflite::BuiltinOperator_FILL:
- case tflite::BuiltinOperator_FLOOR_MOD:
- case tflite::BuiltinOperator_RANGE:
- case tflite::BuiltinOperator_LEAKY_RELU:
- //case tflite::BuiltinOperator_SQUARED_DIFFERENCE:
- case tflite::BuiltinOperator_MIRROR_PAD:
- //case tflite::BuiltinOperator_ABS:
- case tflite::BuiltinOperator_SPLIT_V:
- logError("Op code %d is currently not delegated to NNAPI", builtin);
- return kTfLiteError;
- break;
- case tflite::BuiltinOperator_CUSTOM: {
- std::string custom_name(registration.custom_name);
- if (custom_name.compare("SquaredDifference") == 0) {
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_SQUARED_DIFFERENCE_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(),
- static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
- }
- else if (custom_name.compare("MatrixBandPart") == 0) {
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_MATRIX_BAND_PART_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(),
- static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
- }
- logError("Custom operations are not supported when using NNAPI.");
- return kTfLiteError;
- break;
- }
- default:
- // Fix to use strict build option
- logError("Op code %d is currently not delegated to NNAPI", builtin);
- return kTfLiteError;
- break;
- }
-
- if (nnapi_version == 11 && GetAndroidSdkVersionCached() < 28) {
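-      // Intentionally permissive: the SDK-version check for NNAPI 1.1 ops is
-      // disabled, so these ops are still attempted on pre-API-28 devices.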
- //logError("Op %d needs NNAPI1.1", builtin);
- //return kTfLiteError;
- }
-
- // Add the operation.
- RETURN_ERROR_IF_NN_FAILED(ANeuralNetworksModel_addOperation(
- nn_model, nn_op_type, static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(),
- static_cast<uint32_t>(augmented_outputs.size()),
- reinterpret_cast<uint32_t*>(augmented_outputs.data())));
- }
- return kTfLiteOk;
-}
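-
-// Per-node flow implemented above, in summary:
-//   1. map TFLite tensor ids to NNAPI operand ids (MapAndAddTensorIds)
-//   2. append builtin parameters as extra scalar/vector input operands
-//   3. ANeuralNetworksModel_addOperation(nn_op_type, inputs, outputs)
-// Extension (_EX) ops call ANeuralNetworksModel_addOperationEx and 'continue',
-// skipping step 3.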
-
-TfLiteStatus NNAPIDelegate::BuildGraph(::tflite::Subgraph* subgraph) {
- if (nn_model_ && nn_compiled_model_) return model_status_;
-
- // TODO(aselle): This is not correct. need to handle resize invalidation.
- if (!nn_model_) {
- CHECK_NN(ANeuralNetworksModel_create(&nn_model_));
-
- // Find which tensors should be added to NNAPI. TFLite has temporaries
-    // and RNN back-edges which are not valid for NNAPI. We look through all
- // inputs and outputs and mark the mapping in tensor_id_to_nnapi_id with
- // kOperandIdNotSet. addTensorOperands will replace those with the
- // corresponding NNAPI operand ids and skip kOperandNotNeeded entries.
- std::vector<int64_t> tensor_id_to_nnapi_id(subgraph->tensors_size(),
- kOperandNotNeeded);
- // Fix to use strict build option
- auto set_ids_to_not_set = [&tensor_id_to_nnapi_id](const int* buf,
- int count) {
- for (int j = 0; j < count; j++) {
- auto tensor_id = buf[j];
- if (tensor_id != kOptionalTensor) {
- tensor_id_to_nnapi_id[tensor_id] = kOperandIdNotSet;
- }
- }
- };
- for (size_t i = 0; i < subgraph->nodes_size(); i++) {
- const auto* node_and_registration = subgraph->node_and_registration(i);
- const TfLiteNode& node = node_and_registration->first;
- set_ids_to_not_set(node.inputs->data, node.inputs->size);
- set_ids_to_not_set(node.outputs->data, node.outputs->size);
- }
- set_ids_to_not_set(subgraph->inputs().data(), subgraph->inputs().size());
- set_ids_to_not_set(subgraph->outputs().data(), subgraph->outputs().size());
-
- uint32_t next_id = 0;
- RETURN_ERROR_IF_TFLITE_FAILED(addTensorOperands(
- subgraph, nn_model_, &next_id, &tensor_id_to_nnapi_id));
- RETURN_ERROR_IF_TFLITE_FAILED(
- AddOpsAndParams(subgraph, nn_model_, next_id, &model_states_inputs_,
- &model_states_outputs_, tensor_id_to_nnapi_id));
-
- std::vector<uint32_t> augmented_inputs;
- MapAndAddTensorIds(subgraph->inputs().data(), subgraph->inputs().size(),
- &augmented_inputs, tensor_id_to_nnapi_id);
- augmented_inputs.insert(augmented_inputs.end(),
- model_states_inputs_.begin(),
- model_states_inputs_.end());
- std::vector<uint32_t> augmented_outputs;
- MapAndAddTensorIds(subgraph->outputs().data(), subgraph->outputs().size(),
- &augmented_outputs, tensor_id_to_nnapi_id);
- MapAndAddTensorIds(model_states_outputs_.data(),
- model_states_outputs_.size(), &augmented_outputs,
- tensor_id_to_nnapi_id);
-
- CHECK_NN(ANeuralNetworksModel_identifyInputsAndOutputs(
- nn_model_, static_cast<uint32_t>(augmented_inputs.size()),
- reinterpret_cast<const uint32_t*>(augmented_inputs.data()),
- static_cast<uint32_t>(augmented_outputs.size()),
- reinterpret_cast<const uint32_t*>(augmented_outputs.data())));
-
- // TODO Support ANeuralNetworksModel_relaxComputationFloat32toFloat16
- /*if (GetAndroidSdkVersionCached() >= 28) {
- CHECK_NN(ANeuralNetworksModel_relaxComputationFloat32toFloat16(
- nn_model_, subgraph->GetAllowFp16PrecisionForFp32()));
- }*/
- CHECK_NN(ANeuralNetworksModel_finish(nn_model_));
- }
- if (!nn_compiled_model_) {
- CHECK_NN(ANeuralNetworksCompilation_create(nn_model_, &nn_compiled_model_));
- CHECK_NN(ANeuralNetworksCompilation_finish(nn_compiled_model_));
- }
- return kTfLiteOk;
-}
-
-// Use unordered_map for temporary buffer
-#include <unordered_map>
-
-TfLiteStatus NNAPIDelegate::Invoke(::tflite::Subgraph* subgraph) {
- if (!nn_model_) {
- model_status_ = BuildGraph(subgraph);
- if (model_status_ != kTfLiteOk) {
- logError("Failed to build graph for NNAPI");
- }
- }
- if (model_status_ != kTfLiteOk) {
- return model_status_;
- }
-
- ANeuralNetworksExecution* execution = nullptr;
- CHECK_NN(ANeuralNetworksExecution_create(nn_compiled_model_, &execution));
-
-  // Allocate temporary buffers to hold casted boolean tensors
- std::unordered_map<size_t, uint8_t*> input_boolean_tensors;
- std::unordered_map<size_t, uint8_t*> output_boolean_tensors;
- for (size_t i = 0; i < subgraph->inputs().size(); i++)
- {
- int input = subgraph->inputs()[i];
- TfLiteTensor* tensor = subgraph->tensor(input);
- if (tensor->type == kTfLiteBool)
- {
- size_t elements = tensor->bytes / sizeof(bool);
- uint8_t* temp_tensor = new uint8_t[tensor->bytes / sizeof(bool)];
- input_boolean_tensors[i] = temp_tensor;
- for (size_t idx = 0; idx < elements; idx++)
- {
- temp_tensor[idx] = (tensor->data.b[idx] ? 0x00 : 0xff);
- }
- }
- }
- for (size_t i = 0; i < subgraph->outputs().size(); i++)
- {
- int output = subgraph->outputs()[i];
- TfLiteTensor* tensor = subgraph->tensor(output);
- if (tensor->type == kTfLiteBool)
- {
- uint8_t* temp_tensor = new uint8_t[tensor->bytes / sizeof(bool)];
- output_boolean_tensors[i] = temp_tensor;
- }
- }
-
- // Currently perform deep copy of input buffer
- for (size_t i = 0; i < subgraph->inputs().size(); i++) {
- int input = subgraph->inputs()[i];
- // TODO(aselle): Is this what we want or do we want input instead?
- // TODO(aselle): This should be called setInputValue maybe to be cons.
- TfLiteTensor* tensor = subgraph->tensor(input);
- // Workaround to pass bool type under NNAPI
- // ANEURALNETWORKS_TENSOR_BOOL8 tensor element size is 8 bits
- if (tensor->type == kTfLiteBool)
- {
- CHECK_NN(ANeuralNetworksExecution_setInput(
- execution, i, nullptr, input_boolean_tensors[i], tensor->bytes * sizeof(uint8_t) / sizeof(bool)));
- }
- else
- {
- CHECK_NN(ANeuralNetworksExecution_setInput(
- execution, i, nullptr, tensor->data.raw, tensor->bytes));
- }
- }
-
- // Tell nn api where to place final data.
- for (size_t i = 0; i < subgraph->outputs().size(); i++) {
- int output = subgraph->outputs()[i];
- TfLiteTensor* tensor = subgraph->tensor(output);
-
- // Workaround to pass bool type under NNAPI
- // ANEURALNETWORKS_TENSOR_BOOL8 tensor element size is 8 bits
- if (tensor->type == kTfLiteBool)
- {
- CHECK_NN(ANeuralNetworksExecution_setOutput(
- execution, i, nullptr, output_boolean_tensors[i], tensor->bytes * sizeof(uint8_t) / sizeof(bool)));
- }
- else
- {
- CHECK_NN(ANeuralNetworksExecution_setOutput(
- execution, i, nullptr, tensor->data.raw, tensor->bytes));
- }
- }
-
-  // The state_out of the previous invocation needs to be mapped to the
-  // state_in of the current invocation.
- for (size_t i = 0; i < model_states_outputs_.size(); i++) {
- int state_tensor_idx = model_states_outputs_[i];
- TfLiteTensor* tensor = subgraph->tensor(state_tensor_idx);
- // Here we are using a deep copy for state_in tensors so that we are not
-    // reading and writing into the same buffer during an invocation.
- // TODO(miaowang): using double shared buffer to minimize the copies.
- CHECK_NN(ANeuralNetworksExecution_setInput(
- execution, i + subgraph->inputs().size(), nullptr, tensor->data.raw,
- tensor->bytes));
- // Tell NNAPI where to output the state_out.
- CHECK_NN(ANeuralNetworksExecution_setOutput(
- execution, i + subgraph->outputs().size(), nullptr, tensor->data.raw,
- tensor->bytes));
- }
-
- // Currently use blocking compute.
- ANeuralNetworksEvent* event = nullptr;
- CHECK_NN(ANeuralNetworksExecution_startCompute(execution, &event));
- CHECK_NN(ANeuralNetworksEvent_wait(event));
- ANeuralNetworksEvent_free(event);
- ANeuralNetworksExecution_free(execution);
-
-  // Release the temporary buffers used for boolean input tensors.
- for (size_t i = 0; i < subgraph->inputs().size(); i++) {
- int input = subgraph->inputs()[i];
- TfLiteTensor* tensor = subgraph->tensor(input);
-
- if (tensor->type == kTfLiteBool)
- {
- uint8_t* temp_tensor = input_boolean_tensors[i];
- input_boolean_tensors[i] = nullptr;
-      delete[] temp_tensor;
- }
- }
- for (size_t i = 0; i < subgraph->outputs().size(); i++) {
- int output = subgraph->outputs()[i];
- TfLiteTensor* tensor = subgraph->tensor(output);
-
- if (tensor->type == kTfLiteBool)
- {
- uint8_t* temp_tensor = output_boolean_tensors[i];
- size_t elements = tensor->bytes / sizeof(bool);
- for (size_t idx = 0; idx < elements; idx++)
- {
- tensor->data.b[idx] = ((temp_tensor[idx] == 0x00) ? false : true);
- }
- output_boolean_tensors[i] = nullptr;
-      delete[] temp_tensor;
- }
- }
-
-#if 0
- printf("From the NN API:\n");
- TfLiteTensor* tensor = subgraph->tensor(subgraph->outputs()[0]);
- if (float* data =
- subgraph->typed_tensor<float>(subgraph->outputs()[0])) {
- size_t num = tensor->bytes / sizeof(float);
- for (float* p = data; p < data + num; p++) {
- printf(" %f", *p);
- }
- printf("\n");
- }
-#endif
-
- return kTfLiteOk;
-}
-
-bool NNAPIDelegate::IsSupported() { return nnfw::NNAPIExists(); }
-
-} // namespace tflite
-} // namespace nnfw
-
-// clang-format on
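The deleted Invoke() above stages kTfLiteBool tensors through uint8_t buffers because ANEURALNETWORKS_TENSOR_BOOL8 elements are 8 bits wide. A minimal sketch of that staging pattern, assuming nothing beyond the standard library (the function names are illustrative, not part of the deleted code):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Input staging, as in the deleted code: true -> 0x00, false -> 0xff.
    std::vector<uint8_t> stage_bool_input(const bool* src, size_t n)
    {
      std::vector<uint8_t> staged(n);
      for (size_t i = 0; i < n; ++i)
        staged[i] = src[i] ? 0x00 : 0xff;
      return staged;
    }

    // Output unstaging, as in the deleted code: 0x00 -> false, else true.
    // Note that this is not the inverse of the input mapping above; the
    // asymmetry is preserved here exactly as it appears in the removed file.
    void unstage_bool_output(const uint8_t* src, bool* dst, size_t n)
    {
      for (size_t i = 0; i < n; ++i)
        dst[i] = (src[i] != 0x00);
    }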
diff --git a/runtime/libs/tflite/port/1.13.1/src/nnapi_delegate_ex_AddOpsAndParams_lambda.inc b/runtime/libs/tflite/port/1.13.1/src/nnapi_delegate_ex_AddOpsAndParams_lambda.inc
deleted file mode 100644
index 39355b106..000000000
--- a/runtime/libs/tflite/port/1.13.1/src/nnapi_delegate_ex_AddOpsAndParams_lambda.inc
+++ /dev/null
@@ -1,153 +0,0 @@
-// This file is included from AddOpsAndParams defined in nnapi_delegate.cc
-// and contains lambdas for the extended implementation of the original TensorFlow Lite.
- auto add_scalar_bool8 = [&nn_model, &augmented_inputs,
- &next_id](bool value) {
- // Fix to use strict build option
- int8_t casted_value = (value ? 1 : 0);
- ANeuralNetworksOperandType operand_type{}; operand_type.type = ANEURALNETWORKS_BOOL;
- CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type))
- CHECK_NN(ANeuralNetworksModel_setOperandValue(nn_model, next_id, &casted_value,
- sizeof(int8_t)))
- augmented_inputs.push_back(next_id++);
- };
-
- auto add_resize_bilinear_params = [&add_scalar_int32, &subgraph, &augmented_inputs](void* data) {
- auto builtin = reinterpret_cast<TfLiteResizeBilinearParams*>(data);
- if (builtin->align_corners) {
- FATAL("Resize bilinear does not support align corners in NNAPI");
- }
-
- TfLiteTensor* tensor = subgraph->tensor(augmented_inputs.back());
- assert(tensor->type == kTfLiteInt32);
- assert(tensor->bytes == sizeof(int)*2);
- augmented_inputs.pop_back();
-
- int height = ((int*)(tensor->data.raw))[1];
- int width = ((int*)(tensor->data.raw))[0];
- add_scalar_int32(height);
- add_scalar_int32(width);
- };
-
- auto add_transpose_conv_params = [&add_scalar_int32](void* data) {
- auto builtin = reinterpret_cast<TfLiteTransposeConvParams*>(data);
- add_scalar_int32(builtin->padding);
- add_scalar_int32(builtin->stride_width);
- add_scalar_int32(builtin->stride_height);
- };
-
- auto add_lrn_params = [&add_scalar_int32,
- &add_scalar_float32](void* data) {
- auto builtin = reinterpret_cast<TfLiteLocalResponseNormParams*>(data);
- add_scalar_int32(builtin->radius);
- add_scalar_float32(builtin->bias);
- add_scalar_float32(builtin->alpha);
- add_scalar_float32(builtin->beta);
- };
-
- auto add_strided_slice_params = [&add_scalar_int32](void* data) {
- auto builtin = reinterpret_cast<TfLiteStridedSliceParams*>(data);
- add_scalar_int32(builtin->begin_mask);
- add_scalar_int32(builtin->end_mask);
-    // ellipsis_mask and new_axis_mask are not supported on the NN runtime
-    // cf) the tflite interpreter supports both operations
- if (builtin->ellipsis_mask) {
- FATAL("STRIDE_SLICE does not support ellipsis_mask in NNAPI");
- }
- if (builtin->new_axis_mask) {
- FATAL("STRIDE_SLICE does not support new_axis_mask in NNAPI");
- }
- add_scalar_int32(builtin->shrink_axis_mask);
- };
-
- auto add_gather_params = [&add_scalar_int32, &augmented_inputs](void* data) {
- auto builtin = reinterpret_cast<TfLiteGatherParams*>(data);
- if (builtin->axis != 0) {
- FATAL("GATHER does not support axis>0 in NNAPI");
- }
-
- auto indices_index = augmented_inputs.back();
- augmented_inputs.pop_back();
- add_scalar_int32(builtin->axis);
- augmented_inputs.push_back(indices_index);
- };
-
- auto add_pack_ex_params = [&add_scalar_int32](void* data) {
- auto builtin = reinterpret_cast<TfLitePackParams*>(data);
- add_scalar_int32(builtin->values_count);
- add_scalar_int32(builtin->axis);
- };
-
- auto add_unpack_ex_params = [&add_scalar_int32](void* data) {
- auto builtin = reinterpret_cast<TfLiteUnpackParams*>(data);
- add_scalar_int32(builtin->num);
- add_scalar_int32(builtin->axis);
- };
-
- auto check_batch_to_space_params = [subgraph, &node, &augmented_inputs]() {
-
-  // If there are 3 inputs, check whether crops has the default values {0, 0, 0, 0};
-  // otherwise it is unsupported by NNAPI.
-
- if(augmented_inputs.size() == 3)
- {
- const uint32_t crops_buffer_index = node.inputs->data[2];
- const TfLiteTensor* crops = subgraph->tensor(crops_buffer_index);
- const int *crops_value = crops->data.i32;
-
-    // Check whether crops has the default values {0, 0, 0, 0}
- if(crops_value[0] != 0 || crops_value[1] != 0 || crops_value[2] != 0 || crops_value[3] != 0)
- {
- FATAL("BATCH_TO_SPACE_ND does not support Explicit crops in NNAPI");
- }
- else
- {
-      // Drop the crops input and pass only the other two inputs
- augmented_inputs.pop_back();
- }
- }
- };
-
- auto add_split_params = [&add_scalar_int32, &augmented_inputs](void* data) {
- // swap 1st and 2nd operand order
- auto input_tensor = augmented_inputs[1];
- auto axis = augmented_inputs[0];
- augmented_inputs[0] = input_tensor;
- augmented_inputs[1] = axis;
-
- auto builtin = reinterpret_cast<TfLiteSplitParams*>(data);
- add_scalar_int32(builtin->num_splits);
- };
-
- auto check_arg_max_input = [&subgraph, &augmented_inputs](void *data) {
- auto params = reinterpret_cast<TfLiteArgMaxParams*>(data);
- if (params->output_type != kTfLiteInt32)
- {
- FATAL("Cannot handle output type in NNAPI");
- }
-
- TfLiteTensor* axis_tensor = subgraph->tensor(augmented_inputs.back());
- assert(axis_tensor->type == kTfLiteInt32);
-
- int64_t count = 1;
- for (int i = 0; i < axis_tensor->dims->size; ++i) {
- count *= axis_tensor->dims->data[i];
- }
- assert(count == 1);
- };
-
- auto add_reducer_params = [&add_scalar_bool8](void* data) {
- auto builtin = reinterpret_cast<TfLiteReducerParams*>(data);
- if (builtin == nullptr)
- {
- add_scalar_bool8(0);
- }
- else
- {
- add_scalar_bool8(builtin->keep_dims);
- }
- };
-
- auto add_one_hot_params = [&add_scalar_int32](void* data) {
- const auto* builtin = reinterpret_cast<TfLiteOneHotParams*>(data);
- add_scalar_int32(builtin->axis);
- };
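Each lambda in the removed include file follows one pattern: capture nn_model, augmented_inputs, and next_id by reference, append constant operands to the model, and record their ids as extra operation inputs. A toy sketch of that capture pattern (Model is a stand-in type, not the NNAPI one):

    #include <cstdint>
    #include <vector>

    struct Model { std::vector<int32_t> scalar_values; }; // stand-in for ANeuralNetworksModel

    int main()
    {
      Model model;
      std::vector<uint32_t> augmented_inputs;
      uint32_t next_id = 0;

      // Mirrors add_scalar_int32: add a constant operand to the model, then
      // reference it as the next input of the operation being built.
      auto add_scalar_int32 = [&model, &augmented_inputs, &next_id](int32_t value) {
        model.scalar_values.push_back(value); // addOperand + setOperandValue
        augmented_inputs.push_back(next_id++);
      };

      add_scalar_int32(/*stride_width=*/2);
      add_scalar_int32(/*stride_height=*/2);
      return 0;
    }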
diff --git a/runtime/libs/tflite/port/CMakeLists.txt b/runtime/libs/tflite/port/CMakeLists.txt
deleted file mode 100644
index 82c83f722..000000000
--- a/runtime/libs/tflite/port/CMakeLists.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-# We may need to support multiple tensorflow versions
-# Example)
-# For ubuntu: tensorflow lite v1.13.1
-# For tizen: tensorflow lite v1.9
-set(SUPPORT_TFLITE_VERSION "1.13.1" CACHE STRING "Supporting TensorFlow lite version")
-
-add_subdirectories()
diff --git a/runtime/libs/tflite/src/Diff.cpp b/runtime/libs/tflite/src/Diff.cpp
index 39f994352..8165798e0 100644
--- a/runtime/libs/tflite/src/Diff.cpp
+++ b/runtime/libs/tflite/src/Diff.cpp
@@ -22,6 +22,8 @@
#include "misc/tensor/Zipper.h"
#include "misc/tensor/Comparator.h"
+#include <tensorflow/lite/c/c_api.h>
+
#include <iostream>
#include <cassert>
@@ -29,9 +31,9 @@ class DiffSummary : public nnfw::misc::tensor::Comparator::Observer
{
public:
DiffSummary()
- : max_abs_diff_index(0), max_abs_diff_expected{0.0f}, max_abs_diff_obtained{0.0f},
- max_abs_diff_value{0.0f}, max_rel_diff_index(0), max_rel_diff_expected{0.0f},
- max_rel_diff_obtained{0.0f}, max_rel_diff_value{0.0f}
+ : max_abs_diff_index(0), max_abs_diff_expected{0.0f}, max_abs_diff_obtained{0.0f},
+ max_abs_diff_value{0.0f}, max_rel_diff_index(0), max_rel_diff_expected{0.0f},
+ max_rel_diff_obtained{0.0f}, max_rel_diff_value{0.0f}
{
// DO NOTHING
}
@@ -86,12 +88,12 @@ bool TfLiteInterpMatchApp::compareSingleTensorView(const nnfw::tflite::TensorVie
using nnfw::misc::tensor::zip;
zip(expected.shape(), expected, obtained)
- << [&](const Index &index, T expected_value, T obtained_value) {
- if (expected_value != obtained_value)
- {
- diffs.emplace_back(index, expected_value, obtained_value);
- }
- };
+ << [&](const Index &index, T expected_value, T obtained_value) {
+ if (expected_value != obtained_value)
+ {
+ diffs.emplace_back(index, expected_value, obtained_value);
+ }
+ };
// TODO Unify summary generation code
if (diffs.size() == 0)
@@ -121,8 +123,8 @@ bool TfLiteInterpMatchApp::compareSingleTensorView(const nnfw::tflite::TensorVie
template <>
bool TfLiteInterpMatchApp::compareSingleTensorView<float>(
- const nnfw::tflite::TensorView<float> &expected,
- const nnfw::tflite::TensorView<float> &obtained, int id) const
+ const nnfw::tflite::TensorView<float> &expected, const nnfw::tflite::TensorView<float> &obtained,
+ int id) const
{
DiffSummary summary;
@@ -190,53 +192,57 @@ bool TfLiteInterpMatchApp::compareSingleTensorView<float>(
#include <map>
-bool TfLiteInterpMatchApp::run(::tflite::Interpreter &interp, ::tflite::Interpreter &nnapi) const
+bool TfLiteInterpMatchApp::run(TfLiteInterpreter &expected, TfLiteInterpreter &obtained) const
{
- assert(interp.outputs() == nnapi.outputs());
+ auto output_count = TfLiteInterpreterGetOutputTensorCount(&expected);
+ assert(output_count == TfLiteInterpreterGetOutputTensorCount(&obtained));
bool all_matched = true;
- using Comparator = std::function<bool(int id, ::tflite::Interpreter &, ::tflite::Interpreter &)>;
+ using Comparator = std::function<bool(int32_t, const TfLiteTensor *, const TfLiteTensor *)>;
std::map<TfLiteType, Comparator> comparators;
- comparators[kTfLiteUInt8] = [this](int id, ::tflite::Interpreter &interp,
- ::tflite::Interpreter &nnapi) {
- const auto expected = nnfw::tflite::TensorView<uint8_t>::make(interp, id);
- const auto obtained = nnfw::tflite::TensorView<uint8_t>::make(nnapi, id);
+ comparators[kTfLiteUInt8] = [this](int32_t id, const TfLiteTensor *expected_tensor,
+ const TfLiteTensor *obtained_tensor) {
+ const auto expected_view = nnfw::tflite::TensorView<uint8_t>::make(expected_tensor);
+ const auto obtained_view = nnfw::tflite::TensorView<uint8_t>::make(obtained_tensor);
- return compareSingleTensorView(expected, obtained, id);
+ return compareSingleTensorView(expected_view, obtained_view, id);
};
- comparators[kTfLiteInt32] = [this](int id, ::tflite::Interpreter &interp,
- ::tflite::Interpreter &nnapi) {
- const auto expected = nnfw::tflite::TensorView<int32_t>::make(interp, id);
- const auto obtained = nnfw::tflite::TensorView<int32_t>::make(nnapi, id);
+ comparators[kTfLiteInt32] = [this](int32_t id, const TfLiteTensor *expected_tensor,
+ const TfLiteTensor *obtained_tensor) {
+ const auto expected_view = nnfw::tflite::TensorView<int32_t>::make(expected_tensor);
+ const auto obtained_view = nnfw::tflite::TensorView<int32_t>::make(obtained_tensor);
- return compareSingleTensorView(expected, obtained, id);
+ return compareSingleTensorView(expected_view, obtained_view, id);
};
- comparators[kTfLiteFloat32] = [this](int id, ::tflite::Interpreter &interp,
- ::tflite::Interpreter &nnapi) {
- const auto expected = nnfw::tflite::TensorView<float>::make(interp, id);
- const auto obtained = nnfw::tflite::TensorView<float>::make(nnapi, id);
+ comparators[kTfLiteFloat32] = [this](int32_t id, const TfLiteTensor *expected_tensor,
+ const TfLiteTensor *obtained_tensor) {
+ const auto expected_view = nnfw::tflite::TensorView<float>::make(expected_tensor);
+ const auto obtained_view = nnfw::tflite::TensorView<float>::make(obtained_tensor);
- return compareSingleTensorView(expected, obtained, id);
+ return compareSingleTensorView(expected_view, obtained_view, id);
};
- comparators[kTfLiteBool] = [this](int id, ::tflite::Interpreter &interp,
- ::tflite::Interpreter &nnapi) {
- const auto expected = nnfw::tflite::TensorView<bool>::make(interp, id);
- const auto obtained = nnfw::tflite::TensorView<bool>::make(nnapi, id);
+ comparators[kTfLiteBool] = [this](int32_t id, const TfLiteTensor *expected_tensor,
+ const TfLiteTensor *obtained_tensor) {
+ const auto expected_view = nnfw::tflite::TensorView<bool>::make(expected_tensor);
+ const auto obtained_view = nnfw::tflite::TensorView<bool>::make(obtained_tensor);
- return compareSingleTensorView(expected, obtained, id);
+ return compareSingleTensorView(expected_view, obtained_view, id);
};
- for (const auto &id : interp.outputs())
+ for (int32_t idx = 0; idx < output_count; idx++)
{
- assert(interp.tensor(id)->type == nnapi.tensor(id)->type);
+ auto const expected_tensor = TfLiteInterpreterGetOutputTensor(&expected, idx);
+ auto const obtained_tensor = TfLiteInterpreterGetOutputTensor(&obtained, idx);
+ auto const tensor_type = TfLiteTensorType(expected_tensor);
+ assert(tensor_type == TfLiteTensorType(obtained_tensor));
- auto it = comparators.find(interp.tensor(id)->type);
+ auto it = comparators.find(tensor_type);
if (it == comparators.end())
{
@@ -245,7 +251,7 @@ bool TfLiteInterpMatchApp::run(::tflite::Interpreter &interp, ::tflite::Interpre
const auto &comparator = it->second;
- if (!comparator(id, interp, nnapi))
+ if (!comparator(idx, expected_tensor, obtained_tensor))
{
all_matched = false;
}
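The rewritten run() above replaces interpreter references with the TF Lite C API and dispatches on the output tensor's type through a std::map of comparators. A reduced sketch of that dispatch shape (ToyType stands in for TfLiteType so the example stays self-contained):

    #include <cstdint>
    #include <functional>
    #include <map>

    enum ToyType { kF32, kI32 }; // stand-in for TfLiteType

    int main()
    {
      using Comparator = std::function<bool(int32_t)>;
      std::map<ToyType, Comparator> comparators;
      comparators[kF32] = [](int32_t) { return true; }; // compare float views
      comparators[kI32] = [](int32_t) { return true; }; // compare int32 views

      const ToyType output_types[] = {kF32, kI32};
      bool all_matched = true;
      for (int32_t idx = 0; idx < 2; ++idx)
      {
        auto it = comparators.find(output_types[idx]);
        if (it == comparators.end() || !it->second(idx))
          all_matched = false; // unknown type or mismatch
      }
      return all_matched ? 0 : 1;
    }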
diff --git a/runtime/libs/tflite/src/FeatureView.cpp b/runtime/libs/tflite/src/FeatureView.cpp
deleted file mode 100644
index fdf5a4b00..000000000
--- a/runtime/libs/tflite/src/FeatureView.cpp
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/FeatureView.h"
-#include "tflite/TensorUtils.h"
-
-#include <cassert>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-nnfw::misc::feature::Shape getFeatureShape(const TfLiteTensor *tensor)
-{
- nnfw::misc::feature::Shape shape{tensor->dims->data[3], tensor->dims->data[1],
- tensor->dims->data[2]};
-
- return shape;
-}
-
-FeatureView<float>::FeatureView(::tflite::Interpreter &interp, const InputIndex &index)
-{
- const auto tensor_index = interp.inputs().at(index.asInt());
- auto tensor_ptr = interp.tensor(tensor_index);
-
- assert(isFloatTensor(tensor_ptr));
- assert(isFeatureTensor(tensor_ptr));
-
- _shape = getFeatureShape(tensor_ptr);
- _base = interp.typed_tensor<float>(tensor_index);
-}
-
-FeatureView<float>::FeatureView(::tflite::Interpreter &interp, const OutputIndex &index)
-{
- const auto tensor_index = interp.outputs().at(index.asInt());
- auto tensor_ptr = interp.tensor(tensor_index);
-
- assert(isFloatTensor(tensor_ptr));
- assert(isFeatureTensor(tensor_ptr));
-
- _shape = getFeatureShape(tensor_ptr);
- _base = interp.typed_tensor<float>(tensor_index);
-}
-
-float FeatureView<float>::at(uint32_t ch, uint32_t row, uint32_t col) const
-{
- return *(_base + getElementOffset(ch, row, col));
-}
-
-float &FeatureView<float>::at(uint32_t ch, uint32_t row, uint32_t col)
-{
- return *(_base + getElementOffset(ch, row, col));
-}
-
-} // namespace tflite
-} // namespace nnfw
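The deleted getFeatureShape reads an NHWC tensor's dims as depth = dims[3], height = dims[1], width = dims[2]. getElementOffset itself is not part of this diff, so the HWC offset below is an assumption consistent with that shape:

    #include <cstdint>

    // Offset of (ch, row, col) in a [1, H, W, C] NHWC buffer (batch 0).
    inline uint32_t element_offset(uint32_t ch, uint32_t row, uint32_t col,
                                   uint32_t width, uint32_t channels)
    {
      return (row * width + col) * channels + ch;
    }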
diff --git a/runtime/libs/tflite/src/Quantization.cpp b/runtime/libs/tflite/src/Quantization.cpp
deleted file mode 100644
index 9c162c342..000000000
--- a/runtime/libs/tflite/src/Quantization.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/Quantization.h"
-
-TfLiteQuantizationParams make_default_quantization(void)
-{
- return TfLiteQuantizationParams{0.0f, 0};
-}
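make_default_quantization returned {scale = 0.0f, zeroPoint = 0}. For reference, the affine mapping these parameters feed, which the NeuralNetworks.h documentation later in this diff states as real_value = (integer_value - zeroPoint) * scale, is a one-liner:

    #include <cstdint>

    struct QuantParams { float scale; int32_t zero_point; };

    inline float dequantize(int32_t q, const QuantParams& p)
    {
      return (q - p.zero_point) * p.scale; // real_value = (integer_value - zeroPoint) * scale
    }
    // Example: scale = 0.5, zeroPoint = 1 -> quantized 5 represents 2.0.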
diff --git a/runtime/libs/tflite/src/RandomInputInitializer.cpp b/runtime/libs/tflite/src/RandomInputInitializer.cpp
new file mode 100644
index 000000000..9ed90f38e
--- /dev/null
+++ b/runtime/libs/tflite/src/RandomInputInitializer.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tflite/RandomInputInitializer.h"
+#include "tflite/TensorView.h"
+
+#include <misc/tensor/IndexIterator.h>
+
+namespace nnfw
+{
+namespace tflite
+{
+namespace
+{
+
+template <typename T>
+void setValue(nnfw::misc::RandomGenerator &randgen, const TfLiteTensor *tensor)
+{
+ auto tensor_view = nnfw::tflite::TensorView<T>::make(tensor);
+
+ nnfw::misc::tensor::iterate(tensor_view.shape())
+ << [&](const nnfw::misc::tensor::Index &ind) { tensor_view.at(ind) = randgen.generate<T>(); };
+}
+
+} // namespace
+
+void RandomInputInitializer::run(TfLiteInterpreter &interp)
+{
+ const auto input_count = TfLiteInterpreterGetInputTensorCount(&interp);
+ for (int32_t idx = 0; idx < input_count; idx++)
+ {
+ auto tensor = TfLiteInterpreterGetInputTensor(&interp, idx);
+ auto const tensor_type = TfLiteTensorType(tensor);
+ switch (tensor_type)
+ {
+ case kTfLiteFloat32:
+ setValue<float>(_randgen, tensor);
+ break;
+ case kTfLiteInt32:
+ setValue<int32_t>(_randgen, tensor);
+ break;
+ case kTfLiteUInt8:
+ setValue<uint8_t>(_randgen, tensor);
+ break;
+ case kTfLiteBool:
+ setValue<bool>(_randgen, tensor);
+ break;
+ case kTfLiteInt8:
+ setValue<int8_t>(_randgen, tensor);
+ break;
+ default:
+ throw std::runtime_error{"Not supported input type"};
+ }
+ }
+}
+
+} // namespace tflite
+} // namespace nnfw
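A hypothetical usage sketch of the new initializer. Only run() appears in this diff, so the RandomInputInitializer and RandomGenerator constructors below are assumptions; the TF Lite C API calls are the standard ones:

    #include <tensorflow/lite/c/c_api.h>
    #include "tflite/RandomInputInitializer.h"
    #include <misc/RandomGenerator.h> // assumed header for nnfw::misc::RandomGenerator

    int main()
    {
      TfLiteModel* model = TfLiteModelCreateFromFile("model.tflite");
      TfLiteInterpreterOptions* options = TfLiteInterpreterOptionsCreate();
      TfLiteInterpreter* interp = TfLiteInterpreterCreate(model, options);
      TfLiteInterpreterAllocateTensors(interp);

      nnfw::misc::RandomGenerator randgen{/*seed=*/0, /*mean=*/0.0f, /*stddev=*/1.0f}; // assumed ctor
      nnfw::tflite::RandomInputInitializer initializer{randgen}; // assumed ctor
      initializer.run(*interp); // fills every input tensor by type, as shown above

      TfLiteInterpreterInvoke(interp);
      TfLiteInterpreterDelete(interp);
      TfLiteInterpreterOptionsDelete(options);
      TfLiteModelDelete(model);
      return 0;
    }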
diff --git a/runtime/libs/tflite/src/RandomTestRunner.cpp b/runtime/libs/tflite/src/RandomTestRunner.cpp
deleted file mode 100644
index f7fccbf3b..000000000
--- a/runtime/libs/tflite/src/RandomTestRunner.cpp
+++ /dev/null
@@ -1,363 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/RandomTestRunner.h"
-#include "tflite/Diff.h"
-#include "tflite/TensorLogger.h"
-#include "tflite/ext/nnapi_delegate.h"
-
-#include <misc/tensor/IndexIterator.h>
-#include <misc/tensor/Object.h>
-#include <misc/EnvVar.h>
-#include <misc/fp32.h>
-
-#include <cassert>
-#include <map>
-#include <functional>
-#include <iostream>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-using namespace std::placeholders;
-
-void RandomTestRunner::compile(const nnfw::tflite::Builder &builder)
-{
- _tfl_interp = builder.build();
- _nnapi = builder.build();
-
- _tfl_interp->UseNNAPI(false);
-
- // Allocate Tensors
- _tfl_interp->AllocateTensors();
- _nnapi->AllocateTensors();
-
- assert(_tfl_interp->inputs() == _nnapi->inputs());
-
- using ::tflite::Interpreter;
- using Initializer = std::function<void(int id, Interpreter *, Interpreter *)>;
-
- std::map<TfLiteType, Initializer> initializers;
- std::map<TfLiteType, Initializer> reseters;
-
- // Generate signed 32-bit integer (s32) input
- initializers[kTfLiteInt32] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
- assert(_tfl_interp->tensor(id)->type == kTfLiteInt32);
- assert(_nnapi->tensor(id)->type == kTfLiteInt32);
-
- auto tfl_interp_view = nnfw::tflite::TensorView<int32_t>::make(*tfl_interp, id);
- auto nnapi_view = nnfw::tflite::TensorView<int32_t>::make(*nnapi, id);
-
- assert(tfl_interp_view.shape() == nnapi_view.shape());
-
- int32_t value = 0;
-
- nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- // TODO Generate random values
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- ++value;
- };
- };
-
- // Generate signed 32-bit integer (s32) input
- reseters[kTfLiteInt32] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
- assert(_tfl_interp->tensor(id)->type == kTfLiteInt32);
- assert(_nnapi->tensor(id)->type == kTfLiteInt32);
-
- auto tfl_interp_view = nnfw::tflite::TensorView<int32_t>::make(*tfl_interp, id);
- auto nnapi_view = nnfw::tflite::TensorView<int32_t>::make(*nnapi, id);
-
- assert(tfl_interp_view.shape() == nnapi_view.shape());
-
- int32_t value = 0;
-
- nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- // TODO Generate random values
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- };
- };
-
- initializers[kTfLiteUInt8] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
- assert(_tfl_interp->tensor(id)->type == kTfLiteUInt8);
- assert(_nnapi->tensor(id)->type == kTfLiteUInt8);
-
- auto tfl_interp_view = nnfw::tflite::TensorView<uint8_t>::make(*tfl_interp, id);
- auto nnapi_view = nnfw::tflite::TensorView<uint8_t>::make(*nnapi, id);
-
- assert(tfl_interp_view.shape() == nnapi_view.shape());
-
- auto fp = static_cast<uint8_t (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<uint8_t>);
- const nnfw::misc::tensor::Object<uint8_t> data(tfl_interp_view.shape(),
- std::bind(fp, _randgen, _1, _2));
- assert(tfl_interp_view.shape() == data.shape());
-
- nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- const auto value = data.at(ind);
-
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- };
- };
-
- reseters[kTfLiteUInt8] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
- assert(_tfl_interp->tensor(id)->type == kTfLiteUInt8);
- assert(_nnapi->tensor(id)->type == kTfLiteUInt8);
-
- auto tfl_interp_view = nnfw::tflite::TensorView<uint8_t>::make(*tfl_interp, id);
- auto nnapi_view = nnfw::tflite::TensorView<uint8_t>::make(*nnapi, id);
-
- assert(tfl_interp_view.shape() == nnapi_view.shape());
-
- auto fp = static_cast<uint8_t (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<uint8_t>);
- const nnfw::misc::tensor::Object<uint8_t> data(tfl_interp_view.shape(),
- std::bind(fp, _randgen, _1, _2));
- assert(tfl_interp_view.shape() == data.shape());
-
- uint8_t value = 0;
-
- nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- };
- };
-
- initializers[kTfLiteFloat32] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
- assert(_tfl_interp->tensor(id)->type == kTfLiteFloat32);
- assert(_nnapi->tensor(id)->type == kTfLiteFloat32);
-
- auto tfl_interp_view = nnfw::tflite::TensorView<float>::make(*tfl_interp, id);
- auto nnapi_view = nnfw::tflite::TensorView<float>::make(*nnapi, id);
-
- assert(tfl_interp_view.shape() == nnapi_view.shape());
-
- auto fp = static_cast<float (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<float>);
- const nnfw::misc::tensor::Object<float> data(tfl_interp_view.shape(),
- std::bind(fp, _randgen, _1, _2));
-
- assert(tfl_interp_view.shape() == data.shape());
-
- nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- const auto value = data.at(ind);
-
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- };
- };
-
- reseters[kTfLiteFloat32] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
- assert(_tfl_interp->tensor(id)->type == kTfLiteFloat32);
- assert(_nnapi->tensor(id)->type == kTfLiteFloat32);
-
- auto tfl_interp_view = nnfw::tflite::TensorView<float>::make(*tfl_interp, id);
- auto nnapi_view = nnfw::tflite::TensorView<float>::make(*nnapi, id);
-
- assert(tfl_interp_view.shape() == nnapi_view.shape());
-
- auto fp = static_cast<float (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<float>);
- const nnfw::misc::tensor::Object<float> data(tfl_interp_view.shape(),
- std::bind(fp, _randgen, _1, _2));
-
- assert(tfl_interp_view.shape() == data.shape());
-
- float value = 0;
-
- nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- };
- };
-
- initializers[kTfLiteBool] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
- assert(_tfl_interp->tensor(id)->type == kTfLiteBool);
- assert(_nnapi->tensor(id)->type == kTfLiteBool);
-
- auto tfl_interp_view = nnfw::tflite::TensorView<bool>::make(*tfl_interp, id);
- auto nnapi_view = nnfw::tflite::TensorView<bool>::make(*nnapi, id);
-
- assert(tfl_interp_view.shape() == nnapi_view.shape());
-
- auto fp = static_cast<bool (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<bool>);
- const nnfw::misc::tensor::Object<bool> data(tfl_interp_view.shape(),
- std::bind(fp, _randgen, _1, _2));
-
- assert(tfl_interp_view.shape() == data.shape());
-
- nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- const auto value = data.at(ind);
-
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- };
- };
-
- reseters[kTfLiteBool] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
- assert(_tfl_interp->tensor(id)->type == kTfLiteBool);
- assert(_nnapi->tensor(id)->type == kTfLiteBool);
-
- auto tfl_interp_view = nnfw::tflite::TensorView<bool>::make(*tfl_interp, id);
- auto nnapi_view = nnfw::tflite::TensorView<bool>::make(*nnapi, id);
-
- assert(tfl_interp_view.shape() == nnapi_view.shape());
-
- auto fp = static_cast<bool (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<bool>);
- const nnfw::misc::tensor::Object<bool> data(tfl_interp_view.shape(),
- std::bind(fp, _randgen, _1, _2));
-
- assert(tfl_interp_view.shape() == data.shape());
-
- bool value = false;
-
- nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- };
- };
-
- // Fill IFM with random numbers
- for (const auto id : _tfl_interp->inputs())
- {
- assert(_tfl_interp->tensor(id)->type == _nnapi->tensor(id)->type);
-
- auto it = initializers.find(_tfl_interp->tensor(id)->type);
-
- if (it == initializers.end())
- {
- throw std::runtime_error{"Not supported input type"};
- }
-
- it->second(id, _tfl_interp.get(), _nnapi.get());
- }
-
- // Fill OFM with 0
- for (const auto id : _tfl_interp->outputs())
- {
- assert(_tfl_interp->tensor(id)->type == _nnapi->tensor(id)->type);
-
- auto it = reseters.find(_tfl_interp->tensor(id)->type);
-
- if (it == reseters.end())
- {
- throw std::runtime_error{"Not supported input type"};
- }
-
- it->second(id, _tfl_interp.get(), _nnapi.get());
- }
-}
-
-int RandomTestRunner::run(size_t running_count)
-{
- std::cout << "[NNAPI TEST] Run T/F Lite Interpreter without NNAPI" << std::endl;
- _tfl_interp->Invoke();
-
- nnfw::tflite::NNAPIDelegate d;
-
- for (size_t i = 1; i <= running_count; ++i)
- {
- std::cout << "[NNAPI TEST #" << i << "] Run T/F Lite Interpreter with NNAPI" << std::endl;
-
- char *env = getenv("UPSTREAM_DELEGATE");
-
- if (env && !std::string(env).compare("1"))
- {
- _nnapi->UseNNAPI(true);
- _nnapi->Invoke();
- }
- else
- {
- // WARNING
-      // primary_subgraph: Experimental interface. Returns the 1st subgraph
- // Invoke() will call BuildGraph() internally
- if (d.Invoke(&_nnapi.get()->primary_subgraph()))
- {
- throw std::runtime_error{"Failed to BuildGraph"};
- }
- }
-
- // Compare OFM
- std::cout << "[NNAPI TEST #" << i << "] Compare the result" << std::endl;
-
- const auto tolerance = _param.tolerance;
-
- auto equals = [tolerance](float lhs, float rhs) {
- // NOTE Hybrid approach
- // TODO Allow users to set tolerance for absolute_epsilon_equal
- if (nnfw::misc::fp32::absolute_epsilon_equal(lhs, rhs))
- {
- return true;
- }
-
- return nnfw::misc::fp32::epsilon_equal(lhs, rhs, tolerance);
- };
-
- nnfw::misc::tensor::Comparator comparator(equals);
- TfLiteInterpMatchApp app(comparator);
-
- app.verbose() = _param.verbose;
-
- bool res = app.run(*_tfl_interp, *_nnapi);
-
- if (!res)
- {
- return 255;
- }
-
- std::cout << "[NNAPI TEST #" << i << "] PASSED" << std::endl << std::endl;
-
- if (_param.tensor_logging)
- nnfw::tflite::TensorLogger::get().save(_param.log_path, *_tfl_interp);
- }
-
- return 0;
-}
-
-RandomTestRunner RandomTestRunner::make(uint32_t seed)
-{
- RandomTestParam param;
-
- param.verbose = nnfw::misc::EnvVar("VERBOSE").asInt(0);
- param.tolerance = nnfw::misc::EnvVar("TOLERANCE").asInt(1);
- param.tensor_logging = nnfw::misc::EnvVar("TENSOR_LOGGING").asBool(false);
- param.log_path = nnfw::misc::EnvVar("TENSOR_LOGGING").asString("tensor_log.txt");
-
- return RandomTestRunner{seed, param};
-}
-
-} // namespace tflite
-} // namespace nnfw
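The deleted runner's float comparison is a hybrid: accept when an absolute-epsilon check passes (useful near zero), otherwise fall back to a relative check controlled by TOLERANCE. A self-contained sketch of that shape; the two helpers are simplified stand-ins for nnfw::misc::fp32, not its actual rules:

    #include <algorithm>
    #include <cmath>

    bool absolute_epsilon_equal(float a, float b, float eps = 1e-6f)
    {
      return std::fabs(a - b) <= eps;
    }

    bool relative_epsilon_equal(float a, float b, int tolerance)
    {
      const float scale = std::max(std::fabs(a), std::fabs(b));
      return std::fabs(a - b) <= tolerance * scale * 1e-5f; // illustrative bound
    }

    int main()
    {
      const int tolerance = 1; // cf. EnvVar("TOLERANCE").asInt(1)
      auto equals = [tolerance](float lhs, float rhs) {
        if (absolute_epsilon_equal(lhs, rhs))
          return true; // handles values near zero
        return relative_epsilon_equal(lhs, rhs, tolerance);
      };
      return equals(1.0f, 1.000001f) ? 0 : 1;
    }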
diff --git a/runtime/libs/tflite/src/TensorShapeUtils.cpp b/runtime/libs/tflite/src/TensorShapeUtils.cpp
deleted file mode 100644
index 689b6151b..000000000
--- a/runtime/libs/tflite/src/TensorShapeUtils.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/TensorShapeUtils.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-
-nnfw::misc::tensor::Shape broadcast(const nnfw::misc::tensor::Shape &lhs_shape,
- const nnfw::misc::tensor::Shape &rhs_shape)
-{
- const uint32_t lhs_rank = lhs_shape.rank();
- const uint32_t rhs_rank = rhs_shape.rank();
- const uint32_t out_rank = std::max(lhs_rank, rhs_rank);
- const uint32_t lhs_rank_diff = out_rank - lhs_rank;
- const uint32_t rhs_rank_diff = out_rank - rhs_rank;
-
- nnfw::misc::tensor::Shape out_shape(out_rank);
-
- for (uint32_t axis = 0; axis < out_rank; ++axis)
- {
- out_shape.dim(axis) = std::max(axis < lhs_rank_diff ? 1 : lhs_shape.dim(axis - lhs_rank_diff),
- axis < rhs_rank_diff ? 1 : rhs_shape.dim(axis - rhs_rank_diff));
- }
-
- return out_shape;
-}
-
-} // namespace tflite
-} // namespace nnfw
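The deleted broadcast() aligns the two shapes from the trailing axis and takes the per-axis maximum, treating missing leading axes as 1. The same algorithm over std::vector, with a worked case:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    std::vector<uint32_t> broadcast(const std::vector<uint32_t>& lhs,
                                    const std::vector<uint32_t>& rhs)
    {
      const uint32_t out_rank = static_cast<uint32_t>(std::max(lhs.size(), rhs.size()));
      const uint32_t lhs_diff = out_rank - static_cast<uint32_t>(lhs.size());
      const uint32_t rhs_diff = out_rank - static_cast<uint32_t>(rhs.size());
      std::vector<uint32_t> out(out_rank);
      for (uint32_t axis = 0; axis < out_rank; ++axis)
        out[axis] = std::max(axis < lhs_diff ? 1u : lhs[axis - lhs_diff],
                             axis < rhs_diff ? 1u : rhs[axis - rhs_diff]);
      return out;
    }
    // broadcast({3, 1, 5}, {4, 1}) == {3, 4, 5}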
diff --git a/runtime/libs/tflite/src/interp/FlatBufferBuilder.cpp b/runtime/libs/tflite/src/interp/FlatBufferBuilder.cpp
deleted file mode 100644
index f54e67202..000000000
--- a/runtime/libs/tflite/src/interp/FlatBufferBuilder.cpp
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/interp/FlatBufferBuilder.h"
-
-#include "tflite/ext/kernels/register.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-
-std::unique_ptr<::tflite::Interpreter> FlatBufferBuilder::build(void) const
-{
- std::unique_ptr<::tflite::Interpreter> interpreter;
-
- nnfw::tflite::BuiltinOpResolver resolver;
-
- ::tflite::InterpreterBuilder builder(_model, resolver);
-
- builder(&interpreter);
-
- return interpreter;
-}
-
-} // namespace tflite
-} // namespace nnfw
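The deleted builder is the stock TF Lite 1.x construction idiom with the nnfw op resolver swapped in. For reference, the upstream equivalent (assuming TF Lite 1.13 headers and its built-in resolver):

    #include <tensorflow/lite/interpreter.h>
    #include <tensorflow/lite/kernels/register.h>
    #include <tensorflow/lite/model.h>

    #include <memory>

    std::unique_ptr<tflite::Interpreter> build(const tflite::FlatBufferModel& model)
    {
      tflite::ops::builtin::BuiltinOpResolver resolver;
      std::unique_ptr<tflite::Interpreter> interpreter;
      tflite::InterpreterBuilder builder(model, resolver);
      builder(&interpreter); // leaves interpreter null on failure
      return interpreter;
    }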
diff --git a/runtime/libs/tflite/src/interp/FunctionBuilder.cpp b/runtime/libs/tflite/src/interp/FunctionBuilder.cpp
deleted file mode 100644
index 599a4f393..000000000
--- a/runtime/libs/tflite/src/interp/FunctionBuilder.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/interp/FunctionBuilder.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-
-std::unique_ptr<::tflite::Interpreter> FunctionBuilder::build(void) const
-{
- auto res = std::unique_ptr<::tflite::Interpreter>{new ::tflite::Interpreter};
-
- _fn(*res);
-
- return res;
-}
-
-} // namespace tflite
-} // namespace nnfw
diff --git a/runtime/nnapi-header/include/NeuralNetworks.h b/runtime/nnapi-header/include/NeuralNetworks.h
index 7400806d8..0c54d7582 100644
--- a/runtime/nnapi-header/include/NeuralNetworks.h
+++ b/runtime/nnapi-header/include/NeuralNetworks.h
@@ -24,8 +24,8 @@
* @file NeuralNetworks.h
*/
-#ifndef ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_H
-#define ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_H
+#ifndef ANDROID_FRAMEWORKS_ML_NN_RUNTIME_NEURAL_NETWORKS_H
+#define ANDROID_FRAMEWORKS_ML_NN_RUNTIME_NEURAL_NETWORKS_H
/******************************************************************
*
@@ -43,16 +43,14 @@
* - DO NOT CHANGE THE LAYOUT OR SIZE OF STRUCTURES
*/
-// For compatibility with android, check __ANDROID_API__ is defined
-// If __ANDROID_API__ is pre-defined, this header may be used for android
-#ifndef __ANDROID_API__
-#define __ANDROID_API__ 29
-#define __ANDROID_API_Q__ 29
+// For compatibility with android, check __ANDROID__ is defined
+#ifndef __ANDROID__
+#define __ANDROID_API__ 30
#define __INTRODUCED_IN(api_level)
typedef struct AHardwareBuffer AHardwareBuffer;
#else
#include <android/hardware_buffer.h>
-#endif // __ANDROID_API__
+#endif // __ANDROID__
#include <stddef.h>
#include <stdint.h>
#include <sys/cdefs.h>
@@ -62,7 +60,11 @@ __BEGIN_DECLS
/**
* Operand types.
*
- * The type of operands that can be added to a model.
+ * The type of an operand in a model.
+ *
+ * Types prefaced with ANEURALNETWORKS_TENSOR_* must be used for tensor data (i.e., tensors
+ * with at least one dimension). Types not prefaced by ANEURALNETWORKS_TENSOR_* represent
+ * scalar values and must have no dimensions.
*
* Although we define many types, most operators accept just a few
* types. Most used are {@link ANEURALNETWORKS_TENSOR_FLOAT32},
@@ -94,7 +96,6 @@ typedef enum {
* real_value = (integer_value - zeroPoint) * scale.
*/
ANEURALNETWORKS_TENSOR_QUANT8_ASYMM = 5,
-#if __ANDROID_API__ >= __ANDROID_API_Q__
/**
* An 8 bit boolean scalar value.
*
@@ -160,7 +161,6 @@ typedef enum {
* Available since API level 29.
*/
ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL = 11,
-
/**
* A tensor of 16 bit unsigned integers that represent real numbers.
*
@@ -175,7 +175,6 @@ typedef enum {
* Available since API level 29.
*/
ANEURALNETWORKS_TENSOR_QUANT16_ASYMM = 12,
-
/**
* A tensor of 8 bit signed integers that represent real numbers.
*
@@ -188,14 +187,36 @@ typedef enum {
* Available since API level 29.
*/
ANEURALNETWORKS_TENSOR_QUANT8_SYMM = 13,
-#endif // __ANDROID_API__ >= __ANDROID_API_Q__
+ /**
+ * A tensor of 8 bit signed integers that represent real numbers.
+ *
+ * Attached to this tensor are two numbers that can be used to convert the
+ * 8 bit integer to the real value and vice versa. These two numbers are:
+ * - scale: a 32 bit floating point value greater than zero.
+ * - zeroPoint: a 32 bit integer, in range [-128, 127].
+ *
+ * The formula is:
+ * real_value = (integer_value - zeroPoint) * scale.
+ *
+ * Available since API level 30.
+ */
+ ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED = 14,
+ /**
+ * A reference to a model.
+ *
+ * {@link ANeuralNetworksModel_setOperandValueFromModel} must be used to set
+ * the value for an Operand of this type.
+ *
+ * Available since API level 30.
+ */
+ ANEURALNETWORKS_MODEL = 15,
} OperandCode;
/**
* Operation types.
*
- * The type of operations that can be added to a model.
+ * The type of an operation in a model.
*
* Available since API level 27.
*/
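For the new ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED type above, the zeroPoint range is [-128, 127]. A small check of the representable range under the affine formula, with example parameters:

    #include <cstdio>

    int main()
    {
      const float scale = 0.5f;    // example value
      const int zero_point = -128; // must lie in [-128, 127]
      // real_value = (integer_value - zeroPoint) * scale
      std::printf("min=%g max=%g\n",
                  (-128 - zero_point) * scale,  // 0.0
                  (127 - zero_point) * scale);  // 127.5
      return 0;
    }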
@@ -231,6 +252,8 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
+ * * {@link ANEURALNETWORKS_TENSOR_INT32} (since API level 30)
*
* Supported tensor rank: up to 4
*
@@ -238,15 +261,19 @@ typedef enum {
* * 0: A tensor.
* * 1: A tensor of the same {@link OperandCode}, and compatible dimensions
* as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scales and zeroPoint can be different from input0 scale and zeroPoint.
* * 2: An {@link ANEURALNETWORKS_INT32} scalar, and has to be one of the
* {@link FuseCode} values. Specifies the activation to
* invoke on the result.
+ * For a {@link ANEURALNETWORKS_TENSOR_INT32} tensor,
+ * the {@link FuseCode} must be "NONE".
*
* Outputs:
* * 0: The sum, a tensor of the same {@link OperandCode} as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint can be different from inputs' scale and zeroPoint.
*
* Available since API level 27.
@@ -270,18 +297,20 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
* With the default data layout NHWC, the data is stored in the order of:
* [batch, height, width, channels]. Alternatively, the data layout could
* be NCHW, the data storage order of: [batch, channels, height, width].
+ * NCHW is supported since API level 29.
*
* Both explicit padding and implicit padding are supported.
*
* Inputs (explicit padding):
* * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying
- * the input. Since API level 29, zero batches is supported for this
- * tensor.
+ * the input.
+ * Since API level 29, zero batches is supported for this tensor.
* * 1: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on
* the left, in the ‘width’ dimension.
* * 2: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on
@@ -307,8 +336,8 @@ typedef enum {
*
* Inputs (implicit padding):
* * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying
- * the input. Since API level 29, zero batches is supported for this
- * tensor.
+ * the input.
+ * Since API level 29, zero batches is supported for this tensor.
* * 1: An {@link ANEURALNETWORKS_INT32} scalar, specifying the implicit
* padding scheme, has to be one of the
* {@link PaddingCode} values.
@@ -330,7 +359,8 @@ typedef enum {
* Outputs:
* * 0: The output 4-D tensor, of shape
* [batches, out_height, out_width, depth].
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 27.
@@ -346,8 +376,9 @@ typedef enum {
* Supported tensor {@link OperandCode}:
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
- * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} (full support since API
- * level 29, see the input section)
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * (full support since API level 29, see the input section)
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4
*
@@ -357,6 +388,9 @@ typedef enum {
* Before API level 29, all input tensors of
* {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
* must have the same scale and zeroPoint as the output tensor.
+ * Input tensors of
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED}
+ * are allowed to have different scale and zeroPoint.
* Since API level 29, zero-sized tensors are supported.
* * n: An {@link ANEURALNETWORKS_INT32} scalar, specifying the
* concatenation axis.
@@ -373,7 +407,7 @@ typedef enum {
ANEURALNETWORKS_CONCATENATION = 2,
/**
- * Performs an 2-D convolution operation.
+ * Performs a 2-D convolution operation.
*
* The CONV_2D op sweeps a 2-D filter that can mix channels together over a
* batch of images, applying the filter to each window of each image of the
@@ -409,31 +443,46 @@ typedef enum {
* * * {@link ANEURALNETWORKS_TENSOR_INT32} for bias (scale set to 0.0,
* * * each value scaling is separate and equal to input.scale * filter.scales[channel]).
*
+ * Available since API level 30:
+ * * Quantized signed (since API level 30):
+ * * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} for input, filter, and output.
+ * * * {@link ANEURALNETWORKS_TENSOR_INT32} for bias (with scale set to
+ * * * input.scale * filter.scale).
+ *
+ * * Quantized signed with filter symmetric per channel quantization (since API level 30):
+ * * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} for input, and output.
+ * * * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL} for filter.
+ * * * {@link ANEURALNETWORKS_TENSOR_INT32} for bias (scale set to 0.0,
+ * * * each value scaling is separate and equal to input.scale * filter.scales[channel]).
+ *
* Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
* With the default data layout NHWC, the data is stored in the order of:
* [batch, height, width, channels]. Alternatively, the data layout could
* be NCHW, the data storage order of: [batch, channels, height, width].
+ * NCHW is supported since API level 29.
*
* Both explicit padding and implicit padding are supported.
*
* Inputs (explicit padding):
* * 0: A 4-D tensor, of shape [batches, height, width, depth_in],
- * specifying the input. Since API level 29, zero batches is supported
- * for this tensor.
+ * specifying the input.
+ * Since API level 29, zero batches is supported for this tensor.
* * 1: A 4-D tensor, of shape
* [depth_out, filter_height, filter_width, depth_in], specifying the
- * filter. For tensor of type
- * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL} the channel
- * dimension (extraParams.channelQuant.channelDim) must be set to 0.
+ * filter.
+ * For tensor of type {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL}
+ * the channel dimension (ANeuralNetworksSymmPerChannelQuantParams::channelDim)
+ * must be set to 0.
* * 2: A 1-D tensor, of shape [depth_out], specifying the bias. For input
- * tensor of type {@link ANEURALNETWORKS_TENSOR_FLOAT32} or
- * {@link ANEURALNETWORKS_TENSOR_FLOAT16}, the bias must be of the same
- * type. For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
+ * tensor of type {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * or {@link ANEURALNETWORKS_TENSOR_FLOAT16} the bias must be of the same type.
+ * For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * and {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED},
* the bias should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint
- * of 0 and bias_scale == input_scale * filter_scale. For filter tensor
- * of {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL}, the bias
- * should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of
- * 0 and bias_scale of 0. The actual scale of each value 'i' is equal to
+ * of 0 and bias_scale == input_scale * filter_scale.
+ * For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL},
+ * the bias should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0
+ * and bias_scale of 0. The actual scale of each value 'i' is equal to
* bias_scale[i] = input_scale * filter_scale[i].
* * 3: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on
* the left, in the ‘width’ dimension.
@@ -466,22 +515,25 @@ typedef enum {
*
* Inputs (implicit padding):
* * 0: A 4-D tensor, of shape [batches, height, width, depth_in],
- * specifying the input. Since API level 29, zero batches is supported
- * for this tensor.
+ * specifying the input.
+ * Since API level 29, zero batches is supported for this tensor.
* * 1: A 4-D tensor, of shape
* [depth_out, filter_height, filter_width, depth_in], specifying the
- * filter. For tensor of type
- * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL} the channel
- * dimension (extraParams.channelQuant.channelDim) must be set to 0.
+ * filter.
+ * For tensor of type {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL}
+ * the channel dimension (ANeuralNetworksSymmPerChannelQuantParams::channelDim)
+ * must be set to 0.
* * 2: A 1-D tensor, of shape [depth_out], specifying the bias. For input
- * tensor of type {@link ANEURALNETWORKS_TENSOR_FLOAT32} or
- * {@link ANEURALNETWORKS_TENSOR_FLOAT16}, the bias must be of the same
- * type. For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
+ * tensor of type {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * or {@link ANEURALNETWORKS_TENSOR_FLOAT16} the bias must be of the same
+ * type.
+ * For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * and {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED},
* the bias should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint
- * of 0 and bias_scale == input_scale * filter_scale. For filter tensor
- * of {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL}, the bias
- * should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of
- * 0 and bias_scale of 0. The actual scale of each value 'i' is equal to
+ * of 0 and bias_scale == input_scale * filter_scale.
+ * For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL},
+ * the bias should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0
+ * and bias_scale of 0. The actual scale of each value 'i' is equal to
* bias_scale[i] = input_scale * filter_scale[i].
* * 3: An {@link ANEURALNETWORKS_INT32} scalar, specifying the implicit
* padding scheme, has to be one of the
@@ -509,10 +561,9 @@ typedef enum {
*
* Outputs:
* * 0: The output 4-D tensor, of shape
- * [batches, out_height, out_width, depth_out]. Before API level 29,
- * for output tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
- * the following condition must be satisfied:
- * output_scale > input_scale * filter_scale
+ * [batches, out_height, out_width, depth_out].
+ * Before API level 29, for output tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
+ * the following condition must be satisfied: output_scale > input_scale * filter_scale
*
* Available since API level 27.
*/
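The per-channel rule repeated throughout these operator docs is bias_scale[i] = input_scale * filter_scale[i], with the bias zeroPoint fixed at 0. Expressed directly:

    #include <vector>

    std::vector<float> bias_scales(float input_scale, const std::vector<float>& filter_scales)
    {
      std::vector<float> scales;
      scales.reserve(filter_scales.size());
      for (float fs : filter_scales)
        scales.push_back(input_scale * fs); // bias_scale[i] = input_scale * filter_scale[i]
      return scales;
    }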
@@ -559,10 +610,23 @@ typedef enum {
* * * {@link ANEURALNETWORKS_TENSOR_INT32} for bias (scale set to 0.0,
* * * each value scaling is separate and equal to input.scale * filter.scales[channel]).
*
+ * Available since API level 30:
+ * * Quantized signed (since API level 30):
+ * * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} for input, filter, and output.
+ * * * {@link ANEURALNETWORKS_TENSOR_INT32} for bias (with scale set to
+ * * * input.scale * filter.scale).
+ *
+ * * Quantized signed with filter symmetric per channel quantization (since API level 30):
+ * * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} for input, and output.
+ * * * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL} for filter.
+ * * * {@link ANEURALNETWORKS_TENSOR_INT32} for bias (scale set to 0.0,
+ * * * each value scaling is separate and equal to input.scale * filter.scales[channel]).
+ *
* Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
* With the default data layout NHWC, the data is stored in the order of:
* [batch, height, width, channels]. Alternatively, the data layout could
* be NCHW, the data storage order of: [batch, channels, height, width].
+ * NCHW is supported since API level 29.
*
* Both explicit padding and implicit padding are supported.
*
@@ -570,18 +634,20 @@ typedef enum {
* * 0: A 4-D tensor, of shape [batches, height, width, depth_in],
* specifying the input.
* * 1: A 4-D tensor, of shape [1, filter_height, filter_width, depth_out],
- * specifying the filter. For tensor of type
- * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL} the channel
- * dimension (extraParams.channelQuant.channelDim) must be set to 3.
+ * specifying the filter.
+ * For tensor of type {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL}
+ * the channel dimension (ANeuralNetworksSymmPerChannelQuantParams::channelDim)
+ * must be set to 3.
* * 2: A 1-D tensor, of shape [depth_out], specifying the bias. For input
- * tensor of type {@link ANEURALNETWORKS_TENSOR_FLOAT32} or
- * {@link ANEURALNETWORKS_TENSOR_FLOAT16}, the bias must be of the same
- * type. For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
+ * tensor of type {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * or {@link ANEURALNETWORKS_TENSOR_FLOAT16} the bias must be of the same type.
+ * For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * and {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED},
* the bias should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint
- * of 0 and bias_scale == input_scale * filter_scale. For filter tensor
- * of {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL}, the bias
- * should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of
- * 0 and bias_scale of 0. The actual scale of each value 'i' is equal to
+ * of 0 and bias_scale == input_scale * filter_scale.
+ * For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL},
+ * the bias should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0
+ * and bias_scale of 0. The actual scale of each value 'i' is equal to
* bias_scale[i] = input_scale * filter_scale[i].
* * 3: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on
* the left, in the ‘width’ dimension.
@@ -620,14 +686,15 @@ typedef enum {
* * 1: A 4-D tensor, of shape [1, filter_height, filter_width, depth_out],
* specifying the filter.
* * 2: A 1-D tensor, of shape [depth_out], specifying the bias. For input
- * tensor of type {@link ANEURALNETWORKS_TENSOR_FLOAT32} or
- * {@link ANEURALNETWORKS_TENSOR_FLOAT16}, the bias must be of the same
- * type. For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
+ * tensor of type {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * or {@link ANEURALNETWORKS_TENSOR_FLOAT16} the bias must be of the same type.
+ * For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * and {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED},
* the bias should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint
- * of 0 and bias_scale == input_scale * filter_scale. For filter tensor
- * of {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL}, the bias
- * should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of
- * 0 and bias_scale of 0. The actual scale of each value 'i' is equal to
+ * of 0 and bias_scale == input_scale * filter_scale.
+ * For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL},
+ * the bias should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0
+ * and bias_scale of 0. The actual scale of each value 'i' is equal to
* bias_scale[i] = input_scale * filter_scale[i].
* * 3: An {@link ANEURALNETWORKS_INT32} scalar, specifying the implicit
* padding scheme, has to be one of the
@@ -654,12 +721,11 @@ typedef enum {
* cells between each filter element on height dimension. If this input is set,
* input 9 (dilation factor for width) must be specified as well.
* Available since API level 29.
-
*
* Outputs:
* * 0: The output 4-D tensor, of shape
- * [batches, out_height, out_width, depth_out]. Before API level 29,
- * for output tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
+ * [batches, out_height, out_width, depth_out]. Before API level 29, for
+ * output tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
* the following condition must be satisfied:
* output_scale > input_scale * filter_scale
*
@@ -686,11 +752,13 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
* With the default data layout NHWC, the data is stored in the order of:
* [batch, height, width, channels]. Alternatively, the data layout could
* be NCHW, the data storage order of: [batch, channels, height, width].
+ * NCHW is supported since API level 29.
*
* Inputs:
* * 0: A 4-D tensor, of shape [batches, height, width, depth_in],
@@ -705,7 +773,8 @@ typedef enum {
* Outputs:
* * 0: The output 4-D tensor, of shape [batch, height*block_size,
* width*block_size, depth/(block_size*block_size)].
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 27.
@@ -723,6 +792,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL} (since API level 29)
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported output tensor {@link OperandCode}:
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
@@ -731,7 +801,8 @@ typedef enum {
* Supported tensor rank: up to 4
*
* Inputs:
- * * 0: A tensor. Since API level 29, this tensor may be zero-sized.
+ * * 0: A tensor.
+ * Since API level 29, this tensor may be zero-sized.
*
* Outputs:
* * 0: A tensor with the same shape as input0.
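
For reference, the dequantization this operation performs is the usual affine mapping; a minimal sketch of the element-wise formula, using only the input operand's scale and zeroPoint:

    #include <stdint.h>

    /* Sketch: real_value = scale * (quantized_value - zeroPoint). */
    static inline float dequantize(int32_t q, float scale, int32_t zero_point) {
      return scale * (float)(q - zero_point);
    }
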
@@ -761,9 +832,11 @@ typedef enum {
* and an error must be reported.
*
* Supported value tensor {@link OperandCode}:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 30)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
- * * {@link ANEURALNETWORKS_TENSOR_INT32}
- * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_INT32} (since API level 29)
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} (since API level 29)
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported value tensor rank: from 2
*
@@ -777,7 +850,8 @@ typedef enum {
* * 0: A n-D tensor with the same rank and shape as the Values
* tensor, except for the first dimension which has the same size
* as Lookups' only dimension.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input1.
*
* Available since API level 27.
@@ -816,6 +890,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4.
*
@@ -826,26 +901,26 @@ typedef enum {
* [batch_size, input_size], where "input_size" corresponds to the
* number of inputs to the layer, matching the second dimension of
* weights, and "batch_size" is calculated by dividing the number of
- * elements by "input_size". Since API level 29, zero batch_size is
- * supported for this tensor.
+ * elements by "input_size".
+ * Since API level 29, zero batch_size is supported for this tensor.
* * 1: A 2-D tensor, specifying the weights, of shape
* [num_units, input_size], where "num_units" corresponds to the number
* of output nodes.
* * 2: A 1-D tensor, of shape [num_units], specifying the bias. For input
* tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, the bias should
- * also be of {@link ANEURALNETWORKS_TENSOR_FLOAT32}. For input tensor
- * of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, the bias should be
- * of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0 and
- * bias_scale == input_scale * filter_scale.
+ * also be of {@link ANEURALNETWORKS_TENSOR_FLOAT32}.
+ * For input tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * and {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED},
+ * the bias should be of {@link ANEURALNETWORKS_TENSOR_INT32},
+ * with zeroPoint of 0 and bias_scale == input_scale * filter_scale.
* * 3: An {@link ANEURALNETWORKS_INT32} scalar, and has to be one of the
* {@link FuseCode} values. Specifies the activation to
* invoke on the result.
*
* Outputs:
- * * 0: The output tensor, of shape [batch_size, num_units]. Before API
- * level 29, for output tensor of {@link
- * ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, the following condition must
- * be satisfied: output_scale > input_scale * filter_scale.
+ * * 0: The output tensor, of shape [batch_size, num_units]. Before API level 29, for
+ * output tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, the following
+ * condition must be satisfied: output_scale > input_scale * filter_scale.
*
* Available since API level 27.
*/
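
A minimal sketch of the quantized bias rule above, with illustrative names: the INT32 bias operand of a quantized FULLY_CONNECTED carries zeroPoint 0 and scale equal to input_scale * filter_scale:

    #include <android/NeuralNetworks.h>

    /* Sketch: declare the bias operand for a quantized FULLY_CONNECTED.
     * `model`, `num_units`, `input_scale`, `filter_scale` are placeholders. */
    int add_fc_bias_operand(ANeuralNetworksModel* model, uint32_t num_units,
                            float input_scale, float filter_scale) {
      uint32_t dims[1] = {num_units};
      ANeuralNetworksOperandType bias = {
          .type = ANEURALNETWORKS_TENSOR_INT32,
          .dimensionCount = 1,
          .dimensions = dims,
          .scale = input_scale * filter_scale, /* bias_scale == input_scale * filter_scale */
          .zeroPoint = 0,                      /* required to be 0 */
      };
      return ANeuralNetworksModel_addOperand(model, &bias);
    }
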
@@ -911,7 +986,7 @@ typedef enum {
ANEURALNETWORKS_HASHTABLE_LOOKUP = 10,
/**
- * Applies L2 normalization along the depth dimension.
+ * Applies L2 normalization along the axis dimension.
*
* The values in the output tensor are computed as:
*
@@ -919,13 +994,13 @@ typedef enum {
* input[batch, row, col, channel] /
* sqrt(sum_{c} pow(input[batch, row, col, c], 2))
*
- * For input tensor with rank less than 4, independently normalizes each
- * 1-D slice along dimension dim.
+ * By default the axis dimension is the last dimension of the input tensor.
*
* Supported tensor {@link OperandCode}:
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} (since API level 29)
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4
* Tensors with rank less than 4 are only supported since API level 29.
@@ -942,6 +1017,12 @@ typedef enum {
* * 0: A tensor of the same {@link OperandCode} and same shape as input0.
* For {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
* the scale must be 1.f / 128 and the zeroPoint must be 128.
+ * For {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED},
+ * the scale must be 1.f / 128 and the zeroPoint must be 0.
+ *
+ * NOTE: Before API level 30, if the elements along an axis are all zeros,
+ * the result is undefined. Since API level 30, if the elements along an axis
+ * are all zeros, the result is logical zero.
*
* Available since API level 27.
*/
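
A reference sketch of the normalization above for a single 1-D slice along the axis dimension, including the API level 30 all-zero behavior:

    #include <math.h>
    #include <stdint.h>

    /* Sketch: out[i] = in[i] / sqrt(sum_j in[j]^2) over one axis slice.
     * Since API level 30, an all-zero slice yields logical-zero output. */
    void l2_normalize_slice(const float* in, float* out, uint32_t n) {
      float sum_sq = 0.f;
      for (uint32_t i = 0; i < n; ++i) sum_sq += in[i] * in[i];
      if (sum_sq == 0.f) {
        for (uint32_t i = 0; i < n; ++i) out[i] = 0.f;
        return;
      }
      const float inv_norm = 1.f / sqrtf(sum_sq);
      for (uint32_t i = 0; i < n; ++i) out[i] = in[i] * inv_norm;
    }
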
@@ -967,13 +1048,14 @@ typedef enum {
* With the default data layout NHWC, the data is stored in the order of:
* [batch, height, width, channels]. Alternatively, the data layout could
* be NCHW, the data storage order of: [batch, channels, height, width].
+ * NCHW is supported since API level 29.
*
* Both explicit padding and implicit padding are supported.
*
* Inputs (explicit padding):
* * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying
- * the input. Since API level 29, zero batches is supported for this
- * tensor.
+ * the input.
+ * Since API level 29, zero batches is supported for this tensor.
* * 1: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on
* the left, in the ‘width’ dimension.
* * 2: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on
@@ -999,8 +1081,8 @@ typedef enum {
*
* Inputs (implicit padding):
* * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying
- * the input. Since API level 29, zero batches is supported for this
- * tensor.
+ * the input.
+ * Since API level 29, zero batches is supported for this tensor.
* * 1: An {@link ANEURALNETWORKS_INT32} scalar, specifying the implicit
* padding scheme, has to be one of the
* {@link PaddingCode} values.
@@ -1095,17 +1177,20 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4.
*
* Inputs:
- * * 0: A tensor, specifying the input. Since API level 29, this tensor may
- * be zero-sized.
+ * * 0: A tensor, specifying the input.
+ * Since API level 29, this tensor may be zero-sized.
*
* Outputs:
* * 0: The output tensor of same shape as input0.
* For {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
* the scale must be 1.f / 256 and the zeroPoint must be 0.
+ * For {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED},
+ * the scale must be 1.f / 256 and the zeroPoint must be -128.
*
* Available since API level 27.
*/
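
The two quantized output encodings above describe the same real-valued grid; a small sketch of the mapping:

    #include <stdint.h>

    /* Sketch: with scale 1/256, QUANT8_ASYMM (uint8, zeroPoint 0) and
     * QUANT8_ASYMM_SIGNED (int8, zeroPoint -128) both span [0, 255/256],
     * covering the open output range of the logistic function. */
    static inline float logistic_output_real(int32_t q, int32_t zero_point) {
      return (1.f / 256.f) * (float)(q - zero_point);
    }
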
@@ -1158,7 +1243,7 @@ typedef enum {
* Outputs:
* * 0: If the projection type is Sparse:
* Output.Dim == { Tensor[0].Dim[0] }
- * A tensor of int32 that represents hash signatures,
+ * A tensor of int32 that represents hash signatures.
*
* If the projection type is Dense:
* Output.Dim == { Tensor[0].Dim[0] * Tensor[0].Dim[1] }
@@ -1248,7 +1333,7 @@ typedef enum {
* * The projection bias (\f$b_{proj}\f$) may (but not required to) have a
* value if the recurrent projection layer exists, and should otherwise
* have no value.
- * * (API level >= 29) The four layer normalization weights either all have
+ * * (API level 29 or later) The four layer normalization weights either all have
* values or none of them have values. Additionally, if CIFG is used,
* input layer normalization weights tensor is omitted and the other layer
* normalization weights either all have values or none of them have
@@ -1406,18 +1491,20 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
* With the default data layout NHWC, the data is stored in the order of:
* [batch, height, width, channels]. Alternatively, the data layout could
* be NCHW, the data storage order of: [batch, channels, height, width].
+ * NCHW is supported since API level 29.
*
* Both explicit padding and implicit padding are supported.
*
* Inputs (explicit padding):
* * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying
- * the input. Since API level 29, zero batches is supported for this
- * tensor.
+ * the input.
+ * Since API level 29, zero batches is supported for this tensor.
* * 1: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on
* the left, in the ‘width’ dimension.
* * 2: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on
@@ -1443,8 +1530,8 @@ typedef enum {
*
* Inputs (implicit padding):
* * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying
- * the input. Since API level 29, zero batches is supported for this
- * tensor.
+ * the input.
+ * Since API level 29, zero batches is supported for this tensor.
* * 1: An {@link ANEURALNETWORKS_INT32} scalar, specifying the implicit
* padding scheme, has to be one of the
* {@link PaddingCode} values.
@@ -1466,7 +1553,8 @@ typedef enum {
* Outputs:
* * 0: The output 4-D tensor, of shape
* [batches, out_height, out_width, depth].
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 27.
@@ -1496,6 +1584,8 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
+ * * {@link ANEURALNETWORKS_TENSOR_INT32} (since API level 30)
*
* Supported tensor rank: up to 4
*
@@ -1506,10 +1596,13 @@ typedef enum {
* * 2: An {@link ANEURALNETWORKS_INT32} scalar, and has to be one of the
* {@link FuseCode} values. Specifies the activation to
* invoke on the result.
+ * For a {@link ANEURALNETWORKS_TENSOR_INT32} tensor,
+ * the {@link FuseCode} must be "NONE".
*
* Outputs:
* * 0: The product, a tensor of the same {@link OperandCode} as input0.
- * For output tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
+ * For output tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * and {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED},
* the following condition must be satisfied:
* output_scale > input1_scale * input2_scale.
*
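
A trivial sketch of the quantized output constraint stated above, usable as a validation check when constructing a quantized MUL:

    #include <stdbool.h>

    /* Sketch of the documented requirement: the product of the input scales
     * must stay strictly below the output scale, so the effective
     * requantization multiplier remains below 1. */
    static inline bool mul_output_scale_ok(float input1_scale,
                                           float input2_scale,
                                           float output_scale) {
      return output_scale > input1_scale * input2_scale;
    }
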
@@ -1528,16 +1621,18 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4.
*
* Inputs:
- * * 0: A tensor, specifying the input. Since API level 29, this tensor may
- * be zero-sized.
+ * * 0: A tensor, specifying the input.
+ * Since API level 29, this tensor may be zero-sized.
*
* Outputs:
* * 0: The output tensor of same shape as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 27.
@@ -1555,16 +1650,18 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4.
*
* Inputs:
- * * 0: A tensor, specifying the input. Since API level 29, this tensor may
- * be zero-sized.
+ * * 0: A tensor, specifying the input.
+ * Since API level 29, this tensor may be zero-sized.
*
* Outputs:
* * 0: The output tensor of the same shape as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 27.
@@ -1582,16 +1679,18 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4.
*
* Inputs:
- * * 0: A tensor, specifying the input. Since API level 29, this tensor may
- * be zero-sized.
+ * * 0: A tensor, specifying the input.
+ * Since API level 29, this tensor may be zero-sized.
*
* Outputs:
* * 0: The output tensor of same shape as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 27.
@@ -1608,6 +1707,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4.
*
@@ -1624,7 +1724,8 @@ typedef enum {
*
* Outputs:
* * 0: The output tensor, of shape specified by the input shape.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 27.
@@ -1642,18 +1743,20 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} (since API level 29)
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
* With the default data layout NHWC, the data is stored in the order of:
* [batch, height, width, channels]. Alternatively, the data layout could
* be NCHW, the data storage order of: [batch, channels, height, width].
+ * NCHW is supported since API level 29.
*
* Both resizing by shape and resizing by scale are supported.
*
* Inputs (resizing by shape):
* * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying
- * the input. Since API level 29, zero batches is supported for this
- * tensor.
+ * the input.
+ * Since API level 29, zero batches is supported for this tensor.
* * 1: An {@link ANEURALNETWORKS_INT32} scalar, specifying the output
* width of the output tensor.
* * 2: An {@link ANEURALNETWORKS_INT32} scalar, specifying the output
@@ -1661,6 +1764,17 @@ typedef enum {
* * 3: An optional {@link ANEURALNETWORKS_BOOL} scalar, default to false.
* Set to true to specify NCHW data layout for input0 and output0.
* Available since API level 29.
+ * * 4: Align corners. An optional {@link ANEURALNETWORKS_BOOL}
+ * scalar, default to false. If True, the centers of the 4 corner
+ * pixels of the input and output tensors are aligned, preserving the
+ * values at the corner pixels.
+ * Available since API level 30.
+ * * 5: Half pixel centers. An optional {@link ANEURALNETWORKS_BOOL}
+ * scalar, default to false. If True, the pixel centers are assumed to
+ * be at (0.5, 0.5). This is the default behavior of image.resize in
+ * TF 2.0. If this parameter is True, then the align_corners parameter
+ * must be False.
+ * Available since API level 30.
*
* Inputs (resizing by scale, since API level 29):
* * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying
@@ -1679,10 +1793,24 @@ typedef enum {
* {@link ANEURALNETWORKS_FLOAT32} otherwise.
* * 3: An optional {@link ANEURALNETWORKS_BOOL} scalar, default to false.
* Set to true to specify NCHW data layout for input0 and output0.
+ * * 4: Align corners. An optional {@link ANEURALNETWORKS_BOOL}
+ * scalar, default to false. If True, the centers of the 4 corner
+ * pixels of the input and output tensors are aligned, preserving the
+ * values at the corner pixels.
+ * Available since API level 30.
+ * * 5: Half pixel centers. An optional {@link ANEURALNETWORKS_BOOL}
+ * scalar, default to false. If True, the pixel centers are assumed to
+ * be at (0.5, 0.5). This is the default behavior of image.resize in
+ * TF 2.0. If this parameter is True, then the align_corners parameter
+ * must be False.
+ * Available since API level 30.
*
* Outputs:
* * 0: The output 4-D tensor, of shape
* [batches, new_height, new_width, depth].
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+ * the scale and zeroPoint must be the same as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
- * the scale and zeroPoint must be the same as input0.
*
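
A sketch of the coordinate transforms these two optional flags usually select (assumption: the TF-style conventions referenced above, not text taken from this header):

    #include <stdint.h>

    /* Sketch: map an output x-coordinate to its source coordinate. */
    float resize_src_coord(int32_t x_out, int32_t in_size, int32_t out_size,
                           int align_corners, int half_pixel_centers) {
      if (half_pixel_centers)            /* pixel centers assumed at 0.5 */
        return ((float)x_out + 0.5f) * (float)in_size / (float)out_size - 0.5f;
      if (align_corners && out_size > 1) /* corner pixel centers aligned */
        return (float)x_out * (float)(in_size - 1) / (float)(out_size - 1);
      return (float)x_out * (float)in_size / (float)out_size;
    }
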
@@ -1762,19 +1890,21 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4.
* Tensors with rank other than 2 or 4 are only supported since API level 29.
*
* Inputs:
- * * 0: A 2-D or 4-D tensor, specifying the tensor to be reshaped. Since
- * API level 29, this tensor may be zero-sized.
+ * * 0: A 2-D or 4-D tensor, specifying the tensor to be reshaped.
+ * Since API level 29, this tensor may be zero-sized.
* * 1: A scalar, specifying the positive scaling factor for the exponent,
- * beta. If input0 is of {@link ANEURALNETWORKS_TENSOR_FLOAT32} or
- * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, the scalar must be of
- * {@link ANEURALNETWORKS_FLOAT32}. If input0 is of {@link
- * ANEURALNETWORKS_TENSOR_FLOAT16}, then the scalar must be of {@link
- * ANEURALNETWORKS_FLOAT16}.
+ * beta. If input0 is of {@link ANEURALNETWORKS_TENSOR_FLOAT32},
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} or
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED}, the scalar
+ * must be of {@link ANEURALNETWORKS_FLOAT32}.
+ * If input0 is of {@link ANEURALNETWORKS_TENSOR_FLOAT16}, then the
+ * scalar must be of {@link ANEURALNETWORKS_FLOAT16}.
* * 2: An optional {@link ANEURALNETWORKS_INT32} scalar, default to -1,
* specifying the dimension the activation would be performed on.
* Negative index is used to specify axis from the end (e.g. -1 for
@@ -1785,6 +1915,8 @@ typedef enum {
* * 0: The output tensor of same shape as input0.
* For {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
* the scale must be 1.f / 256 and the zeroPoint must be 0.
+ * For {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED},
+ * the scale must be 1.f / 256 and the zeroPoint must be -128.
*
* Available since API level 27.
*/
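
A reference sketch of softmax with the beta scaling factor over one 1-D slice; subtracting the maximum is the usual numerical-stability trick and does not change the result:

    #include <math.h>
    #include <stdint.h>

    /* Sketch: out[i] = exp(beta*(in[i]-max)) / sum_j exp(beta*(in[j]-max)). */
    void softmax_slice(const float* in, float* out, uint32_t n, float beta) {
      float max_v = in[0];
      for (uint32_t i = 1; i < n; ++i)
        if (in[i] > max_v) max_v = in[i];
      float sum = 0.f;
      for (uint32_t i = 0; i < n; ++i) {
        out[i] = expf(beta * (in[i] - max_v));
        sum += out[i];
      }
      for (uint32_t i = 0; i < n; ++i) out[i] /= sum;
    }
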
@@ -1808,11 +1940,13 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
* With the default data layout NHWC, the data is stored in the order of:
* [batch, height, width, channels]. Alternatively, the data layout could
* be NCHW, the data storage order of: [batch, channels, height, width].
+ * NCHW is supported since API level 29.
*
* Inputs:
* * 0: A 4-D tensor, of shape [batches, height, width, depth_in],
@@ -1827,7 +1961,8 @@ typedef enum {
* Outputs:
* * 0: The output 4-D tensor, of shape [batches, height/block_size,
* width/block_size, depth_in*block_size*block_size].
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 27.
@@ -1924,17 +2059,20 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} (since API level 29)
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4.
*
* Inputs:
- * * 0: A tensor, specifying the input. Since API level 29, this tensor may
- * be zero-sized.
+ * * 0: A tensor, specifying the input.
+ * Since API level 29, this tensor may be zero-sized.
*
* Outputs:
* * 0: The output tensor of same shape as input0.
* For {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
* the scale must be 1.f / 128 and the zeroPoint must be 128.
+ * For {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED},
+ * the scale must be 1.f / 128 and the zeroPoint must be 0.
*
* Available since API level 27.
*/
@@ -1942,7 +2080,6 @@ typedef enum {
// Operations below are available since API level 28.
- // TODO: make the description easier to understand.
/**
* BatchToSpace for N-dimensional tensors.
*
@@ -1957,11 +2094,13 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
* With the default data layout NHWC, the data is stored in the order of:
* [batch, height, width, channels]. Alternatively, the data layout could
* be NCHW, the data storage order of: [batch, channels, height, width].
+ * NCHW is supported since API level 29.
*
* Inputs:
* * 0: An n-D tensor, specifying the tensor to be reshaped
@@ -1974,7 +2113,8 @@ typedef enum {
*
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 28.
@@ -1988,6 +2128,11 @@ typedef enum {
* dimensions. The output is the result of dividing the first input tensor
* by the second, optionally modified by an activation function.
*
+ * For inputs of {@link ANEURALNETWORKS_TENSOR_INT32}, performs
+ * "floor division" ("//" in Python). For example,
+ * 5 // 2 = 2
+ * -5 // 2 = -3
+ *
* Two dimensions are compatible when:
* 1. they are equal, or
* 2. one of them is 1
@@ -2008,6 +2153,7 @@ typedef enum {
* Supported tensor {@link OperandCode}:
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_INT32} (since API level 30)
*
* Supported tensor rank: up to 4
*
@@ -2018,6 +2164,8 @@ typedef enum {
* * 2: An {@link ANEURALNETWORKS_INT32} scalar, and has to be one of the
* {@link FuseCode} values. Specifies the activation to
* invoke on the result.
+ * For a {@link ANEURALNETWORKS_TENSOR_INT32} tensor,
+ * the {@link FuseCode} must be "NONE".
*
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0.
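
Since C's integer division truncates toward zero while the TENSOR_INT32 behavior above is floor division, a portable sketch needs one sign adjustment:

    #include <stdint.h>

    /* Sketch: floor division matching Python's "//".
     * floor_div(5, 2) == 2 and floor_div(-5, 2) == -3, as documented above. */
    static inline int32_t floor_div(int32_t a, int32_t b) {
      int32_t q = a / b;                              /* truncates toward zero */
      if ((a % b != 0) && ((a < 0) != (b < 0))) --q;  /* round toward -inf */
      return q;
    }
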
@@ -2038,6 +2186,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4
*
@@ -2057,23 +2206,27 @@ typedef enum {
*
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
- * the scale and zeroPoint must be same as input0.
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+ * the scale and zeroPoint must be the same as input0.
+ * If all dimensions are reduced and keep_dims is false, the output
+ * shape is [1].
*
* Available since API level 28.
*/
ANEURALNETWORKS_MEAN = 31,
/**
- * Pads a tensor with zeros.
+ * Pads a tensor.
*
* This operation pads a tensor according to the specified paddings.
*
* Supported tensor {@link OperandCode}:
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
- * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} (full support since API
- * level 29, see the output section)
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} (full support since API
+ *   level 29, see the output section)
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4
*
@@ -2095,7 +2248,8 @@ typedef enum {
* of the padding:
* output0.dimension[i] =
* padding[i, 0] + input0.dimension[i] + padding[i, 1]
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* NOTE: Before API level 29, the pad value for
@@ -2106,7 +2260,6 @@ typedef enum {
*/
ANEURALNETWORKS_PAD = 32,
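
A minimal sketch of the output-shape formula in the PAD description above, assuming a row-major [rank, 2] paddings array:

    #include <stdint.h>

    /* Sketch: out[i] = padding[i][0] + in[i] + padding[i][1]. */
    void pad_output_dims(const uint32_t* in_dims, const int32_t (*paddings)[2],
                         uint32_t rank, uint32_t* out_dims) {
      for (uint32_t i = 0; i < rank; ++i)
        out_dims[i] = (uint32_t)paddings[i][0] + in_dims[i]
                      + (uint32_t)paddings[i][1];
    }
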
- // TODO: make the description easier to understand.
/**
* SpaceToBatch for N-Dimensional tensors.
*
@@ -2121,13 +2274,15 @@ typedef enum {
* Supported tensor {@link OperandCode}:
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
- * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} (full support since API
- * level 29, see the output section)
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} (full support since API
+ *   level 29, see the output section)
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
* With the default data layout NHWC, the data is stored in the order of:
* [batch, height, width, channels]. Alternatively, the data layout could
* be NCHW, the data storage order of: [batch, channels, height, width].
+ * NCHW is supported since API level 29.
*
* Inputs:
* * 0: An n-D tensor, specifying the input.
@@ -2148,7 +2303,8 @@ typedef enum {
*
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* NOTE: Before API level 29, the pad value for
@@ -2171,6 +2327,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4
*
@@ -2186,8 +2343,11 @@ typedef enum {
* * 0: A tensor of the same {@link OperandCode} as input0. Contains the
* same data as input, but has one or more dimensions of size 1
* removed.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
+ * If all input dimensions are equal to 1 and are to be squeezed, the
+ * output shape is [1].
*
* Available since API level 28.
*/
@@ -2206,6 +2366,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4
*
@@ -2235,8 +2396,11 @@ typedef enum {
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0 and rank (n - k),
* where k is the number of bits set in shrink_axis_mask.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
+ * If shrink_axis_mask is true for all input dimensions, the output
+ * shape is [1].
*
* Available since API level 28.
*/
@@ -2270,6 +2434,8 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} (since API level 29)
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
+ * * {@link ANEURALNETWORKS_TENSOR_INT32} (since API level 30)
*
* Supported tensor rank: up to 4
*
@@ -2280,10 +2446,13 @@ typedef enum {
* * 2: An {@link ANEURALNETWORKS_INT32} scalar, and has to be one of the
* {@link FuseCode} values. Specifies the activation to
* invoke on the result.
+ * For a {@link ANEURALNETWORKS_TENSOR_INT32} tensor,
+ * the {@link FuseCode} must be "NONE".
*
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint can be different from inputs' scale and zeroPoint.
*
* Available since API level 28.
@@ -2303,6 +2472,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4
*
@@ -2314,7 +2484,8 @@ typedef enum {
*
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 28.
@@ -2329,6 +2500,7 @@ typedef enum {
* Supported tensor {@link OperandCode}:
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_INT32} (since API level 30)
*
* Supported tensor rank: from 1.
*
@@ -2350,6 +2522,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1
*
@@ -2361,6 +2534,7 @@ typedef enum {
*
* Outputs:
* * 0: An (n - 1)-D {@link ANEURALNETWORKS_TENSOR_INT32} tensor.
+ * If input is 1-dimensional, the output shape is [1].
*
* Available since API level 29.
*/
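
A sketch of the 1-D case called out above: reducing away the only axis leaves a single INT32 value, i.e. an output tensor of shape [1]:

    #include <stdint.h>

    /* Sketch: index of the largest element of a 1-D float tensor. */
    int32_t argmax_1d(const float* data, uint32_t n) {
      int32_t best = 0;
      for (uint32_t i = 1; i < n; ++i)
        if (data[i] > data[best]) best = (int32_t)i;
      return best;
    }
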
@@ -2376,6 +2550,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1
*
@@ -2387,6 +2562,7 @@ typedef enum {
*
* Outputs:
* * 0: An (n - 1)-D {@link ANEURALNETWORKS_TENSOR_INT32} tensor.
+ * If input is 1-dimensional, the output shape is [1].
*
* Available since API level 29.
*/
@@ -2419,7 +2595,8 @@ typedef enum {
* and height, dw and dh is the log-scale relative correction factor
* for the width and height. For input0 of type
* {@link ANEURALNETWORKS_TENSOR_QUANT16_ASYMM}, this tensor should be
- * of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}. Zero num_rois is
+ * of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} or
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED}. Zero num_rois is
* supported for this tensor.
* * 2: An 1-D {@link ANEURALNETWORKS_TENSOR_INT32} tensor, of shape
* [num_rois], specifying the batch index of each box. Boxes with
@@ -2441,7 +2618,54 @@ typedef enum {
ANEURALNETWORKS_AXIS_ALIGNED_BBOX_TRANSFORM = 41,
/**
- * Performs a forward LSTM on the input followed by a backward LSTM.
+ * A recurrent neural network layer that applies an LSTM cell to a
+ * sequence of inputs in forward and backward directions.
+ *
+ * The op supports cross-linking via an auxiliary input. Regular cell feeds
+ * one input into the two RNN cells in the following way:
+ *
+ * INPUT (INPUT_REVERSED)
+ * | |
+ * ---------------------
+ * | FW_LSTM BW_LSTM |
+ * ---------------------
+ * | |
+ * FW_OUT BW_OUT
+ *
+ * An op with cross-linking takes two inputs and feeds them into the RNN
+ * cells in the following way:
+ *
+ * AUX_INPUT (AUX_INPUT_REVERSED)
+ * | |
+ * INPUT | (INPUT_R'D.)|
+ * | | | |
+ * -----------------------
+ * | \ / \ / |
+ * | FW_LSTM BW_LSTM |
+ * -----------------------
+ * | |
+ * FW_OUT BW_OUT
+ *
+ * The cross-linking mode is enabled iff auxiliary input and auxiliary
+ * weights are present. While stacking this op on top of itself, this
+ * allows connecting both forward and backward outputs from the previous
+ * cell to the next cell's input.
+ *
+ * Since API level 30, parallel linking mode is supported. The mode is
+ * enabled if auxiliary input is present but auxiliary weights are omitted.
+ * In this case, the cell feeds inputs into the RNN in the following way:
+ *
+ * INPUT (AUX_INPUT_REVERSED)
+ * | |
+ * ---------------------
+ * | FW_LSTM BW_LSTM |
+ * ---------------------
+ * | |
+ * FW_OUT BW_OUT
+ *
+ * While stacking this op on top of itself, this allows connecting both
+ * forward and backward outputs from the previous cell to the next cell's
+ * corresponding inputs.
*
* Supported tensor {@link OperandCode}:
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
@@ -2451,7 +2675,6 @@ typedef enum {
*
* All input and output tensors must be of the same type.
*
- *
* Inputs:
* * 0: The input.
* A 3-D tensor of shape:
@@ -2543,25 +2766,34 @@ typedef enum {
* * 38: The backward input cell state.
* A 2-D tensor of shape [batch_size, bw_num_units].
* * 39: The auxiliary input. Optional.
- * A 3-D tensor of shape [max_time, batch_size, input_size], where “batch_size”
- * corresponds to the batching dimension, and “input_size” is the size
- * of the input.
- * * 40: The forward auxiliary input-to-input weights. Optional.
- * A 2-D tensor of shape [fw_num_units, input_size].
- * * 41: The forward auxiliary input-to-forget weights. Optional.
- * A 2-D tensor of shape [fw_num_units, input_size].
- * * 42: The forward auxiliary input-to-cell weights. Optional.
- * A 2-D tensor of shape [fw_num_units, input_size].
- * * 43: The forward auxiliary input-to-output weights. Optional.
- * A 2-D tensor of shape [fw_num_units, input_size].
- * * 44: The backward auxiliary input-to-input weights. Optional.
- * A 2-D tensor of shape [bw_num_units, input_size].
- * * 45: The backward auxiliary input-to-forget weights. Optional.
- * A 2-D tensor of shape [bw_num_units, input_size].
- * * 46: The backward auxiliary input-to-cell weights. Optional.
- * A 2-D tensor of shape [bw_num_units, input_size].
- * * 47: The backward auxiliary input-to-output weights. Optional.
- * A 2-D tensor of shape [bw_num_units, input_size].
+ * A 3-D tensor of shape [max_time, batch_size, aux_input_size],
+ * where “batch_size” corresponds to the batching dimension, and
+ * “aux_input_size” is the size of the auxiliary input. See
+ * the docs above for the usage modes explanation.
+ * * 40: The forward auxiliary input-to-input weights.
+ * Optional. See the docs above for the usage modes explanation.
+ * A 2-D tensor of shape [fw_num_units, aux_input_size].
+ * * 41: The forward auxiliary input-to-forget weights.
+ * Optional. See the docs above for the usage modes explanation.
+ * A 2-D tensor of shape [fw_num_units, aux_input_size].
+ * * 42: The forward auxiliary input-to-cell weights.
+ * Optional. See the docs above for the usage modes explanation.
+ * A 2-D tensor of shape [fw_num_units, aux_input_size].
+ * * 43: The forward auxiliary input-to-output weights.
+ * Optional. See the docs above for the usage modes explanation.
+ * A 2-D tensor of shape [fw_num_units, aux_input_size].
+ * * 44: The backward auxiliary input-to-input weights.
+ * Optional. See the docs above for the usage modes explanation.
+ * A 2-D tensor of shape [bw_num_units, aux_input_size].
+ * * 45: The backward auxiliary input-to-forget weights.
+ * Optional. See the docs above for the usage modes explanation.
+ * A 2-D tensor of shape [bw_num_units, aux_input_size].
+ * * 46: The backward auxiliary input-to-cell weights.
+ * Optional. See the docs above for the usage modes explanation.
+ * A 2-D tensor of shape [bw_num_units, aux_input_size].
+ * * 47: The backward auxiliary input-to-output weights.
+ * Optional. See the docs above for the usage modes explanation.
+ * A 2-D tensor of shape [bw_num_units, aux_input_size].
* * 48: The activation function.
* A value indicating the activation function:
* <ul>
@@ -2576,17 +2808,17 @@ typedef enum {
* then clipping is disabled.
* If all the input tensors have type {@link ANEURALNETWORKS_TENSOR_FLOAT32},
* this scalar must be of the type {@link ANEURALNETWORKS_FLOAT32},
- * otherwise if all the input tensors have the type {@link
- * ANEURALNETWORKS_TENSOR_FLOAT16}, this scalar must be of type {@link
- * ANEURALNETWORKS_FLOAT16}.
+ * otherwise if all the input tensors have the type
+ * {@link ANEURALNETWORKS_TENSOR_FLOAT16}, this scalar must be
+ * of type {@link ANEURALNETWORKS_FLOAT16}.
* * 50: The clipping threshold for the output from the
* projection layer, such that values are bound within
* [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
* If all the input tensors have type {@link ANEURALNETWORKS_TENSOR_FLOAT32},
* this scalar must be of the type {@link ANEURALNETWORKS_FLOAT32},
- * otherwise if all the input tensors have the type {@link
- * ANEURALNETWORKS_TENSOR_FLOAT16}, this scalar must be of type {@link
- * ANEURALNETWORKS_FLOAT16}.
+ * otherwise if all the input tensors have the type
+ * {@link ANEURALNETWORKS_TENSOR_FLOAT16}, this scalar must be
+ * of type {@link ANEURALNETWORKS_FLOAT16}.
* * 51: merge_outputs
* An {@link ANEURALNETWORKS_BOOL} scalar specifying if the outputs
* from forward and backward cells should be merged.
@@ -2633,8 +2865,36 @@ typedef enum {
* A 3-D tensor of shape:
* If time-major: [max_time, batch_size, bw_output_size]
* If batch-major: [batch_size, max_time, bw_output_size]
+ * * 2: The forward activation state output.
+ * A 2-D tensor of shape [batch_size, fw_output_size] containing an
+ * activation state from the last time step in the sequence. This
+ * output is optional and can be omitted. If this output is present
+ * then outputs 3-5 must be present as well.
+ * Available since API level 30.
+ * * 3: The forward cell state output.
+ * A tensor of shape [batch_size, fw_cell_size] containing a cell state
+ * from the last time step in the sequence. This output is optional
+ * and can be omitted. If this output is present
+ * then outputs 2, 4, 5 must be present as well.
+ * Available since API level 30.
+ * * 4: The backward activation state output.
+ * A 2-D tensor of shape [batch_size, bw_output_size] containing an
+ * activation state from the last time step in the sequence. This
+ * output is optional and can be omitted. If this output is present
+ * then outputs 2, 3, 5 must be present as well.
+ * Available since API level 30.
+ * * 5: The backward cell state output.
+ * A tensor of shape [batch_size, bw_cell_size] containing a cell state
+ * from the last time step in the sequence. This output is optional
+ * and can be omitted. If this output is present
+ * then outputs 2-4 must be present as well.
+ * Available since API level 30.
*
* Available since API level 29.
+ *
+ * Important: As of API level 29, there is no way to get the output state
+ * tensors out and NNAPI does not maintain internal states. This operator
+ * does not support the usage pattern in which multiple cells are chained
+ * and state tensors are propagated.
*/
ANEURALNETWORKS_BIDIRECTIONAL_SEQUENCE_LSTM = 42,
@@ -2662,8 +2922,8 @@ typedef enum {
* * “activation” is the function passed as the “fused_activation_function”
* argument (if not “NONE”).
*
- * The op also supports an auxiliary input. Regular cell feeds one input
- * into the two RNN cells in the following way:
+ * The op supports cross-linking via an auxiliary input. Regular cell feeds
+ * one input into the two RNN cells in the following way:
*
* INPUT (INPUT_REVERSED)
* | |
@@ -2673,8 +2933,8 @@ typedef enum {
* | |
* FW_OUT BW_OUT
*
- * An op with an auxiliary input takes two inputs and feeds them into the
- * RNN cells in the following way:
+ * An op with cross-linking takes two inputs and feeds them into the RNN
+ * cells in the following way:
*
* AUX_INPUT (AUX_INPUT_REVERSED)
* | |
@@ -2687,9 +2947,26 @@ typedef enum {
* | |
* FW_OUT BW_OUT
*
+ * The cross-linking mode is enabled iff auxiliary input and auxiliary
+ * weights are present. While stacking this op on top of itself, this
+ * allows connecting both forward and backward outputs from the previous
+ * cell to the next cell's input.
+ *
+ * Since API level 30, parallel linking mode is supported. The mode is
+ * enabled if auxiliary input is present but auxiliary weights are omitted.
+ * In this case, the cell feeds inputs into the RNN in the following way:
+ *
+ * INPUT (AUX_INPUT_REVERSED)
+ * | |
+ * ---------------------
+ * | FW_RNN BW_RNN |
+ * ---------------------
+ * | |
+ * FW_OUT BW_OUT
+ *
- * While stacking this op on top of itself, this allows to connect both
- * forward and backward outputs from previous cell to the next cell's
- * inputs.
+ * While stacking this op on top of itself, this allows connecting both
+ * forward and backward outputs from the previous cell to the next cell's
+ * corresponding inputs.
*
* Supported tensor {@link OperandCode}:
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
@@ -2722,11 +2999,17 @@ typedef enum {
* A 2-D tensor of shape [batchSize, bwNumUnits]. Specifies a hidden
* state input for the first time step of the computation.
* * 9: auxInput.
- * A 3-D tensor. The shape is the same as of the input 0.
+ * A 3-D tensor. The shape is defined by the input 6 (timeMajor). If
+ * it is set to true, then the input has a shape [maxTime, batchSize,
+ * auxInputSize], otherwise the input has a shape [batchSize, maxTime,
+ * auxInputSize]. Can be omitted. See the docs above for the usage
+ * modes explanation.
* * 10:fwAuxWeights.
- * A 2-D tensor of shape [fwNumUnits, inputSize].
+ * A 2-D tensor of shape [fwNumUnits, auxInputSize]. Can be omitted.
+ * See the docs above for the usage modes explanation.
* * 11:bwAuxWeights.
- * A 2-D tensor of shape [bwNumUnits, inputSize].
+ * A 2-D tensor of shape [bwNumUnits, auxInputSize]. Can be omitted.
+ * See the docs above for the usage modes explanation.
* * 12:fusedActivationFunction.
* A {@link FuseCode} value indicating the activation function. If
* “NONE” is specified then it results in a linear activation.
@@ -2752,8 +3035,24 @@ typedef enum {
* (timeMajor). If it is set to true, then the shape is set to
* [maxTime, batchSize, bwNumUnits], otherwise the shape is set to
* [batchSize, maxTime, bwNumUnits].
+ * * 2: The forward hidden state output.
+ * A 2-D tensor of shape [batchSize, fwNumUnits] containing a hidden
+ * state from the last time step in the sequence. This output is
+ * optional and can be omitted. If this output is present then output
+ * 3 must be present as well.
+ * Available since API level 30.
+ * * 3: The backward hidden state output.
+ * A 2-D tensor of shape [batchSize, bwNumUnits] containing a hidden
+ * state from the last time step in the sequence. This output is
+ * optional and can be omitted. If this output is present then output
+ * 2 must be present as well.
+ * Available since API level 30.
*
* Available since API level 29.
+ *
+ * Important: As of API level 29, there is no way to get the output state
+ * tensors out and NNAPI does not maintain internal states. This operator
+ * does not support the usage pattern in which multiple cells are chained
+ * and state tensors are propagated.
*/
ANEURALNETWORKS_BIDIRECTIONAL_SEQUENCE_RNN = 43,
@@ -2780,6 +3079,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Inputs:
* * 0: A 2-D Tensor of shape [num_rois, num_classes], specifying the score
@@ -2791,7 +3091,11 @@ typedef enum {
* order of the boxes corresponds with input0. For input0 of type
* {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, this tensor should be of
* {@link ANEURALNETWORKS_TENSOR_QUANT16_ASYMM}, with zeroPoint of 0 and
- * scale of 0.125. Zero num_rois is supported for this tensor.
+ * scale of 0.125.
+ * For input0 of type {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED},
+ * this tensor should be of {@link ANEURALNETWORKS_TENSOR_QUANT16_ASYMM},
+ * with zeroPoint of -128 and scale of 0.125.
+ * Zero num_rois is supported for this tensor.
* * 2: A 1-D {@link ANEURALNETWORKS_TENSOR_INT32} tensor, of shape
* [num_rois], specifying the batch index of each box. Boxes with
* the same batch index are grouped together.
@@ -2818,6 +3122,8 @@ typedef enum {
* [num_output_rois], specifying the score of each output box. The boxes
* are grouped by batches, but the sequential order in each batch is not
- * guaranteed. For type of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
+ * guaranteed. For type of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * or {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED},
* the scale and zero point must be the same as input0.
* * 1: A 2-D Tensor of the same {@link OperandCode} as input1, with shape
* [num_output_rois, 4], specifying the coordinates of each
@@ -2837,7 +3143,7 @@ typedef enum {
ANEURALNETWORKS_BOX_WITH_NMS_LIMIT = 44,
/**
- * Casts a tensor to a new type.
+ * Casts a tensor to a type.
*
 * This operation ignores the scale and zeroPoint of quantized tensors,
* e.g. it treats a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} input
@@ -2848,6 +3154,14 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * Since API level 30, casting tensors of the following
+ * {@link OperandCode} to the same {@link OperandCode} is supported:
+ * * {@link ANEURALNETWORKS_TENSOR_BOOL8}
+ * * {@link ANEURALNETWORKS_TENSOR_INT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT16_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT16_SYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM}
*
* Supported tensor rank: from 1
*
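
A sketch of wiring a CAST into a model; `in_index` and `out_index` are placeholders for operands already added with the source and target types (the output operand's type determines the cast target):

    #include <android/NeuralNetworks.h>

    /* Sketch: add a CAST operation between two existing operands. */
    int add_cast(ANeuralNetworksModel* model,
                 uint32_t in_index, uint32_t out_index) {
      const uint32_t inputs[1] = {in_index};
      const uint32_t outputs[1] = {out_index};
      return ANeuralNetworksModel_addOperation(model, ANEURALNETWORKS_CAST,
                                               1, inputs, 1, outputs);
    }
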
@@ -2880,6 +3194,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4
*
@@ -2894,7 +3209,8 @@ typedef enum {
*
* Outputs:
* * 0: A tensor of the same {@link OperandCode} and same shape as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 29.
@@ -2952,14 +3268,14 @@ typedef enum {
* * 11: A scalar, score_threshold. Boxes with scores lower than the
* threshold are filtered before sending to the NMS algorithm. The
* scalar must be of {@link ANEURALNETWORKS_FLOAT16} if input0 is of
- * {@link ANEURALNETWORKS_TENSOR_FLOAT16} and of {@link
- * ANEURALNETWORKS_FLOAT32} if input0 is of {@link
- * ANEURALNETWORKS_TENSOR_FLOAT32}.
+ * {@link ANEURALNETWORKS_TENSOR_FLOAT16} and of
+ * {@link ANEURALNETWORKS_FLOAT32} if input0 is of
+ * {@link ANEURALNETWORKS_TENSOR_FLOAT32}.
* * 12: A scalar, specifying the IoU threshold for hard NMS. The scalar
- * must be of {@link ANEURALNETWORKS_FLOAT16} if input0 is of {@link
- * ANEURALNETWORKS_TENSOR_FLOAT16} and of {@link
- * ANEURALNETWORKS_FLOAT32} if input0 is of {@link
- * ANEURALNETWORKS_TENSOR_FLOAT32}.
+ * must be of {@link ANEURALNETWORKS_FLOAT16} if input0 is of
+ * {@link ANEURALNETWORKS_TENSOR_FLOAT16} and of
+ * {@link ANEURALNETWORKS_FLOAT32} if input0 is of
+ * {@link ANEURALNETWORKS_TENSOR_FLOAT32}.
* * 13: An {@link ANEURALNETWORKS_BOOL} scalar, set to true to include
* background class in the list of label map for the output, set
* to false to not include the background. When the background
@@ -2992,6 +3308,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1
*
@@ -3041,6 +3358,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1
*
@@ -3052,7 +3370,8 @@ typedef enum {
* Outputs:
* * 0: An (n + 1)-D tensor with the same {@link OperandCode} and data as
* input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 29.
@@ -3078,6 +3397,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1
*
@@ -3092,7 +3412,8 @@ typedef enum {
*
* Outputs:
* * 0: An (n + k - 1)-D tensor with the same {@link OperandCode} as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 29.
@@ -3115,6 +3436,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Inputs:
* * 0: A 4-D Tensor specifying the score of each anchor at each
@@ -3132,11 +3454,13 @@ typedef enum {
* dimensions is the channel dimension.
* * 2: A 2-D Tensor of shape [num_anchors, 4], specifying the shape of each
* predefined anchor, with format [x1, y1, x2, y2]. For input0 of type
- * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, this tensor should be of
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} or
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED}, this tensor should be of
* {@link ANEURALNETWORKS_TENSOR_QUANT16_SYMM}, with scale of 0.125.
* * 3: A 2-D Tensor of shape [batches, 2], specifying the size of
* each image in the batch, with format [image_height, image_width].
- * For input0 of type {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, this
+ * For input0 of type {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} or
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED}, this
* tensor should be of {@link ANEURALNETWORKS_TENSOR_QUANT16_SYMM}, with
* scale of 0.125.
* * 4: An {@link ANEURALNETWORKS_FLOAT32} scalar, specifying the ratio
@@ -3163,7 +3487,8 @@ typedef enum {
* [num_output_rois], specifying the score of each output box.
* The boxes are grouped by batches, but the sequential order in
* each batch is not guaranteed. For type of
- * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, the scale and zero
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} or
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED}, the scale and zero
* point must be the same as input0.
* * 1: A tensor of the same {@link OperandCode} as input3, of shape
* [num_output_rois, 4], specifying the coordinates of each output
@@ -3188,6 +3513,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1
*
@@ -3213,6 +3539,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1
*
@@ -3271,12 +3598,23 @@ typedef enum {
* * * {@link ANEURALNETWORKS_TENSOR_INT32} for bias (with scale set to
* * * input.scale * filter.scale).
*
+ * * Quantized signed (since API level 30):
+ * * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} for input, filter, and output.
+ * * * {@link ANEURALNETWORKS_TENSOR_INT32} for bias (with scale set to
+ * * * input.scale * filter.scale).
+ *
* * Quantized with symmetric per channel quantization for the filter:
* * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} for input, and output.
* * * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL} for filter.
* * * {@link ANEURALNETWORKS_TENSOR_INT32} for bias (scale set to 0.0,
* * * each value scaling is separate and equal to input.scale * filter.scales[channel]).
*
+ * * Quantized signed with filter symmetric per channel quantization (since API level 30):
+ * * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} for input, and output.
+ * * * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL} for filter.
+ * * * {@link ANEURALNETWORKS_TENSOR_INT32} for bias (scale set to 0.0,
+ * * * each value scaling is separate and equal to input.scale * filter.scales[channel]).
+ *
* Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
* With the default data layout NHWC, the data is stored in the order of:
* [batch, height, width, channels]. Alternatively, the data layout could
@@ -3295,8 +3633,9 @@ typedef enum {
* {@link ANeuralNetworksSymmPerChannelQuantParams}) must be set to 0.
* * 2: A 1-D tensor, of shape [depth_out], specifying the bias. For input
* tensor of type {@link ANEURALNETWORKS_TENSOR_FLOAT32} or
- * {@link ANEURALNETWORKS_TENSOR_FLOAT16}, the bias must be of the same
- * type. For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
+ * {@link ANEURALNETWORKS_TENSOR_FLOAT16}, the bias must be of the same type.
+ * For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED},
* the bias should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint
* of 0 and bias_scale == input_scale * filter_scale. For filter tensor
* of {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL}, the bias
@@ -3316,7 +3655,7 @@ typedef enum {
* * 8: An {@link ANEURALNETWORKS_INT32} scalar, specifying the stride when
* walking through input in the ‘height’ dimension.
* * 9: An {@link ANEURALNETWORKS_INT32} scalar, specifying the number of
- groups.
+ * groups.
* * 10: An {@link ANEURALNETWORKS_INT32} scalar, and has to be one of the
* {@link FuseCode} values. Specifies the activation to
* invoke on the result.
@@ -3330,12 +3669,14 @@ typedef enum {
* [depth_out, filter_height, filter_width, depth_group], specifying
* the filter, where depth_out must be divisible by num_groups. For
* tensor of type {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL}
- * the channel dimension (channelDim at
- * {@link ANeuralNetworksSymmPerChannelQuantParams}) must be set to 0.
+ * the channel dimension (ANeuralNetworksSymmPerChannelQuantParams::channelDim)
+ * must be set to 0.
* * 2: A 1-D tensor, of shape [depth_out], specifying the bias. For input
* tensor of type {@link ANEURALNETWORKS_TENSOR_FLOAT32} or
* {@link ANEURALNETWORKS_TENSOR_FLOAT16}, the bias must be of the same
- * type. For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
+ * type.
+ * For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED},
* the bias should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint
* of 0 and bias_scale == input_scale * filter_scale. For filter tensor
* of {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL}, the bias
@@ -3360,7 +3701,8 @@ typedef enum {
* Outputs:
* * 0: The output 4-D tensor, of shape
* [batches, out_height, out_width, depth_out].
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint can be different from inputs' scale and zeroPoint.
*
* Available since API level 29.
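
    A quick cross-check of the shape constraints quoted above (filter depth_group
    versus input depth_in, and depth_out divisible by num_groups). This is only a
    sketch derived from the shapes in this comment, not part of the NNAPI:

        #include <stdbool.h>
        #include <stdint.h>

        /* Sketch: validate the GROUPED_CONV_2D shape rules described above. */
        static bool grouped_conv_shapes_ok(uint32_t depth_in, uint32_t depth_out,
                                           uint32_t depth_group, uint32_t num_groups)
        {
            return num_groups > 0 &&
                   depth_in % num_groups == 0 &&
                   depth_group == depth_in / num_groups && /* filter's last dim */
                   depth_out % num_groups == 0;            /* split across groups */
        }
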
@@ -3382,6 +3724,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
* With the default data layout NHWC, the data is stored in the order of:
@@ -3398,13 +3741,18 @@ typedef enum {
* {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, this tensor should
* be of {@link ANEURALNETWORKS_TENSOR_QUANT16_ASYMM}, with zeroPoint
* of 0 and scale of 0.125.
+ * For input0 of type
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED}, this tensor
+ * should be of {@link ANEURALNETWORKS_TENSOR_QUANT16_ASYMM}, with
+ * zeroPoint of -128 and scale of 0.125.
* * 2: An {@link ANEURALNETWORKS_BOOL} scalar, set to true to specify
* NCHW data layout for input0. Set to false for NHWC.
*
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0, with shape
* [num_boxes, num_keypoints], specifying score of the keypoints.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} or
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint can be different from input0 scale and zeroPoint.
* * 1: A tensor of the same {@link OperandCode} as input1, with shape
* [num_boxes, num_keypoints, 2], specifying the location of
@@ -3447,19 +3795,19 @@ typedef enum {
* * 0: An n-D tensor, specifying the tensor to be normalized.
* * 1: A scalar, specifying gamma, the scale applied to the normalized
* tensor. The scalar must be of {@link ANEURALNETWORKS_FLOAT16} if
- * input0 is of {@link ANEURALNETWORKS_TENSOR_FLOAT16} and of {@link
- * ANEURALNETWORKS_FLOAT32} if input0 is of {@link
- * ANEURALNETWORKS_TENSOR_FLOAT32}.
+ * input0 is of {@link ANEURALNETWORKS_TENSOR_FLOAT16} and of
+ * {@link ANEURALNETWORKS_FLOAT32} if input0 is of
+ * {@link ANEURALNETWORKS_TENSOR_FLOAT32}.
* * 2: A scalar, specifying beta, the offset applied to the normalized
* tensor. The scalar must be of {@link ANEURALNETWORKS_FLOAT16} if
- * input0 is of {@link ANEURALNETWORKS_TENSOR_FLOAT16} and of {@link
- * ANEURALNETWORKS_FLOAT32} if input0 is of {@link
- * ANEURALNETWORKS_TENSOR_FLOAT32}.
+ * input0 is of {@link ANEURALNETWORKS_TENSOR_FLOAT16} and of
+ * {@link ANEURALNETWORKS_FLOAT32} if input0 is of
+ * {@link ANEURALNETWORKS_TENSOR_FLOAT32}.
* * 3: A scalar, specifying epsilon, the small value added to variance to
* avoid dividing by zero. The scalar must be of {@link ANEURALNETWORKS_FLOAT16} if
- * input0 is of {@link ANEURALNETWORKS_TENSOR_FLOAT16} and of {@link
- * ANEURALNETWORKS_FLOAT32} if input0 is of {@link
- * ANEURALNETWORKS_TENSOR_FLOAT32}.
+ * input0 is of {@link ANEURALNETWORKS_TENSOR_FLOAT16} and of
+ * {@link ANEURALNETWORKS_FLOAT32} if input0 is of
+ * {@link ANEURALNETWORKS_TENSOR_FLOAT32}.
* * 4: An {@link ANEURALNETWORKS_BOOL} scalar, set to true to specify
* NCHW data layout for input0 and output0. Set to false for NHWC.
*
@@ -3479,6 +3827,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1
*
@@ -3505,6 +3854,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1
*
@@ -3644,6 +3994,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1.
*
@@ -3656,7 +4007,8 @@ typedef enum {
*
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint can be different from inputs' scale and zeroPoint.
*
* Available since API level 29.
@@ -3671,6 +4023,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1.
*
@@ -3683,7 +4036,8 @@ typedef enum {
*
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint can be different from inputs' scale and zeroPoint.
*
* Available since API level 29.
@@ -3719,6 +4073,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1
*
@@ -3744,6 +4099,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4
*
@@ -3761,7 +4117,8 @@ typedef enum {
* pad value must be of {@link ANEURALNETWORKS_FLOAT16}.
* For input tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, the
* pad value must be of {@link ANEURALNETWORKS_FLOAT32}.
- * For input tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
+ * For input tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED},
* the pad value must be of {@link ANEURALNETWORKS_INT32}. The
* scale and zeroPoint are assumed to be the same as in input0.
*
@@ -3773,7 +4130,8 @@ typedef enum {
* of the padding:
* output0.dimension[i] =
* padding[i, 0] + input0.dimension[i] + padding[i, 1]
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 29.
@@ -3836,6 +4194,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1
*
@@ -3846,8 +4205,9 @@ typedef enum {
*
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
- * the scale and zeroPoint can be diffent from the input0 scale and zeroPoint.
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+ * the scale and zeroPoint can be different from input0 scale and zeroPoint.
*
* Available since API level 29.
*/
@@ -3856,14 +4216,23 @@ typedef enum {
/**
* Quantizes the input tensor.
*
- * The formula is:
+ * The formula for {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} output tensor is:
*
 * output = max(0, min(255, round(input / scale) + zeroPoint))
*
- * Supported tensor {@link OperandCode}:
+ * The formula for {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} output
+ * tensor is:
+ *
+ * output = max(-128, min(127, round(input / scale) + zeroPoint))
+ *
+ * Supported input tensor {@link OperandCode}:
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
*
+ * Supported output tensor {@link OperandCode}:
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
+ *
* Supported tensor rank: from 1
*
* Inputs:
@@ -3871,7 +4240,8 @@ typedef enum {
*
* Outputs:
* * 0: The output tensor of same shape as input0, but with
- * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}.
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} or
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED}.
*
* Available since API level 29.
*/
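
    Transcribed literally, the two QUANTIZE formulas above correspond to this
    sketch (plain C, not the driver implementation):

        #include <math.h>
        #include <stdint.h>

        /* Sketch: QUANT8_ASYMM output, clamped to [0, 255]. */
        static uint8_t quantize_asymm(float input, float scale, int32_t zeroPoint)
        {
            float q = roundf(input / scale) + (float)zeroPoint;
            return (uint8_t)fmaxf(0.0f, fminf(255.0f, q));
        }

        /* Sketch: QUANT8_ASYMM_SIGNED output, clamped to [-128, 127]. */
        static int8_t quantize_asymm_signed(float input, float scale, int32_t zeroPoint)
        {
            float q = roundf(input / scale) + (float)zeroPoint;
            return (int8_t)fmaxf(-128.0f, fminf(127.0f, q));
        }
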
@@ -3995,7 +4365,8 @@ typedef enum {
* * 1: A scalar {@link ANEURALNETWORKS_INT32}, specifying the number of
* independent samples to draw for each row slice.
* * 2: A 1-D {@link ANEURALNETWORKS_TENSOR_INT32} tensor with shape [2],
- * specifying seeds used to initialize the random distribution.
+ * specifying seeds used to initialize the random distribution. If both
+ * provided seeds are 0, both will be randomly generated.
* Outputs:
* * 0: A 2-D {@link ANEURALNETWORKS_TENSOR_INT32} tensor with shape
* [batches, samples], containing the drawn samples.
@@ -4026,6 +4397,8 @@ typedef enum {
*
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0.
+ * If all dimensions are reduced and keep_dims is false, the output
+ * shape is [1].
*
* Available since API level 29.
*/
@@ -4053,6 +4426,8 @@ typedef enum {
*
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0.
+ * If all dimensions are reduced and keep_dims is false, the output
+ * shape is [1].
*
* Available since API level 29.
*/
@@ -4070,6 +4445,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4
*
@@ -4082,7 +4458,10 @@ typedef enum {
*
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * If all dimensions are reduced and keep_dims is false, the output
+ * shape is [1].
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 29.
@@ -4101,6 +4480,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: up to 4
*
@@ -4113,7 +4493,10 @@ typedef enum {
*
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * If all dimensions are reduced and keep_dims is false, the output
+ * shape is [1].
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 29.
@@ -4142,6 +4525,8 @@ typedef enum {
*
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0.
+ * If all dimensions are reduced and keep_dims is false, the output
+ * shape is [1].
*
* Available since API level 29.
*/
@@ -4169,6 +4554,8 @@ typedef enum {
*
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0.
+ * If all dimensions are reduced and keep_dims is false, the output
+ * shape is [1].
*
* Available since API level 29.
*/
@@ -4188,9 +4575,10 @@ typedef enum {
* interpolation.
*
* Supported tensor {@link OperandCode}:
- * * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
* With the default data layout NHWC, the data is stored in the order of:
@@ -4229,7 +4617,8 @@ typedef enum {
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0. The output
* shape is [num_rois, out_height, out_width, depth].
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint can be different from the input0 scale and zeroPoint.
*
* Available since API level 29.
@@ -4252,6 +4641,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
* With the default data layout NHWC, the data is stored in the order of:
@@ -4262,7 +4652,8 @@ typedef enum {
* * 0: A 4-D tensor, specifying the feature map.
* * 1: A 2-D Tensor of shape [num_rois, 4], specifying the locations of
* the regions of interest, each line with format [x1, y1, x2, y2].
- * For input0 of type {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
+ * For input0 of type {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* this tensor should be of {@link ANEURALNETWORKS_TENSOR_QUANT16_ASYMM},
* with zeroPoint of 0 and scale of 0.125.
* * 2: An 1-D {@link ANEURALNETWORKS_TENSOR_INT32} tensor, of shape
@@ -4282,7 +4673,8 @@ typedef enum {
* Outputs:
* * 0: A tensor of the same {@link OperandCode} as input0. The output
* shape is [num_rois, out_height, out_width, depth].
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For input0 of type {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 29.
@@ -4319,6 +4711,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1
*
@@ -4329,7 +4722,8 @@ typedef enum {
* true) or input2 (if false).
* * 1: An input tensor of the same shape as input0.
* * 2: An input tensor of the same shape and type as input1.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * and {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
 * the scale and zeroPoint can be different from input1 scale and zeroPoint.
*
* Outputs:
@@ -4337,6 +4731,7 @@ typedef enum {
* For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
* the scale and zeroPoint can be different from inputs' scale and zeroPoint.
*
+ * Available since API level 29.
*/
ANEURALNETWORKS_SELECT = 84,
@@ -4376,6 +4771,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1
*
@@ -4388,7 +4784,8 @@ typedef enum {
*
* Outputs:
* * 0: An n-D tensor of the same type as the input containing the slice.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* its scale and zeroPoint has to be same as the input0 scale and zeroPoint.
*
* Available since API level 29.
@@ -4403,6 +4800,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1
*
@@ -4415,7 +4813,8 @@ typedef enum {
*
* Outputs:
* * 0 ~ (num_splits - 1): Resulting subtensors.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 29.
@@ -4455,6 +4854,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1
*
@@ -4465,7 +4865,8 @@ typedef enum {
*
* Outputs:
* * 0: A tiled tensor of the same {@link OperandCode} and rank as `input`.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 29.
@@ -4483,6 +4884,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_INT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: from 1
*
@@ -4494,7 +4896,8 @@ typedef enum {
* Outputs:
* * 0: An n-D tensor of the same type as the input, containing the k
* largest elements along each last dimensional slice.
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
* * 1: An n-D tensor of type {@link ANEURALNETWORKS_TENSOR_INT32}
* containing the indices of values within the last dimension of input.
@@ -4531,6 +4934,18 @@ typedef enum {
* * * {@link ANEURALNETWORKS_TENSOR_INT32} for bias (scale set to 0.0,
* * * each value scaling is separate and equal to input.scale * filter.scales[channel]).
*
+ * Available since API level 30:
+ * * Quantized signed (since API level 30):
+ * * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} for input, filter, and output.
+ * * * {@link ANEURALNETWORKS_TENSOR_INT32} for bias (with scale set to
+ * * * input.scale * filter.scale).
+ *
+ * * Quantized signed with filter symmetric per channel quantization (since API level 30):
+ * * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} for input, and output.
+ * * * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL} for filter.
+ * * * {@link ANEURALNETWORKS_TENSOR_INT32} for bias (scale set to 0.0,
+ * * * each value scaling is separate and equal to input.scale * filter.scales[channel]).
+ *
* Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
* With the default data layout NHWC, the data is stored in the order of:
* [batch, height, width, channels]. Alternatively, the data layout could
@@ -4540,24 +4955,25 @@ typedef enum {
*
* Inputs (explicit padding):
* * 0: A 4-D tensor, of shape [batches, height, width, depth_in],
- * specifying the input. Since API level 29, zero batches is supported
- * for this tensor.
+ * specifying the input.
+ * Since API level 29, zero batches is supported for this tensor.
* * 1: A 4-D tensor, of shape
* [depth_out, filter_height, filter_width, depth_in], specifying the
* filter. For tensor of type
* {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL} the channel
- * dimension (extraParams.channelQuant.channelDim) must be set to 0.
+ * dimension (ANeuralNetworksSymmPerChannelQuantParams::channelDim) must be set to 0.
* * 2: A 1-D tensor, of shape [depth_out], specifying the bias. For input
* tensor of type {@link ANEURALNETWORKS_TENSOR_FLOAT32} or
- * {@link ANEURALNETWORKS_TENSOR_FLOAT16}, the bias should be of the
- * same type. For input tensor of type
- * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, the bias should be
- * of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0 and
- * bias_scale == input_scale * filter_scale. For filter tensor of
- * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL}, the bias
- * must be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of
- * 0 and bias_scale of 0. The actual scale of each value 'i' is equal
- * to bias_scale[i] = input_scale * filter_scale[i].
+ * {@link ANEURALNETWORKS_TENSOR_FLOAT16}, the bias must be of the
+ * same type.
+ * For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * and {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED},
+ * the bias should be of {@link ANEURALNETWORKS_TENSOR_INT32},
+ * with zeroPoint of 0 and bias_scale == input_scale * filter_scale.
+ * For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL},
+ * the bias must be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0
+ * and bias_scale of 0. The actual scale of each value 'i' is equal to
+ * bias_scale[i] = input_scale * filter_scale[i].
* * 3: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on
* the left, in the ‘width’ dimension.
* * 4: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on
@@ -4578,24 +4994,25 @@ typedef enum {
*
* Inputs (implicit padding):
* * 0: A 4-D tensor, of shape [batches, height, width, depth_in],
- * specifying the input. Since API level 29, zero batches is supported
- * for this tensor.
+ * specifying the input.
+ * Since API level 29, zero batches is supported for this tensor.
* * 1: A 4-D tensor, of shape
* [depth_out, filter_height, filter_width, depth_in], specifying the
* filter. For tensor of type
* {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL} the channel
- * dimension (extraParams.channelQuant.channelDim) must be set to 0.
+ * dimension (ANeuralNetworksSymmPerChannelQuantParams::channelDim) must be set to 0.
* * 2: A 1-D tensor, of shape [depth_out], specifying the bias. For input
* tensor of type {@link ANEURALNETWORKS_TENSOR_FLOAT32} or
* {@link ANEURALNETWORKS_TENSOR_FLOAT16}, the bias should be of the
- * same type. For input tensor of type
- * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, the bias should be
- * of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0 and
- * bias_scale == input_scale * filter_scale. For filter tensor of
- * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL}, the bias
- * must be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of
- * 0 and bias_scale of 0. The actual scale of each value 'i' is equal
- * to bias_scale[i] = input_scale * filter_scale[i].
+ * same type.
+ * For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * and {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED},
+ * the bias should be of {@link ANEURALNETWORKS_TENSOR_INT32},
+ * with zeroPoint of 0 and bias_scale == input_scale * filter_scale.
+ * For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL},
+ * the bias must be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0
+ * and bias_scale of 0. The actual scale of each value 'i' is equal to
+ * bias_scale[i] = input_scale * filter_scale[i].
* * 3: An {@link ANEURALNETWORKS_TENSOR_INT32} tensor, specifying the output
* tensor shape.
* * 4: An {@link ANEURALNETWORKS_INT32} scalar, specifying the implicit
@@ -4614,7 +5031,8 @@ typedef enum {
* Outputs:
* * 0: The output 4-D tensor, of shape
* [batches, out_height, out_width, depth_out].
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint can be different from inputs' scale and zeroPoint.
*
* Available since API level 29.
@@ -4727,8 +5145,21 @@ typedef enum {
* A 3-D tensor of shape:
* If time-major: [max_time, batch_size, output_size]
* If batch-major: [batch_size, max_time, output_size]
+ * * 1: A tensor of shape [batch_size, output_size] containing a hidden
+ * state from the last time step in the sequence. This output is
+ * optional and can be omitted. If this output is present then
+ * output #2 must be present as well.
+ * Available since API level 30.
+ * * 2: A tensor of shape [batch_size, cell_size] containing a cell state
+ * from the last time step in the sequence. This output is optional
+ * and can be omitted.
+ * Available since API level 30.
*
* Available since API level 29.
+ *
+ * Important: As of API level 29, there is no way to get the output state tensors out and NNAPI
+ * does not maintain internal states. This operator does not support the usage pattern in which
+ * multiple cells are chained and state tensors are propagated.
*/
ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_LSTM = 92,
@@ -4784,8 +5215,16 @@ typedef enum {
* it is set to 1, then the output has a shape [maxTime, batchSize,
* numUnits], otherwise the output has a shape [batchSize, maxTime,
* numUnits].
+ * * 1: A tensor of shape [batchSize, numUnits] containing hidden state
+ * from the last time step in the sequence. This output is optional
+ * and can be omitted.
+ * Available since API level 30.
*
* Available since API level 29.
+ *
+ * Important: As of API level 29, there is no way to get the output state tensors out and NNAPI
+ * does not maintain internal states. This operator does not support the usage pattern in which
+ * multiple cells are chained and state tensors are propagated.
*/
ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_RNN = 93,
@@ -4800,6 +5239,7 @@ typedef enum {
* * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
* * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
* * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} (since API level 30)
*
* Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
* With the default data layout NHWC, the data is stored in the order of:
@@ -4817,6 +5257,17 @@ typedef enum {
* height of the output tensor.
* * 3: An {@link ANEURALNETWORKS_BOOL} scalar, default to false.
* Set to true to specify NCHW data layout for input0 and output0.
+ * * 4: Align corners. An optional {@link ANEURALNETWORKS_BOOL}
+ * scalar, default to false. If True, the centers of the 4 corner
+ * pixels of the input and output tensors are aligned, preserving the
+ * values at the corner pixels.
+ * Available since API level 30.
+ * * 5: Half pixel centers. An optional {@link ANEURALNETWORKS_BOOL}
+ * scalar, default to false. If True, the pixel centers are assumed to
+ * be at (0.5, 0.5). This is the default behavior of image.resize in
+ * TF 2.0. If this parameter is True, then align_corners parameter
+ * must be False.
+ * Available since API level 30.
*
* Inputs (resizing by scale):
* * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying
@@ -4835,16 +5286,377 @@ typedef enum {
* {@link ANEURALNETWORKS_FLOAT32} otherwise.
* * 3: An {@link ANEURALNETWORKS_BOOL} scalar, default to false.
* Set to true to specify NCHW data layout for input0 and output0.
+ * * 4: Align corners. An optional {@link ANEURALNETWORKS_BOOL}
+ * scalar, default to false. If True, the centers of the 4 corner
+ * pixels of the input and output tensors are aligned, preserving the
+ * values at the corner pixels.
+ * Available since API level 30.
+ * * 5: Half pixel centers. An optional {@link ANEURALNETWORKS_BOOL}
+ * scalar, default to false. If True, the pixel centers are assumed to
+ * be at (0.5, 0.5). This is the default behavior of image.resize in
+ * TF 2.0. If this parameter is True, then align_corners parameter
+ * must be False.
+ * Available since API level 30.
*
* Outputs:
* * 0: The output 4-D tensor, of shape
* [batches, new_height, new_width, depth].
- * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} tensor,
+ * For a {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} and
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED} tensor,
* the scale and zeroPoint must be the same as input0.
*
* Available since API level 29.
*/
ANEURALNETWORKS_RESIZE_NEAREST_NEIGHBOR = 94,
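
    The header does not spell out the coordinate transforms that the new
    align_corners and half_pixel_centers flags imply. Assuming the conventional
    TensorFlow definitions of these flags, a sketch of the source-coordinate
    computation for one axis would be:

        #include <stdbool.h>

        /* Sketch: source coordinate for one output coordinate under the three
         * modes described above (TF-convention assumption, not quoted text). */
        static float resize_src_coord(int dst, int in_size, int out_size,
                                      bool align_corners, bool half_pixel_centers)
        {
            if (align_corners && out_size > 1)
                return (float)dst * (float)(in_size - 1) / (float)(out_size - 1);
            float scale = (float)in_size / (float)out_size;
            if (half_pixel_centers)
                return ((float)dst + 0.5f) * scale - 0.5f; /* centers at (0.5, 0.5) */
            return (float)dst * scale;
        }
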
+
+ // Operations below are available since API level 30.
+
+ /**
+ * Quantized version of {@link ANEURALNETWORKS_LSTM}.
+ *
+ * The input and the output use asymmetric quantized types, while the rest
+ * use symmetric ones.
+ *
+ * Inputs:
+ * * 0: The input to the LSTM cell.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED}
+ * Shape: [batchSize, inputSize]
+ * * 1: The input-to-input weights. Optional.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM}
+ * Shape: [numUnits, inputSize]
+ * * 2: The input-to-forget weights.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM}
+ * Shape: [numUnits, inputSize]
+ * * 3: The input-to-cell weights.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM}
+ * Shape: [numUnits, inputSize]
+ * * 4: The input-to-output weights.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM}
+ * Shape: [numUnits, inputSize]
+ * * 5: The recurrent-to-input weights. Optional.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM}
+ * Shape: [numUnits, outputSize]
+ * * 6: The recurrent-to-forget weights.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM}
+ * Shape: [numUnits, outputSize]
+ * * 7: The recurrent-to-cell weights.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM}
+ * Shape: [numUnits, outputSize]
+ * * 8: The recurrent-to-output weights.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM}
+ * Shape: [numUnits, outputSize]
+ * * 9: The cell-to-input weights (for peephole). Optional.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT16_SYMM}
+ * Shape: [numUnits]
+ * * 10: The cell-to-forget weights (for peephole). Optional.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT16_SYMM}
+ * Shape: [numUnits]
+ * * 11: The cell-to-output weights (for peephole). Optional.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT16_SYMM}
+ * Shape: [numUnits]
+ * * 12: The input gate bias. Quantized with scale being the
+ * product of input and weights scales and zeroPoint equal to 0.
+ * Optional.
+ * Type: {@link ANEURALNETWORKS_TENSOR_INT32}
+ * Shape: [numUnits]
+ * * 13: The forget gate bias. Quantized with scale being the
+ * product of input and weights scales and zeroPoint equal to 0.
+ * Type: {@link ANEURALNETWORKS_TENSOR_INT32}
+ * Shape: [numUnits]
+ * * 14: The cell bias. Quantized with scale being the
+ * product of input and weights scales and zeroPoint equal to 0.
+ * Type: {@link ANEURALNETWORKS_TENSOR_INT32}
+ * Shape: [numUnits]
+ * * 15: The output gate bias. Quantized with scale being the
+ * product of input and weights scales and zeroPoint equal to 0.
+ * Type: {@link ANEURALNETWORKS_TENSOR_INT32}
+ * Shape: [numUnits]
+ * * 16: The projection weights. Optional.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM}
+ * Shape: [outputSize, numUnits]
+ * * 17: The projection bias. Quantized with scale being the
+ * product of input and weights scales and zeroPoint equal to 0.
+ * Optional.
+ * Type: {@link ANEURALNETWORKS_TENSOR_INT32}
+ * Shape: [outputSize]
+ * * 18: The output from the previous time step.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED}
+ * Shape: [batchSize, outputSize]
+ * * 19: The cell state from the previous time step.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT16_SYMM}
+ * Shape: [batchSize, numUnits]
+ * * 20: The input layer normalization weights. Used to rescale
+ * normalized inputs to activation at input gate. Optional.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT16_SYMM}
+ * Shape: [numUnits]
+ * * 21: The forget layer normalization weights. Used to
+ * rescale normalized inputs to activation at forget gate. Optional.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT16_SYMM}
+ * Shape: [numUnits]
+ * * 22: The cell layer normalization weights. Used to rescale
+ * normalized inputs to activation at cell gate. Optional.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT16_SYMM}
+ * Shape: [numUnits]
+ * * 23: The output layer normalization weights. Used to
+ * rescale normalized inputs to activation at output gate. Optional.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT16_SYMM}
+ * Shape: [numUnits]
+ * * 24: The cell clip. If provided the cell state is clipped
+ * by this value prior to the cell output activation. Optional.
+ * Type: {@link ANEURALNETWORKS_FLOAT32}.
+ * * 25: The projection clip. If provided and projection is enabled,
+ * this is used for clipping the projected values. Optional.
+ * Type: {@link ANEURALNETWORKS_FLOAT32}.
+ * * 26: The scale of the intermediate result of matmul,
+ * i.e. input to layer normalization, at input gate.
+ * Type: {@link ANEURALNETWORKS_FLOAT32}.
+ * * 27: The scale of the intermediate result of matmul,
+ * i.e. input to layer normalization, at forget gate.
+ * Type: {@link ANEURALNETWORKS_FLOAT32}.
+ * * 28: The scale of the intermediate result of matmul,
+ * i.e. input to layer normalization, at cell gate.
+ * Type: {@link ANEURALNETWORKS_FLOAT32}.
+ * * 29: The scale of the intermediate result of matmul,
+ * i.e. input to layer normalization, at output gate.
+ * Type: {@link ANEURALNETWORKS_FLOAT32}.
+ * * 30: The zero point of the hidden state, i.e. input to
+ * projection.
+ * Type: {@link ANEURALNETWORKS_INT32}.
+ * * 31: The scale of the hidden state, i.e. input to
+ * projection.
+ * Type: {@link ANEURALNETWORKS_FLOAT32}.
+ *
+ * Outputs:
+ * * 0: The output state (out).
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED}
+ * Shape: [batchSize, outputSize]
+ * * 1: The cell state (out).
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT16_SYMM}
+ * Shape: [batchSize, numUnits]
+ * * 2: The output. This is effectively the same as the current
+ * "output state (out)" value.
+ * Type: {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED}
+ * Shape: [batchSize, outputSize]
+ *
+ * Available since API level 30.
+ */
+ ANEURALNETWORKS_QUANTIZED_LSTM = 95,
+
+ /**
+ * Executes one of the two referenced models as determined by a boolean
+ * value.
+ *
+ * The inputs and outputs of the two referenced models must agree with the
+ * signature of this operation. That is, if the operation has (3 + n) inputs
+ * and m outputs, both models must have n inputs and m outputs with the same
+ * types, ranks (if specified), dimensions (if specified), scales,
+ * zeroPoints, and other operand parameters as the corresponding operation
+ * inputs and outputs.
+ *
+ * Inputs:
+ * * 0: A value of type {@link ANEURALNETWORKS_TENSOR_BOOL8} and shape [1]
+ * that determines which of the two referenced models to execute.
+ * The operand must have fully specified dimensions.
+ * * 1: A {@link ANEURALNETWORKS_MODEL} reference to the model to be
+ * executed if the condition is true.
+ * * 2: A {@link ANEURALNETWORKS_MODEL} reference to the model to be
+ * executed if the condition is false.
+ * * 3 ~ (n + 2): Inputs to be passed to the model selected for execution.
+ *
+ * Outputs:
+ * * 0 ~ (m - 1): Outputs produced by the selected model.
+ *
+ * Available since API level 30.
+ */
+ ANEURALNETWORKS_IF = 96,
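
    For orientation, a sketch of how an IF operation might be wired up with
    model-reference operands. Names and operand indices are illustrative, the
    referenced models are assumed finished with matching signatures, and error
    handling is omitted:

        #include <android/NeuralNetworks.h>

        /* Sketch: condOperand is TENSOR_BOOL8 of shape [1]; thenRef/elseRef are
         * operands of type ANEURALNETWORKS_MODEL; one data input, one output. */
        static void add_if_op(ANeuralNetworksModel* model,
                              const ANeuralNetworksModel* thenModel,
                              const ANeuralNetworksModel* elseModel,
                              uint32_t condOperand, uint32_t thenRef, uint32_t elseRef,
                              uint32_t dataOperand, uint32_t outOperand)
        {
            ANeuralNetworksModel_setOperandValueFromModel(model, thenRef, thenModel);
            ANeuralNetworksModel_setOperandValueFromModel(model, elseRef, elseModel);

            const uint32_t inputs[] = {condOperand, thenRef, elseRef, dataOperand};
            const uint32_t outputs[] = {outOperand};
            ANeuralNetworksModel_addOperation(model, ANEURALNETWORKS_IF,
                                              4, inputs, 1, outputs);
        }
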
+
+ /**
+ * Executes the body model until the condition model outputs false.
+ *
+ * The inputs to this operation are the condition model, the body model,
+ * and operand values for the first iteration of the loop. The values are
+ * implicitly split into three groups of input-output, state-only, and
+ * input-only values, as described below.
+ *
+ * The outputs of this operation are the final values of input-output
+ * operands.
+ *
+ * Both the condition and body model receive (m + k + n) inputs.
+ * * The first m (m >= 1) inputs are input-output operands. For the first
+ * iteration, these are initialized from the corresponding inputs of the
+ * WHILE operation. In subsequent iterations, their values come from the
+ * corresponding outputs of the body model produced during the previous
+ * iteration.
+ * * The next k (k >= 0) inputs are state-only operands. They are similar to
+ * the input-output operands, except that their values are no longer
+ * available after the loop terminates.
+ * * The last n (n >= 0) inputs are input-only operands. Their values come
+ * from the corresponding inputs of the WHILE operation.
+ *
+ * The body model produces (m + k) outputs.
+ * * The first m outputs are input-output operands. They become the outputs
+ * of the WHILE operation when a termination condition is reached.
+ * * The last k outputs are state-only operands. Their values are no longer
+ * available after the loop terminates.
+ *
+ * The numbers m, k, and n are inferred by the runtime as follows:
+ * m = (WHILE operation output count)
+ * k = (body model output count) - m
+ * n = (body model input count) - m - k
+ *
+ * The pseudo-code below illustrates the flow of a WHILE operation with
+ * inputs condition, body, initial_input_output, initial_state, input_only
+ * (m = 1, k = 1, n = 1):
+ *
+ * input_output = initial_input_output
+ * state = initial_state
+ * while condition(input_output, state, input_only):
+ * input_output, state = body(input_output, state, input_only)
+ * return input_output
+ *
+ * To prevent infinite loops, there is an implicit execution timeout
+ * associated with each loop ("loop timeout duration"). See {@link
+ * ANeuralNetworksExecution_setLoopTimeout}.
+ *
+ * Inputs:
+ * * 0: A {@link ANEURALNETWORKS_MODEL} reference to the condition
+ * model. The model must have (m + k + n) inputs with
+ * the same types, ranks (if specified), dimensions (if specified),
+ * scales, zeroPoints, and other operand parameters as the
+ * corresponding inputs of the WHILE operation and exactly one output
+ * of {@link ANEURALNETWORKS_TENSOR_BOOL8} and shape [1].
+ * The output operand must have fully specified dimensions.
+ * * 1: A {@link ANEURALNETWORKS_MODEL} reference to the body model.
+ * The model must have (m + k + n) inputs and (m + k) outputs with
+ * the same types, ranks (if specified), dimensions (if specified),
+ * scales, zeroPoints, and other operand parameters as the
+ * corresponding inputs and outputs of the WHILE operation.
+ * * (m inputs): Initial values for input-output operands.
+ * * (k inputs): Initial values for state-only operands.
+ * * (n inputs): Values for input-only operands.
+ *
+ * Outputs:
+ * * 0 ~ (m - 1): Outputs produced by the loop.
+ *
+ * Available since API level 30.
+ */
+ ANEURALNETWORKS_WHILE = 97,
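
    The m/k/n inference rules above reduce to simple arithmetic; restated as a
    tiny helper:

        #include <stdint.h>

        /* Sketch: how the runtime infers m, k, n for WHILE (rules above). */
        static void while_split(uint32_t while_output_count,
                                uint32_t body_output_count,
                                uint32_t body_input_count,
                                uint32_t* m, uint32_t* k, uint32_t* n)
        {
            *m = while_output_count;         /* input-output operands */
            *k = body_output_count - *m;     /* state-only operands   */
            *n = body_input_count - *m - *k; /* input-only operands   */
        }

    For the pseudo-code example above (one output, two body outputs, three body
    inputs), this yields m = 1, k = 1, n = 1.
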
+
+ /**
+ * Computes exponential linear activation on the input tensor element-wise.
+ *
+ * The output is calculated using the following formula:
+ *
+ * ELU(x) = max(0, x) + min(0, alpha * (exp(x) - 1))
+ *
+ * Supported tensor {@link OperandCode}:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ *
+ * Supported tensor rank: from 1.
+ *
+ * Inputs:
+ * * 0: A tensor, specifying the input. May be zero-sized.
+ * * 1: A scalar, specifying the alpha parameter.
+ * For input tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT16},
+ * the alpha value must be of {@link ANEURALNETWORKS_FLOAT16}.
+ * For input tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32},
+ * the alpha value must be of {@link ANEURALNETWORKS_FLOAT32}.
+ *
+ * Outputs:
+ * * 0: The output tensor of same shape and type as input0.
+ *
+ * Available since API level 30.
+ */
+ ANEURALNETWORKS_ELU = 98,
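
    A literal, element-wise transcription of the ELU formula above (plain C,
    not the NNAPI implementation):

        #include <math.h>

        /* Sketch: ELU(x) = max(0, x) + min(0, alpha * (exp(x) - 1)). */
        static float elu(float x, float alpha)
        {
            return fmaxf(0.0f, x) + fminf(0.0f, alpha * (expf(x) - 1.0f));
        }
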
+
+ /**
+ * Computes hard-swish activation on the input tensor element-wise.
+ *
+ * Hard swish activation is introduced in
+ * https://arxiv.org/pdf/1905.02244.pdf
+ *
+ * The output is calculated using the following formula:
+ *
+ * h-swish(x) = x * max(0, min(6, (x + 3))) / 6
+ *
+ * Supported tensor {@link OperandCode}:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED}
+ *
+ * Supported tensor rank: from 1.
+ *
+ * Inputs:
+ * * 0: A tensor, specifying the input. May be zero-sized.
+ *
+ * Outputs:
+ * * 0: The output tensor of same shape and type as input0.
+ * Scale and zero point of this tensor may be different from the input
+ * tensor's parameters.
+ *
+ * Available since API level 30.
+ */
+ ANEURALNETWORKS_HARD_SWISH = 99,
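
    Likewise, a literal transcription of the h-swish formula above:

        #include <math.h>

        /* Sketch: h-swish(x) = x * max(0, min(6, x + 3)) / 6. */
        static float hard_swish(float x)
        {
            return x * fmaxf(0.0f, fminf(6.0f, x + 3.0f)) / 6.0f;
        }
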
+
+ /**
+ * Creates a tensor filled with a scalar value.
+ *
+ * Supported output tensor {@link OperandCode}:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_INT32}
+ *
+ * Supported tensor rank: from 1.
+ *
+ * Inputs:
+ * * 0: A 1-D tensor, specifying the desired output tensor shape.
+ * * 1: A scalar, specifying the value to fill the output tensors with.
+ * For output tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT16},
+ * the scalar must be of {@link ANEURALNETWORKS_FLOAT16}.
+ * For output tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32},
+ * the scalar must be of {@link ANEURALNETWORKS_FLOAT32}.
+ * For output tensor of {@link ANEURALNETWORKS_TENSOR_INT32},
+ * the scalar must be of {@link ANEURALNETWORKS_INT32}.
+ *
+ * Outputs:
+ * * 0: The output tensor.
+ *
+ * Available since API level 30.
+ */
+ ANEURALNETWORKS_FILL = 100,
+
+ /**
+ * Returns the rank of a tensor.
+ *
+ * The rank of a tensor is the number of dimensions in it. Also known as
+ * "order", "degree", "ndims".
+ *
+ * Supported tensor {@link OperandCode}:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT16}
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_INT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT16_SYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_BOOL8}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT16_ASYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED}
+ *
+ * Supported tensor rank: from 1.
+ *
+ * Inputs:
+ * * 0: The input tensor.
+ *
+ * Outputs:
+ * * 0: A scalar of {@link ANEURALNETWORKS_INT32}, specifying the rank
+ * of the input tensor.
+ *
+ * Available since API level 30.
+ */
+ ANEURALNETWORKS_RANK = 101,
} OperationCode;
/**
@@ -4880,10 +5692,11 @@ typedef enum {
* the same; for odd number of padding, padding to the ending is bigger
* than the padding to the beginning by 1.
*
- * total_padding is a function of input, stride and filter size.
+ * total_padding is a function of input, stride, dilation and filter size.
* It could be computed as follows:
- * out_size = (input + stride - 1) / stride;
- * needed_input = (out_size - 1) * stride + filter_size
+ * out_size = (input_size + stride - 1) / stride
+ * effective_filter_size = (filter_size - 1) * dilation + 1
+ * needed_input = (out_size - 1) * stride + effective_filter_size
* total_padding = max(0, needed_input - input_size)
* The computation is the same for the horizontal and vertical directions.
*/
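
    A literal transcription of this padding arithmetic for one spatial
    dimension, including the begin/end split described above (smaller half at
    the beginning for odd totals):

        #include <stdint.h>

        /* Sketch: SAME padding for one dimension, per the formulas above. */
        static void same_padding(uint32_t input_size, uint32_t stride,
                                 uint32_t dilation, uint32_t filter_size,
                                 uint32_t* pad_begin, uint32_t* pad_end)
        {
            uint32_t out_size = (input_size + stride - 1) / stride;
            uint32_t effective_filter_size = (filter_size - 1) * dilation + 1;
            uint32_t needed_input = (out_size - 1) * stride + effective_filter_size;
            uint32_t total_padding =
                needed_input > input_size ? needed_input - input_size : 0;
            *pad_begin = total_padding / 2;        /* smaller half first */
            *pad_end = total_padding - *pad_begin; /* bigger half at the end */
        }
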
@@ -5004,6 +5817,47 @@ typedef enum {
* Failure caused by a device not being available.
*/
ANEURALNETWORKS_UNAVAILABLE_DEVICE = 9,
+
+ /**
+ * Failure because a deadline could not be met for a task, but future
+ * deadlines may still be met for the same task after a short delay.
+ *
+ * Available since API level 30.
+ */
+ ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT = 10,
+
+ /**
+ * Failure because a deadline could not be met for a task, and future
+ * deadlines will likely also not be met for the same task even after a
+ * short delay.
+ *
+ * Available since API level 30.
+ */
+ ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT = 11,
+
+ /**
+ * Failure because of a resource limitation within the driver, but future
+ * calls for the same task may still succeed after a short delay.
+ *
+ * Available since API level 30.
+ */
+ ANEURALNETWORKS_RESOURCE_EXHAUSTED_TRANSIENT = 12,
+
+ /**
+ * Failure because of a resource limitation within the driver, and future
+ * calls for the same task will likely also fail even after a short
+ * delay.
+ *
+ * Available since API level 30.
+ */
+ ANEURALNETWORKS_RESOURCE_EXHAUSTED_PERSISTENT = 13,
+
+ /**
+ * Failure indicating an object is in a dead state.
+ *
+ * Available since API level 30.
+ */
+ ANEURALNETWORKS_DEAD_OBJECT = 14,
} ResultCode;
/**
@@ -5024,6 +5878,48 @@ enum { ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES = 128 };
enum { ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN = 32 };
/**
+ * Different duration measurements.
+ *
+ * Durations are measured in nanoseconds.
+ *
+ * Available since API level 29.
+ */
+typedef enum {
+ // Execution time on hardware (not driver, which runs on host processor).
+ ANEURALNETWORKS_DURATION_ON_HARDWARE = 0,
+ // Execution time in driver (including time on hardware). Excludes overhead
+ // such as that of the runtime itself and the IPC needed for the runtime to
+ // communicate with the driver.
+ ANEURALNETWORKS_DURATION_IN_DRIVER = 1,
+ // Execution time on hardware, after all dependencies have been signaled.
+ // If no dependencies specified (for example, if the execution was scheduled other
+ // than with {@link ANeuralNetworksExecution_startComputeWithDependencies}), the
+ // reported time will be the same as ANEURALNETWORKS_DURATION_ON_HARDWARE.
+ // Available since API level 30.
+ ANEURALNETWORKS_FENCED_DURATION_ON_HARDWARE = 2,
+ // Execution time in driver, after all dependencies have been signaled. Excludes
+ // overhead such as that of the runtime itself and the IPC needed for the runtime
+ // to communicate with the driver.
+ // If no dependencies specified (for example, if the execution was scheduled other
+ // than with {@link ANeuralNetworksExecution_startComputeWithDependencies}), the
+ // reported time will be the same as ANEURALNETWORKS_DURATION_IN_DRIVER.
+ // Available since API level 30.
+ ANEURALNETWORKS_FENCED_DURATION_IN_DRIVER = 3,
+} DurationCode;
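
    These codes are consumed by ANeuralNetworksExecution_getDuration. A minimal
    sketch of reading one of them after an execution completes, assuming timing
    was enabled beforehand with ANeuralNetworksExecution_setMeasureTiming:

        #include <android/NeuralNetworks.h>
        #include <inttypes.h>
        #include <stdio.h>

        /* Sketch: query on-hardware time for a completed execution. */
        static void print_hw_duration(const ANeuralNetworksExecution* execution)
        {
            uint64_t ns = 0;
            if (ANeuralNetworksExecution_getDuration(
                    execution, ANEURALNETWORKS_DURATION_ON_HARDWARE, &ns) ==
                ANEURALNETWORKS_NO_ERROR) {
                printf("on-hardware: %" PRIu64 " ns\n", ns);
            }
        }
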
+
+/**
+ * Relative execution priority.
+ *
+ * Available since API level 30.
+ */
+typedef enum {
+ ANEURALNETWORKS_PRIORITY_LOW = 90,
+ ANEURALNETWORKS_PRIORITY_MEDIUM = 100,
+ ANEURALNETWORKS_PRIORITY_HIGH = 110,
+ ANEURALNETWORKS_PRIORITY_DEFAULT = ANEURALNETWORKS_PRIORITY_MEDIUM,
+} PriorityCode;
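
    A compilation opts into one of these levels via
    ANeuralNetworksCompilation_setPriority, called before
    ANeuralNetworksCompilation_finish. A minimal sketch:

        #include <android/NeuralNetworks.h>

        /* Sketch: lower a compilation's priority before finishing it (API 30+). */
        static int make_low_priority(ANeuralNetworksCompilation* compilation)
        {
            return ANeuralNetworksCompilation_setPriority(compilation,
                                                          ANEURALNETWORKS_PRIORITY_LOW);
        }
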
+
+/**
* ANeuralNetworksMemory is an opaque type that represents memory.
*
* This type is used to represent shared memory, memory mapped files,
@@ -5049,7 +5945,21 @@ enum { ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN = 32 };
* of the element type byte size, e.g., a tensor with
* {@link ANEURALNETWORKS_TENSOR_FLOAT32} type must be aligned on 4-byte boundary.
*
+ * It is the application's responsibility to ensure that there are no uses of
+ * the memory after calling {@link ANeuralNetworksMemory_free}. This includes
+ * any model which references this memory because of a call to
+ * {@link ANeuralNetworksModel_setOperandValueFromMemory}, any compilation
+ * created using such a model, any execution object or burst object created
+ * using such a compilation, or any execution which references this memory
+ * because of a call to {@link ANeuralNetworksExecution_setInputFromMemory} or
+ * {@link ANeuralNetworksExecution_setOutputFromMemory}.
+ *
* Available since API level 27.
+ *
+ * Starting at API level 30, the application may request creation of device native memory from
+ * {@link ANeuralNetworksMemoryDesc} to avoid potential memory copying and transformation
+ * overhead between executions. See also {@link ANeuralNetworksMemoryDesc} and
+ * {@link ANeuralNetworksMemory_createFromDesc}.
*/
typedef struct ANeuralNetworksMemory ANeuralNetworksMemory;
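
    A minimal sketch of the API-30 device-native memory flow mentioned above:
    one input role on a finished compilation, error handling collapsed for
    brevity:

        #include <android/NeuralNetworks.h>

        /* Sketch: create device-native memory usable as input 0 of a finished
         * compilation (see ANeuralNetworksMemory_createFromDesc). */
        static ANeuralNetworksMemory*
        make_input_memory(const ANeuralNetworksCompilation* compilation)
        {
            ANeuralNetworksMemoryDesc* desc = NULL;
            ANeuralNetworksMemory* memory = NULL;
            ANeuralNetworksMemoryDesc_create(&desc);
            /* Role: input 0 of `compilation`, used in (close to) every execution. */
            ANeuralNetworksMemoryDesc_addInputRole(desc, compilation, 0, 1.0f);
            ANeuralNetworksMemoryDesc_finish(desc);
            ANeuralNetworksMemory_createFromDesc(desc, &memory);
            ANeuralNetworksMemoryDesc_free(desc); /* memory outlives the descriptor */
            return memory;
        }
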
@@ -5079,9 +5989,10 @@ typedef struct ANeuralNetworksMemory ANeuralNetworksMemory;
* modifies a model at a given time. It is however safe for more than one
* thread to use the model once {@link ANeuralNetworksModel_finish} has returned.</p>
*
- * <p>It is also the application's responsibility to ensure that there are no other
- * uses of the model after calling {@link ANeuralNetworksModel_free}.
- * This includes any compilation or execution object created using the model.</p>
+ * <p>It is also the application's responsibility to ensure that there are no
+ * other uses of the model after calling {@link ANeuralNetworksModel_free}.
+ * This includes any compilation, execution object or burst object created using
+ * the model.</p>
*
* Available since API level 27.
*/
@@ -5119,7 +6030,10 @@ typedef struct ANeuralNetworksModel ANeuralNetworksModel;
*
* <p>It is also the application's responsibility to ensure that there are no other
* uses of the compilation after calling {@link ANeuralNetworksCompilation_free}.
- * This includes any execution object created using the compilation.</p>
+ * This includes any execution object or burst object created using the compilation,
+ * or any memory descriptor with the compilation as part of one of the roles specified by
+ * {@link ANeuralNetworksMemoryDesc_addInputRole} or
+ * {@link ANeuralNetworksMemoryDesc_addOutputRole}.</p>
*
* Available since API level 27.
*/
@@ -5139,7 +6053,8 @@ typedef struct ANeuralNetworksCompilation ANeuralNetworksCompilation;
* {@link ANeuralNetworksExecution_setOutput} or
* {@link ANeuralNetworksExecution_setOutputFromMemory}.</li>
* <li>Apply the model with one of the following:</li><ul>
- * <li>Asynchronously with {@link ANeuralNetworksExecution_startCompute},
+ * <li>Asynchronously with {@link ANeuralNetworksExecution_startCompute}
+ * or with {@link ANeuralNetworksExecution_startComputeWithDependencies},
* waiting for the execution to complete with
* {@link ANeuralNetworksEvent_wait}.</li>
* <li>Synchronously with {@link ANeuralNetworksExecution_compute}.</li>
@@ -5154,38 +6069,54 @@ typedef struct ANeuralNetworksCompilation ANeuralNetworksCompilation;
* ({@link ANeuralNetworksModel_setOperandValueFromMemory}).</p>
*
* <p>An execution cannot be modified once
- * {@link ANeuralNetworksExecution_compute} or
- * {@link ANeuralNetworksExecution_startCompute} has been called on it.</p>
+ * {@link ANeuralNetworksExecution_burstCompute},
+ * {@link ANeuralNetworksExecution_compute},
+ * {@link ANeuralNetworksExecution_startCompute} or
+ * {@link ANeuralNetworksExecution_startComputeWithDependencies} has been called on it.</p>
*
* <p>An execution can be applied to a model with
- * {@link ANeuralNetworksExecution_compute} or
- * {@link ANeuralNetworksExecution_startCompute} only once. Create new
+ * {@link ANeuralNetworksExecution_burstCompute},
+ * {@link ANeuralNetworksExecution_compute},
+ * {@link ANeuralNetworksExecution_startCompute} or
+ * {@link ANeuralNetworksExecution_startComputeWithDependencies} only once. Create new
* executions to do new evaluations of the model.</p>
*
* <p>It is the application's responsibility to make sure that only one thread
* modifies an execution at a given time. It is however safe for more than one
* thread to use {@link ANeuralNetworksEvent_wait} at the same time.</p>
*
+ * <p>It is also the application's responsibility to ensure that the execution
+ * either has never been scheduled or has completed (i.e., that
+ * {@link ANeuralNetworksExecution_burstCompute},
+ * {@link ANeuralNetworksExecution_compute}, or
+ * {@link ANeuralNetworksEvent_wait} has returned) before calling
+ * {@link ANeuralNetworksExecution_free}.</p>
+ *
* <p>It is also the application's responsibility to ensure that there are no other
* uses of the execution after calling {@link ANeuralNetworksExecution_free}.</p>
*
* <p>Multiple executions can be scheduled and evaluated concurrently, either by
- * means of {@link ANeuralNetworksExecution_compute} (which is synchronous) in
- * different threads or by means of
- * {@link ANeuralNetworksExecution_startCompute} (which is asynchronous). The
- * runtime makes no guarantee on the ordering of completion of executions. If
- * it's important to the application, the application should enforce the
- * ordering by ensuring that one execution completes before the next is
- * scheduled (for example, by scheduling all executions synchronously within a
- * single thread, or by scheduling all executions asynchronously and using
- * {@link ANeuralNetworksEvent_wait} between calls to
- * {@link ANeuralNetworksExecution_startCompute}).</p>
+ * means of {@link ANeuralNetworksExecution_compute} or
+ * {@link ANeuralNetworksExecution_burstCompute} (which are synchronous) in
+ * different threads, or by means of
+ * {@link ANeuralNetworksExecution_startCompute} or
+ * {@link ANeuralNetworksExecution_startComputeWithDependencies} (which are asynchronous).
+ * (Concurrent uses of {@link ANeuralNetworksExecution_burstCompute} must be on
+ * different burst objects.) The runtime makes no guarantee on the ordering of
+ * completion of executions. If it's important to the application, the
+ * application should enforce the ordering by ensuring that one execution
+ * completes before the next is scheduled (for example, by scheduling all
+ * executions synchronously within a single thread, or by scheduling all
+ * executions asynchronously and using {@link ANeuralNetworksEvent_wait} between
+ * calls to {@link ANeuralNetworksExecution_startCompute}); or by using
+ * {@link ANeuralNetworksExecution_startComputeWithDependencies} to make the execution wait for a
+ * list of events to be signaled before starting the actual evaluation.</p>
*
* Available since API level 27.
*/
typedef struct ANeuralNetworksExecution ANeuralNetworksExecution;
-#if __ANDROID_API__ >= __ANDROID_API_Q__
+#if __ANDROID_API__ >= 29
/**
* Parameters for ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL operand.
*/
@@ -5230,7 +6161,7 @@ typedef struct ANeuralNetworksSymmPerChannelQuantParams {
* Available since API level 29.
*/
typedef struct ANeuralNetworksBurst ANeuralNetworksBurst;
-#endif // __ANDROID_API__ >= __ANDROID_API_Q__
+#endif // __ANDROID_API__ >= 29
/**
* ANeuralNetworksOperandType describes the type of an operand.
@@ -5245,7 +6176,9 @@ typedef struct ANeuralNetworksBurst ANeuralNetworksBurst;
*
* If a tensor operand's type is not fully specified, the dimensions
* of the operand are deduced from the operand types and values of the
- * operation for which that operand is an output.
+ * operation for which that operand is an output, or from the corresponding
+ * {@link ANEURALNETWORKS_IF} or {@link ANEURALNETWORKS_WHILE} operation input
+ * operand type in the case of referenced model input operands.
*
* <p>In the following situations, a tensor operand type must be fully
* specified:<ul>
@@ -5254,16 +6187,25 @@ typedef struct ANeuralNetworksBurst ANeuralNetworksBurst;
* non-nullptr buffer) or
* {@link ANeuralNetworksModel_setOperandValueFromMemory}.</li>
* <li>The operand is a model input (see
- * {@link ANeuralNetworksModel_identifyInputsAndOutputs}). A
- * fully specified tensor operand type must either be provided
- * to {@link ANeuralNetworksModel_addOperand}; or it must be
- * provided to the corresponding
+ * {@link ANeuralNetworksModel_identifyInputsAndOutputs}) of the main
+ * model within a compilation. A fully specified tensor operand type
+ * must either be provided to {@link ANeuralNetworksModel_addOperand};
+ * or it must be provided to the corresponding
* {@link ANeuralNetworksExecution_setInput}, or
* {@link ANeuralNetworksExecution_setInputFromMemory}.
* EXCEPTION: If the input is optional and omitted
* (by passing nullptr for buffer to
* {@link ANeuralNetworksExecution_setInput}) then it need
- * not have a fully specified tensor operand type.</li></ul>
+ * not have a fully specified tensor operand type.</li>
+ * <li>The operand is a model output (see
+ * {@link ANeuralNetworksModel_identifyInputsAndOutputs}) of the main
+ * model within a compilation and is to be used with
+ * {@link ANeuralNetworksExecution_startComputeWithDependencies}.
+ * A fully specified tensor operand type must either be provided
+ * to {@link ANeuralNetworksModel_addOperand}; or it must be
+ * provided to the corresponding
+ * {@link ANeuralNetworksExecution_setOutput}, or
+ * {@link ANeuralNetworksExecution_setOutputFromMemory}.</li></ul>
*
* A tensor operand type of specified rank but some number of
* unspecified dimensions is represented by setting dimensionCount to
@@ -5296,11 +6238,21 @@ typedef struct ANeuralNetworksOperandType {
const uint32_t* dimensions;
/**
- * These two fields are only used for quantized tensors.
- * They must be zero for all other types.
- * The dequantized value of each entry is (value - zeroPoint) * scale.
+ * The quantization scale.
+ *
+ * Must be 0 when not applicable to an operand type.
+ *
+ * See {@link OperandCode}.
*/
float scale;
+
+ /**
+ * The quantization zero point.
+ *
+ * Must be 0 when not applicable to an operand type.
+ *
+ * See {@link OperandCode}.
+ */
int32_t zeroPoint;
} ANeuralNetworksOperandType;
@@ -5314,7 +6266,7 @@ typedef int32_t ANeuralNetworksOperationType;
*/
typedef struct ANeuralNetworksEvent ANeuralNetworksEvent;
-#if __ANDROID_API__ >= __ANDROID_API_Q__
+#if __ANDROID_API__ >= 29
/**
* ANeuralNetworksDevice is an opaque type that represents a device.
@@ -5326,6 +6278,318 @@ typedef struct ANeuralNetworksEvent ANeuralNetworksEvent;
*/
typedef struct ANeuralNetworksDevice ANeuralNetworksDevice;
+#endif // __ANDROID_API__ >= 29
+
+#if __ANDROID_API__ >= 30
+
+/**
+ * ANeuralNetworksMemoryDesc is an opaque type that represents a memory descriptor.
+ *
+ * A memory descriptor describes the properties of a memory object, and is used by
+ * {@link ANeuralNetworksMemory_createFromDesc}.
+ *
+ * To use:
+ * - Create a new memory descriptor by calling {@link ANeuralNetworksMemoryDesc_create}.
+ * - Specify all of the intended input and output roles by calling
+ * {@link ANeuralNetworksMemoryDesc_addInputRole} and
+ * {@link ANeuralNetworksMemoryDesc_addOutputRole}.
+ * - Optionally, specify the memory dimensions by calling
+ * {@link ANeuralNetworksMemoryDesc_setDimensions}.
+ * - Complete the memory descriptor with {@link ANeuralNetworksMemoryDesc_finish}.
+ * - Use the memory descriptor as many times as needed with
+ * {@link ANeuralNetworksMemory_createFromDesc}.
+ * - Destroy the memory descriptor with {@link ANeuralNetworksMemoryDesc_free}.
+ *
+ * A memory descriptor is completed by calling {@link ANeuralNetworksMemoryDesc_finish}.
+ * A memory descriptor is destroyed by calling {@link ANeuralNetworksMemoryDesc_free}.
+ *
+ * A memory descriptor must not be modified once {@link ANeuralNetworksMemoryDesc_finish}
+ * has been called on it.
+ *
+ * It is the application's responsibility to make sure that only
+ * one thread modifies a memory descriptor at a given time. It is however
+ * safe for more than one thread to use the memory descriptor once
+ * {@link ANeuralNetworksMemoryDesc_finish} has returned.
+ *
+ * It is also the application's responsibility to ensure that there are no other
+ * uses of the memory descriptor after calling {@link ANeuralNetworksMemoryDesc_free}.
+ * It is however safe to continue using a {@link ANeuralNetworksMemory} object created
+ * from the memory descriptor.
+ *
+ * Available since API level 30.
+ */
+typedef struct ANeuralNetworksMemoryDesc ANeuralNetworksMemoryDesc;
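/* Illustrative sketch (not part of the header diff): the "To use" sequence
 * above, end to end. Assumes a finished ANeuralNetworksCompilation named
 * `compilation` whose main model has at least input 0 and output 0; all
 * error handling is elided. */
ANeuralNetworksMemoryDesc* desc = NULL;
ANeuralNetworksMemoryDesc_create(&desc);
/* The memory will always (frequency 1.0f) act as input 0 and output 0 of
 * executions created from `compilation`. */
ANeuralNetworksMemoryDesc_addInputRole(desc, compilation, 0, 1.0f);
ANeuralNetworksMemoryDesc_addOutputRole(desc, compilation, 0, 1.0f);
ANeuralNetworksMemoryDesc_finish(desc);
ANeuralNetworksMemory* memory = NULL;
ANeuralNetworksMemory_createFromDesc(desc, &memory);
/* The descriptor may be freed once the memory object has been created. */
ANeuralNetworksMemoryDesc_free(desc);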
+
+/**
+ * Create a {@link ANeuralNetworksMemoryDesc} with no properties.
+ *
+ * This only creates the memory descriptor. Its properties should be set with calls to
+ * {@link ANeuralNetworksMemoryDesc_addInputRole},
+ * {@link ANeuralNetworksMemoryDesc_addOutputRole}, and
+ * {@link ANeuralNetworksMemoryDesc_setDimensions}.
+ *
+ * {@link ANeuralNetworksMemoryDesc_finish} must be called once all properties have been set.
+ *
+ * {@link ANeuralNetworksMemoryDesc_free} must be called once the memory descriptor
+ * is no longer needed.
+ *
+ * Available since API level 30.
+ *
+ * @param desc The {@link ANeuralNetworksMemoryDesc} to be created.
+ * Set to NULL if unsuccessful.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+int ANeuralNetworksMemoryDesc_create(ANeuralNetworksMemoryDesc** desc) __INTRODUCED_IN(30);
+
+/**
+ * Destroy a memory descriptor.
+ *
+ * The memory descriptor need not have been finished by a call to
+ * {@link ANeuralNetworksMemoryDesc_finish}.
+ *
+ * See {@link ANeuralNetworksMemoryDesc} for information on multithreaded usage.
+ *
+ * Available since API level 30.
+ *
+ * @param desc The memory descriptor to be destroyed. Passing NULL is acceptable and
+ * results in no operation.
+ */
+void ANeuralNetworksMemoryDesc_free(ANeuralNetworksMemoryDesc* desc) __INTRODUCED_IN(30);
+
+/**
+ * Specify that a memory object will be playing the role of an input to an execution created from a
+ * particular compilation.
+ *
+ * The compilation and the input index fully specify an input operand. This function
+ * may be invoked multiple times on the same memory descriptor with different input operands,
+ * and the same input operand may be specified on multiple memory descriptors. However,
+ * specifying the same input operand on the same memory descriptor more than once will
+ * return an error.
+ *
+ * The dimensions of the corresponding model operands of all the roles specified by
+ * {@link ANeuralNetworksMemoryDesc_addInputRole} and
+ * {@link ANeuralNetworksMemoryDesc_addOutputRole} must be compatible with each other. Two
+ * dimensions are incompatible if both ranks are fully specified but have different values, or if
+ * there is at least one axis that is fully specified in both but has different values.
+ *
+ * At least one of {@link ANeuralNetworksMemoryDesc_addInputRole} and
+ * {@link ANeuralNetworksMemoryDesc_addOutputRole} must be called on a memory descriptor
+ * before invoking {@link ANeuralNetworksMemoryDesc_finish}.
+ *
+ * Attempting to modify a memory descriptor once {@link ANeuralNetworksMemoryDesc_finish} has been
+ * called will return an error.
+ *
+ * See {@link ANeuralNetworksMemoryDesc} for information on multithreaded usage.
+ *
+ * Available since API level 30.
+ *
+ * @param desc The memory descriptor to be modified.
+ * @param compilation The compilation object. It must already have been finished by calling
+ * {@link ANeuralNetworksCompilation_finish}, and must outlive the memory
+ * descriptor.
+ * @param index The index of the input argument we are referencing from the compilation. It is
+ * an index into the inputs list passed to
+ * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
+ * the index associated with {@link ANeuralNetworksModel_addOperand}.
+ * @param frequency A floating-point value within the range (0.0, 1.0]. Describes how likely the
+ * memory is to be used in the specified role. This is provided as a hint to
+ * optimize the case when different roles prefer different memory locations or data
+ * layouts.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+int ANeuralNetworksMemoryDesc_addInputRole(ANeuralNetworksMemoryDesc* desc,
+ const ANeuralNetworksCompilation* compilation,
+ uint32_t index, float frequency) __INTRODUCED_IN(30);
+
+/**
+ * Specify that a memory object will be playing the role of an output to an execution created from a
+ * particular compilation.
+ *
+ * The compilation and the output index fully specify an output operand. This function
+ * may be invoked multiple times on the same memory descriptor with different output operands,
+ * and the same output operand may be specified on multiple memory descriptors. However,
+ * specifying the same output operand on the same memory descriptor more than once will
+ * return an error.
+ *
+ * The dimensions of the corresponding model operands of all the roles specified by
+ * {@link ANeuralNetworksMemoryDesc_addInputRole} and
+ * {@link ANeuralNetworksMemoryDesc_addOutputRole} must be compatible with each other. Two
+ * dimensions are incompatible if both ranks are fully specified but have different values, or if
+ * there is at least one axis that is fully specified in both but has different values.
+ *
+ * At least one of {@link ANeuralNetworksMemoryDesc_addInputRole} and
+ * {@link ANeuralNetworksMemoryDesc_addOutputRole} must be called on the memory descriptor
+ * before invoking {@link ANeuralNetworksMemoryDesc_finish}.
+ *
+ * Attempting to modify a memory descriptor once {@link ANeuralNetworksMemoryDesc_finish} has been
+ * called will return an error.
+ *
+ * See {@link ANeuralNetworksMemoryDesc} for information on multithreaded usage.
+ *
+ * Available since API level 30.
+ *
+ * @param desc The memory descriptor to be modified.
+ * @param compilation The compilation object. It must already have been finished by calling
+ * {@link ANeuralNetworksCompilation_finish}, and must outlive the memory
+ * descriptor.
+ * @param index The index of the output argument we are referencing from the compilation. It is
+ * an index into the outputs list passed to
+ * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
+ * the index associated with {@link ANeuralNetworksModel_addOperand}.
+ * @param frequency A floating-point value within the range (0.0, 1.0]. Describes how likely the
+ * memory is to be used in the specified role. This is provided as a hint to
+ * optimize the case when multiple roles prefer different memory locations or data
+ * layouts.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+int ANeuralNetworksMemoryDesc_addOutputRole(ANeuralNetworksMemoryDesc* desc,
+ const ANeuralNetworksCompilation* compilation,
+ uint32_t index, float frequency) __INTRODUCED_IN(30);
+
+/**
+ * Set the dimensional information of the memory descriptor.
+ *
+ * The specified dimensions must be compatible with the dimensions of the corresponding model
+ * operands of all the roles specified by {@link ANeuralNetworksMemoryDesc_addInputRole} and
+ * {@link ANeuralNetworksMemoryDesc_addOutputRole}. Two dimensions are incompatible if both ranks
+ * are fully specified but have different values, or if there is at least one axis that is fully
+ * specified in both but has different values.
+ *
+ * Attempting to modify a memory descriptor once {@link ANeuralNetworksMemoryDesc_finish} has been
+ * called will return an error.
+ *
+ * See {@link ANeuralNetworksMemoryDesc} for information on multithreaded usage.
+ *
+ * Available since API level 30.
+ *
+ * @param desc The memory descriptor to be modified.
+ * @param rank The number of dimensions. Must be 0 for scalars.
+ * @param dimensions An array of dimensions. An entry with the value 0 indicates that the
+ * corresponding axis has an unknown size.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+int ANeuralNetworksMemoryDesc_setDimensions(ANeuralNetworksMemoryDesc* desc, uint32_t rank,
+ const uint32_t* dimensions) __INTRODUCED_IN(30);
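/* Illustrative sketch (not part of the header diff): fixing all axes of a
 * rank-4 NHWC tensor except the batch axis, which is left unspecified (0).
 * Assumes `desc` is a not-yet-finished memory descriptor. */
const uint32_t dims[4] = {0 /* dynamic batch */, 224, 224, 3};
ANeuralNetworksMemoryDesc_setDimensions(desc, 4, dims);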
+
+/**
+ * Indicate that we have finished modifying a memory descriptor. Required before calling
+ * {@link ANeuralNetworksMemory_createFromDesc}.
+ *
+ * This function must only be called once for a given memory descriptor.
+ *
+ * See {@link ANeuralNetworksMemoryDesc} for information on multithreaded usage.
+ *
+ * Available since API level 30.
+ *
+ * @param desc The memory descriptor to be finished.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+int ANeuralNetworksMemoryDesc_finish(ANeuralNetworksMemoryDesc* desc) __INTRODUCED_IN(30);
+
+/**
+ * Creates a memory object from a memory descriptor.
+ *
+ * The memory object is created with an uninitialized buffer. A memory object with an uninitialized
+ * buffer may only be used according to the roles specified by {@link
+ * ANeuralNetworksMemoryDesc_addOutputRole}, or as the destination memory in {@link
+ * ANeuralNetworksMemory_copy}. The buffer of a memory object is initialized after the memory object
+ * is used as an output in a successful execution, or used as the destination memory in a successful
+ * {@link ANeuralNetworksMemory_copy}. A memory object with an initialized buffer may be used
+ * according to all roles specified in {@link ANeuralNetworksMemoryDesc}, or as the source or
+ * destination memory in {@link ANeuralNetworksMemory_copy}. The buffer of a memory object will
+ * return to the uninitialized state if the memory object is used as an output in a failed
+ * execution, or used as the destination memory in a failed {@link ANeuralNetworksMemory_copy}.
+ *
+ * The dimensions of the memory descriptor are deduced from the dimensions of the corresponding
+ * model operands of all the roles specified by {@link ANeuralNetworksMemoryDesc_addInputRole} and
+ * {@link ANeuralNetworksMemoryDesc_addOutputRole}, as well as the dimensions set by the call to
+ * {@link ANeuralNetworksMemoryDesc_setDimensions}, if any. The memory descriptor may have
+ * unspecified dimensions or rank. In such a case, the same memory object may be used with different
+ * shapes of outputs in different executions. When the memory is used as an input, the input shape
+ * must be the same as the output shape from the last execution using this memory object as an
+ * output, or the last {@link ANeuralNetworksMemory_copy} using this memory object as the destination
+ * memory. Creating a memory object with unspecified dimensions or rank may fail for certain sets of
+ * roles.
+ *
+ * Using the memory in roles or shapes that are not compatible with the rules specified above will
+ * return an error.
+ *
+ * When calling {@link ANeuralNetworksExecution_setInputFromMemory} or
+ * {@link ANeuralNetworksExecution_setOutputFromMemory} with the memory object,
+ * both offset and length must be set to zero and the entire memory region will be
+ * associated with the specified input or output operand.
+ *
+ * Calling {@link ANeuralNetworksModel_setOperandValueFromMemory} with the memory created from this
+ * function will return an error.
+ *
+ * {@link ANeuralNetworksMemory_free} must be called once the memory is no longer needed.
+ *
+ * Attempting to create memory from an unfinished memory descriptor will return an error.
+ *
+ * The provided {@link ANeuralNetworksMemoryDesc} need not outlive the {@link ANeuralNetworksMemory}
+ * object.
+ *
+ * Available since API level 30.
+ *
+ * @param desc The memory descriptor.
+ * @param memory The memory object to be created.
+ * Set to NULL if unsuccessful.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful; ANEURALNETWORKS_OP_FAILED if the memory is
+ * created with unspecified dimensions or rank and it is not supported for this set of
+ * roles.
+ */
+int ANeuralNetworksMemory_createFromDesc(const ANeuralNetworksMemoryDesc* desc,
+ ANeuralNetworksMemory** memory) __INTRODUCED_IN(30);
+
+/**
+ * Copies data from one memory object to another.
+ *
+ * If at most one of the src and dst is created from {@link ANeuralNetworksMemory_createFromDesc},
+ * the src and dst must have the same logical size:
+ * - If the memory is created from {@link ANeuralNetworksMemory_createFromFd}, or if it is created
+ * from {@link ANeuralNetworksMemory_createFromAHardwareBuffer} with format of
+ * AHARDWAREBUFFER_FORMAT_BLOB, the logical size equals the size of the memory.
+ * - If the memory is created from {@link ANeuralNetworksMemory_createFromAHardwareBuffer} with a
+ * format other than AHARDWAREBUFFER_FORMAT_BLOB, the logical size equals the size when there is
+ * no padding and the data is tightly packed. This function may fail if the AHardwareBuffer
+ * cannot be accessed.
+ * - If the memory is created from {@link ANeuralNetworksMemory_createFromDesc}, the logical size
+ * equals the size indicated by the {@link OperandCode} multiplied by the number of elements. This
+ * function will fail if the number of elements is unknown.
+ *
+ * If both src and dst are created from {@link ANeuralNetworksMemory_createFromDesc}, they must have
+ * compatible dimensions. Two dimensions are incompatible if both ranks are fully specified but
+ * have different values, or if there is at least one axis that is fully specified in both but has
+ * different values. The dst may have unspecified dimensions or rank. In such a case, the dimensions
+ * of dst will get updated according to the dimensions of the src.
+ *
+ * In both cases, if the src is created from {@link ANeuralNetworksMemory_createFromDesc}, it must
+ * have been used as an output in a successful execution, or used as the destination memory in a
+ * successful {@link ANeuralNetworksMemory_copy}.
+ *
+ * The src and dst may have different data layout, in which case the data copying is performed
+ * logically with data layout transformation.
+ *
+ * Available since API level 30.
+ *
+ * @param src The source memory object.
+ * @param dst The destination memory object.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+int ANeuralNetworksMemory_copy(const ANeuralNetworksMemory* src, const ANeuralNetworksMemory* dst)
+ __INTRODUCED_IN(30);
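/* Illustrative sketch (not part of the header diff): initializing a
 * driver-managed memory object from plain shared memory. Assumes
 * `ashmemMemory` was created with ANeuralNetworksMemory_createFromFd,
 * `deviceMemory` with ANeuralNetworksMemory_createFromDesc, and that the
 * two have the same logical size. */
ANeuralNetworksMemory_copy(ashmemMemory, deviceMemory);
/* On success, `deviceMemory` is initialized and may now also be used in
 * the input roles specified by its descriptor. */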
+
+#endif // __ANDROID_API__ >= 30
+
+#if __ANDROID_API__ >= 29
+
/**
* Get the number of available devices.
*
@@ -5359,7 +6623,8 @@ int ANeuralNetworks_getDevice(uint32_t devIndex, ANeuralNetworksDevice** device)
* @param device The representation of the specified device.
* @param name The returned name of the specified device. The name will be in UTF-8
* and will be null-terminated. It will be recognizable as a known device name
- * rather than a cryptic string. For devices with feature level 29 and above, the
+ * rather than a cryptic string. For devices with feature level reported by
+ * {@link ANeuralNetworksDevice_getFeatureLevel} that is 29 and above, the
* format of the name is {VENDOR}-{DEVICE}. For devices with feature level 28
* or lower, the format of the name is undefined.
* The name will remain valid for the duration of the application.
@@ -5439,6 +6704,26 @@ int ANeuralNetworksDevice_getVersion(const ANeuralNetworksDevice* device, const
int ANeuralNetworksDevice_getFeatureLevel(const ANeuralNetworksDevice* device,
int64_t* featureLevel) __INTRODUCED_IN(29);
+#if __ANDROID_API__ >= 30
+
+/**
+ * Wait until the device is in a live state.
+ *
+ * A device may encounter internal errors and temporarily enter a dead state. A
+ * call that uses a device in such a state will return with the error
+ * {@link ANEURALNETWORKS_DEAD_OBJECT}. ANeuralNetworksDevice_wait will block until
+ * the device is in a live state.
+ *
+ * @param device The representation of the specified device.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ *
+ * Available since API level 30.
+ */
+int ANeuralNetworksDevice_wait(const ANeuralNetworksDevice* device) __INTRODUCED_IN(30);
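/* Illustrative sketch (not part of the header diff): recovering after a
 * call failed with ANEURALNETWORKS_DEAD_OBJECT. Assumes `status` holds the
 * failing call's result and `device` is the ANeuralNetworksDevice the
 * failed objects were created for. */
if (status == ANEURALNETWORKS_DEAD_OBJECT) {
  ANeuralNetworksDevice_wait(device); /* blocks until the device is live */
  /* ... then recreate the compilation/execution and retry ... */
}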
+
+#endif // __ANDROID_API__ >= 30
+
/**
* Get the supported operations for a specified set of devices. If multiple devices
* are selected, the supported operation list is a union of supported operations of all
@@ -5473,6 +6758,10 @@ int ANeuralNetworksModel_getSupportedOperationsForDevices(
* ANeuralNetworksCompilation_create}, where the runtime will attempt to recover
* from such failures.
*
+ * The model passed to this function is termed the "main model" of the
+ * compilation, to distinguish it from other models referred to by an Operand
+ * of type {@link ANEURALNETWORKS_MODEL} within this compilation.
+ *
* @param model The {@link ANeuralNetworksModel} to be compiled.
* @param devices The set of devices. Must not contain duplicates.
* @param numDevices The number of devices in the set.
@@ -5502,7 +6791,7 @@ int ANeuralNetworksCompilation_createForDevices(ANeuralNetworksModel* model,
* data. It is recommended to use the code cache directory provided
* by the Android runtime. If not using the code cache directory, the
* user should choose a directory local to the application, and is
- * responsible to managing the cache entries.
+ * responsible for managing the cache entries.
* @param token The token provided by the user to specify a model must be of length
* ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN. The user should ensure that
* the token is unique to a model within the application. The NNAPI
@@ -5525,10 +6814,24 @@ int ANeuralNetworksCompilation_setCaching(ANeuralNetworksCompilation* compilatio
* execution has completed and the outputs are ready to be consumed.
* </p>
*
+ * If {@link ANeuralNetworksExecution_setTimeout} was called on this execution,
+ * and the execution is not able to complete before the timeout duration is
+ * exceeded, then execution may be aborted, in which case
+ * {@link ANEURALNETWORKS_MISSED_DEADLINE_*} will be returned. If the device has
+ * a feature level reported by {@link ANeuralNetworksDevice_getFeatureLevel}
+ * that is lower than 30, then the timeout duration hint will be ignored.
+ *
+ * If this execution contains a {@link ANEURALNETWORKS_WHILE} operation, and
+ * the condition model does not output false within the loop timeout duration,
+ * then execution will be aborted and {@link ANEURALNETWORKS_MISSED_DEADLINE_*}
+ * will be returned.
+ *
* See {@link ANeuralNetworksExecution} for information on multithreaded usage.
*
- * See {@link ANeuralNetworksExecution_startCompute} for asynchronous execution.
- * Synchronous execution incurs lower overhead than asynchronous execution.
+ * See {@link ANeuralNetworksExecution_burstCompute} for burst synchronous execution.
+ * See {@link ANeuralNetworksExecution_startCompute} for regular asynchronous execution.
+ * See {@link ANeuralNetworksExecution_startComputeWithDependencies} for
+ * asynchronous execution with dependencies.
*
* Available since API level 29.
*
@@ -5544,9 +6847,10 @@ int ANeuralNetworksExecution_compute(ANeuralNetworksExecution* execution) __INTR
* Get the dimensional information of the specified output operand of the model of the
* {@link ANeuralNetworksExecution}.
*
- * On asynchronous execution initiated by {@link ANeuralNetworksExecution_startCompute},
- * {@link ANeuralNetworksEvent_wait} must be called prior to this function to recuperate
- * the resources used by the execution.
+ * The execution must have completed. On asynchronous execution initiated by
+ * {@link ANeuralNetworksExecution_startCompute} or
+ * {@link ANeuralNetworksExecution_startComputeWithDependencies},
+ * {@link ANeuralNetworksEvent_wait} must be called prior to this function.
*
* @param execution The execution to be queried.
* @param index The index of the output argument we are querying. It is
@@ -5569,9 +6873,10 @@ int ANeuralNetworksExecution_getOutputOperandRank(ANeuralNetworksExecution* exec
* Get the dimensional information of the specified output operand of the model of the
* {@link ANeuralNetworksExecution}. The target output operand cannot be a scalar.
*
- * On asynchronous execution initiated by {@link ANeuralNetworksExecution_startCompute},
- * {@link ANeuralNetworksEvent_wait} must be called prior to this function to recuperate
- * the resources used by the execution.
+ * The execution must have completed. On asynchronous execution initiated by
+ * {@link ANeuralNetworksExecution_startCompute} or
+ * {@link ANeuralNetworksExecution_startComputeWithDependencies},
+ * {@link ANeuralNetworksEvent_wait} must be called prior to this function.
*
* @param execution The execution to be queried.
* @param index The index of the output argument we are querying. It is an index into the lists
@@ -5625,11 +6930,28 @@ void ANeuralNetworksBurst_free(ANeuralNetworksBurst* burst) __INTRODUCED_IN(29);
* <p>Schedules synchronous evaluation of the execution. Returns once the
* execution has completed and the outputs are ready to be consumed.</p>
*
+ * If {@link ANeuralNetworksExecution_setTimeout} was called on the execution,
+ * and the execution is not able to complete before the timeout duration is
+ * exceeded, then execution may be aborted, in which case
+ * {@link ANEURALNETWORKS_MISSED_DEADLINE_*} will be returned. If the device
+ * has a feature level reported by
+ * {@link ANeuralNetworksDevice_getFeatureLevel} that is lower than 30, then
+ * the timeout duration hint will be ignored.
+ *
+ * If the execution contains a {@link ANEURALNETWORKS_WHILE} operation, and
+ * the condition model does not output false within the loop timeout duration,
+ * then execution will be aborted and {@link ANEURALNETWORKS_MISSED_DEADLINE_*}
+ * will be returned.
+ *
* <p>There must be at most one {@link ANeuralNetworksExecution} processing at
* any given time for any given burst object. Any
* {@link ANeuralNetworksExecution} launched before the previous has finished
* will result in ANEURALNETWORKS_BAD_STATE.</p>
*
+ * See {@link ANeuralNetworksExecution_compute} for synchronous execution.
+ * See {@link ANeuralNetworksExecution_startCompute} for regular asynchronous execution.
+ * See {@link ANeuralNetworksExecution_startComputeWithDependencies} for
+ * asynchronous execution with dependencies.
+ *
* Available since API level 29.
*
* @param burst The burst object to execute on.
@@ -5656,14 +6978,14 @@ int ANeuralNetworksExecution_burstCompute(ANeuralNetworksExecution* execution,
* offset and length must be set to zero and the entire memory region will be
* associated with the specified input or output operand. There is no guarantee
* that an arbitrary AHardwareBuffer_Format and AHardwareBuffer_UsageFlags combination
- * can be used by arbitrary devices. The execution will fail if selected set of devices
- * cannot consume the buffer.
+ * can be used by arbitrary devices. The execution will fail if the selected set of
+ * devices cannot consume the buffer.
*
* Calling {@link ANeuralNetworksModel_setOperandValueFromMemory} with shared memory
* backed by an AHardwareBuffer of a format other than AHARDWAREBUFFER_FORMAT_BLOB is
* disallowed.
*
- * TODO(miaowang): add documentation about intended usage with introspection API.
+ * The provided AHardwareBuffer must outlive the ANeuralNetworksMemory object.
*
* Available since API level 29.
*
@@ -5686,8 +7008,12 @@ int ANeuralNetworksMemory_createFromAHardwareBuffer(const AHardwareBuffer* ahwb,
*
* By default, duration is not measured.
*
- * The {@link ANeuralNetworksExecution} must have been created with
+ * The {@link ANeuralNetworksExecution} must have been created from an
+ * {@link ANeuralNetworksCompilation} which in turn was created from
* {@link ANeuralNetworksCompilation_createForDevices} with numDevices = 1.
+ * If the device has a feature level reported by
+ * {@link ANeuralNetworksDevice_getFeatureLevel} that is lower than 29, then the
+ * duration will not be measured.
*
* See {@link ANeuralNetworksExecution} for information on multithreaded usage.
*
@@ -5702,41 +7028,32 @@ int ANeuralNetworksExecution_setMeasureTiming(ANeuralNetworksExecution* executio
__INTRODUCED_IN(29);
/**
- * Different duration measurements.
- *
- * Durations are measured in nanoseconds.
- *
- * Available since API level 29.
- */
-typedef enum {
- // Execution time on hardware (not driver, which runs on host processor).
- ANEURALNETWORKS_DURATION_ON_HARDWARE = 0,
- // Execution time in driver (including time on hardware). Excludes overhead
- // such as that of the runtime itself and the IPC needed for the runtime to
- // communicate with the driver.
- ANEURALNETWORKS_DURATION_IN_DRIVER = 1,
-} DurationCode;
-
-/**
* Get the time spent in the specified {@link ANeuralNetworksExecution}, in nanoseconds.
- * The execution must have completed.
*
- * Available since API level 29.
+ * The execution must have completed. On asynchronous execution initiated by
+ * {@link ANeuralNetworksExecution_startCompute} or
+ * {@link ANeuralNetworksExecution_startComputeWithDependencies},
+ * {@link ANeuralNetworksEvent_wait} must be called prior to this function.
*
* @param execution The execution to be queried.
* @param durationCode The measurement to be queried, specified by {@link DurationCode}.
* @param duration The returned duration. If no measurement was requested by
- * {@link ANeuralNetworksExecution_setMeasureTiming}, or for some other
- * reason the duration is not available, UINT64_MAX will be returned.
- * A particular device need not support any given measurement.
+ * {@link ANeuralNetworksExecution_setMeasureTiming}, if the
+ * device has a feature level reported by
+ * {@link ANeuralNetworksDevice_getFeatureLevel} that is lower
+ * than 29, or for some other reason the duration is not
+ * available, UINT64_MAX will be returned. A particular device
+ * need not support any given measurement.
*
* @return ANEURALNETWORKS_NO_ERROR if successful.
+ *
+ * Available since API level 29.
*/
int ANeuralNetworksExecution_getDuration(const ANeuralNetworksExecution* execution,
int32_t durationCode, uint64_t* duration)
__INTRODUCED_IN(29);
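/* Illustrative sketch (not part of the header diff): reading the
 * on-hardware time of a completed execution. Assumes `execution` was
 * created from a compilation made by
 * ANeuralNetworksCompilation_createForDevices with numDevices = 1, and that
 * ANeuralNetworksExecution_setMeasureTiming(execution, true) was called
 * before the execution was scheduled. */
uint64_t durationNs = 0;
ANeuralNetworksExecution_getDuration(execution, ANEURALNETWORKS_DURATION_ON_HARDWARE,
                                     &durationNs);
if (durationNs != UINT64_MAX) {
  /* durationNs is the hardware execution time in nanoseconds. */
}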
-#endif // __ANDROID_API__ >= __ANDROID_API_Q__
+#endif // __ANDROID_API__ >= 29
#if __ANDROID_API__ >= 27
@@ -5776,7 +7093,8 @@ int ANeuralNetworksMemory_createFromFd(size_t size, int protect, int fd, size_t
*
* Available since API level 27.
*
- * @param memory The memory object to be freed.
+ * @param memory The memory object to be freed. Passing NULL is acceptable and
+ * results in no operation.
*/
void ANeuralNetworksMemory_free(ANeuralNetworksMemory* memory) __INTRODUCED_IN(27);
@@ -5784,8 +7102,10 @@ void ANeuralNetworksMemory_free(ANeuralNetworksMemory* memory) __INTRODUCED_IN(2
* Create an empty {@link ANeuralNetworksModel}.
*
* <p>This only creates the object. Computation is performed once
- * {@link ANeuralNetworksExecution_compute} or
- * {@link ANeuralNetworksExecution_startCompute} is invoked.
+ * {@link ANeuralNetworksExecution_burstCompute},
+ * {@link ANeuralNetworksExecution_compute},
+ * {@link ANeuralNetworksExecution_startCompute} or
+ * {@link ANeuralNetworksExecution_startComputeWithDependencies} is invoked.
*
* The model should be constructed with calls to
* {@link ANeuralNetworksModel_addOperation} and
@@ -5826,8 +7146,8 @@ void ANeuralNetworksModel_free(ANeuralNetworksModel* model) __INTRODUCED_IN(27);
* calling {@link ANeuralNetworksCompilation_create} and
* {@link ANeuralNetworksCompilation_createForDevices}.
*
- * An application is responsible to make sure that no other thread uses
- * the model at the same time.
+ * An application must ensure that no other thread uses the model at the same
+ * time.
*
* This function must only be called once for a given model.
*
@@ -5901,11 +7221,13 @@ int ANeuralNetworksModel_addOperand(ANeuralNetworksModel* model,
* {@link ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES}
* are immediately copied into the model.
*
- * For values of length greater than {@link ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES},
- * a pointer to the buffer is stored within the model. The application is responsible
- * for not changing the content of this region until all executions using this model
- * have completed. As the data may be copied during processing, modifying the data
- * after this call yields undefined results.
+ * For values of length greater than
+ * {@link ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES}, a pointer to
+ * the buffer is stored within the model. The application must not change the
+ * content of this region until all executions using this model have
+ * completed. As the data may be copied during processing, modifying the data
+ * after this call yields undefined results. The provided buffer must outlive
+ * this model.
*
* For large tensors, using {@link ANeuralNetworksModel_setOperandValueFromMemory}
* is likely to be more efficient.
@@ -5930,7 +7252,7 @@ int ANeuralNetworksModel_addOperand(ANeuralNetworksModel* model,
int ANeuralNetworksModel_setOperandValue(ANeuralNetworksModel* model, int32_t index,
const void* buffer, size_t length) __INTRODUCED_IN(27);
-#if __ANDROID_API__ >= __ANDROID_API_Q__
+#if __ANDROID_API__ >= 29
/**
* Sets an operand's per channel quantization parameters.
@@ -5955,28 +7277,33 @@ int ANeuralNetworksModel_setOperandSymmPerChannelQuantParams(
ANeuralNetworksModel* model, int32_t index,
const ANeuralNetworksSymmPerChannelQuantParams* channelQuant) __INTRODUCED_IN(29);
-#endif // __ANDROID_API__ >= __ANDROID_API_Q__
+#endif // __ANDROID_API__ >= 29
/**
* Sets an operand to a value stored in a memory object.
*
* The content of the memory is not copied. A reference to that memory is stored
- * inside the model. The application is responsible for not changing the content
- * of the memory region until all executions using this model have completed.
- * As the data may be copied during processing, modifying the data after this call
- * yields undefined results.
+ * inside the model. The application must not change the content of the memory
+ * region until all executions using this model have completed. As the data may
+ * be copied during processing, modifying the data after this call yields
+ * undefined results.
+ *
+ * <p>The provided memory must outlive this model.</p>
*
* To indicate that an optional operand should be considered missing,
* use {@link ANeuralNetworksModel_setOperandValue} instead, passing nullptr for buffer.
*
- * Is disallowed to set an operand value with shared memory backed by an AHardwareBuffer
+ * It is disallowed to set an operand value with shared memory backed by an AHardwareBuffer
* of a format other than AHARDWAREBUFFER_FORMAT_BLOB.
*
+ * It is disallowed to set an operand value with memory created from
+ * {@link ANeuralNetworksMemory_createFromDesc}.
+ *
* Attempting to modify a model once {@link ANeuralNetworksModel_finish} has been
* called will return an error.
*
* See {@link ANeuralNetworksModel} for information on multithreaded usage.
- * See {@link ANeuralNetworksMemory_createFromAHardwarBuffer} for information on
+ * See {@link ANeuralNetworksMemory_createFromAHardwareBuffer} for information on
* AHardwareBuffer usage.
*
* Available since API level 27.
@@ -5996,6 +7323,39 @@ int ANeuralNetworksModel_setOperandValueFromMemory(ANeuralNetworksModel* model,
size_t offset, size_t length)
__INTRODUCED_IN(27);
+#if __ANDROID_API__ >= 30
+
+/**
+ * Sets an operand to a value that is a reference to another NNAPI model.
+ *
+ * The referenced model must already have been finished by a call to
+ * {@link ANeuralNetworksModel_finish}.
+ *
+ * The {@link ANeuralNetworksModel_relaxComputationFloat32toFloat16} setting of
+ * referenced models is overridden by that setting of the main model of a
+ * compilation.
+ *
+ * The referenced model must outlive the model referring to it.
+ *
+ * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
+ * been called will return an error.
+ *
+ * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+ *
+ * Available since API level 30.
+ *
+ * @param model The model to be modified.
+ * @param index The index of the model operand we're setting.
+ * @param value The model to be referenced.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+int ANeuralNetworksModel_setOperandValueFromModel(ANeuralNetworksModel* model, int32_t index,
+ const ANeuralNetworksModel* value)
+ __INTRODUCED_IN(30);
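/* Illustrative sketch (not part of the header diff): pointing an operand of
 * type ANEURALNETWORKS_MODEL at a finished condition model, e.g. for an
 * ANEURALNETWORKS_WHILE operation. `mainModel`, `condOperandIndex`, and
 * `condModel` are assumed names. */
ANeuralNetworksModel_setOperandValueFromModel(mainModel, condOperandIndex, condModel);
/* `condModel` must outlive `mainModel`, which now refers to it. */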
+
+#endif // __ANDROID_API__ >= 30
+
/**
* Add an operation to a model.
*
@@ -6060,6 +7420,9 @@ int ANeuralNetworksModel_identifyInputsAndOutputs(ANeuralNetworksModel* model, u
* must be calculated using at least the range and precision of the IEEE 754
* 32-bit floating-point format.
*
+ * The relaxComputationFloat32toFloat16 setting of the main model of
+ * a compilation overrides the values of the referenced models.
+ *
* @param model The model to be modified.
* @param allow 'true' indicates {@link ANEURALNETWORKS_TENSOR_FLOAT32} may be
* calculated with range and/or precision as low as that of the
@@ -6083,7 +7446,11 @@ int ANeuralNetworksModel_relaxComputationFloat32toFloat16(ANeuralNetworksModel*
/**
* Create a {@link ANeuralNetworksCompilation} to compile the given model.
*
- * <p>This only creates the object. Compilation is only performed once
+ * The model passed to this function is termed the "main model" of the
+ * compilation, to distinguish it from other models referred to by an Operand
+ * of type {@link ANEURALNETWORKS_MODEL} within this compilation.
+ *
+ * <p>This function only creates the object. Compilation is only performed once
* {@link ANeuralNetworksCompilation_finish} is invoked.</p>
*
* <p>{@link ANeuralNetworksCompilation_finish} should be called once
@@ -6114,7 +7481,7 @@ int ANeuralNetworksCompilation_create(ANeuralNetworksModel* model,
* Destroy a compilation.
*
* The compilation need not have been finished by a call to
- * {@link ANeuralNetworksModel_finish}.
+ * {@link ANeuralNetworksCompilation_finish}.
*
* See {@link ANeuralNetworksCompilation} for information on multithreaded usage.
*
@@ -6128,7 +7495,8 @@ void ANeuralNetworksCompilation_free(ANeuralNetworksCompilation* compilation) __
/**
* Sets the execution preference.
*
- * <p>Provides guidance to the runtime when trade-offs are possible.</p>
+ * <p>Provides guidance to the runtime when trade-offs are possible. By default the runtime
+ * uses ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER.</p>
*
* See {@link ANeuralNetworksCompilation} for information on multithreaded usage.
*
@@ -6146,13 +7514,19 @@ int ANeuralNetworksCompilation_setPreference(ANeuralNetworksCompilation* compila
/**
* Indicate that we have finished modifying a compilation. Required before
- * calling {@link ANeuralNetworksExecution_create}.
+ * calling {@link ANeuralNetworksBurst_create} or
+ * {@link ANeuralNetworksExecution_create}.
*
- * An application is responsible to make sure that no other thread uses
- * the compilation at the same time.
+ * An application must ensure that no other thread uses the compilation at the
+ * same time.
*
* This function must only be called once for a given compilation.
*
+ * If {@link ANeuralNetworksCompilation_setTimeout} was called on this
+ * compilation, and the compilation is not able to be finished before the
+ * timeout duration is exceeded, then compilation may be aborted, in which case
+ * {@link ANEURALNETWORKS_MISSED_DEADLINE_*} will be returned.
+ *
* See {@link ANeuralNetworksCompilation} for information on multithreaded usage.
*
* Available since API level 27.
@@ -6163,11 +7537,85 @@ int ANeuralNetworksCompilation_setPreference(ANeuralNetworksCompilation* compila
*/
int ANeuralNetworksCompilation_finish(ANeuralNetworksCompilation* compilation) __INTRODUCED_IN(27);
+#if __ANDROID_API__ >= 30
+
+/**
+ * Set the execution priority.
+ *
+ * Execution priorities are relative to other executions created by the same
+ * application (specifically same uid) for the same device. That is,
+ * priorities of executions from one application will not affect executions from
+ * another application. Similarly, priorities of executions on one device will
+ * not affect executions on another device.
+ *
+ * Higher priority executions may use more compute resources than lower priority
+ * executions, and may preempt or starve lower priority executions.
+ *
+ * See {@link ANeuralNetworksCompilation} for information on multithreaded usage.
+ *
+ * Available since API level 30.
+ *
+ * @param compilation The compilation to be modified.
+ * @param priority The relative priority of the execution compared to other
+ * executions created by the application. Must be one of
+ * ANEURALNETWORKS_PRIORITY_*.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+int ANeuralNetworksCompilation_setPriority(ANeuralNetworksCompilation* compilation, int priority)
+ __INTRODUCED_IN(30);
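/* Illustrative sketch (not part of the header diff): marking a background
 * compilation's executions as low priority. Assumes `compilation` has not
 * yet been finished. */
ANeuralNetworksCompilation_setPriority(compilation, ANEURALNETWORKS_PRIORITY_LOW);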
+
+/**
+ * Set the maximum expected duration for compiling the model.
+ *
+ * If the device is not able to complete the compilation within the specified
+ * duration, the compilation may be aborted. The timeout duration begins at the
+ * call to {@link ANeuralNetworksCompilation_finish}.
+ *
+ * This timeout duration acts as a hint to drivers, and can be used to both free
+ * up compute resources within the driver and return control back to the
+ * application quicker than is possible without the hint. It enables drivers
+ * that are able to estimate how long a compilation will take to abort the
+ * compilation before it has even started if the driver believes the compilation
+ * cannot be completed within the timeout duration. Similarly, it enables
+ * drivers to abort an ongoing compilation if it is taking too long. However,
+ * this call does not guarantee that the compilation will complete or abort
+ * within the timeout duration.
+ *
+ * By default (i.e., unless ANeuralNetworksCompilation_setTimeout is called),
+ * the timeout duration for compiling the model is considered infinite.
+ *
+ * The {@link ANeuralNetworksCompilation} must have been created with
+ * {@link ANeuralNetworksCompilation_createForDevices} with numDevices = 1,
+ * otherwise this function will fail with ANEURALNETWORKS_BAD_DATA. If the
+ * device has a feature level reported by
+ * {@link ANeuralNetworksDevice_getFeatureLevel} that is lower than 30, then the
+ * timeout duration hint will be ignored.
+ *
+ * See {@link ANeuralNetworksCompilation} for information on multithreaded usage.
+ *
+ * @param compilation The compilation to be modified.
+ * @param duration The maximum amount of time in nanoseconds that is expected to
+ * be spent finishing a compilation. If this duration is exceeded, the
+ * compilation may be aborted. If set to 0, the timeout duration is
+ * considered infinite.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ *
+ * Available since API level 30.
+ */
+int ANeuralNetworksCompilation_setTimeout(ANeuralNetworksCompilation* compilation,
+ uint64_t duration) __INTRODUCED_IN(30);
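/* Illustrative sketch (not part of the header diff): hinting a one-second
 * compilation deadline. Assumes `compilation` was created with
 * ANeuralNetworksCompilation_createForDevices and numDevices = 1. */
ANeuralNetworksCompilation_setTimeout(compilation, 1000000000ull /* 1 s in ns */);
int status = ANeuralNetworksCompilation_finish(compilation);
/* `status` may be an ANEURALNETWORKS_MISSED_DEADLINE_* value if the driver
 * aborted the compilation. */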
+
+#endif // __ANDROID_API__ >= 30
+
/**
* Create a {@link ANeuralNetworksExecution} to apply the given compilation.
* This only creates the object. Computation is only performed once
- * {@link ANeuralNetworksExecution_compute} or
- * {@link ANeuralNetworksExecution_startCompute} is invoked.
+ * {@link ANeuralNetworksExecution_burstCompute},
+ * {@link ANeuralNetworksExecution_compute},
+ * {@link ANeuralNetworksExecution_startCompute} or
+ * {@link ANeuralNetworksExecution_startComputeWithDependencies} is invoked.
*
* <p>The provided compilation must outlive the execution.</p>
*
@@ -6187,12 +7635,16 @@ int ANeuralNetworksExecution_create(ANeuralNetworksCompilation* compilation,
/**
* Destroy an execution.
*
- * <p>If called on an execution for which
- * {@link ANeuralNetworksExecution_startCompute} has been called, the
- * function will return immediately but will mark the execution to be deleted
- * once the computation completes. The related {@link ANeuralNetworksEvent}
- * will be signaled and the {@link ANeuralNetworksEvent_wait} will return
- * ANEURALNETWORKS_ERROR_DELETED.
+ * <p>The execution need not have been scheduled by a call to
+ * {@link ANeuralNetworksExecution_burstCompute},
+ * {@link ANeuralNetworksExecution_compute},
+ * {@link ANeuralNetworksExecution_startCompute} or
+ * {@link ANeuralNetworksExecution_startComputeWithDependencies}; but if it has been scheduled,
+ * then the application must not call {@link ANeuralNetworksExecution_free}
+ * until the execution has completed (i.e.,
+ * {@link ANeuralNetworksExecution_burstCompute},
+ * {@link ANeuralNetworksExecution_compute}, or
+ * {@link ANeuralNetworksEvent_wait} has returned).
*
* See {@link ANeuralNetworksExecution} for information on multithreaded usage.
*
@@ -6206,7 +7658,10 @@ void ANeuralNetworksExecution_free(ANeuralNetworksExecution* execution) __INTROD
/**
* Associate a user buffer with an input of the model of the
* {@link ANeuralNetworksExecution}. Evaluation of the execution must not have
- * been scheduled.
+ * been scheduled. Once evaluation of the execution has been scheduled, the
+ * application must not change the content of the buffer until the execution has
+ * completed. Evaluation of the execution will not change the content of the
+ * buffer.
*
* <p>The provided buffer must outlive the execution.</p>
*
@@ -6244,9 +7699,12 @@ int ANeuralNetworksExecution_setInput(ANeuralNetworksExecution* execution, int32
size_t length) __INTRODUCED_IN(27);
/**
- * Associate part of a memory object with an input of the model of the
+ * Associate a region of a memory object with an input of the model of the
* {@link ANeuralNetworksExecution}. Evaluation of the execution must not have
- * been scheduled.
+ * been scheduled. Once evaluation of the execution has been scheduled, the
+ * application must not change the content of the region until the execution has
+ * completed. Evaluation of the execution will not change the content of the
+ * region.
*
* <p>The provided memory must outlive the execution.</p>
*
@@ -6255,8 +7713,10 @@ int ANeuralNetworksExecution_setInput(ANeuralNetworksExecution* execution, int32
* buffer and 0 for length.
*
* See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- * See {@link ANeuralNetworksMemory_createFromAHardwarBuffer} for information on
+ * See {@link ANeuralNetworksMemory_createFromAHardwareBuffer} for information on
* AHardwareBuffer usage.
+ * See {@link ANeuralNetworksMemory_createFromDesc} for information on usage of memory objects
+ * created from memory descriptors.
*
* Available since API level 27.
*
@@ -6290,7 +7750,9 @@ int ANeuralNetworksExecution_setInputFromMemory(ANeuralNetworksExecution* execut
/**
* Associate a user buffer with an output of the model of the
* {@link ANeuralNetworksExecution}. Evaluation of the execution must not have
- * been scheduled.
+ * been scheduled. Once evaluation of the execution has been scheduled, the
+ * application must not change the content of the buffer until the execution has
+ * completed.
*
* If the output is optional, you can indicate that it is omitted by
* passing nullptr for buffer and 0 for length.
@@ -6333,9 +7795,11 @@ int ANeuralNetworksExecution_setOutput(ANeuralNetworksExecution* execution, int3
size_t length) __INTRODUCED_IN(27);
/**
- * Associate part of a memory object with an output of the model of the
+ * Associate a region of a memory object with an output of the model of the
* {@link ANeuralNetworksExecution}. Evaluation of the execution must not have
- * been scheduled.
+ * been scheduled. Once evaluation of the execution has been scheduled, the
+ * application must not change the content of the region until the execution has
+ * completed.
*
* If the output is optional, you can indicate that it is omitted by
* using {@link ANeuralNetworksExecution_setOutput} instead, passing nullptr for
@@ -6344,8 +7808,10 @@ int ANeuralNetworksExecution_setOutput(ANeuralNetworksExecution* execution, int3
* <p>The provided memory must outlive the execution.</p>
*
* See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- * See {@link ANeuralNetworksMemory_createFromAHardwarBuffer} for information on
+ * See {@link ANeuralNetworksMemory_createFromAHardwareBuffer} for information on
* AHardwareBuffer usage.
+ * See {@link ANeuralNetworksMemory_createFromDesc} for information on usage of memory objects
+ * created from memory descriptors.
*
* Available since API level 27.
*
@@ -6385,8 +7851,8 @@ int ANeuralNetworksExecution_setOutputFromMemory(ANeuralNetworksExecution* execu
/**
* Schedule asynchronous evaluation of the execution.
*
- * <p>Schedules asynchronous evaluation of the execution. Once the model has
- * been applied and the outputs are ready to be consumed, the returned event
+ * <p>Schedules asynchronous evaluation of the execution. Once the execution
+ * has completed and the outputs are ready to be consumed, the returned event
* will be signaled. Use {@link ANeuralNetworksEvent_wait} to wait for that
* event.
* </p>
@@ -6394,10 +7860,31 @@ int ANeuralNetworksExecution_setOutputFromMemory(ANeuralNetworksExecution* execu
* ANeuralNetworksEvent_wait must be called to recuperate the resources used
* by the execution.
*
+ * If {@link ANeuralNetworksExecution_setTimeout} was called on this execution,
+ * and the execution is not able to complete before the timeout duration is
+ * exceeded, then execution may be aborted, in which case
+ * {@link ANEURALNETWORKS_MISSED_DEADLINE_*} will be returned through
+ * {@link ANeuralNetworksExecution_startCompute} or
+ * {@link ANeuralNetworksEvent_wait} on the event object. If the device has a
+ * feature level reported by {@link ANeuralNetworksDevice_getFeatureLevel} that
+ * is lower than 30, then the timeout duration hint will be ignored.
+ *
+ * If this execution contains a {@link ANEURALNETWORKS_WHILE} operation, and
+ * the condition model does not output false within the loop timeout duration,
+ * then execution will be aborted and {@link ANEURALNETWORKS_MISSED_DEADLINE_*}
+ * will be returned through {@link ANeuralNetworksEvent_wait} on the event
+ * object.
+ *
+ * If the device can detect before the execution has started that the execution
+ * will not complete within the timeout duration, the device may choose to skip
+ * the execution and instead return {@link ANEURALNETWORKS_MISSED_DEADLINE_*}.
+ *
* See {@link ANeuralNetworksExecution} for information on multithreaded usage.
*
* See {@link ANeuralNetworksExecution_compute} for synchronous execution.
- * Synchronous execution incurs lower overhead than asynchronous execution.
+ * See {@link ANeuralNetworksExecution_burstCompute} for burst synchronous execution.
+ * See {@link ANeuralNetworksExecution_startComputeWithDependencies} for
+ * asynchronous execution with dependencies.
*
* Available since API level 27.
*
@@ -6405,21 +7892,129 @@ int ANeuralNetworksExecution_setOutputFromMemory(ANeuralNetworksExecution* execu
* @param event The event that will be signaled on completion. event is set to
* NULL if there's an error.
*
- * @return ANEURALNETWORKS_NO_ERROR if successful.
+ * @return ANEURALNETWORKS_NO_ERROR if the evaluation is successfully scheduled.
*/
int ANeuralNetworksExecution_startCompute(ANeuralNetworksExecution* execution,
ANeuralNetworksEvent** event) __INTRODUCED_IN(27);
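/* Illustrative sketch (not part of the header diff): the asynchronous
 * pattern described above. Assumes `execution` has all inputs and outputs
 * set; error handling is elided. */
ANeuralNetworksEvent* event = NULL;
ANeuralNetworksExecution_startCompute(execution, &event);
/* ... unrelated work can proceed here ... */
ANeuralNetworksEvent_wait(event); /* outputs are ready once this returns */
ANeuralNetworksEvent_free(event);
ANeuralNetworksExecution_free(execution); /* safe only after completion */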
+#if __ANDROID_API__ >= 30
+
+/**
+ * Set the maximum expected duration of the specified execution.
+ *
+ * If the device is not able to complete the execution within the specified
+ * duration, the execution may be aborted. The timeout duration begins at a
+ * call to one of:
+ * - {@link ANeuralNetworksExecution_burstCompute}
+ * - {@link ANeuralNetworksExecution_compute}
+ * - {@link ANeuralNetworksExecution_startCompute}
+ * - {@link ANeuralNetworksExecution_startComputeWithDependencies}
+ *
+ * This timeout duration acts as a hint to drivers, and can be used to both free
+ * up compute resources within the driver and return control back to the
+ * application quicker than is possible without the hint. It enables drivers
+ * that are able to estimate how long an execution will take to abort the
+ * execution before it has even started if the driver believes the execution
+ * cannot be completed within the timeout duration. Similarly, it enables
+ * drivers to abort an ongoing execution if it is taking too long. However, this
+ * call does not guarantee that the execution will complete or abort within the
+ * timeout duration.
+ *
+ * By default (i.e., unless ANeuralNetworksExecution_setTimeout is called),
+ * the timeout duration for execution is considered infinite.
+ *
+ * The {@link ANeuralNetworksExecution} must have been created from an
+ * {@link ANeuralNetworksCompilation} which in turn was created from
+ * {@link ANeuralNetworksCompilation_createForDevices} with numDevices = 1,
+ * otherwise this function will fail with ANEURALNETWORKS_BAD_DATA. If the
+ * device has a feature level reported by
+ * {@link ANeuralNetworksDevice_getFeatureLevel} that is lower than 30, then the
+ * timeout duration hint will be ignored.
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ *
+ * @param execution The execution to be modified.
+ * @param duration The maximum amount of time in nanoseconds that is expected to
+ * be spent executing a model. If this duration is exceeded, the execution
+ * may be aborted. If set to 0, the timeout duration is considered infinite.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ *
+ * Available since API level 30.
+ */
+int ANeuralNetworksExecution_setTimeout(ANeuralNetworksExecution* execution, uint64_t duration)
+ __INTRODUCED_IN(30);
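+
+/*
+ * Illustrative sketch (editor's example, not part of the original header):
+ * hinting a 100 ms execution deadline. Assumes `execution` was created from a
+ * compilation made with ANeuralNetworksCompilation_createForDevices and
+ * numDevices = 1, as required above.
+ *
+ *   const uint64_t kDeadlineNs = 100000000ULL; // 100 ms in nanoseconds
+ *   if (ANeuralNetworksExecution_setTimeout(execution, kDeadlineNs) !=
+ *       ANEURALNETWORKS_NO_ERROR) {
+ *     // e.g. ANEURALNETWORKS_BAD_DATA for a multi-device compilation
+ *   }
+ */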
+
+/**
+ * Set the maximum duration of WHILE loops in the specified execution.
+ *
+ * This is a fuzzy per-loop timeout intended to prevent infinite loops.
+ *
+ * If a WHILE loop condition model does not output false within the specified
+ * duration, the execution will be aborted.
+ *
+ * See {@link ANeuralNetworks_getDefaultLoopTimeout} and
+ * {@link ANeuralNetworks_getMaximumLoopTimeout} for the default
+ * and maximum timeout values.
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ *
+ * @param execution The execution to be modified.
+ * @param duration The maximum amount of time in nanoseconds that can be spent
+ * executing a WHILE loop. If the specified duration value exceeds the value
+ * produced by {@link ANeuralNetworks_getMaximumLoopTimeout}, it will be
+ * overridden by that value.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ * ANEURALNETWORKS_BAD_STATE if execution has started.
+ * ANEURALNETWORKS_UNEXPECTED_NULL if execution is NULL.
+ *
+ * Available since API level 30.
+ */
+int ANeuralNetworksExecution_setLoopTimeout(ANeuralNetworksExecution* execution, uint64_t duration)
+ __INTRODUCED_IN(30);
+
+/**
+ * Get the default timeout value for WHILE loops.
+ *
+ * @return The default timeout value in nanoseconds.
+ *
+ * Available since API level 30.
+ */
+uint64_t ANeuralNetworks_getDefaultLoopTimeout() __INTRODUCED_IN(30);
+
+/**
+ * Get the maximum timeout value for WHILE loops.
+ *
+ * @return The maximum timeout value in nanoseconds.
+ *
+ * Available since API level 30.
+ */
+uint64_t ANeuralNetworks_getMaximumLoopTimeout() __INTRODUCED_IN(30);
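+
+/*
+ * Illustrative sketch (editor's example, not part of the original header):
+ * requesting a longer WHILE-loop timeout, clamped to the platform maximum as
+ * described above. `execution` is assumed to be a valid, not-yet-started
+ * execution.
+ *
+ *   uint64_t wanted = 2 * ANeuralNetworks_getDefaultLoopTimeout();
+ *   uint64_t maximum = ANeuralNetworks_getMaximumLoopTimeout();
+ *   ANeuralNetworksExecution_setLoopTimeout(
+ *       execution, wanted < maximum ? wanted : maximum);
+ */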
+
+#endif // __ANDROID_API__ >= 30
+
/**
* Waits until the execution completes.
*
* More than one thread can wait on an event. When the execution completes,
* all threads will be released.
*
+ * If {@link ANeuralNetworksExecution_setTimeout} was called on the execution
+ * corresponding to this event, and the execution is not able to complete
+ * before the duration is exceeded, the execution may be aborted, in which case
+ * {@link ANEURALNETWORKS_MISSED_DEADLINE_*} will be returned here.
+ *
+ * If the execution contains a {@link ANEURALNETWORKS_WHILE} operation, and
+ * the condition model does not output false within the loop timeout duration,
+ * the execution will be aborted, and {@link ANEURALNETWORKS_MISSED_DEADLINE_*}
+ * will be returned here.
+ *
* See {@link ANeuralNetworksExecution} for information on multithreaded usage.
*
* Available since API level 27.
*
+ * @param event The event that will be signaled on completion.
* @return ANEURALNETWORKS_NO_ERROR if the execution completed normally.
* ANEURALNETWORKS_UNMAPPABLE if the execution input or output memory cannot
* be properly mapped.
@@ -6432,13 +8027,140 @@ int ANeuralNetworksEvent_wait(ANeuralNetworksEvent* event) __INTRODUCED_IN(27);
* See {@link ANeuralNetworksExecution} for information on multithreaded usage.
*
* Available since API level 27.
+ *
+ * @param event The event object to be destroyed. Passing NULL is acceptable and
+ * results in no operation.
*/
void ANeuralNetworksEvent_free(ANeuralNetworksEvent* event) __INTRODUCED_IN(27);
#endif // __ANDROID_API__ >= 27
+#if __ANDROID_API__ >= 30
+/**
+ * Create a {@link ANeuralNetworksEvent} from a sync_fence file descriptor.
+ *
+ * The newly created ANeuralNetworksEvent does not take ownership of the provided sync_fence_fd;
+ * it will instead dup the provided sync_fence_fd and own the duplicate.
+ *
+ * @param sync_fence_fd The sync_fence file descriptor.
+ * @param event The newly created object or NULL if unsuccessful.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ *
+ * Available since API level 30.
+ */
+int ANeuralNetworksEvent_createFromSyncFenceFd(int sync_fence_fd, ANeuralNetworksEvent** event)
+ __INTRODUCED_IN(30);
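+
+/*
+ * Illustrative sketch (editor's example, not part of the original header):
+ * because the event dups the descriptor, the caller keeps ownership of its
+ * own fd. `fence_fd` is assumed to be a valid sync_fence file descriptor.
+ *
+ *   ANeuralNetworksEvent* event = NULL;
+ *   if (ANeuralNetworksEvent_createFromSyncFenceFd(fence_fd, &event) ==
+ *       ANEURALNETWORKS_NO_ERROR) {
+ *     close(fence_fd); // safe: the event owns a duplicate, not this fd
+ *   }
+ */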
+
+/**
+ * Get sync_fence file descriptor from the event.
+ *
+ * If the ANeuralNetworksEvent is not backed by a sync fence, the sync_fence_fd
+ * will be set to -1, and ANEURALNETWORKS_BAD_DATA will be returned.
+ *
+ * See {@link ANeuralNetworksEvent_createFromSyncFenceFd} and
+ * {@link ANeuralNetworksExecution_startComputeWithDependencies} to see how to create
+ * an event backed by a sync fence.
+ *
+ * The user takes ownership of the returned fd, and must close the returned file descriptor when
+ * it is no longer needed.
+ *
+ * @param event An event that is backed by a sync fence.
+ * @param sync_fence_fd The sync_fence file descriptor. The file descriptor will
+ * be set to -1 if there is an error.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ *
+ * Available since API level 30.
+ */
+int ANeuralNetworksEvent_getSyncFenceFd(const ANeuralNetworksEvent* event, int* sync_fence_fd)
+ __INTRODUCED_IN(30);
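+
+/*
+ * Illustrative sketch (editor's example, not part of the original header):
+ * extracting and releasing the fence descriptor. As noted above, the caller
+ * must close the returned fd.
+ *
+ *   int fd = -1;
+ *   if (ANeuralNetworksEvent_getSyncFenceFd(event, &fd) ==
+ *       ANEURALNETWORKS_NO_ERROR) {
+ *     // ... hand fd to a waiting mechanism such as poll() ...
+ *     close(fd); // the caller owns the returned descriptor
+ *   }
+ */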
+
+/**
+ * Schedule asynchronous evaluation of the execution with dependencies.
+ *
+ * The execution will wait for all the depending events to be signaled before
+ * starting the evaluation. Once the execution has completed and the outputs
+ * are ready to be consumed, the returned event will be signaled. Depending on which
+ * devices are handling the execution, the event could be backed by a sync fence.
+ * Use {@link ANeuralNetworksEvent_wait} to wait for that event.
+ *
+ * ANeuralNetworksEvent_wait must be called to reclaim the resources used
+ * by the execution.
+ *
+ * If parts of the execution are scheduled on devices that do not support fenced execution,
+ * the function call may wait for such parts to finish before returning.
+ *
+ * The function will return an error if any of the events in dependencies is already in a bad
+ * state. After the execution is scheduled, if any of the events in dependencies does not complete
+ * normally, the execution will fail, and {@link ANeuralNetworksEvent_wait} on the returned
+ * event will return an error.
+ *
+ * The function will return an error if any of the execution outputs has a tensor operand type
+ * that is not fully specified.
+ *
+ * The function can be passed a timeout duration in nanoseconds. This timeout
+ * duration acts as a hint to drivers in the same way that the timeout durations
+ * in {@link ANeuralNetworksCompilation_setTimeout} and {@link
+ * ANeuralNetworksExecution_setTimeout} act as hints to drivers. The duration
+ * begins when all waitFor sync fences have been signaled, and can be used
+ * together with {@link ANeuralNetworksExecution_setTimeout} which specifies the
+ * maximum timeout duration beginning at the call to
+ * {@link ANeuralNetworksExecution_startComputeWithDependencies}.
+ * If the duration is non-zero, the {@link ANeuralNetworksExecution} must have been created
+ * from an {@link ANeuralNetworksCompilation} which in turn was created from
+ * {@link ANeuralNetworksCompilation_createForDevices} with numDevices = 1,
+ * otherwise this function will fail with ANEURALNETWORKS_BAD_DATA. If either
+ * the timeout duration from {@link ANeuralNetworksExecution_setTimeout} or the
+ * timeout duration passed to this call is exceeded, the execution may be
+ * aborted, in which case {@link ANEURALNETWORKS_MISSED_DEADLINE_*} will be
+ * returned through {@link ANeuralNetworksExecution_startComputeWithDependencies}
+ * or {@link ANeuralNetworksEvent_wait} on the event object. If the device has a
+ * feature level reported by {@link ANeuralNetworksDevice_getFeatureLevel} that
+ * is lower than 30, then the timeout duration hints will be ignored.
+ *
+ * If this execution contains a {@link ANEURALNETWORKS_WHILE} operation, and
+ * the condition model does not output false within the loop timeout duration,
+ * then execution will be aborted and {@link ANEURALNETWORKS_MISSED_DEADLINE_*}
+ * will be returned through {@link ANeuralNetworksEvent_wait} on the event
+ * object.
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ *
+ * See {@link ANeuralNetworksExecution_compute} for synchronous execution.
+ * See {@link ANeuralNetworksExecution_burstCompute} for burst synchronous execution.
+ * See {@link ANeuralNetworksExecution_startCompute} for regular asynchronous execution.
+ *
+ * @param execution The execution to be scheduled and executed.
+ * @param dependencies A set of depending events. The actual evaluation will not start
+ * until all the events are signaled.
+ * @param num_dependencies The number of events in the dependencies set.
+ * @param duration The maximum amount of time in nanoseconds that is expected to
+ * be spent executing the model after all dependencies are
+ * signaled. If set to 0, the timeout duration is considered
+ * infinite.
+ * @param event The event that will be signaled on completion. event is set to
+ * NULL if there's an error.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if the evaluation is successfully scheduled.
+ *
+ * Available since API level 30.
+ */
+int ANeuralNetworksExecution_startComputeWithDependencies(
+ ANeuralNetworksExecution* execution, const ANeuralNetworksEvent* const* dependencies,
+ uint32_t num_dependencies, uint64_t duration, ANeuralNetworksEvent** event)
+ __INTRODUCED_IN(30);
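+
+/*
+ * Illustrative sketch (editor's example, not part of the original header):
+ * chaining an execution after a producer event. `execution` and
+ * `producer_event` are assumed to exist; duration 0 means no timeout hint.
+ *
+ *   const ANeuralNetworksEvent* const deps[] = { producer_event };
+ *   ANeuralNetworksEvent* done = NULL;
+ *   if (ANeuralNetworksExecution_startComputeWithDependencies(
+ *           execution, deps, 1, 0, &done) == ANEURALNETWORKS_NO_ERROR) {
+ *     ANeuralNetworksEvent_wait(done); // also reclaims execution resources
+ *     ANeuralNetworksEvent_free(done);
+ *   }
+ */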
+
+#endif // __ANDROID_API__ >= 30
+
__END_DECLS
-#endif // ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_H
+#endif // ANDROID_FRAMEWORKS_ML_NN_RUNTIME_NEURAL_NETWORKS_H
+
+// For compatibility with android, check __ANDROID__ is defined
+#ifndef __ANDROID__
+#undef __ANDROID_API__
+#undef __INTRODUCED_IN
+#endif // __ANDROID__
/** @} */
diff --git a/runtime/nnapi-header/include/NeuralNetworksEx.h b/runtime/nnapi-header/include/NeuralNetworksEx.h
index d15262e17..f0387995d 100644
--- a/runtime/nnapi-header/include/NeuralNetworksEx.h
+++ b/runtime/nnapi-header/include/NeuralNetworksEx.h
@@ -31,7 +31,8 @@ __BEGIN_DECLS
/**
* @brief Extended operation types
*/
-typedef enum {
+typedef enum
+{
/** extends operation. */
/**
diff --git a/runtime/nnapi-header/include/NeuralNetworksExtensions.h b/runtime/nnapi-header/include/NeuralNetworksExtensions.h
index ca2e04567..dd51b0301 100644
--- a/runtime/nnapi-header/include/NeuralNetworksExtensions.h
+++ b/runtime/nnapi-header/include/NeuralNetworksExtensions.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_EXTENSIONS_H
-#define ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_EXTENSIONS_H
+#ifndef ANDROID_FRAMEWORKS_ML_NN_RUNTIME_NEURAL_NETWORKS_EXTENSIONS_H
+#define ANDROID_FRAMEWORKS_ML_NN_RUNTIME_NEURAL_NETWORKS_EXTENSIONS_H
#include "NeuralNetworks.h"
@@ -37,7 +37,7 @@
__BEGIN_DECLS
-#if __ANDROID_API__ >= __ANDROID_API_Q__
+#if __ANDROID_API__ >= 29
/**
* Queries whether an extension is supported by the driver implementation of the specified device.
@@ -110,8 +110,8 @@ int ANeuralNetworksModel_setOperandExtensionData(ANeuralNetworksModel* model, in
const void* data, size_t length)
__INTRODUCED_IN(29);
-#endif // __ANDROID_API__ >= __ANDROID_API_Q__
+#endif // __ANDROID_API__ >= 29
__END_DECLS
-#endif // ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_EXTENSIONS_H
+#endif // ANDROID_FRAMEWORKS_ML_NN_RUNTIME_NEURAL_NETWORKS_EXTENSIONS_H
diff --git a/runtime/onert/CMakeLists.txt b/runtime/onert/CMakeLists.txt
index 88d52a5bd..74f7ae568 100644
--- a/runtime/onert/CMakeLists.txt
+++ b/runtime/onert/CMakeLists.txt
@@ -6,10 +6,5 @@ add_subdirectory(backend)
add_subdirectory(frontend)
add_subdirectory(core)
add_subdirectory(api)
+add_subdirectory(odc)
add_subdirectory(sample)
-
-if(NOT ENABLE_TEST)
- return()
-endif(NOT ENABLE_TEST)
-
-add_subdirectory(test)
diff --git a/runtime/onert/api/CMakeLists.txt b/runtime/onert/api/CMakeLists.txt
index 49a5aa071..badd5d133 100644
--- a/runtime/onert/api/CMakeLists.txt
+++ b/runtime/onert/api/CMakeLists.txt
@@ -9,13 +9,26 @@ add_library(${ONERT_DEV} SHARED ${API_SRC})
set(NNFW_API_HEADERS include/nnfw.h include/nnfw_experimental.h)
target_link_libraries(${ONERT_DEV} PUBLIC nnfw-nnapi-header)
-target_link_libraries(${ONERT_DEV} PUBLIC onert_core)
+target_link_libraries(${ONERT_DEV} PRIVATE onert_core)
+target_link_libraries(${ONERT_DEV} PRIVATE nnfw_lib_misc)
target_link_libraries(${ONERT_DEV} PRIVATE jsoncpp tflite_loader circle_loader ${LIB_PTHREAD})
+target_link_libraries(${ONERT_DEV} PRIVATE trix_loader)
target_link_libraries(${ONERT_DEV} PRIVATE nnfw_common)
target_link_libraries(${ONERT_DEV} PRIVATE nnfw_coverage)
+# NOTE Below line is added to remove warning for android build
+# It will be removed after android build uses gold linker
+if (ANDROID)
+ target_link_libraries(${ONERT_DEV} INTERFACE log)
+endif (ANDROID)
+
target_include_directories(${ONERT_DEV} PUBLIC include)
set_target_properties(${ONERT_DEV} PROPERTIES PUBLIC_HEADER "${NNFW_API_HEADERS}")
+if(CMAKE_BUILD_TYPE_LC STREQUAL "release")
+ add_custom_command(TARGET ${ONERT_DEV} POST_BUILD
+ COMMAND ${CMAKE_STRIP} "--strip-unneeded" $<TARGET_FILE_NAME:${ONERT_DEV}>)
+endif()
+
install(TARGETS ${ONERT_DEV}
LIBRARY DESTINATION lib
PUBLIC_HEADER DESTINATION include/nnfw)
diff --git a/runtime/onert/api/include/nnfw.h b/runtime/onert/api/include/nnfw.h
index 9348df6ae..1f1541a7e 100644
--- a/runtime/onert/api/include/nnfw.h
+++ b/runtime/onert/api/include/nnfw.h
@@ -64,13 +64,14 @@ typedef struct nnfw_session nnfw_session;
*
* The type of tensor represented in {@link nnfw_tensorinfo}
*/
-typedef enum {
+typedef enum
+{
/** A tensor of 32 bit floating point */
NNFW_TYPE_TENSOR_FLOAT32 = 0,
/** A tensor of 32 bit signed integer */
NNFW_TYPE_TENSOR_INT32 = 1,
/**
- * A tensor of 8 bit integers that represent real numbers.
+ * A tensor of 8 bit unsigned integers that represent real numbers.
*
* real_value = (integer_value - zeroPoint) * scale.
*/
@@ -84,12 +85,29 @@ typedef enum {
/** A tensor of 64 bit signed integer */
NNFW_TYPE_TENSOR_INT64 = 5,
+ /**
+ * A tensor of 8 bit signed integers that represent real numbers.
+ *
+ * real_value = (integer_value - zeroPoint) * scale.
+ */
+ NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED = 6,
+
+ /**
+ * A tensor of 16 bit signed integers that represent real numbers.
+ *
+ * real_value = (integer_value - zeroPoint) * scale.
+ *
+ * Forced to have zeroPoint equal to 0.
+ */
+ NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED = 7,
+
} NNFW_TYPE;
/**
* @brief Result values returned from a call to an API function
*/
-typedef enum {
+typedef enum
+{
/** Successful */
NNFW_STATUS_NO_ERROR = 0,
/**
@@ -110,7 +128,8 @@ typedef enum {
/**
* @brief Data format of a tensor
*/
-typedef enum {
+typedef enum
+{
/** Don't care layout */
NNFW_LAYOUT_NONE = 0,
/**
@@ -128,7 +147,8 @@ typedef enum {
/**
* @brief Information ID for retrieving information on nnfw (e.g. version)
*/
-typedef enum {
+typedef enum
+{
/** nnfw runtime version
* Its value is uint32 in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch.
*/
@@ -173,7 +193,7 @@ typedef struct nnfw_tensorinfo
* And inference is performed after {@link nnfw_run} is invoked.
*
* <p>{@link nnfw_close_session} should be called once
- * if session is no longer need
+ * if session is no longer needed
*
* @param[out] session The session to be created
* @return NNFW_STATUS_NO_ERROR if successful
@@ -193,7 +213,7 @@ NNFW_STATUS nnfw_close_session(nnfw_session *session);
/**
* @brief Load model from nnpackage file or directory
*
- * The length of \p package_file_path must not execeed 1024 bytes including zero at the end.
+ * The length of \p package_file_path must not exceed 1024 bytes including zero at the end.
*
* @param[in] session nnfw_session loading the given nnpackage file/dir
* @param[in] package_file_path Path to the nnpackage file or unzipped directory to be loaded
@@ -223,11 +243,11 @@ NNFW_STATUS nnfw_apply_tensorinfo(nnfw_session *session, uint32_t index,
/**
* @brief Set input model's tensor info for resizing
*
- * This function can be called at any time after calling {@link nnfw_model_load_from_file}. Changing
+ * This function can be called at any time after calling {@link nnfw_load_model_from_file}. Changing
* input tensor's shape will cause shape inference for the model. There are two different types of
* shape inference - static and dynamic. Which one to use depends on the current state of the
* session.
- * When it is called after calling {@link nnfw_model_load_from_file} and before calling {@link
+ * When it is called after calling {@link nnfw_load_model_from_file} and before calling {@link
* nnfw_prepare}, this info will be used when {@link nnfw_prepare}. And it will perform static shape
* inference for all tensors.
* When it is called after calling {@link nnfw_prepare} or even after {@link nnfw_run}, this info
@@ -246,7 +266,7 @@ NNFW_STATUS nnfw_set_input_tensorinfo(nnfw_session *session, uint32_t index,
* @brief Prepare session to be ready for inference
*
* This phase may finalize model compilation, scheduling, and additional settings.
- * If {@link nnfw_apply_tensor} is called to apply input tensor info different with model
+ * If {@link nnfw_apply_tensorinfo} is called to apply input tensor info different from the model's
 * before this function, it tries to resize all tensors.
*
* @param[in] session the session to be prepared
@@ -289,7 +309,7 @@ NNFW_STATUS nnfw_run_async(nnfw_session *session);
/**
* @brief Wait for asynchronous run to finish
*
- * <p>This function must be called after calling {@link nnfw_run_asnyc}, and can be called only once
+ * <p>This function must be called after calling {@link nnfw_run_async}, and can be called only once
* for a {@link nnfw_run_async} call.
*
* <p>When this function returns, it means that this session has finished the asynchronous run. Then
@@ -476,7 +496,7 @@ NNFW_STATUS nnfw_set_op_backend(nnfw_session *session, const char *op, const cha
* @note: The input session could be null for global information (e.g. runtime version).*
*
* @param[in] session session to be queried on.
- * @param[in] information ID to be queried
+ * @param[in] id ID to be queried
* @param[out] val uint32 value to be returned.
*
* @return @c NNFW_STATUS_NO_ERROR if successful
diff --git a/runtime/onert/api/include/nnfw_experimental.h b/runtime/onert/api/include/nnfw_experimental.h
index 94f781988..3c8b08f52 100644
--- a/runtime/onert/api/include/nnfw_experimental.h
+++ b/runtime/onert/api/include/nnfw_experimental.h
@@ -19,6 +19,10 @@
#include "nnfw.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
// Used for custom kernel development
/*
@@ -96,4 +100,300 @@ NNFW_STATUS nnfw_input_tensorindex(nnfw_session *session, const char *tensorname
*/
NNFW_STATUS nnfw_output_tensorindex(nnfw_session *session, const char *tensorname, uint32_t *index);
+/**
+ * @brief Set the backend for each operation in the session
+ *
+ * This function assigns backends (acl_cl, acl_neon, cpu) to each operation in the session.
+ * If successful, the function returns @c NNFW_STATUS_NO_ERROR. Otherwise, the function returns
+ * @c NNFW_STATUS_ERROR.
+ *
+ * @note The argument specifying backends must be in the format
+ * "OP_BACKEND_MAP=\"0=acl_cl;1=cpu;2=acl_cl\"".
+ *
+ * @param[in] session the session object
+ * @param[in] backend_settings String containing backend assignments indexed by operation sequence
+ * @return @c NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_set_backends_per_operation(nnfw_session *session, const char *backend_settings);
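+
+/*
+ * Illustrative call (editor's example, not part of the original header),
+ * following the format stated in the note above; the backend assignment
+ * string itself is hypothetical.
+ *
+ *   nnfw_set_backends_per_operation(session,
+ *                                   "OP_BACKEND_MAP=\"0=acl_cl;1=cpu;2=acl_cl\"");
+ */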
+
+/*
+ * Prepare session to be ready for inference
+ * This phase may finalize model compilation, scheduling, and additional settings.
+ *
+ * @param session the session to be prepared
+ * @return NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_prepare_pipeline(nnfw_session *session, const char *map_file_path = nullptr);
+
+/**
+ * @brief Set input buffer
+ *
+ * This function must be called after {@link nnfw_prepare_pipeline}. \p inputs given to this
+ * function can be reused for many inferences. Each element of \p lengths must be greater than or
+ * equal to the size the corresponding operand requires. If you give empty \p inputs to this
+ * function, then this function will join all threads.
+ *
+ * @param[in] session Session to which the input is to be set
+ * @param[in] inputs Raw buffers for input; it must be a \p std::vector<void *> type pointer for
+ * a multiple-input model
+ * @param[in] lengths Sizes in bytes of the input buffers; it must be a \p std::vector<uint32_t>
+ * type pointer for a multiple-input model
+ *
+ * @return @c NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_push_pipeline_input(nnfw_session *session, void *inputs, void *lengths);
+
+/**
+ * @brief Get last outputs of partitioned model in session
+ *
+ * This function must be called after {@link nnfw_prepare_pipeline}. \p outputs given to this
+ * function must be cleared for memory management.
+ *
+ * @param[in] session Session from which the last outputs are to be extracted
+ * @param[out] outputs Raw buffer for outputs; it must be a \p std::vector<void *> type pointer for
+ * a multiple-output model
+ *
+ * @return @c NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_pop_pipeline_output(nnfw_session *session, void *outputs);
+
+/**
+ * Training C APIs
+ *
+ * Training APIs are designed to be used in the following order for training
+ * 1. nnfw_train_prepare
+ * 2. nnfw_train_set_input, nnfw_train_set_expected for inputs & expected outputs
+ * 3. nnfw_train
+ * 4. nnfw_train_get_loss
+ *
+ * If you want to run inference after training with the same session, you can use the following order
+ * 1. nnfw_set_input
+ * 2. nnfw_set_output
+ * 3. nnfw_run
+ */
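+
+/*
+ * Illustrative training-loop sketch (editor's example, not part of the
+ * original header), following the call order above. Buffer names and the
+ * number of epochs are hypothetical.
+ *
+ *   nnfw_train_info info;                 // defaults: SGD, MSE, lr 0.001, batch 1
+ *   nnfw_train_prepare(session, &info);
+ *   for (int epoch = 0; epoch < 10; ++epoch)
+ *   {
+ *     nnfw_train_set_input(session, 0, input_buf, NULL);    // NULL: keep shape
+ *     nnfw_train_set_expected(session, 0, label_buf, NULL);
+ *     nnfw_train(session, true);          // true: update weights
+ *     float loss = 0.f;
+ *     nnfw_train_get_loss(session, 0, &loss);
+ *   }
+ */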
+
+//////////////////////////////////////////////
+// Essential APIs for training
+//////////////////////////////////////////////
+typedef enum
+{
+ NNFW_TRAIN_LOSS_MEAN_SQUARED_ERROR = 0,
+ NNFW_TRAIN_LOSS_CATEGORICAL_CROSSENTROPY = 1,
+} NNFW_TRAIN_LOSS;
+
+typedef enum
+{
+ NNFW_TRAIN_OPTIMIZER_SGD = 0,
+ NNFW_TRAIN_OPTIMIZER_ADAM = 1,
+} NNFW_TRAIN_OPTIMIZER;
+
+/**
+ * @brief Training information to prepare training
+ * @todo Add more training information
+ * (e.g. optimizer, loss function, ...)
+ */
+typedef struct nnfw_train_info
+{
+ /** Learning rate */
+ float learning_rate = 0.001f;
+ /** Batch size */
+ uint32_t batch_size = 1;
+ /** loss type */
+ NNFW_TRAIN_LOSS loss = NNFW_TRAIN_LOSS_MEAN_SQUARED_ERROR;
+ /** optimizer type */
+ NNFW_TRAIN_OPTIMIZER opt = NNFW_TRAIN_OPTIMIZER_SGD;
+} nnfw_train_info;
+
+/**
+ * @brief Prepare session to be ready for training
+ * @note The session will be entered into training mode
+ *
+ * @param[in] session The session to be prepared for training
+ * @param[in] info Training information.
+ * If info is nullptr, it will not change training information.
+ * If it is nullptr and model has not training information,
+ * it will use default training information.
+ * Default training information is {learning_rate = 0.001f, batch_size = 1}
+ *
+ * @return @c NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_train_prepare(nnfw_session *session, const nnfw_train_info *info);
+
+/**
+ * @brief Set training input
+ * @note This function should be called after {@link nnfw_train_prepare}
+ *
+ * @param[in] session The session on which training inputs and expected model outputs are set
+ * @param[in] index The index of training input
+ * @param[in] input The input buffers for training
+ * @param[in] input_info The shape and type of input buffer
+ * If it is nullptr, the shape and batch size are not changed
+ * @return @c NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_train_set_input(nnfw_session *session, uint32_t index, const void *input,
+ const nnfw_tensorinfo *input_info);
+
+/**
+ * @brief Set training expected output
+ * @note This function should be called after {@link nnfw_train_prepare}
+ *
+ * @param session The session on which training inputs and expected model outputs are set
+ * @param index The index of training expected output
+ * @param expected The expected buffers for training
+ * @param expected_info The shape and type of expected buffer
+ * If it is nullptr, the shape and batch size are not changed
+ * @return @c NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_train_set_expected(nnfw_session *session, uint32_t index, const void *expected,
+ const nnfw_tensorinfo *expected_info);
+
+/**
+ * @brief Train the model
+ * @note This function should be called after {@link nnfw_train_set_input} and
+ * {@link nnfw_train_set_expected} for each input and expected output
+ *
+ * @param[in] session The session to be trained
+ * @param[in] update_weights If true, update weights of the model
+ * If false, do not update weights of the model (for validation)
+ * @return @c NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_train(nnfw_session *session, bool update_weights);
+
+/**
+ * @brief Get loss value for expected output
+ * @note This function should be called after {@link nnfw_train}
+ *
+ * @param[in] session The session to get loss value
+ * @param[in] index The index of loss value [0, number of expected outputs)
+ * @param[out] loss The loss value
+ * @return @c NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_train_get_loss(nnfw_session *session, uint32_t index, float *loss);
+
+/**
+ * @brief Export circle model
+ * @note This function should be called in training mode
+ * This function should be called after {@link nnfw_train}
+ *
+ * @param[in] session The session to export inference model
+ * @param[in] path The path to export inference model
+ * @return @c NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_train_export_circle(nnfw_session *session, const char *path);
+
+//////////////////////////////////////////////
+// Optional APIs for training
+//////////////////////////////////////////////
+
+/**
+ * @brief Get the training model input information
+ * @note This function should be called after {@link nnfw_train_prepare}
+ *
+ * @param[in] session The session to get the training model input information
+ * @param[in] index The index of training model input
+ * @param[out] info The shape and type of training model input
+ * @return @c NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_train_input_tensorinfo(nnfw_session *session, uint32_t index,
+ nnfw_tensorinfo *info);
+
+/**
+ * @brief Get the training model expected output information
+ * @note This function should be called after {@link nnfw_train_prepare}
+ *
+ * @param[in] session The session to get the training model expected output information
+ * @param[in] index The index of training model expected output
+ * @param[out] info The shape and type of training model expected output
+ * @return @c NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_train_expected_tensorinfo(nnfw_session *session, uint32_t index,
+ nnfw_tensorinfo *info);
+
+//////////////////////////////////////////////
+// Not planned to be implemented
+//////////////////////////////////////////////
+
+/**
+ * @brief Convert between training mode and inference mode
+ * @note This function should be called after {@link nnfw_train} or {@link nnfw_prepare}
+ *
+ * @param[in] session The session to convert training mode to inference mode
+ * @param[in] train If false, convert training model to inference model
+ * If true, convert inference model to training model
+ * @return @c NNFW_STATUS_NO_ERROR if successful
+ */
+// NNFW_STATUS nnfw_set_training_mode(nnfw_session *session, bool train);
+
+/**
+ * @brief Set training information after prepare training
+ * @note This function may be used after {@link nnfw_train_prepare}
+ *
+ * @param[in] session The session prepared for training
+ * @param[in] info Training information
+ * @return @c NNFW_STATUS_NO_ERROR if successful
+ */
+// NNFW_STATUS nnfw_train_set_traininfo(nnfw_session *session, const nnfw_train_info info);
+
+/**
+ * On-Device Quantization APIs
+ *
+ * On-Device Quantization APIs are designed to be used in the following order
+ * 1. nnfw_set_quantization_type
+ * 2. nnfw_set_quantized_model_path
+ * 3. nnfw_quantize
+ *
+ * You should use Quantization APIs after {@link nnfw_load_model_from_file},
+ * before {@link nnfw_prepare} and {@link nnfw_set_input_tensorinfo}.
+ */
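+
+/*
+ * Illustrative sketch (editor's example, not part of the original header),
+ * following the order above; the output path is hypothetical.
+ *
+ *   nnfw_set_quantization_type(session, NNFW_QUANTIZE_TYPE_U8_ASYM);
+ *   nnfw_set_quantized_model_path(session, "model.q8.circle");
+ *   nnfw_quantize(session);
+ */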
+
+/**
+ * @brief quantization type
+ */
+typedef enum
+{
+ /** default value: type not set */
+ NNFW_QUANTIZE_TYPE_NOT_SET,
+ /** asymmetric quantization with a scale and zero point */
+ NNFW_QUANTIZE_TYPE_U8_ASYM,
+ /** symmetric quantization with a scale only */
+ NNFW_QUANTIZE_TYPE_I16_SYM,
+} NNFW_QUANTIZE_TYPE;
+
+/**
+ * @brief Set quantization type
+ *
+ * This function should be called before {@link nnfw_quantize} is invoked.
+ *
+ * @param[in] session nnfw_session to set quantization type
+ * @param[in] qtype @c NNFW_QUANTIZE_TYPE
+ * @return @c NNFW_STATUS_NO_ERROR if successful,
+ * @c NNFW_STATUS_UNEXPECTED_NULL if session is null,
+ * otherwise return @c NNFW_STATUS_ERROR
+ */
+NNFW_STATUS nnfw_set_quantization_type(nnfw_session *session, NNFW_QUANTIZE_TYPE qtype);
+
+/**
+ * @brief Set exported quantized model path
+ *
+ * This function should be called before {@link nnfw_quantize} is invoked.
+ *
+ * TODO: If this function is not called, the quantized model will not be exported
+ *
+ * @param[in] session nnfw_session to set quantized model path
+ * @param[in] path Quantized model path
+ * @return @c NNFW_STATUS_NO_ERROR if successful, otherwise return @c NNFW_STATUS_ERROR
+ */
+NNFW_STATUS nnfw_set_quantized_model_path(nnfw_session *session, const char *path);
+
+/**
+ * @brief Quantize circle model
+ *
+ * @param[in] session nnfw_session to quantize
+ * @return @c NNFW_STATUS_NO_ERROR if successful, otherwise return @c NNFW_STATUS_ERROR
+ */
+NNFW_STATUS nnfw_quantize(nnfw_session *session);
+
+#ifdef __cplusplus
+}
+#endif
+
#endif // __NNFW_EXPERIMENTAL_H__
diff --git a/runtime/onert/api/include/nnfw_internal.h b/runtime/onert/api/include/nnfw_internal.h
index eb4b6d629..a88e32436 100644
--- a/runtime/onert/api/include/nnfw_internal.h
+++ b/runtime/onert/api/include/nnfw_internal.h
@@ -35,4 +35,13 @@ NNFW_STATUS nnfw_get_config(nnfw_session *session, const char *key, char *value,
*/
NNFW_STATUS nnfw_load_circle_from_buffer(nnfw_session *session, uint8_t *buffer, size_t size);
+/**
+ * @brief Load a tflite/circle model from file.
+ *
+ * @param[in] session session
+ * @param[in] file_path Path to the model file. Model type (tflite/circle) is decided by the file extension
+ * @return NNFW_STATUS
+ */
+NNFW_STATUS nnfw_load_model_from_modelfile(nnfw_session *session, const char *file_path);
+
#endif // __NNFW_INTERNAL_H__
diff --git a/runtime/onert/api/include/nnfw_version.h b/runtime/onert/api/include/nnfw_version.h
index 42e43760b..7a280a66d 100644
--- a/runtime/onert/api/include/nnfw_version.h
+++ b/runtime/onert/api/include/nnfw_version.h
@@ -21,6 +21,6 @@
* NNFW_VERSION is a uint32 value representing nnfw runtime version
* in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch
*/
-#define NNFW_VERSION 0x01000900
+#define NNFW_VERSION 0x01001900
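+// Editor's note: per the encoding above, 0x01001900 -> MM = 0x01 (1),
+// mmmm = 0x0019 (25), PP = 0x00 (0), i.e. runtime version 1.25.0.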
#endif // __NNFW_VERSION_H__
diff --git a/runtime/onert/api/src/CustomKernel.cc b/runtime/onert/api/src/CustomKernel.cc
index 3f3a5d81e..f094047fe 100644
--- a/runtime/onert/api/src/CustomKernel.cc
+++ b/runtime/onert/api/src/CustomKernel.cc
@@ -18,9 +18,7 @@
namespace onert
{
-namespace frontend
-{
-namespace custom
+namespace api
{
using namespace backend::custom;
@@ -64,12 +62,12 @@ public:
}
};
-Kernel::Kernel(const nnfw_custom_eval evalFunction)
- : _in_params(), _userdata(nullptr), _userdata_size(0), _evalFunction(evalFunction)
+CustomKernel::CustomKernel(const nnfw_custom_eval evalFunction)
+ : _in_params(), _userdata(nullptr), _userdata_size(0), _evalFunction(evalFunction)
{
}
-void Kernel::configure(CustomKernelConfigParams &&inParams)
+void CustomKernel::configure(CustomKernelConfigParams &&inParams)
{
_userdata = inParams.userdata;
_userdata_size = inParams.userdata_size;
@@ -77,7 +75,7 @@ void Kernel::configure(CustomKernelConfigParams &&inParams)
_in_params = std::move(inParams);
}
-void Kernel::run()
+void CustomKernel::run()
{
nnfw_custom_kernel_params params;
@@ -109,6 +107,5 @@ void Kernel::run()
delete[] params.outputs;
}
-} // namespace custom
-} // namespace frontend
+} // namespace api
} // namespace onert
diff --git a/runtime/onert/api/src/CustomKernel.h b/runtime/onert/api/src/CustomKernel.h
index a42f7a639..4c41dd9ba 100644
--- a/runtime/onert/api/src/CustomKernel.h
+++ b/runtime/onert/api/src/CustomKernel.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CUSTOM_KERNEL_H__
-#define __ONERT_BACKEND_CUSTOM_KERNEL_H__
+#ifndef __ONERT_API_CUSTOM_KERNEL_H__
+#define __ONERT_API_CUSTOM_KERNEL_H__
#include "nnfw_experimental.h"
@@ -26,15 +26,13 @@
namespace onert
{
-namespace frontend
-{
-namespace custom
+namespace api
{
-class Kernel : public ::onert::exec::IFunction
+class CustomKernel : public ::onert::exec::IFunction
{
public:
- explicit Kernel(nnfw_custom_eval evalFunction);
+ explicit CustomKernel(nnfw_custom_eval evalFunction);
backend::custom::CustomKernelConfigParams _in_params;
@@ -53,8 +51,7 @@ public:
void run() override;
};
-} // namespace custom
-} // namespace frontend
+} // namespace api
} // namespace onert
-#endif // __ONERT_BACKEND_CUSTOM_KERNEL_H__
+#endif // __ONERT_API_CUSTOM_KERNEL_H__
diff --git a/runtime/onert/api/src/CustomKernelRegistry.cc b/runtime/onert/api/src/CustomKernelRegistry.cc
index 7812609d1..d97f1bb06 100644
--- a/runtime/onert/api/src/CustomKernelRegistry.cc
+++ b/runtime/onert/api/src/CustomKernelRegistry.cc
@@ -20,22 +20,39 @@
namespace onert
{
-namespace frontend
+namespace api
{
-namespace custom
+
+class KernelBuilder : public backend::custom::IKernelBuilder
{
+public:
+ KernelBuilder(CustomKernelRegistry *registry) : _registry(registry) {}
+
+ std::unique_ptr<exec::IFunction>
+ buildKernel(const std::string &id,
+ backend::custom::CustomKernelConfigParams &&params) const override
+ {
+ auto kernel = _registry->buildKernelForOp(id);
+ kernel->configure(std::move(params));
+
+ return kernel;
+ }
+
+private:
+ CustomKernelRegistry *_registry;
+};
-void KernelRegistry::registerKernel(const std::string &id, nnfw_custom_eval evalFunction)
+void CustomKernelRegistry::registerKernel(const std::string &id, nnfw_custom_eval evalFunction)
{
_storage.emplace(id, evalFunction);
}
-std::shared_ptr<backend::custom::IKernelBuilder> KernelRegistry::getBuilder()
+std::shared_ptr<backend::custom::IKernelBuilder> CustomKernelRegistry::getBuilder()
{
return std::make_unique<KernelBuilder>(this);
}
-std::unique_ptr<Kernel> KernelRegistry::buildKernelForOp(const std::string &id)
+std::unique_ptr<CustomKernel> CustomKernelRegistry::buildKernelForOp(const std::string &id)
{
auto it = _storage.find(id);
if (it == _storage.end())
@@ -43,22 +60,8 @@ std::unique_ptr<Kernel> KernelRegistry::buildKernelForOp(const std::string &id)
throw std::runtime_error("Unable to find associated kernel for op");
}
- return std::make_unique<Kernel>(it->second);
+ return std::make_unique<CustomKernel>(it->second);
}
-// Kernel builder
-std::unique_ptr<exec::IFunction>
-KernelBuilder::buildKernel(const std::string &id,
- backend::custom::CustomKernelConfigParams &&params) const
-{
- auto kernel = _registry->buildKernelForOp(id);
- kernel->configure(std::move(params));
-
- return kernel;
-}
-
-KernelBuilder::KernelBuilder(KernelRegistry *registry) : _registry(registry) {}
-
-} // namespace custom
-} // namespace frontend
+} // namespace api
} // namespace onert
diff --git a/runtime/onert/api/src/CustomKernelRegistry.h b/runtime/onert/api/src/CustomKernelRegistry.h
index fe60d5bcc..d39f11ad6 100644
--- a/runtime/onert/api/src/CustomKernelRegistry.h
+++ b/runtime/onert/api/src/CustomKernelRegistry.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CUSTOM_KERNEL_REGISTRY_H__
-#define __ONERT_BACKEND_CUSTOM_KERNEL_REGISTRY_H__
+#ifndef __ONERT_API_CUSTOM_KERNEL_REGISTRY_H__
+#define __ONERT_API_CUSTOM_KERNEL_REGISTRY_H__
#include "CustomKernel.h"
@@ -27,38 +27,22 @@
namespace onert
{
-namespace frontend
-{
-namespace custom
+namespace api
{
-class KernelRegistry
+class CustomKernelRegistry
{
public:
void registerKernel(const std::string &id, nnfw_custom_eval evalFunction);
std::shared_ptr<backend::custom::IKernelBuilder> getBuilder();
- std::unique_ptr<Kernel> buildKernelForOp(const std::string &id);
+ std::unique_ptr<CustomKernel> buildKernelForOp(const std::string &id);
private:
std::unordered_map<std::string, nnfw_custom_eval> _storage;
};
-class KernelBuilder : public backend::custom::IKernelBuilder
-{
-public:
- KernelBuilder(KernelRegistry *registry);
-
- std::unique_ptr<exec::IFunction>
- buildKernel(const std::string &id,
- backend::custom::CustomKernelConfigParams &&params) const override;
-
-private:
- KernelRegistry *_registry;
-};
-
-} // namespace custom
-} // namespace frontend
+} // namespace api
} // namespace onert
-#endif // __ONERT_BACKEND_CUSTOM_KERNEL_REGISTRY_H__
+#endif // __ONERT_API_CUSTOM_KERNEL_REGISTRY_H__
diff --git a/runtime/onert/api/src/nnfw_api.cc b/runtime/onert/api/src/nnfw_api.cc
index ff5e679da..185738add 100644
--- a/runtime/onert/api/src/nnfw_api.cc
+++ b/runtime/onert/api/src/nnfw_api.cc
@@ -27,6 +27,8 @@ STATIC_ASSERT_ENUM_CHECK(NNFW_TYPE_TENSOR_QUANT8_ASYMM, 2);
STATIC_ASSERT_ENUM_CHECK(NNFW_TYPE_TENSOR_BOOL, 3);
STATIC_ASSERT_ENUM_CHECK(NNFW_TYPE_TENSOR_UINT8, 4);
STATIC_ASSERT_ENUM_CHECK(NNFW_TYPE_TENSOR_INT64, 5);
+STATIC_ASSERT_ENUM_CHECK(NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED, 6);
+STATIC_ASSERT_ENUM_CHECK(NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED, 7);
STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_NO_ERROR, 0);
STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_ERROR, 1);
@@ -56,15 +58,7 @@ STATIC_ASSERT_ENUM_CHECK(NNFW_INFO_ID_VERSION, 0);
* @param session the session to be created
* @return NNFW_STATUS_NO_ERROR if successful
*/
-NNFW_STATUS nnfw_create_session(nnfw_session **session)
-{
- NNFW_RETURN_ERROR_IF_NULL(session);
-
- *session = new (std::nothrow) nnfw_session();
- if (*session == nullptr)
- return NNFW_STATUS_OUT_OF_MEMORY;
- return NNFW_STATUS_NO_ERROR;
-}
+NNFW_STATUS nnfw_create_session(nnfw_session **session) { return nnfw_session::create(session); }
/*
* Close a session instance
@@ -89,7 +83,7 @@ NNFW_STATUS nnfw_close_session(nnfw_session *session)
NNFW_STATUS nnfw_load_model_from_file(nnfw_session *session, const char *pacakge_file_path)
{
NNFW_RETURN_ERROR_IF_NULL(session);
- return session->load_model_from_file(pacakge_file_path);
+ return session->load_model_from_nnpackage(pacakge_file_path);
}
/*
@@ -349,6 +343,12 @@ NNFW_STATUS nnfw_load_circle_from_buffer(nnfw_session *session, uint8_t *buffer,
return session->load_circle_from_buffer(buffer, size);
}
+NNFW_STATUS nnfw_load_model_from_modelfile(nnfw_session *session, const char *file_path)
+{
+ NNFW_RETURN_ERROR_IF_NULL(session);
+ return session->load_model_from_modelfile(file_path);
+}
+
NNFW_STATUS nnfw_input_tensorindex(nnfw_session *session, const char *tensorname, uint32_t *index)
{
NNFW_RETURN_ERROR_IF_NULL(session);
@@ -360,3 +360,158 @@ NNFW_STATUS nnfw_output_tensorindex(nnfw_session *session, const char *tensornam
NNFW_RETURN_ERROR_IF_NULL(session);
return session->output_tensorindex(tensorname, index);
}
+
+NNFW_STATUS nnfw_set_backends_per_operation(nnfw_session *session, const char *backend_settings)
+{
+ NNFW_RETURN_ERROR_IF_NULL(session);
+ return session->set_backends_per_operation(backend_settings);
+}
+
+NNFW_STATUS nnfw_prepare_pipeline(nnfw_session *session, const char *map_file_path)
+{
+ NNFW_RETURN_ERROR_IF_NULL(session);
+ return session->prepare_pipeline(map_file_path);
+}
+
+NNFW_STATUS nnfw_push_pipeline_input(nnfw_session *session, void *inputs, void *lengths)
+{
+ NNFW_RETURN_ERROR_IF_NULL(session);
+ return session->push_pipeline_input((std::vector<void *> *)inputs,
+ (std::vector<uint32_t> *)lengths);
+}
+
+NNFW_STATUS nnfw_pop_pipeline_output(nnfw_session *session, void *outputs)
+{
+ NNFW_RETURN_ERROR_IF_NULL(session);
+ return session->pop_pipeline_output((std::vector<void *> *)outputs);
+}
+
+// Training
+
+#ifdef ONERT_TRAIN
+
+NNFW_STATUS nnfw_train_prepare(nnfw_session *session, const nnfw_train_info *info)
+{
+ NNFW_RETURN_ERROR_IF_NULL(session);
+ return session->train_prepare(info);
+}
+
+NNFW_STATUS nnfw_train_input_tensorinfo(nnfw_session *session, uint32_t index,
+ nnfw_tensorinfo *info)
+{
+ NNFW_RETURN_ERROR_IF_NULL(session);
+ return session->train_input_tensorinfo(index, info);
+}
+
+NNFW_STATUS nnfw_train_expected_tensorinfo(nnfw_session *session, uint32_t index,
+ nnfw_tensorinfo *info)
+{
+ NNFW_RETURN_ERROR_IF_NULL(session);
+ return session->train_expected_tensorinfo(index, info);
+}
+
+NNFW_STATUS nnfw_train_set_input(nnfw_session *session, uint32_t index, const void *input,
+ const nnfw_tensorinfo *input_info)
+{
+ NNFW_RETURN_ERROR_IF_NULL(session);
+ return session->train_set_input(index, input, input_info);
+}
+
+NNFW_STATUS nnfw_train_set_expected(nnfw_session *session, uint32_t index, const void *expected,
+ const nnfw_tensorinfo *expected_info)
+{
+ NNFW_RETURN_ERROR_IF_NULL(session);
+ return session->train_set_expected(index, expected, expected_info);
+}
+
+NNFW_STATUS nnfw_train(nnfw_session *session, bool update_weights)
+{
+ NNFW_RETURN_ERROR_IF_NULL(session);
+ return session->train_run(update_weights);
+}
+
+NNFW_STATUS nnfw_train_get_loss(nnfw_session *session, uint32_t index, float *loss)
+{
+ NNFW_RETURN_ERROR_IF_NULL(session);
+ return session->train_get_loss(index, loss);
+}
+
+NNFW_STATUS nnfw_train_export_circle(nnfw_session *session, const char *path)
+{
+ NNFW_RETURN_ERROR_IF_NULL(session);
+ return session->train_export_circle(path);
+}
+
+#else // ONERT_TRAIN
+
+NNFW_STATUS nnfw_train_prepare(nnfw_session *session, const nnfw_train_info *)
+{
+ NNFW_RETURN_ERROR_IF_NULL(session);
+ return NNFW_STATUS_ERROR;
+}
+
+NNFW_STATUS nnfw_train_input_tensorinfo(nnfw_session *session, uint32_t, nnfw_tensorinfo *)
+{
+ NNFW_RETURN_ERROR_IF_NULL(session);
+ return NNFW_STATUS_ERROR;
+}
+
+NNFW_STATUS nnfw_train_expected_tensorinfo(nnfw_session *session, uint32_t, nnfw_tensorinfo *)
+{
+ NNFW_RETURN_ERROR_IF_NULL(session);
+ return NNFW_STATUS_ERROR;
+}
+
+NNFW_STATUS nnfw_train_set_input(nnfw_session *session, uint32_t, const void *,
+ const nnfw_tensorinfo *)
+{
+ NNFW_RETURN_ERROR_IF_NULL(session);
+ return NNFW_STATUS_ERROR;
+}
+
+NNFW_STATUS nnfw_train_set_expected(nnfw_session *session, uint32_t, const void *,
+ const nnfw_tensorinfo *)
+{
+ NNFW_RETURN_ERROR_IF_NULL(session);
+ return NNFW_STATUS_ERROR;
+}
+
+NNFW_STATUS nnfw_train(nnfw_session *session, bool)
+{
+ NNFW_RETURN_ERROR_IF_NULL(session);
+ return NNFW_STATUS_ERROR;
+}
+
+NNFW_STATUS nnfw_train_get_loss(nnfw_session *session, uint32_t, float *)
+{
+ NNFW_RETURN_ERROR_IF_NULL(session);
+ return NNFW_STATUS_ERROR;
+}
+
+NNFW_STATUS nnfw_train_export_circle(nnfw_session *session, const char *)
+{
+ NNFW_RETURN_ERROR_IF_NULL(session);
+ return NNFW_STATUS_ERROR;
+}
+
+#endif // ONERT_TRAIN
+
+// Quantization
+
+NNFW_STATUS nnfw_set_quantization_type(nnfw_session *session, NNFW_QUANTIZE_TYPE qtype)
+{
+ NNFW_RETURN_ERROR_IF_NULL(session);
+ return session->set_quantization_type(qtype);
+}
+
+NNFW_STATUS nnfw_set_quantized_model_path(nnfw_session *session, const char *path)
+{
+ NNFW_RETURN_ERROR_IF_NULL(session);
+ return session->set_quantized_model_path(path);
+}
+
+NNFW_STATUS nnfw_quantize(nnfw_session *session)
+{
+ NNFW_RETURN_ERROR_IF_NULL(session);
+ return session->quantize();
+}
diff --git a/runtime/onert/api/src/nnfw_api_internal.cc b/runtime/onert/api/src/nnfw_api_internal.cc
index 81b40703f..fc02a9227 100644
--- a/runtime/onert/api/src/nnfw_api_internal.cc
+++ b/runtime/onert/api/src/nnfw_api_internal.cc
@@ -16,20 +16,25 @@
#include "nnfw_api_internal.h"
#include "CustomKernelRegistry.h"
-#include "compiler/Compiler.h"
+#include "compiler/CompilerFactory.h"
#include "util/ConfigSource.h"
#include "util/Exceptions.h"
+#include "util/logging.h"
#include "exec/Execution.h"
#include "circle_loader.h"
#include "tflite_loader.h"
+#include "trix_loader.h"
#include "json/json.h"
+#include "ir/NNPkg.h"
#include "ir/OpCode.h"
+#include "util/TracingCtx.h"
+#include "odc/QuantizeManager.h"
+
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <dirent.h>
-#include <util/ConfigSource.h>
#include <misc/string_helpers.h>
/*
@@ -40,8 +45,11 @@
#define MAX_PATH_LENGTH 1024
#define MAX_TENSOR_NAME_LENGTH 64
+namespace
+{
+
// Is null-terminating in length ?
-static bool null_terminating(const char *str, uint32_t length)
+bool null_terminating(const char *str, uint32_t length)
{
for (uint32_t i = 0; i < length; i++)
{
@@ -53,7 +61,7 @@ static bool null_terminating(const char *str, uint32_t length)
return false;
}
-static onert::ir::Layout convertLayout(NNFW_LAYOUT layout)
+onert::ir::Layout convertLayout(NNFW_LAYOUT layout)
{
if (layout == NNFW_LAYOUT_CHANNELS_LAST)
{
@@ -66,7 +74,7 @@ static onert::ir::Layout convertLayout(NNFW_LAYOUT layout)
return onert::ir::Layout::UNKNOWN;
}
-NNFW_STATUS getTensorIndexImpl(const onert::ir::Graph &graph, const char *tensorname,
+NNFW_STATUS getTensorIndexImpl(const onert::ir::IGraph &graph, const char *tensorname,
uint32_t *index, bool is_input)
{
if (!tensorname || !index)
@@ -92,13 +100,159 @@ NNFW_STATUS getTensorIndexImpl(const onert::ir::Graph &graph, const char *tensor
}
}
+std::string trim(const std::string &value)
+{
+ std::string whitespace = " \t";
+ auto begin = value.find_first_not_of(whitespace);
+ if (begin == std::string::npos)
+ return ""; // no content
+
+ auto end = value.find_last_not_of(whitespace);
+ auto range = end - begin + 1;
+ return value.substr(begin, range);
+}
+
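+// Parse "key = value" lines from cfgfile into keyValues, ignoring text after
+// '#' and trimming surrounding whitespace; returns false if the file cannot
+// be opened.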
+bool loadConfigure(const std::string cfgfile, onert::util::CfgKeyValues &keyValues)
+{
+ std::ifstream ifs(cfgfile);
+ if (ifs.is_open())
+ {
+ std::string line;
+ while (std::getline(ifs, line))
+ {
+ auto cmtpos = line.find('#');
+ if (cmtpos != std::string::npos)
+ {
+ line = line.substr(0, cmtpos);
+ }
+ std::istringstream isline(line);
+ std::string key;
+ if (std::getline(isline, key, '='))
+ {
+ std::string value;
+ if (std::getline(isline, value))
+ {
+ key = trim(key);
+ keyValues[key] = trim(value);
+ }
+ }
+ }
+ ifs.close();
+ return true;
+ }
+ return false;
+}
+
+NNFW_TYPE datatype_to_nnfw_dtype(onert::ir::DataType dt)
+{
+ using onert::ir::DataType;
+ switch (dt)
+ {
+ case DataType::FLOAT32:
+ return NNFW_TYPE_TENSOR_FLOAT32;
+ case DataType::INT32:
+ return NNFW_TYPE_TENSOR_INT32;
+ case DataType::QUANT_UINT8_ASYMM:
+ return NNFW_TYPE_TENSOR_QUANT8_ASYMM;
+ case DataType::BOOL8:
+ return NNFW_TYPE_TENSOR_BOOL;
+ case DataType::UINT8:
+ return NNFW_TYPE_TENSOR_UINT8;
+ case DataType::INT64:
+ return NNFW_TYPE_TENSOR_INT64;
+ case DataType::QUANT_INT8_ASYMM:
+ return NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED;
+ case DataType::QUANT_INT16_SYMM:
+ return NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED;
+ case DataType::UINT32:
+ case DataType::QUANT_INT8_SYMM:
+ default:
+ throw std::runtime_error("Error: Model has type that runtime API does not support.");
+ }
+}
+
+void fillTensorInfo(nnfw_tensorinfo *ti, const onert::ir::Shape &shape,
+ const onert::ir::DataType &dtype)
+{
+ ti->rank = shape.rank();
+ for (int j = 0; j < ti->rank; ++j)
+ {
+ ti->dims[j] = shape.dim(j);
+ }
+ ti->dtype = datatype_to_nnfw_dtype(dtype);
+}
+
+std::unique_ptr<onert::ir::Model> loadModel(const std::string filename,
+ const std::string model_type)
+{
+ if (model_type == "tflite")
+ return onert::tflite_loader::loadModel(filename.c_str());
+ if (model_type == "circle")
+ return onert::circle_loader::loadModel(filename.c_str());
+ if (model_type == "tvn")
+ return onert::trix_loader::loadModel(filename.c_str());
+
+ std::cerr << "Unsupported model type" << std::endl;
+ return std::unique_ptr<onert::ir::Model>(nullptr);
+}
+
+#ifdef ONERT_TRAIN
+uint64_t getBufSize(const nnfw_tensorinfo *info)
+{
+ static int elmsize[] = {
+ sizeof(float), /* NNFW_TYPE_TENSOR_FLOAT32 = 0 */
+ sizeof(int), /* NNFW_TYPE_TENSOR_INT32 = 1 */
+ sizeof(uint8_t), /* NNFW_TYPE_TENSOR_QUANT8_ASYMM = 2 */
+ sizeof(bool), /* NNFW_TYPE_TENSOR_BOOL = 3 */
+ sizeof(uint8_t), /* NNFW_TYPE_TENSOR_UINT8 = 4 */
+ sizeof(int64_t), /* NNFW_TYPE_TENSOR_INT64 = 5 */
+ sizeof(int8_t), /* NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED = 6 */
+ sizeof(int16_t), /* NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED = 7 */
+ };
+
+ uint64_t n = 1;
+ for (int32_t i = 0; i < info->rank; ++i)
+ {
+ assert(info->dims[i] >= 0);
+ n *= info->dims[i];
+ }
+ return elmsize[info->dtype] * n;
+}
+#endif // ONERT_TRAIN
+} // namespace
+
nnfw_session::nnfw_session()
- : _subgraphs{nullptr}, _execution{nullptr},
- _kernel_registry{std::make_shared<onert::frontend::custom::KernelRegistry>()}
+ : _nnpkg{nullptr}, _coptions{}, _compiler_artifact{nullptr}, _execution{nullptr},
+ _kernel_registry{nullptr}, _quant_manager{nullptr}
{
// DO NOTHING
}
+NNFW_STATUS nnfw_session::create(nnfw_session **session)
+{
+ if (session == nullptr)
+ return NNFW_STATUS_UNEXPECTED_NULL;
+ try
+ {
+ auto new_session = std::unique_ptr<nnfw_session>(new nnfw_session());
+ new_session->_kernel_registry = std::make_shared<onert::api::CustomKernelRegistry>();
+ *session = new_session.release();
+ }
+ catch (const std::bad_alloc &e)
+ {
+ std::cerr << "Error during session creation" << std::endl;
+ *session = nullptr; // Set nullptr on error to keep the old behavior
+ return NNFW_STATUS_OUT_OF_MEMORY;
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << "Error during session initialization : " << e.what() << std::endl;
+ *session = nullptr; // Set nullptr on error to keep the old behavior
+ return NNFW_STATUS_ERROR;
+ }
+ return NNFW_STATUS_NO_ERROR;
+}
+
nnfw_session::~nnfw_session() = default;
NNFW_STATUS nnfw_session::load_circle_from_buffer(uint8_t *buffer, size_t size)
@@ -112,14 +266,62 @@ NNFW_STATUS nnfw_session::load_circle_from_buffer(uint8_t *buffer, size_t size)
if (size == 0)
return NNFW_STATUS_ERROR;
- _subgraphs = onert::circle_loader::loadModel(buffer, size);
- _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs);
+ try
+ {
+ auto model = onert::circle_loader::loadModel(buffer, size);
+ _nnpkg = std::make_shared<onert::ir::NNPkg>(std::move(model));
+ _coptions.push_back(onert::compiler::CompilerOptions::fromGlobalConfig());
+ _state = State::MODEL_LOADED;
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << "Error during model loading : " << e.what() << std::endl;
+ return NNFW_STATUS_ERROR;
+ }
+ return NNFW_STATUS_NO_ERROR;
+}
- _state = State::MODEL_LOADED;
+NNFW_STATUS nnfw_session::load_model_from_modelfile(const char *model_file_path)
+{
+ if (!isStateInitialized())
+ return NNFW_STATUS_INVALID_STATE;
+
+ if (!model_file_path)
+ {
+ std::cerr << "Model file path is null." << std::endl;
+ return NNFW_STATUS_UNEXPECTED_NULL;
+ }
+
+ // Create quantize manager
+ _quant_manager = std::make_unique<onert::odc::QuantizeManager>(std::string(model_file_path));
+
+ std::string filename{model_file_path};
+ // TODO: Use std::filesystem::path when we can use c++17.
+ auto dotidx = filename.find_last_of('.');
+ if (dotidx == std::string::npos)
+ {
+ std::cerr << "Invalid model file path. Please use file with extension." << std::endl;
+ return NNFW_STATUS_ERROR;
+ }
+ std::string model_type = filename.substr(dotidx + 1); // + 1 to exclude dot
+ try
+ {
+ auto model = loadModel(filename, model_type);
+ if (model == nullptr)
+ return NNFW_STATUS_ERROR;
+ _nnpkg = std::make_shared<onert::ir::NNPkg>(std::move(model));
+ _coptions.push_back(onert::compiler::CompilerOptions::fromGlobalConfig());
+ _state = State::MODEL_LOADED;
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << "Error during model loading : " << e.what() << std::endl;
+ return NNFW_STATUS_ERROR;
+ }
return NNFW_STATUS_NO_ERROR;
}
-NNFW_STATUS nnfw_session::load_model_from_file(const char *package_dir)
+NNFW_STATUS nnfw_session::load_model_from_nnpackage(const char *package_dir)
{
if (!isStateInitialized())
return NNFW_STATUS_INVALID_STATE;
@@ -147,8 +349,8 @@ NNFW_STATUS nnfw_session::load_model_from_file(const char *package_dir)
try
{
- std::string manifest_file_name(package_dir);
- manifest_file_name += "/metadata/MANIFEST";
+ std::string package_path(package_dir);
+ std::string manifest_file_name = package_path + "/metadata/MANIFEST";
std::ifstream mfs(manifest_file_name);
// extract the filename of the first(index 0) model
@@ -157,33 +359,79 @@ NNFW_STATUS nnfw_session::load_model_from_file(const char *package_dir)
mfs >> root;
const Json::Value &models = root["models"];
const Json::Value &model_types = root["model-types"];
+ const Json::Value &configs = root["configs"];
- auto model_file_path = package_dir + std::string("/") + models[0].asString(); // first model
- auto model_type = model_types[0].asString(); // first model's type
- if (model_type == "tflite")
+ if (!configs.empty() && !configs[0].empty())
{
- _subgraphs = onert::tflite_loader::loadModel(model_file_path.c_str());
+ auto filepath = package_path + std::string("/metadata/") + configs[0].asString();
+
+ onert::util::CfgKeyValues keyValues;
+ if (loadConfigure(filepath, keyValues))
+ {
+ onert::util::setConfigKeyValues(keyValues);
+ }
}
- else if (model_type == "circle")
+ _nnpkg = std::make_shared<onert::ir::NNPkg>();
+ auto num_models = models.size();
+ if (num_models == 0 || (num_models - 1) > onert::ir::ModelIndex::max())
{
- _subgraphs = onert::circle_loader::loadModel(model_file_path.c_str());
+ std::cerr << "Invalid model size - " << std::to_string(num_models) << std::endl;
+ return NNFW_STATUS_ERROR;
}
- else
+
+ // Create quantize manager
+ // TODO Support multiple models
+ auto const model_filename = package_path + std::string("/") + models[0].asString();
+ _quant_manager = std::make_unique<onert::odc::QuantizeManager>(model_filename);
+
+ for (uint16_t i = 0; i < num_models; ++i)
{
- std::cerr << "Unsupported model type in MANIFEST" << std::endl;
- return NNFW_STATUS_ERROR;
+ auto model_file_path = package_path + std::string("/") + models[i].asString();
+ auto model_type = model_types[i].asString();
+ auto model = loadModel(model_file_path, model_type);
+ if (model == nullptr)
+ return NNFW_STATUS_ERROR;
+ model->bindKernelBuilder(_kernel_registry->getBuilder());
+ _nnpkg->push(onert::ir::ModelIndex{i}, std::move(model));
+ _coptions.push_back(onert::compiler::CompilerOptions::fromGlobalConfig());
+ }
+
+ auto toIODesc = [](std::string str) {
+ auto indices = nnfw::misc::split(str, ':');
+ if (indices.size() != 3)
+ {
+ std::cerr << "IODesc should be 3-tuple." << std::endl;
+ return onert::ir::IODesc{};
+ }
+ auto model_idx = static_cast<uint32_t>(std::stoi(indices.at(0)));
+ auto subgraph_idx = static_cast<uint32_t>(std::stoi(indices.at(1)));
+ auto operand_idx = static_cast<uint32_t>(std::stoi(indices.at(2)));
+ return onert::ir::IODesc{model_idx, subgraph_idx, operand_idx};
+ };
+ // read pkg-inputs and pkg-outputs
+ const Json::Value &pkg_inputs = root["pkg-inputs"];
+ for (uint32_t i = 0; i < pkg_inputs.size(); ++i)
+ _nnpkg->addInput(toIODesc(pkg_inputs[i].asString()));
+ const Json::Value &pkg_outputs = root["pkg-outputs"];
+ for (uint32_t i = 0; i < pkg_outputs.size(); ++i)
+ _nnpkg->addOutput(toIODesc(pkg_outputs[i].asString()));
+ // read model-connect
+ const Json::Value &fromtos = root["model-connect"];
+ for (uint32_t i = 0; i < fromtos.size(); ++i)
+ {
+ const Json::Value &tos = fromtos[i]["to"];
+ for (uint32_t j = 0; j < tos.size(); ++j)
+ _nnpkg->addEdge(toIODesc(fromtos[i]["from"].asString()), toIODesc(tos[j].asString()));
}
- _subgraphs->primary()->bindKernelBuilder(_kernel_registry->getBuilder());
+
+ _nnpkg->verify();
+ _state = State::MODEL_LOADED;
}
catch (const std::exception &e)
{
std::cerr << "Error during model loading : " << e.what() << std::endl;
return NNFW_STATUS_ERROR;
}
-
- _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs);
-
- _state = State::MODEL_LOADED;
return NNFW_STATUS_NO_ERROR;
}
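+
+/*
+ * Editor's illustration of a MANIFEST consistent with the parsing above
+ * (field names come from the code; all values are hypothetical):
+ *
+ *   {
+ *     "models": ["a.circle", "b.circle"],
+ *     "model-types": ["circle", "circle"],
+ *     "configs": ["config.cfg"],
+ *     "pkg-inputs": ["0:0:0"],
+ *     "pkg-outputs": ["1:0:1"],
+ *     "model-connect": [{"from": "0:0:1", "to": ["1:0:0"]}]
+ *   }
+ *
+ * Each IODesc string is a "model:subgraph:operand" 3-tuple.
+ */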
@@ -205,18 +453,12 @@ NNFW_STATUS nnfw_session::prepare()
return NNFW_STATUS_INVALID_STATE;
}
- if (!_subgraphs || !primary_subgraph() || primary_subgraph()->isBuildingPhase())
- {
- std::cerr << "Error during model prepare : "
- << "prepare should be run after load_model" << std::endl;
- return NNFW_STATUS_ERROR;
- }
-
try
{
- _subgraphs.reset();
- std::shared_ptr<onert::exec::ExecutorMap> executors = _compiler->compile();
- _execution = std::make_shared<onert::exec::Execution>(executors);
+ auto compiler = onert::compiler::CompilerFactory::get().create(_nnpkg, _coptions);
+ _nnpkg.reset();
+ _compiler_artifact = compiler->compile();
+ _execution = std::make_unique<onert::exec::Execution>(_compiler_artifact->_executors);
}
catch (const std::exception &e)
{
@@ -228,6 +470,12 @@ NNFW_STATUS nnfw_session::prepare()
return NNFW_STATUS_NO_ERROR;
}
+NNFW_STATUS nnfw_session::prepare_pipeline(const char *)
+{
+ std::cerr << "Pipeline prepare_pipeline: deprecated feature " << std::endl;
+ return NNFW_STATUS_ERROR;
+}
+
NNFW_STATUS nnfw_session::run()
{
if (!isStatePreparedOrFinishedRun())
@@ -299,8 +547,8 @@ NNFW_STATUS nnfw_session::set_input(uint32_t index, NNFW_TYPE /*type*/, const vo
if (!buffer && length != 0)
{
std::cerr
- << "Error during nnfw_session::set_input : given buffer is NULL but the length is not 0"
- << std::endl;
+ << "Error during nnfw_session::set_input : given buffer is NULL but the length is not 0"
+ << std::endl;
return NNFW_STATUS_ERROR;
}
@@ -328,8 +576,8 @@ NNFW_STATUS nnfw_session::set_output(uint32_t index, NNFW_TYPE /*type*/, void *b
if (!buffer && length != 0)
{
std::cerr
- << "Error during nnfw_session::set_output : given buffer is NULL but the length is not 0"
- << std::endl;
+ << "Error during nnfw_session::set_output : given buffer is NULL but the length is not 0"
+ << std::endl;
return NNFW_STATUS_ERROR;
}
@@ -357,7 +605,7 @@ NNFW_STATUS nnfw_session::input_size(uint32_t *number)
std::cerr << "Error during nnfw_session::input_size, number is null pointer." << std::endl;
return NNFW_STATUS_UNEXPECTED_NULL;
}
- *number = primary_subgraph()->getInputs().size();
+ *number = getInputSize();
}
catch (const std::exception &e)
{
@@ -379,7 +627,7 @@ NNFW_STATUS nnfw_session::output_size(uint32_t *number)
std::cerr << "Error during nnfw_session::output_size, number is null pointer." << std::endl;
return NNFW_STATUS_UNEXPECTED_NULL;
}
- *number = primary_subgraph()->getOutputs().size();
+ *number = getOutputSize();
}
catch (const std::exception &e)
{
@@ -391,6 +639,13 @@ NNFW_STATUS nnfw_session::output_size(uint32_t *number)
NNFW_STATUS nnfw_session::set_input_layout(uint32_t index, NNFW_LAYOUT layout)
{
+ if (!isStatePreparedOrFinishedRun())
+ {
+ std::cerr << "Error during nnfw_session::set_input_layout : "
+ << "run should be run after prepare" << std::endl;
+ return NNFW_STATUS_INVALID_STATE;
+ }
+
try
{
if (layout != NNFW_LAYOUT_NONE && layout != NNFW_LAYOUT_CHANNELS_FIRST &&
@@ -399,6 +654,7 @@ NNFW_STATUS nnfw_session::set_input_layout(uint32_t index, NNFW_LAYOUT layout)
std::cerr << "Error during nnfw_session::set_input_layout, not supported layout" << std::endl;
return NNFW_STATUS_ERROR;
}
+
_execution->setInputLayout(onert::ir::IOIndex(index), convertLayout(layout));
}
catch (const std::exception &e)
@@ -411,6 +667,13 @@ NNFW_STATUS nnfw_session::set_input_layout(uint32_t index, NNFW_LAYOUT layout)
NNFW_STATUS nnfw_session::set_output_layout(uint32_t index, NNFW_LAYOUT layout)
{
+ if (!isStatePreparedOrFinishedRun())
+ {
+ std::cerr << "Error during nnfw_session::set_output_layout : "
+ << "run should be run after prepare" << std::endl;
+ return NNFW_STATUS_INVALID_STATE;
+ }
+
try
{
if (layout != NNFW_LAYOUT_NONE && layout != NNFW_LAYOUT_CHANNELS_FIRST &&
@@ -420,6 +683,7 @@ NNFW_STATUS nnfw_session::set_output_layout(uint32_t index, NNFW_LAYOUT layout)
<< std::endl;
return NNFW_STATUS_ERROR;
}
+
_execution->setOutputLayout(onert::ir::IOIndex(index), convertLayout(layout));
}
catch (const std::exception &e)
@@ -430,30 +694,6 @@ NNFW_STATUS nnfw_session::set_output_layout(uint32_t index, NNFW_LAYOUT layout)
return NNFW_STATUS_NO_ERROR;
}
-static NNFW_TYPE datatype_to_nnfw_dtype(onert::ir::DataType dt)
-{
- using onert::ir::DataType;
- switch (dt)
- {
- case DataType::FLOAT32:
- return NNFW_TYPE_TENSOR_FLOAT32;
- case DataType::INT32:
- return NNFW_TYPE_TENSOR_INT32;
- case DataType::QUANT_UINT8_ASYMM:
- return NNFW_TYPE_TENSOR_QUANT8_ASYMM;
- case DataType::BOOL8:
- return NNFW_TYPE_TENSOR_BOOL;
- case DataType::UINT8:
- return NNFW_TYPE_TENSOR_UINT8;
- case DataType::INT64:
- return NNFW_TYPE_TENSOR_INT64;
- case DataType::UINT32:
- case DataType::QUANT_INT8_SYMM:
- default:
- throw std::runtime_error("Error: Model has type that runtime API does not support.");
- }
-}
-
NNFW_STATUS nnfw_session::apply_tensorinfo(uint32_t index, nnfw_tensorinfo ti)
{
// sanity check
@@ -481,29 +721,18 @@ NNFW_STATUS nnfw_session::apply_tensorinfo(uint32_t index, nnfw_tensorinfo ti)
}
}
- auto ind = primary_subgraph()->getInputs().at(index);
- auto &input = primary_subgraph()->operands().at(ind);
-
onert::ir::Shape new_shape(ti.rank);
for (int32_t i = 0; i < ti.rank; i++)
new_shape.dim(i) = ti.dims[i];
- // if passed shape is same with the shape of model, do nothing
- if (input.info().shape() == new_shape)
- return NNFW_STATUS_NO_ERROR;
-
if (!isStatePreparedOrFinishedRun())
{
- // In this case, if we apply input shape in primary_subgraph, it will propagate after
- // compilation and excution
- // overwrite input shape with the shape from ti
- input.info().shape(new_shape);
+ // In this case, the applied input shape will propagate through compilation and execution
+ _nnpkg->changeInputShape(index, new_shape);
}
else // when called after nnfw_session::prepare()
- {
_execution->changeInputShape(onert::ir::IOIndex(index), new_shape);
- }
return NNFW_STATUS_NO_ERROR;
}
@@ -527,22 +756,26 @@ NNFW_STATUS nnfw_session::input_tensorinfo(uint32_t index, nnfw_tensorinfo *ti)
<< std::endl;
return NNFW_STATUS_UNEXPECTED_NULL;
}
- if (index >= primary_subgraph()->getInputs().size())
+
+ if (index >= getInputSize())
{
std::cerr << "Error during nnfw_session::input_tensorinfo, index is out of range."
<< std::endl;
return NNFW_STATUS_ERROR;
}
- auto opidx = primary_subgraph()->getInputs().at(index);
- auto shape = primary_subgraph()->operands().at(opidx).shape();
- if (isStatePreparedOrFinishedRun())
- shape = _execution->getInputShape(onert::ir::IOIndex{index});
- ti->rank = shape.rank();
- for (int j = 0; j < ti->rank; ++j)
+
+ if (isStateModelLoaded())
+ {
+ auto info = _nnpkg->inputInfo(index);
+ fillTensorInfo(ti, info.shape(), info.typeInfo().type());
+ }
+ else
{
- ti->dims[j] = shape.dim(j);
+ auto io_index = onert::ir::IOIndex{index};
+ auto shape = _execution->getInputShape(io_index);
+ auto dtype = _compiler_artifact->_executors->inputInfo(io_index).typeInfo().type();
+ fillTensorInfo(ti, shape, dtype);
}
- ti->dtype = datatype_to_nnfw_dtype(primary_subgraph()->operands().at(opidx).typeInfo().type());
}
catch (const std::exception &e)
{
@@ -564,26 +797,27 @@ NNFW_STATUS nnfw_session::output_tensorinfo(uint32_t index, nnfw_tensorinfo *ti)
return NNFW_STATUS_UNEXPECTED_NULL;
}
- if (index >= primary_subgraph()->getOutputs().size())
- {
- std::cerr << "Error during nnfw_session::output_tensorinfo, index is out of range."
- << std::endl;
- return NNFW_STATUS_ERROR;
- }
-
try
{
- auto opidx = primary_subgraph()->getOutputs().at(index);
- auto shape = primary_subgraph()->operands().at(opidx).shape();
- // If it is called after `nnfw_run` then get the shape from Execution, not from the graph
- if (isStateFinishedRun())
- shape = _execution->getOutputShape(onert::ir::IOIndex{index});
- ti->rank = shape.rank();
- for (int j = 0; j < ti->rank; ++j)
+ if (index >= getOutputSize())
{
- ti->dims[j] = shape.dim(j);
+ std::cerr << "Error during nnfw_session::output_tensorinfo, index is out of range."
+ << std::endl;
+ return NNFW_STATUS_ERROR;
+ }
+
+ if (isStateModelLoaded())
+ {
+ auto info = _nnpkg->outputInfo(index);
+ fillTensorInfo(ti, info.shape(), info.typeInfo().type());
+ }
+ else
+ {
+ auto io_index = onert::ir::IOIndex{index};
+ auto shape = _execution->getOutputShape(io_index);
+ auto dtype = _compiler_artifact->_executors->outputInfo(io_index).typeInfo().type();
+ fillTensorInfo(ti, shape, dtype);
}
- ti->dtype = datatype_to_nnfw_dtype(primary_subgraph()->operands().at(opidx).typeInfo().type());
}
catch (const std::exception &e)
{
@@ -593,6 +827,19 @@ NNFW_STATUS nnfw_session::output_tensorinfo(uint32_t index, nnfw_tensorinfo *ti)
return NNFW_STATUS_NO_ERROR;
}
+
+NNFW_STATUS nnfw_session::push_pipeline_input(std::vector<void *> *, std::vector<uint32_t> *)
+{
+ std::cerr << "Pipeline push_pipeline_input: deprecated feature " << std::endl;
+ return NNFW_STATUS_ERROR;
+}
+
+NNFW_STATUS nnfw_session::pop_pipeline_output(std::vector<void *> *)
+{
+ std::cerr << "Pipeline pop_pipeline_output: deprecated feature " << std::endl;
+ return NNFW_STATUS_ERROR;
+}
+
NNFW_STATUS nnfw_session::register_custom_operation(const std::string &id,
nnfw_custom_eval eval_func)
{
@@ -635,7 +882,7 @@ NNFW_STATUS nnfw_session::set_available_backends(const char *backends)
if (null_terminating(backends, MAX_BACKEND_NAME_LENGTH) == false)
return NNFW_STATUS_ERROR;
- auto &options = _compiler->options();
+ auto &options = *_coptions[0];
using namespace onert::util;
@@ -669,7 +916,7 @@ NNFW_STATUS nnfw_session::set_op_backend(const char *op, const char *backend)
return NNFW_STATUS_ERROR;
}
- auto &opcode_to_backend = _compiler->options().manual_scheduler_options.opcode_to_backend;
+ auto &opcode_to_backend = _coptions[0]->manual_scheduler_options.opcode_to_backend;
opcode_to_backend.emplace(onert::ir::toOpCode(key), backend);
}
catch (const std::exception &e)
@@ -688,7 +935,7 @@ NNFW_STATUS nnfw_session::set_config(const char *key, const char *value)
if (!key || !value)
return NNFW_STATUS_UNEXPECTED_NULL;
- auto &options = _compiler->options();
+ auto &options = *_coptions[0];
using namespace onert::util;
@@ -702,10 +949,6 @@ NNFW_STATUS nnfw_session::set_config(const char *key, const char *value)
{
options.graph_dump_level = toInt(value);
}
- else if (skey == config::OP_SEQ_MAX_NODE)
- {
- options.op_seq_max_node = toInt(value);
- }
else if (skey == config::EXECUTOR)
{
options.executor = value;
@@ -722,10 +965,6 @@ NNFW_STATUS nnfw_session::set_config(const char *key, const char *value)
{
options.he_profiling_mode = toBool(value);
}
- else if (skey == config::DISABLE_COMPILE)
- {
- options.disable_compile = toBool(value);
- }
else
{
return NNFW_STATUS_ERROR;
@@ -733,22 +972,45 @@ NNFW_STATUS nnfw_session::set_config(const char *key, const char *value)
return NNFW_STATUS_NO_ERROR;
}
-onert::ir::Graph *nnfw_session::primary_subgraph()
+const onert::ir::IGraph *nnfw_session::primary_subgraph()
{
- if (_subgraphs)
+ if (_nnpkg != nullptr)
{
- assert(!_execution);
- return _subgraphs->primary().get();
+ assert(_execution == nullptr);
+ return _nnpkg->primary_model()->primary_subgraph().get();
}
else
{
- assert(_execution);
- // TODO Remove const_cast
+ assert(_execution != nullptr);
// We assumed the graph will not change after compilation, but shape could change
- return const_cast<onert::ir::Graph *>(&_execution->primary_subgraph());
+ return &_execution->primary_subgraph();
}
}
+uint32_t nnfw_session::getInputSize()
+{
+ if (isStateInitialized())
+ throw std::runtime_error{"Model is not loaded yet"};
+
+ if (isStateModelLoaded())
+ return _nnpkg->inputSize();
+
+ // Session is prepared (general inference)
+ return _compiler_artifact->_executors->inputSize();
+}
+
+uint32_t nnfw_session::getOutputSize()
+{
+ if (isStateInitialized())
+ throw std::runtime_error{"Model is not loaded yet"};
+
+ if (isStateModelLoaded())
+ return _nnpkg->outputSize();
+
+ // Session is prepared (general inference)
+ return _compiler_artifact->_executors->outputSize();
+}
+
NNFW_STATUS nnfw_session::get_config(const char *key, char *value, size_t value_size)
{
if (!isStateModelLoaded())
@@ -757,7 +1019,7 @@ NNFW_STATUS nnfw_session::get_config(const char *key, char *value, size_t value_
if (!key || !value)
return NNFW_STATUS_UNEXPECTED_NULL;
- auto &options = _compiler->options();
+ auto &options = *_coptions[0];
auto check_boundary = [](size_t dest_size, std::string &src) {
if (dest_size < src.length() + 1 /* for '\0' */)
@@ -768,7 +1030,9 @@ NNFW_STATUS nnfw_session::get_config(const char *key, char *value, size_t value_
return true;
};
- if (key == onert::util::config::BACKENDS)
+ const std::string skey = key;
+
+ if (skey == onert::util::config::BACKENDS)
{
if (options.backend_list.size() == 0)
return NNFW_STATUS_NO_ERROR; // no setting backend is not an error of get_config_str()
@@ -780,7 +1044,7 @@ NNFW_STATUS nnfw_session::get_config(const char *key, char *value, size_t value_
strncpy(value, str.c_str(), value_size);
}
- else if (key == onert::util::config::EXECUTOR)
+ else if (skey == onert::util::config::EXECUTOR)
{
if (!check_boundary(value_size, options.executor))
return NNFW_STATUS_ERROR;
@@ -799,9 +1063,9 @@ bool nnfw_session::isStateInitialized()
{
if (_state == State::INITIALIZED)
{
- assert(!_subgraphs);
- assert(!_compiler);
- assert(!_execution);
+ assert(_nnpkg == nullptr);
+ assert(_coptions.empty());
+ assert(_execution == nullptr);
return true;
}
else
@@ -814,10 +1078,9 @@ bool nnfw_session::isStateModelLoaded()
{
if (_state == State::MODEL_LOADED)
{
- assert(_subgraphs);
- assert(_compiler);
- assert(!_execution);
- assert(!primary_subgraph()->isBuildingPhase());
+ assert(_nnpkg != nullptr);
+ assert(!_coptions.empty());
+ assert(_execution == nullptr);
return true;
}
else
@@ -830,10 +1093,9 @@ bool nnfw_session::isStatePrepared()
{
if (_state == State::PREPARED)
{
- assert(!_subgraphs);
- assert(_compiler);
- assert(_execution);
- assert(!primary_subgraph()->isBuildingPhase());
+ assert(_nnpkg == nullptr);
+ assert(!_coptions.empty());
+ assert(_execution != nullptr);
return true;
}
else
@@ -846,10 +1108,9 @@ bool nnfw_session::isStateRunning()
{
if (_state == State::RUNNING)
{
- assert(!_subgraphs);
- assert(_compiler);
- assert(_execution);
- assert(!primary_subgraph()->isBuildingPhase());
+ assert(_nnpkg == nullptr);
+ assert(!_coptions.empty());
+ assert(_execution != nullptr);
return true;
}
return false;
@@ -859,10 +1120,9 @@ bool nnfw_session::isStateFinishedRun()
{
if (_state == State::FINISHED_RUN)
{
- assert(!_subgraphs);
- assert(_compiler);
- assert(_execution);
- assert(!primary_subgraph()->isBuildingPhase());
+ assert(_nnpkg == nullptr);
+ assert(!_coptions.empty());
+ assert(_execution != nullptr);
return true;
}
else
@@ -885,3 +1145,415 @@ NNFW_STATUS nnfw_session::output_tensorindex(const char *tensorname, uint32_t *i
{
return getTensorIndexImpl(*primary_subgraph(), tensorname, index, false);
}
+
+NNFW_STATUS nnfw_session::set_backends_per_operation(const char *backend_settings)
+{
+ if (backend_settings == NULL)
+ return NNFW_STATUS_ERROR;
+
+ if (!isStateModelLoaded())
+ return NNFW_STATUS_INVALID_STATE;
+
+ // Backend mapping for all operations
+ auto &ms_options = _coptions[0]->manual_scheduler_options;
+ ms_options.setBackendMap(std::string{backend_settings});
+
+ return NNFW_STATUS_NO_ERROR;
+}
+
+#ifdef ONERT_TRAIN
+NNFW_STATUS nnfw_session::train_prepare(const nnfw_train_info *info)
+{
+ // We may need a different state to represent that a training model is loaded
+ if (!isStateModelLoaded())
+ {
+ std::cerr << "Error during model prepare training: ";
+ if (_state == State::PREPARED_TRAINING)
+ std::cerr << "prepare should be run once";
+ else
+ std::cerr << "invalid state";
+ std::cerr << std::endl;
+ return NNFW_STATUS_INVALID_STATE;
+ }
+
+ try
+ {
+ nnfw_train_info tinfo;
+ if (info != nullptr)
+ {
+ tinfo = *info;
+ }
+
+ auto convertLossType = [](const int &type) {
+ if (type == NNFW_TRAIN_LOSS_MEAN_SQUARED_ERROR)
+ return onert::ir::operation::Loss::Type::MEAN_SQUARED_ERROR;
+ else if (type == NNFW_TRAIN_LOSS_CATEGORICAL_CROSSENTROPY)
+ return onert::ir::operation::Loss::Type::CATEGORICAL_CROSSENTROPY;
+ else
+ throw std::runtime_error("not supported loss type");
+ };
+ onert::compiler::train::LossInfo loss_info;
+ loss_info.type = convertLossType(tinfo.loss);
+
+ auto convertOptType = [](const int &type) {
+ if (type == NNFW_TRAIN_OPTIMIZER_SGD)
+ return onert::exec::train::optimizer::OptimizerCode::SGD;
+ else if (type == NNFW_TRAIN_OPTIMIZER_ADAM)
+ return onert::exec::train::optimizer::OptimizerCode::Adam;
+ else
+ throw std::runtime_error("not supported optimizer type");
+ };
+ onert::compiler::train::OptimizerInfo opt_info;
+ opt_info.learning_rate = tinfo.learning_rate;
+ opt_info.optim_code = convertOptType(tinfo.opt);
+
+ onert::compiler::train::TrainingInfo training_info;
+ training_info.setBatchSize(tinfo.batch_size);
+ training_info.setLossInfo(loss_info);
+ training_info.setOptimizerInfo(opt_info);
+
+ auto compiler =
+ onert::compiler::CompilerFactory::get().create(_nnpkg, _coptions, &training_info);
+ _nnpkg.reset();
+ _compiler_artifact = compiler->compile();
+ _execution = std::make_unique<onert::exec::Execution>(_compiler_artifact->_executors);
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << "Error during nnfw_session::train_prepare : " << e.what() << std::endl;
+ return NNFW_STATUS_ERROR;
+ }
+
+ _state = State::PREPARED_TRAINING;
+ return NNFW_STATUS_NO_ERROR;
+}
+
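+// Typical flow (sketch): after train_prepare(), call train_set_input() and
+// train_set_expected() per IO, then train_run(true) to update weights; once
+// the state is FINISHED_TRAINING, train_get_loss() reads the per-output loss.
+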
+NNFW_STATUS nnfw_session::train_input_tensorinfo(uint32_t index, nnfw_tensorinfo *ti)
+{
+ if (!isStatePreparedOrFinishedTraining())
+ {
+ std::cerr << "Error during nnfw_session::train_input_tensorinfo : invalid state" << std::endl;
+ return NNFW_STATUS_INVALID_STATE;
+ }
+
+ // Check index is valid: [0, getInputSize())
+
+ // NYI
+ (void)index;
+ (void)ti;
+ return NNFW_STATUS_ERROR;
+}
+
+NNFW_STATUS nnfw_session::train_expected_tensorinfo(uint32_t index, nnfw_tensorinfo *ti)
+{
+ if (!isStatePreparedOrFinishedTraining())
+ {
+ std::cerr << "Error during nnfw_session::train_expected_tensorinfo : invalid state"
+ << std::endl;
+ return NNFW_STATUS_INVALID_STATE;
+ }
+
+ // Check index is valid: [0, getExpectedSize())
+
+ // NYI
+ (void)index;
+ (void)ti;
+ return NNFW_STATUS_ERROR;
+}
+
+NNFW_STATUS nnfw_session::train_set_input(uint32_t index, const void *input,
+ const nnfw_tensorinfo *input_tensorinfo)
+{
+ if (input == nullptr)
+ {
+ std::cerr << "Error during nnfw_session::train_set_input : input buffer is null" << std::endl;
+ return NNFW_STATUS_UNEXPECTED_NULL;
+ }
+
+ if (!isStatePreparedOrFinishedTraining())
+ {
+ std::cerr << "Error during nnfw_session::train_set_input : invalid state" << std::endl;
+ return NNFW_STATUS_INVALID_STATE;
+ }
+
+ if (index >= getInputSize())
+ {
+ std::cerr << "Error during nnfw_session::train_set_input : index is out of range" << std::endl;
+ return NNFW_STATUS_ERROR;
+ }
+
+ try
+ {
+ auto ind = onert::ir::IOIndex(index);
+ auto size = _execution->getInputTotalSize(ind);
+ if (input_tensorinfo && getBufSize(input_tensorinfo) != size)
+ {
+ std::cerr
+ << "Error during nnfw_session::train_set_input : not supporeted to change tensorinfo"
+ << std::endl;
+ return NNFW_STATUS_ERROR;
+ }
+
+ _execution->setInput(ind, input, size);
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << "Error during nnfw_session::train_set_input : " << e.what() << std::endl;
+ return NNFW_STATUS_ERROR;
+ }
+
+ return NNFW_STATUS_NO_ERROR;
+}
+
+NNFW_STATUS nnfw_session::train_set_expected(uint32_t index, const void *expected,
+ const nnfw_tensorinfo *expected_tensorinfo)
+{
+ if (expected == nullptr)
+ {
+ std::cerr << "Error during nnfw_session::train_set_expected : expected buffer is null"
+ << std::endl;
+ return NNFW_STATUS_UNEXPECTED_NULL;
+ }
+
+ if (!isStatePreparedOrFinishedTraining())
+ {
+ std::cerr << "Error during nnfw_session::train_set_expected : invalid state" << std::endl;
+ return NNFW_STATUS_INVALID_STATE;
+ }
+
+ if (index >= getOutputSize())
+ {
+ std::cerr << "Error during nnfw_session::train_set_expected : index is out of range"
+ << std::endl;
+ return NNFW_STATUS_ERROR;
+ }
+
+ try
+ {
+ auto output_ind = onert::ir::IOIndex(index);
+ auto size = _execution->getOutputTotalSize(output_ind);
+ if (expected_tensorinfo && getBufSize(expected_tensorinfo) != size)
+ {
+ std::cerr << "Error during nnfw_session::train_set_expected : invalid tensorinfo"
+ << std::endl;
+ return NNFW_STATUS_ERROR;
+ }
+
+ // NOTE Find the input index that carries the expected (loss) data.
+ // One extra input is appended for each model output, so the expected data
+ // for output `index` goes to input (total inputs - total outputs + index).
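+ // e.g. (sketch) a model with 2 data inputs and 1 output reports
+ // getInputSize() == 3 (2 data inputs + 1 appended expected input), so the
+ // expected data for output 0 is fed to input index 3 - 1 + 0 == 2.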
+ auto input_index = getInputSize() - getOutputSize() + index;
+ auto input_ind = onert::ir::IOIndex(input_index);
+ _execution->setInput(input_ind, expected, size);
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << "Error during nnfw_session::train_set_expected : " << e.what() << std::endl;
+ return NNFW_STATUS_ERROR;
+ }
+
+ return NNFW_STATUS_NO_ERROR;
+}
+
+NNFW_STATUS nnfw_session::train_run(bool update_weights)
+{
+ if (!isStatePreparedOrFinishedTraining())
+ {
+ std::cerr << "Error during nnfw_session::train_run : invalid state" << std::endl;
+ return NNFW_STATUS_INVALID_STATE;
+ }
+
+ try
+ {
+ if (update_weights)
+ {
+ _execution->train(_training_step++);
+ }
+ else
+ _execution->execute();
+ }
+ catch (const onert::InsufficientBufferSizeException &e)
+ {
+ // Currently an insufficient buffer size always refers to the output buffer.
+ std::cerr << "Error during nnfw_session::train_run : " << e.what() << std::endl;
+ return NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE;
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << "Error during nnfw_session::train_run : " << e.what() << std::endl;
+ return NNFW_STATUS_ERROR;
+ }
+
+ _state = State::FINISHED_TRAINING;
+ return NNFW_STATUS_NO_ERROR;
+}
+
+NNFW_STATUS nnfw_session::train_get_loss(uint32_t index, float *loss)
+{
+ if (loss == nullptr)
+ {
+ std::cerr << "Error during nnfw_session::train_get_loss : loss is null" << std::endl;
+ return NNFW_STATUS_UNEXPECTED_NULL;
+ }
+
+ if (!isStateFinishedTraining())
+ {
+ std::cerr << "Error during nnfw_session::train_get_loss : invalid state" << std::endl;
+ return NNFW_STATUS_INVALID_STATE;
+ }
+
+ if (index >= getOutputSize())
+ {
+ std::cerr << "Error during nnfw_session::train_get_loss : index is out of range" << std::endl;
+ return NNFW_STATUS_ERROR;
+ }
+
+ try
+ {
+ auto ind = onert::ir::IOIndex(index);
+ *loss = _execution->getLoss(ind);
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << "Error during nnfw_session::train_get_loss : " << e.what() << std::endl;
+ return NNFW_STATUS_ERROR;
+ }
+
+ return NNFW_STATUS_NO_ERROR;
+}
+
+NNFW_STATUS nnfw_session::train_export_circle(const char *path)
+{
+ if (path == nullptr)
+ {
+ std::cerr << "Error during nnfw_session::train_export_circle : path is null" << std::endl;
+ return NNFW_STATUS_UNEXPECTED_NULL;
+ }
+
+ // Check training has been run (state must be FINISHED_TRAINING)
+ if (!isStateFinishedTraining())
+ {
+ std::cerr << "Error during nnfw_session::train_export_circle : invalid state" << std::endl;
+ return NNFW_STATUS_INVALID_STATE;
+ }
+
+ // NYI
+ return NNFW_STATUS_ERROR;
+}
+
+bool nnfw_session::isStatePreparedTraining()
+{
+ if (_state == State::PREPARED_TRAINING)
+ {
+ assert(_nnpkg == nullptr);
+ assert(!_coptions.empty());
+ assert(_execution != nullptr);
+ return true;
+ }
+ else
+ return false;
+}
+
+bool nnfw_session::isStateFinishedTraining()
+{
+ if (_state == State::FINISHED_TRAINING)
+ {
+ assert(_nnpkg == nullptr);
+ assert(!_coptions.empty());
+ assert(_execution != nullptr);
+ return true;
+ }
+ else
+ return false;
+}
+
+bool nnfw_session::isStatePreparedOrFinishedTraining()
+{
+ return isStatePreparedTraining() || isStateFinishedTraining();
+}
+
+#endif // ONERT_TRAIN
+
+NNFW_STATUS nnfw_session::set_quantization_type(NNFW_QUANTIZE_TYPE qtype)
+{
+ try
+ {
+ if (!isStateModelLoaded())
+ {
+ std::cerr << "invalid state" << std::endl;
+ return NNFW_STATUS_INVALID_STATE;
+ }
+
+ bool is_q16 = false;
+ switch (qtype)
+ {
+ case NNFW_QUANTIZE_TYPE_U8_ASYM:
+ break;
+ case NNFW_QUANTIZE_TYPE_I16_SYM:
+ is_q16 = true;
+ break;
+ default:
+ return NNFW_STATUS_INVALID_STATE;
+ }
+ _quant_manager->quantizeType(is_q16);
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << "Error during nnfw_session::set_quantization_type : " << e.what() << std::endl;
+ return NNFW_STATUS_ERROR;
+ }
+
+ return NNFW_STATUS_NO_ERROR;
+}
+
+NNFW_STATUS nnfw_session::set_quantized_model_path(const char *path)
+{
+ try
+ {
+ if (!isStateModelLoaded())
+ {
+ std::cerr << "invalid state" << std::endl;
+ return NNFW_STATUS_INVALID_STATE;
+ }
+
+ _quant_manager->exportModelPath(std::string(path));
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << "Error during nnfw_session::set_quantized_model_path : " << e.what() << std::endl;
+ return NNFW_STATUS_ERROR;
+ }
+
+ return NNFW_STATUS_NO_ERROR;
+}
+
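+// Typical flow (sketch): with a model loaded, call set_quantization_type(),
+// then set_quantized_model_path(), then quantize(); quantize() reloads the
+// exported circle model and swaps it into the package.
+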
+NNFW_STATUS nnfw_session::quantize()
+{
+ try
+ {
+ if (!isStateModelLoaded())
+ {
+ std::cerr << "invalid state" << std::endl;
+ return NNFW_STATUS_INVALID_STATE;
+ }
+
+ auto result = _quant_manager->quantize();
+ if (!result)
+ return NNFW_STATUS_INVALID_STATE;
+
+ // Replace model
+ // TODO Support buffer replace, not file reload
+ auto model = loadModel(_quant_manager->exportModelPath(), "circle");
+ if (model == nullptr)
+ return NNFW_STATUS_ERROR;
+ _nnpkg->replaceModel(std::move(model));
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << "Error during nnfw_session::quantize : " << e.what() << std::endl;
+ return NNFW_STATUS_ERROR;
+ }
+
+ return NNFW_STATUS_NO_ERROR;
+}
diff --git a/runtime/onert/api/src/nnfw_api_internal.h b/runtime/onert/api/src/nnfw_api_internal.h
index 604ba38b4..62791765e 100644
--- a/runtime/onert/api/src/nnfw_api_internal.h
+++ b/runtime/onert/api/src/nnfw_api_internal.h
@@ -20,33 +20,38 @@
#include "nnfw.h"
#include "nnfw_experimental.h"
-#include <util/GeneralConfigSource.h>
+#include <util/TracingCtx.h>
#include <string>
#include <memory>
+#include <thread>
+#include <vector>
namespace onert
{
-namespace frontend
+namespace api
{
-namespace custom
-{
-class KernelRegistry;
-}
-} // namespace frontend
+class CustomKernelRegistry;
+} // namespace api
namespace exec
{
class Execution;
} // namespace exec
namespace ir
{
-class Graph;
-class Subgraphs;
+struct IGraph;
+class Model;
+class NNPkg;
} // namespace ir
namespace compiler
{
-class Compiler;
+struct CompilerArtifact;
+class CompilerOptions;
} // namespace compiler
+namespace odc
+{
+class QuantizeManager;
+} // namespace odc
} // namespace onert
struct nnfw_session
@@ -89,19 +94,31 @@ private:
*/
enum class State
{
- INITIALIZED, //< Session is initialized and nothing has done to it
- MODEL_LOADED, //< Model is loaded
- PREPARED, //< Prepared(compiled) for execution
- RUNNING, //< Execution is in progress (only for asynchronous execution)
- FINISHED_RUN //< Executed at least once
+ INITIALIZED, //< Session is initialized and nothing has been done to it
+ MODEL_LOADED, //< Model is loaded
+ PREPARED, //< Prepared(compiled) for execution
+ RUNNING, //< Execution is in progress (only for asynchronous execution)
+ FINISHED_RUN, //< Executed at least once
+ PREPARED_TRAINING, //< Prepared for training
+ FINISHED_TRAINING //< Trained at least once
};
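+ // State flow (sketch): INITIALIZED -> MODEL_LOADED -> PREPARED ->
+ // RUNNING/FINISHED_RUN for inference; MODEL_LOADED -> PREPARED_TRAINING ->
+ // FINISHED_TRAINING for training (see the isState* helpers below).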
public:
+ /**
+ * @brief Factory method. It creates and initializes nnfw_session
+ *
+ * @note Use factory instead of constructor to get status
+ */
+ static NNFW_STATUS create(nnfw_session **session);
+
+private:
nnfw_session();
- ~nnfw_session();
- NNFW_STATUS load_model_from_file(const char *package_file_path);
+public:
+ ~nnfw_session();
+ NNFW_STATUS load_model_from_nnpackage(const char *package_file_path);
NNFW_STATUS prepare();
+ NNFW_STATUS prepare_pipeline(const char *map_file_path);
NNFW_STATUS run();
NNFW_STATUS run_async();
@@ -132,30 +149,69 @@ public:
NNFW_STATUS set_config(const char *key, const char *value);
NNFW_STATUS get_config(const char *key, char *value, size_t value_size);
NNFW_STATUS load_circle_from_buffer(uint8_t *buffer, size_t size);
+ NNFW_STATUS load_model_from_modelfile(const char *file_path);
//
// Experimental API
//
+ NNFW_STATUS push_pipeline_input(std::vector<void *> *inputs, std::vector<uint32_t> *lengths);
+ NNFW_STATUS pop_pipeline_output(std::vector<void *> *outputs);
NNFW_STATUS register_custom_operation(const std::string &id, nnfw_custom_eval eval_func);
NNFW_STATUS input_tensorindex(const char *tensorname, uint32_t *index);
NNFW_STATUS output_tensorindex(const char *tensorname, uint32_t *index);
+ /**
+ * @brief Set backends with string-encoded mapping from operation index to backend type
+ * (cpu, acl_cl)
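+ * e.g. "0=cpu;1=acl_cl" (assumed encoding: semicolon-separated
+ * opindex=backend pairs, parsed by ManualSchedulerOptions::setBackendMap)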
+ */
+ NNFW_STATUS set_backends_per_operation(const char *backend_settings);
+
+#ifdef ONERT_TRAIN
+ NNFW_STATUS train_prepare(const nnfw_train_info *info);
+ NNFW_STATUS train_input_tensorinfo(uint32_t index, nnfw_tensorinfo *ti);
+ NNFW_STATUS train_expected_tensorinfo(uint32_t index, nnfw_tensorinfo *ti);
+ NNFW_STATUS train_set_input(uint32_t index, const void *input,
+ const nnfw_tensorinfo *input_tensorinfo);
+ NNFW_STATUS train_set_expected(uint32_t index, const void *expected,
+ const nnfw_tensorinfo *expected_tensorinfo);
+ NNFW_STATUS train_run(bool update_weights);
+ NNFW_STATUS train_get_loss(uint32_t index, float *loss);
+ NNFW_STATUS train_export_circle(const char *path);
+#endif // ONERT_TRAIN
+
+ NNFW_STATUS set_quantization_type(NNFW_QUANTIZE_TYPE qtype);
+ NNFW_STATUS set_quantized_model_path(const char *path);
+ NNFW_STATUS quantize();
private:
- onert::ir::Graph *primary_subgraph();
+ const onert::ir::IGraph *primary_subgraph();
+ uint32_t getInputSize();
+ uint32_t getOutputSize();
+
bool isStateInitialized();
bool isStateModelLoaded();
bool isStatePrepared();
bool isStateRunning();
bool isStateFinishedRun();
bool isStatePreparedOrFinishedRun();
+#ifdef ONERT_TRAIN
+ bool isStatePreparedTraining();
+ bool isStateFinishedTraining();
+ bool isStatePreparedOrFinishedTraining();
+#endif // ONERT_TRAIN
private:
State _state{State::INITIALIZED};
- std::shared_ptr<onert::ir::Subgraphs> _subgraphs;
- std::unique_ptr<onert::compiler::Compiler> _compiler;
- std::shared_ptr<onert::exec::Execution> _execution;
- std::shared_ptr<onert::frontend::custom::KernelRegistry> _kernel_registry;
+ std::shared_ptr<onert::ir::NNPkg> _nnpkg;
+ std::vector<std::unique_ptr<onert::compiler::CompilerOptions>> _coptions;
+ std::shared_ptr<onert::compiler::CompilerArtifact> _compiler_artifact;
+ std::unique_ptr<onert::exec::Execution> _execution;
+ std::shared_ptr<onert::api::CustomKernelRegistry> _kernel_registry;
+ std::vector<std::thread> _threads;
+#ifdef ONERT_TRAIN
+ uint32_t _training_step{0};
+#endif // ONERT_TRAIN
+ std::unique_ptr<onert::odc::QuantizeManager> _quant_manager;
};
#endif // __API_NNFW_API_INTERNAL_H__
diff --git a/runtime/onert/api/src/nnfw_debug.cc b/runtime/onert/api/src/nnfw_debug.cc
index b9f110390..01e5bf8f2 100644
--- a/runtime/onert/api/src/nnfw_debug.cc
+++ b/runtime/onert/api/src/nnfw_debug.cc
@@ -18,12 +18,21 @@
#include <util/ConfigSource.h>
+#define NNFW_RETURN_ERROR_IF_NULL(p) \
+ do \
+ { \
+ if ((p) == NULL) \
+ return NNFW_STATUS_UNEXPECTED_NULL; \
+ } while (0)
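+// NOTE The do { } while (0) wrapper makes the macro expand to a single
+// statement, so it nests safely inside if/else without extra braces.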
+
NNFW_STATUS nnfw_set_config(nnfw_session *session, const char *key, const char *value)
{
+ NNFW_RETURN_ERROR_IF_NULL(session);
return session->set_config(key, value);
}
NNFW_STATUS nnfw_get_config(nnfw_session *session, const char *key, char *value, size_t value_size)
{
+ NNFW_RETURN_ERROR_IF_NULL(session);
return session->get_config(key, value, value_size);
}
diff --git a/runtime/onert/backend/CMakeLists.txt b/runtime/onert/backend/CMakeLists.txt
index 42d622aa8..e6af06afe 100644
--- a/runtime/onert/backend/CMakeLists.txt
+++ b/runtime/onert/backend/CMakeLists.txt
@@ -1,6 +1,21 @@
+# Backend common libs
set(LIB_ONERT_BACKEND_ACL_COMMON onert_backend_acl_common)
+set(LIB_ONERT_BACKEND_CL_COMMON onert_backend_cl_common)
+add_subdirectory(cl_common)
+add_subdirectory(acl_common)
+# Backends
+set(LIB_ONERT_BACKEND_CPU onert_backend_cpu)
add_subdirectory(cpu)
add_subdirectory(acl_cl)
add_subdirectory(acl_neon)
-add_subdirectory(acl_common)
+add_subdirectory(ruy)
+add_subdirectory(gpu_cl)
+add_subdirectory(xnnpack)
+add_subdirectory(trix)
+
+# Backend to train
+if(ENABLE_ONERT_TRAIN)
+ add_subdirectory(train)
+endif(ENABLE_ONERT_TRAIN)
+
diff --git a/runtime/onert/backend/acl_cl/Backend.h b/runtime/onert/backend/acl_cl/Backend.h
index 5c5041378..301ded01f 100644
--- a/runtime/onert/backend/acl_cl/Backend.h
+++ b/runtime/onert/backend/acl_cl/Backend.h
@@ -20,6 +20,7 @@
#include <memory>
#include <backend/Backend.h>
+#include "BackendContext.h"
#include "Config.h"
#include "ConstantInitializer.h"
#include "KernelGenerator.h"
@@ -41,21 +42,20 @@ public:
std::shared_ptr<IConfig> config() const override { return _config; }
- std::unique_ptr<BackendContext> newContext(const ir::Graph &graph,
- const std::shared_ptr<custom::IKernelBuilder> &,
- bool is_linear_executor) const override
+ std::unique_ptr<backend::BackendContext> newContext(ContextData &&data) const override
{
- const auto &operands = graph.operands();
- const auto &operations = graph.operations();
- auto context = std::make_unique<BackendContext>(this, &graph);
+ const auto &graph = *data.graph;
+ const auto &operands = data.graph->operands();
+ const auto is_linear_executor = data.is_linear_executor;
+
+ auto context = std::make_unique<acl_cl::BackendContext>(this, std::move(data));
auto tm = createTensorManager(is_linear_executor);
auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
- auto tb = std::make_shared<TensorBuilder>(operands, tm, tr);
+ auto tb = std::make_shared<TensorBuilder>(operands, tm);
context->tensor_registry = tr;
context->tensor_builder = tb;
context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
- context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr);
- context->tensor_register = nullptr;
+ context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr);
context->optimizer = std::make_shared<Optimizer>(context.get());
return context;
}
diff --git a/runtime/onert/backend/acl_cl/BackendContext.h b/runtime/onert/backend/acl_cl/BackendContext.h
new file mode 100644
index 000000000..5da915825
--- /dev/null
+++ b/runtime/onert/backend/acl_cl/BackendContext.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_CL_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_ACL_CL_BACKEND_CONTEXT_H__
+
+#include <AclBackendContext.h>
+
+#include "ConstantInitializer.h"
+#include "KernelGenerator.h"
+#include "TensorBuilder.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_cl
+{
+
+class Optimizer;
+
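+// The acl_cl BackendContext reuses the shared ACL context template,
+// instantiated with this backend's TensorBuilder, ConstantInitializer,
+// KernelGenerator and Optimizer types.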
+using BackendContext =
+ acl_common::AclBackendContext<TensorBuilder, ConstantInitializer, KernelGenerator, Optimizer>;
+
+} // namespace acl_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_CL_BACKEND_CONTEXT_H__
diff --git a/runtime/onert/backend/acl_cl/CLTimer.h b/runtime/onert/backend/acl_cl/CLTimer.h
index 722dc68ef..a9158e1af 100644
--- a/runtime/onert/backend/acl_cl/CLTimer.h
+++ b/runtime/onert/backend/acl_cl/CLTimer.h
@@ -53,8 +53,8 @@ public:
const cl_event *event_wait_list, cl_event *usr_event) {
cl_event event;
cl_int enqueue_res =
- this->_origin_enqueue_function(command_queue, kernel, work_dim, gwo, gws, lws,
- num_events_in_wait_list, event_wait_list, &event);
+ this->_origin_enqueue_function(command_queue, kernel, work_dim, gwo, gws, lws,
+ num_events_in_wait_list, event_wait_list, &event);
this->_measured_events.emplace_back(event);
// According to spec, if NULL was provided in usr_event - event shouldn't be returned
@@ -73,7 +73,7 @@ public:
if ((props & CL_QUEUE_PROFILING_ENABLE) == 0)
{
cl_scheduler.set_queue(
- cl::CommandQueue(cl_scheduler.context(), props | CL_QUEUE_PROFILING_ENABLE));
+ cl::CommandQueue(cl_scheduler.context(), props | CL_QUEUE_PROFILING_ENABLE));
}
};
diff --git a/runtime/onert/backend/acl_cl/CMakeLists.txt b/runtime/onert/backend/acl_cl/CMakeLists.txt
index 6f91d9691..2c94ea69c 100644
--- a/runtime/onert/backend/acl_cl/CMakeLists.txt
+++ b/runtime/onert/backend/acl_cl/CMakeLists.txt
@@ -16,4 +16,9 @@ target_link_libraries(${LIB_ONERT_BACKEND_ACL_CL} PRIVATE nnfw_coverage)
set_target_properties(${LIB_ONERT_BACKEND_ACL_CL} PROPERTIES OUTPUT_NAME backend_acl_cl)
+if(CMAKE_BUILD_TYPE_LC STREQUAL "release")
+ add_custom_command(TARGET onert_backend_acl_cl POST_BUILD
+ COMMAND ${CMAKE_STRIP} "--strip-unneeded" $<TARGET_FILE_NAME:onert_backend_acl_cl>)
+endif()
+
install(TARGETS ${LIB_ONERT_BACKEND_ACL_CL} DESTINATION lib)
diff --git a/runtime/onert/backend/acl_cl/Config.cc b/runtime/onert/backend/acl_cl/Config.cc
index 8017bdb0b..4d12d60b3 100644
--- a/runtime/onert/backend/acl_cl/Config.cc
+++ b/runtime/onert/backend/acl_cl/Config.cc
@@ -42,12 +42,12 @@ bool Config::initialize()
// NOTE CLKernelLibraryEx must use the same context as CLScheduler
// It does not check whether another device is available.
arm_compute::CLKernelLibraryEx::get().init(
- "./cl_kernels/", arm_compute::CLScheduler::get().context(), cl::Device::getDefault());
+ "./cl_kernels/", arm_compute::CLScheduler::get().context(), cl::Device::getDefault());
return true;
}
-ir::Layout Config::supportLayout(const ir::Operation &, ir::Layout frontend_layout)
+ir::Layout Config::supportLayout(const ir::IOperation &, ir::Layout frontend_layout)
{
const std::string acl_layout_str = util::getConfigString(util::config::ACL_LAYOUT);
if (acl_layout_str == "NHWC")
diff --git a/runtime/onert/backend/acl_cl/Config.h b/runtime/onert/backend/acl_cl/Config.h
index f71e81b6a..1fa1aeb00 100644
--- a/runtime/onert/backend/acl_cl/Config.h
+++ b/runtime/onert/backend/acl_cl/Config.h
@@ -35,7 +35,7 @@ public:
std::string id() override { return "acl_cl"; }
bool initialize() override;
bool supportPermutation() override { return true; }
- ir::Layout supportLayout(const ir::Operation &node, ir::Layout frontend_layout) override;
+ ir::Layout supportLayout(const ir::IOperation &node, ir::Layout frontend_layout) override;
bool supportDynamicTensor() override { return false; }
bool supportFP16() override { return true; }
void sync() const override { arm_compute::CLScheduler::get().sync(); }
diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.cc b/runtime/onert/backend/acl_cl/ConstantInitializer.cc
index 31f1c10eb..0431bb198 100644
--- a/runtime/onert/backend/acl_cl/ConstantInitializer.cc
+++ b/runtime/onert/backend/acl_cl/ConstantInitializer.cc
@@ -14,6 +14,11 @@
* limitations under the License.
*/
+#include <AclActivationBuilder.h>
+#include <AclFunction.h>
+#include <Convert.h>
+#include <Swizzle.h>
+
#include "ConstantInitializer.h"
namespace onert
@@ -25,7 +30,7 @@ namespace acl_cl
ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
const std::shared_ptr<ITensorRegistry> &tensor_reg)
- : acl_common::AclConstantInitializer{operands, tensor_reg}
+ : acl_common::AclConstantInitializer{operands, tensor_reg}
{
// DO NOTHING
}
@@ -53,21 +58,7 @@ void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
if (block_size_obj.isConstant())
{
- _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) {
- assert(model_obj.data());
- const auto &shape = model_obj.shape();
- const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base());
- assert(model_obj.shape().rank() == 1);
- obj.access([&](ITensor &tensor) {
- for (size_t i = 0; i < shape.num_elements(); ++i)
- {
- const int32_t value = base[shape.num_elements() - i - 1];
- int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() +
- tensor.calcOffset({static_cast<int32_t>(i)}));
- *into = value;
- }
- });
- };
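+ // initReverseOrder<int32_t> copies the rank-1 constant into the tensor in
+ // reverse element order, matching the removed hand-written lambda.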
+ _init_map[block_size_index] = acl_common::initReverseOrder<int32_t>;
}
const auto &paddings_index = node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS);
@@ -79,7 +70,7 @@ void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
const auto &shape = model_obj.shape();
const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base());
assert(model_obj.shape().rank() == 2);
- assert(obj.dimension(0) == 2);
+ assert(obj.getShape().dim(0) == 2);
obj.access([&](ITensor &tensor) {
for (auto i = 0; i < shape.dim(0); ++i)
{
@@ -87,7 +78,7 @@ void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
{
const int32_t value = base[i * 2 + j];
int32_t *into = reinterpret_cast<int32_t *>(
- tensor.buffer() + tensor.calcOffset({shape.dim(0) - i - 1, j}));
+ tensor.buffer() + tensor.calcOffset({shape.dim(0) - i - 1, j}));
*into = value;
}
}
@@ -96,6 +87,46 @@ void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
}
}
+void ConstantInitializer::visit(const ir::operation::Reverse &node)
+{
+ const auto &output_index = node.getOutputs().at(0);
+
+ const auto &input_index = node.getInputs().at(ir::operation::Reverse::Input::INPUT);
+ const auto &input_obj = _operands.at(input_index);
+
+ const auto &axis_index = node.getInputs().at(ir::operation::Reverse::Input::AXIS);
+ const auto &axis_obj = _operands.at(axis_index);
+
+ const auto ifm_rank = input_obj.shape().rank();
+ const auto frontend_layout = this->_current_layout;
+
+ auto output_tensor = this->_tensor_reg->getITensor(output_index);
+ const auto backend_layout = output_tensor->layout();
+
+ if (axis_obj.isConstant())
+ {
+ _init_map[axis_index] = [ifm_rank, frontend_layout, backend_layout](const ir::Operand &operand,
+ backend::ITensor &obj) {
+ assert(operand.data());
+
+ const auto axis_value = *(reinterpret_cast<const int32_t *>(operand.data()->base()));
+ int32_t axis_tmp = axis_value;
+ if (axis_tmp < 0)
+ {
+ axis_tmp = axis_tmp + ifm_rank;
+ }
+
+ auto axis =
+ acl_common::ToARMComputeAxis(ifm_rank, axis_tmp, frontend_layout, backend_layout).value();
+
+ obj.access([&](ITensor &tensor) {
+ int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer());
+ *into = (int32_t)axis;
+ });
+ };
+ }
+}
+
} // namespace acl_cl
} // namespace backend
} // namespace onert
diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.h b/runtime/onert/backend/acl_cl/ConstantInitializer.h
index 4f894fd31..fc0eca84f 100644
--- a/runtime/onert/backend/acl_cl/ConstantInitializer.h
+++ b/runtime/onert/backend/acl_cl/ConstantInitializer.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
-#define __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
+#ifndef __ONERT_BACKEND_ACL_CL_CONSTANT_INITIALIZER_H__
+#define __ONERT_BACKEND_ACL_CL_CONSTANT_INITIALIZER_H__
#include "AclConstantInitializer.h"
@@ -38,10 +38,11 @@ public:
void visit(const ir::operation::Gather &) final;
void visit(const ir::operation::HashtableLookup &) final;
void visit(const ir::operation::SpaceToBatchND &) final;
+ void visit(const ir::operation::Reverse &) final;
};
} // namespace acl_cl
} // namespace backend
} // namespace onert
-#endif // __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
+#endif // __ONERT_BACKEND_ACL_CL_CONSTANT_INITIALIZER_H__
diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.cc b/runtime/onert/backend/acl_cl/KernelGenerator.cc
index 94489253d..dcf31858e 100644
--- a/runtime/onert/backend/acl_cl/KernelGenerator.cc
+++ b/runtime/onert/backend/acl_cl/KernelGenerator.cc
@@ -42,33 +42,27 @@ namespace acl_cl
using ::onert::backend::acl_common::asAclFunction;
using ActivationBuilder = ::onert::backend::acl_common::AclActivationBuilder<
- ::arm_compute::ICLTensor, ::arm_compute::CLActivationLayer, acl_common::AclFunction>;
+ ::arm_compute::ICLTensor, ::arm_compute::CLActivationLayer, acl_common::AclFunction>;
KernelGenerator::KernelGenerator(
- const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder,
- const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
- : _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder),
- _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN)
+ const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
+ : basic::KernelGeneratorBase{graph}, _ctx(graph.operands()),
+ _operations_ctx(graph.operations()), _current_layout{graph.layout()},
+ _tensor_builder(tensor_builder), _tensor_reg(tensor_reg)
{
// DO NOTHING
}
-void KernelGenerator::visit(const ir::OpSequence &op_seq)
+std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
{
- // TODO Move this to IKernelGenerator
- // (all derivatives have the same implementation for this)
- assert(!_return_fn_seq);
- _return_fn_seq = std::make_unique<exec::FunctionSequence>();
- _return_fn_seq->enableDynamicShapeInferer(false);
-
- _current_op_seq_layout = op_seq.getLayout();
- for (const auto &operation_idx : op_seq.operations())
- {
- const auto &node = _operations_ctx.at(operation_idx);
- node.accept(*this);
- _return_fn_seq->append(releaseFunction());
- }
+ auto ret = std::make_unique<exec::FunctionSequence>();
+ ret->enableDynamicShapeInferer(false);
+
+ const auto &op = _graph.operations().at(ind);
+ op.accept(*this);
+ ret->append(releaseFunction());
+ return ret;
}
void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
@@ -76,16 +70,35 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)};
const auto block_size_index{
- node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
+ node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
+ const auto NNApiInputs = 2;
+ if (node.getInputs().size() != NNApiInputs)
+ {
+ const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)};
+ if (!_ctx.at(crops_index).isConstant())
+ {
+ throw std::runtime_error("Non-constant crops NYI for acl_cl backend BatchToSpaceND");
+ }
+
+ auto crops = _ctx.at(crops_index).asVector<int32_t>();
+ for (auto &&crop : crops)
+ {
+ if (crop != 0)
+ {
+ throw std::runtime_error("Non-zero crops NYI for acl_cl backend BatchToSpaceND");
+ }
+ }
+ }
+
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index);
assert(_ctx.at(block_size_index).data());
auto fn = acl_common::generateLayer<arm_compute::CLBatchToSpaceLayer>(
- ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
+ ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
@@ -98,9 +111,9 @@ void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
- auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index);
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index);
const auto act_info = acl_common::asActivationLayerInfo(activation);
@@ -110,29 +123,29 @@ void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
{
fn = acl_common::generateLayer<arm_compute::CLArithmeticAddition>(
- lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
- arm_compute::ConvertPolicy::SATURATE, act_info);
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+ arm_compute::ConvertPolicy::SATURATE, act_info);
break;
}
case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
{
fn = acl_common::generateLayer<arm_compute::CLArithmeticSubtraction>(
- lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
- arm_compute::ConvertPolicy::SATURATE, act_info);
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+ arm_compute::ConvertPolicy::SATURATE, act_info);
break;
}
case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
{
fn = acl_common::generateLayer<arm_compute::CLPixelWiseMultiplication>(
- lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
- arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN,
- act_info);
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
+ arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN,
+ act_info);
break;
}
case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
{
fn = acl_common::generateLayer<arm_compute::CLArithmeticDivision>(
- lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), act_info);
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), act_info);
break;
}
default:
@@ -152,30 +165,30 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
// Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
const auto &ker_shape = _ctx.at(ker_index).shape();
const auto ker_height = ker_shape.dim(1);
const auto ker_width = ker_shape.dim(2);
const auto stride = node.param().stride;
- const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
- ker_width, ker_height);
+ const auto padding =
+ ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, ker_width, ker_height);
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
- auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index);
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index);
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
auto fn = acl_common::generateLayer<arm_compute::CLConvolutionLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
- ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info,
- ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
+ ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info,
+ ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
_return_fn = asAclFunction(std::move(fn));
}
@@ -189,34 +202,35 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
// Kernel format is [1, kernel_height, kernel_width, depth_out].
const auto &ker_shape = _ctx.at(ker_index).shape();
const auto ker_height = ker_shape.dim(1);
const auto ker_width = ker_shape.dim(2);
const auto stride = node.param().stride;
- const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
- ker_width, ker_height);
+ const auto dilation = node.param().dilation;
+ const auto padding =
+ ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ dilation.width_factor, dilation.height_factor);
const auto multiplier = node.param().multiplier;
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
- auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index);
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index);
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
+ const auto dilation_info = acl_common::asDilation(dilation.width_factor, dilation.height_factor);
- {
- auto fn = acl_common::generateLayer<arm_compute::CLDepthwiseConvolutionLayer>(
- ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(),
- conv_info, multiplier, act_info);
+ auto fn = acl_common::generateLayer<arm_compute::CLDepthwiseConvolutionLayer>(
+ ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(),
+ conv_info, multiplier, act_info, dilation_info);
- _return_fn = asAclFunction(std::move(fn));
- }
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Concat &node)
@@ -240,26 +254,28 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
return;
}
- auto output_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- std::vector<::arm_compute::ICLTensor *> input_tensors;
- for (auto &ifm_ind : input_indexes)
+ auto output_tensor = _tensor_reg->getAclTensor(ofm_index);
+ std::vector<const ::arm_compute::ICLTensor *> input_tensors;
+ for (const auto &ifm_ind : input_indexes)
input_tensors.emplace_back(_tensor_reg->getAclTensor(ifm_ind)->handle());
std::unique_ptr<::arm_compute::IFunction> fn;
if (input_indexes.size() < 2)
{
- fn = acl_common::generateLayer<arm_compute::CLCopy>(input_tensors.at(0),
- output_tensor->handle());
+ ::arm_compute::ICLTensor *input_tensor =
+ _tensor_reg->getAclTensor(input_indexes.at(0))->handle();
+
+ fn = acl_common::generateLayer<arm_compute::CLCopy>(input_tensor, output_tensor->handle());
}
else
{
const auto rank = _ctx.at(ofm_index).shape().rank();
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = output_tensor->layout();
const auto fixed_axis =
- acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
+ acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
fn = acl_common::generateLayer<::arm_compute::CLConcatenateLayer>(
- input_tensors, output_tensor->handle(), fixed_axis);
+ input_tensors, output_tensor->handle(), fixed_axis);
}
_return_fn = asAclFunction(std::move(fn));
@@ -268,14 +284,17 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
void KernelGenerator::visit(const ir::operation::FullyConnected &node)
{
const auto output_index{node.getOutputs().at(0)};
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
const auto activation = node.param().activation;
+ if (node.param().weights_format == ir::FullyConnectedWeightsFormat::Shuffled16x1Float32)
+ throw std::runtime_error(
+ "KernelGenerator(acl_cl): FullyConnected 16x1Float32 weights is not supported.");
auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ICLTensor,
::arm_compute::CLFullyConnectedReshapingLayer>(
- node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout);
+ node, _ctx, _tensor_builder, _tensor_reg, _current_layout);
_return_fn = std::make_unique<exec::FunctionSequence>(
- std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
+ std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Reduce &node)
@@ -286,20 +305,20 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
const auto keep_dims{node.param().keep_dims};
const auto reduce_type = node.param().reduce_type;
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
// Convert to ACL axes taking into account negative values and possible duplicates.
const auto &axes = _ctx.at(axes_index);
const auto input_rank = _ctx.at(input_index).shape().rank();
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = input_tensor->layout();
std::unique_ptr<arm_compute::IFunction> fn;
if (reduce_type == ir::operation::Reduce::ReduceType::MEAN)
{
const auto acl_axes =
- acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout);
+ acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout);
fn = acl_common::generateLayer<arm_compute::CLReduceMean>(input_tensor->handle(), acl_axes,
keep_dims, output_tensor->handle());
}
@@ -308,8 +327,8 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
const auto acl_axes = acl_common::asSet(axes, input_rank, frontend_layout, backend_layout);
fn = acl_common::generateLayer<arm_compute::CLReduceOperation>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
- output_tensor->handle(), acl_axes, keep_dims, acl_common::convertReduceType(reduce_type));
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ output_tensor->handle(), acl_axes, keep_dims, acl_common::convertReduceType(reduce_type));
}
_return_fn = asAclFunction(std::move(fn));
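// [Editor's sketch -- illustrative, not part of this patch] The asCoordinates/asSet
// conversions above rely on ACL's reversed dimension order: with matching
// frontend and backend layouts, frontend axis `a` of a rank-`r` tensor maps to
// ACL axis `r - a - 1`. A minimal model of that mapping (layout permutation
// deliberately omitted; the helper name is hypothetical):
//
//   uint32_t toAclAxis(uint32_t rank, uint32_t axis) { return rank - axis - 1; }
//   // e.g. rank 4: frontend axis 1 -> ACL axis 2, frontend axis 3 -> ACL axis 0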
@@ -320,12 +339,12 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
// NOTE This operation must not change the layout from frontend to backend
// So PermutationOperationPass makes the frontend and backend layouts the same.
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = output_tensor->layout();
assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) ||
frontend_layout == backend_layout);
@@ -351,8 +370,8 @@ void KernelGenerator::visit(const ir::operation::Squeeze &node)
(void)dims;
(void)ndim;
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
output_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
@@ -365,12 +384,12 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
const auto beta = node.param().beta;
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
auto fn = acl_common::generateLayer<arm_compute::CLSoftmaxLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
- output_tensor->handle(), beta);
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ output_tensor->handle(), beta);
_return_fn = asAclFunction(std::move(fn));
}
@@ -382,9 +401,9 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
- auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
- const auto frontend_layout = _current_op_seq_layout;
+ auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
+ auto inputData_tensor = _tensor_reg->getAclTensor(input_index);
+ const auto frontend_layout = _current_layout;
const auto backend_layout = inputData_tensor->layout();
// Set initializers for index data, such as the ordering of inputData
@@ -416,7 +435,7 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
{
auto axis = ::onert::backend::acl_common::ToARMComputeAxis(input_rank, n, frontend_layout,
backend_layout)
- .value();
+ .value();
int32_t begin_value = *(reinterpret_cast<const int32_t *>(beginData_base) + n);
starts[axis] = begin_value;
@@ -436,7 +455,7 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
}
auto fn = acl_common::generateLayer<arm_compute::CLSlice>(
- inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
+ inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
_return_fn = asAclFunction(std::move(fn));
}
@@ -449,9 +468,9 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
- auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
- const auto frontend_layout = _current_op_seq_layout;
+ auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
+ auto inputData_tensor = _tensor_reg->getAclTensor(input_index);
+ const auto frontend_layout = _current_layout;
const auto backend_layout = inputData_tensor->layout();
// Set initializers for index data, such as the ordering of inputData
@@ -491,7 +510,7 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
{
auto axis = ::onert::backend::acl_common::ToARMComputeAxis(input_rank, n, frontend_layout,
backend_layout)
- .value();
+ .value();
int32_t start_value = *(reinterpret_cast<const int32_t *>(startData_base) + n);
starts[axis] = start_value;
@@ -510,7 +529,7 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
const auto end_mask = acl_common::ReorderBits<int32_t>(node.param().end_mask, input_rank,
frontend_layout, backend_layout);
const auto shrink_axis_mask = acl_common::ReorderBits<int32_t>(
- node.param().shrink_axis_mask, input_rank, frontend_layout, backend_layout);
+ node.param().shrink_axis_mask, input_rank, frontend_layout, backend_layout);
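// [Editor's sketch -- assumption, not part of this patch] ReorderBits presumably
// remaps each mask bit through the same axis conversion used for coordinates,
// along these lines (using the hypothetical toAclAxis helper sketched earlier):
//
//   template <typename T> T reorderBits(T in, size_t rank)
//   {
//     T out = 0;
//     for (size_t i = 0; i < rank; ++i)
//       if ((in >> i) & 1)
//         out |= T{1} << toAclAxis(rank, i);
//     return out;
//   }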
::arm_compute::Coordinates starts_set;
::arm_compute::Coordinates ends_set;
@@ -523,9 +542,22 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
strides_set.set(i, strides[i]);
}
+ // Disable applied dim_correction
+ if (inputData_tensor->num_dimensions() != inputData_tensor->info()->num_dimensions())
+ {
+ // This means the highest dimension's value is 1 and dim_correction has been applied to the input tensor
+ acl_common::disableDimCorrection(inputData_tensor);
+ }
+
auto fn = acl_common::generateLayer<arm_compute::CLStridedSlice>(
- inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
- begin_mask, end_mask, shrink_axis_mask);
+ inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
+ begin_mask, end_mask, shrink_axis_mask);
+
+ // Revert disabling applied dim_correction
+ if (inputData_tensor->dimension(0) == 1)
+ {
+ acl_common::enableDimCorrection(inputData_tensor);
+ }
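// [Editor's note, not part of this patch] "dim_correction" refers to ACL
// collapsing trailing size-1 dimensions of a TensorShape, which shrinks
// num_dimensions(). The disable/enable helpers are part of acl_common; the
// semantics sketched here are the editor's reading of the pattern above:
//
//   void disableDimCorrection(IACLTensor *t); // re-expand to the frontend rank
//   void enableDimCorrection(IACLTensor *t);  // re-collapse trailing 1s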
_return_fn = asAclFunction(std::move(fn));
}
@@ -534,22 +566,47 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
{
const auto ofm_idx{node.getOutputs().at(0)};
const auto ifm_idx{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
- const auto &perm{node.param().perm};
+ const auto perm_idx{node.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};
const auto rank = _ctx.at(ifm_idx).shape().rank();
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
- const auto frontend_layout = _current_op_seq_layout;
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx);
+ const auto frontend_layout = _current_layout;
const auto backend_layout = ifm_tensor->layout();
- std::vector<std::int32_t> pv(perm.cbegin(), perm.cend());
- // Reversed
- auto backend_pv = ::onert::backend::acl_common::getARMComputePermutationVector(
- rank, pv, frontend_layout, backend_layout);
+ const auto &perms = _ctx.at(perm_idx);
+ std::vector<int32_t> pv;
+ if (perms.shape() == ir::Shape{0})
+ {
+ pv.resize(rank);
+ std::iota(pv.begin(), pv.end(), 0);
+ std::reverse(pv.begin(), pv.end());
+ }
+ else
+ {
+ pv = _ctx.at(perm_idx).asVector<int32_t>();
+ }
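// [Editor's note] For example, with an empty PERMUTATION operand and rank == 3,
// pv becomes {2, 1, 0} -- a full reversal, the conventional default transpose.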
- auto fn = acl_common::generateLayer<::arm_compute::CLPermute>(ifm_tensor->handle(),
- ofm_tensor->handle(), backend_pv);
+ std::unique_ptr<arm_compute::IFunction> fn;
+ if (rank == 1)
+ {
+ fn = acl_common::generateLayer<arm_compute::CLCopy>(ifm_tensor->handle(), ofm_tensor->handle());
+ }
+ else if (rank == 2)
+ {
+ assert(pv.size() == 2 && pv.at(0) == 1 && pv.at(1) == 0);
+ fn = acl_common::generateLayer<arm_compute::CLTranspose>(ifm_tensor->handle(),
+ ofm_tensor->handle());
+ }
+ else
+ {
+ auto backend_pv =
+ acl_common::getARMComputePermutationVector(rank, pv, frontend_layout, backend_layout);
+
+ fn = acl_common::generateLayer<arm_compute::CLPermute>(ifm_tensor->handle(),
+ ofm_tensor->handle(), backend_pv);
+ }
_return_fn = asAclFunction(std::move(fn));
}
@@ -559,14 +616,14 @@ void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
- const ::arm_compute::ActivationLayerInfo act_info = acl_common::asActivationLayerInfo(
- node.param().op_type, node.param().alpha, node.param().beta);
+ const ::arm_compute::ActivationLayerInfo act_info =
+ acl_common::asActivationLayerInfo(node.param().op_type, node.param().alpha, node.param().beta);
auto fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
- ifm_tensor->handle(), ofm_tensor->handle(), act_info);
+ ifm_tensor->handle(), ofm_tensor->handle(), act_info);
_return_fn = asAclFunction(std::move(fn));
}
@@ -577,9 +634,9 @@ void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
- auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index);
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index);
std::unique_ptr<arm_compute::IFunction> fn;
switch (node.param().op_type)
@@ -587,26 +644,26 @@ void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
{
fn = acl_common::generateLayer<arm_compute::CLBinaryLogicalOp>(
- lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle(),
- arm_compute::BinaryLogicalOperation::AND);
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle(),
+ arm_compute::BinaryLogicalOperation::AND);
break;
}
case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
{
fn = acl_common::generateLayer<arm_compute::CLBitwiseOr>(
- lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
break;
}
case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
{
fn = acl_common::generateLayer<arm_compute::CLElementwiseMax>(
- lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
break;
}
case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
{
fn = acl_common::generateLayer<arm_compute::CLElementwiseMin>(
- lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
break;
}
default:
@@ -626,8 +683,8 @@ void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
std::unique_ptr<arm_compute::IFunction> fn;
switch (node.param().op_type)
@@ -635,10 +692,10 @@ void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
case ir::operation::ElementwiseUnary::Type::ABS:
{
const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
- input_tensor->handle(), output_tensor->handle(), act_info);
+ input_tensor->handle(), output_tensor->handle(), act_info);
break;
}
case ir::operation::ElementwiseUnary::Type::CAST:
@@ -647,13 +704,17 @@ void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
{
fn = acl_common::generateLayer<arm_compute::CLCopy>(input_tensor->handle(),
output_tensor->handle());
- ;
+ }
+ else if (_ctx.at(input_index).typeInfo().type() == ir::DataType::BOOL8)
+ {
+ fn = acl_common::generateLayer<arm_compute::CLCastBool>(input_tensor->handle(),
+ output_tensor->handle());
}
else
{
// TODO Support converting float to int32 as round down
fn = acl_common::generateLayer<arm_compute::CLCast>(
- input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
+ input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
}
break;
}
@@ -696,10 +757,10 @@ void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
case ir::operation::ElementwiseUnary::Type::SQRT:
{
const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
- input_tensor->handle(), output_tensor->handle(), act_info);
+ input_tensor->handle(), output_tensor->handle(), act_info);
break;
}
default:
@@ -719,8 +780,8 @@ void KernelGenerator::visit(const ir::operation::ExpandDims &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
output_tensor->handle());
@@ -735,19 +796,19 @@ void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto gamma_tensor = _tensor_reg->getAclTensor(gamma_index).get();
- auto beta_tensor = _tensor_reg->getAclTensor(beta_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto gamma_tensor = _tensor_reg->getAclTensor(gamma_index);
+ auto beta_tensor = _tensor_reg->getAclTensor(beta_index);
auto epsilon = node.param().epsilon;
auto activation = node.param().activation;
auto fn = acl_common::generateLayer<arm_compute::CLInstanceNormalizationLayerEx>(
- ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), beta_tensor->handle(),
- epsilon);
+ ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), beta_tensor->handle(),
+ epsilon);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::LSTM &node)
@@ -764,13 +825,63 @@ void KernelGenerator::visit(const ir::operation::Comparison &node)
const auto comparison_type = node.param().comparison_type;
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto input0_tensor = _tensor_reg->getAclTensor(input0_index).get();
- auto input1_tensor = _tensor_reg->getAclTensor(input1_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto input0_tensor = _tensor_reg->getAclTensor(input0_index);
+ auto input1_tensor = _tensor_reg->getAclTensor(input1_index);
auto fn = acl_common::generateLayer<arm_compute::CLComparison>(
- input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
- (arm_compute::ComparisonOperation)comparison_type);
+ input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
+ (arm_compute::ComparisonOperation)comparison_type);
+
+ _return_fn = asAclFunction(std::move(fn));
+}
+
+void KernelGenerator::visit(const ir::operation::OneHot &node)
+{
+ const auto output_idx{node.getOutputs().at(0)};
+ const auto indices_idx{node.getInputs().at(ir::operation::OneHot::Input::INDICES)};
+ const auto depth_idx{node.getInputs().at(ir::operation::OneHot::Input::DEPTH)};
+ const auto onvalue_idx{node.getInputs().at(ir::operation::OneHot::Input::ON_VALUE)};
+ const auto offvalue_idx{node.getInputs().at(ir::operation::OneHot::Input::OFF_VALUE)};
+ const auto depth = _ctx.at(depth_idx).asScalar<int32_t>();
+ assert(depth > 0);
+
+ auto output_tensor = _tensor_reg->getAclTensor(output_idx);
+ auto indices_tensor = _tensor_reg->getAclTensor(indices_idx);
+ auto onvalue_tensor = _tensor_reg->getAclTensor(onvalue_idx);
+
+ const size_t output_rank = _ctx.at(output_idx).shape().rank();
+ const auto frontend_layout = _current_layout;
+ const auto backend_layout = output_tensor->layout();
+ int32_t axis = node.param().axis == -1 ? output_rank - 1 : node.param().axis;
+ axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();
+
+ if (output_tensor->num_dimensions() != output_tensor->info()->num_dimensions())
+ {
+ // This means the highest dimension's value is 1 and dim_correction has been applied to output_tensor
+ acl_common::disableDimCorrection(output_tensor);
+ }
+
+ std::unique_ptr<::arm_compute::IFunction> fn;
+ const auto &offvalue = _ctx.at(offvalue_idx);
+ if (offvalue.isConstant())
+ {
+ fn = acl_common::generateLayer<arm_compute::CLOneHot>(
+ indices_tensor->handle(), onvalue_tensor->handle(), output_tensor->handle(),
+ acl_common::asPixelValue(offvalue), static_cast<uint32_t>(depth), axis);
+ }
+ else
+ {
+ auto offvalue_tensor = _tensor_reg->getAclTensor(offvalue_idx);
+ fn = acl_common::generateLayer<arm_compute::CLOneHot>(
+ indices_tensor->handle(), onvalue_tensor->handle(), offvalue_tensor->handle(),
+ output_tensor->handle(), static_cast<uint32_t>(depth), axis);
+ }
+
+ if (output_tensor->dimension(0) == 1)
+ {
+ acl_common::enableDimCorrection(output_tensor);
+ }
_return_fn = asAclFunction(std::move(fn));
}
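// [Editor's note] Two CLOneHot configurations are exercised above: a constant
// OFF_VALUE is folded into a PixelValue, while a non-constant one is passed as
// an extra input tensor handle.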
@@ -786,41 +897,39 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
for (const auto &input_index : node.getInputs())
input_indexes.emplace_back(input_index);
- auto output = _tensor_reg->getAclTensor(output_index).get()->handle();
+ auto output = _tensor_reg->getAclTensor(output_index)->handle();
std::vector<arm_compute::ICLTensor *> inputs;
for (const auto &input_index : input_indexes)
inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle());
- const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_reg->getAclTensor(output_index).get()->layout();
+ const auto frontend_layout = _current_layout;
+ const auto backend_layout = _tensor_reg->getAclTensor(output_index)->layout();
if (axis < 0)
axis += output_rank;
axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();
// Disable applied dim_correction
- std::vector<arm_compute::TensorShape> orig_inputs_acl_tensor_shapes;
for (const auto &input_index : input_indexes)
{
- size_t input_rank = _ctx.at(input_index).shape().rank();
const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
- orig_inputs_acl_tensor_shapes.emplace_back(input_tensor->info()->tensor_shape());
- assert(input_rank == input_tensor->num_dimensions());
- if (input_rank != input_tensor->info()->num_dimensions())
+ if (input_tensor->num_dimensions() != input_tensor->info()->num_dimensions())
{
- // This means that high dimension's value is 1 and ifm tensor is applied dim_correction
- input_tensor->info()->set_tensor_shape(acl_common::asTensorShape(
- _ctx.at(input_index).shape(), _current_op_seq_layout, backend_layout, false));
+ // This means the highest dimension's value is 1 and dim_correction has been applied to the input tensor
+ acl_common::disableDimCorrection(input_tensor);
}
}
auto fn = acl_common::generateLayer<arm_compute::CLStackLayer>(inputs, axis, output);
// Revert disabling applied dim_correction
- assert(inputs.size() == orig_inputs_acl_tensor_shapes.size());
- for (size_t i = 0; i < inputs.size(); ++i)
+ for (const auto &input_index : input_indexes)
{
- inputs.at(i)->info()->set_tensor_shape(orig_inputs_acl_tensor_shapes.at(i));
+ const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
+ if (input_tensor->dimension(0) == 1)
+ {
+ acl_common::enableDimCorrection(input_tensor);
+ }
}
_return_fn = asAclFunction(std::move(fn));
@@ -829,15 +938,14 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
void KernelGenerator::visit(const ir::operation::Pool2D &node)
{
auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
- node, _ctx, _tensor_reg, _current_op_seq_layout,
- acl_common::convertPoolType(node.param().op_type));
+ node, _ctx, _tensor_reg, _current_layout, acl_common::convertPoolType(node.param().op_type));
const auto ofm_index{node.getOutputs().at(0)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
const auto activation = node.param().activation;
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(raw_fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ asAclFunction(std::move(raw_fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Permute &node)
@@ -845,8 +953,8 @@ void KernelGenerator::visit(const ir::operation::Permute &node)
const auto ofm_idx{node.getOutputs().at(0)};
const auto ifm_idx{node.getInputs().at(0)};
const auto permute_type = node.getPermuteType();
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx);
const auto rank = _ctx.at(ofm_idx).shape().rank();
assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank());
@@ -879,16 +987,16 @@ void KernelGenerator::visit(const ir::operation::Permute &node)
void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
{
const auto ofm_index{node.getOutputs().at(0)};
-
const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
auto fn = acl_common::generateLayer<arm_compute::CLScale>(
- ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::InterpolationPolicy::BILINEAR,
- ::arm_compute::BorderMode::REPLICATE, ::arm_compute::PixelValue(0.f),
- ::arm_compute::SamplingPolicy::TOP_LEFT);
+ ifm_tensor->handle(), ofm_tensor->handle(),
+ ::arm_compute::ScaleKernelInfo{
+ ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
+ ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT});
_return_fn = asAclFunction(std::move(fn));
}
@@ -896,16 +1004,16 @@ void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
void KernelGenerator::visit(const ir::operation::ResizeNearestNeighbor &node)
{
const auto ofm_index{node.getOutputs().at(0)};
-
const auto ifm_index{node.getInputs().at(ir::operation::ResizeNearestNeighbor::Input::INPUT)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
auto fn = acl_common::generateLayer<arm_compute::CLScale>(
- ifm_tensor->handle(), ofm_tensor->handle(),
+ ifm_tensor->handle(), ofm_tensor->handle(),
+ ::arm_compute::ScaleKernelInfo{
::arm_compute::InterpolationPolicy::NEAREST_NEIGHBOR, ::arm_compute::BorderMode::REPLICATE,
- ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
+ ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT});
_return_fn = asAclFunction(std::move(fn));
}
@@ -914,35 +1022,35 @@ void KernelGenerator::visit(const ir::operation::RNN &node)
{
const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)};
const auto hidden_state_out_index{
- node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};
+ node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};
const auto input_index{node.getInputs().at(ir::operation::RNN::Input::INPUT)};
const auto weights_index{node.getInputs().at(ir::operation::RNN::Input::WEIGHTS)};
const auto recurrent_weights_index{
- node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
+ node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)};
const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)};
const auto activation = node.param().activation;
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto hidden_state_out_tensor = _tensor_reg->getAclTensor(hidden_state_out_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto hidden_state_out_tensor = _tensor_reg->getAclTensor(hidden_state_out_index);
- auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
- auto weights_tensor = _tensor_reg->getAclTensor(weights_index).get();
- auto recurrent_weights_tensor = _tensor_reg->getAclTensor(recurrent_weights_index).get();
- auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
- auto hidden_state_in_tensor = _tensor_reg->getAclTensor(hidden_state_in_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
+ auto weights_tensor = _tensor_reg->getAclTensor(weights_index);
+ auto recurrent_weights_tensor = _tensor_reg->getAclTensor(recurrent_weights_index);
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index);
+ auto hidden_state_in_tensor = _tensor_reg->getAclTensor(hidden_state_in_index);
auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
auto copy_layer = acl_common::generateLayer<arm_compute::CLCopy>(
- hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
+ hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
_return_fn = asAclFunction(std::move(copy_layer));
auto fn = acl_common::generateLayer<arm_compute::CLRNNLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
- weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
- hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
+ hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
_return_fn = asAclFunction(std::move(fn));
}
@@ -951,20 +1059,20 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
const auto block_size_index{
- node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
+ node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
- auto paddings_tensor = _tensor_reg->getAclTensor(paddings_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index);
+ auto paddings_tensor = _tensor_reg->getAclTensor(paddings_index);
assert(_ctx.at(block_size_index).data());
assert(_ctx.at(paddings_index).data());
auto fn = acl_common::generateLayer<arm_compute::CLSpaceToBatchLayer>(
- ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
- ofm_tensor->handle());
+ ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
+ ofm_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
@@ -976,11 +1084,11 @@ void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
auto block_size = node.param().block_size;
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
auto fn = acl_common::generateLayer<arm_compute::CLSpaceToDepthLayer>(
- ifm_tensor->handle(), ofm_tensor->handle(), block_size);
+ ifm_tensor->handle(), ofm_tensor->handle(), block_size);
_return_fn = asAclFunction(std::move(fn));
}
@@ -991,12 +1099,12 @@ void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
- auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index);
+ auto values_tensor = _tensor_reg->getAclTensor(values_index);
auto fn = acl_common::generateLayer<arm_compute::CLEmbeddingLookup>(
- values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
+ values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
@@ -1015,19 +1123,19 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node)
// TODO Support an optional constant dimension on which normalization is performed
const auto normalization_axis = _ctx.at(ifm_index).shape().rank() - 1;
int32_t radius =
- 2 * ifm_shape.dim(normalization_axis) + 1; // normSize = depth(last dimension) * 2 + 1
- float alpha = 1.0f; // In the implementation to make alpha_ become 1
- float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
- float bias = 0.0f; // Don't offset the reduction.
+ 2 * ifm_shape.dim(normalization_axis) + 1; // normSize = depth(last dimension) * 2 + 1
+ float alpha = 1.0f; // In the implementation to make alpha_ become 1
+ float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
+ float bias = 0.0f; // Don't offset the reduction.
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
radius, alpha, beta, bias, false);
auto fn = acl_common::generateLayer<arm_compute::CLNormalizationLayer>(
- ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
+ ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
_return_fn = asAclFunction(std::move(fn));
}
@@ -1041,16 +1149,16 @@ void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto hits_tensor = _tensor_reg->getAclTensor(hits_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto hits_tensor = _tensor_reg->getAclTensor(hits_index);
- auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
- auto keys_tensor = _tensor_reg->getAclTensor(keys_index).get();
- auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
+ auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index);
+ auto keys_tensor = _tensor_reg->getAclTensor(keys_index);
+ auto values_tensor = _tensor_reg->getAclTensor(values_index);
auto fn = acl_common::generateLayer<arm_compute::CLHashtableLookup>(
- lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
- output_tensor->handle(), hits_tensor->handle());
+ lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
+ output_tensor->handle(), hits_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
@@ -1061,12 +1169,12 @@ void KernelGenerator::visit(const ir::operation::PReLU &node)
const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto alpha_tensor = _tensor_reg->getAclTensor(alpha_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto alpha_tensor = _tensor_reg->getAclTensor(alpha_index);
auto fn = acl_common::generateLayer<arm_compute::CLPReluLayer>(
- ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
+ ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
@@ -1077,9 +1185,9 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node)
const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)};
const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)};
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_op_seq_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_layout);
const auto stride = node.param().stride;
@@ -1092,20 +1200,20 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node)
if (node.param().padding.type == ir::PaddingType::VALID)
{
invalid_horizontal =
- ofm_shape.W - (1 + (ifm_shape.W - 1) * stride.horizontal) - (ker_shape.W - 1);
+ ofm_shape.W - (1 + (ifm_shape.W - 1) * stride.horizontal) - (ker_shape.W - 1);
invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
}
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index);
const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
auto fn = acl_common::generateLayer<arm_compute::CLTransposeConvLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
- ker_tensor->handle(), nullptr, ofm_tensor->handle(), tconv_info, invalid_horizontal,
- invalid_vertical);
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
+ ker_tensor->handle(), nullptr, ofm_tensor->handle(), tconv_info, invalid_horizontal,
+ invalid_vertical);
_return_fn = asAclFunction(std::move(fn));
}
@@ -1116,12 +1224,12 @@ void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
- auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index);
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index);
auto fn = acl_common::generateLayer<arm_compute::CLElementwiseSquaredDiff>(
- lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
@@ -1130,7 +1238,7 @@ void KernelGenerator::visit(const ir::operation::TopKV2 &node)
{
const auto outputValues_index{node.getOutputs().at(ir::operation::TopKV2::Output::OUTPUT_VALUES)};
const auto outputIndices_index{
- node.getOutputs().at(ir::operation::TopKV2::Output::OUTPUT_INDICES)};
+ node.getOutputs().at(ir::operation::TopKV2::Output::OUTPUT_INDICES)};
const auto inputData_index{node.getInputs().at(ir::operation::TopKV2::Input::INPUT)};
@@ -1140,12 +1248,12 @@ void KernelGenerator::visit(const ir::operation::TopKV2 &node)
const auto k = node.param().k;
- auto values_tensor = _tensor_reg->getAclTensor(outputValues_index).get();
- auto indices_tensor = _tensor_reg->getAclTensor(outputIndices_index).get();
- auto input_tensor = _tensor_reg->getAclTensor(inputData_index).get();
+ auto values_tensor = _tensor_reg->getAclTensor(outputValues_index);
+ auto indices_tensor = _tensor_reg->getAclTensor(outputIndices_index);
+ auto input_tensor = _tensor_reg->getAclTensor(inputData_index);
auto fn = acl_common::generateLayer<arm_compute::CLTopKV2>(
- input_tensor->handle(), k, values_tensor->handle(), indices_tensor->handle());
+ input_tensor->handle(), k, values_tensor->handle(), indices_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
@@ -1162,9 +1270,9 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
const auto axis_value = (axis_raw < 0 ? (ifm_rank + axis_raw) : axis_raw);
const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto indices_tensor = _tensor_reg->getAclTensor(indices_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto indices_tensor = _tensor_reg->getAclTensor(indices_index);
// NOTE The frontend layout and backend layout must be the same for this operation.
// If not the same, we have to add a stage(?) to perform permutation of output tensor. It
@@ -1178,7 +1286,7 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
UNUSED_RELEASE(backend_layout);
assert(backend_layout == ifm_tensor->layout());
assert(backend_layout == indices_tensor->layout());
- assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout);
+ assert(ifm_rank < 4 || _current_layout == backend_layout);
// input is n-D, indices k-D, output is (n + k - 1)-D
size_t n = ifm_rank;
@@ -1187,61 +1295,62 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
assert(k == indices_tensor->num_dimensions());
// Disable applied dim_correction
- const auto orig_ifm_acl_tensor_shape = ifm_tensor->info()->tensor_shape();
if (n != ifm_tensor->info()->num_dimensions())
{
// This means the highest dimension's value is 1 and dim_correction has been applied to the ifm tensor
- const auto ifm = _ctx.at(ifm_index);
- ifm_tensor->info()->set_tensor_shape(
- acl_common::asTensorShape(ifm.shape(), _current_op_seq_layout, backend_layout, false));
+ acl_common::disableDimCorrection(ifm_tensor);
}
- const auto orig_indice_acl_tensor_shape = indices_tensor->info()->tensor_shape();
if (k != indices_tensor->info()->num_dimensions())
{
// This means the highest dimension's value is 1 and dim_correction has been applied to the indices tensor
- const auto indices = _ctx.at(indices_index);
- indices_tensor->info()->set_tensor_shape(
- acl_common::asTensorShape(indices.shape(), _current_op_seq_layout, backend_layout, false));
+ acl_common::disableDimCorrection(indices_tensor);
}
auto fn = acl_common::generateLayer<arm_compute::CLGatherEx>(
- ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
+ ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
// Revert disabling applied dim_correction
- ifm_tensor->info()->set_tensor_shape(orig_ifm_acl_tensor_shape);
- indices_tensor->info()->set_tensor_shape(orig_indice_acl_tensor_shape);
+ if (ifm_tensor->dimension(0) == 1)
+ {
+ acl_common::enableDimCorrection(ifm_tensor);
+ }
+ if (indices_tensor->dimension(0) == 1)
+ {
+ acl_common::enableDimCorrection(indices_tensor);
+ }
_return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::ArgMax &node)
+void KernelGenerator::visit(const ir::operation::ArgMinMax &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
+ const auto ifm_index{node.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)};
+ const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)};
auto ifm_shape = _ctx.at(ifm_index).shape();
auto ofm_shape = _ctx.at(ofm_index).shape();
assert((ifm_shape.rank() - 1) == ofm_shape.rank());
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
- auto frontend_layout = _current_op_seq_layout;
+ auto frontend_layout = _current_layout;
auto backend_layout = ifm_tensor->layout();
- int axis_value = node.param().axis;
+ int axis_value = _ctx.at(axis_index).asScalar<int32_t>();
if (axis_value < 0)
{
axis_value += ifm_rank;
}
auto acl_axis =
- acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
-
- auto fn = acl_common::generateLayer<arm_compute::CLArgMinMaxLayer>(
- ifm_tensor->handle(), acl_axis, ofm_tensor->handle(),
- ::arm_compute::ReductionOperation::ARG_IDX_MAX);
+ acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
+ auto reduce_type = node.param().is_arg_max ? ::arm_compute::ReductionOperation::ARG_IDX_MAX
+ : ::arm_compute::ReductionOperation::ARG_IDX_MIN;
+ auto fn = acl_common::generateLayer<arm_compute::CLArgMinMaxLayerEx>(
+ ifm_tensor->handle(), acl_axis, ofm_tensor->handle(), reduce_type);
_return_fn = asAclFunction(std::move(fn));
}
@@ -1250,21 +1359,21 @@ void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &nod
{
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{
- node.getInputs().at(ir::operation::LocalResponseNormalization::Input::INPUT)};
+ node.getInputs().at(ir::operation::LocalResponseNormalization::Input::INPUT)};
auto radius = node.param().radius;
auto alpha = node.param().alpha;
auto beta = node.param().beta;
auto bias = node.param().bias;
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
const auto norm_info = ::arm_compute::NormalizationLayerInfo(
- ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
+ ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
auto fn = acl_common::generateLayer<arm_compute::CLNormalizationLayer>(
- ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
+ ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
_return_fn = asAclFunction(std::move(fn));
}
@@ -1277,11 +1386,11 @@ void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
auto block_size = node.param().block_size;
assert(block_size > 0);
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
auto fn = acl_common::generateLayer<arm_compute::CLDepthToSpaceLayer>(
- input_tensor->handle(), output_tensor->handle(), block_size);
+ input_tensor->handle(), output_tensor->handle(), block_size);
_return_fn = asAclFunction(std::move(fn));
}
@@ -1289,28 +1398,87 @@ void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
void KernelGenerator::visit(const ir::operation::Split &node)
{
const auto ifm_index{node.getInputs().at(ir::operation::Split::Input::INPUT)};
+ const auto axis_index{node.getInputs().at(ir::operation::Split::Input::AXIS)};
assert(node.param().num_splits == static_cast<int>(node.getOutputs().size()));
+ if (!_ctx.at(axis_index).isConstant())
+ {
+ throw std::runtime_error("Non-constant axis_index NYI for acl_cl backend");
+ }
const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
std::vector<ir::OperandIndex> output_indexes;
for (const auto &output : node.getOutputs())
output_indexes.emplace_back(output);
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
std::vector<arm_compute::ICLTensor *> output_tensors;
for (const auto &ofm_ind : output_indexes)
- output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind).get()->handle());
+ output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind)->handle());
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = ifm_tensor->layout();
- auto axis = node.param().axis;
+ auto axis = _ctx.at(axis_index).asScalar<int32_t>();
if (axis < 0)
axis += ifm_rank;
axis = acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value();
auto fn =
- acl_common::generateLayer<arm_compute::CLSplit>(ifm_tensor->handle(), output_tensors, axis);
+ acl_common::generateLayer<arm_compute::CLSplit>(ifm_tensor->handle(), output_tensors, axis);
+
+ _return_fn = asAclFunction(std::move(fn));
+}
+
+void KernelGenerator::visit(const ir::operation::SplitV &node)
+{
+ const auto ifm_index{node.getInputs().at(ir::operation::SplitV::Input::INPUT)};
+ const auto size_split_index{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)};
+ const auto split_dim_index{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)};
+
+ assert(node.param().num_splits == static_cast<int>(node.getOutputs().size()));
+
+ const size_t ifm_rank = _ctx.at(ifm_index).shape().rank();
+ std::vector<ir::OperandIndex> output_indexes;
+ for (const auto &output : node.getOutputs())
+ output_indexes.emplace_back(output);
+
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto size_split_tensor = _tensor_reg->getAclTensor(size_split_index);
+
+ std::vector<arm_compute::ICLTensor *> output_tensors;
+ for (const auto &ofm_ind : output_indexes)
+ output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind)->handle());
+
+ auto fn = std::make_unique<arm_compute::CLSplitVEx>();
+ const auto &split_dim_op = _ctx.at(split_dim_index);
+ if (split_dim_op.isConstant())
+ {
+ int32_t split_dim = split_dim_op.asScalar<int32_t>();
+ uint32_t split_dim_revised = (split_dim < 0) ? (split_dim + ifm_rank) : split_dim;
+ const auto frontend_layout = _current_layout;
+ const auto backend_layout = ifm_tensor->layout();
+
+ if (ifm_tensor->num_dimensions() != ifm_tensor->info()->num_dimensions())
+ {
+ // This means the highest dimension's value is 1 and dim_correction has been applied to the ifm tensor
+ acl_common::disableDimCorrection(ifm_tensor);
+ }
+
+ split_dim_revised =
+ acl_common::ToARMComputeAxis(ifm_rank, split_dim_revised, frontend_layout, backend_layout)
+ .value();
+ fn->configure(ifm_tensor->handle(), size_split_tensor->handle(), split_dim_revised,
+ output_tensors, node.param().num_splits);
+
+ if (ifm_tensor->dimension(0) == 1)
+ {
+ acl_common::enableDimCorrection(ifm_tensor);
+ }
+ }
+ else
+ {
+ throw std::runtime_error("Non-constant split_dim NYI for acl_cl backend");
+ }
_return_fn = asAclFunction(std::move(fn));
}
@@ -1326,34 +1494,32 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
for (const auto &output_index : node.getOutputs())
output_indexes.emplace_back(output_index);
- auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
std::vector<arm_compute::ICLTensor *> outputs;
for (const auto &output_index : output_indexes)
outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle());
- const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
+ const auto frontend_layout = _current_layout;
+ const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout();
if (axis < 0)
axis += input_rank;
axis = acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value();
// Disable applied dim_correction
- std::vector<arm_compute::TensorShape> orig_outputs_acl_tensor_shapes;
- for (const auto &output_index : output_indexes)
+ if (input_tensor->num_dimensions() != input_tensor->info()->num_dimensions())
{
- size_t output_rank = _ctx.at(output_index).shape().rank();
- const auto &output_tensor = _tensor_reg->getAclTensor(output_index);
- orig_outputs_acl_tensor_shapes.emplace_back(output_tensor->info()->tensor_shape());
- assert(output_rank == output_tensor->num_dimensions());
- if (output_rank != output_tensor->info()->num_dimensions())
- {
- // This means that high dimension's value is 1 and ifm tensor is applied dim_correction
- output_tensor->info()->set_tensor_shape(acl_common::asTensorShape(
- _ctx.at(output_index).shape(), _current_op_seq_layout, backend_layout, false));
- }
+ // This means the highest dimension's value is 1 and dim_correction has been applied to the input tensor
+ acl_common::disableDimCorrection(input_tensor);
}
- auto fn = acl_common::generateLayer<arm_compute::CLUnstack>(input, outputs, axis);
+ auto fn =
+ acl_common::generateLayer<arm_compute::CLUnstack>(input_tensor->handle(), outputs, axis);
+
+ // Revert disabling applied dim_correction
+ if (input_tensor->dimension(0) == 1)
+ {
+ acl_common::enableDimCorrection(input_tensor);
+ }
_return_fn = asAclFunction(std::move(fn));
}
@@ -1370,14 +1536,14 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
auto input_type = _ctx.at(input_index).typeInfo();
auto data_type = acl_common::asDataType(input_type.type());
- auto quant_info = ::arm_compute::QuantizationInfo(input_type.scale(), input_type.offset());
+ auto quant_info = ::arm_compute::QuantizationInfo(input_type.scale(), input_type.zero_point());
const auto pixel_value = ::arm_compute::PixelValue(0, data_type, quant_info);
- auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
- auto output = _tensor_reg->getAclTensor(output_index).get()->handle();
+ auto input = _tensor_reg->getAclTensor(input_index)->handle();
+ auto output = _tensor_reg->getAclTensor(output_index)->handle();
- const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
+ const auto frontend_layout = _current_layout;
+ const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout();
::arm_compute::PaddingList padding_list;
padding_list.resize(rank);
@@ -1386,26 +1552,31 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2);
const auto axis =
- acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
+ acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]};
}
// Disable applied dim_correction
- size_t input_rank = _ctx.at(input_index).shape().rank();
const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
- assert(input_rank == input_tensor->num_dimensions());
- if (input_rank != input_tensor->info()->num_dimensions())
+ if (input_tensor->num_dimensions() != input_tensor->info()->num_dimensions())
{
- // This means that high dimension's value is 1 and ifm tensor is applied dim_correction
- input_tensor->info()->set_tensor_shape(acl_common::asTensorShape(
- _ctx.at(input_index).shape(), frontend_layout, backend_layout, false));
+ // This means the highest dimension's value is 1 and dim_correction has been applied to the input tensor
+ acl_common::disableDimCorrection(input_tensor);
}
auto fn =
- acl_common::generateLayer<arm_compute::CLPadLayer>(input, output, padding_list, pixel_value);
-
- // Do not revert disabling applied dim_correction CLPadKernel has cl kernel for 4-dimension
- // It would produce a mistach of result
+ acl_common::generateLayer<arm_compute::CLPadLayerEx>(input, output, padding_list, pixel_value);
+
+ // NOTE Do not revert the disabled dim_correction for 4D tensors.
+ // Doing so would produce a mismatched result caused by an incorrect offset_first_element in
+ // ICLKernel::add_tensor_argument<3>().
+ // We have to disable dim_correction, and must not re-enable it, for kernels that slice
+ // 4D to 3D, because slicing an arm_compute::Window can cause an incorrect offset_first_element
+ // if the tensor used is 4D and its highest dimension is 1
+ if (input_tensor->num_dimensions() < 4 && input_tensor->dimension(0) == 1)
+ {
+ acl_common::enableDimCorrection(input_tensor);
+ }
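// [Editor's note] In effect, a 4D input whose outermost dimension is 1 keeps
// dim_correction disabled, so the kernel's 4D-to-3D window slicing computes
// offset_first_element correctly; only tensors of rank < 4 are reverted above.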
_return_fn = asAclFunction(std::move(fn));
}
@@ -1415,11 +1586,11 @@ void KernelGenerator::visit(const ir::operation::ConvertFp32ToFp16 &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp32ToFp16::Input::INPUT)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
auto fn = acl_common::generateLayer<arm_compute::CLDepthConvertLayer>(
- ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0);
+ ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0);
_return_fn = asAclFunction(std::move(fn));
}
@@ -1429,11 +1600,35 @@ void KernelGenerator::visit(const ir::operation::ConvertFp16ToFp32 &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp16ToFp32::Input::INPUT)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
auto fn = acl_common::generateLayer<arm_compute::CLDepthConvertLayer>(
- ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0);
+ ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0);
+
+ _return_fn = asAclFunction(std::move(fn));
+}
+
+void KernelGenerator::visit(const ir::operation::Reverse &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::Reverse::Input::INPUT)};
+ const auto axis_index{node.getInputs().at(ir::operation::Reverse::Input::AXIS)};
+
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto axis_tensor = _tensor_reg->getAclTensor(axis_index);
+
+ // WORKAROUND: the acl-cl backend only allows the U32 type for the axis
+ // ConstantInitializer will resolve S32 type to U32 type
+ if (_ctx.at(axis_index).isConstant() &&
+ (axis_tensor->handle()->info()->data_type() == arm_compute::DataType::S32))
+ {
+ axis_tensor->handle()->info()->set_data_type(arm_compute::DataType::U32);
+ }
+
+ auto fn = acl_common::generateLayer<arm_compute::CLReverse>(
+ ifm_tensor->handle(), ofm_tensor->handle(), axis_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
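A minimal standalone check of why retagging the constant axis tensor from S32 to U32 is safe here: axis values are small non-negative integers, so the underlying bit pattern is identical and no data conversion is needed.

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main()
    {
      int32_t axis_s32 = 2; // axis as the frontend supplies it
      uint32_t axis_u32 = 0;
      std::memcpy(&axis_u32, &axis_s32, sizeof(axis_u32)); // retag, no conversion
      assert(axis_u32 == 2u); // identical value for any axis >= 0
      return 0;
    }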
diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.h b/runtime/onert/backend/acl_cl/KernelGenerator.h
index d188d6d83..dc7285349 100644
--- a/runtime/onert/backend/acl_cl/KernelGenerator.h
+++ b/runtime/onert/backend/acl_cl/KernelGenerator.h
@@ -17,9 +17,8 @@
#ifndef __ONERT_BACKEND_ACL_CL_KERNEL_GENERATOR_H__
#define __ONERT_BACKEND_ACL_CL_KERNEL_GENERATOR_H__
-#include <backend/IKernelGenerator.h>
+#include <backend/basic/KernelGeneratorBase.h>
-#include "ir/Operands.h"
#include "TensorBuilder.h"
#include "AclTensorRegistry.h"
#include "TensorManager.h"
@@ -31,65 +30,69 @@ namespace backend
namespace acl_cl
{
-class KernelGenerator : public IKernelGenerator
+class KernelGenerator : public basic::KernelGeneratorBase
{
public:
- KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder,
+ KernelGenerator(const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg);
- void visit(const ir::OpSequence &) override;
+ std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex ind) override;
+
+private:
+ void visit(const ir::operation::ArgMinMax &) override;
void visit(const ir::operation::BatchToSpaceND &) override;
void visit(const ir::operation::BinaryArithmetic &) override;
+ void visit(const ir::operation::Comparison &) override;
+ void visit(const ir::operation::Concat &) override;
void visit(const ir::operation::Conv2D &) override;
+ void visit(const ir::operation::ConvertFp16ToFp32 &) override;
+ void visit(const ir::operation::ConvertFp32ToFp16 &) override;
+ void visit(const ir::operation::DepthToSpace &) override;
void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::Concat &) override;
- void visit(const ir::operation::FullyConnected &) override;
- void visit(const ir::operation::Reduce &) override;
- void visit(const ir::operation::Reshape &) override;
- void visit(const ir::operation::Squeeze &) override;
- void visit(const ir::operation::Softmax &) override;
- void visit(const ir::operation::Slice &) override;
- void visit(const ir::operation::StridedSlice &) override;
- void visit(const ir::operation::Transpose &) override;
void visit(const ir::operation::ElementwiseActivation &) override;
void visit(const ir::operation::ElementwiseBinary &) override;
void visit(const ir::operation::ElementwiseUnary &) override;
+ void visit(const ir::operation::EmbeddingLookup &) override;
void visit(const ir::operation::ExpandDims &) override;
+ void visit(const ir::operation::FullyConnected &) override;
+ void visit(const ir::operation::Gather &) override;
+ void visit(const ir::operation::HashtableLookup &) override;
void visit(const ir::operation::InstanceNorm &) override;
- void visit(const ir::operation::Comparison &) override;
+ void visit(const ir::operation::L2Normalization &) override;
+ void visit(const ir::operation::LocalResponseNormalization &) override;
void visit(const ir::operation::LSTM &) override;
+ void visit(const ir::operation::OneHot &) override;
void visit(const ir::operation::Pack &) override;
- void visit(const ir::operation::Pool2D &) override;
+ void visit(const ir::operation::Pad &) override;
void visit(const ir::operation::Permute &) override;
+ void visit(const ir::operation::Pool2D &) override;
+ void visit(const ir::operation::PReLU &) override;
+ void visit(const ir::operation::Reduce &) override;
+ void visit(const ir::operation::Reshape &) override;
void visit(const ir::operation::ResizeBilinear &) override;
void visit(const ir::operation::ResizeNearestNeighbor &) override;
+ void visit(const ir::operation::Reverse &) override;
void visit(const ir::operation::RNN &) override;
+ void visit(const ir::operation::Slice &) override;
+ void visit(const ir::operation::Softmax &) override;
void visit(const ir::operation::SpaceToBatchND &) override;
void visit(const ir::operation::SpaceToDepth &) override;
- void visit(const ir::operation::EmbeddingLookup &) override;
- void visit(const ir::operation::L2Normalization &) override;
- void visit(const ir::operation::HashtableLookup &) override;
- void visit(const ir::operation::PReLU &) override;
- void visit(const ir::operation::TransposeConv &) override;
+ void visit(const ir::operation::Split &) override;
+ void visit(const ir::operation::SplitV &) override;
void visit(const ir::operation::SquaredDifference &) override;
+ void visit(const ir::operation::Squeeze &) override;
+ void visit(const ir::operation::StridedSlice &) override;
void visit(const ir::operation::TopKV2 &) override;
- void visit(const ir::operation::Gather &) override;
- void visit(const ir::operation::ArgMax &) override;
- void visit(const ir::operation::LocalResponseNormalization &) override;
- void visit(const ir::operation::DepthToSpace &) override;
- void visit(const ir::operation::Split &) override;
+ void visit(const ir::operation::Transpose &) override;
+ void visit(const ir::operation::TransposeConv &) override;
void visit(const ir::operation::Unpack &) override;
- void visit(const ir::operation::Pad &) override;
- void visit(const ir::operation::ConvertFp32ToFp16 &) override;
- void visit(const ir::operation::ConvertFp16ToFp32 &) override;
private:
const ir::Operands &_ctx;
const ir::Operations &_operations_ctx;
+ const ir::Layout _current_layout;
std::shared_ptr<TensorBuilder> _tensor_builder;
std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg;
- ir::Layout _current_op_seq_layout;
};
} // namespace acl_cl
diff --git a/runtime/onert/backend/acl_cl/Optimizer.cc b/runtime/onert/backend/acl_cl/Optimizer.cc
index 9134d3fb8..0f779f483 100644
--- a/runtime/onert/backend/acl_cl/Optimizer.cc
+++ b/runtime/onert/backend/acl_cl/Optimizer.cc
@@ -16,12 +16,12 @@
#include "Optimizer.h"
-#include "ParentInfo.h"
+#include <AclSubTensorAnalyzer.h>
-#include <cassert>
#include <compiler/LoweredGraph.h>
#include <util/logging.h>
-#include "AclSubTensorAnalyzer.h"
+
+#include <cassert>
namespace onert
{
@@ -31,8 +31,8 @@ namespace acl_cl
{
Optimizer::Optimizer(BackendContext *context)
- : _context{context},
- _tensor_builder{std::dynamic_pointer_cast<TensorBuilder>(context->tensor_builder)}
+ : _context{context}, _tensor_builder{
+ std::dynamic_pointer_cast<TensorBuilder>(context->tensor_builder)}
{
assert(context);
}
@@ -42,12 +42,12 @@ void Optimizer::optimize()
// Concat elimination (build subtensor info)
{
acl_common::AclSubTensorAnalyzer sa{*_context->graph()};
- for (auto op_info : _context->operation_list())
- {
- auto &op = _context->graph()->operations().at(op_info.index);
- sa.setLayout(op_info.layout);
- op.accept(sa);
- }
+ sa.setUsePadding();
+ _context->graph()->operations().iterate(
+ [&](const ir::OperationIndex &, const ir::IOperation &op) {
+ sa.setLayout(_context->graph()->layout());
+ op.accept(sa);
+ });
_tensor_builder->parent_map(sa.releaseParentMap());
}
diff --git a/runtime/onert/backend/acl_cl/Optimizer.h b/runtime/onert/backend/acl_cl/Optimizer.h
index 18d38ec1b..ad5154860 100644
--- a/runtime/onert/backend/acl_cl/Optimizer.h
+++ b/runtime/onert/backend/acl_cl/Optimizer.h
@@ -17,8 +17,7 @@
#ifndef __ONERT_BACKEND_ACL_CL_OPTIMIZER_H__
#define __ONERT_BACKEND_ACL_CL_OPTIMIZER_H__
-#include <backend/IOptimizer.h>
-#include <backend/BackendContext.h>
+#include "BackendContext.h"
#include "TensorBuilder.h"
namespace onert
@@ -28,12 +27,12 @@ namespace backend
namespace acl_cl
{
-class Optimizer : public IOptimizer
+class Optimizer
{
public:
Optimizer(BackendContext *context);
- void optimize() override;
+ void optimize();
private:
BackendContext *_context;
diff --git a/runtime/onert/backend/acl_cl/TensorBuilder.h b/runtime/onert/backend/acl_cl/TensorBuilder.h
index 91502d39a..5492929fe 100644
--- a/runtime/onert/backend/acl_cl/TensorBuilder.h
+++ b/runtime/onert/backend/acl_cl/TensorBuilder.h
@@ -30,7 +30,7 @@ namespace acl_cl
{
using TensorBuilder =
- acl_common::AclTensorBuilder<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor>;
+ acl_common::AclTensorBuilder<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor>;
} // namespace acl_cl
} // namespace backend
diff --git a/runtime/onert/backend/acl_cl/TensorManager.h b/runtime/onert/backend/acl_cl/TensorManager.h
index ab295dbec..2860f51f3 100644
--- a/runtime/onert/backend/acl_cl/TensorManager.h
+++ b/runtime/onert/backend/acl_cl/TensorManager.h
@@ -41,20 +41,20 @@ namespace acl_cl
{
using MemoryManager =
- acl_common::AclMemoryManager<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor>;
+ acl_common::AclMemoryManager<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor>;
-using LinearMemoryManager = acl_common::AclLinearMemoryManager<
- operand::ICLTensor, operand::CLTensor, operand::CLSubTensor,
- ::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager,
- ::arm_compute::BlobLifetimeManager, ::arm_compute::CLBufferAllocator,
- ::arm_compute::MemoryGroup>;
+using LinearMemoryManager =
+ acl_common::AclLinearMemoryManager<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor,
+ ::arm_compute::MemoryManagerOnDemand,
+ ::arm_compute::PoolManager, ::arm_compute::BlobLifetimeManager,
+ ::arm_compute::CLBufferAllocator, ::arm_compute::MemoryGroup>;
using InternalBufferManager = acl_common::AclInternalBufferManager<
- ::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager,
- ::arm_compute::BlobLifetimeManager, ::arm_compute::CLBufferAllocator>;
+ ::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager,
+ ::arm_compute::BlobLifetimeManager, ::arm_compute::CLBufferAllocator>;
using TensorManager =
- acl_common::AclTensorManager<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor>;
+ acl_common::AclTensorManager<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor>;
inline TensorManager *createTensorManager(bool is_linear_executor)
{
diff --git a/runtime/onert/backend/acl_cl/acl_cl.cc b/runtime/onert/backend/acl_cl/acl_cl.cc
index 88378b13a..82cbde02f 100644
--- a/runtime/onert/backend/acl_cl/acl_cl.cc
+++ b/runtime/onert/backend/acl_cl/acl_cl.cc
@@ -14,20 +14,11 @@
* limitations under the License.
*/
-#include <util/logging.h>
-
#include "Backend.h"
extern "C" {
-onert::backend::Backend *onert_backend_create()
-{
- VERBOSE(onert_backend_create) << "'acl_cl' loaded\n";
- return new onert::backend::acl_cl::Backend;
-}
-void onert_backend_destroy(onert::backend::Backend *backend)
-{
- VERBOSE(onert_backend_create) << "'acl_cl' unloaded\n";
- delete backend;
-}
+onert::backend::Backend *onert_backend_create() { return new onert::backend::acl_cl::Backend; }
+
+void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; }
}
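A hedged sketch of how a loader can resolve these two C entry points at runtime; the plugin file name below is illustrative, not taken from this diff.

    #include <dlfcn.h>
    #include <cstdio>

    int main()
    {
      // Hypothetical plugin file name; the real name depends on the build setup.
      void *handle = dlopen("libbackend_acl_cl.so", RTLD_LAZY | RTLD_LOCAL);
      if (!handle)
      {
        std::fprintf(stderr, "dlopen failed: %s\n", dlerror());
        return 1;
      }
      using CreateFn = void *(*)();       // returns onert::backend::Backend * in the real code
      using DestroyFn = void (*)(void *);
      auto create = reinterpret_cast<CreateFn>(dlsym(handle, "onert_backend_create"));
      auto destroy = reinterpret_cast<DestroyFn>(dlsym(handle, "onert_backend_destroy"));
      if (create && destroy)
      {
        void *backend = create();
        destroy(backend);
      }
      dlclose(handle);
      return 0;
    }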
diff --git a/runtime/onert/backend/acl_cl/operand/CLSubTensor.cc b/runtime/onert/backend/acl_cl/operand/CLSubTensor.cc
index 234229787..2c4357349 100644
--- a/runtime/onert/backend/acl_cl/operand/CLSubTensor.cc
+++ b/runtime/onert/backend/acl_cl/operand/CLSubTensor.cc
@@ -27,9 +27,8 @@ namespace operand
CLSubTensor::CLSubTensor(ICLTensor *parent, const arm_compute::TensorShape &tensor_shape,
const arm_compute::Coordinates &coords, size_t rank, bool extend_parent)
- : _cl_sub_tensor(std::make_shared<arm_compute::CLSubTensor>(parent->handle(), tensor_shape,
- coords, extend_parent)),
- _rank{rank}
+ : ICLTensor{rank}, _cl_sub_tensor(std::make_shared<arm_compute::CLSubTensor>(
+ parent->handle(), tensor_shape, coords, extend_parent))
{
// DO NOTHING
}
diff --git a/runtime/onert/backend/acl_cl/operand/CLSubTensor.h b/runtime/onert/backend/acl_cl/operand/CLSubTensor.h
index fedc17fc2..0a26e4822 100644
--- a/runtime/onert/backend/acl_cl/operand/CLSubTensor.h
+++ b/runtime/onert/backend/acl_cl/operand/CLSubTensor.h
@@ -39,19 +39,16 @@ public:
const arm_compute::Coordinates &coords, size_t rank, bool extend_parent = false);
public:
- size_t num_dimensions() const final { return _rank; }
-
-public:
const arm_compute::CLSubTensor *handle() const override;
arm_compute::CLSubTensor *handle() override;
public:
// This method is used to prevent the use of memcpy for SubTensor
bool has_padding() const override { return true; }
+ bool is_subtensor() const final { return true; }
private:
std::shared_ptr<arm_compute::CLSubTensor> _cl_sub_tensor;
- size_t _rank;
};
} // namespace operand
diff --git a/runtime/onert/backend/acl_cl/operand/CLTensor.cc b/runtime/onert/backend/acl_cl/operand/CLTensor.cc
index f37edff51..38ce4647f 100644
--- a/runtime/onert/backend/acl_cl/operand/CLTensor.cc
+++ b/runtime/onert/backend/acl_cl/operand/CLTensor.cc
@@ -32,7 +32,7 @@ namespace operand
{
CLTensor::CLTensor(const arm_compute::TensorInfo &info, size_t rank, size_t num_uses)
- : _cl_tensor(std::make_shared<arm_compute::CLTensor>()), _rank{rank}, _num_uses{num_uses}
+ : ICLTensor{rank}, _cl_tensor(std::make_shared<arm_compute::CLTensor>()), _num_uses{num_uses}
{
allocator()->init(info);
}
diff --git a/runtime/onert/backend/acl_cl/operand/CLTensor.h b/runtime/onert/backend/acl_cl/operand/CLTensor.h
index c92208803..487d04662 100644
--- a/runtime/onert/backend/acl_cl/operand/CLTensor.h
+++ b/runtime/onert/backend/acl_cl/operand/CLTensor.h
@@ -41,9 +41,6 @@ public:
CLTensor(const arm_compute::TensorInfo &info, size_t rank, size_t num_uses);
public:
- size_t num_dimensions() const final { return _rank; }
-
-public:
const arm_compute::CLTensor *handle() const override;
arm_compute::CLTensor *handle() override;
size_t num_uses() const { return _num_uses; }
@@ -61,7 +58,6 @@ public:
private:
std::shared_ptr<arm_compute::CLTensor> _cl_tensor;
- size_t _rank;
size_t _num_uses;
};
diff --git a/runtime/onert/backend/acl_cl/operand/ICLTensor.cc b/runtime/onert/backend/acl_cl/operand/ICLTensor.cc
index b400ef9cf..2cee0b474 100644
--- a/runtime/onert/backend/acl_cl/operand/ICLTensor.cc
+++ b/runtime/onert/backend/acl_cl/operand/ICLTensor.cc
@@ -17,6 +17,7 @@
#include "ICLTensor.h"
#include <arm_compute/runtime/CL/CLScheduler.h>
+#include <arm_compute/core/CL/OpenCL.h>
namespace onert
{
@@ -39,6 +40,20 @@ void ICLTensor::access(const std::function<void(ITensor &tensor)> &fn)
fn(*this);
unmap(queue);
}
+
+void ICLTensor::enqueueWriteBuffer(const void *ptr, bool blocking)
+{
+ auto &queue = ::arm_compute::CLScheduler::get().queue();
+ queue.enqueueWriteBuffer(handle()->cl_buffer(), blocking ? CL_TRUE : CL_FALSE, 0,
+ info()->total_size(), ptr);
+}
+
+void ICLTensor::enqueueReadBuffer(void *ptr, bool blocking)
+{
+ auto &queue = ::arm_compute::CLScheduler::get().queue();
+ queue.enqueueReadBuffer(handle()->cl_buffer(), blocking ? CL_TRUE : CL_FALSE, 0,
+ info()->total_size(), ptr);
+}
} // namespace operand
} // namespace acl_cl
} // namespace backend
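A usage sketch of the new read/write path, assuming a concrete, already-allocated tensor implementing this interface; unlike access(), no map/unmap round trip is needed because the copy goes through the CL command queue directly.

    #include "ICLTensor.h" // assumed to be on the include path (onert source tree)

    #include <cstdint>
    #include <vector>

    void roundTrip(onert::backend::acl_cl::operand::ICLTensor &tensor)
    {
      std::vector<uint8_t> host_in(tensor.info()->total_size(), 0x2A);
      std::vector<uint8_t> host_out(host_in.size(), 0);

      tensor.enqueueWriteBuffer(host_in.data(), /*blocking=*/true); // host -> device
      tensor.enqueueReadBuffer(host_out.data(), /*blocking=*/true); // device -> host
    }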
diff --git a/runtime/onert/backend/acl_cl/operand/ICLTensor.h b/runtime/onert/backend/acl_cl/operand/ICLTensor.h
index 5427000f9..51152a318 100644
--- a/runtime/onert/backend/acl_cl/operand/ICLTensor.h
+++ b/runtime/onert/backend/acl_cl/operand/ICLTensor.h
@@ -33,11 +33,15 @@ namespace operand
class ICLTensor : public acl_common::IACLTensor
{
public:
+ ICLTensor(size_t rank) : IACLTensor{rank} {}
const arm_compute::ICLTensor *handle() const override = 0;
arm_compute::ICLTensor *handle() override = 0;
public:
void access(const std::function<void(ITensor &tensor)> &fn) final;
+ bool needMemoryMap() const final { return true; }
+ void enqueueWriteBuffer(const void *ptr, bool blocking = true) final;
+ void enqueueReadBuffer(void *ptr, bool blocking = true) final;
private:
void map(cl::CommandQueue &q, bool blocking = true) { return handle()->map(q, blocking); }
diff --git a/runtime/onert/backend/acl_common/AclActivationBuilder.h b/runtime/onert/backend/acl_common/AclActivationBuilder.h
index bfdea6ea0..5d92a7856 100644
--- a/runtime/onert/backend/acl_common/AclActivationBuilder.h
+++ b/runtime/onert/backend/acl_common/AclActivationBuilder.h
@@ -49,7 +49,7 @@ std::unique_ptr<exec::IFunction>
AclActivationBuilder<T_Tensor, T_ActivationLayer, T_ExecFunction>::generateReLU(T_Tensor *ifm_alloc)
{
const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
auto fn = std::make_unique<T_ActivationLayer>();
@@ -61,10 +61,10 @@ AclActivationBuilder<T_Tensor, T_ActivationLayer, T_ExecFunction>::generateReLU(
template <typename T_Tensor, typename T_ActivationLayer, typename T_ExecFunction>
std::unique_ptr<exec::IFunction>
AclActivationBuilder<T_Tensor, T_ActivationLayer, T_ExecFunction>::generateReLU1(
- T_Tensor *ifm_alloc)
+ T_Tensor *ifm_alloc)
{
const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
auto fn = std::make_unique<T_ActivationLayer>();
@@ -76,10 +76,10 @@ AclActivationBuilder<T_Tensor, T_ActivationLayer, T_ExecFunction>::generateReLU1
template <typename T_Tensor, typename T_ActivationLayer, typename T_ExecFunction>
std::unique_ptr<exec::IFunction>
AclActivationBuilder<T_Tensor, T_ActivationLayer, T_ExecFunction>::generateReLU6(
- T_Tensor *ifm_alloc)
+ T_Tensor *ifm_alloc)
{
const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f};
auto fn = std::make_unique<T_ActivationLayer>();
diff --git a/runtime/onert/backend/acl_common/AclBackendContext.h b/runtime/onert/backend/acl_common/AclBackendContext.h
new file mode 100644
index 000000000..b8d027476
--- /dev/null
+++ b/runtime/onert/backend/acl_common/AclBackendContext.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_COMMON_ACLBACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_ACL_COMMON_ACLBACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include <ir/Index.h>
+#include <ir/OperandIndexMap.h>
+#include <ir/OperandIndexSequence.h>
+#include <util/logging.h>
+
+#include <cl_common/BackendContext.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+// TODO Find a better way to handle common code (reduce template use)
+template <typename T_TensorBuilder, typename T_ConstantInitializer, typename T_KernelGenerator,
+ typename T_Optimizer>
+class AclBackendContext
+ : public onert::backend::cl_common::BackendContext<T_TensorBuilder, T_ConstantInitializer,
+ T_KernelGenerator>
+{
+public:
+ AclBackendContext(const Backend *backend, ContextData &&data,
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
+ std::shared_ptr<T_TensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<T_ConstantInitializer> constant_initializer = nullptr,
+ std::shared_ptr<T_KernelGenerator> kernel_gen = nullptr)
+ : onert::backend::cl_common::BackendContext<T_TensorBuilder, T_ConstantInitializer,
+ T_KernelGenerator>(
+ backend, std::move(data), tensor_registry, tensor_builder, constant_initializer, kernel_gen)
+ {
+ // DO NOTHING
+ }
+
+ ITensorRegistry *genTensors() override
+ {
+ optimizer->optimize();
+
+ this->graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+ if (this->external_operands().contains(ind))
+ return;
+
+ const auto frontend_layout = this->graph()->layout();
+ const auto backend_layout = this->operand_layouts().at(ind);
+ ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
+ obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
+ this->tensor_builder->registerTensorInfo(ind, backend_info, backend_layout);
+ });
+
+ // TODO Get compiler options from the compiler and use them rather than getting them from Env
+ if (util::getConfigString(util::config::EXECUTOR) == "Linear")
+ {
+ this->planTensors();
+ }
+ else
+ {
+ // For executors that do not have a fixed linear execution order:
+ // as a workaround, use the static memory planner so that tensors are never deallocated
+ this->graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
+ if (this->tensor_builder->isRegistered(ind))
+ this->tensor_builder->notifyFirstUse(ind);
+ });
+ }
+
+ this->tensor_builder->prepare();
+
+ return this->tensor_registry.get();
+ }
+
+protected:
+ void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout backend_layout) override
+ {
+ this->tensor_builder->registerTensorInfo(ind, info, backend_layout);
+ }
+
+public:
+ // TODO Make it private
+ std::shared_ptr<T_Optimizer> optimizer;
+};
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_COMMON_ACLBACKEND_CONTEXT_H__
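A standalone sketch of the frontend-to-backend shape permutation genTensors performs for each operand (NHWC to NCHW shown); this mirrors the idea rather than onert's exact permuteShape API.

    #include <array>
    #include <cstdio>

    // N,H,W,C -> N,C,H,W
    std::array<int, 4> permuteNHWCToNCHW(const std::array<int, 4> &nhwc)
    {
      return {nhwc[0], nhwc[3], nhwc[1], nhwc[2]};
    }

    int main()
    {
      const auto nchw = permuteNHWCToNCHW({1, 224, 224, 3});
      std::printf("%d %d %d %d\n", nchw[0], nchw[1], nchw[2], nchw[3]); // 1 3 224 224
    }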
diff --git a/runtime/onert/backend/acl_common/AclConstantInitializer.cc b/runtime/onert/backend/acl_common/AclConstantInitializer.cc
index 6ad5b7b69..9748ab111 100644
--- a/runtime/onert/backend/acl_common/AclConstantInitializer.cc
+++ b/runtime/onert/backend/acl_common/AclConstantInitializer.cc
@@ -25,7 +25,7 @@ namespace acl_common
AclConstantInitializer::AclConstantInitializer(const ir::Operands &operands,
const std::shared_ptr<ITensorRegistry> &tensor_reg)
- : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
+ : _operands{operands}, _tensor_reg{tensor_reg}, _current_layout{ir::Layout::UNKNOWN}
{
// DO NOTHING
}
@@ -35,8 +35,11 @@ void AclConstantInitializer::copyInputInitialize(const ir::Operation &node, uint
assert(node.getInputs().size() > index);
const auto &input_index = node.getInputs().at(index);
- const auto &input_obj = _operands.at(input_index);
- registerCopyInitializer(input_index, input_obj);
+ if (input_index.valid())
+ {
+ const auto &input_obj = _operands.at(input_index);
+ registerCopyInitializer(input_index, input_obj);
+ }
}
void AclConstantInitializer::permuteInputInitialize(const ir::Operation &node, uint32_t index)
@@ -123,6 +126,94 @@ void AclConstantInitializer::visit(const ir::operation::TransposeConv &node)
permuteInputInitialize(node, ir::operation::TransposeConv::KERNEL);
}
+// NOTE Workaround for the 16-bit float type. Here, this is enough since only the size in bytes matters.
+using float16 = uint16_t;
+
+void AclConstantInitializer::registerCopyInitializer(const ir::OperandIndex &index,
+ const ir::Operand &obj)
+{
+ // For CONSTANTS only
+ // TODO Add a check for whether the tensor has been allocated
+ if (!obj.isConstant())
+ return;
+
+ const auto type = obj.typeInfo().type();
+ using ir::DataType;
+
+ switch (type)
+ {
+ case DataType::FLOAT32:
+ _init_map[index] = copyInit<float>;
+ break;
+ case DataType::INT32:
+ _init_map[index] = copyInit<int32_t>;
+ break;
+ case DataType::UINT32:
+ _init_map[index] = copyInit<uint32_t>;
+ break;
+ case DataType::BOOL8:
+ case DataType::QUANT_UINT8_ASYMM:
+ _init_map[index] = copyInit<uint8_t>;
+ break;
+ case DataType::QUANT_INT8_SYMM:
+ case DataType::QUANT_INT8_ASYMM:
+ _init_map[index] = copyInit<int8_t>;
+ break;
+ case DataType::FLOAT16:
+ _init_map[index] = copyInit<float16>;
+ break;
+ case DataType::INT64:
+ _init_map[index] = copyInit<int64_t>;
+ break;
+ default:
+ throw std::runtime_error("Not supported, yet");
+ break;
+ }
+}
+
+void AclConstantInitializer::registerPermuteInitializer(const ir::OperandIndex &index,
+ const ir::Operand &obj)
+{
+ // For CONSTANTS only
+ // TODO Add a check for whether the tensor has been allocated
+ if (!obj.isConstant())
+ return;
+
+ const auto type = obj.typeInfo().type();
+ using ir::DataType;
+ using namespace std::placeholders;
+
+ switch (type)
+ {
+ case DataType::FLOAT32:
+ _init_map[index] = std::bind(permuteInit<float>, _1, _2, _current_layout);
+ break;
+ case DataType::INT32:
+ _init_map[index] = std::bind(permuteInit<int32_t>, _1, _2, _current_layout);
+ break;
+ case DataType::UINT32:
+ _init_map[index] = std::bind(permuteInit<uint32_t>, _1, _2, _current_layout);
+ break;
+ case DataType::BOOL8:
+ case DataType::QUANT_UINT8_ASYMM:
+ _init_map[index] = std::bind(permuteInit<uint8_t>, _1, _2, _current_layout);
+ break;
+ case DataType::QUANT_INT8_SYMM:
+ case DataType::QUANT_INT8_ASYMM:
+ _init_map[index] = std::bind(permuteInit<int8_t>, _1, _2, _current_layout);
+ break;
+ case DataType::FLOAT16:
+ _init_map[index] = std::bind(permuteInit<float16>, _1, _2, _current_layout);
+ break;
+ case DataType::INT64:
+ _init_map[index] = std::bind(permuteInit<int64_t>, _1, _2, _current_layout);
+ break;
+ default:
+ throw std::runtime_error("Not supported, yet");
+ break;
+ }
+}
+
} // namespace acl_common
} // namespace backend
} // namespace onert
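A minimal standalone analog of the _init_map mechanism above: a typed copy routine is selected per operand at registration time, and all registered initializers run (and are cleared) later, as in run().

    #include <cstdint>
    #include <cstdio>
    #include <functional>
    #include <unordered_map>
    #include <vector>

    enum class DataType { FLOAT32, INT32 };

    template <typename T> void copyInit(const void *src, void *dst, std::size_t count)
    {
      auto s = static_cast<const T *>(src);
      auto d = static_cast<T *>(dst);
      for (std::size_t i = 0; i < count; ++i)
        d[i] = s[i];
    }

    int main()
    {
      using Initializer = std::function<void(const void *, void *, std::size_t)>;
      std::unordered_map<int, Initializer> init_map;

      // Registration: pick the initializer by element type, as the switch above does.
      DataType type = DataType::FLOAT32;
      switch (type)
      {
        case DataType::FLOAT32: init_map[0] = copyInit<float>; break;
        case DataType::INT32:   init_map[0] = copyInit<int32_t>; break;
      }

      // Run phase: execute every registered initializer, then drop the map.
      std::vector<float> src{1.f, 2.f, 3.f}, dst(3, 0.f);
      for (auto &it : init_map)
        it.second(src.data(), dst.data(), src.size());
      init_map.clear();
      std::printf("%g %g %g\n", dst[0], dst[1], dst[2]); // 1 2 3
    }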
diff --git a/runtime/onert/backend/acl_common/AclConstantInitializer.h b/runtime/onert/backend/acl_common/AclConstantInitializer.h
index 52f4c54cf..65659ad50 100644
--- a/runtime/onert/backend/acl_common/AclConstantInitializer.h
+++ b/runtime/onert/backend/acl_common/AclConstantInitializer.h
@@ -17,10 +17,19 @@
#ifndef __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__
#define __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__
-#include <backend/IConstantInitializer.h>
-#include <ir/Operands.h>
#include "AclTensorRegistry.h"
+#include <unordered_map>
+#include <functional>
+
+#include <ir/Coordinates.h>
+#include <ir/Layout.h>
+#include <ir/Operand.h>
+#include <ir/Operands.h>
+#include <ir/OperationVisitor.h>
+#include <backend/ITensorRegistry.h>
+#include <util/logging.h>
+
namespace onert
{
namespace backend
@@ -28,13 +37,179 @@ namespace backend
namespace acl_common
{
-class AclConstantInitializer : public IConstantInitializer
+template <typename T>
+static void Init(const onert::ir::Operand &model_obj, onert::backend::ITensor &obj, const bool copy,
+ const onert::ir::Layout frontend_layout = onert::ir::Layout::UNKNOWN)
+{
+ const auto shape = model_obj.shape();
+ assert(model_obj.data());
+ auto base = reinterpret_cast<const T *>(model_obj.data()->base());
+
+ obj.access([&](::onert::backend::ITensor &tensor) {
+ switch (shape.rank())
+ {
+ case 0:
+ {
+ assert(model_obj.data()->size() == sizeof(T));
+ const auto value = *reinterpret_cast<const T *>(base);
+ T *into = reinterpret_cast<T *>(tensor.buffer());
+ *into = value;
+ break;
+ }
+ case 1:
+ {
+ auto vec_size = shape.dim(0);
+ for (int32_t n = 0; n < vec_size; ++n)
+ {
+ const T *from = reinterpret_cast<const T *>(base) + n;
+ const auto value = *from;
+
+ T *into = reinterpret_cast<T *>(tensor.buffer()) + n;
+
+ *into = value;
+ }
+ break;
+ }
+ case 2:
+ {
+ const int32_t copy_len = shape.dim(1);
+
+ for (auto i = 0; i < shape.dim(0); ++i)
+ {
+ ::onert::ir::Coordinates coords{i, 0};
+ memcpy(tensor.buffer() + tensor.calcOffset(coords), base + i * copy_len,
+ copy_len * sizeof(T));
+ }
+ break;
+ }
+ case 3:
+ {
+ const int32_t width = shape.dim(1);
+ const int32_t copy_len = shape.dim(2);
+
+ for (auto i = 0; i < shape.dim(0); ++i)
+ {
+ for (auto j = 0; j < shape.dim(1); ++j)
+ {
+ ::onert::ir::Coordinates coords{i, j, 0};
+ memcpy(tensor.buffer() + tensor.calcOffset(coords),
+ base + i * width * copy_len + j * copy_len, copy_len * sizeof(T));
+ }
+ }
+ break;
+ }
+ case 4:
+ {
+ const int32_t height = shape.dim(1);
+ const int32_t width = shape.dim(2);
+ const int32_t copy_len = shape.dim(3);
+ for (auto i = 0; i < shape.dim(0); ++i)
+ {
+ for (auto j = 0; j < shape.dim(1); ++j)
+ {
+ for (auto k = 0; k < shape.dim(2); ++k)
+ {
+ if (copy)
+ {
+ ::onert::ir::Coordinates coords{i, j, k, 0};
+ memcpy(tensor.buffer() + tensor.calcOffset(coords),
+ base + i * height * width * copy_len + j * width * copy_len + k * copy_len,
+ copy_len * sizeof(T));
+ }
+ else
+ {
+ for (auto l = 0; l < shape.dim(3); ++l)
+ {
+ const auto coords =
+ ::onert::ir::convertCoordinates({i, j, k, l}, frontend_layout, tensor.layout());
+ T *into = reinterpret_cast<T *>(tensor.buffer() + tensor.calcOffset(coords));
+ T value = *(base + i * height * width * copy_len + j * width * copy_len +
+ k * copy_len + l);
+ *into = value;
+ }
+ }
+ }
+ }
+ }
+ break;
+ }
+ default:
+ throw std::runtime_error{"Not yet supported"};
+ }
+ });
+}
+
+template <typename T>
+void copyInit(const onert::ir::Operand &model_obj, onert::backend::ITensor &obj)
+{
+ Init<T>(model_obj, obj, true);
+}
+
+template <typename T>
+void permuteInit(const onert::ir::Operand &model_obj, onert::backend::ITensor &obj,
+ const onert::ir::Layout frontend_layout)
+{
+ const bool copy = frontend_layout == obj.layout();
+ Init<T>(model_obj, obj, copy, frontend_layout);
+}
+
+// Pre-defined initializer - fills in reverse order
+template <typename T> void initReverseOrder(const ir::Operand &model_obj, backend::ITensor &obj)
+{
+ assert(model_obj.data());
+ const auto &shape = model_obj.shape();
+ const auto base = reinterpret_cast<const T *>(model_obj.data()->base());
+ assert(model_obj.shape().rank() == 1);
+ obj.access([&](ITensor &tensor) {
+ for (size_t i = 0; i < shape.num_elements(); ++i)
+ {
+ const T value = base[shape.num_elements() - i - 1];
+ T *into = reinterpret_cast<T *>(tensor.buffer() + tensor.calcOffset({static_cast<T>(i)}));
+ *into = value;
+ }
+ });
+}
+
+class AclConstantInitializer : public ir::OperationVisitor
{
public:
+ void run()
+ {
+ assert(_tensor_reg);
+ for (const auto &it : _init_map)
+ {
+ const auto &ind = it.first;
+ const auto &fn = it.second;
+
+ const auto &model_obj = _operands.at(ind);
+ auto tensor_obj = _tensor_reg->getNativeITensor(ind);
+ assert(tensor_obj != nullptr);
+ fn(model_obj, *tensor_obj);
+ VERBOSE(FillOperandData) << "Fill data for operand " << ind << std::endl;
+ }
+ _init_map.clear();
+ }
+
+public:
AclConstantInitializer(const ir::Operands &operands,
const std::shared_ptr<ITensorRegistry> &tensor_reg);
public:
+ using Initializer = std::function<void(const ir::Operand &, backend::ITensor &)>;
+
+public:
+ void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj)
+ {
+ registerPermuteInitializer(index, obj);
+ }
+ void registerCopyInitializer(const ir::OperandIndex &index, const ir::Operand &obj);
+ void registerPermuteInitializer(const ir::OperandIndex &index, const ir::Operand &obj);
+
+public:
+ void setLayout(ir::Layout layout) { _current_layout = layout; }
+ bool exist(const ir::OperandIndex &ind) { return _init_map.find(ind) != _init_map.end(); }
+
+public:
void visit(const ir::operation::BatchToSpaceND &) override;
void visit(const ir::operation::Conv2D &) override;
void visit(const ir::operation::DepthwiseConv2D &) override;
@@ -47,11 +222,11 @@ protected:
void copyInputInitialize(const ir::Operation &node, uint32_t index);
void permuteInputInitialize(const ir::Operation &node, uint32_t index);
-private:
- std::shared_ptr<ITensorRegistry> tensor_registry() const final { return _tensor_reg; }
-
protected:
+ const ir::Operands &_operands;
std::shared_ptr<ITensorRegistry> _tensor_reg;
+ std::unordered_map<ir::OperandIndex, Initializer> _init_map;
+ ir::Layout _current_layout;
};
} // namespace acl_common
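A standalone analog of the rank-2 branch of Init<T> above: one contiguous row is copied at a time, and the destination supplies each row's byte offset so layouts with padding still receive correct data.

    #include <cstdint>
    #include <cstdio>
    #include <cstring>
    #include <vector>

    int main()
    {
      const int rows = 2, cols = 3, dst_stride = 4; // destination rows carry one padded element
      std::vector<float> src{1, 2, 3, 4, 5, 6};
      std::vector<float> dst(rows * dst_stride, 0.f);

      // Stand-in for ITensor::calcOffset({i, 0}): byte offset of row i in the destination.
      auto calcOffset = [&](int row) { return row * dst_stride * sizeof(float); };

      for (int i = 0; i < rows; ++i)
        std::memcpy(reinterpret_cast<uint8_t *>(dst.data()) + calcOffset(i),
                    src.data() + i * cols, cols * sizeof(float));

      std::printf("%g %g %g (pad %g)\n", dst[4], dst[5], dst[6], dst[7]); // 4 5 6 (pad 0)
    }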
diff --git a/runtime/onert/backend/acl_common/AclInternalBufferManager.h b/runtime/onert/backend/acl_common/AclInternalBufferManager.h
index f893bb44b..cca5778d4 100644
--- a/runtime/onert/backend/acl_common/AclInternalBufferManager.h
+++ b/runtime/onert/backend/acl_common/AclInternalBufferManager.h
@@ -20,7 +20,6 @@
#include <arm_compute/runtime/IMemoryManager.h>
#include <cassert>
#include <memory>
-#include <backend/IMemoryManager.h>
namespace onert
{
@@ -34,10 +33,13 @@ namespace acl_common
/**
* @brief Interface for InternalBufferManager which has ::arm_compute::IMemoryManager pointer
*/
-struct IInternalBufferManager : public backend::IMemoryManager
+struct IInternalBufferManager
{
virtual ~IInternalBufferManager() = default;
+ virtual void allocate(void) = 0;
+ virtual void deallocate(void) = 0;
+
/**
* @brief Get shared_ptr of ::arm_compute::IMemoryManager
*/
diff --git a/runtime/onert/backend/acl_common/AclKernelGen.h b/runtime/onert/backend/acl_common/AclKernelGen.h
index 372ce689e..e05d36a12 100644
--- a/runtime/onert/backend/acl_common/AclKernelGen.h
+++ b/runtime/onert/backend/acl_common/AclKernelGen.h
@@ -30,6 +30,20 @@ namespace backend
namespace acl_common
{
+void enableDimCorrection(IACLTensor *tensor)
+{
+ size_t input_rank = tensor->getShape().rank();
+ const_cast<arm_compute::TensorShape &>(tensor->info()->tensor_shape())
+ .set(input_rank - 1, tensor->info()->dimension(input_rank - 1), true);
+}
+
+void disableDimCorrection(IACLTensor *tensor)
+{
+ size_t input_rank = tensor->getShape().rank();
+ const_cast<arm_compute::TensorShape &>(tensor->info()->tensor_shape())
+ .set(input_rank - 1, tensor->info()->dimension(input_rank - 1), false);
+}
+
template <typename Layer, typename... Args>
std::unique_ptr<arm_compute::IFunction> generateLayer(Args &&... args)
{
@@ -60,49 +74,49 @@ std::unique_ptr<exec::IFunction> kernelGenLSTM(const ir::operation::LSTM &node,
// TODO Support dynamic rnn
// TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection.
const auto scratch_buffer_index{
- node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
+ node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
const auto output_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
+ node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
const auto cell_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
+ node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
const auto input_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
const auto input_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
const auto input_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
const auto input_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
const auto recurrent_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
const auto recurrent_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
const auto recurrent_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
const auto recurrent_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
const auto cell_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
const auto cell_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
const auto cell_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
const auto input_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
const auto forget_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
+ node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
const auto output_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
const auto projection_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
+ node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
const auto projection_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
+ node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
const auto output_state_in_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
const auto cell_threshold = node.param().cell_threshold;
const auto projection_threshold = node.param().projection_threshold;
@@ -110,8 +124,8 @@ std::unique_ptr<exec::IFunction> kernelGenLSTM(const ir::operation::LSTM &node,
bool has_input_to_input_weights = operands.at(input_to_input_weights_index).shape().dim(0) != 0 &&
operands.at(input_to_input_weights_index).shape().dim(1) != 0;
bool has_recurrent_to_input_weights =
- operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
+ operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
bool has_cell_to_forget_weights = operands.at(cell_to_forget_weights_index).shape().dim(0) != 0;
bool has_cell_to_output_weights = operands.at(cell_to_output_weights_index).shape().dim(0) != 0;
bool has_projection_weights = operands.at(projection_weights_index).shape().dim(0) != 0 &&
@@ -138,30 +152,27 @@ std::unique_ptr<exec::IFunction> kernelGenLSTM(const ir::operation::LSTM &node,
const auto projection_clip = projection_threshold;
assert(cell_clip >= 0.f && projection_clip >= 0.f);
- auto scratch_buffer_tensor = tensor_reg->getAclTensor(scratch_buffer_index).get();
- auto output_state_out_tensor = tensor_reg->getAclTensor(output_state_out_index).get();
- auto cell_state_out_tensor = tensor_reg->getAclTensor(cell_state_out_index).get();
- auto output_tensor = tensor_reg->getAclTensor(output_index).get();
+ auto scratch_buffer_tensor = tensor_reg->getAclTensor(scratch_buffer_index);
+ auto output_state_out_tensor = tensor_reg->getAclTensor(output_state_out_index);
+ auto cell_state_out_tensor = tensor_reg->getAclTensor(cell_state_out_index);
+ auto output_tensor = tensor_reg->getAclTensor(output_index);
- auto input_tensor = tensor_reg->getAclTensor(input_index).get();
+ auto input_tensor = tensor_reg->getAclTensor(input_index);
- auto input_to_forget_weights_tensor =
- tensor_reg->getAclTensor(input_to_forget_weights_index).get();
- auto input_to_cell_weights_tensor = tensor_reg->getAclTensor(input_to_cell_weights_index).get();
- auto input_to_output_weights_tensor =
- tensor_reg->getAclTensor(input_to_output_weights_index).get();
+ auto input_to_forget_weights_tensor = tensor_reg->getAclTensor(input_to_forget_weights_index);
+ auto input_to_cell_weights_tensor = tensor_reg->getAclTensor(input_to_cell_weights_index);
+ auto input_to_output_weights_tensor = tensor_reg->getAclTensor(input_to_output_weights_index);
auto recurrent_to_forget_weights_tensor =
- tensor_reg->getAclTensor(recurrent_to_forget_weights_index).get();
- auto recurrent_to_cell_weights_tensor =
- tensor_reg->getAclTensor(recurrent_to_cell_weights_index).get();
+ tensor_reg->getAclTensor(recurrent_to_forget_weights_index);
+ auto recurrent_to_cell_weights_tensor = tensor_reg->getAclTensor(recurrent_to_cell_weights_index);
auto recurrent_to_output_weights_tensor =
- tensor_reg->getAclTensor(recurrent_to_output_weights_index).get();
+ tensor_reg->getAclTensor(recurrent_to_output_weights_index);
- auto forget_gate_bias_tensor = tensor_reg->getAclTensor(forget_gate_bias_index).get();
- auto cell_bias_tensor = tensor_reg->getAclTensor(cell_bias_index).get();
- auto output_gate_bias_tensor = tensor_reg->getAclTensor(output_gate_bias_index).get();
- auto output_state_in_tensor = tensor_reg->getAclTensor(output_state_in_index).get();
- auto cell_state_in_tensor = tensor_reg->getAclTensor(cell_state_in_index).get();
+ auto forget_gate_bias_tensor = tensor_reg->getAclTensor(forget_gate_bias_index);
+ auto cell_bias_tensor = tensor_reg->getAclTensor(cell_bias_index);
+ auto output_gate_bias_tensor = tensor_reg->getAclTensor(output_gate_bias_index);
+ auto output_state_in_tensor = tensor_reg->getAclTensor(output_state_in_index);
+ auto cell_state_in_tensor = tensor_reg->getAclTensor(cell_state_in_index);
auto act_info = asActivationLayerInfo(activation);
@@ -169,13 +180,13 @@ std::unique_ptr<exec::IFunction> kernelGenLSTM(const ir::operation::LSTM &node,
if (has_cifg_param)
{
auto input_to_input_weights_tensor =
- tensor_reg->getAclTensor(input_to_input_weights_index).get(); // optional
+ tensor_reg->getAclTensor(input_to_input_weights_index); // optional
auto recurrent_to_input_weights_tensor =
- tensor_reg->getAclTensor(recurrent_to_input_weights_index).get(); // optional
+ tensor_reg->getAclTensor(recurrent_to_input_weights_index); // optional
auto cell_to_input_weights_handle =
- has_peephole_param ? tensor_reg->getAclTensor(cell_to_input_weights_index).get()->handle()
- : nullptr; // optional (non-cifg && peephole)
- auto input_gate_bias_tensor = tensor_reg->getAclTensor(input_gate_bias_index).get(); // optional
+ has_peephole_param ? tensor_reg->getAclTensor(cell_to_input_weights_index)->handle()
+ : nullptr; // optional (non-cifg && peephole)
+ auto input_gate_bias_tensor = tensor_reg->getAclTensor(input_gate_bias_index); // optional
lstm_params.set_cifg_params(input_to_input_weights_tensor->handle(),
recurrent_to_input_weights_tensor->handle(),
cell_to_input_weights_handle, input_gate_bias_tensor->handle());
@@ -183,32 +194,30 @@ std::unique_ptr<exec::IFunction> kernelGenLSTM(const ir::operation::LSTM &node,
if (has_peephole_param)
{
auto cell_to_forget_weights_tensor =
- tensor_reg->getAclTensor(cell_to_forget_weights_index).get(); // optional
+ tensor_reg->getAclTensor(cell_to_forget_weights_index); // optional
auto cell_to_output_weights_tensor =
- tensor_reg->getAclTensor(cell_to_output_weights_index).get(); // optional
+ tensor_reg->getAclTensor(cell_to_output_weights_index); // optional
lstm_params.set_peephole_params(cell_to_forget_weights_tensor->handle(),
cell_to_output_weights_tensor->handle());
}
if (has_projection_param)
{
- auto projection_weights_tensor =
- tensor_reg->getAclTensor(projection_weights_index).get(); // optional
- auto projection_bias_handle =
- has_projection_bias ? tensor_reg->getAclTensor(projection_bias_index).get()->handle()
- : nullptr; // optional
+ auto projection_weights_tensor = tensor_reg->getAclTensor(projection_weights_index); // optional
+ auto projection_bias_handle = has_projection_bias
+ ? tensor_reg->getAclTensor(projection_bias_index)->handle()
+ : nullptr; // optional
lstm_params.set_projection_params(projection_weights_tensor->handle(), projection_bias_handle);
}
auto fn = generateLayer<T_ACLLayer>(
- input_tensor->handle(), input_to_forget_weights_tensor->handle(),
- input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(),
- recurrent_to_forget_weights_tensor->handle(), recurrent_to_cell_weights_tensor->handle(),
- recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(),
- cell_bias_tensor->handle(), output_gate_bias_tensor->handle(),
- output_state_in_tensor->handle(), cell_state_in_tensor->handle(),
- scratch_buffer_tensor->handle(), output_state_out_tensor->handle(),
- cell_state_out_tensor->handle(), output_tensor->handle(), lstm_params, act_info, cell_clip,
- projection_clip);
+ input_tensor->handle(), input_to_forget_weights_tensor->handle(),
+ input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(),
+ recurrent_to_forget_weights_tensor->handle(), recurrent_to_cell_weights_tensor->handle(),
+ recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(),
+ cell_bias_tensor->handle(), output_gate_bias_tensor->handle(), output_state_in_tensor->handle(),
+ cell_state_in_tensor->handle(), scratch_buffer_tensor->handle(),
+ output_state_out_tensor->handle(), cell_state_out_tensor->handle(), output_tensor->handle(),
+ lstm_params, act_info, cell_clip, projection_clip);
return std::make_unique<T_FunctionWrapper>(std::move(fn));
}
@@ -230,14 +239,14 @@ kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Ope
const auto input_rank = operands.at(input_index).shape().rank();
const auto output_size =
- operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 1);
+ operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 1);
UNUSED_RELEASE(output_size);
- assert(operands.at(bias_index).shape().dim(0) == output_size);
+ assert(bias_index.undefined() || operands.at(bias_index).shape().dim(0) == output_size);
assert(operands.at(weight_index).shape().dim(0) == output_size);
const auto batch_size =
- operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 2);
+ operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 2);
const auto input_size =
- operands.at(weight_index).shape().dim(operands.at(weight_index).shape().rank() - 1);
+ operands.at(weight_index).shape().dim(operands.at(weight_index).shape().rank() - 1);
// Check for reshaping input's shape into rank-2
bool needs_reshape = false;
@@ -260,10 +269,10 @@ kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Ope
reshape.dim(1) = input_size; /* W */
}
- auto output_tensor = tensor_reg->getAclTensor(output_index).get();
- const auto input_tensor = tensor_reg->getAclTensor(input_index).get();
- const auto weight_tensor = tensor_reg->getAclTensor(weight_index).get();
- const auto bias_tensor = tensor_reg->getAclTensor(bias_index).get();
+ auto output_tensor = tensor_reg->getAclTensor(output_index);
+ const auto input_tensor = tensor_reg->getAclTensor(input_index);
+ const auto weight_tensor = tensor_reg->getAclTensor(weight_index);
+ const auto bias_tensor = bias_index.undefined() ? nullptr : tensor_reg->getAclTensor(bias_index);
const auto frontend_layout = layout;
const auto acl_layout = output_tensor->handle()->info()->data_layout();
@@ -275,9 +284,10 @@ kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Ope
}
auto fn = generateLayer<T_ACLLayer>(
- tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
- weight_tensor->handle(), bias_tensor->handle(), output_tensor->handle(), needs_reshape,
- asTensorShape(reshape, frontend_layout, asRuntimeLayout(acl_layout)), kernel_type);
+ tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ weight_tensor->handle(), bias_tensor != nullptr ? bias_tensor->handle() : nullptr,
+ output_tensor->handle(), needs_reshape,
+ asTensorShape(reshape, frontend_layout, asRuntimeLayout(acl_layout)), kernel_type);
return std::make_unique<T_FunctionWrapper>(std::move(fn));
}
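A small sketch of the optional-bias pattern introduced above: an undefined bias index yields a null tensor, and the layer receives a null handle instead of dereferencing a missing operand. The types here are mocks, not onert's.

    #include <cstdio>

    struct MockTensor { const char *name; };

    MockTensor *lookup(int index)
    {
      static MockTensor tensor{"tensor"};
      return index < 0 ? nullptr : &tensor; // index < 0 models OperandIndex::undefined()
    }

    void configure(MockTensor *input, MockTensor *weights, MockTensor *bias /* may be null */)
    {
      std::printf("bias %s\n", bias ? "present" : "absent");
    }

    int main()
    {
      const int bias_index = -1; // FullyConnected without a bias operand
      configure(lookup(0), lookup(1), lookup(bias_index)); // prints "bias absent"
    }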
@@ -298,7 +308,7 @@ kernelGenPool2D(const T_PoolOp &node, const ir::Operands &operands,
const auto kw = node.param().kw;
const auto stride = node.param().stride;
const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
VERBOSE(Pool2DParam) << "IFM_H: " << ifm_shape.H << std::endl;
VERBOSE(Pool2DParam) << "IFM_W: " << ifm_shape.W << std::endl;
@@ -313,12 +323,12 @@ kernelGenPool2D(const T_PoolOp &node, const ir::Operands &operands,
VERBOSE(Pool2DParam) << "PAD(L): " << padding.left << std::endl;
VERBOSE(Pool2DParam) << "PAD(R): " << padding.right << std::endl;
- auto ofm_tensor = tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = tensor_reg->getAclTensor(ifm_index).get();
+ auto ofm_tensor = tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = tensor_reg->getAclTensor(ifm_index);
::arm_compute::PoolingLayerInfo info{
- pooling_type, ::arm_compute::Size2D{kw, kh}, ifm_tensor->info()->data_layout(),
- asPadStrideInfo(padding, stride), true /* exclude_padding */};
+ pooling_type, ::arm_compute::Size2D{kw, kh}, ifm_tensor->info()->data_layout(),
+ asPadStrideInfo(padding, stride), true /* exclude_padding */};
auto fn = generateLayer<T_ACLLayer>(ifm_tensor->handle(), ofm_tensor->handle(), info);
diff --git a/runtime/onert/backend/acl_common/AclLinearMemoryManager.h b/runtime/onert/backend/acl_common/AclLinearMemoryManager.h
index 09f25e7a8..5c546b77a 100644
--- a/runtime/onert/backend/acl_common/AclLinearMemoryManager.h
+++ b/runtime/onert/backend/acl_common/AclLinearMemoryManager.h
@@ -23,7 +23,11 @@
#include "ir/OperandIndexMap.h"
#include "util/logging.h"
-namespace
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
{
template <typename T_MemoryManager, typename T_PoolManager, typename T_LifetimeManager>
@@ -33,19 +37,10 @@ std::shared_ptr<T_MemoryManager> createMemoryManager()
std::shared_ptr<T_PoolManager> pool_mgr = std::make_shared<T_PoolManager>();
std::shared_ptr<T_MemoryManager> mem_mgr =
- std::make_shared<T_MemoryManager>(lifetime_mgr, pool_mgr);
+ std::make_shared<T_MemoryManager>(lifetime_mgr, pool_mgr);
return mem_mgr;
}
-} // namespace
-
-namespace onert
-{
-namespace backend
-{
-namespace acl_common
-{
-
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor, typename T_MemoryManager,
typename T_PoolManager, typename T_LifetimeManager, typename T_Allocator,
typename T_MemoryGroup>
@@ -53,9 +48,9 @@ class AclLinearMemoryManager : public AclMemoryManager<T_ITensor, T_Tensor, T_Su
{
public:
AclLinearMemoryManager()
- : _allocator{nullptr},
- _io_manager{createMemoryManager<T_MemoryManager, T_PoolManager, T_LifetimeManager>()},
- _io_group{std::make_shared<T_MemoryGroup>(_io_manager)}
+ : _allocator{nullptr},
+ _io_manager{createMemoryManager<T_MemoryManager, T_PoolManager, T_LifetimeManager>()},
+ _io_group{std::make_shared<T_MemoryGroup>(_io_manager)}
{
// DO NOTHING
}
diff --git a/runtime/onert/backend/acl_common/AclMemoryManager.h b/runtime/onert/backend/acl_common/AclMemoryManager.h
index eefcec130..8e6bdd86a 100644
--- a/runtime/onert/backend/acl_common/AclMemoryManager.h
+++ b/runtime/onert/backend/acl_common/AclMemoryManager.h
@@ -21,7 +21,6 @@
#include <arm_compute/runtime/IMemoryManager.h>
#include <cassert>
-#include "backend/IMemoryManager.h"
#include "ir/OperandIndexMap.h"
#include "Convert.h"
#include "util/logging.h"
@@ -33,8 +32,7 @@ namespace backend
namespace acl_common
{
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-class AclMemoryManager : public backend::IMemoryManager
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> class AclMemoryManager
{
public:
AclMemoryManager()
@@ -44,7 +42,7 @@ public:
virtual ~AclMemoryManager() = default;
- void allocate(void) override
+ virtual void allocate(void)
{
for (const auto &tensor_entry : _tensors)
{
@@ -53,7 +51,7 @@ public:
}
}
- void deallocate(void) override
+ virtual void deallocate(void)
{
for (const auto &tensor_entry : _tensors)
{
@@ -62,8 +60,12 @@ public:
}
}
- virtual void startLifetime(const ir::OperandIndex &) { /* DO NOTHING */}
- virtual void finishLifetime(const ir::OperandIndex &) { /* DO NOTHING */}
+ virtual void startLifetime(const ir::OperandIndex &)
+ { /* DO NOTHING */
+ }
+ virtual void finishLifetime(const ir::OperandIndex &)
+ { /* DO NOTHING */
+ }
void buildTensor(const ir::OperandIndex &ind, const ::arm_compute::TensorInfo &info, size_t rank,
size_t num_uses)
@@ -78,7 +80,7 @@ public:
bool extent_parent)
{
auto subtensor =
- std::make_shared<T_SubTensor>(parent_tensor.get(), shape, coordinates, rank, extent_parent);
+ std::make_shared<T_SubTensor>(parent_tensor.get(), shape, coordinates, rank, extent_parent);
_subtensors[child_ind] = subtensor;
}
diff --git a/runtime/onert/backend/acl_common/AclSubTensorAnalyzer.h b/runtime/onert/backend/acl_common/AclSubTensorAnalyzer.h
index 83d7ad6fd..a0bbe7c3c 100644
--- a/runtime/onert/backend/acl_common/AclSubTensorAnalyzer.h
+++ b/runtime/onert/backend/acl_common/AclSubTensorAnalyzer.h
@@ -17,9 +17,10 @@
#ifndef __ONERT_BACKEND_ACL_COMMON_ACL_SUB_TENSOR_ANALYZER_H__
#define __ONERT_BACKEND_ACL_COMMON_ACL_SUB_TENSOR_ANALYZER_H__
+#include <cl_common/ParentInfo.h>
+
#include <ir/OperationVisitor.h>
#include <ir/Graph.h>
-#include "ParentInfo.h"
namespace onert
{
@@ -46,6 +47,8 @@ public:
public:
void setLayout(ir::Layout layout) { _current_op_layout = layout; }
+ void setUsePadding() { usePadding = true; }
+
void visit(const ir::operation::Concat &node) override
{
// If operator is concat, fill subsumption info
@@ -59,10 +62,22 @@ public:
int32_t axis = axis_raw < 0 ? (axis_raw + rank) : axis_raw;
assert(rank > axis);
+ // Concat elimination is not supported when the axis is the last dimension
+ // https://github.com/Samsung/ONE/issues/4407
+ // TODO Enable when the backend doesn't use padding
+ if ((axis == rank - 1) && usePadding)
+ return;
+
for (const auto &ind : inputs)
{
- // NOTE Not support the case that concat's input is a constant or a input of model
- if (_graph.operands().at(ind).isConstant() || _graph.getInputs().contains(ind))
+ /**
+ * NOTE The following cases are not supported:
+ * 1. concat's input is a constant.
+ * 2. concat's input is an input of the model.
+ * 3. concat's input has already become a subtensor of another concat.
+ */
+ if (_graph.operands().at(ind).isConstant() || _graph.getInputs().contains(ind) ||
+ _parent_map.find(ind) != _parent_map.end())
{
return;
}
@@ -80,22 +95,23 @@ public:
}
coordinate_info.set(axis, axis_point);
- _parent_map.emplace(
- input_index, acl_common::ParentInfo{output_index, _current_op_layout, coordinate_info});
+ _parent_map.emplace(input_index,
+ cl_common::ParentInfo{output_index, _current_op_layout, coordinate_info});
axis_point += input_shape.dim(axis);
}
}
- std::unordered_map<ir::OperandIndex, ParentInfo> &&releaseParentMap()
+ std::unordered_map<ir::OperandIndex, cl_common::ParentInfo> &&releaseParentMap()
{
return std::move(_parent_map);
}
private:
const ir::Graph &_graph;
- std::unordered_map<ir::OperandIndex, ParentInfo> _parent_map;
+ std::unordered_map<ir::OperandIndex, cl_common::ParentInfo> _parent_map;
ir::Layout _current_op_layout{ir::Layout::UNKNOWN};
+ bool usePadding{false};
};
} // namespace acl_common
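The analyzer above records, for every foldable concat input, its parent output and the offset along the concat axis (axis_point advances by each input's extent). A self-contained sketch of that bookkeeping, with plain ints standing in for ir::OperandIndex and a single axis offset in place of ir::Coordinates:

#include <map>
#include <utility>
#include <vector>

struct ParentInfoSketch
{
  int parent;      // stand-in for the ir::OperandIndex of the concat output
  int axis_offset; // where this input starts along the concat axis
};

// inputs: {operand index, extent along the concat axis}
std::map<int, ParentInfoSketch> buildParentMap(int output,
                                               const std::vector<std::pair<int, int>> &inputs)
{
  std::map<int, ParentInfoSketch> parent_map;
  int axis_point = 0;
  for (const auto &in : inputs)
  {
    parent_map.emplace(in.first, ParentInfoSketch{output, axis_point});
    axis_point += in.second; // each input starts where the previous one ended
  }
  return parent_map;
}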
diff --git a/runtime/onert/backend/acl_common/AclTensorBuilder.h b/runtime/onert/backend/acl_common/AclTensorBuilder.h
index 91452014b..b0b5ca612 100644
--- a/runtime/onert/backend/acl_common/AclTensorBuilder.h
+++ b/runtime/onert/backend/acl_common/AclTensorBuilder.h
@@ -17,19 +17,21 @@
#ifndef __ONERT_BACKEND_ACL_COMMON_TEMPL_TENSOR_BUILDER_H__
#define __ONERT_BACKEND_ACL_COMMON_TEMPL_TENSOR_BUILDER_H__
-#include <memory>
-#include <queue>
-
-#include <arm_compute/core/Types.h>
-#include <backend/ITensorBuilder.h>
-#include "ir/OperandIndexMap.h"
-#include <ir/Operands.h>
#include "AclTensorManager.h"
#include "AclTensorRegistry.h"
-#include <memory>
-#include "ParentInfo.h"
+
+#include <cl_common/LifetimeMap.h>
+#include <cl_common/ParentInfo.h>
+
+#include <ir/OperandIndexMap.h>
+#include <ir/Operands.h>
#include <util/Utils.h>
+#include <arm_compute/core/Types.h>
+
+#include <memory>
+#include <queue>
+
namespace onert
{
namespace backend
@@ -37,20 +39,14 @@ namespace backend
namespace acl_common
{
-enum class UsesType
-{
- FIRST,
- LAST
-};
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-class AclTensorBuilder : public ITensorBuilder
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> class AclTensorBuilder
{
public:
using T_AclTensorManager = AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>;
+  // TODO Remove this alias and use the underlying type directly
+ using UsesType = cl_common::UsesType;
- AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr,
- const std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> &tensor_reg);
+ AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr);
/**
* @brief Register tensor information to allocate on ACL-CL backend
@@ -59,18 +55,16 @@ public:
* @param[in] layout Tensor data layout
*/
void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout backend_layout) override;
-
- void notifyFirstUse(const ir::OperandIndex &) override;
- void notifyLastUse(const ir::OperandIndex &) override;
+ ir::Layout backend_layout);
- bool isRegistered(const ir::OperandIndex &) const override;
+ void notifyFirstUse(const ir::OperandIndex &);
+ void notifyLastUse(const ir::OperandIndex &);
- void prepare(void) override;
- void allocate() override;
- void postFunctionPrepare() override;
+ bool isRegistered(const ir::OperandIndex &) const;
- std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) override;
+ void prepare(void);
+ void allocate();
+ void postFunctionPrepare();
T_AclTensorManager *acl_tensor_manager(void) { return _tensor_mgr.get(); }
@@ -81,7 +75,7 @@ public:
_uses_count_map[index] = num_uses;
}
- void parent_map(std::unordered_map<ir::OperandIndex, ParentInfo> &&parent_map)
+ void parent_map(std::unordered_map<ir::OperandIndex, cl_common::ParentInfo> &&parent_map)
{
_parent_map = std::move(parent_map);
}
@@ -107,13 +101,12 @@ private:
ir::OperandIndexMap<size_t> _uses_count_map;
std::unique_ptr<T_AclTensorManager> _tensor_mgr;
- std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> _tensor_reg;
// for linear executor
- std::vector<std::pair<UsesType, ir::OperandIndex>> _lifetime_seq;
+ cl_common::LifetimeSeq _lifetime_seq;
// Extra info for concat elimination
- ir::OperandIndexMap<ParentInfo> _parent_map;
+ ir::OperandIndexMap<cl_common::ParentInfo> _parent_map;
};
} // namespace acl_common
@@ -135,17 +128,16 @@ namespace acl_common
{
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder(
- const ir::Operands &operands, T_AclTensorManager *tensor_mgr,
- const std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> &tensor_reg)
- : _operands{operands}, _tensor_mgr{tensor_mgr}, _tensor_reg{tensor_reg}
+AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder(const ir::Operands &operands,
+ T_AclTensorManager *tensor_mgr)
+ : _operands{operands}, _tensor_mgr{tensor_mgr}
{
assert(_tensor_mgr);
}
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::registerTensorInfo(
- const ir::OperandIndex &ind, const ir::OperandInfo &info, ir::Layout backend_layout)
+ const ir::OperandIndex &ind, const ir::OperandInfo &info, ir::Layout backend_layout)
{
assert(_tensor_mgr->constTensors().size() == 0);
assert(_tensor_mgr->nonconstTensors().size() == 0);
@@ -161,7 +153,6 @@ void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::registerTensorInfo(
else
{
// SubTensors
-
assert(!info.isConstant() && "Subtensors of constants are not supported yet.");
// Update offset info and emplace
@@ -171,7 +162,7 @@ void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::registerTensorInfo(
auto &offset = parent_info.coordinates;
auto frontend_layout = parent_info.frontend_layout;
- assert(obj.shape().rank() <= ir::Shape::MAX_RANK);
+ assert(obj.shape().rank() <= ir::Shape::kMaxRank);
auto shape = obj.shape();
if (_operands.at(parent_index).shape().rank() >= 4 && frontend_layout == ir::Layout::NHWC &&
backend_layout == ir::Layout::NCHW)
@@ -183,7 +174,7 @@ void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::registerTensorInfo(
offset = {offset[0], offset[3], offset[1], offset[2]};
}
else if (_operands.at(parent_index).shape().rank() >= 4 &&
- frontend_layout == ir::Layout::NHWC && backend_layout == ir::Layout::NCHW)
+ frontend_layout == ir::Layout::NCHW && backend_layout == ir::Layout::NHWC)
{
// Permutation changing layout beyond 4-D is not supported yet
const auto parent_rank = _operands.at(parent_index).shape().rank();
@@ -211,7 +202,7 @@ void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::notifyLastUse(const ir:
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
bool AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::isRegistered(
- const ir::OperandIndex &ind) const
+ const ir::OperandIndex &ind) const
{
return _tensor_info_map.find(ind) != _tensor_info_map.end();
}
@@ -225,61 +216,13 @@ void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::prepare(void)
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::allocate(void)
{
- // Update lifetime sequence to apply subtensor optimization
-
- std::unordered_map<ir::OperandIndex, ir::OperandIndex> root_map;
- std::function<ir::OperandIndex &(ir::OperandIndex)> find_root =
- [&](ir::OperandIndex ind) -> ir::OperandIndex & {
- ir::OperandIndex &ret = root_map[ind];
+ auto lifetime_map = cl_common::createLifetimeMap(_lifetime_seq, _parent_map);
- // We know the root parent value already
- if (ret.valid())
- return ret;
-
- auto itr = _parent_map.find(ind);
- if (itr == _parent_map.end())
- {
- // If there is no parent, let's store the value of itself
- return ret = ind;
- }
- else
- {
- return ret = find_root(itr->second.parent);
- }
- };
-
- ir::OperandIndexMap<bool> first_use_check;
- ir::OperandIndexMap<bool> last_use_check;
- std::map<size_t, std::pair<UsesType, ir::OperandIndex>> lifetime_map;
- for (size_t i = 0; i < _lifetime_seq.size(); i++)
+ for (const auto &entry : lifetime_map)
{
- auto &entry = _lifetime_seq[i];
- if (entry.first != UsesType::FIRST)
- continue;
- auto root_ind = find_root(entry.second);
- if (first_use_check[root_ind])
- continue;
- first_use_check[root_ind] = true;
- lifetime_map[i] = {UsesType::FIRST, root_ind};
- }
-
- for (int i = _lifetime_seq.size() - 1; i >= 0; i--)
- {
- auto &entry = _lifetime_seq[i];
- if (entry.first != UsesType::LAST)
- continue;
- auto root_ind = find_root(entry.second);
- if (last_use_check[root_ind])
- continue;
- last_use_check[root_ind] = true;
- lifetime_map[i] = {UsesType::LAST, root_ind};
- }
-
- for (auto &entry : lifetime_map)
- {
- auto &use = entry.second;
- auto use_type = use.first;
- auto use_index = use.second;
+ const auto &use = entry.second;
+ const auto &use_type = use.first;
+ const auto &use_index = use.second;
assert(use_index.valid());
if (use_type == UsesType::FIRST)
_tensor_mgr->startLifetime(use_index);
@@ -306,29 +249,22 @@ void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::postFunctionPrepare(voi
}
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-std::unique_ptr<ITensorManager>
-AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::releaseStaticTensorManager(void)
-{
- return std::move(_tensor_mgr);
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::buildTensors(void)
{
assert(_tensor_mgr->constTensors().size() == 0);
assert(_tensor_mgr->nonconstTensors().size() == 0);
// Normal tensors
- for (auto &entry : _tensor_info_map)
+ for (const auto &entry : _tensor_info_map)
{
- auto ind = entry.first;
+ const auto &ind = entry.first;
if (_parent_map.count(ind) > 0)
continue;
const auto &info = entry.second;
const auto &backend_layout = _tensor_layout_map[ind];
auto tensor_info =
- asTensorInfo(info.shape(), info.typeInfo(), ir::Layout::UNKNOWN, backend_layout, true);
+ asTensorInfo(info.shape(), info.typeInfo(), ir::Layout::UNKNOWN, backend_layout, true);
_tensor_mgr->buildTensor(ind, tensor_info, info.shape().rank(), info.isConstant(),
_uses_count_map[ind]);
}
@@ -336,10 +272,10 @@ void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::buildTensors(void)
// Subtensors
assert(_tensor_mgr->nonconstSubtensors().size() == 0);
// TODO Iterate `_parent_map` instead, once the optimizer bug is fixed
- // `Optimizer` iterates the entire OpSequences, so there is a bug if iterating _parent_map
- for (auto &entry : _tensor_info_map)
+  // `Optimizer` iterates over all Operations, so iterating _parent_map here would be a bug
+ for (const auto &entry : _tensor_info_map)
{
- auto ind = entry.first;
+ const auto &ind = entry.first;
if (_parent_map.count(ind) == 0)
continue;
@@ -383,7 +319,7 @@ void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::buildTensors(void)
assert(parent_tensor != nullptr);
// Child's type should be the same as the parent's
- assert(tensor_info.typeInfo().offset() ==
+ assert(tensor_info.typeInfo().zero_point() ==
parent_tensor->info()->quantization_info().uniform().offset);
assert(tensor_info.typeInfo().scale() ==
parent_tensor->info()->quantization_info().uniform().scale);
@@ -395,7 +331,7 @@ void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::buildTensors(void)
auto shape = asTensorShape(tensor_info.shape(), ir::Layout::UNKNOWN, backend_layout, true);
::arm_compute::Coordinates coordinates =
- asTensorCoordinate(parent_info.coordinates, ir::Layout::UNKNOWN, backend_layout);
+ asTensorCoordinate(parent_info.coordinates, ir::Layout::UNKNOWN, backend_layout);
_tensor_mgr->buildSubtensor(parent, current, shape, coordinates, tensor_info.shape().rank(),
true);
stack.pop();
@@ -405,9 +341,9 @@ void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::buildTensors(void)
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
bool AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::areSubTensorsOf(
- const ir::OperandIndex &parent, const ir::OperandIndexSequence &seq)
+ const ir::OperandIndex &parent, const ir::OperandIndexSequence &seq)
{
- for (auto &cand : seq)
+ for (const auto &cand : seq)
{
if (!isSubTensorOf(parent, cand))
{
@@ -419,7 +355,7 @@ bool AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::areSubTensorsOf(
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
bool AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::isSubTensorOf(
- const ir::OperandIndex &parent, const ir::OperandIndex &child)
+ const ir::OperandIndex &parent, const ir::OperandIndex &child)
{
auto itr = _parent_map.find(child);
if (itr == _parent_map.end())
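allocate() above now delegates to cl_common::createLifetimeMap, whose semantics the deleted block documents: resolve every operand to its root parent through _parent_map, then keep only the earliest FIRST use and the latest LAST use per root. A compact sketch of that semantics, with ints for operand indices; it is based on the removed code, not on cl_common's actual implementation:

#include <functional>
#include <map>
#include <unordered_map>
#include <utility>
#include <vector>

enum class UsesType { FIRST, LAST };
using LifetimeSeqSketch = std::vector<std::pair<UsesType, int>>;

std::map<size_t, std::pair<UsesType, int>>
createLifetimeMapSketch(const LifetimeSeqSketch &seq, const std::unordered_map<int, int> &parent_of)
{
  // Chase subtensor chains up to the root tensor that actually owns memory.
  std::function<int(int)> find_root = [&](int ind) {
    auto itr = parent_of.find(ind);
    return itr == parent_of.end() ? ind : find_root(itr->second);
  };

  std::map<size_t, std::pair<UsesType, int>> lifetime_map;
  std::unordered_map<int, bool> first_seen, last_seen;
  for (size_t i = 0; i < seq.size(); ++i) // forward pass: earliest FIRST per root
  {
    if (seq[i].first != UsesType::FIRST)
      continue;
    const int root = find_root(seq[i].second);
    if (!first_seen[root])
    {
      first_seen[root] = true;
      lifetime_map[i] = {UsesType::FIRST, root};
    }
  }
  for (size_t i = seq.size(); i-- > 0;) // backward pass: latest LAST per root
  {
    if (seq[i].first != UsesType::LAST)
      continue;
    const int root = find_root(seq[i].second);
    if (!last_seen[root])
    {
      last_seen[root] = true;
      lifetime_map[i] = {UsesType::LAST, root};
    }
  }
  return lifetime_map;
}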
diff --git a/runtime/onert/backend/acl_common/AclTensorManager.h b/runtime/onert/backend/acl_common/AclTensorManager.h
index b999a39a9..41a89fbf2 100644
--- a/runtime/onert/backend/acl_common/AclTensorManager.h
+++ b/runtime/onert/backend/acl_common/AclTensorManager.h
@@ -19,7 +19,6 @@
#include <arm_compute/runtime/IMemoryManager.h>
-#include "backend/ITensorManager.h"
#include "AclMemoryManager.h"
#include "AclInternalBufferManager.h"
#include "ir/OperandIndexMap.h"
@@ -31,8 +30,7 @@ namespace backend
namespace acl_common
{
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-class AclTensorManager : public backend::ITensorManager
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> class AclTensorManager
{
public:
using T_AclMemoryManager = AclMemoryManager<T_ITensor, T_Tensor, T_SubTensor>;
@@ -97,9 +95,9 @@ namespace acl_common
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::AclTensorManager(
- T_AclMemoryManager *const_mgr, T_AclMemoryManager *nonconst_mgr,
- IInternalBufferManager *inter_mgr)
- : _const_mgr{const_mgr}, _nonconst_mgr{nonconst_mgr}, _inter_mgr{inter_mgr}
+ T_AclMemoryManager *const_mgr, T_AclMemoryManager *nonconst_mgr,
+ IInternalBufferManager *inter_mgr)
+ : _const_mgr{const_mgr}, _nonconst_mgr{nonconst_mgr}, _inter_mgr{inter_mgr}
{
// DO NOTHING
}
@@ -142,8 +140,8 @@ void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::deallocateInternalBuffe
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::buildTensor(
- const ir::OperandIndex &ind, const ::arm_compute::TensorInfo &info, size_t rank, bool as_const,
- size_t num_uses)
+ const ir::OperandIndex &ind, const ::arm_compute::TensorInfo &info, size_t rank, bool as_const,
+ size_t num_uses)
{
assert(_ind_to_mgr.find(ind) == _ind_to_mgr.end());
if (as_const)
@@ -160,9 +158,9 @@ void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::buildTensor(
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::buildSubtensor(
- const ir::OperandIndex &parent, const ir::OperandIndex &child,
- const ::arm_compute::TensorShape &shape, const ::arm_compute::Coordinates &coordinates,
- size_t rank, bool extent_parent)
+ const ir::OperandIndex &parent, const ir::OperandIndex &child,
+ const ::arm_compute::TensorShape &shape, const ::arm_compute::Coordinates &coordinates,
+ size_t rank, bool extent_parent)
{
assert(_ind_to_mgr.find(child) == _ind_to_mgr.end());
std::shared_ptr<T_ITensor> parent_tensor = findTensorAsParent(parent);
@@ -222,7 +220,12 @@ AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::at(const ir::OperandIndex &i
}
else
{
- return _ind_to_mgr.at(ind).subtensors().at(ind);
+ auto subtensors = _ind_to_mgr.at(ind).subtensors();
+ auto itr = subtensors.find(ind);
+ if (itr == subtensors.end())
+ return nullptr;
+ else
+ return itr->second;
}
}
@@ -256,15 +259,15 @@ AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::internal_buffer_manager(void
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::iterate(
- const std::function<void(const ir::OperandIndex &)> &fn)
+ const std::function<void(const ir::OperandIndex &)> &fn)
{
- for (auto it : _nonconst_mgr->tensors())
+ for (auto &&it : _nonconst_mgr->tensors())
fn(it.first);
- for (auto it : _nonconst_mgr->subtensors())
+ for (auto &&it : _nonconst_mgr->subtensors())
fn(it.first);
- for (auto it : _const_mgr->tensors())
+ for (auto &&it : _const_mgr->tensors())
fn(it.first);
}
@@ -281,7 +284,7 @@ void AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>::tryDeallocConstants(voi
// used in several nodes.
if (tensor->handle() && !tensor->handle()->is_used() && tensor->num_uses() < 2)
{
- VERBOSE(AclTensorManager) << "Tensor #" << ind.value()
+ VERBOSE(AclTensorManager) << "Tensor " << ind
<< " will be deallocated as an unused constant tensor" << std::endl;
tensor->allocator()->free();
tensor.reset();
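at() above now returns a null pointer for a subtensor index that is not registered, where unordered_map::at would have thrown. A self-contained demo of the lookup pattern the diff switches to:

#include <iostream>
#include <memory>
#include <unordered_map>

int main()
{
  std::unordered_map<int, std::shared_ptr<int>> subtensors{{1, std::make_shared<int>(42)}};

  // find() + nullptr instead of at(), which throws std::out_of_range on a miss
  auto lookup = [&](int ind) -> std::shared_ptr<int> {
    auto itr = subtensors.find(ind);
    return itr == subtensors.end() ? nullptr : itr->second;
  };

  std::cout << (lookup(1) ? "found" : "null") << "\n"; // found
  std::cout << (lookup(2) ? "found" : "null") << "\n"; // null
}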
diff --git a/runtime/onert/backend/acl_common/AclTensorRegistry.h b/runtime/onert/backend/acl_common/AclTensorRegistry.h
index 1ef9f4b35..02d66db99 100644
--- a/runtime/onert/backend/acl_common/AclTensorRegistry.h
+++ b/runtime/onert/backend/acl_common/AclTensorRegistry.h
@@ -36,17 +36,11 @@ template <typename T_AclTensorManager> class AclTensorRegistry : public ITensorR
public:
AclTensorRegistry(T_AclTensorManager *tensor_mgr) : _tensor_mgr{tensor_mgr} {}
- std::shared_ptr<ITensor> getITensor(const ir::OperandIndex &ind) override
- {
- return _tensor_mgr->at(ind);
- }
+ ITensor *getITensor(const ir::OperandIndex &ind) override { return _tensor_mgr->at(ind).get(); }
- std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &ind) override
- {
- return getITensor(ind);
- }
+ ITensor *getNativeITensor(const ir::OperandIndex &ind) override { return getITensor(ind); }
- auto getAclTensor(const ir::OperandIndex &ind) { return _tensor_mgr->at(ind); }
+ auto getAclTensor(const ir::OperandIndex &ind) { return _tensor_mgr->at(ind).get(); }
private:
T_AclTensorManager *_tensor_mgr;
diff --git a/runtime/onert/backend/acl_common/CMakeLists.txt b/runtime/onert/backend/acl_common/CMakeLists.txt
index d3ae5acf7..8d409a47c 100644
--- a/runtime/onert/backend/acl_common/CMakeLists.txt
+++ b/runtime/onert/backend/acl_common/CMakeLists.txt
@@ -12,6 +12,7 @@ target_include_directories(${LIB_ONERT_BACKEND_ACL_COMMON} PUBLIC ${CMAKE_CURREN
target_link_libraries(${LIB_ONERT_BACKEND_ACL_COMMON} PUBLIC onert_core)
target_link_libraries(${LIB_ONERT_BACKEND_ACL_COMMON} PUBLIC arm_compute arm_compute_ex)
target_link_libraries(${LIB_ONERT_BACKEND_ACL_COMMON} PUBLIC nnfw_lib_misc)
+target_link_libraries(${LIB_ONERT_BACKEND_ACL_COMMON} PUBLIC ${LIB_ONERT_BACKEND_CL_COMMON})
target_link_libraries(${LIB_ONERT_BACKEND_ACL_COMMON} PRIVATE nnfw_common)
target_link_libraries(${LIB_ONERT_BACKEND_ACL_COMMON} PRIVATE nnfw_coverage)
diff --git a/runtime/onert/backend/acl_common/Convert.cc b/runtime/onert/backend/acl_common/Convert.cc
index 67dcc8192..673d524e3 100644
--- a/runtime/onert/backend/acl_common/Convert.cc
+++ b/runtime/onert/backend/acl_common/Convert.cc
@@ -109,11 +109,19 @@ namespace acl_common
case ir::DataType::UINT8:
return ::arm_compute::DataType::U8;
case ir::DataType::QUANT_INT8_SYMM:
- return ::arm_compute::DataType::S8;
+ return ::arm_compute::DataType::QSYMM8;
+ case ir::DataType::QUANT_INT8_ASYMM:
+ return ::arm_compute::DataType::QASYMM8_SIGNED;
case ir::DataType::FLOAT16:
return ::arm_compute::DataType::F16;
+ case ir::DataType::INT64:
+ return ::arm_compute::DataType::S64;
+ case ir::DataType::QUANT_INT16_ASYMM:
+ return ::arm_compute::DataType::QASYMM16;
+ case ir::DataType::QUANT_INT8_SYMM_PER_CHANNEL:
+ return ::arm_compute::DataType::QSYMM8_PER_CHANNEL;
default:
- throw std::runtime_error("Not supported, yet");
+ throw std::runtime_error("Not supported internal data type, yet");
break;
}
}
@@ -128,8 +136,8 @@ namespace acl_common
bool apply_dim_correction)
{
::arm_compute::TensorInfo info(
- asTensorShape(shape, frontend_layout, backend_layout, apply_dim_correction), 1,
- asDataType(typeInfo.type()), asQuantizationInfo(typeInfo.scale(), typeInfo.offset()));
+ asTensorShape(shape, frontend_layout, backend_layout, apply_dim_correction), 1,
+ asDataType(typeInfo.type()), asQuantizationInfo(typeInfo.scale(), typeInfo.zero_point()));
info.set_data_layout(asDataLayout(backend_layout));
return info;
}
@@ -154,26 +162,26 @@ namespace acl_common
return ::arm_compute::ActivationLayerInfo{};
case ir::Activation::RELU:
return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
case ir::Activation::RELU1:
return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
case ir::Activation::RELU6:
return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f};
// Cases for activation of LSTM.
case ir::Activation::TANH:
return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
case ir::Activation::SIGMOID:
// NOTE The sigmoid function is a special case of the Logistic function when L=1, k=1, x0=0.
      // TODO In the ACL and NNAPI specs, Logistic's L is always 1, k is always 1, and x0 is always
      // 0 (always sigmoid) regardless of the parameter values.
      // If ACL supports a non-sigmoid logistic, the param values should be fixed.
return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.0f, 0.0f};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.0f, 0.0f};
default:
- throw std::runtime_error{"Not supported, yet"};
+ throw std::runtime_error{"Not supported internal activation, yet"};
break;
}
}
@@ -190,34 +198,34 @@ asActivationLayerInfo(const ir::operation::ElementwiseActivation::Type op_type,
if (alpha == ir::operation::ElementwiseActivation::infinity)
{
return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
}
else
{
return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, alpha};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, alpha};
}
}
else
{
return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, alpha, beta};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, alpha, beta};
}
case ir::operation::ElementwiseActivation::Type::TANH:
return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, alpha, beta};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, alpha, beta};
case ir::operation::ElementwiseActivation::Type::LOGISTIC:
// NOTE The sigmoid function is a special case of the Logistic function when L=1, k=1, x0=0.
      // TODO In the ACL and NNAPI specs, Logistic's L is always 1, k is always 1, and x0 is always
      // 0 (always sigmoid) regardless of the parameter values.
      // If ACL supports a non-sigmoid logistic, the param values should be fixed.
return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
case ir::operation::ElementwiseActivation::Type::LEAKY_RELU:
return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LEAKY_RELU, alpha};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LEAKY_RELU, alpha};
default:
- throw std::runtime_error{"Not supported, yet"};
+ throw std::runtime_error{"Not supported internal elementwise activation, yet"};
break;
}
}
@@ -293,14 +301,18 @@ ir::DataType asRuntimeDataType(::arm_compute::DataType data_type)
return ir::DataType::UINT32;
case ::arm_compute::DataType::QASYMM8:
return ir::DataType::QUANT_UINT8_ASYMM;
+ case ::arm_compute::DataType::QASYMM8_SIGNED:
+ return ir::DataType::QUANT_INT8_ASYMM;
case ::arm_compute::DataType::U8:
return ir::DataType::UINT8;
case ::arm_compute::DataType::QSYMM8:
return ir::DataType::QUANT_INT8_SYMM;
case ::arm_compute::DataType::F16:
return ir::DataType::FLOAT16;
+ case ::arm_compute::DataType::S64:
+ return ir::DataType::INT64;
default:
- throw std::runtime_error{"Not supported, yet"};
+ throw std::runtime_error{"Not supported acl data type, yet"};
break;
}
}
@@ -320,21 +332,50 @@ arm_compute::PoolingType convertPoolType(ir::operation::Pool2D::PoolType pool_ty
}
}
-arm_compute::ReduceOperation convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir)
+arm_compute::ReductionOperation convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir)
{
switch (reduce_type_ir)
{
case ir::operation::Reduce::ReduceType::MAX:
- return arm_compute::ReduceOperation::MAX;
+ return arm_compute::ReductionOperation::MAX;
case ir::operation::Reduce::ReduceType::MIN:
- return arm_compute::ReduceOperation::MIN;
+ return arm_compute::ReductionOperation::MIN;
case ir::operation::Reduce::ReduceType::SUM:
- return arm_compute::ReduceOperation::SUM;
+ return arm_compute::ReductionOperation::SUM;
default:
throw std::runtime_error("convertReduceType: Not supported operation yet");
}
}
+arm_compute::PixelValue asPixelValue(const ir::Operand &operand)
+{
+ assert(operand.isConstant());
+ assert(operand.shape().num_elements() == 1);
+ switch (operand.typeInfo().type())
+ {
+ case ir::DataType::INT32:
+ return arm_compute::PixelValue(operand.asScalar<int32_t>());
+ case ir::DataType::INT64:
+ return arm_compute::PixelValue(operand.asScalar<int64_t>());
+ case ir::DataType::UINT32:
+ return arm_compute::PixelValue(operand.asScalar<uint64_t>());
+ case ir::DataType::UINT8:
+ return arm_compute::PixelValue(operand.asScalar<uint8_t>());
+ case ir::DataType::FLOAT32:
+ return arm_compute::PixelValue(operand.asScalar<float>());
+ default:
+ throw std::runtime_error("asPixelValue : Not supported datatype yet");
+ }
+}
+
+arm_compute::Size2D asDilation(uint32_t dilation_width, uint32_t dilation_height)
+{
+ assert(dilation_width != 0);
+ assert(dilation_height != 0);
+
+ return arm_compute::Size2D(dilation_width, dilation_height);
+}
+
} // namespace acl_common
} // namespace backend
} // namespace onert
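The type tables above must stay consistent in both directions, e.g. QUANT_INT8_ASYMM <-> QASYMM8_SIGNED and INT64 <-> S64, or the runtime and ACL would disagree about a tensor's element type. A standalone sketch of that round-trip invariant, with local enums standing in for ir::DataType and arm_compute::DataType:

#include <cassert>
#include <stdexcept>

enum class IrType { QUANT_INT8_ASYMM, INT64 };
enum class AclType { QASYMM8_SIGNED, S64 };

AclType toAcl(IrType t)
{
  switch (t)
  {
    case IrType::QUANT_INT8_ASYMM: return AclType::QASYMM8_SIGNED;
    case IrType::INT64: return AclType::S64;
  }
  throw std::runtime_error("IR data type not supported yet");
}

IrType toIr(AclType t)
{
  switch (t)
  {
    case AclType::QASYMM8_SIGNED: return IrType::QUANT_INT8_ASYMM;
    case AclType::S64: return IrType::INT64;
  }
  throw std::runtime_error("ACL data type not supported yet");
}

int main()
{
  // New entries must round-trip, or runtime and ACL disagree on element type.
  assert(toIr(toAcl(IrType::INT64)) == IrType::INT64);
  assert(toAcl(toIr(AclType::QASYMM8_SIGNED)) == AclType::QASYMM8_SIGNED);
}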
diff --git a/runtime/onert/backend/acl_common/Convert.h b/runtime/onert/backend/acl_common/Convert.h
index 380321c07..c98db14bb 100644
--- a/runtime/onert/backend/acl_common/Convert.h
+++ b/runtime/onert/backend/acl_common/Convert.h
@@ -17,6 +17,7 @@
#ifndef __ONERT_BACKEND_ACL_COMMON_CONVERT_H__
#define __ONERT_BACKEND_ACL_COMMON_CONVERT_H__
+#include <arm_compute/core/PixelValue.h>
#include <arm_compute/core/TensorInfo.h>
#include <arm_compute/core/SubTensorInfo.h>
#include <arm_compute/core/TensorShape.h>
@@ -83,7 +84,10 @@ ir::Layout asRuntimeLayout(::arm_compute::DataLayout data_layout);
ir::DataType asRuntimeDataType(::arm_compute::DataType data_type);
arm_compute::PoolingType convertPoolType(ir::operation::Pool2D::PoolType pool_type_ir);
-arm_compute::ReduceOperation convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir);
+arm_compute::ReductionOperation convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir);
+
+arm_compute::PixelValue asPixelValue(const ir::Operand &operand);
+arm_compute::Size2D asDilation(uint32_t dilation_width, uint32_t dilation_height);
} // namespace acl_common
} // namespace backend
diff --git a/runtime/onert/backend/acl_common/IACLTensor.cc b/runtime/onert/backend/acl_common/IACLTensor.cc
index 70988bd11..9920750fc 100644
--- a/runtime/onert/backend/acl_common/IACLTensor.cc
+++ b/runtime/onert/backend/acl_common/IACLTensor.cc
@@ -25,26 +25,14 @@ namespace backend
namespace acl_common
{
-size_t IACLTensor::dimension(size_t index) const
-{
- // Assume that the front is higher dimensional.
- // i.g. N: 0, C: 1, H: 2, W: 3 for NCHW layout
- // NOTE This tensor must not be applied dim correction
- auto rank = num_dimensions();
- rank = rank == 0 ? 1 : rank;
- assert(rank > index);
- const ARMComputeAxis reversed{(static_cast<uint32_t>(rank - index) - 1)};
- return info()->dimension(reversed.value());
-}
-
size_t IACLTensor::calcOffset(const ir::Coordinates &coords) const
{
- auto rank = num_dimensions();
+ auto rank = _rank;
rank = rank == 0 ? 1 : rank;
- assert(rank == coords.size());
+ assert(static_cast<size_t>(rank) == coords.size());
::arm_compute::Coordinates acl_coords;
- for (uint32_t i = 0; i < rank; ++i)
+ for (size_t i = 0; i < rank; ++i)
{
const ARMComputeAxis reversed{static_cast<uint32_t>((rank - i) - 1)};
acl_coords.set(reversed.value(), coords[i]);
@@ -66,12 +54,22 @@ float IACLTensor::data_scale() const
return info()->quantization_info().uniform().scale;
}
-int32_t IACLTensor::data_offset() const
+int32_t IACLTensor::data_zero_point() const
{
// FIXME What if quantization info is non-uniform?
return info()->quantization_info().uniform().offset;
}
+const std::vector<float> &IACLTensor::data_scales() const
+{
+ throw std::runtime_error("IACLTensor::data_scales() is not supported.");
+}
+
+const std::vector<int32_t> &IACLTensor::data_zero_points() const
+{
+ throw std::runtime_error("IACLTensor::data_zero_points() is not supported.");
+}
+
} // namespace acl_common
} // namespace backend
} // namespace onert
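calcOffset above keeps the reversed-axis convention: onert coordinates list the highest dimension first, while ACL's dimension 0 is the innermost, so coordinate i maps to ACL dimension rank - i - 1. A self-contained sketch of the mapping:

#include <cassert>
#include <cstddef>
#include <vector>

std::vector<size_t> toAclCoords(const std::vector<size_t> &coords)
{
  const size_t rank = coords.empty() ? 1 : coords.size();
  std::vector<size_t> acl(rank, 0);
  for (size_t i = 0; i < coords.size(); ++i)
    acl[rank - i - 1] = coords[i]; // coordinate i -> ACL dimension rank - i - 1
  return acl;
}

int main()
{
  // An NCHW coordinate {n, c, h, w} becomes ACL {w, h, c, n}.
  assert((toAclCoords({1, 2, 3, 4}) == std::vector<size_t>{4, 3, 2, 1}));
}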
diff --git a/runtime/onert/backend/acl_common/IACLTensor.h b/runtime/onert/backend/acl_common/IACLTensor.h
index 3d1268940..7ea6327a7 100644
--- a/runtime/onert/backend/acl_common/IACLTensor.h
+++ b/runtime/onert/backend/acl_common/IACLTensor.h
@@ -19,6 +19,7 @@
#include <backend/ITensor.h>
#include <arm_compute/core/ITensor.h>
+#include "Swizzle.h"
namespace onert
{
@@ -42,17 +43,27 @@ public:
IACLTensor(IACLTensor &&) = default;
IACLTensor &operator=(IACLTensor &&) = default;
+ IACLTensor(size_t rank) : _rank{rank} {}
+
public:
uint8_t *buffer() const final { return handle()->buffer(); }
size_t total_size() const final { return info()->total_size(); }
- size_t dimension(size_t index) const final;
size_t calcOffset(const ir::Coordinates &coords) const final;
ir::Layout layout() const final;
ir::DataType data_type() const final;
float data_scale() const override;
- int32_t data_offset() const override;
+ int32_t data_zero_point() const override;
+ const std::vector<float> &data_scales() const override;
+ const std::vector<int32_t> &data_zero_points() const override;
bool has_padding() const override { return info()->has_padding(); }
bool is_dynamic() const override { return false; }
+ ir::Shape getShape() const override
+ {
+ onert::ir::Shape shape(num_dimensions());
+ for (uint32_t d = 0; d < num_dimensions(); d++)
+ shape.dim(d) = dimension(d);
+ return shape;
+ }
public:
virtual const arm_compute::ITensor *handle() const = 0;
@@ -60,6 +71,22 @@ public:
const arm_compute::ITensorInfo *info() const { return handle()->info(); }
arm_compute::ITensorInfo *info() { return handle()->info(); }
+
+ size_t dimension(size_t index) const
+ {
+    // Assume that the front is the higher dimension.
+    // e.g. N: 0, C: 1, H: 2, W: 3 for NCHW layout
+    // NOTE Dim correction must not be applied to this tensor
+ auto rank = _rank;
+ rank = rank == 0 ? 1 : rank;
+ assert(rank > index);
+ const ARMComputeAxis reversed{(static_cast<uint32_t>(rank - index) - 1)};
+ return info()->dimension(reversed.value());
+ }
+ size_t num_dimensions() const { return _rank; }
+
+protected:
+ size_t _rank; // Actual rank (reflects extended rank)
};
} // namespace acl_common
diff --git a/runtime/onert/backend/acl_common/ParentInfo.h b/runtime/onert/backend/acl_common/ParentInfo.h
deleted file mode 100644
index 708436327..000000000
--- a/runtime/onert/backend/acl_common/ParentInfo.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_ACL_COMMON_PARENT_INFO_H__
-#define __ONERT_BACKEND_ACL_COMMON_PARENT_INFO_H__
-
-#include <ir/Index.h>
-#include <ir/Coordinates.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace acl_common
-{
-
-/**
- * @brief Struct to represent parent operand in child operand
- */
-struct ParentInfo
-{
- ir::OperandIndex parent;
- ir::Layout frontend_layout;
- ir::Coordinates coordinates;
-};
-
-} // namespace acl_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_ACL_COMMON_PARENT_INFO_H__
diff --git a/runtime/onert/backend/acl_common/Swizzle.h b/runtime/onert/backend/acl_common/Swizzle.h
index e1c7f8041..61338f972 100644
--- a/runtime/onert/backend/acl_common/Swizzle.h
+++ b/runtime/onert/backend/acl_common/Swizzle.h
@@ -131,7 +131,7 @@ getARMComputePermutationVector(uint32_t rank, const std::vector<int32_t> runtime
}
::arm_compute::PermutationVector ACL_PV =
- ::arm_compute::PermutationVector{new_pv[0], new_pv[1], new_pv[2], new_pv[3]};
+ ::arm_compute::PermutationVector{new_pv[0], new_pv[1], new_pv[2], new_pv[3]};
ACL_PV.set_num_dimensions(rank);
return ACL_PV;
@@ -146,7 +146,7 @@ inline T ReorderBits(T in, size_t numOfBits, const ir::Layout org_layout = ir::L
for (int32_t i = numOfBits - 1; i >= 0; --i)
{
const uint32_t toShift =
- numOfBits - ToARMComputeAxis(numOfBits, i, org_layout, acl_layout).value() - 1;
+ numOfBits - ToARMComputeAxis(numOfBits, i, org_layout, acl_layout).value() - 1;
out += ((in & 1) << toShift);
in >>= 1;
}
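ReorderBits above remaps a per-axis bitmask through the same axis permutation used for coordinates. For identity layouts, ToARMComputeAxis(n, i) is just the reversal n - i - 1, and the loop mirrors the low n bits. A standalone sketch of that case only; the layout-aware permutation is left out:

#include <cassert>
#include <cstdint>

uint32_t reorderBitsReversed(uint32_t in, uint32_t numOfBits)
{
  uint32_t out = 0;
  for (int32_t i = numOfBits - 1; i >= 0; --i)
  {
    const uint32_t acl_axis = numOfBits - i - 1;       // stand-in for ToARMComputeAxis
    const uint32_t toShift = numOfBits - acl_axis - 1; // == i
    out += ((in & 1) << toShift);
    in >>= 1;
  }
  return out;
}

int main()
{
  // 0b0011 over 4 axes becomes 0b1100: the axis mask follows the axis reversal.
  assert(reorderBitsReversed(0b0011, 4) == 0b1100);
}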
diff --git a/runtime/onert/backend/acl_neon/Backend.h b/runtime/onert/backend/acl_neon/Backend.h
index 35d6e4e8e..1c7713055 100644
--- a/runtime/onert/backend/acl_neon/Backend.h
+++ b/runtime/onert/backend/acl_neon/Backend.h
@@ -21,6 +21,7 @@
#include <backend/Backend.h>
#include <ir/Operands.h>
+#include "BackendContext.h"
#include "Config.h"
#include "ConstantInitializer.h"
#include "KernelGenerator.h"
@@ -41,21 +42,20 @@ public:
std::shared_ptr<IConfig> config() const override { return _config; }
- std::unique_ptr<BackendContext> newContext(const ir::Graph &graph,
- const std::shared_ptr<custom::IKernelBuilder> &,
- bool is_linear_executor) const override
+ std::unique_ptr<backend::BackendContext> newContext(ContextData &&data) const override
{
- const auto &operands = graph.operands();
- const auto &operations = graph.operations();
- auto context = std::make_unique<BackendContext>(this, &graph);
+ const auto &graph = *data.graph;
+ const auto &operands = data.graph->operands();
+ const auto is_linear_executor = data.is_linear_executor;
+
+ auto context = std::make_unique<acl_neon::BackendContext>(this, std::move(data));
auto tm = createTensorManager(is_linear_executor);
auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
- auto tb = std::make_shared<TensorBuilder>(operands, tm, tr);
+ auto tb = std::make_shared<TensorBuilder>(operands, tm);
context->tensor_registry = tr;
context->tensor_builder = tb;
context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
- context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr);
- context->tensor_register = nullptr;
+ context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr);
context->optimizer = std::make_shared<Optimizer>(context.get());
return context;
}
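newContext above switches from loose arguments to a single ContextData bundle that the context takes ownership of. A minimal sketch of that parameter-object shape; all names here are stand-ins, not the onert API:

#include <memory>
#include <utility>

struct GraphSketch
{
};

struct ContextDataSketch
{
  std::unique_ptr<GraphSketch> graph;
  bool is_linear_executor = false;
};

struct BackendContextSketch
{
  explicit BackendContextSketch(ContextDataSketch &&d) : data{std::move(d)} {}
  ContextDataSketch data;
};

std::unique_ptr<BackendContextSketch> newContext(ContextDataSketch &&data)
{
  // Read what is needed before moving the bundle into the context.
  const bool linear = data.is_linear_executor;
  (void)linear;
  return std::make_unique<BackendContextSketch>(std::move(data));
}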
diff --git a/runtime/onert/backend/acl_neon/BackendContext.h b/runtime/onert/backend/acl_neon/BackendContext.h
new file mode 100644
index 000000000..b73dd188e
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/BackendContext.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_NEON_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_ACL_NEON_BACKEND_CONTEXT_H__
+
+#include <AclBackendContext.h>
+
+#include "ConstantInitializer.h"
+#include "KernelGenerator.h"
+#include "TensorBuilder.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_neon
+{
+
+class Optimizer;
+
+using BackendContext =
+ acl_common::AclBackendContext<TensorBuilder, ConstantInitializer, KernelGenerator, Optimizer>;
+
+} // namespace acl_neon
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_NEON_BACKEND_CONTEXT_H__
diff --git a/runtime/onert/backend/acl_neon/CMakeLists.txt b/runtime/onert/backend/acl_neon/CMakeLists.txt
index 03d4946e0..a37256c7c 100644
--- a/runtime/onert/backend/acl_neon/CMakeLists.txt
+++ b/runtime/onert/backend/acl_neon/CMakeLists.txt
@@ -16,4 +16,9 @@ target_link_libraries(${LIB_ONERT_BACKEND_ACL_NEON} PRIVATE nnfw_coverage)
set_target_properties(${LIB_ONERT_BACKEND_ACL_NEON} PROPERTIES OUTPUT_NAME backend_acl_neon)
+if(CMAKE_BUILD_TYPE_LC STREQUAL "release")
+ add_custom_command(TARGET onert_backend_acl_neon POST_BUILD
+ COMMAND ${CMAKE_STRIP} "--strip-unneeded" $<TARGET_FILE_NAME:onert_backend_acl_neon>)
+endif()
+
install(TARGETS ${LIB_ONERT_BACKEND_ACL_NEON} DESTINATION lib)
diff --git a/runtime/onert/backend/acl_neon/Config.cc b/runtime/onert/backend/acl_neon/Config.cc
index 4e78efd2d..3f1758c80 100644
--- a/runtime/onert/backend/acl_neon/Config.cc
+++ b/runtime/onert/backend/acl_neon/Config.cc
@@ -27,7 +27,7 @@ namespace acl_neon
bool Config::initialize() { return true; }
-ir::Layout Config::supportLayout(const ir::Operation &, ir::Layout frontend_layout)
+ir::Layout Config::supportLayout(const ir::IOperation &, ir::Layout frontend_layout)
{
const std::string acl_layout_str = util::getConfigString(util::config::ACL_LAYOUT);
if (acl_layout_str == "NHWC")
diff --git a/runtime/onert/backend/acl_neon/Config.h b/runtime/onert/backend/acl_neon/Config.h
index 089d9479a..ffd9b21e3 100644
--- a/runtime/onert/backend/acl_neon/Config.h
+++ b/runtime/onert/backend/acl_neon/Config.h
@@ -33,7 +33,7 @@ class Config : public IConfig
public:
std::string id() override { return "acl_neon"; }
bool initialize() override;
- ir::Layout supportLayout(const ir::Operation &node, ir::Layout frontend_layout) override;
+ ir::Layout supportLayout(const ir::IOperation &node, ir::Layout frontend_layout) override;
bool supportPermutation() override { return true; }
bool supportDynamicTensor() override { return false; }
bool supportFP16() override { return false; }
diff --git a/runtime/onert/backend/acl_neon/ConstantInitializer.cc b/runtime/onert/backend/acl_neon/ConstantInitializer.cc
index 79edb9ded..1bd702756 100644
--- a/runtime/onert/backend/acl_neon/ConstantInitializer.cc
+++ b/runtime/onert/backend/acl_neon/ConstantInitializer.cc
@@ -25,7 +25,7 @@ namespace acl_neon
ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
const std::shared_ptr<ITensorRegistry> &tensor_reg)
- : acl_common::AclConstantInitializer{operands, tensor_reg}
+ : acl_common::AclConstantInitializer{operands, tensor_reg}
{
// DO NOTHING
}
@@ -37,21 +37,7 @@ void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
if (block_size_obj.isConstant())
{
- _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) {
- assert(model_obj.data());
- const auto &shape = model_obj.shape();
- const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base());
- assert(model_obj.shape().rank() == 1);
- obj.access([&](ITensor &tensor) {
- for (size_t i = 0; i < shape.num_elements(); ++i)
- {
- const int32_t value = base[shape.num_elements() - i - 1];
- int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() +
- tensor.calcOffset({static_cast<int32_t>(i)}));
- *into = value;
- }
- });
- };
+ _init_map[block_size_index] = acl_common::initReverseOrder<int32_t>;
}
const auto &paddings_index = node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS);
@@ -72,11 +58,11 @@ void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
{
const int32_t value = base[i * 2 + j];
int32_t *into = reinterpret_cast<int32_t *>(
- // The coordinates of NETensor are different from the coordiantes of CLTensor in
- // this operand.
- // NEON : {j, reversed i}
- // CL : {reversed i, j}
- tensor.buffer() + tensor.calcOffset({j, shape.dim(0) - i - 1}));
+              // The coordinates of NETensor are different from the coordinates of CLTensor in
+ // this operand.
+ // NEON : {j, reversed i}
+ // CL : {reversed i, j}
+ tensor.buffer() + tensor.calcOffset({j, shape.dim(0) - i - 1}));
*into = value;
}
}
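The removed lambda is replaced by the shared helper acl_common::initReverseOrder<int32_t>, which writes a rank-1 constant into the backend tensor back to front. A standalone sketch of the same semantics on plain vectors; the real helper operates on ir::Operand and ITensor:

#include <cassert>
#include <cstdint>
#include <vector>

// Element i of the model constant lands at position (n - i - 1) of the tensor.
template <typename T> std::vector<T> initReverseOrder(const std::vector<T> &src)
{
  std::vector<T> dst(src.size());
  for (size_t i = 0; i < src.size(); ++i)
    dst[i] = src[src.size() - i - 1];
  return dst;
}

int main()
{
  assert((initReverseOrder<int32_t>({1, 2, 3}) == std::vector<int32_t>{3, 2, 1}));
}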
diff --git a/runtime/onert/backend/acl_neon/ConstantInitializer.h b/runtime/onert/backend/acl_neon/ConstantInitializer.h
index c7d71cdcf..9723ba012 100644
--- a/runtime/onert/backend/acl_neon/ConstantInitializer.h
+++ b/runtime/onert/backend/acl_neon/ConstantInitializer.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__
-#define __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__
+#ifndef __ONERT_BACKEND_ACL_NEON_CONSTANT_INITIALIZER_H__
+#define __ONERT_BACKEND_ACL_NEON_CONSTANT_INITIALIZER_H__
#include "AclConstantInitializer.h"
@@ -41,4 +41,4 @@ public:
} // namespace backend
} // namespace onert
-#endif // __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__
+#endif // __ONERT_BACKEND_ACL_NEON_CONSTANT_INITIALIZER_H__
diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.cc b/runtime/onert/backend/acl_neon/KernelGenerator.cc
index 6d53c1245..e71aa3693 100644
--- a/runtime/onert/backend/acl_neon/KernelGenerator.cc
+++ b/runtime/onert/backend/acl_neon/KernelGenerator.cc
@@ -18,7 +18,6 @@
#include <arm_compute/runtime/NEON/NEFunctions.h> // Include all ARM Compute NEON functions
#include <arm_compute/runtime/NEON/NEFunctionsEx.h> // Include all ARM Compute EX NEON functions
-#include <arm_compute/runtime/CPP/functions/CPPOneHotEx.h>
#include <AclActivationBuilder.h>
#include <AclFunction.h>
@@ -42,59 +41,55 @@ namespace acl_neon
using ::onert::backend::acl_common::asAclFunction;
using ActivationBuilder = ::onert::backend::acl_common::AclActivationBuilder<
- ::arm_compute::ITensor, ::arm_compute::NEActivationLayer, acl_common::AclFunction>;
+ ::arm_compute::ITensor, ::arm_compute::NEActivationLayer, acl_common::AclFunction>;
KernelGenerator::KernelGenerator(
- const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder,
- const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
- : _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder),
- _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN)
+ const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
+ : basic::KernelGeneratorBase{graph}, _ctx(graph.operands()),
+ _operations_ctx(graph.operations()), _current_layout{graph.layout()},
+ _tensor_builder(tensor_builder), _tensor_reg(tensor_reg)
{
// DO NOTHING
}
-void KernelGenerator::visit(const ir::OpSequence &op_seq)
+std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
{
- // TODO Move this to IKernelGenerator
- // (all derivatives have the same implementation for this)
- assert(!_return_fn_seq);
- _return_fn_seq = std::make_unique<exec::FunctionSequence>();
- _return_fn_seq->enableDynamicShapeInferer(false);
-
- _current_op_seq_layout = op_seq.getLayout();
- for (const auto &operation_idx : op_seq.operations())
- {
- const auto &node = _operations_ctx.at(operation_idx);
- node.accept(*this);
- _return_fn_seq->append(releaseFunction());
- }
+ auto ret = std::make_unique<exec::FunctionSequence>();
+ ret->enableDynamicShapeInferer(false);
+
+ const auto &op = _graph.operations().at(ind);
+ op.accept(*this);
+ ret->append(releaseFunction());
+ return ret;
}
-void KernelGenerator::visit(const ir::operation::ArgMax &node)
+void KernelGenerator::visit(const ir::operation::ArgMinMax &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
+ const auto ifm_index{node.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)};
+ const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)};
const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto frontend_layout = _current_op_seq_layout;
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto frontend_layout = _current_layout;
auto backend_layout = ifm_tensor->layout();
- int axis_value = node.param().axis;
+ int axis_value = _ctx.at(axis_index).asScalar<int32_t>();
if (axis_value < 0)
{
axis_value += ifm_rank;
}
assert(axis_value >= 0 && axis_value < ifm_rank);
const auto fixed_axis =
- acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
+ acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
+ auto reduce_type = node.param().is_arg_max ? ::arm_compute::ReductionOperation::ARG_IDX_MAX
+ : ::arm_compute::ReductionOperation::ARG_IDX_MIN;
auto fn = acl_common::generateLayer<arm_compute::NEArgMinMaxLayer>(
- ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(),
- arm_compute::ReductionOperation::ARG_IDX_MAX);
+ ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(), reduce_type);
_return_fn = asAclFunction(std::move(fn));
}
@@ -104,16 +99,35 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)};
const auto block_size_index{
- node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
+ node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
+
+ const auto NNApiInputs = 2;
+ if (node.getInputs().size() != NNApiInputs)
+ {
+ const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)};
+ if (!_ctx.at(crops_index).isConstant())
+ {
+ throw std::runtime_error("Non-constant crops NYI for acl_neon backend BatchToSpaceND");
+ }
+
+ auto crops = _ctx.at(crops_index).asVector<int32_t>();
+ for (auto &&crop : crops)
+ {
+ if (crop != 0)
+ {
+ throw std::runtime_error("Non-zero crops NYI for acl_neon backend BatchToSpaceND");
+ }
+ }
+ }
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index);
assert(_ctx.at(block_size_index).data());
auto fn = acl_common::generateLayer<arm_compute::NEBatchToSpaceLayer>(
- ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
+ ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
@@ -126,9 +140,9 @@ void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
- auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index);
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index);
std::unique_ptr<arm_compute::IFunction> fn;
switch (node.param().arithmetic_type)
@@ -136,29 +150,29 @@ void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
{
fn = acl_common::generateLayer<arm_compute::NEArithmeticAddition>(
- lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
- arm_compute::ConvertPolicy::SATURATE);
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+ arm_compute::ConvertPolicy::SATURATE);
break;
}
case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
{
fn = acl_common::generateLayer<arm_compute::NEArithmeticSubtraction>(
- lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
- arm_compute::ConvertPolicy::SATURATE);
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+ arm_compute::ConvertPolicy::SATURATE);
break;
}
case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
{
      // For scale 1.0, only RoundingPolicy::TO_ZERO is allowed
fn = acl_common::generateLayer<arm_compute::NEPixelWiseMultiplication>(
- lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
- arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO);
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
+ arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO);
break;
}
case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
{
fn = acl_common::generateLayer<arm_compute::NEElementwiseDivision>(
- lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
break;
}
default:
@@ -166,7 +180,7 @@ void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
break;
}
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Conv2D &node)
@@ -178,30 +192,30 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
// Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
const auto &ker_shape = _ctx.at(ker_index).shape();
const auto ker_height = ker_shape.dim(1);
const auto ker_width = ker_shape.dim(2);
const auto stride = node.param().stride;
- const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
- ker_width, ker_height);
+ const auto padding =
+ ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, ker_width, ker_height);
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
- auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index);
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index);
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
auto fn = acl_common::generateLayer<arm_compute::NEConvolutionLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
- ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info,
- ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
+ ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info,
+ ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
_return_fn = asAclFunction(std::move(fn));
}
@@ -214,11 +228,11 @@ void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
auto block_size = node.param().block_size;
assert(block_size > 0);
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
auto fn = acl_common::generateLayer<arm_compute::NEDepthToSpaceLayer>(
- input_tensor->handle(), output_tensor->handle(), block_size);
+ input_tensor->handle(), output_tensor->handle(), block_size);
_return_fn = asAclFunction(std::move(fn));
}
@@ -232,34 +246,35 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
// Kernel format is [1, kernel_height, kernel_width, depth_out].
const auto &ker_shape = _ctx.at(ker_index).shape();
const auto ker_height = ker_shape.dim(1);
const auto ker_width = ker_shape.dim(2);
const auto stride = node.param().stride;
- const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
- ker_width, ker_height);
+ const auto dilation = node.param().dilation;
+ const auto padding =
+ ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ dilation.width_factor, dilation.height_factor);
const auto multiplier = node.param().multiplier;
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
- auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index);
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index);
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
+ const auto dilation_info = acl_common::asDilation(dilation.width_factor, dilation.height_factor);
- {
- auto fn = acl_common::generateLayer<arm_compute::NEDepthwiseConvolutionLayer>(
- ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(),
- conv_info, multiplier, act_info);
+ auto fn = acl_common::generateLayer<arm_compute::NEDepthwiseConvolutionLayer>(
+ ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(),
+ conv_info, multiplier, act_info, dilation_info);
- _return_fn = asAclFunction(std::move(fn));
- }
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Concat &node)
@@ -282,26 +297,26 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
return;
}
- auto output_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- std::vector<::arm_compute::ITensor *> input_tensors;
+ auto output_tensor = _tensor_reg->getAclTensor(ofm_index);
+ std::vector<const ::arm_compute::ITensor *> input_tensors;
for (const auto &ifm_ind : input_indexes)
input_tensors.emplace_back(_tensor_reg->getAclTensor(ifm_ind)->handle());
std::unique_ptr<::arm_compute::IFunction> fn;
if (input_indexes.size() < 2)
{
- fn = acl_common::generateLayer<arm_compute::NECopy>(input_tensors.at(0),
- output_tensor->handle());
+    ::arm_compute::ITensor *input_tensor = _tensor_reg->getAclTensor(input_indexes.at(0))->handle();
+    fn = acl_common::generateLayer<arm_compute::NECopy>(input_tensor, output_tensor->handle());
}
else
{
const auto rank = _ctx.at(ofm_index).shape().rank();
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = output_tensor->layout();
const auto fixed_axis =
- acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
+ acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
fn = acl_common::generateLayer<arm_compute::NEConcatenateLayer>(
- input_tensors, output_tensor->handle(), fixed_axis);
+ input_tensors, output_tensor->handle(), fixed_axis);
}
_return_fn = asAclFunction(std::move(fn));
@@ -312,27 +327,15 @@ void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
- const ::arm_compute::ActivationLayerInfo act_info = acl_common::asActivationLayerInfo(
- node.param().op_type, node.param().alpha, node.param().beta);
+ const ::arm_compute::ActivationLayerInfo act_info =
+ acl_common::asActivationLayerInfo(node.param().op_type, node.param().alpha, node.param().beta);
- std::unique_ptr<arm_compute::IFunction> fn;
- if (node.param().op_type == ir::operation::ElementwiseActivation::Type::LOGISTIC)
- {
-    // NOTE NEActivationLayer can produce erroneous results; this was caused by
-    // 'vexpq_f32()'.
-    // The neon function returns a value outside the representable float range as 'NaN'
-    // instead of 'INF', and the result of this op then contains errors due to the 'NaN'.
- fn = acl_common::generateLayer<arm_compute::NEActivationLayerEx>(
- ifm_tensor->handle(), ofm_tensor->handle(), act_info);
- }
- else
- {
- fn = acl_common::generateLayer<arm_compute::NEActivationLayer>(ifm_tensor->handle(),
- ofm_tensor->handle(), act_info);
- }
+ std::unique_ptr<arm_compute::IFunction> fn =
+ acl_common::generateLayer<arm_compute::NEActivationLayer>(ifm_tensor->handle(),
+ ofm_tensor->handle(), act_info);
_return_fn = asAclFunction(std::move(fn));
}
@@ -343,9 +346,9 @@ void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
- auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index);
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index);
std::unique_ptr<arm_compute::IFunction> fn;
switch (node.param().op_type)
@@ -353,25 +356,25 @@ void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
{
fn = acl_common::generateLayer<arm_compute::NELogicalAnd>(
- lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
break;
}
case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
{
fn = acl_common::generateLayer<arm_compute::NELogicalOr>(
- lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
break;
}
case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
{
fn = acl_common::generateLayer<arm_compute::NEElementwiseMax>(
- lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
break;
}
case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
{
fn = acl_common::generateLayer<arm_compute::NEElementwiseMin>(
- lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
break;
}
default:
@@ -390,8 +393,8 @@ void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
std::unique_ptr<arm_compute::IFunction> fn;
switch (node.param().op_type)
@@ -399,10 +402,10 @@ void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
case ir::operation::ElementwiseUnary::Type::ABS:
{
const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
fn = acl_common::generateLayer<arm_compute::NEActivationLayer>(
- input_tensor->handle(), output_tensor->handle(), act_info);
+ input_tensor->handle(), output_tensor->handle(), act_info);
break;
}
case ir::operation::ElementwiseUnary::Type::CAST:
@@ -412,10 +415,15 @@ void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
fn = acl_common::generateLayer<arm_compute::NECopy>(input_tensor->handle(),
output_tensor->handle());
}
+ else if (_ctx.at(input_index).typeInfo().type() == ir::DataType::BOOL8)
+ {
+ fn = acl_common::generateLayer<arm_compute::NECastBool>(input_tensor->handle(),
+ output_tensor->handle());
+ }
else
{
fn = acl_common::generateLayer<arm_compute::NECast>(
- input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
+ input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
}
break;
}
@@ -458,10 +466,10 @@ void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
case ir::operation::ElementwiseUnary::Type::SQRT:
{
const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
fn = acl_common::generateLayer<arm_compute::NEActivationLayer>(
- input_tensor->handle(), output_tensor->handle(), act_info);
+ input_tensor->handle(), output_tensor->handle(), act_info);
break;
}
default:
@@ -480,12 +488,12 @@ void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
- auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index);
+ auto values_tensor = _tensor_reg->getAclTensor(values_index);
auto fn = acl_common::generateLayer<arm_compute::NEEmbeddingLookup>(
- values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
+ values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
@@ -493,14 +501,17 @@ void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
void KernelGenerator::visit(const ir::operation::FullyConnected &node)
{
const auto output_index{node.getOutputs().at(0)};
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
const auto activation = node.param().activation;
+ if (node.param().weights_format == ir::FullyConnectedWeightsFormat::Shuffled16x1Float32)
+ throw std::runtime_error(
+ "KernelGenerator(acl_neon): FullyConnected 16x1Float32 weights is not supported.");
auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ITensor,
::arm_compute::NEFullyConnectedReshapingLayer>(
- node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout);
+ node, _ctx, _tensor_builder, _tensor_reg, _current_layout);
_return_fn = std::make_unique<exec::FunctionSequence>(
- std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
+ std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
@@ -512,16 +523,16 @@ void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto hits_tensor = _tensor_reg->getAclTensor(hits_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto hits_tensor = _tensor_reg->getAclTensor(hits_index);
- auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
- auto keys_tensor = _tensor_reg->getAclTensor(keys_index).get();
- auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
+ auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index);
+ auto keys_tensor = _tensor_reg->getAclTensor(keys_index);
+ auto values_tensor = _tensor_reg->getAclTensor(values_index);
auto fn = acl_common::generateLayer<arm_compute::NEHashtableLookup>(
- lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
- output_tensor->handle(), hits_tensor->handle());
+ lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
+ output_tensor->handle(), hits_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
@@ -539,9 +550,9 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
// Converting in reverse order
const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto indices_tensor = _tensor_reg->getAclTensor(indices_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto indices_tensor = _tensor_reg->getAclTensor(indices_index);
const auto backend_layout = ofm_tensor->layout();
UNUSED_RELEASE(backend_layout);
@@ -555,7 +566,7 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
// and C are not sequential in NCHW. So the backend in NCHW cannot handle this case.
assert(backend_layout == ifm_tensor->layout());
assert(backend_layout == indices_tensor->layout());
- assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout);
+ assert(ifm_rank < 4 || _current_layout == backend_layout);
// input is n-D, indices k-D, output is (n + k - 1)-D
size_t n = ifm_rank;
@@ -567,24 +578,26 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
if (n != ifm_tensor->info()->num_dimensions())
{
    // This means that the high dimension's value is 1 and dim_correction has been applied to the ifm tensor
- const auto ifm = _ctx.at(ifm_index);
- ifm_tensor->info()->set_tensor_shape(
- acl_common::asTensorShape(ifm.shape(), _current_op_seq_layout, backend_layout, false));
+ acl_common::disableDimCorrection(ifm_tensor);
}
if (k != indices_tensor->info()->num_dimensions())
{
    // This means that the high dimension's value is 1 and dim_correction has been applied to the indices tensor
- const auto indices = _ctx.at(indices_index);
- indices_tensor->info()->set_tensor_shape(
- acl_common::asTensorShape(indices.shape(), _current_op_seq_layout, backend_layout, false));
+ acl_common::disableDimCorrection(indices_tensor);
}
auto fn = acl_common::generateLayer<arm_compute::NEGatherEx>(
- ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
+ ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
-  // acl_neon does not revert disabling applied dim_correction because acl_neon's kernels would
-  // use arm_compute::TensorInfo::offset_element_in_bytes()
-  // It would create an error when the kernel accesses a high dimension whose value is 1
+ // Revert disabling applied dim_correction
+ if (ifm_tensor->dimension(0) == 1)
+ {
+ acl_common::enableDimCorrection(ifm_tensor);
+ }
+ if (indices_tensor->dimension(0) == 1)
+ {
+ acl_common::enableDimCorrection(indices_tensor);
+ }
_return_fn = asAclFunction(std::move(fn));
}
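
Gather is the first of several hunks (Pack, StridedSlice, and Unpack below follow the same shape) that replace the old never-revert comment with a disable/re-enable pair around layer generation. Condensed, assuming only the acl_common helpers visible in this patch ('tensor' and 'SomeNeonLayer' are placeholders):

// ACL's TensorInfo may collapse trailing size-1 dimensions ("dim correction"),
// so rank-sensitive kernels get the full rank restored while the layer is built.
if (static_cast<size_t>(tensor->getShape().rank()) != tensor->info()->num_dimensions())
  acl_common::disableDimCorrection(tensor); // keep size-1 dims in the ACL shape
auto fn = acl_common::generateLayer<SomeNeonLayer>(/* tensor handles */);
if (tensor->dimension(0) == 1)
  acl_common::enableDimCorrection(tensor); // restore the collapsed shape afterwards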
@@ -596,19 +609,19 @@ void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto gamma_tensor = _tensor_reg->getAclTensor(gamma_index).get();
- auto beta_tensor = _tensor_reg->getAclTensor(beta_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto gamma_tensor = _tensor_reg->getAclTensor(gamma_index);
+ auto beta_tensor = _tensor_reg->getAclTensor(beta_index);
auto epsilon = node.param().epsilon;
auto activation = node.param().activation;
auto fn = acl_common::generateLayer<arm_compute::NEInstanceNormalizationLayerEx>(
- ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), beta_tensor->handle(),
- epsilon);
+ ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), beta_tensor->handle(),
+ epsilon);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::L2Normalization &node)
@@ -625,19 +638,19 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node)
// TODO Support optional constant dimension that normalization would be performed on
const auto normalization_axis = _ctx.at(ifm_index).shape().rank() - 1;
int32_t radius =
- 2 * ifm_shape.dim(normalization_axis) + 1; // normSize = depth(last dimension) * 2 + 1
- float alpha = 1.0f; // In the implementation to make alpha_ become 1
- float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
- float bias = 0.0f; // Don't offset the reduction.
+ 2 * ifm_shape.dim(normalization_axis) + 1; // normSize = depth(last dimension) * 2 + 1
+ float alpha = 1.0f; // In the implementation to make alpha_ become 1
+ float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
+ float bias = 0.0f; // Don't offset the reduction.
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
radius, alpha, beta, bias, false);
auto fn = acl_common::generateLayer<arm_compute::NENormalizationLayer>(
- ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
+ ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
_return_fn = asAclFunction(std::move(fn));
}
@@ -646,21 +659,21 @@ void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &nod
{
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{
- node.getInputs().at(ir::operation::LocalResponseNormalization::Input::INPUT)};
+ node.getInputs().at(ir::operation::LocalResponseNormalization::Input::INPUT)};
auto radius = node.param().radius;
auto alpha = node.param().alpha;
auto beta = node.param().beta;
auto bias = node.param().bias;
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
const auto norm_info = ::arm_compute::NormalizationLayerInfo(
- ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
+ ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
auto fn = acl_common::generateLayer<arm_compute::NENormalizationLayer>(
- ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
+ ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
_return_fn = asAclFunction(std::move(fn));
}
@@ -682,13 +695,13 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
for (const auto &input_index : node.getInputs())
input_indexes.emplace_back(input_index);
- auto output = _tensor_reg->getAclTensor(output_index).get()->handle();
+ auto output = _tensor_reg->getAclTensor(output_index)->handle();
std::vector<arm_compute::ITensor *> inputs;
for (const auto &input_index : input_indexes)
inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle());
- const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_reg->getAclTensor(output_index).get()->layout();
+ const auto frontend_layout = _current_layout;
+ const auto backend_layout = _tensor_reg->getAclTensor(output_index)->layout();
if (axis < 0)
axis += output_rank;
@@ -697,22 +710,25 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
// Disable applied dim_correction
for (const auto &input_index : input_indexes)
{
- size_t input_rank = _ctx.at(input_index).shape().rank();
const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
- assert(input_rank == input_tensor->num_dimensions());
- if (input_rank != input_tensor->info()->num_dimensions())
+ if (input_tensor->num_dimensions() != input_tensor->info()->num_dimensions())
{
-      // This means that the high dimension's value is 1 and dim_correction has been applied to the ifm tensor
- input_tensor->info()->set_tensor_shape(acl_common::asTensorShape(
- _ctx.at(input_index).shape(), _current_op_seq_layout, backend_layout, false));
+      // This means that the high dimension's value is 1 and dim_correction has been applied to the input tensor
+ acl_common::disableDimCorrection(input_tensor);
}
}
auto fn = acl_common::generateLayer<arm_compute::NEStackLayer>(inputs, axis, output);
-  // acl_neon does not revert disabling applied dim_correction because acl_neon's kernels would
-  // use arm_compute::TensorInfo::offset_element_in_bytes()
-  // It would create an error when the kernel accesses a high dimension whose value is 1
+ // Revert disabling applied dim_correction
+ for (const auto &input_index : input_indexes)
+ {
+ const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
+ if (input_tensor->dimension(0) == 1)
+ {
+ acl_common::enableDimCorrection(input_tensor);
+ }
+ }
_return_fn = asAclFunction(std::move(fn));
}
@@ -727,8 +743,8 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
auto rank = _ctx.at(input_index).shape().rank();
auto pad_base = _ctx.at(pad_index).data()->base();
- auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
- auto output = _tensor_reg->getAclTensor(output_index).get()->handle();
+ auto input = _tensor_reg->getAclTensor(input_index)->handle();
+ auto output = _tensor_reg->getAclTensor(output_index)->handle();
::arm_compute::PaddingList padding_list;
padding_list.resize(rank);
@@ -736,10 +752,10 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
{
const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2);
- const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
+ const auto frontend_layout = _current_layout;
+ const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout();
const auto axis =
- acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
+ acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]};
}
@@ -747,12 +763,12 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
UNUSED_RELEASE(input_type);
assert(input->info()->data_type() == acl_common::asDataType(input_type.type()));
assert(input->info()->quantization_info() ==
- ::arm_compute::QuantizationInfo(input_type.scale(), input_type.offset()));
+ ::arm_compute::QuantizationInfo(input_type.scale(), input_type.zero_point()));
const auto pixel_value =
- ::arm_compute::PixelValue(0, input->info()->data_type(), input->info()->quantization_info());
+ ::arm_compute::PixelValue(0, input->info()->data_type(), input->info()->quantization_info());
auto fn =
- acl_common::generateLayer<arm_compute::NEPadLayer>(input, output, padding_list, pixel_value);
+ acl_common::generateLayer<arm_compute::NEPadLayer>(input, output, padding_list, pixel_value);
_return_fn = asAclFunction(std::move(fn));
}
@@ -760,15 +776,14 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
void KernelGenerator::visit(const ir::operation::Pool2D &node)
{
auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
- node, _ctx, _tensor_reg, _current_op_seq_layout,
- acl_common::convertPoolType(node.param().op_type));
+ node, _ctx, _tensor_reg, _current_layout, acl_common::convertPoolType(node.param().op_type));
const auto ofm_index{node.getOutputs().at(0)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
const auto activation = node.param().activation;
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(raw_fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ asAclFunction(std::move(raw_fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Permute &node)
@@ -776,8 +791,8 @@ void KernelGenerator::visit(const ir::operation::Permute &node)
const auto ofm_idx{node.getOutputs().at(0)};
const auto ifm_idx{node.getInputs().at(0)};
const auto permute_type = node.getPermuteType();
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx);
const auto rank = _ctx.at(ofm_idx).shape().rank();
assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank());
@@ -812,12 +827,12 @@ void KernelGenerator::visit(const ir::operation::PReLU &node)
const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto alpha_tensor = _tensor_reg->getAclTensor(alpha_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto alpha_tensor = _tensor_reg->getAclTensor(alpha_index);
auto fn = acl_common::generateLayer<arm_compute::NEPReluLayer>(
- ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
+ ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
@@ -828,16 +843,16 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
// Convert to ACL axes taking into account negative values and possible duplicates.
const auto &axes = _ctx.at(axes_index);
const auto input_rank = _ctx.at(input_index).shape().rank();
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = input_tensor->layout();
const auto reduce_axes =
- acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout);
+ acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout);
const auto reduce_type = node.param().reduce_type;
const auto keep_dims = node.param().keep_dims;
@@ -855,8 +870,8 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
else
{
fn = acl_common::generateLayer<arm_compute::NEReduceOperation>(
- input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle(),
- acl_common::convertReduceType(reduce_type));
+ input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle(),
+ acl_common::convertReduceType(reduce_type));
}
_return_fn = asAclFunction(std::move(fn));
}
@@ -866,12 +881,12 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
  // NOTE This operation must not change the layout from frontend to backend
// So, PermutationOperationPass makes layouts of frontend and backend the same.
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = output_tensor->layout();
assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) ||
frontend_layout == backend_layout);
@@ -887,16 +902,17 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
{
const auto ofm_index{node.getOutputs().at(0)};
-
const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
auto fn = acl_common::generateLayer<arm_compute::NEScale>(
- ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::InterpolationPolicy::BILINEAR,
- ::arm_compute::BorderMode::REPLICATE, ::arm_compute::PixelValue(0.f),
- ::arm_compute::SamplingPolicy::TOP_LEFT);
+ ifm_tensor->handle(), ofm_tensor->handle(),
+ ::arm_compute::ScaleKernelInfo{::arm_compute::InterpolationPolicy::BILINEAR,
+ ::arm_compute::BorderMode::REPLICATE,
+ ::arm_compute::PixelValue(0.f),
+ ::arm_compute::SamplingPolicy::TOP_LEFT, false /*use padding*/});
_return_fn = asAclFunction(std::move(fn));
}
@@ -905,35 +921,35 @@ void KernelGenerator::visit(const ir::operation::RNN &node)
{
const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)};
const auto hidden_state_out_index{
- node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};
+ node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};
const auto input_index{node.getInputs().at(ir::operation::RNN::Input::INPUT)};
const auto weights_index{node.getInputs().at(ir::operation::RNN::Input::WEIGHTS)};
const auto recurrent_weights_index{
- node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
+ node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)};
const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)};
const auto activation = node.param().activation;
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto hidden_state_out_tensor = _tensor_reg->getAclTensor(hidden_state_out_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto hidden_state_out_tensor = _tensor_reg->getAclTensor(hidden_state_out_index);
- auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
- auto weights_tensor = _tensor_reg->getAclTensor(weights_index).get();
- auto recurrent_weights_tensor = _tensor_reg->getAclTensor(recurrent_weights_index).get();
- auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
- auto hidden_state_in_tensor = _tensor_reg->getAclTensor(hidden_state_in_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
+ auto weights_tensor = _tensor_reg->getAclTensor(weights_index);
+ auto recurrent_weights_tensor = _tensor_reg->getAclTensor(recurrent_weights_index);
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index);
+ auto hidden_state_in_tensor = _tensor_reg->getAclTensor(hidden_state_in_index);
auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
auto copy_layer = acl_common::generateLayer<arm_compute::NECopy>(
- hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
+ hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
_return_fn = asAclFunction(std::move(copy_layer));
auto fn = acl_common::generateLayer<arm_compute::NERNNLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
- weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
- hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
+ hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
_return_fn = asAclFunction(std::move(fn));
}
@@ -949,8 +965,8 @@ void KernelGenerator::visit(const ir::operation::Squeeze &node)
(void)dims;
(void)ndim;
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
auto fn = acl_common::generateLayer<arm_compute::NEReshapeLayer>(input_tensor->handle(),
output_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
@@ -962,24 +978,13 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
const auto beta = node.param().beta;
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
- const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = input_tensor->layout();
-
- // Disable applied dim_correction
- const size_t input_rank = _ctx.at(input_index).shape().rank();
- if (input_rank != input_tensor->info()->num_dimensions())
- {
-  // This means that the high dimension's value is 1 and dim_correction has been applied to the input tensor
- const auto input = _ctx.at(input_index);
- input_tensor->info()->set_tensor_shape(
- acl_common::asTensorShape(input.shape(), frontend_layout, backend_layout, false));
- }
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
+ // NOTE NESoftmaxLayer's default axis is -1
auto fn = acl_common::generateLayer<arm_compute::NESoftmaxLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
- output_tensor->handle(), beta);
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ output_tensor->handle(), beta);
_return_fn = asAclFunction(std::move(fn));
}
@@ -989,20 +994,20 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
const auto block_size_index{
- node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
+ node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
- auto paddings_tensor = _tensor_reg->getAclTensor(paddings_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index);
+ auto paddings_tensor = _tensor_reg->getAclTensor(paddings_index);
assert(_ctx.at(block_size_index).data());
assert(_ctx.at(paddings_index).data());
auto fn = acl_common::generateLayer<arm_compute::NESpaceToBatchLayer>(
- ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
- ofm_tensor->handle());
+ ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
+ ofm_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
@@ -1014,11 +1019,11 @@ void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
auto block_size = node.param().block_size;
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
auto fn = acl_common::generateLayer<arm_compute::NESpaceToDepthLayer>(
- ifm_tensor->handle(), ofm_tensor->handle(), block_size);
+ ifm_tensor->handle(), ofm_tensor->handle(), block_size);
_return_fn = asAclFunction(std::move(fn));
}
@@ -1027,28 +1032,33 @@ void KernelGenerator::visit(const ir::operation::Split &node)
{
// TODO Support this op by SubTensor
const auto ifm_index{node.getInputs().at(ir::operation::Split::Input::INPUT)};
+ const auto axis_index{node.getInputs().at(ir::operation::Split::Input::AXIS)};
assert(node.param().num_splits == static_cast<int>(node.getOutputs().size()));
+ if (!_ctx.at(axis_index).isConstant())
+ {
+ throw std::runtime_error("Non-constant axis_index NYI for acl_neon backend");
+ }
const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
std::vector<ir::OperandIndex> output_indexes;
for (const auto &output : node.getOutputs())
output_indexes.emplace_back(output);
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
std::vector<arm_compute::ITensor *> output_tensors;
for (const auto &ofm_ind : output_indexes)
- output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind).get()->handle());
+ output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind)->handle());
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = ifm_tensor->layout();
- auto axis = node.param().axis;
+ auto axis = _ctx.at(axis_index).asScalar<int32_t>();
if (axis < 0)
axis += ifm_rank;
axis = acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value();
auto fn =
- acl_common::generateLayer<arm_compute::NESplit>(ifm_tensor->handle(), output_tensors, axis);
+ acl_common::generateLayer<arm_compute::NESplit>(ifm_tensor->handle(), output_tensors, axis);
_return_fn = asAclFunction(std::move(fn));
}
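
The Split hunk moves the axis from a node parameter to a constant AXIS operand and rejects non-constant axes outright. A small sketch of the normalization it then performs, following the "Converting in reverse order" note earlier in this file (layout permutation omitted; the mapping below is an assumption for matching frontend/backend layouts):

// Illustrative stand-in for negative-axis handling plus ToARMComputeAxis.
int32_t toAclAxis(int32_t axis, int32_t rank)
{
  if (axis < 0)
    axis += rank;         // e.g. -1 with rank 4 -> 3, the innermost frontend axis
  return rank - axis - 1; // -> 0: ACL counts dimensions from the innermost one
}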
@@ -1059,12 +1069,12 @@ void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
- auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index);
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index);
auto fn = acl_common::generateLayer<arm_compute::NEElementwiseSquaredDiff>(
- lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
@@ -1076,9 +1086,9 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
- auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
- const auto frontend_layout = _current_op_seq_layout;
+ auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
+ auto inputData_tensor = _tensor_reg->getAclTensor(input_index);
+ const auto frontend_layout = _current_layout;
const auto backend_layout = inputData_tensor->layout();
// Set initializers for indices data such as order of inputData
@@ -1108,7 +1118,7 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
{
auto axis = ::onert::backend::acl_common::ToARMComputeAxis(input_rank, n, frontend_layout,
backend_layout)
- .value();
+ .value();
int32_t begin_value = *(reinterpret_cast<const int32_t *>(beginData_base) + n);
starts[axis] = begin_value;
@@ -1128,7 +1138,7 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
}
auto fn = acl_common::generateLayer<arm_compute::NESlice>(
- inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
+ inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
_return_fn = asAclFunction(std::move(fn));
}
@@ -1141,9 +1151,9 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
- auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
- const auto frontend_layout = _current_op_seq_layout;
+ auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
+ auto inputData_tensor = _tensor_reg->getAclTensor(input_index);
+ const auto frontend_layout = _current_layout;
const auto backend_layout = inputData_tensor->layout();
// Set initializers for indices data such as order of inputData
@@ -1180,7 +1190,7 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
{
auto axis = ::onert::backend::acl_common::ToARMComputeAxis(input_rank, n, frontend_layout,
backend_layout)
- .value();
+ .value();
int32_t start_value = *(reinterpret_cast<const int32_t *>(startData_base) + n);
starts[axis] = start_value;
@@ -1198,7 +1208,7 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
const auto begin_mask = acl_common::ReorderBits<int32_t>(node.param().begin_mask, input_rank);
const auto end_mask = acl_common::ReorderBits<int32_t>(node.param().end_mask, input_rank);
const auto shrink_axis_mask =
- acl_common::ReorderBits<int32_t>(node.param().shrink_axis_mask, input_rank);
+ acl_common::ReorderBits<int32_t>(node.param().shrink_axis_mask, input_rank);
::arm_compute::Coordinates starts_set;
::arm_compute::Coordinates ends_set;
@@ -1211,9 +1221,23 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
strides_set.set(i, strides[i]);
}
+ // Disable applied dim_correction
+ if (static_cast<size_t>(inputData_tensor->getShape().rank()) !=
+ inputData_tensor->info()->num_dimensions())
+ {
+    // This means that the high dimension's value is 1 and dim_correction has been applied to the input tensor
+ acl_common::disableDimCorrection(inputData_tensor);
+ }
+
auto fn = acl_common::generateLayer<arm_compute::NEStridedSlice>(
- inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
- begin_mask, end_mask, shrink_axis_mask);
+ inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
+ begin_mask, end_mask, shrink_axis_mask);
+
+ // Revert disabling applied dim_correction
+ if (inputData_tensor->getShape().dim(0) == 1)
+ {
+ acl_common::enableDimCorrection(inputData_tensor);
+ }
_return_fn = asAclFunction(std::move(fn));
}
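
StridedSlice's begin/end/shrink masks are per-axis bit fields, so once the axes are converted to ACL's reversed ordering the mask bits have to move with them. A guess at what ReorderBits does, inferred only from its call sites above, where bit i describes frontend axis i and that axis lands on ACL axis rank - 1 - i:

// Assumption: reverses the low 'rank' bits of 'mask'. Not the actual onert source.
int32_t reorderBits(int32_t mask, int32_t rank)
{
  int32_t out = 0;
  for (int32_t i = 0; i < rank; ++i)
    if (mask & (1 << i))
      out |= 1 << (rank - 1 - i);
  return out;
}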
@@ -1224,9 +1248,9 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node)
const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)};
const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)};
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_op_seq_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_layout);
const auto stride = node.param().stride;
@@ -1240,19 +1264,19 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node)
if (node.param().padding.type == ir::PaddingType::VALID)
{
invalid_horizontal =
- ofm_shape.W - (1 + (ifm_shape.W - 1) * stride.horizontal) - (ker_shape.W - 1);
+ ofm_shape.W - (1 + (ifm_shape.W - 1) * stride.horizontal) - (ker_shape.W - 1);
invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
}
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index);
const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
auto fn = acl_common::generateLayer<arm_compute::NETransposeConvLayer>(
- ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(), tconv_info,
- invalid_horizontal, invalid_vertical);
+ ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(), tconv_info,
+ invalid_horizontal, invalid_vertical);
_return_fn = asAclFunction(std::move(fn));
}
@@ -1261,26 +1285,43 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
{
const auto ofm_idx{node.getOutputs().at(0)};
const auto ifm_idx{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
- const auto &perm{node.param().perm};
+ const auto perm_idx{node.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};
- auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
- const auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
- const auto frontend_layout = _current_op_seq_layout;
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx);
+ const auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx);
+ const auto frontend_layout = _current_layout;
const auto backend_layout = ifm_tensor->layout();
-
const auto rank = _ctx.at(ifm_idx).shape().rank();
- std::vector<std::int32_t> pv(perm.cbegin(), perm.cend());
- auto backend_pv = ::onert::backend::acl_common::getARMComputePermutationVector(
- rank, pv, frontend_layout, backend_layout);
- std::unique_ptr<::arm_compute::IFunction> fn;
- if (ifm_tensor->num_dimensions() <= 2 && ofm_tensor->num_dimensions() <= 2)
+ const auto &perms = _ctx.at(perm_idx);
+ std::vector<int32_t> pv;
+ if (perms.shape() == ir::Shape{0})
+ {
+ pv.resize(rank);
+ std::iota(pv.begin(), pv.end(), 0);
+ std::reverse(pv.begin(), pv.end());
+ }
+ else
+ {
+ pv = _ctx.at(perm_idx).asVector<int32_t>();
+ }
+
+ std::unique_ptr<arm_compute::IFunction> fn;
+ if (rank == 1)
+ {
+ fn = acl_common::generateLayer<arm_compute::NECopy>(ifm_tensor->handle(), ofm_tensor->handle());
+ }
+ else if (rank == 2)
{
+ assert(pv.size() == 2 && pv.at(0) == 1 && pv.at(1) == 0);
fn = acl_common::generateLayer<arm_compute::NETranspose>(ifm_tensor->handle(),
ofm_tensor->handle());
}
else
{
+ auto backend_pv =
+ acl_common::getARMComputePermutationVector(rank, pv, frontend_layout, backend_layout);
+
fn = acl_common::generateLayer<arm_compute::NEPermute>(ifm_tensor->handle(),
ofm_tensor->handle(), backend_pv);
}
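
With the permutation now read from a PERMUTATION operand, an empty (shape {0}) operand selects the default of reversing all axes, the usual transpose convention when no perm is supplied. The iota/reverse pair above produces, for example at rank 3:

#include <algorithm>
#include <cstdint>
#include <numeric>
#include <vector>

// Sketch of the empty-perm default.
std::vector<int32_t> defaultPerm(int32_t rank)
{
  std::vector<int32_t> pv(rank);
  std::iota(pv.begin(), pv.end(), 0); // rank 3: {0, 1, 2}
  std::reverse(pv.begin(), pv.end()); // rank 3: {2, 1, 0}, a full axis reversal
  return pv;
}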
@@ -1298,34 +1339,33 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
for (const auto &output_index : node.getOutputs())
output_indexes.emplace_back(output_index);
- auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
std::vector<arm_compute::ITensor *> outputs;
for (const auto &output_index : output_indexes)
outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle());
- const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
+ const auto frontend_layout = _current_layout;
+ const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout();
if (axis < 0)
axis += input_rank;
axis = acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value();
// Disable applied dim_correction
- std::vector<arm_compute::TensorShape> orig_outputs_acl_tensor_shapes;
- for (const auto &output_index : output_indexes)
+ if (static_cast<size_t>(input_tensor->getShape().rank()) !=
+ input_tensor->info()->num_dimensions())
{
- size_t output_rank = _ctx.at(output_index).shape().rank();
- const auto &output_tensor = _tensor_reg->getAclTensor(output_index);
- orig_outputs_acl_tensor_shapes.emplace_back(output_tensor->info()->tensor_shape());
- assert(output_rank == output_tensor->num_dimensions());
- if (output_rank != output_tensor->info()->num_dimensions())
- {
-      // This means that the high dimension's value is 1 and dim_correction has been applied to the ifm tensor
- output_tensor->info()->set_tensor_shape(acl_common::asTensorShape(
- _ctx.at(output_index).shape(), _current_op_seq_layout, backend_layout, false));
- }
+    // This means that the high dimension's value is 1 and dim_correction has been applied to the input tensor
+ acl_common::disableDimCorrection(input_tensor);
}
- auto fn = acl_common::generateLayer<arm_compute::NEUnstack>(input, outputs, axis);
+ auto fn =
+ acl_common::generateLayer<arm_compute::NEUnstack>(input_tensor->handle(), outputs, axis);
+
+ // Revert disabling applied dim_correction
+ if (input_tensor->getShape().dim(0) == 1)
+ {
+ acl_common::enableDimCorrection(input_tensor);
+ }
_return_fn = asAclFunction(std::move(fn));
}
@@ -1335,8 +1375,8 @@ void KernelGenerator::visit(const ir::operation::ExpandDims &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto input_tensor = _tensor_reg->getAclTensor(input_index);
auto fn = acl_common::generateLayer<arm_compute::NEReshapeLayer>(input_tensor->handle(),
output_tensor->handle());
@@ -1352,13 +1392,13 @@ void KernelGenerator::visit(const ir::operation::Comparison &node)
const auto comparison_type = node.param().comparison_type;
- auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
- auto input0_tensor = _tensor_reg->getAclTensor(input0_index).get();
- auto input1_tensor = _tensor_reg->getAclTensor(input1_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index);
+ auto input0_tensor = _tensor_reg->getAclTensor(input0_index);
+ auto input1_tensor = _tensor_reg->getAclTensor(input1_index);
auto fn = acl_common::generateLayer<arm_compute::NEElementwiseComparison>(
- input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
- (arm_compute::ComparisonOperation)comparison_type);
+ input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
+ (arm_compute::ComparisonOperation)comparison_type);
_return_fn = asAclFunction(std::move(fn));
}
@@ -1370,17 +1410,22 @@ void KernelGenerator::visit(const ir::operation::OneHot &node)
const auto depth_idx{node.getInputs().at(ir::operation::OneHot::Input::DEPTH)};
const auto onvalue_idx{node.getInputs().at(ir::operation::OneHot::Input::ON_VALUE)};
const auto offvalue_idx{node.getInputs().at(ir::operation::OneHot::Input::OFF_VALUE)};
- const auto axis = node.param().axis;
- auto output_tensor = _tensor_reg->getAclTensor(out_idx).get();
- auto indices_tensor = _tensor_reg->getAclTensor(indices_idx).get();
- auto depth_tensor = _tensor_reg->getAclTensor(depth_idx).get();
- auto onvalue_tensor = _tensor_reg->getAclTensor(onvalue_idx).get();
- auto offvalue_tensor = _tensor_reg->getAclTensor(offvalue_idx).get();
+ auto output_tensor = _tensor_reg->getAclTensor(out_idx);
+ auto indices_tensor = _tensor_reg->getAclTensor(indices_idx);
+ auto depth_tensor = _tensor_reg->getAclTensor(depth_idx);
+ auto onvalue_tensor = _tensor_reg->getAclTensor(onvalue_idx);
+ auto offvalue_tensor = _tensor_reg->getAclTensor(offvalue_idx);
+
+ const size_t output_rank = _ctx.at(out_idx).shape().rank();
+ const auto frontend_layout = _current_layout;
+ const auto backend_layout = output_tensor->layout();
+ int32_t axis = node.param().axis == -1 ? output_rank - 1 : node.param().axis;
+ axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();
- auto fn = acl_common::generateLayer<arm_compute::CPPOneHotEx>(
- indices_tensor->handle(), depth_tensor->handle(), onvalue_tensor->handle(),
- offvalue_tensor->handle(), output_tensor->handle(), axis);
+ auto fn = acl_common::generateLayer<arm_compute::NEOneHot>(
+ indices_tensor->handle(), depth_tensor->handle(), onvalue_tensor->handle(),
+ offvalue_tensor->handle(), output_tensor->handle(), axis);
_return_fn = asAclFunction(std::move(fn));
}
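
The OneHot hunk swaps the CPP reference kernel for NEOneHot and, unlike the old code, resolves the axis itself before handing it to ACL. Worked through with assumed values (output rank 4, param axis -1, matching layouts):

// Assumed example values, not taken from a real model.
const size_t output_rank = 4;
const int32_t param_axis = -1; // stands in for node.param().axis
const int32_t axis =
  param_axis == -1 ? static_cast<int32_t>(output_rank) - 1 : param_axis; // -> 3
// ToARMComputeAxis(4, 3, ...) then maps this to ACL dimension 0 (reverse order).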
diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.h b/runtime/onert/backend/acl_neon/KernelGenerator.h
index 4d269cde5..0ccf21328 100644
--- a/runtime/onert/backend/acl_neon/KernelGenerator.h
+++ b/runtime/onert/backend/acl_neon/KernelGenerator.h
@@ -17,7 +17,7 @@
#ifndef __ONERT_BACKEND_ACL_NEON_KERNEL_GENERATOR_H__
#define __ONERT_BACKEND_ACL_NEON_KERNEL_GENERATOR_H__
-#include <backend/IKernelGenerator.h>
+#include <backend/basic/KernelGeneratorBase.h>
#include "ir/Operands.h"
#include "TensorBuilder.h"
@@ -31,25 +31,28 @@ namespace backend
namespace acl_neon
{
-class KernelGenerator : public IKernelGenerator
+class KernelGenerator : public basic::KernelGeneratorBase
{
public:
- KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder,
+ KernelGenerator(const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg);
- void visit(const ir::OpSequence &) override;
- void visit(const ir::operation::ArgMax &) override;
+ std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex ind) override;
+
+private:
+ void visit(const ir::operation::ArgMinMax &) override;
void visit(const ir::operation::BatchToSpaceND &) override;
void visit(const ir::operation::BinaryArithmetic &) override;
+ void visit(const ir::operation::Comparison &) override;
+ void visit(const ir::operation::Concat &) override;
void visit(const ir::operation::Conv2D &) override;
void visit(const ir::operation::DepthToSpace &) override;
void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::Concat &) override;
void visit(const ir::operation::ElementwiseActivation &) override;
void visit(const ir::operation::ElementwiseBinary &) override;
void visit(const ir::operation::ElementwiseUnary &) override;
void visit(const ir::operation::EmbeddingLookup &) override;
+ void visit(const ir::operation::ExpandDims &) override;
void visit(const ir::operation::FullyConnected &) override;
void visit(const ir::operation::Gather &) override;
void visit(const ir::operation::HashtableLookup &) override;
@@ -57,36 +60,34 @@ public:
void visit(const ir::operation::L2Normalization &) override;
void visit(const ir::operation::LocalResponseNormalization &) override;
void visit(const ir::operation::LSTM &) override;
+ void visit(const ir::operation::OneHot &) override;
void visit(const ir::operation::Pack &) override;
void visit(const ir::operation::Pad &) override;
- void visit(const ir::operation::Pool2D &) override;
void visit(const ir::operation::Permute &) override;
+ void visit(const ir::operation::Pool2D &) override;
void visit(const ir::operation::PReLU &) override;
void visit(const ir::operation::Reduce &) override;
void visit(const ir::operation::Reshape &) override;
void visit(const ir::operation::ResizeBilinear &) override;
void visit(const ir::operation::RNN &) override;
- void visit(const ir::operation::Squeeze &) override;
+ void visit(const ir::operation::Slice &) override;
void visit(const ir::operation::Softmax &) override;
void visit(const ir::operation::SpaceToBatchND &) override;
void visit(const ir::operation::SpaceToDepth &) override;
void visit(const ir::operation::Split &) override;
void visit(const ir::operation::SquaredDifference &) override;
- void visit(const ir::operation::Slice &) override;
+ void visit(const ir::operation::Squeeze &) override;
void visit(const ir::operation::StridedSlice &) override;
- void visit(const ir::operation::TransposeConv &) override;
void visit(const ir::operation::Transpose &) override;
+ void visit(const ir::operation::TransposeConv &) override;
void visit(const ir::operation::Unpack &) override;
- void visit(const ir::operation::ExpandDims &) override;
- void visit(const ir::operation::Comparison &) override;
- void visit(const ir::operation::OneHot &) override;
private:
const ir::Operands &_ctx;
const ir::Operations &_operations_ctx;
+ const ir::Layout _current_layout;
std::shared_ptr<TensorBuilder> _tensor_builder;
std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg;
- ir::Layout _current_op_seq_layout;
};
} // namespace acl_neon
diff --git a/runtime/onert/backend/acl_neon/Optimizer.cc b/runtime/onert/backend/acl_neon/Optimizer.cc
index ac80901cc..f207ca8cb 100644
--- a/runtime/onert/backend/acl_neon/Optimizer.cc
+++ b/runtime/onert/backend/acl_neon/Optimizer.cc
@@ -16,12 +16,12 @@
#include "Optimizer.h"
-#include "ParentInfo.h"
+#include <AclSubTensorAnalyzer.h>
-#include <cassert>
#include <compiler/LoweredGraph.h>
#include <util/logging.h>
-#include "AclSubTensorAnalyzer.h"
+
+#include <cassert>
namespace onert
{
@@ -31,8 +31,8 @@ namespace acl_neon
{
Optimizer::Optimizer(BackendContext *context)
- : _context{context},
- _tensor_builder{std::dynamic_pointer_cast<TensorBuilder>(context->tensor_builder)}
+ : _context{context}, _tensor_builder{
+ std::dynamic_pointer_cast<TensorBuilder>(context->tensor_builder)}
{
assert(context);
}
@@ -42,14 +42,12 @@ void Optimizer::optimize()
// Concat elimination (build subtensor info)
{
acl_common::AclSubTensorAnalyzer sa{*_context->graph()};
- for (auto op_info : _context->operation_list())
- {
- auto &op = _context->graph()->operations().at(op_info.index);
- sa.setLayout(op_info.layout);
- op.accept(sa);
- }
-
- _tensor_builder->parent_map(sa.releaseParentMap());
+ sa.setUsePadding();
+ _context->graph()->operations().iterate(
+ [&](const ir::OperationIndex &, const ir::IOperation &op) {
+ sa.setLayout(_context->graph()->layout());
+ op.accept(sa);
+ });
}
}
diff --git a/runtime/onert/backend/acl_neon/Optimizer.h b/runtime/onert/backend/acl_neon/Optimizer.h
index 5fe0d519c..b8fb343e9 100644
--- a/runtime/onert/backend/acl_neon/Optimizer.h
+++ b/runtime/onert/backend/acl_neon/Optimizer.h
@@ -17,8 +17,7 @@
#ifndef __ONERT_BACKEND_ACL_NEON_OPTIMIZER_H__
#define __ONERT_BACKEND_ACL_NEON_OPTIMIZER_H__
-#include <backend/IOptimizer.h>
-#include <backend/BackendContext.h>
+#include "BackendContext.h"
#include "TensorBuilder.h"
namespace onert
@@ -28,12 +27,12 @@ namespace backend
namespace acl_neon
{
-class Optimizer : public IOptimizer
+class Optimizer
{
public:
Optimizer(BackendContext *context);
- void optimize() override;
+ void optimize();
private:
BackendContext *_context;
diff --git a/runtime/onert/backend/acl_neon/TensorBuilder.h b/runtime/onert/backend/acl_neon/TensorBuilder.h
index 070dc20ac..7b6e8406b 100644
--- a/runtime/onert/backend/acl_neon/TensorBuilder.h
+++ b/runtime/onert/backend/acl_neon/TensorBuilder.h
@@ -30,7 +30,7 @@ namespace acl_neon
{
using TensorBuilder =
- acl_common::AclTensorBuilder<operand::INETensor, operand::NETensor, operand::NESubTensor>;
+ acl_common::AclTensorBuilder<operand::INETensor, operand::NETensor, operand::NESubTensor>;
} // namespace acl_neon
} // namespace backend
diff --git a/runtime/onert/backend/acl_neon/TensorManager.h b/runtime/onert/backend/acl_neon/TensorManager.h
index 3b7cfbcfd..5ecc0fbb3 100644
--- a/runtime/onert/backend/acl_neon/TensorManager.h
+++ b/runtime/onert/backend/acl_neon/TensorManager.h
@@ -41,16 +41,16 @@ namespace acl_neon
{
using MemoryManager =
- acl_common::AclMemoryManager<operand::INETensor, operand::NETensor, operand::NESubTensor>;
+ acl_common::AclMemoryManager<operand::INETensor, operand::NETensor, operand::NESubTensor>;
using LinearMemoryManager = acl_common::AclLinearMemoryManager<
- operand::INETensor, operand::NETensor, operand::NESubTensor,
- ::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager,
- ::arm_compute::OffsetLifetimeManager, ::arm_compute::Allocator, ::arm_compute::MemoryGroup>;
+ operand::INETensor, operand::NETensor, operand::NESubTensor, ::arm_compute::MemoryManagerOnDemand,
+ ::arm_compute::PoolManager, ::arm_compute::OffsetLifetimeManager, ::arm_compute::Allocator,
+ ::arm_compute::MemoryGroup>;
using InternalBufferManager = acl_common::AclInternalBufferManager<
- ::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager,
- ::arm_compute::OffsetLifetimeManager, ::arm_compute::Allocator>;
+ ::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager,
+ ::arm_compute::OffsetLifetimeManager, ::arm_compute::Allocator>;
using TensorManager = acl_common::AclTensorManager<acl_neon::operand::INETensor, operand::NETensor,
operand::NESubTensor>;
diff --git a/runtime/onert/backend/acl_neon/acl_neon.cc b/runtime/onert/backend/acl_neon/acl_neon.cc
index f490d132d..6535fb291 100644
--- a/runtime/onert/backend/acl_neon/acl_neon.cc
+++ b/runtime/onert/backend/acl_neon/acl_neon.cc
@@ -14,20 +14,11 @@
* limitations under the License.
*/
-#include <util/logging.h>
-
#include "Backend.h"
extern "C" {
-onert::backend::Backend *onert_backend_create()
-{
- VERBOSE(onert_backend_create) << "'acl_neon' loaded\n";
- return new onert::backend::acl_neon::Backend;
-}
-void onert_backend_destroy(onert::backend::Backend *backend)
-{
- VERBOSE(onert_backend_create) << "'acl_neon' unloaded\n";
- delete backend;
-}
+onert::backend::Backend *onert_backend_create() { return new onert::backend::acl_neon::Backend; }
+
+void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; }
}
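
The simplified acl_neon.cc keeps only the two extern "C" entry points that every onert backend shared object exports; the logging moved elsewhere. A loader resolves these symbols at runtime. Below is a hedged sketch of such dynamic loading with POSIX dlopen/dlsym; the library name and the opaque Backend type are placeholders, not the actual onert loader code.

// Sketch: resolving backend entry points from a shared object.
#include <dlfcn.h>
#include <cstdio>

struct Backend; // opaque to the loader

int main()
{
  void *handle = dlopen("libbackend_cpu.so", RTLD_LAZY | RTLD_LOCAL);
  if (!handle)
  {
    std::fprintf(stderr, "dlopen failed: %s\n", dlerror());
    return 1;
  }

  using create_fn = Backend *(*)();
  using destroy_fn = void (*)(Backend *);

  auto create = reinterpret_cast<create_fn>(dlsym(handle, "onert_backend_create"));
  auto destroy = reinterpret_cast<destroy_fn>(dlsym(handle, "onert_backend_destroy"));
  if (create && destroy)
  {
    Backend *backend = create(); // as exported by acl_neon.cc above
    // ... use the backend ...
    destroy(backend);
  }
  dlclose(handle);
}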
diff --git a/runtime/onert/backend/acl_neon/operand/INETensor.h b/runtime/onert/backend/acl_neon/operand/INETensor.h
index db0ce6fdc..3747b12b7 100644
--- a/runtime/onert/backend/acl_neon/operand/INETensor.h
+++ b/runtime/onert/backend/acl_neon/operand/INETensor.h
@@ -33,6 +33,7 @@ namespace operand
class INETensor : public acl_common::IACLTensor
{
public:
+ INETensor(size_t rank) : IACLTensor{rank} {}
const arm_compute::ITensor *handle() const override = 0;
arm_compute::ITensor *handle() override = 0;
void access(const std::function<void(ITensor &tensor)> &fn) final;
diff --git a/runtime/onert/backend/acl_neon/operand/NESubTensor.cc b/runtime/onert/backend/acl_neon/operand/NESubTensor.cc
index 457addd55..fe82f6206 100644
--- a/runtime/onert/backend/acl_neon/operand/NESubTensor.cc
+++ b/runtime/onert/backend/acl_neon/operand/NESubTensor.cc
@@ -27,9 +27,8 @@ namespace operand
NESubTensor::NESubTensor(INETensor *parent, const arm_compute::TensorShape &tensor_shape,
const arm_compute::Coordinates &coords, size_t rank, bool extend_parent)
- : _ne_sub_tensor(std::make_shared<arm_compute::SubTensor>(parent->handle(), tensor_shape,
- coords, extend_parent)),
- _rank{rank}
+ : INETensor{rank}, _ne_sub_tensor(std::make_shared<arm_compute::SubTensor>(
+ parent->handle(), tensor_shape, coords, extend_parent))
{
// DO NOTHING
}
diff --git a/runtime/onert/backend/acl_neon/operand/NESubTensor.h b/runtime/onert/backend/acl_neon/operand/NESubTensor.h
index e7f77d7fc..74dbe9011 100644
--- a/runtime/onert/backend/acl_neon/operand/NESubTensor.h
+++ b/runtime/onert/backend/acl_neon/operand/NESubTensor.h
@@ -39,19 +39,16 @@ public:
const arm_compute::Coordinates &coords, size_t rank, bool extend_parent = false);
public:
- size_t num_dimensions() const final { return _rank; }
-
-public:
const arm_compute::SubTensor *handle() const override;
arm_compute::SubTensor *handle() override;
public:
// This method is used to prevent the use of memcpy for SubTensor
bool has_padding() const override { return true; }
+ bool is_subtensor() const final { return true; }
private:
std::shared_ptr<arm_compute::SubTensor> _ne_sub_tensor;
- size_t _rank;
};
} // namespace operand
diff --git a/runtime/onert/backend/acl_neon/operand/NETensor.cc b/runtime/onert/backend/acl_neon/operand/NETensor.cc
index 53dbb3021..4b237d731 100644
--- a/runtime/onert/backend/acl_neon/operand/NETensor.cc
+++ b/runtime/onert/backend/acl_neon/operand/NETensor.cc
@@ -28,7 +28,7 @@ namespace operand
{
NETensor::NETensor(const arm_compute::TensorInfo &info, size_t rank, size_t num_uses)
- : _ne_tensor(std::make_shared<arm_compute::Tensor>()), _rank{rank}, _num_uses{num_uses}
+ : INETensor{rank}, _ne_tensor(std::make_shared<arm_compute::Tensor>()), _num_uses{num_uses}
{
allocator()->init(info);
}
diff --git a/runtime/onert/backend/acl_neon/operand/NETensor.h b/runtime/onert/backend/acl_neon/operand/NETensor.h
index 0dd81afec..69f8b2111 100644
--- a/runtime/onert/backend/acl_neon/operand/NETensor.h
+++ b/runtime/onert/backend/acl_neon/operand/NETensor.h
@@ -40,9 +40,6 @@ public:
NETensor(const arm_compute::TensorInfo &info, size_t rank, size_t num_uses);
public:
- size_t num_dimensions() const final { return _rank; }
-
-public:
const arm_compute::Tensor *handle() const override;
arm_compute::Tensor *handle() override;
size_t num_uses() const { return _num_uses; }
@@ -52,7 +49,6 @@ public:
private:
std::shared_ptr<arm_compute::Tensor> _ne_tensor;
- size_t _rank;
size_t _num_uses;
};
diff --git a/runtime/onert/backend/cl_common/CMakeLists.txt b/runtime/onert/backend/cl_common/CMakeLists.txt
new file mode 100644
index 000000000..c75129696
--- /dev/null
+++ b/runtime/onert/backend/cl_common/CMakeLists.txt
@@ -0,0 +1,7 @@
+file(GLOB_RECURSE SOURCES "src/*.cc")
+
+add_library(${LIB_ONERT_BACKEND_CL_COMMON} STATIC ${SOURCES})
+
+target_include_directories(${LIB_ONERT_BACKEND_CL_COMMON} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
+set_target_properties(${LIB_ONERT_BACKEND_CL_COMMON} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_link_libraries(${LIB_ONERT_BACKEND_CL_COMMON} PUBLIC onert_core)
diff --git a/runtime/onert/backend/cl_common/include/cl_common/BackendContext.h b/runtime/onert/backend/cl_common/include/cl_common/BackendContext.h
new file mode 100644
index 000000000..06aafa1b9
--- /dev/null
+++ b/runtime/onert/backend/cl_common/include/cl_common/BackendContext.h
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CL_COMMON_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_CL_COMMON_BACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include <ir/Index.h>
+#include <ir/OperandIndexMap.h>
+#include <ir/OperandIndexSequence.h>
+#include <util/logging.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cl_common
+{
+
+// TODO Find better way to handle common code (reduce template)
+template <typename T_TensorBuilder, typename T_ConstantInitializer, typename T_KernelGenerator>
+class BackendContext : public onert::backend::BackendContext
+{
+public:
+ BackendContext(const Backend *backend, ContextData &&data,
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
+ std::shared_ptr<T_TensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<T_ConstantInitializer> constant_initializer = nullptr,
+ std::shared_ptr<T_KernelGenerator> kernel_gen = nullptr)
+ : onert::backend::BackendContext(backend, std::move(data), tensor_registry),
+ tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, kernel_gen{
+ kernel_gen}
+ {
+ }
+
+ FunctionMap genKernels() override
+ {
+ FunctionMap ret;
+
+ // kernel_gen
+ for (auto &&op_ind : _data.op_order)
+ {
+ auto fn_seq = kernel_gen->generate(op_ind);
+ ret.emplace_back(op_ind, std::move(fn_seq));
+ }
+
+ tensor_builder->allocate();
+ initConsts();
+
+ // NOTE For memory optimization, we want to free some operand data
+ const_cast<ir::Graph &>(*_data.graph)
+ .operands()
+ .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
+
+ for (auto &&it : ret)
+ {
+ auto &fn_seq = it.second;
+ fn_seq->iterate([&](exec::IFunction &ifunc) {
+ ifunc.prepare();
+ tensor_builder->postFunctionPrepare();
+ });
+ }
+
+ return ret;
+ }
+
+protected:
+ void initConsts()
+ {
+ _data.graph->operations().iterate([&](const ir::OperationIndex &, const ir::IOperation &op) {
+ constant_initializer->setLayout(graph()->layout());
+ op.accept(*constant_initializer);
+ });
+
+ _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) {
+ if (_data.external_operands.contains(ind) || !operand.isConstant())
+ return;
+ const auto &obj = graph()->operands().at(ind);
+ if (obj.isConstant() && !constant_initializer->exist(ind))
+ {
+ constant_initializer->registerDefaultInitializer(ind, obj);
+ }
+ });
+
+ constant_initializer->run();
+ }
+
+ virtual void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout backend_layout) = 0;
+
+ void planTensors()
+ {
+ ir::OperandIndexMap<uint32_t> uses_map;
+ ir::OperandIndexMap<uint32_t> def_map;
+ ir::OperandIndexSequence constants;
+
+ // Prepare scanning
+ _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+ if (_data.external_operands.contains(ind))
+ return;
+
+ uses_map[ind] = obj.getUses().size();
+ def_map[ind] = obj.getDef().valid() ? 1 : 0;
+
+ if (obj.isConstant())
+ constants.append(ind);
+
+ if (!tensor_builder->isRegistered(ind))
+ {
+        // These tensors are not used or defined by any operation (no use, no def)
+ const auto &info = obj.info();
+ const auto layout = _data.operand_layouts.at(ind);
+ // TODO Change tensor info to have permuted shape
+ registerTensorInfo(ind, info, layout);
+ }
+ });
+
+ // Start scanning to do notify{First|Last}Use for each tensor
+
+ // If a tensor is a constant, increase the use of the tensor and allocate it first.
+  // Increasing the use count here means the tensor is never deallocated while planning, i.e.,
+  // it will be deallocated last.
+ VERBOSE(planTensors) << "TENSORS as CONSTANT" << std::endl;
+ for (const auto &ind : constants)
+ {
+ uses_map[ind]++;
+ tensor_builder->notifyFirstUse(ind);
+ }
+
+ // At each operation,
+  //   1. Scan DEF of outputs. If this operation defines the operand, allocate it
+  //   2. Scan DEF of inputs. If an input is a variable tensor, allocate it
+ // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
+ for (const auto &op_ind : _data.op_order)
+ {
+ const auto &op = graph()->operations().at(op_ind);
+ auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+ auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+
+ // Define outputs
+ for (const auto &ind : op_outputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ assert(def_map.find(ind) != def_map.end());
+ if (def_map[ind])
+ {
+ def_map[ind] = 0;
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ // Scan variable tensors
+      // These tensors behave like constants, but OperandInfo and LowerInfo treat them as
+      // non-constant so that memory planning here can use less memory
+ for (const auto &ind : op_inputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ const auto &operand = graph()->operands().at(ind);
+ if (operand.info().isVariable())
+ {
+        // Variable tensors that already own a data buffer are not supported yet
+ assert(operand.data() == nullptr);
+ assert(operand.getUses().size() == 1 && !operand.getDef().valid());
+ assert(uses_map[ind] == 1 && def_map[ind] == 0);
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ for (const auto &ind : op_inputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ assert(uses_map.find(ind) != uses_map.end());
+ assert(uses_map[ind] > 0);
+ uses_map[ind]--;
+ if (uses_map[ind] == 0)
+ {
+        // plan for deallocation of static tensor node
+ tensor_builder->notifyLastUse(ind);
+ }
+ }
+ }
+
+ _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
+ if (uses_map[ind] == 0)
+ {
+ tensor_builder->notifyLastUse(ind);
+ }
+ });
+
+ // Dispose and validate
+ for (const auto &ind : constants)
+ {
+ --uses_map[ind];
+ if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
+ {
+ tensor_builder->notifyLastUse(ind);
+ }
+ }
+
+ assert(
+ std::all_of(uses_map.begin(), uses_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+
+ assert(
+ std::all_of(def_map.begin(), def_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+ }
+
+public:
+ // TODO Make it protected
+ std::shared_ptr<T_TensorBuilder> tensor_builder;
+ std::shared_ptr<T_ConstantInitializer> constant_initializer;
+ std::shared_ptr<T_KernelGenerator> kernel_gen;
+};
+
+} // namespace cl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CL_COMMON_BACKEND_CONTEXT_H__
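
The planTensors() method added above is a linear-scan liveness pass: it counts remaining uses and pending defs per operand, calls notifyFirstUse at the first definition, calls notifyLastUse once the use count hits zero, and pins constants with an extra use so they are released last. A compact standalone sketch of the same counting scheme, using toy int operands rather than the onert interfaces:

// Sketch: use/def counting as in planTensors(). Operands are ints;
// each op lists its input and output operand indices.
#include <iostream>
#include <map>
#include <vector>

struct Op
{
  std::vector<int> inputs;
  std::vector<int> outputs;
};

int main()
{
  // op0: 0 -> 1, op1: 1 -> 2  (operand 1 is a temporary)
  std::vector<Op> order = {{{0}, {1}}, {{1}, {2}}};

  std::map<int, int> uses = {{0, 1}, {1, 1}, {2, 0}};
  std::map<int, int> defs = {{0, 0}, {1, 1}, {2, 1}};

  // Operand 0 is a graph input: treat it as live from the start.
  std::cout << "firstUse(0)\n";

  for (const auto &op : order)
  {
    for (int out : op.outputs)
      if (defs[out]-- > 0)  // first (and only) definition: allocate
        std::cout << "firstUse(" << out << ")\n";
    for (int in : op.inputs)
      if (--uses[in] == 0)  // last use: safe to free
        std::cout << "lastUse(" << in << ")\n";
  }
}

Running this prints firstUse(0), firstUse(1), lastUse(0), firstUse(2), lastUse(1): operand 1 lives exactly from op0's output to op1's input, which is what lets the memory planner overlap buffers.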
diff --git a/runtime/onert/backend/cl_common/include/cl_common/LifetimeMap.h b/runtime/onert/backend/cl_common/include/cl_common/LifetimeMap.h
new file mode 100644
index 000000000..5fe5eec79
--- /dev/null
+++ b/runtime/onert/backend/cl_common/include/cl_common/LifetimeMap.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CL_COMMON_LIFETIME_MAP_H__
+#define __ONERT_BACKEND_CL_COMMON_LIFETIME_MAP_H__
+
+#include "cl_common/ParentInfo.h"
+
+#include <ir/OperandIndexMap.h>
+
+#include <map>
+#include <vector>
+
+namespace onert
+{
+namespace backend
+{
+namespace cl_common
+{
+
+// TODO Abstract UserType into LifetimeMap and LifetimeSeq
+enum class UsesType
+{
+ FIRST,
+ LAST
+};
+
+// TODO Define class or struct for LifetimeMap and LifetimeSeq
+using LifetimeMap = std::map<size_t, std::pair<UsesType, ir::OperandIndex>>;
+using LifetimeSeq = std::vector<std::pair<UsesType, ir::OperandIndex>>;
+
+LifetimeMap createLifetimeMap(LifetimeSeq &seq, ir::OperandIndexMap<ParentInfo> &parent_map);
+
+} // namespace cl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CL_COMMON_LIFETIME_MAP_H__
diff --git a/runtime/onert/backend/cl_common/include/cl_common/ParentInfo.h b/runtime/onert/backend/cl_common/include/cl_common/ParentInfo.h
new file mode 100644
index 000000000..510211cb7
--- /dev/null
+++ b/runtime/onert/backend/cl_common/include/cl_common/ParentInfo.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CL_COMMON_PARENT_INFO_H__
+#define __ONERT_BACKEND_CL_COMMON_PARENT_INFO_H__
+
+#include <ir/Index.h>
+#include <ir/Coordinates.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cl_common
+{
+
+/**
+ * @brief Struct to represent parent operand in child operand
+ */
+struct ParentInfo
+{
+ ir::OperandIndex parent;
+ ir::Layout frontend_layout;
+ ir::Coordinates coordinates;
+};
+
+} // namespace cl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CL_COMMON_PARENT_INFO_H__
diff --git a/runtime/onert/backend/cl_common/src/LifetimeMap.cc b/runtime/onert/backend/cl_common/src/LifetimeMap.cc
new file mode 100644
index 000000000..0b17c58fb
--- /dev/null
+++ b/runtime/onert/backend/cl_common/src/LifetimeMap.cc
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "cl_common/LifetimeMap.h"
+
+#include <unordered_map>
+
+namespace onert
+{
+namespace backend
+{
+namespace cl_common
+{
+
+LifetimeMap createLifetimeMap(LifetimeSeq &lifetime_seq,
+ ir::OperandIndexMap<ParentInfo> &parent_map)
+{
+ // Update lifetime sequence to apply subtensor optimization
+ std::unordered_map<ir::OperandIndex, ir::OperandIndex> root_map;
+ std::function<ir::OperandIndex &(ir::OperandIndex)> find_root =
+ [&](ir::OperandIndex ind) -> ir::OperandIndex & {
+ ir::OperandIndex &ret = root_map[ind];
+
+ // We know the root parent value already
+ if (ret.valid())
+ return ret;
+
+ auto itr = parent_map.find(ind);
+ if (itr == parent_map.end())
+ {
+      // If there is no parent, the operand is its own root; store it
+ return ret = ind;
+ }
+ else
+ {
+ return ret = find_root(itr->second.parent);
+ }
+ };
+
+ ir::OperandIndexMap<bool> first_use_check;
+ ir::OperandIndexMap<bool> last_use_check;
+ LifetimeMap lifetime_map;
+ for (size_t i = 0; i < lifetime_seq.size(); i++)
+ {
+ auto &entry = lifetime_seq[i];
+ if (entry.first != UsesType::FIRST)
+ continue;
+ auto root_ind = find_root(entry.second);
+ if (first_use_check[root_ind])
+ continue;
+ first_use_check[root_ind] = true;
+ lifetime_map[i] = {UsesType::FIRST, root_ind};
+ }
+
+ for (int i = lifetime_seq.size() - 1; i >= 0; i--)
+ {
+ auto &entry = lifetime_seq[i];
+ if (entry.first != UsesType::LAST)
+ continue;
+ auto root_ind = find_root(entry.second);
+ if (last_use_check[root_ind])
+ continue;
+ last_use_check[root_ind] = true;
+ lifetime_map[i] = {UsesType::LAST, root_ind};
+ }
+
+ return lifetime_map;
+}
+
+} // namespace cl_common
+} // namespace backend
+} // namespace onert
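
createLifetimeMap() above collapses each sub-tensor onto its root parent before recording FIRST/LAST events, so a whole parent/child family shares a single lifetime. The recursive find_root with its root_map cache is essentially a memoized find over a parent forest; here is a tiny standalone version of that lookup, simplified to int indices purely for illustration:

// Sketch: memoized root lookup, as in createLifetimeMap()'s find_root.
#include <functional>
#include <iostream>
#include <unordered_map>

int main()
{
  // child -> parent edges: 3 -> 2 -> 1; operand 1 has no parent.
  std::unordered_map<int, int> parent = {{3, 2}, {2, 1}};
  std::unordered_map<int, int> root_cache;

  std::function<int(int)> find_root = [&](int ind) -> int {
    auto cached = root_cache.find(ind);
    if (cached != root_cache.end())
      return cached->second;               // already resolved once
    auto it = parent.find(ind);
    int root = (it == parent.end()) ? ind : find_root(it->second);
    root_cache[ind] = root;                // memoize for later queries
    return root;
  };

  std::cout << find_root(3) << "\n"; // 1: lifetime events for 3 attach to 1
  std::cout << find_root(1) << "\n"; // 1: roots map to themselves
}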
diff --git a/runtime/onert/backend/cpu/Backend.h b/runtime/onert/backend/cpu/Backend.h
index fc8574b26..398c188a8 100644
--- a/runtime/onert/backend/cpu/Backend.h
+++ b/runtime/onert/backend/cpu/Backend.h
@@ -19,7 +19,6 @@
#include "BackendContext.h"
#include "Config.h"
-#include "ConstantInitializer.h"
#include "KernelGenerator.h"
#include <backend/Backend.h>
@@ -40,22 +39,17 @@ public:
std::shared_ptr<IConfig> config() const override { return _config; }
- std::unique_ptr<onert::backend::BackendContext>
- newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &kb,
- bool) const override
+ std::unique_ptr<onert::backend::BackendContext> newContext(ContextData &&data) const override
{
- const auto &operands = graph.operands();
- const auto &operations = graph.operations();
- auto context = std::make_unique<BackendContext>(this, &graph);
- auto tr = std::make_shared<cpu_common::TensorRegistry>();
+ auto custom_kernel_builder = data.custom_kernel_builder;
+ auto &graph = *data.graph;
+ auto context = std::make_unique<BackendContext>(this, std::move(data));
+ auto tr = std::make_shared<basic::TensorRegistry>();
auto tb = std::make_shared<TensorBuilder>(tr);
context->tensor_registry = tr;
context->tensor_builder = tb;
- context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
- context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb,
+ context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, custom_kernel_builder,
context->external_context());
- context->tensor_register = nullptr;
- context->optimizer = nullptr;
return context;
}
diff --git a/runtime/onert/backend/cpu/BackendContext.cc b/runtime/onert/backend/cpu/BackendContext.cc
new file mode 100644
index 000000000..45de6b972
--- /dev/null
+++ b/runtime/onert/backend/cpu/BackendContext.cc
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+#include "util/logging.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandIndexSequence.h"
+#include "backend/basic/BackendContextHelpers.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+
+ITensorRegistry *BackendContext::genTensors() { return basic::genTensors(*this); }
+
+FunctionMap BackendContext::genKernels()
+{
+ FunctionMap ret;
+
+ for (auto &&op_ind : _data.op_order)
+ {
+ auto fn_seq = kernel_gen->generate(op_ind);
+ ret.emplace_back(op_ind, std::move(fn_seq));
+ }
+
+ basic::initConsts(*this);
+
+ // NOTE For memory optimization, we want to free some operand data
+ const_cast<ir::Graph &>(*_data.graph)
+ .operands()
+ .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
+
+ for (auto &&it : ret)
+ {
+ auto &fn_seq = it.second;
+ fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
+ }
+
+ return ret;
+}
+
+} // namespace cpu
+} // namespace backend
+} // namespace onert
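
genKernels() here runs in two phases: first it materializes one FunctionSequence per operation in topological order, then, after constants are initialized and spare operand data is released, it walks every sequence once so each kernel can prepare() before execution. A stripped-down sketch of that generate-then-prepare pattern, with a toy IFunction rather than the onert class:

// Sketch: two-phase kernel setup, mirroring BackendContext::genKernels().
#include <iostream>
#include <memory>
#include <utility>
#include <vector>

struct IFunction
{
  virtual ~IFunction() = default;
  virtual void prepare() {}
  virtual void run() = 0;
};

struct PrintFn : IFunction
{
  int id;
  explicit PrintFn(int i) : id(i) {}
  void prepare() override { std::cout << "prepare " << id << "\n"; }
  void run() override { std::cout << "run " << id << "\n"; }
};

int main()
{
  // Phase 1: generate one function per operation, in execution order.
  std::vector<std::pair<int, std::unique_ptr<IFunction>>> fmap;
  for (int op_ind : {0, 1, 2})
    fmap.emplace_back(op_ind, std::make_unique<PrintFn>(op_ind));

  // (constants would be initialized and spare operand data released here)

  // Phase 2: a single prepare pass before any execution.
  for (auto &it : fmap)
    it.second->prepare();

  for (auto &it : fmap)
    it.second->run();
}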
diff --git a/runtime/onert/backend/cpu/BackendContext.h b/runtime/onert/backend/cpu/BackendContext.h
index e90b21054..69ab30c82 100644
--- a/runtime/onert/backend/cpu/BackendContext.h
+++ b/runtime/onert/backend/cpu/BackendContext.h
@@ -18,6 +18,8 @@
#define __ONERT_BACKEND_CPU_BACKEND_CONTEXT_H__
#include <backend/BackendContext.h>
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
#include "ExternalContext.h"
namespace onert
@@ -30,22 +32,25 @@ namespace cpu
class BackendContext : public onert::backend::BackendContext
{
public:
- BackendContext(const Backend *backend, const ir::Graph *graph,
+ BackendContext(const Backend *backend, ContextData &&data,
std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
- std::shared_ptr<ITensorBuilder> tensor_builder = nullptr,
- std::shared_ptr<IConstantInitializer> constant_initializer = nullptr,
- std::shared_ptr<IKernelGenerator> kernel_gen = nullptr,
- std::shared_ptr<ITensorRegister> tensor_register = nullptr,
- std::shared_ptr<IOptimizer> optimizer = nullptr)
- : onert::backend::BackendContext(backend, graph, tensor_registry, tensor_builder,
- constant_initializer, kernel_gen, tensor_register,
- optimizer),
- _external_context(new ExternalContext)
+ std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+ : onert::backend::BackendContext(backend, std::move(data), tensor_registry),
+ tensor_builder{tensor_builder}, kernel_gen{kernel_gen}, _external_context(new ExternalContext)
{
}
+ ITensorRegistry *genTensors() override;
+ FunctionMap genKernels() override;
+
std::shared_ptr<ExternalContext> external_context() { return _external_context; }
+public:
+ // TODO Make it private
+ std::shared_ptr<TensorBuilder> tensor_builder;
+ std::shared_ptr<KernelGenerator> kernel_gen;
+
private:
// NOTE ruy context has a thread pool, and when multiple ruy contexts are created,
// the thread pool is also created in duplicate
diff --git a/runtime/onert/backend/cpu/CMakeLists.txt b/runtime/onert/backend/cpu/CMakeLists.txt
index 01a3cd178..1383263e7 100644
--- a/runtime/onert/backend/cpu/CMakeLists.txt
+++ b/runtime/onert/backend/cpu/CMakeLists.txt
@@ -1,18 +1,24 @@
-set(LIB_ONERT_BACKEND_CPU onert_backend_cpu)
-
nnfw_find_package(Ruy REQUIRED)
file(GLOB_RECURSE SOURCES "*.cc")
add_library(${LIB_ONERT_BACKEND_CPU} SHARED ${SOURCES})
-target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_lib_cker)
+target_include_directories(${LIB_ONERT_BACKEND_CPU} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
+target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_lib_cker nnfw_lib_misc)
target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE onert_core)
target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_common)
target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_coverage)
target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE ruy)
target_link_libraries(${LIB_ONERT_BACKEND_CPU} INTERFACE ruy_instrumentation)
+target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE ndarray)
set_target_properties(${LIB_ONERT_BACKEND_CPU} PROPERTIES OUTPUT_NAME backend_cpu)
+set_target_properties(${LIB_ONERT_BACKEND_CPU} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+
+if(CMAKE_BUILD_TYPE_LC STREQUAL "release")
+ add_custom_command(TARGET ${LIB_ONERT_BACKEND_CPU} POST_BUILD
+ COMMAND ${CMAKE_STRIP} "--strip-unneeded" $<TARGET_FILE_NAME:${LIB_ONERT_BACKEND_CPU}>)
+endif()
install(TARGETS ${LIB_ONERT_BACKEND_CPU} DESTINATION lib)
diff --git a/runtime/onert/backend/cpu/Config.cc b/runtime/onert/backend/cpu/Config.cc
index 3ace47f5d..f80c2caf1 100644
--- a/runtime/onert/backend/cpu/Config.cc
+++ b/runtime/onert/backend/cpu/Config.cc
@@ -25,7 +25,7 @@ namespace cpu
bool Config::initialize() { return true; }
-ir::Layout Config::supportLayout(const ir::Operation &, ir::Layout) { return ir::Layout::NHWC; }
+ir::Layout Config::supportLayout(const ir::IOperation &, ir::Layout) { return ir::Layout::NHWC; }
} // namespace cpu
} // namespace backend
diff --git a/runtime/onert/backend/cpu/Config.h b/runtime/onert/backend/cpu/Config.h
index 37e49581a..841a839d1 100644
--- a/runtime/onert/backend/cpu/Config.h
+++ b/runtime/onert/backend/cpu/Config.h
@@ -33,7 +33,7 @@ class Config : public IConfig
public:
std::string id() override { return "cpu"; }
bool initialize() override;
- ir::Layout supportLayout(const ir::Operation &node, ir::Layout frontend_layout) override;
+ ir::Layout supportLayout(const ir::IOperation &node, ir::Layout frontend_layout) override;
bool supportPermutation() override { return true; }
bool supportDynamicTensor() override { return true; }
bool supportFP16() override { return false; }
diff --git a/runtime/onert/backend/cpu/ConstantInitializer.cc b/runtime/onert/backend/cpu/ConstantInitializer.cc
deleted file mode 100644
index 6f6eb77bc..000000000
--- a/runtime/onert/backend/cpu/ConstantInitializer.cc
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ConstantInitializer.h"
-#include "Tensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-
-ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<ITensorRegistry> &tensor_reg)
- : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
-{
- // DO NOTHING
-}
-
-void ConstantInitializer::registerDefaultInitializer(const ir::OperandIndex &index,
- const ir::Operand &obj)
-{
- registerExternalInitializer(index, obj);
-}
-
-void ConstantInitializer::registerExternalInitializer(const ir::OperandIndex &index,
- const ir::Operand &obj)
-{
- // For only CONSTANTS
- // TODO Add to check if tensor has been allocated
- if (!obj.isConstant())
- return;
-
- _init_map[index] = [](const onert::ir::Operand &model_obj, onert::backend::ITensor &itensor) {
- auto data = model_obj.shareData();
- assert(data && data->base());
- ExternalTensor &tensor = dynamic_cast<ExternalTensor &>(itensor);
- tensor.setData(data);
- };
-}
-
-void ConstantInitializer::visit(const ir::operation::Conv2D &node)
-{
- const auto &kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL);
- const auto &kernel_obj = _operands.at(kernel_index);
- registerExternalInitializer(kernel_index, kernel_obj);
-
- const auto &bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS);
- const auto &bias_obj = _operands.at(bias_index);
- registerExternalInitializer(bias_index, bias_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node)
-{
- const auto &kernel_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL);
- const auto &kernel_obj = _operands.at(kernel_index);
- registerExternalInitializer(kernel_index, kernel_obj);
-
- const auto &bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS);
- const auto &bias_obj = _operands.at(bias_index);
- registerExternalInitializer(bias_index, bias_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::FullyConnected &node)
-{
- const auto &weight_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT);
- const auto &weight_obj = _operands.at(weight_index);
- registerExternalInitializer(weight_index, weight_obj);
-
- const auto &bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS);
- if (!bias_index.undefined())
- {
- const auto &bias_obj = _operands.at(bias_index);
- registerExternalInitializer(bias_index, bias_obj);
- }
-}
-
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ConstantInitializer.h b/runtime/onert/backend/cpu/ConstantInitializer.h
deleted file mode 100644
index c016c83bc..000000000
--- a/runtime/onert/backend/cpu/ConstantInitializer.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__
-#define __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__
-
-#include "backend/cpu_common/TensorRegistry.h"
-
-#include <backend/IConstantInitializer.h>
-#include <ir/Operands.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-
-class ConstantInitializer : public IConstantInitializer
-{
-public:
- ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<ITensorRegistry> &tensor_reg);
-
-public:
- void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj) override;
-
- // TODO: For now the only cpu backend supports constant tensor to use data from external
- // If the other backend supports (to do this,
- // ExternalTensor should be abstract such as IExternal, maybe),
- // this can be an interface of IConstantInitializer
- void registerExternalInitializer(const ir::OperandIndex &, const ir::Operand &);
-
-public:
- void visit(const ir::operation::Conv2D &) override;
- void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::FullyConnected &) override;
-
-private:
- std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; }
-
-private:
- std::shared_ptr<ITensorRegistry> _tensor_reg;
-};
-
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__
diff --git a/runtime/onert/backend/cpu/ExternalContext.h b/runtime/onert/backend/cpu/ExternalContext.h
index 6627412d2..6ed4799a8 100644
--- a/runtime/onert/backend/cpu/ExternalContext.h
+++ b/runtime/onert/backend/cpu/ExternalContext.h
@@ -17,14 +17,10 @@
#ifndef __ONERT_BACKEND_CPU_EXTERNAL_CONTEXT_H__
#define __ONERT_BACKEND_CPU_EXTERNAL_CONTEXT_H__
-#include <backend/IExternalContext.h>
#include <util/ConfigSource.h>
#include <ruy/context.h>
-namespace
-{
-const int kDefaultNumThreadpoolThreads = 1;
-}
+#include <memory>
namespace onert
{
@@ -33,22 +29,22 @@ namespace backend
namespace cpu
{
-class ExternalContext : public IExternalContext
+class ExternalContext
{
+private:
+ static const int kDefaultNumThreadpoolThreads = 1;
+
public:
ExternalContext() : _ruy_context(new ruy::Context)
{
setMaxNumThreads(onert::util::getConfigInt(onert::util::config::RUY_THREADS));
-#ifdef USE_RUY_GEMV
- _ruy_context->cache_policy = ruy::kCacheLHSOnNarrowMul;
-#endif
}
void setMaxNumThreads(int max_num_threads)
{
const int target_num_threads =
- max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads;
- _ruy_context->max_num_threads = target_num_threads;
+ max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads;
+ _ruy_context->set_max_num_threads(target_num_threads);
}
ruy::Context *ruy_context() const { return _ruy_context.get(); }
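
The updated ExternalContext clamps a negative RUY_THREADS configuration value to a one-thread default before handing it to ruy via set_max_num_threads(). The clamp itself reduces to the following; a minimal sketch as a plain function, with no ruy dependency, so the rule is easy to test in isolation:

// Sketch: the thread-count clamp used by ExternalContext::setMaxNumThreads.
#include <cassert>

constexpr int kDefaultNumThreadpoolThreads = 1;

int clampThreads(int max_num_threads)
{
  // -1 (unset config) falls back to the single-thread default.
  return max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads;
}

int main()
{
  assert(clampThreads(-1) == 1);
  assert(clampThreads(0) == 0); // 0 is passed through unchanged
  assert(clampThreads(4) == 4);
}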
diff --git a/runtime/onert/backend/cpu/KernelGenerator.cc b/runtime/onert/backend/cpu/KernelGenerator.cc
index 74b6f0c6b..dff54c1de 100644
--- a/runtime/onert/backend/cpu/KernelGenerator.cc
+++ b/runtime/onert/backend/cpu/KernelGenerator.cc
@@ -16,12 +16,14 @@
#include "KernelGenerator.h"
+#include "ops/AddNLayer.h"
#include "ops/ArgMinMaxLayer.h"
#include "ops/BatchToSpaceNDLayer.h"
#include "ops/BinaryArithmeticLayer.h"
#include "ops/CompareLayer.h"
#include "ops/ConcatLayer.h"
#include "ops/ConvolutionLayer.h"
+#include "ops/DepthToSpaceLayer.h"
#include "ops/DepthwiseConvolutionLayer.h"
#include "ops/EinsumLayer.h"
#include "ops/ElementwiseActivationLayer.h"
@@ -31,13 +33,16 @@
#include "ops/FillLayer.h"
#include "ops/FullyConnectedLayer.h"
#include "ops/GatherLayer.h"
+#include "ops/LSTMLayer.h"
#include "ops/MeanLayer.h"
+#include "ops/DetectionPostProcessLayer.h"
#include "ops/OneHotLayer.h"
#include "ops/OperationUtils.h"
#include "ops/PackLayer.h"
#include "ops/PadLayer.h"
#include "ops/PoolLayer.h"
#include "ops/PowLayer.h"
+#include "ops/QuantizeLayer.h"
#include "ops/RangeLayer.h"
#include "ops/RankLayer.h"
#include "ops/ReduceLayer.h"
@@ -70,7 +75,7 @@
#include <memory>
#include <util/Utils.h>
#include <util/logging.h>
-#include <exec/DynamicShapeInference.h>
+#include <exec/DynamicShapeInferer.h>
#include <stdexcept>
@@ -106,12 +111,16 @@ convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type
{
switch (type_ir)
{
+ case ir::operation::ElementwiseActivation::Type::ELU:
+ return ops::ElementwiseActivationType::kElu;
case ir::operation::ElementwiseActivation::Type::LOGISTIC:
return ops::ElementwiseActivationType::kLogistic;
case ir::operation::ElementwiseActivation::Type::RELU:
return ops::ElementwiseActivationType::kReLU;
case ir::operation::ElementwiseActivation::Type::TANH:
return ops::ElementwiseActivationType::kTanh;
+ case ir::operation::ElementwiseActivation::Type::LEAKY_RELU:
+ return ops::ElementwiseActivationType::kLeakyReLU;
default:
throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
}
@@ -122,6 +131,10 @@ convertElementwiseBinaryType(ir::operation::ElementwiseBinary::ElementwiseBinary
{
switch (type_ir)
{
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::FLOOR_DIV:
+ return ops::ElementwiseBinaryType::kFloorDiv;
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
+ return ops::ElementwiseBinaryType::kLogicalAnd;
case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
return ops::ElementwiseBinaryType::kLogicalOr;
case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
@@ -143,10 +156,14 @@ ops::ElementwiseUnaryType convertElementwiseUnaryType(ir::operation::Elementwise
return ops::ElementwiseUnaryType::kCast;
case ir::operation::ElementwiseUnary::Type::COS:
return ops::ElementwiseUnaryType::kCos;
+ case ir::operation::ElementwiseUnary::Type::DEQUANTIZE:
+ return ops::ElementwiseUnaryType::kDequantize;
case ir::operation::ElementwiseUnary::Type::ERF:
return ops::ElementwiseUnaryType::kErf;
case ir::operation::ElementwiseUnary::Type::EXP:
return ops::ElementwiseUnaryType::kExp;
+ case ir::operation::ElementwiseUnary::Type::FLOOR:
+ return ops::ElementwiseUnaryType::kFloor;
case ir::operation::ElementwiseUnary::Type::LOG:
return ops::ElementwiseUnaryType::kLog;
case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
@@ -161,6 +178,10 @@ ops::ElementwiseUnaryType convertElementwiseUnaryType(ir::operation::Elementwise
return ops::ElementwiseUnaryType::kRSqrt;
case ir::operation::ElementwiseUnary::Type::SIN:
return ops::ElementwiseUnaryType::kSin;
+ case ir::operation::ElementwiseUnary::Type::SQRT:
+ return ops::ElementwiseUnaryType::kSqrt;
+ case ir::operation::ElementwiseUnary::Type::SQUARE:
+ return ops::ElementwiseUnaryType::kSquare;
case ir::operation::ElementwiseUnary::Type::ZEROS_LIKE:
return ops::ElementwiseUnaryType::kZerosLike;
default:
@@ -204,63 +225,70 @@ ops::ReduceType convertReduceType(ir::operation::Reduce::ReduceType reduce_type_
} // namespace
KernelGenerator::KernelGenerator(
- const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder,
- const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
- const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
- const std::shared_ptr<ExternalContext> &external_context)
- : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
- _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
- _current_op_seq_layout(ir::Layout::UNKNOWN), _external_context(external_context)
+ const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<basic::TensorRegistry> &tensor_reg,
+ const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
+ const std::shared_ptr<ExternalContext> &external_context)
+ : basic::KernelGeneratorBase{graph},
+ _ctx(graph.operands()), _operations_ctx{graph.operations()}, _current_layout{graph.layout()},
+ _tensor_builder(tensor_builder), _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
+ _external_context(external_context)
{
// DO NOTHING
}
-void KernelGenerator::visit(const ir::OpSequence &op_seq)
+std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
{
- assert(!_return_fn_seq);
+ auto ret = std::make_unique<exec::FunctionSequence>();
+
assert(_tensor_builder->dynamicTensorManager());
assert(_tensor_reg);
- auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
-
- _return_fn_seq = std::make_unique<exec::FunctionSequence>();
-
// Prepare to handle dynamic tensors later
auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
{
- dyn_ctx->op_seq = &op_seq;
- dyn_ctx->operations = &_operations_ctx;
- dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
- dyn_ctx->tensor_registry = _tensor_reg;
- dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();
-
- _return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
+ dyn_ctx->op = &_operations_ctx.at(ind);
+ dyn_ctx->dynamic_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
}
- _return_fn_seq->enableDynamicShapeInferer(true);
+ ret->dynamic_tensor_ctx(dyn_ctx);
- _current_op_seq_layout = op_seq.getLayout();
- for (const auto &operation_idx : op_seq.operations())
+ auto &op = _graph.operations().at(ind);
+ op.accept(*this);
+ assert(_return_fn); // _return_fn must have been generated
+ ret->append(std::move(_return_fn));
+
+ for (auto &&ind : (op.getInputs() | ir::Remove::UNDEFINED) + op.getOutputs())
{
- const auto &node = _operations_ctx.at(operation_idx);
- node.accept(*this);
- _return_fn_seq->append(releaseFunction());
+ auto portable_tensor = _tensor_reg->getPortableTensor(ind);
+ if (portable_tensor)
+ {
+ assert(portable_tensor->layout() == ir::Layout::NHWC);
+ }
- for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
+ auto tensor = _tensor_reg->getNativeTensor(ind);
+ if (tensor)
{
- auto portable_tensor = _tensor_reg->getPortableTensor(ind);
- if (portable_tensor)
- {
- assert(portable_tensor->layout() == ir::Layout::NHWC);
- }
-
- auto tensor = _tensor_reg->getNativeTensor(ind);
- if (tensor)
- {
- tensor->increase_ref();
- }
+ tensor->increase_ref();
}
}
+ return ret;
+}
+
+void KernelGenerator::visit(const ir::operation::AddN &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+
+ std::vector<const IPortableTensor *> input_tensors;
+ for (const auto &input_idx : node.getInputs())
+ input_tensors.emplace_back(_tensor_reg->getPortableTensor(input_idx));
+
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+
+ auto fn = std::make_unique<ops::AddNLayer>();
+
+ fn->configure(std::move(input_tensors), output_tensor);
+
+ _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::Conv2D &node)
@@ -272,14 +300,14 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
- auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
- auto ker_tensor = _tensor_reg->getPortableTensor(ker_index).get();
- auto bias_tensor = _tensor_reg->getPortableTensor(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
+ auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
+ auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);
const auto stride = node.param().stride;
const auto activation = node.param().activation;
- const auto param_padding = node.param().padding;
+ const auto &param_padding = node.param().padding;
const auto dilation = node.param().dilation;
auto fn = std::make_unique<ops::ConvolutionLayer>();
@@ -293,16 +321,16 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
_return_fn = std::move(fn);
return;
}
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
// Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
const auto &ker_shape = _ctx.at(ker_index).shape();
const auto ker_height = ker_shape.dim(1);
const auto ker_width = ker_shape.dim(2);
const auto padding =
- ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
- dilation.width_factor, dilation.height_factor);
+ ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ dilation.width_factor, dilation.height_factor);
fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
@@ -321,27 +349,29 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
const auto stride = node.param().stride;
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
// Kernel format is [1, kernel_height, kernel_width, depth_out].
const auto &ker_shape = _ctx.at(ker_index).shape();
const auto ker_height = ker_shape.dim(1);
const auto ker_width = ker_shape.dim(2);
+ const auto dilation_width = node.param().dilation.width_factor;
+ const auto dilation_height = node.param().dilation.height_factor;
const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
- ker_width, ker_height);
+ ker_width, ker_height, dilation_width, dilation_height);
const auto multiplier = node.param().multiplier;
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
- auto ker_tensor = _tensor_reg->getPortableTensor(ker_index).get();
- auto bias_tensor = _tensor_reg->getPortableTensor(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
+ auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
+ auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);
auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>();
fn->configure(ifm_tensor, ker_tensor, bias_tensor, padding.left, padding.right, padding.top,
- padding.bottom, stride.horizontal, stride.vertical, multiplier, activation,
- ofm_tensor);
+ padding.bottom, stride.horizontal, stride.vertical, multiplier, dilation_width,
+ dilation_height, activation, ofm_tensor, _external_context);
_return_fn = std::move(fn);
}
@@ -351,13 +381,13 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto rank = _ctx.at(ofm_index).shape().rank();
- const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
+ const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);
- auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
std::vector<const IPortableTensor *> input_tensors;
- for (auto &ifm_idx : node.getInputs())
- input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
+ for (const auto &ifm_idx : node.getInputs())
+ input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
auto fn = std::make_unique<ops::ConcatLayer>();
@@ -372,9 +402,9 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
const auto input_index{node.getInputs().at(ir::operation::BatchToSpaceND::INPUT)};
const auto block_size_index{node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE)};
- auto output_alloc = _tensor_reg->getPortableTensor(output_index).get();
- auto input_alloc = _tensor_reg->getPortableTensor(input_index).get();
- auto block_size_alloc = _tensor_reg->getPortableTensor(block_size_index).get();
+ auto output_alloc = _tensor_reg->getPortableTensor(output_index);
+ auto input_alloc = _tensor_reg->getPortableTensor(input_index);
+ auto block_size_alloc = _tensor_reg->getPortableTensor(block_size_index);
auto fn = std::make_unique<ops::BatchToSpaceNDLayer>();
@@ -384,7 +414,7 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
if (node.getInputs().size() != NNApiInputs)
{
const auto crops_data_index{node.getInputs().at(ir::operation::BatchToSpaceND::CROPS_DATA)};
- crops_alloc = _tensor_reg->getPortableTensor(crops_data_index).get();
+ crops_alloc = _tensor_reg->getPortableTensor(crops_data_index);
}
fn->configure(input_alloc, output_alloc, block_size_alloc, crops_alloc);
@@ -395,16 +425,15 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
void KernelGenerator::visit(const ir::operation::Fill &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Fill::Input::INPUT)};
+ // SHAPE input is used for shape inference
const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
- auto value_tensor = _tensor_reg->getPortableTensor(value_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto value_tensor = _tensor_reg->getPortableTensor(value_index);
auto fn = std::make_unique<ops::FillLayer>();
- fn->configure(input_tensor, value_tensor, output_tensor);
+ fn->configure(value_tensor, output_tensor);
_return_fn = std::move(fn);
}
@@ -418,16 +447,16 @@ void KernelGenerator::visit(const ir::operation::FullyConnected &node)
const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
const auto activation = node.param().activation;
+ const auto weights_format = node.param().weights_format;
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
- auto weight_tensor = _tensor_reg->getPortableTensor(weight_index).get();
- auto bias_tensor =
- bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto weight_tensor = _tensor_reg->getPortableTensor(weight_index);
+ auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index);
auto fn = std::make_unique<ops::FullyConnectedLayer>();
- fn->configure(input_tensor, weight_tensor, bias_tensor, activation, output_tensor,
+ fn->configure(input_tensor, weight_tensor, bias_tensor, activation, weights_format, output_tensor,
_external_context);
_return_fn = std::move(fn);
@@ -438,8 +467,8 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
// optional 2nd input
IPortableTensor *shape_tensor = nullptr;
@@ -447,7 +476,7 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
if (node.getInputs().size() == 2)
{
const auto shape_index{node.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
- shape_tensor = _tensor_reg->getPortableTensor(shape_index).get();
+ shape_tensor = _tensor_reg->getPortableTensor(shape_index);
}
auto fn = std::make_unique<ops::ReshapeLayer>();
@@ -461,8 +490,8 @@ void KernelGenerator::visit(const ir::operation::Squeeze &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
// Squeeze can share same kernel with reshape
auto fn = std::make_unique<ops::ReshapeLayer>();
@@ -479,8 +508,8 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
const auto beta = node.param().beta;
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
auto fn = std::make_unique<ops::SoftMaxLayer>();
@@ -497,9 +526,9 @@ void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
- auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
- auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
auto fn = std::make_unique<ops::BinaryArithmeticLayer>();
@@ -515,9 +544,9 @@ void KernelGenerator::visit(const ir::operation::Comparison &node)
const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
- auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
- auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
- auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
auto comparison_type = node.param().comparison_type;
@@ -534,9 +563,9 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
- auto indices_tensor = _tensor_reg->getPortableTensor(indices_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto indices_tensor = _tensor_reg->getPortableTensor(indices_index);
const auto backend_layout = output_tensor->layout();
UNUSED_RELEASE(backend_layout);
@@ -553,7 +582,7 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
assert(backend_layout == indices_tensor->layout());
const auto &input_shape = _ctx.at(input_index).shape();
UNUSED_RELEASE(input_shape);
- assert(input_shape.rank() < 4 || _current_op_seq_layout == backend_layout);
+ assert(input_shape.rank() < 4 || _current_layout == backend_layout);
const auto axis_raw = node.param().axis;
const auto axis_value = (axis_raw < 0 ? (input_shape.rank() + axis_raw) : axis_raw);
@@ -575,14 +604,14 @@ void KernelGenerator::visit(const ir::operation::OneHot &node)
const auto axis = node.param().axis;
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto indices_tensor = _tensor_reg->getPortableTensor(indices_index).get();
- auto depth_tensor = _tensor_reg->getPortableTensor(depth_index).get();
- auto onvalue_tensor = _tensor_reg->getPortableTensor(onvalue_index).get();
- auto offvalue_tensor = _tensor_reg->getPortableTensor(offvalue_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto indices_tensor = _tensor_reg->getPortableTensor(indices_index);
+ auto depth_tensor = _tensor_reg->getPortableTensor(depth_index);
+ auto onvalue_tensor = _tensor_reg->getPortableTensor(onvalue_index);
+ auto offvalue_tensor = _tensor_reg->getPortableTensor(offvalue_index);
assert(indices_tensor->data_type() == OperandType::INT32);
- assert(axis <= static_cast<int>(indices_tensor->num_dimensions()));
+ assert(axis <= static_cast<int>(indices_tensor->getShape().rank()));
auto fn = std::make_unique<ops::OneHotLayer>();
@@ -595,12 +624,12 @@ void KernelGenerator::visit(const ir::operation::Einsum &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
std::vector<const IPortableTensor *> input_tensors;
- for (auto &ifm_idx : node.getInputs())
- input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
+ for (const auto &ifm_idx : node.getInputs())
+ input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
- const auto equation = node.param().equation;
+ const auto &equation = node.param().equation;
auto fn = std::make_unique<ops::EinsumLayer>();
@@ -613,11 +642,11 @@ void KernelGenerator::visit(const ir::operation::Custom &node)
{
auto fill_op_info = [&](const ir::OperandIndexSequence &opSeq,
std::vector<custom::TypeInfo> &types,
- std::vector<std::shared_ptr<IPortableTensor>> &tensors) {
- for (auto &idx : opSeq)
+ std::vector<IPortableTensor *> &tensors) {
+ for (const auto &idx : opSeq)
{
const auto &operand = _ctx.at(idx);
- // TODO make sure using `_current_op_seq_layout` is correct for custom operations
+ // TODO make sure using `_current_layout` is correct for custom operations
types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()});
auto in_tensor = _tensor_reg->getPortableTensor(idx);
tensors.emplace_back(in_tensor);
@@ -642,8 +671,8 @@ void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
auto fn = std::make_unique<ops::ElementwiseActivationLayer>();
@@ -659,9 +688,9 @@ void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
- auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
auto fn = std::make_unique<ops::ElementwiseBinaryLayer>();
@@ -676,29 +705,35 @@ void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
-
- auto fn = std::make_unique<ops::ElementwiseUnaryLayer>();
-
- fn->configure(input_tensor, output_tensor, convertElementwiseUnaryType(node.param().op_type));
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
- _return_fn = std::move(fn);
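+ // NOTE Quantize is special-cased into a dedicated QuantizeLayer; every other
+ // elementwise unary op type still goes through the generic ElementwiseUnaryLayer.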
+ if (node.param().op_type == ir::operation::ElementwiseUnary::Type::QUANTIZE)
+ {
+ auto fn = std::make_unique<ops::QuantizeLayer>();
+ fn->configure(input_tensor, output_tensor);
+ _return_fn = std::move(fn);
+ }
+ else
+ {
+ auto fn = std::make_unique<ops::ElementwiseUnaryLayer>();
+ fn->configure(input_tensor, output_tensor, convertElementwiseUnaryType(node.param().op_type));
+ _return_fn = std::move(fn);
+ }
}
void KernelGenerator::visit(const ir::operation::ExpandDims &node)
{
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
- const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
+ // AXIS input is used only for output shape inference, so it is not fetched here
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
- auto axis_tensor = _tensor_reg->getPortableTensor(axis_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
auto fn = std::make_unique<ops::ExpandDimsLayer>();
- fn->configure(input_tensor, axis_tensor, output_tensor);
+ fn->configure(input_tensor, output_tensor);
_return_fn = std::move(fn);
}
@@ -708,15 +743,15 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto rank = _ctx.at(ofm_index).shape().rank();
- const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
+ const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);
assert(-rank <= axis && axis < rank);
- auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
std::vector<const IPortableTensor *> input_tensors;
- for (auto &ifm_idx : node.getInputs())
- input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
+ for (const auto &ifm_idx : node.getInputs())
+ input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
auto fn = std::make_unique<ops::PackLayer>();
@@ -730,15 +765,15 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
const auto input_index{node.getInputs().at(0)};
const auto rank = _ctx.at(input_index).shape().rank();
- const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
+ const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);
assert(rank == 0 || (-rank <= axis && axis < rank));
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
std::vector<IPortableTensor *> output_tensors;
- for (auto &output_idx : node.getOutputs())
- output_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get());
+ for (const auto &output_idx : node.getOutputs())
+ output_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));
auto fn = std::make_unique<ops::UnpackLayer>();
@@ -756,8 +791,8 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
const auto output_index{node.getOutputs().at(0)};
assert(_ctx.at(pad_index).data());
- auto input = _tensor_reg->getPortableTensor(input_index).get();
- auto output = _tensor_reg->getPortableTensor(output_index).get();
+ auto input = _tensor_reg->getPortableTensor(input_index);
+ auto output = _tensor_reg->getPortableTensor(output_index);
auto pad_rank = _ctx.at(pad_index).shape().dim(0);
auto pad_base = reinterpret_cast<const int32_t *>(_ctx.at(pad_index).data()->base());
@@ -780,13 +815,15 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
{
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
+ const auto perm_index{node.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto perm_tensor = _tensor_reg->getPortableTensor(perm_index);
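+ // NOTE The permutation is now read from the PERMUTATION input tensor instead of
+ // node.param().perm.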
auto fn = std::make_unique<ops::TransposeLayer>();
- fn->configure(input_tensor, output_tensor, node.param().perm);
+ fn->configure(input_tensor, perm_tensor, output_tensor);
_return_fn = std::move(fn);
}
@@ -798,9 +835,9 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};
const auto keep_dims = node.param().keep_dims;
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
- auto axes_tensor = _tensor_reg->getPortableTensor(axes_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto axes_tensor = _tensor_reg->getPortableTensor(axes_index);
if (node.param().reduce_type == ir::operation::Reduce::ReduceType::MEAN)
{
@@ -828,10 +865,10 @@ void KernelGenerator::visit(const ir::operation::Select &node)
const auto true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
const auto false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto condition_tensor = _tensor_reg->getPortableTensor(condition_index).get();
- auto true_tensor = _tensor_reg->getPortableTensor(true_index).get();
- auto false_tensor = _tensor_reg->getPortableTensor(false_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto condition_tensor = _tensor_reg->getPortableTensor(condition_index);
+ auto true_tensor = _tensor_reg->getPortableTensor(true_index);
+ auto false_tensor = _tensor_reg->getPortableTensor(false_index);
auto fn = std::make_unique<ops::SelectLayer>();
@@ -847,10 +884,10 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
- auto begins_tensor = _tensor_reg->getPortableTensor(begins_index).get();
- auto sizes_tensor = _tensor_reg->getPortableTensor(sizes_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto begins_tensor = _tensor_reg->getPortableTensor(begins_index);
+ auto sizes_tensor = _tensor_reg->getPortableTensor(sizes_index);
auto fn = std::make_unique<ops::SliceLayer>();
@@ -867,11 +904,11 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
- auto starts_tensor = _tensor_reg->getPortableTensor(starts_index).get();
- auto ends_tensor = _tensor_reg->getPortableTensor(ends_index).get();
- auto strides_tensor = _tensor_reg->getPortableTensor(strides_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto starts_tensor = _tensor_reg->getPortableTensor(starts_index);
+ auto ends_tensor = _tensor_reg->getPortableTensor(ends_index);
+ auto strides_tensor = _tensor_reg->getPortableTensor(strides_index);
auto begin_mask = node.param().begin_mask;
auto end_mask = node.param().end_mask;
@@ -891,19 +928,18 @@ void KernelGenerator::visit(const ir::operation::Split &node)
assert(num_splits == static_cast<int>(node.getOutputs().size()));
const auto input_idx{node.getInputs().at(ir::operation::Split::Input::INPUT)};
- const auto rank = _ctx.at(input_idx).shape().rank();
- const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
- auto axis_resolved = axis < 0 ? axis + rank : axis;
+ const auto axis_idx{node.getInputs().at(ir::operation::Split::Input::AXIS)};
- auto in_tensor = _tensor_reg->getPortableTensor(input_idx).get();
+ auto in_tensor = _tensor_reg->getPortableTensor(input_idx);
+ auto axis_tensor = _tensor_reg->getPortableTensor(axis_idx);
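+ // NOTE The split axis is now read from the AXIS input tensor, replacing the former
+ // ops::getAxis() computation on node.param().axis.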
std::vector<IPortableTensor *> out_tensors;
- for (auto &output_idx : node.getOutputs())
- out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get());
+ for (const auto &output_idx : node.getOutputs())
+ out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));
auto fn = std::make_unique<ops::SplitLayer>();
- fn->configure(in_tensor, num_splits, axis_resolved, out_tensors);
+ fn->configure(in_tensor, axis_tensor, num_splits, out_tensors);
_return_fn = std::move(fn);
}
@@ -913,8 +949,8 @@ void KernelGenerator::visit(const ir::operation::Shape &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};
- auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
auto fn = std::make_unique<ops::ShapeLayer>();
@@ -928,18 +964,37 @@ void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::INPUT)};
- auto output_height = node.param().height_out;
- auto output_width = node.param().width_out;
auto align_corners = node.param().align_corners;
auto half_pixel_centers = node.param().half_pixel_centers;
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
auto fn = std::make_unique<ops::ResizeBilinearLayer>();
- fn->configure(input_tensor, output_tensor, output_height, output_width, align_corners,
- half_pixel_centers);
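+ // With a single input the output size comes from the op params; with a second SIZE
+ // input it comes from that tensor, read at compile time when it is constant.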
+ if (node.getInputs().size() == 1)
+ {
+ fn->configure(input_tensor, output_tensor, node.param().height_out, node.param().width_out,
+ align_corners, half_pixel_centers);
+ }
+ else
+ {
+ assert(node.getInputs().size() == 2);
+ const auto size_index{node.getInputs().at(ir::operation::ResizeBilinear::SIZE)};
+ auto size_tensor = _tensor_reg->getPortableTensor(size_index);
+ if (size_tensor->is_constant())
+ {
+ auto size_vec = _ctx.at(size_index).asVector<int32_t>();
+ const auto height_out = size_vec[0];
+ const auto width_out = size_vec[1];
+ fn->configure(input_tensor, output_tensor, height_out, width_out, align_corners,
+ half_pixel_centers);
+ }
+ else
+ {
+ fn->configure(input_tensor, output_tensor, size_tensor, align_corners, half_pixel_centers);
+ }
+ }
_return_fn = std::move(fn);
}
@@ -950,9 +1005,9 @@ void KernelGenerator::visit(const ir::operation::Reverse &node)
const auto input_index{node.getInputs().at(ir::operation::Reverse::INPUT)};
const auto axis_index{node.getInputs().at(ir::operation::Reverse::AXIS)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
- auto axis_tensor = _tensor_reg->getPortableTensor(axis_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);
auto fn = std::make_unique<ops::ReverseLayer>();
@@ -961,19 +1016,19 @@ void KernelGenerator::visit(const ir::operation::Reverse &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::ArgMax &node)
+void KernelGenerator::visit(const ir::operation::ArgMinMax &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ArgMax::INPUT)};
-
- const auto axis = node.param().axis;
+ const auto input_index{node.getInputs().at(ir::operation::ArgMinMax::INPUT)};
+ const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::AXIS)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);
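+ // NOTE ArgMax and ArgMin are unified into ArgMinMax: the axis now arrives as a
+ // tensor input and node.param().is_arg_max selects between max and min.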
auto fn = std::make_unique<ops::ArgMinMaxLayer>();
- fn->configure(input_tensor, output_tensor, axis, /* is_arg_max */ true);
+ fn->configure(input_tensor, output_tensor, axis_tensor, node.param().is_arg_max);
_return_fn = std::move(fn);
}
@@ -986,14 +1041,14 @@ void KernelGenerator::visit(const ir::operation::Pool2D &node)
const auto kh = node.param().kh;
const auto kw = node.param().kw;
const auto stride = node.param().stride;
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
auto fn = std::make_unique<ops::PoolLayer>();
@@ -1010,9 +1065,9 @@ void KernelGenerator::visit(const ir::operation::Pow &node)
const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
- auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
auto fn = std::make_unique<ops::PowLayer>();
@@ -1026,8 +1081,8 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(0)};
- auto output_alloc = _tensor_reg->getPortableTensor(output_index).get();
- auto input_alloc = _tensor_reg->getPortableTensor(input_index).get();
+ auto output_alloc = _tensor_reg->getPortableTensor(output_index);
+ auto input_alloc = _tensor_reg->getPortableTensor(input_index);
auto fn = std::make_unique<ops::L2NormLayer>();
@@ -1043,10 +1098,10 @@ void KernelGenerator::visit(const ir::operation::Range &node)
const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)};
const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto start_tensor = _tensor_reg->getPortableTensor(start_index).get();
- auto limit_tensor = _tensor_reg->getPortableTensor(limit_index).get();
- auto delta_tensor = _tensor_reg->getPortableTensor(delta_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto start_tensor = _tensor_reg->getPortableTensor(start_index);
+ auto limit_tensor = _tensor_reg->getPortableTensor(limit_index);
+ auto delta_tensor = _tensor_reg->getPortableTensor(delta_index);
auto fn = std::make_unique<ops::RangeLayer>();
@@ -1059,8 +1114,8 @@ void KernelGenerator::visit(const ir::operation::Rank &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};
- auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
- auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
auto fn = std::make_unique<ops::RankLayer>();
@@ -1075,9 +1130,9 @@ void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
- auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
- auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
- auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
auto fn = std::make_unique<ops::SqDiffLayer>();
@@ -1091,9 +1146,9 @@ void KernelGenerator::visit(const ir::operation::Tile &node)
const auto input_index{node.getInputs().at(ir::operation::Tile::INPUT)};
const auto multiples_index{node.getInputs().at(ir::operation::Tile::MULTIPLES)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
- auto multiples_tensor = _tensor_reg->getPortableTensor(multiples_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto multiples_tensor = _tensor_reg->getPortableTensor(multiples_index);
auto fn = std::make_unique<ops::TileLayer>();
@@ -1108,10 +1163,10 @@ void KernelGenerator::visit(const ir::operation::MatrixBandPart &node)
const auto num_lower_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_LOWER_DIAG)};
const auto num_upper_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_UPPER_DIAG)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
- auto num_lower_tensor = _tensor_reg->getPortableTensor(num_lower_index).get();
- auto num_upper_tensor = _tensor_reg->getPortableTensor(num_upper_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto num_lower_tensor = _tensor_reg->getPortableTensor(num_lower_index);
+ auto num_upper_tensor = _tensor_reg->getPortableTensor(num_upper_index);
auto fn = std::make_unique<ops::MatrixBandPartLayer>();
@@ -1119,15 +1174,60 @@ void KernelGenerator::visit(const ir::operation::MatrixBandPart &node)
_return_fn = std::move(fn);
}
+void KernelGenerator::visit(const ir::operation::DetectionPostProcess &node)
+{
+ using NMS = ir::operation::DetectionPostProcess;
+
+ ops::DetectionPostProcessLayer::DetectionPostProcessParameters parameters;
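+ // Box decoding scales and NMS thresholds are copied straight from the op params.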
+ parameters.scales.y = node.param().scale.y_scale;
+ parameters.scales.x = node.param().scale.x_scale;
+ parameters.scales.w = node.param().scale.w_scale;
+ parameters.scales.h = node.param().scale.h_scale;
+
+ parameters.iou_threshold = node.param().iou_threshold;
+ parameters.score_threshold = node.param().score_threshold;
+ parameters.max_boxes_per_class = node.param().max_boxes_per_class;
+ parameters.max_detections = node.param().max_detections;
+ parameters.num_classes = node.param().num_classes;
+ parameters.center_box_format = node.param().center_size_boxes;
+ parameters.max_classes_per_detection = node.param().max_classes_per_detection;
+
+ auto boxes_index = node.getInputs().at(NMS::Input::BOXES);
+ auto scores_index = node.getInputs().at(NMS::Input::SCORES);
+ auto anchors_index = node.getInputs().at(NMS::Input::INPUT_ANCHORS);
+
+ auto o_classes_index = node.getOutputs().at(NMS::Output::BOX_CLASSES);
+ auto o_coords_index = node.getOutputs().at(NMS::Output::BOX_COORDS);
+ auto o_scores_index = node.getOutputs().at(NMS::Output::BOX_SCORES);
+ auto o_num_selected_index = node.getOutputs().at(NMS::Output::NUM_SELECTED);
+
+ parameters.boxes_descr = _ctx.at(boxes_index).shape().dims();
+ parameters.scrores_descr = _ctx.at(scores_index).shape().dims();
+
+ parameters.boxes_input = _tensor_reg->getPortableTensor(boxes_index);
+ parameters.scores_input = _tensor_reg->getPortableTensor(scores_index);
+ parameters.anchors_input = _tensor_reg->getPortableTensor(anchors_index);
+
+ parameters.box_classes_output = _tensor_reg->getPortableTensor(o_classes_index);
+ parameters.box_coords_output = _tensor_reg->getPortableTensor(o_coords_index);
+ parameters.box_scores_output = _tensor_reg->getPortableTensor(o_scores_index);
+ parameters.num_selections_output = _tensor_reg->getPortableTensor(o_num_selected_index);
+
+ auto fn = std::make_unique<ops::DetectionPostProcessLayer>();
+ fn->configure(std::move(parameters));
+
+ _return_fn = std::move(fn);
+}
+
void KernelGenerator::visit(const ir::operation::BatchMatMul &node)
{
const auto output_index{node.getOutputs().at(0)};
const auto lhs_index{node.getInputs().at(ir::operation::BatchMatMul::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::BatchMatMul::RHS)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
- auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
const auto adj_x = node.param().adj_x;
const auto adj_y = node.param().adj_y;
@@ -1144,9 +1244,9 @@ void KernelGenerator::visit(const ir::operation::BroadcastTo &node)
const auto input_index{node.getInputs().at(ir::operation::BroadcastTo::INPUT)};
const auto shape_index{node.getInputs().at(ir::operation::BroadcastTo::SHAPE)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
- auto shape_tensor = _tensor_reg->getPortableTensor(shape_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto shape_tensor = _tensor_reg->getPortableTensor(shape_index);
auto fn = std::make_unique<ops::BroadcastToLayer>();
@@ -1159,14 +1259,14 @@ void KernelGenerator::visit(const ir::operation::FusedBatchNorm &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
std::vector<const IPortableTensor *> input_tensors;
- for (auto &ifm_idx : node.getInputs())
- input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
+ for (const auto &ifm_idx : node.getInputs())
+ input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
const auto epsilon = node.param().epsilon;
const auto is_training = node.param().is_training;
- const auto data_format = node.param().data_format;
+ const auto &data_format = node.param().data_format;
auto fn = std::make_unique<ops::FusedBatchNormLayer>();
@@ -1183,8 +1283,8 @@ void KernelGenerator::visit(const ir::operation::LogSoftmax &node)
const auto beta = node.param().beta;
const auto axis = node.param().axis;
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
auto fn = std::make_unique<ops::LogSoftMaxLayer>();
@@ -1200,10 +1300,10 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
const auto block_shape_index{node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE)};
const auto padding_index{node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS)};
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
- auto block_shape_tensor = _tensor_reg->getPortableTensor(block_shape_index).get();
- auto padding_tensor = _tensor_reg->getPortableTensor(padding_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto block_shape_tensor = _tensor_reg->getPortableTensor(block_shape_index);
+ auto padding_tensor = _tensor_reg->getPortableTensor(padding_index);
auto fn = std::make_unique<ops::SpaceToBatchNDLayer>();
@@ -1212,14 +1312,29 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
_return_fn = std::move(fn);
}
+void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
+{
+ const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
+ const auto output_index{node.getOutputs().at(0)};
+ auto block_size = node.param().block_size;
+
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+
+ auto fn = std::make_unique<ops::DepthToSpaceLayer>();
+
+ fn->configure(input_tensor, block_size, output_tensor);
+ _return_fn = std::move(fn);
+}
+
void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
{
const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
const auto output_index{node.getOutputs().at(0)};
auto block_size = node.param().block_size;
- auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
- auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
auto fn = std::make_unique<ops::SpaceToDepthLayer>();
@@ -1233,9 +1348,9 @@ void KernelGenerator::visit(const ir::operation::StatelessRandomUniform &node)
const auto shape_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SHAPE)};
const auto seed_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SEED)};
- auto output_alloc = _tensor_reg->getPortableTensor(output_index).get();
- auto shape_alloc = _tensor_reg->getPortableTensor(shape_index).get();
- auto seed_alloc = _tensor_reg->getPortableTensor(seed_index).get();
+ auto output_alloc = _tensor_reg->getPortableTensor(output_index);
+ auto shape_alloc = _tensor_reg->getPortableTensor(shape_index);
+ auto seed_alloc = _tensor_reg->getPortableTensor(seed_index);
auto fn = std::make_unique<ops::StatelessRandomUniformLayer>();
@@ -1252,13 +1367,13 @@ void KernelGenerator::visit(const ir::operation::SplitV &node)
const auto size_splits{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)};
const auto split_dim{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)};
- auto in_tensor = _tensor_reg->getPortableTensor(input_idx).get();
- auto in_size_splits = _tensor_reg->getPortableTensor(size_splits).get();
- auto in_split_dim = _tensor_reg->getPortableTensor(split_dim).get();
+ auto in_tensor = _tensor_reg->getPortableTensor(input_idx);
+ auto in_size_splits = _tensor_reg->getPortableTensor(size_splits);
+ auto in_split_dim = _tensor_reg->getPortableTensor(split_dim);
std::vector<IPortableTensor *> out_tensors;
- for (auto &output_idx : node.getOutputs())
- out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get());
+ for (const auto &output_idx : node.getOutputs())
+ out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));
auto fn = std::make_unique<ops::SplitVLayer>();
@@ -1267,6 +1382,190 @@ void KernelGenerator::visit(const ir::operation::SplitV &node)
_return_fn = std::move(fn);
}
+void KernelGenerator::visit(const ir::operation::LSTM &node)
+{
+ const auto scratch_buffer_index{
+ node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
+ const auto output_state_out_index{
+ node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
+ const auto cell_state_out_index{
+ node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
+ const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
+
+ const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
+ const auto input_to_input_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
+ const auto input_to_forget_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
+ const auto input_to_cell_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
+ const auto input_to_output_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
+ const auto recurrent_to_input_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
+ const auto recurrent_to_forget_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
+ const auto recurrent_to_cell_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
+ const auto recurrent_to_output_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
+ const auto cell_to_input_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
+ const auto cell_to_forget_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
+ const auto cell_to_output_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
+ const auto input_gate_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
+ const auto forget_gate_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
+ const auto cell_gate_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
+ const auto output_gate_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
+ const auto projection_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
+ const auto projection_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
+ const auto output_state_in_index{
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
+ const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
+ const auto time_major = node.param().time_major;
+
+ // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG.
+ // has_input_to_input_weights && has_recurrent_to_input_weights: no CIFG
+ // !(has_input_to_input_weights && has_recurrent_to_input_weights): CIFG
+ // NOTE The cell_to_input_weights does not exist in non-peephole mode, even for a regular (non-CIFG) LSTM.
+ bool has_input_to_input_weights = _ctx.exist(input_to_input_weights_index) &&
+ (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
+ _ctx.at(input_to_input_weights_index).shape().dim(1) != 0);
+ bool has_recurrent_to_input_weights =
+ _ctx.exist(recurrent_to_input_weights_index) &&
+ (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0);
+
+ // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole.
+ //      But the cell_to_input_weights does not exist for CIFG, even with peephole.
+ // has_cell_to_forget_weights && has_cell_to_output_weights: peephole
+ // !(has_cell_to_forget_weights && has_cell_to_output_weights): no peephole
+ bool has_cell_to_forget_weights = _ctx.exist(cell_to_forget_weights_index) &&
+ _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0;
+ bool has_cell_to_output_weights = _ctx.exist(cell_to_output_weights_index) &&
+ _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;
+
+ bool has_input_gate_bias =
+ _ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0);
+
+ bool has_projection_weights = _ctx.exist(projection_weights_index) &&
+ (_ctx.at(projection_weights_index).shape().dim(0) != 0 &&
+ _ctx.at(projection_weights_index).shape().dim(1) != 0);
+ bool has_projection_bias =
+ _ctx.exist(projection_bias_index) && _ctx.at(projection_bias_index).shape().dim(0);
+
+ auto scratch_buffer_tensor = _ctx.exist(scratch_buffer_index)
+ ? _tensor_reg->getPortableTensor(scratch_buffer_index)
+ : nullptr; // optional
+ auto output_state_out_tensor = _ctx.exist(output_state_out_index)
+ ? _tensor_reg->getPortableTensor(output_state_out_index)
+ : nullptr; // optional
+ auto cell_state_out_tensor = _ctx.exist(cell_state_out_index)
+ ? _tensor_reg->getPortableTensor(cell_state_out_index)
+ : nullptr; // optional
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+
+ auto input_to_input_weights_tensor =
+ has_input_to_input_weights ? _tensor_reg->getPortableTensor(input_to_input_weights_index)
+ : nullptr; // optional
+ auto input_to_forget_weights_tensor =
+ _tensor_reg->getPortableTensor(input_to_forget_weights_index);
+ auto input_to_cell_weights_tensor = _tensor_reg->getPortableTensor(input_to_cell_weights_index);
+ auto input_to_output_weights_tensor =
+ _tensor_reg->getPortableTensor(input_to_output_weights_index);
+ auto recurrent_to_input_weights_tensor =
+ has_recurrent_to_input_weights
+ ? _tensor_reg->getPortableTensor(recurrent_to_input_weights_index)
+ : nullptr; // optional
+ auto recurrent_to_forget_weights_tensor =
+ _tensor_reg->getPortableTensor(recurrent_to_forget_weights_index);
+ auto recurrent_to_cell_weights_tensor =
+ _tensor_reg->getPortableTensor(recurrent_to_cell_weights_index);
+ auto recurrent_to_output_weights_tensor =
+ _tensor_reg->getPortableTensor(recurrent_to_output_weights_index);
+
+ auto cell_to_input_weights_tensor = _tensor_reg->getPortableTensor(cell_to_input_weights_index);
+ auto cell_to_forget_weights_tensor =
+ has_cell_to_forget_weights ? _tensor_reg->getPortableTensor(cell_to_forget_weights_index)
+ : nullptr; // optional
+ auto cell_to_output_weights_tensor =
+ has_cell_to_output_weights ? _tensor_reg->getPortableTensor(cell_to_output_weights_index)
+ : nullptr; // optional
+
+ auto input_gate_bias_tensor =
+ has_input_gate_bias ? _tensor_reg->getPortableTensor(input_gate_bias_index) : nullptr;
+ auto forget_gate_bias_tensor = _tensor_reg->getPortableTensor(forget_gate_bias_index);
+ auto cell_gate_bias_tensor = _tensor_reg->getPortableTensor(cell_gate_bias_index);
+ auto output_gate_bias_tensor = _tensor_reg->getPortableTensor(output_gate_bias_index);
+ auto output_state_in_tensor = _tensor_reg->getPortableTensor(output_state_in_index);
+ auto cell_state_in_tensor = _tensor_reg->getPortableTensor(cell_state_in_index);
+
+ auto projection_weights_tensor = has_projection_weights
+ ? _tensor_reg->getPortableTensor(projection_weights_index)
+ : nullptr; // optional
+ auto projection_bias_tensor = has_projection_bias
+ ? _tensor_reg->getPortableTensor(projection_bias_index)
+ : nullptr; // optional
+
+ IPortableTensor *input_layer_norm_weights_tensor = nullptr;
+ IPortableTensor *forget_layer_norm_weights_tensor = nullptr;
+ IPortableTensor *cell_layer_norm_weights_tensor = nullptr;
+ IPortableTensor *output_layer_norm_weights_tensor = nullptr;
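+ // NOTE 24 inputs indicates the layer-normalized LSTM variant; the last four inputs
+ // hold the per-gate layer normalization weights.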
+ if (node.getInputs().size() == 24)
+ {
+ const auto input_layer_norm_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_LAYER_NORMALIZATION_WEIGHTS)};
+ const auto forget_layer_norm_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::FORGET_LAYER_NORMALIZATION_WEIGHTS)};
+ const auto cell_layer_norm_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_LAYER_NORMALIZATION_WEIGHTS)};
+ const auto output_layer_norm_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_LAYER_NORMALIZATION_WEIGHTS)};
+
+ input_layer_norm_weights_tensor =
+ _tensor_reg->getPortableTensor(input_layer_norm_weights_index);
+ forget_layer_norm_weights_tensor =
+ _tensor_reg->getPortableTensor(forget_layer_norm_weights_index);
+ cell_layer_norm_weights_tensor = _tensor_reg->getPortableTensor(cell_layer_norm_weights_index);
+ output_layer_norm_weights_tensor =
+ _tensor_reg->getPortableTensor(output_layer_norm_weights_index);
+ }
+
+ auto fn = std::make_unique<ops::LSTMLayer>();
+
+ fn->configure(
+ input_tensor, input_to_input_weights_tensor, input_to_forget_weights_tensor,
+ input_to_cell_weights_tensor, input_to_output_weights_tensor, recurrent_to_input_weights_tensor,
+ recurrent_to_forget_weights_tensor, recurrent_to_cell_weights_tensor,
+ recurrent_to_output_weights_tensor, cell_to_input_weights_tensor, cell_to_forget_weights_tensor,
+ cell_to_output_weights_tensor, input_layer_norm_weights_tensor,
+ forget_layer_norm_weights_tensor, cell_layer_norm_weights_tensor,
+ output_layer_norm_weights_tensor,
+ /*aux_input=*/nullptr,
+ /*aux_input_to_input_weights=*/nullptr,
+ /*aux_input_to_forget_weights=*/nullptr,
+ /*aux_input_to_cell_weights=*/nullptr,
+ /*aux_input_to_output_weights=*/nullptr, input_gate_bias_tensor, forget_gate_bias_tensor,
+ cell_gate_bias_tensor, output_gate_bias_tensor, projection_weights_tensor,
+ projection_bias_tensor, output_state_in_tensor, cell_state_in_tensor, node.param(),
+ /*forward_sequence=*/true, time_major,
+ /*output_offset=*/0, scratch_buffer_tensor, output_state_out_tensor, cell_state_out_tensor,
+ output_tensor,
+ !_ctx.at(output_state_in_index).info().isVariable() /* non-variable currently means an empty buffer from the frontend */,
+ !_ctx.at(cell_state_in_index).info().isVariable());
+
+ _return_fn = std::move(fn);
+}
+
} // namespace cpu
} // namespace backend
} // namespace onert
diff --git a/runtime/onert/backend/cpu/KernelGenerator.h b/runtime/onert/backend/cpu/KernelGenerator.h
index 786e68ee0..d7d5fe6fc 100644
--- a/runtime/onert/backend/cpu/KernelGenerator.h
+++ b/runtime/onert/backend/cpu/KernelGenerator.h
@@ -19,11 +19,11 @@
#include "ExternalContext.h"
#include "TensorBuilder.h"
-#include "backend/cpu_common/TensorRegistry.h"
+#include "backend/basic/TensorRegistry.h"
#include "Tensor.h"
#include <backend/CustomKernelBuilder.h>
-#include <backend/IKernelGenerator.h>
+#include <backend/basic/KernelGeneratorBase.h>
#include <ir/Operands.h>
#include <ir/Operations.h>
@@ -34,74 +34,76 @@ namespace backend
namespace cpu
{
-class KernelGenerator : public IKernelGenerator
+class KernelGenerator : public basic::KernelGeneratorBase
{
public:
- KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder,
- const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
+ KernelGenerator(const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<basic::TensorRegistry> &tensor_reg,
const std::shared_ptr<custom::IKernelBuilder> &kernel_builder,
const std::shared_ptr<ExternalContext> &external_context);
- using IKernelGenerator::visit;
+ std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex op_ind) override;
- void visit(const ir::OpSequence &) override;
+ void visit(const ir::operation::AddN &) override;
+ void visit(const ir::operation::ArgMinMax &) override;
+ void visit(const ir::operation::BatchMatMul &) override;
+ void visit(const ir::operation::BatchToSpaceND &) override;
+ void visit(const ir::operation::BinaryArithmetic &) override;
+ void visit(const ir::operation::BroadcastTo &) override;
+ void visit(const ir::operation::Comparison &) override;
+ void visit(const ir::operation::Concat &) override;
void visit(const ir::operation::Conv2D &) override;
+ void visit(const ir::operation::Custom &node) override;
+ void visit(const ir::operation::DepthToSpace &) override;
void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::Concat &) override;
- void visit(const ir::operation::Fill &) override;
- void visit(const ir::operation::FullyConnected &) override;
- void visit(const ir::operation::Reshape &) override;
- void visit(const ir::operation::Squeeze &) override;
- void visit(const ir::operation::Softmax &) override;
- void visit(const ir::operation::Comparison &) override;
- void visit(const ir::operation::BinaryArithmetic &) override;
void visit(const ir::operation::Einsum &) override;
- void visit(const ir::operation::Gather &) override;
- void visit(const ir::operation::Custom &node) override;
void visit(const ir::operation::ElementwiseActivation &) override;
void visit(const ir::operation::ElementwiseBinary &) override;
void visit(const ir::operation::ElementwiseUnary &) override;
void visit(const ir::operation::ExpandDims &) override;
- void visit(const ir::operation::Pad &) override;
- void visit(const ir::operation::Pack &) override;
- void visit(const ir::operation::Unpack &) override;
+ void visit(const ir::operation::Fill &) override;
+ void visit(const ir::operation::FullyConnected &) override;
+ void visit(const ir::operation::FusedBatchNorm &) override;
+ void visit(const ir::operation::Gather &) override;
+ void visit(const ir::operation::L2Normalization &) override;
+ void visit(const ir::operation::LogSoftmax &) override;
+ void visit(const ir::operation::LSTM &) override;
+ void visit(const ir::operation::MatrixBandPart &) override;
+ void visit(const ir::operation::DetectionPostProcess &) override;
void visit(const ir::operation::OneHot &) override;
- void visit(const ir::operation::Transpose &) override;
- void visit(const ir::operation::Reduce &) override;
- void visit(const ir::operation::Select &) override;
- void visit(const ir::operation::Slice &) override;
- void visit(const ir::operation::StridedSlice &) override;
- void visit(const ir::operation::Split &) override;
- void visit(const ir::operation::Shape &) override;
- void visit(const ir::operation::ResizeBilinear &node) override;
- void visit(const ir::operation::Reverse &) override;
- void visit(const ir::operation::ArgMax &) override;
+ void visit(const ir::operation::Pack &) override;
+ void visit(const ir::operation::Pad &) override;
void visit(const ir::operation::Pool2D &) override;
void visit(const ir::operation::Pow &) override;
- void visit(const ir::operation::SquaredDifference &) override;
- void visit(const ir::operation::Tile &) override;
- void visit(const ir::operation::L2Normalization &) override;
void visit(const ir::operation::Range &) override;
void visit(const ir::operation::Rank &) override;
- void visit(const ir::operation::MatrixBandPart &) override;
- void visit(const ir::operation::BatchMatMul &) override;
- void visit(const ir::operation::BatchToSpaceND &) override;
- void visit(const ir::operation::BroadcastTo &) override;
- void visit(const ir::operation::FusedBatchNorm &) override;
- void visit(const ir::operation::LogSoftmax &) override;
+ void visit(const ir::operation::Reduce &) override;
+ void visit(const ir::operation::Reshape &) override;
+ void visit(const ir::operation::ResizeBilinear &node) override;
+ void visit(const ir::operation::Reverse &) override;
+ void visit(const ir::operation::Select &) override;
+ void visit(const ir::operation::Shape &) override;
+ void visit(const ir::operation::Slice &) override;
+ void visit(const ir::operation::Softmax &) override;
void visit(const ir::operation::SpaceToBatchND &) override;
void visit(const ir::operation::SpaceToDepth &) override;
- void visit(const ir::operation::StatelessRandomUniform &) override;
+ void visit(const ir::operation::Split &) override;
void visit(const ir::operation::SplitV &) override;
+ void visit(const ir::operation::SquaredDifference &) override;
+ void visit(const ir::operation::Squeeze &) override;
+ void visit(const ir::operation::StatelessRandomUniform &) override;
+ void visit(const ir::operation::StridedSlice &) override;
+ void visit(const ir::operation::Tile &) override;
+ void visit(const ir::operation::Transpose &) override;
+ void visit(const ir::operation::Unpack &) override;
private:
const ir::Operands &_ctx;
const ir::Operations &_operations_ctx;
+ ir::Layout _current_layout;
std::shared_ptr<TensorBuilder> _tensor_builder;
- std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
+ std::shared_ptr<basic::TensorRegistry> _tensor_reg;
std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder;
- ir::Layout _current_op_seq_layout;
const std::shared_ptr<ExternalContext> _external_context;
};
diff --git a/runtime/onert/backend/cpu/StaticTensorManager.cc b/runtime/onert/backend/cpu/StaticTensorManager.cc
deleted file mode 100644
index 78c98dabf..000000000
--- a/runtime/onert/backend/cpu/StaticTensorManager.cc
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "StaticTensorManager.h"
-#include "Tensor.h"
-
-#include <util/logging.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-
-StaticTensorManager::StaticTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> &reg,
- cpu_common::DynamicTensorManager *dynamic_tensor_manager)
- : _nonconst_mgr{new cpu_common::MemoryManager()}, _tensors{reg},
- _dynamic_tensor_manager{dynamic_tensor_manager}
-{
- // DO NOTHING
-}
-
-void StaticTensorManager::allocateNonconsts(void)
-{
- _nonconst_mgr->allocate();
-
- for (auto &pair : _tensors->native_tensors())
- {
- const auto &ind = pair.first;
- auto tensor = pair.second;
- if (!_as_constants[ind] && !tensor->is_dynamic())
- {
- auto *buffer = _nonconst_mgr->getBuffer(ind);
- tensor->setBuffer(buffer);
-
- VERBOSE(CPU_StaticTensorManager) << "TENSOR(#" << ind.value()
- << "): " << static_cast<void *>(buffer) << std::endl;
- }
- }
-}
-
-void StaticTensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); }
-
-void StaticTensorManager::buildTensor(const ir::OperandIndex &ind,
- const ir::OperandInfo &tensor_info, ir::Layout backend_layout,
- bool as_const)
-{
- assert(!_tensors->getITensor(ind));
- if (as_const)
- {
- auto tensor = std::make_shared<ExternalTensor>(tensor_info, backend_layout);
- _tensors->setNativeTensor(ind, tensor);
- }
- else
- {
- auto tensor = std::make_shared<Tensor>(tensor_info, backend_layout, _dynamic_tensor_manager);
- _tensors->setNativeTensor(ind, tensor);
- }
- _as_constants[ind] = as_const;
-}
-
-void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
-{
- assert(_tensors->getITensor(ind));
-
- // This method is called only when a tensor has proper shape
- assert(!_tensors->getITensor(ind)->is_dynamic());
-
- if (!_as_constants[ind])
- _nonconst_mgr->claimPlan(ind, size);
-}
-
-void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
-{
- assert(_tensors->getITensor(ind));
-
- // This method is called only when a tensor has proper shape
- assert(!_tensors->getITensor(ind)->is_dynamic());
-
- if (!_as_constants[ind])
- _nonconst_mgr->releasePlan(ind);
-}
-
-void StaticTensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
-{
- for (const auto &it : _tensors->native_tensors())
- fn(it.first);
-}
-
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/StaticTensorManager.h b/runtime/onert/backend/cpu/StaticTensorManager.h
index 2af61e4e7..bcbb569ea 100644
--- a/runtime/onert/backend/cpu/StaticTensorManager.h
+++ b/runtime/onert/backend/cpu/StaticTensorManager.h
@@ -17,13 +17,7 @@
#ifndef __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__
#define __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__
-#include "backend/IStaticTensorManager.h"
-#include "backend/cpu_common/DynamicTensorManager.h"
-#include "backend/cpu_common/MemoryManager.h"
-#include "backend/cpu_common/TensorRegistry.h"
-#include "backend/ITensorManager.h"
-#include "ir/OperandIndexMap.h"
-#include "ir/OperandInfo.h"
+#include "backend/basic/StaticTensorManager.h"
namespace onert
{
@@ -32,30 +26,7 @@ namespace backend
namespace cpu
{
-class StaticTensorManager : public backend::IStaticTensorManager
-{
-public:
- StaticTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> &reg,
- cpu_common::DynamicTensorManager *dynamic_tensor_manager);
- virtual ~StaticTensorManager() = default;
-
- void allocateNonconsts(void);
- void deallocateNonconsts(void);
-
- void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info,
- ir::Layout backend_layout, bool as_const);
-
- void claimPlan(const ir::OperandIndex &ind, uint32_t size);
- void releasePlan(const ir::OperandIndex &ind);
-
- void iterate(const std::function<void(const ir::OperandIndex &)> &fn);
-
-private:
- std::unique_ptr<cpu_common::MemoryManager> _nonconst_mgr;
- const std::shared_ptr<cpu_common::TensorRegistry> _tensors;
- ir::OperandIndexMap<bool> _as_constants;
- cpu_common::DynamicTensorManager *_dynamic_tensor_manager;
-};
+using StaticTensorManager = basic::StaticTensorManager;
} // namespace cpu
} // namespace backend
diff --git a/runtime/onert/backend/cpu/Tensor.h b/runtime/onert/backend/cpu/Tensor.h
index 20e60260c..f42d3d068 100644
--- a/runtime/onert/backend/cpu/Tensor.h
+++ b/runtime/onert/backend/cpu/Tensor.h
@@ -17,7 +17,7 @@
#ifndef __ONERT_BACKEND_CPU_TENSOR_H__
#define __ONERT_BACKEND_CPU_TENSOR_H__
-#include <backend/cpu_common/Tensor.h>
+#include <backend/basic/Tensor.h>
#include <ir/Data.h>
namespace onert
@@ -27,77 +27,8 @@ namespace backend
namespace cpu
{
-using Tensor = cpu_common::Tensor;
-
-/**
- * @brief Class that uses data from external memory that is not managed by a backend
- * instead of allocating and copying the data. ExternalTensor's data pointer points to
- * an address of memory such as where memory is already allocated, or mmapped area.
- * This is meaning that ExternalTensor can take all of types' ir::Data.
- * To support this, assume below things no padding, always NHWC layout,
- * constant tensor and not dynamic.
- */
-class ExternalTensor : public Tensor
-{
-public:
- ExternalTensor() = delete;
-
-public:
- ExternalTensor(const ir::OperandInfo &info, const ir::Layout layout)
- : Tensor(info, layout, nullptr)
- {
- assert(_layout == ir::Layout::NHWC);
- assert(_info.isConstant());
- assert(_info.isDynamic() == false);
- }
-
-public:
- /**
- * @brief set Data to be shared from external so that this ExternalTensor will not be
- * allocated on CPU backend
- * @param[in] data data of Operand to be set
- */
- void setData(const std::shared_ptr<ir::Data> data)
- {
- assert(data != nullptr);
- _data = data;
- // Note. Some op such as cker::Conv could take buffer as nullptr.
- // That's why _buffer also would be used
- _buffer = const_cast<uint8_t *>(_data->base());
- }
-
-public:
- uint8_t *buffer() const override { return _buffer; }
-
- bool is_constant() const override { return true; }
- bool is_dynamic() const override { return false; }
- void set_dynamic() override
- {
- throw std::runtime_error("This tensor does not support changing dynamic");
- }
-
- void setShape(const ir::Shape &) override
- {
- throw std::runtime_error("This tensor does not support changing shape");
- }
-
- void increase_ref() override { ++_num_references; }
-
- void decrease_ref() override
- {
- assert(_data != nullptr);
- assert(_num_references > 0);
- --_num_references;
- if (_num_references == 0)
- {
- _data.reset();
- _buffer = nullptr;
- }
- }
-
-private:
- std::shared_ptr<const ir::Data> _data;
-};
+using Tensor = basic::Tensor;
+using ExternalTensor = basic::ExternalTensor;
} // namespace cpu
} // namespace backend
diff --git a/runtime/onert/backend/cpu/TensorBuilder.cc b/runtime/onert/backend/cpu/TensorBuilder.cc
deleted file mode 100644
index 828d52f7c..000000000
--- a/runtime/onert/backend/cpu/TensorBuilder.cc
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TensorBuilder.h"
-
-#include <util/logging.h>
-
-#include <cassert>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-
-TensorBuilder::TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg)
- : _tensor_reg{tensor_reg},
- _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)},
- _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())}
-{
- /* empty */
-}
-
-void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout layout)
-{
- _tensor_info_map.emplace(ind, info);
-
- // CPU backend supports only one layout as NHWC
- assert(layout == ir::Layout::NHWC);
- if (info.isDynamic())
- {
- _dynamic_tensor_mgr->buildTensor(ind, info, layout);
- }
- else
- {
- _static_tensor_mgr->buildTensor(ind, info, layout, info.isConstant());
- }
-}
-
-void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
-{
- assert(_tensor_info_map.find(ind) != _tensor_info_map.end());
- const auto tensor_info = _tensor_info_map.at(ind);
-
- if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
- {
- const auto size = tensor_info.total_size();
- _static_tensor_mgr->claimPlan(ind, size);
- }
-}
-
-void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
-{
- if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
- {
- _static_tensor_mgr->releasePlan(ind);
- }
-}
-
-bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
-{
- return _tensor_info_map.find(ind) != _tensor_info_map.end();
-}
-
-void TensorBuilder::prepare(void) { _static_tensor_mgr->allocateNonconsts(); }
-
-void TensorBuilder::allocate()
-{
- // NOTE For now nothing to do. Allocation is done in prepare stage, which is not appropriate
- // This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation.
-}
-
-std::unique_ptr<ITensorManager> TensorBuilder::releaseStaticTensorManager(void)
-{
- return std::move(_static_tensor_mgr);
-}
-
-std::unique_ptr<ITensorManager> TensorBuilder::releaseDynamicTensorManager(void)
-{
- return std::move(_dynamic_tensor_mgr);
-}
-
-} // namespace cpu
-} // namespace backend
-} // namespace onert
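The deleted builder shows the memory-planning protocol that basic::TensorBuilder now implements for this backend: notifyFirstUse() claims a plan for a tensor and notifyLastUse() releases it, so the static tensor manager sees each tensor's live range and can overlap buffers whose ranges are disjoint. A toy offset planner under that claim/release protocol (interfaces simplified; a sketch, not the basic:: implementation):

#include <cstdint>
#include <map>

class ToyPlanner
{
public:
  // claimPlan(): called at a tensor's first use.
  uint32_t claim(uint32_t size)
  {
    // First-fit over released blocks (no splitting or coalescing here;
    // a real planner does both), else bump the high-water mark.
    for (auto it = _free.begin(); it != _free.end(); ++it)
      if (it->second >= size)
      {
        uint32_t off = it->first;
        _free.erase(it);
        return off;
      }
    uint32_t off = _top;
    _top += size;
    return off;
  }
  // releasePlan(): called at a tensor's last use.
  void release(uint32_t offset, uint32_t size) { _free.emplace(offset, size); }
  // Total arena size to allocate once planning is done.
  uint32_t capacity() const { return _top; }

private:
  std::map<uint32_t, uint32_t> _free; // offset -> size of released blocks
  uint32_t _top = 0;
};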
diff --git a/runtime/onert/backend/cpu/TensorBuilder.h b/runtime/onert/backend/cpu/TensorBuilder.h
index b6d5f09cc..a7a410f17 100644
--- a/runtime/onert/backend/cpu/TensorBuilder.h
+++ b/runtime/onert/backend/cpu/TensorBuilder.h
@@ -17,16 +17,7 @@
#ifndef __ONERT_BACKEND_CPU_TENSOR_BUILDER_H__
#define __ONERT_BACKEND_CPU_TENSOR_BUILDER_H__
-#include <backend/cpu_common/DynamicTensorManager.h>
-#include <backend/cpu_common/TensorRegistry.h>
-
-#include <backend/ITensorBuilder.h>
-#include <ir/OperandIndexMap.h>
-
-#include "StaticTensorManager.h"
-#include "Tensor.h"
-
-#include <unordered_map>
+#include <backend/basic/TensorBuilder.h>
namespace onert
{
@@ -35,41 +26,7 @@ namespace backend
namespace cpu
{
-class TensorBuilder : public ITensorBuilder
-{
-public:
- TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg);
-
- /**
- * @brief Register tensor information to allocate on CPU backend
- * @param[in] ind Operand index
- * @param[in] info Operand information
- * @param[in] layout Operand data layout
- */
- void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout backend_layout) override;
-
- void notifyFirstUse(const ir::OperandIndex &) override;
- void notifyLastUse(const ir::OperandIndex &) override;
-
- bool isRegistered(const ir::OperandIndex &) const override;
-
- void prepare(void) override;
- void allocate() override;
- void postFunctionPrepare() override { /* DO NOTHING */}
-
- std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) override;
-
- IDynamicTensorManager *dynamicTensorManager(void) override { return _dynamic_tensor_mgr.get(); }
-
- std::unique_ptr<ITensorManager> releaseDynamicTensorManager(void) override;
-
-private:
- const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
- std::unique_ptr<cpu_common::DynamicTensorManager> _dynamic_tensor_mgr;
- std::unique_ptr<StaticTensorManager> _static_tensor_mgr;
- ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
-};
+using TensorBuilder = basic::TensorBuilder;
} // namespace cpu
} // namespace backend
diff --git a/runtime/onert/backend/cpu/cpu.cc b/runtime/onert/backend/cpu/cpu.cc
index 5385bb2a3..55538e2a6 100644
--- a/runtime/onert/backend/cpu/cpu.cc
+++ b/runtime/onert/backend/cpu/cpu.cc
@@ -16,18 +16,9 @@
#include "Backend.h"
-#include <util/logging.h>
-
extern "C" {
-onert::backend::Backend *onert_backend_create()
-{
- VERBOSE(onert_backend_create) << "'cpu' loaded\n";
- return new onert::backend::cpu::Backend;
-}
-void onert_backend_destroy(onert::backend::Backend *backend)
-{
- VERBOSE(onert_backend_create) << "'cpu' unloaded\n";
- delete backend;
-}
+onert::backend::Backend *onert_backend_create() { return new onert::backend::cpu::Backend; }
+
+void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; }
}
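These two extern "C" functions are the entire plugin surface of a backend: a host only has to resolve them by name from the shared object. A sketch of how a loader could do that with dlopen/dlsym (the library name and error handling here are assumptions for illustration):

#include <dlfcn.h>
#include <stdexcept>

namespace onert { namespace backend { class Backend; } }

using create_fn = onert::backend::Backend *(*)();
using destroy_fn = void (*)(onert::backend::Backend *);

inline onert::backend::Backend *loadCpuBackend(void **handle_out)
{
  void *handle = dlopen("libbackend_cpu.so", RTLD_LAZY | RTLD_LOCAL);
  if (handle == nullptr)
    throw std::runtime_error(dlerror());
  auto create = reinterpret_cast<create_fn>(dlsym(handle, "onert_backend_create"));
  if (create == nullptr)
    throw std::runtime_error("onert_backend_create not found");
  *handle_out = handle; // keep the handle; dlclose() only after onert_backend_destroy()
  return create();
}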
diff --git a/runtime/onert/backend/cpu/ops/AddNLayer.cc b/runtime/onert/backend/cpu/ops/AddNLayer.cc
new file mode 100644
index 000000000..967991295
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/AddNLayer.cc
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "AddNLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/AddN.h>
+#include <assert.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+void AddNLayer::configure(std::vector<const IPortableTensor *> &&inputs, IPortableTensor *output)
+{
+ _inputs = std::move(inputs);
+ _output = output;
+}
+
+void AddNLayer::run()
+{
+ size_t input_size = _inputs.size();
+ if (_output->data_type() == ir::DataType::INT32)
+ {
+ std::vector<const int32_t *> input_buffers(input_size);
+ for (size_t i = 0; i < input_size; i++)
+ {
+ input_buffers[i] = getBuffer<int32_t>(_inputs[i]);
+ }
+ AddN(getShape(_inputs[0]), input_size, input_buffers.data(), getBuffer<int32_t>(_output));
+ }
+ else if (_output->data_type() == ir::DataType::FLOAT32)
+ {
+ std::vector<const float *> input_buffers(input_size);
+ for (size_t i = 0; i < input_size; i++)
+ {
+ input_buffers[i] = getBuffer<float>(_inputs[i]);
+ }
+ AddN(getShape(_inputs[0]), input_size, input_buffers.data(), getBuffer<float>(_output));
+ }
+ else
+ {
+ throw std::runtime_error("AddN: unsupported data type");
+ }
+}
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
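cker::AddN does the heavy lifting above; semantically it is just an elementwise sum over N equally shaped inputs. A reference version of the loop (a sketch, not the cker implementation):

#include <cstddef>

// out[i] = sum over k of inputs[k][i]; all buffers hold num_elements values.
template <typename T>
void addNRef(size_t num_elements, const T *const *inputs, size_t num_inputs, T *out)
{
  for (size_t i = 0; i < num_elements; ++i)
  {
    T acc = T(0);
    for (size_t k = 0; k < num_inputs; ++k)
      acc += inputs[k][i];
    out[i] = acc;
  }
}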
diff --git a/runtime/onert/backend/cpu/ops/AddNLayer.h b/runtime/onert/backend/cpu/ops/AddNLayer.h
new file mode 100644
index 000000000..b7639d149
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/AddNLayer.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_ADDNLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_ADDNLAYER_H__
+
+#include <backend/IPortableTensor.h>
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+class AddNLayer : public ::onert::exec::IFunction
+{
+public:
+ AddNLayer() : _inputs(), _output(nullptr) {}
+
+public:
+ void configure(std::vector<const IPortableTensor *> &&inputs, IPortableTensor *output);
+
+ void run() override;
+
+private:
+ std::vector<const IPortableTensor *> _inputs;
+ IPortableTensor *_output;
+};
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_ADDNLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc b/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc
index d7b0b2bce..a1b8bfce3 100644
--- a/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc
@@ -42,27 +42,31 @@ template <typename T> std::function<bool(T, T)> GetComparefunction(bool is_arg_m
return std::less<T>();
}
}
-}
+} // namespace
-void ArgMinMaxLayer::configure(const IPortableTensor *input, IPortableTensor *output, int32_t axis,
- bool is_arg_max)
+void ArgMinMaxLayer::configure(const IPortableTensor *input, IPortableTensor *output,
+ const IPortableTensor *axis, bool is_arg_max)
{
_input = input;
_output = output;
- if (axis < 0)
- {
- axis += input->num_dimensions();
- }
_axis = axis;
_is_arg_max = is_arg_max;
}
void ArgMinMaxLayer::run()
{
-#define TF_LITE_ARG_MIN_MAX(input_type, axis_type, output_type) \
- ArgMinMax(getTensorShape(_input), reinterpret_cast<const input_type *>(_input->buffer()), \
- getTensorShape(_output), reinterpret_cast<output_type *>(_output->buffer()), _axis, \
- GetComparefunction<input_type>(_is_arg_max));
+ if (_axis->total_size() != sizeof(int32_t))
+ {
+ throw std::runtime_error("ArgMinMax: wrong shape of axis");
+ }
+ auto axis = *getBuffer<int32_t>(_axis);
+ if (axis < 0)
+ {
+ axis += _input->getShape().rank();
+ }
+#define TF_LITE_ARG_MIN_MAX(input_type, axis_type, output_type) \
+ ArgMinMax(getShape(_input), getBuffer<input_type>(_input), getShape(_output), \
+ getBuffer<output_type>(_output), axis, GetComparefunction<input_type>(_is_arg_max));
if (_output->data_type() == ir::DataType::INT32)
{
switch (_input->data_type())
@@ -74,6 +78,9 @@ void ArgMinMaxLayer::run()
case ir::DataType::UINT8:
TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int32_t);
break;
+      case ir::DataType::QUANT_INT8_ASYMM:
+        TF_LITE_ARG_MIN_MAX(int8_t, int32_t, int32_t);
+        break;
case ir::DataType::INT32:
TF_LITE_ARG_MIN_MAX(int32_t, int32_t, int32_t);
break;
@@ -92,6 +99,9 @@ void ArgMinMaxLayer::run()
case ir::DataType::UINT8:
TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int64_t);
break;
+      case ir::DataType::QUANT_INT8_ASYMM:
+        TF_LITE_ARG_MIN_MAX(int8_t, int32_t, int64_t);
+        break;
case ir::DataType::INT32:
TF_LITE_ARG_MIN_MAX(int32_t, int32_t, int64_t);
break;
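Because the axis is now an input tensor, its value is only known inside run(), which is why the negative-axis fixup moved out of configure(). The normalization is the usual wrap-around; as a standalone helper (sketch):

#include <stdexcept>

// Map an axis in [-rank, rank) to [0, rank); e.g. axis -1 with rank 4 -> 3.
inline int normalizeAxis(int axis, int rank)
{
  if (axis < -rank || axis >= rank)
    throw std::runtime_error("axis out of range");
  return axis < 0 ? axis + rank : axis;
}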
diff --git a/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.h b/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.h
index d7c021624..4c864cb98 100644
--- a/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.h
+++ b/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.h
@@ -33,18 +33,18 @@ namespace ops
class ArgMinMaxLayer : public ::onert::exec::IFunction
{
public:
- ArgMinMaxLayer() : _input(nullptr), _output(nullptr), _axis(-1), _is_arg_max(true) {}
+ ArgMinMaxLayer() : _input(nullptr), _output(nullptr), _axis(nullptr), _is_arg_max(true) {}
public:
- void configure(const IPortableTensor *indices, IPortableTensor *output, int32_t axis,
- bool is_arg_max);
+ void configure(const IPortableTensor *indices, IPortableTensor *output,
+ const IPortableTensor *axis, bool is_arg_max);
void run() override;
private:
const IPortableTensor *_input;
IPortableTensor *_output;
- int32_t _axis;
+ const IPortableTensor *_axis;
bool _is_arg_max;
};
diff --git a/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc b/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc
index 7ef023788..3b08fd5b1 100644
--- a/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc
+++ b/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc
@@ -28,8 +28,8 @@ namespace ops
{
BatchMatMulLayer::BatchMatMulLayer()
- : _lhs(nullptr), _rhs(nullptr), _output(nullptr), _adj_x(false), _adj_y(false),
- _kernel(new nnfw::cker::BatchMatMul())
+ : _lhs(nullptr), _rhs(nullptr), _output(nullptr), _adj_x(false), _adj_y(false),
+ _kernel(new nnfw::cker::BatchMatMul())
{
// DO NOTHING
}
@@ -39,16 +39,15 @@ BatchMatMulLayer::~BatchMatMulLayer() = default;
void BatchMatMulLayer::batchMatMulFloat32()
{
nnfw::cker::BatchMatMul &batchmatmul_kernel = *_kernel;
- nnfw::cker::Shape lhs_shape = getTensorShape(_lhs);
- nnfw::cker::Shape rhs_shape = getTensorShape(_rhs);
- nnfw::cker::Shape output_shape = getTensorShape(_output);
+ nnfw::cker::Shape lhs_shape = getShape(_lhs);
+ nnfw::cker::Shape rhs_shape = getShape(_rhs);
+ nnfw::cker::Shape output_shape = getShape(_output);
// TODO implement for constant input
batchmatmul_kernel.prepare(lhs_shape, rhs_shape, _adj_x, _adj_y);
- batchmatmul_kernel(lhs_shape, reinterpret_cast<const float *>(_lhs->buffer()), rhs_shape,
- reinterpret_cast<const float *>(_rhs->buffer()), _adj_x, _adj_y, output_shape,
- reinterpret_cast<float *>(_output->buffer()));
+ batchmatmul_kernel(lhs_shape, getBuffer<float>(_lhs), rhs_shape, getBuffer<float>(_rhs), _adj_x,
+ _adj_y, output_shape, getBuffer<float>(_output));
}
void BatchMatMulLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs, bool adj_x,
@@ -67,7 +66,7 @@ void BatchMatMulLayer::configure(const IPortableTensor *lhs, const IPortableTens
void BatchMatMulLayer::run()
{
- if (_lhs->data_type() == OperandType::FLOAT32)
+ if ((_lhs->data_type() == OperandType::FLOAT32) && (_rhs->data_type() == OperandType::FLOAT32))
{
batchMatMulFloat32();
}
diff --git a/runtime/onert/backend/cpu/ops/BatchToSpaceNDLayer.cc b/runtime/onert/backend/cpu/ops/BatchToSpaceNDLayer.cc
index f2f10eb9d..2609481fb 100644
--- a/runtime/onert/backend/cpu/ops/BatchToSpaceNDLayer.cc
+++ b/runtime/onert/backend/cpu/ops/BatchToSpaceNDLayer.cc
@@ -28,7 +28,7 @@ namespace ops
{
BatchToSpaceNDLayer::BatchToSpaceNDLayer()
- : _input(nullptr), _output(nullptr), _block_shape(nullptr), _crops(nullptr)
+ : _input(nullptr), _output(nullptr), _block_shape(nullptr), _crops(nullptr)
{
// DO NOTHING
}
@@ -44,12 +44,11 @@ template <typename T> void BatchToSpaceNDLayer::batchToSpaceNDGeneric()
}
else
{
- _crops_buffer = reinterpret_cast<const int32_t *>(_crops->buffer());
+ _crops_buffer = getBuffer<int32_t>(_crops);
}
- nnfw::cker::BatchToSpaceND<T>(
- getTensorShape(_input), reinterpret_cast<const T *>(_input->buffer()),
- reinterpret_cast<const int32_t *>(_block_shape->buffer()), _crops_buffer,
- getTensorShape(_output), reinterpret_cast<T *>(_output->buffer()));
+ nnfw::cker::BatchToSpaceND<T>(getShape(_input), getBuffer<T>(_input),
+ getBuffer<int32_t>(_block_shape), _crops_buffer, getShape(_output),
+ getBuffer<T>(_output));
}
void BatchToSpaceNDLayer::configure(const IPortableTensor *input, IPortableTensor *output,
diff --git a/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc b/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc
index f50c63375..e0d5a3ccb 100644
--- a/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc
+++ b/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc
@@ -30,31 +30,60 @@ namespace ops
namespace
{
-template <nnfw::cker::BinaryArithmeticOpType arithmetic_type, typename T>
-void eval(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output,
- nnfw::cker::BinaryArithmeticOpParam op_params)
+template <nnfw::cker::BinaryArithmeticOpType arithmetic_type, typename T> struct Eval
{
- const bool need_broadcast =
- nnfw::cker::ProcessBroadcastShapes(getTensorShape(lhs), getTensorShape(rhs), &op_params);
- if (need_broadcast)
+ nnfw::cker::Shape _lhs_shape;
+ nnfw::cker::Shape _rhs_shape;
+ nnfw::cker::Shape _output_shape;
+ nnfw::cker::BinaryArithmeticOpParam _op_params;
+ bool _need_broadcast;
+
+ Eval(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output,
+ nnfw::cker::BinaryArithmeticOpParam op_params)
+ : _op_params(std::move(op_params)), _need_broadcast(false)
{
- nnfw::cker::BroadcastBinaryArithmeticOp<arithmetic_type>(
- op_params, getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
- reinterpret_cast<T *>(output->buffer()));
- return;
+ if (!output->is_dynamic())
+ updateCache(lhs, rhs, output);
}
- nnfw::cker::BinaryArithmeticOp<arithmetic_type>(
- op_params, getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
- reinterpret_cast<T *>(output->buffer()));
-}
+ void updateCache(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output)
+ {
+ _lhs_shape.ReplaceWith(getShape(lhs));
+ _rhs_shape.ReplaceWith(getShape(rhs));
+ _output_shape.ReplaceWith(getShape(output));
+ _need_broadcast = nnfw::cker::ProcessBroadcastShapes(_lhs_shape, _rhs_shape, &_op_params);
+ }
+
+ void operator()(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output)
+ {
+ // Assume dynamic tensors never become static and static ones never change shape since
+ // configure()
+ if (output->is_dynamic())
+ updateCache(lhs, rhs, output);
+ else
+ assert(_lhs_shape == getShape(lhs) && _rhs_shape == getShape(rhs) &&
+ _output_shape == getShape(output));
+ auto lhs_buffer = getBuffer<T>(lhs);
+ auto rhs_buffer = getBuffer<T>(rhs);
+ auto output_buffer = getBuffer<T>(output);
+ if (_need_broadcast)
+ {
+ nnfw::cker::BroadcastBinaryArithmeticOp<arithmetic_type>(
+ _op_params, _lhs_shape, lhs_buffer, _rhs_shape, rhs_buffer, _output_shape, output_buffer);
+ }
+ else
+ {
+ nnfw::cker::BinaryArithmeticOp<arithmetic_type>(
+ _op_params, _lhs_shape, lhs_buffer, _rhs_shape, rhs_buffer, _output_shape, output_buffer);
+ }
+ }
+};
template <nnfw::cker::BinaryArithmeticOpType arithmetic_type>
std::function<void(const IPortableTensor *, const IPortableTensor *, IPortableTensor *)>
-generateKernelGeneric(const IPortableTensor *lhs, const ir::Activation activation,
- nnfw::cker::BinaryArithmeticOpParam op_params)
+generateKernelGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs,
+ IPortableTensor *output, const ir::Activation activation,
+ nnfw::cker::BinaryArithmeticOpParam &op_params)
{
switch (lhs->data_type())
{
@@ -64,8 +93,7 @@ generateKernelGeneric(const IPortableTensor *lhs, const ir::Activation activatio
CalculateActivationRange(activation, &output_activation_min, &output_activation_max);
op_params.float_activation_max = output_activation_max;
op_params.float_activation_min = output_activation_min;
- return std::bind(&eval<arithmetic_type, float>, std::placeholders::_1, std::placeholders::_2,
- std::placeholders::_3, op_params);
+ return Eval<arithmetic_type, float>(lhs, rhs, output, op_params);
break;
}
case OperandType::INT32:
@@ -74,8 +102,7 @@ generateKernelGeneric(const IPortableTensor *lhs, const ir::Activation activatio
CalculateActivationRange(activation, &output_activation_min, &output_activation_max);
op_params.quantized_activation_max = output_activation_max;
op_params.quantized_activation_min = output_activation_min;
- return std::bind(eval<arithmetic_type, int32_t>, std::placeholders::_1, std::placeholders::_2,
- std::placeholders::_3, op_params);
+ return Eval<arithmetic_type, int32_t>(lhs, rhs, output, op_params);
break;
}
default:
@@ -88,19 +115,17 @@ void setAddOrSubQuant8Params(const IPortableTensor *lhs, const IPortableTensor *
nnfw::cker::BinaryArithmeticOpParam *params)
{
int32_t output_activation_min, output_activation_max;
- CalculateActivationRangeUint8(activation, output, &output_activation_min, &output_activation_max);
+ CalculateActivationRangeQuantized(activation, output, &output_activation_min,
+ &output_activation_max);
nnfw::cker::BinaryArithmeticOpParam &op_params = *params;
op_params.quantized_activation_max = output_activation_max;
op_params.quantized_activation_min = output_activation_min;
// Parameters for scaled quantized computation
op_params.left_shift = 20;
// Zero-points of input and output tensors
- op_params.input1_offset = -lhs->data_offset();
- op_params.input2_offset = -rhs->data_offset();
- op_params.output_offset = output->data_offset();
- assert((op_params.input1_offset >= 0) && (op_params.input1_offset <= 255));
- assert((op_params.input2_offset >= 0) && (op_params.input2_offset <= 255));
- assert((op_params.output_offset >= 0) && (op_params.output_offset <= 255));
+ op_params.input1_offset = -lhs->data_zero_point();
+ op_params.input2_offset = -rhs->data_zero_point();
+ op_params.output_offset = output->data_zero_point();
// Compute normalized scale for _lhs and _rhs values,
// and represent in 32-bit fixed point
@@ -109,7 +134,7 @@ void setAddOrSubQuant8Params(const IPortableTensor *lhs, const IPortableTensor *
const double real_rhs_scale = rhs->data_scale() / norm_max_scale;
// output scale is used to normalize final result, so we invert the scale here
const double real_output_scale =
- norm_max_scale / (output->data_scale() * (1 << op_params.left_shift));
+ norm_max_scale / (output->data_scale() * (1 << op_params.left_shift));
// Represent the scales as fixed int32_t multipliers, and int32_t shifts
QuantizeMultiplier(real_lhs_scale, &op_params.input1_multiplier, &op_params.input1_shift);
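For readers new to the quantized path: setAddOrSubQuant8Params() sets up the standard TFLite-style quantized add. Both inputs are rescaled to the common scale norm_max_scale = 2 * max(s_lhs, s_rhs), summed, then rescaled to the output scale, with left_shift = 20 providing fixed-point headroom. A float reference model of the same arithmetic (a sketch of the math, not the int32 kernel):

#include <algorithm>
#include <cmath>
#include <cstdint>

// Dequantize, add, requantize -- what the fixed-point path approximates.
inline uint8_t quantAddRef(uint8_t q1, float s1, int32_t z1, // lhs value and quant params
                           uint8_t q2, float s2, int32_t z2, // rhs value and quant params
                           float s_out, int32_t z_out)       // output quant params
{
  const float real = s1 * (q1 - z1) + s2 * (q2 - z2);
  const int32_t q = static_cast<int32_t>(std::lround(real / s_out)) + z_out;
  return static_cast<uint8_t>(std::min(255, std::max(0, q)));
}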
@@ -122,14 +147,15 @@ void setMulQuant8Params(const IPortableTensor *lhs, const IPortableTensor *rhs,
nnfw::cker::BinaryArithmeticOpParam *params)
{
int32_t output_activation_min, output_activation_max;
- CalculateActivationRangeUint8(activation, output, &output_activation_min, &output_activation_max);
+ CalculateActivationRangeQuantized(activation, output, &output_activation_min,
+ &output_activation_max);
nnfw::cker::BinaryArithmeticOpParam &op_params = *params;
op_params.quantized_activation_max = output_activation_max;
op_params.quantized_activation_min = output_activation_min;
- op_params.input1_offset = -lhs->data_offset();
- op_params.input2_offset = -rhs->data_offset();
- op_params.output_offset = output->data_offset();
+ op_params.input1_offset = -lhs->data_zero_point();
+ op_params.input2_offset = -rhs->data_zero_point();
+ op_params.output_offset = output->data_zero_point();
double real_multiplier = lhs->data_scale() * rhs->data_scale() / output->data_scale();
QuantizeMultiplier(real_multiplier, &op_params.output_multiplier, &op_params.output_shift);
@@ -156,14 +182,20 @@ void BinaryArithmeticLayer::configure(const IPortableTensor *lhs, const IPortabl
if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
{
setAddOrSubQuant8Params(_lhs, _rhs, _output, activation, &op_params);
- _kernel = std::bind(&eval<nnfw::cker::BinaryArithmeticOpType::ADD, uint8_t>,
- std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
- op_params);
+ _kernel =
+ Eval<nnfw::cker::BinaryArithmeticOpType::ADD, uint8_t>(_lhs, _rhs, _output, op_params);
+ }
+ else if (_lhs->data_type() == OperandType::QUANT_INT8_ASYMM)
+ {
+ setAddOrSubQuant8Params(_lhs, _rhs, _output, activation, &op_params);
+ _kernel =
+ Eval<nnfw::cker::BinaryArithmeticOpType::ADD, int8_t>(_lhs, _rhs, _output, op_params);
}
else
{
- _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::ADD>(_lhs, activation,
- op_params);
+ _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::ADD>(
+ _lhs, _rhs, _output, activation, op_params);
}
break;
case ArithmeticType::kSub:
@@ -171,14 +203,21 @@ void BinaryArithmeticLayer::configure(const IPortableTensor *lhs, const IPortabl
{
setAddOrSubQuant8Params(_lhs, _rhs, _output, activation, &op_params);
op_params.input2_multiplier *= -1;
- _kernel = std::bind(&eval<nnfw::cker::BinaryArithmeticOpType::SUB, uint8_t>,
- std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
- op_params);
+ _kernel =
+ Eval<nnfw::cker::BinaryArithmeticOpType::SUB, uint8_t>(_lhs, _rhs, _output, op_params);
+ }
+ else if (_lhs->data_type() == OperandType::QUANT_INT8_ASYMM)
+ {
+ setAddOrSubQuant8Params(_lhs, _rhs, _output, activation, &op_params);
+ op_params.input2_multiplier *= -1;
+ _kernel =
+ Eval<nnfw::cker::BinaryArithmeticOpType::SUB, int8_t>(_lhs, _rhs, _output, op_params);
}
else
{
- _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::SUB>(_lhs, activation,
- op_params);
+ _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::SUB>(
+ _lhs, _rhs, _output, activation, op_params);
}
break;
case ArithmeticType::kMul:
@@ -186,21 +225,27 @@ void BinaryArithmeticLayer::configure(const IPortableTensor *lhs, const IPortabl
{
nnfw::cker::BinaryArithmeticOpParam op_params;
setMulQuant8Params(_lhs, _rhs, _output, activation, &op_params);
- _kernel = std::bind(&eval<nnfw::cker::BinaryArithmeticOpType::MUL, uint8_t>,
- std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
- op_params);
+ _kernel =
+ Eval<nnfw::cker::BinaryArithmeticOpType::MUL, uint8_t>(_lhs, _rhs, _output, op_params);
+ }
+ else if (_lhs->data_type() == OperandType::QUANT_INT8_ASYMM)
+ {
+ nnfw::cker::BinaryArithmeticOpParam op_params;
+ setMulQuant8Params(_lhs, _rhs, _output, activation, &op_params);
+ _kernel =
+ Eval<nnfw::cker::BinaryArithmeticOpType::MUL, int8_t>(_lhs, _rhs, _output, op_params);
}
else
{
- _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::MUL>(_lhs, activation,
- op_params);
+ _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::MUL>(
+ _lhs, _rhs, _output, activation, op_params);
}
break;
case ArithmeticType::kDiv:
if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
{
throw std::runtime_error{
- "BinaryArithmetic(Div): Div operation does not support quantization"};
+ "BinaryArithmetic(Div): Div operation does not support quantization"};
}
else if (_lhs->data_type() == OperandType::INT32)
{
@@ -208,8 +253,8 @@ void BinaryArithmeticLayer::configure(const IPortableTensor *lhs, const IPortabl
}
else
{
- _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::DIV>(_lhs, activation,
- op_params);
+ _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::DIV>(
+ _lhs, _rhs, _output, activation, op_params);
}
break;
default:
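The larger change in this file is the move from std::bind over a stateless eval() to the stateful Eval functor: for static tensors, the shapes and the ProcessBroadcastShapes() decision are computed once at configure time and merely asserted afterwards, while a dynamic output forces a recompute on every run. The pattern in isolation (a sketch; Tensor and the preparation step are stand-ins):

#include <cassert>

template <typename Tensor>
class CachedKernel
{
public:
  CachedKernel(const Tensor *in, Tensor *out) : _in(in), _out(out)
  {
    if (!out->is_dynamic())
      update(); // shapes are final: pay the preparation cost once
  }
  void operator()()
  {
    if (_out->is_dynamic())
      update(); // shape may have changed since the last run
    else
      assert(_prepared);
    // ... invoke the kernel using the cached preparation ...
  }

private:
  void update()
  {
    // In BinaryArithmeticLayer this is where the shapes are copied and
    // ProcessBroadcastShapes() decides between the two kernel variants.
    _need_broadcast = (_in->getShape() != _out->getShape());
    _prepared = true;
  }
  const Tensor *_in;
  Tensor *_out;
  bool _need_broadcast = false;
  bool _prepared = false;
};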
diff --git a/runtime/onert/backend/cpu/ops/BroadcastToLayer.cc b/runtime/onert/backend/cpu/ops/BroadcastToLayer.cc
index d9c1bbfc5..d31b814bb 100644
--- a/runtime/onert/backend/cpu/ops/BroadcastToLayer.cc
+++ b/runtime/onert/backend/cpu/ops/BroadcastToLayer.cc
@@ -49,19 +49,18 @@ void BroadcastToLayer::run()
{
    // TODO: Also support INT8 and UINT8 when quantization is applied.
case OperandType::FLOAT32:
- nnfw::cker::BroadcastTo<float>(
- getTensorShape(_input), reinterpret_cast<float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::BroadcastTo<float>(getShape(_input), reinterpret_cast<float *>(_input->buffer()),
+ getShape(_output), getBuffer<float>(_output));
break;
case OperandType::INT32:
- nnfw::cker::BroadcastTo<int32_t>(
- getTensorShape(_input), reinterpret_cast<int32_t *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer()));
+ nnfw::cker::BroadcastTo<int32_t>(getShape(_input),
+ reinterpret_cast<int32_t *>(_input->buffer()),
+ getShape(_output), getBuffer<int32_t>(_output));
break;
case OperandType::UINT32:
- nnfw::cker::BroadcastTo<uint32_t>(
- getTensorShape(_input), reinterpret_cast<uint32_t *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<uint32_t *>(_output->buffer()));
+ nnfw::cker::BroadcastTo<uint32_t>(getShape(_input),
+ reinterpret_cast<uint32_t *>(_input->buffer()),
+ getShape(_output), getBuffer<uint32_t>(_output));
break;
default:
throw std::runtime_error{"BroadcastToLayer: unsupported data type"};
diff --git a/runtime/onert/backend/cpu/ops/CompareLayer.cc b/runtime/onert/backend/cpu/ops/CompareLayer.cc
index adf902aaf..b621952cc 100644
--- a/runtime/onert/backend/cpu/ops/CompareLayer.cc
+++ b/runtime/onert/backend/cpu/ops/CompareLayer.cc
@@ -49,10 +49,10 @@ void compareQuant8(const IPortableTensor *lhs, const IPortableTensor *rhs, IPort
{
nnfw::cker::ComparisonParams params;
params.left_shift = 8;
- params.input1_offset = -lhs->data_offset();
- params.input2_offset = -rhs->data_offset();
+ params.input1_offset = -lhs->data_zero_point();
+ params.input2_offset = -rhs->data_zero_point();
const double norm_max_scale =
- 2 * std::max(std::abs(lhs->data_scale()), std::abs(rhs->data_scale()));
+ 2 * std::max(std::abs(lhs->data_scale()), std::abs(rhs->data_scale()));
const double adjusted_lhs_scale = lhs->data_scale() / norm_max_scale;
const double adjusted_rhs_scale = rhs->data_scale() / norm_max_scale;
QuantizeMultiplierSmallerThanOneExp(adjusted_lhs_scale, &params.input1_multiplier,
@@ -61,19 +61,18 @@ void compareQuant8(const IPortableTensor *lhs, const IPortableTensor *rhs, IPort
&params.input2_shift);
params.is_broadcast = !HaveSameShapes(lhs, rhs);
- using CompareFunction =
- void (*)(ComparisonParams & params, const Shape &input1_shape, const T *input1_data,
- const Shape &input2_shape, const T *input2_data, const Shape &output_shape,
- bool *output_data);
+ using CompareFunction = void (*)(
+ ComparisonParams & params, const Shape &input1_shape, const T *input1_data,
+ const Shape &input2_shape, const T *input2_data, const Shape &output_shape, bool *output_data);
static const CompareFunction broadcast_fns[] = {
- Broadcast4DSlowEqualWithScaling, Broadcast4DSlowNotEqualWithScaling,
- Broadcast4DSlowGreaterWithScaling, Broadcast4DSlowGreaterEqualWithScaling,
- Broadcast4DSlowLessWithScaling, Broadcast4DSlowLessEqualWithScaling,
+ Broadcast4DSlowEqualWithScaling, Broadcast4DSlowNotEqualWithScaling,
+ Broadcast4DSlowGreaterWithScaling, Broadcast4DSlowGreaterEqualWithScaling,
+ Broadcast4DSlowLessWithScaling, Broadcast4DSlowLessEqualWithScaling,
};
static const CompareFunction non_broadcast_fns[] = {
- EqualWithScaling, NotEqualWithScaling, GreaterWithScaling,
- GreaterEqualWithScaling, LessWithScaling, LessEqualWithScaling,
+ EqualWithScaling, NotEqualWithScaling, GreaterWithScaling,
+ GreaterEqualWithScaling, LessWithScaling, LessEqualWithScaling,
};
static_assert(sizeof(broadcast_fns) == sizeof(non_broadcast_fns),
@@ -85,9 +84,8 @@ void compareQuant8(const IPortableTensor *lhs, const IPortableTensor *rhs, IPort
CompareFunction fn = (params.is_broadcast ? broadcast_fns[index] : non_broadcast_fns[index]);
- fn(params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
+ fn(params, getExtendedTensorShape(lhs), getBuffer<T>(lhs), getExtendedTensorShape(rhs),
+ getBuffer<T>(rhs), getExtendedTensorShape(output), getBuffer<bool>(output));
}
template <typename T>
@@ -97,16 +95,16 @@ void compareScalar(const IPortableTensor *lhs, const IPortableTensor *rhs, IPort
bool requires_broadcast = !HaveSameShapes(lhs, rhs);
using CompareFunction =
- void (*)(const Shape &input1_shape, const T *input1_data, const Shape &input2_shape,
- const T *input2_data, const Shape &output_shape, bool *output_data);
+ void (*)(const Shape &input1_shape, const T *input1_data, const Shape &input2_shape,
+ const T *input2_data, const Shape &output_shape, bool *output_data);
static const CompareFunction broadcast_fns[] = {
- Broadcast4DSlowEqual, Broadcast4DSlowNotEqual, Broadcast4DSlowGreater,
- Broadcast4DSlowGreaterEqual, Broadcast4DSlowLess, Broadcast4DSlowLessEqual,
+ Broadcast4DSlowEqual, Broadcast4DSlowNotEqual, Broadcast4DSlowGreater,
+ Broadcast4DSlowGreaterEqual, Broadcast4DSlowLess, Broadcast4DSlowLessEqual,
};
static const CompareFunction non_broadcast_fns[] = {
- EqualNoScaling, NotEqualNoScaling, GreaterNoScaling,
- GreaterEqualNoScaling, LessNoScaling, LessEqualNoScaling,
+ EqualNoScaling, NotEqualNoScaling, GreaterNoScaling,
+ GreaterEqualNoScaling, LessNoScaling, LessEqualNoScaling,
};
static_assert(sizeof(broadcast_fns) == sizeof(non_broadcast_fns),
@@ -118,16 +116,15 @@ void compareScalar(const IPortableTensor *lhs, const IPortableTensor *rhs, IPort
CompareFunction fn = (requires_broadcast ? broadcast_fns[index] : non_broadcast_fns[index]);
- fn(getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
+ fn(getExtendedTensorShape(lhs), getBuffer<T>(lhs), getExtendedTensorShape(rhs), getBuffer<T>(rhs),
+ getExtendedTensorShape(output), getBuffer<bool>(output));
}
} // namespace
CompareLayer::CompareLayer()
- : _lhs(nullptr), _rhs(nullptr), _output(nullptr),
- _op_type(ir::operation::Comparison::ComparisonType::Equal)
+ : _lhs(nullptr), _rhs(nullptr), _output(nullptr),
+ _op_type(ir::operation::Comparison::ComparisonType::Equal)
{
// DO NOTHING
}
diff --git a/runtime/onert/backend/cpu/ops/ConcatLayer.cc b/runtime/onert/backend/cpu/ops/ConcatLayer.cc
index d26ed7378..5d48b0e7f 100644
--- a/runtime/onert/backend/cpu/ops/ConcatLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ConcatLayer.cc
@@ -49,7 +49,7 @@ template <typename T> void ConcatLayer::concatenationGeneral()
for (uint32_t i = 0; i < num_inputs; i++)
{
- inputDims.push_back(getTensorShape(_inputs[i]));
+ inputDims.push_back(getShape(_inputs[i]));
inputDimsPtr.push_back(&inputDims[i]);
}
@@ -57,11 +57,11 @@ template <typename T> void ConcatLayer::concatenationGeneral()
for (const auto input : _inputs)
{
- inputDataPtrs.emplace_back(reinterpret_cast<const T *>(input->buffer()));
+ inputDataPtrs.emplace_back(getBuffer<T>(input));
}
nnfw::cker::Concatenation<T>(op_params, inputDimsPtr.data(), inputDataPtrs.data(),
- getTensorShape(_output), reinterpret_cast<T *>(_output->buffer()));
+ getShape(_output), getBuffer<T>(_output));
}
void ConcatLayer::concatenationQuant8()
{
@@ -71,7 +71,7 @@ void ConcatLayer::concatenationQuant8()
std::vector<float> input_scales(num_inputs);
for (uint32_t i = 0; i < num_inputs; i++)
{
- input_zeropoints[i] = _inputs[i]->data_offset();
+ input_zeropoints[i] = _inputs[i]->data_zero_point();
input_scales[i] = _inputs[i]->data_scale();
}
@@ -80,7 +80,7 @@ void ConcatLayer::concatenationQuant8()
op_params.inputs_count = num_inputs;
op_params.input_zeropoint = input_zeropoints.data();
op_params.input_scale = input_scales.data();
- op_params.output_zeropoint = _output->data_offset();
+ op_params.output_zeropoint = _output->data_zero_point();
op_params.output_scale = _output->data_scale();
std::vector<nnfw::cker::Shape *> inputDimsPtr;
@@ -89,19 +89,18 @@ void ConcatLayer::concatenationQuant8()
inputDims.reserve(num_inputs);
for (uint32_t i = 0; i < num_inputs; i++)
{
- inputDims.push_back(getTensorShape(_inputs[i]));
+ inputDims.push_back(getShape(_inputs[i]));
inputDimsPtr.push_back(&inputDims[i]);
}
std::vector<const uint8_t *> inputDataPtrs;
for (const auto input : _inputs)
{
- inputDataPtrs.emplace_back(reinterpret_cast<const uint8_t *>(input->buffer()));
+ inputDataPtrs.emplace_back(getBuffer<uint8_t>(input));
}
nnfw::cker::ConcatenationWithScaling(op_params, inputDimsPtr.data(), inputDataPtrs.data(),
- getTensorShape(_output),
- reinterpret_cast<uint8_t *>(_output->buffer()));
+ getShape(_output), getBuffer<uint8_t>(_output));
}
void ConcatLayer::configure(const std::vector<const IPortableTensor *> &inputs, int32_t axis,
@@ -117,24 +116,26 @@ void ConcatLayer::configure(const std::vector<const IPortableTensor *> &inputs,
void ConcatLayer::run()
{
- if (_output->data_type() == OperandType::FLOAT32)
+ switch (_output->data_type())
{
- concatenationGeneral<float>();
+ case OperandType::FLOAT32:
+ concatenationGeneral<float>();
+ break;
+ case OperandType::QUANT_UINT8_ASYMM:
+ concatenationQuant8();
+ break;
+ case OperandType::QUANT_INT8_ASYMM:
+ concatenationGeneral<int8_t>();
+ break;
+ case OperandType::INT32:
+ concatenationGeneral<int32_t>();
+ break;
+ case OperandType::INT64:
+ concatenationGeneral<int64_t>();
+ break;
+ default:
+ throw std::runtime_error("Concat: unsupported data type");
}
- else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- concatenationQuant8();
- }
- else if (_output->data_type() == OperandType::INT32)
- {
- concatenationGeneral<int32_t>();
- }
- else if (_output->data_type() == OperandType::INT64)
- {
- concatenationGeneral<int64_t>();
- }
- else
- throw std::runtime_error("Concat: unsupported data type");
}
} // namespace ops
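On the run() dispatch above: concatenationGeneral() can stay type-agnostic because concatenation is a pure copy, while the QUANT_UINT8_ASYMM path needs ConcatenationWithScaling, since each input may carry its own scale/zero-point and must be requantized while copying. The copy pattern of the general case, with dimensions flattened around the concat axis (sketch):

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

// Treat each input as outer_size slices of axis_bytes[k] contiguous bytes
// (inner dims folded into axis_bytes); interleave the slices into `out`.
inline void concatAxisRef(const std::vector<const uint8_t *> &ins,
                          const std::vector<size_t> &axis_bytes, size_t outer_size,
                          uint8_t *out)
{
  for (size_t o = 0; o < outer_size; ++o)
    for (size_t k = 0; k < ins.size(); ++k)
    {
      std::memcpy(out, ins[k] + o * axis_bytes[k], axis_bytes[k]);
      out += axis_bytes[k];
    }
}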
diff --git a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
index c057267d3..62e8ae4ba 100644
--- a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
@@ -15,6 +15,8 @@
*/
#include "ConvolutionLayer.h"
+#include "OperationUtils.h"
+#include "cker/PortableTensorUtils.h"
#include "../Tensor.h"
#include "ir/Padding.h"
@@ -29,11 +31,11 @@ namespace cpu
namespace ops
{
ConvolutionLayer::ConvolutionLayer()
- : _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
- _paddingType(ir::PaddingType::EXPLICIT), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
- _paddingBottom(0), _strideWidth(0), _strideHeight(0), _dilationWidthFactor(1),
- _dilationHeightFactor(1), _activation(ir::Activation::NONE),
- _conv_kernel(new nnfw::cker::Conv()), _prepare(false)
+ : _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+ _paddingType(ir::PaddingType::EXPLICIT), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
+ _paddingBottom(0), _strideWidth(0), _strideHeight(0), _dilationWidthFactor(1),
+ _dilationHeightFactor(1), _activation(ir::Activation::NONE),
+ _conv_kernel(new nnfw::cker::Conv()), _prepare(false), _is_hybrid(false)
{
// DO NOTHING
}
@@ -57,18 +59,17 @@ void ConvolutionLayer::convFloat32()
op_params.float_activation_max = output_activation_max;
nnfw::cker::Conv &kernel = *_conv_kernel;
- kernel(op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_kernel), reinterpret_cast<const float *>(_kernel->buffer()),
- getTensorShape(_bias), reinterpret_cast<const float *>(_bias->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ kernel(op_params, getShape(_input), getBuffer<float>(_input), getShape(_kernel),
+ getBuffer<float>(_kernel), getShape(_bias), getBuffer<float>(_bias), getShape(_output),
+ getBuffer<float>(_output));
}
-void ConvolutionLayer::convQuant8()
+void ConvolutionLayer::convQ8uPerTensor()
{
int32_t output_activation_min = 0;
int32_t output_activation_max = 0;
- CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
- &output_activation_max);
+ CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
+ &output_activation_max);
double real_multiplier = 0.0;
int32_t output_multiplier = 0;
@@ -84,9 +85,9 @@ void ConvolutionLayer::convQuant8()
op_params.padding_type = getPaddingType(_paddingType);
op_params.padding_values.width = _paddingLeft;
op_params.padding_values.height = _paddingTop;
- op_params.input_offset = -_input->data_offset();
- op_params.weights_offset = -_kernel->data_offset();
- op_params.output_offset = _output->data_offset();
+ op_params.input_offset = -_input->data_zero_point();
+ op_params.weights_offset = -_kernel->data_zero_point();
+ op_params.output_offset = _output->data_zero_point();
op_params.output_multiplier = output_multiplier;
op_params.output_shift = output_shift;
op_params.quantized_activation_min = output_activation_min;
@@ -94,10 +95,102 @@ void ConvolutionLayer::convQuant8()
op_params.is_replaced_weights = true;
nnfw::cker::Conv &kernel = *_conv_kernel;
- kernel(op_params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()),
- getTensorShape(_kernel), reinterpret_cast<const uint8_t *>(_kernel->buffer()),
- getTensorShape(_bias), reinterpret_cast<const int32_t *>(_bias->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
+ kernel(op_params, getShape(_input), getBuffer<uint8_t>(_input), getShape(_kernel),
+ getBuffer<uint8_t>(_kernel), getShape(_bias), getBuffer<int32_t>(_bias), getShape(_output),
+ getBuffer<uint8_t>(_output));
+}
+
+void ConvolutionLayer::convQ8uPerChannel()
+{
+ nnfw::cker::ConvParams op_params;
+ op_params.padding_values.width = _paddingLeft;
+ op_params.padding_values.height = _paddingTop;
+ op_params.stride_width = _strideWidth;
+ op_params.stride_height = _strideHeight;
+ op_params.dilation_width_factor = _dilationWidthFactor;
+ op_params.dilation_height_factor = _dilationHeightFactor;
+ op_params.input_offset = -_input->data_zero_point();
+ op_params.output_offset = _output->data_zero_point();
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
+ &output_activation_max);
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+ // NOTE: The following fields of ConvParams are not used:
+ // padding_type, weights_offset, output_{multiplier,shift}, float_activation_{min,max}
+
+ nnfw::cker::Conv &kernel = *_conv_kernel;
+ kernel(op_params, getShape(_input), getBuffer<uint8_t>(_input), getShape(_kernel),
+ getBuffer<uint8_t>(_kernel), _kernel->data_zero_points().data(), getShape(_bias),
+ getBuffer<int32_t>(_bias), getShape(_output), getBuffer<uint8_t>(_output));
+}
+
+void ConvolutionLayer::convQ8i()
+{
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
+ &output_activation_max);
+
+ nnfw::cker::ConvParams op_params;
+ op_params.input_offset = -_input->data_zero_point();
+ op_params.output_offset = _output->data_zero_point();
+ op_params.stride_height = _strideHeight;
+ op_params.stride_width = _strideWidth;
+ op_params.dilation_height_factor = _dilationHeightFactor;
+ op_params.dilation_width_factor = _dilationWidthFactor;
+ op_params.padding_values.height = _paddingTop;
+ op_params.padding_values.width = _paddingLeft;
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+
+ nnfw::cker::Conv &kernel = *_conv_kernel;
+ kernel(op_params, getShape(_input), reinterpret_cast<const int8_t *>(_input->buffer()),
+ getShape(_kernel), reinterpret_cast<const int8_t *>(_kernel->buffer()), getShape(_bias),
+ reinterpret_cast<const int32_t *>(_bias->buffer()), getShape(_output),
+ reinterpret_cast<int8_t *>(_output->buffer()));
+}
+
+void ConvolutionLayer::convQ8iHybridPerChannel()
+{
+ float output_activation_min = 0;
+ float output_activation_max = 0;
+ CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
+
+ const int batch_size = getShape(_input).Dims(0);
+ if (batch_size == 0)
+ throw std::runtime_error{"Convolution input batch_size = 0"};
+ auto input_shape = getShape(_input);
+ const int input_size = input_shape.FlatSize() / batch_size;
+
+ auto input_quantized_ptr = _hybrid_arena->input_quantized.data();
+ auto input_scaling_factors_ptr = _hybrid_arena->input_scaling_factors.data();
+ auto input_offsets_ptr = _hybrid_arena->input_offsets.data();
+ for (int b = 0; b < batch_size; ++b)
+ {
+ const int offset = b * input_size;
+ nnfw::cker::PortableAsymmetricQuantizeFloats(
+ reinterpret_cast<const float *>(_input->buffer()) + offset, input_size,
+ input_quantized_ptr + offset, &input_scaling_factors_ptr[b], &input_offsets_ptr[b]);
+ }
+ nnfw::cker::ConvParams op_params;
+ op_params.padding_type = getPaddingType(_paddingType);
+ op_params.padding_values.width = _paddingLeft;
+ op_params.padding_values.height = _paddingTop;
+ op_params.stride_width = _strideWidth;
+ op_params.stride_height = _strideHeight;
+ op_params.dilation_width_factor = _dilationWidthFactor;
+ op_params.dilation_height_factor = _dilationHeightFactor;
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+
+ const auto *filter_per_channel_scales = _kernel->data_scales().data();
+ nnfw::cker::reference::HybridConvPerChannel(
+ op_params, input_scaling_factors_ptr, getShape(_input), input_quantized_ptr, getShape(_kernel),
+ reinterpret_cast<const int8_t *>(_kernel->buffer()), getShape(_bias),
+ reinterpret_cast<const float *>(_bias->buffer()), getShape(_output),
+ reinterpret_cast<float *>(_output->buffer()), filter_per_channel_scales, input_offsets_ptr);
}
void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTensor *kernel,
@@ -123,12 +216,13 @@ void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTe
_dilationHeightFactor = dilationHeightFactor;
_activation = activation;
_output = output;
+ _is_hybrid = _input->data_type() == OperandType::FLOAT32 &&
+ _kernel->data_type() == OperandType::QUANT_INT8_SYMM;
}
void ConvolutionLayer::run()
{
prepare();
-
if (_input->is_dynamic() || _kernel->is_dynamic())
{
const auto ifm_shape = _input->getShape().asFeature(_input->layout());
@@ -150,21 +244,33 @@ void ConvolutionLayer::run()
param_padding.param.bottom = _paddingBottom;
const auto padding =
- ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
- _dilationWidthFactor, _dilationHeightFactor);
+ ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ _dilationWidthFactor, _dilationHeightFactor);
_paddingLeft = padding.left;
_paddingRight = padding.right;
_paddingTop = padding.top;
_paddingBottom = padding.bottom;
}
- if (_input->data_type() == OperandType::FLOAT32)
+ if (_is_hybrid)
+ {
+ convQ8iHybridPerChannel();
+ }
+ else if (_input->data_type() == OperandType::FLOAT32)
{
convFloat32();
}
else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
{
- convQuant8();
+ const bool per_channel_quantized = _kernel->data_scales().size() > 1;
+ if (per_channel_quantized)
+ convQ8uPerChannel();
+ else
+ convQ8uPerTensor();
+ }
+ else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
+ {
+ convQ8i();
}
else
{
@@ -177,13 +283,33 @@ void ConvolutionLayer::prepare()
if (_prepare)
return;
+ if (_is_hybrid)
+ {
+ // ensure weight is per-channel quantized.
+ int32_t kernel_output_channel = getShape(_kernel).Dims(0);
+    // per-channel scale count comes from a flatbuffer vector, so its size fits in uint32_t.
+    size_t kernel_scales_cnt = _kernel->data_scales().size();
+    // promote to int64_t to compare int32_t and uint32_t safely
+    if ((int64_t)kernel_output_channel != (int64_t)kernel_scales_cnt)
+ throw std::runtime_error{"Conv2D hybrid supports only per-channel quantized weight."};
+
+    // allocate memory for activation quantization:
+    // - quantized values (int8_t, same shape as the original input)
+    // - quantization params (scale/zero-point per batch row of the input)
+ auto input_shape = getShape(_input);
+ const int batch_size = input_shape.Dims(0);
+ const int input_size = input_shape.FlatSize() / batch_size;
+ _hybrid_arena = std::make_unique<nnfw::cker::ConvHybridTempArena>(batch_size, input_size);
+ _prepare = true;
+ return;
+ }
+
nnfw::cker::Conv &kernel = *_conv_kernel;
if (_input->data_type() == OperandType::FLOAT32 && _kernel->is_constant())
{
bool is_transposed = false;
- kernel.prepare(getTensorShape(_kernel), reinterpret_cast<const float *>(_kernel->buffer()),
- getPaddingType(_paddingType), is_transposed, _dilationWidthFactor,
- _dilationHeightFactor);
+ kernel.prepareF32(getShape(_kernel), getBuffer<float>(_kernel), getPaddingType(_paddingType),
+ is_transposed, _dilationWidthFactor, _dilationHeightFactor);
// Decrease reference of _kernel(weights) only when _kernel is constant
if (is_transposed)
@@ -197,14 +323,38 @@ void ConvolutionLayer::prepare()
else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM && _kernel->is_constant() &&
!_input->is_dynamic() && !_output->is_dynamic())
{
- kernel.prepareQuant(getTensorShape(_input), getTensorShape(_kernel), getTensorShape(_output),
- _strideWidth, _strideHeight);
+ const bool per_channel_quantized = _kernel->data_scales().size() > 1;
+ if (per_channel_quantized)
+ {
+ GetQuantizedConvolutionMultipliersAndShifts(
+ _input->data_scale(), _output->data_scale(), _kernel->data_scales().data(),
+ _kernel->data_scales().size(), getShape(_kernel).Dims(0),
+ kernel.per_channel_output_multiplier(), kernel.per_channel_output_shift());
+ }
+ else
+ {
+ kernel.prepareQ8uPerTensor(getShape(_input), getShape(_kernel), getShape(_output),
+ _strideWidth, _strideHeight, _dilationWidthFactor,
+ _dilationHeightFactor);
+ }
+ }
+ else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
+ {
+ if (_kernel->is_constant() && !_input->is_dynamic() && !_output->is_dynamic())
+ {
+ GetQuantizedConvolutionMultipliersAndShifts(
+ _input->data_scale(), _output->data_scale(), _kernel->data_scales().data(),
+ _kernel->data_scales().size(), getShape(_kernel).Dims(0),
+ kernel.per_channel_output_multiplier(), kernel.per_channel_output_shift());
+ }
+ else
+ {
+ throw std::runtime_error{"Conv2D: Int8 dynamic weight is not supported"};
+ }
}
_prepare = true;
}
-#undef ANDROID_NN_CONV_PARAMETERS
-
} // namespace ops
} // namespace cpu
} // namespace backend
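The new hybrid path quantizes activations on the fly: each batch row of the float input gets its own scale and offset (that is what the per-batch loop feeding PortableAsymmetricQuantizeFloats computes), the convolution then runs in int8 against the per-channel int8 weights, and the accumulators are scaled back to float. A sketch of the per-row asymmetric quantization step (the rounding details here are assumptions; the real routine lives in cker):

#include <algorithm>
#include <cmath>
#include <cstdint>

// Map the row's [min, max] (widened to include 0) onto the int8 range.
inline void quantizeRowRef(const float *in, int n, int8_t *out, float *scale, int32_t *offset)
{
  float lo = std::min(0.0f, *std::min_element(in, in + n));
  float hi = std::max(0.0f, *std::max_element(in, in + n));
  *scale = (hi - lo) / 255.0f;
  if (*scale == 0.0f)
    *scale = 1.0f; // all-zero row: any scale works
  *offset = static_cast<int32_t>(std::lround(-128.0f - lo / *scale));
  for (int i = 0; i < n; ++i)
  {
    const int32_t q = static_cast<int32_t>(std::lround(in[i] / *scale)) + *offset;
    out[i] = static_cast<int8_t>(std::min(127, std::max(-128, q)));
  }
}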
diff --git a/runtime/onert/backend/cpu/ops/ConvolutionLayer.h b/runtime/onert/backend/cpu/ops/ConvolutionLayer.h
index 398892e65..5e1bd0b08 100644
--- a/runtime/onert/backend/cpu/ops/ConvolutionLayer.h
+++ b/runtime/onert/backend/cpu/ops/ConvolutionLayer.h
@@ -29,7 +29,9 @@ namespace nnfw
namespace cker
{
class Conv;
-}
+struct ConvHybridTempArena;
+class Shape;
+} // namespace cker
} // namespace nnfw
namespace onert
@@ -48,10 +50,6 @@ public:
~ConvolutionLayer();
public:
- void convFloat32();
-
- void convQuant8();
-
void configure(const IPortableTensor *input, const IPortableTensor *kernel,
const IPortableTensor *bias, ir::PaddingType _paddingType,
const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
@@ -59,10 +57,15 @@ public:
const uint32_t strideHeight, const uint32_t dilationWidthFactor,
const uint32_t dilationHeightFactor, const ir::Activation activation,
IPortableTensor *output);
-
+ void prepare() override;
void run() override;
- void prepare() override;
+private:
+ void convFloat32();
+ void convQ8uPerTensor();
+ void convQ8uPerChannel();
+ void convQ8i();
+ void convQ8iHybridPerChannel();
private:
const IPortableTensor *_input;
@@ -84,8 +87,10 @@ private:
ir::Activation _activation;
std::unique_ptr<nnfw::cker::Conv> _conv_kernel;
+ std::unique_ptr<nnfw::cker::ConvHybridTempArena> _hybrid_arena;
bool _prepare;
+ bool _is_hybrid;
};
} // namespace ops
diff --git a/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.cc b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.cc
new file mode 100644
index 000000000..e23b7c14a
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.cc
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DepthToSpaceLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/DepthToSpace.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+DepthToSpaceLayer::DepthToSpaceLayer() : _input(nullptr), _block_size(0), _output(nullptr)
+{
+ // DO NOTHING
+}
+
+template <typename T> void DepthToSpaceLayer::depthToSpace()
+{
+ nnfw::cker::DepthToSpace(getShape(_input), getBuffer<T>(_input), getShape(_output),
+ getBuffer<T>(_output), _block_size);
+}
+
+void DepthToSpaceLayer::configure(const IPortableTensor *input, const int32_t block_size,
+ IPortableTensor *output)
+{
+ _input = input;
+ _block_size = block_size;
+ _output = output;
+}
+
+void DepthToSpaceLayer::run()
+{
+ switch (_input->data_type())
+ {
+ case OperandType::FLOAT32:
+ depthToSpace<float>();
+ break;
+ case OperandType::INT32:
+ depthToSpace<int32_t>();
+ break;
+ case OperandType::INT64:
+ depthToSpace<int64_t>();
+ break;
+ case OperandType::QUANT_UINT8_ASYMM:
+ depthToSpace<uint8_t>();
+ break;
+ case OperandType::QUANT_INT8_ASYMM:
+ depthToSpace<int8_t>();
+ break;
+ default:
+ throw std::runtime_error{"DepthToSpace: unsupported data type"};
+ }
+}
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
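Since DepthToSpace only rearranges elements, one template covers every dtype in the switch above; the whole operation is index arithmetic. For NHWC, each output element maps back to the input as below (a reference sketch following the TFLite definition that cker mirrors):

// in : [b, in_h,      in_w,      out_depth * bs * bs]
// out: [b, in_h * bs, in_w * bs, out_depth]
inline void depthToSpaceRef(const float *in, float *out, int batches, int in_h, int in_w,
                            int out_depth, int bs)
{
  const int out_h = in_h * bs, out_w = in_w * bs, in_depth = out_depth * bs * bs;
  for (int b = 0; b < batches; ++b)
    for (int oh = 0; oh < out_h; ++oh)
      for (int ow = 0; ow < out_w; ++ow)
        for (int oc = 0; oc < out_depth; ++oc)
        {
          // The (oh % bs, ow % bs) sub-position selects the input channel group.
          const int ic = oc + ((oh % bs) * bs + (ow % bs)) * out_depth;
          const int in_idx = ((b * in_h + oh / bs) * in_w + ow / bs) * in_depth + ic;
          const int out_idx = ((b * out_h + oh) * out_w + ow) * out_depth + oc;
          out[out_idx] = in[in_idx];
        }
}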
diff --git a/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.h b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.h
new file mode 100644
index 000000000..32e0171ce
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_DEPTH_TO_SPACE_LAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_DEPTH_TO_SPACE_LAYER_H__
+
+#include <backend/IPortableTensor.h>
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+class DepthToSpaceLayer : public ::onert::exec::IFunction
+{
+public:
+ DepthToSpaceLayer();
+
+ void configure(const IPortableTensor *input, const int32_t block_size, IPortableTensor *output);
+
+ void run() override;
+
+private:
+ template <typename T> void depthToSpace();
+
+ const IPortableTensor *_input;
+ int32_t _block_size;
+ IPortableTensor *_output;
+};
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_DEPTH_TO_SPACE_LAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc
index e67c3f390..9e6de17f2 100644
--- a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc
+++ b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc
@@ -16,6 +16,7 @@
#include "DepthwiseConvolutionLayer.h"
+#include "cker/PortableTensorUtils.h"
#include <cker/operation/DepthwiseConv.h>
namespace onert
@@ -27,14 +28,6 @@ namespace cpu
namespace ops
{
-DepthwiseConvolutionLayer::DepthwiseConvolutionLayer()
- : _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr), _paddingLeft(0),
- _paddingTop(0), _paddingRight(0), _paddingBottom(0), _strideWidth(0), _strideHeight(0),
- _multiplier(0), _activation(ir::Activation::NONE)
-{
- // DO NOTHING
-}
-
void DepthwiseConvolutionLayer::convFloat32()
{
float output_activation_min = 0, output_activation_max = 0;
@@ -43,27 +36,26 @@ void DepthwiseConvolutionLayer::convFloat32()
nnfw::cker::DepthwiseConvParams op_params;
op_params.stride_width = _strideWidth;
op_params.stride_height = _strideHeight;
- op_params.dilation_width_factor = 1;
- op_params.dilation_height_factor = 1;
+ op_params.dilation_width_factor = _dilationWidth;
+ op_params.dilation_height_factor = _dilationHeight;
op_params.padding_values.width = _paddingLeft;
op_params.padding_values.height = _paddingTop;
op_params.depth_multiplier = _multiplier;
op_params.float_activation_min = output_activation_min;
op_params.float_activation_max = output_activation_max;
- nnfw::cker::DepthwiseConv(
- op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_kernel), reinterpret_cast<const float *>(_kernel->buffer()),
- getTensorShape(_bias), reinterpret_cast<const float *>(_bias->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::DepthwiseConv<float, float>(
+ op_params, getShape(_input), getBuffer<float>(_input), getShape(_kernel),
+ getBuffer<float>(_kernel), getShape(_bias), getBuffer<float>(_bias), getShape(_output),
+ getBuffer<float>(_output), _external_context->ruy_context());
}
-void DepthwiseConvolutionLayer::convQuant8()
+void DepthwiseConvolutionLayer::convQ8uPerTensor()
{
int32_t output_activation_min = 0;
int32_t output_activation_max = 0;
- CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
- &output_activation_max);
+ CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
+ &output_activation_max);
double real_multiplier = 0.0;
int32_t output_multiplier = 0;
@@ -74,33 +66,180 @@ void DepthwiseConvolutionLayer::convQuant8()
nnfw::cker::DepthwiseConvParams op_params;
op_params.stride_width = _strideWidth;
op_params.stride_height = _strideHeight;
- op_params.dilation_width_factor = 1;
- op_params.dilation_height_factor = 1;
+ op_params.dilation_width_factor = _dilationWidth;
+ op_params.dilation_height_factor = _dilationHeight;
op_params.padding_values.width = _paddingLeft;
op_params.padding_values.height = _paddingTop;
op_params.depth_multiplier = _multiplier;
- op_params.input_offset = -_input->data_offset();
- op_params.weights_offset = -_kernel->data_offset();
- op_params.output_offset = _output->data_offset();
+ op_params.input_offset = -_input->data_zero_point();
+ op_params.weights_offset = -_kernel->data_zero_point();
+ op_params.output_offset = _output->data_zero_point();
op_params.output_multiplier = output_multiplier;
op_params.output_shift = output_shift;
op_params.quantized_activation_min = output_activation_min;
op_params.quantized_activation_max = output_activation_max;
- nnfw::cker::DepthwiseConv(
- op_params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()),
- getTensorShape(_kernel), reinterpret_cast<const uint8_t *>(_kernel->buffer()),
- getTensorShape(_bias), reinterpret_cast<const int32_t *>(_bias->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
+ nnfw::cker::DepthwiseConv<uint8_t, int32_t>(
+ op_params, getShape(_input), getBuffer<uint8_t>(_input), getShape(_kernel),
+ getBuffer<uint8_t>(_kernel), getShape(_bias), getBuffer<int32_t>(_bias), getShape(_output),
+ getBuffer<uint8_t>(_output), _external_context->ruy_context());
}
-void DepthwiseConvolutionLayer::configure(const IPortableTensor *input,
- const IPortableTensor *kernel,
- const IPortableTensor *bias, const uint32_t paddingLeft,
- const uint32_t paddingRight, const uint32_t paddingTop,
- const uint32_t paddingBottom, const uint32_t strideWidth,
- const uint32_t strideHeight, const uint32_t multiplier,
- const ir::Activation activation, IPortableTensor *output)
+void DepthwiseConvolutionLayer::convQ8uPerChannel()
+{
+ nnfw::cker::DepthwiseConvParams op_params;
+ op_params.padding_values.width = _paddingLeft;
+ op_params.padding_values.height = _paddingTop;
+ op_params.stride_width = _strideWidth;
+ op_params.stride_height = _strideHeight;
+ op_params.dilation_width_factor = _dilationWidth;
+ op_params.dilation_height_factor = _dilationHeight;
+ op_params.depth_multiplier = _multiplier;
+ op_params.input_offset = -_input->data_zero_point();
+ op_params.output_offset = _output->data_zero_point();
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
+ &output_activation_max);
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+ // NOTE: The following fields of ConvParams are not used:
+ // padding_type, weights_offset, output_{multiplier,shift}, float_activation_{min,max}
+
+ nnfw::cker::reference_integer_ops::DepthwiseConvPerChannel(
+ op_params, _per_channel_output_multiplier.data(), _per_channel_output_shift.data(),
+ getShape(_input), getBuffer<uint8_t>(_input), getShape(_kernel), getBuffer<uint8_t>(_kernel),
+ _kernel->data_zero_points().data(), getShape(_bias), getBuffer<int32_t>(_bias),
+ getShape(_output), getBuffer<uint8_t>(_output));
+}
+
+void DepthwiseConvolutionLayer::convQ8i()
+{
+ if (!_prepared)
+ {
+ prepareQ8i();
+ _prepared = true;
+ }
+
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
+ &output_activation_max);
+
+ nnfw::cker::DepthwiseConvParams op_params;
+ op_params.padding_type = nnfw::cker::PaddingType::kSame;
+ op_params.padding_values.width = _paddingLeft;
+ op_params.padding_values.height = _paddingTop;
+ op_params.depth_multiplier = _multiplier;
+ op_params.stride_width = _strideWidth;
+ op_params.stride_height = _strideHeight;
+ op_params.dilation_width_factor = _dilationWidth;
+ op_params.dilation_height_factor = _dilationHeight;
+ op_params.input_offset = -_input->data_zero_point();
+ op_params.weights_offset = 0;
+ op_params.output_offset = _output->data_zero_point();
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+
+ nnfw::cker::optimized_integer_ops::DepthwiseConvPerChannel(
+ op_params, _per_channel_output_multiplier.data(), _per_channel_output_shift.data(),
+ getShape(_input), getBuffer<int8_t>(_input), getShape(_kernel), getBuffer<int8_t>(_kernel),
+ getShape(_bias), getBuffer<int32_t>(_bias), getShape(_output), getBuffer<int8_t>(_output),
+ _external_context->ruy_context());
+}
+
+void DepthwiseConvolutionLayer::convQ8iHybridPerChannel()
+{
+ if (!_prepared)
+ {
+ prepareQ8iHybridPerChannel();
+ _prepared = true;
+ }
+
+ float output_activation_min = 0, output_activation_max = 0;
+ CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
+
+ auto input_shape = getShape(_input);
+ const int batch_size = input_shape.Dims(0);
+ const int input_size = input_shape.FlatSize() / batch_size;
+
+ auto scaling_factors_ptr = _input_scaling_factors.data();
+ auto input_offsets_ptr = _input_offsets.data();
+
+ for (int b = 0; b < batch_size; ++b)
+ {
+ const int offset = b * input_size;
+ nnfw::cker::PortableAsymmetricQuantizeFloats(getBuffer<float>(_input) + offset, input_size,
+ _input_quantized.data() + offset,
+ &scaling_factors_ptr[b], &input_offsets_ptr[b]);
+ }
+
+ nnfw::cker::DepthwiseConvParams op_params;
+ op_params.padding_values.width = _paddingLeft;
+ op_params.padding_values.height = _paddingTop;
+ op_params.depth_multiplier = _multiplier;
+ op_params.stride_width = _strideWidth;
+ op_params.stride_height = _strideHeight;
+ op_params.dilation_width_factor = _dilationWidth;
+ op_params.dilation_height_factor = _dilationHeight;
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+
+ nnfw::cker::reference_integer_ops::DepthwiseConvHybridPerChannel(
+ op_params, _input_scaling_factors.data(), getShape(_input), _input_quantized.data(),
+ getShape(_kernel), getBuffer<int8_t>(_kernel), getShape(_bias), getBuffer<float>(_bias),
+ getShape(_output), getBuffer<float>(_output), _kernel->data_scales().data(),
+ _input_offsets.data());
+}
+
+void DepthwiseConvolutionLayer::prepareQ8i()
+{
+ GetQuantizedConvolutionMultipliersAndShifts(
+ _input->data_scale(), _output->data_scale(), _kernel->data_scales().data(),
+ _kernel->data_scales().size(), getShape(_kernel).Dims(3), _per_channel_output_multiplier,
+ _per_channel_output_shift);
+}
+
+void DepthwiseConvolutionLayer::prepareQ8uPerChannel()
+{
+ GetQuantizedConvolutionMultipliersAndShifts(
+ _input->data_scale(), _output->data_scale(), _kernel->data_scales().data(),
+ _kernel->data_scales().size(), getShape(_kernel).Dims(3), _per_channel_output_multiplier,
+ _per_channel_output_shift);
+}
+
+void DepthwiseConvolutionLayer::prepareQ8iHybridPerChannel()
+{
+  // Allocate memory for activation quantization:
+  // - quantized values (int8_t type, same shape as the original input)
+  // - quantization params (= scale/zero point for each input)
+ auto input_shape = getShape(_input);
+ const int batch_size = input_shape.Dims(0);
+ const int input_size = input_shape.FlatSize() / batch_size;
+ _input_quantized.resize(input_size);
+ // TODO: Optimize the case of batch_size = 1
+ _input_scaling_factors.resize(batch_size);
+ _input_offsets.resize(batch_size);
+}
+
+void DepthwiseConvolutionLayer::ensureQ8iHybridPerChannel()
+{
+  // Ensure the weight is per-channel quantized.
+ int32_t kernel_input_channel = getShape(_kernel).Dims(3);
+  // zero_points comes from a flatbuffer vector, so its size is within uint32_t range.
+ size_t kernel_zerop_cnt = _kernel->data_scales().size();
+ // promote to int64_t to compare int32_t and uint32_t
+ if ((int64_t)kernel_input_channel != (int64_t)kernel_zerop_cnt)
+ throw std::runtime_error{"DConv2D hybrid supports only per-channel quantized weight."};
+}
+
+void DepthwiseConvolutionLayer::configure(
+ const IPortableTensor *input, const IPortableTensor *kernel, const IPortableTensor *bias,
+ const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
+ const uint32_t paddingBottom, const uint32_t strideWidth, const uint32_t strideHeight,
+ const uint32_t multiplier, const uint32_t dilationWidth, const uint32_t dilationHeight,
+ const ir::Activation activation, IPortableTensor *output,
+ const std::shared_ptr<ExternalContext> &external_context)
{
_input = input;
_kernel = kernel;
@@ -112,19 +251,61 @@ void DepthwiseConvolutionLayer::configure(const IPortableTensor *input,
_strideWidth = strideWidth;
_strideHeight = strideHeight;
_multiplier = multiplier;
+ _dilationWidth = dilationWidth;
+ _dilationHeight = dilationHeight;
_activation = activation;
_output = output;
+ _external_context = external_context;
+ _is_hybrid = _input->data_type() == OperandType::FLOAT32 &&
+ _kernel->data_type() == OperandType::QUANT_INT8_SYMM;
+
+ if (_is_hybrid)
+ {
+ ensureQ8iHybridPerChannel();
+ prepareQ8iHybridPerChannel();
+ _prepared = true;
+ }
+ else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
+ {
+ if (_kernel->is_constant() && !_input->is_dynamic() && !_output->is_dynamic())
+ {
+ prepareQ8i();
+ _prepared = true;
+ }
+ }
+ else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM && _kernel->is_constant() &&
+ !_input->is_dynamic() && !_output->is_dynamic())
+ {
+ const bool per_channel_quantized = _kernel->data_scales().size() > 1;
+ if (per_channel_quantized)
+ {
+ prepareQ8uPerChannel();
+ _prepared = true;
+ }
+ }
}
void DepthwiseConvolutionLayer::run()
{
- if (_input->data_type() == OperandType::FLOAT32)
+ if (_is_hybrid)
+ {
+ convQ8iHybridPerChannel();
+ }
+ else if (_input->data_type() == OperandType::FLOAT32)
{
convFloat32();
}
else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
{
- convQuant8();
+ const bool per_channel_quantized = _kernel->data_scales().size() > 1;
+ if (per_channel_quantized)
+ convQ8uPerChannel();
+ else
+ convQ8uPerTensor();
+ }
+ else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
+ {
+ convQ8i();
}
else
{
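
The hybrid path added above (convQ8iHybridPerChannel) quantizes each float input batch to int8 on the fly, runs the integer kernel against the per-channel int8 weights, and rescales back to float. A rough sketch of the per-batch asymmetric quantization step, under the assumption that it behaves like the usual TFLite scheme; the real kernel is nnfw::cker::PortableAsymmetricQuantizeFloats and may differ in detail:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Pick scale/offset so that [min, max] of the batch maps onto int8, then quantize.
    void AsymmetricQuantizeSketch(const float *values, int size, int8_t *quantized,
                                  float *scaling_factor, int32_t *offset)
    {
      const auto mm = std::minmax_element(values, values + size);
      const float rmin = std::min(0.0f, *mm.first);
      const float rmax = std::max(0.0f, *mm.second);
      *scaling_factor = (rmax - rmin) / 255.0f;
      if (*scaling_factor == 0.0f)
        *scaling_factor = 1.0f; // all-zero batch; any scale works
      *offset = static_cast<int32_t>(std::round(-128.0f - rmin / *scaling_factor));
      for (int i = 0; i < size; ++i)
      {
        const int32_t q = static_cast<int32_t>(std::round(values[i] / *scaling_factor)) + *offset;
        quantized[i] = static_cast<int8_t>(std::max(-128, std::min(127, q)));
      }
    }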
diff --git a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h
index c898255a3..5721f8796 100644
--- a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h
+++ b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h
@@ -19,6 +19,7 @@
#include <backend/IPortableTensor.h>
#include "OperationUtils.h"
+#include "../ExternalContext.h"
#include <exec/IFunction.h>
@@ -34,39 +35,67 @@ namespace ops
class DepthwiseConvolutionLayer : public ::onert::exec::IFunction
{
public:
- DepthwiseConvolutionLayer();
+ DepthwiseConvolutionLayer() = default;
public:
void convFloat32();
- void convQuant8();
+ void convQ8uPerTensor();
+ void convQ8uPerChannel();
+
+ void convQ8i();
+ void convQ8iHybridPerChannel();
void configure(const IPortableTensor *input, const IPortableTensor *kernel,
const IPortableTensor *bias, const uint32_t paddingLeft,
const uint32_t paddingRight, const uint32_t paddingTop,
const uint32_t paddingBottom, const uint32_t strideW, const uint32_t strideH,
- const uint32_t multiplier, const ir::Activation activation,
- IPortableTensor *output);
+ const uint32_t multiplier, const uint32_t dilationWidth,
+ const uint32_t dilationHeight, const ir::Activation activation,
+ IPortableTensor *output, const std::shared_ptr<ExternalContext> &external_context);
void run() override;
private:
- const IPortableTensor *_input;
- const IPortableTensor *_kernel;
- const IPortableTensor *_bias;
- IPortableTensor *_output;
+ void prepareQ8i();
+ void prepareQ8uPerChannel();
+ void prepareQ8iHybridPerChannel();
+ void ensureQ8iHybridPerChannel();
+
+private:
+ const IPortableTensor *_input{nullptr};
+ const IPortableTensor *_kernel{nullptr};
+ const IPortableTensor *_bias{nullptr};
+ IPortableTensor *_output{nullptr};
+
+ uint32_t _paddingLeft{0};
+ uint32_t _paddingTop{0};
+ uint32_t _paddingRight{0};
+ uint32_t _paddingBottom{0};
+
+ uint32_t _strideWidth{0};
+ uint32_t _strideHeight{0};
+
+ uint32_t _multiplier{0};
+
+ uint32_t _dilationWidth{1};
+ uint32_t _dilationHeight{1};
+
+ ir::Activation _activation{ir::Activation::NONE};
- uint32_t _paddingLeft;
- uint32_t _paddingTop;
- uint32_t _paddingRight;
- uint32_t _paddingBottom;
+ std::shared_ptr<ExternalContext> _external_context;
- uint32_t _strideWidth;
- uint32_t _strideHeight;
+ bool _prepared{false};
- uint32_t _multiplier;
+ // Per channel output multiplier and shift.
+ std::vector<int32_t> _per_channel_output_multiplier;
+ std::vector<int> _per_channel_output_shift;
- ir::Activation _activation;
+ // For hybrid
+ bool _is_hybrid{false};
+ std::vector<int8_t> _input_quantized;
+ std::vector<float> _input_scaling_factors;
+ std::vector<int32_t> _input_offsets;
};
} // namespace ops
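
The _per_channel_output_multiplier/_per_channel_output_shift pairs introduced above hold fixed-point encodings of the per-channel rescale factor input_scale * filter_scale[c] / output_scale. A sketch of the conversion, assuming the standard TFLite-style representation real ~= multiplier * 2^shift / 2^31 (QuantizeMultiplierSketch is an illustrative name, not the helper the code calls):

    #include <cmath>
    #include <cstdint>

    void QuantizeMultiplierSketch(double real, int32_t *multiplier, int *shift)
    {
      if (real == 0.0)
      {
        *multiplier = 0;
        *shift = 0;
        return;
      }
      const double q = std::frexp(real, shift); // real = q * 2^shift, q in [0.5, 1)
      int64_t q_fixed = static_cast<int64_t>(std::round(q * (1LL << 31)));
      if (q_fixed == (1LL << 31)) // rounding overflowed to 1.0 * 2^31
      {
        q_fixed /= 2;
        ++*shift;
      }
      *multiplier = static_cast<int32_t>(q_fixed);
    }

    // Per-channel usage: one rescale factor per output channel, e.g.
    // QuantizeMultiplierSketch(input_scale * filter_scales[c] / output_scale,
    //                          &out_multiplier[c], &out_shift[c]);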
diff --git a/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc b/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc
new file mode 100644
index 000000000..dc9e20e0a
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc
@@ -0,0 +1,308 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DetectionPostProcessLayer.h"
+
+#include "ndarray/Array.h"
+
+#include <numeric>
+#include <utility>
+#include <cmath>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+namespace
+{
+
+using namespace ndarray;
+
+using CenterSizeBox = DetectionPostProcessLayer::CenterSizeBox;
+using CornerBox = DetectionPostProcessLayer::CornerBox;
+
+using NonMaxSuppressionParam = DetectionPostProcessLayer::DetectionPostProcessParameters;
+using Allocations = DetectionPostProcessLayer::Allocations;
+
+struct OutputArrays
+{
+ OutputArrays(CornerBox *coords_buf, float *scores_buf, float *classes_buf,
+ int *num_selections_buf, size_t max_detections)
+ : coords(coords_buf, {max_detections}), scores(scores_buf, {max_detections}),
+ classes(classes_buf, {max_detections}), num_selections(num_selections_buf, {1})
+ {
+ }
+
+ Array<CornerBox> coords;
+ Array<float> scores;
+ Array<float> classes;
+ Array<int> num_selections;
+};
+
+struct TemporaryArrays
+{
+ TemporaryArrays(int *selections_buffer, int max_detections)
+ : selections(selections_buffer, {static_cast<unsigned long>(max_detections)})
+ {
+ }
+
+ Array<int> selections;
+};
+
+// Sort indices so that the first `k` entries refer to the `k` highest scores, in decreasing order
+void PartialArgSort(const ContiguousSpan<float, true> &scores,
+ const ContiguousSpan<int, false> &indices, int k)
+{
+ std::iota(indices.begin(), indices.begin() + k, 0);
+ std::partial_sort(indices.begin(), indices.begin() + k, indices.begin() + scores.size(),
+ [&scores](const int i, const int j) { return scores[i] > scores[j]; });
+}
+
+template <typename T> ContiguousSpan<T, false> static vecToSpan(std::vector<T> &v)
+{
+ return ContiguousSpan<T, false>{v.begin(), v.end()};
+}
+
+Array<const CornerBox> decodeBoxes(const Array<float> &raw_boxes, const Array<float> &raw_anchors,
+ bool center_box_format, const CenterSizeBox &scales)
+{
+ auto nbatches = raw_boxes.shape().dim(0);
+ auto num_boxes = raw_boxes.shape().dim(1);
+
+ auto anchors = array_cast<const CenterSizeBox>(raw_anchors, {num_boxes});
+
+ if (!center_box_format)
+ {
+ auto boxes_p = reinterpret_cast<const CornerBox *>(raw_boxes.flat().data());
+ return {boxes_p, {num_boxes}};
+ }
+ else
+ {
+ // TODO support box center-width encoding correctly
+      // i.e. anchors
+ auto boxes_p = reinterpret_cast<const CenterSizeBox *>(raw_boxes.flat().data());
+ Array<const CenterSizeBox> in_boxes{boxes_p, {num_boxes}};
+
+ auto decoded_boxes_p = new CornerBox[nbatches * num_boxes];
+ Array<CornerBox> decoded_boxes_a{decoded_boxes_p, {num_boxes}};
+
+ for (size_t i = 0; i < num_boxes; ++i)
+ {
+ const auto &anchor = anchors.at(i);
+ auto &box = decoded_boxes_a.at(i);
+ float yc = in_boxes.at(i).y / scales.y * anchor.h + anchor.y;
+ float xc = in_boxes.at(i).x / scales.x * anchor.w + anchor.x;
+ float halfh = 0.5f * std::exp(in_boxes.at(i).h / scales.h) * anchor.h;
+ float halfw = 0.5f * std::exp(in_boxes.at(i).w / scales.w) * anchor.w;
+ box.x1 = xc - halfw;
+ box.x2 = xc + halfw;
+ box.y1 = yc - halfh;
+ box.y2 = yc + halfh;
+
+ assert(box.x2 > box.x1);
+ assert(box.y2 > box.y1);
+ }
+
+ const auto &decoded_boxes_a_shape = decoded_boxes_a.shape();
+
+ return array_cast<const CornerBox>(std::move(decoded_boxes_a), decoded_boxes_a_shape);
+ }
+}
+
+float computeIOU(const CornerBox &box1, const CornerBox &box2)
+{
+ float area_i = (box1.y2 - box1.y1) * (box1.x2 - box1.x1);
+ float area_j = (box2.y2 - box2.y1) * (box2.x2 - box2.x1);
+ if (area_i <= 0 || area_j <= 0)
+ {
+ return 0.0;
+ }
+ float in_ymin = std::max<float>(box1.y1, box2.y1);
+ float in_xmin = std::max<float>(box1.x1, box2.x1);
+ float in_ymax = std::min<float>(box1.y2, box2.y2);
+ float in_xmax = std::min<float>(box1.x2, box2.x2);
+ float in_area = std::max<float>(in_ymax - in_ymin, 0.0) * std::max<float>(in_xmax - in_xmin, 0.0);
+
+ return in_area / (area_i + area_j - in_area);
+}
+
+int doSingleClass(const Array<const CornerBox> &boxes, const std::vector<float> &scores,
+ const NonMaxSuppressionParam &param, TemporaryArrays &temps,
+ size_t max_detections)
+{
+ auto num_boxes = boxes.shape().dim(0);
+
+ std::vector<int> sorted_box_indices(num_boxes);
+ PartialArgSort(ContiguousSpan<float, true>(scores.data(), num_boxes),
+ vecToSpan(sorted_box_indices), num_boxes);
+
+ // TODO move to temp allocations
+ std::vector<int> process_box(num_boxes, 1);
+
+ size_t selected_count = 0;
+ for (size_t i = 0; i < num_boxes; ++i)
+ {
+ auto box_index = sorted_box_indices[i];
+
+ if (!process_box[box_index] || scores[box_index] < param.score_threshold)
+ {
+ continue;
+ }
+
+ temps.selections.at(selected_count) = box_index;
+ selected_count++;
+
+ if (selected_count >= max_detections)
+ {
+ break;
+ }
+
+ for (size_t j = i + 1; j < num_boxes; ++j)
+ {
+ if (!process_box[sorted_box_indices[j]])
+ {
+ continue;
+ }
+
+ float IOU = computeIOU(boxes.at(box_index), boxes.at(sorted_box_indices[j]));
+ if (IOU > param.iou_threshold)
+ {
+ process_box[sorted_box_indices[j]] = 0;
+ }
+ }
+ }
+
+ return selected_count;
+}
+
+void collectBoxes(TemporaryArrays &temporary, const Array<const CornerBox> &decoded_boxes,
+ std::vector<float> &scores, int num_selected, OutputArrays &output,
+ const Array<int> &sorted_classes, int detections_per_box)
+{
+ auto &selections = temporary.selections;
+
+ size_t output_box_count = 0;
+
+ for (int i = 0; i < num_selected; ++i)
+ {
+ int selected_box = selections.at(output_box_count);
+
+ for (int c = 0; c < detections_per_box; ++c)
+ {
+ output.classes.at(output_box_count) = sorted_classes.at(selected_box, c);
+ output.scores.at(output_box_count) = scores[selected_box];
+ output.coords.at(output_box_count) = decoded_boxes.at(selected_box);
+ output_box_count++;
+ }
+ }
+}
+
+void DetectionPostProcess(const Array<float> &boxes_a, const Array<float> &scores_a,
+ Array<float> &num_selected_a, const NonMaxSuppressionParam &param,
+ const Allocations &allocations, OutputArrays &outputs)
+{
+ TemporaryArrays temporary(allocations.selections_buffer, param.max_detections);
+
+  // Only a batch size of 1 is supported at the moment
+ auto num_boxes = boxes_a.shape().dim(1);
+ size_t num_classes = param.num_classes;
+ size_t num_classes_with_background = scores_a.shape().dim(2);
+ bool have_background = num_classes_with_background != num_classes;
+
+ size_t max_classes_per_box = std::min<size_t>(num_classes, param.max_classes_per_detection);
+
+ // TODO move this to allocations
+ std::vector<int> sorted_class_indices(num_boxes * num_classes);
+
+ Array<int> class_indices(sorted_class_indices.data(), {num_boxes, num_classes});
+
+ // TODO move to allocations
+ std::vector<float> max_scores(num_boxes);
+
+ for (size_t row = 0; row < num_boxes; row++)
+ {
+ auto box_scores = scores_a.slice(0, row).offset(have_background ? 1 : 0);
+ auto indices = class_indices.slice(row);
+
+ PartialArgSort(box_scores, indices, num_classes);
+
+ max_scores[row] = box_scores[indices[0]];
+ }
+
+ auto anchors_a =
+ Array<float>(reinterpret_cast<float *>(param.anchors_input->buffer()), {num_boxes, 4});
+ auto decoded_boxes = decodeBoxes(boxes_a, anchors_a, param.center_box_format, param.scales);
+
+ int num_selected =
+ doSingleClass(decoded_boxes, max_scores, param, temporary, param.max_detections);
+
+ collectBoxes(temporary, decoded_boxes, max_scores, num_selected, outputs, class_indices,
+ max_classes_per_box);
+
+ num_selected_a.at(0) = num_selected;
+}
+} // namespace
+
+template <typename T> Array<T> toArray(uint8_t *ptr, std::vector<int32_t> &descr)
+{
+ ndarray::Shape shape(descr.size());
+ for (size_t i = 0; i < descr.size(); ++i)
+ {
+ shape.dim(i) = descr[i];
+ }
+
+ return Array<T>{reinterpret_cast<T *>(ptr), shape};
+}
+
+void DetectionPostProcessLayer::configure(DetectionPostProcessParameters parameters)
+{
+ _parameters = std::move(parameters);
+ _allocations.selections_buffer = new int[_parameters.max_detections * 2];
+}
+
+void DetectionPostProcessLayer::run()
+{
+ auto nbatches = (unsigned int)_parameters.boxes_descr[0];
+  // No support for batch sizes other than 1 (this is fine since tflite does not
+  // support batched postprocess either)
+ assert(nbatches == 1);
+
+ auto boxes_a = toArray<float>(_parameters.boxes_input->buffer(), _parameters.boxes_descr);
+ auto scores_a = toArray<float>(_parameters.scores_input->buffer(), _parameters.scrores_descr);
+
+ auto num_selected_a = ndarray::Array<float>(
+ reinterpret_cast<float *>(_parameters.num_selections_output->buffer()), {nbatches});
+
+ OutputArrays outputArrays(reinterpret_cast<CornerBox *>(_parameters.box_coords_output->buffer()),
+ reinterpret_cast<float *>(_parameters.box_scores_output->buffer()),
+ reinterpret_cast<float *>(_parameters.box_classes_output->buffer()),
+ reinterpret_cast<int *>(_parameters.num_selections_output->buffer()),
+ _parameters.max_detections);
+
+ DetectionPostProcess(boxes_a, scores_a, num_selected_a, _parameters, _allocations, outputArrays);
+}
+
+DetectionPostProcessLayer::~DetectionPostProcessLayer() { delete[] _allocations.selections_buffer; }
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
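
The center-size branch of decodeBoxes() above converts each raw (y, x, h, w) box, expressed relative to its anchor and divided by the per-field scales, into corner form. A standalone restatement of that arithmetic (struct names are illustrative):

    #include <cmath>

    struct CenterBox { float y, x, h, w; };
    struct Corner    { float y1, x1, y2, x2; };

    Corner decode(const CenterBox &raw, const CenterBox &anchor, const CenterBox &scales)
    {
      // Recover the box center from the anchor-relative offsets,
      // and the box extents from the log-encoded sizes.
      const float yc = raw.y / scales.y * anchor.h + anchor.y;
      const float xc = raw.x / scales.x * anchor.w + anchor.x;
      const float half_h = 0.5f * std::exp(raw.h / scales.h) * anchor.h;
      const float half_w = 0.5f * std::exp(raw.w / scales.w) * anchor.w;
      return Corner{yc - half_h, xc - half_w, yc + half_h, xc + half_w};
    }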
diff --git a/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.h b/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.h
new file mode 100644
index 000000000..836a70cac
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_DPP_H__
+#define __ONERT_BACKEND_CPU_OPS_DPP_H__
+
+#include <exec/IFunction.h>
+
+#include "OperationUtils.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+class DetectionPostProcessLayer : public ::onert::exec::IFunction
+{
+public:
+ struct CornerBox
+ {
+ float y1, x1;
+ float y2, x2;
+ };
+
+ struct CenterSizeBox
+ {
+ float y, x;
+ float h, w;
+ };
+
+ struct DetectionPostProcessParameters
+ {
+ const IPortableTensor *boxes_input;
+ const IPortableTensor *scores_input;
+ const IPortableTensor *anchors_input;
+ IPortableTensor *box_coords_output;
+ IPortableTensor *box_classes_output;
+ IPortableTensor *box_scores_output;
+ IPortableTensor *num_selections_output;
+ std::vector<int32_t> boxes_descr;
+ std::vector<int32_t> scrores_descr;
+
+ uint32_t max_detections;
+ float score_threshold;
+ float iou_threshold; // intersection-over-union
+ uint32_t max_boxes_per_class;
+ bool center_box_format = false;
+ int32_t num_classes;
+ int32_t max_classes_per_detection;
+ CenterSizeBox scales;
+ };
+
+ enum SelectionFormat
+ {
+ BOX_INDEX = 1,
+ CLASS_INDEX = 0
+ };
+
+ struct Allocations
+ {
+ int *selections_buffer = nullptr;
+ // TODO move all dynamic allocations here, and into configure phase
+ };
+
+ DetectionPostProcessLayer() : _parameters{}
+ {
+ // DO NOTHING
+ }
+
+ virtual ~DetectionPostProcessLayer();
+
+public:
+ void configure(DetectionPostProcessParameters parameters);
+
+ void run() override;
+
+private:
+ DetectionPostProcessParameters _parameters;
+
+ Allocations _allocations;
+};
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_DPP_H__
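
doSingleClass() in the accompanying .cc greedily keeps the highest-scoring boxes and suppresses any later box whose intersection-over-union with a kept box exceeds iou_threshold. A tiny worked check of the IOU formula computeIOU() implements: two unit squares overlapping by half share an intersection of 0.5 and a union of 1.5, so IOU = 1/3.

    #include <algorithm>
    #include <cassert>

    struct Box { float y1, x1, y2, x2; };

    float iou(const Box &a, const Box &b)
    {
      const float area_a = (a.y2 - a.y1) * (a.x2 - a.x1);
      const float area_b = (b.y2 - b.y1) * (b.x2 - b.x1);
      const float iy = std::max(0.0f, std::min(a.y2, b.y2) - std::max(a.y1, b.y1));
      const float ix = std::max(0.0f, std::min(a.x2, b.x2) - std::max(a.x1, b.x1));
      const float inter = iy * ix;
      return inter / (area_a + area_b - inter);
    }

    int main()
    {
      Box a{0, 0, 1, 1}, b{0, 0.5f, 1, 1.5f};
      assert(iou(a, b) > 0.33f && iou(a, b) < 0.34f); // 0.5 / 1.5
    }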
diff --git a/runtime/onert/backend/cpu/ops/EinsumLayer.cc b/runtime/onert/backend/cpu/ops/EinsumLayer.cc
index 8c16740a3..8e10c4642 100644
--- a/runtime/onert/backend/cpu/ops/EinsumLayer.cc
+++ b/runtime/onert/backend/cpu/ops/EinsumLayer.cc
@@ -28,7 +28,7 @@ namespace ops
{
EinsumLayer::EinsumLayer()
- : _inputs(), _output(nullptr), _equation(), _einsum_kernel(new nnfw::cker::Einsum())
+ : _inputs(), _output(nullptr), _equation(), _einsum_kernel(new nnfw::cker::Einsum())
{
// DO NOTHING
}
@@ -47,12 +47,11 @@ void EinsumLayer::einsumFloat32()
for (uint32_t i = 0; i < num_inputs; i++)
{
- inputShapes.emplace_back(getTensorShape(_inputs[i]));
- inputFloatPtrs.emplace_back(reinterpret_cast<const float *>(_inputs[i]->buffer()));
+ inputShapes.emplace_back(getShape(_inputs[i]));
+ inputFloatPtrs.emplace_back(getBuffer<float>(_inputs[i]));
}
- kernel(_equation, inputShapes, inputFloatPtrs, getTensorShape(_output),
- reinterpret_cast<float *>(_output->buffer()));
+ kernel(_equation, inputShapes, inputFloatPtrs, getShape(_output), getBuffer<float>(_output));
}
void EinsumLayer::run()
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc
index c1d63172b..27b2cdf68 100644
--- a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc
@@ -18,6 +18,8 @@
#include "OperationUtils.h"
+#include <cker/operation/ELU.h>
+#include <cker/operation/LeakyReLU.h>
#include <cker/operation/Logistic.h>
#include <cker/operation/ReLU.h>
#include <cker/operation/ReLU6.h>
@@ -33,7 +35,7 @@ namespace ops
{
ElementwiseActivationLayer::ElementwiseActivationLayer()
- : _input(nullptr), _output(nullptr), _kernel()
+ : _input(nullptr), _output(nullptr), _kernel()
{
// DO NOTHING
}
@@ -41,9 +43,9 @@ ElementwiseActivationLayer::ElementwiseActivationLayer()
void ElementwiseActivationLayer::PopulateLookupTable(const ElementwiseActivationType op_type)
{
const auto input_scale = static_cast<double>(_input->data_scale());
- const auto input_zero_point = static_cast<int32_t>(_input->data_offset());
+ const auto input_zero_point = static_cast<int32_t>(_input->data_zero_point());
const auto output_scale = static_cast<double>(_output->data_scale());
- const auto output_zero_point = static_cast<int32_t>(_output->data_offset());
+ const auto output_zero_point = static_cast<int32_t>(_output->data_zero_point());
const float inverse_scale = 1 / output_scale;
int32_t maxval = std::numeric_limits<uint8_t>::max();
int32_t minval = std::numeric_limits<uint8_t>::min();
@@ -72,9 +74,9 @@ void ElementwiseActivationLayer::PopulateLookupTable(const ElementwiseActivation
void ElementwiseActivationLayer::EvalUsingLookupTable(const IPortableTensor *input,
IPortableTensor *output)
{
- const int size = MatchingFlatSize(getTensorShape(input), getTensorShape(output));
- const uint8_t *input_data = reinterpret_cast<const uint8_t *>(input->buffer());
- uint8_t *output_data = reinterpret_cast<uint8_t *>(output->buffer());
+ const int size = MatchingFlatSize(getShape(input), getShape(output));
+ const uint8_t *input_data = getBuffer<uint8_t>(input);
+ uint8_t *output_data = getBuffer<uint8_t>(output);
for (int i = 0; i < size; ++i)
{
@@ -91,6 +93,19 @@ void ElementwiseActivationLayer::configure(const IPortableTensor *input, IPortab
switch (op_type)
{
+ case ElementwiseActivationType::kElu:
+ if (input->data_type() == OperandType::FLOAT32)
+ {
+ _kernel = [](const IPortableTensor *input, IPortableTensor *output) {
+ nnfw::cker::ELU(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
+ };
+ }
+ else
+ {
+ throw std::runtime_error{"ElementwiseActivationLayer(Elu): unsupported data type"};
+ }
+ break;
case ElementwiseActivationType::kLogistic:
if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
{
@@ -101,9 +116,8 @@ void ElementwiseActivationLayer::configure(const IPortableTensor *input, IPortab
else if (_input->data_type() == OperandType::FLOAT32)
{
_kernel = [](const IPortableTensor *input, IPortableTensor *output) {
- nnfw::cker::Logistic(getTensorShape(input),
- reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Logistic(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
};
}
else
@@ -117,23 +131,20 @@ void ElementwiseActivationLayer::configure(const IPortableTensor *input, IPortab
if (alpha == std::numeric_limits<float>::infinity() && beta == 0.f)
{
_kernel = [](const IPortableTensor *input, IPortableTensor *output) {
- nnfw::cker::ReLU(getTensorShape(input),
- reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::ReLU(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
};
}
else if (alpha == 6.f && beta == 0.f)
{
_kernel = [](const IPortableTensor *input, IPortableTensor *output) {
- nnfw::cker::ReLU6(getTensorShape(input),
- reinterpret_cast<const float *>(input->buffer()),
- reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::ReLU6(getShape(input), getBuffer<float>(input), getBuffer<float>(output));
};
}
else
{
throw std::runtime_error(
- "ElementwiseActivationLayer : This layer suppports only ReLU(0-inf) and ReLU6(0-6)");
+ "ElementwiseActivationLayer : This layer suppports only ReLU(0-inf) and ReLU6(0-6)");
}
}
else
@@ -151,8 +162,8 @@ void ElementwiseActivationLayer::configure(const IPortableTensor *input, IPortab
else if (_input->data_type() == OperandType::FLOAT32)
{
_kernel = [](const IPortableTensor *input, IPortableTensor *output) {
- nnfw::cker::Tanh(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Tanh(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
};
}
else
@@ -160,6 +171,20 @@ void ElementwiseActivationLayer::configure(const IPortableTensor *input, IPortab
throw std::runtime_error{"ElementwiseActivationLayer(Logistic): unsupported data type"};
}
break;
+ case ElementwiseActivationType::kLeakyReLU:
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ _kernel = [alpha](const IPortableTensor *input, IPortableTensor *output) {
+ nnfw::cker::LeakyReLU(nnfw::cker::LeakyReluParams{alpha}, getShape(input),
+ getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
+ };
+ }
+ else
+ {
+ throw std::runtime_error{"ElementwiseActivationLayer(LeakyReLU): unsupported data type"};
+ }
+ break;
default:
throw std::runtime_error("ElementwiseActivationLayer: unsupported op type");
}
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h
index 3ef580041..d8a90148f 100644
--- a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h
+++ b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h
@@ -32,9 +32,11 @@ namespace ops
enum class ElementwiseActivationType
{
+ kElu,
kLogistic,
kReLU,
- kTanh
+ kTanh,
+ kLeakyReLU
};
class ElementwiseActivationLayer : public ::onert::exec::IFunction
@@ -52,7 +54,7 @@ public:
void EvalUsingLookupTable(const IPortableTensor *input, IPortableTensor *output);
-private:
+protected:
const IPortableTensor *_input;
IPortableTensor *_output;
uint8_t _table[256];
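
The _table member above backs the lookup-table trick used by PopulateLookupTable/EvalUsingLookupTable: a uint8-quantized unary activation has only 256 possible inputs, so the whole function is precomputed once and evaluation becomes a single table index per element. A sketch for the logistic case, with illustrative scale/zero-point values rather than ones taken from a real model:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    void populate_logistic_table(uint8_t table[256], float in_scale, int32_t in_zp,
                                 float out_scale, int32_t out_zp)
    {
      for (int q = 0; q < 256; ++q)
      {
        const float x = in_scale * (q - in_zp);       // dequantize input
        const float y = 1.0f / (1.0f + std::exp(-x)); // logistic
        const int32_t out = static_cast<int32_t>(std::round(y / out_scale)) + out_zp;
        table[q] = static_cast<uint8_t>(std::min(255, std::max(0, out)));
      }
    }

    // Evaluation is then: for (int i = 0; i < size; ++i) output[i] = table[input[i]];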
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc
index ea3c1e7cd..391bf512c 100644
--- a/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc
@@ -18,6 +18,8 @@
#include "OperationUtils.h"
+#include <cker/operation/FloorDiv.h>
+#include <cker/operation/LogicalAnd.h>
#include <cker/operation/LogicalOr.h>
#include <cker/operation/MaxMin.h>
@@ -33,45 +35,73 @@ namespace ops
namespace
{
template <typename T>
+void FloorDivGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs,
+ IPortableTensor *output)
+{
+ if (!HaveSameShapes(lhs, rhs))
+ {
+ nnfw::cker::FloorDivBroadcast<T>(getShape(lhs), getBuffer<T>(lhs), getShape(rhs),
+ getBuffer<T>(rhs), getShape(output), getBuffer<T>(output));
+ }
+ else
+ {
+ nnfw::cker::FloorDivElementwise<T>(getShape(lhs), getBuffer<T>(lhs), getBuffer<T>(rhs),
+ getBuffer<T>(output));
+ }
+}
+
+template <typename T>
+void logicalAndGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs,
+ IPortableTensor *output)
+{
+ if (!HaveSameShapes(lhs, rhs))
+ {
+ nnfw::cker::LogicalAndBroadcast<T>(getShape(lhs), getBuffer<T>(lhs), getShape(rhs),
+ getBuffer<T>(rhs), getShape(output), getBuffer<T>(output));
+ }
+ else
+ {
+ nnfw::cker::LogicalAndElementwise<T>(getShape(lhs), getBuffer<T>(lhs), getBuffer<T>(rhs),
+ getBuffer<T>(output));
+ }
+}
+
+template <typename T>
void logicalOrGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs,
IPortableTensor *output)
{
if (!HaveSameShapes(lhs, rhs))
{
- nnfw::cker::LogicalOrBroadcast<T>(
- getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), getTensorShape(rhs),
- reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
- reinterpret_cast<T *>(output->buffer()));
+ nnfw::cker::LogicalOrBroadcast<T>(getShape(lhs), getBuffer<T>(lhs), getShape(rhs),
+ getBuffer<T>(rhs), getShape(output), getBuffer<T>(output));
}
else
{
- nnfw::cker::LogicalOrElementwise<T>(
- getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- reinterpret_cast<const T *>(rhs->buffer()), reinterpret_cast<T *>(output->buffer()));
+ nnfw::cker::LogicalOrElementwise<T>(getShape(lhs), getBuffer<T>(lhs), getBuffer<T>(rhs),
+ getBuffer<T>(output));
}
}
template <typename T>
void maximumGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output)
{
- nnfw::cker::Max<T>(getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getTensorShape(output), reinterpret_cast<T *>(output->buffer()));
+ nnfw::cker::Max<T>(getShape(lhs), getBuffer<T>(lhs), getShape(rhs), getBuffer<T>(rhs),
+ getShape(output), getBuffer<T>(output));
}
template <typename T>
void minimumGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output)
{
- nnfw::cker::Min<T>(getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getTensorShape(output), reinterpret_cast<T *>(output->buffer()));
+ nnfw::cker::Min<T>(getShape(lhs), getBuffer<T>(lhs), getShape(rhs), getBuffer<T>(rhs),
+ getShape(output), getBuffer<T>(output));
}
bool haveSameQauntInfo(const IPortableTensor *lhs, const IPortableTensor *rhs,
const IPortableTensor *output)
{
return (lhs->data_scale() == rhs->data_scale() && lhs->data_scale() == output->data_scale()) &&
- (lhs->data_offset() == rhs->data_offset() && lhs->data_offset() == output->data_offset());
+ (lhs->data_zero_point() == rhs->data_zero_point() &&
+ lhs->data_zero_point() == output->data_zero_point());
}
} // namespace
@@ -88,6 +118,30 @@ void ElementwiseBinaryLayer::configure(const IPortableTensor *lhs, const IPortab
switch (op_type)
{
+ case ElementwiseBinaryType::kFloorDiv:
+ if (_lhs->data_type() == OperandType::FLOAT32)
+ {
+ _kernel = FloorDivGeneric<float>;
+ }
+ else if (_lhs->data_type() == OperandType::INT32)
+ {
+ _kernel = FloorDivGeneric<int32_t>;
+ }
+ else
+ {
+ throw std::runtime_error{"Max: unsupported data type"};
+ }
+ break;
+ case ElementwiseBinaryType::kLogicalAnd:
+ if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8))
+ {
+ _kernel = logicalAndGeneric<bool>;
+ }
+ else
+ {
+ throw std::runtime_error{"LogicalOr: Unsupported data type"};
+ }
+ break;
case ElementwiseBinaryType::kLogicalOr:
if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8))
{
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.h b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.h
index 052747a4c..af3bb63c7 100644
--- a/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.h
+++ b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.h
@@ -32,6 +32,7 @@ namespace ops
enum class ElementwiseBinaryType
{
+ kFloorDiv,
kLogicalAnd,
kLogicalOr,
kMax,
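
The new kFloorDiv entry dispatches to the broadcast/elementwise kernels shown in the .cc above. The per-element operation is the floor of the true quotient, which for mixed-sign operands differs from C++ integer division (which truncates toward zero). A minimal sketch of that semantics:

    #include <cmath>
    #include <cstdint>

    inline float floor_div(float lhs, float rhs) { return std::floor(lhs / rhs); }

    inline int32_t floor_div(int32_t lhs, int32_t rhs)
    {
      int32_t q = lhs / rhs;
      if ((lhs % rhs != 0) && ((lhs < 0) != (rhs < 0)))
        --q; // truncation went toward zero; step down to the floor
      return q;
    }

    // e.g. floor_div(-7, 2) == -4, whereas -7 / 2 == -3 in C++.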
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc
index f8f89ab15..d58937b5f 100644
--- a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc
@@ -18,11 +18,11 @@
#include "OperationUtils.h"
+#include <cker/operation/Dequantize.h>
#include <cker/operation/Elementwise.h>
#include <cker/operation/Erf.h>
#include <cker/operation/Exp.h>
#include <cker/operation/LogicalNot.h>
-#include <cker/operation/Quantize.h>
#include <cker/operation/Round.h>
namespace onert
@@ -38,8 +38,8 @@ namespace
{
void absFloat32(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Abs(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Abs(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
}
template <typename FromT>
@@ -82,8 +82,8 @@ void cast(const IPortableTensor *input, IPortableTensor *output)
const auto in = *reinterpret_cast<const DataPtr *>(&input_buf);
auto out = *reinterpret_cast<DataPtr *>(&output_buf);
- auto input_shape = getTensorShape(input);
- auto output_shape = getTensorShape(output);
+ auto input_shape = getShape(input);
+ auto output_shape = getShape(output);
const auto num_elements = MatchingFlatSize(input_shape, output_shape);
switch (input->data_type())
@@ -114,64 +114,85 @@ void cast(const IPortableTensor *input, IPortableTensor *output)
void cosFloat32(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Cos(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Cos(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
+}
+
+void dequantizeInt8(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Dequantize(getShape(input), getBuffer<int8_t>(input), getShape(output),
+ getBuffer<float>(output), input->data_scale(), input->data_zero_point());
+}
+
+void dequantizeUint8(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Dequantize(getShape(input), getBuffer<uint8_t>(input), getShape(output),
+ getBuffer<float>(output), input->data_scale(), input->data_zero_point());
}
void expFloat32(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Exp(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Exp(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
}
void erfFloat32(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Erf(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Erf(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
}
-void logFloat32(const IPortableTensor *input, IPortableTensor *output)
+void floorFloat32(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Log(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Floor(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
}
-void logicalNot(const IPortableTensor *input, IPortableTensor *output)
+void logFloat32(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::LogicalNot(getTensorShape(input), reinterpret_cast<const bool *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
+ nnfw::cker::Log(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
}
-void negFloat32(const IPortableTensor *input, IPortableTensor *output)
+void logicalNot(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Neg(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::LogicalNot(getShape(input), getBuffer<bool>(input), getShape(output),
+ getBuffer<bool>(output));
}
-template <typename InputT, typename OutputT>
-void affineQuantize(const IPortableTensor *input, IPortableTensor *output)
+template <typename T> void neg(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Quantize(getTensorShape(input), reinterpret_cast<const InputT *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<OutputT *>(output->buffer()),
- output->data_scale(), output->data_offset());
+ nnfw::cker::Neg<T>(getShape(input), getBuffer<T>(input), getShape(output), getBuffer<T>(output));
}
void roundFloat32(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Round(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Round(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
}
void rsqrtFloat32(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Rsqrt(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Rsqrt(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
}
void sinFloat32(const IPortableTensor *input, IPortableTensor *output)
{
- nnfw::cker::Sin(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
- getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ nnfw::cker::Sin(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
+}
+
+void sqrtFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Sqrt(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
+}
+
+void squareFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Square(getShape(input), getBuffer<float>(input), getShape(output),
+ getBuffer<float>(output));
}
template <typename T> void zerosLikeFloat32(const IPortableTensor *input, IPortableTensor *output)
@@ -179,9 +200,9 @@ template <typename T> void zerosLikeFloat32(const IPortableTensor *input, IPorta
if (!HaveSameShapes(input, output))
throw std::runtime_error{"ZerosLike: input and output shape don't match."};
- auto element_size = getTensorShape(input).FlatSize();
+ auto element_size = getShape(input).FlatSize();
- memset(reinterpret_cast<T *>(output->buffer()), 0, element_size * sizeof(T));
+ memset(getBuffer<T>(output), 0, element_size * sizeof(T));
}
} // namespace
@@ -219,6 +240,21 @@ void ElementwiseUnaryLayer::configure(const IPortableTensor *input, IPortableTen
throw std::runtime_error{"Cos: Unsupported data type"};
}
break;
+ case ElementwiseUnaryType::kDequantize:
+ if ((input->data_type() == OperandType::QUANT_UINT8_ASYMM))
+ {
+ _kernel = dequantizeUint8;
+ }
+ else if ((input->data_type() == OperandType::QUANT_INT8_ASYMM) ||
+ (input->data_type() == OperandType::QUANT_INT8_SYMM))
+ {
+ _kernel = dequantizeInt8;
+ }
+ else
+ {
+ throw std::runtime_error{"Dequantize: Unsupported data type"};
+ }
+ break;
case ElementwiseUnaryType::kExp:
if ((input->data_type() == OperandType::FLOAT32))
{
@@ -239,6 +275,16 @@ void ElementwiseUnaryLayer::configure(const IPortableTensor *input, IPortableTen
throw std::runtime_error{"Exp: Unsupported data type"};
}
break;
+ case ElementwiseUnaryType::kFloor:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = floorFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Floor: Unsupported data type"};
+ }
+ break;
case ElementwiseUnaryType::kLog:
if ((input->data_type() == OperandType::FLOAT32))
{
@@ -262,21 +308,19 @@ void ElementwiseUnaryLayer::configure(const IPortableTensor *input, IPortableTen
case ElementwiseUnaryType::kNeg:
if ((input->data_type() == OperandType::FLOAT32))
{
- _kernel = negFloat32;
+ _kernel = neg<float>;
}
- else
+ else if ((input->data_type() == OperandType::INT64))
{
- throw std::runtime_error{"Neg: Unsupported data type"};
+ _kernel = neg<int64_t>;
}
- break;
- case ElementwiseUnaryType::kQuantize:
- if ((input->data_type() == OperandType::FLOAT32))
+ else if ((input->data_type() == OperandType::INT32))
{
- _kernel = affineQuantize<float, uint8_t>;
+ _kernel = neg<int32_t>;
}
else
{
- throw std::runtime_error{"Quantize: Unsupported data type"};
+ throw std::runtime_error{"Neg: Unsupported data type"};
}
break;
case ElementwiseUnaryType::kRound:
@@ -309,6 +353,26 @@ void ElementwiseUnaryLayer::configure(const IPortableTensor *input, IPortableTen
throw std::runtime_error{"Sin: Unsupported data type"};
}
break;
+ case ElementwiseUnaryType::kSqrt:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = sqrtFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Sqrt: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kSquare:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = squareFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Square: Unsupported data type"};
+ }
+ break;
case ElementwiseUnaryType::kZerosLike:
if (input->data_type() == OperandType::FLOAT32)
{
@@ -324,7 +388,7 @@ void ElementwiseUnaryLayer::configure(const IPortableTensor *input, IPortableTen
}
break;
default:
- throw std::runtime_error{"ElementwiseBinary: Unsupported ElementwiseBinary type"};
+ throw std::runtime_error{"ElementwiseUnary: Unsupported ElementwiseUnary type"};
}
}
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h
index 74968386d..54a6fc02a 100644
--- a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h
+++ b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h
@@ -35,8 +35,10 @@ enum class ElementwiseUnaryType
kAbs,
kCast,
kCos,
+ kDequantize,
kErf,
kExp,
+ kFloor,
kLog,
kLogicalNot,
kNeg,
@@ -44,6 +46,8 @@ enum class ElementwiseUnaryType
kRound,
kRSqrt,
kSin,
+ kSqrt,
+ kSquare,
kZerosLike
};
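
The new kDequantize case maps quantized integers back to float with the usual affine rule real = scale * (q - zero_point); dequantizeInt8 and dequantizeUint8 in the .cc above only differ in the storage type they read. A generic sketch of that formula:

    #include <cstdint>

    // scale/zero_point come from the input tensor's quantization info.
    template <typename Q>
    void dequantize(const Q *in, float *out, int size, float scale, int32_t zero_point)
    {
      for (int i = 0; i < size; ++i)
        out[i] = scale * (static_cast<int32_t>(in[i]) - zero_point);
    }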
diff --git a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc
index b545e6743..5ea0ea893 100644
--- a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc
@@ -25,22 +25,19 @@ namespace cpu
namespace ops
{
-ExpandDimsLayer::ExpandDimsLayer() : _input(nullptr), _axis(nullptr), _output(nullptr)
+ExpandDimsLayer::ExpandDimsLayer() : _input(nullptr), _output(nullptr)
{
// DO NOTHING
}
-void ExpandDimsLayer::configure(const IPortableTensor *input, const IPortableTensor *axis,
- IPortableTensor *output)
+void ExpandDimsLayer::configure(const IPortableTensor *input, IPortableTensor *output)
{
_input = input;
- _axis = axis;
_output = output;
}
void ExpandDimsLayer::run()
{
- // TODO use _axis to calculate shape of output when _axis is not constant
size_t count = _input->total_size();
memcpy(_output->buffer(), _input->buffer(), count);
}
diff --git a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h
index b5d4938b5..1b7ead0c3 100644
--- a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h
+++ b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h
@@ -36,14 +36,12 @@ public:
ExpandDimsLayer();
public:
- void configure(const IPortableTensor *input, const IPortableTensor *axis,
- IPortableTensor *output);
+ void configure(const IPortableTensor *input, IPortableTensor *output);
void run() override;
private:
const IPortableTensor *_input;
- const IPortableTensor *_axis;
IPortableTensor *_output;
};
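
ExpandDims only changes the reported shape; the element data and its row-major layout are identical, which is why run() in the .cc above is a plain memcpy of total_size() bytes (and why the axis input could be dropped from configure()). A small sketch of the shape-only effect, with an illustrative helper name:

    #include <cassert>
    #include <vector>

    // Insert a 1 at `axis`; the buffer is untouched.
    std::vector<int> expand_dims(const std::vector<int> &shape, int axis)
    {
      if (axis < 0)
        axis += static_cast<int>(shape.size()) + 1; // e.g. -1 appends a trailing 1
      std::vector<int> out(shape);
      out.insert(out.begin() + axis, 1);
      return out;
    }

    int main()
    {
      assert((expand_dims({2, 3}, 0) == std::vector<int>{1, 2, 3}));
      assert((expand_dims({2, 3}, -1) == std::vector<int>{2, 3, 1}));
    }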
diff --git a/runtime/onert/backend/cpu/ops/FillLayer.cc b/runtime/onert/backend/cpu/ops/FillLayer.cc
index 0a95ab005..cc12fcbd8 100644
--- a/runtime/onert/backend/cpu/ops/FillLayer.cc
+++ b/runtime/onert/backend/cpu/ops/FillLayer.cc
@@ -29,15 +29,13 @@ namespace cpu
namespace ops
{
-FillLayer::FillLayer() : _input(nullptr), _value(nullptr), _output(nullptr)
+FillLayer::FillLayer() : _value(nullptr), _output(nullptr)
{
// DO NOTHING
}
-void FillLayer::configure(const IPortableTensor *input, const IPortableTensor *value,
- IPortableTensor *output)
+void FillLayer::configure(const IPortableTensor *value, IPortableTensor *output)
{
- _input = input;
_value = value;
_output = output;
}
@@ -47,22 +45,20 @@ void FillLayer::run()
switch (_output->data_type())
{
case OperandType::FLOAT32:
- nnfw::cker::Fill<float *>(getTensorShape(_input), reinterpret_cast<int *>(_input->buffer()),
- reinterpret_cast<float *>(_value->buffer()),
- getTensorShape(_output),
- reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::Fill<float>(getBuffer<float>(_value), getShape(_output),
+ getBuffer<float>(_output));
break;
case OperandType::INT32:
- nnfw::cker::Fill<int32_t *>(getTensorShape(_input), reinterpret_cast<int *>(_input->buffer()),
- reinterpret_cast<int32_t *>(_value->buffer()),
- getTensorShape(_output),
- reinterpret_cast<int32_t *>(_output->buffer()));
+ nnfw::cker::Fill<int32_t>(getBuffer<int32_t>(_value), getShape(_output),
+ getBuffer<int32_t>(_output));
+ break;
+ case OperandType::INT64:
+ nnfw::cker::Fill<int64_t>(getBuffer<int64_t>(_value), getShape(_output),
+ getBuffer<int64_t>(_output));
break;
case OperandType::UINT32:
- nnfw::cker::Fill<uint32_t *>(
- getTensorShape(_input), reinterpret_cast<int *>(_input->buffer()),
- reinterpret_cast<uint32_t *>(_value->buffer()), getTensorShape(_output),
- reinterpret_cast<uint32_t *>(_output->buffer()));
+ nnfw::cker::Fill<uint32_t>(getBuffer<uint32_t>(_value), getShape(_output),
+ getBuffer<uint32_t>(_output));
break;
default:
throw std::runtime_error{"Fill: unsupported data type"};
diff --git a/runtime/onert/backend/cpu/ops/FillLayer.h b/runtime/onert/backend/cpu/ops/FillLayer.h
index 1f17d6b68..ce843654a 100644
--- a/runtime/onert/backend/cpu/ops/FillLayer.h
+++ b/runtime/onert/backend/cpu/ops/FillLayer.h
@@ -35,13 +35,11 @@ class FillLayer : public ::onert::exec::IFunction
public:
FillLayer();
- void configure(const IPortableTensor *input, const IPortableTensor *value,
- IPortableTensor *output);
+ void configure(const IPortableTensor *value, IPortableTensor *output);
void run() override;
private:
- const IPortableTensor *_input;
const IPortableTensor *_value;
IPortableTensor *_output;
};
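
The reworked Fill above drops the dims input because the output tensor already carries its shape at run time; all that remains is broadcasting one scalar over every element. The per-type dispatch in the .cc reduces to the following sketch:

    // Broadcast a single value across the whole output buffer.
    template <typename T>
    void fill(const T *value, T *out, int flat_size)
    {
      for (int i = 0; i < flat_size; ++i)
        out[i] = *value;
    }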
diff --git a/runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc b/runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc
index 05da33abf..32cad84cb 100644
--- a/runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc
+++ b/runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc
@@ -31,9 +31,9 @@ namespace ops
{
FullyConnectedLayer::FullyConnectedLayer()
- : _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr),
- _activation(ir::Activation::NONE), _temp_arena(new nnfw::cker::FCTempArena()),
- _external_context(nullptr), _is_hybrid(false)
+ : _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr),
+ _activation(ir::Activation::NONE), _temp_arena(new nnfw::cker::FCTempArena()),
+ _external_context(nullptr), _is_hybrid(false), _is_shuffled16x1float32(false)
{
// DO NOTHING
}
@@ -42,19 +42,22 @@ FullyConnectedLayer::~FullyConnectedLayer() = default;
void FullyConnectedLayer::fullyConnectedFloat32()
{
- float output_activation_min = 0, output_activation_max = 0;
+ nnfw::cker::FullyConnectedParams op_params;
+ float output_activation_min = 0;
+ float output_activation_max = 0;
CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
- nnfw::cker::FullyConnectedParams op_params;
+ op_params.activation = convertActivationType(_activation);
op_params.float_activation_min = output_activation_min;
op_params.float_activation_max = output_activation_max;
- op_params.activation = convertActivationType(_activation);
-
- nnfw::cker::FullyConnected(
- op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
- getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+  // TODO Set both cacheables to false when training
+ op_params.lhs_cacheable = _weights->is_constant();
+ op_params.rhs_cacheable = _input->is_constant();
+
+ nnfw::cker::FullyConnected(op_params, getShape(_input), getBuffer<float>(_input),
+ getShape(_weights), getBuffer<float>(_weights), getShape(_bias),
+ _bias ? getBuffer<float>(_bias) : nullptr, getShape(_output),
+ getBuffer<float>(_output));
}
// executionMutex is used to protect concurrent access of non-threadsafe resources
@@ -68,23 +71,22 @@ void FullyConnectedLayer::fullyConnectedQuant8()
int32_t output_activation_max = 0;
GetQuantizedConvolutionMultiplier(_input, _weights, _bias, _output, &real_multiplier);
QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
- CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
- &output_activation_max);
+ CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
+ &output_activation_max);
nnfw::cker::FullyConnectedParams op_params;
- op_params.input_offset = -_input->data_offset();
- op_params.weights_offset = -_weights->data_offset();
- op_params.output_offset = _output->data_offset();
+ op_params.input_offset = -_input->data_zero_point();
+ op_params.weights_offset = -_weights->data_zero_point();
+ op_params.output_offset = _output->data_zero_point();
op_params.output_multiplier = output_multiplier;
op_params.output_shift = output_shift;
op_params.quantized_activation_min = output_activation_min;
op_params.quantized_activation_max = output_activation_max;
- nnfw::cker::FullyConnected(
- op_params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()),
- getTensorShape(_weights), reinterpret_cast<const uint8_t *>(_weights->buffer()),
- getTensorShape(_bias), reinterpret_cast<const int32_t *>(_bias ? _bias->buffer() : nullptr),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
+ nnfw::cker::FullyConnected(op_params, getShape(_input), getBuffer<uint8_t>(_input),
+ getShape(_weights), getBuffer<uint8_t>(_weights), getShape(_bias),
+ _bias ? getBuffer<int32_t>(_bias) : nullptr, getShape(_output),
+ getBuffer<uint8_t>(_output));
}
void FullyConnectedLayer::fullyConnectedHybrid()
@@ -92,7 +94,7 @@ void FullyConnectedLayer::fullyConnectedHybrid()
nnfw::cker::FCTempArena &temp_arena = *_temp_arena;
if (!temp_arena.prepared)
{
- temp_arena.prepare(getTensorShape(_input), getTensorShape(_weights));
+ temp_arena.prepare(getShape(_input), getShape(_weights));
}
nnfw::cker::FullyConnectedParams op_params;
@@ -101,20 +103,16 @@ void FullyConnectedLayer::fullyConnectedHybrid()
#ifndef USE_RUY_GEMV
nnfw::cker::FullyConnectedHybrid(
- op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_weights), reinterpret_cast<const int8_t *>(_weights->buffer()),
- getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena,
- _external_context->ruy_context());
+ op_params, getShape(_input), getBuffer<float>(_input), getShape(_weights),
+ getBuffer<int8_t>(_weights), getShape(_bias), _bias ? getBuffer<float>(_bias) : nullptr,
+ getShape(_output), getBuffer<float>(_output), temp_arena, _external_context->ruy_context());
#else
nnfw::cker::FullyConnectedHybrid(
- op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_weights),
- (_cached_weights) ? reinterpret_cast<const int8_t *>(_cached_weights)
- : reinterpret_cast<const int8_t *>(_weights->buffer()),
- getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena,
- _external_context->ruy_context());
+ op_params, getShape(_input), getBuffer<float>(_input), getShape(_weights),
+ (_cached_weights) ? reinterpret_cast<const int8_t *>(_cached_weights)
+ : getBuffer<int8_t>(_weights),
+ getShape(_bias), _bias ? getBuffer<float>(_bias) : nullptr, getShape(_output),
+ getBuffer<float>(_output), temp_arena, _external_context->ruy_context());
if (_cached_weights == nullptr || _is_weights_freed)
return;
@@ -125,8 +123,8 @@ void FullyConnectedLayer::fullyConnectedHybrid()
// If the input's elements are all zero, ruy bypasses the kernel path,
// so handle this case here
- const int input_size = getTensorShape(_input).FlatSize();
- if (nnfw::cker::IsZeroVector(reinterpret_cast<float *>(_input->buffer()), input_size))
+ const int input_size = getShape(_input).FlatSize();
+ if (nnfw::cker::IsZeroVector(getBuffer<float>(_input), input_size))
return;
auto weight_tensor = nnfw::misc::polymorphic_downcast<const Tensor *>(_weights);
@@ -143,6 +141,10 @@ void FullyConnectedLayer::fullyConnectedHybrid()
tensor->decrease_ref();
if (tensor->buffer() == nullptr) // ref == 0?
{
+#if defined(__ANDROID__) && (__ANDROID_API__ >= 26)
+      // NOTE This line forces the OS to release any unused memory immediately
+ mallopt(M_PURGE, 0);
+#endif
_is_weights_freed = true;
}
#endif
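M_PURGE is a bionic (Android libc) extension to mallopt() that asks the allocator to hand cached free pages back to the kernel immediately. A hedged, self-contained sketch of the same guard (exact availability varies by NDK/API level):

```cpp
#include <malloc.h>

// Ask the allocator to return cached free pages to the OS. No-op off-Android.
static void purgeAllocatorCaches()
{
#if defined(__ANDROID__) && (__ANDROID_API__ >= 26)
  mallopt(M_PURGE, 0); // bionic-specific option; the value argument is ignored
#endif
}
```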
@@ -150,28 +152,52 @@ void FullyConnectedLayer::fullyConnectedHybrid()
void FullyConnectedLayer::fullyConnectedSparseWeight()
{
+ nnfw::cker::FullyConnectedParams op_params;
+ op_params.activation = convertActivationType(_activation);
+
+ const uint16_t *w1_segments = _weights->sparsity()->w1_segments();
+ const uint16_t *w1_indices = _weights->sparsity()->w1_indices();
+
+ auto block_size = _weights->sparsity()->block_size();
+ if (block_size.size() == 0)
+ {
+ nnfw::cker::FullyConnectedSparseWeightRandom(
+ op_params, getShape(_input), getBuffer<float>(_input), getShape(_weights),
+ getBuffer<float>(_weights), getShape(_bias), _bias ? getBuffer<float>(_bias) : nullptr,
+ getShape(_output), getBuffer<float>(_output), w1_segments, w1_indices);
+ }
+ else if (block_size.size() == 2 && block_size[0] == 16 && block_size[1] == 1)
+ {
+ nnfw::cker::FullyConnectedSparseWeight16x1(
+ op_params, getShape(_input), getBuffer<float>(_input), getShape(_weights),
+ getBuffer<float>(_weights), getShape(_bias), _bias ? getBuffer<float>(_bias) : nullptr,
+ getShape(_output), getBuffer<float>(_output), w1_segments, w1_indices);
+ }
+ else
+ throw std::runtime_error{"FullyConnected: unsupported sparsity"};
+}
+
+void FullyConnectedLayer::fullyConnected16x1Float32()
+{
+#if defined(__aarch64__) && defined(USE_NEON)
float output_activation_min = 0, output_activation_max = 0;
CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
nnfw::cker::FullyConnectedParams op_params;
- op_params.float_activation_min = output_activation_min;
- op_params.float_activation_max = output_activation_max;
op_params.activation = convertActivationType(_activation);
- int w0_size = getTensorShape(_weights).Dims(0);
- const uint16_t *w1_segments = _weights->w1_segments();
- const uint16_t *w1_indices = _weights->w1_indices();
-
- nnfw::cker::FullyConnectedSparseWeight(
- op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
- getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), w0_size, w1_segments,
- w1_indices);
+ nnfw::cker::FullyConnected16x1Float32(op_params, getShape(_input), getBuffer<float>(_input),
+ getShape(_weights), getBuffer<float>(_weights),
+ getShape(_bias), _bias ? getBuffer<float>(_bias) : nullptr,
+ getShape(_output), getBuffer<float>(_output));
+#else
+ throw std::runtime_error{"FullyConnected: Shuffled16x1Float32 weights_format is not supported."};
+#endif
}
void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights,
const IPortableTensor *bias, ir::Activation activation,
+ ir::FullyConnectedWeightsFormat weights_format,
IPortableTensor *output,
const std::shared_ptr<ExternalContext> &external_context)
{
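The new sparse path dispatches on the sparsity metadata's block size: an empty block size means element-wise (random) sparsity, {16, 1} selects the blocked 16x1 kernel, and everything else is rejected. A self-contained sketch of that rule:

```cpp
#include <cstdint>
#include <stdexcept>
#include <vector>

enum class SparseKernel { Random, Blocked16x1 };

SparseKernel selectSparseKernel(const std::vector<int32_t> &block_size)
{
  if (block_size.empty())
    return SparseKernel::Random; // element-wise sparsity
  if (block_size.size() == 2 && block_size[0] == 16 && block_size[1] == 1)
    return SparseKernel::Blocked16x1; // 16x1 block sparsity
  throw std::runtime_error{"FullyConnected: unsupported sparsity"};
}
```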
@@ -182,6 +208,14 @@ void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortabl
_output = output;
_is_hybrid = input->data_type() == OperandType::FLOAT32 &&
weights->data_type() == OperandType::QUANT_INT8_SYMM;
+ _is_shuffled16x1float32 = weights_format == ir::FullyConnectedWeightsFormat::Shuffled16x1Float32;
+#if !defined(__aarch64__) || !defined(USE_NEON)
+ if (_is_shuffled16x1float32)
+ {
+ throw std::runtime_error{
+ "FullyConnected: Shuffled16x1Float32 weights_format is not supported."};
+ }
+#endif
_external_context = external_context;
}
@@ -191,13 +225,13 @@ void FullyConnectedLayer::run()
{
fullyConnectedHybrid();
}
- else if (_weights->is_sparse())
+ else if (_weights->sparsity())
{
fullyConnectedSparseWeight();
}
else if (_input->data_type() == OperandType::FLOAT32)
{
- fullyConnectedFloat32();
+ _is_shuffled16x1float32 ? fullyConnected16x1Float32() : fullyConnectedFloat32();
}
else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
{
@@ -213,8 +247,8 @@ void FullyConnectedLayer::prepare()
{
if (_bias && _bias->is_constant())
{
- const int bias_size = getTensorShape(_bias).FlatSize();
- if (nnfw::cker::IsZeroVector(reinterpret_cast<float *>(_bias->buffer()), bias_size))
+ const int bias_size = getShape(_bias).FlatSize();
+ if (nnfw::cker::IsZeroVector(getBuffer<float>(_bias), bias_size))
{
_bias = nullptr;
}
@@ -236,20 +270,14 @@ void FullyConnectedLayer::prepare()
if (_input->is_dynamic() || !_weights->is_constant())
return;
- const int rows = getTensorShape(_weights).Dims(0);
+ const int rows = getShape(_weights).Dims(0);
if (rows % 4 == 0)
{
- const int total_input_size = getTensorShape(_input).FlatSize();
- const int input_size = getTensorShape(_weights).Dims(1);
- const int batch_size = total_input_size / input_size;
- if (batch_size <= 4)
- {
- // TODO If it's possible to extract precaching from ruy kernel,
- // place this instead of below code
+ // TODO If it's possible to extract precaching from ruy kernel,
+ // place this instead of below code
- // buffer will be used by ruy kernel as a cache key
- _cached_weights = _weights->buffer();
- }
+ // buffer will be used by ruy kernel as a cache key
+ _cached_weights = _weights->buffer();
}
#endif
}
diff --git a/runtime/onert/backend/cpu/ops/FullyConnectedLayer.h b/runtime/onert/backend/cpu/ops/FullyConnectedLayer.h
index f1242677c..c56398def 100644
--- a/runtime/onert/backend/cpu/ops/FullyConnectedLayer.h
+++ b/runtime/onert/backend/cpu/ops/FullyConnectedLayer.h
@@ -55,15 +55,18 @@ public:
void fullyConnectedSparseWeight();
+ void fullyConnected16x1Float32();
+
void configure(const IPortableTensor *input, const IPortableTensor *weights,
- const IPortableTensor *bias, ir::Activation activation, IPortableTensor *output,
+ const IPortableTensor *bias, ir::Activation activation,
+ ir::FullyConnectedWeightsFormat weights_format, IPortableTensor *output,
const std::shared_ptr<ExternalContext> &external_context);
void run() override;
void prepare() override;
-private:
+protected:
const IPortableTensor *_input;
const IPortableTensor *_weights;
const IPortableTensor *_bias;
@@ -74,7 +77,8 @@ private:
std::shared_ptr<ExternalContext> _external_context;
- bool _is_hybrid;
+ bool _is_hybrid : 1;
+ bool _is_shuffled16x1float32 : 1;
#ifdef USE_RUY_GEMV
uint8_t *_cached_weights = nullptr; // weights to be cached and a key
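Turning the two flags into 1-bit bitfields lets them share storage instead of occupying one byte each as the layer grows more mode flags. A sketch (exact packing is ABI-dependent, so this is an assumption about common ABIs, not a guarantee):

```cpp
struct Flags
{
  bool is_hybrid : 1;
  bool is_shuffled16x1float32 : 1;
};
// On common ABIs both bits pack into a single byte; at worst they take no
// more room than two plain bools.
static_assert(sizeof(Flags) <= 2 * sizeof(bool), "bitfields never take more room");
```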
diff --git a/runtime/onert/backend/cpu/ops/FusedBatchNormLayer.cc b/runtime/onert/backend/cpu/ops/FusedBatchNormLayer.cc
index c2c592db7..1bec15a08 100644
--- a/runtime/onert/backend/cpu/ops/FusedBatchNormLayer.cc
+++ b/runtime/onert/backend/cpu/ops/FusedBatchNormLayer.cc
@@ -28,8 +28,8 @@ namespace ops
{
FusedBatchNormLayer::FusedBatchNormLayer()
- : _inputs(), _output(nullptr), _epsilon(0), _is_training(true),
- _fusedbatchnorm_kernel(new nnfw::cker::FusedBatchNorm())
+ : _inputs(), _output(nullptr), _epsilon(0), _is_training(true),
+ _fusedbatchnorm_kernel(new nnfw::cker::FusedBatchNorm())
{
// DO NOTHING
}
@@ -48,8 +48,8 @@ void FusedBatchNormLayer::fusedbatchnormFloat32()
for (uint32_t i = 0; i < num_inputs; i++)
{
- inputShapes.emplace_back(getTensorShape(_inputs[i]));
- inputFloatPtrs.emplace_back(reinterpret_cast<const float *>(_inputs[i]->buffer()));
+ inputShapes.emplace_back(getShape(_inputs[i]));
+ inputFloatPtrs.emplace_back(getBuffer<float>(_inputs[i]));
}
nnfw::cker::FusedBatchNormParams param;
@@ -58,8 +58,7 @@ void FusedBatchNormLayer::fusedbatchnormFloat32()
param.is_training = _is_training;
param.data_format = _data_format;
- kernel(inputShapes, inputFloatPtrs, getTensorShape(_output),
- reinterpret_cast<float *>(_output->buffer()), param);
+ kernel(inputShapes, inputFloatPtrs, getShape(_output), getBuffer<float>(_output), param);
}
void FusedBatchNormLayer::run()
diff --git a/runtime/onert/backend/cpu/ops/GatherLayer.cc b/runtime/onert/backend/cpu/ops/GatherLayer.cc
index 641daa972..f955eef16 100644
--- a/runtime/onert/backend/cpu/ops/GatherLayer.cc
+++ b/runtime/onert/backend/cpu/ops/GatherLayer.cc
@@ -51,9 +51,8 @@ template <typename InputType> void GatherLayer::runByInputType()
using IndicesType = int32_t;
nnfw::cker::Gather<InputType, IndicesType>(
- op_params, getTensorShape(_input), reinterpret_cast<const InputType *>(_input->buffer()),
- getTensorShape(_indices), reinterpret_cast<const IndicesType *>(_indices->buffer()),
- getTensorShape(_output), reinterpret_cast<OutputType *>(_output->buffer()));
+ op_params, getShape(_input), getBuffer<InputType>(_input), getShape(_indices),
+ getBuffer<IndicesType>(_indices), getShape(_output), getBuffer<OutputType>(_output));
break;
}
case OperandType::INT64:
@@ -61,9 +60,8 @@ template <typename InputType> void GatherLayer::runByInputType()
using IndicesType = int64_t;
nnfw::cker::Gather<InputType, IndicesType>(
- op_params, getTensorShape(_input), reinterpret_cast<const InputType *>(_input->buffer()),
- getTensorShape(_indices), reinterpret_cast<const IndicesType *>(_indices->buffer()),
- getTensorShape(_output), reinterpret_cast<OutputType *>(_output->buffer()));
+ op_params, getShape(_input), getBuffer<InputType>(_input), getShape(_indices),
+ getBuffer<IndicesType>(_indices), getShape(_output), getBuffer<OutputType>(_output));
break;
}
default:
diff --git a/runtime/onert/backend/cpu/ops/L2NormLayer.cc b/runtime/onert/backend/cpu/ops/L2NormLayer.cc
index 0d99b0586..fe5019de6 100644
--- a/runtime/onert/backend/cpu/ops/L2NormLayer.cc
+++ b/runtime/onert/backend/cpu/ops/L2NormLayer.cc
@@ -44,19 +44,17 @@ void L2NormLayer::run()
switch (_input->data_type())
{
case OperandType::FLOAT32:
- nnfw::cker::L2NormalizeFloat32(
- getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::L2NormalizeFloat32(getShape(_input), getBuffer<float>(_input), getShape(_output),
+ getBuffer<float>(_output));
break;
case OperandType::QUANT_UINT8_ASYMM:
{
nnfw::cker::L2NormParams params;
- assert(_input->data_offset() == 128);
- params.input_zero_point = _input->data_offset();
- nnfw::cker::L2NormalizeQuant8(
- params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
+ assert(_input->data_zero_point() == 128);
+ params.input_zero_point = _input->data_zero_point();
+ nnfw::cker::L2NormalizeQuant8(params, getShape(_input), getBuffer<uint8_t>(_input),
+ getShape(_output), getBuffer<uint8_t>(_output));
}
break;
diff --git a/runtime/onert/backend/cpu/ops/LSTMLayer.cc b/runtime/onert/backend/cpu/ops/LSTMLayer.cc
new file mode 100644
index 000000000..16b0feec8
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/LSTMLayer.cc
@@ -0,0 +1,319 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "LSTMLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/LSTM.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+namespace
+{
+template <typename T>
+T *getOptionalOutputBuffer(onert::backend::IPortableTensor *tensor, std::vector<uint8_t> *temp_vec,
+ size_t total_size)
+{
+ if (tensor == nullptr)
+ {
+ temp_vec->reserve(total_size);
+ return reinterpret_cast<T *>(temp_vec->data());
+ }
+ else
+ {
+ assert(tensor->total_size() == total_size);
+ return getBuffer<T>(tensor);
+ }
+}
+
+inline void initializeStateBuffer(const onert::backend::IPortableTensor *tensor_in, void *buffer,
+ bool needs_memcpy)
+{
+ assert(tensor_in != nullptr);
+ assert(buffer != nullptr);
+ if (needs_memcpy)
+ memcpy(buffer, tensor_in->buffer(), tensor_in->total_size());
+ else
+ memset(buffer, 0, tensor_in->total_size());
+}
+} // namespace
+
+void LSTMLayer::LSTMFloat()
+{
+ auto in_shape = _input->getShape();
+ assert(in_shape.rank() >= 2 && in_shape.rank() <= 3);
+ int max_time, n_batch;
+ if (in_shape.rank() == 3)
+ {
+ max_time = (_time_major) ? in_shape.dim(0) : in_shape.dim(1);
+ n_batch = (_time_major) ? in_shape.dim(1) : in_shape.dim(0);
+ }
+ else
+ {
+ max_time = 1;
+ n_batch = in_shape.dim(0);
+ }
+ const int n_input = in_shape.dim(_input->getShape().rank() - 1);
+ const int aux_input_size = 0;
+
+ // n_cell and n_output will be the same size when there is no projection.
+ const int n_cell = _input_to_output_weights->getShape().dim(0);
+ const int n_output = _recurrent_to_output_weights->getShape().dim(1);
+
+ // Since we have already checked that weights are all there or none, we can
+  // check the existence of only one to get the condition.
+ const bool use_cifg = (_input_to_input_weights == nullptr);
+
+ // Optional outputs
+ float *output_state_buf = getOptionalOutputBuffer<float>(_output_state, &_output_state_vec,
+ _output_state_in->total_size());
+ float *cell_state_buf =
+ getOptionalOutputBuffer<float>(_cell_state, &_cell_state_vec, _cell_state_in->total_size());
+
+ initializeStateBuffer(_output_state_in, output_state_buf, _has_output_state_data);
+ initializeStateBuffer(_cell_state_in, cell_state_buf, _has_cell_state_data);
+
+ // Index the scratch buffers pointers to the global scratch buffer.
+ float *scratch_buffer_buf = getOptionalOutputBuffer<float>(
+ _scratch_buffer, &_scratch_vec, n_batch * n_cell * (use_cifg ? 3 : 4) * sizeof(float));
+ float *input_gate_scratch = nullptr;
+ float *cell_gate_scratch = nullptr;
+ float *forget_gate_scratch = nullptr;
+ float *output_gate_scratch = nullptr;
+ if (use_cifg)
+ {
+ cell_gate_scratch = scratch_buffer_buf;
+ forget_gate_scratch = scratch_buffer_buf + n_cell * n_batch;
+ output_gate_scratch = scratch_buffer_buf + 2 * n_cell * n_batch;
+ }
+ else
+ {
+ input_gate_scratch = scratch_buffer_buf;
+ cell_gate_scratch = scratch_buffer_buf + n_cell * n_batch;
+ forget_gate_scratch = scratch_buffer_buf + 2 * n_cell * n_batch;
+ output_gate_scratch = scratch_buffer_buf + 3 * n_cell * n_batch;
+ }
+
+ auto optional_tensor_ptr = [](const IPortableTensor *tensor) {
+    // If the tensor is not given or its size is 0, treat it as not given
+ return (tensor && tensor->total_size() > 0) ? getBuffer<float>(tensor) : nullptr;
+ };
+ // Optional inputs
+ const float *input_to_input_weights_ptr = optional_tensor_ptr(_input_to_input_weights);
+ const float *recurrent_to_input_weights_ptr = optional_tensor_ptr(_recurrent_to_input_weights);
+ const float *cell_to_input_weights_ptr = optional_tensor_ptr(_cell_to_input_weights);
+ const float *cell_to_forget_weights_ptr = optional_tensor_ptr(_cell_to_forget_weights);
+ const float *cell_to_output_weights_ptr = optional_tensor_ptr(_cell_to_output_weights);
+ const float *input_gate_bias_ptr = optional_tensor_ptr(_input_gate_bias);
+ const float *projection_weights_ptr = optional_tensor_ptr(_projection_weights);
+ const float *projection_bias_ptr = optional_tensor_ptr(_projection_bias);
+ const float *input_layer_norm_coefficients_ptr =
+ optional_tensor_ptr(_input_layer_norm_coefficients);
+ const float *forget_layer_norm_coefficients_ptr =
+ optional_tensor_ptr(_forget_layer_norm_coefficients);
+ const float *cell_layer_norm_coefficients_ptr =
+ optional_tensor_ptr(_cell_layer_norm_coefficients);
+ const float *output_layer_norm_coefficients_ptr =
+ optional_tensor_ptr(_output_layer_norm_coefficients);
+
+ // Copy out the LSTM specific params so they can be passed in the function.
+ nnfw::cker::LSTMParams lstm_params;
+ lstm_params.activation = convertActivationType(_params.activation);
+ lstm_params.cell_clip = _params.cell_threshold;
+ lstm_params.proj_clip = _params.projection_threshold;
+
+ auto out_shape = _output->getShape();
+ const int output_batch_leading_dim = out_shape.dim(out_shape.rank() - 1);
+ if (_time_major)
+ {
+ // Loop through the sequence.
+ const int input_step = n_batch * n_input;
+ const int output_step = n_batch * output_batch_leading_dim;
+ for (int t = 0; t < max_time; t++)
+ {
+ // If this is the forward_sequence, step forward, otherwise step
+ // backwards.
+ const int t_rel = _forward_sequence ? t : max_time - t - 1;
+ const float *input_ptr = getBuffer<float>(_input) + t_rel * input_step;
+ const float *aux_input_ptr = nullptr;
+ if (_aux_input)
+ {
+ aux_input_ptr = getBuffer<float>(_aux_input) + t_rel * input_step;
+ }
+ float *output_ptr = getBuffer<float>(_output) + t_rel * output_step + _output_offset;
+
+ LstmStepFloat(
+ input_ptr, input_to_input_weights_ptr, getBuffer<float>(_input_to_forget_weights),
+ getBuffer<float>(_input_to_cell_weights), getBuffer<float>(_input_to_output_weights),
+ aux_input_ptr,
+ /*aux_input_to_input_weights=*/nullptr,
+ /*aux_input_to_forget_weights=*/nullptr,
+ /*aux_input_to_cell_weights=*/nullptr,
+ /*aux_input_to_output_weights=*/nullptr, recurrent_to_input_weights_ptr,
+ getBuffer<float>(_recurrent_to_forget_weights),
+ getBuffer<float>(_recurrent_to_cell_weights),
+ getBuffer<float>(_recurrent_to_output_weights), cell_to_input_weights_ptr,
+ cell_to_forget_weights_ptr, cell_to_output_weights_ptr, input_layer_norm_coefficients_ptr,
+ forget_layer_norm_coefficients_ptr, cell_layer_norm_coefficients_ptr,
+ output_layer_norm_coefficients_ptr, input_gate_bias_ptr,
+ getBuffer<float>(_forget_gate_bias), getBuffer<float>(_cell_gate_bias),
+ getBuffer<float>(_output_gate_bias), projection_weights_ptr, projection_bias_ptr,
+ &lstm_params, n_batch, n_cell, n_input, aux_input_size, n_output, output_batch_leading_dim,
+ output_state_buf, cell_state_buf, input_gate_scratch, forget_gate_scratch,
+ cell_gate_scratch, output_gate_scratch, output_ptr);
+ }
+ }
+ else
+ {
+ for (int b = 0; b < n_batch; b++)
+ {
+ const int input_step = n_input;
+ const int output_step = output_batch_leading_dim;
+ for (int t = 0; t < max_time; t++)
+ {
+ // If this is the forward_sequence, step forward, otherwise step
+ // backwards.
+ const int t_rel = _forward_sequence ? t : max_time - t - 1;
+ const int time_offset = b * max_time + t_rel;
+ const float *input_ptr = getBuffer<float>(_input) + time_offset * input_step;
+ const float *aux_input_ptr = nullptr;
+ if (_aux_input)
+ {
+ aux_input_ptr = getBuffer<float>(_aux_input) + time_offset * input_step;
+ }
+ float *output_ptr = getBuffer<float>(_output) + time_offset * output_step + _output_offset;
+
+ // Offset the {output,cell}_state pointers to the right batch.
+ float *output_state_ptr = output_state_buf + b * output_batch_leading_dim;
+ float *cell_state_ptr = cell_state_buf + b * n_cell;
+ // Offset the scratch pointers to the right batch.
+ float *input_gate_scratch_ptr =
+ input_gate_scratch ? input_gate_scratch + b * n_cell : nullptr;
+ float *forget_gate_scratch_ptr = forget_gate_scratch + b * n_cell;
+ float *cell_gate_scratch_ptr = cell_gate_scratch + b * n_cell;
+ float *output_gate_scratch_ptr = output_gate_scratch + b * n_cell;
+
+ LstmStepFloat(
+ input_ptr, input_to_input_weights_ptr, getBuffer<float>(_input_to_forget_weights),
+ getBuffer<float>(_input_to_cell_weights), getBuffer<float>(_input_to_output_weights),
+ aux_input_ptr,
+ /*aux_input_to_input_weights=*/nullptr,
+ /*aux_input_to_forget_weights=*/nullptr,
+ /*aux_input_to_cell_weights=*/nullptr,
+ /*aux_input_to_output_weights=*/nullptr, recurrent_to_input_weights_ptr,
+ getBuffer<float>(_recurrent_to_forget_weights),
+ getBuffer<float>(_recurrent_to_cell_weights),
+ getBuffer<float>(_recurrent_to_output_weights), cell_to_input_weights_ptr,
+ cell_to_forget_weights_ptr, cell_to_output_weights_ptr, input_layer_norm_coefficients_ptr,
+ forget_layer_norm_coefficients_ptr, cell_layer_norm_coefficients_ptr,
+ output_layer_norm_coefficients_ptr, input_gate_bias_ptr,
+ getBuffer<float>(_forget_gate_bias), getBuffer<float>(_cell_gate_bias),
+ getBuffer<float>(_output_gate_bias), projection_weights_ptr, projection_bias_ptr,
+ &lstm_params, /*n_batch=*/1, n_cell, n_input, aux_input_size, n_output,
+ output_batch_leading_dim, output_state_ptr, cell_state_ptr, input_gate_scratch_ptr,
+ forget_gate_scratch_ptr, cell_gate_scratch_ptr, output_gate_scratch_ptr, output_ptr);
+ }
+ }
+ }
+}
+
+void LSTMLayer::configure(
+ const IPortableTensor *input, const IPortableTensor *input_to_input_weights,
+ const IPortableTensor *input_to_forget_weights, const IPortableTensor *input_to_cell_weights,
+ const IPortableTensor *input_to_output_weights, const IPortableTensor *recurrent_to_input_weights,
+ const IPortableTensor *recurrent_to_forget_weights,
+ const IPortableTensor *recurrent_to_cell_weights,
+ const IPortableTensor *recurrent_to_output_weights, const IPortableTensor *cell_to_input_weights,
+ const IPortableTensor *cell_to_forget_weights, const IPortableTensor *cell_to_output_weights,
+ const IPortableTensor *input_layer_norm_weights, const IPortableTensor *forget_layer_norm_weights,
+ const IPortableTensor *cell_layer_norm_weights, const IPortableTensor *output_layer_norm_weights,
+ const IPortableTensor *aux_input, const IPortableTensor *aux_input_to_input_weights,
+ const IPortableTensor *aux_input_to_forget_weights,
+ const IPortableTensor *aux_input_to_cell_weights,
+ const IPortableTensor *aux_input_to_output_weights, const IPortableTensor *input_gate_bias,
+ const IPortableTensor *forget_gate_bias, const IPortableTensor *cell_gate_bias,
+ const IPortableTensor *output_gate_bias, const IPortableTensor *projection_weights,
+ const IPortableTensor *projection_bias, const IPortableTensor *output_state_in,
+ const IPortableTensor *cell_state_in, const ir::operation::LSTM::Param &params,
+ bool forward_sequence, bool time_major, int output_offset, IPortableTensor *scratch_buffer,
+ IPortableTensor *output_state, IPortableTensor *cell_state, IPortableTensor *output,
+ bool has_output_state_data, bool has_cell_state_data)
+{
+ _input = input;
+ _input_to_input_weights = input_to_input_weights;
+ _input_to_forget_weights = input_to_forget_weights;
+ _input_to_cell_weights = input_to_cell_weights;
+ _input_to_output_weights = input_to_output_weights;
+ _recurrent_to_input_weights = recurrent_to_input_weights;
+ _recurrent_to_forget_weights = recurrent_to_forget_weights;
+ _recurrent_to_cell_weights = recurrent_to_cell_weights;
+ _recurrent_to_output_weights = recurrent_to_output_weights;
+ _cell_to_input_weights = cell_to_input_weights;
+ _cell_to_forget_weights = cell_to_forget_weights;
+ _cell_to_output_weights = cell_to_output_weights;
+ _input_layer_norm_coefficients = input_layer_norm_weights;
+ _forget_layer_norm_coefficients = forget_layer_norm_weights;
+ _cell_layer_norm_coefficients = cell_layer_norm_weights;
+ _output_layer_norm_coefficients = output_layer_norm_weights;
+  _aux_input = aux_input;
+  _aux_input_to_input_weights = aux_input_to_input_weights;
+  _aux_input_to_forget_weights = aux_input_to_forget_weights;
+  _aux_input_to_cell_weights = aux_input_to_cell_weights;
+  _aux_input_to_output_weights = aux_input_to_output_weights;
+  _input_gate_bias = input_gate_bias;
+ _forget_gate_bias = forget_gate_bias;
+ _cell_gate_bias = cell_gate_bias;
+ _output_gate_bias = output_gate_bias;
+ _projection_weights = projection_weights;
+ _projection_bias = projection_bias;
+ _output_state_in = output_state_in;
+ _cell_state_in = cell_state_in;
+ _params = params;
+ _forward_sequence = forward_sequence;
+ _time_major = time_major;
+ _output_offset = output_offset;
+ _scratch_buffer = scratch_buffer;
+ _output_state = output_state;
+ _cell_state = cell_state;
+ _output = output;
+ _has_output_state_data = has_output_state_data;
+ _has_cell_state_data = has_cell_state_data;
+}
+
+void LSTMLayer::run()
+{
+
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ LSTMFloat();
+ }
+ else
+ {
+ throw std::runtime_error{"LSTMLayer: unsupported data type"};
+ }
+}
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
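One detail worth calling out in LSTMFloat(): the single scratch buffer is sliced into per-gate areas, and the CIFG variant (coupled input-forget gate, signalled by a missing input_to_input_weights tensor) drops the input gate, so only three slices are needed instead of four. A self-contained sketch of the sizing rule used above:

```cpp
#include <cstddef>

// Scratch floats needed by one LSTM step: one n_batch x n_cell area per gate;
// CIFG variants have three gates instead of four.
std::size_t lstmScratchFloats(int n_batch, int n_cell, bool use_cifg)
{
  return static_cast<std::size_t>(n_batch) * n_cell * (use_cifg ? 3 : 4);
}
```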
diff --git a/runtime/onert/backend/cpu/ops/LSTMLayer.h b/runtime/onert/backend/cpu/ops/LSTMLayer.h
new file mode 100644
index 000000000..72ac2ed04
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/LSTMLayer.h
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_LSTMLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_LSTMLAYER_H__
+
+#include <backend/IPortableTensor.h>
+#include "OperationUtils.h"
+#include <ir/InternalType.h>
+#include <ir/operation/LSTM.h>
+#include <exec/IFunction.h>
+
+namespace nnfw
+{
+namespace cker
+{
+class FCTempArena;
+}
+} // namespace nnfw
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+// TODO Support LSTM, BiDirectionalSequenceLSTM
+class LSTMLayer : public ::onert::exec::IFunction
+{
+public:
+ LSTMLayer() = default;
+
+public:
+ void LSTMFloat();
+
+ void configure(
+ const IPortableTensor *input, const IPortableTensor *input_to_input_weights,
+ const IPortableTensor *input_to_forget_weights, const IPortableTensor *input_to_cell_weights,
+ const IPortableTensor *input_to_output_weights,
+ const IPortableTensor *recurrent_to_input_weights,
+ const IPortableTensor *recurrent_to_forget_weights,
+ const IPortableTensor *recurrent_to_cell_weights,
+ const IPortableTensor *recurrent_to_output_weights,
+ const IPortableTensor *cell_to_input_weights, const IPortableTensor *cell_to_forget_weights,
+ const IPortableTensor *cell_to_output_weights, const IPortableTensor *input_layer_norm_weights,
+ const IPortableTensor *forget_layer_norm_weights,
+ const IPortableTensor *cell_layer_norm_weights,
+ const IPortableTensor *output_layer_norm_weights, const IPortableTensor *aux_input,
+ const IPortableTensor *aux_input_to_input_weights,
+ const IPortableTensor *aux_input_to_forget_weights,
+ const IPortableTensor *aux_input_to_cell_weights,
+ const IPortableTensor *aux_input_to_output_weights, const IPortableTensor *input_gate_bias,
+ const IPortableTensor *forget_gate_bias, const IPortableTensor *cell_gate_bias,
+ const IPortableTensor *output_gate_bias, const IPortableTensor *projection_weights,
+ const IPortableTensor *projection_bias, const IPortableTensor *output_state_in,
+ const IPortableTensor *cell_state_in, const ir::operation::LSTM::Param &params,
+ bool forward_sequence, bool time_major, int32_t output_offset, IPortableTensor *scratch_buffer,
+ IPortableTensor *output_state, IPortableTensor *cell_state, IPortableTensor *output,
+ bool has_output_state_data, bool has_cell_state_data);
+
+ void run() override;
+
+private:
+ const IPortableTensor *_input{nullptr};
+ const IPortableTensor *_input_to_input_weights{nullptr};
+ const IPortableTensor *_input_to_forget_weights{nullptr};
+ const IPortableTensor *_input_to_cell_weights{nullptr};
+ const IPortableTensor *_input_to_output_weights{nullptr};
+ const IPortableTensor *_recurrent_to_input_weights{nullptr};
+ const IPortableTensor *_recurrent_to_forget_weights{nullptr};
+ const IPortableTensor *_recurrent_to_cell_weights{nullptr};
+ const IPortableTensor *_recurrent_to_output_weights{nullptr};
+ const IPortableTensor *_cell_to_input_weights{nullptr};
+ const IPortableTensor *_cell_to_forget_weights{nullptr};
+ const IPortableTensor *_cell_to_output_weights{nullptr};
+ const IPortableTensor *_input_layer_norm_coefficients{nullptr};
+ const IPortableTensor *_forget_layer_norm_coefficients{nullptr};
+ const IPortableTensor *_cell_layer_norm_coefficients{nullptr};
+ const IPortableTensor *_output_layer_norm_coefficients{nullptr};
+ const IPortableTensor *_aux_input{nullptr};
+ const IPortableTensor *_aux_input_to_input_weights{nullptr};
+ const IPortableTensor *_aux_input_to_forget_weights{nullptr};
+ const IPortableTensor *_aux_input_to_cell_weights{nullptr};
+ const IPortableTensor *_aux_input_to_output_weights{nullptr};
+ const IPortableTensor *_input_gate_bias{nullptr};
+ const IPortableTensor *_forget_gate_bias{nullptr};
+ const IPortableTensor *_cell_gate_bias{nullptr};
+ const IPortableTensor *_output_gate_bias{nullptr};
+ const IPortableTensor *_projection_weights{nullptr};
+ const IPortableTensor *_projection_bias{nullptr};
+ const IPortableTensor *_output_state_in{nullptr};
+ const IPortableTensor *_cell_state_in{nullptr};
+ IPortableTensor *_scratch_buffer{nullptr};
+ IPortableTensor *_output_state{nullptr};
+ IPortableTensor *_cell_state{nullptr};
+ IPortableTensor *_output{nullptr};
+ std::vector<uint8_t> _scratch_vec{};
+ std::vector<uint8_t> _output_state_vec{};
+ std::vector<uint8_t> _cell_state_vec{};
+ ir::operation::LSTM::Param _params{};
+ bool _forward_sequence{true};
+ bool _time_major{true};
+ int32_t _output_offset{0};
+ bool _has_output_state_data{false};
+ bool _has_cell_state_data{false};
+};
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_LSTMLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc
index 1d7ee6caa..a544dd970 100644
--- a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc
+++ b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc
@@ -49,9 +49,8 @@ void LogSoftMaxLayer::logsoftmaxFloat32()
nnfw::cker::SoftmaxParams op_params;
op_params.beta = _beta;
op_params.axis = _axis;
- nnfw::cker::LogSoftmax(op_params, getTensorShape(_input),
- reinterpret_cast<const float *>(_input->buffer()), getTensorShape(_output),
- reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::LogSoftmax(op_params, getShape(_input), getBuffer<float>(_input), getShape(_output),
+ getBuffer<float>(_output));
}
void LogSoftMaxLayer::logsoftmaxQuant8()
@@ -60,11 +59,11 @@ void LogSoftMaxLayer::logsoftmaxQuant8()
op_params.beta = _beta;
op_params.axis = _axis;
op_params.table = _table;
- op_params.zero_point = _output->data_offset();
+ op_params.zero_point = _output->data_zero_point();
op_params.scale = _output->data_scale();
- nnfw::cker::LogSoftmax(op_params, _input->data_scale(), getTensorShape(_input),
- reinterpret_cast<const uint8_t *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
+ nnfw::cker::LogSoftmax(op_params, _input->data_scale(), getShape(_input),
+ getBuffer<uint8_t>(_input), getShape(_output),
+ getBuffer<uint8_t>(_output));
}
void LogSoftMaxLayer::configure(const IPortableTensor *input, const float beta, const int axis,
diff --git a/runtime/onert/backend/cpu/ops/MatrixBandPartLayer.cc b/runtime/onert/backend/cpu/ops/MatrixBandPartLayer.cc
index b770cce5d..7220a2bab 100644
--- a/runtime/onert/backend/cpu/ops/MatrixBandPartLayer.cc
+++ b/runtime/onert/backend/cpu/ops/MatrixBandPartLayer.cc
@@ -30,7 +30,7 @@ namespace ops
{
MatrixBandPartLayer::MatrixBandPartLayer()
- : _input(nullptr), _num_lower_diag(nullptr), _num_upper_diag(nullptr), _output(nullptr)
+ : _input(nullptr), _num_lower_diag(nullptr), _num_upper_diag(nullptr), _output(nullptr)
{
// DO NOTHING
}
@@ -40,18 +40,14 @@ void MatrixBandPartLayer::matrixBandPartFloat32()
if (_num_lower_diag->data_type() == OperandType::INT64)
{
nnfw::cker::MatrixBandPart<int64_t>(
- *reinterpret_cast<const int64_t *>(_num_lower_diag->buffer()),
- *reinterpret_cast<const int64_t *>(_num_upper_diag->buffer()), getTensorShape(_input),
- reinterpret_cast<const float *>(_input->buffer()), getTensorShape(_output),
- reinterpret_cast<float *>(_output->buffer()));
+ *getBuffer<int64_t>(_num_lower_diag), *getBuffer<int64_t>(_num_upper_diag), getShape(_input),
+ getBuffer<float>(_input), getShape(_output), getBuffer<float>(_output));
}
else
{
nnfw::cker::MatrixBandPart<int32_t>(
- *reinterpret_cast<const int32_t *>(_num_lower_diag->buffer()),
- *reinterpret_cast<const int32_t *>(_num_upper_diag->buffer()), getTensorShape(_input),
- reinterpret_cast<const float *>(_input->buffer()), getTensorShape(_output),
- reinterpret_cast<float *>(_output->buffer()));
+ *getBuffer<int32_t>(_num_lower_diag), *getBuffer<int32_t>(_num_upper_diag), getShape(_input),
+ getBuffer<float>(_input), getShape(_output), getBuffer<float>(_output));
}
}
diff --git a/runtime/onert/backend/cpu/ops/MeanLayer.cc b/runtime/onert/backend/cpu/ops/MeanLayer.cc
index 4921ac748..c86a9d126 100644
--- a/runtime/onert/backend/cpu/ops/MeanLayer.cc
+++ b/runtime/onert/backend/cpu/ops/MeanLayer.cc
@@ -36,18 +36,29 @@ MeanLayer::MeanLayer() : _input(nullptr), _axes(nullptr), _output(nullptr), _kee
void MeanLayer::MeanFloat32()
{
- nnfw::cker::Mean(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()),
- getReducerAxes(_axes));
+ const auto inputShape = getShape(_input);
+ const auto axisVec = getReducerAxes(_axes);
+ bool axis_is_1_and_2 =
+ _keep_dims && inputShape.DimensionsCount() == 4 && axisVec.size() == 2 &&
+ ((axisVec[0] == 1 && axisVec[1] == 2) || (axisVec[0] == 2 && axisVec[1] == 1));
+
+ if (axis_is_1_and_2)
+ {
+ nnfw::cker::MeanAxis1And2(inputShape, getBuffer<float>(_input), getShape(_output),
+ getBuffer<float>(_output));
+ }
+ else
+ {
+ nnfw::cker::Mean(inputShape, getBuffer<float>(_input), getShape(_output),
+ getBuffer<float>(_output), axisVec);
+ }
}
void MeanLayer::MeanQuant8()
{
- nnfw::cker::MeanQ8Asymm(getTensorShape(_input),
- reinterpret_cast<const uint8_t *>(_input->buffer()), _input->data_scale(),
- _input->data_offset(), getTensorShape(_output),
- reinterpret_cast<uint8_t *>(_output->buffer()), _output->data_scale(),
- _output->data_offset(), getReducerAxes(_axes));
+ nnfw::cker::MeanQ8Asymm(getShape(_input), getBuffer<uint8_t>(_input), _input->data_scale(),
+ _input->data_zero_point(), getShape(_output), getBuffer<uint8_t>(_output),
+ _output->data_scale(), _output->data_zero_point(), getReducerAxes(_axes));
}
void MeanLayer::configure(const IPortableTensor *input, const IPortableTensor *axes,
@@ -57,6 +68,10 @@ void MeanLayer::configure(const IPortableTensor *input, const IPortableTensor *a
_axes = axes;
_output = output;
_keep_dims = keep_dims;
+
+ if (_input->data_type() != OperandType::FLOAT32 &&
+ _input->data_type() != OperandType::QUANT_UINT8_ASYMM)
+ throw std::runtime_error{"Mean: unsupported data type"};
}
void MeanLayer::run()
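The predicate added above recognizes the common "spatial mean" case, a 4-D NHWC tensor reduced over H and W with keep_dims, and routes it to the specialized MeanAxis1And2 kernel. As a standalone function:

```cpp
#include <cstdint>
#include <vector>

bool isSpatialMean(int rank, const std::vector<int32_t> &axes, bool keep_dims)
{
  return keep_dims && rank == 4 && axes.size() == 2 &&
         ((axes[0] == 1 && axes[1] == 2) || (axes[0] == 2 && axes[1] == 1));
}
```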
diff --git a/runtime/onert/backend/cpu/ops/OneHotLayer.cc b/runtime/onert/backend/cpu/ops/OneHotLayer.cc
index 2a82b00ee..66773a608 100644
--- a/runtime/onert/backend/cpu/ops/OneHotLayer.cc
+++ b/runtime/onert/backend/cpu/ops/OneHotLayer.cc
@@ -33,10 +33,8 @@ template <typename T> void OneHotLayer::oneHotImpl()
{
// It assumes the index is of int32_t type.
nnfw::cker::OneHot<T, int32_t>(
- *reinterpret_cast<const int32_t *>(_depth->buffer()),
- *reinterpret_cast<T *>(_on_value->buffer()), *reinterpret_cast<T *>(_off_value->buffer()),
- _axis, getTensorShape(_indices), reinterpret_cast<const int32_t *>(_indices->buffer()),
- getTensorShape(_output), reinterpret_cast<T *>(_output->buffer()));
+ *getBuffer<int32_t>(_depth), *getBuffer<T>(_on_value), *getBuffer<T>(_off_value), _axis,
+ getShape(_indices), getBuffer<int32_t>(_indices), getShape(_output), getBuffer<T>(_output));
}
void OneHotLayer::configure(const IPortableTensor *indices, const IPortableTensor *depth,
diff --git a/runtime/onert/backend/cpu/ops/OneHotLayer.h b/runtime/onert/backend/cpu/ops/OneHotLayer.h
index c05498440..b0f03a261 100644
--- a/runtime/onert/backend/cpu/ops/OneHotLayer.h
+++ b/runtime/onert/backend/cpu/ops/OneHotLayer.h
@@ -34,8 +34,8 @@ class OneHotLayer : public ::onert::exec::IFunction
{
public:
OneHotLayer()
- : _indices(nullptr), _depth(nullptr), _on_value(nullptr), _off_value(nullptr),
- _output(nullptr), _axis(-1)
+ : _indices(nullptr), _depth(nullptr), _on_value(nullptr), _off_value(nullptr), _output(nullptr),
+ _axis(-1)
{
// DO NOTHING
}
diff --git a/runtime/onert/backend/cpu/ops/OperationUtils.cc b/runtime/onert/backend/cpu/ops/OperationUtils.cc
index 2eee6dc85..686865af2 100644
--- a/runtime/onert/backend/cpu/ops/OperationUtils.cc
+++ b/runtime/onert/backend/cpu/ops/OperationUtils.cc
@@ -32,16 +32,17 @@ namespace ops
uint32_t getNumberOfDimensions(const IPortableTensor *tensor)
{
assert(tensor);
- return tensor->num_dimensions();
+ return tensor->getShape().rank();
}
uint32_t getNumberOfElements(const IPortableTensor *tensor)
{
assert(tensor);
uint32_t count = 1;
- for (size_t i = 0; i < tensor->num_dimensions(); i++)
+ auto shape = tensor->getShape();
+ for (int i = 0; i < shape.rank(); i++)
{
- count *= tensor->dimension(i);
+ count *= shape.dim(i);
}
return count;
}
@@ -49,12 +50,13 @@ uint32_t getNumberOfElements(const IPortableTensor *tensor)
uint32_t getSizeOfDimension(const IPortableTensor *tensor, uint32_t dimensionIdx)
{
assert(tensor);
- if (dimensionIdx >= tensor->num_dimensions())
+ auto shape = tensor->getShape();
+ if (dimensionIdx >= static_cast<uint32_t>(shape.rank()))
{
// TODO, log the error
return 0;
}
- return tensor->dimension(dimensionIdx);
+ return shape.dim(dimensionIdx);
}
void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
@@ -94,6 +96,34 @@ void GetQuantizedConvolutionMultiplier(const IPortableTensor *input, const IPort
*multiplier = input_product_scale / output_scale;
}
+void GetQuantizedConvolutionMultipliersAndShifts(
+ float input_scale, float output_scale, const float *filter_scales, size_t filter_scales_size,
+ int num_channels, std::vector<int32_t> &per_channel_output_multiplier,
+ std::vector<int> &per_channel_output_shift)
+{
+ // Originates from tflite's PopulateConvolutionQuantizationParams()
+ per_channel_output_multiplier.resize(num_channels);
+ per_channel_output_shift.resize(num_channels);
+
+ const bool is_per_channel = filter_scales_size > 1;
+ auto per_channel_multiplier = per_channel_output_multiplier.data();
+ auto per_channel_shift = per_channel_output_shift.data();
+ for (int i = 0; i < num_channels; ++i)
+ {
+ // If per-tensor quantization parameter is specified, broadcast it along the
+ // quantization dimension (channels_out).
+ const float scale = is_per_channel ? filter_scales[i] : filter_scales[0];
+ const double filter_scale = static_cast<double>(scale);
+ const double effective_output_scale =
+ static_cast<double>(input_scale) * filter_scale / static_cast<double>(output_scale);
+ int32_t significand;
+ int channel_shift;
+ QuantizeMultiplier(effective_output_scale, &significand, &channel_shift);
+ per_channel_multiplier[i] = significand;
+ per_channel_shift[i] = channel_shift;
+ }
+}
+
void QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t *quantized_multiplier,
int *left_shift)
{
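A hedged usage sketch of the helper added above (scale values are made up for illustration): each output channel gets its own fixed-point (multiplier, shift) pair computed from effective_scale = input_scale * filter_scale[c] / output_scale, and a single per-tensor filter scale is broadcast to all channels.

```cpp
// Assumes the declaration above is in scope, plus <vector> and <cstdint>.
std::vector<int32_t> per_channel_multiplier;
std::vector<int> per_channel_shift;
const float filter_scales[] = {0.02f, 0.04f}; // hypothetical per-channel scales
GetQuantizedConvolutionMultipliersAndShifts(
  /*input_scale=*/0.5f, /*output_scale=*/0.25f, filter_scales,
  /*filter_scales_size=*/2, /*num_channels=*/2, per_channel_multiplier,
  per_channel_shift);
// per_channel_multiplier[c] and per_channel_shift[c] now encode (roughly as a
// Q31 significand plus a power-of-two shift) 0.5f * filter_scales[c] / 0.25f.
```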
@@ -111,13 +141,29 @@ void QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t *quantiz
*quantized_multiplier = static_cast<int32_t>(q_fixed);
}
-void CalculateActivationRangeUint8(ir::Activation activation, const IPortableTensor *output,
- int32_t *act_min, int32_t *act_max)
+void CalculateActivationRangeQuantized(ir::Activation activation, const IPortableTensor *output,
+ int32_t *act_min, int32_t *act_max)
{
- const int32_t qmin = std::numeric_limits<uint8_t>::min();
- const int32_t qmax = std::numeric_limits<uint8_t>::max();
+ int32_t qmin = 0;
+ int32_t qmax = 0;
+
+ switch (output->data_type())
+ {
+ case OperandType::QUANT_UINT8_ASYMM:
+ qmin = std::numeric_limits<uint8_t>::min();
+ qmax = std::numeric_limits<uint8_t>::max();
+ break;
+ case OperandType::QUANT_INT8_ASYMM:
+ case OperandType::QUANT_INT8_SYMM:
+ qmin = std::numeric_limits<int8_t>::min();
+ qmax = std::numeric_limits<int8_t>::max();
+ break;
+ default:
+ throw std::runtime_error("CalculateActivationRangeQuantized: Not supported operand type.");
+ }
+
const auto scale = output->data_scale();
- const auto zero_point = output->data_offset();
+ const auto zero_point = output->data_zero_point();
auto quantize = [scale, zero_point](float f) {
return zero_point + static_cast<int32_t>(std::round(f / scale));
};
@@ -148,7 +194,7 @@ void CalculateActivationRangeUint8(ir::Activation activation, const IPortableTen
}
else
{
- std::cout << "Unsupported fused activation function." << std::endl;
+ throw std::runtime_error{"Unsupported fused activation function."};
}
}
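The generalized helper follows the usual TFLite recipe: quantize the real-valued activation bounds with the output's (scale, zero_point), then intersect with the integer type's representable range. A self-contained sketch for the RELU6 case:

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>

void relu6RangeQuantized(float scale, int32_t zero_point, int32_t qmin, int32_t qmax,
                         int32_t *act_min, int32_t *act_max)
{
  auto quantize = [scale, zero_point](float f) {
    return zero_point + static_cast<int32_t>(std::round(f / scale));
  };
  *act_min = std::max(qmin, quantize(0.0f)); // real 0.0 lower bound
  *act_max = std::min(qmax, quantize(6.0f)); // real 6.0 upper bound
}
```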
@@ -167,8 +213,10 @@ bool HaveSameShapes(const IPortableTensor *input1, const IPortableTensor *input2
if (getNumberOfDimensions(input1) != getNumberOfDimensions(input2))
return false;
+ auto shape1 = input1->getShape();
+ auto shape2 = input2->getShape();
for (uint32_t i = 0; i < getNumberOfDimensions(input1); i++)
- if (input1->dimension(i) != input2->dimension(i))
+ if (shape1.dim(i) != shape2.dim(i))
return false;
return true;
@@ -208,7 +256,7 @@ uint32_t sizeOfData(OperandType type, const std::vector<int32_t> &dimensions)
break;
}
- for (auto d : dimensions)
+ for (auto &&d : dimensions)
{
assert(d >= 0);
size *= static_cast<uint32_t>(d);
@@ -237,20 +285,21 @@ std::vector<int32_t> getReducerAxes(const IPortableTensor *axes)
{
std::vector<int32_t> ret;
+ auto axes_vals = (axes->getShape().rank() == 0) ? 1 : axes->getShape().dim(0);
assert(axes->layout() == ir::Layout::NHWC);
- assert(axes->dimension(0) == axes->getShape().num_elements());
+ assert(static_cast<size_t>(axes_vals) == axes->getShape().num_elements());
switch (axes->data_type())
{
case ir::DataType::INT32:
{
- for (size_t i = 0; i < axes->dimension(0); ++i)
- ret.emplace_back(*(reinterpret_cast<const int32_t *>(axes->buffer()) + i));
+ for (int i = 0; i < axes_vals; ++i)
+ ret.emplace_back(*(getBuffer<int32_t>(axes) + i));
break;
}
case ir::DataType::INT64:
{
- for (size_t i = 0; i < axes->dimension(0); ++i)
- ret.emplace_back(*(reinterpret_cast<const int64_t *>(axes->buffer()) + i));
+ for (int i = 0; i < axes_vals; ++i)
+ ret.emplace_back(*(getBuffer<int64_t>(axes) + i));
break;
}
default:
diff --git a/runtime/onert/backend/cpu/ops/OperationUtils.h b/runtime/onert/backend/cpu/ops/OperationUtils.h
index 98385521a..1fefc3228 100644
--- a/runtime/onert/backend/cpu/ops/OperationUtils.h
+++ b/runtime/onert/backend/cpu/ops/OperationUtils.h
@@ -18,19 +18,19 @@
#define __NNFW_SUPPORT_NNAPI_OPERATION_UTILS_H__
#include <backend/IPortableTensor.h>
-
-#include <cker/Shape.h>
-#include <cker/Types.h>
-#include <iostream>
#include <ir/DataType.h>
-#include <ir/InternalType.h>
#include <ir/Operand.h>
#include <ir/Padding.h>
+#include <util/CalculateActivationRange.h>
+
+#include <cker/Shape.h>
+#include <cker/Types.h>
#include <limits>
#include <vector>
using OperandType = onert::ir::DataType;
+using namespace onert::util;
namespace onert
{
@@ -74,7 +74,8 @@ inline nnfw::cker::Shape getExtendedTensorShape(const IPortableTensor *tensor)
assert(tensor);
const int32_t extended_rank = 4;
int32_t raw_shape[extended_rank];
- uint32_t src = extended_rank - tensor->num_dimensions();
+ auto shape = tensor->getShape();
+ uint32_t src = extended_rank - shape.rank();
for (uint32_t i = 0; i < extended_rank; ++i)
{
if (i < src)
@@ -83,39 +84,30 @@ inline nnfw::cker::Shape getExtendedTensorShape(const IPortableTensor *tensor)
}
else
{
- raw_shape[i] = tensor->dimension(i - src);
+ raw_shape[i] = shape.dim(i - src);
}
}
return nnfw::cker::Shape(extended_rank, raw_shape);
}
-inline nnfw::cker::Shape getTensorShape(const IPortableTensor *tensor)
+inline nnfw::cker::Shape getShape(const IPortableTensor *tensor)
{
if (tensor == nullptr)
return nnfw::cker::Shape();
+ const ir::Shape &shape = tensor->get_info().shape();
+
assert(tensor->layout() == ir::Layout::NHWC);
- constexpr int kMaxSmallSize = 8;
- int32_t raw_shape_small[kMaxSmallSize];
- std::vector<int32_t> raw_shape_vec;
- auto rank = tensor->num_dimensions();
- int32_t *data = nullptr;
- if (rank > kMaxSmallSize)
- {
- raw_shape_vec.resize(rank);
- data = raw_shape_vec.data();
- }
- else
- {
- data = raw_shape_small;
- }
- for (uint32_t i = 0; i < rank; ++i)
+ auto rank = shape.rank();
+ nnfw::cker::Shape ret(rank);
+ auto data = ret.DimsData();
+ for (int i = 0; i < rank; ++i)
{
- data[i] = tensor->dimension(i);
+ data[i] = shape.dim(i);
}
- return nnfw::cker::Shape(rank, data);
+ return ret;
}
inline nnfw::cker::FusedActivationFunctionType
@@ -131,6 +123,10 @@ convertActivationType(const ir::Activation activation)
return nnfw::cker::FusedActivationFunctionType::kRelu1;
case ir::Activation::RELU6:
return nnfw::cker::FusedActivationFunctionType::kRelu6;
+ case ir::Activation::TANH:
+ return nnfw::cker::FusedActivationFunctionType::kTanh;
+ case ir::Activation::SIGMOID:
+ return nnfw::cker::FusedActivationFunctionType::kSigmoid;
default:
throw std::runtime_error{"CPU backend: Cannot convert activation type"};
}
@@ -165,42 +161,13 @@ void GetQuantizedConvolutionMultiplier(const IPortableTensor *inputDescr,
void QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t *quantized_multiplier,
int *left_shift);
-template <typename T>
-void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max)
-{
- if (activation == ir::Activation::RELU)
- {
- *activation_min = 0;
- *activation_max = std::numeric_limits<T>::max();
- }
- else if (activation == ir::Activation::RELU6)
- {
- *activation_min = 0;
- *activation_max = 6;
- }
- else if (activation == ir::Activation::RELU1)
- {
- *activation_min = -1;
- *activation_max = 1;
- }
- else if (activation == ir::Activation::SIGMOID)
- {
- *activation_min = 0;
- *activation_max = 1;
- }
- else if (activation == ir::Activation::NONE)
- {
- *activation_min = std::numeric_limits<T>::lowest();
- *activation_max = std::numeric_limits<T>::max();
- }
- else
- {
- std::cout << "Unsupported fused activation function." << std::endl;
- }
-}
+void GetQuantizedConvolutionMultipliersAndShifts(
+ float input_scale, float output_scale, const float *filter_scales, size_t filter_scales_size,
+ int num_channels, std::vector<int32_t> &per_channel_output_multiplier,
+ std::vector<int> &per_channel_output_shift);
-void CalculateActivationRangeUint8(ir::Activation activation, const IPortableTensor *output,
- int32_t *act_min, int32_t *act_max);
+void CalculateActivationRangeQuantized(ir::Activation activation, const IPortableTensor *output,
+ int32_t *act_min, int32_t *act_max);
bool HaveSameShapes(const IPortableTensor *input1, const IPortableTensor *input2);
@@ -212,6 +179,16 @@ nnfw::cker::PaddingType getPaddingType(ir::PaddingType ir_padding_type);
std::vector<int32_t> getReducerAxes(const IPortableTensor *axes);
+template <typename T> const T *getBuffer(const IPortableTensor *tensor)
+{
+ return reinterpret_cast<const T *>(tensor->buffer());
+}
+
+template <typename T> T *getBuffer(IPortableTensor *tensor)
+{
+ return reinterpret_cast<T *>(tensor->buffer());
+}
+
} // namespace ops
} // namespace cpu
} // namespace backend
diff --git a/runtime/onert/backend/cpu/ops/PackLayer.cc b/runtime/onert/backend/cpu/ops/PackLayer.cc
index 314b192a2..beac6c73b 100644
--- a/runtime/onert/backend/cpu/ops/PackLayer.cc
+++ b/runtime/onert/backend/cpu/ops/PackLayer.cc
@@ -48,7 +48,7 @@ template <typename T> void PackLayer::packImpl()
for (uint32_t i = 0; i < num_inputs; i++)
{
- inputDims.push_back(getTensorShape(_inputs[i]));
+ inputDims.push_back(getShape(_inputs[i]));
inputDimsPtr.push_back(&inputDims[i]);
}
@@ -56,11 +56,10 @@ template <typename T> void PackLayer::packImpl()
for (const auto input : _inputs)
{
- inputPtrs.emplace_back(reinterpret_cast<const T *>(input->buffer()));
+ inputPtrs.emplace_back(getBuffer<T>(input));
}
- nnfw::cker::Pack<T>(op_params, inputPtrs.data(), getTensorShape(_output),
- reinterpret_cast<T *>(_output->buffer()));
+ nnfw::cker::Pack<T>(op_params, inputPtrs.data(), getShape(_output), getBuffer<T>(_output));
}
void PackLayer::configure(const std::vector<const IPortableTensor *> &inputs, int32_t axis,
diff --git a/runtime/onert/backend/cpu/ops/PadLayer.cc b/runtime/onert/backend/cpu/ops/PadLayer.cc
index 6a2bf9da0..d9da564c4 100644
--- a/runtime/onert/backend/cpu/ops/PadLayer.cc
+++ b/runtime/onert/backend/cpu/ops/PadLayer.cc
@@ -28,16 +28,15 @@ namespace ops
{
PadLayer::PadLayer()
- : _input(nullptr), _output(nullptr), _padData(), _padRank(), _constantValueData()
+ : _input(nullptr), _output(nullptr), _padData(), _padRank(), _constantValueData()
{
// DO NOTHING
}
template <typename T> void PadLayer::padImpl(const T *constant_value_data)
{
- nnfw::cker::Pad<T>(_padData, _padRank, getTensorShape(_input),
- reinterpret_cast<const T *>(_input->buffer()), getTensorShape(_output),
- reinterpret_cast<T *>(_output->buffer()), constant_value_data);
+ nnfw::cker::Pad<T>(_padData, _padRank, getShape(_input), getBuffer<T>(_input), getShape(_output),
+ getBuffer<T>(_output), constant_value_data);
}
void PadLayer::configure(const IPortableTensor *input, IPortableTensor *output,
@@ -52,25 +51,35 @@ void PadLayer::configure(const IPortableTensor *input, IPortableTensor *output,
void PadLayer::run()
{
- if (_input->data_type() == OperandType::FLOAT32)
+ switch (_input->data_type())
{
- padImpl<float>(_constantValueData.f);
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- if (_constantValueData.u8 == nullptr)
- {
- uint8_t pad_value = static_cast<uint8_t>(_output->data_offset());
- padImpl<uint8_t>(&pad_value);
- }
- else
- {
- padImpl<uint8_t>(_constantValueData.u8);
- }
- }
- else
- {
- throw std::runtime_error{"Pad: unsupported data type"};
+ case OperandType::FLOAT32:
+ padImpl<float>(_constantValueData.f);
+ break;
+ case OperandType::QUANT_UINT8_ASYMM:
+ if (_constantValueData.u8 == nullptr)
+ {
+ uint8_t pad_value = static_cast<uint8_t>(_output->data_zero_point());
+ padImpl<uint8_t>(&pad_value);
+ }
+ else
+ {
+ padImpl<uint8_t>(_constantValueData.u8);
+ }
+ break;
+ case OperandType::QUANT_INT8_ASYMM:
+ if (_constantValueData.i8 == nullptr)
+ {
+ int8_t pad_value = static_cast<int8_t>(_output->data_zero_point());
+ padImpl<int8_t>(&pad_value);
+ }
+ else
+ {
+ padImpl<int8_t>(_constantValueData.i8);
+ }
+ break;
+ default:
+ throw std::runtime_error{"Pad: unsupported data type"};
}
}
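Note the default in the quantized branches above: when no constant value is supplied, the pad value is the output's zero_point, i.e. the integer that represents real 0.0 under the output quantization (q = zero_point + round(0.0f / scale)). A one-line sketch:

```cpp
#include <cstdint>

// Quantized representation of real 0.0f under the output's quantization.
int8_t defaultPadValueInt8(int32_t output_zero_point)
{
  return static_cast<int8_t>(output_zero_point);
}
```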
diff --git a/runtime/onert/backend/cpu/ops/PoolLayer.cc b/runtime/onert/backend/cpu/ops/PoolLayer.cc
index 85d02a751..088ca5fd7 100644
--- a/runtime/onert/backend/cpu/ops/PoolLayer.cc
+++ b/runtime/onert/backend/cpu/ops/PoolLayer.cc
@@ -36,18 +36,16 @@ template <typename T>
void avgPool2D(const nnfw::cker::PoolParams &params, const IPortableTensor *input,
IPortableTensor *output)
{
- nnfw::cker::AveragePool<T>(params, getTensorShape(input),
- reinterpret_cast<const T *>(input->buffer()), getTensorShape(output),
- reinterpret_cast<T *>(output->buffer()));
+ nnfw::cker::AveragePool<T>(params, getShape(input), getBuffer<T>(input), getShape(output),
+ getBuffer<T>(output));
}
template <typename T>
void maxPool2D(const nnfw::cker::PoolParams &params, const IPortableTensor *input,
IPortableTensor *output)
{
- nnfw::cker::MaxPool<T>(params, getTensorShape(input),
- reinterpret_cast<const T *>(input->buffer()), getTensorShape(output),
- reinterpret_cast<T *>(output->buffer()));
+ nnfw::cker::MaxPool<T>(params, getShape(input), getBuffer<T>(input), getShape(output),
+ getBuffer<T>(output));
}
template <typename T>
@@ -81,7 +79,11 @@ PoolLayer::PoolLayer() : _input(nullptr), _output(nullptr), _kernel()
op_params.filter_height = kernelHeight; \
op_params.filter_width = kernelWidth; \
op_params.padding_values.height = (int8_t)paddingTop; \
- op_params.padding_values.width = (int8_t)paddingLeft;
+ op_params.padding_values.width = (int8_t)paddingLeft; \
+ op_params.float_activation_min = 0; \
+ op_params.float_activation_max = 0; \
+ op_params.quantized_activation_min = 0; \
+ op_params.quantized_activation_max = 0;
void PoolLayer::configure(const IPortableTensor *input, const uint32_t paddingLeft, const uint32_t,
const uint32_t paddingTop, const uint32_t, const uint32_t strideWidth,
@@ -96,29 +98,44 @@ void PoolLayer::configure(const IPortableTensor *input, const uint32_t paddingLe
_output = output;
POOLING_PARAMETERS
- if (_input->data_type() == OperandType::FLOAT32)
- {
- float output_activation_min = 0;
- float output_activation_max = 0;
- CalculateActivationRange<float>(activation, &output_activation_min, &output_activation_max);
- op_params.float_activation_min = output_activation_min;
- op_params.float_activation_max = output_activation_max;
- _kernel = generateKernelGeneric<float>(op_params, op_type);
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- int32_t output_activation_min = 0;
- int32_t output_activation_max = 0;
- CalculateActivationRangeUint8(activation, _output, &output_activation_min,
- &output_activation_max);
- op_params.quantized_activation_min = output_activation_min;
- op_params.quantized_activation_max = output_activation_max;
- _kernel = generateKernelGeneric<uint8_t>(op_params, op_type);
- }
- else
+ switch (_input->data_type())
{
- throw std::runtime_error{"Pool: unsupported data type"};
+ case OperandType::FLOAT32:
+ {
+ float output_activation_min = 0;
+ float output_activation_max = 0;
+ CalculateActivationRange<float>(activation, &output_activation_min, &output_activation_max);
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+
+ _kernel = generateKernelGeneric<float>(op_params, op_type);
+ break;
+ }
+ case OperandType::QUANT_UINT8_ASYMM:
+ {
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeQuantized(activation, _output, &output_activation_min,
+ &output_activation_max);
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+ _kernel = generateKernelGeneric<uint8_t>(op_params, op_type);
+ break;
+ }
+ case OperandType::QUANT_INT8_ASYMM:
+ {
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeQuantized(activation, _output, &output_activation_min,
+ &output_activation_max);
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+ _kernel = generateKernelGeneric<int8_t>(op_params, op_type);
+ break;
+ }
+ default:
+ throw std::runtime_error{"Pool: unsupported data type"};
}
}
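
Note: both quantized Pool paths call CalculateActivationRangeQuantized to clamp the output. The gist, in simplified form (the real helper also derives the float bounds from the fused activation and the operand type's limits):

#include <algorithm>
#include <cmath>
#include <cstdint>

// Map a float activation range into the quantized domain and intersect it
// with the representable range [qmin, qmax].
void activationRangeQuantized(float act_min, float act_max, float scale,
                              int32_t zero_point, int32_t qmin, int32_t qmax,
                              int32_t *out_min, int32_t *out_max)
{
  const auto quantize = [&](float v) {
    return zero_point + static_cast<int32_t>(std::round(v / scale));
  };
  *out_min = std::max(qmin, quantize(act_min));
  *out_max = std::min(qmax, quantize(act_max));
}

For int8 with ReLU6, for example, act_min = 0 and act_max = 6 with qmin = -128 and qmax = 127.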
diff --git a/runtime/onert/backend/cpu/ops/PowLayer.cc b/runtime/onert/backend/cpu/ops/PowLayer.cc
index 04a1af1e1..efd024dee 100644
--- a/runtime/onert/backend/cpu/ops/PowLayer.cc
+++ b/runtime/onert/backend/cpu/ops/PowLayer.cc
@@ -39,15 +39,13 @@ void PowLayer::powFloat32()
if (!HaveSameShapes(_lhs, _rhs))
{
nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::POW>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ op_params, getShape(_lhs), getBuffer<float>(_lhs), getShape(_rhs), getBuffer<float>(_rhs),
+ getShape(_output), getBuffer<float>(_output));
return;
}
- nnfw::cker::powImpl(getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::powImpl(getShape(_lhs), getBuffer<float>(_lhs), getShape(_rhs),
+ getBuffer<float>(_rhs), getShape(_output), getBuffer<float>(_output));
}
void PowLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
diff --git a/runtime/onert/backend/cpu/ops/QuantizeLayer.cc b/runtime/onert/backend/cpu/ops/QuantizeLayer.cc
new file mode 100644
index 000000000..08550e7c9
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/QuantizeLayer.cc
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OperationUtils.h"
+#include "QuantizeLayer.h"
+
+#include <cker/operation/Dequantize.h>
+#include <cker/operation/Erf.h>
+#include <cker/operation/Exp.h>
+#include <cker/operation/LogicalNot.h>
+#include <cker/operation/Quantize.h>
+#include <cker/operation/Round.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+template <typename InputT, typename OutputT>
+void affineQuantize(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Quantize(getShape(input), getBuffer<InputT>(input), getShape(output),
+ getBuffer<OutputT>(output), output->data_scale(), output->data_zero_point());
+}
+
+void QuantizeLayer::configure(const IPortableTensor *input, IPortableTensor *output)
+{
+ assert(input != nullptr);
+ assert(output != nullptr);
+
+ _input = input;
+ _output = output;
+
+ if ((_input->data_type() == OperandType::FLOAT32))
+ {
+ // DO NOTHING
+ }
+ else if (((input->data_type() == OperandType::QUANT_UINT8_ASYMM) &&
+ (output->data_type() == OperandType::QUANT_INT8_ASYMM)) ||
+ ((input->data_type() == OperandType::QUANT_INT8_ASYMM) &&
+ (output->data_type() == OperandType::QUANT_UINT8_ASYMM)))
+ {
+ const double effective_output_scale =
+ static_cast<double>(input->data_scale()) / static_cast<double>(output->data_scale());
+ QuantizeMultiplier(effective_output_scale, &_output_multiplier, &_output_shift);
+ }
+ else
+ {
+ throw std::runtime_error{"Quantize: Unsupported data type"};
+ }
+}
+
+void QuantizeLayer::run()
+{
+ if ((_input->data_type() == OperandType::FLOAT32))
+ {
+ affineQuantize<float, uint8_t>(_input, _output);
+ }
+ else if ((_input->data_type() == OperandType::QUANT_UINT8_ASYMM) &&
+ (_output->data_type() == OperandType::QUANT_INT8_ASYMM))
+ {
+ nnfw::cker::Requantize<uint8_t, int8_t>(
+ getBuffer<uint8_t>(_input), MatchingFlatSize(getShape(_input), getShape(_output)),
+ _output_multiplier, _output_shift, _input->data_zero_point(), _output->data_zero_point(),
+ getBuffer<int8_t>(_output));
+ }
+ else if ((_input->data_type() == OperandType::QUANT_INT8_ASYMM) &&
+ (_output->data_type() == OperandType::QUANT_UINT8_ASYMM))
+ {
+ nnfw::cker::Requantize<int8_t, uint8_t>(
+ getBuffer<int8_t>(_input), MatchingFlatSize(getShape(_input), getShape(_output)),
+ _output_multiplier, _output_shift, _input->data_zero_point(), _output->data_zero_point(),
+ getBuffer<uint8_t>(_output));
+ }
+ else
+ {
+ throw std::runtime_error{"Quantize: Unsupported data type"};
+ }
+}
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
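
Note: affineQuantize above applies the standard affine mapping q = round(x / scale) + zero_point. A single-element sketch with uint8 clamping shown for illustration (the cker kernel handles the ranges generically):

#include <algorithm>
#include <cmath>
#include <cstdint>

uint8_t quantizeOne(float x, float scale, int32_t zero_point)
{
  const int32_t q = zero_point + static_cast<int32_t>(std::round(x / scale));
  return static_cast<uint8_t>(std::min(255, std::max(0, q)));
}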
diff --git a/runtime/onert/backend/cpu/ops/QuantizeLayer.h b/runtime/onert/backend/cpu/ops/QuantizeLayer.h
new file mode 100644
index 000000000..112d31562
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/QuantizeLayer.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__
+
+#include <backend/IPortableTensor.h>
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+class QuantizeLayer : public ::onert::exec::IFunction
+{
+public:
+ QuantizeLayer() : _input(nullptr), _output(nullptr), _output_multiplier(0), _output_shift(0)
+ {
+ // DO NOTHING
+ }
+
+public:
+ void configure(const IPortableTensor *input, IPortableTensor *output);
+ void run() override;
+
+private:
+ const IPortableTensor *_input;
+ IPortableTensor *_output;
+ int32_t _output_multiplier;
+ int _output_shift;
+};
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__
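
Note: the uint8 <-> int8 paths in run() use _output_multiplier/_output_shift, precomputed at configure time from the scale ratio. In float arithmetic the per-element effect is roughly the following; the real Requantize kernel uses the fixed-point multiplier and shift instead of dividing by out_scale:

#include <algorithm>
#include <cmath>
#include <cstdint>

int8_t requantizeOne(uint8_t in, int32_t in_zp, float in_scale,
                     int32_t out_zp, float out_scale)
{
  // Dequantize around the input zero point, then requantize around the output's.
  const float real = (static_cast<int32_t>(in) - in_zp) * in_scale;
  const int32_t q = out_zp + static_cast<int32_t>(std::round(real / out_scale));
  return static_cast<int8_t>(std::min(127, std::max(-128, q)));
}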
diff --git a/runtime/onert/backend/cpu/ops/RangeLayer.cc b/runtime/onert/backend/cpu/ops/RangeLayer.cc
index f00101fa8..a41b31b3f 100644
--- a/runtime/onert/backend/cpu/ops/RangeLayer.cc
+++ b/runtime/onert/backend/cpu/ops/RangeLayer.cc
@@ -47,16 +47,12 @@ void RangeLayer::run()
switch (_output->data_type())
{
case OperandType::FLOAT32:
- nnfw::cker::Range<float>(reinterpret_cast<float *>(_start->buffer()),
- reinterpret_cast<float *>(_limit->buffer()),
- reinterpret_cast<float *>(_delta->buffer()),
- reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::Range<float>(getBuffer<float>(_start), getBuffer<float>(_limit),
+ getBuffer<float>(_delta), getBuffer<float>(_output));
break;
case OperandType::INT32:
- nnfw::cker::Range<int32_t>(reinterpret_cast<int32_t *>(_start->buffer()),
- reinterpret_cast<int32_t *>(_limit->buffer()),
- reinterpret_cast<int32_t *>(_delta->buffer()),
- reinterpret_cast<int32_t *>(_output->buffer()));
+ nnfw::cker::Range<int32_t>(getBuffer<int32_t>(_start), getBuffer<int32_t>(_limit),
+ getBuffer<int32_t>(_delta), getBuffer<int32_t>(_output));
break;
default:
throw std::runtime_error{"Range: unsupported data type"};
diff --git a/runtime/onert/backend/cpu/ops/RankLayer.cc b/runtime/onert/backend/cpu/ops/RankLayer.cc
index 4690bdf72..765c595ff 100644
--- a/runtime/onert/backend/cpu/ops/RankLayer.cc
+++ b/runtime/onert/backend/cpu/ops/RankLayer.cc
@@ -40,15 +40,8 @@ void RankLayer::configure(const IPortableTensor *input, IPortableTensor *output)
void RankLayer::run()
{
- if (_input->data_type() == OperandType::FLOAT32 || _input->data_type() == OperandType::INT32)
- {
- int32_t *output_data = reinterpret_cast<int32_t *>(_output->buffer());
- output_data[0] = _input->num_dimensions();
- }
- else
- {
- throw std::runtime_error{"Rank : unsupported data type"};
- }
+ int32_t *output_data = getBuffer<int32_t>(_output);
+ output_data[0] = _input->getShape().rank();
}
} // namespace ops
diff --git a/runtime/onert/backend/cpu/ops/ReduceLayer.cc b/runtime/onert/backend/cpu/ops/ReduceLayer.cc
index bb5f85d60..66b5abb15 100644
--- a/runtime/onert/backend/cpu/ops/ReduceLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ReduceLayer.cc
@@ -18,6 +18,7 @@
#include "OperationUtils.h"
+#include "cker/neon/neon_check.h"
#include <cker/operation/Reduce.h>
namespace onert
@@ -37,10 +38,10 @@ void evalLogic(const IPortableTensor *input, IPortableTensor *output, const std:
bool keep_dims, T init_value, nnfw::cker::Reduce &reduce_kernel,
T reducer(const T current, const T in))
{
- reduce_kernel.prepare(input->num_dimensions(), axes.size());
- bool result = reduce_kernel.ReduceGeneric<T>(
- getTensorShape(input), reinterpret_cast<const T *>(input->buffer()), getTensorShape(output),
- reinterpret_cast<T *>(output->buffer()), axes, keep_dims, init_value, reducer);
+ reduce_kernel.prepare(input->getShape().rank(), axes.size());
+ bool result =
+ reduce_kernel.ReduceGeneric<T>(getShape(input), getBuffer<T>(input), getShape(output),
+ getBuffer<T>(output), axes, keep_dims, init_value, reducer);
if (!result)
{
@@ -66,15 +67,15 @@ evalType(bool keep_dims, nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_ty
break;
case ReduceType::kMax:
return std::bind(
- &evalLogic<T>, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
- keep_dims, std::numeric_limits<T>::lowest(), reduce_kernel,
- [](const T current, const T in) -> T { return (in > current) ? in : current; });
+ &evalLogic<T>, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+ keep_dims, std::numeric_limits<T>::lowest(), reduce_kernel,
+ [](const T current, const T in) -> T { return (in > current) ? in : current; });
break;
case ReduceType::kMin:
return std::bind(
- &evalLogic<T>, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
- keep_dims, std::numeric_limits<T>::max(), reduce_kernel,
- [](const T current, const T in) -> T { return (in < current) ? in : current; });
+ &evalLogic<T>, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+ keep_dims, std::numeric_limits<T>::max(), reduce_kernel,
+ [](const T current, const T in) -> T { return (in < current) ? in : current; });
break;
default:
throw std::runtime_error{"Reduce: Unsupported reduce type"};
@@ -126,21 +127,21 @@ void evalSumQuantized(const IPortableTensor *input, IPortableTensor *output,
nnfw::cker::Reduce &reduce_kernel)
{
const bool same_scale = (input->data_scale() == output->data_scale() &&
- input->data_offset() == output->data_offset());
+ input->data_zero_point() == output->data_zero_point());
- reduce_kernel.prepare(input->num_dimensions(), axes.size());
+ reduce_kernel.prepare(input->getShape().rank(), axes.size());
if (!same_scale)
{
std::vector<int32_t> temp_sum(output->getShape().num_elements());
bool result = reduce_kernel.QuantizedMeanOrSum<uint8_t, int32_t>(
- reinterpret_cast<const uint8_t *>(input->buffer()), input->data_offset(),
- input->data_scale(), getTensorShape(input), reinterpret_cast<uint8_t *>(output->buffer()),
- output->data_offset(), output->data_scale(), getTensorShape(output), axes, keep_dims,
- temp_sum.data(), true, [](const int32_t current, const uint8_t in) -> int32_t {
- const int32_t actual_in = static_cast<int32_t>(in);
- return current + actual_in;
- });
+ getBuffer<uint8_t>(input), input->data_zero_point(), input->data_scale(), getShape(input),
+ getBuffer<uint8_t>(output), output->data_zero_point(), output->data_scale(), getShape(output),
+ axes, keep_dims, temp_sum.data(), true,
+ [](const int32_t current, const uint8_t in) -> int32_t {
+ const int32_t actual_in = static_cast<int32_t>(in);
+ return current + actual_in;
+ });
if (!result)
{
@@ -157,8 +158,8 @@ void evalSumQuantized(const IPortableTensor *input, IPortableTensor *output,
} // namespace
ReduceLayer::ReduceLayer()
- : _input(nullptr), _axes(nullptr), _output(nullptr), _reduce_kernel(new nnfw::cker::Reduce()),
- _kernel()
+ : _input(nullptr), _axes(nullptr), _output(nullptr), _reduce_kernel(new nnfw::cker::Reduce()),
+ _kernel(), _reduceType(ReduceType::kInvalid)
{
// DO NOTHING
}
@@ -171,8 +172,9 @@ void ReduceLayer::configure(const IPortableTensor *input, const IPortableTensor
_input = input;
_axes = axes;
_output = output;
+ _reduceType = reduceType;
- switch (reduceType)
+ switch (_reduceType)
{
case ReduceType::kSum:
if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
@@ -199,13 +201,22 @@ void ReduceLayer::configure(const IPortableTensor *input, const IPortableTensor
_kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kAll);
break;
default:
- throw std::runtime_error{"ReduceSum: Unsupported reduce type"};
+ throw std::runtime_error{"Reduce: Unsupported reduce type"};
}
}
void ReduceLayer::run()
{
const auto axes = getReducerAxes(_axes);
+#ifdef USE_NEON
+ int32_t rank = _input->getShape().rank();
+ if (_input->data_type() == ir::DataType::FLOAT32 && _reduceType == ReduceType::kSum &&
+ axes.size() == 1 && (axes[0] == -1 || axes[0] == rank - 1))
+ {
+ OptimizedReduceSum(getBuffer<float>(_input), getShape(_input), getBuffer<float>(_output));
+ return;
+ }
+#endif // USE_NEON
_kernel(_input, _output, axes);
}
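
Note: the NEON fast path above fires only for a single reduction over the innermost axis (axes == {-1} or {rank - 1}), where the data is contiguous. In scalar form it computes a row-wise sum, which OptimizedReduceSum vectorizes:

#include <cstddef>

void reduceSumLastAxis(const float *in, std::size_t outer, std::size_t inner, float *out)
{
  for (std::size_t o = 0; o < outer; ++o)
  {
    float acc = 0.f;
    for (std::size_t i = 0; i < inner; ++i)
      acc += in[o * inner + i];
    out[o] = acc;
  }
}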
diff --git a/runtime/onert/backend/cpu/ops/ReduceLayer.h b/runtime/onert/backend/cpu/ops/ReduceLayer.h
index 332d399bd..e70f0fcb2 100644
--- a/runtime/onert/backend/cpu/ops/ReduceLayer.h
+++ b/runtime/onert/backend/cpu/ops/ReduceLayer.h
@@ -17,6 +17,8 @@
#ifndef __ONERT_BACKEND_CPU_OPS_REDUCESUMLAYER_H__
#define __ONERT_BACKEND_CPU_OPS_REDUCESUMLAYER_H__
+#include "cker/neon/neon_check.h"
+
#include <backend/IPortableTensor.h>
#include <exec/IFunction.h>
@@ -47,6 +49,7 @@ enum class ReduceType
kMin,
kAny,
kAll,
+ kInvalid // For debugging and initialization
};
class ReduceLayer : public ::onert::exec::IFunction
@@ -69,7 +72,9 @@ private:
std::unique_ptr<nnfw::cker::Reduce> _reduce_kernel;
std::function<void(const IPortableTensor *input, IPortableTensor *output,
const std::vector<int> &axes)>
- _kernel;
+ _kernel;
+
+ ReduceType _reduceType;
};
} // namespace ops
diff --git a/runtime/onert/backend/cpu/ops/ResizeBilinearLayer.cc b/runtime/onert/backend/cpu/ops/ResizeBilinearLayer.cc
index 180094bb8..c32015fdc 100644
--- a/runtime/onert/backend/cpu/ops/ResizeBilinearLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ResizeBilinearLayer.cc
@@ -28,16 +28,39 @@ namespace ops
{
ResizeBilinearLayer::ResizeBilinearLayer()
- : _input(nullptr), _output(nullptr), _output_height(0), _output_width(0), _align_corners(false),
- _half_pixel_centers(false)
+ : _input(nullptr), _output(nullptr), _size(nullptr), _output_height(0), _output_width(0),
+ _align_corners(false), _half_pixel_centers(false)
{
// DO NOTHING
}
void ResizeBilinearLayer::configure(const IPortableTensor *input, IPortableTensor *output,
+ const IPortableTensor *size, bool align_corners,
+ bool half_pixel_centers)
+{
+ assert(!size->is_constant());
+ _input = input;
+ _output = output;
+ _size = size;
+ _align_corners = align_corners;
+ _half_pixel_centers = half_pixel_centers;
+}
+
+void ResizeBilinearLayer::configure(const IPortableTensor *input, IPortableTensor *output,
int32_t output_height, int32_t output_width, bool align_corners,
bool half_pixel_centers)
{
+ assert(_size == nullptr);
+ if (output_height < 0)
+ {
+ throw std::runtime_error{"ResizeBilinear: size value must be positive value, output_height = " +
+ std::to_string(output_height)};
+ }
+ if (output_width < 0)
+ {
+ throw std::runtime_error{"ResizeBilinear: size value must be positive value, output_width = " +
+ std::to_string(output_width)};
+ }
_input = input;
_output = output;
_output_height = output_height;
@@ -49,23 +72,35 @@ void ResizeBilinearLayer::configure(const IPortableTensor *input, IPortableTenso
void ResizeBilinearLayer::run()
{
nnfw::cker::ResizeBilinearParams params;
+ if (_size == nullptr)
+ {
+ params.output_height = _output_height;
+ params.output_width = _output_width;
+ }
+ else
+ {
+ const auto size_buf = getBuffer<int32_t>(_size);
+ params.output_height = size_buf[0];
+ params.output_width = size_buf[1];
+ }
params.align_corners = _align_corners;
params.half_pixel_centers = _half_pixel_centers;
- params.output_height = _output_height;
- params.output_width = _output_width;
switch (_input->data_type())
{
case OperandType::FLOAT32:
- nnfw::cker::ResizeBilinear(
- params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::ResizeBilinear(params, getShape(_input), getBuffer<float>(_input),
+ getShape(_output), getBuffer<float>(_output));
break;
case OperandType::QUANT_UINT8_ASYMM:
- nnfw::cker::ResizeBilinear(
- params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
+ nnfw::cker::ResizeBilinear(params, getShape(_input), getBuffer<uint8_t>(_input),
+ getShape(_output), getBuffer<uint8_t>(_output));
+ break;
+
+ case OperandType::QUANT_INT8_ASYMM:
+ nnfw::cker::ResizeBilinear(params, getShape(_input), getBuffer<int8_t>(_input),
+ getShape(_output), getBuffer<int8_t>(_output));
break;
case OperandType::UINT8:
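
Note: with the new overload, the output size can come either from a non-constant size tensor read at run time or from the statically configured height/width. The dispatch in run() boils down to this sketch, where size_buf stands in for getBuffer<int32_t>(_size):

#include <cstdint>

struct OutSize { int32_t h; int32_t w; };

OutSize resolveOutputSize(const int32_t *size_buf, int32_t static_h, int32_t static_w)
{
  if (size_buf != nullptr)
    return {size_buf[0], size_buf[1]}; // dynamic: {height, width} read at run time
  return {static_h, static_w};         // static: fixed at configure time
}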
diff --git a/runtime/onert/backend/cpu/ops/ResizeBilinearLayer.h b/runtime/onert/backend/cpu/ops/ResizeBilinearLayer.h
index fc49b348e..d7ae1c620 100644
--- a/runtime/onert/backend/cpu/ops/ResizeBilinearLayer.h
+++ b/runtime/onert/backend/cpu/ops/ResizeBilinearLayer.h
@@ -36,7 +36,10 @@ public:
ResizeBilinearLayer();
public:
- void configure(const IPortableTensor *input1, IPortableTensor *output, int32_t output_height,
+ void configure(const IPortableTensor *input1, IPortableTensor *output,
+ const IPortableTensor *size, bool align_corners, bool half_pixel_centers);
+
+ void configure(const IPortableTensor *input, IPortableTensor *output, int32_t output_height,
int32_t output_width, bool align_corners, bool half_pixel_centers);
void run() override;
@@ -44,6 +47,7 @@ public:
private:
const IPortableTensor *_input;
IPortableTensor *_output;
+ const IPortableTensor *_size;
int32_t _output_height;
int32_t _output_width;
bool _align_corners;
diff --git a/runtime/onert/backend/cpu/ops/ReverseLayer.cc b/runtime/onert/backend/cpu/ops/ReverseLayer.cc
index 7979e77a0..cddab302a 100644
--- a/runtime/onert/backend/cpu/ops/ReverseLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ReverseLayer.cc
@@ -36,18 +36,17 @@ void ReverseLayer::run()
{
throw std::runtime_error{"Reverse: only support 1 axis"};
}
- int32_t axis = *(reinterpret_cast<int32_t *>(_axis->buffer()));
+ int32_t axis = *getBuffer<int32_t>(_axis);
if (axis < 0)
{
- axis += _input->num_dimensions();
+ axis += _input->getShape().rank();
}
switch (_input->data_type())
{
case OperandType::FLOAT32:
- nnfw::cker::Reverse<float>(
- axis, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::Reverse<float>(axis, getShape(_input), getBuffer<float>(_input),
+ getShape(_output), getBuffer<float>(_output));
break;
default:
throw std::runtime_error{"Reverse: unsupported data type"};
diff --git a/runtime/onert/backend/cpu/ops/SelectLayer.cc b/runtime/onert/backend/cpu/ops/SelectLayer.cc
index 95cfe1df0..4c28d1471 100644
--- a/runtime/onert/backend/cpu/ops/SelectLayer.cc
+++ b/runtime/onert/backend/cpu/ops/SelectLayer.cc
@@ -30,7 +30,7 @@ namespace ops
{
SelectLayer::SelectLayer()
- : _cond(nullptr), _input_true(nullptr), _input_false(nullptr), _output(nullptr)
+ : _cond(nullptr), _input_true(nullptr), _input_false(nullptr), _output(nullptr)
{
// DO NOTHING
}
@@ -47,11 +47,10 @@ void SelectLayer::configure(const IPortableTensor *cond, const IPortableTensor *
void SelectLayer::run()
{
-#define KERNEL_SELECT(type, op) \
- nnfw::cker::op(getTensorShape(_cond), reinterpret_cast<uint8_t *>(_cond->buffer()), \
- getTensorShape(_input_true), reinterpret_cast<type *>(_input_true->buffer()), \
- getTensorShape(_input_false), reinterpret_cast<type *>(_input_false->buffer()), \
- getTensorShape(_output), reinterpret_cast<type *>(_output->buffer()));
+#define KERNEL_SELECT(type, op) \
+ nnfw::cker::op(getShape(_cond), getBuffer<uint8_t>(_cond), getShape(_input_true), \
+ getBuffer<type>(_input_true), getShape(_input_false), \
+ getBuffer<type>(_input_false), getShape(_output), getBuffer<type>(_output));
#define KERNEL_SWITCH(type, op) \
switch (type) \
@@ -66,8 +65,8 @@ void SelectLayer::run()
auto input_type = _input_true->data_type();
bool require_broadcast =
- !HaveSameShapes(_input_true, _cond) || !HaveSameShapes(_input_false, _cond);
- bool rank_one_select = ((_input_true->num_dimensions() == 1) && !require_broadcast);
+ !HaveSameShapes(_input_true, _cond) || !HaveSameShapes(_input_false, _cond);
+ bool rank_one_select = ((_input_true->getShape().rank() == 1) && !require_broadcast);
if (rank_one_select)
{
diff --git a/runtime/onert/backend/cpu/ops/ShapeLayer.cc b/runtime/onert/backend/cpu/ops/ShapeLayer.cc
index bffb04bc6..46294e948 100644
--- a/runtime/onert/backend/cpu/ops/ShapeLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ShapeLayer.cc
@@ -34,25 +34,10 @@ ShapeLayer::ShapeLayer() : _input(nullptr), _output(nullptr)
template <typename T> void GetRawShape(const IPortableTensor *input, T *output_data)
{
- for (uint32_t i = 0; i < input->num_dimensions(); ++i)
+ auto shape = input->getShape();
+ for (int i = 0; i < shape.rank(); ++i)
{
- output_data[i] = static_cast<T>(input->dimension(i));
- }
-}
-
-void ShapeLayer::shape()
-{
- if (_output->data_type() == OperandType::UINT32)
- {
- GetRawShape(_input, reinterpret_cast<uint32_t *>(_output->buffer()));
- }
- else if (_output->data_type() == OperandType::INT32)
- {
- GetRawShape(_input, reinterpret_cast<int32_t *>(_output->buffer()));
- }
- else
- {
- throw std::runtime_error{"NYI : not supported output type for ShapeLayer"};
+ output_data[i] = static_cast<T>(shape.dim(i));
}
}
@@ -64,14 +49,21 @@ void ShapeLayer::configure(const IPortableTensor *input, IPortableTensor *output
void ShapeLayer::run()
{
- if (_input->data_type() == OperandType::FLOAT32 || _input->data_type() == OperandType::INT32 ||
- _input->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ if (_output->data_type() == OperandType::UINT32)
+ {
+ GetRawShape(_input, getBuffer<uint32_t>(_output));
+ }
+ else if (_output->data_type() == OperandType::INT32)
{
- shape();
+ GetRawShape(_input, getBuffer<int32_t>(_output));
+ }
+ else if (_output->data_type() == OperandType::INT64)
+ {
+ GetRawShape(_input, getBuffer<int64_t>(_output));
}
else
{
- throw std::runtime_error{"Shape : unsupported data type"};
+ throw std::runtime_error{"NYI : not supported output type for ShapeLayer"};
}
}
diff --git a/runtime/onert/backend/cpu/ops/ShapeLayer.h b/runtime/onert/backend/cpu/ops/ShapeLayer.h
index fb358c7a4..bd2a7abde 100644
--- a/runtime/onert/backend/cpu/ops/ShapeLayer.h
+++ b/runtime/onert/backend/cpu/ops/ShapeLayer.h
@@ -36,8 +36,6 @@ public:
ShapeLayer();
public:
- void shape();
-
void configure(const IPortableTensor *input, IPortableTensor *output);
void run() override;
diff --git a/runtime/onert/backend/cpu/ops/SliceLayer.cc b/runtime/onert/backend/cpu/ops/SliceLayer.cc
index 449c073e6..6332fbb56 100644
--- a/runtime/onert/backend/cpu/ops/SliceLayer.cc
+++ b/runtime/onert/backend/cpu/ops/SliceLayer.cc
@@ -41,8 +41,8 @@ void SliceLayer::GetBeginAndSizeVectors(int dimensions, const IPortableTensor *b
{
for (int idx = dimensions - 1; idx >= 0; --idx)
{
- begins->push_back(reinterpret_cast<T *>(begin->buffer())[idx]);
- sizes->push_back(reinterpret_cast<T *>(size->buffer())[idx]);
+ begins->push_back(getBuffer<T>(begin)[idx]);
+ sizes->push_back(getBuffer<T>(size)[idx]);
}
}
@@ -55,10 +55,21 @@ template <typename T> void SliceLayer::sliceImpl()
begins.reserve(kMaxDim);
sizes.reserve(kMaxDim);
- GetBeginAndSizeVectors<int32_t>(_input->num_dimensions(), _begin, _size, &begins, &sizes);
+ if (_begin->data_type() == OperandType::INT32)
+ {
+ GetBeginAndSizeVectors<int32_t>(_input->getShape().rank(), _begin, _size, &begins, &sizes);
+ }
+ else if (_begin->data_type() == OperandType::INT64)
+ {
+ GetBeginAndSizeVectors<int64_t>(_input->getShape().rank(), _begin, _size, &begins, &sizes);
+ }
+ else
+ {
+ throw std::runtime_error{"Slice: unsupported begin and/or size data type"};
+ }
// begins : 0-based, sizes : 1-based
- for (int i = _input->num_dimensions(); i < kMaxDim; ++i)
+ for (int i = _input->getShape().rank(); i < kMaxDim; ++i)
{
begins.push_back(0);
sizes.push_back(1);
@@ -73,9 +84,8 @@ template <typename T> void SliceLayer::sliceImpl()
op_params.size[i] = sizes[3 - i];
}
- nnfw::cker::Slice(op_params, getExtendedTensorShape(_input),
- reinterpret_cast<const T *>(_input->buffer()),
- reinterpret_cast<T *>(_output->buffer()));
+ nnfw::cker::Slice(op_params, getExtendedTensorShape(_input), getBuffer<T>(_input),
+ getBuffer<T>(_output));
}
void SliceLayer::configure(const IPortableTensor *input, const IPortableTensor *begin,
diff --git a/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc b/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc
index 095e67abc..320914dae 100644
--- a/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc
+++ b/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc
@@ -39,8 +39,7 @@ void SoftMaxLayer::softmaxFloat32()
if (getNumberOfDimensions(_input) == 1)
{
uint32_t input_size = getNumberOfElements(_input);
- nnfw::cker::Softmax(reinterpret_cast<const float *>(_input->buffer()), input_size, 1, _beta,
- reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::Softmax(getBuffer<float>(_input), input_size, 1, _beta, getBuffer<float>(_output));
}
else if (getNumberOfDimensions(_input) == 2)
{
@@ -49,69 +48,41 @@ void SoftMaxLayer::softmaxFloat32()
throw std::runtime_error("batch_size should not be 0");
uint32_t input_size = getNumberOfElements(_input) / batch_size;
- nnfw::cker::Softmax(reinterpret_cast<const float *>(_input->buffer()), input_size, batch_size,
- _beta, reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::Softmax(getBuffer<float>(_input), input_size, batch_size, _beta,
+ getBuffer<float>(_output));
}
else if (getNumberOfDimensions(_input) == 4)
{
nnfw::cker::SoftmaxParams op_params;
op_params.beta = _beta;
- nnfw::cker::Softmax(op_params, getTensorShape(_input),
- reinterpret_cast<const float *>(_input->buffer()), getTensorShape(_output),
- reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::Softmax(op_params, getShape(_input), getBuffer<float>(_input), getShape(_output),
+ getBuffer<float>(_output));
}
else
{
- throw std::runtime_error{"only 1D, 2D and 4D tensors supported"};
+ nnfw::cker::SoftmaxParams op_params;
+ op_params.beta = _beta;
+ nnfw::cker::reference::Softmax(op_params, getShape(_input), getBuffer<float>(_input),
+ getShape(_output), getBuffer<float>(_output));
}
}
-void SoftMaxLayer::softmaxQuant8()
+template <typename T> void SoftMaxLayer::softmaxQuant8()
{
- nnfw::cker::Shape descrIn4D(4);
-
- if (getNumberOfDimensions(_input) == 2)
- {
- auto batch_size = getSizeOfDimension(_input, 0);
- if (batch_size == 0)
- throw std::runtime_error("batch_size should not be 0");
-
- auto input_size = getNumberOfElements(_input) / batch_size;
- descrIn4D.SetDim(0, batch_size);
- descrIn4D.SetDim(1, 1);
- descrIn4D.SetDim(2, 1);
- descrIn4D.SetDim(3, input_size);
- }
- else if (getNumberOfDimensions(_input) == 4)
- {
- descrIn4D.SetDim(0, _input->dimension(0));
- descrIn4D.SetDim(1, _input->dimension(1));
- descrIn4D.SetDim(2, _input->dimension(2));
- descrIn4D.SetDim(3, _input->dimension(3));
- }
- else
- {
- throw std::runtime_error{"only 2D and 4D tensors supported"};
- }
- if (_output->data_offset() != 0 || _output->data_scale() != 1.f / 256)
- {
- throw std::runtime_error{"incorrect scale / offset for output"};
- }
- static const int32_t kScaledDiffIntegerBits = 5;
- const double input_beta_real_multiplier = std::min(
- 1.0 * _beta * _input->data_scale() * (1 << (31 - kScaledDiffIntegerBits)), (1ll << 31) - 1.0);
- int32_t input_multiplier = 0;
- int32_t input_left_shift = 0;
- QuantizeMultiplierGreaterThanOne(input_beta_real_multiplier, &input_multiplier,
- &input_left_shift);
- float diff_min = -1.0f * CalculateInputRadius(kScaledDiffIntegerBits, input_left_shift);
-
nnfw::cker::SoftmaxParams op_params;
- op_params.input_multiplier = input_multiplier;
- op_params.input_left_shift = input_left_shift;
- op_params.diff_min = diff_min;
- nnfw::cker::Softmax(op_params, descrIn4D, reinterpret_cast<const uint8_t *>(_input->buffer()),
- descrIn4D, reinterpret_cast<uint8_t *>(_output->buffer()));
+ op_params.scale = _output->data_scale();
+ op_params.zero_point = _output->data_zero_point();
+ op_params.uint8_table1 = _uint8_table1;
+ op_params.uint8_table2 = _uint8_table2;
+ op_params.table = _table;
+
+#ifdef TFLITE_SOFTMAX_USE_UINT16_LUT
+ nnfw::cker::SoftmaxInt8LUT<T, T>(op_params, getShape(_input), getBuffer<T>(_input),
+ getShape(_output), getBuffer<T>(_output));
+#else
+ nnfw::cker::Softmax<T, T>(op_params, getShape(_input), getBuffer<T>(_input), getShape(_output),
+ getBuffer<T>(_output));
+#endif
}
void SoftMaxLayer::configure(const IPortableTensor *input, const float beta,
@@ -120,21 +91,36 @@ void SoftMaxLayer::configure(const IPortableTensor *input, const float beta,
_input = input;
_output = output;
_beta = beta;
+
+ if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM ||
+ _input->data_type() == OperandType::QUANT_INT8_ASYMM)
+ {
+#ifdef TFLITE_SOFTMAX_USE_UINT16_LUT
+ // Applied only when both input and output are uint8/int8 and the build
+ // uses clang on aarch64.
+ nnfw::cker::PopulateSoftmaxUInt8LookupTable(_uint8_table1, _uint8_table2, _input->data_scale(),
+ _beta);
+#else
+ nnfw::cker::PopulateSoftmaxLookupTable(_table, _input->data_scale(), _beta);
+#endif
+ }
}
void SoftMaxLayer::run()
{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- softmaxFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- softmaxQuant8();
- }
- else
+ switch (_input->data_type())
{
- throw std::runtime_error{"SoftMax: unsupported data type"};
+ case OperandType::FLOAT32:
+ softmaxFloat32();
+ break;
+ case OperandType::QUANT_UINT8_ASYMM:
+ softmaxQuant8<uint8_t>();
+ break;
+ case OperandType::QUANT_INT8_ASYMM:
+ softmaxQuant8<int8_t>();
+ break;
+ default:
+ throw std::runtime_error{"SoftMax: unsupported data type"};
}
}
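
Note: the quantized Softmax now precomputes a 256-entry exp table at configure time, so run() only gathers and normalizes. A sketch of what the non-LUT16 table amounts to, with entries offset from the maximum code so they stay at most 1 (the TFLITE_SOFTMAX_USE_UINT16_LUT variant stores its result differently):

#include <cmath>

void buildSoftmaxTable(float table[256], float input_scale, float beta)
{
  // table[code] approximates exp((code - max_code) * input_scale * beta).
  for (int code = 0; code < 256; ++code)
    table[code] = std::exp((code - 255) * input_scale * beta);
}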
diff --git a/runtime/onert/backend/cpu/ops/SoftMaxLayer.h b/runtime/onert/backend/cpu/ops/SoftMaxLayer.h
index d0c704c2c..e63be0c3e 100644
--- a/runtime/onert/backend/cpu/ops/SoftMaxLayer.h
+++ b/runtime/onert/backend/cpu/ops/SoftMaxLayer.h
@@ -38,7 +38,7 @@ public:
public:
void softmaxFloat32();
- void softmaxQuant8();
+ template <typename T> void softmaxQuant8();
void configure(const IPortableTensor *input, const float beta, IPortableTensor *output);
@@ -49,6 +49,10 @@ private:
IPortableTensor *_output;
float _beta;
+
+ float _table[256];
+ uint8_t _uint8_table1[256];
+ uint8_t _uint8_table2[256];
};
} // namespace ops
diff --git a/runtime/onert/backend/cpu/ops/SpaceToBatchNDLayer.cc b/runtime/onert/backend/cpu/ops/SpaceToBatchNDLayer.cc
index 896e262ba..8dd0a01a5 100644
--- a/runtime/onert/backend/cpu/ops/SpaceToBatchNDLayer.cc
+++ b/runtime/onert/backend/cpu/ops/SpaceToBatchNDLayer.cc
@@ -29,7 +29,7 @@ namespace cpu
namespace ops
{
SpaceToBatchNDLayer::SpaceToBatchNDLayer()
- : _input(nullptr), _block_shape(nullptr), _padding(nullptr), _output(nullptr)
+ : _input(nullptr), _block_shape(nullptr), _padding(nullptr), _output(nullptr)
{
// DO NOTHING
}
@@ -38,7 +38,7 @@ SpaceToBatchNDLayer::SpaceToBatchNDLayer()
void SpaceToBatchNDLayer::checkDimension()
{
const int kSpatialDimensionNum = 2;
- if (_block_shape->dimension(0) != kSpatialDimensionNum)
+ if (_block_shape->getShape().dim(0) != kSpatialDimensionNum)
{
throw std::runtime_error("SpaceToBatchND : block_shape(block_size) tensor's rank is wrong\n");
}
@@ -47,18 +47,17 @@ void SpaceToBatchNDLayer::checkDimension()
// shape height and width.
for (int dim = 0; dim < kSpatialDimensionNum; ++dim)
{
- int final_dim_size =
- (_input->dimension(dim + 1) + reinterpret_cast<int32_t *>(_padding->buffer())[dim * 2] +
- reinterpret_cast<int32_t *>(_padding->buffer())[dim * 2 + 1]);
+ int final_dim_size = (_input->getShape().dim(dim + 1) + getBuffer<int32_t>(_padding)[dim * 2] +
+ getBuffer<int32_t>(_padding)[dim * 2 + 1]);
- if (final_dim_size % reinterpret_cast<int32_t *>(_block_shape->buffer())[dim] != 0)
+ if (final_dim_size % getBuffer<int32_t>(_block_shape)[dim] != 0)
{
throw std::runtime_error(
- "SpaceToBatchND : padded input's dimension is not a multiple of block size\n");
+ "SpaceToBatchND : padded input's dimension is not a multiple of block size\n");
}
- if ((int32_t)_output->dimension(dim + 1) !=
- final_dim_size / reinterpret_cast<int32_t *>(_block_shape->buffer())[dim])
+ if ((int32_t)_output->getShape().dim(dim + 1) !=
+ final_dim_size / getBuffer<int32_t>(_block_shape)[dim])
{
throw std::runtime_error("SpaceToBatchND : wrong output dimension\n");
}
@@ -66,7 +65,7 @@ void SpaceToBatchNDLayer::checkDimension()
}
template <> uint32_t SpaceToBatchNDLayer::getPad<float>() { return 0; }
-template <> uint32_t SpaceToBatchNDLayer::getPad<uint8_t>() { return _output->data_offset(); }
+template <> uint32_t SpaceToBatchNDLayer::getPad<uint8_t>() { return _output->data_zero_point(); }
template <typename T> void SpaceToBatchNDLayer::spaceToBatchND()
{
@@ -75,11 +74,10 @@ template <typename T> void SpaceToBatchNDLayer::spaceToBatchND()
nnfw::cker::SpaceToBatchParams params;
params.output_offset = getPad<T>();
- nnfw::cker::SpaceToBatchND(
- params, getTensorShape(_input), reinterpret_cast<const T *>(_input->buffer()),
- getTensorShape(_block_shape), reinterpret_cast<const int32_t *>(_block_shape->buffer()),
- getTensorShape(_padding), reinterpret_cast<const int32_t *>(_padding->buffer()),
- getTensorShape(_output), reinterpret_cast<T *>(_output->buffer()));
+ nnfw::cker::SpaceToBatchND(params, getShape(_input), getBuffer<T>(_input), getShape(_block_shape),
+ getBuffer<int32_t>(_block_shape), getShape(_padding),
+ getBuffer<int32_t>(_padding), getShape(_output),
+ getBuffer<T>(_output));
}
void SpaceToBatchNDLayer::configure(const IPortableTensor *input,
diff --git a/runtime/onert/backend/cpu/ops/SpaceToDepthLayer.cc b/runtime/onert/backend/cpu/ops/SpaceToDepthLayer.cc
index a0869aed8..8271daf42 100644
--- a/runtime/onert/backend/cpu/ops/SpaceToDepthLayer.cc
+++ b/runtime/onert/backend/cpu/ops/SpaceToDepthLayer.cc
@@ -39,9 +39,8 @@ template <typename T> void SpaceToDepthLayer::spaceToDepth()
nnfw::cker::SpaceToDepthParams params;
params.block_size = _block_size;
- nnfw::cker::SpaceToDepth(params, getTensorShape(_input),
- reinterpret_cast<const T *>(_input->buffer()), getTensorShape(_output),
- reinterpret_cast<T *>(_output->buffer()));
+ nnfw::cker::SpaceToDepth(params, getShape(_input), getBuffer<T>(_input), getShape(_output),
+ getBuffer<T>(_output));
}
void SpaceToDepthLayer::configure(const IPortableTensor *input, const int32_t block_size,
diff --git a/runtime/onert/backend/cpu/ops/SplitLayer.cc b/runtime/onert/backend/cpu/ops/SplitLayer.cc
index 1f40654c1..6e4eaccd4 100644
--- a/runtime/onert/backend/cpu/ops/SplitLayer.cc
+++ b/runtime/onert/backend/cpu/ops/SplitLayer.cc
@@ -29,7 +29,7 @@ namespace cpu
namespace ops
{
-SplitLayer::SplitLayer() : _input(nullptr), _num_splits(0), _axis(0), _outputs()
+SplitLayer::SplitLayer() : _input(nullptr), _axis(nullptr), _num_splits(0), _outputs()
{
// DO NOTHING
}
@@ -37,7 +37,16 @@ SplitLayer::SplitLayer() : _input(nullptr), _num_splits(0), _axis(0), _outputs()
template <typename T> void SplitLayer::split(void)
{
nnfw::cker::SplitParams op_params;
- op_params.axis = _axis;
+ if (_axis->total_size() != sizeof(int32_t))
+ {
+ throw std::runtime_error("ArgMinMax: wrong shape of axis");
+ }
+ auto axis = *getBuffer<int32_t>(_axis);
+ if (axis < 0)
+ {
+ axis += _input->getShape().rank();
+ }
+ op_params.axis = axis;
op_params.num_split = _num_splits;
std::vector<T *> outputPtrs;
@@ -45,16 +54,16 @@ template <typename T> void SplitLayer::split(void)
for (const auto output : _outputs)
{
assert(output->total_size() == sizeOfData(output->data_type(), output->getShape().dims()));
- outputPtrs.emplace_back(reinterpret_cast<T *>(output->buffer()));
+ outputPtrs.emplace_back(getBuffer<T>(output));
}
assert(_input->total_size() == sizeOfData(_input->data_type(), _input->getShape().dims()));
- nnfw::cker::Split<T>(op_params, getTensorShape(_input), reinterpret_cast<T *>(_input->buffer()),
- getTensorShape(_outputs[0]), outputPtrs.data());
+ nnfw::cker::Split<T>(op_params, getShape(_input), getBuffer<T>(_input), getShape(_outputs[0]),
+ outputPtrs.data());
}
-void SplitLayer::configure(const IPortableTensor *input, uint16_t num_splits, int16_t axis,
- std::vector<IPortableTensor *> &outputs)
+void SplitLayer::configure(const IPortableTensor *input, const IPortableTensor *axis,
+ uint16_t num_splits, std::vector<IPortableTensor *> &outputs)
{
assert(input != nullptr);
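
Note: split() above normalizes a negative axis by adding the input rank, the usual Python-style convention. Stand-alone, with a range check added here purely for illustration (the layer relies on the runtime's earlier validation):

#include <stdexcept>

// -1 on a rank-4 tensor resolves to 3.
int normalizeAxis(int axis, int rank)
{
  if (axis < -rank || axis >= rank)
    throw std::runtime_error{"axis out of range"};
  return axis < 0 ? axis + rank : axis;
}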
diff --git a/runtime/onert/backend/cpu/ops/SplitLayer.h b/runtime/onert/backend/cpu/ops/SplitLayer.h
index 0719a0063..090f87166 100644
--- a/runtime/onert/backend/cpu/ops/SplitLayer.h
+++ b/runtime/onert/backend/cpu/ops/SplitLayer.h
@@ -38,15 +38,15 @@ public:
public:
template <typename T> void split(void);
- void configure(const IPortableTensor *input, uint16_t num_splits, int16_t axis,
+ void configure(const IPortableTensor *input, const IPortableTensor *axis, uint16_t num_splits,
std::vector<IPortableTensor *> &outputs);
void run() override;
private:
const IPortableTensor *_input;
+ const IPortableTensor *_axis;
uint16_t _num_splits;
- int16_t _axis;
std::vector<IPortableTensor *> _outputs;
};
diff --git a/runtime/onert/backend/cpu/ops/SplitVLayer.cc b/runtime/onert/backend/cpu/ops/SplitVLayer.cc
index d6ca12442..166e6e6fd 100644
--- a/runtime/onert/backend/cpu/ops/SplitVLayer.cc
+++ b/runtime/onert/backend/cpu/ops/SplitVLayer.cc
@@ -30,7 +30,7 @@ namespace ops
{
SplitVLayer::SplitVLayer()
- : _input(nullptr), _size_splits(nullptr), _split_dim(nullptr), _num_splits(0), _outputs()
+ : _input(nullptr), _size_splits(nullptr), _split_dim(nullptr), _num_splits(0), _outputs()
{
// DO NOTHING
}
@@ -38,7 +38,7 @@ SplitVLayer::SplitVLayer()
template <typename T> void SplitVLayer::splitV(void)
{
nnfw::cker::SplitVParams op_params;
- op_params.axis = *(reinterpret_cast<const int32_t *>(_split_dim->buffer()));
+ op_params.axis = *getBuffer<int32_t>(_split_dim);
op_params.num_split = _num_splits;
std::vector<T *> outputPtrs;
@@ -47,13 +47,13 @@ template <typename T> void SplitVLayer::splitV(void)
for (const auto output : _outputs)
{
assert(output->total_size() == sizeOfData(output->data_type(), output->getShape().dims()));
- outputPtrs.emplace_back(reinterpret_cast<T *>(output->buffer()));
- outshape.emplace_back(getTensorShape(output));
+ outputPtrs.emplace_back(getBuffer<T>(output));
+ outshape.emplace_back(getShape(output));
}
assert(_input->total_size() == sizeOfData(_input->data_type(), _input->getShape().dims()));
- nnfw::cker::SplitV<T>(op_params, getTensorShape(_input), reinterpret_cast<T *>(_input->buffer()),
- outshape, outputPtrs.data());
+ nnfw::cker::SplitV<T>(op_params, getShape(_input), getBuffer<T>(_input), outshape,
+ outputPtrs.data());
}
void SplitVLayer::configure(const IPortableTensor *input, const IPortableTensor *size_splits,
diff --git a/runtime/onert/backend/cpu/ops/SquaredDiffLayer.cc b/runtime/onert/backend/cpu/ops/SquaredDiffLayer.cc
index cf67a5c00..78984c5a9 100644
--- a/runtime/onert/backend/cpu/ops/SquaredDiffLayer.cc
+++ b/runtime/onert/backend/cpu/ops/SquaredDiffLayer.cc
@@ -36,9 +36,8 @@ SqDiffLayer::SqDiffLayer() : _input1(nullptr), _input2(nullptr), _output(nullptr
void SqDiffLayer::SqDiffFloat32()
{
- nnfw::cker::SqDiff(getTensorShape(_input1), reinterpret_cast<const float *>(_input1->buffer()),
- getTensorShape(_input2), reinterpret_cast<const float *>(_input2->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::SqDiff(getShape(_input1), getBuffer<float>(_input1), getShape(_input2),
+ getBuffer<float>(_input2), getShape(_output), getBuffer<float>(_output));
}
void SqDiffLayer::configure(const IPortableTensor *input1, const IPortableTensor *input2,
diff --git a/runtime/onert/backend/cpu/ops/StatelessRandomUniformLayer.cc b/runtime/onert/backend/cpu/ops/StatelessRandomUniformLayer.cc
index b8dfcb4b5..587582e8f 100644
--- a/runtime/onert/backend/cpu/ops/StatelessRandomUniformLayer.cc
+++ b/runtime/onert/backend/cpu/ops/StatelessRandomUniformLayer.cc
@@ -28,7 +28,7 @@ namespace ops
{
StatelessRandomUniformLayer::StatelessRandomUniformLayer()
- : _shape(nullptr), _seed(nullptr), _output(nullptr)
+ : _shape(nullptr), _seed(nullptr), _output(nullptr)
{
// DO NOTHING
}
@@ -43,10 +43,9 @@ void StatelessRandomUniformLayer::configure(const IPortableTensor *shape,
void StatelessRandomUniformLayer::StatelessRandomUniformFloat32()
{
- nnfw::cker::StatelessRandomUniform(
- getTensorShape(_shape), reinterpret_cast<const int *>(_shape->buffer()),
- getTensorShape(_seed), reinterpret_cast<const int *>(_seed->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::StatelessRandomUniform(getShape(_shape), getBuffer<int32_t>(_shape), getShape(_seed),
+ getBuffer<int32_t>(_seed), getShape(_output),
+ getBuffer<float>(_output));
}
void StatelessRandomUniformLayer::run()
diff --git a/runtime/onert/backend/cpu/ops/StridedSliceLayer.cc b/runtime/onert/backend/cpu/ops/StridedSliceLayer.cc
index dcbb87734..bb8550ad0 100644
--- a/runtime/onert/backend/cpu/ops/StridedSliceLayer.cc
+++ b/runtime/onert/backend/cpu/ops/StridedSliceLayer.cc
@@ -30,24 +30,23 @@ namespace ops
{
StridedSliceLayer::StridedSliceLayer()
- : _input(nullptr), _begin(nullptr), _end(nullptr), _strides(nullptr), _output(nullptr),
- _begin_mask(0), _ellipsis_mask(0), _end_mask(0), _new_axis_mask(0), _shrink_axis_mask(0)
+ : _input(nullptr), _begin(nullptr), _end(nullptr), _strides(nullptr), _output(nullptr),
+ _begin_mask(0), _ellipsis_mask(0), _end_mask(0), _new_axis_mask(0), _shrink_axis_mask(0)
{
}
template <typename T> void StridedSliceLayer::stridedSliceImpl()
{
+ const auto input_shape = getShape(_input);
+ const auto output_shape = getShape(_output);
auto op_params = nnfw::cker::buildStridedSliceParams(
- reinterpret_cast<uint32_t *>(_begin->buffer()), reinterpret_cast<uint32_t *>(_end->buffer()),
- reinterpret_cast<uint32_t *>(_strides->buffer()), _begin_mask, _end_mask, _shrink_axis_mask,
- getTensorShape(_input).DimensionsCount());
+ getBuffer<uint32_t>(_begin), getBuffer<uint32_t>(_end), getBuffer<uint32_t>(_strides),
+ _begin_mask, _end_mask, _shrink_axis_mask, input_shape.DimensionsCount());
- nnfw::cker::checkOutputSize(op_params, getTensorShape(_input), getTensorShape(_output),
- getTensorShape(_input).DimensionsCount());
+ nnfw::cker::checkOutputSize(op_params, input_shape, output_shape, input_shape.DimensionsCount());
- nnfw::cker::StridedSlice(op_params, getTensorShape(_input),
- reinterpret_cast<const T *>(_input->buffer()), getTensorShape(_output),
- reinterpret_cast<T *>(_output->buffer()));
+ nnfw::cker::StridedSlice(op_params, input_shape, getBuffer<T>(_input), output_shape,
+ getBuffer<T>(_output));
}
void StridedSliceLayer::configure(const IPortableTensor *input, const IPortableTensor *begin,
diff --git a/runtime/onert/backend/cpu/ops/TileLayer.cc b/runtime/onert/backend/cpu/ops/TileLayer.cc
index bfc371972..1f018db93 100644
--- a/runtime/onert/backend/cpu/ops/TileLayer.cc
+++ b/runtime/onert/backend/cpu/ops/TileLayer.cc
@@ -36,9 +36,8 @@ TileLayer::TileLayer() : _input(nullptr), _multipliers(nullptr), _output(nullptr
void TileLayer::tileFloat32()
{
- TileOneDimension(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- reinterpret_cast<const int *>(_multipliers->buffer()),
- reinterpret_cast<float *>(_output->buffer()), 0);
+ TileOneDimension(getShape(_input), getBuffer<float>(_input), getBuffer<int>(_multipliers),
+ getBuffer<float>(_output), 0);
}
void TileLayer::tileQuant8()
diff --git a/runtime/onert/backend/cpu/ops/TransposeLayer.cc b/runtime/onert/backend/cpu/ops/TransposeLayer.cc
index 7b232562a..850c07ab8 100644
--- a/runtime/onert/backend/cpu/ops/TransposeLayer.cc
+++ b/runtime/onert/backend/cpu/ops/TransposeLayer.cc
@@ -19,6 +19,7 @@
#include "OperationUtils.h"
#include <cker/operation/Transpose.h>
+#include <numeric>
namespace onert
{
@@ -29,7 +30,7 @@ namespace cpu
namespace ops
{
-TransposeLayer::TransposeLayer() : _input(nullptr), _output(nullptr), _perm()
+TransposeLayer::TransposeLayer() : _input(nullptr), _perm(nullptr), _output(nullptr)
{
// DO NOTHING
}
@@ -37,20 +38,33 @@ TransposeLayer::TransposeLayer() : _input(nullptr), _output(nullptr), _perm()
template <typename T> void TransposeLayer::transpose()
{
nnfw::cker::TransposeParams param;
- param.perm_count = _perm.size();
- for (size_t i = 0; i < _perm.size(); i++)
+ auto perm_shape = _perm->getShape();
+ assert(perm_shape.rank() == 1);
+
+ param.perm_count = _input->getShape().rank();
+ if (perm_shape.dim(0) == 0) // This means _perm is (n-1...0)
+ {
+ const auto begin = param.perm;
+ const auto end = param.perm + _input->getShape().rank();
+ std::iota(begin, end, 0);
+ std::reverse(begin, end);
+ }
+ else
{
- param.perm[i] = _perm[i];
+ assert(param.perm_count == static_cast<int>(perm_shape.dim(0)));
+ for (auto i = 0; i < param.perm_count; i++)
+ {
+ param.perm[i] = *(getBuffer<int32_t>(_perm) + i);
+ }
}
- nnfw::cker::Transpose(param, getTensorShape(_input),
- reinterpret_cast<const T *>(_input->buffer()), getTensorShape(_output),
- reinterpret_cast<T *>(_output->buffer()));
+ nnfw::cker::Transpose(param, getShape(_input), getBuffer<T>(_input), getShape(_output),
+ getBuffer<T>(_output));
}
void TransposeLayer::transposeQuant8()
{
- if (_input->data_offset() != _output->data_offset())
+ if (_input->data_zero_point() != _output->data_zero_point())
{
throw std::runtime_error("TransposeLayer : qassym8 input and output offsets unmatched");
}
@@ -63,8 +77,8 @@ void TransposeLayer::transposeQuant8()
transpose<uint8_t>();
}
-void TransposeLayer::configure(const IPortableTensor *input, IPortableTensor *output,
- const std::vector<int> &perm)
+void TransposeLayer::configure(const IPortableTensor *input, const IPortableTensor *perm,
+ IPortableTensor *output)
{
_input = input;
_perm = perm;
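
Note: when the perm tensor is empty (dim(0) == 0), transpose() above synthesizes the default permutation (rank-1, ..., 1, 0) with std::iota followed by std::reverse:

#include <algorithm>
#include <numeric>

void defaultReversePerm(int *perm, int rank)
{
  std::iota(perm, perm + rank, 0);  // 0, 1, ..., rank-1
  std::reverse(perm, perm + rank);  // rank-1, ..., 1, 0
}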
diff --git a/runtime/onert/backend/cpu/ops/TransposeLayer.h b/runtime/onert/backend/cpu/ops/TransposeLayer.h
index f9cb12770..c8e9f8ae7 100644
--- a/runtime/onert/backend/cpu/ops/TransposeLayer.h
+++ b/runtime/onert/backend/cpu/ops/TransposeLayer.h
@@ -40,15 +40,15 @@ public:
void transposeQuant8();
- void configure(const IPortableTensor *input, IPortableTensor *output,
- const std::vector<int> &perm);
+ void configure(const IPortableTensor *input, const IPortableTensor *perm,
+ IPortableTensor *output);
void run() override;
private:
const IPortableTensor *_input;
+ const IPortableTensor *_perm;
IPortableTensor *_output;
- std::vector<int> _perm;
};
} // namespace ops
diff --git a/runtime/onert/backend/cpu/ops/UnpackLayer.cc b/runtime/onert/backend/cpu/ops/UnpackLayer.cc
index 428b38588..f18fb9483 100644
--- a/runtime/onert/backend/cpu/ops/UnpackLayer.cc
+++ b/runtime/onert/backend/cpu/ops/UnpackLayer.cc
@@ -47,7 +47,7 @@ template <typename T> void UnpackLayer::unpackImpl()
for (int32_t i = 0; i < _num_output; i++)
{
- outputDims.push_back(getTensorShape(_outputs[i]));
+ outputDims.push_back(getShape(_outputs[i]));
outputDimsPtr.push_back(&outputDims[i]);
}
@@ -55,11 +55,11 @@ template <typename T> void UnpackLayer::unpackImpl()
for (const auto output : _outputs)
{
- outputPtrs.emplace_back(reinterpret_cast<T *>(output->buffer()));
+ outputPtrs.emplace_back(getBuffer<T>(output));
}
- nnfw::cker::Unpack<T>(op_params, getTensorShape(_input), reinterpret_cast<T *>(_input->buffer()),
- getTensorShape(_outputs[0]), outputPtrs.data());
+ nnfw::cker::Unpack<T>(op_params, getShape(_input), getBuffer<T>(_input), getShape(_outputs[0]),
+ outputPtrs.data());
}
void UnpackLayer::configure(const IPortableTensor *input, uint32_t axis, int32_t num,
diff --git a/runtime/onert/backend/gpu_cl/Backend.h b/runtime/onert/backend/gpu_cl/Backend.h
new file mode 100644
index 000000000..cdf965557
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/Backend.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_GPU_CL_BACKEND_H__
+#define __ONERT_BACKEND_GPU_CL_BACKEND_H__
+
+#include <backend/Backend.h>
+#include <memory>
+
+#include "BackendContext.h"
+#include "Config.h"
+#include "TensorRegistry.h"
+#include "KernelGenerator.h"
+#include "TensorManager.h"
+#include "TensorBuilder.h"
+
+#include "tensorflow/lite/delegates/gpu/cl/environment.h"
+#include "tensorflow/lite/delegates/gpu/common/precision.h"
+#include "tensorflow/lite/delegates/gpu/common/status.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+
+class Backend : public ::onert::backend::Backend
+{
+public:
+ Backend() : _config{std::make_shared<Config>()} {}
+
+ std::shared_ptr<IConfig> config() const override { return _config; }
+
+ std::unique_ptr<onert::backend::BackendContext> newContext(ContextData &&data) const override
+ {
+ const auto &graph = *data.graph;
+ const auto &operands = data.graph->operands();
+ auto context = std::make_unique<gpu_cl::BackendContext>(this, std::move(data));
+
+ auto environment = std::make_shared<tflite::gpu::cl::Environment>();
+ if (!CreateEnvironment(environment.get()).ok())
+ {
+ return nullptr;
+ }
+
+ tflite::gpu::CreateGpuModelInfo create_info;
+ create_info.precision = tflite::gpu::CalculationsPrecision::F32;
+ create_info.storage_type =
+ tflite::gpu::cl::GetStorageTypeWithMinimalMemoryConsumption(environment->device().GetInfo());
+ create_info.hints.Add(tflite::gpu::ModelHints::kFastestInference);
+
+ auto tm = createTensorManager(&environment->context(), create_info, environment);
+
+ auto tr = std::make_shared<TensorRegistry>(tm);
+
+ auto cc = std::make_shared<tflite::gpu::cl::CreationContext>();
+ cc->device = environment->GetDevicePtr();
+ cc->context = &environment->context();
+ cc->queue = environment->queue();
+ cc->cache = environment->program_cache();
+
+ auto tb = std::make_shared<TensorBuilder>(operands, tm);
+ context->tensor_registry = tr;
+ context->tensor_builder = tb;
+
+ context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, cc);
+ context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
+ return context;
+ }
+
+private:
+ std::shared_ptr<IConfig> _config;
+};
+
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_GPU_CL_BACKEND_H__
diff --git a/runtime/onert/backend/gpu_cl/BackendContext.cc b/runtime/onert/backend/gpu_cl/BackendContext.cc
new file mode 100644
index 000000000..9d4577013
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/BackendContext.cc
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "ConstantInitializer.h"
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+
+#include "util/logging.h"
+#include "ir/Index.h"
+#include "ir/Operations.h"
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandIndexSequence.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+
+void BackendContext::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout backend_layout)
+{
+ TensorType type = TensorType::TENSOR_TYPE_VALID;
+ tensor_builder->registerTensorInfo(ind, info, backend_layout, type);
+}
+
+ITensorRegistry *BackendContext::genTensors()
+{
+ ir::OperandIndexMap<TensorType> type_map;
+
+ for (const auto &ind : graph()->getInputs())
+ {
+ type_map[ind] = TensorType::TENSOR_TYPE_INPUT;
+ }
+
+ for (const auto &ind : graph()->getOutputs())
+ {
+ type_map[ind] = TensorType::TENSOR_TYPE_OUTPUT;
+ }
+ graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+ if (external_operands().contains(ind))
+ return;
+
+ const auto frontend_layout = graph()->layout();
+ const auto backend_layout = operand_layouts().at(ind);
+ ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
+ obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
+ if (obj.isConstant())
+ {
+ type_map[ind] = TensorType::TENSOR_TYPE_INPUT;
+ }
+ tensor_builder->registerTensorInfo(ind, backend_info, backend_layout, type_map[ind]);
+ });
+
+ // TODO Get compiler options from the compiler and use them rather than reading from Env
+ if (util::getConfigString(util::config::EXECUTOR) == "Linear")
+ {
+ planTensors();
+ }
+ else
+ {
+ // For executors that do not have a fixed linear execution order:
+ // as a workaround, use the static memory planner so that tensors are never deallocated
+ graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
+ if (tensor_builder->isRegistered(ind))
+ tensor_builder->notifyFirstUse(ind);
+ });
+ }
+ tensor_builder->prepare();
+ return tensor_registry.get();
+}
+
+FunctionMap BackendContext::genKernels()
+{
+ FunctionMap fn_map;
+
+ for (auto &&op_ind : _data.op_order)
+ {
+ auto fn_seq = kernel_gen->generate(op_ind);
+ fn_map.emplace_back(op_ind, std::move(fn_seq));
+ }
+
+ kernel_gen->get_operation(fn_map);
+ tensor_builder->allocate();
+  // NOTE To reduce memory usage, release operand data that is no longer needed once kernels are generated
+ const_cast<ir::Graph &>(*_data.graph)
+ .operands()
+ .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
+
+ for (auto &&it : fn_map)
+ {
+ auto &fn_seq = it.second;
+ fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
+ }
+
+ return fn_map;
+}
+
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/BackendContext.h b/runtime/onert/backend/gpu_cl/BackendContext.h
new file mode 100644
index 000000000..da5daae02
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/BackendContext.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_GPU_CL_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_GPU_CL_BACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include <util/ConfigSource.h>
+
+#include <cl_common/BackendContext.h>
+
+#include "ConstantInitializer.h"
+#include "KernelGenerator.h"
+#include "TensorBuilder.h"
+
+#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+
+class BackendContext
+ : public onert::backend::cl_common::BackendContext<TensorBuilder, ConstantInitializer,
+ KernelGenerator>
+{
+public:
+ BackendContext(const Backend *backend, ContextData &&data,
+ std::shared_ptr<TensorRegistry> tensor_registry = nullptr,
+ std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
+ std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+ : onert::backend::cl_common::BackendContext<TensorBuilder, ConstantInitializer,
+ KernelGenerator>(
+ backend, std::move(data), tensor_registry, tensor_builder, constant_initializer, kernel_gen)
+ {
+ // DO NOTHING
+ }
+
+ ITensorRegistry *genTensors() override;
+ FunctionMap genKernels() override;
+
+protected:
+ void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout backend_layout) override;
+};
+
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_GPU_CL_BACKEND_CONTEXT_H__
diff --git a/runtime/onert/backend/gpu_cl/CMakeLists.txt b/runtime/onert/backend/gpu_cl/CMakeLists.txt
new file mode 100644
index 000000000..d62dbd84c
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/CMakeLists.txt
@@ -0,0 +1,95 @@
+set(LIB_ONERT_BACKEND_GPU_CL onert_backend_gpu_cl)
+
+if(NOT BUILD_GPU_CL)
+ return()
+endif(NOT BUILD_GPU_CL)
+
+nnas_find_package(Opencl_Headers QUIET)
+if(NOT Opencl_Headers_FOUND)
+ return()
+endif(NOT Opencl_Headers_FOUND)
+
+nnas_find_package(Farmhash QUIET)
+if(NOT Farmhash_FOUND)
+ return()
+endif(NOT Farmhash_FOUND)
+
+nnas_find_package(Abseil QUIET)
+if(NOT Abseil_FOUND)
+ return()
+endif(NOT Abseil_FOUND)
+
+nnfw_find_package(Fp16 QUIET)
+if(NOT Fp16_FOUND)
+ return()
+endif(NOT Fp16_FOUND)
+
+nnas_find_package(VulkanSource QUIET)
+if(NOT VulkanSource_FOUND)
+ return()
+endif(NOT VulkanSource_FOUND)
+
+nnas_find_package(Opengl_HeadersSource QUIET)
+if(NOT Opengl_HeadersSource_FOUND)
+ return()
+endif(NOT Opengl_HeadersSource_FOUND)
+
+nnas_find_package(Egl_HeadersSource QUIET)
+if(NOT Egl_HeadersSource_FOUND)
+ return()
+endif(NOT Egl_HeadersSource_FOUND)
+
+if (NOT ${TARGET_OS} MATCHES "tizen")
+ nnas_find_package(FlatBuffers REQUIRED)
+endif ()
+
+nnfw_find_package(TensorFlowGpu QUIET)
+if(NOT TensorFlowGpu_FOUND)
+  message(FATAL_ERROR "TensorFlowGpu lib not found")
+endif(NOT TensorFlowGpu_FOUND)
+
+file(GLOB_RECURSE SOURCES "*.cc")
+
+add_library(${LIB_ONERT_BACKEND_GPU_CL} SHARED ${SOURCES})
+
+target_include_directories(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
+target_include_directories(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${TensorFlowSource_DIR})
+target_include_directories(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${VulkanSource_DIR}/include)
+target_include_directories(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${Opengl_HeadersSource_DIR}/api)
+target_include_directories(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${Egl_HeadersSource_DIR}/api)
+
+if (${TARGET_OS} MATCHES "tizen")
+ target_compile_options(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE "-Wno-error=deprecated-copy")
+endif ()
+
+target_compile_options(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE "-DCL_TARGET_OPENCL_VERSION=220" "-DEGL_NO_X11")
+
+target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE abseil)
+target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE dl)
+target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE farmhash)
+target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE OpenCL_Headers)
+target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE fp16)
+target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE TensorFlowGpu)
+target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE onert_core)
+target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${LIB_ONERT_BACKEND_CL_COMMON})
+target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE nnfw_common)
+target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE nnfw_coverage)
+if (${TARGET_OS} MATCHES "tizen")
+ target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE flatbuffers)
+else()
+ target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE flatbuffers::flatbuffers)
+endif ()
+
+set_target_properties(${LIB_ONERT_BACKEND_GPU_CL} PROPERTIES OUTPUT_NAME backend_gpu_cl)
+
+if(CMAKE_BUILD_TYPE_LC STREQUAL "release")
+ add_custom_command(TARGET ${LIB_ONERT_BACKEND_GPU_CL} POST_BUILD
+ COMMAND ${CMAKE_STRIP} "--strip-unneeded" $<TARGET_FILE_NAME:${LIB_ONERT_BACKEND_GPU_CL}>)
+endif()
+
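+# Interface target that relaxes warnings triggered by TensorFlow Lite GPU
+# delegate headers used by this backend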
+add_library(tflite_ignore_warnings INTERFACE)
+target_compile_options(tflite_ignore_warnings INTERFACE -Wno-unused-parameter -Wno-sign-compare)
+target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE tflite_ignore_warnings)
+
+install(TARGETS ${LIB_ONERT_BACKEND_GPU_CL} DESTINATION lib)
diff --git a/runtime/onert/backend/gpu_cl/ClConstantInitializer.cc b/runtime/onert/backend/gpu_cl/ClConstantInitializer.cc
new file mode 100644
index 000000000..05dd8e2a3
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/ClConstantInitializer.cc
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ClConstantInitializer.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+
+ClConstantInitializer::ClConstantInitializer(const ir::Operands &operands,
+ const std::shared_ptr<ITensorRegistry> &tensor_reg)
+ : _operands{operands}, _tensor_reg{tensor_reg}, _current_layout{ir::Layout::UNKNOWN}
+{
+ // DO NOTHING
+}
+
+void ClConstantInitializer::copyInputInitialize(const ir::Operation &node, uint32_t index)
+{
+ assert(node.getInputs().size() > index);
+
+ const auto &input_index = node.getInputs().at(index);
+ if (input_index.valid())
+ {
+ const auto &input_obj = _operands.at(input_index);
+ registerCopyInitializer(input_index, input_obj);
+ }
+}
+
+void ClConstantInitializer::permuteInputInitialize(const ir::Operation &node, uint32_t index)
+{
+ assert(node.getInputs().size() > index);
+
+ const auto &input_index = node.getInputs().at(index);
+ const auto &input_obj = _operands.at(input_index);
+ registerPermuteInitializer(input_index, input_obj);
+}
+
+// NOTE Workaround for the 16-bit float type. An alias is enough here since only the byte size matters.
+using float16 = uint16_t;
+
+void ClConstantInitializer::registerCopyInitializer(const ir::OperandIndex &index,
+ const ir::Operand &obj)
+{
+  // Constants only
+  // TODO Add a check that the tensor has been allocated
+ if (!obj.isConstant())
+ return;
+
+ const auto type = obj.typeInfo().type();
+ using ir::DataType;
+
+ switch (type)
+ {
+ case DataType::FLOAT32:
+ _init_map[index] = copyInit<float>;
+ break;
+ default:
+ throw std::runtime_error("Not supported, yet");
+ break;
+ }
+}
+
+void ClConstantInitializer::registerPermuteInitializer(const ir::OperandIndex &index,
+ const ir::Operand &obj)
+{
+  // Constants only
+  // TODO Add a check that the tensor has been allocated
+ if (!obj.isConstant())
+ return;
+
+ const auto type = obj.typeInfo().type();
+ using ir::DataType;
+ using namespace std::placeholders;
+
+ switch (type)
+ {
+ case DataType::FLOAT32:
+ _init_map[index] = std::bind(permuteInit<float>, _1, _2, _current_layout);
+ break;
+ case DataType::INT32:
+ _init_map[index] = std::bind(permuteInit<int32_t>, _1, _2, _current_layout);
+ break;
+ default:
+ throw std::runtime_error("Not supported, yet");
+ break;
+ }
+}
+
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/ClConstantInitializer.h b/runtime/onert/backend/gpu_cl/ClConstantInitializer.h
new file mode 100644
index 000000000..ad5b47d19
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/ClConstantInitializer.h
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_GPU_CL_CLCONSTANT_INITIALIZER_H__
+#define __ONERT_BACKEND_GPU_CL_CLCONSTANT_INITIALIZER_H__
+
+#include <unordered_map>
+#include <functional>
+
+#include <ir/Coordinates.h>
+#include <ir/Layout.h>
+#include <ir/Operand.h>
+#include <ir/Operands.h>
+#include <ir/OperationVisitor.h>
+#include <backend/ITensorRegistry.h>
+#include <util/logging.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+
+template <typename T>
+static void Init(const onert::ir::Operand &model_obj, onert::backend::ITensor &obj, const bool copy,
+ const onert::ir::Layout frontend_layout = onert::ir::Layout::UNKNOWN)
+{
+ const auto &shape = model_obj.shape();
+ assert(model_obj.data());
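+  // Constant data is uploaded with a single blocking write; permuting the
+  // layout during upload is not implemented yet.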
+ obj.access([&](::onert::backend::ITensor &tensor) {
+ switch (shape.rank())
+ {
+ case 0:
+ case 1:
+ case 2:
+ case 3:
+ case 4:
+ if (copy)
+ {
+ tensor.enqueueWriteBuffer(model_obj.data()->base(), true);
+ }
+ else
+ {
+ // NYI
+ (void)frontend_layout;
+ throw std::runtime_error{"Not yet supported"};
+ }
+ break;
+ default:
+ throw std::runtime_error{"Not yet supported"};
+ }
+ });
+}
+
+template <typename T>
+void copyInit(const onert::ir::Operand &model_obj, onert::backend::ITensor &obj)
+{
+ Init<T>(model_obj, obj, true);
+}
+
+template <typename T>
+void permuteInit(const onert::ir::Operand &model_obj, onert::backend::ITensor &obj,
+ const onert::ir::Layout frontend_layout)
+{
+ const bool copy = frontend_layout == obj.layout();
+ Init<T>(model_obj, obj, copy, frontend_layout);
+}
+
+class ClConstantInitializer : public ir::OperationVisitor
+{
+public:
+ void run()
+ {
+ assert(_tensor_reg);
+ for (const auto &it : _init_map)
+ {
+ const auto &ind = it.first;
+ const auto &fn = it.second;
+
+ const auto &model_obj = _operands.at(ind);
+ auto tensor_obj = _tensor_reg->getNativeITensor(ind);
+ assert(tensor_obj != nullptr);
+ fn(model_obj, *tensor_obj);
+ VERBOSE(FillOperandData) << "Fill data for operand " << ind << std::endl;
+ }
+ _init_map.clear();
+ }
+
+public:
+ ClConstantInitializer(const ir::Operands &operands,
+ const std::shared_ptr<ITensorRegistry> &tensor_reg);
+
+public:
+ using Initializer = std::function<void(const ir::Operand &, backend::ITensor &)>;
+
+public:
+ void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj)
+ {
+ registerPermuteInitializer(index, obj);
+ }
+ void registerCopyInitializer(const ir::OperandIndex &index, const ir::Operand &obj);
+ void registerPermuteInitializer(const ir::OperandIndex &index, const ir::Operand &obj);
+
+public:
+ void setLayout(ir::Layout layout) { _current_layout = layout; }
+ bool exist(const ir::OperandIndex &ind) { return _init_map.find(ind) != _init_map.end(); }
+
+protected:
+ void copyInputInitialize(const ir::Operation &node, uint32_t index);
+ void permuteInputInitialize(const ir::Operation &node, uint32_t index);
+
+protected:
+ const ir::Operands &_operands;
+ std::shared_ptr<ITensorRegistry> _tensor_reg;
+ std::unordered_map<ir::OperandIndex, Initializer> _init_map;
+ ir::Layout _current_layout;
+};
+
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_GPU_CL_CLCONSTANT_INITIALIZER_H__
diff --git a/runtime/onert/backend/gpu_cl/ClFunction.h b/runtime/onert/backend/gpu_cl/ClFunction.h
new file mode 100644
index 000000000..6afbd4910
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/ClFunction.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_GPU_CL_OPEN_CL_FUNCTION_H__
+#define __ONERT_GPU_CL_OPEN_CL_FUNCTION_H__
+
+#include <exec/IFunction.h>
+
+#include <vector>
+#include <memory>
+
+#include "tensorflow/lite/delegates/gpu/common/status.h"
+#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
+#include "tensorflow/lite/delegates/gpu/cl/cl_operation.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+class ClFunction : public ::onert::exec::IFunction
+{
+public:
+ ClFunction(std::shared_ptr<tflite::gpu::cl::CreationContext> creation_context)
+ : _creation_context(creation_context), _gpu_operations()
+ {
+ }
+
+public:
+ void add_operation(tflite::gpu::cl::ClOperation *gpu_operation)
+ {
+ _gpu_operations.push_back(gpu_operation);
+ }
+
+ void run() override
+ {
+ for (const auto gpu_operation : _gpu_operations)
+ {
+ if (!gpu_operation->AddToQueue(_creation_context->queue).ok())
+ {
+ throw std::runtime_error("Failed to AddToQueue.");
+ }
+ }
+ }
+
+ void prepare() override
+ {
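+    // Full compilation pipeline per operation: generate kernel source, build
+    // the OpenCL program, upload argument values, then drop the host-side
+    // copies of those arguments.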
+ for (const auto gpu_operation : _gpu_operations)
+ {
+ if (!gpu_operation->GetGpuOperation().AssembleCode(_creation_context->GetGpuInfo()).ok())
+ {
+ throw std::runtime_error("Failed to AssembleCode.");
+ }
+ if (!gpu_operation->Compile(*_creation_context).ok())
+ {
+ throw std::runtime_error("Failed to Compile.");
+ }
+ if (!gpu_operation->UpdateParams().ok())
+ {
+ throw std::runtime_error("Failed to UpdateParams.");
+ }
+ gpu_operation->GetGpuOperation().args_.ReleaseCPURepresentation();
+ }
+ }
+
+private:
+ std::shared_ptr<tflite::gpu::cl::CreationContext> _creation_context;
+ std::vector<tflite::gpu::cl::ClOperation *> _gpu_operations;
+};
+
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_GPU_CL_OPEN_CL_FUNCTION_H__
diff --git a/runtime/onert/backend/gpu_cl/Config.cc b/runtime/onert/backend/gpu_cl/Config.cc
new file mode 100644
index 000000000..9b314d679
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/Config.cc
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Config.h"
+
+#include <dlfcn.h>
+
+#include "tensorflow/lite/delegates/gpu/common/status.h"
+#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
+
+using namespace tflite::gpu::cl;
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+bool Config::initialize()
+{
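+  // The backend is usable only when the OpenCL runtime library can be loaded
+  // dynamically on this device.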
+  return LoadOpenCL().ok();
+}
+
+ir::Layout Config::supportLayout(const ir::IOperation &, ir::Layout) { return ir::Layout::NHWC; }
+
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/Config.h b/runtime/onert/backend/gpu_cl/Config.h
new file mode 100644
index 000000000..980eb228b
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/Config.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_GPU_CL_CONFIG_H__
+#define __ONERT_BACKEND_GPU_CL_CONFIG_H__
+
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/ITimer.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+
+class Config : public IConfig
+{
+public:
+ virtual ~Config() {}
+
+public:
+ std::string id() override { return "gpu_cl"; }
+ bool initialize() override;
+ ir::Layout supportLayout(const ir::IOperation &node, ir::Layout frontend_layout) override;
+ bool supportPermutation() override { return true; }
+ bool supportDynamicTensor() override { return false; }
+ bool supportFP16() override { return true; }
+ std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); }
+};
+
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_GPU_CL_CONFIG_H__
diff --git a/runtime/onert/backend/gpu_cl/ConstantInitializer.cc b/runtime/onert/backend/gpu_cl/ConstantInitializer.cc
new file mode 100644
index 000000000..7976abea9
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/ConstantInitializer.cc
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConstantInitializer.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+
+ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
+ const std::shared_ptr<ITensorRegistry> &tensor_reg)
+ : ClConstantInitializer{operands, tensor_reg}
+{
+ // DO NOTHING
+}
+
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/ConstantInitializer.h b/runtime/onert/backend/gpu_cl/ConstantInitializer.h
new file mode 100644
index 000000000..ce8131af2
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/ConstantInitializer.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_GPU_CL_CONSTANT_INITIALIZER_H__
+#define __ONERT_BACKEND_GPU_CL_CONSTANT_INITIALIZER_H__
+
+#include "ClConstantInitializer.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+
+class ConstantInitializer : public ClConstantInitializer
+{
+public:
+ ConstantInitializer(const ir::Operands &operands,
+ const std::shared_ptr<ITensorRegistry> &tensor_reg);
+
+public:
+ using ClConstantInitializer::visit;
+};
+
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_GPU_CL_CONSTANT_INITIALIZER_H__
diff --git a/runtime/onert/backend/gpu_cl/KernelGenerator.cc b/runtime/onert/backend/gpu_cl/KernelGenerator.cc
new file mode 100644
index 000000000..de8d3b463
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/KernelGenerator.cc
@@ -0,0 +1,709 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <stdexcept>
+
+#include <backend/basic/KernelGeneratorBase.h>
+
+#include "KernelGenerator.h"
+
+#include "ClFunction.h"
+#include "TensorManager.h"
+
+#include "tensorflow/lite/delegates/gpu/common/shape.h"
+#include "tensorflow/lite/delegates/gpu/common/tensor.h"
+#include "tensorflow/lite/delegates/gpu/common/tasks/elementwise.h"
+#include "tensorflow/lite/delegates/gpu/common/selectors/convolution_selector.h"
+#include "tensorflow/lite/delegates/gpu/common/selectors/dw_convolution_selector.h"
+#include "tensorflow/lite/delegates/gpu/common/selectors/simple_selectors.h"
+
+#include "ir/Operations.h"
+#include "ir/Operations.Include.h"
+#include "ir/Index.h"
+#include "ir/DataType.h"
+#include "ir/InternalType.h"
+#include "exec/NopFunction.h"
+#include "exec/FunctionSequence.h"
+#include "util/logging.h"
+#include "util/Utils.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+
+void KernelGenerator::addClNode(const std::vector<ir::OperandIndex> &inputs,
+ const std::vector<ir::OperandIndex> &outputs,
+ std::unique_ptr<tflite::gpu::GPUOperation> gpu_op)
+{
+ tflite::gpu::cl::CLNode cl_node;
+ cl_node.cl_operation.Init(std::move(gpu_op));
+ cl_node.inputs.resize(inputs.size());
+ for (size_t i = 0; i < inputs.size(); ++i)
+ {
+ cl_node.inputs[i] = inputs[i].value();
+ }
+ cl_node.outputs.resize(outputs.size());
+ for (size_t i = 0; i < outputs.size(); ++i)
+ {
+ cl_node.outputs[i] = outputs[i].value();
+ }
+ _nodes.push_back(std::move(cl_node));
+ _operation_indexes.push_back(_operation_index);
+}
+
+void KernelGenerator::get_operation(FunctionMap &Functions)
+{
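+  // CL nodes were recorded in generation order, each tagged with the index of
+  // the operation that created it (see addClNode). Walk the function map and
+  // the node list in lock step and attach every node to the function sequence
+  // of its source operation.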
+ size_t size = _nodes.size();
+ size_t i = 0;
+ for (auto &&it : Functions)
+ {
+ auto index = it.first;
+ auto node_index = _operation_indexes[i];
+ while (index == node_index)
+ {
+ auto &fn_seq = it.second;
+ auto &node = _nodes[i++];
+ for (size_t j = 0; j < node.inputs.size(); ++j)
+ {
+ uint32_t idx = node.inputs[j];
+ node.cl_operation.GetGpuOperation().SetSrc(
+ _tensor_reg->getClTensor(ir::OperandIndex{idx})->handle(), j);
+ }
+ for (size_t j = 0; j < node.outputs.size(); ++j)
+ {
+ uint32_t idx = node.outputs[j];
+ node.cl_operation.GetGpuOperation().SetDst(
+ _tensor_reg->getClTensor(ir::OperandIndex{idx})->handle(), j);
+ }
+ fn_seq->iterate([&](exec::IFunction &ifunc) {
+ static_cast<ClFunction &>(ifunc).add_operation(&node.cl_operation);
+ });
+ if (i == size)
+ {
+ break;
+ }
+ node_index = _operation_indexes[i];
+ }
+ if (i == size)
+ {
+ break;
+ }
+ }
+}
+
+absl::Status KernelGenerator::readConstTensor(const ir::OperandIndex &index,
+ tflite::gpu::TensorOrScalar *param)
+{
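+  // A constant operand is unpacked as a scalar, a 1-D (Linear) tensor, or an
+  // HWC tensor, depending on its shape.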
+ const auto &shape = _ctx.at(index).shape();
+ if (shape.rank() == 0 && shape.num_elements() == 1)
+ {
+ tflite::gpu::Tensor<tflite::gpu::Scalar, tflite::gpu::DataType::FLOAT32> tensor;
+ tensor.shape.v = 1;
+ tensor.data.resize(1);
+ std::memcpy(&tensor.data[0], _ctx.at(index).data()->base(), _ctx.at(index).operandSize());
+ *param = tensor.data[0];
+ }
+ else
+ {
+ if (CheckIfLinearConvertible(&shape))
+ {
+ tflite::gpu::Tensor<tflite::gpu::Linear, tflite::gpu::DataType::FLOAT32> tensor;
+ tensor.shape.v = shape.dim(shape.rank() - 1);
+ tensor.data.resize(shape.num_elements());
+ std::memcpy(&tensor.data[0], _ctx.at(index).data()->base(), _ctx.at(index).operandSize());
+ *param = std::move(tensor);
+ }
+ else
+ {
+ tflite::gpu::Tensor<tflite::gpu::HWC, tflite::gpu::DataType::FLOAT32> tensor;
+ if (shape.rank() == 3)
+ {
+ tensor.shape.h = shape.dim(0);
+ tensor.shape.w = shape.dim(1);
+ tensor.shape.c = shape.dim(2);
+ }
+ else if (shape.rank() == 4)
+ {
+ if (shape.dim(0) != 1)
+ {
+ return absl::UnimplementedError("Batch size is not equal to 1.");
+ }
+ tensor.shape.h = shape.dim(1);
+ tensor.shape.w = shape.dim(2);
+ tensor.shape.c = shape.dim(3);
+ }
+ else
+ {
+ return absl::InvalidArgumentError(
+ "Expected a 3D tensor of shape HxWxC or a 4D tensor of shape 1xHxWxC.");
+ }
+ tensor.data.resize(shape.num_elements());
+ std::memcpy(&tensor.data[0], _ctx.at(index).data()->base(), _ctx.at(index).operandSize());
+ *param = std::move(tensor);
+ }
+ }
+ return absl::OkStatus();
+}
+
+absl::Status KernelGenerator::readConstTensor(
+ const ir::OperandIndex &index,
+ absl::variant<tflite::gpu::Tensor<tflite::gpu::Linear, tflite::gpu::DataType::FLOAT32>,
+ tflite::gpu::Tensor<tflite::gpu::HWC, tflite::gpu::DataType::FLOAT32>> *alpha)
+{
+ const auto &shape = _ctx.at(index).shape();
+ if (CheckIfLinearConvertible(&shape))
+ {
+ tflite::gpu::Tensor<tflite::gpu::Linear, tflite::gpu::DataType::FLOAT32> tensor;
+ tensor.shape.v = shape.dim(shape.rank() - 1);
+ tensor.data.resize(shape.num_elements());
+ std::memcpy(&tensor.data[0], _ctx.at(index).data()->base(), _ctx.at(index).operandSize());
+ *alpha = std::move(tensor);
+ }
+ else
+ {
+ tflite::gpu::Tensor<tflite::gpu::HWC, tflite::gpu::DataType::FLOAT32> tensor;
+ if (shape.rank() == 3)
+ {
+ tensor.shape.h = shape.dim(0);
+ tensor.shape.w = shape.dim(1);
+ tensor.shape.c = shape.dim(2);
+ }
+ else if (shape.rank() == 4)
+ {
+ if (shape.dim(0) != 1)
+ {
+ return absl::UnimplementedError("Batch size is not equal to 1.");
+ }
+ tensor.shape.h = shape.dim(1);
+ tensor.shape.w = shape.dim(2);
+ tensor.shape.c = shape.dim(3);
+ }
+ else
+ {
+ return absl::InvalidArgumentError(
+ "Expected a 3D tensor of shape HxWxC or a 4D tensor of shape 1xHxWxC.");
+ }
+ tensor.data.resize(shape.num_elements());
+ std::memcpy(&tensor.data[0], _ctx.at(index).data()->base(), _ctx.at(index).operandSize());
+ *alpha = std::move(tensor);
+ }
+ return absl::OkStatus();
+}
+
+KernelGenerator::KernelGenerator(
+ const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<TensorRegistry> &tensor_reg,
+ const std::shared_ptr<tflite::gpu::cl::CreationContext> &creation_context)
+ : basic::KernelGeneratorBase{graph}, _ctx(graph.operands()),
+ _operations_ctx(graph.operations()), _current_layout{graph.layout()},
+ _tensor_builder(tensor_builder), _tensor_reg(tensor_reg), _creation_context(creation_context)
+{
+}
+
+std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
+{
+ auto fn_seq = std::make_unique<exec::FunctionSequence>();
+ fn_seq->enableDynamicShapeInferer(false);
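+  // Remember which operation is being generated; addClNode() tags every CL
+  // node with this index so get_operation() can match nodes back to functions.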
+ _operation_index = ind;
+ const auto &op = _graph.operations().at(ind);
+ op.accept(*this);
+ fn_seq->append(releaseFunction());
+ return fn_seq;
+}
+
+void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
+
+ tflite::gpu::OperationDef op_def;
+ op_def.precision = tflite::gpu::CalculationsPrecision::F32;
+
+ const bool lhs_const = _ctx.at(lhs_index).isConstant();
+ const bool rhs_const = _ctx.at(rhs_index).isConstant();
+
+ if (lhs_const && rhs_const)
+ {
+ throw std::runtime_error("No runtime input tensors for " + node.name());
+ }
+
+ auto fn = std::make_unique<ClFunction>(_creation_context);
+ std::unique_ptr<tflite::gpu::GPUOperation> gpu_op;
+
+ tflite::gpu::OperationType op_type = convertArithmeticType(node.param().arithmetic_type);
+
+ if (!lhs_const && !rhs_const)
+ {
+ auto lhs_shape = _tensor_reg->getClTensor(lhs_index)->get_info()._shape;
+ auto rhs_shape = _tensor_reg->getClTensor(rhs_index)->get_info()._shape;
+
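+    // For MUL, put the larger tensor first so that the smaller one is
+    // broadcast as the second input.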
+ bool swap =
+ (op_type == tflite::gpu::OperationType::MUL) &&
+ (lhs_shape.h <= rhs_shape.h && lhs_shape.w <= rhs_shape.w && lhs_shape.c <= rhs_shape.c);
+
+ auto first_index = swap ? rhs_index : lhs_index;
+ auto second_index = swap ? lhs_index : rhs_index;
+
+ op_def.src_tensors.push_back(_tensor_reg->getClTensor(first_index)->get_info()._desc);
+ op_def.src_tensors.push_back(_tensor_reg->getClTensor(second_index)->get_info()._desc);
+ op_def.dst_tensors.push_back(_tensor_reg->getClTensor(ofm_index)->get_info()._desc);
+
+ auto second_shape = _tensor_reg->getClTensor(second_index)->get_info()._shape;
+
+ tflite::gpu::GPUOperation operation = CreateElementwiseTwoInput(op_def, op_type, second_shape);
+ gpu_op = std::make_unique<tflite::gpu::GPUOperation>(std::move(operation));
+
+ addClNode({first_index, second_index}, {ofm_index}, std::move(gpu_op));
+ }
+ else
+ {
+ auto non_const_index = rhs_const ? lhs_index : rhs_index;
+ auto const_index = rhs_const ? rhs_index : lhs_index;
+
+ op_def.dst_tensors.push_back(_tensor_reg->getClTensor(ofm_index)->get_info()._desc);
+ op_def.src_tensors.push_back(_tensor_reg->getClTensor(non_const_index)->get_info()._desc);
+
+ tflite::gpu::ElementwiseAttributes attr;
+
+ if (!readConstTensor(const_index, &attr.param).ok())
+ {
+ throw std::runtime_error("BinaryArithmetic unsupported constant tensor");
+ }
+
+ tflite::gpu::GPUOperation operation =
+ CreateElementwise(_creation_context->GetGpuInfo(), op_def, op_type, attr);
+ gpu_op = absl::make_unique<tflite::gpu::GPUOperation>(std::move(operation));
+
+ addClNode({non_const_index}, {ofm_index}, std::move(gpu_op));
+ }
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Conv2D &node)
+{
+ auto output{node.getOutputs().at(0)};
+
+ auto input{node.getInputs().at(ir::operation::Conv2D::INPUT)};
+ auto kernel{node.getInputs().at(ir::operation::Conv2D::KERNEL)};
+ auto bias{node.getInputs().at(ir::operation::Conv2D::BIAS)};
+
+ const auto &param = node.param();
+
+ tflite::gpu::OperationDef op_def;
+ op_def.precision = tflite::gpu::CalculationsPrecision::F32;
+
+ op_def.src_tensors.push_back(_tensor_reg->getClTensor(input)->get_info()._desc);
+
+ auto input_shape = _tensor_reg->getClTensor(input)->get_info()._shape;
+ auto kernel_shape = _tensor_reg->getClTensor(kernel)->get_info()._shape;
+ auto output_shape = _tensor_reg->getClTensor(output)->get_info()._shape;
+ auto bias_shape = _tensor_reg->getClTensor(bias)->get_info()._shape;
+
+ op_def.dst_tensors.push_back(_tensor_reg->getClTensor(output)->get_info()._desc);
+
+ tflite::gpu::ModelHints hints;
+  std::unique_ptr<tflite::gpu::GPUOperation> gpu_op;
+
+ auto kernel_tensor = _tensor_reg->getClTensor(kernel);
+ auto bias_tensor = _tensor_reg->getClTensor(bias);
+
+ tflite::gpu::Convolution2DAttributes attr;
+ attr.strides = ToHW(param.stride.vertical, param.stride.horizontal);
+ attr.dilations =
+    tflite::gpu::HW(std::max(static_cast<uint32_t>(1), param.dilation.height_factor),
+                    std::max(static_cast<uint32_t>(1), param.dilation.width_factor));
+
+  bool is_weight = _ctx.at(kernel).isConstant();
+
+ if (is_weight)
+ {
+ attr.weights.id = kernel.value();
+ attr.weights.shape.o = kernel_shape.b;
+ attr.weights.shape.h = kernel_shape.h;
+ attr.weights.shape.w = kernel_shape.w;
+ attr.weights.shape.i = kernel_shape.c;
+ attr.weights.data.resize(kernel_shape.DimensionsProduct());
+ memcpy(attr.weights.data.data(), _ctx.at(kernel).data()->base(), kernel_tensor->total_size());
+ }
+
+ attr.bias.id = bias.value();
+  // TODO Revisit: assumes the bias length is whichever of b or c is not 1
+ attr.bias.shape.v = bias_shape.b != 1 ? bias_shape.b : bias_shape.c;
+ attr.bias.data.resize(bias_shape.DimensionsProduct());
+ memcpy(attr.bias.data.data(), _ctx.at(bias).data()->base(), bias_tensor->total_size());
+
+ UpdatePadding(param.padding.type, input_shape, &attr);
+
+ gpu_op = SelectConvolution(attr, output_shape, _creation_context->GetGpuInfo(), op_def, hints);
+
+
+ auto fn = std::make_unique<ClFunction>(_creation_context);
+
+ const auto activation = node.param().activation;
+
+ switch (activation)
+ {
+ case ir::Activation::NONE:
+ {
+ addClNode({input}, {output}, std::move(gpu_op));
+ break;
+ }
+ case ir::Activation::RELU:
+ case ir::Activation::RELU6:
+ {
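+      // The fused activation becomes a second CL node: the convolution writes
+      // to a fresh intermediate tensor, then a ReLU/ReLU6 node produces the
+      // final output.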
+ std::unique_ptr<tflite::gpu::GPUOperation> gpu_op_1;
+ tflite::gpu::OperationDef op_def_1;
+ const auto &shape = _ctx.at(output).shape();
+ auto new_ind = _tensor_reg->addNewClTensor(shape);
+
+ addClNode({input}, {new_ind}, std::move(gpu_op));
+
+ op_def_1.precision = tflite::gpu::CalculationsPrecision::F32;
+ op_def_1.src_tensors.push_back(_tensor_reg->getClTensor(output)->get_info()._desc);
+ op_def_1.dst_tensors.push_back(_tensor_reg->getClTensor(output)->get_info()._desc);
+
+ tflite::gpu::ReLUAttributes attr_1;
+ if (activation == ir::Activation::RELU6)
+ {
+ attr_1.clip = 6;
+ }
+ else
+ {
+ attr_1.clip = 0;
+ }
+ attr_1.alpha = 0;
+ gpu_op_1 = SelectReLU(attr_1, op_def_1);
+
+ addClNode({new_ind}, {output}, std::move(gpu_op_1));
+ break;
+ }
+ default:
+ {
+ throw std::runtime_error("gpu_cl KernelGenerator : Not supported Conv2D activiation");
+ }
+ }
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
+{
+ using ir::operation::DepthwiseConv2D;
+
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
+ const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
+ const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
+
+ const auto stride = node.param().stride;
+ const auto dilation = node.param().dilation;
+ const auto &padding = node.param().padding;
+
+ const auto multiplier = node.param().multiplier;
+
+  bool is_weight = _ctx.at(ker_index).isConstant();
+ tflite::gpu::OperationDef op_def;
+ op_def.precision = tflite::gpu::CalculationsPrecision::F32;
+
+ op_def.src_tensors.push_back(_tensor_reg->getClTensor(ifm_index)->get_info()._desc);
+ auto input_shape = _tensor_reg->getClTensor(ifm_index)->get_info()._shape;
+
+ auto ker_shape = _tensor_reg->getClTensor(ker_index)->get_info()._shape;
+
+ op_def.dst_tensors.push_back(_tensor_reg->getClTensor(ofm_index)->get_info()._desc);
+ auto out_shape = _tensor_reg->getClTensor(ofm_index)->get_info()._shape;
+ auto bias_shape = _tensor_reg->getClTensor(bias_index)->get_info()._shape;
+
+ tflite::gpu::DepthwiseConvolution2DAttributes attr;
+ attr.strides = ToHW(stride.vertical, stride.horizontal);
+  attr.dilations = tflite::gpu::HW(std::max(static_cast<uint32_t>(1), dilation.height_factor),
+                                   std::max(static_cast<uint32_t>(1), dilation.width_factor));
+
+ if (is_weight)
+ {
+ attr.weights.id = ker_index.value();
+ attr.weights.shape.o = ker_shape.b;
+ attr.weights.shape.h = ker_shape.h;
+ attr.weights.shape.w = ker_shape.w;
+ attr.weights.shape.i = ker_shape.c;
+ attr.weights.data.resize(ker_shape.DimensionsProduct());
+ memcpy(attr.weights.data.data(), _ctx.at(ker_index).data()->base(),
+ _ctx.at(ker_index).operandSize());
+ }
+ attr.bias.id = bias_index.value();
+ attr.bias.shape.v = bias_shape.b != 1 ? bias_shape.b : bias_shape.c;
+ attr.bias.data.resize(bias_shape.DimensionsProduct());
+ memcpy(attr.bias.data.data(), _ctx.at(bias_index).data()->base(),
+ _ctx.at(bias_index).operandSize());
+ UpdatePadding(padding.type, input_shape, &attr);
+
+ if (multiplier != 1)
+ {
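+    // Reorder multiplier-expanded depthwise weights into OHWI: the source
+    // stores every output channel of one spatial position contiguously, while
+    // the selector expects one contiguous HxW filter plane per output channel.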
+ const int input_depth = input_shape.c;
+ const int filter_height = ker_shape.h;
+ const int filter_width = ker_shape.w;
+ const int output_depth = out_shape.c;
+
+ tflite::gpu::Tensor<tflite::gpu::OHWI, tflite::gpu::DataType::FLOAT32> weights;
+ weights.id = attr.weights.id;
+ weights.shape = tflite::gpu::OHWI(output_depth, filter_height, filter_width, input_depth);
+ weights.data.resize(weights.shape.DimensionsProduct());
+ float *dst = &weights.data[0];
+ for (int j = 0; j < output_depth; ++j)
+ {
+ const float *src = attr.weights.data.data() + j;
+ for (int i = 0; i < filter_height * filter_width; ++i)
+ {
+ *dst = *src;
+ dst++;
+ src += output_depth;
+ }
+ }
+ attr.weights = std::move(weights);
+ }
+
+ auto fn = std::make_unique<ClFunction>(_creation_context);
+ std::unique_ptr<tflite::gpu::GPUOperation> gpu_op;
+
+ if (is_weight)
+ {
+ gpu_op = SelectDWConvolution(attr, _creation_context->GetGpuInfo(), op_def);
+ }
+ else
+ {
+ if (ker_shape.b != 1)
+ {
+      throw std::runtime_error(
+        "Depthwise runtime weights are not supported when the channel multiplier != 1");
+ }
+ gpu_op = SelectDWConvolutionDynamicWeights(attr, _creation_context->GetGpuInfo(), op_def);
+ }
+
+ const auto activation = node.param().activation;
+
+ switch (activation)
+ {
+ case ir::Activation::NONE:
+ {
+ addClNode({ifm_index}, {ofm_index}, std::move(gpu_op));
+ break;
+ }
+ case ir::Activation::RELU:
+ case ir::Activation::RELU6:
+ {
+ std::unique_ptr<tflite::gpu::GPUOperation> gpu_op_1;
+ tflite::gpu::OperationDef op_def_1;
+ const auto &shape = _ctx.at(ofm_index).shape();
+ auto new_ind = _tensor_reg->addNewClTensor(shape);
+
+ addClNode({ifm_index}, {new_ind}, std::move(gpu_op));
+
+ op_def_1.precision = tflite::gpu::CalculationsPrecision::F32;
+
+ op_def_1.src_tensors.push_back(_tensor_reg->getClTensor(ofm_index)->get_info()._desc);
+ op_def_1.dst_tensors.push_back(_tensor_reg->getClTensor(ofm_index)->get_info()._desc);
+
+ tflite::gpu::ReLUAttributes attr_1;
+ if (activation == ir::Activation::RELU6)
+ {
+ attr_1.clip = 6;
+ }
+ else
+ {
+ attr_1.clip = 0;
+ }
+ attr_1.alpha = 0;
+ gpu_op_1 = SelectReLU(attr_1, op_def_1);
+
+ addClNode({new_ind}, {ofm_index}, std::move(gpu_op_1));
+ break;
+ }
+ default:
+ {
+ throw std::runtime_error("gpu_cl KernelGenerator : Not supported DepthwiseConv2D acvivation");
+ }
+ }
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
+
+ tflite::gpu::OperationDef op_def;
+ op_def.precision = tflite::gpu::CalculationsPrecision::F32;
+
+ op_def.dst_tensors.push_back(_tensor_reg->getClTensor(output_index)->get_info()._desc);
+ op_def.src_tensors.push_back(_tensor_reg->getClTensor(input_index)->get_info()._desc);
+
+ std::unique_ptr<tflite::gpu::GPUOperation> gpu_op;
+ auto fn = std::make_unique<ClFunction>(_creation_context);
+ switch (node.param().op_type)
+ {
+ case ir::operation::ElementwiseActivation::Type::LEAKY_RELU:
+ case ir::operation::ElementwiseActivation::Type::RELU:
+ {
+ tflite::gpu::ReLUAttributes attr;
+ if (ir::operation::ElementwiseActivation::Type::LEAKY_RELU == node.param().op_type)
+ {
+ attr.alpha = node.param().alpha;
+ attr.clip = 0;
+ }
+ else
+ {
+ attr.alpha = node.param().beta;
+ attr.clip = node.param().alpha;
+ }
+ gpu_op = SelectReLU(attr, op_def);
+ break;
+ }
+ case ir::operation::ElementwiseActivation::Type::LOGISTIC:
+ {
+ if (_ctx.at(input_index).typeInfo().type() != ir::DataType::FLOAT32)
+ {
+ throw std::runtime_error{"Unsupported data type of LOGISTIC"};
+ }
+ tflite::gpu::GPUOperation operation =
+ CreateElementwiseOneInput(_creation_context->GetGpuInfo(), op_def,
+ convertElementwiseActivationType(node.param().op_type));
+ gpu_op = std::make_unique<tflite::gpu::GPUOperation>(std::move(operation));
+ break;
+ }
+ case ir::operation::ElementwiseActivation::Type::TANH:
+ {
+ tflite::gpu::GPUOperation operation = CreateElementwiseOneInput(
+ _creation_context->GetGpuInfo(), op_def, tflite::gpu::OperationType::TANH);
+ gpu_op = std::make_unique<tflite::gpu::GPUOperation>(std::move(operation));
+ break;
+ }
+ default:
+      throw std::runtime_error(
+        "gpu_cl KernelGenerator : Unsupported ElementwiseActivation type");
+ }
+ addClNode({input_index}, {output_index}, std::move(gpu_op));
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Pool2D &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};
+
+ tflite::gpu::OperationDef op_def;
+ op_def.precision = tflite::gpu::CalculationsPrecision::F32;
+
+ op_def.src_tensors.push_back(_tensor_reg->getClTensor(input_index)->get_info()._desc);
+ auto input_shape = _tensor_reg->getClTensor(input_index)->get_info()._shape;
+
+ op_def.dst_tensors.push_back(_tensor_reg->getClTensor(output_index)->get_info()._desc);
+
+ const auto kh = node.param().kh;
+ const auto kw = node.param().kw;
+ const auto stride = node.param().stride;
+ const auto op_type = convertPoolType(node.param().op_type);
+
+ tflite::gpu::Pooling2DAttributes attributes;
+ attributes.type = op_type;
+ attributes.kernel = tflite::gpu::HW(kh > 0 ? kh : 1, kw > 0 ? kw : 1);
+ attributes.strides = tflite::gpu::HW(stride.vertical > 0 ? stride.vertical : 1,
+ stride.horizontal > 0 ? stride.horizontal : 1);
+
+ if (node.param().padding.type == ir::PaddingType::SAME)
+ {
+ attributes.padding = CalculateSamePadding(input_shape, attributes);
+ }
+ else
+ {
+ attributes.padding.prepended = tflite::gpu::HW(0, 0);
+ attributes.padding.appended = tflite::gpu::HW(0, 0);
+ }
+
+ auto fn = std::make_unique<ClFunction>(_creation_context);
+ std::unique_ptr<tflite::gpu::GPUOperation> gpu_op;
+ gpu_op = SelectPooling(attributes, op_def);
+
+ addClNode({input_index}, {output_index}, std::move(gpu_op));
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Reshape &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
+
+ tflite::gpu::OperationDef op_def;
+ op_def.precision = tflite::gpu::CalculationsPrecision::F32;
+
+ op_def.src_tensors.push_back(_tensor_reg->getClTensor(input_index)->get_info()._desc);
+ auto input_shape = _tensor_reg->getClTensor(input_index)->get_info()._shape;
+
+ op_def.dst_tensors.push_back(_tensor_reg->getClTensor(output_index)->get_info()._desc);
+ auto output_shape = _tensor_reg->getClTensor(output_index)->get_info()._shape;
+
+ tflite::gpu::ReshapeAttributes attr;
+ attr.new_shape = output_shape;
+
+ auto fn = std::make_unique<ClFunction>(_creation_context);
+ std::unique_ptr<tflite::gpu::GPUOperation> gpu_op;
+ const int src_channels = input_shape.c;
+ SelectReshape(src_channels, attr.new_shape.c, op_def, &gpu_op);
+
+ addClNode({input_index}, {output_index}, std::move(gpu_op));
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Softmax &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
+
+ const auto beta = node.param().beta;
+
+ if (beta != 1.0)
+ {
+ throw std::runtime_error("Softmax.beta != 1 is not supported in gpu_cl");
+ }
+
+ tflite::gpu::OperationDef op_def;
+ op_def.precision = tflite::gpu::CalculationsPrecision::F32;
+
+ op_def.dst_tensors.push_back(_tensor_reg->getClTensor(output_index)->get_info()._desc);
+
+ op_def.src_tensors.push_back(_tensor_reg->getClTensor(input_index)->get_info()._desc);
+ auto input_shape = _tensor_reg->getClTensor(input_index)->get_info()._shape;
+
+ auto fn = std::make_unique<ClFunction>(_creation_context);
+
+ std::unique_ptr<tflite::gpu::GPUOperation> gpu_op;
+ SelectSoftmax(input_shape, op_def, &gpu_op);
+
+ addClNode({input_index}, {output_index}, std::move(gpu_op));
+ _return_fn = std::move(fn);
+}
+
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/KernelGenerator.h b/runtime/onert/backend/gpu_cl/KernelGenerator.h
new file mode 100644
index 000000000..5e8c2621f
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/KernelGenerator.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_GPU_CL_KERNEL_GENERATOR_H__
+#define __ONERT_BACKEND_GPU_CL_KERNEL_GENERATOR_H__
+
+#include "TensorRegistry.h"
+#include "backend/basic/TensorRegistry.h"
+#include "TensorBuilder.h"
+#include "TensorManager.h"
+
+#include "tensorflow/lite/delegates/gpu/api.h"
+
+#include <backend/CustomKernelBuilder.h>
+#include <backend/basic/KernelGeneratorBase.h>
+#include <backend/BackendContext.h>
+#include <ir/Operands.h>
+#include <ir/Operations.h>
+#include <ir/Operations.Include.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+
+class KernelGenerator : public basic::KernelGeneratorBase
+{
+public:
+ KernelGenerator(const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<TensorRegistry> &tensor_reg,
+ const std::shared_ptr<tflite::gpu::cl::CreationContext> &creation_context);
+
+ std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex ind) override;
+
+ void get_operation(FunctionMap &Functions);
+
+private:
+ void visit(const ir::operation::BinaryArithmetic &) override;
+ void visit(const ir::operation::Conv2D &) override;
+ void visit(const ir::operation::DepthwiseConv2D &) override;
+ void visit(const ir::operation::ElementwiseActivation &) override;
+ void visit(const ir::operation::Pool2D &) override;
+ void visit(const ir::operation::Reshape &) override;
+ void visit(const ir::operation::Softmax &) override;
+ absl::Status readConstTensor(const ir::OperandIndex &index, tflite::gpu::TensorOrScalar *param);
+ absl::Status readConstTensor(
+ const ir::OperandIndex &index,
+ absl::variant<tflite::gpu::Tensor<tflite::gpu::Linear, tflite::gpu::DataType::FLOAT32>,
+ tflite::gpu::Tensor<tflite::gpu::HWC, tflite::gpu::DataType::FLOAT32>> *alpha);
+ void addClNode(const std::vector<ir::OperandIndex> &inputs,
+ const std::vector<ir::OperandIndex> &outputs,
+ std::unique_ptr<tflite::gpu::GPUOperation> gpu_op);
+
+private:
+ const ir::Operands &_ctx;
+ const ir::Operations &_operations_ctx;
+ ir::Layout _current_layout;
+ std::shared_ptr<TensorBuilder> _tensor_builder;
+ std::shared_ptr<TensorRegistry> _tensor_reg;
+ std::shared_ptr<tflite::gpu::cl::CreationContext> _creation_context;
+ std::vector<tflite::gpu::cl::CLNode> _nodes;
+ ir::OperationIndex _operation_index;
+ std::vector<ir::OperationIndex> _operation_indexes;
+};
+
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_GPU_CL_KERNEL_GENERATOR_H__
diff --git a/runtime/onert/backend/gpu_cl/MemoryManager.h b/runtime/onert/backend/gpu_cl/MemoryManager.h
new file mode 100644
index 000000000..4b34c39b9
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/MemoryManager.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_GPU_CL_MEMORY_MANAGER_H__
+#define __ONERT_BACKEND_GPU_CL_MEMORY_MANAGER_H__
+
+#include "operand/CLTensor.h"
+
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandInfo.h"
+#include "util/logging.h"
+
+#include "tensorflow/lite/delegates/gpu/spi.h"
+#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
+#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
+#include "tensorflow/lite/delegates/gpu/common/status.h"
+#include "tensorflow/lite/delegates/gpu/common/task/storage_type_util.h"
+
+#include <cassert>
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+
+class MemoryManager
+{
+public:
+ MemoryManager(tflite::gpu::cl::CLContext *context, tflite::gpu::CreateGpuModelInfo create_info,
+ const std::shared_ptr<tflite::gpu::cl::Environment> &environment)
+ : _context{context}, _create_info{create_info}, _environment{environment}
+ {
+ }
+
+ ~MemoryManager() = default;
+
+ void allocate(void)
+ {
+ std::unique_ptr<tflite::gpu::TensorObjectConverterBuilder> converter_builder =
+ NewConverterBuilder(_environment.get());
+ for (const auto &tensor_entry : _tensors)
+ {
+ auto tensor = tensor_entry.second;
+ auto type = tensor->get_type();
+
+ if (type == TensorType::TENSOR_TYPE_DELETE)
+ {
+ continue;
+ }
+
+ const auto &shape = tensor->get_info()._shape;
+ const auto &descriptor = tensor->get_info()._desc;
+
+ if (!CreateTensor(*_context, shape, descriptor, tensor->handle()).ok())
+ {
+ std::runtime_error("Failed to CreateTensor");
+ }
+ switch (type)
+ {
+ case TensorType::TENSOR_TYPE_INPUT:
+ tensor->writeConvertInit(converter_builder.get(), _environment);
+ break;
+ case TensorType::TENSOR_TYPE_OUTPUT:
+ tensor->readConvertInit(converter_builder.get(), _environment);
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ void deallocate(void)
+ {
+ // NYI
+ }
+
+ void startLifetime(const ir::OperandIndex &)
+ { /* DO NOTHING */
+ }
+ void finishLifetime(const ir::OperandIndex &)
+ { /* DO NOTHING */
+ }
+
+ void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info, TensorType type)
+ {
+ auto data_type = DeduceDataTypeFromPrecision(_create_info.precision);
+
+ tflite::gpu::BHWC BHWC_shape = ToBHWC(info.shape());
+
+ tflite::gpu::TensorStorageType storage_type = _create_info.storage_type;
+ tflite::gpu::Layout layout =
+ BHWC_shape.b == 1 ? tflite::gpu::Layout::HWC : tflite::gpu::Layout::BHWC;
+
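+    // Let the delegate choose the storage type (buffer vs. image) best suited
+    // to this shape on the current device, starting from the configured
+    // default.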
+ if (!SelectBestStorageType(_environment->device().GetInfo(), BHWC_shape, storage_type,
+ data_type, layout, &storage_type)
+ .ok())
+ {
+ throw std::runtime_error("Failed to SelectBestStorageType");
+ }
+ auto tensor = std::make_shared<operand::CLTensor>(
+ info.shape().rank(), type, BHWC_shape,
+ tflite::gpu::TensorDescriptor{data_type, storage_type, layout});
+ _tensors[ind] = tensor;
+ }
+
+ ir::OperandIndex addTensor(const ir::Shape &shape)
+ {
+ auto data_type = DeduceDataTypeFromPrecision(_create_info.precision);
+
+ tflite::gpu::BHWC BHWC_shape = ToBHWC(shape);
+
+ tflite::gpu::TensorStorageType storage_type = _create_info.storage_type;
+ tflite::gpu::Layout layout =
+ BHWC_shape.b == 1 ? tflite::gpu::Layout::HWC : tflite::gpu::Layout::BHWC;
+
+ if (!SelectBestStorageType(_environment->device().GetInfo(), BHWC_shape, storage_type,
+ data_type, layout, &storage_type)
+ .ok())
+ {
+ throw std::runtime_error("Failed to SelectBestStorageType");
+ }
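+    // Indices for backend-internal tensors count down from UINT32_MAX, keeping
+    // them clear of the (much smaller) operand indices of the frontend graph.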
+ auto ind = ir::OperandIndex(_new_id--);
+ auto tensor = std::make_shared<operand::CLTensor>(
+ shape.rank(), TensorType::TENSOR_TYPE_VALID, BHWC_shape,
+ tflite::gpu::TensorDescriptor{data_type, storage_type, layout});
+ _tensors[ind] = tensor;
+ return ind;
+ }
+
+ ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &tensors(void) { return _tensors; }
+
+private:
+ ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> _tensors;
+ tflite::gpu::cl::CLContext *_context;
+ tflite::gpu::CreateGpuModelInfo _create_info;
+ std::shared_ptr<tflite::gpu::cl::Environment> _environment;
+ uint32_t _new_id = UINT32_MAX;
+};
+
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_GPU_CL_MEMORY_MANAGER_H__
diff --git a/runtime/onert/backend/gpu_cl/TensorBuilder.cc b/runtime/onert/backend/gpu_cl/TensorBuilder.cc
new file mode 100644
index 000000000..318335471
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/TensorBuilder.cc
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <memory>
+#include <queue>
+
+#include "TensorBuilder.h"
+
+#include "TensorManager.h"
+
+#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
+#include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
+#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
+
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandIndexSequence.h"
+#include <ir/Operands.h>
+#include <util/Utils.h>
+
+#include <cassert>
+#include <stack>
+
+#include "util/logging.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+
+using UsesType = cl_common::UsesType;
+
+TensorBuilder::TensorBuilder(const ir::Operands &operands, TensorManager *tensor_mgr)
+ : _operands{operands}, _tensor_mgr{tensor_mgr}
+{
+ assert(_tensor_mgr);
+}
+
+void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout backend_layout, TensorType type)
+{
+ assert(_tensor_mgr->constTensors().size() == 0);
+ assert(_tensor_mgr->nonconstTensors().size() == 0);
+
+ _uses_count_map[ind] = _operands.at(ind).getUses().size();
+
+ _tensor_info_map.emplace(ind, info);
+ _tensor_type_map.emplace(ind, type);
+
+ _tensor_layout_map.insert({ind, backend_layout});
+}
+
+void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
+{
+ _lifetime_seq.emplace_back(UsesType::FIRST, ind);
+}
+
+void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
+{
+ _lifetime_seq.emplace_back(UsesType::LAST, ind);
+}
+
+bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
+{
+ return _tensor_info_map.find(ind) != _tensor_info_map.end();
+}
+
+void TensorBuilder::prepare(void) { buildTensors(); }
+
+void TensorBuilder::allocate(void)
+{
+ auto lifetime_map = cl_common::createLifetimeMap(_lifetime_seq, _parent_map);
+
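+ // Replay the recorded lifetime sequence: claim each tensor's memory right before
+ // its first use and release it right after its last use.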
+ for (const auto &entry : lifetime_map)
+ {
+ const auto &use = entry.second;
+ auto use_type = use.first;
+ auto use_index = use.second;
+ assert(use_index.valid());
+ if (use_type == UsesType::FIRST)
+ _tensor_mgr->startLifetime(use_index);
+ else
+ _tensor_mgr->finishLifetime(use_index);
+ }
+
+ _tensor_mgr->allocateConsts();
+
+ // TODO `_parent_map` is filled for every Concat node, even ones this backend does
+ // not handle. After refactoring BackendContext we can uncomment this assert:
+ // assert(_tensor_info_map.size() ==
+ // _tensor_mgr->nonconstTensors().size() + num of constants of _tensor_info_map +
+ // _parent_map.size());
+ _tensor_mgr->allocateNonconsts();
+}
+
+void TensorBuilder::postFunctionPrepare(void) { _tensor_mgr->tryDeallocConstants(); }
+
+void TensorBuilder::buildTensors(void)
+{
+ assert(_tensor_mgr->constTensors().size() == 0);
+ assert(_tensor_mgr->nonconstTensors().size() == 0);
+ // Normal tensors
+ for (const auto &entry : _tensor_info_map)
+ {
+ const auto &ind = entry.first;
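+ // Operands that have a parent are realized as views into the parent tensor
+ // (concat elimination), so no tensor of their own is built here.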
+ if (_parent_map.count(ind) > 0)
+ continue;
+ auto type = _tensor_type_map.at(ind);
+ const auto &info = entry.second;
+ _tensor_mgr->buildTensor(ind, info, type);
+ }
+}
+
+ir::OperandIndex TensorBuilder::addTensor(const ir::Shape &shape)
+{
+ return _tensor_mgr->addTensor(shape);
+}
+
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/TensorBuilder.h b/runtime/onert/backend/gpu_cl/TensorBuilder.h
new file mode 100644
index 000000000..e0333fef5
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/TensorBuilder.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_H__
+#define __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_H__
+
+#include "TensorManager.h"
+
+#include <cl_common/LifetimeMap.h>
+#include <cl_common/ParentInfo.h>
+
+#include <ir/Operands.h>
+#include <ir/OperandIndexSequence.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+class TensorBuilder
+{
+public:
+ TensorBuilder(const ir::Operands &operands, TensorManager *tensor_mgr);
+
+ /**
+ * @brief Register tensor information to allocate on the gpu_cl backend
+ * @param[in] ind Operand index
+ * @param[in] info Tensor information
+ * @param[in] backend_layout Tensor data layout
+ * @param[in] type Tensor type
+ */
+ void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout backend_layout, TensorType type);
+
+ void notifyFirstUse(const ir::OperandIndex &);
+ void notifyLastUse(const ir::OperandIndex &);
+
+ bool isRegistered(const ir::OperandIndex &) const;
+
+ void prepare();
+ void allocate();
+ void postFunctionPrepare();
+
+ TensorManager *cl_tensor_manager(void) { return _tensor_mgr.get(); }
+
+ void setUsesCount(const ir::OperandIndex &index, size_t num_uses)
+ {
+ assert(_uses_count_map.find(index) != _uses_count_map.end() ? _uses_count_map[index] == num_uses
+ : true);
+ _uses_count_map[index] = num_uses;
+ }
+
+ void parent_map(std::unordered_map<ir::OperandIndex, cl_common::ParentInfo> &&parent_map)
+ {
+ _parent_map = std::move(parent_map);
+ }
+
+ bool areSubTensorsOf(const ir::OperandIndex &parent, const ir::OperandIndexSequence &seq);
+
+ /**
+ * @brief Check child tensor is allocated as subtensor of parent tensor
+ * @param[in] parent Index of parent
+ * @param[in] child Index of child
+ * @return @c true if child is allocated as subtensor of parent, otherwise @c false
+ */
+ bool isSubTensorOf(const ir::OperandIndex &parent, const ir::OperandIndex &child);
+
+private:
+ void buildTensors(void);
+ ir::OperandIndex findRootParent(ir::OperandIndex index);
+ ir::OperandIndex addTensor(const ir::Shape &shape);
+
+private:
+ const ir::Operands &_operands;
+ ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
+ ir::OperandIndexMap<ir::Layout> _tensor_layout_map;
+ ir::OperandIndexMap<TensorType> _tensor_type_map;
+ ir::OperandIndexMap<size_t> _uses_count_map;
+
+ std::unique_ptr<TensorManager> _tensor_mgr;
+
+ // for linear executor
+ cl_common::LifetimeSeq _lifetime_seq;
+
+ // Extra info for concat elimination
+ ir::OperandIndexMap<cl_common::ParentInfo> _parent_map;
+};
+
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_H__
diff --git a/runtime/onert/backend/gpu_cl/TensorManager.cc b/runtime/onert/backend/gpu_cl/TensorManager.cc
new file mode 100644
index 000000000..02e3441ca
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/TensorManager.cc
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TensorManager.h"
+
+#include <util/logging.h>
+
+#include <cassert>
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+
+TensorManager::TensorManager(MemoryManager *const_mgr, MemoryManager *nonconst_mgr)
+ : _const_mgr{const_mgr}, _nonconst_mgr{nonconst_mgr}
+{
+ // DO NOTHING
+}
+
+void TensorManager::allocateConsts(void) { _const_mgr->allocate(); }
+
+void TensorManager::allocateNonconsts(void) { _nonconst_mgr->allocate(); }
+
+void TensorManager::deallocateConsts(void) { _const_mgr->deallocate(); }
+
+void TensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); }
+
+void TensorManager::buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ TensorType type)
+{
+ assert(_ind_to_mgr.find(ind) == _ind_to_mgr.end());
+
+ if (info.isConstant())
+ {
+ _const_mgr->buildTensor(ind, info, type);
+ _ind_to_mgr.insert({ind, *_const_mgr});
+ }
+ else
+ {
+ _nonconst_mgr->buildTensor(ind, info, type);
+ _ind_to_mgr.insert({ind, *_nonconst_mgr});
+ }
+}
+
+ir::OperandIndex TensorManager::addTensor(const ir::Shape &shape)
+{
+ auto ind = _nonconst_mgr->addTensor(shape);
+ _ind_to_mgr.insert({ind, *_nonconst_mgr});
+
+ return ind;
+}
+
+void TensorManager::startLifetime(const ir::OperandIndex &ind)
+{
+ assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end());
+ _ind_to_mgr.at(ind).startLifetime(ind);
+}
+
+void TensorManager::finishLifetime(const ir::OperandIndex &ind)
+{
+ assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end());
+ _ind_to_mgr.at(ind).finishLifetime(ind);
+}
+
+std::shared_ptr<operand::ICLTensor> TensorManager::at(const ir::OperandIndex &ind)
+{
+ if (_ind_to_mgr.find(ind) == _ind_to_mgr.end())
+ return nullptr;
+
+ auto &tensors = _ind_to_mgr.at(ind).tensors();
+ if (tensors.find(ind) != tensors.end())
+ {
+ return tensors.at(ind);
+ }
+
+ return nullptr;
+}
+
+ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &TensorManager::constTensors(void)
+{
+ return _const_mgr->tensors();
+}
+
+ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &TensorManager::nonconstTensors(void)
+{
+ return _nonconst_mgr->tensors();
+}
+
+void TensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
+{
+ for (auto &&it : _nonconst_mgr->tensors())
+ fn(it.first);
+
+ for (auto &&it : _const_mgr->tensors())
+ fn(it.first);
+}
+
+void TensorManager::tryDeallocConstants(void)
+{
+ // NYI
+}
+
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/TensorManager.h b/runtime/onert/backend/gpu_cl/TensorManager.h
new file mode 100644
index 000000000..5b09ac130
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/TensorManager.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_GPU_CL_TENSOR_MANAGER_H__
+#define __ONERT_BACKEND_GPU_CL_TENSOR_MANAGER_H__
+
+#include "MemoryManager.h"
+
+#include "Utils.h"
+
+#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
+
+#include "ir/OperandInfo.h"
+#include "ir/OperandIndexMap.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+
+class TensorManager
+{
+public:
+ TensorManager(MemoryManager *const_mgr, MemoryManager *nonconst_mgr);
+
+ virtual ~TensorManager() = default;
+
+ void allocateConsts(void);
+ void allocateNonconsts(void);
+ void deallocateConsts(void);
+ void deallocateNonconsts(void);
+
+ void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info, TensorType type);
+ ir::OperandIndex addTensor(const ir::Shape &shape);
+
+ std::shared_ptr<operand::ICLTensor> findTensorAsParent(const ir::OperandIndex &ind);
+
+ void startLifetime(const ir::OperandIndex &ind);
+ void finishLifetime(const ir::OperandIndex &ind);
+
+ std::shared_ptr<operand::ICLTensor> at(const ir::OperandIndex &ind);
+
+ ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &constTensors(void);
+ ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &nonconstTensors(void);
+
+ void iterate(const std::function<void(const ir::OperandIndex &)> &fn);
+
+ void tryDeallocConstants(void);
+
+private:
+ std::unique_ptr<MemoryManager> _const_mgr;
+ std::unique_ptr<MemoryManager> _nonconst_mgr;
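+ // Maps each operand index to the manager (const or non-const) that owns it, so
+ // lifetime calls can be forwarded without re-checking const-ness.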
+ ir::OperandIndexMap<MemoryManager &> _ind_to_mgr;
+};
+
+inline TensorManager *
+createTensorManager(tflite::gpu::cl::CLContext *context,
+ tflite::gpu::CreateGpuModelInfo create_info,
+ const std::shared_ptr<tflite::gpu::cl::Environment> &environment)
+{
+ VERBOSE(createTensorManager) << "GPU-CL TensorManager" << std::endl;
+ return new TensorManager(new MemoryManager(context, create_info, environment),
+ new MemoryManager(context, create_info, environment));
+}
+
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_GPU_CL_TENSOR_MANAGER_H__
diff --git a/runtime/onert/backend/gpu_cl/TensorRegistry.h b/runtime/onert/backend/gpu_cl/TensorRegistry.h
new file mode 100644
index 000000000..be342e9cb
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/TensorRegistry.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_GPU_CL_TENSOR_REGISTRY_H__
+#define __ONERT_BACKEND_GPU_CL_TENSOR_REGISTRY_H__
+
+#include "TensorManager.h"
+
+#include "backend/ITensorRegistry.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+
+/**
+ * @brief Tensor registry class for gpu-cl backends
+ *
+ * This is implemented as a wrapper of TensorManager.
+ */
+class TensorRegistry : public ITensorRegistry
+{
+public:
+ TensorRegistry(TensorManager *tensor_mgr) : _tensor_mgr{tensor_mgr} {}
+
+ ITensor *getITensor(const ir::OperandIndex &ind) override { return _tensor_mgr->at(ind).get(); }
+
+ ITensor *getNativeITensor(const ir::OperandIndex &ind) override { return getITensor(ind); }
+
+ auto getClTensor(const ir::OperandIndex &ind) { return _tensor_mgr->at(ind).get(); }
+
+ ir::OperandIndex addNewClTensor(const ir::Shape &shape) { return _tensor_mgr->addTensor(shape); }
+
+private:
+ TensorManager *_tensor_mgr;
+};
+
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_GPU_CL_TENSOR_REGISTRY_H__
diff --git a/runtime/onert/backend/gpu_cl/Utils.h b/runtime/onert/backend/gpu_cl/Utils.h
new file mode 100644
index 000000000..1953c0e43
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/Utils.h
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_GPU_CL_UTILS_H__
+#define __ONERT_BACKEND_GPU_CL_UTILS_H__
+
+#include "absl/status/status.h"
+
+#include "tensorflow/lite/delegates/gpu/common/shape.h"
+#include "tensorflow/lite/delegates/gpu/common/operations.h"
+
+#include "ir/operation/BinaryArithmetic.h"
+#include "ir/operation/ElementwiseActivation.h"
+#include "ir/operation/ElementwiseBinary.h"
+#include "ir/operation/ElementwiseUnary.h"
+#include "ir/operation/Pool2D.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+
+inline tflite::gpu::HW ToHW(int32_t h, int32_t w)
+{
+ return tflite::gpu::HW(h > 0 ? h : 1, w > 0 ? w : 1);
+}
+
+template <typename AttrT>
+inline void UpdatePadding(const ir::PaddingType type, const tflite::gpu::BHWC &input_shape,
+ AttrT *attr)
+{
+ if (type == ir::PaddingType::SAME)
+ {
+ attr->padding = CalculateSamePadding(input_shape, *attr);
+ }
+ else
+ {
+ attr->padding.prepended = tflite::gpu::HW(0, 0);
+ attr->padding.appended = tflite::gpu::HW(0, 0);
+ }
+}
+
+inline tflite::gpu::PoolingType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
+{
+ switch (type_ir)
+ {
+ case ir::operation::Pool2D::PoolType::AVG:
+ return tflite::gpu::PoolingType::AVERAGE;
+ case ir::operation::Pool2D::PoolType::MAX:
+ return tflite::gpu::PoolingType::MAX;
+ default:
+ throw std::runtime_error("gpu_Cl KernelGenerator : Not supported operation yet");
+ }
+}
+
+inline tflite::gpu::BHWC ToBHWC(ir::Shape shape)
+{
+ switch (shape.rank())
+ {
+ case 1:
+ // B layout
+ return tflite::gpu::BHWC(shape.dim(0), 1, 1, 1);
+ case 2:
+ // BC layout
+ return tflite::gpu::BHWC(shape.dim(0), 1, 1, shape.dim(1));
+ case 3:
+ // BWC layout
+ return tflite::gpu::BHWC(shape.dim(0), 1, shape.dim(1), shape.dim(2));
+ case 4:
+ // BHWC layout
+ return tflite::gpu::BHWC(shape.dim(0), shape.dim(1), shape.dim(2), shape.dim(3));
+ default:
+ return tflite::gpu::BHWC();
+ }
+}
+
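+ // A shape is "linear convertible" when every dimension except the innermost one is 1,
+ // i.e. the tensor can be treated as a plain 1-D vector (such as a [1, 1, 1, C] bias).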
+inline bool CheckIfLinearConvertible(const ir::Shape *shape)
+{
+ if (shape->num_elements() <= 0)
+ {
+ return false;
+ }
+ for (int i = 0; i < shape->rank() - 1; ++i)
+ {
+ if (shape->dim(i) != 1)
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+inline tflite::gpu::OperationType
+convertArithmeticType(ir::operation::BinaryArithmetic::ArithmeticType arithmetic_type_ir)
+{
+ switch (arithmetic_type_ir)
+ {
+ case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
+ return tflite::gpu::OperationType::ADD;
+ case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
+ return tflite::gpu::OperationType::SUB;
+ case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
+ return tflite::gpu::OperationType::MUL;
+ case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
+ return tflite::gpu::OperationType::DIV;
+ default:
+ throw std::runtime_error("Unsupported ArithmeticType");
+ }
+}
+
+inline tflite::gpu::OperationType
+convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type_ir)
+{
+ switch (type_ir)
+ {
+ case ir::operation::ElementwiseActivation::Type::LOGISTIC:
+ return tflite::gpu::OperationType::SIGMOID;
+ default:
+ throw std::runtime_error("Unsupported ElementwiseActivationType");
+ }
+}
+
+enum TensorType
+{
+ TENSOR_TYPE_VALID = 0,
+ TENSOR_TYPE_INPUT = 1,
+ TENSOR_TYPE_OUTPUT = 2,
+ TENSOR_TYPE_DELETE = 3
+};
+
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_GPU_CL_UTILS_H__
diff --git a/runtime/onert/backend/gpu_cl/gpu_cl.cc b/runtime/onert/backend/gpu_cl/gpu_cl.cc
new file mode 100644
index 000000000..b771d6d29
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/gpu_cl.cc
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Backend.h"
+
+#include <util/logging.h>
+
+extern "C" {
+onert::backend::Backend *onert_backend_create()
+{
+ VERBOSE(onert_backend_create) << "'gpu_cl' loaded\n";
+ return new onert::backend::gpu_cl::Backend;
+}
+
+void onert_backend_destroy(onert::backend::Backend *backend)
+{
+ VERBOSE(onert_backend_destroy) << "'gpu_cl' unloaded\n";
+ delete backend;
+}
+}
diff --git a/runtime/onert/backend/gpu_cl/operand/CLTensor.cc b/runtime/onert/backend/gpu_cl/operand/CLTensor.cc
new file mode 100644
index 000000000..1b19b10f8
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/operand/CLTensor.cc
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CLTensor.h"
+
+#include "tensorflow/lite/delegates/gpu/cl/buffer.h"
+#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
+
+using namespace tflite::gpu::cl;
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+namespace operand
+{
+
+CLTensor::CLTensor(size_t rank, TensorType type, tflite::gpu::BHWC shape,
+ tflite::gpu::TensorDescriptor desc)
+ : ICLTensor{rank, type, shape, desc}, _tensor(std::make_shared<Tensor>())
+{
+}
+
+const tflite::gpu::cl::Tensor *CLTensor::handle() const { return _tensor.get(); }
+
+tflite::gpu::cl::Tensor *CLTensor::handle() { return _tensor.get(); }
+
+} // namespace operand
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/operand/CLTensor.h b/runtime/onert/backend/gpu_cl/operand/CLTensor.h
new file mode 100644
index 000000000..269551d0c
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/operand/CLTensor.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_GPU_CL_OPERAND_CL_TENSOR_H__
+#define __ONERT_BACKEND_GPU_CL_OPERAND_CL_TENSOR_H__
+
+#include "ICLTensor.h"
+
+#include "tensorflow/lite/delegates/gpu/cl/buffer.h"
+#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+namespace operand
+{
+
+class CLTensor : public ICLTensor
+{
+public:
+ CLTensor() = delete;
+
+public:
+ CLTensor(size_t rank, TensorType type, tflite::gpu::BHWC shape,
+ tflite::gpu::TensorDescriptor desc);
+
+public:
+ const tflite::gpu::cl::Tensor *handle() const override;
+ tflite::gpu::cl::Tensor *handle() override;
+
+public:
+ /** Set given buffer as the buffer of the tensor
+ *
+ * @note Ownership of the memory is not transferred to this object.
+ * Thus management (allocate/free) should be done by the client.
+ *
+ * @param[in] host_ptr Storage to be used.
+ */
+ void setBuffer(void *host_ptr);
+
+private:
+ std::shared_ptr<tflite::gpu::cl::Tensor> _tensor;
+};
+
+} // namespace operand
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_GPU_CL_OPERAND_CL_TENSOR_H__
diff --git a/runtime/onert/backend/gpu_cl/operand/ICLTensor.cc b/runtime/onert/backend/gpu_cl/operand/ICLTensor.cc
new file mode 100644
index 000000000..1e61b9928
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/operand/ICLTensor.cc
@@ -0,0 +1,226 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ICLTensor.h"
+
+#include "tensorflow/lite/delegates/gpu/api.h"
+#include "tensorflow/lite/delegates/gpu/spi.h"
+#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
+#include "tensorflow/lite/delegates/gpu/cl/kernels/converter.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+namespace operand
+{
+
+using namespace tflite::gpu;
+using namespace tflite::gpu::cl;
+using namespace tflite::gpu::internal_tensor;
+
+void ICLTensor::access(const std::function<void(ITensor &tensor)> &fn)
+{
+ if (total_size() == 0)
+ return;
+
+ fn(*this);
+}
+
+void ICLTensor::writeConvertInit(tflite::gpu::TensorObjectConverterBuilder *converter_builder,
+ std::shared_ptr<tflite::gpu::cl::Environment> environment)
+{
+ _environment = environment;
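+ // Build a two-stage write conversion: user-provided CPU memory (BHWC, FP32) -> an
+ // intermediate object in the tensor's storage type -> the tensor's own layout/type.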
+ TensorObjectDef input_def;
+ input_def.dimensions.b = handle()->Batch();
+ input_def.dimensions.h = handle()->Height();
+ input_def.dimensions.w = handle()->Width();
+ input_def.dimensions.c = handle()->Channels();
+ input_def.object_def.data_layout = DataLayout::BHWC;
+ input_def.object_def.data_type = DataType::FLOAT32;
+ input_def.object_def.object_type = ObjectType::CPU_MEMORY;
+ input_def.object_def.user_provided = true;
+
+ TensorObjectDef permute_def = input_def;
+ permute_def.object_def.object_type = ToObjectType(handle()->GetStorageType());
+
+ const auto &dims = permute_def.dimensions;
+ const BHWC shape(dims.b, dims.h, dims.w, dims.c);
+ const TensorDescriptor desc{
+ permute_def.object_def.data_type,
+ ToTensorStorageType(permute_def.object_def.object_type, permute_def.object_def.data_layout),
+ Layout::BHWC};
+ if (!AllocateTensorMemory(_environment->context(), shape, desc, &_cl_memory).ok())
+ {
+ throw std::runtime_error("Failed to AllocateTensorMemory");
+ }
+
+ TensorObjectDef output_def = permute_def;
+ output_def.object_def.data_layout = ToDataLayout(handle()->GetStorageType());
+ output_def.object_def.data_type = handle()->GetDataType();
+ input_def.object_def.user_provided = false;
+
+ if (!converter_builder->MakeConverter(input_def, permute_def, &_converter_to).ok())
+ {
+ throw std::runtime_error("Failed to make converter_to");
+ }
+ if (!converter_builder->MakeConverter(permute_def, output_def, &_converter_from).ok())
+ {
+ throw std::runtime_error("Failed to make converter_from");
+ }
+}
+
+void ICLTensor::readConvertInit(tflite::gpu::TensorObjectConverterBuilder *converter_builder,
+ std::shared_ptr<tflite::gpu::cl::Environment> environment)
+{
+ _environment = environment;
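+ // Mirror of writeConvertInit: tensor storage -> intermediate BHWC/FP32 object ->
+ // user-provided CPU memory.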
+ TensorObjectDef input_def;
+ input_def.dimensions.b = handle()->Batch();
+ input_def.dimensions.h = handle()->Height();
+ input_def.dimensions.w = handle()->Width();
+ input_def.dimensions.c = handle()->Channels();
+ input_def.object_def.data_layout = ToDataLayout(handle()->GetStorageType());
+ input_def.object_def.data_type = handle()->GetDataType();
+ input_def.object_def.object_type = ToObjectType(handle()->GetStorageType());
+ input_def.object_def.user_provided = false;
+
+ TensorObjectDef permute_def = input_def;
+ permute_def.object_def.data_layout = DataLayout::BHWC;
+ permute_def.object_def.data_type = DataType::FLOAT32;
+ permute_def.object_def.user_provided = true;
+
+ const auto &dims = permute_def.dimensions;
+ const BHWC shape(dims.b, dims.h, dims.w, dims.c);
+ const TensorDescriptor desc{
+ permute_def.object_def.data_type,
+ ToTensorStorageType(permute_def.object_def.object_type, permute_def.object_def.data_layout),
+ Layout::BHWC};
+ if (!AllocateTensorMemory(_environment->context(), shape, desc, &_cl_memory).ok())
+ {
+ throw std::runtime_error("Failed to AllocateTensorMemory");
+ }
+
+ TensorObjectDef output_def = permute_def;
+ output_def.object_def.object_type = ObjectType::CPU_MEMORY;
+
+ if (!converter_builder->MakeConverter(input_def, permute_def, &_converter_from).ok())
+ {
+ throw std::runtime_error("Failed to make converter_from");
+ }
+ if (!converter_builder->MakeConverter(permute_def, output_def, &_converter_to).ok())
+ {
+ throw std::runtime_error("Failed to make converter_to");
+ }
+}
+
+void ICLTensor::enqueueWriteBuffer(const void *ptr, bool blocking)
+{
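+ // Wrap the caller's CPU buffer, convert it into the intermediate permuted object
+ // allocated by writeConvertInit, then into the CL tensor's storage.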
+ TensorObject input_obj = MakeReadableCpuMemory(
+ absl::MakeSpan(static_cast<const float *>(ptr), _info._shape.DimensionsProduct()));
+
+ TensorObject output_obj;
+
+ TensorObject permute_obj;
+ if (ToObjectType(handle()->GetStorageType()) == ObjectType::OPENCL_TEXTURE)
+ {
+ permute_obj = OpenClTexture{_cl_memory.memory()};
+ }
+ else
+ {
+ permute_obj = OpenClBuffer{_cl_memory.memory()};
+ }
+
+ if (handle()->GetStorageType() == TensorStorageType::BUFFER)
+ {
+ output_obj = OpenClBuffer{handle()->GetMemoryPtr()};
+ }
+ else if (handle()->GetStorageType() == TensorStorageType::IMAGE_BUFFER)
+ {
+ output_obj = OpenClBuffer{handle()->GetMemoryPtrForWriting()};
+ }
+ else
+ {
+ output_obj = OpenClTexture{handle()->GetMemoryPtr()};
+ }
+
+ if (!_converter_to->Convert(input_obj, permute_obj).ok())
+ {
+ throw std::runtime_error("Failed to write cl buffer from cpu memory");
+ }
+
+ if (blocking && !_environment->queue()->WaitForCompletion().ok())
+ {
+ throw std::runtime_error("Failed to WaitForCompletion");
+ }
+
+ if (!_converter_from->Convert(permute_obj, output_obj).ok())
+ {
+ throw std::runtime_error("Failed to change layout");
+ }
+}
+
+void ICLTensor::enqueueReadBuffer(void *ptr, bool blocking)
+{
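+ // Inverse of enqueueWriteBuffer: CL tensor storage -> intermediate permuted object
+ // -> caller's CPU buffer.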
+ TensorObject input_obj;
+
+ if (handle()->GetStorageType() == TensorStorageType::BUFFER)
+ {
+ input_obj = OpenClBuffer{handle()->GetMemoryPtr()};
+ }
+ else if (handle()->GetStorageType() == TensorStorageType::IMAGE_BUFFER)
+ {
+ input_obj = OpenClBuffer{handle()->GetMemoryPtrForWriting()};
+ }
+ else
+ {
+ input_obj = OpenClTexture{handle()->GetMemoryPtr()};
+ }
+
+ TensorObject permute_obj;
+ if (ToObjectType(handle()->GetStorageType()) == ObjectType::OPENCL_TEXTURE)
+ {
+ permute_obj = OpenClTexture{_cl_memory.memory()};
+ }
+ else
+ {
+ permute_obj = OpenClBuffer{_cl_memory.memory()};
+ }
+
+ TensorObject output_obj =
+ MakeCpuMemory(absl::MakeSpan(static_cast<float *>(ptr), _info._shape.DimensionsProduct()));
+
+ if (!_converter_from->Convert(input_obj, permute_obj).ok())
+ {
+ throw std::runtime_error("Failed to change layout");
+ }
+ if (!_converter_to->Convert(permute_obj, output_obj).ok())
+ {
+ throw std::runtime_error("Failed to read cl buffer");
+ }
+
+ if (blocking && !_environment->queue()->WaitForCompletion().ok())
+ {
+ throw std::runtime_error("Failed to WaitForCompletion");
+ }
+}
+
+} // namespace operand
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/operand/ICLTensor.h b/runtime/onert/backend/gpu_cl/operand/ICLTensor.h
new file mode 100644
index 000000000..47420a1c2
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/operand/ICLTensor.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_GPU_CL_OPERAND_I_CL_TENSOR_H__
+#define __ONERT_BACKEND_GPU_CL_OPERAND_I_CL_TENSOR_H__
+
+#include <backend/ITensor.h>
+
+#include "tensorflow/lite/delegates/gpu/api.h"
+#include "tensorflow/lite/delegates/gpu/spi.h"
+#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
+#include "tensorflow/lite/delegates/gpu/cl/kernels/converter.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor.h"
+#include "tensorflow/lite/delegates/gpu/cl/environment.h"
+
+#include "Utils.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+namespace operand
+{
+
+struct TensorInfo
+{
+ tflite::gpu::BHWC _shape;
+ tflite::gpu::TensorDescriptor _desc;
+};
+
+class ICLTensor : public ITensor
+{
+public:
+ ICLTensor() = default;
+ ICLTensor(const ICLTensor &) = delete;
+ ICLTensor &operator=(const ICLTensor &) = delete;
+ ICLTensor(ICLTensor &&) = default;
+ ICLTensor &operator=(ICLTensor &&) = default;
+
+ ICLTensor(size_t rank, TensorType type, tflite::gpu::BHWC shape,
+ tflite::gpu::TensorDescriptor desc)
+ : _rank{rank}, _type(type), _info{shape, desc}
+ {
+ }
+
+public:
+ uint8_t *buffer() const final { return reinterpret_cast<uint8_t *>(handle()->GetMemoryPtr()); }
+ size_t total_size() const final { return _info._shape.DimensionsProduct() * sizeof(float); }
+ size_t calcOffset(const ir::Coordinates &) const final
+ {
+ throw std::runtime_error("ICLTensor::calcOffset() is not supported.");
+ }
+ ir::Layout layout() const final { return ir::Layout::NHWC; }
+ ir::DataType data_type() const final { return ir::DataType::FLOAT32; }
+ float data_scale() const override
+ {
+ throw std::runtime_error("ICLTensor::data_scale() is not supported.");
+ }
+ int32_t data_zero_point() const override
+ {
+ throw std::runtime_error("ICLTensor::data_zero_point() is not supported.");
+ }
+ const std::vector<float> &data_scales() const override
+ {
+ throw std::runtime_error("ICLTensor::data_scales() is not supported.");
+ }
+ const std::vector<int32_t> &data_zero_points() const override
+ {
+ throw std::runtime_error("ICLTensor::data_zero_points() is not supported.");
+ }
+ bool is_dynamic() const override { return false; }
+ ir::Shape getShape() const override
+ {
+ tflite::gpu::BHWC shape = _info._shape;
+ switch (_rank)
+ {
+ case 1:
+ return ir::Shape{shape.b};
+ case 2:
+ return ir::Shape{shape.b, shape.c};
+ case 3:
+ return ir::Shape{shape.b, shape.w, shape.c};
+ case 4:
+ return ir::Shape{shape.b, shape.h, shape.w, shape.c};
+ default:
+ break;
+ }
+ return ir::Shape{};
+ }
+ bool has_padding() const override { return false; }
+ void access(const std::function<void(ITensor &tensor)> &fn) final;
+ bool needMemoryMap() const final { return true; }
+ void enqueueWriteBuffer(const void *ptr, bool blocking = true) final;
+ void enqueueReadBuffer(void *ptr, bool blocking = true) final;
+
+ void writeConvertInit(tflite::gpu::TensorObjectConverterBuilder *converter_builder,
+ std::shared_ptr<tflite::gpu::cl::Environment> environment);
+ void readConvertInit(tflite::gpu::TensorObjectConverterBuilder *converter_builder,
+ std::shared_ptr<tflite::gpu::cl::Environment> environment);
+
+ TensorType get_type() { return _type; }
+ TensorType set_type(TensorType type) { return _type = type; }
+ const TensorInfo &get_info() const { return _info; }
+
+public:
+ virtual const tflite::gpu::cl::Tensor *handle() const = 0;
+ virtual tflite::gpu::cl::Tensor *handle() = 0;
+
+protected:
+ size_t _rank; // Actual rank (reflects extended rank)
+ TensorType _type;
+ TensorInfo _info;
+ tflite::gpu::cl::CLMemory _cl_memory;
+ std::shared_ptr<tflite::gpu::cl::Environment> _environment;
+ std::unique_ptr<tflite::gpu::TensorObjectConverter> _converter_to;
+ std::unique_ptr<tflite::gpu::TensorObjectConverter> _converter_from;
+};
+
+} // namespace operand
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_GPU_CL_OPERAND_I_CL_TENSOR_H__
diff --git a/runtime/onert/backend/ruy/Backend.h b/runtime/onert/backend/ruy/Backend.h
new file mode 100644
index 000000000..4077965c4
--- /dev/null
+++ b/runtime/onert/backend/ruy/Backend.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_BACKEND_H__
+#define __ONERT_BACKEND_RUY_BACKEND_H__
+
+#include "BackendContext.h"
+#include "Config.h"
+#include "KernelGenerator.h"
+
+#include <backend/Backend.h>
+
+#include <memory>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+class Backend : public ::onert::backend::Backend
+{
+public:
+ Backend() : _config{std::make_shared<Config>()} {}
+
+ std::shared_ptr<IConfig> config() const override { return _config; }
+
+ std::unique_ptr<onert::backend::BackendContext> newContext(ContextData &&data) const override
+ {
+ auto custom_kernel_builder = data.custom_kernel_builder;
+ auto &graph = *data.graph;
+ auto context = std::make_unique<BackendContext>(this, std::move(data));
+ auto tr = std::make_shared<basic::TensorRegistry>();
+ auto tb = std::make_shared<TensorBuilder>(tr);
+ context->tensor_registry = tr;
+ context->tensor_builder = tb;
+ context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, custom_kernel_builder,
+ context->external_context());
+ return context;
+ }
+
+private:
+ std::shared_ptr<IConfig> _config;
+};
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_BACKEND_H__
diff --git a/runtime/onert/backend/ruy/BackendContext.cc b/runtime/onert/backend/ruy/BackendContext.cc
new file mode 100644
index 000000000..1943f70c7
--- /dev/null
+++ b/runtime/onert/backend/ruy/BackendContext.cc
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+#include "util/logging.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandIndexSequence.h"
+#include "backend/basic/BackendContextHelpers.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+ITensorRegistry *BackendContext::genTensors() { return basic::genTensors(*this); }
+
+FunctionMap BackendContext::genKernels()
+{
+ FunctionMap ret;
+
+ for (auto &&op_ind : _data.op_order)
+ {
+ auto fn_seq = kernel_gen->generate(op_ind);
+ ret.emplace_back(op_ind, std::move(fn_seq));
+ }
+
+ basic::initConsts(*this);
+
+ // NOTE For memory optimization, we want to free some operand data
+ const_cast<ir::Graph &>(*_data.graph)
+ .operands()
+ .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
+
+ for (auto &&it : ret)
+ {
+ auto &fn_seq = it.second;
+ fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
+ }
+
+ return ret;
+}
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/ruy/BackendContext.h b/runtime/onert/backend/ruy/BackendContext.h
new file mode 100644
index 000000000..0dc30f557
--- /dev/null
+++ b/runtime/onert/backend/ruy/BackendContext.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_RUY_BACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+#include "ExternalContext.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+class BackendContext : public onert::backend::BackendContext
+{
+public:
+ BackendContext(const Backend *backend, ContextData &&data,
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
+ std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+ : onert::backend::BackendContext(backend, std::move(data), tensor_registry),
+ tensor_builder{tensor_builder}, kernel_gen{kernel_gen}, _external_context(new ExternalContext)
+ {
+ }
+
+ ITensorRegistry *genTensors() override;
+
+ FunctionMap genKernels() override;
+
+ std::shared_ptr<ExternalContext> external_context() { return _external_context; }
+
+private:
+ void planTensors(const std::vector<onert::ir::OperationIndex> &order,
+ const compiler::GraphLowerInfo &lower_info);
+
+public:
+ // TODO Make it private
+ std::shared_ptr<TensorBuilder> tensor_builder;
+ std::shared_ptr<KernelGenerator> kernel_gen;
+
+private:
+ // NOTE A ruy context owns a thread pool, so creating multiple ruy contexts also
+ // duplicates the thread pool
+ // TODO Create a single ruy context per session
+ std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_BACKEND_CONTEXT_H__
diff --git a/runtime/onert/backend/ruy/CMakeLists.txt b/runtime/onert/backend/ruy/CMakeLists.txt
new file mode 100644
index 000000000..206acbfbf
--- /dev/null
+++ b/runtime/onert/backend/ruy/CMakeLists.txt
@@ -0,0 +1,22 @@
+set(LIB_ONERT_BACKEND_RUY onert_backend_ruy)
+
+nnfw_find_package(Ruy REQUIRED)
+
+file(GLOB_RECURSE SOURCES "*.cc")
+
+add_library(${LIB_ONERT_BACKEND_RUY} SHARED ${SOURCES})
+
+target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE nnfw_lib_ruy)
+target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE onert_core)
+target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE nnfw_common)
+target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE nnfw_coverage)
+target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE ruy)
+
+set_target_properties(${LIB_ONERT_BACKEND_RUY} PROPERTIES OUTPUT_NAME backend_ruy)
+
+if(CMAKE_BUILD_TYPE_LC STREQUAL "release")
+ add_custom_command(TARGET ${LIB_ONERT_BACKEND_RUY} POST_BUILD
+ COMMAND ${CMAKE_STRIP} "--strip-unneeded" $<TARGET_FILE_NAME:${LIB_ONERT_BACKEND_RUY}>)
+endif()
+
+install(TARGETS ${LIB_ONERT_BACKEND_RUY} DESTINATION lib)
diff --git a/runtime/onert/backend/ruy/Config.cc b/runtime/onert/backend/ruy/Config.cc
new file mode 100644
index 000000000..fbeb2f7f0
--- /dev/null
+++ b/runtime/onert/backend/ruy/Config.cc
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Config.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+bool Config::initialize() { return true; }
+
+ir::Layout Config::supportLayout(const ir::IOperation &, ir::Layout) { return ir::Layout::NHWC; }
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/ruy/Config.h b/runtime/onert/backend/ruy/Config.h
new file mode 100644
index 000000000..fa6415b14
--- /dev/null
+++ b/runtime/onert/backend/ruy/Config.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_CONFIG_H__
+#define __ONERT_BACKEND_RUY_CONFIG_H__
+
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/ITimer.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+class Config : public IConfig
+{
+public:
+ std::string id() override { return "ruy"; }
+ bool initialize() override;
+ ir::Layout supportLayout(const ir::IOperation &node, ir::Layout frontend_layout) override;
+ bool supportPermutation() override { return true; }
+ bool supportDynamicTensor() override { return true; }
+ bool supportFP16() override { return false; }
+
+ std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); }
+};
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_CONFIG_H__
diff --git a/runtime/onert/backend/ruy/ExternalContext.h b/runtime/onert/backend/ruy/ExternalContext.h
new file mode 100644
index 000000000..c73ae636e
--- /dev/null
+++ b/runtime/onert/backend/ruy/ExternalContext.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_EXTERNAL_CONTEXT_H__
+#define __ONERT_BACKEND_RUY_EXTERNAL_CONTEXT_H__
+
+#include <util/ConfigSource.h>
+#include <ruy/context.h>
+
+#include <memory>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+class ExternalContext
+{
+private:
+ static const int kDefaultNumThreadpoolThreads = 4;
+
+public:
+ ExternalContext() : _ruy_context(new ::ruy::Context)
+ {
+ setMaxNumThreads(onert::util::getConfigInt(onert::util::config::RUY_THREADS));
+ }
+
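+ // A negative max_num_threads selects the fallback kDefaultNumThreadpoolThreads.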
+ void setMaxNumThreads(int max_num_threads)
+ {
+ const int target_num_threads =
+ max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads;
+ _ruy_context->set_max_num_threads(target_num_threads);
+ }
+
+ ::ruy::Context *ruy_context() const { return _ruy_context.get(); }
+
+private:
+ const std::unique_ptr<::ruy::Context> _ruy_context;
+};
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_EXTERNAL_CONTEXT_H__
diff --git a/runtime/onert/backend/ruy/KernelGenerator.cc b/runtime/onert/backend/ruy/KernelGenerator.cc
new file mode 100644
index 000000000..e5f2dbd39
--- /dev/null
+++ b/runtime/onert/backend/ruy/KernelGenerator.cc
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "KernelGenerator.h"
+
+#include "ops/ConvolutionLayer.h"
+#include "ops/FullyConnectedLayer.h"
+
+#include <backend/Backend.h>
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/Utils.h>
+#include <util/logging.h>
+#include <exec/DynamicShapeInferer.h>
+
+#include <stdexcept>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
+{
+ auto ret = std::make_unique<exec::FunctionSequence>();
+
+ assert(_tensor_builder->dynamicTensorManager());
+ assert(_tensor_reg);
+
+ // Prepare to handle dynamic tensors later
+ auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
+ {
+ dyn_ctx->op = &_operations_ctx.at(ind);
+ dyn_ctx->dynamic_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
+ }
+ ret->dynamic_tensor_ctx(dyn_ctx);
+
+ auto &op = _graph.operations().at(ind);
+ op.accept(*this);
+ assert(_return_fn); // _return_fn must have been generated
+ ret->append(std::move(_return_fn));
+
+ for (const auto &ind : (op.getInputs() | ir::Remove::UNDEFINED) + op.getOutputs())
+ {
+ auto portable_tensor = _tensor_reg->getPortableTensor(ind);
+ if (portable_tensor)
+ {
+ assert(portable_tensor->layout() == ir::Layout::NHWC);
+ }
+
+ auto tensor = _tensor_reg->getNativeTensor(ind);
+ if (tensor)
+ {
+ tensor->increase_ref();
+ }
+ }
+ return ret;
+}
+
+KernelGenerator::KernelGenerator(
+ const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<basic::TensorRegistry> &tensor_reg,
+ const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
+ const std::shared_ptr<ExternalContext> &external_context)
+ : basic::KernelGeneratorBase{graph},
+ _ctx(graph.operands()), _operations_ctx{graph.operations()}, _current_layout{graph.layout()},
+ _tensor_builder(tensor_builder), _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
+ _external_context(external_context)
+{
+ // DO NOTHING
+}
+
+void KernelGenerator::visit(const ir::operation::Conv2D &node)
+{
+ using ir::operation::Conv2D;
+
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
+ const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
+ const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
+
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
+ auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
+ auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);
+
+ const auto stride = node.param().stride;
+ const auto activation = node.param().activation;
+ const auto &param_padding = node.param().padding;
+ const auto dilation = node.param().dilation;
+ auto fn = std::make_unique<ops::ConvolutionLayer>();
+
+ if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic())
+ {
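+ // With dynamic shapes the effective padding cannot be computed here, so the raw
+ // padding parameters are handed to the kernel to resolve at run time.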
+ fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left,
+ param_padding.param.right, param_padding.param.top, param_padding.param.bottom,
+ stride.horizontal, stride.vertical, dilation.width_factor, dilation.height_factor,
+ activation, ofm_tensor, _external_context);
+
+ _return_fn = std::move(fn);
+ return;
+ }
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+ // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
+ const auto &ker_shape = _ctx.at(ker_index).shape();
+ const auto ker_height = ker_shape.dim(1);
+ const auto ker_width = ker_shape.dim(2);
+
+ const auto padding =
+ ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ dilation.width_factor, dilation.height_factor);
+
+ fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
+ padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
+ dilation.width_factor, dilation.height_factor, activation, ofm_tensor,
+ _external_context);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::FullyConnected &node)
+{
+ using ir::operation::FullyConnected;
+
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
+ const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
+ const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
+ const auto activation = node.param().activation;
+ const auto weights_format = node.param().weights_format;
+
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto weight_tensor = _tensor_reg->getPortableTensor(weight_index);
+ auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index);
+
+ auto fn = std::make_unique<ops::FullyConnectedLayer>();
+
+ fn->configure(input_tensor, weight_tensor, bias_tensor, activation, weights_format, output_tensor,
+ _external_context);
+
+ _return_fn = std::move(fn);
+}
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/ruy/KernelGenerator.h b/runtime/onert/backend/ruy/KernelGenerator.h
new file mode 100644
index 000000000..31551c46c
--- /dev/null
+++ b/runtime/onert/backend/ruy/KernelGenerator.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_KERNEL_GENERATOR_H__
+#define __ONERT_BACKEND_RUY_KERNEL_GENERATOR_H__
+
+#include "ExternalContext.h"
+#include "TensorBuilder.h"
+#include "backend/basic/TensorRegistry.h"
+#include "Tensor.h"
+
+#include <backend/CustomKernelBuilder.h>
+#include <backend/basic/KernelGeneratorBase.h>
+#include <ir/Operands.h>
+#include <ir/Operations.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+class KernelGenerator : public basic::KernelGeneratorBase
+{
+public:
+ KernelGenerator(const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<basic::TensorRegistry> &tensor_reg,
+ const std::shared_ptr<custom::IKernelBuilder> &kernel_builder,
+ const std::shared_ptr<ExternalContext> &external_context);
+
+ std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex ind) override;
+
+private:
+ void visit(const ir::operation::Conv2D &) override;
+ void visit(const ir::operation::FullyConnected &) override;
+
+private:
+ const ir::Operands &_ctx;
+ const ir::Operations &_operations_ctx;
+ const ir::Layout _current_layout;
+ std::shared_ptr<TensorBuilder> _tensor_builder;
+ std::shared_ptr<basic::TensorRegistry> _tensor_reg;
+ std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder;
+ const std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_KERNEL_GENERATOR_H__
diff --git a/runtime/onert/backend/ruy/StaticTensorManager.h b/runtime/onert/backend/ruy/StaticTensorManager.h
new file mode 100644
index 000000000..867e4dedb
--- /dev/null
+++ b/runtime/onert/backend/ruy/StaticTensorManager.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_STATICTENSOR_MANAGER_H__
+#define __ONERT_BACKEND_RUY_STATICTENSOR_MANAGER_H__
+
+#include "backend/basic/StaticTensorManager.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+using StaticTensorManager = basic::StaticTensorManager;
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_STATICTENSOR_MANAGER_H__
diff --git a/runtime/onert/backend/ruy/Tensor.h b/runtime/onert/backend/ruy/Tensor.h
new file mode 100644
index 000000000..658086018
--- /dev/null
+++ b/runtime/onert/backend/ruy/Tensor.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_TENSOR_H__
+#define __ONERT_BACKEND_RUY_TENSOR_H__
+
+#include <backend/basic/Tensor.h>
+#include <ir/Data.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+using Tensor = basic::Tensor;
+using ExternalTensor = basic::ExternalTensor;
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_TENSOR_H__
diff --git a/runtime/onert/backend/ruy/TensorBuilder.h b/runtime/onert/backend/ruy/TensorBuilder.h
new file mode 100644
index 000000000..15d4e5b29
--- /dev/null
+++ b/runtime/onert/backend/ruy/TensorBuilder.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_TENSOR_BUILDER_H__
+#define __ONERT_BACKEND_RUY_TENSOR_BUILDER_H__
+
+#include <backend/basic/TensorBuilder.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+using TensorBuilder = basic::TensorBuilder;
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_TENSOR_BUILDER_H__
diff --git a/runtime/onert/backend/ruy/ops/ConvolutionLayer.cc b/runtime/onert/backend/ruy/ops/ConvolutionLayer.cc
new file mode 100644
index 000000000..1a2441082
--- /dev/null
+++ b/runtime/onert/backend/ruy/ops/ConvolutionLayer.cc
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConvolutionLayer.h"
+
+#include "../Tensor.h"
+#include "ir/Padding.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+namespace ops
+{
+ConvolutionLayer::ConvolutionLayer()
+ : _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+ _paddingType(ir::PaddingType::EXPLICIT), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
+ _paddingBottom(0), _strideWidth(0), _strideHeight(0), _dilationWidthFactor(1),
+ _dilationHeightFactor(1), _activation(ir::Activation::NONE),
+ _conv_kernel(new nnfw::ruy::Conv()), _prepare(false)
+{
+ // DO NOTHING
+}
+
+ConvolutionLayer::~ConvolutionLayer() = default;
+
+void ConvolutionLayer::convFloat32()
+{
+ float output_activation_min = 0, output_activation_max = 0;
+ CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
+
+ nnfw::ruy::ConvParams op_params;
+ op_params.padding_type = getPaddingType(_paddingType);
+ op_params.padding_values.width = _paddingLeft;
+ op_params.padding_values.height = _paddingTop;
+ op_params.stride_width = _strideWidth;
+ op_params.stride_height = _strideHeight;
+ op_params.dilation_width_factor = _dilationWidthFactor;
+ op_params.dilation_height_factor = _dilationHeightFactor;
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+
+ nnfw::ruy::Conv &kernel = *_conv_kernel;
+ kernel(op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
+ getTensorShape(_kernel), reinterpret_cast<const float *>(_kernel->buffer()),
+ getTensorShape(_bias), reinterpret_cast<const float *>(_bias->buffer()),
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()),
+ _external_context->ruy_context());
+}
+
+void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTensor *kernel,
+ const IPortableTensor *bias, const ir::PaddingType paddingType,
+ const uint32_t paddingLeft, const uint32_t paddingRight,
+ const uint32_t paddingTop, const uint32_t paddingBottom,
+ const uint32_t strideWidth, const uint32_t strideHeight,
+ const uint32_t dilationWidthFactor,
+ const uint32_t dilationHeightFactor,
+ const ir::Activation activation, IPortableTensor *output,
+ const std::shared_ptr<ExternalContext> &external_context)
+{
+ _input = input;
+ _kernel = kernel;
+ _bias = bias;
+ _paddingType = paddingType;
+ _paddingLeft = paddingLeft;
+ _paddingRight = paddingRight;
+ _paddingTop = paddingTop;
+ _paddingBottom = paddingBottom;
+ _strideWidth = strideWidth;
+ _strideHeight = strideHeight;
+ _dilationWidthFactor = dilationWidthFactor;
+ _dilationHeightFactor = dilationHeightFactor;
+ _activation = activation;
+ _output = output;
+ _external_context = external_context;
+}
+
+void ConvolutionLayer::run()
+{
+ prepare();
+
+ if (_input->is_dynamic() || _kernel->is_dynamic())
+ {
+ const auto ifm_shape = _input->getShape().asFeature(_input->layout());
+ const auto ofm_shape = _output->getShape().asFeature(_input->layout());
+ // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
+ const auto ker_shape = _kernel->getShape();
+ const auto ker_height = ker_shape.dim(1);
+ const auto ker_width = ker_shape.dim(2);
+
+ ir::Stride stride;
+    stride.vertical = _strideHeight;
+    stride.horizontal = _strideWidth;
+
+ ir::Padding param_padding;
+ param_padding.type = _paddingType;
+ param_padding.param.left = _paddingLeft;
+ param_padding.param.right = _paddingRight;
+ param_padding.param.top = _paddingTop;
+ param_padding.param.bottom = _paddingBottom;
+
+ const auto padding =
+ ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ _dilationWidthFactor, _dilationHeightFactor);
+
+ _paddingLeft = padding.left;
+ _paddingRight = padding.right;
+ _paddingTop = padding.top;
+ _paddingBottom = padding.bottom;
+ }
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ convFloat32();
+ }
+ else
+ {
+ throw std::runtime_error{"Conv: unsupported data type"};
+ }
+}
+
+void ConvolutionLayer::prepare()
+{
+ if (_prepare)
+ return;
+
+ nnfw::ruy::Conv &kernel = *_conv_kernel;
+ if (_input->data_type() == OperandType::FLOAT32 && _kernel->is_constant())
+ {
+ kernel.prepare(getTensorShape(_input), getTensorShape(_kernel), getTensorShape(_output),
+ _strideWidth, _strideHeight, _dilationWidthFactor, _dilationHeightFactor);
+ }
+ _prepare = true;
+}
+
+} // namespace ops
+} // namespace ruy
+} // namespace backend
+} // namespace onert
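
When the input or kernel is dynamic, `run()` above re-derives explicit padding from the padding type on every invocation before dispatching to the ruy kernel. A self-contained sketch of the SAME-padding arithmetic that `ir::calculatePadding` performs per spatial axis (the helper name and the TensorFlow-style edge split below are illustrative assumptions, not onert's exact implementation):

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>

struct AxisPadding
{
  int32_t before, after;
};

// SAME padding for one spatial axis: output = ceil(input / stride), and any
// shortfall is split across both edges, the trailing edge taking the extra pixel.
AxisPadding samePadding(int32_t in, int32_t kernel, int32_t stride, int32_t dilation)
{
  const int32_t out = (in + stride - 1) / stride; // ceil division
  const int32_t effective_kernel = (kernel - 1) * dilation + 1;
  const int32_t total = std::max<int32_t>(0, (out - 1) * stride + effective_kernel - in);
  return {total / 2, total - total / 2};
}

int main()
{
  // 224-wide axis, 3-wide kernel, stride 2, no dilation -> pad {0, 1}.
  const AxisPadding pad = samePadding(224, 3, 2, 1);
  std::cout << pad.before << ", " << pad.after << std::endl; // prints "0, 1"
}
```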
diff --git a/runtime/onert/backend/ruy/ops/ConvolutionLayer.h b/runtime/onert/backend/ruy/ops/ConvolutionLayer.h
new file mode 100644
index 000000000..a55387b93
--- /dev/null
+++ b/runtime/onert/backend/ruy/ops/ConvolutionLayer.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_OPS_CONVOLUTIONLAYER_H__
+#define __ONERT_BACKEND_RUY_OPS_CONVOLUTIONLAYER_H__
+
+#include <backend/IPortableTensor.h>
+#include "../ExternalContext.h"
+#include "OperationUtils.h"
+
+#include <ruy/operation/Conv.h>
+#include <exec/IFunction.h>
+#include <functional>
+#include <memory>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+namespace ops
+{
+
+class ConvolutionLayer : public ::onert::exec::IFunction
+{
+public:
+ ConvolutionLayer();
+ ~ConvolutionLayer();
+
+public:
+ void convFloat32();
+
+ void configure(const IPortableTensor *input, const IPortableTensor *kernel,
+                 const IPortableTensor *bias, ir::PaddingType paddingType,
+ const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
+ const uint32_t paddingBottom, const uint32_t strideWidth,
+ const uint32_t strideHeight, const uint32_t dilationWidthFactor,
+ const uint32_t dilationHeightFactor, const ir::Activation activation,
+ IPortableTensor *output, const std::shared_ptr<ExternalContext> &external_context);
+
+ void run() override;
+
+ void prepare() override;
+
+private:
+ const IPortableTensor *_input;
+ const IPortableTensor *_kernel;
+ const IPortableTensor *_bias;
+ IPortableTensor *_output;
+
+ ir::PaddingType _paddingType;
+ uint32_t _paddingLeft;
+ uint32_t _paddingTop;
+ uint32_t _paddingRight;
+ uint32_t _paddingBottom;
+
+ uint32_t _strideWidth;
+ uint32_t _strideHeight;
+ uint32_t _dilationWidthFactor;
+ uint32_t _dilationHeightFactor;
+
+ ir::Activation _activation;
+
+ std::unique_ptr<nnfw::ruy::Conv> _conv_kernel;
+
+ bool _prepare;
+
+ std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace ops
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_OPS_CONVOLUTIONLAYER_H__
diff --git a/runtime/onert/backend/ruy/ops/FullyConnectedLayer.cc b/runtime/onert/backend/ruy/ops/FullyConnectedLayer.cc
new file mode 100644
index 000000000..9c9f31179
--- /dev/null
+++ b/runtime/onert/backend/ruy/ops/FullyConnectedLayer.cc
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FullyConnectedLayer.h"
+
+#include "../Tensor.h"
+#include <ruy/operation/FullyConnected.h>
+#include <ruy/TensorUtils.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+namespace ops
+{
+
+FullyConnectedLayer::FullyConnectedLayer()
+ : _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr),
+ _activation(ir::Activation::NONE), _external_context(nullptr)
+{
+ // DO NOTHING
+}
+
+FullyConnectedLayer::~FullyConnectedLayer() = default;
+
+void FullyConnectedLayer::fullyConnectedFloat32()
+{
+ float output_activation_min = 0, output_activation_max = 0;
+ CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
+ nnfw::ruy::FullyConnectedParams op_params;
+
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+ op_params.activation = convertActivationType(_activation);
+ op_params.lhs_cacheable = _weights->is_constant();
+ op_params.rhs_cacheable = _input->is_constant();
+
+ nnfw::ruy::FullyConnected(
+ op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
+ getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
+ getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()),
+ _external_context->ruy_context());
+}
+
+void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights,
+ const IPortableTensor *bias, ir::Activation activation,
+ ir::FullyConnectedWeightsFormat weights_format,
+ IPortableTensor *output,
+ const std::shared_ptr<ExternalContext> &external_context)
+{
+ UNUSED_RELEASE(weights_format);
+ _input = input;
+ _weights = weights;
+ _bias = bias;
+ _activation = activation;
+ _output = output;
+ _external_context = external_context;
+}
+
+void FullyConnectedLayer::run()
+{
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ fullyConnectedFloat32();
+ }
+ else
+ {
+ throw std::runtime_error{"FullyConnected: unsupported data type"};
+ }
+}
+
+void FullyConnectedLayer::prepare()
+{
+ if (_bias && _bias->is_constant())
+ {
+ const int bias_size = getTensorShape(_bias).FlatSize();
+ if (nnfw::ruy::IsZeroVector(reinterpret_cast<float *>(_bias->buffer()), bias_size))
+ {
+ _bias = nullptr;
+ }
+ }
+}
+
+} // namespace ops
+} // namespace ruy
+} // namespace backend
+} // namespace onert
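
`prepare()` above drops a constant all-zero bias so the ruy kernel can skip the bias-add entirely. A minimal standalone illustration of that elision (the real check is `nnfw::ruy::IsZeroVector` over the tensor buffer; `isZeroVector` here is a hypothetical stand-in):

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

// Hypothetical stand-in for nnfw::ruy::IsZeroVector.
bool isZeroVector(const float *data, std::size_t size)
{
  for (std::size_t i = 0; i < size; ++i)
    if (data[i] != 0.0f)
      return false;
  return true;
}

int main()
{
  std::vector<float> bias(128, 0.0f);
  // prepare() sets _bias to nullptr in this case, and the kernel skips bias-add.
  const float *bias_ptr = isZeroVector(bias.data(), bias.size()) ? nullptr : bias.data();
  std::cout << (bias_ptr == nullptr ? "bias elided" : "bias kept") << std::endl;
}
```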
diff --git a/runtime/onert/backend/ruy/ops/FullyConnectedLayer.h b/runtime/onert/backend/ruy/ops/FullyConnectedLayer.h
new file mode 100644
index 000000000..33d560f0b
--- /dev/null
+++ b/runtime/onert/backend/ruy/ops/FullyConnectedLayer.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_OPS_FULLYCONNECTEDLAYER_H__
+#define __ONERT_BACKEND_RUY_OPS_FULLYCONNECTEDLAYER_H__
+
+#include <backend/IPortableTensor.h>
+#include "../ExternalContext.h"
+#include "OperationUtils.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+namespace ops
+{
+
+class FullyConnectedLayer : public ::onert::exec::IFunction
+{
+public:
+ FullyConnectedLayer();
+ ~FullyConnectedLayer();
+
+public:
+ void fullyConnectedFloat32();
+
+ void configure(const IPortableTensor *input, const IPortableTensor *weights,
+ const IPortableTensor *bias, ir::Activation activation,
+ ir::FullyConnectedWeightsFormat weights_format, IPortableTensor *output,
+ const std::shared_ptr<ExternalContext> &external_context);
+
+ void run() override;
+
+ void prepare() override;
+
+private:
+ const IPortableTensor *_input;
+ const IPortableTensor *_weights;
+ const IPortableTensor *_bias;
+ IPortableTensor *_output;
+
+ ir::Activation _activation;
+
+ std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace ops
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_OPS_FULLYCONNECTEDLAYER_H__
diff --git a/runtime/onert/backend/ruy/ops/OperationUtils.cc b/runtime/onert/backend/ruy/ops/OperationUtils.cc
new file mode 100644
index 000000000..929107b1a
--- /dev/null
+++ b/runtime/onert/backend/ruy/ops/OperationUtils.cc
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OperationUtils.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+namespace ops
+{
+
+nnfw::ruy::PaddingType getPaddingType(ir::PaddingType ir_padding_type)
+{
+ switch (ir_padding_type)
+ {
+ case ir::PaddingType::EXPLICIT:
+ return nnfw::ruy::PaddingType::kNone;
+ case ir::PaddingType::SAME:
+ return nnfw::ruy::PaddingType::kSame;
+ case ir::PaddingType::VALID:
+ return nnfw::ruy::PaddingType::kValid;
+ default:
+ throw std::runtime_error("Wrong padding type.");
+ break;
+ }
+}
+
+} // namespace ops
+} // namespace ruy
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/ruy/ops/OperationUtils.h b/runtime/onert/backend/ruy/ops/OperationUtils.h
new file mode 100644
index 000000000..716400c1f
--- /dev/null
+++ b/runtime/onert/backend/ruy/ops/OperationUtils.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_OPS_OPERATION_UTILS_H__
+#define __ONERT_BACKEND_RUY_OPS_OPERATION_UTILS_H__
+
+#include <backend/IPortableTensor.h>
+#include <ir/DataType.h>
+#include <ir/Padding.h>
+#include <util/CalculateActivationRange.h>
+
+#include <ruy/Shape.h>
+#include <ruy/Types.h>
+
+#include <limits>
+
+using OperandType = onert::ir::DataType;
+using namespace onert::util;
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+namespace ops
+{
+
+inline nnfw::ruy::Shape getTensorShape(const IPortableTensor *tensor)
+{
+ if (tensor == nullptr)
+ return nnfw::ruy::Shape();
+
+ const ir::Shape &shape = tensor->get_info().shape();
+
+ assert(tensor->layout() == ir::Layout::NHWC);
+
+ auto rank = shape.rank();
+ nnfw::ruy::Shape ret(rank);
+ auto data = ret.DimsData();
+ for (int i = 0; i < rank; ++i)
+ {
+ data[i] = shape.dim(i);
+ }
+ return ret;
+}
+
+inline nnfw::ruy::FusedActivationFunctionType convertActivationType(const ir::Activation activation)
+{
+ switch (activation)
+ {
+ case ir::Activation::NONE:
+ return nnfw::ruy::FusedActivationFunctionType::kNone;
+ case ir::Activation::RELU:
+ return nnfw::ruy::FusedActivationFunctionType::kRelu;
+ case ir::Activation::RELU1:
+ return nnfw::ruy::FusedActivationFunctionType::kRelu1;
+ case ir::Activation::RELU6:
+ return nnfw::ruy::FusedActivationFunctionType::kRelu6;
+ case ir::Activation::TANH:
+ return nnfw::ruy::FusedActivationFunctionType::kTanh;
+ case ir::Activation::SIGMOID:
+ return nnfw::ruy::FusedActivationFunctionType::kSigmoid;
+ default:
+ throw std::runtime_error{"RUY backend: Cannot convert activation type"};
+ }
+}
+
+nnfw::ruy::PaddingType getPaddingType(ir::PaddingType ir_padding_type);
+
+} // namespace ops
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_OPS_OPERATION_UTILS_H__
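
Both layers pass `float_activation_min/max` derived from the fused activation so the kernel clamps outputs in place instead of running a separate activation op. A sketch of that mapping under the usual TFLite-style convention (the real helper is `onert::util::CalculateActivationRange`; the enum and bounds below are illustrative, and the real code may use `std::numeric_limits` lowest/max rather than infinities):

```cpp
#include <iostream>
#include <limits>
#include <stdexcept>

enum class Activation { NONE, RELU, RELU1, RELU6 };

// Fused activations become clamp bounds applied inside the matmul/conv kernel.
void calculateActivationRange(Activation act, float *min, float *max)
{
  const float inf = std::numeric_limits<float>::infinity();
  switch (act)
  {
    case Activation::NONE:  *min = -inf;  *max = inf;  break;
    case Activation::RELU:  *min = 0.0f;  *max = inf;  break;
    case Activation::RELU1: *min = -1.0f; *max = 1.0f; break;
    case Activation::RELU6: *min = 0.0f;  *max = 6.0f; break;
    default: throw std::runtime_error{"unsupported fused activation"};
  }
}

int main()
{
  float lo = 0.0f, hi = 0.0f;
  calculateActivationRange(Activation::RELU6, &lo, &hi);
  std::cout << lo << " .. " << hi << std::endl; // 0 .. 6
}
```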
diff --git a/runtime/onert/backend/ruy/ruy.cc b/runtime/onert/backend/ruy/ruy.cc
new file mode 100644
index 000000000..4f33590e9
--- /dev/null
+++ b/runtime/onert/backend/ruy/ruy.cc
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Backend.h"
+
+extern "C" {
+
+onert::backend::Backend *onert_backend_create() { return new onert::backend::ruy::Backend; }
+
+void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; }
+}
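
These two `extern "C"` functions are the whole plugin ABI: the runtime resolves them by name from the backend shared library. A minimal sketch of that loading pattern with POSIX `dlopen`/`dlsym` (the library filename and the `void *` signatures are illustrative; onert's actual loader lives in its backend manager):

```cpp
#include <dlfcn.h>
#include <iostream>

// The two symbols exported above, seen through the C ABI. `void *` stands in
// for onert::backend::Backend * so this sketch needs no onert headers.
using create_fn = void *(*)();
using destroy_fn = void (*)(void *);

int main()
{
  // The path is illustrative; the runtime derives it from the backend id.
  void *handle = dlopen("./libbackend_ruy.so", RTLD_LAZY | RTLD_LOCAL);
  if (handle == nullptr)
  {
    std::cerr << dlerror() << std::endl;
    return 1;
  }
  auto create = reinterpret_cast<create_fn>(dlsym(handle, "onert_backend_create"));
  auto destroy = reinterpret_cast<destroy_fn>(dlsym(handle, "onert_backend_destroy"));
  if (create != nullptr && destroy != nullptr)
  {
    void *backend = create(); // new onert::backend::ruy::Backend
    // ... register the backend with the runtime, run inference ...
    destroy(backend);
  }
  dlclose(handle);
  return 0;
}
```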
diff --git a/runtime/onert/backend/train/Backend.h b/runtime/onert/backend/train/Backend.h
new file mode 100644
index 000000000..9b8d50a56
--- /dev/null
+++ b/runtime/onert/backend/train/Backend.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRAIN_BACKEND_H__
+#define __ONERT_BACKEND_TRAIN_BACKEND_H__
+
+#include "BackendContext.h"
+#include "Config.h"
+#include "KernelGenerator.h"
+
+#include <backend/Backend.h>
+#include <backend/train/ITrainableBackend.h>
+
+#include <memory>
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+
+// TODO Unify TensorBuilder
+// TODO Unify TensorRegistry
+class Backend : public ::onert::backend::Backend, public backend::train::ITrainableBackend
+{
+public:
+ Backend() : _config{std::make_shared<Config>()} {}
+
+ std::shared_ptr<IConfig> config() const override { return _config; }
+
+ std::unique_ptr<onert::backend::BackendContext> newContext(ContextData &&data) const override
+ {
+ return std::make_unique<DummyBackendContext>(this, std::move(data));
+ }
+
+ std::unique_ptr<backend::train::TrainableBackendContext>
+ newContext(backend::train::TrainableContextData &&tdata) const override
+ {
+ const auto &tgraph = *tdata.tgraph;
+ auto tr = std::make_shared<TensorRegistry>();
+ auto tb = std::make_shared<TensorBuilder>(tr, "Bump");
+ auto tdata_ptr = std::make_unique<backend::train::TrainableContextData>(std::move(tdata));
+ auto context = std::make_unique<train::BackendContext>(this, std::move(tdata_ptr), tr, tb);
+
+ context->kernel_gen = std::make_shared<train::KernelGenerator>(
+ tgraph, tr, context->external_context(), context->data()->optimizer);
+ return context;
+ }
+
+private:
+ std::shared_ptr<IConfig> _config;
+};
+
+} // namespace train
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRAIN_BACKEND_H__
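
The backend deliberately exposes two factories: the plain `newContext` returns a `DummyBackendContext` that generates nothing, while the trainable overload wires the tensor registry, builder, and kernel generator together. A stand-in sketch of that dual-factory shape (all types below are illustrative, not onert's):

```cpp
#include <iostream>
#include <memory>

struct Context
{
  virtual ~Context() = default;
  virtual void genKernels() = 0;
};

struct DummyContext : Context
{
  void genKernels() override {} // inert: the trainable path owns codegen
};

struct TrainableContext : Context
{
  void genKernels() override { std::cout << "generate trainable kernels\n"; }
};

struct TrainBackend
{
  std::unique_ptr<Context> newContext() const { return std::make_unique<DummyContext>(); }
  std::unique_ptr<Context> newTrainableContext() const
  {
    return std::make_unique<TrainableContext>();
  }
};

int main()
{
  TrainBackend backend;
  backend.newContext()->genKernels();          // no-op by design
  backend.newTrainableContext()->genKernels(); // prints
}
```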
diff --git a/runtime/onert/backend/train/BackendContext.cc b/runtime/onert/backend/train/BackendContext.cc
new file mode 100644
index 000000000..3ee9a7233
--- /dev/null
+++ b/runtime/onert/backend/train/BackendContext.cc
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+
+#include <backend/basic/train/TrainableBackendContextHelpers.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+
+backend::ITensorRegistry *BackendContext::genTensors()
+{
+ return basic::train::genTensors(*this, _tensor_builder);
+}
+
+backend::train::ITensorRegistry *BackendContext::genTrainingTensors()
+{
+ const ir::train::TrainableGraph &tgraph = *trainable_graph();
+ auto tensor_builder = _tensor_builder;
+
+ tgraph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+ if (external_operands().contains(ind))
+ return;
+    // NOTE Assuming there are no layout changes (always NHWC or UNKNOWN)
+ assert(tgraph.layout() != ir::Layout::NCHW);
+
+ // TODO Different shape of deriv tensor
+ ir::OperandInfo backend_info{obj.shape(), obj.typeInfo(), obj.info().memAllocType(),
+ obj.isConstant()};
+ tensor_builder->registerBackwardTensorInfo(ind, backend_info, ir::Layout::NHWC);
+ });
+
+ // TODO Plan tensor builds to reduce peak memory usage
+ tgraph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
+ if (tensor_builder->isRegisteredBackward(ind))
+ tensor_builder->notifyBackwardFirstUse(ind);
+ });
+
+ tensor_builder->allocateBackward();
+
+ return _tensor_registry.get();
+}
+
+FunctionMap BackendContext::genKernels()
+{
+ train::FunctionMap ret;
+
+ for (const auto &op_ind : _tdata->op_order)
+ {
+ auto fn_seq = kernel_gen->generate(op_ind);
+ ret.emplace_back(op_ind, std::move(fn_seq));
+ }
+
+ // Initialize TrainableTensors
+ trainable_graph()->operands().iterate(
+ [&](const ir::OperandIndex &ind, const ir::Operand &operand) {
+ if (external_operands().contains(ind) || !operand.isConstant())
+ return;
+
+ auto tensor = tensor_registry()->getNativeITensor(ind);
+ assert(tensor != nullptr);
+
+ VERBOSE(FillOperandData) << "Fill data for " << ind << std::endl;
+
+ auto data = operand.shareData();
+ assert(data && data->base());
+ auto trainable_tensor = dynamic_cast<TrainableTensor *>(tensor);
+
+ if (trainable_tensor == nullptr)
+ throw std::runtime_error{"This tensor is not trainable tensor"};
+
+ trainable_tensor->fillBuffer(data);
+ });
+
+ // NOTE For memory optimization, we want to free some operand data
+ const_cast<ir::train::TrainableGraph &>(*_tdata->tgraph)
+ .operands()
+ .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
+
+ // TODO Enable
+ // for (auto &&it : ret)
+ // {
+ // auto &fn_seq = it.second;
+ // fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
+ // }
+
+ return ret;
+}
+
+} // namespace train
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/train/BackendContext.h b/runtime/onert/backend/train/BackendContext.h
new file mode 100644
index 000000000..b5b572b35
--- /dev/null
+++ b/runtime/onert/backend/train/BackendContext.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRAIN_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_TRAIN_BACKEND_CONTEXT_H__
+
+#include <backend/train/TrainableBackendContext.h>
+
+#include "ExternalContext.h"
+#include "KernelGenerator.h"
+#include "TensorBuilder.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+
+// TODO Remove this class if ExecutorFactory creates trainable context only once instead of
+// replacing BackendContext
+class DummyBackendContext : public backend::BackendContext
+{
+public:
+ DummyBackendContext(const Backend *backend, ContextData &&data,
+ std::shared_ptr<backend::ITensorRegistry> tensor_registry = nullptr)
+ : backend::BackendContext(backend, std::move(data), tensor_registry)
+ {
+ }
+
+ backend::ITensorRegistry *genTensors() override { return nullptr; }
+
+ backend::FunctionMap genKernels() override { return backend::FunctionMap{}; }
+};
+
+// TODO Unify TensorBuilder
+// TODO Unify TensorRegistry
+class BackendContext : public onert::backend::train::TrainableBackendContext
+{
+public:
+ BackendContext(const ITrainableBackend *backend, std::unique_ptr<TrainableContextData> &&tdata,
+ std::shared_ptr<backend::train::ITensorRegistry> tensor_registry = nullptr,
+ std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+ : onert::backend::train::TrainableBackendContext(backend, std::move(tdata), tensor_registry),
+ kernel_gen{kernel_gen},
+ _external_context(new ExternalContext), _tensor_builder{tensor_builder}
+ {
+ }
+
+ backend::ITensorRegistry *genTensors() override;
+ backend::train::ITensorRegistry *genTrainingTensors() override;
+
+public:
+ FunctionMap genKernels() override;
+
+ std::shared_ptr<ExternalContext> external_context() { return _external_context; }
+
+public:
+ // TODO Make it private
+ std::shared_ptr<KernelGenerator> kernel_gen;
+
+private:
+ // NOTE ruy context has a thread pool, and when multiple ruy contexts are created,
+ // the thread pool is also created in duplicate
+ // TODO Create one ruy context for session
+ std::shared_ptr<ExternalContext> _external_context;
+
+private:
+ std::shared_ptr<TensorBuilder> _tensor_builder;
+};
+
+} // namespace train
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRAIN_BACKEND_CONTEXT_H__
diff --git a/runtime/onert/backend/train/CMakeLists.txt b/runtime/onert/backend/train/CMakeLists.txt
new file mode 100644
index 000000000..fd50685b4
--- /dev/null
+++ b/runtime/onert/backend/train/CMakeLists.txt
@@ -0,0 +1,20 @@
+set(LIB_ONERT_BACKEND_TRAIN onert_backend_train)
+
+file(GLOB_RECURSE SOURCES "*.cc")
+
+add_library(${LIB_ONERT_BACKEND_TRAIN} SHARED ${SOURCES})
+
+target_link_libraries(${LIB_ONERT_BACKEND_TRAIN} PRIVATE ${LIB_ONERT_BACKEND_CPU})
+target_link_libraries(${LIB_ONERT_BACKEND_TRAIN} PRIVATE onert_core)
+target_link_libraries(${LIB_ONERT_BACKEND_TRAIN} PRIVATE nnfw_lib_cker nnfw_lib_misc)
+target_link_libraries(${LIB_ONERT_BACKEND_TRAIN} PRIVATE nnfw_common)
+target_link_libraries(${LIB_ONERT_BACKEND_TRAIN} PRIVATE nnfw_coverage)
+
+set_target_properties(${LIB_ONERT_BACKEND_TRAIN} PROPERTIES OUTPUT_NAME backend_train)
+
+if(CMAKE_BUILD_TYPE_LC STREQUAL "release")
+ add_custom_command(TARGET ${LIB_ONERT_BACKEND_TRAIN} POST_BUILD
+ COMMAND ${CMAKE_STRIP} "--strip-unneeded" $<TARGET_FILE_NAME:${LIB_ONERT_BACKEND_TRAIN}>)
+endif()
+
+install(TARGETS ${LIB_ONERT_BACKEND_TRAIN} DESTINATION lib)
diff --git a/runtime/onert/backend/train/Config.cc b/runtime/onert/backend/train/Config.cc
new file mode 100644
index 000000000..57a68adc4
--- /dev/null
+++ b/runtime/onert/backend/train/Config.cc
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Config.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+
+bool Config::initialize() { return true; }
+
+ir::Layout Config::supportLayout(const ir::IOperation &, ir::Layout) { return ir::Layout::NHWC; }
+
+} // namespace train
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/train/Config.h b/runtime/onert/backend/train/Config.h
new file mode 100644
index 000000000..c8cf52b4d
--- /dev/null
+++ b/runtime/onert/backend/train/Config.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRAIN_CONFIG_H__
+#define __ONERT_BACKEND_TRAIN_CONFIG_H__
+
+#include <backend/IConfig.h>
+#include <util/ITimer.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+
+class Config : public IConfig
+{
+public:
+ std::string id() override { return "train"; }
+ bool initialize() override;
+ ir::Layout supportLayout(const ir::IOperation &node, ir::Layout frontend_layout) override;
+ bool supportPermutation() override { return true; }
+ bool supportDynamicTensor() override { return false; }
+ bool supportFP16() override { return false; }
+
+ std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); }
+};
+
+} // namespace train
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRAIN_CONFIG_H__
diff --git a/runtime/onert/backend/train/ExternalContext.h b/runtime/onert/backend/train/ExternalContext.h
new file mode 100644
index 000000000..c24010ea2
--- /dev/null
+++ b/runtime/onert/backend/train/ExternalContext.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRAIN_EXTERNAL_CONTEXT_H__
+#define __ONERT_BACKEND_TRAIN_EXTERNAL_CONTEXT_H__
+
+#include <ExternalContext.h> // From cpu backend
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+
+using ExternalContext = cpu::ExternalContext;
+
+} // namespace train
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRAIN_EXTERNAL_CONTEXT_H__
diff --git a/runtime/onert/backend/train/KernelGenerator.cc b/runtime/onert/backend/train/KernelGenerator.cc
new file mode 100644
index 000000000..d3114e822
--- /dev/null
+++ b/runtime/onert/backend/train/KernelGenerator.cc
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "KernelGenerator.h"
+
+#include "ops/ConvolutionLayer.h"
+#include "ops/ElementwiseActivationLayer.h"
+#include "ops/FullyConnectedLayer.h"
+#include "ops/LossLayer.h"
+#include "ops/GradientApplier.h"
+#include "ops/PoolLayer.h"
+#include "ops/ReshapeLayer.h"
+
+#include <backend/Backend.h>
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/Utils.h>
+#include <util/logging.h>
+#include <exec/DynamicShapeInferer.h>
+
+#include <stdexcept>
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+
+namespace
+{
+ops::ElementwiseActivationType
+convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type_ir)
+{
+ switch (type_ir)
+ {
+ case ir::operation::ElementwiseActivation::Type::RELU:
+ return ops::ElementwiseActivationType::kReLU;
+ default:
+ throw std::runtime_error("train KernelGenerator : Not supported operation yet");
+ }
+}
+
+ops::LossType convertLossType(ir::operation::Loss::Type type_ir)
+{
+ switch (type_ir)
+ {
+ case ir::operation::Loss::Type::MEAN_SQUARED_ERROR:
+ return ops::LossType::kMSE;
+ default:
+ throw std::runtime_error("train KernelGenerator : Not supported operation yet");
+ }
+}
+
+ops::PoolType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
+{
+ switch (type_ir)
+ {
+ // TODO Implement AVG PoolType
+ case ir::operation::Pool2D::PoolType::MAX:
+ return ops::PoolType::kMax;
+ default:
+ throw std::runtime_error("train KernelGenerator : Not supported operation yet");
+ }
+}
+
+std::unique_ptr<ops::GradientApplier>
+generateGradientApplier(const std::shared_ptr<exec::train::optimizer::Optimizer> optimizer,
+ const IPortableTensor *gradient, ITrainableTensor *trainable)
+{
+ auto update_fn = std::make_unique<ops::GradientApplier>();
+ update_fn->configure(optimizer, gradient, trainable);
+ return update_fn;
+}
+} // namespace
+
+std::unique_ptr<exec::train::TrainableFnSequence> KernelGenerator::generate(ir::OperationIndex idx)
+{
+ auto ret = std::make_unique<exec::train::TrainableFnSequence>();
+
+ const auto &op = _tgraph.operation(idx);
+ op.accept(*this);
+ assert(_return_fn);
+ ret->append(std::move(_return_fn));
+
+ for (auto &&update_fn : _update_funcs)
+ ret->append(std::move(update_fn));
+ _update_funcs.clear();
+
+ for (auto &&ind : (op.getInputs() | ir::Remove::UNDEFINED) + op.getOutputs())
+ {
+ auto portable_tensor = _tensor_reg->getPortableTensor(ind);
+ if (portable_tensor)
+ {
+ assert(portable_tensor->layout() == ir::Layout::NHWC);
+ }
+ auto tensor = _tensor_reg->getNonConstTensor(ind);
+ if (tensor)
+ {
+ tensor->increase_ref();
+ }
+ }
+ return ret;
+}
+
+KernelGenerator::KernelGenerator(const ir::train::TrainableGraph &tgraph,
+ const std::shared_ptr<TensorRegistry> &tensor_reg,
+ const std::shared_ptr<ExternalContext> &external_context,
+ std::shared_ptr<exec::train::optimizer::Optimizer> optimizer)
+ : backend::train::KernelGeneratorBase{tgraph}, _current_layout{tgraph.layout()},
+ _tensor_reg{tensor_reg},
+ _external_context(external_context), _optimizer{optimizer}, _update_funcs{}
+{
+ // DO NOTHING
+}
+
+void KernelGenerator::visit(const ir::train::operation::Conv2D &node)
+{
+ // TODO Generate kernel
+
+ // Generate GradientApplier
+ const auto ker_index{node.getInputs().at(ir::train::operation::Conv2D::Input::KERNEL)};
+
+ auto grad_tensor = _tensor_reg->getGradientTensor(ker_index);
+ auto ker_tensor = _tensor_reg->getTrainableTensor(ker_index);
+
+  // generateGradientApplier creates and configures a GradientApplier that
+  // applies the kernel gradient to the trainable kernel tensor through the
+  // optimizer; it runs after the op's kernel (still TODO above).
+
+  _update_funcs.emplace_back(generateGradientApplier(_optimizer, grad_tensor, ker_tensor));
+}
+
+void KernelGenerator::visit(const ir::train::operation::ElementwiseActivation &node)
+{
+ using ir::train::operation::ElementwiseActivation;
+
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ElementwiseActivation::Input::INPUT)};
+
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+
+ auto deriv_input_tensor = _tensor_reg->getDerivativeTensor(input_index);
+ auto deriv_output_tensor = _tensor_reg->getDerivativeTensor(output_index);
+
+ auto fn = std::make_unique<ops::ElementwiseActivationLayer>();
+
+ fn->configure(input_tensor, output_tensor, deriv_input_tensor, deriv_output_tensor,
+ node.param().alpha, node.param().beta,
+ convertElementwiseActivationType(node.param().op_type));
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::train::operation::FullyConnected &node)
+{
+ using ir::train::operation::FullyConnected;
+
+ const auto out_index{node.getOutputs().at(0)};
+ const auto in_index{node.getInputs().at(FullyConnected::Input::INPUT)};
+ const auto weights_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
+ const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
+
+ auto out_tensor = _tensor_reg->getPortableTensor(out_index);
+ auto in_tensor = _tensor_reg->getPortableTensor(in_index);
+ auto weights_tensor = _tensor_reg->getTrainableTensor(weights_index);
+ auto bias_tensor = _tensor_reg->getTrainableTensor(bias_index);
+
+ auto out_deriv_tensor = _tensor_reg->getDerivativeTensor(out_index);
+ auto in_deriv_tensor = _tensor_reg->getDerivativeTensor(in_index);
+ auto weights_grad_tensor = _tensor_reg->getGradientTensor(weights_index);
+ auto bias_grad_tensor = _tensor_reg->getGradientTensor(bias_index);
+
+ // Generate kernel
+ const auto activation = node.param().activation;
+ const auto weights_format = node.param().weights_format;
+
+ auto fn = std::make_unique<ops::FullyConnectedLayer>();
+
+ fn->configure(in_tensor, weights_tensor, bias_tensor, out_tensor, in_deriv_tensor,
+ weights_grad_tensor, bias_grad_tensor, out_deriv_tensor, activation, weights_format,
+ _external_context);
+
+ _return_fn = std::move(fn);
+
+ // Generate GradientAppliers
+ if (bias_tensor)
+ _update_funcs.emplace_back(generateGradientApplier(_optimizer, bias_grad_tensor, bias_tensor));
+ _update_funcs.emplace_back(
+ generateGradientApplier(_optimizer, weights_grad_tensor, weights_tensor));
+}
+
+void KernelGenerator::visit(const ir::train::operation::Loss &node)
+{
+ using ir::train::operation::Loss;
+
+ const auto output_index{node.getOutputs().at(0)};
+ const auto y_pred_index{node.getInputs().at(Loss::Y_PRED)};
+ const auto y_true_index{node.getInputs().at(Loss::Y_TRUE)};
+
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto y_pred_tensor = _tensor_reg->getPortableTensor(y_pred_index);
+ auto y_true_tensor = _tensor_reg->getPortableTensor(y_true_index);
+
+ auto deriv_y_pred_tensor = _tensor_reg->getDerivativeTensor(y_pred_index);
+ auto fn = std::make_unique<ops::LossLayer>();
+
+ fn->configure(y_pred_tensor, y_true_tensor, output_tensor, deriv_y_pred_tensor,
+ convertLossType(node.param().op_type));
+
+ _return_fn = std::move(fn);
+
+ UNUSED_RELEASE(convertPoolType);
+}
+
+void KernelGenerator::visit(const ir::train::operation::Reshape &node)
+{
+ using ir::train::operation::Reshape;
+
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
+
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+
+ auto output_deriv_tensor = _tensor_reg->getDerivativeTensor(output_index);
+ auto input_deriv_tensor = _tensor_reg->getDerivativeTensor(input_index);
+
+ // optional 2nd input
+ IPortableTensor *shape_tensor = nullptr;
+
+ if (node.getInputs().size() == 2)
+ {
+ const auto shape_index{node.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
+ shape_tensor = _tensor_reg->getPortableTensor(shape_index);
+ }
+
+ auto fn = std::make_unique<ops::ReshapeLayer>();
+
+ fn->configure(input_tensor, shape_tensor, output_tensor, input_deriv_tensor, output_deriv_tensor);
+ _return_fn = std::move(fn);
+}
+
+} // namespace train
+} // namespace backend
+} // namespace onert
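
Each visit above leaves the op's kernel in `_return_fn` and queues optimizer steps in `_update_funcs`; `generate()` then appends the kernel first and the gradient appliers afterwards, so parameter updates only run once the op's gradients exist. A minimal stand-in sketch of that ordering (the real `exec::train::TrainableFnSequence` also distinguishes forward and backward phases):

```cpp
#include <functional>
#include <iostream>
#include <vector>

// Stand-in for exec::train::TrainableFnSequence: functions run in append order.
struct FnSequence
{
  std::vector<std::function<void()>> fns;
  void append(std::function<void()> fn) { fns.push_back(std::move(fn)); }
  void run()
  {
    for (auto &fn : fns)
      fn();
  }
};

int main()
{
  FnSequence seq;
  seq.append([] { std::cout << "FullyConnected kernel\n"; });  // _return_fn
  seq.append([] { std::cout << "apply bias gradient\n"; });    // _update_funcs[0]
  seq.append([] { std::cout << "apply weights gradient\n"; }); // _update_funcs[1]
  seq.run();
}
```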
diff --git a/runtime/onert/backend/train/KernelGenerator.h b/runtime/onert/backend/train/KernelGenerator.h
new file mode 100644
index 000000000..660dc5d70
--- /dev/null
+++ b/runtime/onert/backend/train/KernelGenerator.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRAIN_KERNEL_GENERATOR_H__
+#define __ONERT_BACKEND_TRAIN_KERNEL_GENERATOR_H__
+
+#include "ExternalContext.h"
+#include "backend/basic/TensorRegistry.h"
+#include "TensorBuilder.h"
+#include "Tensor.h"
+
+#include <backend/train/KernelGeneratorBase.h>
+#include <exec/train/IGradientApplier.h>
+#include <exec/train/optimizer/Optimizer.h>
+#include <ir/Operands.h>
+#include <ir/Operations.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+
+// TODO Unify TensorRegistry
+class KernelGenerator : public backend::train::KernelGeneratorBase
+{
+public:
+ KernelGenerator(const ir::train::TrainableGraph &tgraph,
+ const std::shared_ptr<TensorRegistry> &tensor_reg,
+ const std::shared_ptr<ExternalContext> &external_context,
+ std::shared_ptr<exec::train::optimizer::Optimizer> optimizer);
+
+ std::unique_ptr<exec::train::TrainableFnSequence> generate(ir::OperationIndex op_ind) override;
+
+ void visit(const ir::train::operation::Conv2D &) override;
+ void visit(const ir::train::operation::ElementwiseActivation &) override;
+ void visit(const ir::train::operation::FullyConnected &) override;
+ void visit(const ir::train::operation::Loss &) override;
+ void visit(const ir::train::operation::Reshape &node) override;
+
+private:
+ ir::Layout _current_layout;
+ std::shared_ptr<TensorRegistry> _tensor_reg;
+ const std::shared_ptr<ExternalContext> _external_context;
+ std::shared_ptr<exec::train::optimizer::Optimizer> _optimizer;
+ std::vector<std::unique_ptr<exec::train::IGradientApplier>> _update_funcs;
+};
+
+} // namespace train
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRAIN_KERNEL_GENERATOR_H__
diff --git a/runtime/onert/backend/train/MemoryManager.h b/runtime/onert/backend/train/MemoryManager.h
new file mode 100644
index 000000000..6ac57996f
--- /dev/null
+++ b/runtime/onert/backend/train/MemoryManager.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRAIN_MEMORY_MANAGER_H__
+#define __ONERT_BACKEND_TRAIN_MEMORY_MANAGER_H__
+
+#include <backend/basic/MemoryManager.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+
+using MemoryManager = backend::basic::MemoryManager;
+
+} // namespace train
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRAIN_MEMORY_MANAGER_H__
diff --git a/runtime/onert/backend/train/Tensor.h b/runtime/onert/backend/train/Tensor.h
new file mode 100644
index 000000000..34a3cc191
--- /dev/null
+++ b/runtime/onert/backend/train/Tensor.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRAIN_TENSOR_H__
+#define __ONERT_BACKEND_TRAIN_TENSOR_H__
+
+#include <backend/basic/Tensor.h>
+#include <backend/basic/train/TrainableTensor.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+
+// NOTE This class can be replaced with basic::Tensor if this backend supports dynamic tensors.
+class Tensor : public basic::Tensor
+{
+public:
+ Tensor() = delete;
+
+public:
+ Tensor(const ir::OperandInfo &info, const ir::Layout layout)
+ : basic::Tensor{info, layout, nullptr}
+ {
+ // DO NOTHING
+ }
+
+public:
+ bool applyShape(const ir::Shape &) override { return false; }
+};
+
+using TrainableTensor = basic::train::TrainableTensor;
+using DerivativeTensor = Tensor;
+using GradientTensor = Tensor;
+
+} // namespace train
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRAIN_TENSOR_H__
diff --git a/runtime/onert/backend/train/TensorBuilder.cc b/runtime/onert/backend/train/TensorBuilder.cc
new file mode 100644
index 000000000..99e06d3a4
--- /dev/null
+++ b/runtime/onert/backend/train/TensorBuilder.cc
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TensorBuilder.h"
+
+#include "Tensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+
+TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg,
+ const std::string planner_id)
+ : _tensor_reg{tensor_reg}, _tensor_mgr{new TensorManager(tensor_reg, planner_id)}
+{
+ /* empty */
+}
+
+void TensorBuilder::registerTensorInfo(const ir::OperandIndex &index, const ir::OperandInfo &info,
+ ir::Layout layout)
+{
+ _tensor_info_map.emplace(index, info);
+ _as_constants[index] = info.isConstant();
+
+  // The train backend supports only the NHWC layout
+ assert(layout == ir::Layout::NHWC);
+ assert(!info.isDynamic());
+
+  // NOTE For now, whether an operand is built as a trainable tensor depends on
+  // whether the corresponding operand is constant.
+ if (_as_constants[index])
+ {
+ auto tensor = std::make_unique<TrainableTensor>(info, layout);
+ _tensor_reg->setTrainableTensor(index, std::move(tensor));
+ }
+ else
+ {
+ auto tensor = std::make_unique<Tensor>(info, layout);
+ _tensor_reg->setNonConstTensor(index, std::move(tensor));
+ }
+}
+
+void TensorBuilder::registerBackwardTensorInfo(const ir::OperandIndex &index,
+ const ir::OperandInfo &info, ir::Layout layout)
+{
+ _backward_tensor_info_map.emplace(index, info);
+
+  // The train backend supports only the NHWC layout
+ assert(layout == ir::Layout::NHWC);
+ assert(!info.isDynamic());
+
+  // NOTE For now, whether an operand is built as a trainable tensor depends on
+  // whether the corresponding operand is constant.
+ assert(_as_constants[index] == info.isConstant());
+ if (_as_constants[index])
+ {
+ auto tensor = std::make_unique<GradientTensor>(info, layout);
+ _tensor_reg->setGradientTensor(index, std::move(tensor));
+ }
+ else
+ {
+ auto tensor = std::make_unique<DerivativeTensor>(info, layout);
+ _tensor_reg->setDerivativeTensor(index, std::move(tensor));
+ }
+}
+
+void TensorBuilder::notifyFirstUse(const ir::OperandIndex &index)
+{
+  // TODO Support memory plan
+ if (_as_constants[index])
+ {
+ _tensor_mgr->claimTrainablePlan(index);
+ }
+ else
+ {
+ _tensor_mgr->claimNonConstPlan(index);
+ }
+}
+
+void TensorBuilder::notifyLastUse(const ir::OperandIndex &)
+{
+  // TODO Support memory plan
+}
+
+void TensorBuilder::notifyBackwardFirstUse(const ir::OperandIndex &index)
+{
+  // TODO Support memory plan
+ if (_as_constants[index])
+ {
+ _tensor_mgr->claimGradientPlan(index);
+ }
+ else
+ {
+ _tensor_mgr->claimDerivativePlan(index);
+ }
+}
+
+bool TensorBuilder::isRegistered(const ir::OperandIndex &index) const
+{
+ return _tensor_info_map.find(index) != _tensor_info_map.end();
+}
+
+bool TensorBuilder::isRegisteredBackward(const ir::OperandIndex &index) const
+{
+ return _backward_tensor_info_map.find(index) != _backward_tensor_info_map.end();
+}
+
+void TensorBuilder::allocate(void)
+{
+ _tensor_mgr->allocateNonConstTensors();
+ _tensor_mgr->allocateTrainableTensors();
+}
+
+void TensorBuilder::allocateBackward(void)
+{
+ _tensor_mgr->allocateDerivativeTensors();
+ _tensor_mgr->allocateGradientTensors();
+}
+
+} // namespace train
+} // namespace backend
+} // namespace onert
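
The builder's lifecycle is register, then notify first use (which claims a memory plan), then allocate, with forward and backward tensors served by separate manager pools. A compressed stand-in sketch of the forward path (types are illustrative; constants become trainable tensors exactly as in `registerTensorInfo` above):

```cpp
#include <iostream>
#include <map>

// Stand-in mirroring TensorBuilder's phases: register -> notifyFirstUse -> allocate.
struct MiniTensorBuilder
{
  std::map<int, bool> as_constant;

  void registerTensorInfo(int ind, bool is_constant)
  {
    as_constant[ind] = is_constant;
    std::cout << "register #" << ind << (is_constant ? " as trainable\n" : " as non-const\n");
  }

  void notifyFirstUse(int ind)
  {
    // Mirrors claimTrainablePlan / claimNonConstPlan on the TensorManager.
    std::cout << (as_constant[ind] ? "claim trainable plan #" : "claim non-const plan #") << ind
              << "\n";
  }

  void allocate()
  {
    // Real code: allocateNonConstTensors() + allocateTrainableTensors().
    std::cout << "allocate non-const and trainable pools\n";
  }
};

int main()
{
  MiniTensorBuilder tb;
  tb.registerTensorInfo(0, /*is_constant=*/false); // activation
  tb.registerTensorInfo(1, /*is_constant=*/true);  // weights
  tb.notifyFirstUse(0);
  tb.notifyFirstUse(1);
  tb.allocate(); // buffers are assigned only after all plans are claimed
}
```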
diff --git a/runtime/onert/backend/train/TensorBuilder.h b/runtime/onert/backend/train/TensorBuilder.h
new file mode 100644
index 000000000..d0738fe68
--- /dev/null
+++ b/runtime/onert/backend/train/TensorBuilder.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRAIN_TENSOR_BUILDER_H__
+#define __ONERT_BACKEND_TRAIN_TENSOR_BUILDER_H__
+
+#include "TensorManager.h"
+#include "TensorRegistry.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+
+// TODO Support dynamic tensors
+class TensorBuilder
+{
+public:
+ TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg, const std::string planner_id);
+
+ /**
+   * @brief Register tensor information to allocate on the train backend
+   * @param[in] ind Operand index
+   * @param[in] info Operand information
+   * @param[in] backend_layout Operand data layout
+ */
+ void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout backend_layout);
+
+ /**
+   * @brief Register information of tensors used only in the backward pass
+   * @param[in] ind Operand index
+   * @param[in] info Operand information
+   * @param[in] backend_layout Operand data layout
+ */
+ void registerBackwardTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout backend_layout);
+
+ // TODO Support memory plan of all tensors
+ void notifyFirstUse(const ir::OperandIndex &);
+ void notifyLastUse(const ir::OperandIndex &);
+ void notifyBackwardFirstUse(const ir::OperandIndex &);
+
+ bool isRegistered(const ir::OperandIndex &) const;
+ bool isRegisteredBackward(const ir::OperandIndex &) const;
+
+ void allocate(void);
+ void allocateBackward(void);
+
+private:
+ const std::shared_ptr<TensorRegistry> _tensor_reg;
+ std::unique_ptr<TensorManager> _tensor_mgr;
+ ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
+ ir::OperandIndexMap<ir::OperandInfo> _backward_tensor_info_map;
+ ir::OperandIndexMap<bool> _as_constants;
+};
+
+} // namespace train
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRAIN_TENSOR_BUILDER_H__
diff --git a/runtime/onert/backend/train/TensorManager.cc b/runtime/onert/backend/train/TensorManager.cc
new file mode 100644
index 000000000..50144a78f
--- /dev/null
+++ b/runtime/onert/backend/train/TensorManager.cc
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TensorManager.h"
+
+#include <util/logging.h>
+
+namespace
+{
+
+using namespace onert;
+
+template <typename Tensor>
+void allocateMemory(backend::train::MemoryManager *mgr,
+ const ir::OperandIndexMap<std::unique_ptr<Tensor>> &tensors,
+ const std::string tensor_type)
+{
+ mgr->allocate();
+
+ for (auto &&pair : tensors)
+ {
+ const auto &index = pair.first;
+ auto tensor = pair.second.get();
+ assert(!tensor->is_dynamic());
+
+ auto *buffer = mgr->getBuffer(index);
+ tensor->setBuffer(buffer);
+ VERBOSE(TensorManager) << tensor_type << index << " : " << static_cast<void *>(buffer)
+ << std::endl;
+ }
+}
+
+} // namespace
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+
+TensorManager::TensorManager(const std::shared_ptr<TensorRegistry> &reg,
+ const std::string planner_id)
+ : _nonconst_mgr{new MemoryManager(planner_id)}, _trainable_mgr{new MemoryManager(planner_id)},
+ _derivative_mgr{new MemoryManager(planner_id)},
+ _gradient_mgr{new MemoryManager(planner_id)}, _tensors{reg}
+{
+ // DO NOTHING
+}
+
+void TensorManager::allocateNonConstTensors()
+{
+ allocateMemory(_nonconst_mgr.get(), _tensors->nonconst_tensors(),
+ std::string{" TENSOR "});
+}
+
+void TensorManager::allocateTrainableTensors()
+{
+ allocateMemory(_trainable_mgr.get(), _tensors->trainable_tensors(),
+ std::string{"TRAINABLE TENSOR "});
+}
+
+void TensorManager::allocateDerivativeTensors()
+{
+ allocateMemory(_derivative_mgr.get(), _tensors->derivative_tensors(),
+ std::string{"DERIVATIVE TENSOR "});
+}
+
+void TensorManager::allocateGradientTensors()
+{
+ allocateMemory(_gradient_mgr.get(), _tensors->gradient_tensors(),
+ std::string{"GRADIENT TENSOR "});
+}
+
+void TensorManager::claimNonConstPlan(const ir::OperandIndex &index)
+{
+ auto tensor = _tensors->getNonConstTensor(index);
+ assert(tensor && !tensor->is_dynamic());
+
+ auto size = tensor->total_size();
+ _nonconst_mgr->claimPlan(index, size);
+}
+
+void TensorManager::releaseNonConstPlan(const ir::OperandIndex &index)
+{
+ assert(_tensors->getNonConstTensor(index) && !_tensors->getNonConstTensor(index)->is_dynamic());
+
+ _nonconst_mgr->releasePlan(index);
+}
+
+void TensorManager::claimTrainablePlan(const ir::OperandIndex &index)
+{
+ auto tensor = _tensors->getTrainableTensor(index);
+ assert(tensor && !tensor->is_dynamic());
+
+ auto size = tensor->total_size();
+ _trainable_mgr->claimPlan(index, size);
+}
+
+void TensorManager::releaseTrainablePlan(const ir::OperandIndex &index)
+{
+ assert(_tensors->getTrainableTensor(index) && !_tensors->getTrainableTensor(index)->is_dynamic());
+
+ _trainable_mgr->releasePlan(index);
+}
+
+void TensorManager::claimDerivativePlan(const ir::OperandIndex &index)
+{
+ auto tensor = _tensors->getDerivativeTensor(index);
+ assert(tensor && !tensor->is_dynamic());
+
+ auto size = tensor->total_size();
+ _derivative_mgr->claimPlan(index, size);
+}
+
+void TensorManager::releaseDerivativePlan(const ir::OperandIndex &index)
+{
+ assert(_tensors->getDerivativeTensor(index) &&
+ !_tensors->getDerivativeTensor(index)->is_dynamic());
+
+ _derivative_mgr->releasePlan(index);
+}
+
+void TensorManager::claimGradientPlan(const ir::OperandIndex &index)
+{
+ auto tensor = _tensors->getGradientTensor(index);
+ assert(tensor && !tensor->is_dynamic());
+
+ auto size = tensor->total_size();
+ _gradient_mgr->claimPlan(index, size);
+}
+
+void TensorManager::releaseGradientPlan(const ir::OperandIndex &index)
+{
+ assert(_tensors->getGradientTensor(index) && !_tensors->getGradientTensor(index)->is_dynamic());
+
+ _gradient_mgr->releasePlan(index);
+}
+
+} // namespace train
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/train/TensorManager.h b/runtime/onert/backend/train/TensorManager.h
new file mode 100644
index 000000000..06da3edd7
--- /dev/null
+++ b/runtime/onert/backend/train/TensorManager.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRAIN_TENSOR_MANAGER_H__
+#define __ONERT_BACKEND_TRAIN_TENSOR_MANAGER_H__
+
+#include "MemoryManager.h"
+#include "TensorRegistry.h"
+
+#include <ir/OperandIndexMap.h>
+#include <ir/OperandInfo.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+
+class TensorManager
+{
+public:
+ TensorManager(const std::shared_ptr<TensorRegistry> &reg, const std::string planner_id);
+ virtual ~TensorManager() = default;
+
+ void allocateNonConstTensors();
+ void allocateTrainableTensors();
+ void allocateDerivativeTensors();
+ void allocateGradientTensors();
+ // TODO Add member functions to deallocate tensors
+
+ void claimNonConstPlan(const ir::OperandIndex &ind);
+ void releaseNonConstPlan(const ir::OperandIndex &ind);
+ void claimTrainablePlan(const ir::OperandIndex &ind);
+ void releaseTrainablePlan(const ir::OperandIndex &ind);
+ void claimDerivativePlan(const ir::OperandIndex &ind);
+ void releaseDerivativePlan(const ir::OperandIndex &ind);
+ void claimGradientPlan(const ir::OperandIndex &ind);
+ void releaseGradientPlan(const ir::OperandIndex &ind);
+
+private:
+ std::unique_ptr<MemoryManager> _nonconst_mgr;
+ std::unique_ptr<MemoryManager> _trainable_mgr;
+ std::unique_ptr<MemoryManager> _derivative_mgr;
+ std::unique_ptr<MemoryManager> _gradient_mgr;
+ const std::shared_ptr<TensorRegistry> _tensors;
+};
+
+} // namespace train
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRAIN_TENSOR_MANAGER_H__
diff --git a/runtime/onert/backend/train/TensorRegistry.h b/runtime/onert/backend/train/TensorRegistry.h
new file mode 100644
index 000000000..34aeb0fcd
--- /dev/null
+++ b/runtime/onert/backend/train/TensorRegistry.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRAIN_TENSOR_REGISTRY__
+#define __ONERT_BACKEND_TRAIN_TENSOR_REGISTRY__
+
+#include <backend/train/ITensorRegistry.h>
+
+#include "Tensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+
+using TensorRegistry =
+ PortableTensorRegistryTemplate<Tensor, TrainableTensor, DerivativeTensor, GradientTensor>;
+
+} // namespace train
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRAIN_TENSOR_REGISTRY__
diff --git a/runtime/onert/backend/train/ops/ConvolutionLayer.cc b/runtime/onert/backend/train/ops/ConvolutionLayer.cc
new file mode 100644
index 000000000..ac736c34d
--- /dev/null
+++ b/runtime/onert/backend/train/ops/ConvolutionLayer.cc
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConvolutionLayer.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+namespace ops
+{
+ConvolutionLayer::ConvolutionLayer() : cpu::ops::ConvolutionLayer()
+{
+ // DO NOTHING
+}
+
+ConvolutionLayer::~ConvolutionLayer() = default;
+
+void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTensor *kernel,
+ const IPortableTensor *bias, const ir::PaddingType paddingType,
+ const uint32_t paddingLeft, const uint32_t paddingRight,
+ const uint32_t paddingTop, const uint32_t paddingBottom,
+ const uint32_t strideWidth, const uint32_t strideHeight,
+ const uint32_t dilationWidthFactor,
+ const uint32_t dilationHeightFactor,
+ const ir::Activation activation, IPortableTensor *output)
+{
+ cpu::ops::ConvolutionLayer::configure(
+ input, kernel, bias, paddingType, paddingLeft, paddingRight, paddingTop, paddingBottom,
+ strideWidth, strideHeight, dilationWidthFactor, dilationHeightFactor, activation, output);
+}
+
+void ConvolutionLayer::forward(bool) { cpu::ops::ConvolutionLayer::run(); }
+void ConvolutionLayer::backward()
+{
+ // TODO Implement detail
+}
+
+} // namespace ops
+} // namespace train
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/train/ops/ConvolutionLayer.h b/runtime/onert/backend/train/ops/ConvolutionLayer.h
new file mode 100644
index 000000000..ed42a2099
--- /dev/null
+++ b/runtime/onert/backend/train/ops/ConvolutionLayer.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRAIN_OPS_CONVOLUTIONLAYER_H__
+#define __ONERT_BACKEND_TRAIN_OPS_CONVOLUTIONLAYER_H__
+
+#include <ops/ConvolutionLayer.h>
+
+#include <exec/train/ITrainableFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+namespace ops
+{
+
+class ConvolutionLayer : public ::onert::exec::train::ITrainableFunction,
+ public cpu::ops::ConvolutionLayer
+{
+public:
+ ConvolutionLayer();
+ ~ConvolutionLayer();
+
+ void configure(const IPortableTensor *input, const IPortableTensor *kernel,
+ const IPortableTensor *bias, const ir::PaddingType paddingType,
+ const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
+ const uint32_t paddingBottom, const uint32_t strideWidth,
+ const uint32_t strideHeight, const uint32_t dilationWidthFactor,
+ const uint32_t dilationHeightFactor, const ir::Activation activation,
+ IPortableTensor *output);
+ void forward(bool training) override;
+ void backward() override;
+};
+
+} // namespace ops
+} // namespace train
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRAIN_OPS_CONVOLUTIONLAYER_H__
diff --git a/runtime/onert/backend/train/ops/ElementwiseActivationLayer.cc b/runtime/onert/backend/train/ops/ElementwiseActivationLayer.cc
new file mode 100644
index 000000000..860eca43c
--- /dev/null
+++ b/runtime/onert/backend/train/ops/ElementwiseActivationLayer.cc
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ElementwiseActivationLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/train/operation/ReLU.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+namespace ops
+{
+
+ElementwiseActivationLayer::ElementwiseActivationLayer() : cpu::ops::ElementwiseActivationLayer()
+{
+ // DO NOTHING
+}
+
+void ElementwiseActivationLayer::configure(const IPortableTensor *input, IPortableTensor *output,
+ IPortableTensor *deriv_input,
+ const IPortableTensor *deriv_output, float alpha,
+ float beta, ElementwiseActivationType op_type)
+{
+ assert(input != nullptr);
+ assert(output != nullptr);
+ assert(deriv_input != nullptr);
+ assert(deriv_output != nullptr);
+
+ _deriv_input = deriv_input;
+ _deriv_output = deriv_output;
+
+ _op_type = op_type;
+
+ switch (op_type)
+ {
+ case ElementwiseActivationType::kReLU:
+ if (input->data_type() == OperandType::FLOAT32)
+ {
+ if (alpha == std::numeric_limits<float>::infinity() && beta == 0.f)
+ {
+ cpu::ops::ElementwiseActivationLayer::configure(
+ input, output, alpha, beta, cpu::ops::ElementwiseActivationType::kReLU);
+
+ _backward_kernel = [](const IPortableTensor *output, const IPortableTensor *incoming,
+ IPortableTensor *outgoing) {
+ nnfw::cker::train::ReLUGrad(getShape(output), getBuffer<float>(output),
+ getShape(incoming), getBuffer<float>(incoming),
+ getShape(outgoing), getBuffer<float>(outgoing));
+ };
+ }
+ else
+ {
+ throw std::runtime_error("train ElementwiseActivationLayer : This layer does not "
+ "suppport other ReLU except for ReLU(0-inf)");
+ }
+ }
+ else
+ {
+ throw std::runtime_error("train ElementwiseActivationLayer: Unsupported datatype");
+ }
+ break;
+ default:
+ throw std::runtime_error("train ElementwiseActivationLayer: Unsupported activation type yet");
+ }
+}
+
+void ElementwiseActivationLayer::forward(bool) { cpu::ops::ElementwiseActivationLayer::run(); }
+
+void ElementwiseActivationLayer::backward()
+{
+ _backward_kernel(_output, _deriv_output, _deriv_input);
+}
+
+} // namespace ops
+} // namespace train
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/train/ops/ElementwiseActivationLayer.h b/runtime/onert/backend/train/ops/ElementwiseActivationLayer.h
new file mode 100644
index 000000000..dac1efe92
--- /dev/null
+++ b/runtime/onert/backend/train/ops/ElementwiseActivationLayer.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRAIN_OPS_ELEMENTWISEACTIVATIONLAYER_H__
+#define __ONERT_BACKEND_TRAIN_OPS_ELEMENTWISEACTIVATIONLAYER_H__
+
+#include <backend/IPortableTensor.h>
+#include <ops/ElementwiseActivationLayer.h>
+
+#include <exec/train/ITrainableFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+namespace ops
+{
+
+enum class ElementwiseActivationType
+{
+ kReLU,
+};
+
+class ElementwiseActivationLayer : public ::onert::exec::train::ITrainableFunction,
+ public cpu::ops::ElementwiseActivationLayer
+{
+public:
+ ElementwiseActivationLayer();
+
+ void configure(const IPortableTensor *input, IPortableTensor *output,
+ IPortableTensor *deriv_input, const IPortableTensor *deriv_output, float alpha,
+ float beta, ElementwiseActivationType op_type);
+ void forward(bool training) override;
+ void backward() override;
+
+private:
+ IPortableTensor *_deriv_input;
+ const IPortableTensor *_deriv_output;
+
+ ElementwiseActivationType _op_type;
+ std::function<void(const IPortableTensor *output, const IPortableTensor *incoming,
+ IPortableTensor *outgoing)>
+ _backward_kernel;
+};
+
+} // namespace ops
+} // namespace train
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRAIN_OPS_ELEMENTWISEACTIVATIONLAYER_H__
diff --git a/runtime/onert/backend/train/ops/FullyConnectedLayer.cc b/runtime/onert/backend/train/ops/FullyConnectedLayer.cc
new file mode 100644
index 000000000..8fdc822d2
--- /dev/null
+++ b/runtime/onert/backend/train/ops/FullyConnectedLayer.cc
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FullyConnectedLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/FullyConnected.h>
+#include <cker/operation/Transpose.h>
+#include <cker/train/operation/FullyConnected.h>
+#include <cker/train/operation/ReLU.h>
+
+namespace
+{
+
+using namespace onert;
+
+std::unique_ptr<backend::train::Tensor>
+createTransposedTensor(const backend::IPortableTensor *origin_tensor)
+{
+ const auto &origin_shape = origin_tensor->getShape();
+ assert(origin_shape.rank() == 2);
+
+ auto transposed_info = origin_tensor->get_info();
+ auto transposed_shape = ir::Shape{origin_shape.dim(1), origin_shape.dim(0)};
+ transposed_info.shape(transposed_shape);
+
+ return std::make_unique<backend::train::Tensor>(transposed_info, origin_tensor->layout());
+}
+
+} // namespace
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+namespace ops
+{
+
+FullyConnectedLayer::FullyConnectedLayer()
+ : cpu::ops::FullyConnectedLayer{}, _grad_weights{nullptr}, _grad_bias{nullptr},
+ _deriv_input{nullptr}, _deriv_output{nullptr}, _transposed_weights{nullptr},
+ _transposed_input{nullptr}, _transposed_deriv_output{nullptr}, _act_deriv_output{nullptr}
+{
+ // DO NOTHING
+}
+
+FullyConnectedLayer::~FullyConnectedLayer() = default;
+
+void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights,
+ const IPortableTensor *bias, IPortableTensor *output,
+ IPortableTensor *deriv_input, IPortableTensor *grad_weights,
+ IPortableTensor *grad_bias, const IPortableTensor *deriv_output,
+ ir::Activation activation,
+ ir::FullyConnectedWeightsFormat weights_format,
+ const std::shared_ptr<train::ExternalContext> &external_context)
+{
+ cpu::ops::FullyConnectedLayer::configure(input, weights, bias, activation, weights_format, output,
+ external_context);
+
+ _deriv_input = deriv_input;
+ _grad_weights = grad_weights;
+ _grad_bias = grad_bias;
+ _deriv_output = deriv_output;
+
+ if (weights_format != ir::FullyConnectedWeightsFormat::Default)
+ throw std::runtime_error{
+ "train FullyConnectedLayer: Weight formats other than default are not supported."};
+
+ if (input->get_info().shape().rank() != 2 || weights->get_info().shape().rank() != 2 ||
+ output->get_info().shape().rank() != 2 || deriv_input->get_info().shape().rank() != 2 ||
+ grad_weights->get_info().shape().rank() != 2 || deriv_output->get_info().shape().rank() != 2)
+ throw std::runtime_error{
+ "train FullyConnectedLayer: Input other ranks than 2 are not supported."};
+
+ _transposed_weights = createTransposedTensor(weights);
+ _transposed_weights->setBuffer(std::make_shared<basic::Allocator>(weights->total_size()));
+
+ _transposed_input = createTransposedTensor(input);
+ _transposed_input->setBuffer(std::make_shared<basic::Allocator>(input->total_size()));
+
+ _transposed_deriv_output = createTransposedTensor(deriv_output);
+ _transposed_deriv_output->setBuffer(
+ std::make_shared<basic::Allocator>(deriv_output->total_size()));
+
+ if (activation != ir::Activation::NONE)
+ {
+ _act_deriv_output =
+ std::make_unique<Tensor>(_deriv_output->get_info(), _deriv_output->layout());
+ _act_deriv_output->setBuffer(std::make_shared<basic::Allocator>(_deriv_output->total_size()));
+ }
+}
+
+void FullyConnectedLayer::forward(bool) { cpu::ops::FullyConnectedLayer::run(); }
+
+void FullyConnectedLayer::backward()
+{
+ const auto data_type = _deriv_output->data_type();
+ assert(data_type == _input->data_type());
+ switch (data_type)
+ {
+ case OperandType::FLOAT32:
+ {
+ assert(data_type == _grad_weights->data_type());
+ assert(data_type == _grad_bias->data_type());
+ backwardFloat32();
+ break;
+ }
+ default:
+ throw std::runtime_error{"train FullyConnectedLayer: unsupported data type"};
+ }
+}
+
+void FullyConnectedLayer::backwardFloat32()
+{
+ // Calculate gradient for activation
+ const IPortableTensor *backprop_act;
+ switch (_activation)
+ {
+ case ir::Activation::NONE:
+ backprop_act = _deriv_output;
+ break;
+ case ir::Activation::RELU:
+ nnfw::cker::train::ReLUGrad(getShape(_output), getBuffer<float>(_output),
+ getShape(_deriv_output), getBuffer<float>(_deriv_output),
+ getShape(_act_deriv_output.get()),
+ getBuffer<float>(_act_deriv_output.get()));
+ backprop_act = _act_deriv_output.get();
+ break;
+ default:
+ throw std::runtime_error("train FullyConnectedLayer: Unsupported activation type yet");
+ }
+
+ // Initialize TransposeParams
+ nnfw::cker::TransposeParams transpose_param;
+ transpose_param.perm_count = 2;
+ transpose_param.perm[0] = 1;
+ transpose_param.perm[1] = 0;
+
+ // Initialize FullyConnectedParams
+ nnfw::cker::FullyConnectedParams op_params;
+ float output_activation_min = 0;
+ float output_activation_max = 0;
+ CalculateActivationRange(ir::Activation::NONE, &output_activation_min, &output_activation_max);
+ op_params.activation = nnfw::cker::FusedActivationFunctionType::kNone;
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+ op_params.lhs_cacheable = false;
+ op_params.rhs_cacheable = false;
+
+ // Transpose and compute gradient for input
+ // ∂L/∂X = fc(Incoming gradient, transposed W)
+ auto transposed_weights = _transposed_weights.get();
+ assert(transposed_weights->getShape().rank() == 2);
+ nnfw::cker::Transpose(transpose_param, getShape(_weights), getBuffer<float>(_weights),
+ getShape(transposed_weights), getBuffer<float>(transposed_weights));
+
+ nnfw::cker::FullyConnected(op_params, getShape(backprop_act), getBuffer<float>(backprop_act),
+ getShape(transposed_weights), getBuffer<float>(transposed_weights),
+ getShape(nullptr), nullptr, getShape(_deriv_input),
+ getBuffer<float>(_deriv_input));
+
+ // Transpose and compute gradient for weights
+ // ∂L/∂W = fc(transposed incoming gradient, transposed X)
+ auto transposed_input = _transposed_input.get();
+ assert(transposed_input->getShape().rank() == 2);
+ nnfw::cker::Transpose(transpose_param, getShape(_input), getBuffer<float>(_input),
+ getShape(transposed_input), getBuffer<float>(transposed_input));
+
+ auto transposed_deriv_output = _transposed_deriv_output.get();
+ assert(transposed_deriv_output->getShape().rank() == 2);
+ nnfw::cker::Transpose(transpose_param, getShape(backprop_act), getBuffer<float>(backprop_act),
+ getShape(transposed_deriv_output),
+ getBuffer<float>(transposed_deriv_output));
+
+ nnfw::cker::FullyConnected(op_params, getShape(transposed_deriv_output),
+ getBuffer<float>(transposed_deriv_output), getShape(transposed_input),
+ getBuffer<float>(transposed_input), getShape(nullptr), nullptr,
+ getShape(_grad_weights), getBuffer<float>(_grad_weights));
+
+ // Compute gradient for bias
+ if (_bias)
+ {
+ assert(_grad_bias);
+ nnfw::cker::train::FullyConnectedBiasGrad(getShape(backprop_act),
+ getBuffer<float>(backprop_act), getShape(_grad_bias),
+ getBuffer<float>(_grad_bias));
+ }
+}
+
+} // namespace ops
+} // namespace train
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/train/ops/FullyConnectedLayer.h b/runtime/onert/backend/train/ops/FullyConnectedLayer.h
new file mode 100644
index 000000000..1d9b30a23
--- /dev/null
+++ b/runtime/onert/backend/train/ops/FullyConnectedLayer.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRAIN_OPS_FULLYCONNECTEDLAYER_H__
+#define __ONERT_BACKEND_TRAIN_OPS_FULLYCONNECTEDLAYER_H__
+
+#include "../ExternalContext.h"
+#include "../Tensor.h"
+
+#include <exec/train/ITrainableFunction.h>
+#include <ops/FullyConnectedLayer.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+namespace ops
+{
+
+class FullyConnectedLayer : public exec::train::ITrainableFunction,
+ public cpu::ops::FullyConnectedLayer
+{
+public:
+ FullyConnectedLayer();
+ ~FullyConnectedLayer();
+
+public:
+ void configure(const IPortableTensor *input, const IPortableTensor *weights,
+ const IPortableTensor *bias, IPortableTensor *output, IPortableTensor *deriv_input,
+ IPortableTensor *grad_weights, IPortableTensor *grad_bias,
+ const IPortableTensor *deriv_output, ir::Activation activation,
+ ir::FullyConnectedWeightsFormat weights_format,
+ const std::shared_ptr<train::ExternalContext> &external_context);
+
+ void forward(bool training) override;
+ void backward() override;
+
+private:
+ void backwardFloat32();
+
+private:
+ IPortableTensor *_grad_weights;
+ IPortableTensor *_grad_bias;
+ IPortableTensor *_deriv_input;
+ const IPortableTensor *_deriv_output;
+
+ // TODO Optimize memory
+ std::unique_ptr<Tensor> _transposed_weights;
+ std::unique_ptr<Tensor> _transposed_input;
+ std::unique_ptr<Tensor> _transposed_deriv_output;
+ std::unique_ptr<Tensor> _act_deriv_output;
+};
+
+} // namespace ops
+} // namespace train
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRAIN_OPS_FULLYCONNECTEDLAYER_H__
diff --git a/runtime/onert/backend/train/ops/GradientApplier.cc b/runtime/onert/backend/train/ops/GradientApplier.cc
new file mode 100644
index 000000000..90d1bb9d0
--- /dev/null
+++ b/runtime/onert/backend/train/ops/GradientApplier.cc
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GradientApplier.h"
+
+#include <exec/train/optimizer/Optimizer.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+namespace ops
+{
+
+GradientApplier::GradientApplier() : _optimizer{nullptr}, _gradient_tensor{}, _trainable_tensor{}
+{
+ // DO NOTHING
+}
+
+void GradientApplier::configure(std::shared_ptr<exec::train::optimizer::Optimizer> optimizer,
+ const IPortableTensor *gradient, ITrainableTensor *trainable)
+{
+ _optimizer = optimizer;
+ _gradient_tensor = gradient;
+ _trainable_tensor = trainable;
+}
+
+void GradientApplier::applyGradient(uint32_t training_step)
+{
+ _optimizer->applyGradient(
+ std::forward_as_tuple(*_gradient_tensor, *_trainable_tensor, training_step));
+}
+
+} // namespace ops
+} // namespace train
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/train/ops/GradientApplier.h b/runtime/onert/backend/train/ops/GradientApplier.h
new file mode 100644
index 000000000..94234e182
--- /dev/null
+++ b/runtime/onert/backend/train/ops/GradientApplier.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRAIN_OPS_GRADIENT_APPLIER_H__
+#define __ONERT_BACKEND_TRAIN_OPS_GRADIENT_APPLIER_H__
+
+#include <exec/train/IGradientApplier.h>
+
+#include <exec/train/optimizer/Optimizer.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+namespace ops
+{
+
+class GradientApplier : public ::onert::exec::train::IGradientApplier
+{
+public:
+ GradientApplier();
+ ~GradientApplier() = default;
+
+ void configure(std::shared_ptr<exec::train::optimizer::Optimizer> optimizer,
+ const IPortableTensor *gradient, ITrainableTensor *trainable);
+ void applyGradient(uint32_t training_step) override;
+
+private:
+ std::shared_ptr<exec::train::optimizer::Optimizer> _optimizer;
+ const IPortableTensor *_gradient_tensor;
+ ITrainableTensor *_trainable_tensor;
+};
+
+} // namespace ops
+} // namespace train
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRAIN_OPS_GRADIENT_APPLIER_H__
diff --git a/runtime/onert/backend/train/ops/LossLayer.cc b/runtime/onert/backend/train/ops/LossLayer.cc
new file mode 100644
index 000000000..d004722a0
--- /dev/null
+++ b/runtime/onert/backend/train/ops/LossLayer.cc
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "LossLayer.h"
+#include "OperationUtils.h"
+
+#include <cker/train/operation/Loss.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+namespace ops
+{
+
+LossLayer::LossLayer()
+ : _y_pred(nullptr), _y_true(nullptr), _output(nullptr), _deriv_y_pred(nullptr),
+ _loss_type(LossType::kMSE)
+{
+ // DO NOTHING
+}
+
+void LossLayer::configure(const IPortableTensor *y_pred, const IPortableTensor *y_true,
+ IPortableTensor *output, IPortableTensor *deriv_y_pred,
+ LossType loss_type)
+{
+ assert(y_pred != nullptr);
+ assert(y_true != nullptr);
+ assert(output != nullptr);
+ assert(deriv_y_pred != nullptr);
+ switch (loss_type)
+ {
+ case LossType::kMSE:
+ break;
+ default:
+ throw std::runtime_error("LossLayer: unsupported loss type");
+ }
+
+ _y_pred = y_pred;
+ _y_true = y_true;
+ _output = output;
+ _deriv_y_pred = deriv_y_pred;
+ _loss_type = loss_type;
+}
+
+void LossLayer::forward(bool)
+{
+ // TODO Implement this
+ switch (_loss_type)
+ {
+ case LossType::kMSE:
+ if (_y_pred->data_type() == OperandType::FLOAT32)
+ {
+ nnfw::cker::train::MSE(getShape(_y_pred), getBuffer<float>(_y_pred), getShape(_y_true),
+ getBuffer<float>(_y_true), getShape(_output),
+ getBuffer<float>(_output));
+ }
+ break;
+ default:
+ throw std::runtime_error("LossLayer: unsupported loss type");
+ }
+}
+
+void LossLayer::backward()
+{
+ switch (_loss_type)
+ {
+ case LossType::kMSE:
+ if (_y_pred->data_type() == OperandType::FLOAT32)
+ {
+ nnfw::cker::train::MSEGrad(getShape(_y_pred), getBuffer<float>(_y_pred), getShape(_y_true),
+ getBuffer<float>(_y_true), getShape(_deriv_y_pred),
+ getBuffer<float>(_deriv_y_pred));
+ }
+ break;
+ default:
+ throw std::runtime_error("LossLayer: unsupported loss type");
+ }
+}
+
+} // namespace ops
+} // namespace train
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/train/ops/LossLayer.h b/runtime/onert/backend/train/ops/LossLayer.h
new file mode 100644
index 000000000..18c6b315b
--- /dev/null
+++ b/runtime/onert/backend/train/ops/LossLayer.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRAIN_OPS_LOSSLAYER_H__
+#define __ONERT_BACKEND_TRAIN_OPS_LOSSLAYER_H__
+
+#include <backend/IPortableTensor.h>
+#include <ops/ElementwiseActivationLayer.h>
+
+#include <exec/train/ITrainableFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+namespace ops
+{
+
+enum class LossType
+{
+ kMSE,
+};
+
+class LossLayer : public ::onert::exec::train::ITrainableFunction
+{
+public:
+ LossLayer();
+
+ void configure(const IPortableTensor *y_pred, const IPortableTensor *y_true,
+ IPortableTensor *output, IPortableTensor *deriv_y_pred, LossType loss_type);
+ void forward(bool training) override;
+ void backward() override;
+
+private:
+ const IPortableTensor *_y_pred;
+ const IPortableTensor *_y_true;
+ IPortableTensor *_output;
+ IPortableTensor *_deriv_y_pred;
+ LossType _loss_type;
+};
+
+} // namespace ops
+} // namespace train
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRAIN_OPS_LOSSLAYER_H__
diff --git a/runtime/onert/backend/train/ops/OperationUtils.h b/runtime/onert/backend/train/ops/OperationUtils.h
new file mode 100644
index 000000000..fe0a02340
--- /dev/null
+++ b/runtime/onert/backend/train/ops/OperationUtils.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRAIN_OPS_OPERATION_UTILS_H__
+#define __ONERT_BACKEND_TRAIN_OPS_OPERATION_UTILS_H__
+
+#include <ops/OperationUtils.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+namespace ops
+{
+
+using OperandType = onert::ir::DataType;
+using cpu::ops::getBuffer;
+using cpu::ops::getShape;
+
+} // namespace ops
+} // namespace train
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRAIN_OPS_OPERATION_UTILS_H__
diff --git a/runtime/onert/backend/train/ops/PoolLayer.cc b/runtime/onert/backend/train/ops/PoolLayer.cc
new file mode 100644
index 000000000..c8a8422aa
--- /dev/null
+++ b/runtime/onert/backend/train/ops/PoolLayer.cc
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PoolLayer.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+namespace ops
+{
+
+PoolLayer::PoolLayer() : cpu::ops::PoolLayer()
+{
+ // DO NOTHING
+}
+
+void PoolLayer::configure(const IPortableTensor *input, const uint32_t paddingLeft,
+ const uint32_t paddingRight, const uint32_t paddingTop,
+ const uint32_t paddingBottom, const uint32_t strideWidth,
+ const uint32_t strideHeight, const uint32_t kernelWidth,
+ const uint32_t kernelHeight, const ir::Activation activation,
+ IPortableTensor *output, const PoolType op_type)
+{
+ switch (op_type)
+ {
+ case PoolType::kMax:
+ cpu::ops::PoolLayer::configure(input, paddingLeft, paddingRight, paddingTop, paddingBottom,
+ strideWidth, strideHeight, kernelWidth, kernelHeight,
+ activation, output, cpu::ops::PoolType::kMax);
+ break;
+ default:
+ throw std::runtime_error("PoolLayer: Unsupported pool type");
+ }
+}
+
+void PoolLayer::forward(bool training)
+{
+ if (training)
+ {
+ // TODO Implement training pool layer
+ }
+ else
+ {
+ cpu::ops::PoolLayer::run();
+ }
+}
+
+void PoolLayer::backward()
+{
+ // TODO Implement detail
+}
+
+} // namespace ops
+} // namespace train
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/train/ops/PoolLayer.h b/runtime/onert/backend/train/ops/PoolLayer.h
new file mode 100644
index 000000000..7f93b4a97
--- /dev/null
+++ b/runtime/onert/backend/train/ops/PoolLayer.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRAIN_OPS_POOLLAYER_H__
+#define __ONERT_BACKEND_TRAIN_OPS_POOLLAYER_H__
+
+#include <ops/PoolLayer.h>
+
+#include <exec/train/ITrainableFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+namespace ops
+{
+
+enum class PoolType
+{
+ kMax,
+};
+
+class PoolLayer : public ::onert::exec::train::ITrainableFunction, public cpu::ops::PoolLayer
+{
+public:
+ PoolLayer();
+
+public:
+ void configure(const IPortableTensor *input, const uint32_t paddingLeft,
+ const uint32_t paddingRight, const uint32_t paddingTop,
+ const uint32_t paddingBottom, const uint32_t strideWidth,
+ const uint32_t strideHeight, const uint32_t kernelWidth,
+ const uint32_t kernelHeight, const ir::Activation activation,
+ IPortableTensor *output, const PoolType op_type);
+ void forward(bool training) override;
+ void backward() override;
+};
+
+} // namespace ops
+} // namespace train
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRAIN_OPS_POOLLAYER_H__
diff --git a/runtime/onert/backend/train/ops/ReshapeLayer.cc b/runtime/onert/backend/train/ops/ReshapeLayer.cc
new file mode 100644
index 000000000..1716174a9
--- /dev/null
+++ b/runtime/onert/backend/train/ops/ReshapeLayer.cc
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ReshapeLayer.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+namespace ops
+{
+
+ReshapeLayer::ReshapeLayer()
+ : _input{nullptr}, _shape{nullptr}, _output{nullptr}, _deriv_input{nullptr}, _deriv_output{
+ nullptr}
+{
+ // DO NOTHING
+}
+
+void ReshapeLayer::reshapeGeneric(const IPortableTensor *input, IPortableTensor *output)
+{
+ size_t count = input->total_size();
+ memcpy(output->buffer(), input->buffer(), count);
+}
+
+void ReshapeLayer::configure(const IPortableTensor *input, const IPortableTensor *shape,
+ IPortableTensor *output, IPortableTensor *deriv_input,
+ const IPortableTensor *deriv_output)
+{
+ _input = input;
+ // NOTE The shape input is optional; if the model does not provide it, _shape is nullptr.
+ _shape = shape;
+ _output = output;
+
+ _deriv_input = deriv_input;
+ _deriv_output = deriv_output;
+}
+
+void ReshapeLayer::forward(bool) { reshapeGeneric(_input, _output); }
+
+void ReshapeLayer::backward() { reshapeGeneric(_deriv_output, _deriv_input); }
+
+} // namespace ops
+} // namespace train
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/train/ops/ReshapeLayer.h b/runtime/onert/backend/train/ops/ReshapeLayer.h
new file mode 100644
index 000000000..e4f017225
--- /dev/null
+++ b/runtime/onert/backend/train/ops/ReshapeLayer.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRAIN_OPS_RESHAPELAYER_H__
+#define __ONERT_BACKEND_TRAIN_OPS_RESHAPELAYER_H__
+
+#include <backend/IPortableTensor.h>
+
+#include <exec/train/ITrainableFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+namespace ops
+{
+
+class ReshapeLayer : public ::onert::exec::train::ITrainableFunction
+{
+public:
+ ReshapeLayer();
+
+public:
+ void configure(const IPortableTensor *input, const IPortableTensor *shape,
+ IPortableTensor *output, IPortableTensor *deriv_input,
+ const IPortableTensor *deriv_output);
+ void forward(bool training) override;
+ void backward() override;
+
+private:
+ void reshapeGeneric(const IPortableTensor *input, IPortableTensor *output);
+
+private:
+ const IPortableTensor *_input;
+ const IPortableTensor *_shape;
+ IPortableTensor *_output;
+
+ IPortableTensor *_deriv_input;
+ const IPortableTensor *_deriv_output;
+};
+
+} // namespace ops
+} // namespace train
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRAIN_OPS_RESHAPELAYER_H__
diff --git a/runtime/onert/backend/train/train.cc b/runtime/onert/backend/train/train.cc
new file mode 100644
index 000000000..a77f71c43
--- /dev/null
+++ b/runtime/onert/backend/train/train.cc
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Backend.h"
+
+extern "C" {
+
+onert::backend::Backend *onert_backend_create() { return new onert::backend::train::Backend; }
+
+void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; }
+}
diff --git a/runtime/onert/backend/trix/Backend.h b/runtime/onert/backend/trix/Backend.h
new file mode 100644
index 000000000..a63839720
--- /dev/null
+++ b/runtime/onert/backend/trix/Backend.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRIX_BACKEND_H__
+#define __ONERT_BACKEND_TRIX_BACKEND_H__
+
+#include "BackendContext.h"
+#include "Config.h"
+#include "KernelGenerator.h"
+
+#include <backend/Backend.h>
+
+#include <memory>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+class Backend : public ::onert::backend::Backend
+{
+public:
+ Backend() : _config{std::make_shared<Config>()} {}
+
+ std::shared_ptr<IConfig> config() const override { return _config; }
+
+ std::unique_ptr<onert::backend::BackendContext> newContext(ContextData &&data) const override
+ {
+ auto &graph = *data.graph;
+ auto context = std::make_unique<BackendContext>(this, std::move(data));
+ auto tr = std::make_shared<basic::TensorRegistry>();
+ auto tb = std::make_shared<TensorBuilder>(tr);
+ context->tensor_registry = tr;
+ context->tensor_builder = tb;
+ context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, context->dev_context());
+ return context;
+ }
+
+private:
+ std::shared_ptr<IConfig> _config;
+};
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRIX_BACKEND_H__
diff --git a/runtime/onert/backend/trix/BackendContext.cc b/runtime/onert/backend/trix/BackendContext.cc
new file mode 100644
index 000000000..51571b458
--- /dev/null
+++ b/runtime/onert/backend/trix/BackendContext.cc
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+#include "util/logging.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandIndexSequence.h"
+#include "backend/basic/BackendContextHelpers.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+ITensorRegistry *BackendContext::genTensors() { return basic::genTensors(*this); }
+
+FunctionMap BackendContext::genKernels()
+{
+ FunctionMap ret;
+
+ for (auto &&op_ind : _data.op_order)
+ {
+ auto fn_seq = kernel_gen->generate(op_ind);
+ ret.emplace_back(op_ind, std::move(fn_seq));
+ }
+
+ basic::initConsts(*this);
+
+ // NOTE For memory optimization, we want to free some operand data
+ const_cast<ir::Graph &>(*_data.graph)
+ .operands()
+ .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
+
+ for (auto &&it : ret)
+ {
+ auto &fn_seq = it.second;
+ fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
+ }
+
+ return ret;
+}
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/trix/BackendContext.h b/runtime/onert/backend/trix/BackendContext.h
new file mode 100644
index 000000000..c0734c46d
--- /dev/null
+++ b/runtime/onert/backend/trix/BackendContext.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRIX_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_TRIX_BACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+#include "DevContext.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+class BackendContext : public onert::backend::BackendContext
+{
+public:
+ BackendContext(const Backend *backend, ContextData &&data,
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
+ std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+ : onert::backend::BackendContext(backend, std::move(data), tensor_registry),
+ tensor_builder{tensor_builder}, kernel_gen{kernel_gen}, _dev_context(new DevContext)
+ {
+ }
+
+ ITensorRegistry *genTensors() override;
+ FunctionMap genKernels() override;
+
+ std::shared_ptr<DevContext> dev_context() { return _dev_context; }
+
+public:
+ // TODO Make it private
+ std::shared_ptr<TensorBuilder> tensor_builder;
+ std::shared_ptr<KernelGenerator> kernel_gen;
+
+private:
+ std::shared_ptr<DevContext> _dev_context;
+};
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRIX_BACKEND_CONTEXT_H__
diff --git a/runtime/onert/backend/trix/BatchThreadPool.cc b/runtime/onert/backend/trix/BatchThreadPool.cc
new file mode 100644
index 000000000..3c2001d75
--- /dev/null
+++ b/runtime/onert/backend/trix/BatchThreadPool.cc
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BatchThreadPool.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+BatchThreadPool::BatchThreadPool(size_t num_threads) : _num_threads(num_threads), _stop_all(false)
+{
+ _worker_threads.reserve(_num_threads);
+ for (uint32_t thread_num = 0; thread_num < _num_threads; ++thread_num)
+ {
+ _worker_threads.emplace_back([this, thread_num]() { this->worker(thread_num); });
+ }
+}
+
+void BatchThreadPool::worker(uint32_t thread_num)
+{
+ while (true)
+ {
+ std::unique_lock<std::mutex> lock(_m_job_queue);
+ _cv_job_queue.wait(lock, [this]() { return !this->_job_queue.empty() || _stop_all; });
+ if (_stop_all && this->_job_queue.empty())
+ {
+ return;
+ }
+
+ // Pop a job in front of queue
+ auto job = std::move(_job_queue.front());
+ _job_queue.pop();
+ lock.unlock();
+
+ // Run the job
+ job(thread_num);
+ }
+}
+
+BatchThreadPool::~BatchThreadPool()
+{
+ _stop_all = true;
+ _cv_job_queue.notify_all();
+
+ for (auto &&t : _worker_threads)
+ {
+ t.join();
+ }
+}
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/trix/BatchThreadPool.h b/runtime/onert/backend/trix/BatchThreadPool.h
new file mode 100644
index 000000000..bc2936fb4
--- /dev/null
+++ b/runtime/onert/backend/trix/BatchThreadPool.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRIX_BATCH_THREAD_POOL_H__
+#define __ONERT_BACKEND_TRIX_BATCH_THREAD_POOL_H__
+
+#include <condition_variable>
+#include <functional>
+#include <future>
+#include <memory>
+#include <mutex>
+#include <queue>
+#include <thread>
+#include <vector>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+/**
+ * @brief Thread pool for batch-by-batch multi-threaded job execution
+ *
+ */
+class BatchThreadPool
+{
+public:
+ BatchThreadPool(size_t num_threads);
+ ~BatchThreadPool();
+
+ /**
+ * @brief Enqueue a job and get a future for its result
+ *
+ * @tparam F Type of the function for job
+ * @tparam Args Type of arguments of job
+ * @param f Function for job
+ * @param args Arguments of job
+ * @return std::future<typename std::result_of<F(uint32_t, Args...)>::type>
+ */
+ template <class F, class... Args>
+ std::future<typename std::result_of<F(uint32_t, Args...)>::type> enqueueJob(F &&f,
+ Args &&... args)
+ {
+ if (_stop_all)
+ {
+ throw std::runtime_error("Stop all threads in BatchThreadPool");
+ }
+
+ using return_type = typename std::result_of<F(uint32_t, Args...)>::type;
+ auto job = std::make_shared<std::packaged_task<return_type(uint32_t)>>(
+ std::bind(std::forward<F>(f), std::placeholders::_1, std::forward<Args>(args)...));
+ std::future<return_type> job_result_future = job->get_future();
+ {
+ // Push job in the assigned queue
+ std::lock_guard<std::mutex> lock(_m_job_queue);
+
+ // Push job
+ _job_queue.push([job](uint32_t thread_num) { (*job)(thread_num); });
+ }
+ _cv_job_queue.notify_one();
+
+ return job_result_future;
+ }
+
+private:
+ /**
+ * @brief Worker to run jobs
+ *
+ * @param thread_num Thread number on which worker is running
+ */
+ void worker(uint32_t thread_num);
+
+private:
+ /**
+ * @brief The number of threads
+ *
+ */
+ size_t _num_threads;
+
+ /**
+   * @brief Worker threads that run jobs
+ *
+ */
+ std::vector<std::thread> _worker_threads;
+
+ /**
+ * @brief Queue for jobs
+ *
+ */
+ std::queue<std::function<void(uint32_t)>> _job_queue;
+
+ /**
+   * @brief Condition variable used to synchronize _job_queue access among _worker_threads
+ *
+ */
+ std::condition_variable _cv_job_queue;
+
+ /**
+ * @brief Mutex for the queue _job_queue
+ *
+ */
+ std::mutex _m_job_queue;
+
+ /**
+   * @brief Flag indicating that all threads should stop
+ *
+ */
+ bool _stop_all;
+};
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRIX_BATCH_THREAD_POOL_H__
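A minimal usage sketch of the pool above, assuming only the standard library plus the BatchThreadPool.h header from this diff; the pool size, job body, and batch count are illustrative. Each job receives the worker's thread number as its first argument, which DevContext later uses as the device number:

    #include "BatchThreadPool.h"

    #include <cstdint>
    #include <future>
    #include <vector>

    int main()
    {
      using onert::backend::trix::BatchThreadPool;

      BatchThreadPool pool(4); // e.g. one worker per NPU device

      std::vector<std::future<int32_t>> futures;
      for (uint32_t batch_num = 0; batch_num < 8; ++batch_num)
      {
        // The first parameter of every job is the worker's thread number;
        // the trix backend uses it as the device number for the batch
        futures.emplace_back(pool.enqueueJob(
          [](uint32_t thread_num, uint32_t batch) -> int32_t {
            return static_cast<int32_t>(thread_num + batch); // placeholder work
          },
          batch_num));
      }

      for (auto &&f : futures)
        f.get(); // wait for every batch, as DevContext::requestRun does

      return 0; // ~BatchThreadPool joins the workers
    }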
diff --git a/runtime/onert/backend/trix/CMakeLists.txt b/runtime/onert/backend/trix/CMakeLists.txt
new file mode 100644
index 000000000..a94be247d
--- /dev/null
+++ b/runtime/onert/backend/trix/CMakeLists.txt
@@ -0,0 +1,24 @@
+set(LIB_ONERT_BACKEND_TRIX onert_backend_trix)
+
+nnfw_find_package(TRIXEngine QUIET 2.5.0)
+if(NOT TRIXEngine_FOUND)
+ return()
+endif(NOT TRIXEngine_FOUND)
+
+file(GLOB_RECURSE SOURCES "*.cc")
+
+add_library(${LIB_ONERT_BACKEND_TRIX} SHARED ${SOURCES})
+
+target_link_libraries(${LIB_ONERT_BACKEND_TRIX} PRIVATE onert_core)
+target_link_libraries(${LIB_ONERT_BACKEND_TRIX} PRIVATE trix_engine)
+target_link_libraries(${LIB_ONERT_BACKEND_TRIX} PRIVATE nnfw_common)
+target_link_libraries(${LIB_ONERT_BACKEND_TRIX} PRIVATE nnfw_coverage)
+
+set_target_properties(${LIB_ONERT_BACKEND_TRIX} PROPERTIES OUTPUT_NAME backend_trix)
+
+if(CMAKE_BUILD_TYPE_LC STREQUAL "release")
+ add_custom_command(TARGET ${LIB_ONERT_BACKEND_TRIX} POST_BUILD
+ COMMAND ${CMAKE_STRIP} "--strip-unneeded" $<TARGET_FILE_NAME:${LIB_ONERT_BACKEND_TRIX}>)
+endif()
+
+install(TARGETS ${LIB_ONERT_BACKEND_TRIX} DESTINATION lib)
diff --git a/runtime/onert/backend/trix/Config.cc b/runtime/onert/backend/trix/Config.cc
new file mode 100644
index 000000000..b536fd58c
--- /dev/null
+++ b/runtime/onert/backend/trix/Config.cc
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Config.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+bool Config::initialize() { return true; }
+
+ir::Layout Config::supportLayout(const ir::IOperation &, ir::Layout) { return ir::Layout::NHWC; }
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/trix/Config.h b/runtime/onert/backend/trix/Config.h
new file mode 100644
index 000000000..310c57b29
--- /dev/null
+++ b/runtime/onert/backend/trix/Config.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRIX_CONFIG_H__
+#define __ONERT_BACKEND_TRIX_CONFIG_H__
+
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/ITimer.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+class Config : public IConfig
+{
+public:
+ std::string id() override { return "trix"; }
+ bool initialize() override;
+ ir::Layout supportLayout(const ir::IOperation &node, ir::Layout frontend_layout) override;
+ bool supportPermutation() override { return true; }
+ bool supportDynamicTensor() override { return false; }
+ bool supportFP16() override { return false; }
+
+ std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); }
+};
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRIX_CONFIG_H__
diff --git a/runtime/onert/backend/trix/Convert.cc b/runtime/onert/backend/trix/Convert.cc
new file mode 100644
index 000000000..fe003e7ea
--- /dev/null
+++ b/runtime/onert/backend/trix/Convert.cc
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Convert.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+data_layout convertDataLayout(const ir::Layout layout)
+{
+ switch (layout)
+ {
+ case ir::Layout::NCHW:
+ return DATA_LAYOUT_NCHW;
+ case ir::Layout::NHWC:
+ return DATA_LAYOUT_NHWC;
+ default:
+ throw std::runtime_error("Unknown Layout");
+ }
+}
+
+data_type convertDataType(const ir::DataType type)
+{
+ switch (type)
+ {
+ case ir::DataType::QUANT_UINT8_ASYMM:
+ return DATA_TYPE_QASYMM8;
+ case ir::DataType::QUANT_INT16_SYMM:
+ return DATA_TYPE_QSYMM16;
+ default:
+ throw std::runtime_error("Unsupported data type");
+ }
+}
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/trix/Convert.h b/runtime/onert/backend/trix/Convert.h
new file mode 100644
index 000000000..662ed44b6
--- /dev/null
+++ b/runtime/onert/backend/trix/Convert.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRIX_CONVERT_H__
+#define __ONERT_BACKEND_TRIX_CONVERT_H__
+
+#include <backend/IPortableTensor.h>
+#include <ir/DataType.h>
+#include <ir/Layout.h>
+
+#include <libnpuhost.h>
+#include <type_traits>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+/**
+ * @brief Convert type of layout from onert type to npu type
+ *
+ * @param layout Layout type in onert
+ * @return data_layout Layout type in npu
+ */
+data_layout convertDataLayout(const ir::Layout layout);
+
+/**
+ * @brief Convert type of data from onert type to npu type
+ *
+ * @param type Data type in onert
+ * @return data_type Data type in npu
+ */
+data_type convertDataType(const ir::DataType type);
+
+/**
+ * @brief Set the tensors_data_info object
+ *
+ * @tparam T Type of tensor derived from IPortableTensor
+ * @param tensors Tensors that have data information
+ * @param info tensors_data_info to be set
+ */
+template <typename T, std::enable_if_t<std::is_base_of<IPortableTensor, T>::value, bool> = true>
+void setDataInfo(const std::vector<T *> &tensors, tensors_data_info *info)
+{
+ info->num_info = static_cast<uint32_t>(tensors.size());
+
+ for (uint32_t idx = 0; idx < info->num_info; ++idx)
+ {
+ info->info[idx].layout = convertDataLayout(tensors[idx]->layout());
+ info->info[idx].type = convertDataType(tensors[idx]->data_type());
+ }
+}
+
+/**
+ * @brief Set the generic_buffers object
+ *
+ * @tparam T Type of tensor derived from IPortableTensor
+ * @param tensors Tensors that have buffer information
+ * @param buf generic_buffers to be set
+ */
+template <typename T, std::enable_if_t<std::is_base_of<IPortableTensor, T>::value, bool> = true>
+void setBuffers(const std::vector<T *> &tensors, generic_buffers *buf)
+{
+ buf->num_buffers = static_cast<uint32_t>(tensors.size());
+
+ for (uint32_t idx = 0; idx < buf->num_buffers; ++idx)
+ {
+ buf->bufs[idx].addr = tensors[idx]->buffer();
+ buf->bufs[idx].size = static_cast<uint64_t>(tensors[idx]->total_size());
+ buf->bufs[idx].type = BUFFER_MAPPED;
+ }
+}
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRIX_CONVERT_H__
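Both helpers above rely on the same compile-time guard: a std::enable_if_t default template argument that removes the overload unless T derives from IPortableTensor. A self-contained sketch of that SFINAE pattern, with hypothetical Base/Derived types standing in for the tensor hierarchy:

    #include <cstddef>
    #include <type_traits>
    #include <vector>

    struct Base
    {
      virtual ~Base() = default;
    };

    struct Derived : Base
    {
    };

    // Participates in overload resolution only when T derives from Base,
    // mirroring the guard on setDataInfo/setBuffers above
    template <typename T, std::enable_if_t<std::is_base_of<Base, T>::value, bool> = true>
    std::size_t countAll(const std::vector<T *> &items)
    {
      return items.size();
    }

    int main()
    {
      std::vector<Derived *> v;
      // countAll on std::vector<int *> would fail to compile: int is not a Base
      return static_cast<int>(countAll(v)); // returns 0
    }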
diff --git a/runtime/onert/backend/trix/DevContext.cc b/runtime/onert/backend/trix/DevContext.cc
new file mode 100644
index 000000000..4d58a7d9f
--- /dev/null
+++ b/runtime/onert/backend/trix/DevContext.cc
@@ -0,0 +1,313 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DevContext.h"
+
+#include "Convert.h"
+
+#include <stdexcept>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+// Everything related to the npu device handles is gathered in this class. When an npu daemon is
+// implemented, the roles other than the device context itself should be separated out.
+DevContext::DevContext() : _dev_handles{}, _model_ids{}, _meta_map{}
+{
+ auto dev_count = getnumNPUdeviceByType(NPUCOND_TRIV2_CONN_SOCIP);
+ if (dev_count <= 0)
+ {
+ throw std::runtime_error("Unable to find TRIX NPU device");
+ }
+
+ // Get NPU device handles
+ for (int i = 0; i < dev_count; ++i)
+ {
+ npudev_h handle;
+ if (getNPUdeviceByType(&handle, NPUCOND_TRIV2_CONN_SOCIP, i) < 0)
+ {
+ throw std::runtime_error("Failed to get TRIX NPU device handle");
+ }
+ _dev_handles.emplace_back(handle);
+ }
+
+ // NOTE Do not change the number of threads as long as jobs in thread call
+ // the synchronous APIs such as submitNPU_request()
+ _batch_thread_pool = std::make_unique<BatchThreadPool>(_dev_handles.size());
+  // We need to be careful not to create multiple `BatchThreadPool`s. With multiple models, having
+  // multiple `BatchThreadPool`s may cause problems in the current implementation. If creation of
+  // the thread pool is moved into an npu daemon, this problem should be resolved naturally.
+}
+
+DevContext::~DevContext()
+{
+ // NOTE Must release _batch_thread_pool before releasing _dev_handles to wait for all threads to
+ // be terminated
+ _batch_thread_pool.reset(nullptr);
+
+ for (const auto &dev_handle : _dev_handles)
+ {
+ unregisterNPUmodel_all(dev_handle);
+ putNPUdevice(dev_handle);
+ }
+}
+
+ModelID DevContext::registerModel(const std::string &model_file_path)
+{
+ if (_dev_handles.size() == 0)
+ {
+ throw std::runtime_error("No npu device is available");
+ }
+
+ std::unique_ptr<npubin_meta, decltype(&free)> meta(
+ getNPUmodel_metadata(model_file_path.c_str(), false), free);
+
+ if (meta == nullptr)
+ {
+ throw std::runtime_error("Unable to extract the model metadata");
+ }
+
+ generic_buffer file_info;
+ file_info.type = BUFFER_FILE;
+ file_info.filepath = model_file_path.c_str();
+ file_info.size = meta->size;
+
+ ModelID model_id = 0;
+
+ for (uint32_t dev_num = 0; dev_num < _dev_handles.size(); ++dev_num)
+ {
+ // Register model for each device
+ uint32_t model_id_at_device;
+ if (registerNPUmodel(_dev_handles.at(dev_num), &file_info, &model_id_at_device) < 0)
+ {
+ throw std::runtime_error("Failed to register npu model");
+ }
+
+ if (dev_num == 0)
+ {
+ model_id = model_id_at_device;
+ _meta_map[model_id_at_device] = std::shared_ptr<npubin_meta>(std::move(meta));
+ }
+ else
+ {
+ _meta_map[model_id_at_device] = _meta_map[model_id];
+ }
+
+ _model_ids[model_id].resize(dev_num + 1);
+ _model_ids[model_id].at(dev_num) = model_id_at_device;
+ }
+
+ // Return the model id for device 0 only
+ return model_id;
+}
+
+void DevContext::unRegisterModel(ModelID model_id)
+{
+ for (uint32_t dev_num = 0; dev_num < _dev_handles.size(); ++dev_num)
+ {
+ const auto model_id_at_device = _model_ids.at(model_id).at(dev_num);
+ const auto &dev_handle = _dev_handles.at(dev_num);
+
+ // Remove meta data
+ _meta_map.erase(model_id_at_device);
+
+ // Unregister Model for each device
+ unregisterNPUmodel(dev_handle, model_id_at_device);
+ }
+ // Remove model IDs
+ _model_ids.erase(model_id);
+}
+
+void DevContext::requestRun(ModelID model_id, input_buffers *input_bufs, tensors_data_info *in_info,
+ output_buffers *output_bufs, tensors_data_info *out_info,
+ size_t batch_size)
+{
+ if (batch_size > 1)
+ {
+ if (in_info->num_info != 1)
+ {
+      throw std::runtime_error("Only a single batched input is supported now");
+ }
+ if (out_info->num_info != 1)
+ {
+      throw std::runtime_error("Only a single output is supported now");
+ }
+
+ if (input_bufs->bufs[0].size % batch_size != 0)
+ {
+      throw std::runtime_error("Invalid batch size. batch size: " + std::to_string(batch_size) +
+                               ", input buffer size: " + std::to_string(input_bufs->bufs[0].size));
+ }
+
+ if (output_bufs->bufs[0].size % batch_size != 0)
+ {
+      throw std::runtime_error(
+        "Invalid batch size. batch size: " + std::to_string(batch_size) +
+        ", output tensor size: " + std::to_string(output_bufs->bufs[0].size));
+ }
+
+ // inputs/outputs for each batch
+ std::vector<input_buffers> in_buffers_vec(batch_size);
+ std::vector<output_buffers> out_buffers_vec(batch_size);
+
+ // Run on thread pool
+ std::vector<std::future<int32_t>> batch_futures;
+ for (uint32_t batch_num = 0; batch_num < batch_size; ++batch_num)
+ {
+ // Enqueue jobs
+      // in_info and out_info are identical for every batch slice, so they are passed
+      // through unchanged.
+ auto future = _batch_thread_pool->enqueueJob(
+ [batch_size, in_info, out_info,
+ this](uint32_t dev_num, ModelID model_id, const input_buffers *input_bufs,
+ const output_buffers *output_bufs, uint32_t batch_num) -> int32_t {
+ // Set buffers of inputs/outputs for each batch
+ // TODO Support multiple inputs/outputs
+ input_buffers in_batch_buffers;
+ in_batch_buffers.num_buffers = input_bufs->num_buffers;
+ const uint64_t in_batch_offset = input_bufs->bufs[0].size / batch_size;
+ setBufferByBatch(input_bufs->bufs[0], batch_num, in_batch_offset,
+ &in_batch_buffers.bufs[0]);
+
+ output_buffers out_batch_buffers;
+ out_batch_buffers.num_buffers = output_bufs->num_buffers;
+ const uint64_t out_batch_offset = output_bufs->bufs[0].size / batch_size;
+ setBufferByBatch(output_bufs->bufs[0], batch_num, out_batch_offset,
+ &out_batch_buffers.bufs[0]);
+
+ try
+ {
+ // dev_num is the same as the thread number in _batch_thread_pool
+ this->runOneBatch(dev_num, model_id, &in_batch_buffers, in_info, &out_batch_buffers,
+ out_info);
+ }
+ catch (...)
+ {
+ _eptr = std::current_exception();
+ }
+
+ return batch_num;
+ },
+ model_id, input_bufs, output_bufs, batch_num);
+ batch_futures.emplace_back(std::move(future));
+ }
+
+ for (auto &&future : batch_futures)
+ {
+ future.get();
+ }
+
+ if (_eptr)
+ {
+ std::exception_ptr eptr(nullptr);
+ _eptr.swap(eptr);
+ std::rethrow_exception(eptr);
+ }
+ }
+ else
+ {
+ runOneBatch(0, model_id, input_bufs, in_info, output_bufs, out_info);
+ }
+}
+
+void DevContext::runOneBatch(uint32_t dev_num, ModelID model_id, input_buffers *input_bufs,
+ tensors_data_info *in_info, output_buffers *output_bufs,
+ tensors_data_info *out_info)
+{
+ const auto &model_id_at_device = _model_ids.at(model_id).at(dev_num);
+
+ const auto meta = _meta_map.at(model_id_at_device);
+ if (meta->input_seg_num != in_info->num_info)
+ {
+    throw std::runtime_error("The number of inputs does not match the model's input seg num");
+ }
+
+ if (meta->output_seg_num != out_info->num_info)
+ {
+    throw std::runtime_error("The number of outputs does not match the model's output seg num");
+ }
+
+ const auto &dev_handle = _dev_handles.at(dev_num);
+ int req_id;
+
+ if (auto error_code = createNPU_request(dev_handle, model_id_at_device, &req_id))
+ {
+ throw std::runtime_error("Unable to create NPU request with model id (" +
+ std::to_string(model_id_at_device) + ")" +
+ " error code : " + std::to_string(error_code));
+ }
+
+ if (auto error_code =
+ setNPU_requestData(dev_handle, req_id, input_bufs, in_info, output_bufs, out_info))
+ {
+ removeNPU_request(dev_handle, req_id);
+ throw std::runtime_error("Unable to create NPU request for model id (" +
+ std::to_string(model_id_at_device) + ")" +
+ " error code : " + std::to_string(error_code));
+ }
+
+  // NOTE submitNPU_request may not be thread-safe; it occasionally hangs (becomes unresponsive).
+  // Ultimately, to solve this problem, we have to either use another thread-safe API or
+  // make submitNPU_request thread-safe, but both approaches take time.
+  // As a workaround, allow the hanging thread to linger.
+  // TODO Make submitNPU_request thread-safe or replace it with another thread-safe API
+ std::packaged_task<int(npudev_h, int)> task(submitNPU_request);
+ auto f = task.get_future();
+ std::thread thread_submit_request(std::move(task), dev_handle, req_id);
+ auto status = f.wait_until(std::chrono::system_clock::now() + std::chrono::seconds(60));
+ if (status == std::future_status::timeout)
+ {
+    // There is no way to terminate a hanging submitNPU_request from the outside.
+    // If the hanging thread is detached, it remains hanging, but even that is better
+    // than having the main thread hang.
+ thread_submit_request.detach();
+
+ // TODO Enable removeNPU_request after resolving hanging.
+ // removeNPU_request(dev_handle, req_id);
+    throw std::runtime_error("The npu API \"submitNPU_request\" timed out");
+ }
+
+ auto error_code = f.get();
+ thread_submit_request.join();
+ if (error_code != 0)
+ {
+ removeNPU_request(dev_handle, req_id);
+ throw std::runtime_error("Unable to submit NPU request with req id (" + std::to_string(req_id) +
+ ")" + " error code : " + std::to_string(error_code));
+ }
+
+ if (auto error_code = removeNPU_request(dev_handle, req_id))
+ {
+ throw std::runtime_error("Unable to remove NPU request with req id (" + std::to_string(req_id) +
+ ")" + " error code : " + std::to_string(error_code));
+ }
+}
+
+void DevContext::setBufferByBatch(const generic_buffer &origin_buf, uint32_t batch_num,
+ uint64_t batch_offset, generic_buffer *batch_buf)
+{
+ batch_buf->addr = reinterpret_cast<uint8_t *>(origin_buf.addr) + batch_num * batch_offset;
+ batch_buf->size = batch_offset;
+ batch_buf->type = BUFFER_MAPPED;
+}
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
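The timeout workaround in runOneBatch above is a reusable pattern: wrap the possibly-hanging call in a std::packaged_task, run it on its own thread, wait on the future with a deadline, and detach the thread if the deadline passes. A self-contained sketch with a stand-in for submitNPU_request (the 60-second deadline matches the code above; everything else is illustrative):

    #include <chrono>
    #include <future>
    #include <stdexcept>
    #include <thread>

    int possiblyHangingCall() { return 0; } // stand-in for submitNPU_request

    int callWithTimeout()
    {
      std::packaged_task<int()> task(possiblyHangingCall);
      auto f = task.get_future();
      std::thread t(std::move(task));

      auto status = f.wait_until(std::chrono::system_clock::now() + std::chrono::seconds(60));
      if (status == std::future_status::timeout)
      {
        // The call cannot be cancelled from the outside; detaching leaks the
        // thread but keeps the caller responsive, as the NOTE above explains
        t.detach();
        throw std::runtime_error("call timed out");
      }

      int result = f.get(); // the task finished, so get() returns promptly
      t.join();
      return result;
    }

    int main() { return callWithTimeout(); }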
diff --git a/runtime/onert/backend/trix/DevContext.h b/runtime/onert/backend/trix/DevContext.h
new file mode 100644
index 000000000..cd8de97e6
--- /dev/null
+++ b/runtime/onert/backend/trix/DevContext.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRIX_DEV_CONTEXT_H__
+#define __ONERT_BACKEND_TRIX_DEV_CONTEXT_H__
+
+#include "BatchThreadPool.h"
+
+#include <libnpuhost.h>
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+using ModelID = uint32_t;
+
+/**
+ * @brief NPU device context of trix backend
+ *
+ */
+class DevContext
+{
+public:
+ /**
+ * @brief Construct a new device Context object
+ *
+ */
+ DevContext();
+
+ /**
+ * @brief Destroy the device Context object
+ *
+ */
+ ~DevContext();
+
+ DevContext(const DevContext &) = delete;
+ DevContext &operator=(const DevContext &) = delete;
+
+ /**
+ * @brief Register a trix model for all NPU devices
+ *
+ * @param model_file_path File path of a trix model
+ * @return ModelID Internal ID of the trix model
+ */
+ ModelID registerModel(const std::string &model_file_path);
+
+ /**
+ * @brief Unregister a trix model
+ *
+ * @param model_id Internal ID of the trix model to be unregistered
+ */
+ void unRegisterModel(ModelID model_id);
+
+ /**
+ * @brief Request a trix model to be run on NPU
+ *
+ * @param model_id Internal ID of a trix model
+ * @param input_bufs Buffer data of inputs
+ * @param in_info Data info of inputs
+ * @param output_bufs Buffer data of outputs
+   * @param out_info Data info of outputs
+ * @param batch_size Batch size
+ */
+ void requestRun(ModelID model_id, input_buffers *input_bufs, tensors_data_info *in_info,
+ output_buffers *output_bufs, tensors_data_info *out_info, size_t batch_size);
+
+private:
+ /**
+   * @brief Request one batch of a trix model to be run on an NPU device
+ *
+ * @param dev_num Device number
+ * @param model_id Internal ID of a trix model
+ * @param input_bufs Buffer data of inputs
+ * @param in_info Data info of inputs
+ * @param output_bufs Buffer data of outputs
+   * @param out_info Data info of outputs
+ */
+ void runOneBatch(uint32_t dev_num, ModelID model_id, input_buffers *input_bufs,
+ tensors_data_info *in_info, output_buffers *output_bufs,
+ tensors_data_info *out_info);
+
+ /**
+ * @brief Set the buffer object by batch
+ *
+ * @param origin_buf Buffer object that has all batches
+ * @param batch_num Batch number
+ * @param batch_offset Size of a batch
+ * @param batch_buf One batch buffer object to be set
+ */
+ void setBufferByBatch(const generic_buffer &origin_buf, uint32_t batch_num, uint64_t batch_offset,
+ generic_buffer *batch_buf);
+
+private:
+ /**
+ * @brief NPU device handles
+ *
+ */
+ std::vector<npudev_h> _dev_handles;
+
+ /**
+ * @brief Threadpool for batch-by-batch multi-threading
+ *
+ */
+ std::unique_ptr<BatchThreadPool> _batch_thread_pool;
+
+ // TODO Change key to internal trix model context(?) if it is needed
+ /**
+ * @brief Map for ID of models
+ * Internal Model ID : Model ID array for each device
+ *
+ */
+ std::unordered_map<ModelID, std::vector<uint32_t>> _model_ids;
+
+ /**
+ * @brief Map for meta data
+ * Model ID at each device : meta data
+ *
+ */
+ std::unordered_map<uint32_t, std::shared_ptr<npubin_meta>> _meta_map;
+
+ /**
+   * @brief Exception pointer captured within threads
+ *
+ */
+ std::exception_ptr _eptr;
+};
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRIX_DEV_CONTEXT_H__
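setBufferByBatch above is plain pointer arithmetic: a contiguous buffer holds batch_size equal slices of batch_offset bytes each, and batch n starts at n * batch_offset. A standalone sketch of that slicing with illustrative names (Slice and sliceByBatch are not part of the backend):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    struct Slice
    {
      uint8_t *addr;
      uint64_t size;
    };

    // Mirrors DevContext::setBufferByBatch: view one batch of a contiguous
    // buffer as equally sized per-batch slices, without copying
    Slice sliceByBatch(uint8_t *base, uint64_t total_size, uint32_t batch_num, uint32_t batch_size)
    {
      assert(total_size % batch_size == 0); // requestRun throws on this condition
      const uint64_t batch_offset = total_size / batch_size;
      return Slice{base + batch_num * batch_offset, batch_offset};
    }

    int main()
    {
      std::vector<uint8_t> buf(12);
      Slice s = sliceByBatch(buf.data(), buf.size(), 2, 4); // 3rd of 4 batches
      assert(s.size == 3 && s.addr == buf.data() + 6);
      return 0;
    }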
diff --git a/runtime/onert/backend/trix/KernelGenerator.cc b/runtime/onert/backend/trix/KernelGenerator.cc
new file mode 100644
index 000000000..2783bd75b
--- /dev/null
+++ b/runtime/onert/backend/trix/KernelGenerator.cc
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "KernelGenerator.h"
+
+#include "ops/BulkLayer.h"
+
+#include <backend/Backend.h>
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/Utils.h>
+#include <util/logging.h>
+#include <exec/DynamicShapeInferer.h>
+
+#include <stdexcept>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+KernelGenerator::KernelGenerator(const ir::Graph &graph,
+ const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<basic::TensorRegistry> &tensor_reg,
+ const std::shared_ptr<DevContext> &dev_context)
+ : basic::KernelGeneratorBase{graph},
+ _ctx(graph.operands()), _operations_ctx{graph.operations()}, _current_layout{graph.layout()},
+ _tensor_builder(tensor_builder), _tensor_reg{tensor_reg}, _dev_context{dev_context}
+{
+ // DO NOTHING
+}
+
+std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
+{
+ auto ret = std::make_unique<exec::FunctionSequence>();
+ ret->enableDynamicShapeInferer(false);
+
+ const auto &op = _graph.operations().at(ind);
+ op.accept(*this);
+ ret->append(releaseFunction());
+ return ret;
+}
+
+void KernelGenerator::visit(const ir::operation::Bulk &node)
+{
+ using ir::operation::Bulk;
+
+ std::vector<IPortableTensor *> output_tensors;
+ for (const auto &ofm_idx : node.getOutputs())
+ output_tensors.emplace_back(_tensor_reg->getPortableTensor(ofm_idx));
+
+ std::vector<const IPortableTensor *> input_tensors;
+ for (const auto &ifm_idx : node.getInputs())
+ input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
+
+ // parameters
+ const auto binary_path = node.param().binary_path;
+
+ auto fn = std::make_unique<ops::BulkLayer>();
+
+ fn->configure(input_tensors, output_tensors, binary_path, _dev_context);
+
+ _return_fn = std::move(fn);
+}
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/trix/KernelGenerator.h b/runtime/onert/backend/trix/KernelGenerator.h
new file mode 100644
index 000000000..d87dc6952
--- /dev/null
+++ b/runtime/onert/backend/trix/KernelGenerator.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRIX_KERNEL_GENERATOR_H__
+#define __ONERT_BACKEND_TRIX_KERNEL_GENERATOR_H__
+
+#include "TensorBuilder.h"
+#include "backend/basic/TensorRegistry.h"
+#include "Tensor.h"
+#include "DevContext.h"
+
+#include <backend/basic/KernelGeneratorBase.h>
+#include <ir/Operands.h>
+#include <ir/Operations.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+class KernelGenerator : public basic::KernelGeneratorBase
+{
+public:
+ KernelGenerator(const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<basic::TensorRegistry> &tensor_reg,
+ const std::shared_ptr<DevContext> &dev_context);
+
+ std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex op_ind) override;
+
+private:
+ void visit(const ir::operation::Bulk &node) override;
+
+private:
+ const ir::Operands &_ctx;
+ const ir::Operations &_operations_ctx;
+ ir::Layout _current_layout;
+ std::shared_ptr<TensorBuilder> _tensor_builder;
+ std::shared_ptr<basic::TensorRegistry> _tensor_reg;
+ const std::shared_ptr<DevContext> _dev_context;
+};
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRIX_KERNEL_GENERATOR_H__
diff --git a/runtime/onert/backend/trix/Tensor.h b/runtime/onert/backend/trix/Tensor.h
new file mode 100644
index 000000000..5138cee71
--- /dev/null
+++ b/runtime/onert/backend/trix/Tensor.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRIX_TENSOR_H__
+#define __ONERT_BACKEND_TRIX_TENSOR_H__
+
+#include <backend/basic/Tensor.h>
+#include <ir/Data.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+using Tensor = basic::Tensor;
+using ExternalTensor = basic::ExternalTensor;
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRIX_TENSOR_H__
diff --git a/runtime/onert/backend/trix/TensorBuilder.h b/runtime/onert/backend/trix/TensorBuilder.h
new file mode 100644
index 000000000..ac6ca0f9a
--- /dev/null
+++ b/runtime/onert/backend/trix/TensorBuilder.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRIX_TENSOR_BUILDER_H__
+#define __ONERT_BACKEND_TRIX_TENSOR_BUILDER_H__
+
+#include <backend/basic/TensorBuilder.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+using TensorBuilder = basic::TensorBuilder;
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRIX_TENSOR_BUILDER_H__
diff --git a/runtime/onert/backend/trix/ops/BulkLayer.cc b/runtime/onert/backend/trix/ops/BulkLayer.cc
new file mode 100644
index 000000000..db5c81ba7
--- /dev/null
+++ b/runtime/onert/backend/trix/ops/BulkLayer.cc
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BulkLayer.h"
+
+#include "../Convert.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+namespace ops
+{
+
+BulkLayer::BulkLayer() : _inputs(), _outputs(), _model_id(0), _dev_context(nullptr)
+{
+ // DO NOTHING
+}
+
+BulkLayer::~BulkLayer() { _dev_context->unRegisterModel(_model_id); }
+
+void BulkLayer::configure(const std::vector<const IPortableTensor *> &inputs,
+ std::vector<IPortableTensor *> &outputs, std::string binary_path,
+ const std::shared_ptr<DevContext> &dev_context)
+{
+ _inputs = inputs;
+ _outputs = outputs;
+ _dev_context = dev_context;
+ _model_id = _dev_context->registerModel(binary_path);
+}
+
+void BulkLayer::run()
+{
+ tensors_data_info in_info;
+ tensors_data_info out_info;
+ setDataInfo(_inputs, &in_info);
+ setDataInfo(_outputs, &out_info);
+
+ input_buffers input_bufs;
+ output_buffers output_bufs;
+ setBuffers(_inputs, &input_bufs);
+ setBuffers(_outputs, &output_bufs);
+
+ size_t batch_size = 1;
+ // TODO Remove this assumption
+ if (_inputs.size() == 1 && _outputs.size() == 1 && _inputs.at(0)->getShape().dim(0) > 1)
+ {
+ batch_size = _inputs.at(0)->getShape().dim(0);
+ }
+ _dev_context->requestRun(_model_id, &input_bufs, &in_info, &output_bufs, &out_info, batch_size);
+}
+
+void BulkLayer::prepare()
+{
+ // DO NOTHING
+}
+
+} // namespace ops
+} // namespace trix
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/trix/ops/BulkLayer.h b/runtime/onert/backend/trix/ops/BulkLayer.h
new file mode 100644
index 000000000..6590b6989
--- /dev/null
+++ b/runtime/onert/backend/trix/ops/BulkLayer.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRIX_OPS_BULKLAYER_H__
+#define __ONERT_BACKEND_TRIX_OPS_BULKLAYER_H__
+
+#include <backend/IPortableTensor.h>
+#include "../DevContext.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+namespace ops
+{
+
+class BulkLayer : public ::onert::exec::IFunction
+{
+public:
+ BulkLayer();
+ ~BulkLayer();
+
+public:
+ void configure(const std::vector<const IPortableTensor *> &inputs,
+ std::vector<IPortableTensor *> &outputs, std::string binary_path,
+ const std::shared_ptr<DevContext> &dev_context);
+
+ void run() override;
+
+ void prepare() override;
+
+private:
+ std::vector<const IPortableTensor *> _inputs;
+ std::vector<IPortableTensor *> _outputs;
+
+ ModelID _model_id;
+ std::shared_ptr<DevContext> _dev_context;
+};
+
+} // namespace ops
+} // namespace trix
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRIX_OPS_BULKLAYER_H__
diff --git a/runtime/onert/backend/trix/trix.cc b/runtime/onert/backend/trix/trix.cc
new file mode 100644
index 000000000..816fb4406
--- /dev/null
+++ b/runtime/onert/backend/trix/trix.cc
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Backend.h"
+
+extern "C" {
+
+onert::backend::Backend *onert_backend_create() { return new onert::backend::trix::Backend; }
+
+void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; }
+}
diff --git a/runtime/onert/backend/xnnpack/Backend.h b/runtime/onert/backend/xnnpack/Backend.h
new file mode 100644
index 000000000..67494a534
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/Backend.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_BACKEND_H__
+#define __ONERT_BACKEND_XNNPACK_BACKEND_H__
+
+#include "BackendContext.h"
+#include "Config.h"
+#include "KernelGenerator.h"
+
+#include <backend/Backend.h>
+
+#include <memory>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+class Backend : public ::onert::backend::Backend
+{
+public:
+ Backend() : _config{std::make_shared<Config>()} {}
+
+ std::shared_ptr<IConfig> config() const override { return _config; }
+
+ std::unique_ptr<onert::backend::BackendContext> newContext(ContextData &&data) const override
+ {
+ auto custom_kernel_builder = data.custom_kernel_builder;
+ auto &graph = *data.graph;
+ auto context = std::make_unique<BackendContext>(this, std::move(data));
+ auto tr = std::make_shared<basic::TensorRegistry>();
+ auto tb = std::make_shared<TensorBuilder>(tr);
+ context->tensor_registry = tr;
+ context->tensor_builder = tb;
+ context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, custom_kernel_builder,
+ context->external_context());
+ return context;
+ }
+
+private:
+ std::shared_ptr<IConfig> _config;
+};
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_BACKEND_H__
diff --git a/runtime/onert/backend/xnnpack/BackendContext.cc b/runtime/onert/backend/xnnpack/BackendContext.cc
new file mode 100644
index 000000000..b555a4ac6
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/BackendContext.cc
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+#include "util/logging.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandIndexSequence.h"
+#include "backend/basic/BackendContextHelpers.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+ITensorRegistry *BackendContext::genTensors() { return basic::genTensors(*this); }
+
+FunctionMap BackendContext::genKernels()
+{
+ FunctionMap ret;
+
+ for (auto &&op_ind : _data.op_order)
+ {
+ auto fn_seq = kernel_gen->generate(op_ind);
+ ret.emplace_back(op_ind, std::move(fn_seq));
+ }
+
+ basic::initConsts(*this);
+
+ // NOTE For memory optimization, we want to free some operand data
+ const_cast<ir::Graph &>(*_data.graph)
+ .operands()
+ .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
+
+ for (auto &&it : ret)
+ {
+ auto &fn_seq = it.second;
+ fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
+ }
+
+ return ret;
+}
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
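The releaseData() pass in genKernels above works because each generated kernel keeps its own reference to the constant data it needs, so the graph's copy can be dropped right after generation. A loose, self-contained analogy using shared ownership (Kernel and the weight vector are illustrative, not the runtime's actual types):

    #include <iostream>
    #include <memory>
    #include <vector>

    struct Kernel
    {
      std::shared_ptr<std::vector<float>> weights; // reference captured at generation time
    };

    int main()
    {
      auto graph_weights = std::make_shared<std::vector<float>>(1024, 0.5f);

      Kernel kernel{graph_weights}; // kernel generation: take a reference to the constant
      graph_weights.reset();        // graph side: analogue of obj.releaseData()

      std::cout << kernel.weights->size() << '\n'; // the kernel still owns the data
      return 0;
    }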
diff --git a/runtime/onert/backend/xnnpack/BackendContext.h b/runtime/onert/backend/xnnpack/BackendContext.h
new file mode 100644
index 000000000..e3b66eef3
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/BackendContext.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_XNNPACK_BACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include <util/ConfigSource.h>
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+#include "ExternalContext.h"
+
+const int kDefaultNumThreadpoolThreads = 1;
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+class BackendContext : public onert::backend::BackendContext
+{
+public:
+ BackendContext(const Backend *backend, ContextData &&data,
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
+ std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+ : onert::backend::BackendContext(backend, std::move(data), tensor_registry),
+ tensor_builder{tensor_builder}, kernel_gen{kernel_gen}, _external_context(nullptr)
+ {
+ int num_threads = util::getConfigInt(util::config::XNNPACK_THREADS);
+ if (num_threads < 1)
+ num_threads = kDefaultNumThreadpoolThreads; // default num of threads
+ _external_context.reset(new ExternalContext(static_cast<size_t>(num_threads)));
+ }
+
+ ITensorRegistry *genTensors() override;
+ FunctionMap genKernels() override;
+
+ std::shared_ptr<ExternalContext> external_context() { return _external_context; }
+
+public:
+ // TODO Make it private
+ std::shared_ptr<TensorBuilder> tensor_builder;
+ std::shared_ptr<KernelGenerator> kernel_gen;
+
+private:
+ std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_BACKEND_CONTEXT_H__
diff --git a/runtime/onert/backend/xnnpack/CMakeLists.txt b/runtime/onert/backend/xnnpack/CMakeLists.txt
new file mode 100644
index 000000000..e3de31e6f
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/CMakeLists.txt
@@ -0,0 +1,26 @@
+set(LIB_ONERT_BACKEND_XNNPACK onert_backend_xnnpack)
+
+# Skip build on unsupported architectures where XNNPACK is unavailable
+nnfw_find_package(Xnnpack QUIET)
+if(NOT Xnnpack_FOUND)
+ return()
+endif(NOT Xnnpack_FOUND)
+
+file(GLOB_RECURSE SOURCES "*.cc")
+
+add_library(${LIB_ONERT_BACKEND_XNNPACK} SHARED ${SOURCES})
+
+target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE onert_core)
+target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE nnfw_common)
+target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE nnfw_coverage)
+target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE pthreadpool)
+target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE XNNPACK)
+
+set_target_properties(${LIB_ONERT_BACKEND_XNNPACK} PROPERTIES OUTPUT_NAME backend_xnnpack)
+
+if(CMAKE_BUILD_TYPE_LC STREQUAL "release")
+ add_custom_command(TARGET ${LIB_ONERT_BACKEND_XNNPACK} POST_BUILD
+ COMMAND ${CMAKE_STRIP} "--strip-unneeded" $<TARGET_FILE_NAME:${LIB_ONERT_BACKEND_XNNPACK}>)
+endif()
+
+install(TARGETS ${LIB_ONERT_BACKEND_XNNPACK} DESTINATION lib)
diff --git a/runtime/onert/backend/xnnpack/Config.cc b/runtime/onert/backend/xnnpack/Config.cc
new file mode 100644
index 000000000..cc27f717f
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/Config.cc
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Config.h"
+
+#include <xnnpack.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+Config::~Config() { xnn_deinitialize(); }
+
+bool Config::initialize()
+{
+ xnn_status status = xnn_initialize(nullptr /* allocator */);
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to initialize XNNPACK"};
+ }
+ return true;
+}
+
+ir::Layout Config::supportLayout(const ir::IOperation &, ir::Layout) { return ir::Layout::NHWC; }
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/xnnpack/Config.h b/runtime/onert/backend/xnnpack/Config.h
new file mode 100644
index 000000000..4c5fba587
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/Config.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_CONFIG_H__
+#define __ONERT_BACKEND_XNNPACK_CONFIG_H__
+
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/ITimer.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+class Config : public IConfig
+{
+public:
+ virtual ~Config();
+
+public:
+ std::string id() override { return "xnnpack"; }
+ bool initialize() override;
+ ir::Layout supportLayout(const ir::IOperation &node, ir::Layout frontend_layout) override;
+ bool supportPermutation() override { return true; }
+ bool supportDynamicTensor() override { return true; }
+ bool supportFP16() override { return false; }
+
+ std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); }
+};
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_CONFIG_H__
diff --git a/runtime/onert/backend/xnnpack/ExternalContext.cc b/runtime/onert/backend/xnnpack/ExternalContext.cc
new file mode 100644
index 000000000..1fbcd4f02
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ExternalContext.cc
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ExternalContext.h"
+
+#include <cassert>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+ExternalContext::ExternalContext(size_t num_threads)
+ : _threadpool(pthreadpool_create(num_threads), pthreadpool_destroy)
+{
+ assert(_threadpool);
+}
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/xnnpack/ExternalContext.h b/runtime/onert/backend/xnnpack/ExternalContext.h
new file mode 100644
index 000000000..682fd2e4e
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ExternalContext.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_EXTERNAL_CONTEXT_H__
+#define __ONERT_BACKEND_XNNPACK_EXTERNAL_CONTEXT_H__
+
+#include <memory>
+#include <xnnpack.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+class ExternalContext
+{
+public:
+ ExternalContext(size_t num_threads);
+
+public:
+ pthreadpool *getThreadPool() { return _threadpool.get(); }
+
+private:
+ std::unique_ptr<pthreadpool, decltype(&pthreadpool_destroy)> _threadpool;
+};
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_EXTERNAL_CONTEXT_H__
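The _threadpool member above pairs a C handle with its C destroy function as the unique_ptr deleter, so pthreadpool_destroy runs automatically when ExternalContext is destroyed. The same RAII pattern shown with a standard C handle, as a self-contained example:

    #include <cstdio>
    #include <memory>

    // Same shape as ExternalContext::_threadpool: a C handle whose destroy
    // function serves as the unique_ptr deleter
    using FileHandle = std::unique_ptr<std::FILE, int (*)(std::FILE *)>;

    int main()
    {
      FileHandle file(std::fopen("example.txt", "w"), &std::fclose);
      if (file)
        std::fputs("hello\n", file.get());
      return 0; // fclose runs automatically when file goes out of scope
    }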
diff --git a/runtime/onert/backend/xnnpack/KernelGenerator.cc b/runtime/onert/backend/xnnpack/KernelGenerator.cc
new file mode 100644
index 000000000..b72149131
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/KernelGenerator.cc
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "KernelGenerator.h"
+
+#include "ops/ConvolutionLayer.h"
+#include "ops/DepthwiseConvolutionLayer.h"
+#include "ops/FullyConnectedLayer.h"
+
+#include <backend/Backend.h>
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/Utils.h>
+#include <util/logging.h>
+#include <exec/DynamicShapeInferer.h>
+
+#include <stdexcept>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+KernelGenerator::KernelGenerator(
+ const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<basic::TensorRegistry> &tensor_reg,
+ const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
+ const std::shared_ptr<ExternalContext> &external_context)
+ : basic::KernelGeneratorBase{graph},
+ _ctx(graph.operands()), _operations_ctx{graph.operations()}, _current_layout{graph.layout()},
+ _tensor_builder(tensor_builder), _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
+ _external_context(external_context)
+{
+ // DO NOTHING
+}
+
+std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
+{
+ auto ret = std::make_unique<exec::FunctionSequence>();
+
+ assert(_tensor_builder->dynamicTensorManager());
+ assert(_tensor_reg);
+
+ // Prepare to handle dynamic tensors later
+ auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
+ {
+ dyn_ctx->op = &_operations_ctx.at(ind);
+ dyn_ctx->dynamic_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
+ }
+ ret->dynamic_tensor_ctx(dyn_ctx);
+
+ auto &op = _graph.operations().at(ind);
+ op.accept(*this);
+ assert(_return_fn); // _return_fn must have been generated
+ ret->append(std::move(_return_fn));
+
+ for (auto &&ind : (op.getInputs() | ir::Remove::UNDEFINED) + op.getOutputs())
+ {
+ auto portable_tensor = _tensor_reg->getPortableTensor(ind);
+ if (portable_tensor)
+ {
+ assert(portable_tensor->layout() == ir::Layout::NHWC);
+ }
+
+ auto tensor = _tensor_reg->getNativeTensor(ind);
+ if (tensor)
+ {
+ tensor->increase_ref();
+ }
+ }
+ return ret;
+}
+
+void KernelGenerator::visit(const ir::operation::Conv2D &node)
+{
+ using ir::operation::Conv2D;
+
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
+ const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
+ const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
+
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
+ auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
+ auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);
+
+ const auto stride = node.param().stride;
+ const auto activation = node.param().activation;
+ const auto &param_padding = node.param().padding;
+ const auto dilation = node.param().dilation;
+ auto fn = std::make_unique<ops::ConvolutionLayer>(_external_context);
+
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+ // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
+ const auto &ker_shape = _ctx.at(ker_index).shape();
+ const auto ker_height = ker_shape.dim(1);
+ const auto ker_width = ker_shape.dim(2);
+
+ const auto padding =
+ ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ dilation.width_factor, dilation.height_factor);
+
+ fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
+ padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
+ dilation.width_factor, dilation.height_factor, activation, ofm_tensor);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
+{
+ using ir::operation::DepthwiseConv2D;
+
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
+ const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
+ const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
+
+ const auto stride = node.param().stride;
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+ // Kernel format is [1, kernel_height, kernel_width, depth_out].
+ const auto &ker_shape = _ctx.at(ker_index).shape();
+ const auto ker_height = ker_shape.dim(1);
+ const auto ker_width = ker_shape.dim(2);
+ const auto dilation_width = node.param().dilation.width_factor;
+ const auto dilation_height = node.param().dilation.height_factor;
+ const auto &param_padding = node.param().padding;
+ const auto padding = ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width,
+ ker_height, dilation_width, dilation_height);
+ const auto multiplier = node.param().multiplier;
+ const auto activation = node.param().activation;
+
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
+ auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
+ auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);
+
+ auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>(_external_context);
+
+ fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
+ padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
+ multiplier, dilation_width, dilation_height, activation, ofm_tensor);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::FullyConnected &node)
+{
+ using ir::operation::FullyConnected;
+
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
+ const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
+ const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
+ const auto activation = node.param().activation;
+
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto weight_tensor = _tensor_reg->getPortableTensor(weight_index);
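+ // The bias operand is optional for FullyConnected; pass nullptr when absent.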
+ auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index);
+
+ auto fn = std::make_unique<ops::FullyConnectedLayer>(_external_context);
+
+ fn->configure(input_tensor, weight_tensor, bias_tensor, activation, output_tensor);
+
+ _return_fn = std::move(fn);
+}
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/xnnpack/KernelGenerator.h b/runtime/onert/backend/xnnpack/KernelGenerator.h
new file mode 100644
index 000000000..271a60653
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/KernelGenerator.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_KERNEL_GENERATOR_H__
+#define __ONERT_BACKEND_XNNPACK_KERNEL_GENERATOR_H__
+
+#include "ExternalContext.h"
+#include "TensorBuilder.h"
+#include "backend/basic/TensorRegistry.h"
+#include "Tensor.h"
+
+#include <backend/CustomKernelBuilder.h>
+#include <backend/basic/KernelGeneratorBase.h>
+#include <ir/Operands.h>
+#include <ir/Operations.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+class KernelGenerator : public basic::KernelGeneratorBase
+{
+public:
+ KernelGenerator(const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<basic::TensorRegistry> &tensor_reg,
+ const std::shared_ptr<custom::IKernelBuilder> &kernel_builder,
+ const std::shared_ptr<ExternalContext> &external_context);
+
+ std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex ind) override;
+
+private:
+ void visit(const ir::operation::Conv2D &) override;
+ void visit(const ir::operation::DepthwiseConv2D &) override;
+ void visit(const ir::operation::FullyConnected &) override;
+
+private:
+ const ir::Operands &_ctx;
+ const ir::Operations &_operations_ctx;
+ ir::Layout _current_layout;
+ std::shared_ptr<TensorBuilder> _tensor_builder;
+ std::shared_ptr<basic::TensorRegistry> _tensor_reg;
+ std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder;
+ const std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_KERNEL_GENERATOR_H__
diff --git a/runtime/onert/backend/xnnpack/StaticTensorManager.h b/runtime/onert/backend/xnnpack/StaticTensorManager.h
new file mode 100644
index 000000000..adaa3623d
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/StaticTensorManager.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_STATICTENSOR_MANAGER_H__
+#define __ONERT_BACKEND_XNNPACK_STATICTENSOR_MANAGER_H__
+
+#include "backend/basic/StaticTensorManager.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+using StaticTensorManager = basic::StaticTensorManager;
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_STATICTENSOR_MANAGER_H__
diff --git a/runtime/onert/backend/xnnpack/Tensor.h b/runtime/onert/backend/xnnpack/Tensor.h
new file mode 100644
index 000000000..147361109
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/Tensor.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_TENSOR_H__
+#define __ONERT_BACKEND_XNNPACK_TENSOR_H__
+
+#include <backend/basic/Tensor.h>
+#include <ir/Data.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+using Tensor = basic::Tensor;
+using ExternalTensor = basic::ExternalTensor;
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_TENSOR_H__
diff --git a/runtime/onert/backend/xnnpack/TensorBuilder.h b/runtime/onert/backend/xnnpack/TensorBuilder.h
new file mode 100644
index 000000000..cbb7c9e18
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/TensorBuilder.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_TENSOR_BUILDER_H__
+#define __ONERT_BACKEND_XNNPACK_TENSOR_BUILDER_H__
+
+#include <backend/basic/TensorBuilder.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+using TensorBuilder = basic::TensorBuilder;
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_TENSOR_BUILDER_H__
diff --git a/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.cc b/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.cc
new file mode 100644
index 000000000..32ca99460
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.cc
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConvolutionLayer.h"
+
+#include "ir/Padding.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+ConvolutionLayer::ConvolutionLayer(const std::shared_ptr<ExternalContext> external_context)
+ : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+ _padding_type(ir::PaddingType::EXPLICIT), _padding_left(0), _padding_top(0), _padding_right(0),
+ _padding_bottom(0), _stride_width(0), _stride_height(0), _dilation_width_factor(1),
+ _dilation_height_factor(1), _activation(ir::Activation::NONE)
+{
+ // DO NOTHING
+}
+
+void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTensor *kernel,
+ const IPortableTensor *bias, ir::PaddingType padding_type,
+ const uint32_t padding_left, const uint32_t padding_right,
+ const uint32_t padding_top, const uint32_t padding_bottom,
+ const uint32_t stride_width, const uint32_t stride_height,
+ const uint32_t dilation_width_factor,
+ const uint32_t dilation_height_factor,
+ const ir::Activation activation, IPortableTensor *output)
+{
+ _input = input;
+ _kernel = kernel;
+ _bias = bias;
+ _padding_type = padding_type;
+ _padding_left = padding_left;
+ _padding_right = padding_right;
+ _padding_top = padding_top;
+ _padding_bottom = padding_bottom;
+ _stride_width = stride_width;
+ _stride_height = stride_height;
+ _dilation_width_factor = dilation_width_factor;
+ _dilation_height_factor = dilation_height_factor;
+ _activation = activation;
+ _output = output;
+
+ // TODO Support layouts other than NHWC
+ assert(_input->layout() == ir::Layout::NHWC);
+
+ assert(_activation == ir::Activation::NONE || _activation == ir::Activation::RELU ||
+ _activation == ir::Activation::RELU1 || _activation == ir::Activation::RELU6);
+}
+
+void ConvolutionLayer::run()
+{
+ assert(_external_context && _external_context->getThreadPool());
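+ // setup() may have failed at prepare() time if the model I/O buffers were
+ // not allocated yet; retry it lazily here.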
+ if (!_setup)
+ {
+ _setup = setup();
+ assert(_setup);
+ }
+
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ enum xnn_status status = xnn_run_operator(_kernel_op, _external_context->getThreadPool());
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to run FP32 Convolution operator"};
+ }
+ }
+ else
+ {
+ throw std::runtime_error{"XNNPACK Conv: unsupported data type"};
+ }
+}
+
+bool ConvolutionLayer::create()
+{
+ float output_activation_min = 0.f, output_activation_max = 0.f;
+ CalculateActivationRange<float>(_activation, &output_activation_min, &output_activation_max);
+
+ // NHWC
+ // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
+ const auto &kernel_shape = _kernel->getShape();
+ uint32_t kernel_height = kernel_shape.dim(1);
+ uint32_t kernel_width = kernel_shape.dim(2);
+ uint32_t output_channels = kernel_shape.dim(0);
+ uint32_t input_channels = kernel_shape.dim(3);
+ assert(static_cast<uint32_t>(_input->getShape().dim(3)) == input_channels);
+ assert(static_cast<uint32_t>(_output->getShape().dim(3)) == output_channels);
+
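+ // A regular convolution is XNNPACK's grouped convolution with a single
+ // group; for dense NHWC tensors the channel strides equal the channel counts.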
+ enum xnn_status status = xnn_create_convolution2d_nhwc_f32(
+ _padding_top, _padding_right, _padding_bottom, _padding_left, kernel_height, kernel_width,
+ _stride_height, _stride_width, _dilation_height_factor, _dilation_width_factor, 1 /* groups */,
+ input_channels /* group_input_channels */, output_channels /* group_output_channels */,
+ input_channels /* input_channel_stride */, output_channels /* output_channel_stride */,
+ reinterpret_cast<const float *>(_kernel->buffer()),
+ reinterpret_cast<const float *>(_bias->buffer()), output_activation_min, output_activation_max,
+ 0, &_kernel_op);
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to create FP32 Convolution operator"};
+ }
+ assert(_kernel_op != nullptr);
+ return true;
+}
+
+bool ConvolutionLayer::setup()
+{
+ if (_input->buffer() == nullptr || _output->buffer() == nullptr)
+ {
+ // the tensor may be a model input/output whose buffer is not allocated yet
+ return false;
+ }
+
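+ // Bind the concrete input/output buffers to the operator. The batch and
+ // spatial dimensions come from the current input shape.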
+ uint32_t input_width = _input->getShape().dim(2);
+ uint32_t input_height = _input->getShape().dim(1);
+ uint32_t batch_size = _input->getShape().dim(0);
+ enum xnn_status status = xnn_setup_convolution2d_nhwc_f32(
+ _kernel_op, batch_size, input_height, input_width,
+ reinterpret_cast<const float *>(_input->buffer()), reinterpret_cast<float *>(_output->buffer()),
+ _external_context->getThreadPool());
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to create FP32 Convolution operator"};
+ }
+ return true;
+}
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.h b/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.h
new file mode 100644
index 000000000..6cbaa9f3a
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_OPS_CONVOLUTION_LAYER_H__
+#define __ONERT_BACKEND_XNNPACK_OPS_CONVOLUTION_LAYER_H__
+
+#include "Layer.h"
+
+#include <xnnpack.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+class ConvolutionLayer : public Layer
+{
+public:
+ ConvolutionLayer(const std::shared_ptr<ExternalContext> external_context);
+
+public:
+ void configure(const IPortableTensor *input, const IPortableTensor *kernel,
+ const IPortableTensor *bias, ir::PaddingType padding_type,
+ const uint32_t padding_left, const uint32_t padding_right,
+ const uint32_t padding_top, const uint32_t padding_bottom,
+ const uint32_t stride_width, const uint32_t stride_height,
+ const uint32_t dilation_width_factor, const uint32_t dilation_height_factor,
+ const ir::Activation activation, IPortableTensor *output);
+
+ void run() override;
+
+ bool create() override;
+ bool setup() override;
+
+private:
+ const IPortableTensor *_input;
+ const IPortableTensor *_kernel;
+ const IPortableTensor *_bias;
+ IPortableTensor *_output;
+
+ ir::PaddingType _padding_type;
+ uint32_t _padding_left;
+ uint32_t _padding_top;
+ uint32_t _padding_right;
+ uint32_t _padding_bottom;
+
+ uint32_t _stride_width;
+ uint32_t _stride_height;
+ uint32_t _dilation_width_factor;
+ uint32_t _dilation_height_factor;
+
+ ir::Activation _activation;
+};
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_OPS_CONVOLUTION_LAYER_H__
diff --git a/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.cc b/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.cc
new file mode 100644
index 000000000..9a671d487
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.cc
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DepthwiseConvolutionLayer.h"
+
+#include "ir/Padding.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+DepthwiseConvolutionLayer::DepthwiseConvolutionLayer(
+ const std::shared_ptr<ExternalContext> external_context)
+ : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+ _padding_type(ir::PaddingType::EXPLICIT), _padding_left(0), _padding_top(0), _padding_right(0),
+ _padding_bottom(0), _stride_width(0), _stride_height(0), _multiplier(1),
+ _dilation_width_factor(1), _dilation_height_factor(1), _activation(ir::Activation::NONE)
+{
+ // DO NOTHING
+}
+
+void DepthwiseConvolutionLayer::configure(
+ const IPortableTensor *input, const IPortableTensor *kernel, const IPortableTensor *bias,
+ ir::PaddingType padding_type, const uint32_t padding_left, const uint32_t padding_right,
+ const uint32_t padding_top, const uint32_t padding_bottom, const uint32_t stride_width,
+ const uint32_t stride_height, const uint32_t multiplier, const uint32_t dilation_width_factor,
+ const uint32_t dilation_height_factor, const ir::Activation activation, IPortableTensor *output)
+{
+ _input = input;
+ _kernel = kernel;
+ _bias = bias;
+ _padding_type = padding_type;
+ _padding_left = padding_left;
+ _padding_right = padding_right;
+ _padding_top = padding_top;
+ _padding_bottom = padding_bottom;
+ _stride_width = stride_width;
+ _stride_height = stride_height;
+ _multiplier = multiplier;
+ _dilation_width_factor = dilation_width_factor;
+ _dilation_height_factor = dilation_height_factor;
+ _activation = activation;
+ _output = output;
+
+ // TODO Support layouts other than NHWC
+ assert(_input->layout() == ir::Layout::NHWC);
+
+ assert(_activation == ir::Activation::NONE || _activation == ir::Activation::RELU ||
+ _activation == ir::Activation::RELU1 || _activation == ir::Activation::RELU6);
+}
+
+void DepthwiseConvolutionLayer::run()
+{
+ assert(_external_context && _external_context->getThreadPool());
+ if (!_setup)
+ {
+ _setup = setup();
+ assert(_setup);
+ }
+
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ enum xnn_status status = xnn_run_operator(_kernel_op, _external_context->getThreadPool());
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to run FP32 DepthwiseConvolution operator"};
+ }
+ }
+ else
+ {
+ throw std::runtime_error{"XNNPACK DepthwiseConv: unsupported data type"};
+ }
+}
+
+bool DepthwiseConvolutionLayer::create()
+{
+ float output_activation_min = 0.f, output_activation_max = 0.f;
+ CalculateActivationRange<float>(_activation, &output_activation_min, &output_activation_max);
+
+ // NHWC
+ // Kernel format is [1, kernel_height, kernel_width, depth_out].
+ const auto &kernel_shape = _kernel->getShape();
+ uint32_t kernel_height = kernel_shape.dim(1);
+ uint32_t kernel_width = kernel_shape.dim(2);
+ uint32_t output_channels = kernel_shape.dim(3);
+ uint32_t input_channels = _input->getShape().dim(3);
+ assert(static_cast<uint32_t>(_output->getShape().dim(3)) == output_channels);
+ assert(output_channels == input_channels * _multiplier);
+
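+ // Depthwise convolution maps to a grouped convolution with one group per
+ // input channel, each group producing `multiplier` output channels.
+ // XNN_FLAG_DEPTHWISE_CONVOLUTION tells XNNPACK the kernel is laid out as
+ // [1, kernel_height, kernel_width, depth_out].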
+ enum xnn_status status = xnn_create_convolution2d_nhwc_f32(
+ _padding_top, _padding_right, _padding_bottom, _padding_left, kernel_height, kernel_width,
+ _stride_height, _stride_width, _dilation_height_factor, _dilation_width_factor,
+ input_channels /* groups */, 1 /* group_input_channels */,
+ _multiplier /* group_output_channels */, input_channels /* input_channel_stride */,
+ output_channels /* output_channel_stride */, reinterpret_cast<const float *>(_kernel->buffer()),
+ reinterpret_cast<const float *>(_bias->buffer()), output_activation_min, output_activation_max,
+ XNN_FLAG_DEPTHWISE_CONVOLUTION, &_kernel_op);
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to create FP32 DepthwiseConvolution operator"};
+ }
+ assert(_kernel_op != nullptr);
+ return true;
+}
+
+bool DepthwiseConvolutionLayer::setup()
+{
+ if (_input->buffer() == nullptr || _output->buffer() == nullptr)
+ {
+ // the tensor may be a model input/output whose buffer is not allocated yet
+ return false;
+ }
+
+ uint32_t input_width = _input->getShape().dim(2);
+ uint32_t input_height = _input->getShape().dim(1);
+ uint32_t batch_size = _input->getShape().dim(0);
+ enum xnn_status status = xnn_setup_convolution2d_nhwc_f32(
+ _kernel_op, batch_size, input_height, input_width,
+ reinterpret_cast<const float *>(_input->buffer()), reinterpret_cast<float *>(_output->buffer()),
+ _external_context->getThreadPool());
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to create FP32 DepthwiseConvolution operator"};
+ }
+ return true;
+}
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.h b/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.h
new file mode 100644
index 000000000..10f840ae7
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_OPS_DEPTHWISE_CONVOLUTION_LAYER_H__
+#define __ONERT_BACKEND_XNNPACK_OPS_DEPTHWISE_CONVOLUTION_LAYER_H__
+
+#include "Layer.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+class DepthwiseConvolutionLayer : public Layer
+{
+public:
+ DepthwiseConvolutionLayer(const std::shared_ptr<ExternalContext> external_context);
+
+public:
+ void configure(const IPortableTensor *input, const IPortableTensor *kernel,
+ const IPortableTensor *bias, ir::PaddingType padding_type,
+ const uint32_t padding_left, const uint32_t padding_right,
+ const uint32_t padding_top, const uint32_t padding_bottom,
+ const uint32_t stride_width, const uint32_t stride_height,
+ const uint32_t multiplier, const uint32_t dilation_width_factor,
+ const uint32_t dilation_height_factor, const ir::Activation activation,
+ IPortableTensor *output);
+
+ void run() override;
+
+ bool create() override;
+ bool setup() override;
+
+private:
+ const IPortableTensor *_input;
+ const IPortableTensor *_kernel;
+ const IPortableTensor *_bias;
+ IPortableTensor *_output;
+
+ ir::PaddingType _padding_type;
+ uint32_t _padding_left;
+ uint32_t _padding_top;
+ uint32_t _padding_right;
+ uint32_t _padding_bottom;
+
+ uint32_t _stride_width;
+ uint32_t _stride_height;
+ uint32_t _multiplier;
+ uint32_t _dilation_width_factor;
+ uint32_t _dilation_height_factor;
+
+ ir::Activation _activation;
+};
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_OPS_DEPTHWISE_CONVOLUTION_LAYER_H__
diff --git a/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.cc b/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.cc
new file mode 100644
index 000000000..66171ad42
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.cc
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FullyConnectedLayer.h"
+
+#include "ir/Padding.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+FullyConnectedLayer::FullyConnectedLayer(const std::shared_ptr<ExternalContext> external_context)
+ : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+ _activation(ir::Activation::NONE)
+{
+ // DO NOTHING
+}
+
+void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights,
+ const IPortableTensor *bias, ir::Activation activation,
+ IPortableTensor *output)
+{
+ _input = input;
+ _kernel = weights;
+ _bias = bias;
+ _activation = activation;
+ _output = output;
+
+ // TODO Support layouts other than NHWC
+ assert(_input->layout() == ir::Layout::NHWC);
+
+ assert(_activation == ir::Activation::NONE || _activation == ir::Activation::RELU ||
+ _activation == ir::Activation::RELU1 || _activation == ir::Activation::RELU6);
+}
+
+void FullyConnectedLayer::run()
+{
+ assert(_external_context && _external_context->getThreadPool());
+ if (!_setup)
+ {
+ _setup = setup();
+ assert(_setup);
+ }
+
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ enum xnn_status status = xnn_run_operator(_kernel_op, _external_context->getThreadPool());
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to run FP32 FullyConnected operator"};
+ }
+ }
+ else
+ {
+ throw std::runtime_error{"XNNPACK FC: unsupported data type"};
+ }
+}
+
+bool FullyConnectedLayer::create()
+{
+ float output_activation_min = 0.f, output_activation_max = 0.f;
+ CalculateActivationRange<float>(_activation, &output_activation_min, &output_activation_max);
+
+ const auto &kernel_shape = _kernel->getShape();
+ assert(kernel_shape.rank() == 2);
+ uint32_t output_channels = kernel_shape.dim(0);
+ uint32_t input_channels = kernel_shape.dim(1);
+
+ const auto &input_shape = _input->getShape();
+ const auto &output_shape = _output->getShape();
+ uint32_t flag = 0;
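+ // If the input rank differs from the output rank, let XNNPACK flatten the
+ // input the way TensorFlow does: all leading dimensions fold into the batch.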
+ if (input_shape.rank() != output_shape.rank())
+ {
+ flag |= XNN_FLAG_TENSORFLOW_RESHAPE_2D;
+ assert(input_shape.num_elements() % input_channels == 0);
+ }
+ else
+ {
+ assert(static_cast<uint32_t>(input_shape.dim(input_shape.rank() - 1)) == input_channels);
+ }
+
+ assert(_kernel && _kernel->buffer());
+ const float *kernel_buffer = reinterpret_cast<const float *>(_kernel->buffer());
+ const float *bias_buffer = (_bias) ? reinterpret_cast<const float *>(_bias->buffer()) : nullptr;
+
+ enum xnn_status status = xnn_create_fully_connected_nc_f32(
+ input_channels, output_channels, input_channels /* input stride */,
+ output_channels /* output stride */, kernel_buffer, bias_buffer, output_activation_min,
+ output_activation_max, flag, &_kernel_op);
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to create FP32 FullyConnected operator"};
+ }
+ assert(_kernel_op != nullptr);
+ return true;
+}
+
+bool FullyConnectedLayer::setup()
+{
+ if (_input->buffer() == nullptr || _output->buffer() == nullptr)
+ {
+ // the tensor may be a model input/output whose buffer is not allocated yet
+ return false;
+ }
+
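+ // Kernel shape is [output_channels, input_channels]; derive the effective
+ // batch size by dividing the input element count by input_channels.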
+ uint32_t batch_size = _input->getShape().num_elements() / _kernel->getShape().dim(1);
+ enum xnn_status status = xnn_setup_fully_connected_nc_f32(
+ _kernel_op, batch_size, reinterpret_cast<const float *>(_input->buffer()),
+ reinterpret_cast<float *>(_output->buffer()), _external_context->getThreadPool());
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to create FP32 FullyConnected operator"};
+ }
+ return true;
+}
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.h b/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.h
new file mode 100644
index 000000000..883607ef9
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_OPS_FULLY_CONNECTED_LAYER_H__
+#define __ONERT_BACKEND_XNNPACK_OPS_FULLY_CONNECTED_LAYER_H__
+
+#include "Layer.h"
+
+#include <xnnpack.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+class FullyConnectedLayer : public Layer
+{
+public:
+ FullyConnectedLayer(const std::shared_ptr<ExternalContext> external_context);
+
+public:
+ void configure(const IPortableTensor *input, const IPortableTensor *weights,
+ const IPortableTensor *bias, ir::Activation activation, IPortableTensor *output);
+
+ void run() override;
+
+ bool create() override;
+ bool setup() override;
+
+private:
+ const IPortableTensor *_input;
+ const IPortableTensor *_kernel;
+ const IPortableTensor *_bias;
+ IPortableTensor *_output;
+
+ ir::Activation _activation;
+};
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_OPS_FULLY_CONNECTED_LAYER_H__
diff --git a/runtime/onert/backend/xnnpack/ops/Layer.h b/runtime/onert/backend/xnnpack/ops/Layer.h
new file mode 100644
index 000000000..ec07e874f
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/Layer.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_OPS_LAYER_H__
+#define __ONERT_BACKEND_XNNPACK_OPS_LAYER_H__
+
+#include <exec/IFunction.h>
+#include <backend/IPortableTensor.h>
+#include "OperationUtils.h"
+#include "../ExternalContext.h"
+#include "../Tensor.h"
+
+#include <cassert>
+#include <memory>
+
+#include <xnnpack.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+class Layer : public ::onert::exec::IFunction
+{
+public:
+ Layer(const std::shared_ptr<ExternalContext> external_context)
+ : _kernel_op{nullptr}, _create{false}, _setup{false}, _external_context{external_context}
+ {
+ // DO NOTHING
+ }
+
+ ~Layer()
+ {
+ if (_kernel_op)
+ xnn_delete_operator(_kernel_op);
+ }
+
+public:
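+ // create() builds the XNNPACK operator object once; setup() binds the I/O
+ // buffers and may fail at prepare() time when model input/output buffers are
+ // not allocated yet, in which case run() retries it.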
+ void prepare() override
+ {
+ if (_create)
+ return;
+
+ _create = create();
+ assert(_create);
+
+ _setup = setup();
+ }
+ virtual bool create() = 0;
+ virtual bool setup() = 0;
+
+protected:
+ xnn_operator_t _kernel_op;
+ bool _create;
+ bool _setup;
+ const std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_OPS_LAYER_H__
diff --git a/runtime/onert/backend/xnnpack/ops/OperationUtils.h b/runtime/onert/backend/xnnpack/ops/OperationUtils.h
new file mode 100644
index 000000000..fe93fccc0
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/OperationUtils.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_OPS_OPERATION_UTILS_H__
+#define __ONERT_BACKEND_XNNPACK_OPS_OPERATION_UTILS_H__
+
+#include <ir/DataType.h>
+#include <ir/InternalType.h>
+#include <ir/Padding.h>
+#include <util/CalculateActivationRange.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+using OperandType = ir::DataType;
+using namespace onert::util; // CalculateActivationRange
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_OPS_OPERATION_UTILS_H__
diff --git a/runtime/onert/backend/xnnpack/xnnpack.cc b/runtime/onert/backend/xnnpack/xnnpack.cc
new file mode 100644
index 000000000..38a6c5572
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/xnnpack.cc
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Backend.h"
+
+#include <util/logging.h>
+
+extern "C" {
+onert::backend::Backend *onert_backend_create()
+{
+ VERBOSE(onert_backend_create) << "'xnnpack' loaded\n";
+ return new onert::backend::xnnpack::Backend;
+}
+
+void onert_backend_destroy(onert::backend::Backend *backend)
+{
+ VERBOSE(onert_backend_create) << "'xnnpack' unloaded\n";
+ delete backend;
+}
+}
diff --git a/runtime/onert/core/CMakeLists.txt b/runtime/onert/core/CMakeLists.txt
index d58b47ced..8ff3fdf42 100644
--- a/runtime/onert/core/CMakeLists.txt
+++ b/runtime/onert/core/CMakeLists.txt
@@ -2,18 +2,65 @@ file(GLOB_RECURSE SOURCES "src/*.cc")
file(GLOB_RECURSE TESTS "*.test.cc")
list(REMOVE_ITEM SOURCES ${TESTS})
+if(NOT BUILD_MINMAX_H5DUMPER)
+ file(GLOB_RECURSE SRC_TO_REMOVE "src/dumper/h5/*.cc")
+ list(REMOVE_ITEM SOURCES ${SRC_TO_REMOVE})
+ file(GLOB_RECURSE SRC_TO_REMOVE "src/exec/MinMaxRecorder.cc")
+ list(REMOVE_ITEM SOURCES ${SRC_TO_REMOVE})
+endif(NOT BUILD_MINMAX_H5DUMPER)
+
+if(NOT ENABLE_ONERT_TRAIN)
+ file(GLOB_RECURSE SRC_TRAIN "src/*/train/*.cc")
+ list(REMOVE_ITEM SOURCES ${SRC_TRAIN})
+ file(GLOB_RECURSE SRC_TRAIN "src/*/*/train/*.cc")
+ list(REMOVE_ITEM SOURCES ${SRC_TRAIN})
+endif(NOT ENABLE_ONERT_TRAIN)
+
add_library(onert_core SHARED ${SOURCES})
set_target_properties(onert_core PROPERTIES POSITION_INDEPENDENT_CODE ON)
+
+# NOTE
+# We publish public headers into the developer package.
+# To avoid mistakenly using a private header from a public header, do not
+# define a private target_include_directories scope for the src/ directory.
target_include_directories(onert_core PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
-target_include_directories(onert_core PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src)
-target_link_libraries(onert_core PUBLIC nnfw_lib_misc half)
-target_link_libraries(onert_core PRIVATE nnfw_lib_cker)
+
+target_link_libraries(onert_core PRIVATE jsoncpp half)
+target_link_libraries(onert_core PRIVATE nnfw_lib_misc nnfw_lib_cker)
target_link_libraries(onert_core PRIVATE nnfw_common)
target_link_libraries(onert_core PRIVATE nnfw_coverage)
target_link_libraries(onert_core PRIVATE dl ${LIB_PTHREAD})
-target_link_libraries(onert_core PRIVATE jsoncpp)
+
+# Ruy
+nnfw_find_package(Ruy REQUIRED)
+target_link_libraries(onert_core PRIVATE ruy)
target_link_libraries(onert_core INTERFACE ruy_instrumentation)
+# H5 Minmax Dumper
+if(BUILD_MINMAX_H5DUMPER)
+ nnfw_find_package(HDF5 REQUIRED)
+ target_compile_definitions(onert_core PRIVATE MINMAX_H5DUMPER=1)
+ target_include_directories(onert_core PRIVATE ${HDF5_INCLUDE_DIRS})
+ target_link_libraries(onert_core PRIVATE ${HDF5_CXX_LIBRARIES})
+endif(BUILD_MINMAX_H5DUMPER)
+
+# Training feature
+# Use public to use this flag on all modules and tests
+if(ENABLE_ONERT_TRAIN)
+ target_compile_definitions(onert_core PUBLIC ONERT_TRAIN)
+endif(ENABLE_ONERT_TRAIN)
+
+if(CMAKE_BUILD_TYPE_LC STREQUAL "release")
+ add_custom_command(TARGET onert_core POST_BUILD
+ COMMAND ${CMAKE_STRIP} "--strip-unneeded" $<TARGET_FILE_NAME:onert_core>)
+endif()
+
+# NOTE The line below is added to silence a warning in Android builds.
+# It will be removed once the Android build uses the gold linker.
+if (ANDROID)
+ target_link_libraries(onert_core INTERFACE log)
+endif (ANDROID)
+
if(ENVVAR_ONERT_CONFIG)
target_compile_definitions(onert_core PRIVATE ENVVAR_FOR_DEFAULT_CONFIG)
endif(ENVVAR_ONERT_CONFIG)
@@ -29,12 +76,14 @@ if(NOT ENABLE_TEST)
endif(NOT ENABLE_TEST)
# Unit Tests
-set(TEST_ONERT_BACKEND_CPU_COMMON test_onert_backend_cpu_common)
+set(TEST_ONERT_CORE test_onert_core)
-add_executable(${TEST_ONERT_BACKEND_CPU_COMMON} ${TESTS})
+add_executable(${TEST_ONERT_CORE} ${TESTS})
-target_link_libraries(${TEST_ONERT_BACKEND_CPU_COMMON} onert_core)
-target_link_libraries(${TEST_ONERT_BACKEND_CPU_COMMON} gtest gtest_main dl ${LIB_PTHREAD})
+target_link_libraries(${TEST_ONERT_CORE} onert_core)
+# Link nnfw_coverage so that header coverage is checked
+target_link_libraries(${TEST_ONERT_CORE} nnfw_coverage)
+target_link_libraries(${TEST_ONERT_CORE} gtest gtest_main dl ${LIB_PTHREAD})
-add_test(${TEST_ONERT_BACKEND_CPU_COMMON} ${TEST_ONERT_BACKEND_CPU_COMMON})
-install(TARGETS ${TEST_ONERT_BACKEND_CPU_COMMON} DESTINATION unittest_standalone)
+add_test(${TEST_ONERT_CORE} ${TEST_ONERT_CORE})
+install(TARGETS ${TEST_ONERT_CORE} DESTINATION unittest)
diff --git a/runtime/onert/core/include/backend/Backend.h b/runtime/onert/core/include/backend/Backend.h
index 4f6ebbba7..136a76fba 100644
--- a/runtime/onert/core/include/backend/Backend.h
+++ b/runtime/onert/core/include/backend/Backend.h
@@ -39,9 +39,7 @@ public:
virtual ~Backend() = default;
virtual std::shared_ptr<onert::backend::IConfig> config() const = 0;
- virtual std::unique_ptr<BackendContext>
- newContext(const ir::Graph &graph, const std::shared_ptr<backend::custom::IKernelBuilder> &kb,
- bool is_linear_executor) const = 0;
+ virtual std::unique_ptr<BackendContext> newContext(ContextData &&) const = 0;
};
} // namespace backend
diff --git a/runtime/onert/core/include/backend/BackendContext.h b/runtime/onert/core/include/backend/BackendContext.h
index 1eba29550..ccecc2d34 100644
--- a/runtime/onert/core/include/backend/BackendContext.h
+++ b/runtime/onert/core/include/backend/BackendContext.h
@@ -19,6 +19,10 @@
#include <memory>
#include "ir/Graph.h"
+#include "ir/OperationIndexMap.h"
+#include "ir/OperandIndexMap.h"
+#include "compiler/GraphLowerInfo.h"
+#include "exec/FunctionSequence.h"
namespace onert
{
@@ -26,62 +30,53 @@ namespace backend
{
class Backend;
-class IConstantInitializer;
-class IKernelGenerator;
-class ITensorRegister;
struct ITensorRegistry;
-struct ITensorBuilder;
-struct IOptimizer;
-class BackendContext
-{
-public:
- struct OperationInfo
- {
- ir::OperationIndex index;
- ir::Layout layout;
+using FunctionMap =
+ std::vector<std::pair<ir::OperationIndex, std::unique_ptr<exec::FunctionSequence>>>;
- OperationInfo(ir::OperationIndex index, ir::Layout layout) : index{index}, layout{layout} {}
- };
+struct ContextData
+{
+ /* A partial graph that only includes the used operands/operations of the original graph */
+ std::unique_ptr<ir::Graph> graph;
+ /* A linear order of operations. This is necessary when the graph is not fully connected */
+ std::vector<onert::ir::OperationIndex> op_order;
+ /* Operands that are defined by other backends */
+ util::Set<ir::OperandIndex> external_operands;
+ /* Operand layout info */
+ ir::OperandIndexMap<ir::Layout> operand_layouts;
+ /* Custom kernel builder */
+ std::shared_ptr<custom::IKernelBuilder> custom_kernel_builder;
+ /* Is linear executor or not */
+ bool is_linear_executor;
+};
+class BackendContext
+{
public:
- BackendContext(const Backend *backend, const ir::Graph *graph,
- std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
- std::shared_ptr<ITensorBuilder> tensor_builder = nullptr,
- std::shared_ptr<IConstantInitializer> constant_initializer = nullptr,
- std::shared_ptr<IKernelGenerator> kernel_gen = nullptr,
- std::shared_ptr<ITensorRegister> tensor_register = nullptr,
- std::shared_ptr<IOptimizer> optimizer = nullptr)
- : _backend{backend}, _graph{graph}, tensor_registry{tensor_registry},
- tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
- kernel_gen{kernel_gen}, tensor_register{tensor_register}, optimizer{optimizer}
+ BackendContext(const Backend *backend, ContextData &&data,
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr)
+ : _backend{backend}, _data{std::move(data)}, tensor_registry{tensor_registry}
{
}
virtual ~BackendContext() = default;
- void initialize(const std::vector<OperationInfo> &operation_list,
- const std::vector<ir::OperandIndex> &operand_list);
- void initConsts();
-
const Backend *backend() const { return _backend; }
- const ir::Graph *graph() const { return _graph; }
- const std::vector<OperationInfo> &operation_list() { return _operation_list; }
- const std::vector<ir::OperandIndex> &operand_list() { return _operand_list; }
+ const ir::Graph *graph() const { return _data.graph.get(); }
+ const util::Set<ir::OperandIndex> &external_operands() const { return _data.external_operands; }
+ const ir::OperandIndexMap<ir::Layout> &operand_layouts() const { return _data.operand_layouts; }
+ const ContextData &data() const { return _data; }
+
+ virtual ITensorRegistry *genTensors() = 0;
+ virtual FunctionMap genKernels() = 0;
-private:
+protected:
const Backend *_backend{nullptr};
- const ir::Graph *_graph{nullptr};
- std::vector<OperationInfo> _operation_list;
- std::vector<ir::OperandIndex> _operand_list;
+ ContextData _data;
public:
std::shared_ptr<ITensorRegistry> tensor_registry;
- std::shared_ptr<ITensorBuilder> tensor_builder;
- std::shared_ptr<IConstantInitializer> constant_initializer;
- std::shared_ptr<IKernelGenerator> kernel_gen;
- std::shared_ptr<ITensorRegister> tensor_register;
- std::shared_ptr<IOptimizer> optimizer;
};
using BackendContexts = std::unordered_map<const Backend *, std::unique_ptr<BackendContext>>;
diff --git a/runtime/onert/core/include/backend/CustomKernelBuilder.h b/runtime/onert/core/include/backend/CustomKernelBuilder.h
index 101272135..cae2fc1a3 100644
--- a/runtime/onert/core/include/backend/CustomKernelBuilder.h
+++ b/runtime/onert/core/include/backend/CustomKernelBuilder.h
@@ -49,10 +49,10 @@ struct TypeInfo
struct CustomKernelConfigParams
{
- std::vector<std::shared_ptr<backend::IPortableTensor>> input_tensors;
+ std::vector<backend::IPortableTensor *> input_tensors;
std::vector<TypeInfo> input_types;
- std::vector<std::shared_ptr<backend::IPortableTensor>> output_tensors;
+ std::vector<backend::IPortableTensor *> output_tensors;
std::vector<TypeInfo> output_types;
char *userdata;
diff --git a/runtime/onert/core/include/backend/IConfig.h b/runtime/onert/core/include/backend/IConfig.h
index ef9c5cdb2..e297c5f1e 100644
--- a/runtime/onert/core/include/backend/IConfig.h
+++ b/runtime/onert/core/include/backend/IConfig.h
@@ -18,7 +18,7 @@
#define __ONERT_BACKEND_ICONFIG_H__
#include "ir/Layout.h"
-#include "ir/Operation.h"
+#include "ir/IOperation.h"
#include "util/ITimer.h"
#include <memory>
@@ -48,13 +48,13 @@ struct IConfig
/**
* @brief Returns supported layout for the given \p node and \p frontend_layout
*
- * @param node Operation
+ * @param node IOperation
* @param frontend_layout The layout defined in the model
* @return ir::Layout The layout that the backend kernel actually uses
*/
- virtual ir::Layout supportLayout(const ir::Operation &node, ir::Layout frontend_layout) = 0;
+ virtual ir::Layout supportLayout(const ir::IOperation &node, ir::Layout frontend_layout) = 0;
/**
- * @brief The function that is called after each OpSequence run on profiling mode.
+ * @brief The function that is called after each Operation run in profiling mode.
* This may be useful for profiling GPU-based or special computing units.
*/
virtual void sync() const {}
diff --git a/runtime/onert/core/include/backend/IConstantInitializer.h b/runtime/onert/core/include/backend/IConstantInitializer.h
deleted file mode 100644
index 149acecb4..000000000
--- a/runtime/onert/core/include/backend/IConstantInitializer.h
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_ICONSTANT_INITIALIZER_H__
-#define __ONERT_BACKEND_ICONSTANT_INITIALIZER_H__
-
-#include <unordered_map>
-#include <functional>
-
-#include "ITensorBuilder.h"
-#include "ir/Coordinates.h"
-#include "ir/Layout.h"
-#include "ir/Operand.h"
-#include "ir/Operands.h"
-#include "ir/OperationVisitor.h"
-#include "ir/OpSequence.h"
-#include "util/logging.h"
-
-namespace
-{
-template <typename T>
-static void Init(const onert::ir::Operand &model_obj, onert::backend::ITensor &obj, const bool copy,
- const onert::ir::Layout frontend_layout = onert::ir::Layout::UNKNOWN)
-{
- const auto shape = model_obj.shape();
- assert(model_obj.data());
- auto base = reinterpret_cast<const T *>(model_obj.data()->base());
-
- obj.access([&](::onert::backend::ITensor &tensor) {
- switch (shape.rank())
- {
- case 0:
- {
- assert(model_obj.data()->size() == sizeof(T));
- const auto value = *reinterpret_cast<const T *>(base);
- T *into = reinterpret_cast<T *>(tensor.buffer());
- *into = value;
- break;
- }
- case 1:
- {
- auto vec_size = shape.dim(0);
- for (int32_t n = 0; n < vec_size; ++n)
- {
- const T *from = reinterpret_cast<const T *>(base) + n;
- const auto value = *from;
-
- T *into = reinterpret_cast<T *>(tensor.buffer()) + n;
-
- *into = value;
- }
- break;
- }
- case 2:
- {
- const int32_t copy_len = shape.dim(1);
-
- for (auto i = 0; i < shape.dim(0); ++i)
- {
- ::onert::ir::Coordinates coords{i, 0};
- memcpy(tensor.buffer() + tensor.calcOffset(coords), base + i * copy_len,
- copy_len * sizeof(T));
- }
- break;
- }
- case 3:
- {
- const int32_t width = shape.dim(1);
- const int32_t copy_len = shape.dim(2);
-
- for (auto i = 0; i < shape.dim(0); ++i)
- {
- for (auto j = 0; j < shape.dim(1); ++j)
- {
- ::onert::ir::Coordinates coords{i, j, 0};
- memcpy(tensor.buffer() + tensor.calcOffset(coords),
- base + i * width * copy_len + j * copy_len, copy_len * sizeof(T));
- }
- }
- break;
- }
- case 4:
- {
- const int32_t height = shape.dim(1);
- const int32_t width = shape.dim(2);
- const int32_t copy_len = shape.dim(3);
- for (auto i = 0; i < shape.dim(0); ++i)
- {
- for (auto j = 0; j < shape.dim(1); ++j)
- {
- for (auto k = 0; k < shape.dim(2); ++k)
- {
- if (copy)
- {
- ::onert::ir::Coordinates coords{i, j, k, 0};
- memcpy(tensor.buffer() + tensor.calcOffset(coords),
- base + i * height * width * copy_len + j * width * copy_len + k * copy_len,
- copy_len * sizeof(T));
- }
- else
- {
- for (auto l = 0; l < shape.dim(3); ++l)
- {
- const auto coords = ::onert::ir::convertCoordinates({i, j, k, l}, frontend_layout,
- tensor.layout());
- T *into = reinterpret_cast<T *>(tensor.buffer() + tensor.calcOffset(coords));
- T value = *(base + i * height * width * copy_len + j * width * copy_len +
- k * copy_len + l);
- *into = value;
- }
- }
- }
- }
- }
- break;
- }
- default:
- throw std::runtime_error{"Not yet supported"};
- }
- });
-}
-
-template <typename T>
-void copyInit(const onert::ir::Operand &model_obj, onert::backend::ITensor &obj)
-{
- Init<T>(model_obj, obj, true);
-}
-
-template <typename T>
-void permuteInit(const onert::ir::Operand &model_obj, onert::backend::ITensor &obj,
- const onert::ir::Layout frontend_layout)
-{
- const bool copy = frontend_layout == obj.layout();
- Init<T>(model_obj, obj, copy, frontend_layout);
-}
-
-} // namespace
-
-namespace onert
-{
-namespace backend
-{
-
-class IConstantInitializer : public ir::OperationVisitor
-{
-public:
- virtual ~IConstantInitializer() = default;
-
-public:
- void run()
- {
- assert(tensor_registry());
- for (const auto &it : _init_map)
- {
- const auto &ind = it.first;
- const auto &fn = it.second;
-
- const auto &model_obj = _operands.at(ind);
- auto tensor_obj = tensor_registry()->getNativeITensor(ind);
- assert(tensor_obj != nullptr);
- fn(model_obj, *tensor_obj);
- VERBOSE(FillOperandData) << "Fill data for operand " << ind.value() << std::endl;
- }
- _init_map.clear();
- }
-
-public:
- IConstantInitializer(const ir::Operands &operands)
- : _operands{operands}, _current_op_seq_layout{ir::Layout::UNKNOWN}
- {
- }
-
-public:
- using Initializer = std::function<void(const ir::Operand &, backend::ITensor &)>;
-
- void setLayout(ir::Layout layout) { _current_op_seq_layout = layout; }
-
-protected:
- virtual std::shared_ptr<ITensorRegistry> tensor_registry() const = 0;
-
-public:
- virtual void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj)
- {
- registerPermuteInitializer(index, obj); // as default
- }
-
-public:
- void registerCopyInitializer(const ir::OperandIndex &index, const ir::Operand &obj);
- void registerPermuteInitializer(const ir::OperandIndex &index, const ir::Operand &obj);
-
-public:
- void registerCustomInitializer(const ir::OperandIndex &index, const ir::Operand &obj,
- void (*customInit)(const onert::ir::Operand &model_obj,
- onert::backend::ITensor &obj))
- {
- // For only CONSTANTS
- // TODO Add to check if tensor has been allocated
- if (!obj.isConstant())
- return;
-
- using namespace std::placeholders;
- _init_map[index] = std::bind(customInit, _1, _2);
- }
-
-public:
- bool exist(const ir::OperandIndex &ind) { return _init_map.find(ind) != _init_map.end(); }
-
-protected:
- const ir::Operands &_operands;
- std::unordered_map<ir::OperandIndex, Initializer> _init_map;
- ir::Layout _current_op_seq_layout; // TODO Rename this to _current_layout
-};
-
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_ICONSTANT_INITIALIZER_H__
diff --git a/runtime/onert/core/include/backend/IDynamicTensorManager.h b/runtime/onert/core/include/backend/IDynamicTensorManager.h
deleted file mode 100644
index 343c52c4a..000000000
--- a/runtime/onert/core/include/backend/IDynamicTensorManager.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_IDYNAMICTENSOR_MANAGER_H__
-#define __ONERT_BACKEND_IDYNAMICTENSOR_MANAGER_H__
-
-#include "ITensorManager.h"
-
-#include <ir/Index.h>
-#include <ir/Operation.h>
-#include <ir/Shape.h>
-#include <backend/ITensor.h>
-
-namespace onert
-{
-namespace backend
-{
-
-/**
- * @brief Interface as an abstract tensor manager, providing ways to handle memory
- * for dynamic tensors.
- */
-struct IDynamicTensorManager : public ITensorManager
-{
- virtual ~IDynamicTensorManager() = default;
-
-public:
- /**
- * @brief Set new shape and allocate memory for dynamic tensor.
- * If a tensor is dynamic tensor and previously allocated memory exists,
- * it will be deallocated.
- * If a tensor is static tensor (with previously allocated memory by StaticTensorManager),
- * tensor->buffer() will be overwrite to the dynamically allocated memory
- * @param ind operand index of a tensor
- * @param new_shape tensor's new shape. While allocating memory for this new_shape,
- * tensor's shape is set to new_shape
- */
- virtual void applyShape(const ir::OperandIndex &ind, const ir::Shape &new_shape) = 0;
-
- /**
- * @brief Plan when to delete a tensor. Note this planning is done at compilation time.
- * @param op_ind operation index
- * @param operand_ind operand index of input operand of first param op. Operand can be static
- * or dynamic since tensor type may not be clearly known at compilation time.
- */
- virtual void planDealloc(ir::OperationIndex op_ind, ir::OperandIndex operand_ind) = 0;
-
- /**
- * @brief Deallocate input tensors of op if an input tensor is a dynamic tensor and it won't
- * be used anymore
- * @note This will work after calling planDealloc
- */
- virtual void deallocInput(ir::OperationIndex op_ind) = 0;
-
- /**
- * @brief Deallocate an output tensor if the tensor is a dynamic tensor
- * @note This will work after calling planDealloc
- */
- virtual void deallocSubgraphOutput(ir::OperandIndex ind) = 0;
-};
-
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_IDYNAMICTENSOR_MANAGER_H__
diff --git a/runtime/onert/core/include/backend/IExternalContext.h b/runtime/onert/core/include/backend/IExternalContext.h
deleted file mode 100644
index 88ffb502c..000000000
--- a/runtime/onert/core/include/backend/IExternalContext.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_IEXTERNAL_CONTEXT_H__
-#define __ONERT_BACKEND_IEXTERNAL_CONTEXT_H__
-
-namespace onert
-{
-namespace backend
-{
-
-struct IExternalContext
-{
- virtual ~IExternalContext() = default;
- virtual void setMaxNumThreads(int) = 0;
-};
-
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_IEXTERNAL_CONTEXT__
diff --git a/runtime/onert/core/include/backend/IKernelGenerator.h b/runtime/onert/core/include/backend/IKernelGenerator.h
deleted file mode 100644
index afc34ec21..000000000
--- a/runtime/onert/core/include/backend/IKernelGenerator.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_IKERNEL_GENERATOR_H__
-#define __ONERT_BACKEND_IKERNEL_GENERATOR_H__
-
-#include <assert.h>
-#include <memory>
-#include <functional>
-
-#include "ITensorBuilder.h"
-#include "ir/OperationVisitor.h"
-#include "ir/OpSequence.h"
-#include <memory>
-#include "exec/FunctionSequence.h"
-
-namespace onert
-{
-namespace backend
-{
-
-class IKernelGenerator : public ir::OperationVisitor
-{
-public:
- virtual ~IKernelGenerator() = default;
-
- std::unique_ptr<exec::IFunction> releaseFunction()
- {
- assert(_return_fn);
- return std::move(_return_fn);
- }
-
- std::unique_ptr<exec::FunctionSequence> generate(const ir::OpSequence &op_seq)
- {
- op_seq.accept(*this);
- return std::move(_return_fn_seq);
- }
-
-protected:
- using OperationVisitor::visit;
-
- void visit(const ir::OpSequence &) override
- {
- throw std::runtime_error("KernelGenerator: NYI for operation 'OpSequence'");
- }
-
-#define OP(InternalName) \
- void visit(const ir::operation::InternalName &) override \
- { \
- throw std::runtime_error("KernelGenerator: NYI for operation '" #InternalName "'"); \
- }
-#include "ir/Operations.lst"
-#undef OP
-
-protected:
- std::unique_ptr<exec::IFunction> _return_fn;
- std::unique_ptr<exec::FunctionSequence> _return_fn_seq; // TODO Extract this out
-};
-
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_IKERNEL_GENERATOR_H__
diff --git a/runtime/onert/core/include/backend/IMemoryManager.h b/runtime/onert/core/include/backend/IMemoryManager.h
deleted file mode 100644
index bad2fd51a..000000000
--- a/runtime/onert/core/include/backend/IMemoryManager.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_IMEMORY_MANAGER_H__
-#define __ONERT_BACKEND_IMEMORY_MANAGER_H__
-
-namespace onert
-{
-namespace backend
-{
-
-struct IMemoryManager
-{
- virtual ~IMemoryManager() = default;
-
- virtual void allocate(void) = 0;
- virtual void deallocate(void) = 0;
-};
-
-} // namespace backend
-} // namespace onert
-
-#include <unordered_set>
-#include <memory>
-
-namespace onert
-{
-namespace backend
-{
-
-using MemoryManagerSet = std::unordered_set<std::unique_ptr<backend::IMemoryManager>>;
-
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_IMEMORY_MANAGER_H__
diff --git a/runtime/onert/core/include/backend/IOptimizer.h b/runtime/onert/core/include/backend/IOptimizer.h
deleted file mode 100644
index 4844d21b9..000000000
--- a/runtime/onert/core/include/backend/IOptimizer.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_I_OPTIMIZER_H__
-#define __ONERT_BACKEND_I_OPTIMIZER_H__
-
-namespace onert
-{
-namespace ir
-{
-class LoweredGraph;
-}
-} // namespace onert
-
-namespace onert
-{
-namespace backend
-{
-
-/**
- * @brief Class for backend optimizations. This is an optional class so not all backends must have
- * it.
- *
- */
-struct IOptimizer
-{
- virtual ~IOptimizer() = default;
- /**
- * @brief Run optimization
- *
- */
- virtual void optimize() = 0;
-};
-
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_I_OPTIMIZER_H__
diff --git a/runtime/onert/core/include/backend/IPortableTensor.h b/runtime/onert/core/include/backend/IPortableTensor.h
index a05b39a33..608ca4407 100644
--- a/runtime/onert/core/include/backend/IPortableTensor.h
+++ b/runtime/onert/core/include/backend/IPortableTensor.h
@@ -18,6 +18,8 @@
#define __ONERT_BACKEND_I_PORTABLE_TENSOR_H__
#include "backend/ITensor.h"
+#include "ir/OperandInfo.h"
+#include "ir/Sparsity.h"
namespace onert
{
@@ -36,14 +38,25 @@ namespace backend
class IPortableTensor : public ITensor
{
public:
- virtual ~IPortableTensor() = default;
- virtual bool is_sparse() const { return false; }
- virtual const uint16_t *w1_segments() const { return nullptr; }
- virtual const uint16_t *w1_indices() const { return nullptr; }
+ IPortableTensor(const ir::OperandInfo &info) : _info(info) {}
+
+ virtual ~IPortableTensor();
+ virtual const ir::Sparsity *sparsity() const { return nullptr; }
+ const ir::OperandInfo &get_info() const { return _info; }
+ float data_scale() const override { return _info.typeInfo().scale(); }
+ int32_t data_zero_point() const override { return _info.typeInfo().zero_point(); }
+ const std::vector<float> &data_scales() const override { return _info.typeInfo().scales(); }
+ const std::vector<int32_t> &data_zero_points() const override
+ {
+ return _info.typeInfo().zero_points();
+ }
public:
bool has_padding() const final { return false; }
void access(const std::function<void(ITensor &tensor)> &fn) final { fn(*this); }
+
+protected:
+ ir::OperandInfo _info;
};
} // namespace backend
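
With this change, quantization parameters are served from the stored ir::OperandInfo instead of per-tensor overrides. A minimal sketch of a backend tensor built on the new base class (the name MyTensor and the raw buffer pointer are illustrative, and the remaining ITensor overrides are elided):

    class MyTensor : public onert::backend::IPortableTensor
    {
    public:
      // _info (and thus the scale/zero-point accessors) is initialized by the base class
      MyTensor(const onert::ir::OperandInfo &info, uint8_t *buf)
        : IPortableTensor(info), _buf(buf)
      {
      }

      uint8_t *buffer() const override { return _buf; }
      size_t total_size() const override { return _info.total_size(); }
      // ... other ITensor overrides (calcOffset, layout, data_type, ...) elided ...

    private:
      uint8_t *_buf; // externally owned in this sketch
    };
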
diff --git a/runtime/onert/core/include/backend/IStaticTensorManager.h b/runtime/onert/core/include/backend/IStaticTensorManager.h
deleted file mode 100644
index cef1f8a0a..000000000
--- a/runtime/onert/core/include/backend/IStaticTensorManager.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_ISTATICTENSOR_MANAGER_H__
-#define __ONERT_BACKEND_ISTATICTENSOR_MANAGER_H__
-
-#include "ITensorManager.h"
-
-namespace onert
-{
-namespace backend
-{
-
-struct IStaticTensorManager : public ITensorManager
-{
- virtual ~IStaticTensorManager() = default;
-};
-
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_ISTATICTENSOR_MANAGER_H__
diff --git a/runtime/onert/core/include/backend/ITensor.h b/runtime/onert/core/include/backend/ITensor.h
index 12b1c5433..560416264 100644
--- a/runtime/onert/core/include/backend/ITensor.h
+++ b/runtime/onert/core/include/backend/ITensor.h
@@ -20,6 +20,7 @@
#include <cstring>
#include <cstdint>
#include <functional>
+#include <stdexcept>
#include "ir/DataType.h"
#include "ir/Layout.h"
@@ -32,34 +33,38 @@ namespace onert
namespace backend
{
-struct IDynamicTensorManager;
-
class ITensor
{
public:
- virtual ~ITensor() = default;
+ virtual ~ITensor();
public:
virtual uint8_t *buffer() const = 0;
virtual size_t total_size() const = 0;
- virtual size_t dimension(size_t index) const = 0;
- virtual size_t num_dimensions() const = 0;
virtual size_t calcOffset(const ir::Coordinates &coords) const = 0;
virtual ir::Layout layout() const = 0;
virtual ir::DataType data_type() const = 0;
virtual float data_scale() const = 0;
- virtual int32_t data_offset() const = 0;
+ virtual int32_t data_zero_point() const = 0;
+ virtual const std::vector<float> &data_scales() const = 0;
+ virtual const std::vector<int32_t> &data_zero_points() const = 0;
virtual bool has_padding() const = 0;
virtual void access(const std::function<void(ITensor &tensor)> &fn) = 0;
/**
- * @brief Return the dynamic tensor manager
+ * @brief Set the shape to @c shape and possibly re-allocate the buffer
*
- * If dynamic tensors are not supported, it returns @c nullptr .
+ * If the tensor is a dynamic tensor and previously allocated memory exists,
+ * it will be deallocated.
+ * If the tensor is a static tensor (with memory previously allocated by
+ * StaticTensorManager), @c buffer() will be overwritten
*
- * @return IDynamicTensorManager* DynamicTensorManager
+ * @param shape The tensor's new shape. While memory for @c shape is allocated,
+ *              the tensor's shape is also set to @c shape
+ * @return true If applying the shape succeeded
+ * @return false If applying the shape is not supported (it throws for other errors)
*/
- virtual IDynamicTensorManager *dynamic_tensor_manager() { return nullptr; }
+ virtual bool applyShape(const ir::Shape &) { return false; }
/**
* @brief Return true if the tensor is constant
@@ -82,6 +87,12 @@ public:
throw std::runtime_error("This backend does not support dynamic tensor");
}
+ /// @brief Dealloc the buffer (only for dynamic tensors)
+ virtual void deallocBuffer()
+ {
+ throw std::runtime_error("This backend does not support resetting buffer");
+ }
+
/**
  * @brief Set the shape of the tensor to new_shape
  * @note Higher dimensions are placed at the front.
@@ -96,7 +107,18 @@ public:
* @brief Get ir::Shape of tensor
  * @note Higher dimensions are placed at the front.
*/
- virtual ir::Shape getShape() const;
+ virtual ir::Shape getShape() const = 0;
+
+ virtual bool is_subtensor() const { return false; }
+ virtual bool needMemoryMap() const { return false; }
+ virtual void enqueueWriteBuffer(const void *, bool)
+ {
+ throw std::runtime_error("This backend does not support enqueueWriteBuffer");
+ }
+ virtual void enqueueReadBuffer(void *, bool)
+ {
+ throw std::runtime_error("This backend does not support enqueueReadBuffer");
+ }
};
} // namespace backend
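
On the caller side, the dynamic-shape flow now goes through applyShape() instead of a separate dynamic tensor manager. A sketch, assuming tensor points at a backend tensor that supports dynamic shapes:

    // Resize at execution time: applyShape() re-allocates the buffer for a
    // dynamic tensor and returns false if the backend does not support it.
    onert::ir::Shape new_shape{1, 8, 8, 3};
    if (!tensor->applyShape(new_shape))
      throw std::runtime_error{"backend cannot resize this tensor"};
    assert(tensor->getShape() == new_shape);
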
diff --git a/runtime/onert/core/include/backend/ITensorBuilder.h b/runtime/onert/core/include/backend/ITensorBuilder.h
deleted file mode 100644
index f93ab81ae..000000000
--- a/runtime/onert/core/include/backend/ITensorBuilder.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_ITENSOR_BUILDER_H__
-#define __ONERT_BACKEND_ITENSOR_BUILDER_H__
-
-#include <map>
-
-#include "ir/Index.h"
-#include "ir/OperandInfo.h"
-#include "ir/Operation.h"
-#include "ir/Layout.h"
-#include "ITensor.h"
-#include "ITensorManager.h"
-#include "ITensorRegistry.h"
-#include "IDynamicTensorManager.h"
-
-namespace onert
-{
-namespace backend
-{
-
-struct ITensorBuilder
-{
- using IterateFunction = std::function<void(const ir::OperandIndex &)>;
-
- virtual ~ITensorBuilder(void) = default;
-
- /**
- * @brief Register tensor information to allocate on backend
- *
- * @param ind Index
- * @param info Info
- * @param backend_layout Backend layout
- * @param as_const Whether this tensor is constant
- */
- virtual void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout backend_layout) = 0;
-
- /**
- * @brief Check if the tensor has been registered with @c registerTensorInfo
- *
- * @return true If the tensor has been registered
- * @return false Otherwise
- */
- virtual bool isRegistered(const ir::OperandIndex &) const = 0;
-
-public: // methods for static tensor allocation
- /**
- * @brief Let the tensor builder know first use(start of lifetime) of a tensor
- * Must be called before calling @c prepare
- * Must be run up to once for each tensor before calling @c notifyLastUse
- * NOTE: Useful only for static models
- */
- virtual void notifyFirstUse(const ir::OperandIndex &) = 0;
- /**
- * @brief Let the tensor builder know last use(end of lifetime) of a tensor
- * Must be run up to once for each tensor after calling @c notifyFirstUse
- * NOTE: Useful only for static models
- */
- virtual void notifyLastUse(const ir::OperandIndex &) = 0;
- /**
- * @brief Prepare the tensors
- * Before calling this, all the tensors must be registered
- */
- virtual void prepare(void) = 0;
- /**
- * @brief Allocate the tensors
- * Before calling this, @c prepare must be called
- */
- virtual void allocate() = 0;
- /**
- * @brief Some actions after functions' @c IFunction::prepare method.
- * This is called right after each function's @c IFunction::prepare function has been
- * called.
- */
- virtual void postFunctionPrepare() = 0;
-
- /**
- * @brief Release static @c ITensorManger object which was built
- * Before calling this, @c allocate must have been called
- *
- * @return std::unique_ptr<ITensorManager> Tensor Manager object
- */
- virtual std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) = 0;
-
-public: // methods for dynamic tensor allocation
- /**
- * @brief Get dynamicTensorManager. If a backend does not support dynamic tensor, exception
- * will be thrown.
- *
- * @return pointer of IDynamicTensorManager object
- *
- * @note Since it is a pointer, its life time is from the cration of TensorBuilder
- * to the end of execution
- */
- virtual IDynamicTensorManager *dynamicTensorManager(void) { return nullptr; }
-
- /**
- * @brief Release dynamic @c ITensorManger object which was built
- * Before calling this, @c allocate must have been called
- *
- * @return std::unique_ptr<ITensorManager> Tensor Manager object
- */
- virtual std::unique_ptr<ITensorManager> releaseDynamicTensorManager(void) { return nullptr; }
-};
-
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_ITENSOR_BUILDER_H__
diff --git a/runtime/onert/core/include/backend/ITensorManager.h b/runtime/onert/core/include/backend/ITensorManager.h
deleted file mode 100644
index 4974b6645..000000000
--- a/runtime/onert/core/include/backend/ITensorManager.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_ITENSOR_MANAGER_H__
-#define __ONERT_BACKEND_ITENSOR_MANAGER_H__
-
-namespace onert
-{
-namespace backend
-{
-
-// NOTE This name ITensorManager has been discussed whether or not the name is proper.
-// Anyone can argue with any better name.
-/**
- * @brief Interface as an abstract tensor manager which has MemoryManager
- * This is used as a base class for IStaticTensorManager and IDynamicTensorManager
- */
-struct ITensorManager
-{
- virtual ~ITensorManager() = default;
-};
-
-} // namespace backend
-} // namespace onert
-
-#include <unordered_set>
-#include <memory>
-
-namespace onert
-{
-namespace backend
-{
-
-using TensorManagerSet = std::unordered_set<std::unique_ptr<backend::ITensorManager>>;
-
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_ITENSOR_MANAGER_H__
diff --git a/runtime/onert/core/include/backend/ITensorRegister.h b/runtime/onert/core/include/backend/ITensorRegister.h
deleted file mode 100644
index b8e521ce3..000000000
--- a/runtime/onert/core/include/backend/ITensorRegister.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_ITENSOR_REGISTER_H__
-#define __ONERT_BACKEND_ITENSOR_REGISTER_H__
-
-#include "ir/LowerInfoMap.h"
-#include "ITensorBuilder.h"
-#include "ir/Layout.h"
-#include "ir/OperandIndexSequence.h"
-#include "ir/OperandInfo.h"
-#include "ir/Operands.h"
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace backend
-{
-
-class ITensorRegister : public ir::OperationVisitor
-{
-public:
- virtual ~ITensorRegister() = default;
-
-public:
- void registerTensors(const ir::OpSequence &op_seq, const ir::LowerInfoMap *lower_info_map)
- {
- _current_op_seq_layout = op_seq.getLayout();
- _lower_info_map = lower_info_map;
- assert(_lower_info_map != nullptr);
- assert(tensor_builder().get() != nullptr);
- op_seq.accept(*this);
- }
-
-protected:
- virtual const ir::Operands &operands() const = 0;
- virtual std::shared_ptr<ITensorBuilder> tensor_builder() const = 0;
-
-protected:
-#define OP(InternalName) \
- void visit(const ir::operation::InternalName &node) override \
- { \
- for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs()) \
- { \
- defaultRegisterTensorInfo(ind); \
- } \
- }
-#include "ir/Operations.lst"
-#undef OP
-
-protected:
- void defaultRegisterTensorInfo(const ir::OperandIndex &index) const
- {
- if (tensor_builder()->isRegistered(index))
- {
- return;
- }
-
- const auto &obj = operands().at(index);
- const auto frontend_layout = frontendLayout();
- const auto backend_layout = backendLayout(index);
- ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
- obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
- tensor_builder()->registerTensorInfo(index, backend_info, backend_layout);
- }
-
-protected:
- ir::Layout frontendLayout() const { return _current_op_seq_layout; }
- ir::Layout backendLayout(const ir::OperandIndex &index) const
- {
- assert(_lower_info_map != nullptr);
- const auto lower_info = _lower_info_map->operand.at(index).get();
- return lower_info->def_factors().getOnlyElement().layout();
- }
-
-private:
- ir::Layout _current_op_seq_layout;
- const ir::LowerInfoMap *_lower_info_map{nullptr};
-};
-
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_ITENSOR_REGISTER_H__
diff --git a/runtime/onert/core/include/backend/ITensorRegistry.h b/runtime/onert/core/include/backend/ITensorRegistry.h
index 88fcb0fcd..b256a1fb8 100644
--- a/runtime/onert/core/include/backend/ITensorRegistry.h
+++ b/runtime/onert/core/include/backend/ITensorRegistry.h
@@ -43,7 +43,7 @@ struct ITensorRegistry
*
 * @note The returned tensor cannot outlive the dynamic tensor manager
*/
- virtual std::shared_ptr<ITensor> getITensor(const ir::OperandIndex &) = 0;
+ virtual ITensor *getITensor(const ir::OperandIndex &) = 0;
/**
* @brief Returns pointer of ITensor among native tensors
*
@@ -51,17 +51,14 @@ struct ITensorRegistry
*
 * @note The returned tensor cannot outlive the dynamic tensor manager
*/
- virtual std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &) = 0;
+ virtual ITensor *getNativeITensor(const ir::OperandIndex &) = 0;
/**
* @brief Set the Migrant Tensor which are from other backends
*
* @return true if supported
* @return false if not supported
*/
- virtual bool setMigrantTensor(const ir::OperandIndex &, const std::shared_ptr<IPortableTensor> &)
- {
- return false;
- }
+ virtual bool setMigrantTensor(const ir::OperandIndex &, IPortableTensor *) { return false; }
};
} // namespace backend
@@ -85,41 +82,37 @@ namespace backend
template <typename T_Tensor> class PortableTensorRegistryTemplate : public ITensorRegistry
{
public:
- std::shared_ptr<ITensor> getITensor(const ir::OperandIndex &ind) override
+ ITensor *getITensor(const ir::OperandIndex &ind) override
{
static_assert(std::is_base_of<ITensor, T_Tensor>::value, "T_Tensor must derive from ITensor.");
- auto external_tensor = _migrant.find(ind);
- if (external_tensor != _migrant.end())
- return external_tensor->second;
+    auto migrant_tensor = _migrant.find(ind);
+    if (migrant_tensor != _migrant.end())
+      return migrant_tensor->second;
return getNativeTensor(ind);
}
- std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &ind) override
- {
- return getNativeTensor(ind);
- }
+ ITensor *getNativeITensor(const ir::OperandIndex &ind) override { return getNativeTensor(ind); }
- std::shared_ptr<IPortableTensor> getPortableTensor(const ir::OperandIndex &ind)
+ IPortableTensor *getPortableTensor(const ir::OperandIndex &ind)
{
- auto external_tensor = _migrant.find(ind);
- if (external_tensor != _migrant.end())
+    auto migrant_tensor = _migrant.find(ind);
+    if (migrant_tensor != _migrant.end())
{
- if (external_tensor->second)
- return external_tensor->second;
+      if (migrant_tensor->second)
+        return migrant_tensor->second;
}
return getNativeTensor(ind);
}
- std::shared_ptr<T_Tensor> getNativeTensor(const ir::OperandIndex &ind)
+ T_Tensor *getNativeTensor(const ir::OperandIndex &ind)
{
auto tensor = _native.find(ind);
if (tensor != _native.end())
- return tensor->second;
+ return tensor->second.get();
return nullptr;
}
- bool setMigrantTensor(const ir::OperandIndex &ind,
- const std::shared_ptr<IPortableTensor> &tensor) override
+ bool setMigrantTensor(const ir::OperandIndex &ind, IPortableTensor *tensor) override
{
assert(tensor != nullptr);
auto itr = _native.find(ind);
@@ -129,25 +122,22 @@ public:
return true;
}
- void setNativeTensor(const ir::OperandIndex &ind, const std::shared_ptr<T_Tensor> &tensor)
+ void setNativeTensor(const ir::OperandIndex &ind, std::unique_ptr<T_Tensor> &&tensor)
{
assert(tensor != nullptr);
auto itr = _migrant.find(ind);
if (itr != _migrant.end())
throw std::runtime_error{"Tried to set a native tensor but a migrant tensor already exists."};
- _native[ind] = tensor;
+ _native[ind] = std::move(tensor);
}
- const ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &native_tensors() { return _native; }
+ const ir::OperandIndexMap<std::unique_ptr<T_Tensor>> &native_tensors() { return _native; }
- const ir::OperandIndexMap<std::shared_ptr<IPortableTensor>> &migrant_tensors()
- {
- return _migrant;
- }
+ const ir::OperandIndexMap<IPortableTensor *> &migrant_tensors() { return _migrant; }
private:
- ir::OperandIndexMap<std::shared_ptr<IPortableTensor>> _migrant;
- ir::OperandIndexMap<std::shared_ptr<T_Tensor>> _native;
+ ir::OperandIndexMap<IPortableTensor *> _migrant;
+ ir::OperandIndexMap<std::unique_ptr<T_Tensor>> _native;
};
} // namespace backend
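
The registry now owns native tensors via std::unique_ptr and every lookup returns a non-owning raw pointer, so shared_ptr reference counting disappears from the lookup path. A usage sketch, assuming info and layout are in scope:

    onert::backend::basic::TensorRegistry reg; // PortableTensorRegistryTemplate<basic::Tensor>
    onert::ir::OperandIndex ind{0};

    // Ownership moves into the registry...
    reg.setNativeTensor(ind, std::make_unique<onert::backend::basic::Tensor>(info, layout, nullptr));

    // ...and lookups hand back raw pointers valid as long as the registry lives.
    onert::backend::ITensor *t = reg.getITensor(ind);
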
diff --git a/runtime/onert/core/include/backend/basic/Allocator.h b/runtime/onert/core/include/backend/basic/Allocator.h
new file mode 100644
index 000000000..ff609322a
--- /dev/null
+++ b/runtime/onert/core/include/backend/basic/Allocator.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file        Allocator.h
+ * @brief       This file contains Allocator related classes
+ */
+
+#ifndef __ONERT_BACKEND_BASIC_ALLOCATOR_H__
+#define __ONERT_BACKEND_BASIC_ALLOCATOR_H__
+
+#include <memory>
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+/**
+ * @brief Class to allocate memory
+ */
+class Allocator
+{
+public:
+ Allocator(uint32_t capacity);
+ /**
+ * @brief Get memory base pointer
+ * @return base pointer
+ */
+ uint8_t *base() const { return _base.get(); }
+ void release() { _base.reset(); }
+
+private:
+ std::unique_ptr<uint8_t[]> _base;
+};
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BASIC_ALLOCATOR_H__
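
Allocator is a thin RAII wrapper around one heap block; a minimal usage sketch:

    onert::backend::basic::Allocator alloc(1024); // reserve 1 KiB at construction
    uint8_t *base = alloc.base();                 // base pointer of the block
    // ... carve tensor buffers out of base ...
    alloc.release();                              // or let the destructor free it
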
diff --git a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h
new file mode 100644
index 000000000..7588d42f0
--- /dev/null
+++ b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h
@@ -0,0 +1,262 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BASIC_BACKEND_CONTEXT_HELPERS_H__
+#define __ONERT_BACKEND_BASIC_BACKEND_CONTEXT_HELPERS_H__
+
+#include <vector>
+
+#include "ir/Index.h"
+#include "compiler/GraphLowerInfo.h"
+#include "util/logging.h"
+#include "backend/ITensorRegistry.h"
+#include "backend/BackendContext.h"
+#include "Tensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+// TODO Remove the template param BackendContext once unification of cpu backend context is done
+template <typename T_BackendContext> void planTensors(const T_BackendContext &ctx)
+{
+ const ir::Graph &graph = *ctx.graph();
+ const auto &order = ctx.data().op_order;
+ auto tensor_builder = ctx.tensor_builder;
+
+ ir::OperandIndexMap<uint32_t> uses_map;
+ ir::OperandIndexMap<uint32_t> def_map;
+ ir::OperandIndexSequence constants;
+
+ auto model_io =
+ (graph.getInputs() + graph.getOutputs()) | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+
+ // Prepare scanning
+ graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+ if (ctx.external_operands().contains(ind))
+ return;
+
+ // TODO Check if we need to handle unused tensors
+
+ uses_map[ind] = obj.getUses().size();
+ def_map[ind] = obj.getDef().valid() ? 1 : 0;
+
+ if (obj.isConstant())
+ constants.append(ind);
+
+ if (!tensor_builder->isRegistered(ind))
+ {
+      // These tensors are not used or defined by any operation (no use and no def)
+ const auto &info = obj.info();
+ // NOTE Currently we only support NHWC tensors for cpu-common tensors.
+ // There is no way to get the layout info from the backend context for now.
+      // When we support NCHW tensors as well, the tensor info must also be changed to
+      // the permuted shape.
+ assert(ctx.operand_layouts().at(ind) == ir::Layout::NHWC);
+ tensor_builder->registerTensorInfo(ind, info, ir::Layout::NHWC);
+ }
+ });
+
+ // Start scanning to do notify{First|Last}Use for each tensor
+
+ // If a tensor is a constant, increase the use of the tensor and allocate it first.
+  // Increasing the use count here keeps a constant from ever being deallocated during the
+  // scan, i.e., constants are deallocated last.
+ for (const auto &ind : constants)
+ {
+ uses_map[ind]++;
+ tensor_builder->notifyFirstUse(ind);
+ }
+
+ for (const auto &pair : def_map)
+ {
+ const auto &ind = pair.first;
+ const auto def_count = pair.second;
+ if (def_count == 0)
+ tensor_builder->notifyFirstUse(ind);
+ }
+
+  // This is a workaround to keep operands that appear unused (use count of zero)
+  // alive over the whole execution
+ std::vector<ir::OperandIndex> operands_last_until_end;
+ for (const auto &pair : uses_map)
+ {
+ const auto &ind = pair.first;
+ const auto use_count = pair.second;
+ if (use_count == 0)
+ operands_last_until_end.push_back(ind);
+ }
+
+ // At each operation,
+ // 1. Scan DEF of outputs. If the DEF, allocate it
+ // 2. Scan DEF of inputs. If variable tensor, allocate it
+ // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
+ for (const auto &op_ind : order)
+ {
+ const auto &op = graph.operations().at(op_ind);
+ auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+ auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+
+ // Define outputs
+ for (const auto &ind : op_outputs)
+ {
+ if (ctx.external_operands().contains(ind))
+ continue;
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ assert(def_map.find(ind) != def_map.end());
+ if (def_map[ind])
+ {
+ def_map[ind] = 0;
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ // Scan variable tensors
+    // These tensors behave like constants, but OperandInfo and LowerInfo treat them as
+    // non-constant so that the memory planning here can reduce memory usage
+ for (const auto &ind : op_inputs)
+ {
+ if (ctx.external_operands().contains(ind))
+ continue;
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ const auto &operand = graph.operands().at(ind);
+ if (operand.info().isVariable())
+ {
+ // The variable tensor with buffer is not supported yet
+ assert(operand.data() == nullptr);
+ assert(operand.getUses().size() == 1 && !operand.getDef().valid());
+ assert(uses_map[ind] == 1 && def_map[ind] == 0);
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ for (const auto &ind : op_inputs)
+ {
+ if (ctx.external_operands().contains(ind))
+ continue;
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ assert(uses_map.find(ind) != uses_map.end());
+ assert(uses_map[ind] > 0);
+ uses_map[ind]--;
+ if (uses_map[ind] == 0)
+ {
+        // plan for deallocation of the static tensor
+ tensor_builder->notifyLastUse(ind);
+ }
+ }
+ }
+
+ for (const auto &ind : operands_last_until_end)
+ {
+ tensor_builder->notifyLastUse(ind);
+ }
+
+ // Dispose and validate
+ for (const auto &ind : constants)
+ {
+ --uses_map[ind];
+ if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
+ {
+ tensor_builder->notifyLastUse(ind);
+ }
+ }
+
+ assert(
+ std::all_of(uses_map.begin(), uses_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+
+ assert(
+ std::all_of(def_map.begin(), def_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+}
+
+template <typename T_BackendContext> ITensorRegistry *genTensors(T_BackendContext &ctx)
+{
+ const ir::Graph &graph = *ctx.graph();
+ auto tensor_builder = ctx.tensor_builder;
+
+ auto model_io =
+ (graph.getInputs() + graph.getOutputs()) | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+ graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+ if (ctx.external_operands().contains(ind))
+ return;
+    // NOTE Assuming there are no layout changes (always NHWC or UNKNOWN)
+ assert(graph.layout() != ir::Layout::NCHW);
+ ir::OperandInfo backend_info{obj.shape(), obj.typeInfo(), obj.info().memAllocType(),
+ obj.isConstant()};
+ tensor_builder->registerTensorInfo(ind, backend_info, ir::Layout::NHWC);
+ });
+
+ // TODO Get compiler options from compiler, and use it rather than getting it from Env
+ if (util::getConfigString(util::config::EXECUTOR) == "Linear")
+ {
+ basic::planTensors(ctx);
+ }
+ else
+ {
+    // For executors that do not have a fixed linear execution order:
+    // as a workaround, use the static memory planner so that tensors are never deallocated
+ graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
+ if (tensor_builder->isRegistered(ind))
+ tensor_builder->notifyFirstUse(ind);
+ });
+ }
+
+ tensor_builder->allocate();
+
+ return ctx.tensor_registry.get();
+}
+
+inline void initConsts(const ir::Operands &operands,
+ const util::Set<ir::OperandIndex> &external_operands,
+ ITensorRegistry *tensor_registry)
+{
+ operands.iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) {
+ if (external_operands.contains(ind) || !operand.isConstant())
+ return;
+
+ auto tensor = tensor_registry->getNativeITensor(ind);
+ assert(tensor != nullptr);
+
+ VERBOSE(FillOperandData) << "Fill data for " << ind << std::endl;
+
+ auto data = operand.shareData();
+ assert(data && data->base());
+ ExternalTensor *ext_tensor = dynamic_cast<ExternalTensor *>(tensor);
+
+ if (ext_tensor == nullptr)
+ throw std::runtime_error{"This tensor is not external tensor"};
+
+ ext_tensor->setData(data);
+ });
+}
+
+inline void initConsts(BackendContext &ctx)
+{
+ initConsts(ctx.graph()->operands(), ctx.external_operands(), ctx.tensor_registry.get());
+}
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BASIC_BACKEND_CONTEXT_HELPERS_H__
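
planTensors() above is essentially a reference-count liveness scan: notifyFirstUse() when a tensor is defined, then a use counter decremented at each reading operation, with notifyLastUse() once it reaches zero. A self-contained toy version of the same idea, independent of the onert types:

    #include <cstdio>
    #include <map>
    #include <vector>

    int main()
    {
      // Each operation lists the tensor ids it reads; uses[t] counts remaining reads.
      std::vector<std::vector<int>> op_inputs = {{0}, {0, 1}, {1, 2}};
      std::map<int, int> uses;
      for (const auto &ins : op_inputs)
        for (int t : ins)
          ++uses[t];

      // Walk in execution order; a tensor whose count hits zero is dead afterwards.
      for (std::size_t op = 0; op < op_inputs.size(); ++op)
        for (int t : op_inputs[op])
          if (--uses[t] == 0)
            std::printf("tensor %d: last use at op %zu -> notifyLastUse\n", t, op);
      return 0;
    }
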
diff --git a/runtime/onert/core/include/backend/basic/DynamicTensorManager.h b/runtime/onert/core/include/backend/basic/DynamicTensorManager.h
new file mode 100644
index 000000000..0535dd5e1
--- /dev/null
+++ b/runtime/onert/core/include/backend/basic/DynamicTensorManager.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BASIC_DYNAMICTENSOR_MANAGER_H__
+#define __ONERT_BACKEND_BASIC_DYNAMICTENSOR_MANAGER_H__
+
+#include "MemoryManager.h"
+#include "TensorRegistry.h"
+
+#include <ir/OperandInfo.h>
+#include <ir/IOperation.h>
+#include <ir/Index.h>
+
+#include <unordered_set>
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+// TODO Find an optimized algorithm to manage memory.
+
+/**
+ * @brief Class to manage dynamic tensor and its memory
+ */
+class DynamicTensorManager
+{
+public:
+ DynamicTensorManager(const std::shared_ptr<TensorRegistry> &reg);
+
+ virtual ~DynamicTensorManager() = default;
+
+ void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info,
+ ir::Layout backend_layout);
+
+ std::shared_ptr<DynamicMemoryManager> dynamic_mem_mgr() { return _dynamic_mem_mgr; }
+
+private:
+ const ITensor *getRawITensor(ir::OperandIndex ind);
+
+private:
+ /**
+ * @brief Memory manager for dynamic tensor.
+ * @todo DynamicMemoryManager is not optimized. Optimized one is needed
+ */
+ std::shared_ptr<DynamicMemoryManager> _dynamic_mem_mgr;
+ const std::shared_ptr<TensorRegistry> _tensors;
+
+  // Contains dynamic tensor indices that can be deallocated after running an operation.
+  // Note: this map may contain static tensor indices too, so use it with care.
+ std::unordered_map<ir::OperationIndex, std::unordered_set<backend::ITensor *>>
+ _dealloc_tensor_map;
+};
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BASIC_DYNAMICTENSOR_MANAGER_H__
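
A registration sketch, assuming reg is a std::shared_ptr<TensorRegistry> and ind/operand_info describe a dynamic operand:

    onert::backend::basic::DynamicTensorManager mgr{reg};

    // Creates a basic::Tensor wired to the dynamic memory manager and puts it into
    // the registry; the buffer itself is allocated later, e.g. via applyShape().
    mgr.buildTensor(ind, operand_info, onert::ir::Layout::NHWC);
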
diff --git a/runtime/onert/core/include/backend/basic/IMemoryPlanner.h b/runtime/onert/core/include/backend/basic/IMemoryPlanner.h
new file mode 100644
index 000000000..5ca2d953f
--- /dev/null
+++ b/runtime/onert/core/include/backend/basic/IMemoryPlanner.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_IMEMORY_PLANNER_H__
+#define __ONERT_BACKEND_IMEMORY_PLANNER_H__
+
+#include "ir/OperandIndexMap.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+/**
+ * @brief Structure to have memory offset and size
+ */
+struct Block
+{
+ uint32_t offset;
+ size_t size;
+};
+
+/**
+ * @brief Interface to plan memory
+ */
+struct IMemoryPlanner
+{
+ using MemoryPlans = ir::OperandIndexMap<Block>;
+
+ /**
+ * @brief Claim memory for operand
+ * @param[in] index The operand index
+ * @param[in] size The size of the memory
+ */
+ virtual void claim(const ir::OperandIndex &, size_t) = 0;
+ /**
+ * @brief Release memory for operand
+ * @param[in] index The operand index
+ */
+ virtual void release(const ir::OperandIndex &) = 0;
+ /**
+ * @brief Get capacity for memory planning
+ * @return The value of capacity
+ */
+ virtual uint32_t capacity() = 0;
+ /**
+ * @brief Get MemoryPlans
+ * @return MemoryPlans
+ */
+ virtual MemoryPlans &memory_plans() = 0;
+
+ virtual ~IMemoryPlanner() = default;
+};
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_IMEMORY_PLANNER_H__
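
For illustration, a minimal IMemoryPlanner implementation: a bump planner that gives every claim a fresh offset and never reuses freed space (real planners overlap non-conflicting lifetimes to shrink capacity()):

    #include "backend/basic/IMemoryPlanner.h"

    namespace onert
    {
    namespace backend
    {
    namespace basic
    {

    // Simplest possible planner: each claim appends to the end; release is a no-op.
    class BumpPlanner : public IMemoryPlanner
    {
    public:
      void claim(const ir::OperandIndex &ind, size_t size) override
      {
        _plans[ind] = Block{_capacity, size};
        _capacity += static_cast<uint32_t>(size);
      }
      void release(const ir::OperandIndex &) override {} // no reuse in this sketch
      uint32_t capacity() override { return _capacity; }
      MemoryPlans &memory_plans() override { return _plans; }

    private:
      uint32_t _capacity = 0;
      MemoryPlans _plans;
    };

    } // namespace basic
    } // namespace backend
    } // namespace onert
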
diff --git a/runtime/onert/core/include/backend/basic/KernelGeneratorBase.h b/runtime/onert/core/include/backend/basic/KernelGeneratorBase.h
new file mode 100644
index 000000000..6e123e81d
--- /dev/null
+++ b/runtime/onert/core/include/backend/basic/KernelGeneratorBase.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BASIC_KERNEL_GENERATOR_BASE_H__
+#define __ONERT_BACKEND_BASIC_KERNEL_GENERATOR_BASE_H__
+
+#include <assert.h>
+#include <memory>
+#include <functional>
+
+#include "ir/Graph.h"
+#include "ir/OperationVisitor.h"
+#include "exec/FunctionSequence.h"
+#include "backend/ITensorRegistry.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+class KernelGeneratorBase : public ir::OperationVisitor
+{
+public:
+ virtual ~KernelGeneratorBase() = default;
+ KernelGeneratorBase(const ir::Graph &graph) : _graph{graph} {}
+
+ virtual std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex ind) = 0;
+
+protected:
+ using OperationVisitor::visit;
+
+#define OP(InternalName) \
+ void visit(const ir::operation::InternalName &) override \
+ { \
+ throw std::runtime_error("KernelGenerator: NYI for operation '" #InternalName "'"); \
+ }
+#include "ir/Operations.lst"
+#undef OP
+
+protected:
+ std::unique_ptr<exec::IFunction> releaseFunction()
+ {
+ assert(_return_fn);
+ return std::move(_return_fn);
+ }
+
+protected:
+ const ir::Graph &_graph;
+ std::unique_ptr<exec::IFunction> _return_fn;
+};
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BASIC_KERNEL_GENERATOR_BASE_H__
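
A concrete kernel generator then overrides only the operations its backend supports; anything else falls into the NYI stubs generated from Operations.lst. A sketch, where MyConv2DKernel is a hypothetical exec::IFunction and the accept() dispatch mirrors how onert visitors are typically driven:

    class MyKernelGenerator : public onert::backend::basic::KernelGeneratorBase
    {
    public:
      using KernelGeneratorBase::KernelGeneratorBase; // takes const ir::Graph &

      std::unique_ptr<onert::exec::FunctionSequence> generate(onert::ir::OperationIndex ind) override
      {
        auto ret = std::make_unique<onert::exec::FunctionSequence>();
        _graph.operations().at(ind).accept(*this); // fills _return_fn via a visit() below
        ret->append(releaseFunction());
        return ret;
      }

    private:
      void visit(const onert::ir::operation::Conv2D &) override
      {
        _return_fn = std::make_unique<MyConv2DKernel>(); // hypothetical kernel
      }
    };
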
diff --git a/runtime/onert/core/include/backend/basic/MemoryManager.h b/runtime/onert/core/include/backend/basic/MemoryManager.h
new file mode 100644
index 000000000..62618359a
--- /dev/null
+++ b/runtime/onert/core/include/backend/basic/MemoryManager.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_MEMORY_MANAGER_H__
+#define __ONERT_BACKEND_CPU_MEMORY_MANAGER_H__
+
+#include "Allocator.h"
+#include "IMemoryPlanner.h"
+
+namespace onert
+{
+namespace backend
+{
+
+class ITensor;
+
+namespace basic
+{
+
+class MemoryManager
+{
+public:
+ MemoryManager();
+ MemoryManager(const std::string);
+ virtual ~MemoryManager() = default;
+
+ void allocate(void);
+ uint8_t *getBuffer(const ir::OperandIndex &ind) const;
+ void deallocate(void) { _mem_alloc->release(); }
+
+ void claimPlan(const ir::OperandIndex &ind, uint32_t size);
+ void releasePlan(const ir::OperandIndex &ind);
+
+private:
+ IMemoryPlanner *createMemoryPlanner();
+ IMemoryPlanner *createMemoryPlanner(const std::string);
+
+private:
+ ir::OperandIndexMap<Block> _tensor_mem_map;
+ std::shared_ptr<IMemoryPlanner> _mem_planner;
+ std::shared_ptr<Allocator> _mem_alloc;
+};
+
+class DynamicMemoryManager
+{
+public:
+ DynamicMemoryManager() = default;
+ virtual ~DynamicMemoryManager() = default;
+
+ std::shared_ptr<Allocator> allocate(const ITensor *tensor, uint32_t capacity);
+ void deallocate(const ITensor *tensor);
+ void deallocate(void);
+
+private:
+ std::unordered_map<const ITensor *, std::shared_ptr<Allocator>> _mem_alloc_map;
+};
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_MEMORY_MANAGER_H__
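
The static-memory flow is claimPlan()/releasePlan() during planning, one allocate(), then getBuffer() per tensor; the planner may overlap tensors whose lifetimes do not intersect. A sketch with illustrative indices and sizes:

    onert::backend::basic::MemoryManager mgr; // default planner
    onert::ir::OperandIndex a{0}, b{1};

    mgr.claimPlan(a, 64);  // lifetime of a starts
    mgr.claimPlan(b, 128); // lifetime of b starts
    mgr.releasePlan(a);    // lifetime of a ends; b may reuse its space

    mgr.allocate();                    // one backing allocation for the whole plan
    uint8_t *buf_b = mgr.getBuffer(b); // offset into the shared base
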
diff --git a/runtime/onert/core/include/backend/basic/StaticTensorManager.h b/runtime/onert/core/include/backend/basic/StaticTensorManager.h
new file mode 100644
index 000000000..6088306ec
--- /dev/null
+++ b/runtime/onert/core/include/backend/basic/StaticTensorManager.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BASIC_STATICTENSOR_MANAGER_H__
+#define __ONERT_BACKEND_BASIC_STATICTENSOR_MANAGER_H__
+
+#include "backend/basic/DynamicTensorManager.h"
+#include "backend/basic/MemoryManager.h"
+#include "backend/basic/TensorRegistry.h"
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandInfo.h"
+#include "TensorRegistry.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+class DynamicTensorManager;
+
+class StaticTensorManager
+{
+public:
+ StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
+ DynamicTensorManager *dynamic_tensor_manager);
+ StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg, const std::string planner_id,
+ DynamicTensorManager *dynamic_tensor_manager);
+ virtual ~StaticTensorManager() = default;
+
+ void allocateNonconsts(void);
+ void deallocateNonconsts(void);
+
+ void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info,
+ ir::Layout backend_layout, bool as_const);
+
+ void claimPlan(const ir::OperandIndex &ind, uint32_t size);
+ void releasePlan(const ir::OperandIndex &ind);
+
+ void iterate(const std::function<void(const ir::OperandIndex &)> &fn);
+
+private:
+ std::unique_ptr<MemoryManager> _nonconst_mgr;
+ const std::shared_ptr<TensorRegistry> _tensors;
+ ir::OperandIndexMap<bool> _as_constants;
+ DynamicTensorManager *_dynamic_tensor_manager;
+};
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BASIC_STATICTENSOR_MANAGER_H__
diff --git a/runtime/onert/core/include/backend/basic/Tensor.h b/runtime/onert/core/include/backend/basic/Tensor.h
new file mode 100644
index 000000000..da5103885
--- /dev/null
+++ b/runtime/onert/core/include/backend/basic/Tensor.h
@@ -0,0 +1,250 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BASIC_TENSOR_H__
+#define __ONERT_BACKEND_BASIC_TENSOR_H__
+
+#include "Allocator.h"
+
+#include <backend/IPortableTensor.h>
+#include <ir/OperandInfo.h>
+#include <ir/Data.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+class DynamicMemoryManager;
+
+class Tensor : public IPortableTensor
+{
+public:
+ Tensor() = delete;
+ virtual ~Tensor();
+
+public:
+ Tensor(const ir::OperandInfo &info, const ir::Layout layout,
+ DynamicMemoryManager *dynamic_mem_mgr)
+ : IPortableTensor(info), _layout(layout), _buffer(nullptr), _num_references(0),
+ _dynamic_mem_mgr(dynamic_mem_mgr), _allocator(nullptr)
+ {
+ // DO NOTHING
+ }
+
+public:
+  // Exactly one of the two 'setBuffer' overloads must be called, and only once
+
+ /**
+   * @brief Set the Buffer object. This method is called for static, non-const tensors
+ */
+ void setBuffer(uint8_t *buffer) { _buffer = buffer; }
+
+ /**
+   * @brief Set the Buffer object. This method is called for dynamic or const tensors
+ */
+ void setBuffer(const std::shared_ptr<Allocator> &alloc)
+ {
+ _allocator = alloc;
+ _buffer = alloc->base();
+ }
+
+ /**
+ * @brief Reset the buffer and deallocate the allocation if it is managed by itself
+ */
+ void deallocBuffer() override;
+
+public:
+ uint8_t *buffer() const override { return _buffer; }
+  /**
+   * @brief Get the total size of this tensor in bytes
+   */
+ size_t total_size() const override { return _info.total_size(); }
+ size_t calcOffset(const ir::Coordinates &coords) const override;
+ ir::Layout layout() const override { return _layout; }
+ ir::DataType data_type() const override { return _info.typeInfo().type(); }
+ bool is_constant() const override { return _info.isConstant(); }
+ bool is_dynamic() const override { return _info.isDynamic(); }
+ void set_dynamic() override { _info.setDynamic(); }
+ bool applyShape(const ir::Shape &new_shape) override;
+ const ir::Sparsity *sparsity() const override { return _info.typeInfo().sparsity(); }
+
+ virtual void increase_ref()
+ {
+ assert(is_dynamic() ||
+ // when not dynamic
+ (_buffer != nullptr));
+
+ ++_num_references;
+ }
+
+ virtual void decrease_ref()
+ {
+ assert(_buffer != nullptr || _allocator != nullptr);
+ assert(_num_references > 0);
+ --_num_references;
+    // Constant and dynamic tensors have an _allocator
+ if (_num_references == 0)
+ {
+ if (_buffer != nullptr)
+ _buffer = nullptr;
+ if (_allocator != nullptr)
+ {
+ _allocator->release();
+ _allocator = nullptr;
+ }
+ }
+ }
+
+ /**
+ * @brief Reset reference count to zero and release data
+ */
+ virtual void reset_ref()
+ {
+ assert(_buffer != nullptr || _allocator != nullptr);
+ assert(_num_references > 0);
+ _num_references = 0;
+
+    // Only a constant tensor has the allocator pointer
+ if (_buffer != nullptr)
+ _buffer = nullptr;
+ else
+ {
+ _allocator->release();
+ _allocator = nullptr;
+ }
+ }
+
+ virtual int32_t num_references() { return _num_references; }
+
+ void setShape(const ir::Shape &new_shape) override;
+ ir::Shape getShape() const override;
+
+protected:
+ ir::Layout _layout;
+ uint8_t *_buffer;
+ int32_t _num_references;
+ DynamicMemoryManager *_dynamic_mem_mgr;
+
+private:
+ /**
+   * @brief Memory allocator for dynamic and const tensors
+   *        Since maintaining both _allocator and _buffer is confusing,
+   *        this code mainly uses _buffer (not _allocator->base()) as the memory pointer.
+   *        _allocator (a shared_ptr) is kept to guarantee that _buffer stays valid.
+ */
+ std::shared_ptr<Allocator> _allocator;
+};
+
+/**
+ * @brief Class that uses data from external memory, not managed by the backend,
+ *        instead of allocating and copying the data. ExternalTensor's data pointer
+ *        points to memory that is already allocated elsewhere (e.g. an mmapped area),
+ *        which means ExternalTensor can take any kind of ir::Data.
+ *        To support this, the following are assumed: no padding, always NHWC layout,
+ *        constant, and not dynamic.
+ */
+class ExternalTensor : public Tensor
+{
+public:
+ ExternalTensor() = delete;
+ virtual ~ExternalTensor();
+
+public:
+ ExternalTensor(const ir::OperandInfo &info, const ir::Layout layout)
+ : Tensor(info, layout, nullptr)
+ {
+ assert(_layout == ir::Layout::NHWC);
+ assert(_info.isConstant());
+ assert(_info.isDynamic() == false);
+ }
+
+public:
+ /**
+   * @brief Set Data shared from outside so that this ExternalTensor is not
+   *        allocated on the CPU backend
+ * @param[in] data data of Operand to be set
+ */
+ void setData(const std::shared_ptr<ir::Data> data)
+ {
+ assert(data != nullptr);
+ _data = data;
+    // Note: some ops such as cker::Conv may take a null buffer.
+    // That is why _buffer is kept as well
+ _buffer = const_cast<uint8_t *>(_data->base());
+ }
+
+public:
+ uint8_t *buffer() const override { return _buffer; }
+
+ bool is_constant() const override { return true; }
+ bool is_dynamic() const override { return false; }
+ void set_dynamic() override
+ {
+ throw std::runtime_error("This tensor does not support changing dynamic");
+ }
+
+ void setShape(const ir::Shape &) override
+ {
+ throw std::runtime_error("This tensor does not support changing shape");
+ }
+
+ void increase_ref() override { ++_num_references; }
+
+ void decrease_ref() override
+ {
+ assert(_data != nullptr);
+ assert(_num_references > 0);
+ --_num_references;
+ if (_num_references == 0)
+ {
+ _data.reset();
+ _buffer = nullptr;
+ }
+ }
+
+ /**
+ * @brief Reset reference count to zero and release data
+ */
+ void reset_ref() override
+ {
+ assert(_data != nullptr);
+ assert(_num_references > 0);
+ _num_references = 0;
+
+ _data.reset();
+ _buffer = nullptr;
+ }
+
+ int32_t num_references() override { return _num_references; }
+
+private:
+ std::shared_ptr<const ir::Data> _data;
+};
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BASIC_TENSOR_H__
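
Constant initialization per initConsts() earlier in this diff attaches operand data without copying. A sketch, assuming ir::CachedData is one of the ir::Data implementations and raw_bytes/byte_size/info are in scope:

    // Alias already-materialized constant bytes; no copy is made, the tensor's
    // buffer() simply points into the shared ir::Data.
    auto data = std::make_shared<onert::ir::CachedData>(raw_bytes, byte_size);
    onert::backend::basic::ExternalTensor tensor{info, onert::ir::Layout::NHWC};
    tensor.setData(data);
    assert(tensor.buffer() == data->base());
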
diff --git a/runtime/onert/core/include/backend/basic/TensorBuilder.h b/runtime/onert/core/include/backend/basic/TensorBuilder.h
new file mode 100644
index 000000000..8ea114912
--- /dev/null
+++ b/runtime/onert/core/include/backend/basic/TensorBuilder.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BASIC_TENSOR_BUILDER_H__
+#define __ONERT_BACKEND_BASIC_TENSOR_BUILDER_H__
+
+#include <backend/basic/DynamicTensorManager.h>
+#include <backend/basic/TensorRegistry.h>
+#include <backend/basic/StaticTensorManager.h>
+
+#include <ir/OperandIndexMap.h>
+
+#include "Tensor.h"
+
+#include <unordered_map>
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+class TensorBuilder
+{
+public:
+ TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg);
+ TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg, const std::string planner_id);
+
+ /**
+ * @brief Register tensor information to allocate on CPU backend
+ * @param[in] ind Operand index
+ * @param[in] info Operand information
+ * @param[in] layout Operand data layout
+ */
+ void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout backend_layout);
+
+ void notifyFirstUse(const ir::OperandIndex &);
+ void notifyLastUse(const ir::OperandIndex &);
+
+ bool isRegistered(const ir::OperandIndex &) const;
+
+ void allocate(void);
+
+ DynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); }
+
+private:
+ const std::shared_ptr<TensorRegistry> _tensor_reg;
+ std::unique_ptr<DynamicTensorManager> _dynamic_tensor_mgr;
+ std::unique_ptr<StaticTensorManager> _static_tensor_mgr;
+ ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
+};
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BASIC_TENSOR_BUILDER_H__
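
TensorBuilder drives both managers behind one interface; the order of calls follows planTensors() above. A sketch with an illustrative operand index ind and OperandInfo info:

    auto reg = std::make_shared<onert::backend::basic::TensorRegistry>();
    onert::backend::basic::TensorBuilder builder{reg};

    builder.registerTensorInfo(ind, info, onert::ir::Layout::NHWC); // 1. declare the tensor
    builder.notifyFirstUse(ind);                                    // 2. lifetime starts
    builder.notifyLastUse(ind);                                     // 3. lifetime ends
    builder.allocate();                                             // 4. materialize all buffers
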
diff --git a/runtime/onert/core/include/backend/basic/TensorRegistry.h b/runtime/onert/core/include/backend/basic/TensorRegistry.h
new file mode 100644
index 000000000..bfff45e37
--- /dev/null
+++ b/runtime/onert/core/include/backend/basic/TensorRegistry.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BASIC_TENSOR_REGISTRY__
+#define __ONERT_BACKEND_BASIC_TENSOR_REGISTRY__
+
+#include "backend/ITensorRegistry.h"
+#include "Tensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+using TensorRegistry = PortableTensorRegistryTemplate<basic::Tensor>;
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BASIC_TENSOR_REGISTRY__
diff --git a/runtime/onert/core/include/backend/basic/train/TrainableBackendContextHelpers.h b/runtime/onert/core/include/backend/basic/train/TrainableBackendContextHelpers.h
new file mode 100644
index 000000000..e1d3b034a
--- /dev/null
+++ b/runtime/onert/core/include/backend/basic/train/TrainableBackendContextHelpers.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BASIC_TRAIN_TRAINABLE_BACKEND_CONTEXT_HELPERS_H__
+#define __ONERT_BACKEND_BASIC_TRAIN_TRAINABLE_BACKEND_CONTEXT_HELPERS_H__
+
+#include "backend/basic/BackendContextHelpers.h"
+#include "backend/train/TrainableBackendContext.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+namespace train
+{
+
+// TODO Unify with the `getTensors()` function in `BackendContextHelpers.h`
+template <typename TensorBuilder>
+ITensorRegistry *genTensors(backend::train::TrainableBackendContext &ctx,
+ const std::shared_ptr<TensorBuilder> &tensor_builder)
+{
+ const auto &tgraph = *ctx.trainable_graph();
+
+ auto model_io =
+ (tgraph.getInputs() + tgraph.getOutputs()) | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+ tgraph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+ if (ctx.external_operands().contains(ind))
+ return;
+ // NOTE Assuming there is no layout changes (Always assume NHWC or UNKNOWN)
+ assert(tgraph.layout() != ir::Layout::NCHW);
+ ir::OperandInfo backend_info{obj.shape(), obj.typeInfo(), obj.info().memAllocType(),
+ obj.isConstant()};
+ tensor_builder->registerTensorInfo(ind, backend_info, ir::Layout::NHWC);
+ });
+
+ // For executors that do not have a fixed linear execution order:
+ // as a workaround, use the static memory planner so that tensors are never deallocated
+ tgraph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
+ if (tensor_builder->isRegistered(ind))
+ tensor_builder->notifyFirstUse(ind);
+ });
+
+ tensor_builder->allocate();
+
+ return ctx.tensor_registry().get();
+}
+
+} // namespace train
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BASIC_TRAIN_TRAINABLE_BACKEND_CONTEXT_HELPERS_H__
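
A hedged sketch of how a trainable backend context might delegate to this helper from its genTensors() override; the context subclass and its _tensor_builder member are assumptions, and any builder type works as long as it provides registerTensorInfo, isRegistered, notifyFirstUse, and allocate:

// Inside a hypothetical TrainableBackendContext subclass:
onert::backend::ITensorRegistry *MyBackendContext::genTensors()
{
  // _tensor_builder: assumed std::shared_ptr to a builder with the interface above
  return onert::backend::basic::train::genTensors(*this, _tensor_builder);
}
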
diff --git a/runtime/onert/core/include/backend/basic/train/TrainableTensor.h b/runtime/onert/core/include/backend/basic/train/TrainableTensor.h
new file mode 100644
index 000000000..e985f2930
--- /dev/null
+++ b/runtime/onert/core/include/backend/basic/train/TrainableTensor.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BASIC_TRAIN_TRAINABLE_TENSOR_H__
+#define __ONERT_BACKEND_BASIC_TRAIN_TRAINABLE_TENSOR_H__
+
+#include "backend/train/ITrainableTensor.h"
+
+#include "backend/basic/Tensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+namespace train
+{
+
+class TrainableTensor : public backend::train::ITrainableTensor
+{
+public:
+ TrainableTensor() = delete;
+ virtual ~TrainableTensor() = default;
+
+public:
+ TrainableTensor(const ir::OperandInfo &info, const ir::Layout layout)
+ : ITrainableTensor{info}, _tensor{info, layout, nullptr}, _opt_vars{}
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Set the Buffer object. This method is called for static and non-const tensors
+ */
+ void setBuffer(uint8_t *buffer) { _tensor.setBuffer(buffer); }
+
+public:
+ uint8_t *buffer() const override { return _tensor.buffer(); }
+ /**
+  * @brief Get the total size of this tensor in bytes
+  */
+ size_t total_size() const override { return _tensor.total_size(); }
+ size_t calcOffset(const ir::Coordinates &coords) const override
+ {
+ return _tensor.calcOffset(coords);
+ }
+ ir::Layout layout() const override { return _tensor.layout(); }
+ ir::DataType data_type() const override { return _tensor.data_type(); }
+ bool is_constant() const override { return _tensor.is_constant(); }
+ bool is_dynamic() const override { return _tensor.is_dynamic(); }
+ ir::Shape getShape() const override { return _tensor.getShape(); }
+ const ir::OperandInfo &get_info() { return _tensor.get_info(); }
+
+public:
+ std::vector<ITensor *> optVars() override;
+ void appendOptVar(std::unique_ptr<Tensor> opt_var) { _opt_vars.emplace_back(std::move(opt_var)); }
+
+public:
+ void fillBuffer(const std::shared_ptr<ir::Data> &data);
+
+private:
+ using ITensor::setShape;
+ using ITensor::set_dynamic;
+ using ITensor::applyShape;
+
+protected:
+ Tensor _tensor;
+ std::vector<std::unique_ptr<Tensor>> _opt_vars; //< Optimizer variables
+};
+
+} // namespace train
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BASIC_TRAIN_TRAINABLE_TENSOR_H__
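
The optVars()/appendOptVar() pair shows how optimizer state rides along with each trainable tensor. A sketch, assuming an Adam-style optimizer that keeps two state tensors per parameter, and assuming basic::Tensor keeps the (info, layout, dynamic_tensor_manager) constructor seen elsewhere in this diff:

#include "backend/basic/train/TrainableTensor.h"

#include <memory>

using onert::backend::basic::Tensor;
using onert::backend::basic::train::TrainableTensor;

// Hypothetical helper: attach Adam's m and v state tensors to a weight tensor.
void attachAdamState(TrainableTensor &weight, const onert::ir::OperandInfo &info)
{
  for (int i = 0; i < 2; ++i)
    weight.appendOptVar(std::make_unique<Tensor>(info, onert::ir::Layout::NHWC, nullptr));
  // During training, the optimizer walks weight.optVars() to read and update this state.
}
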
diff --git a/runtime/onert/core/include/backend/cpu_common/Allocator.h b/runtime/onert/core/include/backend/cpu_common/Allocator.h
deleted file mode 100644
index fa67fc7c4..000000000
--- a/runtime/onert/core/include/backend/cpu_common/Allocator.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file        Allocator.h
- * @brief       This file contains Allocator related classes
- */
-
-#ifndef __ONERT_BACKEND_CPU_COMMON_ALLOCATOR_H__
-#define __ONERT_BACKEND_CPU_COMMON_ALLOCATOR_H__
-
-#include <memory>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-/**
- * @brief Class to allocate memory
- */
-class Allocator
-{
-public:
- Allocator(uint32_t capacity);
- /**
- * @brief Get memory base pointer
- * @return base pointer
- */
- uint8_t *base() const { return _base.get(); }
- void release() { _base.reset(); }
-
-private:
- std::unique_ptr<uint8_t[]> _base;
-};
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_COMMON_ALLOCATOR_H__
diff --git a/runtime/onert/core/include/backend/cpu_common/DynamicTensorManager.h b/runtime/onert/core/include/backend/cpu_common/DynamicTensorManager.h
deleted file mode 100644
index e3c8c8666..000000000
--- a/runtime/onert/core/include/backend/cpu_common/DynamicTensorManager.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_COMMON_DYNAMICTENSOR_MANAGER_H__
-#define __ONERT_BACKEND_CPU_COMMON_DYNAMICTENSOR_MANAGER_H__
-
-#include "MemoryManager.h"
-#include "TensorRegistry.h"
-
-#include <backend/IDynamicTensorManager.h>
-#include <ir/OperandInfo.h>
-#include <ir/Operation.h>
-#include <ir/Index.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-// TODO Find optimized algorithm to manage memory.
-
-/**
- * @brief Class to manage dynamic tensor and its memory
- */
-class DynamicTensorManager : public backend::IDynamicTensorManager
-{
-public:
- DynamicTensorManager(const std::shared_ptr<TensorRegistry> &reg);
-
- virtual ~DynamicTensorManager() = default;
-
- void applyShape(const ir::OperandIndex &ind, const ir::Shape &new_shape) override;
-
- void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info,
- ir::Layout backend_layout);
-
- void planDealloc(ir::OperationIndex op_ind, ir::OperandIndex operand_ind) override;
- void deallocInput(ir::OperationIndex op_ind) override;
- void deallocSubgraphOutput(ir::OperandIndex ind) override;
-
-private:
- /**
- * @brief Memory manager for dynamic tensor.
- * @todo DynamicMemoryManager is not optimized. Optimized one is needed
- */
- std::shared_ptr<DynamicMemoryManager> _dynamic_mem_mgr;
- const std::shared_ptr<TensorRegistry> _tensors;
-
- // contains list of dynamic tensor index, which can be deallocated after running operation
- // note: this map could contain static tensor index too. Careful use is required.
- std::unordered_map<ir::OperationIndex, std::unordered_set<ir::OperandIndex>> _dealloc_tensor_map;
-};
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_COMMON_DYNAMICTENSOR_MANAGER_H__
diff --git a/runtime/onert/core/include/backend/cpu_common/IMemoryPlanner.h b/runtime/onert/core/include/backend/cpu_common/IMemoryPlanner.h
deleted file mode 100644
index 335f8f5c0..000000000
--- a/runtime/onert/core/include/backend/cpu_common/IMemoryPlanner.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_IMEMORY_PLANNER_H__
-#define __ONERT_BACKEND_IMEMORY_PLANNER_H__
-
-#include "ir/OperandIndexMap.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-/**
- * @brief Structure to have memory offset and size
- */
-struct Block
-{
- uint32_t offset;
- size_t size;
-};
-
-/**
- * @brief Interface to plan memory
- */
-struct IMemoryPlanner
-{
- using MemoryPlans = ir::OperandIndexMap<Block>;
-
- /**
- * @brief Claim memory for operand
- * @param[in] index The operand index
- * @param[in] size The size of the memory
- */
- virtual void claim(const ir::OperandIndex &, size_t) = 0;
- /**
- * @brief Release memory for operand
- * @param[in] index The operand index
- */
- virtual void release(const ir::OperandIndex &) = 0;
- /**
- * @brief Get capacity for memory planning
- * @return The value of capacity
- */
- virtual uint32_t capacity() = 0;
- /**
- * @brief Get MemoryPlans
- * @return MemoryPlans
- */
- virtual MemoryPlans &memory_plans() = 0;
-
- virtual ~IMemoryPlanner() = default;
-};
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_IMEMORY_PLANNER_H__
diff --git a/runtime/onert/core/include/backend/cpu_common/MemoryManager.h b/runtime/onert/core/include/backend/cpu_common/MemoryManager.h
deleted file mode 100644
index 4be7a1a11..000000000
--- a/runtime/onert/core/include/backend/cpu_common/MemoryManager.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_MEMORY_MANAGER_H__
-#define __ONERT_BACKEND_CPU_MEMORY_MANAGER_H__
-
-#include "Allocator.h"
-#include "backend/IMemoryManager.h"
-#include "IMemoryPlanner.h"
-#include "ir/OperandIndexMap.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-class MemoryManager : public backend::IMemoryManager
-{
-public:
- MemoryManager();
- MemoryManager(const std::string);
- virtual ~MemoryManager() = default;
-
- void allocate(void) override;
- uint8_t *getBuffer(const ir::OperandIndex &ind) const;
- void deallocate(void) override { _mem_alloc->release(); }
-
- void claimPlan(const ir::OperandIndex &ind, uint32_t size);
- void releasePlan(const ir::OperandIndex &ind);
-
-private:
- IMemoryPlanner *createMemoryPlanner();
- IMemoryPlanner *createMemoryPlanner(const std::string);
-
-private:
- ir::OperandIndexMap<Block> _tensor_mem_map;
- std::shared_ptr<IMemoryPlanner> _mem_planner;
- std::shared_ptr<Allocator> _mem_alloc;
-};
-
-class DynamicMemoryManager
-{
-public:
- DynamicMemoryManager() = default;
- virtual ~DynamicMemoryManager() = default;
-
- std::shared_ptr<Allocator> allocate(const ir::OperandIndex &ind, uint32_t capacity);
- void deallocate(const ir::OperandIndex &ind);
- void deallocate(void);
-
-private:
- ir::OperandIndexMap<std::shared_ptr<Allocator>> _mem_alloc_map;
-};
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_MEMORY_MANAGER_H__
diff --git a/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h b/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h
deleted file mode 100644
index 3f09b7a4a..000000000
--- a/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_COMMON_STATICTENSOR_MANAGER_H__
-#define __ONERT_BACKEND_CPU_COMMON_STATICTENSOR_MANAGER_H__
-
-#include "MemoryManager.h"
-
-#include "backend/IStaticTensorManager.h"
-#include "backend/IDynamicTensorManager.h"
-#include "ir/OperandIndexMap.h"
-#include "ir/OperandInfo.h"
-#include "TensorRegistry.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-class StaticTensorManager : public backend::IStaticTensorManager
-{
-public:
- StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
- IDynamicTensorManager *dynamic_tensor_manager);
- virtual ~StaticTensorManager() = default;
-
- void allocateConsts(void);
- void allocateNonconsts(void);
- void deallocateConsts(void);
- void deallocateNonconsts(void);
-
- void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info,
- ir::Layout backend_layout, bool as_const);
-
- void claimPlan(const ir::OperandIndex &ind, uint32_t size);
- void releasePlan(const ir::OperandIndex &ind);
-
- void iterate(const std::function<void(const ir::OperandIndex &)> &fn);
-
-private:
- std::unique_ptr<DynamicMemoryManager> _const_mgr;
- std::unique_ptr<MemoryManager> _nonconst_mgr;
- const std::shared_ptr<TensorRegistry> _tensors;
- ir::OperandIndexMap<bool> _as_constants;
- IDynamicTensorManager *_dynamic_tensor_manager;
-};
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_COMMON_STATICTENSOR_MANAGER_H__
diff --git a/runtime/onert/core/include/backend/cpu_common/Tensor.h b/runtime/onert/core/include/backend/cpu_common/Tensor.h
deleted file mode 100644
index 974501ecb..000000000
--- a/runtime/onert/core/include/backend/cpu_common/Tensor.h
+++ /dev/null
@@ -1,162 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_COMMON_TENSOR_H__
-#define __ONERT_BACKEND_CPU_COMMON_TENSOR_H__
-
-#include "Allocator.h"
-
-#include <backend/IPortableTensor.h>
-#include <ir/OperandInfo.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-class Tensor : public IPortableTensor
-{
-public:
- Tensor() = delete;
-
-public:
- Tensor(const ir::OperandInfo &info, const ir::Layout layout,
- IDynamicTensorManager *dynamic_tensor_manager)
- : _info(info), _layout(layout), _buffer(nullptr), _num_references(0),
- _dynamic_tensor_manager(dynamic_tensor_manager), _allocator(nullptr)
- {
- // DO NOTHING
- }
-
-public:
- // Only one of two method 'setBuffer' must be called once
-
- /**
- * @brief Set the Buffer object. This method is called for static and non-const tensor
- */
- void setBuffer(uint8_t *buffer)
- {
- assert(_buffer == nullptr);
- _buffer = buffer;
- }
-
- /**
- * @brief Set the Buffer object. This method is called for dynamic or const tensor
- */
- void setBuffer(const std::shared_ptr<Allocator> &alloc)
- {
- assert(_buffer == nullptr);
- _allocator = alloc;
- _buffer = alloc->base();
- }
-
- // This works just as setBuffer but it simply overwrite existing Allocator without nullptr check
- void overwriteBuffer(const std::shared_ptr<Allocator> &alloc)
- {
- _allocator = alloc;
- _buffer = alloc->base();
- }
-
- /**
- * @brief Mark this tensor does not have memory.
- * Real memory deallocation should be done by caller.
- */
- void resetBuffer()
- {
- _allocator.reset();
- _buffer = nullptr;
- }
-
-public:
- uint8_t *buffer() const override { return _buffer; }
- /**
- * @brief Get dimension by index
- *
- * @param index Index to get diemension
- * @return size_t Dimension at index
- * @note N : dimension(0)
- * H : dimension(1)
- * W : dimension(2)
- * C : dimension(3)
- */
- size_t dimension(size_t index) const override { return _info.shape().dim(index); }
- size_t num_dimensions() const override { return _info.shape().rank(); }
- size_t total_size() const override { return _info.total_size(); }
- size_t calcOffset(const ir::Coordinates &coords) const override;
- ir::Layout layout() const override { return _layout; }
- ir::DataType data_type() const override { return _info.typeInfo().type(); }
- float data_scale() const override { return _info.typeInfo().scale(); }
- int32_t data_offset() const override { return _info.typeInfo().offset(); }
- bool is_constant() const override { return _info.isConstant(); }
- bool is_dynamic() const override { return _info.isDynamic(); }
- void set_dynamic() override { _info.setDynamic(); }
- IDynamicTensorManager *dynamic_tensor_manager() override { return _dynamic_tensor_manager; }
- bool is_sparse() const override { return _info.typeInfo().sparse(); }
- virtual const uint16_t *w1_segments() const override { return _info.typeInfo().w1_segments(); }
- virtual const uint16_t *w1_indices() const override { return _info.typeInfo().w1_indices(); }
-
- virtual void increase_ref()
- {
- assert(is_dynamic() ||
- // when not dynamic
- (_buffer != nullptr));
-
- ++_num_references;
- }
- virtual void decrease_ref()
- {
- assert(_buffer != nullptr || _allocator != nullptr);
- assert(_num_references > 0);
- --_num_references;
- // constant tensor and dynamic tensor has _allocator
- if (_num_references == 0)
- {
- if (_buffer != nullptr)
- _buffer = nullptr;
- if (_allocator != nullptr)
- {
- _allocator->release();
- _allocator = nullptr;
- }
- }
- }
-
- void setShape(const ir::Shape &new_shape) override;
-
-protected:
- ir::OperandInfo _info;
- ir::Layout _layout;
- uint8_t *_buffer;
- int32_t _num_references;
- IDynamicTensorManager *_dynamic_tensor_manager;
-
-private:
- /**
- * @brief Memory allocator for dynamic tensor and const tensor
- * Since maintaing _allocator and also _buffer makes confusion,
- * we will mainly use _buffer (not _allocator.base()) for memory pointer in this code.
- * _allocator(shared_ptr) is used to guarantee that we have valid _buffer.
- */
- std::shared_ptr<Allocator> _allocator;
-};
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_COMMON_TENSOR_H__
diff --git a/runtime/onert/core/include/backend/cpu_common/TensorRegistry.h b/runtime/onert/core/include/backend/cpu_common/TensorRegistry.h
deleted file mode 100644
index 5896fb7ad..000000000
--- a/runtime/onert/core/include/backend/cpu_common/TensorRegistry.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_COMMON_TENSOR_REGISTRY__
-#define __ONERT_BACKEND_CPU_COMMON_TENSOR_REGISTRY__
-
-#include "backend/ITensorRegistry.h"
-#include "Tensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-using TensorRegistry = PortableTensorRegistryTemplate<cpu_common::Tensor>;
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_COMMON_TENSOR_REGISTRY__
diff --git a/runtime/onert/core/include/backend/train/ITensorRegistry.h b/runtime/onert/core/include/backend/train/ITensorRegistry.h
new file mode 100644
index 000000000..72b8a35db
--- /dev/null
+++ b/runtime/onert/core/include/backend/train/ITensorRegistry.h
@@ -0,0 +1,246 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRAIN_ITENSOR_REGISTRY_H__
+#define __ONERT_BACKEND_TRAIN_ITENSOR_REGISTRY_H__
+
+#include "backend/ITensorRegistry.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+
+class ITensorRegistry : public backend::ITensorRegistry
+{
+public:
+ /**
+ * @brief Returns a pointer to an ITensor among native and migrant tensors, excluding derivative and gradient tensors
+ *
+ */
+ using backend::ITensorRegistry::getITensor;
+
+ /**
+ * @brief Returns a pointer to an ITensor among native tensors, excluding derivative and gradient tensors
+ *
+ */
+ using backend::ITensorRegistry::getNativeITensor;
+
+ /**
+ * @brief Returns pointer of ITensor for derivative
+ *
+ * @note Returned tensor cannot be used longer than dynamic tensor manager
+ */
+ virtual ITensor *getDerivativeITensor(const ir::OperandIndex &) = 0;
+
+ /**
+ * @brief Returns pointer of ITensor for gradient
+ *
+ * @note Returned tensor cannot be used longer than dynamic tensor manager
+ */
+ virtual ITensor *getGradientITensor(const ir::OperandIndex &) = 0;
+};
+
+} // namespace train
+} // namespace backend
+} // namespace onert
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+
+template <typename Tensor, typename TrainableTensor, typename DerivativeTensor,
+ typename GradientTensor>
+class PortableTensorRegistryTemplate : public backend::train::ITensorRegistry
+{
+public:
+ using TrainingTensors = std::tuple<TrainableTensor *, GradientTensor *>;
+
+public:
+ ITensor *getITensor(const ir::OperandIndex &index) override
+ {
+ auto migrant_tensor = _migrant.find(index);
+ if (migrant_tensor != _migrant.end())
+ return migrant_tensor->second;
+ return getNativeITensor(index);
+ }
+
+ ITensor *getNativeITensor(const ir::OperandIndex &index) override
+ {
+ ITensor *tensor = getTrainableTensor(index);
+ if (tensor == nullptr)
+ tensor = getNonConstTensor(index);
+ return tensor;
+ }
+
+ ITensor *getDerivativeITensor(const ir::OperandIndex &index) override
+ {
+ return getDerivativeTensor(index);
+ }
+
+ ITensor *getGradientITensor(const ir::OperandIndex &index) override
+ {
+ return getGradientTensor(index);
+ }
+
+ IPortableTensor *getPortableTensor(const ir::OperandIndex &index)
+ {
+ auto tensor = _trainable.find(index);
+ if (tensor != _trainable.end())
+ {
+ if (tensor->second)
+ return tensor->second.get();
+ }
+ return getNonConstTensor(index);
+ }
+
+ Tensor *getNonConstTensor(const ir::OperandIndex &index)
+ {
+ auto tensor = _non_const.find(index);
+ if (tensor != _non_const.end())
+ return tensor->second.get();
+ return nullptr;
+ }
+
+ TrainableTensor *getTrainableTensor(const ir::OperandIndex &index)
+ {
+ auto tensor = _trainable.find(index);
+ if (tensor != _trainable.end())
+ return tensor->second.get();
+
+ return nullptr;
+ }
+
+ DerivativeTensor *getDerivativeTensor(const ir::OperandIndex &index)
+ {
+ auto tensor = _derivative.find(index);
+ if (tensor != _derivative.end())
+ return tensor->second.get();
+ return nullptr;
+ }
+
+ GradientTensor *getGradientTensor(const ir::OperandIndex &index)
+ {
+ auto tensor = _gradient.find(index);
+ if (tensor != _gradient.end())
+ return tensor->second.get();
+ return nullptr;
+ }
+
+ TrainingTensors getTrainingTensors(const ir::OperandIndex &index)
+ {
+ auto trainable = getTrainableTensor(index);
+ if (trainable == nullptr)
+ throw std::runtime_error{
+ "Tried to get a trainable tensor but the corresponding tensor does not exist."};
+
+ auto gradient = getGradientTensor(index);
+ if (gradient == nullptr)
+ throw std::runtime_error{
+ "Tried to get a gradient tensor but the corresponding tensor does not exist."};
+
+ return TrainingTensors{std::make_pair(trainable, gradient)};
+ }
+
+ bool setMigrantTensor(const ir::OperandIndex &index, IPortableTensor *tensor) override
+ {
+ assert(tensor != nullptr);
+ if (getITensor(index) != nullptr)
+ throw std::runtime_error{
+ "Tried to set a trainable tensor but another tensor already exists."};
+
+ _migrant[index] = tensor;
+ return true;
+ }
+
+ void setNonConstTensor(const ir::OperandIndex &index, std::unique_ptr<Tensor> tensor)
+ {
+ assert(tensor != nullptr);
+ if (getITensor(index) != nullptr)
+ throw std::runtime_error{
+ "Tried to set a trainable tensor but another tensor already exists."};
+
+ _non_const[index] = std::move(tensor);
+ }
+
+ void setTrainableTensor(const ir::OperandIndex &index, std::unique_ptr<TrainableTensor> tensor)
+ {
+ assert(tensor != nullptr);
+ if (getITensor(index) != nullptr)
+ throw std::runtime_error{
+ "Tried to set a trainable tensor but another tensor already exists."};
+
+ _trainable[index] = std::move(tensor);
+ }
+
+ void setDerivativeTensor(const ir::OperandIndex &index, std::unique_ptr<DerivativeTensor> tensor)
+ {
+ assert(tensor != nullptr);
+ auto itr = _derivative.find(index);
+ if (itr != _derivative.end())
+ throw std::runtime_error{
+ "Tried to set a derivative tensor but another derivative tensor already exists."};
+
+ _derivative[index] = std::move(tensor);
+ }
+
+ void setGradientTensor(const ir::OperandIndex &index, std::unique_ptr<GradientTensor> tensor)
+ {
+ assert(tensor != nullptr);
+ auto itr = _gradient.find(index);
+ if (itr != _gradient.end())
+ throw std::runtime_error{
+ "Tried to set a gradient tensor but another gradient tensor already exists."};
+
+ _gradient[index] = std::move(tensor);
+ }
+
+ const ir::OperandIndexMap<std::unique_ptr<TrainableTensor>> &trainable_tensors()
+ {
+ return _trainable;
+ }
+ const ir::OperandIndexMap<std::unique_ptr<Tensor>> &nonconst_tensors() { return _non_const; }
+ const ir::OperandIndexMap<std::unique_ptr<DerivativeTensor>> &derivative_tensors() { return _derivative; }
+ const ir::OperandIndexMap<std::unique_ptr<GradientTensor>> &gradient_tensors()
+ {
+ return _gradient;
+ }
+
+private:
+ // Native tensors
+ ir::OperandIndexMap<std::unique_ptr<Tensor>> _non_const;
+ ir::OperandIndexMap<std::unique_ptr<TrainableTensor>> _trainable;
+
+ // Migrant tensors
+ ir::OperandIndexMap<IPortableTensor *> _migrant;
+
+ // Tensors for backpropagation
+ ir::OperandIndexMap<std::unique_ptr<DerivativeTensor>> _derivative;
+
+ // Tensors for updating trainable tensors
+ ir::OperandIndexMap<std::unique_ptr<GradientTensor>> _gradient;
+};
+
+} // namespace train
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRAIN_ITENSOR_REGISTRY_H__
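
Each trainable backend presumably instantiates the template above with its own four tensor types. A speculative sketch of one instantiation plus the registration/lookup round trip (the concrete types are stand-ins; a real backend would likely define dedicated derivative and gradient tensor classes):

using MyTensorRegistry = onert::backend::train::PortableTensorRegistryTemplate<
  onert::backend::basic::Tensor, onert::backend::basic::train::TrainableTensor,
  onert::backend::basic::Tensor /*derivative*/, onert::backend::basic::Tensor /*gradient*/>;

void registerPair(MyTensorRegistry &reg, const onert::ir::OperandIndex &ind,
                  std::unique_ptr<onert::backend::basic::train::TrainableTensor> weight,
                  std::unique_ptr<onert::backend::basic::Tensor> grad)
{
  reg.setTrainableTensor(ind, std::move(weight));
  reg.setGradientTensor(ind, std::move(grad));
  auto [trainable, gradient] = reg.getTrainingTensors(ind); // throws if either is missing
}
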
diff --git a/runtime/onert/core/include/backend/train/ITrainableBackend.h b/runtime/onert/core/include/backend/train/ITrainableBackend.h
new file mode 100644
index 000000000..76e394216
--- /dev/null
+++ b/runtime/onert/core/include/backend/train/ITrainableBackend.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRAIN_ITRAINABLE_BACKEND_H__
+#define __ONERT_BACKEND_TRAIN_ITRAINABLE_BACKEND_H__
+
+#include <memory>
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+
+class TrainableBackendContext;
+struct TrainableContextData;
+
+struct ITrainableBackend
+{
+ virtual ~ITrainableBackend() = default;
+ virtual std::unique_ptr<TrainableBackendContext> newContext(TrainableContextData &&) const = 0;
+};
+
+} // namespace train
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRAIN_ITRAINABLE_BACKEND_H__
diff --git a/runtime/onert/core/include/backend/train/ITrainableTensor.h b/runtime/onert/core/include/backend/train/ITrainableTensor.h
new file mode 100644
index 000000000..9d7ab345b
--- /dev/null
+++ b/runtime/onert/core/include/backend/train/ITrainableTensor.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRAIN_ITRAINABLE_TENSOR_H__
+#define __ONERT_BACKEND_TRAIN_ITRAINABLE_TENSOR_H__
+
+#include "backend/IPortableTensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+
+/**
+ * @brief A tensor class that can be trained
+ *
+ */
+// NOTE It is more appropriate to inherit ITensor, but there is no easy way
+// except for virtual inheritance.
+class ITrainableTensor : public IPortableTensor
+{
+public:
+ using IPortableTensor::IPortableTensor;
+ virtual ~ITrainableTensor() = default;
+
+ /**
+ * @brief Get optimizer variables of this trainable tensor
+ *
+ * @return Optimizer variables
+ */
+ virtual std::vector<ITensor *> optVars() = 0;
+};
+
+} // namespace train
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRAIN_ITRAINABLE_TENSOR_H__
diff --git a/runtime/onert/core/include/backend/train/KernelGeneratorBase.h b/runtime/onert/core/include/backend/train/KernelGeneratorBase.h
new file mode 100644
index 000000000..b5031a5cd
--- /dev/null
+++ b/runtime/onert/core/include/backend/train/KernelGeneratorBase.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRAIN_KERNEL_GENERATOR_BASE_H__
+#define __ONERT_BACKEND_TRAIN_KERNEL_GENERATOR_BASE_H__
+
+#include <memory>
+
+#include "backend/ITensorRegistry.h"
+#include "exec/train/TrainableFnSequence.h"
+#include "ir/train/TrainableGraph.h"
+#include "ir/train/TrainableOperationVisitor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+
+class KernelGeneratorBase : public ir::train::TrainableOperationVisitor
+{
+public:
+ virtual ~KernelGeneratorBase() = default;
+ KernelGeneratorBase(const ir::train::TrainableGraph &tgraph) : _tgraph{tgraph} {}
+
+ virtual std::unique_ptr<exec::train::TrainableFnSequence> generate(ir::OperationIndex ind) = 0;
+
+protected:
+#define OP(InternalName) \
+ void visit(const ir::train::operation::InternalName &) override \
+ { \
+ throw std::runtime_error("KernelGenerator: NYI for operation '" #InternalName "'"); \
+ }
+#include "ir/train/Operations.lst"
+#undef OP
+
+protected:
+ const ir::train::TrainableGraph &_tgraph;
+ std::unique_ptr<exec::train::ITrainableFunction> _return_fn;
+};
+
+} // namespace train
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRAIN_KERNEL_GENERATOR_BASE_H__
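
The OP block above is an X-macro: ir/train/Operations.lst expands to one OP(Name) entry per trainable operation, so the base class grows a throwing visit() override for each of them and a concrete generator only re-overrides what it supports. A sketch of such a generator; the operation(ind) accessor, the append() call, and the makeConv2DKernel() factory are assumptions for illustration:

class MyKernelGenerator : public onert::backend::train::KernelGeneratorBase
{
public:
  using KernelGeneratorBase::KernelGeneratorBase;

  std::unique_ptr<onert::exec::train::TrainableFnSequence>
  generate(onert::ir::OperationIndex ind) override
  {
    auto seq = std::make_unique<onert::exec::train::TrainableFnSequence>();
    _tgraph.operation(ind).accept(*this); // dispatch lands in a visit() below
    seq->append(std::move(_return_fn));
    return seq;
  }

  void visit(const onert::ir::train::operation::Conv2D &node) override
  {
    _return_fn = makeConv2DKernel(node); // hypothetical kernel factory
  }
};
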
diff --git a/runtime/onert/core/include/backend/train/TrainableBackendContext.h b/runtime/onert/core/include/backend/train/TrainableBackendContext.h
new file mode 100644
index 000000000..3f47af747
--- /dev/null
+++ b/runtime/onert/core/include/backend/train/TrainableBackendContext.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BACKEND_TRAIN_TRAINABLE_CONTEXT_H__
+#define __ONERT_BACKEND_BACKEND_TRAIN_TRAINABLE_CONTEXT_H__
+
+#include "backend/Backend.h"
+#include "backend/train/ITensorRegistry.h"
+#include "backend/train/ITrainableBackend.h"
+#include "exec/train/optimizer/Optimizer.h"
+#include "exec/train/TrainableFnSequence.h"
+#include "ir/OperandIndexMap.h"
+#include "ir/train/TrainableGraph.h"
+#include "util/Set.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace train
+{
+
+using FunctionMap =
+ std::vector<std::pair<ir::OperationIndex, std::unique_ptr<exec::train::TrainableFnSequence>>>;
+
+struct TrainableContextData
+{
+ // A partial and trainable graph that only includes the used operands/operations of the original graph
+ std::unique_ptr<ir::train::TrainableGraph> tgraph;
+ /* A linear order of operations. This is necessary when a graph is not fully connected */
+ std::vector<onert::ir::OperationIndex> op_order;
+ /* Operands that are defined by other backends */
+ util::Set<ir::OperandIndex> external_operands;
+ /* Operand layout info */
+ ir::OperandIndexMap<ir::Layout> operand_layouts;
+ /* Custom kernel builder */
+ std::shared_ptr<custom::IKernelBuilder> custom_kernel_builder;
+ /* Is linear executor or not */
+ bool is_linear_executor;
+ /* Optimizer */
+ std::shared_ptr<exec::train::optimizer::Optimizer> optimizer;
+};
+
+class TrainableBackendContext
+{
+public:
+ TrainableBackendContext(const ITrainableBackend *backend,
+ std::unique_ptr<TrainableContextData> &&tdata,
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr)
+ : _backend{backend}, _tdata{std::move(tdata)}, _tensor_registry{tensor_registry}
+ {
+ assert(_tdata);
+ }
+ virtual ~TrainableBackendContext() = default;
+
+ const ir::train::TrainableGraph *trainable_graph() const { return _tdata->tgraph.get(); }
+
+ const TrainableContextData *data() const { return _tdata.get(); }
+
+ const ITrainableBackend *backend() const { return _backend; }
+ const util::Set<ir::OperandIndex> &external_operands() const { return _tdata->external_operands; }
+ const ir::OperandIndexMap<ir::Layout> &operand_layouts() const { return _tdata->operand_layouts; }
+
+ std::shared_ptr<ITensorRegistry> tensor_registry() { return _tensor_registry; }
+
+ virtual ITensorRegistry *genTrainingTensors() = 0;
+ virtual backend::ITensorRegistry *genTensors() = 0;
+ virtual FunctionMap genKernels() = 0;
+
+private:
+ const ITrainableBackend *_backend{nullptr};
+
+protected:
+ std::unique_ptr<TrainableContextData> _tdata;
+
+protected:
+ std::shared_ptr<ITensorRegistry> _tensor_registry;
+};
+
+using TrainableBackendContexts =
+ std::unordered_map<const Backend *, std::unique_ptr<TrainableBackendContext>>;
+
+} // namespace train
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BACKEND_TRAIN_TRAINABLE_CONTEXT_H__
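
Tying ITrainableBackend to the context above: a backend's newContext() would plausibly wrap the moved-in TrainableContextData and hand the new context a tensor registry. A sketch in which every My* type is a placeholder:

class MyTrainableBackend : public onert::backend::train::ITrainableBackend
{
public:
  std::unique_ptr<onert::backend::train::TrainableBackendContext>
  newContext(onert::backend::train::TrainableContextData &&tdata) const override
  {
    auto data = std::make_unique<onert::backend::train::TrainableContextData>(std::move(tdata));
    auto reg = std::make_shared<MyTensorRegistry>(); // assumed ITensorRegistry implementation
    // MyTrainableBackendContext is assumed to implement genTensors(),
    // genTrainingTensors(), and genKernels().
    return std::make_unique<MyTrainableBackendContext>(this, std::move(data), reg);
  }
};
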
diff --git a/runtime/onert/core/include/compiler/BackendManager.h b/runtime/onert/core/include/compiler/BackendManager.h
index af13d13f7..b44fcf836 100644
--- a/runtime/onert/core/include/compiler/BackendManager.h
+++ b/runtime/onert/core/include/compiler/BackendManager.h
@@ -17,12 +17,11 @@
#ifndef __ONERT_COMPILER_BACKEND_MANAGER_H__
#define __ONERT_COMPILER_BACKEND_MANAGER_H__
-#include <memory>
-#include <map>
-
-#include "ir/Operands.h"
#include "backend/Backend.h"
-#include "backend/controlflow/Backend.h"
+#include "ir/Operands.h"
+
+#include <map>
+#include <memory>
namespace onert
{
@@ -34,14 +33,14 @@ class BackendManager
public:
using backend_create_t = backend::Backend *(*)();
using backend_destroy_t = void (*)(backend::Backend *);
- using dlhandle_destroy_t = void (*)(void *);
+ using dlhandle_destroy_t = std::function<void(void *)>;
static BackendManager &get();
public:
backend::Backend *get(const std::string &key);
const backend::Backend *get(const std::string &key) const;
- const backend::controlflow::Backend *getControlflow() const;
+ const backend::Backend *getBuiltin() const;
const std::vector<const backend::Backend *> getAll() const
{
std::vector<const backend::Backend *> v;
@@ -65,15 +64,15 @@ private:
private:
std::map<std::string, std::unique_ptr<void, dlhandle_destroy_t>> _handle_map;
std::map<std::string, std::unique_ptr<backend::Backend, backend_destroy_t>> _gen_map;
- backend::controlflow::Backend *_controlflow{nullptr};
+ backend::Backend *_builtin{nullptr};
/**
- * @brief load controlflow backend
+ * @brief load builtin backend
*
* @param backend backend to be loaded
*
* @return
*/
- void loadControlflowBackend();
+ void loadBuiltinBackend();
};
} // namespace compiler
diff --git a/runtime/onert/core/include/compiler/CodeMap.h b/runtime/onert/core/include/compiler/CodeMap.h
index e13d3334c..93fe43cfd 100644
--- a/runtime/onert/core/include/compiler/CodeMap.h
+++ b/runtime/onert/core/include/compiler/CodeMap.h
@@ -18,6 +18,10 @@
#define __ONERT_COMPILER_CODE_MAP_H__
#include <unordered_map>
+#include "ir/Index.h"
+#include "ir/IOperation.h"
+#include "exec/FunctionSequence.h"
+#include "OperationLowerInfo.h"
namespace onert
{
@@ -26,18 +30,20 @@ namespace compiler
struct CodeAndInfo
{
- const ir::OpSequence *op_seq;
- const ir::operation::LowerInfo *lower_info;
+ ir::OperationIndex op_ind;
+ const ir::IOperation *op;
+ const OperationLowerInfo *lower_info;
std::unique_ptr<exec::FunctionSequence> fn_seq;
- CodeAndInfo(const ir::OpSequence *op_seq, const ir::operation::LowerInfo *lower_info,
+ CodeAndInfo(const ir::OperationIndex op_ind, const ir::IOperation *op,
+ const OperationLowerInfo *lower_info,
std::unique_ptr<exec::FunctionSequence> &&fn_seq)
- : op_seq{op_seq}, lower_info{lower_info}, fn_seq{std::move(fn_seq)}
+ : op_ind{op_ind}, op{op}, lower_info{lower_info}, fn_seq{std::move(fn_seq)}
{
}
};
-using CodeMap = std::unordered_map<ir::OpSequenceIndex, CodeAndInfo>;
+using CodeMap = std::unordered_map<ir::OperationIndex, CodeAndInfo>;
} // namespace compiler
} // namespace onert
diff --git a/runtime/onert/core/include/compiler/Compiler.h b/runtime/onert/core/include/compiler/Compiler.h
index 3098be7ba..9a86f407e 100644
--- a/runtime/onert/core/include/compiler/Compiler.h
+++ b/runtime/onert/core/include/compiler/Compiler.h
@@ -22,101 +22,55 @@
#ifndef __ONERT_COMPILER_COMPILE_H_
#define __ONERT_COMPILER_COMPILE_H_
-#include "ir/Graph.h"
-#include "exec/IExecutor.h"
+#include "CompilerOptions.h"
+#include "ICompiler.h"
+#include "ir/NNPkg.h"
namespace onert
{
-
namespace compiler
{
-enum class State
-{
- CREATED, // Before compilation
- COMPILED // Success compilation
-};
-
-struct ManualSchedulerOptions
-{
- std::string backend_for_all;
- std::unordered_map<ir::OpCode, std::string> opcode_to_backend;
- std::unordered_map<ir::OperationIndex, std::string> index_to_backend;
-};
-
-struct CompilerOptions
-{
- // GENERAL OPTIONS
- std::vector<std::string> backend_list;
- bool is_primary_subgraph; // TODO Remove this out of this struct as it is not user-given option
-
- // OPTIONS ONLY FOR DEBUGGING/PROFILING
- std::string trace_filepath; //< File path to save trace records
- int graph_dump_level; //< Graph dump level, values between 0 and 2 are valid
- int op_seq_max_node; //< Number of nodes that can be
- std::string executor; //< Executor name to use
- ManualSchedulerOptions manual_scheduler_options; //< Options for ManualScheduler
- bool he_scheduler; //< HEScheduler if true, ManualScheduler otherwise
- bool he_profiling_mode; //< Whether HEScheduler profiling mode ON/OFF
- bool disable_compile; //< Run with Interpreter if true, try compilation otherwise
- bool fp16_enable; //< Whether fp16 mode ON/OFF
-};
-
-CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Subgraphs &subgs);
-
/**
- * @brief Class to compile graph model
+ * @brief Class to compile NN package
*/
-class Compiler
+class Compiler : public ICompiler
{
public:
/**
- * @brief Construct a new Compiler object
- * @param[in] subgs All subgraphs of a model
+ * @brief Construct a new Compiler object for single model
+ * @param[in] model model to compile
+ * @param[in] copt Compiler options
*/
- Compiler(const std::shared_ptr<ir::Subgraphs> &subgs);
+ Compiler(const std::shared_ptr<ir::Model> &model, CompilerOptions &copt);
-public:
/**
- * @brief Do compilation with the options
- *
- * @return std::shared_ptr<exec::ExecutorMap> Executors as a result of compilation
+ * @brief Construct a new Compiler object for NN package
+ * @param[in] nnpkg NN package to compile
+ * @param[in] copts Compiler option vector for each model in the package
*/
- std::shared_ptr<exec::ExecutorMap> compile(void);
-
- State state(void) const { return _state; }
+ Compiler(const std::shared_ptr<ir::NNPkg> &nnpkg,
+ std::vector<std::unique_ptr<CompilerOptions>> &copts);
/**
- * @brief Check if model can compile
- * @return @c true if model can compile, otherwise @c false
- * @note This method don't check model correctness,\n
- * so model verification should be done before calling this method
+ * @brief Destroy the Compiler object
*/
- bool checkCompilable();
- CompilerOptions &options() { return _options; }
+ ~Compiler() = default;
+public:
/**
- * @brief Allow to compute float32 using float16 data type
+ * @brief Do compilation with the options
+ *
+ * @return std::shared_ptr<CompilerArtifact> Executors as a result of compilation
*/
- void enableToFp16();
+ std::shared_ptr<CompilerArtifact> compile(void);
private:
- void checkProfilerConditions();
- std::shared_ptr<ir::Graph> &primary_subgraph() { return _subgraphs->at(ir::SubgraphIndex{0}); }
-
-private:
- std::shared_ptr<ir::Subgraphs> _subgraphs;
- // NOTE These executors does not have duplicated subgraph. This mean they do not allow support
- // subgraphs being called recursively because data of non-constant tensor of parent executor will
- // be updated by child executor. If you want to support subgraphs being called recursively, you
- // have to add allocate non-constant tensor memory of executors in execution time when each
- // subgraph is called.
- State _state;
- CompilerOptions _options;
+ std::shared_ptr<ir::Model> _model;
+ CompilerOptions *_options;
};
} // namespace compiler
-
} // namespace onert
#endif // __ONERT_COMPILER_COMPILE_H_
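
With the reshaped interface above, a single-model compile pairs the model with externally owned options and yields a CompilerArtifact instead of a bare executor map. A usage sketch (model construction is elided; note the options must outlive the Compiler, which keeps only a raw pointer to them):

#include "compiler/Compiler.h"

std::shared_ptr<onert::compiler::CompilerArtifact>
compileModel(const std::shared_ptr<onert::ir::Model> &model)
{
  auto options = onert::compiler::CompilerOptions::fromGlobalConfig();
  onert::compiler::Compiler compiler{model, *options};
  return compiler.compile(); // bundles executors and the tracing context
}
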
diff --git a/runtime/onert/core/include/compiler/CompilerFactory.h b/runtime/onert/core/include/compiler/CompilerFactory.h
new file mode 100644
index 000000000..5a8886aa1
--- /dev/null
+++ b/runtime/onert/core/include/compiler/CompilerFactory.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_COMPILER_FACTORY_H__
+#define __ONERT_COMPILER_COMPILER_FACTORY_H__
+
+#include "ICompiler.h"
+#include "CompilerOptions.h"
+#include "compiler/train/TrainingInfo.h"
+#include "ir/NNPkg.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+// TODO Support registering and using compiler plugins
+class CompilerFactory
+{
+public:
+ static CompilerFactory &get();
+
+public:
+ std::unique_ptr<ICompiler> create(const std::shared_ptr<ir::NNPkg> &nnpkg,
+ std::vector<std::unique_ptr<CompilerOptions>> &copts,
+ const compiler::train::TrainingInfo *training_info = nullptr);
+
+private:
+ // It is not allowed to use CompilerFactory without get()
+ CompilerFactory() = default;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_COMPILER_FACTORY_H__
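
CompilerFactory reads as the intended front door once more than one ICompiler implementation exists; passing a TrainingInfo presumably selects a training-aware compiler. A hedged sketch of the inference path:

#include "compiler/CompilerFactory.h"

std::unique_ptr<onert::compiler::ICompiler>
makeInferenceCompiler(const std::shared_ptr<onert::ir::NNPkg> &nnpkg,
                      std::vector<std::unique_ptr<onert::compiler::CompilerOptions>> &copts)
{
  // No TrainingInfo given: assumed to yield the plain inference compiler.
  return onert::compiler::CompilerFactory::get().create(nnpkg, copts);
}
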
diff --git a/runtime/onert/core/include/compiler/CompilerOptions.h b/runtime/onert/core/include/compiler/CompilerOptions.h
new file mode 100644
index 000000000..bb0d0a430
--- /dev/null
+++ b/runtime/onert/core/include/compiler/CompilerOptions.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_COMPILER_OPTIONS_H_
+#define __ONERT_COMPILER_COMPILER_OPTIONS_H_
+
+#include "ir/OpCode.h"
+#include "ir/Index.h"
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace onert
+{
+namespace compiler
+{
+
+struct ManualSchedulerOptions
+{
+public:
+ void setBackendMap(const std::string &str);
+
+public:
+ std::string backend_for_all;
+ std::unordered_map<ir::OpCode, std::string> opcode_to_backend;
+ std::unordered_map<ir::OperationIndex, std::string> index_to_backend;
+};
+
+class CompilerOptions
+{
+public:
+ /**
+ * @brief Set default values for CompilerOptions
+ * @return Generated CompilerOptions
+ *
+ * @note All these default values should not be fetched from Env
+ * when we stop supporting Android NNAPI.
+ */
+ static std::unique_ptr<CompilerOptions> fromGlobalConfig();
+
+ /**
+ * @brief Allow computing float32 using the float16 data type
+ */
+ void enableToFp16() { fp16_enable = true; }
+
+ /**
+ * @brief Force default values of CompilerOptions for correct compilation
+ *
+ * @note This should be called after all CompilerOptions settings are finished,
+ * to prevent the forced values from being overwritten
+ */
+ void forceInternalOptions();
+
+ /**
+ * @brief Print option value
+ */
+ void verboseOptions();
+
+public:
+ // GENERAL OPTIONS
+ std::vector<std::string> backend_list;
+ std::string minmax_filepath; //< File path to save minmax
+
+ // OPTIONS ONLY FOR DEBUGGING/PROFILING
+ std::string trace_filepath; //< File path to save trace records
+ int graph_dump_level; //< Graph dump level, values between 0 and 2 are valid
+ std::string executor; //< Executor name to use
+ ManualSchedulerOptions manual_scheduler_options; //< Options for ManualScheduler
+ bool he_scheduler; //< HEScheduler if true, ManualScheduler otherwise
+ bool he_profiling_mode; //< Whether HEScheduler profiling mode ON/OFF
+ bool fp16_enable; //< Whether fp16 mode ON/OFF
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_COMPILER_OPTIONS_H_
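
A plausible configuration sequence for the class above: start from the global config, adjust fields, then pin internal values last so later writes cannot clobber them. The "cpu" backend choice is only an example:

#include "compiler/CompilerOptions.h"

std::unique_ptr<onert::compiler::CompilerOptions> makeOptions()
{
  auto opts = onert::compiler::CompilerOptions::fromGlobalConfig();
  opts->backend_list = {"cpu"};
  opts->manual_scheduler_options.backend_for_all = "cpu";
  opts->enableToFp16();         // compute float32 graphs with float16 arithmetic
  opts->forceInternalOptions(); // per the note above, call after all user settings
  return opts;
}
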
diff --git a/runtime/onert/core/include/compiler/ExecutionBuilder.h b/runtime/onert/core/include/compiler/ExecutionBuilder.h
index d54d9d046..e36ad6d24 100644
--- a/runtime/onert/core/include/compiler/ExecutionBuilder.h
+++ b/runtime/onert/core/include/compiler/ExecutionBuilder.h
@@ -19,8 +19,7 @@
#include <memory>
-#include "ir/operation/LowerInfo.h"
-#include "ir/OpSequence.h"
+#include "ir/Index.h"
#include "exec/FunctionSequence.h"
#include "CodeMap.h"
@@ -32,7 +31,7 @@ namespace compiler
class ExecutionBuilder
{
public:
- void append(const ir::OpSequenceIndex index, CodeAndInfo &&code_and_info)
+ void append(const ir::OperationIndex index, CodeAndInfo &&code_and_info)
{
_code_map.emplace(index, std::move(code_and_info));
}
diff --git a/runtime/onert/core/include/compiler/GraphLowerInfo.h b/runtime/onert/core/include/compiler/GraphLowerInfo.h
new file mode 100644
index 000000000..b679891d6
--- /dev/null
+++ b/runtime/onert/core/include/compiler/GraphLowerInfo.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_GRAPH_LOWER_INFO_H__
+#define __ONERT_COMPILER_GRAPH_LOWER_INFO_H__
+
+#include <memory>
+#include <unordered_map>
+
+#include "compiler/OperandLowerInfo.h"
+#include "compiler/OperationLowerInfo.h"
+#include "util/ObjectManager.h"
+#include "ir/Index.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+struct GraphLowerInfo
+{
+ util::ObjectManager<ir::OperationIndex, OperationLowerInfo> operation;
+ util::ObjectManager<ir::OperandIndex, OperandLowerInfo> operand;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_GRAPH_LOWER_INFO_H__
diff --git a/runtime/onert/core/include/compiler/ICompiler.h b/runtime/onert/core/include/compiler/ICompiler.h
new file mode 100644
index 000000000..255e0509d
--- /dev/null
+++ b/runtime/onert/core/include/compiler/ICompiler.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file ICompiler.h
+ * @brief This file contains ICompiler class to define and run compilation phase
+ */
+
+#ifndef __ONERT_COMPILER_I_COMPILER_H_
+#define __ONERT_COMPILER_I_COMPILER_H_
+
+#include "exec/IExecutors.h"
+#include "util/TracingCtx.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+struct CompilerArtifact
+{
+ CompilerArtifact(void) = delete;
+ CompilerArtifact(std::shared_ptr<exec::IExecutors> executors,
+ std::unique_ptr<const util::TracingCtx> tracing_ctx)
+ : _executors{executors}, _tracing_ctx{std::move(tracing_ctx)} {}
+
+ std::shared_ptr<exec::IExecutors> _executors;
+ std::unique_ptr<const util::TracingCtx> _tracing_ctx;
+};
+
+class ICompiler
+{
+public:
+ /**
+ * @brief Virtual ICompiler destructor
+ * @note Require derived class destructor
+ */
+ virtual ~ICompiler() = default;
+
+ /**
+ * @brief Do compilation
+ * @return std::shared_ptr<CompilerArtifact> Executors as a result of compilation
+ */
+ virtual std::shared_ptr<CompilerArtifact> compile(void) = 0;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_I_COMPILER_H_
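
On the consuming side, holding an ICompiler keeps callers independent of which implementation produced the artifact, and the artifact ties executor and tracing-context lifetimes together. A small sketch:

std::shared_ptr<onert::exec::IExecutors>
compileAndTakeExecutors(onert::compiler::ICompiler &compiler)
{
  auto artifact = compiler.compile();
  return artifact->_executors; // the tracing context remains owned by the artifact
}
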
diff --git a/runtime/onert/core/include/compiler/ILoweredGraph.h b/runtime/onert/core/include/compiler/ILoweredGraph.h
new file mode 100644
index 000000000..bc49fa1d7
--- /dev/null
+++ b/runtime/onert/core/include/compiler/ILoweredGraph.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_ILOWERED_GRAPH_H__
+#define __ONERT_COMPILER_ILOWERED_GRAPH_H__
+
+#include "ir/Graph.h"
+#include "compiler/GraphLowerInfo.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+struct ILoweredGraph
+{
+ virtual ~ILoweredGraph() = default;
+ virtual ir::Graph &graph() = 0;
+ virtual const ir::Graph &graph() const = 0;
+ virtual const compiler::GraphLowerInfo &lower_info() const = 0;
+ virtual compiler::GraphLowerInfo &lower_info() = 0;
+ virtual void setHasDynamicTensor(ir::OperationIndex ind, bool val) = 0;
+ virtual bool getHasDynamicTensor(ir::OperationIndex ind) const = 0;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_ILOWERED_GRAPH_H__
diff --git a/runtime/onert/core/include/compiler/LoweredGraph.h b/runtime/onert/core/include/compiler/LoweredGraph.h
index aadba6857..b970a884b 100644
--- a/runtime/onert/core/include/compiler/LoweredGraph.h
+++ b/runtime/onert/core/include/compiler/LoweredGraph.h
@@ -14,14 +14,14 @@
* limitations under the License.
*/
-#ifndef __ONERT_IR_LOWERED_GRAPH_H__
-#define __ONERT_IR_LOWERED_GRAPH_H__
+#ifndef __ONERT_COMPILER_LOWERED_GRAPH_H__
+#define __ONERT_COMPILER_LOWERED_GRAPH_H__
-#include "ir/Graph.h"
-#include "ir/LowerInfoMap.h"
-#include "ir/OpSequences.h"
#include "compiler/BackendResolver.h"
#include "compiler/Compiler.h"
+#include "compiler/GraphLowerInfo.h"
+#include "compiler/ILoweredGraph.h"
+#include "ir/Graph.h"
namespace onert
{
@@ -33,58 +33,45 @@ namespace compiler
* In addition, after lowering, operands in graph will be set to "dynamic"
* if the shape of output of an operation cannot be decided at compilation time.
*/
-class LoweredGraph
+class LoweredGraph : public ILoweredGraph
{
public:
LoweredGraph(const ir::Graph &graph, const compiler::CompilerOptions &options);
- ir::Graph &graph() { return _graph; }
- const ir::Graph &graph() const { return _graph; }
- const ir::LowerInfoMap *getLowerInfo() const { return &_lower_info_map; }
- const ir::operation::LowerInfo *getLowerInfo(const ir::OpSequenceIndex &op_seq_index) const;
- void setLowerInfo(const ir::OpSequenceIndex &op_seq_index,
- std::unique_ptr<ir::operation::LowerInfo> &&lower_info);
- void removeLowerInfo(const ir::OpSequenceIndex &op_seq_index);
- const ir::operand::LowerInfo *getLowerInfo(const ir::OperandIndex &index) const;
- ir::operand::LowerInfo *getLowerInfo(const ir::OperandIndex &index);
- void setLowerInfo(const ir::OperandIndex &index,
- std::unique_ptr<ir::operand::LowerInfo> &&lower_info);
- void removeLowerInfo(const ir::OperandIndex &index);
- ir::OpSequences &op_seqs() { return _op_seqs; }
- const ir::OpSequences &op_seqs() const { return _op_seqs; }
- void iterateTopolOpSeqs(
- const std::function<void(const ir::OpSequenceIndex &, const ir::OpSequence &)> &fn) const;
- void
- iterateTopolOpSeqs(const std::function<void(const ir::OpSequenceIndex &, ir::OpSequence &)> &fn);
- const backend::BackendContexts &backend_contexts() { return _backend_contexts; }
- const backend::BackendContexts &backend_contexts() const { return _backend_contexts; }
+ ir::Graph &graph() override { return _graph; }
+ const ir::Graph &graph() const override { return _graph; }
+ const compiler::GraphLowerInfo &lower_info() const override { return _lower_info_map; }
+ compiler::GraphLowerInfo &lower_info() override { return _lower_info_map; }
std::shared_ptr<ir::OperationIndexMap<int64_t>> indexed_ranks() { return _indexed_ranks; }
-private:
- void
- makeOpSequences(ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
- const compiler::CompilerOptions &options,
- const compiler::BackendResolver &backend_resolver);
+ void setHasDynamicTensor(ir::OperationIndex ind, bool val) override
+ {
+ _has_dynamic_tensor_map.emplace(ind, val);
+ }
+ bool getHasDynamicTensor(ir::OperationIndex ind) const override
+ {
+ auto itr = _has_dynamic_tensor_map.find(ind);
+ return (itr == _has_dynamic_tensor_map.end()) ? false : itr->second;
+ }
- void manipulateLowerInfo(
- ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
- bool is_primary);
+private:
+ void makeLowerInfo(const compiler::BackendResolver &backend_resolver);
void dumpLowerInfo();
- bool mergeable(const ir::OpSequenceIndex &op_seq_index, const ir::OperationIndex &node_index,
- ir::Layout layout, const compiler::BackendResolver &backend_resolver);
- ir::OpSequenceIndex appendFreshSingleOpSequence(const ir::OperationIndex &node_index,
- const ir::Operation &node);
+ void lowerGraph(const compiler::CompilerOptions &options);
private:
+ /**
+ * @brief Copy of target graph for lowering
+ * @note It uses a copy of the graph, not a reference.
+ * This allows the original graph to be compiled multiple times.
+ */
ir::Graph _graph;
- backend::BackendContexts _backend_contexts;
std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks;
- ir::LowerInfoMap _lower_info_map;
- // Pass(for Perm) can accept only graph so that Graph has OpSequences as a member
- ir::OpSequences _op_seqs;
+ compiler::GraphLowerInfo _lower_info_map;
+ ir::OperationIndexMap<bool> _has_dynamic_tensor_map;
};
} // namespace compiler
} // namespace onert
-#endif // __ONERT_IR_LOWERED_GRAPH_H__
+#endif // __ONERT_COMPILER_LOWERED_GRAPH_H__
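
Two details of the new dynamic-tensor bookkeeping are worth noting: getHasDynamicTensor() reads an absent entry as false, and setHasDynamicTensor() uses emplace(), which keeps the first recorded value rather than overwriting it. A self-contained sketch of both behaviors:

#include <cstdint>
#include <unordered_map>

// Absent entries read as false, as in getHasDynamicTensor().
bool get_flag(const std::unordered_map<uint32_t, bool> &m, uint32_t ind)
{
  auto itr = m.find(ind);
  return (itr == m.end()) ? false : itr->second;
}

int main()
{
  std::unordered_map<uint32_t, bool> has_dynamic_tensor;
  has_dynamic_tensor.emplace(3, true);
  has_dynamic_tensor.emplace(3, false); // emplace keeps the first value: still true
  return (get_flag(has_dynamic_tensor, 3) && !get_flag(has_dynamic_tensor, 7)) ? 0 : 1;
}
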
diff --git a/runtime/onert/core/include/compiler/OperandLowerInfo.h b/runtime/onert/core/include/compiler/OperandLowerInfo.h
new file mode 100644
index 000000000..340b9cef1
--- /dev/null
+++ b/runtime/onert/core/include/compiler/OperandLowerInfo.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_OPERAND_LOWER_INFO_H__
+#define __ONERT_COMPILER_OPERAND_LOWER_INFO_H__
+
+#include <functional>
+#include <stdint.h>
+
+#include "compiler/PermuteFactor.h"
+#include "util/Set.h"
+
+namespace onert
+{
+namespace backend
+{
+class Backend;
+} // namespace backend
+} // namespace onert
+
+namespace onert
+{
+namespace compiler
+{
+
+using PermuteFactorSet = util::Set<PermuteFactor>;
+
+class OperandLowerInfo
+{
+public:
+ OperandLowerInfo()
+ {
+ // DO NOTHING
+ }
+
+public:
+ const PermuteFactorSet &def_factors(void) const { return _def_factors; }
+ const PermuteFactorSet &use_factors(void) const { return _use_factors; }
+
+public:
+ void addDefPermuteFactor(const PermuteFactor &factor) { _def_factors.add(factor); }
+ void addUsePermuteFactor(const PermuteFactor &factor) { _use_factors.add(factor); }
+ void removeDefPermuteFactor(const PermuteFactor &factor) { _def_factors.remove(factor); }
+ void removeUsePermuteFactor(const PermuteFactor &factor) { _use_factors.remove(factor); }
+
+private:
+ PermuteFactorSet _def_factors;
+ PermuteFactorSet _use_factors;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_OPERAND_LOWER_INFO_H__
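
An operand's def factors record the (backend, layout) combinations that produce it, and its use factors record the combinations that consume it; when the two sets disagree, the lowering typically has to reconcile them, e.g. by inserting a permutation. A sketch under those assumptions, with std::set and FactorStub standing in for util::Set and PermuteFactor:

#include <set>

// Stand-in for PermuteFactor: backend id plus layout tag, ordered for std::set.
struct FactorStub
{
  int backend_id;
  int layout;
  bool operator==(const FactorStub &o) const
  {
    return backend_id == o.backend_id && layout == o.layout;
  }
  bool operator<(const FactorStub &o) const
  {
    return backend_id != o.backend_id ? backend_id < o.backend_id : layout < o.layout;
  }
};

// Mirrors OperandLowerInfo: separate sets of defining and using factors.
class OperandLowerInfoSketch
{
public:
  const std::set<FactorStub> &def_factors() const { return _def_factors; }
  const std::set<FactorStub> &use_factors() const { return _use_factors; }
  void addDefPermuteFactor(const FactorStub &f) { _def_factors.insert(f); }
  void addUsePermuteFactor(const FactorStub &f) { _use_factors.insert(f); }

private:
  std::set<FactorStub> _def_factors;
  std::set<FactorStub> _use_factors;
};

int main()
{
  OperandLowerInfoSketch info;
  info.addDefPermuteFactor({0, 0}); // produced on backend 0, layout 0
  info.addUsePermuteFactor({1, 0}); // consumed on backend 1: mismatch
  const bool needs_permute = info.def_factors() != info.use_factors();
  return needs_permute ? 0 : 1;
}
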
diff --git a/runtime/onert/core/include/compiler/OperationLowerInfo.h b/runtime/onert/core/include/compiler/OperationLowerInfo.h
new file mode 100644
index 000000000..20ca12952
--- /dev/null
+++ b/runtime/onert/core/include/compiler/OperationLowerInfo.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_OPERATION_LOWER_INFO_H__
+#define __ONERT_COMPILER_OPERATION_LOWER_INFO_H__
+
+#include <string>
+
+#include <compiler/PermuteFactor.h>
+#include <ir/Layout.h>
+
+namespace onert
+{
+namespace backend
+{
+class Backend;
+} // namespace backend
+} // namespace onert
+
+namespace onert
+{
+namespace compiler
+{
+
+class OperationLowerInfo
+{
+public:
+ OperationLowerInfo(const backend::Backend *backend, ir::Layout layout);
+ const backend::Backend *backend() const { return _permute_factor.backend(); }
+ ir::Layout layout() const { return _permute_factor.layout(); }
+
+private:
+ PermuteFactor _permute_factor;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_OPERATION_LOWER_INFO_H__
diff --git a/runtime/onert/core/include/compiler/PermuteFactor.h b/runtime/onert/core/include/compiler/PermuteFactor.h
new file mode 100644
index 000000000..67ce957bb
--- /dev/null
+++ b/runtime/onert/core/include/compiler/PermuteFactor.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file PermuteFactor.h
+ * @brief This file contains PermuteFactor class
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __ONERT_COMPILER_OPERAND_PERMUTE_FACTOR_H__
+#define __ONERT_COMPILER_OPERAND_PERMUTE_FACTOR_H__
+
+#include <functional>
+
+#include "ir/Layout.h"
+
+namespace onert
+{
+namespace backend
+{
+class Backend;
+} // namespace backend
+} // namespace onert
+
+namespace onert
+{
+namespace compiler
+{
+
+/**
+ * @brief Class that has factors of permutation
+ */
+class PermuteFactor
+{
+public:
+ /**
+ * @brief Construct PermuteFactor object.
+ * @param backend The backend factor
+ * @param layout The layout factor
+ */
+ PermuteFactor(const backend::Backend *backend, ir::Layout layout)
+ : _backend{backend}, _layout{layout}
+ {
+ // DO NOTHING
+ }
+ /**
+ * @brief Construct PermuteFactor object by copy semantics.
+ */
+ PermuteFactor(const PermuteFactor &f) : _backend{f._backend}, _layout{f._layout}
+ {
+ // DO NOTHING
+ }
+ /**
+ * @brief Construct PermuteFactor object by move semantics.
+ */
+ PermuteFactor(PermuteFactor &&) = default;
+
+public:
+ /**
+ * @brief Get backend
+ *
+ * @return Backend factor
+ */
+ const backend::Backend *backend() const { return _backend; }
+ /**
+ * @brief Get layout
+ *
+ * @return Layout factor
+ */
+ ir::Layout layout() const { return _layout; }
+
+public:
+ /**
+ * @brief operator overloading function for `==`
+ *
+ * @return Whether the two PermuteFactor objects are the same
+ */
+ bool operator==(const PermuteFactor &other) const
+ {
+ return _backend == other.backend() && _layout == other.layout();
+ }
+ /**
+ * @brief operator overloading function for `!=`
+ *
+ * @return Whether the two PermuteFactor objects are different
+ */
+ bool operator!=(const PermuteFactor &other) const { return !(*this == other); }
+
+private:
+ const backend::Backend *_backend{nullptr};
+ ir::Layout _layout{ir::Layout::UNKNOWN};
+};
+
+} // namespace compiler
+} // namespace onert
+
+namespace std
+{
+
+/**
+ * @brief Structure that provides hash value of PermuteFactor
+ */
+template <> struct hash<onert::compiler::PermuteFactor>
+{
+ size_t operator()(const onert::compiler::PermuteFactor &factor) const noexcept
+ {
+ hash<const onert::backend::Backend *> b_hash{};
+ hash<onert::ir::Layout> l_hash{};
+ return b_hash(factor.backend()) ^ (l_hash(factor.layout()) << 1);
+ }
+};
+
+} // namespace std
+
+std::ostream &operator<<(std::ostream &os, const onert::compiler::PermuteFactor &obj);
+
+#endif // __ONERT_COMPILER_OPERAND_PERMUTE_FACTOR_H__
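
The std::hash specialization above combines the member hashes with the common h1 ^ (h2 << 1) pattern so that PermuteFactor can key unordered containers. The same recipe, self-contained, on a stand-in Factor type:

#include <functional>
#include <unordered_set>

// Stand-in for PermuteFactor: a backend pointer plus a layout tag.
struct Factor
{
  const void *backend;
  int layout;
  bool operator==(const Factor &other) const
  {
    return backend == other.backend && layout == other.layout;
  }
};

namespace std
{
template <> struct hash<Factor>
{
  size_t operator()(const Factor &f) const noexcept
  {
    hash<const void *> b_hash{};
    hash<int> l_hash{};
    return b_hash(f.backend) ^ (l_hash(f.layout) << 1); // same combination as above
  }
};
} // namespace std

int main()
{
  std::unordered_set<Factor> factors;
  int backend_a = 0, backend_b = 0;
  factors.insert(Factor{&backend_a, 1});
  factors.insert(Factor{&backend_b, 1});
  factors.insert(Factor{&backend_a, 1}); // duplicate: set stays at two entries
  return factors.size() == 2 ? 0 : 1;
}
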
diff --git a/runtime/onert/core/include/compiler/StaticShapeInference.h b/runtime/onert/core/include/compiler/StaticShapeInference.h
deleted file mode 100644
index b97cb5b7b..000000000
--- a/runtime/onert/core/include/compiler/StaticShapeInference.h
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_COMPILER_STATIC_SHAPE_INFERENCE_H__
-#define __ONERT_COMPILER_STATIC_SHAPE_INFERENCE_H__
-
-#include "ir/OperationVisitor.h"
-#include "ir/OpSequence.h"
-#include "compiler/LoweredGraph.h"
-#include "ir/Index.h"
-
-#include <memory>
-#include <unordered_map>
-
-namespace onert
-{
-namespace compiler
-{
-
-/**
- * @brief Class to infer shape before running kernels. It does the following:
- * - re-calculate and set output shape at compile time (before running kernels)
- * - if calculation cannot be done at compile time, mark the outputs to be dynamic, meaning
- * shapes of outputs will be calculated during running kernels
- */
-class StaticShapeInferer : public ir::OperationVisitor
-{
-public:
- StaticShapeInferer(
- const ir::SubgraphIndex &subg_idx,
- const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>
- &lowered_subgs)
- : _lowered_subgs(lowered_subgs), _operands(lowered_subgs.at(subg_idx)->graph().operands()),
- _operations(lowered_subgs.at(subg_idx)->graph().operations()),
- _return_has_dynamic_tensor(false)
- { /* empty */
- }
- virtual ~StaticShapeInferer() = default;
-
-public:
- /**
- * @brief Infer shape of operands beloning to ops and set the output shape.
- * If output shape cannot be known without running op, mark it so that it can be allocated
- * when running kernel.
- * @param op_seq sequence of operations
- * @return @c true if op_seq's input or output has any dynamic tensor; @c false otherwise.
- */
- bool infer(const ir::OpSequence &op_seq);
-
- void dump();
-
-private:
- bool checkDynamicInput(const ir::Operation &op);
- void setDynamicOutput(const ir::Operation &op);
-
-private:
- // TODO Define visitors for operations. List them in alphabetic order.
- void visit(const ir::operation::ArgMax &op) override;
- void visit(const ir::operation::BatchMatMul &op) override;
- void visit(const ir::operation::BinaryArithmetic &op) override;
- void visit(const ir::operation::BroadcastTo &op) override;
- void visit(const ir::operation::Comparison &op) override;
- void visit(const ir::operation::Concat &op) override;
- void visit(const ir::operation::Conv2D &op) override;
- void visit(const ir::operation::ElementwiseActivation &op) override;
- void visit(const ir::operation::ElementwiseBinary &op) override;
- void visit(const ir::operation::ElementwiseUnary &op) override;
- void visit(const ir::operation::ExpandDims &op) override;
- void visit(const ir::operation::Fill &op) override;
- void visit(const ir::operation::FullyConnected &op) override;
- void visit(const ir::operation::FusedBatchNorm &op) override;
- void visit(const ir::operation::Gather &op) override;
- void visit(const ir::operation::If &op) override;
- void visit(const ir::operation::L2Normalization &op) override;
- void visit(const ir::operation::MatrixBandPart &op) override;
- void visit(const ir::operation::OneHot &op) override;
- void visit(const ir::operation::Pack &op) override;
- void visit(const ir::operation::Pad &op) override;
- void visit(const ir::operation::Permute &op) override;
- void visit(const ir::operation::Pow &op) override;
- void visit(const ir::operation::Range &op) override;
- void visit(const ir::operation::Reduce &op) override;
- void visit(const ir::operation::Reshape &op) override;
- void visit(const ir::operation::ResizeBilinear &op) override;
- void visit(const ir::operation::Reverse &op) override;
- void visit(const ir::operation::Select &op) override;
- void visit(const ir::operation::Shape &op) override;
- void visit(const ir::operation::Slice &op) override;
- void visit(const ir::operation::Softmax &op) override;
- void visit(const ir::operation::SpaceToBatchND &op) override;
- void visit(const ir::operation::Split &op) override;
- void visit(const ir::operation::Squeeze &op) override;
- void visit(const ir::operation::StridedSlice &op) override;
- void visit(const ir::operation::SquaredDifference &op) override;
- void visit(const ir::operation::Tile &op) override;
- void visit(const ir::operation::Transpose &op) override;
- void visit(const ir::operation::Unpack &op) override;
- void visit(const ir::operation::While &op) override;
-
-private:
- /**
- * @brief Performs shape inference for arithmetic operation
- */
- void handleBinaryArithmeticOp(const ir::Operation &op, const ir::OperandIndex lhs_idx,
- const ir::OperandIndex rhs_idx);
-
- /**
- * @brief Performs shape inference for unary op whose output shape is
- * always same with input shape
- */
- void handleSimpleUnaryOp(const ir::Operation &op, const ir::OperandIndex input_idx);
-
-private:
- const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>
- &_lowered_subgs;
- // _operands and _operations can be changed by controlflow operation
- ir::Operands &_operands; // operands of current subgraph
- ir::Operations &_operations; // operations of current subgraph
- bool _return_has_dynamic_tensor;
-};
-
-} // namespace compiler
-} // namespace onert
-
-#endif // __ONERT_COMPILER_STATIC_SHAPE_INFERENCE_H__
diff --git a/runtime/onert/core/include/compiler/StaticShapeInferer.h b/runtime/onert/core/include/compiler/StaticShapeInferer.h
new file mode 100644
index 000000000..83dede726
--- /dev/null
+++ b/runtime/onert/core/include/compiler/StaticShapeInferer.h
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_STATIC_SHAPE_INFERER_H__
+#define __ONERT_COMPILER_STATIC_SHAPE_INFERER_H__
+
+#include "ir/OperationVisitor.h"
+#include "compiler/LoweredGraph.h"
+#include "ir/Index.h"
+
+#include <memory>
+#include <unordered_map>
+
+namespace onert
+{
+namespace compiler
+{
+/**
+ * @brief Class that observes and updates operands.
+ */
+class OperandObserver
+{
+public:
+ /**
+ * @brief Constructor of OperandObserver
+ *
+ * @param operands Operands to be updated
+ */
+ OperandObserver(const std::vector<ir::Operand *> &operands) : _operands{operands} {}
+ /**
+ * @brief Destructor of OperandObserver
+ */
+ virtual ~OperandObserver() = default;
+
+public:
+ /**
+ * @brief Update shapes and some OperandInfo of the observed operands
+ *
+ * @param changed_operands_info New operand info to apply to the observed operands
+ * @param unpredictable Whether the shapes cannot be predicted at compilation time
+ */
+ void updateShapes(const std::vector<ir::OperandInfo> &changed_operands_info,
+ bool unpredictable = false);
+
+private:
+ std::vector<ir::Operand *> _operands;
+};
+
+/**
+ * @brief Class to infer shape before running kernels. It does the following:
+ * - re-calculate and set output shape at compile time (before running kernels)
+ * - if calculation cannot be done at compile time, mark the outputs to be dynamic, meaning
+ * shapes of outputs will be calculated during running kernels
+ */
+class StaticShapeInferer : public ir::OperationVisitor
+{
+public:
+ StaticShapeInferer(compiler::ILoweredGraph *lowered_subg)
+ : _lowered_subg{lowered_subg}, _subg_input_observers{}, _controlflow_output_observer{nullptr},
+ _child_inferers{}
+ {
+ }
+ virtual ~StaticShapeInferer() = default;
+
+public:
+ void appendSubgInputObserver(const ir::SubgraphIndex &subg_idx,
+ std::unique_ptr<OperandObserver> &&subg_input_observer) noexcept
+ {
+ _subg_input_observers[subg_idx] = std::move(subg_input_observer);
+ }
+
+ void setControlflowOutputObserver(std::unique_ptr<OperandObserver> &&output_observer) noexcept
+ {
+ _controlflow_output_observer = std::move(output_observer);
+ }
+
+ void appendChildInferer(const ir::SubgraphIndex &subg_idx, compiler::StaticShapeInferer *inferer)
+ {
+ _child_inferers[subg_idx] = inferer;
+ }
+
+ /**
+ * @brief Infer shape of operands belonging to ops and set the output shape.
+ * If output shape cannot be known without running op, mark it so that it can be allocated
+ * when running kernel.
+ */
+ void infer(void);
+
+ void dump();
+
+ /**
+ * @brief Create a shape inferer map for a lowered model
+ * @param[in] lowered_subgs Map of lowered subgraphs of a model
+ * @return Shape inferer map
+ */
+ static std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>>
+ createStaticShapeInferers(
+ const std::unordered_map<ir::SubgraphIndex, ILoweredGraph *> &lowered_subgs);
+
+private:
+ bool checkDynamicInput(const ir::IOperation &op);
+ bool checkDynamicOutput(const ir::IOperation &op);
+ void setDynamicOutput(const ir::IOperation &op);
+
+private:
+ // TODO Define visitors for operations. List them in alphabetic order.
+ void visit(const ir::operation::ArgMinMax &op) override;
+ void visit(const ir::operation::BatchMatMul &op) override;
+ void visit(const ir::operation::BCQFullyConnected &op) override;
+ void visit(const ir::operation::BCQGather &op) override;
+ void visit(const ir::operation::BinaryArithmetic &op) override;
+ void visit(const ir::operation::BroadcastTo &op) override;
+ void visit(const ir::operation::Comparison &op) override;
+ void visit(const ir::operation::Concat &op) override;
+ void visit(const ir::operation::Conv2D &op) override;
+ void visit(const ir::operation::ElementwiseActivation &op) override;
+ void visit(const ir::operation::ElementwiseBinary &op) override;
+ void visit(const ir::operation::ElementwiseUnary &op) override;
+ void visit(const ir::operation::ExpandDims &op) override;
+ void visit(const ir::operation::Fill &op) override;
+ void visit(const ir::operation::FullyConnected &op) override;
+ void visit(const ir::operation::FusedBatchNorm &op) override;
+ void visit(const ir::operation::Gather &op) override;
+ void visit(const ir::operation::If &op) override;
+ void visit(const ir::operation::L2Normalization &op) override;
+ void visit(const ir::operation::Loss &op) override;
+ void visit(const ir::operation::LSTM &op) override;
+ void visit(const ir::operation::MatrixBandPart &op) override;
+ void visit(const ir::operation::OneHot &op) override;
+ void visit(const ir::operation::Pack &op) override;
+ void visit(const ir::operation::Pad &op) override;
+ void visit(const ir::operation::Permute &op) override;
+ void visit(const ir::operation::Pow &op) override;
+ void visit(const ir::operation::Range &op) override;
+ void visit(const ir::operation::Reduce &op) override;
+ void visit(const ir::operation::Reshape &op) override;
+ void visit(const ir::operation::ResizeBilinear &op) override;
+ void visit(const ir::operation::Reverse &op) override;
+ void visit(const ir::operation::Select &op) override;
+ void visit(const ir::operation::Shape &op) override;
+ void visit(const ir::operation::Slice &op) override;
+ void visit(const ir::operation::Softmax &op) override;
+ void visit(const ir::operation::SpaceToBatchND &op) override;
+ void visit(const ir::operation::Split &op) override;
+ void visit(const ir::operation::Squeeze &op) override;
+ void visit(const ir::operation::StridedSlice &op) override;
+ void visit(const ir::operation::SquaredDifference &op) override;
+ void visit(const ir::operation::Tile &op) override;
+ void visit(const ir::operation::Transpose &op) override;
+ void visit(const ir::operation::Unpack &op) override;
+ void visit(const ir::operation::While &op) override;
+ void visit(const ir::operation::DetectionPostProcess &op) override;
+ void visit(const ir::operation::Bulk &op) override;
+
+private:
+ /**
+ * @brief Performs shape inference for arithmetic operation
+ */
+ void handleBinaryArithmeticOp(const ir::Operation &op, const ir::OperandIndex lhs_idx,
+ const ir::OperandIndex rhs_idx);
+
+ /**
+ * @brief Performs shape inference for unary op whose output shape is
+ * always same with input shape
+ */
+ void handleSimpleUnaryOp(const ir::Operation &op, const ir::OperandIndex input_idx);
+
+private:
+ compiler::ILoweredGraph *_lowered_subg;
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<OperandObserver>>
+ _subg_input_observers; // child subg input
+ std::unique_ptr<OperandObserver> _controlflow_output_observer; // parent controlflow op output
+ std::unordered_map<ir::SubgraphIndex, compiler::StaticShapeInferer *> _child_inferers;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_STATIC_SHAPE_INFERER_H__
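
OperandObserver exists so that a parent inferer can push shapes it derives into the operands of a child subgraph, and a child can push control-flow outputs back to its parent. A rough standalone sketch of that update path; OperandStub is an assumed stand-in for ir::Operand/ir::OperandInfo.

#include <cstddef>
#include <vector>

// Stand-in for an operand: a shape plus a dynamic flag.
struct OperandStub
{
  std::vector<int> shape;
  bool dynamic = false;
};

// Mirrors OperandObserver: holds operands and pushes new shape info into them.
class ObserverSketch
{
public:
  explicit ObserverSketch(const std::vector<OperandStub *> &operands) : _operands{operands} {}

  void updateShapes(const std::vector<std::vector<int>> &changed_shapes,
                    bool unpredictable = false)
  {
    for (std::size_t i = 0; i < _operands.size() && i < changed_shapes.size(); ++i)
    {
      _operands[i]->shape = changed_shapes[i];
      _operands[i]->dynamic = unpredictable; // shape undecidable at compilation time
    }
  }

private:
  std::vector<OperandStub *> _operands;
};

int main()
{
  OperandStub subg_input;
  ObserverSketch observer({&subg_input});
  observer.updateShapes({{1, 3, 224, 224}});
  return (subg_input.shape.size() == 4 && !subg_input.dynamic) ? 0 : 1;
}
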
diff --git a/runtime/onert/core/include/compiler/train/LoweredTrainableGraph.h b/runtime/onert/core/include/compiler/train/LoweredTrainableGraph.h
new file mode 100644
index 000000000..a49d1c6a8
--- /dev/null
+++ b/runtime/onert/core/include/compiler/train/LoweredTrainableGraph.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_TRAIN_LOWERED_TRAINABLE_GRAPH_H__
+#define __ONERT_COMPILER_TRAIN_LOWERED_TRAINABLE_GRAPH_H__
+
+#include "compiler/BackendResolver.h"
+#include "compiler/CompilerOptions.h"
+#include "compiler/GraphLowerInfo.h"
+#include "compiler/ILoweredGraph.h"
+#include "ir/train/TrainableGraph.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace train
+{
+
+// TODO Unify with LoweredGraph
+/**
+ * @brief Class that contains lowering information on a graph.
+ * In addition, after lowering, operands in the graph will be set to "dynamic"
+ * if the output shape of an operation cannot be decided at compilation time.
+ */
+class LoweredTrainableGraph : public ILoweredGraph
+{
+public:
+ LoweredTrainableGraph(ir::train::TrainableGraph &graph, const compiler::CompilerOptions &options);
+
+ // TODO Remove const_cast
+ ir::Graph &graph() override { return const_cast<ir::Graph &>(_trainable_graph.graph()); }
+ const ir::Graph &graph() const override { return _trainable_graph.graph(); }
+ ir::train::TrainableGraph &trainable_graph() { return _trainable_graph; }
+ const ir::train::TrainableGraph &trainable_graph() const { return _trainable_graph; }
+ const compiler::GraphLowerInfo &lower_info() const override { return _lower_info_map; }
+ compiler::GraphLowerInfo &lower_info() override { return _lower_info_map; }
+ std::shared_ptr<ir::OperationIndexMap<int64_t>> indexed_ranks() { return _indexed_ranks; }
+
+ void setHasDynamicTensor(ir::OperationIndex, bool has_dynamic) override
+ {
+ if (has_dynamic)
+ throw std::runtime_error("LoweredTrainableGraph does not support dynamic tensors yet");
+ }
+ bool getHasDynamicTensor(ir::OperationIndex) const override { return false; }
+
+private:
+ void makeLowerInfo(const compiler::BackendResolver &backend_resolver);
+ void dumpLowerInfo();
+ void lowerGraph(const compiler::CompilerOptions &options);
+
+private:
+ /**
+ * @brief Copy of target graph for lowering
+ * @note It uses a copy of the graph, not a reference.
+ * This allows the original graph to be compiled multiple times.
+ */
+ ir::train::TrainableGraph _trainable_graph;
+ std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks;
+ compiler::GraphLowerInfo _lower_info_map;
+};
+
+} // namespace train
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_TRAIN_LOWERED_TRAINABLE_GRAPH_H__
diff --git a/runtime/onert/core/include/compiler/train/TrainableCodeMap.h b/runtime/onert/core/include/compiler/train/TrainableCodeMap.h
new file mode 100644
index 000000000..1069a47c9
--- /dev/null
+++ b/runtime/onert/core/include/compiler/train/TrainableCodeMap.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_TRAIN_TRAINABLE_CODE_MAP_H__
+#define __ONERT_COMPILER_TRAIN_TRAINABLE_CODE_MAP_H__
+
+#include <unordered_map>
+#include "compiler/OperationLowerInfo.h"
+#include "exec/train/TrainableFnSequence.h"
+#include "ir/train/ITrainableOperation.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace train
+{
+
+struct TrainableCodeAndInfo
+{
+ ir::OperationIndex op_ind;
+ const ir::train::ITrainableOperation *op;
+ const OperationLowerInfo *lower_info;
+ // TODO Change to TrainableFnSequence
+ std::unique_ptr<exec::train::TrainableFnSequence> tn_seq;
+
+ TrainableCodeAndInfo(const ir::OperationIndex op_ind, const ir::train::ITrainableOperation *op,
+ const OperationLowerInfo *lower_info,
+ std::unique_ptr<exec::train::TrainableFnSequence> &&tn_seq)
+ : op_ind{op_ind}, op{op}, lower_info{lower_info}, tn_seq{std::move(tn_seq)}
+ {
+ }
+};
+
+using TrainableCodeMap = std::unordered_map<ir::OperationIndex, TrainableCodeAndInfo>;
+
+} // namespace train
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_TRAIN_TRAINABLE_CODE_MAP_H__
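
Because TrainableCodeAndInfo owns its function sequence through a unique_ptr and has no default constructor, map entries are best built in place. A sketch of the emplace idiom against a reduced value type (the stubs are assumptions, not the real onert types):

#include <cstdint>
#include <memory>
#include <tuple>
#include <unordered_map>

struct FnSequenceStub
{
};

// Reduced TrainableCodeAndInfo: non-default-constructible, owns its sequence.
struct CodeAndInfoSketch
{
  uint32_t op_ind;
  std::unique_ptr<FnSequenceStub> tn_seq;

  CodeAndInfoSketch(uint32_t op_ind, std::unique_ptr<FnSequenceStub> &&tn_seq)
    : op_ind{op_ind}, tn_seq{std::move(tn_seq)}
  {
  }
};

int main()
{
  std::unordered_map<uint32_t, CodeAndInfoSketch> code_map;
  // Construct the value in place; there is no default constructor to fall back on.
  code_map.emplace(std::piecewise_construct, std::forward_as_tuple(0),
                   std::forward_as_tuple(0, std::make_unique<FnSequenceStub>()));
  return code_map.count(0) == 1 ? 0 : 1;
}
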
diff --git a/runtime/onert/core/include/compiler/train/TrainingInfo.h b/runtime/onert/core/include/compiler/train/TrainingInfo.h
new file mode 100644
index 000000000..3b77c838c
--- /dev/null
+++ b/runtime/onert/core/include/compiler/train/TrainingInfo.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_TRAIN_TRAINING_INFO_H__
+#define __ONERT_COMPILER_TRAIN_TRAINING_INFO_H__
+
+#include "ir/Index.h"
+#include "exec/train/optimizer/OptimizerCode.h"
+#include "ir/operation/Loss.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace train
+{
+
+struct LossInfo
+{
+ ir::operation::Loss::Type type;
+ // TODO Add members for loss
+};
+
+struct OptimizerInfo
+{
+ exec::train::optimizer::OptimizerCode optim_code;
+ float learning_rate;
+ // TODO Add properties
+};
+
+class TrainingInfo
+{
+public:
+ TrainingInfo() {}
+ TrainingInfo(const TrainingInfo &obj) = default;
+ TrainingInfo(TrainingInfo &&) = default;
+ TrainingInfo &operator=(const TrainingInfo &) = default;
+ TrainingInfo &operator=(TrainingInfo &&) = default;
+ ~TrainingInfo() = default;
+
+ uint32_t batchSize() const { return _batch_size; }
+ void setBatchSize(const uint32_t batch_size) { _batch_size = batch_size; }
+ const LossInfo &lossInfo() const { return _loss_info; }
+ void setLossInfo(const LossInfo &loss_info) { _loss_info = loss_info; }
+ const OptimizerInfo &optimizerInfo() const { return _optimizer_info; }
+ void setOptimizerInfo(const OptimizerInfo &optimizer_info) { _optimizer_info = optimizer_info; }
+
+private:
+ LossInfo _loss_info;
+ OptimizerInfo _optimizer_info;
+ uint32_t _batch_size;
+};
+
+} // namespace train
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_TRAIN_TRAINING_INFO_H__
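
The typical configuration flow for this holder is to set the batch size, loss, and optimizer before compiling for training. The enums and defaults below are assumptions for the sketch; note also that the sketch gives _batch_size an initial value, which the header's default constructor does not.

#include <cstdint>

// Assumed enums; the real types are ir::operation::Loss::Type and
// exec::train::optimizer::OptimizerCode.
enum class LossType
{
  kCategoricalCrossentropy
};
enum class OptimizerCode
{
  kSGD
};

struct LossInfoSketch
{
  LossType type;
};
struct OptimizerInfoSketch
{
  OptimizerCode optim_code;
  float learning_rate;
};

class TrainingInfoSketch
{
public:
  uint32_t batchSize() const { return _batch_size; }
  void setBatchSize(uint32_t batch_size) { _batch_size = batch_size; }
  void setLossInfo(const LossInfoSketch &loss_info) { _loss_info = loss_info; }
  void setOptimizerInfo(const OptimizerInfoSketch &info) { _optimizer_info = info; }

private:
  LossInfoSketch _loss_info{LossType::kCategoricalCrossentropy};
  OptimizerInfoSketch _optimizer_info{OptimizerCode::kSGD, 0.01f};
  uint32_t _batch_size = 1; // initialized here, unlike the header's default ctor
};

int main()
{
  TrainingInfoSketch info;
  info.setBatchSize(32);
  info.setLossInfo({LossType::kCategoricalCrossentropy});
  info.setOptimizerInfo({OptimizerCode::kSGD, 0.001f});
  return info.batchSize() == 32 ? 0 : 1;
}
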
diff --git a/runtime/onert/core/include/exec/DynamicShapeInference.h b/runtime/onert/core/include/exec/DynamicShapeInference.h
deleted file mode 100644
index 6f6659659..000000000
--- a/runtime/onert/core/include/exec/DynamicShapeInference.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_EXEC_DYNAMIC_SHAPE_INFERENCE_H__
-#define __ONERT_EXEC_DYNAMIC_SHAPE_INFERENCE_H__
-
-#include "ir/Operands.h"
-#include "ir/OperationVisitor.h"
-#include "ir/Index.h"
-#include "backend/IDynamicTensorManager.h"
-#include "backend/ITensorManager.h"
-#include "backend/ITensorRegistry.h"
-
-#include <map>
-
-namespace onert
-{
-namespace exec
-{
-
-/**
- * @brief Class to infer shape of output tensor at execution time and
- * allocate memory fo output tensor if needed
- */
-class DynamicShapeInferer : public ir::OperationVisitor
-{
-public:
- DynamicShapeInferer(const ir::Operands &operands,
- const std::shared_ptr<backend::ITensorRegistry> &tensor_registry)
- : _operands(operands), _tensor_registry(tensor_registry)
- {
- UNUSED_RELEASE(_operands);
- UNUSED_RELEASE(_tensor_registry);
- }
-
-public:
- // TODO Define visitors for operations. List them in alphabetic order.
- // Remove TODO when any op starting from the alphabet is added
- void visit(const ir::operation::ArgMax &op) override;
- void visit(const ir::operation::BatchMatMul &op) override;
- void visit(const ir::operation::BinaryArithmetic &op) override;
- void visit(const ir::operation::BroadcastTo &op) override;
- void visit(const ir::operation::Comparison &op) override;
- void visit(const ir::operation::Concat &op) override;
- void visit(const ir::operation::Conv2D &op) override;
- void visit(const ir::operation::ElementwiseActivation &op) override;
- void visit(const ir::operation::ElementwiseBinary &op) override;
- void visit(const ir::operation::ElementwiseUnary &op) override;
- void visit(const ir::operation::ExpandDims &op) override;
- void visit(const ir::operation::Fill &op) override;
- void visit(const ir::operation::FullyConnected &op) override;
- void visit(const ir::operation::FusedBatchNorm &op) override;
- void visit(const ir::operation::Gather &op) override;
- void visit(const ir::operation::L2Normalization &op) override;
- void visit(const ir::operation::MatrixBandPart &op) override;
- void visit(const ir::operation::OneHot &op) override;
- void visit(const ir::operation::Pack &op) override;
- void visit(const ir::operation::Pad &op) override;
- void visit(const ir::operation::Permute &op) override;
- void visit(const ir::operation::Pow &op) override;
- // TODO write op starting from Q
- void visit(const ir::operation::Range &op) override;
- void visit(const ir::operation::Reduce &op) override;
- void visit(const ir::operation::Reshape &op) override;
- void visit(const ir::operation::ResizeBilinear &op) override;
- void visit(const ir::operation::Reverse &op) override;
- void visit(const ir::operation::Select &op) override;
- void visit(const ir::operation::Shape &op) override;
- void visit(const ir::operation::Slice &op) override;
- void visit(const ir::operation::Softmax &op) override;
- void visit(const ir::operation::SpaceToBatchND &op) override;
- void visit(const ir::operation::Split &op) override;
- void visit(const ir::operation::Squeeze &op) override;
- void visit(const ir::operation::StridedSlice &op) override;
- void visit(const ir::operation::SquaredDifference &op) override;
- void visit(const ir::operation::Tile &op) override;
- void visit(const ir::operation::Transpose &op) override;
- void visit(const ir::operation::Unpack &op) override;
- // TODO write op starting from V
-
-private:
- /**
- * @brief Performs shape inference and memory allocation for arithmetic operation
- */
- void handleBinaryArithmeticOp(const ir::Operation &op, const ir::OperandIndex lhs_idx,
- const ir::OperandIndex rhs_idx);
- /**
- * @brief Performs shape inference and memory allocation for unary op whose output shape is
- * always same with input shape
- */
- void handleSimpleUnaryOp(const ir::Operation &op, const ir::OperandIndex input_idx);
-
-private:
- /**
- * @brief To get operand-level info, e.g., ir::Operand::isConstant()
- */
- const ir::Operands &_operands;
- /**
- * @brief To get tensor object and access tensor-level info, e.g., ITensor::buffer()
- */
- std::shared_ptr<backend::ITensorRegistry> _tensor_registry;
-};
-
-} // namespace exec
-} // namespace onert
-
-#endif // __ONERT_EXEC_DYNAMIC_SHAPE_INFERENCE_H__
diff --git a/runtime/onert/core/include/exec/DynamicShapeInferer.h b/runtime/onert/core/include/exec/DynamicShapeInferer.h
new file mode 100644
index 000000000..f814b789a
--- /dev/null
+++ b/runtime/onert/core/include/exec/DynamicShapeInferer.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_DYNAMIC_SHAPE_INFERER_H__
+#define __ONERT_EXEC_DYNAMIC_SHAPE_INFERER_H__
+
+#include "ir/Operands.h"
+#include "ir/OperationVisitor.h"
+#include "ir/Index.h"
+#include "backend/ITensorRegistry.h"
+
+#include <map>
+
+namespace onert
+{
+namespace exec
+{
+
+/**
+ * @brief Class to infer the shape of an output tensor at execution time and
+ * allocate memory for the output tensor if needed
+ */
+class DynamicShapeInferer : public ir::OperationVisitor
+{
+public:
+ DynamicShapeInferer(const ir::Operands &operands,
+ const std::shared_ptr<backend::ITensorRegistry> &tensor_registry)
+ : _operands(operands), _tensor_registry(tensor_registry)
+ {
+ UNUSED_RELEASE(_operands);
+ UNUSED_RELEASE(_tensor_registry);
+ }
+
+public:
+ // TODO Define visitors for operations. List them in alphabetic order.
+ // Remove TODO when any op starting from the alphabet is added
+ void visit(const ir::operation::ArgMinMax &op) override;
+ void visit(const ir::operation::BatchMatMul &op) override;
+ void visit(const ir::operation::BCQFullyConnected &op) override;
+ void visit(const ir::operation::BCQGather &op) override;
+ void visit(const ir::operation::BinaryArithmetic &op) override;
+ void visit(const ir::operation::BroadcastTo &op) override;
+ void visit(const ir::operation::Comparison &op) override;
+ void visit(const ir::operation::Concat &op) override;
+ void visit(const ir::operation::Conv2D &op) override;
+ void visit(const ir::operation::ElementwiseActivation &op) override;
+ void visit(const ir::operation::ElementwiseBinary &op) override;
+ void visit(const ir::operation::ElementwiseUnary &op) override;
+ void visit(const ir::operation::ExpandDims &op) override;
+ void visit(const ir::operation::Fill &op) override;
+ void visit(const ir::operation::FullyConnected &op) override;
+ void visit(const ir::operation::FusedBatchNorm &op) override;
+ void visit(const ir::operation::Gather &op) override;
+ void visit(const ir::operation::L2Normalization &op) override;
+ void visit(const ir::operation::LSTM &op) override;
+ void visit(const ir::operation::MatrixBandPart &op) override;
+ void visit(const ir::operation::DetectionPostProcess &op) override;
+ void visit(const ir::operation::OneHot &op) override;
+ void visit(const ir::operation::Pack &op) override;
+ void visit(const ir::operation::Pad &op) override;
+ void visit(const ir::operation::Permute &op) override;
+ void visit(const ir::operation::Pow &op) override;
+ // TODO write op starting from Q
+ void visit(const ir::operation::Range &op) override;
+ void visit(const ir::operation::Reduce &op) override;
+ void visit(const ir::operation::Reshape &op) override;
+ void visit(const ir::operation::ResizeBilinear &op) override;
+ void visit(const ir::operation::Reverse &op) override;
+ void visit(const ir::operation::Select &op) override;
+ void visit(const ir::operation::Shape &op) override;
+ void visit(const ir::operation::Slice &op) override;
+ void visit(const ir::operation::Softmax &op) override;
+ void visit(const ir::operation::SpaceToBatchND &op) override;
+ void visit(const ir::operation::Split &op) override;
+ void visit(const ir::operation::Squeeze &op) override;
+ void visit(const ir::operation::StridedSlice &op) override;
+ void visit(const ir::operation::SquaredDifference &op) override;
+ void visit(const ir::operation::Tile &op) override;
+ void visit(const ir::operation::Transpose &op) override;
+ void visit(const ir::operation::Unpack &op) override;
+ // TODO write op starting from V
+
+private:
+ /**
+ * @brief Performs shape inference and memory allocation for arithmetic operation
+ */
+ void handleBinaryArithmeticOp(const ir::Operation &op, const ir::OperandIndex lhs_idx,
+ const ir::OperandIndex rhs_idx);
+ /**
+ * @brief Performs shape inference and memory allocation for unary op whose output shape is
+ * always same with input shape
+ */
+ void handleSimpleUnaryOp(const ir::Operation &op, const ir::OperandIndex input_idx);
+
+ // For an op's output tensor, it is possible that
+ // the output becomes dynamic although it had been static before.
+ // Once a tensor becomes dynamic, it loses the memory that was allocated while static.
+ // Therefore, once an output is dynamic, it should be treated as a dynamic tensor (its memory is
+ // allocated at runtime). Here `previously` means that `dynamic` or `static` was set in a previous
+ // iteration of a WHILE loop or in a previous call of `nnfw_run()`.
+ bool previously_static(backend::ITensor *op_output) { return !op_output->is_dynamic(); }
+
+ // helper function that checks whether an op's input is static
+ // Note that the input of the n'th op was set to static or dynamic by the (n-1)'th op.
+ // That's why it is called `currently_static`.
+ bool currently_static(backend::ITensor *op_input) { return !op_input->is_dynamic(); }
+
+private:
+ /**
+ * @brief To get operand-level info, e.g., ir::Operand::isConstant()
+ */
+ const ir::Operands &_operands;
+ /**
+ * @brief To get tensor object and access tensor-level info, e.g., ITensor::buffer()
+ */
+ std::shared_ptr<backend::ITensorRegistry> _tensor_registry;
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_DYNAMIC_SHAPE_INFERER_H__
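
The two helpers above support the per-op decision the inferer makes at run time: if any input is currently dynamic, output shapes must be re-inferred and the outputs marked dynamic. A reduced sketch of that gate, with TensorStub standing in for backend::ITensor:

#include <vector>

// Stand-in for backend::ITensor's dynamic flag.
struct TensorStub
{
  bool dynamic = false;
  bool is_dynamic() const { return dynamic; }
  void set_dynamic() { dynamic = true; }
};

// If any input is currently dynamic, the op's outputs must be re-inferred.
bool needs_dynamic_inference(const std::vector<TensorStub *> &inputs)
{
  for (auto *input : inputs)
    if (input->is_dynamic())
      return true;
  return false;
}

int main()
{
  TensorStub in0, in1, out;
  in1.set_dynamic(); // e.g. its shape was changed by the previous op
  if (needs_dynamic_inference({&in0, &in1}))
    out.set_dynamic(); // shape and memory will be resolved at run time
  return out.is_dynamic() ? 0 : 1;
}
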
diff --git a/runtime/onert/core/include/exec/Execution.h b/runtime/onert/core/include/exec/Execution.h
index d3c5b6dda..da4d20dbe 100644
--- a/runtime/onert/core/include/exec/Execution.h
+++ b/runtime/onert/core/include/exec/Execution.h
@@ -22,10 +22,12 @@
#define __ONERT_EXEC_EXECUTION_H__
#include "ir/Layout.h"
-#include "exec/IExecutor.h"
+#include "exec/IExecutors.h"
#include "IODescription.h"
#include <thread>
+#include <deque>
+#include <semaphore.h>
namespace onert
{
@@ -44,14 +46,14 @@ public:
* @brief Construct a new Execution object
* @param[in] executor Model executor
*/
- Execution(const std::shared_ptr<ExecutorMap> &executors);
+ Execution(const std::shared_ptr<IExecutors> &executors);
public:
/**
* @brief Returns primary graph object
* @return Graph object
*/
- const ir::Graph &primary_subgraph() const { return primary_executor()->graph(); }
+ const ir::Graph &primary_subgraph() const { return entryExecutor()->graph(); }
/**
* @brief Change input shape
@@ -69,6 +71,7 @@ public:
*/
void setInput(const ir::IOIndex &index, const void *buffer, size_t length,
ir::Layout layout = ir::Layout::NHWC);
+
/**
* @brief Set input data's information, especially to specify unknown dimensions on model
* build time.
@@ -139,18 +142,35 @@ public:
*/
bool isFinished(void) const;
+#ifdef ONERT_TRAIN
+ /**
+ * @brief Train
+ * @note It should be called after setting input and output buffers
+ * @param training_step The number of iterations of the training process.
+ * In other words, the number of gradient updates.
+ */
+ void train(uint32_t training_step);
+
+ /**
+ * @brief Get loss
+ * @note It should be called after training
+ * @param[in] ind Output index
+ * @return @c float Loss value
+ */
+ float getLoss(const ir::IOIndex &ind);
+#endif // ONERT_TRAIN
+
ir::Shape getInputShape(ir::IOIndex ind) const;
ir::Shape getOutputShape(ir::IOIndex ind) const;
+ size_t getInputTotalSize(ir::IOIndex ind) const;
+ size_t getOutputTotalSize(ir::IOIndex ind) const;
private:
- const std::unique_ptr<IExecutor> &primary_executor() const
- {
- return _executors->at(ir::SubgraphIndex{0});
- };
- std::unique_ptr<IExecutor> &primary_executor() { return _executors->at(ir::SubgraphIndex{0}); };
+ const IExecutor *entryExecutor() const { return _executors->entryExecutor(); };
+ IExecutor *entryExecutor() { return _executors->entryExecutor(); };
private:
- const std::shared_ptr<ExecutorMap> _executors;
+ const std::shared_ptr<IExecutors> _executors;
IODescription _io_desc;
std::unique_ptr<std::thread> _exec_thread;
bool finished{false};
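
Under ONERT_TRAIN, the intended call pattern is to bind buffers and then alternate train() and getLoss(). A toy sketch of that loop against a stub; the halving "loss" is a placeholder for illustration, not the runtime's behavior.

#include <cstdint>

// Stub standing in for exec::Execution.
struct ExecutionStub
{
  float loss = 4.0f;
  void train(uint32_t steps)
  {
    while (steps--)
      loss *= 0.5f; // pretend one gradient update per step
  }
  float getLoss(uint32_t /*output_index*/) const { return loss; }
};

int main()
{
  ExecutionStub execution;
  // input/output buffers would be bound via setInput()/setOutput() before this loop
  for (uint32_t step = 0; step < 3; ++step)
    execution.train(1); // one gradient update per call
  return execution.getLoss(0) < 4.0f ? 0 : 1;
}
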
diff --git a/runtime/onert/core/include/exec/FunctionSequence.h b/runtime/onert/core/include/exec/FunctionSequence.h
index 79a58ed00..f3384be3c 100644
--- a/runtime/onert/core/include/exec/FunctionSequence.h
+++ b/runtime/onert/core/include/exec/FunctionSequence.h
@@ -23,10 +23,9 @@
#include <functional>
#include "exec/IFunction.h"
-#include "exec/DynamicShapeInference.h"
+#include "exec/DynamicShapeInferer.h"
#include "ir/Operations.h"
#include "backend/ITensorRegistry.h"
-#include "backend/IDynamicTensorManager.h"
namespace onert
{
@@ -67,7 +66,7 @@ public:
template <typename T, typename... Args> void wrap(Args &&... args)
{
- for (auto &function : _functions)
+ for (auto &&function : _functions)
{
function = std::make_unique<T>(std::move(function), args...);
}
@@ -76,11 +75,8 @@ public:
public: // methods related to dynamic tensor
struct DynamicTensorCtx
{
- const ir::OpSequence *op_seq = nullptr;
- const ir::Operations *operations = nullptr;
+ const ir::IOperation *op = nullptr;
std::shared_ptr<exec::DynamicShapeInferer> dynamic_shape_inferer = nullptr;
- std::shared_ptr<backend::ITensorRegistry> tensor_registry = nullptr;
- backend::IDynamicTensorManager *dynamic_tensor_manager = nullptr;
};
/**
@@ -104,14 +100,25 @@ public: // methods related to dynamic tensor
*/
void enableDynamicShapeInferer(bool enable)
{
- _enable_dynamic_shape_inferer = _enable_dynamic_shape_inferer && enable;
+ _enable_dynamic_shape_inferer = _enable_dynamic_shape_inferer || enable;
}
+ /**
+ * @brief Call this function to initialize variables before running
+ * @note When we run a model with static tensor input and then run it with dynamic tensor input,
+ * _enable_dynamic_shape_inferer is set to @c false on the first run.
+ * Once _enable_dynamic_shape_inferer is set to @c true, it cannot be changed back to @c false
+ * by calling enableDynamicShapeInferer() alone, so initializing it to @c false here is
+ * necessary.
+ * @todo This is a quick fix. Adding this will increase the time for run(). Find a way to optimize.
+ */
+ void initRunning() { _enable_dynamic_shape_inferer = false; }
+
protected:
std::vector<std::unique_ptr<IFunction>> _functions;
protected:
- bool _enable_dynamic_shape_inferer = true;
+ bool _enable_dynamic_shape_inferer = false;
std::shared_ptr<DynamicTensorCtx> _dynamic_tensor_ctx = nullptr;
};
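
Note the semantic flip from && to ||: the flag is now a per-run latch that any caller can set but none can clear, which is exactly why initRunning() must reset it before each run. A tiny sketch of the latch behavior:

class LatchSketch
{
public:
  void initRunning() { _enabled = false; }
  void enable(bool enable) { _enabled = _enabled || enable; }
  bool enabled() const { return _enabled; }

private:
  bool _enabled = false;
};

int main()
{
  LatchSketch latch;
  latch.initRunning();
  latch.enable(true);
  latch.enable(false);              // cannot turn it back off within the run
  bool first_run = latch.enabled(); // true
  latch.initRunning();              // next run starts static again
  return (first_run && !latch.enabled()) ? 0 : 1;
}
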
diff --git a/runtime/onert/core/include/exec/IExecutor.h b/runtime/onert/core/include/exec/IExecutor.h
index 6c8bab67c..46dbcd033 100644
--- a/runtime/onert/core/include/exec/IExecutor.h
+++ b/runtime/onert/core/include/exec/IExecutor.h
@@ -18,20 +18,34 @@
* @file IExecutor.h
* @brief This file defines interface of Executor
*/
-#ifndef __ONERT_EXEC_I_EXECUTOR_H_
-#define __ONERT_EXEC_I_EXECUTOR_H_
+#ifndef __ONERT_EXEC_I_EXECUTOR_H__
+#define __ONERT_EXEC_I_EXECUTOR_H__
#include "ir/Graph.h"
#include "IFunction.h"
#include "IODescription.h"
+#include "ir/Index.h"
#include "ir/OperationIndexMap.h"
-#include "backend/IDynamicTensorManager.h"
+#include <cstdint>
+#include <memory>
+#include <unordered_map>
+
+namespace onert
+{
+namespace backend
+{
+class IPortableTensor;
+namespace builtin
+{
+class IOTensor;
+}
+} // namespace backend
+} // namespace onert
namespace onert
{
namespace exec
{
-class IExecutionObserver;
/**
* @brief Struct to define interface of Executor
*/
@@ -51,7 +65,7 @@ struct IExecutor
*
* @return Graph object
*/
- virtual const ir::Graph &graph() = 0;
+ virtual const ir::Graph &graph() const = 0;
/**
* @brief Set an ordering on operations
@@ -60,31 +74,39 @@ struct IExecutor
virtual void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>>) = 0;
/**
- * @brief Start execution
+ * @brief Execute with user-given input/output description (for primary subgraph)
* @param[in] desc Input and output description
* @note This method should be thread-safe
*/
virtual void execute(const IODescription &desc) = 0;
-};
-using ExecutorMap = std::unordered_map<ir::SubgraphIndex, std::unique_ptr<IExecutor>>;
+ /**
+ * @brief Execute with given input/output tensors
+ *
+ * For non-primary subgraphs, input and output tensors must be given.
+ *
+ * @param[in] inputs tensors that are passed as inputs
+ * @param[in] outputs tensors that are passed as outputs
+ */
+ virtual void execute(const std::vector<backend::IPortableTensor *> &inputs,
+ const std::vector<backend::IPortableTensor *> &outputs) = 0;
+
+ /**
+ * @brief Get input tensor objects
+ *
+ * @return Vector of @c IOTensor
+ */
+ virtual const std::vector<backend::builtin::IOTensor *> &getInputTensors() const = 0;
-// TODO Move this structure to suitable place
-/**
- * @brief Dynamic allocation info for input tensors
- * When user sets shape of input having unknown dims after compilation, memory for the input
- * should be allocated before executing kernels. This struct contains information to allocate
- * memory.
- */
-struct DynAllocInfo
-{
- /// @brief index of input tensor whose memory needs to be allocated at execution time
- ir::OperandIndex ind;
+ /**
+ * @brief Get output tensor objects
+ *
+ * @return Vector of @c IOTensor
+ */
+ virtual const std::vector<backend::builtin::IOTensor *> &getOutputTensors() const = 0;
};
-using DynAllocInfoMap = std::unordered_map<std::shared_ptr<backend::ITensor>, DynAllocInfo>;
-
} // namespace exec
} // namespace onert
-#endif // __ONERT_EXEC_I_EXECUTOR_H_
+#endif // __ONERT_EXEC_I_EXECUTOR_H__
diff --git a/runtime/onert/core/include/exec/IExecutors.h b/runtime/onert/core/include/exec/IExecutors.h
new file mode 100644
index 000000000..013da716b
--- /dev/null
+++ b/runtime/onert/core/include/exec/IExecutors.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_I_EXECUTORS_H__
+#define __ONERT_EXEC_I_EXECUTORS_H__
+
+#include "IExecutor.h"
+
+namespace onert
+{
+namespace exec
+{
+
+/**
+ * @brief Class to gather NN package's executor set
+ */
+class IExecutors
+{
+public:
+ /**
+ * @brief Virtual IExecutors destructor
+ * @note Required so that the destructor of a derived class is invoked
+ */
+ virtual ~IExecutors() = default;
+
+public:
+ /**
+ * @brief Insert executor in executor set
+ * @param[in] model_index Model index
+ * @param[in] subg_index Subgraph index
+ * @param[in] exec Executor to insert
+ *
+ * @todo Use Executor index
+ */
+ virtual void emplace(const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+ std::unique_ptr<IExecutor> exec) = 0;
+
+ /**
+ * @brief Return executor of index
+ * @param[in] model_index Model index
+ * @param[in] subg_index Subgraph index
+ * @return Executor
+ */
+ virtual IExecutor *at(const ir::ModelIndex &model_index,
+ const ir::SubgraphIndex &subg_index) const = 0;
+
+ IExecutor *entryExecutor() const { return at(ir::ModelIndex{0}, ir::SubgraphIndex{0}); }
+
+ /**
+ * @brief Return executor set's number of input
+ * @return Number of input
+ */
+ virtual uint32_t inputSize() const = 0;
+
+ /**
+ * @brief Return executor set's number of output
+ * @return Number of output
+ */
+ virtual uint32_t outputSize() const = 0;
+
+ /**
+ * @brief Return NN package input tensor info
+ * @param[in] index Input index
+ * @return Tensor info
+ */
+ virtual const ir::OperandInfo &inputInfo(const ir::IOIndex &index) const = 0;
+
+ /**
+ * @brief Return NN package output tensor info
+ * @param[in] index Output index
+ * @return Tensor info
+ */
+ virtual const ir::OperandInfo &outputInfo(const ir::IOIndex &index) const = 0;
+
+ /**
+ * @brief Execute NN package executor set
+ * @param[in] desc Input and output buffer description
+ */
+ virtual void execute(const IODescription &desc) = 0;
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_I_EXECUTORS_H__
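
A straightforward implementation stores executors in a map keyed by (model index, subgraph index), which makes entryExecutor() a lookup of (0, 0). A sketch with plain uint32_t indices and a stub executor type (both assumptions for self-containment):

#include <cstdint>
#include <map>
#include <memory>
#include <utility>

struct ExecutorStub
{
  uint32_t id;
};

class ExecutorsSketch
{
public:
  void emplace(uint32_t model, uint32_t subg, std::unique_ptr<ExecutorStub> exec)
  {
    _executors[{model, subg}] = std::move(exec);
  }
  ExecutorStub *at(uint32_t model, uint32_t subg) const
  {
    return _executors.at({model, subg}).get();
  }
  ExecutorStub *entryExecutor() const { return at(0, 0); }

private:
  std::map<std::pair<uint32_t, uint32_t>, std::unique_ptr<ExecutorStub>> _executors;
};

int main()
{
  ExecutorsSketch executors;
  executors.emplace(0, 0, std::make_unique<ExecutorStub>(ExecutorStub{42}));
  return executors.entryExecutor()->id == 42 ? 0 : 1;
}
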
diff --git a/runtime/onert/core/include/exec/IODescription.h b/runtime/onert/core/include/exec/IODescription.h
index d1810ec3b..14c5ffc2b 100644
--- a/runtime/onert/core/include/exec/IODescription.h
+++ b/runtime/onert/core/include/exec/IODescription.h
@@ -19,6 +19,7 @@
#include <vector>
#include <unordered_map>
+#include <semaphore.h>
#include "ir/OperandInfo.h"
#include "ir/Index.h"
@@ -37,7 +38,7 @@ struct InputDesc
InputDesc(void) = delete;
InputDesc(const ir::OperandInfo &info, const void *buffer, const size_t size, ir::Layout layout)
- : info(info), buffer(buffer), size(size), layout(layout)
+ : info(info), buffer(buffer), size(size), layout(layout)
{
}
};
@@ -53,7 +54,7 @@ struct OutputDesc
OutputDesc(void) = delete;
OutputDesc(const ir::OperandInfo &info, void *buffer, const size_t size, ir::Layout layout)
- : info(info), buffer(buffer), size(size), layout(layout)
+ : info(info), buffer(buffer), size(size), layout(layout)
{
}
};
diff --git a/runtime/onert/core/include/exec/MinMaxMap.h b/runtime/onert/core/include/exec/MinMaxMap.h
new file mode 100644
index 000000000..fc6849e74
--- /dev/null
+++ b/runtime/onert/core/include/exec/MinMaxMap.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_MINMAX_MAP_H__
+#define __ONERT_EXEC_MINMAX_MAP_H__
+
+#include "ir/Index.h"
+#include "util/MinMaxMap.h"
+
+namespace onert
+{
+namespace exec
+{
+struct SMHash
+{
+ size_t operator()(const std::pair<ir::SubgraphIndex, ir::OperationIndex> &k) const noexcept
+ {
+ return std::hash<ir::SubgraphIndex>()(k.first) ^ std::hash<ir::OperationIndex>()(k.second);
+ }
+};
+// SM means single model
+using SMMinMaxMap = util::MinMaxMap<std::pair<ir::SubgraphIndex, ir::OperationIndex>, SMHash>;
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_MINMAX_MAP_H__
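`SMHash` makes the (subgraph, operation) pair usable as a key in unordered containers. The exact API of `util::MinMaxMap` is not shown in this diff, so the illustration below stands in a plain `std::unordered_map` for it; the `record` helper is hypothetical.

```cpp
#include <algorithm>
#include <unordered_map>
#include <utility>
#include "exec/MinMaxMap.h"

using Key = std::pair<onert::ir::SubgraphIndex, onert::ir::OperationIndex>;
std::unordered_map<Key, std::pair<float, float>, onert::exec::SMHash> minmax;

// Record one observed activation value for an operation, e.g. during
// quantization calibration.
void record(const Key &k, float v)
{
  auto it = minmax.find(k);
  if (it == minmax.end())
    minmax.emplace(k, std::make_pair(v, v));
  else
  {
    it->second.first = std::min(it->second.first, v);   // running min
    it->second.second = std::max(it->second.second, v); // running max
  }
}
```

Note that the plain XOR combine in `SMHash` collides whenever the two index values hash equally; that is acceptable for a lookup table, though a mixing combine would distribute better.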
diff --git a/runtime/onert/core/include/exec/train/IGradientApplier.h b/runtime/onert/core/include/exec/train/IGradientApplier.h
new file mode 100644
index 000000000..65e931e0e
--- /dev/null
+++ b/runtime/onert/core/include/exec/train/IGradientApplier.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_TRAIN_IGRADIENT_APPLIER_H__
+#define __ONERT_EXEC_TRAIN_IGRADIENT_APPLIER_H__
+
+#include <cstdint>
+
+namespace onert
+{
+namespace exec
+{
+namespace train
+{
+
+class IGradientApplier
+{
+public:
+ virtual ~IGradientApplier() = default;
+
+ /**
+ * @brief Apply gradients to a trainable tensor
+ *
+ * @param training_step The number of iterations of the training process.
+ */
+ virtual void applyGradient(uint32_t training_step) = 0;
+};
+
+} // namespace train
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_TRAIN_IGRADIENT_APPLIER_H__
diff --git a/runtime/onert/core/include/exec/train/ITrainableFunction.h b/runtime/onert/core/include/exec/train/ITrainableFunction.h
new file mode 100644
index 000000000..45adc258f
--- /dev/null
+++ b/runtime/onert/core/include/exec/train/ITrainableFunction.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_TRAIN_I_TRAINABLE_FUNCTION_H__
+#define __ONERT_EXEC_TRAIN_I_TRAINABLE_FUNCTION_H__
+
+#include <cstdint>
+
+namespace onert
+{
+namespace exec
+{
+namespace train
+{
+
+class ITrainableFunction
+{
+public:
+ virtual ~ITrainableFunction() = default;
+ virtual void forward(bool training) = 0;
+ virtual void backward() = 0;
+};
+
+} // namespace train
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_TRAIN_I_TRAINABLE_FUNCTION_H__
diff --git a/runtime/onert/core/include/exec/train/TrainableFnSequence.h b/runtime/onert/core/include/exec/train/TrainableFnSequence.h
new file mode 100644
index 000000000..8be1b1e5d
--- /dev/null
+++ b/runtime/onert/core/include/exec/train/TrainableFnSequence.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_TRAIN_TRAINABLE_FN_SEQUENCE_H__
+#define __ONERT_EXEC_TRAIN_TRAINABLE_FN_SEQUENCE_H__
+
+#include "exec/train/ITrainableFunction.h"
+#include "exec/train/IGradientApplier.h"
+
+#include <memory>
+#include <vector>
+#include <functional>
+
+namespace onert
+{
+namespace exec
+{
+namespace train
+{
+class TrainableFnSequence
+{
+public:
+ void forward(bool training);
+ void backward(uint32_t training_step);
+
+ void append(std::unique_ptr<ITrainableFunction> &&fn);
+ void append(std::unique_ptr<IGradientApplier> &&applier);
+ void iterate(const std::function<void(ITrainableFunction &)> &fn);
+
+public:
+ // TODO Change members
+ std::vector<std::unique_ptr<ITrainableFunction>> _functions;
+ std::vector<std::unique_ptr<IGradientApplier>> _appliers;
+};
+} // namespace train
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_TRAIN_TRAINABLE_FN_SEQUENCE_H__
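The header only declares the sequence; one plausible implementation consistent with these declarations (an assumption, not a copy of onert's source file) is:

```cpp
#include "exec/train/TrainableFnSequence.h"

namespace onert::exec::train
{

void TrainableFnSequence::forward(bool training)
{
  for (const auto &fn : _functions)
    fn->forward(training);
}

void TrainableFnSequence::backward(uint32_t training_step)
{
  // The backward pass runs in reverse order of the forward pass.
  for (auto it = _functions.rbegin(); it != _functions.rend(); ++it)
    (*it)->backward();

  // Once gradients are computed, each applier updates its tensors.
  for (const auto &applier : _appliers)
    applier->applyGradient(training_step);
}

void TrainableFnSequence::iterate(const std::function<void(ITrainableFunction &)> &fn)
{
  for (const auto &func : _functions)
    fn(*func);
}

} // namespace onert::exec::train
```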
diff --git a/runtime/onert/core/include/exec/train/optimizer/Optimizer.h b/runtime/onert/core/include/exec/train/optimizer/Optimizer.h
new file mode 100644
index 000000000..05f2ee19b
--- /dev/null
+++ b/runtime/onert/core/include/exec/train/optimizer/Optimizer.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_TRAIN_OPTIMIZER_OPTIMIZER_H__
+#define __ONERT_EXEC_TRAIN_OPTIMIZER_OPTIMIZER_H__
+
+#include "backend/IPortableTensor.h"
+#include "backend/train/ITrainableTensor.h"
+
+#include <string>
+
+namespace onert
+{
+namespace exec
+{
+namespace train
+{
+namespace optimizer
+{
+
+// Gradient tensor, trainable tensor, number of training steps
+using UpdateFactors =
+ std::tuple<const backend::IPortableTensor &, backend::train::ITrainableTensor &, size_t>;
+
+/**
+ * @class Optimizer
+ * @brief Base class for all optimizers
+ */
+class Optimizer
+{
+public:
+ virtual ~Optimizer() = default;
+
+ /**
+ * @brief Get the name of the optimizer
+ *
+ * @return The name of the optimizer
+ */
+ virtual std::string name() const { return std::string{"Invalid"}; }
+
+ /**
+ * @brief Get the learning rate
+ *
+ * @param iteration The number of training steps
+ * @return Learning rate
+ */
+ virtual double getLearningRate(uint32_t iteration) const = 0;
+
+ /**
+ * @brief Apply gradient to a trainable tensor
+ *
+ * @param factors UpdateFactors to be used for applying gradient to a trainable tensor
+ */
+ virtual void applyGradient(const UpdateFactors &factors) const = 0;
+
+ // TODO Add member functions for exporting optimizer information
+};
+
+} // namespace optimizer
+} // namespace train
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_TRAIN_OPTIMIZER_OPTIMIZER_H__
diff --git a/runtime/onert/core/include/exec/train/optimizer/OptimizerCode.h b/runtime/onert/core/include/exec/train/optimizer/OptimizerCode.h
new file mode 100644
index 000000000..3e4a8f2a6
--- /dev/null
+++ b/runtime/onert/core/include/exec/train/optimizer/OptimizerCode.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_TRAIN_OPTIMIZER_OPTIMIZER_CODE_H__
+#define __ONERT_EXEC_TRAIN_OPTIMIZER_OPTIMIZER_CODE_H__
+
+#include <functional>
+#include <stdint.h>
+#include <string>
+
+namespace onert
+{
+namespace exec
+{
+namespace train
+{
+namespace optimizer
+{
+
+enum class OptimizerCode
+{
+ Invalid, ///< Invalid
+ SGD,     ///< SGD optimizer
+ Adam     ///< Adam optimizer
+};
+
+/**
+ * @brief Convert the optimizer code to the name
+ *
+ * @param opcode The optimizer code
+ * @return The name of the optimizer
+ */
+std::string toString(OptimizerCode opcode);
+
+} // namespace optimizer
+} // namespace train
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_TRAIN_OPTIMIZER_OPTIMIZER_CODE_H__
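A minimal sketch of the declared conversion follows; this is an assumption about the corresponding source file, written only to show the mapping the enum implies.

```cpp
#include <string>
#include "exec/train/optimizer/OptimizerCode.h"

namespace onert::exec::train::optimizer
{

// Sketch only: the committed implementation may use a table instead.
std::string toString(OptimizerCode opcode)
{
  switch (opcode)
  {
    case OptimizerCode::SGD:
      return "SGD";
    case OptimizerCode::Adam:
      return "Adam";
    default:
      return "Invalid";
  }
}

} // namespace onert::exec::train::optimizer
```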
diff --git a/runtime/onert/core/include/exec/train/optimizer/SGD.h b/runtime/onert/core/include/exec/train/optimizer/SGD.h
new file mode 100644
index 000000000..6a1a5c9b8
--- /dev/null
+++ b/runtime/onert/core/include/exec/train/optimizer/SGD.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_TRAIN_OPTIMIZER_SGD_H__
+#define __ONERT_EXEC_TRAIN_OPTIMIZER_SGD_H__
+
+#include "exec/train/optimizer/Optimizer.h"
+
+namespace onert
+{
+namespace exec
+{
+namespace train
+{
+namespace optimizer
+{
+
+/**
+ * @class SGD
+ * @brief Stochastic Gradient Descent (SGD) optimizer
+ */
+class SGD : public Optimizer
+{
+public:
+ struct Property
+ {
+ double momentum{0.0};
+ bool nesterov{false};
+ };
+
+public:
+ explicit SGD() : _props{}, _learning_rate{0.01} {}
+ explicit SGD(const Property &props) : _props{props}, _learning_rate{0.01} {}
+ explicit SGD(double lr) : _props{}, _learning_rate{lr} {}
+ explicit SGD(const Property &props, double lr) : _props{props}, _learning_rate{lr} {}
+
+public:
+ /**
+ * @brief Get the name of the optimizer
+ *
+ * @return The name of the optimizer
+ */
+ std::string name() const override { return std::string{"SGD"}; }
+
+ /**
+ * @brief Get the learning rate
+ *
+ * @param iteration The number of training steps
+ * @return Learning rate
+ */
+ double getLearningRate(uint32_t iteration = 0) const override;
+
+ /**
+ * @brief Apply gradient to a trainable tensor
+ *
+ * @param factors UpdateFactors to be used for applying gradient to a trainable tensor
+ */
+ void applyGradient(const UpdateFactors &factors) const override;
+
+private:
+ Property _props;
+ double _learning_rate;
+};
+
+} // namespace optimizer
+} // namespace train
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_TRAIN_OPTIMIZER_SGD_H__
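For reference, the update that `Property::momentum` and `Property::nesterov` control, written against raw float buffers rather than onert's tensor types (this is the textbook formulation, NOT the onert implementation, which operates on `IPortableTensor`/`ITrainableTensor`):

```cpp
#include <cstddef>

// v <- momentum * v - lr * g; with momentum == 0 this reduces to plain
// gradient descent: w <- w - lr * g.
void sgd_update(float *w, const float *grad, float *velocity, std::size_t n,
                double lr, double momentum, bool nesterov)
{
  for (std::size_t i = 0; i < n; ++i)
  {
    velocity[i] = static_cast<float>(momentum * velocity[i] - lr * grad[i]);
    if (nesterov)
      // Nesterov look-ahead: apply the momentum-extrapolated step.
      w[i] += static_cast<float>(momentum * velocity[i] - lr * grad[i]);
    else
      w[i] += velocity[i];
  }
}
```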
diff --git a/runtime/onert/core/include/ir/Coordinates.h b/runtime/onert/core/include/ir/Coordinates.h
index 3849a5509..9963cab4c 100644
--- a/runtime/onert/core/include/ir/Coordinates.h
+++ b/runtime/onert/core/include/ir/Coordinates.h
@@ -62,6 +62,12 @@ public:
{
assert(init.size() <= num_max_dimensions);
}
+ /**
+ * @brief Construct a new Coordinates object with rank
+ * @param[in] rank The rank of coordinates
+ * @note All coordinate values are initialized to 0
+ */
+ explicit Coordinates(int rank) : _coordinates(rank, 0) {}
public:
/**
diff --git a/runtime/onert/core/include/ir/Data.h b/runtime/onert/core/include/ir/Data.h
index d31191b4f..bd0d87cae 100644
--- a/runtime/onert/core/include/ir/Data.h
+++ b/runtime/onert/core/include/ir/Data.h
@@ -75,10 +75,10 @@ class MMapedData final : public ExternalData
public:
MMapedData(int fd, const std::ptrdiff_t mmap_offset, const size_t mmap_size,
const std::ptrdiff_t data_offset, const size_t data_size)
- : ExternalData(nullptr, data_size),
- _mmap_base(
- static_cast<uint8_t *>(mmap(NULL, mmap_size, PROT_READ, MAP_PRIVATE, fd, mmap_offset))),
- _mmap_size(mmap_size), _offset(data_offset - mmap_offset)
+ : ExternalData(nullptr, data_size),
+ _mmap_base(
+ static_cast<uint8_t *>(mmap(NULL, mmap_size, PROT_READ, MAP_PRIVATE, fd, mmap_offset))),
+ _mmap_size(mmap_size), _offset(data_offset - mmap_offset)
{
// DO NOTHING
}
diff --git a/runtime/onert/core/include/ir/DataType.h b/runtime/onert/core/include/ir/DataType.h
index fe10b9283..0ec0e0711 100644
--- a/runtime/onert/core/include/ir/DataType.h
+++ b/runtime/onert/core/include/ir/DataType.h
@@ -35,6 +35,10 @@ enum class DataType
QUANT_INT8_SYMM = 6,
FLOAT16 = 7,
INT64 = 8,
+ QUANT_INT8_ASYMM = 9,
+ QUANT_INT16_ASYMM = 10,
+ QUANT_INT8_SYMM_PER_CHANNEL = 11,
+ QUANT_INT16_SYMM = 12,
};
size_t sizeOfDataType(DataType data_type);
diff --git a/runtime/onert/core/include/ir/Graph.h b/runtime/onert/core/include/ir/Graph.h
index 2103e6e64..641698eb2 100644
--- a/runtime/onert/core/include/ir/Graph.h
+++ b/runtime/onert/core/include/ir/Graph.h
@@ -20,29 +20,17 @@
#include <functional>
#include <unordered_map>
+#include "ir/IGraph.h"
+#include "ir/Model.h"
#include "ir/Operands.h"
#include "ir/Operations.h"
-#include "ir/OpSequence.h"
-#include "ir/OpSequences.h"
-#include "ir/Subgraphs.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace custom
-{
-class IKernelBuilder;
-} // namespace custom
-} // namespace backend
-} // namespace onert
namespace onert
{
namespace ir
{
-class Graph
+class Graph : public IGraph
{
private:
enum class Phase
@@ -52,68 +40,93 @@ private:
};
public:
- Graph(void);
+ explicit Graph(void);
+ explicit Graph(const Graph &);
+
~Graph(void);
// Graph Building
public:
OperandIndex addOperand(const Shape &shape, const TypeInfo &type);
- OperationIndex addOperation(std::unique_ptr<Operation> &&node);
+ /**
+ * @brief Add an operand to the graph with the given index and object
+ *
+ * If the given index is available, the call succeeds and @c operand is moved, which
+ * invalidates the caller's pointer. If the given index is already taken, the call fails and
+ * @c operand is not moved, so the caller's pointer remains valid.
+ *
+ * @param[in] index Index to be added
+ * @param[in] operand Operand to be added
+ * @return OperandIndex @c index if successful, Undefined otherwise
+ */
+ OperandIndex addOperand(OperandIndex index, std::unique_ptr<Operand> &&operand);
+ OperationIndex addOperation(std::unique_ptr<IOperation> &&node);
+ /**
+ * @brief Add an operation to the graph with the given index and object
+ *
+ * If the given index is available, the call succeeds and @c operation is moved, which
+ * invalidates the caller's pointer. If the given index is already taken, the call fails and
+ * @c operation is not moved, so the caller's pointer remains valid.
+ *
+ * @param index Index to be added
+ * @param operation IOperation to be added
+ * @return OperandIndex @c index if successful, Undefined otherwise
+ */
+ OperationIndex addOperation(OperationIndex index, std::unique_ptr<IOperation> &&operation);
+ /**
+ * @brief Replace an operation which the graph already has
+ *
+ * If the given @c index is available, the call succeeds and @c operation is moved, which
+ * invalidates the caller's pointer. If the given @c operation has at least one invalid operand
+ * index, the call fails and @c operation is not moved, so the caller's pointer remains valid.
+ *
+ * No information in the graph is changed except for replacing an operation.
+ *
+ * @param operation Operation to be put in place of the existing one
+ * @return OperationIndex @c index if successful, Undefined otherwise
+ */
+ OperationIndex replaceOperation(OperationIndex index, std::unique_ptr<IOperation> &&operation);
void setOperandValue(const OperandIndex &ind, std::shared_ptr<Data> data);
+ void changeShape(const OperandIndex &ind, const ir::Shape &new_shape) override;
void addInput(const OperandIndex &ind, const std::string &name = "");
void addOutput(const OperandIndex &ind, const std::string &name = "");
- void finishBuilding(void);
+ void verify(void) const;
void removeOperand(const OperandIndex &ind) { _operands.remove(ind); }
- bool isBuildingPhase(void) const { return _phase == Phase::BUILDING; }
void setLayout(Layout layout) { _layout = layout; }
- void setSubgraphs(const std::shared_ptr<Subgraphs> &subgs) { _subgraphs = subgs; }
private:
+ bool checkOperandsForOperation(const IOperation &operation);
+ void linkOperandToOperation(OperationIndex index, const IOperation &operation);
void initializeUseDef();
+ // TODO Rename to `sweepUnusedOperands`
+ // TODO Make this public
void sweepGarbageOperands();
- // Custom operations support
-public:
- void
- bindKernelBuilder(const std::shared_ptr<onert::backend::custom::IKernelBuilder> &kernel_builder)
- {
- _kernel_builder = kernel_builder;
- }
-
- const std::shared_ptr<backend::custom::IKernelBuilder> &getKernelBuilder() const
- {
- return _kernel_builder;
- }
-
-private:
- std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder;
-
// Accessors
public:
- const OperandIndexSequence &getInputs() const { return _inputs; }
+ const OperandIndexSequence &getInputs() const override { return _inputs; }
OperandIndexSequence &getInputs() { return _inputs; }
- const OperandIndexSequence &getOutputs() const { return _outputs; }
+ const OperandIndexSequence &getOutputs() const override { return _outputs; }
OperandIndexSequence &getOutputs() { return _outputs; }
- IOIndex getInputIndex(const std::string &name) const;
- IOIndex getOutputIndex(const std::string &name) const;
- const Operands &operands() const { return _operands; }
+ IOIndex getInputIndex(const std::string &name) const override;
+ IOIndex getOutputIndex(const std::string &name) const override;
+ const Operands &operands() const override { return _operands; }
Operands &operands() { return _operands; } // TODO Remove this non-const accessor
- const Operations &operations() const { return _operations; }
+ const Operations &operations() const override { return _operations; }
Operations &operations() { return _operations; }
- const std::shared_ptr<Subgraphs> &subgraphs() const { return _subgraphs; }
- std::shared_ptr<Subgraphs> &subgraphs() { return _subgraphs; }
Layout layout() const { return _layout; }
+ // Topological sort
+public:
+ std::vector<ir::OperationIndex> topolSortOperations() const;
+
private:
- Phase _phase{Phase::BUILDING};
Operations _operations;
Operands _operands;
OperandIndexSequence _inputs;
OperandIndexSequence _outputs;
std::unordered_map<std::string, IOIndex> _name_to_input;
std::unordered_map<std::string, IOIndex> _name_to_output;
- // Child subgraphs
- std::shared_ptr<Subgraphs> _subgraphs;
// TFLite and circle's default layout is NHWC;
Layout _layout{Layout::NHWC};
};
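The move-or-keep contract documented for `addOperand`/`addOperation` above can be relied on as in this hypothetical sketch, given a built `Graph`; `SomeOperation` stands in for any concrete `ir::operation` type, and `undefined()` is assumed from its use in `ir/Index.h` later in this diff.

```cpp
#include <cassert>
#include <memory>

void try_add(onert::ir::Graph &graph, std::unique_ptr<onert::ir::IOperation> op)
{
  auto ret = graph.addOperation(onert::ir::OperationIndex{5}, std::move(op));
  if (ret.undefined())
  {
    // The index was already taken: per the contract, `op` was NOT moved,
    // so the caller-side pointer is still valid and the operation reusable.
    assert(op != nullptr);
  }
}
```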
diff --git a/runtime/onert/core/include/ir/IGraph.h b/runtime/onert/core/include/ir/IGraph.h
new file mode 100644
index 000000000..34fb20188
--- /dev/null
+++ b/runtime/onert/core/include/ir/IGraph.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_IGRAPH_H__
+#define __ONERT_IR_IGRAPH_H__
+
+#include "ir/Operands.h"
+#include "ir/Operations.h"
+
+namespace onert
+{
+namespace ir
+{
+
+struct IGraph
+{
+ virtual ~IGraph() = default;
+
+ // Accessors
+ virtual const OperandIndexSequence &getInputs() const = 0;
+ virtual const OperandIndexSequence &getOutputs() const = 0;
+ virtual IOIndex getInputIndex(const std::string &name) const = 0;
+ virtual IOIndex getOutputIndex(const std::string &name) const = 0;
+ virtual const Operands &operands() const = 0;
+ virtual const Operations &operations() const = 0;
+
+ // Methods that can change graph
+ virtual void changeShape(const OperandIndex &index, const ir::Shape &new_shape) = 0;
+};
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_IGRAPH_H__
diff --git a/runtime/onert/core/include/ir/IOperation.h b/runtime/onert/core/include/ir/IOperation.h
new file mode 100644
index 000000000..be0dd939d
--- /dev/null
+++ b/runtime/onert/core/include/ir/IOperation.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_IOPERATION_H__
+#define __ONERT_IR_IOPERATION_H__
+
+#include <memory>
+
+#include "ir/Index.h"
+#include "ir/OpCode.h"
+#include "ir/OperandIndexSequence.h"
+
+namespace onert
+{
+namespace ir
+{
+
+struct OperationVisitor;
+
+struct IOperation
+{
+ virtual ~IOperation() = default;
+
+ virtual void accept(OperationVisitor &v) const = 0;
+ virtual std::string name() const { return std::string{toString(opcode())}; }
+ virtual OpCode opcode() const = 0;
+
+ virtual void replaceInputs(const OperandIndex &from, const OperandIndex &to) = 0;
+ virtual void replaceOutputs(const OperandIndex &from, const OperandIndex &to) = 0;
+ virtual const OperandIndexSequence &getInputs() const = 0;
+ virtual const OperandIndexSequence &getOutputs() const = 0;
+};
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_IOPERATION_H__
diff --git a/runtime/onert/core/include/ir/Index.h b/runtime/onert/core/include/ir/Index.h
index 2538301a4..1864c3bdb 100644
--- a/runtime/onert/core/include/ir/Index.h
+++ b/runtime/onert/core/include/ir/Index.h
@@ -19,6 +19,8 @@
#include "util/Index.h"
+#include <ostream>
+
namespace onert
{
namespace ir
@@ -33,11 +35,45 @@ using OperandIndex = ::onert::util::Index<uint32_t, OperandIndexTag>;
struct IOIndexTag;
using IOIndex = ::onert::util::Index<uint32_t, IOIndexTag>;
-struct OpSequenceIndexTag;
-using OpSequenceIndex = ::onert::util::Index<uint32_t, OpSequenceIndexTag>;
-
struct SubgraphIndexTag;
-using SubgraphIndex = ::onert::util::Index<uint32_t, SubgraphIndexTag>;
+using SubgraphIndex = ::onert::util::Index<uint16_t, SubgraphIndexTag>;
+
+struct ModelIndexTag;
+using ModelIndex = ::onert::util::Index<uint16_t, ModelIndexTag>;
+
+template <typename IndexType>
+std::ostream &_index_print_impl(std::ostream &o, const std::string &prefix, IndexType index)
+{
+ if (index.undefined())
+ return o << prefix << std::string("?");
+ else
+ return o << prefix << index.value();
+}
+
+inline std::ostream &operator<<(std::ostream &o, const OperationIndex &i)
+{
+ return _index_print_impl(o, "@", i);
+}
+
+inline std::ostream &operator<<(std::ostream &o, const OperandIndex &i)
+{
+ return _index_print_impl(o, "%", i);
+}
+
+inline std::ostream &operator<<(std::ostream &o, const IOIndex &i)
+{
+ return _index_print_impl(o, "IO", i);
+}
+
+inline std::ostream &operator<<(std::ostream &o, const SubgraphIndex &i)
+{
+ return _index_print_impl(o, "SUBGRAPH", i);
+}
+
+inline std::ostream &operator<<(std::ostream &o, const ModelIndex &i)
+{
+ return _index_print_impl(o, "MODEL", i);
+}
} // namespace ir
} // namespace onert
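These stream overloads give each index kind a distinct prefix for logging. A small usage sketch, assuming `util::Index` is brace-constructible from an integer (as uses like `ir::ModelIndex{0}` elsewhere in this diff suggest) and that a default-constructed index is undefined:

```cpp
#include <iostream>
#include "ir/Index.h"

int main()
{
  std::cout << onert::ir::OperationIndex{3} << "\n"; // prints "@3"
  std::cout << onert::ir::OperandIndex{7} << "\n";   // prints "%7"
  std::cout << onert::ir::SubgraphIndex{} << "\n";   // prints "SUBGRAPH?" if undefined
  return 0;
}
```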
diff --git a/runtime/onert/core/include/ir/InternalType.h b/runtime/onert/core/include/ir/InternalType.h
index 1d962c185..50292e812 100644
--- a/runtime/onert/core/include/ir/InternalType.h
+++ b/runtime/onert/core/include/ir/InternalType.h
@@ -46,6 +46,13 @@ struct Dilation
uint32_t height_factor;
};
+enum class FullyConnectedWeightsFormat
+{
+ Default = 0,
+ Shuffled4x16Int8 = 1,
+ Shuffled16x1Float32 = 127
+};
+
} // namespace ir
} // namespace onert
diff --git a/runtime/onert/core/include/ir/Layout.h b/runtime/onert/core/include/ir/Layout.h
index 082810172..0cdbcc2c8 100644
--- a/runtime/onert/core/include/ir/Layout.h
+++ b/runtime/onert/core/include/ir/Layout.h
@@ -18,6 +18,7 @@
#define __ONERT_IR_LAYOUT_H__
#include <functional>
+#include <stdexcept>
#include <string>
namespace onert
diff --git a/runtime/onert/core/include/ir/LowerInfoMap.h b/runtime/onert/core/include/ir/LowerInfoMap.h
deleted file mode 100644
index fbabaf39d..000000000
--- a/runtime/onert/core/include/ir/LowerInfoMap.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_LOWER_INFO_MAP_H__
-#define __ONERT_IR_LOWER_INFO_MAP_H__
-
-#include <memory>
-#include <unordered_map>
-
-#include "ir/operand/LowerInfo.h"
-#include "ir/operation/LowerInfo.h"
-#include "ir/OperandIndexMap.h"
-#include "ir/Index.h"
-
-namespace onert
-{
-namespace ir
-{
-
-struct LowerInfoMap
-{
- std::unordered_map<OpSequenceIndex, std::unique_ptr<operation::LowerInfo>> op_seq;
- OperandIndexMap<std::unique_ptr<operand::LowerInfo>> operand;
-};
-
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_LOWER_INFO_MAP_H__
diff --git a/runtime/onert/core/include/ir/Model.h b/runtime/onert/core/include/ir/Model.h
new file mode 100644
index 000000000..950d28850
--- /dev/null
+++ b/runtime/onert/core/include/ir/Model.h
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_MODEL_H__
+#define __ONERT_IR_MODEL_H__
+
+#include <memory>
+#include <unordered_map>
+
+#include "ir/IGraph.h"
+#include "ir/Index.h"
+#include "util/ObjectManager.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace custom
+{
+class IKernelBuilder;
+} // namespace custom
+} // namespace backend
+} // namespace onert
+
+namespace onert
+{
+namespace ir
+{
+
+class Model
+{
+public:
+ Model() = default;
+ Model(const Model &obj) = default;
+ Model(Model &&) = default;
+ Model &operator=(const Model &) = default;
+ Model &operator=(Model &&) = default;
+ ~Model() = default;
+
+ /**
+ * @brief Put a subgraph into the container at the given index
+ *
+ * @param[in] index Index where the subgraph is to be pushed
+ * @param[in] subg Subgraph to be pushed
+ * @return N/A
+ */
+ void push(SubgraphIndex index, const std::shared_ptr<IGraph> &subg) { _subgraphs[index] = subg; }
+
+ /**
+ * @brief Remove the subgraph that is associated with the given index
+ *
+ * @param[in] index Index of the subgraph to be removed
+ * @return N/A
+ */
+ void remove(const SubgraphIndex &index) { _subgraphs.erase(index); }
+
+ /**
+ * @brief Get the subgraph that is associated with the given index
+ *
+ * @param[in] index Index of the subgraph to be returned
+ * @return IGraph
+ */
+ const std::shared_ptr<IGraph> &at(const SubgraphIndex &index) const
+ {
+ return _subgraphs.at(index);
+ }
+ /**
+ * @brief Get the subgraph that is associated with the given index
+ *
+ * @param[in] index Index of the subgraph to be returned
+ * @return IGraph
+ */
+ std::shared_ptr<IGraph> &at(const SubgraphIndex &index) { return _subgraphs.at(index); }
+
+ /**
+ * @brief Check if the container has the subgraph associated with the given index
+ *
+ * @param[in] index Index of the subgraph to be checked
+ * @return true if such entry exists otherwise false
+ */
+ bool exist(const SubgraphIndex &index) const
+ {
+ auto it = _subgraphs.find(index);
+ return it != _subgraphs.end();
+ }
+
+ /**
+ * @brief Iterate over the container with given function
+ *
+ * @param[in] fn Function to be run for every container entry
+ * @return N/A
+ */
+ void iterate(const std::function<void(const SubgraphIndex &, const IGraph &)> &fn) const
+ {
+ for (const auto &e : _subgraphs)
+ {
+ fn(e.first, *e.second);
+ }
+ }
+
+ /**
+ * @brief Iterate over the container with given function
+ *
+ * @param[in] fn Function to be run for every container entry
+ * @return N/A
+ */
+ void iterate(const std::function<void(const SubgraphIndex &, IGraph &)> &fn)
+ {
+ for (const auto &e : _subgraphs)
+ {
+ fn(e.first, *e.second);
+ }
+ }
+
+ /**
+ * @brief Get count of Subgraphs
+ *
+ * @return count of Subgraphs
+ */
+ size_t subgraphs_count() const { return _subgraphs.size(); }
+
+ /**
+ * @brief Return the primary subgraph
+ *
+ * @return std::shared_ptr<IGraph> Primary subgraph
+ */
+ std::shared_ptr<IGraph> primary_subgraph() const { return _subgraphs.at(SubgraphIndex{0}); }
+
+ /**
+ * @brief Return whether every subgraph in the model has type @c Graph
+ *
+ * @tparam Graph Type that inherits from IGraph
+ *
+ * @return true if every subgraph has type @c Graph, otherwise false
+ */
+ template <typename Graph, std::enable_if_t<std::is_base_of<IGraph, Graph>::value, bool> = true>
+ bool hasOnly()
+ {
+ for (const auto &e : _subgraphs)
+ {
+ if (std::dynamic_pointer_cast<Graph>(e.second) == nullptr)
+ return false;
+ }
+ return true;
+ }
+
+private:
+ std::unordered_map<SubgraphIndex, std::shared_ptr<IGraph>> _subgraphs;
+
+ // Custom operations support
+public:
+ void
+ bindKernelBuilder(const std::shared_ptr<onert::backend::custom::IKernelBuilder> &kernel_builder)
+ {
+ _kernel_builder = kernel_builder;
+ }
+
+ const std::shared_ptr<backend::custom::IKernelBuilder> &getKernelBuilder() const
+ {
+ return _kernel_builder;
+ }
+
+private:
+ std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder;
+};
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_MODEL_H__
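A short usage sketch of the container above; the graph contents are hypothetical and error handling is omitted.

```cpp
#include <cassert>
#include <memory>
#include "ir/Graph.h"
#include "ir/Model.h"

void build_model()
{
  auto model = std::make_shared<onert::ir::Model>();
  auto main_graph = std::make_shared<onert::ir::Graph>();
  model->push(onert::ir::SubgraphIndex{0}, main_graph);

  assert(model->exist(onert::ir::SubgraphIndex{0}));
  assert(model->subgraphs_count() == 1);

  // Every subgraph here is a plain ir::Graph, so hasOnly<Graph>() holds.
  assert(model->hasOnly<onert::ir::Graph>());

  model->iterate([](const onert::ir::SubgraphIndex &, const onert::ir::IGraph &g) {
    (void)g; // visit each subgraph, e.g. to dump I/O counts
  });
}
```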
diff --git a/runtime/onert/core/include/ir/NNPkg.h b/runtime/onert/core/include/ir/NNPkg.h
new file mode 100644
index 000000000..5df58bde7
--- /dev/null
+++ b/runtime/onert/core/include/ir/NNPkg.h
@@ -0,0 +1,317 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_NNPKG_H__
+#define __ONERT_IR_NNPKG_H__
+
+#include <memory>
+#include <unordered_set>
+#include <vector>
+
+#include "ir/Index.h"
+#include "ir/Model.h"
+
+namespace onert
+{
+namespace ir
+{
+
+using IODesc = std::tuple<ModelIndex, SubgraphIndex, IOIndex>;
+
+struct ModelEdge
+{
+ IODesc from;
+ IODesc to;
+};
+
+struct ModelEdgeEqual
+{
+ bool operator()(const onert::ir::ModelEdge &lhs, const onert::ir::ModelEdge &rhs) const
+ {
+ return lhs.from == rhs.from && lhs.to == rhs.to;
+ }
+};
+
+struct ModelEdgeHash
+{
+ size_t operator()(const ::onert::ir::ModelEdge &edge) const noexcept
+ {
+ unsigned long long h1 = (std::get<0>(edge.from).value() << 24) |
+ (std::get<1>(edge.from).value() << 16) | std::get<2>(edge.from).value();
+ unsigned long long h2 = (std::get<0>(edge.to).value() << 24) |
+ (std::get<1>(edge.to).value() << 16) | std::get<2>(edge.to).value();
+ return h1 + h2;
+ }
+};
+
+inline std::ostream &operator<<(std::ostream &o, const IODesc &od)
+{
+ o << std::get<0>(od).value() << ":" << std::get<1>(od).value() << ":" << std::get<2>(od).value();
+ return o;
+}
+
+using ModelEdgeSet = std::unordered_set<ir::ModelEdge, ir::ModelEdgeHash, ir::ModelEdgeEqual>;
+
+/**
+ * @brief Struct to gather model I/O information in multimodel NN package
+ * Each model I/O takes one of the roles below:
+ * - Package input/output
+ * - Edge's start/finish point between model
+ */
+struct ModelEdges
+{
+ std::vector<ir::IODesc> pkg_inputs;
+ std::vector<ir::IODesc> pkg_outputs;
+ ModelEdgeSet edges;
+};
+
+class NNPkg
+{
+public:
+ NNPkg() = default;
+ NNPkg(const NNPkg &obj) = default;
+ NNPkg(NNPkg &&) = default;
+ NNPkg &operator=(const NNPkg &) = default;
+ NNPkg &operator=(NNPkg &&) = default;
+ ~NNPkg() = default;
+
+ NNPkg(std::shared_ptr<Model> model) { _models[ModelIndex{0}] = model; }
+ std::shared_ptr<Model> primary_model() const { return _models.at(onert::ir::ModelIndex{0}); }
+
+ /**
+ * @brief Put model at index
+ *
+ * @param[in] index Index where the model is to be pushed
+ * @param[in] model Model to be pushed
+ */
+ void push(ModelIndex index, const std::shared_ptr<Model> &model) { _models[index] = model; }
+
+ /**
+ * @brief Get the count of models
+ *
+ * @return the count of models
+ */
+ size_t model_count() const { return _models.size(); }
+
+ /**
+ * @brief Get model at index
+ *
+ * @param[in] index Index of the model to be returned
+ * @return Model at index
+ */
+ const std::shared_ptr<Model> &model(const ModelIndex &index) const { return _models.at(index); }
+ /**
+ * @brief Get model at index
+ *
+ * @param[in] index Index of the model to be returned
+ * @return Model at index
+ */
+ std::shared_ptr<Model> &model(const ModelIndex &index) { return _models.at(index); }
+
+ /**
+ * @brief Get pkg_input at index
+ *
+ * @param[in] index Index of pkg_input to be returned
+ * @return IODesc at index
+ */
+ const IODesc &input(uint32_t index) const { return _edges.pkg_inputs[index]; }
+ /**
+ * @brief Get pkg_input at index
+ *
+ * @param[in] index Index of pkg_input to be returned
+ * @return IODesc at index
+ */
+ IODesc &input(uint32_t index) { return _edges.pkg_inputs[index]; }
+ /**
+ * @brief Add input at the end
+ *
+ * @param[in] input Input IODesc to be pushed
+ */
+ void addInput(const IODesc &input) { _edges.pkg_inputs.push_back(input); }
+
+ /**
+ * @brief Get pkg_output at index
+ *
+ * @param[in] index Index of pkg_output to be returned
+ * @return IODesc at index
+ */
+ const IODesc &output(uint32_t index) const { return _edges.pkg_outputs[index]; }
+ /**
+ * @brief Get pkg_output at index
+ *
+ * @param[in] index Index of pkg_output to be returned
+ * @return IODesc at index
+ */
+ IODesc &output(uint32_t index) { return _edges.pkg_outputs[index]; }
+ /**
+ * @brief Add output at the end
+ *
+ * @param[in] output Output IODesc to be pushed
+ */
+ void addOutput(const IODesc &output) { _edges.pkg_outputs.push_back(output); }
+
+ /**
+ * @brief Add edge between models at the end
+ *
+ * @param[in] from from IODesc
+ * @param[in] to to IODesc
+ */
+ void addEdge(const IODesc &from, const IODesc &to)
+ {
+ std::cout << from << " -> " << to << std::endl;
+ _edges.edges.insert(ModelEdge{from, to});
+ }
+ /**
+ * @brief Get model edge set
+ * @return Edge set reference
+ */
+ const ModelEdges &model_edges() { return _edges; }
+
+ /**
+ * @brief Verify NNPkg
+ *
+ */
+ void verify(void)
+ {
+ // Verify edges information
+ //
+ // Only duplicates of nnpkg output and Edge `from` are possible.
+ // | Whether duplicates are possible | Edge `to` | Edge `from` |
+ // | nnpkg input (input of subgraph) | X (*1) | X (*2) |
+ // | nnpkg output (output of subgraph) | X (*2) | O |
+ // *1. The subjects who determine values of each buffer are different.
+ // - nnpkg input : user input
+ // - Edge `to` : output of another subgraph
+ // *2. `IOIndex` of inputs and outputs of subgraph is distinct.
+ //
+ for (const auto &edge : _edges.edges)
+ {
+ if (std::find(_edges.pkg_inputs.begin(), _edges.pkg_inputs.end(), edge.to) !=
+ _edges.pkg_inputs.end())
+ {
+ throw std::runtime_error{
+ "Invalid edge information. NNPkg inputs and Edge `to` cannot be duplicated"};
+ }
+ }
+ }
+
+ // TODO Find better way to handle single model NNPackage and multi model NNPackage on inputSize(),
+ // outputSize(), inputInfo(), outputInfo()
+
+ /**
+ * @brief Get model input size
+ */
+ uint32_t inputSize() const
+ {
+ return _models.size() == 1 ? primary_model()->primary_subgraph()->getInputs().size()
+ : _edges.pkg_inputs.size();
+ }
+
+ /**
+ * @brief Get model output size
+ */
+ uint32_t outputSize() const
+ {
+ return _models.size() == 1 ? primary_model()->primary_subgraph()->getOutputs().size()
+ : _edges.pkg_outputs.size();
+ }
+
+ /**
+ * @brief Get model input info
+ */
+ const OperandInfo &inputInfo(uint32_t index) const
+ {
+ if (_models.size() == 1)
+ {
+ auto const graph = primary_model()->primary_subgraph();
+ auto const operand_index = graph->getInputs().at(index);
+ return graph->operands().at(operand_index).info();
+ }
+
+ auto const &desc = input(index);
+ auto const graph = model(std::get<ModelIndex>(desc))->primary_subgraph();
+ auto const operand_index = graph->getInputs().at(std::get<IOIndex>(desc).value());
+ return graph->operands().at(operand_index).info();
+ }
+
+ /**
+ * @brief Get model output info
+ */
+ const OperandInfo &outputInfo(uint32_t index) const
+ {
+ if (_models.size() == 1)
+ {
+ auto const graph = primary_model()->primary_subgraph();
+ auto const operand_index = graph->getOutputs().at(index);
+ return graph->operands().at(operand_index).info();
+ }
+
+ auto const &desc = output(index);
+ auto const graph = model(std::get<ModelIndex>(desc))->primary_subgraph();
+ auto const operand_index = graph->getOutputs().at(std::get<IOIndex>(desc).value());
+ return graph->operands().at(operand_index).info();
+ }
+
+ void changeInputShape(uint32_t index, const ir::Shape &new_shape)
+ {
+ if (_models.size() == 1)
+ {
+ auto graph = primary_model()->primary_subgraph();
+ auto const operand_index = graph->getInputs().at(index);
+ graph->changeShape(operand_index, new_shape);
+ return;
+ }
+
+ auto const &desc = input(index);
+ auto graph = model(std::get<ModelIndex>(desc))->primary_subgraph();
+ auto const operand_index = graph->getInputs().at(std::get<IOIndex>(desc).value());
+ graph->changeShape(operand_index, new_shape);
+ }
+
+ /**
+ * @brief Replace model
+ *
+ * @param[in] model Model to be replaced
+ *
+ * TODO: Support multiple models
+ */
+ void replaceModel(std::shared_ptr<Model> model) { _models[ModelIndex{0}] = model; }
+
+ // TODO: Add iterate() or getter for edges
+
+private:
+ std::unordered_map<ModelIndex, std::shared_ptr<Model>> _models;
+ ModelEdges _edges;
+};
+
+} // namespace ir
+} // namespace onert
+
+namespace std
+{
+
+template <> struct hash<onert::ir::IODesc>
+{
+ size_t operator()(const ::onert::ir::IODesc &iodesc) const noexcept
+ {
+ return (std::get<0>(iodesc).value() << 24) | (std::get<1>(iodesc).value() << 16) |
+ std::get<2>(iodesc).value();
+ }
+};
+
+} // namespace std
+
+#endif // __ONERT_IR_NNPKG_H__
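Putting the pieces together, a two-model package could be wired as below. The models are hypothetical and built elsewhere; indices follow the `IODesc = (model, subgraph, io)` convention defined above. Note that `addEdge` as committed also prints each edge to stdout.

```cpp
#include <memory>
#include "ir/NNPkg.h"

void wire_package(std::shared_ptr<onert::ir::Model> model_a,
                  std::shared_ptr<onert::ir::Model> model_b)
{
  using namespace onert::ir;

  NNPkg pkg;
  pkg.push(ModelIndex{0}, model_a);
  pkg.push(ModelIndex{1}, model_b);

  // The package input feeds model 0's first input; the package output is
  // taken from model 1's first output.
  pkg.addInput(IODesc{ModelIndex{0}, SubgraphIndex{0}, IOIndex{0}});
  pkg.addOutput(IODesc{ModelIndex{1}, SubgraphIndex{0}, IOIndex{0}});

  // Model 0's first output flows into model 1's first input.
  pkg.addEdge(IODesc{ModelIndex{0}, SubgraphIndex{0}, IOIndex{0}},
              IODesc{ModelIndex{1}, SubgraphIndex{0}, IOIndex{0}});

  pkg.verify(); // throws if an edge `to` duplicates a package input
}
```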
diff --git a/runtime/onert/core/include/ir/OpSequence.h b/runtime/onert/core/include/ir/OpSequence.h
deleted file mode 100644
index 754cf3b34..000000000
--- a/runtime/onert/core/include/ir/OpSequence.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OP_SEQUENCE_H__
-#define __ONERT_IR_OP_SEQUENCE_H__
-
-#include <vector>
-#include <string>
-#include <memory>
-
-#include "ir/Layout.h"
-#include "ir/Index.h"
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-
-class Operations;
-
-class OpSequence
-{
-public:
- explicit OpSequence(Layout layout);
- OpSequence(const OpSequence &) = delete;
-
-public:
- void accept(OperationVisitor &v) const;
-
-public:
- const OperandIndexSequence &getInputs() const { return _inputs; }
- const OperandIndexSequence &getOutputs() const { return _outputs; }
- void setInputs(const OperandIndexSequence &indexes) { _inputs = indexes; }
- void setOutputs(const OperandIndexSequence &indexes) { _outputs = indexes; }
- void replaceInputs(const OperandIndex &from, const OperandIndex &to)
- {
- _inputs.replace(from, to);
- }
- void replaceOutputs(const OperandIndex &from, const OperandIndex &to)
- {
- _outputs.replace(from, to);
- }
-
- void appendOperation(const OperationIndex &index) { _operations.emplace_back(index); }
-
- std::vector<OperationIndex> &operations(void) { return _operations; }
-
- const std::vector<OperationIndex> &operations(void) const { return _operations; }
-
- uint32_t size(void) const { return _operations.size(); }
-
-public:
- void remove(const OperationIndex &index);
-
- bool exist(const OperationIndex &index) const;
-
-public:
- Layout getLayout() const { return _layout; }
-
-public:
- std::vector<OperationIndex>::const_iterator begin() const { return _operations.begin(); }
- std::vector<OperationIndex>::const_iterator end() const { return _operations.end(); }
-
-public:
- /**
- * @brief Set @c true if any operation in this opSequence has dynamic input
- * or dynamic output;
- * @c false if all operations' inputs and outputs are static tensors
- */
- void has_dynamic_tensor(bool has_dynamic_tensor) { _has_dynamic_tensor = has_dynamic_tensor; }
- bool has_dynamic_tensor() const { return _has_dynamic_tensor; }
-
-private:
- OperandIndexSequence _inputs;
- OperandIndexSequence _outputs;
- std::vector<OperationIndex> _operations;
-
-private:
- Layout _layout;
- bool _has_dynamic_tensor;
-};
-
-std::string getStrFromOpSeq(const OpSequence &op_seq, const Operations &operations);
-
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OP_SEQUENCE_H__
diff --git a/runtime/onert/core/include/ir/OpSequences.h b/runtime/onert/core/include/ir/OpSequences.h
deleted file mode 100644
index ab258f395..000000000
--- a/runtime/onert/core/include/ir/OpSequences.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OP_SEQUENCES_H__
-#define __ONERT_IR_OP_SEQUENCES_H__
-
-#include "ir/Index.h"
-#include "ir/OpSequence.h"
-#include "util/ObjectManager.h"
-
-namespace onert
-{
-namespace ir
-{
-
-/**
- * @brief Class that manages OpSequence objects
- */
-class OpSequences : public util::ObjectManager<OpSequenceIndex, OpSequence>
-{
-public:
- /**
- * @brief Create an instance of OpSequence with given op and push it to objects
- *
- * @param[in] op_idx Operation index that is emplaced
- * @param[in] layout OpSequence's layout
- * @return OpSequenceIndex
- */
- OpSequenceIndex emplace(const OperationIndex &op_index, Layout layout);
-
- /**
- * @brief Push an instance of OpSequence to objects
- *
- * @param[in] op_seq An instance of OpSequence
- * @return OpSequenceIndex
- */
- OpSequenceIndex emplace(std::unique_ptr<OpSequence> &&op_seq);
- /**
- * @brief Check if an operation does exist in any OpSequences
- *
- * @param operation_index Operation index to find
- * @return true If such operation exists in any OpSequences otherwise false
- */
- bool containsOperation(const OperationIndex &operation_index) const;
- /**
- * @brief Find an operation from all OpSequences
- *
- * @param operation_index Operation index to find
- * @return OpSequenceIndex Index of OpSequence that contains given operation index
- */
- OpSequenceIndex getOperation(const OperationIndex &operation_index) const;
- /**
- * @brief Remove an operation from OpSequence
- *
- * @param operation_index Operation index to be removed
- */
- void removeFromOpSequence(const OperationIndex &operation_index);
-
-private:
- void cacheSequenceIndex(const OpSequenceIndex &seq_index, const OperationIndex &op_index) const;
- OpSequenceIndex *findSequenceIndex(const OperationIndex &operation_index) const;
-
- OpSequenceIndex findOperation(const OperationIndex &operation_index) const;
- mutable std::unordered_map<OperationIndex, OpSequenceIndex> _seq_indexes;
-};
-
-/**
- * @brief Dump OpSequences
- *
- * @param op_seqs Operation Sequences
- * @param operations Operation context
- */
-void dumpOpSequences(const OpSequences &op_seqs, const Operations &operations);
-
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OP_SEQUENCES_H__
diff --git a/runtime/onert/core/include/ir/Operand.h b/runtime/onert/core/include/ir/Operand.h
index 1b3a43b02..e4a91579a 100644
--- a/runtime/onert/core/include/ir/Operand.h
+++ b/runtime/onert/core/include/ir/Operand.h
@@ -36,10 +36,11 @@ class Operand
{
public:
explicit Operand(const Shape &shape, const TypeInfo &type)
- : _info{shape, type, MemAllocType::STATIC}
+ : _info{shape, type, MemAllocType::STATIC}
{
// DO NOTHING
}
+ explicit Operand(const Operand &) = default;
public:
const Shape &shape(void) const { return _info.shape(); }
@@ -54,6 +55,7 @@ public:
void removeUse(const OperationIndex &idx);
void setDef(const OperationIndex &idx);
void unsetDef();
+ void clearDefUse();
public:
void type(const DataType type) { _info.type(type); };
diff --git a/runtime/onert/core/include/ir/OperandIndexSequence.h b/runtime/onert/core/include/ir/OperandIndexSequence.h
index aa01eccaa..66d00761b 100644
--- a/runtime/onert/core/include/ir/OperandIndexSequence.h
+++ b/runtime/onert/core/include/ir/OperandIndexSequence.h
@@ -76,12 +76,15 @@ public:
}
public:
+ bool operator==(const OperandIndexSequence &other) const;
OperandIndexSequence operator+(const OperandIndexSequence &other) const;
- friend std::ostream &operator<<(std::ostream &o, const OperandIndexSequence &op_seq);
+ friend std::ostream &operator<<(std::ostream &o, const OperandIndexSequence &operand_seq);
public:
std::vector<OperandIndex>::const_iterator begin(void) const { return _vec.begin(); }
std::vector<OperandIndex>::const_iterator end(void) const { return _vec.end(); }
+ std::vector<OperandIndex>::iterator begin(void) { return _vec.begin(); }
+ std::vector<OperandIndex>::iterator end(void) { return _vec.end(); }
private:
std::vector<OperandIndex> _vec;
diff --git a/runtime/onert/core/include/ir/OperandInfo.h b/runtime/onert/core/include/ir/OperandInfo.h
index b8e123027..11aeb4920 100644
--- a/runtime/onert/core/include/ir/OperandInfo.h
+++ b/runtime/onert/core/include/ir/OperandInfo.h
@@ -66,8 +66,9 @@ public:
* @param[in] alloc_type When the tensor needs memory allocation
*/
OperandInfo(const Shape &shape, const TypeInfo &typeInfo, MemAllocType alloc_type,
- bool is_const = false)
- : _shape(shape), _typeInfo(typeInfo), _alloc_type(alloc_type), _const(is_const)
+ bool is_const = false, bool is_variable = false)
+ : _shape(shape), _typeInfo(typeInfo), _alloc_type(alloc_type), _const(is_const),
+ _variable(is_variable)
{
// DO NOTHING
}
@@ -117,12 +118,21 @@ public:
MemAllocType memAllocType() const { return _alloc_type; }
void setAsConstant() { _const = true; }
+ void setAsNonConst() { _const = false; }
bool isConstant() const
{
// Impossible case: constant and dynamic operand
assert(!(isDynamic() && _const));
return _const;
}
+ void setAsVariable()
+ {
+ // Impossible case: constant or dynamic operand
+ // The variable operand with buffer is not supported yet
+ assert(!(isDynamic() || _const));
+ _variable = true;
+ }
+ bool isVariable() const { return _variable; }
bool isDynamic() const { return _alloc_type == MemAllocType::DYNAMIC; }
void setDynamic() { _alloc_type = MemAllocType::DYNAMIC; }
@@ -132,6 +142,7 @@ private:
MemAllocType _alloc_type;
bool _const;
+ bool _variable;
};
} // namespace ir
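The new `_variable` flag is mutually exclusive with constness and dynamic allocation, as the assertions in the hunk above encode. A compact restatement, with placeholder `shape`/`type_info` values:

```cpp
#include <cassert>
#include "ir/OperandInfo.h"

void flag_invariants(const onert::ir::Shape &shape, const onert::ir::TypeInfo &type_info)
{
  onert::ir::OperandInfo info(shape, type_info, onert::ir::MemAllocType::STATIC);
  assert(!info.isConstant() && !info.isDynamic());

  info.setAsVariable(); // allowed: the operand is neither dynamic nor constant
  assert(info.isVariable());

  // Forbidden combinations (asserted in debug builds):
  //  - setAsVariable() on a constant or dynamic operand
  //  - a constant operand that is also dynamic (checked inside isConstant())
}
```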
diff --git a/runtime/onert/core/include/ir/Operation.h b/runtime/onert/core/include/ir/Operation.h
index 818bd913b..06ab29ecb 100644
--- a/runtime/onert/core/include/ir/Operation.h
+++ b/runtime/onert/core/include/ir/Operation.h
@@ -19,9 +19,8 @@
#include <memory>
-#include "ir/OpCode.h"
+#include "ir/IOperation.h"
#include "ir/Operand.h"
-#include "ir/OperandIndexSequence.h"
#include "ir/OperandConstraint.h"
namespace onert
@@ -29,14 +28,17 @@ namespace onert
namespace ir
{
-struct OperationVisitor;
-
-class Operation
+// NOTE Virtual inheritance is introduced because trainable operations inherit
+// `ITrainableOperation` and `Operation` which inherit `IOperation`.
+class Operation : virtual public IOperation
{
public:
+ // TODO Remove default parameter
Operation(OperandConstraint input_constr, const OperandIndexSequence &inputs,
- const OperandIndexSequence &outputs);
- explicit Operation(OperandConstraint input_constr);
+ const OperandIndexSequence &outputs,
+ OperandConstraint output_constr = OperandConstraint::createAny());
+ explicit Operation(OperandConstraint input_constr,
+ OperandConstraint output_constr = OperandConstraint::createAny());
Operation(const Operation &) = default;
Operation(Operation &&) = default;
@@ -46,22 +48,18 @@ public:
virtual ~Operation();
public:
- virtual void accept(OperationVisitor &v) const = 0;
- virtual std::string name() const { return std::string{toString(opcode())}; }
- virtual OpCode opcode() const = 0;
-
-public:
- void replaceInputs(const OperandIndex &from, const OperandIndex &to);
- void replaceOutputs(const OperandIndex &from, const OperandIndex &to);
+ void replaceInputs(const OperandIndex &from, const OperandIndex &to) override;
+ void replaceOutputs(const OperandIndex &from, const OperandIndex &to) override;
OperandIndexSequence &getInputs() { return _inputs; }
- const OperandIndexSequence &getInputs() const { return _inputs; }
- const OperandIndexSequence &getOutputs() const { return _outputs; }
+ const OperandIndexSequence &getInputs() const override { return _inputs; }
+ const OperandIndexSequence &getOutputs() const override { return _outputs; }
// It's for only input/output tensors but const data.
void setInputs(const OperandIndexSequence &indexes);
void setOutputs(const OperandIndexSequence &indexes);
private:
OperandConstraint _input_constr;
+ OperandConstraint _output_constr;
OperandIndexSequence _inputs;
OperandIndexSequence _outputs;
};
diff --git a/runtime/onert/core/include/ir/OperationVisitor.h b/runtime/onert/core/include/ir/OperationVisitor.h
index a27770744..4d08a5c71 100644
--- a/runtime/onert/core/include/ir/OperationVisitor.h
+++ b/runtime/onert/core/include/ir/OperationVisitor.h
@@ -18,7 +18,6 @@
#define __ONERT_IR_OPERATION_VISITOR_H__
#include "ir/Operations.Include.h"
-#include "ir/OpSequence.h"
namespace onert
{
@@ -33,15 +32,6 @@ struct OperationVisitor
virtual void visit(const operation::InternalName &) {}
#include "ir/Operations.lst"
#undef OP
-
- // This OpSequence node should be handled specially so that
- // Op.lst doesn't have OpSequence
- // TODO Remove by pushing it down to derived classes.
- virtual void visit(const OpSequence &)
- {
- throw std::runtime_error{
- "OperationVisitor: This does not privide visit function in OpSequence"};
- }
};
} // namespace ir
diff --git a/runtime/onert/core/include/ir/Operations.Include.h b/runtime/onert/core/include/ir/Operations.Include.h
index 17bbbc29c..6352b8ed9 100644
--- a/runtime/onert/core/include/ir/Operations.Include.h
+++ b/runtime/onert/core/include/ir/Operations.Include.h
@@ -16,69 +16,73 @@
// This file has no ifdef guard intentionally
+#include "ir/operation/AddN.h"
+#include "ir/operation/ArgMinMax.h"
+#include "ir/operation/BatchMatMul.h"
#include "ir/operation/BatchToSpaceND.h"
+#include "ir/operation/BCQFullyConnected.h"
+#include "ir/operation/BCQGather.h"
#include "ir/operation/BinaryArithmetic.h"
#include "ir/operation/BroadcastTo.h"
-#include "ir/operation/Conv2D.h"
-#include "ir/operation/Pool2D.h"
+#include "ir/operation/Bulk.h"
+#include "ir/operation/Comparison.h"
#include "ir/operation/Concat.h"
-#include "ir/operation/Reshape.h"
-#include "ir/operation/Fill.h"
-#include "ir/operation/FullyConnected.h"
-#include "ir/operation/Softmax.h"
-#include "ir/operation/Transpose.h"
-#include "ir/operation/Permute.h"
-#include "ir/operation/Reduce.h"
+#include "ir/operation/Conv2D.h"
+#include "ir/operation/ConvertFp16ToFp32.h"
+#include "ir/operation/ConvertFp32ToFp16.h"
+#include "ir/operation/Custom.h"
+#include "ir/operation/DepthToSpace.h"
#include "ir/operation/DepthwiseConv2D.h"
-#include "ir/operation/Slice.h"
-#include "ir/operation/StridedSlice.h"
-#include "ir/operation/Squeeze.h"
+#include "ir/operation/Einsum.h"
#include "ir/operation/ElementwiseActivation.h"
#include "ir/operation/ElementwiseBinary.h"
#include "ir/operation/ElementwiseUnary.h"
+#include "ir/operation/EmbeddingLookup.h"
#include "ir/operation/ExpandDims.h"
-#include "ir/operation/Comparison.h"
+#include "ir/operation/Fill.h"
+#include "ir/operation/FullyConnected.h"
+#include "ir/operation/FusedBatchNorm.h"
+#include "ir/operation/Gather.h"
+#include "ir/operation/HashtableLookup.h"
+#include "ir/operation/If.h"
+#include "ir/operation/InstanceNorm.h"
+#include "ir/operation/L2Normalization.h"
+#include "ir/operation/LocalResponseNormalization.h"
+#include "ir/operation/LogSoftmax.h"
+#include "ir/operation/Loss.h"
#include "ir/operation/LSTM.h"
+#include "ir/operation/MatrixBandPart.h"
+#include "ir/operation/DetectionPostProcess.h"
+#include "ir/operation/OneHot.h"
+#include "ir/operation/Pack.h"
+#include "ir/operation/Pad.h"
+#include "ir/operation/Permute.h"
+#include "ir/operation/Pool2D.h"
+#include "ir/operation/Pow.h"
+#include "ir/operation/PReLU.h"
+#include "ir/operation/Range.h"
+#include "ir/operation/Rank.h"
+#include "ir/operation/Reduce.h"
+#include "ir/operation/Reshape.h"
#include "ir/operation/ResizeBilinear.h"
#include "ir/operation/ResizeNearestNeighbor.h"
#include "ir/operation/Reverse.h"
#include "ir/operation/RNN.h"
+#include "ir/operation/Select.h"
+#include "ir/operation/Shape.h"
+#include "ir/operation/Slice.h"
+#include "ir/operation/Softmax.h"
#include "ir/operation/SpaceToBatchND.h"
#include "ir/operation/SpaceToDepth.h"
-#include "ir/operation/EmbeddingLookup.h"
-#include "ir/operation/L2Normalization.h"
-#include "ir/operation/HashtableLookup.h"
-#include "ir/operation/InstanceNorm.h"
-#include "ir/operation/PReLU.h"
-#include "ir/operation/TransposeConv.h"
-#include "ir/operation/SquaredDifference.h"
-#include "ir/operation/TopKV2.h"
-#include "ir/operation/Gather.h"
-#include "ir/operation/ArgMax.h"
-#include "ir/operation/LocalResponseNormalization.h"
-#include "ir/operation/DepthToSpace.h"
-#include "ir/operation/Pack.h"
-#include "ir/operation/Select.h"
#include "ir/operation/Split.h"
#include "ir/operation/SplitV.h"
+#include "ir/operation/SquaredDifference.h"
+#include "ir/operation/Squeeze.h"
+#include "ir/operation/StatelessRandomUniform.h"
+#include "ir/operation/StridedSlice.h"
+#include "ir/operation/Tile.h"
+#include "ir/operation/TopKV2.h"
+#include "ir/operation/Transpose.h"
+#include "ir/operation/TransposeConv.h"
#include "ir/operation/Unpack.h"
-#include "ir/operation/Pad.h"
-#include "ir/operation/Custom.h"
-#include "ir/operation/Einsum.h"
-#include "ir/operation/OneHot.h"
-#include "ir/operation/Shape.h"
-#include "ir/operation/ConvertFp32ToFp16.h"
-#include "ir/operation/ConvertFp16ToFp32.h"
-#include "ir/operation/If.h"
#include "ir/operation/While.h"
-#include "ir/operation/Pow.h"
-#include "ir/operation/Tile.h"
-#include "ir/operation/Range.h"
-#include "ir/operation/Rank.h"
-#include "ir/operation/BCQFullyConnected.h"
-#include "ir/operation/BCQGather.h"
-#include "ir/operation/MatrixBandPart.h"
-#include "ir/operation/BatchMatMul.h"
-#include "ir/operation/FusedBatchNorm.h"
-#include "ir/operation/LogSoftmax.h"
-#include "ir/operation/StatelessRandomUniform.h"
diff --git a/runtime/onert/core/include/ir/Operations.h b/runtime/onert/core/include/ir/Operations.h
index 0b5fbf529..4102fcebe 100644
--- a/runtime/onert/core/include/ir/Operations.h
+++ b/runtime/onert/core/include/ir/Operations.h
@@ -18,7 +18,7 @@
#define __ONERT_IR_OPERATIONS_H__
#include "ir/Index.h"
-#include "ir/Operation.h"
+#include "ir/IOperation.h"
#include "util/ObjectManager.h"
namespace onert
@@ -26,7 +26,7 @@ namespace onert
namespace ir
{
-class Operations : public util::ObjectManager<OperationIndex, Operation>
+class Operations : public util::ObjectManager<OperationIndex, IOperation>
{
public:
Operations() = default;
diff --git a/runtime/onert/core/include/ir/Operations.lst b/runtime/onert/core/include/ir/Operations.lst
index ab2146821..1f91aecb2 100644
--- a/runtime/onert/core/include/ir/Operations.lst
+++ b/runtime/onert/core/include/ir/Operations.lst
@@ -19,69 +19,75 @@
#endif
// Internal Name
+OP(AddN)
+OP(ArgMinMax)
+OP(BatchMatMul)
OP(BatchToSpaceND)
+OP(BCQFullyConnected)
+OP(BCQGather)
OP(BinaryArithmetic)
OP(BroadcastTo)
+OP(Bulk)
+OP(Comparison)
+OP(Concat)
OP(Conv2D)
+OP(ConvertFp16ToFp32)
+OP(ConvertFp32ToFp16)
+OP(Custom)
+OP(DepthToSpace)
OP(DepthwiseConv2D)
-OP(Pool2D)
-OP(Concat)
-OP(Fill)
-OP(FullyConnected)
-OP(Reduce)
-OP(Reshape)
-OP(Softmax)
-OP(Squeeze)
-OP(Slice)
-OP(StridedSlice)
-OP(Transpose)
+OP(Einsum)
OP(ElementwiseActivation)
OP(ElementwiseBinary)
OP(ElementwiseUnary)
+OP(EmbeddingLookup)
OP(ExpandDims)
-OP(Comparison)
+OP(Fill)
+OP(FullyConnected)
+OP(FusedBatchNorm)
+OP(Gather)
+OP(HashtableLookup)
+OP(If)
+OP(InstanceNorm)
+OP(L2Normalization)
+OP(LocalResponseNormalization)
+OP(LogSoftmax)
OP(LSTM)
+OP(MatrixBandPart)
+OP(DetectionPostProcess)
+OP(OneHot)
+OP(Pack)
+OP(Pad)
+OP(Permute)
+OP(Pool2D)
+OP(Pow)
+OP(PReLU)
+OP(Range)
+OP(Rank)
+OP(Reduce)
+OP(Reshape)
OP(ResizeBilinear)
OP(ResizeNearestNeighbor)
OP(Reverse)
OP(RNN)
+OP(Select)
+OP(Shape)
+OP(Slice)
+OP(Softmax)
OP(SpaceToBatchND)
OP(SpaceToDepth)
-OP(EmbeddingLookup)
-OP(L2Normalization)
-OP(HashtableLookup)
-OP(InstanceNorm)
-OP(PReLU)
-OP(TransposeConv)
-OP(SquaredDifference)
-OP(TopKV2)
-OP(Gather)
-OP(ArgMax)
-OP(Einsum)
-OP(LocalResponseNormalization)
-OP(DepthToSpace)
-OP(Pack)
-OP(Select)
OP(Split)
OP(SplitV)
+OP(SquaredDifference)
+OP(Squeeze)
+OP(StatelessRandomUniform)
+OP(StridedSlice)
+OP(Tile)
+OP(TopKV2)
+OP(Transpose)
+OP(TransposeConv)
OP(Unpack)
-OP(Pad)
-OP(Custom)
-OP(Permute)
-OP(OneHot)
-OP(Shape)
-OP(ConvertFp32ToFp16)
-OP(ConvertFp16ToFp32)
-OP(If)
OP(While)
-OP(Pow)
-OP(Tile)
-OP(Range)
-OP(Rank)
-OP(BCQFullyConnected)
-OP(BCQGather)
-OP(MatrixBandPart)
-OP(BatchMatMul)
-OP(FusedBatchNorm)
-OP(LogSoftmax)
-OP(StatelessRandomUniform)
+
+// Training Only
+OP(Loss)
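
// [Sketch, not part of this commit] Operations.lst is consumed through the
// OP() X-macro: OperationVisitor defines OP, includes the list, and gets one
// visit() overload per operation, as the hunks above show. A self-contained
// illustration of the technique (names here are hypothetical):
#include <iostream>

#define OP_LIST \
  OP(Conv2D)    \
  OP(Softmax)

// Expand once to declare one struct per operation.
#define OP(InternalName) struct InternalName {};
OP_LIST
#undef OP

struct Visitor
{
  // Expand again to generate one visit() overload per operation.
#define OP(InternalName) \
  virtual void visit(const InternalName &) { std::cout << #InternalName "\n"; }
  OP_LIST
#undef OP
  virtual ~Visitor() = default;
};

int main()
{
  Visitor v;
  v.visit(Conv2D{});  // prints "Conv2D"
  v.visit(Softmax{}); // prints "Softmax"
}
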
diff --git a/runtime/onert/core/include/ir/Shape.h b/runtime/onert/core/include/ir/Shape.h
index a0b4bb196..cf84e2626 100644
--- a/runtime/onert/core/include/ir/Shape.h
+++ b/runtime/onert/core/include/ir/Shape.h
@@ -12,7 +12,7 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
-*/
+ */
#ifndef __ONERT_IR_SHAPE_H__
#define __ONERT_IR_SHAPE_H__
@@ -61,7 +61,7 @@ struct FeatureShape
* @param[in] width The width value
*/
FeatureShape(int32_t batch, int32_t depth, int32_t height, int32_t width)
- : N{batch}, C{depth}, H{height}, W{width}
+ : N{batch}, C{depth}, H{height}, W{width}
{
// DO NOTHING
}
@@ -70,8 +70,8 @@ struct FeatureShape
struct Shape
{
public:
- static int32_t const UNSPECIFIED_DIM;
- static int32_t const MAX_RANK;
+ static int32_t const kUnspecifiedDim;
+ static int32_t const kMaxRank;
Shape() = default;
@@ -89,6 +89,7 @@ public:
return rank() == 0 ? 1 : _dimensions.at(i);
}
+ // TODO Fix different behavior with const version
int32_t &dim(int i) { return _dimensions.at(i); }
/**
@@ -125,7 +126,7 @@ public:
*/
bool hasUnspecifiedDims() const
{
- return (std::find(_dimensions.begin(), _dimensions.end(), UNSPECIFIED_DIM) !=
+ return (std::find(_dimensions.begin(), _dimensions.end(), kUnspecifiedDim) !=
_dimensions.end());
}
@@ -139,10 +140,10 @@ inline bool operator!=(const Shape &lhs, const Shape &rhs) { return lhs.dims() !
Shape permuteShape(const Shape &shape, Layout frontend_layout, Layout backend_layout);
/**
-* @brief Find out if tha rank in this shape is "maybe" unspecified.
-* Note that when rank == 0, shape could represent scalar or unspecified rank
-* \see https://developer.android.com/ndk/reference/struct/a-neural-networks-operand-type
-*/
+ * @brief Find out if the rank in this shape is "maybe" unspecified.
+ * Note that when rank == 0, shape could represent scalar or unspecified rank
+ * \see https://developer.android.com/ndk/reference/struct/a-neural-networks-operand-type
+ */
inline bool rankMaybeUnspecified(const ir::Shape &shape) { return (shape.rank() == 0); }
} // namespace ir
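
// [Sketch, not part of this commit] The renamed kUnspecifiedDim is a
// sentinel for a dimension whose extent is unknown until runtime; its value
// is assumed here (-1) since it is defined outside this header. Note the
// subtlety documented above: rank() == 0 may mean either a scalar or a
// fully unspecified rank.
#include <algorithm>
#include <cstdint>
#include <vector>

constexpr std::int32_t kUnspecifiedDim = -1; // assumed value, for illustration

bool hasUnspecifiedDims(const std::vector<std::int32_t> &dims)
{
  // Mirrors Shape::hasUnspecifiedDims(): any sentinel dimension means the
  // full shape is not known yet.
  return std::find(dims.begin(), dims.end(), kUnspecifiedDim) != dims.end();
}
// hasUnspecifiedDims({1, kUnspecifiedDim, 224}) -> true
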
diff --git a/runtime/onert/core/include/ir/Sparsity.h b/runtime/onert/core/include/ir/Sparsity.h
new file mode 100644
index 000000000..690304ad2
--- /dev/null
+++ b/runtime/onert/core/include/ir/Sparsity.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_SPARSITY_H__
+#define __ONERT_IR_SPARSITY_H__
+
+#include <cassert>
+#include <cstdint>
+#include <vector>
+
+namespace onert
+{
+namespace ir
+{
+
+/**
+ * @brief Structure for Sparse Tensor
+ */
+struct Sparsity
+{
+public:
+ Sparsity() = default;
+ Sparsity(std::vector<uint16_t> &&w1_segments, std::vector<uint16_t> &&w1_indices,
+ std::vector<int32_t> &&block_size)
+ : _w1_segments(w1_segments), _w1_indices(w1_indices), _block_size(block_size)
+ {
+ }
+
+ /**
+ * @brief Returns segments array. See compressed sparse row format.
+ */
+ const uint16_t *w1_segments() const { return _w1_segments.data(); }
+ /**
+ * @brief Returns indices array. See compressed sparse row format.
+ */
+ const uint16_t *w1_indices() const { return _w1_indices.data(); }
+ /**
+ * @brief Returns block size which is used for block sparsity
+ */
+ const std::vector<int32_t> &block_size() const { return _block_size; }
+
+private:
+ std::vector<uint16_t> _w1_segments;
+ std::vector<uint16_t> _w1_indices;
+ std::vector<int32_t> _block_size;
+};
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_SPARSITY_H__
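
// [Sketch, not part of this commit] How the CSR metadata above is typically
// read back, assuming w1_segments holds row offsets and w1_indices the
// column of each stored value, per the compressed-sparse-row format the
// comments reference.
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

int main()
{
  // A 2x4 matrix with nonzeros at (0,1), (1,0) and (1,3).
  std::vector<std::uint16_t> segments{0, 1, 3}; // row r spans [segments[r], segments[r+1])
  std::vector<std::uint16_t> indices{1, 0, 3};  // column of each stored value

  for (std::size_t r = 0; r + 1 < segments.size(); ++r)
    for (std::uint16_t i = segments[r]; i < segments[r + 1]; ++i)
      std::printf("nonzero at (%zu, %u)\n", r, static_cast<unsigned>(indices[i]));
  return 0;
}
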
diff --git a/runtime/onert/core/include/ir/Subgraphs.h b/runtime/onert/core/include/ir/Subgraphs.h
deleted file mode 100644
index 7b4c33b76..000000000
--- a/runtime/onert/core/include/ir/Subgraphs.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_SUBGRAPHS_H__
-#define __ONERT_IR_SUBGRAPHS_H__
-
-#include <memory>
-#include <unordered_map>
-
-#include "ir/Index.h"
-#include "util/ObjectManager.h"
-
-namespace onert
-{
-namespace ir
-{
-
-class Graph;
-
-class Subgraphs
-{
-public:
- Subgraphs() = default;
- Subgraphs(const Subgraphs &obj) = default;
- Subgraphs(Subgraphs &&) = default;
- Subgraphs &operator=(const Subgraphs &) = default;
- Subgraphs &operator=(Subgraphs &&) = default;
- ~Subgraphs() = default;
-
- /**
- * @brief Put subgraph in the container with a new Index for that
- *
- * @param[in] subg Subgraph to be pushed
- * @param[in] index Index of subgraph to be pushed
- * @return Created
- */
- void push(SubgraphIndex index, const std::shared_ptr<Graph> &subg) { _subgraphs[index] = subg; }
-
- /**
- * @brief Remove the subgraph that is associated with the given index
- *
- * @param[in] index Index of the subgraph to be removed
- * @return N/A
- */
- void remove(const SubgraphIndex &index) { _subgraphs.erase(index); }
-
- /**
- * @brief Get the subgraph that is associated with the given index
- *
- * @param[in] index Index of the subgraph to be returned
- * @return Graph
- */
- const std::shared_ptr<Graph> &at(const SubgraphIndex &index) const
- {
- return _subgraphs.at(index);
- }
- /**
- * @brief Get the subgraph that is associated with the given index
- *
- * @param[in] index Index of the subgraph to be returned
- * @return Graph
- */
- std::shared_ptr<Graph> &at(const SubgraphIndex &index) { return _subgraphs.at(index); }
-
- /**
- * @brief Get the subgraph that is associated with the given index
- *
- * @param[in] index Index of the subgraph to be returned
- * @return true if such entry exists otherwise false
- */
- bool exist(const SubgraphIndex &index) const
- {
- auto it = _subgraphs.find(index);
- return it != _subgraphs.end();
- }
-
- /**
- * @brief Iterate over the container with given function
- *
- * @param[in] fn Function to be run for every container entry
- * @return N/A
- */
- void iterate(const std::function<void(const SubgraphIndex &, const Graph &)> &fn) const
- {
- for (const auto &e : _subgraphs)
- {
- fn(e.first, *e.second);
- }
- }
-
- /**
- * @brief Iterate over the container with given function
- *
- * @param[in] fn Function to be run for every container entry
- * @return N/A
- */
- void iterate(const std::function<void(const SubgraphIndex &, Graph &)> &fn)
- {
- for (const auto &e : _subgraphs)
- {
- fn(e.first, *e.second);
- }
- }
-
- /**
- * @brief Get count of Subgraphs
- *
- * @return count of Subgraphs
- */
- size_t count() { return _subgraphs.size(); }
-
- /**
- * @brief Return the primary subgraph
- *
- * @return std::shared_ptr<Graph> Primary sugraph
- */
- std::shared_ptr<Graph> primary() const { return _subgraphs.at(SubgraphIndex{0}); }
-
-private:
- std::unordered_map<SubgraphIndex, std::shared_ptr<Graph>> _subgraphs;
-};
-
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_SUBGRAPHS_H__
diff --git a/runtime/onert/core/include/ir/TypeInfo.h b/runtime/onert/core/include/ir/TypeInfo.h
index 3f7eab4c0..3c5062795 100644
--- a/runtime/onert/core/include/ir/TypeInfo.h
+++ b/runtime/onert/core/include/ir/TypeInfo.h
@@ -17,52 +17,69 @@
#ifndef __ONERT_IR_TYPEINFO_H__
#define __ONERT_IR_TYPEINFO_H__
+#include <cassert>
#include <cstdint>
+#include <memory>
#include <vector>
#include "ir/DataType.h"
+#include "ir/Sparsity.h"
namespace onert
{
namespace ir
{
+struct Quantization
+{
+ std::vector<float> scales;
+ std::vector<int32_t> zero_points;
+};
+
class TypeInfo
{
public:
TypeInfo() = delete;
- explicit TypeInfo(DataType type, float scale = 0, int32_t offset = 0)
- : _type(type), _scale(scale), _offset(offset), _sparse(false)
+ explicit TypeInfo(DataType type) : _type{type}, _sparsity{nullptr} {}
+
+ TypeInfo(DataType type, float scale, int32_t zero_point) : _type{type}, _sparsity{nullptr}
{
+ quantization(scale, zero_point);
}
public:
DataType type() const { return _type; }
- float scale() const { return _scale; }
- int32_t offset() const { return _offset; }
- bool sparse() const { return _sparse; }
- const uint16_t *w1_segments() const { return _w1_segments.data(); }
- const uint16_t *w1_indices() const { return _w1_indices.data(); }
+ float scale() const { return _quant.scales[0]; }
+ const std::vector<float> &scales() const { return _quant.scales; }
+ int32_t zero_point() const
+ {
+ assert(_quant.zero_points.size() == 1);
+ return _quant.zero_points[0];
+ }
+ const std::vector<int32_t> &zero_points() const { return _quant.zero_points; }
+ const ir::Sparsity *sparsity() const { return _sparsity.get(); }
+ void quantization(float scale, int32_t zero_point)
+ {
+ _quant.scales.resize(1);
+ _quant.scales[0] = scale;
+ _quant.zero_points.resize(1);
+ _quant.zero_points[0] = zero_point;
+ }
+ void quantization(std::vector<float> &&scales, std::vector<int32_t> &&zero_points)
+ {
+ _quant.scales = scales;
+ _quant.zero_points = zero_points;
+ }
+ void sparsity(std::shared_ptr<ir::Sparsity> sparsity) { _sparsity = sparsity; }
public:
void type(const DataType type) { _type = type; }
- void sparse2DMetadata(std::vector<uint16_t> &&w1_segments, std::vector<uint16_t> &&w1_indices)
- {
- _sparse = true;
- _w1_segments = w1_segments;
- _w1_indices = w1_indices;
- }
private:
DataType _type;
- // for quantization
- float _scale;
- int32_t _offset;
- // for sparsity
- bool _sparse;
- std::vector<uint16_t> _w1_segments;
- std::vector<uint16_t> _w1_indices;
+ ir::Quantization _quant;
+ std::shared_ptr<ir::Sparsity> _sparsity;
};
bool operator==(const TypeInfo &lhs, const TypeInfo &rhs);
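
// [Sketch, not part of this commit] Usage of the reworked quantization API
// above. The DataType enumerator names are assumed from elsewhere in onert;
// the numeric values are only illustrative.
#include "ir/TypeInfo.h"

void typeinfo_sketch()
{
  using namespace onert::ir;

  // Per-tensor quantization: a single scale / zero point.
  TypeInfo per_tensor{DataType::QUANT_UINT8_ASYMM, 0.5f, 128};
  (void)per_tensor.scale();      // 0.5f
  (void)per_tensor.zero_point(); // 128; asserts there is exactly one zero point

  // Per-channel quantization: one scale per channel via the vector overload.
  TypeInfo per_channel{DataType::QUANT_INT8_SYMM};
  per_channel.quantization({0.5f, 0.25f}, {0, 0});
  (void)per_channel.scales().size(); // 2
}
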
diff --git a/runtime/onert/core/include/ir/operand/LowerInfo.h b/runtime/onert/core/include/ir/operand/LowerInfo.h
deleted file mode 100644
index b7f032b02..000000000
--- a/runtime/onert/core/include/ir/operand/LowerInfo.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERAND_LOWER_INFO_H__
-#define __ONERT_IR_OPERAND_LOWER_INFO_H__
-
-#include <functional>
-#include <stdint.h>
-
-#include "ir/operand/PermuteFactor.h"
-#include "util/Set.h"
-
-namespace onert
-{
-namespace backend
-{
-class Backend;
-} // namespace backend
-} // namespace onert
-
-namespace onert
-{
-namespace ir
-{
-namespace operand
-{
-using PermuteFactorSet = util::Set<PermuteFactor>;
-
-class LowerInfo
-{
-public:
- LowerInfo()
- {
- // DO NOTHING
- }
-
-public:
- const PermuteFactorSet &def_factors(void) const { return _def_factors; }
- const PermuteFactorSet &use_factors(void) const { return _use_factors; }
-
-public:
- void addDefPermuteFactor(const PermuteFactor &factor) { _def_factors.add(factor); }
- void addUsePermuteFactor(const PermuteFactor &factor) { _use_factors.add(factor); }
- void removeDefPermuteFactor(const PermuteFactor &factor) { _def_factors.remove(factor); }
- void removeUsePermuteFactor(const PermuteFactor &factor) { _use_factors.remove(factor); }
-
-private:
- PermuteFactorSet _def_factors;
- PermuteFactorSet _use_factors;
-};
-
-} // namespace operand
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERAND_LOWER_INFO_H__
diff --git a/runtime/onert/core/include/ir/operand/PermuteFactor.h b/runtime/onert/core/include/ir/operand/PermuteFactor.h
deleted file mode 100644
index d0bfed337..000000000
--- a/runtime/onert/core/include/ir/operand/PermuteFactor.h
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file PermuteFactor.h
- * @brief This file contains onert::ir::operand::PermuteFactor class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __ONERT_IR_OPERAND_PERMUTE_FACTOR_H__
-#define __ONERT_IR_OPERAND_PERMUTE_FACTOR_H__
-
-#include <functional>
-
-#include "ir/Layout.h"
-
-namespace onert
-{
-namespace backend
-{
-class Backend;
-} // namespace backend
-} // namespace onert
-
-namespace onert
-{
-namespace ir
-{
-namespace operand
-{
-
-/**
- * @brief Class that has factors of permutation
- */
-class PermuteFactor
-{
-public:
- /**
- * @brief Construct PermuteFactor object.
- * @param backend The backend factor
- * @param layout The layout factor
- */
- PermuteFactor(const backend::Backend *backend, Layout layout) : _backend{backend}, _layout{layout}
- {
- // DO NOTHING
- }
- /**
- * @brief Construct PermuteFactor object by copy semantics.
- */
- PermuteFactor(const PermuteFactor &f) : _backend{f._backend}, _layout{f._layout}
- {
- // DO NOTHING
- }
- /**
- * @brief Construct PermuteFactor object by move semantics.
- */
- PermuteFactor(PermuteFactor &&) = default;
-
-public:
- /**
- * @brief Get backend
- *
- * @return Backend factor
- */
- const backend::Backend *backend() const { return _backend; }
- /**
- * @brief Get layout
- *
- * @return Layout factor
- */
- Layout layout() const { return _layout; }
-
-public:
- /**
- * @brief operator overloading function for `==`
- *
- * @return Whether two PermuteFactor are the same
- */
- bool operator==(const PermuteFactor &other) const
- {
- return _backend == other.backend() && _layout == other.layout();
- }
- /**
- * @brief operator overloading function for `!=`
- *
- * @return Whether two PermuteFactor are differenct
- */
- bool operator!=(const PermuteFactor &other) const { return !(*this == other); }
-
-private:
- const backend::Backend *_backend{nullptr};
- Layout _layout{Layout::UNKNOWN};
-};
-
-} // namespace operand
-} // namespace ir
-} // namespace onert
-
-namespace std
-{
-
-/**
- * @brief Structure that provides hash value of PermuteFactor
- */
-template <> struct hash<onert::ir::operand::PermuteFactor>
-{
- size_t operator()(const onert::ir::operand::PermuteFactor &factor) const noexcept
- {
- hash<const onert::backend::Backend *> b_hash{};
- hash<onert::ir::Layout> l_hash{};
- return b_hash(factor.backend()) ^ (l_hash(factor.layout()) << 1);
- }
-};
-
-} // namespace std
-
-#endif // __ONERT_IR_OPERAND_PERMUTE_FACTOR_H__
diff --git a/runtime/onert/core/include/ir/operation/AddN.h b/runtime/onert/core/include/ir/operation/AddN.h
new file mode 100644
index 000000000..7a307efa5
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/AddN.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_ADDN_H__
+#define __ONERT_IR_OPERATION_ADDN_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class AddN : public Operation
+{
+public:
+ AddN(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::AddN; }
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_ADDN_H__
diff --git a/runtime/onert/core/include/ir/operation/ArgMax.h b/runtime/onert/core/include/ir/operation/ArgMax.h
deleted file mode 100644
index 8400e1f1e..000000000
--- a/runtime/onert/core/include/ir/operation/ArgMax.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_ARG_MAX_H__
-#define __ONERT_IR_OPERATION_ARG_MAX_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class ArgMax : public Operation
-{
-public:
- enum Input
- {
- INPUT
- };
-
- struct Param
- {
- int axis;
- DataType output_type;
- };
-
-public:
- ArgMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::ArgMax; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_ARG_MAX_H__
diff --git a/runtime/onert/core/include/ir/operation/ArgMinMax.h b/runtime/onert/core/include/ir/operation/ArgMinMax.h
new file mode 100644
index 000000000..1c9fccd22
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/ArgMinMax.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_ARG_MIN_MAX_H__
+#define __ONERT_IR_OPERATION_ARG_MIN_MAX_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class ArgMinMax : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0,
+ AXIS = 1
+ };
+
+ struct Param
+ {
+ DataType output_type;
+ bool is_arg_max = true;
+ };
+
+public:
+ ArgMinMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::ArgMinMax; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_ARG_MIN_MAX_H__
diff --git a/runtime/onert/core/include/ir/operation/BinaryArithmetic.h b/runtime/onert/core/include/ir/operation/BinaryArithmetic.h
index 110fff565..3dca80bbc 100644
--- a/runtime/onert/core/include/ir/operation/BinaryArithmetic.h
+++ b/runtime/onert/core/include/ir/operation/BinaryArithmetic.h
@@ -27,7 +27,7 @@ namespace ir
namespace operation
{
-class BinaryArithmetic final : public Operation
+class BinaryArithmetic : public Operation
{
public:
enum Input
diff --git a/runtime/onert/core/include/ir/operation/Bulk.h b/runtime/onert/core/include/ir/operation/Bulk.h
new file mode 100644
index 000000000..3c20f392f
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/Bulk.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_BULK_H__
+#define __ONERT_IR_OPERATION_BULK_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class Bulk : public Operation
+{
+public:
+ struct Param
+ {
+ std::string binary_path;
+ std::vector<ir::Shape> origin_input_shapes;
+ std::vector<ir::Shape> origin_output_shapes;
+ };
+
+public:
+ Bulk(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::Bulk; }
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_BULK_H__
diff --git a/runtime/onert/core/include/ir/operation/DepthwiseConv2D.h b/runtime/onert/core/include/ir/operation/DepthwiseConv2D.h
index b10bf708c..38e2b5cd6 100644
--- a/runtime/onert/core/include/ir/operation/DepthwiseConv2D.h
+++ b/runtime/onert/core/include/ir/operation/DepthwiseConv2D.h
@@ -46,6 +46,7 @@ public:
Padding padding;
uint32_t multiplier;
Activation activation;
+ Dilation dilation;
};
public:
diff --git a/runtime/onert/core/include/ir/operation/DetectionPostProcess.h b/runtime/onert/core/include/ir/operation/DetectionPostProcess.h
new file mode 100644
index 000000000..becb0e21a
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/DetectionPostProcess.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_MODEL_OPERATION_DETECTION_POST_PROCESS_NODE_H__
+#define __NEURUN_MODEL_OPERATION_DETECTION_POST_PROCESS_NODE_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class DetectionPostProcess : public Operation
+{
+public:
+ enum Input
+ {
+ BOXES = 0,
+ SCORES = 1,
+ INPUT_ANCHORS = 2
+ };
+
+ enum Output
+ {
+ BOX_COORDS = 0,
+ BOX_CLASSES = 1,
+ BOX_SCORES = 2,
+ NUM_SELECTED = 3
+ };
+
+ struct Scale
+ {
+ float y_scale;
+ float x_scale;
+ float h_scale;
+ float w_scale;
+ };
+
+ struct Param
+ {
+ int max_detections;
+ float score_threshold;
+ float iou_threshold; // intersection-over-union
+ int max_boxes_per_class;
+ int32_t num_classes;
+ int32_t max_classes_per_detection;
+ // N*N complexity instead of N*N*M, where N is the number of boxes and M the number of classes
+ bool center_size_boxes;
+ bool do_fast_eval = true;
+ Scale scale;
+ };
+
+public:
+ DetectionPostProcess(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+
+ std::string getName() const { return "DetectionPostProcess"; }
+
+public:
+ const Param &param() const { return _param; }
+ OpCode opcode() const final { return OpCode::DetectionPostProcess; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __NEURUN_MODEL_OPERATION_DETECTION_POST_PROCESS_NODE_H__
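
// [Sketch, not part of this commit] iou_threshold above drives non-max
// suppression: candidate boxes overlapping an already-kept box by more than
// the threshold are dropped. Intersection-over-union for axis-aligned boxes:
#include <algorithm>

struct Box
{
  float y1, x1, y2, x2;
};

float iou(const Box &a, const Box &b)
{
  const float ih = std::max(0.0f, std::min(a.y2, b.y2) - std::max(a.y1, b.y1));
  const float iw = std::max(0.0f, std::min(a.x2, b.x2) - std::max(a.x1, b.x1));
  const float inter = ih * iw;
  const float uni = (a.y2 - a.y1) * (a.x2 - a.x1) + (b.y2 - b.y1) * (b.x2 - b.x1) - inter;
  return uni > 0.0f ? inter / uni : 0.0f; // disjoint or degenerate boxes -> 0
}
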
diff --git a/runtime/onert/core/include/ir/operation/ElementwiseBinary.h b/runtime/onert/core/include/ir/operation/ElementwiseBinary.h
index dd07f6058..e265e81ec 100644
--- a/runtime/onert/core/include/ir/operation/ElementwiseBinary.h
+++ b/runtime/onert/core/include/ir/operation/ElementwiseBinary.h
@@ -37,6 +37,7 @@ public:
enum class ElementwiseBinaryType
{
+ FLOOR_DIV,
LOGICAL_AND,
LOGICAL_OR,
MAX,
diff --git a/runtime/onert/core/include/ir/operation/ElementwiseUnary.h b/runtime/onert/core/include/ir/operation/ElementwiseUnary.h
index c40778a56..7d6cb544a 100644
--- a/runtime/onert/core/include/ir/operation/ElementwiseUnary.h
+++ b/runtime/onert/core/include/ir/operation/ElementwiseUnary.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_IR_OPERATION_ELEMENTWISEUNARY_H__
-#define __ONERT_IR_OPERATION_ELEMENTWISEUNARY_H__
+#ifndef __ONERT_IR_OPERATION_ELEMENTWISE_UNARY_H__
+#define __ONERT_IR_OPERATION_ELEMENTWISE_UNARY_H__
#include "ir/Operation.h"
@@ -51,7 +51,7 @@ public:
RSQRT,
SIN,
SQRT,
- SQURE,
+ SQUARE,
ZEROS_LIKE
};
@@ -80,4 +80,4 @@ private:
} // namespace ir
} // namespace onert
-#endif // __ONERT_IR_OPERATION_ELEMENTWISEUNARY_H__
+#endif // __ONERT_IR_OPERATION_ELEMENTWISE_UNARY_H__
diff --git a/runtime/onert/core/include/ir/operation/Fill.h b/runtime/onert/core/include/ir/operation/Fill.h
index 524e41385..b55c77ae5 100644
--- a/runtime/onert/core/include/ir/operation/Fill.h
+++ b/runtime/onert/core/include/ir/operation/Fill.h
@@ -31,7 +31,7 @@ class Fill : public Operation
public:
enum Input
{
- INPUT = 0,
+ SHAPE = 0,
VALUE,
};
diff --git a/runtime/onert/core/include/ir/operation/FullyConnected.h b/runtime/onert/core/include/ir/operation/FullyConnected.h
index b6484ae4d..f83a64557 100644
--- a/runtime/onert/core/include/ir/operation/FullyConnected.h
+++ b/runtime/onert/core/include/ir/operation/FullyConnected.h
@@ -42,6 +42,7 @@ public:
struct Param
{
Activation activation;
+ FullyConnectedWeightsFormat weights_format;
};
public:
diff --git a/runtime/onert/core/include/ir/operation/LSTM.h b/runtime/onert/core/include/ir/operation/LSTM.h
index 1e6c00bf3..027bc6b42 100644
--- a/runtime/onert/core/include/ir/operation/LSTM.h
+++ b/runtime/onert/core/include/ir/operation/LSTM.h
@@ -26,6 +26,7 @@ namespace ir
namespace operation
{
+// This operation supports only unidirectional sequence LSTM
class LSTM : public Operation
{
public:
@@ -51,6 +52,10 @@ public:
PROJECTION_BIAS = 17,
OUTPUT_STATE_IN = 18,
CELL_STATE_IN = 19,
+ INPUT_LAYER_NORMALIZATION_WEIGHTS = 20,
+ FORGET_LAYER_NORMALIZATION_WEIGHTS = 21,
+ CELL_LAYER_NORMALIZATION_WEIGHTS = 22,
+ OUTPUT_LAYER_NORMALIZATION_WEIGHTS = 23,
};
enum Output
@@ -66,6 +71,7 @@ public:
Activation activation;
float cell_threshold;
float projection_threshold;
+ bool time_major;
};
public:
@@ -73,6 +79,7 @@ public:
public:
void accept(OperationVisitor &v) const override;
+ std::string name() const override;
OpCode opcode() const final { return OpCode::LSTM; }
public:
diff --git a/runtime/onert/core/include/ir/operation/Loss.h b/runtime/onert/core/include/ir/operation/Loss.h
new file mode 100644
index 000000000..73f1aed59
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/Loss.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_LOSS_H__
+#define __ONERT_IR_OPERATION_LOSS_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class Loss : public Operation
+{
+public:
+ enum Input
+ {
+ Y_PRED = 0,
+ Y_TRUE = 1
+ // TODO Add more inputs if necessary
+ };
+
+ // NOTE It is not yet determined how to get the information of the previous activation when
+ // generating kernels of Loss operation for each backend. If it is determined to get it
+ // from the object of this class, we have to consider whether to change this enum class.
+ enum class Type
+ {
+ MEAN_SQUARED_ERROR,
+ CATEGORICAL_CROSSENTROPY
+ };
+
+ struct Param
+ {
+ Type op_type;
+ // TODO Add more params if necessary
+ Param() : op_type(Type::MEAN_SQUARED_ERROR) {}
+ };
+
+public:
+ Loss(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ std::string name() const override;
+ OpCode opcode() const final { return OpCode::Loss; }
+
+public:
+ const Param &param() const { return _param; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_LOSS_H__
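
// [Sketch, not part of this commit] The two loss types above, written out.
// For predictions y_pred and labels y_true of length n:
//   MEAN_SQUARED_ERROR:        (1/n) * sum_i (y_pred[i] - y_true[i])^2
//   CATEGORICAL_CROSSENTROPY:  -sum_i y_true[i] * log(y_pred[i])
#include <cstddef>
#include <vector>

float mean_squared_error(const std::vector<float> &y_pred, const std::vector<float> &y_true)
{
  float sum = 0.0f;
  for (std::size_t i = 0; i < y_pred.size(); ++i)
  {
    const float d = y_pred[i] - y_true[i];
    sum += d * d;
  }
  return sum / static_cast<float>(y_pred.size());
}
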
diff --git a/runtime/onert/core/include/ir/operation/LowerInfo.h b/runtime/onert/core/include/ir/operation/LowerInfo.h
deleted file mode 100644
index 7ef53b8c7..000000000
--- a/runtime/onert/core/include/ir/operation/LowerInfo.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_LOWER_INFO_H__
-#define __ONERT_IR_OPERATION_LOWER_INFO_H__
-
-#include <string>
-
-#include <ir/operand/PermuteFactor.h>
-
-namespace onert
-{
-namespace backend
-{
-class Backend;
-} // namespace backend
-} // namespace onert
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class LowerInfo
-{
-public:
- LowerInfo(const backend::Backend *backend, Layout layout);
- const backend::Backend *backend() const { return _permute_factor.backend(); }
- Layout layout() const { return _permute_factor.layout(); }
-
-private:
- operand::PermuteFactor _permute_factor;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_LOWER_INFO_H__
diff --git a/runtime/onert/core/include/ir/operation/ResizeBilinear.h b/runtime/onert/core/include/ir/operation/ResizeBilinear.h
index 29aa496d7..ab330c826 100644
--- a/runtime/onert/core/include/ir/operation/ResizeBilinear.h
+++ b/runtime/onert/core/include/ir/operation/ResizeBilinear.h
@@ -34,10 +34,12 @@ public:
enum Input
{
INPUT = 0,
+ SIZE = 1,
};
struct Param
{
+ // If the input SIZE exists in inputs, height_out and width_out are not set. Ignore these params
int32_t height_out;
int32_t width_out;
bool align_corners;
diff --git a/runtime/onert/core/include/ir/operation/ResizeNearestNeighbor.h b/runtime/onert/core/include/ir/operation/ResizeNearestNeighbor.h
index e4d810eeb..10827803e 100644
--- a/runtime/onert/core/include/ir/operation/ResizeNearestNeighbor.h
+++ b/runtime/onert/core/include/ir/operation/ResizeNearestNeighbor.h
@@ -34,10 +34,12 @@ public:
enum Input
{
INPUT = 0,
+ SIZE = 1,
};
struct Param
{
+ // If the input SIZE exists in inputs, height_out and width_out are not set. Ignore these params
int32_t height_out;
int32_t width_out;
bool align_corners;
diff --git a/runtime/onert/core/include/ir/operation/Split.h b/runtime/onert/core/include/ir/operation/Split.h
index 60e0fdf15..c415941a4 100644
--- a/runtime/onert/core/include/ir/operation/Split.h
+++ b/runtime/onert/core/include/ir/operation/Split.h
@@ -29,12 +29,12 @@ class Split : public Operation
public:
enum Input
{
- INPUT = 0
+ AXIS = 0,
+ INPUT = 1,
};
struct Param
{
- int axis;
int num_splits;
};
diff --git a/runtime/onert/core/include/ir/operation/Transpose.h b/runtime/onert/core/include/ir/operation/Transpose.h
index 9631f7aaa..665c9bbce 100644
--- a/runtime/onert/core/include/ir/operation/Transpose.h
+++ b/runtime/onert/core/include/ir/operation/Transpose.h
@@ -34,26 +34,15 @@ public:
enum Input
{
INPUT = 0, // for an n-D tensor, specifying the tensor to be transposed.
- };
-
- struct Param
- {
- std::vector<int> perm;
+ PERMUTATION = 1,
};
public:
- Transpose(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
+ Transpose(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
public:
void accept(OperationVisitor &v) const override;
OpCode opcode() const final { return OpCode::Transpose; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
};
} // namespace operation
diff --git a/runtime/onert/core/include/ir/train/ITrainableOperation.h b/runtime/onert/core/include/ir/train/ITrainableOperation.h
new file mode 100644
index 000000000..590bed45d
--- /dev/null
+++ b/runtime/onert/core/include/ir/train/ITrainableOperation.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_TRAIN_ITRAINABLE_OPERATION_H__
+#define __ONERT_IR_TRAIN_ITRAINABLE_OPERATION_H__
+
+#include "ir/IOperation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace train
+{
+
+struct TrainableOperationVisitor;
+
+// NOTE Virtual inheritance is introduced because trainable operations inherit
+// `ITrainableOperation` and `Operation` which inherit `IOperation`.
+class ITrainableOperation : virtual public IOperation
+{
+public:
+ virtual ~ITrainableOperation() = default;
+
+public:
+ virtual std::unique_ptr<ITrainableOperation> clone() const = 0;
+ virtual void accept(OperationVisitor &v) const override = 0;
+ virtual void accept(TrainableOperationVisitor &v) const = 0;
+ // TODO Add virtual methods related to training
+};
+
+} // namespace train
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_TRAIN_ITRAINABLE_OPERATION_H__
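
// [Sketch, not part of this commit] Why the NOTE above asks for virtual
// inheritance: trainable ops derive from both a concrete Operation and
// ITrainableOperation. Without "virtual", each path would carry its own
// IOperation subobject and upcasting would be ambiguous.
struct IOperation
{
  virtual ~IOperation() = default;
};
struct Operation : virtual IOperation // assumed to also inherit virtually
{
};
struct ITrainableOperation : virtual IOperation
{
};
struct TrainableConv2D : Operation, ITrainableOperation
{
};

int main()
{
  TrainableConv2D op;
  IOperation *base = &op; // unambiguous: exactly one shared IOperation base
  (void)base;
}
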
diff --git a/runtime/onert/core/include/ir/train/Operations.Include.h b/runtime/onert/core/include/ir/train/Operations.Include.h
new file mode 100644
index 000000000..56e752f94
--- /dev/null
+++ b/runtime/onert/core/include/ir/train/Operations.Include.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_TRAIN_OPERATIONS_OPERATION_INCLUDE_H__
+#define __ONERT_IR_TRAIN_OPERATIONS_OPERATION_INCLUDE_H__
+
+#include "ir/train/operation/Conv2D.h"
+#include "ir/train/operation/ElementwiseActivation.h"
+#include "ir/train/operation/FullyConnected.h"
+#include "ir/train/operation/Loss.h"
+#include "ir/train/operation/Permute.h"
+#include "ir/train/operation/Pool2D.h"
+#include "ir/train/operation/Reshape.h"
+#include "ir/train/operation/Softmax.h"
+
+#endif // __ONERT_IR_TRAIN_OPERATIONS_OPERATION_INCLUDE_H__
diff --git a/runtime/onert/core/include/ir/train/Operations.lst b/runtime/onert/core/include/ir/train/Operations.lst
new file mode 100644
index 000000000..14dc38819
--- /dev/null
+++ b/runtime/onert/core/include/ir/train/Operations.lst
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef OP
+#error Define OP before including this file
+#endif
+
+OP(Conv2D)
+OP(ElementwiseActivation)
+OP(FullyConnected)
+OP(Loss)
+OP(Permute)
+OP(Pool2D)
+OP(Reshape)
+OP(Softmax)
diff --git a/runtime/onert/core/include/ir/train/TrainableGraph.h b/runtime/onert/core/include/ir/train/TrainableGraph.h
new file mode 100644
index 000000000..90c49e212
--- /dev/null
+++ b/runtime/onert/core/include/ir/train/TrainableGraph.h
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_TRAIN_TRAINABLE_GRAPH_H__
+#define __ONERT_IR_TRAIN_TRAINABLE_GRAPH_H__
+
+#include <functional>
+#include <unordered_map>
+
+#include "ir/Graph.h"
+#include "ir/train/ITrainableOperation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace train
+{
+
+class TrainableGraph : public IGraph
+{
+public:
+ /**
+ * @brief Construct a new Trainable Graph object
+ *
+ * @param graph
+ */
+ explicit TrainableGraph();
+ explicit TrainableGraph(const TrainableGraph &tgraph);
+ explicit TrainableGraph(const Graph &graph);
+ ~TrainableGraph() = default;
+
+ // TrainableGraph Building
+public:
+ OperandIndex addOperand(const Shape &shape, const TypeInfo &type);
+ /**
+ * @brief Add an operand to the graph with the given index and object
+ *
+ * If the given index is available, it succeeds. And @c operand is moved which invalidates the
+ * caller's pointer. If the given index is already taken, it fails. And @c operand will not be
+ * moved so the caller's pointer will be still valid.
+ *
+ * @param[in] index Index to be added
+ * @param[in] operand Operand to be added
+ * @return OperandIndex @c index if successful, UNDEFINED otherwise
+ */
+ OperandIndex addOperand(OperandIndex index, std::unique_ptr<Operand> &&operand);
+ /**
+ * @brief Add a new trainable operation to the graph
+ *
+ * If the given @c operation has at least one invalid operand index, it fails. And @c operation
+ * will not be moved so the caller's pointer will be still valid.
+ *
+ * @param operation Operation to be added
+ * @return OperationIndex @c index if successful, UNDEFINED otherwise
+ */
+ OperationIndex addOperation(std::unique_ptr<ITrainableOperation> &&operation);
+ /**
+ * @brief Replace a trainable operation which the graph already has
+ *
+ * If the given @c index is available, it succeeds. And @c operation is moved which invalidates
+ * the caller's pointer. If the given @c operation has at least one invalid operand index, it
+ * fails. And @c operation will not be moved so the caller's pointer will be still valid.
+ *
+ * No information in the graph is changed except for replacing an operation.
+ *
+ * @param operation Operation to be added
+ * @return OperationIndex @c index if successful, UNDEFINED otherwise
+ */
+ OperationIndex replaceOperation(OperationIndex index,
+ std::unique_ptr<ITrainableOperation> &&operation);
+
+ /**
+ * @brief Add a derivative to the graph with the given index and object
+ *
+ * If the given index is available, it succeeds. And @c derivative is moved which invalidates the
+ * caller's pointer. If the given index is already taken, it fails. And @c derivative will not be
+ * moved so the caller's pointer will be still valid.
+ *
+ * @param[in] index Index to be added
+ * @param[in] derivative Derivative operand to be added
+ * @return OperandIndex @c index if successful, UNDEFINED otherwise
+ */
+ OperandIndex addDerivative(OperandIndex index, std::unique_ptr<Operand> &&derivative);
+
+public:
+ void changeShape(const OperandIndex &ind, const ir::Shape &new_shape) override;
+ void changeDerivativeShape(const OperandIndex &ind, const ir::Shape &new_shape);
+ void addInput(const OperandIndex &ind, const std::string &name = "");
+ void addOutput(const OperandIndex &ind, const std::string &name = "");
+ void addLoss(const OperandIndex &loss_ind, const IOIndex &pred_io_ind);
+ void verify() const;
+ void removeOperand(const OperandIndex &ind);
+ void setLayout(Layout layout);
+ void setInputs(OperandIndexSequence inputs,
+ std::unordered_map<std::string, IOIndex> name_to_input);
+ void setOutputs(OperandIndexSequence outputs,
+ std::unordered_map<std::string, IOIndex> name_to_output);
+
+ // Accessors
+public:
+ const OperandIndexSequence &getInputs() const override { return _graph.getInputs(); }
+ const OperandIndexSequence &getOutputs() const override { return _graph.getOutputs(); }
+ IOIndex getInputIndex(const std::string &name) const override;
+ IOIndex getOutputIndex(const std::string &name) const override;
+ const Operands &operands() const override { return _graph.operands(); }
+ Operands &operands() { return _graph.operands(); } // TODO Remove this non-const accessor
+ const Operations &operations() const override { return _graph.operations(); }
+ const Operands &derivatives() const { return _derivatives; }
+ OperandIndex getLossIndex(const IOIndex &pred_io_ind) const;
+ Layout layout() const { return _graph.layout(); }
+ const Graph &graph() const { return _graph; }
+
+public:
+ const ITrainableOperation &operation(OperationIndex index) const;
+
+public:
+ std::vector<ir::OperationIndex> topolSortOperations() const;
+ // TODO Support topological sort for backwarding
+
+private:
+ Graph _graph;
+ Operands _derivatives;
+
+ std::unordered_map<IOIndex, OperandIndex> _losses;
+};
+
+} // namespace train
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_TRAIN_TRAINABLE_GRAPH_H__
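
// [Sketch, not part of this commit] The _losses map above associates a model
// output (the IOIndex of a prediction) with the operand holding its loss.
// Assumed call pattern, with hypothetical indices:
//
//   onert::ir::train::TrainableGraph tgraph{graph}; // wrap an existing Graph
//   tgraph.addLoss(loss_operand_idx, onert::ir::IOIndex{0});
//   auto loss = tgraph.getLossIndex(onert::ir::IOIndex{0}); // == loss_operand_idx
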
diff --git a/runtime/onert/core/include/ir/train/TrainableOperationVisitor.h b/runtime/onert/core/include/ir/train/TrainableOperationVisitor.h
new file mode 100644
index 000000000..fc58c351d
--- /dev/null
+++ b/runtime/onert/core/include/ir/train/TrainableOperationVisitor.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_TRAIN_TRAINABLE_OPERATION_VISITOR_H__
+#define __ONERT_IR_TRAIN_TRAINABLE_OPERATION_VISITOR_H__
+
+#include "ir/train/Operations.Include.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace train
+{
+
+struct TrainableOperationVisitor
+{
+ virtual ~TrainableOperationVisitor() = default;
+
+#define OP(InternalName) \
+ virtual void visit(const operation::InternalName &) {}
+#include "ir/train/Operations.lst"
+#undef OP
+};
+
+} // namespace train
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_TRAIN_TRAINABLE_OPERATION_VISITOR_H__
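
// [Sketch, not part of this commit] A concrete visitor built on the X-macro
// above overrides only the operations it cares about; every other op falls
// through to the empty default generated from Operations.lst. Hypothetical
// example:
//
//   struct BackwardPlanner : public onert::ir::train::TrainableOperationVisitor
//   {
//     void visit(const onert::ir::train::operation::Conv2D &) override { /* ... */ }
//     void visit(const onert::ir::train::operation::Loss &) override { /* ... */ }
//   };
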
diff --git a/runtime/onert/core/include/ir/train/operation/Conv2D.h b/runtime/onert/core/include/ir/train/operation/Conv2D.h
new file mode 100644
index 000000000..b8968926a
--- /dev/null
+++ b/runtime/onert/core/include/ir/train/operation/Conv2D.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_TRAIN_OPERATION_CONV2D_H__
+#define __ONERT_IR_TRAIN_OPERATION_CONV2D_H__
+
+#include "ir/operation/Conv2D.h"
+#include "ir/train/ITrainableOperation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace train
+{
+namespace operation
+{
+
+class Conv2D : public ir::operation::Conv2D, public ITrainableOperation
+{
+private:
+ using OperationType = ir::operation::Conv2D;
+
+public:
+ Conv2D(const OperationType &operation);
+
+public:
+ std::unique_ptr<ITrainableOperation> clone() const override;
+ void accept(OperationVisitor &v) const override;
+ void accept(TrainableOperationVisitor &v) const override;
+};
+
+} // namespace operation
+} // namespace train
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_TRAIN_OPERATION_CONV2D_H__
diff --git a/runtime/onert/core/include/ir/train/operation/ElementwiseActivation.h b/runtime/onert/core/include/ir/train/operation/ElementwiseActivation.h
new file mode 100644
index 000000000..97ab54d17
--- /dev/null
+++ b/runtime/onert/core/include/ir/train/operation/ElementwiseActivation.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_TRAIN_OPERATION_ELEMENTWISE_ACTIVATION_H__
+#define __ONERT_IR_TRAIN_OPERATION_ELEMENTWISE_ACTIVATION_H__
+
+#include "ir/operation/ElementwiseActivation.h"
+#include "ir/train/ITrainableOperation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace train
+{
+namespace operation
+{
+
+class ElementwiseActivation : public ir::operation::ElementwiseActivation,
+ public ITrainableOperation
+{
+private:
+ using OperationType = ir::operation::ElementwiseActivation;
+
+public:
+ ElementwiseActivation(const OperationType &operation);
+
+public:
+ std::unique_ptr<ITrainableOperation> clone() const override;
+ void accept(OperationVisitor &v) const override;
+ void accept(TrainableOperationVisitor &v) const override;
+};
+
+} // namespace operation
+} // namespace train
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_TRAIN_OPERATION_ELEMENTWISE_ACTIVATION_H__
diff --git a/runtime/onert/core/include/ir/train/operation/FullyConnected.h b/runtime/onert/core/include/ir/train/operation/FullyConnected.h
new file mode 100644
index 000000000..bede58d69
--- /dev/null
+++ b/runtime/onert/core/include/ir/train/operation/FullyConnected.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_TRAIN_OPERATION_FULLYCONNECTED_H__
+#define __ONERT_IR_TRAIN_OPERATION_FULLYCONNECTED_H__
+
+#include "ir/operation/FullyConnected.h"
+#include "ir/train/ITrainableOperation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace train
+{
+namespace operation
+{
+
+class FullyConnected : public ir::operation::FullyConnected, public ITrainableOperation
+{
+private:
+ using OperationType = ir::operation::FullyConnected;
+
+public:
+ FullyConnected(const OperationType &operation);
+
+public:
+ std::unique_ptr<ITrainableOperation> clone() const override;
+ void accept(OperationVisitor &v) const override;
+ void accept(TrainableOperationVisitor &v) const override;
+};
+
+} // namespace operation
+} // namespace train
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_TRAIN_OPERATION_FULLYCONNECTED_H__
diff --git a/runtime/onert/core/include/ir/train/operation/Loss.h b/runtime/onert/core/include/ir/train/operation/Loss.h
new file mode 100644
index 000000000..c7cc4213a
--- /dev/null
+++ b/runtime/onert/core/include/ir/train/operation/Loss.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_TRAIN_OPERATION_LOSS_H__
+#define __ONERT_IR_TRAIN_OPERATION_LOSS_H__
+
+#include "ir/operation/Loss.h"
+#include "ir/train/ITrainableOperation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace train
+{
+namespace operation
+{
+
+class Loss : public ir::operation::Loss, public ITrainableOperation
+{
+private:
+ using OperationType = ir::operation::Loss;
+
+public:
+ Loss(const OperationType &operation);
+
+public:
+ std::unique_ptr<ITrainableOperation> clone() const override;
+ void accept(OperationVisitor &v) const override;
+ void accept(TrainableOperationVisitor &v) const override;
+};
+
+} // namespace operation
+} // namespace train
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_TRAIN_OPERATION_LOSS_H__
diff --git a/runtime/onert/core/include/ir/train/operation/Permute.h b/runtime/onert/core/include/ir/train/operation/Permute.h
new file mode 100644
index 000000000..e652b136d
--- /dev/null
+++ b/runtime/onert/core/include/ir/train/operation/Permute.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_TRAIN_OPERATION_PERMUTE_H__
+#define __ONERT_IR_TRAIN_OPERATION_PERMUTE_H__
+
+#include "ir/operation/Permute.h"
+#include "ir/train/ITrainableOperation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace train
+{
+namespace operation
+{
+
+class Permute : public ir::operation::Permute, public ITrainableOperation
+{
+private:
+ using OperationType = ir::operation::Permute;
+
+public:
+ Permute(const OperationType &operation);
+
+public:
+ std::unique_ptr<ITrainableOperation> clone() const override;
+ void accept(OperationVisitor &v) const override;
+ void accept(TrainableOperationVisitor &v) const override;
+};
+
+} // namespace operation
+} // namespace train
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_TRAIN_OPERATION_PERMUTE_H__
diff --git a/runtime/onert/core/include/ir/train/operation/Pool2D.h b/runtime/onert/core/include/ir/train/operation/Pool2D.h
new file mode 100644
index 000000000..024997074
--- /dev/null
+++ b/runtime/onert/core/include/ir/train/operation/Pool2D.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_TRAIN_OPERATION_POOL2D_H__
+#define __ONERT_IR_TRAIN_OPERATION_POOL2D_H__
+
+#include "ir/operation/Pool2D.h"
+#include "ir/train/ITrainableOperation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace train
+{
+namespace operation
+{
+
+class Pool2D : public ir::operation::Pool2D, public ITrainableOperation
+{
+private:
+ using OperationType = ir::operation::Pool2D;
+
+public:
+ Pool2D(const OperationType &operation);
+
+public:
+ std::unique_ptr<ITrainableOperation> clone() const override;
+ void accept(OperationVisitor &v) const override;
+ void accept(TrainableOperationVisitor &v) const override;
+};
+
+} // namespace operation
+} // namespace train
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_TRAIN_OPERATION_POOL2D_H__
diff --git a/runtime/onert/core/include/ir/train/operation/Reshape.h b/runtime/onert/core/include/ir/train/operation/Reshape.h
new file mode 100644
index 000000000..1efd62cfe
--- /dev/null
+++ b/runtime/onert/core/include/ir/train/operation/Reshape.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_TRAIN_OPERATION_RESHAPE_H__
+#define __ONERT_IR_TRAIN_OPERATION_RESHAPE_H__
+
+#include "ir/operation/Reshape.h"
+#include "ir/train/ITrainableOperation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace train
+{
+namespace operation
+{
+
+class Reshape : public ir::operation::Reshape, public ITrainableOperation
+{
+private:
+ using OperationType = ir::operation::Reshape;
+
+public:
+ Reshape(const OperationType &operation);
+
+public:
+ std::unique_ptr<ITrainableOperation> clone() const override;
+ void accept(OperationVisitor &v) const override;
+ void accept(TrainableOperationVisitor &v) const override;
+};
+
+} // namespace operation
+} // namespace train
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_TRAIN_OPERATION_RESHAPE_H__
diff --git a/runtime/onert/core/include/ir/train/operation/Softmax.h b/runtime/onert/core/include/ir/train/operation/Softmax.h
new file mode 100644
index 000000000..b12e6abc1
--- /dev/null
+++ b/runtime/onert/core/include/ir/train/operation/Softmax.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_TRAIN_OPERATION_SOFTMAX_H__
+#define __ONERT_IR_TRAIN_OPERATION_SOFTMAX_H__
+
+#include "ir/operation/Softmax.h"
+#include "ir/train/ITrainableOperation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace train
+{
+namespace operation
+{
+
+class Softmax : public ir::operation::Softmax, public ITrainableOperation
+{
+private:
+ using OperationType = ir::operation::Softmax;
+
+public:
+ Softmax(const OperationType &operation);
+
+public:
+ std::unique_ptr<ITrainableOperation> clone() const override;
+ void accept(OperationVisitor &v) const override;
+ void accept(TrainableOperationVisitor &v) const override;
+};
+
+} // namespace operation
+} // namespace train
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_TRAIN_OPERATION_SOFTMAX_H__
diff --git a/runtime/onert/core/include/ir/train/operation/UntrainableOperation.h b/runtime/onert/core/include/ir/train/operation/UntrainableOperation.h
new file mode 100644
index 000000000..7cda0ec0c
--- /dev/null
+++ b/runtime/onert/core/include/ir/train/operation/UntrainableOperation.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_TRAIN_OPERATION_UNTRAINABLE_OPERATION_H__
+#define __ONERT_IR_TRAIN_OPERATION_UNTRAINABLE_OPERATION_H__
+
+#include "ir/train/ITrainableOperation.h"
+
+#include "ir/OperationVisitor.h"
+#include "ir/train/TrainableOperationVisitor.h"
+
+#include <memory>
+#include <stdexcept>
+#include <type_traits>
+
+namespace onert
+{
+namespace ir
+{
+namespace train
+{
+namespace operation
+{
+
+// `UntrainableOperation` wraps operations that are not yet supported for training.
+// This class can be removed if all operations are supported for training.
+template <typename OperationType,
+ typename = std::enable_if_t<std::is_base_of<Operation, OperationType>::value>>
+class UntrainableOperation : public OperationType, public ITrainableOperation
+{
+public:
+ UntrainableOperation(const OperationType &operation) : OperationType{operation} {}
+ virtual ~UntrainableOperation() = default;
+
+public:
+ std::unique_ptr<ITrainableOperation> clone() const override
+ {
+ return std::make_unique<UntrainableOperation<OperationType>>(*this);
+ }
+ void accept(OperationVisitor &v) const override { v.visit(*this); }
+ void accept(TrainableOperationVisitor &) const override
+ {
+ throw std::runtime_error(OperationType::name() + " operation is not trainable yet");
+ }
+};
+
+} // namespace operation
+} // namespace train
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_TRAIN_OPERATION_UNTRAINABLE_OPERATION_H__
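
A minimal usage sketch for the wrapper above, assuming an existing inference operation instance; `ir::operation::Transpose` and the helper name below are illustrative, not part of this commit. The wrapped object still dispatches through `OperationVisitor` as its base type, while `accept(TrainableOperationVisitor &)` throws until a trainable counterpart exists:

  #include "ir/operation/Transpose.h"
  #include "ir/train/operation/UntrainableOperation.h"

  using onert::ir::train::ITrainableOperation;
  using onert::ir::train::operation::UntrainableOperation;

  std::unique_ptr<ITrainableOperation> makeUntrainable(const onert::ir::operation::Transpose &op)
  {
    // Keeps inference behavior; training-time dispatch throws as shown above.
    return std::make_unique<UntrainableOperation<onert::ir::operation::Transpose>>(op);
  }
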
diff --git a/runtime/onert/core/include/odc/IQuantizer.h b/runtime/onert/core/include/odc/IQuantizer.h
new file mode 100644
index 000000000..d698d9ef0
--- /dev/null
+++ b/runtime/onert/core/include/odc/IQuantizer.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_ODC_IQUANTIZER_H__
+#define __ONERT_ODC_IQUANTIZER_H__
+
+namespace onert
+{
+namespace odc
+{
+
+class IQuantizer
+{
+public:
+ virtual ~IQuantizer() = default;
+
+ virtual int quantize(const char *in, const char *out, bool is_q16) = 0;
+};
+
+} // namespace odc
+} // namespace onert
+
+#endif // __ONERT_ODC_IQUANTIZER_H__
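
For reference, a stub of what a concrete quantizer must provide under this interface; the class name and no-op behavior are illustrative only, and the `int` return is assumed to follow the usual 0-on-success convention:

  #include "odc/IQuantizer.h"

  class NoopQuantizer : public onert::odc::IQuantizer
  {
  public:
    int quantize(const char *in, const char *out, bool is_q16) override
    {
      (void)in;
      (void)out;
      (void)is_q16;
      return 0; // a real implementation would read `in` and emit the quantized model to `out`
    }
  };
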
diff --git a/runtime/onert/core/include/odc/QuantizeManager.h b/runtime/onert/core/include/odc/QuantizeManager.h
new file mode 100644
index 000000000..a749c0ee1
--- /dev/null
+++ b/runtime/onert/core/include/odc/QuantizeManager.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_ODC_QUANTIZE_MANAGER_H__
+#define __ONERT_ODC_QUANTIZE_MANAGER_H__
+
+#include "IQuantizer.h"
+
+#include <functional>
+#include <string>
+
+namespace onert
+{
+namespace odc
+{
+
+class Quantize;
+
+class QuantizeManager
+{
+public:
+ // Not default-constructible, non-copyable
+ QuantizeManager() = delete;
+ QuantizeManager(const std::string &model_path) : _model_path(model_path) {}
+ QuantizeManager(QuantizeManager const &) = delete;
+ QuantizeManager &operator=(QuantizeManager const &) = delete;
+
+public:
+ /**
+ * @brief Set model path to export quantized model
+ *
+ * @param model_path Model path to export quantized model
+ */
+ void exportModelPath(const std::string &model_path) { _export_model_path = model_path; }
+
+ /**
+ * @brief Get model path to export quantized model
+ *
+ * @return Model path to export quantized model
+ */
+ std::string &exportModelPath() { return _export_model_path; }
+
+ /**
+ * @brief Set quantize type
+ *
+ * @param is_q16 true if q16, false if q8
+ *
+ * @todo Support more general quantize type
+ */
+ void quantizeType(bool is_q16) { _is_q16 = is_q16; }
+
+ /**
+ * @brief Quantize model
+ *
+ * @return true if success, otherwise false
+ */
+ bool quantize();
+
+private:
+ std::string _model_path = "";
+ std::string _export_model_path = "";
+ bool _is_q16 = false;
+};
+
+} // namespace odc
+} // namespace onert
+
+#endif // __ONERT_ODC_QUANTIZE_MANAGER_H__
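
The call sequence this header suggests is construct-with-model-path, configure, then quantize; a hedged sketch with placeholder file names:

  #include "odc/QuantizeManager.h"

  void quantize_example()
  {
    onert::odc::QuantizeManager qm{"model.circle"};
    qm.exportModelPath("model.q8.circle"); // where the quantized model is written
    qm.quantizeType(false);                // false => q8, true => q16
    if (!qm.quantize())
    {
      // quantize() only reports success/failure as a bool
    }
  }
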
diff --git a/runtime/onert/core/include/util/CalculateActivationRange.h b/runtime/onert/core/include/util/CalculateActivationRange.h
new file mode 100644
index 000000000..4369ca53e
--- /dev/null
+++ b/runtime/onert/core/include/util/CalculateActivationRange.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_UTIL_CALCULATE_ACTIVATION_RANGE_H__
+#define __ONERT_UTIL_CALCULATE_ACTIVATION_RANGE_H__
+
+#include <limits>
+#include <stdexcept>
+
+#include "ir/InternalType.h"
+
+namespace onert
+{
+namespace util
+{
+
+template <typename T>
+void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max)
+{
+ if (activation == ir::Activation::RELU)
+ {
+ *activation_min = 0;
+ *activation_max = std::numeric_limits<T>::max();
+ }
+ else if (activation == ir::Activation::RELU6)
+ {
+ *activation_min = 0;
+ *activation_max = 6;
+ }
+ else if (activation == ir::Activation::RELU1)
+ {
+ *activation_min = -1;
+ *activation_max = 1;
+ }
+ else if (activation == ir::Activation::SIGMOID)
+ {
+ *activation_min = 0;
+ *activation_max = 1;
+ }
+ else if (activation == ir::Activation::NONE)
+ {
+ *activation_min = std::numeric_limits<T>::lowest();
+ *activation_max = std::numeric_limits<T>::max();
+ }
+ else
+ {
+ throw std::runtime_error{"Unsupported fused activation function."};
+ }
+}
+
+} // namespace util
+} // namespace onert
+
+#endif // __ONERT_UTIL_CALCULATE_ACTIVATION_RANGE_H__
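
A short example of the helper above: for RELU6 with T = float, the fused-activation bounds come out as [0, 6]:

  #include "util/CalculateActivationRange.h"

  void range_example()
  {
    float act_min = 0.f, act_max = 0.f;
    onert::util::CalculateActivationRange(onert::ir::Activation::RELU6, &act_min, &act_max);
    // act_min == 0.f, act_max == 6.f; kernels clamp their outputs to this range
  }
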
diff --git a/runtime/onert/core/include/util/Config.lst b/runtime/onert/core/include/util/Config.lst
index 5077fad69..d3e37ce8f 100644
--- a/runtime/onert/core/include/util/Config.lst
+++ b/runtime/onert/core/include/util/Config.lst
@@ -20,10 +20,9 @@
// Name | Type | Default
CONFIG(GRAPH_DOT_DUMP , int , "0")
-CONFIG(BACKENDS , std::string , "cpu;acl_cl;acl_neon;bcq") // FIXME Remove bcq
+CONFIG(BACKENDS , std::string , "cpu;acl_cl;acl_neon;ruy;xnnpack;gpu_cl;trix;bcq") // FIXME Remove bcq
CONFIG(OP_BACKEND_ALLOPS , std::string , "")
CONFIG(OP_BACKEND_MAP , std::string , "")
-CONFIG(DISABLE_COMPILE , bool , "0")
CONFIG(ONERT_LOG_ENABLE , bool , "0")
CONFIG(CPU_MEMORY_PLANNER , std::string , "WIC")
CONFIG(EXECUTOR , std::string , "Linear")
@@ -31,10 +30,12 @@ CONFIG(ACL_LAYOUT , std::string , "none")
CONFIG(NCNN_LAYOUT , std::string , "NCHW")
CONFIG(PROFILING_MODE , bool , "0")
CONFIG(USE_SCHEDULER , bool , "0")
-CONFIG(OP_SEQ_MAX_NODE , int , "0")
CONFIG(TRACE_FILEPATH , std::string , "")
+CONFIG(MINMAX_FILEPATH , std::string , "")
CONFIG(FP16_ENABLE , bool , "0")
CONFIG(RUY_THREADS , int , "-1")
+CONFIG(XNNPACK_THREADS , int , "-1")
+CONFIG(USE_MMAPED_DATA , bool , "0")
// Auto-generate all operations
@@ -42,4 +43,3 @@ CONFIG(RUY_THREADS , int , "-1")
CONFIG(OP_BACKEND_ ## InternalName, std::string, "")
#include "ir/Operations.lst"
#undef OP
-
diff --git a/runtime/onert/core/include/util/ConfigSource.h b/runtime/onert/core/include/util/ConfigSource.h
index b6a8144fd..d53b8106d 100644
--- a/runtime/onert/core/include/util/ConfigSource.h
+++ b/runtime/onert/core/include/util/ConfigSource.h
@@ -17,16 +17,17 @@
#ifndef __ONERT_UTIL_CONFIG_SOURCE_H__
#define __ONERT_UTIL_CONFIG_SOURCE_H__
-#include <memory>
-
-#include "IConfigSource.h"
+#include <string>
+#include <unordered_map>
namespace onert
{
namespace util
{
-void config_source(std::unique_ptr<IConfigSource> &&source);
+using CfgKeyValues = std::unordered_map<std::string, std::string>;
+
+void setConfigKeyValues(const CfgKeyValues &keyValues);
bool toBool(const std::string &val);
int toInt(const std::string &val);
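
The replacement API takes a plain key/value map instead of an `IConfigSource` object; a sketch of injecting configuration this way (key names are taken from Config.lst above):

  #include "util/ConfigSource.h"

  void config_example()
  {
    onert::util::CfgKeyValues cfg;
    cfg["EXECUTOR"] = "Linear";
    cfg["RUY_THREADS"] = "4";
    onert::util::setConfigKeyValues(cfg); // values are later read back via the getConfig* helpers
  }
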
diff --git a/runtime/onert/core/include/util/EnvConfigSource.h b/runtime/onert/core/include/util/EnvConfigSource.h
deleted file mode 100644
index 8c5d0e8e9..000000000
--- a/runtime/onert/core/include/util/EnvConfigSource.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_UTIL_ENV_CONFIG_SOURCE_H__
-#define __ONERT_UTIL_ENV_CONFIG_SOURCE_H__
-
-#include <unordered_map>
-
-#include "util/GeneralConfigSource.h"
-
-namespace onert
-{
-namespace util
-{
-
-class EnvConfigSource final : public GeneralConfigSource
-{
-public:
- std::string get(const std::string &key) const override;
-
-private:
- std::unordered_map<std::string, std::string> _default_attributes;
-};
-
-} // namespace util
-} // namespace onert
-
-#endif // __ONERT_UTIL_ENV_CONFIG_SOURCE_H__
diff --git a/runtime/onert/core/include/util/Exceptions.h b/runtime/onert/core/include/util/Exceptions.h
index fc3fa0f64..e77686593 100644
--- a/runtime/onert/core/include/util/Exceptions.h
+++ b/runtime/onert/core/include/util/Exceptions.h
@@ -38,7 +38,7 @@ class InsufficientBufferSizeException : public OnertException
{
public:
InsufficientBufferSizeException(const std::string &msg)
- : OnertException{"InsufficientBufferSize", msg}
+ : OnertException{"InsufficientBufferSize", msg}
{
}
};
diff --git a/runtime/onert/core/include/util/GeneralConfigSource.h b/runtime/onert/core/include/util/GeneralConfigSource.h
deleted file mode 100644
index dedc820ec..000000000
--- a/runtime/onert/core/include/util/GeneralConfigSource.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_UTIL_GLOBAL_CONFIG_SOURCE_H__
-#define __ONERT_UTIL_GLOBAL_CONFIG_SOURCE_H__
-
-#include <unordered_map>
-
-#include "util/IConfigSource.h"
-
-namespace onert
-{
-namespace util
-{
-
-class GeneralConfigSource : public IConfigSource
-{
-public:
- GeneralConfigSource() = default;
-
- std::string get(const std::string &key) const override;
- void set(const std::string &key, const std::string &val);
-
-private:
- std::unordered_map<std::string, std::string> _map;
-};
-
-} // namespace util
-} // namespace onert
-
-#endif // __ONERT_UTIL_GLOBAL_CONFIG_SOURCE_H__
diff --git a/runtime/onert/core/include/util/IConfigSource.h b/runtime/onert/core/include/util/IConfigSource.h
deleted file mode 100644
index 07b09848a..000000000
--- a/runtime/onert/core/include/util/IConfigSource.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_UTIL_I_CONFIG_SOURCE_H__
-#define __ONERT_UTIL_I_CONFIG_SOURCE_H__
-
-#include <string>
-
-namespace onert
-{
-namespace util
-{
-
-struct IConfigSource
-{
- /**
- * @brief Destroy the IConfigSource object
- */
- virtual ~IConfigSource() = default;
-
- /**
- * @brief get the value for the matching key
- *
- * @param key string key to search
- * @return string value associated with the key
- */
- virtual std::string get(const std::string &key) const = 0;
-};
-
-} // namespace util
-} // namespace onert
-
-#endif // __ONERT_UTIL_I_CONFIG_SOURCE_H__
diff --git a/runtime/onert/core/include/util/ITimer.h b/runtime/onert/core/include/util/ITimer.h
index d5a4e1eb0..f63a3f220 100644
--- a/runtime/onert/core/include/util/ITimer.h
+++ b/runtime/onert/core/include/util/ITimer.h
@@ -46,7 +46,7 @@ public:
{
const auto end_time = std::chrono::steady_clock::now();
_timer_res =
- std::chrono::duration_cast<std::chrono::microseconds>(end_time - _start_time).count();
+ std::chrono::duration_cast<std::chrono::microseconds>(end_time - _start_time).count();
};
private:
diff --git a/runtime/onert/core/include/util/Index.h b/runtime/onert/core/include/util/Index.h
index e8f59282d..49c5f4c6d 100644
--- a/runtime/onert/core/include/util/Index.h
+++ b/runtime/onert/core/include/util/Index.h
@@ -138,13 +138,12 @@ public:
*/
T value() const { return _index; }
- friend std::ostream &operator<<(std::ostream &o, const Index &t)
- {
- if (t.undefined())
- return o << std::string("undefined");
- else
- return o << t.value();
- }
+ /**
+ * @brief Return max index value
+ *
+ * @return Maximum valid index value
+ */
+ static T max() { return UNDEFINED - 1; }
private:
T _index;
diff --git a/runtime/onert/core/include/util/MinMaxMap.h b/runtime/onert/core/include/util/MinMaxMap.h
new file mode 100644
index 000000000..2245f84b0
--- /dev/null
+++ b/runtime/onert/core/include/util/MinMaxMap.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_UTIL_MINMAX_MAP_H_
+#define __ONERT_UTIL_MINMAX_MAP_H_
+
+#include <unordered_map>
+#include <utility>
+
+namespace onert
+{
+namespace util
+{
+
+template <typename N, typename Hash = std::hash<N>> class MinMaxMap
+{
+ struct MinMaxPair
+ {
+ float data[2]; // [0] = min, [1] = max
+ };
+
+public:
+ void append(N node, float min, float max) { _minmax_map[node] = {min, max}; }
+ auto begin() const { return _minmax_map.begin(); }
+ auto end() const { return _minmax_map.end(); }
+
+private:
+ std::unordered_map<N, MinMaxPair, Hash> _minmax_map;
+};
+
+} // namespace util
+} // namespace onert
+
+#endif // __ONERT_UTIL_MINMAX_MAP_H_
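
A sketch of recording per-node ranges with the map above; `ir::OperationIndex` as the key type is an assumption here (any hashable node id works):

  #include "ir/Index.h"
  #include "util/MinMaxMap.h"

  void minmax_example()
  {
    onert::util::MinMaxMap<onert::ir::OperationIndex> minmax;
    minmax.append(onert::ir::OperationIndex{0}, -1.5f, 3.2f);
    for (const auto &kv : minmax)
    {
      const float *range = kv.second.data; // range[0] = min, range[1] = max
      (void)range;
    }
  }
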
diff --git a/runtime/onert/core/include/util/ObjectManager.h b/runtime/onert/core/include/util/ObjectManager.h
index d2dd881a8..077a4c2ef 100644
--- a/runtime/onert/core/include/util/ObjectManager.h
+++ b/runtime/onert/core/include/util/ObjectManager.h
@@ -17,12 +17,13 @@
#ifndef __ONERT_UTIL_OBJECT_MANAGER_H__
#define __ONERT_UTIL_OBJECT_MANAGER_H__
-#include <unordered_map>
-#include <memory>
-#include <list>
-#include <functional>
+#include "util/logging.h"
+#include <cassert>
+#include <functional>
+#include <list>
#include <memory>
+#include <unordered_map>
namespace onert
{
@@ -36,35 +37,71 @@ namespace util
template <typename Index, typename Object> class ObjectManager
{
public:
- ObjectManager() : _index_count{0u} {}
+ ObjectManager() : _next_index{0u} {}
public:
/**
- * @brief Create an object with args and put it in the container with a new Index for that
+ * @brief Create an object with args and put it in the container with a newly assigned @c Index
*
* @param[in] args Arguments for creating Operand object
- * @return Created index that is associated to the object
+ * @return Created index associated with the object if successful, an Undefined index otherwise
*/
template <class... Args> Index emplace(Args &&... args)
{
auto index = generateIndex();
+ if (!index.valid())
+ return index;
_objects.emplace(index, std::make_unique<Object>(std::forward<Args>(args)...));
return index;
}
/**
- * @brief Put object in the container with a new Index for that
+ * @brief Put the object in the container with the given index.
+ *
+ * It fails when the given index is already taken or @c index is Undefined.
+ *
+ * @param[in] object Object to be pushed
+ * @param[in] index Index associated with the object
+ * @return @c index if successful, an Undefined index otherwise
+ */
+ Index push(std::unique_ptr<Object> &&object, Index index)
+ {
+ auto gen_index = tryIndex(index);
+ if (gen_index.valid())
+ _objects.emplace(gen_index, std::move(object));
+ return gen_index;
+ }
+ /**
+ * @brief Put the object in the container with a newly assigned index.
+ *
+ * It fails when it cannot generate a valid index.
*
* @param[in] object Object to be pushed
- * @return Created index that is associated to the object
+ * @return The newly assigned index if successful, an Undefined index otherwise
*/
Index push(std::unique_ptr<Object> &&object)
{
- auto index = generateIndex();
- _objects.emplace(index, std::move(object));
+ auto gen_index = generateIndex();
+ if (gen_index.valid())
+ _objects.emplace(gen_index, std::move(object));
+ return gen_index;
+ }
+ /**
+ * @brief Set the object in the container with the given index.
+ *
+ * If the index is Undefined, it will fail.
+ * If the index is already taken, it will overwrite the content.
+ *
+ * @param[in] object Object to be pushed
+ * @param[in] index Index associated with the object
+ * @return @c index if successful, an Undefined index otherwise
+ */
+ Index set(Index index, std::unique_ptr<Object> &&object)
+ {
+ if (index.valid())
+ _objects[index] = std::move(object);
return index;
}
-
/**
* @brief Remove the object that is associated with the given index
*
@@ -76,6 +113,8 @@ public:
/**
* @brief Get the object that is associated with the given index
*
+ * If such an object does not exist, it will throw @c std::out_of_range
+ *
* @param[in] index Index of the object to be returned
* @return Object
*/
@@ -83,6 +122,8 @@ public:
/**
* @brief Get the object that is associated with the given index
*
+ * If such an object does not exist, it will throw @c std::out_of_range
+ *
* @param[in] index Index of the object to be returned
* @return Object
*/
@@ -90,6 +131,38 @@ public:
/**
* @brief Get the object that is associated with the given index
*
+ * If such an object does not exist, it will return `nullptr`
+ *
+ * @param[in] index Index of the object to be returned
+ * @return Object
+ */
+ const Object *getRawPtr(const Index &index) const
+ {
+ auto itr = _objects.find(index);
+ if (itr == _objects.end())
+ return nullptr;
+ else
+ {
+ assert(itr->second != nullptr);
+ return itr->second.get();
+ }
+ }
+ /**
+ * @brief Get the object that is associated with the given index
+ *
+ * If such an object does not exist, it will return `nullptr`
+ *
+ * @param[in] index Index of the object to be returned
+ * @return The found object
+ */
+ Object *getRawPtr(const Index &index)
+ {
+ return const_cast<Object *>(
+ const_cast<const ObjectManager<Index, Object> *>(this)->getRawPtr(index));
+ }
+ /**
+ * @brief Get the object that is associated with the given index
+ *
* @param[in] index Index of the object to be returned
* @return true if such entry exists otherwise false
*/
@@ -99,6 +172,12 @@ public:
return it != _objects.end();
}
/**
+ * @brief Return the number of objects that the manager contains
+ *
+ * @return size_t Number of objects
+ */
+ size_t size() const { return _objects.size(); }
+ /**
* @brief Iterate over the container with given function
*
* @param[in] fn Function to be run for every container entry
@@ -123,23 +202,51 @@ public:
// This implementation is a workaround in case of adding operands while iteration
std::list<Index> l;
- for (auto &e : _objects)
+ for (const auto &e : _objects)
{
l.push_back(e.first);
}
- for (auto index : l)
+ for (const auto &index : l)
{
fn(index, *_objects[index]);
}
}
private:
- Index generateIndex() { return Index{_index_count++}; }
+ // Try assigning the given index
+ Index tryIndex(Index index)
+ {
+ if (!index.valid())
+ return index;
+ if (_objects.find(index) == _objects.end())
+ {
+ // If the given index does not exist, update the next index and return the index
+ if (index.value() >= _next_index)
+ _next_index = index.value() + 1;
+ return index;
+ }
+ else
+ {
+ // If the given index exists already, return a non-valid index
+ return Index{};
+ }
+ }
+
+ // Generate a new index with `_next_index`
+ Index generateIndex()
+ {
+ // No need to check if there is an entry with _next_index since
+ // _next_index is always ("the highest index in the object map" + 1)
+ if (Index{_next_index}.valid())
+ return Index{_next_index++};
+ else
+ return Index{};
+ }
protected:
std::unordered_map<Index, std::unique_ptr<Object>> _objects;
- uint32_t _index_count;
+ uint32_t _next_index;
};
} // namespace util
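
Taken together, the new push/set semantics give three distinct insertion behaviors plus a non-throwing lookup; a self-contained sketch using std::string as the stored object type:

  #include "ir/Index.h"
  #include "util/ObjectManager.h"
  #include <string>

  void object_manager_example()
  {
    onert::util::ObjectManager<onert::ir::OperandIndex, std::string> mgr;

    // 1. Auto-assigned index; an Undefined index is returned if generation fails.
    auto i0 = mgr.push(std::make_unique<std::string>("a"));
    // 2. Caller-chosen index; fails (returns Undefined) if the slot is already taken.
    auto i1 = mgr.push(std::make_unique<std::string>("b"), onert::ir::OperandIndex{7});
    // 3. set() overwrites an existing slot instead of failing.
    mgr.set(i0, std::make_unique<std::string>("a2"));

    // getRawPtr() is the non-throwing lookup: nullptr instead of std::out_of_range.
    if (auto *s = mgr.getRawPtr(i1))
    {
      (void)s;
    }
  }
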
diff --git a/runtime/onert/core/include/util/Set.h b/runtime/onert/core/include/util/Set.h
index ee4062d25..73d43d4f0 100644
--- a/runtime/onert/core/include/util/Set.h
+++ b/runtime/onert/core/include/util/Set.h
@@ -53,6 +53,16 @@ public:
public:
/**
+ * @brief copy assignment operator
+ */
+ Set<Element> &operator=(const Set<Element> &) = default;
+ /**
+ * @brief move assignment operator
+ */
+ Set<Element> &operator=(Set<Element> &&) = default;
+
+public:
+ /**
* @brief Add a given element to the set
*
* @param e Element added
@@ -104,7 +114,7 @@ public:
Set<Element> operator|(const Set<Element> &other) const // Union
{
auto ret = *this;
- for (auto e : other)
+ for (auto &&e : other)
{
ret.add(e);
}
@@ -118,7 +128,7 @@ public:
Set<Element> operator&(const Set<Element> &other) const // Intersect
{
Set<Element> ret;
- for (auto e : other)
+ for (auto &&e : other)
{
if (contains(e))
{
@@ -135,7 +145,7 @@ public:
Set<Element> operator-(const Set<Element> &other) const // Minus
{
auto ret = *this;
- for (auto e : other)
+ for (auto &&e : other)
{
ret.remove(e);
}
diff --git a/runtime/onert/core/include/util/ShapeInference.h b/runtime/onert/core/include/util/ShapeInference.h
index 1ebed48f2..d859378c6 100644
--- a/runtime/onert/core/include/util/ShapeInference.h
+++ b/runtime/onert/core/include/util/ShapeInference.h
@@ -29,7 +29,6 @@
#include "ir/Index.h"
#include "ir/Layout.h"
#include "ir/OperationVisitor.h"
-#include "backend/IDynamicTensorManager.h"
#include "backend/ITensor.h"
#include "backend/ITensorRegistry.h"
@@ -42,12 +41,19 @@ using Shapes = std::vector<ir::Shape>;
// Define shape calculation for operations. List them in alphabetic order.
-ir::Shape inferArgMaxShape(const ir::Shape &input_shape, int axis, int rank);
+ir::Shape inferArgMinMaxShape(const ir::Shape &input_shape, int axis, int rank);
ir::Shape inferBatchMatMulShape(const ir::Shape &lhs_shape, const ir::Shape &rhs_shape,
const ir::operation::BatchMatMul::Param &param);
-ir::Shape inferBroadcastToShape(const ir::Shape wshape, const int32_t *shape_buffer);
+ir::Shape inferBCQFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &cluster_shape,
+ const int32_t *cluster_buf);
+
+ir::Shape inferBCQGatherShape(const ir::Shape &indices_shape, const ir::Shape &cluster_shape,
+ const int32_t *cluster_buf, int rank,
+ const ir::operation::BCQGather::Param &param);
+
+ir::Shape inferBroadcastToShape(const ir::Shape shp_shape, const int32_t *shp_buf);
ir::Shape inferConcatShape(const Shapes &in_shapes, const ir::operation::Concat::Param &param);
@@ -63,7 +69,7 @@ ir::Shape inferEltwiseShape(const ir::Shape &lhs_shape, const ir::Shape &rhs_sha
ir::Shape inferExpandDimsShape(const ir::Shape &in_shape, int32_t axis);
-ir::Shape inferFillShape(const ir::Shape &in_shape, const int32_t *buf);
+template <typename T> ir::Shape inferFillShape(const ir::Shape &fill_shape, const T *shape_buf);
ir::Shape inferFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &ker_shape);
@@ -97,12 +103,12 @@ ir::Shape inferResizeBilinearShape(const ir::Shape &in_shape, const int32_t outp
ir::Shape inferSelectShape(const ir::Shape &input_cond_shape, const ir::Shape &input_true_shape,
const ir::Shape &input_false_shape);
-ir::Shape inferSliceShape(const ir::Shape &input_shape, const int32_t *begins,
- const int32_t *sizes);
+template <typename T>
+ir::Shape inferSliceShape(const ir::Shape &input_shape, const T *begins_buf, const T *sizes_buf);
ir::Shape inferSpaceToBatchNDShape(const ir::Shape &input_shape, const ir::Shape &block_shape_shape,
- const ir::Shape &padding_shape, const int32_t *block_shape_data,
- const int32_t *padding_data);
+ const ir::Shape &padding_shape, const int32_t *block_shape_buf,
+ const int32_t *padding_buf);
ir::Shape inferSplitShape(const ir::Shape input_shape, int axis_value, int num_splits);
@@ -132,9 +138,11 @@ StridedSliceParams buildStridedSliceParams(const T *begin, const T *end, const T
ir::Shape inferStridedSliceShape(const ir::Shape &input_shape, const StridedSliceParams &op_params,
uint32_t rank);
-ir::Shape inferTileShape(const ir::Shape &in_shape, const int32_t *multiplier);
+ir::Shape inferTileShape(const ir::Shape &in_shape, const int32_t *multiplier_buf,
+ const int32_t multiplier_size);
-ir::Shape inferTransposeShape(const ir::Shape &in_shape, const std::vector<int> &perm);
+ir::Shape inferTransposeShape(const ir::Shape &in_shape, const int32_t *perm_buf,
+ const int32_t rank);
ir::Shape inferUnpackShape(const ir::Shape &input_shape, int axis, int rank);
diff --git a/runtime/onert/core/include/util/TracingCtx.h b/runtime/onert/core/include/util/TracingCtx.h
new file mode 100644
index 000000000..da284d2fb
--- /dev/null
+++ b/runtime/onert/core/include/util/TracingCtx.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_UTIL_TRACING_CTX_H__
+#define __ONERT_UTIL_TRACING_CTX_H__
+
+#include "ir/Graph.h"
+#include "ir/Index.h"
+#include "ir/Model.h"
+
+#include <unordered_map>
+#include <mutex>
+
+namespace onert
+{
+namespace util
+{
+
+/**
+ * @brief Class to maintain information about profiling per session
+ */
+class TracingCtx
+{
+public:
+ /**
+ * @brief Create and store unique session id managed by this class
+ * @note This constructor can be called by multiple session running in parallely.
+ */
+ TracingCtx(void) { decideSessionID(); }
+
+ uint32_t getSessionId() const { return _session_id; }
+
+ /**
+ * @brief Return true if more than 1 session exist
+ *
+ * @note This method is NOT thread-safe. Call it only in a thread-safe context.
+ */
+ bool hasMultipleSessions() const { return _next_session_id > 1; }
+
+ /**
+ * @brief Set subgraph index of a graph
+ */
+ void setSubgraphIndex(const ir::Graph *g, uint32_t index) { _subgraph_indices.emplace(g, index); }
+
+ /**
+ * @brief Get subgraph index of a graph.
+ */
+ ir::SubgraphIndex getSubgraphIndex(const ir::Graph *g) const { return _subgraph_indices.at(g); }
+
+private:
+ void decideSessionID()
+ {
+ std::unique_lock<std::mutex> lock{_session_id_mutex};
+
+ _session_id = _next_session_id++;
+ }
+
+private:
+ std::unordered_map<const ir::Graph *, ir::SubgraphIndex> _subgraph_indices;
+ uint32_t _session_id;
+ static std::mutex _session_id_mutex;
+ static uint32_t _next_session_id;
+};
+
+} // namespace util
+} // namespace onert
+
+#endif // __ONERT_UTIL_TRACING_CTX_H__
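
A sketch of the intended per-session bookkeeping: each context grabs the next session id under the static mutex at construction, and subgraph indices are registered during lowering (the `g` pointer below is a placeholder):

  #include "util/TracingCtx.h"

  void tracing_example(const onert::ir::Graph *g)
  {
    onert::util::TracingCtx ctx; // unique session id assigned here
    ctx.setSubgraphIndex(g, 0);  // associate a graph with subgraph index 0

    auto sid = ctx.getSessionId();           // label trace events with this id
    bool multi = ctx.hasMultipleSessions();  // NOT thread-safe, per the note above
    (void)sid;
    (void)multi;
  }
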
diff --git a/runtime/onert/core/include/util/Utils.h b/runtime/onert/core/include/util/Utils.h
index 847fb6971..6b6bc2400 100644
--- a/runtime/onert/core/include/util/Utils.h
+++ b/runtime/onert/core/include/util/Utils.h
@@ -22,6 +22,61 @@
#ifndef __ONERT_UTIL_UTILS_H__
#define __ONERT_UTIL_UTILS_H__
+#include "ir/Coordinates.h"
+#include "ir/Shape.h"
+
#define UNUSED_RELEASE(a) (void)(a)
+template <size_t rest> struct ForEachDimension
+{
+ template <typename L>
+ static void unroll(const onert::ir::Shape &shape, onert::ir::Coordinates &coords,
+ L lambda_function)
+ {
+ if (static_cast<int>(rest) > shape.rank())
+ {
+ ForEachDimension<rest - 1>::unroll(shape, coords, lambda_function);
+ return;
+ }
+
+ const auto axis = shape.rank() - rest;
+ const auto &d = shape.dim(axis);
+
+ for (auto v = 0; v < d; v++)
+ {
+ coords.set(axis, v);
+ ForEachDimension<rest - 1>::unroll(shape, coords, lambda_function);
+ }
+ }
+};
+
+template <> struct ForEachDimension<0>
+{
+ template <typename L>
+ static void unroll(const onert::ir::Shape &shape, onert::ir::Coordinates &coords,
+ L lambda_function)
+ {
+ UNUSED_RELEASE(shape);
+ lambda_function(coords);
+ }
+};
+
+template <typename L> inline void ShapeLoop(const onert::ir::Shape &shape, L lambda_function)
+{
+ int32_t rank = shape.rank();
+ assert(rank > 0);
+ for (int32_t i = 0; i < rank; ++i)
+ {
+ assert(shape.dim(i) > 0);
+ }
+
+ onert::ir::Coordinates coords;
+ if (rank == 0)
+ {
+ coords.set(0, 0);
+ }
+ // TODO Change 6 to onert::ir::Shape::kMaxRank if onert::ir::Shape::kMaxRank is modified as a
+ // constant expression
+ ForEachDimension<6>::unroll(shape, coords, lambda_function);
+}
#endif // __ONERT_UTIL_UTILS_H__
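
ShapeLoop above unrolls up to rank 6 and invokes the lambda once per coordinate; a sketch that counts visited elements (assuming ir::Shape's initializer-list constructor):

  #include "util/Utils.h"

  void shape_loop_example()
  {
    onert::ir::Shape shape{2, 3};
    size_t count = 0;
    ShapeLoop(shape, [&](const onert::ir::Coordinates &coords) {
      (void)coords;
      ++count;
    });
    // count == 6 (= 2 * 3)
  }
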
diff --git a/runtime/onert/core/include/util/logging.h b/runtime/onert/core/include/util/logging.h
index 76cfb8d60..fe255f8ff 100644
--- a/runtime/onert/core/include/util/logging.h
+++ b/runtime/onert/core/include/util/logging.h
@@ -18,6 +18,7 @@
#define __ONERT_UTIL_LOGGING_H__
#include <iostream>
+#include <cstring>
#include "util/ConfigSource.h"
@@ -52,16 +53,34 @@ private:
static Context &ctx = Context::get();
+inline std::string decorated_name(const char *input)
+{
+ const int min_prefix = 16;
+ std::string prefix(input);
+ auto len_prefix = prefix.size();
+ if (len_prefix > min_prefix)
+ return "[" + prefix + "] ";
+ std::string spaces((min_prefix - len_prefix) / 2, ' ');
+ return (len_prefix % 2 ? "[ " : "[") + spaces + prefix + spaces + "] ";
+}
+
} // namespace logging
} // namespace util
} // namespace onert
#define VERBOSE(name) \
if (::onert::util::logging::ctx.enabled()) \
- std::cout << "[" << #name << "] "
+ std::cout << ::onert::util::logging::decorated_name(#name)
#define VERBOSE_F() \
if (::onert::util::logging::ctx.enabled()) \
- std::cout << "[" << __func__ << "] "
+ std::cout << ::onert::util::logging::decorated_name(__func__)
+
+#define WHEN_LOG_ENABLED(METHOD) \
+ if (::onert::util::logging::ctx.enabled()) \
+ do \
+ { \
+ METHOD; \
+ } while (0)
#endif // __ONERT_UTIL_LOGGING_H__
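
With decorated_name(), VERBOSE tags shorter than 16 characters are centered in a fixed-width bracket so log columns line up; roughly:

  VERBOSE(ALLOC) << "allocation capacity: " << 1024 << std::endl;
  // with ONERT_LOG_ENABLE=1, prints something like:
  // [      ALLOC     ] allocation capacity: 1024
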
diff --git a/runtime/onert/core/src/backend/BackendContext.cc b/runtime/onert/core/src/backend/BackendContext.cc
index bafa36d28..7b36f106d 100644
--- a/runtime/onert/core/src/backend/BackendContext.cc
+++ b/runtime/onert/core/src/backend/BackendContext.cc
@@ -16,40 +16,10 @@
#include "backend/BackendContext.h"
-#include "ir/Operation.h"
-#include "backend/IConstantInitializer.h"
-
namespace onert
{
namespace backend
{
-void BackendContext::initialize(const std::vector<OperationInfo> &operation_list,
- const std::vector<ir::OperandIndex> &operand_list)
-{
- _operation_list = operation_list;
- _operand_list = operand_list;
-}
-
-void BackendContext::initConsts()
-{
- for (auto &op : _operation_list)
- {
- constant_initializer->setLayout(op.layout);
- _graph->operations().at(op.index).accept(*constant_initializer);
- }
-
- for (auto ind : _operand_list)
- {
- const auto &obj = _graph->operands().at(ind);
- if (obj.isConstant() && !constant_initializer->exist(ind))
- {
- constant_initializer->registerDefaultInitializer(ind, obj);
- }
- }
-
- constant_initializer->run();
-}
-
} // namespace backend
} // namespace onert
diff --git a/runtime/onert/core/src/backend/IConstantInitializer.cc b/runtime/onert/core/src/backend/IConstantInitializer.cc
deleted file mode 100644
index 934a42753..000000000
--- a/runtime/onert/core/src/backend/IConstantInitializer.cc
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "backend/IConstantInitializer.h"
-
-#include <Half.h>
-
-using float16 = Half;
-
-namespace onert
-{
-namespace backend
-{
-
-void IConstantInitializer::registerCopyInitializer(const ir::OperandIndex &index,
- const ir::Operand &obj)
-{
- // For only CONSTANTS
- // TODO Add to check if tensor has been allocated
- if (!obj.isConstant())
- return;
-
- const auto type = obj.typeInfo().type();
- using ir::DataType;
-
- switch (type)
- {
- case DataType::FLOAT32:
- _init_map[index] = copyInit<float>;
- break;
- case DataType::INT32:
- _init_map[index] = copyInit<int32_t>;
- break;
- case DataType::UINT32:
- _init_map[index] = copyInit<uint32_t>;
- break;
- case DataType::BOOL8:
- case DataType::QUANT_UINT8_ASYMM:
- _init_map[index] = copyInit<uint8_t>;
- break;
- case DataType::QUANT_INT8_SYMM:
- _init_map[index] = copyInit<int8_t>;
- break;
- case DataType::FLOAT16:
- _init_map[index] = copyInit<float16>;
- break;
- case DataType::INT64:
- _init_map[index] = copyInit<int64_t>;
- break;
- default:
- throw std::runtime_error("Not supported, yet");
- break;
- }
-}
-
-void IConstantInitializer::registerPermuteInitializer(const ir::OperandIndex &index,
- const ir::Operand &obj)
-{
- // For only CONSTANTS
- // TODO Add to check if tensor has been allocated
- if (!obj.isConstant())
- return;
-
- const auto type = obj.typeInfo().type();
- using ir::DataType;
- using namespace std::placeholders;
-
- switch (type)
- {
- case DataType::FLOAT32:
- _init_map[index] = std::bind(permuteInit<float>, _1, _2, _current_op_seq_layout);
- break;
- case DataType::INT32:
- _init_map[index] = std::bind(permuteInit<int32_t>, _1, _2, _current_op_seq_layout);
- break;
- case DataType::UINT32:
- _init_map[index] = std::bind(permuteInit<uint32_t>, _1, _2, _current_op_seq_layout);
- break;
- case DataType::BOOL8:
- case DataType::QUANT_UINT8_ASYMM:
- _init_map[index] = std::bind(permuteInit<uint8_t>, _1, _2, _current_op_seq_layout);
- break;
- case DataType::QUANT_INT8_SYMM:
- _init_map[index] = std::bind(permuteInit<int8_t>, _1, _2, _current_op_seq_layout);
- break;
- case DataType::FLOAT16:
- _init_map[index] = std::bind(permuteInit<float16>, _1, _2, _current_op_seq_layout);
- break;
- case DataType::INT64:
- _init_map[index] = std::bind(permuteInit<int64_t>, _1, _2, _current_op_seq_layout);
- break;
- default:
- throw std::runtime_error("Not supported, yet");
- break;
- }
-}
-
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/core/src/backend/IPortableTensor.cc b/runtime/onert/core/src/backend/IPortableTensor.cc
new file mode 100644
index 000000000..cec34e780
--- /dev/null
+++ b/runtime/onert/core/src/backend/IPortableTensor.cc
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/IPortableTensor.h"
+
+namespace onert
+{
+namespace backend
+{
+
+// `dynamic_cast` not working across library boundaries on NDK
+// With this as a key function, `dynamic_cast` works across dl
+IPortableTensor::~IPortableTensor() {}
+
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/core/src/backend/ITensor.cc b/runtime/onert/core/src/backend/ITensor.cc
index 7127ed93d..1339cb409 100644
--- a/runtime/onert/core/src/backend/ITensor.cc
+++ b/runtime/onert/core/src/backend/ITensor.cc
@@ -21,14 +21,9 @@ namespace onert
namespace backend
{
-ir::Shape ITensor::getShape() const
-{
- onert::ir::Shape shape(num_dimensions());
- for (uint32_t d = 0; d < num_dimensions(); d++)
- shape.dim(d) = dimension(d);
-
- return shape;
-}
+// `dynamic_cast` not working across library boundaries on NDK
+// With this as a key function, `dynamic_cast` works across dl
+ITensor::~ITensor() {}
} // namespace backend
} // namespace onert
diff --git a/runtime/onert/core/src/backend/basic/Allocator.cc b/runtime/onert/core/src/backend/basic/Allocator.cc
new file mode 100644
index 000000000..61214dfad
--- /dev/null
+++ b/runtime/onert/core/src/backend/basic/Allocator.cc
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/basic/Allocator.h"
+
+#include "util/logging.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+Allocator::Allocator(uint32_t capacity)
+{
+ _base = std::make_unique<uint8_t[]>(capacity);
+
+ VERBOSE(ALLOC) << "allocation capacity: " << capacity << std::endl;
+ VERBOSE(ALLOC) << "base pointer: " << static_cast<void *>(_base.get()) << std::endl;
+}
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/core/src/backend/basic/BackendContextHelpers.cc b/runtime/onert/core/src/backend/basic/BackendContextHelpers.cc
new file mode 100644
index 000000000..c02cc0cf2
--- /dev/null
+++ b/runtime/onert/core/src/backend/basic/BackendContextHelpers.cc
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/basic/BackendContextHelpers.h"
diff --git a/runtime/onert/core/src/backend/basic/DynamicTensorManager.cc b/runtime/onert/core/src/backend/basic/DynamicTensorManager.cc
new file mode 100644
index 000000000..07bcb09ee
--- /dev/null
+++ b/runtime/onert/core/src/backend/basic/DynamicTensorManager.cc
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/basic/DynamicTensorManager.h"
+
+#include "util/logging.h"
+#include "misc/polymorphic_downcast.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+DynamicTensorManager::DynamicTensorManager(const std::shared_ptr<TensorRegistry> &reg)
+ : _dynamic_mem_mgr{new DynamicMemoryManager()}, _tensors{reg}
+{
+ // DO NOTHING
+}
+
+void DynamicTensorManager::buildTensor(const ir::OperandIndex &ind,
+ const ir::OperandInfo &tensor_info,
+ ir::Layout backend_layout)
+{
+ assert(_tensors->getNativeTensor(ind) == nullptr);
+ auto tensor = std::make_unique<Tensor>(tensor_info, backend_layout, _dynamic_mem_mgr.get());
+ _tensors->setNativeTensor(ind, std::move(tensor));
+}
+
+const ITensor *DynamicTensorManager::getRawITensor(ir::OperandIndex ind)
+{
+ auto ptr = _tensors->getITensor(ind);
+ assert(ptr);
+ return ptr;
+}
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/core/src/backend/basic/MemoryManager.cc b/runtime/onert/core/src/backend/basic/MemoryManager.cc
new file mode 100644
index 000000000..05fd9cc77
--- /dev/null
+++ b/runtime/onert/core/src/backend/basic/MemoryManager.cc
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <backend/basic/MemoryManager.h>
+
+#include <cassert>
+
+#include "MemoryPlannerFactory.h"
+#include "util/ConfigSource.h"
+#include "util/logging.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+MemoryManager::MemoryManager() : _mem_planner{createMemoryPlanner()}
+{
+ // DO NOTHING
+}
+
+MemoryManager::MemoryManager(const std::string planner_id)
+ : _mem_planner{createMemoryPlanner(planner_id)}
+{
+ // DO NOTHING
+}
+
+basic::IMemoryPlanner *MemoryManager::createMemoryPlanner()
+{
+ auto planner_id = util::getConfigString(util::config::CPU_MEMORY_PLANNER);
+ return basic::MemoryPlannerFactory::get().create(planner_id);
+}
+
+basic::IMemoryPlanner *MemoryManager::createMemoryPlanner(const std::string planner_id)
+{
+ return basic::MemoryPlannerFactory::get().create(planner_id);
+}
+
+void MemoryManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
+{
+ _mem_planner->claim(ind, size);
+}
+
+void MemoryManager::releasePlan(const ir::OperandIndex &ind) { _mem_planner->release(ind); }
+
+void MemoryManager::allocate(void)
+{
+ _mem_alloc = std::make_shared<basic::Allocator>(_mem_planner->capacity());
+ assert(_mem_alloc->base());
+}
+
+uint8_t *MemoryManager::getBuffer(const ir::OperandIndex &ind) const
+{
+ assert(_mem_planner->memory_plans().find(ind) != _mem_planner->memory_plans().end());
+ const auto &mem_blk = _mem_planner->memory_plans().at(ind);
+ return _mem_alloc->base() + mem_blk.offset;
+}
+
+std::shared_ptr<basic::Allocator> DynamicMemoryManager::allocate(const ITensor *tensor,
+ uint32_t capacity)
+{
+ auto find = _mem_alloc_map.find(tensor);
+ if (find != _mem_alloc_map.end())
+ throw std::runtime_error("Cannot allocate memory for a tensor. It was already allocated.");
+
+ _mem_alloc_map[tensor] = std::make_shared<basic::Allocator>(capacity);
+ return _mem_alloc_map[tensor];
+}
+
+void DynamicMemoryManager::deallocate(const ITensor *tensor)
+{
+ auto find = _mem_alloc_map.find(tensor);
+ if (find == _mem_alloc_map.end())
+ throw std::runtime_error("Cannot find Allocator for the requested index");
+
+ find->second->release(); // explicitly erase memory
+ _mem_alloc_map.erase(find); // remove tensor and alloc
+}
+
+void DynamicMemoryManager::deallocate(void)
+{
+ for (auto &&mem_alloc : _mem_alloc_map)
+ {
+ // Release memory buffer of mem_alloc
+ mem_alloc.second->release();
+ }
+
+ _mem_alloc_map.clear();
+}
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
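
The static planner workflow implemented above is: declare lifetimes with claim/release pairs, allocate the whole arena once, then hand out per-operand offsets; a compressed sketch:

  #include <backend/basic/MemoryManager.h>

  void memory_manager_example()
  {
    onert::backend::basic::MemoryManager mgr; // planner chosen via CPU_MEMORY_PLANNER config

    // Planning phase: claim at first use, release at last use, so regions can be reused.
    mgr.claimPlan(onert::ir::OperandIndex{0}, 1024);
    mgr.claimPlan(onert::ir::OperandIndex{1}, 512);
    mgr.releasePlan(onert::ir::OperandIndex{0});

    // One backing allocation for the whole plan, then per-operand views into it.
    mgr.allocate();
    uint8_t *buf1 = mgr.getBuffer(onert::ir::OperandIndex{1});
    (void)buf1;
  }
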
diff --git a/runtime/onert/core/src/backend/basic/MemoryPlanner.cc b/runtime/onert/core/src/backend/basic/MemoryPlanner.cc
new file mode 100644
index 000000000..1c048043c
--- /dev/null
+++ b/runtime/onert/core/src/backend/basic/MemoryPlanner.cc
@@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MemoryPlanner.h"
+#include "util/logging.h"
+#include <cassert>
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+void BumpPlanner::claim(const ir::OperandIndex &ind, size_t size)
+{
+ Block blk{_capacity, size};
+ _mem_plans[ind] = blk;
+ _capacity += size;
+
+ VERBOSE(BP_PLANNER) << "CLAIM(" << ind << "): " << blk.offset << ", " << blk.size << std::endl;
+}
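+
+// Editor's sketch of the bump strategy (illustrative, mirrors BumpPlanner.claim_test):
+// starting from an empty planner, claims of 10, 20 and 30 bytes are placed at offsets
+// 0, 10 and 30, and _capacity grows to 60. release() reclaims nothing, so peak memory
+// equals the sum of all claimed sizes.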
+
+void BumpPlanner::release(const ir::OperandIndex &ind)
+{
+ VERBOSE(BP_PLANNER) << "RELEASE(" << ind << "): "
+ << "NOTHING does" << std::endl;
+}
+
+// There are some assumptions for claiming memory (== making a reservation for memory).
+// 1. About _claim_table (std::map).
+//   - Its data structure is std::map, so entries are always sorted by key (base_offset),
+//     with the value being the OperandIndex claimed at that offset.
+//   - claim() inserts a key/value pair into _claim_table, and release() removes it.
+//   - _claim_table shows the memory status at a certain point in time. Therefore,
+//     - if _claim_table holds an offset with a certain size at that point in time, the
+//       place at that offset has already been claimed (== cannot be claimed now; some
+//       new place must be found), and
+//     - if _claim_table has no element covering an offset at that point in time, the
+//       place at that offset can be claimed.
+// 2. In the loop over _claim_table, we can assume that the current claim_base_offset is
+//    bigger than the previous claim_base_offset.
+void FirstFitPlanner::claim(const ir::OperandIndex &ind, size_t size)
+{
+ // Find the right position for claiming
+ uint32_t next_offset = 0;
+ for (const auto &mem_claim : _claim_table)
+ {
+ auto claimed_base_offset = mem_claim.first;
+ auto claimed_size = _mem_plans[mem_claim.second].size;
+ if (next_offset + size <= claimed_base_offset)
+ {
+ break;
+ }
+ else
+ {
+ next_offset = claimed_base_offset + claimed_size;
+ }
+ }
+
+ // Now next_offset is set to the proper offset
+ _claim_table[next_offset] = ind;
+ _mem_plans[ind] = {next_offset, size};
+
+ VERBOSE(FF_PLANNER) << "claim(" << ind << "): [+" << next_offset << ", " << size << "sz]"
+ << std::endl;
+
+ if (_capacity < next_offset + size)
+ {
+ _capacity = next_offset + size;
+ }
+}
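+
+// Editor's walk-through of the first-fit scan above (illustrative, mirrors
+// FirstFitPlanner.claim_release_test): after claims of 10, 20 and 30 bytes at offsets
+// 0, 10 and 30, releasing the first block frees [0, 10). A following claim of 20 bytes
+// does not fit that gap, so it is placed at offset 60, while a later claim of 5 bytes
+// fits and reuses offset 0.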
+
+void FirstFitPlanner::release(const ir::OperandIndex &ind)
+{
+ for (auto it = _claim_table.cbegin(); it != _claim_table.cend(); ++it)
+ {
+ if (it->second == ind)
+ {
+ uint32_t offset = it->first;
+ uint32_t index = ind.value();
+ uint32_t size = _mem_plans[ind].size;
+
+ _claim_table.erase(it);
+
+ VERBOSE(FF_PLANNER) << "release(" << index << "): [+" << offset << ", " << size << "sz]"
+ << std::endl;
+ return;
+ }
+ }
+ assert(!"Cannot release for given index. It has been not claimed or released already.");
+}
+
+WICPlanner::WICPlanner()
+ : _initialized(false), _capacity(0), _mem_plans(), _live_operands(), _interference_graph(),
+ _operands()
+{
+ // DO NOTHING
+}
+
+void WICPlanner::claim(const ir::OperandIndex &ind, size_t size)
+{
+ _operands.emplace(size, ind);
+ _interference_graph[ind].insert(_interference_graph[ind].end(), _live_operands.cbegin(),
+ _live_operands.cend());
+ for (const auto &live_operand : _live_operands)
+ {
+ _interference_graph[live_operand].emplace_back(ind);
+ }
+ _live_operands.emplace(ind);
+
+ VERBOSE(WIC_PLANNER) << "claim(" << ind << "): [" << size << "sz]" << std::endl;
+}
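+
+// Editor's note: claim() places nothing yet. It only records the operand in _operands
+// (sorted by size, descending) and links it in _interference_graph with every operand
+// currently live; offsets are assigned later in buildMemoryPlans().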
+
+void WICPlanner::release(const ir::OperandIndex &ind)
+{
+ _live_operands.erase(ind);
+ VERBOSE(WIC_PLANNER) << "release(" << ind << ")" << std::endl;
+}
+
+/*
+ * Build memory plans using the liveness and sizes of operands
+ * 1. Build an interference graph at claim()
+ *   - Two operands interfere if their live ranges overlap
+ * 2. Sort operands in descending order of size
+ *   - Use std::multimap to sort operands
+ * 3. Allocate a memory block for each operand in that order
+ *   - Find a free memory block that does not overlap with any interfering operand
+ */
+void WICPlanner::buildMemoryPlans()
+{
+ for (const auto &operand : _operands)
+ {
+ uint32_t size = operand.first;
+ const ir::OperandIndex &ind = operand.second;
+ VERBOSE(WIC_PLANNER) << "build_plan(" << ind << "): [" << size << "sz]" << std::endl;
+
+ uint32_t next_offset = 0;
+ if (_interference_graph.count(ind))
+ {
+ // Find interfered memory plans and sort them by offset
+ std::multimap<uint32_t, uint32_t> interfered_plans;
+ for (const auto &interference : _interference_graph[ind])
+ {
+ if (_mem_plans.count(interference))
+ interfered_plans.emplace(_mem_plans[interference].offset, _mem_plans[interference].size);
+ }
+
+ // Find free memory block in first-fit manner
+ for (const auto &interfered_plan : interfered_plans)
+ {
+ auto claimed_base_offset = interfered_plan.first;
+ auto claimed_size = interfered_plan.second;
+ VERBOSE(WIC_PLANNER) << "interfere : [+" << claimed_base_offset << ", " << claimed_size
+ << "sz]" << std::endl;
+ if (next_offset + size <= claimed_base_offset)
+ {
+ break;
+ }
+ else if (next_offset < claimed_base_offset + claimed_size)
+ {
+ next_offset = claimed_base_offset + claimed_size;
+ }
+ }
+ }
+ else
+ {
+ VERBOSE(WIC_PLANNER) << "No interference" << std::endl;
+ }
+
+ _mem_plans[ind] = {next_offset, size};
+ VERBOSE(WIC_PLANNER) << "alloc(" << ind << "): [+" << next_offset << ", " << size << "sz]"
+ << std::endl;
+
+ if (_capacity < next_offset + size)
+ {
+ _capacity = next_offset + size;
+ }
+ }
+ _initialized = true;
+ _interference_graph.clear();
+ _operands.clear();
+}
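+
+// Editor's example (illustrative, following WICPlanner.claim_release_test): the three
+// 20-byte operands 0, 5 and 6 are placed first. Operands with disjoint live ranges,
+// such as 0, 2 and 5, may all share offset 0, while operand 1 (5 bytes) interferes
+// with 0 and is pushed to offset 20. The final capacity is 40 bytes although 95 bytes
+// were claimed in total.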
+
+WICPlanner::MemoryPlans &WICPlanner::memory_plans()
+{
+ if (!_initialized)
+ buildMemoryPlans();
+ return _mem_plans;
+}
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/core/src/backend/basic/MemoryPlanner.h b/runtime/onert/core/src/backend/basic/MemoryPlanner.h
new file mode 100644
index 000000000..661d0b5d9
--- /dev/null
+++ b/runtime/onert/core/src/backend/basic/MemoryPlanner.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file        MemoryPlanner.h
+ * @brief       This file contains Memory Planning related classes
+ */
+
+#ifndef __ONERT_BACKEND_BASIC_MEMORY_PLANNER_H__
+#define __ONERT_BACKEND_BASIC_MEMORY_PLANNER_H__
+
+#include <map>
+#include <vector>
+#include <unordered_set>
+#include <memory>
+
+#include "backend/basic/Allocator.h"
+#include "backend/basic/IMemoryPlanner.h"
+#include "ir/OperandIndexMap.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+/**
+ * @brief Class to plan memory in a bump manner
+ */
+class BumpPlanner : public IMemoryPlanner
+{
+public:
+ /**
+   * @brief Claim memory for an operand in a bump manner
+ * @param[in] index The operand index
+ * @param[in] size The size of the memory
+ */
+ void claim(const ir::OperandIndex &, size_t) override;
+ /**
+   * @brief Release memory for an operand in a bump manner
+ * @param[in] index The operand index
+ */
+ void release(const ir::OperandIndex &) override;
+ /**
+ * @brief Get capacity for memory planning
+ * @return The value of capacity
+ */
+ uint32_t capacity() override { return _capacity; }
+ /**
+ * @brief Get MemoryPlans
+ * @return MemoryPlans
+ */
+ MemoryPlans &memory_plans() override { return _mem_plans; }
+
+private:
+ uint32_t _capacity = 0;
+ MemoryPlans _mem_plans;
+};
+
+/**
+ * @brief Class to plan memory in a first-fit manner
+ */
+class FirstFitPlanner : public IMemoryPlanner
+{
+public:
+ /**
+   * @brief Claim memory for an operand in a first-fit manner
+ * @param[in] index The operand index
+ * @param[in] size The size of the memory
+ */
+ void claim(const ir::OperandIndex &, size_t) override;
+ /**
+   * @brief Release memory for an operand in a first-fit manner
+ * @param[in] index The operand index
+ */
+ void release(const ir::OperandIndex &) override;
+ /**
+ * @brief Get capacity for memory planning
+ * @return The value of capacity
+ */
+ uint32_t capacity() override { return _capacity; }
+ /**
+ * @brief Get MemoryPlans
+ * @return MemoryPlans
+ */
+ MemoryPlans &memory_plans() override { return _mem_plans; }
+
+private:
+ uint32_t _capacity = 0;
+ MemoryPlans _mem_plans;
+ // Use std::map because claim() assumes that _claim_table is sorted by uint32_t(base_offset)
+ std::map<uint32_t, ir::OperandIndex> _claim_table;
+};
+
+/**
+ * @brief Class to plan memory using the Weighted Interval Color (WIC) algorithm
+ */
+class WICPlanner : public IMemoryPlanner
+{
+public:
+ WICPlanner();
+
+ /**
+   * @brief Claim memory for an operand using the WIC algorithm
+ * @param[in] index The operand index
+ * @param[in] size The size of the memory
+ */
+ void claim(const ir::OperandIndex &, size_t) override;
+ /**
+   * @brief Release memory for an operand using the WIC algorithm
+ * @param[in] index The operand index
+ */
+ void release(const ir::OperandIndex &) override;
+ /**
+ * @brief Get capacity for memory planning
+ * @return The value of capacity
+ */
+ uint32_t capacity() override
+ {
+ if (!_initialized)
+ buildMemoryPlans();
+ return _capacity;
+ }
+ /**
+ * @brief Get MemoryPlans
+ * @return MemoryPlans
+ */
+ MemoryPlans &memory_plans() override;
+
+private:
+ void buildMemoryPlans();
+
+ bool _initialized;
+ uint32_t _capacity;
+ MemoryPlans _mem_plans;
+ std::unordered_set<ir::OperandIndex> _live_operands;
+ ir::OperandIndexMap<std::vector<ir::OperandIndex>> _interference_graph;
+ // Sort operands by descending order of size
+ std::multimap<uint32_t, ir::OperandIndex, std::greater<uint32_t>> _operands;
+};
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BASIC_MEMORY_PLANNER_H__
diff --git a/runtime/onert/core/src/backend/basic/MemoryPlanner.test.cc b/runtime/onert/core/src/backend/basic/MemoryPlanner.test.cc
new file mode 100644
index 000000000..a32228cbe
--- /dev/null
+++ b/runtime/onert/core/src/backend/basic/MemoryPlanner.test.cc
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "MemoryPlanner.h"
+#include "ir/Index.h"
+
+TEST(Allocator, allocate_test)
+{
+ ::onert::backend::basic::Allocator allocator(1024);
+ ASSERT_NE(allocator.base(), nullptr);
+}
+
+TEST(BumpPlanner, claim_test)
+{
+ ::onert::backend::basic::BumpPlanner planner;
+
+ auto claim = [&planner](uint32_t index, size_t size, uint32_t expected_offset) {
+ onert::ir::OperandIndex mem_idx(index);
+ planner.claim(mem_idx, size);
+ auto mem_blk = planner.memory_plans()[mem_idx];
+ ASSERT_EQ(mem_blk.offset, expected_offset);
+ ASSERT_EQ(mem_blk.size, size);
+ };
+
+ claim(0, 10, 0);
+ claim(1, 20, 10);
+ claim(2, 30, 30);
+}
+
+TEST(FirstFitPlanner, claim_release_test)
+{
+ ::onert::backend::basic::FirstFitPlanner planner;
+
+ auto claim = [&planner](uint32_t index, size_t size, uint32_t expected_offset) {
+ onert::ir::OperandIndex mem_idx(index);
+ planner.claim(mem_idx, size);
+ auto mem_blk = planner.memory_plans()[mem_idx];
+ ASSERT_EQ(mem_blk.offset, expected_offset);
+ ASSERT_EQ(mem_blk.size, size);
+ };
+
+ auto release = [&planner](uint32_t index) {
+ onert::ir::OperandIndex mem_idx(index);
+ planner.release(mem_idx);
+ };
+
+ // 0 CLAIM - 10
+ claim(0, 10, 0);
+
+ // 1 CLAIM - 20
+ claim(1, 20, 10);
+
+ // 2 CLAIM - 30
+ claim(2, 30, 30);
+
+ // 0 RELEASE - 10
+ release(0);
+
+ // 3 CLAIM - 20
+ claim(3, 20, 60);
+
+ // 4 CLAIM - 5
+ claim(4, 5, 0);
+
+ // 5 CLAIM - 10
+ claim(5, 10, 80);
+
+ // 6 CLAIM - 5
+ claim(6, 5, 5);
+
+ // 2 RELEASE - 30
+ release(2);
+
+ // 7 CLAIM - 35
+ claim(7, 35, 90);
+
+ // 8 CLAIM - 10
+ claim(8, 10, 30);
+
+ // 4 RELEASE - 5
+ release(4);
+
+ // 9 CLAIM - 10
+ claim(9, 10, 40);
+
+ // 10 CLAIM - 10
+ claim(10, 10, 50);
+
+ // 6 RELEASE
+ release(6);
+
+ // 1 RELEASE
+ release(1);
+
+ // 8 RELEASE
+ release(8);
+
+ // 9 RELEASE
+ release(9);
+
+ // 10 RELEASE
+ release(10);
+
+ // 3 RELEASE
+ release(3);
+
+ // 5 RELEASE
+ release(5);
+
+ // 7 RELEASE
+ release(7);
+}
+
+TEST(WICPlanner, claim_release_test)
+{
+ ::onert::backend::basic::WICPlanner planner;
+
+ auto claim = [&planner](uint32_t index, size_t size) {
+ onert::ir::OperandIndex mem_idx(index);
+ planner.claim(mem_idx, size);
+ };
+
+ auto release = [&planner](uint32_t index) {
+ onert::ir::OperandIndex mem_idx(index);
+ planner.release(mem_idx);
+ };
+
+ auto verify = [&planner](uint32_t index, uint32_t size, uint32_t expected_offset) {
+ onert::ir::OperandIndex mem_idx(index);
+ auto mem_blk = planner.memory_plans()[mem_idx];
+ ASSERT_EQ(mem_blk.offset, expected_offset);
+ ASSERT_EQ(mem_blk.size, size);
+ };
+
+ auto capacity = [&planner](uint32_t expected_capacity) {
+ auto actual_capacity = planner.capacity();
+ ASSERT_EQ(actual_capacity, expected_capacity);
+ };
+
+ claim(0, 20);
+ claim(1, 5);
+ release(0);
+ claim(2, 10);
+ release(1);
+ claim(3, 10);
+ release(2);
+ claim(4, 10);
+ release(3);
+ claim(5, 20);
+ release(4);
+ claim(6, 20);
+ release(5);
+  release(6);
+
+ // VERIFY 0 - 0
+ verify(0, 20, 0);
+
+ // VERIFY 1 - 20
+ verify(1, 5, 20);
+
+ // VERIFY 2 - 0
+ verify(2, 10, 0);
+
+ // VERIFY 3 - 10
+ verify(3, 10, 10);
+
+ // VERIFY 4 - 20
+ verify(4, 10, 20);
+
+ // VERIFY 5 - 0
+ verify(5, 20, 0);
+
+ // VERIFY 6 - 20
+ verify(6, 20, 20);
+
+ // CAPACITY - 40
+ capacity(40);
+}
diff --git a/runtime/onert/core/src/backend/basic/MemoryPlannerFactory.cc b/runtime/onert/core/src/backend/basic/MemoryPlannerFactory.cc
new file mode 100644
index 000000000..e12635359
--- /dev/null
+++ b/runtime/onert/core/src/backend/basic/MemoryPlannerFactory.cc
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MemoryPlannerFactory.h"
+
+#include "MemoryPlanner.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+MemoryPlannerFactory &MemoryPlannerFactory::get()
+{
+ static MemoryPlannerFactory instance;
+ return instance;
+}
+
+IMemoryPlanner *MemoryPlannerFactory::create(const std::string &key)
+{
+ if (key == "FirstFit")
+ {
+ return new FirstFitPlanner;
+ }
+ else if (key == "Bump")
+ {
+ return new BumpPlanner;
+ }
+ else if (key == "WIC")
+ {
+ return new WICPlanner;
+ }
+ return new FirstFitPlanner; // Default Planner
+}
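+
+// Editor's note: the key normally comes from the CPU_MEMORY_PLANNER config value (see
+// MemoryManager::createMemoryPlanner()); any unrecognized key silently falls back to
+// FirstFitPlanner, e.g.
+//   auto *planner = MemoryPlannerFactory::get().create("WIC");      // WICPlanner
+//   auto *fallback = MemoryPlannerFactory::get().create("unknown"); // FirstFitPlanner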
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/core/src/backend/basic/MemoryPlannerFactory.h b/runtime/onert/core/src/backend/basic/MemoryPlannerFactory.h
new file mode 100644
index 000000000..fe32f4c99
--- /dev/null
+++ b/runtime/onert/core/src/backend/basic/MemoryPlannerFactory.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BASIC_MEMORY_PLANNER_FACTORY_H__
+#define __ONERT_BACKEND_BASIC_MEMORY_PLANNER_FACTORY_H__
+
+#include "backend/basic/IMemoryPlanner.h"
+
+#include <string>
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+class MemoryPlannerFactory
+{
+public:
+ static MemoryPlannerFactory &get();
+
+private:
+ MemoryPlannerFactory() = default;
+
+public:
+ IMemoryPlanner *create(const std::string &key);
+};
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BASIC_MEMORY_PLANNER_FACTORY_H__
diff --git a/runtime/onert/core/src/backend/basic/StaticTensorManager.cc b/runtime/onert/core/src/backend/basic/StaticTensorManager.cc
new file mode 100644
index 000000000..71cde4cde
--- /dev/null
+++ b/runtime/onert/core/src/backend/basic/StaticTensorManager.cc
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/basic/StaticTensorManager.h"
+
+#include "backend/basic/DynamicTensorManager.h"
+#include "backend/basic/Tensor.h"
+#include <util/logging.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
+ DynamicTensorManager *dynamic_tensor_manager)
+ : _nonconst_mgr{new MemoryManager()}, _tensors{reg}, _dynamic_tensor_manager{
+ dynamic_tensor_manager}
+{
+ // DO NOTHING
+}
+
+StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
+ const std::string planner_id,
+ DynamicTensorManager *dynamic_tensor_manager)
+ : _nonconst_mgr{new MemoryManager(planner_id)}, _tensors{reg}, _dynamic_tensor_manager{
+ dynamic_tensor_manager}
+{
+ // DO NOTHING
+}
+
+void StaticTensorManager::allocateNonconsts(void)
+{
+ _nonconst_mgr->allocate();
+
+ for (auto &&pair : _tensors->native_tensors())
+ {
+ const auto &ind = pair.first;
+ auto tensor = pair.second.get();
+ if (!_as_constants[ind] && !tensor->is_dynamic())
+ {
+ auto *buffer = _nonconst_mgr->getBuffer(ind);
+ tensor->setBuffer(buffer);
+
+ VERBOSE(CPU_StaticTensorManager)
+ << "TENSOR " << ind << " : " << static_cast<void *>(buffer) << std::endl;
+ }
+ }
+}
+
+void StaticTensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); }
+
+void StaticTensorManager::buildTensor(const ir::OperandIndex &ind,
+ const ir::OperandInfo &tensor_info, ir::Layout backend_layout,
+ bool as_const)
+{
+ assert(!_tensors->getNativeTensor(ind));
+ if (as_const)
+ {
+ auto tensor = std::make_unique<ExternalTensor>(tensor_info, backend_layout);
+ _tensors->setNativeTensor(ind, std::move(tensor));
+ }
+ else
+ {
+ auto tensor = std::make_unique<Tensor>(tensor_info, backend_layout,
+ _dynamic_tensor_manager->dynamic_mem_mgr().get());
+ _tensors->setNativeTensor(ind, std::move(tensor));
+ }
+ _as_constants[ind] = as_const;
+}
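+
+// Editor's note: constant operands become ExternalTensors that share the model's own
+// constant data, while non-constant ones become plain Tensors whose memory is planned
+// by the static memory manager (or allocated on demand if they turn dynamic).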
+
+void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
+{
+ assert(_tensors->getNativeTensor(ind));
+
+ // This method is called only when a tensor has proper shape
+ assert(!_tensors->getNativeTensor(ind)->is_dynamic());
+
+ if (!_as_constants[ind])
+ _nonconst_mgr->claimPlan(ind, size);
+}
+
+void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
+{
+ assert(_tensors->getNativeTensor(ind));
+
+ // This method is called only when a tensor has proper shape
+ assert(!_tensors->getNativeTensor(ind)->is_dynamic());
+
+ if (!_as_constants[ind])
+ _nonconst_mgr->releasePlan(ind);
+}
+
+void StaticTensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
+{
+ for (const auto &it : _tensors->native_tensors())
+ fn(it.first);
+}
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/core/src/backend/basic/Tensor.cc b/runtime/onert/core/src/backend/basic/Tensor.cc
new file mode 100644
index 000000000..de1cff4f4
--- /dev/null
+++ b/runtime/onert/core/src/backend/basic/Tensor.cc
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/basic/Tensor.h"
+
+#include "ir/DataType.h"
+#include "backend/basic/MemoryManager.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+Tensor::~Tensor() {}
+
+size_t Tensor::calcOffset(const ir::Coordinates &coords) const
+{
+ auto shape = getShape();
+ size_t rank = shape.rank();
+ rank = rank == 0 ? 1 : rank;
+ size_t offset = 0;
+ for (size_t i = 0; i < rank; ++i)
+ {
+ auto dim = shape.rank() == 0 ? 1 : shape.dim(i);
+ offset = offset * dim + coords[i];
+ }
+ offset *= sizeOfDataType(data_type());
+ return offset;
+}
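+
+// Editor's note: this is the usual row-major offset computation. For instance, for a
+// float32 tensor of shape (2, 3), coords (1, 2) yield (1 * 3 + 2) * 4 = 20 bytes;
+// rank-0 (scalar) tensors are treated as a single element.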
+
+void Tensor::setShape(const ir::Shape &new_shape) { _info.shape(new_shape); }
+
+bool Tensor::applyShape(const ir::Shape &new_shape)
+{
+ bool previously_dynamic = is_dynamic();
+
+ auto allocTensorMem = [&]() {
+ auto capacity = total_size();
+ assert(_dynamic_mem_mgr);
+ auto alloc = _dynamic_mem_mgr->allocate(this, capacity);
+ setBuffer(alloc);
+ };
+
+ if (!previously_dynamic || buffer() == nullptr)
+ {
+ // Always set shape - when buffer with same size was already allocated, shape could differ
+ setShape(new_shape);
+ set_dynamic();
+ allocTensorMem();
+ }
+ else
+ {
+ auto previous_size = total_size();
+ auto new_size = new_shape.num_elements() * ir::sizeOfDataType(data_type());
+ if (previous_size != new_size)
+ {
+ assert(_dynamic_mem_mgr);
+ _dynamic_mem_mgr->deallocate(this);
+
+ setShape(new_shape);
+ set_dynamic();
+ allocTensorMem();
+ }
+ else
+ { // when buffer with same size was already allocated, shape could differ
+ setShape(new_shape);
+ }
+ }
+ return true;
+}
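+
+// Editor's summary of the branches above: a previously static or buffer-less tensor
+// always gets a fresh dynamic allocation; an already-dynamic tensor is reallocated only
+// when its byte size actually changes, otherwise only the shape metadata is updated in
+// place.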
+
+ir::Shape Tensor::getShape() const { return _info.shape(); }
+
+void Tensor::deallocBuffer()
+{
+ if (_allocator)
+ {
+ _buffer = nullptr;
+ _allocator.reset();
+ if (_dynamic_mem_mgr)
+ {
+ _dynamic_mem_mgr->deallocate(this);
+ }
+ }
+}
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+// ExternalTensor
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+// `dynamic_cast` does not work across library boundaries on NDK
+// With this as a key function, `dynamic_cast` works across shared library boundaries
+ExternalTensor::~ExternalTensor() {}
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/core/src/backend/basic/TensorBuilder.cc b/runtime/onert/core/src/backend/basic/TensorBuilder.cc
new file mode 100644
index 000000000..4912af1f5
--- /dev/null
+++ b/runtime/onert/core/src/backend/basic/TensorBuilder.cc
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <backend/basic/TensorBuilder.h>
+
+#include <util/logging.h>
+
+#include <cassert>
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg)
+ : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg)},
+ _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())}
+{
+ /* empty */
+}
+
+TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg,
+ const std::string planner_id)
+ : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg)},
+ _static_tensor_mgr{new StaticTensorManager(_tensor_reg, planner_id, _dynamic_tensor_mgr.get())}
+{
+ /* empty */
+}
+
+void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout layout)
+{
+ _tensor_info_map.emplace(ind, info);
+
+  // CPU backend supports only one layout: NHWC
+ assert(layout == ir::Layout::NHWC);
+ if (info.isDynamic())
+ {
+ _dynamic_tensor_mgr->buildTensor(ind, info, layout);
+ }
+ else
+ {
+ _static_tensor_mgr->buildTensor(ind, info, layout, info.isConstant());
+ }
+}
+
+void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
+{
+ assert(_tensor_info_map.find(ind) != _tensor_info_map.end());
+ const auto &tensor_info = _tensor_info_map.at(ind);
+
+ if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
+ {
+ const auto size = tensor_info.total_size();
+ _static_tensor_mgr->claimPlan(ind, size);
+ }
+}
+
+void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
+{
+ if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
+ {
+ _static_tensor_mgr->releasePlan(ind);
+ }
+}
+
+bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
+{
+ return _tensor_info_map.find(ind) != _tensor_info_map.end();
+}
+
+void TensorBuilder::allocate(void) { _static_tensor_mgr->allocateNonconsts(); }
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/core/src/backend/basic/train/TrainableTensor.cc b/runtime/onert/core/src/backend/basic/train/TrainableTensor.cc
new file mode 100644
index 000000000..d09604224
--- /dev/null
+++ b/runtime/onert/core/src/backend/basic/train/TrainableTensor.cc
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <backend/basic/train/TrainableTensor.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+namespace train
+{
+
+std::vector<ITensor *> TrainableTensor::optVars()
+{
+ std::vector<ITensor *> ret;
+ for (auto &&e : _opt_vars)
+ {
+ ret.emplace_back(e.get());
+ }
+ return ret;
+}
+
+void TrainableTensor::fillBuffer(const std::shared_ptr<ir::Data> &data)
+{
+ auto *buffer = _tensor.buffer();
+ assert(buffer);
+ assert(total_size() == data->size());
+ std::memcpy(buffer, data->base(), data->size());
+}
+
+} // namespace train
+} // namespace basic
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/core/src/backend/builtin/Backend.h b/runtime/onert/core/src/backend/builtin/Backend.h
new file mode 100644
index 000000000..c05494a6a
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/Backend.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_BACKEND_H__
+#define __ONERT_BACKEND_BUILTIN_BACKEND_H__
+
+#include "BackendContext.h"
+#include "Config.h"
+#include "KernelGenerator.h"
+#include "TensorBuilder.h"
+#include "Tensor.h"
+#ifdef ONERT_TRAIN
+#include "train/BackendContext.h"
+#include "train/KernelGenerator.h"
+#include "train/TensorRegistry.h"
+#endif // ONERT_TRAIN
+
+#include <backend/Backend.h>
+#ifdef ONERT_TRAIN
+#include <backend/train/ITrainableBackend.h>
+#endif // ONERT_TRAIN
+
+#include <memory>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+class Backend : public ::onert::backend::Backend
+#ifdef ONERT_TRAIN
+ ,
+ public backend::train::ITrainableBackend
+#endif // ONERT_TRAIN
+{
+public:
+ Backend() : _config{std::make_shared<Config>()} {}
+
+ std::shared_ptr<IConfig> config() const override { return _config; }
+
+ std::unique_ptr<onert::backend::BackendContext> newContext(ContextData &&data) const override
+ {
+ auto context = std::make_unique<BackendContext>(this, std::move(data));
+    // The ControlFlow backend usually need not build tensors for itself, because its
+    // operations use the tensors of other backends instead.
+    // However, the backend does build tensors when a controlflow operation has a constant
+    // input, or when consecutive controlflow operations exist; we have to keep such
+    // tensors from being built again later.
+    // 1. Constant input
+    //   These tensors cannot be dynamic tensors, so handle them as follows:
+    //   - always skip copying
+    //   - if it is an operation's input in a child subgraph: register "use" as a constant
+    //     input of the operations in the child subgraph
+    //   - if it is a child subgraph's output: register "use" as a constant input of the
+    //     operations using it
+    // 2. Intermediate tensor of consecutive controlflow operations
+    //   These tensors can be dynamic tensors, which is complicated to support without
+    //   copying. No such case has appeared so far, so let's support it later.
+    // TODO Remove TensorBuilder and ConstantInitializer
+    // TODO Support intermediate tensors of consecutive controlflow operations
+ auto tr = std::make_shared<TensorRegistry>();
+ auto tb = std::make_shared<TensorBuilder>(tr);
+ context->tensor_registry = tr;
+ context->tensor_builder = tb;
+ context->kernel_gen = std::make_shared<KernelGenerator>(
+ *context->graph(), tb->dynamicTensorManager(), tr, context->external_context());
+ return context;
+ }
+
+#ifdef ONERT_TRAIN
+ std::unique_ptr<backend::train::TrainableBackendContext>
+ newContext(backend::train::TrainableContextData &&tdata) const override
+ {
+ const auto &tgraph = *tdata.tgraph;
+ auto tr = std::make_shared<train::TensorRegistry>();
+ // TODO Create TensorBuilder if necessary
+ auto tdata_ptr = std::make_unique<backend::train::TrainableContextData>(std::move(tdata));
+ auto context = std::make_unique<train::BackendContext>(this, std::move(tdata_ptr), tr);
+
+ context->kernel_gen =
+ std::make_shared<train::KernelGenerator>(tgraph, tr, context->external_context());
+ return context;
+ }
+#endif // ONERT_TRAIN
+
+private:
+ std::shared_ptr<IConfig> _config;
+};
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_BACKEND_H__
diff --git a/runtime/onert/core/src/backend/builtin/BackendContext.cc b/runtime/onert/core/src/backend/builtin/BackendContext.cc
new file mode 100644
index 000000000..573617e28
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/BackendContext.cc
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "KernelGenerator.h"
+#include "backend/basic/BackendContextHelpers.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+ITensorRegistry *BackendContext::genTensors() { return basic::genTensors(*this); }
+
+FunctionMap BackendContext::genKernels()
+{
+ FunctionMap ret;
+
+ for (auto &&op_ind : _data.op_order)
+ {
+ auto fn_seq = kernel_gen->generate(op_ind);
+ ret.emplace_back(op_ind, std::move(fn_seq));
+ }
+
+ basic::initConsts(*this);
+
+ // NOTE For memory optimization, we want to free some operand data
+ const_cast<ir::Graph *>(graph())->operands().iterate(
+ [&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
+
+ for (auto &&it : ret)
+ {
+ auto &fn_seq = it.second;
+ fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
+ }
+
+ return ret;
+}
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/core/src/backend/builtin/BackendContext.h b/runtime/onert/core/src/backend/builtin/BackendContext.h
new file mode 100644
index 000000000..93e825239
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/BackendContext.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_BUILTIN_BACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+#include "ExternalContext.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+class BackendContext : public onert::backend::BackendContext
+{
+public:
+ BackendContext(const Backend *backend, ContextData &&data,
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
+ std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+ : onert::backend::BackendContext(backend, std::move(data), tensor_registry),
+ tensor_builder{tensor_builder}, kernel_gen{kernel_gen},
+ _external_context(std::make_shared<ExternalContext>())
+ {
+ }
+
+ ITensorRegistry *genTensors() override;
+
+ FunctionMap genKernels() override;
+
+ std::shared_ptr<ExternalContext> external_context() { return _external_context; }
+
+private:
+ void planTensors(const std::vector<onert::ir::OperationIndex> &order,
+ const compiler::GraphLowerInfo &lower_info);
+
+public:
+ // TODO Make it private
+ std::shared_ptr<TensorBuilder> tensor_builder;
+ std::shared_ptr<KernelGenerator> kernel_gen;
+
+private:
+  // NOTE A ruy context owns a thread pool, so when multiple ruy contexts are created,
+  // the thread pool is also duplicated
+  // TODO Create one ruy context per session
+ std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_BACKEND_CONTEXT_H__
diff --git a/runtime/onert/core/src/backend/builtin/Config.cc b/runtime/onert/core/src/backend/builtin/Config.cc
new file mode 100644
index 000000000..e5f6d4c21
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/Config.cc
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Config.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+std::string Config::ID = "builtin";
+
+bool Config::initialize() { return true; }
+
+ir::Layout Config::supportLayout(const ir::IOperation &, ir::Layout frontend_layout)
+{
+ return frontend_layout;
+}
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/core/src/backend/builtin/Config.h b/runtime/onert/core/src/backend/builtin/Config.h
new file mode 100644
index 000000000..196b299d3
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/Config.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_CONFIG_H__
+#define __ONERT_BACKEND_BUILTIN_CONFIG_H__
+
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/ITimer.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+class Config : public IConfig
+{
+public:
+ static std::string ID;
+ std::string id() override { return ID; }
+ bool initialize() override;
+ ir::Layout supportLayout(const ir::IOperation &node, ir::Layout frontend_layout) override;
+ bool supportPermutation() override { return false; }
+ bool supportDynamicTensor() override
+ {
+ // TODO Make this backend to support dynamic tensor or not to build non-constant tensor
+ return true;
+ }
+ bool supportFP16() override { return false; }
+
+ std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); }
+};
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_CONFIG_H__
diff --git a/runtime/onert/core/src/backend/builtin/ConstantInitializer.h b/runtime/onert/core/src/backend/builtin/ConstantInitializer.h
new file mode 100644
index 000000000..6b8eb3e9d
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/ConstantInitializer.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_BUILTIN_CONSTANT_INITIALIZER_H__
+#define __ONERT_COMPILER_BUILTIN_CONSTANT_INITIALIZER_H__
+
+#include <backend/basic/ConstantInitializer.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+using ConstantInitializer = basic::ConstantInitializer;
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_COMPILER_BUILTIN_CONSTANT_INITIALIZER_H__
diff --git a/runtime/onert/core/src/backend/builtin/DynamicTensorManager.h b/runtime/onert/core/src/backend/builtin/DynamicTensorManager.h
new file mode 100644
index 000000000..148948a9c
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/DynamicTensorManager.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_DYNAMICTENSOR_MANAGER_H__
+#define __ONERT_BACKEND_BUILTIN_DYNAMICTENSOR_MANAGER_H__
+
+#include "TensorRegistry.h"
+#include "Tensor.h"
+
+#include <backend/basic/DynamicTensorManager.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+using DynamicTensorManager = basic::DynamicTensorManager;
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_DYNAMICTENSOR_MANAGER_H__
diff --git a/runtime/onert/core/src/backend/builtin/ExternalContext.h b/runtime/onert/core/src/backend/builtin/ExternalContext.h
new file mode 100644
index 000000000..390dbb579
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/ExternalContext.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_EXTERNAL_CONTEXT_H__
+#define __ONERT_BACKEND_BUILTIN_EXTERNAL_CONTEXT_H__
+
+#include <util/ConfigSource.h>
+
+#include <ruy/context.h>
+#include <ruy/context_get_ctx.h>
+#include <ruy/ctx.h>
+#include <ruy/tune.h>
+
+#include <memory>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+// TODO Unify this with cpu::ExternalContext
+class ExternalContext
+{
+private:
+ static const int kDefaultNumThreadpoolThreads = 1;
+
+public:
+ ExternalContext() : _ruy_context(std::make_unique<ruy::Context>())
+ {
+ setMaxNumThreads(onert::util::getConfigInt(onert::util::config::RUY_THREADS));
+ initPerThreadState();
+ }
+
+ void setMaxNumThreads(int max_num_threads)
+ {
+ const int target_num_threads =
+ max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads;
+ _ruy_context->set_max_num_threads(target_num_threads);
+ }
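+
+  // Editor's note: a non-positive RUY_THREADS value (i.e. max_num_threads <= -1) falls
+  // back to kDefaultNumThreadpoolThreads, so the ruy context stays single-threaded by
+  // default.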
+
+ ruy::Context *ruy_context() const { return _ruy_context.get(); }
+
+private:
+ void initPerThreadState()
+ {
+ // Initialize per-thread state.
+ const int thread_count = _ruy_context->max_num_threads();
+ auto ctx = ruy::get_ctx(_ruy_context.get());
+ ctx->EnsureThreadSpecificResources(thread_count);
+ for (int i = 0; i < thread_count; i++)
+ {
+ ctx->GetThreadSpecificTuningResolver(i)->SetTuning(ctx->explicit_tuning());
+ }
+ }
+
+private:
+ const std::unique_ptr<ruy::Context> _ruy_context;
+};
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_EXTERNAL_CONTEXT_H__
diff --git a/runtime/onert/core/src/backend/builtin/IOTensor.cc b/runtime/onert/core/src/backend/builtin/IOTensor.cc
new file mode 100644
index 000000000..f7f4a6977
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/IOTensor.cc
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IOTensor.h"
+
+#include <assert.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+// `dynamic_cast` does not work across library boundaries on NDK
+// With this as a key function, `dynamic_cast` works across shared library boundaries
+IOTensor::~IOTensor() {}
+
+IOTensor::IOTensor(const ir::OperandInfo &info, ir::Layout layout)
+ : IPortableTensor{info}, _orig_info{info}, _orig_layout{layout}
+{
+ setUserTensor(nullptr, 0);
+}
+
+void IOTensor::setTensor(IPortableTensor *tensor)
+{
+ assert(tensor);
+ assert(tensor != this);
+ // TODO Handle when layout was changed
+ assert(tensor->layout() == _orig_layout); // Changing layout is not considered yet
+ _user_tensor.reset();
+ _tensor = tensor;
+}
+
+void IOTensor::setUserTensor(uint8_t *buffer, size_t size)
+{
+ _user_tensor = std::make_unique<UserTensor>(_orig_info, _orig_layout, buffer, size);
+ _tensor = _user_tensor.get();
+}
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/core/src/backend/builtin/IOTensor.h b/runtime/onert/core/src/backend/builtin/IOTensor.h
new file mode 100644
index 000000000..d94ed0bca
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/IOTensor.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_IO_TENSOR_H__
+#define __ONERT_BACKEND_BUILTIN_IO_TENSOR_H__
+
+#include "backend/IPortableTensor.h"
+#include "UserTensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+/**
+ * @brief Tensor object that indirects to the tensor it is pointing to.
+ *
+ * A model I/O tensor can be one of two types:
+ *
+ * 1. @c UserTensor, if it belongs to the primary graph
+ * 2. Any other derivative of @c IPortableTensor from another backend, otherwise
+ *
+ * To support both, this object indirects everything to the actual tensor pointer.
+ * Exceptionally, if it is a UserTensor, this class creates and manages it.
+ */
+class IOTensor : public IPortableTensor
+{
+public:
+ IOTensor(const ir::OperandInfo &info, ir::Layout layout);
+ ~IOTensor();
+
+public:
+ void setTensor(IPortableTensor *tensor);
+ void setUserTensor(uint8_t *buffer, size_t size);
+ const ir::OperandInfo &orig_info() const { return _orig_info; }
+ ir::Layout orig_layout() const { return _orig_layout; }
+
+public:
+ uint8_t *buffer() const override { return _tensor->buffer(); }
+ size_t total_size() const override { return _tensor->total_size(); }
+ size_t calcOffset(const ir::Coordinates &coords) const override
+ {
+ return _tensor->calcOffset(coords);
+ }
+ ir::Layout layout() const override { return _tensor->layout(); }
+ ir::DataType data_type() const override { return _tensor->data_type(); }
+ bool is_dynamic() const override
+ {
+ return _is_dynamic || _orig_info.isDynamic() || (_tensor && _tensor->is_dynamic());
+ }
+ void set_dynamic() override { _is_dynamic = true; }
+ ir::Shape getShape() const override { return _tensor->getShape(); }
+ void setShape(const ir::Shape &shape) override
+ {
+    // Workaround: IPortableTensor holds _info as its own member, so keep it in sync
+ _info.shape(shape);
+ _tensor->setShape(shape);
+ }
+ bool is_constant() const override { return _tensor->is_constant(); }
+ bool applyShape(const ir::Shape &shape) override
+ {
+    // Workaround: IPortableTensor holds _info as its own member, so keep it in sync
+ _info.shape(shape);
+ return _tensor->applyShape(shape);
+ }
+
+public:
+ void setShapeOfIPortableTensor(const ir::Shape &shape) { _info.shape(shape); }
+
+private:
+ const ir::OperandInfo _orig_info;
+ const ir::Layout _orig_layout;
+ bool _is_dynamic{false};
+  IPortableTensor *_tensor{nullptr};        ///< The actual tensor that is indirected
+  std::unique_ptr<UserTensor> _user_tensor; ///< If it is a user tensor, it is managed by this object
+};
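+
+// Editor's usage sketch: for primary-graph I/O the executor binds a user buffer via
+// setUserTensor(buffer, size); when the I/O is produced by another backend's tensor,
+// setTensor(tensor) is called instead. Every ITensor query above is then forwarded to
+// whichever tensor is currently bound.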
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_IO_TENSOR_H__
diff --git a/runtime/onert/core/src/backend/builtin/KernelGenerator.cc b/runtime/onert/core/src/backend/builtin/KernelGenerator.cc
new file mode 100644
index 000000000..00c200a92
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/KernelGenerator.cc
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "KernelGenerator.h"
+
+#include "kernel/IfLayer.h"
+#include "kernel/PermuteLayer.h"
+#include "kernel/WhileLayer.h"
+
+#include "exec/FunctionSequence.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+KernelGenerator::KernelGenerator(const ir::Graph &graph, DynamicTensorManager *dyn_tensor_manager,
+ const std::shared_ptr<TensorRegistry> &tensor_reg,
+ const std::shared_ptr<ExternalContext> &external_context)
+ : basic::KernelGeneratorBase{graph}, _dyn_tensor_manager{dyn_tensor_manager},
+ _tensor_reg{tensor_reg}, _tensor_registries{}, _executors{nullptr}, _model_index{},
+ _external_context{external_context}
+{
+ UNUSED_RELEASE(_graph);
+ UNUSED_RELEASE(_tensor_registries);
+ UNUSED_RELEASE(_executors);
+}
+
+std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
+{
+ assert(_dyn_tensor_manager);
+ assert(_tensor_reg);
+
+ auto ret = std::make_unique<exec::FunctionSequence>();
+
+ // Prepare to handle dynamic tensors later
+ auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
+ {
+ dyn_ctx->op = &_graph.operations().at(ind);
+ dyn_ctx->dynamic_shape_inferer =
+ std::make_unique<exec::DynamicShapeInferer>(_graph.operands(), _tensor_reg);
+ }
+ ret->dynamic_tensor_ctx(dyn_ctx);
+
+ auto &op = _graph.operations().at(ind);
+ op.accept(*this);
+ assert(_return_fn); // _return_fn must have been generated
+ ret->append(std::move(_return_fn));
+
+ return ret;
+}
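+
+// Editor's note: each generated FunctionSequence carries a DynamicTensorCtx so that
+// operand shapes can be re-inferred at run time, right before the kernel body runs,
+// when any input turns out to be dynamic.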
+
+void KernelGenerator::visit(const ir::operation::If &node)
+{
+ const auto then_subg_index = node.param().then_subg_index;
+ const auto else_subg_index = node.param().else_subg_index;
+
+ std::vector<backend::IPortableTensor *> input_tensors;
+ for (const auto &input_index : node.getInputs())
+ {
+ auto input_tensor = getPortableTensor(input_index);
+ input_tensors.emplace_back(input_tensor);
+ }
+
+ std::vector<backend::IPortableTensor *> output_tensors;
+ for (const auto &output_index : node.getOutputs())
+ {
+ auto output_tensor = getPortableTensor(output_index);
+ output_tensors.emplace_back(output_tensor);
+ }
+
+  // IfLayer just takes the Executors instead of the then and else executors, to avoid
+  // the complexity of creating executors recursively
+ const auto cond_tensor = input_tensors.front();
+ input_tensors.erase(input_tensors.begin());
+ auto fn = std::make_unique<::onert::backend::builtin::kernel::IfLayer>(
+ cond_tensor, input_tensors, output_tensors, then_subg_index, else_subg_index, _executors,
+ _model_index, _external_context);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Permute &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(0)};
+
+ // Add PermuteLayer
+ std::vector<ITensor *> output_tensors{getTensor(output_index)};
+ std::vector<ITensor *> input_tensors{getTensor(input_index)};
+
+ auto fn =
+ std::make_unique<kernel::PermuteLayer>(input_tensors, output_tensors, _external_context);
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::While &node)
+{
+ const auto cond_subg_index = node.param().cond_subg_index;
+ const auto body_subg_index = node.param().body_subg_index;
+
+  // This op does not support constant inputs, because the builtin backend does not have
+  // a TensorBuilder
+ std::vector<backend::IPortableTensor *> input_tensors;
+ for (const auto &input_index : node.getInputs())
+ {
+ auto input_tensor = getPortableTensor(input_index);
+ input_tensors.emplace_back(input_tensor);
+ }
+
+ std::vector<backend::IPortableTensor *> output_tensors;
+ for (const auto &output_index : node.getOutputs())
+ {
+ auto output_tensor = getPortableTensor(output_index);
+ output_tensors.emplace_back(output_tensor);
+ }
+
+  // WhileLayer just takes the Executors instead of the cond and body executors, to avoid
+  // the complexity of creating executors recursively
+ auto fn = std::make_unique<::onert::backend::builtin::kernel::WhileLayer>(
+ input_tensors, output_tensors, cond_subg_index, body_subg_index, _executors, _model_index,
+ _dyn_tensor_manager->dynamic_mem_mgr().get(), _external_context);
+
+ _return_fn = std::move(fn);
+}
+
+backend::ITensor *KernelGenerator::getTensor(const ir::OperandIndex &index)
+{
+ // get Tensor from all tensor registries (for Permute op)
+ auto ret = _tensor_registries.getITensor(index);
+ assert(ret != nullptr);
+ return ret;
+}
+
+backend::IPortableTensor *KernelGenerator::getPortableTensor(const ir::OperandIndex &index)
+{
+ auto ret = _tensor_reg->getPortableTensor(index);
+ assert(ret != nullptr);
+ return ret;
+}
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/core/src/backend/builtin/KernelGenerator.h b/runtime/onert/core/src/backend/builtin/KernelGenerator.h
new file mode 100644
index 000000000..3c86fe306
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/KernelGenerator.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_KERNEL_GENERATOR_H__
+#define __ONERT_BACKEND_BUILTIN_KERNEL_GENERATOR_H__
+
+#include "DynamicTensorManager.h"
+#include "ExternalContext.h"
+#include "TensorRegistry.h"
+#include "../../compiler/TensorRegistries.h"
+
+#include "backend/basic/KernelGeneratorBase.h"
+#include "exec/IExecutors.h"
+#include "ir/Graph.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+class KernelGenerator : public basic::KernelGeneratorBase
+{
+public:
+ KernelGenerator(const ir::Graph &graph, DynamicTensorManager *dyn_tensor_manager,
+ const std::shared_ptr<TensorRegistry> &tensor_reg,
+ const std::shared_ptr<ExternalContext> &external_context);
+
+ void setTensorRegistries(const compiler::TensorRegistries &tensor_registries)
+ {
+ _tensor_registries = tensor_registries;
+ }
+ void setExecutors(const std::shared_ptr<exec::IExecutors> &executors)
+ {
+ // FIXME Using shared_ptr's raw pointer!
+ _executors = executors.get();
+ }
+
+ void setModelIndex(const ir::ModelIndex &index) { _model_index = index; }
+
+ std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex ind) override;
+
+private:
+ void visit(const ir::operation::If &) override;
+ void visit(const ir::operation::Permute &) override;
+ void visit(const ir::operation::While &) override;
+
+private:
+ backend::ITensor *getTensor(const ir::OperandIndex &index);
+ backend::IPortableTensor *getPortableTensor(const ir::OperandIndex &index);
+
+private:
+ DynamicTensorManager *_dyn_tensor_manager;
+ std::shared_ptr<TensorRegistry> _tensor_reg;
+ compiler::TensorRegistries _tensor_registries;
+ exec::IExecutors *_executors;
+ ir::ModelIndex _model_index;
+ const std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_KERNEL_GENERATOR_H__
diff --git a/runtime/onert/core/src/backend/builtin/Tensor.h b/runtime/onert/core/src/backend/builtin/Tensor.h
new file mode 100644
index 000000000..d55e64161
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/Tensor.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_TENSOR_H__
+#define __ONERT_BACKEND_BUILTIN_TENSOR_H__
+
+#include <backend/basic/Tensor.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+using Tensor = basic::Tensor;
+using ExternalTensor = basic::ExternalTensor;
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_TENSOR_H__
diff --git a/runtime/onert/core/src/backend/builtin/TensorBuilder.cc b/runtime/onert/core/src/backend/builtin/TensorBuilder.cc
new file mode 100644
index 000000000..a2f7af3ea
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/TensorBuilder.cc
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TensorBuilder.h"
+
+#include <util/logging.h>
+
+#include <cassert>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg)
+ : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg->base_reg())},
+ _static_tensor_mgr{
+ new basic::StaticTensorManager(_tensor_reg->base_reg(), _dynamic_tensor_mgr.get())}
+{
+ /* empty */
+}
+
+void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout backend_layout)
+{
+ _tensor_info_map.emplace(ind, info);
+
+ VERBOSE_F() << "cpucommon REGISTER!! " << ind << std::endl;
+ if (info.isDynamic())
+ {
+ _dynamic_tensor_mgr->buildTensor(ind, info, backend_layout);
+ }
+ else
+ {
+ _static_tensor_mgr->buildTensor(ind, info, backend_layout, info.isConstant());
+ }
+}
+
+void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
+{
+ // TODO Enhance the way of checking user tensors
+ if (_tensor_info_map.find(ind) == _tensor_info_map.end()) // Do not proceed for user tensors
+ return;
+
+ const auto &tensor_info = _tensor_info_map.at(ind);
+
+ if (!nativeOwnTensorAt(ind)->is_dynamic())
+ {
+ const auto size = tensor_info.total_size();
+ _static_tensor_mgr->claimPlan(ind, size);
+ }
+}
+
+void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
+{
+ // TODO Enhance the way of checking user tensors
+ if (_tensor_info_map.find(ind) == _tensor_info_map.end()) // Do not proceed for user tensors
+ return;
+
+ if (!nativeOwnTensorAt(ind)->is_dynamic())
+ {
+ _static_tensor_mgr->releasePlan(ind);
+ }
+}
+
+bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
+{
+ // User tensors are not registered in _tensor_info_map, but objects for them do exist
+ // in the tensor registry.
+ // TODO Enhance the way of checking user tensors
+ if (_tensor_reg->getITensor(ind))
+ return true;
+ return _tensor_info_map.find(ind) != _tensor_info_map.end();
+}
+
+void TensorBuilder::allocate(void) { _static_tensor_mgr->allocateNonconsts(); }
+
+DynamicTensorManager *TensorBuilder::dynamicTensorManager(void)
+{
+ return _dynamic_tensor_mgr.get();
+}
+
+basic::Tensor *TensorBuilder::nativeOwnTensorAt(const ir::OperandIndex &ind)
+{
+ return _tensor_reg->getNativeOwnTensor(ind);
+}
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
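
A hedged sketch of the call order these methods are designed for; the actual call sites live in onert's tensor planners, outside this diff, so the sequence below is an assumption based on the claim/release semantics visible above:

    // TensorBuilder builder{tensor_reg};
    // builder.registerTensorInfo(ind, info, layout); // dynamic -> DynamicTensorManager,
    //                                                // static  -> StaticTensorManager
    // builder.notifyFirstUse(ind);                   // claim a memory plan (static tensors only)
    // builder.notifyLastUse(ind);                    // release the plan so memory can be reused
    // builder.allocate();                            // materialize buffers for non-constant
    //                                                // static tensors
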
diff --git a/runtime/onert/core/src/backend/builtin/TensorBuilder.h b/runtime/onert/core/src/backend/builtin/TensorBuilder.h
new file mode 100644
index 000000000..1e364c927
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/TensorBuilder.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_TENSOR_BUILDER_H__
+#define __ONERT_BACKEND_BUILTIN_TENSOR_BUILDER_H__
+
+#include <backend/basic/StaticTensorManager.h>
+#include <backend/basic/TensorRegistry.h>
+#include <backend/basic/Tensor.h>
+
+#include <ir/OperandIndexMap.h>
+
+#include <unordered_map>
+
+#include "DynamicTensorManager.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+class TensorBuilder
+{
+public:
+ TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg);
+
+ /**
+ * @brief Register tensor information to allocate on the builtin backend
+ * @param[in] ind Operand index
+ * @param[in] info Operand information
+ * @param[in] layout Operand data layout
+ */
+ void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout backend_layout);
+
+ void notifyFirstUse(const ir::OperandIndex &);
+ void notifyLastUse(const ir::OperandIndex &);
+
+ bool isRegistered(const ir::OperandIndex &) const;
+
+ void allocate(void);
+
+ DynamicTensorManager *dynamicTensorManager(void);
+
+ /**
+ * @brief Get tensor with a specific OperandIndex.
+ * @param ind OperandIndex for the tensor. A tensor with this index must already exist;
+ * if not, the program will crash with an assert or an exception.
+ * @return operand::Tensor *
+ */
+ basic::Tensor *nativeOwnTensorAt(const ir::OperandIndex &ind);
+
+private:
+ const std::shared_ptr<TensorRegistry> _tensor_reg;
+ std::unique_ptr<DynamicTensorManager> _dynamic_tensor_mgr;
+ std::unique_ptr<basic::StaticTensorManager> _static_tensor_mgr;
+ ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
+};
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_TENSOR_BUILDER_H__
diff --git a/runtime/onert/core/src/backend/builtin/TensorRegistry.h b/runtime/onert/core/src/backend/builtin/TensorRegistry.h
new file mode 100644
index 000000000..ae68b1318
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/TensorRegistry.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_TENSOR_REGISTRY_H__
+#define __ONERT_BACKEND_BUILTIN_TENSOR_REGISTRY_H__
+
+#include "backend/basic/TensorRegistry.h"
+#include "backend/ITensorRegistry.h"
+#include "Tensor.h"
+#include "IOTensor.h"
+#include <assert.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+/**
+ * @brief Tensor registry class for builtin backend
+ *
+ * This class contains three types of tensors: two kinds of native tensors (tensors that are
+ * managed by this backend) and migrant tensors.
+ *
+ * - NativeIOTensor - @c IOTensor managed by this backend ( in @c _native_io_tensors )
+ * - NOTE The tensor it actually points to can be from another backend
+ * - NativeOwnTensor - @c basic::Tensor managed by this backend ( in @c _base_reg )
+ * - MigrantTensor - @c IPortableTensor managed by other backends
+ *
+ * @note @c _base_reg is used in implementation to reuse @c basic::StaticTensorManager
+ *
+ */
+class TensorRegistry : public ITensorRegistry
+{
+public:
+ TensorRegistry() : _base_reg{new basic::TensorRegistry} {}
+
+ ITensor *getITensor(const ir::OperandIndex &ind) override
+ {
+ auto base_tensor = _base_reg->getITensor(ind);
+ if (base_tensor)
+ return base_tensor;
+ return getNativeIOTensor(ind);
+ }
+
+ ITensor *getNativeITensor(const ir::OperandIndex &ind) override
+ {
+ auto base_tensor = _base_reg->getNativeITensor(ind);
+ if (base_tensor)
+ return base_tensor;
+ return getNativeIOTensor(ind);
+ }
+
+ IPortableTensor *getPortableTensor(const ir::OperandIndex &ind)
+ {
+ auto base_tensor = _base_reg->getPortableTensor(ind);
+ if (base_tensor)
+ return base_tensor;
+ return getNativeIOTensor(ind);
+ }
+
+ IPortableTensor *getNativeTensor(const ir::OperandIndex &ind)
+ {
+ auto base_tensor = _base_reg->getNativeTensor(ind);
+ if (base_tensor)
+ return base_tensor;
+ return getNativeIOTensor(ind);
+ }
+
+ Tensor *getNativeOwnTensor(const ir::OperandIndex &ind)
+ {
+ return _base_reg->getNativeTensor(ind);
+ }
+
+ IOTensor *getNativeIOTensor(const ir::OperandIndex &ind)
+ {
+ auto tensor = _native_io_tensors.find(ind);
+ if (tensor != _native_io_tensors.end())
+ return tensor->second.get();
+ return nullptr;
+ }
+
+ bool setMigrantTensor(const ir::OperandIndex &ind, IPortableTensor *tensor) override
+ {
+ assert(tensor);
+ assert(!getITensor(ind)); // For the ind, tensor is not registered yet
+ _base_reg->setMigrantTensor(ind, tensor);
+ return true;
+ }
+
+ void setNativeOwnTensor(ir::OperandIndex ind, std::unique_ptr<Tensor> &&tensor)
+ {
+ assert(tensor);
+ assert(!getITensor(ind)); // For the ind, tensor is not registered yet
+ _base_reg->setNativeTensor(ind, std::move(tensor));
+ }
+
+ void setNativeIOTensor(ir::OperandIndex ind, std::unique_ptr<IOTensor> &&tensor)
+ {
+ assert(tensor);
+ assert(!getITensor(ind)); // For the ind, tensor is not registered yet
+ _native_io_tensors[ind] = std::move(tensor);
+ }
+
+ const ir::OperandIndexMap<std::unique_ptr<IOTensor>> &native_io_tensors()
+ {
+ return _native_io_tensors;
+ }
+ std::shared_ptr<basic::TensorRegistry> base_reg() { return _base_reg; }
+
+private:
+ std::shared_ptr<basic::TensorRegistry> _base_reg;
+ ir::OperandIndexMap<std::unique_ptr<IOTensor>> _native_io_tensors;
+};
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // ifndef __ONERT_BACKEND_BUILTIN_TENSOR_REGISTRY_H__
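
The shared fallback order of the accessors above, reduced to a self-contained sketch; `std::map` and `int` stand in for the registries, operand indices, and tensors:

    #include <map>

    struct TwoLevelLookup
    {
      std::map<int, int> base; // stands in for _base_reg (native-own and migrant tensors)
      std::map<int, int> io;   // stands in for _native_io_tensors

      const int *get(int ind) const
      {
        auto it = base.find(ind);
        if (it != base.end())
          return &it->second; // the base registry wins, as in getITensor()
        it = io.find(ind);
        if (it != io.end())
          return &it->second; // fall back to the native IOTensor map
        return nullptr;       // registered nowhere
      }
    };
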
diff --git a/runtime/onert/core/src/backend/builtin/UserTensor.cc b/runtime/onert/core/src/backend/builtin/UserTensor.cc
new file mode 100644
index 000000000..f0b00b928
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/UserTensor.cc
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "UserTensor.h"
+
+#include "util/Exceptions.h"
+#include "ir/DataType.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+size_t UserTensor::calcOffset(const ir::Coordinates &coords) const
+{
+ size_t rank = getShape().rank();
+ size_t offset = 0;
+ for (size_t i = 0; i < rank; ++i)
+ {
+ offset = offset * getShape().dim(i) + coords[i];
+ }
+ offset *= sizeOfDataType(data_type());
+ return offset;
+}
+
+bool UserTensor::applyShape(const ir::Shape &new_shape)
+{
+ // User tensors cannot be reallocated.
+ auto new_size = new_shape.num_elements() * ir::sizeOfDataType(data_type());
+ if (total_size() < new_size)
+ throw InsufficientBufferSizeException{"User given buffer size is too small."};
+ setShape(new_shape);
+ return true;
+}
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
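
calcOffset() above is plain row-major addressing via Horner's scheme. A standalone restatement with a worked example:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    size_t rowMajorOffset(const std::vector<int> &dims, const std::vector<int> &coords,
                          size_t elem_size)
    {
      assert(dims.size() == coords.size());
      size_t offset = 0;
      for (size_t i = 0; i < dims.size(); ++i)
        offset = offset * dims[i] + coords[i]; // fold in one dimension per step
      return offset * elem_size;               // scale the element index to a byte offset
    }

    // e.g. dims {2, 3, 4}, coords {1, 2, 3}, 4-byte elements:
    // ((1 * 3 + 2) * 4 + 3) * 4 = 23 * 4 = 92 bytes
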
diff --git a/runtime/onert/core/src/backend/builtin/UserTensor.h b/runtime/onert/core/src/backend/builtin/UserTensor.h
new file mode 100644
index 000000000..0d0ed73c5
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/UserTensor.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_USER_TENSOR_H__
+#define __ONERT_BACKEND_BUILTIN_USER_TENSOR_H__
+
+#include "ir/OperandInfo.h"
+#include "backend/IPortableTensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+/**
+ * @brief Tensor object that is for Input and Output tensors from the user.
+ *
+ * This class wraps a buffer that is allocated by the user, so it is responsible for neither
+ * allocation nor deallocation. All the model input/output tensors are wrapped with this class
+ * for execution.
+ *
+ */
+class UserTensor : public IPortableTensor
+{
+public:
+ UserTensor(const ir::OperandInfo &info, ir::Layout layout, uint8_t *buffer, size_t size)
+ : IPortableTensor{info}, _layout{layout}, _buffer{buffer}, _size{size}, _dynamic{false}
+ {
+ }
+
+ UserTensor(const ir::OperandInfo &info, ir::Layout layout) : UserTensor{info, layout, nullptr, 0}
+ {
+ }
+
+public:
+ void setBuffer(uint8_t *buffer, size_t size)
+ {
+ _buffer = buffer;
+ _size = size;
+ }
+
+public:
+ uint8_t *buffer() const override { return _buffer; }
+ size_t total_size() const override { return _size; }
+ size_t calcOffset(const ir::Coordinates &coords) const override;
+ ir::Layout layout() const override { return _layout; }
+ ir::DataType data_type() const override { return _info.typeInfo().type(); }
+ bool is_dynamic() const override { return _dynamic; }
+ void set_dynamic() override { _dynamic = true; }
+ ir::Shape getShape() const override { return _info.shape(); }
+ void setShape(const ir::Shape &new_shape) override { _info.shape(new_shape); }
+ bool is_constant() const override { return false; }
+ bool applyShape(const ir::Shape &) override;
+
+private:
+ ir::Layout _layout;
+ uint8_t *_buffer;
+ size_t _size;
+ bool _dynamic;
+};
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_USER_TENSOR_H__
diff --git a/runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc
new file mode 100644
index 000000000..51bc5a8f2
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IfLayer.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace kernel
+{
+
+IfLayer::IfLayer(backend::IPortableTensor *cond_tensor,
+ const std::vector<backend::IPortableTensor *> input_tensors,
+ const std::vector<backend::IPortableTensor *> output_tensors,
+ const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index,
+ exec::IExecutors *executors, const ir::ModelIndex &model_index,
+ const std::shared_ptr<ExternalContext> &external_context)
+ : _cond_tensor{cond_tensor}, _input_tensors{input_tensors}, _output_tensors{output_tensors},
+ _then_subg_index{then_subg_index}, _else_subg_index{else_subg_index}, _executors{executors},
+ _model_index{model_index}, _external_context{external_context}
+{
+ // At this point, executors may not yet contain the executors of the then and else subgraphs
+}
+
+void IfLayer::run()
+{
+ // Check condition
+ // // If true
+ // // // Set _input_tensors -> then-subg's inputs
+ // // // Set outputs of then-subg -> _output_tensors
+ // // // Run then-subg
+ // // Else
+ // // // Set _input_tensors -> else-subg's inputs
+ // // // Set outputs of else-subg -> _output_tensors
+ // // // Run else-subg
+
+ auto getResultCond = [](backend::IPortableTensor *tensor) -> bool {
+ bool ret = false;
+ tensor->access([&](ITensor &tensor) { ret = *reinterpret_cast<bool *>(tensor.buffer()); });
+ return ret;
+ };
+
+ exec::IExecutor *subg_exec = nullptr;
+ bool cond_result = getResultCond(_cond_tensor);
+ if (cond_result)
+ {
+ VERBOSE(If) << "Call to $" << _then_subg_index << " (then)" << std::endl;
+ subg_exec = _executors->at(_model_index, _then_subg_index);
+ }
+ else
+ {
+ VERBOSE(If) << "Call to $" << _else_subg_index << " (else)" << std::endl;
+ subg_exec = _executors->at(_model_index, _else_subg_index);
+ }
+
+ subg_exec->execute(_input_tensors, _output_tensors);
+ VERBOSE(If) << "Return from $" << (cond_result ? _then_subg_index : _else_subg_index)
+ << std::endl;
+}
+
+} // namespace kernel
+} // namespace builtin
+} // namespace backend
+} // namespace onert
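
The control flow of IfLayer::run() above, compressed into a hedged sketch; `Executor` here is an illustrative stand-in, not the real exec::IExecutor interface:

    #include <vector>

    struct Executor
    {
      virtual ~Executor() = default;
      virtual void execute(const std::vector<float> &in, std::vector<float> &out) = 0;
    };

    void runIf(bool cond, Executor &then_exec, Executor &else_exec,
               const std::vector<float> &inputs, std::vector<float> &outputs)
    {
      // Pick exactly one subgraph executor; both branches share the op's I/O tensors,
      // just as IfLayer passes the same _input_tensors/_output_tensors either way
      Executor &selected = cond ? then_exec : else_exec;
      selected.execute(inputs, outputs);
    }
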
diff --git a/runtime/onert/core/src/backend/builtin/kernel/IfLayer.h b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.h
new file mode 100644
index 000000000..8f639ced9
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_KERNEL_IF_LAYER_H__
+#define __ONERT_BACKEND_BUILTIN_KERNEL_IF_LAYER_H__
+
+#include <backend/IPortableTensor.h>
+#include <exec/IExecutors.h>
+#include "../ExternalContext.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace kernel
+{
+
+class IfLayer : public ::onert::exec::IFunction
+{
+public:
+ IfLayer(backend::IPortableTensor *cond_tensor,
+ const std::vector<backend::IPortableTensor *> input_tensors,
+ const std::vector<backend::IPortableTensor *> output_tensors,
+ const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index,
+ exec::IExecutors *executors, const ir::ModelIndex &model_index,
+ const std::shared_ptr<ExternalContext> &external_context);
+
+public:
+ void run() override;
+
+private:
+ backend::IPortableTensor *_cond_tensor;
+ const std::vector<backend::IPortableTensor *> _input_tensors;
+ const std::vector<backend::IPortableTensor *> _output_tensors;
+ const ir::SubgraphIndex _then_subg_index;
+ const ir::SubgraphIndex _else_subg_index;
+ exec::IExecutors *_executors;
+ ir::ModelIndex _model_index;
+ const std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace kernel
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_KERNEL_IF_LAYER_H__
diff --git a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc
new file mode 100644
index 000000000..600180077
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc
@@ -0,0 +1,324 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PermuteLayer.h"
+
+#include "../../../exec/ShapeConverter.h"
+
+#include <ruy/context.h> // from @ruy
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace kernel
+{
+
+PermuteLayer::PermuteLayer(const std::vector<ITensor *> &src_tensors,
+ const std::vector<ITensor *> &dst_tensors,
+ const std::shared_ptr<ExternalContext> &external_context)
+ : _external_context{external_context}, _tasks_map{}
+{
+ assert(src_tensors.size() == dst_tensors.size());
+ _src_tensors = src_tensors;
+ _dst_tensors = dst_tensors;
+ _src_tensors_offsets.resize(src_tensors.size());
+ _dst_tensors_offsets.resize(dst_tensors.size());
+}
+
+void PermuteLayer::optimize()
+{
+ // Remove pairs that need no copy: src and dst are the same tensor, or either is nullptr
+ auto src_it = _src_tensors.begin();
+ auto dst_it = _dst_tensors.begin();
+ auto src_offsets_it = _src_tensors_offsets.begin();
+ auto dst_offsets_it = _dst_tensors_offsets.begin();
+ while (src_it != _src_tensors.end())
+ {
+ if ((*src_it == *dst_it) || (*src_it == nullptr || *dst_it == nullptr))
+ {
+ src_it = _src_tensors.erase(src_it);
+ dst_it = _dst_tensors.erase(dst_it);
+ src_offsets_it = _src_tensors_offsets.erase(src_offsets_it);
+ dst_offsets_it = _dst_tensors_offsets.erase(dst_offsets_it);
+ }
+ else
+ {
+ auto src = *src_it;
+ auto dst = *dst_it;
+ src_offsets_it->resize(0);
+ dst_offsets_it->resize(0);
+ if (underlying_type(src->data_type()) != underlying_type(dst->data_type()))
+ {
+ // Differently-typed pairs always take the generic permute path in run(); advance the
+ // iterators before continuing, otherwise this loop would never terminate
+ src_it++;
+ dst_it++;
+ src_offsets_it++;
+ dst_offsets_it++;
+ continue;
+ }
+ const auto permute_type = [&]() -> PermuteType {
+ if (src->getShape().rank() == 4 && src->layout() == ir::Layout::NHWC &&
+ dst->layout() == ir::Layout::NCHW)
+ {
+ return PermuteType::NHWC_TO_NCHW;
+ }
+ else if (src->getShape().rank() == 4 && src->layout() == ir::Layout::NCHW &&
+ dst->layout() == ir::Layout::NHWC)
+ {
+ return PermuteType::NCHW_TO_NHWC;
+ }
+ else
+ {
+ return PermuteType::COPY;
+ }
+ }();
+
+ // TODO Support different types
+ auto fn = [&](backend::ITensor &src_tensor) {
+ dst->access([&](backend::ITensor &dst_tensor) {
+ // NOTE The buffers of both tensors can still be nullptr at this step
+ const auto data_size = ir::sizeOfDataType(src_tensor.data_type());
+
+ if (permute_type == PermuteType::COPY)
+ {
+ if ((!src_tensor.has_padding() && !dst_tensor.has_padding()))
+ {
+ const auto num_elements = src_tensor.getShape().num_elements();
+ const int thread_count =
+ _external_context->ruy_context()->max_num_threads() < static_cast<int>(num_elements)
+ ? _external_context->ruy_context()->max_num_threads()
+ : num_elements;
+
+ std::vector<PermuteWorkerTask> tasks;
+ auto start = 0;
+ for (auto i = 0; i < thread_count; ++i)
+ {
+ int end = start + (num_elements - start) / (thread_count - i);
+ tasks.emplace_back(src_tensor.buffer(), dst_tensor.buffer(), start * data_size,
+ start * data_size, (end - start) * data_size);
+ start = end;
+ }
+ assert(tasks.size() >= 1);
+ _tasks_map[src] = std::move(tasks);
+ }
+ else
+ {
+ auto loop_shape = src_tensor.getShape();
+
+ auto copy_axis = loop_shape.rank() - 1;
+ copy_axis = copy_axis < 0 ? 1 : copy_axis;
+ const auto copy_len = loop_shape.dim(copy_axis) * data_size;
+ loop_shape.dim(copy_axis) = 1;
+
+ appendPermuteTasks(src, dst, loop_shape, copy_len);
+ }
+ }
+ else
+ {
+ assert(src_tensor.getShape().rank() == 4 &&
+ (permute_type == PermuteType::NHWC_TO_NCHW ||
+ permute_type == PermuteType::NCHW_TO_NHWC));
+ const auto loop_shape = src_tensor.getShape();
+ const auto copy_len = data_size;
+
+ appendPermuteTasks(src, dst, loop_shape, copy_len);
+ }
+ });
+ };
+ src->access(fn);
+ src_it++;
+ dst_it++;
+ src_offsets_it++;
+ dst_offsets_it++;
+ }
+ }
+}
+
+void PermuteLayer::appendPermuteTasks(const ITensor *src_tensor, ITensor *dst_tensor,
+ const ir::Shape &loop_shape, size_t size)
+{
+ size_t distributed_dim = 0;
+ auto src_shape = src_tensor->getShape();
+ if (src_tensor->layout() == dst_tensor->layout())
+ {
+ for (int i = 1; i < src_shape.rank() - 1; ++i)
+ {
+ distributed_dim = src_shape.dim(distributed_dim) < src_shape.dim(i) ? i : distributed_dim;
+ }
+ }
+ const auto distributed_dim_val = src_shape.dim(distributed_dim);
+ const int thread_count =
+ _external_context->ruy_context()->max_num_threads() < static_cast<int>(distributed_dim_val)
+ ? _external_context->ruy_context()->max_num_threads()
+ : distributed_dim_val;
+ // NOTE Do not remove this assertion: requesting more tasks than max_num_threads would degrade
+ // performance by forcing new threads to be created in the context's thread pool
+ assert(thread_count <= _external_context->ruy_context()->max_num_threads());
+
+ std::vector<PermuteWorkerTask> tasks;
+ int start = 0;
+ auto one_thread_loop_shape = loop_shape;
+ for (auto i = 0; i < thread_count; ++i)
+ {
+ ir::Coordinates start_coords(one_thread_loop_shape.rank());
+ start_coords.set(distributed_dim, start);
+ int end = start + (distributed_dim_val - start) / (thread_count - i);
+ one_thread_loop_shape.dim(distributed_dim) = end - start;
+ tasks.emplace_back(*src_tensor, *dst_tensor, start_coords, one_thread_loop_shape, size);
+ start = end;
+ }
+ assert(tasks.size() >= 1);
+ _tasks_map[src_tensor] = std::move(tasks);
+}
+
+void PermuteLayer::runPermuteTasks(backend::ITensor *src, uint8_t *dst_buffer)
+{
+ assert(src->getShape().num_elements() * ir::sizeOfDataType(src->data_type()) <=
+ src->total_size());
+ std::vector<PermuteWorkerTask> &tasks = _tasks_map.at(src);
+ for (size_t i = 0; i < tasks.size(); ++i)
+ {
+ tasks.at(i).setBuffers(src->buffer(), dst_buffer);
+ }
+ assert(tasks.size() >= 1);
+ _external_context->ruy_context()->mutable_thread_pool()->Execute(tasks.size(), tasks.data());
+}
+
+void PermuteLayer::run()
+{
+ assert(_src_tensors.size() == _dst_tensors.size());
+ // PermuteLayer infers dynamic shape inside itself whenever run is called for the following
+ // reasons:
+ // 1. PermuteLayer has to access dynamic tensor manager for input/output tensors of other backends
+ // 2. Other control flow operations (If/While) use this layer for copying tensors of other
+ // subgraphs (with other backends)
+ // 3. This shape inference code is placed here to avoid the code duplication that the above two
+ // reasons would otherwise cause
+
+ // Infer and apply the output shape when either tensor is dynamic
+ for (size_t i = 0; i < _src_tensors.size(); ++i)
+ {
+ auto dst_tensor = _dst_tensors.at(i);
+ auto src_tensor = _src_tensors.at(i);
+ if (src_tensor->is_dynamic() || dst_tensor->is_dynamic())
+ {
+ // getting output shape
+ auto src_shape = src_tensor->getShape();
+
+ // set output shape and output buffer
+ ir::Shape new_shape =
+ exec::convertShape(src_shape, src_tensor->layout(), dst_tensor->layout());
+
+ try
+ {
+ if (!dst_tensor->applyShape(new_shape))
+ throw std::runtime_error{
+ "Error: PermuteLayer: output's TensorManager does not support dynamic tensor"};
+ assert(dst_tensor->buffer() != nullptr);
+ }
+ catch (const std::out_of_range &e)
+ {
+ std::cerr << "Error: out_of_range in PermuteLayer: output's TensorManager does not support "
+ "dynamic tensor"
+ << '\n';
+ throw;
+ }
+ }
+ assert(exec::convertShape(src_tensor->getShape(), src_tensor->layout(), dst_tensor->layout()) ==
+ dst_tensor->getShape());
+ }
+ assert(_src_tensors.size() == _dst_tensors.size());
+ assert(_src_tensors.size() == _src_tensors_offsets.size());
+ assert(_dst_tensors.size() == _dst_tensors_offsets.size());
+ auto src_it = _src_tensors.begin();
+ auto dst_it = _dst_tensors.begin();
+ auto src_offsets_it = _src_tensors_offsets.begin();
+ auto dst_offsets_it = _dst_tensors_offsets.begin();
+ while (src_it != _src_tensors.end())
+ {
+ auto src = *src_it;
+ auto dst = *dst_it;
+ auto &src_offsets = *src_offsets_it;
+ auto &dst_offsets = *dst_offsets_it;
+
+ if (src->total_size() == 0)
+ {
+ assert(dst->total_size() == 0);
+ }
+ else
+ {
+ if (src != dst)
+ {
+ // Conditions to run permutation with multithreading
+ // 1. The tasks for multithreading were created
+ // 2. The number of tasks is > 1
+ // 3. Neither tensor is dynamic
+ // 4. The underlying data types of both tensors are the same
+ if (_tasks_map.find(src) == _tasks_map.end() || _tasks_map.at(src).size() == 1 ||
+ src->is_dynamic() || dst->is_dynamic() ||
+ underlying_type(src->data_type()) != underlying_type(dst->data_type()))
+ {
+ permute(src, dst, src->getShape().rank(), src_offsets, dst_offsets);
+ }
+ // If dst is a subtensor, we have to use clEnqueueMapBuffer instead of clEnqueueWriteBuffer
+ else if (dst->needMemoryMap() && !dst->is_subtensor())
+ {
+ if (!src->has_padding() && !dst->has_padding() && src->layout() == dst->layout())
+ {
+ // This is more efficient than multi-threading
+ src->access([&](backend::ITensor &) { dst->enqueueWriteBuffer(src->buffer(), false); });
+ }
+ else
+ {
+ // TODO Optimize this block for the case where the padding size of dst is big
+ _buffers_map[dst].reserve(dst->total_size());
+ auto dst_buffer = _buffers_map[dst].data();
+
+ src->access([&](backend::ITensor &) { runPermuteTasks(src, dst_buffer); });
+ dst->enqueueWriteBuffer(dst_buffer, false);
+ }
+ }
+ else if (src->needMemoryMap() && !src->is_subtensor() && !src->has_padding() &&
+ !dst->has_padding() && src->layout() == dst->layout())
+ {
+ // This is more efficient than multi-threading
+ assert(!dst->needMemoryMap());
+ dst->access([&](backend::ITensor &) { src->enqueueReadBuffer(dst->buffer(), true); });
+ }
+ else
+ {
+ auto fn = [&](backend::ITensor &) {
+ dst->access([&](backend::ITensor &) { runPermuteTasks(src, dst->buffer()); });
+ };
+ src->access(fn);
+ }
+ }
+ }
+ src_it++;
+ dst_it++;
+ src_offsets_it++;
+ dst_offsets_it++;
+ }
+}
+
+} // namespace kernel
+} // namespace builtin
+} // namespace backend
+} // namespace onert
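
The chunking rule that optimize() and appendPermuteTasks() above both use, isolated with a worked example: dividing the remaining work by the number of threads still unassigned covers every element, with chunk sizes differing by at most one:

    #include <vector>

    std::vector<int> chunkSizes(int num_elements, int thread_count)
    {
      std::vector<int> sizes;
      int start = 0;
      for (int i = 0; i < thread_count; ++i)
      {
        // Same formula as the task-splitting loops above
        int end = start + (num_elements - start) / (thread_count - i);
        sizes.push_back(end - start);
        start = end;
      }
      return sizes;
    }

    // chunkSizes(10, 3) -> {3, 3, 4}: 10/3 = 3, then 7/2 = 3, then 4/1 = 4
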
diff --git a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h
new file mode 100644
index 000000000..227e32434
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_KERNEL_PERMUTELAYER_H__
+#define __ONERT_BACKEND_BUILTIN_KERNEL_PERMUTELAYER_H__
+
+#include "../ExternalContext.h"
+#include "../../../exec/IPermuteFunction.h"
+
+#include <ruy/thread_pool.h> // from @ruy
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace kernel
+{
+
+class PermuteLayer : public onert::exec::IPermuteFunction
+{
+public:
+ PermuteLayer(const std::vector<ITensor *> &src_tensors, const std::vector<ITensor *> &dst_tensors,
+ const std::shared_ptr<ExternalContext> &external_context);
+
+ void optimize() override;
+
+ void run() override;
+
+private:
+ std::shared_ptr<ExternalContext> _external_context;
+
+private:
+ void appendPermuteTasks(const ITensor *src_tensor, ITensor *dst_tensor,
+ const ir::Shape &loop_shape, size_t size);
+
+ void runPermuteTasks(backend::ITensor *src, uint8_t *dst_buffer);
+
+ struct PermuteWorkerTask : ruy::Task
+ {
+ using Strides = ir::Coordinates;
+
+ PermuteWorkerTask(const ITensor &src_tensor, ITensor &dst_tensor,
+ const ir::Coordinates &start_coords, const ir::Shape &loop_shape, size_t size)
+ : _src_buffer{src_tensor.buffer()}, _dst_buffer{dst_tensor.buffer()},
+ _src_start_offset{src_tensor.calcOffset(start_coords)},
+ _dst_start_offset{dst_tensor.calcOffset(start_coords)}, _src_strides{}, _dst_strides{},
+ _loop_shape{loop_shape}, _size{size}, _src_layout{src_tensor.layout()},
+ _dst_layout{dst_tensor.layout()}, _is_permutation{true}
+ {
+ // Set strides
+ setStrides(src_tensor, &_src_strides);
+ setStrides(dst_tensor, &_dst_strides);
+
+ _is_permutation = (_src_layout != _dst_layout && loop_shape.rank() == 4);
+ }
+ // Constructor for a copy
+ PermuteWorkerTask(const uint8_t *src_buffer, uint8_t *dst_buffer, uint32_t src_start_offset,
+ uint32_t dst_start_offset, size_t size)
+ : _src_buffer{src_buffer}, _dst_buffer{dst_buffer}, _src_start_offset{src_start_offset},
+ _dst_start_offset{dst_start_offset}, _src_strides{0}, _dst_strides{0},
+ _loop_shape{1}, _size{size}, _src_layout{}, _dst_layout{}, _is_permutation{false}
+ {
+ // DO NOTHING
+ }
+ void setBuffers(const uint8_t *src_buffer, uint8_t *dst_buffer)
+ {
+ _src_buffer = src_buffer;
+ _dst_buffer = dst_buffer;
+ }
+ void Run() override
+ {
+ ShapeLoop(_loop_shape, [&](const onert::ir::Coordinates &coords) {
+ size_t src_offset = _src_start_offset;
+ size_t dst_offset = _dst_start_offset;
+ assert(static_cast<size_t>(_loop_shape.rank()) == coords.size());
+ ir::Coordinates dst_coords = coords;
+ if (_is_permutation)
+ {
+ dst_coords = ir::convertCoordinates(coords, _src_layout, _dst_layout);
+ }
+ for (auto i = 0; i < _loop_shape.rank(); ++i)
+ {
+ assert(coords[i] >= 0 && dst_coords[i] >= 0);
+ src_offset += coords[i] * _src_strides[i];
+ dst_offset += dst_coords[i] * _dst_strides[i];
+ }
+ memcpy(_dst_buffer + dst_offset, _src_buffer + src_offset, _size);
+ });
+ }
+
+ private:
+ void setStrides(const ITensor &tensor, Strides *strides)
+ {
+ auto shape = tensor.getShape();
+ const size_t rank = shape.rank();
+ for (size_t i = 0; i < rank; ++i)
+ {
+ ir::Coordinates no_step(rank), one_step(rank);
+ one_step.set(i, 1);
+ if (shape.dim(i) > 1)
+ {
+ strides->set(i, tensor.calcOffset(one_step) - tensor.calcOffset(no_step));
+ }
+ else
+ {
+ // If the dimension value is 0 or 1, the stride of that dimension will not be used
+ // Do not call calcOffset() with coordinate value that is greater than dimension value
+ strides->set(i, 0);
+ }
+ assert((*strides)[i] >= 0);
+ }
+ }
+
+ private:
+ const uint8_t *_src_buffer;
+ uint8_t *_dst_buffer;
+ size_t _src_start_offset;
+ size_t _dst_start_offset;
+ Strides _src_strides;
+ Strides _dst_strides;
+ const ir::Shape _loop_shape;
+ const size_t _size;
+ const ir::Layout _src_layout;
+ const ir::Layout _dst_layout;
+ bool _is_permutation;
+ };
+ std::unordered_map<const ITensor *, std::vector<PermuteWorkerTask>> _tasks_map;
+};
+
+} // namespace kernel
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_KERNEL_PERMUTELAYER_H__
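
What setStrides() above effectively computes for a padding-free row-major tensor (the real code also covers padded layouts by probing calcOffset()); a standalone sketch:

    #include <vector>

    std::vector<long> rowMajorStrides(const std::vector<int> &dims, long elem_size)
    {
      std::vector<long> strides(dims.size(), 0);
      long step = elem_size;
      for (int i = static_cast<int>(dims.size()) - 1; i >= 0; --i)
      {
        if (dims[i] > 1)
          strides[i] = step; // byte distance for a +1 step in dimension i
        // dims of size 0 or 1 keep stride 0: it is never used, matching the guard above
        step *= dims[i];
      }
      return strides;
    }

    // rowMajorStrides({2, 3, 4}, 4) -> {48, 16, 4}
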
diff --git a/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc
new file mode 100644
index 000000000..8b00db468
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "WhileLayer.h"
+
+#include "PermuteLayer.h"
+#include "../../../exec/ExecutorBase.h"
+
+#include <misc/polymorphic_downcast.h>
+
+#include <algorithm>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace kernel
+{
+
+WhileLayer::WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors,
+ const std::vector<backend::IPortableTensor *> output_tensors,
+ const ir::SubgraphIndex &cond_subg_index,
+ const ir::SubgraphIndex &body_subg_index, exec::IExecutors *executors,
+ const ir::ModelIndex &model_index,
+ basic::DynamicMemoryManager *dyn_memory_manager,
+ const std::shared_ptr<ExternalContext> &external_context)
+ : _cond_subg_index{cond_subg_index}, _body_subg_index{body_subg_index},
+ _input_tensors{input_tensors}, _output_tensors{output_tensors}, _executors{executors},
+ _model_index{model_index}, _dyn_memory_manager{dyn_memory_manager}, _external_context{
+ external_context}
+{
+ // At this point, executors may not yet contain the executors of the cond and body subgraphs
+}
+
+void WhileLayer::run()
+{
+ // Copy "_input_tensors" -> "cond subg inputs"
+ // Run cond subg
+ // Loop while the output of cond subg is true
+ // // Copy "_input_tensors" -> "body subg inputs" in the first iteration, then copy "body subg
+ // // outputs" -> "body subg inputs" in subsequent iterations
+ // // Run body subg
+ // // Copy "body subg outputs" -> "cond subg inputs"
+ // // Run cond subg
+ // If the loop is never entered, copy "_input_tensors" -> "_output_tensors"; else copy
+ // "cond subg inputs" -> "_output_tensors"
+ auto cond_exec = _executors->at(_model_index, _cond_subg_index);
+ auto body_exec = _executors->at(_model_index, _body_subg_index);
+
+ // Need a temp tensor to hold the cond subgraph output
+ assert(cond_exec->getOutputTensors().size() == 1);
+ auto cond_output_tensor = [&]() {
+ auto cond_output = cond_exec->getOutputTensors().at(0);
+ auto tensor = std::make_unique<Tensor>(cond_output->orig_info(), cond_output->orig_layout(),
+ _dyn_memory_manager);
+ tensor->set_dynamic();
+ tensor->setBuffer(_dyn_memory_manager->allocate(tensor.get(), tensor->total_size()));
+ return tensor;
+ }();
+
+ VERBOSE(While) << "Call to $" << _cond_subg_index << " (cond)" << std::endl;
+ cond_exec->execute(_input_tensors, {cond_output_tensor.get()});
+ VERBOSE(While) << "Return from $" << _cond_subg_index << std::endl;
+
+ auto getResultCond = [](backend::ITensor *tensor) -> bool {
+ bool ret = false;
+ tensor->access([&](ITensor &tensor) { ret = *reinterpret_cast<bool *>(tensor.buffer()); });
+ return ret;
+ };
+
+ std::vector<ITensor *> op_inputs(_input_tensors.begin(), _input_tensors.end());
+ std::vector<ITensor *> op_outputs(_output_tensors.begin(), _output_tensors.end());
+ // Copy the op inputs straight to the op outputs when the loop body is never executed
+ if (!getResultCond(cond_output_tensor.get()))
+ {
+ PermuteLayer copy_body_inputs_to_op_outputs{op_inputs, op_outputs, _external_context};
+ copy_body_inputs_to_op_outputs.run();
+ return;
+ }
+
+ // Need some temp tensors to hold the body subgraph output
+ std::vector<std::unique_ptr<Tensor>> temp_outputs_o;
+ std::vector<IPortableTensor *> temp_outputs;
+ for (auto &&io_tensor : body_exec->getOutputTensors())
+ {
+ auto tensor = std::make_unique<Tensor>(io_tensor->orig_info(), io_tensor->orig_layout(),
+ _dyn_memory_manager);
+ tensor->set_dynamic();
+ tensor->setBuffer(_dyn_memory_manager->allocate(tensor.get(), tensor->total_size()));
+ temp_outputs.push_back(tensor.get());
+ temp_outputs_o.push_back(std::move(tensor));
+ }
+
+ std::vector<ITensor *> body_outputs(temp_outputs.begin(), temp_outputs.end());
+ PermuteLayer copy_body_outputs_to_op_outputs{body_outputs, op_outputs, _external_context};
+
+ const auto body_execute_with_op_inputs = [&]() {
+ VERBOSE(While) << "Call to $" << _body_subg_index << " (body)" << std::endl;
+ body_exec->execute(_input_tensors, temp_outputs);
+ VERBOSE(While) << "Return from $" << _body_subg_index << std::endl;
+ };
+
+ const auto body_execute_with_body_outputs = [&]() {
+ VERBOSE(While) << "Call to $" << _body_subg_index << " (body)" << std::endl;
+ body_exec->execute(_output_tensors, temp_outputs);
+ VERBOSE(While) << "Return from $" << _body_subg_index << std::endl;
+ };
+
+ std::function<void()> body_execute = body_execute_with_op_inputs;
+ const auto cond_execute = [&]() {
+ VERBOSE(While) << "Call to $" << _cond_subg_index << " (cond)" << std::endl;
+ cond_exec->execute(_output_tensors, {cond_output_tensor.get()});
+ VERBOSE(While) << "Return from $" << _cond_subg_index << std::endl;
+ };
+
+ // Loop while Cond subgraph's output is true
+ while (getResultCond(cond_output_tensor.get()))
+ {
+ body_execute();
+ copy_body_outputs_to_op_outputs.run();
+ cond_execute();
+ body_execute = body_execute_with_body_outputs;
+ }
+
+ // Clean-up the temp tensors
+ _dyn_memory_manager->deallocate(cond_output_tensor.get());
+ for (auto &&tensor : temp_outputs)
+ {
+ _dyn_memory_manager->deallocate(tensor);
+ }
+}
+
+} // namespace kernel
+} // namespace builtin
+} // namespace backend
+} // namespace onert
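
The dataflow of WhileLayer::run() above, compressed into a hedged functional sketch: executors become plain callables and the PermuteLayer copies become value assignments, keeping only the iteration structure:

    #include <functional>
    #include <vector>

    using State = std::vector<float>;

    State runWhile(const std::function<bool(const State &)> &cond,
                   const std::function<State(const State &)> &body, State state)
    {
      // The first cond check reads the op inputs; later checks read the op outputs
      while (cond(state))
        state = body(state); // body outputs feed the next iteration and the next cond check
      return state;          // if cond is false initially, the inputs pass through unchanged
    }
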
diff --git a/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h
new file mode 100644
index 000000000..40ca4fe23
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_KERNEL_WHILE_LAYER_H__
+#define __ONERT_BACKEND_BUILTIN_KERNEL_WHILE_LAYER_H__
+
+#include <backend/IPortableTensor.h>
+#include <exec/IExecutors.h>
+#include <exec/IFunction.h>
+#include <ir/OperandIndexSequence.h>
+#include <ir/Graph.h>
+#include "../ExternalContext.h"
+
+#include "backend/basic/MemoryManager.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace kernel
+{
+
+class WhileLayer : public ::onert::exec::IFunction
+{
+public:
+ WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors,
+ const std::vector<backend::IPortableTensor *> output_tensors,
+ const ir::SubgraphIndex &cond_subg_index, const ir::SubgraphIndex &body_subg_index,
+ exec::IExecutors *executors, const ir::ModelIndex &model_index,
+ basic::DynamicMemoryManager *dyn_memory_manager,
+ const std::shared_ptr<ExternalContext> &external_context);
+
+public:
+ void run() override;
+
+private:
+ const ir::SubgraphIndex _cond_subg_index;
+ const ir::SubgraphIndex _body_subg_index;
+ const std::vector<backend::IPortableTensor *> _input_tensors;
+ const std::vector<backend::IPortableTensor *> _output_tensors;
+ exec::IExecutors *_executors;
+ const ir::ModelIndex _model_index;
+ basic::DynamicMemoryManager *_dyn_memory_manager; // For generating temp tensors
+ const std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace kernel
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_KERNEL_WHILE_LAYER_H__
diff --git a/runtime/onert/core/src/backend/builtin/train/BackendContext.cc b/runtime/onert/core/src/backend/builtin/train/BackendContext.cc
new file mode 100644
index 000000000..fa9131f4d
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/train/BackendContext.cc
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "backend/basic/train/TrainableBackendContextHelpers.h"
+#include "exec/FunctionSequence.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace train
+{
+
+backend::ITensorRegistry *BackendContext::genTensors()
+{
+ // For now, there is no need to generate tensors for forwarding.
+ // builtin train backend handles 3 operators: `Permute`, `IF`, `WHILE`.
+ // `Permute`: Tensor generation is not required.
+ // `IF`, `WHILE`: Not supported yet
+ return tensor_registry().get();
+}
+
+backend::train::ITensorRegistry *BackendContext::genTrainingTensors()
+{
+ // For now, there is no need to generate tensors for backwarding.
+ return tensor_registry().get();
+}
+
+backend::train::FunctionMap BackendContext::genKernels()
+{
+ backend::train::FunctionMap ret;
+
+ for (auto &&op_ind : _tdata->op_order)
+ {
+ auto tn_seq = kernel_gen->generate(op_ind);
+ ret.emplace_back(op_ind, std::move(tn_seq));
+ }
+
+ trainable_graph()->operands().iterate(
+ [&](const ir::OperandIndex &ind, const ir::Operand &operand) {
+ if (!external_operands().contains(ind) && operand.isConstant())
+ {
+ throw std::runtime_error(
+ "BackendContext: builtin backend does not support updatable weights yet");
+ }
+ });
+
+ // TODO Enable prepare()
+ // for (auto &&it : ret)
+ // {
+ // auto &fn_seq = it.second;
+ // fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
+ // }
+
+ return ret;
+}
+
+} // namespace train
+} // namespace builtin
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/core/src/backend/builtin/train/BackendContext.h b/runtime/onert/core/src/backend/builtin/train/BackendContext.h
new file mode 100644
index 000000000..6f8ce4cae
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/train/BackendContext.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_TRAIN_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_BUILTIN_TRAIN_BACKEND_CONTEXT_H__
+
+#include <backend/train/TrainableBackendContext.h>
+
+#include "KernelGenerator.h"
+#include "../ExternalContext.h"
+#include "../TensorBuilder.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace train
+{
+
+class BackendContext : public backend::train::TrainableBackendContext
+{
+public:
+ BackendContext(const backend::train::ITrainableBackend *backend,
+ std::unique_ptr<backend::train::TrainableContextData> &&data,
+ std::shared_ptr<backend::train::ITensorRegistry> tensor_registry = nullptr,
+ std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+ : backend::train::TrainableBackendContext(backend, std::move(data), tensor_registry),
+ kernel_gen{kernel_gen},
+ _external_context(new ExternalContext), _tensor_builder{tensor_builder}
+ {
+ }
+
+ backend::ITensorRegistry *genTensors() override;
+ backend::train::ITensorRegistry *genTrainingTensors() override;
+
+public:
+ backend::train::FunctionMap genKernels() override;
+
+ std::shared_ptr<ExternalContext> external_context() { return _external_context; }
+
+public:
+ // TODO Make it private
+ std::shared_ptr<KernelGenerator> kernel_gen;
+
+private:
+ // NOTE ruy context has a thread pool, and when multiple ruy contexts are created,
+ // the thread pool is also created in duplicate
+ // TODO Create one ruy context for session
+ std::shared_ptr<ExternalContext> _external_context;
+
+private:
+ std::shared_ptr<TensorBuilder> _tensor_builder;
+};
+
+} // namespace train
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_TRAIN_BACKEND_CONTEXT_H__
diff --git a/runtime/onert/core/src/backend/builtin/train/KernelGenerator.cc b/runtime/onert/core/src/backend/builtin/train/KernelGenerator.cc
new file mode 100644
index 000000000..6f2c0a3b9
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/train/KernelGenerator.cc
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "KernelGenerator.h"
+
+#include "kernel/PermuteLayer.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace train
+{
+
+KernelGenerator::KernelGenerator(const ir::train::TrainableGraph &tgraph,
+ const std::shared_ptr<TensorRegistry> &tensor_reg,
+ const std::shared_ptr<ExternalContext> &external_context)
+ : KernelGeneratorBase{tgraph}, _tensor_reg{tensor_reg}, _external_context(external_context)
+{
+}
+
+std::unique_ptr<exec::train::TrainableFnSequence> KernelGenerator::generate(ir::OperationIndex ind)
+{
+ auto ret = std::make_unique<exec::train::TrainableFnSequence>();
+ const auto &op = _tgraph.operation(ind);
+ op.accept(*this);
+ // _return_fn must have been generated
+ if (_return_fn == nullptr)
+ {
+ throw std::runtime_error(op.name() + " op does not support a trainable kernel yet");
+ }
+
+ ret->_functions.emplace_back(std::move(_return_fn));
+
+ return ret;
+}
+
+void KernelGenerator::visit(const ir::train::operation::Permute &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(0)};
+
+ // Add PermuteLayer
+ std::vector<ITensor *> output_tensors{getTensor(output_index)};
+ std::vector<ITensor *> input_tensors{getTensor(input_index)};
+
+ std::vector<ITensor *> output_deriv_tensors;
+ std::vector<ITensor *> input_deriv_tensors;
+
+ auto input_deriv_tensor = getDerivativeTensor(input_index);
+ auto output_deriv_tensor = getDerivativeTensor(output_index);
+ output_deriv_tensors.emplace_back(output_deriv_tensor);
+ input_deriv_tensors.emplace_back(input_deriv_tensor);
+
+ // NOTE IOTensors of graph outputs, which pass data to users, must be ignored during training
+ // because the buffers of those IOTensors are unnecessary and remain nullptr
+ bool ignore_forward_in_training = _whole_graph_outputs.contains(output_index);
+ auto fn = std::make_unique<kernel::PermuteLayer>(input_tensors, output_tensors,
+ input_deriv_tensors, output_deriv_tensors,
+ ignore_forward_in_training, _external_context);
+
+ _return_fn = std::move(fn);
+}
+
+backend::ITensor *KernelGenerator::getTensor(const ir::OperandIndex &index)
+{
+ // Get Tensor from all tensor registries (for Permute op)
+ auto ret = _tensor_registries.getITensor(index);
+ assert(ret != nullptr);
+ return ret;
+}
+
+backend::ITensor *KernelGenerator::getDerivativeTensor(const ir::OperandIndex &index)
+{
+ // Get derivative Tensor from all tensor registries (for Permute op)
+ auto ret = _tensor_registries.getDerivativeITensor(index);
+ return ret;
+}
+
+} // namespace train
+} // namespace builtin
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/core/src/backend/builtin/train/KernelGenerator.h b/runtime/onert/core/src/backend/builtin/train/KernelGenerator.h
new file mode 100644
index 000000000..d8781c0d0
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/train/KernelGenerator.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_TRAIN_KERNEL_GENERATOR_H__
+#define __ONERT_BACKEND_BUILTIN_TRAIN_KERNEL_GENERATOR_H__
+
+#include "../ExternalContext.h"
+#include "../train/TensorRegistry.h"
+#include "../../../compiler/train/TensorRegistries.h"
+
+#include <backend/train/KernelGeneratorBase.h>
+#include <exec/train/TrainableFnSequence.h>
+#include <ir/train/TrainableGraph.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace train
+{
+
+class KernelGenerator : public backend::train::KernelGeneratorBase
+{
+public:
+ KernelGenerator(const ir::train::TrainableGraph &tgraph,
+ const std::shared_ptr<TensorRegistry> &tensor_reg,
+ const std::shared_ptr<ExternalContext> &external_context);
+
+ std::unique_ptr<exec::train::TrainableFnSequence> generate(ir::OperationIndex ind) override;
+
+ void setTensorRegistries(const compiler::train::TensorRegistries &tensor_registries)
+ {
+ _tensor_registries = tensor_registries;
+ }
+
+ void setWholeGraphOutputs(const ir::OperandIndexSequence &outputs)
+ {
+ _whole_graph_outputs = outputs;
+ }
+
+private:
+ void visit(const ir::train::operation::Permute &) override;
+
+private:
+ backend::ITensor *getTensor(const ir::OperandIndex &index);
+ backend::ITensor *getDerivativeTensor(const ir::OperandIndex &index);
+
+private:
+ std::shared_ptr<TensorRegistry> _tensor_reg;
+ compiler::train::TensorRegistries _tensor_registries;
+ const std::shared_ptr<ExternalContext> _external_context;
+ ir::OperandIndexSequence _whole_graph_outputs;
+};
+
+} // namespace train
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_TRAIN_KERNEL_GENERATOR_H__
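
For orientation, a minimal sketch of how this generator is expected to be wired up follows. It only uses the APIs declared above; the surrounding objects (graph, registries, context) are assumed to be prepared by the caller, and whether TrainableGraph exposes getOutputs() is an assumption here.

// Sketch only, under the assumptions stated in the paragraph above.
std::unique_ptr<onert::exec::train::TrainableFnSequence>
buildTrainableFn(const onert::ir::train::TrainableGraph &tgraph,
                 const std::shared_ptr<onert::backend::builtin::train::TensorRegistry> &reg,
                 const std::shared_ptr<onert::backend::builtin::ExternalContext> &ctx,
                 const onert::compiler::train::TensorRegistries &all_regs,
                 onert::ir::OperationIndex op_index)
{
  onert::backend::builtin::train::KernelGenerator gen{tgraph, reg, ctx};
  gen.setTensorRegistries(all_regs);             // lets getTensor() see every backend
  gen.setWholeGraphOutputs(tgraph.getOutputs()); // assumed accessor; marks outputs to skip
  return gen.generate(op_index);                 // throws for ops without trainable kernels
}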
diff --git a/runtime/onert/core/src/backend/builtin/train/Tensor.h b/runtime/onert/core/src/backend/builtin/train/Tensor.h
new file mode 100644
index 000000000..611407bd2
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/train/Tensor.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_TRAIN_TRAINABLE_TENSOR_H__
+#define __ONERT_BACKEND_BUILTIN_TRAIN_TRAINABLE_TENSOR_H__
+
+#include <backend/basic/train/TrainableTensor.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace train
+{
+
+using TrainableTensor = basic::train::TrainableTensor;
+using DerivativeTensor = basic::Tensor;
+using GradientTensor = basic::Tensor;
+
+} // namespace train
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_TRAIN_TRAINABLE_TENSOR_H__
diff --git a/runtime/onert/core/src/backend/builtin/train/TensorRegistry.h b/runtime/onert/core/src/backend/builtin/train/TensorRegistry.h
new file mode 100644
index 000000000..c48e5fe93
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/train/TensorRegistry.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_TRAIN_TENSOR_REGISTRY_H__
+#define __ONERT_BACKEND_BUILTIN_TRAIN_TENSOR_REGISTRY_H__
+
+#include <backend/train/ITensorRegistry.h>
+
+#include "../IOTensor.h"
+#include "../Tensor.h"
+#include "Tensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace train
+{
+
+using BaseTensorRegistry =
+ backend::train::PortableTensorRegistryTemplate<Tensor, TrainableTensor, DerivativeTensor,
+ GradientTensor>;
+
+class TensorRegistry : public backend::train::ITensorRegistry
+{
+public:
+ TensorRegistry() : _base_reg{new BaseTensorRegistry} {}
+
+ ITensor *getITensor(const ir::OperandIndex &index) override
+ {
+ auto base_tensor = _base_reg->getITensor(index);
+ if (base_tensor)
+ return base_tensor;
+ return getNativeIOTensor(index);
+ }
+
+ ITensor *getNativeITensor(const ir::OperandIndex &index) override
+ {
+ auto base_tensor = _base_reg->getNativeITensor(index);
+ if (base_tensor)
+ return base_tensor;
+ return getNativeIOTensor(index);
+ }
+
+ IPortableTensor *getPortableTensor(const ir::OperandIndex &index)
+ {
+ auto base_tensor = _base_reg->getPortableTensor(index);
+ if (base_tensor)
+ return base_tensor;
+ return getNativeIOTensor(index);
+ }
+
+ IOTensor *getNativeIOTensor(const ir::OperandIndex &index)
+ {
+ auto tensor = _native_io_tensors.find(index);
+ if (tensor != _native_io_tensors.end())
+ return tensor->second.get();
+ return nullptr;
+ }
+
+ ITensor *getDerivativeITensor(const ir::OperandIndex &index) override
+ {
+ return _base_reg->getDerivativeTensor(index);
+ }
+
+ ITensor *getGradientITensor(const ir::OperandIndex &index) override
+ {
+ return _base_reg->getGradientTensor(index);
+ }
+
+ DerivativeTensor *getDerivativeTensor(const ir::OperandIndex &index)
+ {
+ return _base_reg->getDerivativeTensor(index);
+ }
+
+ bool setMigrantTensor(const ir::OperandIndex &index, IPortableTensor *tensor) override
+ {
+ assert(tensor);
+ assert(!getITensor(index)); // For the index, tensor is not registered yet
+ _base_reg->setMigrantTensor(index, tensor);
+ return true;
+ }
+
+ void setDerivativeTensor(const ir::OperandIndex &index, std::unique_ptr<DerivativeTensor> tensor)
+ {
+ _base_reg->setDerivativeTensor(index, std::move(tensor));
+ }
+
+ void setGradientTensor(const ir::OperandIndex &index, std::unique_ptr<GradientTensor> tensor)
+ {
+ _base_reg->setGradientTensor(index, std::move(tensor));
+ }
+
+ void setNativeIOTensor(ir::OperandIndex index, std::unique_ptr<IOTensor> &&tensor)
+ {
+ assert(tensor);
+ assert(!getITensor(index)); // For the index, tensor is not registered yet
+ _native_io_tensors[index] = std::move(tensor);
+ }
+
+ const ir::OperandIndexMap<std::unique_ptr<IOTensor>> &native_io_tensors()
+ {
+ return _native_io_tensors;
+ }
+ std::shared_ptr<BaseTensorRegistry> base_reg() { return _base_reg; }
+
+private:
+ std::shared_ptr<BaseTensorRegistry> _base_reg;
+ ir::OperandIndexMap<std::unique_ptr<IOTensor>> _native_io_tensors;
+};
+
+} // namespace train
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_TRAIN_TENSOR_REGISTRY_H__
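
Note the two-level lookup the registry above implements: getITensor() first consults the base registry (native and migrant tensors) and only then falls back to the native IOTensor map, so model I/O resolves through the same call as everything else. A short usage sketch, with the tensor object and index supplied by the caller:

// Sketch only: demonstrates the registration/lookup fallback of TensorRegistry.
#include <cassert>
#include <memory>

void registerAndResolve(onert::backend::builtin::train::TensorRegistry &reg,
                        const onert::ir::OperandIndex &io_index,
                        std::unique_ptr<onert::backend::builtin::IOTensor> io_tensor)
{
  reg.setNativeIOTensor(io_index, std::move(io_tensor));
  // The base registry misses, so getITensor() falls through to the IOTensor map.
  assert(reg.getITensor(io_index) != nullptr);
}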
diff --git a/runtime/onert/core/src/backend/builtin/train/kernel/PermuteLayer.cc b/runtime/onert/core/src/backend/builtin/train/kernel/PermuteLayer.cc
new file mode 100644
index 000000000..929092dde
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/train/kernel/PermuteLayer.cc
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PermuteLayer.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace train
+{
+namespace kernel
+{
+
+PermuteLayer::PermuteLayer(const std::vector<ITensor *> &src_tensors,
+ const std::vector<ITensor *> &dst_tensors,
+ const std::vector<ITensor *> &input_deriv_tensors,
+ const std::vector<ITensor *> &output_deriv_tensors,
+ bool ignore_forward_in_training,
+ const std::shared_ptr<ExternalContext> &external_context)
+ : builtin::kernel::PermuteLayer{src_tensors, dst_tensors, external_context},
+ _input_deriv_tensors{input_deriv_tensors}, _output_deriv_tensors{output_deriv_tensors},
+ _ignore_forward_in_training{ignore_forward_in_training}
+{
+ assert(input_deriv_tensors.size() == output_deriv_tensors.size());
+ assert(src_tensors.size() == dst_tensors.size());
+}
+
+void PermuteLayer::optimize()
+{
+ builtin::kernel::PermuteLayer::optimize();
+
+ // TODO Calculate offsets of derivative tensors if necessary
+}
+
+void PermuteLayer::forward(bool training)
+{
+ if (training && _ignore_forward_in_training)
+ return;
+
+ builtin::kernel::PermuteLayer::run();
+}
+
+void PermuteLayer::backward()
+{
+ for (uint32_t i = 0; i < _output_deriv_tensors.size(); ++i)
+ {
+ auto src_deriv = _output_deriv_tensors.at(i);
+ auto dst_deriv = _input_deriv_tensors.at(i);
+
+ // NOTE The derivative tensors corresponding to the model's inputs/outputs are nullptr
+ // because permuting those tensors is meaningless
+ if (src_deriv && dst_deriv)
+ {
+ const auto rank = src_deriv->getShape().rank();
+ auto output_offsets = _dst_tensors_offsets.at(i);
+ auto input_offsets = _src_tensors_offsets.at(i);
+
+ exec::IPermuteFunction::permute(src_deriv, dst_deriv, rank, output_offsets, input_offsets);
+ }
+ }
+}
+
+} // namespace kernel
+} // namespace train
+} // namespace builtin
+} // namespace backend
+} // namespace onert
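
The forward/backward pair above is intentionally symmetric: forward() permutes activations from source to destination tensors (and is a no-op for ignored graph outputs while training), while backward() permutes derivatives in the opposite direction, from output derivatives back to input derivatives. A sketch of the call order a training executor would be expected to follow, using only methods declared in this patch:

// Sketch only: the expected call sequence for the trainable PermuteLayer.
void stepThroughPermute(onert::backend::builtin::train::kernel::PermuteLayer &permute)
{
  permute.optimize();                 // precompute copy offsets once, before execution
  permute.forward(/*training=*/true); // activations: src -> dst (or skipped for outputs)
  // ... forward and backward passes of the rest of the graph ...
  permute.backward();                 // derivatives: output deriv -> input deriv
}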
diff --git a/runtime/onert/core/src/backend/builtin/train/kernel/PermuteLayer.h b/runtime/onert/core/src/backend/builtin/train/kernel/PermuteLayer.h
new file mode 100644
index 000000000..de8063a21
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/train/kernel/PermuteLayer.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_TRAIN_KERNEL_PERMUTELAYER_H__
+#define __ONERT_BACKEND_BUILTIN_TRAIN_KERNEL_PERMUTELAYER_H__
+
+#include "../../kernel/PermuteLayer.h"
+
+#include "exec/train/ITrainableFunction.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace train
+{
+namespace kernel
+{
+
+class PermuteLayer : public builtin::kernel::PermuteLayer, public exec::train::ITrainableFunction
+{
+public:
+ PermuteLayer(const std::vector<ITensor *> &src_tensors, const std::vector<ITensor *> &dst_tensors,
+ const std::vector<ITensor *> &input_deriv_tensors,
+ const std::vector<ITensor *> &output_deriv_tensors, bool ignore_forward_in_training,
+ const std::shared_ptr<ExternalContext> &external_context);
+
+ void optimize() override;
+
+ void forward(bool training) override;
+ void backward() override;
+
+private:
+ std::vector<ITensor *> _input_deriv_tensors;
+ std::vector<ITensor *> _output_deriv_tensors;
+ bool _ignore_forward_in_training;
+};
+
+} // namespace kernel
+} // namespace train
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_TRAIN_KERNEL_PERMUTELAYER_H__
diff --git a/runtime/onert/core/src/backend/controlflow/Backend.h b/runtime/onert/core/src/backend/controlflow/Backend.h
deleted file mode 100644
index 670f7750f..000000000
--- a/runtime/onert/core/src/backend/controlflow/Backend.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_BACKEND_H__
-#define __ONERT_BACKEND_CONTROLFLOW_BACKEND_H__
-
-#include "Config.h"
-#include "ConstantInitializer.h"
-#include "KernelGenerator.h"
-#include "TensorBuilder.h"
-#include "Tensor.h"
-
-#include <backend/Backend.h>
-
-#include <memory>
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-class Backend : public ::onert::backend::Backend
-{
-public:
- Backend() : _config{std::make_shared<Config>()} {}
-
- std::shared_ptr<IConfig> config() const override { return _config; }
-
- std::unique_ptr<BackendContext> newContext(const ir::Graph &graph,
- const std::shared_ptr<custom::IKernelBuilder> &,
- bool) const override
- {
- const auto &operands = graph.operands();
- auto context = std::make_unique<BackendContext>(this, &graph);
- // The ControlFlow backend may not build tensors for itself because its operations use
- // tensors of other backends instead.
- // But the backend does build tensors for cases where a controlflow operation has a constant
- // input or where consecutive controlflow operations exist. We have to keep them from being built
- // later.
- // 1. Constant input
- // These tensors cannot be dynamic tensors, so let's do it as follows:
- // - always skip copying
- // - if it is an operation's input in a child subgraph: register "use" as constant input of the
- // operations in the child subgraph
- // - if it is a child subgraph's output: register "use" as constant input of the operations
- // using it
- // 2. A consecutive controlflow operation's intermediate tensor
- // These tensors can be dynamic and are complicated to support without copying. But
- // there is no such case so far, so let's support it later
- // TODO Remove TensorBuilder and ConstantInitializer
- // TODO Support consecutive controlflow operations' intermediate tensors
- auto tr = std::make_shared<TensorRegistry>();
- auto tb = std::make_shared<TensorBuilder>(tr);
- context->tensor_registry = tr;
- context->tensor_builder = tb;
- context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
- context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb->dynamicTensorManager(), tr);
- context->tensor_register = nullptr;
- context->optimizer = nullptr;
- return context;
- }
-
-private:
- std::shared_ptr<IConfig> _config;
-};
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_BACKEND_H__
diff --git a/runtime/onert/core/src/backend/controlflow/Config.cc b/runtime/onert/core/src/backend/controlflow/Config.cc
deleted file mode 100644
index 5ec01fe11..000000000
--- a/runtime/onert/core/src/backend/controlflow/Config.cc
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Config.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-std::string Config::ID = "controlflow";
-
-bool Config::initialize() { return true; }
-
-ir::Layout Config::supportLayout(const ir::Operation &, ir::Layout frontend_layout)
-{
- return frontend_layout;
-}
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/core/src/backend/controlflow/Config.h b/runtime/onert/core/src/backend/controlflow/Config.h
deleted file mode 100644
index 6645ed59d..000000000
--- a/runtime/onert/core/src/backend/controlflow/Config.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_CONFIG_H__
-#define __ONERT_BACKEND_CONTROLFLOW_CONFIG_H__
-
-#include <backend/IConfig.h>
-#include <memory>
-#include <util/ITimer.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-class Config : public IConfig
-{
-public:
- static std::string ID;
- std::string id() override { return ID; }
- bool initialize() override;
- ir::Layout supportLayout(const ir::Operation &node, ir::Layout frontend_layout) override;
- bool supportPermutation() override { return false; }
- bool supportDynamicTensor() override
- {
- // TODO Make this backend to support dynamic tensor or not to build non-constant tensor
- return true;
- }
- bool supportFP16() override { return false; }
-
- std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); }
-};
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_CONFIG_H__
diff --git a/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h b/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h
deleted file mode 100644
index e21a8f357..000000000
--- a/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__
-#define __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__
-
-#include "TensorRegistry.h"
-
-#include <backend/IConstantInitializer.h>
-#include <ir/Operands.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-class ConstantInitializer : public IConstantInitializer
-{
-public:
- ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<ITensorRegistry> &tensor_reg)
- : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
- {
- }
-
-private:
- std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; }
-
-private:
- std::shared_ptr<ITensorRegistry> _tensor_reg;
-};
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__
diff --git a/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.cc b/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.cc
deleted file mode 100644
index 1288e4c96..000000000
--- a/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.cc
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "DynamicTensorManager.h"
-
-#include "util/logging.h"
-#include "util/Exceptions.h"
-#include "ir/DataType.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-DynamicTensorManager::DynamicTensorManager(const std::shared_ptr<TensorRegistry> &tensors)
- : _dynamic_mem_mgr{new cpu_common::DynamicMemoryManager()}, _tensors{tensors}
-{
- // DO NOTHING
-}
-
-void DynamicTensorManager::applyShape(const ir::OperandIndex &ind, const ir::Shape &new_shape)
-{
- // NOTE Handle user tensors first
- auto user_tensor = _tensors->getNativeUserTensor(ind);
- if (user_tensor)
- {
- // User tensors cannot be reallocated.
- auto buffer_size = user_tensor->total_size();
- auto new_size = new_shape.num_elements() * sizeOfDataType(user_tensor->data_type());
- if (buffer_size < new_size)
- throw InsufficientBufferSizeException{"Output buffer size is less than output tensor size"};
- user_tensor->setShape(new_shape);
- return;
- }
-
- // NOTE Then handle own tensors
- auto tensor = _tensors->getNativeOwnTensor(ind);
- assert(tensor);
-
- bool previously_dynamic = tensor->is_dynamic();
-
- auto allocTensorMem = [&](bool overwrite = false) {
- auto capacity = tensor->total_size();
- auto alloc = _dynamic_mem_mgr->allocate(ind, capacity);
-
- if (overwrite)
- tensor->overwriteBuffer(alloc);
- else
- tensor->setBuffer(alloc);
- };
-
- if (!previously_dynamic)
- {
- // TODO deallocate tensor->buffer()
- // the issue is that StaticTensorManager might have allocated this memory
- tensor->setShape(new_shape);
- tensor->set_dynamic();
- allocTensorMem(true);
- }
- else if (tensor->buffer() == nullptr)
- {
- tensor->setShape(new_shape);
- tensor->set_dynamic();
- allocTensorMem();
- }
- // when buffer was already allocated and new_shape requires different size
- else
- {
- auto previous_size = tensor->total_size();
- auto new_size = new_shape.num_elements() * sizeOfDataType(tensor->data_type());
- if (previous_size != new_size)
- {
- _dynamic_mem_mgr->deallocate(ind);
-
- tensor->setShape(new_shape);
- tensor->set_dynamic();
- allocTensorMem(true);
- }
- else
- { // when buffer with same size was already allocated, shape could differ
- tensor->setShape(new_shape);
- }
- }
-}
-
-void DynamicTensorManager::buildTensor(const ir::OperandIndex &ind,
- const ir::OperandInfo &tensor_info,
- ir::Layout backend_layout)
-{
- auto tensor = std::make_shared<cpu_common::Tensor>(tensor_info, backend_layout, this);
- _tensors->setNativeOwnTensor(ind, tensor);
-}
-
-void DynamicTensorManager::planDealloc(ir::OperationIndex op_ind, ir::OperandIndex operand_ind)
-{
- _dealloc_tensor_map[op_ind].emplace(operand_ind);
-}
-
-void DynamicTensorManager::deallocInput(ir::OperationIndex op_ind)
-{
- auto find = _dealloc_tensor_map.find(op_ind);
- if (find == _dealloc_tensor_map.end())
- return;
-
- auto &input_set = find->second;
- for (auto input_ind : input_set)
- {
- if (!_tensors->getNativeTensor(input_ind)->is_dynamic())
- continue;
-
- _dynamic_mem_mgr->deallocate(input_ind);
- VERBOSE(DynamicTensorManager) << "Deallocating #" << input_ind.value()
- << " (input of op_ind: " << op_ind.value() << ")" << std::endl;
- }
-}
-
-void DynamicTensorManager::deallocSubgraphOutput(ir::OperandIndex output_ind)
-{
- if (!_tensors->getNativeTensor(output_ind)->is_dynamic())
- return;
-
- _dynamic_mem_mgr->deallocate(output_ind);
- VERBOSE(DynamicTensorManager) << "Deallocating #" << output_ind.value()
- << " (output of a subgraph)" << std::endl;
-}
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.h b/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.h
deleted file mode 100644
index dbe388ba2..000000000
--- a/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_DYNAMICTENSOR_MANAGER_H__
-#define __ONERT_BACKEND_CONTROLFLOW_DYNAMICTENSOR_MANAGER_H__
-
-#include "TensorRegistry.h"
-#include "Tensor.h"
-
-#include <backend/IDynamicTensorManager.h>
-#include <backend/cpu_common/MemoryManager.h>
-#include <ir/OperandInfo.h>
-#include <ir/Operation.h>
-#include <ir/Index.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-/**
- * @brief Class to manage dynamic tensor and its memory
- */
-class DynamicTensorManager : public backend::IDynamicTensorManager
-{
-public:
- DynamicTensorManager(const std::shared_ptr<TensorRegistry> &tensors);
-
- virtual ~DynamicTensorManager() = default;
-
- void applyShape(const ir::OperandIndex &ind, const ir::Shape &new_shape) override;
-
- void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info,
- ir::Layout backend_layout);
-
- void planDealloc(ir::OperationIndex op_ind, ir::OperandIndex operand_ind) override;
- void deallocInput(ir::OperationIndex op_ind) override;
- void deallocSubgraphOutput(ir::OperandIndex ind) override;
-
-private:
- /**
- * @brief Memory manager for dynamic tensor.
- * @todo DynamicMemoryManager is not optimized. An optimized one is needed
- */
- std::shared_ptr<cpu_common::DynamicMemoryManager> _dynamic_mem_mgr;
- const std::shared_ptr<TensorRegistry> _tensors;
-
- // Contains the list of dynamic tensor indices that can be deallocated after running an operation.
- // Note: this map could contain static tensor indices too, so careful use is required.
- std::unordered_map<ir::OperationIndex, std::unordered_set<ir::OperandIndex>> _dealloc_tensor_map;
-};
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_DYNAMICTENSOR_MANAGER_H__
diff --git a/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc b/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc
deleted file mode 100644
index de5a6a5f6..000000000
--- a/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "KernelGenerator.h"
-
-#include <backend/BackendContext.h>
-#include <util/Utils.h>
-#include "kernel/IfLayer.h"
-#include "kernel/WhileLayer.h"
-#include "kernel/PermuteLayer.h"
-#include "exec/ExecutorBase.h"
-#include "exec/FunctionSequence.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-KernelGenerator::KernelGenerator(const ir::Graph &graph, IDynamicTensorManager *dyn_tensor_manager,
- const std::shared_ptr<TensorRegistry> &tensor_reg)
- : _graph{graph}, _dyn_tensor_manager{dyn_tensor_manager}, _tensor_reg{tensor_reg},
- _tensor_registries{}, _executor_map{nullptr}
-{
- UNUSED_RELEASE(_graph);
- UNUSED_RELEASE(_tensor_registries);
- UNUSED_RELEASE(_executor_map);
-}
-
-void KernelGenerator::visit(const ir::OpSequence &op_seq)
-{
- assert(!_return_fn_seq);
- assert(_dyn_tensor_manager);
- assert(_tensor_reg);
-
- auto dyn_shape_inferer =
- std::make_unique<exec::DynamicShapeInferer>(_graph.operands(), _tensor_reg);
-
- _return_fn_seq = std::make_unique<exec::FunctionSequence>();
-
- // Prepare to handle dynamic tensors later
- auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
- {
- dyn_ctx->op_seq = &op_seq;
- dyn_ctx->operations = &_graph.operations();
- dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
- dyn_ctx->tensor_registry = _tensor_reg;
- dyn_ctx->dynamic_tensor_manager = _dyn_tensor_manager;
-
- _return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
- }
- _return_fn_seq->enableDynamicShapeInferer(true);
-
- for (const auto &op_idx : op_seq.operations())
- {
- const auto &node = _graph.operations().at(op_idx);
- node.accept(*this);
- _return_fn_seq->append(releaseFunction());
- }
-}
-
-void KernelGenerator::visit(const ir::operation::If &node)
-{
- const auto then_subg_index = node.param().then_subg_index;
- const auto else_subg_index = node.param().else_subg_index;
-
- std::vector<std::shared_ptr<backend::ITensor>> input_tensors;
- for (const auto input_index : node.getInputs())
- {
- auto input_tensor = getTensor(input_index);
-
- input_tensors.emplace_back(input_tensor);
- }
-
- std::vector<std::shared_ptr<backend::ITensor>> output_tensors;
- exec::DynAllocInfoMap outputs_dyn_alloc_info;
- for (const auto output_index : node.getOutputs())
- {
- auto output_tensor = getTensor(output_index);
-
- output_tensors.emplace_back(output_tensor);
- outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index};
- }
-
- // IfLayer just sets the ExecutorMap instead of the then and else executors to avoid the
- // complexity of creating executors recursively
- const auto cond_tensor = input_tensors.front();
- input_tensors.erase(input_tensors.begin());
- auto fn = std::make_unique<::onert::backend::controlflow::kernel::IfLayer>(
- cond_tensor, input_tensors, output_tensors, node.getOutputs(), _graph, outputs_dyn_alloc_info,
- then_subg_index, else_subg_index, _executor_map);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Permute &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(0)};
-
- // Add PermuteLayer
- std::vector<std::shared_ptr<ITensor>> output_tensors{getTensor(output_index)};
- std::vector<std::shared_ptr<ITensor>> input_tensors{getTensor(input_index)};
- std::unordered_map<std::shared_ptr<ITensor>, exec::DynAllocInfo> outputs_dyn_alloc_info;
- outputs_dyn_alloc_info[output_tensors.at(0)] = exec::DynAllocInfo{output_index};
-
- auto fn =
- std::make_unique<kernel::PermuteLayer>(input_tensors, output_tensors, outputs_dyn_alloc_info);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::While &node)
-{
- const auto cond_subg_index = node.param().cond_subg_index;
- const auto body_subg_index = node.param().body_subg_index;
-
- // This op does not support constant inputs, because the controlflow backend does not have a
- // TensorBuilder
- std::vector<std::shared_ptr<backend::ITensor>> input_tensors;
- for (const auto input_index : node.getInputs())
- {
- auto input_tensor = getTensor(input_index);
-
- input_tensors.emplace_back(input_tensor);
- }
-
- std::vector<std::shared_ptr<backend::ITensor>> output_tensors;
- std::unordered_map<std::shared_ptr<ITensor>, exec::DynAllocInfo> outputs_dyn_alloc_info;
- for (const auto output_index : node.getOutputs())
- {
- auto output_tensor = getTensor(output_index);
-
- output_tensors.emplace_back(output_tensor);
-
- outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index};
- }
-
- // WhileLayer just sets the ExecutorMap instead of the cond and body executors to avoid the
- // complexity of creating executors recursively
- auto fn = std::make_unique<::onert::backend::controlflow::kernel::WhileLayer>(
- input_tensors, output_tensors, node.getOutputs(), _graph, outputs_dyn_alloc_info,
- cond_subg_index, body_subg_index, _executor_map);
-
- _return_fn = std::move(fn);
-}
-
-std::shared_ptr<backend::ITensor> KernelGenerator::getTensor(const ir::OperandIndex &index)
-{
- std::shared_ptr<backend::ITensor> ret = _tensor_registries.getITensor(index);
- assert(ret != nullptr);
- return ret;
-}
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/core/src/backend/controlflow/KernelGenerator.h b/runtime/onert/core/src/backend/controlflow/KernelGenerator.h
deleted file mode 100644
index b84a810e4..000000000
--- a/runtime/onert/core/src/backend/controlflow/KernelGenerator.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_GENERATOR_H__
-#define __ONERT_BACKEND_CONTROLFLOW_KERNEL_GENERATOR_H__
-
-#include <backend/IKernelGenerator.h>
-#include <backend/ITensorBuilder.h>
-#include <exec/IExecutor.h>
-#include <ir/Graph.h>
-#include "TensorBuilder.h"
-#include "compiler/TensorRegistries.h"
-#include "TensorRegistry.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-class KernelGenerator : public IKernelGenerator
-{
-public:
- KernelGenerator(const ir::Graph &graph, IDynamicTensorManager *dyn_tensor_manager,
- const std::shared_ptr<TensorRegistry> &tensor_reg);
-
- void setTensorRegistries(const compiler::TensorRegistries &tensor_registries)
- {
- _tensor_registries = tensor_registries;
- }
- void setExecutorMap(const std::shared_ptr<exec::ExecutorMap> &executor_map)
- {
- // FIXME Using shared_ptr's raw pointer!
- _executor_map = executor_map.get();
- }
-
- using IKernelGenerator::visit;
-
- void visit(const ir::OpSequence &) override;
- void visit(const ir::operation::If &) override;
- void visit(const ir::operation::Permute &) override;
- void visit(const ir::operation::While &) override;
-
-private:
- std::shared_ptr<backend::ITensor> getTensor(const ir::OperandIndex &index);
-
-private:
- const ir::Graph &_graph;
- IDynamicTensorManager *_dyn_tensor_manager;
- std::shared_ptr<TensorRegistry> _tensor_reg;
- compiler::TensorRegistries _tensor_registries;
- exec::ExecutorMap *_executor_map;
-};
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_KERNEL_GENERATOR_H__
diff --git a/runtime/onert/core/src/backend/controlflow/Tensor.h b/runtime/onert/core/src/backend/controlflow/Tensor.h
deleted file mode 100644
index ba5bafd75..000000000
--- a/runtime/onert/core/src/backend/controlflow/Tensor.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_H__
-#define __ONERT_BACKEND_CONTROLFLOW_TENSOR_H__
-
-#include <backend/cpu_common/Tensor.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-using Tensor = cpu_common::Tensor;
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_TENSOR_H__
diff --git a/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc b/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc
deleted file mode 100644
index e5c3f5fd5..000000000
--- a/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TensorBuilder.h"
-
-#include <util/logging.h>
-
-#include <cassert>
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg)
- : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg)},
- _static_tensor_mgr{
- new cpu_common::StaticTensorManager(_tensor_reg->base_reg(), _dynamic_tensor_mgr.get())}
-{
- /* empty */
-}
-
-void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout backend_layout)
-{
- _tensor_info_map.emplace(ind, info);
-
- _tensor_layout_map.insert({ind, backend_layout});
-
- if (info.isDynamic())
- {
- _dynamic_tensor_mgr->buildTensor(ind, info, _tensor_layout_map[ind]);
- }
- else
- {
- _static_tensor_mgr->buildTensor(ind, info, _tensor_layout_map[ind], info.isConstant());
- }
-}
-
-void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
-{
- // TODO Enhance the way of checking user tensors
- if (_tensor_info_map.find(ind) == _tensor_info_map.end()) // Do not proceed for user tensors
- return;
-
- const auto tensor_info = _tensor_info_map.at(ind);
-
- if (!nativeOwnTensorAt(ind)->is_dynamic())
- {
- const auto size = tensor_info.total_size();
- _static_tensor_mgr->claimPlan(ind, size);
- }
-}
-
-void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
-{
- // TODO Enhance the way of checking user tensors
- if (_tensor_info_map.find(ind) == _tensor_info_map.end()) // Do not proceed for user tensors
- return;
-
- if (!nativeOwnTensorAt(ind)->is_dynamic())
- {
- _static_tensor_mgr->releasePlan(ind);
- }
-}
-
-bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
-{
- // User tensors are not registered in _tensor_info_map, but objects for them do exist
- // in the tensor registry.
- // TODO Enhance the way of checking user tensors
- if (_tensor_reg->getITensor(ind))
- return true;
- return _tensor_info_map.find(ind) != _tensor_info_map.end();
-}
-
-void TensorBuilder::prepare(void)
-{
- _static_tensor_mgr->allocateConsts();
- _static_tensor_mgr->allocateNonconsts();
-}
-
-void TensorBuilder::allocate()
-{
- // NOTE For now there is nothing to do. Allocation is done in the prepare stage, which is not
- // ideal, because CPU kernels require `ITensor`s to be allocated before kernel generation.
-}
-
-std::shared_ptr<cpu_common::Tensor> TensorBuilder::nativeOwnTensorAt(const ir::OperandIndex &ind)
-{
- return _tensor_reg->getNativeOwnTensor(ind);
-}
-
-std::unique_ptr<ITensorManager> TensorBuilder::releaseStaticTensorManager(void)
-{
- return std::move(_static_tensor_mgr);
-}
-
-std::unique_ptr<ITensorManager> TensorBuilder::releaseDynamicTensorManager(void)
-{
- return std::move(_dynamic_tensor_mgr);
-}
-
-void TensorBuilder::setNativeUserTensor(const ir::OperandIndex &ind,
- const std::shared_ptr<UserTensor> &tensor)
-{
- _tensor_reg->setNativeUserTensor(ind, tensor);
-}
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/core/src/backend/controlflow/TensorBuilder.h b/runtime/onert/core/src/backend/controlflow/TensorBuilder.h
deleted file mode 100644
index 2f2a2c47e..000000000
--- a/runtime/onert/core/src/backend/controlflow/TensorBuilder.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_BUILDER_H__
-#define __ONERT_BACKEND_CONTROLFLOW_TENSOR_BUILDER_H__
-
-#include <backend/cpu_common/StaticTensorManager.h>
-#include <backend/cpu_common/TensorRegistry.h>
-#include <backend/cpu_common/Tensor.h>
-
-#include <backend/ITensorBuilder.h>
-#include <ir/OperandIndexMap.h>
-
-#include <unordered_map>
-
-#include "DynamicTensorManager.h"
-#include "UserTensorRegistry.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-class TensorBuilder : public ITensorBuilder
-{
-public:
- TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg);
-
- /**
- * @brief Register tensor information to allocate on CPU backend
- * @param[in] ind Operand index
- * @param[in] info Operand information
- * @param[in] layout Operand data layout
- */
- void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout backend_layout) override;
-
- void notifyFirstUse(const ir::OperandIndex &) override;
- void notifyLastUse(const ir::OperandIndex &) override;
-
- bool isRegistered(const ir::OperandIndex &) const override;
-
- void prepare(void) override;
- void allocate() override;
- void postFunctionPrepare() override { /* DO NOTHING */}
-
- std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) override;
-
- IDynamicTensorManager *dynamicTensorManager(void) override { return _dynamic_tensor_mgr.get(); }
-
- std::unique_ptr<ITensorManager> releaseDynamicTensorManager(void) override;
-
- /**
- * @brief Get tensor with a specific OperandIndex.
- * @param ind OperandIndex for the tensor. There must exist a tensor with this ind.
- * If not, the program will crash with an assert or an exception.
- * @return shared_ptr<operand::Tensor>
- */
- std::shared_ptr<cpu_common::Tensor> nativeOwnTensorAt(const ir::OperandIndex &ind);
- void setNativeUserTensor(const ir::OperandIndex &ind, const std::shared_ptr<UserTensor> &tensor);
-
-private:
- const std::shared_ptr<TensorRegistry> _tensor_reg;
- std::unique_ptr<DynamicTensorManager> _dynamic_tensor_mgr;
- std::unique_ptr<cpu_common::StaticTensorManager> _static_tensor_mgr;
- ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
- ir::OperandIndexMap<ir::Layout> _tensor_layout_map;
-};
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_TENSOR_BUILDER_H__
diff --git a/runtime/onert/core/src/backend/controlflow/TensorRegistry.h b/runtime/onert/core/src/backend/controlflow/TensorRegistry.h
deleted file mode 100644
index 678c5b73b..000000000
--- a/runtime/onert/core/src/backend/controlflow/TensorRegistry.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_REGISTRY_H__
-#define __ONERT_BACKEND_CONTROLFLOW_TENSOR_REGISTRY_H__
-
-#include "backend/cpu_common/TensorRegistry.h"
-#include "backend/ITensorRegistry.h"
-#include "Tensor.h"
-#include "UserTensor.h"
-#include <assert.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-/**
- * @brief Tensor registry class for controlflow backend
- *
- * This class contains three types of tensors: two native tensor types (tensors that are managed
- * by this backend) and one migrant tensor type.
- *
- * - NativeUserTensor - @c UserTensor managed by this backend, buffer is user-given
- * - NativeOwnTensor - @c cpu_common::Tensor managed by this backend ( in @c _base_reg )
- * - MigrantTensor - @c IPortableTensor managed by other backends ( in @c _base_reg )
- *
- * @note @c _base_reg is used in implementation to reuse @c cpu_common::StaticTensorManager
- *
- */
-class TensorRegistry : public ITensorRegistry
-{
-public:
- TensorRegistry() : _base_reg{new cpu_common::TensorRegistry} {}
-
- std::shared_ptr<ITensor> getITensor(const ir::OperandIndex &ind) override
- {
- auto base_tensor = _base_reg->getITensor(ind);
- if (base_tensor)
- return base_tensor;
- return getNativeUserTensor(ind);
- }
-
- std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &ind) override
- {
- auto base_tensor = _base_reg->getNativeITensor(ind);
- if (base_tensor)
- return base_tensor;
- return getNativeUserTensor(ind);
- }
-
- std::shared_ptr<IPortableTensor> getPortableTensor(const ir::OperandIndex &ind)
- {
- auto base_tensor = _base_reg->getPortableTensor(ind);
- if (base_tensor)
- return base_tensor;
- return getNativeUserTensor(ind);
- }
-
- std::shared_ptr<IPortableTensor> getNativeTensor(const ir::OperandIndex &ind)
- {
- auto base_tensor = _base_reg->getNativeTensor(ind);
- if (base_tensor)
- return base_tensor;
- return getNativeUserTensor(ind);
- }
-
- std::shared_ptr<Tensor> getNativeOwnTensor(const ir::OperandIndex &ind)
- {
- return _base_reg->getNativeTensor(ind);
- }
-
- std::shared_ptr<UserTensor> getNativeUserTensor(const ir::OperandIndex &ind)
- {
- auto tensor = _native_user_tensors.find(ind);
- if (tensor != _native_user_tensors.end())
- return tensor->second;
- return nullptr;
- }
-
- bool setMigrantTensor(const ir::OperandIndex &ind,
- const std::shared_ptr<IPortableTensor> &tensor) override
- {
- assert(tensor);
- assert(!getITensor(ind)); // For the ind, tensor is not registered yet
- _base_reg->setMigrantTensor(ind, tensor);
- return true;
- }
-
- void setNativeOwnTensor(ir::OperandIndex ind, const std::shared_ptr<Tensor> &tensor)
- {
- assert(tensor);
- assert(!getITensor(ind)); // For the ind, tensor is not registered yet
- _base_reg->setNativeTensor(ind, tensor);
- }
-
- void setNativeUserTensor(ir::OperandIndex ind, const std::shared_ptr<UserTensor> &tensor)
- {
- assert(tensor);
- assert(!getITensor(ind)); // For the ind, tensor is not registered yet
- _native_user_tensors[ind] = tensor;
- }
-
- const ir::OperandIndexMap<std::shared_ptr<UserTensor>> &native_user_tensors()
- {
- return _native_user_tensors;
- }
- std::shared_ptr<cpu_common::TensorRegistry> base_reg() { return _base_reg; }
-
-private:
- std::shared_ptr<cpu_common::TensorRegistry> _base_reg;
- ir::OperandIndexMap<std::shared_ptr<UserTensor>> _native_user_tensors;
-};
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_REGISTRY_H__
diff --git a/runtime/onert/core/src/backend/controlflow/UserTensor.cc b/runtime/onert/core/src/backend/controlflow/UserTensor.cc
deleted file mode 100644
index c8e2ebade..000000000
--- a/runtime/onert/core/src/backend/controlflow/UserTensor.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "UserTensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-size_t UserTensor::calcOffset(const ir::Coordinates &coords) const
-{
- size_t rank = num_dimensions();
- size_t offset = 0;
- for (size_t i = 0; i < rank; ++i)
- {
- offset = offset * dimension(i) + coords[i];
- }
- offset *= sizeOfDataType(data_type());
- return offset;
-}
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
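
calcOffset() above is the standard row-major offset computation. Restated standalone for clarity, with a worked example: for shape {2, 3, 4} and coordinates {1, 2, 3}, the element offset is (1 * 3 + 2) * 4 + 3 = 23, which is then scaled by the element size.

// Sketch only: standalone restatement of the removed UserTensor::calcOffset().
#include <cstddef>
#include <vector>

std::size_t rowMajorByteOffset(const std::vector<std::size_t> &dims,
                               const std::vector<std::size_t> &coords,
                               std::size_t elem_size)
{
  std::size_t offset = 0;
  for (std::size_t i = 0; i < dims.size(); ++i)
    offset = offset * dims[i] + coords[i]; // Horner-style accumulation per axis
  return offset * elem_size;               // convert element index to bytes
}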
diff --git a/runtime/onert/core/src/backend/controlflow/UserTensor.h b/runtime/onert/core/src/backend/controlflow/UserTensor.h
deleted file mode 100644
index 9be33595d..000000000
--- a/runtime/onert/core/src/backend/controlflow/UserTensor.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_USER_TENSOR_H__
-#define __ONERT_BACKEND_CONTROLFLOW_USER_TENSOR_H__
-
-#include "ir/OperandInfo.h"
-#include "backend/IPortableTensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-/**
- * @brief Tensor object that is for Input and Output tensors from the user.
- *
- * This class wraps a buffer allocated by the user, so it has no responsibility
- * for allocation or deallocation. All the model input/output tensors are wrapped with this class
- * for execution.
- *
- */
-class UserTensor : public IPortableTensor
-{
-public:
- UserTensor(const ir::OperandInfo &info, ir::Layout layout, uint8_t *buffer, size_t size,
- IDynamicTensorManager *dynamic_tensor_manager)
- : _info{info}, _layout{layout}, _buffer{buffer}, _size{size}, _dynamic{false},
- _dynamic_tensor_manager{dynamic_tensor_manager}
- {
- }
-
- UserTensor(const ir::OperandInfo &info, ir::Layout layout,
- IDynamicTensorManager *dynamic_tensor_manager)
- : UserTensor{info, layout, nullptr, 0, dynamic_tensor_manager}
- {
- }
-
-public:
- void setBuffer(uint8_t *buffer, size_t size)
- {
- _buffer = buffer;
- _size = size;
- }
-
-public:
- uint8_t *buffer() const override { return _buffer; }
- size_t total_size() const override { return _size; }
- size_t dimension(size_t index) const override { return _info.shape().dim(index); }
- size_t num_dimensions() const override { return _info.shape().rank(); }
- size_t calcOffset(const ir::Coordinates &coords) const override;
- ir::Layout layout() const override { return _layout; }
- ir::DataType data_type() const override { return _info.typeInfo().type(); }
- float data_scale() const override { return _info.typeInfo().scale(); }
- int32_t data_offset() const override { return _info.typeInfo().offset(); }
- bool is_dynamic() const override { return _dynamic; }
- void set_dynamic() override { _dynamic = true; }
- ir::Shape getShape() const override { return _info.shape(); }
- void setShape(const ir::Shape &new_shape) override { _info.shape(new_shape); }
- bool is_constant() const override { return false; }
- IDynamicTensorManager *dynamic_tensor_manager() override { return _dynamic_tensor_manager; }
-
-private:
- ir::OperandInfo _info;
- ir::Layout _layout;
- uint8_t *_buffer;
- size_t _size;
- bool _dynamic;
- IDynamicTensorManager *_dynamic_tensor_manager;
-};
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_USER_TENSOR_H__
diff --git a/runtime/onert/core/src/backend/controlflow/UserTensorRegistry.h b/runtime/onert/core/src/backend/controlflow/UserTensorRegistry.h
deleted file mode 100644
index fa2a2d54c..000000000
--- a/runtime/onert/core/src/backend/controlflow/UserTensorRegistry.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_USER_TENSOR_REGISTRY__
-#define __ONERT_BACKEND_CONTROLFLOW_USER_TENSOR_REGISTRY__
-
-#include "backend/ITensorRegistry.h"
-#include "UserTensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-using UserTensorRegistry = PortableTensorRegistryTemplate<UserTensor>;
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_USER_TENSOR_REGISTRY__
diff --git a/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.cc b/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.cc
deleted file mode 100644
index 8377c7183..000000000
--- a/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.cc
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "IfLayer.h"
-
-#include <backend/ITensor.h>
-#include "exec/ExecutorBase.h"
-#include <misc/polymorphic_downcast.h>
-#include "PermuteLayer.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-namespace kernel
-{
-
-IfLayer::IfLayer(const std::shared_ptr<backend::ITensor> &cond_tensor,
- const std::vector<std::shared_ptr<backend::ITensor>> input_tensors,
- const std::vector<std::shared_ptr<backend::ITensor>> output_tensors,
- const ir::OperandIndexSequence &output_indices, const ir::Graph &graph,
- const exec::DynAllocInfoMap &outputs_dyn_alloc_info,
- const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index,
- exec::ExecutorMap *executor_map)
- : _cond_tensor{cond_tensor}, _input_tensors{input_tensors}, _output_tensors{output_tensors},
- _output_indices{output_indices}, _graph{graph},
- _outputs_dyn_alloc_info{outputs_dyn_alloc_info}, _then_subg_index{then_subg_index},
- _else_subg_index{else_subg_index}, _executor_map{executor_map}
-{
- // At this point, executor_map may not have executors of then subg and else subg
-}
-
-void IfLayer::run()
-{
- // Check condition
- // // If true
- // // // Copy _input_tensors -> then subg's inputs
- // // // Run then subg
- // // // Copy outputs of then subg -> _output_tensors
- // // Else
- // // // Copy _input_tensors -> else subg's inputs
- // // // Run else subg
- // // // Copy outputs of else subg -> _output_tensors
- auto getResultCond = [](backend::ITensor *tensor) -> bool {
- bool ret = false;
- tensor->access([&](ITensor &tensor) { ret = *reinterpret_cast<bool *>(tensor.buffer()); });
- return ret;
- };
-
- exec::ExecutorBase *subg_exec = nullptr;
- if (getResultCond(_cond_tensor.get()))
- {
- subg_exec = nnfw::misc::polymorphic_downcast<exec::ExecutorBase *>(
- _executor_map->at(_then_subg_index).get());
- }
- else
- {
- subg_exec = nnfw::misc::polymorphic_downcast<exec::ExecutorBase *>(
- _executor_map->at(_else_subg_index).get());
- }
-
- const auto &subg_graph = subg_exec->graph();
-
- std::vector<std::shared_ptr<backend::ITensor>> src_tensors;
- std::vector<std::shared_ptr<backend::ITensor>> dst_tensors;
- // Add tensors used in subgraph or contained in outputs of subgraph
- assert(subg_graph.getInputs().size() == _input_tensors.size());
- assert(subg_graph.getInputs().size() == subg_exec->getInputTensors().size());
- for (uint32_t i = 0; i < subg_graph.getInputs().size(); ++i)
- {
- const auto &subg_input_index = subg_graph.getInputs().at(i);
- const auto &subg_input = subg_graph.operands().at(subg_input_index);
- if (subg_input.getUses().size() > 0 || subg_graph.getOutputs().contains(subg_input_index))
- {
- src_tensors.emplace_back(_input_tensors.at(i));
- dst_tensors.emplace_back(subg_exec->getInputTensors().at(i));
- }
- }
- const auto &subg_inputs_dyn_alloc_info = subg_exec->getInputsDynamicAllocInfo();
- const auto permute_op_input_to_subg_input =
- std::make_shared<PermuteLayer>(src_tensors, dst_tensors, subg_inputs_dyn_alloc_info);
-
- // Add tensors used as output of operation or contained in outputs of operation
- src_tensors.clear();
- dst_tensors.clear();
- assert(_output_indices.size() == subg_exec->getOutputTensors().size());
- assert(_output_indices.size() == _output_tensors.size());
- for (uint32_t i = 0; i < _output_indices.size(); ++i)
- {
- const auto &output_index = _output_indices.at(i);
- const auto &output = _graph.operands().at(output_index);
- if (output.getUses().size() > 0 || _graph.getOutputs().contains(output_index))
- {
- src_tensors.emplace_back(subg_exec->getOutputTensors().at(i));
- dst_tensors.emplace_back(_output_tensors.at(i));
- }
- }
- const auto permute_subg_output_to_op_output =
- std::make_shared<PermuteLayer>(src_tensors, dst_tensors, _outputs_dyn_alloc_info);
-
- // Remove copying of unused tensors
- permute_op_input_to_subg_input->prepare();
- permute_subg_output_to_op_output->prepare();
-
- // Copy & run
- subg_exec->execute(_input_tensors, permute_op_input_to_subg_input);
- permute_subg_output_to_op_output->run();
-}
-
-} // namespace kernel
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
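
The getResultCond lambda above shows the pattern the controlflow kernels use to read a scalar condition out of a backend tensor: ITensor::access() serializes access to the underlying buffer, and the first byte is reinterpreted as a bool. Below is a minimal compilable sketch of the same pattern; MiniTensor is a hypothetical stand-in for backend::ITensor, whose real interface is richer.

#include <cstdint>
#include <functional>
#include <iostream>

// Hypothetical stand-in for backend::ITensor; assumes the condition value
// is stored as a bool in the first byte of the buffer.
struct MiniTensor
{
  uint8_t *buffer() { return _buf; }
  // Mirrors ITensor::access(): runs fn while the buffer is guaranteed valid
  void access(const std::function<void(MiniTensor &)> &fn) { fn(*this); }
  uint8_t _buf[1];
};

static bool getResultCond(MiniTensor *tensor)
{
  bool ret = false;
  tensor->access([&](MiniTensor &t) { ret = *reinterpret_cast<bool *>(t.buffer()); });
  return ret;
}

int main()
{
  MiniTensor cond{{1}}; // condition holds "true"
  std::cout << std::boolalpha << getResultCond(&cond) << std::endl; // prints "true"
  return 0;
}
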
diff --git a/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.h b/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.h
deleted file mode 100644
index ef3a6e6f6..000000000
--- a/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_IF_LAYER_H__
-#define __ONERT_BACKEND_CONTROLFLOW_KERNEL_IF_LAYER_H__
-
-#include <backend/ITensor.h>
-#include <exec/IExecutor.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-namespace kernel
-{
-
-class IfLayer : public ::onert::exec::IFunction
-{
-public:
- IfLayer(const std::shared_ptr<backend::ITensor> &cond_tensor,
- const std::vector<std::shared_ptr<backend::ITensor>> input_tensors,
- const std::vector<std::shared_ptr<backend::ITensor>> output_tensors,
- const ir::OperandIndexSequence &output_indices, const ir::Graph &graph,
- const exec::DynAllocInfoMap &outputs_dyn_alloc_info,
- const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index,
- exec::ExecutorMap *executor_map);
-
-public:
- void run() override;
-
-private:
- const std::shared_ptr<backend::ITensor> _cond_tensor;
- const std::vector<std::shared_ptr<backend::ITensor>> _input_tensors;
- const std::vector<std::shared_ptr<backend::ITensor>> _output_tensors;
- const ir::OperandIndexSequence &_output_indices;
- const ir::Graph &_graph;
- const exec::DynAllocInfoMap _outputs_dyn_alloc_info;
- const ir::SubgraphIndex _then_subg_index;
- const ir::SubgraphIndex _else_subg_index;
- exec::ExecutorMap *_executor_map;
-};
-
-} // namespace kernel
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_KERNEL_IF_LAYER_H__
diff --git a/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.cc b/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.cc
deleted file mode 100644
index e8f1ea679..000000000
--- a/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.cc
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "PermuteLayer.h"
-
-#include "exec/ShapeConverter.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-namespace kernel
-{
-
-void PermuteLayer::run()
-{
- assert(_src_tensors.size() == _dst_tensors.size());
- // PermuteLayer infers dynamic shapes inside itself whenever run() is called, for the following
- // reasons:
- // 1. PermuteLayer has to access the dynamic tensor manager for input/output tensors of other
- // backends
- // 2. Other controlflow operations (If/While) use this layer for copying tensors of other
- // subgraphs (with other backends)
- // 3. This inference code is placed here to avoid the duplication that the two reasons above
- // would otherwise cause
-
- // Infer and apply the destination shape wherever either tensor is dynamic
- for (size_t i = 0; i < _src_tensors.size(); ++i)
- {
- auto dst_tensor = _dst_tensors.at(i);
- auto src_tensor = _src_tensors.at(i);
- if (src_tensor->is_dynamic() || dst_tensor->is_dynamic())
- {
- // getting output shape
- auto src_shape = src_tensor->getShape();
-
- // set output shape and output buffer
- ir::Shape new_shape =
- exec::convertShape(src_shape, src_tensor->layout(), dst_tensor->layout());
-
- try
- {
- const auto dst_index = _dst_dyn_alloc_info_map.at(dst_tensor).ind;
- auto dyn_tensor_manager = dst_tensor->dynamic_tensor_manager();
- if (!dyn_tensor_manager)
- throw std::runtime_error{
- "Error: PermuteLayer: output's TensorManager does not support dynamic tensor"};
- dyn_tensor_manager->applyShape(dst_index, new_shape);
- assert(dst_tensor->buffer() != nullptr);
- }
- catch (const std::out_of_range &e)
- {
- std::cerr << "Error: out_of_range in PermuteLayer: output's TensorManager does not support "
- "dynamic tensor"
- << '\n';
- throw;
- }
- }
- assert(exec::convertShape(src_tensor->getShape(), src_tensor->layout(), dst_tensor->layout()) ==
- dst_tensor->getShape());
- }
- IPermuteFunction::run();
-}
-
-} // namespace kernel
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.h b/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.h
deleted file mode 100644
index 403ac770d..000000000
--- a/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_PERMUTELAYER_H__
-#define __ONERT_BACKEND_CONTROLFLOW_KERNEL_PERMUTELAYER_H__
-
-#include "backend/ITensorBuilder.h"
-#include "exec/IPermuteFunction.h"
-#include "exec/IExecutor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-namespace kernel
-{
-
-class PermuteLayer : public onert::exec::IPermuteFunction
-{
-public:
- PermuteLayer(const std::vector<std::shared_ptr<ITensor>> &src_tensors,
- const std::vector<std::shared_ptr<ITensor>> &dst_tensors,
- const exec::DynAllocInfoMap &dst_dyn_alloc_info_map)
- : _dst_dyn_alloc_info_map{dst_dyn_alloc_info_map}
- {
- assert(src_tensors.size() == dst_tensors.size());
- _src_tensors = src_tensors;
- _dst_tensors = dst_tensors;
- }
-
- void optimize() override
- {
- // Drop pairs where copying would be a no-op (same tensor) or invalid (nullptr)
- auto src_it = _src_tensors.begin();
- auto dst_it = _dst_tensors.begin();
- while (src_it != _src_tensors.end())
- {
- if ((*src_it == *dst_it) || (*src_it == nullptr || *dst_it == nullptr))
- {
- src_it = _src_tensors.erase(src_it);
- dst_it = _dst_tensors.erase(dst_it);
- }
- else
- {
- ++src_it;
- ++dst_it;
- }
- }
- }
-
- void run() override;
-
-private:
- const exec::DynAllocInfoMap _dst_dyn_alloc_info_map;
-};
-
-} // namespace kernel
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_KERNEL_PERMUTELAYER_H__
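
optimize() above trims the source and destination lists in lockstep, so the two vectors stay index-aligned while pairs whose copy would be a no-op (same tensor) or invalid (nullptr) are dropped. A self-contained sketch of that idiom, using shared_ptr<int> as a hypothetical stand-in for tensor handles:

#include <iostream>
#include <memory>
#include <vector>

int main()
{
  using P = std::shared_ptr<int>;
  auto a = std::make_shared<int>(1), b = std::make_shared<int>(2);
  std::vector<P> src{a, nullptr, b, a};
  std::vector<P> dst{a, b, b, std::make_shared<int>(3)};

  auto src_it = src.begin();
  auto dst_it = dst.begin();
  while (src_it != src.end())
  {
    // Erase the pair when copying would be a no-op or invalid; both
    // iterators advance (or are re-seated by erase) together
    if ((*src_it == *dst_it) || (*src_it == nullptr || *dst_it == nullptr))
    {
      src_it = src.erase(src_it);
      dst_it = dst.erase(dst_it);
    }
    else
    {
      ++src_it;
      ++dst_it;
    }
  }
  std::cout << "pairs left: " << src.size() << std::endl; // 1 (only a -> 3 survives)
  return 0;
}
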
diff --git a/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.cc b/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.cc
deleted file mode 100644
index 50936e5f6..000000000
--- a/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.cc
+++ /dev/null
@@ -1,216 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "WhileLayer.h"
-
-#include <backend/ITensor.h>
-#include "exec/ExecutorBase.h"
-#include <misc/polymorphic_downcast.h>
-#include "PermuteLayer.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-namespace kernel
-{
-
-WhileLayer::WhileLayer(const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
- const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const ir::OperandIndexSequence &output_indices, const ir::Graph &graph,
- const exec::DynAllocInfoMap &outputs_dyn_alloc_info,
- const ir::SubgraphIndex &cond_subg_index,
- const ir::SubgraphIndex &body_subg_index, exec::ExecutorMap *executor_map)
- : _cond_subg_index{cond_subg_index}, _body_subg_index{body_subg_index},
- _output_indices{output_indices}, _graph{graph}, _input_tensors{input_tensors},
- _output_tensors{output_tensors}, _outputs_dyn_alloc_info{outputs_dyn_alloc_info},
- _executor_map{executor_map}
-{
- // At this point, executor_map may not have executors of cond subg and body subg
-}
-
-void WhileLayer::run()
-{
- // Copy "_input_tensors" -> "cond subg inputs"
- // Run cond subg
- // Loop while the output of cond subg is true
- // // Copy "_input_tensors" -> "body subg inputs" in the first iteration, then copy "body subg
- // outputs" -> "body subg inputs" in later iterations
- // // Run body subg
- // // Copy "body subg outputs" -> "cond subg inputs"
- // // Run cond subg
- // If the loop never runs, copy "_input_tensors" -> "_output_tensors"; otherwise copy "body subg
- // outputs" -> "_output_tensors"
- auto cond_exec = nnfw::misc::polymorphic_downcast<exec::ExecutorBase *>(
- _executor_map->at(_cond_subg_index).get());
- auto body_exec = nnfw::misc::polymorphic_downcast<exec::ExecutorBase *>(
- _executor_map->at(_body_subg_index).get());
-
- const auto &cond_graph = cond_exec->graph();
- const auto &cond_inputs_dyn_alloc = cond_exec->getInputsDynamicAllocInfo();
- const auto &body_graph = body_exec->graph();
- const auto &body_inputs_dyn_alloc = body_exec->getInputsDynamicAllocInfo();
-
- std::vector<std::shared_ptr<backend::ITensor>> input_tensors;
- std::vector<std::shared_ptr<backend::ITensor>> cond_input_tensors;
- std::vector<std::shared_ptr<backend::ITensor>> body_input_tensors;
- std::vector<std::shared_ptr<backend::ITensor>> body_output_tensors;
- std::vector<std::shared_ptr<backend::ITensor>> output_tensors;
-
- // Add only used tensors in cond subgraph
- assert(cond_graph.getInputs().size() == _input_tensors.size());
- assert(cond_graph.getInputs().size() == cond_exec->getInputTensors().size());
- for (uint32_t i = 0; i < cond_graph.getInputs().size(); ++i)
- {
- const auto &cond_input = cond_graph.operands().at(cond_graph.getInputs().at(i));
- if (cond_input.getUses().size() > 0)
- {
- input_tensors.emplace_back(_input_tensors.at(i));
- cond_input_tensors.emplace_back(cond_exec->getInputTensors().at(i));
- }
- }
- const auto permute_op_input_to_cond_input =
- std::make_shared<PermuteLayer>(input_tensors, cond_input_tensors, cond_inputs_dyn_alloc);
-
- // Add only used tensors among outputs of while operation
- assert(_output_indices.size() == _input_tensors.size());
- assert(_output_indices.size() == _output_tensors.size());
- input_tensors.clear();
- output_tensors.clear();
- for (size_t i = 0; i < _output_indices.size(); ++i)
- {
- const auto &output_index = _output_indices.at(i);
- const auto &output = _graph.operands().at(output_index);
- if (output.getUses().size() > 0 || _graph.getOutputs().contains(output_index))
- {
- input_tensors.emplace_back(_input_tensors.at(i));
- output_tensors.emplace_back(_output_tensors.at(i));
- }
- }
- const auto permute_op_input_to_op_output =
- std::make_shared<PermuteLayer>(input_tensors, output_tensors, _outputs_dyn_alloc_info);
-
- // Add all tensors, including unused ones, in the body subgraph, because unused input tensors
- // are still copied to the output tensors in the body subgraph
- assert(_input_tensors.size() == body_exec->getInputTensors().size());
- input_tensors = _input_tensors;
- body_input_tensors = body_exec->getInputTensors();
- const auto permute_op_input_to_body_input =
- std::make_shared<PermuteLayer>(input_tensors, body_input_tensors, body_inputs_dyn_alloc);
-
- // Add only used tensors in cond subgraph
- assert(cond_graph.getInputs().size() == body_exec->getOutputTensors().size());
- assert(cond_graph.getInputs().size() == cond_exec->getInputTensors().size());
- body_output_tensors.clear();
- cond_input_tensors.clear();
- for (uint32_t i = 0; i < cond_graph.getInputs().size(); ++i)
- {
- const auto &cond_input = cond_graph.operands().at(cond_graph.getInputs().at(i));
- if (cond_input.getUses().size() > 0)
- {
- body_output_tensors.emplace_back(body_exec->getOutputTensors().at(i));
- cond_input_tensors.emplace_back(cond_exec->getInputTensors().at(i));
- }
- }
- const auto permute_body_output_to_cond_input = std::make_shared<PermuteLayer>(
- body_output_tensors, cond_input_tensors, cond_inputs_dyn_alloc);
-
- // Add only used tensors in body subgraph
- assert(body_graph.getInputs().size() == body_exec->getOutputTensors().size());
- assert(body_graph.getInputs().size() == body_exec->getInputTensors().size());
- body_output_tensors.clear();
- body_input_tensors.clear();
- for (uint32_t i = 0; i < body_graph.getInputs().size(); ++i)
- {
- const auto &body_input_index = body_graph.getInputs().at(i);
- const auto &body_input = body_graph.operands().at(body_input_index);
- if (body_input.getUses().size() > 0 &&
- !body_exec->graph().getOutputs().contains(body_input_index))
- {
- body_output_tensors.emplace_back(body_exec->getOutputTensors().at(i));
- body_input_tensors.emplace_back(body_exec->getInputTensors().at(i));
- }
- }
- const auto permute_body_output_to_body_input = std::make_shared<PermuteLayer>(
- body_output_tensors, body_input_tensors, body_inputs_dyn_alloc);
-
- // Add only used tensors among outputs of while operation
- assert(_output_indices.size() == body_exec->getOutputTensors().size());
- assert(_output_indices.size() == _output_tensors.size());
- body_output_tensors.clear();
- output_tensors.clear();
- for (size_t i = 0; i < _output_indices.size(); ++i)
- {
- const auto &output_index = _output_indices.at(i);
- const auto &output = _graph.operands().at(output_index);
- if (output.getUses().size() > 0 || _graph.getOutputs().contains(output_index))
- {
- body_output_tensors.emplace_back(body_exec->getOutputTensors().at(i));
- output_tensors.emplace_back(_output_tensors.at(i));
- }
- }
- const auto permute_body_output_to_op_output =
- std::make_shared<PermuteLayer>(body_output_tensors, output_tensors, _outputs_dyn_alloc_info);
-
- // Remove copying of unused tensors
- permute_op_input_to_cond_input->prepare();
- permute_op_input_to_op_output->prepare();
- permute_op_input_to_body_input->prepare();
- permute_body_output_to_cond_input->prepare();
- permute_body_output_to_body_input->prepare();
- permute_body_output_to_op_output->prepare();
-
- cond_exec->execute(_input_tensors, permute_op_input_to_cond_input);
-
- assert(cond_exec->getOutputTensors().size() == 1);
- auto &cond_output_tensor = cond_exec->getOutputTensors().at(0);
- auto getResultCond = [](backend::ITensor *tensor) -> bool {
- bool ret = false;
- tensor->access([&](ITensor &tensor) { ret = *reinterpret_cast<bool *>(tensor.buffer()); });
- return ret;
- };
-
- const auto body_execute_with_op_inputs = [&]() {
- body_exec->execute(_input_tensors, permute_op_input_to_body_input);
- };
-
- const auto body_execute_with_body_outputs = [&]() {
- body_exec->execute(body_exec->getOutputTensors(), permute_body_output_to_body_input);
- };
-
- std::function<void()> body_execute = body_execute_with_op_inputs;
- const auto cond_execute = [&]() {
- cond_exec->execute(body_exec->getOutputTensors(), permute_body_output_to_cond_input);
- };
- auto permute_to_outputs_fn = permute_op_input_to_op_output;
-
- // Loop while Cond subgraph's output is true
- while (getResultCond(cond_output_tensor.get()))
- {
- body_execute();
- cond_execute();
- body_execute = body_execute_with_body_outputs;
- permute_to_outputs_fn = permute_body_output_to_op_output;
- }
- permute_to_outputs_fn->run();
-}
-
-} // namespace kernel
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
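
Stripped of the permutation plumbing, run() above is a small protocol: evaluate cond once on the op inputs, then alternate body and cond while feeding body outputs back in, and finally copy either the op inputs (zero iterations) or the body outputs into the op outputs. A minimal sketch, with executors and tensor copies reduced to hypothetical callbacks over a single int:

#include <iostream>

int main()
{
  int value = 0;                              // stands in for the loop-carried tensors
  auto cond_exec = [&] { return value < 3; }; // "cond subg"
  auto body_exec = [&] { ++value; };          // "body subg"

  bool cond = cond_exec(); // run cond once on the op inputs
  bool looped = false;
  while (cond)             // loop while the cond subgraph yields true
  {
    body_exec();           // run body; later iterations consume previous body outputs
    cond = cond_exec();    // re-evaluate cond on the body outputs
    looped = true;
  }
  // Copy to the op outputs: from the op inputs if the loop never ran,
  // otherwise from the body outputs
  std::cout << "result " << value << (looped ? " (from body outputs)" : " (from op inputs)")
            << std::endl;
  return 0;
}
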
diff --git a/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.h b/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.h
deleted file mode 100644
index ebca8acdc..000000000
--- a/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_WHILE_LAYER_H__
-#define __ONERT_BACKEND_CONTROLFLOW_KERNEL_WHILE_LAYER_H__
-
-#include <backend/ITensor.h>
-#include <exec/IExecutor.h>
-#include <exec/IFunction.h>
-#include <ir/OperandIndexSequence.h>
-#include <ir/Graph.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-namespace kernel
-{
-
-class WhileLayer : public ::onert::exec::IFunction
-{
-public:
- WhileLayer(const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
- const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const ir::OperandIndexSequence &output_indices, const ir::Graph &graph,
- const exec::DynAllocInfoMap &outputs_dyn_alloc_info,
- const ir::SubgraphIndex &cond_subg_index, const ir::SubgraphIndex &body_subg_index,
- exec::ExecutorMap *executor_map);
-
-public:
- void run() override;
-
-private:
- const ir::SubgraphIndex _cond_subg_index;
- const ir::SubgraphIndex _body_subg_index;
- const ir::OperandIndexSequence &_output_indices;
- const ir::Graph &_graph;
- const std::vector<std::shared_ptr<backend::ITensor>> _input_tensors;
- const std::vector<std::shared_ptr<backend::ITensor>> _output_tensors;
- const exec::DynAllocInfoMap _outputs_dyn_alloc_info;
- exec::ExecutorMap *_executor_map;
-};
-
-} // namespace kernel
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_KERNEL_WHILE_LAYER_H__
diff --git a/runtime/onert/core/src/backend/cpu_common/Allocator.cc b/runtime/onert/core/src/backend/cpu_common/Allocator.cc
deleted file mode 100644
index 0ba444ee6..000000000
--- a/runtime/onert/core/src/backend/cpu_common/Allocator.cc
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "backend/cpu_common/Allocator.h"
-
-#include "util/logging.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-Allocator::Allocator(uint32_t capacity)
-{
- _base = std::make_unique<uint8_t[]>(capacity);
-
- VERBOSE(ALLOC) << "allocation capacity: " << capacity << std::endl;
- VERBOSE(ALLOC) << "base pointer: " << static_cast<void *>(_base.get()) << std::endl;
-}
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/core/src/backend/cpu_common/DynamicTensorManager.cc b/runtime/onert/core/src/backend/cpu_common/DynamicTensorManager.cc
deleted file mode 100644
index f7ce3d011..000000000
--- a/runtime/onert/core/src/backend/cpu_common/DynamicTensorManager.cc
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "backend/cpu_common/DynamicTensorManager.h"
-
-#include "util/logging.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-DynamicTensorManager::DynamicTensorManager(const std::shared_ptr<TensorRegistry> &reg)
- : _dynamic_mem_mgr{new DynamicMemoryManager()}, _tensors{reg}
-{
- // DO NOTHING
-}
-
-void DynamicTensorManager::applyShape(const ir::OperandIndex &ind, const ir::Shape &new_shape)
-{
- VERBOSE_F() << ind << std::endl;
-
- auto tensor = _tensors->getNativeTensor(ind);
- assert(tensor);
-
- bool previously_dynamic = tensor->is_dynamic();
-
- auto allocTensorMem = [&](bool overwrite = false) {
- auto capacity = tensor->total_size();
- auto alloc = _dynamic_mem_mgr->allocate(ind, capacity);
-
- if (overwrite)
- tensor->overwriteBuffer(alloc);
- else
- tensor->setBuffer(alloc);
- };
-
- if (!previously_dynamic)
- {
- // TODO deallocate tensor->buffer()
- // The issue is that StaticTensorManager might have allocated this memory
- tensor->setShape(new_shape);
- tensor->set_dynamic();
- allocTensorMem(true);
- }
- else if (tensor->buffer() == nullptr)
- {
- tensor->setShape(new_shape);
- tensor->set_dynamic();
- allocTensorMem();
- }
- // when the buffer was already allocated and new_shape requires a different size
- else
- {
- auto previous_size = tensor->total_size();
- auto new_size = new_shape.num_elements() * sizeOfDataType(tensor->data_type());
- if (previous_size != new_size)
- {
- _dynamic_mem_mgr->deallocate(ind);
-
- tensor->setShape(new_shape);
- tensor->set_dynamic();
- allocTensorMem(true);
- }
- else
- { // when buffer with same size was already allocated, shape could differ
- tensor->setShape(new_shape);
- }
- }
-}
-
-void DynamicTensorManager::buildTensor(const ir::OperandIndex &ind,
- const ir::OperandInfo &tensor_info,
- ir::Layout backend_layout)
-{
- assert(_tensors->getNativeTensor(ind) == nullptr);
- auto tensor = std::make_shared<Tensor>(tensor_info, backend_layout, this);
- _tensors->setNativeTensor(ind, tensor);
-}
-
-void DynamicTensorManager::planDealloc(ir::OperationIndex op_ind, ir::OperandIndex operand_ind)
-{
- _dealloc_tensor_map[op_ind].emplace(operand_ind);
-}
-
-void DynamicTensorManager::deallocInput(ir::OperationIndex op_ind)
-{
- auto find = _dealloc_tensor_map.find(op_ind);
- if (find == _dealloc_tensor_map.end())
- return;
-
- auto &input_set = find->second;
- for (auto input_ind : input_set)
- {
- auto *tensor = _tensors->getNativeTensor(input_ind).get();
- if (!tensor->is_dynamic())
- continue;
-
- _dynamic_mem_mgr->deallocate(input_ind);
- tensor->resetBuffer();
-
- VERBOSE(DynamicTensorManager) << "Deallocating #" << input_ind.value()
- << " (input of op_ind: " << op_ind.value() << ")" << std::endl;
- }
-}
-
-void DynamicTensorManager::deallocSubgraphOutput(ir::OperandIndex output_ind)
-{
- auto *tensor = _tensors->getNativeTensor(output_ind).get();
- if (!tensor->is_dynamic())
- return;
-
- _dynamic_mem_mgr->deallocate(output_ind);
- tensor->resetBuffer();
-
- VERBOSE(DynamicTensorManager) << "Deallocating #" << output_ind.value()
- << " (output of a subgraph)" << std::endl;
-}
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
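
applyShape() above distinguishes three situations: a tensor turning dynamic for the first time (overwrite the statically planned buffer), a dynamic tensor with no buffer yet (fresh allocation), and a dynamic tensor whose byte size changed (deallocate, then reallocate); only the shape metadata changes when the size stays the same. A condensed, compilable sketch of that decision logic; Buf and the bool flags are hypothetical stand-ins for DynamicMemoryManager and the tensor state:

#include <cstddef>
#include <iostream>

struct Buf
{
  size_t bytes = 0;
  bool allocated = false;
};

void applyShape(Buf &buf, bool &is_dynamic, size_t new_bytes)
{
  if (!is_dynamic) // first transition to dynamic:
  {                // overwrite whatever the static planner assigned
    is_dynamic = true;
    buf = {new_bytes, true};
  }
  else if (!buf.allocated) // dynamic but never materialized: fresh allocation
  {
    buf = {new_bytes, true};
  }
  else if (buf.bytes != new_bytes) // size changed: release, then reallocate
  {
    buf.allocated = false; // deallocate(ind)
    buf = {new_bytes, true};
  }
  // same size: only the shape metadata would change
}

int main()
{
  Buf b;
  bool dyn = false;
  applyShape(b, dyn, 64);  // static -> dynamic
  applyShape(b, dyn, 128); // resize: deallocate + reallocate
  applyShape(b, dyn, 128); // same size: no reallocation
  std::cout << b.bytes << " bytes, dynamic=" << dyn << std::endl; // 128 bytes, dynamic=1
  return 0;
}
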
diff --git a/runtime/onert/core/src/backend/cpu_common/MemoryManager.cc b/runtime/onert/core/src/backend/cpu_common/MemoryManager.cc
deleted file mode 100644
index 8cb9c22ca..000000000
--- a/runtime/onert/core/src/backend/cpu_common/MemoryManager.cc
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <backend/cpu_common/MemoryManager.h>
-
-#include <cassert>
-
-#include "MemoryPlannerFactory.h"
-#include "util/ConfigSource.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-MemoryManager::MemoryManager() : _mem_planner{createMemoryPlanner()}
-{
- // DO NOTHING
-}
-
-MemoryManager::MemoryManager(const std::string planner_id)
- : _mem_planner{createMemoryPlanner(planner_id)}
-{
- // DO NOTHING
-}
-
-cpu_common::IMemoryPlanner *MemoryManager::createMemoryPlanner()
-{
- auto planner_id = util::getConfigString(util::config::CPU_MEMORY_PLANNER);
- return cpu_common::MemoryPlannerFactory::get().create(planner_id);
-}
-
-cpu_common::IMemoryPlanner *MemoryManager::createMemoryPlanner(const std::string planner_id)
-{
- return cpu_common::MemoryPlannerFactory::get().create(planner_id);
-}
-
-void MemoryManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
-{
- _mem_planner->claim(ind, size);
-}
-
-void MemoryManager::releasePlan(const ir::OperandIndex &ind) { _mem_planner->release(ind); }
-
-void MemoryManager::allocate(void)
-{
- _mem_alloc = std::make_shared<cpu_common::Allocator>(_mem_planner->capacity());
- assert(_mem_alloc->base());
-}
-
-uint8_t *MemoryManager::getBuffer(const ir::OperandIndex &ind) const
-{
- assert(_mem_planner->memory_plans().find(ind) != _mem_planner->memory_plans().end());
- const auto &mem_blk = _mem_planner->memory_plans().at(ind);
- return _mem_alloc->base() + mem_blk.offset;
-}
-
-std::shared_ptr<cpu_common::Allocator> DynamicMemoryManager::allocate(const ir::OperandIndex &ind,
- uint32_t capacity)
-{
- auto find = _mem_alloc_map.find(ind);
- if (find != _mem_alloc_map.end())
- throw std::runtime_error("Cannot allocate memory for a tensor. It was already allocated.");
-
- _mem_alloc_map[ind] = std::make_shared<cpu_common::Allocator>(capacity);
- return _mem_alloc_map[ind];
-}
-
-void DynamicMemoryManager::deallocate(const ir::OperandIndex &ind)
-{
- auto find = _mem_alloc_map.find(ind);
- if (find == _mem_alloc_map.end())
- throw std::runtime_error("Cannot find Allocator for the requested index");
-
- find->second->release(); // explicitly erase memory
- _mem_alloc_map.erase(find); // remove tensor and alloc
-}
-
-void DynamicMemoryManager::deallocate(void)
-{
- for (auto &mem_alloc : _mem_alloc_map)
- {
- // Release memory buffer of mem_alloc
- mem_alloc.second->release();
- }
-
- _mem_alloc_map.clear();
-}
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
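
DynamicMemoryManager above keeps one Allocator per operand in a map: allocating twice for the same index throws, and deallocation erases the map entry so the buffer is released with its last owner. A rough sketch of that bookkeeping, with uint32_t keys and std::vector buffers as hypothetical stand-ins for ir::OperandIndex and cpu_common::Allocator:

#include <cstdint>
#include <memory>
#include <stdexcept>
#include <unordered_map>
#include <vector>

using Allocation = std::vector<uint8_t>;
static std::unordered_map<uint32_t, std::shared_ptr<Allocation>> mem_alloc_map;

std::shared_ptr<Allocation> allocate(uint32_t ind, uint32_t capacity)
{
  if (mem_alloc_map.count(ind))
    throw std::runtime_error("Cannot allocate memory for a tensor. It was already allocated.");
  return mem_alloc_map[ind] = std::make_shared<Allocation>(capacity);
}

void deallocate(uint32_t ind)
{
  auto it = mem_alloc_map.find(ind);
  if (it == mem_alloc_map.end())
    throw std::runtime_error("Cannot find Allocator for the requested index");
  mem_alloc_map.erase(it); // the buffer dies with its last shared_ptr
}

int main()
{
  allocate(0, 1024);
  deallocate(0);
  return 0;
}
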
diff --git a/runtime/onert/core/src/backend/cpu_common/MemoryPlanner.cc b/runtime/onert/core/src/backend/cpu_common/MemoryPlanner.cc
deleted file mode 100644
index 75c2da7d2..000000000
--- a/runtime/onert/core/src/backend/cpu_common/MemoryPlanner.cc
+++ /dev/null
@@ -1,215 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "MemoryPlanner.h"
-#include "util/logging.h"
-#include <cassert>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-void BumpPlanner::claim(const ir::OperandIndex &ind, size_t size)
-{
- assert(size != 0);
-
- Block blk{_capacity, size};
- _mem_plans[ind] = blk;
- _capacity += size;
-
- VERBOSE(BP_PLANNER) << "CLAIM(#" << ind.value() << "): " << blk.offset << ", " << blk.size
- << std::endl;
-}
-
-void BumpPlanner::release(const ir::OperandIndex &ind)
-{
- VERBOSE(BP_PLANNER) << "RELEASE(#" << ind.value() << "): "
- << "NOTHING does" << std::endl;
-}
-
-// There are some assumptions for claiming memory (== making a reservation for memory).
-// 1. About _claim_table (std::map).
-// - The table's data structure is std::map, so it always sorts
-// values (OperandIndex) by key (base_offset).
-// - claim() inserts a key/value pair into _claim_table, and release() removes it from
-// _claim_table.
-// - _claim_table shows the memory status at a certain point in time. Therefore,
-// - If _claim_table has an entry for an offset and size at that point in time,
-// the place at that offset has already been claimed (== it cannot be claimed now; a
-// new place must be found).
-// - If _claim_table has no entry covering an offset at that point in time,
-// the place at that offset can be claimed.
-// 2. In the loop over _claim_table, the current claim_base_offset is always bigger than
-// the previous claim_base_offset.
-void FirstFitPlanner::claim(const ir::OperandIndex &ind, size_t size)
-{
- assert(size != 0);
-
- // Find the right position for claiming
- uint32_t next_offset = 0;
- for (auto &mem_claim : _claim_table)
- {
- auto claimed_base_offset = mem_claim.first;
- auto claimed_size = _mem_plans[mem_claim.second].size;
- if (next_offset + size <= claimed_base_offset)
- {
- break;
- }
- else
- {
- next_offset = claimed_base_offset + claimed_size;
- }
- }
-
- // Now next_offset is set to the proper offset
- _claim_table[next_offset] = ind;
- _mem_plans[ind] = {next_offset, size};
-
- VERBOSE(FF_PLANNER) << "claim(#" << ind.value() << "): [+" << next_offset << ", " << size << "sz]"
- << std::endl;
-
- if (_capacity < next_offset + size)
- {
- _capacity = next_offset + size;
- }
-}
-
-void FirstFitPlanner::release(const ir::OperandIndex &ind)
-{
- for (auto it = _claim_table.cbegin(); it != _claim_table.cend(); ++it)
- {
- if (it->second == ind)
- {
- uint32_t offset = it->first;
- uint32_t index = ind.value();
- uint32_t size = _mem_plans[ind].size;
-
- _claim_table.erase(it);
-
- VERBOSE(FF_PLANNER) << "release(#" << index << "): [+" << offset << ", " << size << "sz]"
- << std::endl;
- return;
- }
- }
- assert(!"Cannot release for given index. It has been not claimed or released already.");
-}
-
-WICPlanner::WICPlanner()
- : _initialized(false), _capacity(0), _mem_plans(), _live_operands(), _interference_graph(),
- _operands()
-{
- // DO NOTHING
-}
-
-void WICPlanner::claim(const ir::OperandIndex &ind, size_t size)
-{
- assert(size != 0);
-
- _operands.emplace(size, ind);
- _interference_graph[ind].insert(_interference_graph[ind].end(), _live_operands.cbegin(),
- _live_operands.cend());
- for (const auto &live_operand : _live_operands)
- {
- _interference_graph[live_operand].emplace_back(ind);
- }
- _live_operands.emplace(ind);
-
- VERBOSE(WIC_PLANNER) << "claim(#" << ind.value() << "): [" << size << "sz]" << std::endl;
-}
-
-void WICPlanner::release(const ir::OperandIndex &ind)
-{
- _live_operands.erase(ind);
- VERBOSE(WIC_PLANNER) << "release(#" << ind.value() << ")" << std::endl;
-}
-
-/*
- * Build memory plans using liveness and size of operands
- * 1. Build the interference graph at claim()
- * - Two operands interfere if their live ranges overlap
- * 2. Sort operands in descending order of size
- * - Use std::multimap to sort operands
- * 3. Allocate a memory block for each sorted operand
- * - Find a free memory block that does not overlap with interfering operands
- */
-void WICPlanner::buildMemoryPlans()
-{
- for (const auto &operand : _operands)
- {
- uint32_t size = operand.first;
- const ir::OperandIndex &ind = operand.second;
- VERBOSE(WIC_PLANNER) << "build_plan(#" << ind.value() << "): [" << size << "sz]" << std::endl;
-
- uint32_t next_offset = 0;
- if (_interference_graph.count(ind))
- {
- // Find interfered memory plans and sort them by offset
- std::multimap<uint32_t, uint32_t> interfered_plans;
- for (const auto &interference : _interference_graph[ind])
- {
- if (_mem_plans.count(interference))
- interfered_plans.emplace(_mem_plans[interference].offset, _mem_plans[interference].size);
- }
-
- // Find free memory block in first-fit manner
- for (const auto &interfered_plan : interfered_plans)
- {
- auto claimed_base_offset = interfered_plan.first;
- auto claimed_size = interfered_plan.second;
- VERBOSE(WIC_PLANNER) << "interfere : [+" << claimed_base_offset << ", " << claimed_size
- << "sz]" << std::endl;
- if (next_offset + size <= claimed_base_offset)
- {
- break;
- }
- else if (next_offset < claimed_base_offset + claimed_size)
- {
- next_offset = claimed_base_offset + claimed_size;
- }
- }
- }
- else
- {
- VERBOSE(WIC_PLANNER) << "No interference" << std::endl;
- }
-
- _mem_plans[ind] = {next_offset, size};
- VERBOSE(WIC_PLANNER) << "alloc(#" << ind.value() << "): [+" << next_offset << ", " << size
- << "sz]" << std::endl;
-
- if (_capacity < next_offset + size)
- {
- _capacity = next_offset + size;
- }
- }
- _initialized = true;
- _interference_graph.clear();
- _operands.clear();
-}
-
-WICPlanner::MemoryPlans &WICPlanner::memory_plans()
-{
- if (!_initialized)
- buildMemoryPlans();
- return _mem_plans;
-}
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
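
FirstFitPlanner::claim() above walks the offset-ordered claim table and stops at the first gap large enough for the request; storing _claim_table in a std::map is exactly what makes the ascending-offset assumption hold. A compilable distillation of the search, with the claim table and memory plans collapsed into one offset-to-size map (a simplification of the real structures):

#include <cstdint>
#include <iostream>
#include <map>

// offset -> size of live claims, iterated in ascending offset order
static std::map<uint32_t, uint32_t> claims;

uint32_t claim(uint32_t size)
{
  uint32_t next_offset = 0;
  for (const auto &c : claims)
  {
    if (next_offset + size <= c.first)
      break;                          // the gap before this claim is big enough
    next_offset = c.first + c.second; // otherwise jump past it
  }
  claims[next_offset] = size;
  return next_offset;
}

int main()
{
  std::cout << claim(10) << std::endl; // 0
  std::cout << claim(20) << std::endl; // 10
  claims.erase(0);                     // release the first block
  std::cout << claim(5) << std::endl;  // 0: first fit reuses the freed gap
  std::cout << claim(10) << std::endl; // 30: the remaining 5-byte gap is too small
  return 0;
}
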
diff --git a/runtime/onert/core/src/backend/cpu_common/MemoryPlanner.h b/runtime/onert/core/src/backend/cpu_common/MemoryPlanner.h
deleted file mode 100644
index 7c387e542..000000000
--- a/runtime/onert/core/src/backend/cpu_common/MemoryPlanner.h
+++ /dev/null
@@ -1,160 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file        MemoryPlanner.h
- * @brief       This file contains Memory Planning related classes
- */
-
-#ifndef __ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_H__
-#define __ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_H__
-
-#include <map>
-#include <vector>
-#include <unordered_set>
-#include <memory>
-
-#include "backend/cpu_common/Allocator.h"
-#include "backend/cpu_common/IMemoryPlanner.h"
-#include "ir/OperandIndexMap.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-/**
- * @brief Class to plan memory using the bump strategy
- */
-class BumpPlanner : public IMemoryPlanner
-{
-public:
- /**
- * @brief Claim memory for an operand using the bump strategy
- * @param[in] index The operand index
- * @param[in] size The size of the memory
- */
- void claim(const ir::OperandIndex &, size_t) override;
- /**
- * @brief Release memory for an operand using the bump strategy
- * @param[in] index The operand index
- */
- void release(const ir::OperandIndex &) override;
- /**
- * @brief Get capacity for memory planning
- * @return The value of capacity
- */
- uint32_t capacity() override { return _capacity; }
- /**
- * @brief Get MemoryPlans
- * @return MemoryPlans
- */
- MemoryPlans &memory_plans() override { return _mem_plans; }
-
-private:
- uint32_t _capacity = 0;
- MemoryPlans _mem_plans;
-};
-
-/**
- * @brief Class to plan memory using the first-fit strategy
- */
-class FirstFitPlanner : public IMemoryPlanner
-{
-public:
- /**
- * @brief Claim memory for an operand using the first-fit strategy
- * @param[in] index The operand index
- * @param[in] size The size of the memory
- */
- void claim(const ir::OperandIndex &, size_t) override;
- /**
- * @brief Release memory for an operand using the first-fit strategy
- * @param[in] index The operand index
- */
- void release(const ir::OperandIndex &) override;
- /**
- * @brief Get capacity for memory planning
- * @return The value of capacity
- */
- uint32_t capacity() override { return _capacity; }
- /**
- * @brief Get MemoryPlans
- * @return MemoryPlans
- */
- MemoryPlans &memory_plans() override { return _mem_plans; }
-
-private:
- uint32_t _capacity = 0;
- MemoryPlans _mem_plans;
- // Use std::map because claim() assumes that _claim_table is sorted by uint32_t(base_offset)
- std::map<uint32_t, ir::OperandIndex> _claim_table;
-};
-
-/**
- * @brief Class to plan memory using the Weighted Interval Color (WIC) algorithm
- */
-class WICPlanner : public IMemoryPlanner
-{
-public:
- WICPlanner();
-
- /**
- * @brief Claim memory for an operand using the WIC algorithm
- * @param[in] index The operand index
- * @param[in] size The size of the memory
- */
- void claim(const ir::OperandIndex &, size_t) override;
- /**
- * @brief Release memory for an operand using the WIC algorithm
- * @param[in] index The operand index
- */
- void release(const ir::OperandIndex &) override;
- /**
- * @brief Get capacity for memory planning
- * @return The value of capacity
- */
- uint32_t capacity() override
- {
- if (!_initialized)
- buildMemoryPlans();
- return _capacity;
- }
- /**
- * @brief Get MemoryPlans
- * @return MemoryPlans
- */
- MemoryPlans &memory_plans() override;
-
-private:
- void buildMemoryPlans();
-
- bool _initialized;
- uint32_t _capacity;
- MemoryPlans _mem_plans;
- std::unordered_set<ir::OperandIndex> _live_operands;
- ir::OperandIndexMap<std::vector<ir::OperandIndex>> _interference_graph;
- // Sort operands by descending order of size
- std::multimap<uint32_t, ir::OperandIndex, std::greater<uint32_t>> _operands;
-};
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_H__
diff --git a/runtime/onert/core/src/backend/cpu_common/MemoryPlanner.test.cc b/runtime/onert/core/src/backend/cpu_common/MemoryPlanner.test.cc
deleted file mode 100644
index 5208a94d4..000000000
--- a/runtime/onert/core/src/backend/cpu_common/MemoryPlanner.test.cc
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "MemoryPlanner.h"
-#include "ir/Index.h"
-
-TEST(Allocator, allocate_test)
-{
- ::onert::backend::cpu_common::Allocator allocator(1024);
- ASSERT_NE(allocator.base(), nullptr);
-}
-
-TEST(BumpPlanner, claim_test)
-{
- ::onert::backend::cpu_common::BumpPlanner planner;
-
- auto claim = [&planner](uint32_t index, size_t size, uint32_t expected_offset) {
- onert::ir::OperandIndex mem_idx(index);
- planner.claim(mem_idx, size);
- auto mem_blk = planner.memory_plans()[mem_idx];
- ASSERT_EQ(mem_blk.offset, expected_offset);
- ASSERT_EQ(mem_blk.size, size);
- };
-
- claim(0, 10, 0);
- claim(1, 20, 10);
- claim(2, 30, 30);
-}
-
-TEST(FirstFitPlanner, claim_release_test)
-{
- ::onert::backend::cpu_common::FirstFitPlanner planner;
-
- auto claim = [&planner](uint32_t index, size_t size, uint32_t expected_offset) {
- onert::ir::OperandIndex mem_idx(index);
- planner.claim(mem_idx, size);
- auto mem_blk = planner.memory_plans()[mem_idx];
- ASSERT_EQ(mem_blk.offset, expected_offset);
- ASSERT_EQ(mem_blk.size, size);
- };
-
- auto release = [&planner](uint32_t index) {
- onert::ir::OperandIndex mem_idx(index);
- planner.release(mem_idx);
- };
-
- // 0 CLAIM - 10
- claim(0, 10, 0);
-
- // 1 CLAIM - 20
- claim(1, 20, 10);
-
- // 2 CLAIM - 30
- claim(2, 30, 30);
-
- // 0 RELEASE - 10
- release(0);
-
- // 3 CLAIM - 20
- claim(3, 20, 60);
-
- // 4 CLAIM - 5
- claim(4, 5, 0);
-
- // 5 CLAIM - 10
- claim(5, 10, 80);
-
- // 6 CLAIM - 5
- claim(6, 5, 5);
-
- // 2 RELEASE - 30
- release(2);
-
- // 7 CLAIM - 35
- claim(7, 35, 90);
-
- // 8 CLAIM - 10
- claim(8, 10, 30);
-
- // 4 RELEASE - 5
- release(4);
-
- // 9 CLAIM - 10
- claim(9, 10, 40);
-
- // 10 CLAIM - 10
- claim(10, 10, 50);
-
- // 6 RELEASE
- release(6);
-
- // 1 RELEASE
- release(1);
-
- // 8 RELEASE
- release(8);
-
- // 9 RELEASE
- release(9);
-
- // 10 RELEASE
- release(10);
-
- // 3 RELEASE
- release(3);
-
- // 5 RELEASE
- release(5);
-
- // 7 RELEASE
- release(7);
-}
-
-TEST(WICPlanner, claim_release_test)
-{
- ::onert::backend::cpu_common::WICPlanner planner;
-
- auto claim = [&planner](uint32_t index, size_t size) {
- onert::ir::OperandIndex mem_idx(index);
- planner.claim(mem_idx, size);
- };
-
- auto release = [&planner](uint32_t index) {
- onert::ir::OperandIndex mem_idx(index);
- planner.release(mem_idx);
- };
-
- auto verify = [&planner](uint32_t index, uint32_t size, uint32_t expected_offset) {
- onert::ir::OperandIndex mem_idx(index);
- auto mem_blk = planner.memory_plans()[mem_idx];
- ASSERT_EQ(mem_blk.offset, expected_offset);
- ASSERT_EQ(mem_blk.size, size);
- };
-
- auto capacity = [&planner](uint32_t expected_capacity) {
- auto actual_capacity = planner.capacity();
- ASSERT_EQ(actual_capacity, expected_capacity);
- };
-
- claim(0, 20);
- claim(1, 5);
- release(0);
- claim(2, 10);
- release(1);
- claim(3, 10);
- release(2);
- claim(4, 10);
- release(3);
- claim(5, 20);
- release(4);
- claim(6, 20);
- release(5);
- release(6);
-
- // VERIFY 0 - 0
- verify(0, 20, 0);
-
- // VERIFY 1 - 20
- verify(1, 5, 20);
-
- // VERIFY 2 - 0
- verify(2, 10, 0);
-
- // VERIFY 3 - 10
- verify(3, 10, 10);
-
- // VERIFY 4 - 20
- verify(4, 10, 20);
-
- // VERIFY 5 - 0
- verify(5, 20, 0);
-
- // VERIFY 6 - 20
- verify(6, 20, 20);
-
- // CAPACITY - 40
- capacity(40);
-}
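
The expected WIC offsets above can be derived by hand: with this claim/release order only neighbors i and i+1 interfere, operands are placed in descending size order (0, 5, 6, then 2, 3, 4, then 1), and each lands at the first offset clear of its already-placed neighbors. A quick check of the resulting capacity, assuming that derivation:

#include <algorithm>
#include <cassert>

int main()
{
  // offset + size per operand 0..6, from the hand derivation:
  // 0 -> +0, 1 -> +20, 2 -> +0, 3 -> +10, 4 -> +20, 5 -> +0, 6 -> +20
  unsigned end_off[] = {0 + 20, 20 + 5, 0 + 10, 10 + 10, 20 + 10, 0 + 20, 20 + 20};
  assert(*std::max_element(end_off, end_off + 7) == 40); // capacity == 40
  return 0;
}
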
diff --git a/runtime/onert/core/src/backend/cpu_common/MemoryPlannerFactory.cc b/runtime/onert/core/src/backend/cpu_common/MemoryPlannerFactory.cc
deleted file mode 100644
index ead4f3294..000000000
--- a/runtime/onert/core/src/backend/cpu_common/MemoryPlannerFactory.cc
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "MemoryPlannerFactory.h"
-
-#include "MemoryPlanner.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-MemoryPlannerFactory &MemoryPlannerFactory::get()
-{
- static MemoryPlannerFactory instance;
- return instance;
-}
-
-IMemoryPlanner *MemoryPlannerFactory::create(const std::string &key)
-{
- if (key == "FirstFit")
- {
- return new FirstFitPlanner;
- }
- else if (key == "Bump")
- {
- return new BumpPlanner;
- }
- else if (key == "WIC")
- {
- return new WICPlanner;
- }
- return new FirstFitPlanner; // Default Planner
-}
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/core/src/backend/cpu_common/MemoryPlannerFactory.h b/runtime/onert/core/src/backend/cpu_common/MemoryPlannerFactory.h
deleted file mode 100644
index d14ec13ca..000000000
--- a/runtime/onert/core/src/backend/cpu_common/MemoryPlannerFactory.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_FACTORY_H__
-#define __ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_FACTORY_H__
-
-#include "backend/cpu_common/IMemoryPlanner.h"
-
-#include <string>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-class MemoryPlannerFactory
-{
-public:
- static MemoryPlannerFactory &get();
-
-private:
- MemoryPlannerFactory() = default;
-
-public:
- IMemoryPlanner *create(const std::string &key);
-};
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_FACTORY_H__
diff --git a/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc b/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc
deleted file mode 100644
index 440f70c93..000000000
--- a/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "backend/cpu_common/StaticTensorManager.h"
-
-#include "backend/cpu_common/DynamicTensorManager.h"
-#include <util/logging.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
- IDynamicTensorManager *dynamic_tensor_manager)
- : _const_mgr{new DynamicMemoryManager()}, _nonconst_mgr{new MemoryManager()}, _tensors{reg},
- _dynamic_tensor_manager{dynamic_tensor_manager}
-{
- // DO NOTHING
-}
-
-void StaticTensorManager::allocateConsts(void)
-{
- for (auto &pair : _tensors->native_tensors())
- {
- const auto &ind = pair.first;
- auto tensor = pair.second;
- if (_as_constants[ind])
- {
- auto mem_alloc = _const_mgr->allocate(ind, tensor->total_size());
- tensor->setBuffer(mem_alloc);
- auto buffer = mem_alloc->base();
- VERBOSE(CPU_COMMON_StaticTensorManager) << "CONSTANT TENSOR(#" << ind.value()
- << "): " << static_cast<void *>(buffer)
- << "size : " << tensor->total_size() << std::endl;
- }
- }
-}
-
-void StaticTensorManager::allocateNonconsts(void)
-{
- _nonconst_mgr->allocate();
-
- for (auto &pair : _tensors->native_tensors())
- {
- const auto &ind = pair.first;
- auto tensor = pair.second;
- if (!_as_constants[ind] && !tensor->is_dynamic())
- {
- auto *buffer = _nonconst_mgr->getBuffer(ind);
- tensor->setBuffer(buffer);
-
- VERBOSE(CPU_COMMON_StaticTensorManager) << "TENSOR(#" << ind.value()
- << "): " << static_cast<void *>(buffer) << std::endl;
- }
- }
-}
-
-void StaticTensorManager::deallocateConsts(void) { _const_mgr->deallocate(); }
-
-void StaticTensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); }
-
-void StaticTensorManager::buildTensor(const ir::OperandIndex &ind,
- const ir::OperandInfo &tensor_info, ir::Layout backend_layout,
- bool as_const)
-{
- assert(!_tensors->getNativeTensor(ind));
- auto tensor = std::make_shared<Tensor>(tensor_info, backend_layout, _dynamic_tensor_manager);
- _tensors->setNativeTensor(ind, tensor);
- _as_constants[ind] = as_const;
-}
-
-void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
-{
- assert(_tensors->getNativeTensor(ind));
-
- // This method is called only when the tensor has a proper (static) shape
- assert(!_tensors->getNativeTensor(ind)->is_dynamic());
-
- if (!_as_constants[ind])
- _nonconst_mgr->claimPlan(ind, size);
-}
-
-void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
-{
- assert(_tensors->getNativeTensor(ind));
-
- // This method is called only when the tensor has a proper (static) shape
- assert(!_tensors->getNativeTensor(ind)->is_dynamic());
-
- if (!_as_constants[ind])
- _nonconst_mgr->releasePlan(ind);
-}
-
-void StaticTensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
-{
- for (const auto &it : _tensors->native_tensors())
- fn(it.first);
-}
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/core/src/backend/cpu_common/Tensor.cc b/runtime/onert/core/src/backend/cpu_common/Tensor.cc
deleted file mode 100644
index f34564dd9..000000000
--- a/runtime/onert/core/src/backend/cpu_common/Tensor.cc
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "backend/cpu_common/Tensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-size_t Tensor::calcOffset(const ir::Coordinates &coords) const
-{
- size_t rank = num_dimensions();
- rank = rank == 0 ? 1 : rank;
- size_t offset = 0;
- for (size_t i = 0; i < rank; ++i)
- {
- offset = offset * dimension(i) + coords[i];
- }
- offset *= sizeOfDataType(data_type());
- return offset;
-}
-
-void Tensor::setShape(const ir::Shape &new_shape) { _info.shape(new_shape); }
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
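
calcOffset() above is the Horner form of the row-major index, offset = ((c0*d1 + c1)*d2 + c2)*... scaled by the element size, with rank 0 addressed as rank 1. A standalone restatement; the shape, coordinate, and 4-byte element size are hypothetical example values:

#include <cstddef>
#include <iostream>
#include <vector>

size_t calcOffset(const std::vector<size_t> &dims, const std::vector<size_t> &coords,
                  size_t elem_size)
{
  size_t rank = dims.size();
  rank = rank == 0 ? 1 : rank; // rank 0 is addressed as rank 1, as in the original
  size_t offset = 0;
  for (size_t i = 0; i < rank; ++i)
    offset = offset * dims[i] + coords[i]; // Horner step of the row-major index
  return offset * elem_size;
}

int main()
{
  // Shape [2, 3, 4], coordinate (1, 2, 3): ((1*3 + 2)*4 + 3) * 4 = 92
  std::cout << calcOffset({2, 3, 4}, {1, 2, 3}, 4) << std::endl; // 92
  return 0;
}
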
diff --git a/runtime/onert/core/src/compiler/BackendManager.cc b/runtime/onert/core/src/compiler/BackendManager.cc
index db7a14a96..44442c065 100644
--- a/runtime/onert/core/src/compiler/BackendManager.cc
+++ b/runtime/onert/core/src/compiler/BackendManager.cc
@@ -16,22 +16,17 @@
#include "compiler/BackendManager.h"
-#include <memory>
-#include <dlfcn.h>
+#include "../backend/builtin/Backend.h"
+#include "../backend/builtin/Config.h"
-#include "backend/Backend.h"
-#include "backend/controlflow/Backend.h"
-#include "backend/controlflow/Config.h"
-#include "backend/IConfig.h"
-#include "util/logging.h"
-#include "util/ConfigSource.h"
-#include "misc/string_helpers.h"
+#include <dlfcn.h>
+#include <memory>
static const char *SHARED_LIB_EXT =
#if defined(__APPLE__) && defined(__MACH__)
- ".dylib";
+ ".dylib";
#else
- ".so";
+ ".so";
#endif
namespace onert
@@ -45,20 +40,20 @@ BackendManager &BackendManager::get()
return object;
}
-BackendManager::BackendManager() { loadControlflowBackend(); }
+BackendManager::BackendManager() { loadBuiltinBackend(); }
-void BackendManager::loadControlflowBackend()
+void BackendManager::loadBuiltinBackend()
{
- auto backend_object = std::unique_ptr<backend::controlflow::Backend, backend_destroy_t>(
- new backend::controlflow::Backend, [](backend::Backend *backend) { delete backend; });
+ auto backend_object = std::unique_ptr<backend::builtin::Backend, backend_destroy_t>(
+ new backend::builtin::Backend, [](backend::Backend *backend) { delete backend; });
bool initialized = backend_object->config()->initialize(); // Call initialize here?
if (!initialized)
{
- throw std::runtime_error(backend::controlflow::Config::ID + " backend initialization failed");
+ throw std::runtime_error(backend::builtin::Config::ID + " backend initialization failed");
}
- _controlflow = backend_object.get(); // Save the controlflow backend implementation pointer
- assert(_controlflow);
+ _builtin = backend_object.get(); // Save the builtin backend implementation pointer
+ assert(_builtin);
_gen_map.emplace(backend_object->config()->id(), std::move(backend_object));
}
@@ -69,68 +64,67 @@ void BackendManager::loadBackend(const std::string &backend)
return;
}
- // TODO Remove indentation
- // Workaround If backend have dynamic library with "-boost" suffix naming,
- // BackendManager load library with "-boost" suffix instead of library without suffix
- // This feature is used for custom backend extension to support additional operations
- {
- const std::string backend_boost_so = "libbackend_" + backend + "-boost" + SHARED_LIB_EXT;
- const std::string backend_so = "libbackend_" + backend + SHARED_LIB_EXT;
+ const std::string backend_so = "libbackend_" + backend + SHARED_LIB_EXT;
+ void *handle = dlopen(backend_so.c_str(), RTLD_LAZY | RTLD_LOCAL);
- void *handle = dlopen(backend_boost_so.c_str(), RTLD_LAZY | RTLD_LOCAL);
- if (handle == nullptr)
- {
- handle = dlopen(backend_so.c_str(), RTLD_LAZY | RTLD_LOCAL);
+ if (handle == nullptr)
+ {
+ VERBOSE(BackendManager) << "Failed to load backend '" << backend << "' - " << dlerror() << "\n";
+ return;
+ }
- if (handle == nullptr)
- {
- VERBOSE_F() << "Failed to load backend '" << backend << "' - " << dlerror() << std::endl;
- return;
- }
+ VERBOSE(BackendManager) << "Successfully loaded '" << backend << "'(" << backend_so << ")\n";
- VERBOSE_F() << "Successfully loaded '" << backend << "' - " << backend_so << "\n";
+ {
+ // load object creator function
+ auto backend_create = (backend_create_t)dlsym(handle, "onert_backend_create");
+ if (backend_create == nullptr)
+ {
+ // TODO replace `fprintf` with `VERBOSE`
+ fprintf(stderr, "BackendManager: unable to find function `onert_backend_create` : %s\n",
+ dlerror());
+ dlclose(handle);
+ return;
}
- else
+
+  // load object destroy function
+ auto backend_destroy = (backend_destroy_t)dlsym(handle, "onert_backend_destroy");
+ if (backend_destroy == nullptr)
{
- VERBOSE_F() << "Successfully loaded '" << backend << "' - " << backend_boost_so << "\n";
+ // TODO replace `fprintf` with `VERBOSE`
+    fprintf(stderr, "BackendManager: unable to find function `onert_backend_destroy` : %s\n",
+ dlerror());
+ dlclose(handle);
+ return;
}
+ auto backend_object =
+ std::unique_ptr<backend::Backend, backend_destroy_t>(backend_create(), backend_destroy);
+ bool initialized = backend_object->config()->initialize(); // Call initialize here?
+ if (!initialized)
{
- // load object creator function
- auto backend_create = (backend_create_t)dlsym(handle, "onert_backend_create");
- if (backend_create == nullptr)
- {
- fprintf(stderr, "BackendManager: unable to open function onert_backend_create : %s\n",
- dlerror());
- abort();
- }
+ VERBOSE(BackendManager) << backend.c_str()
+ << " backend initialization failed. Don't use this backend"
+ << std::endl;
+ dlclose(handle);
+ return;
+ }
+ _gen_map.emplace(backend_object->config()->id(), std::move(backend_object));
+ }
- // load object creator function
- auto backend_destroy = (backend_destroy_t)dlsym(handle, "onert_backend_destroy");
- if (backend_destroy == nullptr)
+  // Save the backend handle so it is not lost without a matching dlclose()
+ auto u_handle = std::unique_ptr<void, dlhandle_destroy_t>{
+ handle, [id = backend, filename = backend_so](void *h) {
+ if (dlclose(h) == 0)
{
- fprintf(stderr, "BackendManager: unable to open function onert_backend_destroy : %s\n",
- dlerror());
- abort();
+ VERBOSE(BackendManager) << "Successfully unloaded '" << id << "'(" << filename << ")\n";
}
-
- auto backend_object =
- std::unique_ptr<backend::Backend, backend_destroy_t>(backend_create(), backend_destroy);
- bool initialized = backend_object->config()->initialize(); // Call initialize here?
- if (!initialized)
+ else
{
- VERBOSE_F() << backend.c_str() << " backend initialization failed. Don't use this backend"
- << std::endl;
- dlclose(handle);
- return;
+ VERBOSE(BackendManager) << "Failed to unload backend '" << id << "'- " << dlerror() << "\n";
}
- _gen_map.emplace(backend_object->config()->id(), std::move(backend_object));
- }
-
- // Save backend handle (avoid warning by handle lost without dlclose())
- auto u_handle = std::unique_ptr<void, dlhandle_destroy_t>{handle, [](void *h) { dlclose(h); }};
- _handle_map.emplace(backend, std::move(u_handle));
- }
+ }};
+ _handle_map.emplace(backend, std::move(u_handle));
}
backend::Backend *BackendManager::get(const std::string &key)
@@ -153,7 +147,7 @@ const backend::Backend *BackendManager::get(const std::string &key) const
return nullptr;
}
-const backend::controlflow::Backend *BackendManager::getControlflow() const { return _controlflow; }
+const backend::Backend *BackendManager::getBuiltin() const { return _builtin; }
} // namespace compiler
} // namespace onert
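The rewritten loadBackend above is the classic dlopen plugin pattern: open the shared object, resolve a paired create/destroy entry-point set, and keep the library handle alive as long as the created object. A minimal sketch of that pattern, with hypothetical names and the logging trimmed:

#include <dlfcn.h>
#include <memory>
#include <string>

struct Backend; // stand-in for backend::Backend

using create_fn = Backend *(*)();
using destroy_fn = void (*)(Backend *);

std::unique_ptr<Backend, destroy_fn> loadPlugin(const std::string &path)
{
  void *handle = dlopen(path.c_str(), RTLD_LAZY | RTLD_LOCAL);
  if (handle == nullptr)
    return {nullptr, nullptr};

  auto create = reinterpret_cast<create_fn>(dlsym(handle, "onert_backend_create"));
  auto destroy = reinterpret_cast<destroy_fn>(dlsym(handle, "onert_backend_destroy"));
  if (create == nullptr || destroy == nullptr)
  {
    dlclose(handle);
    return {nullptr, nullptr};
  }
  // The dlopen handle must outlive the created object; the patch stores it in
  // _handle_map with a deleter that calls dlclose() and logs the result.
  return {create(), destroy};
}

Because the deleter is resolved from the same library as the object, the plugin frees exactly what it allocated, which avoids crossing allocator boundaries.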
diff --git a/runtime/onert/core/src/compiler/Compiler.cc b/runtime/onert/core/src/compiler/Compiler.cc
index 93dbbc3b5..ba621bb4f 100644
--- a/runtime/onert/core/src/compiler/Compiler.cc
+++ b/runtime/onert/core/src/compiler/Compiler.cc
@@ -16,284 +16,185 @@
#include "compiler/Compiler.h"
-#include "ParamChecker.h"
+#include "CompilerHelpers.h"
#include "ExecutorFactory.h"
-#include "OperationValidator.h"
-#include "Fp32ToFp16Converter.h"
-
-#include <backend/controlflow/Config.h>
-#include "compiler/BackendManager.h"
-#include "compiler/IScheduler.h"
-#include "compiler/ManualScheduler.h"
-#include "compiler/HEScheduler.h"
-#include "compiler/StaticShapeInference.h"
-#include "exec/ExecTime.h"
-#include "ir/operation/LowerInfo.h"
-#include "dumper/dot/DotDumper.h"
-#include "compiler/Linear.h"
-#include "interp/InterpExecutor.h"
-#include "util/ConfigSource.h"
-#include "util/logging.h"
-#include "ir/OperationDumper.h"
-#include "misc/string_helpers.h"
+#include "ShapeValidator.h"
+#include "pass/ConstantOutputPass.h"
+#include "pass/OddOutputPass.h"
+#include "pass/PassRunner.h"
+#include "pass/UnusedOperandEliminationPass.h"
+#include "../dumper/dot/DotDumper.h"
+#include "../exec/SingleModelExecutors.h"
+#include "../ir/OperationDumper.h"
+#include "../ir/verifier/Verifier.h"
+
+#include "compiler/StaticShapeInferer.h"
+
+#include <misc/string_helpers.h>
+#include <misc/polymorphic_downcast.h>
namespace onert
{
-
namespace compiler
{
-CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Subgraphs &subgs)
+Compiler::Compiler(const std::shared_ptr<ir::Model> &model, CompilerOptions &copt)
+ : _model{model}, _options{&copt}
{
- CompilerOptions options;
- options.backend_list = nnfw::misc::split(util::getConfigString(util::config::BACKENDS), ';');
- options.is_primary_subgraph = false;
- options.trace_filepath = util::getConfigString(util::config::TRACE_FILEPATH);
- options.graph_dump_level = util::getConfigInt(util::config::GRAPH_DOT_DUMP);
- options.op_seq_max_node = util::getConfigInt(util::config::OP_SEQ_MAX_NODE);
- options.executor = util::getConfigString(util::config::EXECUTOR);
- options.he_scheduler = util::getConfigBool(util::config::USE_SCHEDULER);
- options.he_profiling_mode = util::getConfigBool(util::config::PROFILING_MODE);
- options.disable_compile = util::getConfigBool(util::config::DISABLE_COMPILE);
- options.fp16_enable = util::getConfigBool(util::config::FP16_ENABLE);
-#ifdef RUY_PROFILER
- options.op_seq_max_node = 1;
-#endif
-
- {
- // Backend for all
- auto &ms_options = options.manual_scheduler_options;
-
- // Default value for op_backend_all is first element in the backend list
- ms_options.backend_for_all = util::getConfigString(util::config::OP_BACKEND_ALLOPS);
-
-// Opcode to Backend
-#define OP(OpName) \
- { \
- const auto &backend_str = util::getConfigString(util::config::OP_BACKEND_##OpName); \
- if (!backend_str.empty()) \
- { \
- ms_options.opcode_to_backend[ir::OpCode::OpName] = backend_str; \
- } \
- }
-#include "ir/Operations.lst"
-#undef OP
-
- // Index to Backend
- // TODO Support multiple subgraphs for manual scheduling
- auto map_str = util::getConfigString(util::config::OP_BACKEND_MAP);
- auto key_val_list = nnfw::misc::split(map_str, ';');
- for (const auto &key_val_str : key_val_list)
- {
- if (key_val_str.empty())
- {
- continue;
- }
-
- auto key_val = nnfw::misc::split(key_val_str, '=');
- const auto &key_str = key_val.at(0);
- const auto &val = key_val.at(1);
- auto key = static_cast<uint32_t>(std::stoi(key_str));
-
- subgs.at(ir::SubgraphIndex{0})
- ->operations()
- .at(ir::OperationIndex{key}); // Check if exist, or this wil throw
- ms_options.index_to_backend.emplace(ir::OperationIndex{key}, val);
- }
- }
- return options;
+ // DO NOTHING
}
-Compiler::Compiler(const std::shared_ptr<ir::Subgraphs> &subgs)
- : _subgraphs{subgs}, _state{State::CREATED}
+Compiler::Compiler(const std::shared_ptr<ir::NNPkg> &nnpkg,
+ std::vector<std::unique_ptr<CompilerOptions>> &copts)
+ : _model{nnpkg->primary_model()}, _options{copts[0].get()}
{
- // Set default values for CompilerOptions
- // All these default values should not be fetched from Env, when we stop supporting Android NN
- // API.
- _options = fetchCompilerOptionsFromGlobalConfig(*subgs);
+ // Use for single model only
+ assert(nnpkg->model_count() == 1);
}
-void Compiler::enableToFp16() { _options.fp16_enable = true; }
-
-void Compiler::checkProfilerConditions()
+std::shared_ptr<CompilerArtifact> Compiler::compile(void)
{
- if (!_options.he_scheduler)
- throw std::runtime_error("Heterogeneous scheduler must be enabled during profiling.");
-
- if (_options.executor != "Dataflow")
- throw std::runtime_error("Profiling mode works only with 'Dataflow' executor");
-}
+ /***************************************************
+ * Prepare compilation phase
+ ***************************************************/
+ if (!_options)
+ throw std::runtime_error{"Empty compile option"};
-std::shared_ptr<exec::ExecutorMap> Compiler::compile(void)
-{
- // Set control flow backend for control flow operators
+ // Mode check
+  // TODO Handle options for each model
+ if (_options->he_profiling_mode)
{
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::If] =
- backend::controlflow::Config::ID;
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] =
- backend::controlflow::Config::ID;
+ if (!_options->he_scheduler)
+ throw std::runtime_error("Heterogeneous scheduler must be enabled during profiling.");
+
+ if (_options->executor != "Dataflow")
+ throw std::runtime_error("Profiling mode works only with 'Dataflow' executor");
}
- // FIXME This is a workaround for bcq operations, should remove it
+ if (!_options->minmax_filepath.empty())
{
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq";
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq";
+ if (_options->executor != "Linear")
+ throw std::runtime_error("Recording minmax works only with Linear executor");
}
+ if (!_model->hasOnly<ir::Graph>())
{
- VERBOSE(Compiler) << std::boolalpha;
- VERBOSE(Compiler) << "==== Compiler Options ====" << std::endl;
- VERBOSE(Compiler) << "backend_list : "
- << nnfw::misc::join(_options.backend_list.begin(),
- _options.backend_list.end(), "/")
- << std::endl;
- VERBOSE(Compiler) << "trace_filepath : " << _options.trace_filepath << std::endl;
- VERBOSE(Compiler) << "graph_dump_level : " << _options.graph_dump_level << std::endl;
- VERBOSE(Compiler) << "op_seq_max_node : " << _options.op_seq_max_node << std::endl;
- VERBOSE(Compiler) << "executor : " << _options.executor << std::endl;
- VERBOSE(Compiler) << "manual_scheduler_options : (Too many things to print)" << std::endl;
- VERBOSE(Compiler) << "he_scheduler : " << _options.he_scheduler << std::endl;
- VERBOSE(Compiler) << "he_profiling_mode : " << _options.he_profiling_mode << std::endl;
- VERBOSE(Compiler) << "disable_compile : " << _options.disable_compile << std::endl;
- VERBOSE(Compiler) << "fp16_enable : " << _options.fp16_enable << std::endl;
- VERBOSE(Compiler) << std::noboolalpha;
+ throw std::runtime_error("Compiler can only compile models for inference.");
}
- /***************************************************
- * Prepare compilation phase
- ***************************************************/
+ _options->forceInternalOptions();
+ _options->verboseOptions();
- auto executors = std::make_shared<exec::ExecutorMap>();
+ auto custom_kernel_builder = _model->getKernelBuilder();
- // Compilable check
- // TODO: Support hybrid execution -
- // execution between interpreter and compiled executor (including control flow)
- if (!checkCompilable())
- {
- _subgraphs->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
- executors->emplace(index, std::make_unique<interp::InterpExecutor>(subg));
- });
- _state = State::COMPILED;
- return executors;
- }
+ _model->iterate([&](const ir::SubgraphIndex &, ir::IGraph &graph) {
+ auto &subg = nnfw::misc::polymorphic_downcast<ir::Graph &>(graph);
- // Mode check
- if (_options.he_profiling_mode)
- checkProfilerConditions();
+ // Mandatory passes
+ pass::PassRunner{}
+ .append(std::make_unique<pass::ConstantOutputPass>(subg))
+ .append(std::make_unique<pass::OddOutputPass>(subg))
+ .run();
+
+ // Optimizations
+ pass::PassRunner{}.append(std::make_unique<pass::UnusedOperandEliminationPass>(subg)).run();
+ });
/***************************************************
* Backend independent analysis & optimization phase
***************************************************/
- auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_options.graph_dump_level);
+ // TODO Handle dump level for each model
+ auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_options->graph_dump_level);
+ onert::dumper::dot::DotDumper dot_dumper(dump_level);
+
+ // Tracing context
+ auto tracing_ctx = std::make_unique<util::TracingCtx>();
// Lower: Assign backend
std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>> lowered_subgs;
- _subgraphs->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
- _options.is_primary_subgraph = (index == ir::SubgraphIndex{0});
- onert::dumper::dot::DotDumper dot_dumper(subg, dump_level);
- dot_dumper.dump(nnfw::misc::str("before_lower_subg-", index.value()));
-
- // Lower: Assign backend
- lowered_subgs[index] = std::make_unique<compiler::LoweredGraph>(subg, _options);
-
- // Check backend(s) for subgraph support FP16
- bool backends_support_fp16 = true;
- auto &contexts = (*lowered_subgs[index]).backend_contexts();
- for (auto it = contexts.begin(); it != contexts.end(); it++)
- {
- // Controlflow backend is not for actual computaion of operations so it is an exception
- if (it->first->config()->id() != backend::controlflow::Config::ID)
- backends_support_fp16 &= it->first->config()->supportFP16();
- }
-
- if (_options.fp16_enable && backends_support_fp16)
- {
- // NOTE: the only acl_cl backend enables fp16 mode
- Fp32ToFp16Converter(*lowered_subgs[index]).run();
- }
+ {
+ _model->iterate([&](const ir::SubgraphIndex &subg_index, ir::IGraph &graph) {
+ auto &subg = nnfw::misc::polymorphic_downcast<ir::Graph &>(graph);
+
+ // Lower: Assign backend
+ lowered_subgs[subg_index] = std::make_unique<compiler::LoweredGraph>(subg, *_options);
+ // Set tracing_ctx for copied graph
+ if (tracing_ctx != nullptr)
+ tracing_ctx->setSubgraphIndex(&(lowered_subgs[subg_index]->graph()), subg_index.value());
+ });
+ }
- subg.setSubgraphs(nullptr);
- });
+ _model.reset();
- _subgraphs.reset();
+ for (const auto &pair : lowered_subgs)
+ {
+ const auto &subg_index = pair.first;
+ const auto &lowered_subg = pair.second;
+ dot_dumper.dump(*lowered_subg, nnfw::misc::str("after_lower_subg-", subg_index.value()));
+ }
// Shape inference.
{
+    // Run the StaticShapeInferer of the primary subgraph. All child StaticShapeInferers
+    // are called recursively
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers =
+ createStaticShapeInferers(lowered_subgs);
+
const auto primary_subg_idx = ir::SubgraphIndex{0};
- StaticShapeInferer inferer(primary_subg_idx, lowered_subgs);
- lowered_subgs.at(primary_subg_idx)
- ->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- auto has_dynamic_tensor = inferer.infer(op_seq);
- op_seq.has_dynamic_tensor(has_dynamic_tensor);
- });
- inferer.dump();
- }
+ inferers.at(primary_subg_idx)->infer();
- /*************************************************************
- * Backend independent analysis & optimization phase finished
- *************************************************************/
+ for (const auto &pair_inferer : inferers)
+ {
+ const auto inferer = pair_inferer.second.get();
+ inferer->dump();
+ }
+ }
- // operation validation
- for (auto &pair : lowered_subgs)
+ // Shape validation
+  // TODO Move shape-independent feature checks from ShapeValidator to OperationValidator
+  // TODO Move ShapeValidator into shape inference
+  //      - Validate input tensor shapes
+  //      - Validate parameter values whose valid range depends on the input tensor shapes
+  //      - Output tensor shape validation is unnecessary because the static/dynamic
+  //        shape inferers already produce valid output shapes
+ for (const auto &pair : lowered_subgs)
{
auto &lowered_subg = pair.second;
- compiler::OperationValidator{lowered_subg->graph()}();
+ compiler::ShapeValidator{lowered_subg->graph()}();
}
- executors = std::make_shared<exec::ExecutorMap>();
- for (auto &pair : lowered_subgs)
+ /*************************************************************
+ * Backend independent analysis & optimization phase finished
+ *************************************************************/
+ auto executors = std::make_shared<exec::SingleModelExecutors>();
+ for (auto &&pair : lowered_subgs)
{
- const auto &subg_index = pair.first;
+ auto const model_index = ir::ModelIndex{0};
+ auto const subg_index = pair.first;
auto &lowered_subg = pair.second;
- auto indexed_ranks = lowered_subg->indexed_ranks();
-
- _options.is_primary_subgraph = (subg_index == ir::SubgraphIndex{0});
-
- onert::dumper::dot::DotDumper dot_dumper_lowered(lowered_subg.get(), dump_level);
- dot_dumper_lowered.dump("after_lower_subg-" + std::to_string(subg_index.value()));
+ auto const indexed_ranks = lowered_subg->indexed_ranks();
- ir::OperationDumper dumper("START SUBGRAPH " + std::to_string(subg_index.value()));
+ ir::OperationDumper dumper("Executor generation of Subgraph " +
+ std::to_string(subg_index.value()));
lowered_subg->graph().operations().iterate(
- [&](const ir::OperationIndex &, const ir::Operation &op) { op.accept(dumper); });
+ [&](const ir::OperationIndex &, const ir::IOperation &op) { op.accept(dumper); });
+
+ ExecutorFactoryArgs args;
+ args.tracing_ctx = tracing_ctx.get();
+ args.options = _options;
+ args.model_index = model_index;
+ args.custom_kernel_builder = custom_kernel_builder;
auto executor = std::unique_ptr<exec::IExecutor>{
- ExecutorFactory::get().create(std::move(lowered_subg), _options, executors)};
+ ExecutorFactory::get().create(std::move(lowered_subg), executors, args)};
executor->setIndexedRanks(indexed_ranks);
- executors->insert(std::make_pair(subg_index, std::move(executor)));
+ executors->emplace(model_index, subg_index, std::move(executor));
}
/********************************
* Code generation phase finished
********************************/
- _state = State::COMPILED;
- return executors;
-}
-
-bool Compiler::checkCompilable()
-{
- // Disable compile phase
- // When ready to use interpreter backend, remove this config and use backend setting
- if (_options.disable_compile)
- {
- return false;
- }
-
- // TODO check unspecified operand shape
-
- // Check compilable parameter
- for (uint32_t i = 0; i < _subgraphs->count(); ++i)
- {
- auto graph = _subgraphs->at(ir::SubgraphIndex{i});
- ParamChecker paramChecker{graph};
- paramChecker();
- if (paramChecker.haveNoneConstParam())
- {
- return false;
- }
- }
-
- return true;
+ return std::make_shared<CompilerArtifact>(executors, std::move(tracing_ctx));
}
} // namespace compiler
-
} // namespace onert
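Compiler::compile() above now pushes every subgraph through the mandatory passes and then the optimizations using PassRunner's chained interface. A minimal, self-contained sketch of that runner pattern (hypothetical IPass interface; the real passes live under src/compiler/pass/):

#include <memory>
#include <vector>

struct IPass
{
  virtual ~IPass() = default;
  virtual void run() = 0;
};

class PassRunner
{
public:
  // Returning *this enables the chained .append(...).append(...).run()
  // style seen in Compiler::compile() above.
  PassRunner &append(std::unique_ptr<IPass> pass)
  {
    _passes.emplace_back(std::move(pass));
    return *this;
  }

  void run()
  {
    for (auto &&pass : _passes)
      pass->run(); // passes execute in the order they were appended
  }

private:
  std::vector<std::unique_ptr<IPass>> _passes;
};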
diff --git a/runtime/onert/core/src/compiler/CompilerFactory.cc b/runtime/onert/core/src/compiler/CompilerFactory.cc
new file mode 100644
index 000000000..aeb0876c4
--- /dev/null
+++ b/runtime/onert/core/src/compiler/CompilerFactory.cc
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "compiler/CompilerFactory.h"
+
+#include "MultiModelCompiler.h"
+#ifdef ONERT_TRAIN
+#include "train/TrainingCompiler.h"
+#endif // ONERT_TRAIN
+
+#include "compiler/Compiler.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+CompilerFactory &CompilerFactory::get()
+{
+ static CompilerFactory singleton;
+ return singleton;
+}
+
+std::unique_ptr<ICompiler>
+CompilerFactory::create(const std::shared_ptr<ir::NNPkg> &nnpkg,
+ std::vector<std::unique_ptr<CompilerOptions>> &copts,
+ const compiler::train::TrainingInfo *training_info)
+{
+#ifdef ONERT_TRAIN
+  // Returning compiler for training
+ if (training_info)
+ return std::make_unique<train::TrainingCompiler>(nnpkg, copts, *training_info);
+#else // ONERT_TRAIN
+ (void)training_info;
+#endif // ONERT_TRAIN
+
+  // Returning compiler for inference
+ if (nnpkg->model_count() == 1)
+ return std::make_unique<Compiler>(nnpkg, copts);
+
+ return std::make_unique<MultiModelCompiler>(nnpkg, copts);
+}
+
+} // namespace compiler
+} // namespace onert
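A hypothetical call site for the factory above: with nnpkg and copts prepared elsewhere, passing nullptr for training_info selects an inference compiler, and the package's model count then decides between Compiler and MultiModelCompiler.

// Hypothetical usage; nnpkg and copts are assumed to be built elsewhere.
auto compiler = onert::compiler::CompilerFactory::get().create(nnpkg, copts, nullptr);
auto artifact = compiler->compile(); // ICompiler::compile(), as in Compiler above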
diff --git a/runtime/onert/core/src/compiler/CompilerHelpers.h b/runtime/onert/core/src/compiler/CompilerHelpers.h
new file mode 100644
index 000000000..798334b3b
--- /dev/null
+++ b/runtime/onert/core/src/compiler/CompilerHelpers.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_COMPILER_HELPERS_H__
+#define __ONERT_COMPILER_COMPILER_HELPERS_H__
+
+#include <compiler/ILoweredGraph.h>
+#include <compiler/StaticShapeInferer.h>
+#include <ir/Index.h>
+
+#include <memory>
+#include <unordered_map>
+
+namespace onert
+{
+namespace compiler
+{
+
+/**
+ * @brief Create a shape inferer map for a lowered model
+ * @param[in] lowered_subgs lowered model map
+ * @return Shape inferer map
+ */
+template <typename LoweredGraphType,
+ typename = std::enable_if_t<std::is_base_of<ILoweredGraph, LoweredGraphType>::value>>
+static std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>>
+createStaticShapeInferers(
+ const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<LoweredGraphType>> &lowered_subgs)
+{
+ std::unordered_map<ir::SubgraphIndex, ILoweredGraph *> lsubgs;
+ for (auto &&e : lowered_subgs)
+ lsubgs[e.first] = e.second.get();
+ return StaticShapeInferer::createStaticShapeInferers(lsubgs);
+}
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_COMPILER_HELPERS_H__
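The second, unnamed template parameter of createStaticShapeInferers above is a standard SFINAE guard: the helper drops out of overload resolution unless the map's value type derives from ILoweredGraph. An isolated illustration of the same guard with hypothetical types:

#include <type_traits>

struct Base {};
struct Derived : Base {};

template <typename T, typename = std::enable_if_t<std::is_base_of<Base, T>::value>>
void acceptsOnlyBase(const T &) {}

int main()
{
  acceptsOnlyBase(Derived{}); // OK: Derived inherits from Base
  // acceptsOnlyBase(42);     // would not compile: int fails the constraint
}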
diff --git a/runtime/onert/core/src/compiler/CompilerOptions.cc b/runtime/onert/core/src/compiler/CompilerOptions.cc
new file mode 100644
index 000000000..830d9dd00
--- /dev/null
+++ b/runtime/onert/core/src/compiler/CompilerOptions.cc
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "compiler/CompilerOptions.h"
+
+#include "../backend/builtin/Backend.h"
+
+#include "util/ConfigSource.h"
+#include "util/logging.h"
+
+#include <misc/string_helpers.h>
+
+namespace
+{
+
+using namespace onert;
+
+std::string getOpBackends(std::unordered_map<ir::OpCode, std::string> &opcode_to_backend)
+{
+ std::unordered_map<ir::OpCode, std::string>::iterator it;
+ std::string opbackends;
+
+ for (it = opcode_to_backend.begin(); it != opcode_to_backend.end(); ++it)
+ {
+ if (!opbackends.empty())
+ opbackends = opbackends + ", ";
+
+ auto opcode = it->first;
+ const std::string opname = ir::toString(opcode);
+ opbackends += opname + "=" + it->second;
+ }
+ return opbackends;
+}
+
+} // namespace
+
+namespace onert
+{
+namespace compiler
+{
+
+void ManualSchedulerOptions::setBackendMap(const std::string &str)
+{
+ // TODO Support multiple subgraphs for manual scheduling
+ auto key_val_list = nnfw::misc::split(str, ';');
+ for (const auto &key_val_str : key_val_list)
+ {
+ if (key_val_str.empty())
+ {
+ continue;
+ }
+
+ auto key_val = nnfw::misc::split(key_val_str, '=');
+ const auto &key_str = key_val.at(0);
+ const auto &val = key_val.at(1);
+ auto key = static_cast<uint32_t>(std::stoi(key_str));
+ this->index_to_backend.emplace(ir::OperationIndex{key}, val);
+ }
+}
+
+std::unique_ptr<CompilerOptions> CompilerOptions::fromGlobalConfig()
+{
+ auto o = std::make_unique<CompilerOptions>();
+ o->backend_list = nnfw::misc::split(util::getConfigString(util::config::BACKENDS), ';');
+ o->minmax_filepath = util::getConfigString(util::config::MINMAX_FILEPATH);
+ o->trace_filepath = util::getConfigString(util::config::TRACE_FILEPATH);
+ o->graph_dump_level = util::getConfigInt(util::config::GRAPH_DOT_DUMP);
+ o->executor = util::getConfigString(util::config::EXECUTOR);
+ o->he_scheduler = util::getConfigBool(util::config::USE_SCHEDULER);
+ o->he_profiling_mode = util::getConfigBool(util::config::PROFILING_MODE);
+ o->fp16_enable = util::getConfigBool(util::config::FP16_ENABLE);
+ {
+ // Backend for all
+ auto &ms_options = o->manual_scheduler_options;
+
+ // Default value for op_backend_all is first element in the backend list
+ ms_options.backend_for_all = util::getConfigString(util::config::OP_BACKEND_ALLOPS);
+
+// Opcode to Backend
+#define OP(OpName) \
+ { \
+ const auto &backend_str = util::getConfigString(util::config::OP_BACKEND_##OpName); \
+ if (!backend_str.empty()) \
+ { \
+ ms_options.opcode_to_backend[ir::OpCode::OpName] = backend_str; \
+ } \
+ }
+#include "ir/Operations.lst"
+#undef OP
+
+ // Index to Backend
+ auto map_str = util::getConfigString(util::config::OP_BACKEND_MAP);
+ ms_options.setBackendMap(map_str);
+ }
+ return o;
+}
+
+void CompilerOptions::forceInternalOptions()
+{
+ // Set control flow backend for control flow operators
+ auto &builtin_id = backend::builtin::Config::ID;
+ manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = builtin_id;
+ manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = builtin_id;
+ manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = builtin_id;
+
+ // FIXME This is a workaround for bcq operations, should remove it
+ manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq";
+ manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq";
+
+ // FIXME This is a workaround for bulk operations, should remove it
+ manual_scheduler_options.opcode_to_backend[ir::OpCode::Bulk] = "trix";
+}
+
+void CompilerOptions::verboseOptions()
+{
+ VERBOSE(Compiler) << std::boolalpha << "==== Compiler Options ====" << std::endl;
+ VERBOSE(Compiler) << "backend_list : "
+ << nnfw::misc::join(backend_list.begin(), backend_list.end(), "/") << std::endl;
+ VERBOSE(Compiler) << "trace_filepath : " << trace_filepath << std::endl;
+ VERBOSE(Compiler) << "graph_dump_level : " << graph_dump_level << std::endl;
+ VERBOSE(Compiler) << "executor : " << executor << std::endl;
+ VERBOSE(Compiler) << "manual backend_for_all : " << manual_scheduler_options.backend_for_all
+ << std::endl;
+ VERBOSE(Compiler) << "manual_scheduler_options : "
+ << getOpBackends(manual_scheduler_options.opcode_to_backend) << std::endl;
+ VERBOSE(Compiler) << "he_scheduler : " << he_scheduler << std::endl;
+ VERBOSE(Compiler) << "he_profiling_mode : " << he_profiling_mode << std::endl;
+ VERBOSE(Compiler) << "fp16_enable : " << fp16_enable << std::endl
+ << std::noboolalpha;
+}
+
+} // namespace compiler
+} // namespace onert
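setBackendMap() above parses the OP_BACKEND_MAP string as semicolon-separated key=value pairs that pin an operation index to a backend id, e.g. "0=cpu;2=acl_cl". A standalone sketch of the same parsing (using std::getline in place of nnfw::misc::split, and assuming well-formed input):

#include <cstdint>
#include <map>
#include <sstream>
#include <string>

std::map<uint32_t, std::string> parseBackendMap(const std::string &str)
{
  std::map<uint32_t, std::string> result;
  std::stringstream ss{str};
  std::string item;
  while (std::getline(ss, item, ';'))
  {
    if (item.empty())
      continue; // tolerate trailing or doubled semicolons
    const auto pos = item.find('=');
    const auto key = static_cast<uint32_t>(std::stoi(item.substr(0, pos)));
    result[key] = item.substr(pos + 1);
  }
  return result;
}

// parseBackendMap("0=cpu;2=acl_cl") yields {0: "cpu", 2: "acl_cl"}.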
diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.cc b/runtime/onert/core/src/compiler/ExecutorFactory.cc
index 062c6c9c3..6a08524cc 100644
--- a/runtime/onert/core/src/compiler/ExecutorFactory.cc
+++ b/runtime/onert/core/src/compiler/ExecutorFactory.cc
@@ -16,26 +16,37 @@
#include "ExecutorFactory.h"
+#include "Linear.h"
+#include "../backend/builtin/BackendContext.h"
+#include "../backend/builtin/Config.h"
+#include "../backend/builtin/UserTensor.h"
+#include "../dumper/text/GraphDumper.h"
+#include "../exec/DataflowExecutor.h"
+#include "../exec/ExecTime.h"
+#include "../exec/ExecutionObservers.h"
+#include "../exec/LinearExecutor.h"
+#ifdef MINMAX_H5DUMPER
+#include "../exec/MinMaxRecorder.h"
+#endif
+#include "../exec/ParallelExecutor.h"
+#include "../ir/OperationCloner.h"
+
+#include <backend/IPortableTensor.h>
+#include <compiler/BackendManager.h>
+#include <compiler/ExecutionBuilder.h>
+#include <util/TracingCtx.h>
+
#include <functional>
-#include "exec/ExecutionObservers.h"
-#include "exec/LinearExecutor.h"
-#include "exec/DataflowExecutor.h"
-#include "exec/ParallelExecutor.h"
-#include "compiler/BackendManager.h"
-#include "compiler/ExecutionBuilder.h"
-#include "exec/ExecTime.h"
-#include "compiler/Linear.h"
-#include "compiler/TensorBuilders.h"
-#include "backend/IConstantInitializer.h"
-#include "backend/IKernelGenerator.h"
-#include "backend/IOptimizer.h"
-#include "backend/ITensorRegister.h"
-#include "backend/controlflow/Config.h"
-#include "backend/controlflow/KernelGenerator.h"
-#include "backend/controlflow/UserTensor.h"
-#include "backend/controlflow/TensorBuilder.h"
#include <memory>
+#ifdef ONERT_TRAIN
+#include "../backend/builtin/train/BackendContext.h"
+#include "../exec/train/TrainableExecutor.h"
+
+#include <backend/train/TrainableBackendContext.h>
+#include <backend/train/ITrainableBackend.h>
+#endif // ONERT_TRAIN
+
namespace onert
{
namespace
@@ -46,7 +57,7 @@ class SyncFunction final : public exec::IFunction
public:
virtual ~SyncFunction() = default;
SyncFunction(std::unique_ptr<exec::IFunction> fn, const std::shared_ptr<backend::IConfig> config)
- : _fn{std::move(fn)}, _config{config}
+ : _fn{std::move(fn)}, _config{config}
{
assert(_fn);
assert(_config);
@@ -65,21 +76,218 @@ private:
std::shared_ptr<backend::IConfig> _config;
};
-// TODO Think of a better way to manage TensorManagers
-backend::TensorManagerSet createTensorManagerSet(const compiler::TensorBuilders &tensor_builders)
+using DeallocList = std::vector<backend::ITensor *>;
+// Deallocates dynamic tensors after an operation finishes; used by the Linear Executor
+class DeallocFunction final : public exec::IFunction
+{
+public:
+ DeallocFunction(const DeallocList &tensors) : _dealloc_list{tensors} {}
+
+ void run() override
+ {
+ for (auto &&tensor : _dealloc_list)
+ {
+ if (!tensor->is_dynamic())
+ continue;
+ tensor->deallocBuffer();
+ }
+ }
+
+private:
+ DeallocList _dealloc_list;
+};
+
+// TODO Unify initializeSubgraphIOTensors
+void initializeSubgraphIOTensors(compiler::ILoweredGraph &lowered_graph,
+ const backend::BackendContexts &backend_contexts,
+ const ir::OperandIndexSequence &indices)
+{
+ // TODO Store builtin backend in BackendContext
+ std::shared_ptr<backend::builtin::TensorRegistry> builtin_tensor_reg;
+ for (const auto &e : backend_contexts)
+ {
+ auto backend = e.first;
+ auto &context = e.second;
+ if (backend->config()->id() == backend::builtin::Config::ID)
+ {
+ builtin_tensor_reg =
+ std::dynamic_pointer_cast<backend::builtin::TensorRegistry>(context->tensor_registry);
+ }
+ }
+ assert(builtin_tensor_reg);
+
+ for (auto &&ind : indices)
+ {
+ const auto &operand = lowered_graph.graph().operands().at(ind);
+ auto tensor = std::make_unique<backend::builtin::IOTensor>(
+ operand.info(),
+ ir::Layout::NHWC /* FIXME find operation for this operand and use frontend_layout */
+ );
+
+ // Add tensor to builtin TensorRegistry.
+ builtin_tensor_reg->setNativeIOTensor(ind, std::move(tensor));
+ }
+}
+
+#ifdef ONERT_TRAIN
+void initializeSubgraphIOTensors(compiler::ILoweredGraph &lowered_graph,
+ const backend::train::TrainableBackendContexts &backend_contexts,
+ const ir::OperandIndexSequence &indices)
{
- backend::TensorManagerSet tensor_mgrs;
- for (auto &tensor_builder : tensor_builders)
+ std::shared_ptr<backend::builtin::train::TensorRegistry> builtin_tensor_reg;
+ for (const auto &e : backend_contexts)
{
- auto s_tensor_manager = tensor_builder->releaseStaticTensorManager();
- if (s_tensor_manager != nullptr)
- tensor_mgrs.insert(std::move(s_tensor_manager));
+ auto backend = e.first;
+ auto &context = e.second;
+ if (backend->config()->id() == backend::builtin::Config::ID)
+ {
+ builtin_tensor_reg = std::dynamic_pointer_cast<backend::builtin::train::TensorRegistry>(
+ context->tensor_registry());
+ }
+ }
+ assert(builtin_tensor_reg);
+
+ for (auto &&ind : indices)
+ {
+ const auto &operand = lowered_graph.graph().operands().at(ind);
+ auto tensor = std::make_unique<backend::builtin::IOTensor>(
+ operand.info(),
+ ir::Layout::NHWC /* FIXME find operation for this operand and use frontend_layout */
+ );
+
+ // Add tensor to builtin TensorRegistry.
+ builtin_tensor_reg->setNativeIOTensor(ind, std::move(tensor));
+ }
+}
+#endif // ONERT_TRAIN
+
+backend::BackendContexts
+createBackendContexts(compiler::ILoweredGraph &lgraph, bool linear_executor,
+ std::shared_ptr<backend::custom::IKernelBuilder> custom_kernel_builder)
+{
+ backend::BackendContexts contexts;
+ auto &backend_manager = compiler::BackendManager::get();
+
+ std::unordered_map<const backend::Backend *, backend::ContextData> context_data_map;
+
+ // Generate partial graphs for each backend
+ for (auto &&backend : backend_manager.getAll())
+ {
+ auto &data = context_data_map[backend];
+ auto graph = std::make_unique<ir::Graph>();
+ graph->setLayout(lgraph.graph().layout());
+ data.graph = std::move(graph);
+ }
+
+ auto &whole_graph = lgraph.graph();
+ // Separate operands into partial graphs
+ whole_graph.operands().iterate([&](const ir::OperandIndex &operand_ind, ir::Operand &operand) {
+ auto &operand_li = lgraph.lower_info().operand;
+ const auto &def_factors = operand_li.at(operand_ind).def_factors();
+ if (def_factors.size() == 0) // Ignore unused tensor
+ return;
+ const auto &def_factor = def_factors.getOnlyElement();
+ const auto backend = def_factor.backend();
+ auto &partial_graph = *context_data_map[backend].graph;
+ auto &operand_layouts = context_data_map[backend].operand_layouts;
+ assert(operand_layouts.find(operand_ind) == operand_layouts.end());
+ operand_layouts[operand_ind] = def_factor.layout();
+
+ // Copy the operand and insert it to the partial graph
+ auto new_operand = std::make_unique<ir::Operand>(operand);
+ new_operand->clearDefUse();
+ operand.releaseData(); // Deref data of LoweredGraph
+ auto new_operand_ind = partial_graph.addOperand(operand_ind, std::move(new_operand));
+ UNUSED_RELEASE(new_operand_ind);
+ assert(new_operand_ind == operand_ind);
+ });
+ // Separate operations into partial graphs
+ whole_graph.operations().iterate(
+ [&](const ir::OperationIndex &op_ind, const ir::IOperation &operation) {
+ auto &op_li = lgraph.lower_info().operation;
+ auto backend = op_li.at(op_ind).backend();
+ auto &partial_graph = *context_data_map[backend].graph;
+ auto &external_operands = context_data_map[backend].external_operands;
+ auto &operand_layouts = context_data_map[backend].operand_layouts;
+
+ {
+ // Add missing operands (externals)
+ auto io_list = (operation.getInputs() + operation.getOutputs()) | ir::Remove::DUPLICATED |
+ ir::Remove::UNDEFINED;
+ for (auto &&operand_ind : io_list)
+ {
+ if (partial_graph.operands().exist(operand_ind))
+ continue;
+
+ // Copy the operand and insert it to the partial graph
+ const auto &operand = whole_graph.operands().at(operand_ind);
+ auto new_operand = std::make_unique<ir::Operand>(operand);
+ new_operand->clearDefUse();
+ auto new_operand_ind = partial_graph.addOperand(operand_ind, std::move(new_operand));
+ UNUSED_RELEASE(new_operand_ind);
+ assert(new_operand_ind == operand_ind);
+
+ auto layout =
+ lgraph.lower_info().operand.at(operand_ind).def_factors().getOnlyElement().layout();
+ assert(operand_layouts.find(operand_ind) == operand_layouts.end());
+ operand_layouts[operand_ind] = layout;
+ external_operands.add(operand_ind);
+ }
+
+ auto new_op_ind = partial_graph.addOperation(op_ind, clone(operation));
+ UNUSED_RELEASE(new_op_ind);
+ assert(new_op_ind == op_ind);
+ }
+ });
+
+ // Create contexts
+ auto whole_op_order = lgraph.graph().topolSortOperations();
+ for (auto &&pair : context_data_map)
+ {
+ auto backend = pair.first;
+ auto &data = pair.second;
+    // Handle graph inputs/outputs or external tensors
+ data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) {
+ if (whole_graph.getInputs().contains(ind) || whole_graph.getOutputs().contains(ind))
+ data.external_operands.add(ind);
+ // Inputs are either "graph input" or "no def op and non-constant"
+ if (whole_graph.getInputs().contains(ind) ||
+ (!operand.getDef().valid() && !operand.isConstant()))
+        data.graph->addInput(ind);
+      // Outputs are either "graph output" or "no uses"
+ if (whole_graph.getOutputs().contains(ind) || operand.getUses().size() == 0)
+ data.graph->addOutput(ind);
+ });
+ dumper::text::dumpGraph(*data.graph);
+
+ std::copy_if(whole_op_order.begin(), whole_op_order.end(), std::back_inserter(data.op_order),
+ [&](const auto &ind) { return data.graph->operations().exist(ind); });
+ data.is_linear_executor = linear_executor;
+ data.custom_kernel_builder = custom_kernel_builder;
+ contexts.emplace(backend, backend->newContext(std::move(data)));
+ }
+ return contexts;
+}
+
+template <typename Context>
+std::deque<std::pair<const backend::Backend *, Context *>> orderBackendContext(
+ const std::unordered_map<const backend::Backend *, std::unique_ptr<Context>> &tbackend_contexts)
+{
+ std::deque<std::pair<const backend::Backend *, Context *>> ordered_contexts;
- auto d_tensor_manager = tensor_builder->releaseDynamicTensorManager();
- if (d_tensor_manager != nullptr)
- tensor_mgrs.insert(std::move(d_tensor_manager));
+ for (auto &&pair : tbackend_contexts)
+ {
+    // NOTE The builtin backend must be processed last.
+    // This is because the Permute layer is the only operation that may have different
+    // ITensor objects for its input and output, and it requires the tensors of all other
+    // backends to be ready for use.
+ if (pair.first->config()->id() == "builtin")
+ ordered_contexts.emplace_back(pair.first, pair.second.get());
+ else
+ ordered_contexts.emplace_front(pair.first, pair.second.get());
}
- return tensor_mgrs;
+
+ return ordered_contexts;
}
} // namespace
@@ -106,415 +314,588 @@ ExecutorFactory::ExecutorFactory()
}
exec::IExecutor *ExecutorFactory::create(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map)
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const ExecutorFactoryArgs &args)
{
- return _map.at(options.executor)(std::move(lowered_graph), options, executor_map);
+ assert(args.options != nullptr);
+ return _map.at(args.options->executor)(std::move(lowered_graph), executors, args);
}
-void ExecutorFactory::initializeBackendContext(compiler::LoweredGraph *lowered_graph)
+void ExecutorFactory::prepareMigrantTensors(compiler::ILoweredGraph &lowered_graph,
+ const backend::BackendContexts &backend_contexts)
{
- struct Entry
- {
- std::vector<backend::BackendContext::OperationInfo> operation_list;
- std::vector<ir::OperandIndex> operand_list;
- };
- std::unordered_map<const backend::Backend *, Entry> backend_assets;
-
- // Build lists for operations
- lowered_graph->op_seqs().iterate(
- [&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) {
- auto &op_seq_li = lowered_graph->getLowerInfo()->op_seq;
- auto backend = op_seq_li.at(op_seq_index)->backend();
- for (auto &operation_idx : op_seq.operations())
+ TensorRegistries tensor_regs{backend_contexts, true};
+
+ lowered_graph.graph().operations().iterate(
+ [&](const ir::OperationIndex &op_ind, const ir::IOperation &op) {
+ auto lower_info = lowered_graph.lower_info().operation.getRawPtr(op_ind);
+ auto &backend_ctx = backend_contexts.at(lower_info->backend());
+ for (auto &&ind :
+ (op.getInputs() + op.getOutputs()) | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
+ {
+        // If an operation's input/output tensor does not have its own tensor object,
+        // it must be using a migrant tensor, so find the tensor in the other tensor
+        // registries and register it to the current tensor registry if it is portable
+ if (!backend_ctx->tensor_registry->getITensor(ind))
{
- backend_assets[backend].operation_list.emplace_back(operation_idx, op_seq.getLayout());
+ auto tensor = tensor_regs.getITensor(ind);
+ assert(tensor); // The tensor must have been registered
+ auto ptensor = dynamic_cast<backend::IPortableTensor *>(tensor);
+ if (ptensor)
+ backend_ctx->tensor_registry->setMigrantTensor(ind, ptensor);
}
- });
+ }
+ });
+}
- // Build lists for operands
- lowered_graph->graph().operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
- const auto lower_info = lowered_graph->getLowerInfo(ind);
- for (auto factor : lower_info->def_factors())
+void ExecutorFactory::prepareBuiltinBackend(const TensorRegistries &tensor_regs,
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const backend::BackendContexts &backend_contexts,
+ const ir::ModelIndex &index)
+{
+ for (auto &&pair : backend_contexts)
+ {
+ auto builtin_context = dynamic_cast<backend::builtin::BackendContext *>(pair.second.get());
+ if (builtin_context != nullptr)
{
- auto backend = factor.backend();
- backend_assets[backend].operand_list.emplace_back(ind);
+ auto builtin_kernel_gen = builtin_context->kernel_gen;
+ builtin_kernel_gen->setTensorRegistries(tensor_regs);
+ builtin_kernel_gen->setExecutors(executors);
+ builtin_kernel_gen->setModelIndex(index);
}
- });
+ }
+}
- for (auto &pair : backend_assets)
+std::deque<std::pair<const backend::Backend *, backend::BackendContext *>>
+ExecutorFactory::orderBackendContext(const backend::BackendContexts &backend_contexts)
+{
+ std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ordered_contexts;
+ for (auto &&pair : backend_contexts)
{
- auto backend = pair.first;
- auto &arg = pair.second;
- lowered_graph->backend_contexts().at(backend)->initialize(arg.operation_list, arg.operand_list);
+    // NOTE The builtin backend must be processed last.
+    // This is because the Permute layer is the only operation that may have different
+    // ITensor objects for its input and output, and it requires the tensors of all other
+    // backends to be ready for use.
+ if (pair.first->config()->id() == "builtin")
+ ordered_contexts.emplace_back(pair.first, pair.second.get());
+ else
+ ordered_contexts.emplace_front(pair.first, pair.second.get());
}
+ return ordered_contexts;
}
-void ExecutorFactory::runTensorRegistration(compiler::LoweredGraph *lowered_graph,
- const std::vector<ir::OpSequenceIndex> &order)
+exec::IExecutor *
+ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const ExecutorFactoryArgs &args)
{
- for (const auto index : order)
+ const auto options = args.options;
+ const auto &model_index = args.model_index;
+ const auto tracing_ctx = args.tracing_ctx;
+ auto custom_kernel_builder = args.custom_kernel_builder;
+ auto &graph = lowered_graph->graph();
+
+ backend::BackendContexts backend_contexts =
+ createBackendContexts(*lowered_graph, options->executor == "Linear", custom_kernel_builder);
+
+ TensorRegistries tensor_regs{backend_contexts, true};
+
+ initializeSubgraphIOTensors(
+ *lowered_graph, backend_contexts,
+ (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) |
+ ir::Remove::DUPLICATED | ir::Remove::UNDEFINED);
+
+ // linearize
+ auto order = Linear::linearize(*lowered_graph);
+ Linear::dump(*lowered_graph, order);
+
+ for (auto &&pair : backend_contexts)
{
- const auto &op_seq = lowered_graph->op_seqs().at(index);
- const auto backend = lowered_graph->getLowerInfo(index)->backend();
- const auto tensor_register = lowered_graph->backend_contexts().at(backend)->tensor_register;
- auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder;
- auto model_io = lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs();
+ pair.second->genTensors();
+ }
+
+ prepareMigrantTensors(*lowered_graph, backend_contexts);
- if (tensor_register)
+ // Give some runtime objects to builtin KernelGenerator
+ prepareBuiltinBackend(tensor_regs, executors, backend_contexts, model_index);
+
+ ExecutionBuilder builder;
+
+ // Adjust the order of backends for the upcoming iteration
+ auto ordered_contexts = orderBackendContext(backend_contexts);
+
+  // Simulate the execution order to decide when tensors can be deallocated
+ std::unordered_map<ir::OperationIndex, DeallocList> dealloc_list_map;
+ {
+ ir::OperandIndexMap<uint32_t> uses_map;
+ ir::OperandIndexSequence constants;
+
+ auto model_io =
+ (graph.getInputs() + graph.getOutputs()) | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+
+ // Prepare scanning
+ graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+ uses_map[ind] = obj.getUses().size();
+
+ if (obj.isConstant())
+ constants.append(ind);
+ });
+
+    // A trick to treat constants as an exception
+ for (const auto &ind : constants)
{
- // Custom registration
- tensor_register->registerTensors(op_seq, lowered_graph->getLowerInfo());
+ uses_map[ind]++;
}
- else
+
+ for (const auto &op_ind : order)
{
- // Default registration
- for (const auto op_idx : op_seq)
+ const auto &op = graph.operations().at(op_ind);
+ auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+ auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+
+ for (const auto &ind : op_inputs)
{
- const auto &op = lowered_graph->graph().operations().at(op_idx);
- for (const auto &index : (op.getInputs() | ir::Remove::UNDEFINED) + op.getOutputs())
+ const auto &operand = graph.operands().at(ind);
+ assert(uses_map.find(ind) != uses_map.end());
+ assert(uses_map[ind] > 0);
+ uses_map[ind]--;
+ if (uses_map[ind] == 0 && !operand.info().isVariable() && !model_io.contains(ind))
{
- if (!tensor_builder->isRegistered(index) && !model_io.contains(index))
- {
- const auto &operand_lower_info =
- lowered_graph->getLowerInfo(index)->def_factors().getOnlyElement();
-
- // E.g., permute (CPU) -> tensor A -> MaxPool2D(acl_cl)
- // op.getOutputs() of permute (CPU) returns tensor A
- // but tensor A belongs to the backend of acl_cl.
- // So, we have to make this tensor NOT registered for CPU.
- if (operand_lower_info.backend() != backend)
- continue;
-
- const auto &obj = lowered_graph->graph().operands().at(index);
- const auto frontend_layout = op_seq.getLayout();
- const auto backend_layout = operand_lower_info.layout();
- ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
- obj.typeInfo(), obj.info().memAllocType(),
- obj.isConstant()};
- tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
- }
+ dealloc_list_map[op_ind].emplace_back(tensor_regs.getITensor(ind));
}
}
}
- }
-}
-std::vector<std::shared_ptr<backend::ITensor>>
-ExecutorFactory::initializeModelIOTensors(compiler::LoweredGraph &lowered_graph,
- const ir::OperandIndexSequence &indices)
-{
- std::vector<std::shared_ptr<backend::ITensor>> ret;
+ // Dispose and validate
+ for (const auto &ind : constants)
+ {
+ --uses_map[ind];
+ }
- // TODO Store controlflow backend in BackendContext
- std::shared_ptr<backend::controlflow::TensorBuilder> cf_tensor_builder;
- std::shared_ptr<backend::controlflow::TensorRegistry> cf_tensor_reg;
- for (const auto &e : lowered_graph.backend_contexts())
+ assert(
+ std::all_of(uses_map.begin(), uses_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+ }
+
+ // Generate kernels
+ for (auto &&pair : ordered_contexts)
{
- auto backend = e.first;
- auto &context = e.second;
- if (backend->config()->id() == backend::controlflow::Config::ID)
+ auto codes = pair.second->genKernels();
+ for (auto &&pair : codes)
{
- cf_tensor_builder =
- std::dynamic_pointer_cast<backend::controlflow::TensorBuilder>(context->tensor_builder);
- cf_tensor_reg =
- std::dynamic_pointer_cast<backend::controlflow::TensorRegistry>(context->tensor_registry);
+ auto &op_ind = pair.first;
+ auto &fn_seq = pair.second;
+ auto &op = lowered_graph->graph().operations().at(op_ind);
+ auto lower_info = lowered_graph->lower_info().operation.getRawPtr(op_ind);
+ if (options->he_profiling_mode)
+ fn_seq->wrap<SyncFunction>(lower_info->backend()->config());
+ if (!dealloc_list_map[op_ind].empty())
+ fn_seq->append(std::make_unique<DeallocFunction>(dealloc_list_map[op_ind]));
+ builder.append(op_ind, {op_ind, &op, lower_info, std::move(fn_seq)});
}
}
- assert(cf_tensor_builder);
- assert(cf_tensor_reg);
- for (auto ind : indices)
+ auto code_map = builder.releaseCodeMap();
+
+ auto exec = new exec::LinearExecutor{std::move(lowered_graph),
+ std::move(backend_contexts),
+ tensor_regs,
+ std::move(code_map),
+ order,
+ tracing_ctx};
+
+ if (!options->trace_filepath.empty())
{
- const auto &operand = lowered_graph.graph().operands().at(ind);
- auto tensor = std::make_shared<backend::controlflow::UserTensor>(
- operand.info(),
- ir::Layout::NHWC, /* FIXME find op_seq for this operand and use frontend_layout */
- cf_tensor_builder->dynamicTensorManager());
-
- // Add tensor to controlflow TensorRegistry.
- cf_tensor_reg->setNativeUserTensor(ind, tensor);
- ret.push_back(tensor);
+ std::unique_ptr<exec::IExecutionObserver> ctp =
+ std::make_unique<exec::TracingObserver>(options->trace_filepath, exec->graph(), tracing_ctx);
+ exec->addObserver(std::move(ctp));
}
- return ret;
-}
+#ifdef MINMAX_H5DUMPER
+ if (!options->minmax_filepath.empty())
+ exec->addObserver(std::make_unique<exec::MinMaxRecorder>(
+ options->minmax_filepath, exec->graph(), exec->getBackendContexts()));
+#endif
-void ExecutorFactory::prepareExternalTensors(compiler::LoweredGraph &lowered_graph)
-{
- TensorRegistries tensor_regs{lowered_graph.backend_contexts(), true};
-
- lowered_graph.op_seqs().iterate(
- [&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) {
- auto lower_info = lowered_graph.getLowerInfo(op_seq_index);
- auto &backend_ctx = lowered_graph.backend_contexts().at(lower_info->backend());
- for (auto ind : (op_seq.getInputs() + op_seq.getOutputs()) | ir::Remove::DUPLICATED |
- ir::Remove::UNDEFINED)
- {
- // If an OpSequence input/output tensor does not have a own tensor object,
- // it must be using external tensors, so find the tensor from other tensor builders and
- // set the tensor to this tensor builder if portable
- if (!backend_ctx->tensor_registry->getITensor(ind))
- {
- auto tensor = tensor_regs.getITensor(ind);
- assert(tensor); // The tensor must have been registered
- auto ptensor = std::dynamic_pointer_cast<backend::IPortableTensor>(tensor);
- if (ptensor)
- backend_ctx->tensor_registry->setMigrantTensor(ind, ptensor);
- }
- }
- });
+ return exec;
}
exec::IExecutor *
-ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map)
+ExecutorFactory::createDataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const ExecutorFactoryArgs &args, bool parallel)
{
- const auto &backend_contexts = lowered_graph->backend_contexts();
+ const auto options = args.options;
+ const auto &model_index = args.model_index;
+ const auto tracing_ctx = args.tracing_ctx;
+ auto custom_kernel_builder = args.custom_kernel_builder;
- initializeBackendContext(lowered_graph.get());
+ backend::BackendContexts backend_contexts =
+ createBackendContexts(*lowered_graph, options->executor == "Linear", custom_kernel_builder);
- // linearize
- assert(!lowered_graph->graph().isBuildingPhase());
+ TensorRegistries tensor_regs{backend_contexts, true};
- /*************************************************
- * Backend dependent analysis & optimization phase
- *************************************************/
+ initializeSubgraphIOTensors(
+ *lowered_graph, backend_contexts,
+ (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) |
+ ir::Remove::DUPLICATED | ir::Remove::UNDEFINED);
- for (auto &pair : backend_contexts)
+ for (auto &&pair : backend_contexts)
{
- auto &optimizer = pair.second->optimizer;
- if (optimizer)
- optimizer->optimize();
+ pair.second->genTensors();
}
- /**********************************************************
- * Backend dependent analysis & optimization phase finished
- **********************************************************/
+ prepareMigrantTensors(*lowered_graph, backend_contexts);
- /***********************
- * Code generation phase
- ***********************/
+ // Give some runtime objects to builtin KernelGenerator
+ prepareBuiltinBackend(tensor_regs, executors, backend_contexts, model_index);
- auto order = Linear::linearize(*lowered_graph);
- runTensorRegistration(lowered_graph.get(), order);
+ ExecutionBuilder builder;
- std::vector<std::shared_ptr<backend::ITensor>> input_tensors;
- std::vector<std::shared_ptr<backend::ITensor>> output_tensors;
- if (options.is_primary_subgraph)
+ // Adjust the order of backends for the upcoming iteration
+ auto ordered_contexts = orderBackendContext(backend_contexts);
+
+ // Generate kernels
+ for (auto &&pair : ordered_contexts)
{
- input_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getInputs());
- output_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getOutputs());
+ auto codes = pair.second->genKernels();
+ for (auto &&pair : codes)
+ {
+ auto &op_ind = pair.first;
+ auto &fn_seq = pair.second;
+ auto &op = lowered_graph->graph().operations().at(op_ind);
+ auto lower_info = lowered_graph->lower_info().operation.getRawPtr(op_ind);
+ if (options->he_profiling_mode)
+ fn_seq->wrap<SyncFunction>(lower_info->backend()->config());
+ builder.append(op_ind, {op_ind, &op, lower_info, std::move(fn_seq)});
+ }
}
- Linear::dump(*lowered_graph, order);
- Linear::planTensors(*lowered_graph, order);
+ auto code_map = builder.releaseCodeMap();
- TensorBuilders tensor_builders{lowered_graph->backend_contexts(), true};
- TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};
+ exec::ExecutorBase *exec = nullptr;
+ if (parallel)
+ {
+ exec = new exec::ParallelExecutor{std::move(lowered_graph), std::move(backend_contexts),
+ tensor_regs, std::move(code_map), tracing_ctx};
+ }
+ else
+ {
+ auto dataflow_exec =
+ new exec::DataflowExecutor{std::move(lowered_graph), std::move(backend_contexts), tensor_regs,
+ std::move(code_map), tracing_ctx};
+ if (options->he_profiling_mode)
+ {
+ std::vector<const backend::Backend *> backends;
+ for (const auto &pair : backend_contexts)
+ {
+ backends.push_back(pair.first);
+ }
+ auto et = std::make_shared<exec::ExecTime>(backends);
+ std::unique_ptr<exec::IExecutionObserver> obs =
+ std::make_unique<exec::ProfileObserver>(et, dataflow_exec->graph());
+ dataflow_exec->addObserver(std::move(obs));
+ }
+ exec = dataflow_exec;
+ }
- for (auto &tensor_builder : tensor_builders)
+ if (!options->trace_filepath.empty())
{
- tensor_builder->prepare();
+ std::unique_ptr<exec::IExecutionObserver> ctp =
+ std::make_unique<exec::TracingObserver>(options->trace_filepath, exec->graph(), tracing_ctx);
+ exec->addObserver(std::move(ctp));
}
- prepareExternalTensors(*lowered_graph);
+ return exec;
+}
- ExecutionBuilder builder;
+#ifdef ONERT_TRAIN
+exec::IExecutor *
+ExecutorFactory::create(std::unique_ptr<compiler::train::LoweredTrainableGraph> lowered_graph,
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const ExecutorFactoryArgs &args,
+ const std::shared_ptr<exec::train::optimizer::Optimizer> &optimizer)
+{
+ assert(args.options != nullptr);
- // Generate kernels
- lowered_graph->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &op_seq_index,
- const ir::OpSequence &op_seq) {
- auto lower_info = lowered_graph->getLowerInfo(op_seq_index);
- auto kernel_gen = lowered_graph->backend_contexts().at(lower_info->backend())->kernel_gen;
- // Set TensorBuilderSet and ExecutorMap to kernel_gen of control flow
- auto cf_kernel_gen = dynamic_cast<backend::controlflow::KernelGenerator *>(kernel_gen.get());
- if (cf_kernel_gen != nullptr)
+ if (args.options->executor != "Linear")
+ throw std::runtime_error("ExecutorFactory: TrainableExecutor supports only 'Linear' now");
+
+ return createTrainableExecutor(std::move(lowered_graph), executors, args, optimizer);
+}
+
+void ExecutorFactory::prepareMigrantTensors(
+ compiler::ILoweredGraph &lowered_graph,
+ const backend::train::TrainableBackendContexts &backend_contexts)
+{
+ train::TensorRegistries tensor_regs{backend_contexts, true};
+
+ lowered_graph.graph().operations().iterate(
+ [&](const ir::OperationIndex &op_ind, const ir::IOperation &op) {
+ auto lower_info = lowered_graph.lower_info().operation.getRawPtr(op_ind);
+ auto &backend_ctx = backend_contexts.at(lower_info->backend());
+ for (auto &&ind :
+ (op.getInputs() + op.getOutputs()) | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
+ {
+ // If an Operation's input/output tensor does not have its own tensor object,
+ // it must be using migrant tensors, so find the tensor from other tensor registries and
+ // register it to the current tensor registry if it is portable
+ if (!backend_ctx->tensor_registry()->getITensor(ind))
+ {
+ auto tensor = tensor_regs.getITensor(ind);
+ assert(tensor); // The tensor must have been registered
+ auto ptensor = dynamic_cast<backend::IPortableTensor *>(tensor);
+ if (ptensor)
+ backend_ctx->tensor_registry()->setMigrantTensor(ind, ptensor);
+ }
+ }
+ });
+}
+
+exec::IExecutor *ExecutorFactory::createTrainableExecutor(
+ std::unique_ptr<compiler::train::LoweredTrainableGraph> lowered_graph,
+ const std::shared_ptr<exec::IExecutors> &, const ExecutorFactoryArgs &args,
+ const std::shared_ptr<exec::train::optimizer::Optimizer> &optimizer)
+{
+ const auto options = args.options;
+ const auto tracing_ctx = args.tracing_ctx;
+ auto custom_kernel_builder = args.custom_kernel_builder;
+
+ auto &graph = lowered_graph->graph();
+
+ lowered_graph->trainable_graph().operations().iterate([](const onert::ir::OperationIndex &,
+ const onert::ir::IOperation &op) {
+ try
{
- cf_kernel_gen->setTensorRegistries(tensor_regs);
- cf_kernel_gen->setExecutorMap(executor_map);
+ UNUSED_RELEASE(dynamic_cast<const ir::train::ITrainableOperation &>(op));
}
- auto fn_seq = kernel_gen->generate(op_seq);
- if (options.he_profiling_mode)
+ catch (std::bad_cast &)
{
- fn_seq->wrap<SyncFunction>(lower_info->backend()->config());
+ throw std::runtime_error("ExecutorFactory: " + op.name() + " is not trainable operation yet");
}
- builder.append(op_seq_index, {&op_seq, lower_info, std::move(fn_seq)});
});
- for (auto &tensor_builder : tensor_builders)
- {
- tensor_builder->allocate();
- }
+ // TODO Create context only once instead of replacing
+ backend::train::TrainableBackendContexts tbackend_contexts;
+ backend::BackendContexts base_backend_contexts =
+ createBackendContexts(*lowered_graph, true, custom_kernel_builder);
- for (auto &pair : backend_contexts)
+ // Replace BackendContext with TrainableBackendContext
+ for (auto &&pair : base_backend_contexts)
{
- pair.second->initConsts();
- }
-
- lowered_graph->graph().operands().iterate(
- [](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
-
- auto code_map = builder.releaseCodeMap();
-
- for (auto &it : code_map)
- {
- auto op_seq_index = it.first;
- auto &fn_seq = it.second.fn_seq;
-
- fn_seq->iterate([&](exec::IFunction &ifunc) {
- ifunc.prepare();
- auto backend = lowered_graph->getLowerInfo(op_seq_index)->backend();
- auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder;
- tensor_builder->postFunctionPrepare();
+ auto ctx = pair.second.get();
+ const auto &data = ctx->data();
+
+ // Create partial and trainable graphs
+ auto tgraph = std::make_unique<ir::train::TrainableGraph>(*data.graph);
+ data.graph->operations().iterate(
+ [&](const onert::ir::OperationIndex &op_index, const onert::ir::IOperation &) {
+ const auto &orig_tgraph = lowered_graph->trainable_graph();
+ const auto &trainable_op = orig_tgraph.operation(op_index);
+ auto gen_index = tgraph->replaceOperation(op_index, trainable_op.clone());
+ UNUSED_RELEASE(gen_index);
+ assert(gen_index == op_index);
+ });
+ data.graph->operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &) {
+ const auto &orig_tgraph = lowered_graph->trainable_graph();
+ if (orig_tgraph.derivatives().exist(index))
+ {
+ const auto &deriv = orig_tgraph.derivatives().at(index);
+ auto new_deriv = std::make_unique<ir::Operand>(deriv);
+ auto gen_index = tgraph->addDerivative(index, std::move(new_deriv));
+ UNUSED_RELEASE(gen_index);
+ assert(gen_index == index);
+ }
});
- }
- backend::TensorManagerSet tensor_mgrs = createTensorManagerSet(tensor_builders);
- auto exec = new exec::LinearExecutor{
- std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
- std::move(tensor_mgrs), std::move(code_map), order};
+ // Remove outputs of the whole graph from external_operands
+ auto external_operands = data.external_operands;
+ for (const auto &index : lowered_graph->trainable_graph().getOutputs())
+ {
+ if (external_operands.contains(index))
+ external_operands.remove(index);
+ }
- if (!options.trace_filepath.empty())
- {
- std::unique_ptr<exec::IExecutionObserver> ctp =
- std::make_unique<exec::ChromeTracingObserver>(options.trace_filepath, exec->graph());
- exec->addObserver(std::move(ctp));
+ // Set trainable context data
+ backend::train::TrainableContextData tdata;
+ tdata.tgraph = std::move(tgraph);
+ tdata.op_order = std::move(data.op_order);
+ tdata.external_operands = std::move(external_operands);
+ tdata.operand_layouts = std::move(data.operand_layouts);
+ tdata.custom_kernel_builder = std::move(data.custom_kernel_builder);
+ tdata.is_linear_executor = data.is_linear_executor;
+ tdata.optimizer = optimizer;
+
+ // TODO Remove dynamic_cast
+ try
+ {
+ const auto backend = pair.first;
+ const auto tbackend = dynamic_cast<const backend::train::ITrainableBackend *>(backend);
+ tbackend_contexts.emplace(backend, tbackend->newContext(std::move(tdata)));
+ }
+ catch (const std::bad_cast &)
+ {
+ throw std::runtime_error("ExecutorFactory: Invalid backend - TrainableExecutor does not "
+ "support non-trainble backends");
+ }
}
+ base_backend_contexts.clear();
- return exec;
-}
-
-exec::IExecutor *ExecutorFactory::createDataflowExecutor(
- std::unique_ptr<compiler::LoweredGraph> lowered_graph, const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map, bool parallel)
-{
- const auto &backend_contexts = lowered_graph->backend_contexts();
+ train::TensorRegistries tensor_regs{tbackend_contexts, true};
- initializeBackendContext(lowered_graph.get());
+ initializeSubgraphIOTensors(
+ *lowered_graph, tbackend_contexts,
+ (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) |
+ ir::Remove::DUPLICATED | ir::Remove::UNDEFINED);
+ // linearize
auto order = Linear::linearize(*lowered_graph);
- runTensorRegistration(lowered_graph.get(), order);
+ Linear::dump(*lowered_graph, order);
- std::vector<std::shared_ptr<backend::ITensor>> input_tensors;
- std::vector<std::shared_ptr<backend::ITensor>> output_tensors;
- if (options.is_primary_subgraph)
+ for (auto &&pair : tbackend_contexts)
{
- input_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getInputs());
- output_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getOutputs());
+ pair.second->genTensors();
}
- TensorBuilders tensor_builders{lowered_graph->backend_contexts(), true};
- TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};
-
- // To make tensors never be deallocated, this is a workaround to use static memory planner
- for (auto &tensor_builder : tensor_builders)
+ for (auto &&pair : tbackend_contexts)
{
- lowered_graph->graph().operands().iterate(
- [&](const ir::OperandIndex &ind, const ir::Operand &) {
- if (tensor_builder->isRegistered(ind))
- {
- tensor_builder->notifyFirstUse(ind);
- }
- });
+ auto tctx = pair.second.get();
+ tctx->genTrainingTensors();
}
- for (auto &tensor_builder : tensor_builders)
+ prepareMigrantTensors(*lowered_graph, tbackend_contexts);
+
+ // Give some runtime objects to builtin KernelGenerator
+ for (auto &&pair : tbackend_contexts)
{
- tensor_builder->prepare();
+ auto builtin_context =
+ dynamic_cast<backend::builtin::train::BackendContext *>(pair.second.get());
+ if (builtin_context != nullptr)
+ {
+ auto builtin_kernel_gen = builtin_context->kernel_gen;
+ builtin_kernel_gen->setTensorRegistries(tensor_regs);
+ builtin_kernel_gen->setWholeGraphOutputs(lowered_graph->trainable_graph().getOutputs());
+ }
}
- prepareExternalTensors(*lowered_graph);
+ // Adjust the order of backends for the upcoming iteration
+ auto ordered_contexts =
+ onert::orderBackendContext<backend::train::TrainableBackendContext>(tbackend_contexts);
- ExecutionBuilder builder;
+ // TODO Remove this simulation
+ // Simulate the execution to find deallocation points of tensors
+ std::unordered_map<ir::OperationIndex, DeallocList> dealloc_list_map;
+ {
+ ir::OperandIndexMap<uint32_t> uses_map;
+ ir::OperandIndexSequence constants;
- // Generate kernels
- lowered_graph->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &op_seq_index,
- const ir::OpSequence &op_seq) {
- auto lower_info = lowered_graph->getLowerInfo(op_seq_index);
- auto kernel_gen = lowered_graph->backend_contexts().at(lower_info->backend())->kernel_gen;
- // Set TensorBuilderSet and ExecutorMap to kernel_gen of control flow
- auto cf_kernel_gen = dynamic_cast<backend::controlflow::KernelGenerator *>(kernel_gen.get());
- if (cf_kernel_gen != nullptr)
+ auto model_io =
+ (graph.getInputs() + graph.getOutputs()) | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+
+ // Prepare scanning
+ graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+ uses_map[ind] = obj.getUses().size();
+
+ if (obj.isConstant())
+ constants.append(ind);
+ });
+
+ // A trick to consider constants as an exception
+ for (const auto &ind : constants)
{
- assert(cf_kernel_gen != nullptr);
- cf_kernel_gen->setTensorRegistries(tensor_regs);
- cf_kernel_gen->setExecutorMap(executor_map);
+ uses_map[ind]++;
}
- auto fn_seq = kernel_gen->generate(op_seq);
- if (options.he_profiling_mode)
+
+ for (const auto op_ind : order)
{
- fn_seq->wrap<SyncFunction>(lower_info->backend()->config());
+ const auto &op = graph.operations().at(op_ind);
+ auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+ auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+
+ for (const auto &ind : op_inputs)
+ {
+ const auto &operand = graph.operands().at(ind);
+ assert(uses_map.find(ind) != uses_map.end());
+ assert(uses_map[ind] > 0);
+ uses_map[ind]--;
+ if (uses_map[ind] == 0 && !operand.info().isVariable() && !model_io.contains(ind))
+ {
+ dealloc_list_map[op_ind].emplace_back(tensor_regs.getITensor(ind));
+ }
+ }
}
- builder.append(op_seq_index, {&op_seq, lower_info, std::move(fn_seq)});
- });
- for (const auto &tensor_builder : tensor_builders)
- {
- tensor_builder->allocate();
- }
+ // Dispose and validate
+ for (const auto &ind : constants)
+ {
+ --uses_map[ind];
+ }
- for (auto &pair : backend_contexts)
- {
- pair.second->initConsts();
+ assert(
+ std::all_of(uses_map.begin(), uses_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
}
- lowered_graph->graph().operands().iterate(
- [](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
-
- auto code_map = builder.releaseCodeMap();
-
- for (auto &it : code_map)
+ // Check derivative tensors
{
- auto op_seq_index = it.first;
- auto &fn_seq = it.second.fn_seq;
-
- fn_seq->iterate([&](exec::IFunction &ifunc) {
- ifunc.prepare();
- auto backend = lowered_graph->getLowerInfo(op_seq_index)->backend();
- auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder;
- tensor_builder->postFunctionPrepare();
- });
+ // TODO Support multiple subgraphs
+ // Check if the derivative tensors corresponding to inputs of model are nullptr
+ // NOTE The derivative tensors corresponding to inputs of model are for inputs of PermuteLayers
+ // and they are nullptr because they are meaningless.
+ assert(std::all_of(lowered_graph->trainable_graph().getInputs().begin(),
+ lowered_graph->trainable_graph().getInputs().end(),
+ [&](const auto &input_idx) {
+ return tensor_regs.getDerivativeITensor(input_idx) == nullptr;
+ }));
+
+ // Check if the derivative tensors corresponding to outputs of model exist
+ assert(std::all_of(lowered_graph->trainable_graph().getOutputs().begin(),
+ lowered_graph->trainable_graph().getOutputs().end(),
+ [&](const auto &output_idx) {
+ return tensor_regs.getDerivativeITensor(output_idx) != nullptr;
+ }));
}
- backend::TensorManagerSet tensor_mgrs = createTensorManagerSet(tensor_builders);
-
- exec::ExecutorBase *exec = nullptr;
- if (parallel)
- {
- exec = new exec::ParallelExecutor{std::move(lowered_graph), input_tensors,
- output_tensors, tensor_regs,
- std::move(tensor_mgrs), std::move(code_map)};
- }
- else
+ train::TrainableCodeMap code_map;
+ // Generate kernels
+ for (auto &&pair : ordered_contexts)
{
- auto dataflow_exec = new exec::DataflowExecutor{std::move(lowered_graph), input_tensors,
- output_tensors, tensor_regs,
- std::move(tensor_mgrs), std::move(code_map)};
- if (options.he_profiling_mode)
+ auto codes = pair.second->genKernels();
+ for (auto &&pair : codes)
{
- std::vector<const backend::Backend *> backends;
- for (const auto &pair : backend_contexts)
- {
- backends.push_back(pair.first);
- }
- auto et = std::make_shared<exec::ExecTime>(backends);
- std::unique_ptr<exec::IExecutionObserver> obs =
- std::make_unique<exec::ProfileObserver>(et, dataflow_exec->graph());
- dataflow_exec->addObserver(std::move(obs));
+ auto &op_ind = pair.first;
+ auto &tn_seq = pair.second;
+ auto &op = lowered_graph->trainable_graph().operation(op_ind);
+ auto lower_info = lowered_graph->lower_info().operation.getRawPtr(op_ind);
+
+ assert(code_map.find(op_ind) == code_map.end());
+ code_map.insert(
+ {op_ind, train::TrainableCodeAndInfo{op_ind, &op, lower_info, std::move(tn_seq)}});
}
- exec = dataflow_exec;
}
- if (!options.trace_filepath.empty())
+ if (order.size() != code_map.size())
+ {
+ throw std::runtime_error("ExecutorFactory: Some kernels are not generated");
+ }
+
+ auto exec = new exec::train::TrainableExecutor{std::move(lowered_graph),
+ std::move(tbackend_contexts),
+ tensor_regs,
+ std::move(code_map),
+ order,
+ tracing_ctx};
+
+ if (!options->trace_filepath.empty())
{
std::unique_ptr<exec::IExecutionObserver> ctp =
- std::make_unique<exec::ChromeTracingObserver>(options.trace_filepath, exec->graph());
+ std::make_unique<exec::TracingObserver>(options->trace_filepath, exec->graph(), tracing_ctx);
exec->addObserver(std::move(ctp));
}
+ // TODO Support MINMAX_H5DUMPER
return exec;
}
+#endif // ONERT_TRAIN
} // namespace compiler
} // namespace onert
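
The dealloc simulation in createTrainableExecutor above is a plain use-count liveness walk: every operand starts at its use count, constants and model I/O are pinned (constants via the extra increment, model I/O via the model_io filter), and a tensor joins an op's DeallocList when its count reaches zero at that op. A minimal standalone sketch of the same counting scheme, using illustrative names rather than onert types:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <map>
    #include <vector>

    using OperandId = uint32_t;

    struct Op
    {
      std::vector<OperandId> inputs; // already DUPLICATED/UNDEFINED-filtered
    };

    // For each position in the linear order, collect the operands whose last
    // use is that op -- those tensors may be freed right after it runs.
    std::map<std::size_t, std::vector<OperandId>>
    simulateDealloc(const std::vector<Op> &order, std::map<OperandId, uint32_t> uses)
    {
      std::map<std::size_t, std::vector<OperandId>> dealloc;
      for (std::size_t i = 0; i < order.size(); ++i)
      {
        for (const auto ind : order[i].inputs)
        {
          assert(uses.at(ind) > 0);
          if (--uses[ind] == 0)
            dealloc[i].push_back(ind); // last reader reached
        }
      }
      return dealloc;
    }

Pinned operands never hit zero during the walk, which is exactly why the diff bumps every constant's count before scanning and disposes of those extra counts afterwards.
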
diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.h b/runtime/onert/core/src/compiler/ExecutorFactory.h
index b8893c03b..cc621bccf 100644
--- a/runtime/onert/core/src/compiler/ExecutorFactory.h
+++ b/runtime/onert/core/src/compiler/ExecutorFactory.h
@@ -17,18 +17,37 @@
#ifndef __ONERT_COMPILER_EXECUTOR_FACTORY_H__
#define __ONERT_COMPILER_EXECUTOR_FACTORY_H__
-#include <unordered_map>
+#include "TensorRegistries.h"
#include "backend/ITensor.h"
-#include "exec/IExecutor.h"
+
+#ifdef ONERT_TRAIN
+#include "backend/train/TrainableBackendContext.h"
+#endif // ONERT_TRAIN
#include "compiler/LoweredGraph.h"
-#include "TensorRegistries.h"
+#ifdef ONERT_TRAIN
+#include "compiler/train/LoweredTrainableGraph.h"
+#include "exec/train/optimizer/Optimizer.h"
+#endif // ONERT_TRAIN
+#include "exec/IExecutors.h"
+
+#include <deque>
+#include <unordered_map>
namespace onert
{
namespace compiler
{
+// TODO Change to a better name
+struct ExecutorFactoryArgs
+{
+ const util::TracingCtx *tracing_ctx;
+ const compiler::CompilerOptions *options;
+ ir::ModelIndex model_index;
+ std::shared_ptr<backend::custom::IKernelBuilder> custom_kernel_builder;
+};
+
class ExecutorFactory
{
public:
@@ -36,35 +55,56 @@ public:
public:
exec::IExecutor *create(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map);
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const ExecutorFactoryArgs &args);
+
+#ifdef ONERT_TRAIN
+ // TODO Unify create()
+ exec::IExecutor *create(std::unique_ptr<compiler::train::LoweredTrainableGraph> lowered_graph,
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const ExecutorFactoryArgs &args,
+ const std::shared_ptr<exec::train::optimizer::Optimizer> &optimizer);
+#endif // ONERT_TRAIN
private:
ExecutorFactory();
private:
- static void initializeBackendContext(compiler::LoweredGraph *lowered_graph);
- static void runTensorRegistration(compiler::LoweredGraph *lowered_graph,
- const std::vector<ir::OpSequenceIndex> &order);
- static std::vector<std::shared_ptr<backend::ITensor>>
- initializeModelIOTensors(compiler::LoweredGraph &lowered_graph,
- const ir::OperandIndexSequence &indices);
- static void prepareExternalTensors(compiler::LoweredGraph &lowered_graph);
+ static void prepareMigrantTensors(compiler::ILoweredGraph &lowered_graph,
+ const backend::BackendContexts &backend_contexts);
+ static void prepareBuiltinBackend(const TensorRegistries &tensor_regs,
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const backend::BackendContexts &backend_contexts,
+ const ir::ModelIndex &index);
+ static std::deque<std::pair<const backend::Backend *, backend::BackendContext *>>
+ orderBackendContext(const backend::BackendContexts &backend_contexts);
+
static exec::IExecutor *
createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map);
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const ExecutorFactoryArgs &args);
static exec::IExecutor *
createDataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map, bool parallel);
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const ExecutorFactoryArgs &args, bool parallel);
+#ifdef ONERT_TRAIN
+ // TODO Unify prepareMigrantTensors
+ static void
+ prepareMigrantTensors(compiler::ILoweredGraph &lowered_graph,
+ const backend::train::TrainableBackendContexts &backend_contexts);
+ static exec::IExecutor *
+ createTrainableExecutor(std::unique_ptr<compiler::train::LoweredTrainableGraph> lowered_graph,
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const ExecutorFactoryArgs &args,
+ const std::shared_ptr<exec::train::optimizer::Optimizer> &optimizer);
+#endif // ONERT_TRAIN
private:
- std::unordered_map<std::string, std::function<exec::IExecutor *(
- std::unique_ptr<compiler::LoweredGraph>,
- const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map)>>
- _map;
+ std::unordered_map<
+ std::string, std::function<exec::IExecutor *(std::unique_ptr<compiler::LoweredGraph>,
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const ExecutorFactoryArgs &args)>>
+ _map;
};
} // namespace compiler
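
The `_map` member at the bottom of this header is a string-keyed factory table: the value of `CompilerOptions::executor` ("Linear", "Dataflow", or "Parallel") selects which creation function runs. A self-contained sketch of that dispatch pattern, with hypothetical stand-in types instead of the onert classes:

    #include <functional>
    #include <memory>
    #include <stdexcept>
    #include <string>
    #include <unordered_map>

    struct IExecutor
    {
      virtual ~IExecutor() = default;
    };
    struct LinearExecutor : IExecutor {};
    struct DataflowExecutor : IExecutor {};

    int main()
    {
      std::unordered_map<std::string, std::function<IExecutor *()>> map;
      map["Linear"] = [] { return new LinearExecutor; };
      map["Dataflow"] = [] { return new DataflowExecutor; };
      map["Parallel"] = [] { return new DataflowExecutor; }; // dataflow + parallel flag

      const std::string executor = "Linear"; // would come from CompilerOptions::executor
      const auto it = map.find(executor);
      if (it == map.end())
        throw std::runtime_error("Unknown executor: " + executor);
      std::unique_ptr<IExecutor> exec{it->second()};
    }
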
diff --git a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc
index 23a6a253d..ce9b09c2d 100644
--- a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc
+++ b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc
@@ -14,6 +14,8 @@
* limitations under the License.
*/
+#if 0 // This file is temporarily unused
+
#include "Fp32ToFp16Converter.h"
#include "ir/operation/ConvertFp32ToFp16.h"
#include "ir/operation/ConvertFp16ToFp32.h"
@@ -45,7 +47,7 @@ namespace compiler
{
Fp32ToFp16Converter::Fp32ToFp16Converter(compiler::LoweredGraph &lowered_graph)
- : _lowered_graph{lowered_graph}
+ : _lowered_graph{lowered_graph}
{
VERBOSE(Fp32ToFp16Converter) << "Fp16 Enable on" << std::endl;
}
@@ -177,26 +179,26 @@ void Fp32ToFp16Converter::run()
void Fp32ToFp16Converter::appendOpSequences()
{
_lowered_graph.op_seqs().iterate(
- [&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) {
- const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
- assert(lower_info != nullptr);
-
- // For now, the only acl_cl supports fully fp16 type
- // TODO Support fp16 on acl_neon. Current acl_neon supports the only reshape and concat
- // operations.
- // To do this, we could check the support by `operation by operation`. After that, we
- // would partition an op_seq if it contains unsupported operations.
- if (lower_info->backend()->config()->id() != kAclClBackendConfigId)
- return;
-
- // OpSeq's input set should be included in the first operation's input set or
- // OpSeq's output set should be included in the last operation's output set
- assert(checkOperandsOfOpSequence(op_seq));
-
- // Append converting OpSequence for fp16 but all operands' types are not fp16 still.
- appendNewOpSeqForConvertFp32ToFp16(op_seq_ind, op_seq);
- appendNewOpSeqForConvertFp16ToFp32(op_seq_ind, op_seq);
- });
+ [&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) {
+ const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
+ assert(lower_info != nullptr);
+
+ // For now, only the acl_cl backend fully supports the fp16 type
+ // TODO Support fp16 on acl_neon. Currently acl_neon supports only the reshape and concat
+ // operations.
+ // To do this, we could check the support operation by operation. After that, we
+ // would partition an op_seq if it contains unsupported operations.
+ if (lower_info->backend()->config()->id() != kAclClBackendConfigId)
+ return;
+
+ // OpSeq's input set should be included in the first operation's input set or
+ // OpSeq's output set should be included in the last operation's output set
+ assert(checkOperandsOfOpSequence(op_seq));
+
+ // Append a converting OpSequence for fp16, though all operands' types are not fp16 yet.
+ appendNewOpSeqForConvertFp32ToFp16(op_seq_ind, op_seq);
+ appendNewOpSeqForConvertFp16ToFp32(op_seq_ind, op_seq);
+ });
}
//
@@ -253,7 +255,7 @@ void Fp32ToFp16Converter::appendNewOpSeqForConvertFp32ToFp16(const ir::OpSequenc
const auto new_op_seq_ind = newOpSequence(op_seq_ind, new_node_ind);
// set new lower_info for op_seq
- setNewOpSequenceLowerInfo(op_seq_ind, new_op_seq_ind);
+ setNewOperationLowerInfo(op_seq_ind, new_op_seq_ind);
_list_fp32_to_fp16.insert(new_op_seq_ind);
@@ -326,7 +328,7 @@ void Fp32ToFp16Converter::appendNewOpSeqForConvertFp16ToFp32(const ir::OpSequenc
auto new_op_seq_ind = newOpSequence(op_seq_ind, new_node_ind);
// set new lower_info for op_seq
- setNewOpSequenceLowerInfo(op_seq_ind, new_op_seq_ind);
+ setNewOperationLowerInfo(op_seq_ind, new_op_seq_ind);
_list_fp16_to_fp32.insert(new_op_seq_ind);
@@ -372,16 +374,16 @@ void Fp32ToFp16Converter::optimize()
void Fp32ToFp16Converter::convertOperands()
{
_lowered_graph.op_seqs().iterate(
- [&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) {
- const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
- assert(lower_info != nullptr);
- // For now, the only acl_cl supports fully fp16
- if (lower_info->backend()->config()->id() != kAclClBackendConfigId)
- return;
-
- // Convert input,output operands' type to fp16
- convertOperandsOfOpSequence(op_seq);
- });
+ [&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) {
+ const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
+ assert(lower_info != nullptr);
+ // For now, only the acl_cl backend fully supports fp16
+ if (lower_info->backend()->config()->id() != kAclClBackendConfigId)
+ return;
+
+ // Convert input,output operands' type to fp16
+ convertOperandsOfOpSequence(op_seq);
+ });
}
void Fp32ToFp16Converter::convertOperandsOfOpSequence(ir::OpSequence &op_seq)
@@ -391,10 +393,10 @@ void Fp32ToFp16Converter::convertOperandsOfOpSequence(ir::OpSequence &op_seq)
const auto &op_seq_inputs = _lowered_graph.graph().getInputs();
const auto &op_seq_outputs = _lowered_graph.graph().getOutputs();
- for (auto &op_idx : op_seq)
+ for (const auto &op_idx : op_seq)
{
const auto &node = operations.at(op_idx);
- for (auto &ind : node.getInputs() | ir::Remove::UNDEFINED)
+ for (const auto &ind : node.getInputs() | ir::Remove::UNDEFINED)
{
if (node.opcode() == ir::OpCode::ConvertFp32ToFp16 || op_seq_inputs.contains(ind))
continue;
@@ -405,10 +407,10 @@ void Fp32ToFp16Converter::convertOperandsOfOpSequence(ir::OpSequence &op_seq)
obj.type(ir::DataType::FLOAT16);
- VERBOSE(Fp32ToFp16Converter) << "Input Operand #" << ind.value() << ": fp16" << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "Input Operand " << ind << ": fp16" << std::endl;
}
- for (auto &ind : node.getOutputs())
+ for (const auto &ind : node.getOutputs())
{
if (node.opcode() == ir::OpCode::ConvertFp16ToFp32 || op_seq_outputs.contains(ind))
continue;
@@ -419,7 +421,7 @@ void Fp32ToFp16Converter::convertOperandsOfOpSequence(ir::OpSequence &op_seq)
obj.type(ir::DataType::FLOAT16);
- VERBOSE(Fp32ToFp16Converter) << "Output Operand #" << ind.value() << ": fp16" << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "Output Operand " << ind << ": fp16" << std::endl;
}
}
}
@@ -444,7 +446,7 @@ void Fp32ToFp16Converter::convertDatas()
obj.data(std::move(new_data));
obj.type(ir::DataType::FLOAT16);
- VERBOSE(Fp32ToFp16Converter) << "Constant Operand #" << ind.value() << ": fp16" << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "Constant Operand " << ind << ": fp16" << std::endl;
}
});
}
@@ -513,23 +515,23 @@ ir::OperandIndex Fp32ToFp16Converter::newCopiedOperand(const ir::OperandIndex &o
void Fp32ToFp16Converter::setNewOperandLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
const ir::OperandIndex &new_op_ind)
{
- const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
+ const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
assert(lower_info != nullptr);
- auto new_lower_info = std::make_unique<ir::operand::LowerInfo>();
- auto permute_factor = ir::operand::PermuteFactor(lower_info->backend(), lower_info->layout());
+ auto new_lower_info = std::make_unique<compiler::OperandLowerInfo>();
+ auto permute_factor = compiler::PermuteFactor(lower_info->backend(), lower_info->layout());
new_lower_info->addDefPermuteFactor(permute_factor);
new_lower_info->addUsePermuteFactor(permute_factor);
_lowered_graph.setLowerInfo(new_op_ind, std::move(new_lower_info));
}
-void Fp32ToFp16Converter::setNewOpSequenceLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
- const ir::OpSequenceIndex &new_op_seq_ind)
+void Fp32ToFp16Converter::setNewOperationLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
+ const ir::OpSequenceIndex &new_op_seq_ind)
{
- const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
+ const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
assert(lower_info != nullptr);
auto new_lower_info =
- std::make_unique<ir::operation::LowerInfo>(lower_info->backend(), lower_info->layout());
+ std::make_unique<compiler::OperationLowerInfo>(lower_info->backend(), lower_info->layout());
_lowered_graph.setLowerInfo(new_op_seq_ind, std::move(new_lower_info));
}
@@ -600,7 +602,7 @@ Fp32ToFp16Converter::newOperationConvertFp32ToFp16(const ir::OperandIndex &op_se
auto &new_op_obj = operands.at(new_op_ind);
std::unique_ptr<ir::Operation> new_node(
- new ir::operation::ConvertFp32ToFp16({op_seq_input_ind}, {new_op_ind}));
+ new ir::operation::ConvertFp32ToFp16({op_seq_input_ind}, {new_op_ind}));
const auto new_node_ind = operations.push(std::move(new_node));
input_obj.insertUse(new_node_ind);
@@ -620,7 +622,7 @@ Fp32ToFp16Converter::newOperationConvertFp16ToFp32(const ir::OperandIndex &op_se
auto &new_op_obj = operands.at(new_op_ind);
std::unique_ptr<ir::Operation> new_node(
- new ir::operation::ConvertFp16ToFp32({new_op_ind}, {op_seq_output_ind}));
+ new ir::operation::ConvertFp16ToFp32({new_op_ind}, {op_seq_output_ind}));
const auto new_node_ind = operations.push(std::move(new_node));
new_op_obj.insertUse(new_node_ind);
@@ -633,7 +635,7 @@ ir::OpSequenceIndex Fp32ToFp16Converter::newOpSequence(const ir::OpSequenceIndex
const ir::OperationIndex &node_index)
{
auto &node = _lowered_graph.graph().operations().at(node_index);
- const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
+ const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
assert(lower_info != nullptr);
auto layout = lower_info->layout();
@@ -745,7 +747,7 @@ Fp32ToFp16Converter::findOpSequencesContiguous(const InputToOpSeqs &input_to_op_
// | |
// [OPERATION] [OPERATION]
//
- for (auto &op_seq_ind : found_input_in_op_seqs->second)
+ for (const auto &op_seq_ind : found_input_in_op_seqs->second)
{
auto found_in_fp32_to_fp16 = _list_fp32_to_fp16.find(op_seq_ind);
if (found_in_fp32_to_fp16 != _list_fp32_to_fp16.end())
@@ -759,9 +761,8 @@ Fp32ToFp16Converter::findOpSequencesContiguous(const InputToOpSeqs &input_to_op_
opseq_map_to_delete[op_seq_ind_fp16_to_fp32].insert(op_seq_ind);
}
- VERBOSE(Fp32ToFp16Converter)
- << "Contiguous from OpSeq#" << op_seq_ind_fp16_to_fp32.value() << "(ToFp32)"
- << " to OpSeq#" << op_seq_ind.value() << "(ToFp16)" << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "Contiguous from " << op_seq_ind_fp16_to_fp32 << "(ToFp32)"
+ << " to " << op_seq_ind << "(ToFp16)" << std::endl;
}
}
}
@@ -775,7 +776,7 @@ Fp32ToFp16Converter::InputToOpSeqs Fp32ToFp16Converter::prepareInputToOpSeqs() c
InputToOpSeqs input_to_op_seqs;
op_seqs.iterate([&](const ir::OpSequenceIndex &op_seq_idx, const ir::OpSequence &op_seq) {
- for (auto input : op_seq.getInputs() | ir::Remove::UNDEFINED)
+ for (auto &&input : op_seq.getInputs() | ir::Remove::UNDEFINED)
{
auto it = input_to_op_seqs.find(input);
if (it == input_to_op_seqs.end())
@@ -798,13 +799,13 @@ Fp32ToFp16Converter::getListOpSequences(const OpSeqIndexToOpSeqIndexList &opseq_
OpSeqIndexList list;
for (const auto &it : opseq_map_to_delete)
{
- auto &opseq_ind_fp16_to_fp32 = it.first;
+ const auto &opseq_ind_fp16_to_fp32 = it.first;
if (list.find(opseq_ind_fp16_to_fp32) == list.end())
{
list.emplace(opseq_ind_fp16_to_fp32);
}
- for (auto &opseq_ind_fp32_to_fp16 : it.second)
+ for (const auto &opseq_ind_fp32_to_fp16 : it.second)
{
if (list.find(opseq_ind_fp32_to_fp16) == list.end())
{
@@ -842,7 +843,7 @@ Fp32ToFp16Converter::findOperationsToDelete(const OpSeqIndexList &list_to_delete
}
void Fp32ToFp16Converter::manipulateContiguousOpSequences(
- const InputToOpSeqs &input_to_op_seqs, const OpSeqIndexToOpSeqIndexList &opseq_map_to_delete)
+ const InputToOpSeqs &input_to_op_seqs, const OpSeqIndexToOpSeqIndexList &opseq_map_to_delete)
{
auto &op_seqs = _lowered_graph.op_seqs();
@@ -861,14 +862,14 @@ void Fp32ToFp16Converter::manipulateContiguousOpSequences(
// |
// [OPERATION] // op_seq_ind_next_to_fp16
//
- for (auto it : opseq_map_to_delete)
+ for (auto &&it : opseq_map_to_delete)
{
// fp16_to_fp32's input/output num is always 1
auto &op_seq_ind_fp16_to_fp32 = it.first;
auto &op_seq_fp16_to_fp32 = op_seqs.at(op_seq_ind_fp16_to_fp32);
auto &input_ind_fp16_to_fp32 = op_seq_fp16_to_fp32.getInputs().at(0);
- for (auto &op_seq_ind_fp32_to_fp16 : it.second)
+ for (const auto &op_seq_ind_fp32_to_fp16 : it.second)
{
auto &op_seq_fp32_to_fp16 = op_seqs.at(op_seq_ind_fp32_to_fp16);
assert(op_seq_fp32_to_fp16.size() == 1);
@@ -878,7 +879,7 @@ void Fp32ToFp16Converter::manipulateContiguousOpSequences(
auto found_next_to_fp16 = input_to_op_seqs.find(output_ind_fp32_to_fp16);
assert(found_next_to_fp16 != input_to_op_seqs.end());
- for (auto &op_seq_ind_next_to_fp16 : found_next_to_fp16->second)
+ for (const auto &op_seq_ind_next_to_fp16 : found_next_to_fp16->second)
{
manipulateInput(op_seq_ind_next_to_fp16, output_ind_fp32_to_fp16, input_ind_fp16_to_fp32);
}
@@ -894,61 +895,62 @@ void Fp32ToFp16Converter::manipulateContiguousOpSequences(
}
void Fp32ToFp16Converter::deleteContiguousOpSequences(
- const OpSeqIndexList &list_to_delete_op_seqs,
- const ir::OperandIndexSequence &list_to_delete_ops)
+ const OpSeqIndexList &list_to_delete_op_seqs, const ir::OperandIndexSequence &list_to_delete_ops)
{
auto &operands = _lowered_graph.graph().operands();
auto &operations = _lowered_graph.graph().operations();
auto &op_seqs = _lowered_graph.op_seqs();
- for (auto &op_seq_ind : list_to_delete_op_seqs)
+ for (const auto &op_seq_ind : list_to_delete_op_seqs)
{
auto &op_seq = op_seqs.at(op_seq_ind);
assert(op_seq.size() == 1);
- VERBOSE(Fp32ToFp16Converter) << "Delete OpSeq #" << op_seq_ind.value() << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "Delete OpSeq " << op_seq_ind << std::endl;
auto &first_node_ind = op_seq.operations().at(0);
auto &first_node = operations.at(first_node_ind);
assert(first_node.opcode() == ir::OpCode::ConvertFp32ToFp16 ||
first_node.opcode() == ir::OpCode::ConvertFp16ToFp32);
- VERBOSE(Fp32ToFp16Converter) << "Delete Node #" << first_node_ind.value() << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "Delete Node " << first_node_ind << std::endl;
// Uses
- for (auto &ind : first_node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
+ for (const auto &ind : first_node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
auto &obj = operands.at(ind);
obj.removeUse(first_node_ind);
- VERBOSE(Fp32ToFp16Converter) << "Operand #" << ind.value() << "'s Use(Node#"
- << first_node_ind.value() << ") is removed" << std::endl;
+ VERBOSE(Fp32ToFp16Converter)
+ << "Operand " << ind << "'s Use(Node" << first_node_ind << ") is removed" << std::endl;
}
// Def
- for (auto &ind : first_node.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
+ for (const auto &ind : first_node.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
auto &obj = operands.at(ind);
assert(obj.getDef() == first_node_ind);
obj.unsetDef();
- VERBOSE(Fp32ToFp16Converter) << "Operand #" << ind.value() << "'s Def(Node#"
- << first_node_ind.value() << ") is removed" << std::endl;
+ VERBOSE(Fp32ToFp16Converter)
+ << "Operand " << ind << "'s Def(Node" << first_node_ind << ") is removed" << std::endl;
}
// Operation
operations.remove(first_node_ind);
- VERBOSE(Fp32ToFp16Converter) << "Node#" << first_node_ind.value() << " is removed" << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "Node" << first_node_ind << " is removed" << std::endl;
// OpSequence
op_seqs.remove(op_seq_ind);
- VERBOSE(Fp32ToFp16Converter) << "OpSeq#" << op_seq_ind.value() << " is removed" << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "OpSeq" << op_seq_ind << " is removed" << std::endl;
}
// Operand
- for (auto &ind : list_to_delete_ops)
+ for (const auto &ind : list_to_delete_ops)
{
operands.remove(ind);
- VERBOSE(Fp32ToFp16Converter) << "Operand #" << ind.value() << " is removed" << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "Operand " << ind << " is removed" << std::endl;
}
}
} // namespace compiler
} // namespace onert
+
+#endif
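
For context on convertDatas() above: each constant operand's buffer is rewritten element-wise from float32 to float16 before the operand's type is switched to FLOAT16. The hunks do not show the scalar conversion itself; a self-contained round-to-nearest-even version would look roughly like the following (illustrative only, not necessarily the helper onert calls):

    #include <cstdint>
    #include <cstring>

    // One IEEE-754 float32 -> float16 bit pattern, round-to-nearest-even.
    uint16_t f32_to_f16_bits(float value)
    {
      uint32_t f;
      std::memcpy(&f, &value, sizeof f);

      const uint32_t sign = (f >> 16) & 0x8000u;
      const uint32_t exp = (f >> 23) & 0xffu;
      uint32_t mant = f & 0x007fffffu;

      if (exp == 0xffu) // Inf or NaN
        return static_cast<uint16_t>(sign | 0x7c00u | (mant ? 0x0200u : 0u));

      const int32_t e = static_cast<int32_t>(exp) - 127 + 15; // re-bias exponent

      if (e >= 0x1f) // too large -> signed Inf
        return static_cast<uint16_t>(sign | 0x7c00u);

      if (e <= 0) // subnormal half or zero
      {
        if (e < -10) // magnitude below the smallest half subnormal
          return static_cast<uint16_t>(sign);
        mant |= 0x00800000u; // make the leading 1 explicit
        const uint32_t shift = static_cast<uint32_t>(14 - e);
        uint16_t h = static_cast<uint16_t>(sign | (mant >> shift));
        const uint32_t rem = mant & ((1u << shift) - 1u);
        const uint32_t half = 1u << (shift - 1);
        if (rem > half || (rem == half && (h & 1u)))
          ++h; // round to nearest, ties to even
        return h;
      }

      // Normal case: keep the top 10 mantissa bits and round.
      uint16_t h = static_cast<uint16_t>(sign | (static_cast<uint32_t>(e) << 10) | (mant >> 13));
      const uint32_t rem = mant & 0x1fffu;
      if (rem > 0x1000u || (rem == 0x1000u && (h & 1u)))
        ++h; // a carry here correctly bumps the exponent (possibly to Inf)
      return h;
    }
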
diff --git a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.h b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.h
index eeecb9846..87751ceb4 100644
--- a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.h
+++ b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.h
@@ -14,6 +14,8 @@
* limitations under the License.
*/
+#if 0 // This file is temporarily unused
+
#ifndef __ONERT_COMPILER_FP32_TO_FP16_CONVERTER_H__
#define __ONERT_COMPILER_FP32_TO_FP16_CONVERTER_H__
@@ -64,8 +66,8 @@ private:
void setNewOperandLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
const ir::OperandIndex &new_op_ind);
- void setNewOpSequenceLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
- const ir::OpSequenceIndex &new_op_seq_ind);
+ void setNewOperationLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
+ const ir::OpSequenceIndex &new_op_seq_ind);
void manipulateInput(const ir::OpSequenceIndex &op_seq_ind,
const ir::OperandIndex &op_seq_input_ind,
@@ -99,3 +101,5 @@ private:
} // namespace onert
#endif // __ONERT_COMPILER_FP32_TO_FP16_CONVERTER_H__
+
+#endif
diff --git a/runtime/onert/core/src/compiler/HEScheduler.cc b/runtime/onert/core/src/compiler/HEScheduler.cc
index 5653b090e..56e2208d6 100644
--- a/runtime/onert/core/src/compiler/HEScheduler.cc
+++ b/runtime/onert/core/src/compiler/HEScheduler.cc
@@ -14,34 +14,32 @@
* limitations under the License.
*/
-#include "ir/Operand.h"
-#include "compiler/HEScheduler.h"
-#include "ir/Graph.h"
-#include "util/ConfigSource.h"
+#include "HEScheduler.h"
+
#include "compiler/BackendResolver.h"
+#include "ir/Graph.h"
#include "util/logging.h"
-#include "util/Utils.h"
-#include "exec/FunctionSequence.h"
+
#include <cassert>
#include <cmath>
-#include <chrono>
-namespace onert
+namespace
{
-namespace compiler
-{
-static uint32_t getOperationsFlattenedIOSize(const ir::Graph &graph, const ir::Operation &node)
+using namespace onert;
+
+uint32_t getOperationsFlattenedIOSize(const ir::Graph &graph, const ir::IOperation &node)
{
uint32_t size = 0;
- for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
+ for (const auto &ind :
+ (node.getInputs() | ir::Remove::UNDEFINED) + (node.getOutputs() | ir::Remove::UNDEFINED))
{
size += graph.operands().at(ind).info().total_size();
}
return size;
}
-static bool isQuant(const ir::Graph &graph, const ir::Operation &node)
+bool isQuant(const ir::Graph &graph, const ir::IOperation &node)
{
for (const auto &input : node.getInputs() | ir::Remove::UNDEFINED)
{
@@ -54,18 +52,17 @@ static bool isQuant(const ir::Graph &graph, const ir::Operation &node)
return false;
}
-static bool isWorkaroundSkip(const ir::Graph &, const backend::Backend *, const ir::Operation &,
- bool)
+bool isWorkaroundSkip(const ir::Graph &, const backend::Backend *, const ir::IOperation &, bool)
{
// Now, there is no workaround
return false;
}
// if a node can be merged into op_seq
-static bool isMergeable(const ir::Graph &graph, const ir::Operation &node)
+bool isMergeable(const ir::Graph &graph, const ir::IOperation &node)
{
size_t prev_op_cnt = 0;
- for (const auto &input : node.getInputs())
+ for (const auto &input : node.getInputs() | ir::Remove::UNDEFINED)
{
// only valid_inputs
const auto &operand = graph.operands().at(input);
@@ -85,15 +82,23 @@ static bool isMergeable(const ir::Graph &graph, const ir::Operation &node)
return true;
}
+} // namespace
+
+namespace onert
+{
+
+namespace compiler
+{
+
void HEScheduler::scheduleShufflingBackends()
{
VERBOSE(HEScheduler::schedule)
- << "Started task scheduling: uses all backends to get more metrics for data transfer"
- << std::endl;
+ << "Started task scheduling: uses all backends to get more metrics for data transfer"
+ << std::endl;
size_t backend_ind = 0;
for (const auto &rank : _rank_to_op)
{
- VERBOSE(HEScheduler::schedule) << "scheduling (" << rank.second.value() << ")" << std::endl;
+ VERBOSE(HEScheduler::schedule) << "scheduling (" << rank.second << ")" << std::endl;
const auto &node = _graph->operations().at(rank.second);
const bool quant = isQuant(*_graph, node);
const auto size = getOperationsFlattenedIOSize(*_graph, node);
@@ -115,7 +120,7 @@ void HEScheduler::scheduleShufflingBackends()
continue;
}
const auto exec_time =
- _exec_time->getOperationExecTime(_all_backends[backend_ind], node.name(), quant, size);
+ _exec_time->getOperationExecTime(_all_backends[backend_ind], node.name(), quant, size);
// Scheduling to measure data transfer must be done after measuring all backends separately
assert(exec_time != _exec_time->NOT_FOUND);
if (exec_time == _exec_time->getMax())
@@ -132,7 +137,7 @@ void HEScheduler::scheduleShufflingBackends()
}
}
-bool HEScheduler::isNodeProfiled(const ir::Operation &node)
+bool HEScheduler::isNodeProfiled(const ir::IOperation &node)
{
const bool quant = isQuant(*_graph, node);
const auto size = getOperationsFlattenedIOSize(*_graph, node);
@@ -202,7 +207,7 @@ std::unique_ptr<compiler::BackendResolver> HEScheduler::schedule(const ir::Graph
{
// Check if profiling info about all backend/node pairs already exists
bool all_nodes_are_profiled = true;
- _graph->operations().iterate([&](const ir::OperationIndex &, const ir::Operation &op) {
+ _graph->operations().iterate([&](const ir::OperationIndex &, const ir::IOperation &op) {
if (all_nodes_are_profiled)
all_nodes_are_profiled = isNodeProfiled(op);
});
@@ -219,7 +224,7 @@ std::unique_ptr<compiler::BackendResolver> HEScheduler::schedule(const ir::Graph
ir::OperationIndexMap<bool> visited;
graph.operations().iterate(
- [&](const ir::OperationIndex &index, const ir::Operation &) { visited[index] = false; });
+ [&](const ir::OperationIndex &index, const ir::IOperation &) { visited[index] = false; });
// for each task select the backend with the smallest earliest finishing time(eft)
for (const auto &rank : _rank_to_op)
{
@@ -248,19 +253,20 @@ int64_t HEScheduler::getPermuteTime(const backend::Backend *src_backend,
if (time != _exec_time->NOT_FOUND)
return time;
+ // FIXME Permute time is not recorded, so control always reaches here
// Makes the scheduler prefer keeping computations on one backend
- return size / 200;
+ return size / 400;
}
-int64_t HEScheduler::tryBackend(const ir::Operation &node, const backend::Backend *backend)
+int64_t HEScheduler::tryBackend(const ir::IOperation &node, const backend::Backend *backend)
{
// if there is no profiling info don't use this backend during scheduling
if (!_is_profiling_mode)
{
VERBOSE(HEScheduler::tryBackend)
- << "Trying to HE schedule while there is no profiling info for " << node.name()
- << " on backend " << backend->config()->id() << ". So this backend won't be used. "
- << std::endl;
+ << "Trying to HE schedule while there is no profiling info for " << node.name()
+ << " on backend " << backend->config()->id() << ". So this backend won't be used. "
+ << std::endl;
_is_supported[backend][node.name()] = false;
return _exec_time->getMax();
}
@@ -291,10 +297,10 @@ void HEScheduler::makeRank()
VERBOSE(HEScheduler::makeRank) << "task prioritizing" << std::endl;
_graph->operations().iterate(
- [&](const ir::OperationIndex &index, const ir::Operation &) { DFSMaxRank(index); });
+ [&](const ir::OperationIndex &index, const ir::IOperation &) { DFSMaxRank(index); });
// Check that ranks are calculated for all operations(nodes)
- _graph->operations().iterate([&](const ir::OperationIndex &index, const ir::Operation &) {
+ _graph->operations().iterate([&](const ir::OperationIndex &index, const ir::IOperation &) {
UNUSED_RELEASE(index);
assert(_op_to_rank->find(index) != _op_to_rank->end());
});
@@ -360,8 +366,8 @@ int64_t HEScheduler::DFSMaxRank(const ir::OperationIndex &index)
assert(rank >= 0);
_rank_to_op.emplace(rank, index);
_op_to_rank->emplace(index, rank);
- VERBOSE(HEScheduler::DFSMaxRank) << "rank of operation (" << index.value() << ")" << node.name()
- << " is " << rank << std::endl;
+ VERBOSE(HEScheduler::DFSMaxRank)
+ << "rank of operation (" << index << ")" << node.name() << " is " << rank << std::endl;
return rank;
}
@@ -370,7 +376,7 @@ int64_t HEScheduler::DFSChildrenMaxRank(const ir::OperationIndex &index)
{
const auto &node = _graph->operations().at(index);
int64_t max_child_rank = 0;
- for (const auto &output : node.getOutputs())
+ for (const auto &output : node.getOutputs() | ir::Remove::UNDEFINED)
{
const auto &operand = _graph->operands().at(output);
const bool quant = operand.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM;
@@ -384,9 +390,9 @@ int64_t HEScheduler::DFSChildrenMaxRank(const ir::OperationIndex &index)
{
continue;
}
- // TODO Change it to controlflow backend
+ // TODO Change it to builtin backend
auto transfer_cost =
- getPermuteTime(backend, other_backend, quant, operand.info().total_size());
+ getPermuteTime(backend, other_backend, quant, operand.info().total_size());
avg_transfer_cost += transfer_cost;
}
}
@@ -403,7 +409,7 @@ int64_t HEScheduler::DFSChildrenMaxRank(const ir::OperationIndex &index)
int64_t HEScheduler::backendAvailableTime(const backend::Backend *backend,
const int64_t &starting_time, const int64_t &time_amount)
{
- const auto backend_times = _backends_avail_time.at(backend);
+ const auto &backend_times = _backends_avail_time.at(backend);
// finishing and starting times of an op, that will come after current op
auto next_op_fst = backend_times.upper_bound(starting_time);
// finishing time of an op, that will come before current op
@@ -419,7 +425,7 @@ int64_t HEScheduler::backendAvailableTime(const backend::Backend *backend,
bool HEScheduler::schedule(const ir::OperationIndex &index, const backend::Backend *parent_backend)
{
- VERBOSE(HEScheduler::schedule) << "scheduling (" << index.value() << ")" << std::endl;
+ VERBOSE(HEScheduler::schedule) << "scheduling (" << index << ")" << std::endl;
int64_t eft = std::numeric_limits<int64_t>::max(), selected_exec_time = 0;
const auto &node = _graph->operations().at(index);
@@ -506,7 +512,7 @@ HEScheduler::ESTAndExecTime(const backend::Backend *backend, const ir::Operation
// Find free time for data transferring and insert it into backend taskset. This is needed:
// 1. Time for multiple permutations for this node's input is found correctly
// 2. If backend==cpu, then free time for this node must come after permutations
- for (auto &it : transfer_st_exec_time)
+ for (auto &&it : transfer_st_exec_time)
{
if (_is_parallel_exec)
{
@@ -542,27 +548,27 @@ HEScheduler::ESTAndExecTime(const backend::Backend *backend, const ir::Operation
if (!_is_parallel_exec)
{
VERBOSE(HEScheduler::ESTAndExecTime)
- << "exec_time of (" << index.value() << ") " << node.name() << " quant==" << quant << " on "
- << backend->config()->id() << " is " << exec_time
- << " microseconds. Data transfer cost: " << total_transfer_cost << std::endl;
+ << "exec_time of (" << index << ") " << node.name() << " quant==" << quant << " on "
+ << backend->config()->id() << " is " << exec_time
+ << " microseconds. Data transfer cost: " << total_transfer_cost << std::endl;
return {total_transfer_cost, exec_time};
}
VERBOSE(HEScheduler::ESTAndExecTime)
- << "exec_time of (" << index.value() << ") " << node.name() << " quant==" << quant << " on "
- << backend->config()->id() << ": " << exec_time
- << " microseconds. Backend available time: " << prev_op_ft
- << " Parent's max eft: " << max_pred_eft - total_transfer_cost
- << " data transfer cost: " << total_transfer_cost << std::endl;
+ << "exec_time of (" << index << ") " << node.name() << " quant==" << quant << " on "
+ << backend->config()->id() << ": " << exec_time
+ << " microseconds. Backend available time: " << prev_op_ft
+ << " Parent's max eft: " << max_pred_eft - total_transfer_cost
+ << " data transfer cost: " << total_transfer_cost << std::endl;
return {prev_op_ft, exec_time};
}
-int64_t HEScheduler::predMaxEFT(const backend::Backend *backend, const ir::Operation &node,
+int64_t HEScheduler::predMaxEFT(const backend::Backend *backend, const ir::IOperation &node,
std::multimap<int64_t, int64_t> &transfer_st_exec_time)
{
int64_t max_pred_eft = 0;
- for (const auto &input_operand_idx : node.getInputs())
+ for (const auto &input_operand_idx : node.getInputs() | ir::Remove::UNDEFINED)
{
const auto &input_operand = _graph->operands().at(input_operand_idx);
const bool quant = input_operand.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM;
@@ -578,7 +584,7 @@ int64_t HEScheduler::predMaxEFT(const backend::Backend *backend, const ir::Opera
{
// Multiply operand size by 2 because size must describe input+output size
int64_t transfer_cost =
- getPermuteTime(parent_backend, backend, quant, input_operand.info().total_size() * 2);
+ getPermuteTime(parent_backend, backend, quant, input_operand.info().total_size() * 2);
transfer_st_exec_time.emplace(_ops_eft.at(input_node_idx), transfer_cost);
}
}
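
One detail worth noting in the HEScheduler hunks: `_backends_avail_time` maps each backend to its booked ops keyed as {finish time -> start time}, and backendAvailableTime() walks that map with upper_bound to find the first gap wide enough for a new op. A simplified standalone version of the search (the real code's +1 bookkeeping is omitted):

    #include <cstdint>
    #include <map>

    // booked: ops already scheduled on a backend, keyed by finish time,
    // with the mapped value holding the op's start time.
    int64_t availableTime(const std::map<int64_t, int64_t> &booked,
                          int64_t starting_time, int64_t time_amount)
    {
      auto next = booked.upper_bound(starting_time); // first op finishing after us
      // Skip forward while the gap before `next` is too small to fit the op.
      while (next != booked.end() && next->second - starting_time < time_amount)
      {
        starting_time = next->first; // retry right after that op finishes
        ++next;
      }
      return starting_time;
    }

For example, with ops booked over [0,10) and [20,30), a 5-unit request starting at 5 lands at time 10, while a 15-unit request must wait until 30.
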
diff --git a/runtime/onert/core/src/compiler/HEScheduler.h b/runtime/onert/core/src/compiler/HEScheduler.h
index b9cee5881..df6c07926 100644
--- a/runtime/onert/core/src/compiler/HEScheduler.h
+++ b/runtime/onert/core/src/compiler/HEScheduler.h
@@ -23,14 +23,16 @@
#ifndef __ONERT_COMPILER_H_E_SCHEDULER_H_
#define __ONERT_COMPILER_H_E_SCHEDULER_H_
-#include "compiler/IScheduler.h"
-#include "compiler/BackendManager.h"
-#include "compiler/Compiler.h"
-#include "ir/Graph.h"
-#include "exec/ExecTime.h"
-#include "backend/Backend.h"
-#include <memory>
-#include "ir/OperationIndexMap.h"
+#include "IScheduler.h"
+#include "../backend/builtin/Config.h"
+#include "../exec/ExecTime.h"
+
+#include <backend/Backend.h>
+#include <compiler/BackendManager.h>
+#include <compiler/Compiler.h>
+#include <ir/Graph.h>
+#include <ir/OperationIndexMap.h>
+
#include <map>
#include <memory>
@@ -50,26 +52,26 @@ public:
* @param[in] model Graph model
* @param[in] backend_resolver backend resolver
*/
- HEScheduler(const backend::BackendContexts &backend_contexts, const CompilerOptions &options)
- : _is_supported{}, _backends_avail_time{}, _ops_eft{},
- _op_to_rank{std::make_shared<ir::OperationIndexMap<int64_t>>()},
- _is_profiling_mode{options.he_profiling_mode},
- _is_linear_exec{options.executor == "Linear"},
- _is_parallel_exec{options.executor == "Parallel"}
+ HEScheduler(const std::vector<const backend::Backend *> &backends, const CompilerOptions &options)
+ : _is_supported{}, _backends_avail_time{}, _ops_eft{},
+ _op_to_rank{std::make_shared<ir::OperationIndexMap<int64_t>>()},
+ _is_profiling_mode{options.he_profiling_mode}, _is_linear_exec{options.executor == "Linear"},
+ _is_parallel_exec{options.executor == "Parallel"}
{
- for (auto &entry : backend_contexts)
+ for (auto &&entry : backends)
{
- if (entry.first->config()->id() == backend::controlflow::Config::ID)
+ if (entry->config()->id() == backend::builtin::Config::ID)
continue;
- _all_backends.push_back(entry.first);
+ _all_backends.push_back(entry);
}
_backend_resolver = std::make_unique<compiler::BackendResolver>();
_exec_time = std::make_unique<exec::ExecTime>(_all_backends);
// Find cpu backend
- auto cpu_backend_it = std::find_if(
- _all_backends.begin(), _all_backends.end(),
- [](const backend::Backend *backend) { return backend->config()->id() == "cpu"; });
+ auto cpu_backend_it =
+ std::find_if(_all_backends.begin(), _all_backends.end(), [](const backend::Backend *backend) {
+ return backend->config()->id() == "cpu";
+ });
if (cpu_backend_it == _all_backends.end())
throw std::runtime_error("HEScheduler could be used only if 'cpu' backend is available");
_cpu_backend = *cpu_backend_it;
@@ -86,7 +88,7 @@ public:
std::shared_ptr<ir::OperationIndexMap<int64_t>> getIndexedRanks() { return _op_to_rank; }
private:
- bool isNodeProfiled(const ir::Operation &);
+ bool isNodeProfiled(const ir::IOperation &);
bool schedule(const ir::OperationIndex &, const backend::Backend *parent_backend);
/**
@@ -113,7 +115,7 @@ private:
*
* @return earliest finishing time of parent nodes
*/
- int64_t predMaxEFT(const backend::Backend *backend, const ir::Operation &node,
+ int64_t predMaxEFT(const backend::Backend *backend, const ir::IOperation &node,
std::multimap<int64_t, int64_t> &transfer_st_exec_time);
void makeRank();
@@ -144,7 +146,7 @@ private:
void scheduleShufflingBackends();
- int64_t tryBackend(const ir::Operation &node, const backend::Backend *backend);
+ int64_t tryBackend(const ir::IOperation &node, const backend::Backend *backend);
/**
* @brief Schedule a node and its successor until:
@@ -173,7 +175,7 @@ private:
std::unique_ptr<exec::ExecTime> _exec_time;
const ir::Graph *_graph{nullptr};
std::vector<const backend::Backend *> _all_backends;
- const backend::Backend *_cpu_backend{nullptr}; // TODO Change this to controlflow_backend
+ const backend::Backend *_cpu_backend{nullptr}; // TODO Change this to _builtin_backend
bool _is_profiling_mode;
bool _is_linear_exec;
bool _is_parallel_exec;
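
As this header sketches, HEScheduler is a HEFT-style list scheduler: makeRank() assigns each operation a priority via a DFS over its successors (DFSMaxRank/DFSChildrenMaxRank, folding in an averaged transfer cost), and schedule() then visits nodes in decreasing rank order, picking the backend with the smallest earliest finish time. A toy version of the rank recursion, assuming one averaged exec time per node and a uniform transfer cost (not the onert API):

    #include <algorithm>
    #include <cstdint>
    #include <unordered_map>
    #include <vector>

    struct Node
    {
      int64_t exec_time;         // averaged execution time across backends
      std::vector<int> children; // successor node ids
    };

    // rank(n) = exec_time(n) + max over successors (transfer_cost + rank(child)).
    // Scheduling then visits nodes in decreasing rank order.
    int64_t upwardRank(const std::vector<Node> &g, int n, int64_t transfer_cost,
                       std::unordered_map<int, int64_t> &memo)
    {
      if (auto it = memo.find(n); it != memo.end())
        return it->second;
      int64_t max_child = 0;
      for (const int c : g[n].children)
        max_child = std::max(max_child, transfer_cost + upwardRank(g, c, transfer_cost, memo));
      return memo[n] = g[n].exec_time + max_child;
    }
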
diff --git a/runtime/onert/core/src/compiler/HEScheduler.test.cc b/runtime/onert/core/src/compiler/HEScheduler.test.cc
new file mode 100644
index 000000000..1654bfc8b
--- /dev/null
+++ b/runtime/onert/core/src/compiler/HEScheduler.test.cc
@@ -0,0 +1,572 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "HEScheduler.h"
+#include "../exec/ExecTime.h"
+
+#include <ir/DataType.h>
+#include <ir/InternalType.h>
+#include <ir/Shape.h>
+#include <ir/TypeInfo.h>
+#include <ir/operation/BinaryArithmetic.h>
+#include <ir/operation/FullyConnected.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+using namespace onert;
+using namespace ir;
+using namespace backend;
+using namespace operation;
+using namespace exec;
+
+//
+// Mock backend classes
+//
+
+struct MockConfigCPU : public IConfig
+{
+ std::string id() override { return "cpu"; }
+ bool initialize() override { return true; };
+ bool supportPermutation() override { return false; }
+ Layout supportLayout(const IOperation &, Layout) override { return Layout::UNKNOWN; }
+ bool supportDynamicTensor() override { return false; }
+ bool supportFP16() override { return false; }
+};
+
+class MockBackendContext : public BackendContext
+{
+public:
+ using BackendContext::BackendContext;
+ ITensorRegistry *genTensors() override { return nullptr; }
+ FunctionMap genKernels() override { return {}; }
+};
+
+struct MockBackendCPU : public Backend
+{
+ std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigCPU>(); }
+ std::unique_ptr<BackendContext> newContext(ContextData &&data) const override
+ {
+ return std::make_unique<MockBackendContext>(this, std::move(data), nullptr);
+ }
+};
+
+struct MockConfigGPU : public IConfig
+{
+ std::string id() override { return "gpu"; }
+ bool initialize() override { return true; };
+ bool supportPermutation() override { return false; }
+ ir::Layout supportLayout(const ir::IOperation &, ir::Layout) override
+ {
+ return ir::Layout::UNKNOWN;
+ }
+ bool supportDynamicTensor() override { return false; }
+ bool supportFP16() override { return false; }
+};
+
+struct MockBackendGPU : public Backend
+{
+ std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigGPU>(); }
+ std::unique_ptr<BackendContext> newContext(ContextData &&data) const override
+ {
+ return std::make_unique<MockBackendContext>(this, std::move(data), nullptr);
+ }
+};
+
+struct MockConfigNPU : public IConfig
+{
+ std::string id() override { return "npu"; }
+  bool initialize() override { return true; }
+ bool supportPermutation() override { return false; }
+ ir::Layout supportLayout(const ir::IOperation &, ir::Layout) override
+ {
+ return ir::Layout::UNKNOWN;
+ }
+ bool supportDynamicTensor() override { return false; }
+ bool supportFP16() override { return false; }
+};
+
+struct MockBackendNPU : public Backend
+{
+ std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigNPU>(); }
+ std::unique_ptr<BackendContext> newContext(ContextData &&data) const override
+ {
+ return std::make_unique<MockBackendContext>(this, std::move(data), nullptr);
+ }
+};
+
+//
+// Constants
+//
+
+const int OPERAND_ELEMS = 268203;
+const int OPERAND_SIZE = OPERAND_ELEMS * 4;
+const int OPERATION_SIZE = OPERAND_SIZE * 3;
+
+const std::string LINEAR("Linear");
+const std::string DATAFLOW("Dataflow");
+const std::string PARALLEL("Parallel");
+
+//
+// Helper functions
+//
+
+// Set executor through environment variable
+void setExecutor(const std::string &executor) { setenv("EXECUTOR", executor.c_str(), true); }
+
+// Set profiling mode through environment variable
+void setProfilingMode(const bool value) { setenv("PROFILING_MODE", value ? "1" : "0", true); }
+
+// Calculate operation size by adding the sizes of all input and output operands
+uint32_t calcOpSize(const std::shared_ptr<Graph> &graph, const OperationIndex &op_idx)
+{
+ uint32_t size = 0;
+ const auto &op = graph->operations().at(op_idx);
+ for (const auto &ind : op.getInputs() + op.getOutputs())
+ size += graph->operands().at(ind).info().total_size();
+ return size;
+}
+
+// Set operation execution time. This helper is needed since ExecTime has only the
+// 'updateOperationExecTime' method.
+void setOperationExecTime(ExecTime &et, const Backend *backend, const std::string &operation,
+ bool quant, uint32_t op_size, int64_t time)
+{
+  // You shouldn't set a negative time with this method since the nnfw JSON deserializer can't read it
+ assert(time > 0);
+ int64_t prev_time = et.getOperationExecTime(backend, operation, quant, op_size);
+ int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? time : 2 * time - prev_time;
+ et.updateOperationExecTime(backend, operation, quant, op_size, time_to_set);
+ assert(et.getOperationExecTime(backend, operation, quant, op_size) == time);
+}
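+// Worked example for the trick above, assuming 'updateOperationExecTime' stores the average of
+// the previous and the new value: with prev_time == 3000 and a desired time of 5000,
+// time_to_set == 2 * 5000 - 3000 == 7000, and the stored result is (3000 + 7000) / 2 == 5000.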
+
+// Set same execution time for all given backends/operations
+void setOperationsExecutionTime(const std::vector<const Backend *> &backends,
+ const std::vector<std::string> &op_names,
+ const std::vector<uint32_t> &op_sizes, int64_t exec_time)
+{
+ assert(op_names.size() == op_sizes.size());
+ ExecTime et(backends);
+  for (size_t i = 0; i < op_names.size(); ++i)
+ {
+ for (const auto backend : backends)
+ setOperationExecTime(et, backend, op_names[i], false, op_sizes[i], exec_time);
+ }
+ et.storeOperationsExecTime();
+}
+
+// Set the permute time from one backend to another. This helper is needed since ExecTime has
+// only the 'updatePermuteTime' method.
+void setPermutationTime(ExecTime &et, const Backend *from_backend, const Backend *to_backend,
+ bool quant, uint32_t op_size, int64_t time)
+{
+  // You shouldn't set a negative time with this method since the nnfw JSON deserializer can't read it
+ assert(time > 0);
+ int64_t prev_time = et.getPermuteTime(from_backend, to_backend, quant, op_size);
+ int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? time : 2 * time - prev_time;
+ et.updatePermuteTime(from_backend, to_backend, quant, op_size, time_to_set);
+ assert(et.getPermuteTime(from_backend, to_backend, quant, op_size) == time);
+}
+
+// Set same permutation time between all given backends
+void setPermutationsExecutionTime(const std::vector<const Backend *> &backends,
+ const int operand_size, const int64_t exec_time)
+{
+ ExecTime et(backends);
+ for (const auto &backend : backends)
+ {
+ for (const auto other_backend : backends)
+ {
+ if (backend == other_backend)
+ continue;
+ setPermutationTime(et, backend, other_backend, false, operand_size, exec_time);
+ }
+ }
+ et.storeOperationsExecTime();
+}
+
+//
+// Functions for creating graphs
+//
+
+using OIS = OperandIndexSequence;
+
+template <typename NodeT, typename... Types>
+OperationIndex create(std::shared_ptr<Graph> graph, Types &&... args)
+{
+ auto op = std::make_unique<NodeT>(std::forward<Types>(args)...);
+ auto op_idx = graph->addOperation(std::move(op));
+  // For now, all operations in the tested graphs have the same size (for simplicity)
+ assert(calcOpSize(graph, op_idx) == OPERATION_SIZE);
+ return op_idx;
+}
+
+// Create straight graph: Add->Sub->Mul
+std::shared_ptr<Graph> createStraightGraph()
+{
+ auto graph = std::make_shared<Graph>();
+ const TypeInfo float_op(DataType::FLOAT32);
+
+ // Create add node
+ auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);
+
+ // Create sub node
+ auto sub_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{add_out_idx, sub_const_idx}, OIS{sub_out_idx}, sub_op_params);
+
+ // Create mul node
+ auto mul_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto mul_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ BinaryArithmetic::Param mul_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{sub_out_idx, mul_const_idx}, OIS{mul_out_idx}, mul_op_params);
+
+ graph->verify();
+ return graph;
+}
+
+/* Create branched graph:
+ * [Add]
+ * // \\
+ *   [Mul1]  [FC1]
+ * || ||
+ * [Mul2] [FC2]
+ * \\ //
+ * [Sub]
+ */
+std::shared_ptr<Graph> createBranchedGraph()
+{
+ auto graph = std::make_shared<Graph>();
+ const TypeInfo float_op(DataType::FLOAT32);
+
+ // Create add node
+ auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);
+
+ // Create mul1 node
+ auto mul1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto mul1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ BinaryArithmetic::Param mul1_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{add_out_idx, mul1_const_idx}, OIS{mul1_out_idx},
+ mul1_op_params);
+
+ // Create mul2 node
+ auto mul2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto mul2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ BinaryArithmetic::Param mul2_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{mul1_out_idx, mul2_const_idx}, OIS{mul2_out_idx},
+ mul2_op_params);
+
+ // Create fc1 node
+ auto fc1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto fc1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ FullyConnected::Param fc1_op_params{Activation::NONE};
+ create<FullyConnected>(graph, OIS{add_out_idx, fc1_const_idx}, OIS{fc1_out_idx}, fc1_op_params);
+
+ // Create fc2 node
+ auto fc2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto fc2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ FullyConnected::Param fc2_op_params{Activation::NONE};
+ create<FullyConnected>(graph, OIS{fc1_out_idx, fc2_const_idx}, OIS{fc2_out_idx}, fc2_op_params);
+
+ // Create sub node
+ auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{mul2_out_idx, fc2_out_idx}, OIS{sub_out_idx}, sub_op_params);
+
+ graph->verify();
+ return graph;
+}
+
+//
+// Tests setup/teardown
+//
+
+// SetUp/TearDown methods run before/after each test and perform actions common to all tests
+class HESchedulerTest : public ::testing::Test
+{
+protected:
+ void SetUp() override
+ {
+ // Initialize mock backends
+ _cpu_backend = new MockBackendCPU();
+ _gpu_backend = new MockBackendGPU();
+ _npu_backend = new MockBackendNPU();
+ _mock_backends = {_cpu_backend, _gpu_backend, _npu_backend};
+
+    // Remove previous profile data if it exists; a failure just means there was none
+    if (remove("exec_time.json") != 0)
+    {
+      // DO NOTHING (no previous profile data)
+    }
+
+ // Remember original value of 'EXECUTOR' environment variable
+ char *executor = std::getenv("EXECUTOR");
+ _original_executor = executor == nullptr ? "" : executor;
+
+ // Remember original value of 'PROFILING_MODE' environment variable
+ char *profiling_mode = std::getenv("PROFILING_MODE");
+ _original_profiling_mode = profiling_mode == nullptr ? "" : profiling_mode;
+ }
+
+ void TearDown() override
+ {
+ delete _cpu_backend;
+ delete _gpu_backend;
+ delete _npu_backend;
+ EXPECT_EQ(remove("exec_time.json"), 0);
+ setenv("EXECUTOR", _original_executor.c_str(), true);
+ setenv("PROFILING_MODE", _original_profiling_mode.c_str(), true);
+ }
+
+ const MockBackendCPU *_cpu_backend{nullptr};
+ const MockBackendGPU *_gpu_backend{nullptr};
+ const MockBackendNPU *_npu_backend{nullptr};
+ std::vector<const Backend *> _mock_backends;
+
+ std::string _original_executor;
+ std::string _original_profiling_mode;
+};
+
+//
+// HEScheduler tests
+//
+
+class HESchedulerTestWithExecutorParam : public HESchedulerTest,
+ public testing::WithParamInterface<std::string>
+{
+};
+
+// HESchedulerTestWithExecutorParam tests are parameterized with the executor name and run three
+// times, once for each executor
+INSTANTIATE_TEST_SUITE_P(AllExecutors, HESchedulerTestWithExecutorParam,
+ testing::Values(LINEAR, DATAFLOW, PARALLEL));
+
+// Test scheduler behavior for a straight graph with known execution time of all nodes and permutes.
+TEST_P(HESchedulerTestWithExecutorParam, straight_graph_known_exec_time)
+{
+ setExecutor(GetParam());
+
+ // Prepare graph
+ ir::Model model;
+ auto graph(createStraightGraph());
+ model.push(ir::SubgraphIndex{0}, graph);
+ OperationIndex add_op_idx(0), sub_op_idx(1), mul_op_idx(2);
+
+ // Set default execution and transfer time
+ setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1);
+ setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul"},
+ {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4);
+
+ // Test 1
+  // Expected behaviour: the scheduler assigns a different backend to each node
+ {
+ // For each backend reduce execution time of one node
+ ExecTime et(_mock_backends);
+ setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, 1);
+ setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, 1);
+ setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, 1);
+ et.storeOperationsExecTime();
+
+ // Test scheduler
+ auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
+ auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
+ const auto br = scheduler.schedule(*graph);
+ ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu");
+ ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "gpu");
+ ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "npu");
+ }
+
+ // Test 2
+  // Expected behaviour: the scheduler assigns a single backend to all nodes due to large transfer time
+ {
+ // Increase transfer time
+ setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1e5);
+
+ // Test scheduler
+ auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
+ auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
+ const auto br = scheduler.schedule(*graph);
+ ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu");
+ ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu");
+ ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "cpu");
+ }
+}
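+// Rough cost intuition for the two cases above, assuming the scheduler approximately sums node
+// execution times and inter-backend transfer times along the chain: in Test 1 the cpu/gpu/npu
+// split costs about 1 + 1 + 1 plus two transfers of 1 each, versus roughly 1 + 1e4 + 1e4 on any
+// single backend; in Test 2 each transfer costs 1e5, so the single-backend schedule wins.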
+
+// Test scheduler behavior for a branched graph with known execution time of all nodes and permutes
+TEST_P(HESchedulerTestWithExecutorParam, branched_graph_known_exec_time)
+{
+ const int64_t NPU_ET = 5000;
+ setExecutor(GetParam());
+
+ // Prepare graph
+ ir::Model model;
+ auto graph(createBranchedGraph());
+ model.push(ir::SubgraphIndex{0}, graph);
+ OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4),
+ sub_op_idx(5);
+
+ // Set default execution and transfer time
+ setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1000);
+ setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul", "FullyConnected"},
+ {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4);
+
+ // Test 1
+  // Expected behaviour: for the dataflow and linear executors the scheduler assigns the fastest
+  // backend to all nodes; for the parallel executor it assigns different backends to the branches.
+ {
+ // Reduce execution time
+ ExecTime et(_mock_backends);
+ setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, NPU_ET);
+ setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, NPU_ET);
+ setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, NPU_ET);
+ setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET);
+ setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET + 1000);
+ setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET + 1000);
+ et.storeOperationsExecTime();
+
+ // Test scheduler
+ auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
+ auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
+ const auto br = scheduler.schedule(*graph);
+
+ std::string branch1_expected_backend("npu"), branch2_expected_backend("npu");
+ if (GetParam() == PARALLEL)
+ {
+ branch1_expected_backend =
+ br->getBackend(mul1_op_idx)->config()->id() == "npu" ? "npu" : "gpu";
+ branch2_expected_backend = branch1_expected_backend == "npu" ? "gpu" : "npu";
+ }
+
+ ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), branch1_expected_backend);
+ ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), branch1_expected_backend);
+ ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), branch2_expected_backend);
+ ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), branch2_expected_backend);
+ ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu");
+ }
+
+ // Test 2
+  // Expected behaviour: the scheduler assigns a single backend to all nodes
+ {
+ // Increase execution time for GPU backend
+ ExecTime et(_mock_backends);
+    /* For the parallel executor: set a time larger than sum_of_other_branches_nodes_cnt *
+     * npu_exec_time so that npu is preferred: the i-th branch will wait for npu until it finishes
+     * the nodes of branches [0; i-1] in DFS order. In each branch it goes deep until it encounters
+     * branching or the scheduler assigns another backend to a node. */
+ setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET * 3 + 1);
+ setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET * 3 + 1);
+ et.storeOperationsExecTime();
+
+ // Test scheduler
+ auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
+ auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
+ const auto br = scheduler.schedule(*graph);
+ ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu");
+ }
+}
+
+// Test scheduler behavior for a branched graph with profiling mode enabled
+TEST_F(HESchedulerTest, branched_graph_profiling_mode)
+{
+ const int ET = 1e5;
+
+ // Turn on profiling mode
+ setProfilingMode(true);
+ setExecutor(DATAFLOW);
+
+ // Prepare graph
+ ir::Model model;
+ auto graph(createBranchedGraph());
+ model.push(ir::SubgraphIndex{0}, graph);
+ OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4),
+ sub_op_idx(5);
+
+ // Test 1
+  // Expected behaviour: the scheduler assigns backends to nodes with unknown execution time
+ {
+ // Set execution time for all backends/nodes except for cpu/Sub, npu/Mul, gpu/FC
+ ExecTime et(_mock_backends);
+ setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, ET);
+ setOperationExecTime(et, _cpu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
+ setOperationExecTime(et, _cpu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
+ setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, ET);
+ setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
+ setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, ET);
+ setOperationExecTime(et, _gpu_backend, "Add", false, OPERATION_SIZE, ET);
+ setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
+ setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, ET);
+ et.storeOperationsExecTime();
+
+ // Test scheduler
+ auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
+ auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
+ const auto br = scheduler.schedule(*graph);
+ ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "gpu");
+ ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "gpu");
+ ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu");
+ }
+
+ // Test 2
+  // Expected behaviour: the scheduler shuffles backends so that different backends are assigned
+  // to neighboring nodes
+ {
+    // Set execution time for the remaining backend/node pairs (cpu/Sub, npu/Mul, gpu/FC)
+ ExecTime et(_mock_backends);
+ setOperationExecTime(et, _cpu_backend, "Sub", false, OPERATION_SIZE, ET);
+ setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
+ setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
+ et.storeOperationsExecTime();
+
+ // Test scheduler
+ auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
+ auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
+ const auto br = scheduler.schedule(*graph);
+ ASSERT_NE(br->getBackend(add_op_idx)->config()->id(),
+ br->getBackend(mul1_op_idx)->config()->id());
+ ASSERT_NE(br->getBackend(add_op_idx)->config()->id(),
+ br->getBackend(fc1_op_idx)->config()->id());
+ ASSERT_NE(br->getBackend(mul1_op_idx)->config()->id(),
+ br->getBackend(mul2_op_idx)->config()->id());
+ ASSERT_NE(br->getBackend(fc1_op_idx)->config()->id(),
+ br->getBackend(fc2_op_idx)->config()->id());
+ ASSERT_NE(br->getBackend(mul2_op_idx)->config()->id(),
+ br->getBackend(sub_op_idx)->config()->id());
+ ASSERT_NE(br->getBackend(fc2_op_idx)->config()->id(),
+ br->getBackend(sub_op_idx)->config()->id());
+ }
+}
+
+// TODO: Add tests with unknown execution and permutation time
+
+} // unnamed namespace
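For readers unfamiliar with the GoogleTest machinery used in the test file above, the following is a minimal, self-contained sketch of the same TEST_P/INSTANTIATE_TEST_SUITE_P pattern; all names in it are illustrative and not taken from the change:

    #include <gtest/gtest.h>
    #include <string>

    // Fixture whose tests receive a std::string parameter via GetParam()
    class ExecutorParamTest : public ::testing::TestWithParam<std::string>
    {
    };

    // Runs once per value listed in INSTANTIATE_TEST_SUITE_P below
    TEST_P(ExecutorParamTest, param_is_not_empty) { EXPECT_FALSE(GetParam().empty()); }

    INSTANTIATE_TEST_SUITE_P(AllExecutors, ExecutorParamTest,
                             ::testing::Values("Linear", "Dataflow", "Parallel"));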
diff --git a/runtime/onert/core/src/compiler/Linear.cc b/runtime/onert/core/src/compiler/Linear.cc
index 49a989500..4dbe229c8 100644
--- a/runtime/onert/core/src/compiler/Linear.cc
+++ b/runtime/onert/core/src/compiler/Linear.cc
@@ -14,207 +14,38 @@
* limitations under the License.
*/
-#include <algorithm>
-
#include "Linear.h"
-#include "backend/IConfig.h"
-#include "backend/IConstantInitializer.h"
-#include "backend/ITensorRegister.h"
-#include "backend/Backend.h"
+#include "../dumper/text/GraphDumper.h"
+
#include "util/logging.h"
+#include <sstream>
+
namespace onert
{
namespace compiler
{
-std::vector<ir::OpSequenceIndex> Linear::linearize(const compiler::LoweredGraph &lowered_graph)
+// TODO(easy) Change the LoweredGraph param to Graph
+std::vector<ir::OperationIndex> Linear::linearize(const compiler::ILoweredGraph &lowered_graph)
{
- std::vector<ir::OpSequenceIndex> order;
- lowered_graph.iterateTopolOpSeqs(
- [&](const ir::OpSequenceIndex &index, const ir::OpSequence &) -> void {
- order.emplace_back(index);
- });
- return order;
+ return lowered_graph.graph().topolSortOperations();
}
-void Linear::dump(const compiler::LoweredGraph &lowered_graph,
- const std::vector<ir::OpSequenceIndex> &order)
+// TODO(easy) Change the LoweredGraph param to Graph
+void Linear::dump(const compiler::ILoweredGraph &lowered_graph,
+ const std::vector<ir::OperationIndex> &order)
{
+ for (const auto &ind : order)
{
- const auto &toString = [](const onert::backend::Backend *backend) {
- assert(backend);
- std::string str;
- str += backend->config()->id();
- return "{" + str + "}";
- };
-
- VERBOSE(Linear) << "Final OpSequence" << std::endl;
- for (const auto index : order)
- {
- const auto &op_seq = lowered_graph.op_seqs().at(index);
- const auto lower_info = lowered_graph.getLowerInfo(index);
- const auto &operations = lowered_graph.graph().operations();
- VERBOSE(Linear) << "* OP_SEQ " << toString(lower_info->backend()) << " "
- << ir::getStrFromOpSeq(op_seq, operations) << std::endl;
- }
+    // TODO Could the logging system handle this? (inserting a prefix for each line)
+ std::istringstream iss{dumper::text::formatOperation(lowered_graph.graph(), ind)};
+ std::string line;
+ while (std::getline(iss, line))
+ VERBOSE(GraphDumper) << line << std::endl;
}
}
-void Linear::planTensors(const compiler::LoweredGraph &lowered_graph,
- const std::vector<ir::OpSequenceIndex> &order)
-{
- const auto &graph = lowered_graph.graph();
- ir::OperandIndexMap<std::shared_ptr<backend::ITensorBuilder>> tensor_builder_map;
-
- ir::OperandIndexMap<uint32_t> uses_map;
- ir::OperandIndexMap<uint32_t> def_map;
- ir::OperandIndexSequence constants;
-
- // Prepare scanning
- graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
- const auto lower_info = lowered_graph.getLowerInfo(ind);
- // TODO Remove if onert doesn't support anymore such as
- // GeneratedTests.reshape_quant8_weights_as_inputs
- if (lower_info->def_factors().size() == 0 && lower_info->use_factors().size() == 0 &&
- !graph.getInputs().contains(ind))
- {
- VERBOSE(LINEAR) << "Operand #" << ind.value() << " will not be used. no more process."
- << std::endl;
- return;
- }
-
- // Unused input of subgraph
- // TODO Register unused input as nullptr in tensor_builder
- if (lower_info->def_factors().size() == 0 && lower_info->use_factors().size() == 0 &&
- graph.getInputs().contains(ind))
- {
- VERBOSE(LINEAR) << "Operand #" << ind.value() << " will not be used. no more process."
- << std::endl;
- return;
- }
-
- uses_map[ind] = obj.getUses().size();
- def_map[ind] = obj.getDef().valid() ? 1 : 0;
-
- bool is_const = obj.isConstant();
- if (is_const)
- {
- constants.append(ind);
- }
-
- auto factor = lower_info->def_factors().getOnlyElement();
- auto backend = factor.backend();
- auto tensor_builder = lowered_graph.backend_contexts().at(backend)->tensor_builder;
- if (!tensor_builder->isRegistered(ind))
- {
- // These tensors do not exist in any op_seq (No use and def)
- const auto info = obj.info();
- const auto backend_layout = factor.layout();
- // TODO Change tensor info to have permuted shape
- tensor_builder->registerTensorInfo(ind, info, backend_layout);
- }
-
- tensor_builder_map[ind] = tensor_builder;
- });
-
- // If a tensor is model output, increase the use of the tensor.
- // This aim is same to above one.
- for (const auto &ind : graph.getOutputs() | ir::Remove::DUPLICATED)
- {
- uses_map[ind]++;
- }
-
- // Start scanning to do notify{First|Last}Use for each tensor
-
- // If a tensor is a constant, increase the use of the tensor.
- // It makes the tensor not be dealloced. It means these will be deallocated last.
- // And allocate constant operands first
- VERBOSE(LINEAR) << "TENSORS as CONSTANT" << std::endl;
- for (const auto &ind : constants)
- {
- uses_map[ind]++;
- tensor_builder_map[ind]->notifyFirstUse(ind);
- }
-
- // Allocate Model's inputs
- VERBOSE(LINEAR) << "TENSORS as MODEL INPUT" << std::endl;
- for (const auto &ind : graph.getInputs() | ir::Remove::DUPLICATED)
- {
- auto tensor_builder = tensor_builder_map[ind];
- if (!tensor_builder) // for GeneratedTests.xxx_weights_as_inputs
- continue;
- tensor_builder->notifyFirstUse(ind);
- }
-
- // At each operation,
- // 1. Scan DEF of outputs. If the DEF, allocate it
- // 2. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
- VERBOSE(LINEAR) << "TENSORS" << std::endl;
- for (const auto op_seq_ind : order)
- {
- const auto &op_seq = lowered_graph.op_seqs().at(op_seq_ind);
- for (const auto &op_idx : op_seq.operations())
- {
- for (const auto &ind : graph.operations().at(op_idx).getOutputs() | ir::Remove::DUPLICATED |
- ir::Remove::UNDEFINED)
- {
- assert(def_map.find(ind) != def_map.end());
- if (def_map[ind])
- {
- def_map[ind] = 0;
- tensor_builder_map[ind]->notifyFirstUse(ind);
- }
- }
-
- for (const auto &ind : graph.operations().at(op_idx).getInputs() | ir::Remove::DUPLICATED |
- ir::Remove::UNDEFINED)
- {
- assert(uses_map.find(ind) != uses_map.end());
- assert(uses_map[ind] > 0);
- uses_map[ind]--;
- if (uses_map[ind] == 0)
- {
- // plan for deallocation of static tensornode
- tensor_builder_map[ind]->notifyLastUse(ind);
-
- // plan for deallocation of dynamic tensor
- auto dyn_tensor_manager = tensor_builder_map[ind]->dynamicTensorManager();
- if (dyn_tensor_manager)
- dyn_tensor_manager->planDealloc(op_idx, ind);
- }
- }
- }
- }
-
- // Dispose and validate
- for (const auto &ind : graph.getOutputs() | ir::Remove::DUPLICATED)
- {
- --uses_map[ind];
- if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
- {
- tensor_builder_map[ind]->notifyLastUse(ind);
- }
- }
-
- for (const auto &ind : constants)
- {
- --uses_map[ind];
- if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
- {
- tensor_builder_map[ind]->notifyLastUse(ind);
- }
- }
-
- assert(
- std::all_of(uses_map.begin(), uses_map.end(),
- [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
-
- assert(
- std::all_of(def_map.begin(), def_map.end(),
- [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
-}
-
} // namespace compiler
} // namespace onert
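The new Linear::linearize() above simply delegates to Graph::topolSortOperations(). As a rough illustration of what a topological sort over operations computes, here is a generic Kahn-style sketch (hypothetical; not the actual onert implementation):

    #include <cstdint>
    #include <queue>
    #include <vector>

    // Returns node indices ordered so every node appears after all of its predecessors
    std::vector<uint32_t> topolSort(const std::vector<std::vector<uint32_t>> &succs)
    {
      std::vector<uint32_t> indegree(succs.size(), 0);
      for (const auto &outs : succs)
        for (auto to : outs)
          ++indegree[to];

      std::queue<uint32_t> ready;
      for (uint32_t n = 0; n < succs.size(); ++n)
        if (indegree[n] == 0)
          ready.push(n);

      std::vector<uint32_t> order;
      while (!ready.empty())
      {
        auto n = ready.front();
        ready.pop();
        order.push_back(n);
        for (auto to : succs[n])
          if (--indegree[to] == 0)
            ready.push(to);
      }
      return order; // fewer entries than succs.size() would indicate a cycle
    }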
diff --git a/runtime/onert/core/src/compiler/Linear.h b/runtime/onert/core/src/compiler/Linear.h
index 1e24cf92b..4f92dc88d 100644
--- a/runtime/onert/core/src/compiler/Linear.h
+++ b/runtime/onert/core/src/compiler/Linear.h
@@ -20,18 +20,8 @@
#include <vector>
#include <memory>
-#include "ir/OpSequences.h"
#include "ir/Index.h"
-#include "backend/ITensorBuilder.h"
-#include "compiler/LoweredGraph.h"
-
-namespace onert
-{
-namespace ir
-{
-struct OperationVisitor;
-} // namespace ir
-} // namespace onert
+#include "compiler/ILoweredGraph.h"
namespace onert
{
@@ -41,11 +31,9 @@ namespace compiler
class Linear
{
public:
- static std::vector<ir::OpSequenceIndex> linearize(const compiler::LoweredGraph &lowered_graph);
- static void dump(const compiler::LoweredGraph &lowered_graph,
- const std::vector<ir::OpSequenceIndex> &order);
- static void planTensors(const compiler::LoweredGraph &lowered_graph,
- const std::vector<ir::OpSequenceIndex> &order);
+ static std::vector<ir::OperationIndex> linearize(const compiler::ILoweredGraph &lowered_graph);
+ static void dump(const compiler::ILoweredGraph &lowered_graph,
+ const std::vector<ir::OperationIndex> &order);
};
} // namespace compiler
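Both Linear::dump() above and LoweredGraph::dumpLowerInfo() below feed multi-line messages to VERBOSE one line at a time so that every line carries the logger's prefix, which is exactly what the in-code TODO asks the logging system to take over. A hypothetical helper capturing that idea (std::cout stands in for the VERBOSE macro):

    #include <iostream>
    #include <sstream>
    #include <string>

    // Log a multi-line message line by line so each line carries the prefix
    void verboseMultiline(const std::string &tag, const std::string &message)
    {
      std::istringstream iss{message};
      std::string line;
      while (std::getline(iss, line))
        std::cout << "[" << tag << "] " << line << '\n';
    }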
diff --git a/runtime/onert/core/src/compiler/LoweredGraph.cc b/runtime/onert/core/src/compiler/LoweredGraph.cc
index 1489a1884..46a45e44a 100644
--- a/runtime/onert/core/src/compiler/LoweredGraph.cc
+++ b/runtime/onert/core/src/compiler/LoweredGraph.cc
@@ -16,21 +16,23 @@
#include "compiler/LoweredGraph.h"
-#include <assert.h>
-#include <sstream>
-#include "util/logging.h"
-#include "compiler/pass/ConstantInsertionPass.h"
-#include "compiler/pass/ConstantLoweringPass.h"
-#include "compiler/pass/PermutationOperationPass.h"
-#include "compiler/pass/PermutationInsertionPass.h"
-#include "compiler/pass/PermutationEliminationPass.h"
-#include "ir/GraphIterator.h"
-#include "ir/verifier/Verifier.h"
+#include "HEScheduler.h"
+#include "ManualScheduler.h"
+#include "pass/ConstantInsertionPass.h"
+#include "pass/ConstantLoweringPass.h"
+#include "pass/PassRunner.h"
+#include "pass/PermutationEliminationPass.h"
+#include "pass/PermutationInsertionPass.h"
+#include "pass/PermutationOperationPass.h"
+#include "../dumper/text/GraphDumper.h"
+#include "../ir/verifier/Verifier.h"
+
#include "backend/Backend.h"
-#include "backend/IConfig.h"
#include "compiler/BackendResolver.h"
-#include "compiler/ManualScheduler.h"
-#include "compiler/HEScheduler.h"
+#include "util/logging.h"
+
+#include <cassert>
+#include <sstream>
namespace onert
{
@@ -39,18 +41,15 @@ namespace compiler
LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &options) : _graph{graph}
{
- bool linear_executor = (options.executor == "Linear");
+ lowerGraph(options);
+}
+void LoweredGraph::lowerGraph(const CompilerOptions &options)
+{
// Build backend contexts
auto &backend_manager = BackendManager::get();
-
- // Always create Controlflow backend context
- auto cf_backend = backend_manager.getControlflow();
- _backend_contexts.emplace(
- cf_backend, cf_backend->newContext(_graph, _graph.getKernelBuilder(), linear_executor));
-
// Create contexts for other backends
- for (auto backend_str : options.backend_list)
+ for (auto &&backend_str : options.backend_list)
{
backend_manager.loadBackend(backend_str);
auto backend = backend_manager.get(backend_str);
@@ -60,12 +59,9 @@ LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &option
// we should change it back(throw if backend is not loaded) later.
if (!backend)
{
- VERBOSE(LoweredGraph) << "Cannot load backend - " << backend_str;
+ VERBOSE(LoweredGraph) << "Cannot load backend - " << backend_str << std::endl;
continue;
}
-
- _backend_contexts.emplace(
- backend, backend->newContext(_graph, _graph.getKernelBuilder(), linear_executor));
}
if (backend_manager.num_backends() == 0)
throw std::runtime_error{"No available backends loaded."};
@@ -73,317 +69,115 @@ LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &option
// TODO Move "schedule" phase out of here
// Schedule
std::unique_ptr<BackendResolver> backend_resolver;
+ auto all_backends = backend_manager.getAll();
if (options.he_scheduler)
{
- auto scheduler = HEScheduler(_backend_contexts, options);
+ auto scheduler = HEScheduler(all_backends, options);
backend_resolver = scheduler.schedule(_graph);
_indexed_ranks = scheduler.getIndexedRanks();
}
else
{
- auto scheduler = ManualScheduler(_backend_contexts, options);
+ auto scheduler = ManualScheduler(all_backends, options);
backend_resolver = scheduler.schedule(_graph);
}
- {
- // operand::LowerInfo holder
- ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> operands_lower_info;
-
- _graph.operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &) {
- operands_lower_info[index] = std::make_unique<ir::operand::LowerInfo>();
- });
-
- // Make op_seqs while checking whether a node can be merged into a op_seq.
- makeOpSequences(operands_lower_info, options, *backend_resolver);
+ makeLowerInfo(*backend_resolver);
+ VERBOSE(LoweredGraph) << "dump before mandatory passes" << std::endl;
+ dumper::text::dumpLoweredGraph(*this);
- _op_seqs.iterate([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- assert(op_seq.operations().size() > 0);
- std::reverse(std::begin(op_seq.operations()), std::end(op_seq.operations()));
- });
+ // Mandatory passes - kind of legalization(?)
+ pass::PassRunner{}
+ .append(std::make_unique<pass::ConstantInsertionPass>(*this))
+ .append(std::make_unique<pass::ConstantLoweringPass>(*this))
+ .append(std::make_unique<pass::PermutationOperationPass>(*this))
+ .append(std::make_unique<pass::PermutationInsertionPass>(*this))
+ .run();
- VERBOSE(OpSequences) << "dump without permutation" << std::endl;
- dumpOpSequences(_op_seqs, _graph.operations());
+ dumpLowerInfo();
- pass::ConstantInsertionPass ci_pass(*this);
- ci_pass.run();
+ // Optimization passes (optional)
+ pass::PassRunner{}.append(std::make_unique<pass::PermutationEliminationPass>(*this)).run();
- pass::ConstantLoweringPass cl_pass(*this);
- cl_pass.run();
-
- // Set LowerInfo for each operand from the operand::LowerInfo holder
- manipulateLowerInfo(operands_lower_info, options.is_primary_subgraph);
-
- dumpLowerInfo();
- }
-
- // Run Permutation Passes
- {
- pass::PermutationOperationPass po_pass(*this);
- po_pass.run();
-
- pass::PermutationInsertionPass pi_pass(*this);
- pi_pass.run();
-
- pass::PermutationEliminationPass pe_pass(*this);
- pe_pass.run();
-
- VERBOSE(OpSequences) << "dump with permutation" << std::endl;
- dumpOpSequences(_op_seqs, _graph.operations());
- }
+ VERBOSE(LoweredGraph) << "Dump after all the passes" << std::endl;
+ for (auto &&operand : _graph.getInputs())
+ VERBOSE(LoweredGraph) << "Graph Input : " << operand << std::endl;
+ for (auto &&operand : _graph.getOutputs())
+ VERBOSE(LoweredGraph) << "Graph Output : " << operand << std::endl;
+ dumper::text::dumpLoweredGraph(*this);
// Graph verifications
{
+ assert(ir::verifier::InputOutputChecker().verify(_graph));
assert(ir::verifier::DAGChecker().verify(_graph));
- assert(ir::verifier::EdgeConsistencyChecker().verify(_graph));
+ assert(ir::verifier::EdgeChecker().verify(_graph));
}
}
-const ir::operation::LowerInfo *
-LoweredGraph::getLowerInfo(const ir::OpSequenceIndex &op_seq_index) const
+void LoweredGraph::makeLowerInfo(const compiler::BackendResolver &backend_resolver)
{
- auto itr = _lower_info_map.op_seq.find(op_seq_index);
- if (itr == _lower_info_map.op_seq.end())
- return nullptr;
- return itr->second.get();
-}
-
-void LoweredGraph::setLowerInfo(const ir::OpSequenceIndex &op_seq_index,
- std::unique_ptr<ir::operation::LowerInfo> &&lower_info)
-{
- _lower_info_map.op_seq.insert(std::make_pair(op_seq_index, std::move(lower_info)));
-}
+ _graph.operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &) {
+ lower_info().operand.set(index, std::make_unique<OperandLowerInfo>());
+ });
-void LoweredGraph::removeLowerInfo(const ir::OpSequenceIndex &op_seq_index)
-{
- auto &op_seq_lower_info = _lower_info_map.op_seq;
- assert(op_seq_lower_info.find(op_seq_index) != op_seq_lower_info.end());
- for (auto it = op_seq_lower_info.begin(); it != op_seq_lower_info.end(); ++it)
- {
- if (it->first == op_seq_index)
+ // Set operand lower info using assigned backends to operations
+ _graph.operations().iterate([&](const ir::OperationIndex &op_ind, const ir::IOperation &) {
+ const ir::IOperation &op = _graph.operations().at(op_ind);
+ auto backend = backend_resolver.getBackend(op_ind);
+ if (!backend)
{
- op_seq_lower_info.erase(it);
- break;
+ throw std::runtime_error{"Fail to find backend for " + op.name() + " operation"};
}
- }
-}
-
-const ir::operand::LowerInfo *LoweredGraph::getLowerInfo(const ir::OperandIndex &index) const
-{
- auto itr = _lower_info_map.operand.find(index);
- if (itr == _lower_info_map.operand.end())
- return nullptr;
- return itr->second.get();
-}
-
-ir::operand::LowerInfo *LoweredGraph::getLowerInfo(const ir::OperandIndex &index)
-{
- auto itr = _lower_info_map.operand.find(index);
- if (itr == _lower_info_map.operand.end())
- return nullptr;
- return itr->second.get();
-}
-
-void LoweredGraph::setLowerInfo(const ir::OperandIndex &index,
- std::unique_ptr<ir::operand::LowerInfo> &&lower_info)
-{
- _lower_info_map.operand.insert(std::make_pair(index, std::move(lower_info)));
-}
-
-void LoweredGraph::removeLowerInfo(const ir::OperandIndex &index)
-{
- _lower_info_map.operand.erase(index);
-}
-
-void LoweredGraph::iterateTopolOpSeqs(
- const std::function<void(const ir::OpSequenceIndex &, const ir::OpSequence &)> &fn) const
-{
- // Topological Sorting for ir::OpSequences
- std::vector<ir::OpSequenceIndex> topol_sorted;
- ir::PostDfsIterator<true>{}.iterateOpSeqs(
- *this, [&](const ir::OpSequenceIndex &index, const ir::OpSequence &) {
- topol_sorted.emplace_back(index);
- });
- std::reverse(topol_sorted.begin(), topol_sorted.end());
- for (const auto op_seq_idx : topol_sorted)
- {
- const auto &op_seq = _op_seqs.at(op_seq_idx);
- fn(op_seq_idx, op_seq);
- }
-}
-
-void LoweredGraph::iterateTopolOpSeqs(
- const std::function<void(const ir::OpSequenceIndex &, ir::OpSequence &)> &fn)
-{
- // Topological Sorting for ir::OpSequences
- std::vector<ir::OpSequenceIndex> topol_sorted;
- ir::PostDfsIterator<false>{}.iterateOpSeqs(
- *this, [&](const ir::OpSequenceIndex &index, ir::OpSequence &) {
- topol_sorted.emplace_back(index);
- });
- std::reverse(topol_sorted.begin(), topol_sorted.end());
- for (const auto op_seq_idx : topol_sorted)
- {
- auto &op_seq = _op_seqs.at(op_seq_idx);
- fn(op_seq_idx, op_seq);
- }
-}
-
-ir::OpSequenceIndex LoweredGraph::appendFreshSingleOpSequence(const ir::OperationIndex &node_index,
- const ir::Operation &node)
-{
- // Create a fresh op_seq with one operation, and append it to op_seqs
- // Create a fresh op_seq
- auto op_seq = std::make_unique<ir::OpSequence>(_graph.layout());
-
- // Add an operation
- op_seq->appendOperation(node_index);
-
- // Update input/output
- op_seq->setOutputs(node.getOutputs());
- op_seq->setInputs(node.getInputs());
-
- return _op_seqs.emplace(std::move(op_seq));
-}
-
-void LoweredGraph::makeOpSequences(
- ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
- const CompilerOptions &options, const BackendResolver &backend_resolver)
-{
- // if SUBG_MAX_NODE == 0, no limit on nodes of a op_seq
- const int op_seq_max_node = options.op_seq_max_node;
- assert(op_seq_max_node >= 0);
-
- bool is_profiling = options.he_profiling_mode;
- ir::OpSequence *op_seq = nullptr;
- ir::OpSequenceIndex op_seq_index;
-
- // NOTE: The below method appends nodes while making one op_seq if needed. If something better
- // ways, happy to update this code.
- ir::PostDfsConstIterator{}.iterate(
- _graph, [&](const ir::OperationIndex &node_index, const ir::Operation &node) {
- // LowerInfo for in/output operands
- auto backend = backend_resolver.getBackend(node_index);
-
- // Get frontend's layout
- auto frontend_layout = _graph.layout();
-
- // The layout of each backend should be set at another place
- // TODO Change setting layout of each backend at another place
- auto backend_layout = backend->config()->supportLayout(node, frontend_layout);
-
- for (auto operand : node.getInputs() | ir::Remove::UNDEFINED)
- {
- auto &&lower_info = operands_lower_info.at(operand);
- lower_info->addUsePermuteFactor(ir::operand::PermuteFactor{backend, backend_layout});
- }
- for (auto operand : node.getOutputs())
- {
- auto &&lower_info = operands_lower_info.at(operand);
- lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{backend, backend_layout});
- }
-
- bool new_op_seq = (op_seq == nullptr ||
- (op_seq_max_node != 0 &&
- op_seq->operations().size() >= static_cast<size_t>(op_seq_max_node)));
-
- // for profiling each op_seq must contain just one node,
- // so that we can measure a node separately
- if (new_op_seq || is_profiling ||
- !mergeable(op_seq_index, node_index, backend_layout, backend_resolver))
- {
- auto new_op_seq_index = appendFreshSingleOpSequence(node_index, node);
-
- // ir::OpSequence LowerInfo
- setLowerInfo(new_op_seq_index,
- std::make_unique<ir::operation::LowerInfo>(backend, backend_layout));
-
- op_seq_index = new_op_seq_index;
- op_seq = &(_op_seqs.at(new_op_seq_index));
-
- VERBOSE(Lower) << "OpSequence#" << op_seq_index.value() << " is created for "
- << "NODE#" << node_index.value() << "(" << node.name() << ")" << std::endl;
- }
- else
- {
- op_seq->appendOperation(node_index);
- // Set inputs
- auto new_inputs = node.getInputs();
- // Add inputs except outputs of the previous node
- for (auto ind : op_seq->getInputs())
- {
- if (!node.getOutputs().contains(ind))
- new_inputs.append(ind);
- }
- op_seq->setInputs(new_inputs);
- VERBOSE(Lower) << "OpSequence#" << op_seq_index.value() << " merges "
- << "NODE#" << node_index.value() << "(" << node.name() << ")" << std::endl;
- }
- });
-}
+ auto frontend_layout = _graph.layout();
-void LoweredGraph::manipulateLowerInfo(
- ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
- bool is_primary)
-{
- const auto controlflow_backend = BackendManager::get().getControlflow();
+ // The layout of each backend should be set at another place
+ // TODO Change setting layout of each backend at another place
+ auto backend_layout = backend->config()->supportLayout(op, frontend_layout);
- // TODO Rather than handling primary graph specially,
- // let the permute inserted and remove it later
- if (is_primary)
- {
- // TODO Rather than using NHWC Get frontend layout of this node from IR
- auto factor = ir::operand::PermuteFactor{controlflow_backend, ir::Layout::NHWC};
- for (auto index : _graph.getInputs() | ir::Remove::UNDEFINED)
+ for (auto &&ind : op.getInputs() | ir::Remove::UNDEFINED)
{
- auto &&lower_info = operands_lower_info.at(index);
- assert(lower_info->def_factors().empty());
- lower_info->addDefPermuteFactor(factor);
+ auto &operand_li = lower_info().operand.at(ind);
+ operand_li.addUsePermuteFactor(PermuteFactor{backend, backend_layout});
}
- for (auto index : _graph.getOutputs())
+ for (auto &&ind : op.getOutputs() | ir::Remove::UNDEFINED)
{
- auto &&lower_info = operands_lower_info.at(index);
- lower_info->addUsePermuteFactor(factor);
+ auto &operand_li = lower_info().operand.at(ind);
+ operand_li.addDefPermuteFactor(PermuteFactor{backend, backend_layout});
}
- }
- else
+ lower_info().operation.set(
+ op_ind, std::make_unique<compiler::OperationLowerInfo>(backend, backend_layout));
+ });
+
+ // Handle graph inputs and outputs
+ const auto builtin_backend = BackendManager::get().getBuiltin();
+ auto factor = PermuteFactor{builtin_backend, _graph.layout()};
+ for (auto &&index : _graph.getInputs() | ir::Remove::UNDEFINED)
{
- for (auto index : _graph.getInputs() | ir::Remove::UNDEFINED)
- {
- auto &&lower_info = operands_lower_info.at(index);
- if (!(lower_info->def_factors().size() == 0 && lower_info->use_factors().size() == 0))
- {
- // In case of not that Graph's input is not used in any operation and not the graph's
- // output.
- // In other words, it is not unused input in Graph.
- lower_info->addDefPermuteFactor(*lower_info->use_factors().begin());
- }
- else
- {
- // In case of that an operand is Graph's input and not input or output of any operation
- lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{
- controlflow_backend,
- ir::Layout::NHWC // TODO Get frontend layout of this node from IR
- });
- }
- }
+ auto &operand_li = lower_info().operand.at(index);
+ assert(operand_li.def_factors().empty());
+ operand_li.addDefPermuteFactor(factor);
}
- for (auto index : _graph.getOutputs())
+ for (auto &&index : _graph.getOutputs() | ir::Remove::UNDEFINED)
{
- auto &&lower_info = operands_lower_info.at(index);
- if (lower_info->def_factors().size() == 0)
- {
- // In case of that an operand is Graph's output and not input or output of any operation
- lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{
- controlflow_backend,
- ir::Layout::NHWC // TODO Get frontend layout of this node from IR
- });
- }
+ auto &operand_li = lower_info().operand.at(index);
+ operand_li.addUsePermuteFactor(factor);
}
- // Set LowerInfo for each operand from the operand::LowerInfo holder
- _graph.operands().iterate([&](const ir::OperandIndex &index, ir::Operand &) {
- setLowerInfo(index, std::move(operands_lower_info[index]));
+ // Handle variable tensors
+ _graph.operands().iterate([&](const ir::OperandIndex &index, ir::Operand &operand) {
+    // Some inputs of an operation may be non-constant and yet appear neither among the graph
+    // inputs/outputs nor as undefined operands; these are variable tensors. For example,
+    // UnidirectionalSequenceLSTM has such inputs.
+ if (operand.info().isVariable())
+ {
+      // A variable operand with a buffer is not supported yet
+ assert(operand.data() == nullptr);
+ assert(operand.getUses().size() == 1 && !operand.getDef().valid());
+ auto operand_li = lower_info().operand.at(index);
+ assert(operand_li.def_factors().empty());
+ operand_li.addDefPermuteFactor(operand_li.use_factors().getOnlyElement());
+ }
});
}
@@ -395,12 +189,22 @@ void LoweredGraph::dumpLowerInfo()
std::map<uint32_t, std::string> dumps;
_graph.operands().iterate([&](const ir::OperandIndex &index, ir::Operand &object) {
- std::stringstream sstream;
- if (!getLowerInfo(index)->def_factors().empty() || !getLowerInfo(index)->use_factors().empty())
+ const auto operand_lower_info = lower_info().operand.getRawPtr(index);
+ assert(operand_lower_info);
+ if (!operand_lower_info->def_factors().empty() || !operand_lower_info->use_factors().empty())
{
- auto factors_to_string = [](const ir::operand::PermuteFactorSet &factors) {
+ auto shape_to_string = [](const ir::Shape &shape) {
+ std::stringstream sstream;
+ sstream << "{ ";
+ for (auto i = 0; i < shape.rank(); ++i)
+ sstream << (shape.dim(i)) << " ";
+ sstream << "}";
+ return sstream.str();
+ };
+
+ auto factors_to_string = [](const PermuteFactorSet &factors) {
std::string str;
- for (auto factor : factors)
+ for (auto &&factor : factors)
{
str += factor.backend()->config()->id();
str += "(" + to_string(factor.layout()) + ")";
@@ -409,159 +213,45 @@ void LoweredGraph::dumpLowerInfo()
return "{ " + str + "}";
};
- auto operation_index_to_string = [](const ir::OperationIndexSet &operations) {
- std::string str;
- for (auto op : operations)
- {
- str += std::to_string(op.value());
- str += " ";
- }
- return "{ " + str + "}";
+ auto operation_index_set_to_string = [](const ir::OperationIndexSet &operations) {
+ std::stringstream sstream;
+ sstream << "{ ";
+ for (auto &&op : operations)
+ sstream << op << " ";
+ sstream << "}";
+ return sstream.str();
+ };
+
+ auto data_to_str = [](const ir::Data *data) {
+ return (data ? (std::to_string(data->size()) + " bytes") : "N/A");
};
- const auto lower_info = getLowerInfo(index);
- const auto &shape = object.shape();
- std::string def_ops =
- object.getDef().valid() ? std::to_string(object.getDef().value()) : "N/A";
- std::string use_ops = operation_index_to_string(object.getUses());
- std::string def_layouts = factors_to_string(lower_info->def_factors());
- std::string use_layouts = factors_to_string(lower_info->use_factors());
- sstream << "Operand #" << index.value() << " LowerInfo" << std::endl;
- sstream << " - Shape : { ";
- for (auto i = 0; i < shape.rank(); ++i)
- {
- sstream << (shape.dim(i)) << " ";
- }
- sstream << "}" << std::endl;
- sstream << " - Def ir::Operations : " << def_ops << std::endl;
- sstream << " - Use ir::Operations : " << use_ops << std::endl;
- sstream << " - Lower Info" << std::endl;
- sstream << " - Def Backends : " << def_layouts << std::endl;
- sstream << " - Use Backends : " << use_layouts << std::endl;
+ std::string shape_str = shape_to_string(object.shape());
+ std::string def_op = operation_index_set_to_string({object.getDef()});
+ std::string use_ops = operation_index_set_to_string(object.getUses());
+ std::string def_factors = factors_to_string(operand_lower_info->def_factors());
+ std::string use_factors = factors_to_string(operand_lower_info->use_factors());
+ std::stringstream sstream;
+ sstream << "Operand " << index << " Info" << std::endl;
+ sstream << " - Shape : " << shape_str << std::endl;
+ sstream << " - Def/Uses : Def " << def_op << " Uses " << use_ops << std::endl;
+ sstream << " - Data : " << data_to_str(object.data()) << std::endl;
+ sstream << " - LowerInfo : Def " << def_factors << " Uses " << use_factors << std::endl;
+ dumps.emplace(index.value(), sstream.str());
}
- dumps.emplace(index.value(), sstream.str());
});
for (const auto &e : dumps)
{
if (!e.second.empty())
{
- VERBOSE(Lower) << e.second;
+ std::istringstream iss(e.second);
+ std::string line;
+ while (std::getline(iss, line))
+ VERBOSE(Lower) << line << std::endl;
}
}
}
-bool LoweredGraph::mergeable(const ir::OpSequenceIndex &op_seq_index,
- const ir::OperationIndex &node_index, ir::Layout layout,
- const BackendResolver &backend_resolver)
-{
- // Are they mergeable?
- // 1. the same backend id and layout?
- // 2. Is op_seq or node branched?
- // 3. if 1 is true, the op_seq and a node are connected?
- const auto &op_seq = _op_seqs.at(op_seq_index);
- const auto &node = _graph.operations().at(node_index);
-
- // The same backend id and layout?
- {
- const auto op_seq_backend_layout = getLowerInfo(op_seq_index)->layout();
- const auto &op_seq_backend_id = getLowerInfo(op_seq_index)->backend()->config()->id();
- const auto &node_backend_id = backend_resolver.getBackend(node_index)->config()->id();
- VERBOSE(Lower) << "OpSequence#" << op_seq_index.value() << " { " << op_seq_backend_id << "("
- << to_string(op_seq_backend_layout) << ") } "
- << " NODE#" << node_index.value() << " (" << node.name() << ") { "
- << node_backend_id << "(" << to_string(layout) << ") } " << std::endl;
- if (op_seq_backend_id != node_backend_id || op_seq_backend_layout != layout)
- return false;
- }
-
- // Branched?
- {
- std::unordered_set<ir::OperationIndex> branched_set;
-
- // Check for branching up
- for (const auto &input : op_seq.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
- {
- const auto &input_obj = _graph.operands().at(input);
- auto def = input_obj.getDef();
- if (def.valid())
- {
- branched_set.insert(def);
- if (branched_set.size() > 1)
- {
- return false;
- }
- }
- }
- branched_set.clear();
-
- // Check for branching down
- for (const auto &output : node.getOutputs() | ir::Remove::DUPLICATED)
- {
- // TODO Fix this workaround for the case of model outputs that are used by another operation
- // This is needed since the branching is decided by operation, but for model outputs,
- // there is controlflow backen(use backend) but no actual use operation exists
- if (_graph.getOutputs().contains(output))
- return false;
-
- const auto &output_obj = _graph.operands().at(output);
- for (const auto &use : output_obj.getUses())
- {
- branched_set.insert(use);
- if (branched_set.size() > 1)
- {
- return false;
- }
- }
- }
- }
-
- // Connected?
- // an input of one node is an output of the other node? or vice-versa?
- {
- const auto &node_inputs = node.getInputs();
- const auto &node_outputs = node.getOutputs();
-
- // op_seq's operations are in order so that we just check the first and the last
- std::vector<ir::OperationIndex> op_seq_ops{op_seq.operations()[0]};
- if (op_seq.operations().size() > 1)
- op_seq_ops.emplace_back(op_seq.operations()[op_seq.operations().size() - 1]);
-
- for (const auto &n_index : op_seq_ops)
- {
- const auto &n = _graph.operations().at(n_index);
-
- // node's output == op_seq's input?
- for (const auto input : n.getInputs() | ir::Remove::UNDEFINED)
- {
- if (node_outputs.contains(input))
- {
- VERBOSE(Lower) << "OpSequence#" << op_seq_index.value() << " 's NODE#" << n_index.value()
- << "(" << n.name() << ") is connected to NODE#" << node_index.value()
- << "(" << node.name() << ")" << std::endl;
- return true;
- }
- }
-
- // node's input == op_seq's output?
- for (const auto output : n.getOutputs())
- {
- if (node_inputs.contains(output))
- {
- VERBOSE(Lower) << "OpSequence#" << op_seq_index.value() << " 's NODE#" << n_index.value()
- << " (" << n.name() << ") is connected to NODE#" << node_index.value()
- << std::endl;
- return true;
- }
- }
- }
-
- VERBOSE(Lower) << "OpSequence#" << op_seq_index.value() << " is not connected to NODE#"
- << node_index.value() << "(" << node.name() << ")" << std::endl;
- }
-
- return false;
-}
-
} // namespace compiler
} // namespace onert
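The rewritten lowerGraph() above chains its mandatory and optional passes through pass::PassRunner. A minimal sketch of that fluent pattern, assuming only that append() returns the runner itself (onert's real PassRunner may differ in details):

    #include <memory>
    #include <vector>

    struct Pass
    {
      virtual ~Pass() = default;
      virtual void run() = 0;
    };

    class PassRunner
    {
    public:
      PassRunner &append(std::unique_ptr<Pass> pass)
      {
        _passes.emplace_back(std::move(pass));
        return *this; // returning *this enables .append(...).append(...).run()
      }
      void run()
      {
        for (auto &pass : _passes)
          pass->run();
      }

    private:
      std::vector<std::unique_ptr<Pass>> _passes;
    };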
diff --git a/runtime/onert/core/src/compiler/ManualScheduler.cc b/runtime/onert/core/src/compiler/ManualScheduler.cc
index ed49ee56f..ccd08893f 100644
--- a/runtime/onert/core/src/compiler/ManualScheduler.cc
+++ b/runtime/onert/core/src/compiler/ManualScheduler.cc
@@ -29,9 +29,9 @@ namespace onert
namespace compiler
{
-ManualScheduler::ManualScheduler(const backend::BackendContexts &backend_contexts,
+ManualScheduler::ManualScheduler(const std::vector<const backend::Backend *> &backends,
const compiler::CompilerOptions &options)
- : _backend_contexts{backend_contexts}, _options{options}
+ : _backends{backends}, _options{options}
{
}
@@ -42,7 +42,7 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap
  // This fallback will be used in case `backend_for_all` is unavailable
auto fallback = [&]() -> const backend::Backend * {
- for (auto backend_id : _options.backend_list)
+ for (auto &&backend_id : _options.backend_list)
{
auto backend = resolveBackend(backend_id);
if (backend)
@@ -58,20 +58,20 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap
VERBOSE(ManualScheduler) << "Default backend for all ops: " << backend_all->config()->id()
<< std::endl;
- graph.operations().iterate([&](const ir::OperationIndex &index, const ir::Operation &) {
+ graph.operations().iterate([&](const ir::OperationIndex &index, const ir::IOperation &) {
backend_resolver->setBackend(index, backend_all);
});
// 2. Backend per operation type
std::unordered_map<ir::OpCode, backend::Backend *> op_type_map;
- for (auto &pair : manual_options.opcode_to_backend)
+ for (const auto &pair : manual_options.opcode_to_backend)
{
op_type_map.emplace(pair.first, BackendManager::get().get(pair.second));
}
// By default, Custom uses cpu backend
op_type_map[ir::OpCode::Custom] = BackendManager::get().get("cpu");
- graph.operations().iterate([&](const ir::OperationIndex &index, const ir::Operation &operation) {
+ graph.operations().iterate([&](const ir::OperationIndex &index, const ir::IOperation &operation) {
auto itr = op_type_map.find(operation.opcode());
if (itr != op_type_map.end())
{
@@ -80,7 +80,7 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap
});
// 3. Backend per operation
- for (auto &pair : manual_options.index_to_backend)
+ for (const auto &pair : manual_options.index_to_backend)
{
const auto &key = pair.first;
const auto &val = pair.second;
@@ -88,22 +88,21 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap
try
{
      graph.operations().at(key); // Check that it exists, or this will throw
- backend_resolver->setBackend(
- key, BackendManager::get().get(
- val)); // TODO Ensure this backend is available in backend contexts
+ backend_resolver->setBackend(key, BackendManager::get().get(val));
}
catch (...)
{
- VERBOSE(ManualScheduler) << "Invalid value while OperationIndex to Backend mapping : @"
- << key.value() << " -> \"" << val << "\"" << std::endl;
+ VERBOSE(ManualScheduler) << "Invalid value while OperationIndex to Backend mapping : @" << key
+ << " -> \"" << val << "\"" << std::endl;
}
}
// Dump final assignment
- backend_resolver->iterate([&](const ir::OperationIndex &index, const backend::Backend &backend) {
- VERBOSE(ManualScheduler) << "backend for operation #" << index.value() << ": "
- << backend.config()->id() << std::endl;
- });
+ WHEN_LOG_ENABLED(backend_resolver->iterate(
+ [&](const ir::OperationIndex &index, const backend::Backend &backend) {
+ VERBOSE(ManualScheduler) << "backend for " << index << ": " << backend.config()->id()
+ << std::endl;
+ }));
return backend_resolver;
}
@@ -113,7 +112,7 @@ const backend::Backend *ManualScheduler::resolveBackend(const std::string &id,
{
// Ensure if the backend is available in the current backend context
const backend::Backend *backend = BackendManager::get().get(id);
- if (!backend || _backend_contexts.find(backend) == _backend_contexts.end())
+ if (!backend || std::find(_backends.begin(), _backends.end(), backend) == _backends.end())
{
backend = fallback;
}
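// The availability check above now scans a flat vector of backends instead of
// probing the old BackendContexts map. A minimal standalone sketch of the same
// membership-or-fallback idiom (simplified Backend type, not part of this patch):
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

struct Backend
{
  std::string id;
};

const Backend *resolveOrFallback(const std::vector<const Backend *> &available,
                                 const Backend *candidate, const Backend *fallback)
{
  // Same shape as ManualScheduler::resolveBackend after this change: fall back
  // when the candidate backend is not in the available set.
  if (!candidate || std::find(available.begin(), available.end(), candidate) == available.end())
    return fallback;
  return candidate;
}

int main()
{
  Backend cpu{"cpu"}, acl{"acl_cl"};
  std::vector<const Backend *> available{&cpu};
  std::cout << resolveOrFallback(available, &acl, &cpu)->id << std::endl; // prints "cpu"
}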
diff --git a/runtime/onert/core/src/compiler/ManualScheduler.h b/runtime/onert/core/src/compiler/ManualScheduler.h
index 41503f7ff..18732d744 100644
--- a/runtime/onert/core/src/compiler/ManualScheduler.h
+++ b/runtime/onert/core/src/compiler/ManualScheduler.h
@@ -28,7 +28,7 @@ namespace compiler
class ManualScheduler : public IScheduler
{
public:
- ManualScheduler(const backend::BackendContexts &backend_contexts,
+ ManualScheduler(const std::vector<const backend::Backend *> &backends,
const compiler::CompilerOptions &options);
std::unique_ptr<BackendResolver> schedule(const ir::Graph &graph) override;
@@ -37,7 +37,7 @@ private:
const backend::Backend *fallback = nullptr);
private:
- const backend::BackendContexts &_backend_contexts;
+ std::vector<const backend::Backend *> _backends;
compiler::CompilerOptions _options;
};
diff --git a/runtime/onert/core/src/compiler/MultiModelCompiler.cc b/runtime/onert/core/src/compiler/MultiModelCompiler.cc
new file mode 100644
index 000000000..141fdfe09
--- /dev/null
+++ b/runtime/onert/core/src/compiler/MultiModelCompiler.cc
@@ -0,0 +1,242 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MultiModelCompiler.h"
+
+#include "CompilerHelpers.h"
+#include "ExecutorFactory.h"
+#include "ShapeValidator.h"
+#include "pass/ConstantOutputPass.h"
+#include "pass/OddOutputPass.h"
+#include "pass/PassRunner.h"
+#include "pass/UnusedOperandEliminationPass.h"
+#include "../dumper/dot/DotDumper.h"
+#include "../exec/Executors.h"
+#include "../ir/OperationDumper.h"
+#include "../ir/verifier/Verifier.h"
+
+#include "compiler/StaticShapeInferer.h"
+
+#include <misc/string_helpers.h>
+#include <misc/polymorphic_downcast.h>
+
+namespace onert
+{
+namespace compiler
+{
+
+MultiModelCompiler::MultiModelCompiler(const std::shared_ptr<ir::NNPkg> &nnpkg,
+ std::vector<std::unique_ptr<CompilerOptions>> &copts)
+ : _nnpkg{nnpkg}, _voptions{}
+{
+ assert(nnpkg->model_count() != 1);
+
+ for (uint32_t i = 0; i < copts.size(); i++)
+ {
+ _voptions.push_back(copts[i].get());
+ }
+}
+
+std::shared_ptr<CompilerArtifact> MultiModelCompiler::compile(void)
+{
+ /***************************************************
+ * Prepare compilation phase
+ ***************************************************/
+ for (auto &&options : _voptions)
+ {
+ if (!options)
+ throw std::runtime_error{"Empty compile option"};
+
+ // Mode check
+ // TODO handle option for each model
+ if (options->he_profiling_mode)
+ throw std::runtime_error("NYI: Profiling mode for multiple model is not supported yet");
+
+ if (!options->minmax_filepath.empty())
+ throw std::runtime_error("Recording minmax is not supported for multiple models");
+
+ options->forceInternalOptions();
+ options->verboseOptions();
+ }
+
+ // NYI: allow one model compilation
+ auto const model_count = _nnpkg->model_count();
+ if (model_count != _voptions.size())
+ throw std::runtime_error{"Model count and option vector size mismatch"};
+
+ for (uint16_t i = 0; i < model_count; i++)
+ {
+ if (!_nnpkg->model(ir::ModelIndex{i})->hasOnly<ir::Graph>())
+ throw std::runtime_error("MultiModelCompiler can only compile models for inference.");
+ }
+
+ for (uint16_t i = 0; i < model_count; i++)
+ {
+ _nnpkg->model(ir::ModelIndex{i})->iterate([&](const ir::SubgraphIndex &, ir::IGraph &graph) {
+ auto &subg = nnfw::misc::polymorphic_downcast<ir::Graph &>(graph);
+
+ // Mandatory passes
+ pass::PassRunner{}
+ .append(std::make_unique<pass::ConstantOutputPass>(subg))
+ .append(std::make_unique<pass::OddOutputPass>(subg))
+ .run();
+
+ // Optimizations
+ pass::PassRunner{}.append(std::make_unique<pass::UnusedOperandEliminationPass>(subg)).run();
+ });
+ }
+
+ /***************************************************
+ * Backend independent analysis & optimization phase
+ ***************************************************/
+ // TODO Handle dump level for each model
+ auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_voptions[0]->graph_dump_level);
+ onert::dumper::dot::DotDumper dot_dumper(dump_level);
+
+ // Tracing context
+ // TODO Support tracing_ctx for multiple models
+ std::unique_ptr<util::TracingCtx> tracing_ctx = nullptr;
+
+ // Model edge context: copy model edge context
+ auto model_edges = std::make_unique<ir::ModelEdges>(_nnpkg->model_edges());
+
+ // Custom kernels
+ std::unordered_map<ir::ModelIndex, std::shared_ptr<backend::custom::IKernelBuilder>>
+ custom_kernel_builders;
+ for (uint16_t i = 0; i < model_count; i++)
+ {
+ auto const model_index = ir::ModelIndex{i};
+ custom_kernel_builders[model_index] = _nnpkg->model(model_index)->getKernelBuilder();
+ }
+
+ // Lower: Assign backend
+ std::unordered_map<ir::ModelIndex,
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>>
+ lowered_subgs;
+
+ for (uint16_t i = 0; i < model_count; i++)
+ {
+ auto const model_index = ir::ModelIndex{i};
+ auto model = _nnpkg->model(model_index);
+
+ model->iterate([&](const ir::SubgraphIndex &subg_index, ir::IGraph &graph) {
+ auto &subg = nnfw::misc::polymorphic_downcast<ir::Graph &>(graph);
+
+ dot_dumper.dump(subg,
+ nnfw::misc::str("before_lower_model-", i, "-subg-", subg_index.value()));
+ // Lower: Assign backend
+ lowered_subgs[model_index][subg_index] =
+ std::make_unique<compiler::LoweredGraph>(subg, *_voptions[i]);
+ // Set tracing_ctx for copied graph
+ if (tracing_ctx != nullptr)
+ tracing_ctx->setSubgraphIndex(&(lowered_subgs[model_index][subg_index]->graph()),
+ subg_index.value());
+ });
+ }
+
+ _nnpkg.reset();
+
+ for (const auto &pair : lowered_subgs)
+ {
+ const auto &model_index = pair.first;
+ const auto &model_lsubg = pair.second;
+
+ for (const auto &pair_inner : model_lsubg)
+ {
+ const auto &subg_index = pair_inner.first;
+ const auto &lowered_subg = pair_inner.second;
+ dot_dumper.dump(*lowered_subg, nnfw::misc::str("after_lower_model-", model_index.value(),
+ "-subg-", subg_index.value()));
+ }
+ }
+
+ // Shape inference.
+ for (auto &&pair : lowered_subgs)
+ {
+ auto &model_lsubgs = pair.second;
+ // Run the StaticShapeInferer of the primary subgraph. All child StaticShapeInferers
+ // are called recursively.
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers =
+ createStaticShapeInferers(model_lsubgs);
+
+ const auto primary_subg_idx = ir::SubgraphIndex{0};
+ inferers.at(primary_subg_idx)->infer();
+
+ for (const auto &pair_inferer : inferers)
+ {
+ const auto inferer = pair_inferer.second.get();
+ inferer->dump();
+ }
+ }
+
+ // Shape validation
+ // TODO Move shape-independent feature checks from ShapeValidator to OperationValidator
+ // TODO Move ShapeValidator into shape inference
+ // - Validate input tensor shapes
+ // - Validate parameter values whose valid range depends on the input tensor shape
+ // - Output tensor shape validation is unnecessary because the static/dynamic
+ //   shape inferers already produce valid output shapes
+ for (const auto &pair : lowered_subgs)
+ {
+ const auto &model_lsubgs = pair.second;
+
+ for (const auto &pair_inner : model_lsubgs)
+ {
+ const auto &lowered_subg = pair_inner.second;
+ compiler::ShapeValidator{lowered_subg->graph()}();
+ }
+ }
+
+ /*************************************************************
+ * Backend independent analysis & optimization phase finished
+ *************************************************************/
+ auto executors = std::make_shared<exec::Executors>(std::move(model_edges));
+ for (auto &&pair : lowered_subgs)
+ {
+ auto const &model_index = pair.first;
+ auto &model_lsubgs = pair.second;
+
+ for (auto &&pair_inner : model_lsubgs)
+ {
+ auto const subg_index = pair_inner.first;
+ auto &lowered_subg = pair_inner.second;
+ auto const indexed_ranks = lowered_subg->indexed_ranks();
+
+ ir::OperationDumper dumper("Executor generation of Subgraph " +
+ std::to_string(subg_index.value()));
+ lowered_subg->graph().operations().iterate(
+ [&](const ir::OperationIndex &, const ir::IOperation &op) { op.accept(dumper); });
+
+ ExecutorFactoryArgs args;
+ args.tracing_ctx = tracing_ctx.get();
+ args.options = _voptions[model_index.value()];
+ args.model_index = model_index;
+ args.custom_kernel_builder = custom_kernel_builders[model_index];
+ auto executor = std::unique_ptr<exec::IExecutor>{
+ ExecutorFactory::get().create(std::move(lowered_subg), executors, args)};
+ executor->setIndexedRanks(indexed_ranks);
+ executors->emplace(model_index, subg_index, std::move(executor));
+ }
+ }
+
+ /********************************
+ * Code generation phase finished
+ ********************************/
+ return std::make_shared<CompilerArtifact>(executors, std::move(tracing_ctx));
+}
+
+} // namespace compiler
+} // namespace onert
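// MultiModelCompiler::compile() enforces a one-option-per-model contract before
// lowering: model_count must equal the size of the option vector. A hedged usage
// sketch of the new class under stated assumptions: loadPackage() is a
// hypothetical helper, and CompilerOptions::fromGlobalConfig() is assumed to be
// available as at other onert call sites.
#include "MultiModelCompiler.h"

// Hypothetical loader, declared elsewhere; not part of this patch.
std::shared_ptr<onert::ir::NNPkg> loadPackage(const std::string &path);

std::shared_ptr<onert::compiler::CompilerArtifact> compilePackage()
{
  using namespace onert;
  std::shared_ptr<ir::NNPkg> nnpkg = loadPackage("app.nnpackage");
  std::vector<std::unique_ptr<compiler::CompilerOptions>> copts;
  for (uint16_t i = 0; i < nnpkg->model_count(); ++i)
    copts.push_back(compiler::CompilerOptions::fromGlobalConfig()); // one option set per model
  compiler::MultiModelCompiler mmc{nnpkg, copts};
  return mmc.compile(); // throws if copts.size() != model_count()
}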
diff --git a/runtime/onert/core/src/compiler/MultiModelCompiler.h b/runtime/onert/core/src/compiler/MultiModelCompiler.h
new file mode 100644
index 000000000..b282a5087
--- /dev/null
+++ b/runtime/onert/core/src/compiler/MultiModelCompiler.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file MultiModelCompiler.h
+ * @brief This file contains the MultiModelCompiler class, which defines and runs the compilation phase
+ */
+
+#ifndef __ONERT_COMPILER_MULTI_MODEL_COMPILER_H__
+#define __ONERT_COMPILER_MULTI_MODEL_COMPILER_H__
+
+#include "compiler/CompilerOptions.h"
+#include "compiler/ICompiler.h"
+#include "ir/NNPkg.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+/**
+ * @brief Class to compile an NN package
+ */
+class MultiModelCompiler final : public ICompiler
+{
+public:
+ /**
+ * @brief Construct a new MultiModelCompiler object for an NN package
+ * @param[in] nnpkg NN package to compile
+ * @param[in] copts Compiler option vector, one entry per model in the package
+ */
+ MultiModelCompiler(const std::shared_ptr<ir::NNPkg> &nnpkg,
+ std::vector<std::unique_ptr<CompilerOptions>> &copts);
+
+ /**
+ * @brief Destroy the MultiModelCompiler object
+ */
+ ~MultiModelCompiler() = default;
+
+public:
+ /**
+ * @brief Do compilation with the options
+ *
+ * @return std::shared_ptr<CompilerArtifact> Executors as a result of compilation
+ */
+ std::shared_ptr<CompilerArtifact> compile(void);
+
+private:
+ std::shared_ptr<ir::NNPkg> _nnpkg;
+ std::vector<CompilerOptions *> _voptions;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_MULTI_MODEL_COMPILER_H__
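// Note that _voptions holds raw, non-owning pointers into the caller's vector of
// unique_ptr, so the options must outlive the compiler. A minimal sketch of that
// ownership contract with stand-in types (not part of this patch):
#include <memory>
#include <vector>

struct Options
{
  int opt_level = 0;
};

struct CompilerView
{
  explicit CompilerView(std::vector<std::unique_ptr<Options>> &owned)
  {
    for (auto &&o : owned)
      _views.push_back(o.get()); // non-owning view, mirrors _voptions
  }
  std::vector<Options *> _views; // dangles if the owning vector dies first
};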
diff --git a/runtime/onert/core/src/compiler/OperationLowerInfo.cc b/runtime/onert/core/src/compiler/OperationLowerInfo.cc
new file mode 100644
index 000000000..e8a438130
--- /dev/null
+++ b/runtime/onert/core/src/compiler/OperationLowerInfo.cc
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "compiler/OperationLowerInfo.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+OperationLowerInfo::OperationLowerInfo(const backend::Backend *backend, ir::Layout layout)
+ : _permute_factor{backend, layout}
+{
+ // DO NOTHING
+}
+
+} // namespace compiler
+} // namespace onert
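// OperationLowerInfo is a thin wrapper pairing the backend chosen for an
// operation with the layout it expects, via PermuteFactor. A simplified sketch
// of that pairing with stand-in types (not the onert headers):
#include <string>

enum class Layout
{
  NHWC,
  NCHW,
  UNKNOWN
};

struct Backend
{
  std::string id;
};

// Stand-in for onert::compiler::PermuteFactor: which backend runs the op and in
// which layout its tensors must be presented.
struct PermuteFactor
{
  const Backend *backend;
  Layout layout;
};

struct OperationLowerInfo
{
  OperationLowerInfo(const Backend *backend, Layout layout) : _permute_factor{backend, layout}
  {
    // DO NOTHING, as in the real constructor
  }
  PermuteFactor _permute_factor;
};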
diff --git a/runtime/onert/core/src/compiler/OperationValidator.cc b/runtime/onert/core/src/compiler/OperationValidator.cc
deleted file mode 100644
index f7f659e3e..000000000
--- a/runtime/onert/core/src/compiler/OperationValidator.cc
+++ /dev/null
@@ -1,1053 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationValidator.h"
-
-#include <typeinfo>
-
-#include "ir/Graph.h"
-#include "ir/operation/LowerInfo.h"
-
-#include "util/logging.h"
-#include "util/Utils.h"
-
-#define OP_REQUIRES(EXP) \
- do \
- { \
- if (!(EXP)) \
- throw std::runtime_error("OperationValidator failed at line " + std::to_string(__LINE__)); \
- } while (0)
-
-namespace onert
-{
-namespace compiler
-{
-
-OperationValidator::OperationValidator(const ir::Graph &graph)
- : _graph{graph}, _ctx{graph.operands()}, _current_op_seq_layout{ir::Layout::UNKNOWN}
-{
-}
-
-void OperationValidator::checkUnaryOp(const ir::Operation &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(0)};
-
- // Check if I/O types match
- OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type());
-
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- // Check if I/O shapes match
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
-}
-
-void OperationValidator::operator()()
-{
- // There is no reason for each subgraph to have subgraphs since compiler has subgraphs when
- // creating Compiler
- assert(_graph.subgraphs() == nullptr);
-
- _current_op_seq_layout = _graph.layout();
-
- _graph.operations().iterate(
- [&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); });
-}
-
-void OperationValidator::visit(const ir::operation::BatchMatMul &node)
-{
- const auto lhs_index(node.getInputs().at(ir::operation::BatchMatMul::Input::LHS));
- const auto rhs_index(node.getInputs().at(ir::operation::BatchMatMul::Input::RHS));
- const auto out_index{node.getOutputs().at(0)};
-
- // Constant lhs and rhs is not implemented yet
- OP_REQUIRES(!_ctx.at(lhs_index).isConstant() && !_ctx.at(rhs_index).isConstant());
-
- if (_ctx.at(out_index).info().isDynamic())
- return;
-
- OP_REQUIRES(_ctx.at(lhs_index).shape().rank() <= 4);
- OP_REQUIRES(_ctx.at(rhs_index).shape().rank() <= 4);
- OP_REQUIRES(_ctx.at(lhs_index).shape().rank() >= 2);
- OP_REQUIRES(_ctx.at(rhs_index).shape().rank() >= 2);
-}
-
-void OperationValidator::visit(const ir::operation::BatchToSpaceND &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
- return;
-
- const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)};
- const auto block_size_index{
- node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
-
- const auto frontend_layout = _current_op_seq_layout;
- const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
- const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
-
- // All requirement as per NNAPI specification.
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(block_size_index).shape().rank() == 1);
-
- OP_REQUIRES(_ctx.at(block_size_index).shape().dim(0) == 2);
-
- OP_REQUIRES(_ctx.at(block_size_index).isConstant());
-
- OP_REQUIRES(input_shape.C == output_shape.C);
-}
-
-void OperationValidator::visit(const ir::operation::Comparison &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- // This validator does not check shape. So checking isDynamic() is skipped.
-
- const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
- const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
-
- OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type());
- OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::BOOL8);
-}
-
-void OperationValidator::visit(const ir::operation::Softmax &node)
-{
- VERBOSE(Softmax) << "Configure SOFTMAX operation" << std::endl;
-
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(0)};
-
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
-}
-
-void OperationValidator::visit(const ir::operation::InstanceNorm &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
- return;
-
- const auto ifm_index{node.getInputs().at(ir::operation::InstanceNorm::Input::INPUT)};
- const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
- const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
-
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(ifm_index).shape() == _ctx.at(ofm_index).shape());
- OP_REQUIRES(_ctx.at(gamma_index).shape().rank() == 1);
- OP_REQUIRES(_ctx.at(beta_index).shape().rank() == 1);
-}
-
-void OperationValidator::visit(const ir::operation::Pool2D &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
- return;
-
- const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};
-
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
-}
-
-void OperationValidator::visit(const ir::operation::Permute &node)
-{
- VERBOSE(Permute) << "Configure Permute operation" << std::endl;
-
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(0)};
-
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
-}
-
-void OperationValidator::visit(const ir::operation::Reduce &node)
-{
- VERBOSE(Permute) << "Configure " + node.name() + " operation" << std::endl;
-
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
- const auto input_shape = _ctx.at(input_index).shape();
- const auto output_shape = _ctx.at(output_index).shape();
-
- OP_REQUIRES(input_shape.rank() <= 4);
- OP_REQUIRES(output_shape.rank() <= input_shape.rank());
-
- // NOTE For the 4-dimensions, if the rank of input and output are different, this runtime only
- // supports cases reducing height and width or reducing depth.
- // TODO We have to support all cases of dimensions up to 4.
- // For correct permuting, we have to set output's shape to be equal in dimension position of the
- // input. But the positions of the same dimensions in the input and output may be set differently.
- // For example {2,3,4,5}(input's shape) can be reduced to {3,5}(output's shape). The original
- // output shape should be {1,3,1,5}, but real output shape may be {3,5}. If you simply try to
- // extend it in 4 dimensions, it should be {1,1,3,5}.
- // Even if output shape is changed to {1,3,1,5}, there is another problem. It is that shape of
- // output tensor used at next operation is changed to {1,3,1,5} after this operation even if the
- // next operation is not desired.
- if (input_shape.rank() == 4 && input_shape.rank() != output_shape.rank())
- {
- if (output_shape.rank() == 2)
- {
- // Reducing HW
- OP_REQUIRES(input_shape.dim(0) == output_shape.dim(0) &&
- input_shape.dim(3) == output_shape.dim(1));
- }
- else if (output_shape.rank() == 3)
- {
- // Reducing C or
- // (Reducing H and C(input and output) == 1) or (Reducing W and C(input and output) == 1)
- OP_REQUIRES((input_shape.dim(0) == output_shape.dim(0) &&
- input_shape.dim(1) == output_shape.dim(1) &&
- input_shape.dim(2) == output_shape.dim(2)) ||
- (input_shape.dim(0) == output_shape.dim(0) &&
- (input_shape.dim(1) == output_shape.dim(1) ||
- input_shape.dim(2) == output_shape.dim(1)) &&
- input_shape.dim(3) == 1 && output_shape.dim(2) == 1));
- }
- }
-}
-
-void OperationValidator::visit(const ir::operation::Transpose &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
- const auto &perm{node.param().perm};
-
- const auto &output_shape = _ctx.at(output_index).shape();
- const auto &input_shape = _ctx.at(input_index).shape();
-
- OP_REQUIRES(input_shape.rank() == static_cast<int>(perm.size()));
- OP_REQUIRES(input_shape.rank() == output_shape.rank());
-}
-
-void OperationValidator::visit(const ir::operation::RNN &node)
-{
- // NOTE This validation is for static rnn(non-dynamic shape), but not for dynamic rnn
- // TODO Support dynamic rnn
- const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto hidden_state_out_index{
- node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};
-
- const auto input_index{node.getInputs().at(ir::operation::RNN::Input::INPUT)};
- const auto weights_index{node.getInputs().at(ir::operation::RNN::Input::WEIGHTS)};
- const auto recurrent_weights_index{
- node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
- const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)};
- const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)};
-
- const auto batch_size = _ctx.at(output_index).shape().dim(0);
- const auto num_units = _ctx.at(output_index).shape().dim(1);
-
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == 2 &&
- _ctx.at(hidden_state_out_index).shape().rank() == 2 &&
- _ctx.at(input_index).shape().rank() == 2 &&
- _ctx.at(weights_index).shape().rank() == 2 &&
- _ctx.at(recurrent_weights_index).shape().rank() == 2 &&
- _ctx.at(hidden_state_in_index).shape().rank() == 2);
- OP_REQUIRES(_ctx.at(bias_index).shape().rank() == 1);
-
- OP_REQUIRES(batch_size == _ctx.at(input_index).shape().dim(0) &&
- batch_size == _ctx.at(hidden_state_in_index).shape().dim(0) &&
- batch_size == _ctx.at(hidden_state_out_index).shape().dim(0));
- OP_REQUIRES(_ctx.at(input_index).shape().dim(1) == _ctx.at(weights_index).shape().dim(1));
-
- OP_REQUIRES(num_units == _ctx.at(weights_index).shape().dim(0) &&
- num_units == _ctx.at(recurrent_weights_index).shape().dim(0) &&
- num_units == _ctx.at(bias_index).shape().dim(0));
- OP_REQUIRES(num_units == _ctx.at(output_index).shape().dim(1) &&
- num_units == _ctx.at(recurrent_weights_index).shape().dim(1) &&
- num_units == _ctx.at(hidden_state_in_index).shape().dim(1) &&
- num_units == _ctx.at(hidden_state_out_index).shape().dim(1));
-}
-
-void OperationValidator::visit(const ir::operation::SpaceToBatchND &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
- return;
-
- const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
- const auto block_size_index{
- node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
- const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
-
- const auto frontend_layout = _current_op_seq_layout;
- const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
- const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
-
- // All requirement as per NNAPI specification.
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(block_size_index).shape().rank() == 1);
- OP_REQUIRES(_ctx.at(paddings_index).shape().rank() == 2);
-
- OP_REQUIRES(_ctx.at(block_size_index).shape().dim(0) == 2);
- OP_REQUIRES(_ctx.at(paddings_index).shape().dim(0) == 2);
- OP_REQUIRES(_ctx.at(paddings_index).shape().dim(1) == 2);
-
- OP_REQUIRES(_ctx.at(block_size_index).isConstant());
- OP_REQUIRES(_ctx.at(paddings_index).isConstant());
-
- OP_REQUIRES(input_shape.C == output_shape.C);
-}
-
-void OperationValidator::visit(const ir::operation::SpaceToDepth &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
- return;
-
- const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
-
- const auto frontend_layout = _current_op_seq_layout;
- const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
- const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
- const auto block_size = node.param().block_size;
-
- // All assertions as per NNAPI specification.
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4);
- OP_REQUIRES((block_size >= 1) && (input_shape.H % block_size == 0) &&
- (input_shape.W % block_size == 0));
- OP_REQUIRES(input_shape.N == output_shape.N);
- OP_REQUIRES(input_shape.C * block_size * block_size == output_shape.C);
-}
-
-void OperationValidator::visit(const ir::operation::ElementwiseActivation &node)
-{
- checkUnaryOp(node);
-}
-
-void OperationValidator::visit(const ir::operation::ElementwiseBinary &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
-
- OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type());
- OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(output_index).typeInfo().type());
-}
-
-void OperationValidator::visit(const ir::operation::ElementwiseUnary &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
-
- OP_REQUIRES(node.getInputs().size() == 1);
- OP_REQUIRES(node.getOutputs().size() == 1);
-
- // Check if I/O types match
- if (node.param().op_type == ir::operation::ElementwiseUnary::Type::DEQUANTIZE)
- {
- OP_REQUIRES(_ctx.at(input_index).typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM);
- OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::FLOAT32);
- }
- else if (node.param().op_type == ir::operation::ElementwiseUnary::Type::QUANTIZE)
- {
- OP_REQUIRES(_ctx.at(input_index).typeInfo().type() == ir::DataType::FLOAT32);
- OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM);
- }
- else if (node.param().op_type != ir::operation::ElementwiseUnary::Type::CAST)
- {
- OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type());
- }
-
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
-}
-
-void OperationValidator::visit(const ir::operation::EmbeddingLookup &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
- const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
-
- const auto &output_obj = _ctx.at(output_index);
- const auto &lookups_obj = _ctx.at(lookups_index);
- const auto &values_obj = _ctx.at(values_index);
-
- // Verify operand here, not at SimpleEmbeddingLookup::configure() to avoid acl's modifying
- // TensorShape sometimes(Issue: https://github.sec.samsung.net/STAR/nnfw/issues/729)
- {
- OP_REQUIRES(lookups_obj.typeInfo().type() == ir::DataType::INT32);
-
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto &output_shape = output_obj.shape();
- const auto &lookups_shape = lookups_obj.shape();
- const auto &values_shape = values_obj.shape();
-
- OP_REQUIRES(lookups_shape.rank() == 1);
- OP_REQUIRES(values_shape.rank() >= 2);
-
- // output should be a n-D tensor with the same rank and shape as the values tensor, except for
- // the first dimension which has the same size as lookups' only dimension.
- OP_REQUIRES(output_shape.rank() == values_shape.rank());
- OP_REQUIRES(output_shape.dim(0) == lookups_shape.dim(0));
- for (int n = 1; n < output_shape.rank(); ++n)
- {
- OP_REQUIRES(output_shape.dim(n) == values_shape.dim(n));
- }
- }
-}
-
-void OperationValidator::visit(const ir::operation::ExpandDims &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
- const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
-
- OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type());
- OP_REQUIRES(_ctx.at(axis_index).typeInfo().type() == ir::DataType::INT32);
-
- if (_ctx.at(axis_index).info().isDynamic())
- return;
- OP_REQUIRES(_ctx.at(axis_index).shape().rank() <= 1);
-}
-
-void OperationValidator::visit(const ir::operation::HashtableLookup &node)
-{
- const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)};
- const auto hits_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::HITS)};
-
- const auto lookups_index{node.getInputs().at(ir::operation::HashtableLookup::Input::LOOKUPS)};
- const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
- const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
-
- const auto &output_obj = _ctx.at(output_index);
- const auto &hits_obj = _ctx.at(hits_index);
-
- const auto &lookups_obj = _ctx.at(lookups_index);
- const auto &keys_obj = _ctx.at(keys_index);
- const auto &values_obj = _ctx.at(values_index);
-
- OP_REQUIRES(lookups_obj.typeInfo().type() == ir::DataType::INT32);
- OP_REQUIRES(keys_obj.typeInfo().type() == ir::DataType::INT32);
- OP_REQUIRES(hits_obj.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM);
-
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto &output_shape = output_obj.shape();
- const auto &lookups_shape = lookups_obj.shape();
- const auto &keys_shape = keys_obj.shape();
- const auto &values_shape = values_obj.shape();
-
- OP_REQUIRES(values_shape.rank() == output_shape.rank());
- OP_REQUIRES(lookups_shape.rank() == 1);
- OP_REQUIRES(keys_shape.rank() == 1);
- OP_REQUIRES(values_shape.dim(0) == keys_shape.dim(0));
- OP_REQUIRES(lookups_shape.dim(0) == output_shape.dim(0));
-}
-
-void OperationValidator::visit(const ir::operation::TransposeConv &node)
-{
- // param check
- OP_REQUIRES((node.param().padding.type == ir::PaddingType::SAME) ||
- (node.param().padding.type == ir::PaddingType::VALID));
-
- // shape check
- const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
- return;
-
- const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)};
- const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)};
-
- // Only 4D tensors are supported
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == _ctx.at(ifm_index).shape().rank());
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == _ctx.at(ker_index).shape().rank());
-
- const auto frontend_layout = _current_op_seq_layout;
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
- // The kernel has only IHWO layout on frontend
- // So ker_shape is treated here below
- // I -> N
- // H -> H
- // W -> W
- // O -> C
- const auto ker_shape = _ctx.at(ker_index).shape().asFeature(ir::Layout::NHWC);
-
- OP_REQUIRES(ifm_shape.N == ofm_shape.N);
- OP_REQUIRES(ifm_shape.C == ker_shape.C);
- OP_REQUIRES(ker_shape.N == ofm_shape.C);
-}
-
-void OperationValidator::visit(const ir::operation::Gather &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
- return;
-
- const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
- const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
-
- const auto ifm_shape = _ctx.at(ifm_index).shape();
- const auto indices_shape = _ctx.at(indices_index).shape();
- const auto ofm_shape = _ctx.at(ofm_index).shape();
-
- OP_REQUIRES(ifm_shape.rank() <= 4);
- OP_REQUIRES(indices_shape.rank() <= 3);
- OP_REQUIRES(ofm_shape.rank() <= 4);
-}
-
-void OperationValidator::visit(const ir::operation::DepthToSpace &node)
-{
- // param check
- int32_t block_size = node.param().block_size;
-
- OP_REQUIRES(block_size > 0);
-
- // shape check
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
-
- const auto frontend_layout = _current_op_seq_layout;
- const auto output_shape = _ctx.at(output_index).shape().asFeature(frontend_layout);
- const auto input_shape = _ctx.at(input_index).shape().asFeature(frontend_layout);
-
- OP_REQUIRES(_ctx.at(input_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == 4);
-
- {
- OP_REQUIRES(output_shape.N == input_shape.N);
- OP_REQUIRES(output_shape.H == input_shape.H * block_size);
- OP_REQUIRES(output_shape.W == input_shape.W * block_size);
- OP_REQUIRES(input_shape.C % (block_size * block_size) == 0);
- OP_REQUIRES(output_shape.C == input_shape.C / (block_size * block_size));
- }
-}
-
-void OperationValidator::visit(const ir::operation::Pack &node)
-{
- // param check
- const auto num{node.param().num};
- const auto axis{node.param().axis};
- OP_REQUIRES(num == static_cast<int32_t>(node.getInputs().size()));
-
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- // shape check
- const auto &output_shape = _ctx.at(output_index).shape();
- const auto output_rank = static_cast<int32_t>(output_shape.rank());
-
- const auto input1_index{node.getInputs().at(0)};
- const auto input_shape = _ctx.at(input1_index).shape();
-
- OP_REQUIRES(axis >= -output_rank && axis < output_rank);
- for (const auto &index : node.getInputs())
- {
- OP_REQUIRES(input_shape == _ctx.at(index).shape());
- }
-}
-
-void OperationValidator::visit(const ir::operation::LSTM &node)
-{
- // NOTE This validation is for static rnn(non-dynamic shape), but not for dynamic rnn
- // TODO Support dynamic rnn
- const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto scratch_buffer_index{
- node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
- const auto output_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
- const auto cell_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
-
- const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
- const auto input_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)};
- const auto input_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
- const auto input_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
- const auto input_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
- const auto recurrent_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)};
- const auto recurrent_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
- const auto recurrent_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
- const auto recurrent_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
- const auto cell_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)};
- const auto cell_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)};
- const auto cell_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)};
- const auto input_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
- const auto forget_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
- const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
- const auto output_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
- const auto projection_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)};
- const auto projection_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)};
- const auto output_state_in_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
- const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
-
- OP_REQUIRES(_ctx.at(scratch_buffer_index).shape().rank() == 2 &&
- _ctx.at(output_state_out_index).shape().rank() == 2 &&
- _ctx.at(cell_state_out_index).shape().rank() == 2 &&
- _ctx.at(output_index).shape().rank() == 2 &&
- _ctx.at(input_index).shape().rank() == 2 &&
- _ctx.at(input_to_input_weights_index).shape().rank() == 2 &&
- _ctx.at(input_to_forget_weights_index).shape().rank() == 2 &&
- _ctx.at(input_to_cell_weights_index).shape().rank() == 2 &&
- _ctx.at(input_to_output_weights_index).shape().rank() == 2 &&
- _ctx.at(recurrent_to_input_weights_index).shape().rank() == 2 &&
- _ctx.at(recurrent_to_forget_weights_index).shape().rank() == 2 &&
- _ctx.at(recurrent_to_cell_weights_index).shape().rank() == 2 &&
- _ctx.at(recurrent_to_output_weights_index).shape().rank() == 2 &&
- _ctx.at(projection_weights_index).shape().rank() == 2 &&
- _ctx.at(output_state_in_index).shape().rank() == 2 &&
- _ctx.at(cell_state_in_index).shape().rank() == 2);
-
- OP_REQUIRES(_ctx.at(cell_to_input_weights_index).shape().rank() == 1 &&
- _ctx.at(cell_to_forget_weights_index).shape().rank() == 1 &&
- _ctx.at(cell_to_output_weights_index).shape().rank() == 1 &&
- _ctx.at(input_gate_bias_index).shape().rank() == 1 &&
- _ctx.at(forget_gate_bias_index).shape().rank() == 1 &&
- _ctx.at(cell_bias_index).shape().rank() == 1 &&
- _ctx.at(output_gate_bias_index).shape().rank() == 1 &&
- _ctx.at(projection_bias_index).shape().rank() == 1);
-
- // CIFG assertion
- OP_REQUIRES((_ctx.at(input_to_input_weights_index).shape().dim(0) == 0 &&
- _ctx.at(input_to_input_weights_index).shape().dim(1) == 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(0) == 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) == 0 &&
- _ctx.at(input_gate_bias_index).shape().dim(0) == 0 &&
- _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0) ||
- (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(input_to_input_weights_index).shape().dim(1) != 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0 &&
- _ctx.at(input_gate_bias_index).shape().dim(0) != 0));
-
- // Peephole assertion
- OP_REQUIRES((_ctx.at(cell_to_forget_weights_index).shape().dim(0) == 0 &&
- _ctx.at(cell_to_output_weights_index).shape().dim(0) == 0) ||
- (_ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0 &&
- _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0));
-
- bool has_input_to_input_weights = _ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(input_to_input_weights_index).shape().dim(1) != 0;
- bool has_recurrent_to_input_weights =
- _ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
- bool has_input_gate_bias = _ctx.at(input_gate_bias_index).shape().dim(0) != 0;
- bool has_cell_to_input_weights = _ctx.at(cell_to_input_weights_index).shape().dim(0) != 0;
- bool has_cell_to_forget_weights = _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0;
- bool has_cell_to_output_weights = _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;
- bool has_projection_weights = _ctx.at(projection_weights_index).shape().dim(0) != 0 &&
- _ctx.at(projection_weights_index).shape().dim(1) != 0;
- bool has_projection_bias = _ctx.at(projection_bias_index).shape().dim(0);
-
- // NOTE The cell_to_input_weights do not exist in non-peephole although regular LSTM(non-CIFG).
- // true: no CIFG
- // false: CIFG
- bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights;
-
- // NOTE The cell_to_input_weights do not exist in regular CIFG although peephole.
- // true: peephole
- // false: no peephole
- bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights;
-
- // NOTE The projection weights may have data but the projection bias may not.
- bool has_projection_param = has_projection_weights;
-
- const auto batch_size = _ctx.at(input_index).shape().dim(0);
- OP_REQUIRES(batch_size == _ctx.at(output_state_in_index).shape().dim(0) &&
- batch_size == _ctx.at(cell_state_in_index).shape().dim(0) &&
- batch_size == _ctx.at(scratch_buffer_index).shape().dim(0) &&
- batch_size == _ctx.at(output_state_out_index).shape().dim(0) &&
- batch_size == _ctx.at(cell_state_out_index).shape().dim(0) &&
- batch_size == _ctx.at(output_index).shape().dim(0));
-
- const auto input_size = _ctx.at(input_index).shape().dim(1);
- OP_REQUIRES(input_size == _ctx.at(input_to_forget_weights_index).shape().dim(1) &&
- input_size == _ctx.at(input_to_cell_weights_index).shape().dim(1) &&
- input_size == _ctx.at(input_to_output_weights_index).shape().dim(1));
-
- const auto num_units = _ctx.at(cell_state_out_index).shape().dim(1);
- OP_REQUIRES(num_units == _ctx.at(input_to_forget_weights_index).shape().dim(0) &&
- num_units == _ctx.at(input_to_cell_weights_index).shape().dim(0) &&
- num_units == _ctx.at(input_to_output_weights_index).shape().dim(0) &&
- num_units == _ctx.at(recurrent_to_forget_weights_index).shape().dim(0) &&
- num_units == _ctx.at(recurrent_to_cell_weights_index).shape().dim(0) &&
- num_units == _ctx.at(recurrent_to_output_weights_index).shape().dim(0) &&
- num_units == _ctx.at(forget_gate_bias_index).shape().dim(0) &&
- num_units == _ctx.at(cell_bias_index).shape().dim(0) &&
- num_units == _ctx.at(output_gate_bias_index).shape().dim(0) &&
- num_units == _ctx.at(cell_state_in_index).shape().dim(1) &&
- (((num_units * 3) == _ctx.at(scratch_buffer_index).shape().dim(1)) ||
- ((num_units * 4) == _ctx.at(scratch_buffer_index).shape().dim(1))));
-
- const auto output_size = _ctx.at(output_index).shape().dim(1);
- OP_REQUIRES(output_size == _ctx.at(recurrent_to_forget_weights_index).shape().dim(1) &&
- output_size == _ctx.at(recurrent_to_cell_weights_index).shape().dim(1) &&
- output_size == _ctx.at(recurrent_to_output_weights_index).shape().dim(1) &&
- output_size == _ctx.at(output_state_in_index).shape().dim(1) &&
- output_size == _ctx.at(output_state_out_index).shape().dim(1));
-
- if (has_cifg_param)
- {
- OP_REQUIRES(input_size == _ctx.at(input_to_input_weights_index).shape().dim(1));
- OP_REQUIRES(num_units == _ctx.at(input_to_input_weights_index).shape().dim(0) &&
- num_units == _ctx.at(recurrent_to_input_weights_index).shape().dim(0) &&
- (num_units == _ctx.at(cell_to_input_weights_index).shape().dim(0) ||
- _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0 /* non-peephole */) &&
- num_units == _ctx.at(input_gate_bias_index).shape().dim(0));
- OP_REQUIRES(output_size == _ctx.at(recurrent_to_input_weights_index).shape().dim(1));
- OP_REQUIRES(has_input_to_input_weights && has_recurrent_to_input_weights &&
- has_input_gate_bias);
- if (has_cell_to_input_weights)
- {
- // NOTE The cell_to_input_weights exist only in case of non-CIFG and peephole.
- OP_REQUIRES(has_peephole_param);
- }
- OP_REQUIRES(_ctx.at(scratch_buffer_index).shape().dim(1) == num_units * 4);
- }
- else
- {
- OP_REQUIRES(_ctx.at(scratch_buffer_index).shape().dim(1) == num_units * 3);
- }
-
- if (has_peephole_param)
- {
- OP_REQUIRES(num_units == _ctx.at(cell_to_forget_weights_index).shape().dim(0) &&
- num_units == _ctx.at(cell_to_output_weights_index).shape().dim(0) &&
- (num_units == _ctx.at(cell_to_input_weights_index).shape().dim(0) ||
- _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0 /* CIFG */));
- }
-
- if (has_projection_param)
- {
- OP_REQUIRES(num_units == _ctx.at(projection_weights_index).shape().dim(1));
- OP_REQUIRES(output_size == _ctx.at(projection_weights_index).shape().dim(0));
- if (has_projection_bias)
- {
- OP_REQUIRES(output_size == _ctx.at(projection_bias_index).shape().dim(0));
- }
- }
-}
-
-void OperationValidator::visit(const ir::operation::L2Normalization &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
- return;
-
- const auto ifm_index{node.getInputs().at(ir::operation::L2Normalization::Input::INPUT)};
-
- auto ifm_shape = _ctx.at(ifm_index).shape();
- auto ofm_shape = _ctx.at(ofm_index).shape();
-
- OP_REQUIRES(ifm_shape.rank() == ofm_shape.rank());
-
- for (auto i = 0; i < ifm_shape.rank(); i++)
- {
- OP_REQUIRES(ifm_shape.dim(i) == ofm_shape.dim(i));
- }
-}
-
-void OperationValidator::visit(const ir::operation::Unpack &node)
-{
- const auto num{node.param().num};
- OP_REQUIRES(num == static_cast<int32_t>(node.getOutputs().size()));
- const auto axis{node.param().axis};
-
- const auto output_index{node.getInputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)};
-
- const auto &input_shape = _ctx.at(input_index).shape();
- const auto input_rank = static_cast<int32_t>(input_shape.rank());
-
- OP_REQUIRES(axis >= -input_rank && axis < input_rank);
-}
-
-void OperationValidator::visit(const ir::operation::Pad &node)
-{
- const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
- OP_REQUIRES(_ctx.at(pad_index).typeInfo().type() == ir::DataType::INT32);
-
- const auto output_index{node.getInputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
-
- const auto &pad_shape = _ctx.at(pad_index).shape();
- const auto input_rank = static_cast<int32_t>(_ctx.at(input_index).shape().rank());
-
- OP_REQUIRES(pad_shape.rank() == 2);
- OP_REQUIRES(pad_shape.dim(0) == input_rank);
- OP_REQUIRES(pad_shape.dim(1) == 2);
- OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank());
-}
-
-void OperationValidator::visit(const ir::operation::Select &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- // This validator does not check shape. So checking isDynamic() is skipped.
-
- const auto condition_index{node.getInputs().at(ir::operation::Select::Input::CONDITION)};
- const auto input_true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
- const auto input_false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};
- UNUSED_RELEASE(output_index);
- UNUSED_RELEASE(input_true_index);
- UNUSED_RELEASE(input_false_index);
-
- OP_REQUIRES(_ctx.at(condition_index).typeInfo().type() == ir::DataType::BOOL8);
-}
-
-void OperationValidator::visit(const ir::operation::StridedSlice &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
- const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
- const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
- const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
-
- UNUSED_RELEASE(starts_index);
- UNUSED_RELEASE(ends_index);
- UNUSED_RELEASE(strides_index);
-
- OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type());
-
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- OP_REQUIRES(_ctx.at(input_index).shape().rank() <= 4);
-}
-
-void OperationValidator::visit(const ir::operation::Split &node)
-{
- const auto input_index{node.getInputs().at(ir::operation::Split::Input::INPUT)};
-
- if (_ctx.at(input_index).info().isDynamic())
- return;
-
- const auto num_splits = node.param().num_splits;
- const auto input_rank = _ctx.at(input_index).shape().rank();
- const auto axis = node.param().axis < 0 ? node.param().axis + input_rank : node.param().axis;
-
- OP_REQUIRES(num_splits > 0 && num_splits <= 0xFFFF);
- OP_REQUIRES(axis >= 0 && axis < input_rank);
- OP_REQUIRES(node.getOutputs().size() == static_cast<uint32_t>(num_splits));
-
- OP_REQUIRES(_ctx.at(input_index).shape().dim(axis) % num_splits == 0);
-}
-
-void OperationValidator::visit(const ir::operation::Shape &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(0)};
- UNUSED_RELEASE(input_index);
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == 1);
-}
-
-void OperationValidator::visit(const ir::operation::ResizeBilinear &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
-
- if (_ctx.at(output_index).info().isDynamic())
- {
- return;
- }
- OP_REQUIRES(_ctx.at(input_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == 4);
-
- auto align_corners = node.param().align_corners;
- auto half_pixel_centers = node.param().half_pixel_centers;
-
- OP_REQUIRES(!align_corners || !half_pixel_centers);
-}
-
-void OperationValidator::visit(const ir::operation::Reverse &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Reverse::Input::INPUT)};
- const auto axis_index{node.getInputs().at(ir::operation::Reverse::Input::AXIS)};
-
- OP_REQUIRES(_ctx.at(axis_index).typeInfo().type() == ir::DataType::INT32);
- OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type());
-
- if (_ctx.at(output_index).info().isDynamic())
- return;
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
-}
-
-void OperationValidator::visit(const ir::operation::If &)
-{
- // TODO Add to validate with subgraphs
-}
-
-void OperationValidator::visit(const ir::operation::While &node)
-{
- // This validator does not check shape. So checking isDynamic() is skipped.
-
- OP_REQUIRES(node.getInputs().size() == node.getOutputs().size());
- // TODO Add to validate with subgraphs
-}
-
-void OperationValidator::visit(const ir::operation::SquaredDifference &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
-
- // Check for Type equivalence
- OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(lhs_index).typeInfo().type());
- OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type());
-
- // Check for dimension constraints
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- auto output_shape = _ctx.at(output_index).shape();
- auto lhs_shape = _ctx.at(lhs_index).shape();
- auto rhs_shape = _ctx.at(rhs_index).shape();
- // Check for output rank
- OP_REQUIRES(output_shape.rank() == std::max(lhs_shape.rank(), rhs_shape.rank()));
- auto min_rank = std::min(lhs_shape.rank(), rhs_shape.rank());
-
- for (int idx = 1; idx <= min_rank; idx++)
- {
- int l_idx = lhs_shape.rank() - idx;
- int r_idx = rhs_shape.rank() - idx;
- int out_idx = output_shape.rank() - idx;
-
- OP_REQUIRES((l_idx >= 0) && (r_idx >= 0) && (out_idx >= 0));
-
- auto l_dims = lhs_shape.dim(l_idx);
- auto r_dims = rhs_shape.dim(r_idx);
- auto out_dims = output_shape.dim(out_idx);
-
- OP_REQUIRES(((l_dims == r_dims) && (out_dims == l_dims)) ||
- ((l_dims == 1) && (out_dims == r_dims)) || ((r_dims == 1) && (out_dims == l_dims)));
- }
- auto &tmp_shape = (lhs_shape.rank() > rhs_shape.rank()) ? lhs_shape : rhs_shape;
- for (int idx = min_rank + 1; idx <= output_shape.rank(); idx++)
- {
- int out_idx = output_shape.rank() - idx;
- int tmp_idx = tmp_shape.rank() - idx;
-
- OP_REQUIRES((out_idx >= 0) && (tmp_idx >= 0) &&
- (output_shape.dim(out_idx) == tmp_shape.dim(tmp_idx)));
- }
-}
-void OperationValidator::visit(const ir::operation::Tile &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(0)};
- const auto multiple_index{node.getInputs().at(1)};
-
- OP_REQUIRES(_ctx.at(multiple_index).shape().rank() == 1);
- OP_REQUIRES(_ctx.at(multiple_index).shape().dim(0) == _ctx.at(input_index).shape().rank());
- OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank());
-}
-
-void OperationValidator::visit(const ir::operation::Range &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto start_index{node.getInputs().at(ir::operation::Range::Input::START)};
- const auto limit_index{node.getInputs().at(ir::operation::Range::Input::LIMIT)};
- const auto delta_index{node.getInputs().at(ir::operation::Range::Input::DELTA)};
-
- // Check for dimension constraints
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- OP_REQUIRES(_ctx.at(start_index).shape().rank() == 0);
- OP_REQUIRES(_ctx.at(limit_index).shape().rank() == 0);
- OP_REQUIRES(_ctx.at(delta_index).shape().rank() == 0);
-}
-
-void OperationValidator::visit(const ir::operation::MatrixBandPart &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::MatrixBandPart::Input::INPUT)};
- const auto num_lower_index{
- node.getInputs().at(ir::operation::MatrixBandPart::Input::NUM_LOWER_DIAG)};
- const auto num_upper_index{
- node.getInputs().at(ir::operation::MatrixBandPart::Input::NUM_UPPER_DIAG)};
-
- // Check for dimension constraints
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- OP_REQUIRES(_ctx.at(input_index).shape().rank() >= 2); // input must be more than 2 dim matrix
- OP_REQUIRES(_ctx.at(num_upper_index).shape().rank() == 0); // num_lower must be scalar
- OP_REQUIRES(_ctx.at(num_lower_index).shape().rank() == 0); // num_upper must be scalar
-}
-
-void OperationValidator::visit(const ir::operation::LogSoftmax &node)
-{
- VERBOSE(LogSoftmax) << "Configure LOGSOFTMAX operation" << std::endl;
-
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(0)};
-
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
-}
-
-} // namespace compiler
-} // namespace onert
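// The deleted validator hinged on the OP_REQUIRES macro, which turns a failed
// predicate into a std::runtime_error tagged with the source line. A standalone
// sketch of that pattern, reusing the macro body from the removed file:
#include <stdexcept>
#include <string>

#define OP_REQUIRES(EXP)                                                                         \
  do                                                                                             \
  {                                                                                              \
    if (!(EXP))                                                                                  \
      throw std::runtime_error("OperationValidator failed at line " + std::to_string(__LINE__)); \
  } while (0)

void checkRank(int rank)
{
  OP_REQUIRES(rank >= 1 && rank <= 4); // throws with the offending line number on failure
}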
diff --git a/runtime/onert/core/src/compiler/OperationValidator.h b/runtime/onert/core/src/compiler/OperationValidator.h
deleted file mode 100644
index deb6357bb..000000000
--- a/runtime/onert/core/src/compiler/OperationValidator.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_COMPILER_OPERATION_VALIDATOR_H__
-#define __ONERT_COMPILER_OPERATION_VALIDATOR_H__
-
-#include "ir/Layout.h"
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-class Graph;
-class Operands;
-} // namespace ir
-} // namespace onert
-
-namespace onert
-{
-namespace compiler
-{
-
-class OperationValidator : public ir::OperationVisitor
-{
-public:
- OperationValidator(void) = delete;
- OperationValidator(const ir::Graph &graph);
-
-public:
- void operator()();
-
-public:
- void visit(const ir::operation::BatchMatMul &node) override;
- void visit(const ir::operation::BatchToSpaceND &node) override;
- void visit(const ir::operation::Comparison &node) override;
- void visit(const ir::operation::Softmax &node) override;
- void visit(const ir::operation::InstanceNorm &node) override;
- void visit(const ir::operation::Permute &node) override;
- void visit(const ir::operation::Pool2D &node) override;
- void visit(const ir::operation::Reduce &node) override;
- void visit(const ir::operation::Transpose &node) override;
- void visit(const ir::operation::RNN &node) override;
- void visit(const ir::operation::SpaceToBatchND &node) override;
- void visit(const ir::operation::SpaceToDepth &node) override;
- void visit(const ir::operation::ElementwiseActivation &node) override;
- void visit(const ir::operation::ElementwiseBinary &node) override;
- void visit(const ir::operation::ElementwiseUnary &node) override;
- void visit(const ir::operation::EmbeddingLookup &node) override;
- void visit(const ir::operation::ExpandDims &node) override;
- void visit(const ir::operation::HashtableLookup &node) override;
- void visit(const ir::operation::TransposeConv &node) override;
- void visit(const ir::operation::Gather &node) override;
- void visit(const ir::operation::DepthToSpace &node) override;
- void visit(const ir::operation::Pack &node) override;
- void visit(const ir::operation::LSTM &node) override;
- void visit(const ir::operation::L2Normalization &node) override;
- void visit(const ir::operation::Unpack &node) override;
- void visit(const ir::operation::Pad &node) override;
- void visit(const ir::operation::Select &node) override;
- void visit(const ir::operation::StridedSlice &node) override;
- void visit(const ir::operation::Split &node) override;
- void visit(const ir::operation::Shape &node) override;
- void visit(const ir::operation::ResizeBilinear &node) override;
- void visit(const ir::operation::Reverse &node) override;
- void visit(const ir::operation::If &node) override;
- void visit(const ir::operation::While &node) override;
- void visit(const ir::operation::SquaredDifference &node) override;
- void visit(const ir::operation::Tile &node) override;
- void visit(const ir::operation::Range &node) override;
- void visit(const ir::operation::MatrixBandPart &node) override;
- void visit(const ir::operation::LogSoftmax &node) override;
-
-private:
- void checkUnaryOp(const ir::Operation &node);
-
-private:
- // TODO Remove _ctx field
- const ir::Graph &_graph;
- const ir::Operands &_ctx;
- ir::Layout _current_op_seq_layout;
-};
-
-} // namespace compiler
-} // namespace onert
-
-#endif // __ONERT_COMPILER_OPERATION_VALIDATOR_H__
diff --git a/runtime/onert/core/src/compiler/ParamChecker.cc b/runtime/onert/core/src/compiler/ParamChecker.cc
deleted file mode 100644
index c4f80f087..000000000
--- a/runtime/onert/core/src/compiler/ParamChecker.cc
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ParamChecker.h"
-
-#include "ir/Graph.h"
-
-namespace onert
-{
-namespace compiler
-{
-
-void ParamChecker::operator()()
-{
- _model->operations().iterate(
- [&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); });
-}
-
-} // namespace compiler
-} // namespace onert
diff --git a/runtime/onert/core/src/compiler/ParamChecker.h b/runtime/onert/core/src/compiler/ParamChecker.h
deleted file mode 100644
index 61429d521..000000000
--- a/runtime/onert/core/src/compiler/ParamChecker.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file ParamChecker.h
- * @brief This file contains ParamChecker to check\n
- * operations' parameters are compilable at machine independent phase\n
- * ex) Check param is constant
- */
-#ifndef __ONERT_COMPILER_PARAM_CHECKER_H__
-#define __ONERT_COMPILER_PARAM_CHECKER_H__
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-class Graph;
-} // namespace ir
-} // namespace onert
-
-namespace onert
-{
-namespace compiler
-{
-
-class ParamChecker : public ir::OperationVisitor
-{
-public:
- /**
- * @brief Construct a new Param Checker object (deleted)
- */
- ParamChecker(void) = delete;
- /**
- * @brief Construct a new Param Checker object
- * @param[in] model Graph model to check
- */
- ParamChecker(std::shared_ptr<ir::Graph> model) : _model{model} {}
-
-public:
- /**
- * @brief Run parameter analysis
- */
- void operator()();
- /**
- * @brief Return analysis result if model have non-const parameter
- * @return @c true if there is non-const parameter, otherwise @c false
- */
- bool haveNoneConstParam(void) { return _nonConstParam; }
-
-private:
- const std::shared_ptr<ir::Graph> _model;
- bool _nonConstParam{false};
-};
-
-} // namespace compiler
-} // namespace onert
-
-#endif // __ONERT_COMPILER_OPERATION_VALIDATOR_H__
diff --git a/runtime/onert/core/src/compiler/PermuteFactor.cc b/runtime/onert/core/src/compiler/PermuteFactor.cc
new file mode 100644
index 000000000..f0081a2a4
--- /dev/null
+++ b/runtime/onert/core/src/compiler/PermuteFactor.cc
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "compiler/PermuteFactor.h"
+
+#include <assert.h>
+#include <ostream>
+
+#include "backend/Backend.h"
+
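+// Streams a PermuteFactor as "(backend id/layout)". For illustration, a factor for the "cpu"
+// backend with NHWC layout would print as "(cpu/NHWC)".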
+std::ostream &operator<<(std::ostream &os, const onert::compiler::PermuteFactor &obj)
+{
+ assert(obj.backend() && obj.backend()->config());
+ return os << "(" << obj.backend()->config()->id() << "/" << to_string(obj.layout()) << ")";
+}
diff --git a/runtime/onert/core/src/compiler/ShapeValidator.cc b/runtime/onert/core/src/compiler/ShapeValidator.cc
new file mode 100644
index 000000000..5c25ea1d1
--- /dev/null
+++ b/runtime/onert/core/src/compiler/ShapeValidator.cc
@@ -0,0 +1,1082 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ShapeValidator.h"
+
+#include <typeinfo>
+
+#include "ir/Graph.h"
+#include "util/logging.h"
+#include "util/Utils.h"
+
+#define OP_REQUIRES(EXP) \
+ do \
+ { \
+ if (!(EXP)) \
+ throw std::runtime_error("ShapeValidator failed at line " + std::to_string(__LINE__)); \
+ } while (0)
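+
+// Usage: OP_REQUIRES(cond). When cond is false, this throws std::runtime_error with a message
+// like "ShapeValidator failed at line 42", where the line number is that of the failing check.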
+
+namespace onert
+{
+namespace compiler
+{
+
+ShapeValidator::ShapeValidator(const ir::Graph &graph) : _graph{graph} {}
+
+void ShapeValidator::checkUnaryOp(const ir::Operation &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(0)};
+
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ // Check if I/O shapes match
+ OP_REQUIRES(operands.at(output_index).shape() == operands.at(input_index).shape());
+}
+
+void ShapeValidator::operator()()
+{
+ _graph.operations().iterate(
+ [&](const ir::OperationIndex &, const ir::IOperation &node) { node.accept(*this); });
+}
+
+void ShapeValidator::visit(const ir::operation::BatchMatMul &node)
+{
+ const auto &operands = _graph.operands();
+ const auto lhs_index{node.getInputs().at(ir::operation::BatchMatMul::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::BatchMatMul::Input::RHS)};
+ const auto out_index{node.getOutputs().at(0)};
+
+ if (operands.at(out_index).info().isDynamic())
+ return;
+
+ OP_REQUIRES(operands.at(lhs_index).shape().rank() <= 4);
+ OP_REQUIRES(operands.at(rhs_index).shape().rank() <= 4);
+ OP_REQUIRES(operands.at(lhs_index).shape().rank() >= 2);
+ OP_REQUIRES(operands.at(rhs_index).shape().rank() >= 2);
+}
+
+void ShapeValidator::visit(const ir::operation::BatchToSpaceND &node)
+{
+ const auto &operands = _graph.operands();
+ const auto ofm_index{node.getOutputs().at(0)};
+ if (operands.at(ofm_index).info().isDynamic())
+ return;
+
+ const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)};
+ const auto block_size_index{
+ node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
+
+ const auto frontend_layout = _graph.layout();
+ const auto input_shape = operands.at(ifm_index).shape().asFeature(frontend_layout);
+ const auto output_shape = operands.at(ofm_index).shape().asFeature(frontend_layout);
+
+ // All requirements are as per the NNAPI specification.
+ OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(ofm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(block_size_index).shape().rank() == 1);
+
+ OP_REQUIRES(operands.at(block_size_index).shape().dim(0) == 2);
+
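+ // If the optional CROPS_DATA input is present, it must be a [spatial_rank, 2] tensor of
+ // (begin, end) crops; for a 4-D input that means shape {2, 2}.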
+ if (node.getInputs().size() != 2)
+ {
+ const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)};
+ OP_REQUIRES(operands.at(crops_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(crops_index).shape().dim(0) ==
+ (operands.at(ifm_index).shape().rank() - 2));
+ OP_REQUIRES(operands.at(crops_index).shape().dim(1) == 2);
+ }
+
+ OP_REQUIRES(input_shape.C == output_shape.C);
+}
+
+void ShapeValidator::visit(const ir::operation::BCQFullyConnected &node)
+{
+ const auto &operands = _graph.operands();
+ const auto ofm_index{node.getOutputs().at(0)};
+ if (operands.at(ofm_index).info().isDynamic())
+ return;
+
+ const auto ifm_index{node.getInputs().at(ir::operation::BCQFullyConnected::Input::INPUT)};
+ const auto weight_scales_index{
+ node.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_SCALES)};
+ const auto weight_binary_index{
+ node.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_BINARY)};
+ const auto weight_cluster_index{
+ node.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)};
+ // const auto bias_index{node.getInputs().at(ir::operation::BCQFullyConnected::Input::BIAS)};
+
+ OP_REQUIRES(operands.at(ifm_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(ofm_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(weight_scales_index).shape().rank() == 1);
+ OP_REQUIRES(operands.at(weight_binary_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(weight_cluster_index).shape().rank() == 2);
+
+ OP_REQUIRES(operands.at(ifm_index).shape().dim(1) == operands.at(ofm_index).shape().dim(1));
+
+ OP_REQUIRES(operands.at(weight_cluster_index).shape().dim(0) > 0);
+ OP_REQUIRES(operands.at(weight_cluster_index).shape().dim(1) == 2);
+
+ // More shape validation is done inside the kernel.
+
+ // TODO Check bias dimension (can be null tensor)
+}
+
+void ShapeValidator::visit(const ir::operation::BCQGather &node)
+{
+ const auto &operands = _graph.operands();
+ const auto ofm_index{node.getOutputs().at(0)};
+ if (operands.at(ofm_index).info().isDynamic())
+ return;
+
+ const auto indices_index{node.getInputs().at(ir::operation::BCQGather::Input::INDICES)};
+ const auto input_binary_index{node.getInputs().at(ir::operation::BCQGather::Input::INPUT_BINARY)};
+ const auto input_scales_index{node.getInputs().at(ir::operation::BCQGather::Input::INPUT_SCALES)};
+ const auto input_clusters_index{
+ node.getInputs().at(ir::operation::BCQGather::Input::INPUT_CLUSTERS)};
+
+ OP_REQUIRES(operands.at(indices_index).shape().rank() <= 2); // TODO Support rank up to 4 or more
+ OP_REQUIRES(operands.at(input_binary_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(input_scales_index).shape().rank() == 1);
+ OP_REQUIRES(operands.at(input_clusters_index).shape().rank() == 2);
+
+ OP_REQUIRES(operands.at(input_clusters_index).shape().dim(0) > 0);
+ OP_REQUIRES(operands.at(input_clusters_index).shape().dim(1) == 2);
+
+ // More shape validation is done inside the kernel.
+}
+
+void ShapeValidator::visit(const ir::operation::Comparison &)
+{
+ // TODO Shape validation of comparison
+}
+
+void ShapeValidator::visit(const ir::operation::Softmax &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(0)};
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ const auto input_index{node.getInputs().at(0)};
+
+ OP_REQUIRES(operands.at(output_index).shape().rank() == operands.at(input_index).shape().rank());
+}
+
+void ShapeValidator::visit(const ir::operation::InstanceNorm &node)
+{
+ const auto &operands = _graph.operands();
+ const auto ofm_index{node.getOutputs().at(0)};
+ if (operands.at(ofm_index).info().isDynamic())
+ return;
+
+ const auto ifm_index{node.getInputs().at(ir::operation::InstanceNorm::Input::INPUT)};
+ const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
+ const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
+
+ OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(ifm_index).shape() == operands.at(ofm_index).shape());
+ OP_REQUIRES(operands.at(gamma_index).shape().rank() == 1);
+ OP_REQUIRES(operands.at(beta_index).shape().rank() == 1);
+}
+
+void ShapeValidator::visit(const ir::operation::Pool2D &node)
+{
+ const auto &operands = _graph.operands();
+ const auto ofm_index{node.getOutputs().at(0)};
+ if (operands.at(ofm_index).info().isDynamic())
+ return;
+
+ const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};
+
+ OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4);
+}
+
+void ShapeValidator::visit(const ir::operation::Permute &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(0)};
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ const auto input_index{node.getInputs().at(0)};
+
+ OP_REQUIRES(operands.at(output_index).shape().rank() == operands.at(input_index).shape().rank());
+}
+
+void ShapeValidator::visit(const ir::operation::Reduce &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(0)};
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ const auto &input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
+ const auto &input_shape = operands.at(input_index).shape();
+ const auto &output_shape = operands.at(output_index).shape();
+
+ OP_REQUIRES(input_shape.rank() <= 4);
+ OP_REQUIRES(output_shape.rank() <= input_shape.rank());
+
+ // NOTE For 4-D inputs, if the ranks of input and output differ, this runtime only supports
+ // reducing height and width, or reducing depth.
+ // TODO Support all reduction cases for dimensions up to 4.
+ // For correct permuting, the output shape should keep each dimension at the same position it
+ // has in the input, but the positions of matching dimensions may differ between the two.
+ // For example, input shape {2,3,4,5} can be reduced to output shape {3,5}. The
+ // position-preserving output shape would be {1,3,1,5}, but the actual output shape may be
+ // {3,5}; naively extending that to 4 dimensions yields {1,1,3,5} instead.
+ // Even if the output shape were changed to {1,3,1,5}, another problem remains: the shape of the
+ // output tensor consumed by the next operation would then be {1,3,1,5}, even when that next
+ // operation does not expect it.
+ if (input_shape.rank() == 4 && input_shape.rank() != output_shape.rank())
+ {
+ if (output_shape.rank() == 2)
+ {
+ // Reducing HW
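+ // e.g. an NHWC input {N,H,W,C} reduced over H and W yields output {N,C}.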
+ OP_REQUIRES(input_shape.dim(0) == output_shape.dim(0) &&
+ input_shape.dim(3) == output_shape.dim(1));
+ }
+ else if (output_shape.rank() == 3)
+ {
+ // Reducing C or
+ // (Reducing H and C(input and output) == 1) or (Reducing W and C(input and output) == 1)
+ OP_REQUIRES(
+ (input_shape.dim(0) == output_shape.dim(0) && input_shape.dim(1) == output_shape.dim(1) &&
+ input_shape.dim(2) == output_shape.dim(2)) ||
+ (input_shape.dim(0) == output_shape.dim(0) &&
+ (input_shape.dim(1) == output_shape.dim(1) || input_shape.dim(2) == output_shape.dim(1)) &&
+ input_shape.dim(3) == 1 && output_shape.dim(2) == 1));
+ }
+ }
+}
+
+void ShapeValidator::visit(const ir::operation::Transpose &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(0)};
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
+ const auto perm_index{node.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};
+
+ const auto &output_shape = operands.at(output_index).shape();
+ const auto &input_shape = operands.at(input_index).shape();
+
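+ // An empty PERMUTATION operand is allowed (conventionally meaning "reverse the dimension
+ // order"); otherwise it must hold exactly one element per input dimension, e.g. a perm of
+ // shape {4} such as {0, 3, 1, 2} for a rank-4 input.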
+ OP_REQUIRES(operands.at(perm_index).shape().num_elements() == 0 ||
+ input_shape.rank() ==
+ static_cast<int>(operands.at(perm_index).shape().num_elements()));
+ OP_REQUIRES(input_shape.rank() == output_shape.rank());
+}
+
+void ShapeValidator::visit(const ir::operation::RNN &node)
+{
+ // NOTE This validation is for a static RNN (non-dynamic shape), not for a dynamic RNN
+ // TODO Support dynamic RNN
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)};
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ const auto hidden_state_out_index{
+ node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};
+
+ const auto input_index{node.getInputs().at(ir::operation::RNN::Input::INPUT)};
+ const auto weights_index{node.getInputs().at(ir::operation::RNN::Input::WEIGHTS)};
+ const auto recurrent_weights_index{
+ node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
+ const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)};
+ const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)};
+
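+ // Expected 2-D shapes (checked below): input {batch, input_size}, weights
+ // {num_units, input_size}, recurrent_weights {num_units, num_units}, and hidden state and
+ // output {batch, num_units}.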
+ const auto batch_size = operands.at(output_index).shape().dim(0);
+ const auto num_units = operands.at(output_index).shape().dim(1);
+
+ OP_REQUIRES(operands.at(output_index).shape().rank() == 2 &&
+ operands.at(hidden_state_out_index).shape().rank() == 2 &&
+ operands.at(input_index).shape().rank() == 2 &&
+ operands.at(weights_index).shape().rank() == 2 &&
+ operands.at(recurrent_weights_index).shape().rank() == 2 &&
+ operands.at(hidden_state_in_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(bias_index).shape().rank() == 1);
+
+ OP_REQUIRES(batch_size == operands.at(input_index).shape().dim(0) &&
+ batch_size == operands.at(hidden_state_in_index).shape().dim(0) &&
+ batch_size == operands.at(hidden_state_out_index).shape().dim(0));
+ OP_REQUIRES(operands.at(input_index).shape().dim(1) == operands.at(weights_index).shape().dim(1));
+
+ OP_REQUIRES(num_units == operands.at(weights_index).shape().dim(0) &&
+ num_units == operands.at(recurrent_weights_index).shape().dim(0) &&
+ num_units == operands.at(bias_index).shape().dim(0));
+ OP_REQUIRES(num_units == operands.at(output_index).shape().dim(1) &&
+ num_units == operands.at(recurrent_weights_index).shape().dim(1) &&
+ num_units == operands.at(hidden_state_in_index).shape().dim(1) &&
+ num_units == operands.at(hidden_state_out_index).shape().dim(1));
+}
+
+void ShapeValidator::visit(const ir::operation::SpaceToBatchND &node)
+{
+ const auto &operands = _graph.operands();
+ const auto ofm_index{node.getOutputs().at(0)};
+ if (operands.at(ofm_index).info().isDynamic())
+ return;
+
+ const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
+ const auto block_size_index{
+ node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
+ const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
+
+ const auto frontend_layout = _graph.layout();
+ const auto input_shape = operands.at(ifm_index).shape().asFeature(frontend_layout);
+ const auto output_shape = operands.at(ofm_index).shape().asFeature(frontend_layout);
+
+ // All requirements are as per the NNAPI specification.
+ OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(ofm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(block_size_index).shape().rank() == 1);
+ OP_REQUIRES(operands.at(paddings_index).shape().rank() == 2);
+
+ OP_REQUIRES(operands.at(block_size_index).shape().dim(0) == 2);
+ OP_REQUIRES(operands.at(paddings_index).shape().dim(0) == 2);
+ OP_REQUIRES(operands.at(paddings_index).shape().dim(1) == 2);
+
+ OP_REQUIRES(input_shape.C == output_shape.C);
+}
+
+void ShapeValidator::visit(const ir::operation::SpaceToDepth &node)
+{
+ const auto &operands = _graph.operands();
+ const auto ofm_index{node.getOutputs().at(0)};
+ if (operands.at(ofm_index).info().isDynamic())
+ return;
+
+ const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
+
+ const auto frontend_layout = _graph.layout();
+ const auto input_shape = operands.at(ifm_index).shape().asFeature(frontend_layout);
+ const auto output_shape = operands.at(ofm_index).shape().asFeature(frontend_layout);
+ const auto block_size = node.param().block_size;
+
+ // All assertions are as per the NNAPI specification.
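+ // For illustration, with block_size = 2 an NHWC input {1, 4, 4, 3} maps to output
+ // {1, 2, 2, 12}: H and W shrink by block_size while C grows by block_size^2.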
+ OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(ofm_index).shape().rank() == 4);
+ OP_REQUIRES((input_shape.H % block_size == 0) && (input_shape.W % block_size == 0));
+ OP_REQUIRES(input_shape.N == output_shape.N);
+ OP_REQUIRES(input_shape.C * block_size * block_size == output_shape.C);
+}
+
+void ShapeValidator::visit(const ir::operation::ElementwiseActivation &node) { checkUnaryOp(node); }
+
+void ShapeValidator::visit(const ir::operation::ElementwiseBinary &)
+{
+ // TODO Shape validation of ElementwiseBinary
+}
+
+void ShapeValidator::visit(const ir::operation::ElementwiseUnary &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
+
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ OP_REQUIRES(operands.at(output_index).shape() == operands.at(input_index).shape());
+}
+
+void ShapeValidator::visit(const ir::operation::EmbeddingLookup &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(0)};
+ const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
+ const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
+
+ const auto &output_obj = operands.at(output_index);
+ const auto &lookups_obj = operands.at(lookups_index);
+ const auto &values_obj = operands.at(values_index);
+
+ // Verify the operands here, not in SimpleEmbeddingLookup::configure(), because ACL sometimes
+ // modifies the TensorShape (Issue: https://github.sec.samsung.net/STAR/nnfw/issues/729)
+ {
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ const auto &output_shape = output_obj.shape();
+ const auto &lookups_shape = lookups_obj.shape();
+ const auto &values_shape = values_obj.shape();
+
+ OP_REQUIRES(lookups_shape.rank() == 1);
+ OP_REQUIRES(values_shape.rank() >= 2);
+
+ // The output should be an n-D tensor with the same rank and shape as the values tensor, except
+ // for the first dimension, which has the same size as lookups' only dimension.
+ OP_REQUIRES(output_shape.rank() == values_shape.rank());
+ OP_REQUIRES(output_shape.dim(0) == lookups_shape.dim(0));
+ for (int n = 1; n < output_shape.rank(); ++n)
+ {
+ OP_REQUIRES(output_shape.dim(n) == values_shape.dim(n));
+ }
+ }
+}
+
+void ShapeValidator::visit(const ir::operation::ExpandDims &node)
+{
+ const auto &operands = _graph.operands();
+ const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
+
+ if (operands.at(axis_index).info().isDynamic())
+ return;
+ OP_REQUIRES(operands.at(axis_index).shape().rank() <= 1);
+}
+
+void ShapeValidator::visit(const ir::operation::HashtableLookup &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)};
+ const auto lookups_index{node.getInputs().at(ir::operation::HashtableLookup::Input::LOOKUPS)};
+ const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
+ const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
+
+ const auto &output_obj = operands.at(output_index);
+ const auto &lookups_obj = operands.at(lookups_index);
+ const auto &keys_obj = operands.at(keys_index);
+ const auto &values_obj = operands.at(values_index);
+
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ const auto &output_shape = output_obj.shape();
+ const auto &lookups_shape = lookups_obj.shape();
+ const auto &keys_shape = keys_obj.shape();
+ const auto &values_shape = values_obj.shape();
+
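+ // KEYS and VALUES pair up along dim 0, and each lookup selects one row of VALUES, so the
+ // output keeps the values rank with dim 0 replaced by the number of lookups.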
+ OP_REQUIRES(values_shape.rank() == output_shape.rank());
+ OP_REQUIRES(lookups_shape.rank() == 1);
+ OP_REQUIRES(keys_shape.rank() == 1);
+ OP_REQUIRES(values_shape.dim(0) == keys_shape.dim(0));
+ OP_REQUIRES(lookups_shape.dim(0) == output_shape.dim(0));
+}
+
+void ShapeValidator::visit(const ir::operation::TransposeConv &node)
+{
+ // shape check
+ const auto &operands = _graph.operands();
+ const auto ofm_index{node.getOutputs().at(0)};
+
+ if (operands.at(ofm_index).info().isDynamic())
+ return;
+
+ const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)};
+ const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)};
+
+ // Only 4D tensors are supported
+ OP_REQUIRES(operands.at(ofm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(ofm_index).shape().rank() == operands.at(ifm_index).shape().rank());
+ OP_REQUIRES(operands.at(ofm_index).shape().rank() == operands.at(ker_index).shape().rank());
+
+ const auto frontend_layout = _graph.layout();
+ const auto ofm_shape = operands.at(ofm_index).shape().asFeature(frontend_layout);
+ const auto ifm_shape = operands.at(ifm_index).shape().asFeature(frontend_layout);
+ // The kernel has IHWO layout only on the frontend, so ker_shape is mapped as follows:
+ // I -> N
+ // H -> H
+ // W -> W
+ // O -> C
+ const auto ker_shape = operands.at(ker_index).shape().asFeature(ir::Layout::NHWC);
+
+ OP_REQUIRES(ifm_shape.N == ofm_shape.N);
+ OP_REQUIRES(ifm_shape.C == ker_shape.C);
+ OP_REQUIRES(ker_shape.N == ofm_shape.C);
+}
+
+void ShapeValidator::visit(const ir::operation::Gather &node)
+{
+ const auto &operands = _graph.operands();
+ const auto ofm_index{node.getOutputs().at(0)};
+ if (operands.at(ofm_index).info().isDynamic())
+ return;
+
+ const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
+ const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
+
+ const auto &ifm_shape = operands.at(ifm_index).shape();
+ const auto &indices_shape = operands.at(indices_index).shape();
+ const auto &ofm_shape = operands.at(ofm_index).shape();
+
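+ // Gather semantics give output rank = ifm rank + indices rank - 1; this validator only bounds
+ // each rank independently.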
+ OP_REQUIRES(ifm_shape.rank() <= 4);
+ OP_REQUIRES(indices_shape.rank() <= 3);
+ OP_REQUIRES(ofm_shape.rank() <= 4);
+}
+
+void ShapeValidator::visit(const ir::operation::DepthToSpace &node)
+{
+ const auto &operands = _graph.operands();
+ int32_t block_size = node.param().block_size;
+
+ // shape check
+ const auto output_index{node.getOutputs().at(0)};
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
+
+ const auto frontend_layout = _graph.layout();
+ const auto output_shape = operands.at(output_index).shape().asFeature(frontend_layout);
+ const auto input_shape = operands.at(input_index).shape().asFeature(frontend_layout);
+
+ OP_REQUIRES(operands.at(input_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(output_index).shape().rank() == 4);
+
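+ // DepthToSpace is the inverse of SpaceToDepth: for illustration, with block_size = 2 an NHWC
+ // input {1, 2, 2, 12} maps to output {1, 4, 4, 3}.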
+ {
+ OP_REQUIRES(output_shape.N == input_shape.N);
+ OP_REQUIRES(output_shape.H == input_shape.H * block_size);
+ OP_REQUIRES(output_shape.W == input_shape.W * block_size);
+ OP_REQUIRES(input_shape.C % (block_size * block_size) == 0);
+ OP_REQUIRES(output_shape.C == input_shape.C / (block_size * block_size));
+ }
+}
+
+void ShapeValidator::visit(const ir::operation::Pack &node)
+{
+ const auto &operands = _graph.operands();
+ const auto axis{node.param().axis};
+ const auto output_index{node.getOutputs().at(0)};
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ // shape check
+ const auto &output_shape = operands.at(output_index).shape();
+ const auto output_rank = static_cast<int32_t>(output_shape.rank());
+
+ const auto input1_index{node.getInputs().at(0)};
+ const auto &input_shape = operands.at(input1_index).shape();
+
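+ // Pack adds one dimension, so the axis is validated against the output rank; e.g. packing
+ // rank-2 inputs gives a rank-3 output with valid axes in [-3, 3).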
+ OP_REQUIRES(axis >= -output_rank && axis < output_rank);
+ for (const auto &index : node.getInputs())
+ {
+ OP_REQUIRES(input_shape == operands.at(index).shape());
+ }
+}
+
+void ShapeValidator::visit(const ir::operation::LSTM &node)
+{
+ // NOTE This validation is for a static LSTM (non-dynamic shape), not for a dynamic LSTM
+ // TODO Support dynamic LSTM
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ const auto scratch_buffer_index{
+ node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)}; // Optional
+ const auto output_state_out_index{
+ node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)}; // Optional
+ const auto cell_state_out_index{
+ node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)}; // Optional
+
+ const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
+ const auto input_to_input_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // Optional
+ const auto input_to_forget_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
+ const auto input_to_cell_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
+ const auto input_to_output_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
+ const auto recurrent_to_input_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // Optional
+ const auto recurrent_to_forget_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
+ const auto recurrent_to_cell_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
+ const auto recurrent_to_output_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
+ const auto cell_to_input_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // Optional
+ const auto cell_to_forget_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // Optional
+ const auto cell_to_output_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // Optional
+ const auto input_gate_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)}; // Optional
+ const auto forget_gate_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
+ const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
+ const auto output_gate_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
+ const auto projection_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // Optional
+ const auto projection_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // Optional
+ const auto output_state_in_index{
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
+ const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
+
+ OP_REQUIRES(operands.at(input_index).shape().rank() == operands.at(output_index).shape().rank());
+ for (int i = 0; i < operands.at(input_index).shape().rank() - 1; ++i)
+ {
+ OP_REQUIRES(operands.at(input_index).shape().dim(i) ==
+ operands.at(output_index).shape().dim(i));
+ }
+ OP_REQUIRES((operands.at(output_index).shape().rank() == 2 ||
+ operands.at(output_index).shape().rank() == 3) &&
+ (operands.at(input_index).shape().rank() == 2 ||
+ operands.at(input_index).shape().rank() == 3) &&
+ (!operands.exist(input_to_input_weights_index) ||
+ operands.at(input_to_input_weights_index).shape().rank() == 2) &&
+ operands.at(input_to_forget_weights_index).shape().rank() == 2 &&
+ operands.at(input_to_cell_weights_index).shape().rank() == 2 &&
+ operands.at(input_to_output_weights_index).shape().rank() == 2 &&
+ (!operands.exist(recurrent_to_input_weights_index) ||
+ operands.at(recurrent_to_input_weights_index).shape().rank() == 2) &&
+ operands.at(recurrent_to_forget_weights_index).shape().rank() == 2 &&
+ operands.at(recurrent_to_cell_weights_index).shape().rank() == 2 &&
+ operands.at(recurrent_to_output_weights_index).shape().rank() == 2 &&
+ (!operands.exist(projection_weights_index) ||
+ operands.at(projection_weights_index).shape().rank() == 2) &&
+ operands.at(output_state_in_index).shape().rank() == 2 &&
+ operands.at(cell_state_in_index).shape().rank() == 2);
+
+ OP_REQUIRES((!operands.exist(cell_to_input_weights_index) ||
+ operands.at(cell_to_input_weights_index).shape().rank() == 1) &&
+ (!operands.exist(cell_to_forget_weights_index) ||
+ operands.at(cell_to_forget_weights_index).shape().rank() == 1) &&
+ (!operands.exist(cell_to_output_weights_index) ||
+ operands.at(cell_to_output_weights_index).shape().rank() == 1) &&
+ (!operands.exist(input_gate_bias_index) ||
+ operands.at(input_gate_bias_index).shape().rank() == 1) &&
+ operands.at(forget_gate_bias_index).shape().rank() == 1 &&
+ operands.at(cell_bias_index).shape().rank() == 1 &&
+ operands.at(output_gate_bias_index).shape().rank() == 1 &&
+ (!operands.exist(projection_bias_index) ||
+ operands.at(projection_bias_index).shape().rank() == 1));
+
+ // CIFG assertion
+ OP_REQUIRES(((!operands.exist(input_to_input_weights_index) ||
+ (operands.at(input_to_input_weights_index).shape().dim(0) == 0 &&
+ operands.at(input_to_input_weights_index).shape().dim(1) == 0)) &&
+ (!operands.exist(recurrent_to_input_weights_index) ||
+ (operands.at(recurrent_to_input_weights_index).shape().dim(0) == 0 &&
+ operands.at(recurrent_to_input_weights_index).shape().dim(1) == 0)) &&
+ (!operands.exist(input_gate_bias_index) ||
+ operands.at(input_gate_bias_index).shape().dim(0) == 0) &&
+ (!operands.exist(cell_to_input_weights_index) ||
+ operands.at(cell_to_input_weights_index).shape().dim(0) == 0)) ||
+ ((operands.exist(input_to_input_weights_index) &&
+ (operands.at(input_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(input_to_input_weights_index).shape().dim(1) != 0)) &&
+ (operands.exist(recurrent_to_input_weights_index) &&
+ (operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0)) &&
+ (operands.exist(input_gate_bias_index) &&
+ operands.at(input_gate_bias_index).shape().dim(0) != 0)));
+
+ // Peephole assertion
+ OP_REQUIRES(((!operands.exist(cell_to_forget_weights_index) ||
+ operands.at(cell_to_forget_weights_index).shape().dim(0) == 0) &&
+ (!operands.exist(cell_to_output_weights_index) ||
+ operands.at(cell_to_output_weights_index).shape().dim(0) == 0)) ||
+ ((operands.exist(cell_to_forget_weights_index) &&
+ operands.at(cell_to_forget_weights_index).shape().dim(0) != 0) &&
+ (operands.exist(cell_to_output_weights_index) &&
+ operands.at(cell_to_output_weights_index).shape().dim(0) != 0)));
+
+ bool has_input_to_input_weights =
+ operands.exist(input_to_input_weights_index) &&
+ (operands.at(input_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(input_to_input_weights_index).shape().dim(1) != 0);
+ bool has_recurrent_to_input_weights =
+ operands.exist(recurrent_to_input_weights_index) &&
+ (operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0);
+ bool has_input_gate_bias =
+ operands.exist(input_gate_bias_index) && operands.at(input_gate_bias_index).shape().dim(0) != 0;
+ bool has_cell_to_input_weights = operands.exist(cell_to_input_weights_index) &&
+ operands.at(cell_to_input_weights_index).shape().dim(0) != 0;
+ bool has_cell_to_forget_weights = operands.exist(cell_to_forget_weights_index) &&
+ operands.at(cell_to_forget_weights_index).shape().dim(0) != 0;
+ bool has_cell_to_output_weights = operands.exist(cell_to_output_weights_index) &&
+ operands.at(cell_to_output_weights_index).shape().dim(0) != 0;
+ bool has_projection_weights = operands.exist(projection_weights_index) &&
+ (operands.at(projection_weights_index).shape().dim(0) != 0 &&
+ operands.at(projection_weights_index).shape().dim(1) != 0);
+ bool has_projection_bias =
+ operands.exist(projection_bias_index) && operands.at(projection_bias_index).shape().dim(0) != 0;
+
+ // NOTE cell_to_input_weights may be absent in non-peephole mode even for a regular
+ // (non-CIFG) LSTM.
+ // true: no CIFG
+ // false: CIFG
+ bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights;
+
+ // NOTE cell_to_input_weights is absent in regular CIFG mode even with peephole.
+ // true: peephole
+ // false: no peephole
+ bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights;
+
+ // NOTE The projection weights may have data but the projection bias may not.
+ bool has_projection_param = has_projection_weights;
+
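+ // A rank-3 time-major input is laid out as {max_time, batch, input_size}, so batch is dim 1;
+ // otherwise batch is dim 0.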
+ const auto batch_size = (operands.at(input_index).shape().rank() == 3 && node.param().time_major)
+ ? operands.at(input_index).shape().dim(1)
+ : operands.at(input_index).shape().dim(0);
+ OP_REQUIRES(batch_size == operands.at(output_state_in_index).shape().dim(0) &&
+ batch_size == operands.at(cell_state_in_index).shape().dim(0));
+
+ const auto input_size =
+ operands.at(input_index).shape().dim(operands.at(input_index).shape().rank() - 1);
+ OP_REQUIRES(input_size == operands.at(input_to_forget_weights_index).shape().dim(1) &&
+ input_size == operands.at(input_to_cell_weights_index).shape().dim(1) &&
+ input_size == operands.at(input_to_output_weights_index).shape().dim(1));
+
+ const auto num_units = operands.at(input_to_output_weights_index).shape().dim(0);
+ OP_REQUIRES(num_units == operands.at(input_to_cell_weights_index).shape().dim(0) &&
+ num_units == operands.at(input_to_output_weights_index).shape().dim(0) &&
+ num_units == operands.at(recurrent_to_forget_weights_index).shape().dim(0) &&
+ num_units == operands.at(recurrent_to_cell_weights_index).shape().dim(0) &&
+ num_units == operands.at(recurrent_to_output_weights_index).shape().dim(0) &&
+ num_units == operands.at(forget_gate_bias_index).shape().dim(0) &&
+ num_units == operands.at(cell_bias_index).shape().dim(0) &&
+ num_units == operands.at(output_gate_bias_index).shape().dim(0) &&
+ num_units == operands.at(cell_state_in_index).shape().dim(1));
+
+ const auto output_size =
+ operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 1);
+ OP_REQUIRES(output_size == operands.at(recurrent_to_forget_weights_index).shape().dim(1) &&
+ output_size == operands.at(recurrent_to_cell_weights_index).shape().dim(1) &&
+ output_size == operands.at(recurrent_to_output_weights_index).shape().dim(1) &&
+ output_size == operands.at(output_state_in_index).shape().dim(1));
+
+ if (has_cifg_param)
+ {
+ OP_REQUIRES(input_size == operands.at(input_to_input_weights_index).shape().dim(1));
+ OP_REQUIRES(
+ num_units == operands.at(input_to_input_weights_index).shape().dim(0) &&
+ num_units == operands.at(recurrent_to_input_weights_index).shape().dim(0) &&
+ ((operands.exist(cell_to_input_weights_index) &&
+ num_units == operands.at(cell_to_input_weights_index).shape().dim(0)) ||
+ (!operands.exist(cell_to_input_weights_index) ||
+ operands.at(cell_to_input_weights_index).shape().dim(0) == 0) /* non-peephole */) &&
+ num_units == operands.at(input_gate_bias_index).shape().dim(0));
+ OP_REQUIRES(output_size == operands.at(recurrent_to_input_weights_index).shape().dim(1));
+ OP_REQUIRES(has_input_to_input_weights && has_recurrent_to_input_weights &&
+ has_input_gate_bias);
+ if (has_cell_to_input_weights)
+ {
+ // NOTE cell_to_input_weights exists only in the non-CIFG, peephole case.
+ OP_REQUIRES(has_peephole_param);
+ }
+ if (operands.exist(scratch_buffer_index))
+ OP_REQUIRES(operands.at(scratch_buffer_index).shape().dim(1) == num_units * 4);
+ }
+ else
+ {
+ if (operands.exist(scratch_buffer_index))
+ OP_REQUIRES(operands.at(scratch_buffer_index).shape().dim(1) == num_units * 3);
+ }
+
+ if (has_peephole_param)
+ {
+ OP_REQUIRES(num_units == operands.at(cell_to_forget_weights_index).shape().dim(0) &&
+ num_units == operands.at(cell_to_output_weights_index).shape().dim(0) &&
+ (num_units == operands.at(cell_to_input_weights_index).shape().dim(0) ||
+ operands.at(cell_to_input_weights_index).shape().dim(0) == 0 /* CIFG */));
+ }
+
+ if (has_projection_param)
+ {
+ OP_REQUIRES(num_units == operands.at(projection_weights_index).shape().dim(1));
+ OP_REQUIRES(output_size == operands.at(projection_weights_index).shape().dim(0));
+ if (has_projection_bias)
+ {
+ OP_REQUIRES(output_size == operands.at(projection_bias_index).shape().dim(0));
+ }
+ }
+
+ if (operands.exist(scratch_buffer_index))
+ {
+ OP_REQUIRES(operands.at(scratch_buffer_index).shape().rank() == 2);
+ OP_REQUIRES(batch_size == operands.at(scratch_buffer_index).shape().dim(0));
+ }
+
+ if (operands.exist(output_state_out_index))
+ {
+ OP_REQUIRES(operands.at(output_state_out_index).shape().rank() == 2);
+ OP_REQUIRES(batch_size == operands.at(output_state_out_index).shape().dim(0));
+ OP_REQUIRES(output_size == operands.at(output_state_out_index).shape().dim(1));
+ }
+
+ if (operands.exist(cell_state_out_index))
+ {
+ OP_REQUIRES(operands.at(cell_state_out_index).shape().rank() == 2);
+ OP_REQUIRES(batch_size == operands.at(cell_state_out_index).shape().dim(0));
+ OP_REQUIRES(num_units == operands.at(cell_state_out_index).shape().dim(1));
+ }
+}
+
+void ShapeValidator::visit(const ir::operation::L2Normalization &node)
+{
+ const auto &operands = _graph.operands();
+ const auto ofm_index{node.getOutputs().at(0)};
+ if (operands.at(ofm_index).info().isDynamic())
+ return;
+
+ const auto ifm_index{node.getInputs().at(ir::operation::L2Normalization::Input::INPUT)};
+
+ auto ifm_shape = operands.at(ifm_index).shape();
+ auto ofm_shape = operands.at(ofm_index).shape();
+
+ OP_REQUIRES(ifm_shape.rank() == ofm_shape.rank());
+
+ for (auto i = 0; i < ifm_shape.rank(); i++)
+ {
+ OP_REQUIRES(ifm_shape.dim(i) == ofm_shape.dim(i));
+ }
+}
+
+void ShapeValidator::visit(const ir::operation::Unpack &node)
+{
+ const auto &operands = _graph.operands();
+ const auto axis{node.param().axis};
+ const auto output_index{node.getOutputs().at(0)};
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)};
+
+ const auto &input_shape = operands.at(input_index).shape();
+ const auto input_rank = static_cast<int32_t>(input_shape.rank());
+
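+ // Unpack removes one dimension, so the axis is validated against the input rank; e.g. a
+ // rank-3 input accepts axes in [-3, 3).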
+ OP_REQUIRES(axis >= -input_rank && axis < input_rank);
+}
+
+void ShapeValidator::visit(const ir::operation::Pad &node)
+{
+ const auto &operands = _graph.operands();
+ const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
+ OP_REQUIRES(operands.at(pad_index).typeInfo().type() == ir::DataType::INT32);
+
+ const auto output_index{node.getOutputs().at(0)};
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
+
+ const auto &pad_shape = operands.at(pad_index).shape();
+ const auto input_rank = static_cast<int32_t>(operands.at(input_index).shape().rank());
+
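+ // PAD must be an {input_rank, 2} tensor of (before, after) padding amounts; e.g. a rank-3
+ // input takes a pad tensor of shape {3, 2}.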
+ OP_REQUIRES(pad_shape.rank() == 2);
+ OP_REQUIRES(pad_shape.dim(0) == input_rank);
+ OP_REQUIRES(pad_shape.dim(1) == 2);
+ OP_REQUIRES(operands.at(input_index).shape().rank() == operands.at(output_index).shape().rank());
+}
+
+void ShapeValidator::visit(const ir::operation::Select &)
+{
+ // TODO Shape validation of select
+}
+
+void ShapeValidator::visit(const ir::operation::StridedSlice &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
+
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ OP_REQUIRES(operands.at(input_index).shape().rank() <= 4);
+}
+
+void ShapeValidator::visit(const ir::operation::Split &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(0)};
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ const auto input_index{node.getInputs().at(ir::operation::Split::Input::INPUT)};
+ const auto axis_index{node.getInputs().at(ir::operation::Split::Input::AXIS)};
+
+ const auto num_splits = node.param().num_splits;
+ const auto input_rank = operands.at(input_index).shape().rank();
+ auto axis = *reinterpret_cast<const int32_t *>(operands.at(axis_index).data()->base());
+ axis = axis < 0 ? axis + input_rank : axis;
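+ // e.g. with input rank 4, axis -1 normalizes to 3; the split dimension must then be evenly
+ // divisible by num_splits.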
+
+ OP_REQUIRES(axis >= 0 && axis < input_rank);
+ OP_REQUIRES(operands.at(input_index).shape().dim(axis) % num_splits == 0);
+}
+
+void ShapeValidator::visit(const ir::operation::Shape &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(0)};
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ const auto input_index{node.getInputs().at(0)};
+ UNUSED_RELEASE(input_index);
+ OP_REQUIRES(operands.at(output_index).shape().rank() == 1);
+}
+
+void ShapeValidator::visit(const ir::operation::ResizeBilinear &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
+
+ if (operands.at(output_index).info().isDynamic())
+ {
+ return;
+ }
+ OP_REQUIRES(operands.at(input_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(output_index).shape().rank() == 4);
+}
+
+void ShapeValidator::visit(const ir::operation::Reverse &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::Reverse::Input::INPUT)};
+
+ if (operands.at(output_index).info().isDynamic())
+ return;
+ OP_REQUIRES(operands.at(output_index).shape() == operands.at(input_index).shape());
+}
+
+void ShapeValidator::visit(const ir::operation::If &)
+{
+ // TODO Validate subgraphs as well
+}
+
+void ShapeValidator::visit(const ir::operation::While &)
+{
+ // This validator does not check shapes, so the isDynamic() check is skipped.
+ // TODO Validate subgraphs as well
+}
+
+void ShapeValidator::visit(const ir::operation::SquaredDifference &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
+
+ // Check for dimension constraints
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ auto output_shape = operands.at(output_index).shape();
+ auto lhs_shape = operands.at(lhs_index).shape();
+ auto rhs_shape = operands.at(rhs_index).shape();
+ // Check for output rank
+ OP_REQUIRES(output_shape.rank() == std::max(lhs_shape.rank(), rhs_shape.rank()));
+ auto min_rank = std::min(lhs_shape.rank(), rhs_shape.rank());
+
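+ // Dimensions are compared from the innermost (trailing) axis outward, NumPy broadcast style:
+ // each dim pair must match, or the side equal to 1 broadcasts to the other; e.g. lhs {2,3,4}
+ // and rhs {3,1} give output {2,3,4}.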
+ for (int idx = 1; idx <= min_rank; idx++)
+ {
+ int l_idx = lhs_shape.rank() - idx;
+ int r_idx = rhs_shape.rank() - idx;
+ int out_idx = output_shape.rank() - idx;
+
+ OP_REQUIRES((l_idx >= 0) && (r_idx >= 0) && (out_idx >= 0));
+
+ auto l_dims = lhs_shape.dim(l_idx);
+ auto r_dims = rhs_shape.dim(r_idx);
+ auto out_dims = output_shape.dim(out_idx);
+
+ OP_REQUIRES(((l_dims == r_dims) && (out_dims == l_dims)) ||
+ ((l_dims == 1) && (out_dims == r_dims)) || ((r_dims == 1) && (out_dims == l_dims)));
+ }
+ auto &tmp_shape = (lhs_shape.rank() > rhs_shape.rank()) ? lhs_shape : rhs_shape;
+ for (int idx = min_rank + 1; idx <= output_shape.rank(); idx++)
+ {
+ int out_idx = output_shape.rank() - idx;
+ int tmp_idx = tmp_shape.rank() - idx;
+
+ OP_REQUIRES((out_idx >= 0) && (tmp_idx >= 0) &&
+ (output_shape.dim(out_idx) == tmp_shape.dim(tmp_idx)));
+ }
+}
+void ShapeValidator::visit(const ir::operation::Tile &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(0)};
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ const auto input_index{node.getInputs().at(0)};
+ const auto multiple_index{node.getInputs().at(1)};
+
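+ // MULTIPLES must be a 1-D tensor with one repeat factor per input dimension; e.g. a rank-3
+ // input requires a multiples tensor of shape {3}.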
+ OP_REQUIRES(operands.at(multiple_index).shape().rank() == 1);
+ OP_REQUIRES(operands.at(multiple_index).shape().dim(0) ==
+ operands.at(input_index).shape().rank());
+ OP_REQUIRES(operands.at(input_index).shape().rank() == operands.at(output_index).shape().rank());
+}
+
+void ShapeValidator::visit(const ir::operation::Range &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(0)};
+ const auto start_index{node.getInputs().at(ir::operation::Range::Input::START)};
+ const auto limit_index{node.getInputs().at(ir::operation::Range::Input::LIMIT)};
+ const auto delta_index{node.getInputs().at(ir::operation::Range::Input::DELTA)};
+
+ // Check for dimension constraints
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
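+ // START, LIMIT, and DELTA must be scalars (rank 0); e.g. Range(0, 10, 2) yields
+ // {0, 2, 4, 6, 8}.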
+ OP_REQUIRES(operands.at(start_index).shape().rank() == 0);
+ OP_REQUIRES(operands.at(limit_index).shape().rank() == 0);
+ OP_REQUIRES(operands.at(delta_index).shape().rank() == 0);
+}
+
+void ShapeValidator::visit(const ir::operation::MatrixBandPart &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::MatrixBandPart::Input::INPUT)};
+ const auto num_lower_index{
+ node.getInputs().at(ir::operation::MatrixBandPart::Input::NUM_LOWER_DIAG)};
+ const auto num_upper_index{
+ node.getInputs().at(ir::operation::MatrixBandPart::Input::NUM_UPPER_DIAG)};
+
+ // Check for dimension constraints
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ OP_REQUIRES(operands.at(input_index).shape().rank() >= 2); // input must be at least a 2-D matrix
+ OP_REQUIRES(operands.at(num_upper_index).shape().rank() == 0); // num_upper must be scalar
+ OP_REQUIRES(operands.at(num_lower_index).shape().rank() == 0); // num_lower must be scalar
+}
+
+void ShapeValidator::visit(const ir::operation::LogSoftmax &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(0)};
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ const auto input_index{node.getInputs().at(0)};
+
+ OP_REQUIRES(operands.at(output_index).shape().rank() == operands.at(input_index).shape().rank());
+}
+
+} // namespace compiler
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/ShapeValidator.h b/runtime/onert/core/src/compiler/ShapeValidator.h
new file mode 100644
index 000000000..a51e8adc0
--- /dev/null
+++ b/runtime/onert/core/src/compiler/ShapeValidator.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_SHAPE_VALIDATOR_H__
+#define __ONERT_COMPILER_SHAPE_VALIDATOR_H__
+
+#include "ir/Layout.h"
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+class Graph;
+class Operands;
+} // namespace ir
+} // namespace onert
+
+namespace onert
+{
+namespace compiler
+{
+
+class ShapeValidator : public ir::OperationVisitor
+{
+public:
+ ShapeValidator(void) = delete;
+ ShapeValidator(const ir::Graph &graph);
+ ShapeValidator(const ShapeValidator &) = delete;
+ ShapeValidator(ShapeValidator &&) = delete;
+ ~ShapeValidator() = default;
+
+public:
+ ShapeValidator &operator=(const ShapeValidator &) = delete;
+ ShapeValidator &operator=(ShapeValidator &&) = delete;
+ void operator()();
+
+public:
+ void visit(const ir::operation::BatchMatMul &node) override;
+ void visit(const ir::operation::BatchToSpaceND &node) override;
+ void visit(const ir::operation::BCQFullyConnected &node) override;
+ void visit(const ir::operation::BCQGather &node) override;
+ void visit(const ir::operation::Comparison &node) override;
+ void visit(const ir::operation::Softmax &node) override;
+ void visit(const ir::operation::InstanceNorm &node) override;
+ void visit(const ir::operation::Permute &node) override;
+ void visit(const ir::operation::Pool2D &node) override;
+ void visit(const ir::operation::Reduce &node) override;
+ void visit(const ir::operation::Transpose &node) override;
+ void visit(const ir::operation::RNN &node) override;
+ void visit(const ir::operation::SpaceToBatchND &node) override;
+ void visit(const ir::operation::SpaceToDepth &node) override;
+ void visit(const ir::operation::ElementwiseActivation &node) override;
+ void visit(const ir::operation::ElementwiseBinary &node) override;
+ void visit(const ir::operation::ElementwiseUnary &node) override;
+ void visit(const ir::operation::EmbeddingLookup &node) override;
+ void visit(const ir::operation::ExpandDims &node) override;
+ void visit(const ir::operation::HashtableLookup &node) override;
+ void visit(const ir::operation::TransposeConv &node) override;
+ void visit(const ir::operation::Gather &node) override;
+ void visit(const ir::operation::DepthToSpace &node) override;
+ void visit(const ir::operation::Pack &node) override;
+ void visit(const ir::operation::LSTM &node) override;
+ void visit(const ir::operation::L2Normalization &node) override;
+ void visit(const ir::operation::Unpack &node) override;
+ void visit(const ir::operation::Pad &node) override;
+ void visit(const ir::operation::Select &node) override;
+ void visit(const ir::operation::StridedSlice &node) override;
+ void visit(const ir::operation::Split &node) override;
+ void visit(const ir::operation::Shape &node) override;
+ void visit(const ir::operation::ResizeBilinear &node) override;
+ void visit(const ir::operation::Reverse &node) override;
+ void visit(const ir::operation::If &node) override;
+ void visit(const ir::operation::While &node) override;
+ void visit(const ir::operation::SquaredDifference &node) override;
+ void visit(const ir::operation::Tile &node) override;
+ void visit(const ir::operation::Range &node) override;
+ void visit(const ir::operation::MatrixBandPart &node) override;
+ void visit(const ir::operation::LogSoftmax &node) override;
+
+private:
+ void checkUnaryOp(const ir::Operation &node);
+
+private:
+ const ir::Graph &_graph;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_SHAPE_VALIDATOR_H__
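ShapeValidator is a whole-graph visitor: it is constructed over an ir::Graph and invoked via operator() to visit every operation. A hedged usage sketch, assuming a fully built graph and that a failed OP_REQUIRES reports the error by throwing (the include path is also an assumption):

#include "compiler/ShapeValidator.h" // path assumed; header is defined above

// Sketch only: run shape validation over a complete graph before lowering.
void validateGraphShapes(const onert::ir::Graph &graph)
{
  onert::compiler::ShapeValidator validator{graph};
  validator(); // visits each operation; a failed OP_REQUIRES aborts validation
}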
diff --git a/runtime/onert/core/src/compiler/StaticShapeInference.cc b/runtime/onert/core/src/compiler/StaticShapeInference.cc
deleted file mode 100644
index 4eba1ff49..000000000
--- a/runtime/onert/core/src/compiler/StaticShapeInference.cc
+++ /dev/null
@@ -1,1096 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "compiler/StaticShapeInference.h"
-#include "util/ShapeInference.h"
-#include "util/logging.h"
-
-#include <sstream>
-
-namespace onert
-{
-namespace compiler
-{
-
-bool StaticShapeInferer::infer(const ir::OpSequence &op_seq)
-{
- bool has_dynamic_tensor = false;
-
- for (const auto &operation_idx : op_seq.operations())
- {
- auto &op = _operations.at(operation_idx);
- auto opcode = op.opcode();
-
- _return_has_dynamic_tensor = false; // this is used as a return value inside operation's visit()
-
- // IF: need shape inference for then, else
- // While: need shape inference for condition, body
- if (opcode == ir::OpCode::If || opcode == ir::OpCode::While)
- {
- op.accept(*this);
- }
- else
- {
- _return_has_dynamic_tensor = checkDynamicInput(op);
-
- if (_return_has_dynamic_tensor)
- {
- setDynamicOutput(op);
- }
- else
- {
- op.accept(*this);
- }
- }
-
- has_dynamic_tensor = has_dynamic_tensor || _return_has_dynamic_tensor;
- }
-
- return has_dynamic_tensor;
-}
-
-bool StaticShapeInferer::checkDynamicInput(const ir::Operation &op)
-{
- for (auto input_idx : op.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED)
- {
- if (_operands.at(input_idx).info().isDynamic())
- {
- return true;
- }
- }
-
- return false;
-}
-
-void StaticShapeInferer::setDynamicOutput(const ir::Operation &op)
-{
- for (auto output_idx : op.getOutputs())
- {
- _operands.at(output_idx).info().setDynamic();
- }
-}
-
-void StaticShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op,
- const ir::OperandIndex lhs_idx,
- const ir::OperandIndex rhs_idx)
-{
- const auto &lhs = _operands.at(lhs_idx);
- const auto &rhs = _operands.at(rhs_idx);
-
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- // re-sizing output shape
- ir::Shape new_shape = shape_inference::inferEltwiseShape(lhs.info().shape(), rhs.info().shape());
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::handleSimpleUnaryOp(const ir::Operation &op,
- const ir::OperandIndex input_idx)
-{
- const auto &input = _operands.at(input_idx);
-
- // get mutable output operand
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- // re-sizing output shape
- ir::Shape new_shape = input.info().shape();
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::dump()
-{
- auto get_shape_str = [](const ir::Shape &shape) {
- std::stringstream sstream;
- sstream << "shape : {";
- for (int i = 0; i < shape.rank(); i++)
- {
- if (i == 0)
- sstream << shape.dim(i);
- else
- sstream << " " << shape.dim(i);
- }
- sstream << "}";
- return sstream.str();
- };
-
- for (const auto &pair : _lowered_subgs)
- {
- const auto index = pair.first;
- const auto &lowered_subg = pair.second;
- VERBOSE(StaticShapeInferer) << "SubGraph #" << index.value() << std::endl;
- lowered_subg->graph().operands().iterate(
- [&](const ir::OperandIndex &ind, const ir::Operand &operand) {
- VERBOSE(StaticShapeInferer) << "Operand #" << ind.value() << ", "
- << (operand.info().isDynamic() ? "Dynamic" : "Static") << ", "
- << get_shape_str(operand.info().shape()) << std::endl;
- });
- }
-}
-
-void StaticShapeInferer::visit(const ir::operation::ArgMax &op)
-{
- const auto input_idx{op.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
-
- // get mutable output operand
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
- const auto rank = input.info().shape().rank();
- const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
-
- assert(0 <= axis && axis < rank);
-
- // re-sizing output shape
- ir::Shape new_shape = shape_inference::inferArgMaxShape(input.info().shape(), axis, rank);
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::BatchMatMul &op)
-{
- const auto lhs_index = op.getInputs().at(ir::operation::BatchMatMul::Input::LHS);
- const auto rhs_index = op.getInputs().at(ir::operation::BatchMatMul::Input::RHS);
- const auto output_index = op.getOutputs().at(0);
- const auto lhs = _operands.at(lhs_index);
- const auto rhs = _operands.at(rhs_index);
- auto &output = _operands.at(output_index);
- auto new_shape = shape_inference::inferBatchMatMulShape(lhs.shape(), rhs.shape(), op.param());
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::BinaryArithmetic &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS),
- op.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS));
-}
-
-void StaticShapeInferer::visit(const ir::operation::BroadcastTo &op)
-{
- // get mutable output operand
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- const auto shape_idx{op.getInputs().at(ir::operation::BroadcastTo::Input::SHAPE)};
- const auto &shape = _operands.at(shape_idx);
-
- if (!shape.isConstant())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
- // assert(shape.typeInfo().type() == ir::DataType::INT32);
- auto shape_buffer = reinterpret_cast<const int32_t *>(shape.data()->base());
-
- // re-sizing output shape
- ir::Shape new_shape = shape_inference::inferBroadcastToShape(shape.info().shape(), shape_buffer);
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::Comparison &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Comparison::Input::INPUT0),
- op.getInputs().at(ir::operation::Comparison::Input::INPUT1));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Concat &op)
-{
- const auto input_count = op.getInputs().size();
-
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- shape_inference::Shapes input_shapes;
- for (uint32_t i = 0; i < input_count; i++)
- {
- const auto input_idx{op.getInputs().at(i)};
- const auto &input = _operands.at(input_idx);
- input_shapes.emplace_back(input.shape());
- }
-
- ir::Shape out_shape = shape_inference::inferConcatShape(input_shapes, op.param());
-
- // re-sizing output shape
- output.info().shape(out_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::Conv2D &op)
-{
- const auto input_idx{op.getInputs().at(ir::operation::Conv2D::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
- const auto ker_idx{op.getInputs().at(ir::operation::Conv2D::Input::KERNEL)};
- const auto &ker = _operands.at(ker_idx);
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- // re-sizing output shape
- ir::Shape new_shape =
- shape_inference::inferConv2DShape(input.info().shape(), ker.info().shape(), op.param());
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::ElementwiseActivation &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::ElementwiseBinary &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS),
- op.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS));
-}
-
-void StaticShapeInferer::visit(const ir::operation::ElementwiseUnary &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::ExpandDims &op)
-{
- const auto input_idx{op.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
- const auto axis_idx{op.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
- const auto &axis = _operands.at(axis_idx);
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- if (!axis.isConstant())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
- // even when axis is constant, output shape should be recalculated since user might call
- // nnfw_set_input_tensorinfo(input, some_new_shape)
- auto axis_buf = reinterpret_cast<const int32_t *>(axis.data()->base());
- assert(axis_buf);
-
- // re-sizing output shape
- ir::Shape new_shape = shape_inference::inferExpandDimsShape(input.info().shape(), axis_buf[0]);
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::Fill &op)
-{
- const auto input_idx{op.getInputs().at(ir::operation::Fill::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- if (!input.isConstant())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
- assert(input.typeInfo().type() == ir::DataType::INT32);
-
- auto input_buf = reinterpret_cast<const int32_t *>(input.data()->base());
- assert(input_buf);
-
- // re-sizing output shape
- ir::Shape new_shape = shape_inference::inferFillShape(input.info().shape(), input_buf);
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::FullyConnected &op)
-{
- const auto input_idx{op.getInputs().at(ir::operation::FullyConnected::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
-
- const auto ker_idx{op.getInputs().at(ir::operation::FullyConnected::Input::WEIGHT)};
- const auto &ker = _operands.at(ker_idx);
-
- // get mutable output operand
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
- // re-sizing output shape
- ir::Shape new_shape =
- shape_inference::inferFullyConnectedShape(input.info().shape(), ker.info().shape());
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::FusedBatchNorm &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::FusedBatchNorm::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Gather &op)
-{
- const auto input_idx{op.getInputs().at(ir::operation::Gather::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
-
- // get mutable output operand
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- const auto indices_idx{op.getInputs().at(ir::operation::Gather::Input::INDICES)};
- const auto &indices = _operands.at(indices_idx);
- const auto rank = input.info().shape().rank();
- const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
-
- assert(0 <= axis && axis < rank);
-
- // re-sizing output shape
- ir::Shape new_shape =
- shape_inference::inferGatherShape(input.info().shape(), indices.info().shape(), axis, rank);
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::If &op)
-{
- auto &then_graph = _lowered_subgs.at(op.param().then_subg_index)->graph();
- auto &else_graph = _lowered_subgs.at(op.param().else_subg_index)->graph();
- const std::vector<ir::OperandIndex> inputs{op.getInputs().begin() + 1, op.getInputs().end()};
- const auto &outputs = op.getOutputs();
-
- // re-sizing input shapes of then subgraph
- const auto &then_inputs = then_graph.getInputs();
- assert(inputs.size() == then_inputs.size());
- for (size_t i = 0; i < inputs.size(); ++i)
- {
- auto &then_input = then_graph.operands().at(then_inputs.at(i));
- if (_operands.at(inputs.at(i)).info().isDynamic())
- {
- then_input.info().setDynamic();
- }
- else
- {
- auto new_shape = _operands.at(inputs.at(i)).info().shape();
- then_input.info().shape(new_shape);
- }
- }
-
- // re-sizing input shapes of else subgraph
- const auto &else_inputs = else_graph.getInputs();
- assert(inputs.size() == else_inputs.size());
- for (size_t i = 0; i < inputs.size(); ++i)
- {
- auto &else_input = else_graph.operands().at(else_inputs.at(i));
- if (_operands.at(inputs.at(i)).info().isDynamic())
- {
- else_input.info().setDynamic();
- }
- else
- {
- const auto &new_shape = _operands.at(inputs.at(i)).info().shape();
- else_input.info().shape(new_shape);
- }
- }
-
- // re-sizing operands of then subgraph
- StaticShapeInferer then_inferer(op.param().then_subg_index, _lowered_subgs);
- _lowered_subgs.at(op.param().then_subg_index)
- ->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- bool has_dynamic_tensor = then_inferer.infer(op_seq);
- op_seq.has_dynamic_tensor(has_dynamic_tensor);
- });
-
- // re-sizing operands of else subgraph
- StaticShapeInferer else_inferer(op.param().else_subg_index, _lowered_subgs);
- _lowered_subgs.at(op.param().else_subg_index)
- ->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- bool has_dynamic_tensor = else_inferer.infer(op_seq);
- op_seq.has_dynamic_tensor(has_dynamic_tensor);
- });
-
- // re-sizing output shapes
- const auto &then_outputs = _lowered_subgs.at(op.param().then_subg_index)->graph().getOutputs();
- const auto &else_outputs = _lowered_subgs.at(op.param().else_subg_index)->graph().getOutputs();
- assert(outputs.size() == then_outputs.size());
- assert(outputs.size() == else_outputs.size());
- for (size_t i = 0; i < outputs.size(); ++i)
- {
- const auto &then_output = then_graph.operands().at(then_outputs.at(i));
- const auto &else_output = else_graph.operands().at(else_outputs.at(i));
- auto &output = _operands.at(outputs.at(i));
- if (!then_output.info().isDynamic() && !else_output.info().isDynamic() &&
- then_output.shape() == else_output.shape())
- {
- output.info().shape(then_output.shape());
- }
- else
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- }
- }
-}
-
-void StaticShapeInferer::visit(const ir::operation::L2Normalization &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::L2Normalization::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::MatrixBandPart &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::MatrixBandPart::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::OneHot &op)
-{
- const auto indice_idx{op.getInputs().at(ir::operation::OneHot::Input::INDICES)};
- const auto &indice = _operands.at(indice_idx);
- const auto depth_idx{op.getInputs().at(ir::operation::OneHot::Input::DEPTH)};
- const auto &depth = _operands.at(depth_idx);
-
- const auto axis = op.param().axis;
-
- auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- if (!depth.isConstant())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
- const auto *depth_buf = reinterpret_cast<const int32_t *>(depth.data()->base());
- assert(depth_buf);
- // re-sizing output shape
- ir::Shape new_shape = shape_inference::inferOnehotShape(indice.info().shape(), *depth_buf, axis);
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::Pack &op)
-{
- const auto input_idx{op.getInputs().at(0)};
- const auto &input = _operands.at(input_idx);
-
- // get mutable output operand
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- const auto rank = input.shape().rank() + 1;
- const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
- const auto num = op.param().num;
-
- assert(0 <= axis && axis < rank);
-
- // re-sizing output shape
- ir::Shape new_shape = shape_inference::inferPackShape(input.info().shape(), axis, rank, num);
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::Pad &op)
-{
- const auto input_idx{op.getInputs().at(ir::operation::Pad::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
-
- const auto pad_idx{op.getInputs().at(ir::operation::Pad::Input::PAD)};
- const auto &pad = _operands.at(pad_idx);
-
- // get mutable output operand
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- // if pad is not constant, output also becomes dynamic
- if (!pad.isConstant())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
- // re-sizing output shape
- const auto new_shape = shape_inference::inferPadShape(
- input.shape(), reinterpret_cast<const int32_t *>(pad.data()->base()),
- pad.shape().num_elements());
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::Permute &op)
-{
- const auto input_idx{op.getInputs().at(0)};
- const auto &input = _operands.at(input_idx);
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- // re-sizing output shape
- // Permute is a special operation whose input/output layouts may differ by backend.
- // That is not applied here, so input and output keep the frontend layout. This is
- // because "ExecutorFactory" converts input/output shapes according to the layouts when
- // registering operand info to "TensorBuilder" after calling "StaticShapeInferer"
- const auto new_shape = input.info().shape();
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::Pow &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Pow::Input::LHS),
- op.getInputs().at(ir::operation::Pow::Input::RHS));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Range &op)
-{
- const auto start_idx{op.getInputs().at(ir::operation::Range::Input::START)};
- const auto limit_idx{op.getInputs().at(ir::operation::Range::Input::LIMIT)};
- const auto delta_idx{op.getInputs().at(ir::operation::Range::Input::DELTA)};
- const auto &start_op = _operands.at(start_idx);
- const auto &limit_op = _operands.at(limit_idx);
- const auto &delta_op = _operands.at(delta_idx);
-
- // get mutable output operand
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- ir::Shape new_shape;
- if (start_op.isConstant() && limit_op.isConstant() && delta_op.isConstant())
- {
- assert(start_op.typeInfo().type() == limit_op.typeInfo().type() &&
- start_op.typeInfo().type() == delta_op.typeInfo().type());
- if (output.typeInfo().type() == ir::DataType::FLOAT32)
- {
- new_shape = shape_inference::inferRangeShape<float>(
- start_op.asScalar<float>(), limit_op.asScalar<float>(), delta_op.asScalar<float>());
- }
- else if (output.typeInfo().type() == ir::DataType::INT32)
- {
- new_shape = shape_inference::inferRangeShape<int32_t>(
- start_op.asScalar<int32_t>(), limit_op.asScalar<int32_t>(), delta_op.asScalar<int32_t>());
- }
- assert(output.shape() == new_shape);
- }
- else
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- }
-}
-
-void StaticShapeInferer::visit(const ir::operation::Reduce &op)
-{
- const auto input_idx{op.getInputs().at(ir::operation::Reduce::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
-
- const auto axes_idx{op.getInputs().at(ir::operation::Reduce::Input::AXES)};
- const auto &axes = _operands.at(axes_idx);
-
- // get mutable output operand
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- std::vector<int32_t> axes_vec;
- for (size_t i = 0; i < axes.shape().num_elements(); ++i)
- {
- switch (axes.typeInfo().type())
- {
- case ir::DataType::INT32:
- {
- axes_vec.emplace_back(reinterpret_cast<const int32_t *>(axes.data()->base())[i]);
- break;
- }
- case ir::DataType::INT64:
- {
- axes_vec.emplace_back(reinterpret_cast<const int64_t *>(axes.data()->base())[i]);
- break;
- }
- default:
- throw std::runtime_error("StaticShapeInferer " + op.name() + ": Not supported data type");
- break;
- }
- }
- const auto keep_dims = op.param().keep_dims;
-
- // re-sizing output shape
- ir::Shape new_shape =
- shape_inference::inferReduceShape(input.info().shape(), axes_vec, keep_dims);
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::Reshape &op)
-{
- const auto input_idx{op.getInputs().at(ir::operation::Reshape::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
-
- // get mutable output operand
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- // New shape is given by second input tensor
- if (op.getInputs().size() == 2)
- {
- // Let's check the second input
- const auto shape_idx{op.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
- const auto &shape = _operands.at(shape_idx);
-
- if (shape.isConstant())
- {
- const auto *shape_buf = reinterpret_cast<const int32_t *>(shape.data()->base());
- assert(shape_buf);
-
- ir::Shape new_shape = shape_inference::inferReshapeShape(
- shape_buf, shape.shape().num_elements(), input.shape().num_elements());
-
- // if shape is from Const, TFLC put the shape of output into tensor
- if (new_shape != output.shape())
- {
- // change on output shape
- output.info().shape(new_shape);
- }
- }
- else
- {
- // if shape is NOT Const, set output shape to be dynamic
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- }
- }
- // New shape is given by option
- else if (op.param().new_shape.size() != 0)
- {
- // Let's check the new_shape option
- auto shape = op.param().new_shape;
- ir::Shape new_shape = shape_inference::inferReshapeShape(shape.data(), shape.size(),
- input.shape().num_elements());
-
- if (new_shape != output.shape())
- {
- // change on output shape
- output.info().shape(new_shape);
- }
- }
- else
- {
- throw std::runtime_error("Reshape: new shape is missing");
- }
-}
-
-void StaticShapeInferer::visit(const ir::operation::ResizeBilinear &op)
-{
- const auto input_idx{op.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
-
- // get mutable output operand
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- // Shape inferencing logic based on Params
- ir::Shape new_shape = shape_inference::inferResizeBilinearShape(
- input.shape(), op.param().height_out, op.param().width_out);
-
- // if size_op is from Const, TFLC put the shape of output into tensor
- if (new_shape != output.shape())
- {
- // change on output shape
- output.info().shape(new_shape);
- }
-}
-
-void StaticShapeInferer::visit(const ir::operation::Reverse &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Reverse::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Select &op)
-{
- const auto input_cond_idx{op.getInputs().at(ir::operation::Select::Input::CONDITION)};
- const auto &input_cond = _operands.at(input_cond_idx);
-
- const auto input_true_idx{op.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
- const auto &input_true = _operands.at(input_true_idx);
-
- const auto input_false_idx{op.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};
- const auto &input_false = _operands.at(input_false_idx);
-
- auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- // Select output shape
- ir::Shape new_shape = shape_inference::inferSelectShape(
- input_cond.info().shape(), input_true.info().shape(), input_false.info().shape());
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::Shape &op)
-{
- const auto input_idx{op.getInputs().at(0)};
- const auto &input = _operands.at(input_idx);
-
- // get mutable output operand
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- // re-sizing output shape
- ir::Shape output_shape;
- output_shape.append(input.info().shape().rank());
-
- output.info().shape(output_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::Slice &op)
-{
- const auto input_index{op.getInputs().at(ir::operation::Slice::Input::INPUT)};
- const auto &input = _operands.at(input_index);
- const auto begins_index{op.getInputs().at(ir::operation::Slice::Input::BEGINS)};
- const auto &begins = _operands.at(begins_index);
- const auto sizes_index{op.getInputs().at(ir::operation::Slice::Input::SIZES)};
- const auto &sizes = _operands.at(sizes_index);
- const auto output_index = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_index);
-
- // Whether input is constant or not does not affect whether output is dynamic or not
- if (!(begins.isConstant() && sizes.isConstant()))
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
- auto begins_buf = reinterpret_cast<const int32_t *>(begins.data()->base());
- auto sizes_buf = reinterpret_cast<const int32_t *>(sizes.data()->base());
-
- ir::Shape new_shape =
- shape_inference::inferSliceShape(input.info().shape(), begins_buf, sizes_buf);
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::Softmax &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Softmax::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::SpaceToBatchND &op)
-{
- const auto output_index = op.getOutputs().at(0);
- const auto input_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
- const auto block_shape_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
- const auto padding_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
-
- ir::Operand &output = _operands.at(output_index);
- const auto &input = _operands.at(input_idx);
- const auto &block_shape = _operands.at(block_shape_idx);
- const auto &padding = _operands.at(padding_idx);
-
- // Whether input is constant or not does not affect whether output is dynamic or not
- if (!(block_shape.isConstant() && padding.isConstant()))
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
- auto input_shape = input.info().shape();
- auto block_shape_shape = block_shape.info().shape();
- auto padding_shape = padding.info().shape();
-
- auto block_shape_data = reinterpret_cast<const int32_t *>(block_shape.data()->base());
- auto padding_data = reinterpret_cast<const int32_t *>(padding.data()->base());
-
- ir::Shape new_shape = shape_inference::inferSpaceToBatchNDShape(
- input_shape, block_shape_shape, padding_shape, block_shape_data, padding_data);
-
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::Split &op)
-{
- const auto input_idx{op.getInputs().at(0)};
- const auto &input = _operands.at(input_idx);
-
- const auto axis = op.param().axis;
- const auto num_splits = op.param().num_splits;
-
- const auto rank = input.info().shape().rank();
- auto axis_resolved = axis < 0 ? axis + rank : axis;
-
- assert(0 <= axis_resolved && axis_resolved < rank);
-
- ir::Shape new_shape =
- shape_inference::inferSplitShape(input.info().shape(), axis_resolved, num_splits);
- auto output_tensors = op.getOutputs();
- for (auto output_idx : output_tensors)
- {
- ir::Operand &output = _operands.at(output_idx);
- output.info().shape(new_shape);
- }
-}
-
-void StaticShapeInferer::visit(const ir::operation::SquaredDifference &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::SquaredDifference::Input::LHS),
- op.getInputs().at(ir::operation::SquaredDifference::Input::RHS));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Squeeze &op)
-{
- const auto input_idx{op.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
-
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
- // Squeeze output shape
- ir::Shape new_shape = shape_inference::inferSqueezeShape(input.info().shape(), op.param());
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::StridedSlice &op)
-{
- const auto input_index{op.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
- const auto &input = _operands.at(input_index);
- const auto starts_index{op.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
- const auto &starts = _operands.at(starts_index);
- const auto ends_index{op.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
- const auto &ends = _operands.at(ends_index);
- const auto strides_index{op.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
- const auto &strides = _operands.at(strides_index);
- const auto output_index = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_index);
-
- if (!(starts.isConstant() && ends.isConstant() && strides.isConstant()))
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
- const auto begin_mask = op.param().begin_mask;
- const auto end_mask = op.param().end_mask;
- const auto shrink_axis_mask = op.param().shrink_axis_mask;
- const auto rank = input.info().shape().rank();
-
- auto starts_buf = reinterpret_cast<const uint32_t *>(starts.data()->base());
- auto ends_buf = reinterpret_cast<const uint32_t *>(ends.data()->base());
- auto strides_buf = reinterpret_cast<const uint32_t *>(strides.data()->base());
-
- auto op_params = shape_inference::buildStridedSliceParams(
- starts_buf, ends_buf, strides_buf, begin_mask, end_mask, shrink_axis_mask, rank);
-
- ir::Shape new_shape =
- shape_inference::inferStridedSliceShape(input.info().shape(), op_params, rank);
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::Tile &op)
-{
- const auto input_idx{op.getInputs().at(ir::operation::Tile::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
-
- const auto multiplier_idx{op.getInputs().at(ir::operation::Tile::Input::MULTIPLES)};
- const auto &multiplier = _operands.at(multiplier_idx);
-
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- if (!multiplier.isConstant())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
- auto multiplier_buffer = reinterpret_cast<const int32_t *>(multiplier.data()->base());
- assert(multiplier_buffer);
-
- // re-sizing output shape
- auto new_shape = shape_inference::inferTileShape(input.info().shape(), multiplier_buffer);
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::Transpose &op)
-{
- const auto input_idx{op.getInputs().at(ir::operation::Transpose::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
-
- // get mutable output operand
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
- const auto perm{op.param().perm};
- // const auto rank{op.param().rank};
-
- // set output shape, based on input and params
- ir::Shape new_shape = shape_inference::inferTransposeShape(input.info().shape(), perm);
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::Unpack &op)
-{
- const auto input_idx{op.getInputs().at(0)};
- const auto &input = _operands.at(input_idx);
- const auto num = op.param().num;
- const auto rank = input.shape().rank();
- const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
-
- assert(axis < rank);
- if (axis < 0)
- {
- for (int out_tensor_idx = 0; out_tensor_idx < num; out_tensor_idx++)
- {
- const auto output_idx = op.getOutputs().at(out_tensor_idx);
- ir::Operand &output = _operands.at(output_idx);
- output.info().setDynamic();
- }
- _return_has_dynamic_tensor = true;
- return;
- }
-
- ir::Shape new_shape = shape_inference::inferUnpackShape(input.info().shape(), axis, rank);
-
- // re-sizing output shape
- for (int out_tensor_idx = 0; out_tensor_idx < num; out_tensor_idx++)
- {
- const auto output_idx = op.getOutputs().at(out_tensor_idx);
- ir::Operand &output = _operands.at(output_idx);
- output.info().shape(new_shape);
- }
-}
-
-void StaticShapeInferer::visit(const ir::operation::While &op)
-{
- auto &cond_graph = _lowered_subgs.at(op.param().cond_subg_index)->graph();
- auto &body_graph = _lowered_subgs.at(op.param().body_subg_index)->graph();
- const auto inputs = op.getInputs();
- const auto &outputs = op.getOutputs();
-
- // re-sizing input shapes of cond subgraph
- const auto &cond_inputs = cond_graph.getInputs();
- assert(inputs.size() == cond_inputs.size());
- for (size_t i = 0; i < inputs.size(); ++i)
- {
- const auto &input = _operands.at(inputs.at(i));
- auto &cond_input = cond_graph.operands().at(cond_inputs.at(i));
- if (input.info().isDynamic())
- {
- cond_input.info().setDynamic();
- }
- else
- {
- auto new_shape = input.info().shape();
- cond_input.info().shape(new_shape);
- }
- }
-
- // re-sizing input shapes of body subgraph
- const auto &body_inputs = body_graph.getInputs();
- assert(cond_inputs.size() == body_inputs.size());
- for (size_t i = 0; i < cond_inputs.size(); ++i)
- {
- const auto &cond_input = cond_graph.operands().at(cond_inputs.at(i));
- auto &body_input = body_graph.operands().at(body_inputs.at(i));
- if (cond_input.info().isDynamic())
- {
- body_input.info().setDynamic();
- }
- else
- {
- const auto &new_shape = cond_input.info().shape();
- body_input.info().shape(new_shape);
- }
- }
-
- // re-sizing operands of body subgraph
- StaticShapeInferer body_inferer(op.param().body_subg_index, _lowered_subgs);
- _lowered_subgs.at(op.param().body_subg_index)
- ->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- bool has_dynamic_tensor = body_inferer.infer(op_seq);
- op_seq.has_dynamic_tensor(has_dynamic_tensor);
- });
-
- // Check whether the while operation's shapes are predictable
- // If any body output shape differs from its corresponding cond input shape,
- // non-constant operands will be set to dynamic
- bool check_unpredictable_dynamic = false;
- const auto &body_outputs = body_graph.getOutputs();
- assert(body_outputs.size() == cond_inputs.size());
- for (size_t i = 0; i < body_outputs.size(); ++i)
- {
- const auto &body_output = body_graph.operands().at(body_outputs.at(i));
- auto &cond_input = cond_graph.operands().at(cond_inputs.at(i));
- if ((cond_input.info().isDynamic() != body_output.info().isDynamic()) ||
- (cond_input.shape() != body_output.shape()))
- {
- check_unpredictable_dynamic = true;
- break;
- }
- }
-
- if (check_unpredictable_dynamic)
- {
- // Set inputs of body subgraph
- for (const auto &input_index : body_inputs)
- {
- auto &input = body_graph.operands().at(input_index);
- if (!input.isConstant())
- {
- input.info().setDynamic();
- }
- }
-
- // Set inputs of cond subgraph
- for (const auto &input_index : cond_inputs)
- {
- auto &input = cond_graph.operands().at(input_index);
- if (!input.isConstant())
- {
- input.info().setDynamic();
- }
- }
-
- // Set non-constant operands of body subgraph to dynamic
- StaticShapeInferer body_inferer(op.param().body_subg_index, _lowered_subgs);
- _lowered_subgs.at(op.param().body_subg_index)
- ->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- bool has_dynamic_tensor = body_inferer.infer(op_seq);
- op_seq.has_dynamic_tensor(has_dynamic_tensor);
- });
- }
-
- // re-sizing operands of cond subgraph
- // If check_unpredictable_dynamic is true, non-constant operands of cond subgraph would be set to
- // dynamic
- StaticShapeInferer cond_inferer(op.param().cond_subg_index, _lowered_subgs);
- _lowered_subgs.at(op.param().cond_subg_index)
- ->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- bool has_dynamic_tensor = cond_inferer.infer(op_seq);
- op_seq.has_dynamic_tensor(has_dynamic_tensor);
- });
-
- // re-sizing outputs of while operation
- // If check_unpredictable_dynamic is true, outputs of while operation would be set to dynamic
- assert(cond_inputs.size() == outputs.size());
- for (size_t i = 0; i < cond_inputs.size(); ++i)
- {
- const auto &cond_input = cond_graph.operands().at(cond_inputs.at(i));
- auto &output = _operands.at(outputs.at(i));
- if (cond_input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- }
- else
- {
- const auto new_shape = cond_input.info().shape();
- output.info().shape(new_shape);
- }
- }
-}
-
-} // namespace compiler
-
-} // namespace onert
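The deleted inferer above worked per ir::OpSequence and returned a has_dynamic_tensor flag to its caller; its replacement below walks each subgraph in topological order and synchronizes subgraph boundaries through observers. The dynamic-ness propagation rule itself is unchanged; a compact sketch with hypothetical stand-in types (Info, Operands, Operation are placeholders, not onert classes), where the static-case shape recomputation is elided:

#include <vector>

struct Info { bool dynamic = false; };
struct Operands
{
  std::vector<Info> infos;
  Info &at(int i) { return infos[i]; }
};
struct Operation { std::vector<int> inputs, outputs; };

// If any input is dynamic, all outputs become dynamic and per-op inference is
// skipped; otherwise the op would recompute static output shapes via visit().
bool inferOne(const Operation &op, Operands &operands)
{
  bool any_dynamic = false;
  for (int idx : op.inputs)
    any_dynamic = any_dynamic || operands.at(idx).dynamic;

  if (any_dynamic)
    for (int idx : op.outputs)
      operands.at(idx).dynamic = true;

  return any_dynamic; // the caller ORs this flag across the (sub)graph
}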
diff --git a/runtime/onert/core/src/compiler/StaticShapeInferer.cc b/runtime/onert/core/src/compiler/StaticShapeInferer.cc
new file mode 100644
index 000000000..ec5d2146b
--- /dev/null
+++ b/runtime/onert/core/src/compiler/StaticShapeInferer.cc
@@ -0,0 +1,1425 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "compiler/StaticShapeInferer.h"
+#include "util/ShapeInference.h"
+#include "util/logging.h"
+
+#include <misc/polymorphic_downcast.h>
+
+#include <sstream>
+#include <stdexcept>
+
+namespace onert
+{
+namespace compiler
+{
+void OperandObserver::updateShapes(const std::vector<ir::OperandInfo> &changed_operands_info,
+ bool unpredictable)
+{
+ assert(changed_operands_info.size() == _operands.size());
+ for (size_t i = 0; i < changed_operands_info.size(); ++i)
+ {
+ const auto &changed_operand_info = changed_operands_info.at(i);
+ auto &operand = _operands.at(i);
+ // assert(changed_operand_info.typeInfo() == operand->typeInfo());
+ // The runtime check below may be replaced by the assertion above once model
+ // validation is guaranteed to complete before this function is called; until
+ // then, mismatched operand types are reported as a runtime error.
+ if (changed_operand_info.typeInfo() != operand->typeInfo())
+ {
+ throw std::runtime_error("OperandObserver: The types of operands are mismatched");
+ }
+ if (!operand->info().isConstant() && (changed_operand_info.isDynamic() || unpredictable))
+ {
+ operand->info().setDynamic();
+ }
+ else
+ {
+ const auto &new_shape = changed_operands_info.at(i).shape();
+ operand->info().shape(new_shape);
+ }
+ }
+}
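+// A hedged usage sketch of the observer above: a parent controlflow operation
+// collects its child subgraph's input operands once, then pushes refreshed
+// OperandInfo into them. Names other than OperandObserver/updateShapes are
+// placeholders, not onert identifiers:
+//   OperandObserver observer{{&body_in0, &body_in1}};
+//   observer.updateShapes({info0, info1});       // copy static shapes across
+//   observer.updateShapes({info0, info1}, true); // mark non-constants dynamic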
+
+void StaticShapeInferer::infer()
+{
+ for (const auto &op_idx : _lowered_subg->graph().topolSortOperations())
+ {
+ const auto &op = _lowered_subg->graph().operations().at(op_idx);
+ bool has_dynamic_tensor = false;
+ const auto opcode = op.opcode();
+ // IF: requires shape inference for then, else
+ // While: requires shape inference for condition, body
+ if (opcode == ir::OpCode::If || opcode == ir::OpCode::While)
+ {
+ op.accept(*this);
+ }
+ else
+ {
+ has_dynamic_tensor = checkDynamicInput(op);
+ if (has_dynamic_tensor)
+ {
+ setDynamicOutput(op);
+ }
+ else
+ {
+ op.accept(*this);
+ }
+ }
+ has_dynamic_tensor = has_dynamic_tensor || checkDynamicOutput(op);
+ _lowered_subg->setHasDynamicTensor(op_idx, has_dynamic_tensor);
+ }
+
+ if (_controlflow_output_observer != nullptr)
+ {
+ // re-sizing output shapes of the controlflow operation branching to this subgraph
+ std::vector<ir::OperandInfo> outputs_info;
+ const auto &graph = _lowered_subg->graph();
+ const auto &outputs = graph.getOutputs();
+ for (size_t i = 0; i < outputs.size(); ++i)
+ {
+ const auto &operand_info = graph.operands().at(outputs.at(i)).info();
+ outputs_info.emplace_back(operand_info);
+ }
+ _controlflow_output_observer->updateShapes(outputs_info);
+ }
+}
+
+bool StaticShapeInferer::checkDynamicInput(const ir::IOperation &op)
+{
+ const auto &operands = _lowered_subg->graph().operands();
+ for (auto &&input_idx : op.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED)
+ {
+ if (operands.at(input_idx).info().isDynamic())
+ {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool StaticShapeInferer::checkDynamicOutput(const ir::IOperation &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+ for (auto &&output_idx : op.getOutputs() | ir::Remove::UNDEFINED)
+ {
+ if (operands.at(output_idx).info().isDynamic())
+ {
+ return true;
+ }
+ }
+ return false;
+}
+
+void StaticShapeInferer::setDynamicOutput(const ir::IOperation &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+ for (auto &&output_idx : op.getOutputs() | ir::Remove::UNDEFINED)
+ {
+ operands.at(output_idx).info().setDynamic();
+ }
+}
+
+void StaticShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op,
+ const ir::OperandIndex lhs_idx,
+ const ir::OperandIndex rhs_idx)
+{
+ auto &operands = _lowered_subg->graph().operands();
+ const auto &lhs = operands.at(lhs_idx);
+ const auto &rhs = operands.at(rhs_idx);
+
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ // re-sizing output shape
+ ir::Shape new_shape = shape_inference::inferEltwiseShape(lhs.info().shape(), rhs.info().shape());
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::handleSimpleUnaryOp(const ir::Operation &op,
+ const ir::OperandIndex input_idx)
+{
+ auto &operands = _lowered_subg->graph().operands();
+ const auto &input = operands.at(input_idx);
+
+ // get mutable output operand
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ // re-sizing output shape
+ ir::Shape new_shape = input.info().shape();
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::dump()
+{
+ auto get_shape_str = [](const ir::Shape &shape) {
+ std::stringstream sstream;
+ sstream << "shape : {";
+ for (int i = 0; i < shape.rank(); i++)
+ {
+ if (i == 0)
+ sstream << shape.dim(i);
+ else
+ sstream << " " << shape.dim(i);
+ }
+ sstream << "}";
+ return sstream.str();
+ };
+
+ _lowered_subg->graph().operands().iterate(
+ [&](const ir::OperandIndex &ind, const ir::Operand &operand) {
+ VERBOSE(StaticShapeInferer) << " " << ind << ", "
+ << (operand.info().isDynamic() ? "Dynamic" : "Static") << ", "
+ << get_shape_str(operand.info().shape()) << std::endl;
+ });
+}
+
+std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>>
+StaticShapeInferer::createStaticShapeInferers(
+ const std::unordered_map<ir::SubgraphIndex, ILoweredGraph *> &lowered_subgs)
+{
+ // Allocate StaticShapeInferer per each subgraph
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers;
+ for (auto &&pair : lowered_subgs)
+ {
+ const auto &subg_index = pair.first;
+ auto &lowered_subg = pair.second;
+ inferers[subg_index] = std::make_unique<StaticShapeInferer>(lowered_subg);
+ }
+
+ // Append observers in all StaticShapeInferers
+ for (auto &&pair : lowered_subgs)
+ {
+ const auto &subg_index = pair.first;
+ auto &lowered_subg = pair.second;
+
+ // TODO: Iterate over controlflow operations only instead of all operations
+ lowered_subg->graph().operations().iterate(
+ [&](const ir::OperationIndex &, const ir::IOperation &op) {
+ // A function to append child inferers. These make it possible for a StaticShapeInferer
+ // to call the StaticShapeInferers of child subgraphs recursively
+ auto appendChildInferer = [&](const ir::SubgraphIndex &child_subg_idx) {
+ auto *child_inferer = inferers.at(child_subg_idx).get();
+ inferers.at(subg_index)->appendChildInferer(child_subg_idx, child_inferer);
+ };
+
+ // A function to append subgraph input observers. This makes it possible for a
+ // StaticShapeInferer to update the inputs of child subgraphs
+ auto appendSubgraphInputObserver = [&](const ir::SubgraphIndex &child_subg_idx) {
+ std::vector<ir::Operand *> child_subg_inputs;
+ auto &child_subg = lowered_subgs.at(child_subg_idx)->graph();
+ for (const auto &input_idx : child_subg.getInputs())
+ {
+ auto operand_ptr = child_subg.operands().getRawPtr(input_idx);
+ child_subg_inputs.emplace_back(operand_ptr);
+ }
+ inferers.at(subg_index)
+ ->appendSubgInputObserver(child_subg_idx,
+ std::make_unique<OperandObserver>(child_subg_inputs));
+ };
+
+ // A function to set controlflow output observers. This makes it possible for a
+ // StaticShapeInferer to update the outputs of parent controlflow operations
+ auto setControlFlowOutputObserver = [&](const ir::SubgraphIndex &child_subg_idx) {
+ std::vector<ir::Operand *> cf_outputs;
+ auto &subg = lowered_subg->graph();
+ for (const auto &output_idx : op.getOutputs())
+ {
+ auto operand_ptr = subg.operands().getRawPtr(output_idx);
+ cf_outputs.emplace_back(operand_ptr);
+ }
+ inferers.at(child_subg_idx)
+ ->setControlflowOutputObserver(std::make_unique<OperandObserver>(cf_outputs));
+ };
+
+ // Append Observers in a StaticShapeInferer
+ if (op.opcode() == ir::OpCode::If)
+ {
+ // TODO Remove dynamic_cast
+ // A virtual base class cannot be downcast with static_cast
+ const auto &if_op = dynamic_cast<const ir::operation::If &>(op);
+
+ appendChildInferer(if_op.param().then_subg_index);
+ appendChildInferer(if_op.param().else_subg_index);
+
+ appendSubgraphInputObserver(if_op.param().then_subg_index);
+ appendSubgraphInputObserver(if_op.param().else_subg_index);
+
+ setControlFlowOutputObserver(if_op.param().then_subg_index);
+ }
+ else if (op.opcode() == ir::OpCode::While)
+ {
+ // TODO Remove dynamic_cast
+ const auto &while_op = dynamic_cast<const ir::operation::While &>(op);
+
+ appendChildInferer(while_op.param().cond_subg_index);
+ appendChildInferer(while_op.param().body_subg_index);
+
+ appendSubgraphInputObserver(while_op.param().cond_subg_index);
+ appendSubgraphInputObserver(while_op.param().body_subg_index);
+
+ setControlFlowOutputObserver(while_op.param().body_subg_index);
+ }
+ });
+ }
+
+ return inferers;
+}
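+// A hedged driver sketch for the factory above; `lowered_subgs` and the primary
+// subgraph index are assumptions mirroring how a compiler pass might call it:
+//   auto inferers = StaticShapeInferer::createStaticShapeInferers(lowered_subgs);
+//   inferers.at(ir::SubgraphIndex{0})->infer(); // children run via If/While visits
+//   for (auto &&it : inferers)
+//     it.second->dump();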
+
+void StaticShapeInferer::visit(const ir::operation::ArgMinMax &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)};
+ const auto &input = operands.at(input_idx);
+
+ const auto axis_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)};
+ const auto &axis = operands.at(axis_idx);
+
+ // get mutable output operand
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ if (!axis.isConstant())
+ {
+ output.info().setDynamic();
+ return;
+ }
+
+ const auto rank = input.info().shape().rank();
+ auto axis_value = axis.asScalar<int32_t>();
+ axis_value = axis_value < 0 ? axis_value + rank : axis_value;
+
+ // re-sizing output shape
+ ir::Shape new_shape =
+ shape_inference::inferArgMinMaxShape(input.info().shape(), axis_value, rank);
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::BatchMatMul &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto lhs_index = op.getInputs().at(ir::operation::BatchMatMul::Input::LHS);
+ const auto rhs_index = op.getInputs().at(ir::operation::BatchMatMul::Input::RHS);
+ const auto output_index = op.getOutputs().at(0);
+ const auto &lhs = operands.at(lhs_index);
+ const auto &rhs = operands.at(rhs_index);
+ auto &output = operands.at(output_index);
+ auto new_shape = shape_inference::inferBatchMatMulShape(lhs.shape(), rhs.shape(), op.param());
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::BCQFullyConnected &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(ir::operation::BCQFullyConnected::Input::INPUT)};
+ const auto &input = operands.at(input_idx);
+
+ const auto cluster_idx{
+ op.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)};
+ const auto &cluster = operands.at(cluster_idx);
+
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ auto cluster_buf = reinterpret_cast<const int32_t *>(cluster.data()->base());
+ assert(cluster_buf);
+
+ // re-sizing output shape
+ ir::Shape new_shape = shape_inference::inferBCQFullyConnectedShape(
+ input.info().shape(), cluster.info().shape(), cluster_buf);
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::BCQGather &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto indices_idx{op.getInputs().at(ir::operation::BCQGather::Input::INDICES)};
+ const auto &indices = operands.at(indices_idx);
+
+ const auto input_binary_idx{op.getInputs().at(ir::operation::BCQGather::Input::INPUT_BINARY)};
+ const auto &input_binary = operands.at(input_binary_idx);
+
+ const auto cluster_idx{op.getInputs().at(ir::operation::BCQGather::Input::INPUT_CLUSTERS)};
+ const auto &cluster = operands.at(cluster_idx);
+
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ auto cluster_buf = reinterpret_cast<const int32_t *>(cluster.data()->base());
+ assert(cluster_buf);
+
+ auto rank = input_binary.shape().rank();
+
+ // re-sizing output shape
+ ir::Shape new_shape = shape_inference::inferBCQGatherShape(
+ indices.info().shape(), cluster.info().shape(), cluster_buf, rank, op.param());
+
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::BinaryArithmetic &op)
+{
+ handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS),
+ op.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS));
+}
+
+void StaticShapeInferer::visit(const ir::operation::BroadcastTo &op)
+{
+ // get mutable output operand
+ auto &operands = _lowered_subg->graph().operands();
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ const auto shape_idx{op.getInputs().at(ir::operation::BroadcastTo::Input::SHAPE)};
+ const auto &shape = operands.at(shape_idx);
+
+ if (!shape.isConstant())
+ {
+ output.info().setDynamic();
+ return;
+ }
+
+ // assert(shape.typeInfo().type() == ir::DataType::INT32);
+ auto shape_buffer = reinterpret_cast<const int32_t *>(shape.data()->base());
+
+ // re-sizing output shape
+ ir::Shape new_shape = shape_inference::inferBroadcastToShape(shape.info().shape(), shape_buffer);
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::Comparison &op)
+{
+ handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Comparison::Input::INPUT0),
+ op.getInputs().at(ir::operation::Comparison::Input::INPUT1));
+}
+
+void StaticShapeInferer::visit(const ir::operation::Concat &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_count = op.getInputs().size();
+
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ shape_inference::Shapes input_shapes;
+ for (uint32_t i = 0; i < input_count; i++)
+ {
+ const auto input_idx{op.getInputs().at(i)};
+ const auto &input = operands.at(input_idx);
+ input_shapes.emplace_back(input.shape());
+ }
+
+ ir::Shape out_shape = shape_inference::inferConcatShape(input_shapes, op.param());
+
+ // re-sizing output shape
+ output.info().shape(out_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::Conv2D &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(ir::operation::Conv2D::Input::INPUT)};
+ const auto &input = operands.at(input_idx);
+ const auto ker_idx{op.getInputs().at(ir::operation::Conv2D::Input::KERNEL)};
+ const auto &ker = operands.at(ker_idx);
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ // re-sizing output shape
+ ir::Shape new_shape =
+ shape_inference::inferConv2DShape(input.info().shape(), ker.info().shape(), op.param());
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::ElementwiseActivation &op)
+{
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT));
+}
+
+void StaticShapeInferer::visit(const ir::operation::ElementwiseBinary &op)
+{
+ handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS),
+ op.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS));
+}
+
+void StaticShapeInferer::visit(const ir::operation::ElementwiseUnary &op)
+{
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT));
+}
+
+void StaticShapeInferer::visit(const ir::operation::ExpandDims &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
+ const auto &input = operands.at(input_idx);
+ const auto axis_idx{op.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
+ const auto &axis = operands.at(axis_idx);
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ if (!axis.isConstant())
+ {
+ output.info().setDynamic();
+ return;
+ }
+
+ // even when axis is constant, output shape should be recalculated since user might call
+ // nnfw_set_input_tensorinfo(input, some_new_shape)
+ auto axis_type = axis.typeInfo().type();
+ assert(axis_type == ir::DataType::INT32 || axis_type == ir::DataType::INT64);
+
+ assert(axis.data()->base());
+ int32_t axis_value =
+ (axis_type == ir::DataType::INT32)
+ ? reinterpret_cast<const int32_t *>(axis.data()->base())[0]
+ : static_cast<int32_t>(reinterpret_cast<const int64_t *>(axis.data()->base())[0]);
+
+ // re-sizing output shape
+ ir::Shape new_shape = shape_inference::inferExpandDimsShape(input.info().shape(), axis_value);
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::Fill &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto shape_idx{op.getInputs().at(ir::operation::Fill::Input::SHAPE)};
+ const auto &shape = operands.at(shape_idx);
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ if (!shape.isConstant())
+ {
+ output.info().setDynamic();
+ return;
+ }
+
+ const auto dims_type = shape.typeInfo().type();
+ assert(dims_type == ir::DataType::INT32 || dims_type == ir::DataType::INT64);
+
+ auto dims_buf = shape.data()->base();
+ assert(dims_buf);
+
+ const auto &dims_shape = shape.info().shape();
+ const auto &new_shape = ((dims_type == ir::DataType::INT32)
+ ? shape_inference::inferFillShape<int32_t>(
+ dims_shape, reinterpret_cast<const int32_t *>(dims_buf))
+ : shape_inference::inferFillShape<int64_t>(
+ dims_shape, reinterpret_cast<const int64_t *>(dims_buf)));
+
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::FullyConnected &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(ir::operation::FullyConnected::Input::INPUT)};
+ const auto &input = operands.at(input_idx);
+
+ const auto ker_idx{op.getInputs().at(ir::operation::FullyConnected::Input::WEIGHT)};
+ const auto &ker = operands.at(ker_idx);
+
+ // get mutable output operand
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
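+ // A worked example (hypothetical values, assuming the usual [batch, units] result): an input
+ // of shape [1, 100] with a weight of shape [20, 100] yields an output shape of [1, 20].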
+ // re-sizing output shape
+ ir::Shape new_shape =
+ shape_inference::inferFullyConnectedShape(input.info().shape(), ker.info().shape());
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::FusedBatchNorm &op)
+{
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::FusedBatchNorm::Input::INPUT));
+}
+
+void StaticShapeInferer::visit(const ir::operation::Gather &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(ir::operation::Gather::Input::INPUT)};
+ const auto &input = operands.at(input_idx);
+
+ // get mutable output operand
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ const auto indices_idx{op.getInputs().at(ir::operation::Gather::Input::INDICES)};
+ const auto &indices = operands.at(indices_idx);
+ const auto rank = input.info().shape().rank();
+ const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
+
+ assert(0 <= axis && axis < rank);
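+ // A worked example (hypothetical values): an input of shape [2, 3, 4] gathered with indices of
+ // shape [5] along axis = 1 yields an output shape of [2, 5, 4].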
+
+ // re-sizing output shape
+ ir::Shape new_shape =
+ shape_inference::inferGatherShape(input.info().shape(), indices.info().shape(), axis, rank);
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::If &op)
+{
+ // re-sizing input shapes of then/else subgraphs
+ const std::vector<ir::OperandIndex> inputs{op.getInputs().begin() + 1, op.getInputs().end()};
+
+ std::vector<ir::OperandInfo> inputs_info;
+ const auto &graph = _lowered_subg->graph();
+ for (size_t i = 0; i < inputs.size(); ++i)
+ {
+ const auto &operand_info = graph.operands().at(inputs.at(i)).info();
+ inputs_info.emplace_back(operand_info);
+ }
+ _subg_input_observers.at(op.param().then_subg_index)->updateShapes(inputs_info);
+ _child_inferers.at(op.param().then_subg_index)->infer();
+
+ _subg_input_observers.at(op.param().else_subg_index)->updateShapes(inputs_info);
+ _child_inferers.at(op.param().else_subg_index)->infer();
+}
+
+void StaticShapeInferer::visit(const ir::operation::L2Normalization &op)
+{
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::L2Normalization::Input::INPUT));
+}
+
+void StaticShapeInferer::visit(const ir::operation::Loss &)
+{
+ // TODO Consider SparseCategoricalCrossentropy case
+
+ // TODO Consider output shape in case of reduction option
+}
+
+void StaticShapeInferer::visit(const ir::operation::LSTM &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto output_index{op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
+ auto &output = operands.at(output_index);
+
+ const auto output_state_out_index{
+ op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
+
+ const auto cell_state_out_index{op.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
+
+ const auto scratch_buffer_index{op.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
+
+ if (output.info().isDynamic() ||
+ (operands.exist(output_state_out_index) &&
+ operands.at(output_state_out_index).info().isDynamic()) ||
+ (operands.exist(cell_state_out_index) &&
+ operands.at(cell_state_out_index).info().isDynamic()) ||
+ (operands.exist(scratch_buffer_index) &&
+ operands.at(scratch_buffer_index).info().isDynamic()))
+ return;
+
+ const auto input_index{op.getInputs().at(ir::operation::LSTM::Input::INPUT)};
+ const auto &input = operands.at(input_index);
+
+ const auto input_to_output_weights_index{
+ op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
+ const auto &input_to_output_weights = operands.at(input_to_output_weights_index);
+
+ const auto recurrent_to_output_weights_index{
+ op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
+ const auto &recurrent_to_output_weights = operands.at(recurrent_to_output_weights_index);
+
+ // re-sizing outputs
+ const int n_batch = (input.shape().rank() == 3 && op.param().time_major) ? input.shape().dim(1)
+ : input.shape().dim(0);
+ const int n_cell = input_to_output_weights.shape().dim(0);
+ const int n_output = recurrent_to_output_weights.shape().dim(1);
+ if (input.shape().rank() == 3)
+ {
+ if (op.param().time_major)
+ output.info().shape(ir::Shape{input.shape().dim(0), n_batch, n_output});
+ else
+ output.info().shape(ir::Shape{n_batch, input.shape().dim(1), n_output});
+ }
+ else
+ {
+ assert(input.shape().rank() == 2);
+ output.info().shape(ir::Shape{n_batch, n_output});
+ }
+
+ if (operands.exist(output_state_out_index))
+ {
+ auto &output_state_out = operands.at(output_state_out_index);
+ output_state_out.info().shape(ir::Shape{n_batch, n_output});
+ }
+
+ if (operands.exist(cell_state_out_index))
+ {
+ auto &cell_state_out = operands.at(cell_state_out_index);
+ cell_state_out.info().shape(ir::Shape{n_batch, n_cell});
+ }
+
+ if (operands.exist(scratch_buffer_index))
+ {
+ auto &scratch_buffer = operands.at(scratch_buffer_index);
+
+ const auto input_to_input_weights_index{
+ op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)};
+ const auto recurrent_to_input_weights_index{
+ op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)};
+
+ bool has_input_to_input_weights =
+ operands.at(input_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(input_to_input_weights_index).shape().dim(1) != 0;
+ bool has_recurrent_to_input_weights =
+ operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
+
+ // NOTE The cell_to_input_weights do not exist in a non-peephole LSTM even when it is a
+ // regular (non-CIFG) LSTM.
+ // true: no CIFG
+ // false: CIFG
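+ // A worked example (hypothetical values): with n_batch = 2 and n_cell = 8, the scratch buffer
+ // becomes {2, 32} for non-CIFG and {2, 24} for CIFG.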
+ bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights;
+ if (has_cifg_param)
+ {
+ scratch_buffer.info().shape(ir::Shape{n_batch, n_cell * 4});
+ }
+ else
+ {
+ scratch_buffer.info().shape(ir::Shape{n_batch, n_cell * 3});
+ }
+ }
+}
+
+void StaticShapeInferer::visit(const ir::operation::MatrixBandPart &op)
+{
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::MatrixBandPart::Input::INPUT));
+}
+
+void StaticShapeInferer::visit(const ir::operation::OneHot &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto indice_idx{op.getInputs().at(ir::operation::OneHot::Input::INDICES)};
+ const auto &indice = operands.at(indice_idx);
+ const auto depth_idx{op.getInputs().at(ir::operation::OneHot::Input::DEPTH)};
+ const auto &depth = operands.at(depth_idx);
+
+ const auto axis = op.param().axis;
+
+ auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ if (!depth.isConstant())
+ {
+ output.info().setDynamic();
+ return;
+ }
+
+ const auto *depth_buf = reinterpret_cast<const int32_t *>(depth.data()->base());
+ assert(depth_buf);
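+ // A worked example (hypothetical values): indices of shape [3] with depth = 4 and axis = -1
+ // yield an output shape of [3, 4].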
+ // re-sizing output shape
+ ir::Shape new_shape = shape_inference::inferOnehotShape(indice.info().shape(), *depth_buf, axis);
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::Pack &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(0)};
+ const auto &input = operands.at(input_idx);
+
+ // get mutable output operand
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ const auto rank = input.shape().rank() + 1;
+ const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
+ const auto num = op.param().num;
+
+ assert(0 <= axis && axis < rank);
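+ // A worked example (hypothetical values): packing num = 3 inputs of shape [4, 5] along
+ // axis = 0 yields an output shape of [3, 4, 5].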
+
+ // re-sizing output shape
+ ir::Shape new_shape = shape_inference::inferPackShape(input.info().shape(), axis, rank, num);
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::Pad &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(ir::operation::Pad::Input::INPUT)};
+ const auto &input = operands.at(input_idx);
+
+ const auto pad_idx{op.getInputs().at(ir::operation::Pad::Input::PAD)};
+ const auto &pad = operands.at(pad_idx);
+
+ // get mutable output operand
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ // if pad is not constant, output also becomes dynamic
+ if (!pad.isConstant())
+ {
+ output.info().setDynamic();
+ return;
+ }
+
+ // re-sizing output shape
+ const auto new_shape = shape_inference::inferPadShape(
+ input.shape(), reinterpret_cast<const int32_t *>(pad.data()->base()),
+ pad.shape().num_elements());
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::Permute &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(0)};
+ const auto &input = operands.at(input_idx);
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ // re-sizing output shape
+ // Permute is a special operation whose input/output layouts may differ per backend.
+ // However, that is not applied here, so input/output keep the same frontend layout, because
+ // "ExecutorFactory" converts the input/output shapes according to the layouts when registering
+ // operand info to "TensorBuilder" after calling "StaticShapeInferer"
+ const auto &new_shape = input.info().shape();
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::Pow &op)
+{
+ handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Pow::Input::LHS),
+ op.getInputs().at(ir::operation::Pow::Input::RHS));
+}
+
+void StaticShapeInferer::visit(const ir::operation::Range &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto start_idx{op.getInputs().at(ir::operation::Range::Input::START)};
+ const auto limit_idx{op.getInputs().at(ir::operation::Range::Input::LIMIT)};
+ const auto delta_idx{op.getInputs().at(ir::operation::Range::Input::DELTA)};
+ const auto &start_op = operands.at(start_idx);
+ const auto &limit_op = operands.at(limit_idx);
+ const auto &delta_op = operands.at(delta_idx);
+
+ // get mutable output operand
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ ir::Shape new_shape;
+ if (start_op.isConstant() && limit_op.isConstant() && delta_op.isConstant())
+ {
+ assert(start_op.typeInfo().type() == limit_op.typeInfo().type() &&
+ start_op.typeInfo().type() == delta_op.typeInfo().type());
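+ // A worked example (hypothetical values): start = 0, limit = 10 and delta = 2 produce
+ // 5 elements, i.e. an output shape of [5].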
+ if (output.typeInfo().type() == ir::DataType::FLOAT32)
+ {
+ new_shape = shape_inference::inferRangeShape<float>(
+ start_op.asScalar<float>(), limit_op.asScalar<float>(), delta_op.asScalar<float>());
+ }
+ else if (output.typeInfo().type() == ir::DataType::INT32)
+ {
+ new_shape = shape_inference::inferRangeShape<int32_t>(
+ start_op.asScalar<int32_t>(), limit_op.asScalar<int32_t>(), delta_op.asScalar<int32_t>());
+ }
+ assert(output.shape() == new_shape);
+ }
+ else
+ {
+ output.info().setDynamic();
+ }
+}
+
+void StaticShapeInferer::visit(const ir::operation::Reduce &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(ir::operation::Reduce::Input::INPUT)};
+ const auto &input = operands.at(input_idx);
+
+ const auto axes_idx{op.getInputs().at(ir::operation::Reduce::Input::AXES)};
+ const auto &axes = operands.at(axes_idx);
+
+ // get mutable output operand
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ std::vector<int32_t> axes_vec;
+ for (size_t i = 0; i < axes.shape().num_elements(); ++i)
+ {
+ switch (axes.typeInfo().type())
+ {
+ case ir::DataType::INT32:
+ {
+ axes_vec.emplace_back(reinterpret_cast<const int32_t *>(axes.data()->base())[i]);
+ break;
+ }
+ case ir::DataType::INT64:
+ {
+ axes_vec.emplace_back(reinterpret_cast<const int64_t *>(axes.data()->base())[i]);
+ break;
+ }
+ default:
+ throw std::runtime_error("StaticShapeInferer " + op.name() + ": Not supported data type");
+ }
+ }
+ const auto keep_dims = op.param().keep_dims;
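+ // A worked example (hypothetical values): an input of shape [2, 3, 4] reduced over
+ // axes = {1} yields [2, 4] with keep_dims = false, or [2, 1, 4] with keep_dims = true.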
+
+ // re-sizing output shape
+ ir::Shape new_shape =
+ shape_inference::inferReduceShape(input.info().shape(), axes_vec, keep_dims);
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::Reshape &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(ir::operation::Reshape::Input::INPUT)};
+ const auto &input = operands.at(input_idx);
+
+ // get mutable output operand
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ // New shape is given by second input tensor
+ if (op.getInputs().size() == 2)
+ {
+ // Let's check the second input
+ const auto shape_idx{op.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
+ const auto &shape = operands.at(shape_idx);
+
+ if (shape.isConstant())
+ {
+ const auto *shape_buf = reinterpret_cast<const int32_t *>(shape.data()->base());
+ assert(shape_buf);
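+ // A worked example (hypothetical values, assuming TFLite-style semantics where a single -1
+ // entry is inferred from the element count): a 24-element input with a shape buffer of
+ // {2, -1, 4} yields an output shape of [2, 3, 4].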
+
+ ir::Shape new_shape = shape_inference::inferReshapeShape(
+ shape_buf, shape.shape().num_elements(), input.shape().num_elements());
+
+ // if shape comes from a Const, TFLC puts the output shape into the tensor
+ if (new_shape != output.shape())
+ {
+ // change on output shape
+ output.info().shape(new_shape);
+ }
+ }
+ else
+ {
+ // if shape is NOT Const, set output shape to dynamic
+ output.info().setDynamic();
+ }
+ }
+ // New shape is given by option
+ else if (op.param().new_shape.size() != 0)
+ {
+ // Let's check the new_shape option
+ auto shape = op.param().new_shape;
+ ir::Shape new_shape =
+ shape_inference::inferReshapeShape(shape.data(), shape.size(), input.shape().num_elements());
+
+ if (new_shape != output.shape())
+ {
+ // change on output shape
+ output.info().shape(new_shape);
+ }
+ }
+ else
+ {
+ throw std::runtime_error("Reshape: new shape is missing");
+ }
+}
+
+void StaticShapeInferer::visit(const ir::operation::ResizeBilinear &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
+ const auto &input = operands.at(input_idx);
+
+ // get mutable output operand
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ int32_t height_out, width_out;
+ if (op.getInputs().size() == 2)
+ {
+ auto &size = operands.at(op.getInputs().at(ir::operation::ResizeBilinear::Input::SIZE));
+ if (!size.isConstant())
+ {
+ output.info().setDynamic();
+ return;
+ }
+ const auto size_v = size.asVector<std::int32_t>();
+ height_out = size_v[0];
+ width_out = size_v[1];
+ }
+ else
+ {
+ height_out = op.param().height_out;
+ width_out = op.param().width_out;
+ }
+
+ // Shape inference logic based on Params
+ ir::Shape new_shape =
+ shape_inference::inferResizeBilinearShape(input.shape(), height_out, width_out);
+
+ // if size comes from a Const, TFLC puts the output shape into the tensor
+ if (new_shape != output.shape())
+ {
+ // change on output shape
+ output.info().shape(new_shape);
+ }
+}
+
+void StaticShapeInferer::visit(const ir::operation::Reverse &op)
+{
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Reverse::Input::INPUT));
+}
+
+void StaticShapeInferer::visit(const ir::operation::Select &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_cond_idx{op.getInputs().at(ir::operation::Select::Input::CONDITION)};
+ const auto &input_cond = operands.at(input_cond_idx);
+
+ const auto input_true_idx{op.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
+ const auto &input_true = operands.at(input_true_idx);
+
+ const auto input_false_idx{op.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};
+ const auto &input_false = operands.at(input_false_idx);
+
+ auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ // Select output shape
+ ir::Shape new_shape = shape_inference::inferSelectShape(
+ input_cond.info().shape(), input_true.info().shape(), input_false.info().shape());
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::Shape &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(0)};
+ const auto &input = operands.at(input_idx);
+
+ // get mutable output operand
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ // re-sizing output shape
+ ir::Shape output_shape;
+ output_shape.append(input.info().shape().rank());
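+ // A worked example (hypothetical values): a rank-3 input of shape [2, 3, 4] produces a 1-D
+ // output of shape [3], which will hold the three dimension values at runtime.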
+
+ output.info().shape(output_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::Slice &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_index{op.getInputs().at(ir::operation::Slice::Input::INPUT)};
+ const auto &input = operands.at(input_index);
+ const auto begins_index{op.getInputs().at(ir::operation::Slice::Input::BEGINS)};
+ const auto &begins = operands.at(begins_index);
+ const auto sizes_index{op.getInputs().at(ir::operation::Slice::Input::SIZES)};
+ const auto &sizes = operands.at(sizes_index);
+ const auto output_index = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_index);
+
+ // Whether input is constant or not does not affect whether output is dynamic or not
+ if (!(begins.isConstant() && sizes.isConstant()))
+ {
+ output.info().setDynamic();
+ return;
+ }
+
+ auto begins_buf = begins.data()->base();
+ auto sizes_buf = sizes.data()->base();
+
+ const auto begins_type = begins.typeInfo().type();
+ assert(begins_type == ir::DataType::INT32 || begins_type == ir::DataType::INT64);
+ assert(begins_type == sizes.typeInfo().type());
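+ // A worked example (hypothetical values): an input of shape [4, 8] with begins = {1, 2} and
+ // sizes = {2, 4} yields an output shape of [2, 4].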
+
+ ir::Shape new_shape =
+ (begins_type == ir::DataType::INT32)
+ ? shape_inference::inferSliceShape<int32_t>(input.info().shape(),
+ reinterpret_cast<const int32_t *>(begins_buf),
+ reinterpret_cast<const int32_t *>(sizes_buf))
+ : shape_inference::inferSliceShape<int64_t>(input.info().shape(),
+ reinterpret_cast<const int64_t *>(begins_buf),
+ reinterpret_cast<const int64_t *>(sizes_buf));
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::Softmax &op)
+{
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Softmax::Input::INPUT));
+}
+
+void StaticShapeInferer::visit(const ir::operation::SpaceToBatchND &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto output_index = op.getOutputs().at(0);
+ const auto input_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
+ const auto &block_shape_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
+ const auto &padding_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
+
+ ir::Operand &output = operands.at(output_index);
+ const auto &input = operands.at(input_idx);
+ const auto &block_shape = operands.at(block_shape_idx);
+ const auto &padding = operands.at(padding_idx);
+
+ // Whether input is constant or not does not affect whether output is dynamic or not
+ if (!(block_shape.isConstant() && padding.isConstant()))
+ {
+ output.info().setDynamic();
+ return;
+ }
+
+ const auto &input_shape = input.info().shape();
+ const auto &block_shape_shape = block_shape.info().shape();
+ const auto &padding_shape = padding.info().shape();
+
+ auto block_shape_data = reinterpret_cast<const int32_t *>(block_shape.data()->base());
+ auto padding_data = reinterpret_cast<const int32_t *>(padding.data()->base());
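+ // A worked example (hypothetical values): an input of shape [1, 4, 4, 1] with
+ // block_shape = {2, 2} and zero paddings yields an output shape of [4, 2, 2, 1].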
+
+ ir::Shape new_shape = shape_inference::inferSpaceToBatchNDShape(
+ input_shape, block_shape_shape, padding_shape, block_shape_data, padding_data);
+
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::Split &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(ir::operation::Split::Input::INPUT)};
+ const auto &input = operands.at(input_idx);
+
+ const auto axis_idx{op.getInputs().at(ir::operation::Split::Input::AXIS)};
+ const auto &axis = operands.at(axis_idx);
+
+ auto outputs = op.getOutputs();
+ if (!axis.isConstant())
+ {
+ for (auto &&output_idx : outputs)
+ {
+ ir::Operand &output = operands.at(output_idx);
+ output.info().setDynamic();
+ }
+ return;
+ }
+
+ const auto num_splits = op.param().num_splits;
+
+ const auto rank = input.info().shape().rank();
+ auto axis_value = axis.asScalar<int32_t>();
+ axis_value = axis_value < 0 ? axis_value + rank : axis_value;
+
+ assert(0 <= axis_value && axis_value < rank);
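+ // A worked example (hypothetical values): an input of shape [6, 4] split along axis = 0 with
+ // num_splits = 3 gives every output the shape [2, 4].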
+
+ ir::Shape new_shape =
+ shape_inference::inferSplitShape(input.info().shape(), axis_value, num_splits);
+ for (auto &&output_idx : outputs)
+ {
+ ir::Operand &output = operands.at(output_idx);
+ output.info().shape(new_shape);
+ }
+}
+
+void StaticShapeInferer::visit(const ir::operation::SquaredDifference &op)
+{
+ handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::SquaredDifference::Input::LHS),
+ op.getInputs().at(ir::operation::SquaredDifference::Input::RHS));
+}
+
+void StaticShapeInferer::visit(const ir::operation::Squeeze &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
+ const auto &input = operands.at(input_idx);
+
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ // Squeeze output shape
+ ir::Shape new_shape = shape_inference::inferSqueezeShape(input.info().shape(), op.param());
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::StridedSlice &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_index{op.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
+ const auto &input = operands.at(input_index);
+ const auto starts_index{op.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
+ const auto &starts = operands.at(starts_index);
+ const auto ends_index{op.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
+ const auto &ends = operands.at(ends_index);
+ const auto strides_index{op.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
+ const auto &strides = operands.at(strides_index);
+ const auto output_index = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_index);
+
+ if (!(starts.isConstant() && ends.isConstant() && strides.isConstant()))
+ {
+ output.info().setDynamic();
+ return;
+ }
+
+ const auto begin_mask = op.param().begin_mask;
+ const auto end_mask = op.param().end_mask;
+ const auto shrink_axis_mask = op.param().shrink_axis_mask;
+ const auto rank = input.info().shape().rank();
+
+ auto starts_buf = reinterpret_cast<const uint32_t *>(starts.data()->base());
+ auto ends_buf = reinterpret_cast<const uint32_t *>(ends.data()->base());
+ auto strides_buf = reinterpret_cast<const uint32_t *>(strides.data()->base());
+
+ auto op_params = shape_inference::buildStridedSliceParams(
+ starts_buf, ends_buf, strides_buf, begin_mask, end_mask, shrink_axis_mask, rank);
+
+ ir::Shape new_shape =
+ shape_inference::inferStridedSliceShape(input.info().shape(), op_params, rank);
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::Tile &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(ir::operation::Tile::Input::INPUT)};
+ const auto &input = operands.at(input_idx);
+
+ const auto multiplier_idx{op.getInputs().at(ir::operation::Tile::Input::MULTIPLES)};
+ const auto &multiplier = operands.at(multiplier_idx);
+
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ if (!multiplier.isConstant())
+ {
+ output.info().setDynamic();
+ return;
+ }
+
+ auto multiplier_buffer = reinterpret_cast<const int32_t *>(multiplier.data()->base());
+ assert(multiplier_buffer);
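+ // A worked example (hypothetical values): an input of shape [2, 3] with multiples = {2, 2}
+ // yields an output shape of [4, 6].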
+
+ // re-sizing output shape
+ auto new_shape = shape_inference::inferTileShape(input.info().shape(), multiplier_buffer,
+ multiplier.shape().num_elements());
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::Transpose &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(ir::operation::Transpose::Input::INPUT)};
+ const auto &input = operands.at(input_idx);
+
+ const auto perm_idx{op.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};
+ const auto &perm = operands.at(perm_idx);
+
+ // perm.shape() == ir::Shape{0} means that perm is (n-1...0)
+ // TODO This condition changes to perm.num_elements() == 0
+ const auto is_regular_transpose = perm.shape() == ir::Shape{0};
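+ // A worked example (hypothetical values): for a rank-3 input of shape [2, 3, 4], an empty perm
+ // implies (2, 1, 0), giving an output shape of [4, 3, 2].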
+
+ // get mutable output operand
+ const auto output_idx = op.getOutputs().at(0);
+ auto &output = operands.at(output_idx);
+ if (!perm.isConstant() && !is_regular_transpose)
+ {
+ output.info().setDynamic();
+ return;
+ }
+
+ ir::Shape new_shape;
+ if (is_regular_transpose)
+ {
+ // Call by (n-1...0)
+ new_shape = shape_inference::inferTransposeShape(input.info().shape(), nullptr, 0);
+ }
+ else
+ {
+ // Check rank
+ if (input.info().shape().rank() != static_cast<int>(perm.info().shape().num_elements()))
+ {
+ throw std::runtime_error("StaticShapeInferer failed, bad rank size: " +
+ std::to_string(perm.info().shape().num_elements()));
+ }
+
+ // set output shape, based on input and params
+ const auto perm_buf = reinterpret_cast<const int32_t *>(perm.data()->base());
+ new_shape = shape_inference::inferTransposeShape(input.info().shape(), perm_buf,
+ perm.shape().num_elements());
+ }
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::Unpack &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(0)};
+ const auto &input = operands.at(input_idx);
+ const auto num = op.param().num;
+ const auto rank = input.shape().rank();
+ const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
+
+ assert(axis < rank);
+ if (axis < 0)
+ {
+ for (int out_tensor_idx = 0; out_tensor_idx < num; out_tensor_idx++)
+ {
+ const auto output_idx = op.getOutputs().at(out_tensor_idx);
+ ir::Operand &output = operands.at(output_idx);
+ output.info().setDynamic();
+ }
+ return;
+ }
+
+ ir::Shape new_shape = shape_inference::inferUnpackShape(input.info().shape(), axis, rank);
+
+ // re-sizing output shape
+ for (int out_tensor_idx = 0; out_tensor_idx < num; out_tensor_idx++)
+ {
+ const auto output_idx = op.getOutputs().at(out_tensor_idx);
+ ir::Operand &output = operands.at(output_idx);
+ output.info().shape(new_shape);
+ }
+}
+
+void StaticShapeInferer::visit(const ir::operation::While &op)
+{
+ auto body_input_observer = _subg_input_observers.at(op.param().body_subg_index).get();
+ auto cond_input_observer = _subg_input_observers.at(op.param().cond_subg_index).get();
+ // re-sizing input shapes of body subgraph
+ const auto &inputs = op.getInputs();
+ std::vector<ir::OperandInfo> inputs_info;
+ const auto &graph = _lowered_subg->graph();
+ for (size_t i = 0; i < inputs.size(); ++i)
+ {
+ const auto &operand_info = graph.operands().at(inputs.at(i)).info();
+ inputs_info.emplace_back(operand_info);
+ }
+
+ body_input_observer->updateShapes(inputs_info);
+ _child_inferers.at(op.param().body_subg_index)->infer();
+
+ // Check whether the While operation's shapes are predictable
+ // This While op's outputs are also updated by the call above to
+ // "_child_inferers.at(op.param().body_subg_index)->infer()". That means the body's outputs and
+ // this op's outputs must have the same shapes. So we can predict whether the body subgraph will
+ // change at every step by comparing the shapes of its inputs/outputs. If any shape of the body
+ // outputs and inputs differs, non-constant operands will be set to dynamic.
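+ // A worked example (hypothetical values): if a [1, 4] input comes back from the body as
+ // [1, 8], the shapes may change on every iteration, so non-constant operands go dynamic.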
+ bool check_unpredictable_dynamic = false;
+ const auto &updated_outputs = op.getOutputs();
+ assert(inputs_info.size() == updated_outputs.size());
+ for (size_t i = 0; i < updated_outputs.size(); ++i)
+ {
+ const auto &input_info = inputs_info.at(i);
+ const auto &output_info = graph.operands().at(updated_outputs.at(i)).info();
+ if (input_info.isDynamic() != output_info.isDynamic() ||
+ input_info.shape() != output_info.shape())
+ {
+ check_unpredictable_dynamic = true;
+ break;
+ }
+ }
+
+ if (check_unpredictable_dynamic)
+ {
+ body_input_observer->updateShapes(inputs_info, check_unpredictable_dynamic);
+ _child_inferers.at(op.param().body_subg_index)->infer();
+ }
+ cond_input_observer->updateShapes(inputs_info, check_unpredictable_dynamic);
+ _child_inferers.at(op.param().cond_subg_index)->infer();
+}
+
+void StaticShapeInferer::visit(const ir::operation::DetectionPostProcess &op)
+{
+ // TODO: NMS supports very limited input/output size.
+ ir::operation::DetectionPostProcess::Param param = op.param();
+
+ auto &operands = _lowered_subg->graph().operands();
+ const int num_detected_boxes = param.max_detections * param.max_classes_per_detection;
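+ // A worked example (hypothetical values): max_detections = 10 with
+ // max_classes_per_detection = 1 gives num_detected_boxes = 10, so the boxes output below is
+ // shaped {1, 10, 4}.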
+
+ const auto output_idx1 = op.getOutputs().at(0);
+ auto &output1 = operands.at(output_idx1);
+ output1.info().shape({1, num_detected_boxes, 4});
+
+ const auto output_idx2 = op.getOutputs().at(1);
+ auto &output2 = operands.at(output_idx2);
+ output2.info().shape({1, num_detected_boxes});
+
+ const auto output_idx3 = op.getOutputs().at(2);
+ auto &output3 = operands.at(output_idx3);
+ output3.info().shape({1, num_detected_boxes});
+
+ const auto output_idx4 = op.getOutputs().at(3);
+ auto &output4 = operands.at(output_idx4);
+ output4.info().shape({1});
+}
+
+void StaticShapeInferer::visit(const ir::operation::Bulk &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ // TODO: support multiple inputs/outputs
+ const auto input_idx{op.getInputs().at(0)};
+ const auto &input = operands.at(input_idx);
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ const auto &cur_input_shape = input.info().shape();
+ auto origin_output_shape = op.param().origin_output_shapes[0];
+
+ // TODO: more check for valid batch request
+ if ((cur_input_shape.dim(0) < origin_output_shape.dim(0)) ||
+ (cur_input_shape.dim(0) % origin_output_shape.dim(0) != 0))
+ {
+ throw std::runtime_error("StaticShapeInferer " + op.name() + ": Not supported batch size");
+ }
+ size_t batch_multiplier = cur_input_shape.dim(0) / origin_output_shape.dim(0);
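+ // A worked example (hypothetical values): an origin output shape of {1, 1001} with a current
+ // input batch of 4 gives batch_multiplier = 4 and a new output shape of {4, 1001}.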
+
+ ir::Shape new_shape;
+ new_shape.append(origin_output_shape.dim(0) * batch_multiplier);
+ for (int32_t d = 1; d < origin_output_shape.rank(); ++d)
+ new_shape.append(origin_output_shape.dim(d));
+
+ output.info().shape(new_shape);
+}
+
+} // namespace compiler
+
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/TensorBuilders.h b/runtime/onert/core/src/compiler/TensorBuilders.h
deleted file mode 100644
index 3b0360b4b..000000000
--- a/runtime/onert/core/src/compiler/TensorBuilders.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_COMPILER_TENSOR_BUILDERS_H__
-#define __ONERT_COMPILER_TENSOR_BUILDERS_H__
-
-#include <unordered_set>
-#include <memory>
-#include "backend/BackendContext.h"
-#include "backend/Backend.h"
-#include "backend/controlflow/Config.h"
-#include "backend/controlflow/TensorBuilder.h"
-#include "util/logging.h"
-
-namespace onert
-{
-namespace compiler
-{
-
-class TensorBuilders
-{
-public:
- TensorBuilders() = default;
-
- TensorBuilders(const onert::backend::BackendContexts &backend_contexts, bool include_controlflow)
- {
- for (const auto &e : backend_contexts)
- {
- if (e.first->config()->id() == backend::controlflow::Config::ID)
- {
- _cf_tensor_builder = std::dynamic_pointer_cast<backend::controlflow::TensorBuilder>(
- e.second->tensor_builder);
- if (include_controlflow)
- _tensor_builders.insert(e.second->tensor_builder);
- }
- else
- {
- _tensor_builders.insert(e.second->tensor_builder);
- }
- }
- }
-
- std::unordered_set<std::shared_ptr<onert::backend::ITensorBuilder>>::const_iterator begin() const
- {
- return _tensor_builders.cbegin();
- }
- std::unordered_set<std::shared_ptr<onert::backend::ITensorBuilder>>::const_iterator end() const
- {
- return _tensor_builders.cend();
- }
-
- std::shared_ptr<backend::controlflow::TensorBuilder> getControlflowTensorBuilder() const
- {
- return _cf_tensor_builder;
- }
-
-private:
- std::unordered_set<std::shared_ptr<backend::ITensorBuilder>> _tensor_builders;
- std::shared_ptr<backend::controlflow::TensorBuilder> _cf_tensor_builder;
-};
-
-} // namespace compiler
-} // namespace onert
-
-#endif // __ONERT_COMPILER_TENSOR_BUILDERS_H__
diff --git a/runtime/onert/core/src/compiler/TensorRegistries.h b/runtime/onert/core/src/compiler/TensorRegistries.h
index 8be87b081..c7e06e84c 100644
--- a/runtime/onert/core/src/compiler/TensorRegistries.h
+++ b/runtime/onert/core/src/compiler/TensorRegistries.h
@@ -17,13 +17,14 @@
#ifndef __ONERT_COMPILER_TENSOR_REGISTRIES_H__
#define __ONERT_COMPILER_TENSOR_REGISTRIES_H__
-#include <unordered_set>
-#include <memory>
-#include "backend/BackendContext.h"
+#include "../backend/builtin/Config.h"
+#include "../backend/builtin/TensorRegistry.h"
+
#include "backend/Backend.h"
-#include "backend/controlflow/Config.h"
-#include "backend/controlflow/TensorBuilder.h"
-#include "backend/controlflow/TensorRegistry.h"
+#include "backend/BackendContext.h"
+
+#include <memory>
+#include <unordered_set>
namespace onert
{
@@ -35,17 +36,16 @@ class TensorRegistries
public:
TensorRegistries() = default;
- TensorRegistries(const onert::backend::BackendContexts &backend_contexts,
- bool include_controlflow)
+ TensorRegistries(const onert::backend::BackendContexts &backend_contexts, bool include_builtin)
{
for (const auto &e : backend_contexts)
{
auto tensor_reg = e.second->tensor_registry;
- if (e.first->config()->id() == backend::controlflow::Config::ID)
+ if (e.first->config()->id() == backend::builtin::Config::ID)
{
- _cf_tensor_reg =
- std::dynamic_pointer_cast<backend::controlflow::TensorRegistry>(tensor_reg);
- if (include_controlflow)
+ _builtin_tensor_reg =
+ std::dynamic_pointer_cast<backend::builtin::TensorRegistry>(tensor_reg);
+ if (include_builtin)
_tensor_regs.insert(tensor_reg);
}
else
@@ -64,14 +64,14 @@ public:
return _tensor_regs.cend();
}
- std::shared_ptr<backend::controlflow::TensorRegistry> getControlflowTensorRegistry() const
+ std::shared_ptr<backend::builtin::TensorRegistry> getBuiltinTensorRegistry() const
{
- return _cf_tensor_reg;
+ return _builtin_tensor_reg;
}
- std::shared_ptr<backend::ITensor> getITensor(ir::OperandIndex ind) const
+ backend::ITensor *getITensor(ir::OperandIndex ind) const
{
- for (auto &tensor_reg : _tensor_regs)
+ for (auto &&tensor_reg : _tensor_regs)
{
auto tensor = tensor_reg->getITensor(ind);
if (tensor)
@@ -82,7 +82,7 @@ public:
private:
std::unordered_set<std::shared_ptr<backend::ITensorRegistry>> _tensor_regs;
- std::shared_ptr<backend::controlflow::TensorRegistry> _cf_tensor_reg;
+ std::shared_ptr<backend::builtin::TensorRegistry> _builtin_tensor_reg;
};
} // namespace compiler
diff --git a/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.cc b/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.cc
index 647669e46..a6590b13f 100644
--- a/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.cc
+++ b/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.cc
@@ -17,8 +17,9 @@
#include "ConstantInsertionPass.h"
#include "backend/Backend.h"
-#include <ir/Graph.h>
-#include <util/Utils.h>
+#include "ir/Graph.h"
+#include "util/Utils.h"
+#include "util/logging.h"
namespace onert
{
@@ -27,15 +28,14 @@ namespace compiler
namespace pass
{
-void ConstantInsertionPass::callback(const ir::OperationIndex &node_index, ir::Operation &node)
+void ConstantInsertionPass::callback(const ir::OperationIndex &node_index, ir::IOperation &node)
{
- const auto &op_sequence_index = _lowered_graph.op_seqs().getOperation(node_index);
- const auto op_seq_lower_info = _lowered_graph.getLowerInfo(op_sequence_index);
- const auto backend = op_seq_lower_info->backend();
- const auto layout = op_seq_lower_info->layout();
- const auto factor = ir::operand::PermuteFactor{backend, layout};
+ const auto op_lower_info = _lowered_graph.lower_info().operation.getRawPtr(node_index);
+ const auto backend = op_lower_info->backend();
+ const auto layout = op_lower_info->layout();
+ const auto factor = PermuteFactor{backend, layout};
- for (const auto input : node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
+ for (const auto &input : node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
auto &object = _graph.operands().at(input);
@@ -44,22 +44,13 @@ void ConstantInsertionPass::callback(const ir::OperationIndex &node_index, ir::O
const auto key = ReplaceKey{input, factor};
if (_replace_operands_map.count(key) == 0)
{
- auto new_object = object;
- new_object.unsetDef();
- // TODO Remove const_case
- const_cast<ir::OperationIndexSet &>(new_object.getUses()).clear();
+ ir::Operand new_object(object);
+ new_object.clearDefUse();
const auto new_index = _graph.operands().emplace(new_object);
_replace_operands_map[key] = new_index;
}
const auto replaced_input = _replace_operands_map[key];
- // Update op_seq
- if (_lowered_graph.op_seqs().at(op_sequence_index).getInputs().contains(input))
- {
- // All inputs of op_seq have the same PermuteFactor because those inputs are inputs of first
- // operation
- _lowered_graph.op_seqs().at(op_sequence_index).replaceInputs(input, replaced_input);
- }
// Update the same inputs of a node at once because inputs of an operation have the same
// PermuteFactor
@@ -69,6 +60,8 @@ void ConstantInsertionPass::callback(const ir::OperationIndex &node_index, ir::O
auto &replaced_object = _graph.operands().at(replaced_input);
replaced_object.insertUse(node_index);
+ VERBOSE(ConstInsertPass) << "New operand " << replaced_input << " added(copy of " << input
+ << ") for " << factor << std::endl;
// Remove this node from uses of origin operand
// Constant operand has no def.
assert(!object.getDef().valid());
@@ -76,12 +69,16 @@ void ConstantInsertionPass::callback(const ir::OperationIndex &node_index, ir::O
// Remove origin operand
if (object.getUses().size() == 0)
+ {
_graph.removeOperand(input);
+ VERBOSE(ConstInsertPass) << "Original operand " << input << " removed - no uses"
+ << std::endl;
+ }
}
}
// Now this runtime does not support the node making output as constant
- for (const auto &output : node.getOutputs())
+ for (const auto &output : node.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
UNUSED_RELEASE(output);
assert(!_graph.operands().at(output).isConstant());
diff --git a/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.h b/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.h
index 052883c92..d5b9aa14e 100644
--- a/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.h
+++ b/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.h
@@ -17,7 +17,7 @@
#ifndef __ONERT_COMPILER_PASS_CONSTANT_INSERTION_PASS_H__
#define __ONERT_COMPILER_PASS_CONSTANT_INSERTION_PASS_H__
-#include <ir/operand/PermuteFactor.h>
+#include <compiler/PermuteFactor.h>
#include <ir/Index.h>
#include "LoweredOperationPass.h"
#include <unordered_map>
@@ -39,13 +39,13 @@ public:
std::string id() final { return "ConstantInsertionPass"; }
public:
- void callback(const ir::OperationIndex &index, ir::Operation &node) final;
+ void callback(const ir::OperationIndex &index, ir::IOperation &node) final;
private:
struct ReplaceKey
{
ir::OperandIndex index;
- ir::operand::PermuteFactor factor;
+ PermuteFactor factor;
bool operator==(const ReplaceKey &other) const
{
@@ -61,8 +61,7 @@ private:
std::size_t operator()(const ReplaceKey &key) const noexcept
{
using std::hash;
- return hash<ir::OperandIndex>()(key.index) ^
- (hash<ir::operand::PermuteFactor>()(key.factor) << 1);
+ return hash<ir::OperandIndex>()(key.index) ^ (hash<PermuteFactor>()(key.factor) << 1);
}
};
diff --git a/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.cc b/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.cc
index 1c1dbe0ee..32e32d0ef 100644
--- a/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.cc
+++ b/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.cc
@@ -18,8 +18,9 @@
#include "backend/Backend.h"
#include <ir/Graph.h>
-#include <ir/operand/PermuteFactor.h>
+#include <compiler/PermuteFactor.h>
#include <util/Utils.h>
+#include "util/logging.h"
namespace onert
{
@@ -28,25 +29,25 @@ namespace compiler
namespace pass
{
-void ConstantLoweringPass::callback(const ir::OperationIndex &node_index, ir::Operation &node)
+void ConstantLoweringPass::callback(const ir::OperationIndex &node_index, ir::IOperation &node)
{
- const auto &op_sequence_index = _lowered_graph.op_seqs().getOperation(node_index);
- const auto op_seq_lower_info = _lowered_graph.getLowerInfo(op_sequence_index);
- const auto backend = op_seq_lower_info->backend();
- const auto layout = op_seq_lower_info->layout();
- const auto factor = ir::operand::PermuteFactor{backend, layout};
+ const auto op_lower_info = _lowered_graph.lower_info().operation.getRawPtr(node_index);
+ const auto backend = op_lower_info->backend();
+ const auto layout = op_lower_info->layout();
+ const auto factor = PermuteFactor{backend, layout};
// Now this runtime does not support the node making output of operation as constant
- for (const auto input : node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
+ for (const auto &input : node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
auto &object = _graph.operands().at(input);
if (object.isConstant())
{
// All constant operands are already assigned at each backend by ConstantInsertionPass. So a
// constant has `def` and `use` with the same PermuteFactor
- _lowered_graph.setLowerInfo(input, std::make_unique<ir::operand::LowerInfo>());
- _lowered_graph.getLowerInfo(input)->addDefPermuteFactor(factor);
- _lowered_graph.getLowerInfo(input)->addUsePermuteFactor(factor);
+ auto operand_li = std::make_unique<compiler::OperandLowerInfo>();
+ operand_li->addDefPermuteFactor(factor);
+ operand_li->addUsePermuteFactor(factor);
+ _lowered_graph.lower_info().operand.set(input, std::move(operand_li));
}
}
}
diff --git a/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.h b/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.h
index e17d776d1..d60a1033f 100644
--- a/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.h
+++ b/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.h
@@ -36,7 +36,7 @@ public:
std::string id() final { return "ConstantLoweringPass"; }
public:
- void callback(const ir::OperationIndex &index, ir::Operation &node) final;
+ void callback(const ir::OperationIndex &index, ir::IOperation &node) final;
};
} // namespace pass
diff --git a/runtime/onert/core/src/compiler/pass/ConstantOutputPass.cc b/runtime/onert/core/src/compiler/pass/ConstantOutputPass.cc
new file mode 100644
index 000000000..1448de473
--- /dev/null
+++ b/runtime/onert/core/src/compiler/pass/ConstantOutputPass.cc
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConstantOutputPass.h"
+
+#include "ir/Graph.h"
+#include "ir/operation/Permute.h"
+#include "util/logging.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace pass
+{
+
+void ConstantOutputPass::callback(const ir::OperandIndex &ind, ir::Operand &obj)
+{
+ if (!_graph.getOutputs().contains(ind) || !obj.isConstant())
+ return;
+
+ auto permute_input_ind = _graph.addOperand(obj.shape(), obj.typeInfo());
+ auto &permute_input_obj = _graph.operands().at(permute_input_ind);
+
+ // Move the const data
+ permute_input_obj.data(obj.shareData());
+ obj.releaseData();
+ obj.info().setAsNonConst();
+
+ using ir::operation::Permute;
+ auto permute_obj = std::make_unique<Permute>(permute_input_ind, ind, Permute::Type::COPY);
+ auto permute_ind = _graph.operations().push(std::move(permute_obj));
+
+ permute_input_obj.insertUse(permute_ind);
+ obj.setDef(permute_ind);
+
+ // Make the operations that uses this operand to use the generated operand
+ auto orig_uses = obj.getUses();
+ for (auto &&use : orig_uses)
+ {
+ permute_input_obj.insertUse(use);
+ obj.removeUse(use);
+ _graph.operations().at(use).replaceInputs(ind, permute_input_ind);
+ }
+
+ VERBOSE(ConstantOutputPass) << "Permute Op inserted for a constant output, node index : "
+ << permute_ind << std::endl;
+ VERBOSE(ConstantOutputPass) << " - Input (inserted) Operand : " << permute_input_ind
+ << std::endl;
+ VERBOSE(ConstantOutputPass) << " - Output(original) Operand : " << ind << std::endl;
+}
+
+} // namespace pass
+} // namespace compiler
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/pass/ConstantOutputPass.h b/runtime/onert/core/src/compiler/pass/ConstantOutputPass.h
new file mode 100644
index 000000000..193dd3a68
--- /dev/null
+++ b/runtime/onert/core/src/compiler/pass/ConstantOutputPass.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_PASS_CONSTANT_OUTPUT_PASS_H__
+#define __ONERT_COMPILER_PASS_CONSTANT_OUTPUT_PASS_H__
+
+#include "OperandPass.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace pass
+{
+
+/**
+ * @brief Pass to specially handle constant model outputs
+ *
+ * As an output buffer is given right before an execution but constant initialization is done at
+ * the prepare phase, the current runtime structure cannot handle the case where an output is
+ * constant. To resolve this problem, this pass inserts a Permute layer with a const input and
+ * makes the model output tensor its output.
+ *
+ * e.g.)
+ *
+ * ((Const Output))
+ *
+ * becomes
+ *
+ * (Const) -> [Permute] -> ((Output))
+ *
+ * Note that this is a mandatory pass for Graph.
+ */
+class ConstantOutputPass : public OperandPass
+{
+public:
+ using OperandPass::OperandPass;
+
+public:
+ std::string id() final { return "ConstantOutputPass"; }
+
+public:
+ void callback(const ir::OperandIndex &i, ir::Operand &o) final;
+};
+
+} // namespace pass
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_PASS_CONSTANT_OUTPUT_PASS_H__
diff --git a/runtime/onert/core/src/compiler/pass/IPass.h b/runtime/onert/core/src/compiler/pass/IPass.h
new file mode 100644
index 000000000..77f5916fd
--- /dev/null
+++ b/runtime/onert/core/src/compiler/pass/IPass.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_PASS_IPASS_H__
+#define __ONERT_COMPILER_PASS_IPASS_H__
+
+#include <string>
+
+namespace onert
+{
+namespace compiler
+{
+namespace pass
+{
+
+struct IPass
+{
+ virtual ~IPass() = default;
+
+ virtual std::string id() = 0;
+ virtual void run() = 0;
+};
+
+} // namespace pass
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_PASS_IPASS_H__
diff --git a/runtime/onert/core/src/compiler/pass/LoweredOperandPass.h b/runtime/onert/core/src/compiler/pass/LoweredOperandPass.h
index 0c5f7d745..64831a0ac 100644
--- a/runtime/onert/core/src/compiler/pass/LoweredOperandPass.h
+++ b/runtime/onert/core/src/compiler/pass/LoweredOperandPass.h
@@ -18,7 +18,7 @@
#define __ONERT_IR_PASS_LOWERED_OPERAND_PASS_H__
#include "OperandPass.h"
-#include "compiler/LoweredGraph.h"
+#include "compiler/ILoweredGraph.h"
namespace onert
{
@@ -30,8 +30,8 @@ namespace pass
class LoweredOperandPass : public OperandPass
{
public:
- LoweredOperandPass(compiler::LoweredGraph &lowered_graph)
- : OperandPass{lowered_graph.graph()}, _lowered_graph{lowered_graph}
+ LoweredOperandPass(compiler::ILoweredGraph &lowered_graph)
+ : OperandPass{lowered_graph.graph()}, _lowered_graph{lowered_graph}
{
// DO NOTHING
}
@@ -42,7 +42,7 @@ public:
void callback(const ir::OperandIndex &i, ir::Operand &o) override = 0;
protected:
- compiler::LoweredGraph &_lowered_graph;
+ compiler::ILoweredGraph &_lowered_graph;
};
} // namespace pass
diff --git a/runtime/onert/core/src/compiler/pass/LoweredOperationPass.h b/runtime/onert/core/src/compiler/pass/LoweredOperationPass.h
index 5c8569be2..27ca77c91 100644
--- a/runtime/onert/core/src/compiler/pass/LoweredOperationPass.h
+++ b/runtime/onert/core/src/compiler/pass/LoweredOperationPass.h
@@ -18,7 +18,7 @@
#define __ONERT_IR_PASS_LOWERED_OPERATION_PASS_H__
#include "OperationPass.h"
-#include "compiler/LoweredGraph.h"
+#include "compiler/ILoweredGraph.h"
namespace onert
{
@@ -30,8 +30,8 @@ namespace pass
class LoweredOperationPass : public OperationPass
{
public:
- LoweredOperationPass(LoweredGraph &lowered_graph)
- : OperationPass{lowered_graph.graph()}, _lowered_graph{lowered_graph}
+ LoweredOperationPass(ILoweredGraph &lowered_graph)
+ : OperationPass{lowered_graph.graph()}, _lowered_graph{lowered_graph}
{
// DO NOTHING
}
@@ -39,10 +39,10 @@ public:
virtual ~LoweredOperationPass() = default;
std::string id() override = 0;
- void callback(const ir::OperationIndex &i, ir::Operation &o) override = 0;
+ void callback(const ir::OperationIndex &i, ir::IOperation &o) override = 0;
protected:
- LoweredGraph &_lowered_graph;
+ ILoweredGraph &_lowered_graph;
};
} // namespace pass
diff --git a/runtime/onert/core/src/compiler/pass/OddOutputPass.cc b/runtime/onert/core/src/compiler/pass/OddOutputPass.cc
new file mode 100644
index 000000000..e2b3f6111
--- /dev/null
+++ b/runtime/onert/core/src/compiler/pass/OddOutputPass.cc
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OddOutputPass.h"
+
+#include "ir/Graph.h"
+#include "ir/operation/Permute.h"
+#include "util/logging.h"
+#include "util/Utils.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace pass
+{
+
+void OddOutputPass::run()
+{
+ auto &outputs = _graph.getOutputs();
+
+ VERBOSE(OddOutputPass) << "Case 1 : An operand which is a model output and a model input"
+ << std::endl;
+ for (const auto &ind : outputs)
+ {
+ if (_graph.getInputs().contains(ind))
+ {
+ auto permute_output_ind = insertPermute(ind);
+ // Update the output to be newly added operand
+ _graph.getOutputs().replace(ind, permute_output_ind);
+ }
+ }
+
+ VERBOSE(OddOutputPass) << "Case 2 : Two or more duplicated outputs" << std::endl;
+ std::unordered_set<ir::OperandIndex> occurrence;
+ for (auto &&ind : outputs)
+ {
+ auto &obj = _graph.operands().at(ind);
+ if (occurrence.count(ind) == 0)
+ {
+ occurrence.insert(ind);
+ continue;
+ }
+
+ // Panic when it is const; it must have been handled earlier in another pass
+ UNUSED_RELEASE(obj);
+ assert(!obj.isConstant());
+
+ auto permute_output_ind = insertPermute(ind);
+ ind = permute_output_ind; // Replace output index to fix output duplication
+ }
+}
+
+ir::OperandIndex OddOutputPass::insertPermute(ir::OperandIndex ind)
+{
+ auto &obj = _graph.operands().at(ind);
+ auto output_ind = _graph.addOperand(obj.shape(), obj.typeInfo());
+ auto &output_obj = _graph.operands().at(output_ind);
+
+ using ir::operation::Permute;
+ auto permute_obj = std::make_unique<Permute>(ind, output_ind, Permute::Type::COPY);
+ auto permute_ind = _graph.operations().push(std::move(permute_obj));
+
+ output_obj.setDef(permute_ind);
+ obj.insertUse(permute_ind);
+
+ VERBOSE(OddOutputPass) << "Permute Op inserted for a constant output, node index : "
+ << permute_ind << std::endl;
+ VERBOSE(OddOutputPass) << " - Input (original) Operand : " << ind << std::endl;
+ VERBOSE(OddOutputPass) << " - Output(inserted) Operand : " << output_ind << std::endl;
+
+ return output_ind;
+}
+
+} // namespace pass
+} // namespace compiler
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/pass/OddOutputPass.h b/runtime/onert/core/src/compiler/pass/OddOutputPass.h
new file mode 100644
index 000000000..2accbac60
--- /dev/null
+++ b/runtime/onert/core/src/compiler/pass/OddOutputPass.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_PASS_ODD_OUTPUT_PASS_H__
+#define __ONERT_COMPILER_PASS_ODD_OUTPUT_PASS_H__
+
+#include <unordered_set>
+
+#include "Pass.h"
+#include "ir/Index.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace pass
+{
+
+/**
+ * @brief Pass to specially handle odd outputs in a subgraph
+ *
+ * Runtime Graph IR requires that every input and output have a distinct tensor index; this is
+ * onert's restriction. However, we allow duplicated indices in models (or via the API), so we
+ * must transform the graph after model loading.
+ *
+ * This is necessary since our API lets users set different buffers for each input and output, so
+ * it is unavoidable that we copy the value at runtime.
+ *
+ * Note that this is a mandatory pass for Graph.
+ *
+ * Case 1 : An operand which is a model output and a model input
+ *
+ * Create an operand and insert a Permute(copy) op between them. And change the output to be the
+ * newly generated operand.
+ *
+ * e.g.)
+ *
+ * ```
+ * ((#0 Input0 and also Output0))
+ * becomes
+ * ((#0 Input0)) -> [#0 Permute] -> ((#1 Output0))
+ * ```
+ *
+ * Case 2 : Two or more duplicated outputs
+ *
+ * Do the same as in Case 1, but between two outputs that share the same tensor index.
+ *
+ * e.g.)
+ *
+ * ```
+ * ((#0 Input0)) -> [#0 Some Operation] -> ((#1 Output0 and also Output1))
+ * becomes
+ * ((#0 Input0)) -> [#0 Some Operation] -> ((#1 Output0)) [#1 Permute] -> ((#2 Output1))
+ * ```
+ *
+ */
+class OddOutputPass : public Pass
+{
+public:
+ using Pass::Pass;
+
+public:
+ std::string id() final { return "OddOutputPass"; }
+
+public:
+ void run() override;
+
+private:
+ ir::OperandIndex insertPermute(ir::OperandIndex input);
+};
+
+} // namespace pass
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_PASS_ODD_OUTPUT_PASS_H__
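To make Case 2 concrete, here is a minimal, self-contained sketch of the deduplication idea in plain C++ (not part of this commit; the indices and the `next_operand` counter are hypothetical). The first occurrence of an output index keeps the original operand; every later occurrence is redirected to a fresh operand, which in the real pass is the output of an inserted `Permute(COPY)` node.

```
#include <cstdint>
#include <iostream>
#include <unordered_set>
#include <vector>

using OperandIndex = std::uint32_t;

int main()
{
  // Model outputs referring to the same tensor twice, as in Case 2.
  std::vector<OperandIndex> outputs{1, 1, 2};
  OperandIndex next_operand = 3; // next free operand index (assumption)

  std::unordered_set<OperandIndex> seen;
  for (auto &ind : outputs)
  {
    if (seen.insert(ind).second)
      continue; // first occurrence keeps the original operand
    // Later occurrences get a fresh operand; in the pass this is the
    // output of an inserted Permute(COPY) node.
    ind = next_operand++;
  }

  for (auto ind : outputs)
    std::cout << ind << ' '; // prints: 1 3 2
  std::cout << '\n';
}
```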
diff --git a/runtime/onert/core/src/compiler/pass/OperandPass.cc b/runtime/onert/core/src/compiler/pass/OperandPass.cc
index 50c001c30..db8ebedcd 100644
--- a/runtime/onert/core/src/compiler/pass/OperandPass.cc
+++ b/runtime/onert/core/src/compiler/pass/OperandPass.cc
@@ -28,7 +28,7 @@ namespace pass
void OperandPass::run()
{
_graph.operands().iterate(
- [&](const ir::OperandIndex &index, ir::Operand &object) { callback(index, object); });
+ [&](const ir::OperandIndex &index, ir::Operand &object) { callback(index, object); });
}
} // namespace pass
diff --git a/runtime/onert/core/src/compiler/pass/OperationPass.cc b/runtime/onert/core/src/compiler/pass/OperationPass.cc
index d7a55cb22..bd9bcb4a4 100644
--- a/runtime/onert/core/src/compiler/pass/OperationPass.cc
+++ b/runtime/onert/core/src/compiler/pass/OperationPass.cc
@@ -17,7 +17,7 @@
#include "OperationPass.h"
#include "ir/Index.h"
-#include "ir/Operation.h"
+#include "ir/IOperation.h"
#include "ir/Graph.h"
namespace onert
@@ -30,7 +30,7 @@ namespace pass
void OperationPass::run()
{
_graph.operations().iterate(
- [&](const ir::OperationIndex &index, ir::Operation &node) { callback(index, node); });
+ [&](const ir::OperationIndex &index, ir::IOperation &node) { callback(index, node); });
}
} // namespace pass
diff --git a/runtime/onert/core/src/compiler/pass/OperationPass.h b/runtime/onert/core/src/compiler/pass/OperationPass.h
index ac4d818a2..0a00b11d1 100644
--- a/runtime/onert/core/src/compiler/pass/OperationPass.h
+++ b/runtime/onert/core/src/compiler/pass/OperationPass.h
@@ -29,7 +29,7 @@ namespace onert
{
namespace ir
{
-class Operation;
+struct IOperation;
} // namespace ir
} // namespace onert
@@ -62,7 +62,7 @@ public:
* @param index is the index of a node in graph
* @param node is the node in graph
*/
- virtual void callback(const ir::OperationIndex &index, ir::Operation &node) = 0;
+ virtual void callback(const ir::OperationIndex &index, ir::IOperation &node) = 0;
/**
* @brief Run the pass
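For orientation, passes written against the changed interface now receive nodes as `ir::IOperation` rather than the concrete `ir::Operation`. Below is a hypothetical sketch (not in this commit) of a minimal `OperationPass` subclass under that assumption; it presumes onert's internal headers and that `OperationPass` inherits `Pass`'s `Graph&` constructor.

```
#include <cstddef>
#include <string>

#include "OperationPass.h"
#include "ir/IOperation.h"

namespace onert
{
namespace compiler
{
namespace pass
{

// Hypothetical example pass: counts the operations visited by run().
class CountingPass final : public OperationPass
{
public:
  using OperationPass::OperationPass;

  std::string id() override { return "CountingPass"; }

  // Nodes arrive as the IOperation interface type after this change
  void callback(const ir::OperationIndex &, ir::IOperation &) override { ++_count; }

  std::size_t count() const { return _count; }

private:
  std::size_t _count = 0;
};

} // namespace pass
} // namespace compiler
} // namespace onert
```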
diff --git a/runtime/onert/core/src/compiler/pass/Pass.h b/runtime/onert/core/src/compiler/pass/Pass.h
index 3f356c337..b34695c97 100644
--- a/runtime/onert/core/src/compiler/pass/Pass.h
+++ b/runtime/onert/core/src/compiler/pass/Pass.h
@@ -17,6 +17,8 @@
#ifndef __ONERT_COMPILER_PASS_PASS_H__
#define __ONERT_COMPILER_PASS_PASS_H__
+#include "IPass.h"
+
#include <string>
namespace onert
@@ -24,7 +26,7 @@ namespace onert
namespace ir
{
class Graph;
-} // namespace compiler
+} // namespace ir
} // namespace onert
namespace onert
@@ -34,7 +36,7 @@ namespace compiler
namespace pass
{
-class Pass
+class Pass : public IPass
{
public:
Pass(ir::Graph &graph) : _graph{graph} {}
diff --git a/runtime/onert/core/src/compiler/pass/PassRunner.cc b/runtime/onert/core/src/compiler/pass/PassRunner.cc
new file mode 100644
index 000000000..cd1b82bb2
--- /dev/null
+++ b/runtime/onert/core/src/compiler/pass/PassRunner.cc
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PassRunner.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace pass
+{
+
+PassRunner &PassRunner::append(std::unique_ptr<IPass> pass)
+{
+ _passes.emplace_back(std::move(pass));
+ return *this;
+}
+
+void PassRunner::run()
+{
+ for (auto &&pass : _passes)
+ {
+ VERBOSE(PassRunner) << "Start running '" << pass->id() << "'" << std::endl;
+ pass->run();
+ VERBOSE(PassRunner) << "Finished running '" << pass->id() << "'" << std::endl;
+ // TODO Dump graph?
+ }
+}
+
+} // namespace pass
+} // namespace compiler
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/pass/PassRunner.h b/runtime/onert/core/src/compiler/pass/PassRunner.h
new file mode 100644
index 000000000..03bfbe220
--- /dev/null
+++ b/runtime/onert/core/src/compiler/pass/PassRunner.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_PASS_PASS_RUNNER_H__
+#define __ONERT_COMPILER_PASS_PASS_RUNNER_H__
+
+#include <initializer_list>
+#include <memory>
+#include <vector>
+
+#include "IPass.h"
+#include "util/logging.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace pass
+{
+
+/**
+ * @brief Run a sequence of passes with logging
+ */
+class PassRunner
+{
+public:
+ PassRunner() = default;
+ PassRunner &append(std::unique_ptr<IPass> pass);
+
+ void run();
+
+private:
+ std::vector<std::unique_ptr<IPass>> _passes;
+};
+
+} // namespace pass
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_PASS_PASS_RUNNER_H__
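A usage sketch for the new runner, assuming the passes defined elsewhere in this commit (`OddOutputPass`, `UnusedOperandEliminationPass`) and onert's internal headers; `append()` returns the runner itself, so passes chain, and `run()` logs each pass before and after it executes.

```
#include <memory>

#include "OddOutputPass.h"
#include "PassRunner.h"
#include "UnusedOperandEliminationPass.h"
#include "ir/Graph.h"

void runMandatoryPasses(onert::ir::Graph &graph)
{
  using namespace onert::compiler::pass;

  // Each pass takes the graph in its constructor (via Pass) and is owned
  // by the runner; run() executes them in the order appended.
  PassRunner{}
    .append(std::make_unique<OddOutputPass>(graph))
    .append(std::make_unique<UnusedOperandEliminationPass>(graph))
    .run();
}
```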
diff --git a/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc
index f01697034..d9452c7f9 100644
--- a/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc
+++ b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc
@@ -15,8 +15,8 @@
*/
#include "PermutationEliminationPass.h"
-#include "backend/controlflow/Config.h"
+#include "backend/Backend.h"
#include "util/logging.h"
namespace onert
@@ -26,7 +26,7 @@ namespace compiler
namespace pass
{
-void PermutationEliminationPass::callback(const ir::OperationIndex &ind, ir::Operation &node)
+void PermutationEliminationPass::callback(const ir::OperationIndex &ind, ir::IOperation &node)
{
_op_ind = ind;
node.accept(*this);
@@ -39,8 +39,9 @@ void PermutationEliminationPass::visit(const ir::operation::Permute &node)
// Check if two tensors are both portable if not, we can't eliminate the node
{
- auto in_def_factor = _lowered_graph.getLowerInfo(in_operand)->def_factors().getOnlyElement();
- auto out_def_factor = _lowered_graph.getLowerInfo(out_operand)->def_factors().getOnlyElement();
+ auto &operand_li_map = _lowered_graph.lower_info().operand;
+ auto in_def_factor = operand_li_map.getRawPtr(in_operand)->def_factors().getOnlyElement();
+ auto out_def_factor = operand_li_map.getRawPtr(out_operand)->def_factors().getOnlyElement();
auto in_config = in_def_factor.backend()->config();
auto out_config = out_def_factor.backend()->config();
@@ -53,59 +54,50 @@ void PermutationEliminationPass::visit(const ir::operation::Permute &node)
if (_graph.getOutputs().contains(out_operand))
{
+ // If the input is a const, we cannot remove it since we cannot put the constant data in the
+ // output buffer during the prepare phase.
+ auto permute_input = node.getInputs().at(0);
+ if (_graph.operands().at(permute_input).isConstant())
+ return;
+ // If the input is a model input, we cannot remove it since our API lets users set different
+ // buffers for inputs and outputs even though one tensor is both at the same time.
+ auto permute_output = node.getOutputs().at(0);
+ if (_graph.getInputs().contains(permute_input) && _graph.getOutputs().contains(permute_output))
+ return;
+ // Likewise, keep a copy from one model output to another model output.
+ if (_graph.getOutputs().contains(permute_input) && _graph.getOutputs().contains(permute_output))
+ return;
+
// Exceptional case : When the output operand is a model output
// In this case we keep the output and remove the input
auto &out_operand_obj = _graph.operands().at(out_operand);
assert(out_operand_obj.getDef() == _op_ind);
out_operand_obj.unsetDef();
- _lowered_graph.op_seqs().iterate([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- if (!op_seq.getOutputs().contains(in_operand))
+ _graph.operations().iterate([&](const ir::OperationIndex &op_ind, ir::IOperation &op) {
+ if (!op.getOutputs().contains(in_operand))
return;
-
- // Update OpSequence/ir::Operation edges and ir::Operand edges
- op_seq.replaceOutputs(in_operand, out_operand);
- for (auto op : op_seq.operations())
- {
- auto &operation_obj = _graph.operations().at(op);
- if (operation_obj.getOutputs().contains(in_operand))
- {
- operation_obj.replaceOutputs(in_operand, out_operand);
- out_operand_obj.setDef(op);
- }
- }
+ // Update Operation and Operand edges
+ op.replaceOutputs(in_operand, out_operand);
+ out_operand_obj.setDef(op_ind);
});
- // Remove Permute operation, enclosing OpSequence and the operand
+ // Remove Permute operation and the operand
{
_graph.removeOperand(in_operand);
-
- auto op_seq_ind = _lowered_graph.op_seqs().getOperation(_op_ind);
- // Assumes enclosing OpSequence contatins just this Permute operation
- assert(_lowered_graph.op_seqs().at(op_seq_ind).size() == 1);
- _lowered_graph.op_seqs().remove(op_seq_ind);
_graph.operations().remove(_op_ind);
}
- _lowered_graph.op_seqs().iterate([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- if (!op_seq.getInputs().contains(in_operand))
+ _graph.operations().iterate([&](const ir::OperationIndex &op_ind, ir::IOperation &op) {
+ if (!op.getInputs().contains(in_operand))
return;
-
- op_seq.replaceInputs(in_operand, out_operand);
- for (auto op : op_seq.operations())
- {
- auto &operation_obj = _graph.operations().at(op);
- if (operation_obj.getInputs().contains(in_operand))
- {
- operation_obj.replaceInputs(in_operand, out_operand);
- out_operand_obj.insertUse(op);
- }
- }
+ op.replaceInputs(in_operand, out_operand);
+ out_operand_obj.insertUse(op_ind);
});
VERBOSE(removePermute) << "Permute Op removed, node index : " << _op_ind << std::endl;
- VERBOSE(removePermute) << " - Input (removed) ir::Operand : " << in_operand << std::endl;
- VERBOSE(removePermute) << " - Output(kept) ir::Operand : " << out_operand << std::endl;
+ VERBOSE(removePermute) << " - Input (removed) Operand : " << in_operand << std::endl;
+ VERBOSE(removePermute) << " - Output(kept) Operand : " << out_operand << std::endl;
}
else
{
@@ -114,37 +106,23 @@ void PermutationEliminationPass::visit(const ir::operation::Permute &node)
auto &in_operand_obj = _graph.operands().at(in_operand);
in_operand_obj.removeUse(_op_ind);
- // Make OpSequences(that use the output) use the input
- _lowered_graph.op_seqs().iterate([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- if (!op_seq.getInputs().contains(out_operand))
+ // Make operations(that use the output) use the input
+ _graph.operations().iterate([&](const ir::OperationIndex &op_ind, ir::IOperation &op) {
+ if (!op.getInputs().contains(out_operand))
return;
-
- op_seq.replaceInputs(out_operand, in_operand);
- for (auto op : op_seq.operations())
- {
- auto &operation_obj = _graph.operations().at(op);
- if (operation_obj.getInputs().contains(out_operand))
- {
- operation_obj.replaceInputs(out_operand, in_operand);
- in_operand_obj.insertUse(op);
- }
- }
+ op.replaceInputs(out_operand, in_operand);
+ in_operand_obj.insertUse(op_ind);
});
- // Remove Permute operation, enclosing OpSequence and the operand
+ // Remove the Permute operation and out_operand
{
_graph.removeOperand(out_operand);
-
- auto op_seq_ind = _lowered_graph.op_seqs().getOperation(_op_ind);
- // Assumes enclosing OpSequence contatins just this Permute operation
- assert(_lowered_graph.op_seqs().at(op_seq_ind).size() == 1);
- _lowered_graph.op_seqs().remove(op_seq_ind);
_graph.operations().remove(_op_ind);
}
- VERBOSE(removePermute) << "Permute Op removed, node index : " << _op_ind << std::endl;
- VERBOSE(removePermute) << " - Input (kept) ir::Operand : " << in_operand << std::endl;
- VERBOSE(removePermute) << " - Output(removed) ir::Operand : " << out_operand << std::endl;
+ VERBOSE(removePermute) << "Permute Op removed : " << _op_ind << std::endl;
+ VERBOSE(removePermute) << " - Input (kept) Operand : " << in_operand << std::endl;
+ VERBOSE(removePermute) << " - Output(removed) Operand : " << out_operand << std::endl;
}
}
diff --git a/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.h b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.h
index 29daf1a82..18ba99804 100644
--- a/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.h
+++ b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.h
@@ -35,7 +35,7 @@ namespace pass
* are compatible and layouts match.
*
* Permute input tensor is kept and the output is removed for all the cases, except model outputs.
- * As all output tensors have to be controlflow backend, so the output is kept.
+ * As all output tensors have to be on the builtin backend, the output is kept.
*
* @note This is an optimization pass which means that everything should work fine even if this pass
* was skipped.
@@ -49,7 +49,7 @@ public:
std::string id() final { return "PermutationEliminationPass"; }
public:
- void callback(const ir::OperationIndex &i, ir::Operation &n) final;
+ void callback(const ir::OperationIndex &i, ir::IOperation &n) final;
private:
void visit(const ir::operation::Permute &) final;
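The rewritten pass keeps the decision structure but drops all OpSequence bookkeeping. The sketch below (not in this commit) distills the keep/remove decision into a standalone function, with bool flags standing in for the real graph queries.

```
#include <iostream>

enum class Action { KeepPermute, RemoveInputOperand, RemoveOutputOperand };

// Decision sketch distilled from the pass; the flags stand in for
// model-output membership, constness, and input/output aliasing checks.
Action decide(bool out_is_model_output, bool in_is_constant,
              bool in_is_model_input, bool in_is_model_output)
{
  if (out_is_model_output)
  {
    // Removing the copy would change observable behavior in these cases,
    // so the Permute node itself is kept.
    if (in_is_constant || in_is_model_input || in_is_model_output)
      return Action::KeepPermute;
    return Action::RemoveInputOperand; // the model output operand survives
  }
  return Action::RemoveOutputOperand; // default: the input operand survives
}

int main()
{
  // A Permute feeding a model output from an internal, non-constant operand
  // can drop the extra input operand.
  std::cout << (decide(true, false, false, false) == Action::RemoveInputOperand)
            << '\n'; // prints 1
}
```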
diff --git a/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc
index c83a72ada..11c22778e 100644
--- a/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc
+++ b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc
@@ -16,18 +16,16 @@
#include "PermutationInsertionPass.h"
-#include <cassert>
-#include <utility>
-#include <unordered_map>
+#include "../../backend/builtin/Config.h"
-#include "backend/controlflow/Config.h"
-#include "ir/Operand.h"
-#include "ir/operation/LowerInfo.h"
-#include "ir/Graph.h"
-#include "backend/IConfig.h"
+#include "compiler/OperationLowerInfo.h"
+#include "ir/operation/Permute.h"
#include "util/logging.h"
+
+#include <cassert>
#include <memory>
-#include "ir/operation/Permute.h"
+#include <unordered_map>
+#include <utility>
namespace onert
{
@@ -38,7 +37,8 @@ namespace pass
void PermutationInsertionPass::callback(const ir::OperandIndex &index, ir::Operand &object)
{
- auto &&operand_li = _lowered_graph.getLowerInfo(index);
+ auto &operand_li_map = _lowered_graph.lower_info().operand;
+ auto &&operand_li = operand_li_map.getRawPtr(index);
assert(operand_li);
// NOTE Later, constants also will have Def
@@ -51,16 +51,16 @@ void PermutationInsertionPass::callback(const ir::OperandIndex &index, ir::Opera
std::list<ir::OperationIndex> permute_indexes;
// Build a map for all necessary type of operands
- std::unordered_map<ir::operand::PermuteFactor, ir::OperandIndex> factor_to_index;
+ std::unordered_map<PermuteFactor, ir::OperandIndex> factor_to_index;
{
assert(operand_li->def_factors().size() == 1);
- for (auto factor : operand_li->def_factors())
+ for (auto &&factor : operand_li->def_factors())
{
factor_to_index.emplace(factor, index);
}
auto insert_set = operand_li->use_factors() - operand_li->def_factors();
- for (auto factor : insert_set)
+ for (auto &&factor : insert_set)
{
const auto permute_operation_index = insertPermute(index, factor);
permute_indexes.push_back(permute_operation_index);
@@ -75,33 +75,22 @@ void PermutationInsertionPass::callback(const ir::OperandIndex &index, ir::Opera
std::list<ir::OperationIndex> remove_list;
auto uses = object.getUses();
- for (auto use : uses)
+ for (auto &&use : uses)
{
// If permute operation, ignore it
if (std::find(permute_indexes.begin(), permute_indexes.end(), use) != permute_indexes.end())
continue;
auto &operation = _graph.operations().at(use);
- assert(_lowered_graph.op_seqs().containsOperation(use));
- auto op_seq_index = _lowered_graph.op_seqs().getOperation(use);
- auto op_seq_li = _lowered_graph.getLowerInfo(op_seq_index);
- assert(op_seq_li);
- const auto op_seq_layout = op_seq_li->layout();
- const backend::Backend *backend = op_seq_li->backend();
+ auto op_li = _lowered_graph.lower_info().operation.getRawPtr(use);
+ assert(op_li);
+ const auto op_layout = op_li->layout();
+ const backend::Backend *backend = op_li->backend();
assert(backend);
- auto use_node_inputs = operation.getInputs();
- assert(use_node_inputs.contains(index));
- auto new_index = factor_to_index.at({backend, op_seq_layout});
+ auto new_index = factor_to_index.at({backend, op_layout});
if (index != new_index)
{
- // Update from op_seq
- // Replace the same inputs of an OpSequence at once for the following reasons:
- // 1. An OpSequence's inputs are the same inputs of first operation
- // 2. An OpSequence may have inputs as the same operand (2 or more).
- // 3. The same inputs of OpSequence have the same PermuteFactor.
- _lowered_graph.op_seqs().at(op_seq_index).replaceInputs(index, new_index);
-
// Update from operation
// Replace the same inputs of an operation at once for the following reasons:
// No. 2 and 3 above
@@ -109,63 +98,69 @@ void PermutationInsertionPass::callback(const ir::OperandIndex &index, ir::Opera
// Update from operand
remove_list.push_back(
- use); // Removal should be done in another loop since we are in the loop
+ use); // Removal should be done in another loop since we are in the loop
_graph.operands().at(new_index).insertUse(use);
}
}
- for (auto &operation : remove_list)
+ for (const auto &operation_index : remove_list)
{
- object.removeUse(operation);
+ object.removeUse(operation_index);
}
}
}
ir::OperationIndex PermutationInsertionPass::insertPermute(const ir::OperandIndex &operand_index,
- const ir::operand::PermuteFactor &factor)
+ const PermuteFactor &factor)
{
- assert(!_graph.isBuildingPhase());
-
auto &operand = _graph.operands().at(operand_index);
// Generate output operand and permute operation
auto out_operand_index = _graph.addOperand(operand.shape(), operand.typeInfo());
- // change model output if operand_index is model output index
+ // Change the model output to the new operand if operand_index is a model output index and the
+ // target factor's backend is the builtin backend
auto &model_outputs = _graph.getOutputs();
- if (model_outputs.contains(operand_index))
+ const backend::Backend *builtin_backend = compiler::BackendManager::get().getBuiltin();
+ assert(builtin_backend->config()->id() == onert::backend::builtin::Config::ID);
+
+ if (model_outputs.contains(operand_index) && factor.backend() == builtin_backend)
{
model_outputs.replace(operand_index, out_operand_index);
}
+ auto &operand_li_map = _lowered_graph.lower_info().operand;
+
// Find Permute information
- auto input_factor = _lowered_graph.getLowerInfo(operand_index)->def_factors().getOnlyElement();
+ auto input_factor = operand_li_map.getRawPtr(operand_index)->def_factors().getOnlyElement();
auto input_backend = input_factor.backend();
auto output_backend = factor.backend();
// NOTE Permute may not have specific layout because the layout of input and output may be
// different.
const auto permute_node_layout = ir::Layout::UNKNOWN;
// NOTE If one backend supports several layout, the backend must support Permute operation
- const backend::Backend *permute_node_backend = compiler::BackendManager::get().getControlflow();
+ const backend::Backend *permute_node_backend = compiler::BackendManager::get().getBuiltin();
+ assert(permute_node_backend->config()->id() == onert::backend::builtin::Config::ID);
+
if (input_backend == output_backend)
{
permute_node_backend = input_backend;
}
- const ir::operand::PermuteFactor permute_node_factor{permute_node_backend, permute_node_layout};
+ const PermuteFactor permute_node_factor{permute_node_backend, permute_node_layout};
// Update LowerInfo of input operand
- auto operand_lower_info = _lowered_graph.getLowerInfo(operand_index);
+ auto operand_lower_info = operand_li_map.getRawPtr(operand_index);
operand_lower_info->removeUsePermuteFactor(factor);
operand_lower_info->addUsePermuteFactor(permute_node_factor);
// Update LowerInfo of output operand
- auto out_operand_li = std::make_unique<ir::operand::LowerInfo>();
+ auto out_operand_li = std::make_unique<compiler::OperandLowerInfo>();
// The input and output factors of all nodes will be the same except Permute. So Tensor's
// allocators allocates memory using only the information of def permutation factor now.
// TODO Change param to permute_node_factor
out_operand_li->addDefPermuteFactor(factor);
out_operand_li->addUsePermuteFactor(factor);
- _lowered_graph.setLowerInfo(out_operand_index, std::move(out_operand_li));
+ operand_li_map.set(out_operand_index, std::move(out_operand_li));
// Insert permute operation to the graph
const auto input_layout = input_factor.layout();
@@ -188,20 +183,18 @@ ir::OperationIndex PermutationInsertionPass::insertPermute(const ir::OperandInde
auto insert_node = std::make_unique<Permute>(operand_index, out_operand_index, permute_type);
auto node_index = _graph.operations().push(std::move(insert_node));
- const auto &node = _graph.operations().at(node_index);
VERBOSE_F() << "Permute Op inserted, node index : " << node_index << std::endl;
- VERBOSE_F() << " - Input (original) Operand : " << operand_index << std::endl;
- VERBOSE_F() << " - Output(inserted) Operand : " << out_operand_index << std::endl;
+ VERBOSE_F() << " - Input (original) Operand : " << operand_index << "("
+ << input_factor.backend()->config()->id() << ")" << std::endl;
+ VERBOSE_F() << " - Output(inserted) Operand : " << out_operand_index << "("
+ << factor.backend()->config()->id() << ")" << std::endl;
- // OpSequence
+ // Operation LowerInfo
{
- auto op_seq_index = _lowered_graph.op_seqs().emplace(node_index, permute_node_layout);
- auto &op_seq = _lowered_graph.op_seqs().at(op_seq_index);
- op_seq.setInputs(node.getInputs());
- op_seq.setOutputs(node.getOutputs());
- _lowered_graph.setLowerInfo(op_seq_index, std::make_unique<ir::operation::LowerInfo>(
- permute_node_backend, permute_node_layout));
+ auto &operation_li_map = _lowered_graph.lower_info().operation;
+ operation_li_map.set(node_index, std::make_unique<compiler::OperationLowerInfo>(
+ permute_node_backend, permute_node_layout));
}
// Update Use/Def info
diff --git a/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.h b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.h
index 758515385..ee0a1464c 100644
--- a/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.h
+++ b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.h
@@ -20,7 +20,7 @@
#include "LoweredOperandPass.h"
#include "compiler/BackendManager.h"
#include "ir/Operand.h"
-#include "ir/operand/PermuteFactor.h"
+#include "compiler/PermuteFactor.h"
namespace onert
{
@@ -48,7 +48,7 @@ private:
* @return ir::OperationIndex
*/
ir::OperationIndex insertPermute(const ir::OperandIndex &operand_index,
- const ir::operand::PermuteFactor &factor);
+ const PermuteFactor &factor);
};
} // namespace pass
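The core idea behind insertPermute's caller is a map from `PermuteFactor` (a backend/layout pair) to the operand index that users with that factor should read. Below is a self-contained sketch of that bookkeeping with strings standing in for backends and layouts; the backend names and operand indices are illustrative only.

```
#include <cstdint>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <utility>

using Factor = std::pair<std::string, std::string>; // {backend id, layout}, a stand-in
using OperandIndex = std::uint32_t;

int main()
{
  const OperandIndex operand = 7; // the operand being processed
  OperandIndex next_operand = 8;  // next free index (assumption)

  std::set<Factor> def_factors{{"cpu", "NHWC"}};
  std::set<Factor> use_factors{{"cpu", "NHWC"}, {"acl_cl", "NCHW"}};

  // Users with the defining factor read the original operand directly.
  std::map<Factor, OperandIndex> factor_to_index;
  for (const auto &f : def_factors)
    factor_to_index.emplace(f, operand);

  // insert_set = use_factors - def_factors: each remaining factor gets a
  // Permute whose output operand serves those users.
  for (const auto &f : use_factors)
    if (def_factors.count(f) == 0)
      factor_to_index.emplace(f, next_operand++);

  for (const auto &[f, idx] : factor_to_index)
    std::cout << f.first << "/" << f.second << " -> operand " << idx << '\n';
}
```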
diff --git a/runtime/onert/core/src/compiler/pass/PermutationOperationPass.cc b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.cc
index c5c95c726..f014d29d3 100644
--- a/runtime/onert/core/src/compiler/pass/PermutationOperationPass.cc
+++ b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.cc
@@ -30,10 +30,10 @@ namespace pass
using namespace ir;
-void PermutationOperationPass::callback(const OperationIndex &, Operation &node)
+void PermutationOperationPass::callback(const OperationIndex &, IOperation &node)
{
node.accept(*this);
-};
+}
// TODO Remove this. Expanding ranks of Operand is dangerous
void PermutationOperationPass::applyExpandRanks(const Operation &node)
@@ -43,9 +43,8 @@ void PermutationOperationPass::applyExpandRanks(const Operation &node)
assert(output.getDef().valid());
const auto node_index = output.getDef();
- const auto &op_seq_index = _lowered_graph.op_seqs().getOperation(node_index);
- const auto frontend_layout = _lowered_graph.op_seqs().at(op_seq_index).getLayout();
- const auto backend_layout = _lowered_graph.getLowerInfo(op_seq_index)->layout();
+ const auto frontend_layout = _graph.layout();
+ const auto backend_layout = _lowered_graph.lower_info().operation.getRawPtr(node_index)->layout();
if (frontend_layout == backend_layout)
{
@@ -84,10 +83,11 @@ void PermutationOperationPass::changeToKeepLayout(const Operation &node)
assert(output_obj.getDef().valid());
const auto node_index = output_obj.getDef();
- const auto &op_seq_index = _lowered_graph.op_seqs().getOperation(node_index);
- const auto frontend_layout = _lowered_graph.op_seqs().at(op_seq_index).getLayout();
- const auto backend_layout = _lowered_graph.getLowerInfo(op_seq_index)->layout();
+ auto &operation_li_map = _lowered_graph.lower_info().operation;
+ auto &operand_li_map = _lowered_graph.lower_info().operand;
+ const auto frontend_layout = _graph.layout();
+ const auto backend_layout = operation_li_map.getRawPtr(node_index)->layout();
if (frontend_layout == backend_layout)
{
@@ -97,96 +97,27 @@ void PermutationOperationPass::changeToKeepLayout(const Operation &node)
// Permutation changing layout beyond 4-D is not supported yet
assert(output_obj.shape().rank() <= 4);
- // Divide op_seq based on target operation
- {
- auto &prev_op_seq = _lowered_graph.op_seqs().at(op_seq_index);
- auto &operations = _lowered_graph.graph().operations();
-
- // Create new op_seq and move information from existing op_seq to new op_seq if target
- // node is the end of op_seq
- auto it = prev_op_seq.begin();
- // Find iterator of target node in op_seq
- while (*(it++) != node_index)
- ;
- if (it != prev_op_seq.end())
- {
- const auto &target_op_idx = *it;
- const auto &target_node = operations.at(target_op_idx);
- const auto &next_op_seq_index =
- _lowered_graph.op_seqs().emplace(target_op_idx, prev_op_seq.getLayout());
- auto &next_op_seq = _lowered_graph.op_seqs().at(next_op_seq_index);
- next_op_seq.setInputs(target_node.getInputs());
- next_op_seq.setOutputs(target_node.getOutputs());
-
- std::vector<OperationIndex> remove_list;
- remove_list.emplace_back(target_op_idx);
- while (++it != prev_op_seq.end())
- {
- next_op_seq.appendOperation(target_op_idx);
- next_op_seq.setOutputs(target_node.getOutputs());
- remove_list.emplace_back(target_op_idx);
- }
-
- prev_op_seq.setOutputs(node.getOutputs());
- for (const auto &index : remove_list)
- {
- prev_op_seq.remove(index);
- }
-
- const auto op_seq_li = _lowered_graph.getLowerInfo(op_seq_index);
- _lowered_graph.setLowerInfo(
- next_op_seq_index,
- std::make_unique<ir::operation::LowerInfo>(op_seq_li->backend(), op_seq_li->layout()));
- }
- }
-
- // Remove target operation from op_seq and insert the target operation to new op_seq
+ // Change PermuteFactors of the operands and the operation of the target node
{
- const auto backend = _lowered_graph.getLowerInfo(op_seq_index)->backend();
+ const auto op_li = operation_li_map.getRawPtr(node_index);
+ const auto backend = op_li->backend();
- // Remove target operation from op_sequence
- _lowered_graph.op_seqs().removeFromOpSequence(node_index);
+ operation_li_map.set(node_index,
+ std::make_unique<compiler::OperationLowerInfo>(backend, frontend_layout));
- if (!_lowered_graph.op_seqs().exist(op_seq_index))
- {
- // Remove lowerinfo for op_seq of target operation if the op_seq does not exist
- _lowered_graph.removeLowerInfo(op_seq_index);
- }
- else
- {
- // Update op_seq of target operation if the op_seq exists
- auto &prev_op_seq = _lowered_graph.op_seqs().at(op_seq_index);
- const auto &last_node_idx = *(--prev_op_seq.end());
- const auto &last_node = _lowered_graph.graph().operations().at(last_node_idx);
- prev_op_seq.setOutputs(last_node.getOutputs());
- }
-
- // Create new op_seq and set information to the op_seq
- auto new_op_seq_index = _lowered_graph.op_seqs().emplace(node_index, frontend_layout);
- auto &new_op_seq = _lowered_graph.op_seqs().at(new_op_seq_index);
- new_op_seq.setInputs(node.getInputs());
- new_op_seq.setOutputs(node.getOutputs());
- _lowered_graph.setLowerInfo(
- new_op_seq_index, std::make_unique<ir::operation::LowerInfo>(backend, frontend_layout));
- }
-
- // Change PermuteFactors of operands of target node
- {
- const auto &op_seq_index = _lowered_graph.op_seqs().getOperation(node_index);
- const auto op_seq_li = _lowered_graph.getLowerInfo(op_seq_index);
- const auto backend = op_seq_li->backend();
- const operand::PermuteFactor removed_factor{backend, backend_layout};
- const operand::PermuteFactor new_factor{backend, frontend_layout};
+ const PermuteFactor removed_factor{backend, backend_layout};
+ const PermuteFactor new_factor{backend, frontend_layout};
for (const auto &input : node.getInputs() | Remove::DUPLICATED | Remove::UNDEFINED)
{
+ // The old use factor can be removed only if no other operation uses this operand with the
+ // same backend and layout
bool canRemove = true;
for (const auto &use : _graph.operands().at(input).getUses())
{
if (use != node_index)
{
- const auto &use_op_seq_index = _lowered_graph.op_seqs().getOperation(use);
- auto use_op_seq_li = _lowered_graph.getLowerInfo(use_op_seq_index);
- if (use_op_seq_li->backend() == backend && use_op_seq_li->layout() == backend_layout)
+ auto use_op_li = operation_li_map.getRawPtr(use);
+ if (use_op_li->backend() == backend && use_op_li->layout() == backend_layout)
{
canRemove = false;
break;
@@ -194,27 +125,27 @@ void PermutationOperationPass::changeToKeepLayout(const Operation &node)
}
}
- auto lower_info = _lowered_graph.getLowerInfo(input);
+ auto input_li = operand_li_map.getRawPtr(input);
if (canRemove)
{
- lower_info->removeUsePermuteFactor(removed_factor);
+ input_li->removeUsePermuteFactor(removed_factor);
}
- lower_info->addUsePermuteFactor(new_factor);
+ input_li->addUsePermuteFactor(new_factor);
// Whether if node's input is an input of model or a constant
if (!_graph.operands().at(input).getDef().valid() &&
- (lower_info->def_factors().size() == 1 &&
- lower_info->def_factors().getOnlyElement() == removed_factor))
+ (input_li->def_factors().size() == 1 &&
+ input_li->def_factors().getOnlyElement() == removed_factor))
{
assert(_graph.getInputs().contains(input) || _graph.operands().at(input).isConstant());
- lower_info->removeDefPermuteFactor(removed_factor);
- lower_info->addDefPermuteFactor(new_factor);
+ input_li->removeDefPermuteFactor(removed_factor);
+ input_li->addDefPermuteFactor(new_factor);
}
}
- for (const auto &output : node.getOutputs() | Remove::DUPLICATED)
+ for (const auto &output : node.getOutputs() | Remove::DUPLICATED | Remove::UNDEFINED)
{
- auto lower_info = _lowered_graph.getLowerInfo(output);
+ auto lower_info = operand_li_map.getRawPtr(output);
lower_info->removeDefPermuteFactor(removed_factor);
lower_info->addDefPermuteFactor(new_factor);
@@ -279,6 +210,18 @@ void PermutationOperationPass::visit(const ir::operation::Gather &node)
}
}
+void PermutationOperationPass::visit(const ir::operation::OneHot &node)
+{
+ const auto &output_ind = node.getOutputs().at(0);
+ const auto &output_obj = _graph.operands().at(output_ind);
+ const auto &output_shape = output_obj.shape();
+
+ if (output_shape.rank() >= 4)
+ {
+ changeToKeepLayout(node);
+ }
+}
+
void PermutationOperationPass::visit(const ir::operation::Pack &node)
{
const auto &input_ind = node.getInputs().at(ir::operation::Reshape::Input::INPUT);
diff --git a/runtime/onert/core/src/compiler/pass/PermutationOperationPass.h b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.h
index 2dd76b971..e253a77ad 100644
--- a/runtime/onert/core/src/compiler/pass/PermutationOperationPass.h
+++ b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.h
@@ -36,7 +36,7 @@ public:
std::string id() final { return "PermutationOperationPass"; }
public:
- void callback(const ir::OperationIndex &i, ir::Operation &n) final;
+ void callback(const ir::OperationIndex &i, ir::IOperation &n) final;
public:
void visit(const ir::operation::BinaryArithmetic &) final;
@@ -44,6 +44,7 @@ public:
void visit(const ir::operation::Concat &) final;
void visit(const ir::operation::ElementwiseBinary &) final;
void visit(const ir::operation::ElementwiseUnary &) final;
+ void visit(const ir::operation::OneHot &) final;
void visit(const ir::operation::Pack &) final;
void visit(const ir::operation::PReLU &) final;
void visit(const ir::operation::SquaredDifference &) final;
diff --git a/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.cc b/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.cc
new file mode 100644
index 000000000..162c4e7ef
--- /dev/null
+++ b/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.cc
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Pass.h"
+
+#include "UnusedOperandEliminationPass.h"
+#include "ir/Index.h"
+#include "util/Set.h"
+#include "ir/Graph.h"
+
+/**
+ * @file UnusedOperandEliminationPass.cc
+ * @brief This file contains UnusedOperandEliminationPass class implementation
+ */
+
+namespace onert
+{
+namespace compiler
+{
+namespace pass
+{
+
+void UnusedOperandEliminationPass::run()
+{
+ util::Set<ir::OperandIndex> used;
+
+ _graph.operations().iterate([&](const ir::OperationIndex &, const ir::IOperation &node) {
+ for (auto &&ind : (node.getInputs() + node.getOutputs()) | ir::Remove::UNDEFINED)
+ {
+ used.add(ind);
+ }
+ });
+
+ // Graph's inputs/outputs are always considered used
+ for (auto &&ind : (_graph.getInputs() + _graph.getOutputs()) | ir::Remove::UNDEFINED)
+ {
+ used.add(ind);
+ }
+
+ _graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
+ if (!used.contains(ind))
+ {
+ VERBOSE() << "Remove unused operand " << ind << std::endl;
+ _graph.operands().remove(ind);
+ }
+ });
+}
+
+} // namespace pass
+} // namespace compiler
+} // namespace onert
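For readers skimming the diff, the pass above is a plain mark-and-sweep over operands. A self-contained sketch of the same structure, with hypothetical indices:

```
#include <cstdint>
#include <iostream>
#include <set>
#include <vector>

using OperandIndex = std::uint32_t;

struct Op // hypothetical stand-in for an operation's operand lists
{
  std::vector<OperandIndex> inputs, outputs;
};

int main()
{
  std::vector<OperandIndex> operands{0, 1, 2, 3}; // all operands in the graph
  std::vector<Op> ops{{{0}, {1}}};                // one op: reads 0, writes 1
  std::vector<OperandIndex> graph_io{0, 1};       // graph inputs/outputs

  // Mark phase: anything referenced by an operation or by graph I/O is used.
  std::set<OperandIndex> used(graph_io.begin(), graph_io.end());
  for (const auto &op : ops)
  {
    used.insert(op.inputs.begin(), op.inputs.end());
    used.insert(op.outputs.begin(), op.outputs.end());
  }

  // Sweep phase: everything else is removed (operands 2 and 3 here).
  for (auto ind : operands)
    if (!used.count(ind))
      std::cout << "Remove unused operand " << ind << '\n';
}
```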
diff --git a/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.h b/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.h
new file mode 100644
index 000000000..8078f4246
--- /dev/null
+++ b/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file UnusedOperandEliminationPass.h
+ * @brief This file contains UnusedOperandEliminationPass class
+ */
+
+#ifndef __ONERT_COMPILER_PASS_UNUSED_OPERAND_ELIMINATION_PASS_H__
+#define __ONERT_COMPILER_PASS_UNUSED_OPERAND_ELIMINATION_PASS_H__
+
+#include "Pass.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace pass
+{
+
+/**
+ * @brief A pass to eliminate unused operands from the graph
+ *
+ * Remove operands that are not used by any operations, except Graph inputs/outputs.
+ *
+ */
+class UnusedOperandEliminationPass : public Pass
+{
+public:
+ using Pass::Pass;
+
+public:
+ std::string id() override { return "UnusedOperandEliminationPass"; }
+ void run() final;
+};
+
+} // namespace pass
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_PASS_UNUSED_OPERAND_ELIMINATION_PASS_H__
diff --git a/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.test.cc b/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.test.cc
new file mode 100644
index 000000000..572b4df24
--- /dev/null
+++ b/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.test.cc
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "UnusedOperandEliminationPass.h"
+
+#include "ir/Graph.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert::ir;
+using namespace onert::compiler::pass;
+
+TEST(UnusedOperandEliminationPass, Simple)
+{
+ Graph graph;
+
+ // Add tensors
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::FLOAT32};
+ auto in = graph.addOperand(shape, type);
+ auto out = graph.addOperand(shape, type);
+
+ auto unused = graph.addOperand(shape, type);
+
+ // Set model inputs/outputs
+ graph.addInput(in);
+ graph.addOutput(out);
+
+ UnusedOperandEliminationPass{graph}.run();
+
+ ASSERT_TRUE(graph.operands().exist(in));
+ ASSERT_TRUE(graph.operands().exist(out));
+ ASSERT_FALSE(graph.operands().exist(unused));
+}
diff --git a/runtime/onert/core/src/compiler/train/LoweredTrainableGraph.cc b/runtime/onert/core/src/compiler/train/LoweredTrainableGraph.cc
new file mode 100644
index 000000000..490c648cd
--- /dev/null
+++ b/runtime/onert/core/src/compiler/train/LoweredTrainableGraph.cc
@@ -0,0 +1,285 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "compiler/train/LoweredTrainableGraph.h"
+
+#include "../ManualScheduler.h"
+#include "../pass/ConstantInsertionPass.h"
+#include "../pass/ConstantLoweringPass.h"
+#include "../pass/PassRunner.h"
+#include "../pass/PermutationEliminationPass.h"
+#include "../pass/PermutationInsertionPass.h"
+#include "../pass/PermutationOperationPass.h"
+#include "../../backend/builtin/Config.h"
+#include "../../dumper/text/GraphDumper.h"
+#include "../../ir/verifier/Verifier.h"
+#include "TrainableOperationConverter.h"
+
+#include "backend/Backend.h"
+#include "backend/train/ITrainableBackend.h"
+#include "compiler/BackendResolver.h"
+#include "util/logging.h"
+
+#include <cassert>
+#include <sstream>
+
+namespace onert
+{
+namespace compiler
+{
+namespace train
+{
+
+LoweredTrainableGraph::LoweredTrainableGraph(ir::train::TrainableGraph &graph,
+ const CompilerOptions &options)
+ : _trainable_graph{graph}
+{
+ lowerGraph(options);
+}
+
+void LoweredTrainableGraph::lowerGraph(const CompilerOptions &options)
+{
+ // Build backend contexts
+ auto &backend_manager = BackendManager::get();
+ // Create contexts for other backends
+ for (auto &&backend_str : options.backend_list)
+ {
+ backend_manager.loadBackend(backend_str);
+ auto backend = backend_manager.get(backend_str);
+
+ // TODO The default backend list contains "cpu", "acl_cl" and "acl_neon", but some of them
+ // are not available on x64 and some other platforms. So this is a workaround for x64, and
+ // we should change it back (throw if a backend is not loaded) later.
+ if (!backend)
+ {
+ VERBOSE(LoweredTrainableGraph) << "Cannot load backend - " << backend_str << std::endl;
+ continue;
+ }
+ }
+ if (backend_manager.num_backends() == 0)
+ throw std::runtime_error{"No available backends loaded."};
+
+ // TODO Move "schedule" phase out of here
+ // TODO Scheduling
+ std::unique_ptr<BackendResolver> backend_resolver;
+ auto all_backends = backend_manager.getAll();
+
+ auto scheduler = ManualScheduler(all_backends, options);
+ backend_resolver = scheduler.schedule(_trainable_graph.graph());
+
+ // Check if backends are trainable
+ _trainable_graph.operations().iterate(
+ [&](const ir::OperationIndex &op_ind, const ir::IOperation &) {
+ const auto backend = backend_resolver->getBackend(op_ind);
+
+ // TODO Remove dynamic_cast
+ if (dynamic_cast<const backend::train::ITrainableBackend *>(backend) == nullptr)
+ {
+ throw std::runtime_error(backend->config()->id() + " backend does not support training");
+ }
+ });
+
+ makeLowerInfo(*backend_resolver);
+ VERBOSE(LoweredTrainableGraph) << "dump before mandatory passes" << std::endl;
+ dumper::text::dumpLoweredGraph(*this);
+
+ // Mandatory passes - a kind of legalization
+ compiler::pass::PassRunner{}
+ .append(std::make_unique<compiler::pass::ConstantInsertionPass>(*this))
+ .append(std::make_unique<compiler::pass::ConstantLoweringPass>(*this))
+ .append(std::make_unique<compiler::pass::PermutationOperationPass>(*this))
+ .append(std::make_unique<compiler::pass::PermutationInsertionPass>(*this))
+ .run();
+
+ // TODO Move converting Permute op into PermutationInsertionPass
+ auto op_converter = TrainableOperationConverter{_trainable_graph, nullptr};
+ _trainable_graph.operations().iterate(
+ [&](const onert::ir::OperationIndex &index, const onert::ir::IOperation &op) {
+ if (op.opcode() == ir::OpCode::Permute)
+ {
+ auto trainable_op = op_converter(op);
+ auto gen_index = _trainable_graph.replaceOperation(index, std::move(trainable_op));
+ UNUSED_RELEASE(gen_index);
+ assert(gen_index == index);
+ }
+ });
+
+ dumpLowerInfo();
+
+ // Optimization passes (optional)
+ compiler::pass::PassRunner{}
+ .append(std::make_unique<compiler::pass::PermutationEliminationPass>(*this))
+ .run();
+
+ // TODO Update LowerInfo for training
+
+ VERBOSE(LoweredTrainableGraph) << "Dump after all the passes" << std::endl;
+ for (auto &&operand : _trainable_graph.getInputs())
+ VERBOSE(LoweredTrainableGraph) << "Graph Input : " << operand << std::endl;
+ for (auto &&operand : _trainable_graph.getOutputs())
+ VERBOSE(LoweredTrainableGraph) << "Graph Output : " << operand << std::endl;
+ dumper::text::dumpLoweredGraph(*this);
+
+ // Graph verifications
+ {
+ assert(ir::verifier::InputOutputChecker().verify(_trainable_graph.graph()));
+ assert(ir::verifier::DAGChecker().verify(_trainable_graph.graph()));
+ assert(ir::verifier::EdgeChecker().verify(_trainable_graph.graph()));
+ }
+}
+
+void LoweredTrainableGraph::makeLowerInfo(const compiler::BackendResolver &backend_resolver)
+{
+ _trainable_graph.operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &) {
+ lower_info().operand.set(index, std::make_unique<OperandLowerInfo>());
+ });
+
+ // Set operand lower info using assigned backends to operations
+ _trainable_graph.operations().iterate(
+ [&](const ir::OperationIndex &op_ind, const ir::IOperation &op) {
+ auto backend = backend_resolver.getBackend(op_ind);
+ if (!backend)
+ {
+ throw std::runtime_error{"Fail to find backend for " + op.name() + " operation"};
+ }
+
+ auto frontend_layout = _trainable_graph.layout();
+
+ // The layout of each backend should be set elsewhere
+ // TODO Move setting the layout of each backend to another place
+ auto backend_layout = backend->config()->supportLayout(op, frontend_layout);
+
+ for (auto &&ind : op.getInputs() | ir::Remove::UNDEFINED)
+ {
+ auto &operand_li = lower_info().operand.at(ind);
+ operand_li.addUsePermuteFactor(PermuteFactor{backend, backend_layout});
+ }
+ for (auto &&ind : op.getOutputs() | ir::Remove::UNDEFINED)
+ {
+ auto &operand_li = lower_info().operand.at(ind);
+ operand_li.addDefPermuteFactor(PermuteFactor{backend, backend_layout});
+ }
+ lower_info().operation.set(
+ op_ind, std::make_unique<compiler::OperationLowerInfo>(backend, backend_layout));
+ });
+
+ // Handle graph inputs and outputs
+ const auto builtin_backend = BackendManager::get().getBuiltin();
+ auto factor = PermuteFactor{builtin_backend, _trainable_graph.layout()};
+ for (auto &&index : _trainable_graph.getInputs() | ir::Remove::UNDEFINED)
+ {
+ auto &operand_li = lower_info().operand.at(index);
+ assert(operand_li.def_factors().empty());
+ operand_li.addDefPermuteFactor(factor);
+ }
+ for (auto &&index : _trainable_graph.getOutputs() | ir::Remove::UNDEFINED)
+ {
+ auto &operand_li = lower_info().operand.at(index);
+ operand_li.addUsePermuteFactor(factor);
+ }
+
+ // Handle variable tensors
+ _trainable_graph.operands().iterate([&](const ir::OperandIndex &index, ir::Operand &operand) {
+ // Some inputs of an operation may be non-constant yet appear neither in the graph
+ // inputs/outputs nor as undefined operands - these are variable tensors. For example,
+ // UnidirectionalSequenceLSTM has such inputs.
+ if (operand.info().isVariable())
+ {
+ // A variable operand with a buffer is not supported yet
+ assert(operand.data() == nullptr);
+ assert(operand.getUses().size() == 1 && !operand.getDef().valid());
+ auto &operand_li = lower_info().operand.at(index);
+ assert(operand_li.def_factors().empty());
+ operand_li.addDefPermuteFactor(operand_li.use_factors().getOnlyElement());
+ }
+ });
+}
+
+void LoweredTrainableGraph::dumpLowerInfo()
+{
+ if (::onert::util::logging::ctx.enabled() == false)
+ return;
+
+ std::map<uint32_t, std::string> dumps;
+
+ _trainable_graph.operands().iterate([&](const ir::OperandIndex &index, ir::Operand &object) {
+ const auto operand_lower_info = lower_info().operand.getRawPtr(index);
+ assert(operand_lower_info);
+ if (!operand_lower_info->def_factors().empty() || !operand_lower_info->use_factors().empty())
+ {
+ auto shape_to_string = [](const ir::Shape &shape) {
+ std::stringstream sstream;
+ sstream << "{ ";
+ for (auto i = 0; i < shape.rank(); ++i)
+ sstream << (shape.dim(i)) << " ";
+ sstream << "}";
+ return sstream.str();
+ };
+
+ auto factors_to_string = [](const PermuteFactorSet &factors) {
+ std::string str;
+ for (auto &&factor : factors)
+ {
+ str += factor.backend()->config()->id();
+ str += "(" + to_string(factor.layout()) + ")";
+ str += " ";
+ }
+ return "{ " + str + "}";
+ };
+
+ auto operation_index_set_to_string = [](const ir::OperationIndexSet &operations) {
+ std::stringstream sstream;
+ sstream << "{ ";
+ for (auto &&op : operations)
+ sstream << op << " ";
+ sstream << "}";
+ return sstream.str();
+ };
+
+ auto data_to_str = [](const ir::Data *data) {
+ return (data ? (std::to_string(data->size()) + " bytes") : "N/A");
+ };
+
+ std::string shape_str = shape_to_string(object.shape());
+ std::string def_op = operation_index_set_to_string({object.getDef()});
+ std::string use_ops = operation_index_set_to_string(object.getUses());
+ std::string def_factors = factors_to_string(operand_lower_info->def_factors());
+ std::string use_factors = factors_to_string(operand_lower_info->use_factors());
+ std::stringstream sstream;
+ sstream << "Operand " << index << " Info" << std::endl;
+ sstream << " - Shape : " << shape_str << std::endl;
+ sstream << " - Def/Uses : Def " << def_op << " Uses " << use_ops << std::endl;
+ sstream << " - Data : " << data_to_str(object.data()) << std::endl;
+ sstream << " - LowerInfo : Def " << def_factors << " Uses " << use_factors << std::endl;
+ dumps.emplace(index.value(), sstream.str());
+ }
+ });
+
+ for (const auto &e : dumps)
+ {
+ if (!e.second.empty())
+ {
+ std::istringstream iss(e.second);
+ std::string line;
+ while (std::getline(iss, line))
+ VERBOSE(Lower) << line << std::endl;
+ }
+ }
+}
+
+} // namespace train
+} // namespace compiler
+} // namespace onert
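Usage is a single constructor call: lowering runs eagerly. A sketch, assuming a populated `TrainableGraph` and configured `CompilerOptions` (both defined elsewhere in onert):

```
#include <memory>

#include "compiler/train/LoweredTrainableGraph.h"

std::unique_ptr<onert::compiler::train::LoweredTrainableGraph>
lower(onert::ir::train::TrainableGraph &tgraph, const onert::compiler::CompilerOptions &options)
{
  // The constructor runs the whole pipeline shown above: backend loading and
  // scheduling, lower-info construction, mandatory passes, Permute conversion
  // to trainable operations, and the optional elimination pass.
  return std::make_unique<onert::compiler::train::LoweredTrainableGraph>(tgraph, options);
}
```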
diff --git a/runtime/onert/core/src/compiler/train/StaticDerivativeShapeInferer.cc b/runtime/onert/core/src/compiler/train/StaticDerivativeShapeInferer.cc
new file mode 100644
index 000000000..d2153296f
--- /dev/null
+++ b/runtime/onert/core/src/compiler/train/StaticDerivativeShapeInferer.cc
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "StaticDerivativeShapeInferer.h"
+#include "util/ShapeInference.h"
+#include "util/logging.h"
+
+#include <misc/polymorphic_downcast.h>
+
+#include <sstream>
+#include <stdexcept>
+
+namespace onert
+{
+namespace compiler
+{
+namespace train
+{
+
+void StaticDerivativeShapeInferer::infer()
+{
+ // NOTE Whether iterating in reverse topological order is required has not been settled yet.
+ auto sorted_ops = _lowered_subg->graph().topolSortOperations();
+ for (auto it = sorted_ops.rbegin(); it != sorted_ops.rend(); ++it)
+ {
+ const auto op_idx = *it;
+ const auto &op = _lowered_subg->trainable_graph().operation(op_idx);
+ if (checkDynamicInput(op))
+ {
+ std::stringstream msg;
+ msg << "StaticDerivativeShapeInferer does not support dynamic shape yet, ";
+ msg << op.name() << "(op index: " << op_idx << ") has a dynamically shaped input.";
+ throw std::runtime_error(msg.str());
+ }
+
+ checkOutput(op);
+
+ op.accept(*this);
+ }
+}
+
+void StaticDerivativeShapeInferer::dump()
+{
+ // TODO dump
+}
+
+bool StaticDerivativeShapeInferer::checkDynamicInput(const ir::IOperation &op)
+{
+ const auto &operands = _lowered_subg->graph().operands();
+ for (auto input_idx : op.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED)
+ {
+ if (operands.at(input_idx).info().isDynamic())
+ {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void StaticDerivativeShapeInferer::checkOutput(const ir::IOperation &op)
+{
+ const auto &derivatives = _lowered_subg->trainable_graph().derivatives();
+ for (auto output_idx : op.getOutputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED)
+ {
+ if (!derivatives.exist(output_idx))
+ {
+ std::stringstream msg;
+ msg << "StaticDerivativeShapeInferer : Invalid output, ";
+ msg << op.name() << "'s derivative output(index: " << output_idx << ") does not exist.";
+ throw std::runtime_error(msg.str());
+ }
+ }
+}
+
+void StaticDerivativeShapeInferer::setShape(const ir::OperandIndex &index, const ir::Shape &shape)
+{
+ auto &tgraph = _lowered_subg->trainable_graph();
+
+ if (tgraph.derivatives().exist(index))
+ tgraph.changeDerivativeShape(index, shape);
+ else
+ {
+ // NOTE This code assumes the derivative's type always matches the operand's type, which is not verified.
+ const auto &type = tgraph.operands().at(index).typeInfo();
+ const auto new_index = tgraph.addDerivative(index, std::make_unique<ir::Operand>(shape, type));
+ assert(new_index == index);
+ UNUSED_RELEASE(new_index);
+ }
+}
+
+void StaticDerivativeShapeInferer::visit(const ir::train::operation::Conv2D &)
+{
+ // NYI
+}
+
+void StaticDerivativeShapeInferer::visit(const ir::train::operation::ElementwiseActivation &)
+{
+ // NYI
+}
+
+void StaticDerivativeShapeInferer::visit(const ir::train::operation::Loss &)
+{
+ // NYI
+}
+
+void StaticDerivativeShapeInferer::visit(const ir::train::operation::Permute &op)
+{
+ const auto &derivatives = _lowered_subg->trainable_graph().derivatives();
+
+ const auto &output_idx = op.getOutputs().at(0);
+ const auto &output = derivatives.at(output_idx);
+
+ // re-sizing input derivative shape
+ const auto &input_idx = op.getInputs().at(0);
+ const auto &new_shape = output.info().shape();
+ setShape(input_idx, new_shape);
+}
+
+void StaticDerivativeShapeInferer::visit(const ir::train::operation::Pool2D &)
+{
+ // NYI
+}
+
+void StaticDerivativeShapeInferer::visit(const ir::train::operation::Reshape &)
+{
+ // NYI
+}
+
+void StaticDerivativeShapeInferer::visit(const ir::train::operation::Softmax &)
+{
+ // NYI
+}
+
+} // namespace train
+} // namespace compiler
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/train/StaticDerivativeShapeInferer.h b/runtime/onert/core/src/compiler/train/StaticDerivativeShapeInferer.h
new file mode 100644
index 000000000..48b3172d2
--- /dev/null
+++ b/runtime/onert/core/src/compiler/train/StaticDerivativeShapeInferer.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_STATIC_DERIVATIVE_SHAPE_INFERER_H__
+#define __ONERT_COMPILER_STATIC_DERIVATIVE_SHAPE_INFERER_H__
+
+#include "ir/train/TrainableOperationVisitor.h"
+
+#include "compiler/train/LoweredTrainableGraph.h"
+#include "ir/Index.h"
+
+#include <memory>
+#include <unordered_map>
+
+namespace onert
+{
+namespace compiler
+{
+namespace train
+{
+
+/**
+ * @brief Class to infer shape before running kernels. It does the following:
+ * - re-calculate and set output shape at compile time (before running kernels)
+ * - if calculation cannot be done at compile time, mark the outputs as dynamic, meaning
+ * their shapes will be calculated while running kernels
+ */
+class StaticDerivativeShapeInferer : public ir::train::TrainableOperationVisitor
+{
+public:
+ StaticDerivativeShapeInferer(compiler::train::LoweredTrainableGraph *lowered_subg)
+ : _lowered_subg{lowered_subg}
+ {
+ }
+
+ /**
+ * @brief Infer shape of operands belonging to ops and set the output shape.
+ * If the output shape cannot be known without running the op, mark it so that it can be
+ * allocated when the kernel runs.
+ */
+ void infer();
+
+ void dump();
+
+private:
+ bool checkDynamicInput(const ir::IOperation &op);
+ void checkOutput(const ir::IOperation &op);
+ void setShape(const ir::OperandIndex &index, const ir::Shape &shape);
+
+private:
+ void visit(const ir::train::operation::Conv2D &op) override;
+ void visit(const ir::train::operation::ElementwiseActivation &op) override;
+ void visit(const ir::train::operation::Loss &op) override;
+ void visit(const ir::train::operation::Permute &op) override;
+ void visit(const ir::train::operation::Pool2D &op) override;
+ void visit(const ir::train::operation::Reshape &op) override;
+ void visit(const ir::train::operation::Softmax &op) override;
+
+private:
+ compiler::train::LoweredTrainableGraph *_lowered_subg;
+};
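+
+// A minimal usage sketch (hedged), mirroring how TrainingCompiler drives this class;
+// `lowered_subg` is an illustrative name for a LoweredTrainableGraph owner:
+//
+//   auto inferer = std::make_unique<StaticDerivativeShapeInferer>(lowered_subg.get());
+//   inferer->infer(); // re-sizes derivative shapes at compile time where possible
+//   inferer->dump();  // logs the inferred shapes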
+
+} // namespace train
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_TRAIN_STATIC_DERIVATIVE_SHAPE_INFERER_H__
diff --git a/runtime/onert/core/src/compiler/train/TensorRegistries.h b/runtime/onert/core/src/compiler/train/TensorRegistries.h
new file mode 100644
index 000000000..48eaf10a1
--- /dev/null
+++ b/runtime/onert/core/src/compiler/train/TensorRegistries.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_TRAIN_TENSOR_REGISTRIES_H__
+#define __ONERT_COMPILER_TRAIN_TENSOR_REGISTRIES_H__
+
+#include "../../backend/builtin/Config.h"
+#include "../../backend/builtin/train/TensorRegistry.h"
+
+#include <backend/train/TrainableBackendContext.h>
+
+#include <memory>
+#include <unordered_set>
+
+namespace onert
+{
+namespace compiler
+{
+namespace train
+{
+
+class TensorRegistries
+{
+public:
+ TensorRegistries() = default;
+
+ TensorRegistries(const backend::train::TrainableBackendContexts &backend_contexts,
+ bool include_builtin)
+ {
+ for (const auto &e : backend_contexts)
+ {
+ auto tensor_reg = e.second->tensor_registry();
+ if (e.first->config()->id() == backend::builtin::Config::ID)
+ {
+ _builtin_tensor_reg =
+ std::dynamic_pointer_cast<backend::builtin::train::TensorRegistry>(tensor_reg);
+ if (include_builtin)
+ _tensor_regs.insert(tensor_reg);
+ }
+ else
+ {
+ _tensor_regs.insert(tensor_reg);
+ }
+ }
+ }
+
+ std::unordered_set<std::shared_ptr<backend::train::ITensorRegistry>>::const_iterator begin() const
+ {
+ return _tensor_regs.cbegin();
+ }
+ std::unordered_set<std::shared_ptr<backend::train::ITensorRegistry>>::const_iterator end() const
+ {
+ return _tensor_regs.cend();
+ }
+
+ std::shared_ptr<backend::builtin::train::TensorRegistry> getBuiltinTensorRegistry() const
+ {
+ return _builtin_tensor_reg;
+ }
+
+ backend::ITensor *getITensor(ir::OperandIndex index) const
+ {
+ for (auto &&tensor_reg : _tensor_regs)
+ {
+ auto tensor = tensor_reg->getITensor(index);
+ if (tensor)
+ return tensor;
+ }
+ return nullptr;
+ }
+
+ backend::ITensor *getDerivativeITensor(ir::OperandIndex index) const
+ {
+ for (auto &&tensor_reg : _tensor_regs)
+ {
+ auto tensor = tensor_reg->getDerivativeITensor(index);
+ if (tensor)
+ return tensor;
+ }
+ return nullptr;
+ }
+
+private:
+ std::unordered_set<std::shared_ptr<backend::train::ITensorRegistry>> _tensor_regs;
+ std::shared_ptr<backend::builtin::train::TensorRegistry> _builtin_tensor_reg;
+};
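+
+// A usage sketch (hedged); `backend_contexts` and `index` are illustrative and assumed
+// to come from the executor-building code:
+//
+//   TensorRegistries tensor_regs{backend_contexts, /*include_builtin=*/true};
+//   backend::ITensor *tensor = tensor_regs.getITensor(index);          // searches all registries
+//   backend::ITensor *deriv = tensor_regs.getDerivativeITensor(index); // derivative counterpart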
+
+} // namespace train
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_TRAIN_TENSOR_REGISTRIES_H__
diff --git a/runtime/onert/core/src/compiler/train/TrainableOperationConverter.cc b/runtime/onert/core/src/compiler/train/TrainableOperationConverter.cc
new file mode 100644
index 000000000..d20ae9fd3
--- /dev/null
+++ b/runtime/onert/core/src/compiler/train/TrainableOperationConverter.cc
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TrainableOperationConverter.h"
+
+#include "ir/train/Operations.Include.h"
+#include "util/Utils.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace train
+{
+
+TrainableOperationConverter::TrainableOperationConverter(
+ ir::train::TrainableGraph &tgraph, const compiler::train::TrainingInfo *training_info)
+ : UntrainableOperationConverter{tgraph}, _training_info{training_info}
+{
+ // Avoid unused-private-field error
+ UNUSED_RELEASE(_training_info);
+}
+
+void TrainableOperationConverter::visit(const ir::operation::Conv2D &node)
+{
+ _return_op = std::make_unique<ir::train::operation::Conv2D>(node);
+}
+
+void TrainableOperationConverter::visit(const ir::operation::ElementwiseActivation &node)
+{
+ if (node.param().op_type == ir::operation::ElementwiseActivation::Type::RELU)
+ {
+ _return_op = std::make_unique<ir::train::operation::ElementwiseActivation>(node);
+ }
+ else
+ {
+ UntrainableOperationConverter::visit(node);
+ }
+}
+
+void TrainableOperationConverter::visit(const ir::operation::FullyConnected &node)
+{
+ _return_op = std::make_unique<ir::train::operation::FullyConnected>(node);
+}
+
+void TrainableOperationConverter::visit(const ir::operation::Loss &node)
+{
+ _return_op = std::make_unique<ir::train::operation::Loss>(node);
+}
+
+void TrainableOperationConverter::visit(const ir::operation::Permute &node)
+{
+ _return_op = std::make_unique<ir::train::operation::Permute>(node);
+}
+
+void TrainableOperationConverter::visit(const ir::operation::Pool2D &node)
+{
+ _return_op = std::make_unique<ir::train::operation::Pool2D>(node);
+}
+
+void TrainableOperationConverter::visit(const ir::operation::Reshape &node)
+{
+ _return_op = std::make_unique<ir::train::operation::Reshape>(node);
+}
+
+void TrainableOperationConverter::visit(const ir::operation::Softmax &node)
+{
+ _return_op = std::make_unique<ir::train::operation::Softmax>(node);
+}
+
+} // namespace train
+} // namespace compiler
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/train/TrainableOperationConverter.h b/runtime/onert/core/src/compiler/train/TrainableOperationConverter.h
new file mode 100644
index 000000000..5f6fc10c3
--- /dev/null
+++ b/runtime/onert/core/src/compiler/train/TrainableOperationConverter.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_TRAIN_TRAINABLE_OPERATION_CONVERTER_H__
+#define __ONERT_COMPILER_TRAIN_TRAINABLE_OPERATION_CONVERTER_H__
+
+#include "UntrainableOperationConverter.h"
+
+#include "compiler/train/TrainingInfo.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace train
+{
+
+class TrainableOperationConverter : public UntrainableOperationConverter
+{
+public:
+ TrainableOperationConverter(ir::train::TrainableGraph &trainable_graph,
+ const compiler::train::TrainingInfo *training_info);
+
+ using UntrainableOperationConverter::operator();
+
+private:
+ void visit(const ir::operation::Conv2D &) override;
+ void visit(const ir::operation::ElementwiseActivation &) override;
+ void visit(const ir::operation::FullyConnected &) override;
+ void visit(const ir::operation::Loss &node) override;
+ void visit(const ir::operation::Permute &node) override;
+ void visit(const ir::operation::Pool2D &node) override;
+ void visit(const ir::operation::Reshape &) override;
+ void visit(const ir::operation::Softmax &) override;
+
+private:
+ const compiler::train::TrainingInfo *_training_info;
+};
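+
+// A minimal sketch of the intended call pattern, taken from TrainingCompiler::compile;
+// `trainable_graph`, `training_info` and `op` are caller-provided:
+//
+//   TrainableOperationConverter converter{trainable_graph, &training_info};
+//   auto trainable_op = converter(op); // trainable op, or an UntrainableOperation wrapper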
+
+} // namespace train
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_TRAIN_TRAINABLE_OPERATION_CONVERTER_H__
diff --git a/runtime/onert/core/src/compiler/train/TrainingCompiler.cc b/runtime/onert/core/src/compiler/train/TrainingCompiler.cc
new file mode 100644
index 000000000..711af1651
--- /dev/null
+++ b/runtime/onert/core/src/compiler/train/TrainingCompiler.cc
@@ -0,0 +1,299 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TrainingCompiler.h"
+
+#include "StaticDerivativeShapeInferer.h"
+#include "TrainableOperationConverter.h"
+#include "pass/LossInsertionPass.h"
+#include "../CompilerHelpers.h"
+#include "../ExecutorFactory.h"
+#include "../pass/ConstantOutputPass.h"
+#include "../pass/OddOutputPass.h"
+#include "../pass/PassRunner.h"
+#include "../pass/UnusedOperandEliminationPass.h"
+#include "../ShapeValidator.h"
+#include "../../dumper/dot/DotDumper.h"
+#include "../../exec/train/TrainableExecutors.h"
+#include "../../ir/OperationDumper.h"
+#include "../../ir/verifier/Verifier.h"
+
+#include <compiler/StaticShapeInferer.h>
+#include <compiler/train/LoweredTrainableGraph.h>
+#include <ir/train/TrainableGraph.h>
+#include <exec/train/optimizer/SGD.h>
+
+#include <misc/polymorphic_downcast.h>
+#include <misc/string_helpers.h>
+
+namespace onert
+{
+namespace compiler
+{
+namespace train
+{
+
+TrainingCompiler::TrainingCompiler(const std::shared_ptr<ir::NNPkg> &nnpkg,
+ std::vector<std::unique_ptr<CompilerOptions>> &copts,
+ const TrainingInfo &training_info)
+ : _model{nnpkg->primary_model()}, _options{copts[0].get()}, _training_info{training_info}
+{
+ if (nnpkg->model_count() > 1)
+ throw std::runtime_error("TrainingCompiler does not support multiple models yet");
+
+ if (nnpkg->primary_model()->subgraphs_count() > 1)
+ throw std::runtime_error("TrainingCompiler does not support multiple subgraphs yet");
+}
+
+std::shared_ptr<CompilerArtifact> TrainingCompiler::compile(void)
+{
+ /***************************************************
+ * Prepare compilation phase
+ ***************************************************/
+ if (!_options)
+ throw std::runtime_error{"Empty compile option"};
+
+ // Mode check
+ // TODO handle option for each model
+ if (_options->he_profiling_mode)
+ {
+ if (!_options->he_scheduler)
+ throw std::runtime_error("Heterogeneous scheduler must be enabled during profiling.");
+
+ if (_options->executor != "Dataflow")
+ throw std::runtime_error("Profiling mode works only with 'Dataflow' executor");
+ }
+
+ if (!_options->minmax_filepath.empty())
+ {
+ if (_options->executor != "Linear")
+ throw std::runtime_error("Recording minmax works only with Linear executor");
+ }
+
+ _options->forceInternalOptions();
+ _options->verboseOptions();
+
+ auto custom_kernel_builder = _model->getKernelBuilder();
+
+ _model->iterate([&](const ir::SubgraphIndex &, ir::IGraph &graph) {
+ auto &subg = nnfw::misc::polymorphic_downcast<ir::Graph &>(graph);
+ // Mandatory passes
+ compiler::pass::PassRunner{}
+ .append(std::make_unique<compiler::pass::ConstantOutputPass>(subg))
+ .append(std::make_unique<compiler::pass::OddOutputPass>(subg))
+ .run();
+
+ // Optimizations
+ compiler::pass::PassRunner{}
+ .append(std::make_unique<compiler::pass::UnusedOperandEliminationPass>(subg))
+ .run();
+ });
+
+ std::unordered_map<ir::SubgraphIndex, std::shared_ptr<ir::train::TrainableGraph>>
+ trainable_subgraphs;
+
+ if (_model->hasOnly<ir::Graph>())
+ {
+ // Create trainable subgraphs by copying and converting the inference model
+ _model->iterate([&](const ir::SubgraphIndex &subg_index, const ir::IGraph &graph) {
+ const auto &subg = nnfw::misc::polymorphic_downcast<const ir::Graph &>(graph);
+ // Create TrainableGraph by copying Graph
+ auto trainable_subg = std::make_shared<ir::train::TrainableGraph>(subg);
+
+ // Convert operations to trainable operations
+ auto converter = TrainableOperationConverter{*trainable_subg, &_training_info};
+ subg.operations().iterate(
+ [&](const onert::ir::OperationIndex &op_index, const onert::ir::IOperation &op) {
+ auto trainable_op = converter(op);
+ auto gen_index = trainable_subg->replaceOperation(op_index, std::move(trainable_op));
+ UNUSED_RELEASE(gen_index);
+ assert(gen_index == op_index);
+ });
+
+ trainable_subgraphs[subg_index] = std::move(trainable_subg);
+ });
+ }
+ else
+ {
+ // TODO Support models that have TrainableGraphs
+ throw std::runtime_error("TrainingCompiler: Invalid model");
+ }
+
+ // The original model is not needed anymore after conversion to trainable subgraphs
+ _model.reset();
+
+ // Apply pass for trainable subgraphs
+ for (auto &&pair : trainable_subgraphs)
+ {
+ auto trainable_subg = pair.second;
+ auto subg_index = pair.first;
+
+ compiler::pass::PassRunner{}
+ .append(std::make_unique<train::pass::LossInsertionPass>(*trainable_subg, &_training_info,
+ subg_index))
+ .run();
+ }
+
+ // Change input shape according to batch_size
+ for (auto &&pair : trainable_subgraphs)
+ {
+ auto trainable_subg = pair.second;
+
+ for (const auto &ind : trainable_subg->getInputs())
+ {
+ auto &input = trainable_subg->operands().at(ind);
+ auto new_shape = input.info().shape();
+ // TODO Consider batch size index
+ if (new_shape.dim(0) != 1)
+ throw std::runtime_error("the first dim is not 1. It is not supported yet.");
+ new_shape.dim(0) = _training_info.batchSize();
+ input.info().shape(new_shape);
+ }
+ }
+
+ /***************************************************
+ * Backend independent analysis & optimization phase
+ ***************************************************/
+ // TODO Handle dump level for each model
+ auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_options->graph_dump_level);
+ onert::dumper::dot::DotDumper dot_dumper(dump_level);
+
+ // Tracing context
+ auto tracing_ctx = std::make_unique<util::TracingCtx>();
+
+ // Lower: Assign backend
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::train::LoweredTrainableGraph>>
+ lowered_subgs;
+ {
+ for (auto &&pair : trainable_subgraphs)
+ {
+ auto &subg_index = pair.first;
+ auto trainable_subg = pair.second;
+
+ // Lower: Assign backend
+ lowered_subgs[subg_index] =
+ std::make_unique<compiler::train::LoweredTrainableGraph>(*trainable_subg, *_options);
+ // Set tracing_ctx for copied graph
+ if (tracing_ctx != nullptr)
+ tracing_ctx->setSubgraphIndex(&(lowered_subgs[subg_index]->graph()), subg_index.value());
+ }
+ }
+
+ for (const auto &pair : lowered_subgs)
+ {
+ const auto &subg_index = pair.first;
+ const auto &lowered_subg = pair.second;
+ dot_dumper.dump(*lowered_subg, nnfw::misc::str("after_lower_subg-", subg_index.value()));
+ }
+
+ // Register a derivative for each non-constant operand, reusing the operand's tensor info
+ for (const auto &pair : lowered_subgs)
+ {
+ auto lowered_subg = pair.second.get();
+ auto &tgraph = lowered_subg->trainable_graph();
+ tgraph.operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &obj) {
+ if (!obj.isConstant())
+ {
+ auto deriv = std::make_unique<ir::Operand>(obj);
+ const auto gen_index = tgraph.addDerivative(index, std::move(deriv));
+ assert(gen_index == index);
+ UNUSED_RELEASE(gen_index);
+ }
+ });
+ }
+
+ // Shape inference.
+ {
+ // Run the StaticShapeInferer of the primary subgraph. All child StaticShapeInferers are
+ // called recursively
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers =
+ createStaticShapeInferers(lowered_subgs);
+
+ const auto primary_subg_idx = ir::SubgraphIndex{0};
+ inferers.at(primary_subg_idx)->infer();
+
+ for (const auto &pair_inferer : inferers)
+ {
+ const auto inferer = pair_inferer.second.get();
+ inferer->dump();
+ }
+
+ // NOTE StaticDerivativeShapeInferer is allocated for each subgraph,
+ // so it does not support models that have controlflow operations yet.
+ for (auto &&pair : lowered_subgs)
+ {
+ auto &lowered_subg = pair.second;
+ auto inferer = std::make_unique<StaticDerivativeShapeInferer>(lowered_subg.get());
+ inferer->infer();
+ inferer->dump();
+ }
+ }
+
+ // Shape validation
+ for (const auto &pair : lowered_subgs)
+ {
+ auto &lowered_subg = pair.second;
+ compiler::ShapeValidator{lowered_subg->graph()}();
+ }
+
+ // TODO Validate shapes of derivative tensors
+
+ // Create optimizer
+ // TODO Set properties of optimizer
+ std::shared_ptr<exec::train::optimizer::Optimizer> optimizer;
+ const auto &optim_info = _training_info.optimizerInfo();
+ if (optim_info.optim_code == exec::train::optimizer::OptimizerCode::SGD)
+ optimizer = std::make_shared<exec::train::optimizer::SGD>(optim_info.learning_rate);
+ else
+ throw std::runtime_error("Invalid optimizer type, " +
+ exec::train::optimizer::toString(optim_info.optim_code));
+
+ /*************************************************************
+ * Backend independent analysis & optimization phase finished
+ *************************************************************/
+ auto executors = std::make_shared<exec::train::TrainableExecutors>();
+ for (auto &&pair : lowered_subgs)
+ {
+ auto const model_index = ir::ModelIndex{0};
+ auto const subg_index = pair.first;
+ auto &lowered_subg = pair.second;
+ auto const indexed_ranks = lowered_subg->indexed_ranks();
+
+ ir::OperationDumper dumper("Executor generation of Subgraph " +
+ std::to_string(subg_index.value()));
+ lowered_subg->graph().operations().iterate(
+ [&](const ir::OperationIndex &, const ir::IOperation &op) { op.accept(dumper); });
+
+ ExecutorFactoryArgs args;
+ args.tracing_ctx = tracing_ctx.get();
+ args.options = _options;
+ args.model_index = model_index;
+ args.custom_kernel_builder = custom_kernel_builder;
+ auto executor = std::unique_ptr<exec::IExecutor>{
+ ExecutorFactory::get().create(std::move(lowered_subg), executors, args, optimizer)};
+ executor->setIndexedRanks(indexed_ranks);
+ executors->emplace(model_index, subg_index, std::move(executor));
+ }
+
+ /********************************
+ * Code generation phase finished
+ ********************************/
+ return std::make_shared<CompilerArtifact>(executors, std::move(tracing_ctx));
+}
+
+} // namespace train
+} // namespace compiler
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/train/TrainingCompiler.h b/runtime/onert/core/src/compiler/train/TrainingCompiler.h
new file mode 100644
index 000000000..b93437217
--- /dev/null
+++ b/runtime/onert/core/src/compiler/train/TrainingCompiler.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file TrainingCompiler.h
+ * @brief This file contains the TrainingCompiler class, which defines and runs the compilation phase
+ */
+
+#ifndef __ONERT_COMPILER_TRAIN_TRAINING_COMPILER_H__
+#define __ONERT_COMPILER_TRAIN_TRAINING_COMPILER_H__
+
+#include "compiler/CompilerOptions.h"
+#include "compiler/ICompiler.h"
+#include "compiler/train/TrainingInfo.h"
+#include "ir/NNPkg.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace train
+{
+
+/**
+ * @brief Class to compile NN package
+ */
+class TrainingCompiler : public ICompiler
+{
+public:
+ /**
+ * @brief Construct a new TrainingCompiler object for a single model
+ * @param[in] nnpkg NN package to compile
+ * @param[in] copts Compiler options
+ * @param[in] training_info Training information
+ */
+ explicit TrainingCompiler(const std::shared_ptr<ir::NNPkg> &nnpkg,
+ std::vector<std::unique_ptr<CompilerOptions>> &copts,
+ const TrainingInfo &training_info);
+
+ /**
+ * @brief Default construction is not allowed
+ */
+ TrainingCompiler(void) = delete;
+
+ /**
+ * @brief Destroy the TrainingCompiler object
+ */
+ ~TrainingCompiler() = default;
+
+public:
+ /**
+ * @brief Do compilation with the options
+ *
+ * @return std::shared_ptr<CompilerArtifact> Executors as a result of compilation
+ */
+ std::shared_ptr<CompilerArtifact> compile(void);
+
+private:
+ std::shared_ptr<ir::Model> _model;
+ CompilerOptions *_options;
+ const TrainingInfo _training_info;
+};
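+
+// A usage sketch (hedged); `nnpkg`, `copts` and `training_info` are caller-provided:
+//
+//   TrainingCompiler compiler{nnpkg, copts, training_info};
+//   auto artifact = compiler.compile(); // executors plus a tracing context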
+
+} // namespace train
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_TRAIN_TRAINING_COMPILER_H__
diff --git a/runtime/onert/core/src/compiler/train/UntrainableOperationConverter.cc b/runtime/onert/core/src/compiler/train/UntrainableOperationConverter.cc
new file mode 100644
index 000000000..6a5a052b6
--- /dev/null
+++ b/runtime/onert/core/src/compiler/train/UntrainableOperationConverter.cc
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "UntrainableOperationConverter.h"
+
+#include "ir/train/operation/UntrainableOperation.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace train
+{
+
+UntrainableOperationConverter::UntrainableOperationConverter(ir::train::TrainableGraph &tgraph)
+ : _tgraph{tgraph}, _return_op{nullptr}
+{
+}
+
+std::unique_ptr<ir::train::ITrainableOperation> UntrainableOperationConverter::
+operator()(const ir::IOperation &op)
+{
+ op.accept(*this);
+
+ return std::move(_return_op);
+}
+
+#define OP(InternalName) \
+ void UntrainableOperationConverter::visit(const ir::operation::InternalName &node) \
+ { \
+ _return_op = \
+ std::make_unique<ir::train::operation::UntrainableOperation<ir::operation::InternalName>>( \
+ node); \
+ }
+#include "ir/Operations.lst"
+#undef OP
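+
+// For reference, one expansion of the OP macro above looks roughly like the following
+// (Reshape is just an example entry of ir/Operations.lst):
+//
+//   void UntrainableOperationConverter::visit(const ir::operation::Reshape &node)
+//   {
+//     _return_op = std::make_unique<
+//       ir::train::operation::UntrainableOperation<ir::operation::Reshape>>(node);
+//   }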
+
+} // namespace train
+} // namespace compiler
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/train/UntrainableOperationConverter.h b/runtime/onert/core/src/compiler/train/UntrainableOperationConverter.h
new file mode 100644
index 000000000..e960b3831
--- /dev/null
+++ b/runtime/onert/core/src/compiler/train/UntrainableOperationConverter.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_TRAIN_UNTRAINABLE_OPERATION_CONVERTER_H__
+#define __ONERT_COMPILER_TRAIN_UNTRAINABLE_OPERATION_CONVERTER_H__
+
+#include "ir/Operations.Include.h"
+#include "ir/OperationVisitor.h"
+#include "ir/train/TrainableGraph.h"
+
+#include <memory>
+
+namespace onert
+{
+namespace compiler
+{
+namespace train
+{
+
+class UntrainableOperationConverter : public ir::OperationVisitor
+{
+public:
+ UntrainableOperationConverter(ir::train::TrainableGraph &tgraph);
+ std::unique_ptr<ir::train::ITrainableOperation> operator()(const ir::IOperation &op);
+
+#define OP(InternalName) void visit(const ir::operation::InternalName &node);
+#include "ir/Operations.lst"
+#undef OP
+
+protected:
+ ir::train::TrainableGraph &_tgraph;
+ std::unique_ptr<ir::train::ITrainableOperation> _return_op;
+};
+
+} // namespace train
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_TRAIN_UNTRAINABLE_OPERATION_CONVERTER_H__
diff --git a/runtime/onert/core/src/compiler/train/pass/LossInsertionPass.cc b/runtime/onert/core/src/compiler/train/pass/LossInsertionPass.cc
new file mode 100644
index 000000000..3e01a9739
--- /dev/null
+++ b/runtime/onert/core/src/compiler/train/pass/LossInsertionPass.cc
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "LossInsertionPass.h"
+
+#include "ir/train/TrainableGraph.h"
+#include "ir/train/operation/Loss.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace train
+{
+namespace pass
+{
+
+void LossInsertionPass::run()
+{
+ const auto &loss_info = _training_info->lossInfo();
+
+ ir::operation::Loss::Param param;
+ param.op_type = loss_info.type;
+
+ if (_trainable_graph.getOutputs().size() != 1)
+ {
+ throw std::runtime_error("LossInsertionPass: Not supported multiple outputs");
+ }
+
+ // TODO Consider SparseCategoricalCrossentropy y_true shape
+ // SparseCategoricalCrossentropy loss has a different y_true shape than y_pred.
+
+ // TODO Implement a loop over [0, getOutputs().size())
+ // index: the loop index; for now only the first output is handled
+ const auto index = 0;
+ const auto &y_pred_index = _trainable_graph.getOutputs().at(index);
+ const auto &y_pred = _trainable_graph.operands().at(y_pred_index);
+ const auto &shape = y_pred.shape();
+ const auto &type_info = y_pred.typeInfo();
+ auto y_true_index = _trainable_graph.addOperand(shape, type_info);
+ ir::OperandIndexSequence inputs{y_pred_index, y_true_index};
+
+ // TODO Consider Reduction
+ // For some types of Reduction, y_true and the output have the same shape.
+
+ const ir::TypeInfo float_op(ir::DataType::FLOAT32);
+ auto output_index = _trainable_graph.addOperand(ir::Shape{1}, float_op);
+ ir::OperandIndexSequence outputs{output_index};
+
+ auto loss_op = std::make_unique<ir::operation::Loss>(inputs, outputs, param);
+ auto trainable_loss_op = std::make_unique<ir::train::operation::Loss>(*loss_op);
+
+ _trainable_graph.addOperation(std::move(trainable_loss_op));
+
+ _trainable_graph.addInput(y_true_index);
+
+ // TODO Add loss as many as output size
+ _trainable_graph.addLoss(output_index, ir::IOIndex{index});
+}
+
+} // namespace pass
+} // namespace train
+} // namespace compiler
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/train/pass/LossInsertionPass.h b/runtime/onert/core/src/compiler/train/pass/LossInsertionPass.h
new file mode 100644
index 000000000..ed4d60c96
--- /dev/null
+++ b/runtime/onert/core/src/compiler/train/pass/LossInsertionPass.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_TRAIN_PASS_LOSS_INSERTION_PASS_H__
+#define __ONERT_COMPILER_TRAIN_PASS_LOSS_INSERTION_PASS_H__
+
+#include "Pass.h"
+
+#include "compiler/train/TrainingInfo.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace train
+{
+namespace pass
+{
+
+class LossInsertionPass : public Pass
+{
+public:
+ LossInsertionPass(ir::train::TrainableGraph &trainable_graph, const TrainingInfo *training_info,
+ const ir::SubgraphIndex &subg_index)
+ : Pass{trainable_graph, training_info}, _subg_index{subg_index}
+ {
+ }
+
+public:
+ std::string id() final { return "LossInsertionPass"; }
+ void run() final;
+
+private:
+ ir::SubgraphIndex _subg_index;
+};
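+
+// The pass is meant to be run through the common PassRunner, as in TrainingCompiler::compile:
+//
+//   compiler::pass::PassRunner{}
+//     .append(std::make_unique<LossInsertionPass>(tgraph, &training_info, subg_index))
+//     .run();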
+
+} // namespace pass
+} // namespace train
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_TRAIN_PASS_LOSS_INSERTION_PASS_H__
diff --git a/runtime/onert/core/src/compiler/train/pass/Pass.h b/runtime/onert/core/src/compiler/train/pass/Pass.h
new file mode 100644
index 000000000..d64c06cf4
--- /dev/null
+++ b/runtime/onert/core/src/compiler/train/pass/Pass.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_TRAIN_PASS_PASS_H__
+#define __ONERT_COMPILER_TRAIN_PASS_PASS_H__
+
+#include "../../pass/IPass.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace train
+{
+class TrainableGraph;
+} // namespace train
+} // namespace ir
+} // namespace onert
+
+namespace onert
+{
+namespace compiler
+{
+namespace train
+{
+
+class TrainingInfo;
+
+namespace pass
+{
+
+class Pass : public compiler::pass::IPass
+{
+public:
+ Pass(ir::train::TrainableGraph &trainable_graph, const TrainingInfo *training_info)
+ : _trainable_graph{trainable_graph}, _training_info{training_info}
+ {
+ }
+ virtual ~Pass() = default;
+
+protected:
+ ir::train::TrainableGraph &_trainable_graph;
+ const TrainingInfo *_training_info;
+};
+
+} // namespace pass
+} // namespace train
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_TRAIN_PASS_PASS_H__
diff --git a/runtime/onert/core/src/dumper/dot/DotBuilder.cc b/runtime/onert/core/src/dumper/dot/DotBuilder.cc
index 38a69696e..9257434fa 100644
--- a/runtime/onert/core/src/dumper/dot/DotBuilder.cc
+++ b/runtime/onert/core/src/dumper/dot/DotBuilder.cc
@@ -29,31 +29,12 @@ DotBuilder::DotBuilder() {}
void DotBuilder::update(const Node &node_info)
{
add(node_info);
- for (auto edge : node_info.out_edges())
+ for (auto &&edge : node_info.out_edges())
{
addEdge(node_info, *edge);
}
}
-void DotBuilder::addOpSequence(const DotSubgraphInfo &subgraph_info)
-{
- _dot << "subgraph cluster_" << subgraph_info.index().value() << " {\n";
- _dot << " label=\"" << subgraph_info.label() << "\";\n";
- _dot << " style=filled;\n";
- _dot << " color=lightgrey;\n";
- _dot << " ";
- for (auto op : subgraph_info.operations())
- {
- _dot << "operation" << op.value() << "; ";
- }
- for (auto op : subgraph_info.operands())
- {
- _dot << "operand" << op.value() << "; ";
- }
- _dot << "\n";
- _dot << "}\n";
-}
-
void DotBuilder::writeDot(std::ostream &os)
{
os << "digraph D {\n"
@@ -66,7 +47,7 @@ void DotBuilder::add(const Node &node)
_dot << node.id();
std::stringstream ss;
_dot << "[";
- for (auto attr : node.attributes())
+ for (auto &&attr : node.attributes())
{
_dot << attr.first << "=\"" << attr.second << "\" ";
}
diff --git a/runtime/onert/core/src/dumper/dot/DotBuilder.h b/runtime/onert/core/src/dumper/dot/DotBuilder.h
index 681cbbf5d..30f32f8f9 100644
--- a/runtime/onert/core/src/dumper/dot/DotBuilder.h
+++ b/runtime/onert/core/src/dumper/dot/DotBuilder.h
@@ -25,7 +25,6 @@
#include "OperationNode.h"
#include "OperandNode.h"
-#include "DotSubgraphInfo.h"
using Operation = onert::ir::Operation;
using Object = onert::ir::Operand;
@@ -44,7 +43,6 @@ public:
public:
void update(const Node &dotinfo);
- void addOpSequence(const DotSubgraphInfo &subgraph_info);
void writeDot(std::ostream &os);
diff --git a/runtime/onert/core/src/dumper/dot/DotDumper.cc b/runtime/onert/core/src/dumper/dot/DotDumper.cc
index 118057f09..ab77a6c62 100644
--- a/runtime/onert/core/src/dumper/dot/DotDumper.cc
+++ b/runtime/onert/core/src/dumper/dot/DotDumper.cc
@@ -19,8 +19,7 @@
#include "DotDumper.h"
#include "DotBuilder.h"
-#include "DotSubgraphInfo.h"
-#include "ir/OpSequence.h"
+#include "ir/OperandIndexMap.h"
#include "ir/OperationIndexMap.h"
#include "backend/Backend.h"
#include "backend/IConfig.h"
@@ -33,151 +32,153 @@ namespace dumper
namespace dot
{
-void DotDumper::dump(const std::string &tag)
+namespace
{
- if (_level == Level::OFF)
- {
- return;
- }
-
- onert::dumper::dot::DotBuilder dot_builder;
-
- auto &operations = _graph.operations();
- auto &operands = _graph.operands();
-
- ir::OperationIndexMap<std::unique_ptr<Operation>> operation_nodes;
- std::unordered_map<ir::OperandIndex, std::unique_ptr<Operand>> operand_nodes;
-
- auto backend_to_fillcolor = [](const backend::Backend *backend) {
- static const auto map = []() {
- std::unordered_map<const backend::Backend *, std::string> ret;
- uint32_t index = 1; // Start from 1 to avoid 0(red) which is too dark :(
- for (const auto backend : compiler::BackendManager::get().getAll())
- {
- ret.emplace(backend, Node::BG_COLORS[index]);
- index = (index + 1) % (sizeof(Node::BG_COLORS) / sizeof(Node::BG_COLORS[0]));
- }
- return ret;
- }();
-
- auto itr = map.find(backend);
- if (itr == map.end())
- {
- return Node::DEFAULT_FILLCOLOR;
- }
- else
+std::string backend_to_fillcolor(const backend::Backend *backend)
+{
+ static const auto map = []() {
+ std::unordered_map<const backend::Backend *, std::string> ret;
+ uint32_t index = 1; // Start from 1 to avoid 0(red) which is too dark :(
+ for (const auto backend : compiler::BackendManager::get().getAll())
{
- return itr->second;
+ ret.emplace(backend, Node::BG_COLORS[index]);
+ index = (index + 1) % (sizeof(Node::BG_COLORS) / sizeof(Node::BG_COLORS[0]));
}
- };
+ return ret;
+ }();
+ auto itr = map.find(backend);
+ if (itr == map.end())
+ {
+ return Node::DEFAULT_FILLCOLOR;
+ }
+ else
+ {
+ return itr->second;
+ }
+}
- util::Set<ir::OperandIndex> shown_operand_set;
+std::unordered_map<ir::OperandIndex, std::unique_ptr<Operand>>
+generate_dot_operands(const ir::Graph &graph, const DotDumper::Level level)
+{
+ std::unordered_map<ir::OperandIndex, std::unique_ptr<Operand>> dot_operands;
+ const auto &operands = graph.operands();
operands.iterate([&](const ir::OperandIndex &index, const ir::Operand &object) {
- bool showing_cond = false;
- if (_level == Level::ALL)
- {
- showing_cond = true;
- }
- else
- {
- showing_cond = !object.isConstant();
- }
- if (object.isConstant() || _graph.getInputs().contains(index))
- {
- showing_cond = showing_cond && (object.getUses().size() > 0);
- }
+ bool showing_cond =
+ level == DotDumper::Level::ALL
+ ? true
+ : !object.isConstant() || (graph.getInputs() + graph.getOutputs()).contains(index);
if (showing_cond)
{
- shown_operand_set.add(index);
-
auto type = [&]() {
using onert::dumper::dot::Operand;
- if (_graph.getInputs().contains(index))
+ if (graph.getInputs().contains(index))
return Operand::Type::MODEL_INPUT;
- if (_graph.getOutputs().contains(index))
+ if (graph.getOutputs().contains(index))
return Operand::Type::MODEL_OUTPUT;
return Operand::Type::INTERNAL;
}();
auto node = std::make_unique<Operand>(index, type);
+ std::string label = std::to_string(index.value());
+ std::string fillcolor = "";
+ node->setAttribute("label", label);
+ node->setAttribute("fillcolor", fillcolor);
- {
- // Display LowerInfo attributes
- std::string label = std::to_string(index.value());
- std::string fillcolor = "";
- if (_lowered_graph)
- {
- auto lower_info = _lowered_graph->getLowerInfo(index);
- const auto &def_factors = lower_info->def_factors();
- if (def_factors.size() > 0)
- {
- label += "\\n[";
- label += def_factors.getOnlyElement().backend()->config()->id();
- label += "]";
-
- fillcolor = backend_to_fillcolor(lower_info->def_factors().getOnlyElement().backend());
- }
- }
- node->setAttribute("label", label);
- node->setAttribute("fillcolor", fillcolor);
- }
-
- operand_nodes.emplace(index, std::move(node));
+ dot_operands.emplace(index, std::move(node));
}
});
- operations.iterate([&](const ir::OperationIndex &index, const ir::Operation &op) {
+ return dot_operands;
+}
+
+ir::OperationIndexMap<std::unique_ptr<Operation>>
+generate_dot_operations(const ir::Graph &graph,
+ const ir::OperandIndexMap<std::unique_ptr<Operand>> &dot_operands)
+{
+ ir::OperationIndexMap<std::unique_ptr<Operation>> dot_operations;
+ const auto &operations = graph.operations();
+ operations.iterate([&](const ir::OperationIndex &index, const ir::IOperation &op) {
auto node = std::make_unique<Operation>(index, op);
- for (auto input : op.getInputs())
+ for (auto &&input : op.getInputs())
{
using onert::dumper::dot::Operand;
// Constant input and dump level is ALL_BUT_CONSTANTS
- if (operand_nodes.find(input) == operand_nodes.end())
+ if (dot_operands.find(input) == dot_operands.end())
continue;
- auto &input_node = operand_nodes.at(input);
+ auto &input_node = dot_operands.at(input);
input_node->addOutEdge(node.get());
}
- for (auto output : op.getOutputs())
+ for (auto &&output : op.getOutputs() | ir::Remove::UNDEFINED)
{
using onert::dumper::dot::Operand;
- auto &output_node = operand_nodes.at(output);
+ auto &output_node = dot_operands.at(output);
node->addOutEdge(output_node.get());
}
- operation_nodes.emplace(index, std::move(node));
+ dot_operations.emplace(index, std::move(node));
});
- if (_lowered_graph)
- {
- const auto &op_seqs = _lowered_graph->op_seqs();
- op_seqs.iterate([&](const ir::OpSequenceIndex &index, const ir::OpSequence &op_seq) {
- const auto lower_info = _lowered_graph->getLowerInfo(index);
+ return dot_operations;
+}
+
+void update_lower_info(const compiler::ILoweredGraph &lowered_graph,
+ ir::OperandIndexMap<std::unique_ptr<Operand>> *dot_operands)
+{
+ const auto &operands = lowered_graph.graph().operands();
+ operands.iterate([&](const ir::OperandIndex &index, const ir::Operand &) {
+ auto itr = dot_operands->find(index);
+ if (itr != dot_operands->end())
+ {
+ auto &node = itr->second;
+ // Display LowerInfo attributes
+ std::string label = node->getAttribute("label");
+ std::string fillcolor = node->getAttribute("fillcolor");
+ auto lower_info = lowered_graph.lower_info().operand.getRawPtr(index);
+ const auto &def_factors = lower_info->def_factors();
+ if (def_factors.size() > 0)
+ {
+ label += "\\n[";
+ label += def_factors.getOnlyElement().backend()->config()->id();
+ label += "]";
+ fillcolor = backend_to_fillcolor(lower_info->def_factors().getOnlyElement().backend());
+ }
+ node->setAttribute("label", label);
+ node->setAttribute("fillcolor", fillcolor);
+ }
+ });
+}
+
+void update_lower_info(const compiler::ILoweredGraph &lowered_graph,
+ ir::OperationIndexMap<std::unique_ptr<Operation>> *dot_operations)
+{
+ const auto &operations = lowered_graph.graph().operations();
+ operations.iterate([&](const ir::OperationIndex &index, const ir::IOperation &) {
+ const auto lower_info = lowered_graph.lower_info().operation.getRawPtr(index);
+ if (lower_info)
+ {
auto fillcolor = backend_to_fillcolor(lower_info->backend());
- std::string label =
- std::to_string(index.value()) + " [" + lower_info->backend()->config()->id() + "]";
- DotSubgraphInfo subgraph_info{index, op_seq, shown_operand_set, _graph.operations()};
- subgraph_info.label(label);
- subgraph_info.fillcolor(fillcolor);
- dot_builder.addOpSequence(subgraph_info);
-
- // Set fillcolor of all operations in the op_seq
- for (const auto &op_idx : op_seq.operations())
+ std::string backend_label = "[" + lower_info->backend()->config()->id() + "]";
+ auto itr = dot_operations->find(index);
+ if (itr != dot_operations->end())
{
- auto found = operation_nodes.find(op_idx);
- if (found != operation_nodes.end())
- {
- auto &&op = found->second;
- op->setAttribute("fillcolor", fillcolor);
- }
+ auto &node = itr->second;
+ node->setAttribute("label", node->getAttribute("label") + "\n" + backend_label);
+ node->setAttribute("fillcolor", fillcolor);
}
- });
- }
+ }
+ });
+}
+
+void dump_to_file(const ir::OperandIndexMap<std::unique_ptr<Operand>> &operand_nodes,
+ const ir::OperationIndexMap<std::unique_ptr<Operation>> &operation_nodes,
+ const std::string &tag)
+{
+ onert::dumper::dot::DotBuilder dot_builder;
for (const auto &e : operation_nodes)
dot_builder.update(*e.second);
for (const auto &e : operand_nodes)
@@ -198,6 +199,34 @@ void DotDumper::dump(const std::string &tag)
fb.close();
}
}
+} // namespace
+
+void DotDumper::dump(const ir::Graph &graph, const std::string &tag)
+{
+ if (_level == Level::OFF)
+ {
+ return;
+ }
+
+ const auto dot_operands = generate_dot_operands(graph, _level);
+ const auto dot_operations = generate_dot_operations(graph, dot_operands);
+ dump_to_file(dot_operands, dot_operations, tag);
+}
+
+// TODO Support derivative tensors
+void DotDumper::dump(const compiler::ILoweredGraph &lowered_graph, const std::string &tag)
+{
+ if (_level == Level::OFF)
+ {
+ return;
+ }
+
+ auto dot_operands = generate_dot_operands(lowered_graph.graph(), _level);
+ auto dot_operations = generate_dot_operations(lowered_graph.graph(), dot_operands);
+ update_lower_info(lowered_graph, &dot_operands);
+ update_lower_info(lowered_graph, &dot_operations);
+ dump_to_file(dot_operands, dot_operations, tag);
+}
} // namespace dot
} // namespace dumper
diff --git a/runtime/onert/core/src/dumper/dot/DotDumper.h b/runtime/onert/core/src/dumper/dot/DotDumper.h
index fdbca1642..fca5f356c 100644
--- a/runtime/onert/core/src/dumper/dot/DotDumper.h
+++ b/runtime/onert/core/src/dumper/dot/DotDumper.h
@@ -15,7 +15,7 @@
*/
#include "ir/Graph.h"
-#include "compiler/LoweredGraph.h"
+#include "compiler/ILoweredGraph.h"
#ifndef __ONERT_DUMPER_DOT_DOT_DUMPER_H__
#define __ONERT_DUMPER_DOT_DOT_DUMPER_H__
@@ -38,27 +38,28 @@ public:
};
public:
- DotDumper(const ir::Graph &graph, Level level)
- : _lowered_graph{nullptr}, _graph(graph), _level{level}
- {
- }
- DotDumper(const compiler::LoweredGraph *lowered_graph, Level level)
- : _lowered_graph{lowered_graph}, _graph(_lowered_graph->graph()), _level{level}
- {
- }
+ DotDumper(Level level) : _level{level} {}
public:
/**
- * @brief Dump to dot file as tag name if "GRAPH_DOT_DUMP" is set
+ * @brief Dump graph information to a dot file named after tag if "GRAPH_DOT_DUMP" is set
+ *
+ * @param[in] graph The graph that would be used to get operations and operands
+ * @param[in] tag The name of dot file that would be created
+ * @return N/A
+ */
+ void dump(const ir::Graph &graph, const std::string &tag);
+
+ /**
+ * @brief Dump lowered graph information to a dot file named after tag if "GRAPH_DOT_DUMP" is set
*
+ * @param[in] lowered_graph The lowered graph that would be used to get operations and operands
* @param[in] tag The name of dot file that would be created
* @return N/A
*/
- void dump(const std::string &tag);
+ void dump(const compiler::ILoweredGraph &lowered_graph, const std::string &tag);
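+
+ // A usage sketch (hedged; the tag string is illustrative):
+ //   DotDumper dumper{DotDumper::Level::ALL};
+ //   dumper.dump(lowered_graph, "after_lower_subg-0");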
private:
- const compiler::LoweredGraph *_lowered_graph;
- const ir::Graph &_graph;
Level _level;
};
diff --git a/runtime/onert/core/src/dumper/dot/DotSubgraphInfo.cc b/runtime/onert/core/src/dumper/dot/DotSubgraphInfo.cc
deleted file mode 100644
index 52e9c758d..000000000
--- a/runtime/onert/core/src/dumper/dot/DotSubgraphInfo.cc
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "DotSubgraphInfo.h"
-
-#include <sstream>
-
-namespace onert
-{
-namespace dumper
-{
-namespace dot
-{
-
-DotSubgraphInfo::DotSubgraphInfo(const ir::OpSequenceIndex &index, const ir::OpSequence &op_seq,
- const util::Set<ir::OperandIndex> &shown_operands,
- const ir::Operations &operations_ctx)
- : _index{index}
-{
- for (const auto &op_idx : op_seq.operations())
- {
- _operations.insert(op_idx);
- const auto &node = operations_ctx.at(op_idx);
- for (auto o : node.getInputs())
- {
- // Must be a shown operand, not op_seq's inputs
- if (shown_operands.contains(o) && !op_seq.getInputs().contains(o))
- {
- _operands.insert(o);
- }
- }
- for (auto o : node.getOutputs())
- {
- // Must be a shown operand, not op_seq's inputs
- if (shown_operands.contains(o) && !op_seq.getOutputs().contains(o))
- {
- _operands.insert(o);
- }
- }
- }
-}
-
-} // namespace dot
-} // namespace dumper
-} // namespace onert
diff --git a/runtime/onert/core/src/dumper/dot/DotSubgraphInfo.h b/runtime/onert/core/src/dumper/dot/DotSubgraphInfo.h
deleted file mode 100644
index 95ba8953e..000000000
--- a/runtime/onert/core/src/dumper/dot/DotSubgraphInfo.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_CORE_DUMPER_DOT_DOT_SUBGRAPH_INFO_H__
-#define __ONERT_CORE_DUMPER_DOT_DOT_SUBGRAPH_INFO_H__
-
-#include <unordered_set>
-
-#include "ir/Index.h"
-#include <ir/Operations.h>
-#include "ir/OpSequence.h"
-#include "util/Set.h"
-
-namespace onert
-{
-namespace dumper
-{
-namespace dot
-{
-
-class DotSubgraphInfo
-{
-public:
- DotSubgraphInfo(const ir::OpSequenceIndex &index, const ir::OpSequence &op_seq,
- const util::Set<ir::OperandIndex> &shown_operands,
- const ir::Operations &operations_ctx);
-
- ir::OpSequenceIndex index() const { return _index; }
- std::string label() const { return _label; }
- void label(const std::string &val) { _label = val; }
- std::string fillcolor() const { return _fillcolor; }
- void fillcolor(const std::string &val) { _fillcolor = val; }
- const std::unordered_set<ir::OperationIndex> &operations() const { return _operations; }
- const std::unordered_set<ir::OperandIndex> &operands() const { return _operands; }
-
-private:
- ir::OpSequenceIndex _index;
- std::string _label;
- std::string _fillcolor;
- std::unordered_set<ir::OperationIndex> _operations;
- std::unordered_set<ir::OperandIndex> _operands;
-};
-
-} // namespace dot
-} // namespace dumper
-} // namespace onert
-
-#endif // __ONERT_CORE_DUMPER_DOT_DOT_SUBGRAPH_INFO_H__
diff --git a/runtime/onert/core/src/dumper/dot/OperandNode.cc b/runtime/onert/core/src/dumper/dot/OperandNode.cc
index 5a6015ca9..49319d595 100644
--- a/runtime/onert/core/src/dumper/dot/OperandNode.cc
+++ b/runtime/onert/core/src/dumper/dot/OperandNode.cc
@@ -18,7 +18,6 @@
#include "OperandNode.h"
#include "ir/Graph.h"
-#include "ir/operand/LowerInfo.h"
namespace onert
{
@@ -33,10 +32,10 @@ const std::string Operand::OPERAND_SHAPE = "ellipse";
const std::string Operand::BG_COLOR_SCHEME = "set18";
Operand::Operand(const ir::OperandIndex &index, Type type)
- : Node{"operand" + std::to_string(index.value())}
+ : Node{"operand" + std::to_string(index.value())}
{
{
- auto type_to_shape = [](Type type) {
+ auto type_to_shape = [](Type type) -> const auto & {
switch (type)
{
case Type::MODEL_INPUT:
diff --git a/runtime/onert/core/src/dumper/dot/OperandNode.h b/runtime/onert/core/src/dumper/dot/OperandNode.h
index 2e7cc5861..f2aea80ad 100644
--- a/runtime/onert/core/src/dumper/dot/OperandNode.h
+++ b/runtime/onert/core/src/dumper/dot/OperandNode.h
@@ -64,7 +64,6 @@ public:
*
* @param[in] index Operand index
* @param[in] type Operand type
- * @param[in] lower_info Operand LowerInfo
*/
Operand(const ir::OperandIndex &index, Type type);
diff --git a/runtime/onert/core/src/dumper/dot/OperationNode.cc b/runtime/onert/core/src/dumper/dot/OperationNode.cc
index bee137e7c..2ef08c9c6 100644
--- a/runtime/onert/core/src/dumper/dot/OperationNode.cc
+++ b/runtime/onert/core/src/dumper/dot/OperationNode.cc
@@ -18,7 +18,6 @@
#include "OperationNode.h"
#include "ir/Graph.h"
-#include "ir/operation/LowerInfo.h"
#include "backend/IConfig.h"
#include "backend/Backend.h"
@@ -32,8 +31,8 @@ namespace dot
const std::string Operation::OPERATION_SHAPE = "rect";
const std::string Operation::BG_COLOR_SCHEME = "pastel18";
-Operation::Operation(const ir::OperationIndex &index, const ir::Operation &node)
- : Node{"operation" + std::to_string(index.value())}
+Operation::Operation(const ir::OperationIndex &index, const ir::IOperation &node)
+ : Node{"operation" + std::to_string(index.value())}
{
setAttribute("label", std::to_string(index.value()) + " : " + node.name());
setAttribute("shape", OPERATION_SHAPE);
diff --git a/runtime/onert/core/src/dumper/dot/OperationNode.h b/runtime/onert/core/src/dumper/dot/OperationNode.h
index 74a37d3fb..d9292ad0c 100644
--- a/runtime/onert/core/src/dumper/dot/OperationNode.h
+++ b/runtime/onert/core/src/dumper/dot/OperationNode.h
@@ -25,7 +25,7 @@
#define __ONERT_DUMPER_DOT_DOT_NODE_INFO_H__
#include "Node.h"
-#include "ir/Operation.h"
+#include "ir/IOperation.h"
#include "ir/Index.h"
namespace onert
@@ -52,7 +52,7 @@ public:
* @param[in] index operation index
* @param[in] node operation object
*/
- Operation(const ir::OperationIndex &index, const ir::Operation &node);
+ Operation(const ir::OperationIndex &index, const ir::IOperation &node);
};
} // namespace dot
diff --git a/runtime/onert/core/src/dumper/h5/Dumper.cc b/runtime/onert/core/src/dumper/h5/Dumper.cc
new file mode 100644
index 000000000..5e12c2dbb
--- /dev/null
+++ b/runtime/onert/core/src/dumper/h5/Dumper.cc
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Dumper.h"
+
+#include <iostream>
+#include <sstream>
+#include <stdexcept>
+
+namespace onert
+{
+namespace dumper
+{
+namespace h5
+{
+
+Dumper::Dumper(const std::string &filepath) : _file{filepath, H5F_ACC_CREAT | H5F_ACC_RDWR} {}
+
+} // namespace h5
+} // namespace dumper
+} // namespace onert
diff --git a/runtime/onert/core/src/dumper/h5/Dumper.h b/runtime/onert/core/src/dumper/h5/Dumper.h
new file mode 100644
index 000000000..53d0e0332
--- /dev/null
+++ b/runtime/onert/core/src/dumper/h5/Dumper.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_DUMPER_H5_DUMPER_H__
+#define __ONERT_DUMPER_H5_DUMPER_H__
+
+#include "exec/MinMaxMap.h"
+
+#include <H5Cpp.h>
+#include <string>
+
+namespace onert
+{
+namespace dumper
+{
+namespace h5
+{
+
+class Dumper
+{
+public:
+ /**
+ * @brief Construct dumper
+ *
+ * @param[in] filepath Path of the file to dump to
+ * @throw H5::FileIException on error during file open/create
+ */
+ Dumper(const std::string &filepath);
+
+protected:
+ H5::H5File _file;
+};
+
+} // namespace h5
+} // namespace dumper
+} // namespace onert
+
+#endif // __ONERT_DUMPER_H5_DUMPER_H__
diff --git a/runtime/onert/core/src/dumper/h5/MinMaxDumper.cc b/runtime/onert/core/src/dumper/h5/MinMaxDumper.cc
new file mode 100644
index 000000000..8a9de9f95
--- /dev/null
+++ b/runtime/onert/core/src/dumper/h5/MinMaxDumper.cc
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MinMaxDumper.h"
+
+#include <iostream>
+#include <sstream>
+#include <stdexcept>
+
+namespace onert
+{
+namespace dumper
+{
+namespace h5
+{
+
+static const char *h5_value_grpname = "value";
+
+/*
+ * Ensure the group named child exists in parent: open it if present, create it otherwise
+ */
+H5::Group ensureGroup(H5::Group parent, const char *child)
+{
+ H5::Exception::dontPrint();
+ try
+ {
+ return parent.openGroup(child);
+ }
+ catch (H5::Exception &e)
+ {
+ return parent.createGroup(child);
+ }
+}
+
+MinMaxDumper::MinMaxDumper(const std::string &filepath) : Dumper(filepath)
+{
+ auto root_grp = _file.openGroup("/");
+ ensureGroup(root_grp, h5_value_grpname);
+}
+
+void MinMaxDumper::dump(const exec::SMMinMaxMap &mmmap) const
+{
+ auto val_grp = _file.openGroup(h5_value_grpname);
+ auto num_run = val_grp.getNumObjs();
+ auto num_grp = val_grp.createGroup(std::to_string(num_run));
+ auto model_grp = ensureGroup(num_grp, "0");
+ hsize_t dims[] = {2};
+ H5::DataSpace dspace(1, dims); // rank=1, dim(0)=2, {min, max}
+ for (auto &&e : mmmap)
+ {
+ // key = {subg_idx, op_idx} = e.first
+ const auto subg_idx = e.first.first.value();
+ const auto op_idx = e.first.second.value();
+ auto subg_grp = ensureGroup(model_grp, std::to_string(subg_idx).c_str());
+ auto op_dset = subg_grp.createDataSet(std::to_string(op_idx), H5::PredType::IEEE_F32BE, dspace);
+ op_dset.write(e.second.data, H5::PredType::NATIVE_FLOAT);
+ }
+}
+
+} // namespace h5
+} // namespace dumper
+} // namespace onert
diff --git a/runtime/onert/core/src/dumper/h5/MinMaxDumper.h b/runtime/onert/core/src/dumper/h5/MinMaxDumper.h
new file mode 100644
index 000000000..1f1b27c6e
--- /dev/null
+++ b/runtime/onert/core/src/dumper/h5/MinMaxDumper.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_DUMPER_H5_MINMAX_DUMPER_H__
+#define __ONERT_DUMPER_H5_MINMAX_DUMPER_H__
+
+#include "exec/MinMaxMap.h"
+#include "Dumper.h"
+
+#include <H5Cpp.h>
+#include <string>
+
+namespace onert
+{
+namespace dumper
+{
+namespace h5
+{
+
+// The hierarchy of a single-model minmax h5 file
+//
+// GROUP /
+// GROUP value
+// └── GROUP run_idx
+// └── GROUP model_idx
+// └── GROUP subg_idx
+// └── DATASET op_idx
+// DATATYPE Float32
+// DATASPACE (2)
+// DATA { min, max }
+// GROUP name (optional, for debug)
+// └── GROUP model_idx
+// └── GROUP subg_idx
+// └── ATTRIBUTE op_idx
+// DATATYPE String
+// DATA { "model/your/op/name"}
+//
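+// For example, with the dump() implementation in MinMaxDumper.cc, the minmax of
+// op 7 in subgraph 0 recorded during the first run is written as { min, max } to
+// the dataset at "/value/0/0/0/7" (run 0 / model 0 / subgraph 0 / op 7).
+//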
+class MinMaxDumper : private Dumper
+{
+public:
+ MinMaxDumper(const std::string &filepath);
+ /**
+ * @brief Dump minmax map
+ *
+ * @param[in] map single model minmax map
+ */
+ void dump(const exec::SMMinMaxMap &map) const;
+
+private:
+ H5::Group _val_grp;
+};
+
+} // namespace h5
+} // namespace dumper
+} // namespace onert
+
+#endif // __ONERT_DUMPER_H5_MINMAX_DUMPER_H__
diff --git a/runtime/onert/core/src/dumper/text/GraphDumper.cc b/runtime/onert/core/src/dumper/text/GraphDumper.cc
new file mode 100644
index 000000000..6bd7904aa
--- /dev/null
+++ b/runtime/onert/core/src/dumper/text/GraphDumper.cc
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GraphDumper.h"
+
+#include "ir/Graph.h"
+#include "compiler/LoweredGraph.h"
+#ifdef ONERT_TRAIN
+#include "compiler/train/LoweredTrainableGraph.h"
+#endif // ONERT_TRAIN
+#include "util/logging.h"
+#include "misc/string_helpers.h"
+
+namespace onert
+{
+namespace dumper
+{
+namespace text
+{
+
+namespace
+{
+
+std::string formatOperandIndexSequence(const ir::OperandIndexSequence &seq)
+{
+ std::vector<std::string> strs;
+ for (auto &&ind : seq)
+ strs.push_back(dumper::text::formatOperandBrief(ind));
+ return nnfw::misc::join(strs.begin(), strs.end(), ", ");
+}
+
+} // namespace
+
+std::string formatOperandBrief(ir::OperandIndex ind)
+{
+ std::stringstream ss;
+ ss << ind;
+ return ss.str();
+}
+
+std::string formatOperand(const ir::Graph &, ir::OperandIndex ind)
+{
+ std::stringstream ss;
+ ss << ind;
+ // TODO Print shape, type and maybe more
+ return ss.str();
+}
+
+std::string formatOperation(const ir::IOperation &op, ir::OperationIndex ind)
+{
+ std::stringstream ss;
+
+ ss << formatOperandIndexSequence(op.getOutputs());
+ ss << " = ";
+ ss << ind << "_" << op.name() << "(";
+ ss << formatOperandIndexSequence(op.getInputs());
+ ss << ")";
+ return ss.str();
+}
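+
+// The rendered form is "<outputs> = <op_index>_<op_name>(<inputs>)"; e.g. an Add at
+// operation index 3 reading operands 1 and 2 and writing operand 4 appears roughly
+// as "4 = 3_Add(1, 2)" (exact index rendering depends on the Index operator<<).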
+
+std::string formatOperation(const ir::Graph &graph, ir::OperationIndex ind)
+{
+ const auto &op = graph.operations().at(ind);
+ return formatOperation(op, ind);
+}
+
+void dumpGraph(const ir::Graph &graph)
+{
+ VERBOSE(GraphDumper) << "{\n";
+ auto ops_topol = graph.topolSortOperations();
+ for (auto &&op_ind : ops_topol)
+ {
+ const auto &op = graph.operations().at(op_ind);
+ VERBOSE(GraphDumper) << " " << formatOperation(op, op_ind) << "\n";
+ }
+ VERBOSE(GraphDumper) << "}\n";
+ VERBOSE(GraphDumper) << std::endl;
+}
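+
+// The dump is emitted through VERBOSE(GraphDumper) as a brace-enclosed block with
+// one formatted operation per line, in topological order:
+//   {
+//     <outputs> = <op_index>_<op_name>(<inputs>)
+//     ...
+//   }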
+
+void dumpLoweredGraph(const compiler::LoweredGraph &lgraph)
+{
+ // TODO Graph dump with backend info
+ dumpGraph(lgraph.graph());
+}
+
+#ifdef ONERT_TRAIN
+void dumpLoweredGraph(const compiler::train::LoweredTrainableGraph &lgraph)
+{
+ // TODO Graph dump with backend info
+ dumpGraph(lgraph.graph());
+}
+#endif // ONERT_TRAIN
+
+} // namespace text
+} // namespace dumper
+} // namespace onert
diff --git a/runtime/onert/core/src/dumper/text/GraphDumper.h b/runtime/onert/core/src/dumper/text/GraphDumper.h
new file mode 100644
index 000000000..ab0061465
--- /dev/null
+++ b/runtime/onert/core/src/dumper/text/GraphDumper.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_DUMPER_TEXT_GRAPH_DUMPER_H__
+#define __ONERT_DUMPER_TEXT_GRAPH_DUMPER_H__
+
+#include <ir/Index.h>
+
+#include <string>
+
+namespace onert
+{
+namespace ir
+{
+class Graph;
+struct IOperation;
+} // namespace ir
+} // namespace onert
+
+namespace onert
+{
+namespace compiler
+{
+class LoweredGraph;
+
+#ifdef ONERT_TRAIN
+namespace train
+{
+class LoweredTrainableGraph;
+} // namespace train
+#endif // ONERT_TRAIN
+} // namespace compiler
+} // namespace onert
+
+namespace onert
+{
+namespace dumper
+{
+namespace text
+{
+
+std::string formatOperandBrief(ir::OperandIndex ind);
+std::string formatOperand(const ir::Graph &, ir::OperandIndex ind);
+std::string formatOperation(const ir::Graph &graph, ir::OperationIndex ind);
+void dumpGraph(const ir::Graph &graph);
+void dumpLoweredGraph(const compiler::LoweredGraph &lgraph);
+#ifdef ONERT_TRAIN
+void dumpLoweredGraph(const compiler::train::LoweredTrainableGraph &lgraph);
+#endif // ONERT_TRAIN
+
+} // namespace text
+} // namespace dumper
+} // namespace onert
+
+#endif // __ONERT_DUMPER_TEXT_GRAPH_DUMPER_H__
diff --git a/runtime/onert/core/src/exec/DataflowExecutor.cc b/runtime/onert/core/src/exec/DataflowExecutor.cc
index a69ae9cdb..e0b00077f 100644
--- a/runtime/onert/core/src/exec/DataflowExecutor.cc
+++ b/runtime/onert/core/src/exec/DataflowExecutor.cc
@@ -54,14 +54,13 @@ void DataflowExecutor::emplaceToReadyJobs(const uint32_t &id)
{
auto &job = _waiting_jobs[id];
assert(job != nullptr);
- auto &op_seq = _lowered_graph->op_seqs().at(_job_to_op_seq[job->index()]);
- auto rank = calculateRank(op_seq.operations());
+ auto rank = calculateRank({_job_to_op[job->index()]});
_ready_jobs.emplace(rank, std::move(job));
}
void DataflowExecutor::notify(uint32_t finished_job_id)
{
- for (auto id : _output_info[finished_job_id])
+ for (auto &&id : _output_info[finished_job_id])
{
assert(_input_info[id] > 0);
auto count = --_input_info[id];
@@ -77,52 +76,49 @@ bool DataflowExecutor::noWaitingJobs()
[](const std::unique_ptr<Job> &job) { return job == nullptr; });
}
-DataflowExecutor::DataflowExecutor(
- std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
- const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorRegistries &tensor_regs, backend::TensorManagerSet &&tensor_mgrs,
- compiler::CodeMap &&code_map)
- : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
- std::move(tensor_mgrs)},
- _code_map{std::move(code_map)}
+DataflowExecutor::DataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
+ backend::BackendContexts &&backend_contexts,
+ const compiler::TensorRegistries &tensor_regs,
+ compiler::CodeMap &&code_map,
+ const util::TracingCtx *tracing_ctx)
+ : ExecutorBase{std::move(lowered_graph), std::move(backend_contexts), tensor_regs, tracing_ctx},
+ _code_map{std::move(code_map)}
{
VERBOSE(DataflowExecutor) << "Constructing Dataflow Executor" << std::endl;
- const auto &op_seqs = _lowered_graph->op_seqs();
- // Assign jobs convert OpSequenceIndex to job index(uint32_t)
+  // Assign jobs: convert OperationIndex to job index (uint32_t)
uint32_t next_job_index = 0;
- std::unordered_map<ir::OpSequenceIndex, uint32_t> op_seq_to_job;
- op_seqs.iterate([&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &) {
- VERBOSE(DataflowExecutor) << "Create a job #" << next_job_index << " with OpSequenceIndex "
- << op_seq_index.value() << std::endl;
+ std::unordered_map<ir::OperationIndex, uint32_t> op_to_job;
+ const auto &operations = _lowered_graph->graph().operations();
+ operations.iterate([&](const ir::OperationIndex &op_ind, const ir::IOperation &) {
+ VERBOSE(DataflowExecutor) << "Create a job " << next_job_index << " with Operation " << op_ind
+ << std::endl;
_finished_jobs.emplace_back(
- std::make_unique<Job>(next_job_index, _code_map.at(op_seq_index).fn_seq.get()));
- op_seq_to_job[op_seq_index] = next_job_index++;
+ std::make_unique<Job>(next_job_index, _code_map.at(op_ind).fn_seq.get()));
+ op_to_job[op_ind] = next_job_index++;
});
_waiting_jobs.resize(next_job_index);
_output_info.resize(next_job_index);
_initial_input_info.resize(next_job_index, 0);
- op_seqs.iterate([&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) {
- auto job_index = op_seq_to_job[op_seq_index];
- for (auto output : op_seq.getOutputs())
+ operations.iterate([&](const ir::OperationIndex &op_ind, const ir::IOperation &op) {
+ auto job_index = op_to_job[op_ind];
+ for (auto &&output : op.getOutputs())
{
// Update output and input info
- op_seqs.iterate(
- [&](const ir::OpSequenceIndex &op_seq_cur_index, const ir::OpSequence &op_seq_cur) {
- if (op_seq_cur.getInputs().contains(output))
- {
- auto dep_index = op_seq_to_job[op_seq_cur_index];
- ++_initial_input_info[dep_index];
- _output_info[job_index].push_back(dep_index);
- }
- });
+ operations.iterate([&](const ir::OperationIndex &op_cur_ind, const ir::IOperation &op_cur) {
+ if (op_cur.getInputs().contains(output))
+ {
+ auto dep_index = op_to_job[op_cur_ind];
+ ++_initial_input_info[dep_index];
+ _output_info[job_index].push_back(dep_index);
+ }
+ });
}
});
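+  // NOTE The nested iterate above is roughly O(n^2) over operations: for every
+  // output of every job it scans all operations for consumers, incrementing the
+  // consumer's input count and recording the edge in _output_info.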
- for (const auto &s : op_seq_to_job)
- _job_to_op_seq.emplace(s.second, s.first);
+ for (const auto &s : op_to_job)
+ _job_to_op.emplace(s.second, s.first);
_input_info = _initial_input_info;
}
@@ -145,35 +141,38 @@ void DataflowExecutor::executeImpl()
}
assert(!_ready_jobs.empty()); // Cannot begin if there is no initial jobs
- _subject.notifyModelBegin(this);
+ auto profiling_subg_index = _tracing_ctx->getSubgraphIndex(&_graph);
+
+ _subject.notifySubgraphBegin(profiling_subg_index);
while (!_ready_jobs.empty())
{
auto job = std::move((_ready_jobs.begin())->second);
_ready_jobs.erase(_ready_jobs.begin());
auto job_index = job->index();
- VERBOSE(DataflowExecutor) << "Run job #" << job_index << std::endl;
+ VERBOSE(DataflowExecutor) << "Run job " << job_index << std::endl;
+
+ auto op_ind = _job_to_op[job_index];
+ const backend::Backend *backend = _lowered_graph->lower_info().operation.at(op_ind).backend();
- auto op_seq_index = _job_to_op_seq[job_index];
- auto op_seq = &_lowered_graph->op_seqs().at(op_seq_index);
- const backend::Backend *backend =
- _lowered_graph->getLowerInfo()->op_seq.at(op_seq_index)->backend();
+ _subject.notifyJobBegin(this, profiling_subg_index, op_ind, backend);
- _subject.notifyJobBegin(this, op_seq, backend);
+ job->fn_seq()->initRunning();
// check if FunctionSequence needs to handle dynamic tensor
- bool handle_dynamic_tensor = op_seq->has_dynamic_tensor() || dynamic_input_exists;
+ bool handle_dynamic_tensor =
+ _lowered_graph->getHasDynamicTensor(op_ind) || dynamic_input_exists;
job->fn_seq()->enableDynamicShapeInferer(handle_dynamic_tensor);
job->run();
- _subject.notifyJobEnd(this, op_seq, backend);
+ _subject.notifyJobEnd(this, profiling_subg_index, op_ind, backend);
notify(job_index);
_finished_jobs[job_index] = std::move(job);
}
assert(noWaitingJobs());
- _subject.notifyModelEnd(this);
+ _subject.notifySubgraphEnd(profiling_subg_index);
// Reset input info for the next execution
_input_info = _initial_input_info;
diff --git a/runtime/onert/core/src/exec/DataflowExecutor.h b/runtime/onert/core/src/exec/DataflowExecutor.h
index 8d60e3e4b..1649be733 100644
--- a/runtime/onert/core/src/exec/DataflowExecutor.h
+++ b/runtime/onert/core/src/exec/DataflowExecutor.h
@@ -17,17 +17,17 @@
#ifndef __ONERT_EXEC_DATAFLOW_EXECUTOR_H__
#define __ONERT_EXEC_DATAFLOW_EXECUTOR_H__
-#include <list>
-#include <map>
-#include <unordered_map>
-
-#include "exec/FunctionSequence.h"
+#include "ExecutorBase.h"
#include "Job.h"
+
+#include "compiler/CodeMap.h"
#include "ir/OperandIndexSequence.h"
-#include "ir/Index.h"
+#include "util/TracingCtx.h"
+
+#include <list>
+#include <map>
#include <memory>
-#include "exec/ExecutorBase.h"
-#include "compiler/CodeMap.h"
+#include <unordered_map>
namespace onert
{
@@ -47,13 +47,12 @@ public:
*
* @param lowered_graph LoweredGraph object
* @param tensor_builders Tensor builders that are currently used
- * @param code_map OpSequence and its code map
+ * @param code_map @c ir::Operation and its code map
*/
DataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
- const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorRegistries &tensor_regs,
- backend::TensorManagerSet &&tensor_mgrs, compiler::CodeMap &&code_map);
+ backend::BackendContexts &&backend_contexts,
+ const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map,
+ const util::TracingCtx *tracing_ctx);
void executeImpl() override;
@@ -88,7 +87,7 @@ protected:
std::multimap<int64_t, std::unique_ptr<Job>, std::greater<int64_t>> _ready_jobs;
/// @brief Which job runs which op and function.
- std::unordered_map<uint32_t, ir::OpSequenceIndex> _job_to_op_seq;
+ std::unordered_map<uint32_t, ir::OperationIndex> _job_to_op;
};
} // namespace exec
diff --git a/runtime/onert/core/src/exec/DynamicShapeInference.cc b/runtime/onert/core/src/exec/DynamicShapeInference.cc
deleted file mode 100644
index 70bddfce4..000000000
--- a/runtime/onert/core/src/exec/DynamicShapeInference.cc
+++ /dev/null
@@ -1,1015 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "exec/DynamicShapeInference.h"
-#include "util/ShapeInference.h"
-#include <assert.h>
-
-namespace onert
-{
-namespace exec
-{
-
-inline backend::IDynamicTensorManager *
-dynamicTensorManagerOf(const std::shared_ptr<backend::ITensor> &tensor)
-{
- if (!tensor->dynamic_tensor_manager())
- throw std::runtime_error{"Dynamic Tensor Manager is not available for this tensor."};
- return tensor->dynamic_tensor_manager();
-}
-
-void DynamicShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op,
- const ir::OperandIndex lhs_idx,
- const ir::OperandIndex rhs_idx)
-{
- auto lhs = _tensor_registry->getITensor(lhs_idx);
- auto lhs_shape = lhs->getShape();
-
- auto rhs = _tensor_registry->getITensor(rhs_idx);
- auto rhs_shape = rhs->getShape();
-
- /*
- Here, the state after compilation (satic shape inference) could be one of the following:
-
- lhs rhs output execution-time shape inf required
- ------------------------------------------ ---------------------------------
- case 1) static static static X
- case 2) one or both are dynamic dynamic O
-
- Then nnfw_apply_tensorinf() could change one or both inputs dynamic.
- So, in this method, we have one more state and we have to re-calculate shape for this shape.
-
- case 3) one or both are dynamic static O
-
- So, only when all inputs are static, we can skip dynamic shape inference.
- */
- if ((!lhs->is_dynamic()) && (!rhs->is_dynamic()))
- return;
-
- auto output_idx = op.getOutputs().at(0);
- auto output = _tensor_registry->getITensor(output_idx);
-
- ir::Shape new_shape = shape_inference::inferEltwiseShape(lhs_shape, rhs_shape);
-
- dynamicTensorManagerOf(output)->applyShape(output_idx, new_shape);
- assert(output->buffer() != nullptr);
-}
-
-void DynamicShapeInferer::handleSimpleUnaryOp(const ir::Operation &op,
- const ir::OperandIndex input_ind)
-{
- // check if input is not dynamic
- auto input = _tensor_registry->getITensor(input_ind);
- auto output_shape = input->getShape();
-
- /*
- Here, the state after compilation (satic shape inference) could be one of the following:
-
- input output execution-time shape inf required
- ------------------------- ---------------------------------
- case 1) static static X
- case 2) dynamic dynamic O
-
- Then nnfw_apply_tensorinf() could change input dynamic.
- So, in this method, we have one more state and we have to re-calculate shape for this shape.
-
- case 3) dynamic static O
-
- So, only when input is static, we can skip dynamic shape inference.
- */
- if (!input->is_dynamic())
- return;
-
- auto output_ind = op.getOutputs().at(0);
- auto output = _tensor_registry->getITensor(output_ind);
-
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
- assert(output->buffer() != nullptr);
-}
-
-void DynamicShapeInferer::visit(const ir::operation::ArgMax &op)
-{
- const auto input_idx{op.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
- const auto &input = _tensor_registry->getITensor(input_idx);
- auto input_shape = input->getShape();
-
- if (!input->is_dynamic())
- return;
-
- const auto rank = input_shape.rank();
- const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
-
- assert(0 <= axis && axis < rank);
-
- auto output_ind = op.getOutputs().at(0);
- auto output = _tensor_registry->getITensor(output_ind);
-
- ir::Shape new_shape = shape_inference::inferArgMaxShape(input_shape, axis, rank);
-
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
- assert(output->buffer() != nullptr);
-}
-
-void DynamicShapeInferer::visit(const ir::operation::BatchMatMul &op)
-{
- const auto lhs_index = op.getInputs().at(ir::operation::BatchMatMul::Input::LHS);
- const auto rhs_index = op.getInputs().at(ir::operation::BatchMatMul::Input::RHS);
- auto lhs = _tensor_registry->getITensor(lhs_index);
- auto rhs = _tensor_registry->getITensor(rhs_index);
-
- if (!lhs->is_dynamic() && !rhs->is_dynamic())
- return;
-
- const auto output_index = op.getOutputs().at(0);
- auto output = _tensor_registry->getITensor(output_index);
-
- auto lhs_shape = lhs->getShape();
- auto rhs_shape = rhs->getShape();
- // TODO
-
- auto new_shape = shape_inference::inferBatchMatMulShape(lhs_shape, rhs_shape, op.param());
- dynamicTensorManagerOf(output)->applyShape(output_index, new_shape);
-}
-
-void DynamicShapeInferer::visit(const ir::operation::BinaryArithmetic &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS),
- op.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::BroadcastTo &op)
-{
- auto output_ind = op.getOutputs().at(0);
- auto output = _tensor_registry->getITensor(output_ind);
-
- auto input_idx = op.getInputs().at(ir::operation::BroadcastTo::INPUT);
- auto input = _tensor_registry->getITensor(input_idx);
-
- if ((!input->is_dynamic()) && (!output->is_dynamic()))
- return;
-
- auto shape_idx = op.getInputs().at(ir::operation::Tile::Input::MULTIPLES);
- const auto &shape = _tensor_registry->getITensor(shape_idx);
-
- assert(shape); // It shouldn't be 0.
-
- auto output_shape = shape_inference::inferBroadcastToShape(
- shape->getShape(), reinterpret_cast<const int32_t *>(shape->buffer()));
-
- // set output shape and output buffer
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
- assert(output->buffer() != nullptr);
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Comparison &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Comparison::Input::INPUT0),
- op.getInputs().at(ir::operation::Comparison::Input::INPUT1));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Concat &op)
-{
- /*
- The state after compilation (satic shape inference) could be one of the following:
-
- inputs output execution-time shape inf required
- ------------------------------------------ ---------------------------------
- case 1) all static static X
- case 2) at least on is dynamic dynamic O
-
- Then nnfw_apply_tensorinf() could change one or both inputs dynamic.
- So, in this method, we have one more state and we have to re-calculate shape for this shape.
-
- case 3) at least on is dynamic static O
-
- So, only when all inputs are static, we can skip dynamic shape inference.
- */
- bool all_static = true;
- for (auto input_ind : op.getInputs())
- {
- auto input = _tensor_registry->getITensor(input_ind);
- if (input->is_dynamic())
- {
- all_static = false;
- break;
- }
- }
-
- if (all_static)
- return;
-
- // sanity check
- {
- auto isConcatible = [](const backend::ITensor *input1, const backend::ITensor *input2,
- int32_t axis) {
- if (input1->num_dimensions() != input2->num_dimensions())
- return false;
-
- for (size_t i = 0; i < input1->num_dimensions(); i++)
- {
- auto positive_axis = (axis >= 0) ? axis : axis + input1->num_dimensions();
-
- if (i != positive_axis)
- if (input1->dimension(i) != input2->dimension(i))
- return false;
- }
-
- return true;
- };
-
- auto first_input_ind = op.getInputs().at(0);
- auto first_input = _tensor_registry->getITensor(first_input_ind);
-
- for (auto input_ind : op.getInputs())
- {
- auto input = _tensor_registry->getITensor(input_ind);
- if (input != first_input && !isConcatible(first_input.get(), input.get(), op.param().axis))
- throw std::runtime_error("input shapes does not matched for concat");
- }
- }
-
- // getting output shape
- onert::shape_inference::Shapes in_shapes;
- for (auto input_ind : op.getInputs())
- {
- auto input = _tensor_registry->getITensor(input_ind);
- ir::Shape shape = input->getShape();
-
- in_shapes.emplace_back(shape);
- }
-
- auto output_ind = op.getOutputs().at(0);
- auto output = _tensor_registry->getITensor(output_ind);
- auto output_shape = shape_inference::inferConcatShape(in_shapes, op.param());
-
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Conv2D &op)
-{
- // check if input is not dynamic
- auto input_ind = op.getInputs().at(ir::operation::Conv2D::INPUT);
- auto input = _tensor_registry->getITensor(input_ind);
-
- auto ker_ind = op.getInputs().at(ir::operation::Conv2D::KERNEL);
- auto ker = _tensor_registry->getITensor(ker_ind);
-
- if ((!input->is_dynamic()) && (!ker->is_dynamic()))
- return;
-
- ir::Shape input_shape = input->getShape();
- ir::Shape ker_shape = ker->getShape();
-
- auto output_ind = op.getOutputs().at(0);
- auto output = _tensor_registry->getITensor(output_ind);
-
- ir::Shape output_shape = shape_inference::inferConv2DShape(input_shape, ker_shape, op.param());
-
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
- assert(output->buffer() != nullptr);
-}
-
-void DynamicShapeInferer::visit(const ir::operation::ElementwiseActivation &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseActivation::INPUT));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::ElementwiseBinary &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS),
- op.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::ElementwiseUnary &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::ExpandDims &op)
-{
- // check if input is not dynamic
- auto input_ind = op.getInputs().at(ir::operation::ExpandDims::INPUT);
- auto input = _tensor_registry->getITensor(input_ind);
-
- // check if output is not dynamic, meaning when 1st input is static and 2nd input is const
- auto output_ind = op.getOutputs().at(0);
- auto output = _tensor_registry->getITensor(output_ind);
-
- /*
- Here, the state after compilation (satic shape inference) could be one of the following:
-
- input1 input2 output execution-time shape inf required
- ----------------------------- --------------------------------
- case 1) static const static X
- case 2) static placeholder dynamic O
- case 3) dynamic const dynamic O
- case 4) dynamic placeholder dynamic O
-
- Then nnfw_apply_tensorinf() could change input dynamic.
- So, in this method, we could have one more state and we have to re-calculate shape
- for this shape.
-
- case 5) dynamic const static O
-
- So, only when input1 and ouput are static, we can skip dynamic shape inference.
- */
- if ((!input->is_dynamic()) && (!output->is_dynamic()))
- return;
-
- ir::Shape input_shape = input->getShape();
-
- auto axis_ind = op.getInputs().at(ir::operation::ExpandDims::AXIS);
- auto axis = _tensor_registry->getITensor(axis_ind);
- auto axis_buf = reinterpret_cast<const int32_t *>(axis->buffer());
- assert(axis_buf);
-
- auto output_shape = shape_inference::inferExpandDimsShape(input_shape, axis_buf[0]);
-
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
- assert(output->buffer() != nullptr);
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Fill &op)
-{
- // check if output is not dynamic
- auto output_ind = op.getOutputs().at(0);
- auto output = _tensor_registry->getITensor(output_ind);
- auto input_ind = op.getInputs().at(ir::operation::Fill::Input::INPUT);
- auto input = _tensor_registry->getITensor(input_ind);
- ir::Shape input_shape = input->getShape();
-
- if ((!input->is_dynamic()) && (!output->is_dynamic()))
- return;
-
- assert(input.get()->data_type() == ir::DataType::INT32);
-
- auto input_buf = reinterpret_cast<const int32_t *>(input->buffer());
- assert(input_buf);
-
- auto output_shape = shape_inference::inferFillShape(input_shape, input_buf);
-
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
- assert(output->buffer() != nullptr);
-}
-
-void DynamicShapeInferer::visit(const ir::operation::FullyConnected &op)
-{
- const auto input_idx{op.getInputs().at(ir::operation::FullyConnected::Input::INPUT)};
- const auto &input = _tensor_registry->getITensor(input_idx);
-
- const auto ker_idx{op.getInputs().at(ir::operation::FullyConnected::Input::WEIGHT)};
- const auto &ker = _tensor_registry->getITensor(ker_idx);
-
- if (!input->is_dynamic() && !ker->is_dynamic())
- return;
-
- auto input_shape = input->getShape();
- auto ker_shape = ker->getShape();
-
- ir::Shape new_shape = shape_inference::inferFullyConnectedShape(input_shape, ker_shape);
-
- auto output_ind = op.getOutputs().at(0);
- auto output = _tensor_registry->getITensor(output_ind);
-
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
- assert(output->buffer() != nullptr);
-}
-
-void DynamicShapeInferer::visit(const ir::operation::FusedBatchNorm &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::FusedBatchNorm::Input::INPUT));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Gather &op)
-{
- const auto input_idx{op.getInputs().at(ir::operation::Gather::Input::INPUT)};
- const auto &input = _tensor_registry->getITensor(input_idx);
- auto input_shape = input->getShape();
-
- const auto indices_idx{op.getInputs().at(ir::operation::Gather::Input::INDICES)};
- const auto &indices = _tensor_registry->getITensor(indices_idx);
- auto indices_shape = indices->getShape();
-
- if (!(input->is_dynamic()) && !(indices->is_dynamic()))
- return;
-
- const auto rank = input_shape.rank();
- const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
-
- assert(0 <= axis && axis < rank);
-
- ir::Shape new_shape = shape_inference::inferGatherShape(input_shape, indices_shape, axis, rank);
-
- auto output_ind = op.getOutputs().at(0);
- auto output = _tensor_registry->getITensor(output_ind);
-
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
- assert(output->buffer() != nullptr);
-}
-
-void DynamicShapeInferer::visit(const ir::operation::L2Normalization &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::L2Normalization::INPUT));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::MatrixBandPart &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::MatrixBandPart::INPUT));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::OneHot &op)
-{
- auto output_ind = op.getOutputs().at(0);
- auto output = _tensor_registry->getITensor(output_ind);
-
- auto indices_ind = op.getInputs().at(ir::operation::OneHot::INDICES);
- const auto &indices = _tensor_registry->getITensor(indices_ind);
- auto indices_shape = indices->getShape();
-
- auto depth_ind = op.getInputs().at(ir::operation::OneHot::DEPTH);
- const auto &depth = _tensor_registry->getITensor(depth_ind);
-
- if (!indices->is_dynamic() && !depth->is_dynamic())
- {
- return;
- }
-
- int32_t *depth_buf = reinterpret_cast<int32_t *>(depth->buffer());
- assert(depth_buf);
- const auto axis_val = op.param().axis;
-
- ir::Shape new_shape = shape_inference::inferOnehotShape(indices_shape, *depth_buf, axis_val);
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
- assert(output->buffer() != nullptr);
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Pack &op)
-{
- bool is_any_of_inputs_dynamic = [&]() -> bool {
- for (uint32_t i = 0; i < op.getInputs().size(); ++i)
- {
- const auto &input = _tensor_registry->getITensor(op.getInputs().at(i));
- if (input->is_dynamic())
- {
- return true;
- }
- }
- return false;
- }();
-
- const auto input_idx{op.getInputs().at(0)};
- const auto &input = _tensor_registry->getITensor(input_idx);
- auto input_shape = input->getShape();
-
- auto output_ind = op.getOutputs().at(0);
- auto output = _tensor_registry->getITensor(output_ind);
-
- if (!is_any_of_inputs_dynamic && !output->is_dynamic())
- return;
-
- const auto rank = input_shape.rank() + 1;
- const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
- const auto num = op.param().num;
-
- assert(0 <= axis && axis < rank);
-
- ir::Shape new_shape = shape_inference::inferPackShape(input_shape, axis, rank, num);
-
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
- assert(output->buffer() != nullptr);
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Pad &op)
-{
- // check if output is not dynamic
- auto output_ind = op.getOutputs().at(0);
- auto output = _tensor_registry->getITensor(output_ind);
-
- auto input_ind = op.getInputs().at(ir::operation::Pad::Input::INPUT);
- auto input = _tensor_registry->getITensor(input_ind);
-
- auto pad_ind = op.getInputs().at(ir::operation::Pad::Input::PAD);
- auto pad = _tensor_registry->getITensor(pad_ind);
-
- // check if input and output are not dynamic
- if ((!input->is_dynamic()) && (!output->is_dynamic()))
- return;
-
- int32_t *pad_buf = reinterpret_cast<int32_t *>(pad->buffer());
- assert(pad_buf);
-
- auto output_shape =
- shape_inference::inferPadShape(input->getShape(), pad_buf, pad->getShape().num_elements());
-
- // change output shape and reallocate output tensor memory
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
- assert(output->buffer() != nullptr);
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Permute & /* op */)
-{
- // NOTE Permute is a special operation which does not do shape inference before the actual
- // function(kernel) execution. Shape inference and output allocation will be done in the kernel
- // on-the-fly, as it must support inter-backend inference/allocation.
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Pow &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Pow::Input::LHS),
- op.getInputs().at(ir::operation::Pow::Input::RHS));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Range &op)
-{
- // check if output is not dynamic
- auto output_ind = op.getOutputs().at(0);
- auto output = _tensor_registry->getITensor(output_ind);
-
- // from op, access the buffer of second input to read new shape
- auto start_idx = op.getInputs().at(ir::operation::Range::Input::START);
- auto start_tensor = _tensor_registry->getITensor(start_idx);
-
- auto limit_idx = op.getInputs().at(ir::operation::Range::Input::LIMIT);
- auto limit_tensor = _tensor_registry->getITensor(limit_idx);
-
- auto delta_idx = op.getInputs().at(ir::operation::Range::Input::DELTA);
- auto delta_tensor = _tensor_registry->getITensor(delta_idx);
-
- if (!start_tensor->is_dynamic() && !limit_tensor->is_dynamic() && !delta_tensor->is_dynamic() &&
- !output->is_dynamic())
- return;
-
- ir::Shape new_shape;
- if (output->data_type() == ir::DataType::FLOAT32)
- {
- new_shape =
- shape_inference::inferRangeShape<float>(*reinterpret_cast<float *>(start_tensor->buffer()),
- *reinterpret_cast<float *>(limit_tensor->buffer()),
- *reinterpret_cast<float *>(delta_tensor->buffer()));
- }
- else if (output->data_type() == ir::DataType::INT32)
- {
- new_shape = shape_inference::inferRangeShape<int32_t>(
- *reinterpret_cast<int32_t *>(start_tensor->buffer()),
- *reinterpret_cast<int32_t *>(limit_tensor->buffer()),
- *reinterpret_cast<int32_t *>(delta_tensor->buffer()));
- }
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
- assert(output->buffer() != nullptr);
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Reduce &op)
-{
- const auto input_idx{op.getInputs().at(ir::operation::Reduce::Input::INPUT)};
- const auto &input = _tensor_registry->getITensor(input_idx);
- auto input_shape = input->getShape();
-
- const auto axes_idx{op.getInputs().at(ir::operation::Reduce::Input::AXES)};
- const auto &axes = _tensor_registry->getITensor(axes_idx);
-
- if (!input->is_dynamic())
- return;
-
- std::vector<int32_t> axes_vec;
- for (uint32_t i = 0; i < axes->getShape().num_elements(); ++i)
- {
- const auto buffer = axes->buffer() + axes->calcOffset({i});
- switch (axes->data_type())
- {
- case ir::DataType::INT32:
- {
- axes_vec.emplace_back(*reinterpret_cast<const int32_t *>(buffer));
- break;
- }
- case ir::DataType::INT64:
- {
- axes_vec.emplace_back(*reinterpret_cast<const int64_t *>(buffer));
- break;
- }
- default:
- throw std::runtime_error("DynamicShapeInferer " + op.name() + ": Not supported data type");
- break;
- }
- }
- const auto keep_dims = op.param().keep_dims;
-
- auto output_ind = op.getOutputs().at(0);
- auto output = _tensor_registry->getITensor(output_ind);
-
- ir::Shape new_shape = shape_inference::inferReduceShape(input_shape, axes_vec, keep_dims);
-
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
- assert(output->buffer() != nullptr);
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Reshape &op)
-{
- // check if output is not dynamic
- auto output_ind = op.getOutputs().at(0);
- auto output = _tensor_registry->getITensor(output_ind);
-
- auto input_ind = op.getInputs().at(ir::operation::Reshape::Input::INPUT);
- auto input = _tensor_registry->getITensor(input_ind);
-
- /*
- Here, the state after compilation (satic shape inference) could be one of the following:
-
- input1 input2 (or option) output execution-time shape inf required
- ------------------------------------ --------------------------------
- case 1) static const static X
- case 2) static placeholder dynamic O
- case 3) dynamic const dynamic O
- case 4) dynamic placeholder dynamic O
-
- Then nnfw_apply_tensorinf() could change input dynamic.
- So, in this method, we could have one more state and we have to re-calculate shape
- for this shape.
-
- case 5) dynamic const static O
-
- So, only when both input1 and ouput are static, we can skip dynamic shape inference.
- */
- if ((!input->is_dynamic()) && (!output->is_dynamic()))
- return;
-
- // New shape is given by second input tensor
- if (op.getInputs().size() == 2)
- {
- // from op, access the buffer of second input to read new shape
- auto new_shape_ind = op.getInputs().at(ir::operation::Reshape::Input::SHAPE);
-
- // getting output shape by reading new_shape tensor buffer
- auto new_shape = _tensor_registry->getITensor(new_shape_ind);
- assert(new_shape);
-
- int32_t *new_shape_buf = reinterpret_cast<int32_t *>(new_shape->buffer());
- assert(new_shape_buf);
-
- auto output_shape = shape_inference::inferReshapeShape(
- new_shape_buf, new_shape->getShape().num_elements(), input->getShape().num_elements());
-
- // if shape is changed, change output shape and reallocate output tensor memory
- if (output_shape != output->getShape() || output->buffer() == nullptr)
- {
- // change on output shape
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
- }
- assert(output->buffer() != nullptr);
- }
- // New shape is given by option
- else if (op.param().new_shape.size() != 0)
- {
- // Let's check the new_shape option
- auto shape = op.param().new_shape;
- auto output_shape = shape_inference::inferReshapeShape(shape.data(), shape.size(),
- input->getShape().num_elements());
-
- // if shape is changed, change output shape and reallocate output tensor memory
- if (output_shape != output->getShape() || output->buffer() == nullptr)
- {
- // change on output shape
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
- }
- assert(output->buffer() != nullptr);
- }
- else
- {
- throw std::runtime_error("Reshape: new shape is missing");
- return;
- }
-}
-
-void DynamicShapeInferer::visit(const ir::operation::ResizeBilinear &op)
-{
- // check if output is not dynamic
- auto output_ind = op.getOutputs().at(0);
- auto output = _tensor_registry->getITensor(output_ind);
-
- auto input_ind = op.getInputs().at(ir::operation::Reshape::Input::INPUT);
- auto input = _tensor_registry->getITensor(input_ind);
-
- if ((!input->is_dynamic()) && (!output->is_dynamic()))
- return;
-
- // getting output shape from input shape and Params
- auto output_shape = shape_inference::inferResizeBilinearShape(
- input->getShape(), op.param().height_out, op.param().width_out);
-
- // if shape is changed, change output shape and reallocate output tensor memory
- if (output_shape != output->getShape() || output->buffer() == nullptr)
- {
- // change on output shape
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
- }
- assert(output->buffer() != nullptr);
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Reverse &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Reverse::INPUT));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Select &op)
-{
- const auto input_cond_idx = op.getInputs().at(ir::operation::Select::Input::CONDITION);
- const auto &input_cond = _tensor_registry->getITensor(input_cond_idx);
-
- const auto input_true_idx = op.getInputs().at(ir::operation::Select::Input::INPUT_TRUE);
- const auto &input_true = _tensor_registry->getITensor(input_true_idx);
-
- const auto input_false_idx = op.getInputs().at(ir::operation::Select::Input::INPUT_FALSE);
- const auto &input_false = _tensor_registry->getITensor(input_false_idx);
-
- if ((!input_cond->is_dynamic()) && (!input_true->is_dynamic()) && (!input_false->is_dynamic()))
- {
- return;
- }
-
- auto input_cond_shape = input_cond->getShape();
- auto input_true_shape = input_true->getShape();
- auto input_false_shape = input_false->getShape();
-
- // Select output shpae
- ir::Shape new_shape =
- shape_inference::inferSelectShape(input_cond_shape, input_true_shape, input_false_shape);
-
- auto output_ind = op.getOutputs().at(0);
- auto output = _tensor_registry->getITensor(output_ind);
-
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
- assert(output->buffer() != nullptr);
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Shape &op)
-{
- const auto input_idx{op.getInputs().at(0)};
- const auto &input = _tensor_registry->getITensor(input_idx);
- auto input_shape = input->getShape();
-
- if (!input->is_dynamic())
- return;
-
- auto output_ind = op.getOutputs().at(0);
- auto output = _tensor_registry->getITensor(output_ind);
-
- ir::Shape output_shape;
- output_shape.append(input_shape.rank());
-
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
- assert(output->buffer() != nullptr);
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Slice &op)
-{
- const auto input_index{op.getInputs().at(ir::operation::Slice::Input::INPUT)};
- const auto input = _tensor_registry->getITensor(input_index);
- const auto begins_index{op.getInputs().at(ir::operation::Slice::Input::BEGINS)};
- const auto begins = _tensor_registry->getITensor(begins_index);
- const auto sizes_index{op.getInputs().at(ir::operation::Slice::Input::SIZES)};
- const auto sizes = _tensor_registry->getITensor(sizes_index);
- auto output_index = op.getOutputs().at(0);
- auto output = _tensor_registry->getITensor(output_index);
-
- if (!(input->is_dynamic() || begins->is_dynamic() || sizes->is_dynamic() || output->is_dynamic()))
- {
- return;
- }
-
- ir::Shape input_shape = input->getShape();
- auto begins_buf = reinterpret_cast<const int32_t *>(begins->buffer());
- auto sizes_buf = reinterpret_cast<const int32_t *>(sizes->buffer());
-
- ir::Shape new_shape = shape_inference::inferSliceShape(input_shape, begins_buf, sizes_buf);
-
- dynamicTensorManagerOf(output)->applyShape(output_index, new_shape);
- assert(output->buffer() != nullptr);
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Softmax &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Softmax::INPUT));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::SpaceToBatchND &op)
-{
- const auto input_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
- const auto block_shape_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
- const auto padding_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
- auto output_idx{op.getOutputs().at(0)};
-
- const auto &input = _tensor_registry->getITensor(input_idx);
- const auto &block_shape = _tensor_registry->getITensor(block_shape_idx);
- const auto &padding = _tensor_registry->getITensor(padding_idx);
- auto output = _tensor_registry->getITensor(output_idx);
-
- if (!(input->is_dynamic() || block_shape->is_dynamic() || padding->is_dynamic() ||
- output->is_dynamic()))
- {
- return;
- }
-
- auto input_shape = input->getShape();
- auto block_shape_shape = block_shape->getShape();
- auto padding_shape = padding->getShape();
-
- auto block_shape_data = reinterpret_cast<int32_t *>(block_shape->buffer());
- auto padding_data = reinterpret_cast<int32_t *>(padding->buffer());
-
- ir::Shape new_shape = shape_inference::inferSpaceToBatchNDShape(
- input_shape, block_shape_shape, padding_shape, block_shape_data, padding_data);
-
- dynamicTensorManagerOf(output)->applyShape(output_idx, new_shape);
- assert(output->buffer() != nullptr);
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Split &op)
-{
- const auto input_idx{op.getInputs().at(ir::operation::Split::Input::INPUT)};
- const auto &input = _tensor_registry->getITensor(input_idx);
-
- if (!input->is_dynamic())
- {
- return;
- }
-
- auto input_shape = input->getShape();
-
- const auto axis = op.param().axis;
- const auto num_splits = op.param().num_splits;
- const auto rank = input_shape.rank();
- auto axis_resolved = axis < 0 ? axis + rank : axis;
-
- assert(0 <= axis_resolved && axis_resolved < rank);
-
- ir::Shape new_shape = shape_inference::inferSplitShape(input_shape, axis_resolved, num_splits);
- for (int out_tensor_idx = 0; out_tensor_idx < num_splits; out_tensor_idx++)
- {
- auto output_ind = op.getOutputs().at(out_tensor_idx);
- auto output = _tensor_registry->getITensor(output_ind);
-
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
- assert(output->buffer() != nullptr);
- }
-}
-
-void DynamicShapeInferer::visit(const ir::operation::SquaredDifference &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::SquaredDifference::Input::LHS),
- op.getInputs().at(ir::operation::SquaredDifference::Input::RHS));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Squeeze &op)
-{
- const auto input_idx{op.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
- const auto &input = _tensor_registry->getITensor(input_idx);
-
- if (!input->is_dynamic())
- {
- return;
- }
-
- auto input_shape = input->getShape();
-
- // Squeeze output shpae
- ir::Shape new_shape = shape_inference::inferSqueezeShape(input_shape, op.param());
-
- auto output_ind = op.getOutputs().at(0);
- auto output = _tensor_registry->getITensor(output_ind);
-
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
- assert(output->buffer() != nullptr);
-}
-
-void DynamicShapeInferer::visit(const ir::operation::StridedSlice &op)
-{
-
- const auto input_index{op.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
- auto input = _tensor_registry->getITensor(input_index);
- ir::Shape input_shape = input->getShape();
-
- const auto starts_index{op.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
- auto starts = _tensor_registry->getITensor(starts_index);
-
- const auto ends_index{op.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
- auto ends = _tensor_registry->getITensor(ends_index);
-
- const auto strides_index{op.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
- auto strides = _tensor_registry->getITensor(strides_index);
-
- if (!(input->is_dynamic() || starts->is_dynamic() || ends->is_dynamic() || strides->is_dynamic()))
- {
- return;
- }
-
- const auto begin_mask = op.param().begin_mask;
- const auto end_mask = op.param().end_mask;
- const auto shrink_axis_mask = op.param().shrink_axis_mask;
- const auto rank = input_shape.rank();
-
- auto op_params = shape_inference::buildStridedSliceParams(
- reinterpret_cast<uint32_t *>(starts->buffer()), reinterpret_cast<uint32_t *>(ends->buffer()),
- reinterpret_cast<uint32_t *>(strides->buffer()), begin_mask, end_mask, shrink_axis_mask,
- rank);
-
- auto output_index = op.getOutputs().at(0);
- auto output = _tensor_registry->getITensor(output_index);
-
- ir::Shape output_shape =
- onert::shape_inference::inferStridedSliceShape(input_shape, op_params, rank);
-
- dynamicTensorManagerOf(output)->applyShape(output_index, output_shape);
- assert(output->buffer() != nullptr);
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Tile &op)
-{
- auto output_ind = op.getOutputs().at(0);
- auto output = _tensor_registry->getITensor(output_ind);
-
- auto input_idx = op.getInputs().at(ir::operation::Tile::Input::INPUT);
- auto input = _tensor_registry->getITensor(input_idx);
-
- auto multiplier_idx = op.getInputs().at(ir::operation::Tile::Input::MULTIPLES);
- auto multiplier = _tensor_registry->getITensor(multiplier_idx);
-
- if ((!input->is_dynamic()) && (!output->is_dynamic()))
- return;
-
- auto input_shape = input->getShape();
- auto multiplier_buffer = reinterpret_cast<const int32_t *>(multiplier->buffer());
- assert(multiplier_buffer);
-
- auto output_shape = shape_inference::inferTileShape(input_shape, multiplier_buffer);
-
- // set output shape and output buffer
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
- assert(output->buffer() != nullptr);
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Transpose &op)
-{
- // check if output is not dynamic
- auto output_ind = op.getOutputs().at(0);
- auto output = _tensor_registry->getITensor(output_ind);
-
- // from op, access the buffer of second input to read new shape
- auto input_ind = op.getInputs().at(ir::operation::Transpose::Input::INPUT);
- auto input_tensor = _tensor_registry->getITensor(input_ind);
- auto input_shape = input_tensor->getShape();
-
- if (!input_tensor->is_dynamic())
- return;
-
- const auto perm{op.param().perm};
- // set output shape, based on input and params
- ir::Shape new_shape = shape_inference::inferTransposeShape(input_shape, perm);
-
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
- assert(output->buffer() != nullptr);
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Unpack &op)
-{
- // check if output is not dynamic
- const auto input_idx{op.getInputs().at(0)};
- const auto &input = _tensor_registry->getITensor(input_idx);
-
- if (!input->is_dynamic())
- return;
-
- auto input_shape = input->getShape();
-
- const auto rank = input_shape.rank();
- const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
- const auto num = op.param().num;
-
- assert(0 <= axis && axis < rank);
-
- ir::Shape new_shape = shape_inference::inferUnpackShape(input_shape, axis, rank);
-
- for (int out_tensor_idx = 0; out_tensor_idx < num; out_tensor_idx++)
- {
- auto output_ind = op.getOutputs().at(out_tensor_idx);
- auto output = _tensor_registry->getITensor(output_ind);
-
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
-
- assert(output->buffer() != nullptr);
- }
-}
-
-} // namespace exec
-} // namespace onert
diff --git a/runtime/onert/core/src/exec/DynamicShapeInferer.cc b/runtime/onert/core/src/exec/DynamicShapeInferer.cc
new file mode 100644
index 000000000..4cbf2fe64
--- /dev/null
+++ b/runtime/onert/core/src/exec/DynamicShapeInferer.cc
@@ -0,0 +1,1258 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "exec/DynamicShapeInferer.h"
+#include "util/ShapeInference.h"
+#include <cassert>
+
+namespace onert
+{
+namespace exec
+{
+
+void DynamicShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op,
+ const ir::OperandIndex lhs_idx,
+ const ir::OperandIndex rhs_idx)
+{
+ auto lhs = _tensor_registry->getITensor(lhs_idx);
+ auto lhs_shape = lhs->getShape();
+
+ auto rhs = _tensor_registry->getITensor(rhs_idx);
+ auto rhs_shape = rhs->getShape();
+
+ /*
+    Here, the state after compilation (static shape inference) could be one of the following:
+
+ lhs rhs output execution-time shape inf required
+ ------------------------------------------ ---------------------------------
+ case 1) static static static X
+ case 2) one or both are dynamic dynamic O
+
+    Then nnfw_apply_tensorinfo() could change one or both inputs to dynamic.
+    So, in this method, we have one more state and we have to re-calculate the shape for that case:
+
+ case 3) one or both are dynamic static O
+
+ So, only when all inputs are static, we can skip dynamic shape inference.
+ */
+ auto output_idx = op.getOutputs().at(0);
+ auto output = _tensor_registry->getITensor(output_idx);
+
+ if ((currently_static(lhs) && currently_static(rhs)) && previously_static(output))
+ return;
+
+ ir::Shape new_shape = shape_inference::inferEltwiseShape(lhs_shape, rhs_shape);
+
+ output->applyShape(new_shape);
+ assert(output->buffer() != nullptr);
+}
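+
+// Illustrative example of the re-inference above, assuming inferEltwiseShape()
+// applies NumPy-style broadcasting: lhs of shape (2, 1, 5) and rhs of shape (3, 5)
+// give an output of shape (2, 3, 5), which applyShape() then materializes.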
+
+void DynamicShapeInferer::handleSimpleUnaryOp(const ir::Operation &op,
+ const ir::OperandIndex input_ind)
+{
+ // check if input is not dynamic
+ auto input = _tensor_registry->getITensor(input_ind);
+ auto output_shape = input->getShape();
+
+ /*
+    Here, the state after compilation (static shape inference) could be one of the following:
+
+ input output execution-time shape inf required
+ ------------------------- ---------------------------------
+ case 1) static static X
+ case 2) dynamic dynamic O
+
+    Then nnfw_apply_tensorinfo() could change the input to dynamic.
+    So, in this method, we have one more state and we have to re-calculate the shape for that case:
+
+ case 3) dynamic static O
+
+ So, only when input is static, we can skip dynamic shape inference.
+ */
+ if (!input->is_dynamic())
+ return;
+
+ auto output_ind = op.getOutputs().at(0);
+ auto output = _tensor_registry->getITensor(output_ind);
+
+ output->applyShape(output_shape);
+ assert(output->buffer() != nullptr);
+}
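+
+// e.g. a unary op routed here (such as an elementwise activation) with a dynamic
+// input of shape (1, n) simply propagates (1, n) to its output via applyShape().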
+
+void DynamicShapeInferer::visit(const ir::operation::ArgMinMax &op)
+{
+ const auto input_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)};
+ const auto input = _tensor_registry->getITensor(input_idx);
+
+ const auto axis_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)};
+ const auto axis = _tensor_registry->getITensor(axis_idx);
+
+ auto output_ind = op.getOutputs().at(0);
+ auto output = _tensor_registry->getITensor(output_ind);
+
+ if (!input->is_dynamic() && !output->is_dynamic())
+ return;
+
+ auto input_shape = input->getShape();
+ auto axis_value = *reinterpret_cast<const int32_t *>(axis->buffer());
+ const auto rank = input_shape.rank();
+ axis_value = axis_value < 0 ? axis_value + rank : axis_value;
+
+ ir::Shape new_shape = shape_inference::inferArgMinMaxShape(input_shape, axis_value, rank);
+
+ output->applyShape(new_shape);
+ assert(output->buffer() != nullptr);
+}
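+
+// e.g. for an input of shape (2, 3, 4) with axis = -1, the axis resolves to 2 and
+// inferArgMinMaxShape() is expected to drop that dimension, yielding output shape
+// (2, 3) (a sketch of the usual arg-min/max semantics).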
+
+void DynamicShapeInferer::visit(const ir::operation::BatchMatMul &op)
+{
+ const auto lhs_index = op.getInputs().at(ir::operation::BatchMatMul::Input::LHS);
+ const auto rhs_index = op.getInputs().at(ir::operation::BatchMatMul::Input::RHS);
+ auto lhs = _tensor_registry->getITensor(lhs_index);
+ auto rhs = _tensor_registry->getITensor(rhs_index);
+
+ if (!lhs->is_dynamic() && !rhs->is_dynamic())
+ return;
+
+ const auto output_index = op.getOutputs().at(0);
+ auto output = _tensor_registry->getITensor(output_index);
+
+ auto lhs_shape = lhs->getShape();
+ auto rhs_shape = rhs->getShape();
+ // TODO
+
+ auto new_shape = shape_inference::inferBatchMatMulShape(lhs_shape, rhs_shape, op.param());
+ output->applyShape(new_shape);
+}
+
+void DynamicShapeInferer::visit(const ir::operation::BCQFullyConnected &op)
+{
+ const auto input_idx{op.getInputs().at(ir::operation::BCQFullyConnected::Input::INPUT)};
+ const auto &input = _tensor_registry->getITensor(input_idx);
+
+ const auto cluster_idx{
+ op.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)};
+ const auto &cluster = _tensor_registry->getITensor(cluster_idx);
+ assert(cluster->is_constant());
+
+ if (!input->is_dynamic())
+ return;
+
+ auto input_shape = input->getShape();
+ auto cluster_shape = cluster->getShape();
+
+ auto cluster_buf = reinterpret_cast<const int32_t *>(cluster->buffer());
+ assert(cluster_buf);
+
+ ir::Shape new_shape =
+ shape_inference::inferBCQFullyConnectedShape(input_shape, cluster_shape, cluster_buf);
+
+ auto output_ind = op.getOutputs().at(0);
+ auto output = _tensor_registry->getITensor(output_ind);
+
+ output->applyShape(new_shape);
+ assert(output->buffer() != nullptr);
+}
+
+void DynamicShapeInferer::visit(const ir::operation::BCQGather &op)
+{
+ const auto indices_idx{op.getInputs().at(ir::operation::BCQGather::Input::INDICES)};
+ const auto &indices = _tensor_registry->getITensor(indices_idx);
+
+ const auto input_binary_idx{op.getInputs().at(ir::operation::BCQGather::Input::INPUT_BINARY)};
+ const auto &input_binary = _tensor_registry->getITensor(input_binary_idx);
+
+ const auto cluster_idx{op.getInputs().at(ir::operation::BCQGather::Input::INPUT_CLUSTERS)};
+ const auto &cluster = _tensor_registry->getITensor(cluster_idx);
+ assert(cluster->is_constant());
+
+ if (!indices->is_dynamic())
+ return;
+
+ auto indices_shape = indices->getShape();
+ auto cluster_shape = cluster->getShape();
+ auto rank = input_binary->getShape().rank();
+
+ auto cluster_buf = reinterpret_cast<const int32_t *>(cluster->buffer());
+ assert(cluster_buf);
+
+ ir::Shape new_shape = shape_inference::inferBCQGatherShape(indices_shape, cluster_shape,
+ cluster_buf, rank, op.param());
+
+ auto output_ind = op.getOutputs().at(0);
+ auto output = _tensor_registry->getITensor(output_ind);
+
+ output->applyShape(new_shape);
+ assert(output->buffer() != nullptr);
+}
+
+void DynamicShapeInferer::visit(const ir::operation::BinaryArithmetic &op)
+{
+ handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS),
+ op.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS));
+}
+
+void DynamicShapeInferer::visit(const ir::operation::BroadcastTo &op)
+{
+ auto output_ind = op.getOutputs().at(0);
+ auto output = _tensor_registry->getITensor(output_ind);
+
+ auto input_idx = op.getInputs().at(ir::operation::BroadcastTo::INPUT);
+ auto input = _tensor_registry->getITensor(input_idx);
+
+ if ((!input->is_dynamic()) && (!output->is_dynamic()))
+ return;
+
+ auto shape_idx = op.getInputs().at(ir::operation::Tile::Input::MULTIPLES);
+ const auto &shape = _tensor_registry->getITensor(shape_idx);
+
+ assert(shape); // It shouldn't be null.
+
+ auto output_shape = shape_inference::inferBroadcastToShape(
+ shape->getShape(), reinterpret_cast<const int32_t *>(shape->buffer()));
+
+ // set output shape and output buffer
+ output->applyShape(output_shape);
+ assert(output->buffer() != nullptr);
+}
+
+void DynamicShapeInferer::visit(const ir::operation::Comparison &op)
+{
+ handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Comparison::Input::INPUT0),
+ op.getInputs().at(ir::operation::Comparison::Input::INPUT1));
+}
+
+void DynamicShapeInferer::visit(const ir::operation::Concat &op)
+{
+ /*
+ The state after compilation (static shape inference) could be one of the following:
+
+ inputs output execution-time shape inf required
+ ------------------------------------------ ---------------------------------
+ case 1) all static static X
+ case 2) at least one is dynamic dynamic O
+
+ Then nnfw_apply_tensorinfo() could make one or both inputs dynamic.
+ So, in this method, we have one more state and must re-calculate the shape for that case.
+
+ case 3) at least one is dynamic static O
+
+ So, only when all inputs are static can we skip dynamic shape inference.
+ */
+ bool all_static = true;
+ for (auto &&input_ind : op.getInputs())
+ {
+ auto input = _tensor_registry->getITensor(input_ind);
+ if (input->is_dynamic())
+ {
+ all_static = false;
+ break;
+ }
+ }
+
+ if (all_static)
+ return;
+
+ // sanity check
+ {
+ auto isConcatible = [](const backend::ITensor *input1, const backend::ITensor *input2,
+ int32_t axis) {
+ auto shape1 = input1->getShape();
+ auto shape2 = input2->getShape();
+ if (shape1.rank() != shape2.rank())
+ return false;
+
+ for (int i = 0; i < shape1.rank(); i++)
+ {
+ auto positive_axis = (axis >= 0) ? axis : axis + input1->getShape().rank();
+
+ if (i != positive_axis)
+ if (shape1.dim(i) != shape2.dim(i))
+ return false;
+ }
+
+ return true;
+ };
+
+ auto first_input_ind = op.getInputs().at(0);
+ auto first_input = _tensor_registry->getITensor(first_input_ind);
+
+ for (auto &&input_ind : op.getInputs())
+ {
+ auto input = _tensor_registry->getITensor(input_ind);
+ if (input != first_input && !isConcatible(first_input, input, op.param().axis))
+ throw std::runtime_error("input shapes do not match for concat");
+ }
+ }
+
+ // getting output shape
+ onert::shape_inference::Shapes in_shapes;
+ for (auto &&input_ind : op.getInputs())
+ {
+ auto input = _tensor_registry->getITensor(input_ind);
+ ir::Shape shape = input->getShape();
+
+ in_shapes.emplace_back(shape);
+ }
+
+ auto output_ind = op.getOutputs().at(0);
+ auto output = _tensor_registry->getITensor(output_ind);
+ auto output_shape = shape_inference::inferConcatShape(in_shapes, op.param());
+
+ output->applyShape(output_shape);
+}
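+
+// Editorial sketch, not part of this patch: the concat output shape is the
+// first input's shape with the concat axis replaced by the sum of all inputs'
+// extents along that axis; the sanity check above guarantees every other
+// dimension matches. The helper name is hypothetical.
+static inline ir::Shape sketchConcatShape(const std::vector<ir::Shape> &ins, int32_t axis)
+{
+ int32_t sum = 0;
+ for (const auto &s : ins)
+ sum += s.dim(axis); // total extent along the concat axis
+ ir::Shape out;
+ for (int i = 0; i < ins.front().rank(); ++i)
+ out.append(i == axis ? sum : ins.front().dim(i));
+ return out;
+}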
+
+void DynamicShapeInferer::visit(const ir::operation::Conv2D &op)
+{
+ // check if input is not dynamic
+ auto input_ind = op.getInputs().at(ir::operation::Conv2D::INPUT);
+ auto input = _tensor_registry->getITensor(input_ind);
+
+ auto ker_ind = op.getInputs().at(ir::operation::Conv2D::KERNEL);
+ auto ker = _tensor_registry->getITensor(ker_ind);
+
+ if ((!input->is_dynamic()) && (!ker->is_dynamic()))
+ return;
+
+ ir::Shape input_shape = input->getShape();
+ ir::Shape ker_shape = ker->getShape();
+
+ auto output_ind = op.getOutputs().at(0);
+ auto output = _tensor_registry->getITensor(output_ind);
+
+ ir::Shape output_shape = shape_inference::inferConv2DShape(input_shape, ker_shape, op.param());
+
+ output->applyShape(output_shape);
+ assert(output->buffer() != nullptr);
+}
+
+void DynamicShapeInferer::visit(const ir::operation::ElementwiseActivation &op)
+{
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseActivation::INPUT));
+}
+
+void DynamicShapeInferer::visit(const ir::operation::ElementwiseBinary &op)
+{
+ handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS),
+ op.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS));
+}
+
+void DynamicShapeInferer::visit(const ir::operation::ElementwiseUnary &op)
+{
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT));
+}
+
+void DynamicShapeInferer::visit(const ir::operation::ExpandDims &op)
+{
+ // check if input is not dynamic
+ auto input_ind = op.getInputs().at(ir::operation::ExpandDims::INPUT);
+ auto input = _tensor_registry->getITensor(input_ind);
+
+ // check if output is not dynamic, meaning when 1st input is static and 2nd input is const
+ auto output_ind = op.getOutputs().at(0);
+ auto output = _tensor_registry->getITensor(output_ind);
+
+ /*
+ Here, the state after compilation (static shape inference) could be one of the following:
+
+ input1 input2 output execution-time shape inf required
+ ----------------------------- --------------------------------
+ case 1) static const static X
+ case 2) static placeholder dynamic O
+ case 3) dynamic const dynamic O
+ case 4) dynamic placeholder dynamic O
+
+ Then nnfw_apply_tensorinfo() could make the input dynamic.
+ So, in this method, we could have one more state and must re-calculate the shape
+ for that case.
+
+ case 5) dynamic const static O
+
+ So, only when input1 and output are static can we skip dynamic shape inference.
+ */
+ if ((!input->is_dynamic()) && (!output->is_dynamic()))
+ return;
+
+ ir::Shape input_shape = input->getShape();
+
+ auto axis_ind = op.getInputs().at(ir::operation::ExpandDims::AXIS);
+ auto axis = _tensor_registry->getITensor(axis_ind);
+ auto axis_type = axis->data_type();
+ assert(axis_type == ir::DataType::INT32 || axis_type == ir::DataType::INT64);
+
+ assert(axis->buffer());
+ int32_t axis_value =
+ (axis_type == ir::DataType::INT32)
+ ? reinterpret_cast<const int32_t *>(axis->buffer())[0]
+ : static_cast<int32_t>(reinterpret_cast<const int64_t *>(axis->buffer())[0]);
+
+ auto output_shape = shape_inference::inferExpandDimsShape(input_shape, axis_value);
+
+ output->applyShape(output_shape);
+ assert(output->buffer() != nullptr);
+}
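+
+// Editorial sketch, not part of this patch: ExpandDims inserts a size-1
+// dimension at `axis`; a negative axis is wrapped by rank + 1, so the valid
+// range is [-(rank + 1), rank]. The helper name is hypothetical.
+static inline ir::Shape sketchExpandDimsShape(const ir::Shape &in, int32_t axis)
+{
+ const int out_rank = in.rank() + 1;
+ axis = axis < 0 ? axis + out_rank : axis; // wrap a negative axis
+ ir::Shape out;
+ for (int i = 0, j = 0; i < out_rank; ++i)
+ out.append(i == axis ? 1 : in.dim(j++)); // the size-1 dim lands at `axis`
+ return out;
+}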
+
+void DynamicShapeInferer::visit(const ir::operation::Fill &op)
+{
+ // check if output is not dynamic
+ auto output_ind = op.getOutputs().at(0);
+ auto output = _tensor_registry->getITensor(output_ind);
+ auto shape_ind = op.getInputs().at(ir::operation::Fill::Input::SHAPE);
+ auto shape = _tensor_registry->getITensor(shape_ind);
+
+ if ((!shape->is_dynamic()) && (!output->is_dynamic()))
+ return;
+
+ const auto dims_type = shape->data_type();
+ assert(dims_type == ir::DataType::INT32 || dims_type == ir::DataType::INT64);
+
+ auto dims_buf = shape->buffer();
+ assert(dims_buf);
+
+ const auto &dims_shape = shape->getShape();
+ const auto &output_shape = ((dims_type == ir::DataType::INT32)
+ ? shape_inference::inferFillShape<int32_t>(
+ dims_shape, reinterpret_cast<const int32_t *>(dims_buf))
+ : shape_inference::inferFillShape<int64_t>(
+ dims_shape, reinterpret_cast<const int64_t *>(dims_buf)));
+
+ output->applyShape(output_shape);
+ assert(output->buffer() != nullptr);
+}
+
+void DynamicShapeInferer::visit(const ir::operation::FullyConnected &op)
+{
+ const auto input_idx{op.getInputs().at(ir::operation::FullyConnected::Input::INPUT)};
+ const auto &input = _tensor_registry->getITensor(input_idx);
+
+ const auto ker_idx{op.getInputs().at(ir::operation::FullyConnected::Input::WEIGHT)};
+ const auto &ker = _tensor_registry->getITensor(ker_idx);
+
+ if (!input->is_dynamic() && !ker->is_dynamic())
+ return;
+
+ auto input_shape = input->getShape();
+ auto ker_shape = ker->getShape();
+
+ ir::Shape new_shape = shape_inference::inferFullyConnectedShape(input_shape, ker_shape);
+
+ auto output_ind = op.getOutputs().at(0);
+ auto output = _tensor_registry->getITensor(output_ind);
+
+ output->applyShape(new_shape);
+ assert(output->buffer() != nullptr);
+}
+
+void DynamicShapeInferer::visit(const ir::operation::FusedBatchNorm &op)
+{
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::FusedBatchNorm::Input::INPUT));
+}
+
+void DynamicShapeInferer::visit(const ir::operation::Gather &op)
+{
+ const auto input_idx{op.getInputs().at(ir::operation::Gather::Input::INPUT)};
+ const auto &input = _tensor_registry->getITensor(input_idx);
+ auto input_shape = input->getShape();
+
+ const auto indices_idx{op.getInputs().at(ir::operation::Gather::Input::INDICES)};
+ const auto &indices = _tensor_registry->getITensor(indices_idx);
+ auto indices_shape = indices->getShape();
+
+ if (!(input->is_dynamic()) && !(indices->is_dynamic()))
+ return;
+
+ const auto rank = input_shape.rank();
+ const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
+
+ assert(0 <= axis && axis < rank);
+
+ ir::Shape new_shape = shape_inference::inferGatherShape(input_shape, indices_shape, axis, rank);
+
+ auto output_ind = op.getOutputs().at(0);
+ auto output = _tensor_registry->getITensor(output_ind);
+
+ output->applyShape(new_shape);
+ assert(output->buffer() != nullptr);
+}
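+
+// Editorial sketch, not part of this patch: Gather's output shape is the input
+// shape with the `axis` dimension replaced by the whole indices shape, i.e.
+// in[0..axis) ++ indices ++ in(axis..rank). The helper name is hypothetical.
+static inline ir::Shape sketchGatherShape(const ir::Shape &in, const ir::Shape &indices,
+ int32_t axis)
+{
+ ir::Shape out;
+ for (int i = 0; i < axis; ++i)
+ out.append(in.dim(i));
+ for (int i = 0; i < indices.rank(); ++i)
+ out.append(indices.dim(i)); // indices dims replace the gathered axis
+ for (int i = axis + 1; i < in.rank(); ++i)
+ out.append(in.dim(i));
+ return out;
+}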
+
+void DynamicShapeInferer::visit(const ir::operation::L2Normalization &op)
+{
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::L2Normalization::INPUT));
+}
+
+void DynamicShapeInferer::visit(const ir::operation::LSTM &op)
+{
+ const auto output_index{op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
+ auto output = _tensor_registry->getITensor(output_index);
+
+ const auto output_state_out_index{
+ op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
+
+ const auto cell_state_out_index{op.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
+
+ const auto scratch_buffer_index{op.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
+
+ if (!output->is_dynamic() &&
+ !(_tensor_registry->getITensor(output_state_out_index) != nullptr &&
+ _tensor_registry->getITensor(output_state_out_index)->is_dynamic()) &&
+ !(_tensor_registry->getITensor(cell_state_out_index) != nullptr &&
+ _tensor_registry->getITensor(cell_state_out_index)->is_dynamic()) &&
+ !(_tensor_registry->getITensor(scratch_buffer_index) != nullptr &&
+ _tensor_registry->getITensor(scratch_buffer_index)->is_dynamic()))
+ return;
+
+ const auto input_index{op.getInputs().at(ir::operation::LSTM::Input::INPUT)};
+ const auto input = _tensor_registry->getITensor(input_index);
+ const auto input_shape = input->getShape();
+
+ const auto input_to_output_weights_index{
+ op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
+ const auto input_to_output_weights = _tensor_registry->getITensor(input_to_output_weights_index);
+ const auto input_to_output_weights_shape = input_to_output_weights->getShape();
+
+ const auto recurrent_to_output_weights_index{
+ op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
+ const auto recurrent_to_output_weights =
+ _tensor_registry->getITensor(recurrent_to_output_weights_index);
+ const auto recurrent_to_output_weights_shape = recurrent_to_output_weights->getShape();
+
+ // re-sizing outputs
+ const int n_batch =
+ (input_shape.rank() == 3 && op.param().time_major) ? input_shape.dim(1) : input_shape.dim(0);
+ const int n_cell = input_to_output_weights_shape.dim(0);
+ const int n_output = recurrent_to_output_weights_shape.dim(1);
+ if (input_shape.rank() == 3)
+ {
+ if (op.param().time_major)
+ output->applyShape(ir::Shape{input_shape.dim(0), n_batch, n_output});
+ else
+ output->applyShape(ir::Shape{n_batch, input_shape.dim(1), n_output});
+ }
+ else
+ {
+ assert(input_shape.rank() == 2);
+ output->applyShape(ir::Shape{n_batch, n_output});
+ }
+ assert(output->buffer() != nullptr);
+
+ auto output_state_out = _tensor_registry->getITensor(output_state_out_index);
+ if (output_state_out != nullptr)
+ {
+ output_state_out->applyShape(ir::Shape{n_batch, n_output});
+ assert(output_state_out->buffer() != nullptr);
+ }
+
+ auto cell_state_out = _tensor_registry->getITensor(cell_state_out_index);
+ if (cell_state_out != nullptr)
+ {
+ cell_state_out->applyShape(ir::Shape{n_batch, n_cell});
+ assert(cell_state_out->buffer() != nullptr);
+ }
+
+ auto scratch_buffer = _tensor_registry->getITensor(scratch_buffer_index);
+ if (scratch_buffer != nullptr)
+ {
+ const auto input_to_input_weights_index{
+ op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)};
+ const auto recurrent_to_input_weights_index{
+ op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)};
+
+ const auto input_to_input_weights_shape =
+ _tensor_registry->getITensor(input_to_input_weights_index)->getShape();
+ bool has_input_to_input_weights =
+ input_to_input_weights_shape.dim(0) != 0 && input_to_input_weights_shape.dim(1) != 0;
+
+ const auto recurrent_to_input_weights_shape =
+ _tensor_registry->getITensor(recurrent_to_input_weights_index)->getShape();
+ bool has_recurrent_to_input_weights =
+ recurrent_to_input_weights_shape.dim(0) != 0 && recurrent_to_input_weights_shape.dim(1) != 0;
+
+ // NOTE The cell_to_input_weights do not exist in non-peephole mode, even for a regular
+ // (non-CIFG) LSTM, so CIFG is detected from the input/recurrent weights instead.
+ // true: no CIFG
+ // false: CIFG
+ bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights;
+ if (has_cifg_param)
+ {
+ scratch_buffer->applyShape(ir::Shape{n_batch, n_cell * 4});
+ }
+ else
+ {
+ scratch_buffer->applyShape(ir::Shape{n_batch, n_cell * 3});
+ }
+ assert(scratch_buffer->buffer() != nullptr);
+ }
+}
+
+void DynamicShapeInferer::visit(const ir::operation::MatrixBandPart &op)
+{
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::MatrixBandPart::INPUT));
+}
+
+void DynamicShapeInferer::visit(const ir::operation::DetectionPostProcess & /* op */)
+{
+ // NOTE DetectionPostProcess's undefined output shapes are decided at compile time
+ // by the static shape inferer.
+ // DetectionPostProcess's output shapes are independent of the input shape
+ // and are decided by parameter values.
+}
+
+void DynamicShapeInferer::visit(const ir::operation::OneHot &op)
+{
+ auto output_ind = op.getOutputs().at(0);
+ auto output = _tensor_registry->getITensor(output_ind);
+
+ auto indices_ind = op.getInputs().at(ir::operation::OneHot::INDICES);
+ const auto &indices = _tensor_registry->getITensor(indices_ind);
+ auto indices_shape = indices->getShape();
+
+ auto depth_ind = op.getInputs().at(ir::operation::OneHot::DEPTH);
+ const auto &depth = _tensor_registry->getITensor(depth_ind);
+
+ if (!indices->is_dynamic() && !depth->is_dynamic())
+ {
+ return;
+ }
+
+ int32_t *depth_buf = reinterpret_cast<int32_t *>(depth->buffer());
+ assert(depth_buf);
+ const auto axis_val = op.param().axis;
+
+ ir::Shape new_shape = shape_inference::inferOnehotShape(indices_shape, *depth_buf, axis_val);
+ output->applyShape(new_shape);
+ assert(output->buffer() != nullptr);
+}
+
+void DynamicShapeInferer::visit(const ir::operation::Pack &op)
+{
+ bool is_any_of_inputs_dynamic = [&]() -> bool {
+ for (uint32_t i = 0; i < op.getInputs().size(); ++i)
+ {
+ const auto &input = _tensor_registry->getITensor(op.getInputs().at(i));
+ if (input->is_dynamic())
+ {
+ return true;
+ }
+ }
+ return false;
+ }();
+
+ const auto input_idx{op.getInputs().at(0)};
+ const auto &input = _tensor_registry->getITensor(input_idx);
+ auto input_shape = input->getShape();
+
+ auto output_ind = op.getOutputs().at(0);
+ auto output = _tensor_registry->getITensor(output_ind);
+
+ if (!is_any_of_inputs_dynamic && !output->is_dynamic())
+ return;
+
+ const auto rank = input_shape.rank() + 1;
+ const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
+ const auto num = op.param().num;
+
+ assert(0 <= axis && axis < rank);
+
+ ir::Shape new_shape = shape_inference::inferPackShape(input_shape, axis, rank, num);
+
+ output->applyShape(new_shape);
+ assert(output->buffer() != nullptr);
+}
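+
+// Editorial sketch, not part of this patch: Pack stacks `num` rank-R inputs
+// into one rank-(R + 1) output by inserting a new dimension of extent `num` at
+// `axis`. The helper name is hypothetical.
+static inline ir::Shape sketchPackShape(const ir::Shape &in, int32_t axis, int32_t num)
+{
+ const int out_rank = in.rank() + 1;
+ ir::Shape out;
+ for (int i = 0, j = 0; i < out_rank; ++i)
+ out.append(i == axis ? num : in.dim(j++)); // the new `num`-sized dim at `axis`
+ return out;
+}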
+
+void DynamicShapeInferer::visit(const ir::operation::Pad &op)
+{
+ // check if output is not dynamic
+ auto output_ind = op.getOutputs().at(0);
+ auto output = _tensor_registry->getITensor(output_ind);
+
+ auto input_ind = op.getInputs().at(ir::operation::Pad::Input::INPUT);
+ auto input = _tensor_registry->getITensor(input_ind);
+
+ auto pad_ind = op.getInputs().at(ir::operation::Pad::Input::PAD);
+ auto pad = _tensor_registry->getITensor(pad_ind);
+
+ // check if input and output are not dynamic
+ if ((!input->is_dynamic()) && (!output->is_dynamic()))
+ return;
+
+ int32_t *pad_buf = reinterpret_cast<int32_t *>(pad->buffer());
+ assert(pad_buf);
+
+ auto output_shape =
+ shape_inference::inferPadShape(input->getShape(), pad_buf, pad->getShape().num_elements());
+
+ // change output shape and reallocate output tensor memory
+ output->applyShape(output_shape);
+ assert(output->buffer() != nullptr);
+}
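+
+// Editorial sketch, not part of this patch: the pad buffer is laid out as
+// rank x 2 (before, after) pairs, so each output dimension grows by both of
+// its paddings. The helper name is hypothetical.
+static inline ir::Shape sketchPadShape(const ir::Shape &in, const int32_t *pad_buf)
+{
+ ir::Shape out;
+ for (int i = 0; i < in.rank(); ++i)
+ out.append(in.dim(i) + pad_buf[2 * i] + pad_buf[2 * i + 1]);
+ return out;
+}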
+
+void DynamicShapeInferer::visit(const ir::operation::Permute & /* op */)
+{
+ // NOTE Permute is a special operation which does not do shape inference before the actual
+ // function(kernel) execution. Shape inference and output allocation will be done in the kernel
+ // on-the-fly, as it must support inter-backend inference/allocation.
+}
+
+void DynamicShapeInferer::visit(const ir::operation::Pow &op)
+{
+ handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Pow::Input::LHS),
+ op.getInputs().at(ir::operation::Pow::Input::RHS));
+}
+
+void DynamicShapeInferer::visit(const ir::operation::Range &op)
+{
+ // check if output is not dynamic
+ auto output_ind = op.getOutputs().at(0);
+ auto output = _tensor_registry->getITensor(output_ind);
+
+ // from op, access the buffer of second input to read new shape
+ auto start_idx = op.getInputs().at(ir::operation::Range::Input::START);
+ auto start_tensor = _tensor_registry->getITensor(start_idx);
+
+ auto limit_idx = op.getInputs().at(ir::operation::Range::Input::LIMIT);
+ auto limit_tensor = _tensor_registry->getITensor(limit_idx);
+
+ auto delta_idx = op.getInputs().at(ir::operation::Range::Input::DELTA);
+ auto delta_tensor = _tensor_registry->getITensor(delta_idx);
+
+ if (!start_tensor->is_dynamic() && !limit_tensor->is_dynamic() && !delta_tensor->is_dynamic() &&
+ !output->is_dynamic())
+ return;
+
+ ir::Shape new_shape;
+ if (output->data_type() == ir::DataType::FLOAT32)
+ {
+ new_shape =
+ shape_inference::inferRangeShape<float>(*reinterpret_cast<float *>(start_tensor->buffer()),
+ *reinterpret_cast<float *>(limit_tensor->buffer()),
+ *reinterpret_cast<float *>(delta_tensor->buffer()));
+ }
+ else if (output->data_type() == ir::DataType::INT32)
+ {
+ new_shape = shape_inference::inferRangeShape<int32_t>(
+ *reinterpret_cast<int32_t *>(start_tensor->buffer()),
+ *reinterpret_cast<int32_t *>(limit_tensor->buffer()),
+ *reinterpret_cast<int32_t *>(delta_tensor->buffer()));
+ }
+ output->applyShape(new_shape);
+ assert(output->buffer() != nullptr);
+}
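+
+// Editorial sketch, not part of this patch: Range always produces a 1-D output
+// whose single extent is the number of steps from `start` toward `limit` in
+// increments of `delta`. The helper name is hypothetical.
+template <typename T> static inline ir::Shape sketchRangeShape(T start, T limit, T delta)
+{
+ int32_t count = 0;
+ if (delta > T{0})
+ for (T v = start; v < limit; v += delta) // counting up
+ ++count;
+ else
+ for (T v = start; v > limit; v += delta) // counting down
+ ++count;
+ ir::Shape out;
+ out.append(count);
+ return out;
+}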
+
+void DynamicShapeInferer::visit(const ir::operation::Reduce &op)
+{
+ const auto input_idx{op.getInputs().at(ir::operation::Reduce::Input::INPUT)};
+ const auto &input = _tensor_registry->getITensor(input_idx);
+ auto input_shape = input->getShape();
+
+ const auto axes_idx{op.getInputs().at(ir::operation::Reduce::Input::AXES)};
+ const auto &axes = _tensor_registry->getITensor(axes_idx);
+
+ if (!input->is_dynamic())
+ return;
+
+ std::vector<int32_t> axes_vec;
+ for (uint32_t i = 0; i < axes->getShape().num_elements(); ++i)
+ {
+ const auto buffer = axes->buffer() + axes->calcOffset({i});
+ switch (axes->data_type())
+ {
+ case ir::DataType::INT32:
+ {
+ axes_vec.emplace_back(*reinterpret_cast<const int32_t *>(buffer));
+ break;
+ }
+ case ir::DataType::INT64:
+ {
+ axes_vec.emplace_back(*reinterpret_cast<const int64_t *>(buffer));
+ break;
+ }
+ default:
+ throw std::runtime_error("DynamicShapeInferer " + op.name() + ": Not supported data type");
+ break;
+ }
+ }
+ const auto keep_dims = op.param().keep_dims;
+
+ auto output_ind = op.getOutputs().at(0);
+ auto output = _tensor_registry->getITensor(output_ind);
+
+ ir::Shape new_shape = shape_inference::inferReduceShape(input_shape, axes_vec, keep_dims);
+
+ output->applyShape(new_shape);
+ assert(output->buffer() != nullptr);
+}
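+
+// Editorial sketch, not part of this patch: with keep_dims a reduced axis
+// survives as size 1, otherwise it is dropped entirely. Axes are assumed to be
+// already wrapped to [0, rank). The helper name is hypothetical.
+static inline ir::Shape sketchReduceShape(const ir::Shape &in, const std::vector<int32_t> &axes,
+ bool keep_dims)
+{
+ ir::Shape out;
+ for (int i = 0; i < in.rank(); ++i)
+ {
+ bool reduced = false;
+ for (auto a : axes)
+ reduced |= (a == i);
+ if (!reduced)
+ out.append(in.dim(i));
+ else if (keep_dims)
+ out.append(1); // reduced axis collapses to 1
+ }
+ return out;
+}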
+
+void DynamicShapeInferer::visit(const ir::operation::Reshape &op)
+{
+ // check if output is not dynamic
+ auto output_ind = op.getOutputs().at(0);
+ auto output = _tensor_registry->getITensor(output_ind);
+
+ auto input_ind = op.getInputs().at(ir::operation::Reshape::Input::INPUT);
+ auto input = _tensor_registry->getITensor(input_ind);
+
+ /*
+ Here, the state after compilation (static shape inference) could be one of the following:
+
+ input1 input2 (or option) output execution-time shape inf required
+ ------------------------------------ --------------------------------
+ case 1) static const static X
+ case 2) static placeholder dynamic O
+ case 3) dynamic const dynamic O
+ case 4) dynamic placeholder dynamic O
+
+ Then nnfw_apply_tensorinfo() could make the input dynamic.
+ So, in this method, we could have one more state and must re-calculate the shape
+ for that case.
+
+ case 5) dynamic const static O
+
+ So, only when both input1 and output are static can we skip dynamic shape inference.
+ */
+ if ((!input->is_dynamic()) && (!output->is_dynamic()))
+ return;
+
+ // New shape is given by second input tensor
+ if (op.getInputs().size() == 2)
+ {
+ // from op, access the buffer of second input to read new shape
+ auto new_shape_ind = op.getInputs().at(ir::operation::Reshape::Input::SHAPE);
+
+ // getting output shape by reading new_shape tensor buffer
+ auto new_shape = _tensor_registry->getITensor(new_shape_ind);
+ assert(new_shape);
+
+ int32_t *new_shape_buf = reinterpret_cast<int32_t *>(new_shape->buffer());
+ assert(new_shape_buf);
+
+ auto output_shape = shape_inference::inferReshapeShape(
+ new_shape_buf, new_shape->getShape().num_elements(), input->getShape().num_elements());
+
+ // if shape is changed, change output shape and reallocate output tensor memory
+ if (output_shape != output->getShape() || output->buffer() == nullptr)
+ {
+ // change on output shape
+ output->applyShape(output_shape);
+ }
+ assert(output->buffer() != nullptr);
+ }
+ // New shape is given by option
+ else if (op.param().new_shape.size() != 0)
+ {
+ // Let's check the new_shape option
+ auto shape = op.param().new_shape;
+ auto output_shape = shape_inference::inferReshapeShape(shape.data(), shape.size(),
+ input->getShape().num_elements());
+
+ // if shape is changed, change output shape and reallocate output tensor memory
+ if (output_shape != output->getShape() || output->buffer() == nullptr)
+ {
+ // change on output shape
+ output->applyShape(output_shape);
+ }
+ assert(output->buffer() != nullptr);
+ }
+ else
+ {
+ throw std::runtime_error("Reshape: new shape is missing");
+ }
+}
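+
+// Editorial sketch, not part of this patch: a requested reshape may contain at
+// most one -1 wildcard, which is resolved so the element count is preserved;
+// any other total is an error. The helper name is hypothetical.
+static inline ir::Shape sketchReshapeShape(const int32_t *req, size_t rank, size_t num_elems)
+{
+ size_t known = 1;
+ int wildcard = -1;
+ for (size_t i = 0; i < rank; ++i)
+ {
+ if (req[i] == -1)
+ wildcard = static_cast<int>(i); // remember the wildcard position
+ else
+ known *= static_cast<size_t>(req[i]);
+ }
+ if (wildcard < 0 && known != num_elems)
+ throw std::runtime_error("Reshape: element count mismatch");
+ ir::Shape out;
+ for (size_t i = 0; i < rank; ++i)
+ out.append(wildcard == static_cast<int>(i) ? static_cast<int32_t>(num_elems / known) : req[i]);
+ return out;
+}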
+
+void DynamicShapeInferer::visit(const ir::operation::ResizeBilinear &op)
+{
+ // check if output is not dynamic
+ auto output_ind = op.getOutputs().at(0);
+ auto output = _tensor_registry->getITensor(output_ind);
+
+ auto input_ind = op.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT);
+ auto input = _tensor_registry->getITensor(input_ind);
+
+ if ((!input->is_dynamic()) && (!output->is_dynamic()))
+ return;
+
+ // getting output shape from input shape and Params
+ int32_t height_out, width_out;
+ if (op.getInputs().size() == 2)
+ {
+ auto size_ind = op.getInputs().at(ir::operation::ResizeBilinear::Input::SIZE);
+ auto size = _tensor_registry->getITensor(size_ind);
+ if (size->data_type() == ir::DataType::INT32)
+ {
+ auto size_buf = reinterpret_cast<const int32_t *>(size->buffer());
+ height_out = size_buf[0];
+ width_out = size_buf[1];
+ }
+ else
+ {
+ throw std::runtime_error("DynamicShapeInferer ResizeBilinear : Unsupported data type");
+ }
+ }
+ else
+ {
+ height_out = op.param().height_out;
+ width_out = op.param().width_out;
+ }
+ auto output_shape =
+ shape_inference::inferResizeBilinearShape(input->getShape(), height_out, width_out);
+
+ // if shape is changed, change output shape and reallocate output tensor memory
+ if (output_shape != output->getShape() || output->buffer() == nullptr)
+ {
+ // change on output shape
+ output->applyShape(output_shape);
+ }
+ assert(output->buffer() != nullptr);
+}
+
+void DynamicShapeInferer::visit(const ir::operation::Reverse &op)
+{
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Reverse::INPUT));
+}
+
+void DynamicShapeInferer::visit(const ir::operation::Select &op)
+{
+ const auto input_cond_idx = op.getInputs().at(ir::operation::Select::Input::CONDITION);
+ const auto &input_cond = _tensor_registry->getITensor(input_cond_idx);
+
+ const auto input_true_idx = op.getInputs().at(ir::operation::Select::Input::INPUT_TRUE);
+ const auto &input_true = _tensor_registry->getITensor(input_true_idx);
+
+ const auto input_false_idx = op.getInputs().at(ir::operation::Select::Input::INPUT_FALSE);
+ const auto &input_false = _tensor_registry->getITensor(input_false_idx);
+
+ if ((!input_cond->is_dynamic()) && (!input_true->is_dynamic()) && (!input_false->is_dynamic()))
+ {
+ return;
+ }
+
+ auto input_cond_shape = input_cond->getShape();
+ auto input_true_shape = input_true->getShape();
+ auto input_false_shape = input_false->getShape();
+
+ // Select output shape
+ ir::Shape new_shape =
+ shape_inference::inferSelectShape(input_cond_shape, input_true_shape, input_false_shape);
+
+ auto output_ind = op.getOutputs().at(0);
+ auto output = _tensor_registry->getITensor(output_ind);
+
+ output->applyShape(new_shape);
+ assert(output->buffer() != nullptr);
+}
+
+void DynamicShapeInferer::visit(const ir::operation::Shape &op)
+{
+ const auto input_idx{op.getInputs().at(0)};
+ const auto &input = _tensor_registry->getITensor(input_idx);
+ auto input_shape = input->getShape();
+
+ if (!input->is_dynamic())
+ return;
+
+ auto output_ind = op.getOutputs().at(0);
+ auto output = _tensor_registry->getITensor(output_ind);
+
+ ir::Shape output_shape;
+ output_shape.append(input_shape.rank());
+
+ output->applyShape(output_shape);
+ assert(output->buffer() != nullptr);
+}
+
+void DynamicShapeInferer::visit(const ir::operation::Slice &op)
+{
+ const auto input_index{op.getInputs().at(ir::operation::Slice::Input::INPUT)};
+ const auto input = _tensor_registry->getITensor(input_index);
+ const auto begins_index{op.getInputs().at(ir::operation::Slice::Input::BEGINS)};
+ const auto begins = _tensor_registry->getITensor(begins_index);
+ const auto sizes_index{op.getInputs().at(ir::operation::Slice::Input::SIZES)};
+ const auto sizes = _tensor_registry->getITensor(sizes_index);
+ auto output_index = op.getOutputs().at(0);
+ auto output = _tensor_registry->getITensor(output_index);
+
+ if (!(input->is_dynamic() || begins->is_dynamic() || sizes->is_dynamic() || output->is_dynamic()))
+ {
+ return;
+ }
+
+ ir::Shape input_shape = input->getShape();
+ auto begins_buf = reinterpret_cast<const int32_t *>(begins->buffer());
+ auto sizes_buf = reinterpret_cast<const int32_t *>(sizes->buffer());
+
+ ir::Shape new_shape = shape_inference::inferSliceShape(input_shape, begins_buf, sizes_buf);
+
+ output->applyShape(new_shape);
+ assert(output->buffer() != nullptr);
+}
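+
+// Editorial sketch, not part of this patch: each output extent of Slice is the
+// requested size, where -1 means "everything from begin to the end of that
+// dimension". The helper name is hypothetical.
+static inline ir::Shape sketchSliceShape(const ir::Shape &in, const int32_t *begins,
+ const int32_t *sizes)
+{
+ ir::Shape out;
+ for (int i = 0; i < in.rank(); ++i)
+ out.append(sizes[i] == -1 ? in.dim(i) - begins[i] : sizes[i]);
+ return out;
+}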
+
+void DynamicShapeInferer::visit(const ir::operation::Softmax &op)
+{
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Softmax::INPUT));
+}
+
+void DynamicShapeInferer::visit(const ir::operation::SpaceToBatchND &op)
+{
+ const auto input_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
+ const auto block_shape_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
+ const auto padding_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
+ auto output_idx{op.getOutputs().at(0)};
+
+ const auto &input = _tensor_registry->getITensor(input_idx);
+ const auto &block_shape = _tensor_registry->getITensor(block_shape_idx);
+ const auto &padding = _tensor_registry->getITensor(padding_idx);
+ auto output = _tensor_registry->getITensor(output_idx);
+
+ if (!(input->is_dynamic() || block_shape->is_dynamic() || padding->is_dynamic() ||
+ output->is_dynamic()))
+ {
+ return;
+ }
+
+ auto input_shape = input->getShape();
+ auto block_shape_shape = block_shape->getShape();
+ auto padding_shape = padding->getShape();
+
+ auto block_shape_data = reinterpret_cast<int32_t *>(block_shape->buffer());
+ auto padding_data = reinterpret_cast<int32_t *>(padding->buffer());
+
+ ir::Shape new_shape = shape_inference::inferSpaceToBatchNDShape(
+ input_shape, block_shape_shape, padding_shape, block_shape_data, padding_data);
+
+ output->applyShape(new_shape);
+ assert(output->buffer() != nullptr);
+}
+
+void DynamicShapeInferer::visit(const ir::operation::Split &op)
+{
+ const auto input_idx{op.getInputs().at(ir::operation::Split::Input::INPUT)};
+ const auto &input = _tensor_registry->getITensor(input_idx);
+
+ // Return early unless some input or output tensor is dynamic
+ bool has_dynamic = false;
+ for (const auto &output_idx : op.getOutputs())
+ {
+ auto output = _tensor_registry->getITensor(output_idx);
+ has_dynamic |= output->is_dynamic();
+ }
+ if (!input->is_dynamic() && !has_dynamic)
+ {
+ return;
+ }
+
+ auto input_shape = input->getShape();
+
+ const auto axis_idx{op.getInputs().at(ir::operation::Split::Input::AXIS)};
+ const auto &axis = _tensor_registry->getITensor(axis_idx);
+
+ auto axis_value = *reinterpret_cast<const int32_t *>(axis->buffer());
+ const auto num_splits = op.param().num_splits;
+ const auto rank = input_shape.rank();
+ axis_value = axis_value < 0 ? axis_value + rank : axis_value;
+
+ assert(0 <= axis_value && axis_value < rank);
+
+ ir::Shape new_shape = shape_inference::inferSplitShape(input_shape, axis_value, num_splits);
+ for (int out_tensor_idx = 0; out_tensor_idx < num_splits; out_tensor_idx++)
+ {
+ auto output_ind = op.getOutputs().at(out_tensor_idx);
+ auto output = _tensor_registry->getITensor(output_ind);
+
+ output->applyShape(new_shape);
+ assert(output->buffer() != nullptr);
+ }
+}
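+
+// Editorial sketch, not part of this patch: Split divides the `axis` dimension
+// evenly, so each of the num_splits outputs gets dim(axis) / num_splits there
+// and the input's extent everywhere else. The helper name is hypothetical.
+static inline ir::Shape sketchSplitShape(const ir::Shape &in, int32_t axis, int32_t num_splits)
+{
+ ir::Shape out;
+ for (int i = 0; i < in.rank(); ++i)
+ out.append(i == axis ? in.dim(i) / num_splits : in.dim(i));
+ return out;
+}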
+
+void DynamicShapeInferer::visit(const ir::operation::SquaredDifference &op)
+{
+ handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::SquaredDifference::Input::LHS),
+ op.getInputs().at(ir::operation::SquaredDifference::Input::RHS));
+}
+
+void DynamicShapeInferer::visit(const ir::operation::Squeeze &op)
+{
+ const auto input_idx{op.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
+ const auto &input = _tensor_registry->getITensor(input_idx);
+
+ if (!input->is_dynamic())
+ {
+ return;
+ }
+
+ auto input_shape = input->getShape();
+
+ // Squeeze output shape
+ ir::Shape new_shape = shape_inference::inferSqueezeShape(input_shape, op.param());
+
+ auto output_ind = op.getOutputs().at(0);
+ auto output = _tensor_registry->getITensor(output_ind);
+
+ output->applyShape(new_shape);
+ assert(output->buffer() != nullptr);
+}
+
+void DynamicShapeInferer::visit(const ir::operation::StridedSlice &op)
+{
+
+ const auto input_index{op.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
+ auto input = _tensor_registry->getITensor(input_index);
+ ir::Shape input_shape = input->getShape();
+
+ const auto starts_index{op.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
+ auto starts = _tensor_registry->getITensor(starts_index);
+
+ const auto ends_index{op.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
+ auto ends = _tensor_registry->getITensor(ends_index);
+
+ const auto strides_index{op.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
+ auto strides = _tensor_registry->getITensor(strides_index);
+
+ if (!(input->is_dynamic() || starts->is_dynamic() || ends->is_dynamic() || strides->is_dynamic()))
+ {
+ return;
+ }
+
+ const auto begin_mask = op.param().begin_mask;
+ const auto end_mask = op.param().end_mask;
+ const auto shrink_axis_mask = op.param().shrink_axis_mask;
+ const auto rank = input_shape.rank();
+
+ auto op_params = shape_inference::buildStridedSliceParams(
+ reinterpret_cast<uint32_t *>(starts->buffer()), reinterpret_cast<uint32_t *>(ends->buffer()),
+ reinterpret_cast<uint32_t *>(strides->buffer()), begin_mask, end_mask, shrink_axis_mask, rank);
+
+ auto output_index = op.getOutputs().at(0);
+ auto output = _tensor_registry->getITensor(output_index);
+
+ ir::Shape output_shape =
+ onert::shape_inference::inferStridedSliceShape(input_shape, op_params, rank);
+
+ output->applyShape(output_shape);
+ assert(output->buffer() != nullptr);
+}
+
+void DynamicShapeInferer::visit(const ir::operation::Tile &op)
+{
+ auto output_ind = op.getOutputs().at(0);
+ auto output = _tensor_registry->getITensor(output_ind);
+
+ auto input_idx = op.getInputs().at(ir::operation::Tile::Input::INPUT);
+ auto input = _tensor_registry->getITensor(input_idx);
+
+ auto multiplier_idx = op.getInputs().at(ir::operation::Tile::Input::MULTIPLES);
+ auto multiplier = _tensor_registry->getITensor(multiplier_idx);
+
+ if ((!input->is_dynamic()) && (!output->is_dynamic()))
+ return;
+
+ auto input_shape = input->getShape();
+ auto multiplier_buffer = reinterpret_cast<const int32_t *>(multiplier->buffer());
+ assert(multiplier_buffer);
+
+ auto mult_shape = multiplier->getShape();
+ auto output_shape = shape_inference::inferTileShape(
+ input_shape, multiplier_buffer, mult_shape.rank() == 0 ? 1 : mult_shape.dim(0));
+
+ // set output shape and output buffer
+ output->applyShape(output_shape);
+ assert(output->buffer() != nullptr);
+}
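+
+// Editorial sketch, not part of this patch: Tile multiplies every dimension by
+// its multiplier; the buffer is assumed to hold one int32 per input dimension,
+// as asserted above. The helper name is hypothetical.
+static inline ir::Shape sketchTileShape(const ir::Shape &in, const int32_t *multipliers)
+{
+ ir::Shape out;
+ for (int i = 0; i < in.rank(); ++i)
+ out.append(in.dim(i) * multipliers[i]);
+ return out;
+}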
+
+void DynamicShapeInferer::visit(const ir::operation::Transpose &op)
+{
+ // check if output is not dynamic
+ auto output_ind = op.getOutputs().at(0);
+ auto output = _tensor_registry->getITensor(output_ind);
+
+ // from op, access the buffer of second input to read new shape
+ auto input_ind = op.getInputs().at(ir::operation::Transpose::Input::INPUT);
+ auto input = _tensor_registry->getITensor(input_ind);
+ auto input_shape = input->getShape();
+
+ /*
+ Here, the state after compilation (static shape inference) could be one of the following:
+
+ input perms output execution-time shape inf required
+ ------------------------------------ --------------------------------
+ case 1) static const static X
+ case 2) static non-const dynamic O
+ case 3) dynamic const dynamic O
+ case 4) dynamic non-const dynamic O
+
+ So, only when both input and output are static can we skip dynamic shape inference.
+ */
+ if ((!input->is_dynamic()) && (!output->is_dynamic()))
+ return;
+
+ auto perm_ind = op.getInputs().at(ir::operation::Transpose::Input::PERMUTATION);
+ auto perm = _tensor_registry->getITensor(perm_ind);
+
+ ir::Shape new_shape;
+ // TODO Change perm->dimension(0) == 0 to perm->num_elements() == 0
+ if (perm->getShape().dim(0) == 0) // This condition means that perm is (n-1...0)
+ {
+ // Call by (n-1...0)
+ new_shape = shape_inference::inferTransposeShape(input_shape, nullptr, 0);
+ }
+ else
+ {
+ // Check rank
+ if (static_cast<size_t>(input->getShape().rank()) != perm->getShape().num_elements())
+ {
+ throw std::runtime_error("DynamicShapeInferer failed, bad rank size: " +
+ std::to_string(perm->getShape().num_elements()));
+ }
+
+ // set output shape, based on input and params
+ const auto perm_buffer = reinterpret_cast<const int32_t *>(perm->buffer());
+ new_shape =
+ shape_inference::inferTransposeShape(input_shape, perm_buffer, perm->getShape().dim(0));
+ }
+ output->applyShape(new_shape);
+ assert(output->buffer() != nullptr);
+}
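+
+// Editorial sketch, not part of this patch: with an explicit permutation,
+// output dim i takes the input's dim perm[i]; a null/empty perm means "reverse
+// all dimensions", matching the nullptr call above. The helper name is
+// hypothetical.
+static inline ir::Shape sketchTransposeShape(const ir::Shape &in, const int32_t *perm)
+{
+ ir::Shape out;
+ for (int i = 0; i < in.rank(); ++i)
+ out.append(perm ? in.dim(perm[i]) : in.dim(in.rank() - 1 - i));
+ return out;
+}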
+
+void DynamicShapeInferer::visit(const ir::operation::Unpack &op)
+{
+ // check if output is not dynamic
+ const auto input_idx{op.getInputs().at(0)};
+ const auto &input = _tensor_registry->getITensor(input_idx);
+
+ if (!input->is_dynamic())
+ return;
+
+ auto input_shape = input->getShape();
+
+ const auto rank = input_shape.rank();
+ const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
+ const auto num = op.param().num;
+
+ assert(0 <= axis && axis < rank);
+
+ ir::Shape new_shape = shape_inference::inferUnpackShape(input_shape, axis, rank);
+
+ for (int out_tensor_idx = 0; out_tensor_idx < num; out_tensor_idx++)
+ {
+ auto output_ind = op.getOutputs().at(out_tensor_idx);
+ auto output = _tensor_registry->getITensor(output_ind);
+
+ output->applyShape(new_shape);
+
+ assert(output->buffer() != nullptr);
+ }
+}
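+
+// Editorial sketch, not part of this patch: Unpack is the inverse of Pack; the
+// `axis` dimension is removed and each of the `num` outputs keeps the
+// remaining extents. The helper name is hypothetical.
+static inline ir::Shape sketchUnpackShape(const ir::Shape &in, int32_t axis)
+{
+ ir::Shape out;
+ for (int i = 0; i < in.rank(); ++i)
+ if (i != axis)
+ out.append(in.dim(i)); // drop only the unpacked axis
+ return out;
+}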
+
+} // namespace exec
+} // namespace onert
diff --git a/runtime/onert/core/src/exec/ExecTime.cc b/runtime/onert/core/src/exec/ExecTime.cc
index 6bf2744a9..4b82655b9 100644
--- a/runtime/onert/core/src/exec/ExecTime.cc
+++ b/runtime/onert/core/src/exec/ExecTime.cc
@@ -14,12 +14,10 @@
* limitations under the License.
*/
-#include "exec/ExecTime.h"
+#include "ExecTime.h"
-#include <fstream>
-#include <cassert>
-#include <limits>
#include <algorithm>
+#include <cassert>
namespace onert
{
diff --git a/runtime/onert/core/src/exec/ExecTime.h b/runtime/onert/core/src/exec/ExecTime.h
index 846d0930b..95f460053 100644
--- a/runtime/onert/core/src/exec/ExecTime.h
+++ b/runtime/onert/core/src/exec/ExecTime.h
@@ -34,7 +34,7 @@ class ExecTime
{
public:
explicit ExecTime(const std::vector<const backend::Backend *> &backends)
- : _json(backends, _measurements)
+ : _json(backends, _measurements)
{
}
@@ -94,7 +94,7 @@ public:
/**
* @brief Update metrics file with new data.
*/
- void uploadOperationsExecTime() const { _json.uploadOperationsExecTime(); }
+ void storeOperationsExecTime() const { _json.storeOperationsExecTime(); }
static const int64_t NOT_FOUND = -1;
private:
diff --git a/runtime/onert/core/src/exec/ExecTime.test.cc b/runtime/onert/core/src/exec/ExecTime.test.cc
new file mode 100644
index 000000000..939184e4e
--- /dev/null
+++ b/runtime/onert/core/src/exec/ExecTime.test.cc
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ExecTime.h"
+
+#include "backend/IConfig.h"
+#include "backend/Backend.h"
+
+#include <gtest/gtest.h>
+
+#include <string>
+
+namespace
+{
+using namespace onert;
+using namespace exec;
+using namespace backend;
+
+struct MockConfig : public IConfig
+{
+ std::string id() override { return "b1"; }
+ bool initialize() override { return true; };
+ bool supportPermutation() override { return false; }
+ ir::Layout supportLayout(const ir::IOperation &, ir::Layout) override
+ {
+ return ir::Layout::UNKNOWN;
+ }
+ bool supportDynamicTensor() override { return false; }
+ bool supportFP16() override { return false; }
+};
+
+struct MockBackend : public ::onert::backend::Backend
+{
+ std::shared_ptr<onert::backend::IConfig> config() const override
+ {
+ return std::make_shared<MockConfig>();
+ }
+ std::unique_ptr<onert::backend::BackendContext> newContext(ContextData &&) const override
+ {
+ return nullptr;
+ }
+};
+
+TEST(ExecTime, roundtrip_ok)
+{
+ const auto *b = new MockBackend();
+ std::vector<const Backend *> bs = {b};
+ {
+ ExecTime et(bs);
+ et.updateOperationExecTime(b, "op1", true, 100, 100);
+ et.updateOperationExecTime(b, "op1", true, 200, 200);
+ et.updateOperationExecTime(b, "op1", false, 100, 888);
+ et.storeOperationsExecTime();
+ }
+ {
+ ExecTime et(bs);
+ auto time = et.getOperationExecTime(b, "op1", true, 100);
+ ASSERT_EQ(time, 100);
+ // Check interpolation
+ time = et.getOperationExecTime(b, "op1", true, 150);
+ ASSERT_EQ(time, 150);
+ time = et.getOperationExecTime(b, "op1", false, 100);
+ ASSERT_EQ(time, 888);
+ et.storeOperationsExecTime();
+ }
+ // clean up
+ EXPECT_EQ(remove("exec_time.json"), 0);
+}
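+
+// Editorial note, not part of this patch: the interpolation checked above is
+// assumed to be plain linear interpolation between the two nearest recorded
+// operand sizes. With (100 -> 100) and (200 -> 200) stored, a query at 150
+// resolves as:
+//
+//   t = t0 + (t1 - t0) * (size - s0) / (s1 - s0)
+//     = 100 + (200 - 100) * (150 - 100) / (200 - 100) = 150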
+
+TEST(ExecTime, structure)
+{
+
+ const auto *b = new MockBackend();
+ std::vector<const Backend *> bs = {b};
+ {
+ ExecTime et(bs);
+ et.updateOperationExecTime(b, "op1", true, 100, 100);
+ et.updateOperationExecTime(b, "op1", true, 200, 200);
+ et.storeOperationsExecTime();
+ }
+ {
+ ExecTime et(bs);
+ auto time = et.getOperationExecTime(b, "op1", true, 100);
+ ASSERT_EQ(time, 100);
+ // Check interpolation
+ time = et.getOperationExecTime(b, "op1", true, 200);
+ ASSERT_EQ(time, 200);
+ et.storeOperationsExecTime();
+ }
+ // clean up
+ EXPECT_EQ(remove("exec_time.json"), 0);
+}
+} // unnamed namespace
diff --git a/runtime/onert/core/src/exec/Execution.cc b/runtime/onert/core/src/exec/Execution.cc
index 7feb3ab68..f51bed820 100644
--- a/runtime/onert/core/src/exec/Execution.cc
+++ b/runtime/onert/core/src/exec/Execution.cc
@@ -16,6 +16,8 @@
#include "exec/Execution.h"
+#include "train/TrainableExecutors.h"
+
#include "util/logging.h"
namespace onert
@@ -23,33 +25,30 @@ namespace onert
namespace exec
{
-Execution::Execution(const std::shared_ptr<ExecutorMap> &executors) : _executors{executors}
+Execution::Execution(const std::shared_ptr<IExecutors> &executors) : _executors{executors}
{
assert(executors != nullptr);
- assert(executors->at(ir::SubgraphIndex{0}) != nullptr);
- const auto &primary_subg = primary_subgraph();
- _io_desc.inputs.resize(primary_subg.getInputs().size());
- _io_desc.outputs.resize(primary_subg.getOutputs().size());
+ assert(executors->entryExecutor() != nullptr);
+ _io_desc.inputs.resize(_executors->inputSize());
+ _io_desc.outputs.resize(_executors->outputSize());
}
void Execution::changeInputShape(const ir::IOIndex &index, const ir::Shape &new_shape)
{
- // This should be called BEFORE setInput.
- if (_io_desc.inputs.at(index.value()) != 0)
- throw std::runtime_error("Error in calling order");
-
// This will be used later to set input tensor dynamic
// Note that 'compiled' model will not be updated with new_shape
// but new_shape will change model input shape while 'running' the model
_io_desc.dynamic_input_shapes[index] = new_shape;
+
+ VERBOSE(Execution) << "Model input shape will be changed at the start of execute()"
+ << "(index: " << index << ")" << std::endl;
}
// TODO Remove default parameter
void Execution::setInput(const ir::IOIndex &index, const void *buffer, size_t length,
ir::Layout layout)
{
- const auto input_index = primary_subgraph().getInputs().at(index);
- const auto info = primary_subgraph().operands().at(input_index).info();
+ const auto &info = _executors->inputInfo(index);
// TODO handle when (!buffer && length != 0) : setting the input as an optional tensor
@@ -58,10 +57,10 @@ void Execution::setInput(const ir::IOIndex &index, const void *buffer, size_t le
// note: input_shape_sig contains shape passed by nnfw_set_input_tensorinfo()
{
auto input_shape_sig = _io_desc.dynamic_input_shapes.find(index);
- auto size_required = (input_shape_sig != _io_desc.dynamic_input_shapes.end())
- ? input_shape_sig->second.num_elements() *
- onert::ir::sizeOfDataType(info.typeInfo().type())
- : info.total_size();
+ auto size_required =
+ (input_shape_sig != _io_desc.dynamic_input_shapes.end())
+ ? input_shape_sig->second.num_elements() * onert::ir::sizeOfDataType(info.typeInfo().type())
+ : info.total_size();
if (length < size_required)
{
@@ -89,8 +88,7 @@ void Execution::setInput(const ir::IOIndex &index, const ir::TypeInfo &type, con
// TODO Remove default parameter
void Execution::setOutput(const ir::IOIndex &index, void *buffer, size_t length, ir::Layout layout)
{
- const auto output_index = primary_subgraph().getOutputs().at(index);
- const auto info = primary_subgraph().operands().at(output_index).info();
+ const auto &info = _executors->outputInfo(index);
if (length < info.total_size())
{
@@ -104,7 +102,7 @@ void Execution::setOutput(const ir::IOIndex &index, void *buffer, size_t length,
void Execution::setOutput(const ir::IOIndex &index, const ir::TypeInfo &type,
const ir::Shape &shape, void *buffer, size_t length, ir::Layout layout)
{
- auto info = ir::OperandInfo::createStaticInfo(shape, type);
+ const auto &info = ir::OperandInfo::createStaticInfo(shape, type);
if (length < info.total_size())
{
@@ -118,21 +116,21 @@ void Execution::setInputLayout(const ir::IOIndex &index, ir::Layout layout)
{
const auto &input_desc = _io_desc.inputs.at(index.value());
_io_desc.inputs.at(index.value()) =
- std::make_unique<InputDesc>(input_desc->info, input_desc->buffer, input_desc->size, layout);
+ std::make_unique<InputDesc>(input_desc->info, input_desc->buffer, input_desc->size, layout);
}
void Execution::setOutputLayout(const ir::IOIndex &index, ir::Layout layout)
{
const auto &output_desc = _io_desc.outputs.at(index.value());
- _io_desc.outputs.at(index.value()) = std::make_unique<OutputDesc>(
- output_desc->info, output_desc->buffer, output_desc->size, layout);
+ _io_desc.outputs.at(index.value()) =
+ std::make_unique<OutputDesc>(output_desc->info, output_desc->buffer, output_desc->size, layout);
}
void Execution::execute()
{
VERBOSE(Execution) << "Start execution" << std::endl;
- primary_executor()->execute(_io_desc);
+ _executors->execute(_io_desc);
finished = true;
VERBOSE(Execution) << "Execution finished" << std::endl;
@@ -155,13 +153,41 @@ void Execution::waitFinish()
bool Execution::isFinished(void) const { return finished; }
+#ifdef ONERT_TRAIN
+void Execution::train(uint32_t training_step)
+{
+ auto execs = dynamic_cast<exec::train::TrainableExecutors *>(_executors.get());
+ if (!execs)
+ {
+ throw std::runtime_error{"Supported only TrainableExecutors"};
+ }
+
+ VERBOSE(Execution) << "Start training" << std::endl;
+
+ execs->train(_io_desc, training_step);
+ finished = true;
+
+ VERBOSE(Execution) << "training finished" << std::endl;
+}
+
+float Execution::getLoss(const ir::IOIndex &ind)
+{
+ auto execs = dynamic_cast<exec::train::TrainableExecutors *>(_executors.get());
+ if (!execs)
+ {
+ throw std::runtime_error{"Supported only TrainableExecutors"};
+ }
+
+ return execs->getLoss(ind);
+}
+#endif // ONERT_TRAIN
+
ir::Shape Execution::getInputShape(ir::IOIndex ind) const
{
auto itr = _io_desc.dynamic_input_shapes.find(ind);
if (itr == _io_desc.dynamic_input_shapes.end())
{
- auto operand_idx = primary_subgraph().getInputs().at(ind.value());
- return primary_subgraph().operands().at(operand_idx).shape();
+ return _executors->inputInfo(ind).shape();
}
else
{
@@ -169,15 +195,32 @@ ir::Shape Execution::getInputShape(ir::IOIndex ind) const
}
}
+// NNAPI returns failure if ANeuralNetworksExecution_getOutputOperandRank or
+// ANeuralNetworksExecution_getOutputOperandDimensions is called before execution.
+// On the other hand, the NNFW API returns the static shape inference result if
+// nnfw_output_tensorinfo is called before execution.
+// To handle both cases, this method returns the static shape inference result and the failure
+// is handled on the NNAPI frontend.
ir::Shape Execution::getOutputShape(ir::IOIndex ind) const
{
if (!isFinished())
- throw std::runtime_error("Cannot get output shape before execution is finished");
+ return _executors->outputInfo(ind).shape();
const auto &output_desc = _io_desc.outputs.at(ind.value());
return output_desc->info.shape();
}
+size_t Execution::getInputTotalSize(ir::IOIndex ind) const
+{
+ // TODO Support dynamic shape
+ return _executors->inputInfo(ind).total_size();
+}
+
+size_t Execution::getOutputTotalSize(ir::IOIndex ind) const
+{
+ return _executors->outputInfo(ind).total_size();
+}
+
} // namespace exec
} // namespace onert
diff --git a/runtime/onert/core/src/exec/Execution.test.cc b/runtime/onert/core/src/exec/Execution.test.cc
new file mode 100644
index 000000000..fefe8a332
--- /dev/null
+++ b/runtime/onert/core/src/exec/Execution.test.cc
@@ -0,0 +1,635 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "exec/Execution.h"
+
+#include "compiler/Compiler.h"
+#include "compiler/CompilerFactory.h"
+#include "ir/Graph.h"
+#include "ir/operation/BinaryArithmetic.h"
+#include "util/TracingCtx.h"
+
+#include <gtest/gtest.h>
+#include <thread>
+
+namespace
+{
+
+using namespace onert::ir;
+
+class CompiledMockUpModel
+{
+public:
+ CompiledMockUpModel()
+ {
+ // Model: two elementwise add operation
+ // model input: lhs, rhs1
+ // model output: second add result (result2)
+ // constant: rhs2
+ // result1 <= (lhs + rhs)
+ // result2 <= (result1 + rhs2)
+ // lhs, rhs1, rhs2, result1, result2 shape: {1, 2, 2, 1}
+ // activation: none (constant)
+ graph = std::make_shared<Graph>();
+ // 1st add operands (result1 <= lhs + rhs1)
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::FLOAT32};
+ static float rhs2_data[4] = {3, 1, -1, 5};
+ auto operand_lhs = graph->addOperand(shape, type);
+ auto operand_rhs1 = graph->addOperand(shape, type);
+ auto operand_result1 = graph->addOperand(shape, type);
+ auto operand_rhs2 = graph->addOperand(shape, type);
+ auto operand_result2 = graph->addOperand(shape, type);
+ graph->operands()
+ .at(operand_rhs2)
+ .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16));
+ // 2nd add operations (result2 <= result1 + rhs2)
+ operation::BinaryArithmetic::Param param1;
+ param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+ param1.activation = Activation::NONE;
+ auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
+ auto output_set1 = OperandIndexSequence{operand_result1};
+ graph->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
+ operation::BinaryArithmetic::Param param2;
+ param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+ param2.activation = Activation::NONE;
+ auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
+ auto output_set2 = OperandIndexSequence{operand_result2};
+ graph->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
+ // Identify model inputs and outputs
+ graph->addInput(operand_lhs);
+ graph->addInput(operand_rhs1);
+ graph->addOutput(operand_result2);
+ graph->verify();
+
+ // Compile
+ auto model = std::make_shared<onert::ir::Model>();
+ model->push(onert::ir::SubgraphIndex{0}, graph);
+ coptions = onert::compiler::CompilerOptions::fromGlobalConfig();
+ onert::compiler::Compiler compiler{model, *coptions};
+ artifact = compiler.compile();
+ }
+
+public:
+ std::shared_ptr<Graph> graph;
+ std::unique_ptr<onert::compiler::CompilerOptions> coptions;
+ std::shared_ptr<onert::compiler::CompilerArtifact> artifact;
+};
+
+class CompiledMockUpMultiModel
+{
+public:
+ CompiledMockUpMultiModel()
+ {
+ // Model0: a float elementwise add operation
+ // Model0 input: lhs0, rhs0
+ // Model0 output: add result (result0)
+
+ // Model1: a qasymm8 elementwise add operation
+ // Model1 input: result0, rhs1
+ // Model1 output: add result (result1)
+
+ // Model2: a float elementwise add operation
+ // Model2 input: result0, result1
+ // Model2 output: add result (result2)
+
+ // constant: rhs1
+ // result0 <= (lhs0 + rhs0)
+ // result1 <= (result0 + rhs1)
+ // result2 <= (result0 + result1)
+ // lhs0, rhs0, rhs1, result0, result1, result2 shape: {1, 2, 2, 1}
+ // activation: none (constant)
+
+ // Update edge information
+ edges.pkg_inputs.emplace_back(ModelIndex{0}, SubgraphIndex{0}, IOIndex{0});
+ edges.pkg_inputs.emplace_back(ModelIndex{0}, SubgraphIndex{0}, IOIndex{1});
+ edges.pkg_outputs.emplace_back(ModelIndex{2}, SubgraphIndex{0}, IOIndex{0});
+ // From
+ const auto result0 = IODesc{ModelIndex{0}, SubgraphIndex{0}, IOIndex{0}};
+ const auto result1 = IODesc{ModelIndex{1}, SubgraphIndex{0}, IOIndex{0}};
+ // To
+ const auto lhs1 = IODesc{ModelIndex{1}, SubgraphIndex{0}, IOIndex{0}};
+ const auto lhs2 = IODesc{ModelIndex{2}, SubgraphIndex{0}, IOIndex{0}};
+ const auto rhs2 = IODesc{ModelIndex{2}, SubgraphIndex{0}, IOIndex{1}};
+ edges.edges.insert({result0, lhs1});
+ edges.edges.insert({result0, lhs2});
+ edges.edges.insert({result1, rhs2});
+
+ for (size_t i = 0; i < 3; ++i)
+ {
+ graphs.emplace_back(std::make_shared<Graph>());
+ }
+ Shape shape{1, 2, 2, 1};
+
+ // Model0's add operands (result0 <= lhs0 + rhs0)
+ DataType types[3] = {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, DataType::FLOAT32};
+ auto operand_lhs0 = graphs[0]->addOperand(shape, TypeInfo{types[0]});
+ auto operand_rhs0 = graphs[0]->addOperand(shape, TypeInfo{types[0]});
+ auto operand_result0 = graphs[0]->addOperand(shape, TypeInfo{types[0]});
+
+ // Model0's add operation
+ operation::BinaryArithmetic::Param param0;
+ param0.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+ param0.activation = Activation::NONE;
+ auto input_set0 = OperandIndexSequence{operand_lhs0, operand_rhs0};
+ auto output_set0 = OperandIndexSequence{operand_result0};
+ graphs[0]->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set0, output_set0, param0));
+
+ // Model0's inputs/outputs
+ graphs[0]->addInput(operand_lhs0);
+ graphs[0]->addInput(operand_rhs0);
+ graphs[0]->addOutput(operand_result0);
+ graphs[0]->verify();
+
+ // Model1's add operands (result1 <= Model0 result + rhs1)
+ // static float rhs1_data[4] = {3, 1, -1, 5};
+ static uint8_t rhs1_data[4] = {131, 129, 127, 133};
+ const float scale = 1;
+ const int32_t zero_point = 128;
+ auto operand_lhs1 = graphs[1]->addOperand(shape, TypeInfo{types[1], scale, zero_point});
+ auto operand_rhs1 = graphs[1]->addOperand(shape, TypeInfo{types[1], scale, zero_point});
+ auto operand_result1 = graphs[1]->addOperand(shape, TypeInfo{types[1], scale, zero_point});
+ graphs[1]
+ ->operands()
+ .at(operand_rhs1)
+ .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs1_data), 4));
+
+ // Model1's add operation
+ operation::BinaryArithmetic::Param param1;
+ param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+ param1.activation = Activation::NONE;
+ auto input_set1 = OperandIndexSequence{operand_lhs1, operand_rhs1};
+ auto output_set1 = OperandIndexSequence{operand_result1};
+ graphs[1]->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
+
+ // Model1's inputs/outputs
+ graphs[1]->addInput(operand_lhs1);
+ graphs[1]->addOutput(operand_result1);
+ graphs[1]->verify();
+
+ // Model2's add operands (result2 <= Model0 result + Model1 result)
+ auto operand_lhs2 = graphs[2]->addOperand(shape, TypeInfo{types[2]});
+ auto operand_rhs2 = graphs[2]->addOperand(shape, TypeInfo{types[2]});
+ auto operand_result2 = graphs[2]->addOperand(shape, TypeInfo{types[2]});
+
+ // Model2's add operation
+ operation::BinaryArithmetic::Param param2;
+ param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+ param2.activation = Activation::NONE;
+ auto input_set2 = OperandIndexSequence{operand_lhs2, operand_rhs2};
+ auto output_set2 = OperandIndexSequence{operand_result2};
+ graphs[2]->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
+
+ // Model2's inputs/outputs
+ graphs[2]->addInput(operand_lhs2);
+ graphs[2]->addInput(operand_rhs2);
+ graphs[2]->addOutput(operand_result2);
+ graphs[2]->verify();
+
+ // Compile
+ compile();
+ }
+
+public:
+ void compile()
+ {
+ auto nnpkg = std::make_shared<onert::ir::NNPkg>();
+ coptions.clear();
+ for (uint16_t i = 0; i < graphs.size(); ++i)
+ {
+ coptions.emplace_back(onert::compiler::CompilerOptions::fromGlobalConfig());
+
+ auto model = std::make_shared<onert::ir::Model>();
+ model->push(SubgraphIndex{0}, graphs[i]);
+
+ nnpkg->push(onert::ir::ModelIndex{i}, std::move(model));
+ }
+ for (const auto &pkg_input : edges.pkg_inputs)
+ {
+ nnpkg->addInput(pkg_input);
+ }
+ for (const auto &pkg_output : edges.pkg_outputs)
+ {
+ nnpkg->addOutput(pkg_output);
+ }
+ for (const auto &edge : edges.edges)
+ {
+ nnpkg->addEdge(edge.from, edge.to);
+ }
+ auto compiler = onert::compiler::CompilerFactory::get().create(nnpkg, coptions);
+ nnpkg.reset();
+ artifact = compiler->compile();
+ }
+
+public:
+ std::vector<std::shared_ptr<Graph>> graphs;
+ std::vector<std::unique_ptr<onert::compiler::CompilerOptions>> coptions;
+ std::shared_ptr<onert::compiler::CompilerArtifact> artifact;
+ ModelEdges edges;
+};
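+
+// For reference, the three models above compose as follows:
+//   result0 = lhs + rhs          (Model0, float)
+//   result1 = result0 + rhs1     (Model1, quantized; rhs1 == {3, 1, -1, 5})
+//   result2 = result0 + result1  (Model2, float)
+// With lhs = {1, 0, -1, -2} and rhs = {1, -3, 2, -4} this yields result0 = {2, -3, 1, -6},
+// result1 = {5, -2, 0, -1} and result2 = {7, -5, 1, -7}, matching the expected buffers
+// in the multi_model_* tests below.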
+
+TEST(ExecInstance, simple)
+{
+ auto mockup = CompiledMockUpModel();
+ auto graph = mockup.graph;
+ auto executors = mockup.artifact->_executors;
+
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output = IOIndex{0};
+
+ const float input1_buffer[4] = {1, 0, -1, -2};
+ const float input2_buffer[4] = {1, -3, 2, -4};
+ float output_buffer[4] = {};
+ const float output_expected[4] = {5, -2, 0, -1};
+
+ onert::exec::Execution execution{executors};
+
+ execution.setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16);
+ execution.setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16);
+ execution.setOutput(output, reinterpret_cast<void *>(output_buffer), 16);
+ execution.execute();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(output_buffer[i], output_expected[i]);
+ }
+}
+
+TEST(ExecInstance, twoCompile)
+{
+ auto mockup = CompiledMockUpModel();
+ auto graph = mockup.graph;
+ auto executors1 = mockup.artifact->_executors;
+ onert::exec::Execution execution1{executors1};
+
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output = IOIndex{0};
+
+ const float exe1_input1_buffer[4] = {1, 0, -1, -2};
+ const float exe1_input2_buffer[4] = {1, -3, 2, -4};
+ float exe1_output_buffer[4] = {};
+ const float exe1_output_expected[4] = {5, -2, 0, -1};
+
+ execution1.setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16);
+ execution1.setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16);
+ execution1.setOutput(output, reinterpret_cast<void *>(exe1_output_buffer), 16);
+
+ // Make new executor: compile again
+ auto model = std::make_shared<onert::ir::Model>();
+ model->push(onert::ir::SubgraphIndex{0}, graph);
+ auto coptions = onert::compiler::CompilerOptions::fromGlobalConfig();
+ onert::compiler::Compiler compiler{model, *coptions};
+ std::shared_ptr<onert::compiler::CompilerArtifact> artifact = compiler.compile();
+ onert::exec::Execution execution2{artifact->_executors};
+
+ const float exe2_input1_buffer[4] = {2, 1, -2, 0};
+ const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
+ float exe2_output_buffer[4] = {};
+ const float exe2_output_expected[4] = {2, 5, -2, 7};
+
+ execution2.setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16);
+ execution2.setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16);
+ execution2.setOutput(output, reinterpret_cast<void *>(exe2_output_buffer), 16);
+
+ execution1.execute();
+ execution2.execute();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
+ EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
+ }
+}
+
+// Support two initialized execution instances, then ordered execution
+TEST(ExecInstance, twoExecution)
+{
+ auto mockup = CompiledMockUpModel();
+ auto executors = mockup.artifact->_executors;
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output1 = IOIndex{0};
+
+ const float exe1_input1_buffer[4] = {1, 0, -1, -2};
+ const float exe1_input2_buffer[4] = {1, -3, 2, -4};
+ float exe1_output_buffer[4] = {};
+ const float exe1_output_expected[4] = {5, -2, 0, -1};
+ const float exe2_output_expected[4] = {2, 5, -2, 7};
+
+ onert::exec::Execution execution1{executors};
+ execution1.setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16);
+ execution1.setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16);
+ execution1.setOutput(output1, reinterpret_cast<void *>(exe1_output_buffer), 16);
+
+ const float exe2_input1_buffer[4] = {2, 1, -2, 0};
+ const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
+ float exe2_output_buffer[4] = {};
+
+ // Make new execution
+ onert::exec::Execution execution2{executors};
+ execution2.setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16);
+ execution2.setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16);
+ execution2.setOutput(output1, reinterpret_cast<void *>(exe2_output_buffer), 16);
+
+ execution1.execute();
+ execution2.execute();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
+ EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
+ }
+}
+
+class Inference
+{
+public:
+ Inference(const float (&input1)[4], const float (&input2)[4], float (&output)[4],
+ std::shared_ptr<onert::exec::IExecutors> &executors)
+ : _input1{input1}, _input2{input2}, _output{output}, _executors{executors}
+ {
+ // DO NOTHING
+ }
+
+ void inference(void)
+ {
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output1 = IOIndex{0};
+
+ onert::exec::Execution execution{_executors};
+ execution.setInput(input1, reinterpret_cast<const void *>(_input1), 16);
+ execution.setInput(input2, reinterpret_cast<const void *>(_input2), 16);
+ execution.setOutput(output1, reinterpret_cast<void *>(_output), 16);
+
+ execution.execute();
+ }
+
+private:
+ const float (&_input1)[4];
+ const float (&_input2)[4];
+ float (&_output)[4];
+ std::shared_ptr<onert::exec::IExecutors> &_executors;
+};
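+
+// Each thread below builds its own onert::exec::Execution on top of the shared executors;
+// serializing the actual runs is left to the executor-level mutex (see ExecutorBase::execute),
+// which is what makes concurrent inference requests safe for a single-model package.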
+
+// Support multi-threaded execution
+TEST(ExecInstance, twoThreads)
+{
+ auto mockup = CompiledMockUpModel();
+ auto executors = mockup.artifact->_executors;
+
+ const float exe1_input1_buffer[4] = {1, 0, -1, -2};
+ const float exe1_input2_buffer[4] = {1, -3, 2, -4};
+ float exe1_output_buffer[4] = {};
+ const float exe1_output_expected[4] = {5, -2, 0, -1};
+
+ Inference execution1{exe1_input1_buffer, exe1_input2_buffer, exe1_output_buffer, executors};
+
+ const float exe2_input1_buffer[4] = {2, 1, -2, 0};
+ const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
+ float exe2_output_buffer[4] = {};
+ const float exe2_output_expected[4] = {2, 5, -2, 7};
+
+ Inference execution2{exe2_input1_buffer, exe2_input2_buffer, exe2_output_buffer, executors};
+
+ std::thread t1{&Inference::inference, &execution1};
+ std::thread t2{&Inference::inference, &execution2};
+
+ t1.join();
+ t2.join();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
+ EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
+ }
+}
+
+// Support asynchronous execution
+TEST(ExecInstance, async)
+{
+ auto mockup = CompiledMockUpModel();
+ auto graph = mockup.graph;
+ auto executors = mockup.artifact->_executors;
+
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output = IOIndex{0};
+
+ const float input1_buffer[4] = {1, 0, -1, -2};
+ const float input2_buffer[4] = {1, -3, 2, -4};
+ float output_buffer[4] = {};
+ const float output_expected[4] = {5, -2, 0, -1};
+
+ onert::exec::Execution execution{executors};
+
+ execution.setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16);
+ execution.setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16);
+ execution.setOutput(output, reinterpret_cast<void *>(output_buffer), 16);
+ execution.startExecute();
+ execution.waitFinish();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(output_buffer[i], output_expected[i]);
+ }
+}
+
+TEST(ExecInstance, multi_model_simple)
+{
+ auto mockup = CompiledMockUpMultiModel();
+ auto executors = mockup.artifact->_executors;
+
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output = IOIndex{0};
+
+ const float input1_buffer[4] = {1, 0, -1, -2};
+ const float input2_buffer[4] = {1, -3, 2, -4};
+ float output_buffer[4] = {};
+ const float output_expected[4] = {7, -5, 1, -7};
+
+ onert::exec::Execution execution{executors};
+
+ execution.setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16);
+ execution.setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16);
+ execution.setOutput(output, reinterpret_cast<void *>(output_buffer), 16);
+ execution.execute();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(output_buffer[i], output_expected[i]);
+ }
+}
+
+TEST(ExecInstance, multi_model_twoCompile)
+{
+ auto mockup = CompiledMockUpMultiModel();
+ auto executors1 = mockup.artifact->_executors;
+ onert::exec::Execution execution1{executors1};
+
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output = IOIndex{0};
+
+ const float exe1_input1_buffer[4] = {1, 0, -1, -2};
+ const float exe1_input2_buffer[4] = {1, -3, 2, -4};
+ float exe1_output_buffer[4] = {};
+ const float exe1_output_expected[4] = {7, -5, 1, -7};
+
+ execution1.setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16);
+ execution1.setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16);
+ execution1.setOutput(output, reinterpret_cast<void *>(exe1_output_buffer), 16);
+
+ // Make new executor: compile again
+ mockup.compile();
+ onert::exec::Execution execution2{mockup.artifact->_executors};
+
+ const float exe2_input1_buffer[4] = {2, 1, -2, 0};
+ const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
+ float exe2_output_buffer[4] = {};
+ const float exe2_output_expected[4] = {1, 9, -3, 9};
+
+ execution2.setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16);
+ execution2.setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16);
+ execution2.setOutput(output, reinterpret_cast<void *>(exe2_output_buffer), 16);
+
+ execution1.execute();
+ execution2.execute();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
+ EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
+ }
+}
+
+// Support two initialized execution instances, then ordered execution
+TEST(ExecInstance, multi_model_twoExecution)
+{
+ auto mockup = CompiledMockUpMultiModel();
+ auto executors = mockup.artifact->_executors;
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output1 = IOIndex{0};
+
+ const float exe1_input1_buffer[4] = {1, 0, -1, -2};
+ const float exe1_input2_buffer[4] = {1, -3, 2, -4};
+ float exe1_output_buffer[4] = {};
+ const float exe1_output_expected[4] = {7, -5, 1, -7};
+ const float exe2_output_expected[4] = {1, 9, -3, 9};
+
+ onert::exec::Execution execution1{executors};
+ execution1.setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16);
+ execution1.setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16);
+ execution1.setOutput(output1, reinterpret_cast<void *>(exe1_output_buffer), 16);
+
+ const float exe2_input1_buffer[4] = {2, 1, -2, 0};
+ const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
+ float exe2_output_buffer[4] = {};
+
+ // Make new execution
+ onert::exec::Execution execution2{executors};
+ execution2.setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16);
+ execution2.setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16);
+ execution2.setOutput(output1, reinterpret_cast<void *>(exe2_output_buffer), 16);
+
+ execution1.execute();
+ execution1.execute();
+ execution2.execute();
+ execution2.execute();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
+ EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
+ }
+}
+
+// Multi-model is not thread-safe yet
+
+// Support asynchronous execution
+TEST(ExecInstance, multi_model_async)
+{
+ auto mockup = CompiledMockUpMultiModel();
+ auto executors = mockup.artifact->_executors;
+
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output = IOIndex{0};
+
+ const float input1_buffer[4] = {1, 0, -1, -2};
+ const float input2_buffer[4] = {1, -3, 2, -4};
+ float output_buffer[4] = {};
+ const float output_expected[4] = {7, -5, 1, -7};
+
+ onert::exec::Execution execution{executors};
+
+ execution.setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16);
+ execution.setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16);
+ execution.setOutput(output, reinterpret_cast<void *>(output_buffer), 16);
+ execution.startExecute();
+ execution.waitFinish();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(output_buffer[i], output_expected[i]);
+ }
+}
+
+TEST(ExecInstance, multi_model_dequant_input_quant_output)
+{
+ auto mockup = CompiledMockUpMultiModel();
+ auto executors = mockup.artifact->_executors;
+
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output = IOIndex{0};
+
+ const uint8_t input1_buffer[4] = {138, 128, 118, 108}; // {1, 0, -1, -2}
+ const uint8_t input2_buffer[4] = {138, 98, 148, 88}; // {1, -3, 2, -4}
+ uint8_t output_buffer[4] = {};
+ const uint8_t output_expected[4] = {198, 78, 138, 58}; // {7, -5, 1, -7}
+ float scale = 0.1;
+ int32_t zero_point = 128;
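+ // For reference, dequantization follows real = scale * (q - zero_point): e.g., the first
+ // input value 138 decodes to 0.1 * (138 - 128) = 1.0, and the expected output 7.0 encodes
+ // back to 7.0 / 0.1 + 128 = 198.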
+
+ onert::exec::Execution execution{executors};
+
+ onert::ir::TypeInfo type_info{onert::ir::DataType::QUANT_UINT8_ASYMM, scale, zero_point};
+ execution.setInput(input1, type_info, execution.getInputShape(input1),
+ reinterpret_cast<const void *>(input1_buffer), 4, onert::ir::Layout::NHWC);
+ execution.setInput(input2, type_info, execution.getInputShape(input2),
+ reinterpret_cast<const void *>(input2_buffer), 4, onert::ir::Layout::NHWC);
+ execution.setOutput(output, type_info, execution.getOutputShape(output),
+ reinterpret_cast<void *>(output_buffer), 4, onert::ir::Layout::NHWC);
+ execution.execute();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(output_buffer[i], output_expected[i]);
+ }
+}
+
+// TODO Add a unit test multi_model_quant_input_dequant_output
+
+} // namespace
diff --git a/runtime/onert/core/src/exec/ExecutionObservee.cc b/runtime/onert/core/src/exec/ExecutionObservee.cc
index ddb1fb6a0..66610f0e0 100644
--- a/runtime/onert/core/src/exec/ExecutionObservee.cc
+++ b/runtime/onert/core/src/exec/ExecutionObservee.cc
@@ -26,37 +26,37 @@ void ExecutionObservee::add(std::unique_ptr<IExecutionObserver> observer)
_observers.emplace_back(std::move(observer));
}
-void ExecutionObservee::notifyModelBegin(IExecutor *executor)
+void ExecutionObservee::notifySubgraphBegin(ir::SubgraphIndex ind)
{
- for (auto &o : _observers)
+ for (auto &&o : _observers)
{
- o->handleBegin(executor);
+ o->handleSubgraphBegin(ind);
}
}
-void ExecutionObservee::notifyModelEnd(IExecutor *executor)
+void ExecutionObservee::notifySubgraphEnd(ir::SubgraphIndex ind)
{
- for (auto &o : _observers)
+ for (auto &&o : _observers)
{
- o->handleEnd(executor);
+ o->handleSubgraphEnd(ind);
}
}
-void ExecutionObservee::notifyJobBegin(IExecutor *executor, const ir::OpSequence *op_seq,
- const backend::Backend *backend)
+void ExecutionObservee::notifyJobBegin(IExecutor *executor, ir::SubgraphIndex subg_ind,
+ ir::OperationIndex op_ind, const backend::Backend *backend)
{
- for (auto &o : _observers)
+ for (auto &&o : _observers)
{
- o->handleBegin(executor, op_seq, backend);
+ o->handleJobBegin(executor, subg_ind, op_ind, backend);
}
}
-void ExecutionObservee::notifyJobEnd(IExecutor *executor, const ir::OpSequence *op_seq,
- const backend::Backend *backend)
+void ExecutionObservee::notifyJobEnd(IExecutor *executor, ir::SubgraphIndex subg_ind,
+ ir::OperationIndex op_ind, const backend::Backend *backend)
{
- for (auto &o : _observers)
+ for (auto &&o : _observers)
{
- o->handleEnd(executor, op_seq, backend);
+ o->handleJobEnd(executor, subg_ind, op_ind, backend);
}
}
diff --git a/runtime/onert/core/src/exec/ExecutionObservee.h b/runtime/onert/core/src/exec/ExecutionObservee.h
index 49d409a3a..3ee1754c9 100644
--- a/runtime/onert/core/src/exec/ExecutionObservee.h
+++ b/runtime/onert/core/src/exec/ExecutionObservee.h
@@ -17,9 +17,11 @@
#ifndef __ONERT_EXEC_EXECUTION_OBSERVEE_H__
#define __ONERT_EXEC_EXECUTION_OBSERVEE_H__
-#include <list>
+#include "ExecutionObservers.h"
+
+#include "ir/Index.h"
-#include "exec/ExecutionObservers.h"
+#include <list>
namespace onert
{
@@ -39,11 +41,11 @@ public:
* @param observer Observer to be added
*/
void add(std::unique_ptr<IExecutionObserver> observer);
- void notifyModelBegin(IExecutor *executor);
- void notifyModelEnd(IExecutor *executor);
- void notifyJobBegin(IExecutor *executor, const ir::OpSequence *op_seq,
+ void notifySubgraphBegin(ir::SubgraphIndex ind);
+ void notifySubgraphEnd(ir::SubgraphIndex ind);
+ void notifyJobBegin(IExecutor *executor, ir::SubgraphIndex subg_ind, ir::OperationIndex op_ind,
const backend::Backend *backend);
- void notifyJobEnd(IExecutor *executor, const ir::OpSequence *op_seq,
+ void notifyJobEnd(IExecutor *executor, ir::SubgraphIndex subg_ind, ir::OperationIndex op_ind,
const backend::Backend *backend);
private:
diff --git a/runtime/onert/core/src/exec/ExecutionObservers.cc b/runtime/onert/core/src/exec/ExecutionObservers.cc
index 060f874de..5245518a0 100644
--- a/runtime/onert/core/src/exec/ExecutionObservers.cc
+++ b/runtime/onert/core/src/exec/ExecutionObservers.cc
@@ -14,14 +14,58 @@
* limitations under the License.
*/
-#include "exec/ExecutionObservers.h"
+#include "ExecutionObservers.h"
-#include <string>
+#include "../util/EventWriter.h"
#include "util/logging.h"
-#include "exec/IExecutor.h"
-#include "misc/polymorphic_downcast.h"
-#include "ir/OpSequence.h"
+
+#include <misc/polymorphic_downcast.h>
+
+#include <string>
+#include <sstream>
+
+namespace
+{
+
+void setUserData(const onert::ir::Graph &g, const onert::ir::IOperation *op,
+ decltype(EventCollector::Event::userData) &data)
+{
+ // From a tensor of shape [a, b, c], this will return a string "shape(a b c)".
+ // String like "[1, 2, 3]" looks better but this will be considered as a list in Json
+ // so text search (e.g., Ctrl-F in Chrome Tracing) could be difficult
+ auto build_shape_str = [&](onert::ir::OperandIndex operand_idx) {
+ std::string shape_str;
+ auto &shape = g.operands().at(operand_idx).info().shape();
+ for (int i = 0; i < shape.rank(); i++)
+ {
+ if (i == 0)
+ shape_str = "shape(" + std::to_string(shape.dim(i));
+ else
+ shape_str += " " + std::to_string(shape.dim(i));
+ }
+ shape_str += ")";
+
+ return shape_str;
+ };
+
+ auto &inputs = op->getInputs();
+ auto size = inputs.size();
+ for (size_t i = 0; i < size; i++)
+ {
+ auto operand_idx = inputs.at(i);
+ if (operand_idx.undefined())
+ continue;
+
+ std::string key("input_shape_" + std::to_string(i));
+ std::string value = build_shape_str(operand_idx);
+ data.emplace_back(std::make_pair(key, value));
+ }
+
+ // add other userData as needed
+}
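+
+// For reference, for the 4-D shape {1, 2, 2, 1} used in the tests above, setUserData
+// produces entries such as ("input_shape_0", "shape(1 2 2 1)") in the event's userData.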
+
+} // namespace
namespace onert
{
@@ -29,8 +73,8 @@ namespace onert
namespace exec
{
-void ProfileObserver::handleBegin(onert::exec::IExecutor *, const ir::OpSequence *,
- const onert::backend::Backend *backend)
+void ProfileObserver::handleJobBegin(onert::exec::IExecutor *, ir::SubgraphIndex,
+ ir::OperationIndex, const onert::backend::Backend *backend)
{
_timer = backend->config()->timer();
if (_timer == nullptr)
@@ -38,14 +82,14 @@ void ProfileObserver::handleBegin(onert::exec::IExecutor *, const ir::OpSequence
_timer->handleBegin();
}
-void ProfileObserver::handleEnd(IExecutor *exec, const ir::OpSequence *op_seq,
- const backend::Backend *backend)
+void ProfileObserver::handleJobEnd(IExecutor *exec, ir::SubgraphIndex,
+ const ir::OperationIndex op_ind, const backend::Backend *backend)
{
_timer->handleEnd();
const auto timer_res = _timer->getTime();
- // NOTE This assumes there is just one operation in a op_seq
- const auto &node = _graph.operations().at(op_seq->operations().at(0));
+ // NOTE Each job now corresponds to exactly one operation
+ const auto &node = _graph.operations().at(op_ind);
auto node_name = node.name();
VERBOSE(ProfileInfo) << "Time for " << node_name << " : " << timer_res << std::endl;
@@ -54,7 +98,7 @@ void ProfileObserver::handleEnd(IExecutor *exec, const ir::OpSequence *op_seq,
ir::DataType::QUANT_UINT8_ASYMM;
uint32_t size = 0;
- for (const auto &ind : node.getInputs() + node.getOutputs())
+ for (const auto &ind : (node.getInputs() + node.getOutputs()) | ir::Remove::UNDEFINED)
{
size += exec->graph().operands().at(ind).info().total_size();
}
@@ -69,64 +113,59 @@ void ProfileObserver::handleEnd(IExecutor *exec, const ir::OpSequence *op_seq,
}
};
-ChromeTracingObserver::ChromeTracingObserver(const std::string &filepath, const ir::Graph &graph)
- : _ofs{filepath, std::ofstream::out}, _recorder{}, _collector{&_recorder}, _graph{graph}
+TracingObserver::TracingObserver(const std::string &filepath, const ir::Graph &graph,
+ const util::TracingCtx *tracing_ctx)
+ : _recorder{std::make_unique<EventRecorder>()}, _collector{_recorder.get()}, _graph{graph},
+ _tracing_ctx{tracing_ctx}
{
+ _event_writer = EventWriter::get(filepath);
+ _event_writer->startToUse();
}
-ChromeTracingObserver::~ChromeTracingObserver()
+TracingObserver::~TracingObserver()
{
try
{
- _recorder.writeToFile(_ofs);
+ _event_writer->readyToFlush(std::move(_recorder));
}
catch (const std::exception &e)
{
- std::cerr << "E: Fail to record event in ChromeTracingObserver: " << e.what() << std::endl;
+ std::cerr << "E: Fail to record event in TracingObserver: " << e.what() << std::endl;
}
}
-void ChromeTracingObserver::handleBegin(IExecutor *)
+void TracingObserver::handleSubgraphBegin(ir::SubgraphIndex subg_ind)
{
- _collector.onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "runtime", "Graph"});
+ _collector.onEvent(
+ EventCollector::SubgEvent{_tracing_ctx, EventCollector::Edge::BEGIN, subg_ind.value()});
}
-void ChromeTracingObserver::handleBegin(IExecutor *, const ir::OpSequence *op_seq,
- const backend::Backend *backend)
+void TracingObserver::handleJobBegin(IExecutor *, ir::SubgraphIndex subg_ind,
+ ir::OperationIndex op_ind, const backend::Backend *backend)
{
std::string backend_id = backend->config()->id();
- _collector.onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, backend_id,
- opSequenceTag(op_seq, _graph.operations())});
+ const auto &op = _graph.operations().at(op_ind);
+ auto ev = EventCollector::OpSeqEvent{_tracing_ctx, EventCollector::Edge::BEGIN,
+ subg_ind.value(), backend_id,
+ op_ind.value(), op.name()};
+ // add shape of inputs
+ setUserData(_graph, &op, ev.userData);
+ _collector.onEvent(ev);
}
-void ChromeTracingObserver::handleEnd(IExecutor *, const ir::OpSequence *op_seq,
- const backend::Backend *backend)
+void TracingObserver::handleJobEnd(IExecutor *, ir::SubgraphIndex subg_ind,
+ ir::OperationIndex op_ind, const backend::Backend *backend)
{
std::string backend_id = backend->config()->id();
- _collector.onEvent(EventCollector::Event{EventCollector::Edge::END, backend_id,
- opSequenceTag(op_seq, _graph.operations())});
+ _collector.onEvent(EventCollector::OpSeqEvent{_tracing_ctx, EventCollector::Edge::END,
+ subg_ind.value(), backend_id, op_ind.value(),
+ _graph.operations().at(op_ind).name()});
}
-void ChromeTracingObserver::handleEnd(IExecutor *)
+void TracingObserver::handleSubgraphEnd(ir::SubgraphIndex subg_ind)
{
- _collector.onEvent(EventCollector::Event{EventCollector::Edge::END, "runtime", "Graph"});
-}
-
-std::string ChromeTracingObserver::opSequenceTag(const ir::OpSequence *op_seq,
- const ir::Operations &operations)
-{
- if (op_seq->size() == 0)
- return "Empty OpSequence";
-
- const auto &first_op_idx = op_seq->operations().at(0);
- const auto &first_op_node = operations.at(first_op_idx);
- std::string tag = "$" + std::to_string(first_op_idx.value());
- tag += " " + first_op_node.name();
- if (op_seq->size() > 1)
- {
- tag += " (+" + std::to_string(op_seq->size() - 1) + ")";
- }
- return tag;
+ _collector.onEvent(
+ EventCollector::SubgEvent{_tracing_ctx, EventCollector::Edge::END, subg_ind.value()});
}
} // namespace exec
diff --git a/runtime/onert/core/src/exec/ExecutionObservers.h b/runtime/onert/core/src/exec/ExecutionObservers.h
index ac0076ed2..7e93ecf7c 100644
--- a/runtime/onert/core/src/exec/ExecutionObservers.h
+++ b/runtime/onert/core/src/exec/ExecutionObservers.h
@@ -17,13 +17,16 @@
#ifndef __ONERT_EXEC_OBSREVERS_H__
#define __ONERT_EXEC_OBSREVERS_H__
-#include "exec/IFunction.h"
-#include "ir/OpSequence.h"
#include "ExecTime.h"
-#include "util/ITimer.h"
+#include "../util/EventCollector.h"
+#include "../util/EventRecorder.h"
+#include "../util/EventWriter.h"
+
#include "exec/IExecutor.h"
-#include "util/EventCollector.h"
-#include "util/EventRecorder.h"
+#include "ir/Index.h"
+#include "ir/IOperation.h"
+#include "util/ITimer.h"
+#include "util/TracingCtx.h"
namespace onert
{
@@ -33,13 +36,15 @@ class IExecutionObserver
{
public:
/// @brief Invoked just before model (not individual operation) execution begins
- virtual void handleBegin(IExecutor *) { return; }
+ virtual void handleSubgraphBegin(ir::SubgraphIndex) { return; }
- virtual void handleBegin(IExecutor *, const ir::OpSequence *, const backend::Backend *) = 0;
- virtual void handleEnd(IExecutor *, const ir::OpSequence *, const backend::Backend *) = 0;
+ virtual void handleJobBegin(IExecutor *, ir::SubgraphIndex, ir::OperationIndex,
+ const backend::Backend *) = 0;
+ virtual void handleJobEnd(IExecutor *, ir::SubgraphIndex, ir::OperationIndex,
+ const backend::Backend *) = 0;
/// @brief Invoked just after model (not individual operation) execution ends
- virtual void handleEnd(IExecutor *) { return; }
+ virtual void handleSubgraphEnd(ir::SubgraphIndex) { return; }
virtual ~IExecutionObserver() = default;
};
@@ -48,13 +53,15 @@ class ProfileObserver : public IExecutionObserver
{
public:
explicit ProfileObserver(std::shared_ptr<ExecTime> et, const ir::Graph &graph)
- : _et(std::move(et)), _graph(graph)
+ : _et(std::move(et)), _graph(graph)
{
}
- void handleBegin(IExecutor *, const ir::OpSequence *, const backend::Backend *) override;
- void handleEnd(IExecutor *, const ir::OpSequence *, const backend::Backend *) override;
+ void handleJobBegin(IExecutor *, ir::SubgraphIndex, ir::OperationIndex,
+ const backend::Backend *) override;
+ void handleJobEnd(IExecutor *, ir::SubgraphIndex, ir::OperationIndex,
+ const backend::Backend *) override;
- void handleEnd(IExecutor *) override { _et->uploadOperationsExecTime(); }
+ void handleSubgraphEnd(ir::SubgraphIndex) override { _et->storeOperationsExecTime(); }
private:
std::unique_ptr<util::ITimer> _timer;
@@ -62,24 +69,25 @@ private:
const ir::Graph &_graph;
};
-class ChromeTracingObserver : public IExecutionObserver
+class TracingObserver : public IExecutionObserver
{
public:
- ChromeTracingObserver(const std::string &filepath, const ir::Graph &graph);
- ~ChromeTracingObserver();
- void handleBegin(IExecutor *) override;
- void handleBegin(IExecutor *, const ir::OpSequence *, const backend::Backend *) override;
- void handleEnd(IExecutor *, const ir::OpSequence *, const backend::Backend *) override;
- void handleEnd(IExecutor *) override;
-
-private:
- static std::string opSequenceTag(const ir::OpSequence *op_seq, const ir::Operations &operations);
+ TracingObserver(const std::string &filepath, const ir::Graph &graph,
+ const util::TracingCtx *tracing_ctx);
+ ~TracingObserver();
+ void handleSubgraphBegin(ir::SubgraphIndex) override;
+ void handleJobBegin(IExecutor *, ir::SubgraphIndex, ir::OperationIndex,
+ const backend::Backend *) override;
+ void handleJobEnd(IExecutor *, ir::SubgraphIndex, ir::OperationIndex,
+ const backend::Backend *) override;
+ void handleSubgraphEnd(ir::SubgraphIndex) override;
private:
- std::ofstream _ofs;
- EventRecorder _recorder;
+ std::unique_ptr<EventRecorder> _recorder;
EventCollector _collector;
const ir::Graph &_graph;
+ EventWriter *_event_writer;
+ const util::TracingCtx *_tracing_ctx;
};
} // namespace exec
diff --git a/runtime/onert/core/src/exec/ExecutorBase.cc b/runtime/onert/core/src/exec/ExecutorBase.cc
index f835a9675..0bc088b02 100644
--- a/runtime/onert/core/src/exec/ExecutorBase.cc
+++ b/runtime/onert/core/src/exec/ExecutorBase.cc
@@ -16,10 +16,9 @@
#include "ExecutorBase.h"
-#include "backend/ITensor.h"
-#include "backend/controlflow/UserTensor.h"
-#include "backend/cpu_common/Tensor.h"
-#include "util/logging.h"
+#include "ShapeConverter.h"
+
+#include <misc/polymorphic_downcast.h>
namespace onert
{
@@ -27,68 +26,29 @@ namespace exec
{
ExecutorBase::ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_graph,
- const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
- const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
+ backend::BackendContexts &&backend_contexts,
const compiler::TensorRegistries &tensor_regs,
- backend::TensorManagerSet &&tensor_mgrs)
- : _lowered_graph{std::move(lowered_graph)}, _graph{_lowered_graph->graph()},
- _input_tensors{input_tensors}, _output_tensors{output_tensors},
- _tensor_mgrs{std::move(tensor_mgrs)}, _mutex()
+ const util::TracingCtx *tracing_ctx)
+ : _lowered_graph{std::move(lowered_graph)},
+ _backend_contexts{std::move(backend_contexts)}, _graph{_lowered_graph->graph()}, _mutex(),
+ _tracing_ctx(tracing_ctx)
{
- // TODO Fix the way of knowing whether it is primary or not
- bool primary_executor = !(_input_tensors.empty() && _output_tensors.empty());
- if (!primary_executor)
- {
- auto build_input_tensor_list = [&](const onert::ir::OperandIndexSequence &ind_seq) {
- std::vector<std::shared_ptr<backend::ITensor>> list;
- for (auto ind : ind_seq)
- {
- std::shared_ptr<backend::ITensor> tensor = tensor_regs.getITensor(ind);
- assert(tensor != nullptr);
- DynAllocInfo dyn_alloc_info{ind};
- _input_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
- list.push_back(tensor);
- }
- return list;
- };
- auto build_output_tensor_list = [&](const onert::ir::OperandIndexSequence &ind_seq) {
- std::vector<std::shared_ptr<backend::ITensor>> list;
- for (auto ind : ind_seq)
- {
- std::shared_ptr<backend::ITensor> tensor = tensor_regs.getITensor(ind);
- assert(tensor != nullptr);
- DynAllocInfo dyn_alloc_info{ind};
- _output_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
- list.push_back(tensor);
- }
- return list;
- };
- _input_tensors = build_input_tensor_list(_graph.getInputs());
- _output_tensors = build_output_tensor_list(_graph.getOutputs());
- }
- else
- {
- assert(input_tensors.size() == _graph.getInputs().size());
- assert(output_tensors.size() == _graph.getOutputs().size());
- for (uint32_t i = 0; i < input_tensors.size(); i++)
+ auto build_tensor_list = [&](const auto &ind_seq, auto &tensors) {
+ assert(tensors.empty());
+ for (auto &&ind : ind_seq)
{
- auto tensor = input_tensors[i];
- auto ind = _graph.getInputs().at(i);
- DynAllocInfo dyn_alloc_info{ind};
- _input_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
+ backend::ITensor *tensor = tensor_regs.getITensor(ind);
+ assert(tensor != nullptr);
+ auto io_tensor = nnfw::misc::polymorphic_downcast<backend::builtin::IOTensor *>(tensor);
+ tensors.push_back(io_tensor);
}
- for (uint32_t i = 0; i < output_tensors.size(); i++)
- {
- auto tensor = output_tensors[i];
- auto ind = _graph.getOutputs().at(i);
- DynAllocInfo dyn_alloc_info{ind};
- _output_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
- }
- }
+ };
+ build_tensor_list(_graph.getInputs(), _input_tensors);
+ build_tensor_list(_graph.getOutputs(), _output_tensors);
}
-void ExecutorBase::execute(const std::vector<std::shared_ptr<backend::ITensor>> &src_tensors,
- const std::shared_ptr<IPermuteFunction> &pre_fn)
+void ExecutorBase::execute(const std::vector<backend::IPortableTensor *> &inputs,
+ const std::vector<backend::IPortableTensor *> &outputs)
{
// For thread-safe, use mutex
// TODO: if all used backends on this executor are thread-safe,
@@ -96,41 +56,43 @@ void ExecutorBase::execute(const std::vector<std::shared_ptr<backend::ITensor>>
// Deadlock occurs when an Executor is called recursively.
std::lock_guard<std::mutex> lock(_mutex);
- assert(src_tensors.size() == _graph.getInputs().size());
- assert(src_tensors.size() == _input_tensors.size());
- for (uint32_t n = 0; n < _graph.getInputs().size(); ++n)
+ assert(inputs.size() == _graph.getInputs().size());
+ assert(inputs.size() == _input_tensors.size());
+ for (uint32_t n = 0; n < inputs.size(); ++n)
{
- // when user changes input shape, the input tensor is dynamic and its memory is not allocated.
- // This code find the info to allocate dynamic tensor, and allocate memory based on the source
- // tensor's shape set by caller.
- const auto src_tensor = src_tensors[n];
+ const auto input = inputs[n];
+ assert(input->buffer() != nullptr);
auto input_tensor = _input_tensors[n];
- // If src_tensor or input_tensor is nullptr, pre_fn does not copy the tensors
- if (src_tensor != nullptr && input_tensor != nullptr)
+ assert(input_tensor != nullptr);
+ if (input != nullptr)
{
- auto dyn_alloc_info = _input_to_dyn_alloc_info.find(_input_tensors[n]);
- const auto orig_input_shape = input_tensor->getShape();
- const auto changed_input_shape =
- convertShape(src_tensor->getShape(), src_tensor->layout(), input_tensor->layout());
+ const auto &orig_input_shape = input_tensor->orig_info().shape();
+ const auto &changed_input_shape =
+ convertShape(input->getShape(), input->layout(), input_tensor->orig_layout());
+ if (input_tensor->get_info().shape() != changed_input_shape)
+ {
+ // TODO Fix this workaround that is introduced since cpu based kernels directly use `_info`
+ // rather than interface methods to avoid virtual function calls.
+ input_tensor->setShapeOfIPortableTensor(changed_input_shape);
+ }
if (orig_input_shape != changed_input_shape)
{
- if (dyn_alloc_info == _input_to_dyn_alloc_info.end())
- {
- // The input_tensor is a dynamic tensor of backend that doesn't support dynamic tensor
- throw std::runtime_error("Unknown dim is found at execution time for a backend that "
- "does not support dynamic tensor");
- }
- else
- {
- input_tensor->set_dynamic();
- }
+ input_tensor->set_dynamic();
}
}
+ input_tensor->setTensor(input);
}
- // TODO Move calling permute_fn.run() into executeImpl()
- assert(pre_fn);
- pre_fn->run();
+ assert(outputs.size() == _graph.getOutputs().size());
+ assert(outputs.size() == _output_tensors.size());
+ for (uint32_t n = 0; n < outputs.size(); ++n)
+ {
+ const auto output = outputs[n];
+ // assert(output->buffer() != nullptr);
+ auto output_tensor = _output_tensors[n];
+ assert(output_tensor != nullptr);
+ output_tensor->setTensor(output);
+ }
executeImpl();
}
@@ -146,32 +108,50 @@ void ExecutorBase::execute(const IODescription &desc)
assert(_input_tensors.size() == desc.inputs.size());
for (uint32_t i = 0; i < _input_tensors.size(); ++i)
{
- // TODO Remove dynamic_cast
- auto tensor = std::dynamic_pointer_cast<backend::controlflow::UserTensor>(_input_tensors[i]);
- assert(tensor);
+ auto tensor = _input_tensors[i];
+
+ // TODO Check if (desc.inputs[i] == nullptr)
+ // TODO Better design for ITensor? (we need const_cast as ITensor is writable)
+ tensor->setUserTensor(static_cast<uint8_t *>(const_cast<void *>(desc.inputs[i]->buffer)),
+ desc.inputs[i]->size);
+
auto input_shape = desc.dynamic_input_shapes.find(ir::IOIndex{i});
if (input_shape != desc.dynamic_input_shapes.end())
{
tensor->set_dynamic();
tensor->setShape(input_shape->second);
+ /*
+ * Changes tensor shape and allocate memory since its shape was changed
+ * perhaps by nnfw_set_input_tensorinfo()
+ *
+ * Cases are:
+ * 1) static operand -> nnfw_set_input_tensorinfo() -> execute() -> execute()
+ * (a) (b)
+ *
+ * at (a), operand is static, tensor is static - memory dealloc is not needed
+ * (DynamicTensorManager cannot dealloc memory allocated by StaticTensorManager)
+ * at (b), operand is static, tensor is dynamic - memory dealloc is needed
+ *
+ * 2) dynamic operand -> nnfw_set_input_tensorinfo() -> execute() -> execute()
+ * (a) (b)
+ *
+ * at (a), operand is dynamic, tensor is dynamic - memory dealloc is not needed
+ * since it has not been allocated yet
+ * at (b), operand is dynamic, tensor is dynamic - memory dealloc is needed
+ */
+ tensor->applyShape(input_shape->second);
}
- // TODO Better design for ITensor? (we need const_cast as ITensor is writable)
- tensor->setBuffer(static_cast<uint8_t *>(const_cast<void *>(desc.inputs[i]->buffer)),
- desc.inputs[i]->size);
-
- handleDynamicInputTensor(ir::IOIndex{i}, desc);
}
assert(_output_tensors.size() == desc.outputs.size());
for (uint32_t i = 0; i < _output_tensors.size(); ++i)
{
- // TODO Remove dynamic_cast
- auto tensor = std::dynamic_pointer_cast<backend::controlflow::UserTensor>(_output_tensors[i]);
- assert(tensor);
+ auto tensor = _output_tensors[i];
+
+ if (desc.outputs[i] == nullptr)
+ throw std::runtime_error{"Output " + std::to_string(i) + "'s buffer is not set."};
+ tensor->setUserTensor(static_cast<uint8_t *>(desc.outputs[i]->buffer), desc.outputs[i]->size);
tensor->set_dynamic(); // It can't be resized but shape could change
- // TODO Better design for ITensor? (we need const_cast as ITensor is writable)
- tensor->setBuffer(static_cast<uint8_t *>(const_cast<void *>(desc.outputs[i]->buffer)),
- desc.outputs[i]->size);
}
executeImpl();
@@ -190,51 +170,13 @@ void ExecutorBase::execute(const IODescription &desc)
// set shape of outputDesc to tensor shape since tensor can be dynamic
const auto output_tensor_shape = _output_tensors[n]->getShape();
output.info.shape(
- convertShape(output_tensor_shape, _output_tensors[n]->layout(), output.layout));
- }
-}
-
-/**
- * @brief Changes tensor shape and allocate memory
- * if input shape was changed by nnfw_set_input_tensorinfo()
- *
- * @note Cases are:
- * 1) static operand -> nnfw_set_input_tensorinfo() -> execute() -> execute()
- * (a) (b)
- *
- * at (a), operand is static, tensor is static - memory dealloc is not needed
- * (DynamicTensorManager cannot dealloc memory allocated by StaticTensorManager)
- * at (b), operand is static, tensor is dynamic - memory dealloc is needed
- *
- * 2) dynamic operand -> nnfw_set_input_tensorinfo() -> execute() -> execute()
- * (a) (b)
- *
- * at (a), operand is dynamic, tensor is dynamic - memory dealloc is not needed
- * since it has not been allocated yet
- * at (b), operand is dynamic, tensor is dynamic - memory dealloc is needed
- */
-void ExecutorBase::handleDynamicInputTensor(ir::IOIndex io_ind, const IODescription &desc)
-{
- auto shape_sig_found = desc.dynamic_input_shapes.find(io_ind);
- if (shape_sig_found != desc.dynamic_input_shapes.end())
- {
- auto dyn_alloc_info = _input_to_dyn_alloc_info.find(_input_tensors[io_ind.value()]);
- if (dyn_alloc_info == _input_to_dyn_alloc_info.end())
- throw std::runtime_error("Unknown dim is found at execution time for a backend that "
- "does not support dynamic tensor");
-
- auto changed_input_shape = shape_sig_found->second;
- auto operand_ind = dyn_alloc_info->second.ind;
-
- auto dyn_tensor_manager = _input_tensors[io_ind.value()]->dynamic_tensor_manager();
- assert(dyn_tensor_manager);
- dyn_tensor_manager->applyShape(operand_ind, changed_input_shape);
+ convertShape(output_tensor_shape, _output_tensors[n]->layout(), output.layout));
}
}
bool ExecutorBase::hasDynamicInput()
{
- for (auto &tensor : _input_tensors)
+ for (auto &&tensor : _input_tensors)
{
if (tensor->is_dynamic())
return true;
diff --git a/runtime/onert/core/src/exec/ExecutorBase.h b/runtime/onert/core/src/exec/ExecutorBase.h
index a13be7dbf..4f97de922 100644
--- a/runtime/onert/core/src/exec/ExecutorBase.h
+++ b/runtime/onert/core/src/exec/ExecutorBase.h
@@ -17,25 +17,20 @@
#ifndef __ONERT_EXEC_EXECUTOR_BASE_H__
#define __ONERT_EXEC_EXECUTOR_BASE_H__
-#include <mutex>
+#include "ExecutionObservee.h"
+#include "../backend/builtin/IOTensor.h"
+#include "../compiler/TensorRegistries.h"
-#include "IPermuteFunction.h"
-#include "Source.h"
-#include "exec/ExecutionObservers.h"
-#include "Sink.h"
-#include "ShapeConverter.h"
-#include "exec/IExecutor.h"
#include "compiler/LoweredGraph.h"
-#include "ir/LowerInfoMap.h"
-#include "backend/IConfig.h"
-#include "backend/Backend.h"
-#include "exec/ExecTime.h"
-#include "exec/IFunction.h"
-#include "backend/IDynamicTensorManager.h"
-#include "backend/ITensorManager.h"
-#include "exec/ExecutionObservee.h"
-#include "compiler/TensorRegistries.h"
-#include <list>
+#include "exec/IExecutor.h"
+#include "exec/IODescription.h"
+#include "ir/Graph.h"
+#include "ir/OperationIndexMap.h"
+#include "util/TracingCtx.h"
+
+#include <memory>
+#include <mutex>
+#include <vector>
namespace onert
{
@@ -51,26 +46,18 @@ public:
* @param tensor_builders Tensor builders that are currently used
*/
ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_graph,
- const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
- const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorRegistries &tensor_regs,
- backend::TensorManagerSet &&tensor_mgrs);
+ backend::BackendContexts &&backend_contexts,
+ const compiler::TensorRegistries &tensor_regs, const util::TracingCtx *tracing_ctx);
virtual ~ExecutorBase() = default;
- const ir::Graph &graph() final { return _graph; }
-
- /**
- * @brief Execute without IODescription
- *
- * @param src_tensor Tensor list that will be copied to input tensors of this
- * @param pre_fn The permutation function that copy from src_tensor to input tensors of this
- */
- void execute(const std::vector<std::shared_ptr<backend::ITensor>> &src_tensors,
- const std::shared_ptr<IPermuteFunction> &pre_fn);
+ const ir::Graph &graph() const final { return _graph; }
void execute(const IODescription &desc) final;
+ void execute(const std::vector<backend::IPortableTensor *> &inputs,
+ const std::vector<backend::IPortableTensor *> &outputs) override;
+
// Used only in Dataflow and Parallel Executors
void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>> ranks) final
{
@@ -81,17 +68,16 @@ public:
void addObserver(std::unique_ptr<IExecutionObserver> ref) { _subject.add(std::move(ref)); };
- const std::vector<std::shared_ptr<backend::ITensor>> &getInputTensors() const
+ const std::vector<backend::builtin::IOTensor *> &getInputTensors() const override
{
return _input_tensors;
}
- const std::vector<std::shared_ptr<backend::ITensor>> &getOutputTensors() const
+ const std::vector<backend::builtin::IOTensor *> &getOutputTensors() const override
{
return _output_tensors;
}
-
- const DynAllocInfoMap &getInputsDynamicAllocInfo() const { return _input_to_dyn_alloc_info; }
+ backend::BackendContexts &getBackendContexts() { return _backend_contexts; }
protected:
/**
@@ -103,16 +89,12 @@ protected:
ExecutionObservee _subject;
std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks;
std::unique_ptr<compiler::LoweredGraph> _lowered_graph;
+ backend::BackendContexts _backend_contexts;
const ir::Graph &_graph;
- std::vector<std::shared_ptr<backend::ITensor>> _input_tensors;
- std::vector<std::shared_ptr<backend::ITensor>> _output_tensors;
- DynAllocInfoMap _input_to_dyn_alloc_info;
- DynAllocInfoMap _output_to_dyn_alloc_info;
- backend::TensorManagerSet _tensor_mgrs;
+ std::vector<backend::builtin::IOTensor *> _input_tensors;
+ std::vector<backend::builtin::IOTensor *> _output_tensors;
std::mutex _mutex;
-
-private:
- void handleDynamicInputTensor(ir::IOIndex input_index, const IODescription &desc);
+ const util::TracingCtx *_tracing_ctx;
};
} // namespace exec
diff --git a/runtime/onert/core/src/exec/Executors.cc b/runtime/onert/core/src/exec/Executors.cc
new file mode 100644
index 000000000..8a1be3df4
--- /dev/null
+++ b/runtime/onert/core/src/exec/Executors.cc
@@ -0,0 +1,649 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Executors.h"
+
+#include "../backend/builtin/IOTensor.h"
+
+namespace
+{
+
+using namespace onert;
+
+int32_t find_input_index(const std::vector<ir::IODesc> &pkg_inputs,
+ const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+ const ir::IOIndex &io_index)
+{
+ for (size_t i = 0; i < pkg_inputs.size(); i++)
+ {
+ auto &input_desc = pkg_inputs[i];
+ if ((std::get<ir::ModelIndex>(input_desc) == model_index) &&
+ (std::get<ir::SubgraphIndex>(input_desc) == subg_index) &&
+ (std::get<ir::IOIndex>(input_desc) == io_index))
+ return static_cast<int32_t>(i);
+ }
+ return -1;
+}
+
+int32_t find_output_index(const std::vector<ir::IODesc> &pkg_outputs,
+ const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+ const ir::IOIndex &io_index)
+{
+ for (size_t i = 0; i < pkg_outputs.size(); i++)
+ {
+ auto &input_desc = pkg_outputs[i];
+ if ((std::get<ir::ModelIndex>(input_desc) == model_index) &&
+ (std::get<ir::SubgraphIndex>(input_desc) == subg_index) &&
+ (std::get<ir::IOIndex>(input_desc) == io_index))
+ return static_cast<int32_t>(i);
+ }
+ return -1;
+}
+
+} // namespace
+
+namespace onert
+{
+namespace exec
+{
+
+class Executors::EdgeTensor : public backend::builtin::IOTensor
+{
+public:
+ EdgeTensor(const ir::OperandInfo &info, ir::Layout layout)
+ : backend::builtin::IOTensor(info, layout), _buffer{nullptr}, _ref_count{0}
+ {
+ }
+ ~EdgeTensor() = default;
+
+ void allocate_buffer()
+ {
+ const auto total_size = orig_info().total_size();
+ _buffer = std::make_unique<uint8_t[]>(total_size);
+ _ref_count = 1;
+
+ // NOTE An executor's inputs/outputs are always IPortableTensor. If a backend uses a
+ // tensor type that does not inherit from IPortableTensor for an input/output, a Permute
+ // operation is added at compile time so that all inputs/outputs become IPortableTensor.
+ // This allows user buffers to be set as the inputs/outputs of executors.
+ setUserTensor(_buffer.get(), total_size);
+ }
+
+ void increase_ref() { _ref_count++; }
+
+ void decrease_ref()
+ {
+ assert(_ref_count > 0);
+ _ref_count--;
+ if (_ref_count == 0)
+ {
+ _buffer.reset();
+ setUserTensor(nullptr, orig_info().total_size());
+ }
+ }
+
+private:
+ std::unique_ptr<uint8_t[]> _buffer;
+ int32_t _ref_count;
+};
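+
+// A typical EdgeTensor lifecycle: the producer side calls allocate_buffer() once (which
+// sets the reference count to 1), each additional consumer calls increase_ref(), and every
+// consumer calls decrease_ref() when it is done, so the buffer is freed as soon as the
+// last reader has finished.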
+
+void Executors::emplace(const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+ std::unique_ptr<IExecutor> exec)
+{
+ _executors.emplace(std::make_pair(model_index, subg_index), std::move(exec));
+}
+
+IExecutor *Executors::at(const ir::ModelIndex &model_index,
+ const ir::SubgraphIndex &subg_index) const
+{
+ return _executors.at(std::make_pair(model_index, subg_index)).get();
+}
+
+uint32_t Executors::inputSize() const { return _model_edges->pkg_inputs.size(); }
+
+uint32_t Executors::outputSize() const { return _model_edges->pkg_outputs.size(); }
+
+const ir::OperandInfo &Executors::inputInfo(const ir::IOIndex &index) const
+{
+ auto const desc = _model_edges->pkg_inputs[index.value()];
+ auto const model_index = std::get<0>(desc);
+ auto const subg_index = std::get<1>(desc);
+ auto const io_index = std::get<2>(desc);
+ auto const executor = at(model_index, subg_index);
+ return executor->getInputTensors().at(io_index.value())->orig_info();
+}
+
+const ir::OperandInfo &Executors::outputInfo(const ir::IOIndex &index) const
+{
+ auto const desc = _model_edges->pkg_outputs[index.value()];
+ auto const model_index = std::get<0>(desc);
+ auto const subg_index = std::get<1>(desc);
+ auto const io_index = std::get<2>(desc);
+ auto const executor = at(model_index, subg_index);
+ return executor->getOutputTensors().at(io_index.value())->orig_info();
+}
+
+// Allow only the edges below:
+// m1 < m2, s1 == 0 and s2 == 0 for an edge 'm1:s1:o1 -> m2:s2:o2'
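+// e.g., '0:0:2 -> 1:0:0' (model 0, subgraph 0, output 2 feeding model 1, subgraph 0,
+// input 0) is accepted, while '1:0:0 -> 0:0:0' or any edge with a non-zero subgraph
+// index is rejected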
+void Executors::checkSupportedMultimodel() const
+{
+ // If the package includes a model with no connections, model_count is less than the real
+ // model count in the package. Then this method throws an exception based on the model index:
+ // 1st model: input assumption
+ // Otherwise: edges assumption
+
+ // Assumption: edges
+ // m1 < m2, s1 == 0 and s2 == 0 if edge 'm1:s1:o1 -> m2:s2:o2'
+ for (auto &&edge : _model_edges->edges)
+ {
+ auto const model_from = std::get<ir::ModelIndex>(edge.from);
+ auto const model_to = std::get<ir::ModelIndex>(edge.to);
+ auto const subg_from = std::get<ir::SubgraphIndex>(edge.from);
+ auto const subg_to = std::get<ir::SubgraphIndex>(edge.to);
+
+ if (model_from.value() == model_to.value())
+ {
+ throw std::runtime_error{"Multi model's edge set has invalid edge"};
+ }
+
+ if ((model_from.value() > model_to.value()) || (subg_from != ir::SubgraphIndex{0}) ||
+ (subg_to != ir::SubgraphIndex{0}))
+ throw std::runtime_error{"NYI: Multi model execution for this edge set is not supported yet"};
+ }
+
+ // Assumption: package inputs
+ // All 1st model inputs come from package input if always m1 < m2
+ {
+ auto first_executor = at(ir::ModelIndex{0}, ir::SubgraphIndex{0});
+ auto search_first_model = [&](const ir::IOIndex &input_index) {
+ for (const auto &input : _model_edges->pkg_inputs)
+ {
+ // All three fields must match for `input_index` to be a package input of the 1st model
+ if ((std::get<ir::ModelIndex>(input) == ir::ModelIndex{0}) &&
+ (std::get<ir::SubgraphIndex>(input) == ir::SubgraphIndex{0}) &&
+ (std::get<ir::IOIndex>(input) == input_index))
+ return true;
+ }
+
+ return false;
+ };
+
+ for (uint32_t i = 0; i < first_executor->getInputTensors().size(); i++)
+ {
+ if (!search_first_model(ir::IOIndex{i}))
+ throw std::runtime_error{"Cannot find 1st model's input buffer"};
+ }
+ }
+
+ // Check whether nnpkg outputs and Edge `from` are duplicated
+ for (const auto &edge : _model_edges->edges)
+ {
+ if (std::find(_model_edges->pkg_outputs.begin(), _model_edges->pkg_outputs.end(), edge.from) !=
+ _model_edges->pkg_outputs.end())
+ {
+ throw std::runtime_error{"Multi model execution does not support duplicating nnpkg outputs "
+ "with `from` of edges yet"};
+ }
+ }
+}
+
+void Executors::createEdgeQuantLayers()
+{
+ if (_is_created_edge_quant_layers)
+ {
+ return;
+ }
+
+ // Create EdgeTensor for edges between executors
+ for (const auto &pair : _edge_map)
+ {
+ const auto &from_iodesc = pair.first;
+ const auto &from_model_index = std::get<ir::ModelIndex>(from_iodesc);
+ const auto &from_subg_index = std::get<ir::SubgraphIndex>(from_iodesc);
+ const auto &from_io_index = std::get<ir::IOIndex>(from_iodesc);
+
+ const auto from_executor = _executors.at({from_model_index, from_subg_index}).get();
+ const auto from_tensor = from_executor->getOutputTensors().at(from_io_index.value());
+
+ const auto &from_info = from_tensor->orig_info();
+ const auto from_layout = from_tensor->orig_layout();
+ _edge_tensors[from_iodesc] = std::make_unique<EdgeTensor>(from_info, from_layout);
+ }
+
+ // Append type-aware quantization layer for edges between executors
+ for (const auto &executor_pair : _executors)
+ {
+ const auto &executor_index = executor_pair.first;
+ const auto &model_index = executor_index.first;
+ const auto &subg_index = executor_index.second;
+
+ std::vector<backend::ITensor *> inputs;
+ std::vector<backend::ITensor *> outputs;
+ for (const auto &pair : _edge_map)
+ {
+ const auto &from_iodesc = pair.first;
+ if (std::get<ir::ModelIndex>(from_iodesc) == model_index &&
+ std::get<ir::SubgraphIndex>(from_iodesc) == subg_index)
+ {
+ const auto from_tensor = _edge_tensors[from_iodesc].get();
+ const auto &to_list = pair.second;
+
+ for (const auto &to_iodesc : to_list)
+ {
+ const auto &to_model_index = std::get<ir::ModelIndex>(to_iodesc);
+ const auto &to_subg_index = std::get<ir::SubgraphIndex>(to_iodesc);
+ const auto &to_io_index = std::get<ir::IOIndex>(to_iodesc);
+
+ const auto to_executor = _executors.at({to_model_index, to_subg_index}).get();
+ const auto to_tensor = to_executor->getInputTensors().at(to_io_index.value());
+
+ // TODO Unify tensors with the same `from` tensor and same type
+ if (from_tensor->data_type() != to_tensor->data_type())
+ {
+ assert(inputs.size() == outputs.size());
+ const auto &to_info =
+ to_executor->getInputTensors().at(to_io_index.value())->orig_info();
+ const auto to_layout = to_tensor->orig_layout();
+ inputs.emplace_back(from_tensor);
+
+ auto type_aware_quant_tensor = std::make_unique<EdgeTensor>(to_info, to_layout);
+ outputs.emplace_back(type_aware_quant_tensor.get());
+
+ _edge_quant_tensors[to_iodesc] = std::move(type_aware_quant_tensor);
+ }
+ }
+ }
+ }
+
+ auto layer = std::make_unique<PermuteLayer>(inputs, outputs);
+ layer->prepare();
+ _edge_quant_layers[{model_index, subg_index}] = std::move(layer);
+ }
+
+ _is_created_edge_quant_layers = true;
+}
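+
+// For example, when a float32 producer feeds a quantized uint8 consumer across an edge,
+// the loop above creates one extra EdgeTensor of the consumer's type and a PermuteLayer
+// that performs the element-wise type conversion between the two tensors.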
+
+void Executors::CreatePkgIOTensors(const IODescription &desc)
+{
+ for (const auto &pkg_input : _model_edges->pkg_inputs)
+ {
+ // Create IOTensor for nnpkg inputs
+ const auto &model_index = std::get<ir::ModelIndex>(pkg_input);
+ const auto &subg_index = std::get<ir::SubgraphIndex>(pkg_input);
+ const auto &io_index = std::get<ir::IOIndex>(pkg_input);
+ const auto input_pkg_index =
+ find_input_index(_model_edges->pkg_inputs, model_index, subg_index, io_index);
+ if (input_pkg_index == -1)
+ throw std::runtime_error{"Cannot find multi model input index"};
+ auto input_desc = desc.inputs[input_pkg_index].get();
+ _pkg_input_tensors[pkg_input] =
+ std::make_unique<backend::builtin::IOTensor>(input_desc->info, input_desc->layout);
+ }
+
+ for (const auto &pkg_output : _model_edges->pkg_outputs)
+ {
+ // Create IOTensor for nnpkg outputs
+ const auto &model_index = std::get<ir::ModelIndex>(pkg_output);
+ const auto &subg_index = std::get<ir::SubgraphIndex>(pkg_output);
+ const auto &io_index = std::get<ir::IOIndex>(pkg_output);
+ const auto output_pkg_index =
+ find_output_index(_model_edges->pkg_outputs, model_index, subg_index, io_index);
+ if (output_pkg_index == -1)
+ throw std::runtime_error{"Cannot find multi model output index"};
+ auto output_desc = desc.outputs[output_pkg_index].get();
+ _pkg_output_tensors[pkg_output] =
+ std::make_unique<backend::builtin::IOTensor>(output_desc->info, output_desc->layout);
+ }
+}
+
+void Executors::createPkgIOQuantLayers(const IODescription &desc)
+{
+ // Append type-aware quantization layer for nnpkg inputs/outputs between executors
+ for (const auto &pair : _executors)
+ {
+ const auto &executor_index = pair.first;
+ const auto &model_index = executor_index.first;
+ const auto &subg_index = executor_index.second;
+ const auto executor = pair.second.get();
+
+ // Find pkg inputs of current executor
+ std::vector<ir::IODesc> pkg_inputs;
+ for (const auto &pkg_input : _model_edges->pkg_inputs)
+ {
+ if (std::get<ir::ModelIndex>(pkg_input) == model_index &&
+ std::get<ir::SubgraphIndex>(pkg_input) == subg_index)
+ {
+ pkg_inputs.emplace_back(pkg_input);
+ }
+ }
+ std::vector<backend::ITensor *> src_tensors;
+ std::vector<backend::ITensor *> dst_tensors;
+ for (const auto &pkg_input : pkg_inputs)
+ {
+ const auto &io_index = std::get<ir::IOIndex>(pkg_input);
+ const auto input_pkg_index =
+ find_input_index(_model_edges->pkg_inputs, model_index, subg_index, io_index);
+ if (input_pkg_index == -1)
+ throw std::runtime_error{"Cannot find multi model input index"};
+ auto input_desc = desc.inputs[input_pkg_index].get();
+
+ // Create EdgeTensor for nnpkg input if type is different
+ const auto input_tensor =
+ executor->getInputTensors().at(std::get<ir::IOIndex>(pkg_input).value());
+ const auto &orig_info = input_tensor->orig_info();
+ if (input_desc->info.typeInfo().type() != input_tensor->orig_info().typeInfo().type())
+ {
+ const auto orig_layout = input_tensor->orig_layout();
+ auto pkg_input_edge_tensor = std::make_unique<EdgeTensor>(orig_info, orig_layout);
+ _pkg_input_quant_tensors[pkg_input] = std::move(pkg_input_edge_tensor);
+
+ // Append type-aware quantization layer's inputs/outputs
+ src_tensors.emplace_back(_pkg_input_tensors[pkg_input].get());
+ dst_tensors.emplace_back(_pkg_input_quant_tensors[pkg_input].get());
+ }
+ }
+
+ // Create type-aware quantization layer for nnpkg inputs
+ auto pkg_input_layer = std::make_unique<PermuteLayer>(src_tensors, dst_tensors);
+ pkg_input_layer->prepare();
+ _pkg_input_quant_layers[{model_index, subg_index}] = std::move(pkg_input_layer);
+
+ // Find pkg outputs of current executor
+ std::vector<ir::IODesc> pkg_outputs;
+ for (const auto &pkg_output : _model_edges->pkg_outputs)
+ {
+ if (std::get<ir::ModelIndex>(pkg_output) == model_index &&
+ std::get<ir::SubgraphIndex>(pkg_output) == subg_index)
+ {
+ pkg_outputs.emplace_back(pkg_output);
+ }
+ }
+ src_tensors.clear();
+ dst_tensors.clear();
+ // Create Tensors of nnpkg outputs for type-aware quantization
+ for (const auto &pkg_output : pkg_outputs)
+ {
+ const auto &io_index = std::get<ir::IOIndex>(pkg_output);
+ const auto output_pkg_index =
+ find_output_index(_model_edges->pkg_outputs, model_index, subg_index, io_index);
+ if (output_pkg_index == -1)
+ throw std::runtime_error{"Cannot find multi model output index"};
+ auto output_desc = desc.outputs[output_pkg_index].get();
+
+ // Create EdgeTensor for nnpkg output if type is different
+ const auto output_tensor =
+ executor->getOutputTensors().at(std::get<ir::IOIndex>(pkg_output).value());
+ const auto &orig_info = output_tensor->orig_info();
+ if (output_desc->info.typeInfo().type() != orig_info.typeInfo().type())
+ {
+ const auto orig_layout = output_tensor->orig_layout();
+ auto pkg_output_edge_tensor = std::make_unique<EdgeTensor>(orig_info, orig_layout);
+ _pkg_output_quant_tensors[pkg_output] = std::move(pkg_output_edge_tensor);
+
+ // Append type-aware quantization layer's inputs/outputs
+ src_tensors.emplace_back(_pkg_output_quant_tensors[pkg_output].get());
+ dst_tensors.emplace_back(_pkg_output_tensors[pkg_output].get());
+ }
+ }
+
+ // Create type-aware quantization layer for nnpkg outputs
+ auto pkg_output_layer = std::make_unique<PermuteLayer>(src_tensors, dst_tensors);
+ pkg_output_layer->prepare();
+ _pkg_output_quant_layers[{model_index, subg_index}] = std::move(pkg_output_layer);
+ }
+}
+
+void Executors::execute(const IODescription &desc)
+{
+ // Check that the multi-model package is supported
+ checkSupportedMultimodel();
+
+ // TODO Move creation of type-aware quantization layers for edges into the compilation stage
+ createEdgeQuantLayers();
+
+ // TODO Create IOTensors only once and recreate them only if nnpkg info changes
+ CreatePkgIOTensors(desc);
+
+ // TODO Create type-aware quantization layers only once and recreate them only if type changes
+ createPkgIOQuantLayers(desc);
+
+ // TODO Find better way to schedule order of executors
+ auto const model_count = modelCount();
+
+ auto find_from = [&](const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+ const ir::IOIndex &io_index) {
+ for (const auto &edge : _model_edges->edges)
+ {
+ if ((std::get<ir::ModelIndex>(edge.to) == model_index) &&
+ (std::get<ir::SubgraphIndex>(edge.to) == subg_index) &&
+ (std::get<ir::IOIndex>(edge.to) == io_index))
+ return edge.from;
+ }
+
+ throw std::runtime_error{"Cannot find edge for model input"};
+ };
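+ // Illustrative example: given an edge from (model 0, subgraph 0, output 1) to
+ // (model 1, subgraph 0, input 0), find_from(ModelIndex{1}, SubgraphIndex{0}, IOIndex{0})
+ // returns the `from` IODesc (model 0, subgraph 0, output 1)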
+
+ // Execute each model
+ // NOTE May be better to use vector instead of unordered_map for _executors
+ for (auto model_index = ir::ModelIndex{0}; model_index.value() < model_count; model_index++)
+ {
+ // Find executor
+ auto executor = at(model_index, ir::SubgraphIndex{0});
+
+ // Set IOTensors
+ // TODO Set internal IOTensors only once
+ std::vector<backend::IPortableTensor *> inputs_inter;
+ std::vector<backend::IPortableTensor *> outputs_inter;
+ const auto &input_tensors = executor->getInputTensors();
+ const auto &output_tensors = executor->getOutputTensors();
+ auto const input_size = input_tensors.size();
+ auto const output_size = output_tensors.size();
+ inputs_inter.resize(input_size);
+ outputs_inter.resize(output_size);
+
+ // Set inputs of executor
+ // TODO Create layer to allocate/deallocate buffers of EdgeTensor for each executor
+ for (uint32_t i = 0; i < input_size; i++)
+ {
+ const auto input_pkg_index = find_input_index(_model_edges->pkg_inputs, model_index,
+ ir::SubgraphIndex{0}, ir::IOIndex{i});
+ const auto input_io_desc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
+ if (input_pkg_index != -1)
+ {
+ // Allocate type-aware quantization tensors for nnpkg inputs and set internal tensors
+ if (_pkg_input_quant_tensors.find(input_io_desc) != _pkg_input_quant_tensors.end())
+ {
+ _pkg_input_quant_tensors[input_io_desc]->allocate_buffer();
+
+ inputs_inter[i] = _pkg_input_quant_tensors[input_io_desc].get();
+ }
+ else
+ {
+ inputs_inter[i] = _pkg_input_tensors[input_io_desc].get();
+ }
+
+ // Set buffer of IOTensor
+ auto input_desc = desc.inputs[input_pkg_index].get();
+ // TODO Remove const_cast (const_cast is needed because ITensor buffers are writable)
+ _pkg_input_tensors[input_io_desc]->setUserTensor(
+ reinterpret_cast<uint8_t *>(const_cast<void *>(input_desc->buffer)), input_desc->size);
+ }
+ else
+ {
+ auto from_iodesc = find_from(model_index, ir::SubgraphIndex{0}, ir::IOIndex{i});
+ const auto &from_model_index = std::get<ir::ModelIndex>(from_iodesc);
+ const auto &from_subg_index = std::get<ir::SubgraphIndex>(from_iodesc);
+ const auto &from_ioindex = std::get<ir::IOIndex>(from_iodesc).value();
+
+ // Only sequential execution of models is supported
+ assert(from_model_index.value() < model_index.value());
+ assert(from_subg_index.value() == 0);
+ const auto from_executor = _executors.at({from_model_index, from_subg_index}).get();
+ const auto to_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
+ if (_edge_quant_tensors.find(to_iodesc) == _edge_quant_tensors.end())
+ {
+ inputs_inter[i] = from_executor->getOutputTensors().at(from_ioindex);
+ }
+ else
+ {
+ inputs_inter[i] = _edge_quant_tensors.at(to_iodesc).get();
+ }
+ assert(inputs_inter[i]->buffer() != nullptr);
+ }
+ }
+
+ // Set outputs of executor
+ for (uint32_t i = 0; i < output_size; i++)
+ {
+ const auto output_pkg_index = find_output_index(_model_edges->pkg_outputs, model_index,
+ ir::SubgraphIndex{0}, ir::IOIndex{i});
+ const auto output_io_desc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
+ if (output_pkg_index != -1)
+ {
+ // Allocate type-aware quantization tensors for nnpkg outputs and set internal tensors
+ if (_pkg_output_quant_tensors.find(output_io_desc) != _pkg_output_quant_tensors.end())
+ {
+ _pkg_output_quant_tensors[output_io_desc]->allocate_buffer();
+
+ outputs_inter[i] = _pkg_output_quant_tensors[output_io_desc].get();
+ }
+ else
+ {
+ outputs_inter[i] = _pkg_output_tensors[output_io_desc].get();
+ }
+
+ // Set buffer of IOTensor
+ auto output_desc = desc.outputs[output_pkg_index].get();
+ _pkg_output_tensors[output_io_desc]->setUserTensor(
+ reinterpret_cast<uint8_t *>(output_desc->buffer), output_desc->size);
+ }
+ else
+ {
+ // Allocate buffer of `from` tensors
+ const auto from_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
+ _edge_tensors[from_iodesc]->allocate_buffer();
+ outputs_inter[i] = _edge_tensors[from_iodesc].get();
+
+ // Allocate buffer of tensors for type-aware quantization
+ for (const auto &to_iodesc : _edge_map[from_iodesc])
+ {
+ _edge_tensors[from_iodesc]->increase_ref();
+ if (_edge_quant_tensors.find(to_iodesc) != _edge_quant_tensors.end())
+ {
+ auto type_aware_quant_tensor = _edge_quant_tensors.at(to_iodesc).get();
+ type_aware_quant_tensor->allocate_buffer();
+
+ _edge_tensors[from_iodesc]->decrease_ref();
+ }
+ }
+ }
+ }
+
+ _pkg_input_quant_layers[{model_index, ir::SubgraphIndex{0}}]->run();
+
+ executor->execute(inputs_inter, outputs_inter);
+
+ _edge_quant_layers[{model_index, ir::SubgraphIndex{0}}]->run();
+ _pkg_output_quant_layers[{model_index, ir::SubgraphIndex{0}}]->run();
+
+ // Release input buffers that are no longer needed
+ for (uint32_t i = 0; i < input_size; i++)
+ {
+ const auto input_pkg_index = find_input_index(_model_edges->pkg_inputs, model_index,
+ ir::SubgraphIndex{0}, ir::IOIndex{i});
+
+ const auto to_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
+ if (input_pkg_index == -1)
+ {
+ if (_edge_quant_tensors.find(to_iodesc) != _edge_quant_tensors.end())
+ {
+ // Decrease reference count of the tensor for type-aware quantization if the input tensor
+ // is that tensor
+ _edge_quant_tensors[to_iodesc]->decrease_ref();
+ }
+ else
+ {
+ // Decrease reference count of `from` tensor if input tensor is the `from` tensor
+ const auto from_iodesc = find_from(model_index, ir::SubgraphIndex{0}, ir::IOIndex{i});
+ _edge_tensors[from_iodesc]->decrease_ref();
+
+ // Decrease reference count of nnpkg inputs
+ if (_pkg_input_quant_tensors.find(to_iodesc) != _pkg_input_quant_tensors.end())
+ {
+ _pkg_input_quant_tensors[to_iodesc]->decrease_ref();
+ }
+ }
+ }
+ }
+
+ // Release output buffers if those buffers are no longer used by other executors because of
+ // type-aware quantization
+ // FIXME Revisit this once tensors for type-aware quantization are unified for the same
+ // `from` tensor and same type
+ for (uint32_t i = 0; i < output_size; i++)
+ {
+ auto from_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
+
+ // Check if other executors will use the buffer of edge tensor
+ const auto &to_list = _edge_map[from_iodesc];
+ if (to_list.size() == 0)
+ {
+ // This condition means `from_iodesc` tensor is an output of nnpkg
+ continue;
+ }
+
+ bool to_be_release =
+ !std::any_of(to_list.begin(), to_list.end(), [&](const ir::IODesc &to_iodesc) {
+ // This condition means another executor uses the buffer of edge tensor
+ return _edge_quant_tensors.find(to_iodesc) == _edge_quant_tensors.end();
+ });
+
+ if (to_be_release)
+ {
+ // This edge tensor's buffer won't be used in other executors
+ // Tensors for type-aware quantization take over the role of this edge tensor instead
+ _edge_tensors[from_iodesc]->decrease_ref();
+ }
+
+ // Decrease reference count of nnpkg outputs
+ if (_pkg_output_quant_tensors.find(from_iodesc) != _pkg_output_quant_tensors.end())
+ {
+ _pkg_output_quant_tensors[from_iodesc]->decrease_ref();
+ }
+ }
+ }
+}
+
+// modelCount() iterates over _executors.
+// It assumes that Compiler generates an Executor for every model and that _executors contains
+// all generated Executors.
+// If the nnpackage includes model(s) that have no connection and Compiler does not generate
+// Executors for them, modelCount() returns a smaller value than the real model count.
+uint16_t Executors::modelCount() const
+{
+ uint16_t model_count = 0;
+ for (; _executors.find(std::make_pair(ir::ModelIndex{model_count}, ir::SubgraphIndex{0})) !=
+ _executors.end();
+ model_count++)
+ ;
+
+ return model_count;
+}
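+// Illustrative example of that assumption: if _executors holds executors for (model 0, subg 0)
+// and (model 2, subg 0) but none for model 1, the loop above stops at model 1 and modelCount()
+// returns 1 even though the nnpackage contains 3 models.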
+
+} // namespace exec
+} // namespace onert
diff --git a/runtime/onert/core/src/exec/Executors.h b/runtime/onert/core/src/exec/Executors.h
new file mode 100644
index 000000000..ac7489186
--- /dev/null
+++ b/runtime/onert/core/src/exec/Executors.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_EXECUTORS_H__
+#define __ONERT_EXEC_EXECUTORS_H__
+
+#include "exec/IExecutors.h"
+#include "ir/NNPkg.h"
+#include "IPermuteFunction.h"
+
+namespace std
+{
+
+template <> struct hash<std::pair<::onert::ir::ModelIndex, ::onert::ir::SubgraphIndex>>
+{
+ size_t
+ operator()(const std::pair<::onert::ir::ModelIndex, ::onert::ir::SubgraphIndex> &pair) const
+ noexcept
+ {
+ return (hash<uint32_t>()(pair.first.value()) << 16) ^ hash<uint32_t>()(pair.second.value());
+ }
+};
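+// e.g. hashing the pair (ModelIndex{1}, SubgraphIndex{2}) yields (1 << 16) ^ 2 = 0x10002;
+// assuming both index values fit in 16 bits, distinct pairs hash to distinct values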
+
+} // namespace std
+
+namespace onert
+{
+namespace exec
+{
+
+/**
+ * @brief Class to gather executors
+ */
+class Executors : public IExecutors
+{
+public:
+ Executors(void) = delete;
+ Executors(std::unique_ptr<ir::ModelEdges> model_edges)
+ : _executors{}, _model_edges{std::move(model_edges)}, _edge_quant_layers{},
+ _edge_quant_tensors{}, _edge_tensors{}, _is_created_edge_quant_layers{false},
+ _pkg_input_quant_layers{}, _pkg_output_quant_layers{}, _pkg_input_quant_tensors{},
+ _pkg_output_quant_tensors{}, _pkg_input_tensors{}, _pkg_output_tensors{}
+ {
+ for (const auto &edge : _model_edges->edges)
+ {
+ _edge_map[edge.from].emplace_back(edge.to);
+ }
+ }
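+ // e.g. edges {A -> B, A -> C} yield _edge_map[A] == {B, C}: a single `from` IODesc may feed
+ // inputs of multiple executors (illustrative; A, B, C stand for hypothetical IODescs)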
+ Executors(const Executors &) = delete;
+ Executors(Executors &&) = default;
+ ~Executors() = default;
+
+ // TODO Use Executor index
+ void emplace(const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+ std::unique_ptr<IExecutor> exec) override;
+
+ IExecutor *at(const ir::ModelIndex &model_index,
+ const ir::SubgraphIndex &subg_index) const override;
+
+ uint32_t inputSize() const override;
+
+ uint32_t outputSize() const override;
+
+ const ir::OperandInfo &inputInfo(const ir::IOIndex &index) const override;
+
+ const ir::OperandInfo &outputInfo(const ir::IOIndex &index) const override;
+
+ void execute(const IODescription &desc) override;
+
+private:
+ void checkSupportedMultimodel() const;
+ void createEdgeQuantLayers();
+ void CreatePkgIOTensors(const IODescription &desc);
+ void createPkgIOQuantLayers(const IODescription &desc);
+ uint16_t modelCount() const;
+
+private:
+ // TODO Remove this class
+ class PermuteLayer : public exec::IPermuteFunction
+ {
+ public:
+ PermuteLayer(const std::vector<backend::ITensor *> &inputs,
+ const std::vector<backend::ITensor *> &outputs)
+ {
+ assert(inputs.size() == outputs.size());
+ _src_tensors = inputs;
+ _dst_tensors = outputs;
+ }
+ virtual ~PermuteLayer() {}
+ void optimize() override {}
+ };
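+ // A minimal usage sketch (the tensor pointers are hypothetical):
+ //   PermuteLayer layer{{src}, {dst}};
+ //   layer.prepare(); // calls the no-op optimize()
+ //   layer.run();     // permutes/copies src into dst via IPermuteFunction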
+
+ class EdgeTensor;
+
+private:
+ std::unordered_map<std::pair<ir::ModelIndex, ir::SubgraphIndex>, std::unique_ptr<IExecutor>>
+ _executors;
+
+ // NOTE _model_edges may use a different struct type for the executor implementation
+ std::unique_ptr<ir::ModelEdges> _model_edges;
+ std::unordered_map<ir::IODesc, std::vector<ir::IODesc>> _edge_map;
+
+ /**
+ * @brief Type-aware quantization layers for edges between executors
+ *
+ */
+ // TODO Move variables related to type-aware quantization for edges into compilation stage
+ // TODO Replace PermuteLayer with backend::builtin::kernel::PermuteLayer
+ std::unordered_map<std::pair<ir::ModelIndex, ir::SubgraphIndex>, std::unique_ptr<PermuteLayer>>
+ _edge_quant_layers;
+
+ /**
+ * @brief Tensors for type-aware quantization of edges
+ * Key: `to` IODesc, Value: EdgeTensor
+ */
+ // Q: Why is the key the `to` IODesc?
+ // A: These tensors are currently created depending on the type of `to`
+ // TODO Unify tensors with the same `from` tensor and same type
+ // NOTE The incomplete type 'EdgeTensor' cannot be declared as unique_ptr.
+ std::unordered_map<ir::IODesc, std::shared_ptr<EdgeTensor>> _edge_quant_tensors;
+
+ /**
+ * @brief Tensors for edges between executors that are not related to type-aware quantization
+ * Key: `from` IODesc, Value: EdgeTensor
+ */
+ // Q: Why is the key the `from` IODesc?
+ // A: A `from` tensor can be connected to multiple `to` tensors
+ // NOTE The incomplete type 'EdgeTensor' cannot be declared as unique_ptr.
+ std::unordered_map<ir::IODesc, std::shared_ptr<EdgeTensor>> _edge_tensors;
+ /**
+ * @brief Whether type-aware quantization layers for edges between executors are created
+ *
+ */
+ // TODO Remove this member after the creation of type-aware quantization layers for edges
+ // is moved into compilation stage
+ bool _is_created_edge_quant_layers;
+
+ // TODO Replace PermuteLayer with backend::builtin::kernel::PermuteLayer
+ std::unordered_map<std::pair<ir::ModelIndex, ir::SubgraphIndex>, std::unique_ptr<PermuteLayer>>
+ _pkg_input_quant_layers;
+ // TODO Replace PermuteLayer with backend::builtin::kernel::PermuteLayer
+ std::unordered_map<std::pair<ir::ModelIndex, ir::SubgraphIndex>, std::unique_ptr<PermuteLayer>>
+ _pkg_output_quant_layers;
+ // Edge tensors of nnpkg inputs/outputs for type-aware quantization
+ std::unordered_map<ir::IODesc, std::shared_ptr<EdgeTensor>> _pkg_input_quant_tensors;
+ std::unordered_map<ir::IODesc, std::shared_ptr<EdgeTensor>> _pkg_output_quant_tensors;
+ // IOTensors for user buffer
+ std::unordered_map<ir::IODesc, std::unique_ptr<backend::builtin::IOTensor>> _pkg_input_tensors;
+ std::unordered_map<ir::IODesc, std::unique_ptr<backend::builtin::IOTensor>> _pkg_output_tensors;
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_EXECUTORS_H__
diff --git a/runtime/onert/core/src/exec/FunctionSequence.cc b/runtime/onert/core/src/exec/FunctionSequence.cc
index fb31f7582..578123a54 100644
--- a/runtime/onert/core/src/exec/FunctionSequence.cc
+++ b/runtime/onert/core/src/exec/FunctionSequence.cc
@@ -16,8 +16,6 @@
#include "exec/FunctionSequence.h"
-#include "ir/Operation.h"
-#include "backend/IDynamicTensorManager.h"
#include "backend/ITensorRegistry.h"
#include "util/logging.h"
@@ -28,19 +26,19 @@ namespace exec
void FunctionSequence::run()
{
- // TODO Find out when `_enable_dynamic_shape_inferer` is true but `_dynamic_tensor_ctx` is false
if (_enable_dynamic_shape_inferer && _dynamic_tensor_ctx)
{
- if (_dynamic_tensor_ctx->op_seq->size() != _functions.size())
- throw std::runtime_error("operation and functions should be mapped one by one");
+ // The acl_cl and acl_neon backends don't support dynamic shapes.
+ // _dynamic_tensor_ctx is always nullptr for acl_cl and acl_neon.
+ // Thus, those two backends cannot reach here.
+
+ // Do dynamic shape inference
+ _dynamic_tensor_ctx->op->accept(*_dynamic_tensor_ctx->dynamic_shape_inferer);
- auto op_seq_iter = _dynamic_tensor_ctx->op_seq->begin();
for (const auto &function : _functions)
{
- // set shape of output and allocate memory when needed
- auto &op = _dynamic_tensor_ctx->operations->at(*op_seq_iter);
- op.accept(*_dynamic_tensor_ctx->dynamic_shape_inferer);
-
+ // NOTE the function could also be a FunctionSequence, so we handle that case here
+ // TODO Remove this or do this recursively
auto *sub_func_seq = dynamic_cast<FunctionSequence *>(function.get());
if (sub_func_seq != nullptr)
{
@@ -50,22 +48,12 @@ void FunctionSequence::run()
// run kernel
function->run();
-
- // deallocate input tensors which is no longer used
- _dynamic_tensor_ctx->dynamic_tensor_manager->deallocInput(*op_seq_iter);
-
- op_seq_iter++;
}
}
else
{
for (const auto &function : _functions)
{
- auto *sub_func_seq = dynamic_cast<FunctionSequence *>(function.get());
- if (sub_func_seq != nullptr)
- {
- sub_func_seq->enableDynamicShapeInferer(false);
- }
function->run();
}
}
diff --git a/runtime/onert/core/src/exec/IPermuteFunction.cc b/runtime/onert/core/src/exec/IPermuteFunction.cc
new file mode 100644
index 000000000..9d548e6dc
--- /dev/null
+++ b/runtime/onert/core/src/exec/IPermuteFunction.cc
@@ -0,0 +1,320 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IPermuteFunction.h"
+
+#include <cker/operation/Quantize.h>
+#include <cker/operation/Dequantize.h>
+#include "backend/IPortableTensor.h"
+#include "exec/IFunction.h"
+#include "ir/Index.h"
+#include "ir/Shape.h"
+#include <memory>
+#include <misc/polymorphic_downcast.h>
+#include <typeinfo>
+#include "util/Utils.h"
+#include <vector>
+#include <unordered_map>
+
+namespace
+{
+using namespace onert;
+
+inline nnfw::cker::Shape getShape(const backend::ITensor *tensor)
+{
+ const ir::Shape shape = tensor->getShape();
+
+ assert(tensor->layout() == ir::Layout::NHWC);
+
+ auto rank = shape.rank();
+ nnfw::cker::Shape ret(rank);
+ auto data = ret.DimsData();
+ for (int i = 0; i < rank; ++i)
+ {
+ data[i] = shape.dim(i);
+ }
+ return ret;
+}
+
+// Quantize per element
+template <typename InputT, typename OutputT>
+void elementwiseQuantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
+{
+ const auto scale = dst_tensor->data_scale();
+ const auto zero_point = dst_tensor->data_zero_point();
+
+ int min_val = std::numeric_limits<OutputT>::min();
+ int max_val = std::numeric_limits<OutputT>::max();
+
+ auto loop_shape = src_tensor->getShape();
+ const auto src_layout = src_tensor->layout();
+ const auto dst_layout = dst_tensor->layout();
+ const bool is_permutation = src_layout != dst_layout && loop_shape.rank() == 4;
+ ShapeLoop(loop_shape, [&](const onert::ir::Coordinates &coords) {
+ const InputT *input_data =
+ reinterpret_cast<const InputT *>(src_tensor->buffer() + src_tensor->calcOffset(coords));
+ int32_t unclamped = static_cast<int32_t>(round(*input_data / scale)) + zero_point;
+ int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
+
+ ir::Coordinates dst_coords =
+ is_permutation ? ir::convertCoordinates(coords, src_layout, dst_layout) : coords;
+ OutputT *output_data =
+ reinterpret_cast<OutputT *>(dst_tensor->buffer() + dst_tensor->calcOffset(dst_coords));
+ *output_data = clamped;
+ });
+}
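+// Worked example (illustrative): with scale = 10 and zero_point = 128 for a uint8 output,
+// an input of 30.0f maps to round(30 / 10) + 128 = 131, which already lies within [0, 255]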
+
+// TODO Optimize the case where tensors have the same layout
+template <typename InputT, typename OutputT>
+void quantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
+{
+ if (!src_tensor->has_padding() && !dst_tensor->has_padding() &&
+ src_tensor->layout() == dst_tensor->layout() && !src_tensor->is_dynamic())
+ {
+ assert(!dst_tensor->is_dynamic());
+
+ // Call optimized neon kernel
+ nnfw::cker::Quantize(getShape(src_tensor),
+ reinterpret_cast<const InputT *>(src_tensor->buffer()),
+ getShape(dst_tensor), reinterpret_cast<OutputT *>(dst_tensor->buffer()),
+ dst_tensor->data_scale(), dst_tensor->data_zero_point());
+ }
+ else
+ {
+ elementwiseQuantize<InputT, OutputT>(src_tensor, dst_tensor);
+ }
+}
+
+// Dequantize per element
+template <typename InputT, typename OutputT>
+void elementwiseDequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
+{
+ const auto scale = src_tensor->data_scale();
+ const auto zero_point = src_tensor->data_zero_point();
+
+ auto loop_shape = src_tensor->getShape();
+ const auto src_layout = src_tensor->layout();
+ const auto dst_layout = dst_tensor->layout();
+ const bool is_permutation = src_layout != dst_layout && loop_shape.rank() == 4;
+ ShapeLoop(loop_shape, [&](const onert::ir::Coordinates &coords) {
+ const InputT *input_data =
+ reinterpret_cast<const InputT *>(src_tensor->buffer() + src_tensor->calcOffset(coords));
+ const OutputT result = static_cast<OutputT>(scale * (*input_data - zero_point));
+
+ ir::Coordinates dst_coords =
+ is_permutation ? ir::convertCoordinates(coords, src_layout, dst_layout) : coords;
+ OutputT *output_data =
+ reinterpret_cast<OutputT *>(dst_tensor->buffer() + dst_tensor->calcOffset(dst_coords));
+ *output_data = result;
+ });
+}
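+// Worked example (illustrative): with scale = 10 and zero_point = 128, a quantized value of
+// 131 dequantizes to 10 * (131 - 128) = 30.0f, the inverse of the quantize example above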
+
+// TODO Optimize the case where tensors have the same layout
+template <typename InputT, typename OutputT>
+void dequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
+{
+ if (!src_tensor->has_padding() && !dst_tensor->has_padding() &&
+ src_tensor->layout() == dst_tensor->layout() && !src_tensor->is_dynamic())
+ {
+ assert(!dst_tensor->is_dynamic());
+
+ // Call optimized neon kernel
+ nnfw::cker::Dequantize(getShape(src_tensor),
+ reinterpret_cast<const InputT *>(src_tensor->buffer()),
+ getShape(dst_tensor), reinterpret_cast<OutputT *>(dst_tensor->buffer()),
+ src_tensor->data_scale(), src_tensor->data_zero_point());
+ }
+ else
+ {
+ elementwiseDequantize<InputT, OutputT>(src_tensor, dst_tensor);
+ }
+}
+
+template <typename SRC_T, typename DST_T,
+ std::enable_if_t<std::is_base_of<backend::ITensor, SRC_T>::value &&
+ std::is_base_of<backend::ITensor, DST_T>::value,
+ bool> = true>
+void typeAwareQuantize(const SRC_T *src_tensor, DST_T *dst_tensor)
+{
+ // TODO Support other types
+ if (src_tensor->data_type() == ir::DataType::FLOAT32)
+ {
+ switch (dst_tensor->data_type())
+ {
+ case ir::DataType::QUANT_UINT8_ASYMM:
+ {
+ quantize<float, uint8_t>(src_tensor, dst_tensor);
+ break;
+ }
+ case ir::DataType::QUANT_INT8_SYMM:
+ {
+ quantize<float, int8_t>(src_tensor, dst_tensor);
+ break;
+ }
+ case ir::DataType::QUANT_INT16_SYMM:
+ {
+ quantize<float, int16_t>(src_tensor, dst_tensor);
+ break;
+ }
+ default:
+ {
+ throw std::runtime_error("IPermuteFunction: Unsupported quantization type");
+ break;
+ }
+ }
+ }
+ else if (dst_tensor->data_type() == ir::DataType::FLOAT32)
+ {
+ switch (src_tensor->data_type())
+ {
+ case ir::DataType::QUANT_UINT8_ASYMM:
+ {
+ dequantize<uint8_t, float>(src_tensor, dst_tensor);
+ break;
+ }
+ case ir::DataType::QUANT_INT8_SYMM:
+ {
+ dequantize<int8_t, float>(src_tensor, dst_tensor);
+ break;
+ }
+ case ir::DataType::QUANT_INT16_SYMM:
+ {
+ dequantize<int16_t, float>(src_tensor, dst_tensor);
+ break;
+ }
+ default:
+ {
+ throw std::runtime_error("IPermuteFunction: Unsupported dequantization type");
+ break;
+ }
+ }
+ }
+ else
+ {
+ throw std::runtime_error("IPermuteFunction: Unsupported type for type-aware quantization yet");
+ }
+}
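+// In short, a FLOAT32 source with a quantized destination dispatches to quantize<float, T>,
+// the reverse pair dispatches to dequantize<T, float>, and every other combination throws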
+
+} // namespace
+
+namespace onert
+{
+namespace exec
+{
+
+void IPermuteFunction::run()
+{
+ // TODO Optimization: Make sure control does not reach here when _src_tensors.size() == 0
+ assert(_src_tensors.size() == _dst_tensors.size());
+ if (_src_tensors_offsets.size() == 0)
+ {
+ _src_tensors_offsets.resize(_src_tensors.size());
+ _dst_tensors_offsets.resize(_dst_tensors.size());
+ }
+ assert(_src_tensors.size() == _src_tensors_offsets.size());
+ assert(_src_tensors_offsets.size() == _dst_tensors_offsets.size());
+
+ for (size_t i = 0; i < _src_tensors.size(); ++i)
+ {
+ auto src_tensor = _src_tensors.at(i);
+ auto dst_tensor = _dst_tensors.at(i);
+ auto &src_offsets = _src_tensors_offsets.at(i);
+ auto &dst_offsets = _dst_tensors_offsets.at(i);
+ if (src_tensor != dst_tensor)
+ {
+ const auto rank = src_tensor->getShape().rank();
+ permute(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+ }
+ }
+}
+
+void IPermuteFunction::permute(backend::ITensor *src_tensor, backend::ITensor *dst_tensor,
+ size_t rank, std::vector<size_t> &src_offsets,
+ std::vector<size_t> &dst_offsets)
+{
+ if (src_tensor->total_size() == 0)
+ {
+ assert(dst_tensor->total_size() == 0);
+ return;
+ }
+
+ assert(src_tensor != dst_tensor);
+ if (underlying_type(src_tensor->data_type()) != underlying_type(dst_tensor->data_type()))
+ {
+ typeAwareQuantize(src_tensor, dst_tensor);
+ return;
+ }
+
+ switch (src_tensor->data_type())
+ {
+ case ir::DataType::FLOAT32:
+ permute<float>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+ break;
+ case ir::DataType::INT32:
+ permute<int32_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+ break;
+ case ir::DataType::UINT32:
+ permute<uint32_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+ break;
+ case ir::DataType::BOOL8:
+ case ir::DataType::QUANT_UINT8_ASYMM:
+ case ir::DataType::UINT8:
+ permute<uint8_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+ break;
+ case ir::DataType::QUANT_INT8_ASYMM:
+ case ir::DataType::QUANT_INT8_SYMM:
+ permute<int8_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+ break;
+ case ir::DataType::INT64:
+ permute<int64_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+ break;
+ case ir::DataType::QUANT_INT16_SYMM:
+ permute<int16_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+ break;
+ default:
+ throw std::runtime_error("IPermuteFunction: Not supported data type");
+ break;
+ }
+}
+
+const std::type_info &IPermuteFunction::underlying_type(ir::DataType type) const
+{
+ switch (type)
+ {
+ case ir::DataType::FLOAT32:
+ return typeid(float);
+ case ir::DataType::INT32:
+ return typeid(int32_t);
+ case ir::DataType::UINT32:
+ return typeid(uint32_t);
+ case ir::DataType::INT64:
+ return typeid(int64_t);
+ case ir::DataType::BOOL8:
+ case ir::DataType::QUANT_UINT8_ASYMM:
+ case ir::DataType::UINT8:
+ return typeid(uint8_t);
+ case ir::DataType::QUANT_INT8_ASYMM:
+ case ir::DataType::QUANT_INT8_SYMM:
+ return typeid(int8_t);
+ case ir::DataType::QUANT_INT16_SYMM:
+ return typeid(int16_t);
+ default:
+ throw std::runtime_error("IPermuteFunction: Not supported data type");
+ }
+}
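+// e.g. underlying_type(ir::DataType::QUANT_UINT8_ASYMM) == typeid(uint8_t), so permuting a
+// UINT8 tensor into a QUANT_UINT8_ASYMM tensor copies raw bytes instead of taking the
+// type-aware quantization path in permute() above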
+
+} // namespace exec
+} // namespace onert
diff --git a/runtime/onert/core/src/exec/IPermuteFunction.h b/runtime/onert/core/src/exec/IPermuteFunction.h
index 6b4d15380..e790f3290 100644
--- a/runtime/onert/core/src/exec/IPermuteFunction.h
+++ b/runtime/onert/core/src/exec/IPermuteFunction.h
@@ -25,21 +25,48 @@
#include "backend/ITensor.h"
#include "exec/IFunction.h"
-#include "ir/Index.h"
-#include "ir/Shape.h"
#include <memory>
-#include <typeinfo>
-#include "util/Utils.h"
#include <vector>
+#include <unordered_map>
namespace onert
{
namespace exec
{
+inline void UpdateOffsets(::onert::backend::ITensor *src, ::onert::backend::ITensor *dst,
+ const ::onert::ir::Shape &loop_shape, std::vector<size_t> &src_offsets,
+ std::vector<size_t> &dst_offsets)
+{
+ ShapeLoop(loop_shape, [&](const onert::ir::Coordinates &coords) {
+ src_offsets.emplace_back(src->calcOffset(coords));
+ dst_offsets.emplace_back(dst->calcOffset(coords));
+ });
+}
+
+inline void CopyStatic(const uint8_t *src_buffer, uint8_t *dst_buffer,
+ const std::vector<size_t> &src_offsets,
+ const std::vector<size_t> &dst_offsets, size_t copy_len)
+{
+ assert(src_offsets.size() == dst_offsets.size());
+ for (size_t i = 0; i < src_offsets.size(); ++i)
+ {
+ memcpy(dst_buffer + dst_offsets.at(i), src_buffer + src_offsets.at(i), copy_len);
+ }
+}
+
+inline void CopyDynamic(const ::onert::backend::ITensor *src, const ::onert::backend::ITensor *dst,
+ uint8_t *dst_buffer, const ::onert::ir::Shape &loop_shape, size_t copy_len)
+{
+ ShapeLoop(loop_shape, [&](const onert::ir::Coordinates &coords) {
+ // Copy src tensor's data to dst_buffer with calculated offset of dst tensor
+ memcpy(dst_buffer + dst->calcOffset(coords), src->buffer() + src->calcOffset(coords), copy_len);
+ });
+}
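+// Design note (inferred from the call sites below): for static tensors UpdateOffsets runs once
+// to cache per-row offsets, which CopyStatic then reuses on every run(); dynamic tensors
+// recompute offsets on each run via CopyDynamic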
+
class IPermuteFunction : public IFunction
{
-private:
+protected:
enum class PermuteType
{
NHWC_TO_NCHW,
@@ -48,63 +75,69 @@ private:
};
public:
- virtual void run() override
+ virtual void run() override;
+
+ virtual void prepare() override { optimize(); }
+
+ virtual void optimize() = 0;
+
+protected:
+ void permute(backend::ITensor *src_tensor, backend::ITensor *dst_tensor, size_t rank,
+ std::vector<size_t> &src_offsets, std::vector<size_t> &dst_offsets);
+
+private:
+ // TODO Make src const by providing a const access()
+ template <class T>
+ void permute(backend::ITensor *src, backend::ITensor *dst, size_t rank,
+ std::vector<size_t> &src_offsets, std::vector<size_t> &dst_offsets)
{
- assert(_src_tensors.size() > 0);
- assert(_src_tensors.size() == _dst_tensors.size());
- auto src_it = _src_tensors.begin();
- auto dst_it = _dst_tensors.begin();
- while (src_it != _src_tensors.end())
+ assert(src->total_size() != 0 && dst->total_size() != 0);
+ // If dst is a subtensor, we have to use clEnqueueMapBuffer instead of clEnqueueWriteBuffer
+ if (dst->needMemoryMap() && !dst->is_subtensor())
{
- const auto src_tensor = *src_it;
- auto dst_tensor = *dst_it;
- if (src_tensor != dst_tensor)
+ // An assertion to check mapping without calling map()
+ // Currently there is no case where both src and dst have a CL buffer.
+ assert(!src->needMemoryMap());
+
+ if (!src->has_padding() && !dst->has_padding() && src->layout() == dst->layout())
{
- // TODO Change to permute in parallel
- assert(underlying_type(src_tensor->data_type()) ==
- underlying_type(dst_tensor->data_type()));
- const auto rank = src_tensor->num_dimensions();
- switch (src_tensor->data_type())
- {
- case ir::DataType::FLOAT32:
- permute<float>(src_tensor, dst_tensor, rank);
- break;
- case ir::DataType::INT32:
- permute<int32_t>(src_tensor, dst_tensor, rank);
- break;
- case ir::DataType::UINT32:
- permute<uint32_t>(src_tensor, dst_tensor, rank);
- break;
- case ir::DataType::BOOL8:
- case ir::DataType::QUANT_UINT8_ASYMM:
- case ir::DataType::UINT8:
- permute<uint8_t>(src_tensor, dst_tensor, rank);
- break;
- case ir::DataType::QUANT_INT8_SYMM:
- permute<int8_t>(src_tensor, dst_tensor, rank);
- break;
- case ir::DataType::INT64:
- permute<int64_t>(src_tensor, dst_tensor, rank);
- break;
- default:
- throw std::runtime_error("IPermuteFunction: Not supported data type");
- break;
- }
+ src->access([&](backend::ITensor &) { dst->enqueueWriteBuffer(src->buffer(), false); });
}
- src_it++;
- dst_it++;
+ else
+ {
+ // TODO Optimize this block for the case where the padding size of dst is big.
+ _buffers_map[dst].resize(dst->total_size());
+ auto dst_buffer = _buffers_map[dst].data();
+ src->access([&](backend::ITensor &) {
+ permute<T>(src, dst, rank, dst_buffer, dst->total_size(), src_offsets, dst_offsets);
+ });
+ dst->enqueueWriteBuffer(dst_buffer, false);
+ }
+ }
+ else if (src->needMemoryMap() && !src->is_subtensor() && !src->has_padding() &&
+ !dst->has_padding() && src->layout() == dst->layout())
+ {
+ assert(!dst->needMemoryMap());
+ dst->access([&](backend::ITensor &) { src->enqueueReadBuffer(dst->buffer(), true); });
+ }
+ else
+ {
+ auto fn = [&](backend::ITensor &) {
+ dst->access([&](backend::ITensor &) {
+ permute<T>(src, dst, rank, dst->buffer(), dst->total_size(), src_offsets, dst_offsets);
+ });
+ };
+ src->access(fn);
}
}
- virtual void prepare() override { optimize(); }
-
- virtual void optimize() = 0;
-
-private:
template <class T>
- void permute(const std::shared_ptr<backend::ITensor> &src, std::shared_ptr<backend::ITensor> &dst,
- size_t rank)
+ void permute(backend::ITensor *src, backend::ITensor *dst, size_t rank, uint8_t *dst_buffer,
+ size_t dst_size, std::vector<size_t> &src_offsets, std::vector<size_t> &dst_offsets)
{
+ assert(dst_buffer != nullptr);
+ assert(dst_size == dst->total_size());
+
const auto permute_type = [&]() -> PermuteType {
if (src->layout() == ir::Layout::NHWC && dst->layout() == ir::Layout::NCHW)
{
@@ -119,166 +152,115 @@ private:
return PermuteType::COPY;
}
}();
- auto fn = [&](backend::ITensor &src_tensor) {
- dst->access([&](backend::ITensor &dst_tensor) {
- auto src_buffer = src_tensor.buffer();
- auto src_size = src_tensor.total_size();
- auto dst_buffer = dst_tensor.buffer();
- if (permute_type == PermuteType::COPY)
+ if (rank == 4 && permute_type != PermuteType::COPY)
+ {
+ switch (permute_type)
+ {
+ case PermuteType::NHWC_TO_NCHW:
{
- assert(src_tensor.layout() == dst_tensor.layout());
- if (!src_tensor.has_padding() && !dst_tensor.has_padding())
- {
- assert(src_size <= dst_tensor.total_size());
- memcpy(dst_buffer, src_buffer, src_size);
- return;
- }
+ ir::FeatureShape shape;
+ auto dst_shape = dst->getShape();
+ shape.N = dst_shape.dim(0);
+ shape.C = dst_shape.dim(1);
+ shape.H = dst_shape.dim(2);
+ shape.W = dst_shape.dim(3);
+
+ typename feature::nchw::View<T>::Strides strides;
+ const auto start_offset = dst->calcOffset({0, 0, 0, 0});
+ strides.W = dst_shape.dim(3) == 1 ? 0 : dst->calcOffset({0, 0, 0, 1}) - start_offset;
+ strides.H = dst_shape.dim(2) == 1 ? 0 : dst->calcOffset({0, 0, 1, 0}) - start_offset;
+ strides.C = dst_shape.dim(1) == 1 ? 0 : dst->calcOffset({0, 1, 0, 0}) - start_offset;
+ strides.N = dst_shape.dim(0) == 1 ? 0 : dst->calcOffset({1, 0, 0, 0}) - start_offset;
+
+ const feature::nhwc::Reader<T> from(src);
+ feature::nchw::View<T> into(shape, strides,
+ reinterpret_cast<T *>(dst_buffer + start_offset), dst_size);
+ feature::iterate(shape) << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
+ const auto value = from.at(batch, row, col, ch);
+ into.at(batch, ch, row, col) = value;
+ };
+ break;
}
- switch (rank)
+ case PermuteType::NCHW_TO_NHWC:
{
- case 0:
- case 1:
- {
- const int32_t copy_len = dst_tensor.dimension(0);
+ ir::FeatureShape shape;
+ auto dst_shape = dst->getShape();
+ shape.N = dst_shape.dim(0);
+ shape.H = dst_shape.dim(1);
+ shape.W = dst_shape.dim(2);
+ shape.C = dst_shape.dim(3);
- memcpy(dst_buffer, src_buffer, copy_len * sizeof(T));
- break;
- }
- case 2:
- {
- const int32_t dim_0 = dst_tensor.dimension(0);
- const int32_t copy_len = dst_tensor.dimension(1);
+ typename feature::nhwc::View<T>::Strides strides;
+ const auto start_offset = dst->calcOffset({0, 0, 0, 0});
+ strides.C = dst_shape.dim(3) == 1 ? 0 : dst->calcOffset({0, 0, 0, 1}) - start_offset;
+ strides.W = dst_shape.dim(2) == 1 ? 0 : dst->calcOffset({0, 0, 1, 0}) - start_offset;
+ strides.H = dst_shape.dim(1) == 1 ? 0 : dst->calcOffset({0, 1, 0, 0}) - start_offset;
+ strides.N = dst_shape.dim(0) == 1 ? 0 : dst->calcOffset({1, 0, 0, 0}) - start_offset;
- for (int32_t i = 0; i < dim_0; ++i)
- {
- ir::Coordinates coords{i, 0};
- memcpy(dst_buffer + dst_tensor.calcOffset(coords),
- src_buffer + src_tensor.calcOffset(coords), copy_len * sizeof(T));
- }
- break;
- }
- case 3:
- {
- const int32_t dim_0 = dst_tensor.dimension(0);
- const int32_t dim_1 = dst_tensor.dimension(1);
- const int32_t copy_len = dst_tensor.dimension(2);
+ const feature::nchw::Reader<T> from(src);
+ feature::nhwc::View<T> into(shape, strides,
+ reinterpret_cast<T *>(dst_buffer + start_offset), dst_size);
+ feature::iterate(shape) << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
+ const auto value = from.at(batch, ch, row, col);
+ into.at(batch, row, col, ch) = value;
+ };
+ break;
+ }
+ default:
+ {
+ throw std::runtime_error("Unsupported Permutation");
+ break;
+ }
+ }
+ }
+ else if (!src->has_padding() && !dst->has_padding())
+ {
+ auto src_size = src->total_size();
+ assert(src_size <= dst->total_size());
+ memcpy(dst_buffer, src->buffer(), src_size);
+ }
+ else
+ {
+ auto loop_shape = src->getShape();
+ const auto copy_axis = loop_shape.rank() - 1;
+ const auto copy_len = loop_shape.dim(copy_axis) * sizeof(T);
+ loop_shape.dim(copy_axis) = 1;
- for (auto i = 0; i < dim_0; ++i)
- {
- for (auto j = 0; j < dim_1; ++j)
- {
- ir::Coordinates coords{i, j, 0};
- memcpy(dst_buffer + dst_tensor.calcOffset(coords),
- src_buffer + src_tensor.calcOffset(coords), copy_len * sizeof(T));
- }
- }
- break;
- }
- case 4:
- {
- switch (permute_type)
- {
- case PermuteType::NHWC_TO_NCHW:
- {
- ir::FeatureShape shape;
- shape.N = dst_tensor.dimension(0);
- shape.C = dst_tensor.dimension(1);
- shape.H = dst_tensor.dimension(2);
- shape.W = dst_tensor.dimension(3);
- const feature::nhwc::Reader<T> from(&src_tensor);
- feature::nchw::View<T> into(&dst_tensor);
- feature::iterate(shape)
- << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
- const auto value = from.at(batch, row, col, ch);
- into.at(batch, ch, row, col) = value;
- };
- break;
- }
- case PermuteType::NCHW_TO_NHWC:
- {
- ir::FeatureShape shape;
- shape.N = src_tensor.dimension(0);
- shape.C = src_tensor.dimension(1);
- shape.H = src_tensor.dimension(2);
- shape.W = src_tensor.dimension(3);
- const feature::nchw::Reader<T> from(&src_tensor);
- feature::nhwc::View<T> into(&dst_tensor);
- feature::iterate(shape)
- << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
- const auto value = from.at(batch, ch, row, col);
- into.at(batch, row, col, ch) = value;
- };
- break;
- }
- case PermuteType::COPY:
- {
- const int32_t dim_0 = dst_tensor.dimension(0);
- const int32_t dim_1 = dst_tensor.dimension(1);
- const int32_t dim_2 = dst_tensor.dimension(2);
- const int32_t copy_len = dst_tensor.dimension(3);
+ if (src->is_dynamic())
+ {
+ assert(dst->is_dynamic());
+ CopyDynamic(src, dst, dst_buffer, loop_shape, copy_len);
+ }
+ else
+ {
+ // TODO Uncomment the assertion below
+ // assert(!dst->is_dynamic() || dst is output of graph);
+ if (src_offsets.size() == 0)
+ {
+ assert(dst_offsets.size() == 0);
- for (auto i = 0; i < dim_0; ++i)
- {
- for (auto j = 0; j < dim_1; ++j)
- {
- for (auto k = 0; k < dim_2; ++k)
- {
- ir::Coordinates coords{i, j, k, 0};
- memcpy(dst_buffer + dst_tensor.calcOffset(coords),
- src_buffer + src_tensor.calcOffset(coords), copy_len * sizeof(T));
- }
- }
- }
- break;
- }
- default:
- {
- throw std::runtime_error("Unsupported Permutation");
- break;
- }
- }
- break;
- }
- default:
- throw std::runtime_error("Unsupported rank in permutation");
- break;
+ UpdateOffsets(src, dst, loop_shape, src_offsets, dst_offsets);
}
- });
- };
- src->access(fn);
+ CopyStatic(src->buffer(), dst_buffer, src_offsets, dst_offsets, copy_len);
+ }
+ }
}
+protected:
// NOTE The typeid expression is lvalue expression which refers to an object with static storage
// duration, of the polymorphic type const std::type_info or of some type derived from it.
// So std::type_info is non-copyable
- const std::type_info &underlying_type(ir::DataType type) const
- {
- switch (type)
- {
- case ir::DataType::FLOAT32:
- return typeid(float);
- case ir::DataType::INT32:
- return typeid(int32_t);
- case ir::DataType::UINT32:
- return typeid(uint32_t);
- case ir::DataType::INT64:
- return typeid(int64_t);
- case ir::DataType::BOOL8:
- case ir::DataType::QUANT_UINT8_ASYMM:
- case ir::DataType::UINT8:
- return typeid(uint8_t);
- case ir::DataType::QUANT_INT8_SYMM:
- return typeid(int8_t);
- default:
- throw std::runtime_error("IPermuteFunction: Not supported data type");
- }
- }
+ const std::type_info &underlying_type(ir::DataType type) const;
protected:
- std::vector<std::shared_ptr<backend::ITensor>> _src_tensors;
- std::vector<std::shared_ptr<backend::ITensor>> _dst_tensors;
- // TODO Remove this member if it is possible
- std::vector<size_t> _ranks;
+ std::vector<backend::ITensor *> _src_tensors;
+ std::vector<backend::ITensor *> _dst_tensors;
+ std::vector<std::vector<size_t>> _src_tensors_offsets;
+ std::vector<std::vector<size_t>> _dst_tensors_offsets;
+ std::unordered_map<const backend::ITensor *, std::vector<uint8_t>> _buffers_map;
};
} // namespace exec
diff --git a/runtime/onert/core/src/exec/IPermuteFunction.test.cc b/runtime/onert/core/src/exec/IPermuteFunction.test.cc
new file mode 100644
index 000000000..1009f194d
--- /dev/null
+++ b/runtime/onert/core/src/exec/IPermuteFunction.test.cc
@@ -0,0 +1,902 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IPermuteFunction.h"
+
+#include <ir/Layout.h>
+#include <ir/Shape.h>
+#include <ir/TypeInfo.h>
+
+#include <cmath>
+#include <gtest/gtest.h>
+
+namespace
+{
+using namespace onert;
+using namespace ir;
+using namespace backend;
+using namespace exec;
+
+class MockUpTensor : public ITensor
+{
+public:
+ MockUpTensor(const Shape &shape, const TypeInfo &type_info, Layout layout, size_t pad)
+ : _shape(shape), _type_info(type_info), _data(nullptr), _layout(layout)
+ {
+ _strides.resize(shape.rank());
+
+ std::vector<size_t> pads(shape.rank(), 0);
+ pads[shape.rank() - 1] = pad;
+ size_t stride = 1;
+ for (int32_t i = _shape.rank() - 1; i >= 0; --i)
+ {
+ _strides.at(i) = stride;
+ stride = stride * (_shape.dim(i) + pads.at(i));
+ }
+ }
+ virtual ~MockUpTensor() {}
+
+ void setBuffer(uint8_t *data) { _data = data; }
+
+ size_t total_size() const override
+ {
+ size_t total_size = _strides[0] * _shape.dim(0);
+ total_size *= sizeOfDataType(data_type());
+ return total_size;
+ }
+
+ size_t calcOffset(const ir::Coordinates &coords) const override
+ {
+ size_t offset = 0;
+ for (size_t i = 0; i < _shape.rank(); ++i)
+ {
+ offset += (_strides[i] * coords[i]);
+ }
+ offset *= sizeOfDataType(data_type());
+ return offset;
+ }
+
+ uint8_t *buffer() const override { return _data; }
+
+ ir::Layout layout() const override { return _layout; }
+ ir::DataType data_type() const override { return _type_info.type(); }
+ float data_scale() const override { return _type_info.scale(); }
+ int32_t data_zero_point() const override { return _type_info.zero_point(); }
+ const std::vector<float> &data_scales() const override { return _type_info.scales(); }
+ const std::vector<int32_t> &data_zero_points() const override { return _type_info.zero_points(); }
+ bool has_padding() const override
+ {
+ return total_size() / sizeOfDataType(data_type()) != _shape.num_elements();
+ }
+ void access(const std::function<void(ITensor &tensor)> &fn) final { fn(*this); }
+
+ bool is_dynamic() const override { return false; }
+ Shape getShape() const override { return _shape; }
+
+private:
+ Shape _shape;
+ TypeInfo _type_info;
+ Layout _layout;
+ uint8_t *_data;
+ std::vector<size_t> _strides;
+};
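+// Illustrative stride example: with shape {2, 3} and pad = 2, the loop above computes
+// _strides == {5, 1}, so calcOffset({1, 0}) returns 5 * sizeof(element) and total_size()
+// covers 10 elements including padding, which is why has_padding() reports true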
+
+class MockUpLayer : public IPermuteFunction
+{
+public:
+ MockUpLayer(const std::vector<ITensor *> &inputs, const std::vector<ITensor *> &outputs)
+ {
+ assert(inputs.size() == outputs.size());
+ _src_tensors = inputs;
+ _dst_tensors = outputs;
+ }
+ virtual ~MockUpLayer() {}
+ void optimize() override {}
+};
+
+TEST(IPermuteFunction, float_rank1)
+{
+ const size_t input_pads[4] = {0, 1, 0, 2};
+ const size_t output_pads[4] = {0, 0, 2, 1};
+ const std::vector<Shape> shapes{{1}, {4}, {5}, {2}};
+ float expected_buffer[] = {1, 0, -1, -2, 3};
+ const auto type_info = TypeInfo(DataType::FLOAT32);
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+ outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ Coordinates coords{j};
+ float result =
+ *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+ float expected =
+ *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+ EXPECT_EQ(result, expected);
+ }
+ }
+}
+
+TEST(IPermuteFunction, float_rank2)
+{
+ const size_t input_pads[4] = {0, 1, 0, 2};
+ const size_t output_pads[4] = {0, 0, 2, 1};
+ const std::vector<Shape> shapes{{1, 4}, {2, 2}, {1, 5}, {2, 3}};
+ float expected_buffer[] = {1, 0, -1, -2, 3, -4, 5, -6, 7, -8};
+ const auto type_info = TypeInfo(DataType::FLOAT32);
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+ outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ Coordinates coords{j, k};
+ float result =
+ *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+ float expected =
+ *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+}
+
+TEST(IPermuteFunction, float_rank3)
+{
+ const size_t input_pads[4] = {0, 5, 0, 2};
+ const size_t output_pads[4] = {0, 3, 2, 1};
+ const std::vector<Shape> shapes{{1, 4, 1}, {1, 2, 1}, {2, 1, 5}, {1, 2, 3}};
+ float expected_buffer[] = {1, 0, -1, -2, 3, -4, 5, -6, 7, -8, 9, -10};
+ const auto type_info = TypeInfo(DataType::FLOAT32);
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+ outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+ {
+ Coordinates coords{j, k, l};
+ float result =
+ *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+ float expected =
+ *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+ }
+}
+
+TEST(IPermuteFunction, float_rank4)
+{
+ const size_t input_pads[4] = {0, 0, 1, 2};
+ const size_t output_pads[4] = {0, 3, 2, 1};
+ const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+ float expected_buffer[] = {1, 0, -1, -2, 3, -4, 5, -6, 7, -8, 9, -10};
+ const auto type_info = TypeInfo(DataType::FLOAT32);
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+ outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+ {
+ for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+ {
+ Coordinates coords{j, k, l, m};
+ float result =
+ *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+ float expected =
+ *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST(IPermuteFunction, float_rank4_layout)
+{
+ const size_t input_pads[4] = {0, 0, 1, 2};
+ const size_t output_pads[4] = {0, 3, 2, 1};
+ const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+ float expected_buffer[] = {1, 0, -1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16};
+ const auto type_info = TypeInfo(DataType::FLOAT32);
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ Layout layout = Layout::NHWC;
+ Shape shape = shapes[i];
+ if (i % 2 == 1)
+ {
+ layout = Layout::NCHW;
+ shape = Shape{shapes[i].dim(0), shapes[i].dim(3), shapes[i].dim(1), shapes[i].dim(2)};
+ }
+ inputs[i] = std::make_unique<MockUpTensor>(shape, type_info, layout, input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+ if (layout == Layout::NHWC)
+ {
+ layout = Layout::NCHW;
+ shape = Shape{shapes[i].dim(0), shapes[i].dim(3), shapes[i].dim(1), shapes[i].dim(2)};
+ }
+ else
+ {
+ layout = Layout::NHWC;
+ shape = shapes[i];
+ }
+ outputs[i] = std::make_unique<MockUpTensor>(shape, type_info, layout, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+ {
+ for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+ {
+ Coordinates input_coords;
+ Coordinates output_coords;
+ if (inputs[i]->layout() == Layout::NHWC)
+ {
+ input_coords = Coordinates{j, k, l, m};
+ }
+ else
+ {
+ input_coords = Coordinates{j, m, k, l};
+ }
+ if (outputs[i]->layout() == Layout::NHWC)
+ {
+ output_coords = Coordinates{j, k, l, m};
+ }
+ else
+ {
+ output_coords = Coordinates{j, m, k, l};
+ }
+ float result = *reinterpret_cast<float *>(outputs[i]->buffer() +
+ outputs[i]->calcOffset(output_coords));
+ float expected =
+ *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(input_coords));
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST(IPermuteFunction, float_to_qasymm8)
+{
+ const size_t input_pads[4] = {0, 0, 1, 2};
+ const size_t output_pads[4] = {0, 3, 2, 1};
+ const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+ float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100};
+ float scale = 10;
+ int32_t zero_point = 128;
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ inputs[i] = std::make_unique<MockUpTensor>(shapes[i], TypeInfo(DataType::FLOAT32), Layout::NHWC,
+ input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+ TypeInfo type_info{DataType::QUANT_UINT8_ASYMM, scale, zero_point};
+ outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+ {
+ for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+ {
+ Coordinates coords{j, k, l, m};
+ uint8_t qasymm8 =
+ *reinterpret_cast<uint8_t *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+ float result = (qasymm8 - zero_point) * scale;
+ float expected =
+ *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST(IPermuteFunction, float_to_qsymm8)
+{
+ const size_t input_pads[4] = {0, 0, 1, 2};
+ const size_t output_pads[4] = {0, 3, 2, 1};
+ const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+ float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100};
+ float scale = 10;
+ int32_t zero_point = 0;
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ inputs[i] = std::make_unique<MockUpTensor>(shapes[i], TypeInfo(DataType::FLOAT32), Layout::NHWC,
+ input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+ TypeInfo type_info{DataType::QUANT_INT8_SYMM, scale, zero_point};
+ outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+ {
+ for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+ {
+ Coordinates coords{j, k, l, m};
+ int8_t qsymm8 =
+ *reinterpret_cast<int8_t *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+ float result = (qsymm8 - zero_point) * scale;
+ float expected =
+ *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST(IPermuteFunction, float_to_qsymm16)
+{
+ const size_t input_pads[4] = {0, 0, 1, 2};
+ const size_t output_pads[4] = {0, 3, 2, 1};
+ const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+ float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100};
+ float scale = 10;
+ int32_t zero_point = 0;
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ inputs[i] = std::make_unique<MockUpTensor>(shapes[i], TypeInfo(DataType::FLOAT32), Layout::NHWC,
+ input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+ TypeInfo type_info{DataType::QUANT_INT16_SYMM, scale, zero_point};
+ outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+ {
+ for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+ {
+ Coordinates coords{j, k, l, m};
+ int16_t qsymm16 =
+ *reinterpret_cast<int16_t *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+ float result = (qsymm16 - zero_point) * scale;
+ float expected =
+ *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST(IPermuteFunction, qasymm8_to_float)
+{
+ const size_t input_pads[4] = {0, 0, 1, 2};
+ const size_t output_pads[4] = {0, 3, 2, 1};
+ const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+ float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100};
+ float scale = 10;
+ int32_t zero_point = 128;
+ uint8_t input_buffer[12];
+
+ int32_t min_val = std::numeric_limits<uint8_t>::min();
+ int32_t max_val = std::numeric_limits<uint8_t>::max();
+ for (size_t i = 0; i < sizeof(expected_buffer) / sizeof(float); ++i)
+ {
+ int32_t unclamped = static_cast<int32_t>(std::round(expected_buffer[i] / scale)) + zero_point;
+ input_buffer[i] = std::min(std::max(unclamped, min_val), max_val);
+ }
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ TypeInfo type_info{DataType::QUANT_UINT8_ASYMM, scale, zero_point};
+ inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(input_buffer));
+
+ outputs[i] = std::make_unique<MockUpTensor>(shapes[i], TypeInfo(DataType::FLOAT32),
+ Layout::NHWC, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+ {
+ for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+ {
+ Coordinates coords{j, k, l, m};
+ float result =
+ *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+ uint8_t qasymm8 =
+ *reinterpret_cast<uint8_t *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+ float expected = (qasymm8 - zero_point) * scale;
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST(IPermuteFunction, qsymm8_to_float)
+{
+ const size_t input_pads[4] = {0, 0, 1, 2};
+ const size_t output_pads[4] = {0, 3, 2, 1};
+ const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+ float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100};
+ float scale = 10;
+ int32_t zero_point = 0;
+ uint8_t input_buffer[12];
+
+ int32_t min_val = std::numeric_limits<int8_t>::min();
+ int32_t max_val = std::numeric_limits<int8_t>::max();
+ for (size_t i = 0; i < sizeof(expected_buffer) / sizeof(float); ++i)
+ {
+ int32_t unclamped = static_cast<int32_t>(std::round(expected_buffer[i] / scale)) + zero_point;
+ input_buffer[i] = std::min(std::max(unclamped, min_val), max_val);
+ }
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ TypeInfo type_info{DataType::QUANT_INT8_SYMM, scale, zero_point};
+ inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(input_buffer));
+
+ outputs[i] = std::make_unique<MockUpTensor>(shapes[i], TypeInfo(DataType::FLOAT32),
+ Layout::NHWC, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+ {
+ for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+ {
+ Coordinates coords{j, k, l, m};
+ float result =
+ *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+ int8_t qsymm8 =
+ *reinterpret_cast<int8_t *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+ float expected = (qsymm8 - zero_point) * scale;
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST(IPermuteFunction, qsymm16_to_float)
+{
+ const size_t input_pads[4] = {0, 0, 1, 2};
+ const size_t output_pads[4] = {0, 3, 2, 1};
+ const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+ float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100};
+ float scale = 10;
+ int32_t zero_point = 0;
+ uint8_t input_buffer[12];
+
+ int32_t min_val = std::numeric_limits<int16_t>::min();
+ int32_t max_val = std::numeric_limits<int16_t>::max();
+ for (size_t i = 0; i < sizeof(expected_buffer) / sizeof(float); ++i)
+ {
+ int32_t unclamped = static_cast<int32_t>(std::round(expected_buffer[i] / scale)) + zero_point;
+ input_buffer[i] = std::min(std::max(unclamped, min_val), max_val);
+ }
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ TypeInfo type_info{DataType::QUANT_INT16_SYMM, scale, zero_point};
+ inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(input_buffer));
+
+ outputs[i] = std::make_unique<MockUpTensor>(shapes[i], TypeInfo(DataType::FLOAT32),
+ Layout::NHWC, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+ {
+ for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+ {
+ Coordinates coords{j, k, l, m};
+ float result =
+ *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+ int16_t qsymm16 =
+ *reinterpret_cast<int16_t *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+ float expected = (qsymm16 - zero_point) * scale;
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST(IPermuteFunction, float_to_qasymm8_layout)
+{
+ const size_t input_pads[4] = {0, 0, 1, 2};
+ const size_t output_pads[4] = {0, 3, 2, 1};
+ const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+ float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70,
+ -80, 90, -100, 110, -120, 130, -140, 150, -160};
+ float scale = 10;
+ int32_t zero_point = 128;
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ Layout layout = Layout::NHWC;
+ Shape shape = shapes[i];
+ if (i % 2 == 1)
+ {
+ layout = Layout::NCHW;
+ shape = Shape{shapes[i].dim(0), shapes[i].dim(3), shapes[i].dim(1), shapes[i].dim(2)};
+ }
+ inputs[i] =
+ std::make_unique<MockUpTensor>(shape, TypeInfo(DataType::FLOAT32), layout, input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+ if (layout == Layout::NHWC)
+ {
+ layout = Layout::NCHW;
+ shape = Shape{shapes[i].dim(0), shapes[i].dim(3), shapes[i].dim(1), shapes[i].dim(2)};
+ }
+ else
+ {
+ layout = Layout::NHWC;
+ shape = shapes[i];
+ }
+ TypeInfo type_info{DataType::QUANT_UINT8_ASYMM, scale, zero_point};
+ outputs[i] = std::make_unique<MockUpTensor>(shape, type_info, layout, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+ {
+ for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+ {
+ Coordinates input_coords;
+ Coordinates output_coords;
+ if (inputs[i]->layout() == Layout::NHWC)
+ {
+ input_coords = Coordinates{j, k, l, m};
+ }
+ else
+ {
+ input_coords = Coordinates{j, m, k, l};
+ }
+ if (outputs[i]->layout() == Layout::NHWC)
+ {
+ output_coords = Coordinates{j, k, l, m};
+ }
+ else
+ {
+ output_coords = Coordinates{j, m, k, l};
+ }
+ uint8_t qasymm8 = *reinterpret_cast<uint8_t *>(outputs[i]->buffer() +
+ outputs[i]->calcOffset(output_coords));
+ float result = (qasymm8 - zero_point) * scale;
+ float expected =
+ *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(input_coords));
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST(IPermuteFunction, asymm8_to_float_layout)
+{
+ const size_t input_pads[4] = {0, 0, 1, 2};
+ const size_t output_pads[4] = {0, 3, 2, 1};
+ const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+ float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70,
+ -80, 90, -100, 110, -120, 130, -140, 150, -160};
+ float scale = 10;
+ int32_t zero_point = 128;
+ uint8_t input_buffer[18];
+
+ int32_t min_val = std::numeric_limits<uint8_t>::min();
+ int32_t max_val = std::numeric_limits<uint8_t>::max();
+ for (size_t i = 0; i < sizeof(expected_buffer) / sizeof(float); ++i)
+ {
+ int32_t unclamped = static_cast<int32_t>(std::round(expected_buffer[i] / scale)) + zero_point;
+ input_buffer[i] = std::min(std::max(unclamped, min_val), max_val);
+ }
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ Layout layout = Layout::NHWC;
+ Shape shape = shapes[i];
+ if (i % 2 == 1)
+ {
+ layout = Layout::NCHW;
+ shape = Shape{shapes[i].dim(0), shapes[i].dim(3), shapes[i].dim(1), shapes[i].dim(2)};
+ }
+ TypeInfo type_info{DataType::QUANT_UINT8_ASYMM, scale, zero_point};
+ inputs[i] = std::make_unique<MockUpTensor>(shape, type_info, layout, input_pads[i]);
+ inputs[i]->setBuffer(input_buffer);
+
+ if (layout == Layout::NHWC)
+ {
+ layout = Layout::NCHW;
+ shape = Shape{shapes[i].dim(0), shapes[i].dim(3), shapes[i].dim(1), shapes[i].dim(2)};
+ }
+ else
+ {
+ layout = Layout::NHWC;
+ shape = shapes[i];
+ }
+ outputs[i] =
+ std::make_unique<MockUpTensor>(shape, TypeInfo(DataType::FLOAT32), layout, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+ {
+ for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+ {
+ Coordinates input_coords;
+ Coordinates output_coords;
+ if (inputs[i]->layout() == Layout::NHWC)
+ {
+ input_coords = Coordinates{j, k, l, m};
+ }
+ else
+ {
+ input_coords = Coordinates{j, m, k, l};
+ }
+ if (outputs[i]->layout() == Layout::NHWC)
+ {
+ output_coords = Coordinates{j, k, l, m};
+ }
+ else
+ {
+ output_coords = Coordinates{j, m, k, l};
+ }
+ float result = *reinterpret_cast<float *>(outputs[i]->buffer() +
+ outputs[i]->calcOffset(output_coords));
+ uint8_t qasymm8 = *reinterpret_cast<uint8_t *>(inputs[i]->buffer() +
+ inputs[i]->calcOffset(input_coords));
+ float expected = (qasymm8 - zero_point) * scale;
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+ }
+ }
+}
+
+} // namespace
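
Note: the permute tests above all lean on the same affine quantization identity: a float value v maps to q = round(v / scale) + zero_point (clamped to the integer range) and back to (q - zero_point) * scale, which is exact whenever v is a multiple of scale; that is why the tests can use EXPECT_EQ on dequantized floats. A minimal standalone sketch of that round-trip (plain C++ for illustration, not the onert API):

    #include <algorithm>
    #include <cassert>
    #include <cmath>
    #include <cstdint>

    // Affine quantization to uint8, as used by the qasymm8 tests above.
    uint8_t quantize(float v, float scale, int32_t zero_point)
    {
      int32_t q = static_cast<int32_t>(std::round(v / scale)) + zero_point;
      return static_cast<uint8_t>(std::min(std::max(q, 0), 255)); // clamp to uint8
    }

    // Dequantization back to float.
    float dequantize(uint8_t q, float scale, int32_t zero_point)
    {
      return (q - zero_point) * scale;
    }

    int main()
    {
      const float scale = 10.0f;
      const int32_t zero_point = 128;
      // -40 is a multiple of scale, so the round-trip is exact.
      assert(dequantize(quantize(-40.0f, scale, zero_point), scale, zero_point) == -40.0f);
      return 0;
    }
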
diff --git a/runtime/onert/core/src/exec/JSONExecTime.cc b/runtime/onert/core/src/exec/JSONExecTime.cc
index 72a18def1..d149345fd 100644
--- a/runtime/onert/core/src/exec/JSONExecTime.cc
+++ b/runtime/onert/core/src/exec/JSONExecTime.cc
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "exec/JSONExecTime.h"
-#include "backend/IConfig.h"
+#include "JSONExecTime.h"
+
#include <fstream>
namespace onert
@@ -135,7 +135,7 @@ void JSON::printOperation(const std::map<uint32_t, int64_t> &operation_info,
stream.seekp(-2, std::ofstream::end);
}
-void JSON::uploadOperationsExecTime() const
+void JSON::storeOperationsExecTime() const
{
std::ofstream stream(_measurement_file);
if (!stream.is_open())
diff --git a/runtime/onert/core/src/exec/JSONExecTime.h b/runtime/onert/core/src/exec/JSONExecTime.h
index a64cb3133..e01723611 100644
--- a/runtime/onert/core/src/exec/JSONExecTime.h
+++ b/runtime/onert/core/src/exec/JSONExecTime.h
@@ -37,15 +37,15 @@ namespace exec
* _measurements[Backend*]["string"][bool][uint32_t] = int64_t
*/
using MeasurementData = std::unordered_map<
- const backend::Backend *,
- std::unordered_map<std::string, std::unordered_map<bool, std::map<uint32_t, int64_t>>>>;
+ const backend::Backend *,
+ std::unordered_map<std::string, std::unordered_map<bool, std::map<uint32_t, int64_t>>>>;
class JSON
{
public:
explicit JSON(const std::vector<const backend::Backend *> &backends,
MeasurementData &measurements)
- : _measurement_file("exec_time.json"), _backends(), _measurements(measurements)
+ : _measurement_file("exec_time.json"), _backends(), _measurements(measurements)
{
for (const auto b : backends)
{
@@ -54,18 +54,16 @@ public:
loadOperationsExecTime();
};
/**
- * @brief Update _operations_exec_time_file with new data.
+ * @brief Update _measurement_file with new data.
*/
- void uploadOperationsExecTime() const;
+ void storeOperationsExecTime() const;
private:
///@brief file containing measurements
std::string _measurement_file;
std::unordered_map<std::string, const backend::Backend *> _backends;
- std::unordered_map<
- const backend::Backend *,
- std::unordered_map<std::string, std::unordered_map<bool, std::map<uint32_t, int64_t>>>>
- &_measurements;
+ MeasurementData &_measurements;
+
/**
* @brief Helper function for inserting data to OperationExecTimes
*
@@ -86,7 +84,7 @@ private:
void printOperation(const std::map<uint32_t, int64_t> &operation_info,
std::ofstream &stream) const;
/**
- * @brief Parse and load operations_exec_time from _operations_exec_time_file.
+ * @brief Parse and load _measurements from _measurement_file.
*/
void loadOperationsExecTime();
};
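
Note: the MeasurementData alias above encodes the nesting documented in the class comment (_measurements[Backend*]["string"][bool][uint32_t] = int64_t). A standalone sketch of populating such a map, with a stand-in Backend type and assumed key semantics (illustrative only, not the onert classes):

    #include <cstdint>
    #include <map>
    #include <string>
    #include <unordered_map>

    struct Backend {}; // stand-in for onert::backend::Backend

    using MeasurementData = std::unordered_map<
      const Backend *,
      std::unordered_map<std::string, std::unordered_map<bool, std::map<uint32_t, int64_t>>>>;

    int main()
    {
      Backend cpu;
      MeasurementData measurements;
      // backend -> operation name -> flag -> size key -> exec time; the exact key
      // meanings (quantized flag, operand size) are assumptions here.
      measurements[&cpu]["Conv2D"][false][1024] = 560;
      return 0;
    }
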
diff --git a/runtime/onert/core/src/exec/LinearExecutor.cc b/runtime/onert/core/src/exec/LinearExecutor.cc
index 69dfe9b9b..a64dadcb1 100644
--- a/runtime/onert/core/src/exec/LinearExecutor.cc
+++ b/runtime/onert/core/src/exec/LinearExecutor.cc
@@ -24,41 +24,54 @@ namespace onert
namespace exec
{
-#ifdef RUY_PROFILER
-namespace
-{
-char *seq_to_label(const onert::ir::OpSequence *op_seq, const onert::ir::Operations &operations)
+void LinearExecutor::executeImpl()
{
- auto node_name = operations.at(*op_seq->begin()).name();
- char *cstr = new char[node_name.length() + 1];
- std::strcpy(cstr, node_name.c_str());
- return cstr;
-}
-} // namespace
+ if (_tracing_ctx)
+ {
+ auto profiling_subg_index = _tracing_ctx->getSubgraphIndex(&_graph);
+
+ _subject.notifySubgraphBegin(profiling_subg_index);
+ for (auto &&code : _code)
+ {
+ const auto backend = code.lower_info->backend();
+// TODO : Move ruy profiler into ExecutionObserver
+#ifdef RUY_PROFILER
+ ruy::profiler::ScopeLabel label(code.op->name());
#endif
+ _subject.notifyJobBegin(this, profiling_subg_index, code.op_ind, backend);
-void LinearExecutor::executeImpl()
-{
- _subject.notifyModelBegin(this);
- for (auto &&code : _code)
+ auto &fn_seq = code.fn_seq;
+
+ fn_seq->initRunning();
+
+ bool handle_dynamic_tensor =
+ _lowered_graph->getHasDynamicTensor(code.op_ind) || hasDynamicInput();
+ fn_seq->enableDynamicShapeInferer(handle_dynamic_tensor);
+ fn_seq->run();
+
+ _subject.notifyJobEnd(this, profiling_subg_index, code.op_ind, backend);
+ }
+ _subject.notifySubgraphEnd(profiling_subg_index);
+ }
+ else
{
- const auto op_seq = code.op_seq;
- const auto backend = code.lower_info->backend();
+ for (auto &&code : _code)
+ {
// TODO : Move ruy profiler into ExecutionObserver
#ifdef RUY_PROFILER
- ruy::profiler::ScopeLabel label(seq_to_label(op_seq, _graph.operations()));
+ ruy::profiler::ScopeLabel label(code.op->name());
#endif
- _subject.notifyJobBegin(this, op_seq, backend);
- auto &fn_seq = code.fn_seq;
- bool handle_dynamic_tensor = op_seq->has_dynamic_tensor() || hasDynamicInput();
+ auto &fn_seq = code.fn_seq;
- fn_seq->enableDynamicShapeInferer(handle_dynamic_tensor);
- fn_seq->run();
+ fn_seq->initRunning();
- _subject.notifyJobEnd(this, op_seq, backend);
+ bool handle_dynamic_tensor =
+ _lowered_graph->getHasDynamicTensor(code.op_ind) || hasDynamicInput();
+ fn_seq->enableDynamicShapeInferer(handle_dynamic_tensor);
+ fn_seq->run();
+ }
}
- _subject.notifyModelEnd(this);
}
} // namespace exec
diff --git a/runtime/onert/core/src/exec/LinearExecutor.h b/runtime/onert/core/src/exec/LinearExecutor.h
index c224d3f4f..cc073411a 100644
--- a/runtime/onert/core/src/exec/LinearExecutor.h
+++ b/runtime/onert/core/src/exec/LinearExecutor.h
@@ -22,11 +22,11 @@
#ifndef __ONERT_EXEC_EXECUTOR_H_
#define __ONERT_EXEC_EXECUTOR_H_
-#include "ir/Index.h"
#include "ExecutorBase.h"
-#include "compiler/Linear.h"
-#include "exec/FunctionSequence.h"
+
#include "compiler/CodeMap.h"
+#include "ir/Index.h"
+#include "util/TracingCtx.h"
namespace onert
{
@@ -44,18 +44,15 @@ public:
* @brief Construct a new LinearExecutor object
* @param lowered_graph LoweredGraph object
* @param tensor_builders Tensor builders that are currently used
- * @param code_map OpSequence and its code map
+ * @param code_map @c ir::Operation and its code map
*/
LinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
- const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorRegistries &tensor_regs,
- backend::TensorManagerSet &&tensor_mgrs, compiler::CodeMap &&code_map,
- const std::vector<ir::OpSequenceIndex> &order)
- : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
- std::move(tensor_mgrs)}
+ backend::BackendContexts &&backend_contexts,
+ const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map,
+ const std::vector<ir::OperationIndex> &order, const util::TracingCtx *tracing_ctx)
+ : ExecutorBase{std::move(lowered_graph), std::move(backend_contexts), tensor_regs, tracing_ctx}
{
- for (auto index : order)
+ for (auto &&index : order)
{
_code.emplace_back(std::move(code_map.at(index)));
}
diff --git a/runtime/onert/core/src/exec/MinMaxRecorder.cc b/runtime/onert/core/src/exec/MinMaxRecorder.cc
new file mode 100644
index 000000000..88fc104d1
--- /dev/null
+++ b/runtime/onert/core/src/exec/MinMaxRecorder.cc
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MinMaxRecorder.h"
+
+#include "backend/ITensor.h"
+
+#include <cassert>
+#include <cmath>
+
+namespace onert
+{
+namespace exec
+{
+
+MinMaxRecorder::MinMaxRecorder(const std::string &minmax_filepath, const ir::Graph &graph,
+ const backend::BackendContexts &backend_contexts)
+ : _graph{graph}, _backend_contexts{backend_contexts}, _h5dumper(minmax_filepath)
+{
+}
+
+void MinMaxRecorder::handleJobEnd(IExecutor *, ir::SubgraphIndex subg_idx,
+ ir::OperationIndex op_idx, const backend::Backend *backend)
+{
+ const auto &tensor_reg = _backend_contexts.at(backend)->tensor_registry;
+ const auto &op = _graph.operations().at(op_idx);
+ const auto &outputs = op.getOutputs();
+ // TODO: Support multiple outputs
+ if (outputs.size() != 1)
+ throw std::runtime_error("Only single-output operations are supported for recording minmax.");
+
+ auto tensor = tensor_reg->getITensor(outputs.at(0));
+
+ // Logic copied from MinMaxObserver.cpp.
+
+ // Filter Ops
+ if (tensor->is_constant())
+ return;
+
+ if (tensor->data_type() != ir::DataType::FLOAT32)
+ return;
+
+ switch (op.opcode())
+ {
+ // Operators with multiple outputs
+ case ir::OpCode::If:
+ case ir::OpCode::Split:
+ case ir::OpCode::SplitV:
+ case ir::OpCode::TopKV2:
+ case ir::OpCode::Unpack:
+ case ir::OpCode::While:
+ return;
+ // NOTE: Sin, Cos, Tanh's output is in [-1, 1]
+ // We may not need to dump those operators.
+ default:; // Do Nothing
+ }
+
+ // Otherwise, dump!
+ assert(tensor->data_type() == ir::DataType::FLOAT32);
+ const auto data = reinterpret_cast<float *>(tensor->buffer());
+ const auto num_elements = tensor->total_size() / sizeof(float);
+
+ float max = std::numeric_limits<float>::lowest();
+ float min = std::numeric_limits<float>::max();
+
+ bool all_nan = true;
+ for (size_t i = 0; i < num_elements; ++i)
+ {
+ const float number = data[i];
+ if (std::isnan(number))
+ continue;
+
+ if (number == std::numeric_limits<float>::lowest())
+ continue;
+
+ all_nan = false;
+
+ if (number > max)
+ max = number;
+
+ if (number < min)
+ min = number;
+ }
+
+ if (all_nan)
+ throw std::runtime_error("All values are NaN(Not a Number)");
+
+ _minmax_map.append({subg_idx, op_idx}, min, max);
+}
+
+void MinMaxRecorder::handleSubgraphEnd(ir::SubgraphIndex)
+{
+ // It would be better to dump at the end of model execution rather than per subgraph,
+ // but that would require more extensive changes.
+ _h5dumper.dump(_minmax_map);
+}
+
+} // namespace exec
+} // namespace onert
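
Note: the recording step above reduces to one pass over a flat float buffer that skips NaN entries (and values equal to the float lowest(), which the code treats as invalid) while tracking the running min and max. A standalone sketch of that scan under those assumptions:

    #include <cmath>
    #include <limits>
    #include <stdexcept>
    #include <utility>
    #include <vector>

    std::pair<float, float> minmaxIgnoringNaN(const std::vector<float> &data)
    {
      float min = std::numeric_limits<float>::max();
      float max = std::numeric_limits<float>::lowest();
      bool seen_valid = false;
      for (float v : data)
      {
        // Skip NaN and the lowest-float value, mirroring the recorder above.
        if (std::isnan(v) || v == std::numeric_limits<float>::lowest())
          continue;
        seen_valid = true;
        if (v < min) min = v;
        if (v > max) max = v;
      }
      if (!seen_valid)
        throw std::runtime_error("no valid (non-NaN) values");
      return {min, max};
    }

    int main()
    {
      auto [mn, mx] = minmaxIgnoringNaN({0.5f, NAN, -2.0f, 3.0f});
      return (mn == -2.0f && mx == 3.0f) ? 0 : 1;
    }
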
diff --git a/runtime/onert/core/src/exec/MinMaxRecorder.h b/runtime/onert/core/src/exec/MinMaxRecorder.h
new file mode 100644
index 000000000..7a0817f5f
--- /dev/null
+++ b/runtime/onert/core/src/exec/MinMaxRecorder.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_MINMAX_RECORDER__
+#define __ONERT_EXEC_MINMAX_RECORDER__
+
+#include "ExecutionObservers.h"
+#include "ir/Index.h"
+#include "exec/MinMaxMap.h"
+#include "../dumper/h5/MinMaxDumper.h"
+
+#include <memory>
+
+namespace onert
+{
+namespace exec
+{
+
+class MinMaxRecorder : public IExecutionObserver
+{
+public:
+ MinMaxRecorder(const std::string &minmax_filepath, const ir::Graph &graph,
+ const backend::BackendContexts &backend_contexts);
+ void handleJobBegin(IExecutor *, ir::SubgraphIndex, ir::OperationIndex,
+ const backend::Backend *) override
+ {
+ return;
+ }
+ void handleJobEnd(IExecutor *, ir::SubgraphIndex, ir::OperationIndex,
+ const backend::Backend *) override;
+ void handleSubgraphEnd(ir::SubgraphIndex) override;
+
+private:
+ const ir::Graph &_graph;
+ const backend::BackendContexts &_backend_contexts;
+ dumper::h5::MinMaxDumper _h5dumper;
+ SMMinMaxMap _minmax_map;
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_MINMAX_RECORDER__
diff --git a/runtime/onert/core/src/exec/ParallelExecutor.cc b/runtime/onert/core/src/exec/ParallelExecutor.cc
index ab234aacd..9da7c82b4 100644
--- a/runtime/onert/core/src/exec/ParallelExecutor.cc
+++ b/runtime/onert/core/src/exec/ParallelExecutor.cc
@@ -31,7 +31,7 @@ class HookFunction : public IFunction
public:
HookFunction(IFunction *fn, const std::function<void()> &setup,
const std::function<void()> &teardown)
- : _fn{fn}, _setup{setup}, _teardown{teardown}
+ : _fn{fn}, _setup{setup}, _teardown{teardown}
{
}
@@ -59,14 +59,13 @@ void ParallelExecutor::notify(uint32_t finished_job_id)
_cv_jobs.notify_all();
}
-ParallelExecutor::ParallelExecutor(
- std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
- const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorRegistries &tensor_regs, backend::TensorManagerSet &&tensor_mgrs,
- compiler::CodeMap &&code_map)
- : DataflowExecutor{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
- std::move(tensor_mgrs), std::move(code_map)}
+ParallelExecutor::ParallelExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
+ backend::BackendContexts &&backend_contexts,
+ const compiler::TensorRegistries &tensor_regs,
+ compiler::CodeMap &&code_map,
+ const util::TracingCtx *tracing_ctx)
+ : DataflowExecutor{std::move(lowered_graph), std::move(backend_contexts), tensor_regs,
+ std::move(code_map), tracing_ctx}
{
VERBOSE(ParallelExecutor) << "Constructing Parallel Executor" << std::endl;
}
@@ -76,12 +75,12 @@ void ParallelExecutor::executeImpl()
bool dynamic_input_exists = hasDynamicInput();
// Init scheduler
- // TODO Consider to have distinct backend set in LowerInfoMap
+ // TODO Consider to have distinct backend set in GraphLowerInfo
BackendSet backends;
- for (auto &itr : _lowered_graph->getLowerInfo()->op_seq)
- {
- backends.add(itr.second->backend());
- }
+ _lowered_graph->lower_info().operation.iterate(
+ [&](const ir::OperationIndex &, const compiler::OperationLowerInfo &lower_info) {
+ backends.add(lower_info.backend());
+ });
_scheduler = std::make_unique<ParallelScheduler>(backends);
assert(noWaitingJobs());
@@ -101,7 +100,10 @@ void ParallelExecutor::executeImpl()
VERBOSE(ParallelExecutor) << "INITIAL JOBS : " << _ready_jobs.size() << std::endl;
- _subject.notifyModelBegin(this);
+ auto profiling_subg_index = _tracing_ctx->getSubgraphIndex(&_graph);
+
+ _subject.notifySubgraphBegin(profiling_subg_index);
+
while (true)
{
std::unique_lock<std::mutex> lock{_mu_jobs};
@@ -121,20 +123,24 @@ void ParallelExecutor::executeImpl()
lock.unlock();
- VERBOSE(ParallelExecutor) << "Assigning fn #" << job->index() << std::endl;
+ VERBOSE(ParallelExecutor) << "Assigning fn " << job->index() << std::endl;
auto job_index = job->index();
- auto op_sequence_index = _job_to_op_seq[job_index];
- auto op_seq = &_lowered_graph->op_seqs().at(op_sequence_index);
- auto backend = _lowered_graph->getLowerInfo()->op_seq.at(op_sequence_index)->backend();
- auto setup = [&, op_seq, backend]() { _subject.notifyJobBegin(this, op_seq, backend); };
- auto teardown = [&, job_index, op_seq, backend]() {
- _subject.notifyJobEnd(this, op_seq, backend);
+ auto op_ind = _job_to_op[job_index];
+ auto backend = _lowered_graph->lower_info().operation.at(op_ind).backend();
+ auto setup = [&, op_ind, backend]() {
+ _subject.notifyJobBegin(this, profiling_subg_index, op_ind, backend);
+ };
+ auto teardown = [&, job_index, op_ind, backend]() {
+ _subject.notifyJobEnd(this, profiling_subg_index, op_ind, backend);
notify(job_index);
};
+ job->fn_seq()->initRunning();
+
// dynamic tensor setting
- bool handle_dynamic_tensor = op_seq->has_dynamic_tensor() || dynamic_input_exists;
+ bool handle_dynamic_tensor =
+ _lowered_graph->getHasDynamicTensor(op_ind) || dynamic_input_exists;
job->fn_seq()->enableDynamicShapeInferer(handle_dynamic_tensor);
_scheduler->assign(std::make_unique<HookFunction>(job->fn_seq(), setup, teardown), backend);
@@ -145,7 +151,7 @@ void ParallelExecutor::executeImpl()
// Wait for all the jobs done
_scheduler->finish();
- _subject.notifyModelEnd(this);
+ _subject.notifySubgraphEnd(profiling_subg_index);
// Reset input info for the next execution
_input_info = _initial_input_info;
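
Note: the HookFunction used above just brackets fn_seq->run() with setup and teardown callbacks so the subject can emit job-begin and job-end notifications around each job. A minimal standalone sketch of the same wrapper pattern (illustrative interface, not the onert IFunction):

    #include <functional>
    #include <utility>

    struct IFunction
    {
      virtual ~IFunction() = default;
      virtual void run() = 0;
    };

    // Wraps a function so callbacks fire around its execution.
    class Hooked : public IFunction
    {
    public:
      Hooked(IFunction *fn, std::function<void()> setup, std::function<void()> teardown)
        : _fn{fn}, _setup{std::move(setup)}, _teardown{std::move(teardown)}
      {
      }
      void run() override
      {
        _setup();    // e.g. notifyJobBegin
        _fn->run();
        _teardown(); // e.g. notifyJobEnd, then mark the job finished
      }

    private:
      IFunction *_fn;
      std::function<void()> _setup;
      std::function<void()> _teardown;
    };

    struct Noop : IFunction
    {
      void run() override {}
    };

    int main()
    {
      Noop fn;
      Hooked hooked(&fn, [] { /* job begin */ }, [] { /* job end */ });
      hooked.run();
      return 0;
    }
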
diff --git a/runtime/onert/core/src/exec/ParallelExecutor.h b/runtime/onert/core/src/exec/ParallelExecutor.h
index 929edfce9..7d459b0b4 100644
--- a/runtime/onert/core/src/exec/ParallelExecutor.h
+++ b/runtime/onert/core/src/exec/ParallelExecutor.h
@@ -17,17 +17,12 @@
#ifndef __ONERT_EXEC_PARALLEL_EXECUTOR_H__
#define __ONERT_EXEC_PARALLEL_EXECUTOR_H__
-#include <list>
-#include <queue>
-#include <unordered_map>
+#include "DataflowExecutor.h"
+#include "ParallelScheduler.h"
+
+#include "util/TracingCtx.h"
-#include "exec/FunctionSequence.h"
-#include "Job.h"
-#include "ir/OperandIndexSequence.h"
-#include "ir/Index.h"
#include <memory>
-#include "exec/DataflowExecutor.h"
-#include "ParallelScheduler.h"
namespace onert
{
@@ -48,13 +43,12 @@ public:
*
* @param lowered_graph LoweredGraph object
* @param tensor_builders Tensor builders that are currently used
- * @param code_map OpSequence and its code map
+ * @param code_map @c ir::Operation and its code map
*/
ParallelExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
- const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorRegistries &tensor_regs,
- backend::TensorManagerSet &&tensor_mgrs, compiler::CodeMap &&code_map);
+ backend::BackendContexts &&backend_contexts,
+ const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map,
+ const util::TracingCtx *tracing_ctx);
void executeImpl() override;
diff --git a/runtime/onert/core/src/exec/ParallelScheduler.cc b/runtime/onert/core/src/exec/ParallelScheduler.cc
index 70c9c3dd6..538945631 100644
--- a/runtime/onert/core/src/exec/ParallelScheduler.cc
+++ b/runtime/onert/core/src/exec/ParallelScheduler.cc
@@ -30,7 +30,7 @@ ParallelScheduler::ParallelScheduler(const BackendSet &backends)
{
assert(!backends.empty());
- for (auto backend : backends)
+ for (auto &&backend : backends)
{
_thread_pools[backend] = std::make_unique<ThreadPool>();
}
@@ -45,7 +45,7 @@ void ParallelScheduler::assign(std::unique_ptr<IFunction> &&fn, const backend::B
void ParallelScheduler::finish()
{
- for (auto &itr : _thread_pools)
+ for (auto &&itr : _thread_pools)
{
itr.second->finish();
}
diff --git a/runtime/onert/core/src/exec/SingleModelExecutors.cc b/runtime/onert/core/src/exec/SingleModelExecutors.cc
new file mode 100644
index 000000000..4b954bab2
--- /dev/null
+++ b/runtime/onert/core/src/exec/SingleModelExecutors.cc
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SingleModelExecutors.h"
+
+#include "../backend/builtin/IOTensor.h"
+
+namespace onert
+{
+namespace exec
+{
+
+void SingleModelExecutors::emplace(const ir::ModelIndex &, const ir::SubgraphIndex &subg_index,
+ std::unique_ptr<IExecutor> exec)
+{
+ _executors.emplace(subg_index, std::move(exec));
+}
+
+IExecutor *SingleModelExecutors::at(const ir::ModelIndex &,
+ const ir::SubgraphIndex &subg_index) const
+{
+ return _executors.at(subg_index).get();
+}
+
+uint32_t SingleModelExecutors::inputSize() const
+{
+ return entryExecutor()->getInputTensors().size();
+}
+
+uint32_t SingleModelExecutors::outputSize() const
+{
+ return entryExecutor()->getOutputTensors().size();
+}
+
+const ir::OperandInfo &SingleModelExecutors::inputInfo(const ir::IOIndex &index) const
+{
+ return entryExecutor()->getInputTensors().at(index.value())->orig_info();
+}
+
+const ir::OperandInfo &SingleModelExecutors::outputInfo(const ir::IOIndex &index) const
+{
+ return entryExecutor()->getOutputTensors().at(index.value())->orig_info();
+}
+
+void SingleModelExecutors::execute(const IODescription &desc) { entryExecutor()->execute(desc); }
+
+} // namespace exec
+} // namespace onert
diff --git a/runtime/onert/core/src/exec/SingleModelExecutors.h b/runtime/onert/core/src/exec/SingleModelExecutors.h
new file mode 100644
index 000000000..98d629eae
--- /dev/null
+++ b/runtime/onert/core/src/exec/SingleModelExecutors.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_SINGLE_MODEL_EXECUTORS_H__
+#define __ONERT_EXEC_SINGLE_MODEL_EXECUTORS_H__
+
+#include "exec/IExecutors.h"
+#include "ir/NNPkg.h"
+
+namespace onert
+{
+namespace exec
+{
+
+/**
+ * @brief Class to gather executor set for single model NN package
+ */
+class SingleModelExecutors : public IExecutors
+{
+public:
+ /**
+ * @brief Construct a new SingleModelExecutors object
+ */
+ SingleModelExecutors(void) = default;
+ SingleModelExecutors(const SingleModelExecutors &) = delete;
+ SingleModelExecutors(SingleModelExecutors &&) = default;
+
+ /**
+ * @brief Destroy the SingleModelExecutors object
+ */
+ ~SingleModelExecutors() = default;
+
+public:
+ void emplace(const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+ std::unique_ptr<IExecutor> exec) override;
+
+ IExecutor *at(const ir::ModelIndex &model_index,
+ const ir::SubgraphIndex &subg_index) const override;
+
+ uint32_t inputSize() const override;
+
+ uint32_t outputSize() const override;
+
+ const ir::OperandInfo &inputInfo(const ir::IOIndex &index) const override;
+
+ const ir::OperandInfo &outputInfo(const ir::IOIndex &index) const override;
+
+ void execute(const IODescription &desc) override;
+
+private:
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<IExecutor>> _executors;
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_SINGLE_MODEL_EXECUTORS_H__
diff --git a/runtime/onert/core/src/exec/Sink.h b/runtime/onert/core/src/exec/Sink.h
deleted file mode 100644
index 6a99efe60..000000000
--- a/runtime/onert/core/src/exec/Sink.h
+++ /dev/null
@@ -1,199 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_EXEC_SINK_H__
-#define __ONERT_EXEC_SINK_H__
-
-#include "feature/nchw/Reader.h"
-#include "feature/nchw/View.h"
-#include "feature/nhwc/Reader.h"
-#include "feature/nhwc/View.h"
-
-#include <cassert>
-#include <memory>
-#include "util/Utils.h"
-#include <misc/feature/IndexIterator.h>
-
-namespace onert
-{
-namespace exec
-{
-struct ISink
-{
- virtual ~ISink() = default;
-
- virtual void pull(::onert::backend::ITensor &tensor) const = 0;
-};
-
-// Create second lever inheritance: the first lever is used as a reference type in use-case places
-template <typename T> class ITemplSink : public ISink
-{
-public:
- ITemplSink(void *output_buffer, const size_t &output_size, const ir::Shape &shape,
- const bool copy, ir::Layout io_layout)
- : _output_buffer{reinterpret_cast<T *>(output_buffer)}, _output_size{output_size},
- _shape{shape}, _copy{copy}, _io_layout{io_layout}
- {
- }
-
-protected:
- void pullUnif(onert::backend::ITensor &tensor) const
- {
- assert(((_io_layout == ir::Layout::NHWC && tensor.layout() == ir::Layout::NCHW) ||
- (_io_layout == ir::Layout::NCHW && tensor.layout() == ir::Layout::NHWC)) ||
- _copy);
- auto input_buffer = tensor.buffer();
- auto rank = _shape.rank();
-
- if (!tensor.has_padding() && rank < 4 + _copy)
- {
- memcpy(_output_buffer, input_buffer, _output_size);
- return;
- }
-
- switch (rank)
- {
- case 0:
- case 1:
- {
- memcpy(_output_buffer, input_buffer, _output_size);
- break;
- }
- case 2:
- {
- const int32_t copy_len = _shape.dim(1);
-
- for (auto i = 0; i < _shape.dim(0); ++i)
- {
- ir::Coordinates coords{i, 0};
- memcpy(_output_buffer + i * copy_len, input_buffer + tensor.calcOffset(coords),
- copy_len * sizeof(T));
- }
- break;
- }
- case 3:
- {
- const int32_t dim1 = _shape.dim(1);
- const int32_t dim2 = _shape.dim(2);
-
- for (auto i = 0; i < _shape.dim(0); ++i)
- {
- for (auto j = 0; j < _shape.dim(1); ++j)
- {
- ir::Coordinates coords{i, j, 0};
- memcpy(_output_buffer + i * dim1 * dim2 + j * dim2,
- input_buffer + tensor.calcOffset(coords), dim2 * sizeof(T));
- }
- }
- break;
- }
- case 4:
- {
- if (_copy)
- {
- const int32_t dim1 = _shape.dim(1);
- const int32_t dim2 = _shape.dim(2);
- const int32_t dim3 = _shape.dim(3);
-
- for (auto i = 0; i < _shape.dim(0); ++i)
- {
- for (auto j = 0; j < _shape.dim(1); ++j)
- {
- for (auto k = 0; k < _shape.dim(2); ++k)
- {
- ir::Coordinates coords{i, j, k, 0};
- memcpy(_output_buffer + i * dim1 * dim2 * dim3 + j * dim2 * dim3 + k * dim3,
- input_buffer + tensor.calcOffset(coords), dim3 * sizeof(T));
- }
- }
- }
- }
- else
- {
- const auto shape = _shape.asFeature(_io_layout);
-
- if (_io_layout == ir::Layout::NHWC)
- {
- const exec::feature::nchw::Reader<T> from(&tensor);
- exec::feature::nhwc::View<T> into(shape, _output_buffer, _output_size);
- feature::iterate(shape)
- << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
- const auto value = from.at(batch, ch, row, col);
- into.at(batch, row, col, ch) = value;
- };
- }
- else if (_io_layout == ir::Layout::NCHW)
- {
- const exec::feature::nhwc::Reader<T> from(&tensor);
- exec::feature::nchw::View<T> into(shape, _output_buffer, _output_size);
- feature::iterate(shape)
- << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
- const auto value = from.at(batch, row, col, ch);
- into.at(batch, ch, row, col) = value;
- };
- }
- else
- {
- throw std::runtime_error("Wrong Layout");
- }
- }
- break;
- }
- default:
- throw std::runtime_error("NYI: rank > 4");
- break;
- }
- }
-
-private:
- T *_output_buffer;
- const size_t _output_size;
- const ir::Shape _shape;
- const bool _copy;
- const ir::Layout _io_layout;
-};
-
-template <typename T> class PermutateSink final : public ITemplSink<T>
-{
-public:
- PermutateSink(void *output_buffer, const size_t &output_size, const ir::Shape &shape,
- ir::Layout io_layout)
- : ITemplSink<T>(output_buffer, output_size, shape, false, io_layout)
- {
- }
-
-public:
- void pull(onert::backend::ITensor &tensor) const override { ITemplSink<T>::pullUnif(tensor); }
-};
-
-// Only supports NHWC format front-end(NNAPI) now
-template <typename T> class CopySink final : public ITemplSink<T>
-{
-public:
- CopySink(void *output_buffer, const size_t &output_size, const ir::Shape &shape,
- ir::Layout io_layout = ir::Layout::UNKNOWN)
- : ITemplSink<T>(output_buffer, output_size, shape, true, io_layout)
- {
- }
-
-public:
- void pull(onert::backend::ITensor &tensor) const override { ITemplSink<T>::pullUnif(tensor); }
-};
-
-} // namespace exec
-} // namespace onert
-
-#endif // __ONERT_EXEC_SINK_H__
diff --git a/runtime/onert/core/src/exec/Source.h b/runtime/onert/core/src/exec/Source.h
deleted file mode 100644
index fb2be4dd8..000000000
--- a/runtime/onert/core/src/exec/Source.h
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_EXEC_SOURCE_H__
-#define __ONERT_EXEC_SOURCE_H__
-
-#include "feature/IndexIterator.h"
-#include "feature/nchw/Reader.h"
-#include "feature/nchw/View.h"
-#include "feature/nhwc/Reader.h"
-#include "feature/nhwc/View.h"
-
-#include <cassert>
-#include <memory>
-#include "util/Utils.h"
-#include <ir/Layout.h>
-#include "ir/Shape.h"
-
-namespace onert
-{
-namespace exec
-{
-
-struct ISource
-{
- virtual ~ISource() = default;
-
- virtual void push(::onert::backend::ITensor &tensor) const = 0;
-};
-
-// Create second lever inheritance: the first lever is used as a reference type in use-case places
-template <typename T> class ITemplSource : public ISource
-{
-public:
- ITemplSource(const void *input_buffer, const size_t &input_size, const ir::Shape &shape,
- const bool copy, ir::Layout io_layout)
- : _input_buffer{reinterpret_cast<const T *>(input_buffer)}, _input_size{input_size},
- _shape{shape}, _copy(copy), _io_layout{io_layout}
- {
- }
-
- virtual void push(::onert::backend::ITensor &tensor) const = 0;
-
-protected:
- void pushUnif(onert::backend::ITensor &tensor) const
- {
- assert(((_io_layout == ir::Layout::NHWC && tensor.layout() == ir::Layout::NCHW) ||
- (_io_layout == ir::Layout::NCHW && tensor.layout() == ir::Layout::NHWC)) ||
- _copy);
- auto output_buffer = tensor.buffer();
- auto rank = _shape.rank();
-
- if (!tensor.has_padding() && rank < 4 + _copy)
- {
- memcpy(output_buffer, _input_buffer, _input_size);
- return;
- }
-
- switch (rank)
- {
- case 0:
- case 1:
- {
- memcpy(output_buffer, _input_buffer, _input_size);
- break;
- }
- case 2:
- {
- const int32_t copy_len = _shape.dim(1);
-
- for (auto i = 0; i < _shape.dim(0); ++i)
- {
- ir::Coordinates coords{i, 0};
- memcpy(output_buffer + tensor.calcOffset(coords), _input_buffer + i * copy_len,
- copy_len * sizeof(T));
- }
- break;
- }
- case 3:
- {
- const int32_t dim1 = _shape.dim(1);
- const int32_t dim2 = _shape.dim(2);
-
- for (auto i = 0; i < _shape.dim(0); ++i)
- {
- for (auto j = 0; j < _shape.dim(1); ++j)
- {
- ir::Coordinates coords{i, j, 0};
- memcpy(output_buffer + tensor.calcOffset(coords),
- _input_buffer + i * dim1 * dim2 + j * dim2, dim2 * sizeof(T));
- }
- }
- break;
- }
- case 4:
- {
- if (_copy)
- {
- const int32_t dim1 = _shape.dim(1);
- const int32_t dim2 = _shape.dim(2);
- const int32_t dim3 = _shape.dim(3);
- for (auto i = 0; i < _shape.dim(0); ++i)
- {
- for (auto j = 0; j < _shape.dim(1); ++j)
- {
- for (auto k = 0; k < _shape.dim(2); ++k)
- {
- ir::Coordinates coords{i, j, k, 0};
- memcpy(output_buffer + tensor.calcOffset(coords),
- _input_buffer + i * dim1 * dim2 * dim3 + j * dim2 * dim3 + k * dim3,
- dim3 * sizeof(T));
- }
- }
- }
- }
- else
- {
- const auto shape = _shape.asFeature(_io_layout);
-
- if (_io_layout == ir::Layout::NCHW)
- {
- const exec::feature::nchw::Reader<T> from(shape, _input_buffer, _input_size);
- exec::feature::nhwc::View<T> into(&tensor);
- feature::iterate(shape)
- << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
- const auto value = from.at(batch, ch, row, col);
- into.at(batch, row, col, ch) = value;
- };
- }
- else if (_io_layout == ir::Layout::NHWC)
- {
- const exec::feature::nhwc::Reader<T> from(shape, _input_buffer, _input_size);
- exec::feature::nchw::View<T> into(&tensor);
- feature::iterate(shape)
- << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
- const auto value = from.at(batch, row, col, ch);
- into.at(batch, ch, row, col) = value;
- };
- }
- else
- {
- throw std::runtime_error("Wrong Layout");
- }
- }
-
- break;
- }
- default:
- throw std::runtime_error("NYI: rank > 4");
- break;
- }
- }
-
-private:
- const T *_input_buffer;
- const size_t _input_size;
- const ir::Shape _shape;
- const bool _copy;
- const ir::Layout _io_layout;
-};
-
-template <typename T> class PermutateSource final : public ITemplSource<T>
-{
-public:
- PermutateSource(const void *input_buffer, const size_t &input_size, const ir::Shape &shape,
- ir::Layout io_layout)
- : ITemplSource<T>(input_buffer, input_size, shape, false, io_layout)
- {
- }
-
-public:
- void push(onert::backend::ITensor &tensor) const override
- {
- // do NHWC_TO_NCHW or NCHW_TO_NHWC permutation
- ITemplSource<T>::pushUnif(tensor);
- }
-};
-
-template <typename T> class CopySource final : public ITemplSource<T>
-{
-public:
- CopySource(const void *input_buffer, const size_t &input_size, const ir::Shape &shape,
- ir::Layout io_layout = ir::Layout::UNKNOWN)
- : ITemplSource<T>(input_buffer, input_size, shape, true, io_layout)
- {
- }
-
-public:
- void push(onert::backend::ITensor &tensor) const override { ITemplSource<T>::pushUnif(tensor); }
-};
-
-} // namespace exec
-} // namespace onert
-
-#endif // __ONERT_EXEC_SOURCE_H__
diff --git a/runtime/onert/core/src/exec/ThreadPool.cc b/runtime/onert/core/src/exec/ThreadPool.cc
index c8e0e3265..bf85e59f6 100644
--- a/runtime/onert/core/src/exec/ThreadPool.cc
+++ b/runtime/onert/core/src/exec/ThreadPool.cc
@@ -48,7 +48,7 @@ uint32_t ThreadPool::numJobsInQueue() { return _worker.numJobsInQueue(); }
void ThreadPool::join()
{
- for (auto &thread : _threads)
+ for (auto &&thread : _threads)
{
thread.join();
}
diff --git a/runtime/onert/core/src/exec/feature/MockTensor.h b/runtime/onert/core/src/exec/feature/MockTensor.h
new file mode 100644
index 000000000..1d2d375e2
--- /dev/null
+++ b/runtime/onert/core/src/exec/feature/MockTensor.h
@@ -0,0 +1,66 @@
+
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/ITensor.h"
+
+template <typename T> class MockTensor : public onert::backend::ITensor
+{
+public:
+ MockTensor(const onert::ir::Shape &shape, T *buf, onert::ir::Layout layout)
+ : _buf(reinterpret_cast<uint8_t *>(buf)), _shape(shape), _layout(layout)
+ {
+ }
+
+public:
+ uint8_t *buffer() const override { return _buf; }
+
+ size_t calcOffset(const onert::ir::Coordinates &coords) const override
+ {
+ size_t rank = _shape.rank();
+ rank = rank == 0 ? 1 : rank;
+ size_t offset = 0;
+ for (size_t i = 0; i < rank; ++i)
+ {
+ auto dim = _shape.rank() == 0 ? 1 : _shape.dim(i);
+ offset = offset * dim + coords[i];
+ }
+ offset *= sizeof(T);
+
+ return offset;
+ }
+
+ onert::ir::Shape getShape() const override { return _shape; }
+
+public: // DUMMY methods
+ size_t total_size() const override { return 0; }
+ onert::ir::Layout layout() const override { return _layout; }
+ onert::ir::DataType data_type() const override { return onert::ir::DataType::UINT8; }
+ float data_scale() const override { return 0; }
+ int32_t data_zero_point() const override { return 0; }
+ const std::vector<float> &data_scales() const override { return _dummy_scales; }
+ const std::vector<int32_t> &data_zero_points() const override { return _dummy_zerops; }
+ bool has_padding() const override { return false; }
+ void access(const std::function<void(ITensor &tensor)> &fn) override {}
+ bool is_dynamic() const override { return false; }
+
+private:
+ uint8_t *_buf = nullptr;
+ onert::ir::Shape _shape;
+ onert::ir::Layout _layout = onert::ir::Layout::UNKNOWN;
+ std::vector<float> _dummy_scales;
+ std::vector<int32_t> _dummy_zerops;
+};
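
Note: MockTensor::calcOffset above is a plain row-major byte-offset computation over the tensor dimensions. The same arithmetic in a standalone form (illustrative helper, not the onert interface):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Row-major offset of `coords` in a tensor of `dims`, in bytes.
    size_t rowMajorOffset(const std::vector<int32_t> &dims, const std::vector<int32_t> &coords,
                          size_t elem_size)
    {
      size_t offset = 0;
      for (size_t i = 0; i < dims.size(); ++i)
        offset = offset * dims[i] + coords[i];
      return offset * elem_size;
    }

    int main()
    {
      // For a {1, 3, 2, 2} float tensor, coordinates {0, 1, 1, 0} land on element 6,
      // which is the value the NCHW Reader test below reads through a MockTensor.
      assert(rowMajorOffset({1, 3, 2, 2}, {0, 1, 1, 0}, sizeof(float)) == 6 * sizeof(float));
      return 0;
    }
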
diff --git a/runtime/onert/core/src/exec/feature/nchw/Reader.h b/runtime/onert/core/src/exec/feature/nchw/Reader.h
index 7be9df4d5..d5e3cb97c 100644
--- a/runtime/onert/core/src/exec/feature/nchw/Reader.h
+++ b/runtime/onert/core/src/exec/feature/nchw/Reader.h
@@ -36,35 +36,36 @@ namespace nchw
template <typename T> class Reader : public feature::Reader<T>
{
public:
- // Construct for buffer of model inputs
- Reader(const ir::FeatureShape &shape, const T *ptr, size_t len)
- : _shape{shape}, _ptr{reinterpret_cast<const uint8_t *>(ptr)}, _len{len}
+ using Strides = ir::FeatureShape;
+ // Construct for buffer and strides
+ Reader(const ir::FeatureShape &shape, const Strides &strides, const T *ptr, size_t len)
+ : _shape{shape}, _strides{strides}, _ptr{reinterpret_cast<const uint8_t *>(ptr)}, _len{len}
{
- assert(shape.N * shape.C * shape.H * shape.W * sizeof(T) == len);
-
- // No padding
- _strides.W = sizeof(T);
- _strides.H = shape.W * sizeof(T);
- _strides.C = shape.W * shape.H * sizeof(T);
- _strides.N = shape.W * shape.H * shape.C * sizeof(T);
+ UNUSED_RELEASE(len); // Workaround for unused variable in release mode
+ assert(len == static_cast<size_t>(strides.N != 0
+ ? shape.N * strides.N
+ : strides.C != 0 ? shape.C * strides.C
+ : strides.H != 0 ? shape.H * strides.H
+ : shape.W * strides.W));
}
// Construct for backend tensor
Reader(backend::ITensor *tensor)
- : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
+ : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
{
assert(tensor->layout() == ir::Layout::NCHW);
const auto start_offset = tensor->calcOffset({0, 0, 0, 0});
- _strides.W = tensor->dimension(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
- _strides.H = tensor->dimension(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset;
- _strides.C = tensor->dimension(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset;
- _strides.N = tensor->dimension(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset;
-
- _shape.W = tensor->dimension(3);
- _shape.H = tensor->dimension(2);
- _shape.C = tensor->dimension(1);
- _shape.N = tensor->dimension(0);
+ auto shape = tensor->getShape();
+ _strides.W = shape.dim(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
+ _strides.H = shape.dim(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset;
+ _strides.C = shape.dim(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset;
+ _strides.N = shape.dim(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset;
+
+ _shape.W = shape.dim(3);
+ _shape.H = shape.dim(2);
+ _shape.C = shape.dim(1);
+ _shape.N = shape.dim(0);
}
public:
@@ -104,7 +105,6 @@ private:
private:
// TODO Remove _shape
ir::FeatureShape _shape;
- using Strides = ir::FeatureShape;
Strides _strides;
const uint8_t *_ptr;
size_t _len;
diff --git a/runtime/onert/core/src/exec/feature/nchw/Reader.test.cc b/runtime/onert/core/src/exec/feature/nchw/Reader.test.cc
new file mode 100644
index 000000000..f439cafb5
--- /dev/null
+++ b/runtime/onert/core/src/exec/feature/nchw/Reader.test.cc
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Reader.h"
+
+#include "../MockTensor.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert::exec::feature;
+
+template <typename T> class Reader_nchw : public testing::Test
+{
+public:
+ void setData(std::initializer_list<T> list) { _data = std::make_shared<std::vector<T>>(list); }
+
+ void setShape(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ _shape = onert::ir::FeatureShape(batch, depth, height, width);
+ }
+
+ void setStride(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ auto elem_size = sizeof(T);
+ _stride = onert::ir::FeatureShape(batch * elem_size, depth * elem_size, height * elem_size,
+ width * elem_size);
+ }
+
+ void createReader()
+ {
+ _reader =
+ std::make_shared<nchw::Reader<T>>(_shape, _stride, _data->data(), _data->size() * sizeof(T));
+ }
+
+ void createUsingMockTensor()
+ {
+ onert::ir::Shape shape = {_shape.N, _shape.H, _shape.W, _shape.C};
+ _tensor = std::make_shared<MockTensor<T>>(shape, _data->data(), onert::ir::Layout::NCHW);
+ _reader = std::make_shared<nchw::Reader<T>>(_tensor.get());
+ }
+
+ std::shared_ptr<Reader<T>> _reader = nullptr;
+
+private:
+ std::shared_ptr<std::vector<T>> _data = nullptr;
+ onert::ir::FeatureShape _shape;
+ onert::ir::FeatureShape _stride;
+ std::shared_ptr<MockTensor<T>> _tensor = nullptr;
+};
+
+using ReaderTypes = ::testing::Types<float, int32_t, uint8_t, int8_t, int16_t>;
+TYPED_TEST_SUITE(Reader_nchw, ReaderTypes);
+
+TYPED_TEST(Reader_nchw, basic_reader)
+{
+ this->setData({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11});
+ this->setShape(1, 2, 3, 2);
+ this->setStride(12, 6, 2, 1);
+ this->createReader();
+
+ // Data: NCHW
+ // Shape: NCHW
+ ASSERT_EQ(this->_reader->at(0, 1, 1, 0), 8);
+ ASSERT_EQ(this->_reader->at(1, 1, 0), 8);
+
+ // Data: NCHW
+ // Shape: NCHW
+ this->createUsingMockTensor();
+
+ ASSERT_EQ(this->_reader->at(0, 1, 1, 0), 6);
+ ASSERT_EQ(this->_reader->at(1, 1, 0), 6);
+}
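
Note: the expected values fall out of plain stride arithmetic. A sketch of both lookups, assuming MockTensor derives dense element offsets from the dims it is given (as the calcOffset-based constructor above implies); the fixture multiplies the setStride() arguments by sizeof(T) to get byte strides:

```cpp
#include <cassert>

int main()
{
  // at(0, 1, 1, 0) against the dense reader: element strides (12, 6, 2, 1).
  assert(0 * 12 + 1 * 6 + 1 * 2 + 0 * 1 == 8); // hence data[8] == 8
  // Same lookup via MockTensor with NCHW dims (1, 3, 2, 2): strides (12, 4, 2, 1).
  assert(0 * 12 + 1 * 4 + 1 * 2 + 0 * 1 == 6); // hence data[6] == 6
  return 0;
}
```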
diff --git a/runtime/onert/core/src/exec/feature/nchw/View.h b/runtime/onert/core/src/exec/feature/nchw/View.h
index dbaf1a91e..cdbb0cd7c 100644
--- a/runtime/onert/core/src/exec/feature/nchw/View.h
+++ b/runtime/onert/core/src/exec/feature/nchw/View.h
@@ -37,8 +37,10 @@ namespace nchw
template <typename T> class View final : public Reader<T>
{
public:
+ using Strides = typename Reader<T>::Strides;
// Construct for buffer of model inputs
- View(const ir::FeatureShape &shape, T *ptr, size_t len) : Reader<T>{shape, ptr, len}
+ View(const ir::FeatureShape &shape, const Strides &strides, T *ptr, size_t len)
+ : Reader<T>{shape, strides, ptr, len}
{
// DO NOTHING
}
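
Note: View only layers writable access over Reader, so forwarding the new strides argument is the entire change. A sketch of the writable counterpart to the Reader offset math, with the same stand-in types as before:

```cpp
#include <cstddef>
#include <cstdint>

struct Strides
{
  std::ptrdiff_t N, C, H, W; // byte distances per axis
};

// Identical offset computation to the Reader sketch, returning a mutable
// reference instead of a const one.
template <typename T>
T &at(std::uint8_t *base, const Strides &s, std::ptrdiff_t n, std::ptrdiff_t c, std::ptrdiff_t h,
      std::ptrdiff_t w)
{
  return *reinterpret_cast<T *>(base + n * s.N + c * s.C + h * s.H + w * s.W);
}
```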
diff --git a/runtime/onert/core/src/exec/feature/nchw/View.test.cc b/runtime/onert/core/src/exec/feature/nchw/View.test.cc
new file mode 100644
index 000000000..c6dcda710
--- /dev/null
+++ b/runtime/onert/core/src/exec/feature/nchw/View.test.cc
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "View.h"
+
+#include "../MockTensor.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert::exec::feature;
+
+template <typename T> class View_nchw : public testing::Test
+{
+public:
+ void setData(std::initializer_list<T> list) { _data = std::make_shared<std::vector<T>>(list); }
+
+ void setShape(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ _shape = onert::ir::FeatureShape(batch, depth, height, width);
+ }
+
+ void setStride(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ auto elem_size = sizeof(T);
+ _stride = onert::ir::FeatureShape(batch * elem_size, depth * elem_size, height * elem_size,
+ width * elem_size);
+ }
+
+ void createView()
+ {
+ _view =
+ std::make_shared<nchw::View<T>>(_shape, _stride, _data->data(), _data->size() * sizeof(T));
+ }
+
+ void createUsingMockTensor()
+ {
+ onert::ir::Shape shape = {_shape.N, _shape.H, _shape.W, _shape.C};
+ _tensor = std::make_shared<MockTensor<T>>(shape, _data->data(), onert::ir::Layout::NCHW);
+ _view = std::make_shared<nchw::View<T>>(_tensor.get());
+ }
+
+ std::shared_ptr<nchw::View<T>> _view = nullptr;
+
+private:
+ std::shared_ptr<std::vector<T>> _data = nullptr;
+ onert::ir::FeatureShape _shape;
+ onert::ir::FeatureShape _stride;
+ std::shared_ptr<MockTensor<T>> _tensor = nullptr;
+};
+
+using ViewTypes = ::testing::Types<float, int32_t, uint8_t, int8_t, int16_t>;
+TYPED_TEST_SUITE(View_nchw, ViewTypes);
+
+TYPED_TEST(View_nchw, basic_view)
+{
+ this->setData({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11});
+ this->setShape(1, 2, 3, 2);
+ this->setStride(12, 6, 2, 1);
+ this->createView();
+
+ // Data: NCHW
+ // Shape: NCHW
+ ASSERT_EQ(this->_view->at(0, 1, 1, 0), 8);
+ ASSERT_EQ(this->_view->at(1, 1, 0), 8);
+
+ // Data: NCHW
+ // Shape: NCHW
+ this->createUsingMockTensor();
+
+ ASSERT_EQ(this->_view->at(0, 1, 1, 0), 6);
+ ASSERT_EQ(this->_view->at(1, 1, 0), 6);
+}
diff --git a/runtime/onert/core/src/exec/feature/nhwc/Reader.h b/runtime/onert/core/src/exec/feature/nhwc/Reader.h
index 7730cee72..0bc1ee95b 100644
--- a/runtime/onert/core/src/exec/feature/nhwc/Reader.h
+++ b/runtime/onert/core/src/exec/feature/nhwc/Reader.h
@@ -37,36 +37,36 @@ namespace nhwc
template <typename T> class Reader : public feature::Reader<T>
{
public:
- // Construct for buffer of model inputs
- Reader(const ir::FeatureShape &shape, const T *ptr, size_t len)
- : _shape{shape}, _ptr{reinterpret_cast<const uint8_t *>(ptr)}, _len{len}
+ using Strides = ir::FeatureShape;
+ // Construct for buffer and strides
+ Reader(const ir::FeatureShape &shape, const Strides &strides, const T *ptr, size_t len)
+ : _shape{shape}, _strides{strides}, _ptr{reinterpret_cast<const uint8_t *>(ptr)}, _len{len}
{
UNUSED_RELEASE(len); // Workaround for unused variable in release mode
- assert(shape.N * shape.C * shape.H * shape.W * sizeof(T) == len);
-
- // No padding
- _strides.C = sizeof(T);
- _strides.W = shape.C * sizeof(T);
- _strides.H = shape.C * shape.W * sizeof(T);
- _strides.N = shape.C * shape.W * shape.H * sizeof(T);
+ assert(len == static_cast<size_t>(strides.N != 0
+ ? shape.N * strides.N
+ : strides.H != 0 ? shape.H * strides.H
+ : strides.W != 0 ? shape.W * strides.W
+ : shape.C * strides.C));
}
// Construct for backend tensor
Reader(const backend::ITensor *tensor)
- : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
+ : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
{
assert(tensor->layout() == ir::Layout::NHWC);
const auto start_offset = tensor->calcOffset({0, 0, 0, 0});
- _strides.C = tensor->dimension(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
- _strides.W = tensor->dimension(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset;
- _strides.H = tensor->dimension(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset;
- _strides.N = tensor->dimension(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset;
-
- _shape.C = tensor->dimension(3);
- _shape.W = tensor->dimension(2);
- _shape.H = tensor->dimension(1);
- _shape.N = tensor->dimension(0);
+ auto shape = tensor->getShape();
+ _strides.C = shape.dim(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
+ _strides.W = shape.dim(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset;
+ _strides.H = shape.dim(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset;
+ _strides.N = shape.dim(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset;
+
+ _shape.C = shape.dim(3);
+ _shape.W = shape.dim(2);
+ _shape.H = shape.dim(1);
+ _shape.N = shape.dim(0);
}
public:
@@ -106,7 +106,6 @@ private:
private:
// TODO Remove _shape
ir::FeatureShape _shape;
- using Strides = ir::FeatureShape;
Strides _strides;
const uint8_t *_ptr;
size_t _len;
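
Note: the rewritten assert checks the NHWC axes outermost-in; the first axis whose stride is non-zero spans the entire buffer, so its extent times its stride must equal len. A standalone sketch of that check:

```cpp
#include <cassert>
#include <cstddef>

struct FeatureShape
{
  std::ptrdiff_t N, C, H, W;
};

// Zero strides mark extent-1 axes; the outermost surviving axis carries the
// whole dense-buffer extent.
std::size_t expected_len_nhwc(const FeatureShape &shape, const FeatureShape &strides)
{
  return static_cast<std::size_t>(strides.N != 0   ? shape.N * strides.N
                                  : strides.H != 0 ? shape.H * strides.H
                                  : strides.W != 0 ? shape.W * strides.W
                                                   : shape.C * strides.C);
}

int main()
{
  const FeatureShape shape{1, 2, 3, 2};     // N, C, H, W
  const FeatureShape strides{48, 4, 16, 8}; // dense NHWC byte strides for float
  assert(expected_len_nhwc(shape, strides) == 12 * sizeof(float));
  return 0;
}
```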
diff --git a/runtime/onert/core/src/exec/feature/nhwc/Reader.test.cc b/runtime/onert/core/src/exec/feature/nhwc/Reader.test.cc
new file mode 100644
index 000000000..773199042
--- /dev/null
+++ b/runtime/onert/core/src/exec/feature/nhwc/Reader.test.cc
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Reader.h"
+
+#include "../MockTensor.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert::exec::feature;
+
+template <typename T> class Reader_nhwc : public testing::Test
+{
+public:
+ void setData(std::initializer_list<T> list) { _data = std::make_shared<std::vector<T>>(list); }
+
+ void setShape(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ _shape = onert::ir::FeatureShape(batch, depth, height, width);
+ }
+
+ void setStride(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ auto elem_size = sizeof(T);
+ _stride = onert::ir::FeatureShape(batch * elem_size, depth * elem_size, height * elem_size,
+ width * elem_size);
+ }
+
+ void createReader()
+ {
+ _reader =
+ std::make_shared<nhwc::Reader<T>>(_shape, _stride, _data->data(), _data->size() * sizeof(T));
+ }
+
+ void createUsingMockTensor()
+ {
+ onert::ir::Shape shape = {_shape.N, _shape.H, _shape.W, _shape.C};
+ _tensor = std::make_shared<MockTensor<T>>(shape, _data->data(), onert::ir::Layout::NHWC);
+ _reader = std::make_shared<nhwc::Reader<T>>(_tensor.get());
+ }
+
+ std::shared_ptr<nhwc::Reader<T>> _reader = nullptr;
+
+private:
+ std::shared_ptr<std::vector<T>> _data = nullptr;
+ onert::ir::FeatureShape _shape;
+ onert::ir::FeatureShape _stride;
+ std::shared_ptr<MockTensor<T>> _tensor = nullptr;
+};
+
+using ReaderTypes = ::testing::Types<float, int32_t, uint8_t, int8_t, int16_t>;
+TYPED_TEST_SUITE(Reader_nhwc, ReaderTypes);
+
+TYPED_TEST(Reader_nhwc, basic_reader)
+{
+ this->setData({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11});
+ this->setShape(1, 2, 3, 2);
+ this->setStride(12, 1, 6, 2);
+ this->createReader();
+
+ // Data: NCHW
+ // Shape: NHWC
+ ASSERT_EQ(this->_reader->at(0, 1, 1, 0), 8);
+ ASSERT_EQ(this->_reader->at(1, 1, 0), 8);
+
+ // Data: NHWC
+ // Shape: NHWC
+ this->createUsingMockTensor();
+
+ ASSERT_EQ(this->_reader->at(0, 1, 1, 0), 6);
+ ASSERT_EQ(this->_reader->at(1, 1, 0), 6);
+}
diff --git a/runtime/onert/core/src/exec/feature/nhwc/View.h b/runtime/onert/core/src/exec/feature/nhwc/View.h
index 72c8c3415..c98d050c3 100644
--- a/runtime/onert/core/src/exec/feature/nhwc/View.h
+++ b/runtime/onert/core/src/exec/feature/nhwc/View.h
@@ -17,7 +17,7 @@
#ifndef __ONERT_EXEC_FEATURE_NHWC_VIEW_H__
#define __ONERT_EXEC_FEATURE_NHWC_VIEW_H__
-#include "../Reader.h"
+#include "Reader.h"
#include <cassert>
#include <cstddef>
@@ -38,8 +38,10 @@ namespace nhwc
template <typename T> class View final : public Reader<T>
{
public:
- // Construct for buffer of model inputs
- View(const ir::FeatureShape &shape, T *ptr, size_t len) : Reader<T>{shape, ptr, len}
+ using Strides = typename Reader<T>::Strides;
+ // Construct for buffer and strides
+ View(const ir::FeatureShape &shape, const Strides &strides, T *ptr, size_t len)
+ : Reader<T>{shape, strides, ptr, len}
{
// DO NOTHING
}
diff --git a/runtime/onert/core/src/exec/feature/nhwc/View.test.cc b/runtime/onert/core/src/exec/feature/nhwc/View.test.cc
new file mode 100644
index 000000000..bdd73d5a7
--- /dev/null
+++ b/runtime/onert/core/src/exec/feature/nhwc/View.test.cc
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "View.h"
+
+#include "../MockTensor.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert::exec::feature;
+
+template <typename T> class View_nhwc : public testing::Test
+{
+public:
+ void setData(std::initializer_list<T> list) { _data = std::make_shared<std::vector<T>>(list); }
+
+ void setShape(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ _shape = onert::ir::FeatureShape(batch, depth, height, width);
+ }
+
+ void setStride(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ auto elem_size = sizeof(T);
+ _stride = onert::ir::FeatureShape(batch * elem_size, depth * elem_size, height * elem_size,
+ width * elem_size);
+ }
+
+ void createView()
+ {
+ _view =
+ std::make_shared<nhwc::View<T>>(_shape, _stride, _data->data(), _data->size() * sizeof(T));
+ }
+
+ void createUsingMockTensor()
+ {
+ onert::ir::Shape shape = {_shape.N, _shape.H, _shape.W, _shape.C};
+ _tensor = std::make_shared<MockTensor<T>>(shape, _data->data(), onert::ir::Layout::NHWC);
+ _view = std::make_shared<nhwc::View<T>>(_tensor.get());
+ }
+
+ std::shared_ptr<nhwc::View<T>> _view = nullptr;
+
+private:
+ std::shared_ptr<std::vector<T>> _data = nullptr;
+ onert::ir::FeatureShape _shape;
+ onert::ir::FeatureShape _stride;
+ std::shared_ptr<MockTensor<T>> _tensor = nullptr;
+};
+
+using ViewTypes = ::testing::Types<float, int32_t, uint8_t, int8_t, int16_t>;
+TYPED_TEST_SUITE(View_nhwc, ViewTypes);
+
+TYPED_TEST(View_nhwc, basic_view)
+{
+ this->setData({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11});
+ this->setShape(1, 2, 3, 2);
+ this->setStride(12, 1, 6, 2);
+ this->createView();
+
+ // Data: NCHW
+ // Shape: NHWC
+ ASSERT_EQ(this->_view->at(0, 1, 1, 0), 8);
+ ASSERT_EQ(this->_view->at(1, 1, 0), 8);
+
+ // Data: NHWC
+ // Shape: NHWC
+ this->createUsingMockTensor();
+
+ ASSERT_EQ(this->_view->at(0, 1, 1, 0), 6);
+ ASSERT_EQ(this->_view->at(1, 1, 0), 6);
+}
diff --git a/runtime/onert/core/src/exec/train/TrainableExecutor.cc b/runtime/onert/core/src/exec/train/TrainableExecutor.cc
new file mode 100644
index 000000000..9c7e70c29
--- /dev/null
+++ b/runtime/onert/core/src/exec/train/TrainableExecutor.cc
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TrainableExecutor.h"
+#ifdef RUY_PROFILER
+#include "ruy/profiler/instrumentation.h"
+#endif
+
+#include <misc/polymorphic_downcast.h>
+
+namespace onert
+{
+namespace exec
+{
+namespace train
+{
+
+TrainableExecutor::TrainableExecutor(
+ std::unique_ptr<compiler::train::LoweredTrainableGraph> lowered_graph,
+ backend::train::TrainableBackendContexts &&backend_contexts,
+ const compiler::train::TensorRegistries &tensor_regs,
+ compiler::train::TrainableCodeMap &&code_map, const std::vector<ir::OperationIndex> &order,
+ const util::TracingCtx *tracing_ctx)
+ : _lowered_graph{std::move(lowered_graph)}, _backend_contexts{std::move(backend_contexts)},
+ _trainable_graph{_lowered_graph->trainable_graph()}, _tensor_regs{std::move(tensor_regs)},
+ _mutex(), _tracing_ctx(tracing_ctx)
+{
+ auto build_tensor_list = [&](const auto &ind_seq, auto &tensors) {
+ assert(tensors.empty());
+ for (auto &&ind : ind_seq)
+ {
+ backend::ITensor *tensor = tensor_regs.getITensor(ind);
+ assert(tensor != nullptr);
+ auto io_tensor = nnfw::misc::polymorphic_downcast<backend::builtin::IOTensor *>(tensor);
+ tensors.push_back(io_tensor);
+ }
+ };
+ build_tensor_list(_trainable_graph.getInputs(), _input_tensors);
+ build_tensor_list(_trainable_graph.getOutputs(), _output_tensors);
+
+ for (auto &&index : order)
+ {
+ auto &trainable_code = code_map.at(index);
+ _code.emplace_back(std::move(trainable_code));
+ }
+}
+
+void TrainableExecutor::execute(const std::vector<backend::IPortableTensor *> &,
+ const std::vector<backend::IPortableTensor *> &)
+{
+ throw std::runtime_error("TrainableExecutor does not support multiple subgraphs yet");
+}
+
+void TrainableExecutor::forward(const IODescription &desc, bool training)
+{
+  // Use a mutex for thread safety
+  // TODO: if all backends used by this executor are thread-safe,
+  //       the mutex is unnecessary (otherwise keep it)
+ std::lock_guard<std::mutex> lock(_mutex);
+
+ // TODO Update IO tensors if desc has dynamic input
+ // Set input(s)
+ assert(_input_tensors.size() == desc.inputs.size());
+ for (uint32_t i = 0; i < _input_tensors.size(); ++i)
+ {
+ auto tensor = _input_tensors[i];
+
+ // TODO Check if (desc.inputs[i] == nullptr)
+ // TODO Better design for ITensor? (we need const_cast as ITensor is writable)
+ tensor->setUserTensor(static_cast<uint8_t *>(const_cast<void *>(desc.inputs[i]->buffer)),
+ desc.inputs[i]->size);
+ }
+
+ if (!training)
+ {
+ // Set output(s)
+ assert(_output_tensors.size() == desc.outputs.size());
+ for (uint32_t i = 0; i < _output_tensors.size(); ++i)
+ {
+ auto tensor = _output_tensors[i];
+
+ if (desc.outputs[i] == nullptr)
+ throw std::runtime_error{"Output " + std::to_string(i) + "'s buffer is not set."};
+ tensor->setUserTensor(static_cast<uint8_t *>(desc.outputs[i]->buffer), desc.outputs[i]->size);
+ }
+ }
+
+ forwardImpl(training);
+
+ // TODO Update output(s) desc if desc has dynamic input
+}
+
+void TrainableExecutor::forwardImpl(bool training)
+{
+ if (_tracing_ctx)
+ {
+ auto profiling_subg_index = _tracing_ctx->getSubgraphIndex(&_trainable_graph.graph());
+
+ _subject.notifySubgraphBegin(profiling_subg_index);
+ for (auto &&code : _code)
+ {
+ const auto backend = code.lower_info->backend();
+// TODO : Move ruy profiler into ExecutionObserver
+#ifdef RUY_PROFILER
+ ruy::profiler::ScopeLabel label(code.op->name());
+#endif
+ _subject.notifyJobBegin(this, profiling_subg_index, code.op_ind, backend);
+
+ auto &tn_seq = code.tn_seq;
+ tn_seq->forward(training);
+
+ _subject.notifyJobEnd(this, profiling_subg_index, code.op_ind, backend);
+ }
+ _subject.notifySubgraphEnd(profiling_subg_index);
+ }
+ else
+ {
+ for (auto &&code : _code)
+ {
+// TODO : Move ruy profiler into ExecutionObserver
+#ifdef RUY_PROFILER
+ ruy::profiler::ScopeLabel label(code.op->name());
+#endif
+ auto &tn_seq = code.tn_seq;
+ tn_seq->forward(training);
+ }
+ }
+}
+
+void TrainableExecutor::backward(const IODescription &, uint32_t training_step)
+{
+  // Use a mutex for thread safety
+  // TODO: if all backends used by this executor are thread-safe,
+  //       the mutex is unnecessary (otherwise keep it)
+ std::lock_guard<std::mutex> lock(_mutex);
+
+ backwardImpl(training_step);
+}
+
+void TrainableExecutor::backwardImpl(uint32_t training_step)
+{
+ if (_tracing_ctx)
+ {
+ auto profiling_subg_index = _tracing_ctx->getSubgraphIndex(&_trainable_graph.graph());
+
+ _subject.notifySubgraphBegin(profiling_subg_index);
+ for (auto it = _code.rbegin(); it != _code.rend(); ++it)
+ {
+ const auto &code = *it;
+ const auto backend = code.lower_info->backend();
+// TODO : Move ruy profiler into ExecutionObserver
+#ifdef RUY_PROFILER
+ ruy::profiler::ScopeLabel label(code.op->name());
+#endif
+ _subject.notifyJobBegin(this, profiling_subg_index, code.op_ind, backend);
+
+ auto &tn_seq = code.tn_seq;
+ tn_seq->backward(training_step);
+
+ _subject.notifyJobEnd(this, profiling_subg_index, code.op_ind, backend);
+ }
+ _subject.notifySubgraphEnd(profiling_subg_index);
+ }
+ else
+ {
+ for (auto it = _code.rbegin(); it != _code.rend(); ++it)
+ {
+ const auto &code = *it;
+// TODO : Move ruy profiler into ExecutionObserver
+#ifdef RUY_PROFILER
+ ruy::profiler::ScopeLabel label(code.op->name());
+#endif
+ auto &tn_seq = code.tn_seq;
+ tn_seq->backward(training_step);
+ }
+ }
+}
+
+float TrainableExecutor::getLoss(const ir::IOIndex &pred_io_ind) const
+{
+ const auto &loss_ind = _trainable_graph.getLossIndex(pred_io_ind);
+ if (loss_ind.undefined())
+ throw std::runtime_error{"Loss " + std::to_string(loss_ind.value()) + " is not defined."};
+ backend::ITensor *tensor = _tensor_regs.getITensor(loss_ind);
+ auto loss_buf = reinterpret_cast<float *>(tensor->buffer());
+ return *loss_buf;
+}
+
+} // namespace train
+} // namespace exec
+} // namespace onert
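
Note: forwardImpl() visits _code in the recorded execution order while backwardImpl() visits it in reverse, the standard reverse-mode arrangement: gradients propagate opposite to activations. A stand-in sketch of that sequencing (Step is hypothetical; the real executor dispatches TrainableFnSequence objects):

```cpp
#include <cstdint>
#include <functional>
#include <vector>

struct Step
{
  std::function<void(bool)> forward;           // takes the training flag
  std::function<void(std::uint32_t)> backward; // takes the training step
};

void run(std::vector<Step> &code, std::uint32_t training_step)
{
  for (auto &step : code) // activations: first -> last
    step.forward(true);
  for (auto it = code.rbegin(); it != code.rend(); ++it) // gradients: last -> first
    it->backward(training_step);
}
```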
diff --git a/runtime/onert/core/src/exec/train/TrainableExecutor.h b/runtime/onert/core/src/exec/train/TrainableExecutor.h
new file mode 100644
index 000000000..6b645305f
--- /dev/null
+++ b/runtime/onert/core/src/exec/train/TrainableExecutor.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_TRAIN_TRAINABLE_EXECUTOR_H_
+#define __ONERT_EXEC_TRAIN_TRAINABLE_EXECUTOR_H_
+
+#include "exec/IExecutor.h"
+
+#include "../ExecutionObservee.h"
+#include "../../compiler/train/TensorRegistries.h"
+
+#include "backend/train/TrainableBackendContext.h"
+#include "compiler/train/TrainableCodeMap.h"
+#include "compiler/train/LoweredTrainableGraph.h"
+#include "ir/Index.h"
+#include "util/TracingCtx.h"
+
+namespace onert
+{
+namespace exec
+{
+namespace train
+{
+
+class TrainableExecutor : public IExecutor
+{
+public:
+ /**
+ * @brief Construct a new TrainableExecutor object
+   * @param lowered_graph LoweredTrainableGraph object
+   * @param backend_contexts Backend contexts for training
+   * @param tensor_regs Tensor registries that are currently used
+   * @param code_map @c ir::Operation and its code map
+   * @param order Execution order of operations
+   * @param tracing_ctx Tracing context, or nullptr if tracing is disabled
+   */
+ TrainableExecutor(std::unique_ptr<compiler::train::LoweredTrainableGraph> lowered_graph,
+ backend::train::TrainableBackendContexts &&backend_contexts,
+ const compiler::train::TensorRegistries &tensor_regs,
+ compiler::train::TrainableCodeMap &&code_map,
+ const std::vector<ir::OperationIndex> &order,
+ const util::TracingCtx *tracing_ctx);
+
+public:
+ const ir::Graph &graph() const final { return _trainable_graph.graph(); }
+
+ void execute(const IODescription &desc) override { forward(desc, false); };
+
+ void execute(const std::vector<backend::IPortableTensor *> &inputs,
+ const std::vector<backend::IPortableTensor *> &outputs) override;
+
+ void forward(const IODescription &desc, bool training);
+ void backward(const IODescription &desc, uint32_t training_step);
+
+ // Used only in Dataflow and Parallel Executors
+ void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>> ranks) final
+ {
+ _indexed_ranks = std::move(ranks);
+ };
+
+ void addObserver(std::unique_ptr<IExecutionObserver> ref) { _subject.add(std::move(ref)); };
+
+ const std::vector<backend::builtin::IOTensor *> &getInputTensors() const override
+ {
+ return _input_tensors;
+ }
+
+ const std::vector<backend::builtin::IOTensor *> &getOutputTensors() const override
+ {
+ return _output_tensors;
+ }
+
+ float getLoss(const ir::IOIndex &pred_io_ind) const;
+
+ backend::train::TrainableBackendContexts &getBackendContexts() { return _backend_contexts; }
+
+private:
+ void forwardImpl(bool training);
+ void backwardImpl(uint32_t training_step);
+
+private:
+ std::vector<compiler::train::TrainableCodeAndInfo> _code;
+ ExecutionObservee _subject;
+ std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks;
+ std::unique_ptr<compiler::train::LoweredTrainableGraph> _lowered_graph;
+ backend::train::TrainableBackendContexts _backend_contexts;
+ const ir::train::TrainableGraph &_trainable_graph;
+ compiler::train::TensorRegistries _tensor_regs;
+ std::vector<backend::builtin::IOTensor *> _input_tensors;
+ std::vector<backend::builtin::IOTensor *> _output_tensors;
+ std::mutex _mutex;
+ const util::TracingCtx *_tracing_ctx;
+};
+
+} // namespace train
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_TRAIN_TRAINABLE_EXECUTOR_H_
diff --git a/runtime/onert/core/src/exec/train/TrainableExecutors.cc b/runtime/onert/core/src/exec/train/TrainableExecutors.cc
new file mode 100644
index 000000000..ba39bf0f0
--- /dev/null
+++ b/runtime/onert/core/src/exec/train/TrainableExecutors.cc
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TrainableExecutors.h"
+
+#include "../../backend/builtin/IOTensor.h"
+
+#include <misc/polymorphic_downcast.h>
+
+namespace onert
+{
+namespace exec
+{
+namespace train
+{
+
+void TrainableExecutors::emplace(const ir::ModelIndex &, const ir::SubgraphIndex &subg_index,
+ std::unique_ptr<IExecutor> exec)
+{
+ std::unique_ptr<TrainableExecutor> t_exec{
+ nnfw::misc::polymorphic_downcast<TrainableExecutor *>(exec.release())};
+ _executors.emplace(subg_index, std::move(t_exec));
+}
+
+TrainableExecutor *TrainableExecutors::at(const ir::ModelIndex &,
+ const ir::SubgraphIndex &subg_index) const
+{
+ return _executors.at(subg_index).get();
+}
+
+uint32_t TrainableExecutors::inputSize() const { return entryExecutor()->getInputTensors().size(); }
+
+uint32_t TrainableExecutors::outputSize() const
+{
+ return entryExecutor()->getOutputTensors().size();
+}
+
+const ir::OperandInfo &TrainableExecutors::inputInfo(const ir::IOIndex &index) const
+{
+ return entryExecutor()->getInputTensors().at(index.value())->orig_info();
+}
+
+const ir::OperandInfo &TrainableExecutors::outputInfo(const ir::IOIndex &index) const
+{
+ return entryExecutor()->getOutputTensors().at(index.value())->orig_info();
+}
+
+void TrainableExecutors::execute(const IODescription &desc)
+{
+ if (_executors.size() > 1)
+ throw std::runtime_error("TrainableExecutors does not support multiple executors yet");
+ entryExecutor()->forward(desc, false);
+
+  // TODO Support multiple executors
+}
+
+void TrainableExecutors::train(const IODescription &desc, uint32_t training_step)
+{
+ if (_executors.size() > 1)
+ throw std::runtime_error("TrainableExecutors does not support multiple executors yet");
+ entryExecutor()->forward(desc, true);
+ entryExecutor()->backward(desc, training_step);
+
+  // TODO Support multiple executors
+}
+
+float TrainableExecutors::getLoss(const ir::IOIndex &index) const
+{
+ if (_executors.size() > 1)
+ throw std::runtime_error("TrainableExecutors does not support multiple executors yet");
+ return entryExecutor()->getLoss(index);
+}
+
+} // namespace train
+} // namespace exec
+} // namespace onert
diff --git a/runtime/onert/core/src/exec/train/TrainableExecutors.h b/runtime/onert/core/src/exec/train/TrainableExecutors.h
new file mode 100644
index 000000000..db6d198b1
--- /dev/null
+++ b/runtime/onert/core/src/exec/train/TrainableExecutors.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_TRAIN_TRAINABLE_EXECUTORS_H__
+#define __ONERT_EXEC_TRAIN_TRAINABLE_EXECUTORS_H__
+
+#include "TrainableExecutor.h"
+#include "exec/IExecutors.h"
+#include "ir/NNPkg.h"
+
+namespace onert
+{
+namespace exec
+{
+namespace train
+{
+
+/**
+ * @brief Class to gather executor set for trainable model NN package
+ */
+class TrainableExecutors : public IExecutors
+{
+public:
+ /**
+ * @brief Construct a new TrainableExecutors object
+ */
+ TrainableExecutors(void) = default;
+ TrainableExecutors(const TrainableExecutors &) = delete;
+ TrainableExecutors(TrainableExecutors &&) = default;
+
+ /**
+ * @brief Destroy the TrainableExecutors object
+ */
+ ~TrainableExecutors() = default;
+
+public:
+ TrainableExecutors &operator=(const TrainableExecutors &) = delete;
+ TrainableExecutors &operator=(TrainableExecutors &&) = default;
+
+public:
+ void emplace(const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+ std::unique_ptr<IExecutor> exec) override;
+
+ TrainableExecutor *at(const ir::ModelIndex &model_index,
+ const ir::SubgraphIndex &subg_index) const override;
+
+ TrainableExecutor *entryExecutor() const { return at(ir::ModelIndex{0}, ir::SubgraphIndex{0}); }
+
+ uint32_t inputSize() const override;
+
+ uint32_t outputSize() const override;
+
+ const ir::OperandInfo &inputInfo(const ir::IOIndex &index) const override;
+
+ const ir::OperandInfo &outputInfo(const ir::IOIndex &index) const override;
+
+ void execute(const IODescription &desc) override;
+
+ /**
+ * @brief Train
+ *
+   * @param desc IO information
+   * @param training_step The number of iterations of a training process;
+   *                      in other words, the number of gradient updates.
+ */
+ void train(const IODescription &desc, uint32_t training_step);
+
+ float getLoss(const ir::IOIndex &index) const;
+
+private:
+  // TODO Key executors by (ModelIndex, SubgraphIndex) to support multi-model packages
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<TrainableExecutor>> _executors;
+};
+
+} // namespace train
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_TRAIN_TRAINABLE_EXECUTORS_H__
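
Note: a caller-side loop over this API would alternate train() with loss polling. A hypothetical sketch using stand-in types, since IODescription buffer wiring is set up the same way as for inference and is out of scope here:

```cpp
#include <cstdint>
#include <iostream>

struct Desc // stand-in for IODescription
{
};

struct Executors // stand-in for TrainableExecutors
{
  void train(const Desc &, std::uint32_t /*training_step*/) {}
  float getLoss() const { return 0.0f; }
};

int main()
{
  Executors executors;
  Desc desc; // input/output buffers would be bound here
  for (std::uint32_t step = 0; step < 1000; ++step)
  {
    executors.train(desc, step); // one forward(training) plus backward pass
    if (step % 100 == 0)
      std::cout << "step " << step << " loss " << executors.getLoss() << '\n';
  }
  return 0;
}
```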
diff --git a/runtime/onert/core/src/exec/train/TrainableFnSequence.cc b/runtime/onert/core/src/exec/train/TrainableFnSequence.cc
new file mode 100644
index 000000000..084b3d708
--- /dev/null
+++ b/runtime/onert/core/src/exec/train/TrainableFnSequence.cc
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "exec/train/TrainableFnSequence.h"
+
+namespace onert
+{
+namespace exec
+{
+namespace train
+{
+
+void TrainableFnSequence::forward(bool training)
+{
+ for (const auto &function : _functions)
+ {
+ function->forward(training);
+ }
+}
+
+void TrainableFnSequence::backward(uint32_t training_step)
+{
+ for (auto it = _functions.rbegin(); it != _functions.rend(); ++it)
+ {
+ (*it)->backward();
+ }
+
+ for (const auto &applier : _appliers)
+ {
+ applier->applyGradient(training_step);
+ }
+}
+
+void TrainableFnSequence::append(std::unique_ptr<ITrainableFunction> &&function)
+{
+ _functions.push_back(std::move(function));
+}
+
+void TrainableFnSequence::append(std::unique_ptr<IGradientApplier> &&applier)
+{
+ _appliers.push_back(std::move(applier));
+}
+
+void TrainableFnSequence::iterate(const std::function<void(ITrainableFunction &)> &fn)
+{
+ for (const auto &func : _functions)
+ {
+ fn(*func);
+ }
+}
+
+} // namespace train
+} // namespace exec
+} // namespace onert
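
Note: backward() deliberately completes the reverse sweep over all functions before any applier runs, so every gradient is fully accumulated when the weights move. A minimal stand-in illustration of that contract:

```cpp
#include <cstdint>
#include <vector>

struct Grad
{
  float value = 0.0f;
};

struct Fn // stand-in for ITrainableFunction
{
  Grad *g;
  void backward() { g->value += 1.0f; } // accumulate into the shared gradient
};

struct Applier // stand-in for IGradientApplier
{
  Grad *g;
  float *w;
  float lr;
  void applyGradient(std::uint32_t) { *w -= lr * g->value; g->value = 0.0f; }
};

int main()
{
  float w = 1.0f;
  Grad g;
  std::vector<Fn> fns{{&g}, {&g}};
  Applier applier{&g, &w, 0.1f};
  for (auto it = fns.rbegin(); it != fns.rend(); ++it)
    it->backward();         // all gradients first, in reverse order
  applier.applyGradient(0); // then one weight update for the sequence
  // w is now 1.0f - 0.1f * 2.0f == 0.8f
  return 0;
}
```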
diff --git a/runtime/onert/core/src/exec/train/optimizer/OptimizerCode.cc b/runtime/onert/core/src/exec/train/optimizer/OptimizerCode.cc
new file mode 100644
index 000000000..72b581bf6
--- /dev/null
+++ b/runtime/onert/core/src/exec/train/optimizer/OptimizerCode.cc
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "exec/train/optimizer/OptimizerCode.h"
+
+#include <unordered_map>
+
+namespace onert
+{
+namespace exec
+{
+namespace train
+{
+namespace optimizer
+{
+
+std::string toString(OptimizerCode code)
+{
+ static const std::unordered_map<OptimizerCode, const char *> map{
+ {OptimizerCode::Invalid, "Invalid"},
+ {OptimizerCode::SGD, "SGD"},
+ {OptimizerCode::Adam, "Adam"}};
+ return map.at(code);
+}
+
+} // namespace optimizer
+} // namespace train
+} // namespace exec
+} // namespace onert
diff --git a/runtime/onert/core/src/exec/train/optimizer/OptimizerHelpers.h b/runtime/onert/core/src/exec/train/optimizer/OptimizerHelpers.h
new file mode 100644
index 000000000..66a08b50f
--- /dev/null
+++ b/runtime/onert/core/src/exec/train/optimizer/OptimizerHelpers.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_TRAIN_OPTIMIZER_OPTIMIZER_HELPERS_H__
+#define __ONERT_EXEC_TRAIN_OPTIMIZER_OPTIMIZER_HELPERS_H__
+
+#include "backend/IPortableTensor.h"
+
+namespace onert
+{
+namespace exec
+{
+namespace train
+{
+namespace optimizer
+{
+
+template <typename T, typename L>
+void elementwise(const ir::Shape &shape, const backend::ITensor &src, backend::ITensor &dst,
+ const L &f)
+{
+ ShapeLoop(shape, [&](const ir::Coordinates &coords) {
+ const T src_val = *reinterpret_cast<const T *>(src.buffer() + src.calcOffset(coords));
+ T *dst_data = reinterpret_cast<T *>(dst.buffer() + dst.calcOffset(coords));
+ *dst_data = f(src_val, *dst_data);
+ });
+}
+
+} // namespace optimizer
+} // namespace train
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_TRAIN_OPTIMIZER_OPTIMIZER_HELPERS_H__
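
Note: elementwise() walks every coordinate and combines source and destination through the functor; SGD below instantiates it with f(src, dst) = dst - src * lr. A flat-buffer analogue with the same (src, dst) argument order:

```cpp
#include <cassert>
#include <cstddef>

template <typename T, typename F> void elementwise_flat(std::size_t n, const T *src, T *dst, F f)
{
  for (std::size_t i = 0; i < n; ++i)
    dst[i] = f(src[i], dst[i]); // same argument order as the helper above
}

int main()
{
  float grad[3] = {1.0f, 2.0f, 3.0f};
  float weight[3] = {10.0f, 10.0f, 10.0f};
  const float lr = 0.5f;
  elementwise_flat(3, grad, weight, [&](float s, float d) { return d - s * lr; });
  assert(weight[0] == 9.5f && weight[1] == 9.0f && weight[2] == 8.5f);
  return 0;
}
```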
diff --git a/runtime/onert/core/src/exec/train/optimizer/SGD.cc b/runtime/onert/core/src/exec/train/optimizer/SGD.cc
new file mode 100644
index 000000000..abfbc1b4b
--- /dev/null
+++ b/runtime/onert/core/src/exec/train/optimizer/SGD.cc
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <exec/train/optimizer/SGD.h>
+
+#include "OptimizerHelpers.h"
+
+namespace onert
+{
+namespace exec
+{
+namespace train
+{
+namespace optimizer
+{
+
+double SGD::getLearningRate(uint32_t) const
+{
+ // TODO Use iteration, momentum, and nesterov
+ return _learning_rate;
+}
+
+void SGD::applyGradient(const UpdateFactors &factors) const
+{
+ const auto lr = getLearningRate(std::get<size_t>(factors));
+ const auto &grad_tensor = std::get<const backend::IPortableTensor &>(factors);
+ auto &trainable_tensor = std::get<backend::train::ITrainableTensor &>(factors);
+ assert(trainable_tensor.data_type() == grad_tensor.data_type());
+
+ const auto shape = trainable_tensor.getShape();
+ const auto &grad_shape = grad_tensor.get_info().shape();
+
+ // TODO Support for different shapes
+ if (shape != grad_shape)
+ {
+ throw std::runtime_error("SGD: Invalid gradient tensor");
+ }
+
+ switch (grad_tensor.data_type())
+ {
+ case ir::DataType::FLOAT32:
+ elementwise<float>(shape, grad_tensor, trainable_tensor,
+ [&](float src, float dst) -> float { return dst - src * lr; });
+ break;
+ default:
+ throw std::runtime_error("SGD: Not supported data type");
+ }
+}
+
+} // namespace optimizer
+} // namespace train
+} // namespace exec
+} // namespace onert
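
Note: for FLOAT32 tensors the lambda above is plain SGD without momentum or Nesterov (both still TODO), i.e.

$$w_{t+1} = w_t - \eta \, g_t$$

where \(\eta\) is _learning_rate and \(g_t\) is the value read from grad_tensor; for example, a weight of 1.0 with gradient 2.0 and learning rate 0.1 becomes 0.8.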
diff --git a/runtime/onert/core/src/interp/Buffer.h b/runtime/onert/core/src/interp/Buffer.h
deleted file mode 100644
index 24938f74f..000000000
--- a/runtime/onert/core/src/interp/Buffer.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Buffer.h
- * @brief This file contains Buffer interface and InternalBuffer, ExternalBuffer class
- */
-#ifndef __ONERT_INTERP_BUFFER_H__
-#define __ONERT_INTERP_BUFFER_H__
-
-#include <memory>
-
-#include "ir/Data.h"
-
-namespace onert
-{
-namespace interp
-{
-
-/**
- * @brief Interface for writable data area
- */
-class Buffer : public ir::Data
-{
-public:
- /**
- * @brief Return writable pointer for data area
- * @return Writable pointer
- */
- virtual uint8_t *baseWritable(void) const = 0;
-};
-
-/**
- * @brief Class for internally allocated data area
- */
-class InternalBuffer final : public Buffer
-{
-public:
- InternalBuffer(size_t size) : _base{std::make_unique<uint8_t[]>(size)}, _size{size}
- {
- // DO NOTHING
- }
-
-public:
- size_t size(void) const override { return _size; }
- const uint8_t *base(void) const override { return _base.get(); }
- uint8_t *baseWritable(void) const override { return _base.get(); }
-
-private:
- std::unique_ptr<uint8_t[]> _base;
- size_t _size;
-};
-
-/**
- * @brief Class for data area from outside
- */
-class ExternalBuffer final : public Buffer
-{
-public:
- ExternalBuffer(uint8_t *base, size_t size) : _base{base}, _size{size}
- {
- // DO NOTHING
- }
-
-public:
- size_t size(void) const override { return _size; }
- const uint8_t *base(void) const override { return _base; }
- uint8_t *baseWritable(void) const override { return _base; }
-
-private:
- uint8_t *_base;
- size_t _size;
-};
-
-} // namespace interp
-} // namespace onert
-
-#endif // __ONERT_INTERP_BUFFER_H__
diff --git a/runtime/onert/core/src/interp/ExecEnv.h b/runtime/onert/core/src/interp/ExecEnv.h
deleted file mode 100644
index 7f577ea6e..000000000
--- a/runtime/onert/core/src/interp/ExecEnv.h
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file ExecEnv.h
- * @brief This file contains ExecEnv to access interpreter tensor and execution status
- */
-#ifndef __ONERT_INTERP_EXEC_ENV_H_
-#define __ONERT_INTERP_EXEC_ENV_H_
-
-#include <unordered_set>
-
-#include "ir/Graph.h"
-#include "Tensor.h"
-
-namespace onert
-{
-namespace interp
-{
-
-/**
- * @brief Class to gather interpreter execution environment
- * Each interpreter instance own execution environment
- */
-class ExecEnv
-{
-public:
- /**
- * @brief Construct a new Exec Env object (deleted)
- */
- ExecEnv(void) = delete;
- /**
- * @brief Construct a new ExecEnv object
- * @param[in] graph Graph to execute by interpreter
- */
- explicit ExecEnv(const ir::Graph &graph) : _graph(graph)
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Return graph to execute
- * @return Graph
- */
- const ir::Graph &graph(void) const { return _graph; }
- /**
- * @brief Assign tensor to environment which have allocated or assigned buffer
- * @param[in] index Tensor index
- * @param[in] tensor Tensor
- */
- void assignTensor(const ir::OperandIndex index, std::shared_ptr<ITensor> tensor)
- {
- assert(tensor->bufferRO() != nullptr);
- _tensors.emplace(index, tensor);
- }
-
- /**
- * @brief Return tensor pointer in environment
- * @param[in] index Tensor index
- * can_optional @c True if tensor can be optional input, otherwise @c false
- * @return Tensor pointer
- */
- const ITensor *tensorAt(const ir::OperandIndex index, bool can_optional = false) const
- {
- if (_tensors.find(index) == _tensors.end())
- {
- // It may optional input,
- // otherwise input is not set by runtime user
- if (can_optional)
- {
- return nullptr;
- }
-
- throw std::runtime_error{"ExecEnv: Input is not set"};
- }
-
- return _tensors.at(index).get();
- }
-
- /**
- * @brief Check environment contains tensor
- * @param[in] index Tensor index
- * @return @c true if environment contain tensor, otherwise @c false
- */
- bool contains(const ir::OperandIndex index) const
- {
- return (_tensors.find(index) != _tensors.end());
- }
-
- /**
- * @brief Allocate tensor using operand info
- * @param[in] index Tensor index
- * @param[in] info Operand info
- * @note If already allocated, just return
- * @TODO More smart allocation policy
- */
- void allocateIfNeeded(const ir::OperandIndex index, const ir::OperandInfo &info)
- {
- // already allocated, or constant
- if (contains(index))
- {
- return;
- }
-
- // Buffer from external (ex. model output)
- auto tensor = std::make_shared<Tensor>(info);
- if (isExtBuffer(index))
- {
- tensor->setBuffer(_external_buffers.at(index));
- assignTensor(index, tensor);
-
- return;
- }
-
- tensor->setBuffer(std::make_shared<InternalBuffer>(tensor->total_size()));
- assignTensor(index, tensor);
- _buffers.insert(index);
- }
-
- /**
- * @brief Allocate read-only tensor and share data with other tensor
- * @param[in] index Tensor index
- * @param[in] info Operand info
- * @param[in] index_to_share Tensor index that have data to share
- */
- void allocateAndShareIfNeeded(const ir::OperandIndex index, const ir::OperandInfo &info,
- const ir::OperandIndex index_to_share)
- {
- if (!contains(index_to_share))
- {
- throw std::runtime_error{"Cannot find tensor to share data"};
- }
-
- // already allocated
- if (contains(index))
- {
- return;
- }
-
- if (isExtBuffer(index))
- {
- auto tensor = std::make_shared<Tensor>(info);
- tensor->setBuffer(_external_buffers.at(index));
- assignTensor(index, tensor);
- }
- else
- {
- auto tensor = std::make_shared<ROTensor>(info);
- tensor->setData(tensorAt(index_to_share)->shareData());
- assignTensor(index, tensor);
- _buffers.insert(index);
- }
- }
-
- /**
- * @brief Free buffer if allocated by allocateIfNeed
- * @param[in] index Tensor index
- * @note If allocated by outside, just return
- */
- void freeIfAllocated(const ir::OperandIndex index)
- {
- if (_buffers.find(index) != _buffers.end())
- {
- _tensors.at(index)->releaseData();
- }
- }
-
- /**
- * @brief Assign ExternalBuffer into external buffer map
- * @param[in] index Tensor index
- * @param[in] buffer External buffer
- */
- void assignExternalBuffer(const ir::OperandIndex index, std::shared_ptr<ExternalBuffer> buffer)
- {
- _external_buffers.emplace(index, buffer);
- }
-
-private:
- bool isExtBuffer(const ir::OperandIndex index)
- {
- return (_external_buffers.find(index) != _external_buffers.end());
- }
-
-private:
- const ir::Graph &_graph;
- // Tensor map to use in interpreter
- // It should map tensors that have allocated or assigned buffer pointer
- std::unordered_map<ir::OperandIndex, std::shared_ptr<ITensor>> _tensors;
- // Tensors allocated by allocateIfNeed (buffer)
- std::unordered_set<ir::OperandIndex> _buffers;
- // Tensor buffer from external
- std::unordered_map<ir::OperandIndex, std::shared_ptr<ExternalBuffer>> _external_buffers;
-};
-
-} // namespace interp
-} // namespace onert
-
-#endif // __ONERT_INTERP_EXEC_ENV_H_
diff --git a/runtime/onert/core/src/interp/InterpExecutor.cc b/runtime/onert/core/src/interp/InterpExecutor.cc
deleted file mode 100644
index cd31a4dca..000000000
--- a/runtime/onert/core/src/interp/InterpExecutor.cc
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "interp/InterpExecutor.h"
-#include "interp/ExecEnv.h"
-#include "interp/Interpreter.h"
-
-#include "util/logging.h"
-
-#include <memory>
-
-namespace onert
-{
-namespace interp
-{
-
-void InterpExecutor::execute(const exec::IODescription &desc)
-{
- /************************************************************************
- * Prepare execution model (submodel)
- It may execute divided model
- but now consider model inference is done at interpreter
- ***********************************************************************/
- ir::OperandIndexMap<std::shared_ptr<ITensor>> tensor_map;
-
- for (uint32_t n = 0; n < _graph.getInputs().size(); n++)
- {
- ir::IOIndex index{n};
- const auto input_index = _graph.getInputs().at(index);
-
- const auto input = desc.inputs.at(n).get();
- if (input == nullptr)
- {
- // Optional input
- continue;
- }
-
- auto input_tensor = std::make_shared<ROTensor>(input->info);
- input_tensor->setData(std::make_shared<const ir::ExternalData>(
- reinterpret_cast<const uint8_t *>(input->buffer), input->size));
- tensor_map[input_index] = input_tensor;
- }
-
- /************************************************************************
- * Prepare execution environment
- Execution environment will be assigned to invoked interpreter instance
- ***********************************************************************/
-
- std::unique_ptr<ExecEnv> interp_env = std::make_unique<ExecEnv>(_graph);
-
- // Assign input/output tensor into interpreter execution environment
- for (auto index : _graph.getInputs())
- {
- if (tensor_map.find(index) != tensor_map.end())
- {
- VERBOSE(INTERPRETER) << "Assign input tensor. operand index:" << index.value() << std::endl;
- interp_env->assignTensor(index, tensor_map.at(index));
- }
- }
-
- for (uint32_t n = 0; n < _graph.getOutputs().size(); n++)
- {
- ir::IOIndex index{n};
- const auto output_index = _graph.getOutputs().at(index);
- const auto output = desc.outputs.at(n).get();
- if (output == nullptr)
- {
- // Optional output
- continue;
- }
-
- VERBOSE(INTERPRETER) << "Set out buffer to ExecEnv. operand index:" << output_index.value()
- << std::endl;
-
- interp_env->assignExternalBuffer(
- output_index, std::make_shared<ExternalBuffer>(reinterpret_cast<uint8_t *>(output->buffer),
- output->size));
- }
-
- // Allocate constant tensor
- _graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
- if (obj.isConstant())
- {
- VERBOSE(INTERPRETER) << "Allocate and assign constant tensor. operand index:" << ind.value()
- << std::endl;
-
- assert(obj.data());
- auto const_tensor = std::make_shared<ROTensor>(obj.info());
- // Assume that interpreter's tensor layout is same with model (NHWC)
- const_tensor->setData(
- std::make_shared<ir::ExternalData>(obj.data()->base(), obj.info().total_size()));
- interp_env->assignTensor(ind, const_tensor);
- }
- });
-
- /*****************************************************************************
- * Invoke interpreter
- ****************************************************************************/
-
- interp::Interpreter interp(std::move(interp_env));
- interp.run();
-
- /*****************************************************************************
- * Invoked interpreter run is finished
- ****************************************************************************/
-
- // If interpreter execute submodel
- // 1. Get tensor output of submodel into tensor_map to save result
- // 2. Generate new ExecEnv for next interpretation
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/InterpExecutor.h b/runtime/onert/core/src/interp/InterpExecutor.h
deleted file mode 100644
index 2e3f3ca54..000000000
--- a/runtime/onert/core/src/interp/InterpExecutor.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file InterpExecutor.h
- * @brief This file contains InterpExecutor class\n
- * to manage interpreter execution and environment
- */
-#ifndef __ONERT_INTERP_INTERP_EXECUTOR_H__
-#define __ONERT_INTERP_INTERP_EXECUTOR_H__
-
-#include "ir/OperandIndexMap.h"
-#include "ir/Graph.h"
-#include "exec/IExecutor.h"
-
-namespace onert
-{
-namespace interp
-{
-
-class ITensor;
-
-/**
- * @brief Class to execute model using interpreter
- */
-class InterpExecutor final : public exec::IExecutor
-{
-public:
- explicit InterpExecutor(const ir::Graph &graph) : _graph(graph)
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Return graph object
- * @return Graph object
- */
- const ir::Graph &graph() final { return _graph; }
- void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>>) override{
- // Not implemented
- };
- /**
- * @brief Start execution
- * @note It should be called after setting input and output buffer
- */
- void execute(const exec::IODescription &desc) final;
-
-private:
- const ir::Graph &_graph;
- ir::OperandIndexMap<std::shared_ptr<ITensor>> _tensor_map;
-};
-
-} // namespace interp
-} // namespace onert
-
-#endif // __ONERT_INTERP_INTERP_EXECUTOR_H__
diff --git a/runtime/onert/core/src/interp/InterpOps.lst b/runtime/onert/core/src/interp/InterpOps.lst
deleted file mode 100644
index 0714df38a..000000000
--- a/runtime/onert/core/src/interp/InterpOps.lst
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef INTERP_OP
-#error Define INTERP_OP before including this file
-#endif
-
-// Supported operation name in interpreter
-//
-// Same list with Operations.lst
-// Make comment out if operation is not supported in interpreter
-INTERP_OP(BinaryArithmetic)
-//INTERP_OP(BatchToSpaceND)
-//INTERP_OP(Cast)
-INTERP_OP(Conv2D)
-INTERP_OP(DepthwiseConv2D)
-INTERP_OP(Pool2D)
-INTERP_OP(Concat)
-INTERP_OP(FullyConnected)
-//INTERP_OP(Reduce)
-INTERP_OP(Reshape)
-INTERP_OP(Softmax)
-//INTERP_OP(Squeeze)
-//INTERP_OP(Slice)
-//INTERP_OP(StridedSlice)
-INTERP_OP(ElementwiseActivation)
-//INTERP_OP(Transpose)
-//INTERP_OP(Exp)
-//INTERP_OP(Comparison)
-//INTERP_OP(LogicalNot)
-//INTERP_OP(LSTM)
-//INTERP_OP(RSQRT)
-//INTERP_OP(ResizeBilinear)
-//INTERP_OP(RNN)
-//INTERP_OP(Floor)
-//INTERP_OP(SpaceToBatchND)
-//INTERP_OP(SpaceToDepth)
-//INTERP_OP(EmbeddingLookup)
-//INTERP_OP(L2Normalization)
-//INTERP_OP(HashtableLookup)
-INTERP_OP(InstanceNorm)
-//INTERP_OP(PReLU)
-INTERP_OP(TransposeConv)
-//INTERP_OP(SQRT)
-//INTERP_OP(SquaredDifference)
-//INTERP_OP(TopKV2)
-INTERP_OP(Gather)
-//INTERP_OP(Neg)
-//INTERP_OP(Abs)
-//INTERP_OP(ArgMax)
-//INTERP_OP(Dequantize)
-//INTERP_OP(LocalResponseNormalization)
-//INTERP_OP(DepthToSpace)
-//INTERP_OP(Pack)
-//INTERP_OP(Split)
-//INTERP_OP(Unpack)
-INTERP_OP(Pad)
-//INTERP_OP(Custom)
-//INTERP_OP(Permute)
-//INTERP_OP(OneHot)
diff --git a/runtime/onert/core/src/interp/Interpreter.cc b/runtime/onert/core/src/interp/Interpreter.cc
deleted file mode 100644
index b92afbe73..000000000
--- a/runtime/onert/core/src/interp/Interpreter.cc
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Interpreter.h"
-
-#include <stack>
-#include <unordered_set>
-
-#include "Registration.h"
-
-#include "ir/OperandIndexMap.h"
-#include "util/logging.h"
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace interp
-{
-
-// TODO More structured execution kernel implementation
-// TODO Use cker for execution
-// TODO Divide tensor preparation and execution
-// TODO Introduce a memory manager (buffer allocation and free)
-class OperationExecutor
-{
-public:
- OperationExecutor(ExecEnv *env) : _env{env}
- {
-#define INTERP_OP(InternalName) _kernels[ir::OpCode::InternalName] = get##InternalName();
-#include "InterpOps.lst"
-#undef INTERP_OP
- }
-
- void execute(const ir::OperationIndex &idx)
- {
- const ir::Operation &node = _env->graph().operations().at(idx);
- const auto nodeName = node.name();
- VERBOSE(INTERPRETER) << "Prepare output operands and execute " << nodeName
- << " operation (id: " << idx.value() << ")" << std::endl;
-
- const auto nodeOpCode = node.opcode();
- if (_kernels.find(nodeOpCode) == _kernels.end())
- {
- throw std::runtime_error{"Interpreter: Operation " + nodeName + " is not yet implemented"};
- }
-
- if (_kernels[nodeOpCode]->prepare != nullptr)
- {
- _kernels[nodeOpCode]->prepare(_env, node);
- }
- _kernels[nodeOpCode]->invoke(_env, node);
- }
-
-private:
- ExecEnv *_env;
- std::unordered_map<ir::OpCode, OpKernel *> _kernels;
-};
-
-void Interpreter::run()
-{
- VERBOSE(INTERPRETER) << "Interpreter is invoked " << std::endl;
-
- // operand_stack: save operands prepared to use
- std::stack<ir::OperandIndex> operand_stack;
-
- // Note: We should push inputs first, then constants.
- //       We use use-def chains to find operators ready for execution,
- //       but use-def cannot handle parameters (usually constant, but not always).
- // Note: If all model inputs are constant, this may not work (depends on tensor order),
- //       but that scenario is unlikely in practice.
- for (auto ind : _env->graph().getInputs())
- {
- VERBOSE(INTERPRETER) << "Input: Push to operand stack " << ind.value() << std::endl;
-
- operand_stack.push(ind);
- }
-
- _env->graph().operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
- if (obj.isConstant())
- {
- VERBOSE(INTERPRETER) << "Constant: Push to operand stack " << ind.value() << std::endl;
-
- operand_stack.push(ind);
- }
- });
-
- // Execution
- std::unordered_set<ir::OperandIndex> ready_check;
- std::unordered_set<ir::OperationIndex> executed;
- OperationExecutor executor{_env.get()};
- while (!operand_stack.empty())
- {
- const auto current_operand_index = operand_stack.top();
- operand_stack.pop();
- VERBOSE(INTERPRETER) << "Poped operand " << current_operand_index.value()
- << " is checked ready to use" << std::endl;
-
- assert(ready_check.find(current_operand_index) == ready_check.end());
- ready_check.insert(current_operand_index);
-
- // Find ready operations by scanning the uses of the current operand
- std::stack<ir::OperationIndex> operation_stack;
- const auto use_operators = _env->graph().operands().at(current_operand_index).getUses();
- for (const auto &use_operator : use_operators)
- {
- // Assumption: all parameters are ready to use
- bool operator_ready = true;
- for (auto input_index : _env->graph().operations().at(use_operator).getInputs())
- {
- if (ready_check.find(input_index) == ready_check.end())
- {
- operator_ready = false;
- break;
- }
- }
-
- if (operator_ready)
- {
- VERBOSE(INTERPRETER) << "Ready to execute operation " << use_operator.value() << std::endl;
- operation_stack.push(use_operator);
- }
- }
-
- while (!operation_stack.empty())
- {
- const auto current_operation_index = operation_stack.top();
- operation_stack.pop();
- VERBOSE(INTERPRETER) << "Poped operation: " << current_operation_index.value() << "("
- << _env->graph().operations().at(current_operation_index).name() << ")"
- << std::endl;
-
- // execution
- // 1. Prepare output tensor
- // 2. Call operation kernel
- executor.execute(current_operation_index);
- executed.insert(current_operation_index);
-
- // 3. Push each output into operand stack
- const auto def_operands = _env->graph().operations().at(current_operation_index).getOutputs();
- for (auto def_operand : def_operands)
- {
- VERBOSE(INTERPRETER) << "Buffer: Push to operand stack " << def_operand.value()
- << std::endl;
- operand_stack.push(def_operand);
- }
-
- // 4. Free input buffers whose lifetime has ended (no remaining uses)
- for (auto input_index : _env->graph().operations().at(current_operation_index).getInputs())
- {
- const auto use_operators = _env->graph().operands().at(input_index).getUses();
- bool dead_buffer = true;
- for (const auto &use_operator : use_operators)
- {
- if (executed.find(use_operator) == executed.end())
- {
- dead_buffer = false;
- break;
- }
- }
-
- if (dead_buffer)
- {
- _env->freeIfAllocated(input_index);
- }
- }
- }
- }
-}
-
-} // namespace interp
-} // namespace onert
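
The run() loop above is a data-driven topological execution: operands are pushed as they are produced, and an operation fires once every one of its input operands has been seen. A condensed, self-contained sketch of the same readiness rule (simplified types, not the onert API; the deleted run() scans only the uses of the popped operand rather than all operations):

    #include <stack>
    #include <unordered_set>
    #include <vector>

    struct Op
    {
      std::vector<int> inputs;
      std::vector<int> outputs;
    };

    void runReadinessLoop(const std::vector<Op> &ops, std::stack<int> operand_stack)
    {
      std::unordered_set<int> ready;   // operands produced so far
      std::unordered_set<size_t> done; // operations already executed
      while (!operand_stack.empty())
      {
        ready.insert(operand_stack.top());
        operand_stack.pop();
        for (size_t i = 0; i < ops.size(); ++i)
        {
          if (done.count(i))
            continue;
          bool all_ready = true;
          for (int in : ops[i].inputs)
            if (!ready.count(in))
            {
              all_ready = false;
              break;
            }
          if (!all_ready)
            continue;
          // invoke the kernel for ops[i] here, then publish its outputs
          done.insert(i);
          for (int out : ops[i].outputs)
            operand_stack.push(out);
        }
      }
    }
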
diff --git a/runtime/onert/core/src/interp/Interpreter.h b/runtime/onert/core/src/interp/Interpreter.h
deleted file mode 100644
index d2165f538..000000000
--- a/runtime/onert/core/src/interp/Interpreter.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Interpreter.h
- * @brief This file contains Interpreter class for interpretation
- */
-#ifndef __ONERT_INTERP_INTERPRETER_H__
-#define __ONERT_INTERP_INTERPRETER_H__
-
-#include "ExecEnv.h"
-
-namespace onert
-{
-namespace interp
-{
-
-/**
- * @brief Class for interpretation
- */
-class Interpreter
-{
-
-public:
- /**
- * @brief Construct a new Interpreter object (deleted)
- */
- Interpreter() = delete;
- /**
- * @brief Construct a new Interpreter object
- * @param[in] env Execution environment for the interpreter
- */
- Interpreter(std::unique_ptr<ExecEnv> env) : _env{std::move(env)}
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Run interpreter until there is no operation to execute
- */
- void run();
-
-private:
- std::unique_ptr<ExecEnv> _env;
-};
-
-} // namespace interp
-} // namespace onert
-
-#endif // __ONERT_INTERP_INTERPRETER_H__
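
A minimal usage sketch for the class above, assuming the caller has already prepared an ExecEnv (building the ExecEnv — graph, tensor map, input/output binding — happened in InterpExecutor, outside this header; runOnce is a hypothetical helper, not part of the API):

    #include <memory>
    #include "Interpreter.h"

    // Hypothetical caller-side helper: runs a prepared ExecEnv once
    void runOnce(std::unique_ptr<onert::interp::ExecEnv> env)
    {
      onert::interp::Interpreter interp{std::move(env)};
      interp.run(); // executes until no operation is ready
    }
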
diff --git a/runtime/onert/core/src/interp/Registration.h b/runtime/onert/core/src/interp/Registration.h
deleted file mode 100644
index 956b92a53..000000000
--- a/runtime/onert/core/src/interp/Registration.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_INTERP_REGISTRATION_H__
-#define __ONERT_INTERP_REGISTRATION_H__
-
-#include "ExecEnv.h"
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace interp
-{
-
-struct OpKernel
-{
- std::function<void(ExecEnv *, const ir::Operation &)> prepare;
- std::function<void(const ExecEnv *, const ir::Operation &)> invoke;
-};
-
-// Defined in operations/ directory
-#define INTERP_OP(InternalName) OpKernel *get##InternalName();
-#include "InterpOps.lst"
-#undef INTERP_OP
-
-} // namespace interp
-} // namespace onert
-
-#endif // __ONERT_INTERP_REGISTRATION_H__
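
Each operation source file returned a static OpKernel pairing an optional prepare step with an invoke step; the executor in Interpreter.cc checks prepare for nullptr before calling it. A schematic kernel definition following the pattern used by the operation files below ("Foo" is a placeholder, not a real op):

    #include "Registration.h"

    namespace onert
    {
    namespace interp
    {
    namespace
    {
    void prepareFoo(ExecEnv *env, const ir::Operation &node)
    {
      // shape-check inputs, allocate the output via env->allocateIfNeeded(...)
    }
    void invokeFoo(const ExecEnv *env, const ir::Operation &node)
    {
      // read input buffers, run the cker kernel, write the output buffer
    }
    } // namespace

    OpKernel *getFoo()
    {
      static OpKernel kernel = {prepareFoo, invokeFoo};
      return &kernel;
    }
    } // namespace interp
    } // namespace onert
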
diff --git a/runtime/onert/core/src/interp/Tensor.cc b/runtime/onert/core/src/interp/Tensor.cc
deleted file mode 100644
index 07f8b75dc..000000000
--- a/runtime/onert/core/src/interp/Tensor.cc
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Tensor.h"
-
-#define NO_USE(a) (void)(a)
-
-namespace onert
-{
-namespace interp
-{
-
-void ITensor::access(const std::function<void(backend::ITensor &tensor)> &fn) { fn(*this); }
-
-size_t ROTensor::calcOffset(const ir::Coordinates &coords) const
-{
- NO_USE(coords);
- throw std::runtime_error("offset_element_in_bytes is not supported for cpu::Tensor now.");
-}
-
-size_t Tensor::calcOffset(const ir::Coordinates &coords) const
-{
- NO_USE(coords);
- throw std::runtime_error("offset_element_in_bytes is not supported for cpu::Tensor now.");
-}
-
-ir::Layout ROTensor::layout() const
-{
- // TODO Change to return the frontend layout
- return ir::Layout::NHWC;
-}
-
-ir::Layout Tensor::layout() const
-{
- // TODO Change to return the frontend layout
- return ir::Layout::NHWC;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/Tensor.h b/runtime/onert/core/src/interp/Tensor.h
deleted file mode 100644
index 008a4b9d4..000000000
--- a/runtime/onert/core/src/interp/Tensor.h
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Tensor.h
- * @brief This file contains ITensor interface, ROTensor class, and Tensor class
- */
-#ifndef __ONERT_INTERP_TENSOR_H__
-#define __ONERT_INTERP_TENSOR_H__
-
-#include "Buffer.h"
-
-#include "ir/OperandInfo.h"
-#include "backend/ITensor.h"
-#include "ir/Layout.h"
-
-namespace onert
-{
-namespace interp
-{
-
-/**
- * @brief Interface to handle a tensor in the interpreter
- */
-class ITensor : public backend::ITensor
-{
-public:
- virtual ~ITensor() = default;
-
-public:
- virtual uint8_t *buffer() const = 0;
- /**
- * @brief Return shared pointer for buffer
- * @return Buffer shared pointer
- */
- virtual std::shared_ptr<const Buffer> shareBuffer() const = 0;
- /**
- * @brief Return read-only buffer pointer
- * @return Read-only buffer pointer
- */
- virtual const uint8_t *bufferRO() const = 0;
- /**
- * @brief Return shared pointer for data
- * @return Data shared pointer
- */
- virtual std::shared_ptr<const ir::Data> shareData() const = 0;
- /**
- * @brief Set internal/external buffer
- * @param[in] buffer Buffer pointer
- */
- virtual void setBuffer(std::shared_ptr<const Buffer> buffer) = 0;
- /**
- * @brief Set data reference (including constant, input)
- * @param[in] data Data pointer
- */
- virtual void setData(std::shared_ptr<const ir::Data> data) = 0;
- virtual void releaseData() = 0;
-
- virtual size_t total_size() const = 0;
- virtual size_t dimension(size_t index) const = 0;
- virtual size_t num_dimensions() const = 0;
- virtual size_t calcOffset(const ir::Coordinates &coords) const = 0;
-
- virtual bool has_padding() const = 0;
- /**
- * @brief Return data type of tensor
- * @return Data type of tensor
- */
- virtual ir::DataType data_type() const = 0;
- /**
- * @brief Return TensorInfo
- * @return TensorInfo
- */
- virtual const ir::OperandInfo &tensorInfo() const = 0;
- /**
- * @brief Return number of elements
- * @return Number of elements
- */
- virtual uint64_t num_elements() const = 0;
- void access(const std::function<void(backend::ITensor &tensor)> &fn) final;
-};
-
-/**
- * @brief Class to handle a tensor in the interpreter as read-only
- */
-class ROTensor final : public ITensor
-{
-public:
- ROTensor() = delete;
- ROTensor(const ir::OperandInfo &info) : _info(info)
- {
- // DO NOTHING
- }
-
-public:
- uint8_t *buffer() const override { throw std::runtime_error{"Read-only tensor"}; }
- std::shared_ptr<const Buffer> shareBuffer() const override
- {
- throw std::runtime_error{"Read only tensor"};
- }
- const uint8_t *bufferRO() const override { return _data->base(); }
- std::shared_ptr<const ir::Data> shareData() const override { return _data; }
- void setBuffer(std::shared_ptr<const Buffer> buffer) override { _data = buffer; }
- void setData(std::shared_ptr<const ir::Data> data) override { _data = data; }
- void releaseData() override { _data = nullptr; }
-
- size_t total_size() const override { return _info.total_size(); }
- size_t dimension(size_t index) const override { return _info.shape().dim(index); }
- size_t num_dimensions() const override { return _info.shape().rank(); }
- size_t calcOffset(const ir::Coordinates &coords) const override;
- ir::Layout layout() const override;
- bool is_dynamic() const override { return false; }
- bool has_padding() const override { return false; }
- ir::DataType data_type() const override { return _info.typeInfo().type(); }
- float data_scale() const override { return _info.typeInfo().scale(); }
- int32_t data_offset() const override { return _info.typeInfo().offset(); }
- const ir::OperandInfo &tensorInfo() const override { return _info; }
- uint64_t num_elements() const override { return _info.shape().num_elements(); }
-
-private:
- const ir::OperandInfo _info;
- std::shared_ptr<const ir::Data> _data{nullptr};
-};
-
-/**
- * @brief Class to handle a tensor in the interpreter as writable
- */
-class Tensor final : public ITensor
-{
-public:
- Tensor() = delete;
- Tensor(const ir::OperandInfo &info) : _info(info)
- {
- // DO NOTHING
- }
-
-public:
- uint8_t *buffer() const override { return _buffer->baseWritable(); }
- std::shared_ptr<const Buffer> shareBuffer() const override { return _buffer; }
- const uint8_t *bufferRO() const override { return _buffer->base(); }
- std::shared_ptr<const ir::Data> shareData() const override { return _buffer; }
- void setBuffer(std::shared_ptr<const Buffer> buffer) override { _buffer = buffer; }
- void setData(std::shared_ptr<const ir::Data>) override
- {
- throw std::runtime_error{"Passed data may read-only"};
- }
- void releaseData() override { _buffer = nullptr; }
-
- size_t total_size() const override { return _info.total_size(); }
- size_t dimension(size_t index) const override { return _info.shape().dim(index); }
- size_t num_dimensions() const override { return _info.shape().rank(); }
- size_t calcOffset(const ir::Coordinates &coords) const override;
- ir::Layout layout() const override;
- bool is_dynamic() const override { return false; }
- bool has_padding() const override { return false; }
- ir::DataType data_type() const override { return _info.typeInfo().type(); }
- float data_scale() const override { return _info.typeInfo().scale(); }
- int32_t data_offset() const override { return _info.typeInfo().offset(); }
- const ir::OperandInfo &tensorInfo() const override { return _info; }
- uint64_t num_elements() const override { return _info.shape().num_elements(); }
- backend::IDynamicTensorManager *dynamic_tensor_manager() override { return nullptr; }
-
-private:
- const ir::OperandInfo _info;
- std::shared_ptr<const Buffer> _buffer{nullptr};
-};
-
-} // namespace interp
-} // namespace onert
-
-#endif // __ONERT_INTERP_TENSOR_H__
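
The split above is deliberate: ROTensor aliases existing ir::Data (constants, user inputs) without copying and only exposes bufferRO(), while Tensor owns a writable Buffer. A small sketch of binding each kind (operand_info, constant_data, and scratch_buffer are placeholders for values produced elsewhere):

    // Read-only view over constant data: no copy, reads only
    onert::interp::ROTensor ro{operand_info};
    ro.setData(constant_data);            // std::shared_ptr<const ir::Data>
    const uint8_t *src = ro.bufferRO();

    // Writable tensor backed by an owned buffer
    onert::interp::Tensor out{operand_info};
    out.setBuffer(scratch_buffer);        // std::shared_ptr<const Buffer>
    uint8_t *dst = out.buffer();          // Buffer::baseWritable() underneath
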
diff --git a/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc b/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc
deleted file mode 100644
index 86e883524..000000000
--- a/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/BinaryArithmeticOps.h>
-
-#include "OperationUtil.h"
-
-#include "interp/Registration.h"
-#include "ir/operation/BinaryArithmetic.h"
-#include "misc/polymorphic_downcast.h"
-#include "cker/Types.h"
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-enum class OpType
-{
- ADD,
- SUB,
- MUL
-};
-
-void prepare(ExecEnv *env, const ir::Operation &node)
-{
- const auto &arithmetic_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::BinaryArithmetic &>(node);
-
- const auto lhs_index = node.getInputs().at(arithmetic_node.LHS);
- const auto rhs_index = node.getInputs().at(arithmetic_node.RHS);
- const auto out_index = node.getOutputs().at(0);
-
- const auto lhs_tensor = env->tensorAt(lhs_index);
- const auto rhs_tensor = env->tensorAt(rhs_index);
-
- // Check that the lhs shape and type match the rhs
- // TODO Utility function to compare TensorInfo
- if (lhs_tensor->data_type() != rhs_tensor->data_type())
- {
- throw std::runtime_error{"Interp(" + arithmetic_node.name() + "): Different input types"};
- }
-
- bool try_broadcast = (lhs_tensor->tensorInfo().shape() != rhs_tensor->tensorInfo().shape());
- if (try_broadcast)
- {
- bool success = true;
- auto out_shape = calcBroadcastShape(lhs_tensor->tensorInfo().shape(),
- rhs_tensor->tensorInfo().shape(), success);
- if (!success)
- {
- throw std::runtime_error{"Interp(" + arithmetic_node.name() + "): Fail to brodcasting"};
- }
-
- auto output_info =
- ir::OperandInfo::createStaticInfo(out_shape, lhs_tensor->tensorInfo().typeInfo());
- // We can handle already-allocated tensors (e.g. model output)
- env->allocateIfNeeded(out_index, output_info);
- }
- else
- {
- // Output's shape and type are the same as the input's
- auto output_info = lhs_tensor->tensorInfo();
- // We can handle already-allocated tensors (e.g. model output)
- env->allocateIfNeeded(out_index, output_info);
- }
-
- auto out_tensor = env->tensorAt(out_index);
- // Check that the lhs shape and type match the output
- // TODO Utility function to compare TensorInfo
- if (lhs_tensor->data_type() != out_tensor->data_type())
- {
- throw std::runtime_error{"Interp(" + arithmetic_node.name() + "): Invalid output type"};
- }
-}
-
-inline void setActivationParams(float min, float max, nnfw::cker::BinaryArithmeticOpParam *params)
-{
- params->float_activation_min = min;
- params->float_activation_max = max;
-}
-
-inline void setActivationParams(int32_t min, int32_t max,
- nnfw::cker::BinaryArithmeticOpParam *params)
-{
- params->quantized_activation_min = min;
- params->quantized_activation_max = max;
-}
-
-template <typename raw_type, OpType op_type>
-void invoke(const ITensor *lhs_tensor, const ITensor *rhs_tensor, const ITensor *out_tensor,
- const ir::operation::BinaryArithmetic::Param &param)
-{
- const auto lhs_buffer = lhs_tensor->bufferRO();
- const auto rhs_buffer = rhs_tensor->bufferRO();
- auto out_buffer = out_tensor->buffer();
-
- nnfw::cker::BinaryArithmeticOpParam cker_param;
- raw_type activation_min, activation_max;
- calculateActivationRange(param.activation, &activation_min, &activation_max);
- setActivationParams(activation_min, activation_max, &cker_param);
- const raw_type *lhs_ptr = reinterpret_cast<const raw_type *>(lhs_buffer);
- const raw_type *rhs_ptr = reinterpret_cast<const raw_type *>(rhs_buffer);
- raw_type *out_ptr = reinterpret_cast<raw_type *>(out_buffer);
-
- const auto cker_op_type =
- (op_type == OpType::ADD)
- ? nnfw::cker::BinaryArithmeticOpType::ADD
- : ((op_type == OpType::SUB) ? nnfw::cker::BinaryArithmeticOpType::SUB
- : nnfw::cker::BinaryArithmeticOpType::MUL);
-
- const bool need_broadcast = nnfw::cker::ProcessBroadcastShapes(
- convertShape(lhs_tensor->tensorInfo().shape()),
- convertShape(rhs_tensor->tensorInfo().shape()), &cker_param);
-
- if (need_broadcast)
- {
- const auto lhs_shape = convertShape(lhs_tensor->tensorInfo().shape());
- const auto rhs_shape = convertShape(rhs_tensor->tensorInfo().shape());
- const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
- nnfw::cker::BroadcastBinaryArithmeticOp<cker_op_type>(cker_param, lhs_shape, lhs_ptr, rhs_shape,
- rhs_ptr, out_shape, out_ptr);
- return;
- }
-
- const auto lhs_shape = convertShape(lhs_tensor->tensorInfo().shape());
- const auto rhs_shape = convertShape(rhs_tensor->tensorInfo().shape());
- const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
- nnfw::cker::BinaryArithmeticOp<cker_op_type>(cker_param, lhs_shape, lhs_ptr, rhs_shape, rhs_ptr,
- out_shape, out_ptr);
-}
-
-template <OpType op_type>
-void invokeBinaryArithmetic(const ExecEnv *env, const ir::operation::BinaryArithmetic &node)
-{
- const auto lhs_index = node.getInputs().at(node.LHS);
- const auto rhs_index = node.getInputs().at(node.RHS);
- const auto out_index = node.getOutputs().at(0);
- const auto lhs_tensor = env->tensorAt(lhs_index);
- const auto rhs_tensor = env->tensorAt(rhs_index);
- const auto out_tensor = env->tensorAt(out_index);
- const auto data_type = lhs_tensor->data_type();
-
- if (data_type == ir::DataType::INT32)
- {
- invoke<int32_t, op_type>(lhs_tensor, rhs_tensor, out_tensor, node.param());
- }
- else if (data_type == ir::DataType::FLOAT32)
- {
- invoke<float, op_type>(lhs_tensor, rhs_tensor, out_tensor, node.param());
- }
- else
- {
- throw std::runtime_error{"NYI: Unsupported data type"};
- }
-}
-
-void invokeBinaryArithmeticOps(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &arithmetic_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::BinaryArithmetic &>(node);
-
- switch (arithmetic_node.param().arithmetic_type)
- {
- case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
- invokeBinaryArithmetic<OpType::ADD>(env, arithmetic_node);
- break;
- case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
- invokeBinaryArithmetic<OpType::SUB>(env, arithmetic_node);
- break;
- case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
- invokeBinaryArithmetic<OpType::MUL>(env, arithmetic_node);
- break;
- default:
- throw std::runtime_error{"Interp(BinaryArithmetic): NYI unsupported operation " +
- arithmetic_node.name()};
- break;
- }
-}
-
-} // namespace
-
-OpKernel *getBinaryArithmetic()
-{
- static OpKernel kernel = {prepare, invokeBinaryArithmeticOps};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
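
prepare() above falls back to broadcast shape computation when lhs and rhs differ; calcBroadcastShape lives in OperationUtil.h, which is not part of this diff. The rule is standard right-aligned broadcasting, sketched per dimension:

    // lhs [2, 3, 4] with rhs [3, 1] -> out [2, 3, 4]
    // Walk dimensions from the right; each pair must match or one must be 1.
    bool broadcastDim(int lhs, int rhs, int &out)
    {
      if (lhs == rhs || rhs == 1) { out = lhs; return true; }
      if (lhs == 1)               { out = rhs; return true; }
      return false; // incompatible: prepare() throws in this case
    }
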
diff --git a/runtime/onert/core/src/interp/operations/Concat.cc b/runtime/onert/core/src/interp/operations/Concat.cc
deleted file mode 100644
index efc46c66b..000000000
--- a/runtime/onert/core/src/interp/operations/Concat.cc
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/Concatenation.h>
-
-#include "OperationUtil.h"
-
-#include "interp/Registration.h"
-#include "ir/operation/Concat.h"
-#include "misc/polymorphic_downcast.h"
-
-namespace onert
-{
-namespace interp
-{
-namespace concat
-{
-
-void prepareConcat(ExecEnv *env, const ir::Operation &node)
-{
- const auto &concat_node = nnfw::misc::polymorphic_downcast<const ir::operation::Concat &>(node);
-
- const auto first_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- const auto first_tensor = env->tensorAt(first_index);
- uint32_t out_axis_dimension = 0;
- const int32_t axis_raw = concat_node.param().axis;
- const uint32_t axis = (axis_raw < 0) ? (axis_raw + first_tensor->num_dimensions()) : axis_raw;
-
- // All input shapes should be the same except for the axis dimension
- // All input types should be the same
- for (auto input : node.getInputs())
- {
- assert(first_tensor->num_dimensions() == env->tensorAt(input)->num_dimensions());
- assert(first_tensor->data_type() == env->tensorAt(input)->data_type());
- for (uint32_t i = 0; i < first_tensor->num_dimensions(); i++)
- {
- if (i == axis)
- {
- out_axis_dimension += env->tensorAt(input)->dimension(i);
- continue;
- }
- assert(first_tensor->dimension(i) == env->tensorAt(input)->dimension(i));
- }
- }
-
- // Build output tensor info from the first input's info and the accumulated axis dimension
- auto out_shape = first_tensor->tensorInfo().shape();
- out_shape.dim(axis) = out_axis_dimension;
- env->allocateIfNeeded(out_index, ir::OperandInfo::createStaticInfo(
- out_shape, first_tensor->tensorInfo().typeInfo()));
-
- auto out_tensor = env->tensorAt(out_index);
- UNUSED_RELEASE(out_tensor);
-
- // Output shape should match the input except for the axis dimension
- // Output type should match the input
- assert(first_tensor->data_type() == out_tensor->data_type());
- for (uint32_t i = 0; i < first_tensor->num_dimensions(); i++)
- {
- if (i == axis)
- {
- continue;
- }
- assert(first_tensor->dimension(i) == out_tensor->dimension(i));
- }
-}
-
-void invoke(const std::vector<const ITensor *> in_tensors, const ITensor *out_tensor, uint32_t axis)
-{
- const uint32_t count = in_tensors.size();
-
- // Calculate
- nnfw::cker::ConcatenationParams cker_param;
- cker_param.axis = (int8_t)axis;
- cker_param.inputs_count = count;
-
- const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
-
- std::vector<nnfw::cker::Shape> in_shapes;
- std::vector<const nnfw::cker::Shape *> in_shape_ptrs;
- in_shapes.reserve(count);
- in_shape_ptrs.reserve(count);
- std::vector<const float *> in_ptrs;
- for (uint32_t i = 0; i < count; i++)
- {
- in_shapes.push_back(convertShape(in_tensors[i]->tensorInfo().shape()));
- in_shape_ptrs.push_back(&in_shapes[i]);
- in_ptrs.push_back(reinterpret_cast<const float *>(in_tensors[i]->bufferRO()));
- }
-
- auto out_buffer = out_tensor->buffer();
- float *out_ptr = reinterpret_cast<float *>(out_buffer);
-
- nnfw::cker::Concatenation<float>(cker_param, in_shape_ptrs.data(), in_ptrs.data(), out_shape,
- out_ptr);
-}
-
-void invokeConcat(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &concat_node = nnfw::misc::polymorphic_downcast<const ir::operation::Concat &>(node);
- const int32_t axis_raw = concat_node.param().axis;
-
- std::vector<const ITensor *> in_tensors;
- for (const auto &e : concat_node.getInputs())
- {
- in_tensors.emplace_back(env->tensorAt(e));
- }
-
- const auto out_index = node.getOutputs().at(0);
- const auto out_tensor = env->tensorAt(out_index);
- const uint32_t axis = (axis_raw < 0) ? (axis_raw + out_tensor->num_dimensions()) : axis_raw;
-
- const auto data_type = in_tensors[0]->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(in_tensors, out_tensor, axis);
- }
- else
- {
- throw std::runtime_error{"NYI: Support float32 only"};
- }
-}
-} // namespace concat
-
-OpKernel *getConcat()
-{
- static OpKernel kernel = {concat::prepareConcat, concat::invokeConcat};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
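
prepareConcat() derives the output shape by copying the first input's shape and summing the axis dimension across all inputs; for example, concatenating [2, 3] and [2, 5] on axis 1 yields [2, 8]. The same accumulation as a standalone sketch (shapes as plain vectors):

    #include <vector>

    std::vector<int> concatOutputShape(const std::vector<std::vector<int>> &inputs, int axis)
    {
      std::vector<int> out = inputs[0]; // all non-axis dims must match inputs[0]
      out[axis] = 0;
      for (const auto &shape : inputs)
        out[axis] += shape[axis];
      return out;
    }
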
diff --git a/runtime/onert/core/src/interp/operations/Conv2D.cc b/runtime/onert/core/src/interp/operations/Conv2D.cc
deleted file mode 100644
index bb00b828c..000000000
--- a/runtime/onert/core/src/interp/operations/Conv2D.cc
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/Conv.h>
-
-#include "OperationUtil.h"
-
-#include "interp/Registration.h"
-#include "ir/operation/Conv2D.h"
-#include "util/Utils.h"
-#include "util/ShapeInference.h"
-#include "misc/polymorphic_downcast.h"
-
-namespace onert
-{
-namespace interp
-{
-namespace conv2d
-{
-
-void prepareConv2D(ExecEnv *env, const ir::Operation &node)
-{
- const auto in_index = node.getInputs().at(ir::operation::Conv2D::INPUT);
- const auto kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL);
- const auto bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
- const auto kernel_tensor = env->tensorAt(kernel_index);
- const auto bias_tensor = env->tensorAt(bias_index);
-
- assert(in_tensor->num_dimensions() == 4);
- assert(kernel_tensor->num_dimensions() == 4);
- assert(bias_tensor->num_dimensions() == 1);
-
- UNUSED_RELEASE(in_tensor);
- UNUSED_RELEASE(kernel_tensor);
- UNUSED_RELEASE(bias_tensor);
-
- const auto output_info = env->graph().operands().at(out_index).info();
- if (output_info.total_size() == 0)
- {
- // Handle unspecified output shape
- const auto &conv_node = nnfw::misc::polymorphic_downcast<const ir::operation::Conv2D &>(node);
- const auto inferred_output_shape = shape_inference::inferConv2DShape(
- in_tensor->tensorInfo().shape(), kernel_tensor->tensorInfo().shape(), conv_node.param());
- env->allocateIfNeeded(
- out_index, ir::OperandInfo::createStaticInfo(inferred_output_shape, output_info.typeInfo()));
- }
- else
- {
- env->allocateIfNeeded(out_index, output_info);
- }
-
- auto out_tensor = env->tensorAt(out_index);
- UNUSED_RELEASE(out_tensor);
-
- // Handle same ifm & ofm data type only
- assert(in_tensor->data_type() == out_tensor->data_type());
- assert(out_tensor->num_dimensions() == 4);
-}
-
-void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor,
- const ITensor *ofm_tensor, const ir::operation::Conv2D::Param &param)
-{
- // TODO Support NCHW frontend
- const auto ifm_shape = ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- const auto ofm_shape = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
- const auto &ker_shape = ker_tensor->tensorInfo().shape();
- const auto ker_height = ker_shape.dim(1);
- const auto ker_width = ker_shape.dim(2);
- const auto padding = ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride,
- ker_width, ker_height);
-
- // Calculate
- float activation_min, activation_max;
- calculateActivationRange(param.activation, &activation_min, &activation_max);
-
- nnfw::cker::ConvParams cker_param;
- cker_param.padding_type = convertPaddingType(param.padding.type);
- cker_param.padding_values.width = padding.left;
- cker_param.padding_values.height = padding.top;
- cker_param.stride_width = param.stride.horizontal;
- cker_param.stride_height = param.stride.vertical;
- cker_param.dilation_width_factor = 1;
- cker_param.dilation_height_factor = 1;
- cker_param.float_activation_min = activation_min;
- cker_param.float_activation_max = activation_max;
-
- const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape());
- const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape());
- const auto cker_bias_shape = convertShape(bias_tensor->tensorInfo().shape());
- const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape());
- const float *ifm_ptr = reinterpret_cast<const float *>(ifm_tensor->bufferRO());
- const float *ker_ptr = reinterpret_cast<const float *>(ker_tensor->bufferRO());
- const float *bias_ptr = reinterpret_cast<const float *>(bias_tensor->bufferRO());
- float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer());
-
- nnfw::cker::Conv conv_kernel;
- conv_kernel(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr, cker_bias_shape,
- bias_ptr, cker_ofm_shape, ofm_ptr);
-}
-
-void invokeConv2D(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &conv_node = nnfw::misc::polymorphic_downcast<const ir::operation::Conv2D &>(node);
-
- const auto ifm_index = node.getInputs().at(ir::operation::Conv2D::INPUT);
- const auto ker_index = node.getInputs().at(ir::operation::Conv2D::KERNEL);
- const auto bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS);
- const auto ofm_index = node.getOutputs().at(0);
-
- const auto ifm_tensor = env->tensorAt(ifm_index);
- const auto ker_tensor = env->tensorAt(ker_index);
- const auto bias_tensor = env->tensorAt(bias_index);
- const auto ofm_tensor = env->tensorAt(ofm_index);
-
- const auto data_type = ifm_tensor->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, conv_node.param());
- }
- else
- {
- throw std::runtime_error{"NYI: Support float32 only"};
- }
-}
-} // namespace conv2d
-
-OpKernel *getConv2D()
-{
- static OpKernel kernel = {conv2d::prepareConv2D, conv2d::invokeConv2D};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
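
When the model leaves the output shape unspecified, prepareConv2D() infers it via shape_inference::inferConv2DShape (not shown in this diff). For reference, the usual per-dimension formulas such a helper computes for NHWC with dilation 1 — a hedged sketch, not the actual onert implementation:

    // VALID: out = floor((in - kernel) / stride) + 1
    // SAME : out = ceil(in / stride)
    int convOutDim(int in, int kernel, int stride, bool same_padding)
    {
      return same_padding ? (in + stride - 1) / stride
                          : (in - kernel) / stride + 1;
    }
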
diff --git a/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc b/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc
deleted file mode 100644
index 0473855d9..000000000
--- a/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/DepthwiseConv.h>
-#include <misc/polymorphic_downcast.h>
-
-#include "OperationUtil.h"
-
-#include "interp/Registration.h"
-#include "ir/operation/DepthwiseConv2D.h"
-#include "util/Utils.h"
-#include "util/ShapeInference.h"
-
-namespace onert
-{
-namespace interp
-{
-
-namespace
-{
-
-void prepareDepthwiseConv(ExecEnv *env, const ir::Operation &node)
-{
- const auto in_index = node.getInputs().at(ir::operation::DepthwiseConv2D::INPUT);
- const auto kernel_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL);
- const auto bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
- const auto kernel_tensor = env->tensorAt(kernel_index);
- const auto bias_tensor = env->tensorAt(bias_index);
-
- assert(in_tensor->num_dimensions() == 4);
- assert(kernel_tensor->num_dimensions() == 4);
- assert(bias_tensor->num_dimensions() == 1);
-
- UNUSED_RELEASE(in_tensor);
- UNUSED_RELEASE(kernel_tensor);
- UNUSED_RELEASE(bias_tensor);
-
- // Note: an unspecified output shape is handled below by
- // calculating it from ifm shape, kernel shape, padding, and stride
- const auto output_info = env->graph().operands().at(out_index).info();
- if (output_info.total_size() == 0)
- {
- // Handle unspecified output shape
- const auto &depth_conv_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::DepthwiseConv2D &>(node);
- const auto inferred_output_shape = shape_inference::inferDepthwiseConv2DShape(
- in_tensor->tensorInfo().shape(), kernel_tensor->tensorInfo().shape(),
- depth_conv_node.param());
- env->allocateIfNeeded(
- out_index, ir::OperandInfo::createStaticInfo(inferred_output_shape, output_info.typeInfo()));
- }
- else
- {
- env->allocateIfNeeded(out_index, output_info);
- }
-
- auto out_tensor = env->tensorAt(out_index);
- UNUSED_RELEASE(out_tensor);
-
- // Handle same ifm & ofm data type only
- assert(in_tensor->data_type() == out_tensor->data_type());
- assert(out_tensor->num_dimensions() == 4);
-}
-
-void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor,
- const ITensor *ofm_tensor, const ir::operation::DepthwiseConv2D::Param &param)
-{
- // TODO Support NCHW frontend
- const auto ifm_shape = ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- const auto ofm_shape = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- // Kernel format is [1, kernel_height, kernel_width, depth_out].
- const auto &ker_shape = ker_tensor->tensorInfo().shape();
- const auto ker_height = ker_shape.dim(1);
- const auto ker_width = ker_shape.dim(2);
- const auto padding = ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride,
- ker_width, ker_height);
-
- // Calculate
- float activation_min, activation_max;
- calculateActivationRange(param.activation, &activation_min, &activation_max);
-
- nnfw::cker::DepthwiseConvParams cker_param;
- cker_param.padding_values.width = padding.left;
- cker_param.padding_values.height = padding.top;
- cker_param.depth_multiplier = param.multiplier;
- cker_param.stride_width = param.stride.horizontal;
- cker_param.stride_height = param.stride.vertical;
- cker_param.dilation_width_factor = 1;
- cker_param.dilation_height_factor = 1;
- cker_param.float_activation_min = activation_min;
- cker_param.float_activation_max = activation_max;
-
- const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape());
- const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape());
- const auto cker_bias_shape = convertShape(bias_tensor->tensorInfo().shape());
- const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape());
- const float *ifm_ptr = reinterpret_cast<const float *>(ifm_tensor->bufferRO());
- const float *ker_ptr = reinterpret_cast<const float *>(ker_tensor->bufferRO());
- const float *bias_ptr = reinterpret_cast<const float *>(bias_tensor->bufferRO());
- float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer());
-
- nnfw::cker::DepthwiseConv(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr,
- cker_bias_shape, bias_ptr, cker_ofm_shape, ofm_ptr);
-}
-
-void invokeDepthwiseConv(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &conv_node = nnfw::misc::polymorphic_downcast<const ir::operation::DepthwiseConv2D &>(node);
-
- const auto ifm_index = node.getInputs().at(ir::operation::DepthwiseConv2D::INPUT);
- const auto ker_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL);
- const auto bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS);
- const auto ofm_index = node.getOutputs().at(0);
-
- const auto ifm_tensor = env->tensorAt(ifm_index);
- const auto ker_tensor = env->tensorAt(ker_index);
- const auto bias_tensor = env->tensorAt(bias_index);
- const auto ofm_tensor = env->tensorAt(ofm_index);
-
- const auto data_type = ifm_tensor->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, conv_node.param());
- }
- else
- {
- throw std::runtime_error{"NYI: Support float32 only"};
- }
-}
-
-} // namespace
-
-OpKernel *getDepthwiseConv2D()
-{
- static OpKernel kernel = {prepareDepthwiseConv, invokeDepthwiseConv};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
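
Depthwise convolution differs from Conv2D above mainly in kernel layout ([1, kernel_height, kernel_width, depth_out] instead of [depth_out, kh, kw, depth_in]) and in depth_multiplier: each input channel produces `multiplier` output channels. A one-line sketch of that relation:

    // e.g. 8 input channels with depth_multiplier 2 -> 16 output channels
    int depthwiseOutChannels(int in_channels, int multiplier) { return in_channels * multiplier; }
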
diff --git a/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc b/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc
deleted file mode 100644
index c8773bef4..000000000
--- a/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cmath>
-
-#include "OperationUtil.h"
-
-#include "interp/Registration.h"
-
-#include "ir/operation/ElementwiseActivation.h"
-
-#include <misc/polymorphic_downcast.h>
-#include <cker/operation/Logistic.h>
-#include <cker/operation/Tanh.h>
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-enum class ActivationType
-{
- Logistic,
- ReLU,
- Tanh
-};
-
-void prepare(ExecEnv *env, const ir::Operation &node)
-{
- const auto input_index = node.getInputs().at(0);
- const auto output_index = node.getOutputs().at(0);
-
- const auto input_tensor = env->tensorAt(input_index);
-
- const auto output_info = env->graph().operands().at(output_index).info();
- if (output_info.total_size() == 0)
- {
- // Output's shape and type are the same as the input's
- auto input_info = input_tensor->tensorInfo();
- // We can handle already-allocated tensors (e.g. model output)
- env->allocateIfNeeded(output_index, input_info);
- }
- else
- {
- env->allocateIfNeeded(output_index, output_info);
- }
-
- const auto output_tensor = env->tensorAt(output_index);
- // Check that the input shape and type match the output
- // TODO Utility function to compare TensorInfo
- if (input_tensor->data_type() != output_tensor->data_type())
- {
- throw std::runtime_error{"Interp(ElementwiseActivation): Invalid output type"};
- }
-}
-
-template <ActivationType act_type>
-void evalFloat(const float *input_ptr, float *output_ptr, uint64_t num_elements, float alpha,
- float beta)
-{
- std::function<float(const float &)> fn = [](const float &) { return std::nanf(""); };
- switch (act_type)
- {
- case ActivationType::ReLU:
- fn = [alpha, beta](const float &in) { return std::min(std::max(beta, in), alpha); };
- break;
- case ActivationType::Tanh:
- fn = [](const float &in) { return std::tanh(in); };
- break;
- default:
- throw std::runtime_error{"Interp(ElementwiseActivation): NYI - Unsupported activation"};
- break;
- }
-
- const float *input_end = input_ptr + num_elements;
- for (; input_ptr < input_end; input_ptr++, output_ptr++)
- {
- *output_ptr = fn(*input_ptr);
- }
-}
-
-template <ActivationType act_type> void invoke(const ExecEnv *env, const ir::Operation &node)
-{
- const auto input_index = node.getInputs().at(0);
- const auto output_index = node.getOutputs().at(0);
-
- // Input and output tensors of the unary activation
- const auto input_tensor = env->tensorAt(input_index);
- const auto output_tensor = env->tensorAt(output_index);
-
- const auto data_type = input_tensor->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- uint64_t elements = input_tensor->num_elements();
- const float *input_start = reinterpret_cast<const float *>(input_tensor->bufferRO());
- float *out = reinterpret_cast<float *>(output_tensor->buffer());
- if (act_type == ActivationType::Logistic)
- {
- const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape());
- const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
- nnfw::cker::Logistic(cker_input_shape, input_start, cker_output_shape, out);
- }
- else
- {
- const auto &act_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::ElementwiseActivation &>(node);
- evalFloat<act_type>(input_start, out, elements, act_node.param().alpha,
- act_node.param().beta);
- }
- }
- else
- {
- throw std::runtime_error{"Interp(" + node.name() + "): NYI - Support float only"};
- }
-}
-
-void invokeElementwiseActivation(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &act_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::ElementwiseActivation &>(node);
- switch (act_node.param().op_type)
- {
- case ir::operation::ElementwiseActivation::Type::LOGISTIC:
- invoke<ActivationType::Logistic>(env, node);
- break;
- case ir::operation::ElementwiseActivation::Type::RELU:
- invoke<ActivationType::ReLU>(env, node);
- break;
- case ir::operation::ElementwiseActivation::Type::TANH:
- invoke<ActivationType::Tanh>(env, node);
- break;
- default:
- throw std::runtime_error("Interp(" + node.name() + "): NYI - Unsupported activation");
- }
-}
-
-} // namespace
-
-OpKernel *getElementwiseActivation()
-{
- static OpKernel kernel = {prepare, invokeElementwiseActivation};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
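
The ReLU family above is expressed as a single clamp, out = min(max(beta, x), alpha), so the alpha/beta pair encodes the variant. A tiny sketch of the mapping (the encoding is a reading of the deleted lambda; the ReLU/ReLU6 parameter values are assumed from common usage, not confirmed by this diff):

    #include <algorithm>

    float reluFamily(float x, float alpha, float beta)
    {
      return std::min(std::max(beta, x), alpha); // matches the deleted lambda
    }
    // reluFamily(x, INFINITY, 0.0f) -> ReLU
    // reluFamily(x, 6.0f, 0.0f)     -> ReLU6
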
diff --git a/runtime/onert/core/src/interp/operations/FullyConnected.cc b/runtime/onert/core/src/interp/operations/FullyConnected.cc
deleted file mode 100644
index 12f529dab..000000000
--- a/runtime/onert/core/src/interp/operations/FullyConnected.cc
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/FullyConnected.h>
-
-#include "OperationUtil.h"
-
-#include "interp/Registration.h"
-#include "ir/operation/FullyConnected.h"
-#include "misc/polymorphic_downcast.h"
-
-namespace onert
-{
-namespace interp
-{
-namespace fc
-{
-
-void prepareFC(ExecEnv *env, const ir::Operation &node)
-{
- const auto in_index = node.getInputs().at(ir::operation::FullyConnected::INPUT);
- const auto kernel_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT);
- const auto bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
- const auto kernel_tensor = env->tensorAt(kernel_index);
- const auto bias_tensor = env->tensorAt(bias_index);
-
- UNUSED_RELEASE(in_tensor);
- UNUSED_RELEASE(kernel_tensor);
- UNUSED_RELEASE(bias_tensor);
-
- assert(in_tensor->num_dimensions() >= 2);
- assert(kernel_tensor->num_dimensions() == 2);
- assert(bias_tensor->num_dimensions() == 1);
-
- const auto input_size_with_batch = in_tensor->num_elements();
- const auto num_units = kernel_tensor->dimension(0);
- const auto input_size = kernel_tensor->dimension(1);
- const auto batch_size = input_size_with_batch / input_size;
- assert(input_size_with_batch % input_size == 0);
- assert(num_units == bias_tensor->dimension(0));
-
- // Make output tensor info
- ir::Shape output_shape(2);
- output_shape.dim(0) = batch_size;
- output_shape.dim(1) = num_units;
- const auto out_info =
- ir::OperandInfo::createStaticInfo(output_shape, in_tensor->tensorInfo().typeInfo());
- env->allocateIfNeeded(out_index, out_info);
-
- auto out_tensor = env->tensorAt(out_index);
- UNUSED_RELEASE(out_tensor);
-
- // Handle same ifm & ofm data type only
- assert(in_tensor->data_type() == out_tensor->data_type());
- assert(out_tensor->num_dimensions() == 2);
- assert(out_tensor->dimension(0) == batch_size);
- assert(out_tensor->dimension(1) == num_units);
-}
-
-void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor,
- const ITensor *ofm_tensor, const ir::operation::FullyConnected::Param &param)
-{
- const auto ifm_buffer = ifm_tensor->bufferRO();
- const auto ker_buffer = ker_tensor->bufferRO();
- const auto bias_buffer = bias_tensor->bufferRO();
- auto ofm_buffer = ofm_tensor->buffer();
-
- // Calculate
- nnfw::cker::FullyConnectedParams cker_param;
- cker_param.activation = convertActivationType(param.activation);
- calculateActivationRange(param.activation, &cker_param.float_activation_min,
- &cker_param.float_activation_max);
- const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape());
- const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape());
- const auto cker_bias_shape = convertShape(bias_tensor->tensorInfo().shape());
- const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape());
- const float *ifm_ptr = reinterpret_cast<const float *>(ifm_buffer);
- const float *ker_ptr = reinterpret_cast<const float *>(ker_buffer);
- const float *bias_ptr = reinterpret_cast<const float *>(bias_buffer);
- float *ofm_ptr = reinterpret_cast<float *>(ofm_buffer);
-
- nnfw::cker::FullyConnected(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr,
- cker_bias_shape, bias_ptr, cker_ofm_shape, ofm_ptr);
-}
-
-void invokeFC(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &fc_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::FullyConnected &>(node);
-
- const auto ifm_index = node.getInputs().at(ir::operation::FullyConnected::INPUT);
- const auto ker_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT);
- const auto bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS);
- const auto ofm_index = node.getOutputs().at(0);
-
- const auto ifm_tensor = env->tensorAt(ifm_index);
- const auto ker_tensor = env->tensorAt(ker_index);
- const auto bias_tensor = env->tensorAt(bias_index);
- const auto ofm_tensor = env->tensorAt(ofm_index);
-
- const auto data_type = ifm_tensor->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, fc_node.param());
- }
- else
- {
- throw std::runtime_error{"NYI: Support float only"};
- }
-}
-} // namespace fc
-
-OpKernel *getFullyConnected()
-{
- static OpKernel kernel = {fc::prepareFC, fc::invokeFC};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
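
prepareFC() folds every leading dimension into the batch: with a kernel of shape [num_units, input_size], batch_size = num_elements(input) / input_size and the output is [batch_size, num_units]. Worked example: input [2, 4, 8] against kernel [16, 8] gives 64 / 8 = 8, so the output is [8, 16]. As a sketch:

    #include <cstdint>

    uint64_t fcBatchSize(uint64_t input_num_elements, uint64_t input_size)
    {
      // prepareFC() asserts input_num_elements % input_size == 0
      return input_num_elements / input_size;
    }
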
diff --git a/runtime/onert/core/src/interp/operations/Gather.cc b/runtime/onert/core/src/interp/operations/Gather.cc
deleted file mode 100644
index 9e82def5f..000000000
--- a/runtime/onert/core/src/interp/operations/Gather.cc
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/Gather.h>
-
-#include "OperationUtil.h"
-
-#include "interp/Registration.h"
-#include "ir/operation/Gather.h"
-#include "misc/polymorphic_downcast.h"
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-void prepareGather(ExecEnv *env, const ir::Operation &node)
-{
- const auto input_index = node.getInputs().at(ir::operation::Gather::INPUT);
- const auto indices_index = node.getInputs().at(ir::operation::Gather::INDICES);
- const auto output_index = node.getOutputs().at(0);
-
- const auto input_tensor = env->tensorAt(input_index);
- const auto indices_tensor = env->tensorAt(indices_index);
-
- // TODO Handle unspecified output shape:
- // calculate output shape using input shape, indices shape, and axis
- const auto output_info = env->graph().operands().at(output_index).info();
- if (output_info.total_size() == 0)
- {
- throw std::runtime_error{"Interp(Gather): NYI for unspecified output shape"};
- }
- else
- {
- env->allocateIfNeeded(output_index, output_info);
- }
-
- if (indices_tensor->data_type() != ir::DataType::INT32)
- {
- throw std::runtime_error{"Interp(Gather): Invalid indices data type"};
- }
-
- auto output_tensor = env->tensorAt(output_index);
- auto output_rank = input_tensor->num_dimensions() + indices_tensor->num_dimensions() - 1;
-
- if (output_rank != output_tensor->num_dimensions())
- {
- throw std::runtime_error{"Interp(Gather): Invalid output rank"};
- }
- if (output_tensor->data_type() != input_tensor->data_type())
- {
- throw std::runtime_error{"Interp(Gather): Invalid output data type"};
- }
-
- if (input_tensor->data_type() == ir::DataType::QUANT_UINT8_ASYMM &&
- input_tensor->tensorInfo().typeInfo() != output_tensor->tensorInfo().typeInfo())
- {
- throw std::runtime_error{
- "Interp(Gather): Cannot handle different I/O QUANT_UINT8_ASYMM scale/offset"};
- }
-}
-
-template <typename raw_type>
-void invoke(const ITensor *input_tensors, const ITensor *indices_tensors,
- const ITensor *output_tensor, uint32_t axis)
-{
- // Calculate
- nnfw::cker::GatherParams cker_param;
- cker_param.axis = (int8_t)axis;
-
- const auto cker_input_shapes = convertShape(input_tensors->tensorInfo().shape());
- const auto cker_indices_shape = convertShape(indices_tensors->tensorInfo().shape());
- const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
- const raw_type *input_ptr = reinterpret_cast<const raw_type *>(input_tensors->bufferRO());
- const int32_t *indices_ptr = reinterpret_cast<const int32_t *>(indices_tensors->bufferRO());
- raw_type *output_ptr = reinterpret_cast<raw_type *>(output_tensor->buffer());
-
- nnfw::cker::Gather<raw_type>(cker_param, cker_input_shapes, input_ptr, cker_indices_shape,
- indices_ptr, cker_output_shape, output_ptr);
-}
-
-void invokeGather(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &gather_node = nnfw::misc::polymorphic_downcast<const ir::operation::Gather &>(node);
- const int32_t axis_raw = gather_node.param().axis;
-
- const auto input_index = node.getInputs().at(ir::operation::Gather::INPUT);
- const auto indices_index = node.getInputs().at(ir::operation::Gather::INDICES);
- const auto output_index = node.getOutputs().at(0);
-
- const auto input_tensor = env->tensorAt(input_index);
- const auto indices_tensor = env->tensorAt(indices_index);
- const auto output_tensor = env->tensorAt(output_index);
- const uint32_t axis = (axis_raw < 0) ? (axis_raw + input_tensor->num_dimensions()) : axis_raw;
-
- const auto data_type = input_tensor->data_type();
-
- switch (data_type)
- {
- case ir::DataType::FLOAT32:
- invoke<float>(input_tensor, indices_tensor, output_tensor, axis);
- break;
- case ir::DataType::INT32:
- invoke<int32_t>(input_tensor, indices_tensor, output_tensor, axis);
- break;
- case ir::DataType::QUANT_UINT8_ASYMM:
- invoke<uint8_t>(input_tensor, indices_tensor, output_tensor, axis);
- break;
- default:
- throw std::runtime_error{"Interp(Gather): NYI - Not supported type"};
- }
-}
-
-} // namespace
-
-OpKernel *getGather()
-{
- static OpKernel kernel = {prepareGather, invokeGather};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
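
prepareGather() enforces rank(output) == rank(input) + rank(indices) - 1, since the axis dimension of the input is replaced by the whole indices shape: input [4, 5] gathered with indices [2, 3] on axis 0 yields output [2, 3, 5]. As a sketch:

    #include <cstddef>

    size_t gatherOutputRank(size_t input_rank, size_t indices_rank)
    {
      // the axis dim is replaced by indices.shape, e.g. [4,5] + [2,3] @ axis 0 -> [2,3,5]
      return input_rank + indices_rank - 1;
    }
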
diff --git a/runtime/onert/core/src/interp/operations/InstanceNorm.cc b/runtime/onert/core/src/interp/operations/InstanceNorm.cc
deleted file mode 100644
index 2538bcc39..000000000
--- a/runtime/onert/core/src/interp/operations/InstanceNorm.cc
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/InstanceNorm.h>
-
-#include "OperationUtil.h"
-
-#include "interp/Registration.h"
-#include "ir/operation/InstanceNorm.h"
-#include "misc/polymorphic_downcast.h"
-
-namespace onert
-{
-namespace interp
-{
-namespace instancenorm
-{
-
-void prepareInstanceNorm(ExecEnv *env, const ir::Operation &node)
-{
- const auto &instancenorm_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::InstanceNorm &>(node);
-
- const auto input_index = node.getInputs().at(instancenorm_node.INPUT);
- const auto output_index = node.getOutputs().at(0);
- const auto input_tensor = env->tensorAt(input_index);
-
- if (input_tensor->num_dimensions() != 4)
- {
- throw std::runtime_error{"Interp(InstanceNorm): Input should be 4D-tensor"};
- }
-
- // Output shape should be the same as the input
- env->allocateIfNeeded(output_index, input_tensor->tensorInfo());
-
- auto output_tensor = env->tensorAt(output_index);
- UNUSED_RELEASE(output_tensor);
-
- // Handle same ifm & ofm data type only
- assert(input_tensor->data_type() == output_tensor->data_type());
- assert(input_tensor->tensorInfo().shape() == output_tensor->tensorInfo().shape());
-}
-
-inline void setActivationParams(float min, float max, nnfw::cker::InstanceNormParams *params)
-{
- params->float_activation_min = min;
- params->float_activation_max = max;
-}
-
-void invoke(const ITensor *input_tensor, const ITensor *gamma_tensor, const ITensor *beta_tensor,
- const ITensor *output_tensor, const ir::operation::InstanceNorm::Param &param)
-{
- // Compute the fused activation range and fill cker parameters
- float activation_min, activation_max;
- calculateActivationRange(param.activation, &activation_min, &activation_max);
-
- nnfw::cker::InstanceNormParams cker_param;
- cker_param.epsilon = param.epsilon;
- cker_param.float_activation_min = activation_min;
- cker_param.float_activation_max = activation_max;
-
- const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape());
- const auto cker_gamma_shape = convertShape(gamma_tensor->tensorInfo().shape());
- const auto cker_beta_shape = convertShape(beta_tensor->tensorInfo().shape());
- const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
- const float *input_ptr = reinterpret_cast<const float *>(input_tensor->bufferRO());
- const float *gamma_ptr = reinterpret_cast<const float *>(gamma_tensor->bufferRO());
- const float *beta_ptr = reinterpret_cast<const float *>(beta_tensor->bufferRO());
- float *output_ptr = reinterpret_cast<float *>(output_tensor->buffer());
-
- nnfw::cker::InstanceNorm(cker_param, cker_input_shape, input_ptr, cker_gamma_shape, gamma_ptr,
- cker_beta_shape, beta_ptr, cker_output_shape, output_ptr);
-}
-
-void invokeInstanceNorm(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &instancenorm_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::InstanceNorm &>(node);
-
- const auto input_index = node.getInputs().at(instancenorm_node.INPUT);
- const auto gamma_index = node.getInputs().at(instancenorm_node.GAMMA);
- const auto beta_index = node.getInputs().at(instancenorm_node.BETA);
- const auto out_index = node.getOutputs().at(0);
- const auto input_tensor = env->tensorAt(input_index);
- const auto gamma_tensor = env->tensorAt(gamma_index);
- const auto beta_tensor = env->tensorAt(beta_index);
- const auto out_tensor = env->tensorAt(out_index);
- const auto data_type = input_tensor->data_type();
-
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(input_tensor, gamma_tensor, beta_tensor, out_tensor, instancenorm_node.param());
- }
- else
- {
- throw std::runtime_error{"NYI: Unsupported data type"};
- }
-}
-} // namespace instancenorm
-
-OpKernel *getInstanceNorm()
-{
- static OpKernel kernel = {instancenorm::prepareInstanceNorm, instancenorm::invokeInstanceNorm};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/OperationUtil.h b/runtime/onert/core/src/interp/operations/OperationUtil.h
deleted file mode 100644
index 2fdf098f0..000000000
--- a/runtime/onert/core/src/interp/operations/OperationUtil.h
+++ /dev/null
@@ -1,203 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_INTERP_OPERATIONS_OPERATION_UTILS_H_
-#define __ONERT_INTERP_OPERATIONS_OPERATION_UTILS_H_
-
-#include "ir/Shape.h"
-#include "ir/InternalType.h"
-#include "ir/Padding.h"
-
-#include <cker/Shape.h>
-#include <cker/Types.h>
-
-namespace onert
-{
-namespace interp
-{
-
-inline nnfw::cker::Shape convertShape(const ir::Shape &shape)
-{
- auto dimensions = std::vector<uint32_t>(shape.dims().begin(), shape.dims().end());
-
- std::vector<int32_t> raw_shape;
- raw_shape.resize(dimensions.size());
-
- for (uint32_t i = 0; i < dimensions.size(); ++i)
- {
- raw_shape[i] = dimensions[i];
- }
-
- return nnfw::cker::GetShape(raw_shape);
-}
-
-inline nnfw::cker::Shape convertExtendShape(const ir::Shape &shape)
-{
- auto dimensions = std::vector<uint32_t>(shape.dims().begin(), shape.dims().end());
-
- const int32_t extended_rank = 4;
- int32_t raw_shape[extended_rank];
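- // Left-pad the shape with 1s up to rank 4 (assumes the input rank is at most 4)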
- uint32_t start = extended_rank - dimensions.size();
-
- for (uint32_t i = 0; i < extended_rank; ++i)
- {
- if (i < start)
- {
- raw_shape[i] = 1;
- }
- else
- {
- raw_shape[i] = dimensions[i - start];
- }
- }
-
- return nnfw::cker::Shape(extended_rank, raw_shape);
-}
-
-inline nnfw::cker::FusedActivationFunctionType
-convertActivationType(const ir::Activation activation)
-{
- switch (activation)
- {
- case ir::Activation::NONE:
- return nnfw::cker::FusedActivationFunctionType::kNone;
- case ir::Activation::RELU:
- return nnfw::cker::FusedActivationFunctionType::kRelu;
- case ir::Activation::RELU1:
- return nnfw::cker::FusedActivationFunctionType::kRelu1;
- case ir::Activation::RELU6:
- return nnfw::cker::FusedActivationFunctionType::kRelu6;
- default:
- throw std::runtime_error{"CPU backend: Cannot convert activation type"};
- }
-}
-
-template <typename T>
-void calculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max)
-{
- if (activation == ir::Activation::RELU)
- {
- *activation_min = 0;
- *activation_max = std::numeric_limits<T>::max();
- }
- else if (activation == ir::Activation::RELU6)
- {
- *activation_min = 0;
- *activation_max = 6;
- }
- else if (activation == ir::Activation::RELU1)
- {
- *activation_min = -1;
- *activation_max = 1;
- }
- else if (activation == ir::Activation::NONE)
- {
- *activation_min = std::numeric_limits<T>::lowest();
- *activation_max = std::numeric_limits<T>::max();
- }
- else
- {
- throw std::runtime_error{"Unsupported activation type"};
- }
-}
-
-inline ir::Shape calcBroadcastShape(const ir::Shape &lhs, const ir::Shape &rhs, bool &success)
-{
- int lhs_rank = lhs.rank();
- int rhs_rank = rhs.rank();
-
- int out_rank = (lhs_rank > rhs_rank ? lhs_rank : rhs_rank);
- ir::Shape out_shape(out_rank);
-
- int lhs_idim = lhs_rank - 1;
- int rhs_idim = rhs_rank - 1;
- success = true;
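- // Walk dimensions from innermost to outermost; two dims are compatible when equal or either is 1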
- for (int out_idim = out_rank - 1; out_idim >= 0; out_idim--)
- {
- if (lhs_idim == -1 && rhs_idim == -1)
- {
- // invalid result
- success = false;
- break;
- }
-
- if (lhs_idim == -1)
- {
- out_shape.dim(out_idim) = rhs.dim(rhs_idim);
- rhs_idim--;
- }
- else if (rhs_idim == -1)
- {
- out_shape.dim(out_idim) = lhs.dim(lhs_idim);
- lhs_idim--;
- }
- else
- {
- if (lhs.dim(lhs_idim) == rhs.dim(rhs_idim))
- {
- out_shape.dim(out_idim) = lhs.dim(lhs_idim);
- lhs_idim--;
- rhs_idim--;
- }
- else if (lhs.dim(lhs_idim) == 1)
- {
- out_shape.dim(out_idim) = rhs.dim(rhs_idim);
- lhs_idim--;
- rhs_idim--;
- }
- else if (rhs.dim(rhs_idim) == 1)
- {
- out_shape.dim(out_idim) = lhs.dim(lhs_idim);
- lhs_idim--;
- rhs_idim--;
- }
- else
- {
- // invalid result
- success = false;
- break;
- }
- }
- }
-
- if (lhs_idim != -1 || rhs_idim != -1)
- {
- // invalid result
- success = false;
- }
- return out_shape;
-}
-
-inline nnfw::cker::PaddingType convertPaddingType(ir::PaddingType ir_padding_type)
-{
- switch (ir_padding_type)
- {
- case ir::PaddingType::EXPLICIT:
- return nnfw::cker::PaddingType::kNone;
- case ir::PaddingType::SAME:
- return nnfw::cker::PaddingType::kSame;
- case ir::PaddingType::VALID:
- return nnfw::cker::PaddingType::kValid;
- default:
- throw std::runtime_error("Wrong padding type.");
- break;
- }
-}
-
-} // namespace interp
-} // namespace onert
-
-#endif // __ONERT_INTERP_OPERATIONS_OPERATION_UTILS_H_
diff --git a/runtime/onert/core/src/interp/operations/Pad.cc b/runtime/onert/core/src/interp/operations/Pad.cc
deleted file mode 100644
index c8dce698d..000000000
--- a/runtime/onert/core/src/interp/operations/Pad.cc
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/Pad.h>
-
-#include "OperationUtil.h"
-
-#include "interp/Registration.h"
-#include "ir/operation/Pad.h"
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-void preparePad(ExecEnv *env, const ir::Operation &node)
-{
- const auto input_index = node.getInputs().at(ir::operation::Pad::INPUT);
- const auto output_index = node.getOutputs().at(0);
-
- const auto input_tensor = env->tensorAt(input_index);
-
- const auto output_info = env->graph().operands().at(output_index).info();
-
- // Check that the output shape is specified and allocate the output
- // TODO Add a util function to compare TensorInfo
- if (output_info.total_size() == 0)
- {
- throw std::runtime_error{"Interp(Pad): NYI unspecified output shape"};
- }
- else
- {
- env->allocateIfNeeded(output_index, output_info);
- }
-
- const auto output_tensor = env->tensorAt(output_index);
- if (input_tensor->data_type() != output_tensor->data_type())
- {
- throw std::runtime_error{"Interp(Pad): Invalid output type"};
- }
-}
-
-void invoke(const ITensor *input_tensor, const ITensor *pad_tensor, const ITensor *output_tensor)
-{
- const auto input_buffer = input_tensor->bufferRO();
- const auto pad_buffer = pad_tensor->bufferRO();
- auto output_buffer = output_tensor->buffer();
-
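- // The pad tensor has shape [rank, 2]: (before, after) pad amounts per input dimension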
- int32_t pad_rank = pad_tensor->dimension(0);
-
- const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape());
- const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
- const float *input_ptr = reinterpret_cast<const float *>(input_buffer);
- const int32_t *pad_ptr = reinterpret_cast<const int32_t *>(pad_buffer);
- float *output_ptr = reinterpret_cast<float *>(output_buffer);
-
- nnfw::cker::Pad<float>(pad_ptr, pad_rank, cker_input_shape, input_ptr, cker_output_shape,
- output_ptr, nullptr);
-}
-
-void invokePad(const ExecEnv *env, const ir::Operation &node)
-{
- const auto input_index = node.getInputs().at(ir::operation::Pad::INPUT);
- const auto pad_index = node.getInputs().at(ir::operation::Pad::PAD);
- const auto output_index = node.getOutputs().at(0);
-
- const auto input_tensor = env->tensorAt(input_index);
- const auto pad_tensor = env->tensorAt(pad_index);
- const auto output_tensor = env->tensorAt(output_index);
-
- const auto data_type = input_tensor->data_type();
-
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(input_tensor, pad_tensor, output_tensor);
- }
- else
- {
- throw std::runtime_error{"Interp(Pad): NYI - Unsupported data type"};
- }
-}
-} // namespace
-
-OpKernel *getPad()
-{
- static OpKernel kernel = {preparePad, invokePad};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/Pool2D.cc b/runtime/onert/core/src/interp/operations/Pool2D.cc
deleted file mode 100644
index 92f9d70b2..000000000
--- a/runtime/onert/core/src/interp/operations/Pool2D.cc
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/AveragePool.h>
-#include <cker/operation/MaxPool.h>
-
-#include "OperationUtil.h"
-
-#include "interp/Registration.h"
-#include "ir/operation/Pool2D.h"
-#include "util/Utils.h"
-#include "util/ShapeInference.h"
-#include "misc/polymorphic_downcast.h"
-
-namespace onert
-{
-namespace interp
-{
-namespace pool2d
-{
-
-void preparePool2D(ExecEnv *env, const ir::Operation &node)
-{
- const auto &pool_node = nnfw::misc::polymorphic_downcast<const ir::operation::Pool2D &>(node);
- const auto in_index = node.getInputs().at(pool_node.INPUT);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
- UNUSED_RELEASE(in_tensor);
-
- assert(in_tensor->num_dimensions() == 4);
-
- const auto output_info = env->graph().operands().at(out_index).info();
- if (output_info.total_size() == 0)
- {
- // Handle unspecified output shape
- const auto infered_output_shape =
- shape_inference::inferPoolShape(in_tensor->tensorInfo().shape(), pool_node.param());
- env->allocateIfNeeded(
- out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo()));
- }
- else
- {
- env->allocateIfNeeded(out_index, output_info);
- }
-
- auto out_tensor = env->tensorAt(out_index);
- UNUSED_RELEASE(out_tensor);
-
- // Handle same ifm & ofm data type only
- assert(in_tensor->data_type() == out_tensor->data_type());
- assert(out_tensor->num_dimensions() == 4);
-}
-
-template <typename T>
-void invoke(const nnfw::cker::PoolParams &params, const nnfw::cker::Shape &in_shape,
- const T *in_ptr, const nnfw::cker::Shape &out_shape, T *out_ptr,
- ir::operation::Pool2D::PoolType op_type)
-{
- switch (op_type)
- {
- case ir::operation::Pool2D::PoolType::AVG:
- nnfw::cker::AveragePool<T>(params, in_shape, in_ptr, out_shape, out_ptr);
- break;
- case ir::operation::Pool2D::PoolType::MAX:
- nnfw::cker::MaxPool<T>(params, in_shape, in_ptr, out_shape, out_ptr);
- break;
- default:
- throw std::runtime_error{"Interp(Pool2D): NYI unsupported operation"};
- break;
- }
-}
-
-void invokePool2DOps(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &pool_node = nnfw::misc::polymorphic_downcast<const ir::operation::Pool2D &>(node);
-
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- // Fetch the input/output tensors
- const auto in_tensor = env->tensorAt(in_index);
- const auto out_tensor = env->tensorAt(out_index);
-
- // TODO support NCHW frontend
- const auto ifm_shape = in_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- const auto ofm_shape = out_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- const auto param = pool_node.param();
- const auto padding =
- ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, param.kw, param.kh);
- // Fill cker pooling parameters
- nnfw::cker::PoolParams cker_param;
- cker_param.filter_width = param.kw;
- cker_param.filter_height = param.kh;
- cker_param.padding_values.width = padding.left;
- cker_param.padding_values.height = padding.top;
- cker_param.stride_width = param.stride.horizontal;
- cker_param.stride_height = param.stride.vertical;
-
- const auto data_type = in_tensor->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- calculateActivationRange(param.activation, &cker_param.float_activation_min,
- &cker_param.float_activation_max);
-
- const auto in_shape = convertShape(in_tensor->tensorInfo().shape());
- const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
- const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO());
- float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer());
- // For now, invoke() supports only float Pool2D
- invoke<float>(cker_param, in_shape, in_ptr, out_shape, out_ptr, param.op_type);
- }
- else
- {
- throw std::runtime_error{"NYI: Support float only"};
- }
-}
-} // namespace pool2d
-
-OpKernel *getPool2D()
-{
- static OpKernel kernel = {pool2d::preparePool2D, pool2d::invokePool2DOps};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/Reshape.cc b/runtime/onert/core/src/interp/operations/Reshape.cc
deleted file mode 100644
index 3a118456b..000000000
--- a/runtime/onert/core/src/interp/operations/Reshape.cc
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "interp/Registration.h"
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-void prepare(ExecEnv *env, const ir::Operation &node)
-{
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- // Unspecified output shape is not yet supported in the operation node spec
- const auto output_info = env->graph().operands().at(out_index).info();
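- // Reshape does not move data, so the output may share the input buffer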
- env->allocateAndShareIfNeeded(out_index, output_info, in_index);
-
- assert(output_info.total_size() == env->graph().operands().at(in_index).info().total_size());
-}
-
-void invoke(const ExecEnv *env, const ir::Operation &node)
-{
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- if (env->tensorAt(in_index)->bufferRO() == env->tensorAt(out_index)->bufferRO())
- {
- // Same data
- return;
- }
-
- const auto output_info = env->graph().operands().at(out_index).info();
- memcpy(env->tensorAt(out_index)->buffer(), env->tensorAt(in_index)->bufferRO(),
- output_info.total_size());
-}
-
-} // namespace
-
-OpKernel *getReshape()
-{
- static OpKernel kernel = {prepare, invoke};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/Softmax.cc b/runtime/onert/core/src/interp/operations/Softmax.cc
deleted file mode 100644
index d30f78deb..000000000
--- a/runtime/onert/core/src/interp/operations/Softmax.cc
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/SoftMax.h>
-
-#include "OperationUtil.h"
-
-#include "interp/Registration.h"
-#include "ir/operation/Softmax.h"
-#include "misc/polymorphic_downcast.h"
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-void prepareSoftMax(ExecEnv *env, const ir::Operation &node)
-{
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
- UNUSED_RELEASE(in_tensor);
-
- assert((in_tensor->num_dimensions() == 4) || (in_tensor->num_dimensions() == 2));
-
- // Output shape should be the same as the input
- // Output type is predefined in the model
- const auto output_shape = env->graph().operands().at(in_index).info().shape();
- const auto output_type = env->graph().operands().at(out_index).info().typeInfo();
-
- const auto output_info = ir::OperandInfo::createStaticInfo(output_shape, output_type);
- env->allocateIfNeeded(out_index, output_info);
-
- auto out_tensor = env->tensorAt(out_index);
- UNUSED_RELEASE(out_tensor);
-
- // Check that the output shape is the same as the input
- assert(in_tensor->num_dimensions() == out_tensor->num_dimensions());
- for (uint32_t i = 0; i < in_tensor->num_dimensions(); i++)
- {
- assert(in_tensor->dimension(i) == out_tensor->dimension(i));
- }
-}
-
-void invoke(const ITensor *in_tensor, const ITensor *out_tensor,
- const ir::operation::Softmax::Param &param)
-{
- const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO());
- float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer());
-
- float beta = param.beta;
-
- if (in_tensor->num_dimensions() == 2)
- {
- uint32_t batch_size = in_tensor->dimension(0);
- uint32_t input_size = in_tensor->dimension(1);
-
- nnfw::cker::Softmax(in_ptr, input_size, batch_size, beta, out_ptr);
- }
- else if (in_tensor->num_dimensions() == 4)
- {
- const auto in_shape = convertShape(in_tensor->tensorInfo().shape());
- const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
-
- nnfw::cker::SoftmaxParams cker_param;
- cker_param.beta = beta;
-
- nnfw::cker::Softmax(cker_param, in_shape, in_ptr, out_shape, out_ptr);
- }
- else
- {
- throw std::runtime_error{"Unsuported input dimension: support 2D or 4D"};
- }
-}
-
-void invokeSoftMax(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &softmax_node = nnfw::misc::polymorphic_downcast<const ir::operation::Softmax &>(node);
-
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
- const auto out_tensor = env->tensorAt(out_index);
-
- const auto in_data_type = in_tensor->data_type();
- const auto out_data_type = out_tensor->data_type();
- if ((in_data_type == ir::DataType::FLOAT32) && (out_data_type == ir::DataType::FLOAT32))
- {
- invoke(in_tensor, out_tensor, softmax_node.param());
- }
- else
- {
- throw std::runtime_error{"NYI: Support float32 only"};
- }
-}
-
-} // namespace
-
-OpKernel *getSoftmax()
-{
- static OpKernel kernel = {prepareSoftMax, invokeSoftMax};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/TransposeConv.cc b/runtime/onert/core/src/interp/operations/TransposeConv.cc
deleted file mode 100644
index cc2ced26b..000000000
--- a/runtime/onert/core/src/interp/operations/TransposeConv.cc
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/TransposeConv.h>
-#include <misc/polymorphic_downcast.h>
-
-#include "OperationUtil.h"
-
-#include "interp/Registration.h"
-#include "ir/operation/TransposeConv.h"
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-void prepareTransposeConv(ExecEnv *env, const ir::Operation &node)
-{
- const auto ifm_index = node.getInputs().at(ir::operation::TransposeConv::INPUT);
- const auto ker_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL);
- const auto ofm_shape_index = node.getInputs().at(ir::operation::TransposeConv::OUTPUT_SHAPE);
- const auto ofm_index = node.getOutputs().at(0);
-
- const auto ifm_tensor = env->tensorAt(ifm_index);
- const auto ker_tensor = env->tensorAt(ker_index);
- const auto ofm_shape_tensor = env->tensorAt(ofm_shape_index);
-
- assert(ifm_tensor->num_dimensions() == 4);
- assert(ker_tensor->num_dimensions() == 4);
- assert(ofm_shape_tensor->num_dimensions() == 1);
-
- UNUSED_RELEASE(ifm_tensor);
- UNUSED_RELEASE(ker_tensor);
- UNUSED_RELEASE(ofm_shape_tensor);
-
- const auto output_info = env->graph().operands().at(ofm_index).info();
- if (output_info.total_size() == 0)
- {
- // TODO: Handle unspecified output shape
- throw std::runtime_error{"Interp(TConv): NYI unspecified output shape"};
- }
- else
- {
- env->allocateIfNeeded(ofm_index, output_info);
- }
-
- auto ofm_tensor = env->tensorAt(ofm_index);
- UNUSED_RELEASE(ofm_tensor);
-
- // Handle same ifm & ofm data type only
- if (ifm_tensor->data_type() != ofm_tensor->data_type())
- {
- throw std::runtime_error{"Interp(TConv): Different I/O data dype"};
- }
-
- if (ofm_tensor->num_dimensions() != 4)
- {
- throw std::runtime_error{"Interp(TConv): Invalid output rank"};
- }
-}
-
-void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *ofm_tensor,
- const ir::operation::TransposeConv::Param &param)
-{
- const auto ifm_shape = ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- const auto ofm_shape = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
- const auto ker_shape = ker_tensor->tensorInfo().shape();
- const auto ker_height = ker_shape.dim(1);
- const auto ker_width = ker_shape.dim(2);
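- // Note the swapped ofm/ifm order: TransposeConv padding is derived from the equivalent
- // forward Conv, where this op's output plays the role of the convolution input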
- const auto padding = ir::calculatePadding(param.padding, ofm_shape, ifm_shape, param.stride,
- ker_width, ker_height);
-
- nnfw::cker::TransposeConvParams cker_param;
- cker_param.padding_values.width = padding.left;
- cker_param.padding_values.height = padding.top;
- cker_param.stride_width = param.stride.horizontal;
- cker_param.stride_height = param.stride.vertical;
- cker_param.dilation_width_factor = 1;
- cker_param.dilation_height_factor = 1;
-
- const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape());
- const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape());
- const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape());
- const float *ifm_ptr = reinterpret_cast<const float *>(ifm_tensor->bufferRO());
- const float *ker_ptr = reinterpret_cast<const float *>(ker_tensor->bufferRO());
- float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer());
-
- nnfw::cker::TransposeConv(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr,
- cker_ofm_shape, ofm_ptr);
-}
-
-void invokeTransposeConv(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &tconv_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::TransposeConv &>(node);
-
- const auto ifm_index = node.getInputs().at(ir::operation::TransposeConv::INPUT);
- const auto ker_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL);
- const auto ofm_index = node.getOutputs().at(0);
-
- const auto ifm_tensor = env->tensorAt(ifm_index);
- const auto ker_tensor = env->tensorAt(ker_index);
- const auto ofm_tensor = env->tensorAt(ofm_index);
-
- const auto data_type = ifm_tensor->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(ifm_tensor, ker_tensor, ofm_tensor, tconv_node.param());
- }
- else
- {
- throw std::runtime_error{"Interp(TConv): Support float32 only"};
- }
-}
-
-} // namespace
-
-OpKernel *getTransposeConv()
-{
- static OpKernel kernel = {prepareTransposeConv, invokeTransposeConv};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/DataType.cc b/runtime/onert/core/src/ir/DataType.cc
index 80c659b3a..07670c720 100644
--- a/runtime/onert/core/src/ir/DataType.cc
+++ b/runtime/onert/core/src/ir/DataType.cc
@@ -41,11 +41,17 @@ size_t sizeOfDataType(DataType data_type)
case DataType::UINT8:
return sizeof(uint8_t);
case DataType::QUANT_INT8_SYMM:
+ case DataType::QUANT_INT8_ASYMM:
+ case DataType::QUANT_INT8_SYMM_PER_CHANNEL:
return sizeof(int8_t);
case DataType::FLOAT16:
return sizeof(float16);
case DataType::INT64:
return sizeof(int64_t);
+ case DataType::QUANT_INT16_ASYMM:
+ case DataType::QUANT_INT16_SYMM:
+ return sizeof(int16_t);
default:
throw std::runtime_error{"Unsupported type size"};
}
diff --git a/runtime/onert/core/src/ir/Graph.cc b/runtime/onert/core/src/ir/Graph.cc
index fe8b1b443..306572c99 100644
--- a/runtime/onert/core/src/ir/Graph.cc
+++ b/runtime/onert/core/src/ir/Graph.cc
@@ -16,18 +16,10 @@
#include "ir/Graph.h"
-#include <algorithm>
-#include <bitset>
-#include <sstream>
-
-#include "util/logging.h"
+#include "OperationValidator.h"
#include "verifier/Verifier.h"
-#include "ir/operation/LowerInfo.h"
-#include "ir/operand/LowerInfo.h"
-#include "ir/operand/PermuteFactor.h"
-#include "ir/OperandIndexMap.h"
-#include "ir/GraphIterator.h"
-#include "backend/IConfig.h"
+
+#include "util/Set.h"
namespace onert
{
@@ -36,6 +28,8 @@ namespace ir
Graph::Graph() = default;
+Graph::Graph(const Graph &) = default;
+
Graph::~Graph(void) = default;
OperandIndex Graph::addOperand(const Shape &shape, const TypeInfo &type)
@@ -43,22 +37,91 @@ OperandIndex Graph::addOperand(const Shape &shape, const TypeInfo &type)
return _operands.emplace(shape, type);
}
-OperationIndex Graph::addOperation(std::unique_ptr<Operation> &&node)
+OperandIndex Graph::addOperand(OperandIndex index, std::unique_ptr<Operand> &&operand)
+{
+ return _operands.push(std::move(operand), index);
+}
+
+bool Graph::checkOperandsForOperation(const IOperation &operation)
{
- assert(isBuildingPhase());
- return _operations.push(std::move(node));
+ auto inputs = operation.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+ auto outputs = operation.getOutputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+ for (auto &&input : inputs)
+ if (!operands().exist(input))
+ return false;
+ for (auto &&input : outputs)
+ if (!operands().exist(input))
+ return false;
+ return true;
+}
+
+void Graph::linkOperandToOperation(OperationIndex index, const IOperation &operation)
+{
+ auto inputs = operation.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+ auto outputs = operation.getOutputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+
+ for (auto &&input : inputs)
+ operands().at(input).insertUse(index);
+ for (auto &&output : outputs)
+ operands().at(output).setDef(index);
+}
+
+OperationIndex Graph::addOperation(std::unique_ptr<IOperation> &&operation)
+{
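+ // Keep a reference first; 'operation' is moved into the operation storage below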
+ const IOperation &op_ref = *operation;
+ if (!checkOperandsForOperation(op_ref))
+ return OperationIndex{};
+ auto ind = _operations.push(std::move(operation));
+ if (ind.valid())
+ linkOperandToOperation(ind, op_ref);
+ return ind;
+}
+
+OperationIndex Graph::addOperation(OperationIndex index, std::unique_ptr<IOperation> &&operation)
+{
+ const IOperation &op_ref = *operation;
+ if (!checkOperandsForOperation(op_ref))
+ return OperationIndex{};
+ auto ind_gen = _operations.push(std::move(operation), index);
+ if (ind_gen.valid())
+ {
+ assert(ind_gen == index);
+ linkOperandToOperation(index, op_ref);
+ }
+ return index;
+}
+
+OperationIndex Graph::replaceOperation(OperationIndex index,
+ std::unique_ptr<IOperation> &&operation)
+{
+ const IOperation &op_ref = *operation;
+ if (!checkOperandsForOperation(op_ref) || !_operations.exist(index))
+ return OperationIndex{};
+
+ // Check the new operation has the same inputs/outputs as the existing operation
+ const auto &old_op = _operations.at(index);
+ if (!(old_op.getInputs() == op_ref.getInputs() && old_op.getOutputs() == op_ref.getOutputs()))
+ {
+ return OperationIndex{};
+ }
+
+ return _operations.set(index, std::move(operation));
}
void Graph::setOperandValue(const OperandIndex &ind, std::shared_ptr<Data> data)
{
- assert(isBuildingPhase());
assert(_operands.exist(ind));
_operands.at(ind).data(std::move(data));
}
+void Graph::changeShape(const OperandIndex &ind, const ir::Shape &new_shape)
+{
+ assert(_operands.exist(ind));
+ _operands.at(ind).info().shape(new_shape);
+}
+
void Graph::addInput(const OperandIndex &ind, const std::string &name)
{
- assert(isBuildingPhase());
if (!name.empty())
_name_to_input.emplace(name, IOIndex{_inputs.size()});
_inputs.append(ind);
@@ -66,7 +129,6 @@ void Graph::addInput(const OperandIndex &ind, const std::string &name)
void Graph::addOutput(const OperandIndex &ind, const std::string &name)
{
- assert(isBuildingPhase());
if (!name.empty())
_name_to_output.emplace(name, IOIndex{_outputs.size()});
_outputs.append(ind);
@@ -84,62 +146,70 @@ IOIndex Graph::getOutputIndex(const std::string &name) const
return (itr == _name_to_output.end()) ? IOIndex{} : itr->second;
}
-void Graph::finishBuilding(void)
+void Graph::verify(void) const
{
- assert(isBuildingPhase());
- _phase = Phase::MODEL;
-
- initializeUseDef();
- sweepGarbageOperands();
-
// Call graph verifications for the MODEL phase
{
- assert(verifier::DAGChecker().verify(*this));
- assert(verifier::EdgeConsistencyChecker().verify(*this));
+ // Except for edge consistency, the model provided by the user may be invalid,
+ // so these checks throw an exception rather than assert.
+ if (!verifier::InputOutputChecker().verify(*this))
+ throw std::runtime_error{"One of model input and output operands does not exist."};
+ if (!verifier::DAGChecker().verify(*this))
+ throw std::runtime_error{"The graph is cyclic."};
+ assert(verifier::EdgeChecker().verify(*this));
}
+
+ // Check shape independent operation feature
+ // - Operand type
+ // - Shape independent parameter
+ OperationValidator{*this}();
}
void Graph::initializeUseDef()
{
- operations().iterate([&](const OperationIndex &index, const Operation &node) -> void {
- auto outputs = node.getOutputs();
- for (auto output : outputs)
+ operations().iterate([&](const OperationIndex &index, const IOperation &node) -> void {
+ const auto &outputs = node.getOutputs();
+ for (auto &&output : outputs | ir::Remove::UNDEFINED)
{
operands().at(output).setDef(index);
}
- for (auto input : node.getInputs() | ir::Remove::UNDEFINED)
+ for (auto &&input : node.getInputs() | ir::Remove::UNDEFINED)
{
operands().at(input).insertUse(index);
}
});
}
-void Graph::sweepGarbageOperands()
+std::vector<ir::OperationIndex> Graph::topolSortOperations() const
{
- // Remove operands that are not used by any operations, except Graph inputs/outputs
- ir::OperandIndexMap<bool> visited;
-
- operations().iterate([&](const OperationIndex &, const Operation &node) {
- for (auto ind : node.getInputs() + node.getOutputs())
- {
- visited[ind] = true;
- }
- });
-
- // Graph's inputs/outputs are always reachable
- for (auto ind : getInputs() + getOutputs())
- {
- visited[ind] = true;
- }
-
- operands().iterate([&](const OperandIndex &ind, const Operand &) {
- if (!visited[ind])
+ std::vector<ir::OperationIndex> ret;
+ util::Set<ir::OperationIndex> unvisited;
+ operations().iterate(
+ [&](const ir::OperationIndex &index, const ir::IOperation &) { unvisited.add(index); });
+
+ std::function<void(const ir::OperationIndex &, const ir::IOperation &)> dfs =
+ [&](const ir::OperationIndex &index, const ir::IOperation &op) -> void {
+ if (!unvisited.contains(index))
+ return;
+ unvisited.remove(index);
+
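+ // Post-order DFS: visit every consumer of this operation's outputs before emitting it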
+ for (const auto &output : op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
- VERBOSE(Graph::sweepGarbageOperands) << "Sweep garbage operand " << ind.value() << std::endl;
- operands().remove(ind);
+ const auto &operand = operands().at(output);
+ for (const auto &use : operand.getUses())
+ {
+ dfs(use, operations().at(use));
+ }
}
- });
+ ret.push_back(index);
+ };
+ operations().iterate(dfs);
+
+ assert(unvisited.empty()); // All of the nodes must have been visited
+ // Reverse the post-order DFS result to sort it in topological order
+ std::reverse(ret.begin(), ret.end());
+ return ret;
}
} // namespace ir
diff --git a/runtime/onert/core/src/ir/Graph.test.cc b/runtime/onert/core/src/ir/Graph.test.cc
new file mode 100644
index 000000000..144500745
--- /dev/null
+++ b/runtime/onert/core/src/ir/Graph.test.cc
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/Graph.h"
+#include "ir/operation/BinaryArithmetic.h"
+
+#include <gtest/gtest.h>
+
+TEST(Graph, neg_inputs_and_outputs)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::OperandIndex index0{0u};
+ onert::ir::OperandIndex index1{1u};
+
+ graph.addInput({index0});
+ graph.addInput({index1});
+
+ onert::ir::OperandIndex index10{10u};
+ onert::ir::OperandIndex index11{11u};
+ onert::ir::OperandIndex index12{12u};
+
+ graph.addOutput({index10});
+ graph.addOutput({index11});
+ graph.addOutput({index12});
+
+ ASSERT_EQ(graph.getInputs().size(), 2);
+ ASSERT_EQ(graph.getOutputs().size(), 3);
+
+ onert::ir::IOIndex io_index0{0};
+ onert::ir::IOIndex io_index1{1};
+ onert::ir::IOIndex io_index2{2};
+
+ ASSERT_EQ(graph.getInputs().at(io_index0), 0);
+ ASSERT_EQ(graph.getInputs().at(io_index1), 1);
+
+ ASSERT_EQ(graph.getOutputs().at(io_index0), 10);
+ ASSERT_EQ(graph.getOutputs().at(io_index1), 11);
+ ASSERT_EQ(graph.getOutputs().at(io_index2), 12);
+
+ EXPECT_THROW(graph.getOutputs().at(onert::ir::IOIndex{3}), std::out_of_range);
+}
+
+using namespace onert::ir;
+
+OperationIndex addAddOperation(Graph &graph, const OperandIndexSequence inputs,
+ const OperandIndexSequence outputs)
+{
+ // Add "ADD" operation
+ operation::BinaryArithmetic::Param param;
+ param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+ param.activation = Activation::NONE;
+ return graph.addOperation(std::make_unique<operation::BinaryArithmetic>(inputs, outputs, param));
+}
+
+TEST(Graph, OneOpGraphSimpleValid)
+{
+ // Simple Graph with just one Add operation
+
+ Graph graph;
+
+ // Add tensors
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::FLOAT32};
+ auto lhs = graph.addOperand(shape, type);
+ auto rhs = graph.addOperand(shape, type);
+ auto res = graph.addOperand(shape, type);
+
+ addAddOperation(graph, {lhs, rhs}, {res});
+
+ // Set model inputs/outputs
+ graph.addInput(lhs);
+ graph.addInput(rhs);
+ graph.addOutput(res);
+
+ graph.verify();
+
+ SUCCEED();
+}
+
+TEST(Graph, neg_InvalidGraph_BadInput)
+{
+ Graph graph;
+
+ // Add tensors
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::FLOAT32};
+ auto in = graph.addOperand(shape, type);
+ auto out = graph.addOperand(shape, type);
+
+ // Set model inputs/outputs
+ graph.addInput(in);
+ graph.addOutput(out);
+ graph.addInput(OperandIndex{89}); // Non-existing operand!
+
+ EXPECT_ANY_THROW(graph.verify());
+}
+
+TEST(Graph, neg_InvalidGraph_BadOutput)
+{
+ Graph graph;
+
+ // Add tensors
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::FLOAT32};
+ auto in = graph.addOperand(shape, type);
+ auto out = graph.addOperand(shape, type);
+
+ // Set model inputs/outputs
+ graph.addInput(in);
+ graph.addOutput(out);
+ graph.addOutput(OperandIndex{12}); // Non-existing operand!
+
+ EXPECT_ANY_THROW(graph.verify());
+}
+
+TEST(Graph, neg_InvalidAddOperation_BadInputIndex)
+{
+ Graph graph;
+
+ // Add tensors
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::FLOAT32};
+ auto lhs = graph.addOperand(shape, type);
+ auto rhs = graph.addOperand(shape, type);
+ auto res = graph.addOperand(shape, type);
+
+ // Set model inputs/outputs
+ graph.addInput(lhs);
+ graph.addInput(rhs);
+ graph.addOutput(res);
+
+ ASSERT_FALSE(addAddOperation(graph, {lhs, OperandIndex{99}}, {res}).valid());
+}
diff --git a/runtime/onert/core/src/ir/GraphIterator.cc b/runtime/onert/core/src/ir/GraphIterator.cc
deleted file mode 100644
index 4bea1a55d..000000000
--- a/runtime/onert/core/src/ir/GraphIterator.cc
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GraphIterator.h"
-
-#include "ir/OperationIndexMap.h"
-#include "compiler/LoweredGraph.h"
-
-namespace onert
-{
-namespace ir
-{
-
-//
-// Graph::DefaultIterator
-//
-
-template <bool is_const>
-void DefaultIterator<is_const>::iterate(GraphRef graph, const IterFn &fn) const
-{
- graph.operations().iterate(
- [&](const OperationIndex &index, NodeRef node) -> void { fn(index, node); });
-}
-
-//
-// Graph::PostDfsIterator
-//
-
-template <bool is_const>
-void PostDfsIterator<is_const>::iterate(GraphRef graph, const IterFn &fn) const
-{
- assert(!graph.isBuildingPhase()); // Restrict iteration condition
-
- OperationIndexMap<bool> visited;
- graph.operations().iterate([&](const OperationIndex &index, NodeRef) { visited[index] = false; });
-
- std::function<void(const OperationIndex &, NodeRef)> dfs_recursive =
- [&](const OperationIndex &index, NodeRef node) -> void {
- if (visited[index])
- return;
- visited[index] = true;
-
- for (const auto output : node.getOutputs() | Remove::DUPLICATED)
- {
- const auto &operand = graph.operands().at(output);
- for (const auto &use : operand.getUses())
- {
- dfs_recursive(use, graph.operations().at(use));
- }
- }
-
- fn(index, node);
- };
-
- graph.operations().iterate(dfs_recursive);
-
- // All of the operations (nodes) must have been visited.
- assert(std::all_of(visited.begin(), visited.end(),
- [](const std::pair<const OperationIndex, bool> &v) { return v.second; }));
-}
-
-template <bool is_const>
-void PostDfsIterator<is_const>::iterateOpSeqs(LoweredGraphRef lowered_graph,
- const OpSeqIterFn &fn) const
-{
- std::unordered_map<OpSequenceIndex, bool> visited;
- lowered_graph.op_seqs().iterate(
- [&](const OpSequenceIndex &index, OpSequenceRef) { visited[index] = false; });
-
- std::function<void(const OpSequenceIndex &, OpSequenceRef)> dfs_recursive =
- [&](const OpSequenceIndex &index, OpSequenceRef op_seq) -> void {
- if (visited[index])
- return;
- visited[index] = true;
-
- for (const auto output : op_seq.getOutputs() | Remove::DUPLICATED)
- {
- const auto &operand = lowered_graph.graph().operands().at(output);
- for (const auto &use : operand.getUses())
- {
- const auto use_op_seq_index = lowered_graph.op_seqs().getOperation(use);
- dfs_recursive(use_op_seq_index, lowered_graph.op_seqs().at(use_op_seq_index));
- }
- }
-
- fn(index, op_seq);
- };
-
- lowered_graph.op_seqs().iterate(dfs_recursive);
-
- // All of the operations (nodes) must have been visited.
- assert(std::all_of(visited.begin(), visited.end(),
- [](const std::pair<const OpSequenceIndex, bool> &v) { return v.second; }));
-}
-
-// Explicit instantiations to have implementation in the source file.
- // NOTE If these instantiations were at the top of this file, `iterate` would be compiled
- // into `GraphIterator.cc.o` but `iterateOpSeqs` would not. This happens only when
- // cross-building for Android. (Maybe a bug in the NDK toolchain (clang)?)
-
-template class DefaultIterator<true>;
-template class DefaultIterator<false>;
-
-template class PostDfsIterator<true>;
-template class PostDfsIterator<false>;
-
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/GraphIterator.h b/runtime/onert/core/src/ir/GraphIterator.h
deleted file mode 100644
index b54314e0e..000000000
--- a/runtime/onert/core/src/ir/GraphIterator.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_GRAPH_ITERATOR_H__
-#define __ONERT_IR_GRAPH_ITERATOR_H__
-
-#include <type_traits>
-
-#include "ir/Index.h"
-
-namespace onert
-{
-namespace compiler
-{
-class LoweredGraph;
-} // namespace compiler
-} // namespace onert
-
-namespace onert
-{
-namespace ir
-{
-
-class Graph;
-class Operation;
-class OpSequence;
-
-template <bool is_const> class Iterator
-{
-public:
- using GraphRef = typename std::conditional<is_const, const Graph &, Graph &>::type;
- using IndexRef = const OperationIndex &;
- using NodeRef = typename std::conditional<is_const, const Operation &, Operation &>::type;
- using IterFn = std::function<void(IndexRef, NodeRef)>;
-
-public:
- virtual ~Iterator() = default;
- virtual void iterate(GraphRef graph, const IterFn &fn) const = 0;
-};
-
-template <bool is_const = false> class DefaultIterator final : public Iterator<is_const>
-{
-public:
- using GraphRef = typename Iterator<is_const>::GraphRef;
- using IndexRef = typename Iterator<is_const>::IndexRef;
- using NodeRef = typename Iterator<is_const>::NodeRef;
- using IterFn = typename Iterator<is_const>::IterFn;
-
-public:
- void iterate(GraphRef graph, const IterFn &fn) const;
-};
-using DefaultConstIterator = DefaultIterator<true>;
-
-template <bool is_const = false> class PostDfsIterator final : public Iterator<is_const>
-{
-public:
- using GraphRef = typename Iterator<is_const>::GraphRef;
- using IndexRef = typename Iterator<is_const>::IndexRef;
- using NodeRef = typename Iterator<is_const>::NodeRef;
- using IterFn = typename Iterator<is_const>::IterFn;
- using LoweredGraphRef =
- typename std::conditional<is_const, const typename compiler::LoweredGraph &,
- typename compiler::LoweredGraph &>::type;
- using OpSequenceRef = typename std::conditional<is_const, const OpSequence &, OpSequence &>::type;
- using OpSeqIndexRef = const OpSequenceIndex &;
- using OpSeqIterFn = std::function<void(OpSeqIndexRef, OpSequenceRef)>;
-
-public:
- void iterate(GraphRef graph, const IterFn &fn) const;
- void iterateOpSeqs(LoweredGraphRef lowered_graph, const OpSeqIterFn &f) const;
-};
-using PostDfsConstIterator = PostDfsIterator<true>;
-
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_GRAPH_ITERATOR_H__
diff --git a/runtime/onert/core/src/ir/LayoutSet.cc b/runtime/onert/core/src/ir/LayoutSet.cc
index bd3f438ad..732460aa2 100644
--- a/runtime/onert/core/src/ir/LayoutSet.cc
+++ b/runtime/onert/core/src/ir/LayoutSet.cc
@@ -23,7 +23,7 @@ namespace ir
LayoutSet::LayoutSet(std::initializer_list<Layout> layouts)
{
- for (auto layout : layouts)
+ for (auto &&layout : layouts)
{
_set.insert(layout);
}
@@ -32,7 +32,7 @@ LayoutSet::LayoutSet(std::initializer_list<Layout> layouts)
LayoutSet LayoutSet::operator|(const LayoutSet &other) const
{
auto ret = *this;
- for (auto layout : other)
+ for (auto &&layout : other)
{
ret.add(layout);
}
@@ -42,7 +42,7 @@ LayoutSet LayoutSet::operator|(const LayoutSet &other) const
LayoutSet LayoutSet::operator&(const LayoutSet &other) const
{
LayoutSet ret;
- for (auto layout : other)
+ for (auto &&layout : other)
{
if (contains(layout))
{
@@ -55,7 +55,7 @@ LayoutSet LayoutSet::operator&(const LayoutSet &other) const
LayoutSet LayoutSet::operator-(const LayoutSet &other) const
{
auto ret = *this;
- for (auto layout : other)
+ for (auto &&layout : other)
{
ret.remove(layout);
}
diff --git a/runtime/onert/core/src/ir/LayoutSet.h b/runtime/onert/core/src/ir/LayoutSet.h
index 6ce4e38c6..be077f2f0 100644
--- a/runtime/onert/core/src/ir/LayoutSet.h
+++ b/runtime/onert/core/src/ir/LayoutSet.h
@@ -17,6 +17,7 @@
#ifndef __ONERT_IR_LAYOUT_SET_H__
#define __ONERT_IR_LAYOUT_SET_H__
+#include <cstdint>
#include <initializer_list>
#include <unordered_set>
diff --git a/runtime/onert/core/src/ir/LayoutSet.test.cc b/runtime/onert/core/src/ir/LayoutSet.test.cc
new file mode 100644
index 000000000..fc956abe8
--- /dev/null
+++ b/runtime/onert/core/src/ir/LayoutSet.test.cc
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "LayoutSet.h"
+
+#include <gtest/gtest.h>
+
+using onert::ir::Layout;
+using onert::ir::LayoutSet;
+
+TEST(ir_LayoutSet, neg_add_remove)
+{
+ LayoutSet set{Layout::NCHW};
+ set.remove(Layout::NHWC);
+ ASSERT_EQ(set.size(), 1);
+ set.add(Layout::NHWC);
+ ASSERT_EQ(set.size(), 2);
+ set.remove(Layout::NHWC);
+ ASSERT_EQ(set.size(), 1);
+ set.remove(Layout::NCHW);
+ ASSERT_EQ(set.size(), 0);
+ set.remove(Layout::NCHW);
+ ASSERT_EQ(set.size(), 0);
+}
+
+TEST(ir_LayoutSet, neg_add_twice)
+{
+ LayoutSet set;
+ set.add(Layout::NHWC);
+ ASSERT_EQ(set.size(), 1);
+ set.add(Layout::NHWC);
+ ASSERT_EQ(set.size(), 1);
+}
+
+TEST(ir_LayoutSet, set_operators)
+{
+ LayoutSet set1{Layout::NCHW};
+ LayoutSet set2{Layout::NHWC};
+ LayoutSet set3 = set1 | set2;
+
+ ASSERT_EQ(set3.size(), 2);
+
+ ASSERT_EQ((set3 - set1).size(), 1);
+ ASSERT_EQ((set3 - set1).contains(Layout::NHWC), true);
+ ASSERT_EQ((set3 - set2).size(), 1);
+ ASSERT_EQ((set3 - set2).contains(Layout::NCHW), true);
+ ASSERT_EQ((set3 - set3).size(), 0);
+
+ ASSERT_EQ((set3 & set1).size(), 1);
+ ASSERT_EQ((set3 & set1).contains(Layout::NCHW), true);
+ ASSERT_EQ((set3 & set2).size(), 1);
+ ASSERT_EQ((set3 & set2).contains(Layout::NHWC), true);
+ ASSERT_EQ((set1 & set2).size(), 0);
+}
diff --git a/runtime/onert/core/src/ir/MockNode.h b/runtime/onert/core/src/ir/MockNode.h
new file mode 100644
index 000000000..0e7ed977b
--- /dev/null
+++ b/runtime/onert/core/src/ir/MockNode.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_TEST_GRAPH_MOCK_NODE_H__
+#define __ONERT_TEST_GRAPH_MOCK_NODE_H__
+
+#include "ir/Operation.h"
+#include "ir/OperandIndexSequence.h"
+
+namespace onert_test
+{
+namespace ir
+{
+
+class SimpleMock : public onert::ir::Operation
+{
+public:
+ SimpleMock(const onert::ir::OperandIndexSequence &inputs,
+ const onert::ir::OperandIndexSequence &outputs)
+ : Operation{onert::ir::OperandConstraint::createAny()}
+ {
+ setInputs(inputs);
+ setOutputs(outputs);
+ }
+
+public:
+ void accept(onert::ir::OperationVisitor &) const override {}
+ onert::ir::OpCode opcode() const final { return onert::ir::OpCode::Invalid; }
+};
+
+} // namespace ir
+} // namespace onert_test
+
+#endif // __ONERT_TEST_GRAPH_MOCK_NODE_H__
diff --git a/runtime/onert/core/src/ir/OpSequence.cc b/runtime/onert/core/src/ir/OpSequence.cc
deleted file mode 100644
index e2b989d8c..000000000
--- a/runtime/onert/core/src/ir/OpSequence.cc
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/OpSequence.h"
-
-#include "ir/Operations.h"
-#include "ir/OperationVisitor.h"
-#include <sstream>
-
-namespace
-{
-
-std::string getStrFromIndice(const onert::ir::OperandIndexSequence &indice)
-{
- std::string str;
- for (const auto &ind : indice)
- {
- str += std::to_string(ind.value());
- str.push_back(',');
- }
- if (!str.empty() && str.back() == ',')
- str.pop_back();
-
- return str;
-}
-} // namespace
-
-namespace onert
-{
-namespace ir
-{
-
-OpSequence::OpSequence(Layout layout) : _layout{layout}, _has_dynamic_tensor{false}
-{
- // DO NOTHING
-}
-
-void OpSequence::accept(OperationVisitor &v) const { v.visit(*this); }
-
-// TODO: Impl Dumper instead of this method
-std::string getStrFromOpSeq(const OpSequence &op_seq, const Operations &operations)
-{
- // " OpSequence IN(0,1,2) -> { op0(0,1,2:3), op1(3:4), op2(4:5) } -> OUT(5)"
- std::stringstream ss;
- ss << " OpSequence IN(" << getStrFromIndice(op_seq.getInputs()) << ") -> {";
- for (const auto &op_idx : op_seq)
- {
- ss << " " << op_idx.value() << "(" << operations.at(op_idx).name() << ":"
- << getStrFromIndice(operations.at(op_idx).getInputs()) << ":"
- << getStrFromIndice(operations.at(op_idx).getOutputs()) << ")";
- }
- ss << " } -> OUT(" << getStrFromIndice(op_seq.getOutputs()) << ")";
- return ss.str();
-}
-
-void OpSequence::remove(const OperationIndex &index)
-{
- assert(exist(index));
- for (auto it = _operations.cbegin(); it != _operations.cend(); ++it)
- {
- if (*it == index)
- {
- _operations.erase(it);
- break;
- }
- }
-}
-
-bool OpSequence::exist(const OperationIndex &index) const
-{
- for (const auto &inner_op_idx : _operations)
- {
- if (inner_op_idx == index)
- {
- return true;
- }
- }
- return false;
-}
-
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/OpSequences.cc b/runtime/onert/core/src/ir/OpSequences.cc
deleted file mode 100644
index 68884783e..000000000
--- a/runtime/onert/core/src/ir/OpSequences.cc
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/OpSequences.h"
-#include "util/logging.h"
-#include <memory>
-
-#include <cassert>
-#include <string>
-
-namespace onert
-{
-namespace ir
-{
-
-OpSequenceIndex OpSequences::emplace(const OperationIndex &index, Layout layout)
-{
- std::unique_ptr<OpSequence> op_seq = std::make_unique<OpSequence>(layout);
- op_seq->appendOperation(index);
- const OpSequenceIndex &seq_index = push(std::move(op_seq));
- cacheSequenceIndex(seq_index, index);
- return seq_index;
-}
-
-OpSequenceIndex OpSequences::emplace(std::unique_ptr<OpSequence> &&op_seq)
-{
- auto &operations = op_seq->operations();
- const OpSequenceIndex &seq_index = push(std::move(op_seq));
- for (const auto &op_idx : operations)
- {
- cacheSequenceIndex(seq_index, op_idx);
- }
- return seq_index;
-}
-
-void OpSequences::cacheSequenceIndex(const OpSequenceIndex &seq_index,
- const OperationIndex &op_index) const
-{
- _seq_indexes.emplace(op_index, seq_index);
-}
-
-OpSequenceIndex *OpSequences::findSequenceIndex(const OperationIndex &operation_index) const
-{
- // If opration_index is cached, return sequence_index from cache
- if (_seq_indexes.count(operation_index))
- {
- auto &op_seq_index = _seq_indexes.at(operation_index);
- if (_objects.count(op_seq_index) && _objects.at(op_seq_index)->exist(operation_index))
- {
- return &op_seq_index;
- }
- else
- {
- _seq_indexes.erase(operation_index);
- return nullptr;
- }
- }
- return nullptr;
-}
-
-bool OpSequences::containsOperation(const OperationIndex &operation_index) const
-{
- return findOperation(operation_index).valid();
-}
-
-OpSequenceIndex OpSequences::getOperation(const OperationIndex &operation_index) const
-{
- OpSequenceIndex ret = findOperation(operation_index);
- assert(ret.valid());
- return ret;
-}
-
-void OpSequences::removeFromOpSequence(const OperationIndex &operation_index)
-{
- const auto op_seq_index = findOperation(operation_index);
- auto &op_seq = at(op_seq_index);
- _seq_indexes.erase(operation_index);
- op_seq.remove(operation_index);
- if (op_seq.size() == 0)
- {
- remove(op_seq_index);
- }
-}
-
-OpSequenceIndex OpSequences::findOperation(const OperationIndex &operation_index) const
-{
- if (OpSequenceIndex *op_seq_index = findSequenceIndex(operation_index))
- return *op_seq_index;
-
- for (auto &e : _objects)
- {
- OpSequence &object = *e.second;
- auto it = find(object.operations().begin(), object.operations().end(), operation_index);
- if (it != object.operations().end())
- {
- cacheSequenceIndex(e.first, operation_index);
- return e.first;
- }
- }
- throw std::runtime_error("Operation not found");
-}
-
-void dumpOpSequences(const OpSequences &op_seqs, const Operations &operations)
-{
- op_seqs.iterate([&](const OpSequenceIndex &idx, const OpSequence &op_seq) {
- VERBOSE(OpSequences) << idx.value() << "] " << getStrFromOpSeq(op_seq, operations) << std::endl;
- });
-}
-
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/Operand.cc b/runtime/onert/core/src/ir/Operand.cc
index e29c7a6ec..18981dbf1 100644
--- a/runtime/onert/core/src/ir/Operand.cc
+++ b/runtime/onert/core/src/ir/Operand.cc
@@ -46,5 +46,11 @@ void Operand::setDef(const OperationIndex &idx) { _def = idx; }
void Operand::unsetDef() { _def = OperationIndex{}; }
+void Operand::clearDefUse()
+{
+ unsetDef();
+ _uses.clear();
+}
+
} // namespace ir
} // namespace onert
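
A short sketch of what the new helper buys a caller (hypothetical call site; the pass around it is an assumption):

    onert::ir::Operand &operand = graph.operands().at(index);
    // Drop stale def/use links in one call before re-wiring the graph:
    operand.clearDefUse(); // equivalent to unsetDef() plus clearing all uses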
diff --git a/runtime/onert/core/src/ir/Operand.test.cc b/runtime/onert/core/src/ir/Operand.test.cc
new file mode 100644
index 000000000..0b858792a
--- /dev/null
+++ b/runtime/onert/core/src/ir/Operand.test.cc
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/Graph.h"
+
+#include "MockNode.h"
+#include "verifier/Verifier.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <typeindex>
+
+namespace
+{
+
+using IndexSet = onert::ir::OperandIndexSequence;
+using Mock = onert_test::ir::SimpleMock;
+
+} // namespace
+
+TEST(ir_Operand, neg_usedef)
+{
+ onert::ir::Graph graph;
+ onert::ir::verifier::DAGChecker verifier;
+
+ onert::ir::Shape shape(3);
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ // Model Input/Output
+ auto input_operand = graph.addOperand(shape, type);
+ auto output_operand = graph.addOperand(shape, type);
+
+ graph.addInput(input_operand);
+ graph.addOutput(output_operand);
+
+ // MockNode1
+ auto operand_index1 = graph.addOperand(shape, type);
+ auto mocknode_index1 =
+ graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index1}));
+
+ // MockNode2
+ auto operand_index2 = graph.addOperand(shape, type);
+ auto mocknode_index2 =
+ graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index2}));
+
+ // MockNode3 (two inputs)

+ auto multiinput_index = graph.addOperation(
+ std::make_unique<Mock>(IndexSet{operand_index1, operand_index2}, IndexSet{output_operand}));
+
+ graph.verify();
+
+ ASSERT_TRUE(verifier.verify(graph));
+
+ // Check def
+ ASSERT_EQ(graph.operands().at(operand_index1).getDef(), mocknode_index1);
+ ASSERT_EQ(graph.operands().at(operand_index2).getDef(), mocknode_index2);
+ ASSERT_EQ(graph.operands().at(output_operand).getDef(), multiinput_index);
+
+ ASSERT_NE(graph.operands().at(operand_index1).getDef(), mocknode_index2);
+ ASSERT_NE(graph.operands().at(operand_index1).getDef(), multiinput_index);
+
+ // Check use
+ ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index1), true);
+ ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index2), true);
+ ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(multiinput_index), false);
+ ASSERT_EQ(graph.operands().at(operand_index1).getUses().contains(multiinput_index), true);
+ ASSERT_EQ(graph.operands().at(operand_index2).getUses().contains(multiinput_index), true);
+
+ ASSERT_EQ(graph.operands().at(input_operand).getUses().size(), 2);
+ ASSERT_EQ(graph.operands().at(operand_index1).getUses().size(), 1);
+ ASSERT_EQ(graph.operands().at(output_operand).getUses().size(), 0);
+}
diff --git a/runtime/onert/core/src/ir/OperandIndexSequence.cc b/runtime/onert/core/src/ir/OperandIndexSequence.cc
index 73f928280..a15b6d0d6 100644
--- a/runtime/onert/core/src/ir/OperandIndexSequence.cc
+++ b/runtime/onert/core/src/ir/OperandIndexSequence.cc
@@ -31,7 +31,7 @@ OperandIndexSequence::OperandIndexSequence(std::initializer_list<OperandIndex> l
OperandIndexSequence::OperandIndexSequence(std::initializer_list<int32_t> list)
{
- for (auto val : list)
+ for (auto &&val : list)
{
_vec.emplace_back(static_cast<uint32_t>(val));
}
@@ -39,7 +39,7 @@ OperandIndexSequence::OperandIndexSequence(std::initializer_list<int32_t> list)
OperandIndexSequence::OperandIndexSequence(std::initializer_list<uint32_t> list)
{
- for (auto val : list)
+ for (auto &&val : list)
{
_vec.emplace_back(val);
}
@@ -55,6 +55,11 @@ void OperandIndexSequence::replace(const OperandIndex &from, const OperandIndex
std::replace(_vec.begin(), _vec.end(), from, to);
}
+bool OperandIndexSequence::operator==(const OperandIndexSequence &other) const
+{
+ return _vec == other._vec;
+}
+
OperandIndexSequence OperandIndexSequence::operator+(const OperandIndexSequence &other) const
{
OperandIndexSequence ret = *this;
@@ -62,10 +67,10 @@ OperandIndexSequence OperandIndexSequence::operator+(const OperandIndexSequence
return ret;
}
-std::ostream &operator<<(std::ostream &o, const OperandIndexSequence &op_seq)
+std::ostream &operator<<(std::ostream &o, const OperandIndexSequence &operand_seq)
{
std::string delimeter;
- for (const auto &ind : op_seq._vec)
+ for (const auto &ind : operand_seq._vec)
{
o << delimeter << ind;
delimeter = ',';
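
The new operator== delegates to std::vector equality, so element order matters as well as membership. A small sketch (assumes the onert headers are on the include path):

    #include "ir/OperandIndexSequence.h"
    #include <cassert>
    #include <iostream>

    void sequenceSketch()
    {
      onert::ir::OperandIndexSequence a{1, 2, 3};
      onert::ir::OperandIndexSequence b{3, 2, 1};
      assert(!(a == b)); // same members, different order: not equal
      // operator+ concatenates, so {1,2} + {3} compares equal to {1,2,3}
      assert(a == (onert::ir::OperandIndexSequence{1, 2} + onert::ir::OperandIndexSequence{3}));
      std::cout << a << std::endl; // prints "1,2,3" via the stream operator
    }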
diff --git a/runtime/onert/core/src/ir/OperandIndexSequence.test.cc b/runtime/onert/core/src/ir/OperandIndexSequence.test.cc
new file mode 100644
index 000000000..588c4e419
--- /dev/null
+++ b/runtime/onert/core/src/ir/OperandIndexSequence.test.cc
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/OperandIndexSequence.h"
+
+#include <gtest/gtest.h>
+
+using onert::ir::OperandIndex;
+using onert::ir::OperandIndexSequence;
+
+TEST(ir_OperandIndexSequence, neg_append)
+{
+ OperandIndexSequence iset{0, 2, 4, 8};
+
+ ASSERT_EQ(iset.size(), 4);
+
+ iset.append(OperandIndex{10});
+
+ ASSERT_EQ(iset.size(), 5);
+
+ onert::ir::IOIndex index1{1};
+ onert::ir::IOIndex index2{4};
+
+ ASSERT_EQ(iset.at(index1), 2);
+ ASSERT_EQ(iset.at(index2), 10);
+
+ ASSERT_TRUE(iset.contains(OperandIndex{2}));
+ ASSERT_TRUE(iset.contains(OperandIndex{10}));
+ ASSERT_FALSE(iset.contains(OperandIndex{11}));
+}
+
+TEST(graph_OperandIndexSequence, neg_replace)
+{
+ OperandIndexSequence iset{0, 1, 2, 3};
+
+ iset.replace(OperandIndex{1}, OperandIndex{9});
+ ASSERT_FALSE(iset.contains(OperandIndex{1}));
+ ASSERT_TRUE(iset.contains(OperandIndex{9}));
+}
diff --git a/runtime/onert/core/src/ir/Operands.cc b/runtime/onert/core/src/ir/Operands.cc
index ab32e478a..f8cfd16ef 100644
--- a/runtime/onert/core/src/ir/Operands.cc
+++ b/runtime/onert/core/src/ir/Operands.cc
@@ -29,7 +29,7 @@ Operands::Operands(const Operands &obj)
obj.iterate([&](const OperandIndex &index, const Operand &operand) {
_objects.emplace(index, std::make_unique<Operand>(operand));
});
- _index_count = obj._index_count;
+ _next_index = obj._next_index;
}
} // namespace ir
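
The rename to _next_index also makes the point of copying it clearer: fresh emplacements in the copy must not reuse indices already taken in the source. A sketch of the behavior this preserves (assumed, based on the copy constructor above and the Operands test below):

    onert::ir::Operands src;
    src.emplace(shape, type);      // takes OperandIndex{0}
    onert::ir::Operands copy{src}; // deep-copies operands and _next_index
    copy.emplace(shape, type);     // takes OperandIndex{1}, not 0 again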
diff --git a/runtime/onert/core/src/ir/Operands.test.cc b/runtime/onert/core/src/ir/Operands.test.cc
new file mode 100644
index 000000000..aff228b10
--- /dev/null
+++ b/runtime/onert/core/src/ir/Operands.test.cc
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/Operands.h"
+
+#include <gtest/gtest.h>
+
+TEST(ir_Operands, neg_set_test)
+{
+ onert::ir::Operands set;
+
+ onert::ir::Shape shape0{1, 2, 3};
+
+ onert::ir::Shape shape1(4);
+ shape1.dim(0) = 10;
+ shape1.dim(1) = 20;
+ shape1.dim(2) = 30;
+ shape1.dim(3) = 40;
+
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ set.emplace(shape0, type);
+ set.emplace(shape1, type);
+
+ ASSERT_EQ(set.exist(onert::ir::OperandIndex{0u}), true);
+ ASSERT_EQ(set.exist(onert::ir::OperandIndex{1u}), true);
+ ASSERT_EQ(set.exist(onert::ir::OperandIndex{2u}), false);
+
+ ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(0), 1);
+ ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(1), 2);
+ ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(2), 3);
+}
diff --git a/runtime/onert/core/src/ir/Operation.cc b/runtime/onert/core/src/ir/Operation.cc
index 04be8c0d9..64792525d 100644
--- a/runtime/onert/core/src/ir/Operation.cc
+++ b/runtime/onert/core/src/ir/Operation.cc
@@ -24,22 +24,33 @@ namespace ir
{
Operation::Operation(OperandConstraint input_constr, const OperandIndexSequence &inputs,
- const OperandIndexSequence &outputs)
- : _input_constr{input_constr}, _inputs{inputs}, _outputs{outputs}
+ const OperandIndexSequence &outputs, OperandConstraint output_constr)
+ : _input_constr{input_constr}, _output_constr{output_constr}
{
+ setInputs(inputs);
+ setOutputs(outputs);
}
-Operation::Operation(OperandConstraint input_constr) : _input_constr{input_constr} {}
+Operation::Operation(OperandConstraint input_constr, OperandConstraint output_constr)
+ : _input_constr{input_constr}, _output_constr{output_constr}
+{
+}
Operation::~Operation() = default;
void Operation::setInputs(const OperandIndexSequence &indexes)
{
- assert(_input_constr.check(indexes.size()));
+ if (!_input_constr.check(indexes.size()))
+ throw std::runtime_error{"Invalid number of input tensors for this operation."};
_inputs = indexes;
}
-void Operation::setOutputs(const OperandIndexSequence &indexes) { _outputs = indexes; }
+void Operation::setOutputs(const OperandIndexSequence &indexes)
+{
+ if (!_output_constr.check(indexes.size()))
+ throw std::runtime_error{"Invalid number of output tensors for this operation."};
+ _outputs = indexes;
+}
void Operation::replaceInputs(const OperandIndex &from, const OperandIndex &to)
{
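
Since the hunk above turns the assert into exceptions, an out-of-contract call now fails in release builds too. A sketch, assuming Conv2D's input constraint is exactly three operands (the setIO test below exercises the passing case with {8, 9, 10}):

    auto conv = std::make_unique<onert::ir::operation::Conv2D>(inputs, outputs, params);
    conv->setInputs({8, 9, 10}); // three indices: satisfies the constraint
    try
    {
      conv->setInputs({8, 9}); // hypothetical: too few inputs
    }
    catch (const std::runtime_error &e)
    {
      // "Invalid number of input tensors for this operation."
    }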
diff --git a/runtime/onert/core/src/ir/Operation.test.cc b/runtime/onert/core/src/ir/Operation.test.cc
new file mode 100644
index 000000000..b3c4e852d
--- /dev/null
+++ b/runtime/onert/core/src/ir/Operation.test.cc
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/Graph.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexSequence.h"
+#include "ir/operation/Concat.h"
+#include "ir/operation/Conv2D.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <stdexcept>
+
+using Index = onert::ir::IOIndex;
+using IndexSet = onert::ir::OperandIndexSequence;
+
+TEST(ir_Operation_setIO, operation_setIO_conv)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::Shape shape{3};
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ // Add Conv
+ using Graph = onert::ir::operation::Conv2D;
+
+ auto input_operand = graph.addOperand(shape, type);
+ auto kernel_operand = graph.addOperand(shape, type);
+ auto bias_operand = graph.addOperand(shape, type);
+ IndexSet inputs{input_operand, kernel_operand, bias_operand};
+
+ Graph::Param conv_params;
+ conv_params.padding.type = onert::ir::PaddingType::SAME;
+ conv_params.stride.horizontal = 1;
+ conv_params.stride.vertical = 1;
+ conv_params.activation = onert::ir::Activation::NONE;
+
+ auto output_operand = graph.addOperand(shape, type).value();
+ IndexSet outputs{output_operand};
+
+ auto conv = std::make_unique<Graph>(inputs, outputs, conv_params);
+
+ ASSERT_NE(conv, nullptr);
+ ASSERT_EQ(conv->getInputs().at(Index{0}).value(), inputs.at(0).value());
+ conv->setInputs({8, 9, 10});
+ ASSERT_NE(conv->getInputs().at(Index{0}).value(), inputs.at(0).value());
+ ASSERT_EQ(conv->getInputs().at(Index{0}).value(), 8);
+}
+
+TEST(ir_Operation_setIO, neg_operation_setIO_concat)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::Shape shape{3};
+
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ using Graph = onert::ir::operation::Concat;
+
+ // Add Concat
+ IndexSet inputs;
+ for (int i = 0; i < 6; ++i)
+ {
+ inputs.append(graph.addOperand(shape, type));
+ }
+
+ Graph::Param concat_params{0};
+
+ auto output_operand = graph.addOperand(shape, type).value();
+ IndexSet outputs{output_operand};
+
+ auto concat = std::make_unique<Graph>(inputs, outputs, concat_params);
+
+ ASSERT_NE(concat, nullptr);
+ ASSERT_EQ(concat->getInputs().size(), 6);
+ ASSERT_EQ(concat->getInputs().at(Index{0}).value(), inputs.at(0).value());
+
+ concat->setInputs({80, 6, 9, 11});
+ ASSERT_EQ(concat->getInputs().size(), 4);
+ ASSERT_NE(concat->getInputs().at(Index{0}).value(), inputs.at(0).value());
+ ASSERT_EQ(concat->getInputs().at(Index{0}).value(), 80);
+ ASSERT_EQ(concat->getInputs().at(Index{2}).value(), 9);
+ ASSERT_THROW(concat->getInputs().at(Index{5}), std::out_of_range);
+}
diff --git a/runtime/onert/core/src/ir/OperationCloner.cc b/runtime/onert/core/src/ir/OperationCloner.cc
index b4e60f0bc..64e1cc807 100644
--- a/runtime/onert/core/src/ir/OperationCloner.cc
+++ b/runtime/onert/core/src/ir/OperationCloner.cc
@@ -23,6 +23,23 @@ namespace onert
namespace ir
{
+namespace
+{
+
+class OperationCloner : public OperationVisitor
+{
+public:
+#define OP(Name) void visit(const operation::Name &o) override;
+#include "ir/Operations.lst"
+#undef OP
+
+public:
+ std::unique_ptr<Operation> releaseClone();
+
+private:
+ std::unique_ptr<Operation> _return_op;
+};
+
#define OP(Name) \
void OperationCloner::visit(const operation::Name &o) \
{ \
@@ -38,5 +55,14 @@ std::unique_ptr<Operation> OperationCloner::releaseClone()
return std::move(_return_op);
}
+} // namespace
+
+std::unique_ptr<Operation> clone(const IOperation &operation)
+{
+ OperationCloner cloner;
+ operation.accept(cloner);
+ return cloner.releaseClone();
+}
+
} // namespace ir
} // namespace onert
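
With the visitor now private to the .cc file, call sites go through the free function instead. A minimal sketch of the intended usage (the graph lookup is an assumption; the clone signature is the one declared in OperationCloner.h below):

    #include "OperationCloner.h"

    const onert::ir::IOperation &op = graph.operations().at(op_index);
    std::unique_ptr<onert::ir::Operation> copied = onert::ir::clone(op);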
diff --git a/runtime/onert/core/src/ir/OperationCloner.h b/runtime/onert/core/src/ir/OperationCloner.h
index 0e8cda2a0..49297a05c 100644
--- a/runtime/onert/core/src/ir/OperationCloner.h
+++ b/runtime/onert/core/src/ir/OperationCloner.h
@@ -26,19 +26,7 @@ namespace onert
namespace ir
{
-class OperationCloner : public OperationVisitor
-{
-public:
-#define OP(Name) void visit(const operation::Name &o) override;
-#include "ir/Operations.lst"
-#undef OP
-
-public:
- std::unique_ptr<Operation> releaseClone();
-
-private:
- std::unique_ptr<Operation> _return_op;
-};
+std::unique_ptr<Operation> clone(const IOperation &operation);
} // namespace ir
} // namespace onert
diff --git a/runtime/onert/core/src/ir/OperationDumper.cc b/runtime/onert/core/src/ir/OperationDumper.cc
index 48361f464..5e6d700f3 100644
--- a/runtime/onert/core/src/ir/OperationDumper.cc
+++ b/runtime/onert/core/src/ir/OperationDumper.cc
@@ -29,19 +29,21 @@ using namespace operation;
namespace
{
-void dumpUnaryInputOp(const Operation &node, const std::string &adding_input = "")
+
+// Dump all inputs and outputs.
+// Use this function when the operation has no special inputs or outputs.
+void dumpOpGeneric(const Operation &node, const std::string &adding_input = "")
{
VERBOSE(LIR) << "* " << node.name() << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(0) << ") " << adding_input
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs() << ") " << adding_input << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs() << ")" << std::endl;
}
-void dumpBinaryInputOp(const Operation &node, const std::string &adding_input = "")
+void dumpUnaryInputOp(const Operation &node, const std::string &adding_input = "")
{
VERBOSE(LIR) << "* " << node.name() << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(0) << ", " << node.getInputs().at(0)
- << ") " << adding_input << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(0) << ") " << adding_input
+ << std::endl;
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
@@ -53,18 +55,6 @@ void dumpConvOp(const Operation &node, const std::string &padding_type)
<< node.getInputs().at(Conv2D::Input::BIAS) << ")" << std::endl;
VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
}
-
-void dumpPackingOp(const Operation &node)
-{
- VERBOSE(LIR) << "* " << node.name() << std::endl;
- std::string inputs;
- for (auto i : node.getInputs())
- {
- inputs += std::to_string(i.value()) + ",";
- }
- VERBOSE(LIR) << " - Inputs : Inputs(" << inputs << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
} // namespace
OperationDumper::OperationDumper(const std::string &start_msg)
@@ -72,41 +62,62 @@ OperationDumper::OperationDumper(const std::string &start_msg)
VERBOSE(LIR) << start_msg << std::endl;
}
-void OperationDumper::visit(const ArgMax &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const ArgMinMax &node)
+{
+ std::string min_max = node.param().is_arg_max ? "(Max)" : "(Min)";
+ VERBOSE(LIR) << "* " << node.name() << min_max << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ArgMinMax::INPUT) << ") Axis("
+ << node.getInputs().at(ArgMinMax::AXIS) << ") " << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+}
void OperationDumper::visit(const BatchToSpaceND &node)
{
std::string block_size =
- "BlockSize(" +
- std::to_string(node.getInputs().at(BatchToSpaceND::Input::BLOCK_SIZE).value()) + ")";
- dumpUnaryInputOp(node, block_size);
+ "BlockSize(" + std::to_string(node.getInputs().at(BatchToSpaceND::Input::BLOCK_SIZE).value()) +
+ ")";
+ dumpOpGeneric(node, block_size);
}
-void OperationDumper::visit(const BinaryArithmetic &node) { dumpBinaryInputOp(node); }
+void OperationDumper::visit(const BCQFullyConnected &node)
+{
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
+ VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(BCQFullyConnected::Input::INPUT)
+ << ") WeightsBinary("
+ << node.getInputs().at(BCQFullyConnected::Input::WEIGHTS_BINARY)
+ << ") WeightsScales("
+ << node.getInputs().at(BCQFullyConnected::Input::WEIGHTS_SCALES)
+ << ") WeightsClusters("
+ << node.getInputs().at(BCQFullyConnected::Input::WEIGHTS_CLUSTERS) << ") Bias("
+ << node.getInputs().at(BCQFullyConnected::Input::BIAS) << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
+}
+
+void OperationDumper::visit(const BinaryArithmetic &node) { dumpOpGeneric(node); }
-void OperationDumper::visit(const operation::BroadcastTo &node) { dumpBinaryInputOp(node); }
+void OperationDumper::visit(const operation::BroadcastTo &node) { dumpOpGeneric(node); }
-void OperationDumper::visit(const Comparison &node) { dumpBinaryInputOp(node); }
+void OperationDumper::visit(const Comparison &node) { dumpOpGeneric(node); }
-void OperationDumper::visit(const Concat &node) { dumpPackingOp(node); }
+void OperationDumper::visit(const Concat &node) { dumpOpGeneric(node); }
void OperationDumper::visit(const Conv2D &node)
{
std::string padding_type =
- node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
+ node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
dumpConvOp(node, padding_type);
}
-void OperationDumper::visit(const ConvertFp16ToFp32 &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const ConvertFp16ToFp32 &node) { dumpOpGeneric(node); }
-void OperationDumper::visit(const ConvertFp32ToFp16 &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const ConvertFp32ToFp16 &node) { dumpOpGeneric(node); }
-void OperationDumper::visit(const DepthToSpace &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const DepthToSpace &node) { dumpOpGeneric(node); }
void OperationDumper::visit(const DepthwiseConv2D &node)
{
std::string padding_type =
- node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
+ node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
dumpConvOp(node, padding_type);
}
@@ -122,12 +133,12 @@ void OperationDumper::visit(const ElementwiseActivation &node)
{
params = " alpha value(" + std::to_string(node.param().alpha) + ")";
}
- dumpUnaryInputOp(node, params);
+ dumpOpGeneric(node, params);
}
-void OperationDumper::visit(const ElementwiseBinary &node) { dumpBinaryInputOp(node); }
+void OperationDumper::visit(const ElementwiseBinary &node) { dumpOpGeneric(node); }
-void OperationDumper::visit(const ElementwiseUnary &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const ElementwiseUnary &node) { dumpOpGeneric(node); }
void OperationDumper::visit(const EmbeddingLookup &node)
{
@@ -141,22 +152,30 @@ void OperationDumper::visit(const EmbeddingLookup &node)
void OperationDumper::visit(const ExpandDims &node)
{
std::string axis =
- "AXIS(" + std::to_string(node.getInputs().at(ExpandDims::Input::AXIS).value()) + ")";
+ "AXIS(" + std::to_string(node.getInputs().at(ExpandDims::Input::AXIS).value()) + ")";
dumpUnaryInputOp(node, axis);
}
+void OperationDumper::visit(const Fill &node)
+{
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
+ VERBOSE(LIR) << " - Inputs : Shape(" << node.getInputs().at(Fill::Input::SHAPE) << ") Value("
+ << node.getInputs().at(Fill::Input::VALUE) << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+}
+
void OperationDumper::visit(const FullyConnected &node)
{
std::string inputs =
- "Weight(" + std::to_string(node.getInputs().at(FullyConnected::Input::WEIGHT).value()) +
- ") Bias(" + std::to_string(node.getInputs().at(FullyConnected::Input::BIAS).value()) + ")";
+ "Weight(" + std::to_string(node.getInputs().at(FullyConnected::Input::WEIGHT).value()) +
+ ") Bias(" + std::to_string(node.getInputs().at(FullyConnected::Input::BIAS).value()) + ")";
dumpUnaryInputOp(node, inputs);
}
void OperationDumper::visit(const Gather &node)
{
std::string indices =
- "Indices(" + std::to_string(node.getInputs().at(Gather::Input::INDICES).value()) + ")";
+ "Indices(" + std::to_string(node.getInputs().at(Gather::Input::INDICES).value()) + ")";
dumpUnaryInputOp(node, indices);
}
@@ -174,50 +193,70 @@ void OperationDumper::visit(const HashtableLookup &node)
void OperationDumper::visit(const InstanceNorm &node)
{
std::string inputs =
- "Gamma(" + std::to_string(node.getInputs().at(InstanceNorm::Input::GAMMA).value()) +
- ") Beta(" + std::to_string(node.getInputs().at(InstanceNorm::Input::BETA).value()) + ")";
+ "Gamma(" + std::to_string(node.getInputs().at(InstanceNorm::Input::GAMMA).value()) + ") Beta(" +
+ std::to_string(node.getInputs().at(InstanceNorm::Input::BETA).value()) + ")";
dumpUnaryInputOp(node, inputs);
}
-void OperationDumper::visit(const L2Normalization &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const L2Normalization &node) { dumpOpGeneric(node); }
+
+void OperationDumper::visit(const LocalResponseNormalization &node) { dumpOpGeneric(node); }
-void OperationDumper::visit(const LocalResponseNormalization &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const Loss &node)
+{
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
+ VERBOSE(LIR) << " - Inputs : Prediction(" << node.getInputs().at(Loss::Input::Y_PRED) << ") True("
+ << node.getInputs().at(Loss::Input::Y_TRUE) << ")" << std::endl;
+ VERBOSE(LIR) << " - Outputs : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+}
void OperationDumper::visit(const LSTM &node)
{
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
VERBOSE(LIR)
- << " - Inputs : Input(" << node.getInputs().at(LSTM::Input::INPUT)
- << ") Input To Input Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_INPUT_WEIGHTS)
- << ") Input To Forget Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_FORGET_WEIGHTS)
- << ") Input To Cell Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_CELL_WEIGHTS)
- << ") Input To Output Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)
- << ") Recurrent To Input Weights("
- << node.getInputs().at(LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)
- << ") Recurrent To Forget Weights("
- << node.getInputs().at(LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)
- << ") Recurrent To Cell Weights("
- << node.getInputs().at(LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)
- << ") Recurrent To Output Weights("
- << node.getInputs().at(LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS) << ") Cell To Input Weights("
- << node.getInputs().at(LSTM::Input::CELL_TO_INPUT_WEIGHTS) << ") Cell To Forget Weights("
- << node.getInputs().at(LSTM::Input::CELL_TO_FORGET_WEIGHTS) << ") Cell To OUTPUT Weights("
- << node.getInputs().at(LSTM::Input::CELL_TO_OUTPUT_WEIGHTS) << ") Input Gate Bias("
- << node.getInputs().at(LSTM::Input::INPUT_GATE_BIAS) << ") Forget Gate Bias("
- << node.getInputs().at(LSTM::Input::FORGET_GATE_BIAS) << ") Cell Bias("
- << node.getInputs().at(LSTM::Input::CELL_BIAS) << ") Output Gate Bias("
- << node.getInputs().at(LSTM::Input::OUTPUT_GATE_BIAS) << ") Projection Weights("
- << node.getInputs().at(LSTM::Input::PROJECTION_WEIGHTS) << ") Projection Bias("
- << node.getInputs().at(LSTM::Input::PROJECTION_BIAS) << ") Output State In("
- << node.getInputs().at(LSTM::Input::OUTPUT_STATE_IN) << ") Cell State In("
- << node.getInputs().at(LSTM::Input::CELL_STATE_IN) << ")" << std::endl;
+ << " - Inputs : Input(" << node.getInputs().at(LSTM::Input::INPUT)
+ << ") Input To Input Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_INPUT_WEIGHTS)
+ << ") Input To Forget Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_FORGET_WEIGHTS)
+ << ") Input To Cell Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_CELL_WEIGHTS)
+ << ") Input To Output Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)
+ << ") Recurrent To Input Weights("
+ << node.getInputs().at(LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)
+ << ") Recurrent To Forget Weights("
+ << node.getInputs().at(LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)
+ << ") Recurrent To Cell Weights(" << node.getInputs().at(LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)
+ << ") Recurrent To Output Weights("
+ << node.getInputs().at(LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS) << ") Cell To Input Weights("
+ << node.getInputs().at(LSTM::Input::CELL_TO_INPUT_WEIGHTS) << ") Cell To Forget Weights("
+ << node.getInputs().at(LSTM::Input::CELL_TO_FORGET_WEIGHTS) << ") Cell To OUTPUT Weights("
+ << node.getInputs().at(LSTM::Input::CELL_TO_OUTPUT_WEIGHTS) << ") Input Gate Bias("
+ << node.getInputs().at(LSTM::Input::INPUT_GATE_BIAS) << ") Forget Gate Bias("
+ << node.getInputs().at(LSTM::Input::FORGET_GATE_BIAS) << ") Cell Bias("
+ << node.getInputs().at(LSTM::Input::CELL_BIAS) << ") Output Gate Bias("
+ << node.getInputs().at(LSTM::Input::OUTPUT_GATE_BIAS) << ") Projection Weights("
+ << node.getInputs().at(LSTM::Input::PROJECTION_WEIGHTS) << ") Projection Bias("
+ << node.getInputs().at(LSTM::Input::PROJECTION_BIAS) << ") Output State In("
+ << node.getInputs().at(LSTM::Input::OUTPUT_STATE_IN) << ") Cell State In("
+ << node.getInputs().at(LSTM::Input::CELL_STATE_IN);
+ if (node.getInputs().size() == 24)
+ {
+ VERBOSE(LIR) << ") Input Layer Normalization Weights("
+ << node.getInputs().at(LSTM::Input::INPUT_LAYER_NORMALIZATION_WEIGHTS)
+ << ") Forget Layer Normalization Weights("
+ << node.getInputs().at(LSTM::Input::FORGET_LAYER_NORMALIZATION_WEIGHTS)
+ << ") Cell Layer Normalization Weights("
+ << node.getInputs().at(LSTM::Input::CELL_LAYER_NORMALIZATION_WEIGHTS)
+ << ") Ouput Layer Normalization Weights("
+ << node.getInputs().at(LSTM::Input::OUTPUT_LAYER_NORMALIZATION_WEIGHTS);
+ }
+ VERBOSE(LIR) << ")" << std::endl;
VERBOSE(LIR) << " - Output : Scratch Buffer("
<< node.getOutputs().at(LSTM::Output::SCRATCH_BUFFER) << ") Output State Out("
- << node.getInputs().at(LSTM::Output::OUTPUT_STATE_OUT) << ") Cell State Out("
- << node.getInputs().at(LSTM::Output::CELL_STATE_OUT) << ") Output("
- << node.getInputs().at(LSTM::Output::OUTPUT) << ")" << std::endl;
+ << node.getOutputs().at(LSTM::Output::OUTPUT_STATE_OUT) << ") Cell State Out("
+ << node.getOutputs().at(LSTM::Output::CELL_STATE_OUT) << ") Output("
+ << node.getOutputs().at(LSTM::Output::OUTPUT) << ")" << std::endl;
}
-void OperationDumper::visit(const Pack &node) { dumpPackingOp(node); }
+void OperationDumper::visit(const Pack &node) { dumpOpGeneric(node); }
void OperationDumper::visit(const Pad &node)
{
@@ -249,23 +288,23 @@ void OperationDumper::visit(const Permute &node)
void OperationDumper::visit(const Pool2D &node)
{
std::string padding_type =
- node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
+ node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
VERBOSE(LIR) << "* " << node.name() << "(" << padding_type << ")" << std::endl;
VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(Pool2D::Input::INPUT) << ")"
<< std::endl;
VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const Pow &node) { dumpBinaryInputOp(node); }
+void OperationDumper::visit(const Pow &node) { dumpOpGeneric(node); }
void OperationDumper::visit(const PReLU &node)
{
std::string alpha =
- "Alpha(" + std::to_string(node.getInputs().at(PReLU::Input::ALPHA).value()) + ")";
- dumpUnaryInputOp(node, alpha);
+ "Alpha(" + std::to_string(node.getInputs().at(PReLU::Input::ALPHA).value()) + ")";
+ dumpOpGeneric(node, alpha);
}
-void OperationDumper::visit(const Rank &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const Rank &node) { dumpOpGeneric(node); }
void OperationDumper::visit(const Reduce &node) { dumpUnaryInputOp(node); }
@@ -273,18 +312,20 @@ void OperationDumper::visit(const Reshape &node)
{
// optional param
std::string shape =
- node.getInputs().size() == 2
- ? "Shape(" + std::to_string(node.getInputs().at(Reshape::Input::SHAPE).value()) + ")"
- : "Shape(not provided)";
+ node.getInputs().size() == 2
+ ? "Shape(" + std::to_string(node.getInputs().at(Reshape::Input::SHAPE).value()) + ")"
+ : "Shape(not provided)";
dumpUnaryInputOp(node, shape);
}
-void OperationDumper::visit(const ResizeBilinear &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const ResizeBilinear &node) { dumpOpGeneric(node); }
+
+void OperationDumper::visit(const ResizeNearestNeighbor &node) { dumpOpGeneric(node); }
void OperationDumper::visit(const Reverse &node)
{
std::string axis =
- "Axis(" + std::to_string(node.getInputs().at(Reverse::Input::AXIS).value()) + ")";
+ "Axis(" + std::to_string(node.getInputs().at(Reverse::Input::AXIS).value()) + ")";
dumpUnaryInputOp(node, axis);
}
@@ -320,25 +361,24 @@ void OperationDumper::visit(const Select &node)
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const ir::operation::Shape &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const ir::operation::Shape &node) { dumpOpGeneric(node); }
-void OperationDumper::visit(const Softmax &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const Softmax &node) { dumpOpGeneric(node); }
void OperationDumper::visit(const SpaceToBatchND &node)
{
std::string inputs =
- "BlockSize(" +
- std::to_string(node.getInputs().at(SpaceToBatchND::Input::BLOCK_SIZE).value()) +
- ") Paddings(" + std::to_string(node.getInputs().at(SpaceToBatchND::Input::PADDINGS).value()) +
- ")";
+ "BlockSize(" + std::to_string(node.getInputs().at(SpaceToBatchND::Input::BLOCK_SIZE).value()) +
+ ") Paddings(" + std::to_string(node.getInputs().at(SpaceToBatchND::Input::PADDINGS).value()) +
+ ")";
dumpUnaryInputOp(node, inputs);
}
-void OperationDumper::visit(const SpaceToDepth &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const SpaceToDepth &node) { dumpOpGeneric(node); }
-void OperationDumper::visit(const Split &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const Split &node) { dumpOpGeneric(node); }
-void OperationDumper::visit(const SquaredDifference &node) { dumpBinaryInputOp(node); }
+void OperationDumper::visit(const SquaredDifference &node) { dumpOpGeneric(node); }
void OperationDumper::visit(const StatelessRandomUniform &node)
{
@@ -349,7 +389,7 @@ void OperationDumper::visit(const StatelessRandomUniform &node)
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const Squeeze &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const Squeeze &node) { dumpOpGeneric(node); }
void OperationDumper::visit(const Slice &node) { dumpUnaryInputOp(node); }
@@ -358,7 +398,7 @@ void OperationDumper::visit(const StridedSlice &node) { dumpUnaryInputOp(node);
void OperationDumper::visit(const Tile &node)
{
std::string multiples =
- "Multiples(" + std::to_string(node.getInputs().at(Tile::Input::MULTIPLES).value()) + ")";
+ "Multiples(" + std::to_string(node.getInputs().at(Tile::Input::MULTIPLES).value()) + ")";
dumpUnaryInputOp(node, multiples);
}
@@ -375,7 +415,7 @@ void OperationDumper::visit(const TopKV2 &node)
void OperationDumper::visit(const TransposeConv &node)
{
std::string padding_type =
- node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
+ node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
VERBOSE(LIR) << "* TransposeConv(" << padding_type << ")" << std::endl;
VERBOSE(LIR) << " - Inputs : Output Shape("
<< node.getInputs().at(TransposeConv::Input::OUTPUT_SHAPE) << ") KERNEL("
@@ -384,22 +424,14 @@ void OperationDumper::visit(const TransposeConv &node)
VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const Transpose &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const Transpose &node) { dumpOpGeneric(node); }
void OperationDumper::visit(const Unpack &node)
{
VERBOSE(LIR) << "* " << node.name() << std::endl;
VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Unpack::Input::INPUT) << ")"
<< std::endl;
- std::string outputs;
- const auto &output_indices = node.getOutputs();
- for (auto it = std::begin(output_indices); it != std::end(output_indices); ++it)
- {
- outputs += std::to_string(it->value());
- if (std::next(it) != std::end(output_indices))
- outputs += ", ";
- }
- VERBOSE(LIR) << " - Outputs : Outputs(" << outputs << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : Outputs(" << node.getOutputs() << ")" << std::endl;
}
void OperationDumper::visit(const OneHot &node)
@@ -413,51 +445,21 @@ void OperationDumper::visit(const OneHot &node)
void OperationDumper::visit(const If &node)
{
VERBOSE(LIR) << "* " << node.name() << std::endl;
- std::string inputs;
- const auto &input_indices = node.getInputs();
- for (auto it = std::begin(input_indices); it != std::end(input_indices); ++it)
- {
- inputs += std::to_string(it->value());
- if (std::next(it) != std::end(input_indices))
- inputs += ", ";
- }
VERBOSE(LIR) << " - Inputs : "
<< "Then subgraph (" << node.param().then_subg_index << ") Else subgraph ("
- << node.param().else_subg_index << ") Inputs(" << inputs << ")" << std::endl;
- std::string outputs;
- const auto &output_indices = node.getOutputs();
- for (auto it = std::begin(output_indices); it != std::end(output_indices); ++it)
- {
- outputs += std::to_string(it->value());
- if (std::next(it) != std::end(output_indices))
- outputs += ", ";
- }
- VERBOSE(LIR) << " - Output : Outputs(" << outputs << ")" << std::endl;
+ << node.param().else_subg_index << ") Inputs(" << node.getInputs() << ")"
+ << std::endl;
+ VERBOSE(LIR) << " - Output : Outputs(" << node.getOutputs() << ")" << std::endl;
}
void OperationDumper::visit(const While &node)
{
VERBOSE(LIR) << "* " << node.name() << std::endl;
- std::string inputs;
- const auto &input_indices = node.getInputs();
- for (auto it = std::begin(input_indices); it != std::end(input_indices); ++it)
- {
- inputs += std::to_string(it->value());
- if (std::next(it) != std::end(input_indices))
- inputs += ", ";
- }
VERBOSE(LIR) << " - Inputs : "
<< "Cond subgraph (" << node.param().cond_subg_index << ") Body subgraph ("
- << node.param().cond_subg_index << ") Inputs(" << inputs << ")" << std::endl;
- std::string outputs;
- const auto &output_indices = node.getOutputs();
- for (auto it = std::begin(output_indices); it != std::end(output_indices); ++it)
- {
- outputs += std::to_string(it->value());
- if (std::next(it) != std::end(output_indices))
- outputs += ", ";
- }
- VERBOSE(LIR) << " - Output : Outputs(" << outputs << ")" << std::endl;
+ << node.param().body_subg_index << ") Inputs(" << node.getInputs() << ")"
+ << std::endl;
+ VERBOSE(LIR) << " - Output : Outputs(" << node.getOutputs() << ")" << std::endl;
}
} // namespace ir
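
Put together, dumpOpGeneric plus the OperandIndexSequence stream operator make the generic LIR dump for, e.g., a three-input Concat look roughly like the following; this is reconstructed from the code above, not captured output, and omits whatever prefix VERBOSE adds:

    * Concat
     - Inputs : Input(0,1,2)
     - Output : Output(3)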
diff --git a/runtime/onert/core/src/ir/OperationDumper.h b/runtime/onert/core/src/ir/OperationDumper.h
index e8ab3b3cd..99bf869d5 100644
--- a/runtime/onert/core/src/ir/OperationDumper.h
+++ b/runtime/onert/core/src/ir/OperationDumper.h
@@ -31,8 +31,9 @@ public:
OperationDumper(const std::string &start_msg);
public:
- void visit(const operation::ArgMax &) override;
+ void visit(const operation::ArgMinMax &) override;
void visit(const operation::BatchToSpaceND &node) override;
+ void visit(const operation::BCQFullyConnected &node) override;
void visit(const operation::BinaryArithmetic &node) override;
void visit(const operation::BroadcastTo &) override;
void visit(const operation::Comparison &) override;
@@ -47,12 +48,14 @@ public:
void visit(const operation::ElementwiseUnary &) override;
void visit(const operation::EmbeddingLookup &) override;
void visit(const operation::ExpandDims &) override;
+ void visit(const operation::Fill &) override;
void visit(const operation::FullyConnected &node) override;
void visit(const operation::Gather &) override;
void visit(const operation::HashtableLookup &) override;
void visit(const operation::InstanceNorm &) override;
void visit(const operation::L2Normalization &) override;
void visit(const operation::LocalResponseNormalization &) override;
+ void visit(const operation::Loss &node) override;
void visit(const operation::LSTM &) override;
void visit(const operation::Pack &) override;
void visit(const operation::Pad &) override;
@@ -65,6 +68,7 @@ public:
void visit(const operation::Reduce &) override;
void visit(const operation::Reshape &node) override;
void visit(const operation::ResizeBilinear &) override;
+ void visit(const operation::ResizeNearestNeighbor &) override;
void visit(const operation::Reverse &) override;
void visit(const operation::RNN &) override;
void visit(const operation::Select &node) override;
diff --git a/runtime/onert/core/src/ir/OperationValidator.cc b/runtime/onert/core/src/ir/OperationValidator.cc
new file mode 100644
index 000000000..09f773cf0
--- /dev/null
+++ b/runtime/onert/core/src/ir/OperationValidator.cc
@@ -0,0 +1,545 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OperationValidator.h"
+
+#include "ir/Graph.h"
+#include "util/logging.h"
+
+#define OP_REQUIRES(EXP) \
+ do \
+ { \
+ if (!(EXP)) \
+ throw std::runtime_error("OperationValidator failed at line " + std::to_string(__LINE__)); \
+ } while (0)
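+
+// For reference, OP_REQUIRES(isSameType(lhs, rhs)); expands to roughly:
+//
+//   do
+//   {
+//     if (!(isSameType(lhs, rhs)))
+//       throw std::runtime_error("OperationValidator failed at line " +
+//                                std::to_string(__LINE__));
+//   } while (0);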
+
+namespace onert
+{
+namespace ir
+{
+
+OperationValidator::OperationValidator(const Graph &graph)
+ : _operations{graph.operations()}, _operands{graph.operands()}
+{
+}
+
+void OperationValidator::operator()()
+{
+ _operations.iterate([&](const OperationIndex &, const IOperation &node) { node.accept(*this); });
+}
+
+DataType OperationValidator::operandType(const OperandIndex &idx)
+{
+ return _operands.at(idx).typeInfo().type();
+}
+
+bool OperationValidator::isConstant(const OperandIndex &idx)
+{
+ return _operands.at(idx).isConstant();
+}
+
+bool OperationValidator::isSameType(const OperandIndex &idx1, const OperandIndex &idx2)
+{
+ return operandType(idx1) == operandType(idx2);
+}
+
+bool OperationValidator::isSameQuantParam(const OperandIndex &idx1, const OperandIndex &idx2)
+{
+ if (_operands.at(idx1).typeInfo().scale() != _operands.at(idx2).typeInfo().scale())
+ return false;
+
+ if (_operands.at(idx1).typeInfo().zero_point() != _operands.at(idx2).typeInfo().zero_point())
+ return false;
+
+ return true;
+}
+
+bool OperationValidator::isValidType(const OperandIndex &idx, const DataType &type)
+{
+ return operandType(idx) == type;
+}
+
+bool OperationValidator::isValidType(const OperandIndex &idx,
+ std::initializer_list<DataType> valid_types)
+{
+ for (auto &&type_to_check : valid_types)
+ {
+ if (isValidType(idx, type_to_check))
+ {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void OperationValidator::visit(const operation::AddN &node)
+{
+ const auto output_index(node.getOutputs().at(0));
+
+ int size = node.getInputs().size();
+ for (int i = 0; i < size; i++)
+ {
+ const auto input_index(node.getInputs().at(i));
+ OP_REQUIRES(isValidType(input_index, {DataType::FLOAT32, DataType::INT32}));
+ OP_REQUIRES(isSameType(input_index, output_index));
+ }
+}
+
+void OperationValidator::visit(const operation::ArgMinMax &node)
+{
+ const auto input_index(node.getInputs().at(operation::ArgMinMax::Input::INPUT));
+ const auto axis_index(node.getInputs().at(operation::ArgMinMax::Input::AXIS));
+ const auto output_index(node.getOutputs().at(0));
+ const auto output_type = node.param().output_type;
+
+ OP_REQUIRES(isValidType(input_index, {DataType::FLOAT32, DataType::INT32, DataType::UINT8,
+ DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM}));
+ OP_REQUIRES(isValidType(axis_index, {DataType::INT32, DataType::INT64}));
+ OP_REQUIRES(isValidType(output_index, {DataType::INT32, DataType::INT64}));
+ OP_REQUIRES(isValidType(output_index, output_type));
+}
+
+void OperationValidator::visit(const operation::BatchMatMul &node)
+{
+ const auto lhs_index(node.getInputs().at(operation::BatchMatMul::Input::LHS));
+ const auto rhs_index(node.getInputs().at(operation::BatchMatMul::Input::RHS));
+ const auto output_index(node.getOutputs().at(0));
+
+ // Constant lhs and rhs are not implemented yet
+ OP_REQUIRES(!isConstant(lhs_index) && !isConstant(rhs_index));
+
+ // Allow hybrid quantization (lhs: float / rhs: qint8 / out: float)
+ OP_REQUIRES(isValidType(lhs_index, {DataType::FLOAT32, DataType::QUANT_INT8_ASYMM}));
+ OP_REQUIRES(isSameType(lhs_index, rhs_index) ||
+ ((operandType(lhs_index) == DataType::FLOAT32) &&
+ (operandType(rhs_index) == DataType::QUANT_INT8_ASYMM)));
+ OP_REQUIRES(isSameType(lhs_index, output_index));
+}
+
+void OperationValidator::visit(const operation::BatchToSpaceND &node)
+{
+ const auto input_index{node.getInputs().at(operation::BatchToSpaceND::Input::INPUT)};
+ const auto output_index{node.getOutputs().at(0)};
+
+ OP_REQUIRES(isSameType(input_index, output_index));
+}
+
+void OperationValidator::visit(const operation::BinaryArithmetic &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(operation::BinaryArithmetic::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(operation::BinaryArithmetic::Input::RHS)};
+
+ OP_REQUIRES(isSameType(lhs_index, rhs_index));
+ OP_REQUIRES(isSameType(lhs_index, output_index));
+}
+
+void OperationValidator::visit(const operation::Comparison &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+
+ const auto lhs_index{node.getInputs().at(operation::Comparison::Input::INPUT0)};
+ const auto rhs_index{node.getInputs().at(operation::Comparison::Input::INPUT1)};
+
+ OP_REQUIRES(isSameType(lhs_index, rhs_index));
+ OP_REQUIRES(isValidType(output_index, DataType::BOOL8));
+}
+
+void OperationValidator::visit(const operation::Concat &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+
+ for (auto &&input_index : node.getInputs())
+ {
+ OP_REQUIRES(isSameType(input_index, output_index));
+
+ // Int8 quantization requires same scale and zero point
+ if (isValidType(output_index, DataType::QUANT_INT8_ASYMM))
+ {
+ OP_REQUIRES(isSameQuantParam(input_index, output_index));
+ }
+ }
+}
+
+void OperationValidator::visit(const operation::Conv2D &node)
+{
+ const auto input_index{node.getInputs().at(operation::Conv2D::Input::INPUT)};
+ const auto kernel_index{node.getInputs().at(operation::Conv2D::Input::KERNEL)};
+ const auto output_index{node.getOutputs().at(0)};
+
+ uint32_t stride_horizontal = node.param().stride.horizontal;
+ uint32_t stride_vertical = node.param().stride.vertical;
+ uint32_t dilation_width = node.param().dilation.width_factor;
+ uint32_t dilation_height = node.param().dilation.height_factor;
+
+ OP_REQUIRES((stride_horizontal > 0) && (stride_vertical > 0));
+ OP_REQUIRES((dilation_width > 0) && (dilation_height > 0));
+ OP_REQUIRES(isSameType(input_index, output_index));
+
+ if (isConstant(kernel_index) && operandType(kernel_index) == DataType::QUANT_INT8_ASYMM)
+ {
+ for (const auto zeropoint : _operands.at(kernel_index).typeInfo().zero_points())
+ OP_REQUIRES(zeropoint == 0);
+ }
+}
+
+void OperationValidator::visit(const operation::DepthToSpace &node)
+{
+ const auto input_index{node.getInputs().at(operation::DepthToSpace::Input::INPUT)};
+ const auto output_index{node.getOutputs().at(0)};
+
+ int32_t block_size = node.param().block_size;
+
+ OP_REQUIRES(isValidType(input_index, {DataType::FLOAT32, DataType::INT32, DataType::INT64,
+ DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM}));
+ OP_REQUIRES(isSameType(input_index, output_index));
+
+ OP_REQUIRES(block_size > 0);
+}
+
+void OperationValidator::visit(const operation::DetectionPostProcess &node)
+{
+ const auto &param = node.param();
+
+ // FIXME: number of classes should be 1 for now.
+ OP_REQUIRES(param.num_classes == 1);
+}
+
+void OperationValidator::visit(const operation::DepthwiseConv2D &node)
+{
+ const auto input_index{node.getInputs().at(operation::DepthwiseConv2D::Input::INPUT)};
+ const auto kernel_index{node.getInputs().at(operation::DepthwiseConv2D::Input::KERNEL)};
+ const auto output_index{node.getOutputs().at(0)};
+
+ uint32_t stride_horizontal = node.param().stride.horizontal;
+ uint32_t stride_vertical = node.param().stride.vertical;
+ uint32_t dilation_width = node.param().dilation.width_factor;
+ uint32_t dilation_height = node.param().dilation.height_factor;
+
+ OP_REQUIRES((stride_horizontal > 0) && (stride_vertical > 0));
+ OP_REQUIRES((dilation_width > 0) && (dilation_height > 0));
+ OP_REQUIRES(isSameType(input_index, output_index));
+
+ if (isConstant(kernel_index) && operandType(kernel_index) == DataType::QUANT_INT8_ASYMM)
+ {
+ for (const auto zeropoint : _operands.at(kernel_index).typeInfo().zero_points())
+ OP_REQUIRES(zeropoint == 0);
+ }
+}
+
+void OperationValidator::visit(const operation::ElementwiseActivation &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(0)};
+
+ // Check if I/O types match
+ OP_REQUIRES(isSameType(output_index, input_index));
+
+ switch (node.param().op_type)
+ {
+ case operation::ElementwiseActivation::Type::ELU:
+ OP_REQUIRES(isValidType(input_index, DataType::FLOAT32));
+ break;
+ case operation::ElementwiseActivation::Type::LEAKY_RELU:
+ OP_REQUIRES(
+ isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM,
+ DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM}));
+ break;
+ case operation::ElementwiseActivation::Type::LOGISTIC:
+ OP_REQUIRES(
+ isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM,
+ DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM}));
+ break;
+ case operation::ElementwiseActivation::Type::RELU:
+ OP_REQUIRES(isValidType(
+ input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM}));
+ break;
+ case operation::ElementwiseActivation::Type::TANH:
+ OP_REQUIRES(
+ isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM,
+ DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM}));
+ break;
+ }
+}
+
+void OperationValidator::visit(const operation::ElementwiseBinary &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(operation::ElementwiseBinary::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(operation::ElementwiseBinary::Input::RHS)};
+
+ OP_REQUIRES(isSameType(lhs_index, rhs_index));
+ OP_REQUIRES(isSameType(lhs_index, output_index));
+
+ const auto op_type = node.param().op_type;
+ if (op_type == operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND ||
+ op_type == operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR)
+ {
+ OP_REQUIRES(isValidType(lhs_index, DataType::BOOL8));
+ }
+}
+
+void OperationValidator::visit(const operation::ElementwiseUnary &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(operation::ElementwiseUnary::Input::INPUT)};
+
+ // Check if I/O types match
+ if (node.param().op_type == operation::ElementwiseUnary::Type::DEQUANTIZE)
+ {
+ // NNAPI allows QUANT_INT8_SYMM type input
+ OP_REQUIRES(isValidType(input_index, {DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_SYMM,
+ DataType::QUANT_INT8_ASYMM}));
+ OP_REQUIRES(isValidType(output_index, DataType::FLOAT32));
+ }
+ else if (node.param().op_type == operation::ElementwiseUnary::Type::QUANTIZE)
+ {
+ OP_REQUIRES(isValidType(
+ input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM}));
+ OP_REQUIRES(
+ isValidType(output_index, {DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM}));
+ }
+ else if (node.param().op_type == operation::ElementwiseUnary::Type::FLOOR)
+ {
+ OP_REQUIRES(isValidType(input_index, DataType::FLOAT32));
+ OP_REQUIRES(isSameType(output_index, input_index));
+ }
+ else if (node.param().op_type != operation::ElementwiseUnary::Type::CAST)
+ {
+ OP_REQUIRES(isSameType(output_index, input_index));
+ }
+}
+
+void OperationValidator::visit(const operation::EmbeddingLookup &node)
+{
+ const auto lookups_index{node.getInputs().at(operation::EmbeddingLookup::Input::LOOKUPS)};
+ const auto values_index{node.getInputs().at(operation::EmbeddingLookup::Input::VALUES)};
+ const auto output_index{node.getOutputs().at(0)};
+
+ OP_REQUIRES(isValidType(lookups_index, DataType::INT32));
+
+ // TFLite: allows a hybrid type combination for the value table & output
+ // NNAPI: requires the value table and the output to have the same type
+ OP_REQUIRES(
+ isSameType(values_index, output_index) ||
+ (isValidType(output_index, DataType::FLOAT32) &&
+ (isValidType(values_index, {DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT8_SYMM}))));
+}
+
+void OperationValidator::visit(const operation::ExpandDims &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(operation::ExpandDims::Input::INPUT)};
+ const auto axis_index{node.getInputs().at(operation::ExpandDims::Input::AXIS)};
+
+ OP_REQUIRES(isSameType(output_index, input_index));
+ OP_REQUIRES(isValidType(axis_index, {DataType::INT32, DataType::INT64}));
+}
+
+void OperationValidator::visit(const operation::Fill &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(operation::Fill::Input::SHAPE)};
+ const auto value_index{node.getInputs().at(operation::Fill::Input::VALUE)};
+
+ OP_REQUIRES(isSameType(output_index, value_index));
+ OP_REQUIRES(isValidType(input_index, {DataType::INT32, DataType::INT64}));
+ OP_REQUIRES(isValidType(output_index,
+ {DataType::FLOAT32, DataType::INT32, DataType::INT64, DataType::BOOL8}));
+}
+
+void OperationValidator::visit(const operation::HashtableLookup &node)
+{
+ const auto hits_index{node.getOutputs().at(operation::HashtableLookup::Output::HITS)};
+ const auto lookups_index{node.getInputs().at(operation::HashtableLookup::Input::LOOKUPS)};
+ const auto keys_index{node.getInputs().at(operation::HashtableLookup::Input::KEYS)};
+
+ OP_REQUIRES(isValidType(lookups_index, DataType::INT32));
+ OP_REQUIRES(isValidType(keys_index, DataType::INT32));
+ OP_REQUIRES(isValidType(hits_index, DataType::QUANT_UINT8_ASYMM));
+}
+
+void OperationValidator::visit(const operation::Pack &node)
+{
+ const auto num{node.param().num};
+
+ OP_REQUIRES(num == static_cast<int32_t>(node.getInputs().size()));
+}
+
+void OperationValidator::visit(const operation::Pad &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(operation::Pad::Input::INPUT)};
+ const auto pad_index{node.getInputs().at(operation::Pad::Input::PAD)};
+ bool isQuantType =
+ isValidType(output_index, {DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM});
+ bool isPadV2 = node.getInputs().size() == 3;
+
+ OP_REQUIRES(isValidType(pad_index, DataType::INT32));
+ OP_REQUIRES(isSameType(input_index, output_index));
+
+ if (isQuantType)
+ OP_REQUIRES(isSameQuantParam(input_index, output_index));
+
+ if (isPadV2)
+ {
+ const auto value_index{node.getInputs().at(operation::Pad::Input::VALUE)};
+ const bool cond_same = isSameType(input_index, value_index);
+ const bool cond_same_quant = (!isQuantType || isSameQuantParam(input_index, value_index));
+ const auto input_t = operandType(input_index);
+ const auto value_t = operandType(value_index);
+ // NNAPI accepts this case; scale and zeroPoint are assumed to match those of input0.
+ const bool cond_quant8 =
+ ((input_t == DataType::QUANT_UINT8_ASYMM || input_t == DataType::QUANT_INT8_ASYMM) &&
+ value_t == DataType::INT32);
+ OP_REQUIRES((cond_same && cond_same_quant) || cond_quant8);
+ }
+}
+
+void OperationValidator::visit(const operation::Rank &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+
+ OP_REQUIRES(isValidType(output_index, DataType::INT32));
+}
+
+void OperationValidator::visit(const operation::ResizeBilinear &node)
+{
+ auto align_corners = node.param().align_corners;
+ auto half_pixel_centers = node.param().half_pixel_centers;
+
+ OP_REQUIRES(!align_corners || !half_pixel_centers);
+}
+
+void OperationValidator::visit(const operation::Reverse &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(operation::Reverse::Input::INPUT)};
+ const auto axis_index{node.getInputs().at(operation::Reverse::Input::AXIS)};
+
+ OP_REQUIRES(isValidType(axis_index, DataType::INT32));
+ OP_REQUIRES(isSameType(output_index, input_index));
+}
+
+void OperationValidator::visit(const operation::Select &node)
+{
+ const auto condition_index{node.getInputs().at(operation::Select::Input::CONDITION)};
+ const auto input_true_index{node.getInputs().at(operation::Select::Input::INPUT_TRUE)};
+ const auto input_false_index{node.getInputs().at(operation::Select::Input::INPUT_FALSE)};
+
+ OP_REQUIRES(isValidType(condition_index, DataType::BOOL8));
+ OP_REQUIRES(isSameType(input_true_index, input_false_index));
+}
+
+void OperationValidator::visit(const operation::Shape &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+
+ OP_REQUIRES(isValidType(output_index, {DataType::UINT32, DataType::INT32, DataType::INT64}));
+}
+
+void OperationValidator::visit(const operation::Slice &node)
+{
+ const auto begins_index{node.getInputs().at(operation::Slice::BEGINS)};
+ const auto sizes_index{node.getInputs().at(operation::Slice::SIZES)};
+
+ OP_REQUIRES(isValidType(begins_index, {DataType::INT32, DataType::INT64}));
+ OP_REQUIRES(isSameType(begins_index, sizes_index));
+}
+
+void OperationValidator::visit(const operation::Softmax &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(operation::Softmax::INPUT)};
+
+ OP_REQUIRES(isSameType(input_index, output_index));
+ OP_REQUIRES(isValidType(
+ output_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM}));
+}
+
+void OperationValidator::visit(const operation::SpaceToBatchND &node)
+{
+ const auto block_size_index{node.getInputs().at(operation::SpaceToBatchND::Input::BLOCK_SIZE)};
+ const auto paddings_index{node.getInputs().at(operation::SpaceToBatchND::Input::PADDINGS)};
+
+ // Non-constant block_size and paddings are not implemented yet
+ OP_REQUIRES(isConstant(block_size_index));
+ OP_REQUIRES(isConstant(paddings_index));
+}
+
+void OperationValidator::visit(const operation::SpaceToDepth &node)
+{
+ const auto block_size = node.param().block_size;
+ OP_REQUIRES(block_size >= 1);
+}
+
+void OperationValidator::visit(const operation::Split &node)
+{
+ const auto num_splits = node.param().num_splits;
+
+ OP_REQUIRES(num_splits > 0 && num_splits <= 0xFFFF);
+ OP_REQUIRES(node.getOutputs().size() == static_cast<uint32_t>(num_splits));
+}
+
+void OperationValidator::visit(const operation::SquaredDifference &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(operation::SquaredDifference::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(operation::SquaredDifference::Input::RHS)};
+
+ OP_REQUIRES(isSameType(output_index, lhs_index));
+ OP_REQUIRES(isSameType(lhs_index, rhs_index));
+}
+
+void OperationValidator::visit(const operation::StatelessRandomUniform &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto shape_index{node.getInputs().at(operation::StatelessRandomUniform::Input::SHAPE)};
+ const auto seed_index{node.getInputs().at(operation::StatelessRandomUniform::Input::SEED)};
+
+ OP_REQUIRES(isValidType(output_index, DataType::FLOAT32));
+ OP_REQUIRES(isValidType(shape_index, DataType::INT32));
+ OP_REQUIRES(isValidType(seed_index, DataType::INT32));
+}
+
+void OperationValidator::visit(const operation::StridedSlice &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(operation::StridedSlice::Input::INPUT)};
+
+ OP_REQUIRES(isSameType(output_index, input_index));
+}
+
+void OperationValidator::visit(const operation::TransposeConv &node)
+{
+ OP_REQUIRES((node.param().padding.type == PaddingType::SAME) ||
+ (node.param().padding.type == PaddingType::VALID));
+}
+
+void OperationValidator::visit(const operation::Unpack &node)
+{
+ const auto num{node.param().num};
+ OP_REQUIRES(num == static_cast<int32_t>(node.getOutputs().size()));
+}
+
+void OperationValidator::visit(const operation::While &node)
+{
+ OP_REQUIRES(node.getInputs().size() == node.getOutputs().size());
+}
+
+} // namespace ir
+} // namespace onert
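Every check above funnels through the OP_REQUIRES macro, whose definition sits elsewhere in this file and is not part of this hunk. A minimal sketch of the convention such a macro presumably follows (abort validation with a descriptive error on the first failed operand check); the exception type and message here are assumptions, not the actual definition:

    // Sketch only: the real OP_REQUIRES is defined elsewhere in onert.
    #define OP_REQUIRES(EXPR)                                          \
      do                                                               \
      {                                                                \
        if (!(EXPR))                                                   \
          throw std::runtime_error{"OperationValidator: " #EXPR};      \
      } while (0)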
diff --git a/runtime/onert/core/src/ir/OperationValidator.h b/runtime/onert/core/src/ir/OperationValidator.h
new file mode 100644
index 000000000..b9bcc4ee8
--- /dev/null
+++ b/runtime/onert/core/src/ir/OperationValidator.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_VALIDATOR_H__
+#define __ONERT_IR_OPERATION_VALIDATOR_H__
+
+#include "ir/OperationVisitor.h"
+#include "ir/Operations.h"
+#include "ir/Operands.h"
+
+namespace onert
+{
+namespace ir
+{
+class Graph;
+class Operands;
+} // namespace ir
+} // namespace onert
+
+namespace onert
+{
+namespace ir
+{
+
+class OperationValidator : public OperationVisitor
+{
+public:
+ OperationValidator(void) = delete;
+ OperationValidator(const Graph &graph);
+
+public:
+ void operator()();
+
+public:
+ void visit(const operation::AddN &node) override;
+ void visit(const operation::ArgMinMax &node) override;
+ void visit(const operation::BatchMatMul &node) override;
+ void visit(const operation::BatchToSpaceND &node) override;
+ void visit(const operation::BinaryArithmetic &node) override;
+ void visit(const operation::Comparison &node) override;
+ void visit(const operation::Concat &node) override;
+ void visit(const operation::Conv2D &node) override;
+ void visit(const operation::DepthToSpace &node) override;
+ void visit(const operation::DepthwiseConv2D &node) override;
+ void visit(const operation::DetectionPostProcess &node) override;
+ void visit(const operation::ElementwiseActivation &node) override;
+ void visit(const operation::ElementwiseBinary &node) override;
+ void visit(const operation::ElementwiseUnary &node) override;
+ void visit(const operation::EmbeddingLookup &node) override;
+ void visit(const operation::ExpandDims &node) override;
+ void visit(const operation::Fill &node) override;
+ void visit(const operation::HashtableLookup &node) override;
+ void visit(const operation::Pack &node) override;
+ void visit(const operation::Pad &node) override;
+ void visit(const operation::Rank &node) override;
+ void visit(const operation::ResizeBilinear &node) override;
+ void visit(const operation::Reverse &node) override;
+ void visit(const operation::Select &node) override;
+ void visit(const operation::Shape &node) override;
+ void visit(const operation::Slice &node) override;
+ void visit(const operation::Softmax &node) override;
+ void visit(const operation::SpaceToBatchND &node) override;
+ void visit(const operation::SpaceToDepth &node) override;
+ void visit(const operation::Split &node) override;
+ void visit(const operation::SquaredDifference &node) override;
+ void visit(const operation::StatelessRandomUniform &node) override;
+ void visit(const operation::StridedSlice &node) override;
+ void visit(const operation::TransposeConv &node) override;
+ void visit(const operation::Unpack &node) override;
+ void visit(const operation::While &node) override;
+
+private:
+ DataType operandType(const OperandIndex &idx);
+ bool isConstant(const OperandIndex &idx);
+ bool isSameType(const OperandIndex &idx1, const OperandIndex &idx2);
+ bool isSameQuantParam(const OperandIndex &idx1, const OperandIndex &idx2);
+ bool isValidType(const OperandIndex &idx, const DataType &type);
+ bool isValidType(const OperandIndex &idx, std::initializer_list<DataType> valid_types);
+
+private:
+ const Operations &_operations;
+ const Operands &_operands;
+};
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_VALIDATOR_H__
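Given the interface above (a deleted default constructor, a Graph constructor, and operator()), running the validator is a single expression. A usage sketch, assuming a fully built ir::Graph named graph and that a failed check raises as in the macro sketch earlier:

    #include "ir/OperationValidator.h"

    void validateGraph(const onert::ir::Graph &graph)
    {
      // Visits every operation once; the first failed OP_REQUIRES aborts.
      onert::ir::OperationValidator{graph}();
    }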
diff --git a/runtime/onert/core/src/ir/Operations.cc b/runtime/onert/core/src/ir/Operations.cc
index 64d0bd6f0..1b4691f58 100644
--- a/runtime/onert/core/src/ir/Operations.cc
+++ b/runtime/onert/core/src/ir/Operations.cc
@@ -25,12 +25,9 @@ namespace ir
Operations::Operations(const Operations &obj)
{
- obj.iterate([&](const OperationIndex &index, const Operation &op) {
- OperationCloner cloner;
- op.accept(cloner);
- _objects.emplace(index, cloner.releaseClone());
- });
- _index_count = obj._index_count;
+ obj.iterate(
+ [&](const OperationIndex &index, const IOperation &op) { _objects.emplace(index, clone(op)); });
+ _next_index = obj._next_index;
}
} // namespace ir
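The rewritten copy constructor swaps the manual OperationCloner visitor for a free clone(op) helper while keeping the same contract: a deep copy in which every IOperation is duplicated, never shared, and the next-index counter is carried over. The real helper lives elsewhere in the tree; for a copy-constructible concrete operation type it reduces to something like:

    // Hypothetical illustration; the actual clone() is defined elsewhere.
    #include <memory>

    template <typename ConcreteOp>
    std::unique_ptr<ConcreteOp> cloneAs(const ConcreteOp &op)
    {
      return std::make_unique<ConcreteOp>(op); // deep copy via copy ctor
    }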
diff --git a/runtime/onert/core/src/ir/Operations.test.cc b/runtime/onert/core/src/ir/Operations.test.cc
new file mode 100644
index 000000000..e57872689
--- /dev/null
+++ b/runtime/onert/core/src/ir/Operations.test.cc
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/Operations.h"
+
+#include "MockNode.h"
+
+#include <gtest/gtest.h>
+
+using onert::ir::Operation;
+using onert::ir::OperationIndex;
+using onert::ir::Operations;
+
+TEST(ir_Operations, basic)
+{
+ Operations ops;
+ ops.push(std::unique_ptr<Operation>(new onert_test::ir::SimpleMock({1, 2, 3, 4}, {5, 6, 7})));
+ OperationIndex idx{0u};
+ ASSERT_EQ(ops.at(idx).getInputs().size(), 4);
+ ASSERT_EQ(ops.at(idx).getOutputs().size(), 3);
+}
+
+TEST(ir_Operations, neg_at)
+{
+ Operations ops;
+ ops.push(std::unique_ptr<Operation>(new onert_test::ir::SimpleMock({1, 2, 3, 4}, {5, 6, 7})));
+ OperationIndex idx{99u};
+ EXPECT_THROW(ops.at(idx), std::out_of_range);
+}
diff --git a/runtime/onert/core/src/ir/Padding.cc b/runtime/onert/core/src/ir/Padding.cc
index d74f80217..b2b004e7a 100644
--- a/runtime/onert/core/src/ir/Padding.cc
+++ b/runtime/onert/core/src/ir/Padding.cc
@@ -66,14 +66,14 @@ inline ExplicitPadding samePaddingUsingIFM(const FeatureShape &ifm_shape, const
const int32_t vertical_expected_output = (ifm_shape.H + stride.vertical - 1) / stride.vertical;
const int32_t horizontal_expected_output =
- (ifm_shape.W + stride.horizontal - 1) / stride.horizontal;
+ (ifm_shape.W + stride.horizontal - 1) / stride.horizontal;
const int32_t vertical_needed_input =
- (vertical_expected_output - 1) * stride.vertical + effective_filter_h_size;
+ (vertical_expected_output - 1) * stride.vertical + effective_filter_h_size;
const int32_t vertical_total_padding = std::max(0, vertical_needed_input - ifm_shape.H);
const int32_t horizontal_needed_input =
- (horizontal_expected_output - 1) * stride.horizontal + effective_filter_w_size;
+ (horizontal_expected_output - 1) * stride.horizontal + effective_filter_w_size;
const int32_t horizontal_total_padding = std::max(0, horizontal_needed_input - ifm_shape.W);
padding.top = vertical_total_padding / 2;
@@ -90,7 +90,7 @@ inline ExplicitPadding samePadding(const FeatureShape &ifm_shape, const FeatureS
{
const int32_t vertical_expected_output = (ifm_shape.H + stride.vertical - 1) / stride.vertical;
const int32_t horizontal_expected_output =
- (ifm_shape.W + stride.horizontal - 1) / stride.horizontal;
+ (ifm_shape.W + stride.horizontal - 1) / stride.horizontal;
assert(vertical_expected_output == ofm_shape.H);
assert(horizontal_expected_output == ofm_shape.W);
@@ -129,7 +129,7 @@ Padding::Padding(PaddingType paddingType) : type{paddingType}, param{0, 0, 0, 0}
}
Padding::Padding(uint32_t left, uint32_t right, uint32_t top, uint32_t bottom)
- : type{PaddingType::EXPLICIT}, param{left, right, top, bottom}
+ : type{PaddingType::EXPLICIT}, param{left, right, top, bottom}
{
// DO NOTHING
}
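The reindented expressions in samePaddingUsingIFM implement the usual SAME-padding arithmetic: the expected output is the ceiling of input over stride, the needed input is (output - 1) * stride + effective filter size, and the shortfall is split across both edges (top gets the floored half; by the usual convention the remainder goes to bottom, though that line is outside this hunk). A worked instance with H = 7, vertical stride 2, and effective filter height 3:

    // vertical_expected_output = (7 + 2 - 1) / 2 = 4   (ceil(7 / 2))
    static_assert((7 + 2 - 1) / 2 == 4, "expected output");
    // vertical_needed_input = (4 - 1) * 2 + 3 = 9
    static_assert((4 - 1) * 2 + 3 == 9, "needed input");
    // vertical_total_padding = max(0, 9 - 7) = 2, so padding.top = 2 / 2 = 1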
diff --git a/runtime/onert/core/src/ir/Shape.cc b/runtime/onert/core/src/ir/Shape.cc
index 322df7b4c..e4e4c154b 100644
--- a/runtime/onert/core/src/ir/Shape.cc
+++ b/runtime/onert/core/src/ir/Shape.cc
@@ -26,10 +26,10 @@ namespace onert
namespace ir
{
-int32_t const Shape::UNSPECIFIED_DIM = -1;
+int32_t const Shape::kUnspecifiedDim = -1;
// NNFW_MAX_RANK is 6
-int32_t const Shape::MAX_RANK = 6;
+int32_t const Shape::kMaxRank = 6;
FeatureShape Shape::asFeature(Layout layout) const
{
@@ -80,34 +80,37 @@ uint64_t Shape::num_elements() const
{
// If any dimension is unspecified, the total number of elements cannot be calculated
if (std::any_of(_dimensions.begin(), _dimensions.end(),
- [](const int32_t &v) { return v == UNSPECIFIED_DIM; }))
+ [](const int32_t &v) { return v == kUnspecifiedDim; }))
throw std::runtime_error("num_elements() cannot calculate when any dimension is unspecified");
return std::accumulate(_dimensions.cbegin(), _dimensions.cend(), UINT64_C(1),
std::multiplies<uint64_t>());
}
-Shape permuteShape(const Shape &shape, Layout frontend_layout, Layout backend_layout)
+Shape permuteShape(const Shape &shape, Layout from, Layout to)
{
- assert(shape.rank() <= Shape::MAX_RANK);
- Shape backend_shape{shape};
- if (shape.rank() >= 4 && frontend_layout == Layout::NHWC && backend_layout == Layout::NCHW)
+ assert(shape.rank() <= Shape::kMaxRank);
+ Shape ret{shape};
+ if (from == to)
+ return ret;
+ if (shape.rank() < 4)
+ return ret;
+ // Permutation changing layout beyond 4-D is not supported yet
+ assert(shape.rank() <= 4);
+ if (from == Layout::NHWC && to == Layout::NCHW)
{
- // Permutation changing layout beyond 4-D is not supported yet
- assert(shape.rank() <= 4);
- backend_shape.dim(1) = shape.dim(3);
- backend_shape.dim(2) = shape.dim(1);
- backend_shape.dim(3) = shape.dim(2);
+ ret.dim(1) = shape.dim(3);
+ ret.dim(2) = shape.dim(1);
+ ret.dim(3) = shape.dim(2);
}
- else if (shape.rank() >= 4 && frontend_layout == Layout::NCHW && backend_layout == Layout::NHWC)
+ else if (from == Layout::NCHW && to == Layout::NHWC)
{
- // Permutation changing layout beyond 4-D is not supported yet
- assert(shape.rank() <= 4);
- backend_shape.dim(1) = shape.dim(2);
- backend_shape.dim(2) = shape.dim(3);
- backend_shape.dim(3) = shape.dim(1);
+ ret.dim(1) = shape.dim(2);
+ ret.dim(2) = shape.dim(3);
+ ret.dim(3) = shape.dim(1);
}
- return backend_shape;
+ // Other cases (either `from` or `to` is UNKNOWN): just return the original shape
+ return ret;
}
} // namespace ir
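After the rewrite, permuteShape reads as a chain of early returns: identical layouts and ranks below 4 pass the shape through untouched, and only the two 4-D cases actually move dimensions. A worked NHWC-to-NCHW instance, straight from the branch above:

    // NHWC {N=1, H=224, W=224, C=3}  ->  NCHW {1, 3, 224, 224}
    // ret.dim(1) = shape.dim(3) = 3
    // ret.dim(2) = shape.dim(1) = 224
    // ret.dim(3) = shape.dim(2) = 224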
diff --git a/runtime/onert/core/src/ir/Shape.test.cc b/runtime/onert/core/src/ir/Shape.test.cc
new file mode 100644
index 000000000..4788522d3
--- /dev/null
+++ b/runtime/onert/core/src/ir/Shape.test.cc
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/Shape.h"
+
+#include <gtest/gtest.h>
+
+TEST(ShapeTest, basic_test)
+{
+ {
+ onert::ir::Shape shape(3);
+
+ shape.dim(0) = 1;
+ shape.dim(1) = 2;
+ shape.dim(2) = 3;
+
+ ASSERT_EQ(shape.rank(), 3);
+ ASSERT_EQ(shape.num_elements(), 6);
+ ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), false);
+ ASSERT_EQ(shape.hasUnspecifiedDims(), false);
+ }
+ {
+ onert::ir::Shape shape; // scalar or rank is unspecified
+
+ ASSERT_EQ(shape.rank(), 0);
+ ASSERT_EQ(shape.num_elements(), 1);
+ ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), true);
+ ASSERT_EQ(shape.hasUnspecifiedDims(), false);
+ }
+}
+
+TEST(ShapeTest, neg_basic_test)
+{
+ {
+ onert::ir::Shape shape(2);
+
+ shape.dim(0) = 1;
+ shape.dim(1) = onert::ir::Shape::kUnspecifiedDim;
+
+ ASSERT_EQ(shape.rank(), 2);
+ ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), false);
+ ASSERT_EQ(shape.hasUnspecifiedDims(), true);
+ EXPECT_ANY_THROW(shape.num_elements());
+ }
+}
diff --git a/runtime/onert/core/src/ir/TypeInfo.cc b/runtime/onert/core/src/ir/TypeInfo.cc
index ab8af287e..5d1c7ba8b 100644
--- a/runtime/onert/core/src/ir/TypeInfo.cc
+++ b/runtime/onert/core/src/ir/TypeInfo.cc
@@ -28,7 +28,7 @@ bool operator==(const TypeInfo &lhs, const TypeInfo &rhs)
return false;
}
- if (lhs.offset() != rhs.offset())
+ if (lhs.zero_point() != rhs.zero_point())
{
return false;
}
diff --git a/runtime/onert/core/src/ir/operation/AddN.cc b/runtime/onert/core/src/ir/operation/AddN.cc
new file mode 100644
index 000000000..a51e12dff
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/AddN.cc
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/AddN.h"
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void AddN::accept(OperationVisitor &v) const { v.visit(*this); }
+
+AddN::AddN(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
+ : Operation{OperandConstraint::createExact(inputs.size()), inputs, outputs}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ArgMax.cc b/runtime/onert/core/src/ir/operation/ArgMax.cc
deleted file mode 100644
index 1275ae43a..000000000
--- a/runtime/onert/core/src/ir/operation/ArgMax.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/ArgMax.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void ArgMax::accept(OperationVisitor &v) const { v.visit(*this); }
-
-ArgMax::ArgMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ArgMinMax.cc b/runtime/onert/core/src/ir/operation/ArgMinMax.cc
new file mode 100644
index 000000000..2f18ff2e2
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/ArgMinMax.cc
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/ArgMinMax.h"
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void ArgMinMax::accept(OperationVisitor &v) const { v.visit(*this); }
+
+ArgMinMax::ArgMinMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/BCQFullyConnected.cc b/runtime/onert/core/src/ir/operation/BCQFullyConnected.cc
index 9dc54e6e9..ccda674ad 100644
--- a/runtime/onert/core/src/ir/operation/BCQFullyConnected.cc
+++ b/runtime/onert/core/src/ir/operation/BCQFullyConnected.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/BCQFullyConnected.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void BCQFullyConnected::accept(OperationVisitor &v) const { v.visit(*this); }
BCQFullyConnected::BCQFullyConnected(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param &param)
- : Operation{OperandConstraint::createExact(5u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(5u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/BCQGather.cc b/runtime/onert/core/src/ir/operation/BCQGather.cc
index 80efa6460..1ca5b0c9f 100644
--- a/runtime/onert/core/src/ir/operation/BCQGather.cc
+++ b/runtime/onert/core/src/ir/operation/BCQGather.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/BCQGather.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void BCQGather::accept(OperationVisitor &v) const { v.visit(*this); }
BCQGather::BCQGather(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(4u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(4u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/BatchMatMul.cc b/runtime/onert/core/src/ir/operation/BatchMatMul.cc
index b9616158d..20c5682f9 100644
--- a/runtime/onert/core/src/ir/operation/BatchMatMul.cc
+++ b/runtime/onert/core/src/ir/operation/BatchMatMul.cc
@@ -28,7 +28,7 @@ void BatchMatMul::accept(OperationVisitor &v) const { v.visit(*this); }
BatchMatMul::BatchMatMul(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/BatchToSpaceND.cc b/runtime/onert/core/src/ir/operation/BatchToSpaceND.cc
index 9ef2b125f..3c5578ac4 100644
--- a/runtime/onert/core/src/ir/operation/BatchToSpaceND.cc
+++ b/runtime/onert/core/src/ir/operation/BatchToSpaceND.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/BatchToSpaceND.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void BatchToSpaceND::accept(OperationVisitor &v) const { v.visit(*this); }
BatchToSpaceND::BatchToSpaceND(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}
+ : Operation{OperandConstraint::createInRange(2u, 3u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/BinaryArithmetic.cc b/runtime/onert/core/src/ir/operation/BinaryArithmetic.cc
index 2b1422c73..5eb3fc3d7 100644
--- a/runtime/onert/core/src/ir/operation/BinaryArithmetic.cc
+++ b/runtime/onert/core/src/ir/operation/BinaryArithmetic.cc
@@ -15,12 +15,10 @@
*/
#include "ir/operation/BinaryArithmetic.h"
+#include "ir/OperationVisitor.h"
-#include <cassert>
#include <unordered_map>
-#include "ir/OperationVisitor.h"
-
namespace onert
{
namespace ir
@@ -32,7 +30,7 @@ void BinaryArithmetic::accept(OperationVisitor &v) const { v.visit(*this); }
BinaryArithmetic::BinaryArithmetic(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param &param)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
@@ -40,10 +38,10 @@ std::string BinaryArithmetic::name() const
{
using ArithmeticType = onert::ir::operation::BinaryArithmetic::ArithmeticType;
static const std::unordered_map<ArithmeticType, std::string> name_map{
- {ArithmeticType::ADD, std::string{"Add"}},
- {ArithmeticType::SUB, std::string{"Sub"}},
- {ArithmeticType::MUL, std::string{"Mul"}},
- {ArithmeticType::DIV, std::string{"Div"}}};
+ {ArithmeticType::ADD, std::string{"Add"}},
+ {ArithmeticType::SUB, std::string{"Sub"}},
+ {ArithmeticType::MUL, std::string{"Mul"}},
+ {ArithmeticType::DIV, std::string{"Div"}}};
return name_map.at(_param.arithmetic_type);
}
diff --git a/runtime/onert/core/src/ir/operation/BroadcastTo.cc b/runtime/onert/core/src/ir/operation/BroadcastTo.cc
index a8f5e59cf..eab6c0611 100644
--- a/runtime/onert/core/src/ir/operation/BroadcastTo.cc
+++ b/runtime/onert/core/src/ir/operation/BroadcastTo.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/BroadcastTo.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -29,7 +26,7 @@ namespace operation
void BroadcastTo::accept(OperationVisitor &v) const { v.visit(*this); }
BroadcastTo::BroadcastTo(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Bulk.cc b/runtime/onert/core/src/ir/operation/Bulk.cc
new file mode 100644
index 000000000..4b96c9d94
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Bulk.cc
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Bulk.h"
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+void Bulk::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Bulk::Bulk(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Bulk::Param &param)
+ : Operation{OperandConstraint::createAny(), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Comparison.cc b/runtime/onert/core/src/ir/operation/Comparison.cc
index 2f6775411..33365657c 100644
--- a/runtime/onert/core/src/ir/operation/Comparison.cc
+++ b/runtime/onert/core/src/ir/operation/Comparison.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Comparison.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void Comparison::accept(OperationVisitor &v) const { v.visit(*this); }
Comparison::Comparison(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Concat.cc b/runtime/onert/core/src/ir/operation/Concat.cc
index 608bc29a6..3a21e36f2 100644
--- a/runtime/onert/core/src/ir/operation/Concat.cc
+++ b/runtime/onert/core/src/ir/operation/Concat.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Concat.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void Concat::accept(OperationVisitor &v) const { v.visit(*this); }
Concat::Concat(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createAtLeast(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createAtLeast(1u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Conv2D.cc b/runtime/onert/core/src/ir/operation/Conv2D.cc
index 3a2e1d1fe..d615ae416 100644
--- a/runtime/onert/core/src/ir/operation/Conv2D.cc
+++ b/runtime/onert/core/src/ir/operation/Conv2D.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Conv2D.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void Conv2D::accept(OperationVisitor &v) const { v.visit(*this); }
Conv2D::Conv2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/ConvertFp16ToFp32.cc b/runtime/onert/core/src/ir/operation/ConvertFp16ToFp32.cc
index 676e039fa..365745ea8 100644
--- a/runtime/onert/core/src/ir/operation/ConvertFp16ToFp32.cc
+++ b/runtime/onert/core/src/ir/operation/ConvertFp16ToFp32.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/ConvertFp16ToFp32.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void ConvertFp16ToFp32::accept(OperationVisitor &v) const { v.visit(*this); }
ConvertFp16ToFp32::ConvertFp16ToFp32(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/ConvertFp32ToFp16.cc b/runtime/onert/core/src/ir/operation/ConvertFp32ToFp16.cc
index bcfcbfc04..d4fc7031c 100644
--- a/runtime/onert/core/src/ir/operation/ConvertFp32ToFp16.cc
+++ b/runtime/onert/core/src/ir/operation/ConvertFp32ToFp16.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/ConvertFp32ToFp16.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void ConvertFp32ToFp16::accept(OperationVisitor &v) const { v.visit(*this); }
ConvertFp32ToFp16::ConvertFp32ToFp16(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Custom.cc b/runtime/onert/core/src/ir/operation/Custom.cc
index 25c53e1ba..06c84f81a 100644
--- a/runtime/onert/core/src/ir/operation/Custom.cc
+++ b/runtime/onert/core/src/ir/operation/Custom.cc
@@ -29,7 +29,7 @@ void Custom::accept(OperationVisitor &v) const { v.visit(*this); }
Custom::Custom(OperandConstraint input_constr, const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, std::string id, const Userdata &userdata)
- : Operation{input_constr, inputs, outputs}, _id(std::move(id)), _userdata(userdata)
+ : Operation{input_constr, inputs, outputs}, _id(std::move(id)), _userdata(userdata)
{
}
diff --git a/runtime/onert/core/src/ir/operation/DepthToSpace.cc b/runtime/onert/core/src/ir/operation/DepthToSpace.cc
index f2d6c7c1b..e3edea777 100644
--- a/runtime/onert/core/src/ir/operation/DepthToSpace.cc
+++ b/runtime/onert/core/src/ir/operation/DepthToSpace.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/DepthToSpace.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void DepthToSpace::accept(OperationVisitor &v) const { v.visit(*this); }
DepthToSpace::DepthToSpace(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/DepthwiseConv2D.cc b/runtime/onert/core/src/ir/operation/DepthwiseConv2D.cc
index d587a5591..0e7137306 100644
--- a/runtime/onert/core/src/ir/operation/DepthwiseConv2D.cc
+++ b/runtime/onert/core/src/ir/operation/DepthwiseConv2D.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/DepthwiseConv2D.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void DepthwiseConv2D::accept(OperationVisitor &v) const { v.visit(*this); }
DepthwiseConv2D::DepthwiseConv2D(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param &param)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/DetectionPostProcess.cc b/runtime/onert/core/src/ir/operation/DetectionPostProcess.cc
new file mode 100644
index 000000000..cd708796d
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/DetectionPostProcess.cc
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/DetectionPostProcess.h"
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+DetectionPostProcess::DetectionPostProcess(const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs, const Param &param)
+ : Operation(OperandConstraint::createExact(3u), inputs, outputs), _param(param)
+{
+}
+
+void DetectionPostProcess::accept(OperationVisitor &v) const { v.visit(*this); }
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Einsum.cc b/runtime/onert/core/src/ir/operation/Einsum.cc
index 3c1473aaa..b50f070e7 100644
--- a/runtime/onert/core/src/ir/operation/Einsum.cc
+++ b/runtime/onert/core/src/ir/operation/Einsum.cc
@@ -28,7 +28,7 @@ void Einsum::accept(OperationVisitor &v) const { v.visit(*this); }
Einsum::Einsum(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createAtLeast(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createAtLeast(1u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/ElementwiseActivation.cc b/runtime/onert/core/src/ir/operation/ElementwiseActivation.cc
index f6718b656..e83c26e28 100644
--- a/runtime/onert/core/src/ir/operation/ElementwiseActivation.cc
+++ b/runtime/onert/core/src/ir/operation/ElementwiseActivation.cc
@@ -15,12 +15,10 @@
*/
#include "ir/operation/ElementwiseActivation.h"
+#include "ir/OperationVisitor.h"
-#include <cassert>
#include <unordered_map>
-#include "ir/OperationVisitor.h"
-
namespace onert
{
namespace ir
@@ -33,13 +31,14 @@ void ElementwiseActivation::accept(OperationVisitor &v) const { v.visit(*this);
ElementwiseActivation::ElementwiseActivation(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
if (param.op_type == Type::LOGISTIC)
{
- assert(param.alpha == 0.0f && param.beta == 0.0f && "Logistic will be supported only as "
- "sigmoid function(L=1, k=1, x0=0). So, do "
- "not use alpha and beta");
+ assert(param.alpha == 0.0f && param.beta == 0.0f &&
+ "Logistic will be supported only as "
+ "sigmoid function(L=1, k=1, x0=0). So, do "
+ "not use alpha and beta");
}
else if (param.op_type == Type::RELU)
{
@@ -47,9 +46,10 @@ ElementwiseActivation::ElementwiseActivation(const OperandIndexSequence &inputs,
}
else if (param.op_type == Type::TANH)
{
- assert(param.alpha == 1.0f && param.beta == 1.0f && "f(x) = alpha * tanh(beta * x), Tanh is "
- "supported only the values of alpha and "
- "beta are 1.f");
+ assert(param.alpha == 1.0f && param.beta == 1.0f &&
+ "f(x) = alpha * tanh(beta * x), Tanh is "
+ "supported only the values of alpha and "
+ "beta are 1.f");
}
}
@@ -57,11 +57,11 @@ std::string ElementwiseActivation::name() const
{
using ElementwiseActivationType = onert::ir::operation::ElementwiseActivation::Type;
static const std::unordered_map<Type, std::string> name_map{
- {ElementwiseActivationType::ELU, "ELU"},
- {ElementwiseActivationType::LOGISTIC, "Logistic"},
- {ElementwiseActivationType::RELU, "ReLU"},
- {ElementwiseActivationType::TANH, "Tanh"},
- {ElementwiseActivationType::LEAKY_RELU, "LeakyRelu"}};
+ {ElementwiseActivationType::ELU, "ELU"},
+ {ElementwiseActivationType::LOGISTIC, "Logistic"},
+ {ElementwiseActivationType::RELU, "ReLU"},
+ {ElementwiseActivationType::TANH, "Tanh"},
+ {ElementwiseActivationType::LEAKY_RELU, "LeakyRelu"}};
return name_map.at(_param.op_type);
}
diff --git a/runtime/onert/core/src/ir/operation/ElementwiseBinary.cc b/runtime/onert/core/src/ir/operation/ElementwiseBinary.cc
index 3287fc0a3..b22bed7bc 100644
--- a/runtime/onert/core/src/ir/operation/ElementwiseBinary.cc
+++ b/runtime/onert/core/src/ir/operation/ElementwiseBinary.cc
@@ -15,12 +15,10 @@
*/
#include "ir/operation/ElementwiseBinary.h"
+#include "ir/OperationVisitor.h"
-#include <cassert>
#include <unordered_map>
-#include "ir/OperationVisitor.h"
-
namespace onert
{
namespace ir
@@ -32,7 +30,7 @@ void ElementwiseBinary::accept(OperationVisitor &v) const { v.visit(*this); }
ElementwiseBinary::ElementwiseBinary(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param &param)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
@@ -40,10 +38,11 @@ std::string ElementwiseBinary::name() const
{
using ElementwiseBinaryType = onert::ir::operation::ElementwiseBinary::ElementwiseBinaryType;
static const std::unordered_map<ElementwiseBinaryType, std::string> name_map{
- {ElementwiseBinaryType::LOGICAL_AND, std::string{"LogicalAnd"}},
- {ElementwiseBinaryType::LOGICAL_OR, std::string{"LogicalOr"}},
- {ElementwiseBinaryType::MAX, std::string{"Max"}},
- {ElementwiseBinaryType::MIN, std::string{"Min"}}};
+ {ElementwiseBinaryType::FLOOR_DIV, std::string{"FloorDiv"}},
+ {ElementwiseBinaryType::LOGICAL_AND, std::string{"LogicalAnd"}},
+ {ElementwiseBinaryType::LOGICAL_OR, std::string{"LogicalOr"}},
+ {ElementwiseBinaryType::MAX, std::string{"Max"}},
+ {ElementwiseBinaryType::MIN, std::string{"Min"}}};
return name_map.at(_param.op_type);
}
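The FLOOR_DIV entry added to name_map is a behavior fix, not cosmetics: name() resolves through std::unordered_map::at, which throws std::out_of_range for a missing key, so naming a FLOOR_DIV node previously threw. A standalone reproduction of the lookup pattern (types here are hypothetical):

    #include <stdexcept>
    #include <string>
    #include <unordered_map>

    enum class Op { FLOOR_DIV, MAX };

    std::string nameOf(Op op)
    {
      static const std::unordered_map<Op, std::string> names{{Op::MAX, "Max"}};
      return names.at(op); // throws std::out_of_range for Op::FLOOR_DIV
    }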
diff --git a/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc b/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc
index 7dfcd4a98..fd463e0fe 100644
--- a/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc
+++ b/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc
@@ -15,12 +15,10 @@
*/
#include "ir/operation/ElementwiseUnary.h"
+#include "ir/OperationVisitor.h"
-#include <cassert>
#include <unordered_map>
-#include "ir/OperationVisitor.h"
-
namespace onert
{
namespace ir
@@ -32,7 +30,9 @@ void ElementwiseUnary::accept(OperationVisitor &v) const { v.visit(*this); }
ElementwiseUnary::ElementwiseUnary(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs,
+ OperandConstraint::createExact(1u)},
+ _param{param}
{
}
@@ -40,23 +40,23 @@ std::string ElementwiseUnary::name() const
{
using ElementwiseUnaryType = onert::ir::operation::ElementwiseUnary::Type;
static const std::unordered_map<ElementwiseUnaryType, std::string> name_map{
- {ElementwiseUnaryType::ABS, std::string{"Abs"}},
- {ElementwiseUnaryType::CAST, std::string{"Cast"}},
- {ElementwiseUnaryType::COS, std::string{"Cos"}},
- {ElementwiseUnaryType::DEQUANTIZE, std::string{"Dequantize"}},
- {ElementwiseUnaryType::ERF, std::string{"Erf"}},
- {ElementwiseUnaryType::EXP, std::string{"Exp"}},
- {ElementwiseUnaryType::FLOOR, std::string{"Floor"}},
- {ElementwiseUnaryType::LOG, std::string{"Log"}},
- {ElementwiseUnaryType::LOGICAL_NOT, std::string{"LogicalNot"}},
- {ElementwiseUnaryType::NEG, std::string{"Neg"}},
- {ElementwiseUnaryType::QUANTIZE, std::string{"Quantize"}},
- {ElementwiseUnaryType::ROUND, std::string{"Round"}},
- {ElementwiseUnaryType::RSQRT, std::string{"RSqrt"}},
- {ElementwiseUnaryType::SIN, std::string{"Sin"}},
- {ElementwiseUnaryType::SQRT, std::string{"Sqrt"}},
- {ElementwiseUnaryType::SQURE, std::string{"Squre"}},
- {ElementwiseUnaryType::ZEROS_LIKE, std::string{"ZerosLike"}}};
+ {ElementwiseUnaryType::ABS, std::string{"Abs"}},
+ {ElementwiseUnaryType::CAST, std::string{"Cast"}},
+ {ElementwiseUnaryType::COS, std::string{"Cos"}},
+ {ElementwiseUnaryType::DEQUANTIZE, std::string{"Dequantize"}},
+ {ElementwiseUnaryType::ERF, std::string{"Erf"}},
+ {ElementwiseUnaryType::EXP, std::string{"Exp"}},
+ {ElementwiseUnaryType::FLOOR, std::string{"Floor"}},
+ {ElementwiseUnaryType::LOG, std::string{"Log"}},
+ {ElementwiseUnaryType::LOGICAL_NOT, std::string{"LogicalNot"}},
+ {ElementwiseUnaryType::NEG, std::string{"Neg"}},
+ {ElementwiseUnaryType::QUANTIZE, std::string{"Quantize"}},
+ {ElementwiseUnaryType::ROUND, std::string{"Round"}},
+ {ElementwiseUnaryType::RSQRT, std::string{"RSqrt"}},
+ {ElementwiseUnaryType::SIN, std::string{"Sin"}},
+ {ElementwiseUnaryType::SQRT, std::string{"Sqrt"}},
+ {ElementwiseUnaryType::SQUARE, std::string{"Square"}},
+ {ElementwiseUnaryType::ZEROS_LIKE, std::string{"ZerosLike"}}};
return name_map.at(_param.op_type);
}
diff --git a/runtime/onert/core/src/ir/operation/EmbeddingLookup.cc b/runtime/onert/core/src/ir/operation/EmbeddingLookup.cc
index b300b004e..66b80b2c5 100644
--- a/runtime/onert/core/src/ir/operation/EmbeddingLookup.cc
+++ b/runtime/onert/core/src/ir/operation/EmbeddingLookup.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/EmbeddingLookup.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void EmbeddingLookup::accept(OperationVisitor &v) const { v.visit(*this); }
EmbeddingLookup::EmbeddingLookup(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/ExpandDims.cc b/runtime/onert/core/src/ir/operation/ExpandDims.cc
index 3f555bd23..e421bc383 100644
--- a/runtime/onert/core/src/ir/operation/ExpandDims.cc
+++ b/runtime/onert/core/src/ir/operation/ExpandDims.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/ExpandDims.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -30,7 +27,7 @@ namespace operation
void ExpandDims::accept(OperationVisitor &v) const { v.visit(*this); }
ExpandDims::ExpandDims(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Fill.cc b/runtime/onert/core/src/ir/operation/Fill.cc
index c44f45aab..60355c609 100644
--- a/runtime/onert/core/src/ir/operation/Fill.cc
+++ b/runtime/onert/core/src/ir/operation/Fill.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Fill.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -30,7 +27,7 @@ namespace operation
void Fill::accept(OperationVisitor &v) const { v.visit(*this); }
Fill::Fill(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/FullyConnected.cc b/runtime/onert/core/src/ir/operation/FullyConnected.cc
index 118ae554a..3533df097 100644
--- a/runtime/onert/core/src/ir/operation/FullyConnected.cc
+++ b/runtime/onert/core/src/ir/operation/FullyConnected.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/FullyConnected.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void FullyConnected::accept(OperationVisitor &v) const { v.visit(*this); }
FullyConnected::FullyConnected(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param &param)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createInRange(2u, 3u), inputs, outputs}, _param{param}
{
}
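Relaxing FullyConnected from createExact(3u) to createInRange(2u, 3u) lets the node be built with either two or three inputs; by analogy with the TFLite/NNAPI operand layout the optional third input is presumably the bias, though the operand names are outside this hunk. The constraint semantics, spelled out as a hypothetical helper rather than the real class:

    #include <cstdint>

    // createInRange(2u, 3u) accepts {input, weights} or {input, weights, bias}.
    bool acceptsInputCount(uint32_t n) { return 2u <= n && n <= 3u; }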
diff --git a/runtime/onert/core/src/ir/operation/FusedBatchNorm.cc b/runtime/onert/core/src/ir/operation/FusedBatchNorm.cc
index 7b9301ea6..b5679f308 100644
--- a/runtime/onert/core/src/ir/operation/FusedBatchNorm.cc
+++ b/runtime/onert/core/src/ir/operation/FusedBatchNorm.cc
@@ -28,7 +28,7 @@ void FusedBatchNorm::accept(OperationVisitor &v) const { v.visit(*this); }
FusedBatchNorm::FusedBatchNorm(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param &param)
- : Operation{OperandConstraint::createAtLeast(5u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createAtLeast(5u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Gather.cc b/runtime/onert/core/src/ir/operation/Gather.cc
index 11d46e75b..e0c4630a0 100644
--- a/runtime/onert/core/src/ir/operation/Gather.cc
+++ b/runtime/onert/core/src/ir/operation/Gather.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Gather.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void Gather::accept(OperationVisitor &v) const { v.visit(*this); }
Gather::Gather(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/HashtableLookup.cc b/runtime/onert/core/src/ir/operation/HashtableLookup.cc
index e9a7a82ff..5d1589cd1 100644
--- a/runtime/onert/core/src/ir/operation/HashtableLookup.cc
+++ b/runtime/onert/core/src/ir/operation/HashtableLookup.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/HashtableLookup.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void HashtableLookup::accept(OperationVisitor &v) const { v.visit(*this); }
HashtableLookup::HashtableLookup(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/If.cc b/runtime/onert/core/src/ir/operation/If.cc
index 599751dfd..380c87dbe 100644
--- a/runtime/onert/core/src/ir/operation/If.cc
+++ b/runtime/onert/core/src/ir/operation/If.cc
@@ -24,7 +24,7 @@ namespace operation
{
void If::accept(OperationVisitor &v) const { v.visit(*this); }
If::If(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param)
- : Operation{OperandConstraint::createAny(), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createAny(), inputs, outputs}, _param{param}
{
}
} // namespace operation
diff --git a/runtime/onert/core/src/ir/operation/InstanceNorm.cc b/runtime/onert/core/src/ir/operation/InstanceNorm.cc
index 2334560ef..9fb55383e 100644
--- a/runtime/onert/core/src/ir/operation/InstanceNorm.cc
+++ b/runtime/onert/core/src/ir/operation/InstanceNorm.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/InstanceNorm.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void InstanceNorm::accept(OperationVisitor &v) const { v.visit(*this); }
InstanceNorm::InstanceNorm(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/L2Normalization.cc b/runtime/onert/core/src/ir/operation/L2Normalization.cc
index 9a7d3eb61..6725df596 100644
--- a/runtime/onert/core/src/ir/operation/L2Normalization.cc
+++ b/runtime/onert/core/src/ir/operation/L2Normalization.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/L2Normalization.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void L2Normalization::accept(OperationVisitor &v) const { v.visit(*this); }
L2Normalization::L2Normalization(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/LSTM.cc b/runtime/onert/core/src/ir/operation/LSTM.cc
index 30a865326..06e66158b 100644
--- a/runtime/onert/core/src/ir/operation/LSTM.cc
+++ b/runtime/onert/core/src/ir/operation/LSTM.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/LSTM.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,8 +28,16 @@ void LSTM::accept(OperationVisitor &v) const { v.visit(*this); }
LSTM::LSTM(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(23u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createInRange(20u, 24u), inputs, outputs}, _param{param}
+{
+}
+
+std::string LSTM::name() const
{
+ if (getOutputs().at(Output::SCRATCH_BUFFER).undefined())
+ return std::string{"UnidirectionalSequenceLSTM"};
+ else
+ return Operation::name();
}
} // namespace operation
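The widened input range (20 to 24) and the new name() override let one IR class serve two frontend operations: a node whose SCRATCH_BUFFER output is left undefined reports itself as "UnidirectionalSequenceLSTM", and otherwise falls back to Operation::name(). A standalone sketch of that dispatch, where the base name returned by Operation::name() is an assumption:

    #include <string>

    std::string lstmName(bool scratch_buffer_defined)
    {
      return scratch_buffer_defined ? "LSTM" // assumed base name
                                    : "UnidirectionalSequenceLSTM";
    }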
diff --git a/runtime/onert/core/src/ir/operation/LocalResponseNormalization.cc b/runtime/onert/core/src/ir/operation/LocalResponseNormalization.cc
index 1ae97c142..73fca9938 100644
--- a/runtime/onert/core/src/ir/operation/LocalResponseNormalization.cc
+++ b/runtime/onert/core/src/ir/operation/LocalResponseNormalization.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/LocalResponseNormalization.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -32,7 +29,7 @@ void LocalResponseNormalization::accept(OperationVisitor &v) const { v.visit(*th
LocalResponseNormalization::LocalResponseNormalization(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/LogSoftmax.cc b/runtime/onert/core/src/ir/operation/LogSoftmax.cc
index 73c6580ec..d580e63e1 100644
--- a/runtime/onert/core/src/ir/operation/LogSoftmax.cc
+++ b/runtime/onert/core/src/ir/operation/LogSoftmax.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/LogSoftmax.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void LogSoftmax::accept(OperationVisitor &v) const { v.visit(*this); }
LogSoftmax::LogSoftmax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Loss.cc b/runtime/onert/core/src/ir/operation/Loss.cc
new file mode 100644
index 000000000..fa3520b2c
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Loss.cc
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Loss.h"
+#include "ir/OperationVisitor.h"
+
+#include <unordered_map>
+#include <cassert>
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Loss::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Loss::Loss(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createAtLeast(2u), inputs, outputs}, _param{param}
+{
+ if (param.op_type == Type::CATEGORICAL_CROSSENTROPY)
+ {
+ assert(inputs.size() == 2 && "CategoricalCrossentropy Loss has 2 inputs");
+ }
+}
+
+std::string Loss::name() const
+{
+ using LossType = onert::ir::operation::Loss::Type;
+ static const std::unordered_map<Type, std::string> name_map{
+ {LossType::MEAN_SQUARED_ERROR, "MeanSquaredError Loss"},
+ {LossType::CATEGORICAL_CROSSENTROPY, "CategoricalCrossentropy Loss"}};
+ return name_map.at(_param.op_type);
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
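
Note: Loss::name() uses the same enum-to-string table idiom that Pool2D and Reduce use elsewhere in this diff. One property of the idiom worth calling out: unordered_map::at() throws std::out_of_range for an enumerator missing from the table, so an unmapped loss type fails loudly rather than returning a default name. A self-contained sketch:

#include <iostream>
#include <string>
#include <unordered_map>

enum class LossType { MEAN_SQUARED_ERROR, CATEGORICAL_CROSSENTROPY };

// Mirrors Loss::name() above; .at() throws for enumerators absent from the table.
std::string loss_name(LossType t)
{
  static const std::unordered_map<LossType, std::string> name_map{
    {LossType::MEAN_SQUARED_ERROR, "MeanSquaredError Loss"},
    {LossType::CATEGORICAL_CROSSENTROPY, "CategoricalCrossentropy Loss"}};
  return name_map.at(t);
}

int main() { std::cout << loss_name(LossType::MEAN_SQUARED_ERROR) << '\n'; }
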
diff --git a/runtime/onert/core/src/ir/operation/LowerInfo.cc b/runtime/onert/core/src/ir/operation/LowerInfo.cc
deleted file mode 100644
index 249918bd6..000000000
--- a/runtime/onert/core/src/ir/operation/LowerInfo.cc
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/LowerInfo.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-LowerInfo::LowerInfo(const backend::Backend *backend, Layout layout)
- : _permute_factor{backend, layout}
-{
- // DO NOTHING
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/MatrixBandPart.cc b/runtime/onert/core/src/ir/operation/MatrixBandPart.cc
index bac31f13e..e52bddc1f 100644
--- a/runtime/onert/core/src/ir/operation/MatrixBandPart.cc
+++ b/runtime/onert/core/src/ir/operation/MatrixBandPart.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/MatrixBandPart.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void MatrixBandPart::accept(OperationVisitor &v) const { v.visit(*this); }
MatrixBandPart::MatrixBandPart(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(3u), inputs, outputs}
+  : Operation{OperandConstraint::createExact(3u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/OneHot.cc b/runtime/onert/core/src/ir/operation/OneHot.cc
index 22935e7d6..90898f1ed 100644
--- a/runtime/onert/core/src/ir/operation/OneHot.cc
+++ b/runtime/onert/core/src/ir/operation/OneHot.cc
@@ -28,7 +28,7 @@ void OneHot::accept(OperationVisitor &v) const { v.visit(*this); }
OneHot::OneHot(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
-    : Operation{OperandConstraint::createExact(4u), inputs, outputs}, _param{param}
+  : Operation{OperandConstraint::createExact(4u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/PReLU.cc b/runtime/onert/core/src/ir/operation/PReLU.cc
index a2e37e0ad..87bd12e60 100644
--- a/runtime/onert/core/src/ir/operation/PReLU.cc
+++ b/runtime/onert/core/src/ir/operation/PReLU.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/PReLU.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -30,7 +27,7 @@ namespace operation
void PReLU::accept(OperationVisitor &v) const { v.visit(*this); }
PReLU::PReLU(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+  : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Pack.cc b/runtime/onert/core/src/ir/operation/Pack.cc
index f0908a2c6..00feadfb0 100644
--- a/runtime/onert/core/src/ir/operation/Pack.cc
+++ b/runtime/onert/core/src/ir/operation/Pack.cc
@@ -25,7 +25,7 @@ namespace operation
void Pack::accept(OperationVisitor &v) const { v.visit(*this); }
Pack::Pack(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
-    : Operation{OperandConstraint::createAtLeast(3u), inputs, outputs}, _param{param}
+  : Operation{OperandConstraint::createAtLeast(1u), inputs, outputs}, _param{param}
{
}
} // namespace operation
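
Note: Pack's minimum input count drops from 3 to 1 here. The diff does not state the motivation, but the effect is that packing one or two tensors now passes the IR-level operand check; stricter per-input validation is presumably left to later stages (an assumption, not shown in this hunk). A compile-time sketch of the relaxed predicate:

#include <cstdint>

// Sketch of the relaxed check introduced by createAtLeast(1u).
constexpr bool pack_input_count_ok(uint32_t n) { return n >= 1u; }

static_assert(pack_input_count_ok(1u), "packing a single tensor is now accepted");
static_assert(!pack_input_count_ok(0u), "an empty input list is still rejected");
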
diff --git a/runtime/onert/core/src/ir/operation/Pad.cc b/runtime/onert/core/src/ir/operation/Pad.cc
index 0c56e92e3..a3f2d9752 100644
--- a/runtime/onert/core/src/ir/operation/Pad.cc
+++ b/runtime/onert/core/src/ir/operation/Pad.cc
@@ -30,7 +30,7 @@ void Pad::accept(OperationVisitor &v) const { v.visit(*this); }
// PAD: 2 inputs
// PADV2: 3 inputs
Pad::Pad(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createInRange(2u, 3u), inputs, outputs}
+  : Operation{OperandConstraint::createInRange(2u, 3u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Permute.cc b/runtime/onert/core/src/ir/operation/Permute.cc
index eefb6c542..813fbaf30 100644
--- a/runtime/onert/core/src/ir/operation/Permute.cc
+++ b/runtime/onert/core/src/ir/operation/Permute.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Permute.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -30,7 +27,7 @@ namespace operation
void Permute::accept(OperationVisitor &v) const { v.visit(*this); }
Permute::Permute(const OperandIndex &input, const OperandIndex &output, Type type)
-    : Operation{OperandConstraint::createExact(1u)}, _type{type}
+  : Operation{OperandConstraint::createExact(1u)}, _type{type}
{
setInputs({input});
setOutputs({output});
diff --git a/runtime/onert/core/src/ir/operation/Pool2D.cc b/runtime/onert/core/src/ir/operation/Pool2D.cc
index 761d14c3d..e32b876e6 100644
--- a/runtime/onert/core/src/ir/operation/Pool2D.cc
+++ b/runtime/onert/core/src/ir/operation/Pool2D.cc
@@ -15,12 +15,10 @@
*/
#include "ir/operation/Pool2D.h"
+#include "ir/OperationVisitor.h"
-#include <cassert>
#include <unordered_map>
-#include "ir/OperationVisitor.h"
-
namespace onert
{
namespace ir
@@ -32,7 +30,7 @@ void Pool2D::accept(OperationVisitor &v) const { v.visit(*this); }
Pool2D::Pool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
-    : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+  : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
@@ -40,9 +38,9 @@ std::string Pool2D::name() const
{
using PoolType = onert::ir::operation::Pool2D::PoolType;
static const std::unordered_map<PoolType, std::string> name_map{
-      {PoolType::AVG, "Avg" + std::string{toString(opcode())}},
-      {PoolType::L2, "L2" + std::string{toString(opcode())}},
-      {PoolType::MAX, "Max" + std::string{toString(opcode())}}};
+    {PoolType::AVG, "Avg" + std::string{toString(opcode())}},
+    {PoolType::L2, "L2" + std::string{toString(opcode())}},
+    {PoolType::MAX, "Max" + std::string{toString(opcode())}}};
return name_map.at(_param.op_type);
}
diff --git a/runtime/onert/core/src/ir/operation/Pow.cc b/runtime/onert/core/src/ir/operation/Pow.cc
index 940b1391a..f7c159a12 100644
--- a/runtime/onert/core/src/ir/operation/Pow.cc
+++ b/runtime/onert/core/src/ir/operation/Pow.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Pow.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -30,7 +27,7 @@ namespace operation
void Pow::accept(OperationVisitor &v) const { v.visit(*this); }
Pow::Pow(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+  : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/RNN.cc b/runtime/onert/core/src/ir/operation/RNN.cc
index 298c5e745..988a50669 100644
--- a/runtime/onert/core/src/ir/operation/RNN.cc
+++ b/runtime/onert/core/src/ir/operation/RNN.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/RNN.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void RNN::accept(OperationVisitor &v) const { v.visit(*this); }
RNN::RNN(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
-    : Operation{OperandConstraint::createExact(5u), inputs, outputs}, _param{param}
+  : Operation{OperandConstraint::createExact(5u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Range.cc b/runtime/onert/core/src/ir/operation/Range.cc
index 96ab04c1b..8ced92a0b 100644
--- a/runtime/onert/core/src/ir/operation/Range.cc
+++ b/runtime/onert/core/src/ir/operation/Range.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Range.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -30,7 +27,7 @@ namespace operation
void Range::accept(OperationVisitor &v) const { v.visit(*this); }
Range::Range(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(3u), inputs, outputs}
+  : Operation{OperandConstraint::createExact(3u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Rank.cc b/runtime/onert/core/src/ir/operation/Rank.cc
index c357e9018..40797bf29 100644
--- a/runtime/onert/core/src/ir/operation/Rank.cc
+++ b/runtime/onert/core/src/ir/operation/Rank.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Rank.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -30,7 +27,7 @@ namespace operation
void Rank::accept(OperationVisitor &v) const { v.visit(*this); }
Rank::Rank(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+  : Operation{OperandConstraint::createExact(1u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Reduce.cc b/runtime/onert/core/src/ir/operation/Reduce.cc
index d6a1d953c..8da1940fa 100644
--- a/runtime/onert/core/src/ir/operation/Reduce.cc
+++ b/runtime/onert/core/src/ir/operation/Reduce.cc
@@ -15,12 +15,10 @@
*/
#include "ir/operation/Reduce.h"
+#include "ir/OperationVisitor.h"
-#include <cassert>
#include <unordered_map>
-#include "ir/OperationVisitor.h"
-
namespace onert
{
namespace ir
@@ -32,7 +30,7 @@ void Reduce::accept(OperationVisitor &v) const { v.visit(*this); }
Reduce::Reduce(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
-    : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+  : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
@@ -40,13 +38,13 @@ std::string Reduce::name() const
{
using ReduceType = onert::ir::operation::Reduce::ReduceType;
static const std::unordered_map<ReduceType, std::string> name_map{
-      {ReduceType::ALL, std::string{toString(opcode())} + "All"},
-      {ReduceType::ANY, std::string{toString(opcode())} + "Any"},
-      {ReduceType::MAX, std::string{toString(opcode())} + "Max"},
-      {ReduceType::MEAN, std::string{toString(opcode())} + "Mean"},
-      {ReduceType::MIN, std::string{toString(opcode())} + "Min"},
-      {ReduceType::PROD, std::string{toString(opcode())} + "Prod"},
-      {ReduceType::SUM, std::string{toString(opcode())} + "SUM"}};
+    {ReduceType::ALL, std::string{toString(opcode())} + "All"},
+    {ReduceType::ANY, std::string{toString(opcode())} + "Any"},
+    {ReduceType::MAX, std::string{toString(opcode())} + "Max"},
+    {ReduceType::MEAN, std::string{toString(opcode())} + "Mean"},
+    {ReduceType::MIN, std::string{toString(opcode())} + "Min"},
+    {ReduceType::PROD, std::string{toString(opcode())} + "Prod"},
+    {ReduceType::SUM, std::string{toString(opcode())} + "SUM"}};
return name_map.at(_param.reduce_type);
// return std::string(toString(opcode())) + reduce_type_str_map.at(_param.reduce_type);
}
diff --git a/runtime/onert/core/src/ir/operation/Reshape.cc b/runtime/onert/core/src/ir/operation/Reshape.cc
index 92aa89ac6..0ed4affa1 100644
--- a/runtime/onert/core/src/ir/operation/Reshape.cc
+++ b/runtime/onert/core/src/ir/operation/Reshape.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Reshape.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void Reshape::accept(OperationVisitor &v) const { v.visit(*this); }
Reshape::Reshape(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
-    : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param(param)
+  : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param(param)
{
}
diff --git a/runtime/onert/core/src/ir/operation/ResizeBilinear.cc b/runtime/onert/core/src/ir/operation/ResizeBilinear.cc
index d0d89f45f..7d256f447 100644
--- a/runtime/onert/core/src/ir/operation/ResizeBilinear.cc
+++ b/runtime/onert/core/src/ir/operation/ResizeBilinear.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/ResizeBilinear.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void ResizeBilinear::accept(OperationVisitor &v) const { v.visit(*this); }
ResizeBilinear::ResizeBilinear(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param &param)
-    : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+  : Operation{OperandConstraint::createInRange(1u, 2u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc b/runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc
index 9f17af97c..58be87b95 100644
--- a/runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc
+++ b/runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/ResizeNearestNeighbor.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -32,7 +29,7 @@ void ResizeNearestNeighbor::accept(OperationVisitor &v) const { v.visit(*this);
ResizeNearestNeighbor::ResizeNearestNeighbor(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs,
const Param &param)
-    : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+  : Operation{OperandConstraint::createInRange(1u, 2u), inputs, outputs}, _param{param}
{
}
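
Note: both resize operations move from exactly one input to a 1-2 range. The likely reading, inferred from the constraint change alone, is that the target size can arrive either through Param (one input) or as a second size tensor (two inputs). A compile-time sketch of the accepted arities:

#include <cstdint>

// Mirrors createInRange(1u, 2u) for ResizeBilinear/ResizeNearestNeighbor (sketch).
constexpr bool resize_input_count_ok(uint32_t n) { return n >= 1u && n <= 2u; }

static_assert(resize_input_count_ok(1u), "size supplied via Param");
static_assert(resize_input_count_ok(2u), "size supplied as a second input tensor");
static_assert(!resize_input_count_ok(3u), "anything else is still rejected");
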
diff --git a/runtime/onert/core/src/ir/operation/Reverse.cc b/runtime/onert/core/src/ir/operation/Reverse.cc
index 4b3c1e1af..6c3746426 100644
--- a/runtime/onert/core/src/ir/operation/Reverse.cc
+++ b/runtime/onert/core/src/ir/operation/Reverse.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Reverse.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -30,7 +27,7 @@ namespace operation
void Reverse::accept(OperationVisitor &v) const { v.visit(*this); }
Reverse::Reverse(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+  : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Select.cc b/runtime/onert/core/src/ir/operation/Select.cc
index 1f22b5234..59684190c 100644
--- a/runtime/onert/core/src/ir/operation/Select.cc
+++ b/runtime/onert/core/src/ir/operation/Select.cc
@@ -28,7 +28,7 @@ namespace operation
void Select::accept(OperationVisitor &v) const { v.visit(*this); }
Select::Select(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(3u), inputs, outputs}
+  : Operation{OperandConstraint::createExact(3u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Shape.cc b/runtime/onert/core/src/ir/operation/Shape.cc
index 2a63d6dcf..f90924488 100644
--- a/runtime/onert/core/src/ir/operation/Shape.cc
+++ b/runtime/onert/core/src/ir/operation/Shape.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Shape.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -30,7 +27,7 @@ namespace operation
void Shape::accept(OperationVisitor &v) const { v.visit(*this); }
Shape::Shape(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+  : Operation{OperandConstraint::createExact(1u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Slice.cc b/runtime/onert/core/src/ir/operation/Slice.cc
index 888b563fb..1362c0f91 100644
--- a/runtime/onert/core/src/ir/operation/Slice.cc
+++ b/runtime/onert/core/src/ir/operation/Slice.cc
@@ -27,7 +27,7 @@ namespace operation
void Slice::accept(OperationVisitor &v) const { v.visit(*this); }
Slice::Slice(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(3u), inputs, outputs}
+  : Operation{OperandConstraint::createExact(3u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Softmax.cc b/runtime/onert/core/src/ir/operation/Softmax.cc
index 3f1aa0af1..c06c85309 100644
--- a/runtime/onert/core/src/ir/operation/Softmax.cc
+++ b/runtime/onert/core/src/ir/operation/Softmax.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Softmax.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void Softmax::accept(OperationVisitor &v) const { v.visit(*this); }
Softmax::Softmax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
-    : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+  : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/SpaceToBatchND.cc b/runtime/onert/core/src/ir/operation/SpaceToBatchND.cc
index 53fab4fa9..94acccb0c 100644
--- a/runtime/onert/core/src/ir/operation/SpaceToBatchND.cc
+++ b/runtime/onert/core/src/ir/operation/SpaceToBatchND.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/SpaceToBatchND.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void SpaceToBatchND::accept(OperationVisitor &v) const { v.visit(*this); }
SpaceToBatchND::SpaceToBatchND(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(3u), inputs, outputs}
+  : Operation{OperandConstraint::createExact(3u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/SpaceToDepth.cc b/runtime/onert/core/src/ir/operation/SpaceToDepth.cc
index d8a45aee5..08e7e5190 100644
--- a/runtime/onert/core/src/ir/operation/SpaceToDepth.cc
+++ b/runtime/onert/core/src/ir/operation/SpaceToDepth.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/SpaceToDepth.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void SpaceToDepth::accept(OperationVisitor &v) const { v.visit(*this); }
SpaceToDepth::SpaceToDepth(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
-    : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+  : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Split.cc b/runtime/onert/core/src/ir/operation/Split.cc
index 244884e41..3e371188d 100644
--- a/runtime/onert/core/src/ir/operation/Split.cc
+++ b/runtime/onert/core/src/ir/operation/Split.cc
@@ -13,9 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
#include "ir/operation/Split.h"
-#include <cassert>
#include "ir/OperationVisitor.h"
+
namespace onert
{
namespace ir
@@ -25,7 +26,7 @@ namespace operation
void Split::accept(OperationVisitor &v) const { v.visit(*this); }
Split::Split(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
-    : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+  : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
} // namespace operation
diff --git a/runtime/onert/core/src/ir/operation/SplitV.cc b/runtime/onert/core/src/ir/operation/SplitV.cc
index e638c9ac9..be13f167e 100644
--- a/runtime/onert/core/src/ir/operation/SplitV.cc
+++ b/runtime/onert/core/src/ir/operation/SplitV.cc
@@ -13,9 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
#include "ir/operation/SplitV.h"
-#include <cassert>
#include "ir/OperationVisitor.h"
+
namespace onert
{
namespace ir
@@ -25,7 +26,7 @@ namespace operation
void SplitV::accept(OperationVisitor &v) const { v.visit(*this); }
SplitV::SplitV(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
-    : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
+  : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
{
}
} // namespace operation
diff --git a/runtime/onert/core/src/ir/operation/SquaredDifference.cc b/runtime/onert/core/src/ir/operation/SquaredDifference.cc
index 49e58aaf2..db93903c7 100644
--- a/runtime/onert/core/src/ir/operation/SquaredDifference.cc
+++ b/runtime/onert/core/src/ir/operation/SquaredDifference.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/SquaredDifference.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void SquaredDifference::accept(OperationVisitor &v) const { v.visit(*this); }
SquaredDifference::SquaredDifference(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+  : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Squeeze.cc b/runtime/onert/core/src/ir/operation/Squeeze.cc
index 8cf928fb4..e059c4bee 100644
--- a/runtime/onert/core/src/ir/operation/Squeeze.cc
+++ b/runtime/onert/core/src/ir/operation/Squeeze.cc
@@ -28,7 +28,7 @@ void Squeeze::accept(OperationVisitor &v) const { v.visit(*this); }
Squeeze::Squeeze(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
-    : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param(param)
+  : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param(param)
{
}
diff --git a/runtime/onert/core/src/ir/operation/StatelessRandomUniform.cc b/runtime/onert/core/src/ir/operation/StatelessRandomUniform.cc
index cbb0ff251..94be0be86 100644
--- a/runtime/onert/core/src/ir/operation/StatelessRandomUniform.cc
+++ b/runtime/onert/core/src/ir/operation/StatelessRandomUniform.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/StatelessRandomUniform.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -30,7 +27,7 @@ void StatelessRandomUniform::accept(OperationVisitor &v) const { v.visit(*this);
StatelessRandomUniform::StatelessRandomUniform(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+  : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/StridedSlice.cc b/runtime/onert/core/src/ir/operation/StridedSlice.cc
index 2a7905995..a38282c93 100644
--- a/runtime/onert/core/src/ir/operation/StridedSlice.cc
+++ b/runtime/onert/core/src/ir/operation/StridedSlice.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/StridedSlice.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void StridedSlice::accept(OperationVisitor &v) const { v.visit(*this); }
StridedSlice::StridedSlice(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
-    : Operation{OperandConstraint::createExact(4u), inputs, outputs}, _param{param}
+  : Operation{OperandConstraint::createExact(4u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Tile.cc b/runtime/onert/core/src/ir/operation/Tile.cc
index 5ba3df2ad..51c1ff1dc 100644
--- a/runtime/onert/core/src/ir/operation/Tile.cc
+++ b/runtime/onert/core/src/ir/operation/Tile.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Tile.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -30,7 +27,7 @@ namespace operation
void Tile::accept(OperationVisitor &v) const { v.visit(*this); }
Tile::Tile(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+  : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/TopKV2.cc b/runtime/onert/core/src/ir/operation/TopKV2.cc
index a5e6c6a85..e1723d180 100644
--- a/runtime/onert/core/src/ir/operation/TopKV2.cc
+++ b/runtime/onert/core/src/ir/operation/TopKV2.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/TopKV2.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void TopKV2::accept(OperationVisitor &v) const { v.visit(*this); }
TopKV2::TopKV2(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
-    : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+  : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Transpose.cc b/runtime/onert/core/src/ir/operation/Transpose.cc
index 3a663fbce..dbc5ef2aa 100644
--- a/runtime/onert/core/src/ir/operation/Transpose.cc
+++ b/runtime/onert/core/src/ir/operation/Transpose.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Transpose.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -29,9 +26,8 @@ namespace operation
void Transpose::accept(OperationVisitor &v) const { v.visit(*this); }
-Transpose::Transpose(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
-                     const Param &param)
-    : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+Transpose::Transpose(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
+  : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
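
Note: Transpose loses its Param while keeping createExact(2u): the permutation stops being a fixed attribute and becomes the second input operand, so a runtime-produced permutation can flow through the graph. A hedged sketch of building such a node, assuming Graph::addOperand/addOperation behave as in the verifier tests later in this diff:

#include <memory>

#include "ir/Graph.h"
#include "ir/operation/Transpose.h"

using namespace onert::ir;

// x and y are the data input/output operands, assumed to already exist in
// the graph; perm is now a plain INT32 operand instead of a Param field.
OperationIndex add_transpose(Graph &graph, OperandIndex x, OperandIndex y)
{
  auto perm = graph.addOperand(Shape{4}, TypeInfo{DataType::INT32});
  return graph.addOperation(std::make_unique<operation::Transpose>(
    OperandIndexSequence{x, perm}, OperandIndexSequence{y}));
}
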
diff --git a/runtime/onert/core/src/ir/operation/TransposeConv.cc b/runtime/onert/core/src/ir/operation/TransposeConv.cc
index 7f29ca44e..944cc365d 100644
--- a/runtime/onert/core/src/ir/operation/TransposeConv.cc
+++ b/runtime/onert/core/src/ir/operation/TransposeConv.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/TransposeConv.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void TransposeConv::accept(OperationVisitor &v) const { v.visit(*this); }
TransposeConv::TransposeConv(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param &param)
-    : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
+  : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Unpack.cc b/runtime/onert/core/src/ir/operation/Unpack.cc
index 67aa54ab5..185eddce3 100644
--- a/runtime/onert/core/src/ir/operation/Unpack.cc
+++ b/runtime/onert/core/src/ir/operation/Unpack.cc
@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
#include "ir/operation/Unpack.h"
#include "ir/OperationVisitor.h"
@@ -25,7 +26,7 @@ namespace operation
void Unpack::accept(OperationVisitor &v) const { v.visit(*this); }
Unpack::Unpack(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
-    : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+  : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
} // namespace operation
diff --git a/runtime/onert/core/src/ir/operation/While.cc b/runtime/onert/core/src/ir/operation/While.cc
index 2505c60e3..f35996b07 100644
--- a/runtime/onert/core/src/ir/operation/While.cc
+++ b/runtime/onert/core/src/ir/operation/While.cc
@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
#include "ir/operation/While.h"
#include "ir/OperationVisitor.h"
@@ -25,7 +26,7 @@ namespace operation
void While::accept(OperationVisitor &v) const { v.visit(*this); }
While::While(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
-    : Operation{OperandConstraint::createAny(), inputs, outputs}, _param{param}
+  : Operation{OperandConstraint::createAny(), inputs, outputs}, _param{param}
{
}
} // namespace operation
diff --git a/runtime/onert/core/src/ir/train/TrainableGraph.cc b/runtime/onert/core/src/ir/train/TrainableGraph.cc
new file mode 100644
index 000000000..781f04956
--- /dev/null
+++ b/runtime/onert/core/src/ir/train/TrainableGraph.cc
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/train/TrainableGraph.h"
+#include "util/Utils.h"
+
+#include <algorithm>
+#include <cassert>
+#include <stdexcept>
+#include <typeinfo>
+#include <misc/polymorphic_downcast.h>
+
+namespace onert
+{
+namespace ir
+{
+namespace train
+{
+
+TrainableGraph::TrainableGraph() : _graph{} {}
+
+TrainableGraph::TrainableGraph(const TrainableGraph &tgraph)
+ : _graph{tgraph._graph}, _derivatives{tgraph._derivatives}, _losses{tgraph._losses}
+{
+ tgraph.operations().iterate(
+ [&](const onert::ir::OperationIndex &index, const onert::ir::IOperation &op) {
+ replaceOperation(index, dynamic_cast<const ITrainableOperation &>(op).clone());
+ });
+}
+
+TrainableGraph::TrainableGraph(const Graph &graph) : _graph{graph} {}
+
+OperandIndex TrainableGraph::addOperand(const Shape &shape, const TypeInfo &type)
+{
+ return _graph.addOperand(shape, type);
+}
+
+OperandIndex TrainableGraph::addOperand(OperandIndex index, std::unique_ptr<Operand> &&operand)
+{
+ return _graph.addOperand(index, std::move(operand));
+}
+
+OperationIndex TrainableGraph::addOperation(std::unique_ptr<ITrainableOperation> &&operation)
+{
+ return _graph.addOperation(std::move(operation));
+}
+
+OperationIndex TrainableGraph::replaceOperation(OperationIndex index,
+ std::unique_ptr<ITrainableOperation> &&operation)
+{
+ return _graph.replaceOperation(index, std::move(operation));
+}
+
+OperandIndex TrainableGraph::addDerivative(OperandIndex index,
+ std::unique_ptr<Operand> &&derivative)
+{
+ return _derivatives.push(std::move(derivative), index);
+}
+
+IOIndex TrainableGraph::getInputIndex(const std::string &name) const
+{
+ return _graph.getInputIndex(name);
+}
+
+IOIndex TrainableGraph::getOutputIndex(const std::string &name) const
+{
+ return _graph.getOutputIndex(name);
+}
+
+void TrainableGraph::changeShape(const OperandIndex &index, const ir::Shape &new_shape)
+{
+ _graph.changeShape(index, new_shape);
+}
+
+void TrainableGraph::changeDerivativeShape(const OperandIndex &index, const ir::Shape &new_shape)
+{
+ assert(_derivatives.exist(index));
+ _derivatives.at(index).info().shape(new_shape);
+}
+
+void TrainableGraph::addInput(const OperandIndex &ind, const std::string &name)
+{
+ _graph.addInput(ind, name);
+}
+
+void TrainableGraph::addOutput(const OperandIndex &ind, const std::string &name)
+{
+ _graph.addOutput(ind, name);
+}
+
+void TrainableGraph::verify(void) const
+{
+ _graph.verify();
+
+ operations().iterate([](const onert::ir::OperationIndex &, const onert::ir::IOperation &op) {
+ try
+ {
+ UNUSED_RELEASE(dynamic_cast<const onert::ir::train::ITrainableOperation &>(op));
+ }
+ catch (const std::bad_cast &)
+ {
+ throw std::runtime_error("TrainableGraph: " + op.name() + " is not a trainable operation");
+ }
+ });
+}
+
+void TrainableGraph::removeOperand(const OperandIndex &ind) { _graph.removeOperand(ind); }
+
+void TrainableGraph::setLayout(Layout layout) { _graph.setLayout(layout); }
+
+const ITrainableOperation &TrainableGraph::operation(OperationIndex index) const
+{
+ // NOTE Virtual inherited objects cannot be static_casted.
+ return dynamic_cast<const ITrainableOperation &>(_graph.operations().at(index));
+}
+
+std::vector<ir::OperationIndex> TrainableGraph::topolSortOperations() const
+{
+ return _graph.topolSortOperations();
+}
+
+void TrainableGraph::addLoss(const OperandIndex &loss_ind, const IOIndex &pred_ioind)
+{
+ _losses.emplace(pred_ioind, loss_ind);
+}
+
+OperandIndex TrainableGraph::getLossIndex(const IOIndex &pred_ioind) const
+{
+ auto itr = _losses.find(pred_ioind);
+ return (itr == _losses.end()) ? OperandIndex{} : itr->second;
+}
+
+} // namespace train
+} // namespace ir
+} // namespace onert
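
Note: TrainableGraph is a decorator over Graph: most mutators forward to the wrapped _graph, while _derivatives and _losses add training-specific bookkeeping. A sketch of the loss registration flow implied by addLoss()/getLossIndex() above (shapes and indices are illustrative, not from the diff):

#include <cassert>

#include "ir/train/TrainableGraph.h"

using namespace onert::ir;

void register_loss(train::TrainableGraph &tgraph)
{
  TypeInfo type{DataType::FLOAT32};
  auto pred = tgraph.addOperand(Shape{1, 10}, type); // forwarded to the wrapped Graph
  auto loss = tgraph.addOperand(Shape{1}, type);
  tgraph.addOutput(pred, "prediction");
  tgraph.addLoss(loss, IOIndex{0}); // prediction output #0 -> its loss operand
  assert(tgraph.getLossIndex(IOIndex{0}) == loss);
}
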
diff --git a/runtime/onert/core/src/ir/train/operation/Conv2D.cc b/runtime/onert/core/src/ir/train/operation/Conv2D.cc
new file mode 100644
index 000000000..923861ae3
--- /dev/null
+++ b/runtime/onert/core/src/ir/train/operation/Conv2D.cc
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/train/operation/Conv2D.h"
+
+#include "ir/OperationVisitor.h"
+#include "ir/train/TrainableOperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace train
+{
+namespace operation
+{
+
+std::unique_ptr<ITrainableOperation> Conv2D::clone() const
+{
+ return std::make_unique<Conv2D>(*this);
+}
+
+void Conv2D::accept(OperationVisitor &v) const { v.visit(*this); }
+
+void Conv2D::accept(TrainableOperationVisitor &v) const { v.visit(*this); }
+
+Conv2D::Conv2D(const OperationType &operation)
+ : OperationType{operation.getInputs(), operation.getOutputs(), operation.param()}
+{
+ // DO NOTHING
+}
+
+} // namespace operation
+} // namespace train
+} // namespace ir
+} // namespace onert
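
Note: this file and the train/operation sources that follow all repeat one pattern: the trainable op derives from its inference counterpart, copy-constructs from it, and adds clone() (used by TrainableGraph's copy constructor above) plus an accept() overload for TrainableOperationVisitor. Reduced to its shape, as an illustration rather than the real class layout:

#include <memory>

// OperationType is the wrapped inference operation; it must expose
// getInputs()/getOutputs()/param() as the ops in this diff do.
template <typename OperationType> class TrainableOp : public OperationType
{
public:
  explicit TrainableOp(const OperationType &op)
    : OperationType{op.getInputs(), op.getOutputs(), op.param()}
  {
  }
  std::unique_ptr<TrainableOp> clone() const { return std::make_unique<TrainableOp>(*this); }
};
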
diff --git a/runtime/onert/core/src/ir/train/operation/ElementwiseActivation.cc b/runtime/onert/core/src/ir/train/operation/ElementwiseActivation.cc
new file mode 100644
index 000000000..1dae3f674
--- /dev/null
+++ b/runtime/onert/core/src/ir/train/operation/ElementwiseActivation.cc
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/train/operation/ElementwiseActivation.h"
+
+#include "ir/OperationVisitor.h"
+#include "ir/train/TrainableOperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace train
+{
+namespace operation
+{
+
+std::unique_ptr<ITrainableOperation> ElementwiseActivation::clone() const
+{
+ return std::make_unique<ElementwiseActivation>(*this);
+}
+
+void ElementwiseActivation::accept(OperationVisitor &v) const { v.visit(*this); }
+
+void ElementwiseActivation::accept(TrainableOperationVisitor &v) const { v.visit(*this); }
+
+ElementwiseActivation::ElementwiseActivation(const OperationType &operation)
+ : OperationType{operation.getInputs(), operation.getOutputs(), operation.param()}
+{
+ // DO NOTHING
+}
+
+} // namespace operation
+} // namespace train
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/train/operation/FullyConnected.cc b/runtime/onert/core/src/ir/train/operation/FullyConnected.cc
new file mode 100644
index 000000000..a26f7c489
--- /dev/null
+++ b/runtime/onert/core/src/ir/train/operation/FullyConnected.cc
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/train/operation/FullyConnected.h"
+
+#include "ir/OperationVisitor.h"
+#include "ir/train/TrainableOperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace train
+{
+namespace operation
+{
+
+std::unique_ptr<ITrainableOperation> FullyConnected::clone() const
+{
+ return std::make_unique<FullyConnected>(*this);
+}
+
+void FullyConnected::accept(OperationVisitor &v) const { v.visit(*this); }
+
+void FullyConnected::accept(TrainableOperationVisitor &v) const { v.visit(*this); }
+
+FullyConnected::FullyConnected(const OperationType &operation)
+ : OperationType{operation.getInputs(), operation.getOutputs(), operation.param()}
+{
+ // DO NOTHING
+}
+
+} // namespace operation
+} // namespace train
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/train/operation/Loss.cc b/runtime/onert/core/src/ir/train/operation/Loss.cc
new file mode 100644
index 000000000..abd79929b
--- /dev/null
+++ b/runtime/onert/core/src/ir/train/operation/Loss.cc
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/train/operation/Loss.h"
+
+#include "ir/OperationVisitor.h"
+#include "ir/train/TrainableOperationVisitor.h"
+
+#include <misc/polymorphic_downcast.h>
+
+namespace onert
+{
+namespace ir
+{
+namespace train
+{
+namespace operation
+{
+
+std::unique_ptr<ITrainableOperation> Loss::clone() const { return std::make_unique<Loss>(*this); }
+
+void Loss::accept(OperationVisitor &v) const { v.visit(*this); }
+
+void Loss::accept(TrainableOperationVisitor &v) const { v.visit(*this); }
+
+Loss::Loss(const OperationType &operation)
+ : OperationType{operation.getInputs(), operation.getOutputs(), operation.param()}
+{
+ // DO NOTHING
+}
+
+} // namespace operation
+} // namespace train
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/train/operation/Permute.cc b/runtime/onert/core/src/ir/train/operation/Permute.cc
new file mode 100644
index 000000000..adc23aa49
--- /dev/null
+++ b/runtime/onert/core/src/ir/train/operation/Permute.cc
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/train/operation/Permute.h"
+
+#include "ir/OperationVisitor.h"
+#include "ir/train/TrainableOperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace train
+{
+namespace operation
+{
+
+std::unique_ptr<ITrainableOperation> Permute::clone() const
+{
+ return std::make_unique<Permute>(*this);
+}
+
+void Permute::accept(OperationVisitor &v) const { v.visit(*this); }
+
+void Permute::accept(TrainableOperationVisitor &v) const { v.visit(*this); }
+
+Permute::Permute(const OperationType &operation)
+ : OperationType{operation.getInputs().at(0), operation.getOutputs().at(0),
+ operation.getPermuteType()}
+{
+ // DO NOTHING
+}
+
+} // namespace operation
+} // namespace train
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/train/operation/Pool2D.cc b/runtime/onert/core/src/ir/train/operation/Pool2D.cc
new file mode 100644
index 000000000..021574f19
--- /dev/null
+++ b/runtime/onert/core/src/ir/train/operation/Pool2D.cc
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/train/operation/Pool2D.h"
+
+#include "ir/OperationVisitor.h"
+#include "ir/train/TrainableOperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace train
+{
+namespace operation
+{
+
+std::unique_ptr<ITrainableOperation> Pool2D::clone() const
+{
+ return std::make_unique<Pool2D>(*this);
+}
+
+void Pool2D::accept(OperationVisitor &v) const { v.visit(*this); }
+
+void Pool2D::accept(TrainableOperationVisitor &v) const { v.visit(*this); }
+
+Pool2D::Pool2D(const OperationType &operation)
+ : OperationType{operation.getInputs(), operation.getOutputs(), operation.param()}
+{
+ // DO NOTHING
+}
+
+} // namespace operation
+} // namespace train
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/train/operation/Reshape.cc b/runtime/onert/core/src/ir/train/operation/Reshape.cc
new file mode 100644
index 000000000..c76158607
--- /dev/null
+++ b/runtime/onert/core/src/ir/train/operation/Reshape.cc
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/train/operation/Reshape.h"
+
+#include "ir/OperationVisitor.h"
+#include "ir/train/TrainableOperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace train
+{
+namespace operation
+{
+
+std::unique_ptr<ITrainableOperation> Reshape::clone() const
+{
+ return std::make_unique<Reshape>(*this);
+}
+
+void Reshape::accept(OperationVisitor &v) const { v.visit(*this); }
+
+void Reshape::accept(TrainableOperationVisitor &v) const { v.visit(*this); }
+
+Reshape::Reshape(const OperationType &operation)
+ : OperationType{operation.getInputs(), operation.getOutputs(), operation.param()}
+{
+ // DO NOTHING
+}
+
+} // namespace operation
+} // namespace train
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/train/operation/Softmax.cc b/runtime/onert/core/src/ir/train/operation/Softmax.cc
new file mode 100644
index 000000000..dbd403879
--- /dev/null
+++ b/runtime/onert/core/src/ir/train/operation/Softmax.cc
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/train/operation/Softmax.h"
+
+#include "ir/OperationVisitor.h"
+#include "ir/train/TrainableOperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace train
+{
+namespace operation
+{
+
+std::unique_ptr<ITrainableOperation> Softmax::clone() const
+{
+ return std::make_unique<Softmax>(*this);
+}
+
+void Softmax::accept(OperationVisitor &v) const { v.visit(*this); }
+
+void Softmax::accept(TrainableOperationVisitor &v) const { v.visit(*this); }
+
+Softmax::Softmax(const OperationType &operation)
+ : OperationType{operation.getInputs(), operation.getOutputs(), operation.param()}
+{
+ // DO NOTHING
+}
+
+} // namespace operation
+} // namespace train
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/verifier/Verifier.cc b/runtime/onert/core/src/ir/verifier/Verifier.cc
index 09cbdcf2f..6260d29ff 100644
--- a/runtime/onert/core/src/ir/verifier/Verifier.cc
+++ b/runtime/onert/core/src/ir/verifier/Verifier.cc
@@ -39,11 +39,11 @@ bool DAGChecker::verify(const Graph &graph) const noexcept
OperationIndexMap<bool> visited;
operations.iterate(
- [&](const OperationIndex &index, const Operation &) { visited[index] = false; });
+ [&](const OperationIndex &index, const IOperation &) { visited[index] = false; });
OperationIndexMap<bool> on_stack = visited; // Copy from visited
- std::function<void(const OperationIndex &index, const Operation &)> dfs_recursive =
- [&](const OperationIndex &index, const Operation &node) -> void {
+ std::function<void(const OperationIndex &index, const IOperation &)> dfs_recursive =
+ [&](const OperationIndex &index, const IOperation &node) -> void {
if (on_stack[index])
cyclic = true;
if (visited[index])
@@ -51,7 +51,7 @@ bool DAGChecker::verify(const Graph &graph) const noexcept
visited[index] = true;
on_stack[index] = true;
- for (auto output : node.getOutputs() | Remove::DUPLICATED)
+ for (auto &&output : node.getOutputs() | Remove::DUPLICATED | Remove::UNDEFINED)
{
const auto &operand = graph.operands().at(output);
for (const auto &use : operand.getUses())
@@ -72,12 +72,12 @@ bool DAGChecker::verify(const Graph &graph) const noexcept
// EdgeConsistencyVerifier
//
-bool EdgeConsistencyChecker::verify(const Graph &graph) const noexcept
+bool EdgeChecker::verify(const Graph &graph) const noexcept
{
auto &operations = graph.operations();
uint32_t errors = 0;
- operations.iterate([&](const OperationIndex &index, const Operation &node) {
- for (auto operand_index : node.getInputs() | ir::Remove::UNDEFINED)
+ operations.iterate([&](const OperationIndex &index, const IOperation &node) {
+ for (auto &&operand_index : node.getInputs() | ir::Remove::UNDEFINED)
{
try
{
@@ -85,48 +85,60 @@ bool EdgeConsistencyChecker::verify(const Graph &graph) const noexcept
bool operand_has_use = operand.getUses().contains(index);
if (!operand_has_use)
{
- VERBOSE(EdgeConsistencyChecker) << "[ERROR] EDGE MISMATCH : Missing USE edge - Operand "
- << operand_index << " to Operation " << index
- << std::endl;
+ VERBOSE(EdgeChecker) << "[ERROR] EDGE MISMATCH : Missing USE edge - Operand "
+ << operand_index << " to Operation " << index << std::endl;
errors += 1;
}
}
catch (const std::out_of_range &e)
{
- VERBOSE(EdgeConsistencyChecker)
- << "[ERROR] OPEARAND NOT FOUND : Operation " << index << " has Operand "
- << operand_index << ", but the operand object is not present in the graph" << std::endl;
+ VERBOSE(EdgeChecker) << "[ERROR] OPEARAND NOT FOUND : Operation " << index
+ << " has Operand " << operand_index
+ << ", but the operand object is not present in the graph" << std::endl;
errors += 1;
}
}
- for (auto operand_index : node.getOutputs())
+ for (auto &&operand_index : node.getOutputs() | ir::Remove::UNDEFINED)
{
try
{
auto &operand = graph.operands().at(operand_index);
if (operand.getDef() != index)
{
- VERBOSE(EdgeConsistencyChecker) << "[ERROR] EDGE MISMATCH : Missing DEF edge - Operand"
- << operand_index << " to Operation " << index
- << std::endl;
+ VERBOSE(EdgeChecker) << "[ERROR] EDGE MISMATCH : Missing DEF edge - Operand"
+ << operand_index << " to Operation " << index << std::endl;
errors += 1;
}
}
catch (const std::out_of_range &e)
{
- VERBOSE(EdgeConsistencyChecker)
- << "[ERROR] OPEARAND NOT FOUND : Operation " << index << " has Operand "
- << operand_index << ", but the operand object is not present in the graph" << std::endl;
+ VERBOSE(EdgeChecker) << "[ERROR] OPEARAND NOT FOUND : Operation " << index
+ << " has Operand " << operand_index
+ << ", but the operand object is not present in the graph" << std::endl;
errors += 1;
}
}
});
- VERBOSE(EdgeConsistencyChecker) << "Total Number of errors : " << errors << std::endl;
+ VERBOSE(EdgeChecker) << "Total Number of errors : " << errors << std::endl;
return errors == 0;
}
+bool InputOutputChecker::verify(const Graph &graph) const noexcept
+{
+ for (auto &&operand_ind :
+ (graph.getInputs() + graph.getOutputs()) | Remove::DUPLICATED | Remove::UNDEFINED)
+ {
+ if (!graph.operands().exist(operand_ind))
+ {
+ VERBOSE(InputOutputChecker) << "Input or Output tensor " << operand_ind << " does not exist.";
+ return false;
+ }
+ }
+ return true;
+}
+
} // namespace verifier
} // namespace ir
} // namespace onert
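
Note: structurally, DAGChecker is unchanged here (only retyped from Operation to IOperation): it is the standard visited/on_stack depth-first search, where a cycle exists exactly when an edge reaches a node that is still on the DFS stack. The same test over a plain adjacency list, for reference:

#include <functional>
#include <vector>

// Standalone version of the cycle test used by DAGChecker above.
bool has_cycle(const std::vector<std::vector<int>> &adj)
{
  std::vector<bool> visited(adj.size(), false), on_stack(adj.size(), false);
  bool cyclic = false;
  std::function<void(int)> dfs = [&](int n) {
    if (on_stack[n])
      cyclic = true;
    if (visited[n])
      return;
    visited[n] = true;
    on_stack[n] = true;
    for (int next : adj[n])
      dfs(next);
    on_stack[n] = false;
  };
  for (std::size_t n = 0; n < adj.size(); ++n)
    dfs(static_cast<int>(n));
  return cyclic;
}
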
diff --git a/runtime/onert/core/src/ir/verifier/Verifier.h b/runtime/onert/core/src/ir/verifier/Verifier.h
index 0c7b57b04..fa1311983 100644
--- a/runtime/onert/core/src/ir/verifier/Verifier.h
+++ b/runtime/onert/core/src/ir/verifier/Verifier.h
@@ -55,7 +55,16 @@ public:
bool verify(const Graph &graph) const noexcept override;
};
-class EdgeConsistencyChecker : public IVerifier
+class EdgeChecker : public IVerifier
+{
+public:
+ bool verify(const Graph &graph) const noexcept override;
+};
+
+/**
+ * @brief Check that model input and output operands really exist in the graph
+ */
+class InputOutputChecker : public IVerifier
{
public:
bool verify(const Graph &graph) const noexcept override;
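
Note: with EdgeConsistencyChecker renamed to EdgeChecker and InputOutputChecker added, the three checkers share the IVerifier interface above, so callers can run them as a set. A minimal sketch (the actual call sites are outside this hunk):

#include <memory>
#include <vector>

#include "Verifier.h"
#include "ir/Graph.h"

bool verify_all(const onert::ir::Graph &graph)
{
  std::vector<std::unique_ptr<onert::ir::verifier::IVerifier>> verifiers;
  verifiers.push_back(std::make_unique<onert::ir::verifier::DAGChecker>());
  verifiers.push_back(std::make_unique<onert::ir::verifier::EdgeChecker>());
  verifiers.push_back(std::make_unique<onert::ir::verifier::InputOutputChecker>());
  for (const auto &v : verifiers)
    if (!v->verify(graph))
      return false;
  return true;
}
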
diff --git a/runtime/onert/core/src/ir/verifier/Verifier.test.cc b/runtime/onert/core/src/ir/verifier/Verifier.test.cc
new file mode 100644
index 000000000..1ec71cd55
--- /dev/null
+++ b/runtime/onert/core/src/ir/verifier/Verifier.test.cc
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Verifier.h"
+
+#include "../MockNode.h"
+
+#include "ir/Graph.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+
+using IndexSet = onert::ir::OperandIndexSequence;
+using Mock = onert_test::ir::SimpleMock;
+
+TEST(Verifier, dag_checker)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::Shape shape{3};
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ auto operand1 = graph.addOperand(shape, type);
+ auto operand2 = graph.addOperand(shape, type);
+
+ graph.addInput(operand1);
+ graph.addOutput(operand2);
+
+ graph.addOperation(std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2}));
+
+ onert::ir::verifier::DAGChecker verifier;
+
+ ASSERT_TRUE(verifier.verify(graph));
+}
+
+TEST(Verifier, neg_edge_consistency_checker_1)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::Shape shape{3};
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ auto operand1 = graph.addOperand(shape, type);
+ auto operand2 = graph.addOperand(shape, type);
+
+ graph.addInput(operand1);
+ graph.addOutput(operand2);
+
+ auto mock_op = std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2});
+ auto op_ind = graph.addOperation(std::move(mock_op));
+
+ graph.operands().at(operand1).removeUse(op_ind); // Manipulate the operand alone
+
+ onert::ir::verifier::EdgeChecker verifier;
+ ASSERT_FALSE(verifier.verify(graph));
+}
+
+TEST(Verifier, neg_edge_consistency_checker_2)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::Shape shape{3};
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ auto operand1 = graph.addOperand(shape, type);
+ auto operand2 = graph.addOperand(shape, type);
+
+ graph.addInput(operand1);
+ graph.addOutput(operand2);
+
+ auto mock_op = std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2});
+ auto mock_op_ptr = mock_op.get();
+ auto op_ind = graph.addOperation(std::move(mock_op));
+
+ mock_op_ptr->setInputs({operand2}); // Manipulate the operation alone
+
+ onert::ir::verifier::EdgeChecker verifier;
+ ASSERT_FALSE(verifier.verify(graph));
+}
diff --git a/runtime/onert/core/src/odc/QuantizeManager.cc b/runtime/onert/core/src/odc/QuantizeManager.cc
new file mode 100644
index 000000000..71572a7e0
--- /dev/null
+++ b/runtime/onert/core/src/odc/QuantizeManager.cc
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "QuantizerLoader.h"
+#include "odc/QuantizeManager.h"
+
+#include <mutex>
+#include <stdexcept>
+
+namespace onert
+{
+namespace odc
+{
+
+bool QuantizeManager::quantize()
+{
+ // The quantize function is not thread-safe, so serialize calls with a function-local mutex
+ static std::mutex lock;
+ std::lock_guard<std::mutex> guard(lock);
+
+ if (_export_model_path.empty())
+ throw std::runtime_error("Export model path is not set");
+
+ auto &quantize_loader = QuantizerLoader::instance();
+ if (quantize_loader.loadLibrary() != 0)
+ return false;
+
+ auto quantizer = quantize_loader.get();
+ auto result = quantizer->quantize(_model_path.c_str(), _export_model_path.c_str(), _is_q16);
+
+ // TODO Unload quantize library to reduce memory usage
+
+ return (result == 0);
+}
+
+} // namespace odc
+} // namespace onert
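
Note: the expected call sequence for QuantizeManager, matching the tests that follow: construct with the source model path, set the export path, then call quantize(). quantize() throws if the export path was never set and returns false when libonert_odc cannot be loaded or the quantizer reports failure. Sketch (file names are illustrative):

#include "odc/QuantizeManager.h"

int main()
{
  onert::odc::QuantizeManager manager("model.circle"); // source float model
  manager.exportModelPath("model.q8.circle");          // must be set before quantize()
  return manager.quantize() ? 0 : 1;
}
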
diff --git a/runtime/onert/core/src/odc/QuantizeManager.test.cc b/runtime/onert/core/src/odc/QuantizeManager.test.cc
new file mode 100644
index 000000000..4e155a6ef
--- /dev/null
+++ b/runtime/onert/core/src/odc/QuantizeManager.test.cc
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "odc/QuantizeManager.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert::odc;
+
+// Test quantize() when the export model path is not set
+TEST(odc_QuantizeManager, neg_export_model_path)
+{
+ QuantizeManager manager("model_path");
+ ASSERT_THROW(manager.quantize(), std::runtime_error);
+}
+
+// Test invalid model path
+TEST(odc_QuantizeManager, neg_invalid_model_path)
+{
+ QuantizeManager manager("invalid_model_path.circle");
+ manager.exportModelPath("export_model_path.circle");
+ ASSERT_EQ(manager.quantize(), false);
+}
diff --git a/runtime/onert/core/src/odc/QuantizerLoader.cc b/runtime/onert/core/src/odc/QuantizerLoader.cc
new file mode 100644
index 000000000..8a972e97e
--- /dev/null
+++ b/runtime/onert/core/src/odc/QuantizerLoader.cc
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "QuantizerLoader.h"
+
+#include <dlfcn.h>
+#include <iostream>
+#include <string>
+
+static const char *SHARED_LIB_EXT =
+#if defined(__APPLE__) && defined(__MACH__)
+ ".dylib";
+#else
+ ".so";
+#endif
+
+namespace onert
+{
+namespace odc
+{
+
+QuantizerLoader &QuantizerLoader::instance()
+{
+ static QuantizerLoader singleton;
+ return singleton;
+}
+
+int32_t QuantizerLoader::loadLibrary()
+{
+ if (get() != nullptr)
+ return 0;
+
+ const std::string quantize_so = std::string("libonert_odc") + SHARED_LIB_EXT;
+ void *handle = dlopen(quantize_so.c_str(), RTLD_LAZY | RTLD_LOCAL);
+ auto dlerror_msg = dlerror();
+
+ if (handle == nullptr)
+ {
+ std::cerr << "Failed to load " << quantize_so << std::endl;
+ std::cerr << dlerror_msg << std::endl;
+ return 1;
+ }
+
+ {
+ const char *factory_name = "create_quantizer";
+ auto factory = (factory_t)dlsym(handle, factory_name);
+ dlerror_msg = dlerror();
+
+ if (factory == nullptr)
+ {
+ std::cerr << "QuantizerLoader: unable to find function " << factory_name << dlerror_msg
+ << std::endl;
+ dlclose(handle);
+ return 1;
+ }
+
+ auto destroyer = (quantizer_destroy_t)dlsym(handle, "destroy_quantizer");
+ _quantizer = std::unique_ptr<IQuantizer, quantizer_destroy_t>(factory(), destroyer);
+
+ if (_quantizer == nullptr)
+ {
+ std::cerr << "QuantizerLoader: unable to create quantizer" << std::endl;
+ dlclose(handle);
+ return 1;
+ }
+ }
+
+ // Keep the library handle; losing it without dlclose() triggers a static-analysis warning
+ // clang-format off
+ _dlhandle = std::unique_ptr<void, dlhandle_destroy_t>{handle, [filename = quantize_so](void *h) {
+ if (dlclose(h) != 0)
+ std::cerr << "Failed to unload backend " << filename << std::endl;
+ }};
+ // clang-format on
+
+ return 0;
+}
+
+int32_t QuantizerLoader::unloadLibrary()
+{
+ if (get() == nullptr)
+ return 0;
+
+ _quantizer.reset(nullptr);
+ _dlhandle.reset(nullptr);
+
+ return 0;
+}
+
+} // namespace odc
+} // namespace onert
diff --git a/runtime/onert/core/src/odc/QuantizerLoader.h b/runtime/onert/core/src/odc/QuantizerLoader.h
new file mode 100644
index 000000000..36a9f2996
--- /dev/null
+++ b/runtime/onert/core/src/odc/QuantizerLoader.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_ODC_QUANTIZER_LOADER_H__
+#define __ONERT_ODC_QUANTIZER_LOADER_H__
+
+#include "odc/IQuantizer.h"
+
+#include <functional>
+#include <memory>
+
+namespace onert
+{
+namespace odc
+{
+
+/**
+ * @brief Class to manage loading and unloading of dynamic library containing
+ * implementation of IQuantizer interface
+ */
+class QuantizerLoader
+{
+public:
+ /**
+ * @brief Typedef for function pointer to destroy loaded library handle
+ */
+ using dlhandle_destroy_t = std::function<void(void *)>;
+ /**
+ * @brief Typedef for function pointer to create instance of IQuantizer
+ */
+ using factory_t = IQuantizer *(*)();
+ /**
+ * @brief Typedef for function pointer to destroy instance of IQuantizer
+ */
+ using quantizer_destroy_t = void (*)(IQuantizer *);
+
+ /**
+ * @brief Get singleton instance of QuantizerLoader
+ * @return Reference to singleton instance of QuantizerLoader
+ */
+ static QuantizerLoader &instance();
+
+private:
+ // Cannot create instance of QuantizerLoader outside of this class
+ QuantizerLoader() = default;
+ QuantizerLoader(QuantizerLoader const &) = delete;
+ QuantizerLoader &operator=(QuantizerLoader const &) = delete;
+ ~QuantizerLoader() = default;
+
+public:
+ /**
+ * @brief Load dynamic library containing implementation of IQuantizer
+ * @return 0 if success, non-zero otherwise
+ */
+ int32_t loadLibrary();
+ /**
+ * @brief Unload dynamic library containing implementation of IQuantizer
+ * @return 0 if success, non-zero otherwise
+ */
+ int32_t unloadLibrary();
+ /**
+ * @brief Get instance of IQuantizer created through factory method
+ * @return Pointer to instance of IQuantizer
+ */
+ IQuantizer *get() const { return _quantizer.get(); }
+
+private:
+ // Note: Keep the handle to avoid the svace warning "handle lost without dlclose()"
+ std::unique_ptr<void, dlhandle_destroy_t> _dlhandle;
+ std::unique_ptr<IQuantizer, quantizer_destroy_t> _quantizer{nullptr, nullptr};
+};
+
+} // namespace odc
+} // namespace onert
+
+#endif // __ONERT_ODC_QUANTIZER_LOADER_H__
diff --git a/runtime/onert/core/src/odc/QuantizerLoader.test.cc b/runtime/onert/core/src/odc/QuantizerLoader.test.cc
new file mode 100644
index 000000000..112e65b27
--- /dev/null
+++ b/runtime/onert/core/src/odc/QuantizerLoader.test.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "QuantizerLoader.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert::odc;
+
+// Test QuantizerLoader singleton
+TEST(odc_QuantizerLoader, singleton)
+{
+ QuantizerLoader &loader1 = QuantizerLoader::instance();
+ QuantizerLoader &loader2 = QuantizerLoader::instance();
+ ASSERT_EQ(&loader1, &loader2);
+}
+
+// Test load quantizer library
+TEST(odc_QuantizerLoader, load)
+{
+ QuantizerLoader &loader = QuantizerLoader::instance();
+ // Unload because it may have been loaded by previous tests
+ ASSERT_EQ(loader.unloadLibrary(), 0);
+
+ if (loader.loadLibrary() == 0)
+ {
+ // Load twice to check that repeated loading is safe
+ ASSERT_EQ(loader.loadLibrary(), 0);
+ }
+}
+
+// Get the quantizer without loading the quantizer library
+TEST(odc_QuantizerLoader, neg_get)
+{
+ QuantizerLoader &loader = QuantizerLoader::instance();
+ // Unload because it may have been loaded by previous tests
+ ASSERT_EQ(loader.unloadLibrary(), 0);
+ ASSERT_EQ(loader.get(), nullptr);
+}
+
+// Check quantizer function pointer when QuantizerLoader is unloaded
+TEST(odc_QuantizerLoader, neg_unload)
+{
+ QuantizerLoader &loader = QuantizerLoader::instance();
+ if (loader.loadLibrary() == 0)
+ ASSERT_NE(loader.get(), nullptr);
+
+ ASSERT_EQ(loader.unloadLibrary(), 0);
+ ASSERT_EQ(loader.get(), nullptr);
+}
diff --git a/runtime/onert/core/src/util/ChromeTracingEventWriter.cc b/runtime/onert/core/src/util/ChromeTracingEventWriter.cc
new file mode 100644
index 000000000..c3f5179df
--- /dev/null
+++ b/runtime/onert/core/src/util/ChromeTracingEventWriter.cc
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "EventWriter.h"
+
+#include <cassert>
+#include <sstream>
+#include <utility>
+#include <vector>
+
+// json type for ChromeTracingWriter
+namespace
+{
+
+std::string quote(const std::string &value)
+{
+ std::stringstream ss;
+ ss << '"' << value << '"';
+ return ss.str();
+}
+
+std::string field(const std::string &k, const std::string &v)
+{
+ std::stringstream ss;
+ ss << quote(k) << " : " << quote(v);
+ return ss.str();
+}
+
+struct Content // One Entry in Chrome Event Trace
+{
+ std::vector<std::pair<std::string, std::string>> flds;
+ std::vector<std::pair<std::string, std::string>> args;
+};
+
+std::string object(const Content &content)
+{
+ std::stringstream ss;
+
+ ss << "{ ";
+
+ ss << field(content.flds[0].first, content.flds[0].second);
+
+ for (uint32_t n = 1; n < content.flds.size(); ++n)
+ {
+ ss << ", " << field(content.flds.at(n).first, content.flds.at(n).second);
+ }
+
+ if (content.args.size() > 0)
+ {
+ ss << ", " << quote("args") << " : { ";
+ ss << field(content.args.at(0).first, content.args.at(0).second);
+
+ for (uint32_t n = 1; n < content.args.size(); ++n)
+ {
+ ss << ", " << field(content.args.at(n).first, content.args.at(n).second);
+ }
+
+ ss << "}";
+ }
+
+ ss << " }";
+
+ return ss.str();
+}
+
+void fill(Content &content, const DurationEvent &evt, const std::string &name,
+ const std::string &tid)
+{
+ content.flds.emplace_back("name", name);
+ content.flds.emplace_back("pid", "0");
+ content.flds.emplace_back("tid", tid);
+ content.flds.emplace_back("ph", evt.ph);
+ content.flds.emplace_back("ts", evt.ts);
+ content.args = evt.args;
+}
+
+void fill(Content &content, const CounterEvent &evt)
+{
+ assert(evt.name != "");
+
+ content.flds.emplace_back("name", evt.name);
+ content.flds.emplace_back("pid", "0");
+ content.flds.emplace_back("tid", evt.tid);
+ content.flds.emplace_back("ph", evt.ph);
+ content.flds.emplace_back("ts", evt.ts);
+ content.args = evt.args;
+}
+
+std::string object(const DurationEvent &evt, const std::string &name, const std::string &tid)
+{
+ Content content;
+
+ fill(content, evt, name, tid);
+
+ return ::object(content);
+}
+
+std::string object(const CounterEvent &evt)
+{
+ Content content;
+
+ fill(content, evt);
+
+ for (auto it = evt.values.begin(); it != evt.values.end(); ++it)
+ {
+ content.args.emplace_back(it->first, it->second);
+ }
+
+ return ::object(content);
+}
+
+std::string getSessionLabel(const DurationEvent &evt)
+{
+ return "$" + std::to_string(evt.session_index) + " sess";
+}
+
+std::string getSubgLabel(const DurationEvent &evt)
+{
+ return "$" + std::to_string(evt.subg_index) + " subg";
+}
+
+std::string getOpLabel(const OpSeqDurationEvent &evt)
+{
+ return "@" + std::to_string(evt.op_index) + " " + evt.op_name;
+}
+
+std::string getLabel(const DurationEvent &evt)
+{
+ if (auto evt_ptr = dynamic_cast<const OpSeqDurationEvent *>(&evt))
+ {
+ return getOpLabel(*evt_ptr);
+ }
+ else // SubgDurationEvent
+ {
+ return getSubgLabel(evt);
+ }
+}
+
+std::string getTid(const DurationEvent &evt)
+{
+ if (auto evt_ptr = dynamic_cast<const OpSeqDurationEvent *>(&evt))
+ {
+ return getSessionLabel(*evt_ptr) + ", " + getSubgLabel(*evt_ptr) + ", " + evt_ptr->backend;
+ }
+ else // SubgDurationEvent
+ {
+ return getSessionLabel(evt) + ", " + getSubgLabel(evt);
+ }
+}
+
+} // namespace
+
+void ChromeTracingWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &recorders)
+{
+ _os << "{\n";
+ _os << " " << quote("traceEvents") << ": [\n";
+
+ for (const auto &recorder : recorders)
+ {
+ flushOneRecord(*recorder);
+ }
+
+ _os << " { }\n";
+ _os << " ]\n";
+ _os << "}\n";
+}
+
+void ChromeTracingWriter::flushOneRecord(const EventRecorder &recorder)
+{
+ for (const auto &evt : recorder.duration_events())
+ {
+ const std::string name = getLabel(*evt);
+ const std::string tid = getTid(*evt);
+
+ _os << " " << object(*evt, name, tid) << ",\n";
+ }
+
+ for (const auto &evt : recorder.counter_events())
+ {
+ _os << " " << object(evt) << ",\n";
+ }
+}
diff --git a/runtime/onert/core/src/util/ConfigSource.cc b/runtime/onert/core/src/util/ConfigSource.cc
index 45cce662e..b7fcefc7a 100644
--- a/runtime/onert/core/src/util/ConfigSource.cc
+++ b/runtime/onert/core/src/util/ConfigSource.cc
@@ -15,13 +15,15 @@
*/
#include "util/ConfigSource.h"
-#include "util/GeneralConfigSource.h"
-#include "util/EnvConfigSource.h"
+#include "util/logging.h"
+
+#include <misc/EnvConfigSource.h>
+#include <misc/GeneralConfigSource.h>
+#include <misc/IConfigSource.h>
-#include <array>
#include <algorithm>
+#include <array>
#include <cassert>
-
#include <memory>
namespace onert
@@ -29,9 +31,26 @@ namespace onert
namespace util
{
+using namespace nnfw::misc;
+
static std::unique_ptr<IConfigSource> _source;
+static std::unique_ptr<IConfigSource> _source_ext;
void config_source(std::unique_ptr<IConfigSource> &&source) { _source = std::move(source); }
+void config_source_ext(std::unique_ptr<IConfigSource> &&source) { _source_ext = std::move(source); }
+
+void setConfigKeyValues(const CfgKeyValues &keyValues)
+{
+ auto configsrc = std::make_unique<GeneralConfigSource>();
+
+ for (auto it = keyValues.begin(); it != keyValues.end(); ++it)
+ {
+ VERBOSE(NNPKG_CONFIGS) << "(" << it->first << ") = (" << it->second << ")" << std::endl;
+ configsrc->set(it->first, it->second);
+ }
+
+ onert::util::config_source_ext(std::move(configsrc));
+}
static IConfigSource *config_source()
{
@@ -67,6 +86,15 @@ static std::string getConfigOrDefault(const std::string &key)
auto ret = config_source()->get(key);
if (ret.empty())
{
+ // If the key is not set in the primary source (e.g. environment), search the external source
+ if (_source_ext.get())
+ {
+ ret = _source_ext.get()->get(key);
+ }
+ }
+ // If still not found, fall back to the defaults table
+ if (ret.empty())
+ {
auto itr = defaults.find(key);
if (itr != defaults.end())
{
diff --git a/runtime/onert/core/src/util/EnvConfigSource.cc b/runtime/onert/core/src/util/EnvConfigSource.cc
deleted file mode 100644
index 0d25b7353..000000000
--- a/runtime/onert/core/src/util/EnvConfigSource.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "util/EnvConfigSource.h"
-
-#include <cstdlib>
-
-namespace onert
-{
-namespace util
-{
-
-std::string EnvConfigSource::get(const std::string &key) const
-{
- const char *value = std::getenv(key.c_str());
- if (value != nullptr)
- {
- return value;
- }
- else
- {
- return GeneralConfigSource::get(key);
- }
-}
-
-} // namespace util
-} // namespace onert
diff --git a/runtime/onert/core/src/util/EventCollector.cc b/runtime/onert/core/src/util/EventCollector.cc
index de37276bf..c1b9c4315 100644
--- a/runtime/onert/core/src/util/EventCollector.cc
+++ b/runtime/onert/core/src/util/EventCollector.cc
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "util/EventCollector.h"
+#include "EventCollector.h"
// C++ standard libraries
#include <chrono>
@@ -30,24 +30,62 @@ std::string timestamp(void)
{
auto now = std::chrono::steady_clock::now();
return std::to_string(
- std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count());
+ std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count());
}
-class DurationEventBuilder
+class DurationEventBuilder : public EventCollector::EventVisitor
{
public:
DurationEventBuilder(const std::string &ts) : _ts{ts} {}
- DurationEvent build(const std::string &tid, const std::string &name, const std::string &ph) const
+ std::unique_ptr<SubgDurationEvent> build(const EventCollector::SubgEvent &evt_collected,
+ const std::string &ph) const
{
- DurationEvent evt;
+ auto dur_evt = std::make_unique<SubgDurationEvent>();
- evt.name = name;
- evt.tid = tid;
- evt.ph = ph;
- evt.ts = _ts;
+ // The following will be set by a child of EventWriter:
+ // dur_evt.name, dur_evt.tid
+ dur_evt->ph = ph;
+ dur_evt->ts = _ts;
+ dur_evt->tracing_ctx = evt_collected.tracing_ctx;
- return evt;
+ dur_evt->session_index = evt_collected.session_index;
+ dur_evt->subg_index = evt_collected.subg_index;
+
+ dur_evt->args = evt_collected.userData;
+ {
+ dur_evt->args.emplace_back("session", std::to_string(evt_collected.session_index));
+ dur_evt->args.emplace_back("subgraph", std::to_string(evt_collected.subg_index));
+ }
+
+ return dur_evt;
+ }
+
+ std::unique_ptr<OpSeqDurationEvent> build(const EventCollector::OpSeqEvent &evt_collected,
+ const std::string &ph) const
+ {
+ auto dur_evt = std::make_unique<OpSeqDurationEvent>();
+
+ // The following will be set by a child of EventWriter:
+ // dur_evt.name, dur_evt.tid
+ dur_evt->ph = ph;
+ dur_evt->ts = _ts;
+ dur_evt->tracing_ctx = evt_collected.tracing_ctx;
+
+ dur_evt->session_index = evt_collected.session_index;
+ dur_evt->subg_index = evt_collected.subg_index;
+
+ dur_evt->backend = evt_collected.backend;
+ dur_evt->op_index = evt_collected.op_index;
+ dur_evt->op_name = evt_collected.op_name;
+
+ dur_evt->args = evt_collected.userData;
+ {
+ dur_evt->args.emplace_back("session", std::to_string(evt_collected.session_index));
+ dur_evt->args.emplace_back("subgraph", std::to_string(evt_collected.subg_index));
+ }
+
+ return dur_evt;
}
private:
@@ -86,19 +124,26 @@ inline void emit_rusage(EventRecorder *rec, const std::string &ts)
} // namespace
-void EventCollector::onEvent(const Event &event)
+template <typename EventT> void EventCollector::onEvent(const EventT &event)
{
auto ts = timestamp();
+ DurationEventBuilder builder(ts);
+
switch (event.edge)
{
case Edge::BEGIN:
- _rec->emit(DurationEventBuilder(ts).build(event.backend, event.label, "B"));
+ {
+ auto duration_evt = builder.build(event, "B");
+ _rec->emit(std::move(duration_evt));
break;
-
+ }
case Edge::END:
- _rec->emit(DurationEventBuilder(ts).build(event.backend, event.label, "E"));
+ {
+ auto duration_evt = builder.build(event, "E");
+ _rec->emit(std::move(duration_evt));
break;
+ }
}
// TODO: Add resource measurement (e.g. RSS)
@@ -107,3 +152,7 @@ void EventCollector::onEvent(const Event &event)
emit_rusage(_rec, ts);
#endif
}
+
+// template instantiation
+template void EventCollector::onEvent<EventCollector::SubgEvent>(const SubgEvent &event);
+template void EventCollector::onEvent<EventCollector::OpSeqEvent>(const OpSeqEvent &event);
diff --git a/runtime/onert/core/src/util/EventCollector.h b/runtime/onert/core/src/util/EventCollector.h
index 8154be592..effb72373 100644
--- a/runtime/onert/core/src/util/EventCollector.h
+++ b/runtime/onert/core/src/util/EventCollector.h
@@ -17,7 +17,13 @@
#ifndef __ONERT_UTIL_EVENT_COLLECTOR_H__
#define __ONERT_UTIL_EVENT_COLLECTOR_H__
-#include "util/EventRecorder.h"
+#include "EventRecorder.h"
+
+#include "util/TracingCtx.h"
+
+#include <string>
+#include <utility>
+#include <vector>
class EventCollector
{
@@ -28,11 +34,69 @@ public:
END
};
+ struct SubgEvent;
+ struct OpEvent;
+
+ class EventVisitor
+ {
+ public:
+ virtual ~EventVisitor() = default;
+
+ virtual std::unique_ptr<DurationEvent> visit(const SubgEvent &, const std::string &) const
+ {
+ throw std::runtime_error("Please implement");
+ }
+ virtual std::unique_ptr<DurationEvent> visit(const OpEvent &, const std::string &) const
+ {
+ throw std::runtime_error("Please implement");
+ }
+ };
+
struct Event
{
+ const onert::util::TracingCtx *tracing_ctx;
+
Edge edge;
+ uint32_t session_index;
+ uint32_t subg_index;
+
+ // user-defined data: pairs of (key, value)
+ std::vector<std::pair<std::string, std::string>> userData;
+
+ protected:
+ Event(const onert::util::TracingCtx *a_tracing_ctx, Edge a_edge, uint32_t a_subg_index)
+ : tracing_ctx(a_tracing_ctx), edge(a_edge), session_index(tracing_ctx->getSessionId()),
+ subg_index(a_subg_index)
+ { /* empty */
+ }
+
+ virtual ~Event() = default;
+ };
+
+ struct SubgEvent : public Event
+ {
+ // constructor for subgraph start and end event
+ SubgEvent(const onert::util::TracingCtx *a_tracing_ctx, Edge a_edge, uint32_t a_subg_index)
+ : Event(a_tracing_ctx, a_edge, a_subg_index)
+ { /* empty */
+ }
+ };
+
+ // TODO Rename this to OperationEvent
+ struct OpSeqEvent : public Event
+ {
std::string backend;
- std::string label;
+ uint32_t op_index;
+ std::string op_name;
+
+ OpSeqEvent(const onert::util::TracingCtx *a_tracing_ctx, Edge a_edge, uint32_t a_subg_index,
+ const std::string a_backend, uint32_t a_op_index, const std::string a_op_name)
+ : Event(a_tracing_ctx, a_edge, a_subg_index)
+ {
+ backend.assign(a_backend);
+ op_index = a_op_index;
+ op_name.assign(a_op_name);
+ }
};
public:
@@ -42,7 +106,7 @@ public:
}
public:
- void onEvent(const Event &event);
+ template <typename EventT> void onEvent(const EventT &event);
protected:
EventRecorder *_rec;
diff --git a/runtime/onert/core/src/util/EventCollectorGlobal.cc b/runtime/onert/core/src/util/EventCollectorGlobal.cc
deleted file mode 100644
index d09b95210..000000000
--- a/runtime/onert/core/src/util/EventCollectorGlobal.cc
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "util/EventCollectorGlobal.h"
-
-#include <cassert>
-#include <fstream>
-#include <iostream>
-
-#include "util/ConfigSource.h"
-
-namespace onert
-{
-namespace util
-{
-
-EventCollectorGlobal::EventCollectorGlobal() : _recorder{}, _collector{&_recorder}
-{
- // DO NOTHING
-}
-
-EventCollectorGlobal::~EventCollectorGlobal()
-{
- if (!_recorder.empty())
- {
- try
- {
- // TODO Need better way for saved file path than the hardcoded path
- std::ofstream ofs{"trace.global.json"};
- _recorder.writeToFile(ofs);
- }
- catch (const std::exception &e)
- {
- std::cerr << "E: Fail to record event in EventCollectorGlobal: " << e.what() << std::endl;
- }
- }
-}
-
-EventCollectorGlobal &EventCollectorGlobal::get()
-{
- static EventCollectorGlobal instance;
- return instance;
-}
-
-EventDurationBlock::EventDurationBlock(const std::string &tag) : _tag{tag}
-{
- auto &glob = EventCollectorGlobal::get();
- glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "0", _tag});
-}
-EventDurationBlock::~EventDurationBlock()
-{
- auto &glob = EventCollectorGlobal::get();
- glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::END, "0", _tag});
-}
-
-EventDurationManual::EventDurationManual(const std::string &tag) : _tag{tag}, _pair{true} {}
-
-EventDurationManual::~EventDurationManual()
-{
- // Check if it has called begin-end pair
- assert(_pair);
-}
-
-void EventDurationManual::begin()
-{
- _pair = false;
- auto &glob = EventCollectorGlobal::get();
- glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "0", _tag});
-}
-
-void EventDurationManual::end()
-{
- assert(!_pair);
- _pair = true;
- auto &glob = EventCollectorGlobal::get();
- glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::END, "0", _tag});
-}
-
-} // namespace util
-} // namespace onert
diff --git a/runtime/onert/core/src/util/EventCollectorGlobal.h b/runtime/onert/core/src/util/EventCollectorGlobal.h
deleted file mode 100644
index 1027ec84d..000000000
--- a/runtime/onert/core/src/util/EventCollectorGlobal.h
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_UTIL_EVENT_COLLECTOR_GLOBAL_H__
-#define __ONERT_UTIL_EVENT_COLLECTOR_GLOBAL_H__
-
-#include "util/EventRecorder.h"
-#include "util/EventCollector.h"
-
-namespace onert
-{
-namespace util
-{
-
-/**
- * @brief Singleton class for event collection from anywhere in code
- *
- */
-class EventCollectorGlobal
-{
-public:
- /**
- * @brief Get the singleton object of this class
- *
- * @return EventCollectorGlobal& Singleton object
- */
- static EventCollectorGlobal &get();
-
-public:
- /**
- * @brief Getter for event collector object
- *
- * @return EventCollector& Collector object
- */
- EventCollector &collector() { return _collector; }
-
-private:
- EventCollectorGlobal();
- ~EventCollectorGlobal();
-
-private:
- EventRecorder _recorder;
- EventCollector _collector;
-};
-
-/**
- * @brief Helper class for emitting duration event which is handled automatically with ctor/dtor
- *
- */
-class EventDurationBlock
-{
-public:
- /**
- * @brief Raise a duration event with type of BEGIN
- *
- * @param tag A label for the duration event
- */
- EventDurationBlock(const std::string &tag);
- /**
- * @brief Raise a duration event with type of END
- *
- */
- ~EventDurationBlock();
-
-private:
- std::string _tag;
-};
-
-/**
- * @brief Helper class for emitting duration event which is handled manually
- *
- * Usage:
- * {
- * ...
- * EventDurationManual duration("some tag");
- * duration.begin();
- * ...
- * ... // Code for duration
- * ...
- * duration.end();
- * }
- *
- */
-class EventDurationManual
-{
-public:
- /**
- * @brief Construct a new Event Duration Manual object
- *
- * @param tag A label for the duration object
- */
- EventDurationManual(const std::string &tag);
- /**
- * @brief Destroy the Event Duration Manual object
- *
- */
- ~EventDurationManual();
-
- /**
- * @brief Raise a duration event with type of BEGIN
- *
- */
- void begin();
- /**
- * @brief Raise a duration event with type of END
- *
- */
- void end();
-
-private:
- std::string _tag;
- bool _pair;
-};
-
-} // namespace util
-} // namespace onert
-
-/**
- * Helper Macro Definitions
- *
- * HOW TO USE
- *
- * void f(args)
- * {
- * EVENT_DURATION_FUNCTION();
- * ...
- * if(cond)
- * {
- * EVENT_DURATION_REGION("if branch");
- * ...
- * }
- * ...
- * }
- */
-
-#define EVENT_DURATION_FUNCTION() \
- ::onert::util::EventDurationBlock __event_duration__##__LINE__ { __FUNCTION__ }
-
-#define EVENT_DURATION_REGION(tag) \
- ::onert::util::EventDurationBlock __event_duration__##__LINE__ { tag }
-
-#endif // __ONERT_UTIL_EVENT_COLLECTOR_GLOBAL_H__
diff --git a/runtime/onert/core/src/util/EventRecorder.cc b/runtime/onert/core/src/util/EventRecorder.cc
index 13a599bed..85a588d38 100644
--- a/runtime/onert/core/src/util/EventRecorder.cc
+++ b/runtime/onert/core/src/util/EventRecorder.cc
@@ -14,396 +14,13 @@
* limitations under the License.
*/
-#include "util/EventRecorder.h"
+#include "EventRecorder.h"
-#include <sstream>
-#include <vector>
-#include <unordered_map>
-#include <json/json.h>
-#include <assert.h>
-#include <utility>
-#include <map>
-#include <set>
-#include <stdint.h>
-
-// json type for Chrome Event Trace
-namespace
-{
-
-std::string quote(const std::string &value)
-{
- std::stringstream ss;
- ss << '"' << value << '"';
- return ss.str();
-}
-
-std::string field(const std::string &k, const std::string &v)
-{
- std::stringstream ss;
- ss << quote(k) << " : " << quote(v);
- return ss.str();
-}
-
-struct Content // One Entry in Chrome Event Trace
-{
- std::vector<std::pair<std::string, std::string>> flds;
- std::vector<std::pair<std::string, std::string>> args;
-};
-
-std::string object(const Content &content)
-{
- std::stringstream ss;
-
- ss << "{ ";
-
- ss << field(content.flds[0].first, content.flds[0].second);
-
- for (uint32_t n = 1; n < content.flds.size(); ++n)
- {
- ss << ", " << field(content.flds.at(n).first, content.flds.at(n).second);
- }
-
- if (content.args.size() > 0)
- {
- ss << ", " << quote("args") << " : { ";
- ss << field(content.args.at(0).first, content.args.at(0).second);
-
- for (uint32_t n = 1; n < content.args.size(); ++n)
- {
- ss << ", " << field(content.args.at(n).first, content.args.at(n).second);
- }
-
- ss << "}";
- }
-
- ss << " }";
-
- return ss.str();
-}
-
-void fill(Content &content, const Event &evt)
-{
- content.flds.emplace_back("name", evt.name);
- content.flds.emplace_back("pid", "0");
- content.flds.emplace_back("tid", evt.tid);
- content.flds.emplace_back("ph", evt.ph);
- content.flds.emplace_back("ts", evt.ts);
-}
-
-std::string object(const DurationEvent &evt)
-{
- Content content;
-
- fill(content, evt);
-
- return ::object(content);
-}
-
-std::string object(const CounterEvent &evt)
-{
- Content content;
-
- fill(content, evt);
-
- for (auto it = evt.values.begin(); it != evt.values.end(); ++it)
- {
- content.args.emplace_back(it->first, it->second);
- }
-
- return ::object(content);
-}
-
-} // namespace
-
-// md table type
-namespace
-{
-
-void writeMDTableRow(std::ostream &os, const std::vector<std::string> &list)
-{
- os << "| ";
- for (auto &key : list)
- {
- os << key << " | ";
- }
- os << "\n";
-}
-
-struct MDContent
-{
- std::string name;
- uint64_t begin_ts;
- uint64_t end_ts;
- uint32_t min_rss;
- uint32_t max_rss;
- uint32_t min_page_reclaims;
- uint32_t max_page_reclaims;
-
- MDContent()
- : begin_ts(0), end_ts(0), min_rss(UINT32_MAX), max_rss(0), min_page_reclaims(UINT32_MAX),
- max_page_reclaims(0)
- {
- // DO NOTHING
- }
-
- virtual ~MDContent() = default;
-
- void updateRss(uint32_t rss)
- {
- if (min_rss == UINT32_MAX)
- min_rss = rss;
- if (max_rss == 0)
- max_rss = rss;
-
- if (min_rss > rss)
- min_rss = rss;
- else if (max_rss < rss)
- max_rss = rss;
- }
-
- void updateMinflt(uint32_t minflt)
- {
- if (min_page_reclaims == UINT32_MAX)
- min_page_reclaims = minflt;
- if (max_page_reclaims == 0)
- max_page_reclaims = minflt;
-
- if (min_page_reclaims > minflt)
- min_page_reclaims = minflt;
- else if (max_page_reclaims < minflt)
- max_page_reclaims = minflt;
- }
-
- virtual void write(std::ostream &os) const = 0;
-};
-
-struct OpSeq : public MDContent
-{
- std::string backend;
- uint64_t graph_latency;
-
- struct OpSeqCmp
- {
- bool operator()(const OpSeq &lhs, const OpSeq &rhs) const
- {
- return lhs.begin_ts < rhs.begin_ts;
- }
- bool operator()(const OpSeq &lhs, const OpSeq &rhs) { return lhs.begin_ts < rhs.begin_ts; }
- bool operator()(OpSeq &lhs, OpSeq &rhs) { return lhs.begin_ts < rhs.begin_ts; }
- };
-
- void write(std::ostream &os) const override
- {
- uint64_t opseq_latency = end_ts - begin_ts;
- double opseq_per = static_cast<double>(opseq_latency) / graph_latency * 100.0;
- writeMDTableRow(os, {name, backend, std::to_string(opseq_latency), std::to_string(opseq_per),
- std::to_string(min_rss), std::to_string(max_rss),
- std::to_string(min_page_reclaims), std::to_string(max_page_reclaims)});
- }
-};
-
-struct Graph : public MDContent
-{
- std::set<OpSeq, OpSeq::OpSeqCmp> opseqs;
-
- void setOpSeqs(const std::map<std::string, OpSeq> &name_to_opseq)
- {
- uint64_t graph_latency = end_ts - begin_ts;
- for (auto it : name_to_opseq)
- {
- auto opseq = it.second;
- opseq.graph_latency = graph_latency;
-
- opseqs.insert(opseq);
-
- updateRss(opseq.min_rss);
- updateRss(opseq.max_rss);
- updateMinflt(opseq.min_page_reclaims);
- updateMinflt(opseq.max_page_reclaims);
- }
- }
-
- void write(std::ostream &os) const override
- {
- static std::vector<std::string> graph_headers{"latency(us)", "rss_min(kb)", "rss_max(kb)",
- "page_reclaims_min", "page_reclaims_max"};
-
- static std::vector<std::string> graph_headers_line{"-----------", "-------", "-------",
- "-----------------", "-----------------"};
-
- // Graph's Header
- writeMDTableRow(os, graph_headers);
- writeMDTableRow(os, graph_headers_line);
-
- // Graph's contents
- writeMDTableRow(os, {std::to_string(end_ts - begin_ts), std::to_string(min_rss),
- std::to_string(max_rss), std::to_string(min_page_reclaims),
- std::to_string(max_page_reclaims)});
-
- os << "\n";
-
- static std::vector<std::string> opseq_headers{
- "OpSeq name", "backend", "latency(us)", "latency(%)",
- "rss_min(kb)", "rss_max(kb)", "page_reclaims_min", "page_reclaims_max"};
-
- static std::vector<std::string> opseq_headers_line{
- "----------", "-------", "-----------", "-----------",
- "-------", "-------", "-----------------", "-----------------"};
-
- os << "## OpSequences \n";
-
- // OpSeq's Header
- writeMDTableRow(os, opseq_headers);
- writeMDTableRow(os, opseq_headers_line);
-
- // OpSeq's contents
- for (auto opseq : opseqs)
- {
- opseq.write(os);
- }
-
- os << "\n";
- }
-};
-
-struct MDTableBuilder
-{
- MDTableBuilder(const std::vector<DurationEvent> &duration_events,
- const std::vector<CounterEvent> &counter_events)
- : _duration_events(duration_events), _counter_events(counter_events)
- {
- for (const auto &evt : _counter_events)
- {
- uint64_t ts = std::stoull(evt.ts);
- auto &name = evt.name;
- assert(name.compare("maxrss") == 0 || name.compare("minflt") == 0);
- assert(evt.values.size() == 1);
- auto &val = evt.values.begin()->second;
- if (_ts_to_values.find(ts) == _ts_to_values.end())
- {
- std::pair<uint32_t, uint32_t> values;
- if (name.compare("maxrss") == 0)
- values.first = std::stoul(val);
- else
- values.second = std::stoul(val);
- _ts_to_values.insert({ts, values});
- }
- else
- {
- auto &values = _ts_to_values.at(ts);
- if (name.compare("maxrss") == 0)
- values.first = std::stoul(val);
- else
- values.second = std::stoul(val);
- }
- }
- }
-
- MDTableBuilder &build()
- {
- for (auto &it : divideGraph())
- {
- size_t begin_idx = it.first;
- size_t end_idx = it.second;
- std::map<std::string, OpSeq> name_to_opseq;
- for (size_t i = begin_idx + 1; i < end_idx; ++i)
- {
- const auto &evt = _duration_events[i];
- assert(evt.name.compare("Graph") != 0);
- assert(evt.ph.compare("B") == 0 || evt.ph.compare("E") == 0);
- if (evt.ph.compare("B") == 0)
- {
- assert(name_to_opseq.find(evt.name) == name_to_opseq.end());
- name_to_opseq.insert({evt.name, makeOpSeq(evt)});
- }
- else
- {
- assert(name_to_opseq.find(evt.name) != name_to_opseq.end());
- auto &opseq = name_to_opseq.at(evt.name);
- updateOpSeq(opseq, evt);
- }
- }
-
- _graphs.emplace_back(makeGraph(begin_idx, end_idx, name_to_opseq));
- }
-
- return *this;
- }
-
- std::vector<std::pair<size_t, size_t>> divideGraph()
- {
- std::vector<std::pair<size_t, size_t>> graph_idx_list; // pair<begin_idx, end_idx>
- for (size_t i = 0, begin_idx = 0; i < _duration_events.size(); ++i)
- {
- const auto &evt = _duration_events.at(i);
- if (evt.name.compare("Graph") == 0)
- {
- if (evt.ph.compare("B") == 0)
- begin_idx = i;
- else
- graph_idx_list.emplace_back(begin_idx, i);
- }
- }
- return graph_idx_list;
- }
-
- OpSeq makeOpSeq(const DurationEvent &evt)
- {
- OpSeq opseq;
- opseq.name = evt.name;
- opseq.begin_ts = std::stoull(evt.ts);
- opseq.updateRss(_ts_to_values.at(opseq.begin_ts).first);
- opseq.updateMinflt(_ts_to_values.at(opseq.begin_ts).second);
- opseq.backend = evt.tid;
- return opseq;
- }
-
- void updateOpSeq(OpSeq &opseq, const DurationEvent &evt)
- {
- opseq.end_ts = std::stoull(evt.ts);
- opseq.updateRss(_ts_to_values.at(opseq.end_ts).first);
- opseq.updateMinflt(_ts_to_values.at(opseq.end_ts).second);
- }
-
- Graph makeGraph(size_t begin_idx, size_t end_idx,
- const std::map<std::string, OpSeq> &name_to_opseq)
- {
- Graph graph;
- graph.name = "Graph";
- graph.begin_ts = std::stoull(_duration_events[begin_idx].ts);
- graph.updateRss(_ts_to_values.at(graph.begin_ts).first);
- graph.updateMinflt(_ts_to_values.at(graph.begin_ts).second);
- graph.end_ts = std::stoull(_duration_events[end_idx].ts);
- graph.updateRss(_ts_to_values.at(graph.end_ts).first);
- graph.updateMinflt(_ts_to_values.at(graph.end_ts).second);
- graph.setOpSeqs(name_to_opseq);
- return graph;
- }
-
- void write(std::ostream &os)
- {
- // Write contents
- for (size_t i = 0; i < _graphs.size(); ++i)
- {
- os << "# Graph " << i << "\n";
- _graphs.at(i).write(os);
- }
- }
-
- const std::vector<DurationEvent> &_duration_events;
- const std::vector<CounterEvent> &_counter_events;
- // timestamp to std::pair<maxrss, minflt>
- std::unordered_map<uint64_t, std::pair<uint32_t, uint32_t>> _ts_to_values;
- std::vector<Graph> _graphs;
-};
-
-} // namespace
-
-void EventRecorder::emit(const DurationEvent &evt)
+void EventRecorder::emit(std::unique_ptr<DurationEvent> &&evt)
{
std::lock_guard<std::mutex> lock{_mu};
- _duration_events.push_back(evt);
+ _duration_events.push_back(std::move(evt));
}
void EventRecorder::emit(const CounterEvent &evt)
@@ -412,146 +29,3 @@ void EventRecorder::emit(const CounterEvent &evt)
_counter_events.push_back(evt);
}
-
-void EventRecorder::writeToFile(std::ostream &os)
-{
- std::lock_guard<std::mutex> lock{_mu};
-
- switch (_write_format)
- {
- case WriteFormat::CHROME_TRACING:
- writeChromeTrace(os);
- break;
- case WriteFormat::SNPE_BENCHMARK:
- writeSNPEBenchmark(os);
- break;
- case WriteFormat::MD_TABLE:
- writeMDTable(os);
- break;
- default:
- assert(!"Invalid value");
- break;
- }
-}
-
-void EventRecorder::writeSNPEBenchmark(std::ostream &os)
-{
- Json::Value root;
- auto &exec_data = root["Execution_Data"] = Json::Value{Json::objectValue};
-
- struct Stat
- {
- uint64_t sum = 0;
- uint64_t count = 0;
- uint64_t max = 0;
- uint64_t min = std::numeric_limits<uint64_t>::max();
-
- void accumulate(uint64_t val)
- {
- sum += val;
- count++;
- max = std::max(max, val);
- min = std::min(min, val);
- }
- };
-
- // Memory
- {
- std::unordered_map<std::string, Stat> mem_stats;
- for (auto &evt : _counter_events)
- {
- auto &mem_stat = mem_stats[evt.name];
- uint64_t val = std::stoull(evt.values["value"]);
- mem_stat.accumulate(val);
- }
-
- auto &mem = exec_data["memory"] = Json::Value{Json::objectValue};
- for (auto &kv : mem_stats)
- {
- auto &key = kv.first;
- auto &val = kv.second;
- mem[key]["Avg_Size"] = val.sum / val.count;
- mem[key]["Max_Size"] = val.max;
- mem[key]["Min_Size"] = val.min;
- mem[key]["Runtime"] = "NA";
- }
- }
-
- // Operation Execution Time
- {
- // NOTE This assumes _duration_events is sorted by "ts" ascending
-
- // 2D keys : stats[tid][name]
- std::unordered_map<std::string, std::unordered_map<std::string, Stat>> stats;
- std::unordered_map<std::string, std::unordered_map<std::string, uint64_t>> begin_timestamps;
- for (auto &evt : _duration_events)
- {
- auto &stat = stats[evt.tid][evt.name];
- auto &begin_ts = begin_timestamps[evt.tid][evt.name];
- uint64_t timestamp = std::stoull(evt.ts);
- if (evt.ph == "B")
- {
- if (begin_ts != 0)
- throw std::runtime_error{"Invalid Data"};
- begin_ts = timestamp;
- }
- else if (evt.ph == "E")
- {
- if (begin_ts == 0 || timestamp < begin_ts)
- throw std::runtime_error{"Invalid Data"};
- stat.accumulate(timestamp - begin_ts);
- begin_ts = 0;
- }
- else
- throw std::runtime_error{"Invalid Data - invalid value for \"ph\" : \"" + evt.ph + "\""};
- }
-
- for (auto &kv : begin_timestamps)
- for (auto &kv2 : kv.second)
- if (kv2.second != 0)
- throw std::runtime_error{"Invalid Data - B and E pair does not match."};
-
- for (auto &kv : stats)
- {
- auto &tid = kv.first;
- auto &map = kv.second;
- auto &json_tid = exec_data[tid] = Json::Value{Json::objectValue};
- for (auto &kv : map)
- {
- auto &name = kv.first;
- auto &val = kv.second;
- json_tid[name]["Avg_Time"] = val.sum / val.count;
- json_tid[name]["Max_Time"] = val.max;
- json_tid[name]["Min_Time"] = val.min;
- json_tid[name]["Runtime"] = tid;
- }
- }
- }
-
- os << root;
-}
-
-void EventRecorder::writeChromeTrace(std::ostream &os)
-{
- os << "{\n";
- os << " " << quote("traceEvents") << ": [\n";
-
- for (auto &evt : _duration_events)
- {
- os << " " << object(evt) << ",\n";
- }
-
- for (auto &evt : _counter_events)
- {
- os << " " << object(evt) << ",\n";
- }
-
- os << " { }\n";
- os << " ]\n";
- os << "}\n";
-}
-
-void EventRecorder::writeMDTable(std::ostream &os)
-{
- MDTableBuilder(_duration_events, _counter_events).build().write(os);
-}
diff --git a/runtime/onert/core/src/util/EventRecorder.h b/runtime/onert/core/src/util/EventRecorder.h
index 37ec1a0f1..5cf03d8ac 100644
--- a/runtime/onert/core/src/util/EventRecorder.h
+++ b/runtime/onert/core/src/util/EventRecorder.h
@@ -17,28 +17,52 @@
#ifndef __ONERT_UTIL_EVENT_RECORDER_H__
#define __ONERT_UTIL_EVENT_RECORDER_H__
+#include "util/TracingCtx.h"
+
#include <map>
#include <memory>
#include <mutex>
-#include <ostream>
#include <vector>
+// refer to https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/edit#
struct Event
{
- std::string name;
- std::string tid;
- std::string ph; /* REQUIRED */
- std::string ts; /* REQUIRED */
+ const onert::util::TracingCtx *tracing_ctx;
+
+ std::string ph; // Event type.
+ std::string ts; // timestamp of this event on the tracing clock
+ std::vector<std::pair<std::string, std::string>> args; // user-defined data: pairs of (key, value)
+
+ virtual ~Event() = default;
};
struct DurationEvent : public Event
{
- // TO BE FILLED
+ uint32_t session_index = 0;
+ uint32_t subg_index = 0;
+
+protected:
+ DurationEvent() = default;
+};
+
+struct SubgDurationEvent : public DurationEvent
+{ /* same as DurationEvent */
+};
+
+// TODO Rename it to OperationDurationEvent
+struct OpSeqDurationEvent : public DurationEvent
+{
+ // Note: DurationEvent's name and tid will be set by EventWriter
+ std::string backend;
+ uint32_t op_index;
+ std::string op_name;
};
struct CounterEvent : public Event
{
+ std::string name; // name of event
+ std::string tid; // thread ID
std::map<std::string, std::string> values;
};
@@ -50,35 +74,22 @@ struct CounterEvent : public Event
class EventRecorder
{
public:
- enum class WriteFormat
- {
- CHROME_TRACING,
- SNPE_BENCHMARK,
- MD_TABLE,
- };
-
-public:
EventRecorder() = default;
public:
- void emit(const DurationEvent &evt);
+ void emit(std::unique_ptr<DurationEvent> &&evt);
void emit(const CounterEvent &evt);
public:
- bool empty() { return _duration_events.empty() && _counter_events.empty(); }
- void writeToFile(std::ostream &os);
- void setWriteFormat(WriteFormat write_format) { _write_format = write_format; }
-
-private:
- void writeSNPEBenchmark(std::ostream &os);
- void writeChromeTrace(std::ostream &os);
- void writeMDTable(std::ostream &os);
+ const std::vector<std::unique_ptr<DurationEvent>> &duration_events() const
+ {
+ return _duration_events;
+ }
+ const std::vector<CounterEvent> &counter_events() const { return _counter_events; }
private:
std::mutex _mu;
- // TODO: Allow user to control write_format
- WriteFormat _write_format{WriteFormat::SNPE_BENCHMARK};
- std::vector<DurationEvent> _duration_events;
+ std::vector<std::unique_ptr<DurationEvent>> _duration_events;
std::vector<CounterEvent> _counter_events;
};
diff --git a/runtime/onert/core/src/util/EventWriter.cc b/runtime/onert/core/src/util/EventWriter.cc
new file mode 100644
index 000000000..ca4bd302e
--- /dev/null
+++ b/runtime/onert/core/src/util/EventWriter.cc
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "EventWriter.h"
+
+#include <cassert>
+
+// initialization
+std::mutex EventWriter::_mutex;
+
+void EventWriter::readyToFlush(std::unique_ptr<EventRecorder> &&recorder)
+{
+ {
+ std::unique_lock<std::mutex> lock{_mutex};
+
+ _recorders.emplace_back(std::move(recorder));
+
+ if (--_ref_count > 0)
+ return;
+ }
+ // The caller of this method is the last instance that uses EventWriter.
+ // Let's write log files.
+
+ // Note: per an internal issue, the SNPE JSON uses the plain file name rather than a '.snpe.json' suffix
+ flush(WriteFormat::SNPE_BENCHMARK);
+ flush(WriteFormat::CHROME_TRACING);
+ flush(WriteFormat::MD_TABLE);
+}
+
+void EventWriter::flush(WriteFormat write_format)
+{
+ auto *writer = _actual_writers[write_format].get();
+ assert(writer);
+
+ writer->flush(_recorders);
+}
diff --git a/runtime/onert/core/src/util/EventWriter.h b/runtime/onert/core/src/util/EventWriter.h
new file mode 100644
index 000000000..0a35a8508
--- /dev/null
+++ b/runtime/onert/core/src/util/EventWriter.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_UTIL_EVENT_WRITER_H__
+#define __ONERT_UTIL_EVENT_WRITER_H__
+
+#include "EventRecorder.h"
+
+#include <string>
+#include <vector>
+#include <unordered_map>
+#include <mutex>
+#include <fstream>
+
+class EventFormatWriter
+{
+public:
+ EventFormatWriter(const std::string &filepath) : _os{filepath, std::ofstream::out} {}
+ virtual ~EventFormatWriter()
+ { /* empty */
+ }
+
+ virtual void flush(const std::vector<std::unique_ptr<EventRecorder>> &) = 0;
+
+protected:
+ std::ofstream _os;
+};
+
+class SNPEWriter : public EventFormatWriter
+{
+public:
+ SNPEWriter(const std::string &filepath) : EventFormatWriter(filepath)
+ { /* empty */
+ }
+ ~SNPEWriter() {}
+
+ void flush(const std::vector<std::unique_ptr<EventRecorder>> &) override;
+};
+
+class ChromeTracingWriter : public EventFormatWriter
+{
+public:
+ ChromeTracingWriter(const std::string &filepath) : EventFormatWriter(filepath)
+ { /* empty */
+ }
+ ~ChromeTracingWriter() {}
+
+ void flush(const std::vector<std::unique_ptr<EventRecorder>> &) override;
+
+private:
+ void flushOneRecord(const EventRecorder &);
+};
+
+class MDTableWriter : public EventFormatWriter
+{
+public:
+ MDTableWriter(const std::string &filepath) : EventFormatWriter(filepath)
+ { /* empty */
+ }
+ ~MDTableWriter() {}
+
+ void flush(const std::vector<std::unique_ptr<EventRecorder>> &) override;
+};
+
+class EventWriter
+{
+public:
+ enum class WriteFormat
+ {
+ CHROME_TRACING,
+ SNPE_BENCHMARK,
+ MD_TABLE,
+ };
+
+ /**
+ * @brief Returns the singleton object
+ */
+ static EventWriter *get(const std::string &filename)
+ {
+ std::unique_lock<std::mutex> lock{_mutex};
+
+ static EventWriter singleton(filename);
+ return &singleton;
+ }
+
+ /**
+ * @brief Call this when an observer that uses EventWriter starts
+ */
+ void startToUse()
+ {
+ std::unique_lock<std::mutex> lock{_mutex};
+ _ref_count++;
+ }
+
+ /**
+ * @brief Call this when an observer that uses EventWriter finishes.
+ * Once every observer has called this method, the reference count reaches 0
+ * and EventWriter writes the profiling result files.
+ */
+ void readyToFlush(std::unique_ptr<EventRecorder> &&recorder);
+
+private:
+ EventWriter(const std::string &filepath) : _ref_count(0)
+ {
+ std::string snpe_log_name(filepath);
+ std::string chrome_tracing_log_name(filepath + ".chrome.json");
+ std::string md_table_log_name(filepath + ".table.md");
+
+ _actual_writers[WriteFormat::SNPE_BENCHMARK] = std::make_unique<SNPEWriter>(snpe_log_name);
+ _actual_writers[WriteFormat::CHROME_TRACING] =
+ std::make_unique<ChromeTracingWriter>(chrome_tracing_log_name);
+ _actual_writers[WriteFormat::MD_TABLE] = std::make_unique<MDTableWriter>(md_table_log_name);
+ }
+
+ void flush(WriteFormat write_format);
+
+private:
+ static std::mutex _mutex;
+
+ // number of observers (per executor) that want to write profiling data
+ int32_t _ref_count;
+
+ // one recorder object per executor
+ std::vector<std::unique_ptr<EventRecorder>> _recorders;
+
+ std::unordered_map<WriteFormat, std::unique_ptr<EventFormatWriter>> _actual_writers;
+};
+
+#endif // __ONERT_UTIL_EVENT_WRITER_H__
diff --git a/runtime/onert/core/src/util/GeneralConfigSource.cc b/runtime/onert/core/src/util/GeneralConfigSource.cc
deleted file mode 100644
index 7d2757e58..000000000
--- a/runtime/onert/core/src/util/GeneralConfigSource.cc
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "util/GeneralConfigSource.h"
-#include "util/logging.h"
-
-namespace onert
-{
-namespace util
-{
-
-std::string GeneralConfigSource::get(const std::string &key) const
-{
- auto itr = _map.find(key);
- if (itr == _map.end())
- {
- return "";
- }
- else
- {
- return itr->second;
- }
-}
-
-void GeneralConfigSource::set(const std::string &key, const std::string &val)
-{
- VERBOSE(GeneralConfigSource) << key << " : " << val << std::endl;
- _map[key] = val;
-}
-
-} // namespace util
-} // namespace onert
diff --git a/runtime/onert/core/src/util/Index.test.cc b/runtime/onert/core/src/util/Index.test.cc
new file mode 100644
index 000000000..ff73e5e59
--- /dev/null
+++ b/runtime/onert/core/src/util/Index.test.cc
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/Index.h"
+
+#include <gtest/gtest.h>
+
+using Index = ::onert::util::Index<uint32_t, struct TestTag>;
+
+TEST(Index, neg_index_test)
+{
+ Index idx1{1u};
+ Index idx2{2u};
+ Index idx3{idx1};
+
+ ASSERT_EQ(idx1, 1);
+ ASSERT_EQ(idx1, 1u);
+ ASSERT_EQ(idx1.value(), 1u);
+ ASSERT_NE(idx1, idx2);
+ ASSERT_EQ(idx1, idx3);
+}
diff --git a/runtime/onert/core/src/util/MDTableEventWriter.cc b/runtime/onert/core/src/util/MDTableEventWriter.cc
new file mode 100644
index 000000000..e7d90eec4
--- /dev/null
+++ b/runtime/onert/core/src/util/MDTableEventWriter.cc
@@ -0,0 +1,365 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "EventWriter.h"
+
+#include <cassert>
+#include <map>
+#include <set>
+#include <sstream>
+#include <stdint.h>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+// md table type
+namespace
+{
+
+void writeMDTableRow(std::ostream &os, const std::vector<std::string> &list)
+{
+ os << "| ";
+ for (const auto &key : list)
+ {
+ os << key << " | ";
+ }
+ os << "\n";
+}
+
+struct MDContent
+{
+ std::string name;
+ uint64_t begin_ts;
+ uint64_t end_ts;
+ uint32_t min_rss;
+ uint32_t max_rss;
+ uint32_t min_page_reclaims;
+ uint32_t max_page_reclaims;
+
+ MDContent()
+ : begin_ts(0), end_ts(0), min_rss(UINT32_MAX), max_rss(0), min_page_reclaims(UINT32_MAX),
+ max_page_reclaims(0)
+ {
+ // DO NOTHING
+ }
+
+ virtual ~MDContent() = default;
+
+ void updateRss(uint32_t rss)
+ {
+ if (min_rss == UINT32_MAX)
+ min_rss = rss;
+ if (max_rss == 0)
+ max_rss = rss;
+
+ if (min_rss > rss)
+ min_rss = rss;
+ else if (max_rss < rss)
+ max_rss = rss;
+ }
+
+ void updateMinflt(uint32_t minflt)
+ {
+ if (min_page_reclaims == UINT32_MAX)
+ min_page_reclaims = minflt;
+ if (max_page_reclaims == 0)
+ max_page_reclaims = minflt;
+
+ if (min_page_reclaims > minflt)
+ min_page_reclaims = minflt;
+ else if (max_page_reclaims < minflt)
+ max_page_reclaims = minflt;
+ }
+
+ virtual void write(std::ostream &os) const = 0;
+};
+
+struct Operation : public MDContent
+{
+ std::string backend;
+ uint64_t graph_latency;
+
+ struct OperationCmp
+ {
+ bool operator()(const Operation &lhs, const Operation &rhs) const
+ {
+ return lhs.begin_ts < rhs.begin_ts;
+ }
+ bool operator()(const Operation &lhs, const Operation &rhs)
+ {
+ return lhs.begin_ts < rhs.begin_ts;
+ }
+ bool operator()(Operation &lhs, Operation &rhs) { return lhs.begin_ts < rhs.begin_ts; }
+ };
+
+ void write(std::ostream &os) const override
+ {
+ uint64_t op_latency = end_ts - begin_ts;
+ double op_per = static_cast<double>(op_latency) / graph_latency * 100.0;
+ writeMDTableRow(os, {name, backend, std::to_string(op_latency), std::to_string(op_per),
+ std::to_string(min_rss), std::to_string(max_rss),
+ std::to_string(min_page_reclaims), std::to_string(max_page_reclaims)});
+ }
+};
+
+struct Graph : public MDContent
+{
+ std::set<Operation, Operation::OperationCmp> ops;
+ std::string session_index;
+ std::string subgraph_index;
+
+ void setOperations(const std::map<std::string, Operation> &name_to_op)
+ {
+ uint64_t graph_latency = end_ts - begin_ts;
+ for (auto &&it : name_to_op)
+ {
+ auto op = it.second;
+ op.graph_latency = graph_latency;
+
+ ops.insert(op);
+
+ updateRss(op.min_rss);
+ updateRss(op.max_rss);
+ updateMinflt(op.min_page_reclaims);
+ updateMinflt(op.max_page_reclaims);
+ }
+ }
+
+ void write(std::ostream &os) const override
+ {
+ static std::vector<std::string> graph_headers{"latency(us)", "rss_min(kb)", "rss_max(kb)",
+ "page_reclaims_min", "page_reclaims_max"};
+
+ static std::vector<std::string> graph_headers_line{"-----------", "-------", "-------",
+ "-----------------", "-----------------"};
+
+ // Graph's Header
+ writeMDTableRow(os, graph_headers);
+ writeMDTableRow(os, graph_headers_line);
+
+ // Graph's contents
+ writeMDTableRow(os, {std::to_string(end_ts - begin_ts), std::to_string(min_rss),
+ std::to_string(max_rss), std::to_string(min_page_reclaims),
+ std::to_string(max_page_reclaims)});
+
+ os << "\n";
+
+ static std::vector<std::string> op_headers{
+ "Op name", "backend", "latency(us)", "latency(%)",
+ "rss_min(kb)", "rss_max(kb)", "page_reclaims_min", "page_reclaims_max"};
+
+ static std::vector<std::string> op_headers_line{
+ "-------", "-------", "-----------", "-----------",
+ "-------", "-------", "-----------------", "-----------------"};
+
+ os << "## Op \n";
+
+ // Operation's Header
+ writeMDTableRow(os, op_headers);
+ writeMDTableRow(os, op_headers_line);
+
+ // Operation's contents
+ for (auto &&op : ops)
+ {
+ op.write(os);
+ }
+
+ os << "\n";
+ }
+};
+
+std::string getLabel(const OpSeqDurationEvent &evt)
+{
+ std::string subg_label("$" + std::to_string(evt.subg_index) + " subgraph");
+ std::string op_label("@" + std::to_string(evt.op_index) + " " + evt.op_name);
+
+ return subg_label + " " + op_label;
+}
+
+struct MDTableBuilder
+{
+ MDTableBuilder(const std::vector<std::unique_ptr<DurationEvent>> &duration_events,
+ const std::vector<CounterEvent> &counter_events)
+ : _duration_events(duration_events), _counter_events(counter_events)
+ {
+// TODO: Also enable this in release builds once it is ready with low overhead
+#ifdef DEBUG
+ for (const auto &evt : _counter_events)
+ {
+ uint64_t ts = std::stoull(evt.ts);
+ auto &name = evt.name;
+ assert(name.compare("maxrss") == 0 || name.compare("minflt") == 0);
+ assert(evt.values.size() == 1);
+ auto &val = evt.values.begin()->second;
+ if (_ts_to_values.find(ts) == _ts_to_values.end())
+ {
+ std::pair<uint32_t, uint32_t> values;
+ if (name.compare("maxrss") == 0)
+ values.first = std::stoul(val);
+ else
+ values.second = std::stoul(val);
+ _ts_to_values.insert({ts, values});
+ }
+ else
+ {
+ auto &values = _ts_to_values.at(ts);
+ if (name.compare("maxrss") == 0)
+ values.first = std::stoul(val);
+ else
+ values.second = std::stoul(val);
+ }
+ }
+#endif
+ }
+
+ MDTableBuilder &build()
+ {
+ for (const auto &it : divideGraph())
+ {
+ size_t begin_idx = it.first;
+ size_t end_idx = it.second;
+ std::map<std::string, Operation> name_to_op;
+ for (size_t i = begin_idx + 1; i < end_idx; ++i)
+ {
+ const auto *evt = dynamic_cast<const OpSeqDurationEvent *>(_duration_events[i].get());
+ if (evt == nullptr)
+ continue;
+
+ const std::string evt_name = getLabel(*evt);
+ assert(evt->ph.compare("B") == 0 || evt->ph.compare("E") == 0);
+ if (evt->ph.compare("B") == 0)
+ {
+ assert(name_to_op.find(evt_name) == name_to_op.end());
+ name_to_op.insert({evt_name, makeOperation(*evt)});
+ }
+ else
+ {
+ assert(name_to_op.find(evt_name) != name_to_op.end());
+ auto &op = name_to_op.at(evt_name);
+ updateOperation(op, *evt);
+ }
+ }
+
+ _graphs.emplace_back(makeGraph(begin_idx, end_idx, name_to_op));
+ }
+
+ return *this;
+ }
+
+ std::vector<std::pair<size_t, size_t>> divideGraph()
+ {
+ std::vector<std::pair<size_t, size_t>> graph_idx_list; // pair<begin_idx, end_idx>
+ for (size_t i = 0, begin_idx = 0; i < _duration_events.size(); ++i)
+ {
+ const auto subg_evt = dynamic_cast<const SubgDurationEvent *>(_duration_events.at(i).get());
+ if (subg_evt == nullptr)
+ continue;
+
+ if (subg_evt->ph.compare("B") == 0)
+ begin_idx = i;
+ else
+ graph_idx_list.emplace_back(begin_idx, i);
+ }
+ return graph_idx_list;
+ }
+
+ Operation makeOperation(const OpSeqDurationEvent &evt)
+ {
+ Operation op;
+ const std::string &evt_name = getLabel(evt);
+ op.name = evt_name;
+ op.begin_ts = std::stoull(evt.ts);
+ op.backend = evt.backend;
+#ifdef DEBUG
+ op.updateRss(_ts_to_values.at(op.begin_ts).first);
+ op.updateMinflt(_ts_to_values.at(op.begin_ts).second);
+#else
+ op.updateRss(0);
+ op.updateMinflt(0);
+#endif
+ return op;
+ }
+
+ void updateOperation(Operation &op, const DurationEvent &evt)
+ {
+ op.end_ts = std::stoull(evt.ts);
+#ifdef DEBUG
+ op.updateRss(_ts_to_values.at(op.end_ts).first);
+ op.updateMinflt(_ts_to_values.at(op.end_ts).second);
+#else
+ op.updateRss(0);
+ op.updateMinflt(0);
+#endif
+ }
+
+ Graph makeGraph(size_t begin_idx, size_t end_idx,
+ const std::map<std::string, Operation> &name_to_op)
+ {
+ Graph graph;
+ graph.name = "Subgraph";
+ graph.begin_ts = std::stoull(_duration_events[begin_idx]->ts);
+ graph.end_ts = std::stoull(_duration_events[end_idx]->ts);
+ graph.setOperations(name_to_op);
+
+ for (const auto &arg : _duration_events[end_idx]->args)
+ {
+ if (arg.first == "session")
+ graph.session_index = arg.second;
+ if (arg.first == "subgraph")
+ graph.subgraph_index = arg.second;
+ }
+
+#ifdef DEBUG
+ graph.updateRss(_ts_to_values.at(graph.begin_ts).first);
+ graph.updateMinflt(_ts_to_values.at(graph.begin_ts).second);
+ graph.updateRss(_ts_to_values.at(graph.end_ts).first);
+ graph.updateMinflt(_ts_to_values.at(graph.end_ts).second);
+#else
+ graph.updateRss(0);
+ graph.updateMinflt(0);
+#endif
+ return graph;
+ }
+
+ void write(std::ostream &os)
+ {
+ // Write contents
+ for (size_t i = 0; i < _graphs.size(); ++i)
+ {
+ auto &graph = _graphs.at(i);
+ os << "# Session: " << graph.session_index << ", Subgraph: " << graph.subgraph_index
+ << ", Running count: " << i << "\n";
+ graph.write(os);
+ }
+ }
+
+ const std::vector<std::unique_ptr<DurationEvent>> &_duration_events;
+ const std::vector<CounterEvent> &_counter_events;
+
+ // timestamp to std::pair<maxrss, minflt>
+ std::unordered_map<uint64_t, std::pair<uint32_t, uint32_t>> _ts_to_values;
+ std::vector<Graph> _graphs;
+};
+
+} // namespace
+
+void MDTableWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &records)
+{
+ for (const auto &recorder : records)
+ {
+ MDTableBuilder(recorder->duration_events(), recorder->counter_events()).build().write(_os);
+ }
+}
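For reference, `flush` emits one markdown section per subgraph run; with the header vectors defined above, the output is shaped roughly like this (op name and numbers are illustrative, not real measurements):

```
# Session: 0, Subgraph: 0, Running count: 0
| latency(us) | rss_min(kb) | rss_max(kb) | page_reclaims_min | page_reclaims_max |
| ----------- | ------- | ------- | ----------------- | ----------------- |
| 1234 | 5120 | 5376 | 10 | 42 |

## Op
| Op name | backend | latency(us) | latency(%) | rss_min(kb) | rss_max(kb) | page_reclaims_min | page_reclaims_max |
| ------- | ------- | ----------- | ----------- | ------- | ------- | ----------------- | ----------------- |
| $0 subgraph @0 Conv2D | cpu | 617 | 50.000000 | 5120 | 5376 | 10 | 42 |
```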
diff --git a/runtime/onert/core/src/util/ObjectManager.test.cc b/runtime/onert/core/src/util/ObjectManager.test.cc
new file mode 100644
index 000000000..3fe735732
--- /dev/null
+++ b/runtime/onert/core/src/util/ObjectManager.test.cc
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/Index.h"
+#include "util/ObjectManager.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert;
+
+struct TestTag;
+using Index = util::Index<uint32_t, TestTag>;
+
+TEST(ObjectManager, emplace)
+{
+ util::ObjectManager<Index, int> man;
+
+ auto index = man.emplace(100);
+ ASSERT_EQ(man.at(index), 100);
+}
+
+TEST(ObjectManager, neg_remove_1)
+{
+ util::ObjectManager<Index, int> man;
+
+ Index index = man.emplace(100);
+ ASSERT_TRUE(man.exist(index));
+ ASSERT_EQ(man.at(index), 100);
+
+ man.remove(index);
+ ASSERT_FALSE(man.exist(index));
+}
+
+TEST(ObjectManager, neg_remove_2)
+{
+ util::ObjectManager<Index, int> man;
+
+ auto index0 = man.emplace(100);
+ auto index1 = man.emplace(200);
+ ASSERT_TRUE(man.exist(index0));
+ ASSERT_EQ(man.at(index0), 100);
+ ASSERT_TRUE(man.exist(index1));
+ ASSERT_EQ(man.at(index1), 200);
+
+ man.remove(index0);
+ ASSERT_FALSE(man.exist(index0));
+ ASSERT_TRUE(man.exist(index1));
+ ASSERT_EQ(man.at(index1), 200);
+}
+
+TEST(ObjectManager, push)
+{
+ util::ObjectManager<Index, int> man;
+
+ // Without specifying an index
+ auto index = man.push(std::make_unique<int>(100));
+ ASSERT_EQ(man.at(index), 100);
+
+ // Specify index
+ auto index2 = man.push(std::make_unique<int>(200), Index{33});
+ ASSERT_EQ(index2.value(), 33);
+ ASSERT_EQ(man.at(index2), 200);
+
+ auto index3 = man.push(std::make_unique<int>(300));
+ // NOTE The auto-generated index number is always (the biggest index in the ObjectManager + 1)
+ ASSERT_EQ(index3.value(), 34);
+ ASSERT_EQ(man.at(index3), 300);
+
+ auto index4 = man.push(std::make_unique<int>(400), Index{22});
+ ASSERT_EQ(index4.value(), 22);
+ ASSERT_EQ(man.at(index4), 400);
+
+ auto index5 = man.push(std::make_unique<int>(500));
+ // NOTE The auto-generated index number is always (the biggest index in the ObjectManager + 1)
+ ASSERT_EQ(index5.value(), 35);
+ ASSERT_EQ(man.at(index5), 500);
+}
+
+TEST(ObjectManager, neg_push)
+{
+ util::ObjectManager<Index, int> man;
+
+ // Specify index
+ auto index = man.push(std::make_unique<int>(100), Index{55});
+ ASSERT_EQ(index.value(), 55);
+ ASSERT_EQ(man.at(index), 100);
+
+ // Specify the same index
+ auto index2 = man.push(std::make_unique<int>(200), Index{55});
+ ASSERT_FALSE(index2.valid());
+}
+
+static const uint32_t kMaxUInt32 = std::numeric_limits<uint32_t>::max();
+
+TEST(ObjectManager, neg_push_undefined_index)
+{
+ util::ObjectManager<Index, int> man;
+
+ // Try inserting an invalid (undefined) index
+ auto index = man.push(std::make_unique<int>(100), Index{kMaxUInt32});
+ ASSERT_FALSE(index.valid());
+ ASSERT_EQ(man.size(), 0);
+}
+
+TEST(ObjectManager, neg_push_max_index)
+{
+ util::ObjectManager<Index, int> man;
+
+ // Insert an object with maximum valid index
+ auto index = man.push(std::make_unique<int>(100), Index{kMaxUInt32 - 1});
+ ASSERT_EQ(index.value(), kMaxUInt32 - 1);
+ ASSERT_EQ(man.at(index), 100);
+ ASSERT_EQ(man.size(), 1);
+
+ // Reached the final index, so the next push/emplace must fail
+ auto index2 = man.push(std::make_unique<int>(200));
+ ASSERT_EQ(man.size(), 1);
+ ASSERT_FALSE(index2.valid());
+}
+
+TEST(ObjectManager, neg_emplace_max_index)
+{
+ util::ObjectManager<Index, int> man;
+
+ // Insert an object with maximum valid index
+ auto index = man.push(std::make_unique<int>(100), Index{kMaxUInt32 - 1});
+ ASSERT_EQ(index.value(), kMaxUInt32 - 1);
+ ASSERT_EQ(man.at(index), 100);
+ ASSERT_EQ(man.size(), 1);
+
+ // Reached the final index, so the next push/emplace must fail
+ auto index3 = man.emplace(200);
+ ASSERT_EQ(man.size(), 1);
+ ASSERT_FALSE(index3.valid());
+}
+
+TEST(ObjectManager, const_iterate)
+{
+ util::ObjectManager<Index, int> man;
+
+ auto index0 = man.emplace(100);
+ auto index1 = man.emplace(200);
+ auto index2 = man.emplace(300);
+
+ int sum = 0;
+ man.iterate([&](const Index &index, const int &val) { sum += val; });
+ ASSERT_EQ(sum, 600);
+}
+
+TEST(ObjectManager, non_const_iterate)
+{
+ util::ObjectManager<Index, int> man;
+
+ auto index0 = man.emplace(100);
+ auto index1 = man.emplace(200);
+ auto index2 = man.emplace(300);
+
+ man.iterate([&](const Index &index, int &val) { val += 1; });
+ ASSERT_EQ(man.at(index0), 101);
+ ASSERT_EQ(man.at(index1), 201);
+ ASSERT_EQ(man.at(index2), 301);
+}
+
+TEST(ObjectManager, set)
+{
+ util::ObjectManager<Index, int> man;
+ auto index = man.set(Index{1}, std::make_unique<int>(100)); // Insert
+ ASSERT_EQ(index, Index{1});
+ auto index2 = man.set(index, std::make_unique<int>(200)); // Overwrite
+ ASSERT_EQ(index2, index);
+ ASSERT_EQ(man.at(index2), 200);
+}
+
+TEST(ObjectManager, neg_set)
+{
+ auto v = std::make_unique<int>(100);
+ util::ObjectManager<Index, int> man;
+ auto index = man.set(Index{}, std::move(v)); // Try set with an invalid index
+ ASSERT_EQ(index, Index{});
+ ASSERT_FALSE(index.valid());
+ ASSERT_NE(v, nullptr); // v must be kept on failure
+}
+
+TEST(ObjectManager, getRawPtr)
+{
+ auto v = std::make_unique<int>(100);
+ auto v_ptr = v.get();
+ util::ObjectManager<Index, int> man;
+ auto index = man.push(std::move(v));
+ ASSERT_EQ(v_ptr, man.getRawPtr(index));
+}
+
+TEST(ObjectManager, neg_getRawPtr)
+{
+ util::ObjectManager<Index, int> man;
+ auto ptr = man.getRawPtr(Index{1});
+ ASSERT_EQ(ptr, nullptr);
+}
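Taken together, these tests pin down the `ObjectManager` contract: an auto-generated index is always (the biggest index so far + 1), a user-specified index must be valid and unused, and a failed insertion returns an invalid index rather than throwing. A condensed usage sketch under those assumptions:

```cpp
#include "util/Index.h"
#include "util/ObjectManager.h"

#include <memory>

using MyIndex = onert::util::Index<uint32_t, struct MyTag>;

void sketch()
{
  onert::util::ObjectManager<MyIndex, int> man;

  auto a = man.emplace(1);                                  // auto index: 0
  auto b = man.push(std::make_unique<int>(2), MyIndex{10}); // explicit index: 10
  auto c = man.emplace(3);                                  // auto index: 11 (biggest + 1)

  if (!b.valid())
  {
    // index 10 was already taken or undefined; nothing was inserted
  }

  man.iterate([](const MyIndex &, int &v) { v *= 2; }); // mutates every element in place
  man.remove(a);                                        // a no longer exists afterwards
  (void)c;
}
```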
diff --git a/runtime/onert/core/src/util/SNPEEventWriter.cc b/runtime/onert/core/src/util/SNPEEventWriter.cc
new file mode 100644
index 000000000..87bbfc662
--- /dev/null
+++ b/runtime/onert/core/src/util/SNPEEventWriter.cc
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "EventWriter.h"
+
+#include <json/json.h>
+
+#include <cassert>
+#include <unordered_map>
+#include <utility>
+
+/**
+ * @brief Version of the SNPE format
+ * In version 1,
+ * - There is no "version" field in the Json
+ * - Only one subgraph is supported
+ * - An operation name has the form "$3 ADD"
+ *
+ * In version 2,
+ * - "version" : "2" was added to the Json
+ * - Multiple sessions and multiple subgraphs are supported
+ * - When there is only one session, an operation name has the form "$2 subgraph $3 ADD",
+ *   meaning the ADD op whose operation index is 3 in the subgraph whose index is 2
+ * - When there are two or more sessions, an operation name has the form
+ *   "$1 session $2 subgraph $3 ADD", meaning the ADD op whose operation index is 3
+ *   in the subgraph whose index is 2, run in the 1st session.
+ */
+#define SNPE_JSON_SCHEMA_VERSION "2"
+
+namespace
+{
+
+std::string getLabel(const DurationEvent &evt)
+{
+ if (auto evt_ptr = dynamic_cast<const OpSeqDurationEvent *>(&evt))
+ {
+ std::string subg_label("$" + std::to_string(evt_ptr->subg_index) + " subgraph");
+ std::string op_label("$" + std::to_string(evt_ptr->op_index) + " " + evt_ptr->op_name);
+
+ // NOTE: At the moment, only one thread runs for the EventWriter
+ if (evt_ptr->tracing_ctx->hasMultipleSessions())
+ {
+ std::string session_label("$" + std::to_string(evt_ptr->session_index) + " session");
+ return session_label + " " + subg_label + " " + op_label;
+ }
+ else
+ {
+ // When there is only one session, do not include session info
+ // Refer to https://github.sec.samsung.net/STAR/nnfw/issues/11436#issuecomment-930332
+ return subg_label + " " + op_label;
+ }
+ }
+ else // SubgEvent
+ return "Graph";
+}
+
+std::string getBackend(const DurationEvent &evt)
+{
+ if (auto evt_ptr = dynamic_cast<const OpSeqDurationEvent *>(&evt))
+ return evt_ptr->backend;
+ else // SubgEvent
+ return "runtime";
+}
+
+} // namespace
+
+void SNPEWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &recorders)
+{
+ struct Stat
+ {
+ uint64_t sum = 0;
+ uint64_t count = 0;
+ uint64_t max = 0;
+ uint64_t min = std::numeric_limits<uint64_t>::max();
+
+ void accumulate(uint64_t val)
+ {
+ sum += val;
+ count++;
+ max = std::max(max, val);
+ min = std::min(min, val);
+ }
+ };
+
+ Json::Value root;
+ root["version"] = SNPE_JSON_SCHEMA_VERSION;
+
+ auto &exec_data = root["Execution_Data"] = Json::Value{Json::objectValue};
+
+ // Memory
+ {
+ std::unordered_map<std::string, Stat> mem_stats;
+ for (const auto &recorder : recorders)
+ {
+ for (const auto &evt : recorder->counter_events())
+ {
+ auto &mem_stat = mem_stats[evt.name];
+ uint64_t val = std::stoull(evt.values.at("value"));
+ mem_stat.accumulate(val);
+ }
+ }
+
+ auto &mem = exec_data["memory"] = Json::Value{Json::objectValue};
+ for (const auto &kv : mem_stats)
+ {
+ auto &key = kv.first;
+ auto &val = kv.second;
+ mem[key]["Avg_Size"] = val.sum / val.count;
+ mem[key]["Max_Size"] = val.max;
+ mem[key]["Min_Size"] = val.min;
+ mem[key]["Runtime"] = "NA";
+ }
+ }
+
+ // Operation Execution Time
+ {
+ // NOTE This assumes _duration_events is sorted by "ts" ascending
+
+ // 2D keys : stats[tid][name]
+ std::unordered_map<std::string, std::unordered_map<std::string, Stat>> stats;
+ std::unordered_map<std::string, std::unordered_map<std::string, uint64_t>> begin_timestamps;
+ for (const auto &recorder : recorders)
+ {
+ for (const auto &evt : recorder->duration_events())
+ {
+ std::string evt_name = getLabel(*evt);
+ std::string evt_tid = getBackend(*evt);
+
+ auto &stat = stats[evt_tid][evt_name];
+ auto &begin_ts = begin_timestamps[evt_tid][evt_name];
+ uint64_t timestamp = std::stoull(evt->ts);
+ if (evt->ph == "B")
+ {
+ if (begin_ts != 0)
+ throw std::runtime_error{"Invalid Data"};
+ begin_ts = timestamp;
+ }
+ else if (evt->ph == "E")
+ {
+ if (begin_ts == 0 || timestamp < begin_ts)
+ throw std::runtime_error{"Invalid Data"};
+ stat.accumulate(timestamp - begin_ts);
+ begin_ts = 0;
+ }
+ else
+ throw std::runtime_error{"Invalid Data - invalid value for \"ph\" : \"" + evt->ph + "\""};
+ }
+ }
+
+ for (const auto &kv : begin_timestamps)
+ for (const auto &kv2 : kv.second)
+ if (kv2.second != 0)
+ throw std::runtime_error{"Invalid Data - B and E pair does not match."};
+
+ for (const auto &kv : stats)
+ {
+ const auto &tid = kv.first;
+ const auto &map = kv.second;
+ auto &json_tid = exec_data[tid] = Json::Value{Json::objectValue};
+ for (const auto &kv : map)
+ {
+ auto &name = kv.first;
+ auto &val = kv.second;
+ json_tid[name]["Avg_Time"] = val.sum / val.count;
+ json_tid[name]["Max_Time"] = val.max;
+ json_tid[name]["Min_Time"] = val.min;
+ json_tid[name]["Runtime"] = tid;
+ }
+ }
+ }
+
+ _os << root;
+}
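The resulting document nests memory counters and per-backend timing statistics under `Execution_Data`; a schematic of the version-2 output (field values are illustrative):

```
{
  "version" : "2",
  "Execution_Data" : {
    "memory" : {
      "maxrss" : { "Avg_Size" : 5200, "Max_Size" : 5376, "Min_Size" : 5120, "Runtime" : "NA" }
    },
    "cpu" : {
      "$0 subgraph $1 Conv2D" : { "Avg_Time" : 617, "Max_Time" : 700, "Min_Time" : 534, "Runtime" : "cpu" }
    },
    "runtime" : {
      "Graph" : { "Avg_Time" : 1234, "Max_Time" : 1300, "Min_Time" : 1200, "Runtime" : "runtime" }
    }
  }
}
```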
diff --git a/runtime/onert/core/src/util/ShapeInference.cc b/runtime/onert/core/src/util/ShapeInference.cc
index 95c15049d..862d6f725 100644
--- a/runtime/onert/core/src/util/ShapeInference.cc
+++ b/runtime/onert/core/src/util/ShapeInference.cc
@@ -22,6 +22,7 @@
#include "util/logging.h"
#include <cassert>
+#include <numeric>
#include <sstream>
#include <cmath>
@@ -72,6 +73,19 @@ ir::Shape broadcastShapes(const ir::Shape &lhs_shape, const ir::Shape &rhs_shape
} // namespace
+namespace bcq
+{
+inline int getOutputSize(const ir::Shape &cluster_shape, const int32_t *cluster_buf)
+{
+ int size = 0;
+ for (int idx = 0; idx < cluster_shape.dim(0); idx++)
+ {
+ size += cluster_buf[idx * 2 + 1];
+ }
+ return size;
+}
+} // namespace bcq
+
//
// Shape inference
//
@@ -97,10 +111,9 @@ std::pair<int, int> calcConvLikeHeightAndWidth(const int in_h, const int in_w, c
break;
case ir::PaddingType::EXPLICIT:
out_h =
- (in_h + pad.param.top + pad.param.bottom - effective_filter_h_size) / stride.vertical + 1;
+ (in_h + pad.param.top + pad.param.bottom - effective_filter_h_size) / stride.vertical + 1;
out_w =
- (in_w + pad.param.left + pad.param.right - effective_filter_w_size) / stride.horizontal +
- 1;
+ (in_w + pad.param.left + pad.param.right - effective_filter_w_size) / stride.horizontal + 1;
break;
default:
assert(false);
@@ -114,8 +127,13 @@ ir::Shape inferEltwiseShape(const ir::Shape &lhs_shape, const ir::Shape &rhs_sha
return broadcastShapes(lhs_shape, rhs_shape);
}
-ir::Shape inferArgMaxShape(const ir::Shape &input_shape, int axis, int rank)
+ir::Shape inferArgMinMaxShape(const ir::Shape &input_shape, int axis, int rank)
{
+ if (axis < 0 || axis >= rank)
+ {
+ throw std::runtime_error("ArgMinMax shape inference: Wrong axis value " + std::to_string(axis));
+ }
+
ir::Shape out_shape;
for (int idx = 0; idx < rank; ++idx)
{
@@ -171,11 +189,12 @@ ir::Shape inferReduceShape(const ir::Shape &input_shape, const std::vector<int>
for (int i = 0; i < num_axis; ++i)
{
int current = axes[i];
+ if (!(-input_num_dims <= current && current < input_num_dims))
+ throw std::runtime_error{"Invalid dim value " + std::to_string(current)};
if (current < 0)
{
current += input_num_dims;
}
- assert(0 <= current && current < input_num_dims);
for (int j = 0; j < i; ++j)
{
int previous = axes[j];
@@ -259,19 +278,24 @@ ir::Shape inferBatchMatMulShape(const ir::Shape &lhs_shape, const ir::Shape &rhs
return output_shape;
}
-ir::Shape inferBroadcastToShape(const ir::Shape wshape, const int32_t *shape_buffer)
+/*
+ * shp_shape : SHAPE input tensor's shape
+ * shp_buf : SHAPE input tensor's buffer
+ */
+ir::Shape inferBroadcastToShape(const ir::Shape shp_shape, const int32_t *shp_buf)
{
- const int num_elements = wshape.num_elements();
+
+ const int num_elements = shp_shape.num_elements();
assert(num_elements != 0);
- assert(shape_buffer);
+ assert(shp_buf);
ir::Shape new_shape(num_elements);
for (int i = 0; i < num_elements; ++i)
{
- assert(shape_buffer[i] != 0); // It shouldn't be 0.
- new_shape.dim(i) = shape_buffer[i];
+ assert(shp_buf[i] != 0); // It shouldn't be 0.
+ new_shape.dim(i) = shp_buf[i];
}
return new_shape;
@@ -305,6 +329,9 @@ ir::Shape inferConcatShape(const Shapes &in_shapes, const ir::operation::Concat:
ir::Shape inferConv2DShape(const ir::Shape &in_shape, const ir::Shape &ker_shape,
const ir::operation::Conv2D::Param &param, ir::Layout layout)
{
+ if (param.stride.horizontal == 0 || param.stride.vertical == 0)
+ throw std::runtime_error{"Conv2D: stride values must be positive"};
+
auto ifm_shape = in_shape.asFeature(layout);
// Kernel format is [depth_out, kernel_height, kernel_width, depth_in]
@@ -321,6 +348,9 @@ ir::Shape inferDepthwiseConv2DShape(const ir::Shape &in_shape, const ir::Shape &
const ir::operation::DepthwiseConv2D::Param &param,
ir::Layout layout)
{
+ if (param.stride.horizontal == 0 || param.stride.vertical == 0)
+ throw std::runtime_error{"DepthwiseConv2D: stride values must be positive"};
+
assert(layout == ir::Layout::NHWC);
auto ifm_shape = in_shape.asFeature(layout);
@@ -330,7 +360,7 @@ ir::Shape inferDepthwiseConv2DShape(const ir::Shape &in_shape, const ir::Shape &
assert(kf_shape.N == 1);
const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, kf_shape.H, kf_shape.W,
- param.padding, param.stride);
+ param.padding, param.stride, param.dilation);
return ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, kf_shape.C};
}
@@ -354,18 +384,22 @@ ir::Shape inferExpandDimsShape(const ir::Shape &in_shape, int32_t axis)
return out_shape;
}
-ir::Shape inferFillShape(const ir::Shape &in_shape, const int32_t *buffer)
+template <typename T> ir::Shape inferFillShape(const ir::Shape &fill_shape, const T *shape_buf)
{
- ir::Shape out_shape(in_shape.dim(0));
+ ir::Shape out_shape(fill_shape.dim(0));
for (int out_x = 0; out_x < out_shape.rank(); ++out_x)
{
- out_shape.dim(out_x) = buffer[out_x];
+ out_shape.dim(out_x) = static_cast<int32_t>(shape_buf[out_x]);
}
return out_shape;
}
+// template instantiation
+template ir::Shape inferFillShape(const ir::Shape &fill_shape, const int32_t *shape_buf);
+template ir::Shape inferFillShape(const ir::Shape &fill_shape, const int64_t *shape_buf);
+
ir::Shape inferFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &ker_shape)
{
assert(in_shape.rank() >= 2);
@@ -380,11 +414,60 @@ ir::Shape inferFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &k
return {ir::Shape({static_cast<int32_t>(batch_size), num_units})};
}
+ir::Shape inferBCQFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &cluster_shape,
+ const int32_t *cluster_buf)
+{
+ assert(cluster_shape.rank() == 2);
+ assert(cluster_shape.dim(1) == 2);
+
+ const auto input_size = in_shape.dim(1);
+ const auto output_size = bcq::getOutputSize(cluster_shape, cluster_buf);
+
+ return {ir::Shape({output_size, input_size})};
+}
+
+ir::Shape inferBCQGatherShape(const ir::Shape &indices_shape, const ir::Shape &cluster_shape,
+ const int32_t *cluster_buf, int rank,
+ const ir::operation::BCQGather::Param &param)
+{
+ ir::Shape out_shape;
+ ir::Shape in_original_shape;
+
+ assert(cluster_shape.rank() == 2);
+ assert(cluster_shape.dim(1) == 2);
+
+ auto hidden_size = param.input_hidden_size;
+ auto axis = param.axis;
+
+ in_original_shape.append(bcq::getOutputSize(cluster_shape, cluster_buf));
+ in_original_shape.append(hidden_size);
+
+ const int indices_rank = indices_shape.rank();
+ for (int idx = 0; idx < rank; ++idx)
+ {
+ if (idx == (int)axis)
+ {
+ for (int indices_idx = 0; indices_idx < indices_rank; indices_idx++)
+ {
+ out_shape.append(indices_shape.dim(indices_idx));
+ }
+ }
+ else
+ {
+ out_shape.append(in_original_shape.dim(idx));
+ }
+ }
+
+ return out_shape;
+}
+
ir::Shape inferGatherShape(const ir::Shape &input_shape, const ir::Shape &indices_shape, int axis,
int rank)
{
ir::Shape out_shape;
+
const int indices_rank = indices_shape.rank();
+
for (int idx = 0; idx < rank; ++idx)
{
if (idx == axis)
@@ -470,6 +553,9 @@ ir::Shape inferPadShape(const ir::Shape &in_shape, const int32_t *pad_buf, const
ir::Shape inferPoolShape(const ir::Shape &in_shape, const ir::operation::Pool2D::Param &param,
const ir::Layout layout)
{
+ if (param.stride.horizontal == 0 || param.stride.vertical == 0)
+ throw std::runtime_error{"Pool2D: stride values must be positive"};
+
assert(layout == ir::Layout::NHWC);
auto ifm_shape = in_shape.asFeature(layout);
const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, param.kh, param.kw,
@@ -482,6 +568,17 @@ ir::Shape inferResizeBilinearShape(const ir::Shape &in_shape, const int32_t outp
const int32_t output_width)
{
assert(in_shape.rank() == 4);
+ if (output_height < 0)
+ {
+ throw std::runtime_error{"ResizeBilinear: size value must be positive value, output_height = " +
+ std::to_string(output_height)};
+ }
+ if (output_width < 0)
+ {
+ throw std::runtime_error{"ResizeBilinear: size value must be positive value, output_width = " +
+ std::to_string(output_width)};
+ }
+
ir::Shape ret(in_shape.rank());
ret.dim(0) = in_shape.dim(0);
@@ -497,9 +594,9 @@ template <typename T> ir::Shape inferRangeShape(T start_val, T limit_val, T delt
ir::Shape out_shape(static_cast<int>(1));
out_shape.dim(0) =
- (std::is_integral<T>::value
- ? ((std::abs(start_val - limit_val) + std::abs(delta_val) - 1) / std::abs(delta_val))
- : std::ceil(std::abs((start_val - limit_val) / delta_val)));
+ (std::is_integral<T>::value
+ ? ((std::abs(start_val - limit_val) + std::abs(delta_val) - 1) / std::abs(delta_val))
+ : std::ceil(std::abs((start_val - limit_val) / delta_val)));
return out_shape;
}
@@ -511,12 +608,12 @@ ir::Shape inferReshapeShape(const int32_t *shape_buf, const int32_t shape_num_el
const size_t total_num_elements)
{
ir::Shape ret(shape_num_elements);
- int32_t flatten_dim = ir::Shape::UNSPECIFIED_DIM;
+ int32_t flatten_dim = ir::Shape::kUnspecifiedDim;
for (int32_t i = 0; i < shape_num_elements; ++i)
{
if (shape_buf[i] < 0)
{
- if (flatten_dim != ir::Shape::UNSPECIFIED_DIM)
+ if (flatten_dim != ir::Shape::kUnspecifiedDim)
throw std::runtime_error("Reshape: 2nd param has special dim(for flatten) more than twice");
flatten_dim = i;
ret.dim(i) = 1;
@@ -526,7 +623,7 @@ ir::Shape inferReshapeShape(const int32_t *shape_buf, const int32_t shape_num_el
ret.dim(i) = shape_buf[i];
}
}
- if (flatten_dim != ir::Shape::UNSPECIFIED_DIM)
+ if (flatten_dim != ir::Shape::kUnspecifiedDim)
ret.dim(flatten_dim) = total_num_elements / ret.num_elements();
// Check reshapable
@@ -566,9 +663,9 @@ ir::Shape inferSelectShape(const ir::Shape &input_cond_shape, const ir::Shape &i
ir::Shape true_shape = input_true_shape;
ir::Shape false_shape = input_false_shape;
int most_rank =
- (cond_shape.rank() >= true_shape.rank()) && (cond_shape.rank() >= false_shape.rank())
- ? cond_shape.rank()
- : (false_shape.rank() >= true_shape.rank() ? false_shape.rank() : true_shape.rank());
+ (cond_shape.rank() >= true_shape.rank()) && (cond_shape.rank() >= false_shape.rank())
+ ? cond_shape.rank()
+ : (false_shape.rank() >= true_shape.rank() ? false_shape.rank() : true_shape.rank());
ir::Shape calculate_shape(most_rank);
@@ -579,9 +676,9 @@ ir::Shape inferSelectShape(const ir::Shape &input_cond_shape, const ir::Shape &i
for (int i = 0; i < most_rank; ++i)
{
calculate_shape.dim(i) =
- (cond_shape.dim(i) >= true_shape.dim(i)) && (cond_shape.dim(i) >= false_shape.dim(i))
- ? cond_shape.dim(i)
- : (false_shape.dim(i) >= true_shape.dim(i) ? false_shape.dim(i) : true_shape.dim(i));
+ (cond_shape.dim(i) >= true_shape.dim(i)) && (cond_shape.dim(i) >= false_shape.dim(i))
+ ? cond_shape.dim(i)
+ : (false_shape.dim(i) >= true_shape.dim(i) ? false_shape.dim(i) : true_shape.dim(i));
if ((cond_shape.dim(i) != calculate_shape.dim(i) && cond_shape.dim(i) != 1) ||
(true_shape.dim(i) != calculate_shape.dim(i) && true_shape.dim(i) != 1) ||
@@ -613,7 +710,8 @@ ir::Shape inferSelectShape(const ir::Shape &input_cond_shape, const ir::Shape &i
return new_shape;
}
-ir::Shape inferSliceShape(const ir::Shape &input_shape, const int32_t *begins, const int32_t *sizes)
+template <typename T>
+ir::Shape inferSliceShape(const ir::Shape &input_shape, const T *begins_buf, const T *sizes_buf)
{
const uint32_t rank = input_shape.rank();
ir::Shape out_shape(rank);
@@ -623,12 +721,12 @@ ir::Shape inferSliceShape(const ir::Shape &input_shape, const int32_t *begins, c
const auto input_dim = input_shape.dim(idx);
// begin is zero-based
- auto begin = begins[idx];
+ auto begin = begins_buf[idx];
if (begin < 0)
throw std::runtime_error("shape inference Slice: Invalid begin.");
// size is one-based
- auto size = sizes[idx];
+ auto size = sizes_buf[idx];
if (size < -1)
throw std::runtime_error("shape inference Slice: Invalid size.");
@@ -638,18 +736,23 @@ ir::Shape inferSliceShape(const ir::Shape &input_shape, const int32_t *begins, c
}
else
{
- if (input_dim < begin + size)
+ if (input_dim < static_cast<int32_t>(begin + size))
throw std::runtime_error("shape inference Slice: Invalid begin and size.");
}
- out_shape.dim(idx) = size;
+ out_shape.dim(idx) = static_cast<int32_t>(size);
}
return out_shape;
}
+// template instantiation
+template ir::Shape inferSliceShape(const ir::Shape &input_shape, const int32_t *begins_buf,
+ const int32_t *sizes_buf);
+template ir::Shape inferSliceShape(const ir::Shape &input_shape, const int64_t *begins_buf,
+ const int64_t *sizes_buf);
ir::Shape inferSpaceToBatchNDShape(const ir::Shape &input_shape, const ir::Shape &block_shape_shape,
- const ir::Shape &padding_shape, const int32_t *block_shape_data,
- const int32_t *padding_data)
+ const ir::Shape &padding_shape, const int32_t *block_shape_buf,
+ const int32_t *padding_buf)
{
const uint32_t rank = input_shape.rank();
ir::Shape out_shape(rank);
@@ -677,14 +780,14 @@ ir::Shape inferSpaceToBatchNDShape(const ir::Shape &input_shape, const ir::Shape
for (int dim = 0; dim < kSpatialDimensionNum; ++dim)
{
int final_dim_size =
- (input_shape.dim(dim + 1) + padding_data[dim * 2] + padding_data[dim * 2 + 1]);
+ (input_shape.dim(dim + 1) + padding_buf[dim * 2] + padding_buf[dim * 2 + 1]);
- assert(final_dim_size % block_shape_data[dim] == 0);
+ assert(final_dim_size % block_shape_buf[dim] == 0);
- out_shape.dim(dim + 1) = final_dim_size / block_shape_data[dim];
+ out_shape.dim(dim + 1) = final_dim_size / block_shape_buf[dim];
}
- const int output_batch_size = input_shape.dim(0) * block_shape_data[0] * block_shape_data[1];
+ const int output_batch_size = input_shape.dim(0) * block_shape_buf[0] * block_shape_buf[1];
const int output_channel_size = input_shape.dim(3);
out_shape.dim(0) = output_batch_size;
@@ -740,7 +843,7 @@ ir::Shape inferSqueezeShape(const ir::Shape &in_shape, const ir::operation::Sque
if (!(current >= 0 && current < shape_rank && in_shape.dim(current) == 1))
{
throw std::runtime_error(
- "The following conditions must be met: 0 <= dim < Shape rank, dim == 1");
+ "The following conditions must be met: 0 <= dim < Shape rank, dim == 1");
}
if (!should_squeeze[current])
@@ -948,35 +1051,71 @@ ir::Shape inferStridedSliceShape(const ir::Shape &input_shape, const StridedSlic
return out_shape;
}
-ir::Shape inferTileShape(const ir::Shape &in_shape, const int32_t *multiplier)
+ir::Shape inferTileShape(const ir::Shape &in_shape, const int32_t *multiplier_buf,
+ const int32_t multiplier_size)
{
- // assert(in_shape.rank() == multiplier.rank());
+ if (multiplier_size != in_shape.rank())
+ {
+ throw std::runtime_error(
+ "inferTileShape failed, input rank: " + std::to_string(in_shape.rank()) +
+ ", bad multipliers size: " + std::to_string(multiplier_size) + "");
+ }
ir::Shape new_Shape(in_shape.rank());
for (int i = 0; i < in_shape.rank(); ++i)
{
- assert(multiplier[i]); // multiplier[i] shuld not be 0.
- new_Shape.dim(i) = in_shape.dim(i) * multiplier[i];
+ assert(multiplier_buf[i]); // multiplier_buf[i] should not be 0.
+ new_Shape.dim(i) = in_shape.dim(i) * multiplier_buf[i];
}
return new_Shape;
}
-ir::Shape inferTransposeShape(const ir::Shape &in_shape, const std::vector<int> &perm)
+ir::Shape inferTransposeShape(const ir::Shape &in_shape, const int32_t *perm_buf,
+ const int32_t perm_size)
{
- if (static_cast<int>(perm.size()) > in_shape.rank())
+ const auto rank = in_shape.rank();
+ if (perm_size > rank)
{
- throw std::runtime_error("inferTransposeShape failed, bad rank size: " +
- std::to_string(static_cast<int>(perm.size())));
+ throw std::runtime_error("inferTransposeShape failed, bad permutation size: " +
+ std::to_string(perm_size));
}
- ir::Shape out_shape(static_cast<int>(perm.size()));
- for (int idx = 0; idx < static_cast<int>(perm.size()); idx++)
+
+ const int32_t *perm_data = perm_buf;
+ std::vector<int32_t> regular_perm_vec;
+ if (perm_size == 0)
+ {
+ // perm_data will be set to (n-1...0)
+ regular_perm_vec.resize(rank);
+ std::iota(regular_perm_vec.begin(), regular_perm_vec.end(), 0);
+ std::reverse(regular_perm_vec.begin(), regular_perm_vec.end());
+ perm_data = regular_perm_vec.data();
+ }
+ else
{
- if (perm[idx] < 0 || perm[idx] >= static_cast<int>(perm.size()))
+ assert(rank == perm_size);
+ }
+
+ ir::Shape out_shape(rank);
+ std::vector<bool> visit_perms(rank, false);
+ for (int idx = 0; idx < rank; idx++)
+ {
+ const auto perm_val = perm_data[idx];
+ // Check invalid permutation value
+ if (perm_val < 0 || perm_val >= rank)
{
- throw std::runtime_error("inferTransposeShape failed, bad perm value: " +
- std::to_string(perm[idx]));
+ throw std::runtime_error("inferTransposeShape failed, bad permutation value: " +
+ std::to_string(perm_val));
}
- out_shape.dim(idx) = in_shape.dim(perm[idx]);
+
+ // Check duplicated permutation value
+ if (visit_perms.at(perm_val))
+ {
+ throw std::runtime_error("inferTransposeShape failed, duplicated permutation value: " +
+ std::to_string(perm_val));
+ }
+ visit_perms.at(perm_val) = true;
+
+ out_shape.dim(idx) = in_shape.dim(perm_val);
}
return out_shape;
}
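The new BCQ helper treats its cluster input as an (N, 2) tensor and sums the second column to obtain the output size. A worked instance matching the cluster layout used by the tests below (calling the entries "pairs" is only a reading aid):

```cpp
#include <cstdint>

// Mirrors bcq::getOutputSize for a cluster tensor of shape {3, 2},
// laid out row-major as three pairs: {1, 10}, {2, 10}, {3, 10}.
int clusterOutputSizeExample()
{
  const int32_t cluster_buf[] = {1, 10, 2, 10, 3, 10};
  int size = 0;
  for (int row = 0; row < 3; ++row)
    size += cluster_buf[row * 2 + 1]; // sums 10 + 10 + 10
  return size; // 30, so inferBCQFullyConnectedShape({10, 1}, {3, 2}, cluster_buf) yields {30, 1}
}
```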
diff --git a/runtime/onert/core/src/util/ShapeInference.test.cc b/runtime/onert/core/src/util/ShapeInference.test.cc
new file mode 100644
index 000000000..96579bfa2
--- /dev/null
+++ b/runtime/onert/core/src/util/ShapeInference.test.cc
@@ -0,0 +1,539 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/ShapeInference.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert::ir;
+
+TEST(ShapeInference, Elementwise)
+{
+ Shape lhs_shape{1, 299, 299, 3};
+ Shape rhs_shape{3};
+ auto infered_out_shape = onert::shape_inference::inferEltwiseShape(lhs_shape, rhs_shape);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.dim(0), 1);
+ ASSERT_EQ(infered_out_shape.dim(1), 299);
+ ASSERT_EQ(infered_out_shape.dim(2), 299);
+ ASSERT_EQ(infered_out_shape.dim(3), 3);
+}
+
+TEST(ShapeInference, neg_Elementwise)
+{
+ Shape lhs_shape{1, 299, 299, 3};
+ Shape rhs_shape{5, 3};
+ ASSERT_THROW(onert::shape_inference::inferEltwiseShape(lhs_shape, rhs_shape), std::runtime_error);
+}
+
+TEST(ShapeInference, Pool2DNodeSame)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Stride stride{3, 7};
+ Padding padding{PaddingType::SAME};
+
+ operation::Pool2D::Param avg_pool_param{
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+
+ operation::Pool2D::Param max_pool_param{
+ operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+ infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+}
+
+TEST(ShapeInference, Pool2DNodeValid)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Stride stride{3, 7};
+ Padding padding{PaddingType::VALID};
+
+ operation::Pool2D::Param avg_pool_param{
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+
+ operation::Pool2D::Param max_pool_param{
+ operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+ infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+}
+
+TEST(ShapeInference, Pool2DNodeExplicit)
+{
+ Shape in_shape{10, 3, 5, 20};
+
+ Stride stride{3, 7};
+ Padding padding{4, 3, 2, 1};
+
+ operation::Pool2D::Param avg_pool_param{
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+
+ operation::Pool2D::Param max_pool_param{
+ operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+ infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+}
+
+TEST(ShapeInference, neg_Pool2DNode_InvalidStride)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Stride stride{0, 7};
+ Padding padding{PaddingType::SAME};
+
+ operation::Pool2D::Param avg_pool_param{
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ ASSERT_THROW(onert::shape_inference::inferPoolShape(in_shape, avg_pool_param),
+ std::runtime_error);
+}
+
+TEST(ShapeInference, Conv2D)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Shape ker_shape{30, 3, 6, 20};
+
+ operation::Conv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, Activation::NONE,
+ Dilation{1, 1}};
+ auto infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
+
+ param = operation::Conv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, Activation::NONE,
+ Dilation{1, 1}};
+ infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
+
+ param =
+ operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE, Dilation{1, 1}};
+ infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 3);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
+}
+
+TEST(ShapeInference, neg_Conv2D_InvalidStride)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Shape ker_shape{30, 3, 6, 20};
+
+ operation::Conv2D::Param param{Stride{0, 0}, Padding{PaddingType::VALID}, Activation::NONE,
+ Dilation{1, 1}};
+ ASSERT_THROW(onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param),
+ std::runtime_error);
+}
+
+TEST(ShapeInference, DepthwiseConv2D)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Shape ker_shape{1, 3, 6, 60};
+
+ operation::DepthwiseConv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, 3,
+ Activation::NONE, Dilation{1, 1}};
+ auto infered_out_shape =
+ onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
+
+ param = operation::DepthwiseConv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, 3,
+ Activation::NONE, Dilation{1, 1}};
+ infered_out_shape = onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
+
+ param = operation::DepthwiseConv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, 3, Activation::NONE,
+ Dilation{1, 1}};
+ infered_out_shape = onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 3);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
+}
+
+TEST(ShapeInference, neg_DepthwiseConv2D_InvalidStride)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Shape ker_shape{1, 3, 6, 60};
+
+ operation::DepthwiseConv2D::Param param{Stride{3, 0}, Padding{PaddingType::VALID}, 3,
+ Activation::NONE, Dilation{1, 1}};
+ ASSERT_THROW(onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param),
+ std::runtime_error);
+}
+
+TEST(ShapeInference, Concat)
+{
+ {
+ Shape in1{10, 20, 30, 3, 50};
+ Shape in2{10, 20, 30, 2, 50};
+ Shape in3{10, 20, 30, 2, 50};
+
+ operation::Concat::Param param{3};
+ auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2, in3}, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 5);
+ ASSERT_EQ(infered_out_shape.dim(0), 10);
+ ASSERT_EQ(infered_out_shape.dim(1), 20);
+ ASSERT_EQ(infered_out_shape.dim(2), 30);
+ ASSERT_EQ(infered_out_shape.dim(3), 7);
+ ASSERT_EQ(infered_out_shape.dim(4), 50);
+ }
+ {
+ // case 1. when axis < 0
+ Shape in1{10, 20, 2};
+ Shape in2{10, 20, 3};
+
+ operation::Concat::Param param{-1};
+ auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2}, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 3);
+ ASSERT_EQ(infered_out_shape.dim(0), 10);
+ ASSERT_EQ(infered_out_shape.dim(1), 20);
+ ASSERT_EQ(infered_out_shape.dim(2), 5);
+ }
+ {
+ // case 2. when axis < 0
+ Shape in1{2, 20, 2};
+ Shape in2{3, 20, 2};
+
+ operation::Concat::Param param{-3};
+ auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2}, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 3);
+ ASSERT_EQ(infered_out_shape.dim(0), 5);
+ ASSERT_EQ(infered_out_shape.dim(1), 20);
+ ASSERT_EQ(infered_out_shape.dim(2), 2);
+ }
+}
+
+TEST(ShapeInference, neg_Concat)
+{
+ {
+ operation::Concat::Param param{2};
+ Shape in1{10, 1, 3};
+ Shape in2{10, 2, 4}; // dim[1] must match in1's dim[1] (1), but is 2
+
+ EXPECT_ANY_THROW(onert::shape_inference::inferConcatShape({in1, in2}, param));
+ }
+ { // wrong rank
+ operation::Concat::Param param{2};
+ Shape in1{10, 2, 3, 4};
+ Shape in2{10, 2, 4}; // rank should be 4
+
+ EXPECT_ANY_THROW(onert::shape_inference::inferConcatShape({in1, in2}, param));
+ }
+}
+
+TEST(ShapeInference, ExpandDims)
+{
+ Shape in_shape{30, 40};
+
+ auto check = [&](int32_t axis, Shape &expected) {
+ auto actual = onert::shape_inference::inferExpandDimsShape(in_shape, axis);
+
+ ASSERT_EQ(actual.rank(), expected.rank());
+ for (int32_t dim = 0; dim < expected.rank(); dim++)
+ ASSERT_EQ(actual.dim(dim), expected.dim(dim));
+ };
+
+ { // boundary
+ int32_t axis = 0;
+ Shape expected{1, 30, 40};
+ check(axis, expected);
+ }
+ { // boundary
+ int32_t axis = 2;
+ Shape expected{30, 40, 1};
+ check(axis, expected);
+ }
+ { // inside
+ int32_t axis = 1;
+ Shape expected{30, 1, 40};
+ check(axis, expected);
+ }
+ { // negative boundary
+ int32_t axis = -1;
+ Shape expected{30, 40, 1};
+ check(axis, expected);
+ }
+ { // negative boundary
+ int32_t axis = -3;
+ Shape expected{1, 30, 40};
+ check(axis, expected);
+ }
+}
+
+TEST(ShapeInference, neg_ExpandDims)
+{
+ Shape in_shape{30, 40};
+
+ { // over boundary
+ int32_t axis = 3;
+ ASSERT_THROW(onert::shape_inference::inferExpandDimsShape(in_shape, axis), std::runtime_error);
+ }
+ { // over boundary
+ int32_t axis = -4;
+ ASSERT_THROW(onert::shape_inference::inferExpandDimsShape(in_shape, axis), std::runtime_error);
+ }
+}
+
+TEST(ShapeInference, FullyConnected)
+{
+ Shape in_shape{3, 4, 5, 6};
+ Shape ker_shape{3, 10};
+ auto infered_out_shape = onert::shape_inference::inferFullyConnectedShape(in_shape, ker_shape);
+
+ ASSERT_EQ(infered_out_shape.rank(), 2);
+ ASSERT_EQ(infered_out_shape.dim(0), 36);
+ ASSERT_EQ(infered_out_shape.dim(1), 3);
+}
+
+TEST(ShapeInference, Transpose)
+{
+ auto check = [&](Shape &in_shape, std::vector<int> perm, Shape &expected) {
+ // pre-conditions
+ ASSERT_EQ(in_shape.rank(), perm.size());
+ ASSERT_EQ(expected.rank(), perm.size());
+ auto inferred_out_shape =
+ onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size());
+ // post-conditions
+ ASSERT_EQ(inferred_out_shape.rank(), perm.size());
+ for (int32_t dim = 0; dim < expected.rank(); dim++)
+ {
+ ASSERT_EQ(inferred_out_shape.dim(dim), expected.dim(dim));
+ }
+ };
+ // check for 2-D
+ {
+ Shape in_shape{2, 3};
+ std::vector<int> perm = {1, 0};
+ Shape expected{3, 2};
+ check(in_shape, perm, expected);
+ }
+ // check for 3-D
+ {
+ Shape in_shape{1, 2, 3};
+ std::vector<int> perm = {2, 0, 1};
+ Shape expected{3, 1, 2};
+ check(in_shape, perm, expected);
+ }
+ // check for 4-D
+ {
+ Shape in_shape{1, 2, 3, 4};
+ std::vector<int> perm = {1, 3, 0, 2};
+ Shape expected{2, 4, 1, 3};
+ check(in_shape, perm, expected);
+ }
+}
+
+TEST(ShapeInference, neg_Transpose)
+{
+ Shape in_shape{1, 2, 3};
+ // Invalid parameter size
+ {
+ std::vector<int> perm = {2, 0, 1, 0};
+ ASSERT_THROW(onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size()),
+ std::runtime_error);
+ }
+ // Invalid parameter value
+ {
+ std::vector<int> perm = {2, 0, 3};
+ ASSERT_THROW(onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size()),
+ std::runtime_error);
+ }
+}
+
+TEST(ShapeInference, Gather)
+{
+ auto check = [&](Shape &input, Shape &indices, Shape &expected, int32_t axis) {
+ int rank = input.rank();
+ auto actual = onert::shape_inference::inferGatherShape(input, indices, axis, rank);
+
+ ASSERT_EQ(actual.rank(), expected.rank());
+
+ for (int32_t dim = 0; dim < expected.rank(); dim++)
+ ASSERT_EQ(actual.dim(dim), expected.dim(dim));
+ };
+
+ // check for 2-D, 3-D, axis 0
+ {
+ Shape input{3, 4};
+ Shape indices{1, 1, 2};
+ int32_t axis = 0;
+ Shape expected{1, 1, 2, 4};
+ check(input, indices, expected, axis);
+ }
+
+ // check for 2-D, 3-D, axis 1
+ {
+ Shape input{3, 4};
+ Shape indices{1, 2, 1};
+ int32_t axis = 1;
+ Shape expected{3, 1, 2, 1};
+ check(input, indices, expected, axis);
+ }
+
+ // check for 3-D, 2-D, axis 0
+ {
+ Shape input{2, 3, 4};
+ Shape indices{1, 2};
+ int32_t axis = 0;
+ Shape expected{1, 2, 3, 4};
+ check(input, indices, expected, axis);
+ }
+
+ // check for 3-D, 2-D, axis 2
+ {
+ Shape input{2, 3, 4};
+ Shape indices{2, 1};
+ int32_t axis = 2;
+ Shape expected{2, 3, 2, 1};
+ check(input, indices, expected, axis);
+ }
+
+ // check for 4D, axis 0
+ {
+ Shape input{1, 2, 3, 4};
+ Shape indices{2};
+ int32_t axis = 0;
+ Shape expected{2, 2, 3, 4};
+ check(input, indices, expected, axis);
+ }
+}
+
+TEST(ShapeInference, BCQFullyConnected)
+{
+ auto check = [&](Shape &in_shape, Shape &cluster_shape, std::vector<int> cluster,
+ Shape &expected) {
+ auto actual =
+ onert::shape_inference::inferBCQFullyConnectedShape(in_shape, cluster_shape, cluster.data());
+ ASSERT_EQ(actual.rank(), expected.rank());
+
+ for (int32_t dim = 0; dim < expected.rank(); dim++)
+ ASSERT_EQ(actual.dim(dim), expected.dim(dim));
+ };
+
+ {
+ Shape in_shape{10, 1};
+ Shape cluster_shape{3, 2};
+ std::vector<int> cluster = {1, 10, 2, 10, 3, 10};
+
+ Shape expected{30, 1};
+ check(in_shape, cluster_shape, cluster, expected);
+ }
+
+ {
+ Shape in_shape{1, 1};
+ Shape cluster_shape{1, 2};
+ std::vector<int> cluster = {3, 50};
+
+ Shape expected{50, 1};
+ check(in_shape, cluster_shape, cluster, expected);
+ }
+}
+
+TEST(ShapeInference, BCQGather)
+{
+ auto check = [&](Shape &indices_shape, Shape &cluster_shape, std::vector<int> cluster,
+ uint32_t hidden_size, uint32_t axis, int rank, Shape &expected) {
+ operation::BCQGather::Param param{hidden_size, axis};
+ auto actual = onert::shape_inference::inferBCQGatherShape(indices_shape, cluster_shape,
+ cluster.data(), rank, param);
+ ASSERT_EQ(actual.rank(), expected.rank());
+
+ for (int32_t dim = 0; dim < expected.rank(); dim++)
+ ASSERT_EQ(actual.dim(dim), expected.dim(dim));
+ };
+
+ {
+ Shape indices_shape{5, 1};
+ Shape cluster_shape{3, 2};
+ std::vector<int> cluster = {1, 10, 2, 10, 3, 10};
+ uint32_t hidden_size = 10;
+ uint32_t axis = 0;
+ int rank = 2;
+
+ Shape expected{5, 1, 10};
+ check(indices_shape, cluster_shape, cluster, hidden_size, axis, rank, expected);
+ }
+
+ {
+ Shape indices_shape{5, 1};
+ Shape cluster_shape{3, 2};
+ std::vector<int> cluster = {1, 10, 2, 10, 3, 10};
+ uint32_t hidden_size = 10;
+ uint32_t axis = 1;
+ int rank = 2;
+
+ Shape expected{30, 5, 1};
+ check(indices_shape, cluster_shape, cluster, hidden_size, axis, rank, expected);
+ }
+}
diff --git a/runtime/onert/core/src/util/TracingCtx.cc b/runtime/onert/core/src/util/TracingCtx.cc
new file mode 100644
index 000000000..c05baee60
--- /dev/null
+++ b/runtime/onert/core/src/util/TracingCtx.cc
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/TracingCtx.h"
+
+namespace onert
+{
+namespace util
+{
+
+// initializing static member var
+std::mutex TracingCtx::_session_id_mutex;
+uint32_t TracingCtx::_next_session_id = 0;
+
+} // namespace util
+} // namespace onert
diff --git a/runtime/onert/frontend/base_loader/include/base_loader.h b/runtime/onert/frontend/base_loader/include/base_loader.h
index 480452e01..a6b1fb4a1 100644
--- a/runtime/onert/frontend/base_loader/include/base_loader.h
+++ b/runtime/onert/frontend/base_loader/include/base_loader.h
@@ -1,4 +1,5 @@
/*
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -38,7 +39,7 @@ namespace onert
namespace base_loader
{
-template <typename LoaderDomain, typename SpecificLoader> class BaseLoader
+template <typename LoaderDomain> class BaseLoader
{
protected:
using Verifier = typename LoaderDomain::Verifier;
@@ -64,11 +65,12 @@ public:
/**
* @brief Construct a new Loader object
*
- * @param graph reference on subgraphs
+ * @param model reference to model
*/
- explicit BaseLoader(std::unique_ptr<ir::Subgraphs> &subgs)
- : _base{nullptr}, _pagesize(getpagesize()), _fd(-1), _subgraphs(subgs), _model{nullptr}
+ explicit BaseLoader(std::unique_ptr<ir::Model> &model)
+ : _base{nullptr}, _pagesize(getpagesize()), _fd(-1), _model(model), _domain_model{nullptr}
{
+ _use_mmaped_data = util::getConfigBool(util::config::USE_MMAPED_DATA);
}
/**
@@ -76,7 +78,7 @@ public:
*
* @param file_path
*/
- void loadFromFile(const char *file_path);
+ void loadFromFile(const std::string &file_path);
/**
* @brief Load a model from a buffer
*
@@ -93,10 +95,12 @@ protected:
ir::Activation convertActivation(ActivationFunctionType type);
ir::DataType tensorTypeToDataType(TensorType type);
ir::OperandIndex tensorIdxToOperandIdx(int32_t tensorIdx);
- void deallocateMmappedArea(uint8_t *ptr, size_t size);
+ flexbuffers::Map getCustomOpAttrMap(const Operator *op);
// Create operands form tflite::Tensor
ir::OperandIndex loadOperand(const Tensor *tensor, ir::Graph &subg);
+ void loadQuantization(const Tensor *tensor, ir::TypeInfo &typeInfo);
+ void loadSparsity(const Tensor *tensor, ir::TypeInfo &typeInfo);
void loadOperationIO(const Operator *op, ir::OperandIndexSequence &inputs,
ir::OperandIndexSequence &outputs);
// Create operations from Operator
@@ -106,70 +110,76 @@ protected:
void loadStridesAndPaddings(Param &param, const OptionsType *options);
// Load Pool2D param
template <typename Param> void loadPool2DOptions(Param &param, const Pool2DOptions *options);
+ // Get BuiltinOperator
+ BuiltinOperator getBuiltinOperator(const Operator *op)
+ {
+ auto const builtin_opcode = _domain_model->operator_codes()->Get(op->opcode_index());
+ auto builtin_op = builtin_opcode->builtin_code();
+ if (builtin_op < BuiltinOperator::BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES)
+ builtin_op = static_cast<BuiltinOperator>(builtin_opcode->deprecated_builtin_code());
+
+ return builtin_op;
+ }
+private:
+ virtual std::unique_ptr<ir::Graph> loadSubgraph(const SubGraph *subg) = 0;
// Operations
+ template <typename OpIR, typename... Args>
+ const OpIR *loadOperationTo(const Operator *op, ir::Graph &subg, Args &&... args);
+
+ void loadAddV2(const Operator *op, ir::Graph &subg);
+ void loadArgMinMax(const Operator *op, ir::Graph &subg, bool is_argmax);
+ void loadBatchMatMul(const Operator *op, ir::Graph &subg);
+ void loadBinaryArithmetic(const Operator *op, ir::Graph &subg,
+ ir::operation::BinaryArithmetic::ArithmeticType op_type);
+ void loadComparison(const Operator *op, ir::Graph &subg);
+ void loadConcatenation(const Operator *op, ir::Graph &subg);
void loadConv2D(const Operator *op, ir::Graph &subg);
+ void loadCustom(const Operator *op, ir::Graph &subg);
+ void loadDepthToSpace(const Operator *op, ir::Graph &subg);
void loadDepthwiseConv2D(const Operator *op, ir::Graph &subg);
- void loadTransposeConv(const Operator *op, ir::Graph &subg);
- void loadPool2D(const Operator *op, ir::Graph &subg, ir::operation::Pool2D::PoolType op_type);
- void loadReshape(const Operator *op, ir::Graph &subg);
- void loadSoftmax(const Operator *op, ir::Graph &subg);
- void loadConcatenation(const Operator *op, ir::Graph &subg);
- void loadFill(const Operator *op, ir::Graph &subg);
- void loadFC(const Operator *op, ir::Graph &subg);
- template <ir::operation::BinaryArithmetic::ArithmeticType op_type>
- void loadBinaryArithmetic(const Operator *op, ir::Graph &subg);
- void loadAddV2(const Operator *op, ir::Graph &subg);
- void loadPack(const Operator *op, ir::Graph &subg);
- void loadResizeBilinear(const Operator *op, ir::Graph &subg);
- void loadResizeNearestNeighbor(const Operator *op, ir::Graph &subg);
- void loadSelect(const Operator *op, ir::Graph &subg);
- void loadSquaredDifference(const Operator *op, ir::Graph &subg);
- void loadTranspose(const Operator *op, ir::Graph &subg);
- template <ir::operation::Reduce::ReduceType reduce_type>
- void loadReduce(const Operator *op, ir::Graph &subg);
- void loadReduceAll(const Operator *op, ir::Graph &subg);
- void loadReverseV2(const Operator *op, ir::Graph &subg);
- void loadPad(const Operator *op, ir::Graph &subg);
+ void loadEinsum(const Operator *op, ir::Graph &subg);
void loadElementwiseActivation(const Operator *op, ir::Graph &subg,
ir::operation::ElementwiseActivation::Type op_type,
float alpha = 0.f, float beta = 0.f);
- template <ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type>
- void loadElementwiseBinary(const Operator *op, ir::Graph &subg);
+ void loadElementwiseBinary(const Operator *op, ir::Graph &subg,
+ ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type);
void loadElementwiseUnary(const Operator *op, ir::Graph &subg,
ir::operation::ElementwiseUnary::Type op_type);
- void loadExpandDims(const Operator *op, ir::Graph &subg);
+ void loadFC(const Operator *op, ir::Graph &subg);
+ void loadFusedBatchNorm(const Operator *op, ir::Graph &subg);
void loadGather(const Operator *op, ir::Graph &subg);
- void loadCustom(const Operator *op, ir::Graph &subg);
- void loadSpaceToBatchND(const Operator *op, ir::Graph &subg);
- void loadBatchMatMul(const Operator *op, ir::Graph &subg);
- void loadBatchToSpaceND(const Operator *op, ir::Graph &subg);
- void loadSqueeze(const Operator *op, ir::Graph &subg);
- void loadPrelu(const Operator *op, ir::Graph &subg);
+ void loadIf(const Operator *op, ir::Graph &subg);
+ void loadLeakyRelu(const Operator *op, ir::Graph &subg);
+ void loadLogSoftmax(const Operator *op, ir::Graph &subg);
+ void loadDetectionPostProcess(const Operator *op, ir::Graph &subg);
+ void loadOneHot(const Operator *op, ir::Graph &subg);
+ void loadPack(const Operator *op, ir::Graph &subg);
+ void loadPool2D(const Operator *op, ir::Graph &subg, ir::operation::Pool2D::PoolType op_type);
+ void loadReduce(const Operator *op, ir::Graph &subg,
+ ir::operation::Reduce::ReduceType reduce_type);
+ void loadReduceAll(const Operator *op, ir::Graph &subg);
+ void loadReshape(const Operator *op, ir::Graph &subg);
+ void loadResizeBilinear(const Operator *op, ir::Graph &subg);
+ void loadResizeNearestNeighbor(const Operator *op, ir::Graph &subg);
+ void loadSoftmax(const Operator *op, ir::Graph &subg);
+ void loadSpaceToDepth(const Operator *op, ir::Graph &subg);
void loadSplit(const Operator *op, ir::Graph &subg);
void loadSplitV(const Operator *op, ir::Graph &subg);
- void loadSlice(const Operator *op, ir::Graph &subg);
+ void loadSqueeze(const Operator *op, ir::Graph &subg);
void loadStridedSlice(const Operator *op, ir::Graph &subg);
+ void loadTransposeConv(const Operator *op, ir::Graph &subg);
+ void loadUnidirectionalSequenceLSTM(const Operator *op, ir::Graph &subg);
void loadUnpack(const Operator *op, ir::Graph &subg);
- void loadComparison(const Operator *op, ir::Graph &subg);
- void loadEinsum(const Operator *op, ir::Graph &subg);
- void loadOneHot(const Operator *op, ir::Graph &subg);
- void loadShape(const Operator *op, ir::Graph &subg);
- void loadIf(const Operator *op, ir::Graph &subg);
void loadWhile(const Operator *op, ir::Graph &subg);
- void loadArgMax(const Operator *op, ir::Graph &subg);
- void loadPow(const Operator *op, ir::Graph &subg);
- void loadTile(const Operator *op, ir::Graph &subg);
- void loadRange(const Operator *op, ir::Graph &subg);
- void loadRank(const Operator *op, ir::Graph &subg);
- void loadMatrixBandPart(const Operator *op, ir::Graph &subg);
- void loadBroadcastTo(const Operator *op, ir::Graph &subg);
- void loadFusedBatchNorm(const Operator *op, ir::Graph &subg);
- void loadLogSoftmax(const Operator *op, ir::Graph &subg);
- void loadSpaceToDepth(const Operator *op, ir::Graph &subg);
- void loadStatelessRandomUniform(const Operator *op, ir::Graph &subg);
- void loadL2Normalization(const Operator *op, ir::Graph &subg);
- void loadLeakyRelu(const Operator *op, ir::Graph &subg);
+
+ void verifySubgraphIndex(int subg_index)
+ {
+ const auto num_subgraphs = _domain_model->subgraphs()->size();
+ if (subg_index < 0 || subg_index >= static_cast<int32_t>(num_subgraphs))
+ throw std::runtime_error{std::string{"Invalid subgraph index - "} +
+ std::to_string(subg_index)};
+ }
protected:
// Base address for mapped region for loading (if needed)
@@ -178,30 +188,34 @@ protected:
int32_t _pagesize;
// loaded file description
int _fd;
- // Reference on loadable subgraphs
- std::unique_ptr<ir::Subgraphs> &_subgraphs;
- const Model *_model;
+  // Reference to ir::Model (to be loaded from _domain_model)
+ std::unique_ptr<ir::Model> &_model;
+ const Model *_domain_model;
// Maps Tensor indices to onert Operands.
std::vector<ir::OperandIndex> _tensor_to_operand;
std::unordered_map<ir::OperandIndex, std::string> _tensor_names;
// Verifier
std::unique_ptr<Verifier> _verifier;
+ // Boolean flag to use MMAPED_DATA
+ bool _use_mmaped_data = false;
+
+ std::unordered_map<uint32_t /* Buffer Index in circle file */, std::shared_ptr<ir::Data>>
+ _buf_to_data;
};
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::BaseLoader::loadFromFile(const char *file_path)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::BaseLoader::loadFromFile(const std::string &file_path)
{
- _fd = open(file_path, O_RDONLY);
+ _fd = open(file_path.c_str(), O_RDONLY);
if (_fd < 0)
{
- throw std::runtime_error("Failed to open file " + std::string(file_path));
+ throw std::runtime_error("Failed to open file " + file_path);
}
struct stat file_stat;
if (fstat(_fd, &file_stat) != 0)
{
- throw std::runtime_error("Fstat failed or file " + std::string(file_path) +
- " is not a regular file");
+ throw std::runtime_error("Fstat failed or file " + file_path + " is not a regular file");
}
int size = file_stat.st_size;
@@ -216,22 +230,22 @@ void BaseLoader<LoaderDomain, SpecificLoader>::BaseLoader::loadFromFile(const ch
_verifier = std::make_unique<Verifier>(reinterpret_cast<const std::uint8_t *>(_base), size);
loadModel();
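+  // By now loadOperand() has copied each constant buffer into CachedData or re-mapped it
+  // as MMapedData, so the whole-file mapping can be released.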
+ munmap(_base, size);
close(_fd);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::BaseLoader::loadFromBuffer(uint8_t *buffer,
- size_t size)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::BaseLoader::loadFromBuffer(uint8_t *buffer, size_t size)
{
_base = buffer;
_verifier = std::make_unique<Verifier>(reinterpret_cast<const std::uint8_t *>(_base), size);
loadModel();
}
-template <typename LoaderDomain, typename SpecificLoader>
-ir::Activation BaseLoader<LoaderDomain, SpecificLoader>::BaseLoader::convertActivation(
- const ActivationFunctionType type)
+template <typename LoaderDomain>
+ir::Activation
+BaseLoader<LoaderDomain>::BaseLoader::convertActivation(const ActivationFunctionType type)
{
switch (type)
{
@@ -246,68 +260,59 @@ ir::Activation BaseLoader<LoaderDomain, SpecificLoader>::BaseLoader::convertActi
case ActivationFunctionType::ActivationFunctionType_TANH:
return ir::Activation::TANH;
default:
- throw std::runtime_error(std::string("Unsupported activation type: ")
- .append(EnumNameActivationFunctionType(type)));
+ throw std::runtime_error(std::string("Unsupported or invalid activation type: ") +
+ std::to_string(static_cast<int>(type)));
}
}
-template <typename LoaderDomain, typename SpecificLoader>
-ir::DataType
-BaseLoader<LoaderDomain, SpecificLoader>::BaseLoader::tensorTypeToDataType(const TensorType type)
+template <typename LoaderDomain>
+ir::DataType BaseLoader<LoaderDomain>::BaseLoader::tensorTypeToDataType(const TensorType type)
{
switch (type)
{
case TensorType::TensorType_FLOAT32:
return ir::DataType::FLOAT32;
+ case TensorType::TensorType_FLOAT16:
+ return ir::DataType::FLOAT16;
case TensorType::TensorType_INT32:
return ir::DataType::INT32;
- case TensorType::TensorType_BOOL:
- return ir::DataType::BOOL8;
case TensorType::TensorType_UINT8:
return ir::DataType::QUANT_UINT8_ASYMM;
- case TensorType::TensorType_INT8:
- return ir::DataType::QUANT_INT8_SYMM;
case TensorType::TensorType_INT64:
return ir::DataType::INT64;
+ // case TensorType::TensorType_STRING:
+ case TensorType::TensorType_BOOL:
+ return ir::DataType::BOOL8;
+ case TensorType::TensorType_INT16:
+ return ir::DataType::QUANT_INT16_ASYMM;
+ // case TensorType::TensorType_COMPLEX64
+ case TensorType::TensorType_INT8:
+ return ir::DataType::QUANT_INT8_ASYMM;
+ // case TensorType::TensorType_FLOAT64
+ case TensorType::TensorType_UINT32:
+ return ir::DataType::UINT32;
default:
throw std::runtime_error(
- std::string("Unsupported tensor type: ").append(EnumNameTensorType(type)));
+ std::string("Unsupported tensor type: ").append(EnumNameTensorType(type)));
}
}
-template <typename LoaderDomain, typename SpecificLoader>
-ir::OperandIndex
-BaseLoader<LoaderDomain, SpecificLoader>::BaseLoader::tensorIdxToOperandIdx(int32_t tensorIdx)
+template <typename LoaderDomain>
+ir::OperandIndex BaseLoader<LoaderDomain>::BaseLoader::tensorIdxToOperandIdx(int32_t tensorIdx)
{
return isOptionalInputTensor(tensorIdx) ? ir::OperandIndex() : _tensor_to_operand[tensorIdx];
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::BaseLoader::deallocateMmappedArea(uint8_t *ptr,
- size_t size)
+template <typename LoaderDomain>
+flexbuffers::Map BaseLoader<LoaderDomain>::BaseLoader::getCustomOpAttrMap(const Operator *op)
{
- // Calculate offset from base address of mapped region
- ptrdiff_t unaligned_offset_start = ptr - _base;
- ptrdiff_t unaligned_offset_end = unaligned_offset_start + size;
-
- // Calculated aligned offset from base address of mapped region
- // munmap accepts memory address which is a multiple of the pagesize
- ptrdiff_t aligned_offset_start =
- ((unaligned_offset_start + (_pagesize - 1)) / _pagesize) * _pagesize;
- ptrdiff_t aligned_offset_end = (unaligned_offset_end / _pagesize) * _pagesize;
-
- ptrdiff_t area_size = aligned_offset_end - aligned_offset_start;
- if (area_size > 0)
- {
- // Unmap mapped region for CachedData
- if (munmap(_base + aligned_offset_start, area_size) == -1)
- {
- VERBOSE(BASE_LOADER) << "munmap failed" << std::endl;
- }
- }
+ size_t custom_op_data_size = op->custom_options()->size();
+ auto custom_op_data = op->custom_options()->Data();
+ auto data_root = flexbuffers::GetRoot(custom_op_data, custom_op_data_size);
+ return data_root.AsMap();
}
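+// Example use (as in loadAddV2 below):
+//   const auto attr_map = getCustomOpAttrMap(op);
+//   auto act = attr_map["fused_activation_function"].AsInt8();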
-/* Copied from tensorflow lite. Need to append copyright */
+/* Copy() is adapted from TensorFlow Lite */
template <typename T> bool Copy(const T *data_ptr, std::vector<uint16_t> &arr)
{
if (data_ptr->values() == nullptr)
@@ -324,9 +329,8 @@ template <typename T> bool Copy(const T *data_ptr, std::vector<uint16_t> &arr)
return true;
}
-template <typename LoaderDomain, typename SpecificLoader>
-ir::OperandIndex BaseLoader<LoaderDomain, SpecificLoader>::loadOperand(const Tensor *tensor,
- ir::Graph &subg)
+template <typename LoaderDomain>
+ir::OperandIndex BaseLoader<LoaderDomain>::loadOperand(const Tensor *tensor, ir::Graph &subg)
{
ir::Shape shape;
// Shape
@@ -346,67 +350,175 @@ ir::OperandIndex BaseLoader<LoaderDomain, SpecificLoader>::loadOperand(const Ten
   // If the app wants to change the input shape, nnfw_apply_input_tensorinfo() can
   // be used.
- // Type
- ir::DataType data_type = tensorTypeToDataType(tensor->type());
- // Quantization
- auto q_params = tensor->quantization();
- float scale = 0.0;
- long zero_point = 0;
- if (q_params != nullptr)
+ // TypeInfo
+ ir::TypeInfo type_info(tensorTypeToDataType(tensor->type()));
+ loadQuantization(tensor, type_info);
+ loadSparsity(tensor, type_info);
+
+ // Create operand
+ const auto operand_index = subg.addOperand(shape, type_info);
+
+ // Constant tensors are indicated by non-empty data.
+ const auto *data = _domain_model->buffers()->Get(tensor->buffer())->data();
+ if (data != nullptr)
{
- if (q_params->scale())
+ using std::ptrdiff_t;
+ std::shared_ptr<ir::Data> data_obj;
+
+ if (_fd == -1) // Model is from memory
{
- if (q_params->scale()->size() != 1)
- {
- throw std::runtime_error("Only 1 scale for a tensor is supported.");
- }
- scale = q_params->scale()->Get(0);
+ data_obj = std::make_shared<ir::ExternalData>(data->data(), data->size());
}
-
- if (q_params->zero_point())
+ else // Model is loaded(mmap'd) from a file
{
- if (q_params->zero_point()->size() != 1)
+ size_t data_size = data->size();
+ ptrdiff_t unaligned_offset_start = data->data() - _base;
+ ptrdiff_t offset_end = unaligned_offset_start + data_size;
+
+      // Calculate the aligned offset from the base address of the mapped region;
+      // mmap requires a file offset that is a multiple of the page size
+ ptrdiff_t aligned_offset_start = (unaligned_offset_start / _pagesize) * _pagesize;
+ size_t mmap_size = offset_end - aligned_offset_start;
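+      // Worked example, assuming a 4 KiB page size: data at file offset 5000 with
+      // size 3000 yields aligned_offset_start = 4096 and mmap_size = 8000 - 4096 = 3904.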
+
+ uint32_t buf_idx = tensor->buffer();
+ auto buffer_found = _buf_to_data.find(buf_idx);
+
+ if (buffer_found != _buf_to_data.end())
+ {
+          // Another tensor points to this buffer and its matching Data (either CachedData
+          // or MMapedData) was already created, so reuse that Data
+ data_obj = buffer_found->second;
+ }
+ else if (_use_mmaped_data)
+ {
+ data_obj = std::make_shared<ir::MMapedData>(_fd, aligned_offset_start, mmap_size,
+ unaligned_offset_start, data_size);
+ _buf_to_data[buf_idx] = data_obj;
+ }
+ else
{
- throw std::runtime_error("Only 1 zero_point value for a tensor is supported.");
+ size_t offset = unaligned_offset_start - aligned_offset_start;
+ uint8_t *mmap_base = static_cast<uint8_t *>(
+ mmap(NULL, mmap_size, PROT_READ, MAP_PRIVATE, _fd, aligned_offset_start));
+
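+          // ir::CachedData copies the bytes into its own storage, which is why the
+          // transient mapping can be unmapped right below.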
+ data_obj = std::make_shared<ir::CachedData>(mmap_base + offset, data_size);
+ _buf_to_data[buf_idx] = data_obj;
+
+ munmap(mmap_base, mmap_size);
}
- zero_point = q_params->zero_point()->Get(0);
- // zero_point is long while TypeInfo.zero_point is defined as int32_t.
- assert(zero_point >= std::numeric_limits<int32_t>::min());
- assert(zero_point <= std::numeric_limits<int32_t>::max());
}
- auto details = q_params->details_as_CustomQuantization();
- if (details != nullptr)
- throw std::runtime_error("Custom Quantization is not supported");
+ subg.setOperandValue(operand_index, std::move(data_obj));
+ }
+
+ _tensor_names.emplace(operand_index, tensor->name()->str());
+
+ // Variable
+ if (tensor->is_variable())
+ {
+ if (data != nullptr)
+ throw std::runtime_error("Variable tensor with buffer is not supported!");
+
+ subg.operands().at(operand_index).info().setAsVariable();
}
- // Create TypeInfo
- ir::TypeInfo type_info(data_type, scale, zero_point);
- // Sparsity
+
+ return operand_index;
+}
+
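+// Per-channel quantization keeps one (scale, zero_point) pair per channel: a stored
+// value q in channel c represents scale[c] * (q - zero_point[c]). The schema encodes
+// zero_point as int64, so it is range-checked before narrowing to TypeInfo's int32_t.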
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadQuantization(const Tensor *tensor, ir::TypeInfo &typeInfo)
+{
+ auto q_params = tensor->quantization();
+ if (q_params == nullptr || q_params->scale() == nullptr || q_params->scale()->size() == 0)
+ {
+ typeInfo.quantization(0., 0);
+ return;
+ }
+ if (q_params->zero_point() == nullptr)
+ {
+ throw std::runtime_error("Quantization params: scale is not null, but zero_point is null.");
+ }
+ const size_t num_scales = q_params->scale()->size();
+ if (num_scales != q_params->zero_point()->size())
+ {
+ throw std::runtime_error("Quantization params: scale size != zero_point size");
+ }
+ std::vector<float> scales;
+ std::vector<int32_t> zero_points;
+ scales.resize(num_scales);
+ zero_points.resize(num_scales);
+ for (size_t i = 0; i < num_scales; ++i)
+ {
+ scales[i] = q_params->scale()->Get(i);
+    // zero_point is defined as long (int64) in the schema while TypeInfo's zero_point is int32_t.
+    // int64_t is used instead of long because long is 4 bytes on most 32-bit architectures.
+ int64_t zero_point = q_params->zero_point()->Get(i);
+ if (zero_point < std::numeric_limits<int32_t>::min() ||
+ zero_point > std::numeric_limits<int32_t>::max())
+ throw std::runtime_error("Zero_point is out of int32 range.");
+ zero_points[i] = static_cast<int32_t>(zero_point);
+ }
+ auto details = q_params->details_as_CustomQuantization();
+ if (details != nullptr)
+ throw std::runtime_error("Custom Quantization is not supported");
+ typeInfo.quantization(std::move(scales), std::move(zero_points));
+}
+
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadSparsity(const Tensor *tensor, ir::TypeInfo &typeInfo)
+{
auto src_sparsity = tensor->sparsity();
if (src_sparsity != nullptr)
{
std::vector<uint16_t> w1_segments;
std::vector<uint16_t> w1_indices;
- // ignore traversal_order, block_map
+ // check traversal_order
+ if (src_sparsity->traversal_order())
+ {
+ const int traversal_order_size = src_sparsity->traversal_order()->size();
+ for (int i = 0; i < traversal_order_size; ++i)
+ {
+ if (i != src_sparsity->traversal_order()->Get(i))
+ throw std::runtime_error("traversal_order [0, 1, ..., n-1] is only supported.");
+ }
+ }
+ // check block_map
+ int block_rank = 0;
+ if (src_sparsity->block_map())
+ {
+ block_rank = src_sparsity->block_map()->size();
+ for (int i = 0; i < block_rank; ++i)
+ {
+ if (i != src_sparsity->block_map()->Get(i))
+ throw std::runtime_error("block_map [0, 1, ..., n-1] is only supported.");
+ }
+ }
// load metadata
- const size_t dim_metadata_size = src_sparsity->dim_metadata()->size();
- if (dim_metadata_size != 2)
- throw std::runtime_error("sparse tensor is supported only for 2D");
+ const auto dim_metadata_size = src_sparsity->dim_metadata()->size();
+ const auto dense_rank = tensor->shape() ? tensor->shape()->size() : 0;
+ if (dense_rank + block_rank != dim_metadata_size)
+ throw std::runtime_error("sparsity dim_metadata length is wrong.");
+ bool random_sparsity = dim_metadata_size == 2 && block_rank == 0;
+ bool block2D_sparsity = dim_metadata_size == 4 && block_rank == 2;
+    if (!random_sparsity && !block2D_sparsity)
+ throw std::runtime_error(
+ "sparsity is supported only for 2D tensor with random or 16x1 block sparsity.");
+
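+    // Only two layouts are accepted here:
+    //   random 2D sparsity: dim_metadata = [DENSE, SPARSE_CSR], no block_map
+    //   16x1 block sparsity: dim_metadata = [DENSE, SPARSE_CSR, DENSE, DENSE], block_map = [0, 1]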
const auto *src_metadata = src_sparsity->dim_metadata()->Get(0);
if (src_metadata->format() != DimensionType::DimensionType_DENSE)
throw std::runtime_error("sparse tensor dim[0] is not DENSE");
src_metadata = src_sparsity->dim_metadata()->Get(1);
if (src_metadata->format() != DimensionType::DimensionType_SPARSE_CSR)
throw std::runtime_error("sparse tensor dim[0] is not SPARSE_CSR");
-
auto ParseSparseIndexVector = [src_metadata, &w1_segments, &w1_indices]() {
if (src_metadata->array_segments() == nullptr || src_metadata->array_indices() == nullptr)
return false;
bool status = true;
+      /* `onert` internally uses the uint16 type regardless of the values of
+         array_segments_type and array_indices_type */
switch (src_metadata->array_segments_type())
{
case SparseIndexVector::SparseIndexVector_Int32Vector:
- status = Copy(src_metadata->array_segments_as_Int32Vector(), w1_segments);
- break;
+ throw std::runtime_error("sparse tensor with int32 segment type is not supported");
case SparseIndexVector::SparseIndexVector_Uint16Vector:
status = Copy(src_metadata->array_segments_as_Uint16Vector(), w1_segments);
break;
@@ -421,7 +533,7 @@ ir::OperandIndex BaseLoader<LoaderDomain, SpecificLoader>::loadOperand(const Ten
switch (src_metadata->array_indices_type())
{
case SparseIndexVector::SparseIndexVector_Int32Vector:
- return Copy(src_metadata->array_indices_as_Int32Vector(), w1_indices);
+ throw std::runtime_error("sparse tensor with int32 indices type is not supported");
case SparseIndexVector::SparseIndexVector_Uint16Vector:
return Copy(src_metadata->array_indices_as_Uint16Vector(), w1_indices);
case SparseIndexVector::SparseIndexVector_Uint8Vector:
@@ -433,52 +545,33 @@ ir::OperandIndex BaseLoader<LoaderDomain, SpecificLoader>::loadOperand(const Ten
};
if (ParseSparseIndexVector() == false)
throw std::runtime_error("Error during parsing sparsity index information");
- type_info.sparse2DMetadata(std::move(w1_segments), std::move(w1_indices));
- }
- // Create operand
- const auto operand_index = subg.addOperand(shape, type_info);
-
- // Constant tensors are indicated by non-empty data.
- const auto *data = _model->buffers()->Get(tensor->buffer())->data();
- if (data != nullptr)
- {
- using std::ptrdiff_t;
- std::unique_ptr<ir::Data> data_obj;
- if (_fd == -1) // Model is from memory
+ // Get block size
+ std::vector<int32_t> block_size;
+ for (int i = 0; i < block_rank; ++i)
{
- data_obj = std::make_unique<ir::ExternalData>(data->data(), data->size());
+ auto block_metadata = src_sparsity->dim_metadata()->Get(dense_rank + i);
+ if (block_metadata->format() != DimensionType::DimensionType_DENSE)
+ throw std::runtime_error("block dimension must be DENSE.");
+ block_size.push_back(block_metadata->dense_size());
}
- else // Model is loaded(mmap'd) from a file
- {
- data_obj = std::make_unique<ir::CachedData>(data->data(), data->size());
- deallocateMmappedArea(const_cast<uint8_t *>(data->data()), data->size());
- }
- subg.setOperandValue(operand_index, std::move(data_obj));
+ typeInfo.sparsity(std::make_shared<ir::Sparsity>(std::move(w1_segments), std::move(w1_indices),
+ std::move(block_size)));
}
-
- _tensor_names.emplace(operand_index, tensor->name()->str());
-
- // Variablie
- if (tensor->is_variable())
- throw std::runtime_error("Variable tensor not supported!");
-
- return operand_index;
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadOperationIO(const Operator *op,
- ir::OperandIndexSequence &inputs,
- ir::OperandIndexSequence &outputs)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadOperationIO(const Operator *op, ir::OperandIndexSequence &inputs,
+ ir::OperandIndexSequence &outputs)
{
for (const std::int32_t idx : *op->inputs())
{
// Optional tensors are not supported yet except for FULLY_CONNECTED and BCQ_FULLY_CONNECTED
auto check_optional_input = [&]() {
- auto builtin_code = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
+ auto builtin_code = getBuiltinOperator(op);
if (isOptionalInputTensor(idx) && !allowOptionalInputTensor(builtin_code))
throw std::runtime_error(
- std::string("loader doesn't support optional input tensor yet for ")
- .append(EnumNameBuiltinOperator(builtin_code)));
+ std::string("loader doesn't support optional input tensor yet for ")
+ .append(EnumNameBuiltinOperator(builtin_code)));
};
check_optional_input();
inputs.append(tensorIdxToOperandIdx(idx));
@@ -490,120 +583,144 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperationIO(const Operator *o
}
}
-template <typename LoaderDomain, typename SpecificLoader>
+template <typename LoaderDomain>
template <typename Param, typename OptionsType>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadStridesAndPaddings(Param &param,
- const OptionsType *options)
+void BaseLoader<LoaderDomain>::loadStridesAndPaddings(Param &param, const OptionsType *options)
{
// Strides
param.stride.vertical = options->stride_h();
param.stride.horizontal = options->stride_w();
// Paddings
- if (options->padding() == Padding::Padding_SAME)
- param.padding.type = ir::PaddingType::SAME;
- if (options->padding() == Padding::Padding_VALID)
- param.padding.type = ir::PaddingType::VALID;
+ switch (options->padding())
+ {
+ case Padding::Padding_SAME:
+ param.padding.type = ir::PaddingType::SAME;
+ break;
+ case Padding::Padding_VALID:
+ param.padding.type = ir::PaddingType::VALID;
+ break;
+ default:
+ throw std::runtime_error{"Invalid padding type"};
+ }
// param paddings indexes unused
}
-template <typename LoaderDomain, typename SpecificLoader>
+template <typename LoaderDomain>
template <typename Param>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadPool2DOptions(Param &param,
- const Pool2DOptions *options)
+void BaseLoader<LoaderDomain>::loadPool2DOptions(Param &param, const Pool2DOptions *options)
{
// Strides and Paddings
+ if (options->stride_h() <= 0 || options->stride_w() <= 0)
+ throw std::runtime_error{"Invalid stride vertical or horizontal - both must be bigger than 0"};
loadStridesAndPaddings(param, options);
// Filter width and height
// Strides
+ if (options->filter_width() <= 0 || options->filter_height() <= 0)
+ throw std::runtime_error{"Invalid filter width or height - both must be bigger than 0"};
param.kw = options->filter_width();
param.kh = options->filter_height();
// Activation
param.activation = convertActivation(options->fused_activation_function());
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadConv2D(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+template <typename OpIR, typename... Args>
+const OpIR *BaseLoader<LoaderDomain>::loadOperationTo(const Operator *op, ir::Graph &subg,
+ Args &&... args)
{
+  static_assert(sizeof...(args) <= 1, "You can't have more than 1 argument!");
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
loadOperationIO(op, inputs, outputs);
+ std::unique_ptr<OpIR> new_op(new OpIR(inputs, outputs, std::forward<Args>(args)...));
+ auto ret = new_op.get();
+ subg.addOperation(std::move(new_op));
+
+ return ret;
+}
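+// Usage pattern (see loadConv2D right below): a loader builds the op-specific Param and
+// then calls loadOperationTo<ir::operation::Conv2D>(op, subg, param), which wires the
+// operator's declared inputs/outputs and registers the new node with the subgraph.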
+
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadConv2D(const Operator *op, ir::Graph &subg)
+{
ir::operation::Conv2D::Param param;
const auto *options = op->builtin_options_as_Conv2DOptions();
param.activation = convertActivation(options->fused_activation_function());
loadStridesAndPaddings(param, options);
-
param.dilation.width_factor = options->dilation_w_factor();
param.dilation.height_factor = options->dilation_h_factor();
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Conv2D(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+ const auto conv = loadOperationTo<ir::operation::Conv2D>(op, subg, param);
+
+  // TFLite supports old-style hybrid quantization (float input/output, uint8 kernel),
+  // but it interprets the weight type as int8 internally
+ const auto &input_operand =
+ subg.operands().at(conv->getInputs().at(ir::operation::Conv2D::INPUT));
+ auto &weights_operand = subg.operands().at(conv->getInputs().at(ir::operation::Conv2D::KERNEL));
+ if (input_operand.typeInfo().type() == ir::DataType::FLOAT32 &&
+ ((weights_operand.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM) ||
+ weights_operand.typeInfo().type() == ir::DataType::QUANT_INT8_ASYMM))
+ {
+ weights_operand.type(ir::DataType::QUANT_INT8_SYMM);
+ }
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadDepthwiseConv2D(const Operator *op,
- ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadDepthwiseConv2D(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
ir::operation::DepthwiseConv2D::Param param;
const auto *options = op->builtin_options_as_DepthwiseConv2DOptions();
param.activation = convertActivation(options->fused_activation_function());
loadStridesAndPaddings(param, options);
- // Multiplier
param.multiplier = options->depth_multiplier();
-  // Dilation h/w factor unused
- std::unique_ptr<ir::Operation> new_op(new ir::operation::DepthwiseConv2D(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
-}
+ param.dilation.width_factor = options->dilation_w_factor();
+ param.dilation.height_factor = options->dilation_h_factor();
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadTransposeConv(const Operator *op,
- ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
+ const auto dconv = loadOperationTo<ir::operation::DepthwiseConv2D>(op, subg, param);
- loadOperationIO(op, inputs, outputs);
+  // TFLite does not support old-style hybrid quantization (float input/output, uint8 kernel)
+  // for depthwise convolution, but for consistency with Conv2D and FC we interpret the
+  // weight type as int8 internally
+ const auto &input_operand =
+ subg.operands().at(dconv->getInputs().at(ir::operation::DepthwiseConv2D::INPUT));
+ auto &weights_operand =
+ subg.operands().at(dconv->getInputs().at(ir::operation::DepthwiseConv2D::KERNEL));
+ if (input_operand.typeInfo().type() == ir::DataType::FLOAT32 &&
+ ((weights_operand.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM) ||
+ weights_operand.typeInfo().type() == ir::DataType::QUANT_INT8_ASYMM))
+ {
+ weights_operand.type(ir::DataType::QUANT_INT8_SYMM);
+ }
+}
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadTransposeConv(const Operator *op, ir::Graph &subg)
+{
ir::operation::TransposeConv::Param param;
const auto *options = op->builtin_options_as_TransposeConvOptions();
loadStridesAndPaddings(param, options);
- std::unique_ptr<ir::Operation> new_op(new ir::operation::TransposeConv(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+
+ loadOperationTo<ir::operation::TransposeConv>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadPool2D(const Operator *op, ir::Graph &subg,
- ir::operation::Pool2D::PoolType op_type)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadPool2D(const Operator *op, ir::Graph &subg,
+ ir::operation::Pool2D::PoolType op_type)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
ir::operation::Pool2D::Param param;
param.op_type = op_type;
const auto *options = op->builtin_options_as_Pool2DOptions();
loadPool2DOptions(param, options);
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Pool2D(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::Pool2D>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadReshape(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadReshape(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
ir::operation::Reshape::Param param{};
const auto *options = op->builtin_options_as_ReshapeOptions();
if (options != nullptr)
@@ -611,99 +728,67 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadReshape(const Operator *op, i
const auto *new_shape = options->new_shape();
if (new_shape)
{
- for (uint i = 0; i < new_shape->Length(); ++i)
+ for (uint i = 0; i < new_shape->size(); ++i)
{
param.new_shape.push_back(new_shape->Get(i));
}
}
}
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Reshape(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::Reshape>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSoftmax(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadSoftmax(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
ir::operation::Softmax::Param param;
const auto *options = op->builtin_options_as_SoftmaxOptions();
// Beta
param.beta = options->beta();
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Softmax(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::Softmax>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadConcatenation(const Operator *op,
- ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadConcatenation(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
ir::operation::Concat::Param param;
const auto *options = op->builtin_options_as_ConcatenationOptions();
// Axis
param.axis = options->axis();
// activation unused
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Concat(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::Concat>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadFill(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadFC(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Fill(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
+ ir::operation::FullyConnected::Param param;
+ const auto *options = op->builtin_options_as_FullyConnectedOptions();
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadFC(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
+ param.activation = convertActivation(options->fused_activation_function());
+ param.weights_format = static_cast<ir::FullyConnectedWeightsFormat>(options->weights_format());
- loadOperationIO(op, inputs, outputs);
+ const auto fc = loadOperationTo<ir::operation::FullyConnected>(op, subg, param);
- const auto &input_operand = subg.operands().at(inputs.at(ir::operation::FullyConnected::INPUT));
- auto &weights_operand = subg.operands().at(inputs.at(ir::operation::FullyConnected::WEIGHT));
+  // TFLite supports old-style hybrid quantization (float input/output, uint8 kernel),
+  // but it interprets the weight type as int8 internally
+ const auto &input_operand =
+ subg.operands().at(fc->getInputs().at(ir::operation::FullyConnected::INPUT));
+ auto &weights_operand =
+ subg.operands().at(fc->getInputs().at(ir::operation::FullyConnected::WEIGHT));
if (input_operand.typeInfo().type() == ir::DataType::FLOAT32 &&
- weights_operand.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM)
+ ((weights_operand.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM) ||
+ weights_operand.typeInfo().type() == ir::DataType::QUANT_INT8_ASYMM))
{
weights_operand.type(ir::DataType::QUANT_INT8_SYMM);
}
-
- ir::operation::FullyConnected::Param param;
- const auto *options = op->builtin_options_as_FullyConnectedOptions();
-
- param.activation = convertActivation(options->fused_activation_function());
- // weights_format unused
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::FullyConnected(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadAddV2(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadAddV2(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
ir::operation::BinaryArithmetic::Param param;
param.arithmetic_type = ir::operation::BinaryArithmetic::ArithmeticType::ADD;
@@ -713,30 +798,29 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadAddV2(const Operator *op, ir:
}
else
{
- size_t custom_op_data_size = op->custom_options()->size();
- auto custom_op_data = op->custom_options()->Data();
- auto data_root = flexbuffers::GetRoot(custom_op_data, custom_op_data_size);
- auto attr_map = data_root.AsMap();
+ const auto attr_map = getCustomOpAttrMap(op);
const auto fused_activation_func = static_cast<typename LoaderDomain::ActivationFunctionType>(
- attr_map["fused_activation_function"].AsInt8());
+ attr_map["fused_activation_function"].AsInt8());
param.activation = convertActivation(fused_activation_func);
}
- std::unique_ptr<ir::Operation> new_op(
- new ir::operation::BinaryArithmetic(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::BinaryArithmetic>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-template <ir::operation::BinaryArithmetic::ArithmeticType op_type>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadBinaryArithmetic(const Operator *op,
- ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadDepthToSpace(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
+ ir::operation::DepthToSpace::Param param;
+ const auto *options = op->builtin_options_as_DepthToSpaceOptions();
+ param.block_size = options->block_size();
- loadOperationIO(op, inputs, outputs);
+ loadOperationTo<ir::operation::DepthToSpace>(op, subg, param);
+}
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadBinaryArithmetic(
+ const Operator *op, ir::Graph &subg, ir::operation::BinaryArithmetic::ArithmeticType op_type)
+{
ir::operation::BinaryArithmetic::Param param;
param.arithmetic_type = op_type;
switch (op_type)
@@ -771,172 +855,66 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadBinaryArithmetic(const Operat
break;
}
- std::unique_ptr<ir::Operation> new_op(
- new ir::operation::BinaryArithmetic(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::BinaryArithmetic>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadPack(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadPack(const Operator *op, ir::Graph &subg)
{
- // This runtime_error will be removed if the one of backend supports this operation
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
ir::operation::Pack::Param param;
const auto *options = op->builtin_options_as_PackOptions();
param.num = options->values_count();
param.axis = options->axis();
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Pack(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::Pack>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadElementwiseActivation(
- const Operator *op, ir::Graph &subg, ir::operation::ElementwiseActivation::Type op_type,
- float alpha, float beta)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadElementwiseActivation(
+ const Operator *op, ir::Graph &subg, ir::operation::ElementwiseActivation::Type op_type,
+ float alpha, float beta)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
ir::operation::ElementwiseActivation::Param param;
param.op_type = op_type;
param.alpha = alpha;
param.beta = beta;
- std::unique_ptr<ir::Operation> new_op(
- new ir::operation::ElementwiseActivation(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::ElementwiseActivation>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadResizeBilinear(const Operator *op,
- ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadResizeBilinear(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
- auto input = inputs.at(0);
- auto size = inputs.at(1);
-
- // FIXME Handle ResizeBilinearOptions.
- if (!subg.operands().at(size).isConstant())
- throw std::runtime_error("ResizeBilinear: non-constant 'size' is not supported.");
-
- std::vector<std::int32_t> size_v = subg.operands().at(size).template asVector<std::int32_t>();
-
ir::operation::ResizeBilinear::Param param;
- param.height_out = size_v[0];
- param.width_out = size_v[1];
param.align_corners = op->builtin_options_as_ResizeBilinearOptions()->align_corners();
param.half_pixel_centers = op->builtin_options_as_ResizeBilinearOptions()->half_pixel_centers();
- std::unique_ptr<ir::Operation> new_op(new ir::operation::ResizeBilinear({input}, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::ResizeBilinear>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadResizeNearestNeighbor(const Operator *op,
- ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadResizeNearestNeighbor(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
- auto input = inputs.at(0);
- auto size = inputs.at(1);
-
- if (!subg.operands().at(size).isConstant())
- throw std::runtime_error("ResizeNearestNeighbor: non-constant 'size' is not supported.");
-
- std::vector<std::int32_t> size_v = subg.operands().at(size).template asVector<std::int32_t>();
-
ir::operation::ResizeNearestNeighbor::Param param;
- param.height_out = size_v[0];
- param.width_out = size_v[1];
param.align_corners = op->builtin_options_as_ResizeNearestNeighborOptions()->align_corners();
- std::unique_ptr<ir::Operation> new_op(
- new ir::operation::ResizeNearestNeighbor({input}, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::ResizeNearestNeighbor>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSelect(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadReduce(const Operator *op, ir::Graph &subg,
+ ir::operation::Reduce::ReduceType reduce_type)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Select(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSquaredDifference(const Operator *op,
- ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::SquaredDifference(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadTranspose(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
- auto input = inputs.at(0);
- auto perm = inputs.at(1);
-
- if (!subg.operands().at(perm).isConstant())
- throw std::runtime_error("Transpose: non-constant 'perm' is not supported.");
-
- ir::operation::Transpose::Param param;
- param.perm = subg.operands().at(perm).template asVector<int>();
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Transpose({input}, outputs, param));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-template <ir::operation::Reduce::ReduceType reduce_type>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadReduce(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
ir::operation::Reduce::Param param;
param.reduce_type = reduce_type;
param.keep_dims = op->builtin_options_as_ReducerOptions()->keep_dims();
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Reduce(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::Reduce>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadReduceAll(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadReduceAll(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
ir::operation::Reduce::Param param;
param.reduce_type = ir::operation::Reduce::ReduceType::ALL;
if (op->custom_options() == nullptr)
@@ -945,71 +923,32 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadReduceAll(const Operator *op,
}
else
{
- size_t custom_op_data_size = op->custom_options()->size();
- auto custom_op_data = op->custom_options()->Data();
- auto data_root = flexbuffers::GetRoot(custom_op_data, custom_op_data_size);
- auto attr_map = data_root.AsMap();
+ const auto attr_map = getCustomOpAttrMap(op);
param.keep_dims = attr_map["keep_dims"].AsBool();
}
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Reduce(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::Reduce>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadReverseV2(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadElementwiseBinary(
+ const Operator *op, ir::Graph &subg,
+ ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Reverse(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadPad(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Pad(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-template <ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadElementwiseBinary(const Operator *op,
- ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
ir::operation::ElementwiseBinary::Param param;
param.op_type = op_type;
- std::unique_ptr<ir::Operation> new_op(
- new ir::operation::ElementwiseBinary(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::ElementwiseBinary>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadElementwiseUnary(
- const Operator *op, ir::Graph &subg, ir::operation::ElementwiseUnary::Type op_type)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadElementwiseUnary(const Operator *op, ir::Graph &subg,
+ ir::operation::ElementwiseUnary::Type op_type)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
ir::operation::ElementwiseUnary::Param param;
param.op_type = op_type;
+ const auto eu = loadOperationTo<ir::operation::ElementwiseUnary>(op, subg, param);
if (op_type == ir::operation::ElementwiseUnary::Type::CAST)
{
auto qasymm8ToUint8 = [](ir::Operand &operand) {
@@ -1018,71 +957,75 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadElementwiseUnary(
operand.type(ir::DataType::UINT8);
}
};
- qasymm8ToUint8(subg.operands().at(inputs.at(ir::operation::ElementwiseUnary::Input::INPUT)));
- qasymm8ToUint8(subg.operands().at(outputs.at(0)));
+ qasymm8ToUint8(
+ subg.operands().at(eu->getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)));
+ qasymm8ToUint8(subg.operands().at(eu->getOutputs().at(0)));
}
-
- std::unique_ptr<ir::Operation> new_op(
- new ir::operation::ElementwiseUnary(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadExpandDims(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadGather(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
+ ir::operation::Gather::Param param;
+ param.axis = op->builtin_options_as_GatherOptions()->axis();
- std::unique_ptr<ir::Operation> new_op(new ir::operation::ExpandDims(inputs, outputs));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::Gather>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadGather(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadDetectionPostProcess(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
+ const auto &m = getCustomOpAttrMap(op);
- loadOperationIO(op, inputs, outputs);
- ir::operation::Gather::Param param;
- param.axis = op->builtin_options_as_GatherOptions()->axis();
+ ir::operation::DetectionPostProcess::Param param;
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Gather(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
-}
+ param.max_detections = m["max_detections"].AsInt32();
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSpaceToBatchND(const Operator *op,
- ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
+ // TODO fixme
+ param.max_classes_per_detection = m["max_classes_per_detection"].AsInt32();
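+  // Attributes may be absent from the flexbuffer map; the fallbacks below match TFLite's
+  // DetectionPostProcess defaults (100 boxes per class, fast non-regular NMS).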
+ if (m["detections_per_class"].IsNull())
+ param.max_boxes_per_class = 100;
+ else
+ param.max_boxes_per_class = m["detections_per_class"].AsInt32();
- loadOperationIO(op, inputs, outputs);
+ if (m["use_regular_nms"].IsNull())
+ param.do_fast_eval = true;
+ else
+ param.do_fast_eval = !m["use_regular_nms"].AsBool();
- std::unique_ptr<ir::Operation> new_op{new ir::operation::SpaceToBatchND{inputs, outputs}};
- subg.addOperation(std::move(new_op));
+ param.score_threshold = m["nms_score_threshold"].AsFloat();
+ param.iou_threshold = m["nms_iou_threshold"].AsFloat();
+
+ // TODO add num classes support
+ param.num_classes = m["num_classes"].AsInt32();
+
+ param.scale.y_scale = m["y_scale"].AsFloat();
+ param.scale.x_scale = m["x_scale"].AsFloat();
+ param.scale.h_scale = m["h_scale"].AsFloat();
+ param.scale.w_scale = m["w_scale"].AsFloat();
+
+ // TODO depends on input model framework
+ param.center_size_boxes = true;
+
+ loadOperationTo<ir::operation::DetectionPostProcess>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadBatchMatMul(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadBatchMatMul(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
ir::operation::BatchMatMul::Param param;
- const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
+ const auto builtin_op = getBuiltinOperator(op);
switch (builtin_op)
{
case BuiltinOperator::BuiltinOperator_BATCH_MATMUL:
- param.adj_x = op->builtin_options_as_BatchMatMulOptions()->adjoint_lhs();
- param.adj_y = op->builtin_options_as_BatchMatMulOptions()->adjoint_rhs();
- break;
+      // Handled in each concrete loader since the option name differs:
+ // Circle: adjoint_lhs, adjoint_rhs
+ // TFLite: adj_x, adj_y
+ throw std::runtime_error(
+ std::string("Cannot handle here: ").append(EnumNameBuiltinOperator(builtin_op)) + " as " +
+ EnumNameBuiltinOperator(BuiltinOperator::BuiltinOperator_BATCH_MATMUL));
case BuiltinOperator::BuiltinOperator_CUSTOM:
if (op->custom_options() == nullptr)
{
@@ -1091,103 +1034,32 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadBatchMatMul(const Operator *o
}
else
{
- size_t custom_op_data_size = op->custom_options()->size();
- auto custom_op_data = op->custom_options()->Data();
- auto data_root = flexbuffers::GetRoot(custom_op_data, custom_op_data_size);
- auto attr_map = data_root.AsMap();
+ const auto attr_map = getCustomOpAttrMap(op);
param.adj_x = attr_map["adj_x"].AsBool();
param.adj_y = attr_map["adj_y"].AsBool();
}
break;
default:
throw std::runtime_error(
- std::string("Wrong loaded operation: ").append(EnumNameBuiltinOperator(builtin_op)) +
- " as " + EnumNameBuiltinOperator(BuiltinOperator::BuiltinOperator_BATCH_MATMUL));
+ std::string("Wrong loaded operation: ").append(EnumNameBuiltinOperator(builtin_op)) +
+ " as " + EnumNameBuiltinOperator(BuiltinOperator::BuiltinOperator_BATCH_MATMUL));
}
- std::unique_ptr<ir::Operation> new_op{new ir::operation::BatchMatMul{inputs, outputs, param}};
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::BatchMatMul>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadBatchToSpaceND(const Operator *op,
- ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadSpaceToDepth(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op{new ir::operation::BatchToSpaceND{inputs, outputs}};
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadMatrixBandPart(const Operator *op,
- ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::MatrixBandPart(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadBroadcastTo(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::BroadcastTo(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSpaceToDepth(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
ir::operation::SpaceToDepth::Param param;
-
const auto *options = op->builtin_options_as_SpaceToDepthOptions();
-
param.block_size = options->block_size();
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::SpaceToDepth(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::SpaceToDepth>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadStatelessRandomUniform(const Operator *op,
- ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::StatelessRandomUniform(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadRank(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Rank(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadCustom(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
@@ -1195,7 +1067,7 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir
assert(op->custom_options_format() == CustomOptionsFormat::CustomOptionsFormat_FLEXBUFFERS &&
"Unsupported custom operation options format");
- auto *op_code = _model->operator_codes()->Get(op->opcode_index());
+ auto *op_code = _domain_model->operator_codes()->Get(op->opcode_index());
auto custom_op_name = op_code->custom_code()->str();
enum class BuiltinOP
@@ -1208,20 +1080,22 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir
BroadcastTo,
FusedBatchNorm,
StatelessRandomUniform,
- Erf
+ Erf,
+ DetectionPostProcess
};
// Mapping from custom op name string to BuiltinOP enum
std::map<std::string, BuiltinOP> builtin_map = {
- {"AddV2", BuiltinOP::AddV2},
- {"All", BuiltinOP::ReduceAll},
- {"MatrixBandPart", BuiltinOP::MatrixBandPart},
- {"BatchMatMulV2", BuiltinOP::BatchMatMul},
- {"Einsum", BuiltinOP::Einsum},
- {"FusedBatchNormV3", BuiltinOP::FusedBatchNorm},
- {"BroadcastTo", BuiltinOP::BroadcastTo},
- {"StatelessRandomUniform", BuiltinOP::StatelessRandomUniform},
- {"Erf", BuiltinOP::Erf},
+ {"AddV2", BuiltinOP::AddV2},
+ {"All", BuiltinOP::ReduceAll},
+ {"MatrixBandPart", BuiltinOP::MatrixBandPart},
+ {"BatchMatMulV2", BuiltinOP::BatchMatMul},
+ {"Einsum", BuiltinOP::Einsum},
+ {"FusedBatchNormV3", BuiltinOP::FusedBatchNorm},
+ {"BroadcastTo", BuiltinOP::BroadcastTo},
+ {"StatelessRandomUniform", BuiltinOP::StatelessRandomUniform},
+ {"Erf", BuiltinOP::Erf},
+ {"TFLite_Detection_PostProcess", BuiltinOP::DetectionPostProcess},
};
try
@@ -1237,7 +1111,7 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir
loadReduceAll(op, subg);
break;
case BuiltinOP::MatrixBandPart:
- loadMatrixBandPart(op, subg);
+ loadOperationTo<ir::operation::MatrixBandPart>(op, subg);
break;
case BuiltinOP::BatchMatMul:
loadBatchMatMul(op, subg);
@@ -1246,20 +1120,23 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir
loadEinsum(op, subg);
break;
case BuiltinOP::BroadcastTo:
- loadBroadcastTo(op, subg);
+ loadOperationTo<ir::operation::BroadcastTo>(op, subg);
break;
case BuiltinOP::FusedBatchNorm:
loadFusedBatchNorm(op, subg);
break;
case BuiltinOP::StatelessRandomUniform:
- loadStatelessRandomUniform(op, subg);
+ loadOperationTo<ir::operation::StatelessRandomUniform>(op, subg);
break;
case BuiltinOP::Erf:
loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::ERF);
break;
+ case BuiltinOP::DetectionPostProcess:
+ loadDetectionPostProcess(op, subg);
+ break;
default:
throw std::runtime_error{
- "Loader: Custom OP map is defined but operation loader function is not defined"};
+ "Loader: Custom OP map is defined but operation loader function is not defined"};
}
return;
@@ -1285,142 +1162,72 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir
}
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSqueeze(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadSqueeze(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- ir::operation::Squeeze::Param param{};
+ ir::operation::Squeeze::Param param;
const auto *options = op->builtin_options_as_SqueezeOptions();
const auto *dims = options->squeeze_dims();
if (dims)
{
- if (dims->Length() > sizeof(param.dims) / sizeof(param.dims[0]))
+ if (dims->size() > sizeof(param.dims) / sizeof(param.dims[0]))
throw std::runtime_error("Squeeze: 'param.ndims' is out of range.");
- param.ndim = dims->Length();
+ param.ndim = dims->size();
for (int i = 0; i < param.ndim; ++i)
param.dims[i] = dims->Get(i);
}
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Squeeze(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadPrelu(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::PReLU(inputs, outputs));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::Squeeze>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSplit(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadSplit(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
- // Notice : input order is strange for tflite split
- auto input = inputs.at(1);
- auto axis = inputs.at(0);
-
- // FIXME Handle SplitOptions.
- if (!subg.operands().at(axis).isConstant())
- throw std::runtime_error("Split: non-constant 'axis' is not supported.");
-
- ir::operation::Split::Param param{};
- param.axis = subg.operands().at(axis).template asScalar<int>();
+ ir::operation::Split::Param param;
const auto *options = op->builtin_options_as_SplitOptions();
param.num_splits = options->num_splits();
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Split({input}, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::Split>(op, subg, param);
}
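
(Worth noting: the removed lines above also dropped the constant-axis extraction; the old loader read the axis from input 0 and re-ordered the inputs, while the new code forwards both tensors untouched. That resolution presumably now happens at runtime, roughly along these lines; names here are assumptions, not from this diff:

    // Hypothetical runtime-side resolution of Split's axis input.
    const auto &axis_operand = operands.at(split.getInputs().at(ir::operation::Split::Input::AXIS));
    if (!axis_operand.isConstant())
      throw std::runtime_error("Split: non-constant 'axis' is not supported.");
    const auto axis = axis_operand.asScalar<int32_t>();
)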
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSplitV(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadSplitV(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- ir::operation::SplitV::Param param{};
-
+ ir::operation::SplitV::Param param;
const auto *options = op->builtin_options_as_SplitVOptions();
param.num_splits = options->num_splits();
- std::unique_ptr<ir::Operation> new_op(new ir::operation::SplitV(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::SplitV>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSlice(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadStridedSlice(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op{new ir::operation::Slice{inputs, outputs}};
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadStridedSlice(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
ir::operation::StridedSlice::Param param;
-
const auto *options = op->builtin_options_as_StridedSliceOptions();
param.begin_mask = options->begin_mask();
param.end_mask = options->end_mask();
param.shrink_axis_mask = options->shrink_axis_mask();
- std::unique_ptr<ir::Operation> new_op{new ir::operation::StridedSlice{inputs, outputs, param}};
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::StridedSlice>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadUnpack(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadUnpack(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
ir::operation::Unpack::Param param;
const auto *options = op->builtin_options_as_UnpackOptions();
param.num = options->num();
param.axis = options->axis();
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Unpack(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::Unpack>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadComparison(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadComparison(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
ir::operation::Comparison::Param param;
-
- const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
+ const auto builtin_op = getBuiltinOperator(op);
switch (builtin_op)
{
@@ -1444,267 +1251,180 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadComparison(const Operator *op
break;
default:
throw std::runtime_error(
- std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op)));
+ std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op)));
}
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Comparison(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::Comparison>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadEinsum(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadEinsum(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
ir::operation::Einsum::Param param;
-
- if (inputs.size() != 2)
- {
- throw std::runtime_error{"Einsum: NYI input - only support two inputs"};
- }
-
if (op->custom_options() == nullptr)
{
throw std::runtime_error{"Einsum: empty equation"};
}
else
{
- size_t custom_op_data_size = op->custom_options()->size();
- auto custom_op_data = op->custom_options()->Data();
- auto data_root = flexbuffers::GetRoot(custom_op_data, custom_op_data_size);
- auto attr_map = data_root.AsMap();
+ const auto attr_map = getCustomOpAttrMap(op);
param.equation = attr_map["equation"].ToString();
}
- std::unique_ptr<ir::Operation> new_op{new ir::operation::Einsum{inputs, outputs, param}};
- subg.addOperation(std::move(new_op));
+ const auto es = loadOperationTo<ir::operation::Einsum>(op, subg, param);
+ if (es->getInputs().size() != 2)
+ {
+ throw std::runtime_error{"Einsum: NYI input - only support two inputs"};
+ }
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadFusedBatchNorm(const Operator *op,
- ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadFusedBatchNorm(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
ir::operation::FusedBatchNorm::Param param;
-
- if (inputs.size() != 5)
- {
- throw std::runtime_error{"FusedBatchNorm: NYI input - only support five inputs"};
- }
-
if (op->custom_options() == nullptr)
{
throw std::runtime_error{"FusedBatchNorm: empty option"};
}
else
{
- size_t custom_op_data_size = op->custom_options()->size();
- auto custom_op_data = op->custom_options()->Data();
- auto data_root = flexbuffers::GetRoot(custom_op_data, custom_op_data_size);
- auto attr_map = data_root.AsMap();
+ const auto attr_map = getCustomOpAttrMap(op);
param.is_training = attr_map["is_training"].AsBool();
param.epsilon = attr_map["epsilon"].AsFloat();
param.data_format = attr_map["data_format"].ToString();
}
- std::unique_ptr<ir::Operation> new_op{new ir::operation::FusedBatchNorm{inputs, outputs, param}};
- subg.addOperation(std::move(new_op));
+ const auto fbn = loadOperationTo<ir::operation::FusedBatchNorm>(op, subg, param);
+
+ if (fbn->getInputs().size() != 5)
+ {
+ throw std::runtime_error{"FusedBatchNorm: NYI input - only support five inputs"};
+ }
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadOneHot(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadOneHot(const Operator *op, ir::Graph &subg)
{
if (op->inputs()->size() != 4 || op->outputs()->size() != 1)
throw std::runtime_error("OneHot Op has wrong number of input or output tensors.");
- // Set input and output tensors
- ir::OperandIndexSequence inputs, outputs;
- loadOperationIO(op, inputs, outputs);
-
// Set parameter
- const auto axis = op->builtin_options_as_OneHotOptions()->axis();
- std::unique_ptr<ir::Operation> new_op(new ir::operation::OneHot(inputs, outputs, {axis}));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadShape(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
+ ir::operation::OneHot::Param param;
+ param.axis = op->builtin_options_as_OneHotOptions()->axis();
- // ir::operation::Shape::Param param;
- // const auto *options = op->builtin_options_as_ShapeOptions();
- // param.out_type = tensorTypeToDataType(options->out_type());
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Shape(inputs, outputs /*, param*/));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::OneHot>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadIf(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadIf(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
+ const auto *options = op->builtin_options_as_IfOptions();
+ const int32_t then_index = options->then_subgraph_index();
+ const int32_t else_index = options->else_subgraph_index();
- loadOperationIO(op, inputs, outputs);
+ verifySubgraphIndex(then_index);
+ verifySubgraphIndex(else_index);
ir::operation::If::Param param;
- const auto *options = op->builtin_options_as_IfOptions();
- const uint32_t then_index = options->then_subgraph_index();
- const uint32_t else_index = options->else_subgraph_index();
- param.then_subg_index = ir::SubgraphIndex{then_index};
- param.else_subg_index = ir::SubgraphIndex{else_index};
+ param.then_subg_index = ir::SubgraphIndex{static_cast<uint16_t>(then_index)};
+ param.else_subg_index = ir::SubgraphIndex{static_cast<uint16_t>(else_index)};
- std::unique_ptr<ir::Operation> new_op(new ir::operation::If(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::If>(op, subg, param);
}
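
(verifySubgraphIndex is new here and its body is outside this hunk; a minimal sketch consistent with the static_cast<uint16_t> that follows it, with the exact message being an assumption:

    template <typename LoaderDomain>
    void BaseLoader<LoaderDomain>::verifySubgraphIndex(int subg_index)
    {
      // Reject indices that cannot fit the 16-bit ir::SubgraphIndex before the
      // narrowing cast above is performed.
      const int max_size = static_cast<int>(ir::SubgraphIndex::max());
      if (subg_index < 0 || subg_index > max_size)
        throw std::runtime_error{"invalid subgraph index - " + std::to_string(subg_index)};
    }
)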
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadWhile(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadWhile(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
+ const auto *options = op->builtin_options_as_WhileOptions();
+ const int32_t cond_index = options->cond_subgraph_index();
+ const int32_t body_index = options->body_subgraph_index();
- loadOperationIO(op, inputs, outputs);
+ verifySubgraphIndex(cond_index);
+ verifySubgraphIndex(body_index);
ir::operation::While::Param param;
- const auto *options = op->builtin_options_as_WhileOptions();
- const uint32_t cond_index = options->cond_subgraph_index();
- const uint32_t body_index = options->body_subgraph_index();
- param.cond_subg_index = ir::SubgraphIndex{cond_index};
- param.body_subg_index = ir::SubgraphIndex{body_index};
+ param.cond_subg_index = ir::SubgraphIndex{static_cast<uint16_t>(cond_index)};
+ param.body_subg_index = ir::SubgraphIndex{static_cast<uint16_t>(body_index)};
- std::unique_ptr<ir::Operation> new_op(new ir::operation::While(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::While>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadArgMax(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadArgMinMax(const Operator *op, ir::Graph &subg, bool is_argmax)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- auto inputOperand = subg.operands().at(inputs.at(0));
- auto axisOperand = subg.operands().at(inputs.at(1));
-
- if (!axisOperand.isConstant())
- throw std::runtime_error("ArgMax: non-constant 'axis' is not supported.");
- if (!(axisOperand.operandSize() == 4 && (axisOperand.typeInfo().type() == ir::DataType::INT32 ||
- axisOperand.typeInfo().type() == ir::DataType::INT64)))
- throw std::runtime_error("ArgMax: `axis` with an int32 or int64 element is only supported.");
-
- ir::operation::ArgMax::Param param;
- param.axis = axisOperand.template asVector<int>()[0];
- const auto output_type = op->builtin_options_as_ArgMaxOptions()->output_type();
- switch (output_type)
- {
- case TensorType::TensorType_INT32:
- case TensorType::TensorType_INT64:
- break;
- default:
- throw std::runtime_error("ArgMax: `output_type` must be either int32 or int64.");
- }
+ ir::operation::ArgMinMax::Param param;
+ const auto output_type = is_argmax ? op->builtin_options_as_ArgMaxOptions()->output_type()
+ : op->builtin_options_as_ArgMinOptions()->output_type();
param.output_type = tensorTypeToDataType(output_type);
- std::unique_ptr<ir::Operation> new_op(new ir::operation::ArgMax(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadPow(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
+ param.is_arg_max = is_argmax;
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Pow(inputs, outputs));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::ArgMinMax>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadRange(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadLogSoftmax(const Operator *op, ir::Graph &subg)
{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Range(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadTile(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- auto multiples = inputs.at(ir::operation::Tile::MULTIPLES);
-
- if (!subg.operands().at(multiples).isConstant())
- throw std::runtime_error("Tile: non-constant 'multiples' is not supported.");
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Tile(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadLogSoftmax(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
ir::operation::LogSoftmax::Param param;
-
// In tflite, beta is fixed to 1.0 and axis is fixed to -1.
param.beta = 1.0f;
param.axis = -1;
- std::unique_ptr<ir::Operation> new_op(new ir::operation::LogSoftmax(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
+ loadOperationTo<ir::operation::LogSoftmax>(op, subg, param);
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadL2Normalization(const Operator *op,
- ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadLeakyRelu(const Operator *op, ir::Graph &subg)
{
+ float alpha = op->builtin_options_as_LeakyReluOptions()->alpha();
+ loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::LEAKY_RELU, alpha,
+ 1.f);
+}
+
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadUnidirectionalSequenceLSTM(const Operator *op, ir::Graph &subg)
+{
+ ir::operation::LSTM::Param param;
+ const auto *options = op->builtin_options_as_UnidirectionalSequenceLSTMOptions();
+ param.activation = convertActivation(options->fused_activation_function());
+ param.cell_threshold = options->cell_clip();
+ param.projection_threshold = options->proj_clip();
+ param.time_major = options->time_major();
+ // The asymmetric_quantize_inputs option is not used yet
+
ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
+ for (const std::int32_t idx : *op->inputs())
+ {
+ inputs.append(tensorIdxToOperandIdx(idx));
+ }
- loadOperationIO(op, inputs, outputs);
+ ir::OperandIndexSequence outputs;
+ // The loader does not support optional output tensors yet
+ if (op->outputs()->size() != 1)
+ {
+ auto builtin_code = getBuiltinOperator(op);
+ throw std::runtime_error(std::string("loader doesn't support optional output tensor yet for ")
+ .append(EnumNameBuiltinOperator(builtin_code)));
+ }
+ for (size_t i = 0; i < ir::operation::LSTM::Output::OUTPUT; ++i)
+ {
+ // Append undefined indices as placeholders for the unused optional outputs
+ outputs.append(ir::OperandIndex());
+ }
+ outputs.append(tensorIdxToOperandIdx(op->outputs()->Get(0)));
- std::unique_ptr<ir::Operation> new_op(new ir::operation::L2Normalization(inputs, outputs));
+ std::unique_ptr<ir::operation::LSTM> new_op(new ir::operation::LSTM(inputs, outputs, param));
subg.addOperation(std::move(new_op));
}
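
(To make the output wiring above concrete: the loop appends one undefined index per optional output, then the model's single real tensor lands in the final slot. Assuming the usual LSTM output layout, which is an assumption about the enum and not shown in this diff:

    // With Output::{SCRATCH_BUFFER, OUTPUT_STATE_OUT, CELL_STATE_OUT, OUTPUT},
    // i.e. OUTPUT == 3, the sequence built above is
    //   outputs = { undefined, undefined, undefined, real_output }
    // so only the OUTPUT slot is connected to a model tensor.
)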
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadLeakyRelu(const Operator *op, ir::Graph &subg)
+template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadOperation(const Operator *op, ir::Graph &subg)
{
- float alpha = op->builtin_options_as_LeakyReluOptions()->alpha();
- loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::LEAKY_RELU, alpha,
- 1.f);
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op, ir::Graph &subg)
-{
- const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
+ auto const builtin_op = getBuiltinOperator(op);
switch (builtin_op)
{
+ case BuiltinOperator::BuiltinOperator_ADD_N:
+ loadOperationTo<ir::operation::AddN>(op, subg);
+ return;
case BuiltinOperator::BuiltinOperator_CONV_2D:
loadConv2D(op, subg);
return;
@@ -1729,24 +1449,30 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
case BuiltinOperator::BuiltinOperator_CONCATENATION:
loadConcatenation(op, subg);
return;
+ case BuiltinOperator::BuiltinOperator_FLOOR:
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::FLOOR);
+ return;
case BuiltinOperator::BuiltinOperator_FULLY_CONNECTED:
loadFC(op, subg);
return;
case BuiltinOperator::BuiltinOperator_ADD:
- loadBinaryArithmetic<ir::operation::BinaryArithmetic::ArithmeticType::ADD>(op, subg);
+ loadBinaryArithmetic(op, subg, ir::operation::BinaryArithmetic::ArithmeticType::ADD);
return;
case BuiltinOperator::BuiltinOperator_SUB:
- loadBinaryArithmetic<ir::operation::BinaryArithmetic::ArithmeticType::SUB>(op, subg);
+ loadBinaryArithmetic(op, subg, ir::operation::BinaryArithmetic::ArithmeticType::SUB);
return;
case BuiltinOperator::BuiltinOperator_MUL:
- loadBinaryArithmetic<ir::operation::BinaryArithmetic::ArithmeticType::MUL>(op, subg);
+ loadBinaryArithmetic(op, subg, ir::operation::BinaryArithmetic::ArithmeticType::MUL);
return;
case BuiltinOperator::BuiltinOperator_DIV:
- loadBinaryArithmetic<ir::operation::BinaryArithmetic::ArithmeticType::DIV>(op, subg);
+ loadBinaryArithmetic(op, subg, ir::operation::BinaryArithmetic::ArithmeticType::DIV);
return;
case BuiltinOperator::BuiltinOperator_PACK:
loadPack(op, subg);
return;
+ case BuiltinOperator::BuiltinOperator_ELU:
+ loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::ELU);
+ return;
case BuiltinOperator::BuiltinOperator_RELU:
loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::RELU,
ir::operation::ElementwiseActivation::infinity, 0.f);
@@ -1769,40 +1495,40 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::RSQRT);
return;
case BuiltinOperator::BuiltinOperator_SELECT:
- loadSelect(op, subg);
- return;
case BuiltinOperator::BuiltinOperator_SELECT_V2:
- // Use same loader with BuiltinOperator_SELECT
- loadSelect(op, subg);
+ loadOperationTo<ir::operation::Select>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_SQRT:
loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::SQRT);
return;
+ case BuiltinOperator::BuiltinOperator_SQUARE:
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::SQUARE);
+ return;
case BuiltinOperator::BuiltinOperator_SQUARED_DIFFERENCE:
- loadSquaredDifference(op, subg);
+ loadOperationTo<ir::operation::SquaredDifference>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_TANH:
loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::TANH, 1.f,
1.f);
return;
case BuiltinOperator::BuiltinOperator_TRANSPOSE:
- loadTranspose(op, subg);
+ loadOperationTo<ir::operation::Transpose>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_MEAN:
- loadReduce<ir::operation::Reduce::ReduceType::MEAN>(op, subg);
+ loadReduce(op, subg, ir::operation::Reduce::ReduceType::MEAN);
return;
case BuiltinOperator::BuiltinOperator_REDUCE_ANY:
- loadReduce<ir::operation::Reduce::ReduceType::ANY>(op, subg);
+ loadReduce(op, subg, ir::operation::Reduce::ReduceType::ANY);
return;
case BuiltinOperator::BuiltinOperator_REDUCE_MAX:
- loadReduce<ir::operation::Reduce::ReduceType::MAX>(op, subg);
+ loadReduce(op, subg, ir::operation::Reduce::ReduceType::MAX);
return;
case BuiltinOperator::BuiltinOperator_REVERSE_V2:
- loadReverseV2(op, subg);
+ loadOperationTo<ir::operation::Reverse>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_PAD:
case BuiltinOperator::BuiltinOperator_PADV2:
- loadPad(op, subg);
+ loadOperationTo<ir::operation::Pad>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_LOGISTIC:
loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::LOGISTIC);
@@ -1811,19 +1537,19 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::EXP);
return;
case BuiltinOperator::BuiltinOperator_EXPAND_DIMS:
- loadExpandDims(op, subg);
+ loadOperationTo<ir::operation::ExpandDims>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_GATHER:
loadGather(op, subg);
return;
case BuiltinOperator::BuiltinOperator_SPACE_TO_BATCH_ND:
- loadSpaceToBatchND(op, subg);
+ loadOperationTo<ir::operation::SpaceToBatchND>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_BATCH_TO_SPACE_ND:
- loadBatchToSpaceND(op, subg);
+ loadOperationTo<ir::operation::BatchToSpaceND>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_SUM:
- loadReduce<ir::operation::Reduce::ReduceType::SUM>(op, subg);
+ loadReduce(op, subg, ir::operation::Reduce::ReduceType::SUM);
return;
case BuiltinOperator::BuiltinOperator_CUSTOM:
loadCustom(op, subg);
@@ -1832,7 +1558,7 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
loadSqueeze(op, subg);
return;
case BuiltinOperator::BuiltinOperator_PRELU:
- loadPrelu(op, subg);
+ loadOperationTo<ir::operation::PReLU>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_SPLIT:
loadSplit(op, subg);
@@ -1841,7 +1567,7 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
loadSplitV(op, subg);
return;
case BuiltinOperator::BuiltinOperator_SLICE:
- loadSlice(op, subg);
+ loadOperationTo<ir::operation::Slice>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_STRIDED_SLICE:
loadStridedSlice(op, subg);
@@ -1849,11 +1575,15 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
case BuiltinOperator::BuiltinOperator_UNPACK:
loadUnpack(op, subg);
return;
+ case BuiltinOperator::BuiltinOperator_FLOOR_DIV:
+ loadElementwiseBinary(op, subg,
+ ir::operation::ElementwiseBinary::ElementwiseBinaryType::FLOOR_DIV);
+ return;
case BuiltinOperator::BuiltinOperator_MINIMUM:
- loadElementwiseBinary<ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN>(op, subg);
+ loadElementwiseBinary(op, subg, ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN);
return;
case BuiltinOperator::BuiltinOperator_MAXIMUM:
- loadElementwiseBinary<ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX>(op, subg);
+ loadElementwiseBinary(op, subg, ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX);
return;
case BuiltinOperator::BuiltinOperator_CAST:
loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::CAST);
@@ -1879,10 +1609,10 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::SIN);
return;
case BuiltinOperator::BuiltinOperator_SHAPE:
- loadShape(op, subg);
+ loadOperationTo<ir::operation::Shape>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_REDUCE_PROD:
- loadReduce<ir::operation::Reduce::ReduceType::PROD>(op, subg);
+ loadReduce(op, subg, ir::operation::Reduce::ReduceType::PROD);
return;
case BuiltinOperator::BuiltinOperator_IF:
loadIf(op, subg);
@@ -1894,7 +1624,10 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::NEG);
return;
case BuiltinOperator::BuiltinOperator_ARG_MAX:
- loadArgMax(op, subg);
+ loadArgMinMax(op, subg, true);
+ return;
+ case BuiltinOperator::BuiltinOperator_ARG_MIN:
+ loadArgMinMax(op, subg, false);
return;
case BuiltinOperator::BuiltinOperator_LOG:
loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::LOG);
@@ -1903,26 +1636,30 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::ROUND);
return;
case BuiltinOperator::BuiltinOperator_POW:
- loadPow(op, subg);
+ loadOperationTo<ir::operation::Pow>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_LOGICAL_NOT:
loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::LOGICAL_NOT);
return;
+ case BuiltinOperator::BuiltinOperator_LOGICAL_AND:
+ loadElementwiseBinary(op, subg,
+ ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND);
+ return;
case BuiltinOperator::BuiltinOperator_LOGICAL_OR:
- loadElementwiseBinary<ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR>(
- op, subg);
+ loadElementwiseBinary(op, subg,
+ ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR);
return;
case BuiltinOperator::BuiltinOperator_FILL:
- loadFill(op, subg);
+ loadOperationTo<ir::operation::Fill>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_ZEROS_LIKE:
loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::ZEROS_LIKE);
return;
case BuiltinOperator::BuiltinOperator_TILE:
- loadTile(op, subg);
+ loadOperationTo<ir::operation::Tile>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_RANGE:
- loadRange(op, subg);
+ loadOperationTo<ir::operation::Range>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_BATCH_MATMUL:
loadBatchMatMul(op, subg);
@@ -1933,29 +1670,43 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
case BuiltinOperator::BuiltinOperator_QUANTIZE:
loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::QUANTIZE);
return;
+ case BuiltinOperator::BuiltinOperator_DEQUANTIZE:
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::DEQUANTIZE);
+ return;
case BuiltinOperator::BuiltinOperator_SPACE_TO_DEPTH:
loadSpaceToDepth(op, subg);
return;
case BuiltinOperator::BuiltinOperator_L2_NORMALIZATION:
- loadL2Normalization(op, subg);
+ loadOperationTo<ir::operation::L2Normalization>(op, subg);
break;
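
(One small inconsistency: this case ends with break; while every sibling case returns. The behavior is identical because the switch is the last statement of loadOperation, but return; is presumably what was intended.)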
case BuiltinOperator::BuiltinOperator_LEAKY_RELU:
loadLeakyRelu(op, subg);
return;
case BuiltinOperator::BuiltinOperator_RANK:
- loadRank(op, subg);
+ loadOperationTo<ir::operation::Rank>(op, subg);
+ return;
+ case BuiltinOperator::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM:
+ loadUnidirectionalSequenceLSTM(op, subg);
+ return;
+ case BuiltinOperator::BuiltinOperator_DEPTH_TO_SPACE:
+ loadDepthToSpace(op, subg);
+ return;
+ case BuiltinOperator::BuiltinOperator_EMBEDDING_LOOKUP:
+ loadOperationTo<ir::operation::EmbeddingLookup>(op, subg);
+ return;
+ case BuiltinOperator::BuiltinOperator_HASHTABLE_LOOKUP:
+ loadOperationTo<ir::operation::HashtableLookup>(op, subg);
return;
default:
throw std::runtime_error(
- std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op)));
+ std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op)));
}
}
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadModel()
+template <typename LoaderDomain> void BaseLoader<LoaderDomain>::loadModel()
{
LoaderDomain::VerifyModelBuffer(*_verifier.get());
- _model = LoaderDomain::GetModel(_base);
+ _domain_model = LoaderDomain::GetModel(_base);
// Version unused
// const auto version = _model->version();
// Description unused
@@ -1963,15 +1714,19 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadModel()
// Metabuffer unused
// const auto *metadata_buffer = _model->metadata_buffer();
// Load subgraphs and map operations on subgraph
- const auto domain_subgraphs = _model->subgraphs();
- auto subgraphs = std::make_unique<ir::Subgraphs>();
- for (uint32_t subgraph_index = 0; subgraph_index < domain_subgraphs->size(); ++subgraph_index)
+ const auto subgraphs = _domain_model->subgraphs();
+ auto model = std::make_unique<ir::Model>();
+ if (subgraphs->size() - 1 > ir::SubgraphIndex::max())
+ throw std::runtime_error{"The number of subgraphs cannot exceed " +
+ std::to_string(ir::SubgraphIndex::max() + 1)};
+ for (uint16_t subgraph_index = 0; subgraph_index < subgraphs->size(); ++subgraph_index)
{
- auto subg =
- static_cast<SpecificLoader *>(this)->loadSubgraph((*_model->subgraphs())[subgraph_index]);
- subgraphs->push(ir::SubgraphIndex{subgraph_index}, std::move(subg));
+ auto subg = loadSubgraph((*_domain_model->subgraphs())[subgraph_index]);
+ // NOTE: () is used instead of {} because () does not check narrowing.
+ // This is okay since overflow is checked by the if-statement above.
+ model->push(ir::SubgraphIndex(subgraph_index), std::move(subg));
}
- _subgraphs = std::move(subgraphs);
+ _model = std::move(model);
}
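
(One caveat with the new guard: subgraphs->size() is unsigned, so size() - 1 wraps around for a model with zero subgraphs. A reformulation without the subtraction, offered as a suggestion rather than what the commit does:

    // Same limit, no unsigned wrap-around when subgraphs->size() == 0.
    if (subgraphs->size() > static_cast<uint32_t>(ir::SubgraphIndex::max()) + 1u)
      throw std::runtime_error{"The number of subgraphs cannot exceed " +
                               std::to_string(ir::SubgraphIndex::max() + 1)};
)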
} // namespace base_loader
diff --git a/runtime/onert/frontend/circle/CMakeLists.txt b/runtime/onert/frontend/circle/CMakeLists.txt
index 8bcf85dd3..fffe5cc37 100644
--- a/runtime/onert/frontend/circle/CMakeLists.txt
+++ b/runtime/onert/frontend/circle/CMakeLists.txt
@@ -4,12 +4,11 @@ endif ()
set(CIRCLE_LOADER_SOURCES src/circle_loader.cc)
-add_library(circle_loader SHARED ${CIRCLE_LOADER_SOURCES})
+add_library(circle_loader STATIC ${CIRCLE_LOADER_SOURCES})
+set_target_properties(circle_loader PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(circle_loader PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
-target_link_libraries(circle_loader PUBLIC onert_core)
+target_link_libraries(circle_loader PRIVATE onert_core)
target_link_libraries(circle_loader PRIVATE base_loader nnfw_common nnfw_coverage)
target_link_libraries(circle_loader PRIVATE circle_schema)
-
-install(TARGETS circle_loader DESTINATION lib)
diff --git a/runtime/onert/frontend/circle/include/circle_loader.h b/runtime/onert/frontend/circle/include/circle_loader.h
index 675a5b3e7..87e5d70ae 100644
--- a/runtime/onert/frontend/circle/include/circle_loader.h
+++ b/runtime/onert/frontend/circle/include/circle_loader.h
@@ -25,8 +25,8 @@ namespace onert
{
namespace circle_loader
{
-std::unique_ptr<ir::Subgraphs> loadModel(const char *filename);
-std::unique_ptr<ir::Subgraphs> loadModel(uint8_t *buffer, size_t size);
+std::unique_ptr<ir::Model> loadModel(const std::string &filename);
+std::unique_ptr<ir::Model> loadModel(uint8_t *buffer, size_t size);
} // namespace circle_loader
} // namespace onert
diff --git a/runtime/onert/frontend/circle/src/circle_loader.cc b/runtime/onert/frontend/circle/src/circle_loader.cc
index 92a9ee7a5..5bf626d6c 100644
--- a/runtime/onert/frontend/circle/src/circle_loader.cc
+++ b/runtime/onert/frontend/circle/src/circle_loader.cc
@@ -69,9 +69,15 @@ struct LoaderDomain
static bool VerifyModelBuffer(Verifier &verifier) { return circle::VerifyModelBuffer(verifier); }
};
-class CircleLoader final : public base_loader::BaseLoader<LoaderDomain, CircleLoader>
+class CircleLoader final : public base_loader::BaseLoader<LoaderDomain>
{
protected:
+ // The option name differs:
+ // Circle: adjoint_lhs, adjoint_rhs
+ // TFLite: adj_x, adj_y
+ void loadBatchMatMul(const Operator *op, ir::Graph &subg);
+
+ // Circle-only operations
void loadInstanceNorm(const Operator *op, ir::Graph &subg);
void loadBCQFullyConnected(const Operator *op, ir::Graph &subg);
void loadBCQGather(const Operator *op, ir::Graph &subg);
@@ -85,13 +91,15 @@ public:
{
case BuiltinOperator::BuiltinOperator_FULLY_CONNECTED:
case BuiltinOperator::BuiltinOperator_BCQ_FULLY_CONNECTED:
+ case BuiltinOperator::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM:
return true;
default:
return false;
}
}
- std::unique_ptr<ir::Graph> loadSubgraph(const circle::SubGraph *circle_subg)
+private:
+ std::unique_ptr<ir::Graph> loadSubgraph(const circle::SubGraph *circle_subg) override
{
auto subg = std::make_unique<ir::Graph>();
// Load tensors
@@ -120,17 +128,20 @@ public:
subg->setLayout(convertDataFormat(circle_subg->data_format()));
- subg->finishBuilding();
+ subg->verify();
return subg;
}
void loadOperation(const circle::Operator *op, ir::Graph &subg)
{
- const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
+ auto const builtin_op = getBuiltinOperator(op);
switch (builtin_op)
{
+ case circle::BuiltinOperator::BuiltinOperator_BATCH_MATMUL:
+ loadBatchMatMul(op, subg);
+ return;
case circle::BuiltinOperator::BuiltinOperator_INSTANCE_NORM:
loadInstanceNorm(op, subg);
return;
@@ -147,6 +158,23 @@ public:
}
};
+void CircleLoader::loadBatchMatMul(const Operator *op, ir::Graph &subg)
+{
+ ir::OperandIndexSequence inputs;
+ ir::OperandIndexSequence outputs;
+
+ loadOperationIO(op, inputs, outputs);
+
+ ir::operation::BatchMatMul::Param param;
+ const auto *options = op->builtin_options_as_BatchMatMulOptions();
+
+ param.adj_x = options->adjoint_lhs();
+ param.adj_y = options->adjoint_rhs();
+
+ std::unique_ptr<ir::Operation> new_op(new ir::operation::BatchMatMul(inputs, outputs, param));
+ subg.addOperation(std::move(new_op));
+}
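
(For contrast with the Circle override above, the TFLite-side loader presumably reads the same flags under their TFLite names. A sketch; the TFLiteLoader class itself is not part of this diff:

    void TFLiteLoader::loadBatchMatMul(const Operator *op, ir::Graph &subg)
    {
      ir::OperandIndexSequence inputs;
      ir::OperandIndexSequence outputs;
      loadOperationIO(op, inputs, outputs);

      ir::operation::BatchMatMul::Param param;
      const auto *options = op->builtin_options_as_BatchMatMulOptions();
      // TFLite spells the transpose flags adj_x/adj_y rather than adjoint_lhs/adjoint_rhs.
      param.adj_x = options->adj_x();
      param.adj_y = options->adj_y();

      std::unique_ptr<ir::Operation> new_op(new ir::operation::BatchMatMul(inputs, outputs, param));
      subg.addOperation(std::move(new_op));
    }
)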
+
void CircleLoader::loadInstanceNorm(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
@@ -194,26 +222,26 @@ void CircleLoader::loadBCQFullyConnected(const Operator *op, ir::Graph &subg)
param.activation = convertActivation(options->fused_activation_function());
std::unique_ptr<ir::Operation> new_op(
- new ir::operation::BCQFullyConnected(inputs, outputs, param));
+ new ir::operation::BCQFullyConnected(inputs, outputs, param));
subg.addOperation(std::move(new_op));
}
} // namespace
-std::unique_ptr<ir::Subgraphs> loadModel(const char *filename)
+std::unique_ptr<ir::Model> loadModel(const std::string &filename)
{
- auto subgraphs = std::make_unique<ir::Subgraphs>();
- CircleLoader loader(subgraphs);
+ auto model = std::make_unique<ir::Model>();
+ CircleLoader loader(model);
loader.loadFromFile(filename);
- return subgraphs;
+ return model;
}
-std::unique_ptr<ir::Subgraphs> loadModel(uint8_t *buffer, size_t size)
+std::unique_ptr<ir::Model> loadModel(uint8_t *buffer, size_t size)
{
- auto subgraphs = std::make_unique<ir::Subgraphs>();
- CircleLoader loader(subgraphs);
+ auto model = std::make_unique<ir::Model>();
+ CircleLoader loader(model);
loader.loadFromBuffer(buffer, size);
- return subgraphs;
+ return model;
}
} // namespace circle_loader
diff --git a/runtime/onert/frontend/circle_schema/include/circle_schema_generated.h b/runtime/onert/frontend/circle_schema/include/circle_schema_generated.h
index 190c84d98..dd6f9dcd7 100644
--- a/runtime/onert/frontend/circle_schema/include/circle_schema_generated.h
+++ b/runtime/onert/frontend/circle_schema/include/circle_schema_generated.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2019-2023 Samsung Electronics Co., Ltd. All Rights Reserved
* Copyright 2018 The TensorFlow Authors. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -25,242 +25,444 @@ namespace circle
{
struct CustomQuantization;
+struct CustomQuantizationBuilder;
struct QuantizationParameters;
+struct QuantizationParametersBuilder;
struct Int32Vector;
+struct Int32VectorBuilder;
struct Uint16Vector;
+struct Uint16VectorBuilder;
struct Uint8Vector;
+struct Uint8VectorBuilder;
struct DimensionMetadata;
+struct DimensionMetadataBuilder;
struct SparsityParameters;
+struct SparsityParametersBuilder;
+
+struct VariantSubType;
+struct VariantSubTypeBuilder;
struct Tensor;
+struct TensorBuilder;
struct Conv2DOptions;
+struct Conv2DOptionsBuilder;
+
+struct Conv3DOptions;
+struct Conv3DOptionsBuilder;
struct Pool2DOptions;
+struct Pool2DOptionsBuilder;
struct DepthwiseConv2DOptions;
+struct DepthwiseConv2DOptionsBuilder;
struct ConcatEmbeddingsOptions;
+struct ConcatEmbeddingsOptionsBuilder;
struct LSHProjectionOptions;
+struct LSHProjectionOptionsBuilder;
struct SVDFOptions;
+struct SVDFOptionsBuilder;
struct RNNOptions;
+struct RNNOptionsBuilder;
struct SequenceRNNOptions;
+struct SequenceRNNOptionsBuilder;
struct BidirectionalSequenceRNNOptions;
+struct BidirectionalSequenceRNNOptionsBuilder;
struct FullyConnectedOptions;
+struct FullyConnectedOptionsBuilder;
struct SoftmaxOptions;
+struct SoftmaxOptionsBuilder;
struct ConcatenationOptions;
+struct ConcatenationOptionsBuilder;
struct AddOptions;
+struct AddOptionsBuilder;
struct MulOptions;
+struct MulOptionsBuilder;
struct L2NormOptions;
+struct L2NormOptionsBuilder;
struct LocalResponseNormalizationOptions;
+struct LocalResponseNormalizationOptionsBuilder;
struct LSTMOptions;
+struct LSTMOptionsBuilder;
struct UnidirectionalSequenceLSTMOptions;
+struct UnidirectionalSequenceLSTMOptionsBuilder;
struct BidirectionalSequenceLSTMOptions;
+struct BidirectionalSequenceLSTMOptionsBuilder;
struct ResizeBilinearOptions;
+struct ResizeBilinearOptionsBuilder;
struct ResizeNearestNeighborOptions;
+struct ResizeNearestNeighborOptionsBuilder;
struct CallOptions;
+struct CallOptionsBuilder;
struct PadOptions;
+struct PadOptionsBuilder;
struct PadV2Options;
+struct PadV2OptionsBuilder;
struct ReshapeOptions;
+struct ReshapeOptionsBuilder;
struct SpaceToBatchNDOptions;
+struct SpaceToBatchNDOptionsBuilder;
struct BatchToSpaceNDOptions;
+struct BatchToSpaceNDOptionsBuilder;
struct SkipGramOptions;
+struct SkipGramOptionsBuilder;
struct SpaceToDepthOptions;
+struct SpaceToDepthOptionsBuilder;
struct DepthToSpaceOptions;
+struct DepthToSpaceOptionsBuilder;
struct SubOptions;
+struct SubOptionsBuilder;
struct DivOptions;
+struct DivOptionsBuilder;
struct TopKV2Options;
+struct TopKV2OptionsBuilder;
struct EmbeddingLookupSparseOptions;
+struct EmbeddingLookupSparseOptionsBuilder;
struct GatherOptions;
+struct GatherOptionsBuilder;
struct TransposeOptions;
+struct TransposeOptionsBuilder;
struct ExpOptions;
+struct ExpOptionsBuilder;
struct CosOptions;
+struct CosOptionsBuilder;
struct ReducerOptions;
+struct ReducerOptionsBuilder;
struct SqueezeOptions;
+struct SqueezeOptionsBuilder;
struct SplitOptions;
+struct SplitOptionsBuilder;
struct SplitVOptions;
+struct SplitVOptionsBuilder;
struct StridedSliceOptions;
+struct StridedSliceOptionsBuilder;
struct LogSoftmaxOptions;
+struct LogSoftmaxOptionsBuilder;
struct CastOptions;
+struct CastOptionsBuilder;
struct DequantizeOptions;
+struct DequantizeOptionsBuilder;
struct MaximumMinimumOptions;
+struct MaximumMinimumOptionsBuilder;
struct TileOptions;
+struct TileOptionsBuilder;
struct ArgMaxOptions;
+struct ArgMaxOptionsBuilder;
struct ArgMinOptions;
+struct ArgMinOptionsBuilder;
struct GreaterOptions;
+struct GreaterOptionsBuilder;
struct GreaterEqualOptions;
+struct GreaterEqualOptionsBuilder;
struct LessOptions;
+struct LessOptionsBuilder;
struct LessEqualOptions;
+struct LessEqualOptionsBuilder;
struct NegOptions;
+struct NegOptionsBuilder;
struct SelectOptions;
+struct SelectOptionsBuilder;
struct SliceOptions;
+struct SliceOptionsBuilder;
struct TransposeConvOptions;
+struct TransposeConvOptionsBuilder;
struct ExpandDimsOptions;
+struct ExpandDimsOptionsBuilder;
struct SparseToDenseOptions;
+struct SparseToDenseOptionsBuilder;
struct EqualOptions;
+struct EqualOptionsBuilder;
struct NotEqualOptions;
+struct NotEqualOptionsBuilder;
struct ShapeOptions;
+struct ShapeOptionsBuilder;
struct RankOptions;
+struct RankOptionsBuilder;
struct PowOptions;
+struct PowOptionsBuilder;
struct FakeQuantOptions;
+struct FakeQuantOptionsBuilder;
struct PackOptions;
+struct PackOptionsBuilder;
struct LogicalOrOptions;
+struct LogicalOrOptionsBuilder;
struct OneHotOptions;
+struct OneHotOptionsBuilder;
struct AbsOptions;
+struct AbsOptionsBuilder;
struct HardSwishOptions;
+struct HardSwishOptionsBuilder;
struct LogicalAndOptions;
+struct LogicalAndOptionsBuilder;
struct LogicalNotOptions;
+struct LogicalNotOptionsBuilder;
struct UnpackOptions;
+struct UnpackOptionsBuilder;
struct FloorDivOptions;
+struct FloorDivOptionsBuilder;
struct SquareOptions;
+struct SquareOptionsBuilder;
struct ZerosLikeOptions;
+struct ZerosLikeOptionsBuilder;
struct FillOptions;
+struct FillOptionsBuilder;
struct FloorModOptions;
+struct FloorModOptionsBuilder;
struct RangeOptions;
+struct RangeOptionsBuilder;
struct LeakyReluOptions;
+struct LeakyReluOptionsBuilder;
struct SquaredDifferenceOptions;
+struct SquaredDifferenceOptionsBuilder;
struct MirrorPadOptions;
+struct MirrorPadOptionsBuilder;
struct UniqueOptions;
+struct UniqueOptionsBuilder;
struct ReverseV2Options;
+struct ReverseV2OptionsBuilder;
struct AddNOptions;
+struct AddNOptionsBuilder;
struct GatherNdOptions;
+struct GatherNdOptionsBuilder;
struct WhereOptions;
+struct WhereOptionsBuilder;
struct ReverseSequenceOptions;
+struct ReverseSequenceOptionsBuilder;
struct MatrixDiagOptions;
+struct MatrixDiagOptionsBuilder;
struct QuantizeOptions;
+struct QuantizeOptionsBuilder;
struct MatrixSetDiagOptions;
+struct MatrixSetDiagOptionsBuilder;
struct IfOptions;
+struct IfOptionsBuilder;
+
+struct CallOnceOptions;
+struct CallOnceOptionsBuilder;
struct WhileOptions;
+struct WhileOptionsBuilder;
struct NonMaxSuppressionV4Options;
+struct NonMaxSuppressionV4OptionsBuilder;
struct NonMaxSuppressionV5Options;
+struct NonMaxSuppressionV5OptionsBuilder;
struct ScatterNdOptions;
+struct ScatterNdOptionsBuilder;
struct SelectV2Options;
+struct SelectV2OptionsBuilder;
struct DensifyOptions;
+struct DensifyOptionsBuilder;
struct SegmentSumOptions;
+struct SegmentSumOptionsBuilder;
struct BatchMatMulOptions;
+struct BatchMatMulOptionsBuilder;
+
+struct CumsumOptions;
+struct CumsumOptionsBuilder;
+
+struct BroadcastToOptions;
+struct BroadcastToOptionsBuilder;
+
+struct Rfft2dOptions;
+struct Rfft2dOptionsBuilder;
+
+struct HashtableOptions;
+struct HashtableOptionsBuilder;
+
+struct HashtableFindOptions;
+struct HashtableFindOptionsBuilder;
+
+struct HashtableImportOptions;
+struct HashtableImportOptionsBuilder;
+
+struct HashtableSizeOptions;
+struct HashtableSizeOptionsBuilder;
+
+struct VarHandleOptions;
+struct VarHandleOptionsBuilder;
+
+struct ReadVariableOptions;
+struct ReadVariableOptionsBuilder;
+
+struct AssignVariableOptions;
+struct AssignVariableOptionsBuilder;
+
+struct RandomOptions;
+struct RandomOptionsBuilder;
+
+struct BucketizeOptions;
+struct BucketizeOptionsBuilder;
+
+struct GeluOptions;
+struct GeluOptionsBuilder;
+
+struct DynamicUpdateSliceOptions;
+struct DynamicUpdateSliceOptionsBuilder;
+
+struct UnsortedSegmentProdOptions;
+struct UnsortedSegmentProdOptionsBuilder;
+
+struct UnsortedSegmentMaxOptions;
+struct UnsortedSegmentMaxOptionsBuilder;
+
+struct UnsortedSegmentSumOptions;
+struct UnsortedSegmentSumOptionsBuilder;
+
+struct ATan2Options;
+struct ATan2OptionsBuilder;
+
+struct UnsortedSegmentMinOptions;
+struct UnsortedSegmentMinOptionsBuilder;
+
+struct SignOptions;
+struct SignOptionsBuilder;
+
+struct BitcastOptions;
+struct BitcastOptionsBuilder;
+
+struct BitwiseXorOptions;
+struct BitwiseXorOptionsBuilder;
+
+struct RightShiftOptions;
+struct RightShiftOptionsBuilder;
struct BCQGatherOptions;
+struct BCQGatherOptionsBuilder;
struct BCQFullyConnectedOptions;
+struct BCQFullyConnectedOptionsBuilder;
struct InstanceNormOptions;
+struct InstanceNormOptionsBuilder;
struct OperatorCode;
+struct OperatorCodeBuilder;
struct Operator;
+struct OperatorBuilder;
struct SubGraph;
+struct SubGraphBuilder;
struct Buffer;
+struct BufferBuilder;
struct Metadata;
+struct MetadataBuilder;
+
+struct TensorMap;
+struct TensorMapBuilder;
+
+struct SignatureDef;
+struct SignatureDefBuilder;
struct Model;
+struct ModelBuilder;
-enum TensorType
+enum TensorType : int8_t
{
TensorType_FLOAT32 = 0,
TensorType_FLOAT16 = 1,
@@ -273,34 +475,46 @@ enum TensorType
TensorType_COMPLEX64 = 8,
TensorType_INT8 = 9,
TensorType_FLOAT64 = 10,
+ TensorType_COMPLEX128 = 11,
+ TensorType_UINT64 = 12,
+ TensorType_RESOURCE = 13,
+ TensorType_VARIANT = 14,
+ TensorType_UINT32 = 15,
+ TensorType_UINT16 = 16,
+ TensorType_INT4 = 17,
TensorType_MIN = TensorType_FLOAT32,
- TensorType_MAX = TensorType_FLOAT64
+ TensorType_MAX = TensorType_INT4
};
-inline const TensorType (&EnumValuesTensorType())[11]
+inline const TensorType (&EnumValuesTensorType())[18]
{
- static const TensorType values[] = {TensorType_FLOAT32, TensorType_FLOAT16, TensorType_INT32,
- TensorType_UINT8, TensorType_INT64, TensorType_STRING,
- TensorType_BOOL, TensorType_INT16, TensorType_COMPLEX64,
- TensorType_INT8, TensorType_FLOAT64};
+ static const TensorType values[] = {
+ TensorType_FLOAT32, TensorType_FLOAT16, TensorType_INT32, TensorType_UINT8,
+ TensorType_INT64, TensorType_STRING, TensorType_BOOL, TensorType_INT16,
+ TensorType_COMPLEX64, TensorType_INT8, TensorType_FLOAT64, TensorType_COMPLEX128,
+ TensorType_UINT64, TensorType_RESOURCE, TensorType_VARIANT, TensorType_UINT32,
+ TensorType_UINT16, TensorType_INT4};
return values;
}
inline const char *const *EnumNamesTensorType()
{
- static const char *const names[] = {"FLOAT32", "FLOAT16", "INT32", "UINT8",
- "INT64", "STRING", "BOOL", "INT16",
- "COMPLEX64", "INT8", "FLOAT64", nullptr};
+ static const char *const names[19] = {"FLOAT32", "FLOAT16", "INT32", "UINT8", "INT64",
+ "STRING", "BOOL", "INT16", "COMPLEX64", "INT8",
+ "FLOAT64", "COMPLEX128", "UINT64", "RESOURCE", "VARIANT",
+ "UINT32", "UINT16", "INT4", nullptr};
return names;
}
inline const char *EnumNameTensorType(TensorType e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, TensorType_FLOAT32, TensorType_INT4))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesTensorType()[index];
}
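
(The regenerated helpers now bounds-check before indexing the names array. For example, with a hypothetical value not taken from this diff:

    // EnumNameTensorType(static_cast<TensorType>(99)) now returns "" instead
    // of indexing past EnumNamesTensorType(); the same IsOutRange guard is
    // applied to the other regenerated Enum helpers below.
)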
-enum QuantizationDetails
+enum QuantizationDetails : uint8_t
{
QuantizationDetails_NONE = 0,
QuantizationDetails_CustomQuantization = 1,
@@ -317,13 +531,15 @@ inline const QuantizationDetails (&EnumValuesQuantizationDetails())[2]
inline const char *const *EnumNamesQuantizationDetails()
{
- static const char *const names[] = {"NONE", "CustomQuantization", nullptr};
+ static const char *const names[3] = {"NONE", "CustomQuantization", nullptr};
return names;
}
inline const char *EnumNameQuantizationDetails(QuantizationDetails e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, QuantizationDetails_NONE, QuantizationDetails_CustomQuantization))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesQuantizationDetails()[index];
}
@@ -332,7 +548,7 @@ template <typename T> struct QuantizationDetailsTraits
static const QuantizationDetails enum_value = QuantizationDetails_NONE;
};
-template <> struct QuantizationDetailsTraits<CustomQuantization>
+template <> struct QuantizationDetailsTraits<circle::CustomQuantization>
{
static const QuantizationDetails enum_value = QuantizationDetails_CustomQuantization;
};
@@ -343,7 +559,7 @@ bool VerifyQuantizationDetailsVector(flatbuffers::Verifier &verifier,
const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
const flatbuffers::Vector<uint8_t> *types);
-enum DimensionType
+enum DimensionType : int8_t
{
DimensionType_DENSE = 0,
DimensionType_SPARSE_CSR = 1,
@@ -359,17 +575,19 @@ inline const DimensionType (&EnumValuesDimensionType())[2]
inline const char *const *EnumNamesDimensionType()
{
- static const char *const names[] = {"DENSE", "SPARSE_CSR", nullptr};
+ static const char *const names[3] = {"DENSE", "SPARSE_CSR", nullptr};
return names;
}
inline const char *EnumNameDimensionType(DimensionType e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, DimensionType_DENSE, DimensionType_SPARSE_CSR))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesDimensionType()[index];
}
-enum SparseIndexVector
+enum SparseIndexVector : uint8_t
{
SparseIndexVector_NONE = 0,
SparseIndexVector_Int32Vector = 1,
@@ -389,14 +607,16 @@ inline const SparseIndexVector (&EnumValuesSparseIndexVector())[4]
inline const char *const *EnumNamesSparseIndexVector()
{
- static const char *const names[] = {"NONE", "Int32Vector", "Uint16Vector", "Uint8Vector",
- nullptr};
+ static const char *const names[5] = {"NONE", "Int32Vector", "Uint16Vector", "Uint8Vector",
+ nullptr};
return names;
}
inline const char *EnumNameSparseIndexVector(SparseIndexVector e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, SparseIndexVector_NONE, SparseIndexVector_Uint8Vector))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesSparseIndexVector()[index];
}
@@ -405,17 +625,17 @@ template <typename T> struct SparseIndexVectorTraits
static const SparseIndexVector enum_value = SparseIndexVector_NONE;
};
-template <> struct SparseIndexVectorTraits<Int32Vector>
+template <> struct SparseIndexVectorTraits<circle::Int32Vector>
{
static const SparseIndexVector enum_value = SparseIndexVector_Int32Vector;
};
-template <> struct SparseIndexVectorTraits<Uint16Vector>
+template <> struct SparseIndexVectorTraits<circle::Uint16Vector>
{
static const SparseIndexVector enum_value = SparseIndexVector_Uint16Vector;
};
-template <> struct SparseIndexVectorTraits<Uint8Vector>
+template <> struct SparseIndexVectorTraits<circle::Uint8Vector>
{
static const SparseIndexVector enum_value = SparseIndexVector_Uint8Vector;
};
@@ -426,8 +646,11 @@ bool VerifySparseIndexVectorVector(flatbuffers::Verifier &verifier,
const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
const flatbuffers::Vector<uint8_t> *types);
-enum BuiltinOperator
+enum BuiltinOperator : int32_t
{
+ BuiltinOperator_BCQ_GATHER = -4,
+ BuiltinOperator_BCQ_FULLY_CONNECTED = -3,
+ BuiltinOperator_INSTANCE_NORM = -2,
BuiltinOperator_ADD = 0,
BuiltinOperator_AVERAGE_POOL_2D = 1,
BuiltinOperator_CONCATENATION = 2,
@@ -555,16 +778,51 @@ enum BuiltinOperator
BuiltinOperator_DENSIFY = 124,
BuiltinOperator_SEGMENT_SUM = 125,
BuiltinOperator_BATCH_MATMUL = 126,
- BuiltinOperator_BCQ_GATHER = 252,
- BuiltinOperator_BCQ_FULLY_CONNECTED = 253,
- BuiltinOperator_INSTANCE_NORM = 254,
- BuiltinOperator_MIN = BuiltinOperator_ADD,
- BuiltinOperator_MAX = BuiltinOperator_INSTANCE_NORM
-};
-
-inline const BuiltinOperator (&EnumValuesBuiltinOperator())[130]
-{
- static const BuiltinOperator values[] = {BuiltinOperator_ADD,
+ BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES = 127,
+ BuiltinOperator_CUMSUM = 128,
+ BuiltinOperator_CALL_ONCE = 129,
+ BuiltinOperator_BROADCAST_TO = 130,
+ BuiltinOperator_RFFT2D = 131,
+ BuiltinOperator_CONV_3D = 132,
+ BuiltinOperator_IMAG = 133,
+ BuiltinOperator_REAL = 134,
+ BuiltinOperator_COMPLEX_ABS = 135,
+ BuiltinOperator_HASHTABLE = 136,
+ BuiltinOperator_HASHTABLE_FIND = 137,
+ BuiltinOperator_HASHTABLE_IMPORT = 138,
+ BuiltinOperator_HASHTABLE_SIZE = 139,
+ BuiltinOperator_REDUCE_ALL = 140,
+ BuiltinOperator_CONV_3D_TRANSPOSE = 141,
+ BuiltinOperator_VAR_HANDLE = 142,
+ BuiltinOperator_READ_VARIABLE = 143,
+ BuiltinOperator_ASSIGN_VARIABLE = 144,
+ BuiltinOperator_BROADCAST_ARGS = 145,
+ BuiltinOperator_RANDOM_STANDARD_NORMAL = 146,
+ BuiltinOperator_BUCKETIZE = 147,
+ BuiltinOperator_RANDOM_UNIFORM = 148,
+ BuiltinOperator_MULTINOMIAL = 149,
+ BuiltinOperator_GELU = 150,
+ BuiltinOperator_DYNAMIC_UPDATE_SLICE = 151,
+ BuiltinOperator_RELU_0_TO_1 = 152,
+ BuiltinOperator_UNSORTED_SEGMENT_PROD = 153,
+ BuiltinOperator_UNSORTED_SEGMENT_MAX = 154,
+ BuiltinOperator_UNSORTED_SEGMENT_SUM = 155,
+ BuiltinOperator_ATAN2 = 156,
+ BuiltinOperator_UNSORTED_SEGMENT_MIN = 157,
+ BuiltinOperator_SIGN = 158,
+ BuiltinOperator_BITCAST = 159,
+ BuiltinOperator_BITWISE_XOR = 160,
+ BuiltinOperator_RIGHT_SHIFT = 161,
+ BuiltinOperator_MIN = BuiltinOperator_BCQ_GATHER,
+ BuiltinOperator_MAX = BuiltinOperator_RIGHT_SHIFT
+};
+
+inline const BuiltinOperator (&EnumValuesBuiltinOperator())[165]
+{
+ static const BuiltinOperator values[] = {BuiltinOperator_BCQ_GATHER,
+ BuiltinOperator_BCQ_FULLY_CONNECTED,
+ BuiltinOperator_INSTANCE_NORM,
+ BuiltinOperator_ADD,
BuiltinOperator_AVERAGE_POOL_2D,
BuiltinOperator_CONCATENATION,
BuiltinOperator_CONV_2D,
@@ -691,280 +949,225 @@ inline const BuiltinOperator (&EnumValuesBuiltinOperator())[130]
BuiltinOperator_DENSIFY,
BuiltinOperator_SEGMENT_SUM,
BuiltinOperator_BATCH_MATMUL,
- BuiltinOperator_BCQ_GATHER,
- BuiltinOperator_BCQ_FULLY_CONNECTED,
- BuiltinOperator_INSTANCE_NORM};
+ BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES,
+ BuiltinOperator_CUMSUM,
+ BuiltinOperator_CALL_ONCE,
+ BuiltinOperator_BROADCAST_TO,
+ BuiltinOperator_RFFT2D,
+ BuiltinOperator_CONV_3D,
+ BuiltinOperator_IMAG,
+ BuiltinOperator_REAL,
+ BuiltinOperator_COMPLEX_ABS,
+ BuiltinOperator_HASHTABLE,
+ BuiltinOperator_HASHTABLE_FIND,
+ BuiltinOperator_HASHTABLE_IMPORT,
+ BuiltinOperator_HASHTABLE_SIZE,
+ BuiltinOperator_REDUCE_ALL,
+ BuiltinOperator_CONV_3D_TRANSPOSE,
+ BuiltinOperator_VAR_HANDLE,
+ BuiltinOperator_READ_VARIABLE,
+ BuiltinOperator_ASSIGN_VARIABLE,
+ BuiltinOperator_BROADCAST_ARGS,
+ BuiltinOperator_RANDOM_STANDARD_NORMAL,
+ BuiltinOperator_BUCKETIZE,
+ BuiltinOperator_RANDOM_UNIFORM,
+ BuiltinOperator_MULTINOMIAL,
+ BuiltinOperator_GELU,
+ BuiltinOperator_DYNAMIC_UPDATE_SLICE,
+ BuiltinOperator_RELU_0_TO_1,
+ BuiltinOperator_UNSORTED_SEGMENT_PROD,
+ BuiltinOperator_UNSORTED_SEGMENT_MAX,
+ BuiltinOperator_UNSORTED_SEGMENT_SUM,
+ BuiltinOperator_ATAN2,
+ BuiltinOperator_UNSORTED_SEGMENT_MIN,
+ BuiltinOperator_SIGN,
+ BuiltinOperator_BITCAST,
+ BuiltinOperator_BITWISE_XOR,
+ BuiltinOperator_RIGHT_SHIFT};
return values;
}
inline const char *const *EnumNamesBuiltinOperator()
{
- static const char *const names[] = {"ADD",
- "AVERAGE_POOL_2D",
- "CONCATENATION",
- "CONV_2D",
- "DEPTHWISE_CONV_2D",
- "DEPTH_TO_SPACE",
- "DEQUANTIZE",
- "EMBEDDING_LOOKUP",
- "FLOOR",
- "FULLY_CONNECTED",
- "HASHTABLE_LOOKUP",
- "L2_NORMALIZATION",
- "L2_POOL_2D",
- "LOCAL_RESPONSE_NORMALIZATION",
- "LOGISTIC",
- "LSH_PROJECTION",
- "LSTM",
- "MAX_POOL_2D",
- "MUL",
- "RELU",
- "RELU_N1_TO_1",
- "RELU6",
- "RESHAPE",
- "RESIZE_BILINEAR",
- "RNN",
- "SOFTMAX",
- "SPACE_TO_DEPTH",
- "SVDF",
- "TANH",
- "CONCAT_EMBEDDINGS",
- "SKIP_GRAM",
- "CALL",
- "CUSTOM",
- "EMBEDDING_LOOKUP_SPARSE",
- "PAD",
- "UNIDIRECTIONAL_SEQUENCE_RNN",
- "GATHER",
- "BATCH_TO_SPACE_ND",
- "SPACE_TO_BATCH_ND",
- "TRANSPOSE",
- "MEAN",
- "SUB",
- "DIV",
- "SQUEEZE",
- "UNIDIRECTIONAL_SEQUENCE_LSTM",
- "STRIDED_SLICE",
- "BIDIRECTIONAL_SEQUENCE_RNN",
- "EXP",
- "TOPK_V2",
- "SPLIT",
- "LOG_SOFTMAX",
- "DELEGATE",
- "BIDIRECTIONAL_SEQUENCE_LSTM",
- "CAST",
- "PRELU",
- "MAXIMUM",
- "ARG_MAX",
- "MINIMUM",
- "LESS",
- "NEG",
- "PADV2",
- "GREATER",
- "GREATER_EQUAL",
- "LESS_EQUAL",
- "SELECT",
- "SLICE",
- "SIN",
- "TRANSPOSE_CONV",
- "SPARSE_TO_DENSE",
- "TILE",
- "EXPAND_DIMS",
- "EQUAL",
- "NOT_EQUAL",
- "LOG",
- "SUM",
- "SQRT",
- "RSQRT",
- "SHAPE",
- "POW",
- "ARG_MIN",
- "FAKE_QUANT",
- "REDUCE_PROD",
- "REDUCE_MAX",
- "PACK",
- "LOGICAL_OR",
- "ONE_HOT",
- "LOGICAL_AND",
- "LOGICAL_NOT",
- "UNPACK",
- "REDUCE_MIN",
- "FLOOR_DIV",
- "REDUCE_ANY",
- "SQUARE",
- "ZEROS_LIKE",
- "FILL",
- "FLOOR_MOD",
- "RANGE",
- "RESIZE_NEAREST_NEIGHBOR",
- "LEAKY_RELU",
- "SQUARED_DIFFERENCE",
- "MIRROR_PAD",
- "ABS",
- "SPLIT_V",
- "UNIQUE",
- "CEIL",
- "REVERSE_V2",
- "ADD_N",
- "GATHER_ND",
- "COS",
- "WHERE",
- "RANK",
- "ELU",
- "REVERSE_SEQUENCE",
- "MATRIX_DIAG",
- "QUANTIZE",
- "MATRIX_SET_DIAG",
- "ROUND",
- "HARD_SWISH",
- "IF",
- "WHILE",
- "NON_MAX_SUPPRESSION_V4",
- "NON_MAX_SUPPRESSION_V5",
- "SCATTER_ND",
- "SELECT_V2",
- "DENSIFY",
- "SEGMENT_SUM",
- "BATCH_MATMUL",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "BCQ_GATHER",
- "BCQ_FULLY_CONNECTED",
- "INSTANCE_NORM",
- nullptr};
+ static const char *const names[167] = {"BCQ_GATHER",
+ "BCQ_FULLY_CONNECTED",
+ "INSTANCE_NORM",
+ "",
+ "ADD",
+ "AVERAGE_POOL_2D",
+ "CONCATENATION",
+ "CONV_2D",
+ "DEPTHWISE_CONV_2D",
+ "DEPTH_TO_SPACE",
+ "DEQUANTIZE",
+ "EMBEDDING_LOOKUP",
+ "FLOOR",
+ "FULLY_CONNECTED",
+ "HASHTABLE_LOOKUP",
+ "L2_NORMALIZATION",
+ "L2_POOL_2D",
+ "LOCAL_RESPONSE_NORMALIZATION",
+ "LOGISTIC",
+ "LSH_PROJECTION",
+ "LSTM",
+ "MAX_POOL_2D",
+ "MUL",
+ "RELU",
+ "RELU_N1_TO_1",
+ "RELU6",
+ "RESHAPE",
+ "RESIZE_BILINEAR",
+ "RNN",
+ "SOFTMAX",
+ "SPACE_TO_DEPTH",
+ "SVDF",
+ "TANH",
+ "CONCAT_EMBEDDINGS",
+ "SKIP_GRAM",
+ "CALL",
+ "CUSTOM",
+ "EMBEDDING_LOOKUP_SPARSE",
+ "PAD",
+ "UNIDIRECTIONAL_SEQUENCE_RNN",
+ "GATHER",
+ "BATCH_TO_SPACE_ND",
+ "SPACE_TO_BATCH_ND",
+ "TRANSPOSE",
+ "MEAN",
+ "SUB",
+ "DIV",
+ "SQUEEZE",
+ "UNIDIRECTIONAL_SEQUENCE_LSTM",
+ "STRIDED_SLICE",
+ "BIDIRECTIONAL_SEQUENCE_RNN",
+ "EXP",
+ "TOPK_V2",
+ "SPLIT",
+ "LOG_SOFTMAX",
+ "DELEGATE",
+ "BIDIRECTIONAL_SEQUENCE_LSTM",
+ "CAST",
+ "PRELU",
+ "MAXIMUM",
+ "ARG_MAX",
+ "MINIMUM",
+ "LESS",
+ "NEG",
+ "PADV2",
+ "GREATER",
+ "GREATER_EQUAL",
+ "LESS_EQUAL",
+ "SELECT",
+ "SLICE",
+ "SIN",
+ "TRANSPOSE_CONV",
+ "SPARSE_TO_DENSE",
+ "TILE",
+ "EXPAND_DIMS",
+ "EQUAL",
+ "NOT_EQUAL",
+ "LOG",
+ "SUM",
+ "SQRT",
+ "RSQRT",
+ "SHAPE",
+ "POW",
+ "ARG_MIN",
+ "FAKE_QUANT",
+ "REDUCE_PROD",
+ "REDUCE_MAX",
+ "PACK",
+ "LOGICAL_OR",
+ "ONE_HOT",
+ "LOGICAL_AND",
+ "LOGICAL_NOT",
+ "UNPACK",
+ "REDUCE_MIN",
+ "FLOOR_DIV",
+ "REDUCE_ANY",
+ "SQUARE",
+ "ZEROS_LIKE",
+ "FILL",
+ "FLOOR_MOD",
+ "RANGE",
+ "RESIZE_NEAREST_NEIGHBOR",
+ "LEAKY_RELU",
+ "SQUARED_DIFFERENCE",
+ "MIRROR_PAD",
+ "ABS",
+ "SPLIT_V",
+ "UNIQUE",
+ "CEIL",
+ "REVERSE_V2",
+ "ADD_N",
+ "GATHER_ND",
+ "COS",
+ "WHERE",
+ "RANK",
+ "ELU",
+ "REVERSE_SEQUENCE",
+ "MATRIX_DIAG",
+ "QUANTIZE",
+ "MATRIX_SET_DIAG",
+ "ROUND",
+ "HARD_SWISH",
+ "IF",
+ "WHILE",
+ "NON_MAX_SUPPRESSION_V4",
+ "NON_MAX_SUPPRESSION_V5",
+ "SCATTER_ND",
+ "SELECT_V2",
+ "DENSIFY",
+ "SEGMENT_SUM",
+ "BATCH_MATMUL",
+ "PLACEHOLDER_FOR_GREATER_OP_CODES",
+ "CUMSUM",
+ "CALL_ONCE",
+ "BROADCAST_TO",
+ "RFFT2D",
+ "CONV_3D",
+ "IMAG",
+ "REAL",
+ "COMPLEX_ABS",
+ "HASHTABLE",
+ "HASHTABLE_FIND",
+ "HASHTABLE_IMPORT",
+ "HASHTABLE_SIZE",
+ "REDUCE_ALL",
+ "CONV_3D_TRANSPOSE",
+ "VAR_HANDLE",
+ "READ_VARIABLE",
+ "ASSIGN_VARIABLE",
+ "BROADCAST_ARGS",
+ "RANDOM_STANDARD_NORMAL",
+ "BUCKETIZE",
+ "RANDOM_UNIFORM",
+ "MULTINOMIAL",
+ "GELU",
+ "DYNAMIC_UPDATE_SLICE",
+ "RELU_0_TO_1",
+ "UNSORTED_SEGMENT_PROD",
+ "UNSORTED_SEGMENT_MAX",
+ "UNSORTED_SEGMENT_SUM",
+ "ATAN2",
+ "UNSORTED_SEGMENT_MIN",
+ "SIGN",
+ "BITCAST",
+ "BITWISE_XOR",
+ "RIGHT_SHIFT",
+ nullptr};
return names;
}
inline const char *EnumNameBuiltinOperator(BuiltinOperator e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, BuiltinOperator_BCQ_GATHER, BuiltinOperator_RIGHT_SHIFT))
+ return "";
+ const size_t index = static_cast<size_t>(e) - static_cast<size_t>(BuiltinOperator_BCQ_GATHER);
return EnumNamesBuiltinOperator()[index];
}
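
Moving BCQ_GATHER, BCQ_FULLY_CONNECTED and INSTANCE_NORM to -4..-2 keeps every upstream TFLite opcode at its original value, at the cost of a biased name lookup: the table starts at BuiltinOperator_MIN (BCQ_GATHER), holds one empty slot for the unused value -1, and the function subtracts the minimum after the range check. A sketch of the index arithmetic, assuming the layout above:

#include <cassert>
#include <cstddef>

// Both casts wrap modulo 2^N for negative inputs, so the subtraction still
// yields the small, correct offset -- exactly what the generated code does.
std::size_t BiasedIndexSketch(int value, int min_value)
{
  return static_cast<std::size_t>(value) - static_cast<std::size_t>(min_value);
}

int main()
{
  assert(BiasedIndexSketch(-4, -4) == 0); // BCQ_GATHER   -> names[0]
  assert(BiasedIndexSketch(-1, -4) == 3); // unused value -> names[3] == ""
  assert(BiasedIndexSketch(0, -4) == 4);  // ADD          -> names[4]
  return 0;
}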
-enum BuiltinOptions
+enum BuiltinOptions : uint8_t
{
BuiltinOptions_NONE = 0,
BuiltinOptions_Conv2DOptions = 1,
@@ -1068,6 +1271,31 @@ enum BuiltinOptions
BuiltinOptions_DensifyOptions = 99,
BuiltinOptions_SegmentSumOptions = 100,
BuiltinOptions_BatchMatMulOptions = 101,
+ BuiltinOptions_CumsumOptions = 102,
+ BuiltinOptions_CallOnceOptions = 103,
+ BuiltinOptions_BroadcastToOptions = 104,
+ BuiltinOptions_Rfft2dOptions = 105,
+ BuiltinOptions_Conv3DOptions = 106,
+ BuiltinOptions_HashtableOptions = 107,
+ BuiltinOptions_HashtableFindOptions = 108,
+ BuiltinOptions_HashtableImportOptions = 109,
+ BuiltinOptions_HashtableSizeOptions = 110,
+ BuiltinOptions_VarHandleOptions = 111,
+ BuiltinOptions_ReadVariableOptions = 112,
+ BuiltinOptions_AssignVariableOptions = 113,
+ BuiltinOptions_RandomOptions = 114,
+ BuiltinOptions_BucketizeOptions = 115,
+ BuiltinOptions_GeluOptions = 116,
+ BuiltinOptions_DynamicUpdateSliceOptions = 117,
+ BuiltinOptions_UnsortedSegmentProdOptions = 118,
+ BuiltinOptions_UnsortedSegmentMaxOptions = 119,
+ BuiltinOptions_UnsortedSegmentMinOptions = 120,
+ BuiltinOptions_UnsortedSegmentSumOptions = 121,
+ BuiltinOptions_ATan2Options = 122,
+ BuiltinOptions_SignOptions = 123,
+ BuiltinOptions_BitcastOptions = 124,
+ BuiltinOptions_BitwiseXorOptions = 125,
+ BuiltinOptions_RightShiftOptions = 126,
BuiltinOptions_BCQGatherOptions = 252,
BuiltinOptions_BCQFullyConnectedOptions = 253,
BuiltinOptions_InstanceNormOptions = 254,
@@ -1075,7 +1303,7 @@ enum BuiltinOptions
BuiltinOptions_MAX = BuiltinOptions_InstanceNormOptions
};
-inline const BuiltinOptions (&EnumValuesBuiltinOptions())[105]
+inline const BuiltinOptions (&EnumValuesBuiltinOptions())[130]
{
static const BuiltinOptions values[] = {BuiltinOptions_NONE,
BuiltinOptions_Conv2DOptions,
@@ -1179,6 +1407,31 @@ inline const BuiltinOptions (&EnumValuesBuiltinOptions())[105]
BuiltinOptions_DensifyOptions,
BuiltinOptions_SegmentSumOptions,
BuiltinOptions_BatchMatMulOptions,
+ BuiltinOptions_CumsumOptions,
+ BuiltinOptions_CallOnceOptions,
+ BuiltinOptions_BroadcastToOptions,
+ BuiltinOptions_Rfft2dOptions,
+ BuiltinOptions_Conv3DOptions,
+ BuiltinOptions_HashtableOptions,
+ BuiltinOptions_HashtableFindOptions,
+ BuiltinOptions_HashtableImportOptions,
+ BuiltinOptions_HashtableSizeOptions,
+ BuiltinOptions_VarHandleOptions,
+ BuiltinOptions_ReadVariableOptions,
+ BuiltinOptions_AssignVariableOptions,
+ BuiltinOptions_RandomOptions,
+ BuiltinOptions_BucketizeOptions,
+ BuiltinOptions_GeluOptions,
+ BuiltinOptions_DynamicUpdateSliceOptions,
+ BuiltinOptions_UnsortedSegmentProdOptions,
+ BuiltinOptions_UnsortedSegmentMaxOptions,
+ BuiltinOptions_UnsortedSegmentMinOptions,
+ BuiltinOptions_UnsortedSegmentSumOptions,
+ BuiltinOptions_ATan2Options,
+ BuiltinOptions_SignOptions,
+ BuiltinOptions_BitcastOptions,
+ BuiltinOptions_BitwiseXorOptions,
+ BuiltinOptions_RightShiftOptions,
BuiltinOptions_BCQGatherOptions,
BuiltinOptions_BCQFullyConnectedOptions,
BuiltinOptions_InstanceNormOptions};
@@ -1187,268 +1440,270 @@ inline const BuiltinOptions (&EnumValuesBuiltinOptions())[105]
inline const char *const *EnumNamesBuiltinOptions()
{
- static const char *const names[] = {"NONE",
- "Conv2DOptions",
- "DepthwiseConv2DOptions",
- "ConcatEmbeddingsOptions",
- "LSHProjectionOptions",
- "Pool2DOptions",
- "SVDFOptions",
- "RNNOptions",
- "FullyConnectedOptions",
- "SoftmaxOptions",
- "ConcatenationOptions",
- "AddOptions",
- "L2NormOptions",
- "LocalResponseNormalizationOptions",
- "LSTMOptions",
- "ResizeBilinearOptions",
- "CallOptions",
- "ReshapeOptions",
- "SkipGramOptions",
- "SpaceToDepthOptions",
- "EmbeddingLookupSparseOptions",
- "MulOptions",
- "PadOptions",
- "GatherOptions",
- "BatchToSpaceNDOptions",
- "SpaceToBatchNDOptions",
- "TransposeOptions",
- "ReducerOptions",
- "SubOptions",
- "DivOptions",
- "SqueezeOptions",
- "SequenceRNNOptions",
- "StridedSliceOptions",
- "ExpOptions",
- "TopKV2Options",
- "SplitOptions",
- "LogSoftmaxOptions",
- "CastOptions",
- "DequantizeOptions",
- "MaximumMinimumOptions",
- "ArgMaxOptions",
- "LessOptions",
- "NegOptions",
- "PadV2Options",
- "GreaterOptions",
- "GreaterEqualOptions",
- "LessEqualOptions",
- "SelectOptions",
- "SliceOptions",
- "TransposeConvOptions",
- "SparseToDenseOptions",
- "TileOptions",
- "ExpandDimsOptions",
- "EqualOptions",
- "NotEqualOptions",
- "ShapeOptions",
- "PowOptions",
- "ArgMinOptions",
- "FakeQuantOptions",
- "PackOptions",
- "LogicalOrOptions",
- "OneHotOptions",
- "LogicalAndOptions",
- "LogicalNotOptions",
- "UnpackOptions",
- "FloorDivOptions",
- "SquareOptions",
- "ZerosLikeOptions",
- "FillOptions",
- "BidirectionalSequenceLSTMOptions",
- "BidirectionalSequenceRNNOptions",
- "UnidirectionalSequenceLSTMOptions",
- "FloorModOptions",
- "RangeOptions",
- "ResizeNearestNeighborOptions",
- "LeakyReluOptions",
- "SquaredDifferenceOptions",
- "MirrorPadOptions",
- "AbsOptions",
- "SplitVOptions",
- "UniqueOptions",
- "ReverseV2Options",
- "AddNOptions",
- "GatherNdOptions",
- "CosOptions",
- "WhereOptions",
- "RankOptions",
- "ReverseSequenceOptions",
- "MatrixDiagOptions",
- "QuantizeOptions",
- "MatrixSetDiagOptions",
- "HardSwishOptions",
- "IfOptions",
- "WhileOptions",
- "DepthToSpaceOptions",
- "NonMaxSuppressionV4Options",
- "NonMaxSuppressionV5Options",
- "ScatterNdOptions",
- "SelectV2Options",
- "DensifyOptions",
- "SegmentSumOptions",
- "BatchMatMulOptions",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "BCQGatherOptions",
- "BCQFullyConnectedOptions",
- "InstanceNormOptions",
- nullptr};
+ static const char *const names[256] = {"NONE",
+ "Conv2DOptions",
+ "DepthwiseConv2DOptions",
+ "ConcatEmbeddingsOptions",
+ "LSHProjectionOptions",
+ "Pool2DOptions",
+ "SVDFOptions",
+ "RNNOptions",
+ "FullyConnectedOptions",
+ "SoftmaxOptions",
+ "ConcatenationOptions",
+ "AddOptions",
+ "L2NormOptions",
+ "LocalResponseNormalizationOptions",
+ "LSTMOptions",
+ "ResizeBilinearOptions",
+ "CallOptions",
+ "ReshapeOptions",
+ "SkipGramOptions",
+ "SpaceToDepthOptions",
+ "EmbeddingLookupSparseOptions",
+ "MulOptions",
+ "PadOptions",
+ "GatherOptions",
+ "BatchToSpaceNDOptions",
+ "SpaceToBatchNDOptions",
+ "TransposeOptions",
+ "ReducerOptions",
+ "SubOptions",
+ "DivOptions",
+ "SqueezeOptions",
+ "SequenceRNNOptions",
+ "StridedSliceOptions",
+ "ExpOptions",
+ "TopKV2Options",
+ "SplitOptions",
+ "LogSoftmaxOptions",
+ "CastOptions",
+ "DequantizeOptions",
+ "MaximumMinimumOptions",
+ "ArgMaxOptions",
+ "LessOptions",
+ "NegOptions",
+ "PadV2Options",
+ "GreaterOptions",
+ "GreaterEqualOptions",
+ "LessEqualOptions",
+ "SelectOptions",
+ "SliceOptions",
+ "TransposeConvOptions",
+ "SparseToDenseOptions",
+ "TileOptions",
+ "ExpandDimsOptions",
+ "EqualOptions",
+ "NotEqualOptions",
+ "ShapeOptions",
+ "PowOptions",
+ "ArgMinOptions",
+ "FakeQuantOptions",
+ "PackOptions",
+ "LogicalOrOptions",
+ "OneHotOptions",
+ "LogicalAndOptions",
+ "LogicalNotOptions",
+ "UnpackOptions",
+ "FloorDivOptions",
+ "SquareOptions",
+ "ZerosLikeOptions",
+ "FillOptions",
+ "BidirectionalSequenceLSTMOptions",
+ "BidirectionalSequenceRNNOptions",
+ "UnidirectionalSequenceLSTMOptions",
+ "FloorModOptions",
+ "RangeOptions",
+ "ResizeNearestNeighborOptions",
+ "LeakyReluOptions",
+ "SquaredDifferenceOptions",
+ "MirrorPadOptions",
+ "AbsOptions",
+ "SplitVOptions",
+ "UniqueOptions",
+ "ReverseV2Options",
+ "AddNOptions",
+ "GatherNdOptions",
+ "CosOptions",
+ "WhereOptions",
+ "RankOptions",
+ "ReverseSequenceOptions",
+ "MatrixDiagOptions",
+ "QuantizeOptions",
+ "MatrixSetDiagOptions",
+ "HardSwishOptions",
+ "IfOptions",
+ "WhileOptions",
+ "DepthToSpaceOptions",
+ "NonMaxSuppressionV4Options",
+ "NonMaxSuppressionV5Options",
+ "ScatterNdOptions",
+ "SelectV2Options",
+ "DensifyOptions",
+ "SegmentSumOptions",
+ "BatchMatMulOptions",
+ "CumsumOptions",
+ "CallOnceOptions",
+ "BroadcastToOptions",
+ "Rfft2dOptions",
+ "Conv3DOptions",
+ "HashtableOptions",
+ "HashtableFindOptions",
+ "HashtableImportOptions",
+ "HashtableSizeOptions",
+ "VarHandleOptions",
+ "ReadVariableOptions",
+ "AssignVariableOptions",
+ "RandomOptions",
+ "BucketizeOptions",
+ "GeluOptions",
+ "DynamicUpdateSliceOptions",
+ "UnsortedSegmentProdOptions",
+ "UnsortedSegmentMaxOptions",
+ "UnsortedSegmentMinOptions",
+ "UnsortedSegmentSumOptions",
+ "ATan2Options",
+ "SignOptions",
+ "BitcastOptions",
+ "BitwiseXorOptions",
+ "RightShiftOptions",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "BCQGatherOptions",
+ "BCQFullyConnectedOptions",
+ "InstanceNormOptions",
+ nullptr};
return names;
}
inline const char *EnumNameBuiltinOptions(BuiltinOptions e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, BuiltinOptions_NONE, BuiltinOptions_InstanceNormOptions))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesBuiltinOptions()[index];
}
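
Unlike BuiltinOperator, this table stays directly indexed: names[256] reserves empty strings for the unused values 127..251 so the circle-specific options at 252..254 land at their raw indices, and the new guard turns anything above InstanceNormOptions into an empty name instead of an out-of-bounds read. A usage sketch, with the include name assumed:

#include <cassert>
#include <cstring>
#include "circle_schema_generated.h" // assumed name of this generated header

int main()
{
  using namespace circle;
  // Raw value 252, looked up by direct index into the padded table.
  assert(std::strcmp(EnumNameBuiltinOptions(BuiltinOptions_BCQGatherOptions),
                     "BCQGatherOptions") == 0);
  // A reserved-but-unused value names as "" rather than reading garbage.
  assert(std::strcmp(EnumNameBuiltinOptions(static_cast<BuiltinOptions>(200)), "") == 0);
  return 0;
}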
@@ -1457,522 +1712,647 @@ template <typename T> struct BuiltinOptionsTraits
static const BuiltinOptions enum_value = BuiltinOptions_NONE;
};
-template <> struct BuiltinOptionsTraits<Conv2DOptions>
+template <> struct BuiltinOptionsTraits<circle::Conv2DOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_Conv2DOptions;
};
-template <> struct BuiltinOptionsTraits<DepthwiseConv2DOptions>
+template <> struct BuiltinOptionsTraits<circle::DepthwiseConv2DOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_DepthwiseConv2DOptions;
};
-template <> struct BuiltinOptionsTraits<ConcatEmbeddingsOptions>
+template <> struct BuiltinOptionsTraits<circle::ConcatEmbeddingsOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ConcatEmbeddingsOptions;
};
-template <> struct BuiltinOptionsTraits<LSHProjectionOptions>
+template <> struct BuiltinOptionsTraits<circle::LSHProjectionOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_LSHProjectionOptions;
};
-template <> struct BuiltinOptionsTraits<Pool2DOptions>
+template <> struct BuiltinOptionsTraits<circle::Pool2DOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_Pool2DOptions;
};
-template <> struct BuiltinOptionsTraits<SVDFOptions>
+template <> struct BuiltinOptionsTraits<circle::SVDFOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SVDFOptions;
};
-template <> struct BuiltinOptionsTraits<RNNOptions>
+template <> struct BuiltinOptionsTraits<circle::RNNOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_RNNOptions;
};
-template <> struct BuiltinOptionsTraits<FullyConnectedOptions>
+template <> struct BuiltinOptionsTraits<circle::FullyConnectedOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_FullyConnectedOptions;
};
-template <> struct BuiltinOptionsTraits<SoftmaxOptions>
+template <> struct BuiltinOptionsTraits<circle::SoftmaxOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SoftmaxOptions;
};
-template <> struct BuiltinOptionsTraits<ConcatenationOptions>
+template <> struct BuiltinOptionsTraits<circle::ConcatenationOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ConcatenationOptions;
};
-template <> struct BuiltinOptionsTraits<AddOptions>
+template <> struct BuiltinOptionsTraits<circle::AddOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_AddOptions;
};
-template <> struct BuiltinOptionsTraits<L2NormOptions>
+template <> struct BuiltinOptionsTraits<circle::L2NormOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_L2NormOptions;
};
-template <> struct BuiltinOptionsTraits<LocalResponseNormalizationOptions>
+template <> struct BuiltinOptionsTraits<circle::LocalResponseNormalizationOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_LocalResponseNormalizationOptions;
};
-template <> struct BuiltinOptionsTraits<LSTMOptions>
+template <> struct BuiltinOptionsTraits<circle::LSTMOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_LSTMOptions;
};
-template <> struct BuiltinOptionsTraits<ResizeBilinearOptions>
+template <> struct BuiltinOptionsTraits<circle::ResizeBilinearOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ResizeBilinearOptions;
};
-template <> struct BuiltinOptionsTraits<CallOptions>
+template <> struct BuiltinOptionsTraits<circle::CallOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_CallOptions;
};
-template <> struct BuiltinOptionsTraits<ReshapeOptions>
+template <> struct BuiltinOptionsTraits<circle::ReshapeOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ReshapeOptions;
};
-template <> struct BuiltinOptionsTraits<SkipGramOptions>
+template <> struct BuiltinOptionsTraits<circle::SkipGramOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SkipGramOptions;
};
-template <> struct BuiltinOptionsTraits<SpaceToDepthOptions>
+template <> struct BuiltinOptionsTraits<circle::SpaceToDepthOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SpaceToDepthOptions;
};
-template <> struct BuiltinOptionsTraits<EmbeddingLookupSparseOptions>
+template <> struct BuiltinOptionsTraits<circle::EmbeddingLookupSparseOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_EmbeddingLookupSparseOptions;
};
-template <> struct BuiltinOptionsTraits<MulOptions>
+template <> struct BuiltinOptionsTraits<circle::MulOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_MulOptions;
};
-template <> struct BuiltinOptionsTraits<PadOptions>
+template <> struct BuiltinOptionsTraits<circle::PadOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_PadOptions;
};
-template <> struct BuiltinOptionsTraits<GatherOptions>
+template <> struct BuiltinOptionsTraits<circle::GatherOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_GatherOptions;
};
-template <> struct BuiltinOptionsTraits<BatchToSpaceNDOptions>
+template <> struct BuiltinOptionsTraits<circle::BatchToSpaceNDOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_BatchToSpaceNDOptions;
};
-template <> struct BuiltinOptionsTraits<SpaceToBatchNDOptions>
+template <> struct BuiltinOptionsTraits<circle::SpaceToBatchNDOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SpaceToBatchNDOptions;
};
-template <> struct BuiltinOptionsTraits<TransposeOptions>
+template <> struct BuiltinOptionsTraits<circle::TransposeOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_TransposeOptions;
};
-template <> struct BuiltinOptionsTraits<ReducerOptions>
+template <> struct BuiltinOptionsTraits<circle::ReducerOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ReducerOptions;
};
-template <> struct BuiltinOptionsTraits<SubOptions>
+template <> struct BuiltinOptionsTraits<circle::SubOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SubOptions;
};
-template <> struct BuiltinOptionsTraits<DivOptions>
+template <> struct BuiltinOptionsTraits<circle::DivOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_DivOptions;
};
-template <> struct BuiltinOptionsTraits<SqueezeOptions>
+template <> struct BuiltinOptionsTraits<circle::SqueezeOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SqueezeOptions;
};
-template <> struct BuiltinOptionsTraits<SequenceRNNOptions>
+template <> struct BuiltinOptionsTraits<circle::SequenceRNNOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SequenceRNNOptions;
};
-template <> struct BuiltinOptionsTraits<StridedSliceOptions>
+template <> struct BuiltinOptionsTraits<circle::StridedSliceOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_StridedSliceOptions;
};
-template <> struct BuiltinOptionsTraits<ExpOptions>
+template <> struct BuiltinOptionsTraits<circle::ExpOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ExpOptions;
};
-template <> struct BuiltinOptionsTraits<TopKV2Options>
+template <> struct BuiltinOptionsTraits<circle::TopKV2Options>
{
static const BuiltinOptions enum_value = BuiltinOptions_TopKV2Options;
};
-template <> struct BuiltinOptionsTraits<SplitOptions>
+template <> struct BuiltinOptionsTraits<circle::SplitOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SplitOptions;
};
-template <> struct BuiltinOptionsTraits<LogSoftmaxOptions>
+template <> struct BuiltinOptionsTraits<circle::LogSoftmaxOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_LogSoftmaxOptions;
};
-template <> struct BuiltinOptionsTraits<CastOptions>
+template <> struct BuiltinOptionsTraits<circle::CastOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_CastOptions;
};
-template <> struct BuiltinOptionsTraits<DequantizeOptions>
+template <> struct BuiltinOptionsTraits<circle::DequantizeOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_DequantizeOptions;
};
-template <> struct BuiltinOptionsTraits<MaximumMinimumOptions>
+template <> struct BuiltinOptionsTraits<circle::MaximumMinimumOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_MaximumMinimumOptions;
};
-template <> struct BuiltinOptionsTraits<ArgMaxOptions>
+template <> struct BuiltinOptionsTraits<circle::ArgMaxOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ArgMaxOptions;
};
-template <> struct BuiltinOptionsTraits<LessOptions>
+template <> struct BuiltinOptionsTraits<circle::LessOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_LessOptions;
};
-template <> struct BuiltinOptionsTraits<NegOptions>
+template <> struct BuiltinOptionsTraits<circle::NegOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_NegOptions;
};
-template <> struct BuiltinOptionsTraits<PadV2Options>
+template <> struct BuiltinOptionsTraits<circle::PadV2Options>
{
static const BuiltinOptions enum_value = BuiltinOptions_PadV2Options;
};
-template <> struct BuiltinOptionsTraits<GreaterOptions>
+template <> struct BuiltinOptionsTraits<circle::GreaterOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_GreaterOptions;
};
-template <> struct BuiltinOptionsTraits<GreaterEqualOptions>
+template <> struct BuiltinOptionsTraits<circle::GreaterEqualOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_GreaterEqualOptions;
};
-template <> struct BuiltinOptionsTraits<LessEqualOptions>
+template <> struct BuiltinOptionsTraits<circle::LessEqualOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_LessEqualOptions;
};
-template <> struct BuiltinOptionsTraits<SelectOptions>
+template <> struct BuiltinOptionsTraits<circle::SelectOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SelectOptions;
};
-template <> struct BuiltinOptionsTraits<SliceOptions>
+template <> struct BuiltinOptionsTraits<circle::SliceOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SliceOptions;
};
-template <> struct BuiltinOptionsTraits<TransposeConvOptions>
+template <> struct BuiltinOptionsTraits<circle::TransposeConvOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_TransposeConvOptions;
};
-template <> struct BuiltinOptionsTraits<SparseToDenseOptions>
+template <> struct BuiltinOptionsTraits<circle::SparseToDenseOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SparseToDenseOptions;
};
-template <> struct BuiltinOptionsTraits<TileOptions>
+template <> struct BuiltinOptionsTraits<circle::TileOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_TileOptions;
};
-template <> struct BuiltinOptionsTraits<ExpandDimsOptions>
+template <> struct BuiltinOptionsTraits<circle::ExpandDimsOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ExpandDimsOptions;
};
-template <> struct BuiltinOptionsTraits<EqualOptions>
+template <> struct BuiltinOptionsTraits<circle::EqualOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_EqualOptions;
};
-template <> struct BuiltinOptionsTraits<NotEqualOptions>
+template <> struct BuiltinOptionsTraits<circle::NotEqualOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_NotEqualOptions;
};
-template <> struct BuiltinOptionsTraits<ShapeOptions>
+template <> struct BuiltinOptionsTraits<circle::ShapeOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ShapeOptions;
};
-template <> struct BuiltinOptionsTraits<PowOptions>
+template <> struct BuiltinOptionsTraits<circle::PowOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_PowOptions;
};
-template <> struct BuiltinOptionsTraits<ArgMinOptions>
+template <> struct BuiltinOptionsTraits<circle::ArgMinOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ArgMinOptions;
};
-template <> struct BuiltinOptionsTraits<FakeQuantOptions>
+template <> struct BuiltinOptionsTraits<circle::FakeQuantOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_FakeQuantOptions;
};
-template <> struct BuiltinOptionsTraits<PackOptions>
+template <> struct BuiltinOptionsTraits<circle::PackOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_PackOptions;
};
-template <> struct BuiltinOptionsTraits<LogicalOrOptions>
+template <> struct BuiltinOptionsTraits<circle::LogicalOrOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_LogicalOrOptions;
};
-template <> struct BuiltinOptionsTraits<OneHotOptions>
+template <> struct BuiltinOptionsTraits<circle::OneHotOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_OneHotOptions;
};
-template <> struct BuiltinOptionsTraits<LogicalAndOptions>
+template <> struct BuiltinOptionsTraits<circle::LogicalAndOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_LogicalAndOptions;
};
-template <> struct BuiltinOptionsTraits<LogicalNotOptions>
+template <> struct BuiltinOptionsTraits<circle::LogicalNotOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_LogicalNotOptions;
};
-template <> struct BuiltinOptionsTraits<UnpackOptions>
+template <> struct BuiltinOptionsTraits<circle::UnpackOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_UnpackOptions;
};
-template <> struct BuiltinOptionsTraits<FloorDivOptions>
+template <> struct BuiltinOptionsTraits<circle::FloorDivOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_FloorDivOptions;
};
-template <> struct BuiltinOptionsTraits<SquareOptions>
+template <> struct BuiltinOptionsTraits<circle::SquareOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SquareOptions;
};
-template <> struct BuiltinOptionsTraits<ZerosLikeOptions>
+template <> struct BuiltinOptionsTraits<circle::ZerosLikeOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ZerosLikeOptions;
};
-template <> struct BuiltinOptionsTraits<FillOptions>
+template <> struct BuiltinOptionsTraits<circle::FillOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_FillOptions;
};
-template <> struct BuiltinOptionsTraits<BidirectionalSequenceLSTMOptions>
+template <> struct BuiltinOptionsTraits<circle::BidirectionalSequenceLSTMOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceLSTMOptions;
};
-template <> struct BuiltinOptionsTraits<BidirectionalSequenceRNNOptions>
+template <> struct BuiltinOptionsTraits<circle::BidirectionalSequenceRNNOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceRNNOptions;
};
-template <> struct BuiltinOptionsTraits<UnidirectionalSequenceLSTMOptions>
+template <> struct BuiltinOptionsTraits<circle::UnidirectionalSequenceLSTMOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_UnidirectionalSequenceLSTMOptions;
};
-template <> struct BuiltinOptionsTraits<FloorModOptions>
+template <> struct BuiltinOptionsTraits<circle::FloorModOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_FloorModOptions;
};
-template <> struct BuiltinOptionsTraits<RangeOptions>
+template <> struct BuiltinOptionsTraits<circle::RangeOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_RangeOptions;
};
-template <> struct BuiltinOptionsTraits<ResizeNearestNeighborOptions>
+template <> struct BuiltinOptionsTraits<circle::ResizeNearestNeighborOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ResizeNearestNeighborOptions;
};
-template <> struct BuiltinOptionsTraits<LeakyReluOptions>
+template <> struct BuiltinOptionsTraits<circle::LeakyReluOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_LeakyReluOptions;
};
-template <> struct BuiltinOptionsTraits<SquaredDifferenceOptions>
+template <> struct BuiltinOptionsTraits<circle::SquaredDifferenceOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SquaredDifferenceOptions;
};
-template <> struct BuiltinOptionsTraits<MirrorPadOptions>
+template <> struct BuiltinOptionsTraits<circle::MirrorPadOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_MirrorPadOptions;
};
-template <> struct BuiltinOptionsTraits<AbsOptions>
+template <> struct BuiltinOptionsTraits<circle::AbsOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_AbsOptions;
};
-template <> struct BuiltinOptionsTraits<SplitVOptions>
+template <> struct BuiltinOptionsTraits<circle::SplitVOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SplitVOptions;
};
-template <> struct BuiltinOptionsTraits<UniqueOptions>
+template <> struct BuiltinOptionsTraits<circle::UniqueOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_UniqueOptions;
};
-template <> struct BuiltinOptionsTraits<ReverseV2Options>
+template <> struct BuiltinOptionsTraits<circle::ReverseV2Options>
{
static const BuiltinOptions enum_value = BuiltinOptions_ReverseV2Options;
};
-template <> struct BuiltinOptionsTraits<AddNOptions>
+template <> struct BuiltinOptionsTraits<circle::AddNOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_AddNOptions;
};
-template <> struct BuiltinOptionsTraits<GatherNdOptions>
+template <> struct BuiltinOptionsTraits<circle::GatherNdOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_GatherNdOptions;
};
-template <> struct BuiltinOptionsTraits<CosOptions>
+template <> struct BuiltinOptionsTraits<circle::CosOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_CosOptions;
};
-template <> struct BuiltinOptionsTraits<WhereOptions>
+template <> struct BuiltinOptionsTraits<circle::WhereOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_WhereOptions;
};
-template <> struct BuiltinOptionsTraits<RankOptions>
+template <> struct BuiltinOptionsTraits<circle::RankOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_RankOptions;
};
-template <> struct BuiltinOptionsTraits<ReverseSequenceOptions>
+template <> struct BuiltinOptionsTraits<circle::ReverseSequenceOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ReverseSequenceOptions;
};
-template <> struct BuiltinOptionsTraits<MatrixDiagOptions>
+template <> struct BuiltinOptionsTraits<circle::MatrixDiagOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_MatrixDiagOptions;
};
-template <> struct BuiltinOptionsTraits<QuantizeOptions>
+template <> struct BuiltinOptionsTraits<circle::QuantizeOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_QuantizeOptions;
};
-template <> struct BuiltinOptionsTraits<MatrixSetDiagOptions>
+template <> struct BuiltinOptionsTraits<circle::MatrixSetDiagOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_MatrixSetDiagOptions;
};
-template <> struct BuiltinOptionsTraits<HardSwishOptions>
+template <> struct BuiltinOptionsTraits<circle::HardSwishOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_HardSwishOptions;
};
-template <> struct BuiltinOptionsTraits<IfOptions>
+template <> struct BuiltinOptionsTraits<circle::IfOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_IfOptions;
};
-template <> struct BuiltinOptionsTraits<WhileOptions>
+template <> struct BuiltinOptionsTraits<circle::WhileOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_WhileOptions;
};
-template <> struct BuiltinOptionsTraits<DepthToSpaceOptions>
+template <> struct BuiltinOptionsTraits<circle::DepthToSpaceOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_DepthToSpaceOptions;
};
-template <> struct BuiltinOptionsTraits<NonMaxSuppressionV4Options>
+template <> struct BuiltinOptionsTraits<circle::NonMaxSuppressionV4Options>
{
static const BuiltinOptions enum_value = BuiltinOptions_NonMaxSuppressionV4Options;
};
-template <> struct BuiltinOptionsTraits<NonMaxSuppressionV5Options>
+template <> struct BuiltinOptionsTraits<circle::NonMaxSuppressionV5Options>
{
static const BuiltinOptions enum_value = BuiltinOptions_NonMaxSuppressionV5Options;
};
-template <> struct BuiltinOptionsTraits<ScatterNdOptions>
+template <> struct BuiltinOptionsTraits<circle::ScatterNdOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ScatterNdOptions;
};
-template <> struct BuiltinOptionsTraits<SelectV2Options>
+template <> struct BuiltinOptionsTraits<circle::SelectV2Options>
{
static const BuiltinOptions enum_value = BuiltinOptions_SelectV2Options;
};
-template <> struct BuiltinOptionsTraits<DensifyOptions>
+template <> struct BuiltinOptionsTraits<circle::DensifyOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_DensifyOptions;
};
-template <> struct BuiltinOptionsTraits<SegmentSumOptions>
+template <> struct BuiltinOptionsTraits<circle::SegmentSumOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SegmentSumOptions;
};
-template <> struct BuiltinOptionsTraits<BatchMatMulOptions>
+template <> struct BuiltinOptionsTraits<circle::BatchMatMulOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_BatchMatMulOptions;
};
-template <> struct BuiltinOptionsTraits<BCQGatherOptions>
+template <> struct BuiltinOptionsTraits<circle::CumsumOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_CumsumOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::CallOnceOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_CallOnceOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::BroadcastToOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_BroadcastToOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::Rfft2dOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_Rfft2dOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::Conv3DOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_Conv3DOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::HashtableOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_HashtableOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::HashtableFindOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_HashtableFindOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::HashtableImportOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_HashtableImportOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::HashtableSizeOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_HashtableSizeOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::VarHandleOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_VarHandleOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::ReadVariableOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ReadVariableOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::AssignVariableOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_AssignVariableOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::RandomOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_RandomOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::BucketizeOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_BucketizeOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::GeluOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_GeluOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::DynamicUpdateSliceOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_DynamicUpdateSliceOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::UnsortedSegmentProdOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_UnsortedSegmentProdOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::UnsortedSegmentMaxOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_UnsortedSegmentMaxOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::UnsortedSegmentMinOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_UnsortedSegmentMinOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::UnsortedSegmentSumOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_UnsortedSegmentSumOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::ATan2Options>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ATan2Options;
+};
+
+template <> struct BuiltinOptionsTraits<circle::SignOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_SignOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::BitcastOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_BitcastOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::BitwiseXorOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_BitwiseXorOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::RightShiftOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_RightShiftOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::BCQGatherOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_BCQGatherOptions;
};
-template <> struct BuiltinOptionsTraits<BCQFullyConnectedOptions>
+template <> struct BuiltinOptionsTraits<circle::BCQFullyConnectedOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_BCQFullyConnectedOptions;
};
-template <> struct BuiltinOptionsTraits<InstanceNormOptions>
+template <> struct BuiltinOptionsTraits<circle::InstanceNormOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_InstanceNormOptions;
};
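
With the full set of specializations in place, generic serialization code can recover a union tag from a concrete options type at compile time instead of threading the enum through by hand. A minimal sketch, include name assumed:

#include "circle_schema_generated.h" // assumed name of this generated header

// e.g. UnionTagSketch<circle::GeluOptions>() == BuiltinOptions_GeluOptions
template <typename OptionsT> constexpr circle::BuiltinOptions UnionTagSketch()
{
  return circle::BuiltinOptionsTraits<OptionsT>::enum_value;
}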
@@ -1982,7 +2362,7 @@ bool VerifyBuiltinOptionsVector(flatbuffers::Verifier &verifier,
const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
const flatbuffers::Vector<uint8_t> *types);
-enum Padding
+enum Padding : int8_t
{
Padding_SAME = 0,
Padding_VALID = 1,
@@ -1998,17 +2378,19 @@ inline const Padding (&EnumValuesPadding())[2]
inline const char *const *EnumNamesPadding()
{
- static const char *const names[] = {"SAME", "VALID", nullptr};
+ static const char *const names[3] = {"SAME", "VALID", nullptr};
return names;
}
inline const char *EnumNamePadding(Padding e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, Padding_SAME, Padding_VALID))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesPadding()[index];
}
-enum ActivationFunctionType
+enum ActivationFunctionType : int8_t
{
ActivationFunctionType_NONE = 0,
ActivationFunctionType_RELU = 1,
@@ -2023,26 +2405,27 @@ enum ActivationFunctionType
inline const ActivationFunctionType (&EnumValuesActivationFunctionType())[6]
{
static const ActivationFunctionType values[] = {
- ActivationFunctionType_NONE, ActivationFunctionType_RELU,
- ActivationFunctionType_RELU_N1_TO_1, ActivationFunctionType_RELU6,
- ActivationFunctionType_TANH, ActivationFunctionType_SIGN_BIT};
+ ActivationFunctionType_NONE, ActivationFunctionType_RELU, ActivationFunctionType_RELU_N1_TO_1,
+ ActivationFunctionType_RELU6, ActivationFunctionType_TANH, ActivationFunctionType_SIGN_BIT};
return values;
}
inline const char *const *EnumNamesActivationFunctionType()
{
- static const char *const names[] = {"NONE", "RELU", "RELU_N1_TO_1", "RELU6",
- "TANH", "SIGN_BIT", nullptr};
+ static const char *const names[7] = {"NONE", "RELU", "RELU_N1_TO_1", "RELU6",
+ "TANH", "SIGN_BIT", nullptr};
return names;
}
inline const char *EnumNameActivationFunctionType(ActivationFunctionType e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, ActivationFunctionType_NONE, ActivationFunctionType_SIGN_BIT))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesActivationFunctionType()[index];
}
-enum LSHProjectionType
+enum LSHProjectionType : int8_t
{
LSHProjectionType_UNKNOWN = 0,
LSHProjectionType_SPARSE = 1,
@@ -2060,45 +2443,51 @@ inline const LSHProjectionType (&EnumValuesLSHProjectionType())[3]
inline const char *const *EnumNamesLSHProjectionType()
{
- static const char *const names[] = {"UNKNOWN", "SPARSE", "DENSE", nullptr};
+ static const char *const names[4] = {"UNKNOWN", "SPARSE", "DENSE", nullptr};
return names;
}
inline const char *EnumNameLSHProjectionType(LSHProjectionType e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, LSHProjectionType_UNKNOWN, LSHProjectionType_DENSE))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesLSHProjectionType()[index];
}
-enum FullyConnectedOptionsWeightsFormat
+enum FullyConnectedOptionsWeightsFormat : int8_t
{
FullyConnectedOptionsWeightsFormat_DEFAULT = 0,
FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8 = 1,
+ FullyConnectedOptionsWeightsFormat_SHUFFLED16x1FLOAT32 = 127,
FullyConnectedOptionsWeightsFormat_MIN = FullyConnectedOptionsWeightsFormat_DEFAULT,
- FullyConnectedOptionsWeightsFormat_MAX = FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8
+ FullyConnectedOptionsWeightsFormat_MAX = FullyConnectedOptionsWeightsFormat_SHUFFLED16x1FLOAT32
};
-inline const FullyConnectedOptionsWeightsFormat (&EnumValuesFullyConnectedOptionsWeightsFormat())[2]
+inline const FullyConnectedOptionsWeightsFormat (&EnumValuesFullyConnectedOptionsWeightsFormat())[3]
{
static const FullyConnectedOptionsWeightsFormat values[] = {
- FullyConnectedOptionsWeightsFormat_DEFAULT,
- FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8};
+ FullyConnectedOptionsWeightsFormat_DEFAULT, FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8,
+ FullyConnectedOptionsWeightsFormat_SHUFFLED16x1FLOAT32};
return values;
}
-inline const char *const *EnumNamesFullyConnectedOptionsWeightsFormat()
-{
- static const char *const names[] = {"DEFAULT", "SHUFFLED4x16INT8", nullptr};
- return names;
-}
-
inline const char *EnumNameFullyConnectedOptionsWeightsFormat(FullyConnectedOptionsWeightsFormat e)
{
- const size_t index = static_cast<int>(e);
- return EnumNamesFullyConnectedOptionsWeightsFormat()[index];
+ switch (e)
+ {
+ case FullyConnectedOptionsWeightsFormat_DEFAULT:
+ return "DEFAULT";
+ case FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8:
+ return "SHUFFLED4x16INT8";
+ case FullyConnectedOptionsWeightsFormat_SHUFFLED16x1FLOAT32:
+ return "SHUFFLED16x1FLOAT32";
+ default:
+ return "";
+ }
}
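
Because SHUFFLED16x1FLOAT32 sits at 127, this enum is no longer dense, so the generator drops the EnumNames array accessor and emits a switch; a 128-slot table would waste 125 entries to serve one outlier. Behavior-wise the switch matches the guarded lookups elsewhere, as this sketch checks (include name assumed):

#include <cassert>
#include <cstring>
#include "circle_schema_generated.h" // assumed name of this generated header

int main()
{
  using namespace circle;
  assert(std::strcmp(EnumNameFullyConnectedOptionsWeightsFormat(
                       FullyConnectedOptionsWeightsFormat_SHUFFLED16x1FLOAT32),
                     "SHUFFLED16x1FLOAT32") == 0);
  // Unknown values take the default branch and name "".
  assert(std::strcmp(EnumNameFullyConnectedOptionsWeightsFormat(
                       static_cast<FullyConnectedOptionsWeightsFormat>(2)),
                     "") == 0);
  return 0;
}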
-enum LSTMKernelType
+enum LSTMKernelType : int8_t
{
LSTMKernelType_FULL = 0,
LSTMKernelType_BASIC = 1,
@@ -2114,17 +2503,19 @@ inline const LSTMKernelType (&EnumValuesLSTMKernelType())[2]
inline const char *const *EnumNamesLSTMKernelType()
{
- static const char *const names[] = {"FULL", "BASIC", nullptr};
+ static const char *const names[3] = {"FULL", "BASIC", nullptr};
return names;
}
inline const char *EnumNameLSTMKernelType(LSTMKernelType e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, LSTMKernelType_FULL, LSTMKernelType_BASIC))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesLSTMKernelType()[index];
}
-enum CombinerType
+enum CombinerType : int8_t
{
CombinerType_SUM = 0,
CombinerType_MEAN = 1,
@@ -2141,17 +2532,19 @@ inline const CombinerType (&EnumValuesCombinerType())[3]
inline const char *const *EnumNamesCombinerType()
{
- static const char *const names[] = {"SUM", "MEAN", "SQRTN", nullptr};
+ static const char *const names[4] = {"SUM", "MEAN", "SQRTN", nullptr};
return names;
}
inline const char *EnumNameCombinerType(CombinerType e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, CombinerType_SUM, CombinerType_SQRTN))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesCombinerType()[index];
}
-enum MirrorPadMode
+enum MirrorPadMode : int8_t
{
MirrorPadMode_REFLECT = 0,
MirrorPadMode_SYMMETRIC = 1,
@@ -2167,17 +2560,19 @@ inline const MirrorPadMode (&EnumValuesMirrorPadMode())[2]
inline const char *const *EnumNamesMirrorPadMode()
{
- static const char *const names[] = {"REFLECT", "SYMMETRIC", nullptr};
+ static const char *const names[3] = {"REFLECT", "SYMMETRIC", nullptr};
return names;
}
inline const char *EnumNameMirrorPadMode(MirrorPadMode e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, MirrorPadMode_REFLECT, MirrorPadMode_SYMMETRIC))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesMirrorPadMode()[index];
}
-enum CustomOptionsFormat
+enum CustomOptionsFormat : int8_t
{
CustomOptionsFormat_FLEXBUFFERS = 0,
CustomOptionsFormat_MIN = CustomOptionsFormat_FLEXBUFFERS,
@@ -2192,17 +2587,19 @@ inline const CustomOptionsFormat (&EnumValuesCustomOptionsFormat())[1]
inline const char *const *EnumNamesCustomOptionsFormat()
{
- static const char *const names[] = {"FLEXBUFFERS", nullptr};
+ static const char *const names[2] = {"FLEXBUFFERS", nullptr};
return names;
}
inline const char *EnumNameCustomOptionsFormat(CustomOptionsFormat e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, CustomOptionsFormat_FLEXBUFFERS, CustomOptionsFormat_FLEXBUFFERS))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesCustomOptionsFormat()[index];
}
-enum DataFormat
+enum DataFormat : int8_t
{
DataFormat_CHANNELS_LAST = 0,
DataFormat_CHANNELS_FIRST = 1,
@@ -2218,19 +2615,22 @@ inline const DataFormat (&EnumValuesDataFormat())[2]
inline const char *const *EnumNamesDataFormat()
{
- static const char *const names[] = {"CHANNELS_LAST", "CHANNELS_FIRST", nullptr};
+ static const char *const names[3] = {"CHANNELS_LAST", "CHANNELS_FIRST", nullptr};
return names;
}
inline const char *EnumNameDataFormat(DataFormat e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, DataFormat_CHANNELS_LAST, DataFormat_CHANNELS_FIRST))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesDataFormat()[index];
}
struct CustomQuantization FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef CustomQuantizationBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_CUSTOM = 4
};
@@ -2247,6 +2647,7 @@ struct CustomQuantization FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct CustomQuantizationBuilder
{
+ typedef CustomQuantization Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_custom(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom)
@@ -2257,7 +2658,6 @@ struct CustomQuantizationBuilder
{
start_ = fbb_.StartTable();
}
- CustomQuantizationBuilder &operator=(const CustomQuantizationBuilder &);
flatbuffers::Offset<CustomQuantization> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -2279,12 +2679,18 @@ inline flatbuffers::Offset<CustomQuantization>
CreateCustomQuantizationDirect(flatbuffers::FlatBufferBuilder &_fbb,
const std::vector<uint8_t> *custom = nullptr)
{
- return circle::CreateCustomQuantization(_fbb, custom ? _fbb.CreateVector<uint8_t>(*custom) : 0);
+ if (custom)
+ {
+ _fbb.ForceVectorAlignment(custom->size(), sizeof(uint8_t), 16);
+ }
+ auto custom__ = custom ? _fbb.CreateVector<uint8_t>(*custom) : 0;
+ return circle::CreateCustomQuantization(_fbb, custom__);
}
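
CreateCustomQuantizationDirect now calls ForceVectorAlignment before creating the byte vector, so the custom payload starts on a 16-byte boundary in the finished buffer and consumers can use aligned loads on it. A sketch of a call site, with the include name assumed:

#include <cstdint>
#include <vector>
#include "circle_schema_generated.h" // assumed name of this generated header

flatbuffers::Offset<circle::CustomQuantization>
BuildCustomQuantizationSketch(flatbuffers::FlatBufferBuilder &fbb)
{
  std::vector<uint8_t> payload = {1, 2, 3, 4};
  // The Direct helper force-aligns payload->size() bytes of uint8_t data to
  // 16 before CreateVector, as shown in the body above.
  return circle::CreateCustomQuantizationDirect(fbb, &payload);
}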
struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef QuantizationParametersBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_MIN = 4,
VT_MAX = 6,
@@ -2310,17 +2716,17 @@ struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab
{
return GetPointer<const flatbuffers::Vector<int64_t> *>(VT_ZERO_POINT);
}
- QuantizationDetails details_type() const
+ circle::QuantizationDetails details_type() const
{
- return static_cast<QuantizationDetails>(GetField<uint8_t>(VT_DETAILS_TYPE, 0));
+ return static_cast<circle::QuantizationDetails>(GetField<uint8_t>(VT_DETAILS_TYPE, 0));
}
const void *details() const { return GetPointer<const void *>(VT_DETAILS); }
template <typename T> const T *details_as() const;
- const CustomQuantization *details_as_CustomQuantization() const
+ const circle::CustomQuantization *details_as_CustomQuantization() const
{
- return details_type() == QuantizationDetails_CustomQuantization
- ? static_cast<const CustomQuantization *>(details())
- : nullptr;
+ return details_type() == circle::QuantizationDetails_CustomQuantization
+ ? static_cast<const circle::CustomQuantization *>(details())
+ : nullptr;
}
int32_t quantized_dimension() const { return GetField<int32_t>(VT_QUANTIZED_DIMENSION, 0); }
bool Verify(flatbuffers::Verifier &verifier) const
@@ -2337,13 +2743,15 @@ struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab
};
template <>
-inline const CustomQuantization *QuantizationParameters::details_as<CustomQuantization>() const
+inline const circle::CustomQuantization *
+QuantizationParameters::details_as<circle::CustomQuantization>() const
{
return details_as_CustomQuantization();
}
struct QuantizationParametersBuilder
{
+ typedef QuantizationParameters Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_min(flatbuffers::Offset<flatbuffers::Vector<float>> min)
@@ -2362,7 +2770,7 @@ struct QuantizationParametersBuilder
{
fbb_.AddOffset(QuantizationParameters::VT_ZERO_POINT, zero_point);
}
- void add_details_type(QuantizationDetails details_type)
+ void add_details_type(circle::QuantizationDetails details_type)
{
fbb_.AddElement<uint8_t>(QuantizationParameters::VT_DETAILS_TYPE,
static_cast<uint8_t>(details_type), 0);
@@ -2380,7 +2788,6 @@ struct QuantizationParametersBuilder
{
start_ = fbb_.StartTable();
}
- QuantizationParametersBuilder &operator=(const QuantizationParametersBuilder &);
flatbuffers::Offset<QuantizationParameters> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -2389,14 +2796,13 @@ struct QuantizationParametersBuilder
}
};
-inline flatbuffers::Offset<QuantizationParameters>
-CreateQuantizationParameters(flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::Vector<float>> min = 0,
- flatbuffers::Offset<flatbuffers::Vector<float>> max = 0,
- flatbuffers::Offset<flatbuffers::Vector<float>> scale = 0,
- flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point = 0,
- QuantizationDetails details_type = QuantizationDetails_NONE,
- flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0)
+inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParameters(
+ flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset<flatbuffers::Vector<float>> min = 0,
+ flatbuffers::Offset<flatbuffers::Vector<float>> max = 0,
+ flatbuffers::Offset<flatbuffers::Vector<float>> scale = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point = 0,
+ circle::QuantizationDetails details_type = circle::QuantizationDetails_NONE,
+ flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0)
{
QuantizationParametersBuilder builder_(_fbb);
builder_.add_quantized_dimension(quantized_dimension);
@@ -2410,22 +2816,24 @@ CreateQuantizationParameters(flatbuffers::FlatBufferBuilder &_fbb,
}
inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParametersDirect(
- flatbuffers::FlatBufferBuilder &_fbb, const std::vector<float> *min = nullptr,
- const std::vector<float> *max = nullptr, const std::vector<float> *scale = nullptr,
- const std::vector<int64_t> *zero_point = nullptr,
- QuantizationDetails details_type = QuantizationDetails_NONE,
- flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0)
+ flatbuffers::FlatBufferBuilder &_fbb, const std::vector<float> *min = nullptr,
+ const std::vector<float> *max = nullptr, const std::vector<float> *scale = nullptr,
+ const std::vector<int64_t> *zero_point = nullptr,
+ circle::QuantizationDetails details_type = circle::QuantizationDetails_NONE,
+ flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0)
{
- return circle::CreateQuantizationParameters(
- _fbb, min ? _fbb.CreateVector<float>(*min) : 0, max ? _fbb.CreateVector<float>(*max) : 0,
- scale ? _fbb.CreateVector<float>(*scale) : 0,
- zero_point ? _fbb.CreateVector<int64_t>(*zero_point) : 0, details_type, details,
- quantized_dimension);
+ auto min__ = min ? _fbb.CreateVector<float>(*min) : 0;
+ auto max__ = max ? _fbb.CreateVector<float>(*max) : 0;
+ auto scale__ = scale ? _fbb.CreateVector<float>(*scale) : 0;
+ auto zero_point__ = zero_point ? _fbb.CreateVector<int64_t>(*zero_point) : 0;
+ return circle::CreateQuantizationParameters(_fbb, min__, max__, scale__, zero_point__,
+ details_type, details, quantized_dimension);
}
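// Illustrative sketch, assuming this generated header is included: per-channel
// scales and zero points passed through the regenerated Direct helper;
// argument order matches the signature above.
inline flatbuffers::Offset<circle::QuantizationParameters>
ExampleQuantization(flatbuffers::FlatBufferBuilder &fbb)
{
  std::vector<float> scale = {0.5f, 0.25f};
  std::vector<int64_t> zero_point = {0, 0};
  return circle::CreateQuantizationParametersDirect(
    fbb, /*min=*/nullptr, /*max=*/nullptr, &scale, &zero_point,
    circle::QuantizationDetails_NONE, /*details=*/0, /*quantized_dimension=*/0);
}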
struct Int32Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef Int32VectorBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_VALUES = 4
};
@@ -2442,6 +2850,7 @@ struct Int32Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct Int32VectorBuilder
{
+ typedef Int32Vector Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_values(flatbuffers::Offset<flatbuffers::Vector<int32_t>> values)
@@ -2452,7 +2861,6 @@ struct Int32VectorBuilder
{
start_ = fbb_.StartTable();
}
- Int32VectorBuilder &operator=(const Int32VectorBuilder &);
flatbuffers::Offset<Int32Vector> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -2474,12 +2882,14 @@ inline flatbuffers::Offset<Int32Vector>
CreateInt32VectorDirect(flatbuffers::FlatBufferBuilder &_fbb,
const std::vector<int32_t> *values = nullptr)
{
- return circle::CreateInt32Vector(_fbb, values ? _fbb.CreateVector<int32_t>(*values) : 0);
+ auto values__ = values ? _fbb.CreateVector<int32_t>(*values) : 0;
+ return circle::CreateInt32Vector(_fbb, values__);
}
struct Uint16Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef Uint16VectorBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_VALUES = 4
};
@@ -2496,6 +2906,7 @@ struct Uint16Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct Uint16VectorBuilder
{
+ typedef Uint16Vector Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_values(flatbuffers::Offset<flatbuffers::Vector<uint16_t>> values)
@@ -2506,7 +2917,6 @@ struct Uint16VectorBuilder
{
start_ = fbb_.StartTable();
}
- Uint16VectorBuilder &operator=(const Uint16VectorBuilder &);
flatbuffers::Offset<Uint16Vector> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -2528,12 +2938,18 @@ inline flatbuffers::Offset<Uint16Vector>
CreateUint16VectorDirect(flatbuffers::FlatBufferBuilder &_fbb,
const std::vector<uint16_t> *values = nullptr)
{
- return circle::CreateUint16Vector(_fbb, values ? _fbb.CreateVector<uint16_t>(*values) : 0);
+ if (values)
+ {
+ _fbb.ForceVectorAlignment(values->size(), sizeof(uint16_t), 4);
+ }
+ auto values__ = values ? _fbb.CreateVector<uint16_t>(*values) : 0;
+ return circle::CreateUint16Vector(_fbb, values__);
}
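// Illustrative sketch, assuming this generated header is included: Uint16Vector
// payloads are now forced to 4-byte alignment, the same pattern Uint8Vector
// uses below.
inline flatbuffers::Offset<circle::Uint16Vector>
ExampleUint16Vector(flatbuffers::FlatBufferBuilder &fbb)
{
  std::vector<uint16_t> values = {0, 2, 5};
  return circle::CreateUint16VectorDirect(fbb, &values);
}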
struct Uint8Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef Uint8VectorBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_VALUES = 4
};
@@ -2550,6 +2966,7 @@ struct Uint8Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct Uint8VectorBuilder
{
+ typedef Uint8Vector Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_values(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> values)
@@ -2560,7 +2977,6 @@ struct Uint8VectorBuilder
{
start_ = fbb_.StartTable();
}
- Uint8VectorBuilder &operator=(const Uint8VectorBuilder &);
flatbuffers::Offset<Uint8Vector> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -2582,12 +2998,18 @@ inline flatbuffers::Offset<Uint8Vector>
CreateUint8VectorDirect(flatbuffers::FlatBufferBuilder &_fbb,
const std::vector<uint8_t> *values = nullptr)
{
- return circle::CreateUint8Vector(_fbb, values ? _fbb.CreateVector<uint8_t>(*values) : 0);
+ if (values)
+ {
+ _fbb.ForceVectorAlignment(values->size(), sizeof(uint8_t), 4);
+ }
+ auto values__ = values ? _fbb.CreateVector<uint8_t>(*values) : 0;
+ return circle::CreateUint8Vector(_fbb, values__);
}
struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef DimensionMetadataBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_FORMAT = 4,
VT_DENSE_SIZE = 6,
@@ -2596,58 +3018,58 @@ struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
VT_ARRAY_INDICES_TYPE = 12,
VT_ARRAY_INDICES = 14
};
- DimensionType format() const
+ circle::DimensionType format() const
{
- return static_cast<DimensionType>(GetField<int8_t>(VT_FORMAT, 0));
+ return static_cast<circle::DimensionType>(GetField<int8_t>(VT_FORMAT, 0));
}
int32_t dense_size() const { return GetField<int32_t>(VT_DENSE_SIZE, 0); }
- SparseIndexVector array_segments_type() const
+ circle::SparseIndexVector array_segments_type() const
{
- return static_cast<SparseIndexVector>(GetField<uint8_t>(VT_ARRAY_SEGMENTS_TYPE, 0));
+ return static_cast<circle::SparseIndexVector>(GetField<uint8_t>(VT_ARRAY_SEGMENTS_TYPE, 0));
}
const void *array_segments() const { return GetPointer<const void *>(VT_ARRAY_SEGMENTS); }
template <typename T> const T *array_segments_as() const;
- const Int32Vector *array_segments_as_Int32Vector() const
+ const circle::Int32Vector *array_segments_as_Int32Vector() const
{
- return array_segments_type() == SparseIndexVector_Int32Vector
- ? static_cast<const Int32Vector *>(array_segments())
- : nullptr;
+ return array_segments_type() == circle::SparseIndexVector_Int32Vector
+ ? static_cast<const circle::Int32Vector *>(array_segments())
+ : nullptr;
}
- const Uint16Vector *array_segments_as_Uint16Vector() const
+ const circle::Uint16Vector *array_segments_as_Uint16Vector() const
{
- return array_segments_type() == SparseIndexVector_Uint16Vector
- ? static_cast<const Uint16Vector *>(array_segments())
- : nullptr;
+ return array_segments_type() == circle::SparseIndexVector_Uint16Vector
+ ? static_cast<const circle::Uint16Vector *>(array_segments())
+ : nullptr;
}
- const Uint8Vector *array_segments_as_Uint8Vector() const
+ const circle::Uint8Vector *array_segments_as_Uint8Vector() const
{
- return array_segments_type() == SparseIndexVector_Uint8Vector
- ? static_cast<const Uint8Vector *>(array_segments())
- : nullptr;
+ return array_segments_type() == circle::SparseIndexVector_Uint8Vector
+ ? static_cast<const circle::Uint8Vector *>(array_segments())
+ : nullptr;
}
- SparseIndexVector array_indices_type() const
+ circle::SparseIndexVector array_indices_type() const
{
- return static_cast<SparseIndexVector>(GetField<uint8_t>(VT_ARRAY_INDICES_TYPE, 0));
+ return static_cast<circle::SparseIndexVector>(GetField<uint8_t>(VT_ARRAY_INDICES_TYPE, 0));
}
const void *array_indices() const { return GetPointer<const void *>(VT_ARRAY_INDICES); }
template <typename T> const T *array_indices_as() const;
- const Int32Vector *array_indices_as_Int32Vector() const
+ const circle::Int32Vector *array_indices_as_Int32Vector() const
{
- return array_indices_type() == SparseIndexVector_Int32Vector
- ? static_cast<const Int32Vector *>(array_indices())
- : nullptr;
+ return array_indices_type() == circle::SparseIndexVector_Int32Vector
+ ? static_cast<const circle::Int32Vector *>(array_indices())
+ : nullptr;
}
- const Uint16Vector *array_indices_as_Uint16Vector() const
+ const circle::Uint16Vector *array_indices_as_Uint16Vector() const
{
- return array_indices_type() == SparseIndexVector_Uint16Vector
- ? static_cast<const Uint16Vector *>(array_indices())
- : nullptr;
+ return array_indices_type() == circle::SparseIndexVector_Uint16Vector
+ ? static_cast<const circle::Uint16Vector *>(array_indices())
+ : nullptr;
}
- const Uint8Vector *array_indices_as_Uint8Vector() const
+ const circle::Uint8Vector *array_indices_as_Uint8Vector() const
{
- return array_indices_type() == SparseIndexVector_Uint8Vector
- ? static_cast<const Uint8Vector *>(array_indices())
- : nullptr;
+ return array_indices_type() == circle::SparseIndexVector_Uint8Vector
+ ? static_cast<const circle::Uint8Vector *>(array_indices())
+ : nullptr;
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -2663,41 +3085,49 @@ struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
}
};
-template <> inline const Int32Vector *DimensionMetadata::array_segments_as<Int32Vector>() const
+template <>
+inline const circle::Int32Vector *DimensionMetadata::array_segments_as<circle::Int32Vector>() const
{
return array_segments_as_Int32Vector();
}
-template <> inline const Uint16Vector *DimensionMetadata::array_segments_as<Uint16Vector>() const
+template <>
+inline const circle::Uint16Vector *
+DimensionMetadata::array_segments_as<circle::Uint16Vector>() const
{
return array_segments_as_Uint16Vector();
}
-template <> inline const Uint8Vector *DimensionMetadata::array_segments_as<Uint8Vector>() const
+template <>
+inline const circle::Uint8Vector *DimensionMetadata::array_segments_as<circle::Uint8Vector>() const
{
return array_segments_as_Uint8Vector();
}
-template <> inline const Int32Vector *DimensionMetadata::array_indices_as<Int32Vector>() const
+template <>
+inline const circle::Int32Vector *DimensionMetadata::array_indices_as<circle::Int32Vector>() const
{
return array_indices_as_Int32Vector();
}
-template <> inline const Uint16Vector *DimensionMetadata::array_indices_as<Uint16Vector>() const
+template <>
+inline const circle::Uint16Vector *DimensionMetadata::array_indices_as<circle::Uint16Vector>() const
{
return array_indices_as_Uint16Vector();
}
-template <> inline const Uint8Vector *DimensionMetadata::array_indices_as<Uint8Vector>() const
+template <>
+inline const circle::Uint8Vector *DimensionMetadata::array_indices_as<circle::Uint8Vector>() const
{
return array_indices_as_Uint8Vector();
}
struct DimensionMetadataBuilder
{
+ typedef DimensionMetadata Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_format(DimensionType format)
+ void add_format(circle::DimensionType format)
{
fbb_.AddElement<int8_t>(DimensionMetadata::VT_FORMAT, static_cast<int8_t>(format), 0);
}
@@ -2705,7 +3135,7 @@ struct DimensionMetadataBuilder
{
fbb_.AddElement<int32_t>(DimensionMetadata::VT_DENSE_SIZE, dense_size, 0);
}
- void add_array_segments_type(SparseIndexVector array_segments_type)
+ void add_array_segments_type(circle::SparseIndexVector array_segments_type)
{
fbb_.AddElement<uint8_t>(DimensionMetadata::VT_ARRAY_SEGMENTS_TYPE,
static_cast<uint8_t>(array_segments_type), 0);
@@ -2714,7 +3144,7 @@ struct DimensionMetadataBuilder
{
fbb_.AddOffset(DimensionMetadata::VT_ARRAY_SEGMENTS, array_segments);
}
- void add_array_indices_type(SparseIndexVector array_indices_type)
+ void add_array_indices_type(circle::SparseIndexVector array_indices_type)
{
fbb_.AddElement<uint8_t>(DimensionMetadata::VT_ARRAY_INDICES_TYPE,
static_cast<uint8_t>(array_indices_type), 0);
@@ -2727,7 +3157,6 @@ struct DimensionMetadataBuilder
{
start_ = fbb_.StartTable();
}
- DimensionMetadataBuilder &operator=(const DimensionMetadataBuilder &);
flatbuffers::Offset<DimensionMetadata> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -2736,13 +3165,13 @@ struct DimensionMetadataBuilder
}
};
-inline flatbuffers::Offset<DimensionMetadata>
-CreateDimensionMetadata(flatbuffers::FlatBufferBuilder &_fbb,
- DimensionType format = DimensionType_DENSE, int32_t dense_size = 0,
- SparseIndexVector array_segments_type = SparseIndexVector_NONE,
- flatbuffers::Offset<void> array_segments = 0,
- SparseIndexVector array_indices_type = SparseIndexVector_NONE,
- flatbuffers::Offset<void> array_indices = 0)
+inline flatbuffers::Offset<DimensionMetadata> CreateDimensionMetadata(
+ flatbuffers::FlatBufferBuilder &_fbb, circle::DimensionType format = circle::DimensionType_DENSE,
+ int32_t dense_size = 0,
+ circle::SparseIndexVector array_segments_type = circle::SparseIndexVector_NONE,
+ flatbuffers::Offset<void> array_segments = 0,
+ circle::SparseIndexVector array_indices_type = circle::SparseIndexVector_NONE,
+ flatbuffers::Offset<void> array_indices = 0)
{
DimensionMetadataBuilder builder_(_fbb);
builder_.add_array_indices(array_indices);
@@ -2756,7 +3185,8 @@ CreateDimensionMetadata(flatbuffers::FlatBufferBuilder &_fbb,
struct SparsityParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SparsityParametersBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_TRAVERSAL_ORDER = 4,
VT_BLOCK_MAP = 6,
@@ -2770,10 +3200,10 @@ struct SparsityParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_BLOCK_MAP);
}
- const flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>> *dim_metadata() const
+ const flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>> *dim_metadata() const
{
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>> *>(
- VT_DIM_METADATA);
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>> *>(
+ VT_DIM_METADATA);
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -2787,6 +3217,7 @@ struct SparsityParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SparsityParametersBuilder
{
+ typedef SparsityParameters Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_traversal_order(flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order)
@@ -2798,7 +3229,8 @@ struct SparsityParametersBuilder
fbb_.AddOffset(SparsityParameters::VT_BLOCK_MAP, block_map);
}
void add_dim_metadata(
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>>> dim_metadata)
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>>>
+ dim_metadata)
{
fbb_.AddOffset(SparsityParameters::VT_DIM_METADATA, dim_metadata);
}
@@ -2806,7 +3238,6 @@ struct SparsityParametersBuilder
{
start_ = fbb_.StartTable();
}
- SparsityParametersBuilder &operator=(const SparsityParametersBuilder &);
flatbuffers::Offset<SparsityParameters> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -2816,11 +3247,11 @@ struct SparsityParametersBuilder
};
inline flatbuffers::Offset<SparsityParameters> CreateSparsityParameters(
- flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>>> dim_metadata =
- 0)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>>>
+ dim_metadata = 0)
{
SparsityParametersBuilder builder_(_fbb);
builder_.add_dim_metadata(dim_metadata);
@@ -2830,19 +3261,97 @@ inline flatbuffers::Offset<SparsityParameters> CreateSparsityParameters(
}
inline flatbuffers::Offset<SparsityParameters> CreateSparsityParametersDirect(
- flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *traversal_order = nullptr,
- const std::vector<int32_t> *block_map = nullptr,
- const std::vector<flatbuffers::Offset<DimensionMetadata>> *dim_metadata = nullptr)
+ flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *traversal_order = nullptr,
+ const std::vector<int32_t> *block_map = nullptr,
+ const std::vector<flatbuffers::Offset<circle::DimensionMetadata>> *dim_metadata = nullptr)
+{
+ auto traversal_order__ = traversal_order ? _fbb.CreateVector<int32_t>(*traversal_order) : 0;
+ auto block_map__ = block_map ? _fbb.CreateVector<int32_t>(*block_map) : 0;
+ auto dim_metadata__ =
+ dim_metadata ? _fbb.CreateVector<flatbuffers::Offset<circle::DimensionMetadata>>(*dim_metadata)
+ : 0;
+ return circle::CreateSparsityParameters(_fbb, traversal_order__, block_map__, dim_metadata__);
+}
+
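// Illustrative sketch, assuming this generated header is included; the
// DimensionType_SPARSE_CSR value is assumed from the TFLite-derived schema.
inline flatbuffers::Offset<circle::SparsityParameters>
ExampleSparsity(flatbuffers::FlatBufferBuilder &fbb)
{
  std::vector<int32_t> traversal_order = {0, 1};
  std::vector<flatbuffers::Offset<circle::DimensionMetadata>> dims = {
    circle::CreateDimensionMetadata(fbb, circle::DimensionType_DENSE, /*dense_size=*/4),
    circle::CreateDimensionMetadata(fbb, circle::DimensionType_SPARSE_CSR)};
  return circle::CreateSparsityParametersDirect(fbb, &traversal_order,
                                                /*block_map=*/nullptr, &dims);
}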
+struct VariantSubType FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef VariantSubTypeBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_SHAPE = 4,
+ VT_TYPE = 6,
+ VT_HAS_RANK = 8
+ };
+ const flatbuffers::Vector<int32_t> *shape() const
+ {
+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_SHAPE);
+ }
+ circle::TensorType type() const
+ {
+ return static_cast<circle::TensorType>(GetField<int8_t>(VT_TYPE, 0));
+ }
+ bool has_rank() const { return GetField<uint8_t>(VT_HAS_RANK, 0) != 0; }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_SHAPE) &&
+ verifier.VerifyVector(shape()) && VerifyField<int8_t>(verifier, VT_TYPE) &&
+ VerifyField<uint8_t>(verifier, VT_HAS_RANK) && verifier.EndTable();
+ }
+};
+
+struct VariantSubTypeBuilder
+{
+ typedef VariantSubType Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_shape(flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape)
+ {
+ fbb_.AddOffset(VariantSubType::VT_SHAPE, shape);
+ }
+ void add_type(circle::TensorType type)
+ {
+ fbb_.AddElement<int8_t>(VariantSubType::VT_TYPE, static_cast<int8_t>(type), 0);
+ }
+ void add_has_rank(bool has_rank)
+ {
+ fbb_.AddElement<uint8_t>(VariantSubType::VT_HAS_RANK, static_cast<uint8_t>(has_rank), 0);
+ }
+ explicit VariantSubTypeBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<VariantSubType> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<VariantSubType>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<VariantSubType>
+CreateVariantSubType(flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape = 0,
+ circle::TensorType type = circle::TensorType_FLOAT32, bool has_rank = false)
{
- return circle::CreateSparsityParameters(
- _fbb, traversal_order ? _fbb.CreateVector<int32_t>(*traversal_order) : 0,
- block_map ? _fbb.CreateVector<int32_t>(*block_map) : 0,
- dim_metadata ? _fbb.CreateVector<flatbuffers::Offset<DimensionMetadata>>(*dim_metadata) : 0);
+ VariantSubTypeBuilder builder_(_fbb);
+ builder_.add_shape(shape);
+ builder_.add_has_rank(has_rank);
+ builder_.add_type(type);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<VariantSubType> CreateVariantSubTypeDirect(
+ flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *shape = nullptr,
+ circle::TensorType type = circle::TensorType_FLOAT32, bool has_rank = false)
+{
+ auto shape__ = shape ? _fbb.CreateVector<int32_t>(*shape) : 0;
+ return circle::CreateVariantSubType(_fbb, shape__, type, has_rank);
}
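// Illustrative sketch, assuming this generated header is included: VariantSubType
// is new in this schema revision; TensorType_INT32 is assumed from the full
// TensorType enum.
inline flatbuffers::Offset<circle::VariantSubType>
ExampleVariantSubType(flatbuffers::FlatBufferBuilder &fbb)
{
  std::vector<int32_t> shape = {4}; // rank-1 subtype
  return circle::CreateVariantSubTypeDirect(fbb, &shape, circle::TensorType_INT32,
                                            /*has_rank=*/true);
}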
struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef TensorBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_SHAPE = 4,
VT_TYPE = 6,
@@ -2851,31 +3360,42 @@ struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
VT_QUANTIZATION = 12,
VT_IS_VARIABLE = 14,
VT_SPARSITY = 16,
- VT_SHAPE_SIGNATURE = 18
+ VT_SHAPE_SIGNATURE = 18,
+ VT_HAS_RANK = 20,
+ VT_VARIANT_TENSORS = 22
};
const flatbuffers::Vector<int32_t> *shape() const
{
return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_SHAPE);
}
- TensorType type() const { return static_cast<TensorType>(GetField<int8_t>(VT_TYPE, 0)); }
+ circle::TensorType type() const
+ {
+ return static_cast<circle::TensorType>(GetField<int8_t>(VT_TYPE, 0));
+ }
uint32_t buffer() const { return GetField<uint32_t>(VT_BUFFER, 0); }
const flatbuffers::String *name() const
{
return GetPointer<const flatbuffers::String *>(VT_NAME);
}
- const QuantizationParameters *quantization() const
+ const circle::QuantizationParameters *quantization() const
{
- return GetPointer<const QuantizationParameters *>(VT_QUANTIZATION);
+ return GetPointer<const circle::QuantizationParameters *>(VT_QUANTIZATION);
}
bool is_variable() const { return GetField<uint8_t>(VT_IS_VARIABLE, 0) != 0; }
- const SparsityParameters *sparsity() const
+ const circle::SparsityParameters *sparsity() const
{
- return GetPointer<const SparsityParameters *>(VT_SPARSITY);
+ return GetPointer<const circle::SparsityParameters *>(VT_SPARSITY);
}
const flatbuffers::Vector<int32_t> *shape_signature() const
{
return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_SHAPE_SIGNATURE);
}
+ bool has_rank() const { return GetField<uint8_t>(VT_HAS_RANK, 0) != 0; }
+ const flatbuffers::Vector<flatbuffers::Offset<circle::VariantSubType>> *variant_tensors() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::VariantSubType>> *>(
+ VT_VARIANT_TENSORS);
+ }
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_SHAPE) &&
@@ -2885,19 +3405,22 @@ struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
verifier.VerifyTable(quantization()) && VerifyField<uint8_t>(verifier, VT_IS_VARIABLE) &&
VerifyOffset(verifier, VT_SPARSITY) && verifier.VerifyTable(sparsity()) &&
VerifyOffset(verifier, VT_SHAPE_SIGNATURE) && verifier.VerifyVector(shape_signature()) &&
- verifier.EndTable();
+ VerifyField<uint8_t>(verifier, VT_HAS_RANK) &&
+ VerifyOffset(verifier, VT_VARIANT_TENSORS) && verifier.VerifyVector(variant_tensors()) &&
+ verifier.VerifyVectorOfTables(variant_tensors()) && verifier.EndTable();
}
};
struct TensorBuilder
{
+ typedef Tensor Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_shape(flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape)
{
fbb_.AddOffset(Tensor::VT_SHAPE, shape);
}
- void add_type(TensorType type)
+ void add_type(circle::TensorType type)
{
fbb_.AddElement<int8_t>(Tensor::VT_TYPE, static_cast<int8_t>(type), 0);
}
@@ -2906,7 +3429,7 @@ struct TensorBuilder
{
fbb_.AddOffset(Tensor::VT_NAME, name);
}
- void add_quantization(flatbuffers::Offset<QuantizationParameters> quantization)
+ void add_quantization(flatbuffers::Offset<circle::QuantizationParameters> quantization)
{
fbb_.AddOffset(Tensor::VT_QUANTIZATION, quantization);
}
@@ -2914,7 +3437,7 @@ struct TensorBuilder
{
fbb_.AddElement<uint8_t>(Tensor::VT_IS_VARIABLE, static_cast<uint8_t>(is_variable), 0);
}
- void add_sparsity(flatbuffers::Offset<SparsityParameters> sparsity)
+ void add_sparsity(flatbuffers::Offset<circle::SparsityParameters> sparsity)
{
fbb_.AddOffset(Tensor::VT_SPARSITY, sparsity);
}
@@ -2922,11 +3445,20 @@ struct TensorBuilder
{
fbb_.AddOffset(Tensor::VT_SHAPE_SIGNATURE, shape_signature);
}
+ void add_has_rank(bool has_rank)
+ {
+ fbb_.AddElement<uint8_t>(Tensor::VT_HAS_RANK, static_cast<uint8_t>(has_rank), 0);
+ }
+ void add_variant_tensors(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::VariantSubType>>>
+ variant_tensors)
+ {
+ fbb_.AddOffset(Tensor::VT_VARIANT_TENSORS, variant_tensors);
+ }
explicit TensorBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- TensorBuilder &operator=(const TensorBuilder &);
flatbuffers::Offset<Tensor> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -2935,43 +3467,53 @@ struct TensorBuilder
}
};
-inline flatbuffers::Offset<Tensor>
-CreateTensor(flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape = 0,
- TensorType type = TensorType_FLOAT32, uint32_t buffer = 0,
- flatbuffers::Offset<flatbuffers::String> name = 0,
- flatbuffers::Offset<QuantizationParameters> quantization = 0, bool is_variable = false,
- flatbuffers::Offset<SparsityParameters> sparsity = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape_signature = 0)
+inline flatbuffers::Offset<Tensor> CreateTensor(
+ flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape = 0,
+ circle::TensorType type = circle::TensorType_FLOAT32, uint32_t buffer = 0,
+ flatbuffers::Offset<flatbuffers::String> name = 0,
+ flatbuffers::Offset<circle::QuantizationParameters> quantization = 0, bool is_variable = false,
+ flatbuffers::Offset<circle::SparsityParameters> sparsity = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape_signature = 0, bool has_rank = false,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::VariantSubType>>>
+ variant_tensors = 0)
{
TensorBuilder builder_(_fbb);
+ builder_.add_variant_tensors(variant_tensors);
builder_.add_shape_signature(shape_signature);
builder_.add_sparsity(sparsity);
builder_.add_quantization(quantization);
builder_.add_name(name);
builder_.add_buffer(buffer);
builder_.add_shape(shape);
+ builder_.add_has_rank(has_rank);
builder_.add_is_variable(is_variable);
builder_.add_type(type);
return builder_.Finish();
}
inline flatbuffers::Offset<Tensor> CreateTensorDirect(
- flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *shape = nullptr,
- TensorType type = TensorType_FLOAT32, uint32_t buffer = 0, const char *name = nullptr,
- flatbuffers::Offset<QuantizationParameters> quantization = 0, bool is_variable = false,
- flatbuffers::Offset<SparsityParameters> sparsity = 0,
- const std::vector<int32_t> *shape_signature = nullptr)
-{
- return circle::CreateTensor(_fbb, shape ? _fbb.CreateVector<int32_t>(*shape) : 0, type, buffer,
- name ? _fbb.CreateString(name) : 0, quantization, is_variable,
- sparsity,
- shape_signature ? _fbb.CreateVector<int32_t>(*shape_signature) : 0);
+ flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *shape = nullptr,
+ circle::TensorType type = circle::TensorType_FLOAT32, uint32_t buffer = 0,
+ const char *name = nullptr, flatbuffers::Offset<circle::QuantizationParameters> quantization = 0,
+ bool is_variable = false, flatbuffers::Offset<circle::SparsityParameters> sparsity = 0,
+ const std::vector<int32_t> *shape_signature = nullptr, bool has_rank = false,
+ const std::vector<flatbuffers::Offset<circle::VariantSubType>> *variant_tensors = nullptr)
+{
+ auto shape__ = shape ? _fbb.CreateVector<int32_t>(*shape) : 0;
+ auto name__ = name ? _fbb.CreateString(name) : 0;
+ auto shape_signature__ = shape_signature ? _fbb.CreateVector<int32_t>(*shape_signature) : 0;
+ auto variant_tensors__ =
+ variant_tensors
+ ? _fbb.CreateVector<flatbuffers::Offset<circle::VariantSubType>>(*variant_tensors)
+ : 0;
+ return circle::CreateTensor(_fbb, shape__, type, buffer, name__, quantization, is_variable,
+ sparsity, shape_signature__, has_rank, variant_tensors__);
}
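// Illustrative sketch, assuming this generated header is included: the two new
// trailing fields keep existing positional callers source-compatible; buffer
// index 1 is a placeholder for a real Buffer entry.
inline flatbuffers::Offset<circle::Tensor> ExampleTensor(flatbuffers::FlatBufferBuilder &fbb)
{
  std::vector<int32_t> shape = {1, 224, 224, 3};
  return circle::CreateTensorDirect(fbb, &shape, circle::TensorType_FLOAT32,
                                    /*buffer=*/1, "input", /*quantization=*/0,
                                    /*is_variable=*/false, /*sparsity=*/0,
                                    /*shape_signature=*/nullptr, /*has_rank=*/true,
                                    /*variant_tensors=*/nullptr);
}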
struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef Conv2DOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_PADDING = 4,
VT_STRIDE_W = 6,
@@ -2980,12 +3522,16 @@ struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
VT_DILATION_W_FACTOR = 12,
VT_DILATION_H_FACTOR = 14
};
- Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); }
+ circle::Padding padding() const
+ {
+ return static_cast<circle::Padding>(GetField<int8_t>(VT_PADDING, 0));
+ }
int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
- ActivationFunctionType fused_activation_function() const
+ circle::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); }
int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); }
@@ -3002,9 +3548,10 @@ struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct Conv2DOptionsBuilder
{
+ typedef Conv2DOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_padding(Padding padding)
+ void add_padding(circle::Padding padding)
{
fbb_.AddElement<int8_t>(Conv2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
}
@@ -3016,7 +3563,7 @@ struct Conv2DOptionsBuilder
{
fbb_.AddElement<int32_t>(Conv2DOptions::VT_STRIDE_H, stride_h, 0);
}
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(Conv2DOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -3033,7 +3580,6 @@ struct Conv2DOptionsBuilder
{
start_ = fbb_.StartTable();
}
- Conv2DOptionsBuilder &operator=(const Conv2DOptionsBuilder &);
flatbuffers::Offset<Conv2DOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3042,11 +3588,11 @@ struct Conv2DOptionsBuilder
}
};
-inline flatbuffers::Offset<Conv2DOptions>
-CreateConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME,
- int32_t stride_w = 0, int32_t stride_h = 0,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
+inline flatbuffers::Offset<Conv2DOptions> CreateConv2DOptions(
+ flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME,
+ int32_t stride_w = 0, int32_t stride_h = 0,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
{
Conv2DOptionsBuilder builder_(_fbb);
builder_.add_dilation_h_factor(dilation_h_factor);
@@ -3058,9 +3604,120 @@ CreateConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padd
return builder_.Finish();
}
+struct Conv3DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef Conv3DOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_PADDING = 4,
+ VT_STRIDE_D = 6,
+ VT_STRIDE_W = 8,
+ VT_STRIDE_H = 10,
+ VT_FUSED_ACTIVATION_FUNCTION = 12,
+ VT_DILATION_D_FACTOR = 14,
+ VT_DILATION_W_FACTOR = 16,
+ VT_DILATION_H_FACTOR = 18
+ };
+ circle::Padding padding() const
+ {
+ return static_cast<circle::Padding>(GetField<int8_t>(VT_PADDING, 0));
+ }
+ int32_t stride_d() const { return GetField<int32_t>(VT_STRIDE_D, 0); }
+ int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
+ int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
+ circle::ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ int32_t dilation_d_factor() const { return GetField<int32_t>(VT_DILATION_D_FACTOR, 1); }
+ int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); }
+ int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) &&
+ VerifyField<int32_t>(verifier, VT_STRIDE_D) &&
+ VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
+ VerifyField<int32_t>(verifier, VT_STRIDE_H) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+ VerifyField<int32_t>(verifier, VT_DILATION_D_FACTOR) &&
+ VerifyField<int32_t>(verifier, VT_DILATION_W_FACTOR) &&
+ VerifyField<int32_t>(verifier, VT_DILATION_H_FACTOR) && verifier.EndTable();
+ }
+};
+
+struct Conv3DOptionsBuilder
+{
+ typedef Conv3DOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_padding(circle::Padding padding)
+ {
+ fbb_.AddElement<int8_t>(Conv3DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
+ }
+ void add_stride_d(int32_t stride_d)
+ {
+ fbb_.AddElement<int32_t>(Conv3DOptions::VT_STRIDE_D, stride_d, 0);
+ }
+ void add_stride_w(int32_t stride_w)
+ {
+ fbb_.AddElement<int32_t>(Conv3DOptions::VT_STRIDE_W, stride_w, 0);
+ }
+ void add_stride_h(int32_t stride_h)
+ {
+ fbb_.AddElement<int32_t>(Conv3DOptions::VT_STRIDE_H, stride_h, 0);
+ }
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(Conv3DOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ void add_dilation_d_factor(int32_t dilation_d_factor)
+ {
+ fbb_.AddElement<int32_t>(Conv3DOptions::VT_DILATION_D_FACTOR, dilation_d_factor, 1);
+ }
+ void add_dilation_w_factor(int32_t dilation_w_factor)
+ {
+ fbb_.AddElement<int32_t>(Conv3DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1);
+ }
+ void add_dilation_h_factor(int32_t dilation_h_factor)
+ {
+ fbb_.AddElement<int32_t>(Conv3DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1);
+ }
+ explicit Conv3DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<Conv3DOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Conv3DOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Conv3DOptions> CreateConv3DOptions(
+ flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME,
+ int32_t stride_d = 0, int32_t stride_w = 0, int32_t stride_h = 0,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ int32_t dilation_d_factor = 1, int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
+{
+ Conv3DOptionsBuilder builder_(_fbb);
+ builder_.add_dilation_h_factor(dilation_h_factor);
+ builder_.add_dilation_w_factor(dilation_w_factor);
+ builder_.add_dilation_d_factor(dilation_d_factor);
+ builder_.add_stride_h(stride_h);
+ builder_.add_stride_w(stride_w);
+ builder_.add_stride_d(stride_d);
+ builder_.add_fused_activation_function(fused_activation_function);
+ builder_.add_padding(padding);
+ return builder_.Finish();
+}
+
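// Illustrative sketch, assuming this generated header is included: Conv3DOptions
// mirrors the 2D table with an added depth dimension; dilation factors default
// to 1.
inline flatbuffers::Offset<circle::Conv3DOptions>
ExampleConv3DOptions(flatbuffers::FlatBufferBuilder &fbb)
{
  return circle::CreateConv3DOptions(fbb, circle::Padding_SAME, /*stride_d=*/1,
                                     /*stride_w=*/2, /*stride_h=*/2);
}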
struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef Pool2DOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_PADDING = 4,
VT_STRIDE_W = 6,
@@ -3069,14 +3726,18 @@ struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
VT_FILTER_HEIGHT = 12,
VT_FUSED_ACTIVATION_FUNCTION = 14
};
- Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); }
+ circle::Padding padding() const
+ {
+ return static_cast<circle::Padding>(GetField<int8_t>(VT_PADDING, 0));
+ }
int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
int32_t filter_width() const { return GetField<int32_t>(VT_FILTER_WIDTH, 0); }
int32_t filter_height() const { return GetField<int32_t>(VT_FILTER_HEIGHT, 0); }
- ActivationFunctionType fused_activation_function() const
+ circle::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -3091,9 +3752,10 @@ struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct Pool2DOptionsBuilder
{
+ typedef Pool2DOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_padding(Padding padding)
+ void add_padding(circle::Padding padding)
{
fbb_.AddElement<int8_t>(Pool2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
}
@@ -3113,7 +3775,7 @@ struct Pool2DOptionsBuilder
{
fbb_.AddElement<int32_t>(Pool2DOptions::VT_FILTER_HEIGHT, filter_height, 0);
}
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(Pool2DOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -3122,7 +3784,6 @@ struct Pool2DOptionsBuilder
{
start_ = fbb_.StartTable();
}
- Pool2DOptionsBuilder &operator=(const Pool2DOptionsBuilder &);
flatbuffers::Offset<Pool2DOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3131,11 +3792,10 @@ struct Pool2DOptionsBuilder
}
};
-inline flatbuffers::Offset<Pool2DOptions>
-CreatePool2DOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME,
- int32_t stride_w = 0, int32_t stride_h = 0, int32_t filter_width = 0,
- int32_t filter_height = 0,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+inline flatbuffers::Offset<Pool2DOptions> CreatePool2DOptions(
+ flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME,
+ int32_t stride_w = 0, int32_t stride_h = 0, int32_t filter_width = 0, int32_t filter_height = 0,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
{
Pool2DOptionsBuilder builder_(_fbb);
builder_.add_filter_height(filter_height);
@@ -3149,7 +3809,8 @@ CreatePool2DOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padd
struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef DepthwiseConv2DOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_PADDING = 4,
VT_STRIDE_W = 6,
@@ -3159,13 +3820,17 @@ struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab
VT_DILATION_W_FACTOR = 14,
VT_DILATION_H_FACTOR = 16
};
- Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); }
+ circle::Padding padding() const
+ {
+ return static_cast<circle::Padding>(GetField<int8_t>(VT_PADDING, 0));
+ }
int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
int32_t depth_multiplier() const { return GetField<int32_t>(VT_DEPTH_MULTIPLIER, 0); }
- ActivationFunctionType fused_activation_function() const
+ circle::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); }
int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); }
@@ -3183,9 +3848,10 @@ struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab
struct DepthwiseConv2DOptionsBuilder
{
+ typedef DepthwiseConv2DOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_padding(Padding padding)
+ void add_padding(circle::Padding padding)
{
fbb_.AddElement<int8_t>(DepthwiseConv2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
}
@@ -3201,7 +3867,7 @@ struct DepthwiseConv2DOptionsBuilder
{
fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DEPTH_MULTIPLIER, depth_multiplier, 0);
}
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(DepthwiseConv2DOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -3218,7 +3884,6 @@ struct DepthwiseConv2DOptionsBuilder
{
start_ = fbb_.StartTable();
}
- DepthwiseConv2DOptionsBuilder &operator=(const DepthwiseConv2DOptionsBuilder &);
flatbuffers::Offset<DepthwiseConv2DOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3228,10 +3893,10 @@ struct DepthwiseConv2DOptionsBuilder
};
inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions(
- flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME, int32_t stride_w = 0,
- int32_t stride_h = 0, int32_t depth_multiplier = 0,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
+ flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME,
+ int32_t stride_w = 0, int32_t stride_h = 0, int32_t depth_multiplier = 0,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
{
DepthwiseConv2DOptionsBuilder builder_(_fbb);
builder_.add_dilation_h_factor(dilation_h_factor);
@@ -3246,7 +3911,8 @@ inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions(
struct ConcatEmbeddingsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef ConcatEmbeddingsOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_NUM_CHANNELS = 4,
VT_NUM_COLUMNS_PER_CHANNEL = 6,
@@ -3273,6 +3939,7 @@ struct ConcatEmbeddingsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Ta
struct ConcatEmbeddingsOptionsBuilder
{
+ typedef ConcatEmbeddingsOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_num_channels(int32_t num_channels)
@@ -3280,12 +3947,12 @@ struct ConcatEmbeddingsOptionsBuilder
fbb_.AddElement<int32_t>(ConcatEmbeddingsOptions::VT_NUM_CHANNELS, num_channels, 0);
}
void add_num_columns_per_channel(
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel)
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel)
{
fbb_.AddOffset(ConcatEmbeddingsOptions::VT_NUM_COLUMNS_PER_CHANNEL, num_columns_per_channel);
}
void add_embedding_dim_per_channel(
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel)
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel)
{
fbb_.AddOffset(ConcatEmbeddingsOptions::VT_EMBEDDING_DIM_PER_CHANNEL,
embedding_dim_per_channel);
@@ -3294,7 +3961,6 @@ struct ConcatEmbeddingsOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ConcatEmbeddingsOptionsBuilder &operator=(const ConcatEmbeddingsOptionsBuilder &);
flatbuffers::Offset<ConcatEmbeddingsOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3304,9 +3970,9 @@ struct ConcatEmbeddingsOptionsBuilder
};
inline flatbuffers::Offset<ConcatEmbeddingsOptions> CreateConcatEmbeddingsOptions(
- flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel = 0)
+ flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel = 0)
{
ConcatEmbeddingsOptionsBuilder builder_(_fbb);
builder_.add_embedding_dim_per_channel(embedding_dim_per_channel);
@@ -3320,21 +3986,24 @@ CreateConcatEmbeddingsOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, int32_
const std::vector<int32_t> *num_columns_per_channel = nullptr,
const std::vector<int32_t> *embedding_dim_per_channel = nullptr)
{
- return circle::CreateConcatEmbeddingsOptions(
- _fbb, num_channels,
- num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0,
- embedding_dim_per_channel ? _fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0);
+ auto num_columns_per_channel__ =
+ num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0;
+ auto embedding_dim_per_channel__ =
+ embedding_dim_per_channel ? _fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0;
+ return circle::CreateConcatEmbeddingsOptions(_fbb, num_channels, num_columns_per_channel__,
+ embedding_dim_per_channel__);
}
struct LSHProjectionOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef LSHProjectionOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_TYPE = 4
};
- LSHProjectionType type() const
+ circle::LSHProjectionType type() const
{
- return static_cast<LSHProjectionType>(GetField<int8_t>(VT_TYPE, 0));
+ return static_cast<circle::LSHProjectionType>(GetField<int8_t>(VT_TYPE, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -3345,9 +4014,10 @@ struct LSHProjectionOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct LSHProjectionOptionsBuilder
{
+ typedef LSHProjectionOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_type(LSHProjectionType type)
+ void add_type(circle::LSHProjectionType type)
{
fbb_.AddElement<int8_t>(LSHProjectionOptions::VT_TYPE, static_cast<int8_t>(type), 0);
}
@@ -3355,7 +4025,6 @@ struct LSHProjectionOptionsBuilder
{
start_ = fbb_.StartTable();
}
- LSHProjectionOptionsBuilder &operator=(const LSHProjectionOptionsBuilder &);
flatbuffers::Offset<LSHProjectionOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3366,7 +4035,7 @@ struct LSHProjectionOptionsBuilder
inline flatbuffers::Offset<LSHProjectionOptions>
CreateLSHProjectionOptions(flatbuffers::FlatBufferBuilder &_fbb,
- LSHProjectionType type = LSHProjectionType_UNKNOWN)
+ circle::LSHProjectionType type = circle::LSHProjectionType_UNKNOWN)
{
LSHProjectionOptionsBuilder builder_(_fbb);
builder_.add_type(type);
@@ -3375,16 +4044,18 @@ CreateLSHProjectionOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct SVDFOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SVDFOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_RANK = 4,
VT_FUSED_ACTIVATION_FUNCTION = 6,
VT_ASYMMETRIC_QUANTIZE_INPUTS = 8
};
int32_t rank() const { return GetField<int32_t>(VT_RANK, 0); }
- ActivationFunctionType fused_activation_function() const
+ circle::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool asymmetric_quantize_inputs() const
{
@@ -3400,10 +4071,11 @@ struct SVDFOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SVDFOptionsBuilder
{
+ typedef SVDFOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_rank(int32_t rank) { fbb_.AddElement<int32_t>(SVDFOptions::VT_RANK, rank, 0); }
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(SVDFOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -3417,7 +4089,6 @@ struct SVDFOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SVDFOptionsBuilder &operator=(const SVDFOptionsBuilder &);
flatbuffers::Offset<SVDFOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3426,10 +4097,10 @@ struct SVDFOptionsBuilder
}
};
-inline flatbuffers::Offset<SVDFOptions>
-CreateSVDFOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t rank = 0,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- bool asymmetric_quantize_inputs = false)
+inline flatbuffers::Offset<SVDFOptions> CreateSVDFOptions(
+ flatbuffers::FlatBufferBuilder &_fbb, int32_t rank = 0,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ bool asymmetric_quantize_inputs = false)
{
SVDFOptionsBuilder builder_(_fbb);
builder_.add_rank(rank);
@@ -3440,14 +4111,16 @@ CreateSVDFOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t rank = 0,
struct RNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef RNNOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_FUSED_ACTIVATION_FUNCTION = 4,
VT_ASYMMETRIC_QUANTIZE_INPUTS = 6
};
- ActivationFunctionType fused_activation_function() const
+ circle::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool asymmetric_quantize_inputs() const
{
@@ -3463,9 +4136,10 @@ struct RNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct RNNOptionsBuilder
{
+ typedef RNNOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(RNNOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -3479,7 +4153,6 @@ struct RNNOptionsBuilder
{
start_ = fbb_.StartTable();
}
- RNNOptionsBuilder &operator=(const RNNOptionsBuilder &);
flatbuffers::Offset<RNNOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3488,10 +4161,10 @@ struct RNNOptionsBuilder
}
};
-inline flatbuffers::Offset<RNNOptions>
-CreateRNNOptions(flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- bool asymmetric_quantize_inputs = false)
+inline flatbuffers::Offset<RNNOptions> CreateRNNOptions(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ bool asymmetric_quantize_inputs = false)
{
RNNOptionsBuilder builder_(_fbb);
builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
@@ -3501,16 +4174,18 @@ CreateRNNOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct SequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SequenceRNNOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_TIME_MAJOR = 4,
VT_FUSED_ACTIVATION_FUNCTION = 6,
VT_ASYMMETRIC_QUANTIZE_INPUTS = 8
};
bool time_major() const { return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0; }
- ActivationFunctionType fused_activation_function() const
+ circle::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool asymmetric_quantize_inputs() const
{
@@ -3526,6 +4201,7 @@ struct SequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SequenceRNNOptionsBuilder
{
+ typedef SequenceRNNOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_time_major(bool time_major)
@@ -3533,7 +4209,7 @@ struct SequenceRNNOptionsBuilder
fbb_.AddElement<uint8_t>(SequenceRNNOptions::VT_TIME_MAJOR, static_cast<uint8_t>(time_major),
0);
}
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(SequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -3547,7 +4223,6 @@ struct SequenceRNNOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SequenceRNNOptionsBuilder &operator=(const SequenceRNNOptionsBuilder &);
flatbuffers::Offset<SequenceRNNOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3557,9 +4232,9 @@ struct SequenceRNNOptionsBuilder
};
inline flatbuffers::Offset<SequenceRNNOptions> CreateSequenceRNNOptions(
- flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- bool asymmetric_quantize_inputs = false)
+ flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ bool asymmetric_quantize_inputs = false)
{
SequenceRNNOptionsBuilder builder_(_fbb);
builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
@@ -3570,7 +4245,8 @@ inline flatbuffers::Offset<SequenceRNNOptions> CreateSequenceRNNOptions(
struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef BidirectionalSequenceRNNOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_TIME_MAJOR = 4,
VT_FUSED_ACTIVATION_FUNCTION = 6,
@@ -3578,9 +4254,10 @@ struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuf
VT_ASYMMETRIC_QUANTIZE_INPUTS = 10
};
bool time_major() const { return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0; }
- ActivationFunctionType fused_activation_function() const
+ circle::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool merge_outputs() const { return GetField<uint8_t>(VT_MERGE_OUTPUTS, 0) != 0; }
bool asymmetric_quantize_inputs() const
@@ -3598,6 +4275,7 @@ struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuf
struct BidirectionalSequenceRNNOptionsBuilder
{
+ typedef BidirectionalSequenceRNNOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_time_major(bool time_major)
@@ -3605,7 +4283,7 @@ struct BidirectionalSequenceRNNOptionsBuilder
fbb_.AddElement<uint8_t>(BidirectionalSequenceRNNOptions::VT_TIME_MAJOR,
static_cast<uint8_t>(time_major), 0);
}
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(BidirectionalSequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -3624,7 +4302,6 @@ struct BidirectionalSequenceRNNOptionsBuilder
{
start_ = fbb_.StartTable();
}
- BidirectionalSequenceRNNOptionsBuilder &operator=(const BidirectionalSequenceRNNOptionsBuilder &);
flatbuffers::Offset<BidirectionalSequenceRNNOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3634,9 +4311,9 @@ struct BidirectionalSequenceRNNOptionsBuilder
};
inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> CreateBidirectionalSequenceRNNOptions(
- flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- bool merge_outputs = false, bool asymmetric_quantize_inputs = false)
+ flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ bool merge_outputs = false, bool asymmetric_quantize_inputs = false)
{
BidirectionalSequenceRNNOptionsBuilder builder_(_fbb);
builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
@@ -3648,20 +4325,23 @@ inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> CreateBidirectionalS
struct FullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef FullyConnectedOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_FUSED_ACTIVATION_FUNCTION = 4,
VT_WEIGHTS_FORMAT = 6,
VT_KEEP_NUM_DIMS = 8,
VT_ASYMMETRIC_QUANTIZE_INPUTS = 10
};
- ActivationFunctionType fused_activation_function() const
+ circle::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
- FullyConnectedOptionsWeightsFormat weights_format() const
+ circle::FullyConnectedOptionsWeightsFormat weights_format() const
{
- return static_cast<FullyConnectedOptionsWeightsFormat>(GetField<int8_t>(VT_WEIGHTS_FORMAT, 0));
+ return static_cast<circle::FullyConnectedOptionsWeightsFormat>(
+ GetField<int8_t>(VT_WEIGHTS_FORMAT, 0));
}
bool keep_num_dims() const { return GetField<uint8_t>(VT_KEEP_NUM_DIMS, 0) != 0; }
bool asymmetric_quantize_inputs() const
@@ -3680,14 +4360,15 @@ struct FullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tabl
struct FullyConnectedOptionsBuilder
{
+ typedef FullyConnectedOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(FullyConnectedOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
}
- void add_weights_format(FullyConnectedOptionsWeightsFormat weights_format)
+ void add_weights_format(circle::FullyConnectedOptionsWeightsFormat weights_format)
{
fbb_.AddElement<int8_t>(FullyConnectedOptions::VT_WEIGHTS_FORMAT,
static_cast<int8_t>(weights_format), 0);
@@ -3706,7 +4387,6 @@ struct FullyConnectedOptionsBuilder
{
start_ = fbb_.StartTable();
}
- FullyConnectedOptionsBuilder &operator=(const FullyConnectedOptionsBuilder &);
flatbuffers::Offset<FullyConnectedOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3716,10 +4396,11 @@ struct FullyConnectedOptionsBuilder
};
inline flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- FullyConnectedOptionsWeightsFormat weights_format = FullyConnectedOptionsWeightsFormat_DEFAULT,
- bool keep_num_dims = false, bool asymmetric_quantize_inputs = false)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ circle::FullyConnectedOptionsWeightsFormat weights_format =
+ circle::FullyConnectedOptionsWeightsFormat_DEFAULT,
+ bool keep_num_dims = false, bool asymmetric_quantize_inputs = false)
{
FullyConnectedOptionsBuilder builder_(_fbb);
builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
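
Enum-typed accessors and parameters are now spelled with their full `circle::` qualification even though they already live inside the `circle` namespace; flatc 2.x emits fully qualified names so they resolve identically wherever the header is included. A call-site sketch against the regenerated signature:

flatbuffers::FlatBufferBuilder fbb;
auto fc_opts = circle::CreateFullyConnectedOptions(
  fbb, circle::ActivationFunctionType_RELU,
  circle::FullyConnectedOptionsWeightsFormat_DEFAULT,
  /*keep_num_dims=*/false, /*asymmetric_quantize_inputs=*/false);
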
@@ -3731,7 +4412,8 @@ inline flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions(
struct SoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SoftmaxOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_BETA = 4
};
@@ -3745,6 +4427,7 @@ struct SoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SoftmaxOptionsBuilder
{
+ typedef SoftmaxOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_beta(float beta) { fbb_.AddElement<float>(SoftmaxOptions::VT_BETA, beta, 0.0f); }
@@ -3752,7 +4435,6 @@ struct SoftmaxOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SoftmaxOptionsBuilder &operator=(const SoftmaxOptionsBuilder &);
flatbuffers::Offset<SoftmaxOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3771,15 +4453,17 @@ CreateSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb, float beta = 0.0f)
struct ConcatenationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef ConcatenationOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_AXIS = 4,
VT_FUSED_ACTIVATION_FUNCTION = 6
};
int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
- ActivationFunctionType fused_activation_function() const
+ circle::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -3790,10 +4474,11 @@ struct ConcatenationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct ConcatenationOptionsBuilder
{
+ typedef ConcatenationOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(ConcatenationOptions::VT_AXIS, axis, 0); }
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(ConcatenationOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -3802,7 +4487,6 @@ struct ConcatenationOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ConcatenationOptionsBuilder &operator=(const ConcatenationOptionsBuilder &);
flatbuffers::Offset<ConcatenationOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3812,8 +4496,8 @@ struct ConcatenationOptionsBuilder
};
inline flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions(
- flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+ flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
{
ConcatenationOptionsBuilder builder_(_fbb);
builder_.add_axis(axis);
@@ -3823,35 +4507,45 @@ inline flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions(
struct AddOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef AddOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
- VT_FUSED_ACTIVATION_FUNCTION = 4
+ VT_FUSED_ACTIVATION_FUNCTION = 4,
+ VT_POT_SCALE_INT16 = 6
};
- ActivationFunctionType fused_activation_function() const
+ circle::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
+ bool pot_scale_int16() const { return GetField<uint8_t>(VT_POT_SCALE_INT16, 1) != 0; }
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+ VerifyField<uint8_t>(verifier, VT_POT_SCALE_INT16) && verifier.EndTable();
}
};
struct AddOptionsBuilder
{
+ typedef AddOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(AddOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
}
+ void add_pot_scale_int16(bool pot_scale_int16)
+ {
+ fbb_.AddElement<uint8_t>(AddOptions::VT_POT_SCALE_INT16, static_cast<uint8_t>(pot_scale_int16),
+ 1);
+ }
explicit AddOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- AddOptionsBuilder &operator=(const AddOptionsBuilder &);
flatbuffers::Offset<AddOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3860,24 +4554,28 @@ struct AddOptionsBuilder
}
};
-inline flatbuffers::Offset<AddOptions>
-CreateAddOptions(flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+inline flatbuffers::Offset<AddOptions> CreateAddOptions(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ bool pot_scale_int16 = true)
{
AddOptionsBuilder builder_(_fbb);
+ builder_.add_pot_scale_int16(pot_scale_int16);
builder_.add_fused_activation_function(fused_activation_function);
return builder_.Finish();
}
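
`AddOptions` gains a `pot_scale_int16` field whose schema default is true — note the default argument `1` in `GetField`, so buffers serialized before the field existed still read back true. `SubOptions` further down receives the identical field. A usage sketch:

flatbuffers::FlatBufferBuilder fbb;
// Opt out of power-of-two int16 scaling explicitly; omitting the trailing
// argument keeps the schema default of true.
auto add_opts = circle::CreateAddOptions(
  fbb, circle::ActivationFunctionType_NONE, /*pot_scale_int16=*/false);
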
struct MulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef MulOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_FUSED_ACTIVATION_FUNCTION = 4
};
- ActivationFunctionType fused_activation_function() const
+ circle::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -3888,9 +4586,10 @@ struct MulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct MulOptionsBuilder
{
+ typedef MulOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(MulOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -3899,7 +4598,6 @@ struct MulOptionsBuilder
{
start_ = fbb_.StartTable();
}
- MulOptionsBuilder &operator=(const MulOptionsBuilder &);
flatbuffers::Offset<MulOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3908,9 +4606,9 @@ struct MulOptionsBuilder
}
};
-inline flatbuffers::Offset<MulOptions>
-CreateMulOptions(flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+inline flatbuffers::Offset<MulOptions> CreateMulOptions(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
{
MulOptionsBuilder builder_(_fbb);
builder_.add_fused_activation_function(fused_activation_function);
@@ -3919,13 +4617,15 @@ CreateMulOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct L2NormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef L2NormOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_FUSED_ACTIVATION_FUNCTION = 4
};
- ActivationFunctionType fused_activation_function() const
+ circle::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -3936,9 +4636,10 @@ struct L2NormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct L2NormOptionsBuilder
{
+ typedef L2NormOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(L2NormOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -3947,7 +4648,6 @@ struct L2NormOptionsBuilder
{
start_ = fbb_.StartTable();
}
- L2NormOptionsBuilder &operator=(const L2NormOptionsBuilder &);
flatbuffers::Offset<L2NormOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3956,9 +4656,9 @@ struct L2NormOptionsBuilder
}
};
-inline flatbuffers::Offset<L2NormOptions>
-CreateL2NormOptions(flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+inline flatbuffers::Offset<L2NormOptions> CreateL2NormOptions(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
{
L2NormOptionsBuilder builder_(_fbb);
builder_.add_fused_activation_function(fused_activation_function);
@@ -3967,7 +4667,8 @@ CreateL2NormOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct LocalResponseNormalizationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef LocalResponseNormalizationOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_RADIUS = 4,
VT_BIAS = 6,
@@ -3988,6 +4689,7 @@ struct LocalResponseNormalizationOptions FLATBUFFERS_FINAL_CLASS : private flatb
struct LocalResponseNormalizationOptionsBuilder
{
+ typedef LocalResponseNormalizationOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_radius(int32_t radius)
@@ -4007,12 +4709,10 @@ struct LocalResponseNormalizationOptionsBuilder
fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_BETA, beta, 0.0f);
}
explicit LocalResponseNormalizationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-    : fbb_(_fbb)
+  : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- LocalResponseNormalizationOptionsBuilder &
- operator=(const LocalResponseNormalizationOptionsBuilder &);
flatbuffers::Offset<LocalResponseNormalizationOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4035,7 +4735,8 @@ CreateLocalResponseNormalizationOptions(flatbuffers::FlatBufferBuilder &_fbb, in
struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef LSTMOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_FUSED_ACTIVATION_FUNCTION = 4,
VT_CELL_CLIP = 6,
@@ -4043,15 +4744,16 @@ struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
VT_KERNEL_TYPE = 10,
VT_ASYMMETRIC_QUANTIZE_INPUTS = 12
};
- ActivationFunctionType fused_activation_function() const
+ circle::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); }
float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); }
- LSTMKernelType kernel_type() const
+ circle::LSTMKernelType kernel_type() const
{
- return static_cast<LSTMKernelType>(GetField<int8_t>(VT_KERNEL_TYPE, 0));
+ return static_cast<circle::LSTMKernelType>(GetField<int8_t>(VT_KERNEL_TYPE, 0));
}
bool asymmetric_quantize_inputs() const
{
@@ -4070,9 +4772,10 @@ struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct LSTMOptionsBuilder
{
+ typedef LSTMOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(LSTMOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -4085,7 +4788,7 @@ struct LSTMOptionsBuilder
{
fbb_.AddElement<float>(LSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f);
}
- void add_kernel_type(LSTMKernelType kernel_type)
+ void add_kernel_type(circle::LSTMKernelType kernel_type)
{
fbb_.AddElement<int8_t>(LSTMOptions::VT_KERNEL_TYPE, static_cast<int8_t>(kernel_type), 0);
}
@@ -4098,7 +4801,6 @@ struct LSTMOptionsBuilder
{
start_ = fbb_.StartTable();
}
- LSTMOptionsBuilder &operator=(const LSTMOptionsBuilder &);
flatbuffers::Offset<LSTMOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4107,12 +4809,12 @@ struct LSTMOptionsBuilder
}
};
-inline flatbuffers::Offset<LSTMOptions>
-CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- float cell_clip = 0.0f, float proj_clip = 0.0f,
- LSTMKernelType kernel_type = LSTMKernelType_FULL,
- bool asymmetric_quantize_inputs = false)
+inline flatbuffers::Offset<LSTMOptions> CreateLSTMOptions(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ float cell_clip = 0.0f, float proj_clip = 0.0f,
+ circle::LSTMKernelType kernel_type = circle::LSTMKernelType_FULL,
+ bool asymmetric_quantize_inputs = false)
{
LSTMOptionsBuilder builder_(_fbb);
builder_.add_proj_clip(proj_clip);
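
`LSTMOptions` follows the same qualification pattern, with `kernel_type` now typed as `circle::LSTMKernelType`. A sketch selecting the non-default kernel:

flatbuffers::FlatBufferBuilder fbb;
auto lstm_opts = circle::CreateLSTMOptions(
  fbb, circle::ActivationFunctionType_TANH, /*cell_clip=*/0.0f,
  /*proj_clip=*/0.0f, circle::LSTMKernelType_BASIC);
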
@@ -4125,17 +4827,20 @@ CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef UnidirectionalSequenceLSTMOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_FUSED_ACTIVATION_FUNCTION = 4,
VT_CELL_CLIP = 6,
VT_PROJ_CLIP = 8,
VT_TIME_MAJOR = 10,
- VT_ASYMMETRIC_QUANTIZE_INPUTS = 12
+ VT_ASYMMETRIC_QUANTIZE_INPUTS = 12,
+ VT_DIAGONAL_RECURRENT_TENSORS = 14
};
- ActivationFunctionType fused_activation_function() const
+ circle::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); }
float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); }
@@ -4144,6 +4849,10 @@ struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatb
{
return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
}
+ bool diagonal_recurrent_tensors() const
+ {
+ return GetField<uint8_t>(VT_DIAGONAL_RECURRENT_TENSORS, 0) != 0;
+ }
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) &&
@@ -4151,15 +4860,17 @@ struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatb
VerifyField<float>(verifier, VT_CELL_CLIP) &&
VerifyField<float>(verifier, VT_PROJ_CLIP) &&
VerifyField<uint8_t>(verifier, VT_TIME_MAJOR) &&
- VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) && verifier.EndTable();
+ VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) &&
+ VerifyField<uint8_t>(verifier, VT_DIAGONAL_RECURRENT_TENSORS) && verifier.EndTable();
}
};
struct UnidirectionalSequenceLSTMOptionsBuilder
{
+ typedef UnidirectionalSequenceLSTMOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(UnidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -4182,13 +4893,16 @@ struct UnidirectionalSequenceLSTMOptionsBuilder
fbb_.AddElement<uint8_t>(UnidirectionalSequenceLSTMOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS,
static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
}
+ void add_diagonal_recurrent_tensors(bool diagonal_recurrent_tensors)
+ {
+ fbb_.AddElement<uint8_t>(UnidirectionalSequenceLSTMOptions::VT_DIAGONAL_RECURRENT_TENSORS,
+ static_cast<uint8_t>(diagonal_recurrent_tensors), 0);
+ }
explicit UnidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-    : fbb_(_fbb)
+  : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- UnidirectionalSequenceLSTMOptionsBuilder &
- operator=(const UnidirectionalSequenceLSTMOptionsBuilder &);
flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4199,14 +4913,15 @@ struct UnidirectionalSequenceLSTMOptionsBuilder
inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions>
CreateUnidirectionalSequenceLSTMOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- float cell_clip = 0.0f, float proj_clip = 0.0f, bool time_major = false,
- bool asymmetric_quantize_inputs = false)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ float cell_clip = 0.0f, float proj_clip = 0.0f, bool time_major = false,
+ bool asymmetric_quantize_inputs = false, bool diagonal_recurrent_tensors = false)
{
UnidirectionalSequenceLSTMOptionsBuilder builder_(_fbb);
builder_.add_proj_clip(proj_clip);
builder_.add_cell_clip(cell_clip);
+ builder_.add_diagonal_recurrent_tensors(diagonal_recurrent_tensors);
builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
builder_.add_time_major(time_major);
builder_.add_fused_activation_function(fused_activation_function);
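
`diagonal_recurrent_tensors` is appended at the end of the parameter list with a default of false, so pre-existing call sites keep compiling unchanged. A sketch that sets the new flag:

flatbuffers::FlatBufferBuilder fbb;
auto ulstm_opts = circle::CreateUnidirectionalSequenceLSTMOptions(
  fbb, circle::ActivationFunctionType_TANH, /*cell_clip=*/0.0f, /*proj_clip=*/0.0f,
  /*time_major=*/true, /*asymmetric_quantize_inputs=*/false,
  /*diagonal_recurrent_tensors=*/true);
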
@@ -4215,7 +4930,8 @@ CreateUnidirectionalSequenceLSTMOptions(
struct BidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef BidirectionalSequenceLSTMOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_FUSED_ACTIVATION_FUNCTION = 4,
VT_CELL_CLIP = 6,
@@ -4224,9 +4940,10 @@ struct BidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbu
VT_TIME_MAJOR = 12,
VT_ASYMMETRIC_QUANTIZE_INPUTS = 14
};
- ActivationFunctionType fused_activation_function() const
+ circle::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); }
float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); }
@@ -4250,9 +4967,10 @@ struct BidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbu
struct BidirectionalSequenceLSTMOptionsBuilder
{
+ typedef BidirectionalSequenceLSTMOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(BidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -4281,12 +4999,10 @@ struct BidirectionalSequenceLSTMOptionsBuilder
static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
}
explicit BidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-    : fbb_(_fbb)
+  : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- BidirectionalSequenceLSTMOptionsBuilder &
- operator=(const BidirectionalSequenceLSTMOptionsBuilder &);
flatbuffers::Offset<BidirectionalSequenceLSTMOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4296,10 +5012,10 @@ struct BidirectionalSequenceLSTMOptionsBuilder
};
inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectionalSequenceLSTMOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- float cell_clip = 0.0f, float proj_clip = 0.0f, bool merge_outputs = false,
- bool time_major = true, bool asymmetric_quantize_inputs = false)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ float cell_clip = 0.0f, float proj_clip = 0.0f, bool merge_outputs = false,
+ bool time_major = true, bool asymmetric_quantize_inputs = false)
{
BidirectionalSequenceLSTMOptionsBuilder builder_(_fbb);
builder_.add_proj_clip(proj_clip);
@@ -4313,7 +5029,8 @@ inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectional
struct ResizeBilinearOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef ResizeBilinearOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_ALIGN_CORNERS = 8,
VT_HALF_PIXEL_CENTERS = 10
@@ -4329,6 +5046,7 @@ struct ResizeBilinearOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tabl
struct ResizeBilinearOptionsBuilder
{
+ typedef ResizeBilinearOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_align_corners(bool align_corners)
@@ -4345,7 +5063,6 @@ struct ResizeBilinearOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ResizeBilinearOptionsBuilder &operator=(const ResizeBilinearOptionsBuilder &);
flatbuffers::Offset<ResizeBilinearOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4366,20 +5083,24 @@ CreateResizeBilinearOptions(flatbuffers::FlatBufferBuilder &_fbb, bool align_cor
struct ResizeNearestNeighborOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef ResizeNearestNeighborOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
- VT_ALIGN_CORNERS = 4
+ VT_ALIGN_CORNERS = 4,
+ VT_HALF_PIXEL_CENTERS = 6
};
bool align_corners() const { return GetField<uint8_t>(VT_ALIGN_CORNERS, 0) != 0; }
+ bool half_pixel_centers() const { return GetField<uint8_t>(VT_HALF_PIXEL_CENTERS, 0) != 0; }
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ALIGN_CORNERS) &&
- verifier.EndTable();
+ VerifyField<uint8_t>(verifier, VT_HALF_PIXEL_CENTERS) && verifier.EndTable();
}
};
struct ResizeNearestNeighborOptionsBuilder
{
+ typedef ResizeNearestNeighborOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_align_corners(bool align_corners)
@@ -4387,11 +5108,15 @@ struct ResizeNearestNeighborOptionsBuilder
fbb_.AddElement<uint8_t>(ResizeNearestNeighborOptions::VT_ALIGN_CORNERS,
static_cast<uint8_t>(align_corners), 0);
}
+ void add_half_pixel_centers(bool half_pixel_centers)
+ {
+ fbb_.AddElement<uint8_t>(ResizeNearestNeighborOptions::VT_HALF_PIXEL_CENTERS,
+ static_cast<uint8_t>(half_pixel_centers), 0);
+ }
explicit ResizeNearestNeighborOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- ResizeNearestNeighborOptionsBuilder &operator=(const ResizeNearestNeighborOptionsBuilder &);
flatbuffers::Offset<ResizeNearestNeighborOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4401,16 +5126,19 @@ struct ResizeNearestNeighborOptionsBuilder
};
inline flatbuffers::Offset<ResizeNearestNeighborOptions>
-CreateResizeNearestNeighborOptions(flatbuffers::FlatBufferBuilder &_fbb, bool align_corners = false)
+CreateResizeNearestNeighborOptions(flatbuffers::FlatBufferBuilder &_fbb, bool align_corners = false,
+ bool half_pixel_centers = false)
{
ResizeNearestNeighborOptionsBuilder builder_(_fbb);
+ builder_.add_half_pixel_centers(half_pixel_centers);
builder_.add_align_corners(align_corners);
return builder_.Finish();
}
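
`half_pixel_centers` brings `ResizeNearestNeighborOptions` in line with `ResizeBilinearOptions`, which already carried the field; it defaults to false. A sketch:

flatbuffers::FlatBufferBuilder fbb;
auto resize_opts = circle::CreateResizeNearestNeighborOptions(
  fbb, /*align_corners=*/false, /*half_pixel_centers=*/true);
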
struct CallOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef CallOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_SUBGRAPH = 4
};
@@ -4424,6 +5152,7 @@ struct CallOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct CallOptionsBuilder
{
+ typedef CallOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_subgraph(uint32_t subgraph)
@@ -4434,7 +5163,6 @@ struct CallOptionsBuilder
{
start_ = fbb_.StartTable();
}
- CallOptionsBuilder &operator=(const CallOptionsBuilder &);
flatbuffers::Offset<CallOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4453,6 +5181,7 @@ inline flatbuffers::Offset<CallOptions> CreateCallOptions(flatbuffers::FlatBuffe
struct PadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef PadOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -4461,13 +5190,13 @@ struct PadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct PadOptionsBuilder
{
+ typedef PadOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit PadOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- PadOptionsBuilder &operator=(const PadOptionsBuilder &);
flatbuffers::Offset<PadOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4484,6 +5213,7 @@ inline flatbuffers::Offset<PadOptions> CreatePadOptions(flatbuffers::FlatBufferB
struct PadV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef PadV2OptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -4492,13 +5222,13 @@ struct PadV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct PadV2OptionsBuilder
{
+ typedef PadV2Options Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit PadV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- PadV2OptionsBuilder &operator=(const PadV2OptionsBuilder &);
flatbuffers::Offset<PadV2Options> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4515,7 +5245,8 @@ inline flatbuffers::Offset<PadV2Options> CreatePadV2Options(flatbuffers::FlatBuf
struct ReshapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef ReshapeOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_NEW_SHAPE = 4
};
@@ -4532,6 +5263,7 @@ struct ReshapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct ReshapeOptionsBuilder
{
+ typedef ReshapeOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_new_shape(flatbuffers::Offset<flatbuffers::Vector<int32_t>> new_shape)
@@ -4542,7 +5274,6 @@ struct ReshapeOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ReshapeOptionsBuilder &operator=(const ReshapeOptionsBuilder &);
flatbuffers::Offset<ReshapeOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4564,11 +5295,13 @@ inline flatbuffers::Offset<ReshapeOptions>
CreateReshapeOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb,
const std::vector<int32_t> *new_shape = nullptr)
{
- return circle::CreateReshapeOptions(_fbb, new_shape ? _fbb.CreateVector<int32_t>(*new_shape) : 0);
+ auto new_shape__ = new_shape ? _fbb.CreateVector<int32_t>(*new_shape) : 0;
+ return circle::CreateReshapeOptions(_fbb, new_shape__);
}
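
The `...Direct` helpers now serialize the nested vector into a named local before invoking the table constructor; flatc 2.x emits this form so the vector is written before the table build begins, rather than relying on argument evaluation order inside a single expression (the same rewrite appears for `CreateSqueezeOptionsDirect` below). Usage is unchanged:

flatbuffers::FlatBufferBuilder fbb;
std::vector<int32_t> new_shape{1, -1}; // requires <vector>
auto reshape_opts = circle::CreateReshapeOptionsDirect(fbb, &new_shape);
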
struct SpaceToBatchNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef SpaceToBatchNDOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -4577,13 +5310,13 @@ struct SpaceToBatchNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tabl
struct SpaceToBatchNDOptionsBuilder
{
+ typedef SpaceToBatchNDOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit SpaceToBatchNDOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- SpaceToBatchNDOptionsBuilder &operator=(const SpaceToBatchNDOptionsBuilder &);
flatbuffers::Offset<SpaceToBatchNDOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4601,6 +5334,7 @@ CreateSpaceToBatchNDOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct BatchToSpaceNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef BatchToSpaceNDOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -4609,13 +5343,13 @@ struct BatchToSpaceNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tabl
struct BatchToSpaceNDOptionsBuilder
{
+ typedef BatchToSpaceNDOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit BatchToSpaceNDOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- BatchToSpaceNDOptionsBuilder &operator=(const BatchToSpaceNDOptionsBuilder &);
flatbuffers::Offset<BatchToSpaceNDOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4633,7 +5367,8 @@ CreateBatchToSpaceNDOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct SkipGramOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SkipGramOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_NGRAM_SIZE = 4,
VT_MAX_SKIP_SIZE = 6,
@@ -4652,6 +5387,7 @@ struct SkipGramOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SkipGramOptionsBuilder
{
+ typedef SkipGramOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_ngram_size(int32_t ngram_size)
@@ -4671,7 +5407,6 @@ struct SkipGramOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SkipGramOptionsBuilder &operator=(const SkipGramOptionsBuilder &);
flatbuffers::Offset<SkipGramOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4693,7 +5428,8 @@ CreateSkipGramOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t ngram_size =
struct SpaceToDepthOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SpaceToDepthOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_BLOCK_SIZE = 4
};
@@ -4707,6 +5443,7 @@ struct SpaceToDepthOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SpaceToDepthOptionsBuilder
{
+ typedef SpaceToDepthOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_block_size(int32_t block_size)
@@ -4717,7 +5454,6 @@ struct SpaceToDepthOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SpaceToDepthOptionsBuilder &operator=(const SpaceToDepthOptionsBuilder &);
flatbuffers::Offset<SpaceToDepthOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4736,7 +5472,8 @@ CreateSpaceToDepthOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t block_si
struct DepthToSpaceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef DepthToSpaceOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_BLOCK_SIZE = 4
};
@@ -4750,6 +5487,7 @@ struct DepthToSpaceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct DepthToSpaceOptionsBuilder
{
+ typedef DepthToSpaceOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_block_size(int32_t block_size)
@@ -4760,7 +5498,6 @@ struct DepthToSpaceOptionsBuilder
{
start_ = fbb_.StartTable();
}
- DepthToSpaceOptionsBuilder &operator=(const DepthToSpaceOptionsBuilder &);
flatbuffers::Offset<DepthToSpaceOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4779,35 +5516,45 @@ CreateDepthToSpaceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t block_si
struct SubOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SubOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
- VT_FUSED_ACTIVATION_FUNCTION = 4
+ VT_FUSED_ACTIVATION_FUNCTION = 4,
+ VT_POT_SCALE_INT16 = 6
};
- ActivationFunctionType fused_activation_function() const
+ circle::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
+ bool pot_scale_int16() const { return GetField<uint8_t>(VT_POT_SCALE_INT16, 1) != 0; }
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+ VerifyField<uint8_t>(verifier, VT_POT_SCALE_INT16) && verifier.EndTable();
}
};
struct SubOptionsBuilder
{
+ typedef SubOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(SubOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
}
+ void add_pot_scale_int16(bool pot_scale_int16)
+ {
+ fbb_.AddElement<uint8_t>(SubOptions::VT_POT_SCALE_INT16, static_cast<uint8_t>(pot_scale_int16),
+ 1);
+ }
explicit SubOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- SubOptionsBuilder &operator=(const SubOptionsBuilder &);
flatbuffers::Offset<SubOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4816,24 +5563,28 @@ struct SubOptionsBuilder
}
};
-inline flatbuffers::Offset<SubOptions>
-CreateSubOptions(flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+inline flatbuffers::Offset<SubOptions> CreateSubOptions(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ bool pot_scale_int16 = true)
{
SubOptionsBuilder builder_(_fbb);
+ builder_.add_pot_scale_int16(pot_scale_int16);
builder_.add_fused_activation_function(fused_activation_function);
return builder_.Finish();
}
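
With the new field, `SubOptions::Verify` additionally checks the `pot_scale_int16` scalar; the per-table `Verify` methods are invoked transitively when the whole buffer is verified through the root-type helper generated later in this header. A sketch, assuming `buf` (a `const uint8_t *`) and `size` hold a serialized circle model:

flatbuffers::Verifier verifier(buf, size);
bool ok = circle::VerifyModelBuffer(verifier); // walks every table, SubOptions included
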
struct DivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef DivOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_FUSED_ACTIVATION_FUNCTION = 4
};
- ActivationFunctionType fused_activation_function() const
+ circle::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -4844,9 +5595,10 @@ struct DivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct DivOptionsBuilder
{
+ typedef DivOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(DivOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -4855,7 +5607,6 @@ struct DivOptionsBuilder
{
start_ = fbb_.StartTable();
}
- DivOptionsBuilder &operator=(const DivOptionsBuilder &);
flatbuffers::Offset<DivOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4864,9 +5615,9 @@ struct DivOptionsBuilder
}
};
-inline flatbuffers::Offset<DivOptions>
-CreateDivOptions(flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+inline flatbuffers::Offset<DivOptions> CreateDivOptions(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
{
DivOptionsBuilder builder_(_fbb);
builder_.add_fused_activation_function(fused_activation_function);
@@ -4875,6 +5626,7 @@ CreateDivOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct TopKV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef TopKV2OptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -4883,13 +5635,13 @@ struct TopKV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct TopKV2OptionsBuilder
{
+ typedef TopKV2Options Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit TopKV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- TopKV2OptionsBuilder &operator=(const TopKV2OptionsBuilder &);
flatbuffers::Offset<TopKV2Options> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4906,13 +5658,14 @@ inline flatbuffers::Offset<TopKV2Options> CreateTopKV2Options(flatbuffers::FlatB
struct EmbeddingLookupSparseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef EmbeddingLookupSparseOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_COMBINER = 4
};
- CombinerType combiner() const
+ circle::CombinerType combiner() const
{
- return static_cast<CombinerType>(GetField<int8_t>(VT_COMBINER, 0));
+ return static_cast<circle::CombinerType>(GetField<int8_t>(VT_COMBINER, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -4923,9 +5676,10 @@ struct EmbeddingLookupSparseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffer
struct EmbeddingLookupSparseOptionsBuilder
{
+ typedef EmbeddingLookupSparseOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_combiner(CombinerType combiner)
+ void add_combiner(circle::CombinerType combiner)
{
fbb_.AddElement<int8_t>(EmbeddingLookupSparseOptions::VT_COMBINER,
static_cast<int8_t>(combiner), 0);
@@ -4934,7 +5688,6 @@ struct EmbeddingLookupSparseOptionsBuilder
{
start_ = fbb_.StartTable();
}
- EmbeddingLookupSparseOptionsBuilder &operator=(const EmbeddingLookupSparseOptionsBuilder &);
flatbuffers::Offset<EmbeddingLookupSparseOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4945,7 +5698,7 @@ struct EmbeddingLookupSparseOptionsBuilder
inline flatbuffers::Offset<EmbeddingLookupSparseOptions>
CreateEmbeddingLookupSparseOptions(flatbuffers::FlatBufferBuilder &_fbb,
- CombinerType combiner = CombinerType_SUM)
+ circle::CombinerType combiner = circle::CombinerType_SUM)
{
EmbeddingLookupSparseOptionsBuilder builder_(_fbb);
builder_.add_combiner(combiner);
@@ -4954,28 +5707,35 @@ CreateEmbeddingLookupSparseOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct GatherOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef GatherOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
- VT_AXIS = 4
+ VT_AXIS = 4,
+ VT_BATCH_DIMS = 6
};
int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
+ int32_t batch_dims() const { return GetField<int32_t>(VT_BATCH_DIMS, 0); }
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_AXIS) &&
- verifier.EndTable();
+ VerifyField<int32_t>(verifier, VT_BATCH_DIMS) && verifier.EndTable();
}
};
struct GatherOptionsBuilder
{
+ typedef GatherOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(GatherOptions::VT_AXIS, axis, 0); }
+ void add_batch_dims(int32_t batch_dims)
+ {
+ fbb_.AddElement<int32_t>(GatherOptions::VT_BATCH_DIMS, batch_dims, 0);
+ }
explicit GatherOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- GatherOptionsBuilder &operator=(const GatherOptionsBuilder &);
flatbuffers::Offset<GatherOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4984,16 +5744,18 @@ struct GatherOptionsBuilder
}
};
-inline flatbuffers::Offset<GatherOptions> CreateGatherOptions(flatbuffers::FlatBufferBuilder &_fbb,
- int32_t axis = 0)
+inline flatbuffers::Offset<GatherOptions>
+CreateGatherOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0, int32_t batch_dims = 0)
{
GatherOptionsBuilder builder_(_fbb);
+ builder_.add_batch_dims(batch_dims);
builder_.add_axis(axis);
return builder_.Finish();
}
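
`GatherOptions` gains an `int32_t batch_dims` field (default 0), again appended after the existing parameter so old call sites stay valid. A sketch of a batched gather:

flatbuffers::FlatBufferBuilder fbb;
auto gather_opts = circle::CreateGatherOptions(fbb, /*axis=*/1, /*batch_dims=*/1);
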
struct TransposeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef TransposeOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5002,13 +5764,13 @@ struct TransposeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct TransposeOptionsBuilder
{
+ typedef TransposeOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit TransposeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- TransposeOptionsBuilder &operator=(const TransposeOptionsBuilder &);
flatbuffers::Offset<TransposeOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5026,6 +5788,7 @@ CreateTransposeOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct ExpOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef ExpOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5034,13 +5797,13 @@ struct ExpOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct ExpOptionsBuilder
{
+ typedef ExpOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit ExpOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- ExpOptionsBuilder &operator=(const ExpOptionsBuilder &);
flatbuffers::Offset<ExpOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5057,6 +5820,7 @@ inline flatbuffers::Offset<ExpOptions> CreateExpOptions(flatbuffers::FlatBufferB
struct CosOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef CosOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5065,13 +5829,13 @@ struct CosOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct CosOptionsBuilder
{
+ typedef CosOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit CosOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- CosOptionsBuilder &operator=(const CosOptionsBuilder &);
flatbuffers::Offset<CosOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5088,7 +5852,8 @@ inline flatbuffers::Offset<CosOptions> CreateCosOptions(flatbuffers::FlatBufferB
struct ReducerOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef ReducerOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_KEEP_DIMS = 4
};
@@ -5102,6 +5867,7 @@ struct ReducerOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct ReducerOptionsBuilder
{
+ typedef ReducerOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_keep_dims(bool keep_dims)
@@ -5112,7 +5878,6 @@ struct ReducerOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ReducerOptionsBuilder &operator=(const ReducerOptionsBuilder &);
flatbuffers::Offset<ReducerOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5131,7 +5896,8 @@ CreateReducerOptions(flatbuffers::FlatBufferBuilder &_fbb, bool keep_dims = fals
struct SqueezeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SqueezeOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_SQUEEZE_DIMS = 4
};
@@ -5148,6 +5914,7 @@ struct SqueezeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SqueezeOptionsBuilder
{
+ typedef SqueezeOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_squeeze_dims(flatbuffers::Offset<flatbuffers::Vector<int32_t>> squeeze_dims)
@@ -5158,7 +5925,6 @@ struct SqueezeOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SqueezeOptionsBuilder &operator=(const SqueezeOptionsBuilder &);
flatbuffers::Offset<SqueezeOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5180,13 +5946,14 @@ inline flatbuffers::Offset<SqueezeOptions>
CreateSqueezeOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb,
const std::vector<int32_t> *squeeze_dims = nullptr)
{
- return circle::CreateSqueezeOptions(_fbb,
- squeeze_dims ? _fbb.CreateVector<int32_t>(*squeeze_dims) : 0);
+ auto squeeze_dims__ = squeeze_dims ? _fbb.CreateVector<int32_t>(*squeeze_dims) : 0;
+ return circle::CreateSqueezeOptions(_fbb, squeeze_dims__);
}
struct SplitOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SplitOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_NUM_SPLITS = 4
};
@@ -5200,6 +5967,7 @@ struct SplitOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SplitOptionsBuilder
{
+ typedef SplitOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_num_splits(int32_t num_splits)
@@ -5210,7 +5978,6 @@ struct SplitOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SplitOptionsBuilder &operator=(const SplitOptionsBuilder &);
flatbuffers::Offset<SplitOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5229,7 +5996,8 @@ inline flatbuffers::Offset<SplitOptions> CreateSplitOptions(flatbuffers::FlatBuf
struct SplitVOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SplitVOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_NUM_SPLITS = 4
};
@@ -5243,6 +6011,7 @@ struct SplitVOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SplitVOptionsBuilder
{
+ typedef SplitVOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_num_splits(int32_t num_splits)
@@ -5253,7 +6022,6 @@ struct SplitVOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SplitVOptionsBuilder &operator=(const SplitVOptionsBuilder &);
flatbuffers::Offset<SplitVOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5272,7 +6040,8 @@ inline flatbuffers::Offset<SplitVOptions> CreateSplitVOptions(flatbuffers::FlatB
struct StridedSliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef StridedSliceOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_BEGIN_MASK = 4,
VT_END_MASK = 6,
@@ -5297,6 +6066,7 @@ struct StridedSliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct StridedSliceOptionsBuilder
{
+ typedef StridedSliceOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_begin_mask(int32_t begin_mask)
@@ -5323,7 +6093,6 @@ struct StridedSliceOptionsBuilder
{
start_ = fbb_.StartTable();
}
- StridedSliceOptionsBuilder &operator=(const StridedSliceOptionsBuilder &);
flatbuffers::Offset<StridedSliceOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5348,6 +6117,7 @@ CreateStridedSliceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t begin_ma
struct LogSoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef LogSoftmaxOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5356,13 +6126,13 @@ struct LogSoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct LogSoftmaxOptionsBuilder
{
+ typedef LogSoftmaxOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit LogSoftmaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- LogSoftmaxOptionsBuilder &operator=(const LogSoftmaxOptionsBuilder &);
flatbuffers::Offset<LogSoftmaxOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5380,18 +6150,19 @@ CreateLogSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct CastOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef CastOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_IN_DATA_TYPE = 4,
VT_OUT_DATA_TYPE = 6
};
- TensorType in_data_type() const
+ circle::TensorType in_data_type() const
{
- return static_cast<TensorType>(GetField<int8_t>(VT_IN_DATA_TYPE, 0));
+ return static_cast<circle::TensorType>(GetField<int8_t>(VT_IN_DATA_TYPE, 0));
}
- TensorType out_data_type() const
+ circle::TensorType out_data_type() const
{
- return static_cast<TensorType>(GetField<int8_t>(VT_OUT_DATA_TYPE, 0));
+ return static_cast<circle::TensorType>(GetField<int8_t>(VT_OUT_DATA_TYPE, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -5402,13 +6173,14 @@ struct CastOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct CastOptionsBuilder
{
+ typedef CastOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_in_data_type(TensorType in_data_type)
+ void add_in_data_type(circle::TensorType in_data_type)
{
fbb_.AddElement<int8_t>(CastOptions::VT_IN_DATA_TYPE, static_cast<int8_t>(in_data_type), 0);
}
- void add_out_data_type(TensorType out_data_type)
+ void add_out_data_type(circle::TensorType out_data_type)
{
fbb_.AddElement<int8_t>(CastOptions::VT_OUT_DATA_TYPE, static_cast<int8_t>(out_data_type), 0);
}
@@ -5416,7 +6188,6 @@ struct CastOptionsBuilder
{
start_ = fbb_.StartTable();
}
- CastOptionsBuilder &operator=(const CastOptionsBuilder &);
flatbuffers::Offset<CastOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5427,8 +6198,8 @@ struct CastOptionsBuilder
inline flatbuffers::Offset<CastOptions>
CreateCastOptions(flatbuffers::FlatBufferBuilder &_fbb,
- TensorType in_data_type = TensorType_FLOAT32,
- TensorType out_data_type = TensorType_FLOAT32)
+ circle::TensorType in_data_type = circle::TensorType_FLOAT32,
+ circle::TensorType out_data_type = circle::TensorType_FLOAT32)
{
CastOptionsBuilder builder_(_fbb);
builder_.add_out_data_type(out_data_type);
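
`CastOptions` now takes `circle::TensorType` for both directions, with each defaulting to FLOAT32. A sketch of a float-to-int cast:

flatbuffers::FlatBufferBuilder fbb;
auto cast_opts = circle::CreateCastOptions(fbb, circle::TensorType_FLOAT32,
                                           circle::TensorType_INT32);
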
@@ -5438,6 +6209,7 @@ CreateCastOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct DequantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef DequantizeOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5446,13 +6218,13 @@ struct DequantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct DequantizeOptionsBuilder
{
+ typedef DequantizeOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit DequantizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- DequantizeOptionsBuilder &operator=(const DequantizeOptionsBuilder &);
flatbuffers::Offset<DequantizeOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5470,6 +6242,7 @@ CreateDequantizeOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct MaximumMinimumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef MaximumMinimumOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5478,13 +6251,13 @@ struct MaximumMinimumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tabl
struct MaximumMinimumOptionsBuilder
{
+ typedef MaximumMinimumOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit MaximumMinimumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- MaximumMinimumOptionsBuilder &operator=(const MaximumMinimumOptionsBuilder &);
flatbuffers::Offset<MaximumMinimumOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5502,6 +6275,7 @@ CreateMaximumMinimumOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct TileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef TileOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5510,13 +6284,13 @@ struct TileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct TileOptionsBuilder
{
+ typedef TileOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit TileOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- TileOptionsBuilder &operator=(const TileOptionsBuilder &);
flatbuffers::Offset<TileOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5533,13 +6307,14 @@ inline flatbuffers::Offset<TileOptions> CreateTileOptions(flatbuffers::FlatBuffe
struct ArgMaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef ArgMaxOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_OUTPUT_TYPE = 4
};
- TensorType output_type() const
+ circle::TensorType output_type() const
{
- return static_cast<TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0));
+ return static_cast<circle::TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -5550,9 +6325,10 @@ struct ArgMaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct ArgMaxOptionsBuilder
{
+ typedef ArgMaxOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_output_type(TensorType output_type)
+ void add_output_type(circle::TensorType output_type)
{
fbb_.AddElement<int8_t>(ArgMaxOptions::VT_OUTPUT_TYPE, static_cast<int8_t>(output_type), 0);
}
@@ -5560,7 +6336,6 @@ struct ArgMaxOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ArgMaxOptionsBuilder &operator=(const ArgMaxOptionsBuilder &);
flatbuffers::Offset<ArgMaxOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5571,7 +6346,7 @@ struct ArgMaxOptionsBuilder
inline flatbuffers::Offset<ArgMaxOptions>
CreateArgMaxOptions(flatbuffers::FlatBufferBuilder &_fbb,
- TensorType output_type = TensorType_FLOAT32)
+ circle::TensorType output_type = circle::TensorType_FLOAT32)
{
ArgMaxOptionsBuilder builder_(_fbb);
builder_.add_output_type(output_type);
@@ -5580,13 +6355,14 @@ CreateArgMaxOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct ArgMinOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef ArgMinOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_OUTPUT_TYPE = 4
};
- TensorType output_type() const
+ circle::TensorType output_type() const
{
- return static_cast<TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0));
+ return static_cast<circle::TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -5597,9 +6373,10 @@ struct ArgMinOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct ArgMinOptionsBuilder
{
+ typedef ArgMinOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_output_type(TensorType output_type)
+ void add_output_type(circle::TensorType output_type)
{
fbb_.AddElement<int8_t>(ArgMinOptions::VT_OUTPUT_TYPE, static_cast<int8_t>(output_type), 0);
}
@@ -5607,7 +6384,6 @@ struct ArgMinOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ArgMinOptionsBuilder &operator=(const ArgMinOptionsBuilder &);
flatbuffers::Offset<ArgMinOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5618,7 +6394,7 @@ struct ArgMinOptionsBuilder
inline flatbuffers::Offset<ArgMinOptions>
CreateArgMinOptions(flatbuffers::FlatBufferBuilder &_fbb,
- TensorType output_type = TensorType_FLOAT32)
+ circle::TensorType output_type = circle::TensorType_FLOAT32)
{
ArgMinOptionsBuilder builder_(_fbb);
builder_.add_output_type(output_type);
@@ -5627,6 +6403,7 @@ CreateArgMinOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct GreaterOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef GreaterOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5635,13 +6412,13 @@ struct GreaterOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct GreaterOptionsBuilder
{
+ typedef GreaterOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit GreaterOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- GreaterOptionsBuilder &operator=(const GreaterOptionsBuilder &);
flatbuffers::Offset<GreaterOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5659,6 +6436,7 @@ CreateGreaterOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct GreaterEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef GreaterEqualOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5667,13 +6445,13 @@ struct GreaterEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct GreaterEqualOptionsBuilder
{
+ typedef GreaterEqualOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit GreaterEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- GreaterEqualOptionsBuilder &operator=(const GreaterEqualOptionsBuilder &);
flatbuffers::Offset<GreaterEqualOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5691,6 +6469,7 @@ CreateGreaterEqualOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct LessOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef LessOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5699,13 +6478,13 @@ struct LessOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct LessOptionsBuilder
{
+ typedef LessOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit LessOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- LessOptionsBuilder &operator=(const LessOptionsBuilder &);
flatbuffers::Offset<LessOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5722,6 +6501,7 @@ inline flatbuffers::Offset<LessOptions> CreateLessOptions(flatbuffers::FlatBuffe
struct LessEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef LessEqualOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5730,13 +6510,13 @@ struct LessEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct LessEqualOptionsBuilder
{
+ typedef LessEqualOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit LessEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- LessEqualOptionsBuilder &operator=(const LessEqualOptionsBuilder &);
flatbuffers::Offset<LessEqualOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5754,6 +6534,7 @@ CreateLessEqualOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct NegOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef NegOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5762,13 +6543,13 @@ struct NegOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct NegOptionsBuilder
{
+ typedef NegOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit NegOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- NegOptionsBuilder &operator=(const NegOptionsBuilder &);
flatbuffers::Offset<NegOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5785,6 +6566,7 @@ inline flatbuffers::Offset<NegOptions> CreateNegOptions(flatbuffers::FlatBufferB
struct SelectOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef SelectOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5793,13 +6575,13 @@ struct SelectOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SelectOptionsBuilder
{
+ typedef SelectOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit SelectOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- SelectOptionsBuilder &operator=(const SelectOptionsBuilder &);
flatbuffers::Offset<SelectOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5816,6 +6598,7 @@ inline flatbuffers::Offset<SelectOptions> CreateSelectOptions(flatbuffers::FlatB
struct SliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef SliceOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5824,13 +6607,13 @@ struct SliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SliceOptionsBuilder
{
+ typedef SliceOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit SliceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- SliceOptionsBuilder &operator=(const SliceOptionsBuilder &);
flatbuffers::Offset<SliceOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5847,28 +6630,40 @@ inline flatbuffers::Offset<SliceOptions> CreateSliceOptions(flatbuffers::FlatBuf
struct TransposeConvOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef TransposeConvOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_PADDING = 4,
VT_STRIDE_W = 6,
- VT_STRIDE_H = 8
+ VT_STRIDE_H = 8,
+ VT_FUSED_ACTIVATION_FUNCTION = 10
};
- Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); }
+ circle::Padding padding() const
+ {
+ return static_cast<circle::Padding>(GetField<int8_t>(VT_PADDING, 0));
+ }
int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
+ circle::ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) &&
VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
- VerifyField<int32_t>(verifier, VT_STRIDE_H) && verifier.EndTable();
+ VerifyField<int32_t>(verifier, VT_STRIDE_H) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
}
};
struct TransposeConvOptionsBuilder
{
+ typedef TransposeConvOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_padding(Padding padding)
+ void add_padding(circle::Padding padding)
{
fbb_.AddElement<int8_t>(TransposeConvOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
}
@@ -5880,11 +6675,15 @@ struct TransposeConvOptionsBuilder
{
fbb_.AddElement<int32_t>(TransposeConvOptions::VT_STRIDE_H, stride_h, 0);
}
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(TransposeConvOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
explicit TransposeConvOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- TransposeConvOptionsBuilder &operator=(const TransposeConvOptionsBuilder &);
flatbuffers::Offset<TransposeConvOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5893,19 +6692,22 @@ struct TransposeConvOptionsBuilder
}
};
-inline flatbuffers::Offset<TransposeConvOptions>
-CreateTransposeConvOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME,
- int32_t stride_w = 0, int32_t stride_h = 0)
+inline flatbuffers::Offset<TransposeConvOptions> CreateTransposeConvOptions(
+ flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME,
+ int32_t stride_w = 0, int32_t stride_h = 0,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
{
TransposeConvOptionsBuilder builder_(_fbb);
builder_.add_stride_h(stride_h);
builder_.add_stride_w(stride_w);
+ builder_.add_fused_activation_function(fused_activation_function);
builder_.add_padding(padding);
return builder_.Finish();
}
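
TransposeConvOptions is one of the few tables in this diff that changes shape rather than just spelling: a fused_activation_function field is appended at vtable offset 10. Because an appended field reads back its default (0, i.e. ActivationFunctionType_NONE) when absent, buffers written against the old schema still verify and decode unchanged. A hedged sketch of the extended factory call, with illustrative values only:

// Sketch only: stride-2 transpose conv options with a fused ReLU.
// MakeTransposeConvOptionsSketch is a hypothetical helper.
inline flatbuffers::Offset<circle::TransposeConvOptions>
MakeTransposeConvOptionsSketch(flatbuffers::FlatBufferBuilder &fbb)
{
  return circle::CreateTransposeConvOptions(
    fbb, circle::Padding_SAME, /*stride_w=*/2, /*stride_h=*/2,
    /*fused_activation_function=*/circle::ActivationFunctionType_RELU);
}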
struct ExpandDimsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef ExpandDimsOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5914,13 +6716,13 @@ struct ExpandDimsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct ExpandDimsOptionsBuilder
{
+ typedef ExpandDimsOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit ExpandDimsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- ExpandDimsOptionsBuilder &operator=(const ExpandDimsOptionsBuilder &);
flatbuffers::Offset<ExpandDimsOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5938,7 +6740,8 @@ CreateExpandDimsOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct SparseToDenseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SparseToDenseOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_VALIDATE_INDICES = 4
};
@@ -5952,6 +6755,7 @@ struct SparseToDenseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SparseToDenseOptionsBuilder
{
+ typedef SparseToDenseOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_validate_indices(bool validate_indices)
@@ -5963,7 +6767,6 @@ struct SparseToDenseOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SparseToDenseOptionsBuilder &operator=(const SparseToDenseOptionsBuilder &);
flatbuffers::Offset<SparseToDenseOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5982,6 +6785,7 @@ CreateSparseToDenseOptions(flatbuffers::FlatBufferBuilder &_fbb, bool validate_i
struct EqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef EqualOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5990,13 +6794,13 @@ struct EqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct EqualOptionsBuilder
{
+ typedef EqualOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit EqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- EqualOptionsBuilder &operator=(const EqualOptionsBuilder &);
flatbuffers::Offset<EqualOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6013,6 +6817,7 @@ inline flatbuffers::Offset<EqualOptions> CreateEqualOptions(flatbuffers::FlatBuf
struct NotEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef NotEqualOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6021,13 +6826,13 @@ struct NotEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct NotEqualOptionsBuilder
{
+ typedef NotEqualOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit NotEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- NotEqualOptionsBuilder &operator=(const NotEqualOptionsBuilder &);
flatbuffers::Offset<NotEqualOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6045,11 +6850,15 @@ CreateNotEqualOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct ShapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef ShapeOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_OUT_TYPE = 4
};
- TensorType out_type() const { return static_cast<TensorType>(GetField<int8_t>(VT_OUT_TYPE, 0)); }
+ circle::TensorType out_type() const
+ {
+ return static_cast<circle::TensorType>(GetField<int8_t>(VT_OUT_TYPE, 0));
+ }
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_OUT_TYPE) &&
@@ -6059,9 +6868,10 @@ struct ShapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct ShapeOptionsBuilder
{
+ typedef ShapeOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_out_type(TensorType out_type)
+ void add_out_type(circle::TensorType out_type)
{
fbb_.AddElement<int8_t>(ShapeOptions::VT_OUT_TYPE, static_cast<int8_t>(out_type), 0);
}
@@ -6069,7 +6879,6 @@ struct ShapeOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ShapeOptionsBuilder &operator=(const ShapeOptionsBuilder &);
flatbuffers::Offset<ShapeOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6079,7 +6888,8 @@ struct ShapeOptionsBuilder
};
inline flatbuffers::Offset<ShapeOptions>
-CreateShapeOptions(flatbuffers::FlatBufferBuilder &_fbb, TensorType out_type = TensorType_FLOAT32)
+CreateShapeOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ circle::TensorType out_type = circle::TensorType_FLOAT32)
{
ShapeOptionsBuilder builder_(_fbb);
builder_.add_out_type(out_type);
@@ -6088,6 +6898,7 @@ CreateShapeOptions(flatbuffers::FlatBufferBuilder &_fbb, TensorType out_type = T
struct RankOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef RankOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6096,13 +6907,13 @@ struct RankOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct RankOptionsBuilder
{
+ typedef RankOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit RankOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- RankOptionsBuilder &operator=(const RankOptionsBuilder &);
flatbuffers::Offset<RankOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6119,6 +6930,7 @@ inline flatbuffers::Offset<RankOptions> CreateRankOptions(flatbuffers::FlatBuffe
struct PowOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef PowOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6127,13 +6939,13 @@ struct PowOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct PowOptionsBuilder
{
+ typedef PowOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit PowOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- PowOptionsBuilder &operator=(const PowOptionsBuilder &);
flatbuffers::Offset<PowOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6150,7 +6962,8 @@ inline flatbuffers::Offset<PowOptions> CreatePowOptions(flatbuffers::FlatBufferB
struct FakeQuantOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef FakeQuantOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_MIN = 4,
VT_MAX = 6,
@@ -6171,6 +6984,7 @@ struct FakeQuantOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct FakeQuantOptionsBuilder
{
+ typedef FakeQuantOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_min(float min) { fbb_.AddElement<float>(FakeQuantOptions::VT_MIN, min, 0.0f); }
@@ -6188,7 +7002,6 @@ struct FakeQuantOptionsBuilder
{
start_ = fbb_.StartTable();
}
- FakeQuantOptionsBuilder &operator=(const FakeQuantOptionsBuilder &);
flatbuffers::Offset<FakeQuantOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6211,7 +7024,8 @@ CreateFakeQuantOptions(flatbuffers::FlatBufferBuilder &_fbb, float min = 0.0f, f
struct PackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef PackOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_VALUES_COUNT = 4,
VT_AXIS = 6
@@ -6227,6 +7041,7 @@ struct PackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct PackOptionsBuilder
{
+ typedef PackOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_values_count(int32_t values_count)
@@ -6238,7 +7053,6 @@ struct PackOptionsBuilder
{
start_ = fbb_.StartTable();
}
- PackOptionsBuilder &operator=(const PackOptionsBuilder &);
flatbuffers::Offset<PackOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6258,6 +7072,7 @@ CreatePackOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t values_count = 0
struct LogicalOrOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef LogicalOrOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6266,13 +7081,13 @@ struct LogicalOrOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct LogicalOrOptionsBuilder
{
+ typedef LogicalOrOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit LogicalOrOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- LogicalOrOptionsBuilder &operator=(const LogicalOrOptionsBuilder &);
flatbuffers::Offset<LogicalOrOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6290,7 +7105,8 @@ CreateLogicalOrOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct OneHotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef OneHotOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_AXIS = 4
};
@@ -6304,6 +7120,7 @@ struct OneHotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct OneHotOptionsBuilder
{
+ typedef OneHotOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(OneHotOptions::VT_AXIS, axis, 0); }
@@ -6311,7 +7128,6 @@ struct OneHotOptionsBuilder
{
start_ = fbb_.StartTable();
}
- OneHotOptionsBuilder &operator=(const OneHotOptionsBuilder &);
flatbuffers::Offset<OneHotOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6330,6 +7146,7 @@ inline flatbuffers::Offset<OneHotOptions> CreateOneHotOptions(flatbuffers::FlatB
struct AbsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef AbsOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6338,13 +7155,13 @@ struct AbsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct AbsOptionsBuilder
{
+ typedef AbsOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit AbsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- AbsOptionsBuilder &operator=(const AbsOptionsBuilder &);
flatbuffers::Offset<AbsOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6361,6 +7178,7 @@ inline flatbuffers::Offset<AbsOptions> CreateAbsOptions(flatbuffers::FlatBufferB
struct HardSwishOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef HardSwishOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6369,13 +7187,13 @@ struct HardSwishOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct HardSwishOptionsBuilder
{
+ typedef HardSwishOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit HardSwishOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- HardSwishOptionsBuilder &operator=(const HardSwishOptionsBuilder &);
flatbuffers::Offset<HardSwishOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6393,6 +7211,7 @@ CreateHardSwishOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct LogicalAndOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef LogicalAndOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6401,13 +7220,13 @@ struct LogicalAndOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct LogicalAndOptionsBuilder
{
+ typedef LogicalAndOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit LogicalAndOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- LogicalAndOptionsBuilder &operator=(const LogicalAndOptionsBuilder &);
flatbuffers::Offset<LogicalAndOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6425,6 +7244,7 @@ CreateLogicalAndOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct LogicalNotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef LogicalNotOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6433,13 +7253,13 @@ struct LogicalNotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct LogicalNotOptionsBuilder
{
+ typedef LogicalNotOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit LogicalNotOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- LogicalNotOptionsBuilder &operator=(const LogicalNotOptionsBuilder &);
flatbuffers::Offset<LogicalNotOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6457,7 +7277,8 @@ CreateLogicalNotOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct UnpackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef UnpackOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_NUM = 4,
VT_AXIS = 6
@@ -6473,6 +7294,7 @@ struct UnpackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct UnpackOptionsBuilder
{
+ typedef UnpackOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_num(int32_t num) { fbb_.AddElement<int32_t>(UnpackOptions::VT_NUM, num, 0); }
@@ -6481,7 +7303,6 @@ struct UnpackOptionsBuilder
{
start_ = fbb_.StartTable();
}
- UnpackOptionsBuilder &operator=(const UnpackOptionsBuilder &);
flatbuffers::Offset<UnpackOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6501,6 +7322,7 @@ inline flatbuffers::Offset<UnpackOptions> CreateUnpackOptions(flatbuffers::FlatB
struct FloorDivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef FloorDivOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6509,13 +7331,13 @@ struct FloorDivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct FloorDivOptionsBuilder
{
+ typedef FloorDivOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit FloorDivOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- FloorDivOptionsBuilder &operator=(const FloorDivOptionsBuilder &);
flatbuffers::Offset<FloorDivOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6533,6 +7355,7 @@ CreateFloorDivOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct SquareOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef SquareOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6541,13 +7364,13 @@ struct SquareOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SquareOptionsBuilder
{
+ typedef SquareOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit SquareOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- SquareOptionsBuilder &operator=(const SquareOptionsBuilder &);
flatbuffers::Offset<SquareOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6564,6 +7387,7 @@ inline flatbuffers::Offset<SquareOptions> CreateSquareOptions(flatbuffers::FlatB
struct ZerosLikeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef ZerosLikeOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6572,13 +7396,13 @@ struct ZerosLikeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct ZerosLikeOptionsBuilder
{
+ typedef ZerosLikeOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit ZerosLikeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- ZerosLikeOptionsBuilder &operator=(const ZerosLikeOptionsBuilder &);
flatbuffers::Offset<ZerosLikeOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6596,6 +7420,7 @@ CreateZerosLikeOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct FillOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef FillOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6604,13 +7429,13 @@ struct FillOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct FillOptionsBuilder
{
+ typedef FillOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit FillOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- FillOptionsBuilder &operator=(const FillOptionsBuilder &);
flatbuffers::Offset<FillOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6627,6 +7452,7 @@ inline flatbuffers::Offset<FillOptions> CreateFillOptions(flatbuffers::FlatBuffe
struct FloorModOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef FloorModOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6635,13 +7461,13 @@ struct FloorModOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct FloorModOptionsBuilder
{
+ typedef FloorModOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit FloorModOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- FloorModOptionsBuilder &operator=(const FloorModOptionsBuilder &);
flatbuffers::Offset<FloorModOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6659,6 +7485,7 @@ CreateFloorModOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct RangeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef RangeOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6667,13 +7494,13 @@ struct RangeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct RangeOptionsBuilder
{
+ typedef RangeOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit RangeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- RangeOptionsBuilder &operator=(const RangeOptionsBuilder &);
flatbuffers::Offset<RangeOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6690,7 +7517,8 @@ inline flatbuffers::Offset<RangeOptions> CreateRangeOptions(flatbuffers::FlatBuf
struct LeakyReluOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef LeakyReluOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_ALPHA = 4
};
@@ -6704,6 +7532,7 @@ struct LeakyReluOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct LeakyReluOptionsBuilder
{
+ typedef LeakyReluOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_alpha(float alpha) { fbb_.AddElement<float>(LeakyReluOptions::VT_ALPHA, alpha, 0.0f); }
@@ -6711,7 +7540,6 @@ struct LeakyReluOptionsBuilder
{
start_ = fbb_.StartTable();
}
- LeakyReluOptionsBuilder &operator=(const LeakyReluOptionsBuilder &);
flatbuffers::Offset<LeakyReluOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6730,6 +7558,7 @@ CreateLeakyReluOptions(flatbuffers::FlatBufferBuilder &_fbb, float alpha = 0.0f)
struct SquaredDifferenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef SquaredDifferenceOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6738,13 +7567,13 @@ struct SquaredDifferenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::T
struct SquaredDifferenceOptionsBuilder
{
+ typedef SquaredDifferenceOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit SquaredDifferenceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- SquaredDifferenceOptionsBuilder &operator=(const SquaredDifferenceOptionsBuilder &);
flatbuffers::Offset<SquaredDifferenceOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6762,11 +7591,15 @@ CreateSquaredDifferenceOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct MirrorPadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef MirrorPadOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_MODE = 4
};
- MirrorPadMode mode() const { return static_cast<MirrorPadMode>(GetField<int8_t>(VT_MODE, 0)); }
+ circle::MirrorPadMode mode() const
+ {
+ return static_cast<circle::MirrorPadMode>(GetField<int8_t>(VT_MODE, 0));
+ }
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_MODE) &&
@@ -6776,9 +7609,10 @@ struct MirrorPadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct MirrorPadOptionsBuilder
{
+ typedef MirrorPadOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_mode(MirrorPadMode mode)
+ void add_mode(circle::MirrorPadMode mode)
{
fbb_.AddElement<int8_t>(MirrorPadOptions::VT_MODE, static_cast<int8_t>(mode), 0);
}
@@ -6786,7 +7620,6 @@ struct MirrorPadOptionsBuilder
{
start_ = fbb_.StartTable();
}
- MirrorPadOptionsBuilder &operator=(const MirrorPadOptionsBuilder &);
flatbuffers::Offset<MirrorPadOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6797,7 +7630,7 @@ struct MirrorPadOptionsBuilder
inline flatbuffers::Offset<MirrorPadOptions>
CreateMirrorPadOptions(flatbuffers::FlatBufferBuilder &_fbb,
- MirrorPadMode mode = MirrorPadMode_REFLECT)
+ circle::MirrorPadMode mode = circle::MirrorPadMode_REFLECT)
{
MirrorPadOptionsBuilder builder_(_fbb);
builder_.add_mode(mode);
@@ -6806,13 +7639,14 @@ CreateMirrorPadOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct UniqueOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef UniqueOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_IDX_OUT_TYPE = 4
};
- TensorType idx_out_type() const
+ circle::TensorType idx_out_type() const
{
- return static_cast<TensorType>(GetField<int8_t>(VT_IDX_OUT_TYPE, 2));
+ return static_cast<circle::TensorType>(GetField<int8_t>(VT_IDX_OUT_TYPE, 2));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -6823,9 +7657,10 @@ struct UniqueOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct UniqueOptionsBuilder
{
+ typedef UniqueOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_idx_out_type(TensorType idx_out_type)
+ void add_idx_out_type(circle::TensorType idx_out_type)
{
fbb_.AddElement<int8_t>(UniqueOptions::VT_IDX_OUT_TYPE, static_cast<int8_t>(idx_out_type), 2);
}
@@ -6833,7 +7668,6 @@ struct UniqueOptionsBuilder
{
start_ = fbb_.StartTable();
}
- UniqueOptionsBuilder &operator=(const UniqueOptionsBuilder &);
flatbuffers::Offset<UniqueOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6844,7 +7678,7 @@ struct UniqueOptionsBuilder
inline flatbuffers::Offset<UniqueOptions>
CreateUniqueOptions(flatbuffers::FlatBufferBuilder &_fbb,
- TensorType idx_out_type = TensorType_INT32)
+ circle::TensorType idx_out_type = circle::TensorType_INT32)
{
UniqueOptionsBuilder builder_(_fbb);
builder_.add_idx_out_type(idx_out_type);
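
One subtlety in UniqueOptions above: the GetField/AddElement default is 2 rather than 0, because the schema default for idx_out_type is INT32 (enum value 2). A buffer that omits the field therefore reads back INT32, and the builder skips writing the field when INT32 is passed.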
@@ -6853,6 +7687,7 @@ CreateUniqueOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct ReverseV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef ReverseV2OptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6861,13 +7696,13 @@ struct ReverseV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct ReverseV2OptionsBuilder
{
+ typedef ReverseV2Options Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit ReverseV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- ReverseV2OptionsBuilder &operator=(const ReverseV2OptionsBuilder &);
flatbuffers::Offset<ReverseV2Options> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6885,6 +7720,7 @@ CreateReverseV2Options(flatbuffers::FlatBufferBuilder &_fbb)
struct AddNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef AddNOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6893,13 +7729,13 @@ struct AddNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct AddNOptionsBuilder
{
+ typedef AddNOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit AddNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- AddNOptionsBuilder &operator=(const AddNOptionsBuilder &);
flatbuffers::Offset<AddNOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6916,6 +7752,7 @@ inline flatbuffers::Offset<AddNOptions> CreateAddNOptions(flatbuffers::FlatBuffe
struct GatherNdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef GatherNdOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6924,13 +7761,13 @@ struct GatherNdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct GatherNdOptionsBuilder
{
+ typedef GatherNdOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit GatherNdOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- GatherNdOptionsBuilder &operator=(const GatherNdOptionsBuilder &);
flatbuffers::Offset<GatherNdOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6948,6 +7785,7 @@ CreateGatherNdOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct WhereOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef WhereOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6956,13 +7794,13 @@ struct WhereOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct WhereOptionsBuilder
{
+ typedef WhereOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit WhereOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- WhereOptionsBuilder &operator=(const WhereOptionsBuilder &);
flatbuffers::Offset<WhereOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6979,7 +7817,8 @@ inline flatbuffers::Offset<WhereOptions> CreateWhereOptions(flatbuffers::FlatBuf
struct ReverseSequenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef ReverseSequenceOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_SEQ_DIM = 4,
VT_BATCH_DIM = 6
@@ -6995,6 +7834,7 @@ struct ReverseSequenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab
struct ReverseSequenceOptionsBuilder
{
+ typedef ReverseSequenceOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_seq_dim(int32_t seq_dim)
@@ -7009,7 +7849,6 @@ struct ReverseSequenceOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ReverseSequenceOptionsBuilder &operator=(const ReverseSequenceOptionsBuilder &);
flatbuffers::Offset<ReverseSequenceOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7030,6 +7869,7 @@ CreateReverseSequenceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t seq_d
struct MatrixDiagOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef MatrixDiagOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -7038,13 +7878,13 @@ struct MatrixDiagOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct MatrixDiagOptionsBuilder
{
+ typedef MatrixDiagOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit MatrixDiagOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- MatrixDiagOptionsBuilder &operator=(const MatrixDiagOptionsBuilder &);
flatbuffers::Offset<MatrixDiagOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7062,6 +7902,7 @@ CreateMatrixDiagOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct QuantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef QuantizeOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -7070,13 +7911,13 @@ struct QuantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct QuantizeOptionsBuilder
{
+ typedef QuantizeOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit QuantizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- QuantizeOptionsBuilder &operator=(const QuantizeOptionsBuilder &);
flatbuffers::Offset<QuantizeOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7094,6 +7935,7 @@ CreateQuantizeOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct MatrixSetDiagOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef MatrixSetDiagOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -7102,13 +7944,13 @@ struct MatrixSetDiagOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct MatrixSetDiagOptionsBuilder
{
+ typedef MatrixSetDiagOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit MatrixSetDiagOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- MatrixSetDiagOptionsBuilder &operator=(const MatrixSetDiagOptionsBuilder &);
flatbuffers::Offset<MatrixSetDiagOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7126,7 +7968,8 @@ CreateMatrixSetDiagOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct IfOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef IfOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_THEN_SUBGRAPH_INDEX = 4,
VT_ELSE_SUBGRAPH_INDEX = 6
@@ -7142,6 +7985,7 @@ struct IfOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct IfOptionsBuilder
{
+ typedef IfOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_then_subgraph_index(int32_t then_subgraph_index)
@@ -7156,7 +8000,6 @@ struct IfOptionsBuilder
{
start_ = fbb_.StartTable();
}
- IfOptionsBuilder &operator=(const IfOptionsBuilder &);
flatbuffers::Offset<IfOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7175,9 +8018,54 @@ inline flatbuffers::Offset<IfOptions> CreateIfOptions(flatbuffers::FlatBufferBui
return builder_.Finish();
}
+struct CallOnceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef CallOnceOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_INIT_SUBGRAPH_INDEX = 4
+ };
+ int32_t init_subgraph_index() const { return GetField<int32_t>(VT_INIT_SUBGRAPH_INDEX, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_INIT_SUBGRAPH_INDEX) &&
+ verifier.EndTable();
+ }
+};
+
+struct CallOnceOptionsBuilder
+{
+ typedef CallOnceOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_init_subgraph_index(int32_t init_subgraph_index)
+ {
+ fbb_.AddElement<int32_t>(CallOnceOptions::VT_INIT_SUBGRAPH_INDEX, init_subgraph_index, 0);
+ }
+ explicit CallOnceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<CallOnceOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<CallOnceOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<CallOnceOptions>
+CreateCallOnceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t init_subgraph_index = 0)
+{
+ CallOnceOptionsBuilder builder_(_fbb);
+ builder_.add_init_subgraph_index(init_subgraph_index);
+ return builder_.Finish();
+}
+
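CallOnceOptions is newly introduced in this revision: it carries the index of a one-time initialization subgraph, mirroring the TF Lite CALL_ONCE operator. A minimal construction sketch, with the subgraph index purely illustrative:

// Sketch only: options pointing at subgraph 1 as the init subgraph.
// MakeCallOnceOptionsSketch is a hypothetical helper.
inline flatbuffers::Offset<circle::CallOnceOptions>
MakeCallOnceOptionsSketch(flatbuffers::FlatBufferBuilder &fbb)
{
  return circle::CreateCallOnceOptions(fbb, /*init_subgraph_index=*/1);
}
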
struct WhileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef WhileOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_COND_SUBGRAPH_INDEX = 4,
VT_BODY_SUBGRAPH_INDEX = 6
@@ -7193,6 +8081,7 @@ struct WhileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct WhileOptionsBuilder
{
+ typedef WhileOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_cond_subgraph_index(int32_t cond_subgraph_index)
@@ -7207,7 +8096,6 @@ struct WhileOptionsBuilder
{
start_ = fbb_.StartTable();
}
- WhileOptionsBuilder &operator=(const WhileOptionsBuilder &);
flatbuffers::Offset<WhileOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7228,6 +8116,7 @@ inline flatbuffers::Offset<WhileOptions> CreateWhileOptions(flatbuffers::FlatBuf
struct NonMaxSuppressionV4Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef NonMaxSuppressionV4OptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -7236,13 +8125,13 @@ struct NonMaxSuppressionV4Options FLATBUFFERS_FINAL_CLASS : private flatbuffers:
struct NonMaxSuppressionV4OptionsBuilder
{
+ typedef NonMaxSuppressionV4Options Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit NonMaxSuppressionV4OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- NonMaxSuppressionV4OptionsBuilder &operator=(const NonMaxSuppressionV4OptionsBuilder &);
flatbuffers::Offset<NonMaxSuppressionV4Options> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7260,6 +8149,7 @@ CreateNonMaxSuppressionV4Options(flatbuffers::FlatBufferBuilder &_fbb)
struct NonMaxSuppressionV5Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef NonMaxSuppressionV5OptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -7268,13 +8158,13 @@ struct NonMaxSuppressionV5Options FLATBUFFERS_FINAL_CLASS : private flatbuffers:
struct NonMaxSuppressionV5OptionsBuilder
{
+ typedef NonMaxSuppressionV5Options Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit NonMaxSuppressionV5OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- NonMaxSuppressionV5OptionsBuilder &operator=(const NonMaxSuppressionV5OptionsBuilder &);
flatbuffers::Offset<NonMaxSuppressionV5Options> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7292,6 +8182,7 @@ CreateNonMaxSuppressionV5Options(flatbuffers::FlatBufferBuilder &_fbb)
struct ScatterNdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef ScatterNdOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -7300,13 +8191,13 @@ struct ScatterNdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct ScatterNdOptionsBuilder
{
+ typedef ScatterNdOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit ScatterNdOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- ScatterNdOptionsBuilder &operator=(const ScatterNdOptionsBuilder &);
flatbuffers::Offset<ScatterNdOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7324,6 +8215,7 @@ CreateScatterNdOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct SelectV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef SelectV2OptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -7332,13 +8224,13 @@ struct SelectV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SelectV2OptionsBuilder
{
+ typedef SelectV2Options Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit SelectV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- SelectV2OptionsBuilder &operator=(const SelectV2OptionsBuilder &);
flatbuffers::Offset<SelectV2Options> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7356,6 +8248,7 @@ CreateSelectV2Options(flatbuffers::FlatBufferBuilder &_fbb)
struct DensifyOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef DensifyOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -7364,13 +8257,13 @@ struct DensifyOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct DensifyOptionsBuilder
{
+ typedef DensifyOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit DensifyOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- DensifyOptionsBuilder &operator=(const DensifyOptionsBuilder &);
flatbuffers::Offset<DensifyOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7388,6 +8281,7 @@ CreateDensifyOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct SegmentSumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef SegmentSumOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -7396,13 +8290,13 @@ struct SegmentSumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SegmentSumOptionsBuilder
{
+ typedef SegmentSumOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit SegmentSumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- SegmentSumOptionsBuilder &operator=(const SegmentSumOptionsBuilder &);
flatbuffers::Offset<SegmentSumOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7420,22 +8314,30 @@ CreateSegmentSumOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct BatchMatMulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef BatchMatMulOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_ADJOINT_LHS = 4,
- VT_ADJOINT_RHS = 6
+ VT_ADJOINT_RHS = 6,
+ VT_ASYMMETRIC_QUANTIZE_INPUTS = 8
};
bool adjoint_lhs() const { return GetField<uint8_t>(VT_ADJOINT_LHS, 0) != 0; }
bool adjoint_rhs() const { return GetField<uint8_t>(VT_ADJOINT_RHS, 0) != 0; }
+ bool asymmetric_quantize_inputs() const
+ {
+ return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
+ }
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ADJOINT_LHS) &&
- VerifyField<uint8_t>(verifier, VT_ADJOINT_RHS) && verifier.EndTable();
+ VerifyField<uint8_t>(verifier, VT_ADJOINT_RHS) &&
+ VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) && verifier.EndTable();
}
};
struct BatchMatMulOptionsBuilder
{
+ typedef BatchMatMulOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_adjoint_lhs(bool adjoint_lhs)
@@ -7448,11 +8350,15 @@ struct BatchMatMulOptionsBuilder
fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ADJOINT_RHS, static_cast<uint8_t>(adjoint_rhs),
0);
}
+ void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
+ {
+ fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS,
+ static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
+ }
explicit BatchMatMulOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- BatchMatMulOptionsBuilder &operator=(const BatchMatMulOptionsBuilder &);
flatbuffers::Offset<BatchMatMulOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7463,17 +8369,910 @@ struct BatchMatMulOptionsBuilder
inline flatbuffers::Offset<BatchMatMulOptions>
CreateBatchMatMulOptions(flatbuffers::FlatBufferBuilder &_fbb, bool adjoint_lhs = false,
- bool adjoint_rhs = false)
+ bool adjoint_rhs = false, bool asymmetric_quantize_inputs = false)
{
BatchMatMulOptionsBuilder builder_(_fbb);
+ builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
builder_.add_adjoint_rhs(adjoint_rhs);
builder_.add_adjoint_lhs(adjoint_lhs);
return builder_.Finish();
}
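
BatchMatMulOptions likewise gains an appended asymmetric_quantize_inputs flag (vtable offset 8), defaulting to false so existing buffers are unaffected. Inside the Create helper the three one-byte fields are added in decreasing field order, following the generator's usual size-then-field-id ordering; callers only see the flat parameter list. A hedged sketch:

// Sketch only: adjoint on the RHS, asymmetric input quantization enabled.
// MakeBatchMatMulOptionsSketch is a hypothetical helper.
inline flatbuffers::Offset<circle::BatchMatMulOptions>
MakeBatchMatMulOptionsSketch(flatbuffers::FlatBufferBuilder &fbb)
{
  return circle::CreateBatchMatMulOptions(fbb, /*adjoint_lhs=*/false,
                                          /*adjoint_rhs=*/true,
                                          /*asymmetric_quantize_inputs=*/true);
}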
+struct CumsumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef CumsumOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_EXCLUSIVE = 4,
+ VT_REVERSE = 6
+ };
+ bool exclusive() const { return GetField<uint8_t>(VT_EXCLUSIVE, 0) != 0; }
+ bool reverse() const { return GetField<uint8_t>(VT_REVERSE, 0) != 0; }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_EXCLUSIVE) &&
+ VerifyField<uint8_t>(verifier, VT_REVERSE) && verifier.EndTable();
+ }
+};
+
+struct CumsumOptionsBuilder
+{
+ typedef CumsumOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_exclusive(bool exclusive)
+ {
+ fbb_.AddElement<uint8_t>(CumsumOptions::VT_EXCLUSIVE, static_cast<uint8_t>(exclusive), 0);
+ }
+ void add_reverse(bool reverse)
+ {
+ fbb_.AddElement<uint8_t>(CumsumOptions::VT_REVERSE, static_cast<uint8_t>(reverse), 0);
+ }
+ explicit CumsumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<CumsumOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<CumsumOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<CumsumOptions> CreateCumsumOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ bool exclusive = false,
+ bool reverse = false)
+{
+ CumsumOptionsBuilder builder_(_fbb);
+ builder_.add_reverse(reverse);
+ builder_.add_exclusive(exclusive);
+ return builder_.Finish();
+}
+
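CumsumOptions is another new table, with two boolean knobs matching the usual cumulative-sum semantics: exclusive shifts the sum so element i excludes input i, and reverse accumulates from the tail of the axis. A minimal sketch:

// Sketch only: reverse, exclusive cumulative sum.
// MakeCumsumOptionsSketch is a hypothetical helper.
inline flatbuffers::Offset<circle::CumsumOptions>
MakeCumsumOptionsSketch(flatbuffers::FlatBufferBuilder &fbb)
{
  return circle::CreateCumsumOptions(fbb, /*exclusive=*/true, /*reverse=*/true);
}
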
+struct BroadcastToOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef BroadcastToOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct BroadcastToOptionsBuilder
+{
+ typedef BroadcastToOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit BroadcastToOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<BroadcastToOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<BroadcastToOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<BroadcastToOptions>
+CreateBroadcastToOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ BroadcastToOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct Rfft2dOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef Rfft2dOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct Rfft2dOptionsBuilder
+{
+ typedef Rfft2dOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit Rfft2dOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<Rfft2dOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Rfft2dOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Rfft2dOptions> CreateRfft2dOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ Rfft2dOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct HashtableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef HashtableOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_TABLE_ID = 4,
+ VT_KEY_DTYPE = 6,
+ VT_VALUE_DTYPE = 8
+ };
+ int32_t table_id() const { return GetField<int32_t>(VT_TABLE_ID, 0); }
+ circle::TensorType key_dtype() const
+ {
+ return static_cast<circle::TensorType>(GetField<int8_t>(VT_KEY_DTYPE, 0));
+ }
+ circle::TensorType value_dtype() const
+ {
+ return static_cast<circle::TensorType>(GetField<int8_t>(VT_VALUE_DTYPE, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_TABLE_ID) &&
+ VerifyField<int8_t>(verifier, VT_KEY_DTYPE) &&
+ VerifyField<int8_t>(verifier, VT_VALUE_DTYPE) && verifier.EndTable();
+ }
+};
+
+struct HashtableOptionsBuilder
+{
+ typedef HashtableOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_table_id(int32_t table_id)
+ {
+ fbb_.AddElement<int32_t>(HashtableOptions::VT_TABLE_ID, table_id, 0);
+ }
+ void add_key_dtype(circle::TensorType key_dtype)
+ {
+ fbb_.AddElement<int8_t>(HashtableOptions::VT_KEY_DTYPE, static_cast<int8_t>(key_dtype), 0);
+ }
+ void add_value_dtype(circle::TensorType value_dtype)
+ {
+ fbb_.AddElement<int8_t>(HashtableOptions::VT_VALUE_DTYPE, static_cast<int8_t>(value_dtype), 0);
+ }
+ explicit HashtableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<HashtableOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<HashtableOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<HashtableOptions>
+CreateHashtableOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t table_id = 0,
+ circle::TensorType key_dtype = circle::TensorType_FLOAT32,
+ circle::TensorType value_dtype = circle::TensorType_FLOAT32)
+{
+ HashtableOptionsBuilder builder_(_fbb);
+ builder_.add_table_id(table_id);
+ builder_.add_value_dtype(value_dtype);
+ builder_.add_key_dtype(key_dtype);
+ return builder_.Finish();
+}
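+
Reviewer note: the dtype fields travel as int8_t on the wire and the accessors cast back to circle::TensorType. A sketch, assuming the usual circle TensorType enum constants:

    flatbuffers::FlatBufferBuilder fbb;
    // table_id defaults to 0; both dtypes default to TensorType_FLOAT32.
    auto opts = circle::CreateHashtableOptions(fbb, /*table_id=*/1,
                                               circle::TensorType_INT64,
                                               circle::TensorType_STRING);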
+
+struct HashtableFindOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef HashtableFindOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct HashtableFindOptionsBuilder
+{
+ typedef HashtableFindOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit HashtableFindOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<HashtableFindOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<HashtableFindOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<HashtableFindOptions>
+CreateHashtableFindOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ HashtableFindOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct HashtableImportOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef HashtableImportOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct HashtableImportOptionsBuilder
+{
+ typedef HashtableImportOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit HashtableImportOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<HashtableImportOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<HashtableImportOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<HashtableImportOptions>
+CreateHashtableImportOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ HashtableImportOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct HashtableSizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef HashtableSizeOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct HashtableSizeOptionsBuilder
+{
+ typedef HashtableSizeOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit HashtableSizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<HashtableSizeOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<HashtableSizeOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<HashtableSizeOptions>
+CreateHashtableSizeOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ HashtableSizeOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct VarHandleOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef VarHandleOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_CONTAINER = 4,
+ VT_SHARED_NAME = 6
+ };
+ const flatbuffers::String *container() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_CONTAINER);
+ }
+ const flatbuffers::String *shared_name() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_SHARED_NAME);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_CONTAINER) &&
+ verifier.VerifyString(container()) && VerifyOffset(verifier, VT_SHARED_NAME) &&
+ verifier.VerifyString(shared_name()) && verifier.EndTable();
+ }
+};
+
+struct VarHandleOptionsBuilder
+{
+ typedef VarHandleOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_container(flatbuffers::Offset<flatbuffers::String> container)
+ {
+ fbb_.AddOffset(VarHandleOptions::VT_CONTAINER, container);
+ }
+ void add_shared_name(flatbuffers::Offset<flatbuffers::String> shared_name)
+ {
+ fbb_.AddOffset(VarHandleOptions::VT_SHARED_NAME, shared_name);
+ }
+ explicit VarHandleOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<VarHandleOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<VarHandleOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<VarHandleOptions>
+CreateVarHandleOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::String> container = 0,
+ flatbuffers::Offset<flatbuffers::String> shared_name = 0)
+{
+ VarHandleOptionsBuilder builder_(_fbb);
+ builder_.add_shared_name(shared_name);
+ builder_.add_container(container);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<VarHandleOptions>
+CreateVarHandleOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, const char *container = nullptr,
+ const char *shared_name = nullptr)
+{
+ auto container__ = container ? _fbb.CreateString(container) : 0;
+ auto shared_name__ = shared_name ? _fbb.CreateString(shared_name) : 0;
+ return circle::CreateVarHandleOptions(_fbb, container__, shared_name__);
+}
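+
Reviewer note: the Direct overload interns the C strings before the table is built, which matters because FlatBuffers requires nested objects to be finished before StartTable() runs on the parent. A sketch:

    flatbuffers::FlatBufferBuilder fbb;
    // Passing nullptr for either string leaves the field absent (offset 0),
    // and container()/shared_name() then return nullptr on read.
    auto opts = circle::CreateVarHandleOptionsDirect(fbb, "c0", "var0");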
+
+struct ReadVariableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef ReadVariableOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct ReadVariableOptionsBuilder
+{
+ typedef ReadVariableOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit ReadVariableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<ReadVariableOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ReadVariableOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ReadVariableOptions>
+CreateReadVariableOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ ReadVariableOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct AssignVariableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef AssignVariableOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct AssignVariableOptionsBuilder
+{
+ typedef AssignVariableOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit AssignVariableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<AssignVariableOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<AssignVariableOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<AssignVariableOptions>
+CreateAssignVariableOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ AssignVariableOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct RandomOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef RandomOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_SEED = 4,
+ VT_SEED2 = 6
+ };
+ int64_t seed() const { return GetField<int64_t>(VT_SEED, 0); }
+ int64_t seed2() const { return GetField<int64_t>(VT_SEED2, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int64_t>(verifier, VT_SEED) &&
+ VerifyField<int64_t>(verifier, VT_SEED2) && verifier.EndTable();
+ }
+};
+
+struct RandomOptionsBuilder
+{
+ typedef RandomOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_seed(int64_t seed) { fbb_.AddElement<int64_t>(RandomOptions::VT_SEED, seed, 0); }
+ void add_seed2(int64_t seed2) { fbb_.AddElement<int64_t>(RandomOptions::VT_SEED2, seed2, 0); }
+ explicit RandomOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<RandomOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<RandomOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<RandomOptions> CreateRandomOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ int64_t seed = 0, int64_t seed2 = 0)
+{
+ RandomOptionsBuilder builder_(_fbb);
+ builder_.add_seed2(seed2);
+ builder_.add_seed(seed);
+ return builder_.Finish();
+}
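+
Reviewer note: a round-trip sketch for the two int64 seed fields; flatbuffers::GetRoot is the stock entry point for reading a finished buffer whose root is the given table:

    flatbuffers::FlatBufferBuilder fbb;
    fbb.Finish(circle::CreateRandomOptions(fbb, /*seed=*/42, /*seed2=*/7));
    auto *ro =
      flatbuffers::GetRoot<circle::RandomOptions>(fbb.GetBufferPointer());
    // ro->seed() == 42 && ro->seed2() == 7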
+
+struct BucketizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef BucketizeOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_BOUNDARIES = 4
+ };
+ const flatbuffers::Vector<float> *boundaries() const
+ {
+ return GetPointer<const flatbuffers::Vector<float> *>(VT_BOUNDARIES);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_BOUNDARIES) &&
+ verifier.VerifyVector(boundaries()) && verifier.EndTable();
+ }
+};
+
+struct BucketizeOptionsBuilder
+{
+ typedef BucketizeOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_boundaries(flatbuffers::Offset<flatbuffers::Vector<float>> boundaries)
+ {
+ fbb_.AddOffset(BucketizeOptions::VT_BOUNDARIES, boundaries);
+ }
+ explicit BucketizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<BucketizeOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<BucketizeOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<BucketizeOptions>
+CreateBucketizeOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<float>> boundaries = 0)
+{
+ BucketizeOptionsBuilder builder_(_fbb);
+ builder_.add_boundaries(boundaries);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<BucketizeOptions>
+CreateBucketizeOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb,
+ const std::vector<float> *boundaries = nullptr)
+{
+ auto boundaries__ = boundaries ? _fbb.CreateVector<float>(*boundaries) : 0;
+ return circle::CreateBucketizeOptions(_fbb, boundaries__);
+}
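+
Reviewer note: the vector field behaves like the string fields above — the Direct overload copies the std::vector into the buffer first, and a null pointer leaves the field absent. A sketch:

    #include <vector>

    flatbuffers::FlatBufferBuilder fbb;
    std::vector<float> bounds = {0.0f, 1.5f, 3.0f};
    auto opts = circle::CreateBucketizeOptionsDirect(fbb, &bounds);
    // Had we passed nullptr instead, boundaries() would return nullptr.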
+
+struct GeluOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef GeluOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_APPROXIMATE = 4
+ };
+ bool approximate() const { return GetField<uint8_t>(VT_APPROXIMATE, 0) != 0; }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_APPROXIMATE) &&
+ verifier.EndTable();
+ }
+};
+
+struct GeluOptionsBuilder
+{
+ typedef GeluOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_approximate(bool approximate)
+ {
+ fbb_.AddElement<uint8_t>(GeluOptions::VT_APPROXIMATE, static_cast<uint8_t>(approximate), 0);
+ }
+ explicit GeluOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<GeluOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<GeluOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<GeluOptions> CreateGeluOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ bool approximate = false)
+{
+ GeluOptionsBuilder builder_(_fbb);
+ builder_.add_approximate(approximate);
+ return builder_.Finish();
+}
+
+struct DynamicUpdateSliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef DynamicUpdateSliceOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct DynamicUpdateSliceOptionsBuilder
+{
+ typedef DynamicUpdateSliceOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit DynamicUpdateSliceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<DynamicUpdateSliceOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<DynamicUpdateSliceOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<DynamicUpdateSliceOptions>
+CreateDynamicUpdateSliceOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ DynamicUpdateSliceOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct UnsortedSegmentProdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef UnsortedSegmentProdOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct UnsortedSegmentProdOptionsBuilder
+{
+ typedef UnsortedSegmentProdOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit UnsortedSegmentProdOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<UnsortedSegmentProdOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<UnsortedSegmentProdOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<UnsortedSegmentProdOptions>
+CreateUnsortedSegmentProdOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ UnsortedSegmentProdOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct UnsortedSegmentMaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef UnsortedSegmentMaxOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct UnsortedSegmentMaxOptionsBuilder
+{
+ typedef UnsortedSegmentMaxOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit UnsortedSegmentMaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<UnsortedSegmentMaxOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<UnsortedSegmentMaxOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<UnsortedSegmentMaxOptions>
+CreateUnsortedSegmentMaxOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ UnsortedSegmentMaxOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct UnsortedSegmentSumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef UnsortedSegmentSumOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct UnsortedSegmentSumOptionsBuilder
+{
+ typedef UnsortedSegmentSumOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit UnsortedSegmentSumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<UnsortedSegmentSumOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<UnsortedSegmentSumOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<UnsortedSegmentSumOptions>
+CreateUnsortedSegmentSumOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ UnsortedSegmentSumOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct ATan2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef ATan2OptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct ATan2OptionsBuilder
+{
+ typedef ATan2Options Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit ATan2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<ATan2Options> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ATan2Options>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ATan2Options> CreateATan2Options(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ ATan2OptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct UnsortedSegmentMinOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef UnsortedSegmentMinOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct UnsortedSegmentMinOptionsBuilder
+{
+ typedef UnsortedSegmentMinOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit UnsortedSegmentMinOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<UnsortedSegmentMinOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<UnsortedSegmentMinOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<UnsortedSegmentMinOptions>
+CreateUnsortedSegmentMinOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ UnsortedSegmentMinOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct SignOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef SignOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct SignOptionsBuilder
+{
+ typedef SignOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit SignOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<SignOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SignOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SignOptions> CreateSignOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ SignOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct BitcastOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef BitcastOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct BitcastOptionsBuilder
+{
+ typedef BitcastOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit BitcastOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<BitcastOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<BitcastOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<BitcastOptions>
+CreateBitcastOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ BitcastOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct BitwiseXorOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef BitwiseXorOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct BitwiseXorOptionsBuilder
+{
+ typedef BitwiseXorOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit BitwiseXorOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<BitwiseXorOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<BitwiseXorOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<BitwiseXorOptions>
+CreateBitwiseXorOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ BitwiseXorOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct RightShiftOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef RightShiftOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct RightShiftOptionsBuilder
+{
+ typedef RightShiftOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit RightShiftOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<RightShiftOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<RightShiftOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<RightShiftOptions>
+CreateRightShiftOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ RightShiftOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
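+
Reviewer note: BroadcastTo, Rfft2d, the Hashtable* helpers, Read/AssignVariable, DynamicUpdateSlice, the UnsortedSegment* family, ATan2, Sign, Bitcast, BitwiseXor and RightShift are all zero-field marker tables: Verify() checks only the table frame, and creation takes no arguments. They exist as distinct types so the BuiltinOptions union can still dispatch on them. A sketch:

    flatbuffers::FlatBufferBuilder fbb;
    // An empty option table still serializes a table header (plus a
    // vtable, which identical tables share within one buffer).
    auto opts = circle::CreateRightShiftOptions(fbb);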
+
struct BCQGatherOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef BCQGatherOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_INPUT_HIDDEN_SIZE = 4,
VT_AXIS = 6
@@ -7489,6 +9288,7 @@ struct BCQGatherOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct BCQGatherOptionsBuilder
{
+ typedef BCQGatherOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_input_hidden_size(int32_t input_hidden_size)
@@ -7500,7 +9300,6 @@ struct BCQGatherOptionsBuilder
{
start_ = fbb_.StartTable();
}
- BCQGatherOptionsBuilder &operator=(const BCQGatherOptionsBuilder &);
flatbuffers::Offset<BCQGatherOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7521,15 +9320,17 @@ CreateBCQGatherOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t input_hidde
struct BCQFullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef BCQFullyConnectedOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_WEIGHTS_HIDDEN_SIZE = 4,
VT_FUSED_ACTIVATION_FUNCTION = 6
};
int32_t weights_hidden_size() const { return GetField<int32_t>(VT_WEIGHTS_HIDDEN_SIZE, 0); }
- ActivationFunctionType fused_activation_function() const
+ circle::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -7540,6 +9341,7 @@ struct BCQFullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::T
struct BCQFullyConnectedOptionsBuilder
{
+ typedef BCQFullyConnectedOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_weights_hidden_size(int32_t weights_hidden_size)
@@ -7547,7 +9349,7 @@ struct BCQFullyConnectedOptionsBuilder
fbb_.AddElement<int32_t>(BCQFullyConnectedOptions::VT_WEIGHTS_HIDDEN_SIZE, weights_hidden_size,
0);
}
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(BCQFullyConnectedOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -7556,7 +9358,6 @@ struct BCQFullyConnectedOptionsBuilder
{
start_ = fbb_.StartTable();
}
- BCQFullyConnectedOptionsBuilder &operator=(const BCQFullyConnectedOptionsBuilder &);
flatbuffers::Offset<BCQFullyConnectedOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7566,8 +9367,8 @@ struct BCQFullyConnectedOptionsBuilder
};
inline flatbuffers::Offset<BCQFullyConnectedOptions> CreateBCQFullyConnectedOptions(
- flatbuffers::FlatBufferBuilder &_fbb, int32_t weights_hidden_size = 0,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+ flatbuffers::FlatBufferBuilder &_fbb, int32_t weights_hidden_size = 0,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
{
BCQFullyConnectedOptionsBuilder builder_(_fbb);
builder_.add_weights_hidden_size(weights_hidden_size);
@@ -7577,15 +9378,17 @@ inline flatbuffers::Offset<BCQFullyConnectedOptions> CreateBCQFullyConnectedOpti
struct InstanceNormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef InstanceNormOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_EPSILON = 4,
VT_FUSED_ACTIVATION_FUNCTION = 6
};
float epsilon() const { return GetField<float>(VT_EPSILON, 0.0f); }
- ActivationFunctionType fused_activation_function() const
+ circle::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<circle::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -7596,13 +9399,14 @@ struct InstanceNormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct InstanceNormOptionsBuilder
{
+ typedef InstanceNormOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_epsilon(float epsilon)
{
fbb_.AddElement<float>(InstanceNormOptions::VT_EPSILON, epsilon, 0.0f);
}
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(InstanceNormOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -7611,7 +9415,6 @@ struct InstanceNormOptionsBuilder
{
start_ = fbb_.StartTable();
}
- InstanceNormOptionsBuilder &operator=(const InstanceNormOptionsBuilder &);
flatbuffers::Offset<InstanceNormOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7621,8 +9424,8 @@ struct InstanceNormOptionsBuilder
};
inline flatbuffers::Offset<InstanceNormOptions> CreateInstanceNormOptions(
- flatbuffers::FlatBufferBuilder &_fbb, float epsilon = 0.0f,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+ flatbuffers::FlatBufferBuilder &_fbb, float epsilon = 0.0f,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
{
InstanceNormOptionsBuilder builder_(_fbb);
builder_.add_epsilon(epsilon);
@@ -7632,36 +9435,42 @@ inline flatbuffers::Offset<InstanceNormOptions> CreateInstanceNormOptions(
struct OperatorCode FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef OperatorCodeBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
- VT_BUILTIN_CODE = 4,
+ VT_DEPRECATED_BUILTIN_CODE = 4,
VT_CUSTOM_CODE = 6,
- VT_VERSION = 8
+ VT_VERSION = 8,
+ VT_BUILTIN_CODE = 10
};
- BuiltinOperator builtin_code() const
- {
- return static_cast<BuiltinOperator>(GetField<uint8_t>(VT_BUILTIN_CODE, 0));
- }
+ int8_t deprecated_builtin_code() const { return GetField<int8_t>(VT_DEPRECATED_BUILTIN_CODE, 0); }
const flatbuffers::String *custom_code() const
{
return GetPointer<const flatbuffers::String *>(VT_CUSTOM_CODE);
}
int32_t version() const { return GetField<int32_t>(VT_VERSION, 1); }
+ circle::BuiltinOperator builtin_code() const
+ {
+ return static_cast<circle::BuiltinOperator>(GetField<int32_t>(VT_BUILTIN_CODE, 0));
+ }
bool Verify(flatbuffers::Verifier &verifier) const
{
- return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_BUILTIN_CODE) &&
+ return VerifyTableStart(verifier) &&
+ VerifyField<int8_t>(verifier, VT_DEPRECATED_BUILTIN_CODE) &&
VerifyOffset(verifier, VT_CUSTOM_CODE) && verifier.VerifyString(custom_code()) &&
- VerifyField<int32_t>(verifier, VT_VERSION) && verifier.EndTable();
+ VerifyField<int32_t>(verifier, VT_VERSION) &&
+ VerifyField<int32_t>(verifier, VT_BUILTIN_CODE) && verifier.EndTable();
}
};
struct OperatorCodeBuilder
{
+ typedef OperatorCode Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_builtin_code(BuiltinOperator builtin_code)
+ void add_deprecated_builtin_code(int8_t deprecated_builtin_code)
{
- fbb_.AddElement<uint8_t>(OperatorCode::VT_BUILTIN_CODE, static_cast<uint8_t>(builtin_code), 0);
+ fbb_.AddElement<int8_t>(OperatorCode::VT_DEPRECATED_BUILTIN_CODE, deprecated_builtin_code, 0);
}
void add_custom_code(flatbuffers::Offset<flatbuffers::String> custom_code)
{
@@ -7671,11 +9480,14 @@ struct OperatorCodeBuilder
{
fbb_.AddElement<int32_t>(OperatorCode::VT_VERSION, version, 1);
}
+ void add_builtin_code(circle::BuiltinOperator builtin_code)
+ {
+ fbb_.AddElement<int32_t>(OperatorCode::VT_BUILTIN_CODE, static_cast<int32_t>(builtin_code), 0);
+ }
explicit OperatorCodeBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- OperatorCodeBuilder &operator=(const OperatorCodeBuilder &);
flatbuffers::Offset<OperatorCode> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7685,29 +9497,32 @@ struct OperatorCodeBuilder
};
inline flatbuffers::Offset<OperatorCode>
-CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb,
- BuiltinOperator builtin_code = BuiltinOperator_ADD,
- flatbuffers::Offset<flatbuffers::String> custom_code = 0, int32_t version = 1)
+CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb, int8_t deprecated_builtin_code = 0,
+ flatbuffers::Offset<flatbuffers::String> custom_code = 0, int32_t version = 1,
+ circle::BuiltinOperator builtin_code = circle::BuiltinOperator_ADD)
{
OperatorCodeBuilder builder_(_fbb);
+ builder_.add_builtin_code(builtin_code);
builder_.add_version(version);
builder_.add_custom_code(custom_code);
- builder_.add_builtin_code(builtin_code);
+ builder_.add_deprecated_builtin_code(deprecated_builtin_code);
return builder_.Finish();
}
inline flatbuffers::Offset<OperatorCode>
-CreateOperatorCodeDirect(flatbuffers::FlatBufferBuilder &_fbb,
- BuiltinOperator builtin_code = BuiltinOperator_ADD,
- const char *custom_code = nullptr, int32_t version = 1)
+CreateOperatorCodeDirect(flatbuffers::FlatBufferBuilder &_fbb, int8_t deprecated_builtin_code = 0,
+ const char *custom_code = nullptr, int32_t version = 1,
+ circle::BuiltinOperator builtin_code = circle::BuiltinOperator_ADD)
{
- return circle::CreateOperatorCode(_fbb, builtin_code,
- custom_code ? _fbb.CreateString(custom_code) : 0, version);
+ auto custom_code__ = custom_code ? _fbb.CreateString(custom_code) : 0;
+ return circle::CreateOperatorCode(_fbb, deprecated_builtin_code, custom_code__, version,
+ builtin_code);
}
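
Reviewer note: the OperatorCode change is the one behavioral schema migration in this hunk — the old int8-backed field at vtable offset 4 is renamed deprecated_builtin_code, and a 32-bit builtin_code is appended at offset 10, evidently so operator IDs are no longer capped at the int8 range. Writers should populate both; readers should prefer the 32-bit field. A sketch:

    flatbuffers::FlatBufferBuilder fbb;
    // Keeping the deprecated int8 field in sync (ADD is 0 in both
    // encodings) preserves compatibility with readers of the old layout.
    auto code = circle::CreateOperatorCode(
      fbb, /*deprecated_builtin_code=*/0, /*custom_code=*/0, /*version=*/1,
      circle::BuiltinOperator_ADD);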
struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef OperatorBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_OPCODE_INDEX = 4,
VT_INPUTS = 6,
@@ -7728,646 +9543,799 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_OUTPUTS);
}
- BuiltinOptions builtin_options_type() const
+ circle::BuiltinOptions builtin_options_type() const
{
- return static_cast<BuiltinOptions>(GetField<uint8_t>(VT_BUILTIN_OPTIONS_TYPE, 0));
+ return static_cast<circle::BuiltinOptions>(GetField<uint8_t>(VT_BUILTIN_OPTIONS_TYPE, 0));
}
const void *builtin_options() const { return GetPointer<const void *>(VT_BUILTIN_OPTIONS); }
template <typename T> const T *builtin_options_as() const;
- const Conv2DOptions *builtin_options_as_Conv2DOptions() const
+ const circle::Conv2DOptions *builtin_options_as_Conv2DOptions() const
{
- return builtin_options_type() == BuiltinOptions_Conv2DOptions
- ? static_cast<const Conv2DOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_Conv2DOptions
+ ? static_cast<const circle::Conv2DOptions *>(builtin_options())
+ : nullptr;
}
- const DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions() const
+ const circle::DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions() const
{
- return builtin_options_type() == BuiltinOptions_DepthwiseConv2DOptions
- ? static_cast<const DepthwiseConv2DOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_DepthwiseConv2DOptions
+ ? static_cast<const circle::DepthwiseConv2DOptions *>(builtin_options())
+ : nullptr;
}
- const ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions() const
+ const circle::ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions() const
{
- return builtin_options_type() == BuiltinOptions_ConcatEmbeddingsOptions
- ? static_cast<const ConcatEmbeddingsOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_ConcatEmbeddingsOptions
+ ? static_cast<const circle::ConcatEmbeddingsOptions *>(builtin_options())
+ : nullptr;
}
- const LSHProjectionOptions *builtin_options_as_LSHProjectionOptions() const
+ const circle::LSHProjectionOptions *builtin_options_as_LSHProjectionOptions() const
{
- return builtin_options_type() == BuiltinOptions_LSHProjectionOptions
- ? static_cast<const LSHProjectionOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_LSHProjectionOptions
+ ? static_cast<const circle::LSHProjectionOptions *>(builtin_options())
+ : nullptr;
}
- const Pool2DOptions *builtin_options_as_Pool2DOptions() const
+ const circle::Pool2DOptions *builtin_options_as_Pool2DOptions() const
{
- return builtin_options_type() == BuiltinOptions_Pool2DOptions
- ? static_cast<const Pool2DOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_Pool2DOptions
+ ? static_cast<const circle::Pool2DOptions *>(builtin_options())
+ : nullptr;
}
- const SVDFOptions *builtin_options_as_SVDFOptions() const
+ const circle::SVDFOptions *builtin_options_as_SVDFOptions() const
{
- return builtin_options_type() == BuiltinOptions_SVDFOptions
- ? static_cast<const SVDFOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_SVDFOptions
+ ? static_cast<const circle::SVDFOptions *>(builtin_options())
+ : nullptr;
}
- const RNNOptions *builtin_options_as_RNNOptions() const
+ const circle::RNNOptions *builtin_options_as_RNNOptions() const
{
- return builtin_options_type() == BuiltinOptions_RNNOptions
- ? static_cast<const RNNOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_RNNOptions
+ ? static_cast<const circle::RNNOptions *>(builtin_options())
+ : nullptr;
}
- const FullyConnectedOptions *builtin_options_as_FullyConnectedOptions() const
+ const circle::FullyConnectedOptions *builtin_options_as_FullyConnectedOptions() const
{
- return builtin_options_type() == BuiltinOptions_FullyConnectedOptions
- ? static_cast<const FullyConnectedOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_FullyConnectedOptions
+ ? static_cast<const circle::FullyConnectedOptions *>(builtin_options())
+ : nullptr;
}
- const SoftmaxOptions *builtin_options_as_SoftmaxOptions() const
+ const circle::SoftmaxOptions *builtin_options_as_SoftmaxOptions() const
{
- return builtin_options_type() == BuiltinOptions_SoftmaxOptions
- ? static_cast<const SoftmaxOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_SoftmaxOptions
+ ? static_cast<const circle::SoftmaxOptions *>(builtin_options())
+ : nullptr;
}
- const ConcatenationOptions *builtin_options_as_ConcatenationOptions() const
+ const circle::ConcatenationOptions *builtin_options_as_ConcatenationOptions() const
{
- return builtin_options_type() == BuiltinOptions_ConcatenationOptions
- ? static_cast<const ConcatenationOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_ConcatenationOptions
+ ? static_cast<const circle::ConcatenationOptions *>(builtin_options())
+ : nullptr;
}
- const AddOptions *builtin_options_as_AddOptions() const
+ const circle::AddOptions *builtin_options_as_AddOptions() const
{
- return builtin_options_type() == BuiltinOptions_AddOptions
- ? static_cast<const AddOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_AddOptions
+ ? static_cast<const circle::AddOptions *>(builtin_options())
+ : nullptr;
}
- const L2NormOptions *builtin_options_as_L2NormOptions() const
+ const circle::L2NormOptions *builtin_options_as_L2NormOptions() const
{
- return builtin_options_type() == BuiltinOptions_L2NormOptions
- ? static_cast<const L2NormOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_L2NormOptions
+ ? static_cast<const circle::L2NormOptions *>(builtin_options())
+ : nullptr;
}
- const LocalResponseNormalizationOptions *
+ const circle::LocalResponseNormalizationOptions *
builtin_options_as_LocalResponseNormalizationOptions() const
{
- return builtin_options_type() == BuiltinOptions_LocalResponseNormalizationOptions
- ? static_cast<const LocalResponseNormalizationOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_LocalResponseNormalizationOptions
+ ? static_cast<const circle::LocalResponseNormalizationOptions *>(builtin_options())
+ : nullptr;
}
- const LSTMOptions *builtin_options_as_LSTMOptions() const
+ const circle::LSTMOptions *builtin_options_as_LSTMOptions() const
{
- return builtin_options_type() == BuiltinOptions_LSTMOptions
- ? static_cast<const LSTMOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_LSTMOptions
+ ? static_cast<const circle::LSTMOptions *>(builtin_options())
+ : nullptr;
}
- const ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions() const
+ const circle::ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions() const
{
- return builtin_options_type() == BuiltinOptions_ResizeBilinearOptions
- ? static_cast<const ResizeBilinearOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_ResizeBilinearOptions
+ ? static_cast<const circle::ResizeBilinearOptions *>(builtin_options())
+ : nullptr;
}
- const CallOptions *builtin_options_as_CallOptions() const
+ const circle::CallOptions *builtin_options_as_CallOptions() const
{
- return builtin_options_type() == BuiltinOptions_CallOptions
- ? static_cast<const CallOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_CallOptions
+ ? static_cast<const circle::CallOptions *>(builtin_options())
+ : nullptr;
}
- const ReshapeOptions *builtin_options_as_ReshapeOptions() const
+ const circle::ReshapeOptions *builtin_options_as_ReshapeOptions() const
{
- return builtin_options_type() == BuiltinOptions_ReshapeOptions
- ? static_cast<const ReshapeOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_ReshapeOptions
+ ? static_cast<const circle::ReshapeOptions *>(builtin_options())
+ : nullptr;
}
- const SkipGramOptions *builtin_options_as_SkipGramOptions() const
+ const circle::SkipGramOptions *builtin_options_as_SkipGramOptions() const
{
- return builtin_options_type() == BuiltinOptions_SkipGramOptions
- ? static_cast<const SkipGramOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_SkipGramOptions
+ ? static_cast<const circle::SkipGramOptions *>(builtin_options())
+ : nullptr;
}
- const SpaceToDepthOptions *builtin_options_as_SpaceToDepthOptions() const
+ const circle::SpaceToDepthOptions *builtin_options_as_SpaceToDepthOptions() const
{
- return builtin_options_type() == BuiltinOptions_SpaceToDepthOptions
- ? static_cast<const SpaceToDepthOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_SpaceToDepthOptions
+ ? static_cast<const circle::SpaceToDepthOptions *>(builtin_options())
+ : nullptr;
}
- const EmbeddingLookupSparseOptions *builtin_options_as_EmbeddingLookupSparseOptions() const
+ const circle::EmbeddingLookupSparseOptions *
+ builtin_options_as_EmbeddingLookupSparseOptions() const
{
- return builtin_options_type() == BuiltinOptions_EmbeddingLookupSparseOptions
- ? static_cast<const EmbeddingLookupSparseOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_EmbeddingLookupSparseOptions
+ ? static_cast<const circle::EmbeddingLookupSparseOptions *>(builtin_options())
+ : nullptr;
}
- const MulOptions *builtin_options_as_MulOptions() const
+ const circle::MulOptions *builtin_options_as_MulOptions() const
{
- return builtin_options_type() == BuiltinOptions_MulOptions
- ? static_cast<const MulOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_MulOptions
+ ? static_cast<const circle::MulOptions *>(builtin_options())
+ : nullptr;
}
- const PadOptions *builtin_options_as_PadOptions() const
+ const circle::PadOptions *builtin_options_as_PadOptions() const
{
- return builtin_options_type() == BuiltinOptions_PadOptions
- ? static_cast<const PadOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_PadOptions
+ ? static_cast<const circle::PadOptions *>(builtin_options())
+ : nullptr;
}
- const GatherOptions *builtin_options_as_GatherOptions() const
+ const circle::GatherOptions *builtin_options_as_GatherOptions() const
{
- return builtin_options_type() == BuiltinOptions_GatherOptions
- ? static_cast<const GatherOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_GatherOptions
+ ? static_cast<const circle::GatherOptions *>(builtin_options())
+ : nullptr;
}
- const BatchToSpaceNDOptions *builtin_options_as_BatchToSpaceNDOptions() const
+ const circle::BatchToSpaceNDOptions *builtin_options_as_BatchToSpaceNDOptions() const
{
- return builtin_options_type() == BuiltinOptions_BatchToSpaceNDOptions
- ? static_cast<const BatchToSpaceNDOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_BatchToSpaceNDOptions
+ ? static_cast<const circle::BatchToSpaceNDOptions *>(builtin_options())
+ : nullptr;
}
- const SpaceToBatchNDOptions *builtin_options_as_SpaceToBatchNDOptions() const
+ const circle::SpaceToBatchNDOptions *builtin_options_as_SpaceToBatchNDOptions() const
{
- return builtin_options_type() == BuiltinOptions_SpaceToBatchNDOptions
- ? static_cast<const SpaceToBatchNDOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_SpaceToBatchNDOptions
+ ? static_cast<const circle::SpaceToBatchNDOptions *>(builtin_options())
+ : nullptr;
}
- const TransposeOptions *builtin_options_as_TransposeOptions() const
+ const circle::TransposeOptions *builtin_options_as_TransposeOptions() const
{
- return builtin_options_type() == BuiltinOptions_TransposeOptions
- ? static_cast<const TransposeOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_TransposeOptions
+ ? static_cast<const circle::TransposeOptions *>(builtin_options())
+ : nullptr;
}
- const ReducerOptions *builtin_options_as_ReducerOptions() const
+ const circle::ReducerOptions *builtin_options_as_ReducerOptions() const
{
- return builtin_options_type() == BuiltinOptions_ReducerOptions
- ? static_cast<const ReducerOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_ReducerOptions
+ ? static_cast<const circle::ReducerOptions *>(builtin_options())
+ : nullptr;
}
- const SubOptions *builtin_options_as_SubOptions() const
+ const circle::SubOptions *builtin_options_as_SubOptions() const
{
- return builtin_options_type() == BuiltinOptions_SubOptions
- ? static_cast<const SubOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_SubOptions
+ ? static_cast<const circle::SubOptions *>(builtin_options())
+ : nullptr;
}
- const DivOptions *builtin_options_as_DivOptions() const
+ const circle::DivOptions *builtin_options_as_DivOptions() const
{
- return builtin_options_type() == BuiltinOptions_DivOptions
- ? static_cast<const DivOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_DivOptions
+ ? static_cast<const circle::DivOptions *>(builtin_options())
+ : nullptr;
}
- const SqueezeOptions *builtin_options_as_SqueezeOptions() const
+ const circle::SqueezeOptions *builtin_options_as_SqueezeOptions() const
{
- return builtin_options_type() == BuiltinOptions_SqueezeOptions
- ? static_cast<const SqueezeOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_SqueezeOptions
+ ? static_cast<const circle::SqueezeOptions *>(builtin_options())
+ : nullptr;
}
- const SequenceRNNOptions *builtin_options_as_SequenceRNNOptions() const
+ const circle::SequenceRNNOptions *builtin_options_as_SequenceRNNOptions() const
{
- return builtin_options_type() == BuiltinOptions_SequenceRNNOptions
- ? static_cast<const SequenceRNNOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_SequenceRNNOptions
+ ? static_cast<const circle::SequenceRNNOptions *>(builtin_options())
+ : nullptr;
}
- const StridedSliceOptions *builtin_options_as_StridedSliceOptions() const
+ const circle::StridedSliceOptions *builtin_options_as_StridedSliceOptions() const
{
- return builtin_options_type() == BuiltinOptions_StridedSliceOptions
- ? static_cast<const StridedSliceOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_StridedSliceOptions
+ ? static_cast<const circle::StridedSliceOptions *>(builtin_options())
+ : nullptr;
}
- const ExpOptions *builtin_options_as_ExpOptions() const
+ const circle::ExpOptions *builtin_options_as_ExpOptions() const
{
- return builtin_options_type() == BuiltinOptions_ExpOptions
- ? static_cast<const ExpOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_ExpOptions
+ ? static_cast<const circle::ExpOptions *>(builtin_options())
+ : nullptr;
}
- const TopKV2Options *builtin_options_as_TopKV2Options() const
+ const circle::TopKV2Options *builtin_options_as_TopKV2Options() const
{
- return builtin_options_type() == BuiltinOptions_TopKV2Options
- ? static_cast<const TopKV2Options *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_TopKV2Options
+ ? static_cast<const circle::TopKV2Options *>(builtin_options())
+ : nullptr;
}
- const SplitOptions *builtin_options_as_SplitOptions() const
+ const circle::SplitOptions *builtin_options_as_SplitOptions() const
{
- return builtin_options_type() == BuiltinOptions_SplitOptions
- ? static_cast<const SplitOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_SplitOptions
+ ? static_cast<const circle::SplitOptions *>(builtin_options())
+ : nullptr;
}
- const LogSoftmaxOptions *builtin_options_as_LogSoftmaxOptions() const
+ const circle::LogSoftmaxOptions *builtin_options_as_LogSoftmaxOptions() const
{
- return builtin_options_type() == BuiltinOptions_LogSoftmaxOptions
- ? static_cast<const LogSoftmaxOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_LogSoftmaxOptions
+ ? static_cast<const circle::LogSoftmaxOptions *>(builtin_options())
+ : nullptr;
}
- const CastOptions *builtin_options_as_CastOptions() const
+ const circle::CastOptions *builtin_options_as_CastOptions() const
{
- return builtin_options_type() == BuiltinOptions_CastOptions
- ? static_cast<const CastOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_CastOptions
+ ? static_cast<const circle::CastOptions *>(builtin_options())
+ : nullptr;
}
- const DequantizeOptions *builtin_options_as_DequantizeOptions() const
+ const circle::DequantizeOptions *builtin_options_as_DequantizeOptions() const
{
- return builtin_options_type() == BuiltinOptions_DequantizeOptions
- ? static_cast<const DequantizeOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_DequantizeOptions
+ ? static_cast<const circle::DequantizeOptions *>(builtin_options())
+ : nullptr;
}
- const MaximumMinimumOptions *builtin_options_as_MaximumMinimumOptions() const
+ const circle::MaximumMinimumOptions *builtin_options_as_MaximumMinimumOptions() const
{
- return builtin_options_type() == BuiltinOptions_MaximumMinimumOptions
- ? static_cast<const MaximumMinimumOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_MaximumMinimumOptions
+ ? static_cast<const circle::MaximumMinimumOptions *>(builtin_options())
+ : nullptr;
}
- const ArgMaxOptions *builtin_options_as_ArgMaxOptions() const
+ const circle::ArgMaxOptions *builtin_options_as_ArgMaxOptions() const
{
- return builtin_options_type() == BuiltinOptions_ArgMaxOptions
- ? static_cast<const ArgMaxOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_ArgMaxOptions
+ ? static_cast<const circle::ArgMaxOptions *>(builtin_options())
+ : nullptr;
}
- const LessOptions *builtin_options_as_LessOptions() const
+ const circle::LessOptions *builtin_options_as_LessOptions() const
{
- return builtin_options_type() == BuiltinOptions_LessOptions
- ? static_cast<const LessOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_LessOptions
+ ? static_cast<const circle::LessOptions *>(builtin_options())
+ : nullptr;
}
- const NegOptions *builtin_options_as_NegOptions() const
+ const circle::NegOptions *builtin_options_as_NegOptions() const
{
- return builtin_options_type() == BuiltinOptions_NegOptions
- ? static_cast<const NegOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_NegOptions
+ ? static_cast<const circle::NegOptions *>(builtin_options())
+ : nullptr;
}
- const PadV2Options *builtin_options_as_PadV2Options() const
+ const circle::PadV2Options *builtin_options_as_PadV2Options() const
{
- return builtin_options_type() == BuiltinOptions_PadV2Options
- ? static_cast<const PadV2Options *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_PadV2Options
+ ? static_cast<const circle::PadV2Options *>(builtin_options())
+ : nullptr;
}
- const GreaterOptions *builtin_options_as_GreaterOptions() const
+ const circle::GreaterOptions *builtin_options_as_GreaterOptions() const
{
- return builtin_options_type() == BuiltinOptions_GreaterOptions
- ? static_cast<const GreaterOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_GreaterOptions
+ ? static_cast<const circle::GreaterOptions *>(builtin_options())
+ : nullptr;
}
- const GreaterEqualOptions *builtin_options_as_GreaterEqualOptions() const
+ const circle::GreaterEqualOptions *builtin_options_as_GreaterEqualOptions() const
{
- return builtin_options_type() == BuiltinOptions_GreaterEqualOptions
- ? static_cast<const GreaterEqualOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_GreaterEqualOptions
+ ? static_cast<const circle::GreaterEqualOptions *>(builtin_options())
+ : nullptr;
}
- const LessEqualOptions *builtin_options_as_LessEqualOptions() const
+ const circle::LessEqualOptions *builtin_options_as_LessEqualOptions() const
{
- return builtin_options_type() == BuiltinOptions_LessEqualOptions
- ? static_cast<const LessEqualOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_LessEqualOptions
+ ? static_cast<const circle::LessEqualOptions *>(builtin_options())
+ : nullptr;
}
- const SelectOptions *builtin_options_as_SelectOptions() const
+ const circle::SelectOptions *builtin_options_as_SelectOptions() const
{
- return builtin_options_type() == BuiltinOptions_SelectOptions
- ? static_cast<const SelectOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_SelectOptions
+ ? static_cast<const circle::SelectOptions *>(builtin_options())
+ : nullptr;
}
- const SliceOptions *builtin_options_as_SliceOptions() const
+ const circle::SliceOptions *builtin_options_as_SliceOptions() const
{
- return builtin_options_type() == BuiltinOptions_SliceOptions
- ? static_cast<const SliceOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_SliceOptions
+ ? static_cast<const circle::SliceOptions *>(builtin_options())
+ : nullptr;
}
- const TransposeConvOptions *builtin_options_as_TransposeConvOptions() const
+ const circle::TransposeConvOptions *builtin_options_as_TransposeConvOptions() const
{
- return builtin_options_type() == BuiltinOptions_TransposeConvOptions
- ? static_cast<const TransposeConvOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_TransposeConvOptions
+ ? static_cast<const circle::TransposeConvOptions *>(builtin_options())
+ : nullptr;
}
- const SparseToDenseOptions *builtin_options_as_SparseToDenseOptions() const
+ const circle::SparseToDenseOptions *builtin_options_as_SparseToDenseOptions() const
{
- return builtin_options_type() == BuiltinOptions_SparseToDenseOptions
- ? static_cast<const SparseToDenseOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_SparseToDenseOptions
+ ? static_cast<const circle::SparseToDenseOptions *>(builtin_options())
+ : nullptr;
}
- const TileOptions *builtin_options_as_TileOptions() const
+ const circle::TileOptions *builtin_options_as_TileOptions() const
{
- return builtin_options_type() == BuiltinOptions_TileOptions
- ? static_cast<const TileOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_TileOptions
+ ? static_cast<const circle::TileOptions *>(builtin_options())
+ : nullptr;
}
- const ExpandDimsOptions *builtin_options_as_ExpandDimsOptions() const
+ const circle::ExpandDimsOptions *builtin_options_as_ExpandDimsOptions() const
{
- return builtin_options_type() == BuiltinOptions_ExpandDimsOptions
- ? static_cast<const ExpandDimsOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_ExpandDimsOptions
+ ? static_cast<const circle::ExpandDimsOptions *>(builtin_options())
+ : nullptr;
}
- const EqualOptions *builtin_options_as_EqualOptions() const
+ const circle::EqualOptions *builtin_options_as_EqualOptions() const
{
- return builtin_options_type() == BuiltinOptions_EqualOptions
- ? static_cast<const EqualOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_EqualOptions
+ ? static_cast<const circle::EqualOptions *>(builtin_options())
+ : nullptr;
}
- const NotEqualOptions *builtin_options_as_NotEqualOptions() const
+ const circle::NotEqualOptions *builtin_options_as_NotEqualOptions() const
{
- return builtin_options_type() == BuiltinOptions_NotEqualOptions
- ? static_cast<const NotEqualOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_NotEqualOptions
+ ? static_cast<const circle::NotEqualOptions *>(builtin_options())
+ : nullptr;
}
- const ShapeOptions *builtin_options_as_ShapeOptions() const
+ const circle::ShapeOptions *builtin_options_as_ShapeOptions() const
{
- return builtin_options_type() == BuiltinOptions_ShapeOptions
- ? static_cast<const ShapeOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_ShapeOptions
+ ? static_cast<const circle::ShapeOptions *>(builtin_options())
+ : nullptr;
}
- const PowOptions *builtin_options_as_PowOptions() const
+ const circle::PowOptions *builtin_options_as_PowOptions() const
{
- return builtin_options_type() == BuiltinOptions_PowOptions
- ? static_cast<const PowOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_PowOptions
+ ? static_cast<const circle::PowOptions *>(builtin_options())
+ : nullptr;
}
- const ArgMinOptions *builtin_options_as_ArgMinOptions() const
+ const circle::ArgMinOptions *builtin_options_as_ArgMinOptions() const
{
- return builtin_options_type() == BuiltinOptions_ArgMinOptions
- ? static_cast<const ArgMinOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_ArgMinOptions
+ ? static_cast<const circle::ArgMinOptions *>(builtin_options())
+ : nullptr;
}
- const FakeQuantOptions *builtin_options_as_FakeQuantOptions() const
+ const circle::FakeQuantOptions *builtin_options_as_FakeQuantOptions() const
{
- return builtin_options_type() == BuiltinOptions_FakeQuantOptions
- ? static_cast<const FakeQuantOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_FakeQuantOptions
+ ? static_cast<const circle::FakeQuantOptions *>(builtin_options())
+ : nullptr;
}
- const PackOptions *builtin_options_as_PackOptions() const
+ const circle::PackOptions *builtin_options_as_PackOptions() const
{
- return builtin_options_type() == BuiltinOptions_PackOptions
- ? static_cast<const PackOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_PackOptions
+ ? static_cast<const circle::PackOptions *>(builtin_options())
+ : nullptr;
}
- const LogicalOrOptions *builtin_options_as_LogicalOrOptions() const
+ const circle::LogicalOrOptions *builtin_options_as_LogicalOrOptions() const
{
- return builtin_options_type() == BuiltinOptions_LogicalOrOptions
- ? static_cast<const LogicalOrOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_LogicalOrOptions
+ ? static_cast<const circle::LogicalOrOptions *>(builtin_options())
+ : nullptr;
}
- const OneHotOptions *builtin_options_as_OneHotOptions() const
+ const circle::OneHotOptions *builtin_options_as_OneHotOptions() const
{
- return builtin_options_type() == BuiltinOptions_OneHotOptions
- ? static_cast<const OneHotOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_OneHotOptions
+ ? static_cast<const circle::OneHotOptions *>(builtin_options())
+ : nullptr;
}
- const LogicalAndOptions *builtin_options_as_LogicalAndOptions() const
+ const circle::LogicalAndOptions *builtin_options_as_LogicalAndOptions() const
{
- return builtin_options_type() == BuiltinOptions_LogicalAndOptions
- ? static_cast<const LogicalAndOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_LogicalAndOptions
+ ? static_cast<const circle::LogicalAndOptions *>(builtin_options())
+ : nullptr;
}
- const LogicalNotOptions *builtin_options_as_LogicalNotOptions() const
+ const circle::LogicalNotOptions *builtin_options_as_LogicalNotOptions() const
{
- return builtin_options_type() == BuiltinOptions_LogicalNotOptions
- ? static_cast<const LogicalNotOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_LogicalNotOptions
+ ? static_cast<const circle::LogicalNotOptions *>(builtin_options())
+ : nullptr;
}
- const UnpackOptions *builtin_options_as_UnpackOptions() const
+ const circle::UnpackOptions *builtin_options_as_UnpackOptions() const
{
- return builtin_options_type() == BuiltinOptions_UnpackOptions
- ? static_cast<const UnpackOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_UnpackOptions
+ ? static_cast<const circle::UnpackOptions *>(builtin_options())
+ : nullptr;
}
- const FloorDivOptions *builtin_options_as_FloorDivOptions() const
+ const circle::FloorDivOptions *builtin_options_as_FloorDivOptions() const
{
- return builtin_options_type() == BuiltinOptions_FloorDivOptions
- ? static_cast<const FloorDivOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_FloorDivOptions
+ ? static_cast<const circle::FloorDivOptions *>(builtin_options())
+ : nullptr;
}
- const SquareOptions *builtin_options_as_SquareOptions() const
+ const circle::SquareOptions *builtin_options_as_SquareOptions() const
{
- return builtin_options_type() == BuiltinOptions_SquareOptions
- ? static_cast<const SquareOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_SquareOptions
+ ? static_cast<const circle::SquareOptions *>(builtin_options())
+ : nullptr;
}
- const ZerosLikeOptions *builtin_options_as_ZerosLikeOptions() const
+ const circle::ZerosLikeOptions *builtin_options_as_ZerosLikeOptions() const
{
- return builtin_options_type() == BuiltinOptions_ZerosLikeOptions
- ? static_cast<const ZerosLikeOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_ZerosLikeOptions
+ ? static_cast<const circle::ZerosLikeOptions *>(builtin_options())
+ : nullptr;
}
- const FillOptions *builtin_options_as_FillOptions() const
+ const circle::FillOptions *builtin_options_as_FillOptions() const
{
- return builtin_options_type() == BuiltinOptions_FillOptions
- ? static_cast<const FillOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_FillOptions
+ ? static_cast<const circle::FillOptions *>(builtin_options())
+ : nullptr;
}
- const BidirectionalSequenceLSTMOptions *
+ const circle::BidirectionalSequenceLSTMOptions *
builtin_options_as_BidirectionalSequenceLSTMOptions() const
{
- return builtin_options_type() == BuiltinOptions_BidirectionalSequenceLSTMOptions
- ? static_cast<const BidirectionalSequenceLSTMOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_BidirectionalSequenceLSTMOptions
+ ? static_cast<const circle::BidirectionalSequenceLSTMOptions *>(builtin_options())
+ : nullptr;
}
- const BidirectionalSequenceRNNOptions *builtin_options_as_BidirectionalSequenceRNNOptions() const
+ const circle::BidirectionalSequenceRNNOptions *
+ builtin_options_as_BidirectionalSequenceRNNOptions() const
{
- return builtin_options_type() == BuiltinOptions_BidirectionalSequenceRNNOptions
- ? static_cast<const BidirectionalSequenceRNNOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_BidirectionalSequenceRNNOptions
+ ? static_cast<const circle::BidirectionalSequenceRNNOptions *>(builtin_options())
+ : nullptr;
}
- const UnidirectionalSequenceLSTMOptions *
+ const circle::UnidirectionalSequenceLSTMOptions *
builtin_options_as_UnidirectionalSequenceLSTMOptions() const
{
- return builtin_options_type() == BuiltinOptions_UnidirectionalSequenceLSTMOptions
- ? static_cast<const UnidirectionalSequenceLSTMOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_UnidirectionalSequenceLSTMOptions
+ ? static_cast<const circle::UnidirectionalSequenceLSTMOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::FloorModOptions *builtin_options_as_FloorModOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_FloorModOptions
+ ? static_cast<const circle::FloorModOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::RangeOptions *builtin_options_as_RangeOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_RangeOptions
+ ? static_cast<const circle::RangeOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::ResizeNearestNeighborOptions *
+ builtin_options_as_ResizeNearestNeighborOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_ResizeNearestNeighborOptions
+ ? static_cast<const circle::ResizeNearestNeighborOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::LeakyReluOptions *builtin_options_as_LeakyReluOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_LeakyReluOptions
+ ? static_cast<const circle::LeakyReluOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::SquaredDifferenceOptions *builtin_options_as_SquaredDifferenceOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_SquaredDifferenceOptions
+ ? static_cast<const circle::SquaredDifferenceOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::MirrorPadOptions *builtin_options_as_MirrorPadOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_MirrorPadOptions
+ ? static_cast<const circle::MirrorPadOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::AbsOptions *builtin_options_as_AbsOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_AbsOptions
+ ? static_cast<const circle::AbsOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::SplitVOptions *builtin_options_as_SplitVOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_SplitVOptions
+ ? static_cast<const circle::SplitVOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::UniqueOptions *builtin_options_as_UniqueOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_UniqueOptions
+ ? static_cast<const circle::UniqueOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::ReverseV2Options *builtin_options_as_ReverseV2Options() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_ReverseV2Options
+ ? static_cast<const circle::ReverseV2Options *>(builtin_options())
+ : nullptr;
+ }
+ const circle::AddNOptions *builtin_options_as_AddNOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_AddNOptions
+ ? static_cast<const circle::AddNOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::GatherNdOptions *builtin_options_as_GatherNdOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_GatherNdOptions
+ ? static_cast<const circle::GatherNdOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::CosOptions *builtin_options_as_CosOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_CosOptions
+ ? static_cast<const circle::CosOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::WhereOptions *builtin_options_as_WhereOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_WhereOptions
+ ? static_cast<const circle::WhereOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::RankOptions *builtin_options_as_RankOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_RankOptions
+ ? static_cast<const circle::RankOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::ReverseSequenceOptions *builtin_options_as_ReverseSequenceOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_ReverseSequenceOptions
+ ? static_cast<const circle::ReverseSequenceOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::MatrixDiagOptions *builtin_options_as_MatrixDiagOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_MatrixDiagOptions
+ ? static_cast<const circle::MatrixDiagOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::QuantizeOptions *builtin_options_as_QuantizeOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_QuantizeOptions
+ ? static_cast<const circle::QuantizeOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::MatrixSetDiagOptions *builtin_options_as_MatrixSetDiagOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_MatrixSetDiagOptions
+ ? static_cast<const circle::MatrixSetDiagOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::HardSwishOptions *builtin_options_as_HardSwishOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_HardSwishOptions
+ ? static_cast<const circle::HardSwishOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::IfOptions *builtin_options_as_IfOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_IfOptions
+ ? static_cast<const circle::IfOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::WhileOptions *builtin_options_as_WhileOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_WhileOptions
+ ? static_cast<const circle::WhileOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::DepthToSpaceOptions *builtin_options_as_DepthToSpaceOptions() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_DepthToSpaceOptions
+ ? static_cast<const circle::DepthToSpaceOptions *>(builtin_options())
+ : nullptr;
+ }
+ const circle::NonMaxSuppressionV4Options *builtin_options_as_NonMaxSuppressionV4Options() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_NonMaxSuppressionV4Options
+ ? static_cast<const circle::NonMaxSuppressionV4Options *>(builtin_options())
+ : nullptr;
+ }
+ const circle::NonMaxSuppressionV5Options *builtin_options_as_NonMaxSuppressionV5Options() const
+ {
+ return builtin_options_type() == circle::BuiltinOptions_NonMaxSuppressionV5Options
+ ? static_cast<const circle::NonMaxSuppressionV5Options *>(builtin_options())
+ : nullptr;
}
- const FloorModOptions *builtin_options_as_FloorModOptions() const
+ const circle::ScatterNdOptions *builtin_options_as_ScatterNdOptions() const
{
- return builtin_options_type() == BuiltinOptions_FloorModOptions
- ? static_cast<const FloorModOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_ScatterNdOptions
+ ? static_cast<const circle::ScatterNdOptions *>(builtin_options())
+ : nullptr;
}
- const RangeOptions *builtin_options_as_RangeOptions() const
+ const circle::SelectV2Options *builtin_options_as_SelectV2Options() const
{
- return builtin_options_type() == BuiltinOptions_RangeOptions
- ? static_cast<const RangeOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_SelectV2Options
+ ? static_cast<const circle::SelectV2Options *>(builtin_options())
+ : nullptr;
}
- const ResizeNearestNeighborOptions *builtin_options_as_ResizeNearestNeighborOptions() const
+ const circle::DensifyOptions *builtin_options_as_DensifyOptions() const
{
- return builtin_options_type() == BuiltinOptions_ResizeNearestNeighborOptions
- ? static_cast<const ResizeNearestNeighborOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_DensifyOptions
+ ? static_cast<const circle::DensifyOptions *>(builtin_options())
+ : nullptr;
}
- const LeakyReluOptions *builtin_options_as_LeakyReluOptions() const
+ const circle::SegmentSumOptions *builtin_options_as_SegmentSumOptions() const
{
- return builtin_options_type() == BuiltinOptions_LeakyReluOptions
- ? static_cast<const LeakyReluOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_SegmentSumOptions
+ ? static_cast<const circle::SegmentSumOptions *>(builtin_options())
+ : nullptr;
}
- const SquaredDifferenceOptions *builtin_options_as_SquaredDifferenceOptions() const
+ const circle::BatchMatMulOptions *builtin_options_as_BatchMatMulOptions() const
{
- return builtin_options_type() == BuiltinOptions_SquaredDifferenceOptions
- ? static_cast<const SquaredDifferenceOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_BatchMatMulOptions
+ ? static_cast<const circle::BatchMatMulOptions *>(builtin_options())
+ : nullptr;
}
- const MirrorPadOptions *builtin_options_as_MirrorPadOptions() const
+ const circle::CumsumOptions *builtin_options_as_CumsumOptions() const
{
- return builtin_options_type() == BuiltinOptions_MirrorPadOptions
- ? static_cast<const MirrorPadOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_CumsumOptions
+ ? static_cast<const circle::CumsumOptions *>(builtin_options())
+ : nullptr;
}
- const AbsOptions *builtin_options_as_AbsOptions() const
+ const circle::CallOnceOptions *builtin_options_as_CallOnceOptions() const
{
- return builtin_options_type() == BuiltinOptions_AbsOptions
- ? static_cast<const AbsOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_CallOnceOptions
+ ? static_cast<const circle::CallOnceOptions *>(builtin_options())
+ : nullptr;
}
- const SplitVOptions *builtin_options_as_SplitVOptions() const
+ const circle::BroadcastToOptions *builtin_options_as_BroadcastToOptions() const
{
- return builtin_options_type() == BuiltinOptions_SplitVOptions
- ? static_cast<const SplitVOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_BroadcastToOptions
+ ? static_cast<const circle::BroadcastToOptions *>(builtin_options())
+ : nullptr;
}
- const UniqueOptions *builtin_options_as_UniqueOptions() const
+ const circle::Rfft2dOptions *builtin_options_as_Rfft2dOptions() const
{
- return builtin_options_type() == BuiltinOptions_UniqueOptions
- ? static_cast<const UniqueOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_Rfft2dOptions
+ ? static_cast<const circle::Rfft2dOptions *>(builtin_options())
+ : nullptr;
}
- const ReverseV2Options *builtin_options_as_ReverseV2Options() const
+ const circle::Conv3DOptions *builtin_options_as_Conv3DOptions() const
{
- return builtin_options_type() == BuiltinOptions_ReverseV2Options
- ? static_cast<const ReverseV2Options *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_Conv3DOptions
+ ? static_cast<const circle::Conv3DOptions *>(builtin_options())
+ : nullptr;
}
- const AddNOptions *builtin_options_as_AddNOptions() const
+ const circle::HashtableOptions *builtin_options_as_HashtableOptions() const
{
- return builtin_options_type() == BuiltinOptions_AddNOptions
- ? static_cast<const AddNOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_HashtableOptions
+ ? static_cast<const circle::HashtableOptions *>(builtin_options())
+ : nullptr;
}
- const GatherNdOptions *builtin_options_as_GatherNdOptions() const
+ const circle::HashtableFindOptions *builtin_options_as_HashtableFindOptions() const
{
- return builtin_options_type() == BuiltinOptions_GatherNdOptions
- ? static_cast<const GatherNdOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_HashtableFindOptions
+ ? static_cast<const circle::HashtableFindOptions *>(builtin_options())
+ : nullptr;
}
- const CosOptions *builtin_options_as_CosOptions() const
+ const circle::HashtableImportOptions *builtin_options_as_HashtableImportOptions() const
{
- return builtin_options_type() == BuiltinOptions_CosOptions
- ? static_cast<const CosOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_HashtableImportOptions
+ ? static_cast<const circle::HashtableImportOptions *>(builtin_options())
+ : nullptr;
}
- const WhereOptions *builtin_options_as_WhereOptions() const
+ const circle::HashtableSizeOptions *builtin_options_as_HashtableSizeOptions() const
{
- return builtin_options_type() == BuiltinOptions_WhereOptions
- ? static_cast<const WhereOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_HashtableSizeOptions
+ ? static_cast<const circle::HashtableSizeOptions *>(builtin_options())
+ : nullptr;
}
- const RankOptions *builtin_options_as_RankOptions() const
+ const circle::VarHandleOptions *builtin_options_as_VarHandleOptions() const
{
- return builtin_options_type() == BuiltinOptions_RankOptions
- ? static_cast<const RankOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_VarHandleOptions
+ ? static_cast<const circle::VarHandleOptions *>(builtin_options())
+ : nullptr;
}
- const ReverseSequenceOptions *builtin_options_as_ReverseSequenceOptions() const
+ const circle::ReadVariableOptions *builtin_options_as_ReadVariableOptions() const
{
- return builtin_options_type() == BuiltinOptions_ReverseSequenceOptions
- ? static_cast<const ReverseSequenceOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_ReadVariableOptions
+ ? static_cast<const circle::ReadVariableOptions *>(builtin_options())
+ : nullptr;
}
- const MatrixDiagOptions *builtin_options_as_MatrixDiagOptions() const
+ const circle::AssignVariableOptions *builtin_options_as_AssignVariableOptions() const
{
- return builtin_options_type() == BuiltinOptions_MatrixDiagOptions
- ? static_cast<const MatrixDiagOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_AssignVariableOptions
+ ? static_cast<const circle::AssignVariableOptions *>(builtin_options())
+ : nullptr;
}
- const QuantizeOptions *builtin_options_as_QuantizeOptions() const
+ const circle::RandomOptions *builtin_options_as_RandomOptions() const
{
- return builtin_options_type() == BuiltinOptions_QuantizeOptions
- ? static_cast<const QuantizeOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_RandomOptions
+ ? static_cast<const circle::RandomOptions *>(builtin_options())
+ : nullptr;
}
- const MatrixSetDiagOptions *builtin_options_as_MatrixSetDiagOptions() const
+ const circle::BucketizeOptions *builtin_options_as_BucketizeOptions() const
{
- return builtin_options_type() == BuiltinOptions_MatrixSetDiagOptions
- ? static_cast<const MatrixSetDiagOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_BucketizeOptions
+ ? static_cast<const circle::BucketizeOptions *>(builtin_options())
+ : nullptr;
}
- const HardSwishOptions *builtin_options_as_HardSwishOptions() const
+ const circle::GeluOptions *builtin_options_as_GeluOptions() const
{
- return builtin_options_type() == BuiltinOptions_HardSwishOptions
- ? static_cast<const HardSwishOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_GeluOptions
+ ? static_cast<const circle::GeluOptions *>(builtin_options())
+ : nullptr;
}
- const IfOptions *builtin_options_as_IfOptions() const
+ const circle::DynamicUpdateSliceOptions *builtin_options_as_DynamicUpdateSliceOptions() const
{
- return builtin_options_type() == BuiltinOptions_IfOptions
- ? static_cast<const IfOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_DynamicUpdateSliceOptions
+ ? static_cast<const circle::DynamicUpdateSliceOptions *>(builtin_options())
+ : nullptr;
}
- const WhileOptions *builtin_options_as_WhileOptions() const
+ const circle::UnsortedSegmentProdOptions *builtin_options_as_UnsortedSegmentProdOptions() const
{
- return builtin_options_type() == BuiltinOptions_WhileOptions
- ? static_cast<const WhileOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_UnsortedSegmentProdOptions
+ ? static_cast<const circle::UnsortedSegmentProdOptions *>(builtin_options())
+ : nullptr;
}
- const DepthToSpaceOptions *builtin_options_as_DepthToSpaceOptions() const
+ const circle::UnsortedSegmentMaxOptions *builtin_options_as_UnsortedSegmentMaxOptions() const
{
- return builtin_options_type() == BuiltinOptions_DepthToSpaceOptions
- ? static_cast<const DepthToSpaceOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_UnsortedSegmentMaxOptions
+ ? static_cast<const circle::UnsortedSegmentMaxOptions *>(builtin_options())
+ : nullptr;
}
- const NonMaxSuppressionV4Options *builtin_options_as_NonMaxSuppressionV4Options() const
+ const circle::UnsortedSegmentMinOptions *builtin_options_as_UnsortedSegmentMinOptions() const
{
- return builtin_options_type() == BuiltinOptions_NonMaxSuppressionV4Options
- ? static_cast<const NonMaxSuppressionV4Options *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_UnsortedSegmentMinOptions
+ ? static_cast<const circle::UnsortedSegmentMinOptions *>(builtin_options())
+ : nullptr;
}
- const NonMaxSuppressionV5Options *builtin_options_as_NonMaxSuppressionV5Options() const
+ const circle::UnsortedSegmentSumOptions *builtin_options_as_UnsortedSegmentSumOptions() const
{
- return builtin_options_type() == BuiltinOptions_NonMaxSuppressionV5Options
- ? static_cast<const NonMaxSuppressionV5Options *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_UnsortedSegmentSumOptions
+ ? static_cast<const circle::UnsortedSegmentSumOptions *>(builtin_options())
+ : nullptr;
}
- const ScatterNdOptions *builtin_options_as_ScatterNdOptions() const
+ const circle::ATan2Options *builtin_options_as_ATan2Options() const
{
- return builtin_options_type() == BuiltinOptions_ScatterNdOptions
- ? static_cast<const ScatterNdOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_ATan2Options
+ ? static_cast<const circle::ATan2Options *>(builtin_options())
+ : nullptr;
}
- const SelectV2Options *builtin_options_as_SelectV2Options() const
+ const circle::SignOptions *builtin_options_as_SignOptions() const
{
- return builtin_options_type() == BuiltinOptions_SelectV2Options
- ? static_cast<const SelectV2Options *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_SignOptions
+ ? static_cast<const circle::SignOptions *>(builtin_options())
+ : nullptr;
}
- const DensifyOptions *builtin_options_as_DensifyOptions() const
+ const circle::BitcastOptions *builtin_options_as_BitcastOptions() const
{
- return builtin_options_type() == BuiltinOptions_DensifyOptions
- ? static_cast<const DensifyOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_BitcastOptions
+ ? static_cast<const circle::BitcastOptions *>(builtin_options())
+ : nullptr;
}
- const SegmentSumOptions *builtin_options_as_SegmentSumOptions() const
+ const circle::BitwiseXorOptions *builtin_options_as_BitwiseXorOptions() const
{
- return builtin_options_type() == BuiltinOptions_SegmentSumOptions
- ? static_cast<const SegmentSumOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_BitwiseXorOptions
+ ? static_cast<const circle::BitwiseXorOptions *>(builtin_options())
+ : nullptr;
}
- const BatchMatMulOptions *builtin_options_as_BatchMatMulOptions() const
+ const circle::RightShiftOptions *builtin_options_as_RightShiftOptions() const
{
- return builtin_options_type() == BuiltinOptions_BatchMatMulOptions
- ? static_cast<const BatchMatMulOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_RightShiftOptions
+ ? static_cast<const circle::RightShiftOptions *>(builtin_options())
+ : nullptr;
}
- const BCQGatherOptions *builtin_options_as_BCQGatherOptions() const
+ const circle::BCQGatherOptions *builtin_options_as_BCQGatherOptions() const
{
- return builtin_options_type() == BuiltinOptions_BCQGatherOptions
- ? static_cast<const BCQGatherOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_BCQGatherOptions
+ ? static_cast<const circle::BCQGatherOptions *>(builtin_options())
+ : nullptr;
}
- const BCQFullyConnectedOptions *builtin_options_as_BCQFullyConnectedOptions() const
+ const circle::BCQFullyConnectedOptions *builtin_options_as_BCQFullyConnectedOptions() const
{
- return builtin_options_type() == BuiltinOptions_BCQFullyConnectedOptions
- ? static_cast<const BCQFullyConnectedOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_BCQFullyConnectedOptions
+ ? static_cast<const circle::BCQFullyConnectedOptions *>(builtin_options())
+ : nullptr;
}
- const InstanceNormOptions *builtin_options_as_InstanceNormOptions() const
+ const circle::InstanceNormOptions *builtin_options_as_InstanceNormOptions() const
{
- return builtin_options_type() == BuiltinOptions_InstanceNormOptions
- ? static_cast<const InstanceNormOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == circle::BuiltinOptions_InstanceNormOptions
+ ? static_cast<const circle::InstanceNormOptions *>(builtin_options())
+ : nullptr;
}
const flatbuffers::Vector<uint8_t> *custom_options() const
{
return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM_OPTIONS);
}
- CustomOptionsFormat custom_options_format() const
+ circle::CustomOptionsFormat custom_options_format() const
{
- return static_cast<CustomOptionsFormat>(GetField<int8_t>(VT_CUSTOM_OPTIONS_FORMAT, 0));
+ return static_cast<circle::CustomOptionsFormat>(GetField<int8_t>(VT_CUSTOM_OPTIONS_FORMAT, 0));
}
const flatbuffers::Vector<uint8_t> *mutating_variable_inputs() const
{
@@ -8394,568 +10362,846 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
}
};
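// Illustrative sketch (not part of the generated header): each
// builtin_options_as_X() accessor above follows the standard FlatBuffers
// union pattern -- compare builtin_options_type() against the union tag and
// return a static_cast pointer on a match, or nullptr otherwise. A minimal
// usage sketch, assuming `op` points at a circle::Operator read from a loaded
// model (the variable names here are assumptions for the example):
//
//   const circle::Operator *op = /* ... an operator from a subgraph */;
//   if (const circle::Conv2DOptions *conv = op->builtin_options_as_Conv2DOptions())
//   {
//     // Non-null only when builtin_options_type() == circle::BuiltinOptions_Conv2DOptions
//     int stride_w = conv->stride_w();
//     (void)stride_w;
//   }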
-template <> inline const Conv2DOptions *Operator::builtin_options_as<Conv2DOptions>() const
+template <>
+inline const circle::Conv2DOptions *Operator::builtin_options_as<circle::Conv2DOptions>() const
{
return builtin_options_as_Conv2DOptions();
}
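// Illustrative sketch (not part of the generated header): the template
// specializations that follow simply forward builtin_options_as<T>() to the
// corresponding named accessor, so callers can dispatch on the options type
// at compile time. A minimal sketch under that assumption (the helper name
// below is hypothetical, for the example only):
//
//   template <typename OptT> bool has_options(const circle::Operator *op)
//   {
//     return op->builtin_options_as<OptT>() != nullptr;
//   }
//   // has_options<circle::Conv2DOptions>(op) is equivalent to
//   // (op->builtin_options_as_Conv2DOptions() != nullptr)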
template <>
-inline const DepthwiseConv2DOptions *Operator::builtin_options_as<DepthwiseConv2DOptions>() const
+inline const circle::DepthwiseConv2DOptions *
+Operator::builtin_options_as<circle::DepthwiseConv2DOptions>() const
{
return builtin_options_as_DepthwiseConv2DOptions();
}
template <>
-inline const ConcatEmbeddingsOptions *Operator::builtin_options_as<ConcatEmbeddingsOptions>() const
+inline const circle::ConcatEmbeddingsOptions *
+Operator::builtin_options_as<circle::ConcatEmbeddingsOptions>() const
{
return builtin_options_as_ConcatEmbeddingsOptions();
}
template <>
-inline const LSHProjectionOptions *Operator::builtin_options_as<LSHProjectionOptions>() const
+inline const circle::LSHProjectionOptions *
+Operator::builtin_options_as<circle::LSHProjectionOptions>() const
{
return builtin_options_as_LSHProjectionOptions();
}
-template <> inline const Pool2DOptions *Operator::builtin_options_as<Pool2DOptions>() const
+template <>
+inline const circle::Pool2DOptions *Operator::builtin_options_as<circle::Pool2DOptions>() const
{
return builtin_options_as_Pool2DOptions();
}
-template <> inline const SVDFOptions *Operator::builtin_options_as<SVDFOptions>() const
+template <>
+inline const circle::SVDFOptions *Operator::builtin_options_as<circle::SVDFOptions>() const
{
return builtin_options_as_SVDFOptions();
}
-template <> inline const RNNOptions *Operator::builtin_options_as<RNNOptions>() const
+template <>
+inline const circle::RNNOptions *Operator::builtin_options_as<circle::RNNOptions>() const
{
return builtin_options_as_RNNOptions();
}
template <>
-inline const FullyConnectedOptions *Operator::builtin_options_as<FullyConnectedOptions>() const
+inline const circle::FullyConnectedOptions *
+Operator::builtin_options_as<circle::FullyConnectedOptions>() const
{
return builtin_options_as_FullyConnectedOptions();
}
-template <> inline const SoftmaxOptions *Operator::builtin_options_as<SoftmaxOptions>() const
+template <>
+inline const circle::SoftmaxOptions *Operator::builtin_options_as<circle::SoftmaxOptions>() const
{
return builtin_options_as_SoftmaxOptions();
}
template <>
-inline const ConcatenationOptions *Operator::builtin_options_as<ConcatenationOptions>() const
+inline const circle::ConcatenationOptions *
+Operator::builtin_options_as<circle::ConcatenationOptions>() const
{
return builtin_options_as_ConcatenationOptions();
}
-template <> inline const AddOptions *Operator::builtin_options_as<AddOptions>() const
+template <>
+inline const circle::AddOptions *Operator::builtin_options_as<circle::AddOptions>() const
{
return builtin_options_as_AddOptions();
}
-template <> inline const L2NormOptions *Operator::builtin_options_as<L2NormOptions>() const
+template <>
+inline const circle::L2NormOptions *Operator::builtin_options_as<circle::L2NormOptions>() const
{
return builtin_options_as_L2NormOptions();
}
template <>
-inline const LocalResponseNormalizationOptions *
-Operator::builtin_options_as<LocalResponseNormalizationOptions>() const
+inline const circle::LocalResponseNormalizationOptions *
+Operator::builtin_options_as<circle::LocalResponseNormalizationOptions>() const
{
return builtin_options_as_LocalResponseNormalizationOptions();
}
-template <> inline const LSTMOptions *Operator::builtin_options_as<LSTMOptions>() const
+template <>
+inline const circle::LSTMOptions *Operator::builtin_options_as<circle::LSTMOptions>() const
{
return builtin_options_as_LSTMOptions();
}
template <>
-inline const ResizeBilinearOptions *Operator::builtin_options_as<ResizeBilinearOptions>() const
+inline const circle::ResizeBilinearOptions *
+Operator::builtin_options_as<circle::ResizeBilinearOptions>() const
{
return builtin_options_as_ResizeBilinearOptions();
}
-template <> inline const CallOptions *Operator::builtin_options_as<CallOptions>() const
+template <>
+inline const circle::CallOptions *Operator::builtin_options_as<circle::CallOptions>() const
{
return builtin_options_as_CallOptions();
}
-template <> inline const ReshapeOptions *Operator::builtin_options_as<ReshapeOptions>() const
+template <>
+inline const circle::ReshapeOptions *Operator::builtin_options_as<circle::ReshapeOptions>() const
{
return builtin_options_as_ReshapeOptions();
}
-template <> inline const SkipGramOptions *Operator::builtin_options_as<SkipGramOptions>() const
+template <>
+inline const circle::SkipGramOptions *Operator::builtin_options_as<circle::SkipGramOptions>() const
{
return builtin_options_as_SkipGramOptions();
}
template <>
-inline const SpaceToDepthOptions *Operator::builtin_options_as<SpaceToDepthOptions>() const
+inline const circle::SpaceToDepthOptions *
+Operator::builtin_options_as<circle::SpaceToDepthOptions>() const
{
return builtin_options_as_SpaceToDepthOptions();
}
template <>
-inline const EmbeddingLookupSparseOptions *
-Operator::builtin_options_as<EmbeddingLookupSparseOptions>() const
+inline const circle::EmbeddingLookupSparseOptions *
+Operator::builtin_options_as<circle::EmbeddingLookupSparseOptions>() const
{
return builtin_options_as_EmbeddingLookupSparseOptions();
}
-template <> inline const MulOptions *Operator::builtin_options_as<MulOptions>() const
+template <>
+inline const circle::MulOptions *Operator::builtin_options_as<circle::MulOptions>() const
{
return builtin_options_as_MulOptions();
}
-template <> inline const PadOptions *Operator::builtin_options_as<PadOptions>() const
+template <>
+inline const circle::PadOptions *Operator::builtin_options_as<circle::PadOptions>() const
{
return builtin_options_as_PadOptions();
}
-template <> inline const GatherOptions *Operator::builtin_options_as<GatherOptions>() const
+template <>
+inline const circle::GatherOptions *Operator::builtin_options_as<circle::GatherOptions>() const
{
return builtin_options_as_GatherOptions();
}
template <>
-inline const BatchToSpaceNDOptions *Operator::builtin_options_as<BatchToSpaceNDOptions>() const
+inline const circle::BatchToSpaceNDOptions *
+Operator::builtin_options_as<circle::BatchToSpaceNDOptions>() const
{
return builtin_options_as_BatchToSpaceNDOptions();
}
template <>
-inline const SpaceToBatchNDOptions *Operator::builtin_options_as<SpaceToBatchNDOptions>() const
+inline const circle::SpaceToBatchNDOptions *
+Operator::builtin_options_as<circle::SpaceToBatchNDOptions>() const
{
return builtin_options_as_SpaceToBatchNDOptions();
}
-template <> inline const TransposeOptions *Operator::builtin_options_as<TransposeOptions>() const
+template <>
+inline const circle::TransposeOptions *
+Operator::builtin_options_as<circle::TransposeOptions>() const
{
return builtin_options_as_TransposeOptions();
}
-template <> inline const ReducerOptions *Operator::builtin_options_as<ReducerOptions>() const
+template <>
+inline const circle::ReducerOptions *Operator::builtin_options_as<circle::ReducerOptions>() const
{
return builtin_options_as_ReducerOptions();
}
-template <> inline const SubOptions *Operator::builtin_options_as<SubOptions>() const
+template <>
+inline const circle::SubOptions *Operator::builtin_options_as<circle::SubOptions>() const
{
return builtin_options_as_SubOptions();
}
-template <> inline const DivOptions *Operator::builtin_options_as<DivOptions>() const
+template <>
+inline const circle::DivOptions *Operator::builtin_options_as<circle::DivOptions>() const
{
return builtin_options_as_DivOptions();
}
-template <> inline const SqueezeOptions *Operator::builtin_options_as<SqueezeOptions>() const
+template <>
+inline const circle::SqueezeOptions *Operator::builtin_options_as<circle::SqueezeOptions>() const
{
return builtin_options_as_SqueezeOptions();
}
template <>
-inline const SequenceRNNOptions *Operator::builtin_options_as<SequenceRNNOptions>() const
+inline const circle::SequenceRNNOptions *
+Operator::builtin_options_as<circle::SequenceRNNOptions>() const
{
return builtin_options_as_SequenceRNNOptions();
}
template <>
-inline const StridedSliceOptions *Operator::builtin_options_as<StridedSliceOptions>() const
+inline const circle::StridedSliceOptions *
+Operator::builtin_options_as<circle::StridedSliceOptions>() const
{
return builtin_options_as_StridedSliceOptions();
}
-template <> inline const ExpOptions *Operator::builtin_options_as<ExpOptions>() const
+template <>
+inline const circle::ExpOptions *Operator::builtin_options_as<circle::ExpOptions>() const
{
return builtin_options_as_ExpOptions();
}
-template <> inline const TopKV2Options *Operator::builtin_options_as<TopKV2Options>() const
+template <>
+inline const circle::TopKV2Options *Operator::builtin_options_as<circle::TopKV2Options>() const
{
return builtin_options_as_TopKV2Options();
}
-template <> inline const SplitOptions *Operator::builtin_options_as<SplitOptions>() const
+template <>
+inline const circle::SplitOptions *Operator::builtin_options_as<circle::SplitOptions>() const
{
return builtin_options_as_SplitOptions();
}
-template <> inline const LogSoftmaxOptions *Operator::builtin_options_as<LogSoftmaxOptions>() const
+template <>
+inline const circle::LogSoftmaxOptions *
+Operator::builtin_options_as<circle::LogSoftmaxOptions>() const
{
return builtin_options_as_LogSoftmaxOptions();
}
-template <> inline const CastOptions *Operator::builtin_options_as<CastOptions>() const
+template <>
+inline const circle::CastOptions *Operator::builtin_options_as<circle::CastOptions>() const
{
return builtin_options_as_CastOptions();
}
-template <> inline const DequantizeOptions *Operator::builtin_options_as<DequantizeOptions>() const
+template <>
+inline const circle::DequantizeOptions *
+Operator::builtin_options_as<circle::DequantizeOptions>() const
{
return builtin_options_as_DequantizeOptions();
}
template <>
-inline const MaximumMinimumOptions *Operator::builtin_options_as<MaximumMinimumOptions>() const
+inline const circle::MaximumMinimumOptions *
+Operator::builtin_options_as<circle::MaximumMinimumOptions>() const
{
return builtin_options_as_MaximumMinimumOptions();
}
-template <> inline const ArgMaxOptions *Operator::builtin_options_as<ArgMaxOptions>() const
+template <>
+inline const circle::ArgMaxOptions *Operator::builtin_options_as<circle::ArgMaxOptions>() const
{
return builtin_options_as_ArgMaxOptions();
}
-template <> inline const LessOptions *Operator::builtin_options_as<LessOptions>() const
+template <>
+inline const circle::LessOptions *Operator::builtin_options_as<circle::LessOptions>() const
{
return builtin_options_as_LessOptions();
}
-template <> inline const NegOptions *Operator::builtin_options_as<NegOptions>() const
+template <>
+inline const circle::NegOptions *Operator::builtin_options_as<circle::NegOptions>() const
{
return builtin_options_as_NegOptions();
}
-template <> inline const PadV2Options *Operator::builtin_options_as<PadV2Options>() const
+template <>
+inline const circle::PadV2Options *Operator::builtin_options_as<circle::PadV2Options>() const
{
return builtin_options_as_PadV2Options();
}
-template <> inline const GreaterOptions *Operator::builtin_options_as<GreaterOptions>() const
+template <>
+inline const circle::GreaterOptions *Operator::builtin_options_as<circle::GreaterOptions>() const
{
return builtin_options_as_GreaterOptions();
}
template <>
-inline const GreaterEqualOptions *Operator::builtin_options_as<GreaterEqualOptions>() const
+inline const circle::GreaterEqualOptions *
+Operator::builtin_options_as<circle::GreaterEqualOptions>() const
{
return builtin_options_as_GreaterEqualOptions();
}
-template <> inline const LessEqualOptions *Operator::builtin_options_as<LessEqualOptions>() const
+template <>
+inline const circle::LessEqualOptions *
+Operator::builtin_options_as<circle::LessEqualOptions>() const
{
return builtin_options_as_LessEqualOptions();
}
-template <> inline const SelectOptions *Operator::builtin_options_as<SelectOptions>() const
+template <>
+inline const circle::SelectOptions *Operator::builtin_options_as<circle::SelectOptions>() const
{
return builtin_options_as_SelectOptions();
}
-template <> inline const SliceOptions *Operator::builtin_options_as<SliceOptions>() const
+template <>
+inline const circle::SliceOptions *Operator::builtin_options_as<circle::SliceOptions>() const
{
return builtin_options_as_SliceOptions();
}
template <>
-inline const TransposeConvOptions *Operator::builtin_options_as<TransposeConvOptions>() const
+inline const circle::TransposeConvOptions *
+Operator::builtin_options_as<circle::TransposeConvOptions>() const
{
return builtin_options_as_TransposeConvOptions();
}
template <>
-inline const SparseToDenseOptions *Operator::builtin_options_as<SparseToDenseOptions>() const
+inline const circle::SparseToDenseOptions *
+Operator::builtin_options_as<circle::SparseToDenseOptions>() const
{
return builtin_options_as_SparseToDenseOptions();
}
-template <> inline const TileOptions *Operator::builtin_options_as<TileOptions>() const
+template <>
+inline const circle::TileOptions *Operator::builtin_options_as<circle::TileOptions>() const
{
return builtin_options_as_TileOptions();
}
-template <> inline const ExpandDimsOptions *Operator::builtin_options_as<ExpandDimsOptions>() const
+template <>
+inline const circle::ExpandDimsOptions *
+Operator::builtin_options_as<circle::ExpandDimsOptions>() const
{
return builtin_options_as_ExpandDimsOptions();
}
-template <> inline const EqualOptions *Operator::builtin_options_as<EqualOptions>() const
+template <>
+inline const circle::EqualOptions *Operator::builtin_options_as<circle::EqualOptions>() const
{
return builtin_options_as_EqualOptions();
}
-template <> inline const NotEqualOptions *Operator::builtin_options_as<NotEqualOptions>() const
+template <>
+inline const circle::NotEqualOptions *Operator::builtin_options_as<circle::NotEqualOptions>() const
{
return builtin_options_as_NotEqualOptions();
}
-template <> inline const ShapeOptions *Operator::builtin_options_as<ShapeOptions>() const
+template <>
+inline const circle::ShapeOptions *Operator::builtin_options_as<circle::ShapeOptions>() const
{
return builtin_options_as_ShapeOptions();
}
-template <> inline const PowOptions *Operator::builtin_options_as<PowOptions>() const
+template <>
+inline const circle::PowOptions *Operator::builtin_options_as<circle::PowOptions>() const
{
return builtin_options_as_PowOptions();
}
-template <> inline const ArgMinOptions *Operator::builtin_options_as<ArgMinOptions>() const
+template <>
+inline const circle::ArgMinOptions *Operator::builtin_options_as<circle::ArgMinOptions>() const
{
return builtin_options_as_ArgMinOptions();
}
-template <> inline const FakeQuantOptions *Operator::builtin_options_as<FakeQuantOptions>() const
+template <>
+inline const circle::FakeQuantOptions *
+Operator::builtin_options_as<circle::FakeQuantOptions>() const
{
return builtin_options_as_FakeQuantOptions();
}
-template <> inline const PackOptions *Operator::builtin_options_as<PackOptions>() const
+template <>
+inline const circle::PackOptions *Operator::builtin_options_as<circle::PackOptions>() const
{
return builtin_options_as_PackOptions();
}
-template <> inline const LogicalOrOptions *Operator::builtin_options_as<LogicalOrOptions>() const
+template <>
+inline const circle::LogicalOrOptions *
+Operator::builtin_options_as<circle::LogicalOrOptions>() const
{
return builtin_options_as_LogicalOrOptions();
}
-template <> inline const OneHotOptions *Operator::builtin_options_as<OneHotOptions>() const
+template <>
+inline const circle::OneHotOptions *Operator::builtin_options_as<circle::OneHotOptions>() const
{
return builtin_options_as_OneHotOptions();
}
-template <> inline const LogicalAndOptions *Operator::builtin_options_as<LogicalAndOptions>() const
+template <>
+inline const circle::LogicalAndOptions *
+Operator::builtin_options_as<circle::LogicalAndOptions>() const
{
return builtin_options_as_LogicalAndOptions();
}
-template <> inline const LogicalNotOptions *Operator::builtin_options_as<LogicalNotOptions>() const
+template <>
+inline const circle::LogicalNotOptions *
+Operator::builtin_options_as<circle::LogicalNotOptions>() const
{
return builtin_options_as_LogicalNotOptions();
}
-template <> inline const UnpackOptions *Operator::builtin_options_as<UnpackOptions>() const
+template <>
+inline const circle::UnpackOptions *Operator::builtin_options_as<circle::UnpackOptions>() const
{
return builtin_options_as_UnpackOptions();
}
-template <> inline const FloorDivOptions *Operator::builtin_options_as<FloorDivOptions>() const
+template <>
+inline const circle::FloorDivOptions *Operator::builtin_options_as<circle::FloorDivOptions>() const
{
return builtin_options_as_FloorDivOptions();
}
-template <> inline const SquareOptions *Operator::builtin_options_as<SquareOptions>() const
+template <>
+inline const circle::SquareOptions *Operator::builtin_options_as<circle::SquareOptions>() const
{
return builtin_options_as_SquareOptions();
}
-template <> inline const ZerosLikeOptions *Operator::builtin_options_as<ZerosLikeOptions>() const
+template <>
+inline const circle::ZerosLikeOptions *
+Operator::builtin_options_as<circle::ZerosLikeOptions>() const
{
return builtin_options_as_ZerosLikeOptions();
}
-template <> inline const FillOptions *Operator::builtin_options_as<FillOptions>() const
+template <>
+inline const circle::FillOptions *Operator::builtin_options_as<circle::FillOptions>() const
{
return builtin_options_as_FillOptions();
}
template <>
-inline const BidirectionalSequenceLSTMOptions *
-Operator::builtin_options_as<BidirectionalSequenceLSTMOptions>() const
+inline const circle::BidirectionalSequenceLSTMOptions *
+Operator::builtin_options_as<circle::BidirectionalSequenceLSTMOptions>() const
{
return builtin_options_as_BidirectionalSequenceLSTMOptions();
}
template <>
-inline const BidirectionalSequenceRNNOptions *
-Operator::builtin_options_as<BidirectionalSequenceRNNOptions>() const
+inline const circle::BidirectionalSequenceRNNOptions *
+Operator::builtin_options_as<circle::BidirectionalSequenceRNNOptions>() const
{
return builtin_options_as_BidirectionalSequenceRNNOptions();
}
template <>
-inline const UnidirectionalSequenceLSTMOptions *
-Operator::builtin_options_as<UnidirectionalSequenceLSTMOptions>() const
+inline const circle::UnidirectionalSequenceLSTMOptions *
+Operator::builtin_options_as<circle::UnidirectionalSequenceLSTMOptions>() const
{
return builtin_options_as_UnidirectionalSequenceLSTMOptions();
}
-template <> inline const FloorModOptions *Operator::builtin_options_as<FloorModOptions>() const
+template <>
+inline const circle::FloorModOptions *Operator::builtin_options_as<circle::FloorModOptions>() const
{
return builtin_options_as_FloorModOptions();
}
-template <> inline const RangeOptions *Operator::builtin_options_as<RangeOptions>() const
+template <>
+inline const circle::RangeOptions *Operator::builtin_options_as<circle::RangeOptions>() const
{
return builtin_options_as_RangeOptions();
}
template <>
-inline const ResizeNearestNeighborOptions *
-Operator::builtin_options_as<ResizeNearestNeighborOptions>() const
+inline const circle::ResizeNearestNeighborOptions *
+Operator::builtin_options_as<circle::ResizeNearestNeighborOptions>() const
{
return builtin_options_as_ResizeNearestNeighborOptions();
}
-template <> inline const LeakyReluOptions *Operator::builtin_options_as<LeakyReluOptions>() const
+template <>
+inline const circle::LeakyReluOptions *
+Operator::builtin_options_as<circle::LeakyReluOptions>() const
{
return builtin_options_as_LeakyReluOptions();
}
template <>
-inline const SquaredDifferenceOptions *
-Operator::builtin_options_as<SquaredDifferenceOptions>() const
+inline const circle::SquaredDifferenceOptions *
+Operator::builtin_options_as<circle::SquaredDifferenceOptions>() const
{
return builtin_options_as_SquaredDifferenceOptions();
}
-template <> inline const MirrorPadOptions *Operator::builtin_options_as<MirrorPadOptions>() const
+template <>
+inline const circle::MirrorPadOptions *
+Operator::builtin_options_as<circle::MirrorPadOptions>() const
{
return builtin_options_as_MirrorPadOptions();
}
-template <> inline const AbsOptions *Operator::builtin_options_as<AbsOptions>() const
+template <>
+inline const circle::AbsOptions *Operator::builtin_options_as<circle::AbsOptions>() const
{
return builtin_options_as_AbsOptions();
}
-template <> inline const SplitVOptions *Operator::builtin_options_as<SplitVOptions>() const
+template <>
+inline const circle::SplitVOptions *Operator::builtin_options_as<circle::SplitVOptions>() const
{
return builtin_options_as_SplitVOptions();
}
-template <> inline const UniqueOptions *Operator::builtin_options_as<UniqueOptions>() const
+template <>
+inline const circle::UniqueOptions *Operator::builtin_options_as<circle::UniqueOptions>() const
{
return builtin_options_as_UniqueOptions();
}
-template <> inline const ReverseV2Options *Operator::builtin_options_as<ReverseV2Options>() const
+template <>
+inline const circle::ReverseV2Options *
+Operator::builtin_options_as<circle::ReverseV2Options>() const
{
return builtin_options_as_ReverseV2Options();
}
-template <> inline const AddNOptions *Operator::builtin_options_as<AddNOptions>() const
+template <>
+inline const circle::AddNOptions *Operator::builtin_options_as<circle::AddNOptions>() const
{
return builtin_options_as_AddNOptions();
}
-template <> inline const GatherNdOptions *Operator::builtin_options_as<GatherNdOptions>() const
+template <>
+inline const circle::GatherNdOptions *Operator::builtin_options_as<circle::GatherNdOptions>() const
{
return builtin_options_as_GatherNdOptions();
}
-template <> inline const CosOptions *Operator::builtin_options_as<CosOptions>() const
+template <>
+inline const circle::CosOptions *Operator::builtin_options_as<circle::CosOptions>() const
{
return builtin_options_as_CosOptions();
}
-template <> inline const WhereOptions *Operator::builtin_options_as<WhereOptions>() const
+template <>
+inline const circle::WhereOptions *Operator::builtin_options_as<circle::WhereOptions>() const
{
return builtin_options_as_WhereOptions();
}
-template <> inline const RankOptions *Operator::builtin_options_as<RankOptions>() const
+template <>
+inline const circle::RankOptions *Operator::builtin_options_as<circle::RankOptions>() const
{
return builtin_options_as_RankOptions();
}
template <>
-inline const ReverseSequenceOptions *Operator::builtin_options_as<ReverseSequenceOptions>() const
+inline const circle::ReverseSequenceOptions *
+Operator::builtin_options_as<circle::ReverseSequenceOptions>() const
{
return builtin_options_as_ReverseSequenceOptions();
}
-template <> inline const MatrixDiagOptions *Operator::builtin_options_as<MatrixDiagOptions>() const
+template <>
+inline const circle::MatrixDiagOptions *
+Operator::builtin_options_as<circle::MatrixDiagOptions>() const
{
return builtin_options_as_MatrixDiagOptions();
}
-template <> inline const QuantizeOptions *Operator::builtin_options_as<QuantizeOptions>() const
+template <>
+inline const circle::QuantizeOptions *Operator::builtin_options_as<circle::QuantizeOptions>() const
{
return builtin_options_as_QuantizeOptions();
}
template <>
-inline const MatrixSetDiagOptions *Operator::builtin_options_as<MatrixSetDiagOptions>() const
+inline const circle::MatrixSetDiagOptions *
+Operator::builtin_options_as<circle::MatrixSetDiagOptions>() const
{
return builtin_options_as_MatrixSetDiagOptions();
}
-template <> inline const HardSwishOptions *Operator::builtin_options_as<HardSwishOptions>() const
+template <>
+inline const circle::HardSwishOptions *
+Operator::builtin_options_as<circle::HardSwishOptions>() const
{
return builtin_options_as_HardSwishOptions();
}
-template <> inline const IfOptions *Operator::builtin_options_as<IfOptions>() const
+template <> inline const circle::IfOptions *Operator::builtin_options_as<circle::IfOptions>() const
{
return builtin_options_as_IfOptions();
}
-template <> inline const WhileOptions *Operator::builtin_options_as<WhileOptions>() const
+template <>
+inline const circle::WhileOptions *Operator::builtin_options_as<circle::WhileOptions>() const
{
return builtin_options_as_WhileOptions();
}
template <>
-inline const DepthToSpaceOptions *Operator::builtin_options_as<DepthToSpaceOptions>() const
+inline const circle::DepthToSpaceOptions *
+Operator::builtin_options_as<circle::DepthToSpaceOptions>() const
{
return builtin_options_as_DepthToSpaceOptions();
}
template <>
-inline const NonMaxSuppressionV4Options *
-Operator::builtin_options_as<NonMaxSuppressionV4Options>() const
+inline const circle::NonMaxSuppressionV4Options *
+Operator::builtin_options_as<circle::NonMaxSuppressionV4Options>() const
{
return builtin_options_as_NonMaxSuppressionV4Options();
}
template <>
-inline const NonMaxSuppressionV5Options *
-Operator::builtin_options_as<NonMaxSuppressionV5Options>() const
+inline const circle::NonMaxSuppressionV5Options *
+Operator::builtin_options_as<circle::NonMaxSuppressionV5Options>() const
{
return builtin_options_as_NonMaxSuppressionV5Options();
}
-template <> inline const ScatterNdOptions *Operator::builtin_options_as<ScatterNdOptions>() const
+template <>
+inline const circle::ScatterNdOptions *
+Operator::builtin_options_as<circle::ScatterNdOptions>() const
{
return builtin_options_as_ScatterNdOptions();
}
-template <> inline const SelectV2Options *Operator::builtin_options_as<SelectV2Options>() const
+template <>
+inline const circle::SelectV2Options *Operator::builtin_options_as<circle::SelectV2Options>() const
{
return builtin_options_as_SelectV2Options();
}
-template <> inline const DensifyOptions *Operator::builtin_options_as<DensifyOptions>() const
+template <>
+inline const circle::DensifyOptions *Operator::builtin_options_as<circle::DensifyOptions>() const
{
return builtin_options_as_DensifyOptions();
}
-template <> inline const SegmentSumOptions *Operator::builtin_options_as<SegmentSumOptions>() const
+template <>
+inline const circle::SegmentSumOptions *
+Operator::builtin_options_as<circle::SegmentSumOptions>() const
{
return builtin_options_as_SegmentSumOptions();
}
template <>
-inline const BatchMatMulOptions *Operator::builtin_options_as<BatchMatMulOptions>() const
+inline const circle::BatchMatMulOptions *
+Operator::builtin_options_as<circle::BatchMatMulOptions>() const
{
return builtin_options_as_BatchMatMulOptions();
}
-template <> inline const BCQGatherOptions *Operator::builtin_options_as<BCQGatherOptions>() const
+template <>
+inline const circle::CumsumOptions *Operator::builtin_options_as<circle::CumsumOptions>() const
+{
+ return builtin_options_as_CumsumOptions();
+}
+
+template <>
+inline const circle::CallOnceOptions *Operator::builtin_options_as<circle::CallOnceOptions>() const
+{
+ return builtin_options_as_CallOnceOptions();
+}
+
+template <>
+inline const circle::BroadcastToOptions *
+Operator::builtin_options_as<circle::BroadcastToOptions>() const
+{
+ return builtin_options_as_BroadcastToOptions();
+}
+
+template <>
+inline const circle::Rfft2dOptions *Operator::builtin_options_as<circle::Rfft2dOptions>() const
+{
+ return builtin_options_as_Rfft2dOptions();
+}
+
+template <>
+inline const circle::Conv3DOptions *Operator::builtin_options_as<circle::Conv3DOptions>() const
+{
+ return builtin_options_as_Conv3DOptions();
+}
+
+template <>
+inline const circle::HashtableOptions *
+Operator::builtin_options_as<circle::HashtableOptions>() const
+{
+ return builtin_options_as_HashtableOptions();
+}
+
+template <>
+inline const circle::HashtableFindOptions *
+Operator::builtin_options_as<circle::HashtableFindOptions>() const
+{
+ return builtin_options_as_HashtableFindOptions();
+}
+
+template <>
+inline const circle::HashtableImportOptions *
+Operator::builtin_options_as<circle::HashtableImportOptions>() const
+{
+ return builtin_options_as_HashtableImportOptions();
+}
+
+template <>
+inline const circle::HashtableSizeOptions *
+Operator::builtin_options_as<circle::HashtableSizeOptions>() const
+{
+ return builtin_options_as_HashtableSizeOptions();
+}
+
+template <>
+inline const circle::VarHandleOptions *
+Operator::builtin_options_as<circle::VarHandleOptions>() const
+{
+ return builtin_options_as_VarHandleOptions();
+}
+
+template <>
+inline const circle::ReadVariableOptions *
+Operator::builtin_options_as<circle::ReadVariableOptions>() const
+{
+ return builtin_options_as_ReadVariableOptions();
+}
+
+template <>
+inline const circle::AssignVariableOptions *
+Operator::builtin_options_as<circle::AssignVariableOptions>() const
+{
+ return builtin_options_as_AssignVariableOptions();
+}
+
+template <>
+inline const circle::RandomOptions *Operator::builtin_options_as<circle::RandomOptions>() const
+{
+ return builtin_options_as_RandomOptions();
+}
+
+template <>
+inline const circle::BucketizeOptions *
+Operator::builtin_options_as<circle::BucketizeOptions>() const
+{
+ return builtin_options_as_BucketizeOptions();
+}
+
+template <>
+inline const circle::GeluOptions *Operator::builtin_options_as<circle::GeluOptions>() const
+{
+ return builtin_options_as_GeluOptions();
+}
+
+template <>
+inline const circle::DynamicUpdateSliceOptions *
+Operator::builtin_options_as<circle::DynamicUpdateSliceOptions>() const
+{
+ return builtin_options_as_DynamicUpdateSliceOptions();
+}
+
+template <>
+inline const circle::UnsortedSegmentProdOptions *
+Operator::builtin_options_as<circle::UnsortedSegmentProdOptions>() const
+{
+ return builtin_options_as_UnsortedSegmentProdOptions();
+}
+
+template <>
+inline const circle::UnsortedSegmentMaxOptions *
+Operator::builtin_options_as<circle::UnsortedSegmentMaxOptions>() const
+{
+ return builtin_options_as_UnsortedSegmentMaxOptions();
+}
+
+template <>
+inline const circle::UnsortedSegmentMinOptions *
+Operator::builtin_options_as<circle::UnsortedSegmentMinOptions>() const
+{
+ return builtin_options_as_UnsortedSegmentMinOptions();
+}
+
+template <>
+inline const circle::UnsortedSegmentSumOptions *
+Operator::builtin_options_as<circle::UnsortedSegmentSumOptions>() const
+{
+ return builtin_options_as_UnsortedSegmentSumOptions();
+}
+
+template <>
+inline const circle::ATan2Options *Operator::builtin_options_as<circle::ATan2Options>() const
+{
+ return builtin_options_as_ATan2Options();
+}
+
+template <>
+inline const circle::SignOptions *Operator::builtin_options_as<circle::SignOptions>() const
+{
+ return builtin_options_as_SignOptions();
+}
+
+template <>
+inline const circle::BitcastOptions *Operator::builtin_options_as<circle::BitcastOptions>() const
+{
+ return builtin_options_as_BitcastOptions();
+}
+
+template <>
+inline const circle::BitwiseXorOptions *
+Operator::builtin_options_as<circle::BitwiseXorOptions>() const
+{
+ return builtin_options_as_BitwiseXorOptions();
+}
+
+template <>
+inline const circle::RightShiftOptions *
+Operator::builtin_options_as<circle::RightShiftOptions>() const
+{
+ return builtin_options_as_RightShiftOptions();
+}
+
+template <>
+inline const circle::BCQGatherOptions *
+Operator::builtin_options_as<circle::BCQGatherOptions>() const
{
return builtin_options_as_BCQGatherOptions();
}
template <>
-inline const BCQFullyConnectedOptions *
-Operator::builtin_options_as<BCQFullyConnectedOptions>() const
+inline const circle::BCQFullyConnectedOptions *
+Operator::builtin_options_as<circle::BCQFullyConnectedOptions>() const
{
return builtin_options_as_BCQFullyConnectedOptions();
}
template <>
-inline const InstanceNormOptions *Operator::builtin_options_as<InstanceNormOptions>() const
+inline const circle::InstanceNormOptions *
+Operator::builtin_options_as<circle::InstanceNormOptions>() const
{
return builtin_options_as_InstanceNormOptions();
}
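The block above both requalifies the existing builtin_options_as<T>() specializations with the
circle namespace and adds specializations for the newly introduced option tables (CumsumOptions
through RightShiftOptions), so the typed accessors now resolve correctly from code outside the
circle namespace. A minimal caller sketch (illustrative only, not part of the patch; "op" is an
assumed const circle::Operator * obtained from a loaded model):

  if (op->builtin_options_type() == circle::BuiltinOptions_Conv2DOptions)
  {
    // Typed view over the builtin_options union; nullptr when the tag does not match.
    const auto *conv = op->builtin_options_as<circle::Conv2DOptions>();
  }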
struct OperatorBuilder
{
+ typedef Operator Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_opcode_index(uint32_t opcode_index)
@@ -8970,7 +11216,7 @@ struct OperatorBuilder
{
fbb_.AddOffset(Operator::VT_OUTPUTS, outputs);
}
- void add_builtin_options_type(BuiltinOptions builtin_options_type)
+ void add_builtin_options_type(circle::BuiltinOptions builtin_options_type)
{
fbb_.AddElement<uint8_t>(Operator::VT_BUILTIN_OPTIONS_TYPE,
static_cast<uint8_t>(builtin_options_type), 0);
@@ -8983,13 +11229,13 @@ struct OperatorBuilder
{
fbb_.AddOffset(Operator::VT_CUSTOM_OPTIONS, custom_options);
}
- void add_custom_options_format(CustomOptionsFormat custom_options_format)
+ void add_custom_options_format(circle::CustomOptionsFormat custom_options_format)
{
fbb_.AddElement<int8_t>(Operator::VT_CUSTOM_OPTIONS_FORMAT,
static_cast<int8_t>(custom_options_format), 0);
}
void add_mutating_variable_inputs(
- flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs)
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs)
{
fbb_.AddOffset(Operator::VT_MUTATING_VARIABLE_INPUTS, mutating_variable_inputs);
}
@@ -9001,7 +11247,6 @@ struct OperatorBuilder
{
start_ = fbb_.StartTable();
}
- OperatorBuilder &operator=(const OperatorBuilder &);
flatbuffers::Offset<Operator> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -9010,16 +11255,16 @@ struct OperatorBuilder
}
};
-inline flatbuffers::Offset<Operator>
-CreateOperator(flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
- BuiltinOptions builtin_options_type = BuiltinOptions_NONE,
- flatbuffers::Offset<void> builtin_options = 0,
- flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options = 0,
- CustomOptionsFormat custom_options_format = CustomOptionsFormat_FLEXBUFFERS,
- flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> intermediates = 0)
+inline flatbuffers::Offset<Operator> CreateOperator(
+ flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
+ circle::BuiltinOptions builtin_options_type = circle::BuiltinOptions_NONE,
+ flatbuffers::Offset<void> builtin_options = 0,
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options = 0,
+ circle::CustomOptionsFormat custom_options_format = circle::CustomOptionsFormat_FLEXBUFFERS,
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> intermediates = 0)
{
OperatorBuilder builder_(_fbb);
builder_.add_intermediates(intermediates);
@@ -9034,28 +11279,31 @@ CreateOperator(flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
return builder_.Finish();
}
-inline flatbuffers::Offset<Operator>
-CreateOperatorDirect(flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
- const std::vector<int32_t> *inputs = nullptr,
- const std::vector<int32_t> *outputs = nullptr,
- BuiltinOptions builtin_options_type = BuiltinOptions_NONE,
- flatbuffers::Offset<void> builtin_options = 0,
- const std::vector<uint8_t> *custom_options = nullptr,
- CustomOptionsFormat custom_options_format = CustomOptionsFormat_FLEXBUFFERS,
- const std::vector<uint8_t> *mutating_variable_inputs = nullptr,
- const std::vector<int32_t> *intermediates = nullptr)
+inline flatbuffers::Offset<Operator> CreateOperatorDirect(
+ flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
+ const std::vector<int32_t> *inputs = nullptr, const std::vector<int32_t> *outputs = nullptr,
+ circle::BuiltinOptions builtin_options_type = circle::BuiltinOptions_NONE,
+ flatbuffers::Offset<void> builtin_options = 0,
+ const std::vector<uint8_t> *custom_options = nullptr,
+ circle::CustomOptionsFormat custom_options_format = circle::CustomOptionsFormat_FLEXBUFFERS,
+ const std::vector<uint8_t> *mutating_variable_inputs = nullptr,
+ const std::vector<int32_t> *intermediates = nullptr)
{
- return circle::CreateOperator(
- _fbb, opcode_index, inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0,
- outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0, builtin_options_type, builtin_options,
- custom_options ? _fbb.CreateVector<uint8_t>(*custom_options) : 0, custom_options_format,
- mutating_variable_inputs ? _fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0,
- intermediates ? _fbb.CreateVector<int32_t>(*intermediates) : 0);
+ auto inputs__ = inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0;
+ auto outputs__ = outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0;
+ auto custom_options__ = custom_options ? _fbb.CreateVector<uint8_t>(*custom_options) : 0;
+ auto mutating_variable_inputs__ =
+ mutating_variable_inputs ? _fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0;
+ auto intermediates__ = intermediates ? _fbb.CreateVector<int32_t>(*intermediates) : 0;
+ return circle::CreateOperator(_fbb, opcode_index, inputs__, outputs__, builtin_options_type,
+ builtin_options, custom_options__, custom_options_format,
+ mutating_variable_inputs__, intermediates__);
}
struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SubGraphBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_TENSORS = 4,
VT_INPUTS = 6,
@@ -9064,9 +11312,9 @@ struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
VT_NAME = 12,
VT_DATA_FORMAT = 14
};
- const flatbuffers::Vector<flatbuffers::Offset<Tensor>> *tensors() const
+ const flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>> *tensors() const
{
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Tensor>> *>(VT_TENSORS);
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>> *>(VT_TENSORS);
}
const flatbuffers::Vector<int32_t> *inputs() const
{
@@ -9076,17 +11324,18 @@ struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_OUTPUTS);
}
- const flatbuffers::Vector<flatbuffers::Offset<Operator>> *operators() const
+ const flatbuffers::Vector<flatbuffers::Offset<circle::Operator>> *operators() const
{
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Operator>> *>(VT_OPERATORS);
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::Operator>> *>(
+ VT_OPERATORS);
}
const flatbuffers::String *name() const
{
return GetPointer<const flatbuffers::String *>(VT_NAME);
}
- DataFormat data_format() const
+ circle::DataFormat data_format() const
{
- return static_cast<DataFormat>(GetField<int8_t>(VT_DATA_FORMAT, 0));
+ return static_cast<circle::DataFormat>(GetField<int8_t>(VT_DATA_FORMAT, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -9103,9 +11352,11 @@ struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SubGraphBuilder
{
+ typedef SubGraph Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_tensors(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>> tensors)
+ void
+ add_tensors(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>> tensors)
{
fbb_.AddOffset(SubGraph::VT_TENSORS, tensors);
}
@@ -9117,8 +11368,8 @@ struct SubGraphBuilder
{
fbb_.AddOffset(SubGraph::VT_OUTPUTS, outputs);
}
- void
- add_operators(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>> operators)
+ void add_operators(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>> operators)
{
fbb_.AddOffset(SubGraph::VT_OPERATORS, operators);
}
@@ -9126,7 +11377,7 @@ struct SubGraphBuilder
{
fbb_.AddOffset(SubGraph::VT_NAME, name);
}
- void add_data_format(DataFormat data_format)
+ void add_data_format(circle::DataFormat data_format)
{
fbb_.AddElement<int8_t>(SubGraph::VT_DATA_FORMAT, static_cast<int8_t>(data_format), 0);
}
@@ -9134,7 +11385,6 @@ struct SubGraphBuilder
{
start_ = fbb_.StartTable();
}
- SubGraphBuilder &operator=(const SubGraphBuilder &);
flatbuffers::Offset<SubGraph> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -9144,13 +11394,13 @@ struct SubGraphBuilder
};
inline flatbuffers::Offset<SubGraph> CreateSubGraph(
- flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>> tensors = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>> operators = 0,
- flatbuffers::Offset<flatbuffers::String> name = 0,
- DataFormat data_format = DataFormat_CHANNELS_LAST)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>> tensors = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>> operators = 0,
+ flatbuffers::Offset<flatbuffers::String> name = 0,
+ circle::DataFormat data_format = circle::DataFormat_CHANNELS_LAST)
{
SubGraphBuilder builder_(_fbb);
builder_.add_name(name);
@@ -9162,25 +11412,27 @@ inline flatbuffers::Offset<SubGraph> CreateSubGraph(
return builder_.Finish();
}
-inline flatbuffers::Offset<SubGraph>
-CreateSubGraphDirect(flatbuffers::FlatBufferBuilder &_fbb,
- const std::vector<flatbuffers::Offset<Tensor>> *tensors = nullptr,
- const std::vector<int32_t> *inputs = nullptr,
- const std::vector<int32_t> *outputs = nullptr,
- const std::vector<flatbuffers::Offset<Operator>> *operators = nullptr,
- const char *name = nullptr, DataFormat data_format = DataFormat_CHANNELS_LAST)
+inline flatbuffers::Offset<SubGraph> CreateSubGraphDirect(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ const std::vector<flatbuffers::Offset<circle::Tensor>> *tensors = nullptr,
+ const std::vector<int32_t> *inputs = nullptr, const std::vector<int32_t> *outputs = nullptr,
+ const std::vector<flatbuffers::Offset<circle::Operator>> *operators = nullptr,
+ const char *name = nullptr, circle::DataFormat data_format = circle::DataFormat_CHANNELS_LAST)
{
- return circle::CreateSubGraph(
- _fbb, tensors ? _fbb.CreateVector<flatbuffers::Offset<Tensor>>(*tensors) : 0,
- inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0,
- outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0,
- operators ? _fbb.CreateVector<flatbuffers::Offset<Operator>>(*operators) : 0,
- name ? _fbb.CreateString(name) : 0, data_format);
+ auto tensors__ = tensors ? _fbb.CreateVector<flatbuffers::Offset<circle::Tensor>>(*tensors) : 0;
+ auto inputs__ = inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0;
+ auto outputs__ = outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0;
+ auto operators__ =
+ operators ? _fbb.CreateVector<flatbuffers::Offset<circle::Operator>>(*operators) : 0;
+ auto name__ = name ? _fbb.CreateString(name) : 0;
+ return circle::CreateSubGraph(_fbb, tensors__, inputs__, outputs__, operators__, name__,
+ data_format);
}
struct Buffer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef BufferBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_DATA = 4
};
@@ -9197,6 +11449,7 @@ struct Buffer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct BufferBuilder
{
+ typedef Buffer Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_data(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data)
@@ -9207,7 +11460,6 @@ struct BufferBuilder
{
start_ = fbb_.StartTable();
}
- BufferBuilder &operator=(const BufferBuilder &);
flatbuffers::Offset<Buffer> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -9228,12 +11480,18 @@ CreateBuffer(flatbuffers::FlatBufferBuilder &_fbb,
inline flatbuffers::Offset<Buffer> CreateBufferDirect(flatbuffers::FlatBufferBuilder &_fbb,
const std::vector<uint8_t> *data = nullptr)
{
- return circle::CreateBuffer(_fbb, data ? _fbb.CreateVector<uint8_t>(*data) : 0);
+ if (data)
+ {
+ _fbb.ForceVectorAlignment(data->size(), sizeof(uint8_t), 16);
+ }
+ auto data__ = data ? _fbb.CreateVector<uint8_t>(*data) : 0;
+ return circle::CreateBuffer(_fbb, data__);
}
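The rewritten CreateBufferDirect above now calls ForceVectorAlignment before serializing the
payload, so buffer data lands on a 16-byte boundary in the finished file, which keeps tensor
payloads alignment-friendly when the model is memory-mapped. A caller sketch (illustrative only;
the payload values are arbitrary):

  flatbuffers::FlatBufferBuilder fbb;
  std::vector<uint8_t> payload = {1, 2, 3, 4};
  // The builder inserts padding so the vector body starts 16-byte aligned.
  auto buffer = circle::CreateBufferDirect(fbb, &payload);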
struct Metadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef MetadataBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_NAME = 4,
VT_BUFFER = 6
@@ -9253,6 +11511,7 @@ struct Metadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct MetadataBuilder
{
+ typedef Metadata Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_name(flatbuffers::Offset<flatbuffers::String> name)
@@ -9264,7 +11523,6 @@ struct MetadataBuilder
{
start_ = fbb_.StartTable();
}
- MetadataBuilder &operator=(const MetadataBuilder &);
flatbuffers::Offset<Metadata> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -9287,12 +11545,176 @@ inline flatbuffers::Offset<Metadata> CreateMetadataDirect(flatbuffers::FlatBuffe
const char *name = nullptr,
uint32_t buffer = 0)
{
- return circle::CreateMetadata(_fbb, name ? _fbb.CreateString(name) : 0, buffer);
+ auto name__ = name ? _fbb.CreateString(name) : 0;
+ return circle::CreateMetadata(_fbb, name__, buffer);
+}
+
+struct TensorMap FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef TensorMapBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_NAME = 4,
+ VT_TENSOR_INDEX = 6
+ };
+ const flatbuffers::String *name() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_NAME);
+ }
+ uint32_t tensor_index() const { return GetField<uint32_t>(VT_TENSOR_INDEX, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_NAME) &&
+ verifier.VerifyString(name()) && VerifyField<uint32_t>(verifier, VT_TENSOR_INDEX) &&
+ verifier.EndTable();
+ }
+};
+
+struct TensorMapBuilder
+{
+ typedef TensorMap Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_name(flatbuffers::Offset<flatbuffers::String> name)
+ {
+ fbb_.AddOffset(TensorMap::VT_NAME, name);
+ }
+ void add_tensor_index(uint32_t tensor_index)
+ {
+ fbb_.AddElement<uint32_t>(TensorMap::VT_TENSOR_INDEX, tensor_index, 0);
+ }
+ explicit TensorMapBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<TensorMap> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<TensorMap>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<TensorMap>
+CreateTensorMap(flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::String> name = 0, uint32_t tensor_index = 0)
+{
+ TensorMapBuilder builder_(_fbb);
+ builder_.add_tensor_index(tensor_index);
+ builder_.add_name(name);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<TensorMap> CreateTensorMapDirect(flatbuffers::FlatBufferBuilder &_fbb,
+ const char *name = nullptr,
+ uint32_t tensor_index = 0)
+{
+ auto name__ = name ? _fbb.CreateString(name) : 0;
+ return circle::CreateTensorMap(_fbb, name__, tensor_index);
+}
+
+struct SignatureDef FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef SignatureDefBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_INPUTS = 4,
+ VT_OUTPUTS = 6,
+ VT_SIGNATURE_KEY = 8,
+ VT_SUBGRAPH_INDEX = 12
+ };
+ const flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>> *inputs() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>> *>(
+ VT_INPUTS);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>> *outputs() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>> *>(
+ VT_OUTPUTS);
+ }
+ const flatbuffers::String *signature_key() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_SIGNATURE_KEY);
+ }
+ uint32_t subgraph_index() const { return GetField<uint32_t>(VT_SUBGRAPH_INDEX, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_INPUTS) &&
+ verifier.VerifyVector(inputs()) && verifier.VerifyVectorOfTables(inputs()) &&
+ VerifyOffset(verifier, VT_OUTPUTS) && verifier.VerifyVector(outputs()) &&
+ verifier.VerifyVectorOfTables(outputs()) && VerifyOffset(verifier, VT_SIGNATURE_KEY) &&
+ verifier.VerifyString(signature_key()) &&
+ VerifyField<uint32_t>(verifier, VT_SUBGRAPH_INDEX) && verifier.EndTable();
+ }
+};
+
+struct SignatureDefBuilder
+{
+ typedef SignatureDef Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_inputs(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>>> inputs)
+ {
+ fbb_.AddOffset(SignatureDef::VT_INPUTS, inputs);
+ }
+ void add_outputs(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>>> outputs)
+ {
+ fbb_.AddOffset(SignatureDef::VT_OUTPUTS, outputs);
+ }
+ void add_signature_key(flatbuffers::Offset<flatbuffers::String> signature_key)
+ {
+ fbb_.AddOffset(SignatureDef::VT_SIGNATURE_KEY, signature_key);
+ }
+ void add_subgraph_index(uint32_t subgraph_index)
+ {
+ fbb_.AddElement<uint32_t>(SignatureDef::VT_SUBGRAPH_INDEX, subgraph_index, 0);
+ }
+ explicit SignatureDefBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<SignatureDef> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SignatureDef>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SignatureDef> CreateSignatureDef(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>>> inputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>>> outputs = 0,
+ flatbuffers::Offset<flatbuffers::String> signature_key = 0, uint32_t subgraph_index = 0)
+{
+ SignatureDefBuilder builder_(_fbb);
+ builder_.add_subgraph_index(subgraph_index);
+ builder_.add_signature_key(signature_key);
+ builder_.add_outputs(outputs);
+ builder_.add_inputs(inputs);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<SignatureDef> CreateSignatureDefDirect(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ const std::vector<flatbuffers::Offset<circle::TensorMap>> *inputs = nullptr,
+ const std::vector<flatbuffers::Offset<circle::TensorMap>> *outputs = nullptr,
+ const char *signature_key = nullptr, uint32_t subgraph_index = 0)
+{
+ auto inputs__ = inputs ? _fbb.CreateVector<flatbuffers::Offset<circle::TensorMap>>(*inputs) : 0;
+ auto outputs__ =
+ outputs ? _fbb.CreateVector<flatbuffers::Offset<circle::TensorMap>>(*outputs) : 0;
+ auto signature_key__ = signature_key ? _fbb.CreateString(signature_key) : 0;
+ return circle::CreateSignatureDef(_fbb, inputs__, outputs__, signature_key__, subgraph_index);
}
struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef ModelBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_VERSION = 4,
VT_OPERATOR_CODES = 6,
@@ -9300,33 +11722,41 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
VT_DESCRIPTION = 10,
VT_BUFFERS = 12,
VT_METADATA_BUFFER = 14,
- VT_METADATA = 16
+ VT_METADATA = 16,
+ VT_SIGNATURE_DEFS = 18
};
uint32_t version() const { return GetField<uint32_t>(VT_VERSION, 0); }
- const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *operator_codes() const
+ const flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>> *operator_codes() const
{
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *>(
- VT_OPERATOR_CODES);
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>> *>(
+ VT_OPERATOR_CODES);
}
- const flatbuffers::Vector<flatbuffers::Offset<SubGraph>> *subgraphs() const
+ const flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>> *subgraphs() const
{
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<SubGraph>> *>(VT_SUBGRAPHS);
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>> *>(
+ VT_SUBGRAPHS);
}
const flatbuffers::String *description() const
{
return GetPointer<const flatbuffers::String *>(VT_DESCRIPTION);
}
- const flatbuffers::Vector<flatbuffers::Offset<Buffer>> *buffers() const
+ const flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>> *buffers() const
{
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Buffer>> *>(VT_BUFFERS);
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>> *>(VT_BUFFERS);
}
const flatbuffers::Vector<int32_t> *metadata_buffer() const
{
return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_METADATA_BUFFER);
}
- const flatbuffers::Vector<flatbuffers::Offset<Metadata>> *metadata() const
+ const flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>> *metadata() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>> *>(
+ VT_METADATA);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<circle::SignatureDef>> *signature_defs() const
{
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Metadata>> *>(VT_METADATA);
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::SignatureDef>> *>(
+ VT_SIGNATURE_DEFS);
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -9339,22 +11769,26 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
verifier.VerifyVector(buffers()) && verifier.VerifyVectorOfTables(buffers()) &&
VerifyOffset(verifier, VT_METADATA_BUFFER) && verifier.VerifyVector(metadata_buffer()) &&
VerifyOffset(verifier, VT_METADATA) && verifier.VerifyVector(metadata()) &&
- verifier.VerifyVectorOfTables(metadata()) && verifier.EndTable();
+ verifier.VerifyVectorOfTables(metadata()) && VerifyOffset(verifier, VT_SIGNATURE_DEFS) &&
+ verifier.VerifyVector(signature_defs()) &&
+ verifier.VerifyVectorOfTables(signature_defs()) && verifier.EndTable();
}
};
struct ModelBuilder
{
+ typedef Model Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_version(uint32_t version) { fbb_.AddElement<uint32_t>(Model::VT_VERSION, version, 0); }
void add_operator_codes(
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes)
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>>>
+ operator_codes)
{
fbb_.AddOffset(Model::VT_OPERATOR_CODES, operator_codes);
}
- void
- add_subgraphs(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>> subgraphs)
+ void add_subgraphs(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>> subgraphs)
{
fbb_.AddOffset(Model::VT_SUBGRAPHS, subgraphs);
}
@@ -9362,7 +11796,8 @@ struct ModelBuilder
{
fbb_.AddOffset(Model::VT_DESCRIPTION, description);
}
- void add_buffers(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>> buffers)
+ void
+ add_buffers(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>>> buffers)
{
fbb_.AddOffset(Model::VT_BUFFERS, buffers);
}
@@ -9370,16 +11805,21 @@ struct ModelBuilder
{
fbb_.AddOffset(Model::VT_METADATA_BUFFER, metadata_buffer);
}
- void
- add_metadata(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Metadata>>> metadata)
+ void add_metadata(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>>> metadata)
{
fbb_.AddOffset(Model::VT_METADATA, metadata);
}
+ void add_signature_defs(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::SignatureDef>>>
+ signature_defs)
+ {
+ fbb_.AddOffset(Model::VT_SIGNATURE_DEFS, signature_defs);
+ }
explicit ModelBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- ModelBuilder &operator=(const ModelBuilder &);
flatbuffers::Offset<Model> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -9389,15 +11829,19 @@ struct ModelBuilder
};
inline flatbuffers::Offset<Model> CreateModel(
- flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>> subgraphs = 0,
- flatbuffers::Offset<flatbuffers::String> description = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>> buffers = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Metadata>>> metadata = 0)
+ flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>>>
+ operator_codes = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>> subgraphs = 0,
+ flatbuffers::Offset<flatbuffers::String> description = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>>> buffers = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>>> metadata = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::SignatureDef>>>
+ signature_defs = 0)
{
ModelBuilder builder_(_fbb);
+ builder_.add_signature_defs(signature_defs);
builder_.add_metadata(metadata);
builder_.add_metadata_buffer(metadata_buffer);
builder_.add_buffers(buffers);
@@ -9408,23 +11852,31 @@ inline flatbuffers::Offset<Model> CreateModel(
return builder_.Finish();
}
-inline flatbuffers::Offset<Model>
-CreateModelDirect(flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
- const std::vector<flatbuffers::Offset<OperatorCode>> *operator_codes = nullptr,
- const std::vector<flatbuffers::Offset<SubGraph>> *subgraphs = nullptr,
- const char *description = nullptr,
- const std::vector<flatbuffers::Offset<Buffer>> *buffers = nullptr,
- const std::vector<int32_t> *metadata_buffer = nullptr,
- const std::vector<flatbuffers::Offset<Metadata>> *metadata = nullptr)
-{
- return circle::CreateModel(
- _fbb, version,
- operator_codes ? _fbb.CreateVector<flatbuffers::Offset<OperatorCode>>(*operator_codes) : 0,
- subgraphs ? _fbb.CreateVector<flatbuffers::Offset<SubGraph>>(*subgraphs) : 0,
- description ? _fbb.CreateString(description) : 0,
- buffers ? _fbb.CreateVector<flatbuffers::Offset<Buffer>>(*buffers) : 0,
- metadata_buffer ? _fbb.CreateVector<int32_t>(*metadata_buffer) : 0,
- metadata ? _fbb.CreateVector<flatbuffers::Offset<Metadata>>(*metadata) : 0);
+inline flatbuffers::Offset<Model> CreateModelDirect(
+ flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
+ const std::vector<flatbuffers::Offset<circle::OperatorCode>> *operator_codes = nullptr,
+ const std::vector<flatbuffers::Offset<circle::SubGraph>> *subgraphs = nullptr,
+ const char *description = nullptr,
+ const std::vector<flatbuffers::Offset<circle::Buffer>> *buffers = nullptr,
+ const std::vector<int32_t> *metadata_buffer = nullptr,
+ const std::vector<flatbuffers::Offset<circle::Metadata>> *metadata = nullptr,
+ const std::vector<flatbuffers::Offset<circle::SignatureDef>> *signature_defs = nullptr)
+{
+ auto operator_codes__ =
+ operator_codes ? _fbb.CreateVector<flatbuffers::Offset<circle::OperatorCode>>(*operator_codes)
+ : 0;
+ auto subgraphs__ =
+ subgraphs ? _fbb.CreateVector<flatbuffers::Offset<circle::SubGraph>>(*subgraphs) : 0;
+ auto description__ = description ? _fbb.CreateString(description) : 0;
+ auto buffers__ = buffers ? _fbb.CreateVector<flatbuffers::Offset<circle::Buffer>>(*buffers) : 0;
+ auto metadata_buffer__ = metadata_buffer ? _fbb.CreateVector<int32_t>(*metadata_buffer) : 0;
+ auto metadata__ =
+ metadata ? _fbb.CreateVector<flatbuffers::Offset<circle::Metadata>>(*metadata) : 0;
+ auto signature_defs__ =
+ signature_defs ? _fbb.CreateVector<flatbuffers::Offset<circle::SignatureDef>>(*signature_defs)
+ : 0;
+ return circle::CreateModel(_fbb, version, operator_codes__, subgraphs__, description__, buffers__,
+ metadata_buffer__, metadata__, signature_defs__);
}
inline bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const void *obj,
@@ -9438,11 +11890,11 @@ inline bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const voi
}
case QuantizationDetails_CustomQuantization:
{
- auto ptr = reinterpret_cast<const CustomQuantization *>(obj);
+ auto ptr = reinterpret_cast<const circle::CustomQuantization *>(obj);
return verifier.VerifyTable(ptr);
}
default:
- return false;
+ return true;
}
}
@@ -9477,21 +11929,21 @@ inline bool VerifySparseIndexVector(flatbuffers::Verifier &verifier, const void
}
case SparseIndexVector_Int32Vector:
{
- auto ptr = reinterpret_cast<const Int32Vector *>(obj);
+ auto ptr = reinterpret_cast<const circle::Int32Vector *>(obj);
return verifier.VerifyTable(ptr);
}
case SparseIndexVector_Uint16Vector:
{
- auto ptr = reinterpret_cast<const Uint16Vector *>(obj);
+ auto ptr = reinterpret_cast<const circle::Uint16Vector *>(obj);
return verifier.VerifyTable(ptr);
}
case SparseIndexVector_Uint8Vector:
{
- auto ptr = reinterpret_cast<const Uint8Vector *>(obj);
+ auto ptr = reinterpret_cast<const circle::Uint8Vector *>(obj);
return verifier.VerifyTable(ptr);
}
default:
- return false;
+ return true;
}
}
@@ -9525,526 +11977,651 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob
}
case BuiltinOptions_Conv2DOptions:
{
- auto ptr = reinterpret_cast<const Conv2DOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::Conv2DOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_DepthwiseConv2DOptions:
{
- auto ptr = reinterpret_cast<const DepthwiseConv2DOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::DepthwiseConv2DOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ConcatEmbeddingsOptions:
{
- auto ptr = reinterpret_cast<const ConcatEmbeddingsOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::ConcatEmbeddingsOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_LSHProjectionOptions:
{
- auto ptr = reinterpret_cast<const LSHProjectionOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::LSHProjectionOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_Pool2DOptions:
{
- auto ptr = reinterpret_cast<const Pool2DOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::Pool2DOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SVDFOptions:
{
- auto ptr = reinterpret_cast<const SVDFOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::SVDFOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_RNNOptions:
{
- auto ptr = reinterpret_cast<const RNNOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::RNNOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_FullyConnectedOptions:
{
- auto ptr = reinterpret_cast<const FullyConnectedOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::FullyConnectedOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SoftmaxOptions:
{
- auto ptr = reinterpret_cast<const SoftmaxOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::SoftmaxOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ConcatenationOptions:
{
- auto ptr = reinterpret_cast<const ConcatenationOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::ConcatenationOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_AddOptions:
{
- auto ptr = reinterpret_cast<const AddOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::AddOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_L2NormOptions:
{
- auto ptr = reinterpret_cast<const L2NormOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::L2NormOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_LocalResponseNormalizationOptions:
{
- auto ptr = reinterpret_cast<const LocalResponseNormalizationOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::LocalResponseNormalizationOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_LSTMOptions:
{
- auto ptr = reinterpret_cast<const LSTMOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::LSTMOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ResizeBilinearOptions:
{
- auto ptr = reinterpret_cast<const ResizeBilinearOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::ResizeBilinearOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_CallOptions:
{
- auto ptr = reinterpret_cast<const CallOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::CallOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ReshapeOptions:
{
- auto ptr = reinterpret_cast<const ReshapeOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::ReshapeOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SkipGramOptions:
{
- auto ptr = reinterpret_cast<const SkipGramOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::SkipGramOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SpaceToDepthOptions:
{
- auto ptr = reinterpret_cast<const SpaceToDepthOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::SpaceToDepthOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_EmbeddingLookupSparseOptions:
{
- auto ptr = reinterpret_cast<const EmbeddingLookupSparseOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::EmbeddingLookupSparseOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_MulOptions:
{
- auto ptr = reinterpret_cast<const MulOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::MulOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_PadOptions:
{
- auto ptr = reinterpret_cast<const PadOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::PadOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_GatherOptions:
{
- auto ptr = reinterpret_cast<const GatherOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::GatherOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_BatchToSpaceNDOptions:
{
- auto ptr = reinterpret_cast<const BatchToSpaceNDOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::BatchToSpaceNDOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SpaceToBatchNDOptions:
{
- auto ptr = reinterpret_cast<const SpaceToBatchNDOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::SpaceToBatchNDOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_TransposeOptions:
{
- auto ptr = reinterpret_cast<const TransposeOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::TransposeOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ReducerOptions:
{
- auto ptr = reinterpret_cast<const ReducerOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::ReducerOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SubOptions:
{
- auto ptr = reinterpret_cast<const SubOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::SubOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_DivOptions:
{
- auto ptr = reinterpret_cast<const DivOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::DivOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SqueezeOptions:
{
- auto ptr = reinterpret_cast<const SqueezeOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::SqueezeOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SequenceRNNOptions:
{
- auto ptr = reinterpret_cast<const SequenceRNNOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::SequenceRNNOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_StridedSliceOptions:
{
- auto ptr = reinterpret_cast<const StridedSliceOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::StridedSliceOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ExpOptions:
{
- auto ptr = reinterpret_cast<const ExpOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::ExpOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_TopKV2Options:
{
- auto ptr = reinterpret_cast<const TopKV2Options *>(obj);
+ auto ptr = reinterpret_cast<const circle::TopKV2Options *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SplitOptions:
{
- auto ptr = reinterpret_cast<const SplitOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::SplitOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_LogSoftmaxOptions:
{
- auto ptr = reinterpret_cast<const LogSoftmaxOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::LogSoftmaxOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_CastOptions:
{
- auto ptr = reinterpret_cast<const CastOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::CastOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_DequantizeOptions:
{
- auto ptr = reinterpret_cast<const DequantizeOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::DequantizeOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_MaximumMinimumOptions:
{
- auto ptr = reinterpret_cast<const MaximumMinimumOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::MaximumMinimumOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ArgMaxOptions:
{
- auto ptr = reinterpret_cast<const ArgMaxOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::ArgMaxOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_LessOptions:
{
- auto ptr = reinterpret_cast<const LessOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::LessOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_NegOptions:
{
- auto ptr = reinterpret_cast<const NegOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::NegOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_PadV2Options:
{
- auto ptr = reinterpret_cast<const PadV2Options *>(obj);
+ auto ptr = reinterpret_cast<const circle::PadV2Options *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_GreaterOptions:
{
- auto ptr = reinterpret_cast<const GreaterOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::GreaterOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_GreaterEqualOptions:
{
- auto ptr = reinterpret_cast<const GreaterEqualOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::GreaterEqualOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_LessEqualOptions:
{
- auto ptr = reinterpret_cast<const LessEqualOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::LessEqualOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SelectOptions:
{
- auto ptr = reinterpret_cast<const SelectOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::SelectOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SliceOptions:
{
- auto ptr = reinterpret_cast<const SliceOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::SliceOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_TransposeConvOptions:
{
- auto ptr = reinterpret_cast<const TransposeConvOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::TransposeConvOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SparseToDenseOptions:
{
- auto ptr = reinterpret_cast<const SparseToDenseOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::SparseToDenseOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_TileOptions:
{
- auto ptr = reinterpret_cast<const TileOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::TileOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ExpandDimsOptions:
{
- auto ptr = reinterpret_cast<const ExpandDimsOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::ExpandDimsOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_EqualOptions:
{
- auto ptr = reinterpret_cast<const EqualOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::EqualOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_NotEqualOptions:
{
- auto ptr = reinterpret_cast<const NotEqualOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::NotEqualOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ShapeOptions:
{
- auto ptr = reinterpret_cast<const ShapeOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::ShapeOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_PowOptions:
{
- auto ptr = reinterpret_cast<const PowOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::PowOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ArgMinOptions:
{
- auto ptr = reinterpret_cast<const ArgMinOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::ArgMinOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_FakeQuantOptions:
{
- auto ptr = reinterpret_cast<const FakeQuantOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::FakeQuantOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_PackOptions:
{
- auto ptr = reinterpret_cast<const PackOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::PackOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_LogicalOrOptions:
{
- auto ptr = reinterpret_cast<const LogicalOrOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::LogicalOrOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_OneHotOptions:
{
- auto ptr = reinterpret_cast<const OneHotOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::OneHotOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_LogicalAndOptions:
{
- auto ptr = reinterpret_cast<const LogicalAndOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::LogicalAndOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_LogicalNotOptions:
{
- auto ptr = reinterpret_cast<const LogicalNotOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::LogicalNotOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_UnpackOptions:
{
- auto ptr = reinterpret_cast<const UnpackOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::UnpackOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_FloorDivOptions:
{
- auto ptr = reinterpret_cast<const FloorDivOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::FloorDivOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SquareOptions:
{
- auto ptr = reinterpret_cast<const SquareOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::SquareOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ZerosLikeOptions:
{
- auto ptr = reinterpret_cast<const ZerosLikeOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::ZerosLikeOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_FillOptions:
{
- auto ptr = reinterpret_cast<const FillOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::FillOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_BidirectionalSequenceLSTMOptions:
{
- auto ptr = reinterpret_cast<const BidirectionalSequenceLSTMOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::BidirectionalSequenceLSTMOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_BidirectionalSequenceRNNOptions:
{
- auto ptr = reinterpret_cast<const BidirectionalSequenceRNNOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::BidirectionalSequenceRNNOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_UnidirectionalSequenceLSTMOptions:
{
- auto ptr = reinterpret_cast<const UnidirectionalSequenceLSTMOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::UnidirectionalSequenceLSTMOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_FloorModOptions:
{
- auto ptr = reinterpret_cast<const FloorModOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::FloorModOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_RangeOptions:
{
- auto ptr = reinterpret_cast<const RangeOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::RangeOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ResizeNearestNeighborOptions:
{
- auto ptr = reinterpret_cast<const ResizeNearestNeighborOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::ResizeNearestNeighborOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_LeakyReluOptions:
{
- auto ptr = reinterpret_cast<const LeakyReluOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::LeakyReluOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SquaredDifferenceOptions:
{
- auto ptr = reinterpret_cast<const SquaredDifferenceOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::SquaredDifferenceOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_MirrorPadOptions:
{
- auto ptr = reinterpret_cast<const MirrorPadOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::MirrorPadOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_AbsOptions:
{
- auto ptr = reinterpret_cast<const AbsOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::AbsOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SplitVOptions:
{
- auto ptr = reinterpret_cast<const SplitVOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::SplitVOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_UniqueOptions:
{
- auto ptr = reinterpret_cast<const UniqueOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::UniqueOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ReverseV2Options:
{
- auto ptr = reinterpret_cast<const ReverseV2Options *>(obj);
+ auto ptr = reinterpret_cast<const circle::ReverseV2Options *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_AddNOptions:
{
- auto ptr = reinterpret_cast<const AddNOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::AddNOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_GatherNdOptions:
{
- auto ptr = reinterpret_cast<const GatherNdOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::GatherNdOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_CosOptions:
{
- auto ptr = reinterpret_cast<const CosOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::CosOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_WhereOptions:
{
- auto ptr = reinterpret_cast<const WhereOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::WhereOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_RankOptions:
{
- auto ptr = reinterpret_cast<const RankOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::RankOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ReverseSequenceOptions:
{
- auto ptr = reinterpret_cast<const ReverseSequenceOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::ReverseSequenceOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_MatrixDiagOptions:
{
- auto ptr = reinterpret_cast<const MatrixDiagOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::MatrixDiagOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_QuantizeOptions:
{
- auto ptr = reinterpret_cast<const QuantizeOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::QuantizeOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_MatrixSetDiagOptions:
{
- auto ptr = reinterpret_cast<const MatrixSetDiagOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::MatrixSetDiagOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_HardSwishOptions:
{
- auto ptr = reinterpret_cast<const HardSwishOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::HardSwishOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_IfOptions:
{
- auto ptr = reinterpret_cast<const IfOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::IfOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_WhileOptions:
{
- auto ptr = reinterpret_cast<const WhileOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::WhileOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_DepthToSpaceOptions:
{
- auto ptr = reinterpret_cast<const DepthToSpaceOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::DepthToSpaceOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_NonMaxSuppressionV4Options:
{
- auto ptr = reinterpret_cast<const NonMaxSuppressionV4Options *>(obj);
+ auto ptr = reinterpret_cast<const circle::NonMaxSuppressionV4Options *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_NonMaxSuppressionV5Options:
{
- auto ptr = reinterpret_cast<const NonMaxSuppressionV5Options *>(obj);
+ auto ptr = reinterpret_cast<const circle::NonMaxSuppressionV5Options *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ScatterNdOptions:
{
- auto ptr = reinterpret_cast<const ScatterNdOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::ScatterNdOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SelectV2Options:
{
- auto ptr = reinterpret_cast<const SelectV2Options *>(obj);
+ auto ptr = reinterpret_cast<const circle::SelectV2Options *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_DensifyOptions:
{
- auto ptr = reinterpret_cast<const DensifyOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::DensifyOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SegmentSumOptions:
{
- auto ptr = reinterpret_cast<const SegmentSumOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::SegmentSumOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_BatchMatMulOptions:
{
- auto ptr = reinterpret_cast<const BatchMatMulOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::BatchMatMulOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_CumsumOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::CumsumOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_CallOnceOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::CallOnceOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_BroadcastToOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::BroadcastToOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_Rfft2dOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::Rfft2dOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_Conv3DOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::Conv3DOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_HashtableOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::HashtableOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_HashtableFindOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::HashtableFindOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_HashtableImportOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::HashtableImportOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_HashtableSizeOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::HashtableSizeOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_VarHandleOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::VarHandleOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ReadVariableOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::ReadVariableOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_AssignVariableOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::AssignVariableOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_RandomOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::RandomOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_BucketizeOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::BucketizeOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_GeluOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::GeluOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_DynamicUpdateSliceOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::DynamicUpdateSliceOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_UnsortedSegmentProdOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::UnsortedSegmentProdOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_UnsortedSegmentMaxOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::UnsortedSegmentMaxOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_UnsortedSegmentMinOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::UnsortedSegmentMinOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_UnsortedSegmentSumOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::UnsortedSegmentSumOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ATan2Options:
+ {
+ auto ptr = reinterpret_cast<const circle::ATan2Options *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_SignOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::SignOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_BitcastOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::BitcastOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_BitwiseXorOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::BitwiseXorOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_RightShiftOptions:
+ {
+ auto ptr = reinterpret_cast<const circle::RightShiftOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_BCQGatherOptions:
{
- auto ptr = reinterpret_cast<const BCQGatherOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::BCQGatherOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_BCQFullyConnectedOptions:
{
- auto ptr = reinterpret_cast<const BCQFullyConnectedOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::BCQFullyConnectedOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_InstanceNormOptions:
{
- auto ptr = reinterpret_cast<const InstanceNormOptions *>(obj);
+ auto ptr = reinterpret_cast<const circle::InstanceNormOptions *>(obj);
return verifier.VerifyTable(ptr);
}
default:
- return false;
+ return true;
}
}
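The switch above is the schema-generated union verifier: every BuiltinOptions case casts the opaque union pointer to its concrete circle:: table type and verifies it, and the rewritten default now treats BuiltinOptions_NONE and unrecognized option codes as valid rather than rejecting the whole buffer. A minimal sketch of how such a verifier is usually driven, assuming the conventional flatbuffers-generated entry point for circle's root Model table:

    #include <flatbuffers/flatbuffers.h>

    bool is_valid_circle(const uint8_t *data, size_t size)
    {
      // The Verifier walks the buffer; the generated root verifier calls
      // VerifyBuiltinOptions() for each operator's option union along the way
      flatbuffers::Verifier verifier(data, size);
      return circle::VerifyModelBuffer(verifier); // generated Verify<Root>Buffer helper
    }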
diff --git a/runtime/onert/frontend/nnapi/ANeuralNetworksModel.test.cc b/runtime/onert/frontend/nnapi/ANeuralNetworksModel.test.cc
deleted file mode 100644
index 15a279a7e..000000000
--- a/runtime/onert/frontend/nnapi/ANeuralNetworksModel.test.cc
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "wrapper/ANeuralNetworksModel.h"
-
-TEST(MODEL, model_build)
-{
- ANeuralNetworksModel model;
- ASSERT_EQ(model.isFinished(), false);
-}
diff --git a/runtime/onert/frontend/nnapi/CMakeLists.txt b/runtime/onert/frontend/nnapi/CMakeLists.txt
index dafd84ccf..b66b32e89 100644
--- a/runtime/onert/frontend/nnapi/CMakeLists.txt
+++ b/runtime/onert/frontend/nnapi/CMakeLists.txt
@@ -24,4 +24,4 @@ target_link_libraries(test_onert_frontend_nnapi PRIVATE ${LIB_ONERT} dl)
target_link_libraries(test_onert_frontend_nnapi PRIVATE gtest)
target_link_libraries(test_onert_frontend_nnapi PRIVATE gtest_main)
-install(TARGETS test_onert_frontend_nnapi DESTINATION unittest_standalone)
+install(TARGETS test_onert_frontend_nnapi DESTINATION unittest)
diff --git a/runtime/onert/frontend/nnapi/compilation.cc b/runtime/onert/frontend/nnapi/compilation.cc
index 871c040ef..2c56f061a 100644
--- a/runtime/onert/frontend/nnapi/compilation.cc
+++ b/runtime/onert/frontend/nnapi/compilation.cc
@@ -58,7 +58,7 @@ int ANeuralNetworksCompilation_finish(ANeuralNetworksCompilation *compilation)
return ANEURALNETWORKS_UNEXPECTED_NULL;
}
- if (compilation->state() != ::onert::compiler::State::CREATED)
+ if (compilation->isFinished())
{
VERBOSE(NNAPI::Compilation) << "finish: Already finished" << std::endl;
return ANEURALNETWORKS_BAD_STATE;
@@ -87,7 +87,7 @@ int ANeuralNetworksCompilation_setPreference(ANeuralNetworksCompilation *compila
return ANEURALNETWORKS_UNEXPECTED_NULL;
}
- if (compilation->state() != ::onert::compiler::State::CREATED)
+ if (compilation->isFinished())
{
VERBOSE(NNAPI::Compilation) << "setPreference: Already finished" << std::endl;
return ANEURALNETWORKS_BAD_STATE;
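Both hunks above swap the removed compiler-state query for the new isFinished() predicate; the observable NNAPI contract stays the same: setPreference() is legal only before finish(), and finish() may succeed only once. A short sketch against the public NNAPI C API (real entry points, error handling elided):

    ANeuralNetworksCompilation *compilation = nullptr;
    ANeuralNetworksCompilation_create(model, &compilation);
    // Allowed only while the compilation has not been finished yet
    ANeuralNetworksCompilation_setPreference(compilation,
                                             ANEURALNETWORKS_PREFER_SUSTAINED_SPEED);
    ANeuralNetworksCompilation_finish(compilation);
    // A second finish() or a late setPreference() now returns
    // ANEURALNETWORKS_BAD_STATE, since isFinished() is already true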
diff --git a/runtime/onert/frontend/nnapi/execution.cc b/runtime/onert/frontend/nnapi/execution.cc
index ce7da579e..4e1a985f3 100644
--- a/runtime/onert/frontend/nnapi/execution.cc
+++ b/runtime/onert/frontend/nnapi/execution.cc
@@ -37,7 +37,7 @@ int ANeuralNetworksExecution_create(ANeuralNetworksCompilation *compilation,
return ANEURALNETWORKS_UNEXPECTED_NULL;
}
- std::shared_ptr<onert::exec::ExecutorMap> executors;
+ std::shared_ptr<onert::exec::IExecutors> executors;
compilation->publish(executors);
@@ -94,12 +94,36 @@ int ANeuralNetworksExecution_setInput(ANeuralNetworksExecution *execution, int32
// Omitted optional input
// Some inputs of the LSTM operation can be optional
+ // The permutation input of the Transpose operation can also be optional
if ((buffer == nullptr) && (length == 0))
{
+ uint32_t dims[1] = {0};
+ ANeuralNetworksOperandType compared_shape;
+ compared_shape.dimensionCount = 1;
+ compared_shape.dimensions = dims;
if (execution->hasUnspecifiedDims(operand_index))
{
return ANEURALNETWORKS_NO_ERROR;
}
+ else if (type == nullptr && execution->IsOptionalInput(operand_index))
+ {
+ if (!execution->setOptionalInput(index, type, buffer, length))
+ {
+ VERBOSE(NNAPI::Execution) << "setInput: Failed to set optional input" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ return ANEURALNETWORKS_NO_ERROR;
+ }
+ // TODO Change this condition to check for zero-sized tensors
+ else if (execution->compareShape(&compared_shape, operand_index))
+ {
+ if (!execution->setInput(index, type, buffer, length))
+ {
+ VERBOSE(NNAPI::Execution) << "setInput: Failed to set input" << std::endl;
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ return ANEURALNETWORKS_NO_ERROR;
+ }
else
{
VERBOSE(NNAPI::Execution) << "setInput: Cannot handle fully-specified shape on model build "
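Per the NNAPI convention, an omitted optional input is signaled by a null buffer of length 0. The new branches route that case either through setOptionalInput() when type is null and the operand is zero-sized, or through a plain setInput() when the operand matches a rank-1, zero-length shape. Caller-side sketch (input index 1 is a hypothetical optional permutation input of a Transpose):

    // nullptr/0 marks the input as absent, per the NNAPI optional-input convention
    ANeuralNetworksExecution_setInput(execution, /*index=*/1,
                                      /*type=*/nullptr,
                                      /*buffer=*/nullptr, /*length=*/0);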
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc
index 81cd38f4f..3b5edc180 100644
--- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc
@@ -18,21 +18,23 @@
#include "util/logging.h"
+using namespace onert;
+
// TODO Support multiple subgraphs
ANeuralNetworksCompilation::ANeuralNetworksCompilation(const ANeuralNetworksModel *model) noexcept
- : _subgraphs{model->getSubGraphs()}, _compiler{new onert::compiler::Compiler{_subgraphs}}
+ : _model{model->getModel()}, _coptions{compiler::CompilerOptions::fromGlobalConfig()},
+ _compiler{std::make_shared<compiler::Compiler>(_model, *_coptions)}
{
if (model->allowedToFp16())
- {
- _compiler->enableToFp16();
- }
+ _coptions->enableToFp16();
}
bool ANeuralNetworksCompilation::finish() noexcept
{
try
{
- _executors = _compiler->compile();
+ _artifact = _compiler->compile();
+ _compiler = nullptr;
}
catch (const std::exception &e)
{
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h
index 5f0650b9a..3898f1d5e 100644
--- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h
@@ -21,8 +21,9 @@
#include "compiler/Compiler.h"
#include "ir/Graph.h"
-#include "ir/Subgraphs.h"
-#include "exec/IExecutor.h"
+#include "ir/Model.h"
+#include "exec/IExecutors.h"
+#include "util/TracingCtx.h"
struct ANeuralNetworksCompilation
{
@@ -31,17 +32,18 @@ public:
public:
bool finish() noexcept;
+ bool isFinished() noexcept { return _compiler == nullptr; }
- onert::compiler::State state(void) noexcept { return _compiler->state(); }
- void publish(std::shared_ptr<onert::exec::ExecutorMap> &executors) noexcept
+ void publish(std::shared_ptr<onert::exec::IExecutors> &executors) noexcept
{
- executors = _executors;
+ executors = _artifact ? _artifact->_executors : nullptr;
}
private:
- std::shared_ptr<onert::ir::Subgraphs> _subgraphs;
+ std::shared_ptr<onert::ir::Model> _model;
+ std::unique_ptr<onert::compiler::CompilerOptions> _coptions;
std::shared_ptr<onert::compiler::Compiler> _compiler;
- std::shared_ptr<onert::exec::ExecutorMap> _executors;
+ std::shared_ptr<onert::compiler::CompilerArtifact> _artifact;
};
#endif
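With this header change the compilation keeps a CompilerArtifact instead of a bare executor map, and publish() hands out executors only when compile() actually produced them. A minimal consuming-side sketch, mirroring the updated execution.cc earlier in this patch:

    std::shared_ptr<onert::exec::IExecutors> executors;
    compilation->publish(executors); // stays null if finish() failed or never ran
    if (executors == nullptr)
      return ANEURALNETWORKS_BAD_STATE; // plausible caller reaction; not part of this patch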
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksEvent.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksEvent.cc
index 2bea729be..b0ea51917 100644
--- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksEvent.cc
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksEvent.cc
@@ -20,7 +20,7 @@
#include "util/logging.h"
ANeuralNetworksEvent::ANeuralNetworksEvent(const std::shared_ptr<onert::exec::Execution> &execution)
- : _execution{execution}
+ : _execution{execution}
{
// DO NOTHING
}
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc
index eb12d7e76..2265e990f 100644
--- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc
@@ -64,7 +64,7 @@ bool ANeuralNetworksExecution::compareDataType(const ANeuralNetworksOperandType
{
try
{
- const auto operand_type = _execution->primary_subgraph().operands().at(index).typeInfo();
+ const auto &operand_type = _execution->primary_subgraph().operands().at(index).typeInfo();
const auto typeInfo = NNAPIConvert::getTypeInfo(type);
if (operand_type != typeInfo)
@@ -98,9 +98,20 @@ bool ANeuralNetworksExecution::compareShape(const ANeuralNetworksOperandType *ty
return operand_shape == shape_from_type;
}
+bool ANeuralNetworksExecution::IsOptionalInput(const onert::ir::OperandIndex index) noexcept
+{
+ const auto &operand_shape = _execution->primary_subgraph().operands().at(index).shape();
+ for (int32_t i = 0; i < operand_shape.rank(); ++i)
+ {
+ if (operand_shape.dim(i) != 0)
+ return false;
+ }
+ return true;
+}
+
bool ANeuralNetworksExecution::hasUnspecifiedDims(const onert::ir::OperandIndex index) noexcept
{
- const auto operand_shape = _execution->primary_subgraph().operands().at(index).shape();
+ const auto &operand_shape = _execution->primary_subgraph().operands().at(index).shape();
return operand_shape.hasUnspecifiedDims();
}
@@ -127,10 +138,10 @@ bool ANeuralNetworksExecution::setInput(uint32_t index, const ANeuralNetworksOpe
onert::ir::IOIndex input_index{index};
const auto operand_index = getInputOperandIndex(index);
- const auto type_info = _execution->primary_subgraph().operands().at(operand_index).typeInfo();
+ const auto &type_info = _execution->primary_subgraph().operands().at(operand_index).typeInfo();
const auto shape = (type != nullptr)
- ? NNAPIConvert::getShape(type)
- : _execution->primary_subgraph().operands().at(operand_index).shape();
+ ? NNAPIConvert::getShape(type)
+ : _execution->primary_subgraph().operands().at(operand_index).shape();
// NOTE The nnapi does not provide setting io_layout and not support changing layout. In other
// words, we can assume that io_layout from nnapi always is the same as layout of the used
@@ -148,6 +159,44 @@ bool ANeuralNetworksExecution::setInput(uint32_t index, const ANeuralNetworksOpe
return true;
}
+bool ANeuralNetworksExecution::setOptionalInput(uint32_t index,
+ const ANeuralNetworksOperandType *type,
+ const void *buffer, size_t length) noexcept
+{
+ assert(type == nullptr);
+ assert(buffer == nullptr);
+ assert(length == 0);
+ try
+ {
+ onert::ir::IOIndex input_index{index};
+ const auto operand_index = getInputOperandIndex(index);
+
+ const auto shape = (type != nullptr)
+ ? NNAPIConvert::getShape(type)
+ : _execution->primary_subgraph().operands().at(operand_index).shape();
+
+ // ANeuralNetworksExecution::setInput() uses only shape information
+ ANeuralNetworksOperandType optional_input_type;
+ optional_input_type.dimensionCount = shape.rank();
+ std::vector<uint32_t> dims(optional_input_type.dimensionCount);
+ for (uint32_t i = 0; i < optional_input_type.dimensionCount; ++i)
+ {
+ dims.at(i) = shape.dim(i);
+ }
+ optional_input_type.dimensions = dims.data();
+
+ return setInput(index, &optional_input_type, buffer, length);
+ }
+ catch (const std::exception &e)
+ {
+ VERBOSE(EXCEPTION) << e.what() << std::endl;
+
+ return false;
+ }
+
+ return true;
+}
+
bool ANeuralNetworksExecution::setOutput(uint32_t index, const ANeuralNetworksOperandType *type,
void *buffer, size_t length) noexcept
{
@@ -156,10 +205,10 @@ bool ANeuralNetworksExecution::setOutput(uint32_t index, const ANeuralNetworksOp
onert::ir::IOIndex output_index{index};
const auto operand_index = getOutputOperandIndex(index);
- const auto type_info = _execution->primary_subgraph().operands().at(operand_index).typeInfo();
+ const auto &type_info = _execution->primary_subgraph().operands().at(operand_index).typeInfo();
const auto shape = (type != nullptr)
- ? NNAPIConvert::getShape(type)
- : _execution->primary_subgraph().operands().at(operand_index).shape();
+ ? NNAPIConvert::getShape(type)
+ : _execution->primary_subgraph().operands().at(operand_index).shape();
// NOTE The nnapi does not provide setting io_layout and not support changing layout. In other
// words, we can assume that io_layout from nnapi always is the same as layout of the used
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h
index 848ae743f..6fbc4c2e0 100644
--- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h
@@ -26,8 +26,8 @@
struct ANeuralNetworksExecution
{
public:
- ANeuralNetworksExecution(const std::shared_ptr<onert::exec::ExecutorMap> &executors)
- : _execution{std::make_shared<onert::exec::Execution>(executors)}
+ ANeuralNetworksExecution(const std::shared_ptr<onert::exec::IExecutors> &executors)
+ : _execution{std::make_shared<onert::exec::Execution>(executors)}
{
// DO NOTHING
}
@@ -35,6 +35,8 @@ public:
public:
bool setInput(uint32_t index, const ANeuralNetworksOperandType *type, const void *buffer,
size_t length) noexcept;
+ bool setOptionalInput(uint32_t index, const ANeuralNetworksOperandType *type, const void *buffer,
+ size_t length) noexcept;
bool setOutput(uint32_t index, const ANeuralNetworksOperandType *type, void *buffer,
size_t length) noexcept;
bool startExecute(void) noexcept;
@@ -46,6 +48,7 @@ public:
const onert::ir::OperandIndex index) noexcept;
bool compareShape(const ANeuralNetworksOperandType *type,
const onert::ir::OperandIndex index) noexcept;
+ bool IsOptionalInput(const onert::ir::OperandIndex index) noexcept;
bool hasUnspecifiedDims(const onert::ir::OperandIndex index) noexcept;
size_t getOperandSize(const onert::ir::OperandIndex index) noexcept;
const std::shared_ptr<onert::exec::Execution> instance(void) noexcept;
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc
index 97b820aea..837dac954 100644
--- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc
@@ -27,7 +27,8 @@
// ANeuralNetworksModel
//
ANeuralNetworksModel::ANeuralNetworksModel() noexcept
- : _optional_operands{}, _operand_usages{}, _allowFloat32toFloat16{false}
+ : _finished_building{false}, _optional_operands{}, _operand_usages{}, _allowFloat32toFloat16{
+ false}
{
_graph = std::make_shared<onert::ir::Graph>();
}
@@ -72,12 +73,12 @@ bool ANeuralNetworksModel::setOperandValue(uint32_t index, const void *buffer, s
if (copy)
{
_graph->operands().at(ind).data(
- std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(buffer), length));
+ std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(buffer), length));
}
else
{
_graph->operands().at(ind).data(
- std::make_unique<ExternalData>(reinterpret_cast<const uint8_t *>(buffer), length));
+ std::make_unique<ExternalData>(reinterpret_cast<const uint8_t *>(buffer), length));
}
}
catch (const std::exception &e)
@@ -111,9 +112,9 @@ bool ANeuralNetworksModel::addOperation(ANeuralNetworksOperationType type, uint3
if (type == ANEURALNETWORKS_FULLY_CONNECTED)
{
const auto &input_operand =
- _graph->operands().at(node->getInputs().at(onert::ir::operation::FullyConnected::INPUT));
+ _graph->operands().at(node->getInputs().at(onert::ir::operation::FullyConnected::INPUT));
auto &weights_operand =
- _graph->operands().at(node->getInputs().at(onert::ir::operation::FullyConnected::WEIGHT));
+ _graph->operands().at(node->getInputs().at(onert::ir::operation::FullyConnected::WEIGHT));
if (input_operand.typeInfo().type() == onert::ir::DataType::FLOAT32 &&
weights_operand.typeInfo().type() == onert::ir::DataType::QUANT_UINT8_ASYMM)
{
@@ -208,9 +209,9 @@ bool ANeuralNetworksModel::finish() noexcept
{
fillOptionalOperand();
- _graph->finishBuilding();
-
+ _graph->verify();
_operand_usages.clear();
+ _finished_building = true;
}
catch (const std::exception &e)
{
@@ -222,7 +223,7 @@ bool ANeuralNetworksModel::finish() noexcept
return true;
}
-bool ANeuralNetworksModel::isFinished() noexcept { return !_graph->isBuildingPhase(); }
+bool ANeuralNetworksModel::isFinished() noexcept { return _finished_building; }
bool ANeuralNetworksModel::isExistOperand(uint32_t index) noexcept
{
@@ -260,8 +261,8 @@ void ANeuralNetworksModel::setOptionalOperand(const onert::ir::OperandIndex idx)
void ANeuralNetworksModel::fillOptionalOperand(void)
{
- _graph->operations().iterate([&](const onert::ir::OperationIndex &, onert::ir::Operation &node) {
- for (auto input : node.getInputs())
+ _graph->operations().iterate([&](const onert::ir::OperationIndex &, onert::ir::IOperation &node) {
+ for (auto &&input : node.getInputs())
{
// TODO fill default value for optional operands
if (_optional_operands.find(input) != _optional_operands.end())
@@ -272,16 +273,16 @@ void ANeuralNetworksModel::fillOptionalOperand(void)
});
}
-std::shared_ptr<onert::ir::Subgraphs> ANeuralNetworksModel::getSubGraphs() const
+std::shared_ptr<onert::ir::Model> ANeuralNetworksModel::getModel() const
{
- auto all_subgs = std::make_shared<onert::ir::Subgraphs>();
+ auto model = std::make_shared<onert::ir::Model>();
- all_subgs->push(onert::ir::SubgraphIndex{0}, _graph);
+ model->push(onert::ir::SubgraphIndex{0}, _graph);
// TODO Find all child subgraphs and copy them into the model
// Must find the same subgraph by comparing subgraph pointers and set the subgraph's
// index into operands of control-flow operations
// Must clear all child subgraphs' pointers to prevent memory leaks in case a graph
// contains itself recursively as a subgraph
- return all_subgs;
+ return model;
}
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.h b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.h
index df6c97c44..04f4cf0f2 100644
--- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.h
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.h
@@ -22,7 +22,7 @@
#include <NeuralNetworksEx.h>
#include "ir/Graph.h"
-#include "ir/Subgraphs.h"
+#include "ir/Model.h"
struct ANeuralNetworksModel
{
@@ -59,7 +59,7 @@ public:
size_t operandSize(uint32_t index) noexcept;
bool isUsageSet(uint32_t index) noexcept;
bool isOperationOutput(uint32_t index) noexcept;
- std::shared_ptr<onert::ir::Subgraphs> getSubGraphs() const;
+ std::shared_ptr<onert::ir::Model> getModel() const;
private:
void setOptionalOperand(const onert::ir::OperandIndex idx);
@@ -67,6 +67,7 @@ private:
private:
std::shared_ptr<onert::ir::Graph> _graph;
+ bool _finished_building;
std::unordered_set<onert::ir::OperandIndex> _optional_operands;
std::vector<OperandUsage> _operand_usages;
bool _allowFloat32toFloat16;
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.test.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.test.cc
new file mode 100644
index 000000000..bb42f2b08
--- /dev/null
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.test.cc
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "ANeuralNetworksModel.h"
+
+TEST(MODEL, neg_model_build)
+{
+ ANeuralNetworksModel model;
+ ASSERT_FALSE(model.isFinished());
+}
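The test returns under wrapper/ with the neg_ prefix used for negative cases, asserting that a freshly constructed model is not yet finished. A hypothetical positive counterpart, shown only for contrast (assumes finish() succeeds on an empty graph; not part of this patch):

    TEST(MODEL, model_build)
    {
      ANeuralNetworksModel model;
      ASSERT_TRUE(model.finish());
      ASSERT_TRUE(model.isFinished());
    }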
diff --git a/runtime/onert/frontend/nnapi/wrapper/NNAPIConvert.cc b/runtime/onert/frontend/nnapi/wrapper/NNAPIConvert.cc
index 63d4e3c09..94b8f02f5 100644
--- a/runtime/onert/frontend/nnapi/wrapper/NNAPIConvert.cc
+++ b/runtime/onert/frontend/nnapi/wrapper/NNAPIConvert.cc
@@ -39,6 +39,13 @@ DataType NNAPIConvert::getDataType(OperandCode type)
case ANEURALNETWORKS_BOOL:
case ANEURALNETWORKS_TENSOR_BOOL8:
return DataType::BOOL8;
+ case ANEURALNETWORKS_TENSOR_FLOAT16:
+ case ANEURALNETWORKS_FLOAT16:
+ return DataType::FLOAT16;
+ case ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL:
+ return DataType::QUANT_INT8_SYMM_PER_CHANNEL;
+ case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED:
+ return DataType::QUANT_INT8_ASYMM;
default:
throw std::runtime_error("Unsupported type");
}
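The new cases extend NNAPIConvert::getDataType() so scalar and tensor FP16 codes map to FLOAT16, per-channel symmetric quantization maps to QUANT_INT8_SYMM_PER_CHANNEL, and signed asymmetric quantization maps to QUANT_INT8_ASYMM; any unlisted code still throws. Sketch of the mapping in use:

    using onert::ir::DataType;
    // Both calls now succeed instead of throwing "Unsupported type"
    const auto fp16 = NNAPIConvert::getDataType(ANEURALNETWORKS_TENSOR_FLOAT16);
    const auto q8s = NNAPIConvert::getDataType(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED);
    assert(fp16 == DataType::FLOAT16);
    assert(q8s == DataType::QUANT_INT8_ASYMM);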
diff --git a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc
index 8e3d83db4..ba739f618 100644
--- a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc
+++ b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc
@@ -82,6 +82,27 @@ uint32_t getUint32Scalar(Operands &operands, const OperandIndex index)
return static_cast<uint32_t>(int32_value);
}
+Activation getActivation(Operands &operands, const OperandIndex index)
+{
+ switch (operands.at(index).asScalar<int32_t>())
+ {
+ case 0:
+ return Activation::NONE;
+ case 1:
+ return Activation::RELU;
+ case 2:
+ return Activation::RELU1;
+ case 3:
+ return Activation::RELU6;
+ case 4:
+ return Activation::TANH;
+ case 6:
+ return Activation::SIGMOID;
+ default:
+ throw std::runtime_error("Unsupported activation type");
+ }
+}
+
OperationFactory::Generator
getElementwiseActivationGenerator(const onert::ir::operation::ElementwiseActivation::Type op_type,
float alpha = 0.f, float beta = 0.f)
@@ -107,7 +128,7 @@ getElementwiseActivationGenerator(const onert::ir::operation::ElementwiseActivat
}
OperationFactory::Generator getElementwiseBinaryGenerator(
- const onert::ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type)
+ const onert::ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type)
{
return [op_type](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 2);
@@ -182,7 +203,7 @@ getBinaryArithmeticGenerator(const onert::ir::operation::BinaryArithmetic::Arith
param.arithmetic_type = op_type;
const auto activation_index = OperandIndex{init_param.inputs[2]};
param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
return new operation::BinaryArithmetic{inputs, outputs, param};
};
@@ -221,12 +242,12 @@ getPool2DGenerator(const onert::ir::operation::Pool2D::PoolType pool_type)
const auto activation_index = OperandIndex{init_param.inputs[6]};
param.padding.type =
- NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
+ NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
param.stride = makeStride(operands, hstride_index, vstride_index);
param.kw = getUint32Scalar(operands, kw_index);
param.kh = operands.at(kh_index).asScalar<uint32_t>();
param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
}
else // support explicit padding
{
@@ -259,7 +280,7 @@ getPool2DGenerator(const onert::ir::operation::Pool2D::PoolType pool_type)
param.kw = getUint32Scalar(operands, kw_index);
param.kh = getUint32Scalar(operands, kh_index);
param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
}
return new operation::Pool2D{inputs, outputs, param};
};
@@ -382,11 +403,11 @@ OperationFactory::OperationFactory()
const auto activation_index = OperandIndex{init_param.inputs[7]};
param.padding.type =
- NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
+ NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
param.stride = makeStride(operands, hstride_index, vstride_index);
param.multiplier = getUint32Scalar(operands, multiplier_index);
param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
}
else
{
@@ -417,9 +438,13 @@ OperationFactory::OperationFactory()
param.stride = makeStride(operands, hstride_index, vstride_index);
param.multiplier = getUint32Scalar(operands, multiplier_index);
param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
}
+ // TODO set dilation
+ param.dilation.width_factor = 1;
+ param.dilation.height_factor = 1;
+
return new operation::DepthwiseConv2D{inputs, outputs, param};
};
@@ -486,7 +511,8 @@ OperationFactory::OperationFactory()
operation::FullyConnected::Param param;
const auto activation_index = OperandIndex{init_param.inputs[3]};
param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ param.weights_format = FullyConnectedWeightsFormat::Default;
return new operation::FullyConnected{inputs, outputs, param};
};
@@ -512,11 +538,7 @@ OperationFactory::OperationFactory()
};
_map[ANEURALNETWORKS_CAST] =
- getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::CAST);
-
- // ANEURALNETWORKS_CAST_EX is deprecated
- // TODO Remove ANEURALNETWORKS_CAST_EX
- _map[ANEURALNETWORKS_CAST_EX] = _map[ANEURALNETWORKS_CAST];
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::CAST);
_map[ANEURALNETWORKS_CONV_2D] = [](const OperationFactory::Param &init_param,
Operands &operands) {
@@ -552,14 +574,14 @@ OperationFactory::OperationFactory()
const auto activation_index = OperandIndex{init_param.inputs[6]};
param.padding.type =
- NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
+ NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
param.stride = makeStride(operands, hstride_index, vstride_index);
param.dilation.width_factor = 1;
param.dilation.height_factor = 1;
param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
}
else if (init_param.input_count == 10) // support explicit padding
{
@@ -590,7 +612,7 @@ OperationFactory::OperationFactory()
param.dilation.height_factor = 1;
param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
}
else if (init_param.input_count == 13) // support dilation
{
@@ -628,7 +650,7 @@ OperationFactory::OperationFactory()
param.dilation.height_factor = height_factor;
param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
}
else
{
@@ -639,19 +661,15 @@ OperationFactory::OperationFactory()
};
_map[ANEURALNETWORKS_ADD] =
- getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::ADD);
+ getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::ADD);
_map[ANEURALNETWORKS_ADDV2_EX] = _map[ANEURALNETWORKS_ADD];
_map[ANEURALNETWORKS_REDUCE_SUM] =
- getReduceGenerator(onert::ir::operation::Reduce::ReduceType::SUM);
-
- // ANEURALNETWORKS_REDUCE_SUM_EX is deprecated
- // TODO Remove ANEURALNETWORKS_REDUCE_SUM_EX
- _map[ANEURALNETWORKS_REDUCE_SUM_EX] = _map[ANEURALNETWORKS_REDUCE_SUM];
+ getReduceGenerator(onert::ir::operation::Reduce::ReduceType::SUM);
_map[ANEURALNETWORKS_SUB] =
- getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::SUB);
+ getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::SUB);
_map[ANEURALNETWORKS_SLICE] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 3 && init_param.output_count == 1);
@@ -703,39 +721,15 @@ OperationFactory::OperationFactory()
param.begin_mask = operands.at(OperandIndex{init_param.inputs[4]}).asScalar<std::int32_t>();
param.end_mask = operands.at(OperandIndex{init_param.inputs[5]}).asScalar<std::int32_t>();
param.shrink_axis_mask =
- operands.at(OperandIndex{init_param.inputs[6]}).asScalar<std::int32_t>();
+ operands.at(OperandIndex{init_param.inputs[6]}).asScalar<std::int32_t>();
return new operation::StridedSlice{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_TRANSPOSE] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- // TODO make this work with init_param.input_count == 1 (when permutation vector is optional)
-
- // Inputs
- // 0: An n-D tensor, specifying the tensor to be transposed.
- // 1: An optional 1-D Tensor of {@link ANEURALNETWORKS_TENSOR_INT32},
- // the permutation of the dimensions of the input tensor.
- // The returned tensor's dimension i corresponds to the input dimension
- // perm[i]. If perm is not given, it is set to (n-1...0), where n is the
- // rank of the input tensor. Hence by default, this operation performs a
- // regular matrix transpose on 2-D input Tensors.
- assert(init_param.input_count == 2);
- assert(init_param.output_count == 1);
-
- OperandIndexSequence inputs{init_param.inputs[0]};
- OperandIndexSequence outputs{init_param.outputs[0]};
- std::vector<std::int32_t> perm =
- operands.at(OperandIndex{init_param.inputs[1]}).asVector<std::int32_t>();
-
- operation::Transpose::Param param;
- param.perm.assign(perm.cbegin(), perm.cend());
-
- return new operation::Transpose{inputs, outputs, param};
- };
+ _map[ANEURALNETWORKS_TRANSPOSE] = createSimpleBinaryOp<operation::Transpose>;
_map[ANEURALNETWORKS_MUL] =
- getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::MUL);
+ getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::MUL);
_map[ANEURALNETWORKS_SQUEEZE] = [](const OperationFactory::Param &init_param,
Operands &operands) {
@@ -777,151 +771,49 @@ OperationFactory::OperationFactory()
};
_map[ANEURALNETWORKS_TANH] = getElementwiseActivationGenerator(
- onert::ir::operation::ElementwiseActivation::Type::TANH, 1.f, 1.f);
+ onert::ir::operation::ElementwiseActivation::Type::TANH, 1.f, 1.f);
_map[ANEURALNETWORKS_LOG] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::LOG);
- _map[ANEURALNETWORKS_LOGISTIC] = getElementwiseActivationGenerator(
- onert::ir::operation::ElementwiseActivation::Type::LOGISTIC);
+ _map[ANEURALNETWORKS_LOGISTIC] =
+ getElementwiseActivationGenerator(onert::ir::operation::ElementwiseActivation::Type::LOGISTIC);
_map[ANEURALNETWORKS_DIV] =
- getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::DIV);
+ getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::DIV);
_map[ANEURALNETWORKS_EXP] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::EXP);
- // ANEURALNETWORKS_EXP_EX is deprecated
- // TODO Remove ANEURALNETWORKS_EXP_EX
- _map[ANEURALNETWORKS_EXP_EX] = _map[ANEURALNETWORKS_EXP];
-
// Each input should be interpreted as follows:
// 0 -> Input Tensor Index
// 1 -> Axis Tensor Index
_map[ANEURALNETWORKS_EXPAND_DIMS] = createSimpleBinaryOp<operation::ExpandDims>;
_map[ANEURALNETWORKS_GREATER] =
- getComparisonGenerator(operation::Comparison::ComparisonType::Greater);
+ getComparisonGenerator(operation::Comparison::ComparisonType::Greater);
_map[ANEURALNETWORKS_GREATER_EQUAL] =
- getComparisonGenerator(operation::Comparison::ComparisonType::GreaterEqual);
+ getComparisonGenerator(operation::Comparison::ComparisonType::GreaterEqual);
_map[ANEURALNETWORKS_LESS] = getComparisonGenerator(operation::Comparison::ComparisonType::Less);
_map[ANEURALNETWORKS_LESS_EQUAL] =
- getComparisonGenerator(operation::Comparison::ComparisonType::LessEqual);
+ getComparisonGenerator(operation::Comparison::ComparisonType::LessEqual);
_map[ANEURALNETWORKS_NOT_EQUAL] =
- getComparisonGenerator(operation::Comparison::ComparisonType::NotEqual);
+ getComparisonGenerator(operation::Comparison::ComparisonType::NotEqual);
_map[ANEURALNETWORKS_EQUAL] =
- getComparisonGenerator(operation::Comparison::ComparisonType::Equal);
-
- // ANEURALNETWORKS_GREATER_EQUAL_EX is deprecated
- // TODO Remove ANEURALNETWORKS_GREATER_EQUAL_EX
- _map[ANEURALNETWORKS_GREATER_EQUAL_EX] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input0 Tensor Index
- // 1 -> input1 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Comparison::Param param;
- param.comparison_type = operation::Comparison::ComparisonType::GreaterEqual;
-
- // Output operand type must be boolean
- replaceDataType(operands, outputs.at(0), DataType::BOOL8);
-
- return new operation::Comparison{inputs, outputs, param};
- };
-
- // ANEURALNETWORKS_LESS_EX is deprecated
- // TODO Remove ANEURALNETWORKS_LESS_EX
- _map[ANEURALNETWORKS_LESS_EX] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input0 Tensor Index
- // 1 -> input1 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Comparison::Param param;
- param.comparison_type = operation::Comparison::ComparisonType::Less;
-
- // Output operand type must be boolean
- replaceDataType(operands, outputs.at(0), DataType::BOOL8);
-
- return new operation::Comparison{inputs, outputs, param};
- };
+ getComparisonGenerator(operation::Comparison::ComparisonType::Equal);
_map[ANEURALNETWORKS_REDUCE_ALL] =
- getReduceGenerator(onert::ir::operation::Reduce::ReduceType::ALL);
+ getReduceGenerator(onert::ir::operation::Reduce::ReduceType::ALL);
_map[ANEURALNETWORKS_REDUCE_ANY] =
- getReduceGenerator(onert::ir::operation::Reduce::ReduceType::ANY);
+ getReduceGenerator(onert::ir::operation::Reduce::ReduceType::ANY);
_map[ANEURALNETWORKS_REDUCE_MAX] =
- getReduceGenerator(onert::ir::operation::Reduce::ReduceType::MAX);
-
- // ANEURALNETWORKS_REDUCE_MAX_EX is deprecated
- // TODO Remove ANEURALNETWORKS_REDUCE_MAX_EX
- _map[ANEURALNETWORKS_REDUCE_MAX_EX] = _map[ANEURALNETWORKS_REDUCE_MAX];
-
- // ANEURALNETWORKS_NOT_EQUAL_EX is deprecated
- // TODO Remove ANEURALNETWORKS_NOT_EQUAL_EX
- _map[ANEURALNETWORKS_NOT_EQUAL_EX] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input1 Tensor Index
- // 1 -> input2 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Comparison::Param param;
- param.comparison_type = operation::Comparison::ComparisonType::NotEqual;
-
- // Output operand type must be boolean
- replaceDataType(operands, outputs.at(0), DataType::BOOL8);
-
- return new operation::Comparison{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_LOGICAL_AND] = getElementwiseBinaryGenerator(
- operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND);
-
- // ANEURALNETWORKS_LOGICAL_AND_EX is deprecated
- // TODO Remove ANEURALNETWORKS_LOGICAL_AND_EX
- _map[ANEURALNETWORKS_LOGICAL_AND_EX] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input0 Tensor Index
- // 1 -> input1 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- // This operation's operands must be boolean type.
- replaceDataType(operands, inputs.at(0), DataType::BOOL8);
- replaceDataType(operands, inputs.at(1), DataType::BOOL8);
- replaceDataType(operands, outputs.at(0), DataType::BOOL8);
+ getReduceGenerator(onert::ir::operation::Reduce::ReduceType::MAX);
- operation::ElementwiseBinary::Param param;
- param.op_type = operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND;
-
- return new operation::ElementwiseBinary{inputs, outputs, param};
- };
+ _map[ANEURALNETWORKS_LOGICAL_AND] =
+ getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND);
_map[ANEURALNETWORKS_RSQRT] =
- getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::RSQRT);
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::RSQRT);
_map[ANEURALNETWORKS_SELECT] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 3 && init_param.output_count == 1);
@@ -938,28 +830,11 @@ OperationFactory::OperationFactory()
return new operation::Select{inputs, outputs};
};
- _map[ANEURALNETWORKS_SELECT_V2_EX] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 3 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Condition Tensor Index
- // 1 -> Input X(true) Tensor Index
- // 2 -> Input Y(false) Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1], init_param.inputs[2]};
-
- return new operation::Select{inputs, outputs};
- };
-
- // ANEURALNETWORKS_RSQRT_EX is deprecated
- // TODO Remove ANEURALNETWORKS_RSQRT_EX
- _map[ANEURALNETWORKS_RSQRT_EX] = _map[ANEURALNETWORKS_RSQRT];
+ _map[ANEURALNETWORKS_SELECT_V2_EX] = _map[ANEURALNETWORKS_SELECT];
_map[ANEURALNETWORKS_RELU] =
- getElementwiseActivationGenerator(onert::ir::operation::ElementwiseActivation::Type::RELU,
- onert::ir::operation::ElementwiseActivation::infinity, 0);
+ getElementwiseActivationGenerator(onert::ir::operation::ElementwiseActivation::Type::RELU,
+ onert::ir::operation::ElementwiseActivation::infinity, 0);
_map[ANEURALNETWORKS_RESIZE_BILINEAR] = [](const OperationFactory::Param &init_param,
Operands &operands) {
@@ -982,11 +857,33 @@ OperationFactory::OperationFactory()
return new operation::ResizeBilinear{inputs, outputs, param};
};
+ _map[ANEURALNETWORKS_RESIZE_NEAREST_NEIGHBOR] = [](const OperationFactory::Param &init_param,
+ Operands &operands) {
+ assert((init_param.input_count == 3 || init_param.input_count == 4) &&
+ init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> IFM Index
+ // 1 -> Height Index
+ // 2 -> Width Index
+ OperandIndexSequence inputs{init_param.inputs[0]};
+
+ operation::ResizeNearestNeighbor::Param param;
+ param.height_out = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<int32_t>();
+ param.width_out = operands.at(OperandIndex{init_param.inputs[2]}).asScalar<int32_t>();
+ param.align_corners = false;
+ // The layout input is not supported yet
+ return new operation::ResizeNearestNeighbor{inputs, outputs, param};
+ };
+
_map[ANEURALNETWORKS_RELU1] = getElementwiseActivationGenerator(
- onert::ir::operation::ElementwiseActivation::Type::RELU, 1.f, -1.f);
+ onert::ir::operation::ElementwiseActivation::Type::RELU, 1.f, -1.f);
_map[ANEURALNETWORKS_RELU6] = getElementwiseActivationGenerator(
- onert::ir::operation::ElementwiseActivation::Type::RELU, 6.f, 0.f);
+ onert::ir::operation::ElementwiseActivation::Type::RELU, 6.f, 0.f);
_map[ANEURALNETWORKS_REVERSE_EX] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 2 && init_param.output_count == 1);
@@ -1028,13 +925,13 @@ OperationFactory::OperationFactory()
operation::RNN::Param param;
const auto activation_index = OperandIndex{init_param.inputs[5]};
param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
return new operation::RNN{inputs, outputs, param};
};
_map[ANEURALNETWORKS_FLOOR] =
- getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::FLOOR);
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::FLOOR);
_map[ANEURALNETWORKS_SPACE_TO_BATCH_ND] = [](const OperationFactory::Param &init_param,
Operands &) {
@@ -1138,10 +1035,6 @@ OperationFactory::OperationFactory()
return new operation::PReLU{inputs, outputs};
};
- // ANEURALNETWORKS_PRELU_EX is deprecated
- // TODO Remove ANEURALNETWORKS_PRELU_EX
- _map[ANEURALNETWORKS_PRELU_EX] = _map[ANEURALNETWORKS_PRELU];
-
_map[ANEURALNETWORKS_TRANSPOSE_CONV_EX] = [](const OperationFactory::Param &init_param,
Operands &operands) {
assert(init_param.input_count == 6 && init_param.output_count == 1);
@@ -1166,72 +1059,20 @@ OperationFactory::OperationFactory()
const auto vstride_index = OperandIndex{init_param.inputs[5]};
param.padding.type =
- NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
+ NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
param.stride = makeStride(operands, hstride_index, vstride_index);
return new operation::TransposeConv{inputs, outputs, param};
};
_map[ANEURALNETWORKS_SQRT] =
- getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::SQRT);
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::SQRT);
- // ANEURALNETWORKS_SQRT_EX is deprecated
- // TODO Remove ANEURALNETWORKS_SQRT_EX
- _map[ANEURALNETWORKS_SQRT_EX] = _map[ANEURALNETWORKS_SQRT];
-
- _map[ANEURALNETWORKS_LOGICAL_OR] = getElementwiseBinaryGenerator(
- operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR);
-
- // ANEURALNETWORKS_LOGICAL_OR_EX is deprecated
- // TODO Remove ANEURALNETWORKS_LOGICAL_OR_EX
- _map[ANEURALNETWORKS_LOGICAL_OR_EX] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input0 Tensor Index
- // 1 -> input1 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- // This operation's operands must be boolean type.
- replaceDataType(operands, inputs.at(0), DataType::BOOL8);
- replaceDataType(operands, inputs.at(1), DataType::BOOL8);
- replaceDataType(operands, outputs.at(0), DataType::BOOL8);
-
- operation::ElementwiseBinary::Param param;
- param.op_type = operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR;
-
- return new operation::ElementwiseBinary{inputs, outputs, param};
- };
+ _map[ANEURALNETWORKS_LOGICAL_OR] =
+ getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR);
_map[ANEURALNETWORKS_LOGICAL_NOT] =
- getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::LOGICAL_NOT);
-
- // ANEURALNETWORKS_LOGICAL_NOT_EX is deprecated
- // TODO Remove ANEURALNETWORKS_LOGICAL_NOT_EX
- _map[ANEURALNETWORKS_LOGICAL_NOT_EX] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- // This operation's operands must be boolean type.
- replaceDataType(operands, inputs.at(0), DataType::BOOL8);
- replaceDataType(operands, outputs.at(0), DataType::BOOL8);
-
- operation::ElementwiseUnary::Param param;
- param.op_type = operation::ElementwiseUnary::Type::LOGICAL_NOT;
-
- return new operation::ElementwiseUnary{inputs, outputs, param};
- };
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::LOGICAL_NOT);
_map[ANEURALNETWORKS_LSTM] = [](const OperationFactory::Param &init_param, Operands &operands) {
assert(init_param.input_count == 23 && init_param.output_count == 4);
@@ -1277,58 +1118,86 @@ OperationFactory::OperationFactory()
}
operation::LSTM::Param param;
- const auto activation_index = OperandIndex{init_param.inputs[20]};
- switch (operands.at(activation_index).asScalar<int32_t>())
- {
- case 0:
- param.activation = Activation::NONE;
- break;
- case 1:
- param.activation = Activation::RELU;
- break;
- case 2:
- param.activation = Activation::RELU1;
- break;
- case 3:
- param.activation = Activation::RELU6;
- break;
- case 4:
- param.activation = Activation::TANH;
- break;
- case 6:
- param.activation = Activation::SIGMOID;
- break;
- default:
- throw std::runtime_error("Unsupported activation type");
- break;
- }
+ param.activation = getActivation(operands, OperandIndex{init_param.inputs[20]});
param.cell_threshold = operands.at(OperandIndex{init_param.inputs[21]}).asScalar<float>();
param.projection_threshold = operands.at(OperandIndex{init_param.inputs[22]}).asScalar<float>();
+ // Initialize to prevent warnings or errors from static code analyzers; the LSTM
+ // operation does not use time_major
+ param.time_major = false;
return new operation::LSTM{inputs, outputs, param};
};
- // ANEURALNETWORKS_EQUAL_EX is deprecated
- // TODO Remove ANEURALNETWORKS_EQUAL_EX
- _map[ANEURALNETWORKS_EQUAL_EX] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
+ _map[ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_LSTM] = [](const OperationFactory::Param &init_param,
+ Operands &operands) {
+ assert((init_param.input_count >= 24 && init_param.input_count <= 28) &&
+ (init_param.output_count >= 1 && init_param.output_count <= 3));
// Each input should be interpreted as follows:
//
- // 0 -> input0 Tensor Index
- // 1 -> input1 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+ // 0 -> Input Tensor Index
+ // 1 -> Input to Input Tensor Index
+ // 2 -> Input to Forget Tensor Index
+ // 3 -> Input to Cell Tensor Index
+ // 4 -> Input to Output Tensor Index
+ // 5 -> Recurrent to Input Weights Tensor Index
+ // 6 -> Recurrent to Forget Weights Tensor Index
+ // 7 -> Recurrent to Cell Weights Tensor Index
+ // 8 -> Recurrent to Output Weights Tensor Index
+ // 9 -> Cell to Input Weights Tensor Index
+ // 10 -> Cell to Forget Weights Tensor Index
+ // 11 -> Cell to Output Weights Tensor Index
+ // 12 -> Input Gate Bias Tensor Index
+ // 13 -> Forget Gate Bias Tensor Index
+ // 14 -> Cell Bias Tensor Index
+ // 15 -> Output Gate Bias Tensor Index
+ // 16 -> Projection Weights Tensor Index
+ // 17 -> Projection Bias Tensor Index
+ // 18 -> Output State In Tensor Index
+ // 19 -> Cell State In Tensor Index
+ assert(init_param.input_count - 3 > 20);
+ OperandIndexSequence inputs;
+ for (uint32_t n = 0; n < 20; ++n)
+ {
+ inputs.append(OperandIndex{init_param.inputs[n]});
+ }
- operation::Comparison::Param param;
- param.comparison_type = operation::Comparison::ComparisonType::Equal;
+ // 24 -> Input Layer Normalization Weights Tensor Index
+ // 25 -> Forget Layer Normalization Weights Tensor Index
+ // 26 -> Cell Layer Normalization Weights Tensor Index
+ // 27 -> Output Layer Normalization Weights Tensor Index
+ if (init_param.input_count > 24)
+ {
+ for (uint32_t n = 24; n < 28; ++n)
+ {
+ if (init_param.input_count > n)
+ {
+ inputs.append(OperandIndex{init_param.inputs[n]});
+ }
+ }
+ }
+
+ // Each output should be interpreted as follows:
+ //
+ // 0 -> Output Tensor Index (mapped to index 3 of onert's LSTM outputs)
+ // 1 -> Output State Out Tensor Index
+ // 2 -> Cell State Out Tensor Index
+ const OperandIndex scratch_buffer_index;
+ OperandIndex output_state_index =
+ init_param.output_count >= 2 ? OperandIndex{init_param.outputs[1]} : OperandIndex();
+ OperandIndex cell_state_index =
+ init_param.output_count >= 3 ? OperandIndex{init_param.outputs[2]} : OperandIndex();
+ const OperandIndex output_index = OperandIndex{init_param.outputs[0]};
+ OperandIndexSequence outputs{scratch_buffer_index, output_state_index, cell_state_index,
+ output_index};
- // Output operand type must be boolean
- replaceDataType(operands, outputs.at(0), DataType::BOOL8);
+ operation::LSTM::Param param;
+ param.activation = getActivation(operands, OperandIndex{init_param.inputs[20]});
+ param.cell_threshold = operands.at(OperandIndex{init_param.inputs[21]}).asScalar<float>();
+ param.projection_threshold = operands.at(OperandIndex{init_param.inputs[22]}).asScalar<float>();
+ param.time_major = operands.at(OperandIndex{init_param.inputs[23]}).asScalar<bool>();
- return new operation::Comparison{inputs, outputs, param};
+ return new operation::LSTM{inputs, outputs, param};
};
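The new UNIDIRECTIONAL_SEQUENCE_LSTM generator reuses getActivation(), whose 0/1/2/3/4/6 numbering appears to follow TFLite's fused-activation ordering (5, the sign-bit activation, is unsupported and throws). It also repacks outputs: NNAPI provides up to [output, output_state_out, cell_state_out], while onert's LSTM expects [scratch_buffer, output_state, cell_state, output]. A condensed sketch of that repacking, with a default-constructed OperandIndex as the undefined placeholder:

    // NNAPI outputs: 0 -> output, 1 -> output_state_out (opt), 2 -> cell_state_out (opt)
    // onert LSTM outputs: [scratch_buffer, output_state, cell_state, output]
    OperandIndexSequence outputs{OperandIndex{} /* no scratch buffer */,
                                 output_state_index, cell_state_index,
                                 OperandIndex{init_param.outputs[0]}};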
_map[ANEURALNETWORKS_SQUARED_DIFFERENCE_EX] = [](const OperationFactory::Param &init_param,
@@ -1368,10 +1237,6 @@ OperationFactory::OperationFactory()
return new operation::TopKV2{inputs, outputs, param};
};
- // ANEURALNETWORKS_CAST_EX is deprecated
- // TODO Remove ANEURALNETWORKS_CAST_EX
- _map[ANEURALNETWORKS_TOPK_V2_EX] = _map[ANEURALNETWORKS_TOPK_V2];
-
_map[ANEURALNETWORKS_GATHER] = [](const OperationFactory::Param &init_param, Operands &operands) {
assert(init_param.input_count == 3 && init_param.output_count == 1);
@@ -1390,23 +1255,11 @@ OperationFactory::OperationFactory()
return new operation::Gather{inputs, outputs, param};
};
- // ANEURALNETWORKS_GATHER_EX is deprecated
- // TODO Remove ANEURALNETWORKS_GATHER_EX
- _map[ANEURALNETWORKS_GATHER_EX] = _map[ANEURALNETWORKS_GATHER];
-
_map[ANEURALNETWORKS_NEG] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::NEG);
- // ANEURALNETWORKS_NEG_EX is deprecated
- // TODO Remove ANEURALNETWORKS_NEG_EX
- _map[ANEURALNETWORKS_NEG_EX] = _map[ANEURALNETWORKS_NEG];
-
_map[ANEURALNETWORKS_ABS] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ABS);
- // ANEURALNETWORKS_ABS_EX is deprecated
- // TODO Remove ANEURALNETWORKS_ABS_EX
- _map[ANEURALNETWORKS_ABS_EX] = _map[ANEURALNETWORKS_ABS];
-
- _map[ANEURALNETWORKS_ARGMAX] = [](const OperationFactory::Param &init_param, Operands &operands) {
+ _map[ANEURALNETWORKS_ARGMAX] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 2 && init_param.output_count == 1);
OperandIndexSequence outputs{init_param.outputs[0]};
@@ -1415,22 +1268,37 @@ OperationFactory::OperationFactory()
//
// 0 -> Input Tensor Index
// 1 -> Axis Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
- operation::ArgMax::Param param;
- param.axis = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<std::int32_t>();
+ operation::ArgMinMax::Param param;
// NNAPI ARGMAX output type is always int32
param.output_type = DataType::INT32;
+ param.is_arg_max = true;
- return new operation::ArgMax{inputs, outputs, param};
+ return new operation::ArgMinMax{inputs, outputs, param};
};
- // ANEURALNETWORKS_ARGMAX_EX is deprecated
- // TODO Remove ANEURALNETWORKS_ARGMAX_EX
- _map[ANEURALNETWORKS_ARGMAX_EX] = _map[ANEURALNETWORKS_ARGMAX];
+ _map[ANEURALNETWORKS_ARGMIN] = [](const OperationFactory::Param &init_param, Operands &) {
+ assert(init_param.input_count == 2 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ // 1 -> Axis Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+
+ operation::ArgMinMax::Param param;
+ // NNAPI ARGMIN output type is always int32
+ param.output_type = DataType::INT32;
+ param.is_arg_max = false;
+
+ return new operation::ArgMinMax{inputs, outputs, param};
+ };
_map[ANEURALNETWORKS_DEQUANTIZE] =
- getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::DEQUANTIZE);
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::DEQUANTIZE);
_map[ANEURALNETWORKS_MEAN] = [](const OperationFactory::Param &init_param, Operands &operands) {
assert(init_param.input_count == 3 && init_param.output_count == 1);
@@ -1507,17 +1375,13 @@ OperationFactory::OperationFactory()
};
_map[ANEURALNETWORKS_REDUCE_MIN] =
- getReduceGenerator(onert::ir::operation::Reduce::ReduceType::MIN);
-
- // ANEURALNETWORKS_REDUCE_MIN_EX is deprecated
- // TODO Remove ANEURALNETWORKS_REDUCE_MIN_EX
- _map[ANEURALNETWORKS_REDUCE_MIN_EX] = _map[ANEURALNETWORKS_REDUCE_MIN];
+ getReduceGenerator(onert::ir::operation::Reduce::ReduceType::MIN);
_map[ANEURALNETWORKS_SPLIT] = [](const OperationFactory::Param &init_param, Operands &operands) {
assert(init_param.input_count == 3);
assert(init_param.output_count >= 1); // At least one output tensor and axis
- OperandIndexSequence inputs{init_param.inputs[0]};
+ OperandIndexSequence inputs{init_param.inputs[1], init_param.inputs[0]};
OperandIndexSequence outputs;
for (uint32_t n = 0; n < init_param.output_count; ++n)
{
@@ -1525,7 +1389,6 @@ OperationFactory::OperationFactory()
}
operation::Split::Param param;
- param.axis = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<std::int32_t>();
param.num_splits = operands.at(OperandIndex{init_param.inputs[2]}).asScalar<std::int32_t>();
return new operation::Split{inputs, outputs, param};
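Note the operand reshuffle above: SPLIT's axis (NNAPI input 1) is now passed as the first input of onert's Split operation rather than as a Param field, so only num_splits remains a parameter. Sketch of the resulting layout (ordering as in this patch; the op is assumed to read its axis from input 0):

    // NNAPI SPLIT inputs: [tensor, axis, num_splits]
    // onert Split inputs after this patch: [axis, tensor]
    OperandIndexSequence inputs{init_param.inputs[1], init_param.inputs[0]};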
@@ -1548,10 +1411,6 @@ OperationFactory::OperationFactory()
return new operation::SplitV{inputs, outputs, param};
};
- // ANEURALNETWORKS_SPLIT_EX is deprecated
- // TODO Remove ANEURALNETWORKS_SPLIT_EX
- _map[ANEURALNETWORKS_SPLIT_EX] = _map[ANEURALNETWORKS_SPLIT];
-
_map[ANEURALNETWORKS_UNPACK_EX] = [](const OperationFactory::Param &init_param,
Operands &operands) {
assert(init_param.input_count == 3 && init_param.output_count >= 1);
@@ -1589,10 +1448,10 @@ OperationFactory::OperationFactory()
_map[ANEURALNETWORKS_PAD_V2] = _map[ANEURALNETWORKS_PAD];
_map[ANEURALNETWORKS_MINIMUM] =
- getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MIN);
+ getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MIN);
_map[ANEURALNETWORKS_MAXIMUM] =
- getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MAX);
+ getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MAX);
_map[ANEURALNETWORKS_ONE_HOT_EX] = [](const OperationFactory::Param &init_param,
Operands &operands) {
@@ -1619,7 +1478,7 @@ OperationFactory::OperationFactory()
};
_map[ANEURALNETWORKS_COS_EX] =
- getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::COS);
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::COS);
_map[ANEURALNETWORKS_SIN] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::SIN);
@@ -1633,10 +1492,10 @@ OperationFactory::OperationFactory()
};
_map[ANEURALNETWORKS_REDUCE_PROD] =
- getReduceGenerator(onert::ir::operation::Reduce::ReduceType::PROD);
+ getReduceGenerator(onert::ir::operation::Reduce::ReduceType::PROD);
_map[ANEURALNETWORKS_ROUND_EX] =
- getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ROUND);
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ROUND);
_map[ANEURALNETWORKS_RANGE_EX] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 3 && init_param.output_count == 1);
@@ -1664,7 +1523,7 @@ OperationFactory::OperationFactory()
_map[ANEURALNETWORKS_FILL_EX] = createSimpleBinaryOp<operation::Fill>;
_map[ANEURALNETWORKS_ZEROS_LIKE_EX] =
- getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ZEROS_LIKE);
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ZEROS_LIKE);
// Each input should be interpreted as follows:
// 0 -> Input Tensor Index
// 1 -> Multiples Tensor Index
@@ -1804,7 +1663,7 @@ OperationFactory::OperationFactory()
};
_map[ANEURALNETWORKS_QUANTIZE] =
- getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::QUANTIZE);
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::QUANTIZE);
}
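// For orientation, a sketch of the dispatch this constructor sets up; the
// create() body is assumed, since this diff only shows its declaration below:
//
//   Operation *OperationFactory::create(ANeuralNetworksOperationType type,
//                                       const Param &param, Operands &operands)
//   {
//     auto it = _map.find(type);
//     if (it == _map.end())
//       throw std::runtime_error{"Unsupported operation type"};
//     return it->second(param, operands); // invoke the registered Generator
//   }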
Operation *OperationFactory::create(ANeuralNetworksOperationType type,
diff --git a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.h b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.h
index 367cf74db..74e187421 100644
--- a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.h
+++ b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.h
@@ -40,7 +40,7 @@ public:
public:
using Generator =
- std::function<onert::ir::Operation *(const OperationFactory::Param &, onert::ir::Operands &)>;
+ std::function<onert::ir::Operation *(const OperationFactory::Param &, onert::ir::Operands &)>;
public:
static OperationFactory &get();
diff --git a/runtime/onert/frontend/tflite/CMakeLists.txt b/runtime/onert/frontend/tflite/CMakeLists.txt
index fcadf5223..792feebe5 100644
--- a/runtime/onert/frontend/tflite/CMakeLists.txt
+++ b/runtime/onert/frontend/tflite/CMakeLists.txt
@@ -4,11 +4,10 @@ endif(NOT BUILD_TFLITE_LOADER)
set(TFLITE_LOADER_SOURCES src/tflite_loader.cc)
-add_library(tflite_loader SHARED ${TFLITE_LOADER_SOURCES})
+add_library(tflite_loader STATIC ${TFLITE_LOADER_SOURCES})
+set_target_properties(tflite_loader PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(tflite_loader PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
-target_link_libraries(tflite_loader PUBLIC onert_core)
+target_link_libraries(tflite_loader PRIVATE onert_core)
target_link_libraries(tflite_loader PRIVATE base_loader nnfw_common nnfw_coverage)
-
-install(TARGETS tflite_loader DESTINATION lib)
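+# Design note, inferred from this change rather than stated in it: a STATIC
+# library built with POSITION_INDEPENDENT_CODE can still be linked into the
+# shared onert runtime, so the loader no longer ships as its own .so and the
+# separate install rule becomes unnecessary.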
diff --git a/runtime/onert/frontend/tflite/include/tflite_loader.h b/runtime/onert/frontend/tflite/include/tflite_loader.h
index 743c05f9e..cf17863f5 100644
--- a/runtime/onert/frontend/tflite/include/tflite_loader.h
+++ b/runtime/onert/frontend/tflite/include/tflite_loader.h
@@ -26,7 +26,7 @@ namespace onert
namespace tflite_loader
{
-std::unique_ptr<ir::Subgraphs> loadModel(const char *filename);
+std::unique_ptr<ir::Model> loadModel(const std::string &filename);
} // namespace tflite_loader
} // namespace onert
diff --git a/runtime/onert/frontend/tflite/src/tflite_loader.cc b/runtime/onert/frontend/tflite/src/tflite_loader.cc
index 7eef15717..dc8564632 100644
--- a/runtime/onert/frontend/tflite/src/tflite_loader.cc
+++ b/runtime/onert/frontend/tflite/src/tflite_loader.cc
@@ -62,8 +62,14 @@ struct LoaderDomain
}
};
-class TFLiteLoader final : public base_loader::BaseLoader<LoaderDomain, TFLiteLoader>
+class TFLiteLoader final : public base_loader::BaseLoader<LoaderDomain>
{
+protected:
+ // The BatchMatMul option names differ between the two schemas:
+ //   Circle: adjoint_lhs, adjoint_rhs
+ //   TFLite: adj_x, adj_y
+ void loadBatchMatMul(const Operator *op, ir::Graph &subg);
+
public:
using BaseLoader::BaseLoader;
@@ -72,13 +78,15 @@ public:
switch (op)
{
case BuiltinOperator::BuiltinOperator_FULLY_CONNECTED:
+ case BuiltinOperator::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM:
return true;
default:
return false;
}
}
- std::unique_ptr<ir::Graph> loadSubgraph(const onert_tflite::SubGraph *tflite_subg)
+private:
+ std::unique_ptr<ir::Graph> loadSubgraph(const onert_tflite::SubGraph *tflite_subg) override
{
auto subg = std::make_unique<ir::Graph>();
// Load tensors
@@ -105,20 +113,52 @@ public:
loadOperation(op, *subg);
}
- subg->finishBuilding();
+ subg->verify();
return subg;
}
+
+ void loadOperation(const onert_tflite::Operator *op, ir::Graph &subg)
+ {
+ auto const builtin_op = getBuiltinOperator(op);
+
+ switch (builtin_op)
+ {
+ case onert_tflite::BuiltinOperator::BuiltinOperator_BATCH_MATMUL:
+ loadBatchMatMul(op, subg);
+ return;
+ default:
+ BaseLoader::loadOperation(op, subg);
+ return;
+ }
+ }
};
+void TFLiteLoader::loadBatchMatMul(const Operator *op, ir::Graph &subg)
+{
+ ir::OperandIndexSequence inputs;
+ ir::OperandIndexSequence outputs;
+
+ loadOperationIO(op, inputs, outputs);
+
+ ir::operation::BatchMatMul::Param param;
+ const auto *options = op->builtin_options_as_BatchMatMulOptions();
+
+ param.adj_x = options->adj_x();
+ param.adj_y = options->adj_y();
+
+ std::unique_ptr<ir::Operation> new_op(new ir::operation::BatchMatMul(inputs, outputs, param));
+ subg.addOperation(std::move(new_op));
+}
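+// A slightly more defensive variant of the options access above (a sketch; it
+// assumes builtin_options could be absent in a malformed file, which the code
+// above does not guard against):
+//
+//   if (const auto *opts = op->builtin_options_as_BatchMatMulOptions())
+//   {
+//     param.adj_x = opts->adj_x();
+//     param.adj_y = opts->adj_y();
+//   }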
+
} // namespace
-std::unique_ptr<ir::Subgraphs> loadModel(const char *filename)
+std::unique_ptr<ir::Model> loadModel(const std::string &filename)
{
- auto subgraphs = std::make_unique<ir::Subgraphs>();
- TFLiteLoader loader(subgraphs);
+ auto model = std::make_unique<ir::Model>();
+ TFLiteLoader loader(model);
loader.loadFromFile(filename);
- return subgraphs;
+ return model;
}
} // namespace tflite_loader
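// Migration sketch for callers of the changed entry point (illustrative; the
// accessor on ir::Model is an assumption, not shown in this diff):
//
//   auto model = onert::tflite_loader::loadModel("model.tflite");
//   auto graph = model->primary_subgraph(); // assumed ir::Model accessor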
diff --git a/runtime/onert/frontend/tflite/src/tflite_schema_generated.h b/runtime/onert/frontend/tflite/src/tflite_schema_generated.h
index c6e9147cd..7ad3c75bd 100644
--- a/runtime/onert/frontend/tflite/src/tflite_schema_generated.h
+++ b/runtime/onert/frontend/tflite/src/tflite_schema_generated.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2019-2023 Samsung Electronics Co., Ltd. All Rights Reserved
* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
// automatically generated by the FlatBuffers compiler, do not modify
#ifndef FLATBUFFERS_GENERATED_TFLITESCHEMA_ONERT_TFLITE_H_
@@ -26,236 +25,417 @@ namespace onert_tflite
{
struct CustomQuantization;
+struct CustomQuantizationBuilder;
struct QuantizationParameters;
+struct QuantizationParametersBuilder;
struct Int32Vector;
+struct Int32VectorBuilder;
struct Uint16Vector;
+struct Uint16VectorBuilder;
struct Uint8Vector;
+struct Uint8VectorBuilder;
struct DimensionMetadata;
+struct DimensionMetadataBuilder;
struct SparsityParameters;
+struct SparsityParametersBuilder;
struct Tensor;
+struct TensorBuilder;
struct Conv2DOptions;
+struct Conv2DOptionsBuilder;
+
+struct Conv3DOptions;
+struct Conv3DOptionsBuilder;
struct Pool2DOptions;
+struct Pool2DOptionsBuilder;
struct DepthwiseConv2DOptions;
+struct DepthwiseConv2DOptionsBuilder;
struct ConcatEmbeddingsOptions;
+struct ConcatEmbeddingsOptionsBuilder;
struct LSHProjectionOptions;
+struct LSHProjectionOptionsBuilder;
struct SVDFOptions;
+struct SVDFOptionsBuilder;
struct RNNOptions;
+struct RNNOptionsBuilder;
struct SequenceRNNOptions;
+struct SequenceRNNOptionsBuilder;
struct BidirectionalSequenceRNNOptions;
+struct BidirectionalSequenceRNNOptionsBuilder;
struct FullyConnectedOptions;
+struct FullyConnectedOptionsBuilder;
struct SoftmaxOptions;
+struct SoftmaxOptionsBuilder;
struct ConcatenationOptions;
+struct ConcatenationOptionsBuilder;
struct AddOptions;
+struct AddOptionsBuilder;
struct MulOptions;
+struct MulOptionsBuilder;
struct L2NormOptions;
+struct L2NormOptionsBuilder;
struct LocalResponseNormalizationOptions;
+struct LocalResponseNormalizationOptionsBuilder;
struct LSTMOptions;
+struct LSTMOptionsBuilder;
struct UnidirectionalSequenceLSTMOptions;
+struct UnidirectionalSequenceLSTMOptionsBuilder;
struct BidirectionalSequenceLSTMOptions;
+struct BidirectionalSequenceLSTMOptionsBuilder;
struct ResizeBilinearOptions;
+struct ResizeBilinearOptionsBuilder;
struct ResizeNearestNeighborOptions;
+struct ResizeNearestNeighborOptionsBuilder;
struct CallOptions;
+struct CallOptionsBuilder;
struct PadOptions;
+struct PadOptionsBuilder;
struct PadV2Options;
+struct PadV2OptionsBuilder;
struct ReshapeOptions;
+struct ReshapeOptionsBuilder;
struct SpaceToBatchNDOptions;
+struct SpaceToBatchNDOptionsBuilder;
struct BatchToSpaceNDOptions;
+struct BatchToSpaceNDOptionsBuilder;
struct SkipGramOptions;
+struct SkipGramOptionsBuilder;
struct SpaceToDepthOptions;
+struct SpaceToDepthOptionsBuilder;
struct DepthToSpaceOptions;
+struct DepthToSpaceOptionsBuilder;
struct SubOptions;
+struct SubOptionsBuilder;
struct DivOptions;
+struct DivOptionsBuilder;
struct TopKV2Options;
+struct TopKV2OptionsBuilder;
struct EmbeddingLookupSparseOptions;
+struct EmbeddingLookupSparseOptionsBuilder;
struct GatherOptions;
+struct GatherOptionsBuilder;
struct TransposeOptions;
+struct TransposeOptionsBuilder;
struct ExpOptions;
+struct ExpOptionsBuilder;
struct CosOptions;
+struct CosOptionsBuilder;
struct ReducerOptions;
+struct ReducerOptionsBuilder;
struct SqueezeOptions;
+struct SqueezeOptionsBuilder;
struct SplitOptions;
+struct SplitOptionsBuilder;
struct SplitVOptions;
+struct SplitVOptionsBuilder;
struct StridedSliceOptions;
+struct StridedSliceOptionsBuilder;
struct LogSoftmaxOptions;
+struct LogSoftmaxOptionsBuilder;
struct CastOptions;
+struct CastOptionsBuilder;
struct DequantizeOptions;
+struct DequantizeOptionsBuilder;
struct MaximumMinimumOptions;
+struct MaximumMinimumOptionsBuilder;
struct TileOptions;
+struct TileOptionsBuilder;
struct ArgMaxOptions;
+struct ArgMaxOptionsBuilder;
struct ArgMinOptions;
+struct ArgMinOptionsBuilder;
struct GreaterOptions;
+struct GreaterOptionsBuilder;
struct GreaterEqualOptions;
+struct GreaterEqualOptionsBuilder;
struct LessOptions;
+struct LessOptionsBuilder;
struct LessEqualOptions;
+struct LessEqualOptionsBuilder;
struct NegOptions;
+struct NegOptionsBuilder;
struct SelectOptions;
+struct SelectOptionsBuilder;
struct SliceOptions;
+struct SliceOptionsBuilder;
struct TransposeConvOptions;
+struct TransposeConvOptionsBuilder;
struct ExpandDimsOptions;
+struct ExpandDimsOptionsBuilder;
struct SparseToDenseOptions;
+struct SparseToDenseOptionsBuilder;
struct EqualOptions;
+struct EqualOptionsBuilder;
struct NotEqualOptions;
+struct NotEqualOptionsBuilder;
struct ShapeOptions;
+struct ShapeOptionsBuilder;
struct RankOptions;
+struct RankOptionsBuilder;
struct PowOptions;
+struct PowOptionsBuilder;
struct FakeQuantOptions;
+struct FakeQuantOptionsBuilder;
struct PackOptions;
+struct PackOptionsBuilder;
struct LogicalOrOptions;
+struct LogicalOrOptionsBuilder;
struct OneHotOptions;
+struct OneHotOptionsBuilder;
struct AbsOptions;
+struct AbsOptionsBuilder;
struct HardSwishOptions;
+struct HardSwishOptionsBuilder;
struct LogicalAndOptions;
+struct LogicalAndOptionsBuilder;
struct LogicalNotOptions;
+struct LogicalNotOptionsBuilder;
struct UnpackOptions;
+struct UnpackOptionsBuilder;
struct FloorDivOptions;
+struct FloorDivOptionsBuilder;
struct SquareOptions;
+struct SquareOptionsBuilder;
struct ZerosLikeOptions;
+struct ZerosLikeOptionsBuilder;
struct FillOptions;
+struct FillOptionsBuilder;
struct FloorModOptions;
+struct FloorModOptionsBuilder;
struct RangeOptions;
+struct RangeOptionsBuilder;
struct LeakyReluOptions;
+struct LeakyReluOptionsBuilder;
struct SquaredDifferenceOptions;
+struct SquaredDifferenceOptionsBuilder;
struct MirrorPadOptions;
+struct MirrorPadOptionsBuilder;
struct UniqueOptions;
+struct UniqueOptionsBuilder;
struct ReverseV2Options;
+struct ReverseV2OptionsBuilder;
struct AddNOptions;
+struct AddNOptionsBuilder;
struct GatherNdOptions;
+struct GatherNdOptionsBuilder;
struct WhereOptions;
+struct WhereOptionsBuilder;
struct ReverseSequenceOptions;
+struct ReverseSequenceOptionsBuilder;
struct MatrixDiagOptions;
+struct MatrixDiagOptionsBuilder;
struct QuantizeOptions;
+struct QuantizeOptionsBuilder;
struct MatrixSetDiagOptions;
+struct MatrixSetDiagOptionsBuilder;
struct IfOptions;
+struct IfOptionsBuilder;
+
+struct CallOnceOptions;
+struct CallOnceOptionsBuilder;
struct WhileOptions;
+struct WhileOptionsBuilder;
struct NonMaxSuppressionV4Options;
+struct NonMaxSuppressionV4OptionsBuilder;
struct NonMaxSuppressionV5Options;
+struct NonMaxSuppressionV5OptionsBuilder;
struct ScatterNdOptions;
+struct ScatterNdOptionsBuilder;
struct SelectV2Options;
+struct SelectV2OptionsBuilder;
struct DensifyOptions;
+struct DensifyOptionsBuilder;
struct SegmentSumOptions;
+struct SegmentSumOptionsBuilder;
struct BatchMatMulOptions;
+struct BatchMatMulOptionsBuilder;
+
+struct CumsumOptions;
+struct CumsumOptionsBuilder;
+
+struct BroadcastToOptions;
+struct BroadcastToOptionsBuilder;
+
+struct Rfft2dOptions;
+struct Rfft2dOptionsBuilder;
+
+struct HashtableOptions;
+struct HashtableOptionsBuilder;
+
+struct HashtableFindOptions;
+struct HashtableFindOptionsBuilder;
+
+struct HashtableImportOptions;
+struct HashtableImportOptionsBuilder;
+
+struct HashtableSizeOptions;
+struct HashtableSizeOptionsBuilder;
+
+struct VarHandleOptions;
+struct VarHandleOptionsBuilder;
+
+struct ReadVariableOptions;
+struct ReadVariableOptionsBuilder;
+
+struct AssignVariableOptions;
+struct AssignVariableOptionsBuilder;
+
+struct RandomOptions;
+struct RandomOptionsBuilder;
+
+struct BucketizeOptions;
+struct BucketizeOptionsBuilder;
+
+struct GeluOptions;
+struct GeluOptionsBuilder;
+
+struct DynamicUpdateSliceOptions;
+struct DynamicUpdateSliceOptionsBuilder;
+
+struct UnsortedSegmentProdOptions;
+struct UnsortedSegmentProdOptionsBuilder;
+
+struct UnsortedSegmentMaxOptions;
+struct UnsortedSegmentMaxOptionsBuilder;
+
+struct UnsortedSegmentSumOptions;
+struct UnsortedSegmentSumOptionsBuilder;
+
+struct ATan2Options;
+struct ATan2OptionsBuilder;
struct OperatorCode;
+struct OperatorCodeBuilder;
struct Operator;
+struct OperatorBuilder;
struct SubGraph;
+struct SubGraphBuilder;
struct Buffer;
+struct BufferBuilder;
struct Metadata;
+struct MetadataBuilder;
+
+struct TensorMap;
+struct TensorMapBuilder;
+
+struct SignatureDef;
+struct SignatureDefBuilder;
struct Model;
+struct ModelBuilder;
-enum TensorType
+enum TensorType : int8_t
{
TensorType_FLOAT32 = 0,
TensorType_FLOAT16 = 1,
@@ -268,34 +448,45 @@ enum TensorType
TensorType_COMPLEX64 = 8,
TensorType_INT8 = 9,
TensorType_FLOAT64 = 10,
+ TensorType_COMPLEX128 = 11,
+ TensorType_UINT64 = 12,
+ TensorType_RESOURCE = 13,
+ TensorType_VARIANT = 14,
+ TensorType_UINT32 = 15,
+ TensorType_UINT16 = 16,
TensorType_MIN = TensorType_FLOAT32,
- TensorType_MAX = TensorType_FLOAT64
+ TensorType_MAX = TensorType_UINT16
};
-inline const TensorType (&EnumValuesTensorType())[11]
+inline const TensorType (&EnumValuesTensorType())[17]
{
- static const TensorType values[] = {TensorType_FLOAT32, TensorType_FLOAT16, TensorType_INT32,
- TensorType_UINT8, TensorType_INT64, TensorType_STRING,
- TensorType_BOOL, TensorType_INT16, TensorType_COMPLEX64,
- TensorType_INT8, TensorType_FLOAT64};
+ static const TensorType values[] = {
+ TensorType_FLOAT32, TensorType_FLOAT16, TensorType_INT32, TensorType_UINT8,
+ TensorType_INT64, TensorType_STRING, TensorType_BOOL, TensorType_INT16,
+ TensorType_COMPLEX64, TensorType_INT8, TensorType_FLOAT64, TensorType_COMPLEX128,
+ TensorType_UINT64, TensorType_RESOURCE, TensorType_VARIANT, TensorType_UINT32,
+ TensorType_UINT16};
return values;
}
inline const char *const *EnumNamesTensorType()
{
- static const char *const names[] = {"FLOAT32", "FLOAT16", "INT32", "UINT8",
- "INT64", "STRING", "BOOL", "INT16",
- "COMPLEX64", "INT8", "FLOAT64", nullptr};
+ static const char *const names[18] = {"FLOAT32", "FLOAT16", "INT32", "UINT8", "INT64",
+ "STRING", "BOOL", "INT16", "COMPLEX64", "INT8",
+ "FLOAT64", "COMPLEX128", "UINT64", "RESOURCE", "VARIANT",
+ "UINT32", "UINT16", nullptr};
return names;
}
inline const char *EnumNameTensorType(TensorType e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, TensorType_FLOAT32, TensorType_UINT16))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesTensorType()[index];
}
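// Behavioral note: the IsOutRange guard added to these EnumName* helpers turns
// an unknown enum value from out-of-bounds indexing into an empty string, e.g.
// (sketch):
//
//   auto bogus = static_cast<TensorType>(42);
//   assert(EnumNameTensorType(bogus)[0] == '\0'); // "", not undefined behavior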
-enum QuantizationDetails
+enum QuantizationDetails : uint8_t
{
QuantizationDetails_NONE = 0,
QuantizationDetails_CustomQuantization = 1,
@@ -312,13 +503,15 @@ inline const QuantizationDetails (&EnumValuesQuantizationDetails())[2]
inline const char *const *EnumNamesQuantizationDetails()
{
- static const char *const names[] = {"NONE", "CustomQuantization", nullptr};
+ static const char *const names[3] = {"NONE", "CustomQuantization", nullptr};
return names;
}
inline const char *EnumNameQuantizationDetails(QuantizationDetails e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, QuantizationDetails_NONE, QuantizationDetails_CustomQuantization))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesQuantizationDetails()[index];
}
@@ -327,7 +520,7 @@ template <typename T> struct QuantizationDetailsTraits
static const QuantizationDetails enum_value = QuantizationDetails_NONE;
};
-template <> struct QuantizationDetailsTraits<CustomQuantization>
+template <> struct QuantizationDetailsTraits<onert_tflite::CustomQuantization>
{
static const QuantizationDetails enum_value = QuantizationDetails_CustomQuantization;
};
@@ -338,7 +531,7 @@ bool VerifyQuantizationDetailsVector(flatbuffers::Verifier &verifier,
const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
const flatbuffers::Vector<uint8_t> *types);
-enum DimensionType
+enum DimensionType : int8_t
{
DimensionType_DENSE = 0,
DimensionType_SPARSE_CSR = 1,
@@ -354,17 +547,19 @@ inline const DimensionType (&EnumValuesDimensionType())[2]
inline const char *const *EnumNamesDimensionType()
{
- static const char *const names[] = {"DENSE", "SPARSE_CSR", nullptr};
+ static const char *const names[3] = {"DENSE", "SPARSE_CSR", nullptr};
return names;
}
inline const char *EnumNameDimensionType(DimensionType e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, DimensionType_DENSE, DimensionType_SPARSE_CSR))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesDimensionType()[index];
}
-enum SparseIndexVector
+enum SparseIndexVector : uint8_t
{
SparseIndexVector_NONE = 0,
SparseIndexVector_Int32Vector = 1,
@@ -384,14 +579,16 @@ inline const SparseIndexVector (&EnumValuesSparseIndexVector())[4]
inline const char *const *EnumNamesSparseIndexVector()
{
- static const char *const names[] = {"NONE", "Int32Vector", "Uint16Vector", "Uint8Vector",
- nullptr};
+ static const char *const names[5] = {"NONE", "Int32Vector", "Uint16Vector", "Uint8Vector",
+ nullptr};
return names;
}
inline const char *EnumNameSparseIndexVector(SparseIndexVector e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, SparseIndexVector_NONE, SparseIndexVector_Uint8Vector))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesSparseIndexVector()[index];
}
@@ -400,17 +597,17 @@ template <typename T> struct SparseIndexVectorTraits
static const SparseIndexVector enum_value = SparseIndexVector_NONE;
};
-template <> struct SparseIndexVectorTraits<Int32Vector>
+template <> struct SparseIndexVectorTraits<onert_tflite::Int32Vector>
{
static const SparseIndexVector enum_value = SparseIndexVector_Int32Vector;
};
-template <> struct SparseIndexVectorTraits<Uint16Vector>
+template <> struct SparseIndexVectorTraits<onert_tflite::Uint16Vector>
{
static const SparseIndexVector enum_value = SparseIndexVector_Uint16Vector;
};
-template <> struct SparseIndexVectorTraits<Uint8Vector>
+template <> struct SparseIndexVectorTraits<onert_tflite::Uint8Vector>
{
static const SparseIndexVector enum_value = SparseIndexVector_Uint8Vector;
};
@@ -421,7 +618,7 @@ bool VerifySparseIndexVectorVector(flatbuffers::Verifier &verifier,
const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
const flatbuffers::Vector<uint8_t> *types);
-enum BuiltinOperator
+enum BuiltinOperator : int32_t
{
BuiltinOperator_ADD = 0,
BuiltinOperator_AVERAGE_POOL_2D = 1,
@@ -550,11 +747,41 @@ enum BuiltinOperator
BuiltinOperator_DENSIFY = 124,
BuiltinOperator_SEGMENT_SUM = 125,
BuiltinOperator_BATCH_MATMUL = 126,
+ BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES = 127,
+ BuiltinOperator_CUMSUM = 128,
+ BuiltinOperator_CALL_ONCE = 129,
+ BuiltinOperator_BROADCAST_TO = 130,
+ BuiltinOperator_RFFT2D = 131,
+ BuiltinOperator_CONV_3D = 132,
+ BuiltinOperator_IMAG = 133,
+ BuiltinOperator_REAL = 134,
+ BuiltinOperator_COMPLEX_ABS = 135,
+ BuiltinOperator_HASHTABLE = 136,
+ BuiltinOperator_HASHTABLE_FIND = 137,
+ BuiltinOperator_HASHTABLE_IMPORT = 138,
+ BuiltinOperator_HASHTABLE_SIZE = 139,
+ BuiltinOperator_REDUCE_ALL = 140,
+ BuiltinOperator_CONV_3D_TRANSPOSE = 141,
+ BuiltinOperator_VAR_HANDLE = 142,
+ BuiltinOperator_READ_VARIABLE = 143,
+ BuiltinOperator_ASSIGN_VARIABLE = 144,
+ BuiltinOperator_BROADCAST_ARGS = 145,
+ BuiltinOperator_RANDOM_STANDARD_NORMAL = 146,
+ BuiltinOperator_BUCKETIZE = 147,
+ BuiltinOperator_RANDOM_UNIFORM = 148,
+ BuiltinOperator_MULTINOMIAL = 149,
+ BuiltinOperator_GELU = 150,
+ BuiltinOperator_DYNAMIC_UPDATE_SLICE = 151,
+ BuiltinOperator_RELU_0_TO_1 = 152,
+ BuiltinOperator_UNSORTED_SEGMENT_PROD = 153,
+ BuiltinOperator_UNSORTED_SEGMENT_MAX = 154,
+ BuiltinOperator_UNSORTED_SEGMENT_SUM = 155,
+ BuiltinOperator_ATAN2 = 156,
BuiltinOperator_MIN = BuiltinOperator_ADD,
- BuiltinOperator_MAX = BuiltinOperator_BATCH_MATMUL
+ BuiltinOperator_MAX = BuiltinOperator_ATAN2
};
-inline const BuiltinOperator (&EnumValuesBuiltinOperator())[127]
+inline const BuiltinOperator (&EnumValuesBuiltinOperator())[157]
{
static const BuiltinOperator values[] = {BuiltinOperator_ADD,
BuiltinOperator_AVERAGE_POOL_2D,
@@ -682,150 +909,212 @@ inline const BuiltinOperator (&EnumValuesBuiltinOperator())[127]
BuiltinOperator_SELECT_V2,
BuiltinOperator_DENSIFY,
BuiltinOperator_SEGMENT_SUM,
- BuiltinOperator_BATCH_MATMUL};
+ BuiltinOperator_BATCH_MATMUL,
+ BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES,
+ BuiltinOperator_CUMSUM,
+ BuiltinOperator_CALL_ONCE,
+ BuiltinOperator_BROADCAST_TO,
+ BuiltinOperator_RFFT2D,
+ BuiltinOperator_CONV_3D,
+ BuiltinOperator_IMAG,
+ BuiltinOperator_REAL,
+ BuiltinOperator_COMPLEX_ABS,
+ BuiltinOperator_HASHTABLE,
+ BuiltinOperator_HASHTABLE_FIND,
+ BuiltinOperator_HASHTABLE_IMPORT,
+ BuiltinOperator_HASHTABLE_SIZE,
+ BuiltinOperator_REDUCE_ALL,
+ BuiltinOperator_CONV_3D_TRANSPOSE,
+ BuiltinOperator_VAR_HANDLE,
+ BuiltinOperator_READ_VARIABLE,
+ BuiltinOperator_ASSIGN_VARIABLE,
+ BuiltinOperator_BROADCAST_ARGS,
+ BuiltinOperator_RANDOM_STANDARD_NORMAL,
+ BuiltinOperator_BUCKETIZE,
+ BuiltinOperator_RANDOM_UNIFORM,
+ BuiltinOperator_MULTINOMIAL,
+ BuiltinOperator_GELU,
+ BuiltinOperator_DYNAMIC_UPDATE_SLICE,
+ BuiltinOperator_RELU_0_TO_1,
+ BuiltinOperator_UNSORTED_SEGMENT_PROD,
+ BuiltinOperator_UNSORTED_SEGMENT_MAX,
+ BuiltinOperator_UNSORTED_SEGMENT_SUM,
+ BuiltinOperator_ATAN2};
return values;
}
inline const char *const *EnumNamesBuiltinOperator()
{
- static const char *const names[] = {"ADD",
- "AVERAGE_POOL_2D",
- "CONCATENATION",
- "CONV_2D",
- "DEPTHWISE_CONV_2D",
- "DEPTH_TO_SPACE",
- "DEQUANTIZE",
- "EMBEDDING_LOOKUP",
- "FLOOR",
- "FULLY_CONNECTED",
- "HASHTABLE_LOOKUP",
- "L2_NORMALIZATION",
- "L2_POOL_2D",
- "LOCAL_RESPONSE_NORMALIZATION",
- "LOGISTIC",
- "LSH_PROJECTION",
- "LSTM",
- "MAX_POOL_2D",
- "MUL",
- "RELU",
- "RELU_N1_TO_1",
- "RELU6",
- "RESHAPE",
- "RESIZE_BILINEAR",
- "RNN",
- "SOFTMAX",
- "SPACE_TO_DEPTH",
- "SVDF",
- "TANH",
- "CONCAT_EMBEDDINGS",
- "SKIP_GRAM",
- "CALL",
- "CUSTOM",
- "EMBEDDING_LOOKUP_SPARSE",
- "PAD",
- "UNIDIRECTIONAL_SEQUENCE_RNN",
- "GATHER",
- "BATCH_TO_SPACE_ND",
- "SPACE_TO_BATCH_ND",
- "TRANSPOSE",
- "MEAN",
- "SUB",
- "DIV",
- "SQUEEZE",
- "UNIDIRECTIONAL_SEQUENCE_LSTM",
- "STRIDED_SLICE",
- "BIDIRECTIONAL_SEQUENCE_RNN",
- "EXP",
- "TOPK_V2",
- "SPLIT",
- "LOG_SOFTMAX",
- "DELEGATE",
- "BIDIRECTIONAL_SEQUENCE_LSTM",
- "CAST",
- "PRELU",
- "MAXIMUM",
- "ARG_MAX",
- "MINIMUM",
- "LESS",
- "NEG",
- "PADV2",
- "GREATER",
- "GREATER_EQUAL",
- "LESS_EQUAL",
- "SELECT",
- "SLICE",
- "SIN",
- "TRANSPOSE_CONV",
- "SPARSE_TO_DENSE",
- "TILE",
- "EXPAND_DIMS",
- "EQUAL",
- "NOT_EQUAL",
- "LOG",
- "SUM",
- "SQRT",
- "RSQRT",
- "SHAPE",
- "POW",
- "ARG_MIN",
- "FAKE_QUANT",
- "REDUCE_PROD",
- "REDUCE_MAX",
- "PACK",
- "LOGICAL_OR",
- "ONE_HOT",
- "LOGICAL_AND",
- "LOGICAL_NOT",
- "UNPACK",
- "REDUCE_MIN",
- "FLOOR_DIV",
- "REDUCE_ANY",
- "SQUARE",
- "ZEROS_LIKE",
- "FILL",
- "FLOOR_MOD",
- "RANGE",
- "RESIZE_NEAREST_NEIGHBOR",
- "LEAKY_RELU",
- "SQUARED_DIFFERENCE",
- "MIRROR_PAD",
- "ABS",
- "SPLIT_V",
- "UNIQUE",
- "CEIL",
- "REVERSE_V2",
- "ADD_N",
- "GATHER_ND",
- "COS",
- "WHERE",
- "RANK",
- "ELU",
- "REVERSE_SEQUENCE",
- "MATRIX_DIAG",
- "QUANTIZE",
- "MATRIX_SET_DIAG",
- "ROUND",
- "HARD_SWISH",
- "IF",
- "WHILE",
- "NON_MAX_SUPPRESSION_V4",
- "NON_MAX_SUPPRESSION_V5",
- "SCATTER_ND",
- "SELECT_V2",
- "DENSIFY",
- "SEGMENT_SUM",
- "BATCH_MATMUL",
- nullptr};
+ static const char *const names[158] = {"ADD",
+ "AVERAGE_POOL_2D",
+ "CONCATENATION",
+ "CONV_2D",
+ "DEPTHWISE_CONV_2D",
+ "DEPTH_TO_SPACE",
+ "DEQUANTIZE",
+ "EMBEDDING_LOOKUP",
+ "FLOOR",
+ "FULLY_CONNECTED",
+ "HASHTABLE_LOOKUP",
+ "L2_NORMALIZATION",
+ "L2_POOL_2D",
+ "LOCAL_RESPONSE_NORMALIZATION",
+ "LOGISTIC",
+ "LSH_PROJECTION",
+ "LSTM",
+ "MAX_POOL_2D",
+ "MUL",
+ "RELU",
+ "RELU_N1_TO_1",
+ "RELU6",
+ "RESHAPE",
+ "RESIZE_BILINEAR",
+ "RNN",
+ "SOFTMAX",
+ "SPACE_TO_DEPTH",
+ "SVDF",
+ "TANH",
+ "CONCAT_EMBEDDINGS",
+ "SKIP_GRAM",
+ "CALL",
+ "CUSTOM",
+ "EMBEDDING_LOOKUP_SPARSE",
+ "PAD",
+ "UNIDIRECTIONAL_SEQUENCE_RNN",
+ "GATHER",
+ "BATCH_TO_SPACE_ND",
+ "SPACE_TO_BATCH_ND",
+ "TRANSPOSE",
+ "MEAN",
+ "SUB",
+ "DIV",
+ "SQUEEZE",
+ "UNIDIRECTIONAL_SEQUENCE_LSTM",
+ "STRIDED_SLICE",
+ "BIDIRECTIONAL_SEQUENCE_RNN",
+ "EXP",
+ "TOPK_V2",
+ "SPLIT",
+ "LOG_SOFTMAX",
+ "DELEGATE",
+ "BIDIRECTIONAL_SEQUENCE_LSTM",
+ "CAST",
+ "PRELU",
+ "MAXIMUM",
+ "ARG_MAX",
+ "MINIMUM",
+ "LESS",
+ "NEG",
+ "PADV2",
+ "GREATER",
+ "GREATER_EQUAL",
+ "LESS_EQUAL",
+ "SELECT",
+ "SLICE",
+ "SIN",
+ "TRANSPOSE_CONV",
+ "SPARSE_TO_DENSE",
+ "TILE",
+ "EXPAND_DIMS",
+ "EQUAL",
+ "NOT_EQUAL",
+ "LOG",
+ "SUM",
+ "SQRT",
+ "RSQRT",
+ "SHAPE",
+ "POW",
+ "ARG_MIN",
+ "FAKE_QUANT",
+ "REDUCE_PROD",
+ "REDUCE_MAX",
+ "PACK",
+ "LOGICAL_OR",
+ "ONE_HOT",
+ "LOGICAL_AND",
+ "LOGICAL_NOT",
+ "UNPACK",
+ "REDUCE_MIN",
+ "FLOOR_DIV",
+ "REDUCE_ANY",
+ "SQUARE",
+ "ZEROS_LIKE",
+ "FILL",
+ "FLOOR_MOD",
+ "RANGE",
+ "RESIZE_NEAREST_NEIGHBOR",
+ "LEAKY_RELU",
+ "SQUARED_DIFFERENCE",
+ "MIRROR_PAD",
+ "ABS",
+ "SPLIT_V",
+ "UNIQUE",
+ "CEIL",
+ "REVERSE_V2",
+ "ADD_N",
+ "GATHER_ND",
+ "COS",
+ "WHERE",
+ "RANK",
+ "ELU",
+ "REVERSE_SEQUENCE",
+ "MATRIX_DIAG",
+ "QUANTIZE",
+ "MATRIX_SET_DIAG",
+ "ROUND",
+ "HARD_SWISH",
+ "IF",
+ "WHILE",
+ "NON_MAX_SUPPRESSION_V4",
+ "NON_MAX_SUPPRESSION_V5",
+ "SCATTER_ND",
+ "SELECT_V2",
+ "DENSIFY",
+ "SEGMENT_SUM",
+ "BATCH_MATMUL",
+ "PLACEHOLDER_FOR_GREATER_OP_CODES",
+ "CUMSUM",
+ "CALL_ONCE",
+ "BROADCAST_TO",
+ "RFFT2D",
+ "CONV_3D",
+ "IMAG",
+ "REAL",
+ "COMPLEX_ABS",
+ "HASHTABLE",
+ "HASHTABLE_FIND",
+ "HASHTABLE_IMPORT",
+ "HASHTABLE_SIZE",
+ "REDUCE_ALL",
+ "CONV_3D_TRANSPOSE",
+ "VAR_HANDLE",
+ "READ_VARIABLE",
+ "ASSIGN_VARIABLE",
+ "BROADCAST_ARGS",
+ "RANDOM_STANDARD_NORMAL",
+ "BUCKETIZE",
+ "RANDOM_UNIFORM",
+ "MULTINOMIAL",
+ "GELU",
+ "DYNAMIC_UPDATE_SLICE",
+ "RELU_0_TO_1",
+ "UNSORTED_SEGMENT_PROD",
+ "UNSORTED_SEGMENT_MAX",
+ "UNSORTED_SEGMENT_SUM",
+ "ATAN2",
+ nullptr};
return names;
}
inline const char *EnumNameBuiltinOperator(BuiltinOperator e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, BuiltinOperator_ADD, BuiltinOperator_ATAN2))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesBuiltinOperator()[index];
}
-enum BuiltinOptions
+enum BuiltinOptions : uint8_t
{
BuiltinOptions_NONE = 0,
BuiltinOptions_Conv2DOptions = 1,
@@ -929,11 +1218,31 @@ enum BuiltinOptions
BuiltinOptions_DensifyOptions = 99,
BuiltinOptions_SegmentSumOptions = 100,
BuiltinOptions_BatchMatMulOptions = 101,
+ BuiltinOptions_CumsumOptions = 102,
+ BuiltinOptions_CallOnceOptions = 103,
+ BuiltinOptions_BroadcastToOptions = 104,
+ BuiltinOptions_Rfft2dOptions = 105,
+ BuiltinOptions_Conv3DOptions = 106,
+ BuiltinOptions_HashtableOptions = 107,
+ BuiltinOptions_HashtableFindOptions = 108,
+ BuiltinOptions_HashtableImportOptions = 109,
+ BuiltinOptions_HashtableSizeOptions = 110,
+ BuiltinOptions_VarHandleOptions = 111,
+ BuiltinOptions_ReadVariableOptions = 112,
+ BuiltinOptions_AssignVariableOptions = 113,
+ BuiltinOptions_RandomOptions = 114,
+ BuiltinOptions_BucketizeOptions = 115,
+ BuiltinOptions_GeluOptions = 116,
+ BuiltinOptions_DynamicUpdateSliceOptions = 117,
+ BuiltinOptions_UnsortedSegmentProdOptions = 118,
+ BuiltinOptions_UnsortedSegmentMaxOptions = 119,
+ BuiltinOptions_UnsortedSegmentSumOptions = 120,
+ BuiltinOptions_ATan2Options = 121,
BuiltinOptions_MIN = BuiltinOptions_NONE,
- BuiltinOptions_MAX = BuiltinOptions_BatchMatMulOptions
+ BuiltinOptions_MAX = BuiltinOptions_ATan2Options
};
-inline const BuiltinOptions (&EnumValuesBuiltinOptions())[102]
+inline const BuiltinOptions (&EnumValuesBuiltinOptions())[122]
{
static const BuiltinOptions values[] = {BuiltinOptions_NONE,
BuiltinOptions_Conv2DOptions,
@@ -1036,121 +1345,163 @@ inline const BuiltinOptions (&EnumValuesBuiltinOptions())[102]
BuiltinOptions_SelectV2Options,
BuiltinOptions_DensifyOptions,
BuiltinOptions_SegmentSumOptions,
- BuiltinOptions_BatchMatMulOptions};
+ BuiltinOptions_BatchMatMulOptions,
+ BuiltinOptions_CumsumOptions,
+ BuiltinOptions_CallOnceOptions,
+ BuiltinOptions_BroadcastToOptions,
+ BuiltinOptions_Rfft2dOptions,
+ BuiltinOptions_Conv3DOptions,
+ BuiltinOptions_HashtableOptions,
+ BuiltinOptions_HashtableFindOptions,
+ BuiltinOptions_HashtableImportOptions,
+ BuiltinOptions_HashtableSizeOptions,
+ BuiltinOptions_VarHandleOptions,
+ BuiltinOptions_ReadVariableOptions,
+ BuiltinOptions_AssignVariableOptions,
+ BuiltinOptions_RandomOptions,
+ BuiltinOptions_BucketizeOptions,
+ BuiltinOptions_GeluOptions,
+ BuiltinOptions_DynamicUpdateSliceOptions,
+ BuiltinOptions_UnsortedSegmentProdOptions,
+ BuiltinOptions_UnsortedSegmentMaxOptions,
+ BuiltinOptions_UnsortedSegmentSumOptions,
+ BuiltinOptions_ATan2Options};
return values;
}
inline const char *const *EnumNamesBuiltinOptions()
{
- static const char *const names[] = {"NONE",
- "Conv2DOptions",
- "DepthwiseConv2DOptions",
- "ConcatEmbeddingsOptions",
- "LSHProjectionOptions",
- "Pool2DOptions",
- "SVDFOptions",
- "RNNOptions",
- "FullyConnectedOptions",
- "SoftmaxOptions",
- "ConcatenationOptions",
- "AddOptions",
- "L2NormOptions",
- "LocalResponseNormalizationOptions",
- "LSTMOptions",
- "ResizeBilinearOptions",
- "CallOptions",
- "ReshapeOptions",
- "SkipGramOptions",
- "SpaceToDepthOptions",
- "EmbeddingLookupSparseOptions",
- "MulOptions",
- "PadOptions",
- "GatherOptions",
- "BatchToSpaceNDOptions",
- "SpaceToBatchNDOptions",
- "TransposeOptions",
- "ReducerOptions",
- "SubOptions",
- "DivOptions",
- "SqueezeOptions",
- "SequenceRNNOptions",
- "StridedSliceOptions",
- "ExpOptions",
- "TopKV2Options",
- "SplitOptions",
- "LogSoftmaxOptions",
- "CastOptions",
- "DequantizeOptions",
- "MaximumMinimumOptions",
- "ArgMaxOptions",
- "LessOptions",
- "NegOptions",
- "PadV2Options",
- "GreaterOptions",
- "GreaterEqualOptions",
- "LessEqualOptions",
- "SelectOptions",
- "SliceOptions",
- "TransposeConvOptions",
- "SparseToDenseOptions",
- "TileOptions",
- "ExpandDimsOptions",
- "EqualOptions",
- "NotEqualOptions",
- "ShapeOptions",
- "PowOptions",
- "ArgMinOptions",
- "FakeQuantOptions",
- "PackOptions",
- "LogicalOrOptions",
- "OneHotOptions",
- "LogicalAndOptions",
- "LogicalNotOptions",
- "UnpackOptions",
- "FloorDivOptions",
- "SquareOptions",
- "ZerosLikeOptions",
- "FillOptions",
- "BidirectionalSequenceLSTMOptions",
- "BidirectionalSequenceRNNOptions",
- "UnidirectionalSequenceLSTMOptions",
- "FloorModOptions",
- "RangeOptions",
- "ResizeNearestNeighborOptions",
- "LeakyReluOptions",
- "SquaredDifferenceOptions",
- "MirrorPadOptions",
- "AbsOptions",
- "SplitVOptions",
- "UniqueOptions",
- "ReverseV2Options",
- "AddNOptions",
- "GatherNdOptions",
- "CosOptions",
- "WhereOptions",
- "RankOptions",
- "ReverseSequenceOptions",
- "MatrixDiagOptions",
- "QuantizeOptions",
- "MatrixSetDiagOptions",
- "HardSwishOptions",
- "IfOptions",
- "WhileOptions",
- "DepthToSpaceOptions",
- "NonMaxSuppressionV4Options",
- "NonMaxSuppressionV5Options",
- "ScatterNdOptions",
- "SelectV2Options",
- "DensifyOptions",
- "SegmentSumOptions",
- "BatchMatMulOptions",
- nullptr};
+ static const char *const names[123] = {"NONE",
+ "Conv2DOptions",
+ "DepthwiseConv2DOptions",
+ "ConcatEmbeddingsOptions",
+ "LSHProjectionOptions",
+ "Pool2DOptions",
+ "SVDFOptions",
+ "RNNOptions",
+ "FullyConnectedOptions",
+ "SoftmaxOptions",
+ "ConcatenationOptions",
+ "AddOptions",
+ "L2NormOptions",
+ "LocalResponseNormalizationOptions",
+ "LSTMOptions",
+ "ResizeBilinearOptions",
+ "CallOptions",
+ "ReshapeOptions",
+ "SkipGramOptions",
+ "SpaceToDepthOptions",
+ "EmbeddingLookupSparseOptions",
+ "MulOptions",
+ "PadOptions",
+ "GatherOptions",
+ "BatchToSpaceNDOptions",
+ "SpaceToBatchNDOptions",
+ "TransposeOptions",
+ "ReducerOptions",
+ "SubOptions",
+ "DivOptions",
+ "SqueezeOptions",
+ "SequenceRNNOptions",
+ "StridedSliceOptions",
+ "ExpOptions",
+ "TopKV2Options",
+ "SplitOptions",
+ "LogSoftmaxOptions",
+ "CastOptions",
+ "DequantizeOptions",
+ "MaximumMinimumOptions",
+ "ArgMaxOptions",
+ "LessOptions",
+ "NegOptions",
+ "PadV2Options",
+ "GreaterOptions",
+ "GreaterEqualOptions",
+ "LessEqualOptions",
+ "SelectOptions",
+ "SliceOptions",
+ "TransposeConvOptions",
+ "SparseToDenseOptions",
+ "TileOptions",
+ "ExpandDimsOptions",
+ "EqualOptions",
+ "NotEqualOptions",
+ "ShapeOptions",
+ "PowOptions",
+ "ArgMinOptions",
+ "FakeQuantOptions",
+ "PackOptions",
+ "LogicalOrOptions",
+ "OneHotOptions",
+ "LogicalAndOptions",
+ "LogicalNotOptions",
+ "UnpackOptions",
+ "FloorDivOptions",
+ "SquareOptions",
+ "ZerosLikeOptions",
+ "FillOptions",
+ "BidirectionalSequenceLSTMOptions",
+ "BidirectionalSequenceRNNOptions",
+ "UnidirectionalSequenceLSTMOptions",
+ "FloorModOptions",
+ "RangeOptions",
+ "ResizeNearestNeighborOptions",
+ "LeakyReluOptions",
+ "SquaredDifferenceOptions",
+ "MirrorPadOptions",
+ "AbsOptions",
+ "SplitVOptions",
+ "UniqueOptions",
+ "ReverseV2Options",
+ "AddNOptions",
+ "GatherNdOptions",
+ "CosOptions",
+ "WhereOptions",
+ "RankOptions",
+ "ReverseSequenceOptions",
+ "MatrixDiagOptions",
+ "QuantizeOptions",
+ "MatrixSetDiagOptions",
+ "HardSwishOptions",
+ "IfOptions",
+ "WhileOptions",
+ "DepthToSpaceOptions",
+ "NonMaxSuppressionV4Options",
+ "NonMaxSuppressionV5Options",
+ "ScatterNdOptions",
+ "SelectV2Options",
+ "DensifyOptions",
+ "SegmentSumOptions",
+ "BatchMatMulOptions",
+ "CumsumOptions",
+ "CallOnceOptions",
+ "BroadcastToOptions",
+ "Rfft2dOptions",
+ "Conv3DOptions",
+ "HashtableOptions",
+ "HashtableFindOptions",
+ "HashtableImportOptions",
+ "HashtableSizeOptions",
+ "VarHandleOptions",
+ "ReadVariableOptions",
+ "AssignVariableOptions",
+ "RandomOptions",
+ "BucketizeOptions",
+ "GeluOptions",
+ "DynamicUpdateSliceOptions",
+ "UnsortedSegmentProdOptions",
+ "UnsortedSegmentMaxOptions",
+ "UnsortedSegmentSumOptions",
+ "ATan2Options",
+ nullptr};
return names;
}
inline const char *EnumNameBuiltinOptions(BuiltinOptions e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, BuiltinOptions_NONE, BuiltinOptions_ATan2Options))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesBuiltinOptions()[index];
}
@@ -1159,517 +1510,617 @@ template <typename T> struct BuiltinOptionsTraits
static const BuiltinOptions enum_value = BuiltinOptions_NONE;
};
-template <> struct BuiltinOptionsTraits<Conv2DOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::Conv2DOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_Conv2DOptions;
};
-template <> struct BuiltinOptionsTraits<DepthwiseConv2DOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::DepthwiseConv2DOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_DepthwiseConv2DOptions;
};
-template <> struct BuiltinOptionsTraits<ConcatEmbeddingsOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ConcatEmbeddingsOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ConcatEmbeddingsOptions;
};
-template <> struct BuiltinOptionsTraits<LSHProjectionOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::LSHProjectionOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_LSHProjectionOptions;
};
-template <> struct BuiltinOptionsTraits<Pool2DOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::Pool2DOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_Pool2DOptions;
};
-template <> struct BuiltinOptionsTraits<SVDFOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SVDFOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SVDFOptions;
};
-template <> struct BuiltinOptionsTraits<RNNOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::RNNOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_RNNOptions;
};
-template <> struct BuiltinOptionsTraits<FullyConnectedOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::FullyConnectedOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_FullyConnectedOptions;
};
-template <> struct BuiltinOptionsTraits<SoftmaxOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SoftmaxOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SoftmaxOptions;
};
-template <> struct BuiltinOptionsTraits<ConcatenationOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ConcatenationOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ConcatenationOptions;
};
-template <> struct BuiltinOptionsTraits<AddOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::AddOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_AddOptions;
};
-template <> struct BuiltinOptionsTraits<L2NormOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::L2NormOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_L2NormOptions;
};
-template <> struct BuiltinOptionsTraits<LocalResponseNormalizationOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::LocalResponseNormalizationOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_LocalResponseNormalizationOptions;
};
-template <> struct BuiltinOptionsTraits<LSTMOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::LSTMOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_LSTMOptions;
};
-template <> struct BuiltinOptionsTraits<ResizeBilinearOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ResizeBilinearOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ResizeBilinearOptions;
};
-template <> struct BuiltinOptionsTraits<CallOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::CallOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_CallOptions;
};
-template <> struct BuiltinOptionsTraits<ReshapeOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ReshapeOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ReshapeOptions;
};
-template <> struct BuiltinOptionsTraits<SkipGramOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SkipGramOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SkipGramOptions;
};
-template <> struct BuiltinOptionsTraits<SpaceToDepthOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SpaceToDepthOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SpaceToDepthOptions;
};
-template <> struct BuiltinOptionsTraits<EmbeddingLookupSparseOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::EmbeddingLookupSparseOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_EmbeddingLookupSparseOptions;
};
-template <> struct BuiltinOptionsTraits<MulOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::MulOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_MulOptions;
};
-template <> struct BuiltinOptionsTraits<PadOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::PadOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_PadOptions;
};
-template <> struct BuiltinOptionsTraits<GatherOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::GatherOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_GatherOptions;
};
-template <> struct BuiltinOptionsTraits<BatchToSpaceNDOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::BatchToSpaceNDOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_BatchToSpaceNDOptions;
};
-template <> struct BuiltinOptionsTraits<SpaceToBatchNDOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SpaceToBatchNDOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SpaceToBatchNDOptions;
};
-template <> struct BuiltinOptionsTraits<TransposeOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::TransposeOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_TransposeOptions;
};
-template <> struct BuiltinOptionsTraits<ReducerOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ReducerOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ReducerOptions;
};
-template <> struct BuiltinOptionsTraits<SubOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SubOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SubOptions;
};
-template <> struct BuiltinOptionsTraits<DivOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::DivOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_DivOptions;
};
-template <> struct BuiltinOptionsTraits<SqueezeOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SqueezeOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SqueezeOptions;
};
-template <> struct BuiltinOptionsTraits<SequenceRNNOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SequenceRNNOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SequenceRNNOptions;
};
-template <> struct BuiltinOptionsTraits<StridedSliceOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::StridedSliceOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_StridedSliceOptions;
};
-template <> struct BuiltinOptionsTraits<ExpOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ExpOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ExpOptions;
};
-template <> struct BuiltinOptionsTraits<TopKV2Options>
+template <> struct BuiltinOptionsTraits<onert_tflite::TopKV2Options>
{
static const BuiltinOptions enum_value = BuiltinOptions_TopKV2Options;
};
-template <> struct BuiltinOptionsTraits<SplitOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SplitOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SplitOptions;
};
-template <> struct BuiltinOptionsTraits<LogSoftmaxOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::LogSoftmaxOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_LogSoftmaxOptions;
};
-template <> struct BuiltinOptionsTraits<CastOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::CastOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_CastOptions;
};
-template <> struct BuiltinOptionsTraits<DequantizeOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::DequantizeOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_DequantizeOptions;
};
-template <> struct BuiltinOptionsTraits<MaximumMinimumOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::MaximumMinimumOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_MaximumMinimumOptions;
};
-template <> struct BuiltinOptionsTraits<ArgMaxOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ArgMaxOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ArgMaxOptions;
};
-template <> struct BuiltinOptionsTraits<LessOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::LessOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_LessOptions;
};
-template <> struct BuiltinOptionsTraits<NegOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::NegOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_NegOptions;
};
-template <> struct BuiltinOptionsTraits<PadV2Options>
+template <> struct BuiltinOptionsTraits<onert_tflite::PadV2Options>
{
static const BuiltinOptions enum_value = BuiltinOptions_PadV2Options;
};
-template <> struct BuiltinOptionsTraits<GreaterOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::GreaterOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_GreaterOptions;
};
-template <> struct BuiltinOptionsTraits<GreaterEqualOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::GreaterEqualOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_GreaterEqualOptions;
};
-template <> struct BuiltinOptionsTraits<LessEqualOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::LessEqualOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_LessEqualOptions;
};
-template <> struct BuiltinOptionsTraits<SelectOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SelectOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SelectOptions;
};
-template <> struct BuiltinOptionsTraits<SliceOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SliceOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SliceOptions;
};
-template <> struct BuiltinOptionsTraits<TransposeConvOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::TransposeConvOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_TransposeConvOptions;
};
-template <> struct BuiltinOptionsTraits<SparseToDenseOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SparseToDenseOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SparseToDenseOptions;
};
-template <> struct BuiltinOptionsTraits<TileOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::TileOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_TileOptions;
};
-template <> struct BuiltinOptionsTraits<ExpandDimsOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ExpandDimsOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ExpandDimsOptions;
};
-template <> struct BuiltinOptionsTraits<EqualOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::EqualOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_EqualOptions;
};
-template <> struct BuiltinOptionsTraits<NotEqualOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::NotEqualOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_NotEqualOptions;
};
-template <> struct BuiltinOptionsTraits<ShapeOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ShapeOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ShapeOptions;
};
-template <> struct BuiltinOptionsTraits<PowOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::PowOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_PowOptions;
};
-template <> struct BuiltinOptionsTraits<ArgMinOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ArgMinOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ArgMinOptions;
};
-template <> struct BuiltinOptionsTraits<FakeQuantOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::FakeQuantOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_FakeQuantOptions;
};
-template <> struct BuiltinOptionsTraits<PackOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::PackOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_PackOptions;
};
-template <> struct BuiltinOptionsTraits<LogicalOrOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::LogicalOrOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_LogicalOrOptions;
};
-template <> struct BuiltinOptionsTraits<OneHotOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::OneHotOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_OneHotOptions;
};
-template <> struct BuiltinOptionsTraits<LogicalAndOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::LogicalAndOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_LogicalAndOptions;
};
-template <> struct BuiltinOptionsTraits<LogicalNotOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::LogicalNotOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_LogicalNotOptions;
};
-template <> struct BuiltinOptionsTraits<UnpackOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::UnpackOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_UnpackOptions;
};
-template <> struct BuiltinOptionsTraits<FloorDivOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::FloorDivOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_FloorDivOptions;
};
-template <> struct BuiltinOptionsTraits<SquareOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SquareOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SquareOptions;
};
-template <> struct BuiltinOptionsTraits<ZerosLikeOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ZerosLikeOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ZerosLikeOptions;
};
-template <> struct BuiltinOptionsTraits<FillOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::FillOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_FillOptions;
};
-template <> struct BuiltinOptionsTraits<BidirectionalSequenceLSTMOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::BidirectionalSequenceLSTMOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceLSTMOptions;
};
-template <> struct BuiltinOptionsTraits<BidirectionalSequenceRNNOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::BidirectionalSequenceRNNOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceRNNOptions;
};
-template <> struct BuiltinOptionsTraits<UnidirectionalSequenceLSTMOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::UnidirectionalSequenceLSTMOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_UnidirectionalSequenceLSTMOptions;
};
-template <> struct BuiltinOptionsTraits<FloorModOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::FloorModOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_FloorModOptions;
};
-template <> struct BuiltinOptionsTraits<RangeOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::RangeOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_RangeOptions;
};
-template <> struct BuiltinOptionsTraits<ResizeNearestNeighborOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ResizeNearestNeighborOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ResizeNearestNeighborOptions;
};
-template <> struct BuiltinOptionsTraits<LeakyReluOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::LeakyReluOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_LeakyReluOptions;
};
-template <> struct BuiltinOptionsTraits<SquaredDifferenceOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SquaredDifferenceOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SquaredDifferenceOptions;
};
-template <> struct BuiltinOptionsTraits<MirrorPadOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::MirrorPadOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_MirrorPadOptions;
};
-template <> struct BuiltinOptionsTraits<AbsOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::AbsOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_AbsOptions;
};
-template <> struct BuiltinOptionsTraits<SplitVOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SplitVOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SplitVOptions;
};
-template <> struct BuiltinOptionsTraits<UniqueOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::UniqueOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_UniqueOptions;
};
-template <> struct BuiltinOptionsTraits<ReverseV2Options>
+template <> struct BuiltinOptionsTraits<onert_tflite::ReverseV2Options>
{
static const BuiltinOptions enum_value = BuiltinOptions_ReverseV2Options;
};
-template <> struct BuiltinOptionsTraits<AddNOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::AddNOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_AddNOptions;
};
-template <> struct BuiltinOptionsTraits<GatherNdOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::GatherNdOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_GatherNdOptions;
};
-template <> struct BuiltinOptionsTraits<CosOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::CosOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_CosOptions;
};
-template <> struct BuiltinOptionsTraits<WhereOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::WhereOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_WhereOptions;
};
-template <> struct BuiltinOptionsTraits<RankOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::RankOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_RankOptions;
};
-template <> struct BuiltinOptionsTraits<ReverseSequenceOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ReverseSequenceOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ReverseSequenceOptions;
};
-template <> struct BuiltinOptionsTraits<MatrixDiagOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::MatrixDiagOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_MatrixDiagOptions;
};
-template <> struct BuiltinOptionsTraits<QuantizeOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::QuantizeOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_QuantizeOptions;
};
-template <> struct BuiltinOptionsTraits<MatrixSetDiagOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::MatrixSetDiagOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_MatrixSetDiagOptions;
};
-template <> struct BuiltinOptionsTraits<HardSwishOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::HardSwishOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_HardSwishOptions;
};
-template <> struct BuiltinOptionsTraits<IfOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::IfOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_IfOptions;
};
-template <> struct BuiltinOptionsTraits<WhileOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::WhileOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_WhileOptions;
};
-template <> struct BuiltinOptionsTraits<DepthToSpaceOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::DepthToSpaceOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_DepthToSpaceOptions;
};
-template <> struct BuiltinOptionsTraits<NonMaxSuppressionV4Options>
+template <> struct BuiltinOptionsTraits<onert_tflite::NonMaxSuppressionV4Options>
{
static const BuiltinOptions enum_value = BuiltinOptions_NonMaxSuppressionV4Options;
};
-template <> struct BuiltinOptionsTraits<NonMaxSuppressionV5Options>
+template <> struct BuiltinOptionsTraits<onert_tflite::NonMaxSuppressionV5Options>
{
static const BuiltinOptions enum_value = BuiltinOptions_NonMaxSuppressionV5Options;
};
-template <> struct BuiltinOptionsTraits<ScatterNdOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ScatterNdOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_ScatterNdOptions;
};
-template <> struct BuiltinOptionsTraits<SelectV2Options>
+template <> struct BuiltinOptionsTraits<onert_tflite::SelectV2Options>
{
static const BuiltinOptions enum_value = BuiltinOptions_SelectV2Options;
};
-template <> struct BuiltinOptionsTraits<DensifyOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::DensifyOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_DensifyOptions;
};
-template <> struct BuiltinOptionsTraits<SegmentSumOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SegmentSumOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_SegmentSumOptions;
};
-template <> struct BuiltinOptionsTraits<BatchMatMulOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::BatchMatMulOptions>
{
static const BuiltinOptions enum_value = BuiltinOptions_BatchMatMulOptions;
};
+template <> struct BuiltinOptionsTraits<onert_tflite::CumsumOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_CumsumOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::CallOnceOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_CallOnceOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::BroadcastToOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_BroadcastToOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::Rfft2dOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_Rfft2dOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::Conv3DOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_Conv3DOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::HashtableOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_HashtableOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::HashtableFindOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_HashtableFindOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::HashtableImportOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_HashtableImportOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::HashtableSizeOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_HashtableSizeOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::VarHandleOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_VarHandleOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::ReadVariableOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ReadVariableOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::AssignVariableOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_AssignVariableOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::RandomOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_RandomOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::BucketizeOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_BucketizeOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::GeluOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_GeluOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::DynamicUpdateSliceOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_DynamicUpdateSliceOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::UnsortedSegmentProdOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_UnsortedSegmentProdOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::UnsortedSegmentMaxOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_UnsortedSegmentMaxOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::UnsortedSegmentSumOptions>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_UnsortedSegmentSumOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::ATan2Options>
+{
+ static const BuiltinOptions enum_value = BuiltinOptions_ATan2Options;
+};
+
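// Editor's note (illustrative, not part of the generated header): these
// BuiltinOptionsTraits specializations let templated code recover the union
// tag for an options table at compile time. A minimal sketch, assuming the
// usual flatc-generated CreateGeluOptions helper exists elsewhere in this
// header:
//
//   flatbuffers::FlatBufferBuilder fbb;
//   auto opts = onert_tflite::CreateGeluOptions(fbb).Union();
//   constexpr auto tag =
//     onert_tflite::BuiltinOptionsTraits<onert_tflite::GeluOptions>::enum_value;
//   // tag == BuiltinOptions_GeluOptions; pass (tag, opts) as the
//   // builtin_options_type / builtin_options pair when building an Operator.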
bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type);
bool VerifyBuiltinOptionsVector(flatbuffers::Verifier &verifier,
const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
const flatbuffers::Vector<uint8_t> *types);
-enum Padding
+enum Padding : int8_t
{
Padding_SAME = 0,
Padding_VALID = 1,
@@ -1685,17 +2136,19 @@ inline const Padding (&EnumValuesPadding())[2]
inline const char *const *EnumNamesPadding()
{
- static const char *const names[] = {"SAME", "VALID", nullptr};
+ static const char *const names[3] = {"SAME", "VALID", nullptr};
return names;
}
inline const char *EnumNamePadding(Padding e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, Padding_SAME, Padding_VALID))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesPadding()[index];
}
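// Editor's note (sketch, assuming the flatbuffers::IsOutRange helper shipped
// with the flatbuffers release this header was regenerated against): the new
// guard makes the EnumName* functions total, so a stray value now yields ""
// instead of an out-of-bounds read of the names array:
//
//   auto bad = static_cast<onert_tflite::Padding>(42);
//   assert(onert_tflite::EnumNamePadding(bad)[0] == '\0');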
-enum ActivationFunctionType
+enum ActivationFunctionType : int8_t
{
ActivationFunctionType_NONE = 0,
ActivationFunctionType_RELU = 1,
@@ -1710,26 +2163,27 @@ enum ActivationFunctionType
inline const ActivationFunctionType (&EnumValuesActivationFunctionType())[6]
{
static const ActivationFunctionType values[] = {
- ActivationFunctionType_NONE, ActivationFunctionType_RELU,
- ActivationFunctionType_RELU_N1_TO_1, ActivationFunctionType_RELU6,
- ActivationFunctionType_TANH, ActivationFunctionType_SIGN_BIT};
+ ActivationFunctionType_NONE, ActivationFunctionType_RELU, ActivationFunctionType_RELU_N1_TO_1,
+ ActivationFunctionType_RELU6, ActivationFunctionType_TANH, ActivationFunctionType_SIGN_BIT};
return values;
}
inline const char *const *EnumNamesActivationFunctionType()
{
- static const char *const names[] = {"NONE", "RELU", "RELU_N1_TO_1", "RELU6",
- "TANH", "SIGN_BIT", nullptr};
+ static const char *const names[7] = {"NONE", "RELU", "RELU_N1_TO_1", "RELU6",
+ "TANH", "SIGN_BIT", nullptr};
return names;
}
inline const char *EnumNameActivationFunctionType(ActivationFunctionType e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, ActivationFunctionType_NONE, ActivationFunctionType_SIGN_BIT))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesActivationFunctionType()[index];
}
-enum LSHProjectionType
+enum LSHProjectionType : int8_t
{
LSHProjectionType_UNKNOWN = 0,
LSHProjectionType_SPARSE = 1,
@@ -1747,17 +2201,19 @@ inline const LSHProjectionType (&EnumValuesLSHProjectionType())[3]
inline const char *const *EnumNamesLSHProjectionType()
{
- static const char *const names[] = {"UNKNOWN", "SPARSE", "DENSE", nullptr};
+ static const char *const names[4] = {"UNKNOWN", "SPARSE", "DENSE", nullptr};
return names;
}
inline const char *EnumNameLSHProjectionType(LSHProjectionType e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, LSHProjectionType_UNKNOWN, LSHProjectionType_DENSE))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesLSHProjectionType()[index];
}
-enum FullyConnectedOptionsWeightsFormat
+enum FullyConnectedOptionsWeightsFormat : int8_t
{
FullyConnectedOptionsWeightsFormat_DEFAULT = 0,
FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8 = 1,
@@ -1768,24 +2224,27 @@ enum FullyConnectedOptionsWeightsFormat
inline const FullyConnectedOptionsWeightsFormat (&EnumValuesFullyConnectedOptionsWeightsFormat())[2]
{
static const FullyConnectedOptionsWeightsFormat values[] = {
- FullyConnectedOptionsWeightsFormat_DEFAULT,
- FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8};
+ FullyConnectedOptionsWeightsFormat_DEFAULT,
+ FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8};
return values;
}
inline const char *const *EnumNamesFullyConnectedOptionsWeightsFormat()
{
- static const char *const names[] = {"DEFAULT", "SHUFFLED4x16INT8", nullptr};
+ static const char *const names[3] = {"DEFAULT", "SHUFFLED4x16INT8", nullptr};
return names;
}
inline const char *EnumNameFullyConnectedOptionsWeightsFormat(FullyConnectedOptionsWeightsFormat e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, FullyConnectedOptionsWeightsFormat_DEFAULT,
+ FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesFullyConnectedOptionsWeightsFormat()[index];
}
-enum LSTMKernelType
+enum LSTMKernelType : int8_t
{
LSTMKernelType_FULL = 0,
LSTMKernelType_BASIC = 1,
@@ -1801,17 +2260,19 @@ inline const LSTMKernelType (&EnumValuesLSTMKernelType())[2]
inline const char *const *EnumNamesLSTMKernelType()
{
- static const char *const names[] = {"FULL", "BASIC", nullptr};
+ static const char *const names[3] = {"FULL", "BASIC", nullptr};
return names;
}
inline const char *EnumNameLSTMKernelType(LSTMKernelType e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, LSTMKernelType_FULL, LSTMKernelType_BASIC))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesLSTMKernelType()[index];
}
-enum CombinerType
+enum CombinerType : int8_t
{
CombinerType_SUM = 0,
CombinerType_MEAN = 1,
@@ -1828,17 +2289,19 @@ inline const CombinerType (&EnumValuesCombinerType())[3]
inline const char *const *EnumNamesCombinerType()
{
- static const char *const names[] = {"SUM", "MEAN", "SQRTN", nullptr};
+ static const char *const names[4] = {"SUM", "MEAN", "SQRTN", nullptr};
return names;
}
inline const char *EnumNameCombinerType(CombinerType e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, CombinerType_SUM, CombinerType_SQRTN))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesCombinerType()[index];
}
-enum MirrorPadMode
+enum MirrorPadMode : int8_t
{
MirrorPadMode_REFLECT = 0,
MirrorPadMode_SYMMETRIC = 1,
@@ -1854,17 +2317,19 @@ inline const MirrorPadMode (&EnumValuesMirrorPadMode())[2]
inline const char *const *EnumNamesMirrorPadMode()
{
- static const char *const names[] = {"REFLECT", "SYMMETRIC", nullptr};
+ static const char *const names[3] = {"REFLECT", "SYMMETRIC", nullptr};
return names;
}
inline const char *EnumNameMirrorPadMode(MirrorPadMode e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, MirrorPadMode_REFLECT, MirrorPadMode_SYMMETRIC))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesMirrorPadMode()[index];
}
-enum CustomOptionsFormat
+enum CustomOptionsFormat : int8_t
{
CustomOptionsFormat_FLEXBUFFERS = 0,
CustomOptionsFormat_MIN = CustomOptionsFormat_FLEXBUFFERS,
@@ -1879,19 +2344,22 @@ inline const CustomOptionsFormat (&EnumValuesCustomOptionsFormat())[1]
inline const char *const *EnumNamesCustomOptionsFormat()
{
- static const char *const names[] = {"FLEXBUFFERS", nullptr};
+ static const char *const names[2] = {"FLEXBUFFERS", nullptr};
return names;
}
inline const char *EnumNameCustomOptionsFormat(CustomOptionsFormat e)
{
- const size_t index = static_cast<int>(e);
+ if (flatbuffers::IsOutRange(e, CustomOptionsFormat_FLEXBUFFERS, CustomOptionsFormat_FLEXBUFFERS))
+ return "";
+ const size_t index = static_cast<size_t>(e);
return EnumNamesCustomOptionsFormat()[index];
}
struct CustomQuantization FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef CustomQuantizationBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_CUSTOM = 4
};
@@ -1908,6 +2376,7 @@ struct CustomQuantization FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct CustomQuantizationBuilder
{
+ typedef CustomQuantization Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_custom(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom)
@@ -1918,7 +2387,6 @@ struct CustomQuantizationBuilder
{
start_ = fbb_.StartTable();
}
- CustomQuantizationBuilder &operator=(const CustomQuantizationBuilder &);
flatbuffers::Offset<CustomQuantization> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -1940,13 +2408,18 @@ inline flatbuffers::Offset<CustomQuantization>
CreateCustomQuantizationDirect(flatbuffers::FlatBufferBuilder &_fbb,
const std::vector<uint8_t> *custom = nullptr)
{
- return onert_tflite::CreateCustomQuantization(_fbb,
- custom ? _fbb.CreateVector<uint8_t>(*custom) : 0);
+ if (custom)
+ {
+ _fbb.ForceVectorAlignment(custom->size(), sizeof(uint8_t), 16);
+ }
+ auto custom__ = custom ? _fbb.CreateVector<uint8_t>(*custom) : 0;
+ return onert_tflite::CreateCustomQuantization(_fbb, custom__);
}
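// Editor's note: the added ForceVectorAlignment call matches the TFLite
// schema, where CustomQuantization.custom is declared with (force_align: 16),
// so the serialized byte vector starts on a 16-byte boundary. A minimal
// sketch of the Direct helper (names as in this header):
//
//   std::vector<uint8_t> blob = {1, 2, 3, 4};
//   flatbuffers::FlatBufferBuilder fbb;
//   auto cq = onert_tflite::CreateCustomQuantizationDirect(fbb, &blob);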
struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef QuantizationParametersBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_MIN = 4,
VT_MAX = 6,
@@ -1972,17 +2445,17 @@ struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab
{
return GetPointer<const flatbuffers::Vector<int64_t> *>(VT_ZERO_POINT);
}
- QuantizationDetails details_type() const
+ onert_tflite::QuantizationDetails details_type() const
{
- return static_cast<QuantizationDetails>(GetField<uint8_t>(VT_DETAILS_TYPE, 0));
+ return static_cast<onert_tflite::QuantizationDetails>(GetField<uint8_t>(VT_DETAILS_TYPE, 0));
}
const void *details() const { return GetPointer<const void *>(VT_DETAILS); }
template <typename T> const T *details_as() const;
- const CustomQuantization *details_as_CustomQuantization() const
+ const onert_tflite::CustomQuantization *details_as_CustomQuantization() const
{
- return details_type() == QuantizationDetails_CustomQuantization
- ? static_cast<const CustomQuantization *>(details())
- : nullptr;
+ return details_type() == onert_tflite::QuantizationDetails_CustomQuantization
+ ? static_cast<const onert_tflite::CustomQuantization *>(details())
+ : nullptr;
}
int32_t quantized_dimension() const { return GetField<int32_t>(VT_QUANTIZED_DIMENSION, 0); }
bool Verify(flatbuffers::Verifier &verifier) const
@@ -1999,13 +2472,15 @@ struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab
};
template <>
-inline const CustomQuantization *QuantizationParameters::details_as<CustomQuantization>() const
+inline const onert_tflite::CustomQuantization *
+QuantizationParameters::details_as<onert_tflite::CustomQuantization>() const
{
return details_as_CustomQuantization();
}
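// Editor's note: the pervasive onert_tflite:: qualification throughout this
// hunk appears to be standard output of newer flatc, which fully qualifies
// cross-type references; among other things this keeps explicit template
// specializations like the one above well-formed no matter which namespace
// the header's symbols are referenced from.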
struct QuantizationParametersBuilder
{
+ typedef QuantizationParameters Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_min(flatbuffers::Offset<flatbuffers::Vector<float>> min)
@@ -2024,7 +2499,7 @@ struct QuantizationParametersBuilder
{
fbb_.AddOffset(QuantizationParameters::VT_ZERO_POINT, zero_point);
}
- void add_details_type(QuantizationDetails details_type)
+ void add_details_type(onert_tflite::QuantizationDetails details_type)
{
fbb_.AddElement<uint8_t>(QuantizationParameters::VT_DETAILS_TYPE,
static_cast<uint8_t>(details_type), 0);
@@ -2042,7 +2517,6 @@ struct QuantizationParametersBuilder
{
start_ = fbb_.StartTable();
}
- QuantizationParametersBuilder &operator=(const QuantizationParametersBuilder &);
flatbuffers::Offset<QuantizationParameters> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -2051,14 +2525,13 @@ struct QuantizationParametersBuilder
}
};
-inline flatbuffers::Offset<QuantizationParameters>
-CreateQuantizationParameters(flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::Vector<float>> min = 0,
- flatbuffers::Offset<flatbuffers::Vector<float>> max = 0,
- flatbuffers::Offset<flatbuffers::Vector<float>> scale = 0,
- flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point = 0,
- QuantizationDetails details_type = QuantizationDetails_NONE,
- flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0)
+inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParameters(
+ flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset<flatbuffers::Vector<float>> min = 0,
+ flatbuffers::Offset<flatbuffers::Vector<float>> max = 0,
+ flatbuffers::Offset<flatbuffers::Vector<float>> scale = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point = 0,
+ onert_tflite::QuantizationDetails details_type = onert_tflite::QuantizationDetails_NONE,
+ flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0)
{
QuantizationParametersBuilder builder_(_fbb);
builder_.add_quantized_dimension(quantized_dimension);
@@ -2072,22 +2545,24 @@ CreateQuantizationParameters(flatbuffers::FlatBufferBuilder &_fbb,
}
inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParametersDirect(
- flatbuffers::FlatBufferBuilder &_fbb, const std::vector<float> *min = nullptr,
- const std::vector<float> *max = nullptr, const std::vector<float> *scale = nullptr,
- const std::vector<int64_t> *zero_point = nullptr,
- QuantizationDetails details_type = QuantizationDetails_NONE,
- flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0)
+ flatbuffers::FlatBufferBuilder &_fbb, const std::vector<float> *min = nullptr,
+ const std::vector<float> *max = nullptr, const std::vector<float> *scale = nullptr,
+ const std::vector<int64_t> *zero_point = nullptr,
+ onert_tflite::QuantizationDetails details_type = onert_tflite::QuantizationDetails_NONE,
+ flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0)
{
- return onert_tflite::CreateQuantizationParameters(
- _fbb, min ? _fbb.CreateVector<float>(*min) : 0, max ? _fbb.CreateVector<float>(*max) : 0,
- scale ? _fbb.CreateVector<float>(*scale) : 0,
- zero_point ? _fbb.CreateVector<int64_t>(*zero_point) : 0, details_type, details,
- quantized_dimension);
+ auto min__ = min ? _fbb.CreateVector<float>(*min) : 0;
+ auto max__ = max ? _fbb.CreateVector<float>(*max) : 0;
+ auto scale__ = scale ? _fbb.CreateVector<float>(*scale) : 0;
+ auto zero_point__ = zero_point ? _fbb.CreateVector<int64_t>(*zero_point) : 0;
+ return onert_tflite::CreateQuantizationParameters(_fbb, min__, max__, scale__, zero_point__,
+ details_type, details, quantized_dimension);
}
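// Editor's note: hoisting each CreateVector into a named temporary (min__,
// max__, ...) is deliberate. Argument evaluation order is unspecified in C++
// (and unsequenced before C++17), and FlatBufferBuilder requires sub-object
// construction not to interleave, so the nested-call form the old code used
// relied on the compiler picking a safe order. The named temporaries pin a
// single portable order; newer flatc emits this pattern for every *Direct
// helper below.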
struct Int32Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef Int32VectorBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_VALUES = 4
};
@@ -2104,6 +2579,7 @@ struct Int32Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct Int32VectorBuilder
{
+ typedef Int32Vector Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_values(flatbuffers::Offset<flatbuffers::Vector<int32_t>> values)
@@ -2114,7 +2590,6 @@ struct Int32VectorBuilder
{
start_ = fbb_.StartTable();
}
- Int32VectorBuilder &operator=(const Int32VectorBuilder &);
flatbuffers::Offset<Int32Vector> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -2136,12 +2611,14 @@ inline flatbuffers::Offset<Int32Vector>
CreateInt32VectorDirect(flatbuffers::FlatBufferBuilder &_fbb,
const std::vector<int32_t> *values = nullptr)
{
- return onert_tflite::CreateInt32Vector(_fbb, values ? _fbb.CreateVector<int32_t>(*values) : 0);
+ auto values__ = values ? _fbb.CreateVector<int32_t>(*values) : 0;
+ return onert_tflite::CreateInt32Vector(_fbb, values__);
}
struct Uint16Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef Uint16VectorBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_VALUES = 4
};
@@ -2158,6 +2635,7 @@ struct Uint16Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct Uint16VectorBuilder
{
+ typedef Uint16Vector Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_values(flatbuffers::Offset<flatbuffers::Vector<uint16_t>> values)
@@ -2168,7 +2646,6 @@ struct Uint16VectorBuilder
{
start_ = fbb_.StartTable();
}
- Uint16VectorBuilder &operator=(const Uint16VectorBuilder &);
flatbuffers::Offset<Uint16Vector> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -2190,12 +2667,18 @@ inline flatbuffers::Offset<Uint16Vector>
CreateUint16VectorDirect(flatbuffers::FlatBufferBuilder &_fbb,
const std::vector<uint16_t> *values = nullptr)
{
- return onert_tflite::CreateUint16Vector(_fbb, values ? _fbb.CreateVector<uint16_t>(*values) : 0);
+ if (values)
+ {
+ _fbb.ForceVectorAlignment(values->size(), sizeof(uint16_t), 4);
+ }
+ auto values__ = values ? _fbb.CreateVector<uint16_t>(*values) : 0;
+ return onert_tflite::CreateUint16Vector(_fbb, values__);
}
struct Uint8Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef Uint8VectorBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_VALUES = 4
};
@@ -2212,6 +2695,7 @@ struct Uint8Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct Uint8VectorBuilder
{
+ typedef Uint8Vector Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_values(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> values)
@@ -2222,7 +2706,6 @@ struct Uint8VectorBuilder
{
start_ = fbb_.StartTable();
}
- Uint8VectorBuilder &operator=(const Uint8VectorBuilder &);
flatbuffers::Offset<Uint8Vector> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -2244,12 +2727,18 @@ inline flatbuffers::Offset<Uint8Vector>
CreateUint8VectorDirect(flatbuffers::FlatBufferBuilder &_fbb,
const std::vector<uint8_t> *values = nullptr)
{
- return onert_tflite::CreateUint8Vector(_fbb, values ? _fbb.CreateVector<uint8_t>(*values) : 0);
+ if (values)
+ {
+ _fbb.ForceVectorAlignment(values->size(), sizeof(uint8_t), 4);
+ }
+ auto values__ = values ? _fbb.CreateVector<uint8_t>(*values) : 0;
+ return onert_tflite::CreateUint8Vector(_fbb, values__);
}
struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef DimensionMetadataBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_FORMAT = 4,
VT_DENSE_SIZE = 6,
@@ -2258,58 +2747,60 @@ struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
VT_ARRAY_INDICES_TYPE = 12,
VT_ARRAY_INDICES = 14
};
- DimensionType format() const
+ onert_tflite::DimensionType format() const
{
- return static_cast<DimensionType>(GetField<int8_t>(VT_FORMAT, 0));
+ return static_cast<onert_tflite::DimensionType>(GetField<int8_t>(VT_FORMAT, 0));
}
int32_t dense_size() const { return GetField<int32_t>(VT_DENSE_SIZE, 0); }
- SparseIndexVector array_segments_type() const
+ onert_tflite::SparseIndexVector array_segments_type() const
{
- return static_cast<SparseIndexVector>(GetField<uint8_t>(VT_ARRAY_SEGMENTS_TYPE, 0));
+ return static_cast<onert_tflite::SparseIndexVector>(
+ GetField<uint8_t>(VT_ARRAY_SEGMENTS_TYPE, 0));
}
const void *array_segments() const { return GetPointer<const void *>(VT_ARRAY_SEGMENTS); }
template <typename T> const T *array_segments_as() const;
- const Int32Vector *array_segments_as_Int32Vector() const
+ const onert_tflite::Int32Vector *array_segments_as_Int32Vector() const
{
- return array_segments_type() == SparseIndexVector_Int32Vector
- ? static_cast<const Int32Vector *>(array_segments())
- : nullptr;
+ return array_segments_type() == onert_tflite::SparseIndexVector_Int32Vector
+ ? static_cast<const onert_tflite::Int32Vector *>(array_segments())
+ : nullptr;
}
- const Uint16Vector *array_segments_as_Uint16Vector() const
+ const onert_tflite::Uint16Vector *array_segments_as_Uint16Vector() const
{
- return array_segments_type() == SparseIndexVector_Uint16Vector
- ? static_cast<const Uint16Vector *>(array_segments())
- : nullptr;
+ return array_segments_type() == onert_tflite::SparseIndexVector_Uint16Vector
+ ? static_cast<const onert_tflite::Uint16Vector *>(array_segments())
+ : nullptr;
}
- const Uint8Vector *array_segments_as_Uint8Vector() const
+ const onert_tflite::Uint8Vector *array_segments_as_Uint8Vector() const
{
- return array_segments_type() == SparseIndexVector_Uint8Vector
- ? static_cast<const Uint8Vector *>(array_segments())
- : nullptr;
+ return array_segments_type() == onert_tflite::SparseIndexVector_Uint8Vector
+ ? static_cast<const onert_tflite::Uint8Vector *>(array_segments())
+ : nullptr;
}
- SparseIndexVector array_indices_type() const
+ onert_tflite::SparseIndexVector array_indices_type() const
{
- return static_cast<SparseIndexVector>(GetField<uint8_t>(VT_ARRAY_INDICES_TYPE, 0));
+ return static_cast<onert_tflite::SparseIndexVector>(
+ GetField<uint8_t>(VT_ARRAY_INDICES_TYPE, 0));
}
const void *array_indices() const { return GetPointer<const void *>(VT_ARRAY_INDICES); }
template <typename T> const T *array_indices_as() const;
- const Int32Vector *array_indices_as_Int32Vector() const
+ const onert_tflite::Int32Vector *array_indices_as_Int32Vector() const
{
- return array_indices_type() == SparseIndexVector_Int32Vector
- ? static_cast<const Int32Vector *>(array_indices())
- : nullptr;
+ return array_indices_type() == onert_tflite::SparseIndexVector_Int32Vector
+ ? static_cast<const onert_tflite::Int32Vector *>(array_indices())
+ : nullptr;
}
- const Uint16Vector *array_indices_as_Uint16Vector() const
+ const onert_tflite::Uint16Vector *array_indices_as_Uint16Vector() const
{
- return array_indices_type() == SparseIndexVector_Uint16Vector
- ? static_cast<const Uint16Vector *>(array_indices())
- : nullptr;
+ return array_indices_type() == onert_tflite::SparseIndexVector_Uint16Vector
+ ? static_cast<const onert_tflite::Uint16Vector *>(array_indices())
+ : nullptr;
}
- const Uint8Vector *array_indices_as_Uint8Vector() const
+ const onert_tflite::Uint8Vector *array_indices_as_Uint8Vector() const
{
- return array_indices_type() == SparseIndexVector_Uint8Vector
- ? static_cast<const Uint8Vector *>(array_indices())
- : nullptr;
+ return array_indices_type() == onert_tflite::SparseIndexVector_Uint8Vector
+ ? static_cast<const onert_tflite::Uint8Vector *>(array_indices())
+ : nullptr;
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -2325,41 +2816,54 @@ struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
}
};
-template <> inline const Int32Vector *DimensionMetadata::array_segments_as<Int32Vector>() const
+template <>
+inline const onert_tflite::Int32Vector *
+DimensionMetadata::array_segments_as<onert_tflite::Int32Vector>() const
{
return array_segments_as_Int32Vector();
}
-template <> inline const Uint16Vector *DimensionMetadata::array_segments_as<Uint16Vector>() const
+template <>
+inline const onert_tflite::Uint16Vector *
+DimensionMetadata::array_segments_as<onert_tflite::Uint16Vector>() const
{
return array_segments_as_Uint16Vector();
}
-template <> inline const Uint8Vector *DimensionMetadata::array_segments_as<Uint8Vector>() const
+template <>
+inline const onert_tflite::Uint8Vector *
+DimensionMetadata::array_segments_as<onert_tflite::Uint8Vector>() const
{
return array_segments_as_Uint8Vector();
}
-template <> inline const Int32Vector *DimensionMetadata::array_indices_as<Int32Vector>() const
+template <>
+inline const onert_tflite::Int32Vector *
+DimensionMetadata::array_indices_as<onert_tflite::Int32Vector>() const
{
return array_indices_as_Int32Vector();
}
-template <> inline const Uint16Vector *DimensionMetadata::array_indices_as<Uint16Vector>() const
+template <>
+inline const onert_tflite::Uint16Vector *
+DimensionMetadata::array_indices_as<onert_tflite::Uint16Vector>() const
{
return array_indices_as_Uint16Vector();
}
-template <> inline const Uint8Vector *DimensionMetadata::array_indices_as<Uint8Vector>() const
+template <>
+inline const onert_tflite::Uint8Vector *
+DimensionMetadata::array_indices_as<onert_tflite::Uint8Vector>() const
{
return array_indices_as_Uint8Vector();
}
struct DimensionMetadataBuilder
{
+ typedef DimensionMetadata Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_format(DimensionType format)
+ void add_format(onert_tflite::DimensionType format)
{
fbb_.AddElement<int8_t>(DimensionMetadata::VT_FORMAT, static_cast<int8_t>(format), 0);
}
@@ -2367,7 +2871,7 @@ struct DimensionMetadataBuilder
{
fbb_.AddElement<int32_t>(DimensionMetadata::VT_DENSE_SIZE, dense_size, 0);
}
- void add_array_segments_type(SparseIndexVector array_segments_type)
+ void add_array_segments_type(onert_tflite::SparseIndexVector array_segments_type)
{
fbb_.AddElement<uint8_t>(DimensionMetadata::VT_ARRAY_SEGMENTS_TYPE,
static_cast<uint8_t>(array_segments_type), 0);
@@ -2376,7 +2880,7 @@ struct DimensionMetadataBuilder
{
fbb_.AddOffset(DimensionMetadata::VT_ARRAY_SEGMENTS, array_segments);
}
- void add_array_indices_type(SparseIndexVector array_indices_type)
+ void add_array_indices_type(onert_tflite::SparseIndexVector array_indices_type)
{
fbb_.AddElement<uint8_t>(DimensionMetadata::VT_ARRAY_INDICES_TYPE,
static_cast<uint8_t>(array_indices_type), 0);
@@ -2389,7 +2893,6 @@ struct DimensionMetadataBuilder
{
start_ = fbb_.StartTable();
}
- DimensionMetadataBuilder &operator=(const DimensionMetadataBuilder &);
flatbuffers::Offset<DimensionMetadata> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -2398,13 +2901,13 @@ struct DimensionMetadataBuilder
}
};
-inline flatbuffers::Offset<DimensionMetadata>
-CreateDimensionMetadata(flatbuffers::FlatBufferBuilder &_fbb,
- DimensionType format = DimensionType_DENSE, int32_t dense_size = 0,
- SparseIndexVector array_segments_type = SparseIndexVector_NONE,
- flatbuffers::Offset<void> array_segments = 0,
- SparseIndexVector array_indices_type = SparseIndexVector_NONE,
- flatbuffers::Offset<void> array_indices = 0)
+inline flatbuffers::Offset<DimensionMetadata> CreateDimensionMetadata(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ onert_tflite::DimensionType format = onert_tflite::DimensionType_DENSE, int32_t dense_size = 0,
+ onert_tflite::SparseIndexVector array_segments_type = onert_tflite::SparseIndexVector_NONE,
+ flatbuffers::Offset<void> array_segments = 0,
+ onert_tflite::SparseIndexVector array_indices_type = onert_tflite::SparseIndexVector_NONE,
+ flatbuffers::Offset<void> array_indices = 0)
{
DimensionMetadataBuilder builder_(_fbb);
builder_.add_array_indices(array_indices);
@@ -2418,7 +2921,8 @@ CreateDimensionMetadata(flatbuffers::FlatBufferBuilder &_fbb,
struct SparsityParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SparsityParametersBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_TRAVERSAL_ORDER = 4,
VT_BLOCK_MAP = 6,
@@ -2432,10 +2936,12 @@ struct SparsityParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_BLOCK_MAP);
}
- const flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>> *dim_metadata() const
+ const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::DimensionMetadata>> *
+ dim_metadata() const
{
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>> *>(
- VT_DIM_METADATA);
+ return GetPointer<
+ const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::DimensionMetadata>> *>(
+ VT_DIM_METADATA);
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -2449,6 +2955,7 @@ struct SparsityParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SparsityParametersBuilder
{
+ typedef SparsityParameters Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_traversal_order(flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order)
@@ -2460,7 +2967,8 @@ struct SparsityParametersBuilder
fbb_.AddOffset(SparsityParameters::VT_BLOCK_MAP, block_map);
}
void add_dim_metadata(
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>>> dim_metadata)
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::DimensionMetadata>>>
+ dim_metadata)
{
fbb_.AddOffset(SparsityParameters::VT_DIM_METADATA, dim_metadata);
}
@@ -2468,7 +2976,6 @@ struct SparsityParametersBuilder
{
start_ = fbb_.StartTable();
}
- SparsityParametersBuilder &operator=(const SparsityParametersBuilder &);
flatbuffers::Offset<SparsityParameters> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -2478,11 +2985,11 @@ struct SparsityParametersBuilder
};
inline flatbuffers::Offset<SparsityParameters> CreateSparsityParameters(
- flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>>> dim_metadata =
- 0)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::DimensionMetadata>>>
+ dim_metadata = 0)
{
SparsityParametersBuilder builder_(_fbb);
builder_.add_dim_metadata(dim_metadata);
@@ -2492,19 +2999,24 @@ inline flatbuffers::Offset<SparsityParameters> CreateSparsityParameters(
}
inline flatbuffers::Offset<SparsityParameters> CreateSparsityParametersDirect(
- flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *traversal_order = nullptr,
- const std::vector<int32_t> *block_map = nullptr,
- const std::vector<flatbuffers::Offset<DimensionMetadata>> *dim_metadata = nullptr)
+ flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *traversal_order = nullptr,
+ const std::vector<int32_t> *block_map = nullptr,
+ const std::vector<flatbuffers::Offset<onert_tflite::DimensionMetadata>> *dim_metadata = nullptr)
{
- return onert_tflite::CreateSparsityParameters(
- _fbb, traversal_order ? _fbb.CreateVector<int32_t>(*traversal_order) : 0,
- block_map ? _fbb.CreateVector<int32_t>(*block_map) : 0,
- dim_metadata ? _fbb.CreateVector<flatbuffers::Offset<DimensionMetadata>>(*dim_metadata) : 0);
+ auto traversal_order__ = traversal_order ? _fbb.CreateVector<int32_t>(*traversal_order) : 0;
+ auto block_map__ = block_map ? _fbb.CreateVector<int32_t>(*block_map) : 0;
+ auto dim_metadata__ =
+ dim_metadata
+ ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::DimensionMetadata>>(*dim_metadata)
+ : 0;
+ return onert_tflite::CreateSparsityParameters(_fbb, traversal_order__, block_map__,
+ dim_metadata__);
}
struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef TensorBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_SHAPE = 4,
VT_TYPE = 6,
@@ -2513,31 +3025,36 @@ struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
VT_QUANTIZATION = 12,
VT_IS_VARIABLE = 14,
VT_SPARSITY = 16,
- VT_SHAPE_SIGNATURE = 18
+ VT_SHAPE_SIGNATURE = 18,
+ VT_HAS_RANK = 20
};
const flatbuffers::Vector<int32_t> *shape() const
{
return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_SHAPE);
}
- TensorType type() const { return static_cast<TensorType>(GetField<int8_t>(VT_TYPE, 0)); }
+ onert_tflite::TensorType type() const
+ {
+ return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_TYPE, 0));
+ }
uint32_t buffer() const { return GetField<uint32_t>(VT_BUFFER, 0); }
const flatbuffers::String *name() const
{
return GetPointer<const flatbuffers::String *>(VT_NAME);
}
- const QuantizationParameters *quantization() const
+ const onert_tflite::QuantizationParameters *quantization() const
{
- return GetPointer<const QuantizationParameters *>(VT_QUANTIZATION);
+ return GetPointer<const onert_tflite::QuantizationParameters *>(VT_QUANTIZATION);
}
bool is_variable() const { return GetField<uint8_t>(VT_IS_VARIABLE, 0) != 0; }
- const SparsityParameters *sparsity() const
+ const onert_tflite::SparsityParameters *sparsity() const
{
- return GetPointer<const SparsityParameters *>(VT_SPARSITY);
+ return GetPointer<const onert_tflite::SparsityParameters *>(VT_SPARSITY);
}
const flatbuffers::Vector<int32_t> *shape_signature() const
{
return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_SHAPE_SIGNATURE);
}
+ bool has_rank() const { return GetField<uint8_t>(VT_HAS_RANK, 0) != 0; }
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_SHAPE) &&
@@ -2547,19 +3064,20 @@ struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
verifier.VerifyTable(quantization()) && VerifyField<uint8_t>(verifier, VT_IS_VARIABLE) &&
VerifyOffset(verifier, VT_SPARSITY) && verifier.VerifyTable(sparsity()) &&
VerifyOffset(verifier, VT_SHAPE_SIGNATURE) && verifier.VerifyVector(shape_signature()) &&
- verifier.EndTable();
+ VerifyField<uint8_t>(verifier, VT_HAS_RANK) && verifier.EndTable();
}
};
struct TensorBuilder
{
+ typedef Tensor Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_shape(flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape)
{
fbb_.AddOffset(Tensor::VT_SHAPE, shape);
}
- void add_type(TensorType type)
+ void add_type(onert_tflite::TensorType type)
{
fbb_.AddElement<int8_t>(Tensor::VT_TYPE, static_cast<int8_t>(type), 0);
}
@@ -2568,7 +3086,7 @@ struct TensorBuilder
{
fbb_.AddOffset(Tensor::VT_NAME, name);
}
- void add_quantization(flatbuffers::Offset<QuantizationParameters> quantization)
+ void add_quantization(flatbuffers::Offset<onert_tflite::QuantizationParameters> quantization)
{
fbb_.AddOffset(Tensor::VT_QUANTIZATION, quantization);
}
@@ -2576,7 +3094,7 @@ struct TensorBuilder
{
fbb_.AddElement<uint8_t>(Tensor::VT_IS_VARIABLE, static_cast<uint8_t>(is_variable), 0);
}
- void add_sparsity(flatbuffers::Offset<SparsityParameters> sparsity)
+ void add_sparsity(flatbuffers::Offset<onert_tflite::SparsityParameters> sparsity)
{
fbb_.AddOffset(Tensor::VT_SPARSITY, sparsity);
}
@@ -2584,11 +3102,14 @@ struct TensorBuilder
{
fbb_.AddOffset(Tensor::VT_SHAPE_SIGNATURE, shape_signature);
}
+ void add_has_rank(bool has_rank)
+ {
+ fbb_.AddElement<uint8_t>(Tensor::VT_HAS_RANK, static_cast<uint8_t>(has_rank), 0);
+ }
explicit TensorBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- TensorBuilder &operator=(const TensorBuilder &);
flatbuffers::Offset<Tensor> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -2597,14 +3118,13 @@ struct TensorBuilder
}
};
-inline flatbuffers::Offset<Tensor>
-CreateTensor(flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape = 0,
- TensorType type = TensorType_FLOAT32, uint32_t buffer = 0,
- flatbuffers::Offset<flatbuffers::String> name = 0,
- flatbuffers::Offset<QuantizationParameters> quantization = 0, bool is_variable = false,
- flatbuffers::Offset<SparsityParameters> sparsity = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape_signature = 0)
+inline flatbuffers::Offset<Tensor> CreateTensor(
+ flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape = 0,
+ onert_tflite::TensorType type = onert_tflite::TensorType_FLOAT32, uint32_t buffer = 0,
+ flatbuffers::Offset<flatbuffers::String> name = 0,
+ flatbuffers::Offset<onert_tflite::QuantizationParameters> quantization = 0,
+ bool is_variable = false, flatbuffers::Offset<onert_tflite::SparsityParameters> sparsity = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape_signature = 0, bool has_rank = false)
{
TensorBuilder builder_(_fbb);
builder_.add_shape_signature(shape_signature);
@@ -2613,27 +3133,31 @@ CreateTensor(flatbuffers::FlatBufferBuilder &_fbb,
builder_.add_name(name);
builder_.add_buffer(buffer);
builder_.add_shape(shape);
+ builder_.add_has_rank(has_rank);
builder_.add_is_variable(is_variable);
builder_.add_type(type);
return builder_.Finish();
}
inline flatbuffers::Offset<Tensor> CreateTensorDirect(
- flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *shape = nullptr,
- TensorType type = TensorType_FLOAT32, uint32_t buffer = 0, const char *name = nullptr,
- flatbuffers::Offset<QuantizationParameters> quantization = 0, bool is_variable = false,
- flatbuffers::Offset<SparsityParameters> sparsity = 0,
- const std::vector<int32_t> *shape_signature = nullptr)
+ flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *shape = nullptr,
+ onert_tflite::TensorType type = onert_tflite::TensorType_FLOAT32, uint32_t buffer = 0,
+ const char *name = nullptr,
+ flatbuffers::Offset<onert_tflite::QuantizationParameters> quantization = 0,
+ bool is_variable = false, flatbuffers::Offset<onert_tflite::SparsityParameters> sparsity = 0,
+ const std::vector<int32_t> *shape_signature = nullptr, bool has_rank = false)
{
- return onert_tflite::CreateTensor(
- _fbb, shape ? _fbb.CreateVector<int32_t>(*shape) : 0, type, buffer,
- name ? _fbb.CreateString(name) : 0, quantization, is_variable, sparsity,
- shape_signature ? _fbb.CreateVector<int32_t>(*shape_signature) : 0);
+ auto shape__ = shape ? _fbb.CreateVector<int32_t>(*shape) : 0;
+ auto name__ = name ? _fbb.CreateString(name) : 0;
+ auto shape_signature__ = shape_signature ? _fbb.CreateVector<int32_t>(*shape_signature) : 0;
+ return onert_tflite::CreateTensor(_fbb, shape__, type, buffer, name__, quantization, is_variable,
+ sparsity, shape_signature__, has_rank);
}
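// Editor's note (illustrative): schema evolution in action. has_rank is
// appended as the last field (VT_HAS_RANK = 20) with a false default, so
// buffers written before the change still verify and existing positional
// callers keep compiling:
//
//   std::vector<int32_t> shape = {1, 224, 224, 3};
//   auto t = onert_tflite::CreateTensorDirect(
//     fbb, &shape, onert_tflite::TensorType_FLOAT32, /*buffer=*/0, "input",
//     /*quantization=*/0, /*is_variable=*/false, /*sparsity=*/0,
//     /*shape_signature=*/nullptr, /*has_rank=*/true);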
struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef Conv2DOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_PADDING = 4,
VT_STRIDE_W = 6,
@@ -2642,12 +3166,16 @@ struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
VT_DILATION_W_FACTOR = 12,
VT_DILATION_H_FACTOR = 14
};
- Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); }
+ onert_tflite::Padding padding() const
+ {
+ return static_cast<onert_tflite::Padding>(GetField<int8_t>(VT_PADDING, 0));
+ }
int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
- ActivationFunctionType fused_activation_function() const
+ onert_tflite::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); }
int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); }
@@ -2664,9 +3192,10 @@ struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct Conv2DOptionsBuilder
{
+ typedef Conv2DOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_padding(Padding padding)
+ void add_padding(onert_tflite::Padding padding)
{
fbb_.AddElement<int8_t>(Conv2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
}
@@ -2678,7 +3207,7 @@ struct Conv2DOptionsBuilder
{
fbb_.AddElement<int32_t>(Conv2DOptions::VT_STRIDE_H, stride_h, 0);
}
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(Conv2DOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -2695,7 +3224,6 @@ struct Conv2DOptionsBuilder
{
start_ = fbb_.StartTable();
}
- Conv2DOptionsBuilder &operator=(const Conv2DOptionsBuilder &);
flatbuffers::Offset<Conv2DOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -2705,9 +3233,11 @@ struct Conv2DOptionsBuilder
};
inline flatbuffers::Offset<Conv2DOptions>
-CreateConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME,
+CreateConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ onert_tflite::Padding padding = onert_tflite::Padding_SAME,
int32_t stride_w = 0, int32_t stride_h = 0,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE,
int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
{
Conv2DOptionsBuilder builder_(_fbb);
@@ -2720,9 +3250,121 @@ CreateConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padd
return builder_.Finish();
}
+struct Conv3DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef Conv3DOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_PADDING = 4,
+ VT_STRIDE_D = 6,
+ VT_STRIDE_W = 8,
+ VT_STRIDE_H = 10,
+ VT_FUSED_ACTIVATION_FUNCTION = 12,
+ VT_DILATION_D_FACTOR = 14,
+ VT_DILATION_W_FACTOR = 16,
+ VT_DILATION_H_FACTOR = 18
+ };
+ onert_tflite::Padding padding() const
+ {
+ return static_cast<onert_tflite::Padding>(GetField<int8_t>(VT_PADDING, 0));
+ }
+ int32_t stride_d() const { return GetField<int32_t>(VT_STRIDE_D, 0); }
+ int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
+ int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
+ onert_tflite::ActivationFunctionType fused_activation_function() const
+ {
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ }
+ int32_t dilation_d_factor() const { return GetField<int32_t>(VT_DILATION_D_FACTOR, 1); }
+ int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); }
+ int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) &&
+ VerifyField<int32_t>(verifier, VT_STRIDE_D) &&
+ VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
+ VerifyField<int32_t>(verifier, VT_STRIDE_H) &&
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+ VerifyField<int32_t>(verifier, VT_DILATION_D_FACTOR) &&
+ VerifyField<int32_t>(verifier, VT_DILATION_W_FACTOR) &&
+ VerifyField<int32_t>(verifier, VT_DILATION_H_FACTOR) && verifier.EndTable();
+ }
+};
+
+struct Conv3DOptionsBuilder
+{
+ typedef Conv3DOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_padding(onert_tflite::Padding padding)
+ {
+ fbb_.AddElement<int8_t>(Conv3DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
+ }
+ void add_stride_d(int32_t stride_d)
+ {
+ fbb_.AddElement<int32_t>(Conv3DOptions::VT_STRIDE_D, stride_d, 0);
+ }
+ void add_stride_w(int32_t stride_w)
+ {
+ fbb_.AddElement<int32_t>(Conv3DOptions::VT_STRIDE_W, stride_w, 0);
+ }
+ void add_stride_h(int32_t stride_h)
+ {
+ fbb_.AddElement<int32_t>(Conv3DOptions::VT_STRIDE_H, stride_h, 0);
+ }
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
+ {
+ fbb_.AddElement<int8_t>(Conv3DOptions::VT_FUSED_ACTIVATION_FUNCTION,
+ static_cast<int8_t>(fused_activation_function), 0);
+ }
+ void add_dilation_d_factor(int32_t dilation_d_factor)
+ {
+ fbb_.AddElement<int32_t>(Conv3DOptions::VT_DILATION_D_FACTOR, dilation_d_factor, 1);
+ }
+ void add_dilation_w_factor(int32_t dilation_w_factor)
+ {
+ fbb_.AddElement<int32_t>(Conv3DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1);
+ }
+ void add_dilation_h_factor(int32_t dilation_h_factor)
+ {
+ fbb_.AddElement<int32_t>(Conv3DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1);
+ }
+ explicit Conv3DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<Conv3DOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Conv3DOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Conv3DOptions> CreateConv3DOptions(
+ flatbuffers::FlatBufferBuilder &_fbb, onert_tflite::Padding padding = onert_tflite::Padding_SAME,
+ int32_t stride_d = 0, int32_t stride_w = 0, int32_t stride_h = 0,
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE,
+ int32_t dilation_d_factor = 1, int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
+{
+ Conv3DOptionsBuilder builder_(_fbb);
+ builder_.add_dilation_h_factor(dilation_h_factor);
+ builder_.add_dilation_w_factor(dilation_w_factor);
+ builder_.add_dilation_d_factor(dilation_d_factor);
+ builder_.add_stride_h(stride_h);
+ builder_.add_stride_w(stride_w);
+ builder_.add_stride_d(stride_d);
+ builder_.add_fused_activation_function(fused_activation_function);
+ builder_.add_padding(padding);
+ return builder_.Finish();
+}
+
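// Editor's note (sketch): Conv3DOptions mirrors Conv2DOptions with one extra
// depth axis (stride_d, dilation_d_factor); as in the 2D table, dilation
// factors default to 1 while strides default to 0:
//
//   auto opts3d = onert_tflite::CreateConv3DOptions(
//     fbb, onert_tflite::Padding_VALID, /*stride_d=*/1, /*stride_w=*/2,
//     /*stride_h=*/2);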
struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef Pool2DOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_PADDING = 4,
VT_STRIDE_W = 6,
@@ -2731,14 +3373,18 @@ struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
VT_FILTER_HEIGHT = 12,
VT_FUSED_ACTIVATION_FUNCTION = 14
};
- Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); }
+ onert_tflite::Padding padding() const
+ {
+ return static_cast<onert_tflite::Padding>(GetField<int8_t>(VT_PADDING, 0));
+ }
int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
int32_t filter_width() const { return GetField<int32_t>(VT_FILTER_WIDTH, 0); }
int32_t filter_height() const { return GetField<int32_t>(VT_FILTER_HEIGHT, 0); }
- ActivationFunctionType fused_activation_function() const
+ onert_tflite::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -2753,9 +3399,10 @@ struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct Pool2DOptionsBuilder
{
+ typedef Pool2DOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_padding(Padding padding)
+ void add_padding(onert_tflite::Padding padding)
{
fbb_.AddElement<int8_t>(Pool2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
}
@@ -2775,7 +3422,7 @@ struct Pool2DOptionsBuilder
{
fbb_.AddElement<int32_t>(Pool2DOptions::VT_FILTER_HEIGHT, filter_height, 0);
}
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(Pool2DOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -2784,7 +3431,6 @@ struct Pool2DOptionsBuilder
{
start_ = fbb_.StartTable();
}
- Pool2DOptionsBuilder &operator=(const Pool2DOptionsBuilder &);
flatbuffers::Offset<Pool2DOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -2793,11 +3439,11 @@ struct Pool2DOptionsBuilder
}
};
-inline flatbuffers::Offset<Pool2DOptions>
-CreatePool2DOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME,
- int32_t stride_w = 0, int32_t stride_h = 0, int32_t filter_width = 0,
- int32_t filter_height = 0,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+inline flatbuffers::Offset<Pool2DOptions> CreatePool2DOptions(
+ flatbuffers::FlatBufferBuilder &_fbb, onert_tflite::Padding padding = onert_tflite::Padding_SAME,
+ int32_t stride_w = 0, int32_t stride_h = 0, int32_t filter_width = 0, int32_t filter_height = 0,
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE)
{
Pool2DOptionsBuilder builder_(_fbb);
builder_.add_filter_height(filter_height);
@@ -2811,7 +3457,8 @@ CreatePool2DOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padd
struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef DepthwiseConv2DOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_PADDING = 4,
VT_STRIDE_W = 6,
@@ -2821,13 +3468,17 @@ struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab
VT_DILATION_W_FACTOR = 14,
VT_DILATION_H_FACTOR = 16
};
- Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); }
+ onert_tflite::Padding padding() const
+ {
+ return static_cast<onert_tflite::Padding>(GetField<int8_t>(VT_PADDING, 0));
+ }
int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
int32_t depth_multiplier() const { return GetField<int32_t>(VT_DEPTH_MULTIPLIER, 0); }
- ActivationFunctionType fused_activation_function() const
+ onert_tflite::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); }
int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); }
@@ -2845,9 +3496,10 @@ struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab
struct DepthwiseConv2DOptionsBuilder
{
+ typedef DepthwiseConv2DOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_padding(Padding padding)
+ void add_padding(onert_tflite::Padding padding)
{
fbb_.AddElement<int8_t>(DepthwiseConv2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
}
@@ -2863,7 +3515,7 @@ struct DepthwiseConv2DOptionsBuilder
{
fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DEPTH_MULTIPLIER, depth_multiplier, 0);
}
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(DepthwiseConv2DOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -2880,7 +3532,6 @@ struct DepthwiseConv2DOptionsBuilder
{
start_ = fbb_.StartTable();
}
- DepthwiseConv2DOptionsBuilder &operator=(const DepthwiseConv2DOptionsBuilder &);
flatbuffers::Offset<DepthwiseConv2DOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -2890,10 +3541,11 @@ struct DepthwiseConv2DOptionsBuilder
};
inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions(
- flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME, int32_t stride_w = 0,
- int32_t stride_h = 0, int32_t depth_multiplier = 0,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
+ flatbuffers::FlatBufferBuilder &_fbb, onert_tflite::Padding padding = onert_tflite::Padding_SAME,
+ int32_t stride_w = 0, int32_t stride_h = 0, int32_t depth_multiplier = 0,
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE,
+ int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
{
DepthwiseConv2DOptionsBuilder builder_(_fbb);
builder_.add_dilation_h_factor(dilation_h_factor);
@@ -2908,7 +3560,8 @@ inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions(
struct ConcatEmbeddingsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef ConcatEmbeddingsOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_NUM_CHANNELS = 4,
VT_NUM_COLUMNS_PER_CHANNEL = 6,
@@ -2935,6 +3588,7 @@ struct ConcatEmbeddingsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Ta
struct ConcatEmbeddingsOptionsBuilder
{
+ typedef ConcatEmbeddingsOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_num_channels(int32_t num_channels)
@@ -2942,12 +3596,12 @@ struct ConcatEmbeddingsOptionsBuilder
fbb_.AddElement<int32_t>(ConcatEmbeddingsOptions::VT_NUM_CHANNELS, num_channels, 0);
}
void add_num_columns_per_channel(
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel)
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel)
{
fbb_.AddOffset(ConcatEmbeddingsOptions::VT_NUM_COLUMNS_PER_CHANNEL, num_columns_per_channel);
}
void add_embedding_dim_per_channel(
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel)
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel)
{
fbb_.AddOffset(ConcatEmbeddingsOptions::VT_EMBEDDING_DIM_PER_CHANNEL,
embedding_dim_per_channel);
@@ -2956,7 +3610,6 @@ struct ConcatEmbeddingsOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ConcatEmbeddingsOptionsBuilder &operator=(const ConcatEmbeddingsOptionsBuilder &);
flatbuffers::Offset<ConcatEmbeddingsOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -2966,9 +3619,9 @@ struct ConcatEmbeddingsOptionsBuilder
};
inline flatbuffers::Offset<ConcatEmbeddingsOptions> CreateConcatEmbeddingsOptions(
- flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel = 0)
+ flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel = 0)
{
ConcatEmbeddingsOptionsBuilder builder_(_fbb);
builder_.add_embedding_dim_per_channel(embedding_dim_per_channel);
@@ -2982,21 +3635,24 @@ CreateConcatEmbeddingsOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, int32_
const std::vector<int32_t> *num_columns_per_channel = nullptr,
const std::vector<int32_t> *embedding_dim_per_channel = nullptr)
{
- return onert_tflite::CreateConcatEmbeddingsOptions(
- _fbb, num_channels,
- num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0,
- embedding_dim_per_channel ? _fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0);
+ auto num_columns_per_channel__ =
+ num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0;
+ auto embedding_dim_per_channel__ =
+ embedding_dim_per_channel ? _fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0;
+ return onert_tflite::CreateConcatEmbeddingsOptions(_fbb, num_channels, num_columns_per_channel__,
+ embedding_dim_per_channel__);
}
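The regenerated `...Direct` helpers now hoist each nested `CreateVector` call into a named, `__`-suffixed local before invoking the table factory, instead of building the vectors inline in the argument list. A plausible reason, offered as an inference rather than anything this diff states, is that C++ leaves the evaluation order of function arguments unspecified, so named locals pin down the order in which child offsets are written into the buffer. A minimal usage sketch of the helper above, assuming this generated header is reachable as "tflite_schema_generated.h" (a hypothetical include name) with flatbuffers on the include path:

#include <cstdint>
#include <vector>
#include "flatbuffers/flatbuffers.h"
#include "tflite_schema_generated.h" // hypothetical name for this generated header

int main()
{
  flatbuffers::FlatBufferBuilder fbb;
  std::vector<int32_t> columns{4, 4};
  std::vector<int32_t> dims{8, 8};
  // The Direct overload copies both vectors into the buffer, then builds the table;
  // internally it now goes through the hoisted `__` temporaries shown above.
  auto opts = onert_tflite::CreateConcatEmbeddingsOptionsDirect(fbb, /*num_channels=*/2,
                                                                &columns, &dims);
  fbb.Finish(opts);
  return 0;
}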
struct LSHProjectionOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef LSHProjectionOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_TYPE = 4
};
- LSHProjectionType type() const
+ onert_tflite::LSHProjectionType type() const
{
- return static_cast<LSHProjectionType>(GetField<int8_t>(VT_TYPE, 0));
+ return static_cast<onert_tflite::LSHProjectionType>(GetField<int8_t>(VT_TYPE, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -3007,9 +3663,10 @@ struct LSHProjectionOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct LSHProjectionOptionsBuilder
{
+ typedef LSHProjectionOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_type(LSHProjectionType type)
+ void add_type(onert_tflite::LSHProjectionType type)
{
fbb_.AddElement<int8_t>(LSHProjectionOptions::VT_TYPE, static_cast<int8_t>(type), 0);
}
@@ -3017,7 +3674,6 @@ struct LSHProjectionOptionsBuilder
{
start_ = fbb_.StartTable();
}
- LSHProjectionOptionsBuilder &operator=(const LSHProjectionOptionsBuilder &);
flatbuffers::Offset<LSHProjectionOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3026,9 +3682,9 @@ struct LSHProjectionOptionsBuilder
}
};
-inline flatbuffers::Offset<LSHProjectionOptions>
-CreateLSHProjectionOptions(flatbuffers::FlatBufferBuilder &_fbb,
- LSHProjectionType type = LSHProjectionType_UNKNOWN)
+inline flatbuffers::Offset<LSHProjectionOptions> CreateLSHProjectionOptions(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ onert_tflite::LSHProjectionType type = onert_tflite::LSHProjectionType_UNKNOWN)
{
LSHProjectionOptionsBuilder builder_(_fbb);
builder_.add_type(type);
@@ -3037,16 +3693,18 @@ CreateLSHProjectionOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct SVDFOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SVDFOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_RANK = 4,
VT_FUSED_ACTIVATION_FUNCTION = 6,
VT_ASYMMETRIC_QUANTIZE_INPUTS = 8
};
int32_t rank() const { return GetField<int32_t>(VT_RANK, 0); }
- ActivationFunctionType fused_activation_function() const
+ onert_tflite::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool asymmetric_quantize_inputs() const
{
@@ -3062,10 +3720,11 @@ struct SVDFOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SVDFOptionsBuilder
{
+ typedef SVDFOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_rank(int32_t rank) { fbb_.AddElement<int32_t>(SVDFOptions::VT_RANK, rank, 0); }
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(SVDFOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -3079,7 +3738,6 @@ struct SVDFOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SVDFOptionsBuilder &operator=(const SVDFOptionsBuilder &);
flatbuffers::Offset<SVDFOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3090,7 +3748,8 @@ struct SVDFOptionsBuilder
inline flatbuffers::Offset<SVDFOptions>
CreateSVDFOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t rank = 0,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE,
bool asymmetric_quantize_inputs = false)
{
SVDFOptionsBuilder builder_(_fbb);
@@ -3102,14 +3761,16 @@ CreateSVDFOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t rank = 0,
struct RNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef RNNOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_FUSED_ACTIVATION_FUNCTION = 4,
VT_ASYMMETRIC_QUANTIZE_INPUTS = 6
};
- ActivationFunctionType fused_activation_function() const
+ onert_tflite::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool asymmetric_quantize_inputs() const
{
@@ -3125,9 +3786,10 @@ struct RNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct RNNOptionsBuilder
{
+ typedef RNNOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(RNNOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -3141,7 +3803,6 @@ struct RNNOptionsBuilder
{
start_ = fbb_.StartTable();
}
- RNNOptionsBuilder &operator=(const RNNOptionsBuilder &);
flatbuffers::Offset<RNNOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3152,7 +3813,8 @@ struct RNNOptionsBuilder
inline flatbuffers::Offset<RNNOptions>
CreateRNNOptions(flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE,
bool asymmetric_quantize_inputs = false)
{
RNNOptionsBuilder builder_(_fbb);
@@ -3163,16 +3825,18 @@ CreateRNNOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct SequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SequenceRNNOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_TIME_MAJOR = 4,
VT_FUSED_ACTIVATION_FUNCTION = 6,
VT_ASYMMETRIC_QUANTIZE_INPUTS = 8
};
bool time_major() const { return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0; }
- ActivationFunctionType fused_activation_function() const
+ onert_tflite::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool asymmetric_quantize_inputs() const
{
@@ -3188,6 +3852,7 @@ struct SequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SequenceRNNOptionsBuilder
{
+ typedef SequenceRNNOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_time_major(bool time_major)
@@ -3195,7 +3860,7 @@ struct SequenceRNNOptionsBuilder
fbb_.AddElement<uint8_t>(SequenceRNNOptions::VT_TIME_MAJOR, static_cast<uint8_t>(time_major),
0);
}
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(SequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -3209,7 +3874,6 @@ struct SequenceRNNOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SequenceRNNOptionsBuilder &operator=(const SequenceRNNOptionsBuilder &);
flatbuffers::Offset<SequenceRNNOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3218,10 +3882,11 @@ struct SequenceRNNOptionsBuilder
}
};
-inline flatbuffers::Offset<SequenceRNNOptions> CreateSequenceRNNOptions(
- flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- bool asymmetric_quantize_inputs = false)
+inline flatbuffers::Offset<SequenceRNNOptions>
+CreateSequenceRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE,
+ bool asymmetric_quantize_inputs = false)
{
SequenceRNNOptionsBuilder builder_(_fbb);
builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
@@ -3232,7 +3897,8 @@ inline flatbuffers::Offset<SequenceRNNOptions> CreateSequenceRNNOptions(
struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef BidirectionalSequenceRNNOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_TIME_MAJOR = 4,
VT_FUSED_ACTIVATION_FUNCTION = 6,
@@ -3240,9 +3906,10 @@ struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuf
VT_ASYMMETRIC_QUANTIZE_INPUTS = 10
};
bool time_major() const { return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0; }
- ActivationFunctionType fused_activation_function() const
+ onert_tflite::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool merge_outputs() const { return GetField<uint8_t>(VT_MERGE_OUTPUTS, 0) != 0; }
bool asymmetric_quantize_inputs() const
@@ -3260,6 +3927,7 @@ struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuf
struct BidirectionalSequenceRNNOptionsBuilder
{
+ typedef BidirectionalSequenceRNNOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_time_major(bool time_major)
@@ -3267,7 +3935,7 @@ struct BidirectionalSequenceRNNOptionsBuilder
fbb_.AddElement<uint8_t>(BidirectionalSequenceRNNOptions::VT_TIME_MAJOR,
static_cast<uint8_t>(time_major), 0);
}
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(BidirectionalSequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -3286,7 +3954,6 @@ struct BidirectionalSequenceRNNOptionsBuilder
{
start_ = fbb_.StartTable();
}
- BidirectionalSequenceRNNOptionsBuilder &operator=(const BidirectionalSequenceRNNOptionsBuilder &);
flatbuffers::Offset<BidirectionalSequenceRNNOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3296,9 +3963,10 @@ struct BidirectionalSequenceRNNOptionsBuilder
};
inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> CreateBidirectionalSequenceRNNOptions(
- flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- bool merge_outputs = false, bool asymmetric_quantize_inputs = false)
+ flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE,
+ bool merge_outputs = false, bool asymmetric_quantize_inputs = false)
{
BidirectionalSequenceRNNOptionsBuilder builder_(_fbb);
builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
@@ -3310,20 +3978,23 @@ inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> CreateBidirectionalS
struct FullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef FullyConnectedOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_FUSED_ACTIVATION_FUNCTION = 4,
VT_WEIGHTS_FORMAT = 6,
VT_KEEP_NUM_DIMS = 8,
VT_ASYMMETRIC_QUANTIZE_INPUTS = 10
};
- ActivationFunctionType fused_activation_function() const
+ onert_tflite::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
- FullyConnectedOptionsWeightsFormat weights_format() const
+ onert_tflite::FullyConnectedOptionsWeightsFormat weights_format() const
{
- return static_cast<FullyConnectedOptionsWeightsFormat>(GetField<int8_t>(VT_WEIGHTS_FORMAT, 0));
+ return static_cast<onert_tflite::FullyConnectedOptionsWeightsFormat>(
+ GetField<int8_t>(VT_WEIGHTS_FORMAT, 0));
}
bool keep_num_dims() const { return GetField<uint8_t>(VT_KEEP_NUM_DIMS, 0) != 0; }
bool asymmetric_quantize_inputs() const
@@ -3342,14 +4013,15 @@ struct FullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tabl
struct FullyConnectedOptionsBuilder
{
+ typedef FullyConnectedOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(FullyConnectedOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
}
- void add_weights_format(FullyConnectedOptionsWeightsFormat weights_format)
+ void add_weights_format(onert_tflite::FullyConnectedOptionsWeightsFormat weights_format)
{
fbb_.AddElement<int8_t>(FullyConnectedOptions::VT_WEIGHTS_FORMAT,
static_cast<int8_t>(weights_format), 0);
@@ -3368,7 +4040,6 @@ struct FullyConnectedOptionsBuilder
{
start_ = fbb_.StartTable();
}
- FullyConnectedOptionsBuilder &operator=(const FullyConnectedOptionsBuilder &);
flatbuffers::Offset<FullyConnectedOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3377,11 +4048,13 @@ struct FullyConnectedOptionsBuilder
}
};
-inline flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- FullyConnectedOptionsWeightsFormat weights_format = FullyConnectedOptionsWeightsFormat_DEFAULT,
- bool keep_num_dims = false, bool asymmetric_quantize_inputs = false)
+inline flatbuffers::Offset<FullyConnectedOptions>
+CreateFullyConnectedOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE,
+ onert_tflite::FullyConnectedOptionsWeightsFormat weights_format =
+ onert_tflite::FullyConnectedOptionsWeightsFormat_DEFAULT,
+ bool keep_num_dims = false, bool asymmetric_quantize_inputs = false)
{
FullyConnectedOptionsBuilder builder_(_fbb);
builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
@@ -3393,7 +4066,8 @@ inline flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions(
struct SoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SoftmaxOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_BETA = 4
};
@@ -3407,6 +4081,7 @@ struct SoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SoftmaxOptionsBuilder
{
+ typedef SoftmaxOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_beta(float beta) { fbb_.AddElement<float>(SoftmaxOptions::VT_BETA, beta, 0.0f); }
@@ -3414,7 +4089,6 @@ struct SoftmaxOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SoftmaxOptionsBuilder &operator=(const SoftmaxOptionsBuilder &);
flatbuffers::Offset<SoftmaxOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3433,15 +4107,17 @@ CreateSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb, float beta = 0.0f)
struct ConcatenationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef ConcatenationOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_AXIS = 4,
VT_FUSED_ACTIVATION_FUNCTION = 6
};
int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
- ActivationFunctionType fused_activation_function() const
+ onert_tflite::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -3452,10 +4128,11 @@ struct ConcatenationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct ConcatenationOptionsBuilder
{
+ typedef ConcatenationOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(ConcatenationOptions::VT_AXIS, axis, 0); }
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(ConcatenationOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -3464,7 +4141,6 @@ struct ConcatenationOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ConcatenationOptionsBuilder &operator=(const ConcatenationOptionsBuilder &);
flatbuffers::Offset<ConcatenationOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3473,9 +4149,10 @@ struct ConcatenationOptionsBuilder
}
};
-inline flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions(
- flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+inline flatbuffers::Offset<ConcatenationOptions>
+CreateConcatenationOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0,
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE)
{
ConcatenationOptionsBuilder builder_(_fbb);
builder_.add_axis(axis);
@@ -3485,35 +4162,45 @@ inline flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions(
struct AddOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef AddOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
- VT_FUSED_ACTIVATION_FUNCTION = 4
+ VT_FUSED_ACTIVATION_FUNCTION = 4,
+ VT_POT_SCALE_INT16 = 6
};
- ActivationFunctionType fused_activation_function() const
+ onert_tflite::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
+ bool pot_scale_int16() const { return GetField<uint8_t>(VT_POT_SCALE_INT16, 1) != 0; }
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+ VerifyField<uint8_t>(verifier, VT_POT_SCALE_INT16) && verifier.EndTable();
}
};
struct AddOptionsBuilder
{
+ typedef AddOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(AddOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
}
+ void add_pot_scale_int16(bool pot_scale_int16)
+ {
+ fbb_.AddElement<uint8_t>(AddOptions::VT_POT_SCALE_INT16, static_cast<uint8_t>(pot_scale_int16),
+ 1);
+ }
explicit AddOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- AddOptionsBuilder &operator=(const AddOptionsBuilder &);
flatbuffers::Offset<AddOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3524,22 +4211,27 @@ struct AddOptionsBuilder
inline flatbuffers::Offset<AddOptions>
CreateAddOptions(flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE,
+ bool pot_scale_int16 = true)
{
AddOptionsBuilder builder_(_fbb);
+ builder_.add_pot_scale_int16(pot_scale_int16);
builder_.add_fused_activation_function(fused_activation_function);
return builder_.Finish();
}
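`AddOptions` (and `SubOptions` further down, which gets the same treatment) gains a `pot_scale_int16` field whose default is true both in the accessor (`GetField<uint8_t>(VT_POT_SCALE_INT16, 1)`) and in the factory signature, so buffers written before this change still read back as true. A minimal write-and-read sketch under the same hypothetical include names as the earlier example:

#include "flatbuffers/flatbuffers.h"
#include "tflite_schema_generated.h" // hypothetical include name

int main()
{
  flatbuffers::FlatBufferBuilder fbb;
  // Explicitly disable power-of-two int16 scaling; omitting the argument keeps the default (true).
  auto add_opts = onert_tflite::CreateAddOptions(fbb, onert_tflite::ActivationFunctionType_RELU,
                                                 /*pot_scale_int16=*/false);
  fbb.Finish(add_opts);

  const auto *read_back = flatbuffers::GetRoot<onert_tflite::AddOptions>(fbb.GetBufferPointer());
  return read_back->pot_scale_int16() ? 1 : 0; // 0 here; an absent field would read as true
}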
struct MulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef MulOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_FUSED_ACTIVATION_FUNCTION = 4
};
- ActivationFunctionType fused_activation_function() const
+ onert_tflite::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -3550,9 +4242,10 @@ struct MulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct MulOptionsBuilder
{
+ typedef MulOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(MulOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -3561,7 +4254,6 @@ struct MulOptionsBuilder
{
start_ = fbb_.StartTable();
}
- MulOptionsBuilder &operator=(const MulOptionsBuilder &);
flatbuffers::Offset<MulOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3572,7 +4264,8 @@ struct MulOptionsBuilder
inline flatbuffers::Offset<MulOptions>
CreateMulOptions(flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE)
{
MulOptionsBuilder builder_(_fbb);
builder_.add_fused_activation_function(fused_activation_function);
@@ -3581,13 +4274,15 @@ CreateMulOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct L2NormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef L2NormOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_FUSED_ACTIVATION_FUNCTION = 4
};
- ActivationFunctionType fused_activation_function() const
+ onert_tflite::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -3598,9 +4293,10 @@ struct L2NormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct L2NormOptionsBuilder
{
+ typedef L2NormOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(L2NormOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -3609,7 +4305,6 @@ struct L2NormOptionsBuilder
{
start_ = fbb_.StartTable();
}
- L2NormOptionsBuilder &operator=(const L2NormOptionsBuilder &);
flatbuffers::Offset<L2NormOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3620,7 +4315,8 @@ struct L2NormOptionsBuilder
inline flatbuffers::Offset<L2NormOptions>
CreateL2NormOptions(flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE)
{
L2NormOptionsBuilder builder_(_fbb);
builder_.add_fused_activation_function(fused_activation_function);
@@ -3629,7 +4325,8 @@ CreateL2NormOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct LocalResponseNormalizationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef LocalResponseNormalizationOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_RADIUS = 4,
VT_BIAS = 6,
@@ -3650,6 +4347,7 @@ struct LocalResponseNormalizationOptions FLATBUFFERS_FINAL_CLASS : private flatb
struct LocalResponseNormalizationOptionsBuilder
{
+ typedef LocalResponseNormalizationOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_radius(int32_t radius)
@@ -3669,12 +4367,10 @@ struct LocalResponseNormalizationOptionsBuilder
fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_BETA, beta, 0.0f);
}
explicit LocalResponseNormalizationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb)
+ : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- LocalResponseNormalizationOptionsBuilder &
- operator=(const LocalResponseNormalizationOptionsBuilder &);
flatbuffers::Offset<LocalResponseNormalizationOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3697,7 +4393,8 @@ CreateLocalResponseNormalizationOptions(flatbuffers::FlatBufferBuilder &_fbb, in
struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef LSTMOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_FUSED_ACTIVATION_FUNCTION = 4,
VT_CELL_CLIP = 6,
@@ -3705,15 +4402,16 @@ struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
VT_KERNEL_TYPE = 10,
VT_ASYMMETRIC_QUANTIZE_INPUTS = 12
};
- ActivationFunctionType fused_activation_function() const
+ onert_tflite::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); }
float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); }
- LSTMKernelType kernel_type() const
+ onert_tflite::LSTMKernelType kernel_type() const
{
- return static_cast<LSTMKernelType>(GetField<int8_t>(VT_KERNEL_TYPE, 0));
+ return static_cast<onert_tflite::LSTMKernelType>(GetField<int8_t>(VT_KERNEL_TYPE, 0));
}
bool asymmetric_quantize_inputs() const
{
@@ -3732,9 +4430,10 @@ struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct LSTMOptionsBuilder
{
+ typedef LSTMOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(LSTMOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -3747,7 +4446,7 @@ struct LSTMOptionsBuilder
{
fbb_.AddElement<float>(LSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f);
}
- void add_kernel_type(LSTMKernelType kernel_type)
+ void add_kernel_type(onert_tflite::LSTMKernelType kernel_type)
{
fbb_.AddElement<int8_t>(LSTMOptions::VT_KERNEL_TYPE, static_cast<int8_t>(kernel_type), 0);
}
@@ -3760,7 +4459,6 @@ struct LSTMOptionsBuilder
{
start_ = fbb_.StartTable();
}
- LSTMOptionsBuilder &operator=(const LSTMOptionsBuilder &);
flatbuffers::Offset<LSTMOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3771,9 +4469,10 @@ struct LSTMOptionsBuilder
inline flatbuffers::Offset<LSTMOptions>
CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE,
float cell_clip = 0.0f, float proj_clip = 0.0f,
- LSTMKernelType kernel_type = LSTMKernelType_FULL,
+ onert_tflite::LSTMKernelType kernel_type = onert_tflite::LSTMKernelType_FULL,
bool asymmetric_quantize_inputs = false)
{
LSTMOptionsBuilder builder_(_fbb);
@@ -3787,7 +4486,8 @@ CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef UnidirectionalSequenceLSTMOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_FUSED_ACTIVATION_FUNCTION = 4,
VT_CELL_CLIP = 6,
@@ -3795,9 +4495,10 @@ struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatb
VT_TIME_MAJOR = 10,
VT_ASYMMETRIC_QUANTIZE_INPUTS = 12
};
- ActivationFunctionType fused_activation_function() const
+ onert_tflite::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); }
float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); }
@@ -3819,9 +4520,10 @@ struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatb
struct UnidirectionalSequenceLSTMOptionsBuilder
{
+ typedef UnidirectionalSequenceLSTMOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(UnidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -3845,12 +4547,10 @@ struct UnidirectionalSequenceLSTMOptionsBuilder
static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
}
explicit UnidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb)
+ : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- UnidirectionalSequenceLSTMOptionsBuilder &
- operator=(const UnidirectionalSequenceLSTMOptionsBuilder &);
flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3861,10 +4561,11 @@ struct UnidirectionalSequenceLSTMOptionsBuilder
inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions>
CreateUnidirectionalSequenceLSTMOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- float cell_clip = 0.0f, float proj_clip = 0.0f, bool time_major = false,
- bool asymmetric_quantize_inputs = false)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE,
+ float cell_clip = 0.0f, float proj_clip = 0.0f, bool time_major = false,
+ bool asymmetric_quantize_inputs = false)
{
UnidirectionalSequenceLSTMOptionsBuilder builder_(_fbb);
builder_.add_proj_clip(proj_clip);
@@ -3877,7 +4578,8 @@ CreateUnidirectionalSequenceLSTMOptions(
struct BidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef BidirectionalSequenceLSTMOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_FUSED_ACTIVATION_FUNCTION = 4,
VT_CELL_CLIP = 6,
@@ -3886,9 +4588,10 @@ struct BidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbu
VT_TIME_MAJOR = 12,
VT_ASYMMETRIC_QUANTIZE_INPUTS = 14
};
- ActivationFunctionType fused_activation_function() const
+ onert_tflite::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); }
float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); }
@@ -3912,9 +4615,10 @@ struct BidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbu
struct BidirectionalSequenceLSTMOptionsBuilder
{
+ typedef BidirectionalSequenceLSTMOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(BidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -3943,12 +4647,10 @@ struct BidirectionalSequenceLSTMOptionsBuilder
static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
}
explicit BidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb)
+ : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- BidirectionalSequenceLSTMOptionsBuilder &
- operator=(const BidirectionalSequenceLSTMOptionsBuilder &);
flatbuffers::Offset<BidirectionalSequenceLSTMOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -3958,10 +4660,11 @@ struct BidirectionalSequenceLSTMOptionsBuilder
};
inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectionalSequenceLSTMOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- float cell_clip = 0.0f, float proj_clip = 0.0f, bool merge_outputs = false,
- bool time_major = true, bool asymmetric_quantize_inputs = false)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE,
+ float cell_clip = 0.0f, float proj_clip = 0.0f, bool merge_outputs = false,
+ bool time_major = true, bool asymmetric_quantize_inputs = false)
{
BidirectionalSequenceLSTMOptionsBuilder builder_(_fbb);
builder_.add_proj_clip(proj_clip);
@@ -3975,7 +4678,8 @@ inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectional
struct ResizeBilinearOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef ResizeBilinearOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_ALIGN_CORNERS = 8,
VT_HALF_PIXEL_CENTERS = 10
@@ -3991,6 +4695,7 @@ struct ResizeBilinearOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tabl
struct ResizeBilinearOptionsBuilder
{
+ typedef ResizeBilinearOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_align_corners(bool align_corners)
@@ -4007,7 +4712,6 @@ struct ResizeBilinearOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ResizeBilinearOptionsBuilder &operator=(const ResizeBilinearOptionsBuilder &);
flatbuffers::Offset<ResizeBilinearOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4028,20 +4732,24 @@ CreateResizeBilinearOptions(flatbuffers::FlatBufferBuilder &_fbb, bool align_cor
struct ResizeNearestNeighborOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef ResizeNearestNeighborOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
- VT_ALIGN_CORNERS = 4
+ VT_ALIGN_CORNERS = 4,
+ VT_HALF_PIXEL_CENTERS = 6
};
bool align_corners() const { return GetField<uint8_t>(VT_ALIGN_CORNERS, 0) != 0; }
+ bool half_pixel_centers() const { return GetField<uint8_t>(VT_HALF_PIXEL_CENTERS, 0) != 0; }
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ALIGN_CORNERS) &&
- verifier.EndTable();
+ VerifyField<uint8_t>(verifier, VT_HALF_PIXEL_CENTERS) && verifier.EndTable();
}
};
struct ResizeNearestNeighborOptionsBuilder
{
+ typedef ResizeNearestNeighborOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_align_corners(bool align_corners)
@@ -4049,11 +4757,15 @@ struct ResizeNearestNeighborOptionsBuilder
fbb_.AddElement<uint8_t>(ResizeNearestNeighborOptions::VT_ALIGN_CORNERS,
static_cast<uint8_t>(align_corners), 0);
}
+ void add_half_pixel_centers(bool half_pixel_centers)
+ {
+ fbb_.AddElement<uint8_t>(ResizeNearestNeighborOptions::VT_HALF_PIXEL_CENTERS,
+ static_cast<uint8_t>(half_pixel_centers), 0);
+ }
explicit ResizeNearestNeighborOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- ResizeNearestNeighborOptionsBuilder &operator=(const ResizeNearestNeighborOptionsBuilder &);
flatbuffers::Offset<ResizeNearestNeighborOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4063,16 +4775,19 @@ struct ResizeNearestNeighborOptionsBuilder
};
inline flatbuffers::Offset<ResizeNearestNeighborOptions>
-CreateResizeNearestNeighborOptions(flatbuffers::FlatBufferBuilder &_fbb, bool align_corners = false)
+CreateResizeNearestNeighborOptions(flatbuffers::FlatBufferBuilder &_fbb, bool align_corners = false,
+ bool half_pixel_centers = false)
{
ResizeNearestNeighborOptionsBuilder builder_(_fbb);
+ builder_.add_half_pixel_centers(half_pixel_centers);
builder_.add_align_corners(align_corners);
return builder_.Finish();
}
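`ResizeNearestNeighborOptions` picks up the `half_pixel_centers` flag that `ResizeBilinearOptions` above already carries, defaulting to false. A short sketch, same include assumptions as before:

#include "flatbuffers/flatbuffers.h"
#include "tflite_schema_generated.h" // hypothetical include name

int main()
{
  flatbuffers::FlatBufferBuilder fbb;
  // Both flags are one-byte bools in the table; a field left at its default is not
  // written, and reads back as false.
  auto opts = onert_tflite::CreateResizeNearestNeighborOptions(fbb, /*align_corners=*/false,
                                                               /*half_pixel_centers=*/true);
  fbb.Finish(opts);
  return 0;
}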
struct CallOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef CallOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_SUBGRAPH = 4
};
@@ -4086,6 +4801,7 @@ struct CallOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct CallOptionsBuilder
{
+ typedef CallOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_subgraph(uint32_t subgraph)
@@ -4096,7 +4812,6 @@ struct CallOptionsBuilder
{
start_ = fbb_.StartTable();
}
- CallOptionsBuilder &operator=(const CallOptionsBuilder &);
flatbuffers::Offset<CallOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4115,6 +4830,7 @@ inline flatbuffers::Offset<CallOptions> CreateCallOptions(flatbuffers::FlatBuffe
struct PadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef PadOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -4123,13 +4839,13 @@ struct PadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct PadOptionsBuilder
{
+ typedef PadOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit PadOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- PadOptionsBuilder &operator=(const PadOptionsBuilder &);
flatbuffers::Offset<PadOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4146,6 +4862,7 @@ inline flatbuffers::Offset<PadOptions> CreatePadOptions(flatbuffers::FlatBufferB
struct PadV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef PadV2OptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -4154,13 +4871,13 @@ struct PadV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct PadV2OptionsBuilder
{
+ typedef PadV2Options Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit PadV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- PadV2OptionsBuilder &operator=(const PadV2OptionsBuilder &);
flatbuffers::Offset<PadV2Options> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4177,7 +4894,8 @@ inline flatbuffers::Offset<PadV2Options> CreatePadV2Options(flatbuffers::FlatBuf
struct ReshapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef ReshapeOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_NEW_SHAPE = 4
};
@@ -4194,6 +4912,7 @@ struct ReshapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct ReshapeOptionsBuilder
{
+ typedef ReshapeOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_new_shape(flatbuffers::Offset<flatbuffers::Vector<int32_t>> new_shape)
@@ -4204,7 +4923,6 @@ struct ReshapeOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ReshapeOptionsBuilder &operator=(const ReshapeOptionsBuilder &);
flatbuffers::Offset<ReshapeOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4226,12 +4944,13 @@ inline flatbuffers::Offset<ReshapeOptions>
CreateReshapeOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb,
const std::vector<int32_t> *new_shape = nullptr)
{
- return onert_tflite::CreateReshapeOptions(_fbb,
- new_shape ? _fbb.CreateVector<int32_t>(*new_shape) : 0);
+ auto new_shape__ = new_shape ? _fbb.CreateVector<int32_t>(*new_shape) : 0;
+ return onert_tflite::CreateReshapeOptions(_fbb, new_shape__);
}
struct SpaceToBatchNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef SpaceToBatchNDOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -4240,13 +4959,13 @@ struct SpaceToBatchNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tabl
struct SpaceToBatchNDOptionsBuilder
{
+ typedef SpaceToBatchNDOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit SpaceToBatchNDOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- SpaceToBatchNDOptionsBuilder &operator=(const SpaceToBatchNDOptionsBuilder &);
flatbuffers::Offset<SpaceToBatchNDOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4264,6 +4983,7 @@ CreateSpaceToBatchNDOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct BatchToSpaceNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef BatchToSpaceNDOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -4272,13 +4992,13 @@ struct BatchToSpaceNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tabl
struct BatchToSpaceNDOptionsBuilder
{
+ typedef BatchToSpaceNDOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit BatchToSpaceNDOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- BatchToSpaceNDOptionsBuilder &operator=(const BatchToSpaceNDOptionsBuilder &);
flatbuffers::Offset<BatchToSpaceNDOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4296,7 +5016,8 @@ CreateBatchToSpaceNDOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct SkipGramOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SkipGramOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_NGRAM_SIZE = 4,
VT_MAX_SKIP_SIZE = 6,
@@ -4315,6 +5036,7 @@ struct SkipGramOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SkipGramOptionsBuilder
{
+ typedef SkipGramOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_ngram_size(int32_t ngram_size)
@@ -4334,7 +5056,6 @@ struct SkipGramOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SkipGramOptionsBuilder &operator=(const SkipGramOptionsBuilder &);
flatbuffers::Offset<SkipGramOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4356,7 +5077,8 @@ CreateSkipGramOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t ngram_size =
struct SpaceToDepthOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SpaceToDepthOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_BLOCK_SIZE = 4
};
@@ -4370,6 +5092,7 @@ struct SpaceToDepthOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SpaceToDepthOptionsBuilder
{
+ typedef SpaceToDepthOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_block_size(int32_t block_size)
@@ -4380,7 +5103,6 @@ struct SpaceToDepthOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SpaceToDepthOptionsBuilder &operator=(const SpaceToDepthOptionsBuilder &);
flatbuffers::Offset<SpaceToDepthOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4399,7 +5121,8 @@ CreateSpaceToDepthOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t block_si
struct DepthToSpaceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef DepthToSpaceOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_BLOCK_SIZE = 4
};
@@ -4413,6 +5136,7 @@ struct DepthToSpaceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct DepthToSpaceOptionsBuilder
{
+ typedef DepthToSpaceOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_block_size(int32_t block_size)
@@ -4423,7 +5147,6 @@ struct DepthToSpaceOptionsBuilder
{
start_ = fbb_.StartTable();
}
- DepthToSpaceOptionsBuilder &operator=(const DepthToSpaceOptionsBuilder &);
flatbuffers::Offset<DepthToSpaceOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4442,35 +5165,45 @@ CreateDepthToSpaceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t block_si
struct SubOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SubOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
- VT_FUSED_ACTIVATION_FUNCTION = 4
+ VT_FUSED_ACTIVATION_FUNCTION = 4,
+ VT_POT_SCALE_INT16 = 6
};
- ActivationFunctionType fused_activation_function() const
+ onert_tflite::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
+ bool pot_scale_int16() const { return GetField<uint8_t>(VT_POT_SCALE_INT16, 1) != 0; }
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) &&
- VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+ VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+ VerifyField<uint8_t>(verifier, VT_POT_SCALE_INT16) && verifier.EndTable();
}
};
struct SubOptionsBuilder
{
+ typedef SubOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(SubOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
}
+ void add_pot_scale_int16(bool pot_scale_int16)
+ {
+ fbb_.AddElement<uint8_t>(SubOptions::VT_POT_SCALE_INT16, static_cast<uint8_t>(pot_scale_int16),
+ 1);
+ }
explicit SubOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- SubOptionsBuilder &operator=(const SubOptionsBuilder &);
flatbuffers::Offset<SubOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4481,22 +5214,27 @@ struct SubOptionsBuilder
inline flatbuffers::Offset<SubOptions>
CreateSubOptions(flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE,
+ bool pot_scale_int16 = true)
{
SubOptionsBuilder builder_(_fbb);
+ builder_.add_pot_scale_int16(pot_scale_int16);
builder_.add_fused_activation_function(fused_activation_function);
return builder_.Finish();
}
struct DivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef DivOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_FUSED_ACTIVATION_FUNCTION = 4
};
- ActivationFunctionType fused_activation_function() const
+ onert_tflite::ActivationFunctionType fused_activation_function() const
{
- return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ return static_cast<onert_tflite::ActivationFunctionType>(
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -4507,9 +5245,10 @@ struct DivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct DivOptionsBuilder
{
+ typedef DivOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+ void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
{
fbb_.AddElement<int8_t>(DivOptions::VT_FUSED_ACTIVATION_FUNCTION,
static_cast<int8_t>(fused_activation_function), 0);
@@ -4518,7 +5257,6 @@ struct DivOptionsBuilder
{
start_ = fbb_.StartTable();
}
- DivOptionsBuilder &operator=(const DivOptionsBuilder &);
flatbuffers::Offset<DivOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4529,7 +5267,8 @@ struct DivOptionsBuilder
inline flatbuffers::Offset<DivOptions>
CreateDivOptions(flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+ onert_tflite::ActivationFunctionType fused_activation_function =
+ onert_tflite::ActivationFunctionType_NONE)
{
DivOptionsBuilder builder_(_fbb);
builder_.add_fused_activation_function(fused_activation_function);
@@ -4538,6 +5277,7 @@ CreateDivOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct TopKV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef TopKV2OptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -4546,13 +5286,13 @@ struct TopKV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct TopKV2OptionsBuilder
{
+ typedef TopKV2Options Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit TopKV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- TopKV2OptionsBuilder &operator=(const TopKV2OptionsBuilder &);
flatbuffers::Offset<TopKV2Options> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4569,13 +5309,14 @@ inline flatbuffers::Offset<TopKV2Options> CreateTopKV2Options(flatbuffers::FlatB
struct EmbeddingLookupSparseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef EmbeddingLookupSparseOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_COMBINER = 4
};
- CombinerType combiner() const
+ onert_tflite::CombinerType combiner() const
{
- return static_cast<CombinerType>(GetField<int8_t>(VT_COMBINER, 0));
+ return static_cast<onert_tflite::CombinerType>(GetField<int8_t>(VT_COMBINER, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -4586,9 +5327,10 @@ struct EmbeddingLookupSparseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffer
struct EmbeddingLookupSparseOptionsBuilder
{
+ typedef EmbeddingLookupSparseOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_combiner(CombinerType combiner)
+ void add_combiner(onert_tflite::CombinerType combiner)
{
fbb_.AddElement<int8_t>(EmbeddingLookupSparseOptions::VT_COMBINER,
static_cast<int8_t>(combiner), 0);
@@ -4597,7 +5339,6 @@ struct EmbeddingLookupSparseOptionsBuilder
{
start_ = fbb_.StartTable();
}
- EmbeddingLookupSparseOptionsBuilder &operator=(const EmbeddingLookupSparseOptionsBuilder &);
flatbuffers::Offset<EmbeddingLookupSparseOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4606,9 +5347,9 @@ struct EmbeddingLookupSparseOptionsBuilder
}
};
-inline flatbuffers::Offset<EmbeddingLookupSparseOptions>
-CreateEmbeddingLookupSparseOptions(flatbuffers::FlatBufferBuilder &_fbb,
- CombinerType combiner = CombinerType_SUM)
+inline flatbuffers::Offset<EmbeddingLookupSparseOptions> CreateEmbeddingLookupSparseOptions(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ onert_tflite::CombinerType combiner = onert_tflite::CombinerType_SUM)
{
EmbeddingLookupSparseOptionsBuilder builder_(_fbb);
builder_.add_combiner(combiner);
@@ -4617,28 +5358,35 @@ CreateEmbeddingLookupSparseOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct GatherOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef GatherOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
- VT_AXIS = 4
+ VT_AXIS = 4,
+ VT_BATCH_DIMS = 6
};
int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
+ int32_t batch_dims() const { return GetField<int32_t>(VT_BATCH_DIMS, 0); }
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_AXIS) &&
- verifier.EndTable();
+ VerifyField<int32_t>(verifier, VT_BATCH_DIMS) && verifier.EndTable();
}
};
struct GatherOptionsBuilder
{
+ typedef GatherOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(GatherOptions::VT_AXIS, axis, 0); }
+ void add_batch_dims(int32_t batch_dims)
+ {
+ fbb_.AddElement<int32_t>(GatherOptions::VT_BATCH_DIMS, batch_dims, 0);
+ }
explicit GatherOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- GatherOptionsBuilder &operator=(const GatherOptionsBuilder &);
flatbuffers::Offset<GatherOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4647,16 +5395,18 @@ struct GatherOptionsBuilder
}
};
-inline flatbuffers::Offset<GatherOptions> CreateGatherOptions(flatbuffers::FlatBufferBuilder &_fbb,
- int32_t axis = 0)
+inline flatbuffers::Offset<GatherOptions>
+CreateGatherOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0, int32_t batch_dims = 0)
{
GatherOptionsBuilder builder_(_fbb);
+ builder_.add_batch_dims(batch_dims);
builder_.add_axis(axis);
return builder_.Finish();
}
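`GatherOptions` adds a `batch_dims` field with a default of 0, so buffers produced by the old single-field schema keep their meaning when read through the new accessor. A short round-trip sketch, same include assumptions; treating `batch_dims` as TensorFlow-style batched gather is an assumption about the upstream schema, not something this header spells out:

#include <cstdint>
#include "flatbuffers/flatbuffers.h"
#include "tflite_schema_generated.h" // hypothetical include name

int main()
{
  flatbuffers::FlatBufferBuilder fbb;
  auto opts_off = onert_tflite::CreateGatherOptions(fbb, /*axis=*/1, /*batch_dims=*/1);
  fbb.Finish(opts_off);

  const auto *opts = flatbuffers::GetRoot<onert_tflite::GatherOptions>(fbb.GetBufferPointer());
  // A buffer written without the field reads back as batch_dims() == 0.
  return (opts->axis() == 1 && opts->batch_dims() == 1) ? 0 : 1;
}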
struct TransposeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef TransposeOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -4665,13 +5415,13 @@ struct TransposeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct TransposeOptionsBuilder
{
+ typedef TransposeOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit TransposeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- TransposeOptionsBuilder &operator=(const TransposeOptionsBuilder &);
flatbuffers::Offset<TransposeOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4689,6 +5439,7 @@ CreateTransposeOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct ExpOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef ExpOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -4697,13 +5448,13 @@ struct ExpOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct ExpOptionsBuilder
{
+ typedef ExpOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit ExpOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- ExpOptionsBuilder &operator=(const ExpOptionsBuilder &);
flatbuffers::Offset<ExpOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4720,6 +5471,7 @@ inline flatbuffers::Offset<ExpOptions> CreateExpOptions(flatbuffers::FlatBufferB
struct CosOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef CosOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -4728,13 +5480,13 @@ struct CosOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct CosOptionsBuilder
{
+ typedef CosOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit CosOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- CosOptionsBuilder &operator=(const CosOptionsBuilder &);
flatbuffers::Offset<CosOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4751,7 +5503,8 @@ inline flatbuffers::Offset<CosOptions> CreateCosOptions(flatbuffers::FlatBufferB
struct ReducerOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef ReducerOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_KEEP_DIMS = 4
};
@@ -4765,6 +5518,7 @@ struct ReducerOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct ReducerOptionsBuilder
{
+ typedef ReducerOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_keep_dims(bool keep_dims)
@@ -4775,7 +5529,6 @@ struct ReducerOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ReducerOptionsBuilder &operator=(const ReducerOptionsBuilder &);
flatbuffers::Offset<ReducerOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4794,7 +5547,8 @@ CreateReducerOptions(flatbuffers::FlatBufferBuilder &_fbb, bool keep_dims = fals
struct SqueezeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SqueezeOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_SQUEEZE_DIMS = 4
};
@@ -4811,6 +5565,7 @@ struct SqueezeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SqueezeOptionsBuilder
{
+ typedef SqueezeOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_squeeze_dims(flatbuffers::Offset<flatbuffers::Vector<int32_t>> squeeze_dims)
@@ -4821,7 +5576,6 @@ struct SqueezeOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SqueezeOptionsBuilder &operator=(const SqueezeOptionsBuilder &);
flatbuffers::Offset<SqueezeOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4843,13 +5597,14 @@ inline flatbuffers::Offset<SqueezeOptions>
CreateSqueezeOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb,
const std::vector<int32_t> *squeeze_dims = nullptr)
{
- return onert_tflite::CreateSqueezeOptions(
- _fbb, squeeze_dims ? _fbb.CreateVector<int32_t>(*squeeze_dims) : 0);
+ auto squeeze_dims__ = squeeze_dims ? _fbb.CreateVector<int32_t>(*squeeze_dims) : 0;
+ return onert_tflite::CreateSqueezeOptions(_fbb, squeeze_dims__);
}
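// --- Editorial note: not part of the generated header -------------------------
// The rewrite above hoists the nested CreateVector call into the local
// squeeze_dims__ before CreateSqueezeOptions runs. Argument evaluation order
// is unspecified in C++, so serializing one offset inside another call's
// argument list can interleave writes to the same FlatBufferBuilder; the named
// temporary sequences the vector strictly before the table is started. Call
// sites are unchanged:
//
//   std::vector<int32_t> dims{0, 2};
//   auto opts = onert_tflite::CreateSqueezeOptionsDirect(fbb, &dims);
// -------------------------------------------------------------------------------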
struct SplitOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SplitOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_NUM_SPLITS = 4
};
@@ -4863,6 +5618,7 @@ struct SplitOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SplitOptionsBuilder
{
+ typedef SplitOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_num_splits(int32_t num_splits)
@@ -4873,7 +5629,6 @@ struct SplitOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SplitOptionsBuilder &operator=(const SplitOptionsBuilder &);
flatbuffers::Offset<SplitOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4892,7 +5647,8 @@ inline flatbuffers::Offset<SplitOptions> CreateSplitOptions(flatbuffers::FlatBuf
struct SplitVOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SplitVOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_NUM_SPLITS = 4
};
@@ -4906,6 +5662,7 @@ struct SplitVOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SplitVOptionsBuilder
{
+ typedef SplitVOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_num_splits(int32_t num_splits)
@@ -4916,7 +5673,6 @@ struct SplitVOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SplitVOptionsBuilder &operator=(const SplitVOptionsBuilder &);
flatbuffers::Offset<SplitVOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -4935,7 +5691,8 @@ inline flatbuffers::Offset<SplitVOptions> CreateSplitVOptions(flatbuffers::FlatB
struct StridedSliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef StridedSliceOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_BEGIN_MASK = 4,
VT_END_MASK = 6,
@@ -4960,6 +5717,7 @@ struct StridedSliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct StridedSliceOptionsBuilder
{
+ typedef StridedSliceOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_begin_mask(int32_t begin_mask)
@@ -4986,7 +5744,6 @@ struct StridedSliceOptionsBuilder
{
start_ = fbb_.StartTable();
}
- StridedSliceOptionsBuilder &operator=(const StridedSliceOptionsBuilder &);
flatbuffers::Offset<StridedSliceOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5011,6 +5768,7 @@ CreateStridedSliceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t begin_ma
struct LogSoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef LogSoftmaxOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5019,13 +5777,13 @@ struct LogSoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct LogSoftmaxOptionsBuilder
{
+ typedef LogSoftmaxOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit LogSoftmaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- LogSoftmaxOptionsBuilder &operator=(const LogSoftmaxOptionsBuilder &);
flatbuffers::Offset<LogSoftmaxOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5043,18 +5801,19 @@ CreateLogSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct CastOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef CastOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_IN_DATA_TYPE = 4,
VT_OUT_DATA_TYPE = 6
};
- TensorType in_data_type() const
+ onert_tflite::TensorType in_data_type() const
{
- return static_cast<TensorType>(GetField<int8_t>(VT_IN_DATA_TYPE, 0));
+ return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_IN_DATA_TYPE, 0));
}
- TensorType out_data_type() const
+ onert_tflite::TensorType out_data_type() const
{
- return static_cast<TensorType>(GetField<int8_t>(VT_OUT_DATA_TYPE, 0));
+ return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_OUT_DATA_TYPE, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -5065,13 +5824,14 @@ struct CastOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct CastOptionsBuilder
{
+ typedef CastOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_in_data_type(TensorType in_data_type)
+ void add_in_data_type(onert_tflite::TensorType in_data_type)
{
fbb_.AddElement<int8_t>(CastOptions::VT_IN_DATA_TYPE, static_cast<int8_t>(in_data_type), 0);
}
- void add_out_data_type(TensorType out_data_type)
+ void add_out_data_type(onert_tflite::TensorType out_data_type)
{
fbb_.AddElement<int8_t>(CastOptions::VT_OUT_DATA_TYPE, static_cast<int8_t>(out_data_type), 0);
}
@@ -5079,7 +5839,6 @@ struct CastOptionsBuilder
{
start_ = fbb_.StartTable();
}
- CastOptionsBuilder &operator=(const CastOptionsBuilder &);
flatbuffers::Offset<CastOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5090,8 +5849,8 @@ struct CastOptionsBuilder
inline flatbuffers::Offset<CastOptions>
CreateCastOptions(flatbuffers::FlatBufferBuilder &_fbb,
- TensorType in_data_type = TensorType_FLOAT32,
- TensorType out_data_type = TensorType_FLOAT32)
+ onert_tflite::TensorType in_data_type = onert_tflite::TensorType_FLOAT32,
+ onert_tflite::TensorType out_data_type = onert_tflite::TensorType_FLOAT32)
{
CastOptionsBuilder builder_(_fbb);
builder_.add_out_data_type(out_data_type);
@@ -5101,6 +5860,7 @@ CreateCastOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct DequantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef DequantizeOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5109,13 +5869,13 @@ struct DequantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct DequantizeOptionsBuilder
{
+ typedef DequantizeOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit DequantizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- DequantizeOptionsBuilder &operator=(const DequantizeOptionsBuilder &);
flatbuffers::Offset<DequantizeOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5133,6 +5893,7 @@ CreateDequantizeOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct MaximumMinimumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef MaximumMinimumOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5141,13 +5902,13 @@ struct MaximumMinimumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tabl
struct MaximumMinimumOptionsBuilder
{
+ typedef MaximumMinimumOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit MaximumMinimumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- MaximumMinimumOptionsBuilder &operator=(const MaximumMinimumOptionsBuilder &);
flatbuffers::Offset<MaximumMinimumOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5165,6 +5926,7 @@ CreateMaximumMinimumOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct TileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef TileOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5173,13 +5935,13 @@ struct TileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct TileOptionsBuilder
{
+ typedef TileOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit TileOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- TileOptionsBuilder &operator=(const TileOptionsBuilder &);
flatbuffers::Offset<TileOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5196,13 +5958,14 @@ inline flatbuffers::Offset<TileOptions> CreateTileOptions(flatbuffers::FlatBuffe
struct ArgMaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef ArgMaxOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_OUTPUT_TYPE = 4
};
- TensorType output_type() const
+ onert_tflite::TensorType output_type() const
{
- return static_cast<TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0));
+ return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -5213,9 +5976,10 @@ struct ArgMaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct ArgMaxOptionsBuilder
{
+ typedef ArgMaxOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_output_type(TensorType output_type)
+ void add_output_type(onert_tflite::TensorType output_type)
{
fbb_.AddElement<int8_t>(ArgMaxOptions::VT_OUTPUT_TYPE, static_cast<int8_t>(output_type), 0);
}
@@ -5223,7 +5987,6 @@ struct ArgMaxOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ArgMaxOptionsBuilder &operator=(const ArgMaxOptionsBuilder &);
flatbuffers::Offset<ArgMaxOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5234,7 +5997,7 @@ struct ArgMaxOptionsBuilder
inline flatbuffers::Offset<ArgMaxOptions>
CreateArgMaxOptions(flatbuffers::FlatBufferBuilder &_fbb,
- TensorType output_type = TensorType_FLOAT32)
+ onert_tflite::TensorType output_type = onert_tflite::TensorType_FLOAT32)
{
ArgMaxOptionsBuilder builder_(_fbb);
builder_.add_output_type(output_type);
@@ -5243,13 +6006,14 @@ CreateArgMaxOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct ArgMinOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef ArgMinOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_OUTPUT_TYPE = 4
};
- TensorType output_type() const
+ onert_tflite::TensorType output_type() const
{
- return static_cast<TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0));
+ return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -5260,9 +6024,10 @@ struct ArgMinOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct ArgMinOptionsBuilder
{
+ typedef ArgMinOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_output_type(TensorType output_type)
+ void add_output_type(onert_tflite::TensorType output_type)
{
fbb_.AddElement<int8_t>(ArgMinOptions::VT_OUTPUT_TYPE, static_cast<int8_t>(output_type), 0);
}
@@ -5270,7 +6035,6 @@ struct ArgMinOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ArgMinOptionsBuilder &operator=(const ArgMinOptionsBuilder &);
flatbuffers::Offset<ArgMinOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5281,7 +6045,7 @@ struct ArgMinOptionsBuilder
inline flatbuffers::Offset<ArgMinOptions>
CreateArgMinOptions(flatbuffers::FlatBufferBuilder &_fbb,
- TensorType output_type = TensorType_FLOAT32)
+ onert_tflite::TensorType output_type = onert_tflite::TensorType_FLOAT32)
{
ArgMinOptionsBuilder builder_(_fbb);
builder_.add_output_type(output_type);
@@ -5290,6 +6054,7 @@ CreateArgMinOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct GreaterOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef GreaterOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5298,13 +6063,13 @@ struct GreaterOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct GreaterOptionsBuilder
{
+ typedef GreaterOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit GreaterOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- GreaterOptionsBuilder &operator=(const GreaterOptionsBuilder &);
flatbuffers::Offset<GreaterOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5322,6 +6087,7 @@ CreateGreaterOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct GreaterEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef GreaterEqualOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5330,13 +6096,13 @@ struct GreaterEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct GreaterEqualOptionsBuilder
{
+ typedef GreaterEqualOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit GreaterEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- GreaterEqualOptionsBuilder &operator=(const GreaterEqualOptionsBuilder &);
flatbuffers::Offset<GreaterEqualOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5354,6 +6120,7 @@ CreateGreaterEqualOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct LessOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef LessOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5362,13 +6129,13 @@ struct LessOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct LessOptionsBuilder
{
+ typedef LessOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit LessOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- LessOptionsBuilder &operator=(const LessOptionsBuilder &);
flatbuffers::Offset<LessOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5385,6 +6152,7 @@ inline flatbuffers::Offset<LessOptions> CreateLessOptions(flatbuffers::FlatBuffe
struct LessEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef LessEqualOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5393,13 +6161,13 @@ struct LessEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct LessEqualOptionsBuilder
{
+ typedef LessEqualOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit LessEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- LessEqualOptionsBuilder &operator=(const LessEqualOptionsBuilder &);
flatbuffers::Offset<LessEqualOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5417,6 +6185,7 @@ CreateLessEqualOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct NegOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef NegOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5425,13 +6194,13 @@ struct NegOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct NegOptionsBuilder
{
+ typedef NegOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit NegOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- NegOptionsBuilder &operator=(const NegOptionsBuilder &);
flatbuffers::Offset<NegOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5448,6 +6217,7 @@ inline flatbuffers::Offset<NegOptions> CreateNegOptions(flatbuffers::FlatBufferB
struct SelectOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef SelectOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5456,13 +6226,13 @@ struct SelectOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SelectOptionsBuilder
{
+ typedef SelectOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit SelectOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- SelectOptionsBuilder &operator=(const SelectOptionsBuilder &);
flatbuffers::Offset<SelectOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5479,6 +6249,7 @@ inline flatbuffers::Offset<SelectOptions> CreateSelectOptions(flatbuffers::FlatB
struct SliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef SliceOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5487,13 +6258,13 @@ struct SliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SliceOptionsBuilder
{
+ typedef SliceOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit SliceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- SliceOptionsBuilder &operator=(const SliceOptionsBuilder &);
flatbuffers::Offset<SliceOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5510,13 +6281,17 @@ inline flatbuffers::Offset<SliceOptions> CreateSliceOptions(flatbuffers::FlatBuf
struct TransposeConvOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef TransposeConvOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_PADDING = 4,
VT_STRIDE_W = 6,
VT_STRIDE_H = 8
};
- Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); }
+ onert_tflite::Padding padding() const
+ {
+ return static_cast<onert_tflite::Padding>(GetField<int8_t>(VT_PADDING, 0));
+ }
int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
bool Verify(flatbuffers::Verifier &verifier) const
@@ -5529,9 +6304,10 @@ struct TransposeConvOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct TransposeConvOptionsBuilder
{
+ typedef TransposeConvOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_padding(Padding padding)
+ void add_padding(onert_tflite::Padding padding)
{
fbb_.AddElement<int8_t>(TransposeConvOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
}
@@ -5547,7 +6323,6 @@ struct TransposeConvOptionsBuilder
{
start_ = fbb_.StartTable();
}
- TransposeConvOptionsBuilder &operator=(const TransposeConvOptionsBuilder &);
flatbuffers::Offset<TransposeConvOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5557,7 +6332,8 @@ struct TransposeConvOptionsBuilder
};
inline flatbuffers::Offset<TransposeConvOptions>
-CreateTransposeConvOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME,
+CreateTransposeConvOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ onert_tflite::Padding padding = onert_tflite::Padding_SAME,
int32_t stride_w = 0, int32_t stride_h = 0)
{
TransposeConvOptionsBuilder builder_(_fbb);
@@ -5569,6 +6345,7 @@ CreateTransposeConvOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding
struct ExpandDimsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef ExpandDimsOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5577,13 +6354,13 @@ struct ExpandDimsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct ExpandDimsOptionsBuilder
{
+ typedef ExpandDimsOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit ExpandDimsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- ExpandDimsOptionsBuilder &operator=(const ExpandDimsOptionsBuilder &);
flatbuffers::Offset<ExpandDimsOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5601,7 +6378,8 @@ CreateExpandDimsOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct SparseToDenseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SparseToDenseOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_VALIDATE_INDICES = 4
};
@@ -5615,6 +6393,7 @@ struct SparseToDenseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SparseToDenseOptionsBuilder
{
+ typedef SparseToDenseOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_validate_indices(bool validate_indices)
@@ -5626,7 +6405,6 @@ struct SparseToDenseOptionsBuilder
{
start_ = fbb_.StartTable();
}
- SparseToDenseOptionsBuilder &operator=(const SparseToDenseOptionsBuilder &);
flatbuffers::Offset<SparseToDenseOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5645,6 +6423,7 @@ CreateSparseToDenseOptions(flatbuffers::FlatBufferBuilder &_fbb, bool validate_i
struct EqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef EqualOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5653,13 +6432,13 @@ struct EqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct EqualOptionsBuilder
{
+ typedef EqualOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit EqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- EqualOptionsBuilder &operator=(const EqualOptionsBuilder &);
flatbuffers::Offset<EqualOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5676,6 +6455,7 @@ inline flatbuffers::Offset<EqualOptions> CreateEqualOptions(flatbuffers::FlatBuf
struct NotEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef NotEqualOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5684,13 +6464,13 @@ struct NotEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct NotEqualOptionsBuilder
{
+ typedef NotEqualOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit NotEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- NotEqualOptionsBuilder &operator=(const NotEqualOptionsBuilder &);
flatbuffers::Offset<NotEqualOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5708,11 +6488,15 @@ CreateNotEqualOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct ShapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef ShapeOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_OUT_TYPE = 4
};
- TensorType out_type() const { return static_cast<TensorType>(GetField<int8_t>(VT_OUT_TYPE, 0)); }
+ onert_tflite::TensorType out_type() const
+ {
+ return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_OUT_TYPE, 0));
+ }
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_OUT_TYPE) &&
@@ -5722,9 +6506,10 @@ struct ShapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct ShapeOptionsBuilder
{
+ typedef ShapeOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_out_type(TensorType out_type)
+ void add_out_type(onert_tflite::TensorType out_type)
{
fbb_.AddElement<int8_t>(ShapeOptions::VT_OUT_TYPE, static_cast<int8_t>(out_type), 0);
}
@@ -5732,7 +6517,6 @@ struct ShapeOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ShapeOptionsBuilder &operator=(const ShapeOptionsBuilder &);
flatbuffers::Offset<ShapeOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5742,7 +6526,8 @@ struct ShapeOptionsBuilder
};
inline flatbuffers::Offset<ShapeOptions>
-CreateShapeOptions(flatbuffers::FlatBufferBuilder &_fbb, TensorType out_type = TensorType_FLOAT32)
+CreateShapeOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ onert_tflite::TensorType out_type = onert_tflite::TensorType_FLOAT32)
{
ShapeOptionsBuilder builder_(_fbb);
builder_.add_out_type(out_type);
@@ -5751,6 +6536,7 @@ CreateShapeOptions(flatbuffers::FlatBufferBuilder &_fbb, TensorType out_type = T
struct RankOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef RankOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5759,13 +6545,13 @@ struct RankOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct RankOptionsBuilder
{
+ typedef RankOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit RankOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- RankOptionsBuilder &operator=(const RankOptionsBuilder &);
flatbuffers::Offset<RankOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5782,6 +6568,7 @@ inline flatbuffers::Offset<RankOptions> CreateRankOptions(flatbuffers::FlatBuffe
struct PowOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef PowOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5790,13 +6577,13 @@ struct PowOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct PowOptionsBuilder
{
+ typedef PowOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit PowOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- PowOptionsBuilder &operator=(const PowOptionsBuilder &);
flatbuffers::Offset<PowOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5813,7 +6600,8 @@ inline flatbuffers::Offset<PowOptions> CreatePowOptions(flatbuffers::FlatBufferB
struct FakeQuantOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef FakeQuantOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_MIN = 4,
VT_MAX = 6,
@@ -5834,6 +6622,7 @@ struct FakeQuantOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct FakeQuantOptionsBuilder
{
+ typedef FakeQuantOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_min(float min) { fbb_.AddElement<float>(FakeQuantOptions::VT_MIN, min, 0.0f); }
@@ -5851,7 +6640,6 @@ struct FakeQuantOptionsBuilder
{
start_ = fbb_.StartTable();
}
- FakeQuantOptionsBuilder &operator=(const FakeQuantOptionsBuilder &);
flatbuffers::Offset<FakeQuantOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5874,7 +6662,8 @@ CreateFakeQuantOptions(flatbuffers::FlatBufferBuilder &_fbb, float min = 0.0f, f
struct PackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef PackOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_VALUES_COUNT = 4,
VT_AXIS = 6
@@ -5890,6 +6679,7 @@ struct PackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct PackOptionsBuilder
{
+ typedef PackOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_values_count(int32_t values_count)
@@ -5901,7 +6691,6 @@ struct PackOptionsBuilder
{
start_ = fbb_.StartTable();
}
- PackOptionsBuilder &operator=(const PackOptionsBuilder &);
flatbuffers::Offset<PackOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5921,6 +6710,7 @@ CreatePackOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t values_count = 0
struct LogicalOrOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef LogicalOrOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5929,13 +6719,13 @@ struct LogicalOrOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct LogicalOrOptionsBuilder
{
+ typedef LogicalOrOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit LogicalOrOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- LogicalOrOptionsBuilder &operator=(const LogicalOrOptionsBuilder &);
flatbuffers::Offset<LogicalOrOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5953,7 +6743,8 @@ CreateLogicalOrOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct OneHotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef OneHotOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_AXIS = 4
};
@@ -5967,6 +6758,7 @@ struct OneHotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct OneHotOptionsBuilder
{
+ typedef OneHotOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(OneHotOptions::VT_AXIS, axis, 0); }
@@ -5974,7 +6766,6 @@ struct OneHotOptionsBuilder
{
start_ = fbb_.StartTable();
}
- OneHotOptionsBuilder &operator=(const OneHotOptionsBuilder &);
flatbuffers::Offset<OneHotOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -5993,6 +6784,7 @@ inline flatbuffers::Offset<OneHotOptions> CreateOneHotOptions(flatbuffers::FlatB
struct AbsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef AbsOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6001,13 +6793,13 @@ struct AbsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct AbsOptionsBuilder
{
+ typedef AbsOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit AbsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- AbsOptionsBuilder &operator=(const AbsOptionsBuilder &);
flatbuffers::Offset<AbsOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6024,6 +6816,7 @@ inline flatbuffers::Offset<AbsOptions> CreateAbsOptions(flatbuffers::FlatBufferB
struct HardSwishOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef HardSwishOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6032,13 +6825,13 @@ struct HardSwishOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct HardSwishOptionsBuilder
{
+ typedef HardSwishOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit HardSwishOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- HardSwishOptionsBuilder &operator=(const HardSwishOptionsBuilder &);
flatbuffers::Offset<HardSwishOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6056,6 +6849,7 @@ CreateHardSwishOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct LogicalAndOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef LogicalAndOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6064,13 +6858,13 @@ struct LogicalAndOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct LogicalAndOptionsBuilder
{
+ typedef LogicalAndOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit LogicalAndOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- LogicalAndOptionsBuilder &operator=(const LogicalAndOptionsBuilder &);
flatbuffers::Offset<LogicalAndOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6088,6 +6882,7 @@ CreateLogicalAndOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct LogicalNotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef LogicalNotOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6096,13 +6891,13 @@ struct LogicalNotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct LogicalNotOptionsBuilder
{
+ typedef LogicalNotOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit LogicalNotOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- LogicalNotOptionsBuilder &operator=(const LogicalNotOptionsBuilder &);
flatbuffers::Offset<LogicalNotOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6120,7 +6915,8 @@ CreateLogicalNotOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct UnpackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef UnpackOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_NUM = 4,
VT_AXIS = 6
@@ -6136,6 +6932,7 @@ struct UnpackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct UnpackOptionsBuilder
{
+ typedef UnpackOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_num(int32_t num) { fbb_.AddElement<int32_t>(UnpackOptions::VT_NUM, num, 0); }
@@ -6144,7 +6941,6 @@ struct UnpackOptionsBuilder
{
start_ = fbb_.StartTable();
}
- UnpackOptionsBuilder &operator=(const UnpackOptionsBuilder &);
flatbuffers::Offset<UnpackOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6164,6 +6960,7 @@ inline flatbuffers::Offset<UnpackOptions> CreateUnpackOptions(flatbuffers::FlatB
struct FloorDivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef FloorDivOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6172,13 +6969,13 @@ struct FloorDivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct FloorDivOptionsBuilder
{
+ typedef FloorDivOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit FloorDivOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- FloorDivOptionsBuilder &operator=(const FloorDivOptionsBuilder &);
flatbuffers::Offset<FloorDivOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6196,6 +6993,7 @@ CreateFloorDivOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct SquareOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef SquareOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6204,13 +7002,13 @@ struct SquareOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SquareOptionsBuilder
{
+ typedef SquareOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit SquareOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- SquareOptionsBuilder &operator=(const SquareOptionsBuilder &);
flatbuffers::Offset<SquareOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6227,6 +7025,7 @@ inline flatbuffers::Offset<SquareOptions> CreateSquareOptions(flatbuffers::FlatB
struct ZerosLikeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef ZerosLikeOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6235,13 +7034,13 @@ struct ZerosLikeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct ZerosLikeOptionsBuilder
{
+ typedef ZerosLikeOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit ZerosLikeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- ZerosLikeOptionsBuilder &operator=(const ZerosLikeOptionsBuilder &);
flatbuffers::Offset<ZerosLikeOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6259,6 +7058,7 @@ CreateZerosLikeOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct FillOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef FillOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6267,13 +7067,13 @@ struct FillOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct FillOptionsBuilder
{
+ typedef FillOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit FillOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- FillOptionsBuilder &operator=(const FillOptionsBuilder &);
flatbuffers::Offset<FillOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6290,6 +7090,7 @@ inline flatbuffers::Offset<FillOptions> CreateFillOptions(flatbuffers::FlatBuffe
struct FloorModOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef FloorModOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6298,13 +7099,13 @@ struct FloorModOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct FloorModOptionsBuilder
{
+ typedef FloorModOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit FloorModOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- FloorModOptionsBuilder &operator=(const FloorModOptionsBuilder &);
flatbuffers::Offset<FloorModOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6322,6 +7123,7 @@ CreateFloorModOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct RangeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef RangeOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6330,13 +7132,13 @@ struct RangeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct RangeOptionsBuilder
{
+ typedef RangeOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit RangeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- RangeOptionsBuilder &operator=(const RangeOptionsBuilder &);
flatbuffers::Offset<RangeOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6353,7 +7155,8 @@ inline flatbuffers::Offset<RangeOptions> CreateRangeOptions(flatbuffers::FlatBuf
struct LeakyReluOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef LeakyReluOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_ALPHA = 4
};
@@ -6367,6 +7170,7 @@ struct LeakyReluOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct LeakyReluOptionsBuilder
{
+ typedef LeakyReluOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_alpha(float alpha) { fbb_.AddElement<float>(LeakyReluOptions::VT_ALPHA, alpha, 0.0f); }
@@ -6374,7 +7178,6 @@ struct LeakyReluOptionsBuilder
{
start_ = fbb_.StartTable();
}
- LeakyReluOptionsBuilder &operator=(const LeakyReluOptionsBuilder &);
flatbuffers::Offset<LeakyReluOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6393,6 +7196,7 @@ CreateLeakyReluOptions(flatbuffers::FlatBufferBuilder &_fbb, float alpha = 0.0f)
struct SquaredDifferenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef SquaredDifferenceOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6401,13 +7205,13 @@ struct SquaredDifferenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::T
struct SquaredDifferenceOptionsBuilder
{
+ typedef SquaredDifferenceOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit SquaredDifferenceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- SquaredDifferenceOptionsBuilder &operator=(const SquaredDifferenceOptionsBuilder &);
flatbuffers::Offset<SquaredDifferenceOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6425,11 +7229,15 @@ CreateSquaredDifferenceOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct MirrorPadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef MirrorPadOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_MODE = 4
};
- MirrorPadMode mode() const { return static_cast<MirrorPadMode>(GetField<int8_t>(VT_MODE, 0)); }
+ onert_tflite::MirrorPadMode mode() const
+ {
+ return static_cast<onert_tflite::MirrorPadMode>(GetField<int8_t>(VT_MODE, 0));
+ }
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_MODE) &&
@@ -6439,9 +7247,10 @@ struct MirrorPadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct MirrorPadOptionsBuilder
{
+ typedef MirrorPadOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_mode(MirrorPadMode mode)
+ void add_mode(onert_tflite::MirrorPadMode mode)
{
fbb_.AddElement<int8_t>(MirrorPadOptions::VT_MODE, static_cast<int8_t>(mode), 0);
}
@@ -6449,7 +7258,6 @@ struct MirrorPadOptionsBuilder
{
start_ = fbb_.StartTable();
}
- MirrorPadOptionsBuilder &operator=(const MirrorPadOptionsBuilder &);
flatbuffers::Offset<MirrorPadOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6460,7 +7268,7 @@ struct MirrorPadOptionsBuilder
inline flatbuffers::Offset<MirrorPadOptions>
CreateMirrorPadOptions(flatbuffers::FlatBufferBuilder &_fbb,
- MirrorPadMode mode = MirrorPadMode_REFLECT)
+ onert_tflite::MirrorPadMode mode = onert_tflite::MirrorPadMode_REFLECT)
{
MirrorPadOptionsBuilder builder_(_fbb);
builder_.add_mode(mode);
@@ -6469,13 +7277,14 @@ CreateMirrorPadOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct UniqueOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef UniqueOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_IDX_OUT_TYPE = 4
};
- TensorType idx_out_type() const
+ onert_tflite::TensorType idx_out_type() const
{
- return static_cast<TensorType>(GetField<int8_t>(VT_IDX_OUT_TYPE, 2));
+ return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_IDX_OUT_TYPE, 2));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -6486,9 +7295,10 @@ struct UniqueOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct UniqueOptionsBuilder
{
+ typedef UniqueOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_idx_out_type(TensorType idx_out_type)
+ void add_idx_out_type(onert_tflite::TensorType idx_out_type)
{
fbb_.AddElement<int8_t>(UniqueOptions::VT_IDX_OUT_TYPE, static_cast<int8_t>(idx_out_type), 2);
}
@@ -6496,7 +7306,6 @@ struct UniqueOptionsBuilder
{
start_ = fbb_.StartTable();
}
- UniqueOptionsBuilder &operator=(const UniqueOptionsBuilder &);
flatbuffers::Offset<UniqueOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6507,7 +7316,7 @@ struct UniqueOptionsBuilder
inline flatbuffers::Offset<UniqueOptions>
CreateUniqueOptions(flatbuffers::FlatBufferBuilder &_fbb,
- TensorType idx_out_type = TensorType_INT32)
+ onert_tflite::TensorType idx_out_type = onert_tflite::TensorType_INT32)
{
UniqueOptionsBuilder builder_(_fbb);
builder_.add_idx_out_type(idx_out_type);
@@ -6516,6 +7325,7 @@ CreateUniqueOptions(flatbuffers::FlatBufferBuilder &_fbb,
struct ReverseV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef ReverseV2OptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6524,13 +7334,13 @@ struct ReverseV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct ReverseV2OptionsBuilder
{
+ typedef ReverseV2Options Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit ReverseV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- ReverseV2OptionsBuilder &operator=(const ReverseV2OptionsBuilder &);
flatbuffers::Offset<ReverseV2Options> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6548,6 +7358,7 @@ CreateReverseV2Options(flatbuffers::FlatBufferBuilder &_fbb)
struct AddNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef AddNOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6556,13 +7367,13 @@ struct AddNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct AddNOptionsBuilder
{
+ typedef AddNOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit AddNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- AddNOptionsBuilder &operator=(const AddNOptionsBuilder &);
flatbuffers::Offset<AddNOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6579,6 +7390,7 @@ inline flatbuffers::Offset<AddNOptions> CreateAddNOptions(flatbuffers::FlatBuffe
struct GatherNdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef GatherNdOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6587,13 +7399,13 @@ struct GatherNdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct GatherNdOptionsBuilder
{
+ typedef GatherNdOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit GatherNdOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- GatherNdOptionsBuilder &operator=(const GatherNdOptionsBuilder &);
flatbuffers::Offset<GatherNdOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6611,6 +7423,7 @@ CreateGatherNdOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct WhereOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef WhereOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6619,13 +7432,13 @@ struct WhereOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct WhereOptionsBuilder
{
+ typedef WhereOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit WhereOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- WhereOptionsBuilder &operator=(const WhereOptionsBuilder &);
flatbuffers::Offset<WhereOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6642,7 +7455,8 @@ inline flatbuffers::Offset<WhereOptions> CreateWhereOptions(flatbuffers::FlatBuf
struct ReverseSequenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef ReverseSequenceOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_SEQ_DIM = 4,
VT_BATCH_DIM = 6
@@ -6658,6 +7472,7 @@ struct ReverseSequenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab
struct ReverseSequenceOptionsBuilder
{
+ typedef ReverseSequenceOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_seq_dim(int32_t seq_dim)
@@ -6672,7 +7487,6 @@ struct ReverseSequenceOptionsBuilder
{
start_ = fbb_.StartTable();
}
- ReverseSequenceOptionsBuilder &operator=(const ReverseSequenceOptionsBuilder &);
flatbuffers::Offset<ReverseSequenceOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6693,6 +7507,7 @@ CreateReverseSequenceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t seq_d
struct MatrixDiagOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef MatrixDiagOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6701,13 +7516,13 @@ struct MatrixDiagOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct MatrixDiagOptionsBuilder
{
+ typedef MatrixDiagOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit MatrixDiagOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- MatrixDiagOptionsBuilder &operator=(const MatrixDiagOptionsBuilder &);
flatbuffers::Offset<MatrixDiagOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6725,6 +7540,7 @@ CreateMatrixDiagOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct QuantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef QuantizeOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6733,13 +7549,13 @@ struct QuantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct QuantizeOptionsBuilder
{
+ typedef QuantizeOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit QuantizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- QuantizeOptionsBuilder &operator=(const QuantizeOptionsBuilder &);
flatbuffers::Offset<QuantizeOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6757,6 +7573,7 @@ CreateQuantizeOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct MatrixSetDiagOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef MatrixSetDiagOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6765,13 +7582,13 @@ struct MatrixSetDiagOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct MatrixSetDiagOptionsBuilder
{
+ typedef MatrixSetDiagOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit MatrixSetDiagOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- MatrixSetDiagOptionsBuilder &operator=(const MatrixSetDiagOptionsBuilder &);
flatbuffers::Offset<MatrixSetDiagOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6789,7 +7606,8 @@ CreateMatrixSetDiagOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct IfOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef IfOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_THEN_SUBGRAPH_INDEX = 4,
VT_ELSE_SUBGRAPH_INDEX = 6
@@ -6805,6 +7623,7 @@ struct IfOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct IfOptionsBuilder
{
+ typedef IfOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_then_subgraph_index(int32_t then_subgraph_index)
@@ -6819,7 +7638,6 @@ struct IfOptionsBuilder
{
start_ = fbb_.StartTable();
}
- IfOptionsBuilder &operator=(const IfOptionsBuilder &);
flatbuffers::Offset<IfOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6838,9 +7656,54 @@ inline flatbuffers::Offset<IfOptions> CreateIfOptions(flatbuffers::FlatBufferBui
return builder_.Finish();
}
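// --- Editorial sketch: not part of the generated header -----------------------
// Control-flow option tables such as IfOptions (and WhileOptions below) store
// only integer indices into the model's subgraph list; the branch bodies live
// in Model.subgraphs. A sketch, assuming subgraphs 1 and 2 exist in the
// enclosing model and fbb is a live FlatBufferBuilder:
//
//   auto if_opts = onert_tflite::CreateIfOptions(fbb,
//                                                /*then_subgraph_index=*/1,
//                                                /*else_subgraph_index=*/2);
// -------------------------------------------------------------------------------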
+struct CallOnceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef CallOnceOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_INIT_SUBGRAPH_INDEX = 4
+ };
+ int32_t init_subgraph_index() const { return GetField<int32_t>(VT_INIT_SUBGRAPH_INDEX, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_INIT_SUBGRAPH_INDEX) &&
+ verifier.EndTable();
+ }
+};
+
+struct CallOnceOptionsBuilder
+{
+ typedef CallOnceOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_init_subgraph_index(int32_t init_subgraph_index)
+ {
+ fbb_.AddElement<int32_t>(CallOnceOptions::VT_INIT_SUBGRAPH_INDEX, init_subgraph_index, 0);
+ }
+ explicit CallOnceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<CallOnceOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<CallOnceOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<CallOnceOptions>
+CreateCallOnceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t init_subgraph_index = 0)
+{
+ CallOnceOptionsBuilder builder_(_fbb);
+ builder_.add_init_subgraph_index(init_subgraph_index);
+ return builder_.Finish();
+}
+
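// --- Editorial sketch: not part of the generated header -----------------------
// CallOnceOptions is new in this schema revision: it names the subgraph that a
// CALL_ONCE operator runs exactly once for stateful initialization. A minimal
// build-and-verify sketch using only the API generated above:
//
//   flatbuffers::FlatBufferBuilder fbb;
//   fbb.Finish(onert_tflite::CreateCallOnceOptions(fbb,
//                                                  /*init_subgraph_index=*/3));
//   flatbuffers::Verifier verifier(fbb.GetBufferPointer(), fbb.GetSize());
//   bool ok = verifier.VerifyBuffer<onert_tflite::CallOnceOptions>(nullptr);
//   // ok == true: the int32 init_subgraph_index field verifies in place.
// -------------------------------------------------------------------------------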
struct WhileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef WhileOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_COND_SUBGRAPH_INDEX = 4,
VT_BODY_SUBGRAPH_INDEX = 6
@@ -6856,6 +7719,7 @@ struct WhileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct WhileOptionsBuilder
{
+ typedef WhileOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_cond_subgraph_index(int32_t cond_subgraph_index)
@@ -6870,7 +7734,6 @@ struct WhileOptionsBuilder
{
start_ = fbb_.StartTable();
}
- WhileOptionsBuilder &operator=(const WhileOptionsBuilder &);
flatbuffers::Offset<WhileOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6891,6 +7754,7 @@ inline flatbuffers::Offset<WhileOptions> CreateWhileOptions(flatbuffers::FlatBuf
struct NonMaxSuppressionV4Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef NonMaxSuppressionV4OptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6899,13 +7763,13 @@ struct NonMaxSuppressionV4Options FLATBUFFERS_FINAL_CLASS : private flatbuffers:
struct NonMaxSuppressionV4OptionsBuilder
{
+ typedef NonMaxSuppressionV4Options Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit NonMaxSuppressionV4OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- NonMaxSuppressionV4OptionsBuilder &operator=(const NonMaxSuppressionV4OptionsBuilder &);
flatbuffers::Offset<NonMaxSuppressionV4Options> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6923,6 +7787,7 @@ CreateNonMaxSuppressionV4Options(flatbuffers::FlatBufferBuilder &_fbb)
struct NonMaxSuppressionV5Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef NonMaxSuppressionV5OptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6931,13 +7796,13 @@ struct NonMaxSuppressionV5Options FLATBUFFERS_FINAL_CLASS : private flatbuffers:
struct NonMaxSuppressionV5OptionsBuilder
{
+ typedef NonMaxSuppressionV5Options Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit NonMaxSuppressionV5OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- NonMaxSuppressionV5OptionsBuilder &operator=(const NonMaxSuppressionV5OptionsBuilder &);
flatbuffers::Offset<NonMaxSuppressionV5Options> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6955,6 +7820,7 @@ CreateNonMaxSuppressionV5Options(flatbuffers::FlatBufferBuilder &_fbb)
struct ScatterNdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef ScatterNdOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6963,13 +7829,13 @@ struct ScatterNdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct ScatterNdOptionsBuilder
{
+ typedef ScatterNdOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit ScatterNdOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- ScatterNdOptionsBuilder &operator=(const ScatterNdOptionsBuilder &);
flatbuffers::Offset<ScatterNdOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -6987,6 +7853,7 @@ CreateScatterNdOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct SelectV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef SelectV2OptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6995,13 +7862,13 @@ struct SelectV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SelectV2OptionsBuilder
{
+ typedef SelectV2Options Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit SelectV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- SelectV2OptionsBuilder &operator=(const SelectV2OptionsBuilder &);
flatbuffers::Offset<SelectV2Options> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7019,6 +7886,7 @@ CreateSelectV2Options(flatbuffers::FlatBufferBuilder &_fbb)
struct DensifyOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef DensifyOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -7027,13 +7895,13 @@ struct DensifyOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct DensifyOptionsBuilder
{
+ typedef DensifyOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit DensifyOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- DensifyOptionsBuilder &operator=(const DensifyOptionsBuilder &);
flatbuffers::Offset<DensifyOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7051,6 +7919,7 @@ CreateDensifyOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct SegmentSumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
+ typedef SegmentSumOptionsBuilder Builder;
bool Verify(flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && verifier.EndTable();
@@ -7059,13 +7928,13 @@ struct SegmentSumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SegmentSumOptionsBuilder
{
+ typedef SegmentSumOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
explicit SegmentSumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- SegmentSumOptionsBuilder &operator=(const SegmentSumOptionsBuilder &);
flatbuffers::Offset<SegmentSumOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7083,39 +7952,49 @@ CreateSegmentSumOptions(flatbuffers::FlatBufferBuilder &_fbb)
struct BatchMatMulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef BatchMatMulOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
- VT_ADJOINT_LHS = 4,
- VT_ADJOINT_RHS = 6
+ VT_ADJ_X = 4,
+ VT_ADJ_Y = 6,
+ VT_ASYMMETRIC_QUANTIZE_INPUTS = 8
};
- bool adjoint_lhs() const { return GetField<uint8_t>(VT_ADJOINT_LHS, 0) != 0; }
- bool adjoint_rhs() const { return GetField<uint8_t>(VT_ADJOINT_RHS, 0) != 0; }
+ bool adj_x() const { return GetField<uint8_t>(VT_ADJ_X, 0) != 0; }
+ bool adj_y() const { return GetField<uint8_t>(VT_ADJ_Y, 0) != 0; }
+ bool asymmetric_quantize_inputs() const
+ {
+ return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
+ }
bool Verify(flatbuffers::Verifier &verifier) const
{
- return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ADJOINT_LHS) &&
- VerifyField<uint8_t>(verifier, VT_ADJOINT_RHS) && verifier.EndTable();
+ return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ADJ_X) &&
+ VerifyField<uint8_t>(verifier, VT_ADJ_Y) &&
+ VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) && verifier.EndTable();
}
};
struct BatchMatMulOptionsBuilder
{
+ typedef BatchMatMulOptions Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_adjoint_lhs(bool adjoint_lhs)
+ void add_adj_x(bool adj_x)
{
- fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ADJOINT_LHS, static_cast<uint8_t>(adjoint_lhs),
- 0);
+ fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ADJ_X, static_cast<uint8_t>(adj_x), 0);
}
- void add_adjoint_rhs(bool adjoint_rhs)
+ void add_adj_y(bool adj_y)
{
- fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ADJOINT_RHS, static_cast<uint8_t>(adjoint_rhs),
- 0);
+ fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ADJ_Y, static_cast<uint8_t>(adj_y), 0);
+ }
+ void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
+ {
+ fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS,
+ static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
}
explicit BatchMatMulOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- BatchMatMulOptionsBuilder &operator=(const BatchMatMulOptionsBuilder &);
flatbuffers::Offset<BatchMatMulOptions> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7125,47 +8004,781 @@ struct BatchMatMulOptionsBuilder
};
inline flatbuffers::Offset<BatchMatMulOptions>
-CreateBatchMatMulOptions(flatbuffers::FlatBufferBuilder &_fbb, bool adjoint_lhs = false,
- bool adjoint_rhs = false)
+CreateBatchMatMulOptions(flatbuffers::FlatBufferBuilder &_fbb, bool adj_x = false,
+ bool adj_y = false, bool asymmetric_quantize_inputs = false)
{
BatchMatMulOptionsBuilder builder_(_fbb);
- builder_.add_adjoint_rhs(adjoint_rhs);
- builder_.add_adjoint_lhs(adjoint_lhs);
+ builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
+ builder_.add_adj_y(adj_y);
+ builder_.add_adj_x(adj_x);
return builder_.Finish();
}
-struct OperatorCode FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+struct CumsumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef CumsumOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
- VT_BUILTIN_CODE = 4,
- VT_CUSTOM_CODE = 6,
- VT_VERSION = 8
+ VT_EXCLUSIVE = 4,
+ VT_REVERSE = 6
};
- BuiltinOperator builtin_code() const
+ bool exclusive() const { return GetField<uint8_t>(VT_EXCLUSIVE, 0) != 0; }
+ bool reverse() const { return GetField<uint8_t>(VT_REVERSE, 0) != 0; }
+ bool Verify(flatbuffers::Verifier &verifier) const
{
- return static_cast<BuiltinOperator>(GetField<int8_t>(VT_BUILTIN_CODE, 0));
+ return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_EXCLUSIVE) &&
+ VerifyField<uint8_t>(verifier, VT_REVERSE) && verifier.EndTable();
}
+};
+
+struct CumsumOptionsBuilder
+{
+ typedef CumsumOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_exclusive(bool exclusive)
+ {
+ fbb_.AddElement<uint8_t>(CumsumOptions::VT_EXCLUSIVE, static_cast<uint8_t>(exclusive), 0);
+ }
+ void add_reverse(bool reverse)
+ {
+ fbb_.AddElement<uint8_t>(CumsumOptions::VT_REVERSE, static_cast<uint8_t>(reverse), 0);
+ }
+ explicit CumsumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<CumsumOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<CumsumOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<CumsumOptions> CreateCumsumOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ bool exclusive = false,
+ bool reverse = false)
+{
+ CumsumOptionsBuilder builder_(_fbb);
+ builder_.add_reverse(reverse);
+ builder_.add_exclusive(exclusive);
+ return builder_.Finish();
+}
+
+struct BroadcastToOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef BroadcastToOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct BroadcastToOptionsBuilder
+{
+ typedef BroadcastToOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit BroadcastToOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<BroadcastToOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<BroadcastToOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<BroadcastToOptions>
+CreateBroadcastToOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ BroadcastToOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct Rfft2dOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef Rfft2dOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct Rfft2dOptionsBuilder
+{
+ typedef Rfft2dOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit Rfft2dOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<Rfft2dOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<Rfft2dOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<Rfft2dOptions> CreateRfft2dOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ Rfft2dOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct HashtableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef HashtableOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_TABLE_ID = 4,
+ VT_KEY_DTYPE = 6,
+ VT_VALUE_DTYPE = 8
+ };
+ int32_t table_id() const { return GetField<int32_t>(VT_TABLE_ID, 0); }
+ onert_tflite::TensorType key_dtype() const
+ {
+ return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_KEY_DTYPE, 0));
+ }
+ onert_tflite::TensorType value_dtype() const
+ {
+ return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_VALUE_DTYPE, 0));
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_TABLE_ID) &&
+ VerifyField<int8_t>(verifier, VT_KEY_DTYPE) &&
+ VerifyField<int8_t>(verifier, VT_VALUE_DTYPE) && verifier.EndTable();
+ }
+};
+
+struct HashtableOptionsBuilder
+{
+ typedef HashtableOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_table_id(int32_t table_id)
+ {
+ fbb_.AddElement<int32_t>(HashtableOptions::VT_TABLE_ID, table_id, 0);
+ }
+ void add_key_dtype(onert_tflite::TensorType key_dtype)
+ {
+ fbb_.AddElement<int8_t>(HashtableOptions::VT_KEY_DTYPE, static_cast<int8_t>(key_dtype), 0);
+ }
+ void add_value_dtype(onert_tflite::TensorType value_dtype)
+ {
+ fbb_.AddElement<int8_t>(HashtableOptions::VT_VALUE_DTYPE, static_cast<int8_t>(value_dtype), 0);
+ }
+ explicit HashtableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<HashtableOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<HashtableOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<HashtableOptions>
+CreateHashtableOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t table_id = 0,
+ onert_tflite::TensorType key_dtype = onert_tflite::TensorType_FLOAT32,
+ onert_tflite::TensorType value_dtype = onert_tflite::TensorType_FLOAT32)
+{
+ HashtableOptionsBuilder builder_(_fbb);
+ builder_.add_table_id(table_id);
+ builder_.add_value_dtype(value_dtype);
+ builder_.add_key_dtype(key_dtype);
+ return builder_.Finish();
+}
+
+struct HashtableFindOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef HashtableFindOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct HashtableFindOptionsBuilder
+{
+ typedef HashtableFindOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit HashtableFindOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<HashtableFindOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<HashtableFindOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<HashtableFindOptions>
+CreateHashtableFindOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ HashtableFindOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct HashtableImportOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef HashtableImportOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct HashtableImportOptionsBuilder
+{
+ typedef HashtableImportOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit HashtableImportOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<HashtableImportOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<HashtableImportOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<HashtableImportOptions>
+CreateHashtableImportOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ HashtableImportOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct HashtableSizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef HashtableSizeOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct HashtableSizeOptionsBuilder
+{
+ typedef HashtableSizeOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit HashtableSizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<HashtableSizeOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<HashtableSizeOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<HashtableSizeOptions>
+CreateHashtableSizeOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ HashtableSizeOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct VarHandleOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef VarHandleOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_CONTAINER = 4,
+ VT_SHARED_NAME = 6
+ };
+ const flatbuffers::String *container() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_CONTAINER);
+ }
+ const flatbuffers::String *shared_name() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_SHARED_NAME);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_CONTAINER) &&
+ verifier.VerifyString(container()) && VerifyOffset(verifier, VT_SHARED_NAME) &&
+ verifier.VerifyString(shared_name()) && verifier.EndTable();
+ }
+};
+
+struct VarHandleOptionsBuilder
+{
+ typedef VarHandleOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_container(flatbuffers::Offset<flatbuffers::String> container)
+ {
+ fbb_.AddOffset(VarHandleOptions::VT_CONTAINER, container);
+ }
+ void add_shared_name(flatbuffers::Offset<flatbuffers::String> shared_name)
+ {
+ fbb_.AddOffset(VarHandleOptions::VT_SHARED_NAME, shared_name);
+ }
+ explicit VarHandleOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<VarHandleOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<VarHandleOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<VarHandleOptions>
+CreateVarHandleOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::String> container = 0,
+ flatbuffers::Offset<flatbuffers::String> shared_name = 0)
+{
+ VarHandleOptionsBuilder builder_(_fbb);
+ builder_.add_shared_name(shared_name);
+ builder_.add_container(container);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<VarHandleOptions>
+CreateVarHandleOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, const char *container = nullptr,
+ const char *shared_name = nullptr)
+{
+ auto container__ = container ? _fbb.CreateString(container) : 0;
+ auto shared_name__ = shared_name ? _fbb.CreateString(shared_name) : 0;
+ return onert_tflite::CreateVarHandleOptions(_fbb, container__, shared_name__);
+}
+
+struct ReadVariableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef ReadVariableOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct ReadVariableOptionsBuilder
+{
+ typedef ReadVariableOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit ReadVariableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<ReadVariableOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ReadVariableOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ReadVariableOptions>
+CreateReadVariableOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ ReadVariableOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct AssignVariableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef AssignVariableOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct AssignVariableOptionsBuilder
+{
+ typedef AssignVariableOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit AssignVariableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<AssignVariableOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<AssignVariableOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<AssignVariableOptions>
+CreateAssignVariableOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ AssignVariableOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct RandomOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef RandomOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_SEED = 4,
+ VT_SEED2 = 6
+ };
+ int64_t seed() const { return GetField<int64_t>(VT_SEED, 0); }
+ int64_t seed2() const { return GetField<int64_t>(VT_SEED2, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<int64_t>(verifier, VT_SEED) &&
+ VerifyField<int64_t>(verifier, VT_SEED2) && verifier.EndTable();
+ }
+};
+
+struct RandomOptionsBuilder
+{
+ typedef RandomOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_seed(int64_t seed) { fbb_.AddElement<int64_t>(RandomOptions::VT_SEED, seed, 0); }
+ void add_seed2(int64_t seed2) { fbb_.AddElement<int64_t>(RandomOptions::VT_SEED2, seed2, 0); }
+ explicit RandomOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<RandomOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<RandomOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<RandomOptions> CreateRandomOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ int64_t seed = 0, int64_t seed2 = 0)
+{
+ RandomOptionsBuilder builder_(_fbb);
+ builder_.add_seed2(seed2);
+ builder_.add_seed(seed);
+ return builder_.Finish();
+}
+
+struct BucketizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef BucketizeOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_BOUNDARIES = 4
+ };
+ const flatbuffers::Vector<float> *boundaries() const
+ {
+ return GetPointer<const flatbuffers::Vector<float> *>(VT_BOUNDARIES);
+ }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_BOUNDARIES) &&
+ verifier.VerifyVector(boundaries()) && verifier.EndTable();
+ }
+};
+
+struct BucketizeOptionsBuilder
+{
+ typedef BucketizeOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_boundaries(flatbuffers::Offset<flatbuffers::Vector<float>> boundaries)
+ {
+ fbb_.AddOffset(BucketizeOptions::VT_BOUNDARIES, boundaries);
+ }
+ explicit BucketizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<BucketizeOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<BucketizeOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<BucketizeOptions>
+CreateBucketizeOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<float>> boundaries = 0)
+{
+ BucketizeOptionsBuilder builder_(_fbb);
+ builder_.add_boundaries(boundaries);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<BucketizeOptions>
+CreateBucketizeOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb,
+ const std::vector<float> *boundaries = nullptr)
+{
+ auto boundaries__ = boundaries ? _fbb.CreateVector<float>(*boundaries) : 0;
+ return onert_tflite::CreateBucketizeOptions(_fbb, boundaries__);
+}
+
+struct GeluOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef GeluOptionsBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_APPROXIMATE = 4
+ };
+ bool approximate() const { return GetField<uint8_t>(VT_APPROXIMATE, 0) != 0; }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_APPROXIMATE) &&
+ verifier.EndTable();
+ }
+};
+
+struct GeluOptionsBuilder
+{
+ typedef GeluOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_approximate(bool approximate)
+ {
+ fbb_.AddElement<uint8_t>(GeluOptions::VT_APPROXIMATE, static_cast<uint8_t>(approximate), 0);
+ }
+ explicit GeluOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<GeluOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<GeluOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<GeluOptions> CreateGeluOptions(flatbuffers::FlatBufferBuilder &_fbb,
+ bool approximate = false)
+{
+ GeluOptionsBuilder builder_(_fbb);
+ builder_.add_approximate(approximate);
+ return builder_.Finish();
+}
+
+struct DynamicUpdateSliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef DynamicUpdateSliceOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct DynamicUpdateSliceOptionsBuilder
+{
+ typedef DynamicUpdateSliceOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit DynamicUpdateSliceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<DynamicUpdateSliceOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<DynamicUpdateSliceOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<DynamicUpdateSliceOptions>
+CreateDynamicUpdateSliceOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ DynamicUpdateSliceOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct UnsortedSegmentProdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef UnsortedSegmentProdOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct UnsortedSegmentProdOptionsBuilder
+{
+ typedef UnsortedSegmentProdOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit UnsortedSegmentProdOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<UnsortedSegmentProdOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<UnsortedSegmentProdOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<UnsortedSegmentProdOptions>
+CreateUnsortedSegmentProdOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ UnsortedSegmentProdOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct UnsortedSegmentMaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef UnsortedSegmentMaxOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct UnsortedSegmentMaxOptionsBuilder
+{
+ typedef UnsortedSegmentMaxOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit UnsortedSegmentMaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<UnsortedSegmentMaxOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<UnsortedSegmentMaxOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<UnsortedSegmentMaxOptions>
+CreateUnsortedSegmentMaxOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ UnsortedSegmentMaxOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct UnsortedSegmentSumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef UnsortedSegmentSumOptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct UnsortedSegmentSumOptionsBuilder
+{
+ typedef UnsortedSegmentSumOptions Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit UnsortedSegmentSumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<UnsortedSegmentSumOptions> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<UnsortedSegmentSumOptions>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<UnsortedSegmentSumOptions>
+CreateUnsortedSegmentSumOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ UnsortedSegmentSumOptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct ATan2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef ATan2OptionsBuilder Builder;
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && verifier.EndTable();
+ }
+};
+
+struct ATan2OptionsBuilder
+{
+ typedef ATan2Options Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ explicit ATan2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<ATan2Options> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<ATan2Options>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<ATan2Options> CreateATan2Options(flatbuffers::FlatBufferBuilder &_fbb)
+{
+ ATan2OptionsBuilder builder_(_fbb);
+ return builder_.Finish();
+}
+
+struct OperatorCode FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef OperatorCodeBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_DEPRECATED_BUILTIN_CODE = 4,
+ VT_CUSTOM_CODE = 6,
+ VT_VERSION = 8,
+ VT_BUILTIN_CODE = 10
+ };
+ int8_t deprecated_builtin_code() const { return GetField<int8_t>(VT_DEPRECATED_BUILTIN_CODE, 0); }
const flatbuffers::String *custom_code() const
{
return GetPointer<const flatbuffers::String *>(VT_CUSTOM_CODE);
}
int32_t version() const { return GetField<int32_t>(VT_VERSION, 1); }
+ onert_tflite::BuiltinOperator builtin_code() const
+ {
+ return static_cast<onert_tflite::BuiltinOperator>(GetField<int32_t>(VT_BUILTIN_CODE, 0));
+ }
bool Verify(flatbuffers::Verifier &verifier) const
{
- return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_BUILTIN_CODE) &&
+ return VerifyTableStart(verifier) &&
+ VerifyField<int8_t>(verifier, VT_DEPRECATED_BUILTIN_CODE) &&
VerifyOffset(verifier, VT_CUSTOM_CODE) && verifier.VerifyString(custom_code()) &&
- VerifyField<int32_t>(verifier, VT_VERSION) && verifier.EndTable();
+ VerifyField<int32_t>(verifier, VT_VERSION) &&
+ VerifyField<int32_t>(verifier, VT_BUILTIN_CODE) && verifier.EndTable();
}
};
struct OperatorCodeBuilder
{
+ typedef OperatorCode Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_builtin_code(BuiltinOperator builtin_code)
+ void add_deprecated_builtin_code(int8_t deprecated_builtin_code)
{
- fbb_.AddElement<int8_t>(OperatorCode::VT_BUILTIN_CODE, static_cast<int8_t>(builtin_code), 0);
+ fbb_.AddElement<int8_t>(OperatorCode::VT_DEPRECATED_BUILTIN_CODE, deprecated_builtin_code, 0);
}
void add_custom_code(flatbuffers::Offset<flatbuffers::String> custom_code)
{
@@ -7175,11 +8788,14 @@ struct OperatorCodeBuilder
{
fbb_.AddElement<int32_t>(OperatorCode::VT_VERSION, version, 1);
}
+ void add_builtin_code(onert_tflite::BuiltinOperator builtin_code)
+ {
+ fbb_.AddElement<int32_t>(OperatorCode::VT_BUILTIN_CODE, static_cast<int32_t>(builtin_code), 0);
+ }
explicit OperatorCodeBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- OperatorCodeBuilder &operator=(const OperatorCodeBuilder &);
flatbuffers::Offset<OperatorCode> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -7189,29 +8805,32 @@ struct OperatorCodeBuilder
};
inline flatbuffers::Offset<OperatorCode>
-CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb,
- BuiltinOperator builtin_code = BuiltinOperator_ADD,
- flatbuffers::Offset<flatbuffers::String> custom_code = 0, int32_t version = 1)
+CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb, int8_t deprecated_builtin_code = 0,
+ flatbuffers::Offset<flatbuffers::String> custom_code = 0, int32_t version = 1,
+ onert_tflite::BuiltinOperator builtin_code = onert_tflite::BuiltinOperator_ADD)
{
OperatorCodeBuilder builder_(_fbb);
+ builder_.add_builtin_code(builtin_code);
builder_.add_version(version);
builder_.add_custom_code(custom_code);
- builder_.add_builtin_code(builtin_code);
+ builder_.add_deprecated_builtin_code(deprecated_builtin_code);
return builder_.Finish();
}
-inline flatbuffers::Offset<OperatorCode>
-CreateOperatorCodeDirect(flatbuffers::FlatBufferBuilder &_fbb,
- BuiltinOperator builtin_code = BuiltinOperator_ADD,
- const char *custom_code = nullptr, int32_t version = 1)
+inline flatbuffers::Offset<OperatorCode> CreateOperatorCodeDirect(
+ flatbuffers::FlatBufferBuilder &_fbb, int8_t deprecated_builtin_code = 0,
+ const char *custom_code = nullptr, int32_t version = 1,
+ onert_tflite::BuiltinOperator builtin_code = onert_tflite::BuiltinOperator_ADD)
{
- return onert_tflite::CreateOperatorCode(
- _fbb, builtin_code, custom_code ? _fbb.CreateString(custom_code) : 0, version);
+ auto custom_code__ = custom_code ? _fbb.CreateString(custom_code) : 0;
+ return onert_tflite::CreateOperatorCode(_fbb, deprecated_builtin_code, custom_code__, version,
+ builtin_code);
}
struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef OperatorBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_OPCODE_INDEX = 4,
VT_INPUTS = 6,
@@ -7232,628 +8851,761 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_OUTPUTS);
}
- BuiltinOptions builtin_options_type() const
+ onert_tflite::BuiltinOptions builtin_options_type() const
{
- return static_cast<BuiltinOptions>(GetField<uint8_t>(VT_BUILTIN_OPTIONS_TYPE, 0));
+ return static_cast<onert_tflite::BuiltinOptions>(GetField<uint8_t>(VT_BUILTIN_OPTIONS_TYPE, 0));
}
const void *builtin_options() const { return GetPointer<const void *>(VT_BUILTIN_OPTIONS); }
template <typename T> const T *builtin_options_as() const;
- const Conv2DOptions *builtin_options_as_Conv2DOptions() const
+ const onert_tflite::Conv2DOptions *builtin_options_as_Conv2DOptions() const
{
- return builtin_options_type() == BuiltinOptions_Conv2DOptions
- ? static_cast<const Conv2DOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_Conv2DOptions
+ ? static_cast<const onert_tflite::Conv2DOptions *>(builtin_options())
+ : nullptr;
}
- const DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions() const
+ const onert_tflite::DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions() const
{
- return builtin_options_type() == BuiltinOptions_DepthwiseConv2DOptions
- ? static_cast<const DepthwiseConv2DOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_DepthwiseConv2DOptions
+ ? static_cast<const onert_tflite::DepthwiseConv2DOptions *>(builtin_options())
+ : nullptr;
}
- const ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions() const
+ const onert_tflite::ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions() const
{
- return builtin_options_type() == BuiltinOptions_ConcatEmbeddingsOptions
- ? static_cast<const ConcatEmbeddingsOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_ConcatEmbeddingsOptions
+ ? static_cast<const onert_tflite::ConcatEmbeddingsOptions *>(builtin_options())
+ : nullptr;
}
- const LSHProjectionOptions *builtin_options_as_LSHProjectionOptions() const
+ const onert_tflite::LSHProjectionOptions *builtin_options_as_LSHProjectionOptions() const
{
- return builtin_options_type() == BuiltinOptions_LSHProjectionOptions
- ? static_cast<const LSHProjectionOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_LSHProjectionOptions
+ ? static_cast<const onert_tflite::LSHProjectionOptions *>(builtin_options())
+ : nullptr;
}
- const Pool2DOptions *builtin_options_as_Pool2DOptions() const
+ const onert_tflite::Pool2DOptions *builtin_options_as_Pool2DOptions() const
{
- return builtin_options_type() == BuiltinOptions_Pool2DOptions
- ? static_cast<const Pool2DOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_Pool2DOptions
+ ? static_cast<const onert_tflite::Pool2DOptions *>(builtin_options())
+ : nullptr;
}
- const SVDFOptions *builtin_options_as_SVDFOptions() const
+ const onert_tflite::SVDFOptions *builtin_options_as_SVDFOptions() const
{
- return builtin_options_type() == BuiltinOptions_SVDFOptions
- ? static_cast<const SVDFOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_SVDFOptions
+ ? static_cast<const onert_tflite::SVDFOptions *>(builtin_options())
+ : nullptr;
}
- const RNNOptions *builtin_options_as_RNNOptions() const
+ const onert_tflite::RNNOptions *builtin_options_as_RNNOptions() const
{
- return builtin_options_type() == BuiltinOptions_RNNOptions
- ? static_cast<const RNNOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_RNNOptions
+ ? static_cast<const onert_tflite::RNNOptions *>(builtin_options())
+ : nullptr;
}
- const FullyConnectedOptions *builtin_options_as_FullyConnectedOptions() const
+ const onert_tflite::FullyConnectedOptions *builtin_options_as_FullyConnectedOptions() const
{
- return builtin_options_type() == BuiltinOptions_FullyConnectedOptions
- ? static_cast<const FullyConnectedOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_FullyConnectedOptions
+ ? static_cast<const onert_tflite::FullyConnectedOptions *>(builtin_options())
+ : nullptr;
}
- const SoftmaxOptions *builtin_options_as_SoftmaxOptions() const
+ const onert_tflite::SoftmaxOptions *builtin_options_as_SoftmaxOptions() const
{
- return builtin_options_type() == BuiltinOptions_SoftmaxOptions
- ? static_cast<const SoftmaxOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_SoftmaxOptions
+ ? static_cast<const onert_tflite::SoftmaxOptions *>(builtin_options())
+ : nullptr;
}
- const ConcatenationOptions *builtin_options_as_ConcatenationOptions() const
+ const onert_tflite::ConcatenationOptions *builtin_options_as_ConcatenationOptions() const
{
- return builtin_options_type() == BuiltinOptions_ConcatenationOptions
- ? static_cast<const ConcatenationOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_ConcatenationOptions
+ ? static_cast<const onert_tflite::ConcatenationOptions *>(builtin_options())
+ : nullptr;
}
- const AddOptions *builtin_options_as_AddOptions() const
+ const onert_tflite::AddOptions *builtin_options_as_AddOptions() const
{
- return builtin_options_type() == BuiltinOptions_AddOptions
- ? static_cast<const AddOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_AddOptions
+ ? static_cast<const onert_tflite::AddOptions *>(builtin_options())
+ : nullptr;
}
- const L2NormOptions *builtin_options_as_L2NormOptions() const
+ const onert_tflite::L2NormOptions *builtin_options_as_L2NormOptions() const
{
- return builtin_options_type() == BuiltinOptions_L2NormOptions
- ? static_cast<const L2NormOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_L2NormOptions
+ ? static_cast<const onert_tflite::L2NormOptions *>(builtin_options())
+ : nullptr;
}
- const LocalResponseNormalizationOptions *
+ const onert_tflite::LocalResponseNormalizationOptions *
builtin_options_as_LocalResponseNormalizationOptions() const
{
- return builtin_options_type() == BuiltinOptions_LocalResponseNormalizationOptions
- ? static_cast<const LocalResponseNormalizationOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_LocalResponseNormalizationOptions
+ ? static_cast<const onert_tflite::LocalResponseNormalizationOptions *>(
+ builtin_options())
+ : nullptr;
}
- const LSTMOptions *builtin_options_as_LSTMOptions() const
+ const onert_tflite::LSTMOptions *builtin_options_as_LSTMOptions() const
{
- return builtin_options_type() == BuiltinOptions_LSTMOptions
- ? static_cast<const LSTMOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_LSTMOptions
+ ? static_cast<const onert_tflite::LSTMOptions *>(builtin_options())
+ : nullptr;
}
- const ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions() const
+ const onert_tflite::ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions() const
{
- return builtin_options_type() == BuiltinOptions_ResizeBilinearOptions
- ? static_cast<const ResizeBilinearOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_ResizeBilinearOptions
+ ? static_cast<const onert_tflite::ResizeBilinearOptions *>(builtin_options())
+ : nullptr;
}
- const CallOptions *builtin_options_as_CallOptions() const
+ const onert_tflite::CallOptions *builtin_options_as_CallOptions() const
{
- return builtin_options_type() == BuiltinOptions_CallOptions
- ? static_cast<const CallOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_CallOptions
+ ? static_cast<const onert_tflite::CallOptions *>(builtin_options())
+ : nullptr;
}
- const ReshapeOptions *builtin_options_as_ReshapeOptions() const
+ const onert_tflite::ReshapeOptions *builtin_options_as_ReshapeOptions() const
{
- return builtin_options_type() == BuiltinOptions_ReshapeOptions
- ? static_cast<const ReshapeOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_ReshapeOptions
+ ? static_cast<const onert_tflite::ReshapeOptions *>(builtin_options())
+ : nullptr;
}
- const SkipGramOptions *builtin_options_as_SkipGramOptions() const
+ const onert_tflite::SkipGramOptions *builtin_options_as_SkipGramOptions() const
{
- return builtin_options_type() == BuiltinOptions_SkipGramOptions
- ? static_cast<const SkipGramOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_SkipGramOptions
+ ? static_cast<const onert_tflite::SkipGramOptions *>(builtin_options())
+ : nullptr;
}
- const SpaceToDepthOptions *builtin_options_as_SpaceToDepthOptions() const
+ const onert_tflite::SpaceToDepthOptions *builtin_options_as_SpaceToDepthOptions() const
{
- return builtin_options_type() == BuiltinOptions_SpaceToDepthOptions
- ? static_cast<const SpaceToDepthOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_SpaceToDepthOptions
+ ? static_cast<const onert_tflite::SpaceToDepthOptions *>(builtin_options())
+ : nullptr;
}
- const EmbeddingLookupSparseOptions *builtin_options_as_EmbeddingLookupSparseOptions() const
+ const onert_tflite::EmbeddingLookupSparseOptions *
+ builtin_options_as_EmbeddingLookupSparseOptions() const
{
- return builtin_options_type() == BuiltinOptions_EmbeddingLookupSparseOptions
- ? static_cast<const EmbeddingLookupSparseOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_EmbeddingLookupSparseOptions
+ ? static_cast<const onert_tflite::EmbeddingLookupSparseOptions *>(builtin_options())
+ : nullptr;
}
- const MulOptions *builtin_options_as_MulOptions() const
+ const onert_tflite::MulOptions *builtin_options_as_MulOptions() const
{
- return builtin_options_type() == BuiltinOptions_MulOptions
- ? static_cast<const MulOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_MulOptions
+ ? static_cast<const onert_tflite::MulOptions *>(builtin_options())
+ : nullptr;
}
- const PadOptions *builtin_options_as_PadOptions() const
+ const onert_tflite::PadOptions *builtin_options_as_PadOptions() const
{
- return builtin_options_type() == BuiltinOptions_PadOptions
- ? static_cast<const PadOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_PadOptions
+ ? static_cast<const onert_tflite::PadOptions *>(builtin_options())
+ : nullptr;
}
- const GatherOptions *builtin_options_as_GatherOptions() const
+ const onert_tflite::GatherOptions *builtin_options_as_GatherOptions() const
{
- return builtin_options_type() == BuiltinOptions_GatherOptions
- ? static_cast<const GatherOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_GatherOptions
+ ? static_cast<const onert_tflite::GatherOptions *>(builtin_options())
+ : nullptr;
}
- const BatchToSpaceNDOptions *builtin_options_as_BatchToSpaceNDOptions() const
+ const onert_tflite::BatchToSpaceNDOptions *builtin_options_as_BatchToSpaceNDOptions() const
{
- return builtin_options_type() == BuiltinOptions_BatchToSpaceNDOptions
- ? static_cast<const BatchToSpaceNDOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_BatchToSpaceNDOptions
+ ? static_cast<const onert_tflite::BatchToSpaceNDOptions *>(builtin_options())
+ : nullptr;
}
- const SpaceToBatchNDOptions *builtin_options_as_SpaceToBatchNDOptions() const
+ const onert_tflite::SpaceToBatchNDOptions *builtin_options_as_SpaceToBatchNDOptions() const
{
- return builtin_options_type() == BuiltinOptions_SpaceToBatchNDOptions
- ? static_cast<const SpaceToBatchNDOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_SpaceToBatchNDOptions
+ ? static_cast<const onert_tflite::SpaceToBatchNDOptions *>(builtin_options())
+ : nullptr;
}
- const TransposeOptions *builtin_options_as_TransposeOptions() const
+ const onert_tflite::TransposeOptions *builtin_options_as_TransposeOptions() const
{
- return builtin_options_type() == BuiltinOptions_TransposeOptions
- ? static_cast<const TransposeOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_TransposeOptions
+ ? static_cast<const onert_tflite::TransposeOptions *>(builtin_options())
+ : nullptr;
}
- const ReducerOptions *builtin_options_as_ReducerOptions() const
+ const onert_tflite::ReducerOptions *builtin_options_as_ReducerOptions() const
{
- return builtin_options_type() == BuiltinOptions_ReducerOptions
- ? static_cast<const ReducerOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_ReducerOptions
+ ? static_cast<const onert_tflite::ReducerOptions *>(builtin_options())
+ : nullptr;
}
- const SubOptions *builtin_options_as_SubOptions() const
+ const onert_tflite::SubOptions *builtin_options_as_SubOptions() const
{
- return builtin_options_type() == BuiltinOptions_SubOptions
- ? static_cast<const SubOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_SubOptions
+ ? static_cast<const onert_tflite::SubOptions *>(builtin_options())
+ : nullptr;
}
- const DivOptions *builtin_options_as_DivOptions() const
+ const onert_tflite::DivOptions *builtin_options_as_DivOptions() const
{
- return builtin_options_type() == BuiltinOptions_DivOptions
- ? static_cast<const DivOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_DivOptions
+ ? static_cast<const onert_tflite::DivOptions *>(builtin_options())
+ : nullptr;
}
- const SqueezeOptions *builtin_options_as_SqueezeOptions() const
+ const onert_tflite::SqueezeOptions *builtin_options_as_SqueezeOptions() const
{
- return builtin_options_type() == BuiltinOptions_SqueezeOptions
- ? static_cast<const SqueezeOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_SqueezeOptions
+ ? static_cast<const onert_tflite::SqueezeOptions *>(builtin_options())
+ : nullptr;
}
- const SequenceRNNOptions *builtin_options_as_SequenceRNNOptions() const
+ const onert_tflite::SequenceRNNOptions *builtin_options_as_SequenceRNNOptions() const
{
- return builtin_options_type() == BuiltinOptions_SequenceRNNOptions
- ? static_cast<const SequenceRNNOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_SequenceRNNOptions
+ ? static_cast<const onert_tflite::SequenceRNNOptions *>(builtin_options())
+ : nullptr;
}
- const StridedSliceOptions *builtin_options_as_StridedSliceOptions() const
+ const onert_tflite::StridedSliceOptions *builtin_options_as_StridedSliceOptions() const
{
- return builtin_options_type() == BuiltinOptions_StridedSliceOptions
- ? static_cast<const StridedSliceOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_StridedSliceOptions
+ ? static_cast<const onert_tflite::StridedSliceOptions *>(builtin_options())
+ : nullptr;
}
- const ExpOptions *builtin_options_as_ExpOptions() const
+ const onert_tflite::ExpOptions *builtin_options_as_ExpOptions() const
{
- return builtin_options_type() == BuiltinOptions_ExpOptions
- ? static_cast<const ExpOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_ExpOptions
+ ? static_cast<const onert_tflite::ExpOptions *>(builtin_options())
+ : nullptr;
}
- const TopKV2Options *builtin_options_as_TopKV2Options() const
+ const onert_tflite::TopKV2Options *builtin_options_as_TopKV2Options() const
{
- return builtin_options_type() == BuiltinOptions_TopKV2Options
- ? static_cast<const TopKV2Options *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_TopKV2Options
+ ? static_cast<const onert_tflite::TopKV2Options *>(builtin_options())
+ : nullptr;
}
- const SplitOptions *builtin_options_as_SplitOptions() const
+ const onert_tflite::SplitOptions *builtin_options_as_SplitOptions() const
{
- return builtin_options_type() == BuiltinOptions_SplitOptions
- ? static_cast<const SplitOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_SplitOptions
+ ? static_cast<const onert_tflite::SplitOptions *>(builtin_options())
+ : nullptr;
}
- const LogSoftmaxOptions *builtin_options_as_LogSoftmaxOptions() const
+ const onert_tflite::LogSoftmaxOptions *builtin_options_as_LogSoftmaxOptions() const
{
- return builtin_options_type() == BuiltinOptions_LogSoftmaxOptions
- ? static_cast<const LogSoftmaxOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_LogSoftmaxOptions
+ ? static_cast<const onert_tflite::LogSoftmaxOptions *>(builtin_options())
+ : nullptr;
}
- const CastOptions *builtin_options_as_CastOptions() const
+ const onert_tflite::CastOptions *builtin_options_as_CastOptions() const
{
- return builtin_options_type() == BuiltinOptions_CastOptions
- ? static_cast<const CastOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_CastOptions
+ ? static_cast<const onert_tflite::CastOptions *>(builtin_options())
+ : nullptr;
}
- const DequantizeOptions *builtin_options_as_DequantizeOptions() const
+ const onert_tflite::DequantizeOptions *builtin_options_as_DequantizeOptions() const
{
- return builtin_options_type() == BuiltinOptions_DequantizeOptions
- ? static_cast<const DequantizeOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_DequantizeOptions
+ ? static_cast<const onert_tflite::DequantizeOptions *>(builtin_options())
+ : nullptr;
}
- const MaximumMinimumOptions *builtin_options_as_MaximumMinimumOptions() const
+ const onert_tflite::MaximumMinimumOptions *builtin_options_as_MaximumMinimumOptions() const
{
- return builtin_options_type() == BuiltinOptions_MaximumMinimumOptions
- ? static_cast<const MaximumMinimumOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_MaximumMinimumOptions
+ ? static_cast<const onert_tflite::MaximumMinimumOptions *>(builtin_options())
+ : nullptr;
}
- const ArgMaxOptions *builtin_options_as_ArgMaxOptions() const
+ const onert_tflite::ArgMaxOptions *builtin_options_as_ArgMaxOptions() const
{
- return builtin_options_type() == BuiltinOptions_ArgMaxOptions
- ? static_cast<const ArgMaxOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_ArgMaxOptions
+ ? static_cast<const onert_tflite::ArgMaxOptions *>(builtin_options())
+ : nullptr;
}
- const LessOptions *builtin_options_as_LessOptions() const
+ const onert_tflite::LessOptions *builtin_options_as_LessOptions() const
{
- return builtin_options_type() == BuiltinOptions_LessOptions
- ? static_cast<const LessOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_LessOptions
+ ? static_cast<const onert_tflite::LessOptions *>(builtin_options())
+ : nullptr;
}
- const NegOptions *builtin_options_as_NegOptions() const
+ const onert_tflite::NegOptions *builtin_options_as_NegOptions() const
{
- return builtin_options_type() == BuiltinOptions_NegOptions
- ? static_cast<const NegOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_NegOptions
+ ? static_cast<const onert_tflite::NegOptions *>(builtin_options())
+ : nullptr;
}
- const PadV2Options *builtin_options_as_PadV2Options() const
+ const onert_tflite::PadV2Options *builtin_options_as_PadV2Options() const
{
- return builtin_options_type() == BuiltinOptions_PadV2Options
- ? static_cast<const PadV2Options *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_PadV2Options
+ ? static_cast<const onert_tflite::PadV2Options *>(builtin_options())
+ : nullptr;
}
- const GreaterOptions *builtin_options_as_GreaterOptions() const
+ const onert_tflite::GreaterOptions *builtin_options_as_GreaterOptions() const
{
- return builtin_options_type() == BuiltinOptions_GreaterOptions
- ? static_cast<const GreaterOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_GreaterOptions
+ ? static_cast<const onert_tflite::GreaterOptions *>(builtin_options())
+ : nullptr;
}
- const GreaterEqualOptions *builtin_options_as_GreaterEqualOptions() const
+ const onert_tflite::GreaterEqualOptions *builtin_options_as_GreaterEqualOptions() const
{
- return builtin_options_type() == BuiltinOptions_GreaterEqualOptions
- ? static_cast<const GreaterEqualOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_GreaterEqualOptions
+ ? static_cast<const onert_tflite::GreaterEqualOptions *>(builtin_options())
+ : nullptr;
}
- const LessEqualOptions *builtin_options_as_LessEqualOptions() const
+ const onert_tflite::LessEqualOptions *builtin_options_as_LessEqualOptions() const
{
- return builtin_options_type() == BuiltinOptions_LessEqualOptions
- ? static_cast<const LessEqualOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_LessEqualOptions
+ ? static_cast<const onert_tflite::LessEqualOptions *>(builtin_options())
+ : nullptr;
}
- const SelectOptions *builtin_options_as_SelectOptions() const
+ const onert_tflite::SelectOptions *builtin_options_as_SelectOptions() const
{
- return builtin_options_type() == BuiltinOptions_SelectOptions
- ? static_cast<const SelectOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_SelectOptions
+ ? static_cast<const onert_tflite::SelectOptions *>(builtin_options())
+ : nullptr;
}
- const SliceOptions *builtin_options_as_SliceOptions() const
+ const onert_tflite::SliceOptions *builtin_options_as_SliceOptions() const
{
- return builtin_options_type() == BuiltinOptions_SliceOptions
- ? static_cast<const SliceOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_SliceOptions
+ ? static_cast<const onert_tflite::SliceOptions *>(builtin_options())
+ : nullptr;
}
- const TransposeConvOptions *builtin_options_as_TransposeConvOptions() const
+ const onert_tflite::TransposeConvOptions *builtin_options_as_TransposeConvOptions() const
{
- return builtin_options_type() == BuiltinOptions_TransposeConvOptions
- ? static_cast<const TransposeConvOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_TransposeConvOptions
+ ? static_cast<const onert_tflite::TransposeConvOptions *>(builtin_options())
+ : nullptr;
}
- const SparseToDenseOptions *builtin_options_as_SparseToDenseOptions() const
+ const onert_tflite::SparseToDenseOptions *builtin_options_as_SparseToDenseOptions() const
{
- return builtin_options_type() == BuiltinOptions_SparseToDenseOptions
- ? static_cast<const SparseToDenseOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_SparseToDenseOptions
+ ? static_cast<const onert_tflite::SparseToDenseOptions *>(builtin_options())
+ : nullptr;
}
- const TileOptions *builtin_options_as_TileOptions() const
+ const onert_tflite::TileOptions *builtin_options_as_TileOptions() const
{
- return builtin_options_type() == BuiltinOptions_TileOptions
- ? static_cast<const TileOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_TileOptions
+ ? static_cast<const onert_tflite::TileOptions *>(builtin_options())
+ : nullptr;
}
- const ExpandDimsOptions *builtin_options_as_ExpandDimsOptions() const
+ const onert_tflite::ExpandDimsOptions *builtin_options_as_ExpandDimsOptions() const
{
- return builtin_options_type() == BuiltinOptions_ExpandDimsOptions
- ? static_cast<const ExpandDimsOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_ExpandDimsOptions
+ ? static_cast<const onert_tflite::ExpandDimsOptions *>(builtin_options())
+ : nullptr;
}
- const EqualOptions *builtin_options_as_EqualOptions() const
+ const onert_tflite::EqualOptions *builtin_options_as_EqualOptions() const
{
- return builtin_options_type() == BuiltinOptions_EqualOptions
- ? static_cast<const EqualOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_EqualOptions
+ ? static_cast<const onert_tflite::EqualOptions *>(builtin_options())
+ : nullptr;
}
- const NotEqualOptions *builtin_options_as_NotEqualOptions() const
+ const onert_tflite::NotEqualOptions *builtin_options_as_NotEqualOptions() const
{
- return builtin_options_type() == BuiltinOptions_NotEqualOptions
- ? static_cast<const NotEqualOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_NotEqualOptions
+ ? static_cast<const onert_tflite::NotEqualOptions *>(builtin_options())
+ : nullptr;
}
- const ShapeOptions *builtin_options_as_ShapeOptions() const
+ const onert_tflite::ShapeOptions *builtin_options_as_ShapeOptions() const
{
- return builtin_options_type() == BuiltinOptions_ShapeOptions
- ? static_cast<const ShapeOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_ShapeOptions
+ ? static_cast<const onert_tflite::ShapeOptions *>(builtin_options())
+ : nullptr;
}
- const PowOptions *builtin_options_as_PowOptions() const
+ const onert_tflite::PowOptions *builtin_options_as_PowOptions() const
{
- return builtin_options_type() == BuiltinOptions_PowOptions
- ? static_cast<const PowOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_PowOptions
+ ? static_cast<const onert_tflite::PowOptions *>(builtin_options())
+ : nullptr;
}
- const ArgMinOptions *builtin_options_as_ArgMinOptions() const
+ const onert_tflite::ArgMinOptions *builtin_options_as_ArgMinOptions() const
{
- return builtin_options_type() == BuiltinOptions_ArgMinOptions
- ? static_cast<const ArgMinOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_ArgMinOptions
+ ? static_cast<const onert_tflite::ArgMinOptions *>(builtin_options())
+ : nullptr;
}
- const FakeQuantOptions *builtin_options_as_FakeQuantOptions() const
+ const onert_tflite::FakeQuantOptions *builtin_options_as_FakeQuantOptions() const
{
- return builtin_options_type() == BuiltinOptions_FakeQuantOptions
- ? static_cast<const FakeQuantOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_FakeQuantOptions
+ ? static_cast<const onert_tflite::FakeQuantOptions *>(builtin_options())
+ : nullptr;
}
- const PackOptions *builtin_options_as_PackOptions() const
+ const onert_tflite::PackOptions *builtin_options_as_PackOptions() const
{
- return builtin_options_type() == BuiltinOptions_PackOptions
- ? static_cast<const PackOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_PackOptions
+ ? static_cast<const onert_tflite::PackOptions *>(builtin_options())
+ : nullptr;
}
- const LogicalOrOptions *builtin_options_as_LogicalOrOptions() const
+ const onert_tflite::LogicalOrOptions *builtin_options_as_LogicalOrOptions() const
{
- return builtin_options_type() == BuiltinOptions_LogicalOrOptions
- ? static_cast<const LogicalOrOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_LogicalOrOptions
+ ? static_cast<const onert_tflite::LogicalOrOptions *>(builtin_options())
+ : nullptr;
}
- const OneHotOptions *builtin_options_as_OneHotOptions() const
+ const onert_tflite::OneHotOptions *builtin_options_as_OneHotOptions() const
{
- return builtin_options_type() == BuiltinOptions_OneHotOptions
- ? static_cast<const OneHotOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_OneHotOptions
+ ? static_cast<const onert_tflite::OneHotOptions *>(builtin_options())
+ : nullptr;
}
- const LogicalAndOptions *builtin_options_as_LogicalAndOptions() const
+ const onert_tflite::LogicalAndOptions *builtin_options_as_LogicalAndOptions() const
{
- return builtin_options_type() == BuiltinOptions_LogicalAndOptions
- ? static_cast<const LogicalAndOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_LogicalAndOptions
+ ? static_cast<const onert_tflite::LogicalAndOptions *>(builtin_options())
+ : nullptr;
}
- const LogicalNotOptions *builtin_options_as_LogicalNotOptions() const
+ const onert_tflite::LogicalNotOptions *builtin_options_as_LogicalNotOptions() const
{
- return builtin_options_type() == BuiltinOptions_LogicalNotOptions
- ? static_cast<const LogicalNotOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_LogicalNotOptions
+ ? static_cast<const onert_tflite::LogicalNotOptions *>(builtin_options())
+ : nullptr;
}
- const UnpackOptions *builtin_options_as_UnpackOptions() const
+ const onert_tflite::UnpackOptions *builtin_options_as_UnpackOptions() const
{
- return builtin_options_type() == BuiltinOptions_UnpackOptions
- ? static_cast<const UnpackOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_UnpackOptions
+ ? static_cast<const onert_tflite::UnpackOptions *>(builtin_options())
+ : nullptr;
}
- const FloorDivOptions *builtin_options_as_FloorDivOptions() const
+ const onert_tflite::FloorDivOptions *builtin_options_as_FloorDivOptions() const
{
- return builtin_options_type() == BuiltinOptions_FloorDivOptions
- ? static_cast<const FloorDivOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_FloorDivOptions
+ ? static_cast<const onert_tflite::FloorDivOptions *>(builtin_options())
+ : nullptr;
}
- const SquareOptions *builtin_options_as_SquareOptions() const
+ const onert_tflite::SquareOptions *builtin_options_as_SquareOptions() const
{
- return builtin_options_type() == BuiltinOptions_SquareOptions
- ? static_cast<const SquareOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_SquareOptions
+ ? static_cast<const onert_tflite::SquareOptions *>(builtin_options())
+ : nullptr;
}
- const ZerosLikeOptions *builtin_options_as_ZerosLikeOptions() const
+ const onert_tflite::ZerosLikeOptions *builtin_options_as_ZerosLikeOptions() const
{
- return builtin_options_type() == BuiltinOptions_ZerosLikeOptions
- ? static_cast<const ZerosLikeOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_ZerosLikeOptions
+ ? static_cast<const onert_tflite::ZerosLikeOptions *>(builtin_options())
+ : nullptr;
}
- const FillOptions *builtin_options_as_FillOptions() const
+ const onert_tflite::FillOptions *builtin_options_as_FillOptions() const
{
- return builtin_options_type() == BuiltinOptions_FillOptions
- ? static_cast<const FillOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_FillOptions
+ ? static_cast<const onert_tflite::FillOptions *>(builtin_options())
+ : nullptr;
}
- const BidirectionalSequenceLSTMOptions *
+ const onert_tflite::BidirectionalSequenceLSTMOptions *
builtin_options_as_BidirectionalSequenceLSTMOptions() const
{
- return builtin_options_type() == BuiltinOptions_BidirectionalSequenceLSTMOptions
- ? static_cast<const BidirectionalSequenceLSTMOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_BidirectionalSequenceLSTMOptions
+ ? static_cast<const onert_tflite::BidirectionalSequenceLSTMOptions *>(
+ builtin_options())
+ : nullptr;
}
- const BidirectionalSequenceRNNOptions *builtin_options_as_BidirectionalSequenceRNNOptions() const
+ const onert_tflite::BidirectionalSequenceRNNOptions *
+ builtin_options_as_BidirectionalSequenceRNNOptions() const
{
- return builtin_options_type() == BuiltinOptions_BidirectionalSequenceRNNOptions
- ? static_cast<const BidirectionalSequenceRNNOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_BidirectionalSequenceRNNOptions
+ ? static_cast<const onert_tflite::BidirectionalSequenceRNNOptions *>(builtin_options())
+ : nullptr;
}
- const UnidirectionalSequenceLSTMOptions *
+ const onert_tflite::UnidirectionalSequenceLSTMOptions *
builtin_options_as_UnidirectionalSequenceLSTMOptions() const
{
- return builtin_options_type() == BuiltinOptions_UnidirectionalSequenceLSTMOptions
- ? static_cast<const UnidirectionalSequenceLSTMOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_UnidirectionalSequenceLSTMOptions
+ ? static_cast<const onert_tflite::UnidirectionalSequenceLSTMOptions *>(
+ builtin_options())
+ : nullptr;
+ }
+ const onert_tflite::FloorModOptions *builtin_options_as_FloorModOptions() const
+ {
+ return builtin_options_type() == onert_tflite::BuiltinOptions_FloorModOptions
+ ? static_cast<const onert_tflite::FloorModOptions *>(builtin_options())
+ : nullptr;
+ }
+ const onert_tflite::RangeOptions *builtin_options_as_RangeOptions() const
+ {
+ return builtin_options_type() == onert_tflite::BuiltinOptions_RangeOptions
+ ? static_cast<const onert_tflite::RangeOptions *>(builtin_options())
+ : nullptr;
+ }
+ const onert_tflite::ResizeNearestNeighborOptions *
+ builtin_options_as_ResizeNearestNeighborOptions() const
+ {
+ return builtin_options_type() == onert_tflite::BuiltinOptions_ResizeNearestNeighborOptions
+ ? static_cast<const onert_tflite::ResizeNearestNeighborOptions *>(builtin_options())
+ : nullptr;
+ }
+ const onert_tflite::LeakyReluOptions *builtin_options_as_LeakyReluOptions() const
+ {
+ return builtin_options_type() == onert_tflite::BuiltinOptions_LeakyReluOptions
+ ? static_cast<const onert_tflite::LeakyReluOptions *>(builtin_options())
+ : nullptr;
+ }
+ const onert_tflite::SquaredDifferenceOptions *builtin_options_as_SquaredDifferenceOptions() const
+ {
+ return builtin_options_type() == onert_tflite::BuiltinOptions_SquaredDifferenceOptions
+ ? static_cast<const onert_tflite::SquaredDifferenceOptions *>(builtin_options())
+ : nullptr;
+ }
+ const onert_tflite::MirrorPadOptions *builtin_options_as_MirrorPadOptions() const
+ {
+ return builtin_options_type() == onert_tflite::BuiltinOptions_MirrorPadOptions
+ ? static_cast<const onert_tflite::MirrorPadOptions *>(builtin_options())
+ : nullptr;
+ }
+ const onert_tflite::AbsOptions *builtin_options_as_AbsOptions() const
+ {
+ return builtin_options_type() == onert_tflite::BuiltinOptions_AbsOptions
+ ? static_cast<const onert_tflite::AbsOptions *>(builtin_options())
+ : nullptr;
+ }
+ const onert_tflite::SplitVOptions *builtin_options_as_SplitVOptions() const
+ {
+ return builtin_options_type() == onert_tflite::BuiltinOptions_SplitVOptions
+ ? static_cast<const onert_tflite::SplitVOptions *>(builtin_options())
+ : nullptr;
+ }
+ const onert_tflite::UniqueOptions *builtin_options_as_UniqueOptions() const
+ {
+ return builtin_options_type() == onert_tflite::BuiltinOptions_UniqueOptions
+ ? static_cast<const onert_tflite::UniqueOptions *>(builtin_options())
+ : nullptr;
+ }
+ const onert_tflite::ReverseV2Options *builtin_options_as_ReverseV2Options() const
+ {
+ return builtin_options_type() == onert_tflite::BuiltinOptions_ReverseV2Options
+ ? static_cast<const onert_tflite::ReverseV2Options *>(builtin_options())
+ : nullptr;
}
- const FloorModOptions *builtin_options_as_FloorModOptions() const
+ const onert_tflite::AddNOptions *builtin_options_as_AddNOptions() const
{
- return builtin_options_type() == BuiltinOptions_FloorModOptions
- ? static_cast<const FloorModOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_AddNOptions
+ ? static_cast<const onert_tflite::AddNOptions *>(builtin_options())
+ : nullptr;
}
- const RangeOptions *builtin_options_as_RangeOptions() const
+ const onert_tflite::GatherNdOptions *builtin_options_as_GatherNdOptions() const
{
- return builtin_options_type() == BuiltinOptions_RangeOptions
- ? static_cast<const RangeOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_GatherNdOptions
+ ? static_cast<const onert_tflite::GatherNdOptions *>(builtin_options())
+ : nullptr;
}
- const ResizeNearestNeighborOptions *builtin_options_as_ResizeNearestNeighborOptions() const
+ const onert_tflite::CosOptions *builtin_options_as_CosOptions() const
{
- return builtin_options_type() == BuiltinOptions_ResizeNearestNeighborOptions
- ? static_cast<const ResizeNearestNeighborOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_CosOptions
+ ? static_cast<const onert_tflite::CosOptions *>(builtin_options())
+ : nullptr;
}
- const LeakyReluOptions *builtin_options_as_LeakyReluOptions() const
+ const onert_tflite::WhereOptions *builtin_options_as_WhereOptions() const
{
- return builtin_options_type() == BuiltinOptions_LeakyReluOptions
- ? static_cast<const LeakyReluOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_WhereOptions
+ ? static_cast<const onert_tflite::WhereOptions *>(builtin_options())
+ : nullptr;
}
- const SquaredDifferenceOptions *builtin_options_as_SquaredDifferenceOptions() const
+ const onert_tflite::RankOptions *builtin_options_as_RankOptions() const
{
- return builtin_options_type() == BuiltinOptions_SquaredDifferenceOptions
- ? static_cast<const SquaredDifferenceOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_RankOptions
+ ? static_cast<const onert_tflite::RankOptions *>(builtin_options())
+ : nullptr;
}
- const MirrorPadOptions *builtin_options_as_MirrorPadOptions() const
+ const onert_tflite::ReverseSequenceOptions *builtin_options_as_ReverseSequenceOptions() const
{
- return builtin_options_type() == BuiltinOptions_MirrorPadOptions
- ? static_cast<const MirrorPadOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_ReverseSequenceOptions
+ ? static_cast<const onert_tflite::ReverseSequenceOptions *>(builtin_options())
+ : nullptr;
}
- const AbsOptions *builtin_options_as_AbsOptions() const
+ const onert_tflite::MatrixDiagOptions *builtin_options_as_MatrixDiagOptions() const
{
- return builtin_options_type() == BuiltinOptions_AbsOptions
- ? static_cast<const AbsOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_MatrixDiagOptions
+ ? static_cast<const onert_tflite::MatrixDiagOptions *>(builtin_options())
+ : nullptr;
}
- const SplitVOptions *builtin_options_as_SplitVOptions() const
+ const onert_tflite::QuantizeOptions *builtin_options_as_QuantizeOptions() const
{
- return builtin_options_type() == BuiltinOptions_SplitVOptions
- ? static_cast<const SplitVOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_QuantizeOptions
+ ? static_cast<const onert_tflite::QuantizeOptions *>(builtin_options())
+ : nullptr;
}
- const UniqueOptions *builtin_options_as_UniqueOptions() const
+ const onert_tflite::MatrixSetDiagOptions *builtin_options_as_MatrixSetDiagOptions() const
{
- return builtin_options_type() == BuiltinOptions_UniqueOptions
- ? static_cast<const UniqueOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_MatrixSetDiagOptions
+ ? static_cast<const onert_tflite::MatrixSetDiagOptions *>(builtin_options())
+ : nullptr;
}
- const ReverseV2Options *builtin_options_as_ReverseV2Options() const
+ const onert_tflite::HardSwishOptions *builtin_options_as_HardSwishOptions() const
{
- return builtin_options_type() == BuiltinOptions_ReverseV2Options
- ? static_cast<const ReverseV2Options *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_HardSwishOptions
+ ? static_cast<const onert_tflite::HardSwishOptions *>(builtin_options())
+ : nullptr;
}
- const AddNOptions *builtin_options_as_AddNOptions() const
+ const onert_tflite::IfOptions *builtin_options_as_IfOptions() const
{
- return builtin_options_type() == BuiltinOptions_AddNOptions
- ? static_cast<const AddNOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_IfOptions
+ ? static_cast<const onert_tflite::IfOptions *>(builtin_options())
+ : nullptr;
}
- const GatherNdOptions *builtin_options_as_GatherNdOptions() const
+ const onert_tflite::WhileOptions *builtin_options_as_WhileOptions() const
{
- return builtin_options_type() == BuiltinOptions_GatherNdOptions
- ? static_cast<const GatherNdOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_WhileOptions
+ ? static_cast<const onert_tflite::WhileOptions *>(builtin_options())
+ : nullptr;
}
- const CosOptions *builtin_options_as_CosOptions() const
+ const onert_tflite::DepthToSpaceOptions *builtin_options_as_DepthToSpaceOptions() const
{
- return builtin_options_type() == BuiltinOptions_CosOptions
- ? static_cast<const CosOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_DepthToSpaceOptions
+ ? static_cast<const onert_tflite::DepthToSpaceOptions *>(builtin_options())
+ : nullptr;
}
- const WhereOptions *builtin_options_as_WhereOptions() const
+ const onert_tflite::NonMaxSuppressionV4Options *
+ builtin_options_as_NonMaxSuppressionV4Options() const
{
- return builtin_options_type() == BuiltinOptions_WhereOptions
- ? static_cast<const WhereOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_NonMaxSuppressionV4Options
+ ? static_cast<const onert_tflite::NonMaxSuppressionV4Options *>(builtin_options())
+ : nullptr;
}
- const RankOptions *builtin_options_as_RankOptions() const
+ const onert_tflite::NonMaxSuppressionV5Options *
+ builtin_options_as_NonMaxSuppressionV5Options() const
{
- return builtin_options_type() == BuiltinOptions_RankOptions
- ? static_cast<const RankOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_NonMaxSuppressionV5Options
+ ? static_cast<const onert_tflite::NonMaxSuppressionV5Options *>(builtin_options())
+ : nullptr;
}
- const ReverseSequenceOptions *builtin_options_as_ReverseSequenceOptions() const
+ const onert_tflite::ScatterNdOptions *builtin_options_as_ScatterNdOptions() const
{
- return builtin_options_type() == BuiltinOptions_ReverseSequenceOptions
- ? static_cast<const ReverseSequenceOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_ScatterNdOptions
+ ? static_cast<const onert_tflite::ScatterNdOptions *>(builtin_options())
+ : nullptr;
}
- const MatrixDiagOptions *builtin_options_as_MatrixDiagOptions() const
+ const onert_tflite::SelectV2Options *builtin_options_as_SelectV2Options() const
{
- return builtin_options_type() == BuiltinOptions_MatrixDiagOptions
- ? static_cast<const MatrixDiagOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_SelectV2Options
+ ? static_cast<const onert_tflite::SelectV2Options *>(builtin_options())
+ : nullptr;
}
- const QuantizeOptions *builtin_options_as_QuantizeOptions() const
+ const onert_tflite::DensifyOptions *builtin_options_as_DensifyOptions() const
{
- return builtin_options_type() == BuiltinOptions_QuantizeOptions
- ? static_cast<const QuantizeOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_DensifyOptions
+ ? static_cast<const onert_tflite::DensifyOptions *>(builtin_options())
+ : nullptr;
}
- const MatrixSetDiagOptions *builtin_options_as_MatrixSetDiagOptions() const
+ const onert_tflite::SegmentSumOptions *builtin_options_as_SegmentSumOptions() const
{
- return builtin_options_type() == BuiltinOptions_MatrixSetDiagOptions
- ? static_cast<const MatrixSetDiagOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_SegmentSumOptions
+ ? static_cast<const onert_tflite::SegmentSumOptions *>(builtin_options())
+ : nullptr;
}
- const HardSwishOptions *builtin_options_as_HardSwishOptions() const
+ const onert_tflite::BatchMatMulOptions *builtin_options_as_BatchMatMulOptions() const
{
- return builtin_options_type() == BuiltinOptions_HardSwishOptions
- ? static_cast<const HardSwishOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_BatchMatMulOptions
+ ? static_cast<const onert_tflite::BatchMatMulOptions *>(builtin_options())
+ : nullptr;
}
- const IfOptions *builtin_options_as_IfOptions() const
+ const onert_tflite::CumsumOptions *builtin_options_as_CumsumOptions() const
{
- return builtin_options_type() == BuiltinOptions_IfOptions
- ? static_cast<const IfOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_CumsumOptions
+ ? static_cast<const onert_tflite::CumsumOptions *>(builtin_options())
+ : nullptr;
}
- const WhileOptions *builtin_options_as_WhileOptions() const
+ const onert_tflite::CallOnceOptions *builtin_options_as_CallOnceOptions() const
{
- return builtin_options_type() == BuiltinOptions_WhileOptions
- ? static_cast<const WhileOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_CallOnceOptions
+ ? static_cast<const onert_tflite::CallOnceOptions *>(builtin_options())
+ : nullptr;
}
- const DepthToSpaceOptions *builtin_options_as_DepthToSpaceOptions() const
+ const onert_tflite::BroadcastToOptions *builtin_options_as_BroadcastToOptions() const
{
- return builtin_options_type() == BuiltinOptions_DepthToSpaceOptions
- ? static_cast<const DepthToSpaceOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_BroadcastToOptions
+ ? static_cast<const onert_tflite::BroadcastToOptions *>(builtin_options())
+ : nullptr;
}
- const NonMaxSuppressionV4Options *builtin_options_as_NonMaxSuppressionV4Options() const
+ const onert_tflite::Rfft2dOptions *builtin_options_as_Rfft2dOptions() const
{
- return builtin_options_type() == BuiltinOptions_NonMaxSuppressionV4Options
- ? static_cast<const NonMaxSuppressionV4Options *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_Rfft2dOptions
+ ? static_cast<const onert_tflite::Rfft2dOptions *>(builtin_options())
+ : nullptr;
}
- const NonMaxSuppressionV5Options *builtin_options_as_NonMaxSuppressionV5Options() const
+ const onert_tflite::Conv3DOptions *builtin_options_as_Conv3DOptions() const
{
- return builtin_options_type() == BuiltinOptions_NonMaxSuppressionV5Options
- ? static_cast<const NonMaxSuppressionV5Options *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_Conv3DOptions
+ ? static_cast<const onert_tflite::Conv3DOptions *>(builtin_options())
+ : nullptr;
}
- const ScatterNdOptions *builtin_options_as_ScatterNdOptions() const
+ const onert_tflite::HashtableOptions *builtin_options_as_HashtableOptions() const
{
- return builtin_options_type() == BuiltinOptions_ScatterNdOptions
- ? static_cast<const ScatterNdOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_HashtableOptions
+ ? static_cast<const onert_tflite::HashtableOptions *>(builtin_options())
+ : nullptr;
}
- const SelectV2Options *builtin_options_as_SelectV2Options() const
+ const onert_tflite::HashtableFindOptions *builtin_options_as_HashtableFindOptions() const
{
- return builtin_options_type() == BuiltinOptions_SelectV2Options
- ? static_cast<const SelectV2Options *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_HashtableFindOptions
+ ? static_cast<const onert_tflite::HashtableFindOptions *>(builtin_options())
+ : nullptr;
}
- const DensifyOptions *builtin_options_as_DensifyOptions() const
+ const onert_tflite::HashtableImportOptions *builtin_options_as_HashtableImportOptions() const
{
- return builtin_options_type() == BuiltinOptions_DensifyOptions
- ? static_cast<const DensifyOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_HashtableImportOptions
+ ? static_cast<const onert_tflite::HashtableImportOptions *>(builtin_options())
+ : nullptr;
}
- const SegmentSumOptions *builtin_options_as_SegmentSumOptions() const
+ const onert_tflite::HashtableSizeOptions *builtin_options_as_HashtableSizeOptions() const
{
- return builtin_options_type() == BuiltinOptions_SegmentSumOptions
- ? static_cast<const SegmentSumOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_HashtableSizeOptions
+ ? static_cast<const onert_tflite::HashtableSizeOptions *>(builtin_options())
+ : nullptr;
}
- const BatchMatMulOptions *builtin_options_as_BatchMatMulOptions() const
+ const onert_tflite::VarHandleOptions *builtin_options_as_VarHandleOptions() const
{
- return builtin_options_type() == BuiltinOptions_BatchMatMulOptions
- ? static_cast<const BatchMatMulOptions *>(builtin_options())
- : nullptr;
+ return builtin_options_type() == onert_tflite::BuiltinOptions_VarHandleOptions
+ ? static_cast<const onert_tflite::VarHandleOptions *>(builtin_options())
+ : nullptr;
+ }
+ const onert_tflite::ReadVariableOptions *builtin_options_as_ReadVariableOptions() const
+ {
+ return builtin_options_type() == onert_tflite::BuiltinOptions_ReadVariableOptions
+ ? static_cast<const onert_tflite::ReadVariableOptions *>(builtin_options())
+ : nullptr;
+ }
+ const onert_tflite::AssignVariableOptions *builtin_options_as_AssignVariableOptions() const
+ {
+ return builtin_options_type() == onert_tflite::BuiltinOptions_AssignVariableOptions
+ ? static_cast<const onert_tflite::AssignVariableOptions *>(builtin_options())
+ : nullptr;
+ }
+ const onert_tflite::RandomOptions *builtin_options_as_RandomOptions() const
+ {
+ return builtin_options_type() == onert_tflite::BuiltinOptions_RandomOptions
+ ? static_cast<const onert_tflite::RandomOptions *>(builtin_options())
+ : nullptr;
+ }
+ const onert_tflite::BucketizeOptions *builtin_options_as_BucketizeOptions() const
+ {
+ return builtin_options_type() == onert_tflite::BuiltinOptions_BucketizeOptions
+ ? static_cast<const onert_tflite::BucketizeOptions *>(builtin_options())
+ : nullptr;
+ }
+ const onert_tflite::GeluOptions *builtin_options_as_GeluOptions() const
+ {
+ return builtin_options_type() == onert_tflite::BuiltinOptions_GeluOptions
+ ? static_cast<const onert_tflite::GeluOptions *>(builtin_options())
+ : nullptr;
+ }
+ const onert_tflite::DynamicUpdateSliceOptions *
+ builtin_options_as_DynamicUpdateSliceOptions() const
+ {
+ return builtin_options_type() == onert_tflite::BuiltinOptions_DynamicUpdateSliceOptions
+ ? static_cast<const onert_tflite::DynamicUpdateSliceOptions *>(builtin_options())
+ : nullptr;
+ }
+ const onert_tflite::UnsortedSegmentProdOptions *
+ builtin_options_as_UnsortedSegmentProdOptions() const
+ {
+ return builtin_options_type() == onert_tflite::BuiltinOptions_UnsortedSegmentProdOptions
+ ? static_cast<const onert_tflite::UnsortedSegmentProdOptions *>(builtin_options())
+ : nullptr;
+ }
+ const onert_tflite::UnsortedSegmentMaxOptions *
+ builtin_options_as_UnsortedSegmentMaxOptions() const
+ {
+ return builtin_options_type() == onert_tflite::BuiltinOptions_UnsortedSegmentMaxOptions
+ ? static_cast<const onert_tflite::UnsortedSegmentMaxOptions *>(builtin_options())
+ : nullptr;
+ }
+ const onert_tflite::UnsortedSegmentSumOptions *
+ builtin_options_as_UnsortedSegmentSumOptions() const
+ {
+ return builtin_options_type() == onert_tflite::BuiltinOptions_UnsortedSegmentSumOptions
+ ? static_cast<const onert_tflite::UnsortedSegmentSumOptions *>(builtin_options())
+ : nullptr;
+ }
+ const onert_tflite::ATan2Options *builtin_options_as_ATan2Options() const
+ {
+ return builtin_options_type() == onert_tflite::BuiltinOptions_ATan2Options
+ ? static_cast<const onert_tflite::ATan2Options *>(builtin_options())
+ : nullptr;
}
const flatbuffers::Vector<uint8_t> *custom_options() const
{
return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM_OPTIONS);
}
- CustomOptionsFormat custom_options_format() const
+ onert_tflite::CustomOptionsFormat custom_options_format() const
{
- return static_cast<CustomOptionsFormat>(GetField<int8_t>(VT_CUSTOM_OPTIONS_FORMAT, 0));
+ return static_cast<onert_tflite::CustomOptionsFormat>(
+ GetField<int8_t>(VT_CUSTOM_OPTIONS_FORMAT, 0));
}
const flatbuffers::Vector<uint8_t> *mutating_variable_inputs() const
{
@@ -7880,550 +9632,855 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
}
};
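
For context: these generated accessors implement the standard FlatBuffers union pattern. builtin_options() returns the untyped union pointer, builtin_options_type() returns the discriminant enum, and each builtin_options_as_XOptions() helper (like the builtin_options_as<T>() specializations that follow) compares the discriminant first and returns nullptr on a mismatch, so callers never need a separate type check. A minimal usage sketch, assuming `op` points at a valid onert_tflite::Operator from an already-verified model buffer; the function and variable names below are illustrative, not part of the generated API:

#include <cstdio>

// Hypothetical helper: prints the strides of a Conv2D operator, or nothing
// if this Operator's builtin_options union holds a different member.
void print_conv2d_stride(const onert_tflite::Operator *op)
{
  // Typed accessor: yields nullptr unless the union holds Conv2DOptions.
  if (const auto *conv = op->builtin_options_as_Conv2DOptions())
    std::printf("stride: %d x %d\n", conv->stride_w(), conv->stride_h());

  // Equivalent template form, forwarding to the same accessor via the
  // specializations defined below.
  if (const auto *conv = op->builtin_options_as<onert_tflite::Conv2DOptions>())
    std::printf("activation: %d\n", static_cast<int>(conv->fused_activation_function()));
}
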
-template <> inline const Conv2DOptions *Operator::builtin_options_as<Conv2DOptions>() const
+template <>
+inline const onert_tflite::Conv2DOptions *
+Operator::builtin_options_as<onert_tflite::Conv2DOptions>() const
{
return builtin_options_as_Conv2DOptions();
}
template <>
-inline const DepthwiseConv2DOptions *Operator::builtin_options_as<DepthwiseConv2DOptions>() const
+inline const onert_tflite::DepthwiseConv2DOptions *
+Operator::builtin_options_as<onert_tflite::DepthwiseConv2DOptions>() const
{
return builtin_options_as_DepthwiseConv2DOptions();
}
template <>
-inline const ConcatEmbeddingsOptions *Operator::builtin_options_as<ConcatEmbeddingsOptions>() const
+inline const onert_tflite::ConcatEmbeddingsOptions *
+Operator::builtin_options_as<onert_tflite::ConcatEmbeddingsOptions>() const
{
return builtin_options_as_ConcatEmbeddingsOptions();
}
template <>
-inline const LSHProjectionOptions *Operator::builtin_options_as<LSHProjectionOptions>() const
+inline const onert_tflite::LSHProjectionOptions *
+Operator::builtin_options_as<onert_tflite::LSHProjectionOptions>() const
{
return builtin_options_as_LSHProjectionOptions();
}
-template <> inline const Pool2DOptions *Operator::builtin_options_as<Pool2DOptions>() const
+template <>
+inline const onert_tflite::Pool2DOptions *
+Operator::builtin_options_as<onert_tflite::Pool2DOptions>() const
{
return builtin_options_as_Pool2DOptions();
}
-template <> inline const SVDFOptions *Operator::builtin_options_as<SVDFOptions>() const
+template <>
+inline const onert_tflite::SVDFOptions *
+Operator::builtin_options_as<onert_tflite::SVDFOptions>() const
{
return builtin_options_as_SVDFOptions();
}
-template <> inline const RNNOptions *Operator::builtin_options_as<RNNOptions>() const
+template <>
+inline const onert_tflite::RNNOptions *
+Operator::builtin_options_as<onert_tflite::RNNOptions>() const
{
return builtin_options_as_RNNOptions();
}
template <>
-inline const FullyConnectedOptions *Operator::builtin_options_as<FullyConnectedOptions>() const
+inline const onert_tflite::FullyConnectedOptions *
+Operator::builtin_options_as<onert_tflite::FullyConnectedOptions>() const
{
return builtin_options_as_FullyConnectedOptions();
}
-template <> inline const SoftmaxOptions *Operator::builtin_options_as<SoftmaxOptions>() const
+template <>
+inline const onert_tflite::SoftmaxOptions *
+Operator::builtin_options_as<onert_tflite::SoftmaxOptions>() const
{
return builtin_options_as_SoftmaxOptions();
}
template <>
-inline const ConcatenationOptions *Operator::builtin_options_as<ConcatenationOptions>() const
+inline const onert_tflite::ConcatenationOptions *
+Operator::builtin_options_as<onert_tflite::ConcatenationOptions>() const
{
return builtin_options_as_ConcatenationOptions();
}
-template <> inline const AddOptions *Operator::builtin_options_as<AddOptions>() const
+template <>
+inline const onert_tflite::AddOptions *
+Operator::builtin_options_as<onert_tflite::AddOptions>() const
{
return builtin_options_as_AddOptions();
}
-template <> inline const L2NormOptions *Operator::builtin_options_as<L2NormOptions>() const
+template <>
+inline const onert_tflite::L2NormOptions *
+Operator::builtin_options_as<onert_tflite::L2NormOptions>() const
{
return builtin_options_as_L2NormOptions();
}
template <>
-inline const LocalResponseNormalizationOptions *
-Operator::builtin_options_as<LocalResponseNormalizationOptions>() const
+inline const onert_tflite::LocalResponseNormalizationOptions *
+Operator::builtin_options_as<onert_tflite::LocalResponseNormalizationOptions>() const
{
return builtin_options_as_LocalResponseNormalizationOptions();
}
-template <> inline const LSTMOptions *Operator::builtin_options_as<LSTMOptions>() const
+template <>
+inline const onert_tflite::LSTMOptions *
+Operator::builtin_options_as<onert_tflite::LSTMOptions>() const
{
return builtin_options_as_LSTMOptions();
}
template <>
-inline const ResizeBilinearOptions *Operator::builtin_options_as<ResizeBilinearOptions>() const
+inline const onert_tflite::ResizeBilinearOptions *
+Operator::builtin_options_as<onert_tflite::ResizeBilinearOptions>() const
{
return builtin_options_as_ResizeBilinearOptions();
}
-template <> inline const CallOptions *Operator::builtin_options_as<CallOptions>() const
+template <>
+inline const onert_tflite::CallOptions *
+Operator::builtin_options_as<onert_tflite::CallOptions>() const
{
return builtin_options_as_CallOptions();
}
-template <> inline const ReshapeOptions *Operator::builtin_options_as<ReshapeOptions>() const
+template <>
+inline const onert_tflite::ReshapeOptions *
+Operator::builtin_options_as<onert_tflite::ReshapeOptions>() const
{
return builtin_options_as_ReshapeOptions();
}
-template <> inline const SkipGramOptions *Operator::builtin_options_as<SkipGramOptions>() const
+template <>
+inline const onert_tflite::SkipGramOptions *
+Operator::builtin_options_as<onert_tflite::SkipGramOptions>() const
{
return builtin_options_as_SkipGramOptions();
}
template <>
-inline const SpaceToDepthOptions *Operator::builtin_options_as<SpaceToDepthOptions>() const
+inline const onert_tflite::SpaceToDepthOptions *
+Operator::builtin_options_as<onert_tflite::SpaceToDepthOptions>() const
{
return builtin_options_as_SpaceToDepthOptions();
}
template <>
-inline const EmbeddingLookupSparseOptions *
-Operator::builtin_options_as<EmbeddingLookupSparseOptions>() const
+inline const onert_tflite::EmbeddingLookupSparseOptions *
+Operator::builtin_options_as<onert_tflite::EmbeddingLookupSparseOptions>() const
{
return builtin_options_as_EmbeddingLookupSparseOptions();
}
-template <> inline const MulOptions *Operator::builtin_options_as<MulOptions>() const
+template <>
+inline const onert_tflite::MulOptions *
+Operator::builtin_options_as<onert_tflite::MulOptions>() const
{
return builtin_options_as_MulOptions();
}
-template <> inline const PadOptions *Operator::builtin_options_as<PadOptions>() const
+template <>
+inline const onert_tflite::PadOptions *
+Operator::builtin_options_as<onert_tflite::PadOptions>() const
{
return builtin_options_as_PadOptions();
}
-template <> inline const GatherOptions *Operator::builtin_options_as<GatherOptions>() const
+template <>
+inline const onert_tflite::GatherOptions *
+Operator::builtin_options_as<onert_tflite::GatherOptions>() const
{
return builtin_options_as_GatherOptions();
}
template <>
-inline const BatchToSpaceNDOptions *Operator::builtin_options_as<BatchToSpaceNDOptions>() const
+inline const onert_tflite::BatchToSpaceNDOptions *
+Operator::builtin_options_as<onert_tflite::BatchToSpaceNDOptions>() const
{
return builtin_options_as_BatchToSpaceNDOptions();
}
template <>
-inline const SpaceToBatchNDOptions *Operator::builtin_options_as<SpaceToBatchNDOptions>() const
+inline const onert_tflite::SpaceToBatchNDOptions *
+Operator::builtin_options_as<onert_tflite::SpaceToBatchNDOptions>() const
{
return builtin_options_as_SpaceToBatchNDOptions();
}
-template <> inline const TransposeOptions *Operator::builtin_options_as<TransposeOptions>() const
+template <>
+inline const onert_tflite::TransposeOptions *
+Operator::builtin_options_as<onert_tflite::TransposeOptions>() const
{
return builtin_options_as_TransposeOptions();
}
-template <> inline const ReducerOptions *Operator::builtin_options_as<ReducerOptions>() const
+template <>
+inline const onert_tflite::ReducerOptions *
+Operator::builtin_options_as<onert_tflite::ReducerOptions>() const
{
return builtin_options_as_ReducerOptions();
}
-template <> inline const SubOptions *Operator::builtin_options_as<SubOptions>() const
+template <>
+inline const onert_tflite::SubOptions *
+Operator::builtin_options_as<onert_tflite::SubOptions>() const
{
return builtin_options_as_SubOptions();
}
-template <> inline const DivOptions *Operator::builtin_options_as<DivOptions>() const
+template <>
+inline const onert_tflite::DivOptions *
+Operator::builtin_options_as<onert_tflite::DivOptions>() const
{
return builtin_options_as_DivOptions();
}
-template <> inline const SqueezeOptions *Operator::builtin_options_as<SqueezeOptions>() const
+template <>
+inline const onert_tflite::SqueezeOptions *
+Operator::builtin_options_as<onert_tflite::SqueezeOptions>() const
{
return builtin_options_as_SqueezeOptions();
}
template <>
-inline const SequenceRNNOptions *Operator::builtin_options_as<SequenceRNNOptions>() const
+inline const onert_tflite::SequenceRNNOptions *
+Operator::builtin_options_as<onert_tflite::SequenceRNNOptions>() const
{
return builtin_options_as_SequenceRNNOptions();
}
template <>
-inline const StridedSliceOptions *Operator::builtin_options_as<StridedSliceOptions>() const
+inline const onert_tflite::StridedSliceOptions *
+Operator::builtin_options_as<onert_tflite::StridedSliceOptions>() const
{
return builtin_options_as_StridedSliceOptions();
}
-template <> inline const ExpOptions *Operator::builtin_options_as<ExpOptions>() const
+template <>
+inline const onert_tflite::ExpOptions *
+Operator::builtin_options_as<onert_tflite::ExpOptions>() const
{
return builtin_options_as_ExpOptions();
}
-template <> inline const TopKV2Options *Operator::builtin_options_as<TopKV2Options>() const
+template <>
+inline const onert_tflite::TopKV2Options *
+Operator::builtin_options_as<onert_tflite::TopKV2Options>() const
{
return builtin_options_as_TopKV2Options();
}
-template <> inline const SplitOptions *Operator::builtin_options_as<SplitOptions>() const
+template <>
+inline const onert_tflite::SplitOptions *
+Operator::builtin_options_as<onert_tflite::SplitOptions>() const
{
return builtin_options_as_SplitOptions();
}
-template <> inline const LogSoftmaxOptions *Operator::builtin_options_as<LogSoftmaxOptions>() const
+template <>
+inline const onert_tflite::LogSoftmaxOptions *
+Operator::builtin_options_as<onert_tflite::LogSoftmaxOptions>() const
{
return builtin_options_as_LogSoftmaxOptions();
}
-template <> inline const CastOptions *Operator::builtin_options_as<CastOptions>() const
+template <>
+inline const onert_tflite::CastOptions *
+Operator::builtin_options_as<onert_tflite::CastOptions>() const
{
return builtin_options_as_CastOptions();
}
-template <> inline const DequantizeOptions *Operator::builtin_options_as<DequantizeOptions>() const
+template <>
+inline const onert_tflite::DequantizeOptions *
+Operator::builtin_options_as<onert_tflite::DequantizeOptions>() const
{
return builtin_options_as_DequantizeOptions();
}
template <>
-inline const MaximumMinimumOptions *Operator::builtin_options_as<MaximumMinimumOptions>() const
+inline const onert_tflite::MaximumMinimumOptions *
+Operator::builtin_options_as<onert_tflite::MaximumMinimumOptions>() const
{
return builtin_options_as_MaximumMinimumOptions();
}
-template <> inline const ArgMaxOptions *Operator::builtin_options_as<ArgMaxOptions>() const
+template <>
+inline const onert_tflite::ArgMaxOptions *
+Operator::builtin_options_as<onert_tflite::ArgMaxOptions>() const
{
return builtin_options_as_ArgMaxOptions();
}
-template <> inline const LessOptions *Operator::builtin_options_as<LessOptions>() const
+template <>
+inline const onert_tflite::LessOptions *
+Operator::builtin_options_as<onert_tflite::LessOptions>() const
{
return builtin_options_as_LessOptions();
}
-template <> inline const NegOptions *Operator::builtin_options_as<NegOptions>() const
+template <>
+inline const onert_tflite::NegOptions *
+Operator::builtin_options_as<onert_tflite::NegOptions>() const
{
return builtin_options_as_NegOptions();
}
-template <> inline const PadV2Options *Operator::builtin_options_as<PadV2Options>() const
+template <>
+inline const onert_tflite::PadV2Options *
+Operator::builtin_options_as<onert_tflite::PadV2Options>() const
{
return builtin_options_as_PadV2Options();
}
-template <> inline const GreaterOptions *Operator::builtin_options_as<GreaterOptions>() const
+template <>
+inline const onert_tflite::GreaterOptions *
+Operator::builtin_options_as<onert_tflite::GreaterOptions>() const
{
return builtin_options_as_GreaterOptions();
}
template <>
-inline const GreaterEqualOptions *Operator::builtin_options_as<GreaterEqualOptions>() const
+inline const onert_tflite::GreaterEqualOptions *
+Operator::builtin_options_as<onert_tflite::GreaterEqualOptions>() const
{
return builtin_options_as_GreaterEqualOptions();
}
-template <> inline const LessEqualOptions *Operator::builtin_options_as<LessEqualOptions>() const
+template <>
+inline const onert_tflite::LessEqualOptions *
+Operator::builtin_options_as<onert_tflite::LessEqualOptions>() const
{
return builtin_options_as_LessEqualOptions();
}
-template <> inline const SelectOptions *Operator::builtin_options_as<SelectOptions>() const
+template <>
+inline const onert_tflite::SelectOptions *
+Operator::builtin_options_as<onert_tflite::SelectOptions>() const
{
return builtin_options_as_SelectOptions();
}
-template <> inline const SliceOptions *Operator::builtin_options_as<SliceOptions>() const
+template <>
+inline const onert_tflite::SliceOptions *
+Operator::builtin_options_as<onert_tflite::SliceOptions>() const
{
return builtin_options_as_SliceOptions();
}
template <>
-inline const TransposeConvOptions *Operator::builtin_options_as<TransposeConvOptions>() const
+inline const onert_tflite::TransposeConvOptions *
+Operator::builtin_options_as<onert_tflite::TransposeConvOptions>() const
{
return builtin_options_as_TransposeConvOptions();
}
template <>
-inline const SparseToDenseOptions *Operator::builtin_options_as<SparseToDenseOptions>() const
+inline const onert_tflite::SparseToDenseOptions *
+Operator::builtin_options_as<onert_tflite::SparseToDenseOptions>() const
{
return builtin_options_as_SparseToDenseOptions();
}
-template <> inline const TileOptions *Operator::builtin_options_as<TileOptions>() const
+template <>
+inline const onert_tflite::TileOptions *
+Operator::builtin_options_as<onert_tflite::TileOptions>() const
{
return builtin_options_as_TileOptions();
}
-template <> inline const ExpandDimsOptions *Operator::builtin_options_as<ExpandDimsOptions>() const
+template <>
+inline const onert_tflite::ExpandDimsOptions *
+Operator::builtin_options_as<onert_tflite::ExpandDimsOptions>() const
{
return builtin_options_as_ExpandDimsOptions();
}
-template <> inline const EqualOptions *Operator::builtin_options_as<EqualOptions>() const
+template <>
+inline const onert_tflite::EqualOptions *
+Operator::builtin_options_as<onert_tflite::EqualOptions>() const
{
return builtin_options_as_EqualOptions();
}
-template <> inline const NotEqualOptions *Operator::builtin_options_as<NotEqualOptions>() const
+template <>
+inline const onert_tflite::NotEqualOptions *
+Operator::builtin_options_as<onert_tflite::NotEqualOptions>() const
{
return builtin_options_as_NotEqualOptions();
}
-template <> inline const ShapeOptions *Operator::builtin_options_as<ShapeOptions>() const
+template <>
+inline const onert_tflite::ShapeOptions *
+Operator::builtin_options_as<onert_tflite::ShapeOptions>() const
{
return builtin_options_as_ShapeOptions();
}
-template <> inline const PowOptions *Operator::builtin_options_as<PowOptions>() const
+template <>
+inline const onert_tflite::PowOptions *
+Operator::builtin_options_as<onert_tflite::PowOptions>() const
{
return builtin_options_as_PowOptions();
}
-template <> inline const ArgMinOptions *Operator::builtin_options_as<ArgMinOptions>() const
+template <>
+inline const onert_tflite::ArgMinOptions *
+Operator::builtin_options_as<onert_tflite::ArgMinOptions>() const
{
return builtin_options_as_ArgMinOptions();
}
-template <> inline const FakeQuantOptions *Operator::builtin_options_as<FakeQuantOptions>() const
+template <>
+inline const onert_tflite::FakeQuantOptions *
+Operator::builtin_options_as<onert_tflite::FakeQuantOptions>() const
{
return builtin_options_as_FakeQuantOptions();
}
-template <> inline const PackOptions *Operator::builtin_options_as<PackOptions>() const
+template <>
+inline const onert_tflite::PackOptions *
+Operator::builtin_options_as<onert_tflite::PackOptions>() const
{
return builtin_options_as_PackOptions();
}
-template <> inline const LogicalOrOptions *Operator::builtin_options_as<LogicalOrOptions>() const
+template <>
+inline const onert_tflite::LogicalOrOptions *
+Operator::builtin_options_as<onert_tflite::LogicalOrOptions>() const
{
return builtin_options_as_LogicalOrOptions();
}
-template <> inline const OneHotOptions *Operator::builtin_options_as<OneHotOptions>() const
+template <>
+inline const onert_tflite::OneHotOptions *
+Operator::builtin_options_as<onert_tflite::OneHotOptions>() const
{
return builtin_options_as_OneHotOptions();
}
-template <> inline const LogicalAndOptions *Operator::builtin_options_as<LogicalAndOptions>() const
+template <>
+inline const onert_tflite::LogicalAndOptions *
+Operator::builtin_options_as<onert_tflite::LogicalAndOptions>() const
{
return builtin_options_as_LogicalAndOptions();
}
-template <> inline const LogicalNotOptions *Operator::builtin_options_as<LogicalNotOptions>() const
+template <>
+inline const onert_tflite::LogicalNotOptions *
+Operator::builtin_options_as<onert_tflite::LogicalNotOptions>() const
{
return builtin_options_as_LogicalNotOptions();
}
-template <> inline const UnpackOptions *Operator::builtin_options_as<UnpackOptions>() const
+template <>
+inline const onert_tflite::UnpackOptions *
+Operator::builtin_options_as<onert_tflite::UnpackOptions>() const
{
return builtin_options_as_UnpackOptions();
}
-template <> inline const FloorDivOptions *Operator::builtin_options_as<FloorDivOptions>() const
+template <>
+inline const onert_tflite::FloorDivOptions *
+Operator::builtin_options_as<onert_tflite::FloorDivOptions>() const
{
return builtin_options_as_FloorDivOptions();
}
-template <> inline const SquareOptions *Operator::builtin_options_as<SquareOptions>() const
+template <>
+inline const onert_tflite::SquareOptions *
+Operator::builtin_options_as<onert_tflite::SquareOptions>() const
{
return builtin_options_as_SquareOptions();
}
-template <> inline const ZerosLikeOptions *Operator::builtin_options_as<ZerosLikeOptions>() const
+template <>
+inline const onert_tflite::ZerosLikeOptions *
+Operator::builtin_options_as<onert_tflite::ZerosLikeOptions>() const
{
return builtin_options_as_ZerosLikeOptions();
}
-template <> inline const FillOptions *Operator::builtin_options_as<FillOptions>() const
+template <>
+inline const onert_tflite::FillOptions *
+Operator::builtin_options_as<onert_tflite::FillOptions>() const
{
return builtin_options_as_FillOptions();
}
template <>
-inline const BidirectionalSequenceLSTMOptions *
-Operator::builtin_options_as<BidirectionalSequenceLSTMOptions>() const
+inline const onert_tflite::BidirectionalSequenceLSTMOptions *
+Operator::builtin_options_as<onert_tflite::BidirectionalSequenceLSTMOptions>() const
{
return builtin_options_as_BidirectionalSequenceLSTMOptions();
}
template <>
-inline const BidirectionalSequenceRNNOptions *
-Operator::builtin_options_as<BidirectionalSequenceRNNOptions>() const
+inline const onert_tflite::BidirectionalSequenceRNNOptions *
+Operator::builtin_options_as<onert_tflite::BidirectionalSequenceRNNOptions>() const
{
return builtin_options_as_BidirectionalSequenceRNNOptions();
}
template <>
-inline const UnidirectionalSequenceLSTMOptions *
-Operator::builtin_options_as<UnidirectionalSequenceLSTMOptions>() const
+inline const onert_tflite::UnidirectionalSequenceLSTMOptions *
+Operator::builtin_options_as<onert_tflite::UnidirectionalSequenceLSTMOptions>() const
{
return builtin_options_as_UnidirectionalSequenceLSTMOptions();
}
-template <> inline const FloorModOptions *Operator::builtin_options_as<FloorModOptions>() const
+template <>
+inline const onert_tflite::FloorModOptions *
+Operator::builtin_options_as<onert_tflite::FloorModOptions>() const
{
return builtin_options_as_FloorModOptions();
}
-template <> inline const RangeOptions *Operator::builtin_options_as<RangeOptions>() const
+template <>
+inline const onert_tflite::RangeOptions *
+Operator::builtin_options_as<onert_tflite::RangeOptions>() const
{
return builtin_options_as_RangeOptions();
}
template <>
-inline const ResizeNearestNeighborOptions *
-Operator::builtin_options_as<ResizeNearestNeighborOptions>() const
+inline const onert_tflite::ResizeNearestNeighborOptions *
+Operator::builtin_options_as<onert_tflite::ResizeNearestNeighborOptions>() const
{
return builtin_options_as_ResizeNearestNeighborOptions();
}
-template <> inline const LeakyReluOptions *Operator::builtin_options_as<LeakyReluOptions>() const
+template <>
+inline const onert_tflite::LeakyReluOptions *
+Operator::builtin_options_as<onert_tflite::LeakyReluOptions>() const
{
return builtin_options_as_LeakyReluOptions();
}
template <>
-inline const SquaredDifferenceOptions *
-Operator::builtin_options_as<SquaredDifferenceOptions>() const
+inline const onert_tflite::SquaredDifferenceOptions *
+Operator::builtin_options_as<onert_tflite::SquaredDifferenceOptions>() const
{
return builtin_options_as_SquaredDifferenceOptions();
}
-template <> inline const MirrorPadOptions *Operator::builtin_options_as<MirrorPadOptions>() const
+template <>
+inline const onert_tflite::MirrorPadOptions *
+Operator::builtin_options_as<onert_tflite::MirrorPadOptions>() const
{
return builtin_options_as_MirrorPadOptions();
}
-template <> inline const AbsOptions *Operator::builtin_options_as<AbsOptions>() const
+template <>
+inline const onert_tflite::AbsOptions *
+Operator::builtin_options_as<onert_tflite::AbsOptions>() const
{
return builtin_options_as_AbsOptions();
}
-template <> inline const SplitVOptions *Operator::builtin_options_as<SplitVOptions>() const
+template <>
+inline const onert_tflite::SplitVOptions *
+Operator::builtin_options_as<onert_tflite::SplitVOptions>() const
{
return builtin_options_as_SplitVOptions();
}
-template <> inline const UniqueOptions *Operator::builtin_options_as<UniqueOptions>() const
+template <>
+inline const onert_tflite::UniqueOptions *
+Operator::builtin_options_as<onert_tflite::UniqueOptions>() const
{
return builtin_options_as_UniqueOptions();
}
-template <> inline const ReverseV2Options *Operator::builtin_options_as<ReverseV2Options>() const
+template <>
+inline const onert_tflite::ReverseV2Options *
+Operator::builtin_options_as<onert_tflite::ReverseV2Options>() const
{
return builtin_options_as_ReverseV2Options();
}
-template <> inline const AddNOptions *Operator::builtin_options_as<AddNOptions>() const
+template <>
+inline const onert_tflite::AddNOptions *
+Operator::builtin_options_as<onert_tflite::AddNOptions>() const
{
return builtin_options_as_AddNOptions();
}
-template <> inline const GatherNdOptions *Operator::builtin_options_as<GatherNdOptions>() const
+template <>
+inline const onert_tflite::GatherNdOptions *
+Operator::builtin_options_as<onert_tflite::GatherNdOptions>() const
{
return builtin_options_as_GatherNdOptions();
}
-template <> inline const CosOptions *Operator::builtin_options_as<CosOptions>() const
+template <>
+inline const onert_tflite::CosOptions *
+Operator::builtin_options_as<onert_tflite::CosOptions>() const
{
return builtin_options_as_CosOptions();
}
-template <> inline const WhereOptions *Operator::builtin_options_as<WhereOptions>() const
+template <>
+inline const onert_tflite::WhereOptions *
+Operator::builtin_options_as<onert_tflite::WhereOptions>() const
{
return builtin_options_as_WhereOptions();
}
-template <> inline const RankOptions *Operator::builtin_options_as<RankOptions>() const
+template <>
+inline const onert_tflite::RankOptions *
+Operator::builtin_options_as<onert_tflite::RankOptions>() const
{
return builtin_options_as_RankOptions();
}
template <>
-inline const ReverseSequenceOptions *Operator::builtin_options_as<ReverseSequenceOptions>() const
+inline const onert_tflite::ReverseSequenceOptions *
+Operator::builtin_options_as<onert_tflite::ReverseSequenceOptions>() const
{
return builtin_options_as_ReverseSequenceOptions();
}
-template <> inline const MatrixDiagOptions *Operator::builtin_options_as<MatrixDiagOptions>() const
+template <>
+inline const onert_tflite::MatrixDiagOptions *
+Operator::builtin_options_as<onert_tflite::MatrixDiagOptions>() const
{
return builtin_options_as_MatrixDiagOptions();
}
-template <> inline const QuantizeOptions *Operator::builtin_options_as<QuantizeOptions>() const
+template <>
+inline const onert_tflite::QuantizeOptions *
+Operator::builtin_options_as<onert_tflite::QuantizeOptions>() const
{
return builtin_options_as_QuantizeOptions();
}
template <>
-inline const MatrixSetDiagOptions *Operator::builtin_options_as<MatrixSetDiagOptions>() const
+inline const onert_tflite::MatrixSetDiagOptions *
+Operator::builtin_options_as<onert_tflite::MatrixSetDiagOptions>() const
{
return builtin_options_as_MatrixSetDiagOptions();
}
-template <> inline const HardSwishOptions *Operator::builtin_options_as<HardSwishOptions>() const
+template <>
+inline const onert_tflite::HardSwishOptions *
+Operator::builtin_options_as<onert_tflite::HardSwishOptions>() const
{
return builtin_options_as_HardSwishOptions();
}
-template <> inline const IfOptions *Operator::builtin_options_as<IfOptions>() const
+template <>
+inline const onert_tflite::IfOptions *Operator::builtin_options_as<onert_tflite::IfOptions>() const
{
return builtin_options_as_IfOptions();
}
-template <> inline const WhileOptions *Operator::builtin_options_as<WhileOptions>() const
+template <>
+inline const onert_tflite::WhileOptions *
+Operator::builtin_options_as<onert_tflite::WhileOptions>() const
{
return builtin_options_as_WhileOptions();
}
template <>
-inline const DepthToSpaceOptions *Operator::builtin_options_as<DepthToSpaceOptions>() const
+inline const onert_tflite::DepthToSpaceOptions *
+Operator::builtin_options_as<onert_tflite::DepthToSpaceOptions>() const
{
return builtin_options_as_DepthToSpaceOptions();
}
template <>
-inline const NonMaxSuppressionV4Options *
-Operator::builtin_options_as<NonMaxSuppressionV4Options>() const
+inline const onert_tflite::NonMaxSuppressionV4Options *
+Operator::builtin_options_as<onert_tflite::NonMaxSuppressionV4Options>() const
{
return builtin_options_as_NonMaxSuppressionV4Options();
}
template <>
-inline const NonMaxSuppressionV5Options *
-Operator::builtin_options_as<NonMaxSuppressionV5Options>() const
+inline const onert_tflite::NonMaxSuppressionV5Options *
+Operator::builtin_options_as<onert_tflite::NonMaxSuppressionV5Options>() const
{
return builtin_options_as_NonMaxSuppressionV5Options();
}
-template <> inline const ScatterNdOptions *Operator::builtin_options_as<ScatterNdOptions>() const
+template <>
+inline const onert_tflite::ScatterNdOptions *
+Operator::builtin_options_as<onert_tflite::ScatterNdOptions>() const
{
return builtin_options_as_ScatterNdOptions();
}
-template <> inline const SelectV2Options *Operator::builtin_options_as<SelectV2Options>() const
+template <>
+inline const onert_tflite::SelectV2Options *
+Operator::builtin_options_as<onert_tflite::SelectV2Options>() const
{
return builtin_options_as_SelectV2Options();
}
-template <> inline const DensifyOptions *Operator::builtin_options_as<DensifyOptions>() const
+template <>
+inline const onert_tflite::DensifyOptions *
+Operator::builtin_options_as<onert_tflite::DensifyOptions>() const
{
return builtin_options_as_DensifyOptions();
}
-template <> inline const SegmentSumOptions *Operator::builtin_options_as<SegmentSumOptions>() const
+template <>
+inline const onert_tflite::SegmentSumOptions *
+Operator::builtin_options_as<onert_tflite::SegmentSumOptions>() const
{
return builtin_options_as_SegmentSumOptions();
}
template <>
-inline const BatchMatMulOptions *Operator::builtin_options_as<BatchMatMulOptions>() const
+inline const onert_tflite::BatchMatMulOptions *
+Operator::builtin_options_as<onert_tflite::BatchMatMulOptions>() const
{
return builtin_options_as_BatchMatMulOptions();
}
+template <>
+inline const onert_tflite::CumsumOptions *
+Operator::builtin_options_as<onert_tflite::CumsumOptions>() const
+{
+ return builtin_options_as_CumsumOptions();
+}
+
+template <>
+inline const onert_tflite::CallOnceOptions *
+Operator::builtin_options_as<onert_tflite::CallOnceOptions>() const
+{
+ return builtin_options_as_CallOnceOptions();
+}
+
+template <>
+inline const onert_tflite::BroadcastToOptions *
+Operator::builtin_options_as<onert_tflite::BroadcastToOptions>() const
+{
+ return builtin_options_as_BroadcastToOptions();
+}
+
+template <>
+inline const onert_tflite::Rfft2dOptions *
+Operator::builtin_options_as<onert_tflite::Rfft2dOptions>() const
+{
+ return builtin_options_as_Rfft2dOptions();
+}
+
+template <>
+inline const onert_tflite::Conv3DOptions *
+Operator::builtin_options_as<onert_tflite::Conv3DOptions>() const
+{
+ return builtin_options_as_Conv3DOptions();
+}
+
+template <>
+inline const onert_tflite::HashtableOptions *
+Operator::builtin_options_as<onert_tflite::HashtableOptions>() const
+{
+ return builtin_options_as_HashtableOptions();
+}
+
+template <>
+inline const onert_tflite::HashtableFindOptions *
+Operator::builtin_options_as<onert_tflite::HashtableFindOptions>() const
+{
+ return builtin_options_as_HashtableFindOptions();
+}
+
+template <>
+inline const onert_tflite::HashtableImportOptions *
+Operator::builtin_options_as<onert_tflite::HashtableImportOptions>() const
+{
+ return builtin_options_as_HashtableImportOptions();
+}
+
+template <>
+inline const onert_tflite::HashtableSizeOptions *
+Operator::builtin_options_as<onert_tflite::HashtableSizeOptions>() const
+{
+ return builtin_options_as_HashtableSizeOptions();
+}
+
+template <>
+inline const onert_tflite::VarHandleOptions *
+Operator::builtin_options_as<onert_tflite::VarHandleOptions>() const
+{
+ return builtin_options_as_VarHandleOptions();
+}
+
+template <>
+inline const onert_tflite::ReadVariableOptions *
+Operator::builtin_options_as<onert_tflite::ReadVariableOptions>() const
+{
+ return builtin_options_as_ReadVariableOptions();
+}
+
+template <>
+inline const onert_tflite::AssignVariableOptions *
+Operator::builtin_options_as<onert_tflite::AssignVariableOptions>() const
+{
+ return builtin_options_as_AssignVariableOptions();
+}
+
+template <>
+inline const onert_tflite::RandomOptions *
+Operator::builtin_options_as<onert_tflite::RandomOptions>() const
+{
+ return builtin_options_as_RandomOptions();
+}
+
+template <>
+inline const onert_tflite::BucketizeOptions *
+Operator::builtin_options_as<onert_tflite::BucketizeOptions>() const
+{
+ return builtin_options_as_BucketizeOptions();
+}
+
+template <>
+inline const onert_tflite::GeluOptions *
+Operator::builtin_options_as<onert_tflite::GeluOptions>() const
+{
+ return builtin_options_as_GeluOptions();
+}
+
+template <>
+inline const onert_tflite::DynamicUpdateSliceOptions *
+Operator::builtin_options_as<onert_tflite::DynamicUpdateSliceOptions>() const
+{
+ return builtin_options_as_DynamicUpdateSliceOptions();
+}
+
+template <>
+inline const onert_tflite::UnsortedSegmentProdOptions *
+Operator::builtin_options_as<onert_tflite::UnsortedSegmentProdOptions>() const
+{
+ return builtin_options_as_UnsortedSegmentProdOptions();
+}
+
+template <>
+inline const onert_tflite::UnsortedSegmentMaxOptions *
+Operator::builtin_options_as<onert_tflite::UnsortedSegmentMaxOptions>() const
+{
+ return builtin_options_as_UnsortedSegmentMaxOptions();
+}
+
+template <>
+inline const onert_tflite::UnsortedSegmentSumOptions *
+Operator::builtin_options_as<onert_tflite::UnsortedSegmentSumOptions>() const
+{
+ return builtin_options_as_UnsortedSegmentSumOptions();
+}
+
+template <>
+inline const onert_tflite::ATan2Options *
+Operator::builtin_options_as<onert_tflite::ATan2Options>() const
+{
+ return builtin_options_as_ATan2Options();
+}
+
struct OperatorBuilder
{
+ typedef Operator Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_opcode_index(uint32_t opcode_index)
@@ -8438,7 +10495,7 @@ struct OperatorBuilder
{
fbb_.AddOffset(Operator::VT_OUTPUTS, outputs);
}
- void add_builtin_options_type(BuiltinOptions builtin_options_type)
+ void add_builtin_options_type(onert_tflite::BuiltinOptions builtin_options_type)
{
fbb_.AddElement<uint8_t>(Operator::VT_BUILTIN_OPTIONS_TYPE,
static_cast<uint8_t>(builtin_options_type), 0);
@@ -8451,13 +10508,13 @@ struct OperatorBuilder
{
fbb_.AddOffset(Operator::VT_CUSTOM_OPTIONS, custom_options);
}
- void add_custom_options_format(CustomOptionsFormat custom_options_format)
+ void add_custom_options_format(onert_tflite::CustomOptionsFormat custom_options_format)
{
fbb_.AddElement<int8_t>(Operator::VT_CUSTOM_OPTIONS_FORMAT,
static_cast<int8_t>(custom_options_format), 0);
}
void add_mutating_variable_inputs(
- flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs)
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs)
{
fbb_.AddOffset(Operator::VT_MUTATING_VARIABLE_INPUTS, mutating_variable_inputs);
}
@@ -8469,7 +10526,6 @@ struct OperatorBuilder
{
start_ = fbb_.StartTable();
}
- OperatorBuilder &operator=(const OperatorBuilder &);
flatbuffers::Offset<Operator> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -8478,16 +10534,17 @@ struct OperatorBuilder
}
};
-inline flatbuffers::Offset<Operator>
-CreateOperator(flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
- BuiltinOptions builtin_options_type = BuiltinOptions_NONE,
- flatbuffers::Offset<void> builtin_options = 0,
- flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options = 0,
- CustomOptionsFormat custom_options_format = CustomOptionsFormat_FLEXBUFFERS,
- flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> intermediates = 0)
+inline flatbuffers::Offset<Operator> CreateOperator(
+ flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
+ onert_tflite::BuiltinOptions builtin_options_type = onert_tflite::BuiltinOptions_NONE,
+ flatbuffers::Offset<void> builtin_options = 0,
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options = 0,
+ onert_tflite::CustomOptionsFormat custom_options_format =
+ onert_tflite::CustomOptionsFormat_FLEXBUFFERS,
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> intermediates = 0)
{
OperatorBuilder builder_(_fbb);
builder_.add_intermediates(intermediates);
@@ -8502,28 +10559,32 @@ CreateOperator(flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
return builder_.Finish();
}
-inline flatbuffers::Offset<Operator>
-CreateOperatorDirect(flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
- const std::vector<int32_t> *inputs = nullptr,
- const std::vector<int32_t> *outputs = nullptr,
- BuiltinOptions builtin_options_type = BuiltinOptions_NONE,
- flatbuffers::Offset<void> builtin_options = 0,
- const std::vector<uint8_t> *custom_options = nullptr,
- CustomOptionsFormat custom_options_format = CustomOptionsFormat_FLEXBUFFERS,
- const std::vector<uint8_t> *mutating_variable_inputs = nullptr,
- const std::vector<int32_t> *intermediates = nullptr)
-{
- return onert_tflite::CreateOperator(
- _fbb, opcode_index, inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0,
- outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0, builtin_options_type, builtin_options,
- custom_options ? _fbb.CreateVector<uint8_t>(*custom_options) : 0, custom_options_format,
- mutating_variable_inputs ? _fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0,
- intermediates ? _fbb.CreateVector<int32_t>(*intermediates) : 0);
+inline flatbuffers::Offset<Operator> CreateOperatorDirect(
+ flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
+ const std::vector<int32_t> *inputs = nullptr, const std::vector<int32_t> *outputs = nullptr,
+ onert_tflite::BuiltinOptions builtin_options_type = onert_tflite::BuiltinOptions_NONE,
+ flatbuffers::Offset<void> builtin_options = 0,
+ const std::vector<uint8_t> *custom_options = nullptr,
+ onert_tflite::CustomOptionsFormat custom_options_format =
+ onert_tflite::CustomOptionsFormat_FLEXBUFFERS,
+ const std::vector<uint8_t> *mutating_variable_inputs = nullptr,
+ const std::vector<int32_t> *intermediates = nullptr)
+{
+ auto inputs__ = inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0;
+ auto outputs__ = outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0;
+ auto custom_options__ = custom_options ? _fbb.CreateVector<uint8_t>(*custom_options) : 0;
+ auto mutating_variable_inputs__ =
+ mutating_variable_inputs ? _fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0;
+ auto intermediates__ = intermediates ? _fbb.CreateVector<int32_t>(*intermediates) : 0;
+ return onert_tflite::CreateOperator(_fbb, opcode_index, inputs__, outputs__, builtin_options_type,
+ builtin_options, custom_options__, custom_options_format,
+ mutating_variable_inputs__, intermediates__);
}
struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef SubGraphBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_TENSORS = 4,
VT_INPUTS = 6,
@@ -8531,9 +10592,10 @@ struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
VT_OPERATORS = 10,
VT_NAME = 12
};
- const flatbuffers::Vector<flatbuffers::Offset<Tensor>> *tensors() const
+ const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Tensor>> *tensors() const
{
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Tensor>> *>(VT_TENSORS);
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Tensor>> *>(
+ VT_TENSORS);
}
const flatbuffers::Vector<int32_t> *inputs() const
{
@@ -8543,9 +10605,10 @@ struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_OUTPUTS);
}
- const flatbuffers::Vector<flatbuffers::Offset<Operator>> *operators() const
+ const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Operator>> *operators() const
{
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Operator>> *>(VT_OPERATORS);
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Operator>> *>(
+ VT_OPERATORS);
}
const flatbuffers::String *name() const
{
@@ -8565,9 +10628,11 @@ struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct SubGraphBuilder
{
+ typedef SubGraph Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
- void add_tensors(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>> tensors)
+ void add_tensors(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Tensor>>> tensors)
{
fbb_.AddOffset(SubGraph::VT_TENSORS, tensors);
}
@@ -8579,8 +10644,8 @@ struct SubGraphBuilder
{
fbb_.AddOffset(SubGraph::VT_OUTPUTS, outputs);
}
- void
- add_operators(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>> operators)
+ void add_operators(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Operator>>> operators)
{
fbb_.AddOffset(SubGraph::VT_OPERATORS, operators);
}
@@ -8592,7 +10657,6 @@ struct SubGraphBuilder
{
start_ = fbb_.StartTable();
}
- SubGraphBuilder &operator=(const SubGraphBuilder &);
flatbuffers::Offset<SubGraph> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -8602,12 +10666,13 @@ struct SubGraphBuilder
};
inline flatbuffers::Offset<SubGraph> CreateSubGraph(
- flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>> tensors = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>> operators = 0,
- flatbuffers::Offset<flatbuffers::String> name = 0)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Tensor>>> tensors = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Operator>>> operators =
+ 0,
+ flatbuffers::Offset<flatbuffers::String> name = 0)
{
SubGraphBuilder builder_(_fbb);
builder_.add_name(name);
@@ -8618,25 +10683,27 @@ inline flatbuffers::Offset<SubGraph> CreateSubGraph(
return builder_.Finish();
}
-inline flatbuffers::Offset<SubGraph>
-CreateSubGraphDirect(flatbuffers::FlatBufferBuilder &_fbb,
- const std::vector<flatbuffers::Offset<Tensor>> *tensors = nullptr,
- const std::vector<int32_t> *inputs = nullptr,
- const std::vector<int32_t> *outputs = nullptr,
- const std::vector<flatbuffers::Offset<Operator>> *operators = nullptr,
- const char *name = nullptr)
+inline flatbuffers::Offset<SubGraph> CreateSubGraphDirect(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ const std::vector<flatbuffers::Offset<onert_tflite::Tensor>> *tensors = nullptr,
+ const std::vector<int32_t> *inputs = nullptr, const std::vector<int32_t> *outputs = nullptr,
+ const std::vector<flatbuffers::Offset<onert_tflite::Operator>> *operators = nullptr,
+ const char *name = nullptr)
{
- return onert_tflite::CreateSubGraph(
- _fbb, tensors ? _fbb.CreateVector<flatbuffers::Offset<Tensor>>(*tensors) : 0,
- inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0,
- outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0,
- operators ? _fbb.CreateVector<flatbuffers::Offset<Operator>>(*operators) : 0,
- name ? _fbb.CreateString(name) : 0);
+ auto tensors__ =
+ tensors ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::Tensor>>(*tensors) : 0;
+ auto inputs__ = inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0;
+ auto outputs__ = outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0;
+ auto operators__ =
+ operators ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::Operator>>(*operators) : 0;
+ auto name__ = name ? _fbb.CreateString(name) : 0;
+ return onert_tflite::CreateSubGraph(_fbb, tensors__, inputs__, outputs__, operators__, name__);
}
struct Buffer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef BufferBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_DATA = 4
};
@@ -8653,6 +10720,7 @@ struct Buffer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct BufferBuilder
{
+ typedef Buffer Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_data(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data)
@@ -8663,7 +10731,6 @@ struct BufferBuilder
{
start_ = fbb_.StartTable();
}
- BufferBuilder &operator=(const BufferBuilder &);
flatbuffers::Offset<Buffer> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -8684,12 +10751,18 @@ CreateBuffer(flatbuffers::FlatBufferBuilder &_fbb,
inline flatbuffers::Offset<Buffer> CreateBufferDirect(flatbuffers::FlatBufferBuilder &_fbb,
const std::vector<uint8_t> *data = nullptr)
{
- return onert_tflite::CreateBuffer(_fbb, data ? _fbb.CreateVector<uint8_t>(*data) : 0);
+ if (data)
+ {
+ _fbb.ForceVectorAlignment(data->size(), sizeof(uint8_t), 16);
+ }
+ auto data__ = data ? _fbb.CreateVector<uint8_t>(*data) : 0;
+ return onert_tflite::CreateBuffer(_fbb, data__);
}
struct Metadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef MetadataBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_NAME = 4,
VT_BUFFER = 6
@@ -8709,6 +10782,7 @@ struct Metadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
struct MetadataBuilder
{
+ typedef Metadata Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_name(flatbuffers::Offset<flatbuffers::String> name)
@@ -8720,7 +10794,6 @@ struct MetadataBuilder
{
start_ = fbb_.StartTable();
}
- MetadataBuilder &operator=(const MetadataBuilder &);
flatbuffers::Offset<Metadata> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -8743,12 +10816,179 @@ inline flatbuffers::Offset<Metadata> CreateMetadataDirect(flatbuffers::FlatBuffe
const char *name = nullptr,
uint32_t buffer = 0)
{
- return onert_tflite::CreateMetadata(_fbb, name ? _fbb.CreateString(name) : 0, buffer);
+ auto name__ = name ? _fbb.CreateString(name) : 0;
+ return onert_tflite::CreateMetadata(_fbb, name__, buffer);
+}
+
+struct TensorMap FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef TensorMapBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_NAME = 4,
+ VT_TENSOR_INDEX = 6
+ };
+ const flatbuffers::String *name() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_NAME);
+ }
+ uint32_t tensor_index() const { return GetField<uint32_t>(VT_TENSOR_INDEX, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_NAME) &&
+ verifier.VerifyString(name()) && VerifyField<uint32_t>(verifier, VT_TENSOR_INDEX) &&
+ verifier.EndTable();
+ }
+};
+
+struct TensorMapBuilder
+{
+ typedef TensorMap Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_name(flatbuffers::Offset<flatbuffers::String> name)
+ {
+ fbb_.AddOffset(TensorMap::VT_NAME, name);
+ }
+ void add_tensor_index(uint32_t tensor_index)
+ {
+ fbb_.AddElement<uint32_t>(TensorMap::VT_TENSOR_INDEX, tensor_index, 0);
+ }
+ explicit TensorMapBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<TensorMap> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<TensorMap>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<TensorMap>
+CreateTensorMap(flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::String> name = 0, uint32_t tensor_index = 0)
+{
+ TensorMapBuilder builder_(_fbb);
+ builder_.add_tensor_index(tensor_index);
+ builder_.add_name(name);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<TensorMap> CreateTensorMapDirect(flatbuffers::FlatBufferBuilder &_fbb,
+ const char *name = nullptr,
+ uint32_t tensor_index = 0)
+{
+ auto name__ = name ? _fbb.CreateString(name) : 0;
+ return onert_tflite::CreateTensorMap(_fbb, name__, tensor_index);
+}
+
+struct SignatureDef FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+ typedef SignatureDefBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+ {
+ VT_INPUTS = 4,
+ VT_OUTPUTS = 6,
+ VT_SIGNATURE_KEY = 8,
+ VT_SUBGRAPH_INDEX = 12
+ };
+ const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::TensorMap>> *inputs() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::TensorMap>> *>(
+ VT_INPUTS);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::TensorMap>> *outputs() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::TensorMap>> *>(
+ VT_OUTPUTS);
+ }
+ const flatbuffers::String *signature_key() const
+ {
+ return GetPointer<const flatbuffers::String *>(VT_SIGNATURE_KEY);
+ }
+ uint32_t subgraph_index() const { return GetField<uint32_t>(VT_SUBGRAPH_INDEX, 0); }
+ bool Verify(flatbuffers::Verifier &verifier) const
+ {
+ return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_INPUTS) &&
+ verifier.VerifyVector(inputs()) && verifier.VerifyVectorOfTables(inputs()) &&
+ VerifyOffset(verifier, VT_OUTPUTS) && verifier.VerifyVector(outputs()) &&
+ verifier.VerifyVectorOfTables(outputs()) && VerifyOffset(verifier, VT_SIGNATURE_KEY) &&
+ verifier.VerifyString(signature_key()) &&
+ VerifyField<uint32_t>(verifier, VT_SUBGRAPH_INDEX) && verifier.EndTable();
+ }
+};
+
+struct SignatureDefBuilder
+{
+ typedef SignatureDef Table;
+ flatbuffers::FlatBufferBuilder &fbb_;
+ flatbuffers::uoffset_t start_;
+ void add_inputs(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::TensorMap>>> inputs)
+ {
+ fbb_.AddOffset(SignatureDef::VT_INPUTS, inputs);
+ }
+ void add_outputs(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::TensorMap>>> outputs)
+ {
+ fbb_.AddOffset(SignatureDef::VT_OUTPUTS, outputs);
+ }
+ void add_signature_key(flatbuffers::Offset<flatbuffers::String> signature_key)
+ {
+ fbb_.AddOffset(SignatureDef::VT_SIGNATURE_KEY, signature_key);
+ }
+ void add_subgraph_index(uint32_t subgraph_index)
+ {
+ fbb_.AddElement<uint32_t>(SignatureDef::VT_SUBGRAPH_INDEX, subgraph_index, 0);
+ }
+ explicit SignatureDefBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+ {
+ start_ = fbb_.StartTable();
+ }
+ flatbuffers::Offset<SignatureDef> Finish()
+ {
+ const auto end = fbb_.EndTable(start_);
+ auto o = flatbuffers::Offset<SignatureDef>(end);
+ return o;
+ }
+};
+
+inline flatbuffers::Offset<SignatureDef> CreateSignatureDef(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::TensorMap>>> inputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::TensorMap>>> outputs =
+ 0,
+ flatbuffers::Offset<flatbuffers::String> signature_key = 0, uint32_t subgraph_index = 0)
+{
+ SignatureDefBuilder builder_(_fbb);
+ builder_.add_subgraph_index(subgraph_index);
+ builder_.add_signature_key(signature_key);
+ builder_.add_outputs(outputs);
+ builder_.add_inputs(inputs);
+ return builder_.Finish();
+}
+
+inline flatbuffers::Offset<SignatureDef> CreateSignatureDefDirect(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ const std::vector<flatbuffers::Offset<onert_tflite::TensorMap>> *inputs = nullptr,
+ const std::vector<flatbuffers::Offset<onert_tflite::TensorMap>> *outputs = nullptr,
+ const char *signature_key = nullptr, uint32_t subgraph_index = 0)
+{
+ auto inputs__ =
+ inputs ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::TensorMap>>(*inputs) : 0;
+ auto outputs__ =
+ outputs ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::TensorMap>>(*outputs) : 0;
+ auto signature_key__ = signature_key ? _fbb.CreateString(signature_key) : 0;
+ return onert_tflite::CreateSignatureDef(_fbb, inputs__, outputs__, signature_key__,
+ subgraph_index);
}
struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
{
- enum
+ typedef ModelBuilder Builder;
+ enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_VERSION = 4,
VT_OPERATOR_CODES = 6,
@@ -8756,33 +10996,42 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
VT_DESCRIPTION = 10,
VT_BUFFERS = 12,
VT_METADATA_BUFFER = 14,
- VT_METADATA = 16
+ VT_METADATA = 16,
+ VT_SIGNATURE_DEFS = 18
};
uint32_t version() const { return GetField<uint32_t>(VT_VERSION, 0); }
- const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *operator_codes() const
+ const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::OperatorCode>> *operator_codes() const
{
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *>(
- VT_OPERATOR_CODES);
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::OperatorCode>> *>(
+ VT_OPERATOR_CODES);
}
- const flatbuffers::Vector<flatbuffers::Offset<SubGraph>> *subgraphs() const
+ const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::SubGraph>> *subgraphs() const
{
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<SubGraph>> *>(VT_SUBGRAPHS);
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::SubGraph>> *>(
+ VT_SUBGRAPHS);
}
const flatbuffers::String *description() const
{
return GetPointer<const flatbuffers::String *>(VT_DESCRIPTION);
}
- const flatbuffers::Vector<flatbuffers::Offset<Buffer>> *buffers() const
+ const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Buffer>> *buffers() const
{
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Buffer>> *>(VT_BUFFERS);
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Buffer>> *>(
+ VT_BUFFERS);
}
const flatbuffers::Vector<int32_t> *metadata_buffer() const
{
return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_METADATA_BUFFER);
}
- const flatbuffers::Vector<flatbuffers::Offset<Metadata>> *metadata() const
+ const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Metadata>> *metadata() const
+ {
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Metadata>> *>(
+ VT_METADATA);
+ }
+ const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::SignatureDef>> *signature_defs() const
{
- return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Metadata>> *>(VT_METADATA);
+ return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::SignatureDef>> *>(
+ VT_SIGNATURE_DEFS);
}
bool Verify(flatbuffers::Verifier &verifier) const
{
@@ -8795,22 +11044,26 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
verifier.VerifyVector(buffers()) && verifier.VerifyVectorOfTables(buffers()) &&
VerifyOffset(verifier, VT_METADATA_BUFFER) && verifier.VerifyVector(metadata_buffer()) &&
VerifyOffset(verifier, VT_METADATA) && verifier.VerifyVector(metadata()) &&
- verifier.VerifyVectorOfTables(metadata()) && verifier.EndTable();
+ verifier.VerifyVectorOfTables(metadata()) && VerifyOffset(verifier, VT_SIGNATURE_DEFS) &&
+ verifier.VerifyVector(signature_defs()) &&
+ verifier.VerifyVectorOfTables(signature_defs()) && verifier.EndTable();
}
};
struct ModelBuilder
{
+ typedef Model Table;
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_version(uint32_t version) { fbb_.AddElement<uint32_t>(Model::VT_VERSION, version, 0); }
void add_operator_codes(
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes)
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::OperatorCode>>>
+ operator_codes)
{
fbb_.AddOffset(Model::VT_OPERATOR_CODES, operator_codes);
}
- void
- add_subgraphs(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>> subgraphs)
+ void add_subgraphs(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::SubGraph>>> subgraphs)
{
fbb_.AddOffset(Model::VT_SUBGRAPHS, subgraphs);
}
@@ -8818,7 +11071,8 @@ struct ModelBuilder
{
fbb_.AddOffset(Model::VT_DESCRIPTION, description);
}
- void add_buffers(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>> buffers)
+ void add_buffers(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Buffer>>> buffers)
{
fbb_.AddOffset(Model::VT_BUFFERS, buffers);
}
@@ -8826,16 +11080,21 @@ struct ModelBuilder
{
fbb_.AddOffset(Model::VT_METADATA_BUFFER, metadata_buffer);
}
- void
- add_metadata(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Metadata>>> metadata)
+ void add_metadata(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Metadata>>> metadata)
{
fbb_.AddOffset(Model::VT_METADATA, metadata);
}
+ void add_signature_defs(
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::SignatureDef>>>
+ signature_defs)
+ {
+ fbb_.AddOffset(Model::VT_SIGNATURE_DEFS, signature_defs);
+ }
explicit ModelBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
- ModelBuilder &operator=(const ModelBuilder &);
flatbuffers::Offset<Model> Finish()
{
const auto end = fbb_.EndTable(start_);
@@ -8845,15 +11104,21 @@ struct ModelBuilder
};
inline flatbuffers::Offset<Model> CreateModel(
- flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>> subgraphs = 0,
- flatbuffers::Offset<flatbuffers::String> description = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>> buffers = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Metadata>>> metadata = 0)
+ flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::OperatorCode>>>
+ operator_codes = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::SubGraph>>> subgraphs =
+ 0,
+ flatbuffers::Offset<flatbuffers::String> description = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Buffer>>> buffers = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Metadata>>> metadata =
+ 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::SignatureDef>>>
+ signature_defs = 0)
{
ModelBuilder builder_(_fbb);
+ builder_.add_signature_defs(signature_defs);
builder_.add_metadata(metadata);
builder_.add_metadata_buffer(metadata_buffer);
builder_.add_buffers(buffers);
@@ -8864,23 +11129,34 @@ inline flatbuffers::Offset<Model> CreateModel(
return builder_.Finish();
}
-inline flatbuffers::Offset<Model>
-CreateModelDirect(flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
- const std::vector<flatbuffers::Offset<OperatorCode>> *operator_codes = nullptr,
- const std::vector<flatbuffers::Offset<SubGraph>> *subgraphs = nullptr,
- const char *description = nullptr,
- const std::vector<flatbuffers::Offset<Buffer>> *buffers = nullptr,
- const std::vector<int32_t> *metadata_buffer = nullptr,
- const std::vector<flatbuffers::Offset<Metadata>> *metadata = nullptr)
-{
- return onert_tflite::CreateModel(
- _fbb, version,
- operator_codes ? _fbb.CreateVector<flatbuffers::Offset<OperatorCode>>(*operator_codes) : 0,
- subgraphs ? _fbb.CreateVector<flatbuffers::Offset<SubGraph>>(*subgraphs) : 0,
- description ? _fbb.CreateString(description) : 0,
- buffers ? _fbb.CreateVector<flatbuffers::Offset<Buffer>>(*buffers) : 0,
- metadata_buffer ? _fbb.CreateVector<int32_t>(*metadata_buffer) : 0,
- metadata ? _fbb.CreateVector<flatbuffers::Offset<Metadata>>(*metadata) : 0);
+inline flatbuffers::Offset<Model> CreateModelDirect(
+ flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
+ const std::vector<flatbuffers::Offset<onert_tflite::OperatorCode>> *operator_codes = nullptr,
+ const std::vector<flatbuffers::Offset<onert_tflite::SubGraph>> *subgraphs = nullptr,
+ const char *description = nullptr,
+ const std::vector<flatbuffers::Offset<onert_tflite::Buffer>> *buffers = nullptr,
+ const std::vector<int32_t> *metadata_buffer = nullptr,
+ const std::vector<flatbuffers::Offset<onert_tflite::Metadata>> *metadata = nullptr,
+ const std::vector<flatbuffers::Offset<onert_tflite::SignatureDef>> *signature_defs = nullptr)
+{
+ auto operator_codes__ =
+ operator_codes
+ ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::OperatorCode>>(*operator_codes)
+ : 0;
+ auto subgraphs__ =
+ subgraphs ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::SubGraph>>(*subgraphs) : 0;
+ auto description__ = description ? _fbb.CreateString(description) : 0;
+ auto buffers__ =
+ buffers ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::Buffer>>(*buffers) : 0;
+ auto metadata_buffer__ = metadata_buffer ? _fbb.CreateVector<int32_t>(*metadata_buffer) : 0;
+ auto metadata__ =
+ metadata ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::Metadata>>(*metadata) : 0;
+ auto signature_defs__ =
+ signature_defs
+ ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::SignatureDef>>(*signature_defs)
+ : 0;
+ return onert_tflite::CreateModel(_fbb, version, operator_codes__, subgraphs__, description__,
+ buffers__, metadata_buffer__, metadata__, signature_defs__);
}
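// Illustrative sketch (comment-only; these lines are not part of the generated
// header). Assuming the standard flatbuffers::FlatBufferBuilder API, the new
// signature_defs parameter above lets a model carry a named entry point built
// from the TensorMap/SignatureDef helpers added in this patch:
//
//   flatbuffers::FlatBufferBuilder fbb;
//   auto in = onert_tflite::CreateTensorMapDirect(fbb, "input_0", /*tensor_index=*/0);
//   std::vector<flatbuffers::Offset<onert_tflite::TensorMap>> inputs{in};
//   auto sig = onert_tflite::CreateSignatureDefDirect(
//     fbb, &inputs, /*outputs=*/nullptr, "serving_default", /*subgraph_index=*/0);
//   std::vector<flatbuffers::Offset<onert_tflite::SignatureDef>> sigs{sig};
//   fbb.Finish(onert_tflite::CreateModelDirect(
//     fbb, /*version=*/3, /*operator_codes=*/nullptr, /*subgraphs=*/nullptr,
//     "example", /*buffers=*/nullptr, /*metadata_buffer=*/nullptr,
//     /*metadata=*/nullptr, &sigs));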
inline bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const void *obj,
@@ -8894,11 +11170,11 @@ inline bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const voi
}
case QuantizationDetails_CustomQuantization:
{
- auto ptr = reinterpret_cast<const CustomQuantization *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::CustomQuantization *>(obj);
return verifier.VerifyTable(ptr);
}
default:
- return false;
+ return true;
}
}
@@ -8933,21 +11209,21 @@ inline bool VerifySparseIndexVector(flatbuffers::Verifier &verifier, const void
}
case SparseIndexVector_Int32Vector:
{
- auto ptr = reinterpret_cast<const Int32Vector *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::Int32Vector *>(obj);
return verifier.VerifyTable(ptr);
}
case SparseIndexVector_Uint16Vector:
{
- auto ptr = reinterpret_cast<const Uint16Vector *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::Uint16Vector *>(obj);
return verifier.VerifyTable(ptr);
}
case SparseIndexVector_Uint8Vector:
{
- auto ptr = reinterpret_cast<const Uint8Vector *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::Uint8Vector *>(obj);
return verifier.VerifyTable(ptr);
}
default:
- return false;
+ return true;
}
}
@@ -8981,511 +11257,611 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob
}
case BuiltinOptions_Conv2DOptions:
{
- auto ptr = reinterpret_cast<const Conv2DOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::Conv2DOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_DepthwiseConv2DOptions:
{
- auto ptr = reinterpret_cast<const DepthwiseConv2DOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::DepthwiseConv2DOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ConcatEmbeddingsOptions:
{
- auto ptr = reinterpret_cast<const ConcatEmbeddingsOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::ConcatEmbeddingsOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_LSHProjectionOptions:
{
- auto ptr = reinterpret_cast<const LSHProjectionOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::LSHProjectionOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_Pool2DOptions:
{
- auto ptr = reinterpret_cast<const Pool2DOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::Pool2DOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SVDFOptions:
{
- auto ptr = reinterpret_cast<const SVDFOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::SVDFOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_RNNOptions:
{
- auto ptr = reinterpret_cast<const RNNOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::RNNOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_FullyConnectedOptions:
{
- auto ptr = reinterpret_cast<const FullyConnectedOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::FullyConnectedOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SoftmaxOptions:
{
- auto ptr = reinterpret_cast<const SoftmaxOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::SoftmaxOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ConcatenationOptions:
{
- auto ptr = reinterpret_cast<const ConcatenationOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::ConcatenationOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_AddOptions:
{
- auto ptr = reinterpret_cast<const AddOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::AddOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_L2NormOptions:
{
- auto ptr = reinterpret_cast<const L2NormOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::L2NormOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_LocalResponseNormalizationOptions:
{
- auto ptr = reinterpret_cast<const LocalResponseNormalizationOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::LocalResponseNormalizationOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_LSTMOptions:
{
- auto ptr = reinterpret_cast<const LSTMOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::LSTMOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ResizeBilinearOptions:
{
- auto ptr = reinterpret_cast<const ResizeBilinearOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::ResizeBilinearOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_CallOptions:
{
- auto ptr = reinterpret_cast<const CallOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::CallOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ReshapeOptions:
{
- auto ptr = reinterpret_cast<const ReshapeOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::ReshapeOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SkipGramOptions:
{
- auto ptr = reinterpret_cast<const SkipGramOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::SkipGramOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SpaceToDepthOptions:
{
- auto ptr = reinterpret_cast<const SpaceToDepthOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::SpaceToDepthOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_EmbeddingLookupSparseOptions:
{
- auto ptr = reinterpret_cast<const EmbeddingLookupSparseOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::EmbeddingLookupSparseOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_MulOptions:
{
- auto ptr = reinterpret_cast<const MulOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::MulOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_PadOptions:
{
- auto ptr = reinterpret_cast<const PadOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::PadOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_GatherOptions:
{
- auto ptr = reinterpret_cast<const GatherOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::GatherOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_BatchToSpaceNDOptions:
{
- auto ptr = reinterpret_cast<const BatchToSpaceNDOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::BatchToSpaceNDOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SpaceToBatchNDOptions:
{
- auto ptr = reinterpret_cast<const SpaceToBatchNDOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::SpaceToBatchNDOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_TransposeOptions:
{
- auto ptr = reinterpret_cast<const TransposeOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::TransposeOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ReducerOptions:
{
- auto ptr = reinterpret_cast<const ReducerOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::ReducerOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SubOptions:
{
- auto ptr = reinterpret_cast<const SubOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::SubOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_DivOptions:
{
- auto ptr = reinterpret_cast<const DivOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::DivOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SqueezeOptions:
{
- auto ptr = reinterpret_cast<const SqueezeOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::SqueezeOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SequenceRNNOptions:
{
- auto ptr = reinterpret_cast<const SequenceRNNOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::SequenceRNNOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_StridedSliceOptions:
{
- auto ptr = reinterpret_cast<const StridedSliceOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::StridedSliceOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ExpOptions:
{
- auto ptr = reinterpret_cast<const ExpOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::ExpOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_TopKV2Options:
{
- auto ptr = reinterpret_cast<const TopKV2Options *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::TopKV2Options *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SplitOptions:
{
- auto ptr = reinterpret_cast<const SplitOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::SplitOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_LogSoftmaxOptions:
{
- auto ptr = reinterpret_cast<const LogSoftmaxOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::LogSoftmaxOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_CastOptions:
{
- auto ptr = reinterpret_cast<const CastOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::CastOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_DequantizeOptions:
{
- auto ptr = reinterpret_cast<const DequantizeOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::DequantizeOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_MaximumMinimumOptions:
{
- auto ptr = reinterpret_cast<const MaximumMinimumOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::MaximumMinimumOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ArgMaxOptions:
{
- auto ptr = reinterpret_cast<const ArgMaxOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::ArgMaxOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_LessOptions:
{
- auto ptr = reinterpret_cast<const LessOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::LessOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_NegOptions:
{
- auto ptr = reinterpret_cast<const NegOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::NegOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_PadV2Options:
{
- auto ptr = reinterpret_cast<const PadV2Options *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::PadV2Options *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_GreaterOptions:
{
- auto ptr = reinterpret_cast<const GreaterOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::GreaterOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_GreaterEqualOptions:
{
- auto ptr = reinterpret_cast<const GreaterEqualOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::GreaterEqualOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_LessEqualOptions:
{
- auto ptr = reinterpret_cast<const LessEqualOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::LessEqualOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SelectOptions:
{
- auto ptr = reinterpret_cast<const SelectOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::SelectOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SliceOptions:
{
- auto ptr = reinterpret_cast<const SliceOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::SliceOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_TransposeConvOptions:
{
- auto ptr = reinterpret_cast<const TransposeConvOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::TransposeConvOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SparseToDenseOptions:
{
- auto ptr = reinterpret_cast<const SparseToDenseOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::SparseToDenseOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_TileOptions:
{
- auto ptr = reinterpret_cast<const TileOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::TileOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ExpandDimsOptions:
{
- auto ptr = reinterpret_cast<const ExpandDimsOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::ExpandDimsOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_EqualOptions:
{
- auto ptr = reinterpret_cast<const EqualOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::EqualOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_NotEqualOptions:
{
- auto ptr = reinterpret_cast<const NotEqualOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::NotEqualOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ShapeOptions:
{
- auto ptr = reinterpret_cast<const ShapeOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::ShapeOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_PowOptions:
{
- auto ptr = reinterpret_cast<const PowOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::PowOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ArgMinOptions:
{
- auto ptr = reinterpret_cast<const ArgMinOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::ArgMinOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_FakeQuantOptions:
{
- auto ptr = reinterpret_cast<const FakeQuantOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::FakeQuantOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_PackOptions:
{
- auto ptr = reinterpret_cast<const PackOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::PackOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_LogicalOrOptions:
{
- auto ptr = reinterpret_cast<const LogicalOrOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::LogicalOrOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_OneHotOptions:
{
- auto ptr = reinterpret_cast<const OneHotOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::OneHotOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_LogicalAndOptions:
{
- auto ptr = reinterpret_cast<const LogicalAndOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::LogicalAndOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_LogicalNotOptions:
{
- auto ptr = reinterpret_cast<const LogicalNotOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::LogicalNotOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_UnpackOptions:
{
- auto ptr = reinterpret_cast<const UnpackOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::UnpackOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_FloorDivOptions:
{
- auto ptr = reinterpret_cast<const FloorDivOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::FloorDivOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SquareOptions:
{
- auto ptr = reinterpret_cast<const SquareOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::SquareOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ZerosLikeOptions:
{
- auto ptr = reinterpret_cast<const ZerosLikeOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::ZerosLikeOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_FillOptions:
{
- auto ptr = reinterpret_cast<const FillOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::FillOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_BidirectionalSequenceLSTMOptions:
{
- auto ptr = reinterpret_cast<const BidirectionalSequenceLSTMOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::BidirectionalSequenceLSTMOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_BidirectionalSequenceRNNOptions:
{
- auto ptr = reinterpret_cast<const BidirectionalSequenceRNNOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::BidirectionalSequenceRNNOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_UnidirectionalSequenceLSTMOptions:
{
- auto ptr = reinterpret_cast<const UnidirectionalSequenceLSTMOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::UnidirectionalSequenceLSTMOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_FloorModOptions:
{
- auto ptr = reinterpret_cast<const FloorModOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::FloorModOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_RangeOptions:
{
- auto ptr = reinterpret_cast<const RangeOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::RangeOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ResizeNearestNeighborOptions:
{
- auto ptr = reinterpret_cast<const ResizeNearestNeighborOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::ResizeNearestNeighborOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_LeakyReluOptions:
{
- auto ptr = reinterpret_cast<const LeakyReluOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::LeakyReluOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SquaredDifferenceOptions:
{
- auto ptr = reinterpret_cast<const SquaredDifferenceOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::SquaredDifferenceOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_MirrorPadOptions:
{
- auto ptr = reinterpret_cast<const MirrorPadOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::MirrorPadOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_AbsOptions:
{
- auto ptr = reinterpret_cast<const AbsOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::AbsOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SplitVOptions:
{
- auto ptr = reinterpret_cast<const SplitVOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::SplitVOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_UniqueOptions:
{
- auto ptr = reinterpret_cast<const UniqueOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::UniqueOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ReverseV2Options:
{
- auto ptr = reinterpret_cast<const ReverseV2Options *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::ReverseV2Options *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_AddNOptions:
{
- auto ptr = reinterpret_cast<const AddNOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::AddNOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_GatherNdOptions:
{
- auto ptr = reinterpret_cast<const GatherNdOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::GatherNdOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_CosOptions:
{
- auto ptr = reinterpret_cast<const CosOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::CosOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_WhereOptions:
{
- auto ptr = reinterpret_cast<const WhereOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::WhereOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_RankOptions:
{
- auto ptr = reinterpret_cast<const RankOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::RankOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ReverseSequenceOptions:
{
- auto ptr = reinterpret_cast<const ReverseSequenceOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::ReverseSequenceOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_MatrixDiagOptions:
{
- auto ptr = reinterpret_cast<const MatrixDiagOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::MatrixDiagOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_QuantizeOptions:
{
- auto ptr = reinterpret_cast<const QuantizeOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::QuantizeOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_MatrixSetDiagOptions:
{
- auto ptr = reinterpret_cast<const MatrixSetDiagOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::MatrixSetDiagOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_HardSwishOptions:
{
- auto ptr = reinterpret_cast<const HardSwishOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::HardSwishOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_IfOptions:
{
- auto ptr = reinterpret_cast<const IfOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::IfOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_WhileOptions:
{
- auto ptr = reinterpret_cast<const WhileOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::WhileOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_DepthToSpaceOptions:
{
- auto ptr = reinterpret_cast<const DepthToSpaceOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::DepthToSpaceOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_NonMaxSuppressionV4Options:
{
- auto ptr = reinterpret_cast<const NonMaxSuppressionV4Options *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::NonMaxSuppressionV4Options *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_NonMaxSuppressionV5Options:
{
- auto ptr = reinterpret_cast<const NonMaxSuppressionV5Options *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::NonMaxSuppressionV5Options *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_ScatterNdOptions:
{
- auto ptr = reinterpret_cast<const ScatterNdOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::ScatterNdOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SelectV2Options:
{
- auto ptr = reinterpret_cast<const SelectV2Options *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::SelectV2Options *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_DensifyOptions:
{
- auto ptr = reinterpret_cast<const DensifyOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::DensifyOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_SegmentSumOptions:
{
- auto ptr = reinterpret_cast<const SegmentSumOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::SegmentSumOptions *>(obj);
return verifier.VerifyTable(ptr);
}
case BuiltinOptions_BatchMatMulOptions:
{
- auto ptr = reinterpret_cast<const BatchMatMulOptions *>(obj);
+ auto ptr = reinterpret_cast<const onert_tflite::BatchMatMulOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_CumsumOptions:
+ {
+ auto ptr = reinterpret_cast<const onert_tflite::CumsumOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_CallOnceOptions:
+ {
+ auto ptr = reinterpret_cast<const onert_tflite::CallOnceOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_BroadcastToOptions:
+ {
+ auto ptr = reinterpret_cast<const onert_tflite::BroadcastToOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_Rfft2dOptions:
+ {
+ auto ptr = reinterpret_cast<const onert_tflite::Rfft2dOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_Conv3DOptions:
+ {
+ auto ptr = reinterpret_cast<const onert_tflite::Conv3DOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_HashtableOptions:
+ {
+ auto ptr = reinterpret_cast<const onert_tflite::HashtableOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_HashtableFindOptions:
+ {
+ auto ptr = reinterpret_cast<const onert_tflite::HashtableFindOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_HashtableImportOptions:
+ {
+ auto ptr = reinterpret_cast<const onert_tflite::HashtableImportOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_HashtableSizeOptions:
+ {
+ auto ptr = reinterpret_cast<const onert_tflite::HashtableSizeOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_VarHandleOptions:
+ {
+ auto ptr = reinterpret_cast<const onert_tflite::VarHandleOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ReadVariableOptions:
+ {
+ auto ptr = reinterpret_cast<const onert_tflite::ReadVariableOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_AssignVariableOptions:
+ {
+ auto ptr = reinterpret_cast<const onert_tflite::AssignVariableOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_RandomOptions:
+ {
+ auto ptr = reinterpret_cast<const onert_tflite::RandomOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_BucketizeOptions:
+ {
+ auto ptr = reinterpret_cast<const onert_tflite::BucketizeOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_GeluOptions:
+ {
+ auto ptr = reinterpret_cast<const onert_tflite::GeluOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_DynamicUpdateSliceOptions:
+ {
+ auto ptr = reinterpret_cast<const onert_tflite::DynamicUpdateSliceOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_UnsortedSegmentProdOptions:
+ {
+ auto ptr = reinterpret_cast<const onert_tflite::UnsortedSegmentProdOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_UnsortedSegmentMaxOptions:
+ {
+ auto ptr = reinterpret_cast<const onert_tflite::UnsortedSegmentMaxOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_UnsortedSegmentSumOptions:
+ {
+ auto ptr = reinterpret_cast<const onert_tflite::UnsortedSegmentSumOptions *>(obj);
+ return verifier.VerifyTable(ptr);
+ }
+ case BuiltinOptions_ATan2Options:
+ {
+ auto ptr = reinterpret_cast<const onert_tflite::ATan2Options *>(obj);
return verifier.VerifyTable(ptr);
}
default:
- return false;
+ return true;
}
}
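For reference, a minimal sketch of how this generated verifier is typically driven. It assumes the standard flatc-generated entry point VerifyModelBuffer for root_type Model and a hypothetical generated-header name; the change to "default: return true" above matches flatc's convention of skipping unknown union members for forward compatibility.

#include <flatbuffers/flatbuffers.h>
#include "tflite_schema_generated.h" // hypothetical name of the generated header

bool isValidTfliteBuffer(const uint8_t *buf, size_t size)
{
  // Walks the whole buffer, dispatching each operator's options through the
  // switch above; returns false on any out-of-bounds or malformed table.
  flatbuffers::Verifier verifier(buf, size);
  return onert_tflite::VerifyModelBuffer(verifier);
}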
diff --git a/runtime/onert/frontend/tflite/tflite_schema.fbs b/runtime/onert/frontend/tflite/tflite_schema.fbs
index 9bffb4f3c..f7997528e 100644
--- a/runtime/onert/frontend/tflite/tflite_schema.fbs
+++ b/runtime/onert/frontend/tflite/tflite_schema.fbs
@@ -18,6 +18,10 @@
// Version 1: Add subgraphs to schema.
// Version 2: Rename operators to conform to NN API.
// Version 3: Move buffer data from Model.Subgraph.Tensors to Model.Buffers.
+// Version 3a: Add new builtin op code field. Has backward compatibility with
+// version 3.
+// Version 3b: Rename fields in SignatureDef. Has backward compatibility with
+// version 3 and 3a.
// Change namespace to onert_tflite
namespace onert_tflite;
@@ -43,6 +47,15 @@ enum TensorType : byte {
COMPLEX64 = 8,
INT8 = 9,
FLOAT64 = 10,
+ COMPLEX128 = 11,
+ UINT64 = 12,
+ // Experimental: Resource and variant types are experimental and subject to
+ // change. Do not implement custom kernels using resource or variant types
+ // yet.
+ RESOURCE = 13,
+ VARIANT = 14,
+ UINT32 = 15,
+ UINT16 = 16
}
// Custom quantization parameters for experimenting with new quantization
@@ -209,14 +222,18 @@ table Tensor {
// Encodes `shape` with unknown dimensions. Unknown dimensions are
// represented with -1.
shape_signature:[int]; // Optional.
+
+ // If false, the rank (i.e. the number of tensor dimensions) is unknown and
+ // "shape" must be [].
+ has_rank: bool = false;
}
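A short sketch of how a reader should interpret the new has_rank field; the has_rank()/shape() accessors are standard flatc output and are assumed here, since the generated code is not part of this hunk.

// Returns the rank if it is known, or -1 when has_rank is false.
int knownRank(const onert_tflite::Tensor *tensor)
{
  // has_rank == false means "rank unknown", not "rank 0"; shape is [] then.
  if (!tensor->has_rank())
    return -1;
  return tensor->shape() != nullptr ? static_cast<int>(tensor->shape()->size()) : 0;
}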
// A list of builtin operators. Builtin operators are slightly faster than custom
// ones, but not by much. Moreover, while custom operators accept an opaque
// object containing configuration parameters, builtins have a predetermined
// set of acceptable options.
-
-enum BuiltinOperator : byte {
+// LINT.IfChange
+enum BuiltinOperator : int32 {
ADD = 0,
AVERAGE_POOL_2D = 1,
CONCATENATION = 2,
@@ -249,7 +266,6 @@ enum BuiltinOperator : byte {
SPACE_TO_DEPTH = 26,
SVDF = 27,
TANH = 28,
- // TODO(aselle): Consider rename to CONCATENATE_EMBEDDINGS
CONCAT_EMBEDDINGS = 29,
SKIP_GRAM = 30,
CALL = 31,
@@ -350,9 +366,39 @@ enum BuiltinOperator : byte {
SELECT_V2 = 123,
DENSIFY = 124,
SEGMENT_SUM = 125,
- BATCH_MATMUL = 126
-}
-
+ BATCH_MATMUL = 126,
+ PLACEHOLDER_FOR_GREATER_OP_CODES = 127,
+ CUMSUM = 128,
+ CALL_ONCE = 129,
+ BROADCAST_TO = 130,
+ RFFT2D = 131,
+ CONV_3D = 132,
+ IMAG = 133,
+ REAL = 134,
+ COMPLEX_ABS = 135,
+ HASHTABLE = 136,
+ HASHTABLE_FIND = 137,
+ HASHTABLE_IMPORT = 138,
+ HASHTABLE_SIZE = 139,
+ REDUCE_ALL = 140,
+ CONV_3D_TRANSPOSE = 141,
+ VAR_HANDLE = 142,
+ READ_VARIABLE = 143,
+ ASSIGN_VARIABLE = 144,
+ BROADCAST_ARGS = 145,
+ RANDOM_STANDARD_NORMAL = 146,
+ BUCKETIZE = 147,
+ RANDOM_UNIFORM = 148,
+ MULTINOMIAL = 149,
+ GELU = 150,
+ DYNAMIC_UPDATE_SLICE = 151,
+ RELU_0_TO_1 = 152,
+ UNSORTED_SEGMENT_PROD = 153,
+ UNSORTED_SEGMENT_MAX = 154,
+ UNSORTED_SEGMENT_SUM = 155,
+ ATAN2 = 156
+}
+// LINT.ThenChange(nnapi_linter/linter.proto)
// Options for the builtin operators.
union BuiltinOptions {
@@ -456,11 +502,34 @@ union BuiltinOptions {
SelectV2Options,
DensifyOptions,
SegmentSumOptions,
- BatchMatMulOptions
-}
-
+ BatchMatMulOptions,
+ CumsumOptions,
+ CallOnceOptions,
+ BroadcastToOptions,
+ Rfft2dOptions,
+ Conv3DOptions,
+ HashtableOptions,
+ HashtableFindOptions,
+ HashtableImportOptions,
+ HashtableSizeOptions,
+ VarHandleOptions,
+ ReadVariableOptions,
+ AssignVariableOptions,
+ RandomOptions,
+ BucketizeOptions,
+ GeluOptions,
+ DynamicUpdateSliceOptions,
+ UnsortedSegmentProdOptions,
+ UnsortedSegmentMaxOptions,
+ UnsortedSegmentSumOptions,
+ ATan2Options
+}
+
+// LINT.IfChange
enum Padding : byte { SAME, VALID }
+// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td)
+// LINT.IfChange
enum ActivationFunctionType : byte {
NONE = 0,
RELU = 1,
@@ -469,6 +538,7 @@ enum ActivationFunctionType : byte {
TANH = 4,
SIGN_BIT = 5,
}
+// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td)
table Conv2DOptions {
padding:Padding;
@@ -479,6 +549,18 @@ table Conv2DOptions {
dilation_h_factor:int = 1;
}
+// Options for both Conv3D and Conv3DTranspose.
+table Conv3DOptions {
+ padding:Padding;
+ stride_d:int;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_d_factor:int = 1;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
table Pool2DOptions {
padding:Padding;
stride_w:int;
@@ -548,10 +630,12 @@ table BidirectionalSequenceRNNOptions {
asymmetric_quantize_inputs:bool;
}
+// LINT.IfChange
enum FullyConnectedOptionsWeightsFormat: byte {
DEFAULT = 0,
SHUFFLED4x16INT8 = 1,
}
+// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td)
// An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
table FullyConnectedOptions {
@@ -584,6 +668,8 @@ table ConcatenationOptions {
table AddOptions {
fused_activation_function:ActivationFunctionType;
+ // Parameters supported by version 3.
+ pot_scale_int16:bool = true;
}
table MulOptions {
@@ -591,6 +677,7 @@ table MulOptions {
}
table L2NormOptions {
+ // This field is currently ignored in the L2 Norm Op.
fused_activation_function:ActivationFunctionType;
}
@@ -601,12 +688,14 @@ table LocalResponseNormalizationOptions {
beta:float;
}
+// LINT.IfChange
enum LSTMKernelType : byte {
// Full LSTM kernel which supports peephole and projection.
FULL = 0,
// Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
BASIC = 1,
}
+// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td)
// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
table LSTMOptions {
@@ -664,6 +753,7 @@ table ResizeBilinearOptions {
table ResizeNearestNeighborOptions {
align_corners: bool;
+ half_pixel_centers: bool;
}
// A call operation options
@@ -704,6 +794,8 @@ table DepthToSpaceOptions {
table SubOptions {
fused_activation_function:ActivationFunctionType;
+ // Parameters supported by version 5
+ pot_scale_int16:bool = true;
}
table DivOptions {
@@ -725,6 +817,8 @@ table EmbeddingLookupSparseOptions {
table GatherOptions {
axis: int;
+ // Parameters for Gather version 5 or above.
+ batch_dims: int = 0;
}
table TransposeOptions {
@@ -901,12 +995,14 @@ table LeakyReluOptions {
table SquaredDifferenceOptions {
}
+// LINT.IfChange
enum MirrorPadMode : byte {
// Doesn't include borders.
REFLECT = 0,
// Includes borders.
SYMMETRIC = 1,
}
+// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td)
table MirrorPadOptions {
mode:MirrorPadMode;
@@ -947,6 +1043,10 @@ table IfOptions {
else_subgraph_index:int;
}
+table CallOnceOptions {
+ init_subgraph_index:int;
+}
+
table WhileOptions {
cond_subgraph_index:int;
body_subgraph_index:int;
@@ -971,19 +1071,100 @@ table SegmentSumOptions {
}
table BatchMatMulOptions {
- adjoint_lhs:bool;
- adjoint_rhs:bool;
+ adj_x:bool;
+ adj_y:bool;
+ // Parameters for BatchMatMul version 4 or above.
+ // If set to true, then weights-only op will use asymmetric quantization for
+ // inputs.
+ asymmetric_quantize_inputs: bool;
+}
+
+table CumsumOptions {
+ exclusive:bool;
+ reverse:bool;
+}
+
+table BroadcastToOptions {
+}
+
+table Rfft2dOptions {
+}
+
+table HashtableOptions {
+ // The identity of hash tables. This identity will be used across different
+ // subgraphs in the same interpreter instance.
+ table_id:int;
+ key_dtype:TensorType;
+ value_dtype:TensorType;
+}
+
+table HashtableFindOptions {
+}
+
+table HashtableImportOptions {
+}
+
+table HashtableSizeOptions {
+}
+
+table VarHandleOptions {
+ container:string;
+ shared_name:string;
}
+table ReadVariableOptions {
+}
+
+table AssignVariableOptions {
+}
+
+table RandomOptions {
+ seed: long;
+ seed2: long;
+}
+
+table BucketizeOptions {
+ boundaries: [float]; // The bucket boundaries.
+}
+
+table GeluOptions {
+ approximate: bool;
+}
+
+table DynamicUpdateSliceOptions {
+}
+
+table UnsortedSegmentProdOptions {
+}
+
+table UnsortedSegmentMaxOptions {
+}
+
+table UnsortedSegmentSumOptions {
+}
+
+table ATan2Options {
+}
+
+
// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
// builtin, or a string if the operator is custom.
table OperatorCode {
- builtin_code:BuiltinOperator;
+ // This field is for backward compatibility. It is used when the value of
+ // the extended builtin_code field is less than
+ // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+ deprecated_builtin_code:byte;
custom_code:string;
// The version of the operator. The version needs to be bumped whenever new
// parameters are introduced into an op.
version:int = 1;
+
+ // This field was introduced to resolve the op builtin code shortage problem
+ // (the original BuiltinOperator enum field was represented as a byte).
+ // This field is used when the value of the extended builtin_code field is
+ // greater than BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+ builtin_code:BuiltinOperator;
}
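A hedged sketch of the usual resolution rule for the two builtin-code fields, mirroring the logic in TensorFlow Lite's schema utilities (the generated accessor names are assumptions based on standard flatc output):

#include <algorithm>

onert_tflite::BuiltinOperator effectiveBuiltinCode(const onert_tflite::OperatorCode *oc)
{
  // deprecated_builtin_code is a byte, so it can never exceed
  // PLACEHOLDER_FOR_GREATER_OP_CODES (127); taking the max therefore selects
  // the extended builtin_code field whenever it carries a larger op code.
  return std::max(oc->builtin_code(),
                  static_cast<onert_tflite::BuiltinOperator>(oc->deprecated_builtin_code()));
}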
enum CustomOptionsFormat : byte {
@@ -1062,6 +1243,35 @@ table Metadata {
buffer:uint;
}
+// Map from an alias name of tensor to tensor index in the graph.
+// This is used in Signature def.
+table TensorMap {
+ // Represents the alias to use for this tensor.
+ name:string;
+
+ // The actual tensor index in the primary graph that 'name' corresponds to.
+ tensor_index:uint;
+}
+
+// This corresponds to SignatureDef in Tensorflow SavedModel.
+// The SignatureDef will be part of the SavedModel provided for conversion.
+table SignatureDef {
+ // Named inputs for this signature.
+ inputs:[TensorMap];
+
+ // Named outputs for this signature.
+ outputs:[TensorMap];
+
+ // Key value which was in the Tensorflow SavedModel SignatureDef map.
+ signature_key:string;
+
+ // Model tag, deprecated.
+ deprecated_tag:string (deprecated);
+
+ // Index of the subgraph that corresponds to the exported method.
+ subgraph_index:uint;
+}
+
table Model {
// Version of the schema.
version:uint;
@@ -1090,6 +1300,9 @@ table Model {
// Metadata about the model.
metadata:[Metadata];
+
+ // Optional SignatureDefs for the model.
+ signature_defs:[SignatureDef];
}
root_type Model;
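A short sketch of reading the new signature_defs field through the generated API (accessor names are standard flatc output and are assumed, since the generated code is not part of this hunk):

const char *firstSignatureKey(const onert_tflite::Model *model)
{
  // signature_defs is optional: models written before schema version 3b omit it.
  if (model->signature_defs() == nullptr || model->signature_defs()->size() == 0)
    return nullptr;
  const auto *sig = model->signature_defs()->Get(0);
  return sig->signature_key() != nullptr ? sig->signature_key()->c_str() : nullptr;
}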
diff --git a/runtime/onert/frontend/trix/CMakeLists.txt b/runtime/onert/frontend/trix/CMakeLists.txt
new file mode 100644
index 000000000..8d9063f6c
--- /dev/null
+++ b/runtime/onert/frontend/trix/CMakeLists.txt
@@ -0,0 +1,21 @@
+if (NOT BUILD_TRIX_LOADER)
+ return()
+endif ()
+
+nnfw_find_package(TRIXEngine QUIET 2.5.0)
+if(TRIXEngine_FOUND)
+ list(APPEND SOURCES src/trix_loader.cc)
+else()
+ list(APPEND SOURCES src/trix_loader_dummy.cc)
+endif(TRIXEngine_FOUND)
+
+add_library(trix_loader STATIC ${SOURCES})
+set_target_properties(trix_loader PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(trix_loader PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
+target_link_libraries(trix_loader PRIVATE onert_core)
+target_link_libraries(trix_loader PRIVATE nnfw_common nnfw_coverage)
+
+if(TRIXEngine_FOUND)
+ target_include_directories(trix_loader PUBLIC ${TRIXEngine_INCLUDE_DIR})
+ target_link_libraries(trix_loader PRIVATE trix_engine)
+endif(TRIXEngine_FOUND)
diff --git a/runtime/onert/frontend/trix/include/trix_loader.h b/runtime/onert/frontend/trix/include/trix_loader.h
new file mode 100644
index 000000000..26d6a3c56
--- /dev/null
+++ b/runtime/onert/frontend/trix/include/trix_loader.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TRIX_TRIX_LOADER_H__
+#define __TRIX_TRIX_LOADER_H__
+
+#include "ir/Graph.h"
+#include <memory>
+
+namespace onert
+{
+namespace trix_loader
+{
+/**
+ * @throw runtime_error when the tvn path is wrong or the tvn file is invalid
+ */
+std::unique_ptr<ir::Model> loadModel(const std::string &filename);
+} // namespace trix_loader
+} // namespace onert
+
+#endif // __TRIX_TRIX_LOADER_H__
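A minimal usage sketch for this loader; "model.tvn" is a hypothetical path, and loadModel throws std::runtime_error on a wrong path or an invalid tvn file, as documented above:

#include "trix_loader.h"

#include <iostream>
#include <stdexcept>

int main()
{
  try
  {
    auto model = onert::trix_loader::loadModel("model.tvn");
    (void)model; // hand the loaded model over to the onert compiler from here
  }
  catch (const std::runtime_error &e)
  {
    std::cerr << e.what() << '\n';
    return 1;
  }
  return 0;
}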
diff --git a/runtime/onert/frontend/trix/src/trix_loader.cc b/runtime/onert/frontend/trix/src/trix_loader.cc
new file mode 100644
index 000000000..cdf239648
--- /dev/null
+++ b/runtime/onert/frontend/trix/src/trix_loader.cc
@@ -0,0 +1,270 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "trix_loader.h"
+
+#include "ir/Graph.h"
+#include "ir/operation/Bulk.h"
+
+#include <libnpuhost.h>
+#include <npubinfmt.h>
+#include <typedef.h>
+
+namespace onert
+{
+namespace trix_loader
+{
+
+/**
+ * @brief A tvn metadata reader
+ */
+class TrixMetaReader
+{
+public:
+ TrixMetaReader() = default;
+ ~TrixMetaReader() { free(_meta); }
+
+ void init(const char *path);
+ data_layout input_seg_layout(uint32_t n) const { return _meta->input_seg_layout[n]; }
+ data_layout output_seg_layout(uint32_t n) const { return _meta->output_seg_layout[n]; }
+ data_type input_seg_quant_type(uint32_t n) const { return _meta->input_seg_quant_type[n]; }
+ data_type output_seg_quant_type(uint32_t n) const { return _meta->output_seg_quant_type[n]; }
+ float input_seg_quant_scale(uint32_t n) const { return _meta->input_seg_quant_s[n]; }
+ float output_seg_quant_scale(uint32_t n) const { return _meta->output_seg_quant_s[n]; }
+ int32_t input_seg_quant_zp(uint32_t n) { return _meta->input_seg_quant_z[n]; }
+ int32_t output_seg_quant_zp(uint32_t n) { return _meta->output_seg_quant_z[n]; }
+ uint32_t input_seg_num() const { return _meta->input_seg_num; }
+ uint32_t output_seg_num() const { return _meta->output_seg_num; }
+ uint32_t input_seg_dims(uint32_t n, uint32_t axis) const
+ {
+ return _meta->input_seg_dims[n][axis];
+ }
+ uint32_t output_seg_dims(uint32_t n, uint32_t axis) const
+ {
+ return _meta->output_seg_dims[n][axis];
+ }
+
+private:
+ npubin_meta *_meta = nullptr;
+};
+
+void TrixMetaReader::init(const char *path)
+{
+ assert(path);
+ _meta = getNPUmodel_metadata(path, false);
+ if (_meta == nullptr)
+ {
+ throw std::runtime_error("Failed to get TRIX model metadata");
+ }
+ if (NPUBIN_VERSION(_meta->magiccode) != 3)
+ {
+ throw std::runtime_error("TRIX model metadata version mismatched.");
+ }
+}
+
+class TrixLoader
+{
+public:
+ /**
+ * @brief Construct a new Loader object
+ *
+ * @param model reference to the model to be populated
+ */
+ explicit TrixLoader(std::unique_ptr<ir::Model> &model) : _model(model) {}
+
+ /**
+ * @brief Load a model from file
+ * @param file_path
+ */
+ void loadFromFile(const std::string &file_path);
+
+private:
+ /**
+ * @brief Perform the actual loading
+ * @throw runtime_error when the tvn path is wrong or the tvn file is invalid
+ */
+ void loadModel();
+ std::unique_ptr<ir::Graph> loadSubgraph();
+ void loadOperands(ir::Graph &subg);
+ ir::OperandIndex loadOperandFromInput(uint32_t i, ir::Graph &subg);
+ ir::OperandIndex loadOperandFromOutput(uint32_t i, ir::Graph &subg);
+ void loadBulk(ir::Graph &subg);
+ void loadOperationIO(ir::OperandIndexSequence &inputs, ir::OperandIndexSequence &outputs);
+ ir::OperandIndex inputIdxToOperandIdx(uint32_t i) const;
+ ir::OperandIndex outputIdxToOperandIdx(uint32_t i) const;
+ ir::DataType toDataType(const data_type type) const;
+
+protected:
+ /** path to model (e.g. tvn) */
+ std::string _model_path;
+ /** original IO shapes */
+ std::vector<ir::Shape> _origin_input_shapes;
+ std::vector<ir::Shape> _origin_output_shapes;
+ /** Reference to the model to be filled with loaded subgraphs */
+ std::unique_ptr<ir::Model> &_model;
+ TrixMetaReader _meta;
+};
+
+ir::DataType TrixLoader::toDataType(const data_type type) const
+{
+ switch (type)
+ {
+ case DATA_TYPE_QASYMM8:
+ return ir::DataType::QUANT_UINT8_ASYMM;
+ case DATA_TYPE_QSYMM16:
+ return ir::DataType::QUANT_INT16_SYMM;
+ default:
+ throw std::runtime_error("Unsupported data type from trix model");
+ }
+}
+
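+// Note: operands are laid out inputs-first. Input i maps to OperandIndex(i) and
+// output j maps to OperandIndex(input_seg_num() + j), matching the creation
+// order in loadOperands() below.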
+ir::OperandIndex TrixLoader::inputIdxToOperandIdx(uint32_t i) const { return ir::OperandIndex(i); }
+ir::OperandIndex TrixLoader::outputIdxToOperandIdx(uint32_t i) const
+{
+ return ir::OperandIndex(_meta.input_seg_num() + i);
+}
+
+void TrixLoader::loadOperationIO(ir::OperandIndexSequence &inputs,
+ ir::OperandIndexSequence &outputs)
+{
+ for (uint32_t i = 0; i < _meta.input_seg_num(); ++i)
+ {
+ inputs.append(inputIdxToOperandIdx(i));
+ }
+
+ for (uint32_t i = 0; i < _meta.output_seg_num(); ++i)
+ {
+ outputs.append(outputIdxToOperandIdx(i));
+ }
+}
+
+void TrixLoader::loadBulk(ir::Graph &subg)
+{
+ ir::operation::Bulk::Param param;
+ param.binary_path = _model_path;
+ param.origin_input_shapes = _origin_input_shapes;
+ param.origin_output_shapes = _origin_output_shapes;
+
+ ir::OperandIndexSequence inputs;
+ ir::OperandIndexSequence outputs;
+
+ loadOperationIO(inputs, outputs);
+
+ std::unique_ptr<ir::operation::Bulk> bulk(new ir::operation::Bulk(inputs, outputs, param));
+ subg.addOperation(std::move(bulk));
+}
+
+ir::OperandIndex TrixLoader::loadOperandFromInput(uint32_t idx, ir::Graph &subg)
+{
+ // Shape
+ ir::Shape shape;
+ for (uint32_t d = 0; d < MAX_RANK; ++d)
+ shape.append(_meta.input_seg_dims(idx, d));
+
+ // TypeInfo
+ ir::TypeInfo type_info(toDataType(_meta.input_seg_quant_type(idx)),
+ _meta.input_seg_quant_scale(idx), _meta.input_seg_quant_zp(idx));
+
+ _origin_input_shapes.push_back(shape);
+ // Create operand
+ const auto operand_index = subg.addOperand(shape, type_info);
+ return operand_index;
+}
+
+ir::OperandIndex TrixLoader::loadOperandFromOutput(uint32_t idx, ir::Graph &subg)
+{
+ // Shape
+ ir::Shape shape;
+ for (uint32_t d = 0; d < MAX_RANK; ++d)
+ shape.append(_meta.output_seg_dims(idx, d));
+
+ // TypeInfo
+ ir::TypeInfo type_info(toDataType(_meta.output_seg_quant_type(idx)),
+ _meta.output_seg_quant_scale(idx), _meta.output_seg_quant_zp(idx));
+
+ _origin_output_shapes.push_back(shape);
+ // Create operand
+ const auto operand_index = subg.addOperand(shape, type_info);
+ return operand_index;
+}
+
+void TrixLoader::loadOperands(ir::Graph &subg)
+{
+ auto in_num = _meta.input_seg_num();
+ for (uint32_t i = 0; i < in_num; ++i)
+ {
+ loadOperandFromInput(i, subg);
+ }
+ auto out_num = _meta.output_seg_num();
+ for (uint32_t i = 0; i < out_num; ++i)
+ {
+ loadOperandFromOutput(i, subg);
+ }
+}
+
+std::unique_ptr<ir::Graph> TrixLoader::loadSubgraph()
+{
+ auto subg = std::make_unique<ir::Graph>();
+ _meta.init(_model_path.c_str());
+
+ // Load tensors
+ loadOperands(*subg);
+
+ // Set inputs
+ for (uint32_t i = 0; i < _meta.input_seg_num(); ++i)
+ {
+ subg->addInput(inputIdxToOperandIdx(i), "tvn_input" + std::to_string(i));
+ }
+ // Set outputs
+ for (uint32_t i = 0; i < _meta.output_seg_num(); ++i)
+ {
+ subg->addOutput(outputIdxToOperandIdx(i), "tvn_out" + std::to_string(i));
+ }
+ // Create operations
+ loadBulk(*subg);
+
+ // TODO: Support layouts other than NHWC, the only one supported at the moment.
+ subg->setLayout(ir::Layout::NHWC);
+ subg->verify();
+ return subg;
+}
+
+void TrixLoader::loadModel()
+{
+ // one subgraph only
+ auto subg = loadSubgraph();
+ _model->push(ir::SubgraphIndex(0), std::move(subg));
+}
+
+void TrixLoader::loadFromFile(const std::string &file_path)
+{
+ // Keep the model path; it is later set as the Bulk operation's binary_path param
+ _model_path = file_path;
+ // Initialize metadata directly from the model file path
+ _meta.init(_model_path.c_str());
+ loadModel();
+}
+
+std::unique_ptr<ir::Model> loadModel(const std::string &filename)
+{
+ auto model = std::make_unique<ir::Model>();
+ TrixLoader loader(model);
+ loader.loadFromFile(filename);
+ return model;
+}
+} // namespace trix_loader
+} // namespace onert
diff --git a/runtime/onert/frontend/trix/src/trix_loader_dummy.cc b/runtime/onert/frontend/trix/src/trix_loader_dummy.cc
new file mode 100644
index 000000000..eecbd2217
--- /dev/null
+++ b/runtime/onert/frontend/trix/src/trix_loader_dummy.cc
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "trix_loader.h"
+
+// Dummy implementation to avoid a build error on targets that don't have trix_engine
+
+namespace onert
+{
+namespace trix_loader
+{
+std::unique_ptr<ir::Model> loadModel(const std::string &)
+{
+ auto model = std::make_unique<ir::Model>();
+ return model;
+}
+} // namespace trix_loader
+} // namespace onert
diff --git a/runtime/onert/odc/CMakeLists.txt b/runtime/onert/odc/CMakeLists.txt
new file mode 100644
index 000000000..e48878dc3
--- /dev/null
+++ b/runtime/onert/odc/CMakeLists.txt
@@ -0,0 +1,39 @@
+# Luci library is not supported on cross build
+if(CMAKE_CROSSCOMPILING)
+ return()
+endif()
+
+nnfw_find_package(Luci QUIET)
+if(NOT Luci_FOUND)
+ message(STATUS "Luci not found. Skip onert_odc")
+ return()
+endif()
+
+file(GLOB_RECURSE SOURCES "*.cc")
+file(GLOB_RECURSE TESTS "*.test.cc")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(onert_odc SHARED ${SOURCES})
+target_link_libraries(onert_odc PRIVATE onert_core luci::import luci::export luci::pass luci::loco)
+target_link_libraries(onert_odc PRIVATE nnfw_common)
+target_link_libraries(onert_odc PRIVATE nnfw_coverage)
+
+install(TARGETS onert_odc LIBRARY DESTINATION lib/nnfw/odc)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+# Unit Tests
+set(TEST_ONERT_ODC test_onert_odc)
+
+add_executable(${TEST_ONERT_ODC} ${TESTS})
+
+target_link_libraries(${TEST_ONERT_ODC} onert_odc)
+# Requires linking nnfw_coverage: check header coverage
+target_link_libraries(${TEST_ONERT_ODC} nnfw_coverage)
+target_link_libraries(${TEST_ONERT_ODC} gtest gtest_main dl ${LIB_PTHREAD})
+target_include_directories(${TEST_ONERT_ODC} PRIVATE $<TARGET_PROPERTY:onert_odc,INCLUDE_DIRECTORIES>)
+
+add_test(${TEST_ONERT_ODC} ${TEST_ONERT_ODC})
+install(TARGETS ${TEST_ONERT_ODC} DESTINATION unittest)
diff --git a/runtime/onert/odc/Quantizer.cc b/runtime/onert/odc/Quantizer.cc
new file mode 100644
index 000000000..b8aec97ce
--- /dev/null
+++ b/runtime/onert/odc/Quantizer.cc
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Quantizer.h"
+
+#include <luci/ImporterEx.h>
+#include <luci/CircleQuantizer.h>
+#include <luci/CircleExporter.h>
+#include <luci/CircleFileExpContract.h>
+
+#include <iostream>
+
+extern "C" onert::odc::IQuantizer *create_quantizer() { return new onert::odc::Quantizer(); }
+extern "C" void destroy_quantizer(onert::odc::IQuantizer *quantizer) { delete quantizer; }
+
+namespace onert
+{
+namespace odc
+{
+
+int Quantizer::quantize(const char *in, const char *out, bool is_q16)
+{
+ // Load model from the file
+ luci::ImporterEx importerex;
+ auto module = importerex.importVerifyModule(std::string(in));
+ if (module.get() == nullptr)
+ return 1;
+
+ luci::CircleQuantizer quantizer;
+ auto options = quantizer.options();
+ {
+ options->enable(luci::CircleQuantizer::Options::Algorithm::QuantizeWeights);
+
+ using AlgorithmParameters = luci::CircleQuantizer::Options::AlgorithmParameters;
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, is_q16 ? "int16" : "int8");
+ options->param(AlgorithmParameters::Quantize_granularity, "channel");
+ }
+
+ for (size_t idx = 0; idx < module->size(); ++idx)
+ {
+ auto graph = module->graph(idx);
+
+ // quantize the graph
+ quantizer.quantize(graph);
+
+ // Skip validate
+ // TODO Validate if needed
+#if 0
+ if (!luci::validate(graph))
+ {
+ std::cerr << "ERROR: Quantized graph is invalid" << std::endl;
+ return 1;
+ }
+#endif
+ }
+
+ // Export to output Circle file
+ luci::CircleExporter exporter;
+ luci::CircleFileExpContract contract(module.get(), std::string(out));
+
+ if (!exporter.invoke(&contract))
+ return 1;
+
+ // Quantization and export succeeded
+ return 0;
+}
+
+} // namespace odc
+} // namespace onert
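The extern "C" create_quantizer/destroy_quantizer pair above suggests the shared library is meant to be discovered at runtime; a hedged sketch of such a consumer follows (the library name and the dlopen mechanism are assumptions, not shown in this diff):

#include <dlfcn.h>

#include "odc/IQuantizer.h"

onert::odc::IQuantizer *loadQuantizer(void *&handle)
{
  handle = dlopen("libonert_odc.so", RTLD_LAZY); // hypothetical library name
  if (handle == nullptr)
    return nullptr;
  using Factory = onert::odc::IQuantizer *(*)();
  auto create = reinterpret_cast<Factory>(dlsym(handle, "create_quantizer"));
  return create != nullptr ? create() : nullptr;
}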
diff --git a/runtime/onert/odc/Quantizer.h b/runtime/onert/odc/Quantizer.h
new file mode 100644
index 000000000..8a03f59d5
--- /dev/null
+++ b/runtime/onert/odc/Quantizer.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_ODC_QUANTIZE_H__
+#define __ONERT_ODC_QUANTIZE_H__
+
+#include "odc/IQuantizer.h"
+
+namespace onert
+{
+namespace odc
+{
+
+class Quantizer : public IQuantizer
+{
+public:
+ Quantizer() = default;
+ ~Quantizer() = default;
+
+ int quantize(const char *in, const char *out, bool is_q16);
+};
+
+} // namespace odc
+} // namespace onert
+
+#endif // __ONERT_ODC_QUANTIZE_H__
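A short usage sketch (file names are hypothetical; quantize() returns 0 on success and non-zero on failure, as implemented in Quantizer.cc above):

#include "Quantizer.h"

int main()
{
  onert::odc::Quantizer quantizer;
  // Weight-quantize a float32 circle model to int8; pass true for int16 instead.
  return quantizer.quantize("model.circle", "model.q8.circle", /*is_q16=*/false);
}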
diff --git a/runtime/onert/odc/Quantizer.test.cc b/runtime/onert/odc/Quantizer.test.cc
new file mode 100644
index 000000000..22baed576
--- /dev/null
+++ b/runtime/onert/odc/Quantizer.test.cc
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Quantizer.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert::odc;
+
+// Test model input path is not set
+TEST(odc_Quantizer, neg_model_input_path)
+{
+ Quantizer quantizer;
+ ASSERT_THROW(quantizer.quantize(nullptr, "out", false), std::logic_error);
+}
+
+// Test model output path is not set
+TEST(odc_Quantizer, neg_model_output_path)
+{
+ Quantizer quantizer;
+ ASSERT_NE(quantizer.quantize("in", nullptr, false), 0);
+}
+
+// Test invalid model input path
+TEST(odc_Quantizer, neg_invalid_model_input_path)
+{
+ Quantizer quantizer;
+ ASSERT_NE(quantizer.quantize("invalid_model_input_path.circle", "out", false), 0);
+}
diff --git a/runtime/onert/test/CMakeLists.txt b/runtime/onert/test/CMakeLists.txt
deleted file mode 100644
index 38899976d..000000000
--- a/runtime/onert/test/CMakeLists.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-set(TEST_ONERT test_onert)
-
-file(GLOB_RECURSE TESTS "*.cc")
-
-add_executable(${TEST_ONERT} ${TESTS})
-
-target_include_directories(${TEST_ONERT} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../core/src)
-
-target_link_libraries(${TEST_ONERT} onert_core)
-target_link_libraries(${TEST_ONERT} gtest)
-target_link_libraries(${TEST_ONERT} gtest_main)
-target_link_libraries(${TEST_ONERT} ${LIB_PTHREAD} dl)
-add_test(${TEST_ONERT} ${TEST_ONERT})
-
-install(TARGETS ${TEST_ONERT} DESTINATION unittest_standalone)
diff --git a/runtime/onert/test/core/compiler/Scheduler.cc b/runtime/onert/test/core/compiler/Scheduler.cc
deleted file mode 100644
index 50f3964db..000000000
--- a/runtime/onert/test/core/compiler/Scheduler.cc
+++ /dev/null
@@ -1,587 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <compiler/HEScheduler.h>
-#include <exec/ExecTime.h>
-
-#include <ir/Shape.h>
-#include <ir/InternalType.h>
-#include <ir/TypeInfo.h>
-#include <ir/DataType.h>
-
-#include <ir/operation/BinaryArithmetic.h>
-#include <ir/operation/FullyConnected.h>
-
-#include <gtest/gtest.h>
-
-namespace
-{
-using namespace onert;
-using namespace ir;
-using namespace backend;
-using namespace operation;
-using namespace exec;
-
-//
-// Mock backends classes
-//
-
-struct MockConfigCPU : public IConfig
-{
- std::string id() override { return "cpu"; }
- bool initialize() override { return true; };
- bool supportPermutation() override { return false; }
- Layout supportLayout(const Operation &, Layout) override { return Layout::UNKNOWN; }
- bool supportDynamicTensor() override { return false; }
- bool supportFP16() override { return false; }
-};
-
-struct MockBackendCPU : public Backend
-{
- std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigCPU>(); }
- std::unique_ptr<BackendContext>
- newContext(const Graph &, const std::shared_ptr<custom::IKernelBuilder> &, bool) const override
- {
- return std::unique_ptr<BackendContext>(
- new BackendContext{this, nullptr, nullptr, nullptr, nullptr});
- }
-};
-
-struct MockConfigGPU : public IConfig
-{
- std::string id() override { return "gpu"; }
- bool initialize() override { return true; };
- bool supportPermutation() override { return false; }
- ir::Layout supportLayout(const ir::Operation &, ir::Layout) override
- {
- return ir::Layout::UNKNOWN;
- }
- bool supportDynamicTensor() override { return false; }
- bool supportFP16() override { return false; }
-};
-
-struct MockBackendGPU : public Backend
-{
- std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigGPU>(); }
- std::unique_ptr<BackendContext>
- newContext(const Graph &, const std::shared_ptr<custom::IKernelBuilder> &, bool) const override
- {
- return std::unique_ptr<BackendContext>(
- new BackendContext{this, nullptr, nullptr, nullptr, nullptr});
- }
-};
-
-struct MockConfigNPU : public IConfig
-{
- std::string id() override { return "npu"; }
- bool initialize() override { return true; };
- bool supportPermutation() override { return false; }
- ir::Layout supportLayout(const ir::Operation &, ir::Layout) override
- {
- return ir::Layout::UNKNOWN;
- }
- bool supportDynamicTensor() override { return false; }
- bool supportFP16() override { return false; }
-};
-
-struct MockBackendNPU : public Backend
-{
- std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigNPU>(); }
- std::unique_ptr<BackendContext>
- newContext(const Graph &, const std::shared_ptr<custom::IKernelBuilder> &, bool) const override
- {
- return std::unique_ptr<BackendContext>(
- new BackendContext{this, nullptr, nullptr, nullptr, nullptr});
- }
-};
-
-//
-// Constants
-//
-
-const int OPERAND_ELEMS = 268203;
-const int OPERAND_SIZE = OPERAND_ELEMS * 4;
-const int OPERATION_SIZE = OPERAND_SIZE * 3;
-
-const std::string LINEAR("Linear");
-const std::string DATAFLOW("Dataflow");
-const std::string PARALLEL("Parallel");
-
-//
-// Helper functions
-//
-
-// Set executor through environment variable
-void setExecutor(const std::string &executor) { setenv("EXECUTOR", executor.c_str(), true); }
-
-// Set profiling mode through environment variable
-void setProfilingMode(const bool value) { setenv("PROFILING_MODE", value ? "1" : "0", true); }
-
-// Calculate operation size by addition sizes of all input and output operands
-uint32_t calcOpSize(const std::shared_ptr<Graph> &graph, const OperationIndex &op_idx)
-{
- uint32_t size = 0;
- const auto &op = graph->operations().at(op_idx);
- for (const auto &ind : op.getInputs() + op.getOutputs())
- size += graph->operands().at(ind).info().total_size();
- return size;
-}
-
-// Set execution operation time. This method is needed since ExecutionTime has only
-// 'updateOperationExecTime' method.
-void setOperationExecTime(ExecTime &et, const Backend *backend, const std::string &operation,
- bool quant, uint32_t op_size, int64_t time)
-{
- // You shouldn't set negative time with this method since nnfw JSON deserializer can't read it
- assert(time > 0);
- int64_t prev_time = et.getOperationExecTime(backend, operation, quant, op_size);
- int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? time : 2 * time - prev_time;
- et.updateOperationExecTime(backend, operation, quant, op_size, time_to_set);
- assert(et.getOperationExecTime(backend, operation, quant, op_size) == time);
-}
-
-// Set same execution time for all given backends/operations
-void setOperationsExecutionTime(const std::vector<const Backend *> &backends,
- const std::vector<std::string> &op_names,
- const std::vector<uint32_t> &op_sizes, int64_t exec_time)
-{
- assert(op_names.size() == op_sizes.size());
- ExecTime et(backends);
- for (int i = 0; i < op_names.size(); ++i)
- {
- for (auto &backend : backends)
- setOperationExecTime(et, backend, op_names[i], false, op_sizes[i], exec_time);
- }
- et.uploadOperationsExecTime();
-}
-
-// Set permute time from one backend to another. This method is needed since ExecutionTime has only
-// 'updatePermuteTime' method.
-void setPermutationTime(ExecTime &et, const Backend *from_backend, const Backend *to_backend,
- bool quant, uint32_t op_size, int64_t time)
-{
- // You shouldn't set negative time with this method since nnfw JSON deserializer can't read it
- assert(time > 0);
- int64_t prev_time = et.getPermuteTime(from_backend, to_backend, quant, op_size);
- int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? time : 2 * time - prev_time;
- et.updatePermuteTime(from_backend, to_backend, quant, op_size, time_to_set);
- assert(et.getPermuteTime(from_backend, to_backend, quant, op_size) == time);
-}
-
-// Set same permutation time between all given backends
-void setPermutationsExecutionTime(const std::vector<const Backend *> &backends,
- const int operand_size, const int64_t exec_time)
-{
- ExecTime et(backends);
- for (const auto &backend : backends)
- {
- for (auto &other_backend : backends)
- {
- if (backend == other_backend)
- continue;
- setPermutationTime(et, backend, other_backend, false, operand_size, exec_time);
- }
- }
- et.uploadOperationsExecTime();
-}
-
-//
-// Functions for creating graphs
-//
-
-using OIS = OperandIndexSequence;
-
-template <typename NodeT, typename... Types>
-OperationIndex create(std::shared_ptr<Graph> graph, Types &&... args)
-{
- auto op = std::make_unique<NodeT>(std::forward<Types>(args)...);
- auto op_idx = graph->addOperation(std::move(op));
- // For now in scheduler test all operations in tested graphs has same size (for simplicity)
- assert(calcOpSize(graph, op_idx) == OPERATION_SIZE);
- return op_idx;
-}
-
-// Create straight graph: Add->Sub->Mul
-std::shared_ptr<Graph> createStraightGraph()
-{
- auto graph = std::make_shared<Graph>();
- const TypeInfo float_op(DataType::FLOAT32);
-
- // Create add node
- auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
- create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);
-
- // Create sub node
- auto sub_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
- create<BinaryArithmetic>(graph, OIS{add_out_idx, sub_const_idx}, OIS{sub_out_idx}, sub_op_params);
-
- // Create mul node
- auto mul_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- auto mul_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- BinaryArithmetic::Param mul_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
- create<BinaryArithmetic>(graph, OIS{sub_out_idx, mul_const_idx}, OIS{mul_out_idx}, mul_op_params);
-
- graph->finishBuilding();
- return graph;
-}
-
-/* Create branched graph:
- * [Add]
- * // \\
- * [Mul1] [FC2]
- * || ||
- * [Mul2] [FC2]
- * \\ //
- * [Sub]
- */
-std::shared_ptr<Graph> createBranchedGraph()
-{
- auto graph = std::make_shared<Graph>();
- const TypeInfo float_op(DataType::FLOAT32);
-
- // Create add node
- auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
- create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);
-
- // Create mul1 node
- auto mul1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- auto mul1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- BinaryArithmetic::Param mul1_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
- create<BinaryArithmetic>(graph, OIS{add_out_idx, mul1_const_idx}, OIS{mul1_out_idx},
- mul1_op_params);
-
- // Create mul2 node
- auto mul2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- auto mul2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- BinaryArithmetic::Param mul2_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
- create<BinaryArithmetic>(graph, OIS{mul1_out_idx, mul2_const_idx}, OIS{mul2_out_idx},
- mul2_op_params);
-
- // Create fc1 node
- auto fc1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- auto fc1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- FullyConnected::Param fc1_op_params{Activation::NONE};
- create<FullyConnected>(graph, OIS{add_out_idx, fc1_const_idx}, OIS{fc1_out_idx}, fc1_op_params);
-
- // Create fc2 node
- auto fc2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- auto fc2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- FullyConnected::Param fc2_op_params{Activation::NONE};
- create<FullyConnected>(graph, OIS{fc1_out_idx, fc2_const_idx}, OIS{fc2_out_idx}, fc2_op_params);
-
- // Create sub node
- auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
- create<BinaryArithmetic>(graph, OIS{mul2_out_idx, fc2_out_idx}, OIS{sub_out_idx}, sub_op_params);
-
- graph->finishBuilding();
- return graph;
-}
-
-//
-// Tests setup/teardown
-//
-
-// SetUp/TearDown methods runs before/after each test and performs actions common for each test
-class SchedulerTest : public ::testing::Test
-{
-protected:
- void SetUp() override
- {
- // Initialize mock backends
- _cpu_backend = new MockBackendCPU();
- _gpu_backend = new MockBackendGPU();
- _npu_backend = new MockBackendNPU();
- _mock_backends = {_cpu_backend, _gpu_backend, _npu_backend};
-
- // Remove previous profile data if it exists
- if (!remove("exec_time.json"))
- {
- // DO NOTHING (no profile data)
- }
-
- // Remember original value of 'EXECUTOR' environment variable
- char *executor = std::getenv("EXECUTOR");
- _original_executor = executor == nullptr ? "" : executor;
-
- // Remember original value of 'PROFILING_MODE' environment variable
- char *profiling_mode = std::getenv("PROFILING_MODE");
- _original_profiling_mode = profiling_mode == nullptr ? "" : profiling_mode;
- }
-
- void TearDown() override
- {
- delete _cpu_backend;
- delete _gpu_backend;
- delete _npu_backend;
- EXPECT_EQ(remove("exec_time.json"), 0);
- setenv("EXECUTOR", _original_executor.c_str(), true);
- setenv("PROFILING_MODE", _original_profiling_mode.c_str(), true);
- }
-
- backend::BackendContexts buildBackendContexts(const Graph &graph)
- {
- backend::BackendContexts contexts;
- for (auto backend : _mock_backends)
- {
- contexts.emplace(backend, backend->newContext(graph, nullptr, false));
- }
- return contexts;
- }
-
- const MockBackendCPU *_cpu_backend{nullptr};
- const MockBackendGPU *_gpu_backend{nullptr};
- const MockBackendNPU *_npu_backend{nullptr};
- std::vector<const Backend *> _mock_backends;
-
- std::string _original_executor;
- std::string _original_profiling_mode;
-};
-
-class SchedulerTestWithExecutorParam : public SchedulerTest,
- public testing::WithParamInterface<std::string>
-{
-};
-
-//
-// HEScheduler tests
-//
-
-// Test scheduler behavior for straight graph with known execution time of all nodes and permutes.
-TEST_P(SchedulerTestWithExecutorParam, straight_graph_known_exec_time)
-{
- setExecutor(GetParam());
-
- // Prepare graph
- ir::Subgraphs subgs;
- auto graph(createStraightGraph());
- subgs.push(ir::SubgraphIndex{0}, graph);
- OperationIndex add_op_idx(0), sub_op_idx(1), mul_op_idx(2);
-
- // Set default execution and transfer time
- setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1);
- setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul"},
- {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4);
-
- // Test 1
- // Expected behaviour: scheduler assigns different backend to each node
- {
- // For each backend reduce execution time of one node
- ExecTime et(_mock_backends);
- setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, 1);
- setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, 1);
- setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, 1);
- et.uploadOperationsExecTime();
-
- // Test scheduler
- auto backend_contexts = buildBackendContexts(*graph);
- auto scheduler = compiler::HEScheduler(backend_contexts,
- compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
- const auto br = scheduler.schedule(*graph);
- ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu");
- ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "gpu");
- ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "npu");
- }
-
- // Test 2
- // Expected behaviour: scheduler assigns single backend to all nodes because of big transfer time
- {
- // Increase transfer time
- setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1e5);
-
- // Test scheduler
- auto backend_contexts = buildBackendContexts(*graph);
- auto scheduler = compiler::HEScheduler(backend_contexts,
- compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
- const auto br = scheduler.schedule(*graph);
- ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu");
- ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu");
- ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "cpu");
- }
-}
-
-// Test scheduler behavior for branched graph with known execution time of all nodes and permutes
-TEST_P(SchedulerTestWithExecutorParam, branched_graph_known_exec_time)
-{
- const int64_t NPU_ET = 5000;
- setExecutor(GetParam());
-
- // Prepare graph
- ir::Subgraphs subgs;
- auto graph(createBranchedGraph());
- subgs.push(ir::SubgraphIndex{0}, graph);
- OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4),
- sub_op_idx(5);
-
- // Set default execution and transfer time
- setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1000);
- setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul", "FullyConnected"},
- {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4);
-
- // Test 1
- // Expected behaviour: for dataflow and linear executors scheduler assigns fastest backend to all
- // nodes, in case of parallel executor scheduler assigns different backends to branches.
- {
- // Reduce execution time
- ExecTime et(_mock_backends);
- setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, NPU_ET);
- setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, NPU_ET);
- setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, NPU_ET);
- setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET);
- setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET + 1000);
- setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET + 1000);
- et.uploadOperationsExecTime();
-
- // Test scheduler
- auto backend_contexts = buildBackendContexts(*graph);
- auto scheduler = compiler::HEScheduler(backend_contexts,
- compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
- const auto br = scheduler.schedule(*graph);
-
- std::string branch1_expected_backend("npu"), branch2_expected_backend("npu");
- if (GetParam() == PARALLEL)
- {
- branch1_expected_backend =
- br->getBackend(mul1_op_idx)->config()->id() == "npu" ? "npu" : "gpu";
- branch2_expected_backend = branch1_expected_backend == "npu" ? "gpu" : "npu";
- }
-
- ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu");
- ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), branch1_expected_backend);
- ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), branch1_expected_backend);
- ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), branch2_expected_backend);
- ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), branch2_expected_backend);
- ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu");
- }
-
- // Test 2
- // Expected behaviour: scheduler assigns single backend to all nodes
- {
- // Increase execution time for GPU backend
- ExecTime et(_mock_backends);
- /* for parallel executor: set a time, that is larger than sum_of_other_branches_nodes_cnt *
- * npu_exec_time so that npu is prefered: the ith branch will wait for npu until it finishes the
- * [0;i-1] branches nodes in DFS order. In each branch it goes deep intul doesn't encounter
- * branching or scheduler assigns another backend to a node*/
- setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET * 3 + 1);
- setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET * 3 + 1);
- et.uploadOperationsExecTime();
-
- // Test scheduler
- auto backend_contexts = buildBackendContexts(*graph);
- auto scheduler = compiler::HEScheduler(backend_contexts,
- compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
- const auto br = scheduler.schedule(*graph);
- ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu");
- ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
- ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu");
- ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "npu");
- ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "npu");
- ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu");
- }
-}
-
-// SchedulerTestWithExecutorParam tests are parameterized with executor name and runs three times -
-// one time for each executor
-INSTANTIATE_TEST_CASE_P(AllExecutors, SchedulerTestWithExecutorParam,
- testing::Values(LINEAR, DATAFLOW, PARALLEL));
-
-// Test scheduler behavior for branched graph and enabled profiling mode
-TEST_F(SchedulerTest, branched_graph_profiling_mode)
-{
- const int ET = 1e5;
-
- // Turn on profiling mode
- setProfilingMode(true);
- setExecutor(DATAFLOW);
-
- // Prepare graph
- ir::Subgraphs subgs;
- auto graph(createBranchedGraph());
- subgs.push(ir::SubgraphIndex{0}, graph);
- OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4),
- sub_op_idx(5);
-
- // Test 1
- // Expected behaviour: scheduler assigns backends to nodes with unknown execution time
- {
- // Set execution time for all backends/nodes except for cpu/Sub, npu/Mul, gpu/FC
- ExecTime et(_mock_backends);
- setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, ET);
- setOperationExecTime(et, _cpu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
- setOperationExecTime(et, _cpu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
- setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, ET);
- setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
- setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, ET);
- setOperationExecTime(et, _gpu_backend, "Add", false, OPERATION_SIZE, ET);
- setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
- setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, ET);
- et.uploadOperationsExecTime();
-
- // Test scheduler
- auto backend_contexts = buildBackendContexts(*graph);
- auto scheduler = compiler::HEScheduler(backend_contexts,
- compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
- const auto br = scheduler.schedule(*graph);
- ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
- ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu");
- ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "gpu");
- ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "gpu");
- ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu");
- }
-
- // Test 2
- // Expected behaviour: scheduler shuffling backends, so different backends are assigned to
- // neighbor nodes
- {
- // Set execution time for rest backends/nodes (cpu/Sub, npu/Mul, gpu/FC)
- ExecTime et(_mock_backends);
- setOperationExecTime(et, _cpu_backend, "Sub", false, OPERATION_SIZE, ET);
- setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
- setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
- et.uploadOperationsExecTime();
-
- // Test scheduler
- auto backend_contexts = buildBackendContexts(*graph);
- auto scheduler = compiler::HEScheduler(backend_contexts,
- compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
- const auto br = scheduler.schedule(*graph);
- ASSERT_NE(br->getBackend(add_op_idx)->config()->id(),
- br->getBackend(mul1_op_idx)->config()->id());
- ASSERT_NE(br->getBackend(add_op_idx)->config()->id(),
- br->getBackend(fc1_op_idx)->config()->id());
- ASSERT_NE(br->getBackend(mul1_op_idx)->config()->id(),
- br->getBackend(mul2_op_idx)->config()->id());
- ASSERT_NE(br->getBackend(fc1_op_idx)->config()->id(),
- br->getBackend(fc2_op_idx)->config()->id());
- ASSERT_NE(br->getBackend(mul2_op_idx)->config()->id(),
- br->getBackend(sub_op_idx)->config()->id());
- ASSERT_NE(br->getBackend(fc2_op_idx)->config()->id(),
- br->getBackend(sub_op_idx)->config()->id());
- }
-}
-
-// TODO: Add tests with unknown execution and permutation time
-
-} // unnamed namespace
diff --git a/runtime/onert/test/core/exec/ExecInstance.cc b/runtime/onert/test/core/exec/ExecInstance.cc
deleted file mode 100644
index 806b47ecc..000000000
--- a/runtime/onert/test/core/exec/ExecInstance.cc
+++ /dev/null
@@ -1,297 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-#include <thread>
-
-#include "ir/Graph.h"
-#include "compiler/Compiler.h"
-#include "exec/Execution.h"
-#include "ir/operation/BinaryArithmetic.h"
-
-namespace
-{
-
-using namespace onert::ir;
-
-class CompiledMockUpModel
-{
-public:
- CompiledMockUpModel()
- {
- // Model: two elementwise add operation
- // model input: lhs, rhs1
- // model output: second add result (result2)
- // constant: rhs2
- // result1 <= (lhs + rhs)
- // result2 <= (result1 + rhs2)
- // lhs, rhs1, rh2, result1, result2 shape: {1, 2, 2, 1}
- // activation: none (constant)
- graph = std::make_shared<Graph>();
- // 1st add operands (result1 <= lhs + rhs1)
- Shape shape{1, 2, 2, 1};
- TypeInfo type{DataType::FLOAT32};
- static float rhs2_data[4] = {3, 1, -1, 5};
- auto operand_lhs = graph->addOperand(shape, type);
- auto operand_rhs1 = graph->addOperand(shape, type);
- auto operand_result1 = graph->addOperand(shape, type);
- auto operand_rhs2 = graph->addOperand(shape, type);
- auto operand_result2 = graph->addOperand(shape, type);
- graph->operands()
- .at(operand_rhs2)
- .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16));
- // 2nd add operations (result2 <= result1 + rhs2)
- operation::BinaryArithmetic::Param param1;
- param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
- param1.activation = Activation::NONE;
- auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
- auto output_set1 = OperandIndexSequence{operand_result1};
- graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
- operation::BinaryArithmetic::Param param2;
- param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
- param2.activation = Activation::NONE;
- auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
- auto output_set2 = OperandIndexSequence{operand_result2};
- graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
- // Identify model inputs and outputs
- graph->addInput(operand_lhs);
- graph->addInput(operand_rhs1);
- graph->addOutput(operand_result2);
- graph->finishBuilding();
-
- // Compile
- auto subgs = std::make_shared<onert::ir::Subgraphs>();
- subgs->push(onert::ir::SubgraphIndex{0}, graph);
- onert::compiler::Compiler compiler{subgs};
- executors = compiler.compile();
- }
-
-public:
- std::shared_ptr<Graph> graph;
- std::shared_ptr<onert::exec::ExecutorMap> executors;
-};
-
-TEST(ExecInstance, simple)
-{
- auto mockup = CompiledMockUpModel();
- auto graph = mockup.graph;
- auto executors = mockup.executors;
-
- auto input1 = IOIndex{0};
- auto input2 = IOIndex{1};
- auto output = IOIndex{0};
-
- const float input1_buffer[4] = {1, 0, -1, -2};
- const float input2_buffer[4] = {1, -3, 2, -4};
- float output_buffer[4] = {};
- const float output_expected[4] = {5, -2, 0, -1};
-
- onert::exec::Execution execution{executors};
-
- execution.setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16);
- execution.setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16);
- execution.setOutput(output, reinterpret_cast<void *>(output_buffer), 16);
- execution.execute();
-
- for (auto i = 0; i < 4; i++)
- {
- EXPECT_EQ(output_buffer[i], output_expected[i]);
- }
-}
-
-TEST(ExecInstance, twoCompile)
-{
- auto mockup = CompiledMockUpModel();
- auto graph = mockup.graph;
- auto executors1 = mockup.executors;
- onert::exec::Execution execution1{executors1};
-
- auto input1 = IOIndex{0};
- auto input2 = IOIndex{1};
- auto output = IOIndex{0};
-
- const float exe1_input1_buffer[4] = {1, 0, -1, -2};
- const float exe1_input2_buffer[4] = {1, -3, 2, -4};
- float exe1_output_buffer[4] = {};
- const float exe1_output_expected[4] = {5, -2, 0, -1};
-
- execution1.setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16);
- execution1.setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16);
- execution1.setOutput(output, reinterpret_cast<void *>(exe1_output_buffer), 16);
-
- // Make new executor: compile again
- auto subgs = std::make_shared<onert::ir::Subgraphs>();
- subgs->push(onert::ir::SubgraphIndex{0}, graph);
- onert::compiler::Compiler compiler{subgs};
- std::shared_ptr<onert::exec::ExecutorMap> executors2 = compiler.compile();
- onert::exec::Execution execution2{executors2};
-
- const float exe2_input1_buffer[4] = {2, 1, -2, 0};
- const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
- float exe2_output_buffer[4] = {};
- const float exe2_output_expected[4] = {2, 5, -2, 7};
-
- execution2.setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16);
- execution2.setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16);
- execution2.setOutput(output, reinterpret_cast<void *>(exe2_output_buffer), 16);
-
- execution1.execute();
- execution2.execute();
-
- for (auto i = 0; i < 4; i++)
- {
- EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
- EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
- }
-}
-
-// Support two initialized execution instances, then ordered execution
-TEST(ExecInstance, twoExecution)
-{
- auto mockup = CompiledMockUpModel();
- auto executors = mockup.executors;
- auto input1 = IOIndex{0};
- auto input2 = IOIndex{1};
- auto output1 = IOIndex{0};
-
- const float exe1_input1_buffer[4] = {1, 0, -1, -2};
- const float exe1_input2_buffer[4] = {1, -3, 2, -4};
- float exe1_output_buffer[4] = {};
- const float exe1_output_expected[4] = {5, -2, 0, -1};
- const float exe2_output_expected[4] = {2, 5, -2, 7};
-
- onert::exec::Execution execution1{executors};
- execution1.setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16);
- execution1.setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16);
- execution1.setOutput(output1, reinterpret_cast<void *>(exe1_output_buffer), 16);
-
- const float exe2_input1_buffer[4] = {2, 1, -2, 0};
- const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
- float exe2_output_buffer[4] = {};
-
- // Make new execution
- onert::exec::Execution execution2{executors};
- execution2.setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16);
- execution2.setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16);
- execution2.setOutput(output1, reinterpret_cast<void *>(exe2_output_buffer), 16);
-
- execution1.execute();
- execution2.execute();
-
- for (auto i = 0; i < 4; i++)
- {
- EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
- EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
- }
-}
-
-class Inference
-{
-public:
- Inference(const float (&input1)[4], const float (&input2)[4], float (&output)[4],
- std::shared_ptr<onert::exec::ExecutorMap> &executors)
- : _input1{input1}, _input2{input2}, _output{output}, _executors{executors}
- {
- // DO NOTHING
- }
-
- void inference(void)
- {
- auto input1 = IOIndex{0};
- auto input2 = IOIndex{1};
- auto output1 = IOIndex{0};
-
- onert::exec::Execution execution{_executors};
- execution.setInput(input1, reinterpret_cast<const void *>(_input1), 16);
- execution.setInput(input2, reinterpret_cast<const void *>(_input2), 16);
- execution.setOutput(output1, reinterpret_cast<void *>(_output), 16);
-
- execution.execute();
- }
-
-private:
- const float (&_input1)[4];
- const float (&_input2)[4];
- float (&_output)[4];
- std::shared_ptr<onert::exec::ExecutorMap> &_executors;
-};
-
-// Support multi-thread execution
-TEST(ExecInstance, twoThreads)
-{
- auto mockup = CompiledMockUpModel();
- auto executors = mockup.executors;
-
- const float exe1_input1_buffer[4] = {1, 0, -1, -2};
- const float exe1_input2_buffer[4] = {1, -3, 2, -4};
- float exe1_output_buffer[4] = {};
- const float exe1_output_expected[4] = {5, -2, 0, -1};
-
- Inference execution1{exe1_input1_buffer, exe1_input2_buffer, exe1_output_buffer, executors};
-
- const float exe2_input1_buffer[4] = {2, 1, -2, 0};
- const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
- float exe2_output_buffer[4] = {};
- const float exe2_output_expected[4] = {2, 5, -2, 7};
-
- Inference execution2{exe2_input1_buffer, exe2_input2_buffer, exe2_output_buffer, executors};
-
- std::thread t1{&Inference::inference, &execution1};
- std::thread t2{&Inference::inference, &execution2};
-
- t1.join();
- t2.join();
-
- for (auto i = 0; i < 4; i++)
- {
- EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
- EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
- }
-}
-
-// Support asynchronous execution
-TEST(ExecInstance, async)
-{
- auto mockup = CompiledMockUpModel();
- auto graph = mockup.graph;
- auto executors = mockup.executors;
-
- auto input1 = IOIndex{0};
- auto input2 = IOIndex{1};
- auto output = IOIndex{0};
-
- const float input1_buffer[4] = {1, 0, -1, -2};
- const float input2_buffer[4] = {1, -3, 2, -4};
- float output_buffer[4] = {};
- const float output_expected[4] = {5, -2, 0, -1};
-
- onert::exec::Execution execution{executors};
-
- execution.setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16);
- execution.setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16);
- execution.setOutput(output, reinterpret_cast<void *>(output_buffer), 16);
- execution.startExecute();
- execution.waitFinish();
-
- for (auto i = 0; i < 4; i++)
- {
- EXPECT_EQ(output_buffer[i], output_expected[i]);
- }
-}
-
-} // namespace
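
These tests all lean on one design point: the compiled ExecutorMap is immutable and shared, while each Execution owns its own input/output bindings, which is what makes sequential, threaded, and asynchronous runs interchangeable. A minimal standalone sketch of that pattern, with a stand-in Model in place of the compiled executors (standard library only, no onert APIs):

#include <array>
#include <cassert>
#include <memory>
#include <thread>

// Stand-in for the compiled artifact: an elementwise add over 4 floats.
// It is immutable, so many executions may share it concurrently.
struct Model
{
  void run(const float *lhs, const float *rhs, float *out) const
  {
    for (int i = 0; i < 4; ++i)
      out[i] = lhs[i] + rhs[i];
  }
};

int main()
{
  auto model = std::make_shared<const Model>(); // shared by every execution

  // Each execution owns its own input/output buffers.
  std::array<float, 4> in1a{1, 0, -1, -2}, in1b{1, -3, 2, -4}, out1{};
  std::array<float, 4> in2a{2, 1, -2, 0}, in2b{-3, 3, 1, 2}, out2{};

  std::thread t1([&] { model->run(in1a.data(), in1b.data(), out1.data()); });
  std::thread t2([&] { model->run(in2a.data(), in2b.data(), out2.data()); });
  t1.join();
  t2.join();

  assert(out1[0] == 2 && out2[0] == -1);
  return 0;
}
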
diff --git a/runtime/onert/test/core/exec/ExecTime.test.cc b/runtime/onert/test/core/exec/ExecTime.test.cc
deleted file mode 100644
index 8c2e34df8..000000000
--- a/runtime/onert/test/core/exec/ExecTime.test.cc
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "exec/ExecTime.h"
-#include "backend/IConfig.h"
-#include "backend/Backend.h"
-#include <gtest/gtest.h>
-#include <string>
-
-namespace
-{
-using namespace onert;
-using namespace exec;
-using namespace backend;
-
-struct MockConfig : public IConfig
-{
- std::string id() override { return "b1"; }
- bool initialize() override { return true; }
- bool supportPermutation() override { return false; }
- ir::Layout supportLayout(const ir::Operation &, ir::Layout) override
- {
- return ir::Layout::UNKNOWN;
- }
- bool supportDynamicTensor() override { return false; }
- bool supportFP16() override { return false; }
-};
-
-struct MockBackend : public ::onert::backend::Backend
-{
- std::shared_ptr<onert::backend::IConfig> config() const override
- {
- return std::make_shared<MockConfig>();
- }
- std::unique_ptr<BackendContext> newContext(const ir::Graph &,
- const std::shared_ptr<custom::IKernelBuilder> &kb,
- bool) const override
- {
- return nullptr;
- }
-};
-
-TEST(ExecTime, roundtrip_ok)
-{
- const auto *b = new MockBackend();
- std::vector<const Backend *> bs = {b};
- {
- ExecTime et(bs);
- et.updateOperationExecTime(b, "op1", true, 100, 100);
- et.updateOperationExecTime(b, "op1", true, 200, 200);
- et.updateOperationExecTime(b, "op1", false, 100, 888);
- et.uploadOperationsExecTime();
- }
- {
- ExecTime et(bs);
- auto time = et.getOperationExecTime(b, "op1", true, 100);
- ASSERT_EQ(time, 100);
- // Check interpolation
- time = et.getOperationExecTime(b, "op1", true, 150);
- ASSERT_EQ(time, 150);
- time = et.getOperationExecTime(b, "op1", false, 100);
- ASSERT_EQ(time, 888);
- et.uploadOperationsExecTime();
- }
- // clean up
- EXPECT_EQ(remove("exec_time.json"), 0);
-}
-
-TEST(ExecTime, structure)
-{
-
- const auto *b = new MockBackend();
- std::vector<const Backend *> bs = {b};
- {
- ExecTime et(bs);
- et.updateOperationExecTime(b, "op1", true, 100, 100);
- et.updateOperationExecTime(b, "op1", true, 200, 200);
- et.uploadOperationsExecTime();
- }
- {
- ExecTime et(bs);
- auto time = et.getOperationExecTime(b, "op1", true, 100);
- ASSERT_EQ(time, 100);
- // Check interpolation
- time = et.getOperationExecTime(b, "op1", true, 200);
- ASSERT_EQ(time, 200);
- et.uploadOperationsExecTime();
- }
- // clean up
- EXPECT_EQ(remove("exec_time.json"), 0);
-}
-} // unnamed namespace
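
The roundtrip test encodes the lookup contract: an exact operand size returns the recorded time, and sizes in between are linearly interpolated (100 at size 100 and 200 at size 200 yield 150 at size 150). A self-contained sketch of that interpolation over a sorted size-to-time map, as an illustration rather than the ExecTime implementation:

#include <cassert>
#include <cstdint>
#include <iterator>
#include <map>

// Look up an execution time for `size` from measured (size, time) points,
// interpolating linearly between the two nearest measurements.
int64_t interpolate(const std::map<uint32_t, int64_t> &points, uint32_t size)
{
  auto hi = points.lower_bound(size);
  if (hi != points.end() && hi->first == size)
    return hi->second; // exact measurement
  if (hi == points.begin() || hi == points.end())
    return -1; // outside the measured range: treat as unknown
  auto lo = std::prev(hi);
  double t = double(size - lo->first) / (hi->first - lo->first);
  return int64_t(lo->second + t * (hi->second - lo->second));
}

int main()
{
  std::map<uint32_t, int64_t> m{{100, 100}, {200, 200}};
  assert(interpolate(m, 100) == 100);
  assert(interpolate(m, 150) == 150); // the midpoint checked by the test
  return 0;
}
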
diff --git a/runtime/onert/test/core/interp/ExecManager.cc b/runtime/onert/test/core/interp/ExecManager.cc
deleted file mode 100644
index 09190bc58..000000000
--- a/runtime/onert/test/core/interp/ExecManager.cc
+++ /dev/null
@@ -1,361 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include <memory>
-
-#include "ir/Graph.h"
-#include "interp/InterpExecutor.h"
-#include "exec/Execution.h"
-#include "ir/operation/BinaryArithmetic.h"
-
-namespace
-{
-
-using namespace onert::ir;
-using InterpExecutor = onert::interp::InterpExecutor;
-using Execution = onert::exec::Execution;
-using ExecutorMap = onert::exec::ExecutorMap;
-
-class InterpExecutorTest : public ::testing::Test
-{
-protected:
- virtual void SetUp() {}
- void CreateSimpleModel()
- {
- // Model: one elementwise add operation
- // model input: lhs, rhs
- // model output: add result
- // lhs, rhs, result shape: {1, 2, 2, 1}
- // activation: none (constant)
- _graph = std::make_unique<Graph>();
-
- // Add operands
-
- Shape shape{1, 2, 2, 1};
- TypeInfo type{DataType::INT32};
- Shape shape_scalar(0);
- TypeInfo type_scalar{DataType::INT32};
-
- auto operand_lhs = _graph->addOperand(shape, type);
- auto operand_rhs = _graph->addOperand(shape, type);
- auto operand_result = _graph->addOperand(shape, type);
-
- // Add operations
-
- operation::BinaryArithmetic::Param param;
- param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
- param.activation = Activation::NONE;
- auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
- auto output_set = OperandIndexSequence{operand_result};
- _graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
-
- // Identify model inputs and outputs
-
- _graph->getInputs().append(operand_lhs);
- _graph->getInputs().append(operand_rhs);
- _graph->getOutputs().append(operand_result);
-
- _graph->finishBuilding();
-
- auto subgs = std::make_shared<onert::ir::Subgraphs>();
- subgs->push(onert::ir::SubgraphIndex{0}, _graph);
- _graph->setSubgraphs(subgs);
-
- _executors = std::make_shared<ExecutorMap>();
- _executors->insert(
- std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
- }
-
- void CreateTwoStepModel()
- {
- // Model: two elementwise add operations
- // model input: lhs, rhs1
- // model output: second add result (result2)
- // constant: rhs2
- // result1 <= (lhs + rhs1)
- // result2 <= (result1 + rhs2)
- // lhs, rhs1, rhs2, result1, result2 shape: {1, 2, 2, 1}
- // activation: none (constant)
- _graph = std::make_unique<Graph>();
-
- // 1st add operands (result1 <= lhs + rhs1)
-
- Shape shape{1, 2, 2, 1};
- TypeInfo type{DataType::INT32};
- Shape shape_scalar(0);
- TypeInfo type_scalar{DataType::INT32};
-
- static int32_t rhs2_data[4] = {3, 1, -1, 5};
-
- auto operand_lhs = _graph->addOperand(shape, type);
- auto operand_rhs1 = _graph->addOperand(shape, type);
- auto operand_result1 = _graph->addOperand(shape, type);
- auto operand_rhs2 = _graph->addOperand(shape, type);
- auto operand_result2 = _graph->addOperand(shape, type);
- _graph->operands()
- .at(operand_rhs2)
- .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16));
-
- // 2nd add operations (result2 <= result1 + rhs2)
-
- operation::BinaryArithmetic::Param param1;
- param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
- param1.activation = Activation::NONE;
- auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
- auto output_set1 = OperandIndexSequence{operand_result1};
- _graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
-
- operation::BinaryArithmetic::Param param2;
- param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
- param2.activation = Activation::NONE;
- auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
- auto output_set2 = OperandIndexSequence{operand_result2};
- _graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
-
- // Identify model inputs and outputs
-
- _graph->getInputs().append(operand_lhs);
- _graph->getInputs().append(operand_rhs1);
- _graph->getOutputs().append(operand_result2);
-
- _graph->finishBuilding();
-
- auto subgs = std::make_shared<onert::ir::Subgraphs>();
- subgs->push(onert::ir::SubgraphIndex{0}, _graph);
- _graph->setSubgraphs(subgs);
-
- _executors = std::make_shared<ExecutorMap>();
- _executors->insert(
- std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
- }
-
- void CreateUnspecifiedDimensionsModel()
- {
- // Model: one elementwise add operation
- // model input: lhs, rhs
- // model output: add result
- // lhs, rhs, result shape: {1, unknown, 2, 1}
- // activation: none (constant)
- _graph = std::make_unique<Graph>();
-
- // Add operands
-
- Shape shape{1, 0, 2, 1};
- TypeInfo type{DataType::INT32};
- Shape shape_scalar(0);
- TypeInfo type_scalar{DataType::INT32};
-
- auto operand_lhs = _graph->addOperand(shape, type);
- auto operand_rhs = _graph->addOperand(shape, type);
-
- auto operand_activation = _graph->addOperand(shape_scalar, type_scalar);
- _graph->operands()
- .at(operand_activation)
- .data(
- std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&_activation_value), 4));
-
- auto operand_result = _graph->addOperand(shape, type);
-
- // Add operations
-
- operation::BinaryArithmetic::Param param;
- param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
- param.activation = Activation::NONE;
- auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
- auto output_set = OperandIndexSequence{operand_result};
- _graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
-
- // Identify model inputs and outputs
-
- _graph->getInputs().append(operand_lhs);
- _graph->getInputs().append(operand_rhs);
- _graph->getOutputs().append(operand_result);
-
- _graph->finishBuilding();
-
- auto subgs = std::make_shared<onert::ir::Subgraphs>();
- subgs->push(onert::ir::SubgraphIndex{0}, _graph);
- _graph->setSubgraphs(subgs);
-
- _executors = std::make_shared<ExecutorMap>();
- _executors->insert(
- std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
- }
-
- void createExecution() { _execution = std::make_unique<Execution>(_executors); }
-
- virtual void TearDown() { _executors = nullptr; }
-
- std::shared_ptr<Graph> _graph{nullptr};
- std::shared_ptr<ExecutorMap> _executors{nullptr};
- std::unique_ptr<Execution> _execution{nullptr};
- const int32_t _activation_value{0};
-};
-
-TEST_F(InterpExecutorTest, create_empty)
-{
- Graph graph;
- graph.finishBuilding();
- auto executor = std::make_unique<InterpExecutor>(graph);
- ASSERT_NE(executor, nullptr);
-}
-
-TEST_F(InterpExecutorTest, create_simple)
-{
- CreateSimpleModel();
- ASSERT_NE(_executors, nullptr);
- ASSERT_NE(_executors->at(onert::ir::SubgraphIndex{0}), nullptr);
-}
-
-TEST_F(InterpExecutorTest, setInput)
-{
- CreateSimpleModel();
- createExecution();
-
- auto input1 = IOIndex{0};
- const int32_t input1_buffer[4] = {1, 0, -1, -2};
-
- EXPECT_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 4),
- std::runtime_error);
- EXPECT_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 12),
- std::runtime_error);
- EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16));
-}
-
-TEST_F(InterpExecutorTest, setOutput)
-{
- CreateSimpleModel();
- createExecution();
-
- auto output = IOIndex{0};
- auto output_idx = _graph->getOutputs().at(output);
-
- int32_t output_buffer[4] = {};
-
- EXPECT_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 4),
- std::runtime_error);
- EXPECT_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 12),
- std::runtime_error);
- EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16));
-}
-
-TEST_F(InterpExecutorTest, setInputForUnspecifiedDimensions)
-{
- CreateUnspecifiedDimensionsModel();
- createExecution();
-
- auto input1 = IOIndex{0};
- const int32_t input1_buffer[4] = {1, 0, -1, -2};
-
- TypeInfo operand_type{DataType::INT32};
- Shape operand_shape{1, 2, 2, 1};
-
- EXPECT_THROW(_execution->setInput(input1, operand_type, operand_shape,
- reinterpret_cast<const void *>(input1_buffer), 4),
- std::runtime_error);
- EXPECT_THROW(_execution->setInput(input1, operand_type, operand_shape,
- reinterpret_cast<const void *>(input1_buffer), 12),
- std::runtime_error);
- EXPECT_NO_THROW(_execution->setInput(input1, operand_type, operand_shape,
- reinterpret_cast<const void *>(input1_buffer), 16));
-}
-
-TEST_F(InterpExecutorTest, setOutputForUnspecifiedDimensions)
-{
- CreateUnspecifiedDimensionsModel();
- createExecution();
-
- auto output = IOIndex{0};
- auto output_idx = _graph->getOutputs().at(output);
-
- TypeInfo operand_type{DataType::INT32};
- Shape operand_shape{1, 2, 2, 1};
-
- int32_t output_buffer[4] = {};
-
- EXPECT_THROW(_execution->setOutput(output, operand_type, operand_shape,
- reinterpret_cast<void *>(output_buffer), 4),
- std::runtime_error);
- EXPECT_THROW(_execution->setOutput(output, operand_type, operand_shape,
- reinterpret_cast<void *>(output_buffer), 12),
- std::runtime_error);
- EXPECT_NO_THROW(_execution->setOutput(output, operand_type, operand_shape,
- reinterpret_cast<void *>(output_buffer), 16));
-}
-
-TEST_F(InterpExecutorTest, execute)
-{
- CreateSimpleModel();
- createExecution();
-
- auto input1 = IOIndex{0};
- auto input2 = IOIndex{1};
- auto input1_idx = _graph->getInputs().at(input1);
- auto input2_idx = _graph->getInputs().at(input2);
-
- const int32_t input1_buffer[4] = {1, 0, -1, -2};
- const int32_t input2_buffer[4] = {1, -3, 2, -4};
-
- auto output = IOIndex{0};
- auto output_idx = _graph->getOutputs().at(output);
-
- int32_t output_buffer[4] = {};
-
- EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16));
- EXPECT_NO_THROW(_execution->setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16));
- EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16));
- EXPECT_NO_THROW(_execution->execute());
- EXPECT_EQ(output_buffer[0], 2);
- EXPECT_EQ(output_buffer[1], -3);
- EXPECT_EQ(output_buffer[2], 1);
- EXPECT_EQ(output_buffer[3], -6);
-}
-
-TEST_F(InterpExecutorTest, executeTwoStep)
-{
- CreateTwoStepModel();
- createExecution();
-
- auto input1 = IOIndex{0};
- auto input2 = IOIndex{1};
- auto input1_idx = _graph->getInputs().at(input1);
- auto input2_idx = _graph->getInputs().at(input2);
-
- const int32_t input1_buffer[4] = {1, 0, -1, -2};
- const int32_t input2_buffer[4] = {1, -3, 2, -4};
-
- auto output = IOIndex{0};
- auto output_idx = _graph->getOutputs().at(output);
-
- int32_t output_buffer[4] = {};
-
- EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16));
- EXPECT_NO_THROW(_execution->setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16));
- EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16));
- EXPECT_NO_THROW(_execution->execute());
- EXPECT_EQ(output_buffer[0], 5);
- EXPECT_EQ(output_buffer[1], -2);
- EXPECT_EQ(output_buffer[2], 0);
- EXPECT_EQ(output_buffer[3], -1);
-}
-
-} // namespace
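
A recurring assertion in these interpreter tests is the buffer-size contract: setInput/setOutput throw std::runtime_error unless the byte length matches the operand's total size exactly (four int32 elements require 16 bytes, so 4 and 12 are rejected). A sketch of that validation, under the assumption that a tensor's size is the product of its dims times the element width:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <numeric>
#include <stdexcept>
#include <vector>

// Total byte size of a tensor: product of dims times the element width.
size_t byteSize(const std::vector<int> &dims, size_t elem_size)
{
  return std::accumulate(dims.begin(), dims.end(), elem_size,
                         [](size_t acc, int d) { return acc * d; });
}

void setInputChecked(const std::vector<int> &dims, size_t elem_size, size_t length)
{
  if (length != byteSize(dims, elem_size))
    throw std::runtime_error{"input buffer size does not match operand size"};
  // ... bind the buffer ...
}

int main()
{
  setInputChecked({1, 2, 2, 1}, sizeof(int32_t), 16); // OK: 4 elements * 4 bytes
  bool threw = false;
  try
  {
    setInputChecked({1, 2, 2, 1}, sizeof(int32_t), 12); // short by one element
  }
  catch (const std::runtime_error &)
  {
    threw = true;
  }
  assert(threw);
  return 0;
}
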
diff --git a/runtime/onert/test/graph/Graph.cc b/runtime/onert/test/graph/Graph.cc
deleted file mode 100644
index 34e9fe002..000000000
--- a/runtime/onert/test/graph/Graph.cc
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Graph.h"
-
-TEST(Graph, inputs_and_outputs)
-{
- onert::ir::Graph graph;
-
- onert::ir::OperandIndex index0{0u};
- onert::ir::OperandIndex index1{1u};
-
- graph.addInput({index0});
- graph.addInput({index1});
-
- onert::ir::OperandIndex index10{10u};
- onert::ir::OperandIndex index11{11u};
- onert::ir::OperandIndex index12{12u};
-
- graph.addOutput({index10});
- graph.addOutput({index11});
- graph.addOutput({index12});
-
- ASSERT_EQ(graph.getInputs().size(), 2);
- ASSERT_EQ(graph.getOutputs().size(), 3);
-
- onert::ir::IOIndex io_index0{0};
- onert::ir::IOIndex io_index1{1};
- onert::ir::IOIndex io_index2{2};
-
- ASSERT_EQ(graph.getInputs().at(io_index0), 0);
- ASSERT_EQ(graph.getInputs().at(io_index1), 1);
-
- ASSERT_EQ(graph.getOutputs().at(io_index0), 10);
- ASSERT_EQ(graph.getOutputs().at(io_index1), 11);
- ASSERT_EQ(graph.getOutputs().at(io_index2), 12);
-}
diff --git a/runtime/onert/test/graph/Index.cc b/runtime/onert/test/graph/Index.cc
deleted file mode 100644
index 358e64c82..000000000
--- a/runtime/onert/test/graph/Index.cc
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "util/Index.h"
-
-using Index = ::onert::util::Index<uint32_t, struct TestTag>;
-
-TEST(Index, index_test)
-{
- Index idx1{1u};
- Index idx2{2u};
- Index idx3{idx1};
-
- ASSERT_EQ(idx1, 1);
- ASSERT_EQ(idx1, 1u);
- ASSERT_EQ(idx1.value(), 1u);
- ASSERT_NE(idx1, idx2);
- ASSERT_EQ(idx1, idx3);
-}
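
The Index test exercises the tag-template idiom: util::Index<uint32_t, struct TestTag> mints a distinct type per tag, so indices of different kinds cannot be mixed even though both wrap the same integer. A minimal sketch of such a wrapper (not the onert definition):

#include <cassert>
#include <cstdint>

// A strongly typed index: instances with different tags are different types,
// so they cannot be compared or assigned across kinds by accident.
template <typename T, typename Tag> class Index
{
public:
  explicit Index(T value) : _value{value} {}
  T value() const { return _value; }
  bool operator==(Index other) const { return _value == other._value; }
  bool operator!=(Index other) const { return _value != other._value; }

private:
  T _value;
};

using OperandIdx = Index<uint32_t, struct OperandTag>;
using OperationIdx = Index<uint32_t, struct OperationTag>;

int main()
{
  OperandIdx idx1{1u};
  OperandIdx idx2{2u};
  OperandIdx idx3{idx1};
  assert(idx1 == idx3 && idx1 != idx2 && idx1.value() == 1u);
  // OperationIdx bad = idx1; // would not compile: different tag
  return 0;
}
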
diff --git a/runtime/onert/test/graph/MockNode.h b/runtime/onert/test/graph/MockNode.h
deleted file mode 100644
index 60b4719ed..000000000
--- a/runtime/onert/test/graph/MockNode.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_TEST_GRAPH_MOCK_NODE_H__
-#define __ONERT_TEST_GRAPH_MOCK_NODE_H__
-
-#include "ir/Operation.h"
-#include "ir/OperandIndexSequence.h"
-
-namespace onert_test
-{
-namespace ir
-{
-
-class SimpleMock : public onert::ir::Operation
-{
-public:
- SimpleMock(const onert::ir::OperandIndexSequence &inputs,
- const onert::ir::OperandIndexSequence &outputs)
- : Operation{onert::ir::OperandConstraint::createAny()}
- {
- setInputs(inputs);
- setOutputs(outputs);
- }
-
-public:
- void accept(onert::ir::OperationVisitor &) const override {}
- onert::ir::OpCode opcode() const final { return onert::ir::OpCode::Invalid; }
-};
-
-} // namespace ir
-} // namespace onert_test
-
-#endif // __ONERT_TEST_GRAPH_MOCK_NODE_H__
diff --git a/runtime/onert/test/graph/operand/IndexSet.cc b/runtime/onert/test/graph/operand/IndexSet.cc
deleted file mode 100644
index 6215e0d24..000000000
--- a/runtime/onert/test/graph/operand/IndexSet.cc
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/OperandIndexSequence.h"
-
-using onert::ir::OperandIndex;
-using onert::ir::OperandIndexSequence;
-
-TEST(graph_OperandIndexSequence, append)
-{
- OperandIndexSequence iset{0, 2, 4, 8};
-
- ASSERT_EQ(iset.size(), 4);
-
- iset.append(OperandIndex{10});
-
- ASSERT_EQ(iset.size(), 5);
-
- onert::ir::IOIndex index1{1};
- onert::ir::IOIndex index2{4};
-
- ASSERT_EQ(iset.at(index1), 2);
- ASSERT_EQ(iset.at(index2), 10);
-
- ASSERT_TRUE(iset.contains(OperandIndex{2}));
- ASSERT_TRUE(iset.contains(OperandIndex{10}));
- ASSERT_FALSE(iset.contains(OperandIndex{11}));
-}
-
-TEST(graph_OperandIndexSequence, replace)
-{
- OperandIndexSequence iset{0, 1, 2, 3};
-
- iset.replace(OperandIndex{1}, OperandIndex{9});
- ASSERT_FALSE(iset.contains(OperandIndex{1}));
- ASSERT_TRUE(iset.contains(OperandIndex{9}));
-}
diff --git a/runtime/onert/test/graph/operand/LayoutSet.cc b/runtime/onert/test/graph/operand/LayoutSet.cc
deleted file mode 100644
index e35bddd8b..000000000
--- a/runtime/onert/test/graph/operand/LayoutSet.cc
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/LayoutSet.h"
-
-using onert::ir::Layout;
-using onert::ir::LayoutSet;
-
-TEST(graph_operand_LayoutSet, layout_set_operators)
-{
- LayoutSet set1{Layout::NCHW};
- LayoutSet set2{Layout::NHWC};
- LayoutSet set3 = set1 | set2;
-
- ASSERT_EQ(set3.size(), 2);
-
- ASSERT_EQ((set3 - set1).size(), 1);
- ASSERT_EQ((set3 - set1).contains(Layout::NHWC), true);
- ASSERT_EQ((set3 - set2).size(), 1);
- ASSERT_EQ((set3 - set2).contains(Layout::NCHW), true);
- ASSERT_EQ((set3 - set3).size(), 0);
-
- ASSERT_EQ((set3 & set1).size(), 1);
- ASSERT_EQ((set3 & set1).contains(Layout::NCHW), true);
- ASSERT_EQ((set3 & set2).size(), 1);
- ASSERT_EQ((set3 & set2).contains(Layout::NHWC), true);
- ASSERT_EQ((set1 & set2).size(), 0);
-}
diff --git a/runtime/onert/test/graph/operand/Set.cc b/runtime/onert/test/graph/operand/Set.cc
deleted file mode 100644
index 0d35b5581..000000000
--- a/runtime/onert/test/graph/operand/Set.cc
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Operands.h"
-
-TEST(graph_operand_Set, set_test)
-{
- onert::ir::Operands set;
-
- onert::ir::Shape shape0{1, 2, 3};
-
- onert::ir::Shape shape1(4);
- shape1.dim(0) = 10;
- shape1.dim(1) = 20;
- shape1.dim(2) = 30;
- shape1.dim(3) = 40;
-
- onert::ir::TypeInfo type{onert::ir::DataType::INT32};
-
- set.emplace(shape0, type);
- set.emplace(shape1, type);
-
- ASSERT_EQ(set.exist(onert::ir::OperandIndex{0u}), true);
- ASSERT_EQ(set.exist(onert::ir::OperandIndex{1u}), true);
- ASSERT_EQ(set.exist(onert::ir::OperandIndex{2u}), false);
-
- ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(0), 1);
- ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(1), 2);
- ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(2), 3);
-}
diff --git a/runtime/onert/test/graph/operand/UseDef.cc b/runtime/onert/test/graph/operand/UseDef.cc
deleted file mode 100644
index cd2cdb739..000000000
--- a/runtime/onert/test/graph/operand/UseDef.cc
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Graph.h"
-#include "ir/verifier/Verifier.h"
-#include <memory>
-#include "../MockNode.h"
-
-#include <typeindex>
-
-namespace
-{
-
-using IndexSet = onert::ir::OperandIndexSequence;
-using Mock = onert_test::ir::SimpleMock;
-
-} // namespace
-
-TEST(graph_operand_usedef, usedef_test)
-{
- onert::ir::Graph graph;
- onert::ir::verifier::DAGChecker verifier;
-
- onert::ir::Shape shape(3);
- onert::ir::TypeInfo type{onert::ir::DataType::INT32};
-
- // Model Input/Output
- auto input_operand = graph.addOperand(shape, type);
- auto output_operand = graph.addOperand(shape, type);
-
- graph.addInput(input_operand);
- graph.addOutput(output_operand);
-
- // MockNode1
- auto operand_index1 = graph.addOperand(shape, type);
- auto mocknode_index1 =
- graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index1}));
-
- // MockNode2
- auto operand_index2 = graph.addOperand(shape, type);
- auto mocknode_index2 =
- graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index2}));
-
- // MockNode3(two input)
- auto multiinput_index = graph.addOperation(
- std::make_unique<Mock>(IndexSet{operand_index1, operand_index2}, IndexSet{output_operand}));
-
- graph.finishBuilding();
-
- ASSERT_EQ(verifier.verify(graph), true);
-
- // Check def
- ASSERT_EQ(graph.operands().at(operand_index1).getDef(), mocknode_index1);
- ASSERT_EQ(graph.operands().at(operand_index2).getDef(), mocknode_index2);
- ASSERT_EQ(graph.operands().at(output_operand).getDef(), multiinput_index);
-
- ASSERT_NE(graph.operands().at(operand_index1).getDef(), mocknode_index2);
- ASSERT_NE(graph.operands().at(operand_index1).getDef(), multiinput_index);
-
- // Check use
- ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index1), true);
- ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index2), true);
- ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(multiinput_index), false);
- ASSERT_EQ(graph.operands().at(operand_index1).getUses().contains(multiinput_index), true);
- ASSERT_EQ(graph.operands().at(operand_index2).getUses().contains(multiinput_index), true);
-
- ASSERT_EQ(graph.operands().at(input_operand).getUses().size(), 2);
- ASSERT_EQ(graph.operands().at(operand_index1).getUses().size(), 1);
- ASSERT_EQ(graph.operands().at(output_operand).getUses().size(), 0);
-}
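
The assertions above pin down the invariant that addOperation maintains: the new operation becomes the def of each output operand and is appended to the use set of each input operand. A compact sketch of that bookkeeping with plain integer indices (illustrative; onert stores operands in an ObjectManager):

#include <cassert>
#include <set>
#include <vector>

struct Operand
{
  int def = -1;       // defining operation, -1 for model inputs
  std::set<int> uses; // operations that read this operand
};

// Register an operation: it defines its outputs and uses its inputs.
void addOperation(std::vector<Operand> &operands, int op,
                  const std::vector<int> &inputs, const std::vector<int> &outputs)
{
  for (int in : inputs)
    operands[in].uses.insert(op);
  for (int out : outputs)
    operands[out].def = op;
}

int main()
{
  std::vector<Operand> operands(4); // 0: input, 1/2: temps, 3: output
  addOperation(operands, 0, {0}, {1});    // MockNode1
  addOperation(operands, 1, {0}, {2});    // MockNode2
  addOperation(operands, 2, {1, 2}, {3}); // MockNode3 (two inputs)
  assert(operands[1].def == 0 && operands[3].def == 2);
  assert(operands[0].uses.size() == 2); // read by MockNode1 and MockNode2
  assert(operands[3].uses.empty());     // the model output has no users
  return 0;
}
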
diff --git a/runtime/onert/test/graph/operation/Set.cc b/runtime/onert/test/graph/operation/Set.cc
deleted file mode 100644
index 088c44b8c..000000000
--- a/runtime/onert/test/graph/operation/Set.cc
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "../MockNode.h"
-#include "ir/Operations.h"
-
-using onert::ir::Operation;
-using onert::ir::OperationIndex;
-using onert::ir::Operations;
-
-TEST(graph_operation_Set, operation_test)
-{
- Operations ops;
- ops.push(std::unique_ptr<Operation>(new onert_test::ir::SimpleMock({1, 2, 3, 4}, {5, 6, 7})));
- OperationIndex idx{0u};
- ASSERT_EQ(ops.at(idx).getInputs().size(), 4);
- ASSERT_EQ(ops.at(idx).getOutputs().size(), 3);
-}
diff --git a/runtime/onert/test/graph/operation/SetIO.cc b/runtime/onert/test/graph/operation/SetIO.cc
deleted file mode 100644
index 378c5b4b9..000000000
--- a/runtime/onert/test/graph/operation/SetIO.cc
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Graph.h"
-#include "ir/Index.h"
-#include "ir/OperandIndexSequence.h"
-#include "ir/operation/Conv2D.h"
-#include "ir/operation/Concat.h"
-
-#include <memory>
-
-#include <stdexcept>
-
-using Index = onert::ir::IOIndex;
-using IndexSet = onert::ir::OperandIndexSequence;
-
-TEST(graph_operation_setIO, operation_setIO_conv)
-{
- onert::ir::Graph graph;
-
- onert::ir::Shape shape{3};
- onert::ir::TypeInfo type{onert::ir::DataType::INT32};
-
- // Add Conv
- using Graph = onert::ir::operation::Conv2D;
-
- auto input_operand = graph.addOperand(shape, type);
- auto kernel_operand = graph.addOperand(shape, type);
- auto bias_operand = graph.addOperand(shape, type);
- IndexSet inputs{input_operand, kernel_operand, bias_operand};
-
- Graph::Param conv_params;
- conv_params.padding.type = onert::ir::PaddingType::SAME;
- conv_params.stride.horizontal = 1;
- conv_params.stride.vertical = 1;
- conv_params.activation = onert::ir::Activation::NONE;
-
- auto output_operand = graph.addOperand(shape, type).value();
- IndexSet outputs{output_operand};
-
- auto conv = std::make_unique<Graph>(inputs, outputs, conv_params);
-
- ASSERT_NE(conv, nullptr);
- ASSERT_EQ(conv->getInputs().at(Index{0}).value(), inputs.at(0).value());
- conv->setInputs({8, 9, 10});
- ASSERT_NE(conv->getInputs().at(Index{0}).value(), inputs.at(0).value());
- ASSERT_EQ(conv->getInputs().at(Index{0}).value(), 8);
-}
-
-TEST(graph_operation_setIO, operation_setIO_concat)
-{
- onert::ir::Graph graph;
-
- onert::ir::Shape shape{3};
-
- onert::ir::TypeInfo type{onert::ir::DataType::INT32};
-
- using Graph = onert::ir::operation::Concat;
-
- // Add Concat
- IndexSet inputs;
- for (int i = 0; i < 6; ++i)
- {
- inputs.append(graph.addOperand(shape, type));
- }
-
- Graph::Param concat_params{0};
-
- auto output_operand = graph.addOperand(shape, type).value();
- IndexSet outputs{output_operand};
-
- auto concat = std::make_unique<Graph>(inputs, outputs, concat_params);
-
- ASSERT_NE(concat, nullptr);
- ASSERT_EQ(concat->getInputs().size(), 6);
- ASSERT_EQ(concat->getInputs().at(Index{0}).value(), inputs.at(0).value());
-
- concat->setInputs({80, 6, 9, 11});
- ASSERT_EQ(concat->getInputs().size(), 4);
- ASSERT_NE(concat->getInputs().at(Index{0}).value(), inputs.at(0).value());
- ASSERT_EQ(concat->getInputs().at(Index{0}).value(), 80);
- ASSERT_EQ(concat->getInputs().at(Index{2}).value(), 9);
- ASSERT_THROW(concat->getInputs().at(Index{5}), std::out_of_range);
-}
diff --git a/runtime/onert/test/graph/verifier/Verifier.cc b/runtime/onert/test/graph/verifier/Verifier.cc
deleted file mode 100644
index f8c7557e3..000000000
--- a/runtime/onert/test/graph/verifier/Verifier.cc
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Operation.h"
-#include "ir/Graph.h"
-#include "ir/verifier/Verifier.h"
-#include <memory>
-#include "ir/Operand.h"
-#include "../MockNode.h"
-
-using IndexSet = onert::ir::OperandIndexSequence;
-using Mock = onert_test::ir::SimpleMock;
-
-TEST(Verifier, dag_checker)
-{
- onert::ir::Graph graph;
-
- onert::ir::Shape shape{3};
- onert::ir::TypeInfo type{onert::ir::DataType::INT32};
-
- auto operand1 = graph.addOperand(shape, type);
- auto operand2 = graph.addOperand(shape, type);
-
- graph.addInput(operand1);
- graph.addOutput(operand2);
-
- graph.addOperation(std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2}));
-
- graph.finishBuilding();
-
- onert::ir::verifier::DAGChecker verifier;
-
- ASSERT_EQ(verifier.verify(graph), true);
-}
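
DAGChecker accepts a graph iff the operation graph has no cycle. The textbook way to check that is depth-first search with three-color marking, where a gray-to-gray edge is a back edge; a self-contained sketch over an adjacency list (an illustration, not the onert verifier):

#include <cassert>
#include <cstddef>
#include <functional>
#include <vector>

// True iff the directed graph given as an adjacency list has no cycle.
bool isDag(const std::vector<std::vector<int>> &adj)
{
  enum Color { White, Gray, Black };
  std::vector<Color> color(adj.size(), White);

  // Recursive DFS: meeting a Gray node again means a back edge, i.e. a cycle.
  std::function<bool(int)> visit = [&](int u) {
    color[u] = Gray;
    for (int v : adj[u])
    {
      if (color[v] == Gray)
        return false;
      if (color[v] == White && !visit(v))
        return false;
    }
    color[u] = Black;
    return true;
  };

  for (size_t u = 0; u < adj.size(); ++u)
    if (color[u] == White && !visit(static_cast<int>(u)))
      return false;
  return true;
}

int main()
{
  assert(isDag({{1}, {2}, {}}));   // 0 -> 1 -> 2: a chain, fine
  assert(!isDag({{1}, {2}, {0}})); // 0 -> 1 -> 2 -> 0: cycle detected
  return 0;
}
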
diff --git a/runtime/onert/test/ir/Shape.cc b/runtime/onert/test/ir/Shape.cc
deleted file mode 100644
index c24aeda8d..000000000
--- a/runtime/onert/test/ir/Shape.cc
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <ir/Shape.h>
-
-#include <gtest/gtest.h>
-
-TEST(ShapeTest, basic_test)
-{
- {
- onert::ir::Shape shape(3);
-
- shape.dim(0) = 1;
- shape.dim(1) = 2;
- shape.dim(2) = 3;
-
- ASSERT_EQ(shape.rank(), 3);
- ASSERT_EQ(shape.num_elements(), 6);
- ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), false);
- ASSERT_EQ(shape.hasUnspecifiedDims(), false);
- }
- {
- onert::ir::Shape shape; // scalar or rank is unspecified
-
- ASSERT_EQ(shape.rank(), 0);
- ASSERT_EQ(shape.num_elements(), 1);
- ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), true);
- ASSERT_EQ(shape.hasUnspecifiedDims(), false);
- }
-}
-
-TEST(ShapeTest, neg_basic_test)
-{
- {
- onert::ir::Shape shape(2);
-
- shape.dim(0) = 1;
- shape.dim(1) = onert::ir::Shape::UNSPECIFIED_DIM;
-
- ASSERT_EQ(shape.rank(), 2);
- ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), false);
- ASSERT_EQ(shape.hasUnspecifiedDims(), true);
- EXPECT_ANY_THROW(shape.num_elements());
- }
-}
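
The Shape tests fix two conventions: a rank-0 shape is a scalar with one element (the empty product), and num_elements() must throw while any dimension is still unspecified. A sketch of a shape type with those semantics, using -1 as a hypothetical unspecified-dimension sentinel (onert's actual sentinel value may differ):

#include <cassert>
#include <cstdint>
#include <stdexcept>
#include <vector>

class Shape
{
public:
  static constexpr int32_t kUnspecifiedDim = -1; // hypothetical sentinel

  explicit Shape(int rank) : _dims(rank, kUnspecifiedDim) {}
  int32_t &dim(int i) { return _dims[i]; }

  bool hasUnspecifiedDims() const
  {
    for (auto d : _dims)
      if (d == kUnspecifiedDim)
        return true;
    return false;
  }

  uint64_t num_elements() const
  {
    if (hasUnspecifiedDims())
      throw std::runtime_error{"num_elements() on a partially specified shape"};
    uint64_t n = 1; // empty product: a rank-0 scalar has one element
    for (auto d : _dims)
      n *= static_cast<uint64_t>(d);
    return n;
  }

private:
  std::vector<int32_t> _dims;
};

int main()
{
  Shape s(3);
  s.dim(0) = 1;
  s.dim(1) = 2;
  s.dim(2) = 3;
  assert(s.num_elements() == 6);
  assert(Shape(0).num_elements() == 1); // scalar

  Shape t(2);
  t.dim(0) = 1; // dim(1) stays unspecified
  bool threw = false;
  try { t.num_elements(); } catch (const std::runtime_error &) { threw = true; }
  assert(threw);
  return 0;
}
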
diff --git a/runtime/onert/test/util/ObjectManager.cc b/runtime/onert/test/util/ObjectManager.cc
deleted file mode 100644
index 5051bcfa6..000000000
--- a/runtime/onert/test/util/ObjectManager.cc
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "util/ObjectManager.h"
-#include "util/Index.h"
-
-using namespace onert;
-
-struct TestTag;
-using Index = typename util::Index<uint32_t, TestTag>;
-
-TEST(ObjectManager, emplace)
-{
- util::ObjectManager<Index, int> man;
-
- auto index = man.emplace(100);
- ASSERT_EQ(man.at(index), 100);
-}
-
-TEST(ObjectManager, remove_1)
-{
- util::ObjectManager<Index, int> man;
-
- Index index = man.emplace(100);
- ASSERT_TRUE(man.exist(index));
- ASSERT_EQ(man.at(index), 100);
-
- man.remove(index);
- ASSERT_FALSE(man.exist(index));
-}
-
-TEST(ObjectManager, remove_2)
-{
- util::ObjectManager<Index, int> man;
-
- auto index0 = man.emplace(100);
- auto index1 = man.emplace(200);
- ASSERT_TRUE(man.exist(index0));
- ASSERT_EQ(man.at(index0), 100);
- ASSERT_TRUE(man.exist(index1));
- ASSERT_EQ(man.at(index1), 200);
-
- man.remove(index0);
- ASSERT_FALSE(man.exist(index0));
- ASSERT_TRUE(man.exist(index1));
- ASSERT_EQ(man.at(index1), 200);
-}
-
-TEST(ObjectManager, push)
-{
- util::ObjectManager<Index, int> man;
-
- auto index = man.push(std::unique_ptr<int>{new int{100}});
- ASSERT_EQ(man.at(index), 100);
-}
-
-TEST(ObjectManager, const_iterate)
-{
- util::ObjectManager<Index, int> man;
-
- auto index0 = man.emplace(100);
- auto index1 = man.emplace(200);
- auto index2 = man.emplace(300);
-
- int sum = 0;
- man.iterate([&](const Index &index, const int &val) { sum += val; });
- ASSERT_EQ(sum, 600);
-}
-
-TEST(ObjectManager, non_const_iterate)
-{
- util::ObjectManager<Index, int> man;
-
- auto index0 = man.emplace(100);
- auto index1 = man.emplace(200);
- auto index2 = man.emplace(300);
-
- man.iterate([&](const Index &index, int &val) { val += 1; });
- ASSERT_EQ(man.at(index0), 101);
- ASSERT_EQ(man.at(index1), 201);
- ASSERT_EQ(man.at(index2), 301);
-}
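
ObjectManager pairs the tagged Index with owned storage: emplace constructs in place and returns a fresh index, remove invalidates it, and iterate visits the live entries. A sketch of that contract over std::unordered_map, with plain uint32_t keys standing in for the tagged index (keys only grow, so indices are never reused):

#include <cassert>
#include <cstdint>
#include <functional>
#include <unordered_map>
#include <utility>

template <typename T> class ObjectManager
{
public:
  template <typename... Args> uint32_t emplace(Args &&...args)
  {
    uint32_t index = _next++;
    _objects.emplace(index, T{std::forward<Args>(args)...});
    return index;
  }
  bool exist(uint32_t index) const { return _objects.count(index) != 0; }
  T &at(uint32_t index) { return _objects.at(index); }
  void remove(uint32_t index) { _objects.erase(index); }
  void iterate(const std::function<void(uint32_t, T &)> &fn)
  {
    for (auto &entry : _objects)
      fn(entry.first, entry.second);
  }

private:
  std::unordered_map<uint32_t, T> _objects;
  uint32_t _next = 0;
};

int main()
{
  ObjectManager<int> man;
  auto i0 = man.emplace(100);
  auto i1 = man.emplace(200);
  man.remove(i0);
  assert(!man.exist(i0) && man.at(i1) == 200);

  int sum = 0;
  man.iterate([&](uint32_t, int &v) { sum += v; });
  assert(sum == 200);
  return 0;
}
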
diff --git a/runtime/onert/test/util/ShapeInference.cc b/runtime/onert/test/util/ShapeInference.cc
deleted file mode 100644
index aab33fab5..000000000
--- a/runtime/onert/test/util/ShapeInference.cc
+++ /dev/null
@@ -1,380 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Layout.h"
-#include "util/ShapeInference.h"
-
-using namespace onert::ir;
-
-TEST(ShapeInference, Elementwise)
-{
- Shape lhs_shape{1, 299, 299, 3};
- Shape rhs_shape{3};
- auto infered_out_shape = onert::shape_inference::inferEltwiseShape(lhs_shape, rhs_shape);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.dim(0), 1);
- ASSERT_EQ(infered_out_shape.dim(1), 299);
- ASSERT_EQ(infered_out_shape.dim(2), 299);
- ASSERT_EQ(infered_out_shape.dim(3), 3);
-}
-
-TEST(ShapeInference, IncorrectElementwise)
-{
- Shape lhs_shape{1, 299, 299, 3};
- Shape rhs_shape{5, 3};
- ASSERT_THROW(onert::shape_inference::inferEltwiseShape(lhs_shape, rhs_shape), std::runtime_error);
-}
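
Both elementwise cases follow the usual broadcasting rule: align shapes from the trailing dimension, and each pair of dims must match or contain a 1, so {1, 299, 299, 3} against {3} succeeds while {5, 3} fails on 299 vs 5. A sketch of that rule (not onert's inferEltwiseShape):

#include <cassert>
#include <cstddef>
#include <stdexcept>
#include <utility>
#include <vector>

// Broadcast two shapes: align from the trailing dimension; each pair of
// dims must be equal, or one of them must be 1 (and the other wins).
std::vector<int> broadcast(std::vector<int> a, std::vector<int> b)
{
  if (a.size() < b.size())
    std::swap(a, b); // make `a` the higher-rank shape
  std::vector<int> out = a;
  for (size_t i = 0; i < b.size(); ++i)
  {
    int &x = out[out.size() - 1 - i];
    int y = b[b.size() - 1 - i];
    if (x == y || y == 1)
      continue;
    if (x == 1)
      x = y;
    else
      throw std::runtime_error{"incompatible shapes"};
  }
  return out;
}

int main()
{
  assert((broadcast({1, 299, 299, 3}, {3}) == std::vector<int>{1, 299, 299, 3}));
  bool threw = false;
  try { broadcast({1, 299, 299, 3}, {5, 3}); } catch (const std::runtime_error &) { threw = true; }
  assert(threw); // 299 vs 5: neither matches nor is 1
  return 0;
}
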
-
-TEST(ShapeInference, Pool2DNodeSame)
-{
- Shape in_shape{10, 6, 12, 20};
- Stride stride{3, 7};
- Padding padding{PaddingType::SAME};
-
- operation::Pool2D::Param avg_pool_param{
- operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
- auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-
- operation::Pool2D::Param max_pool_param{
- operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
- infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-}
-
-TEST(ShapeInference, Pool2DNodeValid)
-{
- Shape in_shape{10, 6, 12, 20};
- Stride stride{3, 7};
- Padding padding{PaddingType::VALID};
-
- operation::Pool2D::Param avg_pool_param{
- operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
- auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-
- operation::Pool2D::Param max_pool_param{
- operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
- infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-}
-
-TEST(ShapeInference, Pool2DNodeExplicit)
-{
- Shape in_shape{10, 3, 5, 20};
-
- Stride stride{3, 7};
- Padding padding{4, 3, 2, 1};
-
- operation::Pool2D::Param avg_pool_param{
- operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
- auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-
- operation::Pool2D::Param max_pool_param{
- operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
- infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-}
-
-TEST(ShapeInference, Conv2D)
-{
- Shape in_shape{10, 6, 12, 20};
- Shape ker_shape{30, 3, 6, 20};
-
- operation::Conv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, Activation::NONE,
- Dilation{1, 1}};
- auto infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
-
- param = operation::Conv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, Activation::NONE,
- Dilation{1, 1}};
- infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
-
- param =
- operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE, Dilation{1, 1}};
- infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 3);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
-}
-
-TEST(ShapeInference, DepthwiseConv2D)
-{
- Shape in_shape{10, 6, 12, 20};
- Shape ker_shape{1, 3, 6, 60};
-
- operation::DepthwiseConv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, 3,
- Activation::NONE};
- auto infered_out_shape =
- onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
-
- param = operation::DepthwiseConv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, 3,
- Activation::NONE};
- infered_out_shape = onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
-
- param = operation::DepthwiseConv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, 3, Activation::NONE};
- infered_out_shape = onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 3);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
-}
-
-TEST(ShapeInference, Concat)
-{
- {
- Shape in1{10, 20, 30, 3, 50};
- Shape in2{10, 20, 30, 2, 50};
- Shape in3{10, 20, 30, 2, 50};
-
- operation::Concat::Param param{3};
- auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2, in3}, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 5);
- ASSERT_EQ(infered_out_shape.dim(0), 10);
- ASSERT_EQ(infered_out_shape.dim(1), 20);
- ASSERT_EQ(infered_out_shape.dim(2), 30);
- ASSERT_EQ(infered_out_shape.dim(3), 7);
- ASSERT_EQ(infered_out_shape.dim(4), 50);
- }
- {
- // case 1. when axis < 0
- Shape in1{10, 20, 2};
- Shape in2{10, 20, 3};
-
- operation::Concat::Param param{-1};
- auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2}, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 3);
- ASSERT_EQ(infered_out_shape.dim(0), 10);
- ASSERT_EQ(infered_out_shape.dim(1), 20);
- ASSERT_EQ(infered_out_shape.dim(2), 5);
- }
- {
- // case 2. when axis < 0
- Shape in1{2, 20, 2};
- Shape in2{3, 20, 2};
-
- operation::Concat::Param param{-3};
- auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2}, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 3);
- ASSERT_EQ(infered_out_shape.dim(0), 5);
- ASSERT_EQ(infered_out_shape.dim(1), 20);
- ASSERT_EQ(infered_out_shape.dim(2), 2);
- }
-}
-
-TEST(ShapeInference, neg_Concat)
-{
- {
- operation::Concat::Param param{2};
- Shape in1{10, 1, 3};
- Shape in2{10, 2, 4}; // dim[1] should be 1 but 2
-
- EXPECT_ANY_THROW(onert::shape_inference::inferConcatShape({in1, in2}, param));
- }
- { // wrong rank
- operation::Concat::Param param{2};
- Shape in1{10, 2, 3, 4};
- Shape in2{10, 2, 4}; // rank should be 4
-
- EXPECT_ANY_THROW(onert::shape_inference::inferConcatShape({in1, in2}, param));
- }
-}
-
-TEST(ShapeInference, ExpandDims)
-{
- Shape in_shape{30, 40};
-
- auto check = [&](int32_t axis, Shape &expected) {
- auto actual = onert::shape_inference::inferExpandDimsShape(in_shape, axis);
-
- ASSERT_EQ(actual.rank(), 3);
- for (int32_t dim = 0; dim < expected.rank(); dim++)
- ASSERT_EQ(actual.dim(dim), expected.dim(dim));
- };
-
- { // boundary
- int32_t axis = 0;
- Shape expected{1, 30, 40};
- check(axis, expected);
- }
- { // boundary
- int32_t axis = 2;
- Shape expected{30, 40, 1};
- check(axis, expected);
- }
- { // inside
- int32_t axis = 1;
- Shape expected{30, 1, 40};
- check(axis, expected);
- }
- { // negative boundary
- int32_t axis = -1;
- Shape expected{30, 40, 1};
- check(axis, expected);
- }
- { // negative boundary
- int32_t axis = -3;
- Shape expected{1, 30, 40};
- check(axis, expected);
- }
-}
-
-TEST(ShapeInference, neg_ExpandDims)
-{
- Shape in_shape{30, 40};
-
- { // over boundary
- int32_t axis = 3;
- ASSERT_THROW(onert::shape_inference::inferExpandDimsShape(in_shape, axis), std::runtime_error);
- }
- { // over boundary
- int32_t axis = -4;
- ASSERT_THROW(onert::shape_inference::inferExpandDimsShape(in_shape, axis), std::runtime_error);
- }
-}
-
-TEST(ShapeInference, FullyConnected)
-{
- Shape in_shape{3, 4, 5, 6};
- Shape ker_shape{3, 10};
- auto infered_out_shape = onert::shape_inference::inferFullyConnectedShape(in_shape, ker_shape);
-
- ASSERT_EQ(infered_out_shape.rank(), 2);
- ASSERT_EQ(infered_out_shape.dim(0), 36);
- ASSERT_EQ(infered_out_shape.dim(1), 3);
-}
-
-TEST(ShapeInference, Transpose)
-{
- auto check = [&](Shape &in_shape, std::vector<int> perm, Shape &expected) {
- // pre-conditions
- ASSERT_EQ(in_shape.rank(), perm.size());
- ASSERT_EQ(expected.rank(), perm.size());
- auto inferred_out_shape = onert::shape_inference::inferTransposeShape(in_shape, perm);
- // post-conditions
- ASSERT_EQ(inferred_out_shape.rank(), perm.size());
- for (int32_t dim = 0; dim < expected.rank(); dim++)
- {
- ASSERT_EQ(inferred_out_shape.dim(dim), expected.dim(dim));
- }
- };
- // check for 2-D
- {
- Shape in_shape{2, 3};
- std::vector<int> perm = {1, 0};
- Shape expected{3, 2};
- // int32_t rank = 2;
- check(in_shape, perm, expected);
- }
- // check for 3-D
- {
- Shape in_shape{1, 2, 3};
- std::vector<int> perm = {2, 0, 1};
- Shape expected{3, 1, 2};
- // int32_t rank = 3;
- check(in_shape, perm, expected);
- }
- // check for 4-D
- {
- Shape in_shape{1, 2, 3, 4};
- std::vector<int> perm = {1, 3, 0, 2};
- Shape expected{2, 4, 1, 3};
- // int32_t rank = 4;
- check(in_shape, perm, expected);
- }
-}
-
-TEST(ShapeInference, neg_Transpose)
-{
- Shape in_shape{1, 2, 3};
- // Invalid parameter size
- {
- std::vector<int> perm = {2, 0, 1, 0};
- // int32_t rank = 3;
- ASSERT_THROW(onert::shape_inference::inferTransposeShape(in_shape, perm), std::runtime_error);
- }
- // Invalid parameter value
- {
- std::vector<int> perm = {2, 0, 3};
- // int32_t rank = 3;
- ASSERT_THROW(onert::shape_inference::inferTransposeShape(in_shape, perm), std::runtime_error);
- }
-}
diff --git a/runtime/service/CMakeLists.txt b/runtime/service/CMakeLists.txt
new file mode 100644
index 000000000..5ea6cdadd
--- /dev/null
+++ b/runtime/service/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectories()
diff --git a/runtime/service/npud/CMakeLists.txt b/runtime/service/npud/CMakeLists.txt
new file mode 100644
index 000000000..9cf433dff
--- /dev/null
+++ b/runtime/service/npud/CMakeLists.txt
@@ -0,0 +1,42 @@
+if(NOT BUILD_NPUD)
+ return()
+endif(NOT BUILD_NPUD)
+
+set(NPUD_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR})
+
+nnfw_find_package(Gio2.0 REQUIRED)
+nnfw_find_package(Giounix2.0 REQUIRED)
+
+find_program(GDBUS_CODEGEN NAMES gdbus-codegen)
+if (NOT GDBUS_CODEGEN)
+ message(SEND_ERROR "Could not find gdbus-codegen")
+endif(NOT GDBUS_CODEGEN)
+
+set(DBUS_INCLUDE_DIRS "${CMAKE_CURRENT_BINARY_DIR}")
+set(DBUS_INTERFACE "org.tizen.npud")
+set(DBUS_NAMESPACE "Npud")
+set(DBUS_INTROSPECTION_XML "org.tizen.npud.xml")
+set(DBUS_CORE "dbus-core")
+set(DBUS_CORE_SOURCE "${DBUS_CORE}.c")
+set(DBUS_CONFIG_FILE "org.tizen.npud.conf")
+
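+# gdbus-codegen below turns the D-Bus introspection XML into ${DBUS_CORE}.c/.h;
+# the on_handle_* callbacks in core/DBus.cc attach to the skeleton it generates.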
+add_custom_command(OUTPUT ${DBUS_CORE_SOURCE}
+ COMMAND ${GDBUS_CODEGEN}
+ --generate-c-code ${DBUS_CORE}
+ --interface-prefix ${DBUS_INTERFACE}
+ --c-namespace ${DBUS_NAMESPACE}
+ ${CMAKE_CURRENT_SOURCE_DIR}/${DBUS_INTROSPECTION_XML}
+ DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${DBUS_INTROSPECTION_XML})
+
+add_library(npud_dbus STATIC ${DBUS_CORE_SOURCE})
+
+target_include_directories(npud_dbus PUBLIC ${GIO2.0_INCLUDE_DIRS})
+target_include_directories(npud_dbus PUBLIC ${GIO_UNIX_2.0_INCLUDE_DIRS})
+target_link_libraries(npud_dbus PRIVATE ${GIO2.0_LIBRARIES})
+target_link_libraries(npud_dbus PRIVATE ${GIO_UNIX_2.0_LIBRARIES})
+
+install(FILES ${DBUS_CONFIG_FILE} DESTINATION share)
+
+add_subdirectory(core)
+add_subdirectory(tests)
+add_subdirectory(backend)
diff --git a/runtime/service/npud/backend/CMakeLists.txt b/runtime/service/npud/backend/CMakeLists.txt
new file mode 100644
index 000000000..53e3d7a1d
--- /dev/null
+++ b/runtime/service/npud/backend/CMakeLists.txt
@@ -0,0 +1,2 @@
+# Backends
+add_subdirectory(trix)
diff --git a/runtime/service/npud/backend/trix/CMakeLists.txt b/runtime/service/npud/backend/trix/CMakeLists.txt
new file mode 100644
index 000000000..fa7aaa9a7
--- /dev/null
+++ b/runtime/service/npud/backend/trix/CMakeLists.txt
@@ -0,0 +1,19 @@
+nnfw_find_package(TRIXEngine QUIET 2.5.0)
+
+if(NOT TRIXEngine_FOUND)
+ return()
+endif(NOT TRIXEngine_FOUND)
+
+file(GLOB_RECURSE SOURCES "*.cc")
+
+add_library(npud_backend_trix SHARED ${SOURCES})
+
+target_include_directories(npud_backend_trix PUBLIC ${NPUD_INCLUDE_DIRS})
+target_link_libraries(npud_backend_trix PRIVATE nnfw_lib_misc)
+target_link_libraries(npud_backend_trix PRIVATE trix_engine)
+
+if(ENVVAR_NPUD_CONFIG)
+ target_compile_definitions(npud_backend_trix PRIVATE ENVVAR_FOR_DEFAULT_CONFIG)
+endif(ENVVAR_NPUD_CONFIG)
+
+install(TARGETS npud_backend_trix DESTINATION lib)
diff --git a/runtime/service/npud/backend/trix/TrixBackend.cc b/runtime/service/npud/backend/trix/TrixBackend.cc
new file mode 100644
index 000000000..38717c13f
--- /dev/null
+++ b/runtime/service/npud/backend/trix/TrixBackend.cc
@@ -0,0 +1,418 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TrixBackend.h"
+
+#include <algorithm>
+
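+// C entry points for the daemon's dynamic loader: npud resolves these symbols
+// (see the NpuAlloc/NpuDealloc typedefs in core/Backend.h) to create and
+// destroy the backend instance.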
+#if defined(__linux__)
+extern "C" {
+using namespace ::npud::backend::trix;
+
+TrixBackend *allocate() { return new TrixBackend(); }
+
+void deallocate(TrixBackend *trix) { delete trix; }
+}
+#endif
+
+namespace npud
+{
+namespace backend
+{
+namespace trix
+{
+
+TrixBackend::TrixBackend() : _devType(NPUCOND_TRIV2_CONN_SOCIP)
+{
+ auto coreNum = getnumNPUdeviceByType(_devType);
+ if (coreNum <= 0)
+ {
+ return;
+ }
+
+ std::vector<npudev_h> handles;
+ for (int i = 0; i < coreNum; ++i)
+ {
+ npudev_h handle;
+ if (getNPUdeviceByType(&handle, _devType, i) < 0)
+ {
+ // NOTE Skip this core and keep probing the rest.
+ continue;
+ }
+ handles.emplace_back(handle);
+ }
+
+ if (handles.size() == 0)
+ {
+ return;
+ }
+
+ _dev = std::make_unique<TrixDevice>();
+ _dev->handles = std::move(handles);
+}
+
+TrixBackend::~TrixBackend()
+{
+ // NOTE `_dev` stays null when the constructor finds no usable core.
+ if (!_dev)
+ {
+ return;
+ }
+
+ for (const auto &ctx : _dev->ctxs)
+ {
+ npudev_h handle = _dev->handles.at(ctx->defaultCore);
+ for (const auto id : ctx->requests)
+ {
+ removeNPU_request(handle, id);
+ }
+ }
+
+ for (const auto &handle : _dev->handles)
+ {
+ unregisterNPUmodel_all(handle);
+ putNPUdevice(handle);
+ }
+}
+
+NpuStatus TrixBackend::getVersion(std::string &version)
+{
+ // TODO Implement details
+ return NPU_STATUS_ERROR_NOT_SUPPORTED;
+}
+
+NpuStatus TrixBackend::createContext(int deviceId, int priority, NpuContext **ctx)
+{
+ if (deviceId >= _dev->handles.size())
+ {
+ return NPU_STATUS_ERROR_INVALID_ARGUMENT;
+ }
+ auto context = std::make_unique<NpuContext>();
+ context->defaultCore = deviceId;
+ // TODO Consider priority.
+ *ctx = context.get();
+ _dev->ctxs.emplace_back(std::move(context));
+ return NPU_STATUS_SUCCESS;
+}
+
+NpuStatus TrixBackend::destroyContext(NpuContext *ctx)
+{
+ if (ctx == nullptr)
+ {
+ return NPU_STATUS_ERROR_INVALID_ARGUMENT;
+ }
+
+ auto citer = std::find_if(_dev->ctxs.begin(), _dev->ctxs.end(),
+ [&](std::unique_ptr<NpuContext> &c) { return c.get() == ctx; });
+ if (citer == _dev->ctxs.end())
+ {
+ return NPU_STATUS_ERROR_INVALID_ARGUMENT;
+ }
+
+ npudev_h handle = _dev->handles.at(ctx->defaultCore);
+
+ for (auto &&rid : ctx->requests)
+ {
+ if (removeNPU_request(handle, rid) < 0)
+ {
+ return NPU_STATUS_ERROR_OPERATION_FAILED;
+ }
+ _dev->requests.erase(rid);
+ }
+
+ for (auto &&mid : ctx->models)
+ {
+ auto &minfo = _dev->models.at(mid);
+ if (--minfo->refCount == 0)
+ {
+ if (unregisterNPUmodel(handle, mid) < 0)
+ {
+ return NPU_STATUS_ERROR_OPERATION_FAILED;
+ }
+ _dev->models.erase(mid);
+ }
+ }
+
+ _dev->ctxs.erase(citer);
+ return NPU_STATUS_SUCCESS;
+}
+
+NpuStatus TrixBackend::createBuffer(NpuContext *ctx, GenericBuffer *buffer)
+{
+ // TODO Implement details
+ return NPU_STATUS_ERROR_NOT_SUPPORTED;
+}
+
+NpuStatus TrixBackend::destroyBuffer(NpuContext *ctx, GenericBuffer *buffer)
+{
+ // TODO Implement details
+ return NPU_STATUS_ERROR_NOT_SUPPORTED;
+}
+
+NpuStatus TrixBackend::registerModel(NpuContext *ctx, const std::string &modelPath,
+ ModelID *modelId)
+{
+ if (ctx == nullptr)
+ {
+ return NPU_STATUS_ERROR_INVALID_ARGUMENT;
+ }
+
+ ModelID id = 0;
+ auto iter =
+ std::find_if(_dev->models.begin(), _dev->models.end(),
+ [&](const std::pair<const ModelID, std::unique_ptr<TrixModelInfo>> &p) {
+ return p.second->core == ctx->defaultCore && p.second->path == modelPath;
+ });
+ // Already registered model.
+ if (iter != _dev->models.end())
+ {
+ _dev->models.at(iter->first)->refCount++;
+ ctx->models.emplace_back(iter->first);
+ }
+ else
+ {
+ auto meta = getNPUmodel_metadata(modelPath.c_str(), false);
+ if (meta == nullptr)
+ {
+ return NPU_STATUS_ERROR_OPERATION_FAILED;
+ }
+
+ generic_buffer fileInfo;
+ fileInfo.type = BUFFER_FILE;
+ fileInfo.filepath = modelPath.c_str();
+ fileInfo.size = meta->size;
+
+ npudev_h handle = _dev->handles.at(ctx->defaultCore);
+ if (registerNPUmodel(handle, &fileInfo, &id) < 0)
+ {
+ return NPU_STATUS_ERROR_OPERATION_FAILED;
+ }
+
+ _dev->models.insert(std::make_pair(id, std::unique_ptr<TrixModelInfo>(new TrixModelInfo{
+ id, modelPath, ctx->defaultCore, meta, 1})));
+ ctx->models.emplace_back(id);
+ }
+
+ *modelId = id;
+ return NPU_STATUS_SUCCESS;
+}
+
+NpuStatus TrixBackend::unregisterModel(NpuContext *ctx, ModelID modelId)
+{
+ if (ctx == nullptr)
+ {
+ return NPU_STATUS_ERROR_INVALID_ARGUMENT;
+ }
+
+ auto miter = std::find(ctx->models.begin(), ctx->models.end(), modelId);
+ if (miter == ctx->models.end())
+ {
+ return NPU_STATUS_ERROR_INVALID_MODEL;
+ }
+
+ npudev_h handle = _dev->handles.at(ctx->defaultCore);
+
+ for (auto riter = ctx->requests.begin(); riter != ctx->requests.end();)
+ {
+ auto &rinfo = _dev->requests.at(*riter);
+ if (rinfo->modelId == modelId)
+ {
+ if (removeNPU_request(handle, rinfo->id) < 0)
+ {
+ return NPU_STATUS_ERROR_OPERATION_FAILED;
+ }
+ _dev->requests.erase(rinfo->id);
+ riter = ctx->requests.erase(riter);
+ }
+ else
+ {
+ ++riter;
+ }
+ }
+
+ auto &minfo = _dev->models.at(modelId);
+ if (--minfo->refCount == 0)
+ {
+ if (unregisterNPUmodel(handle, modelId) < 0)
+ {
+ return NPU_STATUS_ERROR_OPERATION_FAILED;
+ }
+ _dev->models.erase(modelId);
+ }
+
+ ctx->models.erase(miter);
+ return NPU_STATUS_SUCCESS;
+}
+
+NpuStatus TrixBackend::createRequest(NpuContext *ctx, ModelID modelId, RequestID *requestId)
+{
+ if (ctx == nullptr)
+ {
+ return NPU_STATUS_ERROR_INVALID_ARGUMENT;
+ }
+
+ auto miter = std::find(ctx->models.begin(), ctx->models.end(), modelId);
+ if (miter == ctx->models.end())
+ {
+ return NPU_STATUS_ERROR_INVALID_MODEL;
+ }
+
+ int id = 0;
+ npudev_h handle = _dev->handles.at(ctx->defaultCore);
+ if (createNPU_request(handle, modelId, &id) < 0)
+ {
+ return NPU_STATUS_ERROR_OPERATION_FAILED;
+ }
+
+ _dev->requests.insert(std::make_pair(id, std::unique_ptr<TrixRequestInfo>(new TrixRequestInfo{
+ static_cast<RequestID>(id), modelId})));
+ ctx->requests.emplace_back(id);
+
+ *requestId = id;
+ return NPU_STATUS_SUCCESS;
+}
+
+NpuStatus TrixBackend::destroyRequest(NpuContext *ctx, RequestID requestId)
+{
+ if (ctx == nullptr)
+ {
+ return NPU_STATUS_ERROR_INVALID_ARGUMENT;
+ }
+
+ auto riter = std::find(ctx->requests.begin(), ctx->requests.end(), requestId);
+ if (riter == ctx->requests.end())
+ {
+ return NPU_STATUS_ERROR_INVALID_ARGUMENT;
+ }
+
+ npudev_h handle = _dev->handles.at(ctx->defaultCore);
+ if (removeNPU_request(handle, requestId) < 0)
+ {
+ return NPU_STATUS_ERROR_OPERATION_FAILED;
+ }
+
+ _dev->requests.erase(requestId);
+ ctx->requests.erase(riter);
+ return NPU_STATUS_SUCCESS;
+}
+
+NpuStatus TrixBackend::setRequestData(NpuContext *ctx, RequestID requestId, InputBuffers *inputBufs,
+ TensorDataInfos *inputInfos, OutputBuffers *outputBufs,
+ TensorDataInfos *outputInfos)
+{
+ auto citer = std::find_if(_dev->ctxs.begin(), _dev->ctxs.end(),
+ [&](std::unique_ptr<NpuContext> &c) { return c.get() == ctx; });
+ if (citer == _dev->ctxs.end())
+ {
+ return NPU_STATUS_ERROR_INVALID_ARGUMENT;
+ }
+
+ auto riter = std::find(ctx->requests.begin(), ctx->requests.end(), requestId);
+ if (riter == ctx->requests.end())
+ {
+ return NPU_STATUS_ERROR_INVALID_ARGUMENT;
+ }
+
+ auto &req = _dev->requests.at(requestId);
+ auto miter = std::find(ctx->models.begin(), ctx->models.end(), req->modelId);
+ if (miter == ctx->models.end())
+ {
+ return NPU_STATUS_ERROR_INVALID_MODEL;
+ }
+
+ // TODO Handle the exception that `at` may throw
+ auto &minfo = _dev->models.at(req->modelId);
+ if (minfo->meta->input_seg_num != inputBufs->numBuffers ||
+ minfo->meta->output_seg_num != outputBufs->numBuffers)
+ {
+ return NPU_STATUS_ERROR_INVALID_DATA;
+ }
+
+ auto &inInfos = req->inInfos;
+ auto &outInfos = req->outInfos;
+
+ inInfos->num_info = inputBufs->numBuffers;
+ for (auto i = 0; i < inInfos->num_info; ++i)
+ {
+ inInfos->info[i].layout = DATA_LAYOUT_MODEL;
+ inInfos->info[i].type = minfo->meta->input_seg_quant_type[i];
+ }
+
+ outInfos->num_info = outputBufs->numBuffers;
+ for (auto i = 0; i < outInfos->num_info; ++i)
+ {
+ outInfos->info[i].layout = DATA_LAYOUT_MODEL;
+ outInfos->info[i].type = minfo->meta->output_seg_quant_type[i];
+ }
+
+ auto &inBufs = req->inBufs;
+ auto &outBufs = req->outBufs;
+
+ inBufs->num_buffers = inputBufs->numBuffers;
+ for (auto i = 0; i < inBufs->num_buffers; ++i)
+ {
+ if (inputBufs->buffers[i].type == NPU_BUFFER_MAPPED)
+ {
+ inBufs->bufs[i].addr = inputBufs->buffers[i].addr;
+ }
+ else if (inputBufs->buffers[i].type == NPU_BUFFER_DMABUF)
+ {
+ // TODO Implement details
+ // inBufs.bufs[i].dmabuf = inputBufs->buffers[i].dmabuf;
+ // inBufs.bufs[i].offset = inputBufs->buffers[i].offset;
+ }
+ else
+ {
+ continue;
+ }
+ inBufs->bufs[i].size = inputBufs->buffers[i].size;
+ inBufs->bufs[i].type = static_cast<buffer_types>(inputBufs->buffers[i].type);
+ }
+
+ outBufs->num_buffers = outputBufs->numBuffers;
+ for (auto i = 0; i < outBufs->num_buffers; ++i)
+ {
+ if (outputBufs->buffers[i].type == NPU_BUFFER_MAPPED)
+ {
+ outBufs->bufs[i].addr = outputBufs->buffers[i].addr;
+ }
+ else if (outputBufs->buffers[i].type == NPU_BUFFER_DMABUF)
+ {
+ // TODO Implement details
+ // outBufs.bufs[i].dmabuf = outputBufs->buffers[i].dmabuf;
+ // outBufs.bufs[i].offset = outputBufs->buffers[i].offset;
+ }
+ else
+ {
+ continue;
+ }
+ outBufs->bufs[i].size = outputBufs->buffers[i].size;
+ outBufs->bufs[i].type = static_cast<buffer_types>(outputBufs->buffers[i].type);
+ }
+
+ npudev_h handle = _dev->handles.at(ctx->defaultCore);
+ if (setNPU_requestData(handle, requestId, inBufs.get(), inInfos.get(), outBufs.get(),
+ outInfos.get()) < 0)
+ {
+ return NPU_STATUS_ERROR_OPERATION_FAILED;
+ }
+
+ return NPU_STATUS_SUCCESS;
+}
+
+NpuStatus TrixBackend::submitRequest(NpuContext *ctx, RequestID requestId)
+{
+ // TODO Implement details
+ return NPU_STATUS_ERROR_NOT_SUPPORTED;
+}
+
+} // namespace trix
+} // namespace backend
+} // namespace npud
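
Taken together, the methods above form a create/register/request lifecycle. The
following is a minimal sketch of driving TrixBackend directly, assuming a TRIV2
device is present and using a hypothetical model path; in the daemon the backend
is reached through DevManager and the C allocate/deallocate symbols instead:

#include "TrixBackend.h"

using namespace npud::backend::trix;

int main()
{
  TrixBackend backend; // probes the available TRIV2 cores in the constructor

  NpuContext *ctx = nullptr;
  if (backend.createContext(/*deviceId=*/0, /*priority=*/0, &ctx) != NPU_STATUS_SUCCESS)
    return 1;

  ModelID modelId = 0;
  if (backend.registerModel(ctx, "model.tvn", &modelId) == NPU_STATUS_SUCCESS) // placeholder path
  {
    RequestID requestId = 0;
    if (backend.createRequest(ctx, modelId, &requestId) == NPU_STATUS_SUCCESS)
    {
      // setRequestData() and submitRequest() would go here once buffers exist.
      backend.destroyRequest(ctx, requestId);
    }
    backend.unregisterModel(ctx, modelId);
  }

  backend.destroyContext(ctx);
  return 0;
}
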
diff --git a/runtime/service/npud/backend/trix/TrixBackend.h b/runtime/service/npud/backend/trix/TrixBackend.h
new file mode 100644
index 000000000..0920d9a30
--- /dev/null
+++ b/runtime/service/npud/backend/trix/TrixBackend.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_BACKEND_TRIX_BACKEND_H__
+#define __ONE_SERVICE_NPUD_BACKEND_TRIX_BACKEND_H__
+
+#include <core/Backend.h>
+#include <libnpuhost.h>
+#include <memory>
+#include <vector>
+#include <map>
+
+namespace npud
+{
+namespace backend
+{
+namespace trix
+{
+
+using namespace ::npud::core;
+
+using Handle = void *;
+
+/**
+ * @brief Trix model information.
+ *
+ * @param id The model identifier.
+ * @param path The model path.
+ * @param core The core number where the model is registered.
+ * @param meta The metadata of the model.
+ * @param refCount The reference count of model users.
+ */
+struct TrixModelInfo
+{
+ ModelID id;
+ std::string path;
+ int core;
+ npubin_meta *meta;
+ int refCount;
+
+ TrixModelInfo() : meta(nullptr), refCount(0) {}
+ TrixModelInfo(ModelID _id, const std::string &_path, int _core, npubin_meta *_meta, int _refCount)
+ : id(_id), path(_path), core(_core), meta(_meta), refCount(_refCount)
+ {
+ }
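+ // NOTE `meta` comes from getNPUmodel_metadata() and is released with free(), not delete.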
+ ~TrixModelInfo() { free(meta); }
+};
+
+/**
+ * @brief Trix request information
+ *
+ * @param id The request identifier.
+ * @param modelId The model id of request.
+ */
+struct TrixRequestInfo
+{
+ RequestID id;
+ ModelID modelId;
+ std::unique_ptr<input_buffers> inBufs;
+ std::unique_ptr<tensors_data_info> inInfos;
+ std::unique_ptr<output_buffers> outBufs;
+ std::unique_ptr<tensors_data_info> outInfos;
+ TrixRequestInfo(RequestID _id, ModelID _mid)
+ : id(_id), modelId(_mid), inBufs(std::make_unique<input_buffers>()),
+ inInfos(std::make_unique<tensors_data_info>()), outBufs(std::make_unique<output_buffers>()),
+ outInfos(std::make_unique<tensors_data_info>())
+ {
+ }
+};
+
+/**
+ * @brief Trix device information
+ *
+ * @param handles The device handle list.
+ * @param ctxs The NpuContext list.
+ * @param models The model map.
+ * @param requests The request map.
+ */
+struct TrixDevice
+{
+ std::vector<Handle> handles;
+ std::vector<std::unique_ptr<NpuContext>> ctxs;
+ std::map<ModelID, std::unique_ptr<TrixModelInfo>> models;
+ std::map<RequestID, std::unique_ptr<TrixRequestInfo>> requests;
+};
+
+class TrixBackend : public Backend
+{
+public:
+ TrixBackend();
+ ~TrixBackend();
+
+ NpuStatus getVersion(std::string &version) override;
+ NpuStatus createContext(int deviceId, int priority, NpuContext **ctx) override;
+ NpuStatus destroyContext(NpuContext *ctx) override;
+ NpuStatus createBuffer(NpuContext *ctx, GenericBuffer *buffer) override;
+ NpuStatus destroyBuffer(NpuContext *ctx, GenericBuffer *buffer) override;
+ // TODO Support registering a model from a buffer
+ NpuStatus registerModel(NpuContext *ctx, const std::string &modelPath, ModelID *modelId) override;
+ NpuStatus unregisterModel(NpuContext *ctx, ModelID modelId) override;
+ NpuStatus createRequest(NpuContext *ctx, ModelID modelId, RequestID *requestId) override;
+ NpuStatus destroyRequest(NpuContext *ctx, RequestID requestId) override;
+ NpuStatus setRequestData(NpuContext *ctx, RequestID requestId, InputBuffers *inputBufs,
+ TensorDataInfos *inputInfos, OutputBuffers *outputBufs,
+ TensorDataInfos *outputInfos) override;
+ NpuStatus submitRequest(NpuContext *ctx, RequestID requestId) override;
+
+private:
+ dev_type _devType;
+ std::unique_ptr<TrixDevice> _dev;
+};
+
+} // namespace trix
+} // namespace backend
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_BACKEND_TRIX_BACKEND_H__
diff --git a/runtime/service/npud/core/Backend.h b/runtime/service/npud/core/Backend.h
new file mode 100644
index 000000000..6b810d18e
--- /dev/null
+++ b/runtime/service/npud/core/Backend.h
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_CORE_BACKEND_H__
+#define __ONE_SERVICE_NPUD_CORE_BACKEND_H__
+
+#include "ir/Layout.h"
+#include "ir/DataType.h"
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+namespace npud
+{
+namespace core
+{
+
+#define NPU_TENSOR_MAX (16)
+
+/**
+ * @brief Npu model ID.
+ *
+ */
+using ModelID = uint32_t;
+
+/**
+ * @brief Npu request ID.
+ *
+ */
+using RequestID = uint32_t;
+
+/**
+ * @brief Npu buffer type
+ *
+ */
+enum BufferTypes
+{
+ NPU_BUFFER_MAPPED, /**< buffer is a memory-mapped ptr */
+ NPU_BUFFER_DMABUF, /**< buffer is a dmabuf fd, representing contiguous memory */
+ NPU_BUFFER_UNDEFINED /**< buffer type is undefined */
+};
+
+/**
+ * @brief Various kinds of buffer supported for input/output/model.
+ *
+ */
+struct GenericBuffer
+{
+ struct
+ { /** NPU_BUFFER_MAPPED/DMABUF */
+ void *addr; /**< Mapped address of the buffer */
+ struct
+ { /** NPU_BUFFER_DMABUF only */
+ int dmabuf; /**< The dma-buf fd handle of the memory allocated */
+ uint64_t offset; /**< Offset to be applied to the base memory address */
+ };
+ };
+ uint64_t size; /**< The size of the buffer in bytes */
+ BufferTypes type; /**< Type of memory in this buffer */
+};
+
+/**
+ * @brief Npu generic buffer array.
+ *
+ */
+struct GenericBuffers
+{
+ uint32_t numBuffers;
+ GenericBuffer buffers[NPU_TENSOR_MAX];
+};
+
+/**
+ * @brief Npu input/output buffers are compatible with GenericBuffers.
+ *
+ */
+typedef GenericBuffers InputBuffers;
+typedef GenericBuffers OutputBuffers;
+
+/**
+ * @brief Npu tensor data info description.
+ *
+ */
+struct TensorDataInfo
+{
+ ir::Layout layout;
+ ir::DataType type;
+};
+
+/**
+ * @brief Npu tensor data info array.
+ *
+ */
+struct TensorDataInfos
+{
+ uint32_t numInfos;
+ TensorDataInfo infos[NPU_TENSOR_MAX];
+};
+
+/**
+ * @brief Npu error status.
+ *
+ */
+enum NpuStatus
+{
+ NPU_STATUS_SUCCESS = 0,
+ NPU_STATUS_ERROR_OPERATION_FAILED,
+ NPU_STATUS_ERROR_NOT_SUPPORTED,
+ NPU_STATUS_ERROR_INVALID_ARGUMENT,
+ NPU_STATUS_ERROR_INVALID_MODEL,
+ NPU_STATUS_ERROR_INVALID_DATA,
+};
+
+/**
+ * @brief Npu context definition
+ *
+ * @param models The model lists.
+ * @param requests The request lists.
+ * @param defaultCore The core number to be used by default.
+ */
+struct NpuContext
+{
+ std::vector<ModelID> models;
+ std::vector<RequestID> requests;
+ int defaultCore;
+};
+
+/**
+ * @brief Npu backend interface
+ *
+ * Each backend module should implement this Backend interface.
+ * The npud daemon loads the implementation at runtime through the module's
+ * NpuAlloc/NpuDealloc entry points (see the typedefs below).
+ */
+class Backend
+{
+public:
+ virtual ~Backend() = default;
+
+ virtual NpuStatus getVersion(std::string &version) = 0;
+ virtual NpuStatus createContext(int deviceId, int priority, NpuContext **ctx) = 0;
+ virtual NpuStatus destroyContext(NpuContext *ctx) = 0;
+ virtual NpuStatus createBuffer(NpuContext *ctx, GenericBuffer *buffer) = 0;
+ virtual NpuStatus destroyBuffer(NpuContext *ctx, GenericBuffer *buffer) = 0;
+ // TODO Support registering a model from a buffer
+ virtual NpuStatus registerModel(NpuContext *ctx, const std::string &modelPath,
+ ModelID *modelId) = 0;
+ virtual NpuStatus unregisterModel(NpuContext *ctx, ModelID modelId) = 0;
+ virtual NpuStatus createRequest(NpuContext *ctx, ModelID modelId, RequestID *requestId) = 0;
+ virtual NpuStatus destroyRequest(NpuContext *ctx, RequestID requestId) = 0;
+ virtual NpuStatus setRequestData(NpuContext *ctx, RequestID requestId, InputBuffers *inputBufs,
+ TensorDataInfos *inputInfos, OutputBuffers *outputBufs,
+ TensorDataInfos *outputInfos) = 0;
+ virtual NpuStatus submitRequest(NpuContext *ctx, RequestID requestId) = 0;
+};
+
+typedef Backend *(*NpuAlloc)();
+typedef void (*NpuDealloc)(Backend *);
+
+} // namespace core
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_CORE_BACKEND_H__
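
The interface above is the complete contract for a loadable module. For
illustration, a skeletal implementation against it (a hypothetical stub, not
part of this change; the trix backend above is the real one), including the C
entry points matching the NpuAlloc/NpuDealloc typedefs:

#include <core/Backend.h>

using namespace npud::core;

namespace
{
// Every call reports NOT_SUPPORTED; a real module would talk to its device
// library here.
class StubBackend : public Backend
{
public:
  NpuStatus getVersion(std::string &) override { return NPU_STATUS_ERROR_NOT_SUPPORTED; }
  NpuStatus createContext(int, int, NpuContext **) override
  {
    return NPU_STATUS_ERROR_NOT_SUPPORTED;
  }
  NpuStatus destroyContext(NpuContext *) override { return NPU_STATUS_ERROR_NOT_SUPPORTED; }
  NpuStatus createBuffer(NpuContext *, GenericBuffer *) override
  {
    return NPU_STATUS_ERROR_NOT_SUPPORTED;
  }
  NpuStatus destroyBuffer(NpuContext *, GenericBuffer *) override
  {
    return NPU_STATUS_ERROR_NOT_SUPPORTED;
  }
  NpuStatus registerModel(NpuContext *, const std::string &, ModelID *) override
  {
    return NPU_STATUS_ERROR_NOT_SUPPORTED;
  }
  NpuStatus unregisterModel(NpuContext *, ModelID) override
  {
    return NPU_STATUS_ERROR_NOT_SUPPORTED;
  }
  NpuStatus createRequest(NpuContext *, ModelID, RequestID *) override
  {
    return NPU_STATUS_ERROR_NOT_SUPPORTED;
  }
  NpuStatus destroyRequest(NpuContext *, RequestID) override
  {
    return NPU_STATUS_ERROR_NOT_SUPPORTED;
  }
  NpuStatus setRequestData(NpuContext *, RequestID, InputBuffers *, TensorDataInfos *,
                           OutputBuffers *, TensorDataInfos *) override
  {
    return NPU_STATUS_ERROR_NOT_SUPPORTED;
  }
  NpuStatus submitRequest(NpuContext *, RequestID) override
  {
    return NPU_STATUS_ERROR_NOT_SUPPORTED;
  }
};
} // namespace

// C symbols the daemon resolves to create and destroy the module instance.
extern "C" {
Backend *allocate() { return new StubBackend(); }
void deallocate(Backend *backend) { delete backend; }
}
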
diff --git a/runtime/service/npud/core/CMakeLists.txt b/runtime/service/npud/core/CMakeLists.txt
new file mode 100644
index 000000000..31341c664
--- /dev/null
+++ b/runtime/service/npud/core/CMakeLists.txt
@@ -0,0 +1,29 @@
+nnfw_find_package(GLib2.0 REQUIRED)
+
+file(GLOB_RECURSE SOURCES "*.cc")
+file(GLOB_RECURSE MAIN_SOURCE_FILE "main.cc")
+list(REMOVE_ITEM SOURCES ${MAIN_SOURCE_FILE})
+
+add_library(npud_core STATIC ${SOURCES})
+
+set_target_properties(npud_core PROPERTIES LINKER_LANGUAGE CXX)
+target_include_directories(npud_core PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
+target_include_directories(npud_core PUBLIC ${GLIB2.0_INCLUDE_DIRS})
+target_include_directories(npud_core PUBLIC ${DBUS_INCLUDE_DIRS})
+target_link_libraries(npud_core PRIVATE nnfw_lib_misc)
+target_link_libraries(npud_core PRIVATE ${GLIB2.0_LIBRARIES})
+target_link_libraries(npud_core PRIVATE ${LIB_PTHREAD})
+target_link_libraries(npud_core PRIVATE dl)
+target_link_libraries(npud_core PRIVATE npud_dbus)
+
+if(ENVVAR_NPUD_CONFIG)
+ target_compile_definitions(npud_core PRIVATE ENVVAR_FOR_DEFAULT_CONFIG)
+endif(ENVVAR_NPUD_CONFIG)
+
+# npud executable
+add_executable(npud ${MAIN_SOURCE_FILE})
+
+set_target_properties(npud PROPERTIES LINKER_LANGUAGE CXX)
+target_link_libraries(npud PRIVATE npud_core)
+
+install(TARGETS npud DESTINATION bin)
diff --git a/runtime/service/npud/core/ContextManager.cc b/runtime/service/npud/core/ContextManager.cc
new file mode 100644
index 000000000..9ceb541e0
--- /dev/null
+++ b/runtime/service/npud/core/ContextManager.cc
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ContextManager.h"
+
+#include <algorithm>
+#include <util/Logging.h>
+
+namespace npud
+{
+namespace core
+{
+
+ContextManager::ContextManager() noexcept {}
+
+ContextManager::~ContextManager() noexcept { _contexts.clear(); }
+
+void ContextManager::newContext(NpuContext *npuContext, ContextID *contextId)
+{
+ auto context = std::make_unique<Context>();
+ // TODO Consider the possibility of reusing the same address.
+ context->contextId = reinterpret_cast<ContextID>(context.get());
+ context->npuContext = npuContext;
+ *contextId = context->contextId;
+ _contexts.emplace_back(std::move(context));
+
+ this->listContexts();
+}
+
+void ContextManager::deleteContext(ContextID contextId)
+{
+ const auto iter =
+ std::remove_if(_contexts.begin(), _contexts.end(),
+ [&](std::unique_ptr<Context> &c) { return c->contextId == contextId; });
+ if (iter == _contexts.end())
+ {
+ return;
+ }
+
+ _contexts.erase(iter, _contexts.end());
+
+ this->listContexts();
+}
+
+void ContextManager::listContexts()
+{
+#ifdef DEBUG
+ VERBOSE(ContextManager) << "Size: " << _contexts.size() << std::endl;
+ for (const auto &context : _contexts)
+ {
+ VERBOSE(ContextManager) << "==========================" << std::endl;
+ VERBOSE(ContextManager) << "contextId: " << context->contextId << std::endl;
+ }
+ VERBOSE(ContextManager) << "==========================" << std::endl;
+#endif
+}
+
+const std::vector<std::unique_ptr<Context>>::iterator
+ContextManager::getContext(ContextID contextId)
+{
+ const auto iter =
+ std::find_if(_contexts.begin(), _contexts.end(),
+ [&](std::unique_ptr<Context> &c) { return c->contextId == contextId; });
+ return iter;
+}
+
+NpuContext *ContextManager::getNpuContext(ContextID contextId)
+{
+ const auto iter = getContext(contextId);
+ if (iter == _contexts.end())
+ {
+ return nullptr;
+ }
+
+ return iter->get()->npuContext;
+}
+
+} // namespace core
+} // namespace npud
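
A short sketch of the manager's round trip, assuming an NpuContext already
obtained from a backend (hypothetical helper, for illustration only):

#include "ContextManager.h"

using namespace npud::core;

void roundTrip(ContextManager &manager, NpuContext *npuContext)
{
  ContextID id = 0;
  manager.newContext(npuContext, &id); // the id is derived from the Context address

  // A known id yields the stored pointer; an unknown id yields nullptr.
  NpuContext *found = manager.getNpuContext(id);
  (void)found;

  manager.deleteContext(id);
}
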
diff --git a/runtime/service/npud/core/ContextManager.h b/runtime/service/npud/core/ContextManager.h
new file mode 100644
index 000000000..c54862459
--- /dev/null
+++ b/runtime/service/npud/core/ContextManager.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_CORE_CONTEXT_MANAGER_H__
+#define __ONE_SERVICE_NPUD_CORE_CONTEXT_MANAGER_H__
+
+#include "Backend.h"
+
+#include <vector>
+#include <memory>
+
+namespace npud
+{
+namespace core
+{
+
+using ContextID = uint64_t;
+struct Context
+{
+ // TODO Describe the variables
+ ContextID contextId;
+ NpuContext *npuContext;
+};
+
+class ContextManager
+{
+public:
+ ContextManager() noexcept;
+ ~ContextManager() noexcept;
+
+ ContextManager(const ContextManager &) = delete;
+ ContextManager &operator=(const ContextManager &) = delete;
+
+ void newContext(NpuContext *npuContext, ContextID *contextId);
+ void deleteContext(ContextID contextId);
+ const std::vector<std::unique_ptr<Context>>::iterator getContext(ContextID contextId);
+ NpuContext *getNpuContext(ContextID contextId);
+
+private:
+ void listContexts(void);
+
+private:
+ std::vector<std::unique_ptr<Context>> _contexts;
+};
+
+} // namespace core
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_CORE_CONTEXT_MANAGER_H__
diff --git a/runtime/service/npud/core/Core.cc b/runtime/service/npud/core/Core.cc
new file mode 100644
index 000000000..6d010ee69
--- /dev/null
+++ b/runtime/service/npud/core/Core.cc
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Core.h"
+#include "util/Logging.h"
+
+namespace npud
+{
+namespace core
+{
+
+Core::Core() noexcept
+ : _devManager(std::make_unique<DevManager>()), _contextManager(std::make_unique<ContextManager>())
+{
+}
+
+void Core::init() { _devManager->loadModules(); }
+
+void Core::deinit() { _devManager->releaseModules(); }
+
+int Core::getAvailableDeviceList(std::vector<std::string> &list) const { return 0; }
+
+int Core::createContext(int deviceId, int priority, ContextID *contextId) const
+{
+ VERBOSE(Core) << "createContext with " << deviceId << ", " << priority << std::endl;
+ NpuContext *npuContext;
+ int ret = _devManager->createContext(deviceId, priority, &npuContext);
+ if (ret != NPU_STATUS_SUCCESS)
+ {
+ VERBOSE(Core) << "Fail to create dev context" << std::endl;
+ // TODO Define CoreStatus
+ return 1;
+ }
+
+ ContextID _contextId;
+ _contextManager->newContext(npuContext, &_contextId);
+ *contextId = _contextId;
+ return 0;
+}
+
+int Core::destroyContext(ContextID contextId) const
+{
+ VERBOSE(Core) << "destroyContext with " << contextId << std::endl;
+ NpuContext *npuContext = _contextManager->getNpuContext(contextId);
+ if (!npuContext)
+ {
+ VERBOSE(Core) << "Invalid context id" << std::endl;
+ // TODO Define CoreStatus
+ return 1;
+ }
+
+ int ret = _devManager->destroyContext(npuContext);
+ if (ret != NPU_STATUS_SUCCESS)
+ {
+ VERBOSE(Core) << "Failed to destroy npu context: " << ret << std::endl;
+ return 1;
+ }
+
+ _contextManager->deleteContext(contextId);
+ return 0;
+}
+
+int Core::createNetwork(ContextID contextId, const std::string &modelPath, ModelID *modelId) const
+{
+ VERBOSE(Core) << "createNetwork with " << contextId << ", " << modelPath << std::endl;
+ NpuContext *npuContext = _contextManager->getNpuContext(contextId);
+ if (!npuContext)
+ {
+ VERBOSE(Core) << "Invalid context id" << std::endl;
+ // TODO Define CoreStatus
+ return 1;
+ }
+
+ ModelID id;
+ int ret = _devManager->registerModel(npuContext, modelPath, &id);
+ if (ret != NPU_STATUS_SUCCESS)
+ {
+ VERBOSE(Core) << "Failed to register model: " << ret << std::endl;
+ return 1;
+ }
+
+ *modelId = id;
+ return 0;
+}
+
+int Core::destroyNetwork(ContextID contextId, ModelID modelId) const
+{
+ VERBOSE(Core) << "destroyNetwork with " << contextId << std::endl;
+ NpuContext *npuContext = _contextManager->getNpuContext(contextId);
+ if (!npuContext)
+ {
+ VERBOSE(Core) << "Invalid context id" << std::endl;
+ // TODO Define CoreStatus
+ return 1;
+ }
+
+ int ret = _devManager->unregisterModel(npuContext, modelId);
+ if (ret != NPU_STATUS_SUCCESS)
+ {
+ VERBOSE(Core) << "Failed to unregister model: " << ret << std::endl;
+ // TODO Define CoreStatus
+ return 1;
+ }
+
+ return 0;
+}
+
+int Core::createRequest(ContextID contextId, ModelID modelId, RequestID *requestId) const
+{
+ VERBOSE(Core) << "createRequest with " << contextId << ", " << modelId << std::endl;
+ NpuContext *npuContext = _contextManager->getNpuContext(contextId);
+ if (!npuContext)
+ {
+ VERBOSE(Core) << "Invalid context id" << std::endl;
+ // TODO Define CoreStatus
+ return 1;
+ }
+
+ RequestID id;
+ int ret = _devManager->createRequest(npuContext, modelId, &id);
+ if (ret != NPU_STATUS_SUCCESS)
+ {
+ VERBOSE(Core) << "Failed to create request of model: " << ret << std::endl;
+ // TODO Define CoreStatus
+ return 1;
+ }
+
+ *requestId = id;
+ return 0;
+}
+
+int Core::destroyRequest(ContextID contextId, RequestID requestId) const
+{
+ VERBOSE(Core) << "destroyRequest with " << contextId << ", " << requestId << std::endl;
+ NpuContext *npuContext = _contextManager->getNpuContext(contextId);
+ if (!npuContext)
+ {
+ VERBOSE(Core) << "Invalid context id" << std::endl;
+ // TODO Define CoreStatus
+ return 1;
+ }
+
+ int ret = _devManager->destroyRequest(npuContext, requestId);
+ if (ret != NPU_STATUS_SUCCESS)
+ {
+ VERBOSE(Core) << "Failed to destroy request: " << ret << std::endl;
+ // TODO Define CoreStatus
+ return 1;
+ }
+
+ return 0;
+}
+
+} // namespace core
+} // namespace npud
diff --git a/runtime/service/npud/core/Core.h b/runtime/service/npud/core/Core.h
new file mode 100644
index 000000000..d702fcb14
--- /dev/null
+++ b/runtime/service/npud/core/Core.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_CORE_CORE_H__
+#define __ONE_SERVICE_NPUD_CORE_CORE_H__
+
+#include "DevManager.h"
+#include "ContextManager.h"
+
+#include <vector>
+#include <string>
+
+namespace npud
+{
+namespace core
+{
+
+// TODO Define error status
+
+class Core
+{
+public:
+ Core() noexcept;
+ ~Core() noexcept = default;
+
+ Core(const Core &) = delete;
+ Core &operator=(const Core &) = delete;
+
+ void init();
+ void deinit();
+
+ int getAvailableDeviceList(std::vector<std::string> &list) const;
+ int createContext(int deviceId, int priority, ContextID *contextId) const;
+ int destroyContext(ContextID contextId) const;
+ int createNetwork(ContextID contextId, const std::string &modelPath, ModelID *modelId) const;
+ int destroyNetwork(ContextID contextId, ModelID modelId) const;
+ int createRequest(ContextID contextId, ModelID modelId, RequestID *requestId) const;
+ int destroyRequest(ContextID contextId, RequestID requestId) const;
+
+private:
+ std::unique_ptr<DevManager> _devManager;
+ std::unique_ptr<ContextManager> _contextManager;
+};
+
+} // namespace core
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_CORE_CORE_H__
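
Core is the seam between the D-Bus handlers and the device layer. A minimal
sketch of the flow those handlers drive, using a hypothetical model path (in
the daemon, Server owns the Core instance):

#include "Core.h"

using namespace npud::core;

void runOnce(Core &core)
{
  core.init(); // DevManager loads the backend module

  ContextID ctx = 0;
  if (core.createContext(/*deviceId=*/0, /*priority=*/0, &ctx) == 0)
  {
    ModelID model = 0;
    if (core.createNetwork(ctx, "model.tvn", &model) == 0) // placeholder path
    {
      RequestID request = 0;
      if (core.createRequest(ctx, model, &request) == 0)
        core.destroyRequest(ctx, request);
      core.destroyNetwork(ctx, model);
    }
    core.destroyContext(ctx);
  }

  core.deinit();
}
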
diff --git a/runtime/service/npud/core/DBus.cc b/runtime/service/npud/core/DBus.cc
new file mode 100644
index 000000000..074ef50de
--- /dev/null
+++ b/runtime/service/npud/core/DBus.cc
@@ -0,0 +1,323 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DBus.h"
+#include "Server.h"
+
+#include <atomic>
+#include <util/Logging.h>
+
+namespace npud
+{
+namespace core
+{
+
+std::atomic_bool DBus::_isReady(false);
+
+DBus::DBus() noexcept
+{
+ VERBOSE(DBus) << "Starting dbus service" << std::endl;
+
+ _gdbus_id = g_bus_own_name(G_BUS_TYPE_SYSTEM, "org.tizen.npud", G_BUS_NAME_OWNER_FLAGS_NONE,
+ (GBusAcquiredCallback)on_bus_acquired,
+ (GBusNameAcquiredCallback)on_name_acquired,
+ (GBusNameLostCallback)on_name_lost, NULL, NULL);
+}
+
+DBus::~DBus() noexcept
+{
+ VERBOSE(DBus) << "Stop dbus service" << std::endl;
+
+ g_bus_unown_name(_gdbus_id);
+}
+
+void DBus::on_bus_acquired(GDBusConnection *conn, const gchar *name, gpointer user_data)
+{
+ VERBOSE(DBus) << name << " on bus acquired" << std::endl;
+
+ GError *error = NULL;
+ NpudCore *core = npud_core_skeleton_new();
+ NpudCoreIface *iface = NPUD_CORE_GET_IFACE(core);
+
+ iface->handle_device_get_available_list = &on_handle_device_get_available_list;
+ iface->handle_context_create = &on_handle_context_create;
+ iface->handle_context_destroy = &on_handle_context_destroy;
+ iface->handle_buffers_create = &on_handle_buffers_create;
+ iface->handle_buffers_destroy = &on_handle_buffers_destroy;
+ iface->handle_network_create = &on_handle_network_create;
+ iface->handle_network_destroy = &on_handle_network_destroy;
+ iface->handle_request_create = &on_handle_request_create;
+ iface->handle_request_destroy = &on_handle_request_destroy;
+ iface->handle_request_set_data = &on_handle_request_set_data;
+ iface->handle_execute_run = &on_handle_execute_run;
+
+ if (!g_dbus_interface_skeleton_export(G_DBUS_INTERFACE_SKELETON(core), conn, "/org/tizen/npud",
+ &error))
+ {
+ VERBOSE(DBus) << "Failed to export skeleton, Server will stop." << std::endl;
+ Server::instance().stop();
+ }
+
+ _isReady.exchange(true);
+}
+
+void DBus::on_name_acquired(GDBusConnection *conn, const gchar *name, gpointer user_data)
+{
+ VERBOSE(DBus) << name << " on name acquired" << std::endl;
+}
+
+void DBus::on_name_lost(GDBusConnection *conn, const gchar *name, gpointer user_data)
+{
+ VERBOSE(DBus) << name << " on name lost, Server will stop." << std::endl;
+ Server::instance().stop();
+}
+
+gboolean DBus::on_handle_device_get_available_list(NpudCore *object,
+ GDBusMethodInvocation *invocation)
+{
+ VERBOSE(DBus) << __FUNCTION__ << std::endl;
+ std::vector<std::string> list;
+ int error = Server::instance().core().getAvailableDeviceList(list);
+ // TODO Implement variant outputs
+ npud_core_complete_device_get_available_list(object, invocation, error);
+ return TRUE;
+}
+
+gboolean DBus::on_handle_context_create(NpudCore *object, GDBusMethodInvocation *invocation,
+ gint arg_device_id, gint arg_priority)
+{
+ VERBOSE(DBus) << "on_handle_context_create with " << arg_device_id << ", " << arg_priority
+ << std::endl;
+
+ guint64 out_ctx = 0;
+ int ret = Server::instance().core().createContext(arg_device_id, arg_priority, &out_ctx);
+ npud_core_complete_context_create(object, invocation, out_ctx, ret);
+ return TRUE;
+}
+
+gboolean DBus::on_handle_context_destroy(NpudCore *object, GDBusMethodInvocation *invocation,
+ guint64 arg_ctx)
+{
+ VERBOSE(DBus) << "on_handle_context_destroy with " << arg_ctx << std::endl;
+ int ret = Server::instance().core().destroyContext(arg_ctx);
+ npud_core_complete_context_destroy(object, invocation, ret);
+ return TRUE;
+}
+
+gboolean DBus::on_handle_buffers_create(NpudCore *object, GDBusMethodInvocation *invocation,
+ guint64 arg_ctx, GVariant *arg_buffers)
+{
+ VERBOSE(DBus) << "on_handle_buffers_create with " << arg_ctx << std::endl;
+ GenericBuffers bufs;
+ GVariantIter *iter = NULL;
+ gint32 type;
+ guint64 addr;
+ guint32 size;
+ int index = 0;
+ g_variant_get(arg_buffers, "a(itu)", &iter);
+ while (iter != NULL && g_variant_iter_loop(iter, "(itu)", &type, &addr, &size))
+ {
+ VERBOSE(DBus) << "in [" << index << "] Type: " << type << ", Addr: " << addr
+ << ", Size: " << size << std::endl;
+ bufs.buffers[index].type = static_cast<BufferTypes>(type);
+ bufs.buffers[index].addr = reinterpret_cast<void *>(addr);
+ bufs.buffers[index].size = size;
+ index++;
+ }
+ bufs.numBuffers = index;
+ g_variant_iter_free(iter);
+
+ // TODO Invoke Core function.
+ int ret = -1;
+
+ GVariantBuilder *builder = g_variant_builder_new(G_VARIANT_TYPE("a(itu)"));
+
+ // TODO Enable the code below once the core function can update the ret value
+ // if (ret == 0)
+ // {
+ // for (auto i = 0; i < bufs.numBuffers; ++i)
+ // {
+ // VERBOSE(DBus) << "out [" << index << "] Type: " << bufs.buffers[i].type
+ // << ", Addr: " << bufs.buffers[i].addr << ", Size: " << bufs.buffers[i].size
+ // << std::endl;
+ // g_variant_builder_add(builder, "(itu)", bufs.buffers[i].type, bufs.buffers[i].addr,
+ // bufs.buffers[i].size);
+ // }
+ // }
+ npud_core_complete_buffers_create(object, invocation, g_variant_builder_end(builder), ret);
+ return TRUE;
+}
+
+gboolean DBus::on_handle_buffers_destroy(NpudCore *object, GDBusMethodInvocation *invocation,
+ guint64 arg_ctx, GVariant *arg_buffers)
+{
+ VERBOSE(DBus) << "on_handle_buffers_destroy with " << arg_ctx << std::endl;
+ GenericBuffers bufs;
+ GVariantIter *iter = NULL;
+ gint32 type;
+ guint64 addr;
+ guint32 size;
+ int index = 0;
+ g_variant_get(arg_buffers, "a(itu)", &iter);
+ while (iter != NULL && g_variant_iter_loop(iter, "(itu)", &type, &addr, &size))
+ {
+ VERBOSE(DBus) << "[" << index << "] Type: " << type << ", Addr: " << (void *)addr
+ << ", Size: " << size << std::endl;
+ bufs.buffers[index].type = static_cast<BufferTypes>(type);
+ bufs.buffers[index].addr = reinterpret_cast<void *>(addr);
+ bufs.buffers[index].size = size;
+ index++;
+ }
+ bufs.numBuffers = index;
+ g_variant_iter_free(iter);
+ // TODO Invoke Core function.
+ int ret = -1;
+ npud_core_complete_buffers_destroy(object, invocation, ret);
+ return TRUE;
+}
+
+gboolean DBus::on_handle_network_create(NpudCore *object, GDBusMethodInvocation *invocation,
+ guint64 arg_ctx, const gchar *arg_model_path)
+{
+ VERBOSE(DBus) << "on_handle_network_create with " << arg_ctx << ", " << arg_model_path
+ << std::endl;
+ ModelID modelId = 0;
+ int ret = Server::instance().core().createNetwork(arg_ctx, arg_model_path, &modelId);
+ npud_core_complete_network_create(object, invocation, guint(modelId), ret);
+ return TRUE;
+}
+
+gboolean DBus::on_handle_network_destroy(NpudCore *object, GDBusMethodInvocation *invocation,
+ guint64 arg_ctx, guint arg_nw_handle)
+{
+ VERBOSE(DBus) << "on_handle_network_destroy with " << arg_ctx << ", " << arg_nw_handle
+ << std::endl;
+ int ret = Server::instance().core().destroyNetwork(arg_ctx, arg_nw_handle);
+ npud_core_complete_network_destroy(object, invocation, ret);
+ return TRUE;
+}
+
+gboolean DBus::on_handle_request_create(NpudCore *object, GDBusMethodInvocation *invocation,
+ guint64 arg_ctx, guint arg_nw_handle)
+{
+ VERBOSE(DBus) << "on_handle_request_create with " << arg_ctx << ", " << arg_nw_handle
+ << std::endl;
+ RequestID requestID = 0;
+ int ret = Server::instance().core().createRequest(arg_ctx, arg_nw_handle, &requestID);
+ npud_core_complete_request_create(object, invocation, guint(requestID), ret);
+ return TRUE;
+}
+
+gboolean DBus::on_handle_request_destroy(NpudCore *object, GDBusMethodInvocation *invocation,
+ guint64 arg_ctx, guint arg_rq_handle)
+{
+ VERBOSE(DBus) << "on_handle_request_destroy with " << arg_ctx << ", " << arg_rq_handle
+ << std::endl;
+ int ret = Server::instance().core().destroyRequest(arg_ctx, arg_rq_handle);
+ npud_core_complete_request_destroy(object, invocation, ret);
+ return TRUE;
+}
+
+gboolean DBus::on_handle_request_set_data(NpudCore *object, GDBusMethodInvocation *invocation,
+ guint64 arg_ctx, guint arg_rq_handle,
+ GVariant *arg_input_buffers, GVariant *arg_output_buffers)
+{
+ VERBOSE(DBus) << "on_handle_request_set_data with " << arg_ctx << ", " << arg_rq_handle
+ << std::endl;
+ GVariantIter *iter = NULL;
+ InputBuffers inBufs;
+ OutputBuffers outBufs;
+ gint32 type;
+ guint64 addr;
+ guint32 size;
+ int index = 0;
+
+ // inBufs
+ g_variant_get(arg_input_buffers, "a(itu)", &iter);
+ index = 0;
+ while (iter != NULL && g_variant_iter_loop(iter, "(itu)", &type, &addr, &size))
+ {
+ VERBOSE(DBus) << "in [" << index << "] Type: " << type << ", Addr: " << (void *)addr
+ << ", Size: " << size << std::endl;
+ if (type == 0) // NPU_BUFFER_MAPPED
+ {
+ inBufs.buffers[index].addr = reinterpret_cast<void *>(addr);
+ }
+ else if (type == 1) // NPU_BUFFER_DMABUF
+ {
+ // TODO Support dma buffer
+ VERBOSE(DBus) << "[NYI] NPU_BUFFER_DMABUF" << std::endl;
+ continue;
+ }
+ else
+ {
+ VERBOSE(DBus) << "Wrong buffer type. Ignored." << std::endl;
+ continue;
+ }
+ inBufs.buffers[index].size = size;
+ inBufs.buffers[index].type = static_cast<BufferTypes>(type);
+ index++;
+ }
+ inBufs.numBuffers = index;
+ g_variant_iter_free(iter);
+
+ // outBufs
+ g_variant_get(arg_output_buffers, "a(itu)", &iter);
+ index = 0;
+ while (iter != NULL && g_variant_iter_loop(iter, "(itu)", &type, &addr, &size))
+ {
+ VERBOSE(DBus) << "out [" << index << "] Type: " << type << ", Addr: " << (void *)addr
+ << ", Size: " << size << std::endl;
+ if (type == 0) // NPU_BUFFER_MAPPED
+ {
+ outBufs.buffers[index].addr = reinterpret_cast<void *>(addr);
+ }
+ else if (type == 1) // NPU_BUFFER_DMABUF
+ {
+ // TODO Support dma buffer
+ VERBOSE(DBus) << "[NYI] NPU_BUFFER_DMABUF" << std::endl;
+ continue;
+ }
+ else
+ {
+ VERBOSE(DBus) << "Wrong buffer type. Ignored." << std::endl;
+ continue;
+ }
+ outBufs.buffers[index].size = size;
+ outBufs.buffers[index].type = static_cast<BufferTypes>(type);
+ index++;
+ }
+ outBufs.numBuffers = index;
+ g_variant_iter_free(iter);
+
+ // TODO Invoke Core function.
+ int ret = -1;
+ npud_core_complete_request_set_data(object, invocation, ret);
+ return TRUE;
+}
+
+gboolean DBus::on_handle_execute_run(NpudCore *object, GDBusMethodInvocation *invocation,
+ guint64 arg_ctx, guint arg_rq_handle)
+{
+ VERBOSE(DBus) << "on_handle_execute_run with " << arg_ctx << ", " << arg_rq_handle << std::endl;
+ // TODO Invoke Core function.
+ int ret = -1;
+ npud_core_complete_execute_run(object, invocation, ret);
+ return TRUE;
+}
+
+} // namespace core
+} // namespace npud
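
For reference, these handlers are reachable over the system bus. The sketch
below is a hypothetical GIO client: the interface and method names
(org.tizen.npud.core, ContextCreate) and the reply signature are inferred from
the generated-skeleton naming above, since the introspection XML is not part of
this listing, and may differ in practice:

#include <gio/gio.h>

int main()
{
  GError *error = NULL;
  GDBusConnection *conn = g_bus_get_sync(G_BUS_TYPE_SYSTEM, NULL, &error);
  if (conn == NULL)
  {
    g_error_free(error);
    return 1;
  }

  // ContextCreate(device_id, priority) -> (ctx, error); names are assumptions.
  GVariant *reply = g_dbus_connection_call_sync(
    conn, "org.tizen.npud", "/org/tizen/npud", "org.tizen.npud.core", "ContextCreate",
    g_variant_new("(ii)", 0, 0), G_VARIANT_TYPE("(ti)"), G_DBUS_CALL_FLAGS_NONE, -1, NULL,
    &error);
  if (reply != NULL)
  {
    guint64 ctx = 0;
    gint ret = 0;
    g_variant_get(reply, "(ti)", &ctx, &ret);
    g_variant_unref(reply);
  }
  else
  {
    g_error_free(error);
  }

  g_object_unref(conn);
  return 0;
}
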
diff --git a/runtime/service/npud/core/DBus.h b/runtime/service/npud/core/DBus.h
new file mode 100644
index 000000000..8840f9c16
--- /dev/null
+++ b/runtime/service/npud/core/DBus.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_CORE_DBUS_H__
+#define __ONE_SERVICE_NPUD_CORE_DBUS_H__
+
+#include <dbus-core.h>
+#include <gio/gio.h>
+#include <memory>
+#include <atomic>
+
+namespace npud
+{
+namespace core
+{
+
+class DBus
+{
+public:
+ DBus() noexcept;
+ ~DBus() noexcept;
+
+ DBus(const DBus &) = delete;
+ DBus &operator=(const DBus &) = delete;
+
+ bool isReady() { return _isReady.load(); }
+
+ static void on_bus_acquired(GDBusConnection *conn, const gchar *name, gpointer user_data);
+ static void on_name_acquired(GDBusConnection *conn, const gchar *name, gpointer user_data);
+ static void on_name_lost(GDBusConnection *conn, const gchar *name, gpointer user_data);
+
+ static gboolean on_handle_device_get_available_list(NpudCore *object,
+ GDBusMethodInvocation *invocation);
+ static gboolean on_handle_context_create(NpudCore *object, GDBusMethodInvocation *invocation,
+ gint arg_device_id, gint arg_priority);
+ static gboolean on_handle_context_destroy(NpudCore *object, GDBusMethodInvocation *invocation,
+ guint64 arg_ctx);
+ static gboolean on_handle_buffers_create(NpudCore *object, GDBusMethodInvocation *invocation,
+ guint64 arg_ctx, GVariant *arg_buffers);
+ static gboolean on_handle_buffers_destroy(NpudCore *object, GDBusMethodInvocation *invocation,
+ guint64 arg_ctx, GVariant *arg_buffers);
+ static gboolean on_handle_network_create(NpudCore *object, GDBusMethodInvocation *invocation,
+ guint64 arg_ctx, const gchar *arg_model_path);
+ static gboolean on_handle_network_destroy(NpudCore *object, GDBusMethodInvocation *invocation,
+ guint64 arg_ctx, guint arg_nw_handle);
+ static gboolean on_handle_request_create(NpudCore *object, GDBusMethodInvocation *invocation,
+ guint64 arg_ctx, guint arg_nw_handle);
+ static gboolean on_handle_request_destroy(NpudCore *object, GDBusMethodInvocation *invocation,
+ guint64 arg_ctx, guint arg_rq_handle);
+ static gboolean on_handle_request_set_data(NpudCore *object, GDBusMethodInvocation *invocation,
+ guint64 arg_ctx, guint arg_rq_handle,
+ GVariant *arg_input_buffers,
+ GVariant *arg_output_buffers);
+ static gboolean on_handle_execute_run(NpudCore *object, GDBusMethodInvocation *invocation,
+ guint64 arg_ctx, guint arg_rq_handle);
+
+private:
+ guint _gdbus_id;
+ static std::atomic_bool _isReady;
+};
+
+} // namespace core
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_CORE_DBUS_H__
diff --git a/runtime/service/npud/core/DevManager.cc b/runtime/service/npud/core/DevManager.cc
new file mode 100644
index 000000000..0765eec5c
--- /dev/null
+++ b/runtime/service/npud/core/DevManager.cc
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DevManager.h"
+#include "util/Logging.h"
+
+#include <dirent.h>
+
+namespace npud
+{
+namespace core
+{
+
+DevManager::DevManager()
+{
+ // std::move on a const lvalue copies anyway, so assign directly.
+ _module_dir = util::getConfigString(util::config::DEVICE_MODULE_PATH);
+}
+
+void DevManager::loadModules()
+{
+ VERBOSE(DevManager) << "load modules from " << _module_dir << std::endl;
+
+ releaseModules();
+
+ DIR *dir;
+ struct dirent *entry;
+
+ // NOTE
+ // opendir and readdir report errors by returning NULL(0),
+ // so NULL is checked here instead of nullptr.
+ dir = opendir(_module_dir.c_str());
+ if (dir == NULL)
+ {
+ VERBOSE(DevManager) << "Fail to open module directory" << std::endl;
+ return;
+ }
+
+ while ((entry = readdir(dir)) != NULL)
+ {
+ std::string modulePath(entry->d_name);
+ if (modulePath.find("npud_backend") == std::string::npos)
+ {
+ continue;
+ }
+
+ DynamicLoader *loader = nullptr;
+ try
+ {
+ loader = new DynamicLoader(modulePath.c_str());
+ }
+ catch (const std::exception &e)
+ {
+ VERBOSE(DevManager) << e.what() << std::endl;
+ continue;
+ }
+
+ std::unique_ptr<Device> dev = std::make_unique<Device>();
+ dev->modulePath = std::move(modulePath);
+ dev->loader = std::unique_ptr<DynamicLoader>(loader);
+
+ _dev = std::move(dev);
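+ // NOTE Only the first matching backend module is loaded; the loop stops here.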
+ break;
+ }
+
+ closedir(dir);
+}
+
+void DevManager::releaseModules()
+{
+ if (_dev)
+ {
+ _dev.reset();
+ }
+}
+
+std::shared_ptr<Backend> DevManager::getBackend()
+{
+ if (!_dev)
+ {
+ throw std::runtime_error("No backend device.");
+ }
+ return _dev->loader->getInstance();
+}
+
+int DevManager::createContext(int deviceId, int priority, NpuContext **npuContext)
+{
+ try
+ {
+ return getBackend()->createContext(deviceId, priority, npuContext);
+ }
+ catch (const std::exception &e)
+ {
+ VERBOSE(DevManager) << e.what() << std::endl;
+ return NPU_STATUS_ERROR_OPERATION_FAILED;
+ }
+}
+
+int DevManager::destroyContext(NpuContext *npuContext)
+{
+ try
+ {
+ return getBackend()->destroyContext(npuContext);
+ }
+ catch (const std::exception &e)
+ {
+ VERBOSE(DevManager) << e.what() << std::endl;
+ return NPU_STATUS_ERROR_OPERATION_FAILED;
+ }
+}
+
+int DevManager::registerModel(NpuContext *npuContext, const std::string &modelPath,
+ ModelID *modelId)
+{
+ try
+ {
+ return getBackend()->registerModel(npuContext, modelPath, modelId);
+ }
+ catch (const std::exception &e)
+ {
+ VERBOSE(DevManager) << e.what() << std::endl;
+ return NPU_STATUS_ERROR_OPERATION_FAILED;
+ }
+}
+
+int DevManager::unregisterModel(NpuContext *npuContext, ModelID modelId)
+{
+ try
+ {
+ return getBackend()->unregisterModel(npuContext, modelId);
+ }
+ catch (const std::exception &e)
+ {
+ VERBOSE(DevManager) << e.what() << std::endl;
+ return NPU_STATUS_ERROR_OPERATION_FAILED;
+ }
+}
+
+int DevManager::createRequest(NpuContext *npuContext, ModelID modelId, RequestID *requestId)
+{
+ try
+ {
+ return getBackend()->createRequest(npuContext, modelId, requestId);
+ }
+ catch (const std::exception &e)
+ {
+ VERBOSE(DevManager) << e.what() << std::endl;
+ return NPU_STATUS_ERROR_OPERATION_FAILED;
+ }
+}
+
+int DevManager::destroyRequest(NpuContext *npuContext, RequestID requestId)
+{
+ try
+ {
+ return getBackend()->destroyRequest(npuContext, requestId);
+ }
+ catch (const std::exception &e)
+ {
+ VERBOSE(DevManager) << e.what() << std::endl;
+ return NPU_STATUS_ERROR_OPERATION_FAILED;
+ }
+}
+
+} // namespace core
+} // namespace npud
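Every DevManager entry point above follows the same shape: resolve the backend via getBackend(), forward the call, and translate any exception into NPU_STATUS_ERROR_OPERATION_FAILED. A minimal sketch of that pattern, using a stand-in Backend interface and status enum because Backend.h is not part of this patch:

    #include <iostream>
    #include <memory>
    #include <stdexcept>

    // Stand-ins: the real Backend interface and NPU_STATUS_* codes live in
    // Backend.h, which this patch does not show.
    enum NpuStatus
    {
      NPU_STATUS_SUCCESS = 0,
      NPU_STATUS_ERROR_OPERATION_FAILED = 1
    };

    struct Backend
    {
      virtual ~Backend() = default;
      virtual int createContext(int deviceId, int priority) = 0;
    };

    class Manager
    {
    public:
      // One try/catch per entry point keeps backend exceptions from crossing
      // the service boundary; callers only ever see a status code.
      int createContext(int deviceId, int priority)
      {
        try
        {
          return backend()->createContext(deviceId, priority);
        }
        catch (const std::exception &e)
        {
          std::cerr << e.what() << std::endl;
          return NPU_STATUS_ERROR_OPERATION_FAILED;
        }
      }

    private:
      std::shared_ptr<Backend> backend()
      {
        if (!_backend)
          throw std::runtime_error("No backend device.");
        return _backend;
      }

      std::shared_ptr<Backend> _backend;
    };

Keeping the try/catch at the service boundary means backend authors can throw freely without taking the daemon down.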
diff --git a/runtime/service/npud/core/DevManager.h b/runtime/service/npud/core/DevManager.h
new file mode 100644
index 000000000..7fbfe408b
--- /dev/null
+++ b/runtime/service/npud/core/DevManager.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_CORE_DEV_MANAGER_H__
+#define __ONE_SERVICE_NPUD_CORE_DEV_MANAGER_H__
+
+#include "DynamicLoader.h"
+
+#include <memory>
+
+namespace npud
+{
+namespace core
+{
+
+struct Device
+{
+ std::string modulePath;
+ std::unique_ptr<DynamicLoader> loader;
+};
+
+class DevManager
+{
+public:
+ DevManager();
+ ~DevManager() = default;
+
+ DevManager(const DevManager &) = delete;
+ DevManager &operator=(const DevManager &) = delete;
+
+ void loadModules();
+ void releaseModules();
+ std::shared_ptr<Backend> getBackend();
+
+ int createContext(int deviceId, int priority, NpuContext **npuContext);
+ int destroyContext(NpuContext *npuContext);
+ int registerModel(NpuContext *npuContext, const std::string &modelPath, ModelID *modelId);
+ int unregisterModel(NpuContext *npuContext, ModelID modelId);
+ int createRequest(NpuContext *npuContext, ModelID modelId, RequestID *requestId);
+ int destroyRequest(NpuContext *npuContext, RequestID requestId);
+
+private:
+ std::unique_ptr<Device> _dev;
+ std::string _module_dir;
+};
+
+} // namespace core
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_CORE_DEV_MANAGER_H__
diff --git a/runtime/service/npud/core/DynamicLoader.cc b/runtime/service/npud/core/DynamicLoader.cc
new file mode 100644
index 000000000..6be2ff908
--- /dev/null
+++ b/runtime/service/npud/core/DynamicLoader.cc
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DynamicLoader.h"
+
+#include "util/Logging.h"
+
+namespace npud
+{
+namespace core
+{
+
+DynamicLoader::DynamicLoader(const char *file, int flags)
+ : _handle(nullptr), _filepath(file), _allocSymbol("allocate"), _deallocSymbol("deallocate")
+{
+ if (!(_handle = dlopen(_filepath.c_str(), flags)))
+ {
+ VERBOSE(DynamicLoader) << "Fail to load " << _filepath << " module: " << dlerror() << std::endl;
+ throw std::runtime_error("Fail to load " + _filepath + " module");
+ }
+
+ NpuAlloc alloc;
+ NpuDealloc dealloc;
+
+ alloc = reinterpret_cast<NpuAlloc>(dlsym(_handle, _allocSymbol.c_str()));
+ dealloc = reinterpret_cast<NpuDealloc>(dlsym(_handle, _deallocSymbol.c_str()));
+ if (!alloc || !dealloc)
+ {
+ VERBOSE(DynamicLoader) << "Fail to load " << _filepath << " symbol: " << dlerror() << std::endl;
+ dlclose(_handle);
+ throw std::runtime_error("Fail to load " + _filepath + " module");
+ }
+
+ _backend = std::shared_ptr<Backend>(alloc(), [dealloc](Backend *b) { dealloc(b); });
+}
+
+DynamicLoader::~DynamicLoader()
+{
+ // NOTE
+ // The _backend shared_ptr must be explicitly deleted before
+ // the dynamic library handle is released.
+ _backend.reset();
+ dlclose(_handle);
+}
+
+std::shared_ptr<Backend> DynamicLoader::getInstance() { return _backend; }
+
+} // namespace core
+} // namespace npud
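The loader above ties the backend's lifetime to the library handle: the shared_ptr deleter is the module's own deallocate(), so the object must be destroyed before dlclose(). A self-contained sketch of that pattern, assuming from the casts above that NpuAlloc is Backend *(*)() and NpuDealloc is void (*)(Backend *); the library name is a placeholder:

    #include <dlfcn.h>
    #include <cstdio>
    #include <memory>

    struct Backend; // opaque here; only the module knows the concrete type

    using NpuAlloc = Backend *(*)();
    using NpuDealloc = void (*)(Backend *);

    int main()
    {
      // "libexample_backend.so" is a placeholder path; link with -ldl
      void *handle = dlopen("libexample_backend.so", RTLD_LAZY);
      if (!handle)
      {
        std::fprintf(stderr, "%s\n", dlerror());
        return 1;
      }

      auto alloc = reinterpret_cast<NpuAlloc>(dlsym(handle, "allocate"));
      auto dealloc = reinterpret_cast<NpuDealloc>(dlsym(handle, "deallocate"));
      if (!alloc || !dealloc)
      {
        std::fprintf(stderr, "%s\n", dlerror());
        dlclose(handle);
        return 1;
      }

      {
        // The deleter captures the module's deallocate(), so destroying the
        // object executes library code; it must run before dlclose().
        std::shared_ptr<Backend> backend(alloc(), [dealloc](Backend *b) { dealloc(b); });
        // ... use backend ...
      } // backend destroyed here, while the handle is still valid

      dlclose(handle);
      return 0;
    }

If the handle were closed first, the deleter would jump into unmapped code.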
diff --git a/runtime/service/npud/core/DynamicLoader.h b/runtime/service/npud/core/DynamicLoader.h
new file mode 100644
index 000000000..c2f036a60
--- /dev/null
+++ b/runtime/service/npud/core/DynamicLoader.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_CORE_DYNAMIC_LOADER_H__
+#define __ONE_SERVICE_NPUD_CORE_DYNAMIC_LOADER_H__
+
+#include "Backend.h"
+
+#include <dlfcn.h>
+#include <string>
+#include <memory>
+
+namespace npud
+{
+namespace core
+{
+
+using DLHandle = void *;
+
+class DynamicLoader
+{
+public:
+ DynamicLoader(const char *file, int flags = RTLD_LAZY);
+ ~DynamicLoader();
+
+ DynamicLoader(const DynamicLoader &) = delete;
+
+ std::shared_ptr<Backend> getInstance();
+
+private:
+ DLHandle _handle;
+ std::string _filepath;
+ std::string _allocSymbol;
+ std::string _deallocSymbol;
+ std::shared_ptr<Backend> _backend;
+};
+
+} // namespace core
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_CORE_DYNAMIC_LOADER_H__
diff --git a/runtime/service/npud/core/Server.cc b/runtime/service/npud/core/Server.cc
new file mode 100644
index 000000000..751849e57
--- /dev/null
+++ b/runtime/service/npud/core/Server.cc
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Server.h"
+#include "util/Logging.h"
+
+#include <thread>
+
+namespace npud
+{
+namespace core
+{
+
+std::atomic_bool Server::_isRunning(false);
+
+Server::Server() noexcept
+ : _mainloop(g_main_loop_new(NULL, FALSE), g_main_loop_unref), _signal(std::make_unique<Signal>()),
+ _core(std::make_unique<Core>()), _dbus(std::make_unique<DBus>())
+{
+}
+
+bool Server::isServiceReady()
+{
+ if (!_isRunning.load())
+ {
+ VERBOSE(Server) << "Server is not started." << std::endl;
+ return false;
+ }
+
+ if (!_dbus->isReady())
+ {
+ VERBOSE(Server) << "DBus service is not ready." << std::endl;
+ return false;
+ }
+
+ return true;
+}
+
+void Server::run(void)
+{
+ VERBOSE(Server) << "Starting Server\n";
+
+ if (_isRunning.exchange(true))
+ {
+ return;
+ }
+
+ _core->init();
+
+ g_main_loop_run(_mainloop.get());
+}
+
+void Server::stop(void)
+{
+ VERBOSE(Server) << "Stop Server\n";
+
+ if (!_isRunning.load())
+ {
+ return;
+ }
+
+ while (!g_main_loop_is_running(_mainloop.get()))
+ {
+ std::this_thread::yield();
+ }
+
+ _core->deinit();
+
+ g_main_loop_quit(_mainloop.get());
+ _isRunning = false;
+}
+
+} // namespace core
+} // namespace npud
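The stop() path above waits on g_main_loop_is_running() because g_main_loop_quit() is a no-op on a loop that has not started yet; a quit issued too early would be lost and run() would block forever. A stripped-down sketch of the same handshake:

    // e.g. build: g++ sketch.cc $(pkg-config --cflags --libs glib-2.0) -pthread
    #include <glib.h>
    #include <thread>

    int main()
    {
      GMainLoop *loop = g_main_loop_new(nullptr, FALSE);

      std::thread stopper([loop] {
        while (!g_main_loop_is_running(loop))
          std::this_thread::yield(); // don't quit before the loop actually runs
        g_main_loop_quit(loop);
      });

      g_main_loop_run(loop); // returns once stopper calls quit
      stopper.join();
      g_main_loop_unref(loop);
      return 0;
    }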
diff --git a/runtime/service/npud/core/Server.h b/runtime/service/npud/core/Server.h
new file mode 100644
index 000000000..09f115ccc
--- /dev/null
+++ b/runtime/service/npud/core/Server.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_CORE_SERVER_H__
+#define __ONE_SERVICE_NPUD_CORE_SERVER_H__
+
+#include "Signal.h"
+#include "Core.h"
+#include "DBus.h"
+
+#include <glib.h>
+#include <memory>
+#include <atomic>
+
+namespace npud
+{
+namespace core
+{
+
+class Server
+{
+public:
+ Server(const Server &) = delete;
+ Server &operator=(const Server &) = delete;
+
+ void run(void);
+ void stop(void);
+
+ bool isRunning() { return _isRunning.load(); }
+ bool isServiceReady();
+
+ static Server &instance(void)
+ {
+ static Server server;
+ return server;
+ }
+
+ const Core &core(void) { return *_core.get(); }
+
+private:
+ Server() noexcept;
+
+ static std::atomic_bool _isRunning;
+
+ std::unique_ptr<GMainLoop, void (*)(GMainLoop *)> _mainloop;
+ std::unique_ptr<Signal> _signal;
+ std::unique_ptr<Core> _core;
+ std::unique_ptr<DBus> _dbus;
+};
+
+} // namespace core
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_CORE_SERVER_H__
diff --git a/runtime/service/npud/core/Signal.cc b/runtime/service/npud/core/Signal.cc
new file mode 100644
index 000000000..4b2cc0b4f
--- /dev/null
+++ b/runtime/service/npud/core/Signal.cc
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Signal.h"
+#include "Server.h"
+#include "util/Logging.h"
+
+#include <csignal>
+
+namespace npud
+{
+namespace core
+{
+
+Signal::Signal(void) noexcept { init(); }
+
+void Signal::init(void)
+{
+ // NOTE Types of signals
+ // SIGTERM: termination request, sent to the program
+ // SIGINT: external interrupt, usually initiated by the user
+ // SIGILL: invalid program image, such as invalid instruction
+ // SIGABRT: abnormal termination condition, as is e.g. initiated by std::abort()
+ // SIGFPE: erroneous arithmetic operation such as divide by zero
+ // from https://en.cppreference.com/w/cpp/utility/program/SIG_types
+ std::signal(SIGTERM, handleSignal);
+ std::signal(SIGINT, handleSignal);
+ std::signal(SIGILL, handleSignal);
+ std::signal(SIGABRT, handleSignal);
+ std::signal(SIGFPE, handleSignal);
+}
+
+void Signal::handleSignal(int signum)
+{
+ VERBOSE(signal) << "Signal received: " << strsignal(signum) << "(" << signum << ")\n";
+ Server::instance().stop();
+}
+
+} // namespace core
+} // namespace npud
diff --git a/runtime/service/npud/core/Signal.h b/runtime/service/npud/core/Signal.h
new file mode 100644
index 000000000..ffddc7255
--- /dev/null
+++ b/runtime/service/npud/core/Signal.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_CORE_SIGNAL_H__
+#define __ONE_SERVICE_NPUD_CORE_SIGNAL_H__
+
+namespace npud
+{
+namespace core
+{
+
+class Signal
+{
+public:
+ Signal() noexcept;
+
+ void init(void);
+ static void handleSignal(int signum);
+};
+
+} // namespace core
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_CORE_SIGNAL_H__
diff --git a/runtime/service/npud/core/ir/DataType.h b/runtime/service/npud/core/ir/DataType.h
new file mode 100644
index 000000000..923088e33
--- /dev/null
+++ b/runtime/service/npud/core/ir/DataType.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_CORE_IR_DATATYPE_H__
+#define __ONE_SERVICE_NPUD_CORE_IR_DATATYPE_H__
+
+#include <cstdlib>
+
+namespace npud
+{
+namespace core
+{
+namespace ir
+{
+
+enum class DataType
+{
+ INT8 = 0,
+ UINT8,
+ QUANT_UINT8_ASYMM,
+ INT16,
+ UINT16,
+ QUANT_INT16_SYMM,
+ INT32,
+ UINT32,
+ FLOAT32,
+ INT64,
+ UINT64,
+ FLOAT64,
+};
+
+} // namespace ir
+} // namespace core
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_CORE_IR_DATATYPE_H__
diff --git a/runtime/service/npud/core/ir/Layout.h b/runtime/service/npud/core/ir/Layout.h
new file mode 100644
index 000000000..a28e69a7f
--- /dev/null
+++ b/runtime/service/npud/core/ir/Layout.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_CORE_IR_LAYOUT_H__
+#define __ONE_SERVICE_NPUD_CORE_IR_LAYOUT_H__
+
+#include <functional>
+#include <stdexcept>
+#include <string>
+
+namespace npud
+{
+namespace core
+{
+namespace ir
+{
+
+enum class Layout
+{
+ UNKNOWN = 0,
+ NHWC,
+ NCHW
+};
+
+} // namespace ir
+} // namespace core
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_CORE_IR_LAYOUT_H__
diff --git a/runtime/service/npud/core/main.cc b/runtime/service/npud/core/main.cc
new file mode 100644
index 000000000..642201ca6
--- /dev/null
+++ b/runtime/service/npud/core/main.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Server.h"
+#include "util/Logging.h"
+
+using namespace npud;
+
+int main(int argc, const char *argv[])
+{
+ auto &server = core::Server::instance();
+
+ VERBOSE(main) << "Starting npud\n";
+ try
+ {
+ server.run();
+ }
+ catch (const std::runtime_error &err)
+ {
+ std::cerr << err.what() << std::endl;
+ return 1;
+ }
+
+ VERBOSE(main) << "Finished npud\n";
+ return 0;
+}
diff --git a/runtime/service/npud/core/util/Config.lst b/runtime/service/npud/core/util/Config.lst
new file mode 100644
index 000000000..2fb9993cc
--- /dev/null
+++ b/runtime/service/npud/core/util/Config.lst
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CONFIG
+#error Define CONFIG before including this file
+#endif
+
+// Name | Type | Default
+CONFIG(NPUD_LOG_ENABLE , bool , "0")
+CONFIG(DEVICE_MODULE_PATH , std::string , "/usr/lib/npud/devices")
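Config.lst is an X-macro list: each consumer defines CONFIG before including the file, so one list expands into different code at each include site (ConfigSource.cc below uses it twice, once for a defaults table and once for the key-name constants). A standalone toy of the idiom, with the list inlined and the Type column dropped for brevity:

    #include <iostream>
    #include <string>
    #include <unordered_map>

    #define CONFIG_LIST            \
      CONFIG(NPUD_LOG_ENABLE, "0") \
      CONFIG(DEVICE_MODULE_PATH, "/usr/lib/npud/devices")

    int main()
    {
      // Expansion 1: build a name -> default-value table
      std::unordered_map<std::string, std::string> defaults;
    #define CONFIG(Name, Default) defaults.emplace(#Name, Default);
      CONFIG_LIST
    #undef CONFIG

      // Expansion 2: emit the key names themselves
    #define CONFIG(Name, Default) std::cout << #Name << '\n';
      CONFIG_LIST
    #undef CONFIG

      std::cout << defaults.at("DEVICE_MODULE_PATH") << '\n';
      return 0;
    }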
diff --git a/runtime/service/npud/core/util/ConfigSource.cc b/runtime/service/npud/core/util/ConfigSource.cc
new file mode 100644
index 000000000..7a14b0200
--- /dev/null
+++ b/runtime/service/npud/core/util/ConfigSource.cc
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConfigSource.h"
+
+#include <misc/EnvConfigSource.h>
+#include <misc/GeneralConfigSource.h>
+#include <misc/IConfigSource.h>
+
+#include <algorithm>
+#include <array>
+#include <cassert>
+#include <memory>
+#include <unordered_map>
+
+namespace npud
+{
+namespace util
+{
+
+using namespace nnfw::misc;
+
+static std::unique_ptr<IConfigSource> _source;
+
+void config_source(std::unique_ptr<IConfigSource> &&source) { _source = std::move(source); }
+
+static IConfigSource *config_source()
+{
+ if (!_source)
+ {
+#ifdef ENVVAR_FOR_DEFAULT_CONFIG
+ // Default ConfigSource is EnvConfigSource
+ _source = std::make_unique<EnvConfigSource>();
+#else
+ _source = std::make_unique<GeneralConfigSource>();
+#endif // ENVVAR_FOR_DEFAULT_CONFIG
+ }
+ return _source.get();
+}
+
+static std::string getConfigOrDefault(const std::string &key)
+{
+ static std::unordered_map<std::string, std::string> defaults;
+ if (defaults.empty())
+ {
+#define CONFIG(Name, Type, Default) \
+ { \
+ auto name = std::string{#Name}; \
+ defaults.emplace(name, std::string{Default}); \
+ }
+
+#include "Config.lst"
+
+#undef CONFIG
+ }
+
+  // Treat an empty string and an absent value the same
+  auto ret = config_source()->get(key);
+  // If the source has no value, fall back to the defaults table
+  if (ret.empty())
+ {
+ auto itr = defaults.find(key);
+ if (itr != defaults.end())
+ {
+ // Return the default value if exists
+ ret = itr->second;
+ }
+ }
+
+ return ret;
+}
+
+bool toBool(const std::string &val)
+{
+ static const std::array<std::string, 5> false_list{"0", "OFF", "FALSE", "N", "NO"};
+ auto false_found = std::find(false_list.begin(), false_list.end(), val);
+ return false_found == false_list.end();
+}
+
+int toInt(const std::string &val) { return std::stoi(val); }
+
+bool getConfigBool(const std::string &key)
+{
+ auto raw = getConfigOrDefault(key);
+ return toBool(raw);
+}
+
+int getConfigInt(const std::string &key)
+{
+ auto raw = getConfigOrDefault(key);
+ return toInt(raw);
+}
+
+std::string getConfigString(const std::string &key) { return getConfigOrDefault(key); }
+
+} // namespace util
+} // namespace npud
+
+namespace npud
+{
+namespace util
+{
+namespace config
+{
+
+#define CONFIG(Name, Type, Default) const char *Name = #Name;
+
+#include "Config.lst"
+
+#undef CONFIG
+
+} // namespace config
+} // namespace util
+} // namespace npud
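Reading a value goes through the configured source first and falls back to the Config.lst default, and toBool() treats anything outside its false-list as true. A hedged usage sketch, assuming the default EnvConfigSource reads environment variables as its name suggests, and assuming the npud include paths:

    #include "util/ConfigSource.h"

    #include <iostream>

    int main()
    {
      using namespace npud::util;

      // e.g. launched as: NPUD_LOG_ENABLE=1 ./npud
      const bool log_enabled = getConfigBool(config::NPUD_LOG_ENABLE);
      const std::string module_dir = getConfigString(config::DEVICE_MODULE_PATH);

      std::cout << "logging: " << log_enabled << ", modules: " << module_dir << std::endl;
      return 0;
    }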
diff --git a/runtime/service/npud/core/util/ConfigSource.h b/runtime/service/npud/core/util/ConfigSource.h
new file mode 100644
index 000000000..f4ecc79a5
--- /dev/null
+++ b/runtime/service/npud/core/util/ConfigSource.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_UTIL_CONFIG_SOURCE_H__
+#define __ONE_SERVICE_NPUD_UTIL_CONFIG_SOURCE_H__
+
+#include <string>
+
+namespace npud
+{
+namespace util
+{
+
+bool getConfigBool(const std::string &key);
+int getConfigInt(const std::string &key);
+std::string getConfigString(const std::string &key);
+
+} // namespace util
+} // namespace npud
+
+namespace npud
+{
+namespace util
+{
+namespace config
+{
+
+#define CONFIG(Name, Type, Default) extern const char *Name;
+
+#include "Config.lst"
+
+#undef CONFIG
+
+} // namespace config
+} // namespace util
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_UTIL_CONFIG_SOURCE_H__
diff --git a/runtime/service/npud/core/util/Logging.h b/runtime/service/npud/core/util/Logging.h
new file mode 100644
index 000000000..0b75b3966
--- /dev/null
+++ b/runtime/service/npud/core/util/Logging.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_UTIL_LOGGING_H__
+#define __ONE_SERVICE_NPUD_UTIL_LOGGING_H__
+
+#include <iostream>
+#include <cstring>
+
+#include "ConfigSource.h"
+
+namespace npud
+{
+namespace util
+{
+namespace logging
+{
+class Context
+{
+public:
+ Context() noexcept : _enabled{false}
+ {
+ const auto env = util::getConfigBool(util::config::NPUD_LOG_ENABLE);
+
+ if (env)
+ {
+ _enabled = true;
+ }
+ }
+
+ static Context &get() noexcept
+ {
+ static Context ctx;
+ return ctx;
+ }
+
+public:
+ bool enabled(void) const { return _enabled; }
+
+private:
+ bool _enabled;
+};
+
+static Context &ctx = Context::get();
+
+inline std::string decorated_name(const char *input)
+{
+  const std::size_t min_prefix = 16;
+ std::string prefix(input);
+ auto len_prefix = prefix.size();
+ if (len_prefix > min_prefix)
+ return "[" + prefix + "] ";
+ std::string spaces((min_prefix - len_prefix) / 2, ' ');
+ return (len_prefix % 2 ? "[ " : "[") + spaces + prefix + spaces + "] ";
+}
+} // namespace logging
+} // namespace util
+} // namespace npud
+
+#define VERBOSE(name) \
+ if (::npud::util::logging::ctx.enabled()) \
+ std::cout << ::npud::util::logging::decorated_name(#name)
+
+#define VERBOSE_F() \
+ if (::npud::util::logging::ctx.enabled()) \
+ std::cout << ::npud::util::logging::decorated_name(__func__)
+
+#define WHEN_LOG_ENABLED(METHOD) \
+ if (::npud::util::logging::ctx.enabled()) \
+ do \
+ { \
+ METHOD; \
+ } while (0)
+
+#endif // __ONE_SERVICE_NPUD_UTIL_LOGGING_H__
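Note that VERBOSE expands to a bare if-statement, so an unbraced use nested inside another if/else would capture the following else (the classic dangling-else hazard). A short usage sketch with the braces that avoid it, assuming the npud include paths:

    #include "util/Logging.h"

    void report(bool ok)
    {
      if (ok)
      {
        VERBOSE(report) << "ok" << std::endl; // braces keep the else below ours
      }
      else
      {
        VERBOSE(report) << "failed" << std::endl;
      }
    }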
diff --git a/runtime/service/npud/org.tizen.npud.conf b/runtime/service/npud/org.tizen.npud.conf
new file mode 100644
index 000000000..025ca6797
--- /dev/null
+++ b/runtime/service/npud/org.tizen.npud.conf
@@ -0,0 +1,9 @@
+<!DOCTYPE busconfig PUBLIC "-//freedesktop//DTD D-BUS Bus Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/dbus/1.0/busconfig.dtd">
+<busconfig>
+ <policy context="default">
+ <allow own="org.tizen.npud"/>
+ <allow send_destination="org.tizen.npud"/>
+ <allow receive_sender="org.tizen.npud"/>
+ </policy>
+</busconfig>
diff --git a/runtime/service/npud/org.tizen.npud.xml b/runtime/service/npud/org.tizen.npud.xml
new file mode 100644
index 000000000..09b310ad9
--- /dev/null
+++ b/runtime/service/npud/org.tizen.npud.xml
@@ -0,0 +1,162 @@
+<!DOCTYPE node PUBLIC "-//freedesktop//DTD D-BUS Object Introspection 1.0//EN"
+ "http://www.freedesktop.org/standards/dbus/1.0/introspect.dtd">
+<node name="/">
+ <!-- org.tizen.npud.core:
+ @short_description: Npud interface
+
+      The interface used to run AI models on NPU devices.
+ -->
+ <interface name="org.tizen.npud.core">
+ <!--
+ device_get_available_list:
+ @error: The error status of the function.
+
+      Get the list of all available NPU devices.
+ -->
+ <method name="device_get_available_list">
+ <arg name="error" type="i" direction="out" />
+ </method>
+ <!--
+ context_create:
+      @device_id: The device number to use.
+ @priority: The device priority.
+ @ctx: The Context handle.
+ @error: The error status of the function.
+
+ Create context.
+ -->
+ <method name="context_create">
+ <arg name="device_id" type="i" direction="in" />
+ <arg name="priority" type="i" direction="in" />
+ <arg name="ctx" type="t" direction="out" />
+ <arg name="error" type="i" direction="out" />
+ </method>
+ <!--
+ context_destroy:
+ @ctx: The Context handle to destroy.
+ @error: The error status of the function.
+
+ Destroy context.
+ -->
+ <method name="context_destroy">
+ <arg name="ctx" type="t" direction="in" />
+ <arg name="error" type="i" direction="out" />
+ </method>
+ <!--
+ buffers_create:
+ @ctx: The Context handle.
+      @buffers: The array of buffer structures. (i:type, t:address, u:size)
+      @out_buffers: The array of buffer structures containing the created buffer addresses.
+ @error: The error status of the function.
+
+ Create buffer array.
+ -->
+ <method name="buffers_create">
+ <arg name="ctx" type="t" direction="in" />
+ <arg name="buffers" type="a(itu)" direction="in" />
+ <arg name="out_buffers" type="a(itu)" direction="out" />
+ <arg name="error" type="i" direction="out" />
+ </method>
+ <!--
+ buffers_destroy:
+ @ctx: The Context handle.
+      @buffers: The array of buffer structures. (i:type, t:address, u:size)
+ @error: The error status of the function.
+
+ Destroy buffer array.
+ -->
+ <method name="buffers_destroy">
+ <arg name="ctx" type="t" direction="in" />
+ <arg name="buffers" type="a(itu)" direction="in" />
+ <arg name="error" type="i" direction="out" />
+ </method>
+ <!--
+ network_create:
+ @ctx: The context handle.
+ @model_path: The model path to run.
+ @nw_handle: The Network handle.
+ @error: The error status of the function.
+
+ Create network.
+
+ TODO Support file descriptor input
+ -->
+ <method name="network_create">
+ <arg name="ctx" type="t" direction="in" />
+ <arg name="model_path" type="s" direction="in" />
+ <arg name="nw_handle" type="u" direction="out" />
+ <arg name="error" type="i" direction="out" />
+ </method>
+ <!--
+ network_destroy:
+ @ctx: The context handle.
+ @nw_handle: The Network handle.
+ @error: The error status of the function.
+
+ Destroy network.
+ -->
+ <method name="network_destroy">
+ <arg name="ctx" type="t" direction="in" />
+ <arg name="nw_handle" type="u" direction="in" />
+ <arg name="error" type="i" direction="out" />
+ </method>
+ <!--
+ request_create:
+ @ctx: The context handle.
+ @nw_handle: The Network handle.
+ @rq_handle: The Request handle.
+ @error: The error status of the function.
+
+ Create request.
+ -->
+ <method name="request_create">
+ <arg name="ctx" type="t" direction="in" />
+ <arg name="nw_handle" type="u" direction="in" />
+ <arg name="rq_handle" type="u" direction="out" />
+ <arg name="error" type="i" direction="out" />
+ </method>
+ <!--
+ request_destroy:
+ @ctx: The context handle.
+ @rq_handle: The Request handle.
+ @error: The error status of the function.
+
+ Destroy request.
+ -->
+ <method name="request_destroy">
+ <arg name="ctx" type="t" direction="in" />
+ <arg name="rq_handle" type="u" direction="in" />
+ <arg name="error" type="i" direction="out" />
+ </method>
+ <!--
+ request_set_data:
+ @ctx: The context handle.
+ @rq_handle: The Request handle.
+      @input_buffers: The input buffer data.
+      @output_buffers: The output buffer data.
+ @error: The error status of the function.
+
+ Set request data.
+ -->
+ <method name="request_set_data">
+ <arg name="ctx" type="t" direction="in" />
+ <arg name="rq_handle" type="u" direction="in" />
+ <arg name="input_buffers" type="a(itu)" direction="in" />
+ <arg name="output_buffers" type="a(itu)" direction="in" />
+ <arg name="error" type="i" direction="out" />
+ </method>
+ <!--
+ execute_run:
+ @ctx: The context handle.
+ @rq_handle: The Request handle.
+ @error: The error status of the function.
+
+      Execute the request.
+ -->
+ <method name="execute_run">
+ <arg name="ctx" type="t" direction="in" />
+ <arg name="rq_handle" type="u" direction="in" />
+ <arg name="error" type="i" direction="out" />
+ </method>
+ </interface>
+</node>
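This introspection XML is the source of the generated dbus-core.h bindings the tests below call (npud_core_proxy_new_for_bus_sync, npud_core_call_context_create_sync, and friends). For illustration, a hedged sketch of the same context_create call made with raw GIO instead of the generated proxy; the reply type "(ti)" mirrors the out arguments declared above:

    #include <gio/gio.h>
    #include <iostream>

    int main()
    {
      GError *error = nullptr;
      GDBusConnection *conn = g_bus_get_sync(G_BUS_TYPE_SYSTEM, nullptr, &error);
      if (!conn)
      {
        std::cerr << error->message << std::endl;
        g_error_free(error);
        return 1;
      }

      GVariant *reply = g_dbus_connection_call_sync(
        conn, "org.tizen.npud", "/org/tizen/npud", "org.tizen.npud.core", "context_create",
        g_variant_new("(ii)", 0 /* device_id */, 0 /* priority */), G_VARIANT_TYPE("(ti)"),
        G_DBUS_CALL_FLAGS_NONE, -1, nullptr, &error);
      if (!reply)
      {
        std::cerr << error->message << std::endl;
        g_error_free(error);
        g_object_unref(conn);
        return 1;
      }

      guint64 ctx = 0;
      gint out_error = -1;
      g_variant_get(reply, "(ti)", &ctx, &out_error);
      std::cout << "ctx=" << ctx << " error=" << out_error << std::endl;

      g_variant_unref(reply);
      g_object_unref(conn);
      return 0;
    }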
diff --git a/runtime/service/npud/tests/CMakeLists.txt b/runtime/service/npud/tests/CMakeLists.txt
new file mode 100644
index 000000000..b97823b0b
--- /dev/null
+++ b/runtime/service/npud/tests/CMakeLists.txt
@@ -0,0 +1,17 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+file(GLOB_RECURSE TESTS "*.cc")
+
+add_executable(npud_gtest ${TESTS})
+
+set_target_properties(npud_gtest PROPERTIES LINKER_LANGUAGE CXX)
+target_include_directories(npud_gtest PUBLIC ${NPUD_INCLUDE_DIRS})
+target_include_directories(npud_gtest PUBLIC ${GLIB2.0_INCLUDE_DIRS})
+target_link_libraries(npud_gtest PRIVATE ${GLIB2.0_LIBRARIES})
+target_link_libraries(npud_gtest PRIVATE ${LIB_PTHREAD})
+target_link_libraries(npud_gtest PRIVATE npud_core)
+target_link_libraries(npud_gtest PRIVATE gtest_main dl)
+
+install(TARGETS npud_gtest DESTINATION npud-gtest)
diff --git a/runtime/service/npud/tests/core/DBus.test.cc b/runtime/service/npud/tests/core/DBus.test.cc
new file mode 100644
index 000000000..1c52a2353
--- /dev/null
+++ b/runtime/service/npud/tests/core/DBus.test.cc
@@ -0,0 +1,608 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <core/Server.h>
+#include <gtest/gtest.h>
+#include <thread>
+#include <gio/gio.h>
+#include <dbus-core.h>
+#include <iostream>
+#include <cstdlib>
+#include <unistd.h>
+
+namespace
+{
+using namespace npud;
+using namespace core;
+
+//
+// DBusTest setup/teardown
+//
+class DBusTest : public ::testing::Test
+{
+protected:
+ static void runTask()
+ {
+ auto &server = Server::instance();
+ server.run();
+ }
+
+ void SetUp() override
+ {
+ std::thread child = std::thread(runTask);
+ child.detach();
+ auto &server = Server::instance();
+    while (!server.isServiceReady())
+    {
+      std::this_thread::yield();
+    }
+ }
+
+ void TearDown() override
+ {
+ auto &server = Server::instance();
+ if (server.isRunning())
+ {
+ server.stop();
+ }
+ }
+
+ NpudCore *getProxy()
+ {
+ GError *error = nullptr;
+ NpudCore *proxy = nullptr;
+ proxy = npud_core_proxy_new_for_bus_sync(G_BUS_TYPE_SYSTEM, G_DBUS_PROXY_FLAGS_NONE,
+ "org.tizen.npud", "/org/tizen/npud", NULL, &error);
+ if (error)
+ {
+ g_error_free(error);
+ }
+ return proxy;
+ }
+
+ const std::string &getModel()
+ {
+ if (model.empty())
+ {
+      // getenv() may return NULL; guard before building the path
+      const char *model_path = std::getenv("GTEST_MODEL_PATH");
+      if (model_path != nullptr)
+      {
+        model = model_path + std::string("/mv1.q8/mv1.q8.tvn");
+      }
+ }
+ if (access(model.c_str(), F_OK) != 0)
+ {
+ model.clear();
+ }
+ return model;
+ }
+
+private:
+ std::string model;
+};
+
+//
+// DBusTest
+//
+TEST_F(DBusTest, get_proxy)
+{
+ NpudCore *proxy = this->getProxy();
+ ASSERT_NE(proxy, nullptr);
+}
+
+TEST_F(DBusTest, device_get_available_list)
+{
+ NpudCore *proxy = this->getProxy();
+ ASSERT_NE(proxy, nullptr);
+
+ GError *error = NULL;
+ gint out_error = -1;
+ npud_core_call_device_get_available_list_sync(proxy, &out_error, NULL, &error);
+ if (error)
+ {
+ g_error_free(error);
+ }
+ ASSERT_EQ(out_error, 0);
+}
+
+TEST_F(DBusTest, context_create)
+{
+ NpudCore *proxy = this->getProxy();
+ ASSERT_NE(proxy, nullptr);
+
+ GError *error = NULL;
+ gint out_error = -1;
+ gint arg_device_id = 0;
+ gint arg_priority = 0;
+  guint64 out_ctx = 0;
+ npud_core_call_context_create_sync(proxy, arg_device_id, arg_priority, &out_ctx, &out_error, NULL,
+ &error);
+ if (error)
+ {
+ g_error_free(error);
+ }
+ ASSERT_EQ(out_error, 0);
+}
+
+TEST_F(DBusTest, context_destroy)
+{
+ NpudCore *proxy = this->getProxy();
+ ASSERT_NE(proxy, nullptr);
+
+ GError *error = NULL;
+ gint out_error = -1;
+ gint arg_device_id = 0;
+ gint arg_priority = 0;
+ guint64 out_ctx = 0;
+ npud_core_call_context_create_sync(proxy, arg_device_id, arg_priority, &out_ctx, &out_error, NULL,
+ &error);
+ if (error)
+ {
+ g_error_free(error);
+ }
+ ASSERT_EQ(out_error, 0);
+
+ npud_core_call_context_destroy_sync(proxy, out_ctx, &out_error, NULL, &error);
+ if (error)
+ {
+ g_error_free(error);
+ }
+ ASSERT_EQ(out_error, 0);
+}
+
+TEST_F(DBusTest, neg_context_destroy_invalid_ctx)
+{
+ NpudCore *proxy = this->getProxy();
+ ASSERT_NE(proxy, nullptr);
+
+ GError *error = NULL;
+ gint out_error = -1;
+ guint64 out_ctx = 0;
+ npud_core_call_context_destroy_sync(proxy, out_ctx, &out_error, NULL, &error);
+ if (error)
+ {
+ g_error_free(error);
+ }
+ ASSERT_NE(out_error, 0);
+}
+
+TEST_F(DBusTest, network_create)
+{
+ NpudCore *proxy = this->getProxy();
+ ASSERT_NE(proxy, nullptr);
+
+ GError *error = NULL;
+ gint out_error = -1;
+ gint arg_device_id = 0;
+ gint arg_priority = 0;
+ guint64 out_ctx = 0;
+ npud_core_call_context_create_sync(proxy, arg_device_id, arg_priority, &out_ctx, &out_error, NULL,
+ &error);
+ if (error)
+ {
+ g_error_free(error);
+ error = NULL;
+ }
+ ASSERT_EQ(out_error, 0);
+
+ out_error = -1;
+ const gchar *model_path = this->getModel().c_str();
+ guint out_nw_handle = 0;
+ npud_core_call_network_create_sync(proxy, out_ctx, model_path, &out_nw_handle, &out_error, NULL,
+ &error);
+ if (error)
+ {
+ g_error_free(error);
+ }
+ ASSERT_EQ(out_error, 0);
+}
+
+TEST_F(DBusTest, neg_network_create_invalid_ctx)
+{
+ NpudCore *proxy = this->getProxy();
+ ASSERT_NE(proxy, nullptr);
+
+ GError *error = NULL;
+ gint out_error = -1;
+ guint64 out_ctx = -1;
+ const gchar *model_path = this->getModel().c_str();
+ guint out_nw_handle = 0;
+ npud_core_call_network_create_sync(proxy, out_ctx, model_path, &out_nw_handle, &out_error, NULL,
+ &error);
+ if (error)
+ {
+ g_error_free(error);
+ }
+ ASSERT_NE(out_error, 0);
+}
+
+TEST_F(DBusTest, neg_network_create_invalid_model)
+{
+ NpudCore *proxy = this->getProxy();
+ ASSERT_NE(proxy, nullptr);
+
+ GError *error = NULL;
+ gint out_error = -1;
+ gint arg_device_id = 0;
+ gint arg_priority = 0;
+ guint64 out_ctx = 0;
+ npud_core_call_context_create_sync(proxy, arg_device_id, arg_priority, &out_ctx, &out_error, NULL,
+ &error);
+ if (error)
+ {
+ g_error_free(error);
+ error = NULL;
+ }
+ ASSERT_EQ(out_error, 0);
+
+ out_error = -1;
+ // Invalid model
+ const gchar *model_path = "invalid.tvn";
+ guint out_nw_handle = 0;
+ npud_core_call_network_create_sync(proxy, out_ctx, model_path, &out_nw_handle, &out_error, NULL,
+ &error);
+ if (error)
+ {
+ g_error_free(error);
+ }
+ ASSERT_NE(out_error, 0);
+}
+
+TEST_F(DBusTest, network_destroy)
+{
+ NpudCore *proxy = this->getProxy();
+ ASSERT_NE(proxy, nullptr);
+
+ GError *error = NULL;
+ gint out_error = -1;
+ gint arg_device_id = 0;
+ gint arg_priority = 0;
+ guint64 out_ctx = 0;
+ npud_core_call_context_create_sync(proxy, arg_device_id, arg_priority, &out_ctx, &out_error, NULL,
+ &error);
+ if (error)
+ {
+ g_error_free(error);
+ error = NULL;
+ }
+ ASSERT_EQ(out_error, 0);
+
+ out_error = -1;
+ const gchar *model_path = this->getModel().c_str();
+ guint out_nw_handle = 0;
+ npud_core_call_network_create_sync(proxy, out_ctx, model_path, &out_nw_handle, &out_error, NULL,
+ &error);
+ if (error)
+ {
+ g_error_free(error);
+ }
+ ASSERT_EQ(out_error, 0);
+
+ out_error = -1;
+ npud_core_call_network_destroy_sync(proxy, out_ctx, out_nw_handle, &out_error, NULL, &error);
+ if (error)
+ {
+ g_error_free(error);
+ }
+ ASSERT_EQ(out_error, 0);
+}
+
+TEST_F(DBusTest, neg_network_destroy_invalid_ctx)
+{
+ NpudCore *proxy = this->getProxy();
+ ASSERT_NE(proxy, nullptr);
+
+ GError *error = NULL;
+ gint out_error = -1;
+ gint arg_device_id = 0;
+ gint arg_priority = 0;
+ guint64 out_ctx = 0;
+ npud_core_call_context_create_sync(proxy, arg_device_id, arg_priority, &out_ctx, &out_error, NULL,
+ &error);
+ if (error)
+ {
+ g_error_free(error);
+ error = NULL;
+ }
+ ASSERT_EQ(out_error, 0);
+
+ out_error = -1;
+ const gchar *model_path = this->getModel().c_str();
+ guint out_nw_handle = 0;
+ npud_core_call_network_create_sync(proxy, out_ctx, model_path, &out_nw_handle, &out_error, NULL,
+ &error);
+ if (error)
+ {
+ g_error_free(error);
+ }
+ ASSERT_EQ(out_error, 0);
+
+ out_error = -1;
+ // Invalid ctx
+ out_ctx = -1;
+ npud_core_call_network_destroy_sync(proxy, out_ctx, out_nw_handle, &out_error, NULL, &error);
+ if (error)
+ {
+ g_error_free(error);
+ }
+ ASSERT_NE(out_error, 0);
+}
+
+TEST_F(DBusTest, neg_network_destroy_invalid_nw_handle)
+{
+ NpudCore *proxy = this->getProxy();
+ ASSERT_NE(proxy, nullptr);
+
+ GError *error = NULL;
+ gint out_error = -1;
+ gint arg_device_id = 0;
+ gint arg_priority = 0;
+ guint64 out_ctx = 0;
+ npud_core_call_context_create_sync(proxy, arg_device_id, arg_priority, &out_ctx, &out_error, NULL,
+ &error);
+ if (error)
+ {
+ g_error_free(error);
+ error = NULL;
+ }
+ ASSERT_EQ(out_error, 0);
+
+ out_error = -1;
+ guint out_nw_handle = -1;
+ npud_core_call_network_destroy_sync(proxy, out_ctx, out_nw_handle, &out_error, NULL, &error);
+ if (error)
+ {
+ g_error_free(error);
+ }
+ ASSERT_NE(out_error, 0);
+}
+
+TEST_F(DBusTest, request_create)
+{
+ NpudCore *proxy = this->getProxy();
+ ASSERT_NE(proxy, nullptr);
+
+ GError *error = NULL;
+ gint out_error = -1;
+ gint arg_device_id = 0;
+ gint arg_priority = 0;
+ guint64 out_ctx = 0;
+ npud_core_call_context_create_sync(proxy, arg_device_id, arg_priority, &out_ctx, &out_error, NULL,
+ &error);
+ if (error)
+ {
+ g_error_free(error);
+ error = NULL;
+ }
+ ASSERT_EQ(out_error, 0);
+
+ out_error = -1;
+ const gchar *model_path = this->getModel().c_str();
+ guint out_nw_handle = 0;
+ npud_core_call_network_create_sync(proxy, out_ctx, model_path, &out_nw_handle, &out_error, NULL,
+ &error);
+ if (error)
+ {
+ g_error_free(error);
+ error = NULL;
+ }
+ ASSERT_EQ(out_error, 0);
+
+ out_error = -1;
+ guint out_rq_handle = 0;
+ npud_core_call_request_create_sync(proxy, out_ctx, out_nw_handle, &out_rq_handle, &out_error,
+ NULL, &error);
+ if (error)
+ {
+ g_error_free(error);
+ }
+ ASSERT_EQ(out_error, 0);
+}
+
+TEST_F(DBusTest, neg_request_create_invalid_ctx)
+{
+ NpudCore *proxy = this->getProxy();
+ ASSERT_NE(proxy, nullptr);
+
+ GError *error = NULL;
+ gint out_error = -1;
+ gint arg_device_id = 0;
+ gint arg_priority = 0;
+ guint64 out_ctx = 0;
+ npud_core_call_context_create_sync(proxy, arg_device_id, arg_priority, &out_ctx, &out_error, NULL,
+ &error);
+ if (error)
+ {
+ g_error_free(error);
+ error = NULL;
+ }
+ ASSERT_EQ(out_error, 0);
+
+ out_error = -1;
+ const gchar *model_path = this->getModel().c_str();
+ guint out_nw_handle = 0;
+ npud_core_call_network_create_sync(proxy, out_ctx, model_path, &out_nw_handle, &out_error, NULL,
+ &error);
+ if (error)
+ {
+ g_error_free(error);
+ error = NULL;
+ }
+ ASSERT_EQ(out_error, 0);
+
+ out_error = -1;
+ guint out_rq_handle = 0;
+ npud_core_call_request_create_sync(proxy, 0, out_nw_handle, &out_rq_handle, &out_error, NULL,
+ &error);
+ if (error)
+ {
+ g_error_free(error);
+ }
+ ASSERT_NE(out_error, 0);
+}
+
+TEST_F(DBusTest, neg_request_create_invalid_nw)
+{
+ NpudCore *proxy = this->getProxy();
+ ASSERT_NE(proxy, nullptr);
+
+ GError *error = NULL;
+ gint out_error = -1;
+ gint arg_device_id = 0;
+ gint arg_priority = 0;
+ guint64 out_ctx = 0;
+ npud_core_call_context_create_sync(proxy, arg_device_id, arg_priority, &out_ctx, &out_error, NULL,
+ &error);
+ if (error)
+ {
+ g_error_free(error);
+ error = NULL;
+ }
+ ASSERT_EQ(out_error, 0);
+
+ out_error = -1;
+ guint out_rq_handle = 0;
+ npud_core_call_request_create_sync(proxy, out_ctx, 0, &out_rq_handle, &out_error, NULL, &error);
+ if (error)
+ {
+ g_error_free(error);
+ }
+ ASSERT_NE(out_error, 0);
+}
+
+TEST_F(DBusTest, request_destroy)
+{
+ NpudCore *proxy = this->getProxy();
+ ASSERT_NE(proxy, nullptr);
+
+ GError *error = NULL;
+ gint out_error = -1;
+ gint arg_device_id = 0;
+ gint arg_priority = 0;
+ guint64 out_ctx = 0;
+ npud_core_call_context_create_sync(proxy, arg_device_id, arg_priority, &out_ctx, &out_error, NULL,
+ &error);
+ if (error)
+ {
+ g_error_free(error);
+ error = NULL;
+ }
+ ASSERT_EQ(out_error, 0);
+
+ out_error = -1;
+ const gchar *model_path = this->getModel().c_str();
+ guint out_nw_handle = 0;
+ npud_core_call_network_create_sync(proxy, out_ctx, model_path, &out_nw_handle, &out_error, NULL,
+ &error);
+ if (error)
+ {
+ g_error_free(error);
+ error = NULL;
+ }
+ ASSERT_EQ(out_error, 0);
+
+ out_error = -1;
+ guint out_rq_handle = 0;
+ npud_core_call_request_create_sync(proxy, out_ctx, out_nw_handle, &out_rq_handle, &out_error,
+ NULL, &error);
+ if (error)
+ {
+ g_error_free(error);
+ }
+ ASSERT_EQ(out_error, 0);
+
+ out_error = -1;
+ npud_core_call_request_destroy_sync(proxy, out_ctx, out_rq_handle, &out_error, NULL, &error);
+ if (error)
+ {
+ g_error_free(error);
+ }
+ ASSERT_EQ(out_error, 0);
+}
+
+TEST_F(DBusTest, neg_request_destroy_invalid_ctx)
+{
+ NpudCore *proxy = this->getProxy();
+ ASSERT_NE(proxy, nullptr);
+
+ GError *error = NULL;
+ gint out_error = -1;
+ gint arg_device_id = 0;
+ gint arg_priority = 0;
+ guint64 out_ctx = 0;
+ npud_core_call_context_create_sync(proxy, arg_device_id, arg_priority, &out_ctx, &out_error, NULL,
+ &error);
+ if (error)
+ {
+ g_error_free(error);
+ error = NULL;
+ }
+ ASSERT_EQ(out_error, 0);
+
+ out_error = -1;
+ const gchar *model_path = this->getModel().c_str();
+ guint out_nw_handle = 0;
+ npud_core_call_network_create_sync(proxy, out_ctx, model_path, &out_nw_handle, &out_error, NULL,
+ &error);
+ if (error)
+ {
+ g_error_free(error);
+ error = NULL;
+ }
+ ASSERT_EQ(out_error, 0);
+
+ out_error = -1;
+ guint out_rq_handle = 0;
+ npud_core_call_request_create_sync(proxy, out_ctx, out_nw_handle, &out_rq_handle, &out_error,
+ NULL, &error);
+ if (error)
+ {
+ g_error_free(error);
+ }
+ ASSERT_EQ(out_error, 0);
+
+ out_error = -1;
+ npud_core_call_request_destroy_sync(proxy, 0, out_rq_handle, &out_error, NULL, &error);
+ if (error)
+ {
+ g_error_free(error);
+ }
+ ASSERT_NE(out_error, 0);
+}
+
+TEST_F(DBusTest, neg_request_destroy_invalid_rq)
+{
+ NpudCore *proxy = this->getProxy();
+ ASSERT_NE(proxy, nullptr);
+
+ GError *error = NULL;
+ gint out_error = -1;
+ gint arg_device_id = 0;
+ gint arg_priority = 0;
+ guint64 out_ctx = 0;
+ npud_core_call_context_create_sync(proxy, arg_device_id, arg_priority, &out_ctx, &out_error, NULL,
+ &error);
+ if (error)
+ {
+ g_error_free(error);
+ error = NULL;
+ }
+ ASSERT_EQ(out_error, 0);
+
+ out_error = -1;
+ npud_core_call_request_destroy_sync(proxy, out_ctx, 0, &out_error, NULL, &error);
+ if (error)
+ {
+ g_error_free(error);
+ }
+ ASSERT_NE(out_error, 0);
+}
+
+} // unnamed namespace
diff --git a/runtime/service/npud/tests/core/Server.test.cc b/runtime/service/npud/tests/core/Server.test.cc
new file mode 100644
index 000000000..f24a4e92b
--- /dev/null
+++ b/runtime/service/npud/tests/core/Server.test.cc
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "core/Server.h"
+
+#include <gtest/gtest.h>
+#include <thread>
+
+namespace
+{
+using namespace npud;
+using namespace core;
+
+//
+// ServerTest setup/teardown
+//
+class ServerTest : public ::testing::Test
+{
+protected:
+ static void runTask()
+ {
+ auto &server = Server::instance();
+ server.run();
+ }
+
+ void SetUp() override
+ {
+ std::thread child = std::thread(runTask);
+ child.detach();
+ auto &server = Server::instance();
+    while (!server.isRunning())
+    {
+      std::this_thread::yield();
+    }
+ }
+
+ void TearDown() override
+ {
+ auto &server = Server::instance();
+ if (server.isRunning())
+ {
+ server.stop();
+ }
+ }
+};
+
+//
+// ServerTest
+//
+TEST_F(ServerTest, run)
+{
+ auto &server = Server::instance();
+ ASSERT_EQ(server.isRunning(), true);
+}
+
+TEST_F(ServerTest, stop)
+{
+ auto &server = Server::instance();
+ server.stop();
+ ASSERT_EQ(server.isRunning(), false);
+}
+
+} // unnamed namespace
diff --git a/runtime/service/npud/tests/core/Signal.test.cc b/runtime/service/npud/tests/core/Signal.test.cc
new file mode 100644
index 000000000..459e7d4d3
--- /dev/null
+++ b/runtime/service/npud/tests/core/Signal.test.cc
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "core/Server.h"
+#include "core/Signal.h"
+
+#include <gtest/gtest.h>
+#include <thread>
+#include <csignal>
+
+namespace
+{
+using namespace npud;
+using namespace core;
+
+//
+// SignalTest setup/teardown
+//
+class SignalTest : public ::testing::Test
+{
+protected:
+ static void runTask()
+ {
+ auto &server = Server::instance();
+ server.run();
+ }
+
+ void SetUp() override
+ {
+ std::thread child = std::thread(runTask);
+ child.detach();
+ auto &server = Server::instance();
+    while (!server.isRunning())
+    {
+      std::this_thread::yield();
+    }
+ }
+
+ void TearDown() override
+ {
+ auto &server = Server::instance();
+ if (server.isRunning())
+ {
+ server.stop();
+ }
+ }
+};
+
+//
+// SignalTest
+//
+TEST_F(SignalTest, raise_SIGTERM)
+{
+ auto &server = Server::instance();
+ ASSERT_EQ(server.isRunning(), true);
+ std::raise(SIGTERM);
+ ASSERT_EQ(server.isRunning(), false);
+}
+
+TEST_F(SignalTest, raise_SIGINT)
+{
+ auto &server = Server::instance();
+ ASSERT_EQ(server.isRunning(), true);
+ std::raise(SIGINT);
+ ASSERT_EQ(server.isRunning(), false);
+}
+
+TEST_F(SignalTest, raise_SIGILL)
+{
+ auto &server = Server::instance();
+ ASSERT_EQ(server.isRunning(), true);
+ std::raise(SIGILL);
+ ASSERT_EQ(server.isRunning(), false);
+}
+
+TEST_F(SignalTest, raise_SIGABRT)
+{
+ auto &server = Server::instance();
+ ASSERT_EQ(server.isRunning(), true);
+ std::raise(SIGABRT);
+ ASSERT_EQ(server.isRunning(), false);
+}
+
+TEST_F(SignalTest, raise_SIGFPE)
+{
+ auto &server = Server::instance();
+ ASSERT_EQ(server.isRunning(), true);
+ std::raise(SIGFPE);
+ ASSERT_EQ(server.isRunning(), false);
+}
+
+} // unnamed namespace
diff --git a/tests/custom_op/FillFrom/FillFrom_runner.cc b/tests/custom_op/FillFrom/FillFrom_runner.cc
index 731308638..6b09d5db6 100644
--- a/tests/custom_op/FillFrom/FillFrom_runner.cc
+++ b/tests/custom_op/FillFrom/FillFrom_runner.cc
@@ -87,7 +87,7 @@ std::vector<float> genData(uint64_t size)
template <typename InIter1, typename InIter2>
static auto findMaxDifference(InIter1 first1, InIter1 last1, InIter2 first2)
- -> decltype(*first1 - *first2)
+ -> decltype(*first1 - *first2)
{
auto max_difference = std::abs(*first1 - *first2);
for (; first1 != last1; ++first1, ++first2)
@@ -227,7 +227,7 @@ int main(const int argc, char **argv)
const float tolerance = 0.01f;
auto max_difference =
- findMaxDifference(outputs[0].begin(), outputs[0].end(), std::begin(ref_data));
+ findMaxDifference(outputs[0].begin(), outputs[0].end(), std::begin(ref_data));
int exit_code = 0;
if (max_difference > tolerance)
diff --git a/tests/nnapi/CMakeLists.txt b/tests/nnapi/CMakeLists.txt
index b1215d867..80c3cced2 100644
--- a/tests/nnapi/CMakeLists.txt
+++ b/tests/nnapi/CMakeLists.txt
@@ -9,6 +9,8 @@ endif(NOT BUILD_ONERT)
nnfw_find_package(GTest)
+# NNAPI gtest requires C++17
+set(CMAKE_CXX_STANDARD 17)
set(GENERATED_CPPS "${CMAKE_CURRENT_SOURCE_DIR}/src/generated/all_generated_V1_2_cts_tests.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/src/generated/all_generated_V1_1_cts_tests.cpp"
@@ -54,17 +56,15 @@ target_link_libraries(${RUNTIME_NNAPI_TEST} nnfw_lib_nnapi)
target_link_libraries(${RUNTIME_NNAPI_TEST} gtest gmock)
target_link_libraries(${RUNTIME_NNAPI_TEST} ${LIB_PTHREAD} dl)
-install(TARGETS ${RUNTIME_NNAPI_TEST} DESTINATION unittest)
+install(TARGETS ${RUNTIME_NNAPI_TEST} DESTINATION nnapi-gtest)
# Default test backend: cpu
set(SKIPLIST_FILE_NAME ${RUNTIME_NNAPI_TEST}.skip.${TARGET_PLATFORM}.cpu)
install(FILES ${SKIPLIST_FILE_NAME}
- DESTINATION unittest
+ DESTINATION nnapi-gtest
RENAME ${RUNTIME_NNAPI_TEST}.skip
OPTIONAL)
# Install skiplist file for target as backup
FILE(GLOB SKIPLIST_TARGET ${CMAKE_CURRENT_SOURCE_DIR}/${RUNTIME_NNAPI_TEST}.skip.${TARGET_PLATFORM}*)
-FILE(GLOB SKIPLIST_NOARCH ${CMAKE_CURRENT_SOURCE_DIR}/${RUNTIME_NNAPI_TEST}.skip.noarch.*)
-list(APPEND SKIPLIST_ALL_RUNTIME ${SKIPLIST_TARGET} ${SKIPLIST_NOARCH})
-install(FILES ${SKIPLIST_ALL_RUNTIME} DESTINATION unittest OPTIONAL)
+install(FILES ${SKIPLIST_TARGET} DESTINATION nnapi-gtest OPTIONAL)
diff --git a/tests/nnapi/include/NeuralNetworksWrapper.h b/tests/nnapi/include/NeuralNetworksWrapper.h
index af19008fe..724d4cd0a 100644
--- a/tests/nnapi/include/NeuralNetworksWrapper.h
+++ b/tests/nnapi/include/NeuralNetworksWrapper.h
@@ -27,9 +27,7 @@
#include "NeuralNetworksExShim.h"
#include <math.h>
-// Fix for onert: use boost::optional instead of std::optional
-// TODO in onert: introduce and use internal optional library
-#include <boost/optional.hpp>
+#include <optional>
#include <string>
#include <vector>
@@ -104,10 +102,7 @@ struct SymmPerChannelQuantParams {
struct OperandType {
ANeuralNetworksOperandType operandType;
std::vector<uint32_t> dimensions;
- // Fix for onert:
- // Use boost::optional instead of std::optional
- // Default value: std::nullopt -> boost::none
- boost::optional<SymmPerChannelQuantParams> channelQuant;
+ std::optional<SymmPerChannelQuantParams> channelQuant;
OperandType(const OperandType& other)
: operandType(other.operandType),
@@ -127,7 +122,7 @@ struct OperandType {
}
OperandType(Type type, std::vector<uint32_t> d, float scale = 0.0f, int32_t zeroPoint = 0)
- : dimensions(std::move(d)), channelQuant(boost::none) {
+ : dimensions(std::move(d)), channelQuant(std::nullopt) {
operandType = {
.type = static_cast<int32_t>(type),
.dimensionCount = static_cast<uint32_t>(dimensions.size()),
diff --git a/tests/nnapi/nnapi_gtest.skip.aarch64-android.acl_cl b/tests/nnapi/nnapi_gtest.skip.aarch64-android.acl_cl
new file mode 100644
index 000000000..4e4d68871
--- /dev/null
+++ b/tests/nnapi/nnapi_gtest.skip.aarch64-android.acl_cl
@@ -0,0 +1,305 @@
+GeneratedTests.abs_
+GeneratedTests.abs_dynamic_nnfw
+GeneratedTests.add_dynamic_nnfw
+GeneratedTests.argmax_3_axis_as_input_nnfw
+GeneratedTests.argmax_3_axis_as_input_nnfw_quant8
+GeneratedTests.argmax_dynamic_nnfw
+GeneratedTests.batch_matmul_ex_dynamic_nnfw
+GeneratedTests.batch_matmul_ex_float_adj_x
+GeneratedTests.batch_matmul_ex_float_adj_y
+GeneratedTests.batch_matmul_ex_float_batch2
+GeneratedTests.batch_matmul_ex_float_broadcast
+GeneratedTests.batch_matmul_ex_float_broadcast2_adj_xy
+GeneratedTests.batch_matmul_ex_float_broadcast_adj_x
+GeneratedTests.batch_matmul_ex_float_simple
+GeneratedTests.broadcast_to_ex_1D_nnfw
+GeneratedTests.broadcast_to_ex_2D_nnfw
+GeneratedTests.broadcast_to_ex_dynamic_2D_nnfw
+GeneratedTests.broadcast_to_ex_dynamic_3D_nnfw
+GeneratedTests.cast_dynamic_float32_to_int32_nnfw
+GeneratedTests.cast_float16_to_float16
+GeneratedTests.cast_float16_to_float32
+GeneratedTests.cast_float16_to_float32_relaxed
+GeneratedTests.cast_float16_to_int32
+GeneratedTests.cast_float16_to_quant8
+GeneratedTests.cast_float16_to_quant8_overflow
+GeneratedTests.cast_float32_to_float16
+GeneratedTests.cast_float32_to_float16_relaxed
+GeneratedTests.cast_float32_to_int32_nnfw
+GeneratedTests.cast_int32_to_float16
+GeneratedTests.cast_quant8_to_float16
+GeneratedTests.concat_dynamic_nnfw
+GeneratedTests.conv_dynamic_nnfw
+GeneratedTests.conv_float_channels_weights_as_inputs
+GeneratedTests.conv_float_channels_weights_as_inputs_relaxed
+GeneratedTests.conv_float_large_weights_as_inputs
+GeneratedTests.conv_float_large_weights_as_inputs_relaxed
+GeneratedTests.conv_float_weights_as_inputs
+GeneratedTests.conv_float_weights_as_inputs_relaxed
+GeneratedTests.conv_quant8_channels_weights_as_inputs
+GeneratedTests.conv_quant8_large_weights_as_inputs
+GeneratedTests.conv_quant8_overflow_weights_as_inputs
+GeneratedTests.conv_quant8_weights_as_inputs
+GeneratedTests.conv2d_dilation_nnfw
+GeneratedTests.conv2d_dilation_nnfw_quant8
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8
+GeneratedTests.conv2d_dilation_nnfw_2
+GeneratedTests.conv2d_dilation_nnfw_quant8_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8_2
+GeneratedTests.cos_ex_1D_float_nnfw
+GeneratedTests.cos_ex_4D_float_nnfw
+GeneratedTests.cos_ex_dynamic_nnfw
+GeneratedTests.dequantize_v1_2_3d_quant8_symm
+GeneratedTests.dequantize_v1_2_4d_quant8_symm
+GeneratedTests.dequantize_v1_2_3d_per_channel_first_dim
+GeneratedTests.dequantize_v1_2_3d_per_channel_second_dim
+GeneratedTests.dequantize_v1_2
+GeneratedTests.dequantize_v1_2_zero_sized
+GeneratedTests.dequantize_v1_2_zero_sized_float16
+GeneratedTests.div_dynamic_nnfw
+GeneratedTests.einsum_ex_float_matmul_2x2_2
+GeneratedTests.einsum_ex_float_matmul_3x2_3
+GeneratedTests.einsum_ex_float_matmul_3x3_4
+GeneratedTests.einsum_ex_float_matmul_4x4_4
+GeneratedTests.einsum_ex_float_matmul_4x4_4_2
+GeneratedTests.equal_dynamic_float_nnfw
+GeneratedTests.exp_
+GeneratedTests.exp_dynamic_nnfw
+GeneratedTests.expand_dims_dynamic_nnfw_1
+GeneratedTests.expand_dims_dynamic_nnfw_2
+GeneratedTests.fill_ex_1D_float
+GeneratedTests.fill_ex_4D_float
+GeneratedTests.fill_ex_dynamic_nnfw
+GeneratedTests.fully_connected_dynamic_nnfw
+GeneratedTests.fully_connected_float_2_weights_as_inputs
+GeneratedTests.fusedbatchnorm_ex_dynamic_nnfw
+GeneratedTests.fusedbatchnorm_ex_float_fusedbatchnorm_1141
+GeneratedTests.gather_dynamic_nnfw
+GeneratedTests.gather_float16
+GeneratedTests.gather_float16_2
+GeneratedTests.gather_float16_3
+GeneratedTests.gather_float16_4
+GeneratedTests.gather_float16_5
+GeneratedTests.gather_float16_6
+GeneratedTests.gather_float16_7
+GeneratedTests.gather_float16_8
+GeneratedTests.greater_dynamic_float_nnfw
+GeneratedTests.greater_equal_dynamic_float_nnfw
+GeneratedTests.l2_normalization_quant8_nnfw
+GeneratedTests.less_dynamic_float_nnfw
+GeneratedTests.less_equal_dynamic_float_nnfw
+GeneratedTests.log_4D_float_nnfw
+GeneratedTests.log_dynamic_nnfw
+GeneratedTests.log_softmax_nnfw
+GeneratedTests.log_softmax_nnfw_2
+GeneratedTests.log_softmax_nnfw_3
+GeneratedTests.log_softmax_nnfw_4
+GeneratedTests.log_softmax_nnfw_5
+GeneratedTests.log_softmax_nnfw_quant8
+GeneratedTests.logical_not
+GeneratedTests.logical_not_1D_nnfw
+GeneratedTests.logical_not_4D_nnfw
+GeneratedTests.logical_not_dynamic_nnfw
+GeneratedTests.logical_or_broadcast
+GeneratedTests.logical_or_dynamic_nnfw
+GeneratedTests.logistic_dynamic_nnfw
+GeneratedTests.lsh_projection
+GeneratedTests.lsh_projection_2
+GeneratedTests.lsh_projection_weights_as_inputs
+GeneratedTests.lstm
+GeneratedTests.lstm2
+GeneratedTests.lstm2_state
+GeneratedTests.lstm2_state2
+GeneratedTests.lstm3
+GeneratedTests.lstm3_state
+GeneratedTests.lstm3_state2
+GeneratedTests.lstm3_state3
+GeneratedTests.lstm_state
+GeneratedTests.lstm_state2
+GeneratedTests.matrix_band_part_ex_4D_float
+GeneratedTests.matrix_band_part_ex_dynamic_nnfw
+GeneratedTests.maximum_dynamic_nnfw
+GeneratedTests.minimum_dynamic_nnfw
+GeneratedTests.minimum_int32
+GeneratedTests.mul_dynamic_nnfw
+GeneratedTests.neg
+GeneratedTests.neg_dynamic_nnfw
+GeneratedTests.not_equal_dynamic_float_nnfw
+GeneratedTests.one_hot_ex_dynamic_nnfw
+GeneratedTests.pack_ex_dynamic_nnfw
+GeneratedTests.pad_dynamic_nnfw
+GeneratedTests.pad_v2_1_float
+GeneratedTests.pad_v2_1_quant8
+GeneratedTests.pad_v2_all_dims
+GeneratedTests.pad_v2_all_dims_quant8
+GeneratedTests.pad_v2_low_rank
+GeneratedTests.pad_v2_low_rank_quant8
+GeneratedTests.pow_2D_float_nnfw
+GeneratedTests.pow_broadcast_float_nnfw
+GeneratedTests.pow_broadcast_float_nnfw_2
+GeneratedTests.pow_broadcast_float_nnfw_3
+GeneratedTests.pow_dynamic_nnfw
+GeneratedTests.quantize_quant8
+GeneratedTests.quantize_quant8_2
+GeneratedTests.quantize_quant8_3
+GeneratedTests.quantize_quant8_4
+GeneratedTests.quantize_quant8_5
+GeneratedTests.quantize_quant8_6
+GeneratedTests.quantize_quant8_7
+GeneratedTests.quantize_quant8_8
+GeneratedTests.quantize_zero_sized
+GeneratedTests.range_ex_float_1
+GeneratedTests.range_ex_float_1_all_constant_inputs
+GeneratedTests.range_ex_float_1_dynamic_nnfw
+GeneratedTests.range_ex_float_2
+GeneratedTests.range_ex_float_2_dynamic_nnfw
+GeneratedTests.reduce_all
+GeneratedTests.reduce_all_2
+GeneratedTests.reduce_all_2D_nnfw
+GeneratedTests.reduce_all_3
+GeneratedTests.reduce_all_4D_nnfw
+GeneratedTests.reduce_all_dynamic_nnfw
+GeneratedTests.reduce_any
+GeneratedTests.reduce_any_2
+GeneratedTests.reduce_any_2D_nnfw
+GeneratedTests.reduce_any_3
+GeneratedTests.reduce_any_4D_nnfw
+GeneratedTests.reduce_mean_dynamic_1_nnfw
+GeneratedTests.reduce_mean_dynamic_2_nnfw
+GeneratedTests.reduce_min_dynamic_nnfw
+GeneratedTests.reduce_prod
+GeneratedTests.reduce_prod_2
+GeneratedTests.reduce_prod_2D_float_nnfw
+GeneratedTests.reduce_prod_3
+GeneratedTests.reduce_prod_4
+GeneratedTests.reduce_prod_4D_float_nnfw
+GeneratedTests.reduce_prod_4D_float_reducing_C_nnfw
+GeneratedTests.reduce_prod_4D_float_reducing_HW_nnfw
+GeneratedTests.reduce_prod_dynamic_1_nnfw
+GeneratedTests.reduce_prod_dynamic_2_nnfw
+GeneratedTests.reduce_sum_dynamic_1_nnfw
+GeneratedTests.reduce_sum_dynamic_2_nnfw
+GeneratedTests.reshape_dynamic_nnfw
+GeneratedTests.resize_nearest_neighbor_shape_nchw_2
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_2
+GeneratedTests.resize_nearest_neighbor_scale_nchw_2
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_2
+GeneratedTests.resize_nearest_neighbor_shape_nchw_4
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_4
+GeneratedTests.resize_nearest_neighbor_scale_nchw_4
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_4
+GeneratedTests.resize_nearest_neighbor_shape_nchw_7
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_7
+GeneratedTests.resize_nearest_neighbor_scale_nchw_7
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_7
+GeneratedTests.resize_nearest_neighbor_shape_nchw_8
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_8
+GeneratedTests.resize_nearest_neighbor_scale_nchw_8
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8_2
+GeneratedTests.reverse_ex_1d
+GeneratedTests.reverse_ex_3d
+GeneratedTests.reverse_ex_dynamic_1D
+GeneratedTests.reverse_ex_dynamic_3D
+GeneratedTests.rnn
+GeneratedTests.rnn_state
+GeneratedTests.round_ex_1D_float
+GeneratedTests.round_ex_4D_float
+GeneratedTests.round_ex_dynamic_nnfw
+GeneratedTests.rsqrt
+GeneratedTests.rsqrt_dynamic_nnfw
+GeneratedTests.select_v1_2_five_dim
+GeneratedTests.select_v1_2_five_dim_quant8
+GeneratedTests.select_v1_2_one_dim
+GeneratedTests.select_v1_2_one_dim_quant8
+GeneratedTests.select_v1_2_two_dim
+GeneratedTests.select_v1_2_two_dim_quant8
+GeneratedTests.select_v2_ex_broadcast_1d_single_value
+GeneratedTests.select_v2_ex_broadcast_2d_one
+GeneratedTests.select_v2_ex_broadcast_2d_two
+GeneratedTests.select_v2_ex_broadcast_2d_two_dynamic_nnfw
+GeneratedTests.select_v2_ex_broadcast_less_4d
+GeneratedTests.select_v2_ex_float
+GeneratedTests.shape_ex_dynamic_nnfw
+GeneratedTests.sin_1D_float_nnfw
+GeneratedTests.sin_4D_float_nnfw
+GeneratedTests.sin_dynamic_nnfw
+GeneratedTests.slice
+GeneratedTests.slice_2
+GeneratedTests.slice_3
+GeneratedTests.slice_4
+GeneratedTests.slice_5
+GeneratedTests.slice_6
+GeneratedTests.slice_7
+GeneratedTests.slice_8
+GeneratedTests.slice_dynamic_nnfw
+GeneratedTests.slice_zero_sized
+GeneratedTests.slice_zero_sized_quant8
+GeneratedTests.softmax_dynamic_nnfw
+GeneratedTests.space_to_batch_dynamic_float_nnfw
+GeneratedTests.split_dynamic_float_nnfw
+GeneratedTests.split_float_5_axis_as_input_nnfw
+GeneratedTests.split_v_ex_1D_float_1_nnfw
+GeneratedTests.split_v_ex_1D_float_2_nnfw
+GeneratedTests.split_v_ex_1D_int32_nnfw
+GeneratedTests.split_v_ex_4D_float_1_nnfw
+GeneratedTests.split_v_ex_4D_float_2_nnfw
+GeneratedTests.split_v_ex_4D_float_3_nnfw
+GeneratedTests.split_v_ex_4D_float_4_nnfw
+GeneratedTests.split_v_ex_4D_int32_1_nnfw
+GeneratedTests.split_v_ex_4D_int32_2_nnfw
+GeneratedTests.split_v_ex_4D_int32_3_nnfw
+GeneratedTests.split_v_ex_4D_int32_4_nnfw
+GeneratedTests.sqrt_
+GeneratedTests.squared_difference_ex_dynamic_nnfw
+GeneratedTests.squeeze_dynamic_float_nnfw
+GeneratedTests.stateless_random_uniform_ex_nnfw
+GeneratedTests.strided_slice_dynamic_nnfw
+GeneratedTests.sub_dynamic_nnfw
+GeneratedTests.sub_v1_2_zero_sized
+GeneratedTests.sub_v1_2_zero_sized_quant8
+GeneratedTests.svdf
+GeneratedTests.svdf2
+GeneratedTests.svdf_bias_present
+GeneratedTests.svdf_state
+GeneratedTests.tanh_v1_2
+GeneratedTests.tanh_v1_2_zero_sized
+GeneratedTests.tanh_v1_2_zero_sized_quant8
+GeneratedTests.tanh_v1_dynamic_nnfw
+GeneratedTests.tile_1
+GeneratedTests.tile_1_dynamic_float32_nnfw
+GeneratedTests.tile_1_float16
+GeneratedTests.tile_1_quant8
+GeneratedTests.tile_2
+GeneratedTests.tile_2_dynamic_float32_nnfw
+GeneratedTests.tile_2_float16
+GeneratedTests.tile_2_int32
+GeneratedTests.tile_2_quant8
+GeneratedTests.tile_3
+GeneratedTests.tile_3_dynamic_float32_nnfw
+GeneratedTests.tile_3_float16
+GeneratedTests.tile_3_int32
+GeneratedTests.tile_3_quant8
+GeneratedTests.transpose_dynamic_nnfw
+GeneratedTests.transpose_float_1_perms_as_input_nnfw
+GeneratedTests.transpose_v1_2_zero_sized
+GeneratedTests.transpose_v1_2_zero_sized_quant8
+GeneratedTests.unidirectional_sequence_lstm_1step
+GeneratedTests.unidirectional_sequence_lstm_batch_major_norm_peephole_projection
+GeneratedTests.unidirectional_sequence_lstm_batch_major_peephole_projection_bias
+GeneratedTests.unidirectional_sequence_lstm_dynamic_nnfw
+GeneratedTests.unidirectional_sequence_lstm_layer_norm_cifg_peephole
+GeneratedTests.unidirectional_sequence_lstm_norm_peephole_projection
+GeneratedTests.unpack_ex_dynamic_nnfw
+GeneratedTests.zeros_like_ex_2D_float
+GeneratedTests.zeros_like_ex_4D_int32
+GeneratedTests.zeros_like_ex_dynamic_float32
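Each skip file added above is a flat list, one gtest name per line, matched against the GeneratedTests suite for that backend. As a hedged sketch of how such a list can be consumed (the function name and blank-line handling are assumptions, not taken from this patch; --gtest_filter itself is standard googletest):

#include <fstream>
#include <iostream>
#include <string>

// Fold a skip list (one test name per line) into a negative
// --gtest_filter value of the form "-A:B:C". Illustrative only.
std::string buildSkipFilter(const std::string& path) {
  std::ifstream in(path);
  std::string line;
  std::string filter = "-";
  bool first = true;
  while (std::getline(in, line)) {
    if (line.empty()) continue;  // tolerate blank lines
    if (!first) filter += ":";
    filter += line;
    first = false;
  }
  return first ? std::string() : filter;  // empty list -> no filtering
}

int main() {
  // e.g. run the test binary as: nnapi_gtest --gtest_filter=<result>
  std::cout << buildSkipFilter("nnapi_gtest.skip.aarch64-android.acl_cl") << '\n';
}

The per-backend naming visible in the paths (nnapi_gtest.skip.<arch>-<os>.<backend>) lets a single test binary carry a different expected-failure set for each runtime configuration.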
diff --git a/tests/nnapi/nnapi_gtest.skip.aarch64-android.acl_neon b/tests/nnapi/nnapi_gtest.skip.aarch64-android.acl_neon
new file mode 100644
index 000000000..03bdf0916
--- /dev/null
+++ b/tests/nnapi/nnapi_gtest.skip.aarch64-android.acl_neon
@@ -0,0 +1,377 @@
+GeneratedTests.abs_
+GeneratedTests.abs_dynamic_nnfw
+GeneratedTests.add_dynamic_nnfw
+GeneratedTests.argmax_3_axis_as_input_nnfw
+GeneratedTests.argmax_3_axis_as_input_nnfw_quant8
+GeneratedTests.argmax_dynamic_nnfw
+GeneratedTests.batch_matmul_ex_dynamic_nnfw
+GeneratedTests.batch_matmul_ex_float_adj_x
+GeneratedTests.batch_matmul_ex_float_adj_y
+GeneratedTests.batch_matmul_ex_float_batch2
+GeneratedTests.batch_matmul_ex_float_broadcast
+GeneratedTests.batch_matmul_ex_float_broadcast2_adj_xy
+GeneratedTests.batch_matmul_ex_float_broadcast_adj_x
+GeneratedTests.batch_matmul_ex_float_simple
+GeneratedTests.broadcast_to_ex_1D_nnfw
+GeneratedTests.broadcast_to_ex_2D_nnfw
+GeneratedTests.broadcast_to_ex_dynamic_2D_nnfw
+GeneratedTests.broadcast_to_ex_dynamic_3D_nnfw
+GeneratedTests.cast_dynamic_float32_to_int32_nnfw
+GeneratedTests.cast_float16_to_float16
+GeneratedTests.cast_float16_to_float32
+GeneratedTests.cast_float16_to_float32_relaxed
+GeneratedTests.cast_float16_to_int32
+GeneratedTests.cast_float16_to_quant8
+GeneratedTests.cast_float16_to_quant8_overflow
+GeneratedTests.cast_float32_to_float16
+GeneratedTests.cast_float32_to_float16_relaxed
+GeneratedTests.cast_int32_to_float16
+GeneratedTests.cast_quant8_to_float16
+GeneratedTests.concat_dynamic_nnfw
+GeneratedTests.conv_dynamic_nnfw
+GeneratedTests.conv_float_channels_weights_as_inputs
+GeneratedTests.conv_float_channels_weights_as_inputs_relaxed
+GeneratedTests.conv_float_large_weights_as_inputs
+GeneratedTests.conv_float_large_weights_as_inputs_relaxed
+GeneratedTests.conv_float_weights_as_inputs
+GeneratedTests.conv_float_weights_as_inputs_relaxed
+GeneratedTests.conv_quant8_channels_weights_as_inputs
+GeneratedTests.conv_quant8_large_weights_as_inputs
+GeneratedTests.conv_quant8_overflow_weights_as_inputs
+GeneratedTests.conv_quant8_weights_as_inputs
+GeneratedTests.conv2d_dilation_nnfw
+GeneratedTests.conv2d_dilation_nnfw_quant8
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8
+GeneratedTests.conv2d_dilation_nnfw_2
+GeneratedTests.conv2d_dilation_nnfw_quant8_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8_2
+GeneratedTests.cos_ex_1D_float_nnfw
+GeneratedTests.cos_ex_4D_float_nnfw
+GeneratedTests.cos_ex_dynamic_nnfw
+GeneratedTests.dequantize_v1_2_3d_quant8_symm
+GeneratedTests.dequantize_v1_2_4d_quant8_symm
+GeneratedTests.dequantize_v1_2_3d_per_channel_first_dim
+GeneratedTests.dequantize_v1_2_3d_per_channel_second_dim
+GeneratedTests.dequantize_v1_2
+GeneratedTests.dequantize_v1_2_zero_sized
+GeneratedTests.dequantize_v1_2_zero_sized_float16
+GeneratedTests.div_dynamic_nnfw
+GeneratedTests.einsum_ex_float_matmul_2x2_2
+GeneratedTests.einsum_ex_float_matmul_3x2_3
+GeneratedTests.einsum_ex_float_matmul_3x3_4
+GeneratedTests.einsum_ex_float_matmul_4x4_4
+GeneratedTests.einsum_ex_float_matmul_4x4_4_2
+GeneratedTests.equal_boolean
+GeneratedTests.equal_dynamic_float_nnfw
+GeneratedTests.exp_
+GeneratedTests.exp_2D_float_nnfw
+GeneratedTests.exp_dynamic_nnfw
+GeneratedTests.expand_dims_dynamic_nnfw_1
+GeneratedTests.expand_dims_dynamic_nnfw_2
+GeneratedTests.fill_ex_1D_float
+GeneratedTests.fill_ex_4D_float
+GeneratedTests.fill_ex_dynamic_nnfw
+GeneratedTests.fully_connected_dynamic_nnfw
+GeneratedTests.fully_connected_float_2_weights_as_inputs
+GeneratedTests.fully_connected_hybrid_1_nnfw
+GeneratedTests.fusedbatchnorm_ex_dynamic_nnfw
+GeneratedTests.fusedbatchnorm_ex_float_fusedbatchnorm_1141
+GeneratedTests.gather_dynamic_nnfw
+GeneratedTests.gather_float16
+GeneratedTests.gather_float16_2
+GeneratedTests.gather_float16_3
+GeneratedTests.gather_float16_4
+GeneratedTests.gather_float16_5
+GeneratedTests.gather_float16_6
+GeneratedTests.gather_float16_7
+GeneratedTests.gather_float16_8
+GeneratedTests.greater_dynamic_float_nnfw
+GeneratedTests.greater_equal_boolean
+GeneratedTests.greater_equal_dynamic_float_nnfw
+GeneratedTests.l2_normalization_quant8_nnfw
+GeneratedTests.less_boolean
+GeneratedTests.less_dynamic_float_nnfw
+GeneratedTests.less_equal_dynamic_float_nnfw
+GeneratedTests.log_4D_float_nnfw
+GeneratedTests.log_dynamic_nnfw
+GeneratedTests.log_softmax_nnfw
+GeneratedTests.log_softmax_nnfw_2
+GeneratedTests.log_softmax_nnfw_3
+GeneratedTests.log_softmax_nnfw_4
+GeneratedTests.log_softmax_nnfw_5
+GeneratedTests.log_softmax_nnfw_quant8
+GeneratedTests.logical_not
+GeneratedTests.logical_not_1D_nnfw
+GeneratedTests.logical_not_4D_nnfw
+GeneratedTests.logical_not_dynamic_nnfw
+GeneratedTests.logical_or_dynamic_nnfw
+GeneratedTests.logistic_dynamic_nnfw
+GeneratedTests.lsh_projection
+GeneratedTests.lsh_projection_2
+GeneratedTests.lsh_projection_weights_as_inputs
+GeneratedTests.lstm
+GeneratedTests.lstm2
+GeneratedTests.lstm2_state
+GeneratedTests.lstm2_state2
+GeneratedTests.lstm3
+GeneratedTests.lstm3_state
+GeneratedTests.lstm3_state2
+GeneratedTests.lstm3_state3
+GeneratedTests.lstm_state
+GeneratedTests.lstm_state2
+GeneratedTests.matrix_band_part_ex_4D_float
+GeneratedTests.matrix_band_part_ex_dynamic_nnfw
+GeneratedTests.maximum_dynamic_nnfw
+GeneratedTests.minimum_dynamic_nnfw
+GeneratedTests.mul_dynamic_nnfw
+GeneratedTests.neg
+GeneratedTests.neg_dynamic_nnfw
+GeneratedTests.not_equal_boolean
+GeneratedTests.not_equal_dynamic_float_nnfw
+GeneratedTests.one_hot_ex_dynamic_nnfw
+GeneratedTests.pack_ex_dynamic_nnfw
+GeneratedTests.pad_dynamic_nnfw
+GeneratedTests.pad_v2_1_float
+GeneratedTests.pad_v2_1_quant8
+GeneratedTests.pad_v2_all_dims
+GeneratedTests.pad_v2_all_dims_quant8
+GeneratedTests.pad_v2_low_rank
+GeneratedTests.pad_v2_low_rank_quant8
+GeneratedTests.pow_2D_float_nnfw
+GeneratedTests.pow_broadcast_float_nnfw
+GeneratedTests.pow_broadcast_float_nnfw_2
+GeneratedTests.pow_broadcast_float_nnfw_3
+GeneratedTests.pow_dynamic_nnfw
+GeneratedTests.quantize_quant8
+GeneratedTests.quantize_quant8_2
+GeneratedTests.quantize_quant8_3
+GeneratedTests.quantize_quant8_4
+GeneratedTests.quantize_quant8_5
+GeneratedTests.quantize_quant8_6
+GeneratedTests.quantize_quant8_7
+GeneratedTests.quantize_quant8_8
+GeneratedTests.quantize_zero_sized
+GeneratedTests.range_ex_float_1
+GeneratedTests.range_ex_float_1_all_constant_inputs
+GeneratedTests.range_ex_float_1_dynamic_nnfw
+GeneratedTests.range_ex_float_2
+GeneratedTests.range_ex_float_2_dynamic_nnfw
+GeneratedTests.reduce_all
+GeneratedTests.reduce_all_2
+GeneratedTests.reduce_all_2D_nnfw
+GeneratedTests.reduce_all_3
+GeneratedTests.reduce_all_4D_nnfw
+GeneratedTests.reduce_all_dynamic_nnfw
+GeneratedTests.reduce_any
+GeneratedTests.reduce_any_2
+GeneratedTests.reduce_any_2D_nnfw
+GeneratedTests.reduce_any_3
+GeneratedTests.reduce_any_4D_nnfw
+GeneratedTests.reduce_max_2D_int32_nnfw
+GeneratedTests.reduce_max_quant8
+GeneratedTests.reduce_mean_dynamic_1_nnfw
+GeneratedTests.reduce_mean_dynamic_2_nnfw
+GeneratedTests.reduce_min_dynamic_nnfw
+GeneratedTests.reduce_prod
+GeneratedTests.reduce_prod_2
+GeneratedTests.reduce_prod_2D_float_nnfw
+GeneratedTests.reduce_prod_3
+GeneratedTests.reduce_prod_4
+GeneratedTests.reduce_prod_4D_float_nnfw
+GeneratedTests.reduce_prod_4D_float_reducing_C_nnfw
+GeneratedTests.reduce_prod_4D_float_reducing_HW_nnfw
+GeneratedTests.reduce_prod_dynamic_1_nnfw
+GeneratedTests.reduce_prod_dynamic_2_nnfw
+GeneratedTests.reduce_sum_dynamic_1_nnfw
+GeneratedTests.reduce_sum_dynamic_2_nnfw
+GeneratedTests.reshape_dynamic_nnfw
+GeneratedTests.resize_nearest_neighbor_shape_nhwc
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8
+GeneratedTests.resize_nearest_neighbor_shape_nchw
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8
+GeneratedTests.resize_nearest_neighbor_scale_nhwc
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8
+GeneratedTests.resize_nearest_neighbor_scale_nchw
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_2
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_2
+GeneratedTests.resize_nearest_neighbor_shape_nchw_2
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_2
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_2
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_2
+GeneratedTests.resize_nearest_neighbor_scale_nchw_2
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_2
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_3
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_3
+GeneratedTests.resize_nearest_neighbor_shape_nchw_3
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_3
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_3
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_3
+GeneratedTests.resize_nearest_neighbor_scale_nchw_3
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_3
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_4
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_4
+GeneratedTests.resize_nearest_neighbor_shape_nchw_4
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_4
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_4
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_4
+GeneratedTests.resize_nearest_neighbor_scale_nchw_4
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_4
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_5
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_5
+GeneratedTests.resize_nearest_neighbor_shape_nchw_5
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_5
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_5
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_5
+GeneratedTests.resize_nearest_neighbor_scale_nchw_5
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_5
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_6
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_6
+GeneratedTests.resize_nearest_neighbor_shape_nchw_6
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_6
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_6
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_6
+GeneratedTests.resize_nearest_neighbor_scale_nchw_6
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_6
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_7
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_7
+GeneratedTests.resize_nearest_neighbor_shape_nchw_7
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_7
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_7
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_7
+GeneratedTests.resize_nearest_neighbor_scale_nchw_7
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_7
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_8
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_8
+GeneratedTests.resize_nearest_neighbor_shape_nchw_8
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_8
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_8
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_8
+GeneratedTests.resize_nearest_neighbor_scale_nchw_8
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8_2
+GeneratedTests.reverse_ex_1d
+GeneratedTests.reverse_ex_3d
+GeneratedTests.reverse_ex_dynamic_1D
+GeneratedTests.reverse_ex_dynamic_3D
+GeneratedTests.rnn
+GeneratedTests.rnn_state
+GeneratedTests.round_ex_1D_float
+GeneratedTests.round_ex_4D_float
+GeneratedTests.round_ex_dynamic_nnfw
+GeneratedTests.rsqrt
+GeneratedTests.rsqrt_dynamic_nnfw
+GeneratedTests.select_v1_2_five_dim
+GeneratedTests.select_v1_2_five_dim_quant8
+GeneratedTests.select_v1_2_one_dim
+GeneratedTests.select_v1_2_one_dim_quant8
+GeneratedTests.select_v1_2_two_dim
+GeneratedTests.select_v1_2_two_dim_quant8
+GeneratedTests.select_v2_ex_broadcast_1d_single_value
+GeneratedTests.select_v2_ex_broadcast_2d_one
+GeneratedTests.select_v2_ex_broadcast_2d_two
+GeneratedTests.select_v2_ex_broadcast_2d_two_dynamic_nnfw
+GeneratedTests.select_v2_ex_broadcast_less_4d
+GeneratedTests.select_v2_ex_float
+GeneratedTests.shape_ex_dynamic_nnfw
+GeneratedTests.sin_1D_float_nnfw
+GeneratedTests.sin_4D_float_nnfw
+GeneratedTests.sin_dynamic_nnfw
+GeneratedTests.slice
+GeneratedTests.slice_2
+GeneratedTests.slice_3
+GeneratedTests.slice_4
+GeneratedTests.slice_5
+GeneratedTests.slice_6
+GeneratedTests.slice_7
+GeneratedTests.slice_8
+GeneratedTests.slice_dynamic_nnfw
+GeneratedTests.slice_zero_sized
+GeneratedTests.slice_zero_sized_quant8
+GeneratedTests.softmax_dynamic_nnfw
+GeneratedTests.space_to_batch_float_1_nnfw
+GeneratedTests.space_to_batch_float_2
+GeneratedTests.space_to_batch_float_3
+GeneratedTests.space_to_batch_dynamic_float_nnfw
+GeneratedTests.space_to_batch_quant8_1_nnfw
+GeneratedTests.space_to_batch_quant8_2
+GeneratedTests.space_to_batch_quant8_2_nnfw
+GeneratedTests.space_to_batch_quant8_3
+GeneratedTests.split_dynamic_float_nnfw
+GeneratedTests.split_float_5_axis_as_input_nnfw
+GeneratedTests.split_v_ex_1D_float_1_nnfw
+GeneratedTests.split_v_ex_1D_float_2_nnfw
+GeneratedTests.split_v_ex_1D_int32_nnfw
+GeneratedTests.split_v_ex_4D_float_1_nnfw
+GeneratedTests.split_v_ex_4D_float_2_nnfw
+GeneratedTests.split_v_ex_4D_float_3_nnfw
+GeneratedTests.split_v_ex_4D_float_4_nnfw
+GeneratedTests.split_v_ex_4D_int32_1_nnfw
+GeneratedTests.split_v_ex_4D_int32_2_nnfw
+GeneratedTests.split_v_ex_4D_int32_3_nnfw
+GeneratedTests.split_v_ex_4D_int32_4_nnfw
+GeneratedTests.sqrt_
+GeneratedTests.squared_difference_ex_dynamic_nnfw
+GeneratedTests.squeeze_dynamic_float_nnfw
+GeneratedTests.stateless_random_uniform_ex_nnfw
+GeneratedTests.strided_slice_dynamic_nnfw
+GeneratedTests.sub_dynamic_nnfw
+GeneratedTests.sub_v1_2_zero_sized
+GeneratedTests.sub_v1_2_zero_sized_quant8
+GeneratedTests.svdf
+GeneratedTests.svdf2
+GeneratedTests.svdf_bias_present
+GeneratedTests.svdf_state
+GeneratedTests.tanh_v1_2
+GeneratedTests.tanh_v1_2_zero_sized
+GeneratedTests.tanh_v1_2_zero_sized_quant8
+GeneratedTests.tanh_v1_dynamic_nnfw
+GeneratedTests.tile_1
+GeneratedTests.tile_1_dynamic_float32_nnfw
+GeneratedTests.tile_1_float16
+GeneratedTests.tile_1_quant8
+GeneratedTests.tile_2
+GeneratedTests.tile_2_dynamic_float32_nnfw
+GeneratedTests.tile_2_float16
+GeneratedTests.tile_2_int32
+GeneratedTests.tile_2_quant8
+GeneratedTests.tile_3
+GeneratedTests.tile_3_dynamic_float32_nnfw
+GeneratedTests.tile_3_float16
+GeneratedTests.tile_3_int32
+GeneratedTests.tile_3_quant8
+GeneratedTests.topk_v2
+GeneratedTests.topk_v2_1D_float_nnfw
+GeneratedTests.topk_v2_1D_int32_nnfw
+GeneratedTests.topk_v2_1D_quant8_nnfw
+GeneratedTests.topk_v2_2
+GeneratedTests.topk_v2_2D_float_nnfw
+GeneratedTests.topk_v2_2D_int32_nnfw
+GeneratedTests.topk_v2_2D_quant8_nnfw
+GeneratedTests.topk_v2_3
+GeneratedTests.topk_v2_4
+GeneratedTests.topk_v2_5
+GeneratedTests.topk_v2_6
+GeneratedTests.transpose_dynamic_nnfw
+GeneratedTests.transpose_float_1_perms_as_input_nnfw
+GeneratedTests.transpose_v1_2_zero_sized
+GeneratedTests.transpose_v1_2_zero_sized_quant8
+GeneratedTests.unidirectional_sequence_lstm_1step
+GeneratedTests.unidirectional_sequence_lstm_batch_major_norm_peephole_projection
+GeneratedTests.unidirectional_sequence_lstm_batch_major_peephole_projection_bias
+GeneratedTests.unidirectional_sequence_lstm_dynamic_nnfw
+GeneratedTests.unidirectional_sequence_lstm_layer_norm_cifg_peephole
+GeneratedTests.unidirectional_sequence_lstm_norm_peephole_projection
+GeneratedTests.unpack_ex_dynamic_nnfw
+GeneratedTests.zeros_like_ex_2D_float
+GeneratedTests.zeros_like_ex_4D_int32
+GeneratedTests.zeros_like_ex_dynamic_float32
diff --git a/tests/nnapi/nnapi_gtest.skip.aarch64-android.cpu b/tests/nnapi/nnapi_gtest.skip.aarch64-android.cpu
new file mode 100644
index 000000000..a64ffca04
--- /dev/null
+++ b/tests/nnapi/nnapi_gtest.skip.aarch64-android.cpu
@@ -0,0 +1,231 @@
+GeneratedTests.abs_
+GeneratedTests.cast_float16_to_float16
+GeneratedTests.cast_float16_to_float32
+GeneratedTests.cast_float16_to_float32_relaxed
+GeneratedTests.cast_float16_to_int32
+GeneratedTests.cast_float16_to_quant8
+GeneratedTests.cast_float16_to_quant8_overflow
+GeneratedTests.cast_float32_to_float16
+GeneratedTests.cast_float32_to_float16_relaxed
+GeneratedTests.cast_float32_to_quant8_overflow
+GeneratedTests.cast_float32_to_quant8_overflow_relaxed
+GeneratedTests.cast_int32_to_float16
+GeneratedTests.cast_int32_to_quant8_overflow
+GeneratedTests.cast_quant8_to_float16
+GeneratedTests.dequantize_v1_2_3d_per_channel_first_dim
+GeneratedTests.dequantize_v1_2_3d_per_channel_second_dim
+GeneratedTests.dequantize_v1_2
+GeneratedTests.dequantize_v1_2_zero_sized
+GeneratedTests.dequantize_v1_2_zero_sized_float16
+GeneratedTests.embedding_lookup
+GeneratedTests.embedding_lookup_2d_nnfw
+GeneratedTests.embedding_lookup_4d_nnfw
+GeneratedTests.equal_broadcast_float_nnfw
+GeneratedTests.exp_
+GeneratedTests.floor_
+GeneratedTests.gather_float16
+GeneratedTests.gather_float16_2
+GeneratedTests.gather_float16_3
+GeneratedTests.gather_float16_4
+GeneratedTests.gather_float16_5
+GeneratedTests.gather_float16_6
+GeneratedTests.gather_float16_7
+GeneratedTests.gather_float16_8
+GeneratedTests.hashtable_lookup_float
+GeneratedTests.hashtable_lookup_float_4D_nnfw
+GeneratedTests.hashtable_lookup_quant8
+GeneratedTests.l2_pool_float
+GeneratedTests.l2_pool_float_2
+GeneratedTests.l2_pool_float_large
+GeneratedTests.local_response_norm_float_1
+GeneratedTests.local_response_norm_float_2
+GeneratedTests.local_response_norm_float_3
+GeneratedTests.local_response_norm_float_4
+GeneratedTests.logical_not
+GeneratedTests.lsh_projection
+GeneratedTests.lsh_projection_2
+GeneratedTests.lsh_projection_weights_as_inputs
+GeneratedTests.lstm2
+GeneratedTests.lstm2_state
+GeneratedTests.lstm2_state2
+GeneratedTests.maximum_broadcast_quant8
+GeneratedTests.maximum_overflow
+GeneratedTests.maximum_simple_quant8
+GeneratedTests.minimum_broadcast_quant8
+GeneratedTests.minimum_overflow
+GeneratedTests.minimum_simple_quant8
+GeneratedTests.neg
+GeneratedTests.neg_3D_int_nnfw
+GeneratedTests.neg_4D_int_nnfw
+GeneratedTests.prelu
+GeneratedTests.prelu_broadcast_float_1_nnfw
+GeneratedTests.prelu_broadcast_quant8_1_nnfw
+GeneratedTests.prelu_float_1_nnfw
+GeneratedTests.prelu_quant8
+GeneratedTests.prelu_quant8_1_nnfw
+GeneratedTests.prelu_quant8_2
+GeneratedTests.prelu_quant8_3
+GeneratedTests.prelu_quant8_4
+GeneratedTests.prelu_weight_as_input
+GeneratedTests.prelu_weight_as_input_quant8
+GeneratedTests.prelu_weight_as_input_quant8_2
+GeneratedTests.prelu_weight_as_input_quant8_3
+GeneratedTests.prelu_weight_as_input_quant8_4
+GeneratedTests.quantize_quant8_5
+GeneratedTests.quantize_quant8_6
+GeneratedTests.quantize_quant8_7
+GeneratedTests.quantize_quant8_8
+GeneratedTests.quantize_zero_sized
+GeneratedTests.reduce_max_quant8
+GeneratedTests.reduce_max_quant8_1_nnfw
+GeneratedTests.reduce_max_quant8_2
+GeneratedTests.reduce_max_quant8_2_nnfw
+GeneratedTests.reduce_max_quant8_3
+GeneratedTests.reduce_max_quant8_4
+GeneratedTests.reduce_min_quant8
+GeneratedTests.reduce_min_quant8_2
+GeneratedTests.reduce_min_quant8_3
+GeneratedTests.reduce_min_quant8_4
+GeneratedTests.relu1_float_1
+GeneratedTests.relu1_float_2
+GeneratedTests.relu1_quant8_1
+GeneratedTests.relu1_quant8_2
+GeneratedTests.relu6_quant8_1
+GeneratedTests.relu6_quant8_2
+GeneratedTests.relu_quant8_1
+GeneratedTests.relu_quant8_2
+GeneratedTests.resize_nearest_neighbor_shape_nhwc
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8
+GeneratedTests.resize_nearest_neighbor_shape_nchw
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8
+GeneratedTests.resize_nearest_neighbor_scale_nhwc
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8
+GeneratedTests.resize_nearest_neighbor_scale_nchw
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_2
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_2
+GeneratedTests.resize_nearest_neighbor_shape_nchw_2
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_2
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_2
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_2
+GeneratedTests.resize_nearest_neighbor_scale_nchw_2
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_2
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_3
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_3
+GeneratedTests.resize_nearest_neighbor_shape_nchw_3
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_3
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_3
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_3
+GeneratedTests.resize_nearest_neighbor_scale_nchw_3
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_3
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_4
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_4
+GeneratedTests.resize_nearest_neighbor_shape_nchw_4
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_4
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_4
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_4
+GeneratedTests.resize_nearest_neighbor_scale_nchw_4
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_4
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_5
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_5
+GeneratedTests.resize_nearest_neighbor_shape_nchw_5
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_5
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_5
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_5
+GeneratedTests.resize_nearest_neighbor_scale_nchw_5
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_5
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_6
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_6
+GeneratedTests.resize_nearest_neighbor_shape_nchw_6
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_6
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_6
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_6
+GeneratedTests.resize_nearest_neighbor_scale_nchw_6
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_6
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_7
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_7
+GeneratedTests.resize_nearest_neighbor_shape_nchw_7
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_7
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_7
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_7
+GeneratedTests.resize_nearest_neighbor_scale_nchw_7
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_7
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_8
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_8
+GeneratedTests.resize_nearest_neighbor_shape_nchw_8
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_8
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_8
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_8
+GeneratedTests.resize_nearest_neighbor_scale_nchw_8
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8_2
+GeneratedTests.rnn
+GeneratedTests.rnn_state
+GeneratedTests.rsqrt
+GeneratedTests.select_v1_2_five_dim
+GeneratedTests.select_v1_2_five_dim_quant8
+GeneratedTests.select_v1_2_one_dim_quant8
+GeneratedTests.select_v1_2_two_dim_quant8
+GeneratedTests.slice_5
+GeneratedTests.slice_6
+GeneratedTests.slice_8
+GeneratedTests.slice_zero_sized
+GeneratedTests.slice_zero_sized_quant8
+GeneratedTests.sqrt_
+GeneratedTests.sqrt_1D_float_nnfw
+GeneratedTests.sqrt_2D_float_nnfw
+GeneratedTests.sqrt_3D_float_nnfw
+GeneratedTests.sqrt_4D_float_nnfw
+GeneratedTests.strided_slice_qaunt8_10
+GeneratedTests.strided_slice_qaunt8_11
+GeneratedTests.strided_slice_quant8_1
+GeneratedTests.strided_slice_quant8_2
+GeneratedTests.strided_slice_quant8_3
+GeneratedTests.strided_slice_quant8_4
+GeneratedTests.strided_slice_quant8_5
+GeneratedTests.strided_slice_quant8_6
+GeneratedTests.strided_slice_quant8_7
+GeneratedTests.strided_slice_quant8_8
+GeneratedTests.strided_slice_quant8_9
+GeneratedTests.sub_v1_2_zero_sized
+GeneratedTests.sub_v1_2_zero_sized_quant8
+GeneratedTests.svdf
+GeneratedTests.svdf2
+GeneratedTests.svdf_bias_present
+GeneratedTests.svdf_state
+GeneratedTests.tanh_v1_2
+GeneratedTests.tanh_v1_2_zero_sized
+GeneratedTests.tanh_v1_2_zero_sized_quant8
+GeneratedTests.tile_1_float16
+GeneratedTests.tile_1_quant8
+GeneratedTests.tile_2_float16
+GeneratedTests.tile_2_int32
+GeneratedTests.tile_2_quant8
+GeneratedTests.tile_3_float16
+GeneratedTests.tile_3_int32
+GeneratedTests.tile_3_quant8
+GeneratedTests.topk_v2
+GeneratedTests.topk_v2_1D_float_nnfw
+GeneratedTests.topk_v2_1D_int32_nnfw
+GeneratedTests.topk_v2_1D_quant8_nnfw
+GeneratedTests.topk_v2_2
+GeneratedTests.topk_v2_2D_float_nnfw
+GeneratedTests.topk_v2_2D_int32_nnfw
+GeneratedTests.topk_v2_2D_quant8_nnfw
+GeneratedTests.topk_v2_3
+GeneratedTests.topk_v2_4
+GeneratedTests.topk_v2_5
+GeneratedTests.topk_v2_6
+GeneratedTests.transpose_conv_ex_float_1
+GeneratedTests.transpose_conv_ex_float_2
+GeneratedTests.transpose_conv_ex_float_3
+GeneratedTests.transpose_conv_ex_float_4
+GeneratedTests.transpose_v1_2_zero_sized
+GeneratedTests.transpose_v1_2_zero_sized_quant8
diff --git a/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_cl b/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_cl
index 984dbfa2a..4e4d68871 100644
--- a/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_cl
+++ b/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_cl
@@ -1,6 +1,8 @@
GeneratedTests.abs_
GeneratedTests.abs_dynamic_nnfw
GeneratedTests.add_dynamic_nnfw
+GeneratedTests.argmax_3_axis_as_input_nnfw
+GeneratedTests.argmax_3_axis_as_input_nnfw_quant8
GeneratedTests.argmax_dynamic_nnfw
GeneratedTests.batch_matmul_ex_dynamic_nnfw
GeneratedTests.batch_matmul_ex_float_adj_x
@@ -49,6 +51,13 @@ GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8_2
GeneratedTests.cos_ex_1D_float_nnfw
GeneratedTests.cos_ex_4D_float_nnfw
GeneratedTests.cos_ex_dynamic_nnfw
+GeneratedTests.dequantize_v1_2_3d_quant8_symm
+GeneratedTests.dequantize_v1_2_4d_quant8_symm
+GeneratedTests.dequantize_v1_2_3d_per_channel_first_dim
+GeneratedTests.dequantize_v1_2_3d_per_channel_second_dim
+GeneratedTests.dequantize_v1_2
+GeneratedTests.dequantize_v1_2_zero_sized
+GeneratedTests.dequantize_v1_2_zero_sized_float16
GeneratedTests.div_dynamic_nnfw
GeneratedTests.einsum_ex_float_matmul_2x2_2
GeneratedTests.einsum_ex_float_matmul_3x2_3
@@ -58,6 +67,8 @@ GeneratedTests.einsum_ex_float_matmul_4x4_4_2
GeneratedTests.equal_dynamic_float_nnfw
GeneratedTests.exp_
GeneratedTests.exp_dynamic_nnfw
+GeneratedTests.expand_dims_dynamic_nnfw_1
+GeneratedTests.expand_dims_dynamic_nnfw_2
GeneratedTests.fill_ex_1D_float
GeneratedTests.fill_ex_4D_float
GeneratedTests.fill_ex_dynamic_nnfw
@@ -92,6 +103,7 @@ GeneratedTests.logical_not_1D_nnfw
GeneratedTests.logical_not_4D_nnfw
GeneratedTests.logical_not_dynamic_nnfw
GeneratedTests.logical_or_broadcast
+GeneratedTests.logical_or_dynamic_nnfw
GeneratedTests.logistic_dynamic_nnfw
GeneratedTests.lsh_projection
GeneratedTests.lsh_projection_2
@@ -110,6 +122,7 @@ GeneratedTests.matrix_band_part_ex_4D_float
GeneratedTests.matrix_band_part_ex_dynamic_nnfw
GeneratedTests.maximum_dynamic_nnfw
GeneratedTests.minimum_dynamic_nnfw
+GeneratedTests.minimum_int32
GeneratedTests.mul_dynamic_nnfw
GeneratedTests.neg
GeneratedTests.neg_dynamic_nnfw
@@ -169,6 +182,30 @@ GeneratedTests.reduce_prod_dynamic_2_nnfw
GeneratedTests.reduce_sum_dynamic_1_nnfw
GeneratedTests.reduce_sum_dynamic_2_nnfw
GeneratedTests.reshape_dynamic_nnfw
+GeneratedTests.resize_nearest_neighbor_shape_nchw_2
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_2
+GeneratedTests.resize_nearest_neighbor_scale_nchw_2
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_2
+GeneratedTests.resize_nearest_neighbor_shape_nchw_4
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_4
+GeneratedTests.resize_nearest_neighbor_scale_nchw_4
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_4
+GeneratedTests.resize_nearest_neighbor_shape_nchw_7
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_7
+GeneratedTests.resize_nearest_neighbor_scale_nchw_7
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_7
+GeneratedTests.resize_nearest_neighbor_shape_nchw_8
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_8
+GeneratedTests.resize_nearest_neighbor_scale_nchw_8
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8_2
GeneratedTests.reverse_ex_1d
GeneratedTests.reverse_ex_3d
GeneratedTests.reverse_ex_dynamic_1D
@@ -210,6 +247,7 @@ GeneratedTests.slice_zero_sized_quant8
GeneratedTests.softmax_dynamic_nnfw
GeneratedTests.space_to_batch_dynamic_float_nnfw
GeneratedTests.split_dynamic_float_nnfw
+GeneratedTests.split_float_5_axis_as_input_nnfw
GeneratedTests.split_v_ex_1D_float_1_nnfw
GeneratedTests.split_v_ex_1D_float_2_nnfw
GeneratedTests.split_v_ex_1D_int32_nnfw
@@ -252,10 +290,15 @@ GeneratedTests.tile_3_float16
GeneratedTests.tile_3_int32
GeneratedTests.tile_3_quant8
GeneratedTests.transpose_dynamic_nnfw
-GeneratedTests.transpose_v1_2
-GeneratedTests.transpose_v1_2_quant8
+GeneratedTests.transpose_float_1_perms_as_input_nnfw
GeneratedTests.transpose_v1_2_zero_sized
GeneratedTests.transpose_v1_2_zero_sized_quant8
+GeneratedTests.unidirectional_sequence_lstm_1step
+GeneratedTests.unidirectional_sequence_lstm_batch_major_norm_peephole_projection
+GeneratedTests.unidirectional_sequence_lstm_batch_major_peephole_projection_bias
+GeneratedTests.unidirectional_sequence_lstm_dynamic_nnfw
+GeneratedTests.unidirectional_sequence_lstm_layer_norm_cifg_peephole
+GeneratedTests.unidirectional_sequence_lstm_norm_peephole_projection
GeneratedTests.unpack_ex_dynamic_nnfw
GeneratedTests.zeros_like_ex_2D_float
GeneratedTests.zeros_like_ex_4D_int32
diff --git a/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_neon b/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_neon
index a7bedf14b..03bdf0916 100644
--- a/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_neon
+++ b/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_neon
@@ -1,6 +1,8 @@
GeneratedTests.abs_
GeneratedTests.abs_dynamic_nnfw
GeneratedTests.add_dynamic_nnfw
+GeneratedTests.argmax_3_axis_as_input_nnfw
+GeneratedTests.argmax_3_axis_as_input_nnfw_quant8
GeneratedTests.argmax_dynamic_nnfw
GeneratedTests.batch_matmul_ex_dynamic_nnfw
GeneratedTests.batch_matmul_ex_float_adj_x
@@ -48,6 +50,13 @@ GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8_2
GeneratedTests.cos_ex_1D_float_nnfw
GeneratedTests.cos_ex_4D_float_nnfw
GeneratedTests.cos_ex_dynamic_nnfw
+GeneratedTests.dequantize_v1_2_3d_quant8_symm
+GeneratedTests.dequantize_v1_2_4d_quant8_symm
+GeneratedTests.dequantize_v1_2_3d_per_channel_first_dim
+GeneratedTests.dequantize_v1_2_3d_per_channel_second_dim
+GeneratedTests.dequantize_v1_2
+GeneratedTests.dequantize_v1_2_zero_sized
+GeneratedTests.dequantize_v1_2_zero_sized_float16
GeneratedTests.div_dynamic_nnfw
GeneratedTests.einsum_ex_float_matmul_2x2_2
GeneratedTests.einsum_ex_float_matmul_3x2_3
@@ -59,10 +68,13 @@ GeneratedTests.equal_dynamic_float_nnfw
GeneratedTests.exp_
GeneratedTests.exp_2D_float_nnfw
GeneratedTests.exp_dynamic_nnfw
+GeneratedTests.expand_dims_dynamic_nnfw_1
+GeneratedTests.expand_dims_dynamic_nnfw_2
GeneratedTests.fill_ex_1D_float
GeneratedTests.fill_ex_4D_float
GeneratedTests.fill_ex_dynamic_nnfw
GeneratedTests.fully_connected_dynamic_nnfw
+GeneratedTests.fully_connected_float_2_weights_as_inputs
GeneratedTests.fully_connected_hybrid_1_nnfw
GeneratedTests.fusedbatchnorm_ex_dynamic_nnfw
GeneratedTests.fusedbatchnorm_ex_float_fusedbatchnorm_1141
@@ -94,6 +106,7 @@ GeneratedTests.logical_not
GeneratedTests.logical_not_1D_nnfw
GeneratedTests.logical_not_4D_nnfw
GeneratedTests.logical_not_dynamic_nnfw
+GeneratedTests.logical_or_dynamic_nnfw
GeneratedTests.logistic_dynamic_nnfw
GeneratedTests.lsh_projection
GeneratedTests.lsh_projection_2
@@ -174,6 +187,78 @@ GeneratedTests.reduce_prod_dynamic_2_nnfw
GeneratedTests.reduce_sum_dynamic_1_nnfw
GeneratedTests.reduce_sum_dynamic_2_nnfw
GeneratedTests.reshape_dynamic_nnfw
+GeneratedTests.resize_nearest_neighbor_shape_nhwc
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8
+GeneratedTests.resize_nearest_neighbor_shape_nchw
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8
+GeneratedTests.resize_nearest_neighbor_scale_nhwc
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8
+GeneratedTests.resize_nearest_neighbor_scale_nchw
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_2
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_2
+GeneratedTests.resize_nearest_neighbor_shape_nchw_2
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_2
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_2
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_2
+GeneratedTests.resize_nearest_neighbor_scale_nchw_2
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_2
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_3
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_3
+GeneratedTests.resize_nearest_neighbor_shape_nchw_3
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_3
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_3
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_3
+GeneratedTests.resize_nearest_neighbor_scale_nchw_3
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_3
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_4
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_4
+GeneratedTests.resize_nearest_neighbor_shape_nchw_4
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_4
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_4
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_4
+GeneratedTests.resize_nearest_neighbor_scale_nchw_4
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_4
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_5
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_5
+GeneratedTests.resize_nearest_neighbor_shape_nchw_5
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_5
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_5
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_5
+GeneratedTests.resize_nearest_neighbor_scale_nchw_5
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_5
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_6
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_6
+GeneratedTests.resize_nearest_neighbor_shape_nchw_6
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_6
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_6
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_6
+GeneratedTests.resize_nearest_neighbor_scale_nchw_6
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_6
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_7
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_7
+GeneratedTests.resize_nearest_neighbor_shape_nchw_7
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_7
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_7
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_7
+GeneratedTests.resize_nearest_neighbor_scale_nchw_7
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_7
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_8
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_8
+GeneratedTests.resize_nearest_neighbor_shape_nchw_8
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_8
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_8
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_8
+GeneratedTests.resize_nearest_neighbor_scale_nchw_8
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8_2
GeneratedTests.reverse_ex_1d
GeneratedTests.reverse_ex_3d
GeneratedTests.reverse_ex_dynamic_1D
@@ -222,6 +307,7 @@ GeneratedTests.space_to_batch_quant8_2
GeneratedTests.space_to_batch_quant8_2_nnfw
GeneratedTests.space_to_batch_quant8_3
GeneratedTests.split_dynamic_float_nnfw
+GeneratedTests.split_float_5_axis_as_input_nnfw
GeneratedTests.split_v_ex_1D_float_1_nnfw
GeneratedTests.split_v_ex_1D_float_2_nnfw
GeneratedTests.split_v_ex_1D_int32_nnfw
@@ -276,10 +362,15 @@ GeneratedTests.topk_v2_4
GeneratedTests.topk_v2_5
GeneratedTests.topk_v2_6
GeneratedTests.transpose_dynamic_nnfw
-GeneratedTests.transpose_v1_2
-GeneratedTests.transpose_v1_2_quant8
+GeneratedTests.transpose_float_1_perms_as_input_nnfw
GeneratedTests.transpose_v1_2_zero_sized
GeneratedTests.transpose_v1_2_zero_sized_quant8
+GeneratedTests.unidirectional_sequence_lstm_1step
+GeneratedTests.unidirectional_sequence_lstm_batch_major_norm_peephole_projection
+GeneratedTests.unidirectional_sequence_lstm_batch_major_peephole_projection_bias
+GeneratedTests.unidirectional_sequence_lstm_dynamic_nnfw
+GeneratedTests.unidirectional_sequence_lstm_layer_norm_cifg_peephole
+GeneratedTests.unidirectional_sequence_lstm_norm_peephole_projection
GeneratedTests.unpack_ex_dynamic_nnfw
GeneratedTests.zeros_like_ex_2D_float
GeneratedTests.zeros_like_ex_4D_int32
diff --git a/tests/nnapi/nnapi_gtest.skip.aarch64-linux.cpu b/tests/nnapi/nnapi_gtest.skip.aarch64-linux.cpu
index e98007e08..a64ffca04 100644
--- a/tests/nnapi/nnapi_gtest.skip.aarch64-linux.cpu
+++ b/tests/nnapi/nnapi_gtest.skip.aarch64-linux.cpu
@@ -12,12 +12,11 @@ GeneratedTests.cast_float32_to_quant8_overflow_relaxed
GeneratedTests.cast_int32_to_float16
GeneratedTests.cast_int32_to_quant8_overflow
GeneratedTests.cast_quant8_to_float16
-GeneratedTests.depth_to_space_float_1
-GeneratedTests.depth_to_space_float_2
-GeneratedTests.depth_to_space_float_3
-GeneratedTests.depth_to_space_quant8_1
-GeneratedTests.depth_to_space_quant8_2
-GeneratedTests.dequantize
+GeneratedTests.dequantize_v1_2_3d_per_channel_first_dim
+GeneratedTests.dequantize_v1_2_3d_per_channel_second_dim
+GeneratedTests.dequantize_v1_2
+GeneratedTests.dequantize_v1_2_zero_sized
+GeneratedTests.dequantize_v1_2_zero_sized_float16
GeneratedTests.embedding_lookup
GeneratedTests.embedding_lookup_2d_nnfw
GeneratedTests.embedding_lookup_4d_nnfw
@@ -42,28 +41,13 @@ GeneratedTests.local_response_norm_float_1
GeneratedTests.local_response_norm_float_2
GeneratedTests.local_response_norm_float_3
GeneratedTests.local_response_norm_float_4
-GeneratedTests.logical_and_1D_nnfw
-GeneratedTests.logical_and_2D_nnfw
-GeneratedTests.logical_and_3D_nnfw
-GeneratedTests.logical_and_4D_nnfw
-GeneratedTests.logical_and_broadcast
-GeneratedTests.logical_and_broadcast_4D_2D_nnfw
-GeneratedTests.logical_and_broadcast_nnfw
-GeneratedTests.logical_and_simple
GeneratedTests.logical_not
GeneratedTests.lsh_projection
GeneratedTests.lsh_projection_2
GeneratedTests.lsh_projection_weights_as_inputs
-GeneratedTests.lstm
GeneratedTests.lstm2
GeneratedTests.lstm2_state
GeneratedTests.lstm2_state2
-GeneratedTests.lstm3
-GeneratedTests.lstm3_state
-GeneratedTests.lstm3_state2
-GeneratedTests.lstm3_state3
-GeneratedTests.lstm_state
-GeneratedTests.lstm_state2
GeneratedTests.maximum_broadcast_quant8
GeneratedTests.maximum_overflow
GeneratedTests.maximum_simple_quant8
@@ -110,6 +94,78 @@ GeneratedTests.relu6_quant8_1
GeneratedTests.relu6_quant8_2
GeneratedTests.relu_quant8_1
GeneratedTests.relu_quant8_2
+GeneratedTests.resize_nearest_neighbor_shape_nhwc
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8
+GeneratedTests.resize_nearest_neighbor_shape_nchw
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8
+GeneratedTests.resize_nearest_neighbor_scale_nhwc
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8
+GeneratedTests.resize_nearest_neighbor_scale_nchw
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_2
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_2
+GeneratedTests.resize_nearest_neighbor_shape_nchw_2
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_2
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_2
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_2
+GeneratedTests.resize_nearest_neighbor_scale_nchw_2
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_2
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_3
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_3
+GeneratedTests.resize_nearest_neighbor_shape_nchw_3
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_3
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_3
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_3
+GeneratedTests.resize_nearest_neighbor_scale_nchw_3
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_3
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_4
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_4
+GeneratedTests.resize_nearest_neighbor_shape_nchw_4
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_4
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_4
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_4
+GeneratedTests.resize_nearest_neighbor_scale_nchw_4
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_4
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_5
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_5
+GeneratedTests.resize_nearest_neighbor_shape_nchw_5
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_5
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_5
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_5
+GeneratedTests.resize_nearest_neighbor_scale_nchw_5
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_5
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_6
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_6
+GeneratedTests.resize_nearest_neighbor_shape_nchw_6
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_6
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_6
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_6
+GeneratedTests.resize_nearest_neighbor_scale_nchw_6
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_6
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_7
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_7
+GeneratedTests.resize_nearest_neighbor_shape_nchw_7
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_7
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_7
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_7
+GeneratedTests.resize_nearest_neighbor_scale_nchw_7
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_7
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_8
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_8
+GeneratedTests.resize_nearest_neighbor_shape_nchw_8
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_8
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_8
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_8
+GeneratedTests.resize_nearest_neighbor_scale_nchw_8
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8_2
GeneratedTests.rnn
GeneratedTests.rnn_state
GeneratedTests.rsqrt
@@ -171,7 +227,5 @@ GeneratedTests.transpose_conv_ex_float_1
GeneratedTests.transpose_conv_ex_float_2
GeneratedTests.transpose_conv_ex_float_3
GeneratedTests.transpose_conv_ex_float_4
-GeneratedTests.transpose_v1_2
-GeneratedTests.transpose_v1_2_quant8
GeneratedTests.transpose_v1_2_zero_sized
GeneratedTests.transpose_v1_2_zero_sized_quant8
diff --git a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_cl b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_cl
index 984dbfa2a..854d6ac80 100644
--- a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_cl
+++ b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_cl
@@ -1,6 +1,8 @@
GeneratedTests.abs_
GeneratedTests.abs_dynamic_nnfw
GeneratedTests.add_dynamic_nnfw
+GeneratedTests.argmax_3_axis_as_input_nnfw
+GeneratedTests.argmax_3_axis_as_input_nnfw_quant8
GeneratedTests.argmax_dynamic_nnfw
GeneratedTests.batch_matmul_ex_dynamic_nnfw
GeneratedTests.batch_matmul_ex_float_adj_x
@@ -49,6 +51,13 @@ GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8_2
GeneratedTests.cos_ex_1D_float_nnfw
GeneratedTests.cos_ex_4D_float_nnfw
GeneratedTests.cos_ex_dynamic_nnfw
+GeneratedTests.dequantize_v1_2_3d_quant8_symm
+GeneratedTests.dequantize_v1_2_4d_quant8_symm
+GeneratedTests.dequantize_v1_2_3d_per_channel_first_dim
+GeneratedTests.dequantize_v1_2_3d_per_channel_second_dim
+GeneratedTests.dequantize_v1_2
+GeneratedTests.dequantize_v1_2_zero_sized
+GeneratedTests.dequantize_v1_2_zero_sized_float16
GeneratedTests.div_dynamic_nnfw
GeneratedTests.einsum_ex_float_matmul_2x2_2
GeneratedTests.einsum_ex_float_matmul_3x2_3
@@ -58,6 +67,8 @@ GeneratedTests.einsum_ex_float_matmul_4x4_4_2
GeneratedTests.equal_dynamic_float_nnfw
GeneratedTests.exp_
GeneratedTests.exp_dynamic_nnfw
+GeneratedTests.expand_dims_dynamic_nnfw_1
+GeneratedTests.expand_dims_dynamic_nnfw_2
GeneratedTests.fill_ex_1D_float
GeneratedTests.fill_ex_4D_float
GeneratedTests.fill_ex_dynamic_nnfw
@@ -92,6 +103,7 @@ GeneratedTests.logical_not_1D_nnfw
GeneratedTests.logical_not_4D_nnfw
GeneratedTests.logical_not_dynamic_nnfw
GeneratedTests.logical_or_broadcast
+GeneratedTests.logical_or_dynamic_nnfw
GeneratedTests.logistic_dynamic_nnfw
GeneratedTests.lsh_projection
GeneratedTests.lsh_projection_2
@@ -110,6 +122,7 @@ GeneratedTests.matrix_band_part_ex_4D_float
GeneratedTests.matrix_band_part_ex_dynamic_nnfw
GeneratedTests.maximum_dynamic_nnfw
GeneratedTests.minimum_dynamic_nnfw
+GeneratedTests.minimum_int32
GeneratedTests.mul_dynamic_nnfw
GeneratedTests.neg
GeneratedTests.neg_dynamic_nnfw
@@ -169,6 +182,32 @@ GeneratedTests.reduce_prod_dynamic_2_nnfw
GeneratedTests.reduce_sum_dynamic_1_nnfw
GeneratedTests.reduce_sum_dynamic_2_nnfw
GeneratedTests.reshape_dynamic_nnfw
+GeneratedTests.reshape_quant8_weights_as_inputs
+GeneratedTests.reshape_weights_as_inputs
+GeneratedTests.resize_nearest_neighbor_shape_nchw_2
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_2
+GeneratedTests.resize_nearest_neighbor_scale_nchw_2
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_2
+GeneratedTests.resize_nearest_neighbor_shape_nchw_4
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_4
+GeneratedTests.resize_nearest_neighbor_scale_nchw_4
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_4
+GeneratedTests.resize_nearest_neighbor_shape_nchw_7
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_7
+GeneratedTests.resize_nearest_neighbor_scale_nchw_7
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_7
+GeneratedTests.resize_nearest_neighbor_shape_nchw_8
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_8
+GeneratedTests.resize_nearest_neighbor_scale_nchw_8
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8_2
GeneratedTests.reverse_ex_1d
GeneratedTests.reverse_ex_3d
GeneratedTests.reverse_ex_dynamic_1D
@@ -210,6 +249,7 @@ GeneratedTests.slice_zero_sized_quant8
GeneratedTests.softmax_dynamic_nnfw
GeneratedTests.space_to_batch_dynamic_float_nnfw
GeneratedTests.split_dynamic_float_nnfw
+GeneratedTests.split_float_5_axis_as_input_nnfw
GeneratedTests.split_v_ex_1D_float_1_nnfw
GeneratedTests.split_v_ex_1D_float_2_nnfw
GeneratedTests.split_v_ex_1D_int32_nnfw
@@ -252,10 +292,15 @@ GeneratedTests.tile_3_float16
GeneratedTests.tile_3_int32
GeneratedTests.tile_3_quant8
GeneratedTests.transpose_dynamic_nnfw
-GeneratedTests.transpose_v1_2
-GeneratedTests.transpose_v1_2_quant8
+GeneratedTests.transpose_float_1_perms_as_input_nnfw
GeneratedTests.transpose_v1_2_zero_sized
GeneratedTests.transpose_v1_2_zero_sized_quant8
+GeneratedTests.unidirectional_sequence_lstm_1step
+GeneratedTests.unidirectional_sequence_lstm_batch_major_norm_peephole_projection
+GeneratedTests.unidirectional_sequence_lstm_batch_major_peephole_projection_bias
+GeneratedTests.unidirectional_sequence_lstm_dynamic_nnfw
+GeneratedTests.unidirectional_sequence_lstm_layer_norm_cifg_peephole
+GeneratedTests.unidirectional_sequence_lstm_norm_peephole_projection
GeneratedTests.unpack_ex_dynamic_nnfw
GeneratedTests.zeros_like_ex_2D_float
GeneratedTests.zeros_like_ex_4D_int32
diff --git a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon
index 036c869c6..a3320998a 100644
--- a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon
+++ b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon
@@ -1,6 +1,8 @@
GeneratedTests.abs_
GeneratedTests.abs_dynamic_nnfw
GeneratedTests.add_dynamic_nnfw
+GeneratedTests.argmax_3_axis_as_input_nnfw
+GeneratedTests.argmax_3_axis_as_input_nnfw_quant8
GeneratedTests.argmax_dynamic_nnfw
GeneratedTests.batch_matmul_ex_dynamic_nnfw
GeneratedTests.batch_matmul_ex_float_adj_x
@@ -48,6 +50,13 @@ GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8_2
GeneratedTests.cos_ex_1D_float_nnfw
GeneratedTests.cos_ex_4D_float_nnfw
GeneratedTests.cos_ex_dynamic_nnfw
+GeneratedTests.dequantize_v1_2_3d_quant8_symm
+GeneratedTests.dequantize_v1_2_4d_quant8_symm
+GeneratedTests.dequantize_v1_2_3d_per_channel_first_dim
+GeneratedTests.dequantize_v1_2_3d_per_channel_second_dim
+GeneratedTests.dequantize_v1_2
+GeneratedTests.dequantize_v1_2_zero_sized
+GeneratedTests.dequantize_v1_2_zero_sized_float16
GeneratedTests.div_dynamic_nnfw
GeneratedTests.einsum_ex_float_matmul_2x2_2
GeneratedTests.einsum_ex_float_matmul_3x2_3
@@ -59,10 +68,13 @@ GeneratedTests.equal_dynamic_float_nnfw
GeneratedTests.exp_
GeneratedTests.exp_2D_float_nnfw
GeneratedTests.exp_dynamic_nnfw
+GeneratedTests.expand_dims_dynamic_nnfw_1
+GeneratedTests.expand_dims_dynamic_nnfw_2
GeneratedTests.fill_ex_1D_float
GeneratedTests.fill_ex_4D_float
GeneratedTests.fill_ex_dynamic_nnfw
GeneratedTests.fully_connected_dynamic_nnfw
+GeneratedTests.fully_connected_float_2_weights_as_inputs
GeneratedTests.fusedbatchnorm_ex_dynamic_nnfw
GeneratedTests.fusedbatchnorm_ex_float_fusedbatchnorm_1141
GeneratedTests.gather_dynamic_nnfw
@@ -93,6 +105,7 @@ GeneratedTests.logical_not
GeneratedTests.logical_not_1D_nnfw
GeneratedTests.logical_not_4D_nnfw
GeneratedTests.logical_not_dynamic_nnfw
+GeneratedTests.logical_or_dynamic_nnfw
GeneratedTests.logistic_dynamic_nnfw
GeneratedTests.lsh_projection
GeneratedTests.lsh_projection_2
@@ -173,6 +186,80 @@ GeneratedTests.reduce_prod_dynamic_2_nnfw
GeneratedTests.reduce_sum_dynamic_1_nnfw
GeneratedTests.reduce_sum_dynamic_2_nnfw
GeneratedTests.reshape_dynamic_nnfw
+GeneratedTests.reshape_quant8_weights_as_inputs
+GeneratedTests.reshape_weights_as_inputs
+GeneratedTests.resize_nearest_neighbor_shape_nhwc
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8
+GeneratedTests.resize_nearest_neighbor_shape_nchw
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8
+GeneratedTests.resize_nearest_neighbor_scale_nhwc
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8
+GeneratedTests.resize_nearest_neighbor_scale_nchw
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_2
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_2
+GeneratedTests.resize_nearest_neighbor_shape_nchw_2
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_2
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_2
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_2
+GeneratedTests.resize_nearest_neighbor_scale_nchw_2
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_2
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_3
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_3
+GeneratedTests.resize_nearest_neighbor_shape_nchw_3
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_3
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_3
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_3
+GeneratedTests.resize_nearest_neighbor_scale_nchw_3
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_3
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_4
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_4
+GeneratedTests.resize_nearest_neighbor_shape_nchw_4
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_4
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_4
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_4
+GeneratedTests.resize_nearest_neighbor_scale_nchw_4
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_4
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_5
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_5
+GeneratedTests.resize_nearest_neighbor_shape_nchw_5
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_5
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_5
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_5
+GeneratedTests.resize_nearest_neighbor_scale_nchw_5
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_5
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_6
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_6
+GeneratedTests.resize_nearest_neighbor_shape_nchw_6
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_6
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_6
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_6
+GeneratedTests.resize_nearest_neighbor_scale_nchw_6
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_6
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_7
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_7
+GeneratedTests.resize_nearest_neighbor_shape_nchw_7
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_7
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_7
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_7
+GeneratedTests.resize_nearest_neighbor_scale_nchw_7
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_7
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_8
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_8
+GeneratedTests.resize_nearest_neighbor_shape_nchw_8
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_8
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_8
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_8
+GeneratedTests.resize_nearest_neighbor_scale_nchw_8
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8_2
GeneratedTests.reverse_ex_1d
GeneratedTests.reverse_ex_3d
GeneratedTests.reverse_ex_dynamic_1D
@@ -214,6 +301,7 @@ GeneratedTests.slice_zero_sized_quant8
GeneratedTests.softmax_dynamic_nnfw
GeneratedTests.space_to_batch_dynamic_float_nnfw
GeneratedTests.split_dynamic_float_nnfw
+GeneratedTests.split_float_5_axis_as_input_nnfw
GeneratedTests.split_v_ex_1D_float_1_nnfw
GeneratedTests.split_v_ex_1D_float_2_nnfw
GeneratedTests.split_v_ex_1D_int32_nnfw
@@ -268,10 +356,15 @@ GeneratedTests.topk_v2_4
GeneratedTests.topk_v2_5
GeneratedTests.topk_v2_6
GeneratedTests.transpose_dynamic_nnfw
-GeneratedTests.transpose_v1_2
-GeneratedTests.transpose_v1_2_quant8
+GeneratedTests.transpose_float_1_perms_as_input_nnfw
GeneratedTests.transpose_v1_2_zero_sized
GeneratedTests.transpose_v1_2_zero_sized_quant8
+GeneratedTests.unidirectional_sequence_lstm_1step
+GeneratedTests.unidirectional_sequence_lstm_batch_major_norm_peephole_projection
+GeneratedTests.unidirectional_sequence_lstm_batch_major_peephole_projection_bias
+GeneratedTests.unidirectional_sequence_lstm_dynamic_nnfw
+GeneratedTests.unidirectional_sequence_lstm_layer_norm_cifg_peephole
+GeneratedTests.unidirectional_sequence_lstm_norm_peephole_projection
GeneratedTests.unpack_ex_dynamic_nnfw
GeneratedTests.zeros_like_ex_2D_float
GeneratedTests.zeros_like_ex_4D_int32
diff --git a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.cpu b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.cpu
index e98007e08..a64ffca04 100644
--- a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.cpu
+++ b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.cpu
@@ -12,12 +12,11 @@ GeneratedTests.cast_float32_to_quant8_overflow_relaxed
GeneratedTests.cast_int32_to_float16
GeneratedTests.cast_int32_to_quant8_overflow
GeneratedTests.cast_quant8_to_float16
-GeneratedTests.depth_to_space_float_1
-GeneratedTests.depth_to_space_float_2
-GeneratedTests.depth_to_space_float_3
-GeneratedTests.depth_to_space_quant8_1
-GeneratedTests.depth_to_space_quant8_2
-GeneratedTests.dequantize
+GeneratedTests.dequantize_v1_2_3d_per_channel_first_dim
+GeneratedTests.dequantize_v1_2_3d_per_channel_second_dim
+GeneratedTests.dequantize_v1_2
+GeneratedTests.dequantize_v1_2_zero_sized
+GeneratedTests.dequantize_v1_2_zero_sized_float16
GeneratedTests.embedding_lookup
GeneratedTests.embedding_lookup_2d_nnfw
GeneratedTests.embedding_lookup_4d_nnfw
@@ -42,28 +41,13 @@ GeneratedTests.local_response_norm_float_1
GeneratedTests.local_response_norm_float_2
GeneratedTests.local_response_norm_float_3
GeneratedTests.local_response_norm_float_4
-GeneratedTests.logical_and_1D_nnfw
-GeneratedTests.logical_and_2D_nnfw
-GeneratedTests.logical_and_3D_nnfw
-GeneratedTests.logical_and_4D_nnfw
-GeneratedTests.logical_and_broadcast
-GeneratedTests.logical_and_broadcast_4D_2D_nnfw
-GeneratedTests.logical_and_broadcast_nnfw
-GeneratedTests.logical_and_simple
GeneratedTests.logical_not
GeneratedTests.lsh_projection
GeneratedTests.lsh_projection_2
GeneratedTests.lsh_projection_weights_as_inputs
-GeneratedTests.lstm
GeneratedTests.lstm2
GeneratedTests.lstm2_state
GeneratedTests.lstm2_state2
-GeneratedTests.lstm3
-GeneratedTests.lstm3_state
-GeneratedTests.lstm3_state2
-GeneratedTests.lstm3_state3
-GeneratedTests.lstm_state
-GeneratedTests.lstm_state2
GeneratedTests.maximum_broadcast_quant8
GeneratedTests.maximum_overflow
GeneratedTests.maximum_simple_quant8
@@ -110,6 +94,78 @@ GeneratedTests.relu6_quant8_1
GeneratedTests.relu6_quant8_2
GeneratedTests.relu_quant8_1
GeneratedTests.relu_quant8_2
+GeneratedTests.resize_nearest_neighbor_shape_nhwc
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8
+GeneratedTests.resize_nearest_neighbor_shape_nchw
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8
+GeneratedTests.resize_nearest_neighbor_scale_nhwc
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8
+GeneratedTests.resize_nearest_neighbor_scale_nchw
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_2
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_2
+GeneratedTests.resize_nearest_neighbor_shape_nchw_2
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_2
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_2
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_2
+GeneratedTests.resize_nearest_neighbor_scale_nchw_2
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_2
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_3
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_3
+GeneratedTests.resize_nearest_neighbor_shape_nchw_3
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_3
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_3
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_3
+GeneratedTests.resize_nearest_neighbor_scale_nchw_3
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_3
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_4
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_4
+GeneratedTests.resize_nearest_neighbor_shape_nchw_4
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_4
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_4
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_4
+GeneratedTests.resize_nearest_neighbor_scale_nchw_4
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_4
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_5
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_5
+GeneratedTests.resize_nearest_neighbor_shape_nchw_5
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_5
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_5
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_5
+GeneratedTests.resize_nearest_neighbor_scale_nchw_5
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_5
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_6
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_6
+GeneratedTests.resize_nearest_neighbor_shape_nchw_6
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_6
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_6
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_6
+GeneratedTests.resize_nearest_neighbor_scale_nchw_6
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_6
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_7
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_7
+GeneratedTests.resize_nearest_neighbor_shape_nchw_7
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_7
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_7
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_7
+GeneratedTests.resize_nearest_neighbor_scale_nchw_7
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_7
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_8
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_8
+GeneratedTests.resize_nearest_neighbor_shape_nchw_8
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_8
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_8
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_8
+GeneratedTests.resize_nearest_neighbor_scale_nchw_8
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8_2
GeneratedTests.rnn
GeneratedTests.rnn_state
GeneratedTests.rsqrt
@@ -171,7 +227,5 @@ GeneratedTests.transpose_conv_ex_float_1
GeneratedTests.transpose_conv_ex_float_2
GeneratedTests.transpose_conv_ex_float_3
GeneratedTests.transpose_conv_ex_float_4
-GeneratedTests.transpose_v1_2
-GeneratedTests.transpose_v1_2_quant8
GeneratedTests.transpose_v1_2_zero_sized
GeneratedTests.transpose_v1_2_zero_sized_quant8
diff --git a/tests/nnapi/nnapi_gtest.skip.noarch.interp b/tests/nnapi/nnapi_gtest.skip.noarch.interp
deleted file mode 100644
index 9e51e759e..000000000
--- a/tests/nnapi/nnapi_gtest.skip.noarch.interp
+++ /dev/null
@@ -1,641 +0,0 @@
-GeneratedTests.abs_
-GeneratedTests.abs_1D_float_nnfw
-GeneratedTests.abs_2D_float_nnfw
-GeneratedTests.abs_3D_float_nnfw
-GeneratedTests.abs_4D_float_nnfw
-GeneratedTests.abs_dynamic_nnfw
-GeneratedTests.add_broadcast_quant8
-GeneratedTests.add_dynamic_nnfw
-GeneratedTests.add_quant8
-GeneratedTests.argmax_1
-GeneratedTests.argmax_1_quant8
-GeneratedTests.argmax_2
-GeneratedTests.argmax_2_quant8
-GeneratedTests.argmax_3
-GeneratedTests.argmax_3_quant8
-GeneratedTests.argmax_dynamic_nnfw
-GeneratedTests.argmax_float_1_nnfw
-GeneratedTests.argmax_float_2_nnfw
-GeneratedTests.argmax_int32_nnfw
-GeneratedTests.argmax_neg_axis_float_nnfw
-GeneratedTests.argmax_neg_axis_int32_nnfw
-GeneratedTests.argmax_quant8_neg_axis_nnfw
-GeneratedTests.argmax_quant8_nnfw
-GeneratedTests.avg_pool_quant8_1
-GeneratedTests.avg_pool_quant8_2
-GeneratedTests.avg_pool_quant8_3
-GeneratedTests.avg_pool_quant8_4
-GeneratedTests.avg_pool_quant8_5
-GeneratedTests.batch_matmul_ex_dynamic_nnfw
-GeneratedTests.batch_matmul_ex_float_adj_x
-GeneratedTests.batch_matmul_ex_float_adj_y
-GeneratedTests.batch_matmul_ex_float_batch2
-GeneratedTests.batch_matmul_ex_float_broadcast
-GeneratedTests.batch_matmul_ex_float_broadcast2_adj_xy
-GeneratedTests.batch_matmul_ex_float_broadcast_adj_x
-GeneratedTests.batch_matmul_ex_float_simple
-GeneratedTests.batch_to_space
-GeneratedTests.batch_to_space_float_1
-GeneratedTests.batch_to_space_quant8_1
-GeneratedTests.broadcast_to_ex_1D_nnfw
-GeneratedTests.broadcast_to_ex_2D_nnfw
-GeneratedTests.broadcast_to_ex_dynamic_2D_nnfw
-GeneratedTests.broadcast_to_ex_dynamic_3D_nnfw
-GeneratedTests.cast_dynamic_float32_to_int32_nnfw
-GeneratedTests.cast_float16_to_float16
-GeneratedTests.cast_float16_to_float32
-GeneratedTests.cast_float16_to_float32_relaxed
-GeneratedTests.cast_float16_to_int32
-GeneratedTests.cast_float16_to_quant8
-GeneratedTests.cast_float16_to_quant8_overflow
-GeneratedTests.cast_float32_to_float16
-GeneratedTests.cast_float32_to_float16_relaxed
-GeneratedTests.cast_float32_to_float32
-GeneratedTests.cast_float32_to_float32_relaxed
-GeneratedTests.cast_float32_to_int32
-GeneratedTests.cast_float32_to_int32_nnfw
-GeneratedTests.cast_float32_to_int32_relaxed
-GeneratedTests.cast_float32_to_quant8
-GeneratedTests.cast_float32_to_quant8_overflow
-GeneratedTests.cast_float32_to_quant8_overflow_relaxed
-GeneratedTests.cast_float32_to_quant8_relaxed
-GeneratedTests.cast_int32_to_float16
-GeneratedTests.cast_int32_to_float32
-GeneratedTests.cast_int32_to_float32_nnfw
-GeneratedTests.cast_int32_to_float32_relaxed
-GeneratedTests.cast_int32_to_int32
-GeneratedTests.cast_int32_to_quant8
-GeneratedTests.cast_int32_to_quant8_overflow
-GeneratedTests.cast_quant8_to_float16
-GeneratedTests.cast_quant8_to_float32
-GeneratedTests.cast_quant8_to_float32_relaxed
-GeneratedTests.cast_quant8_to_int32
-GeneratedTests.cast_quant8_to_quant8
-GeneratedTests.concat_dynamic_nnfw
-GeneratedTests.concat_quant8_1
-GeneratedTests.concat_quant8_2
-GeneratedTests.concat_quant8_3
-GeneratedTests.conv_dynamic_nnfw
-GeneratedTests.conv_quant8
-GeneratedTests.conv_quant8_2
-GeneratedTests.conv_quant8_channels
-GeneratedTests.conv_quant8_channels_weights_as_inputs
-GeneratedTests.conv_quant8_large
-GeneratedTests.conv_quant8_large_weights_as_inputs
-GeneratedTests.conv_quant8_overflow
-GeneratedTests.conv_quant8_overflow_weights_as_inputs
-GeneratedTests.conv_quant8_weights_as_inputs
-GeneratedTests.conv2d_dilation_nnfw
-GeneratedTests.conv2d_dilation_nnfw_quant8
-GeneratedTests.conv2d_dilation_nnfw_weight_as_input
-GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8
-GeneratedTests.conv2d_dilation_nnfw_2
-GeneratedTests.conv2d_dilation_nnfw_quant8_2
-GeneratedTests.conv2d_dilation_nnfw_weight_as_input_2
-GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8_2
-GeneratedTests.cos_ex_1D_float_nnfw
-GeneratedTests.cos_ex_4D_float_nnfw
-GeneratedTests.cos_ex_dynamic_nnfw
-GeneratedTests.depth_to_space_float_1
-GeneratedTests.depth_to_space_float_2
-GeneratedTests.depth_to_space_float_3
-GeneratedTests.depth_to_space_quant8_1
-GeneratedTests.depth_to_space_quant8_2
-GeneratedTests.depthwise_conv2d_quant8
-GeneratedTests.depthwise_conv2d_quant8_2
-GeneratedTests.depthwise_conv2d_quant8_large
-GeneratedTests.depthwise_conv2d_quant8_large_weights_as_inputs
-GeneratedTests.depthwise_conv2d_quant8_weights_as_inputs
-GeneratedTests.dequantize
-GeneratedTests.div_
-GeneratedTests.div_broadcast_float
-GeneratedTests.div_broadcast_float_4D_2D_nnfw
-GeneratedTests.div_dynamic_nnfw
-GeneratedTests.einsum_ex_float_matmul_2x2_2
-GeneratedTests.einsum_ex_float_matmul_3x2_3
-GeneratedTests.einsum_ex_float_matmul_3x3_4
-GeneratedTests.einsum_ex_float_matmul_4x4_4
-GeneratedTests.einsum_ex_float_matmul_4x4_4_2
-GeneratedTests.embedding_lookup
-GeneratedTests.embedding_lookup_2d_nnfw
-GeneratedTests.embedding_lookup_4d_nnfw
-GeneratedTests.equal_1D_float_nnfw
-GeneratedTests.equal_4D_float_nnfw
-GeneratedTests.equal_boolean
-GeneratedTests.equal_broadcast
-GeneratedTests.equal_broadcast_4D_2D_float_nnfw
-GeneratedTests.equal_broadcast_float_nnfw
-GeneratedTests.equal_broadcast_quant8_nnfw
-GeneratedTests.equal_dynamic_float_nnfw
-GeneratedTests.equal_quant8_nnfw
-GeneratedTests.equal_quantized_different_scale
-GeneratedTests.equal_quantized_different_zero_point
-GeneratedTests.equal_quantized_overflow_first_input_if_requantized
-GeneratedTests.equal_quantized_overflow_second_input_if_requantized
-GeneratedTests.equal_simple
-GeneratedTests.exp_
-GeneratedTests.exp_1D_float_nnfw
-GeneratedTests.exp_2D_float_nnfw
-GeneratedTests.exp_3D_float_nnfw
-GeneratedTests.exp_4D_float_nnfw
-GeneratedTests.exp_dynamic_nnfw
-GeneratedTests.expand_dims
-GeneratedTests.expand_dims_2
-GeneratedTests.expand_dims_3
-GeneratedTests.expand_dims_4
-GeneratedTests.expand_dims_dynamic_nnfw_1
-GeneratedTests.expand_dims_dynamic_nnfw_2
-GeneratedTests.expand_dims_int32
-GeneratedTests.expand_dims_int32_2
-GeneratedTests.expand_dims_int32_3
-GeneratedTests.expand_dims_int32_4
-GeneratedTests.expand_dims_quant8
-GeneratedTests.expand_dims_quant8_2
-GeneratedTests.expand_dims_quant8_3
-GeneratedTests.expand_dims_quant8_4
-GeneratedTests.fill_ex_1D_float
-GeneratedTests.fill_ex_4D_float
-GeneratedTests.fill_ex_dynamic_nnfw
-GeneratedTests.floor_
-GeneratedTests.fully_connected_dynamic_nnfw
-GeneratedTests.fully_connected_hybrid_1_nnfw
-GeneratedTests.fully_connected_hybrid_2_nnfw
-GeneratedTests.fully_connected_quant8
-GeneratedTests.fully_connected_quant8_2
-GeneratedTests.fully_connected_quant8_large
-GeneratedTests.fully_connected_quant8_large_weights_as_inputs
-GeneratedTests.fully_connected_quant8_weights_as_inputs
-GeneratedTests.fusedbatchnorm_ex_dynamic_nnfw
-GeneratedTests.fusedbatchnorm_ex_float_fusedbatchnorm_1141
-GeneratedTests.gather_dynamic_nnfw
-GeneratedTests.gather_float16
-GeneratedTests.gather_float16_2
-GeneratedTests.gather_float16_3
-GeneratedTests.gather_float16_4
-GeneratedTests.gather_float16_5
-GeneratedTests.gather_float16_6
-GeneratedTests.gather_float16_7
-GeneratedTests.gather_float16_8
-GeneratedTests.greater_broadcast_quant8_nnfw
-GeneratedTests.greater_dynamic_float_nnfw
-GeneratedTests.greater_equal_boolean
-GeneratedTests.greater_equal_broadcast
-GeneratedTests.greater_equal_broadcast_quant8_nnfw
-GeneratedTests.greater_equal_dynamic_float_nnfw
-GeneratedTests.greater_equal_nnfw
-GeneratedTests.greater_equal_quant8_nnfw
-GeneratedTests.greater_equal_quantized_different_scale
-GeneratedTests.greater_equal_quantized_different_zero_point
-GeneratedTests.greater_equal_quantized_overflow_first_input_if_requantized
-GeneratedTests.greater_equal_quantized_overflow_second_input_if_requantized
-GeneratedTests.greater_equal_simple
-GeneratedTests.greater_quant8_nnfw
-GeneratedTests.hashtable_lookup_float
-GeneratedTests.hashtable_lookup_float_4D_nnfw
-GeneratedTests.hashtable_lookup_quant8
-GeneratedTests.l2_normalization
-GeneratedTests.l2_normalization_2
-GeneratedTests.l2_normalization_large
-GeneratedTests.l2_normalization_quant8_nnfw
-GeneratedTests.l2_pool_float
-GeneratedTests.l2_pool_float_2
-GeneratedTests.l2_pool_float_large
-GeneratedTests.less_boolean
-GeneratedTests.less_broadcast
-GeneratedTests.less_broadcast_quant8_nnfw
-GeneratedTests.less_dynamic_float_nnfw
-GeneratedTests.less_equal_broadcast_quant8_nnfw
-GeneratedTests.less_equal_dynamic_float_nnfw
-GeneratedTests.less_equal_quant8_nnfw
-GeneratedTests.less_nnfw
-GeneratedTests.less_quant8_nnfw
-GeneratedTests.less_quantized_different_scale
-GeneratedTests.less_quantized_different_zero_point
-GeneratedTests.less_quantized_overflow_first_input_if_requantized
-GeneratedTests.less_quantized_overflow_second_input_if_requantized
-GeneratedTests.less_simple
-GeneratedTests.local_response_norm_float_1
-GeneratedTests.local_response_norm_float_2
-GeneratedTests.local_response_norm_float_3
-GeneratedTests.local_response_norm_float_4
-GeneratedTests.log_4D_float_nnfw
-GeneratedTests.log_dynamic_nnfw
-GeneratedTests.log_softmax_nnfw
-GeneratedTests.log_softmax_nnfw_2
-GeneratedTests.log_softmax_nnfw_3
-GeneratedTests.log_softmax_nnfw_4
-GeneratedTests.log_softmax_nnfw_5
-GeneratedTests.log_softmax_nnfw_quant8
-GeneratedTests.logical_and_1D_nnfw
-GeneratedTests.logical_and_2D_nnfw
-GeneratedTests.logical_and_3D_nnfw
-GeneratedTests.logical_and_4D_nnfw
-GeneratedTests.logical_and_broadcast
-GeneratedTests.logical_and_broadcast_4D_2D_nnfw
-GeneratedTests.logical_and_broadcast_nnfw
-GeneratedTests.logical_and_simple
-GeneratedTests.logical_not
-GeneratedTests.logical_not_1D_nnfw
-GeneratedTests.logical_not_4D_nnfw
-GeneratedTests.logical_not_dynamic_nnfw
-GeneratedTests.logical_or_1D_nnfw
-GeneratedTests.logical_or_2D_nnfw
-GeneratedTests.logical_or_3D_nnfw
-GeneratedTests.logical_or_4D_nnfw
-GeneratedTests.logical_or_broadcast
-GeneratedTests.logical_or_broadcast_4D_2D_nnfw
-GeneratedTests.logical_or_broadcast_nnfw
-GeneratedTests.logical_or_simple
-GeneratedTests.logistic_dynamic_nnfw
-GeneratedTests.logistic_quant8_1
-GeneratedTests.logistic_quant8_2
-GeneratedTests.lsh_projection
-GeneratedTests.lsh_projection_2
-GeneratedTests.lsh_projection_weights_as_inputs
-GeneratedTests.lstm
-GeneratedTests.lstm2
-GeneratedTests.lstm2_state
-GeneratedTests.lstm2_state2
-GeneratedTests.lstm3
-GeneratedTests.lstm3_state
-GeneratedTests.lstm3_state2
-GeneratedTests.lstm3_state3
-GeneratedTests.lstm_state
-GeneratedTests.lstm_state2
-GeneratedTests.matrix_band_part_ex_4D_float
-GeneratedTests.matrix_band_part_ex_dynamic_nnfw
-GeneratedTests.max_pool_quant8_1
-GeneratedTests.max_pool_quant8_2
-GeneratedTests.max_pool_quant8_3
-GeneratedTests.max_pool_quant8_4
-GeneratedTests.maximum_broadcast
-GeneratedTests.maximum_broadcast_quant8
-GeneratedTests.maximum_dynamic_nnfw
-GeneratedTests.maximum_overflow
-GeneratedTests.maximum_quant8_nnfw
-GeneratedTests.maximum_simple
-GeneratedTests.maximum_simple_quant8
-GeneratedTests.mean
-GeneratedTests.mean_4D_float_reducing_C_nnfw
-GeneratedTests.mean_4D_float_reducing_HW_nnfw
-GeneratedTests.mean_axis01_1_nnfw
-GeneratedTests.mean_axis01_2_nnfw
-GeneratedTests.mean_float_1
-GeneratedTests.mean_float_2
-GeneratedTests.mean_quant8_1
-GeneratedTests.mean_quant8_2
-GeneratedTests.minimum_broadcast
-GeneratedTests.minimum_broadcast_quant8
-GeneratedTests.minimum_dynamic_nnfw
-GeneratedTests.minimum_overflow
-GeneratedTests.minimum_quant8_nnfw
-GeneratedTests.minimum_simple
-GeneratedTests.minimum_simple_quant8
-GeneratedTests.mul_broadcast_quant8
-GeneratedTests.mul_dynamic_nnfw
-GeneratedTests.mul_quant8
-GeneratedTests.neg
-GeneratedTests.neg_1D_float_nnfw
-GeneratedTests.neg_2D_float_nnfw
-GeneratedTests.neg_3D_float_nnfw
-GeneratedTests.neg_3D_int_nnfw
-GeneratedTests.neg_4D_float_nnfw
-GeneratedTests.neg_4D_int_nnfw
-GeneratedTests.neg_dynamic_nnfw
-GeneratedTests.not_equal_boolean
-GeneratedTests.not_equal_broadcast
-GeneratedTests.not_equal_broadcast_4D_2D_float_nnfw
-GeneratedTests.not_equal_broadcast_float_nnfw
-GeneratedTests.not_equal_broadcast_quant8_nnfw
-GeneratedTests.not_equal_dynamic_float_nnfw
-GeneratedTests.not_equal_float_nnfw
-GeneratedTests.not_equal_quant8_nnfw
-GeneratedTests.not_equal_quantized_different_scale
-GeneratedTests.not_equal_quantized_different_zero_point
-GeneratedTests.not_equal_quantized_overflow_first_input_if_requantized
-GeneratedTests.not_equal_quantized_overflow_second_input_if_requantized
-GeneratedTests.not_equal_simple
-GeneratedTests.one_hot_ex_dynamic_nnfw
-GeneratedTests.pack_ex_2D_float_1
-GeneratedTests.pack_ex_2D_float_2
-GeneratedTests.pack_ex_2D_int_1
-GeneratedTests.pack_ex_2D_int_2
-GeneratedTests.pack_ex_dynamic_nnfw
-GeneratedTests.pad_dynamic_nnfw
-GeneratedTests.pad_quant8_nnfw
-GeneratedTests.pad_v2_1_float
-GeneratedTests.pad_v2_1_quant8
-GeneratedTests.pad_v2_all_dims
-GeneratedTests.pad_v2_all_dims_quant8
-GeneratedTests.pad_v2_low_rank
-GeneratedTests.pad_v2_low_rank_quant8
-GeneratedTests.pow_2D_float_nnfw
-GeneratedTests.pow_broadcast_float_nnfw
-GeneratedTests.pow_broadcast_float_nnfw_2
-GeneratedTests.pow_broadcast_float_nnfw_3
-GeneratedTests.pow_dynamic_nnfw
-GeneratedTests.prelu
-GeneratedTests.prelu_broadcast_float_1_nnfw
-GeneratedTests.prelu_broadcast_quant8_1_nnfw
-GeneratedTests.prelu_float_1_nnfw
-GeneratedTests.prelu_quant8
-GeneratedTests.prelu_quant8_1_nnfw
-GeneratedTests.prelu_quant8_2
-GeneratedTests.prelu_quant8_3
-GeneratedTests.prelu_quant8_4
-GeneratedTests.prelu_weight_as_input
-GeneratedTests.prelu_weight_as_input_quant8
-GeneratedTests.prelu_weight_as_input_quant8_2
-GeneratedTests.prelu_weight_as_input_quant8_3
-GeneratedTests.prelu_weight_as_input_quant8_4
-GeneratedTests.quantize_quant8
-GeneratedTests.quantize_quant8_2
-GeneratedTests.quantize_quant8_3
-GeneratedTests.quantize_quant8_4
-GeneratedTests.quantize_quant8_5
-GeneratedTests.quantize_quant8_6
-GeneratedTests.quantize_quant8_7
-GeneratedTests.quantize_quant8_8
-GeneratedTests.quantize_zero_sized
-GeneratedTests.range_ex_float_1
-GeneratedTests.range_ex_float_1_all_constant_inputs
-GeneratedTests.range_ex_float_1_dynamic_nnfw
-GeneratedTests.range_ex_float_2
-GeneratedTests.range_ex_float_2_dynamic_nnfw
-GeneratedTests.reduce_all
-GeneratedTests.reduce_all_2
-GeneratedTests.reduce_all_2D_nnfw
-GeneratedTests.reduce_all_3
-GeneratedTests.reduce_all_4D_nnfw
-GeneratedTests.reduce_all_dynamic_nnfw
-GeneratedTests.reduce_any
-GeneratedTests.reduce_any_2
-GeneratedTests.reduce_any_2D_nnfw
-GeneratedTests.reduce_any_3
-GeneratedTests.reduce_any_4D_nnfw
-GeneratedTests.reduce_max
-GeneratedTests.reduce_max_2
-GeneratedTests.reduce_max_2D_float_nnfw
-GeneratedTests.reduce_max_2D_int32_nnfw
-GeneratedTests.reduce_max_3
-GeneratedTests.reduce_max_4
-GeneratedTests.reduce_max_4D_float_reducing_C_nnfw
-GeneratedTests.reduce_max_4D_float_reducing_HW_nnfw
-GeneratedTests.reduce_max_float_1_nnfw
-GeneratedTests.reduce_max_float_2_nnfw
-GeneratedTests.reduce_max_float_nnfw
-GeneratedTests.reduce_max_quant8
-GeneratedTests.reduce_max_quant8_1_nnfw
-GeneratedTests.reduce_max_quant8_2
-GeneratedTests.reduce_max_quant8_2_nnfw
-GeneratedTests.reduce_max_quant8_3
-GeneratedTests.reduce_max_quant8_4
-GeneratedTests.reduce_mean_dynamic_1_nnfw
-GeneratedTests.reduce_mean_dynamic_2_nnfw
-GeneratedTests.reduce_min
-GeneratedTests.reduce_min_2
-GeneratedTests.reduce_min_3
-GeneratedTests.reduce_min_4
-GeneratedTests.reduce_min_dynamic_nnfw
-GeneratedTests.reduce_min_float_1_nnfw
-GeneratedTests.reduce_min_float_2_nnfw
-GeneratedTests.reduce_min_float_nnfw
-GeneratedTests.reduce_min_quant8
-GeneratedTests.reduce_min_quant8_2
-GeneratedTests.reduce_min_quant8_3
-GeneratedTests.reduce_min_quant8_4
-GeneratedTests.reduce_prod
-GeneratedTests.reduce_prod_2
-GeneratedTests.reduce_prod_2D_float_nnfw
-GeneratedTests.reduce_prod_3
-GeneratedTests.reduce_prod_4
-GeneratedTests.reduce_prod_4D_float_nnfw
-GeneratedTests.reduce_prod_4D_float_reducing_C_nnfw
-GeneratedTests.reduce_prod_4D_float_reducing_HW_nnfw
-GeneratedTests.reduce_prod_dynamic_1_nnfw
-GeneratedTests.reduce_prod_dynamic_2_nnfw
-GeneratedTests.reduce_sum
-GeneratedTests.reduce_sum_2
-GeneratedTests.reduce_sum_2D_float_nnfw
-GeneratedTests.reduce_sum_3
-GeneratedTests.reduce_sum_4
-GeneratedTests.reduce_sum_4D_float_nnfw
-GeneratedTests.reduce_sum_4D_float_reducing_C_nnfw
-GeneratedTests.reduce_sum_4D_float_reducing_HW_nnfw
-GeneratedTests.reduce_sum_dynamic_1_nnfw
-GeneratedTests.reduce_sum_dynamic_2_nnfw
-GeneratedTests.relu1_quant8_1
-GeneratedTests.relu1_quant8_2
-GeneratedTests.relu6_quant8_1
-GeneratedTests.relu6_quant8_2
-GeneratedTests.relu_quant8_1
-GeneratedTests.relu_quant8_2
-GeneratedTests.reshape_dynamic_nnfw
-GeneratedTests.resize_bilinear
-GeneratedTests.resize_bilinear_2
-GeneratedTests.resize_bilinear_quant8_nnfw
-GeneratedTests.reverse_ex_1d
-GeneratedTests.reverse_ex_3d
-GeneratedTests.reverse_ex_dynamic_1D
-GeneratedTests.reverse_ex_dynamic_3D
-GeneratedTests.rnn
-GeneratedTests.rnn_state
-GeneratedTests.round_ex_1D_float
-GeneratedTests.round_ex_4D_float
-GeneratedTests.round_ex_dynamic_nnfw
-GeneratedTests.rsqrt
-GeneratedTests.rsqrt_1D_float_nnfw
-GeneratedTests.rsqrt_2D_float_nnfw
-GeneratedTests.rsqrt_3D_float_nnfw
-GeneratedTests.rsqrt_4D_float_nnfw
-GeneratedTests.rsqrt_dynamic_nnfw
-GeneratedTests.select_v1_2_five_dim
-GeneratedTests.select_v1_2_five_dim_quant8
-GeneratedTests.select_v1_2_one_dim
-GeneratedTests.select_v1_2_one_dim_quant8
-GeneratedTests.select_v1_2_two_dim
-GeneratedTests.select_v1_2_two_dim_quant8
-GeneratedTests.select_v2_ex_broadcast_1d_single_value
-GeneratedTests.select_v2_ex_broadcast_2d_one
-GeneratedTests.select_v2_ex_broadcast_2d_two
-GeneratedTests.select_v2_ex_broadcast_2d_two_dynamic_nnfw
-GeneratedTests.select_v2_ex_broadcast_less_4d
-GeneratedTests.select_v2_ex_float
-GeneratedTests.shape_ex_dynamic_nnfw
-GeneratedTests.sin_1D_float_nnfw
-GeneratedTests.sin_4D_float_nnfw
-GeneratedTests.sin_dynamic_nnfw
-GeneratedTests.slice
-GeneratedTests.slice_2
-GeneratedTests.slice_3
-GeneratedTests.slice_4
-GeneratedTests.slice_5
-GeneratedTests.slice_6
-GeneratedTests.slice_7
-GeneratedTests.slice_8
-GeneratedTests.slice_dynamic_nnfw
-GeneratedTests.slice_zero_sized
-GeneratedTests.slice_zero_sized_quant8
-GeneratedTests.softmax_dynamic_nnfw
-GeneratedTests.softmax_quant8_1
-GeneratedTests.softmax_quant8_2
-GeneratedTests.space_to_batch
-GeneratedTests.space_to_batch_dynamic_float_nnfw
-GeneratedTests.space_to_batch_float_1
-GeneratedTests.space_to_batch_float_1_nnfw
-GeneratedTests.space_to_batch_float_2
-GeneratedTests.space_to_batch_float_3
-GeneratedTests.space_to_batch_quant8_1
-GeneratedTests.space_to_batch_quant8_1_nnfw
-GeneratedTests.space_to_batch_quant8_2
-GeneratedTests.space_to_batch_quant8_2_nnfw
-GeneratedTests.space_to_batch_quant8_3
-GeneratedTests.space_to_depth_float_1
-GeneratedTests.space_to_depth_float_2
-GeneratedTests.space_to_depth_float_3
-GeneratedTests.space_to_depth_quant8_1
-GeneratedTests.space_to_depth_quant8_2
-GeneratedTests.split_1D_float_nnfw
-GeneratedTests.split_1D_int32_nnfw
-GeneratedTests.split_4D_float_1_nnfw
-GeneratedTests.split_4D_float_2_nnfw
-GeneratedTests.split_4D_float_3_nnfw
-GeneratedTests.split_4D_int32_1_nnfw
-GeneratedTests.split_4D_int32_2_nnfw
-GeneratedTests.split_4D_int32_3_nnfw
-GeneratedTests.split_4D_int32_4_nnfw
-GeneratedTests.split_4D_int32_5_nnfw
-GeneratedTests.split_4D_quant8_nnfw
-GeneratedTests.split_dynamic_float_nnfw
-GeneratedTests.split_float_1
-GeneratedTests.split_float_2
-GeneratedTests.split_float_3
-GeneratedTests.split_float_4
-GeneratedTests.split_float_5
-GeneratedTests.split_int32_1
-GeneratedTests.split_int32_1_relaxed
-GeneratedTests.split_int32_2
-GeneratedTests.split_int32_2_relaxed
-GeneratedTests.split_int32_3
-GeneratedTests.split_int32_3_relaxed
-GeneratedTests.split_int32_4
-GeneratedTests.split_int32_4_relaxed
-GeneratedTests.split_quant8_1
-GeneratedTests.split_quant8_1_relaxed
-GeneratedTests.split_quant8_2
-GeneratedTests.split_quant8_2_relaxed
-GeneratedTests.split_quant8_3
-GeneratedTests.split_quant8_4
-GeneratedTests.split_v_ex_1D_float_1_nnfw
-GeneratedTests.split_v_ex_1D_float_2_nnfw
-GeneratedTests.split_v_ex_1D_int32_nnfw
-GeneratedTests.split_v_ex_4D_float_1_nnfw
-GeneratedTests.split_v_ex_4D_float_2_nnfw
-GeneratedTests.split_v_ex_4D_float_3_nnfw
-GeneratedTests.split_v_ex_4D_float_4_nnfw
-GeneratedTests.split_v_ex_4D_int32_1_nnfw
-GeneratedTests.split_v_ex_4D_int32_2_nnfw
-GeneratedTests.split_v_ex_4D_int32_3_nnfw
-GeneratedTests.split_v_ex_4D_int32_4_nnfw
-GeneratedTests.sqrt_
-GeneratedTests.sqrt_1D_float_nnfw
-GeneratedTests.sqrt_2D_float_nnfw
-GeneratedTests.sqrt_3D_float_nnfw
-GeneratedTests.sqrt_4D_float_nnfw
-GeneratedTests.squared_difference_ex_1D_float
-GeneratedTests.squared_difference_ex_2D_float
-GeneratedTests.squared_difference_ex_3D_float
-GeneratedTests.squared_difference_ex_4D_float
-GeneratedTests.squared_difference_ex_broadcast_4D_2D_float
-GeneratedTests.squared_difference_ex_broadcast_float
-GeneratedTests.squared_difference_ex_dynamic_nnfw
-GeneratedTests.squeeze
-GeneratedTests.squeeze_2D_float_1_nnfw
-GeneratedTests.squeeze_dynamic_float_nnfw
-GeneratedTests.squeeze_float_1
-GeneratedTests.squeeze_float_1_relaxed
-GeneratedTests.squeeze_quant8_1
-GeneratedTests.squeeze_relaxed
-GeneratedTests.stateless_random_uniform_ex_nnfw
-GeneratedTests.strided_slice
-GeneratedTests.strided_slice_dynamic_nnfw
-GeneratedTests.strided_slice_float_1
-GeneratedTests.strided_slice_float_10
-GeneratedTests.strided_slice_float_11
-GeneratedTests.strided_slice_float_2
-GeneratedTests.strided_slice_float_3
-GeneratedTests.strided_slice_float_4
-GeneratedTests.strided_slice_float_5
-GeneratedTests.strided_slice_float_6
-GeneratedTests.strided_slice_float_7
-GeneratedTests.strided_slice_float_8
-GeneratedTests.strided_slice_float_9
-GeneratedTests.strided_slice_qaunt8_10
-GeneratedTests.strided_slice_qaunt8_11
-GeneratedTests.strided_slice_quant8_1
-GeneratedTests.strided_slice_quant8_2
-GeneratedTests.strided_slice_quant8_3
-GeneratedTests.strided_slice_quant8_4
-GeneratedTests.strided_slice_quant8_5
-GeneratedTests.strided_slice_quant8_6
-GeneratedTests.strided_slice_quant8_7
-GeneratedTests.strided_slice_quant8_8
-GeneratedTests.strided_slice_quant8_9
-GeneratedTests.sub_dynamic_nnfw
-GeneratedTests.sub_v1_2_broadcast_quant8
-GeneratedTests.sub_v1_2_quant8
-GeneratedTests.sub_v1_2_zero_sized
-GeneratedTests.sub_v1_2_zero_sized_quant8
-GeneratedTests.svdf
-GeneratedTests.svdf2
-GeneratedTests.svdf_bias_present
-GeneratedTests.svdf_state
-GeneratedTests.tanh_v1_2
-GeneratedTests.tanh_v1_2_2
-GeneratedTests.tanh_v1_2_zero_sized
-GeneratedTests.tanh_v1_2_zero_sized_quant8
-GeneratedTests.tanh_v1_dynamic_nnfw
-GeneratedTests.tile_1
-GeneratedTests.tile_1_dynamic_float32_nnfw
-GeneratedTests.tile_1_float16
-GeneratedTests.tile_1_quant8
-GeneratedTests.tile_2
-GeneratedTests.tile_2_dynamic_float32_nnfw
-GeneratedTests.tile_2_float16
-GeneratedTests.tile_2_int32
-GeneratedTests.tile_2_quant8
-GeneratedTests.tile_3
-GeneratedTests.tile_3_dynamic_float32_nnfw
-GeneratedTests.tile_3_float16
-GeneratedTests.tile_3_int32
-GeneratedTests.tile_3_quant8
-GeneratedTests.topk_v2
-GeneratedTests.topk_v2_1D_float_nnfw
-GeneratedTests.topk_v2_1D_int32_nnfw
-GeneratedTests.topk_v2_1D_quant8_nnfw
-GeneratedTests.topk_v2_2
-GeneratedTests.topk_v2_2D_float_nnfw
-GeneratedTests.topk_v2_2D_int32_nnfw
-GeneratedTests.topk_v2_2D_quant8_nnfw
-GeneratedTests.topk_v2_3
-GeneratedTests.topk_v2_4
-GeneratedTests.topk_v2_5
-GeneratedTests.topk_v2_6
-GeneratedTests.transpose
-GeneratedTests.transpose_2D_nnfw
-GeneratedTests.transpose_3D_nnfw
-GeneratedTests.transpose_dynamic_nnfw
-GeneratedTests.transpose_float_1
-GeneratedTests.transpose_quant8_1
-GeneratedTests.transpose_v1_2
-GeneratedTests.transpose_v1_2_quant8
-GeneratedTests.transpose_v1_2_zero_sized
-GeneratedTests.transpose_v1_2_zero_sized_quant8
-GeneratedTests.unpack_ex_3D_float_1
-GeneratedTests.unpack_ex_3D_float_2
-GeneratedTests.unpack_ex_3D_int_1
-GeneratedTests.unpack_ex_3D_int_2
-GeneratedTests.unpack_ex_dynamic_nnfw
-GeneratedTests.zeros_like_ex_2D_float
-GeneratedTests.zeros_like_ex_4D_int32
-GeneratedTests.zeros_like_ex_dynamic_float32
diff --git a/tests/nnapi/nnapi_gtest.skip.x86_64-linux.cpu b/tests/nnapi/nnapi_gtest.skip.x86_64-linux.cpu
index e98007e08..cad07296a 100644
--- a/tests/nnapi/nnapi_gtest.skip.x86_64-linux.cpu
+++ b/tests/nnapi/nnapi_gtest.skip.x86_64-linux.cpu
@@ -12,12 +12,11 @@ GeneratedTests.cast_float32_to_quant8_overflow_relaxed
GeneratedTests.cast_int32_to_float16
GeneratedTests.cast_int32_to_quant8_overflow
GeneratedTests.cast_quant8_to_float16
-GeneratedTests.depth_to_space_float_1
-GeneratedTests.depth_to_space_float_2
-GeneratedTests.depth_to_space_float_3
-GeneratedTests.depth_to_space_quant8_1
-GeneratedTests.depth_to_space_quant8_2
-GeneratedTests.dequantize
+GeneratedTests.dequantize_v1_2_3d_per_channel_first_dim
+GeneratedTests.dequantize_v1_2_3d_per_channel_second_dim
+GeneratedTests.dequantize_v1_2
+GeneratedTests.dequantize_v1_2_zero_sized
+GeneratedTests.dequantize_v1_2_zero_sized_float16
GeneratedTests.embedding_lookup
GeneratedTests.embedding_lookup_2d_nnfw
GeneratedTests.embedding_lookup_4d_nnfw
@@ -42,31 +41,17 @@ GeneratedTests.local_response_norm_float_1
GeneratedTests.local_response_norm_float_2
GeneratedTests.local_response_norm_float_3
GeneratedTests.local_response_norm_float_4
-GeneratedTests.logical_and_1D_nnfw
-GeneratedTests.logical_and_2D_nnfw
-GeneratedTests.logical_and_3D_nnfw
-GeneratedTests.logical_and_4D_nnfw
-GeneratedTests.logical_and_broadcast
-GeneratedTests.logical_and_broadcast_4D_2D_nnfw
-GeneratedTests.logical_and_broadcast_nnfw
-GeneratedTests.logical_and_simple
GeneratedTests.logical_not
GeneratedTests.lsh_projection
GeneratedTests.lsh_projection_2
GeneratedTests.lsh_projection_weights_as_inputs
-GeneratedTests.lstm
GeneratedTests.lstm2
GeneratedTests.lstm2_state
GeneratedTests.lstm2_state2
-GeneratedTests.lstm3
-GeneratedTests.lstm3_state
-GeneratedTests.lstm3_state2
-GeneratedTests.lstm3_state3
-GeneratedTests.lstm_state
-GeneratedTests.lstm_state2
GeneratedTests.maximum_broadcast_quant8
GeneratedTests.maximum_overflow
GeneratedTests.maximum_simple_quant8
+GeneratedTests.minimum_int32
GeneratedTests.minimum_broadcast_quant8
GeneratedTests.minimum_overflow
GeneratedTests.minimum_simple_quant8
@@ -110,6 +95,78 @@ GeneratedTests.relu6_quant8_1
GeneratedTests.relu6_quant8_2
GeneratedTests.relu_quant8_1
GeneratedTests.relu_quant8_2
+GeneratedTests.resize_nearest_neighbor_shape_nhwc
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8
+GeneratedTests.resize_nearest_neighbor_shape_nchw
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8
+GeneratedTests.resize_nearest_neighbor_scale_nhwc
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8
+GeneratedTests.resize_nearest_neighbor_scale_nchw
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_2
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_2
+GeneratedTests.resize_nearest_neighbor_shape_nchw_2
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_2
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_2
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_2
+GeneratedTests.resize_nearest_neighbor_scale_nchw_2
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_2
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_3
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_3
+GeneratedTests.resize_nearest_neighbor_shape_nchw_3
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_3
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_3
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_3
+GeneratedTests.resize_nearest_neighbor_scale_nchw_3
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_3
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_4
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_4
+GeneratedTests.resize_nearest_neighbor_shape_nchw_4
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_4
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_4
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_4
+GeneratedTests.resize_nearest_neighbor_scale_nchw_4
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_4
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_5
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_5
+GeneratedTests.resize_nearest_neighbor_shape_nchw_5
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_5
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_5
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_5
+GeneratedTests.resize_nearest_neighbor_scale_nchw_5
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_5
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_6
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_6
+GeneratedTests.resize_nearest_neighbor_shape_nchw_6
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_6
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_6
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_6
+GeneratedTests.resize_nearest_neighbor_scale_nchw_6
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_6
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_7
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_7
+GeneratedTests.resize_nearest_neighbor_shape_nchw_7
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_7
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_7
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_7
+GeneratedTests.resize_nearest_neighbor_scale_nchw_7
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_7
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_8
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_8
+GeneratedTests.resize_nearest_neighbor_shape_nchw_8
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_8
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_8
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_8
+GeneratedTests.resize_nearest_neighbor_scale_nchw_8
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8_2
GeneratedTests.rnn
GeneratedTests.rnn_state
GeneratedTests.rsqrt
@@ -171,7 +228,5 @@ GeneratedTests.transpose_conv_ex_float_1
GeneratedTests.transpose_conv_ex_float_2
GeneratedTests.transpose_conv_ex_float_3
GeneratedTests.transpose_conv_ex_float_4
-GeneratedTests.transpose_v1_2
-GeneratedTests.transpose_v1_2_quant8
GeneratedTests.transpose_v1_2_zero_sized
GeneratedTests.transpose_v1_2_zero_sized_quant8
diff --git a/tests/nnapi/specs/Ex/one_hot_ex_float_1_nnfw.mod.py b/tests/nnapi/specs/Ex/one_hot_ex_float_1_nnfw.mod.py
new file mode 100644
index 000000000..1bdf22b92
--- /dev/null
+++ b/tests/nnapi/specs/Ex/one_hot_ex_float_1_nnfw.mod.py
@@ -0,0 +1,48 @@
+#
+# Copyright (C) 2018 The Android Open Source Project
+# Copyright (C) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+model = Model()
+
+
+indices = Input("indices", "TENSOR_INT32", "{2, 2}")
+depth = Parameter("depth", "TENSOR_INT32", "{1}", [3])
+onvalue = Input("onvalue", "TENSOR_FLOAT32", "{1}")
+offvalue = Input("offvalue", "TENSOR_FLOAT32", "{1}")
+
+axis0 = Int32Scalar("axis", -1) # default value is -1.
+model_output0 = Output("output", "TENSOR_FLOAT32", "{2, 2, 3}")
+
+model0 = model.Operation("ONE_HOT_EX", indices, depth, onvalue, offvalue, axis0).To(model_output0)
+
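+# With axis=-1 the depth axis is appended last: each index in [[1, 2], [0, 2]]
+# expands to a one-hot vector of length depth=3 (on_value=1., off_value=0.).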
+model_output_data = ([0., 1., 0.,
+ 0., 0., 1.,
+ 1., 0., 0.,
+ 0., 0., 1.,])
+
+indices_data = [1, 2, 0, 2]
+onvalue_data = [1.]
+offvalue_data = [0.]
+
+Example(
+ {
+ indices : indices_data,
+ onvalue : onvalue_data,
+ offvalue : offvalue_data,
+
+ model_output0 : model_output_data,
+ })
+
diff --git a/tests/nnapi/specs/Ex/one_hot_ex_float_2_nnfw.mod.py b/tests/nnapi/specs/Ex/one_hot_ex_float_2_nnfw.mod.py
new file mode 100644
index 000000000..6a41488df
--- /dev/null
+++ b/tests/nnapi/specs/Ex/one_hot_ex_float_2_nnfw.mod.py
@@ -0,0 +1,47 @@
+#
+# Copyright (C) 2018 The Android Open Source Project
+# Copyright (C) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+model = Model()
+
+
+indices = Input("indices", "TENSOR_INT32", "{1, 2, 2}")
+depth = Parameter("depth", "TENSOR_INT32", "{1}", [3])
+onvalue = Input("onvalue", "TENSOR_FLOAT32", "{1}")
+offvalue = Input("offvalue", "TENSOR_FLOAT32", "{1}")
+
+axis0 = Int32Scalar("axis", 1) # overrides the default axis of -1.
+model_output0 = Output("output", "TENSOR_FLOAT32", "{1, 3, 2, 2}")
+
+model0 = model.Operation("ONE_HOT_EX", indices, depth, onvalue, offvalue, axis0).To(model_output0)
+
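+# axis=1 inserts the depth dimension at position 1, so indices of shape {1, 2, 2}
+# produce output shape {1, 3, 2, 2} with on_value=2. and off_value=1.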
+model_output_data = ([1., 1., 2., 1.,
+ 2., 1., 1., 1.,
+ 1., 2., 1., 2.,])
+
+indices_data = [1, 2, 0, 2]
+onvalue_data = [2.]
+offvalue_data = [1.]
+
+Example(
+ {
+ indices : indices_data,
+ onvalue : onvalue_data,
+ offvalue : offvalue_data,
+
+ model_output0 : model_output_data,
+ })
+
diff --git a/tests/nnapi/specs/Ex/one_hot_ex_float_off_value_constant_zero_nnfw.mod.py b/tests/nnapi/specs/Ex/one_hot_ex_float_off_value_constant_zero_nnfw.mod.py
new file mode 100644
index 000000000..fc8859baa
--- /dev/null
+++ b/tests/nnapi/specs/Ex/one_hot_ex_float_off_value_constant_zero_nnfw.mod.py
@@ -0,0 +1,45 @@
+#
+# Copyright (C) 2018 The Android Open Source Project
+# Copyright (C) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+model = Model()
+
+
+indices = Input("indices", "TENSOR_INT32", "{1, 2, 2}")
+depth = Parameter("depth", "TENSOR_INT32", "{1}", [3])
+onvalue = Input("onvalue", "TENSOR_FLOAT32", "{1}")
+offvalue = Parameter("offvalue", "TENSOR_FLOAT32", "{1}", [0.])
+
+axis0 = Int32Scalar("axis", 2) # overrides the default axis of -1.
+model_output0 = Output("output", "TENSOR_FLOAT32", "{1, 2, 3, 2}")
+
+model0 = model.Operation("ONE_HOT_EX", indices, depth, onvalue, offvalue, axis0).To(model_output0)
+
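+# off_value is a constant Parameter (0.), so the Example below only feeds
+# indices and on_value at run time.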
+model_output_data = ([0., 0., 1., 0., 0., 1.,
+ 1., 0., 0., 0., 0., 1.,])
+
+
+indices_data = [1, 2, 0, 2]
+onvalue_data = [1.]
+
+Example(
+ {
+ indices : indices_data,
+ onvalue : onvalue_data,
+
+ model_output0 : model_output_data,
+ })
+
diff --git a/tests/nnapi/specs/V1_1/transpose_float_1_perms_as_input_nnfw.mod.py b/tests/nnapi/specs/V1_1/transpose_float_1_perms_as_input_nnfw.mod.py
new file mode 100644
index 000000000..c4040f4bb
--- /dev/null
+++ b/tests/nnapi/specs/V1_1/transpose_float_1_perms_as_input_nnfw.mod.py
@@ -0,0 +1,34 @@
+model = Model()
+i1 = Input("input", "TENSOR_FLOAT32", "{2, 3, 4, 5}")
+perms = Input("perms", "TENSOR_INT32", "{4}")
+output = Output("output", "TENSOR_FLOAT32", "{4, 2, 3, 5}")
+
+model = model.Operation("TRANSPOSE", i1, perms).To(output)
+
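+# perms comes from a runtime input rather than a constant; with perms [2, 0, 1, 3]
+# the result satisfies output[a, b, c, d] = input[b, c, a, d].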
+# Example 1. Inputs in operands 0 and 1.
+input0 = {i1: # input 0
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+ 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
+ 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
+ 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83,
+ 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
+ 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
+ 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119],
+ perms: # permutations
+ [2, 0, 1, 3]}
+
+output0 = {output: # output 0
+ [0, 1, 2, 3, 4, 20, 21, 22, 23, 24, 40, 41, 42, 43, 44,
+ 60, 61, 62, 63, 64, 80, 81, 82, 83, 84, 100, 101, 102, 103, 104,
+ 5, 6, 7, 8, 9, 25, 26, 27, 28, 29, 45, 46, 47, 48, 49,
+ 65, 66, 67, 68, 69, 85, 86, 87, 88, 89, 105, 106, 107, 108, 109,
+ 10, 11, 12, 13, 14, 30, 31, 32, 33, 34, 50, 51, 52, 53, 54,
+ 70, 71, 72, 73, 74, 90, 91, 92, 93, 94, 110, 111, 112, 113, 114,
+ 15, 16, 17, 18, 19, 35, 36, 37, 38, 39, 55, 56, 57, 58, 59,
+ 75, 76, 77, 78, 79, 95, 96, 97, 98, 99, 115, 116, 117, 118, 119]}
+
+# Instantiate an example
+Example((input0, output0))
diff --git a/tests/nnapi/specs/V1_2/argmax_3_axis_as_input_nnfw.mod.py b/tests/nnapi/specs/V1_2/argmax_3_axis_as_input_nnfw.mod.py
new file mode 100644
index 000000000..a3550367f
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/argmax_3_axis_as_input_nnfw.mod.py
@@ -0,0 +1,35 @@
+#
+# Copyright (C) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+# Copyright (C) 2018 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Negative axis support test.
+
+input0 = Input("input0", "TENSOR_FLOAT32", "{2, 2}")
+axis = Input("axis", "TENSOR_INT32", "{}")
+output0 = Output("output", "TENSOR_INT32", "{2}")
+
+model = Model().Operation("ARGMAX", input0, axis).To(output0)
+
+quant8 = DataTypeConverter().Identify({
+ input0: ["TENSOR_QUANT8_ASYMM", 1.0, 0],
+})
+
+Example({
+ input0: [1.0, 2.0,
+ 4.0, 3.0],
+ axis: [-1],
+ output0: [1, 0],
+}).AddVariations("relaxed", "float16", "int32", quant8)
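With axis = -1, ARGMAX reduces over the last dimension, so each row of the {2, 2} input yields one index. A quick plain-Python check of the golden output (illustration only):

```python
# ARGMAX over the last axis of [[1, 2], [4, 3]] gives [1, 0].
rows = [[1.0, 2.0], [4.0, 3.0]]
assert [r.index(max(r)) for r in rows] == [1, 0]
```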
diff --git a/tests/nnapi/specs/skip/V1_2/argmin_1.mod.py b/tests/nnapi/specs/V1_2/argmin_1.mod.py
index e89ceead9..e89ceead9 100644
--- a/tests/nnapi/specs/skip/V1_2/argmin_1.mod.py
+++ b/tests/nnapi/specs/V1_2/argmin_1.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/argmin_2.mod.py b/tests/nnapi/specs/V1_2/argmin_2.mod.py
index e54cff784..e54cff784 100644
--- a/tests/nnapi/specs/skip/V1_2/argmin_2.mod.py
+++ b/tests/nnapi/specs/V1_2/argmin_2.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/argmin_3.mod.py b/tests/nnapi/specs/V1_2/argmin_3.mod.py
index d3cbd76ed..d3cbd76ed 100644
--- a/tests/nnapi/specs/skip/V1_2/argmin_3.mod.py
+++ b/tests/nnapi/specs/V1_2/argmin_3.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/dequantize_v1_2.mod.py b/tests/nnapi/specs/V1_2/dequantize_v1_2.mod.py
index 81e3515cd..81e3515cd 100644
--- a/tests/nnapi/specs/skip/V1_2/dequantize_v1_2.mod.py
+++ b/tests/nnapi/specs/V1_2/dequantize_v1_2.mod.py
diff --git a/tests/nnapi/specs/V1_2/equal_broadcast_float_nnfw.mod.py b/tests/nnapi/specs/V1_2/equal_broadcast_float_nnfw.mod.py
index 279c000ba..6663b79c0 100644
--- a/tests/nnapi/specs/V1_2/equal_broadcast_float_nnfw.mod.py
+++ b/tests/nnapi/specs/V1_2/equal_broadcast_float_nnfw.mod.py
@@ -3,8 +3,8 @@ model = Model()
i1 = Input("op1", "TENSOR_FLOAT32", "{2, 2}")
i2 = Input("op2", "TENSOR_FLOAT32", "{1, 2}")
-i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "{2, 2}, 1.f, 0")
-model = model.Operation("EQUAL_EX", i1, i2).To(i3)
+i3 = Output("op3", "TENSOR_BOOL8", "{2, 2}")
+model = model.Operation("EQUAL", i1, i2).To(i3)
# Example 1. Input in operand 0,
input0 = {i1: # input 0
diff --git a/tests/nnapi/specs/V1_2/minimum_int32.mod.py b/tests/nnapi/specs/V1_2/minimum_int32.mod.py
new file mode 100644
index 000000000..7d65f1039
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/minimum_int32.mod.py
@@ -0,0 +1,32 @@
+#
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+# Copyright (C) 2020 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+i1 = Input("input0", "TENSOR_INT32", "{3, 1, 2}")
+i2 = Input("input1", "TENSOR_INT32", "{3, 1, 2}")
+i3 = Output("output0", "TENSOR_INT32", "{3, 1, 2}")
+
+model = Model().Operation("MINIMUM", i1, i2).To(i3)
+
+input0 = {i1:
+ [129, 12, 15, 130, -77, 33],
+ i2:
+ [44, 127, -25, 5, 39, 27]}
+
+output0 = {i3:
+ [44, 12, -25, 5, -77, 27]}
+
+Example((input0, output0))
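MINIMUM is elementwise, so the expected output is simply the pairwise minimum of the two inputs; a one-line plain-Python check:

```python
a = [129, 12, 15, 130, -77, 33]
b = [44, 127, -25, 5, 39, 27]
assert [min(x, y) for x, y in zip(a, b)] == [44, 12, -25, 5, -77, 27]
```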
diff --git a/tests/nnapi/specs/skip/V1_2/resize_nearest_neighbor.mod.py b/tests/nnapi/specs/V1_2/resize_nearest_neighbor.mod.py
index 04102c5ed..04102c5ed 100644
--- a/tests/nnapi/specs/skip/V1_2/resize_nearest_neighbor.mod.py
+++ b/tests/nnapi/specs/V1_2/resize_nearest_neighbor.mod.py
diff --git a/tests/nnapi/specs/V1_2/split_float_5_axis_as_input_nnfw.mod.py b/tests/nnapi/specs/V1_2/split_float_5_axis_as_input_nnfw.mod.py
new file mode 100644
index 000000000..9676e1638
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/split_float_5_axis_as_input_nnfw.mod.py
@@ -0,0 +1,38 @@
+#
+# Copyright (C) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+# Copyright (C) 2018 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# model
+input0 = Input("input0", "TENSOR_FLOAT32", "{2, 2, 2}")
+axis = Input("axis", "TENSOR_INT32", "{}")
+num_splits = Int32Scalar("num_splits", 2)
+output0 = Output("output0", "TENSOR_FLOAT32", "{2, 1, 2}")
+output1 = Output("output1", "TENSOR_FLOAT32", "{2, 1, 2}")
+
+model = Model().Operation("SPLIT", input0, axis, num_splits).To((output0, output1))
+
+# Example 1.
+input_dict = {
+ input0: [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
+ axis: [-2]
+}
+output_dict = {
+ output0: [1.0, 2.0, 5.0, 6.0],
+ output1: [3.0, 4.0, 7.0, 8.0],
+}
+
+# Instantiate an example
+Example((input_dict, output_dict)).AddVariations("relaxed", "float16")
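Here axis is a runtime input with value -2, which resolves to dimension 1 of the {2, 2, 2} input. A plain-Python sketch of the expected slices:

```python
# SPLIT along axis -2 (dimension 1) of a {2, 2, 2} tensor into 2 outputs.
x = [[[1.0, 2.0], [3.0, 4.0]], [[5.0, 6.0], [7.0, 8.0]]]
out0 = [v for batch in x for v in batch[0]]
out1 = [v for batch in x for v in batch[1]]
assert out0 == [1.0, 2.0, 5.0, 6.0]
assert out1 == [3.0, 4.0, 7.0, 8.0]
```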
diff --git a/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_1step.mod.py b/tests/nnapi/specs/V1_2/unidirectional_sequence_lstm_1step.mod.py
index 1e9a633a9..1e9a633a9 100644
--- a/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_1step.mod.py
+++ b/tests/nnapi/specs/V1_2/unidirectional_sequence_lstm_1step.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_batch_major_norm_peephole_projection.mod.py b/tests/nnapi/specs/V1_2/unidirectional_sequence_lstm_batch_major_norm_peephole_projection.mod.py
index 08bc3ab5c..08bc3ab5c 100644
--- a/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_batch_major_norm_peephole_projection.mod.py
+++ b/tests/nnapi/specs/V1_2/unidirectional_sequence_lstm_batch_major_norm_peephole_projection.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_batch_major_peephole_projection_bias.mod.py b/tests/nnapi/specs/V1_2/unidirectional_sequence_lstm_batch_major_peephole_projection_bias.mod.py
index d4f020e31..d4f020e31 100644
--- a/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_batch_major_peephole_projection_bias.mod.py
+++ b/tests/nnapi/specs/V1_2/unidirectional_sequence_lstm_batch_major_peephole_projection_bias.mod.py
diff --git a/tests/nnapi/specs/V1_2/unidirectional_sequence_lstm_dynamic_nnfw.mod.py b/tests/nnapi/specs/V1_2/unidirectional_sequence_lstm_dynamic_nnfw.mod.py
new file mode 100644
index 000000000..db4d2da78
--- /dev/null
+++ b/tests/nnapi/specs/V1_2/unidirectional_sequence_lstm_dynamic_nnfw.mod.py
@@ -0,0 +1,172 @@
+#
+# Copyright (C) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+# Copyright (C) 2019 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Unidirectional Sequence LSTM Test:
+# 1 Time Step, Layer Normalization, No Cifg, Peephole, Projection, and No Clipping.
+import copy
+import dynamic_tensor
+
+model = Model()
+
+max_time = 1
+n_batch = 2
+n_input = 5
+# n_cell and n_output have the same size when there is no projection.
+n_cell = 4
+n_output = 3
+
+input_shape = [max_time, n_batch, n_input]
+
+dynamic_layer = dynamic_tensor.DynamicInputGenerator(model, input_shape, "TENSOR_FLOAT32")
+
+input = dynamic_layer.getTestNodeInput()
+
+input_to_input_weights = Input("input_to_input_weights", "TENSOR_FLOAT32",
+ "{%d, %d}" % (n_cell, n_input))
+input_to_forget_weights = Input("input_to_forget_weights", "TENSOR_FLOAT32",
+ "{%d, %d}" % (n_cell, n_input))
+input_to_cell_weights = Input("input_to_cell_weights", "TENSOR_FLOAT32",
+ "{%d, %d}" % (n_cell, n_input))
+input_to_output_weights = Input("input_to_output_weights", "TENSOR_FLOAT32",
+ "{%d, %d}" % (n_cell, n_input))
+
+recurrent_to_input_weights = Input("recurrent_to_input_weights",
+ "TENSOR_FLOAT32",
+ "{%d, %d}" % (n_cell, n_output))
+recurrent_to_forget_weights = Input("recurrent_to_forget_weights",
+ "TENSOR_FLOAT32",
+ "{%d, %d}" % (n_cell, n_output))
+recurrent_to_cell_weights = Input("recurrent_to_cell_weights", "TENSOR_FLOAT32",
+ "{%d, %d}" % (n_cell, n_output))
+recurrent_to_output_weights = Input("recurrent_to_output_weights",
+ "TENSOR_FLOAT32",
+ "{%d, %d}" % (n_cell, n_output))
+
+cell_to_input_weights = Input("cell_to_input_weights", "TENSOR_FLOAT32",
+ "{%d}" % (n_cell))
+cell_to_forget_weights = Input("cell_to_forget_weights", "TENSOR_FLOAT32",
+ "{%d}" % (n_cell))
+cell_to_output_weights = Input("cell_to_output_weights", "TENSOR_FLOAT32",
+ "{%d}" % (n_cell))
+
+input_gate_bias = Input("input_gate_bias", "TENSOR_FLOAT32", "{%d}" % (n_cell))
+forget_gate_bias = Input("forget_gate_bias", "TENSOR_FLOAT32",
+ "{%d}" % (n_cell))
+cell_gate_bias = Input("cell_gate_bias", "TENSOR_FLOAT32", "{%d}" % (n_cell))
+output_gate_bias = Input("output_gate_bias", "TENSOR_FLOAT32",
+ "{%d}" % (n_cell))
+
+projection_weights = Input("projection_weights", "TENSOR_FLOAT32",
+ "{%d,%d}" % (n_output, n_cell))
+projection_bias = Input("projection_bias", "TENSOR_FLOAT32", "{0}")
+
+output_state_in = Input("output_state_in", "TENSOR_FLOAT32",
+ "{%d, %d}" % (n_batch, n_output))
+cell_state_in = Input("cell_state_in", "TENSOR_FLOAT32",
+ "{%d, %d}" % (n_batch, n_cell))
+
+activation_param = Int32Scalar("activation_param", 4) # Tanh
+cell_clip_param = Float32Scalar("cell_clip_param", 0.)
+proj_clip_param = Float32Scalar("proj_clip_param", 0.)
+time_major_param = BoolScalar("time_major_param", True)
+
+input_layer_norm_weights = Input("input_layer_norm_weights", "TENSOR_FLOAT32",
+ "{%d}" % n_cell)
+forget_layer_norm_weights = Input("forget_layer_norm_weights", "TENSOR_FLOAT32",
+ "{%d}" % n_cell)
+cell_layer_norm_weights = Input("cell_layer_norm_weights", "TENSOR_FLOAT32",
+ "{%d}" % n_cell)
+output_layer_norm_weights = Input("output_layer_norm_weights", "TENSOR_FLOAT32",
+ "{%d}" % n_cell)
+
+output = Output("output", "TENSOR_FLOAT32", "{%d, %d, %d}" % (max_time, n_batch, n_output))
+
+model = model.Operation(
+ "UNIDIRECTIONAL_SEQUENCE_LSTM", input, input_to_input_weights, input_to_forget_weights,
+ input_to_cell_weights, input_to_output_weights, recurrent_to_input_weights,
+ recurrent_to_forget_weights, recurrent_to_cell_weights,
+ recurrent_to_output_weights, cell_to_input_weights, cell_to_forget_weights,
+ cell_to_output_weights, input_gate_bias, forget_gate_bias, cell_gate_bias,
+ output_gate_bias, projection_weights, projection_bias, output_state_in,
+ cell_state_in, activation_param, cell_clip_param, proj_clip_param, time_major_param,
+ input_layer_norm_weights, forget_layer_norm_weights,
+ cell_layer_norm_weights, output_layer_norm_weights).To([output])
+
+# Example 1. Input in operand 0,
+test_input = [0.7, 0.8, 0.1, 0.2, 0.3, 0.3, 0.2, 0.9, 0.8, 0.1]
+
+input0 = {
+ dynamic_layer.getModelInput() : test_input,
+ dynamic_layer.getShapeInput() : input_shape,
+ input_to_input_weights: [
+ 0.5, 0.6, 0.7, -0.8, -0.9, 0.1, 0.2, 0.3, -0.4, 0.5, -0.8, 0.7, -0.6,
+ 0.5, -0.4, -0.5, -0.4, -0.3, -0.2, -0.1
+ ],
+ input_to_forget_weights: [
+ -0.6, -0.1, 0.3, 0.2, 0.9, -0.5, -0.2, -0.4, 0.3, -0.8, -0.4, 0.3, -0.5,
+ -0.4, -0.6, 0.3, -0.4, -0.6, -0.5, -0.5
+ ],
+ input_to_cell_weights: [
+ -0.4, -0.3, -0.2, -0.1, -0.5, 0.5, -0.2, -0.3, -0.2, -0.6, 0.6, -0.1,
+ -0.4, -0.3, -0.7, 0.7, -0.9, -0.5, 0.8, 0.6
+ ],
+ input_to_output_weights: [
+ -0.8, -0.4, -0.2, -0.9, -0.1, -0.7, 0.3, -0.3, -0.8, -0.2, 0.6, -0.2,
+ 0.4, -0.7, -0.3, -0.5, 0.1, 0.5, -0.6, -0.4
+ ],
+ input_gate_bias: [0.03, 0.15, 0.22, 0.38],
+ forget_gate_bias: [0.1, -0.3, -0.2, 0.1],
+ cell_gate_bias: [-0.05, 0.72, 0.25, 0.08],
+ output_gate_bias: [0.05, -0.01, 0.2, 0.1],
+ recurrent_to_input_weights: [
+ -0.2, -0.3, 0.4, 0.1, -0.5, 0.9, -0.2, -0.3, -0.7, 0.05, -0.2, -0.6
+ ],
+ recurrent_to_cell_weights: [
+ -0.3, 0.2, 0.1, -0.3, 0.8, -0.08, -0.2, 0.3, 0.8, -0.6, -0.1, 0.2
+ ],
+ recurrent_to_forget_weights: [
+ -0.5, -0.3, -0.5, -0.2, 0.6, 0.4, 0.9, 0.3, -0.1, 0.2, 0.5, 0.2
+ ],
+ recurrent_to_output_weights: [
+ 0.3, -0.1, 0.1, -0.2, -0.5, -0.7, -0.2, -0.6, -0.1, -0.4, -0.7, -0.2
+ ],
+ cell_to_input_weights: [0.05, 0.1, 0.25, 0.15],
+ cell_to_forget_weights: [-0.02, -0.15, -0.25, -0.03],
+ cell_to_output_weights: [0.1, -0.1, -0.5, 0.05],
+ projection_weights: [
+ -0.1, 0.2, 0.01, -0.2, 0.1, 0.5, 0.3, 0.08, 0.07, 0.2, -0.4, 0.2
+ ],
+ projection_bias: [],
+ input_layer_norm_weights: [0.1, 0.2, 0.3, 0.5],
+ forget_layer_norm_weights: [0.2, 0.2, 0.4, 0.3],
+ cell_layer_norm_weights: [0.7, 0.2, 0.3, 0.8],
+ output_layer_norm_weights: [0.6, 0.2, 0.2, 0.5]
+}
+
+golden_output = [
+ 0.024407668039203, 0.128027379512787, -0.001709178090096,
+ -0.006924282759428, 0.084874063730240, 0.063444979488850
+]
+
+output0 = {
+ output: golden_output,
+}
+
+input0[output_state_in] = [ 0 for _ in range(n_batch * n_output) ]
+input0[cell_state_in] = [ 0 for _ in range(n_batch * n_cell) ]
+
+Example((input0, output0))
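As a sanity check on the extents used above (a sketch, not part of the spec), the flattened buffer lengths follow directly from max_time, n_batch, n_input, n_cell, and n_output:

```python
max_time, n_batch, n_input, n_cell, n_output = 1, 2, 5, 4, 3
assert max_time * n_batch * n_input == 10    # len(test_input)
assert max_time * n_batch * n_output == 6    # len(golden_output)
assert n_batch * n_output == 6               # output_state_in zeros
assert n_batch * n_cell == 8                 # cell_state_in zeros
```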
diff --git a/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_layer_norm_cifg_peephole.mod.py b/tests/nnapi/specs/V1_2/unidirectional_sequence_lstm_layer_norm_cifg_peephole.mod.py
index 1fa0ff01a..1fa0ff01a 100644
--- a/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_layer_norm_cifg_peephole.mod.py
+++ b/tests/nnapi/specs/V1_2/unidirectional_sequence_lstm_layer_norm_cifg_peephole.mod.py
diff --git a/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_norm_peephole_projection.mod.py b/tests/nnapi/specs/V1_2/unidirectional_sequence_lstm_norm_peephole_projection.mod.py
index 421fbf72a..421fbf72a 100644
--- a/tests/nnapi/specs/skip/V1_2/unidirectional_sequence_lstm_norm_peephole_projection.mod.py
+++ b/tests/nnapi/specs/V1_2/unidirectional_sequence_lstm_norm_peephole_projection.mod.py
diff --git a/tests/nnapi/src/TestGenerated.cpp b/tests/nnapi/src/TestGenerated.cpp
index 234735354..093e5a964 100644
--- a/tests/nnapi/src/TestGenerated.cpp
+++ b/tests/nnapi/src/TestGenerated.cpp
@@ -256,8 +256,11 @@ void GeneratedTests::SetUp() {
mOldComputeMode = Execution::setComputeMode(GetParam());
#endif
// Fix for onert: Fix file path for linux
+#ifndef __ANDROID__
char cacheDirTemp[] = "/tmp/TestCompilationCachingXXXXXX";
- //char cacheDirTemp[] = "/data/local/tmp/TestCompilationCachingXXXXXX";
+#else
+ char cacheDirTemp[] = "/data/local/tmp/TestCompilationCachingXXXXXX";
+#endif
char* cacheDir = mkdtemp(cacheDirTemp);
ASSERT_NE(cacheDir, nullptr);
mCacheDir = cacheDir;
diff --git a/tests/nnapi/src/TestMain.cpp b/tests/nnapi/src/TestMain.cpp
index 725d77c1c..907407242 100644
--- a/tests/nnapi/src/TestMain.cpp
+++ b/tests/nnapi/src/TestMain.cpp
@@ -95,22 +95,28 @@ static int test(bool useCpuOnly, Execution::ComputeMode computeMode, bool allowS
return RUN_ALL_TESTS();
}
+// FIX for onert: disable argument handling
+#if 0
void checkArgs(int argc, char** argv, int nextArg) {
if (nextArg != argc) {
std::cerr << "Unexpected argument: " << argv[nextArg] << std::endl;
exit(1);
}
}
+#endif
int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
+ // FIX for onert: disable argument handling
+#if 0
if ((argc > 1) && std::isdigit(argv[1][0])) {
allowedPasses = std::stoull(argv[1]);
checkArgs(argc, argv, 2);
} else {
checkArgs(argc, argv, 1);
}
+#endif
#ifndef NNTEST_ONLY_PUBLIC_API
android::nn::initVLogMask();
diff --git a/tests/nnapi/src/TestNeuralNetworksWrapper.h b/tests/nnapi/src/TestNeuralNetworksWrapper.h
index 022f3fade..10ce8a6f5 100644
--- a/tests/nnapi/src/TestNeuralNetworksWrapper.h
+++ b/tests/nnapi/src/TestNeuralNetworksWrapper.h
@@ -27,9 +27,7 @@
//#include "NeuralNetworksWrapperExtensions.h"
#include <math.h>
-// Fix for onert: use boost::optional instead of std::optional
-// TODO in onert: introduce and use internal optional library
-#include <boost/optional.hpp>
+#include <optional>
#include <string>
#include <vector>
diff --git a/tests/nnapi/src/TestValidation.cpp b/tests/nnapi/src/TestValidation.cpp
index 45432c0a4..3e749b84d 100644
--- a/tests/nnapi/src/TestValidation.cpp
+++ b/tests/nnapi/src/TestValidation.cpp
@@ -29,13 +29,19 @@
// This file tests all the validations done by the Neural Networks API.
namespace {
+#ifndef PATH_MAX
#define PATH_MAX 256
+#endif
static int shmem_num = 0;
static int shmem_create_region(size_t size)
{
char temp[PATH_MAX];
+#ifndef __ANDROID__
snprintf(temp, sizeof(temp), "/tmp/nn-shmem-%d-%d-XXXXXXXXX", getpid(), shmem_num++);
+#else
+ snprintf(temp, sizeof(temp), "/data/local/tmp/nn-shmem-%d-%d-XXXXXXXXX", getpid(), shmem_num++);
+#endif
// Set umask and recover after generate temporary file to avoid security issue
mode_t umaskPrev = umask(S_IRUSR|S_IWUSR);
diff --git a/tests/nnfw_api/CMakeLists.txt b/tests/nnfw_api/CMakeLists.txt
index aa3a9421d..93cc980e0 100644
--- a/tests/nnfw_api/CMakeLists.txt
+++ b/tests/nnfw_api/CMakeLists.txt
@@ -19,13 +19,50 @@ if(ARMCompute_FOUND)
target_compile_definitions(${RUNTIME_NNFW_API_TEST} PRIVATE TEST_ACL_BACKEND)
endif(ARMCompute_FOUND)
+nnfw_find_package(Xnnpack QUIET)
+if(Xnnpack_FOUND)
+ target_compile_definitions(${RUNTIME_NNFW_API_TEST} PRIVATE TEST_XNNPACK_BACKEND)
+endif(Xnnpack_FOUND)
+
+nnas_find_package(Opencl_Headers QUIET)
+if(Opencl_Headers_FOUND)
+ target_compile_definitions(${RUNTIME_NNFW_API_TEST} PRIVATE TEST_GPU_CL_BACKEND)
+endif(Opencl_Headers_FOUND)
+
set(RUNTIME_NNFW_API_TEST_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/include
${CMAKE_CURRENT_SOURCE_DIR}/src)
target_include_directories(${RUNTIME_NNFW_API_TEST} PRIVATE ${RUNTIME_NNFW_API_TEST_INCLUDE})
-target_link_libraries(${RUNTIME_NNFW_API_TEST} nnfw-dev)
+target_link_libraries(${RUNTIME_NNFW_API_TEST} nnfw-dev jsoncpp)
target_link_libraries(${RUNTIME_NNFW_API_TEST} gtest gmock)
target_link_libraries(${RUNTIME_NNFW_API_TEST} ${LIB_PTHREAD} dl)
target_link_libraries(${RUNTIME_NNFW_API_TEST} circle_schema)
-install(TARGETS ${RUNTIME_NNFW_API_TEST} DESTINATION unittest_standalone)
+install(TARGETS ${RUNTIME_NNFW_API_TEST} DESTINATION unittest)
+
+# Install nnpackage test model (add)
+set(NNPACKAGE_MODEL_DIR ${NNAS_PROJECT_SOURCE_DIR}/nnpackage/examples/v1.0.0/add)
+set(NNPACKAGE_INSTALL_TARGET unittest/nnfw_api_gtest_models)
+
+install(DIRECTORY ${NNPACKAGE_MODEL_DIR} DESTINATION ${NNPACKAGE_INSTALL_TARGET}/add)
+
+# Install nnpackage test model (add_no_manifest)
+set(NNPACKAGE_MODEL ${NNPACKAGE_MODEL_DIR}/add.tflite)
+install(FILES ${NNPACKAGE_MODEL} DESTINATION ${NNPACKAGE_INSTALL_TARGET}/add_no_manifest/add_no_manifest)
+
+# Install nnpackage test model (add_invalid_manifest)
+set(NNPACKAGE_MODEL_DIR ${NNAS_PROJECT_SOURCE_DIR}/nnpackage/examples/v1.0.0/add_invalid_manifest)
+install(DIRECTORY ${NNPACKAGE_MODEL_DIR} DESTINATION ${NNPACKAGE_INSTALL_TARGET}/add_invalid_manifest)
+
+# Install nnpackage test model (if)
+set(NNPACKAGE_MODEL_DIR ${NNAS_PROJECT_SOURCE_DIR}/nnpackage/examples/v1.0.0/if_dynamic)
+install(DIRECTORY ${NNPACKAGE_MODEL_DIR} DESTINATION ${NNPACKAGE_INSTALL_TARGET}/if_dynamic)
+
+# Install nnpackage test model (while)
+set(NNPACKAGE_MODEL_DIR ${NNAS_PROJECT_SOURCE_DIR}/nnpackage/examples/v1.0.0/while_dynamic)
+install(DIRECTORY ${NNPACKAGE_MODEL_DIR} DESTINATION ${NNPACKAGE_INSTALL_TARGET}/while_dynamic)
+
+# Install nnpackage test model (mobilenet)
+set(NNPACKAGE_MODEL_DIR ${NNAS_PROJECT_SOURCE_DIR}/runtime/contrib/TFLiteSharp/TFLiteTestApp/res/)
+install(DIRECTORY ${NNPACKAGE_MODEL_DIR} DESTINATION ${NNPACKAGE_INSTALL_TARGET}/mobilenet_v1_1.0_224)
+
diff --git a/tests/nnfw_api/README.md b/tests/nnfw_api/README.md
index 7e14fc445..58ba12992 100644
--- a/tests/nnfw_api/README.md
+++ b/tests/nnfw_api/README.md
@@ -16,6 +16,8 @@ This test framework consists of 3 kinds of tests:
## nnpackages for testing
+> NOTE Adding a test this way is not recommended, since you can build a Circle model in code with the `CircleGen` class. See also `GenModelTest`.
+
To test *nnfw_api*, we almost always need some nnpackages. Those are stored on a web server, so there are no nnpackage files in the repo.
### How to add nnpackages for test
@@ -27,4 +29,4 @@ Once you have done the above steps, please register it in the test source code t
### Installation
-You must install the test nnpackages before running the tests. They must be in the same directory with the test executable, under `nnfw_api_gtest_models/`. There is an installation script `tests/scripts/nnfw_api_gtest/install_nnfw_api_gtest_nnpackages.sh`, however the nnpackage file server is not public so it will fail.
+You must install the test nnpackages before running the tests. They must be in the same directory as the test executable, under `nnfw_api_gtest_models/`. Install them with the `onert-test prepare-model` command. It only works on CI, since the nnpackage file server is not public.
diff --git a/tests/nnfw_api/src/CircleGen.cc b/tests/nnfw_api/src/CircleGen.cc
index 19cb95f37..e4601843c 100644
--- a/tests/nnfw_api/src/CircleGen.cc
+++ b/tests/nnfw_api/src/CircleGen.cc
@@ -14,23 +14,8 @@
* limitations under the License.
*/
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
#include "CircleGen.h"
+#include "flatbuffers/flexbuffers.h"
CircleGen::CircleGen() : _subgraph_contexts(1) // Create primary subgraph
{
@@ -54,11 +39,35 @@ uint32_t CircleGen::addBuffer(const uint8_t *buf, size_t size)
uint32_t CircleGen::addTensor(const TensorParams &params)
{
- int ind = curSubgCtx().tensors.size();
+ uint32_t ind = curSubgCtx().tensors.size();
curSubgCtx().tensors.emplace_back(buildTensor(params));
return ind;
}
+uint32_t CircleGen::addTensor(const TensorParams &params, float scale, int64_t zero_point)
+{
+ // TensorType_INT8: scale >= 0, zero_point: [-128, 127]
+ // TensorType_UINT8: scale >= 0, zero_point: [0, 255]
+ uint32_t ind = curSubgCtx().tensors.size();
+ curSubgCtx().tensors.emplace_back(buildTensor(params, scale, zero_point));
+ return ind;
+}
+
+uint32_t CircleGen::addTensor(const TensorParams &params, std::vector<float> &scale,
+ std::vector<int64_t> &zero_point)
+{
+ uint32_t ind = curSubgCtx().tensors.size();
+ curSubgCtx().tensors.emplace_back(buildTensor(params, scale, zero_point));
+ return ind;
+}
+
+uint32_t CircleGen::addTensor(const TensorParams &params, const SparsityParams &sp)
+{
+ uint32_t ind = curSubgCtx().tensors.size();
+ curSubgCtx().tensors.emplace_back(buildTensor(params, sp));
+ return ind;
+}
+
void CircleGen::setInputsAndOutputs(const std::vector<int> &inputs, const std::vector<int> &outputs)
{
curSubgCtx().inputs = inputs;
@@ -78,7 +87,7 @@ CircleBuffer CircleGen::finish()
for (auto &ctx : _subgraph_contexts)
subgraphs.push_back(buildSubGraph(ctx));
auto model =
- circle::CreateModelDirect(_fbb, 3, &_opcodes, &subgraphs, "CircleGen generated", &_buffers);
+ circle::CreateModelDirect(_fbb, 3, &_opcodes, &subgraphs, "CircleGen generated", &_buffers);
_fbb.Finish(model);
return CircleBuffer{std::move(_fbb)};
}
@@ -93,17 +102,46 @@ uint32_t CircleGen::addOperatorAdd(const OperatorParams &params,
circle::BuiltinOptions_AddOptions, options);
}
+uint32_t CircleGen::addOperatorAddN(const OperatorParams &params)
+{
+ auto options = circle::CreateAddNOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_ADD_N,
+ circle::BuiltinOptions_AddNOptions, options);
+}
+
+uint32_t CircleGen::addOperatorArgMax(const OperatorParams &params, circle::TensorType output_type)
+{
+ auto options = circle::CreateArgMaxOptions(_fbb, output_type).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_ARG_MAX,
+ circle::BuiltinOptions_ArgMaxOptions, options);
+}
+
+uint32_t CircleGen::addOperatorArgMin(const OperatorParams &params, circle::TensorType output_type)
+{
+ auto options = circle::CreateArgMinOptions(_fbb, output_type).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_ARG_MIN,
+ circle::BuiltinOptions_ArgMinOptions, options);
+}
+
uint32_t CircleGen::addOperatorAveragePool2D(const OperatorParams &params, circle::Padding padding,
int stride_w, int stride_h, int filter_w, int filter_h,
circle::ActivationFunctionType actfn)
{
auto options =
- circle::CreatePool2DOptions(_fbb, padding, stride_w, stride_h, filter_w, filter_h, actfn)
- .Union();
+ circle::CreatePool2DOptions(_fbb, padding, stride_w, stride_h, filter_w, filter_h, actfn)
+ .Union();
return addOperatorWithOptions(params, circle::BuiltinOperator_AVERAGE_POOL_2D,
circle::BuiltinOptions_Pool2DOptions, options);
}
+uint32_t CircleGen::addOperatorCast(const OperatorParams &params, circle::TensorType input_type,
+ circle::TensorType output_type)
+{
+ auto options = circle::CreateCastOptions(_fbb, input_type, output_type).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_CAST,
+ circle::BuiltinOptions_CastOptions, options);
+}
+
uint32_t CircleGen::addOperatorConcatenation(const OperatorParams &params, int axis,
circle::ActivationFunctionType actfn)
{
@@ -112,6 +150,18 @@ uint32_t CircleGen::addOperatorConcatenation(const OperatorParams &params, int a
circle::BuiltinOptions_ConcatenationOptions, options);
}
+uint32_t CircleGen::addOperatorConv2D(const OperatorParams &params, circle::Padding padding,
+ int stride_w, int stride_h,
+ circle::ActivationFunctionType actfn, int dilation_w,
+ int dilation_h)
+{
+ auto options =
+ circle::CreateConv2DOptions(_fbb, padding, stride_w, stride_h, actfn, dilation_w, dilation_h)
+ .Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_CONV_2D,
+ circle::BuiltinOptions_Conv2DOptions, options);
+}
+
uint32_t CircleGen::addOperatorCos(const OperatorParams &params)
{
auto options = circle::CreateCosOptions(_fbb).Union();
@@ -119,6 +169,120 @@ uint32_t CircleGen::addOperatorCos(const OperatorParams &params)
circle::BuiltinOptions_CosOptions, options);
}
+uint32_t CircleGen::addOperatorDepthToSpace(const OperatorParams &params, int32_t block_size)
+{
+ auto options = circle::CreateDepthToSpaceOptions(_fbb, block_size).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_DEPTH_TO_SPACE,
+ circle::BuiltinOptions_DepthToSpaceOptions, options);
+}
+
+uint32_t CircleGen::addOperatorDepthwiseConv2D(const OperatorParams &params,
+ circle::Padding padding, int stride_w, int stride_h,
+ int depth_multiplier,
+ circle::ActivationFunctionType actfn, int dilation_w,
+ int dilation_h)
+{
+ auto options =
+ circle::CreateDepthwiseConv2DOptions(_fbb, padding, stride_w, stride_h, depth_multiplier, actfn,
+ dilation_w, dilation_h)
+ .Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_DEPTHWISE_CONV_2D,
+ circle::BuiltinOptions_DepthwiseConv2DOptions, options);
+}
+
+uint32_t CircleGen::addOperatorDetectionPostProcess(const OperatorParams &params, int num_classes,
+ float y_scale, float x_scale, float h_scale,
+ float w_scale, float nms_score_threshold,
+ float nms_iou_threshold, int max_detections,
+ int max_classes_per_detection,
+ int detections_per_class)
+{
+ // flexbuffer custom_option
+ auto flex_buffers = std::make_unique<flexbuffers::Builder>();
+ size_t map_start = flex_buffers->StartMap();
+ flex_buffers->Int("num_classes", num_classes);
+ flex_buffers->Float("y_scale", y_scale);
+ flex_buffers->Float("x_scale", x_scale);
+ flex_buffers->Float("h_scale", h_scale);
+ flex_buffers->Float("w_scale", w_scale);
+ flex_buffers->Float("nms_iou_threshold", nms_iou_threshold);
+ flex_buffers->Float("nms_score_threshold", nms_score_threshold);
+ flex_buffers->Int("max_detections", max_detections);
+ flex_buffers->Int("max_classes_per_detection", max_classes_per_detection);
+ flex_buffers->Int("detections_per_class", detections_per_class);
+ flex_buffers->EndMap(map_start);
+ flex_buffers->Finish();
+
+ return addCustomOperatorWithOptions(params, "TFLite_Detection_PostProcess",
+ circle::BuiltinOptions_NONE, 0, &flex_buffers->GetBuffer(),
+ circle::CustomOptionsFormat::CustomOptionsFormat_FLEXBUFFERS,
+ nullptr, nullptr);
+}
+
+uint32_t CircleGen::addOperatorElu(const OperatorParams &params)
+{
+ return addOperatorWithOptions(params, circle::BuiltinOperator_ELU, circle::BuiltinOptions_NONE,
+ 0);
+}
+
+uint32_t CircleGen::addOperatorEqual(const OperatorParams &params)
+{
+ auto options = circle::CreateEqualOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_EQUAL,
+ circle::BuiltinOptions_EqualOptions, options);
+}
+
+uint32_t CircleGen::addOperatorExpandDims(const OperatorParams &params)
+{
+ auto options = circle::CreateExpandDimsOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_EXPAND_DIMS,
+ circle::BuiltinOptions_ExpandDimsOptions, options);
+}
+
+uint32_t
+CircleGen::addOperatorFullyConnected(const OperatorParams &params,
+ circle::FullyConnectedOptionsWeightsFormat weights_format)
+{
+ auto options =
+ circle::CreateFullyConnectedOptions(_fbb, circle::ActivationFunctionType_NONE, weights_format)
+ .Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_FULLY_CONNECTED,
+ circle::BuiltinOptions_FullyConnectedOptions, options);
+}
+
+uint32_t CircleGen::addOperatorFill(const OperatorParams &params)
+{
+ auto options = circle::CreateFillOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_FILL,
+ circle::BuiltinOptions_FillOptions, options);
+}
+
+uint32_t CircleGen::addOperatorFloor(const OperatorParams &params)
+{
+ return addOperatorWithOptions(params, circle::BuiltinOperator_FLOOR, circle::BuiltinOptions_NONE,
+ 0);
+}
+
+uint32_t CircleGen::addOperatorFloorDiv(const OperatorParams &params)
+{
+ return addOperatorWithOptions(params, circle::BuiltinOperator_FLOOR_DIV,
+ circle::BuiltinOptions_NONE, 0);
+}
+
+uint32_t CircleGen::addOperatorGreater(const OperatorParams &params)
+{
+ auto options = circle::CreateGreaterOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_GREATER,
+ circle::BuiltinOptions_GreaterOptions, options);
+}
+
+uint32_t CircleGen::addOperatorGreaterEqual(const OperatorParams &params)
+{
+ auto options = circle::CreateGreaterEqualOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_GREATER_EQUAL,
+ circle::BuiltinOptions_GreaterEqualOptions, options);
+}
+
uint32_t CircleGen::addOperatorL2Normalization(const OperatorParams &params)
{
auto options = circle::CreateL2NormOptions(_fbb).Union();
@@ -133,6 +297,13 @@ uint32_t CircleGen::addOperatorLess(const OperatorParams &params)
circle::BuiltinOptions_LessOptions, options);
}
+uint32_t CircleGen::addOperatorLessEqual(const OperatorParams &params)
+{
+ auto options = circle::CreateLessEqualOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_LESS_EQUAL,
+ circle::BuiltinOptions_LessEqualOptions, options);
+}
+
uint32_t CircleGen::addOperatorLeakyRelu(const OperatorParams &params, float alpha)
{
auto options = circle::CreateLeakyReluOptions(_fbb, alpha).Union();
@@ -140,6 +311,28 @@ uint32_t CircleGen::addOperatorLeakyRelu(const OperatorParams &params, float alp
circle::BuiltinOptions_LeakyReluOptions, options);
}
+uint32_t CircleGen::addOperatorLogSoftmax(const OperatorParams &params)
+{
+ auto options = circle::CreateLogSoftmaxOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_LOG_SOFTMAX,
+ circle::BuiltinOptions_LogSoftmaxOptions, options);
+}
+
+uint32_t CircleGen::addOperatorMean(const OperatorParams &params, bool keep_dims)
+{
+ auto options = circle::CreateReducerOptions(_fbb, keep_dims).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_MEAN,
+ circle::BuiltinOptions_ReducerOptions, options);
+}
+
+uint32_t CircleGen::addOperatorMul(const OperatorParams &params,
+ circle::ActivationFunctionType actfn)
+{
+ auto options = circle::CreateMulOptions(_fbb, actfn).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_MUL,
+ circle::BuiltinOptions_MulOptions, options);
+}
+
uint32_t CircleGen::addOperatorNeg(const OperatorParams &params)
{
auto options = circle::CreateNegOptions(_fbb).Union();
@@ -147,6 +340,20 @@ uint32_t CircleGen::addOperatorNeg(const OperatorParams &params)
circle::BuiltinOptions_NegOptions, options);
}
+uint32_t CircleGen::addOperatorNotEqual(const OperatorParams &params)
+{
+ auto options = circle::CreateNotEqualOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_NOT_EQUAL,
+ circle::BuiltinOptions_NotEqualOptions, options);
+}
+
+uint32_t CircleGen::addOperatorOneHot(const OperatorParams &params, int32_t axis)
+{
+ auto options = circle::CreateOneHotOptions(_fbb, axis).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_ONE_HOT,
+ circle::BuiltinOptions_OneHotOptions, options);
+}
+
uint32_t CircleGen::addOperatorPad(const OperatorParams &params)
{
auto options = circle::CreatePadOptions(_fbb).Union();
@@ -161,6 +368,13 @@ uint32_t CircleGen::addOperatorPadV2(const OperatorParams &params)
circle::BuiltinOptions_PadV2Options, options);
}
+uint32_t CircleGen::addOperatorQuantize(const OperatorParams &params)
+{
+ auto options = circle::CreateQuantizeOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_QUANTIZE,
+ circle::BuiltinOptions_QuantizeOptions, options);
+}
+
uint32_t CircleGen::addOperatorRank(const OperatorParams &params)
{
auto options = circle::CreateRankOptions(_fbb).Union();
@@ -168,6 +382,51 @@ uint32_t CircleGen::addOperatorRank(const OperatorParams &params)
circle::BuiltinOptions_RankOptions, options);
}
+uint32_t CircleGen::addOperatorReduce(const OperatorParams &params,
+ circle::BuiltinOperator reduce_op, bool keep_dims)
+{
+ switch (reduce_op)
+ {
+ case circle::BuiltinOperator_REDUCE_ANY:
+ case circle::BuiltinOperator_REDUCE_MIN:
+ case circle::BuiltinOperator_REDUCE_MAX:
+ case circle::BuiltinOperator_REDUCE_PROD:
+ break;
+ default:
+ throw std::runtime_error{"Wrong reduce op"};
+ }
+ auto options = circle::CreateReducerOptions(_fbb, keep_dims).Union();
+ return addOperatorWithOptions(params, reduce_op, circle::BuiltinOptions_ReducerOptions, options);
+}
+
+uint32_t CircleGen::addOperatorRelu(const OperatorParams &params)
+{
+ return addOperatorWithOptions(params, circle::BuiltinOperator_RELU, circle::BuiltinOptions_NONE,
+ 0);
+}
+
+uint32_t CircleGen::addOperatorRelu6(const OperatorParams &params)
+{
+ return addOperatorWithOptions(params, circle::BuiltinOperator_RELU6, circle::BuiltinOptions_NONE,
+ 0);
+}
+
+uint32_t CircleGen::addOperatorReshape(const OperatorParams &params, const Shape *new_shape)
+{
+ auto options = circle::CreateReshapeOptionsDirect(_fbb, new_shape).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_RESHAPE,
+ circle::BuiltinOptions_ReshapeOptions, options);
+}
+
+uint32_t CircleGen::addOperatorResizeBilinear(const OperatorParams &params, bool align_corners,
+ bool half_pixel_centers)
+{
+ auto options =
+ circle::CreateResizeBilinearOptions(_fbb, align_corners, half_pixel_centers).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_RESIZE_BILINEAR,
+ circle::BuiltinOptions_ResizeBilinearOptions, options);
+}
+
uint32_t CircleGen::addOperatorResizeNearestNeighbor(const OperatorParams &params)
{
auto options = circle::CreateResizeNearestNeighborOptions(_fbb).Union();
@@ -175,6 +434,81 @@ uint32_t CircleGen::addOperatorResizeNearestNeighbor(const OperatorParams &param
circle::BuiltinOptions_ResizeNearestNeighborOptions, options);
}
+uint32_t CircleGen::addOperatorReverseV2(const OperatorParams &params)
+{
+ auto options = circle::CreateReverseV2Options(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_REVERSE_V2,
+ circle::BuiltinOptions_ReverseV2Options, options);
+}
+
+uint32_t CircleGen::addOperatorShape(const OperatorParams &params, circle::TensorType type)
+{
+ auto options = circle::CreateShapeOptions(_fbb, type).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_SHAPE,
+ circle::BuiltinOptions_ShapeOptions, options);
+}
+
+uint32_t CircleGen::addOperatorSelect(const OperatorParams &params)
+{
+ auto options = circle::CreateSelectOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_SELECT,
+ circle::BuiltinOptions_SelectOptions, options);
+}
+
+uint32_t CircleGen::addOperatorSelectV2(const OperatorParams &params)
+{
+ auto options = circle::CreateSelectV2Options(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_SELECT_V2,
+ circle::BuiltinOptions_SelectV2Options, options);
+}
+
+uint32_t CircleGen::addOperatorSlice(const OperatorParams &params)
+{
+ auto options = circle::CreateSliceOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_SLICE,
+ circle::BuiltinOptions_SliceOptions, options);
+}
+
+uint32_t CircleGen::addOperatorSoftmax(const OperatorParams &params, float beta)
+{
+ auto options = circle::CreateSoftmaxOptions(_fbb, beta).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_SOFTMAX,
+ circle::BuiltinOptions_SoftmaxOptions, options);
+}
+
+uint32_t CircleGen::addOperatorSplit(const OperatorParams &params, int32_t num_split)
+{
+ auto options = circle::CreateSplitOptions(_fbb, num_split).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_SPLIT,
+ circle::BuiltinOptions_SplitOptions, options);
+}
+
+uint32_t CircleGen::addOperatorStridedSlice(const OperatorParams &params, int32_t begin_mask,
+ int32_t end_mask, int32_t ellipsis_mask,
+ int32_t new_axis_mask, int32_t shrink_axis_mask)
+{
+ auto options = circle::CreateStridedSliceOptions(_fbb, begin_mask, end_mask, ellipsis_mask,
+ new_axis_mask, shrink_axis_mask)
+ .Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_STRIDED_SLICE,
+ circle::BuiltinOptions_StridedSliceOptions, options);
+}
+
+uint32_t CircleGen::addOperatorSub(const OperatorParams &params,
+ circle::ActivationFunctionType actfn)
+{
+ auto options = circle::CreateSubOptions(_fbb, actfn).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_SUB,
+ circle::BuiltinOptions_SubOptions, options);
+}
+
+uint32_t CircleGen::addOperatorTile(const OperatorParams &params)
+{
+ auto options = circle::CreateTileOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_TILE,
+ circle::BuiltinOptions_TileOptions, options);
+}
+
uint32_t CircleGen::addOperatorWhile(const OperatorParams &params, uint32_t cond_subg,
uint32_t body_subg)
{
@@ -183,6 +517,49 @@ uint32_t CircleGen::addOperatorWhile(const OperatorParams &params, uint32_t cond
circle::BuiltinOptions_WhileOptions, options);
}
+uint32_t CircleGen::addOperatorIf(const OperatorParams &params, uint32_t then_subg,
+ uint32_t else_subg)
+{
+ auto options = circle::CreateIfOptions(_fbb, then_subg, else_subg).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_IF,
+ circle::BuiltinOptions_IfOptions, options);
+}
+
+uint32_t CircleGen::addOperatorInstanceNorm(const OperatorParams &params, float epsilon,
+ circle::ActivationFunctionType actfn)
+{
+ auto options = circle::CreateInstanceNormOptions(_fbb, epsilon, actfn).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_INSTANCE_NORM,
+ circle::BuiltinOptions_InstanceNormOptions, options);
+}
+
+uint32_t CircleGen::addOperatorTranspose(const OperatorParams &params)
+{
+ auto options = circle::CreateTransposeOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_TRANSPOSE,
+ circle::BuiltinOptions_TransposeOptions, options);
+}
+
+uint32_t CircleGen::addOperatorSqrt(const OperatorParams &params)
+{
+ return addOperatorWithOptions(params, circle::BuiltinOperator_SQRT, circle::BuiltinOptions_NONE,
+ 0);
+}
+
+uint32_t CircleGen::addOperatorSquare(const OperatorParams &params)
+{
+ auto options = circle::CreateSquareOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_SQUARE,
+ circle::BuiltinOptions_SquareOptions, options);
+}
+
+uint32_t CircleGen::addOperatorBatchToSpaceND(const OperatorParams &params)
+{
+ auto options = circle::CreateBatchToSpaceNDOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_BATCH_TO_SPACE_ND,
+ circle::BuiltinOptions_BatchToSpaceNDOptions, options);
+}
+
// NOTE Please add addOperator functions ABOVE this line
//
// % How to add a new addOperatorXXX function
@@ -191,6 +568,9 @@ uint32_t CircleGen::addOperatorWhile(const OperatorParams &params, uint32_t cond
// 2. Change enum BuiltinOperator
// 3. Change enum BuiltinOptions
// 4. Change CreateXXXOptions accordingly
+//
+// If the operator does not have an options table, remove the CreateXXXOptions call and
+// call addOperatorWithOptions with options_type = circle::BuiltinOptions_NONE and options = 0
// ===== Add Operator methods end =====
@@ -208,6 +588,23 @@ uint32_t CircleGen::addOperatorWithOptions(const OperatorParams &params,
return ind;
}
+uint32_t CircleGen::addCustomOperatorWithOptions(
+ const OperatorParams &params, std::string custom_code, circle::BuiltinOptions options_type,
+ flatbuffers::Offset<void> options, const std::vector<uint8_t> *custom_options,
+ circle::CustomOptionsFormat custom_options_format,
+ const std::vector<uint8_t> *mutating_variable_inputs, const std::vector<int32_t> *intermediates)
+
+{
+ uint32_t opcode_ind = addCustomOperatorCode(custom_code);
+ auto op = circle::CreateOperatorDirect(
+ _fbb, opcode_ind, &params.inputs, &params.outputs, options_type, options, custom_options,
+ custom_options_format, mutating_variable_inputs, intermediates);
+
+ uint32_t ind = curSubgCtx().operators.size();
+ curSubgCtx().operators.emplace_back(op);
+ return ind;
+}
+
uint32_t CircleGen::addOperatorCode(circle::BuiltinOperator opcode)
{
// TODO If the same OperatorCode is registered already, just return it
@@ -216,9 +613,18 @@ uint32_t CircleGen::addOperatorCode(circle::BuiltinOperator opcode)
return ind;
}
+uint32_t CircleGen::addCustomOperatorCode(std::string custom_code)
+{
+ // TODO If the same OperatorCode is registered already, just return it
+ uint32_t ind = _opcodes.size();
+ _opcodes.emplace_back(
+ circle::CreateOperatorCodeDirect(_fbb, circle::BuiltinOperator_CUSTOM, custom_code.c_str()));
+ return ind;
+}
+
flatbuffers::Offset<circle::Buffer> CircleGen::buildBuffer(const uint8_t *buf, size_t size)
{
- if (buf == nullptr && size == 0)
+ if (buf == nullptr || size == 0)
return circle::CreateBuffer(_fbb);
auto buffer = _fbb.CreateVector(buf, size);
return circle::CreateBuffer(_fbb, buffer);
@@ -233,6 +639,68 @@ flatbuffers::Offset<circle::Tensor> CircleGen::buildTensor(const TensorParams &p
0 /* shape_signature */);
}
+flatbuffers::Offset<circle::Tensor> CircleGen::buildTensor(const TensorParams &params, float scale,
+ int64_t zero_point)
+{
+ auto shape = _fbb.CreateVector(params.shape);
+ auto name = _fbb.CreateString(params.name);
+ std::vector<float> scale_vector = {scale};
+ std::vector<int64_t> zero_point_vector = {zero_point};
+ auto quantization = circle::CreateQuantizationParametersDirect(_fbb, nullptr, nullptr,
+ &scale_vector, &zero_point_vector);
+ return circle::CreateTensor(_fbb, shape, params.tensor_type, params.buffer, name, quantization,
+ false /* is_variable */, 0 /* sparsity */, 0 /* shape_signature */);
+}
+
+flatbuffers::Offset<circle::Tensor> CircleGen::buildTensor(const TensorParams &params,
+ std::vector<float> &scales,
+ std::vector<int64_t> &zero_points)
+{
+ auto shape = _fbb.CreateVector(params.shape);
+ auto name = _fbb.CreateString(params.name);
+ auto quantization =
+ circle::CreateQuantizationParametersDirect(_fbb, nullptr, nullptr, &scales, &zero_points);
+ return circle::CreateTensor(_fbb, shape, params.tensor_type, params.buffer, name, quantization,
+ false /* is_variable */, 0 /* sparsity */, 0 /* shape_signature */);
+}
+
+flatbuffers::Offset<circle::SparsityParameters>
+CircleGen::buildSparsityParameters(const SparsityParams &sp)
+{
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order;
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map;
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>>>
+ dim_metadata;
+
+ traversal_order = _fbb.CreateVector(sp.traversal_order);
+ block_map = _fbb.CreateVector(sp.block_map);
+
+ std::vector<flatbuffers::Offset<circle::DimensionMetadata>> dim_metadata_vec;
+ for (auto &it : sp.dim_metadata)
+ {
+ auto fb_array_segments = circle::CreateUint16VectorDirect(_fbb, &it._array_segments.u16);
+ auto fb_array_indices = circle::CreateUint16VectorDirect(_fbb, &it._array_indices.u16);
+ auto dim_metadata = circle::CreateDimensionMetadata(
+ _fbb, it._format, it._dense_size, it._array_segments_type, fb_array_segments.Union(),
+ it._array_indices_type, fb_array_indices.Union());
+ dim_metadata_vec.emplace_back(dim_metadata);
+ }
+ dim_metadata = _fbb.CreateVector(dim_metadata_vec);
+
+ return circle::CreateSparsityParameters(_fbb, traversal_order, block_map, dim_metadata);
+}
+
+flatbuffers::Offset<circle::Tensor> CircleGen::buildTensor(const TensorParams &params,
+ const SparsityParams &sp)
+{
+ auto shape = _fbb.CreateVector(params.shape);
+ auto name = _fbb.CreateString(params.name);
+ auto sparsity = buildSparsityParameters(sp);
+ return circle::CreateTensor(_fbb, shape, params.tensor_type, params.buffer, name,
+ 0 /* QuantParam */, false /* is_variable */, sparsity,
+ 0 /* shape_signature */);
+}
+
flatbuffers::Offset<circle::SubGraph> CircleGen::buildSubGraph(const SubgraphContext &ctx)
{
return circle::CreateSubGraphDirect(_fbb, &ctx.tensors, &ctx.inputs, &ctx.outputs, &ctx.operators,
diff --git a/tests/nnfw_api/src/CircleGen.h b/tests/nnfw_api/src/CircleGen.h
index 09ca5a5db..d780eb1bb 100644
--- a/tests/nnfw_api/src/CircleGen.h
+++ b/tests/nnfw_api/src/CircleGen.h
@@ -52,6 +52,47 @@ private:
class CircleGen
{
public:
+ using Shape = std::vector<int32_t>;
+
+ using SparseIndexVectorType = circle::SparseIndexVector;
+ using SparseDimensionType = circle::DimensionType;
+
+ struct SparseIndexVector
+ {
+ std::vector<uint16_t> u16;
+ };
+
+ struct DimMetaData
+ {
+ DimMetaData() = delete;
+ DimMetaData(SparseDimensionType format, std::vector<uint16_t> array_segments,
+ std::vector<uint16_t> array_indices)
+ : _format{format},
+ _array_segments_type(SparseIndexVectorType::SparseIndexVector_Uint16Vector),
+ _array_indices_type(SparseIndexVectorType::SparseIndexVector_Uint16Vector)
+ {
+ _array_segments.u16 = array_segments;
+ _array_indices.u16 = array_indices;
+ }
+ DimMetaData(SparseDimensionType format, int32_t dense_size)
+ : _format{format}, _dense_size{dense_size}
+ {
+ }
+ SparseDimensionType _format{circle::DimensionType_DENSE};
+ int32_t _dense_size{0};
+ SparseIndexVectorType _array_segments_type{circle::SparseIndexVector_NONE};
+ SparseIndexVector _array_segments;
+ SparseIndexVectorType _array_indices_type{circle::SparseIndexVector_NONE};
+ SparseIndexVector _array_indices;
+ };
+
+ struct SparsityParams
+ {
+ std::vector<int32_t> traversal_order;
+ std::vector<int32_t> block_map;
+ std::vector<DimMetaData> dim_metadata;
+ };
+
struct TensorParams
{
std::vector<int32_t> shape;
@@ -86,39 +127,128 @@ public:
}
uint32_t addBuffer(const uint8_t *buf, size_t size);
uint32_t addTensor(const TensorParams &params);
+ uint32_t addTensor(const TensorParams &params, float scale, int64_t zero_point);
+ uint32_t addTensor(const TensorParams &params, std::vector<float> &scale,
+ std::vector<int64_t> &zero_point);
+ uint32_t addTensor(const TensorParams &params, const SparsityParams &sp);
void setInputsAndOutputs(const std::vector<int> &inputs, const std::vector<int> &outputs);
uint32_t nextSubgraph();
CircleBuffer finish();
- // ===== Add Operator methods begin =====
+ // ===== Add Operator methods begin (SORTED IN ALPHABETICAL ORDER) =====
uint32_t addOperatorAdd(const OperatorParams &params, circle::ActivationFunctionType actfn);
+ uint32_t addOperatorAddN(const OperatorParams &params);
+ uint32_t addOperatorArgMax(const OperatorParams &params,
+ circle::TensorType output_type = circle::TensorType::TensorType_INT32);
+ uint32_t addOperatorArgMin(const OperatorParams &params,
+ circle::TensorType output_type = circle::TensorType::TensorType_INT32);
uint32_t addOperatorAveragePool2D(const OperatorParams &params, circle::Padding padding,
int stride_w, int stride_h, int filter_w, int filter_h,
circle::ActivationFunctionType actfn);
+ uint32_t addOperatorBatchToSpaceND(const OperatorParams &params);
+ uint32_t addOperatorCast(const OperatorParams &params, circle::TensorType input_type,
+ circle::TensorType output_type);
uint32_t addOperatorConcatenation(const OperatorParams &params, int axis,
circle::ActivationFunctionType actfn);
+ uint32_t addOperatorConv2D(const OperatorParams &params, circle::Padding padding, int stride_w,
+ int stride_h, circle::ActivationFunctionType actfn, int dilation_w = 1,
+ int dilation_h = 1);
uint32_t addOperatorCos(const OperatorParams &params);
+ uint32_t addOperatorDepthToSpace(const OperatorParams &params, int32_t block_size);
+ uint32_t addOperatorDepthwiseConv2D(const OperatorParams &params, circle::Padding padding,
+ int stride_w, int stride_h, int depth_multiplier,
+ circle::ActivationFunctionType actfn, int dilation_w = 1,
+ int dilation_h = 1);
+ uint32_t addOperatorDetectionPostProcess(const OperatorParams &params, int num_classes,
+ float y_scale, float x_scale, float h_scale,
+ float w_scale, float nms_score_threshold,
+ float nms_iou_threshold, int max_detections,
+ int max_classes_per_detection, int detections_per_class);
+ uint32_t addOperatorElu(const OperatorParams &params);
+ uint32_t addOperatorEqual(const OperatorParams &params);
+ uint32_t addOperatorExpandDims(const OperatorParams &params);
+ uint32_t addOperatorFill(const OperatorParams &params);
+ uint32_t addOperatorFloor(const OperatorParams &params);
+ uint32_t addOperatorFloorDiv(const OperatorParams &params);
+ uint32_t addOperatorFullyConnected(const OperatorParams &params,
+ circle::FullyConnectedOptionsWeightsFormat weights_format =
+ circle::FullyConnectedOptionsWeightsFormat_DEFAULT);
+ uint32_t addOperatorGreater(const OperatorParams &params);
+ uint32_t addOperatorGreaterEqual(const OperatorParams &params);
+ uint32_t addOperatorIf(const OperatorParams &params, uint32_t then_subg, uint32_t else_subg);
+ uint32_t addOperatorInstanceNorm(const OperatorParams &params, float epsilon,
+ circle::ActivationFunctionType actfn);
uint32_t addOperatorL2Normalization(const OperatorParams &params);
uint32_t addOperatorLeakyRelu(const OperatorParams &params, float alpha);
uint32_t addOperatorLess(const OperatorParams &params);
+ uint32_t addOperatorLessEqual(const OperatorParams &params);
+ uint32_t addOperatorLogSoftmax(const OperatorParams &params);
+ uint32_t addOperatorMul(const OperatorParams &params, circle::ActivationFunctionType actfn);
+ uint32_t addOperatorMean(const OperatorParams &params, bool keep_dims);
uint32_t addOperatorNeg(const OperatorParams &params);
+ uint32_t addOperatorNotEqual(const OperatorParams &params);
+ uint32_t addOperatorOneHot(const OperatorParams &params, int32_t axis);
uint32_t addOperatorPad(const OperatorParams &params);
uint32_t addOperatorPadV2(const OperatorParams &params);
+ uint32_t addOperatorQuantize(const OperatorParams &params);
uint32_t addOperatorRank(const OperatorParams &params);
+ uint32_t addOperatorReduce(const OperatorParams &params, circle::BuiltinOperator reduce_op,
+ bool keep_dims);
+ uint32_t addOperatorRelu(const OperatorParams &params);
+ uint32_t addOperatorRelu6(const OperatorParams &params);
+ /**
+ * @brief Create circle Reshape op
+ * The second parameter new_shape is optional, just like in circle::CreateReshapeOptionsDirect
+ */
+ uint32_t addOperatorReshape(const OperatorParams &params, const Shape *new_shape = nullptr);
+ uint32_t addOperatorResizeBilinear(const OperatorParams &params, bool align_corners = false,
+ bool half_pixel_centers = false);
uint32_t addOperatorResizeNearestNeighbor(const OperatorParams &params);
+ uint32_t addOperatorReverseV2(const OperatorParams &params);
+ uint32_t addOperatorShape(const OperatorParams &params,
+ circle::TensorType type = circle::TensorType::TensorType_INT32);
+ uint32_t addOperatorSelect(const OperatorParams &params);
+ uint32_t addOperatorSelectV2(const OperatorParams &params);
+ uint32_t addOperatorSlice(const OperatorParams &params);
+ uint32_t addOperatorSoftmax(const OperatorParams &params, float beta);
+ uint32_t addOperatorSplit(const OperatorParams &params, int32_t num_split);
+ uint32_t addOperatorSqrt(const OperatorParams &params);
+ uint32_t addOperatorSquare(const OperatorParams &params);
+ uint32_t addOperatorStridedSlice(const OperatorParams &params, int32_t begin_mask = 0,
+ int32_t end_mask = 0, int32_t ellipsis_mask = 0,
+ int32_t new_axis_mask = 0, int32_t shrink_axis_mask = 0);
+ uint32_t addOperatorSub(const OperatorParams &params, circle::ActivationFunctionType actfn);
+ uint32_t addOperatorTile(const OperatorParams &params);
+ uint32_t addOperatorTranspose(const OperatorParams &params);
uint32_t addOperatorWhile(const OperatorParams &params, uint32_t cond_subg, uint32_t body_subg);
- // NOTE Please add addOperator functions ABOVE this lie
+ // NOTE Please add addOperator functions ABOVE this line in ALPHABETICAL ORDER
// ===== Add Operator methods end =====
private:
uint32_t addOperatorWithOptions(const OperatorParams &params, circle::BuiltinOperator opcode,
circle::BuiltinOptions options_type,
flatbuffers::Offset<void> options);
+ uint32_t addCustomOperatorWithOptions(const OperatorParams &params, std::string custom_code,
+ circle::BuiltinOptions options_type,
+ flatbuffers::Offset<void> options,
+ const std::vector<uint8_t> *custom_options,
+ circle::CustomOptionsFormat custom_options_format,
+ const std::vector<uint8_t> *mutating_variable_inputs,
+ const std::vector<int32_t> *intermediates);
uint32_t addOperatorCode(circle::BuiltinOperator opcode);
+ uint32_t addCustomOperatorCode(std::string custom_code);
flatbuffers::Offset<circle::Buffer> buildBuffer(const uint8_t *buf, size_t size);
flatbuffers::Offset<circle::Tensor> buildTensor(const TensorParams &params);
+ flatbuffers::Offset<circle::Tensor> buildTensor(const TensorParams &params, float scale,
+ int64_t zero_point);
+ flatbuffers::Offset<circle::Tensor> buildTensor(const TensorParams &params,
+ std::vector<float> &scales,
+ std::vector<int64_t> &zero_points);
+ flatbuffers::Offset<circle::SparsityParameters> buildSparsityParameters(const SparsityParams &sp);
+ flatbuffers::Offset<circle::Tensor> buildTensor(const TensorParams &params,
+ const SparsityParams &sp);
flatbuffers::Offset<circle::SubGraph> buildSubGraph(const SubgraphContext &ctx);
SubgraphContext &curSubgCtx() { return _subgraph_contexts.back(); }
diff --git a/tests/nnfw_api/src/GenModelTest.h b/tests/nnfw_api/src/GenModelTest.h
index 530ccdd8c..90b7cfcad 100644
--- a/tests/nnfw_api/src/GenModelTest.h
+++ b/tests/nnfw_api/src/GenModelTest.h
@@ -14,31 +14,141 @@
* limitations under the License.
*/
+#ifndef __NNFW_API_TEST_GEN_MODEL_TEST_H__
+#define __NNFW_API_TEST_GEN_MODEL_TEST_H__
+
#include <gtest/gtest.h>
#include <nnfw_internal.h>
#include <fstream>
#include <string>
+#include <algorithm>
+#include <cstring>
+#include <unordered_map>
#include "CircleGen.h"
#include "fixtures.h"
+inline size_t sizeOfNnfwType(NNFW_TYPE type)
+{
+ switch (type)
+ {
+ case NNFW_TYPE_TENSOR_BOOL:
+ case NNFW_TYPE_TENSOR_UINT8:
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
+ return 1;
+ case NNFW_TYPE_TENSOR_FLOAT32:
+ case NNFW_TYPE_TENSOR_INT32:
+ return 4;
+ case NNFW_TYPE_TENSOR_INT64:
+ return 8;
+ default:
+ throw std::runtime_error{"Invalid tensor type"};
+ }
+}
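+// Usage sketch: below, raw buffers are sized as num_elems(&ti) * sizeOfNnfwType(ti.dtype),
+// so a single byte-oriented helper covers every supported NNFW_TYPE.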
+
+// TODO Unify this with `SessionObject` in `fixtures.h`
+struct SessionObjectGeneric
+{
+ nnfw_session *session = nullptr;
+ std::vector<std::vector<uint8_t>> inputs;
+ std::vector<std::vector<uint8_t>> outputs;
+};
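+// Buffers hold raw bytes rather than floats, so tensors of any NNFW_TYPE can be staged.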
+
struct TestCaseData
{
/**
* @brief A vector of input buffers
- *
- * @todo support other types as well as float
*/
- std::vector<std::vector<float>> inputs;
+ std::vector<std::vector<uint8_t>> inputs;
+
/**
* @brief A vector of output buffers
+ */
+ std::vector<std::vector<uint8_t>> outputs;
+
+ /**
+ * @brief Append vector data to inputs
+ *
+ * @tparam T Data type
+ * @param data vector data array
+ */
+ template <typename T> TestCaseData &addInput(const std::vector<T> &data)
+ {
+ addData(inputs, data);
+ return *this;
+ }
+
+ /**
+ * @brief Append vector data to outputs
*
- * @todo support other types as well as float
+ * @tparam T Data type
+ * @param data vector data array
+ */
+ template <typename T> TestCaseData &addOutput(const std::vector<T> &data)
+ {
+ addData(outputs, data);
+ return *this;
+ }
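+
+ // NOTE addInput/addOutput return *this, so calls can be chained, e.g.
+ // TestCaseData{}.addInput<float>({1, 2}).addOutput<float>({3});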
+
+ /**
+ * @brief Call this when @c nnfw_run() for this test case is expected to fail
*/
- std::vector<std::vector<float>> outputs;
+ TestCaseData &expectFailRun()
+ {
+ _expected_fail_run = true;
+ return *this;
+ }
+ bool expected_fail_run() const { return _expected_fail_run; }
+
+private:
+ template <typename T>
+ static void addData(std::vector<std::vector<uint8_t>> &dest, const std::vector<T> &data)
+ {
+ size_t size = data.size() * sizeof(T);
+ dest.emplace_back();
+ dest.back().resize(size);
+ std::memcpy(dest.back().data(), data.data(), size);
+ }
+
+ bool _expected_fail_run = false;
};
+template <>
+inline void TestCaseData::addData<bool>(std::vector<std::vector<uint8_t>> &dest,
+ const std::vector<bool> &data)
+{
+ size_t size = data.size() * sizeof(uint8_t);
+ dest.emplace_back();
+ dest.back().resize(size);
+ std::transform(data.cbegin(), data.cend(), dest.back().data(),
+ [](bool b) { return static_cast<uint8_t>(b); });
+}
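+// NOTE This bool specialization exists because std::vector<bool> is bit-packed and has
+// no contiguous byte storage to memcpy from, so each element is widened to uint8_t.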
+
+/**
+ * @brief Create a TestCaseData with a uniform type
+ *
+ * A helper function for generating test cases whose model inputs/outputs all share the
+ * same data type.
+ *
+ * @tparam T Uniform tensor type
+ * @param inputs Input tensor buffers
+ * @param outputs Output tensor buffers
+ * @return TestCaseData Generated test case data
+ */
+template <typename T>
+static TestCaseData uniformTCD(const std::vector<std::vector<T>> &inputs,
+ const std::vector<std::vector<T>> &outputs)
+{
+ TestCaseData ret;
+ for (const auto &data : inputs)
+ ret.addInput(data);
+ for (const auto &data : outputs)
+ ret.addOutput(data);
+ return ret;
+}
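+// e.g. uniformTCD<float>({{1, 3}, {2, 4}}, {{3, 7}}) builds one test case with two
+// float inputs and one float output, as used throughout GenModelTests.test.cc below.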
+
+/**
+ * @brief A test configuration class
+ */
class GenModelTestContext
{
public:
@@ -66,11 +176,32 @@ public:
const std::vector<std::string> &backends() const { return _backends; }
/**
+ * @brief Return whether the test is expected to fail on model load
+ *
+ * @return bool true if the test is expected to fail on model load
+ */
+ bool expected_fail_model_load() const { return _expected_fail_model_load; }
+
+ /**
* @brief Return test is defined to fail on compile
*
* @return bool test is defined to fail on compile
*/
- const bool fail_compile() const { return _fail_compile; }
+ bool expected_fail_compile() const { return _expected_fail_compile; }
+
+ /**
+ * @brief Set the output buffer size of the specified output tensor
+ * Note that the output tensor size of a model with dynamic tensors is calculated
+ * while the model runs.
+ * Therefore, a sufficiently large buffer must be prepared by calling this method
+ * before running the model.
+ * The size does not need to be exact.
+ */
+ void output_sizes(uint32_t ind, size_t size) { _output_sizes[ind] = size; }
+
+ size_t output_sizes(uint32_t ind) const { return _output_sizes.at(ind); }
+
+ bool hasOutputSizes(uint32_t ind) const { return _output_sizes.find(ind) != _output_sizes.end(); }
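+
+ // e.g. a dynamic-output test calls output_sizes(0, sizeof(int32_t) * 4) so that a
+ // large-enough buffer exists before nnfw_run() computes the actual output shape.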
/**
* @brief Add a test case
@@ -96,23 +227,48 @@ public:
_backends.push_back(backend);
}
#endif
- if (backend == "cpu")
+ if (backend == "cpu" || backend == "ruy")
+ {
+ _backends.push_back(backend);
+ }
+#ifdef TEST_XNNPACK_BACKEND
+ if (backend == "xnnpack")
+ {
+ _backends.push_back(backend);
+ }
+#endif
+#ifdef TEST_GPU_CL_BACKEND
+ if (backend == "gpu_cl")
{
_backends.push_back(backend);
}
+#endif
}
}
/**
- * @brief Set the Test Fail
+ * @brief Expect failure while loading the model
+ */
+ void expectFailModelLoad() { _expected_fail_model_load = true; }
+
+ /**
+ * @brief Expect failure while compiling
+ */
+ void expectFailCompile() { _expected_fail_compile = true; }
+
+ /**
+ * @brief Expect failure during execution
*/
- void setCompileFail() { _fail_compile = true; }
+ void expectFailExecution() { _expected_fail_execution = true; }
private:
CircleBuffer _cbuf;
std::vector<TestCaseData> _test_cases;
std::vector<std::string> _backends;
- bool _fail_compile{false};
+ std::unordered_map<uint32_t, size_t> _output_sizes;
+ bool _expected_fail_model_load{false};
+ bool _expected_fail_compile{false};
+ bool _expected_fail_execution{false};
};
/**
@@ -141,12 +297,21 @@ protected:
// nnfw_load_circle_from_buffer to outside forloop
NNFW_ENSURE_SUCCESS(nnfw_create_session(&_so.session));
auto &cbuf = _context->cbuf();
- NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(_so.session, cbuf.buffer(), cbuf.size()));
+ auto model_load_result =
+ nnfw_load_circle_from_buffer(_so.session, cbuf.buffer(), cbuf.size());
+ if (_context->expected_fail_model_load())
+ {
+ ASSERT_NE(model_load_result, NNFW_STATUS_NO_ERROR);
+ std::cerr << "Failed model loading as expected." << std::endl;
+ NNFW_ENSURE_SUCCESS(nnfw_close_session(_so.session));
+ continue;
+ }
+ NNFW_ENSURE_SUCCESS(model_load_result);
NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_so.session, backend.data()));
- if (_context->fail_compile())
+ if (_context->expected_fail_compile())
{
- ASSERT_EQ(nnfw_prepare(_so.session), NNFW_STATUS_ERROR);
+ ASSERT_NE(nnfw_prepare(_so.session), NNFW_STATUS_NO_ERROR);
NNFW_ENSURE_SUCCESS(nnfw_close_session(_so.session));
continue;
@@ -162,11 +327,18 @@ protected:
nnfw_tensorinfo ti;
NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(_so.session, ind, &ti));
uint64_t input_elements = num_elems(&ti);
- _so.inputs[ind].resize(input_elements);
-
- ASSERT_EQ(nnfw_set_input(_so.session, ind, ti.dtype, _so.inputs[ind].data(),
- sizeof(float) * input_elements),
- NNFW_STATUS_NO_ERROR);
+ _so.inputs[ind].resize(input_elements * sizeOfNnfwType(ti.dtype));
+ if (_so.inputs[ind].size() == 0)
+ {
+ // Optional inputs
+ ASSERT_EQ(nnfw_set_input(_so.session, ind, ti.dtype, nullptr, 0), NNFW_STATUS_NO_ERROR);
+ }
+ else
+ {
+ ASSERT_EQ(nnfw_set_input(_so.session, ind, ti.dtype, _so.inputs[ind].data(),
+ _so.inputs[ind].size()),
+ NNFW_STATUS_NO_ERROR);
+ }
}
uint32_t num_outputs;
@@ -176,10 +348,23 @@ protected:
{
nnfw_tensorinfo ti;
NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(_so.session, ind, &ti));
- uint64_t output_elements = num_elems(&ti);
- _so.outputs[ind].resize(output_elements);
+
+ size_t size = 0; // size_t, not auto (which would deduce int): output_sizes() returns size_t
+ if (_context->hasOutputSizes(ind))
+ {
+ size = _context->output_sizes(ind);
+ }
+ else
+ {
+ uint64_t output_elements = num_elems(&ti);
+ size = output_elements * sizeOfNnfwType(ti.dtype);
+ }
+ _so.outputs[ind].resize(size);
+
ASSERT_EQ(nnfw_set_output(_so.session, ind, ti.dtype, _so.outputs[ind].data(),
- sizeof(float) * output_elements),
+ _so.outputs[ind].size()),
NNFW_STATUS_NO_ERROR);
}
@@ -193,7 +378,13 @@ protected:
{
// Fill the values
ASSERT_EQ(_so.inputs[i].size(), ref_inputs[i].size());
- memcpy(_so.inputs[i].data(), ref_inputs[i].data(), _so.inputs[i].size() * sizeof(float));
+ memcpy(_so.inputs[i].data(), ref_inputs[i].data(), ref_inputs[i].size());
+ }
+
+ if (test_case.expected_fail_run())
+ {
+ ASSERT_NE(nnfw_run(_so.session), NNFW_STATUS_NO_ERROR);
+ continue;
}
NNFW_ENSURE_SUCCESS(nnfw_run(_so.session));
@@ -201,12 +392,48 @@ protected:
ASSERT_EQ(_so.outputs.size(), ref_outputs.size());
for (uint32_t i = 0; i < _so.outputs.size(); i++)
{
+ nnfw_tensorinfo ti;
+ NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(_so.session, i, &ti));
+
// Check output tensor values
auto &ref_output = ref_outputs[i];
auto &output = _so.outputs[i];
- ASSERT_EQ(output.size(), ref_output.size());
- for (uint32_t e = 0; e < ref_output.size(); e++)
- EXPECT_NEAR(ref_output[e], output[e], 0.001); // TODO better way for handling FP error?
+ auto expected_tensor_size = ref_output.size();
+ auto actual_tensor_size = output.size();
+ ASSERT_EQ(expected_tensor_size, actual_tensor_size) << "Output #" << i;
+
+ switch (ti.dtype)
+ {
+ case NNFW_TYPE_TENSOR_BOOL:
+ compareBuffersExactBool(ref_output, output, i);
+ break;
+ case NNFW_TYPE_TENSOR_UINT8:
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
+ compareBuffersExact<uint8_t>(ref_output, output, i);
+ break;
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
+ compareBuffersExact<int8_t>(ref_output, output, i);
+ break;
+ case NNFW_TYPE_TENSOR_INT32:
+ compareBuffersExact<int32_t>(ref_output, output, i);
+ break;
+ case NNFW_TYPE_TENSOR_FLOAT32:
+ // TODO better way for handling FP error?
+ for (uint32_t e = 0; e < ref_output.size() / sizeof(float); e++)
+ {
+ float expected = reinterpret_cast<const float *>(ref_output.data())[e];
+ float actual = reinterpret_cast<const float *>(output.data())[e];
+ EXPECT_NEAR(expected, actual, 0.001)
+ << "Output #" << i << ", Element Index : " << e;
+ }
+ break;
+ case NNFW_TYPE_TENSOR_INT64:
+ compareBuffersExact<int64_t>(ref_output, output, i);
+ break;
+ default:
+ throw std::runtime_error{"Invalid tensor type"};
+ }
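+ // Integer and bool outputs must match exactly; float outputs allow an absolute
+ // tolerance of 0.001 via EXPECT_NEAR.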
+ // TODO Add shape comparison
}
}
@@ -214,7 +441,35 @@ protected:
}
}
+private:
+ template <typename T>
+ void compareBuffersExact(const std::vector<uint8_t> &ref_buf, const std::vector<uint8_t> &act_buf,
+ uint32_t index)
+ {
+ for (uint32_t e = 0; e < ref_buf.size() / sizeof(T); e++)
+ {
+ T expected = reinterpret_cast<const T *>(ref_buf.data())[e];
+ T actual = reinterpret_cast<const T *>(act_buf.data())[e];
+ EXPECT_EQ(expected, actual) << "Output #" << index << ", Element Index : " << e;
+ }
+ }
+
+ void compareBuffersExactBool(const std::vector<uint8_t> &ref_buf,
+ const std::vector<uint8_t> &act_buf, uint32_t index)
+ {
+ for (uint32_t e = 0; e < ref_buf.size() / sizeof(uint8_t); e++)
+ {
+ bool expected = (ref_buf[e] != 0); // buffers are already uint8_t; no cast needed
+ bool actual = (act_buf[e] != 0);
+ EXPECT_EQ(expected, actual) << "Output #" << index << ", Element Index : " << e;
+ }
+ }
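+
+ // Bools are compared by truthiness so any nonzero encoding of `true` still matches.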
+
protected:
- SessionObject _so;
+ SessionObjectGeneric _so;
std::unique_ptr<GenModelTestContext> _context;
};
+
+#endif // __NNFW_API_TEST_GEN_MODEL_TEST_H__
diff --git a/tests/nnfw_api/src/GenModelTests.test.cc b/tests/nnfw_api/src/GenModelTests.test.cc
new file mode 100644
index 000000000..53a3571db
--- /dev/null
+++ b/tests/nnfw_api/src/GenModelTests.test.cc
@@ -0,0 +1,400 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file This file contains miscellaneous GenModelTest test cases.
+ *
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+TEST_F(GenModelTest, UnusedConstOutputOnly)
+{
+ // A single tensor which is constant
+ CircleGen cgen;
+ uint32_t const_buf = cgen.addBuffer(std::vector<float>{9, 8, 7, 6});
+ int out_const = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32, const_buf});
+ cgen.setInputsAndOutputs({}, {out_const});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({}, {{9, 8, 7, 6}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, UnusedConstOutputAndAdd)
+{
+ // A single tensor which is constant + an Add op
+ CircleGen cgen;
+ uint32_t rhs_buf = cgen.addBuffer(std::vector<float>{5, 4, 7, 4});
+ uint32_t const_buf = cgen.addBuffer(std::vector<float>{9, 8, 7, 6});
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32, rhs_buf});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out_const = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32, const_buf});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs}, {out, out_const});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{1, 3, 2, 4}}, {{6, 7, 9, 8}, {9, 8, 7, 6}}));
+ _context->addTestCase(uniformTCD<float>({{0, 1, 2, 3}}, {{5, 5, 9, 7}, {9, 8, 7, 6}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, UsedConstOutput)
+{
+ // (( Input 1 )) ---------\
+ // |=> [ Add ] -> (( Output 1 ))
+ // (( Const Output 2 )) --<
+ // |=> [ Add ] -> (( Output 0 ))
+ // (( Input 0 )) ---------/
+ CircleGen cgen;
+ uint32_t rhs_buf = cgen.addBuffer(std::vector<float>{6, 4, 8, 1});
+ int in0 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int in1 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out0 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out1 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int const_out2 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32, rhs_buf});
+ cgen.addOperatorAdd({{in0, const_out2}, {out0}}, circle::ActivationFunctionType_NONE);
+ cgen.addOperatorAdd({{const_out2, in1}, {out1}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in0, in1}, {out0, out1, const_out2});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{1, 1, 1, 1}, {-1, -1, -1, -1}},
+ {{7, 5, 9, 2}, {5, 3, 7, 0}, {6, 4, 8, 1}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, TensorBothInputOutput)
+{
+ // A single tensor which is an input and an output at the same time
+ CircleGen cgen;
+ int t = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.setInputsAndOutputs({t}, {t});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{1, 3, 2, 4}}, {{1, 3, 2, 4}}));
+ _context->addTestCase(uniformTCD<float>({{100, 300, 200, 400}}, {{100, 300, 200, 400}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, TensorBothInputOutputCrossed)
+{
+ // Two tensors which are an input and an output at the same time
+ // But the order of inputs and outputs is changed.
+ CircleGen cgen;
+ int t1 = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+ int t2 = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.setInputsAndOutputs({t1, t2}, {t2, t1});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{1}, {2}}, {{2}, {1}}));
+ _context->addTestCase(uniformTCD<float>({{100}, {200}}, {{200}, {100}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneTensor_TwoOutputs)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{2}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{2}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out, out}); // Same tensor is used twice as output
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{1, 1}, {2, 2}}, {{3, 3}, {3, 3}}));
+ _context->addTestCase(uniformTCD<float>({{2, 4}, {7, 4}}, {{9, 8}, {9, 8}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneTensor_ThreeOutputs)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out, out, out}); // Same tensor is used 3 times as output
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{1}, {2}}, {{3}, {3}, {3}}));
+ _context->addTestCase(uniformTCD<float>({{2}, {7}}, {{9}, {9}, {9}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneTensor_InputAndTwoOutputs)
+{
+ CircleGen cgen;
+ int t = cgen.addTensor({{2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.setInputsAndOutputs({t}, {t, t}); // Same tensor is an input and 2 outputs
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{1, 1}}, {{1, 1}, {1, 1}}));
+ _context->addTestCase(uniformTCD<float>({{2, 4}}, {{2, 4}, {2, 4}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneTensor_InputAndTwoOutputsUsed)
+{
+ CircleGen cgen;
+ int t = cgen.addTensor({{2}, circle::TensorType::TensorType_FLOAT32});
+ int o = cgen.addTensor({{2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorNeg({{t}, {o}});
+ cgen.setInputsAndOutputs({t}, {t, t, o}); // Same tensor is an input and 2 outputs
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{1, 1}}, {{1, 1}, {1, 1}, {-1, -1}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneTensor_ConstAndThreeOutputs)
+{
+ CircleGen cgen;
+ uint32_t const_buf = cgen.addBuffer(std::vector<float>{2, 5});
+ int t = cgen.addTensor({{2}, circle::TensorType_FLOAT32, const_buf});
+ cgen.setInputsAndOutputs({}, {t, t, t}); // A const tensor is used as 3 outputs
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({}, {{2, 5}, {2, 5}, {2, 5}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, Reshape_with_shape_param_as_const)
+{
+ CircleGen cgen;
+ auto i32 = circle::TensorType::TensorType_INT32;
+
+ int input = cgen.addTensor({{4}, i32});
+
+ std::vector<int32_t> new_shape_data{2, 2}; // const of value [2, 2]
+ uint32_t new_shape_buf = cgen.addBuffer(new_shape_data);
+ int new_shape = cgen.addTensor({{2}, i32, new_shape_buf});
+
+ int out = cgen.addTensor({{2, 2}, i32});
+
+ // reshape with new_shape param
+ cgen.addOperatorReshape({{input, new_shape}, {out}}, &new_shape_data);
+ cgen.setInputsAndOutputs({input}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<int32_t>({{1, 2, 3, 4}}, {{1, 2, 3, 4}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_Reshape_with_shape_param_as_const)
+{
+ // We will see if Reshape with a wrong shape param generates an error during compilation
+ CircleGen cgen;
+ auto i32 = circle::TensorType::TensorType_INT32;
+
+ int input = cgen.addTensor({{4}, i32});
+
+ std::vector<int32_t> wrong_new_shape_data{2, 3}; // does not match the input shape
+ uint32_t new_shape_buf = cgen.addBuffer(wrong_new_shape_data);
+ int new_shape = cgen.addTensor({{2}, i32, new_shape_buf});
+
+ int out = cgen.addTensor({{2, 2}, i32});
+
+ cgen.addOperatorReshape({{input, new_shape}, {out}}, &wrong_new_shape_data);
+ cgen.setInputsAndOutputs({input}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<int32_t>({{1, 2, 3, 4}}, {{1, 2, 3, 4}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, Reshape_with_shape_param_as_const_float)
+{
+ CircleGen cgen;
+ auto f32 = circle::TensorType::TensorType_FLOAT32;
+ int input = cgen.addTensor({{4}, f32});
+
+ std::vector<int32_t> new_shape_data{2, 2}; // const of value [2, 2]
+ uint32_t new_shape_buf = cgen.addBuffer(new_shape_data);
+ int new_shape = cgen.addTensor({{2}, f32, new_shape_buf});
+ int out = cgen.addTensor({{2, 2}, f32});
+
+ // reshape with new_shape param
+ cgen.addOperatorReshape({{input, new_shape}, {out}}, &new_shape_data);
+ cgen.setInputsAndOutputs({input}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{1, 2, 3, 4}}, {{1, 2, 3, 4}}));
+ _context->setBackends({"gpu_cl"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_Reshape_with_shape_param_as_const_float)
+{
+ // We will see if Reshape with a wrong shape param generates an error during compilation
+ CircleGen cgen;
+ auto f32 = circle::TensorType::TensorType_FLOAT32;
+
+ int input = cgen.addTensor({{4}, f32});
+
+ std::vector<int32_t> wrong_new_shape_data{2, 3}; // does not match the input shape
+ uint32_t new_shape_buf = cgen.addBuffer(wrong_new_shape_data);
+ int new_shape = cgen.addTensor({{2}, f32, new_shape_buf});
+
+ int out = cgen.addTensor({{2, 2}, f32});
+
+ cgen.addOperatorReshape({{input, new_shape}, {out}}, &wrong_new_shape_data);
+ cgen.setInputsAndOutputs({input}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{1, 2, 3, 4}}, {{1, 2, 3, 4}}));
+ _context->setBackends({"gpu_cl"});
+
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, Reshape_without_shape_param)
+{
+ CircleGen cgen;
+ auto i32 = circle::TensorType::TensorType_INT32;
+
+ int input = cgen.addTensor({{4}, i32});
+ int new_shape = cgen.addTensor({{2}, i32}); // reshape to 2D tensor
+ int out = cgen.addTensor({{}, i32}); // exact shape is unknown since output is a dynamic tensor
+
+ // reshape without new_shape param
+ cgen.addOperatorReshape({{input, new_shape}, {out}} /* no new_shape param */);
+ cgen.setInputsAndOutputs({input, new_shape}, {out});
+
+ CircleGen::Shape new_shape_val{2, 2};
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<int32_t>({{1, 2, 3, 4}, new_shape_val}, {{1, 2, 3, 4}}));
+ _context->output_sizes(0, sizeof(int32_t) * 4);
+ _context->setBackends({"cpu" /* "acl_cl", "acl_neon" does not support dynamic tensor */});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_Reshape_without_shape_param)
+{
+ // We will see if Reshape without a shape param generates an error while running
+ CircleGen cgen;
+ auto i32 = circle::TensorType::TensorType_INT32;
+
+ int input = cgen.addTensor({{4}, i32});
+ int new_shape = cgen.addTensor({{2}, i32}); // reshape to 2D tensor
+ int out = cgen.addTensor({{}, i32}); // exact shape is unknown since output is a dynamic tensor
+
+ // reshape without new_shape param
+ cgen.addOperatorReshape({{input, new_shape}, {out}} /* no new_shape param */);
+ cgen.setInputsAndOutputs({input, new_shape}, {out});
+
+ CircleGen::Shape wrong_new_shape_val{2, 3};
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ auto tc = uniformTCD<int32_t>({{1, 2, 3, 4}, wrong_new_shape_val}, {{1, 2, 3, 4}});
+ tc.expectFailRun();
+ _context->addTestCase(tc);
+ _context->setBackends({"cpu" /* "acl_cl", "acl_neon" does not support dynamic tensor */});
+
+ SUCCEED();
+}
+
+// Test a model of the form op -> While -> op
+TEST_F(GenModelTest, while_with_input_output)
+{
+ // The model looks just like the below pseudocode
+ //
+ // x = cast(int to float)
+ // while (x < 100.0)
+ // {
+ // x = x + 10.0;
+ // }
+ // x = cast(float to int)
+
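+ // With input 0, the body runs 10 times (0 -> 10 -> ... -> 100), so the model outputs 100.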
+ CircleGen cgen;
+ std::vector<float> incr_data{10};
+ uint32_t incr_buf = cgen.addBuffer(incr_data);
+ std::vector<float> end_data{100};
+ uint32_t end_buf = cgen.addBuffer(end_data);
+
+ // primary subgraph
+ {
+ int model_in = cgen.addTensor({{1}, circle::TensorType_INT32});
+ int cast_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int while_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int model_out = cgen.addTensor({{1}, circle::TensorType_INT32});
+
+ cgen.addOperatorCast({{model_in}, {cast_out}}, circle::TensorType_INT32,
+ circle::TensorType_FLOAT32);
+ cgen.addOperatorWhile({{cast_out}, {while_out}}, 1, 2);
+ cgen.addOperatorCast({{while_out}, {model_out}}, circle::TensorType_FLOAT32,
+ circle::TensorType_INT32);
+
+ cgen.setInputsAndOutputs({model_in}, {model_out});
+ }
+
+ // cond subgraph
+ {
+ cgen.nextSubgraph();
+ int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32, end_buf});
+ int result = cgen.addTensor({{1}, circle::TensorType_BOOL});
+ cgen.addOperatorLess({{x, end}, {result}});
+ cgen.setInputsAndOutputs({x}, {result});
+ }
+
+ // body subgraph
+ {
+ cgen.nextSubgraph();
+ int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf});
+ int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({x_in}, {x_out});
+ }
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<int>({{0}}, {{100}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/ModelTestDynamicTensor.cc b/tests/nnfw_api/src/ModelTestDynamicTensor.cc
deleted file mode 100644
index e2d70d2c0..000000000
--- a/tests/nnfw_api/src/ModelTestDynamicTensor.cc
+++ /dev/null
@@ -1,547 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-#include <nnfw_internal.h>
-
-#include "common.h"
-#include "fixtures.h"
-#include "NNPackages.h"
-#include "CircleGen.h"
-
-void set_input_output(nnfw_session *session, const std::vector<float> &input,
- std::vector<float> &actual_output)
-{
- ASSERT_EQ(nnfw_set_input(session, 0, NNFW_TYPE_TENSOR_FLOAT32, input.data(),
- sizeof(float) * input.size()),
- NNFW_STATUS_NO_ERROR);
-
- ASSERT_EQ(nnfw_set_output(session, 0, NNFW_TYPE_TENSOR_FLOAT32, actual_output.data(),
- sizeof(float) * actual_output.size()),
- NNFW_STATUS_NO_ERROR);
-}
-
-void set_input_output(nnfw_session *session, const std::vector<float> &input0,
- const std::vector<float> &input1, std::vector<float> &actual_output)
-{
- ASSERT_EQ(nnfw_set_input(session, 0, NNFW_TYPE_TENSOR_FLOAT32, input0.data(),
- sizeof(float) * input0.size()),
- NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_set_input(session, 1, NNFW_TYPE_TENSOR_FLOAT32, input1.data(),
- sizeof(float) * input1.size()),
- NNFW_STATUS_NO_ERROR);
-
- ASSERT_EQ(nnfw_set_output(session, 0, NNFW_TYPE_TENSOR_FLOAT32, actual_output.data(),
- sizeof(float) * actual_output.size()),
- NNFW_STATUS_NO_ERROR);
-}
-
-/**
- * @brief Testing the following model:
- *
- * Testing the following model:
- * #1 = const(value = [-1.5, -1.0, -0.5, 0.5, 1.0, 1.5], shape=[2, 3])
- * #2 = placeholder (shape = [2]) <-------- this is an input
- * #3 = reshape(#1, #2)
- *
- * @note Run this test with "cpu" backend
- */
-// TODO Rewrite this with CircleGen
-class TestDynamicTensorReshapeModelLoaded
- : public ValidationTestModelLoaded<NNPackages::DYNAMIC_TENSOR_RESHAPE>
-{
-protected:
- void set_input_output(const std::vector<int> &new_shape, int actual_output_size,
- std::vector<float> *actual_output)
- {
- NNFW_STATUS res = nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_INT32, new_shape.data(),
- sizeof(int) * new_shape.size());
- NNFW_ENSURE_SUCCESS(res);
-
- res = nnfw_set_output(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, actual_output->data(),
- sizeof(float) * actual_output_size);
- NNFW_ENSURE_SUCCESS(res);
- }
-
- void prepare_and_set_input_output(const std::vector<int> &new_shape, int actual_output_size,
- std::vector<float> *actual_output)
- {
- NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
-
- NNFW_STATUS res = NNFW_STATUS_ERROR;
-
- res = nnfw_prepare(_session);
- NNFW_ENSURE_SUCCESS(res);
-
- set_input_output(new_shape, actual_output_size, actual_output);
- // real test case should start from calling nnfw_run()
- }
-
- // call this after calling nnfw_prepare()
- void set_input_output_and_run(const std::vector<int> &new_shape,
- const std::vector<float> &expected_output, bool no_run_error = true)
- {
- int output_element_num = expected_output.size();
- std::vector<float> actual_output(output_element_num);
-
- set_input_output(new_shape, output_element_num, &actual_output);
-
- // Do inference
- NNFW_STATUS res = nnfw_run(_session);
-
- if (no_run_error)
- {
- NNFW_ENSURE_SUCCESS(res);
-
- // output shape check
- nnfw_tensorinfo info;
- NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(_session, 0, &info));
- ASSERT_EQ(info.rank, new_shape.size());
- for (uint32_t d = 0; d < info.rank; ++d)
- ASSERT_EQ(info.dims[d], new_shape[d]);
-
- // output value check
- for (int i = 0; i < expected_output.size(); ++i)
- ASSERT_EQ(expected_output[i], actual_output[i]);
- }
- else
- {
- ASSERT_EQ(res, NNFW_STATUS_ERROR);
- }
- };
-
- void TearDown() override
- {
- ValidationTestModelLoaded<NNPackages::DYNAMIC_TENSOR_RESHAPE>::TearDown();
- }
-};
-
-TEST_F(TestDynamicTensorReshapeModelLoaded, reshape_to_3x2)
-{
- const std::vector<int> new_shape = {3, 2};
- const std::vector<float> expected = {-1.5, -1.0, -0.5, 0.5, 1.0, 1.5};
- std::vector<float> actual_output(expected.size());
-
- prepare_and_set_input_output(new_shape, expected.size(), &actual_output);
-
- // Do inference
- NNFW_STATUS res = nnfw_run(_session);
- NNFW_ENSURE_SUCCESS(res);
-
- // output value check
- for (int i = 0; i < expected.size(); ++i)
- ASSERT_EQ(expected[i], actual_output[i]);
-}
-
-/**
- * @brief Negative test.
- * Reshape's first input has 6 values but trying to reshaping to [3, 3]
- */
-TEST_F(TestDynamicTensorReshapeModelLoaded, neg_reshape_to_wrong_3x3)
-{
- const std::vector<int> wrong_shape = {3, 3}; // wrong shape input
- const int actual_element_num = 9; // whatever number
- std::vector<float> actual_output(9); // whatever size
-
- prepare_and_set_input_output(wrong_shape, actual_element_num, &actual_output);
-
- // Do inference
- NNFW_STATUS res = nnfw_run(_session);
- ASSERT_EQ(res, NNFW_STATUS_ERROR); // run should fail
-}
-
-TEST_F(TestDynamicTensorReshapeModelLoaded, reshape_multiple_executions)
-{
- NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
-
- NNFW_STATUS res = nnfw_prepare(_session);
- NNFW_ENSURE_SUCCESS(res);
-
- std::vector<int> new_shape;
- std::vector<float> expected = {-1.5, -1.0, -0.5, 0.5, 1.0, 1.5};
-
- // let's call multiple times
- new_shape = {3, 2};
- set_input_output_and_run(new_shape, expected);
-
- new_shape = {1, 6};
- set_input_output_and_run(new_shape, expected);
-
- new_shape = {6, 1};
- set_input_output_and_run(new_shape, expected);
-}
-
-TEST_F(TestDynamicTensorReshapeModelLoaded, neg_reshape_multiple_executions)
-{
- NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
-
- NNFW_STATUS res = nnfw_prepare(_session);
- NNFW_ENSURE_SUCCESS(res);
-
- std::vector<int> new_shape;
- std::vector<float> expected = {-1.5, -1.0, -0.5, 0.5, 1.0, 1.5};
-
- // let's call multiple times including the second nnfw_run() to fail
- new_shape = {3, 2};
- set_input_output_and_run(new_shape, expected);
-
- new_shape = {1, 100}; // wrong shape
- set_input_output_and_run(new_shape, expected, false); // Run will fail
-
- // next run should succeed
- new_shape = {6, 1};
- set_input_output_and_run(new_shape, expected);
-}
-
-//
-// Unknown Dimension Test
-// Trying to set unknown dim to other value before calling nnfw_prepare()
-//
-
-/**
- * @brief Testing the following model:
- *
- * #0 = placeholder([None, None]) # initially, shape is [1, 1]
- * #1 = placeholder([2, 3])
- * #2 = concat (#0, #1, axis=0)
- *
- * Calling sequence:
- * - nnfw_set_input_tensorinfo(#0, [1, 3]) # now, [1, 3]
- * - nnfw_prepare() # this should work
- * - nnfw_set_input()
- * - nnfw_run()
- *
- * @note Run this test with "cpu" backend
- */
-auto build_model_buf_Concatenation_unknwon_dims()
-{
- // Model is not important
- CircleGen cgen;
- auto f32 = circle::TensorType::TensorType_FLOAT32;
- int in1 = cgen.addTensor({{1, 1}, f32}); // consider this [None, None]
- int in2 = cgen.addTensor({{2, 3}, f32});
- int out = cgen.addTensor({{}, f32}); // scalar, meaning output shape is unspecified
- cgen.addOperatorConcatenation({{in1, in2}, {out}}, 0, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in1, in2}, {out});
- auto cbuf = cgen.finish();
- return cbuf;
-}
-
-TEST(TestDynamicTensor, concat_unknown_dim_input0_to_2x3)
-{
- nnfw_session *session = nullptr;
- NNFW_ENSURE_SUCCESS(nnfw_create_session(&session));
- const auto model_buf = build_model_buf_Concatenation_unknwon_dims();
- NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(session, model_buf.buffer(), model_buf.size()));
-
- NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session, "cpu"));
-
- const std::vector<float> input0 = {1, 2, 3}; // of shape [1, 3]
- const std::vector<float> input1 = {4, 5, 6, 7, 8, 9}; // of shape [2, 3]
-
- const std::vector<float> expected = {1, 2, 3, 4, 5, 6, 7, 8, 9};
- std::vector<float> actual_output(expected.size());
-
- // input reshaping to [1, 3]
- nnfw_tensorinfo ti = {NNFW_TYPE_TENSOR_FLOAT32, 2, {1, 3}};
- NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(session, 0, &ti));
- NNFW_ENSURE_SUCCESS(nnfw_prepare(session));
-
- set_input_output(session, input0, input1, actual_output);
-
- // Do inference
- NNFW_STATUS res = nnfw_run(session);
- NNFW_ENSURE_SUCCESS(res);
-
- // output value check
- for (int i = 0; i < expected.size(); ++i)
- ASSERT_EQ(expected[i], actual_output[i]);
-}
-
-/**
- * @brief Negative Test: Testing the following model:
- *
- * #0 = placeholder([None, None]) # initially, [1, 1]
- * #1 = placeholder([2, 3])
- * #2 = concat (#0, #1, axis=0)
- *
- * Calling sequence:
- * - nnfw_set_input tensorinfo(#0, [3, 1]) # now [3, 1]
- * - nnfw_prepare() # should fail (shape mismatch)
- * - nnfw_set_input()
- * - nnfw_run()
- *
- * @note Run this test with "cpu" backend and "linear" executor
- */
-TEST(TestDynamicTensor, neg_concat_input0_to_wrong_shape)
-{
- nnfw_session *session = nullptr;
- NNFW_ENSURE_SUCCESS(nnfw_create_session(&session));
- const auto model_buf = build_model_buf_Concatenation_unknwon_dims();
- NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(session, model_buf.buffer(), model_buf.size()));
-
- NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session, "cpu"));
-
- const std::vector<float> input0 = {1, 2, 3}; // of shape [3, 1], wrong shape
- const std::vector<float> input1 = {4, 5, 6, 7, 8, 9}; // of shape [2, 3]
-
- std::vector<float> actual_output(100); // whatever size
-
- // input reshaping to [3, 1]
- nnfw_tensorinfo ti = {NNFW_TYPE_TENSOR_FLOAT32, 2, {3, 1}};
- NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(session, 0, &ti));
-
- ASSERT_EQ(nnfw_prepare(session), NNFW_STATUS_ERROR);
-}
-
-//
-// test about calling nnfw_set_input_tensorinfo() after compilation
-//
-
-/**
- * @brief Testing the following model, which has a binary operation:
- *
- * #0 = placeholder([])
- * #1 = placeholder([1, 2, 3])
- * #2 = add (#0, #1)
- * #3 = add (#2, #2)
- *
- * Calling sequence:
- * - nnfw_prepare()
- * - nnfw_set_input_tensorinfo(#0, [2, 2, 3]) // This will make #3 tensor's shape [2, 2, 3]
- * - nnfw_set_input()
- * - nnfw_run()
- *
- * @note Run this test with "cpu" backend
- */
-auto build_model_buf_Add_unspecified_rank()
-{
- // Model is not important
- CircleGen cgen;
- auto f32 = circle::TensorType::TensorType_FLOAT32;
- int in1 = cgen.addTensor({{}, f32}); // scalar, meaning shape is unspecified
- int in2 = cgen.addTensor({{1, 2, 3}, f32});
- int op_out = cgen.addTensor({{}, f32}); // unspecified
- int out = cgen.addTensor({{}, f32}); // unspecified
- cgen.addOperatorAdd({{in1, in2}, {op_out}}, circle::ActivationFunctionType_NONE);
- cgen.addOperatorAdd({{op_out, op_out}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in1, in2}, {out});
- auto cbuf = cgen.finish();
- return cbuf;
-}
-
-TEST(TestDynamicTensor, set_input_tensorinfo_after_compilation_add)
-{
- nnfw_session *session = nullptr;
- NNFW_ENSURE_SUCCESS(nnfw_create_session(&session));
- const auto model_buf = build_model_buf_Add_unspecified_rank();
- NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(session, model_buf.buffer(), model_buf.size()));
-
- NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session, "cpu"));
-
- // input reshaping to [2, 2, 3]
- nnfw_tensorinfo input0_ti = {NNFW_TYPE_TENSOR_FLOAT32, 3, {2, 2, 3}};
-
- std::vector<float> input0 = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
- std::vector<float> input1 = {0.1, 0.1, 0.1, 0.1, 0.1, 0.1};
- std::vector<float> actual_output(12);
- std::vector<float> expected_output = {1.1 * 2, 2.1 * 2, 3.1 * 2, 4.1 * 2, 5.1 * 2, 6.1 * 2,
- 7.1 * 2, 8.1 * 2, 9.1 * 2, 10.1 * 2, 11.1 * 2, 12.1 * 2};
-
- NNFW_ENSURE_SUCCESS(nnfw_prepare(session));
-
- NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(session, 0, &input0_ti));
-
- set_input_output(session, input0, input1, actual_output);
-
- // Do inference
- NNFW_STATUS res = nnfw_run(session);
- NNFW_ENSURE_SUCCESS(res);
-
- // output value check
- for (int i = 0; i < expected_output.size(); ++i)
- ASSERT_EQ(expected_output[i], actual_output[i]);
-}
-
-/**
- * @brief Testing the following model, which has a unary operation:
- *
- * #0 = placeholder(shape = [4, 4])
- * #1 = neg (#0)
- *
- * Calling sequence:
- * - nnfw_prepare()
- * - nnfw_set_input_tensorinfo(#0, [20, 50])
- * - nnfw_set_input()
- * - nnfw_run()
- *
- * @note Run this test with "cpu" backend
- */
-
-auto build_model_buf_NEG()
-{
- // Model is not important
- CircleGen cgen;
- int in = cgen.addTensor({{4, 4}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{4, 4}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorNeg({{in}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
- auto cbuf = cgen.finish();
- return cbuf;
-}
-
-TEST(TestDynamicTensor, set_input_tensorinfo_after_compilation_neg)
-{
- nnfw_session *session = nullptr;
- NNFW_ENSURE_SUCCESS(nnfw_create_session(&session));
- const auto model_buf = build_model_buf_NEG();
- nnfw_load_circle_from_buffer(session, model_buf.buffer(), model_buf.size());
-
- NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session, "cpu"));
-
- nnfw_tensorinfo input0_ti_original = {NNFW_TYPE_TENSOR_FLOAT32, 2, {4, 4}};
-
- // input reshaping to [20, 50]
- nnfw_tensorinfo input0_ti;
- {
- input0_ti.dtype = NNFW_TYPE_TENSOR_FLOAT32;
- input0_ti.rank = 2;
- input0_ti.dims[0] = 20;
- input0_ti.dims[1] = 50;
- }
-
- std::vector<float> input0(20 * 50);
- std::vector<float> actual_output(20 * 50);
- std::vector<float> expected_output(20 * 50);
-
- for (int i = 0; i < input0.size(); i++)
- {
- input0[i] = i * 1.1;
- expected_output[i] = -1 * input0[i];
- }
-
- NNFW_ENSURE_SUCCESS(nnfw_prepare(session));
-
- // input shape check
- {
- nnfw_tensorinfo ti = {};
- NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(session, 0, &ti));
- ASSERT_TRUE(tensorInfoEqual(input0_ti_original, ti));
- }
-
- NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(session, 0, &input0_ti));
-
- // input shape check
- {
- nnfw_tensorinfo ti = {};
- NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(session, 0, &ti));
- ASSERT_TRUE(tensorInfoEqual(input0_ti, ti));
- }
-
- set_input_output(session, input0, actual_output);
-
- // Do inference
- NNFW_STATUS res = nnfw_run(session);
- NNFW_ENSURE_SUCCESS(res);
-
- // output value check
- for (int i = 0; i < expected_output.size(); ++i)
- ASSERT_EQ(expected_output[i], actual_output[i]);
-}
-
-using TestWhileDynamicModelLoaded = ValidationTestModelLoaded<NNPackages::WHILE_DYNAMIC>;
-
-// clang-format off
-const static std::vector<float> while_dynamic_input0{ 0.4325029254, 0.7332934141, 0.2969786823, 0.1540192217, 0.4608841240, 0.1523699313, 0.4334940016, 0.1022945493, 0.6928671598, 0.5891978741, 0.8283287883, 0.7041553259, 0.5243381262, 0.5623597503, 0.3395180404, 0.3212788701, 0.5248492956, 0.2551939189, 0.1338981092, 0.6406514645, 0.7089318633, 0.8164196610, 0.7689018846, 0.3551857173, 0.7668499351, 0.4942102134, 0.7345644236, 0.4689270556, 0.3495515287, 0.0768318549, 0.0868133679, 0.7823525667, 0.0791761801, 0.4397472143, 0.8150953054, 0.5074489713, 0.0895665437, 0.9451501966, 0.1064314246, 0.8803006411, 0.9903403521, 0.1259460151, 0.1889930069, 0.7466737032, 0.0553287826, 0.9712036252, 0.6352610588, 0.6301708817, 0.3079694211, 0.5367568731, 0.4070350230, 0.6815373302, 0.6948529482, 0.6158187985, 0.1485853940, 0.9162485600, 0.3622985184, 0.2672208250, 0.3396688998, 0.4135381579, 0.6450354457, 0.2386536747, 0.7072004080, 0.5289406180, 0.0643024296, 0.1969666779, 0.8667400479, 0.3396836221, 0.5878564715, 0.4551178813, 0.4318033755, 0.4376230836, 0.8211942315, 0.0230764486, 0.9005268812, 0.2147378176, 0.6036583781, 0.7161545157, 0.8246262074, 0.2989832759, 0.5491395593, 0.9779474735, 0.2006554008, 0.8227099776, 0.6018718481, 0.0132929254, 0.2212856710, 0.2032340616, 0.3059777319, 0.9094917178, 0.5409486890, 0.5595687032, 0.2436837852, 0.5649250150, 0.6730466485, 0.4421939552, 0.1432305574, 0.7053307891, 0.6284835935, 0.9216189384, 0.8686438799, 0.8385053873, 0.6248987913, 0.7697140574, 0.9808958173, 0.7571622133, 0.2297872156, 0.4201298952, 0.1305913031, 0.4572514296, 0.3072260618, 0.4668756723, 0.1919649392, 0.2050305754, 0.6062370539, 0.0006580966, 0.6217135191, 0.5123317838, 0.7305839658, 0.0610331446, 0.3614645600, 0.6455501914, 0.2919872701, 0.6446499228, 0.6293424964, 0.6947519779, 0.2680567801, 0.9756787419, 0.6422977448, 0.6911727786, 0.0343145914, 0.4764069021, 0.0876256451, 0.2926266789, 0.0487026349, 0.3558900952, 0.7788275480, 0.8566400409, 0.4791142642, 0.0595066175, 0.9609330297, 0.4075229764, 0.8758037090, 0.3485401869, 0.7945867181, 0.3457054794, 0.3327955306, 0.2870546579, 0.5697714090, 0.6144676208, 0.3251711428, 0.2342026234, 0.4153896868, 0.2149699926, 0.1064170301, 0.7240911722, 0.8196219206, 0.0208647959, 0.3081029952, 0.5742419958, 0.3027088642, 0.5005563498, 0.1707910597, 0.3358575106, 0.2290909439, 0.7788143754, 0.7611069679, 0.3525909781, 0.2308424413, 0.2585839927, 0.5973339677, 0.3728699684, 0.4975571036, 0.0781342834, 0.7119221091, 0.3926881850, 0.5501778126, 0.7364945412, 0.4965503812, 0.8785862923, 0.6024044752, 0.2638861239, 0.9093352556, 0.9069826007, 0.0359279662, 0.4043401778, 0.3457658887, 0.1013033912, 0.1810855120, 0.4946146905, 0.0194541160, 0.5453770161, 0.7965603471, 0.5493819714, 0.2422309667, 0.8376919031, 0.8350337148, 0.1898939908, 0.4576793313, 0.9535705447, 0.1353026628, 0.9474196434, 0.4256035388, 0.0255583692, 0.9593925476, 0.9245427847, 0.9780472517, 0.4356954992, 0.5673046708, 0.7346579432, 0.8614835143, 0.8782553673, 0.3395713866, 0.0013978065, 0.7640301585, 0.2504623234, 0.3626150787, 0.6888222694, 0.9404846430, 0.3519821763, 0.6855628490, 0.2415955663, 0.2107568830, 0.7718742490, 0.3419062793, 0.1280658394, 0.5126360059, 0.1722176671, 0.6543742418, 0.4206473231, 0.2138152719, 0.4514643848, 0.4293326437, 0.0042719250, 0.3195750117, 0.3874749541, 0.6262724400, 0.1620737463, 0.7417458892, 0.8521968126, 0.6405420303, 0.0713626966, 0.0474211276, 0.9068223834, 0.8541609645, 0.4279667437, 0.9738950133, 0.7167884707, 0.6812457442, 0.7938374281, 
0.2077793330, 0.5163270831, 0.8487322927, 0.6320008039, 0.5116547942, 0.0056989277, 0.5253843665, 0.1517033428, 0.9921303988, 0.8305052519, 0.0771176443, 0.4621275961, 0.0299932379, 0.8129007220, 0.0946875364, 0.4544205368, 0.0143135618, 0.6373457313, 0.8202091455, 0.3447127640, 0.8560513258, 0.8079835773, 0.9697201252, 0.1521986276, 0.2269581258, 0.2245485932, 0.3396310210, 0.2649262249, 0.7799206972, 0.4020069242, 0.4444113672, 0.8123176098, 0.6460852027, 0.2041657269, 0.7889582515, 0.6526331902, 0.6626461744, 0.6049868464, 0.6901782155, 0.3364612758, 0.3053490818, 0.1905532777, 0.5362346172, 0.3618801832, 0.3485457003, 0.4509411156, 0.5986957550, 0.7858221531, 0.8822937012, 0.8280826807, 0.5261783004, 0.7312974334, 0.6962512732, 0.5243815780, 0.2492258698, 0.1734466404, 0.2547666430, 0.9950503111, 0.1781345457, 0.5630541444, 0.4552696049, 0.8874762058, 0.5965846777, 0.3575465977, 0.1213323772, 0.2790489793, 0.3157011569, 0.6218565702, 0.0304181967, 0.4112739265, 0.7361903787, 0.6753587723, 0.3667163849, 0.6275368929, 0.4185036719, 0.4791659117, 0.1246187463, 0.6651734114, 0.1778147966, 0.8796271682, 0.3000938296, 0.5996896029, 0.5020698309, 0.1601593345, 0.4467433393, 0.0287379269, 0.9011575580, 0.2722401917, 0.1642841995, 0.9468663335, 0.0238759480, 0.7811399102, 0.2070412934, 0.3746992052, 0.8473496437, 0.3498605192, 0.2693480551, 0.1523104310, 0.9660695791, 0.8762652278, 0.1654927284, 0.8743498921, 0.3143339157, 0.3896550536, 0.7256560922, 0.2408472896, 0.0930071324, 0.3269865215, 0.8070673347, 0.1218842566, 0.9943904281, 0.6901395917, 0.9491872787, 0.3617239892, 0.5459694862, 0.9408421516, 0.5354272127, 0.0377946161, 0.3319100142, 0.9823720455, 0.2373940945, 0.2439561784, 0.0767217800, 0.1102360934, 0.6404867172, 0.7430088520, 0.0165513344, 0.9841650128, 0.0532640740, 0.1635770351, 0.3721100390, 0.0598411299, 0.6548883319, 0.3812481761, 0.8741319180, 0.6431996226, 0.0550124273, 0.2009697258, 0.6922588348, 0.0673767105, 0.3385711610, 0.6945076585, 0.7870846987, 0.3323138356, 0.1601967812, 0.9595350623, 0.6049567461, 0.2068863660, 0.2562771440, 0.1041606516, 0.3444063365, 0.1464221030, 0.8932089210, 0.2040112168, 0.3407483399, 0.3251829743, 0.4777953327, 0.0534981787, 0.3613175154, 0.6707065105, 0.1188806742, 0.8228670359, 0.9907929897, 0.1556126177, 0.5561179519, 0.0124231419, 0.2054836601, 0.5855912566, 0.8455434442, 0.2268345803, 0.1841085702, 0.1096092239, 0.8316007257, 0.5046240687, 0.2195746899, 0.9222528338, 0.3633532226, 0.9383196831, 0.8803531528, 0.5124011636, 0.3909464478, 0.2731699646, 0.1102369502, 0.7489478588, 0.0600390583, 0.9290241599, 0.1041191891, 0.9347958565, 0.5584807396, 0.7331624031, 0.2267376930, 0.2868649662, 0.0016489516, 0.2301262319, 0.5107504129, 0.6500277519, 0.6766125560, 0.2019786686, 0.5890167952, 0.7182423472, 0.6890133023, 0.4442900419, 0.5760958791, 0.1364797056, 0.8246579766, 0.2527448535, 0.5444371700, 0.1561367512, 0.7551656961, 0.7171260715, 0.4264259040, 0.3883202970, 0.9166873693, 0.6557167768, 0.0264711548, 0.0761224255, 0.4693228602, 0.5476956964, 0.6261154413, 0.7666952610, 0.9579501152, 0.2581985295, 0.2322760671, 0.8342292905, 0.8143266439, 0.5771137476, 0.5815665126, 0.9772894382, 0.2359700650, 0.6501487494, 0.7841209769, 0.2793208659, 0.1745450795, 0.9626912475, 0.2373798192, 0.1235965416, 0.4632637799, 0.3763884604, 0.9971673489, 0.3533810079, 0.3203127384, 0.6102763414, 0.3859500289, 0.5929466486, 0.6658803821, 0.4130606949, 0.0352911949, 0.9713683128, 0.7546037436, 0.9780107737, 0.3970599473, 0.0187621433, 0.4941402078, 
0.7670620680, 0.5360869765, 0.9634684920, 0.5996263027, 0.1895584762, 0.1214910895, 0.7381310463, 0.4301493466, 0.7403219938, 0.4817020297, 0.1843791455, 0.6473838091, 0.4138627350, 0.6825908422, 0.4481185675, 0.2030784935, 0.8468620777, 0.8059213758, 0.7525423169, 0.1854387224, 0.9046887755, 0.6654230952, 0.2029620409, 0.7164457440, 0.4172891080, 0.7797588110, 0.4135729969, 0.0026064927, 0.8375009894, 0.8355652690, 0.9187932014, 0.6724888086, 0.0276171323, 0.9106697440, 0.4562708735, 0.3417910039, 0.1569930464, 0.2029796541, 0.5049355626, 0.8143045306, 0.2432538420, 0.1068324223, 0.6258177757, 0.9749278426, 0.5378444791, 0.1657523215, 0.1930697113, 0.4833569825, 0.8000370264, 0.4315882921, 0.7571453452, 0.6069541574, 0.2073590159, 0.8702615499, 0.1951662153, 0.9303797483, 0.9241660833, 0.2795540988, 0.4241578877, 0.2383123934, 0.8627647758, 0.1700671613, 0.9635605216, 0.2514486313, 0.7766968012, 0.7126773596, 0.7009662986, 0.1317531914, 0.1318600327, 0.5509422421, 0.2159194350, 0.7851343751, 0.7231494188, 0.3523120880, 0.4999881089, 0.8202708960, 0.6340972185, 0.9181259274, 0.0057039275, 0.7197939754, 0.3580873907, 0.1026016176, 0.9657412767, 0.1973488480, 0.8099604845, 0.3302915096, 0.7635477781, 0.7097011805, 0.6271768212, 0.6583901644, 0.2334843278, 0.9448583126, 0.7434690595, 0.4068029821, 0.8815746307, 0.6311643124, 0.3891237080, 0.1507531852, 0.5215465426, 0.3248603344, 0.5837653279, 0.6689655185, 0.1362081915, 0.5130022764, 0.8519401550, 0.4397114217, 0.4129846096, 0.8706676960, 0.4183416367, 0.1135022715, 0.3501874208, 0.1142706573, 0.4111732543, 0.3972048163, 0.0740565360, 0.8445752263, 0.5659885406, 0.1107598469, 0.1261267066, 0.3106530905, 0.9623307586, 0.0014953646, 0.0421718284, 0.9182401299, 0.6180395484, 0.7947646379, 0.4402076006, 0.7980208993, 0.6131495237, 0.8885827065, 0.9406354427, 0.4568731785, 0.8838264346, 0.7086120248, 0.2050074339, 0.8598041534, 0.6360205412, 0.6444933414, 0.1086360887, 0.2146544755, 0.4044065177, 0.8566969037, 0.0974318087, 0.9650754929, 0.7885782719, 0.5817304850, 0.0668027699, 0.2600722611, 0.9546993971, 0.2609280050, 0.2063084394, 0.2960519791, 0.8144530654, 0.5386683941, 0.2757037580, 0.3237824142, 0.3469774723, 0.5878881812, 0.8034821153, 0.7495883107, 0.8035441637, 0.6059562564, 0.2713213861, 0.4108335674, 0.5539482832, 0.5046381950, 0.8435614705, 0.3766961098, 0.7583506107, 0.6175935268, 0.3487794399, 0.0058784639, 0.2900554240, 0.9057408571, 0.1079123169, 0.3200630546, 0.7326458693, 0.0237412248, 0.2757625282, 0.8461791873, 0.6101186872, 0.3705151379, 0.6318973899, 0.4013423026, 0.0222425349, 0.0391604938, 0.6966052055, 0.3186582327, 0.3277960122, 0.3301376998, 0.0874366611, 0.3782529831, 0.1412206143, 0.2574128807, 0.3423563242, 0.7656893730, 0.2097123116, 0.8109381199, 0.4845644534, 0.1744513661, 0.3877931535, 0.5369505286, 0.0147142150, 0.2457712293, 0.4901090264, 0.6373463869, 0.2244705260, 0.6722853184, 0.2888159454, 0.5694347620, 0.3042352200, 0.3482132256, 0.5619021654, 0.6760555506, 0.2648956776, 0.9160912037, 0.8973199129, 0.8901007175, 0.8260267973, 0.2438062280, 0.8338996172, 0.7751584649, 0.1436893344, 0.3578631580, 0.8111414909, 0.9454294443, 0.6478928924, 0.0714371502, 0.0711339787, 0.6473786235, 0.0266824700, 0.2442116290, 0.5528301001, 0.2558279037, 0.3684701622, 0.6729193330, 0.8132147193, 0.5830360651, 0.8655517101, 0.0593610443, 0.9748560190, 0.0221947283, 0.6729801893, 0.5001031756, 0.5116565824, 0.2824120522, 0.4552524984, 0.1693765223, 0.1908069402, 0.7663541436, 0.5339511037, 0.0649234429, 0.6125215292, 
0.6771115661, 0.6019635797, 0.6840563416, 0.9653987288, 0.1369341463, 0.8428027630, 0.5227881670, 0.5990189910, 0.0936695337, 0.3645765185, 0.9354769588, 0.6745044589, 0.2816980183, 0.3783183694, 0.7331027389, 0.4139548242, 0.1671119779, 0.6703656316, 0.8604171872, 0.6643752456, 0.7547178268, 0.1386961490, 0.4443438351, 0.3267543018, 0.3348949254, 0.9952459931, 0.4534417391, 0.2089741081 };
-const static std::vector<float> while_dynamic_output0{ 0.0388205424, 0.0426156297, 0.0980401114, 0.0568757951, 0.1230962500, 0.0412184112, 0.0595490113, 0.4391007423, 0.0377574340, 0.0629260018 };
-// clang-format on
-
-TEST_F(TestWhileDynamicModelLoaded, run_verify)
-{
- NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
- NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
-
- std::vector<float> actual_output0(10);
-
- nnfw_tensorinfo ti = {NNFW_TYPE_TENSOR_FLOAT32, 3, {1, 28, 28}};
- NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &ti));
-
- set_input_output(_session, while_dynamic_input0, actual_output0);
-
- NNFW_ENSURE_SUCCESS(nnfw_run(_session));
-
- nnfw_tensorinfo ti_output0_expected = {NNFW_TYPE_TENSOR_FLOAT32, 2, {1, 10}};
- NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(_session, 0, &ti));
- ASSERT_TRUE(tensorInfoEqual(ti, ti_output0_expected));
-
- // output value check
- for (int i = 0; i < actual_output0.size(); ++i)
- ASSERT_FLOAT_EQ(while_dynamic_output0[i], actual_output0[i]);
-}
-
-TEST_F(TestWhileDynamicModelLoaded, neg_run_verify)
-{
- NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
- NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
-
- nnfw_tensorinfo ti = {NNFW_TYPE_TENSOR_FLOAT32, 3, {1, 28, 28}};
- NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &ti));
-
- // Insufficient size of output (10 or more is sufficient)
- std::vector<float> actual_output0(9);
-
- set_input_output(_session, while_dynamic_input0, actual_output0);
-
- ASSERT_EQ(nnfw_run(_session), NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE);
-}
-
-using TestIfDynamicModelLoaded = ValidationTestModelLoaded<NNPackages::IF_DYNAMIC>;
-
-// clang-format off
-const static std::vector<float> if_dynamic_input0{ 0.7106545568, 0.2156167328, 0.0837147385, 0.0381200500, 0.8007305861, 0.2976274490, 0.8563324213, 0.7781477571, 0.5745304823, 0.8303883672, 0.0862579569, 0.0544887781, 0.1988027841, 0.2230974138, 0.4716774523, 0.4606758952, 0.4920695722, 0.1058474109, 0.0391142406, 0.9550740719, 0.9775217772, 0.1644495875, 0.6734005809, 0.2771040201, 0.4015675485, 0.9389892220, 0.5739571452, 0.6168602109, 0.4262073934, 0.1955287308, 0.6361171603, 0.3251913190, 0.9311535358, 0.9403554797, 0.2734249830, 0.8866292834, 0.5992837548, 0.2142961770, 0.7889495492, 0.0741494149, 0.1030917764, 0.4724597037, 0.3861218989, 0.8098146915, 0.2832616270, 0.6557519436, 0.5689851642, 0.8294774294, 0.4495503902, 0.5395354629, 0.7472639680, 0.4290334582, 0.6575341225, 0.3844197690, 0.5194811821, 0.9411858320, 0.8186575174, 0.6588338614, 0.5179415941, 0.7074140310, 0.1678132862, 0.7229011655, 0.3164389431, 0.6544682384, 0.7210181952, 0.0454275832, 0.6507202387, 0.4012205899, 0.2719061375, 0.2579342127, 0.1064170823, 0.5994709730, 0.1010676920, 0.3968397975, 0.5670611858, 0.1786351353, 0.9127767086, 0.9268618822, 0.6603804827, 0.3673154712, 0.3415949941, 0.5930755138, 0.3685272932, 0.6884198189, 0.1833280921, 0.3941298127, 0.0632725284, 0.1516269594, 0.2316887528, 0.8105147481, 0.1674028039, 0.2784884572, 0.5205677748, 0.4399658442, 0.6527903080, 0.6785870790, 0.2533956766, 0.0617546029, 0.5094803572, 0.5204600096, 0.0249194298, 0.0450648703, 0.1241398007, 0.3705165386, 0.9986394048, 0.6402000785, 0.4894598126, 0.8702902794, 0.4500190616, 0.8115220070, 0.8781826496, 0.6121248603, 0.9077111483, 0.4646541476, 0.7442384362, 0.5584337115, 0.0265889056, 0.9247944951, 0.5661407709, 0.9730864167, 0.6722183824, 0.9564477801, 0.6998952627, 0.6105464697, 0.8297851086, 0.7167860270, 0.6002981067, 0.4256598651, 0.1964918524, 0.9581518769, 0.3121621907, 0.8813912272, 0.3803862929, 0.8825226426, 0.9783715010, 0.1397246420, 0.6996101737, 0.1947445422, 0.9981691837, 0.9528205395, 0.1440794915, 0.2994889319, 0.9605104923, 0.7394120097, 0.8036665916, 0.1226263046, 0.5607838035, 0.5100311637, 0.9977583289, 0.1812620014, 0.8162402511, 0.6829946637, 0.8054547906, 0.5318715572, 0.2573204339, 0.6401459575, 0.9395645857, 0.0523465686, 0.1189657971, 0.4010948837, 0.5229173303, 0.3700955212, 0.8600971103, 0.2058345824, 0.0952973440, 0.6578513980, 0.8096982241, 0.3292799890, 0.3189097345, 0.2228140533, 0.7665079832, 0.3701375425, 0.7601019740, 0.8501300216, 0.5380855203, 0.7509619594, 0.8447382450, 0.6025870442, 0.6957519054, 0.6805172563, 0.5877657533, 0.3472520709, 0.0291769207, 0.0723123997, 0.4284786880, 0.5264689922, 0.4927068353, 0.7379829884, 0.9378200173, 0.8644418716, 0.8671935797, 0.9434295297, 0.5507473350, 0.0760083497, 0.1079615131, 0.1603826135, 0.2987570167, 0.4970068038, 0.0533443913, 0.7932291627, 0.4054899216, 0.8708239794, 0.8852948546, 0.7709504366, 0.2500700951, 0.7328734398, 0.1770015359, 0.4787373245, 0.6746702790, 0.6232759953, 0.8252257109, 0.5074343681, 0.4582579136, 0.7136889100, 0.1850759387, 0.0999758169, 0.9016878009, 0.0968299136, 0.9786298275, 0.7106815577, 0.5932894945, 0.5901473165, 0.8644450903, 0.8777941465, 0.3545308709, 0.5543619394, 0.4764245450, 0.4866352081, 0.7842248678, 0.8535351157, 0.8261910677, 0.4928103089, 0.4883008599, 0.9132300615, 0.0520589016, 0.0571883246, 0.8107213974, 0.2263001502, 0.4195134640, 0.1585850269, 0.6892622709, 0.9932649732, 0.9146085382, 0.3438154757, 0.3597939610, 0.8383805156, 0.1434784085, 0.1592836231, 0.3735914230, 0.5118701458, 
0.6597173810, 0.5932899714, 0.7643446326, 0.7639417052, 0.7257087231, 0.8367394209, 0.7241969705, 0.2863937616, 0.7383541465, 0.3918549418, 0.8693540096, 0.8002281189, 0.0121407788, 0.3702836633, 0.3193098009, 0.2857846618, 0.3450623155, 0.8419249654, 0.4484305680, 0.0768098459, 0.1011011526, 0.9832069874, 0.2806532979, 0.6486470103, 0.0038275064, 0.5200383663, 0.5825559497, 0.8526763320, 0.2604954541, 0.4765493274, 0.8257845044, 0.9679267406, 0.3583108485, 0.5755933523, 0.6114814878, 0.5805739164, 0.1076851040, 0.0532303862, 0.3102329671, 0.2268214077, 0.3422079682, 0.3890814781, 0.2123251557, 0.6259000301, 0.9530308843, 0.2377676368, 0.4969599247, 0.3911451399, 0.6869695187, 0.4768487513, 0.0319234431, 0.5153809190, 0.7592291832, 0.5699093938, 0.6517769098, 0.1294958293, 0.5191193819, 0.9886645675, 0.2082915604, 0.9330775738, 0.1966033280, 0.7179551721, 0.4047450423, 0.3280299902, 0.7132403255, 0.7453812361, 0.1643252373, 0.0279585645, 0.0323586352, 0.0771650672, 0.8751529455, 0.3228718042, 0.0091584828, 0.2462333292, 0.2639203966, 0.1246995181, 0.7825807929, 0.0825880542, 0.5019466281, 0.5546332598, 0.2470002472, 0.3974646032, 0.3941309452, 0.2988025546, 0.5270965099, 0.0565799475, 0.7965186834, 0.8401004672, 0.8962592483, 0.2836867571, 0.9854408503, 0.1736569554, 0.3543607295, 0.1489263922, 0.0296417754, 0.8644942045, 0.5768237114, 0.5055403709, 0.7033663988, 0.7610059381, 0.7680964470, 0.9276048541, 0.4661210179, 0.1926902831, 0.8331482410, 0.3478438258, 0.4423305690, 0.1226840168, 0.2631755769, 0.7300418615, 0.8501742482, 0.7732837200, 0.1645421237, 0.9328539968, 0.3299001455, 0.1737864316, 0.6760513186, 0.6878529191, 0.8000500202, 0.7643007040, 0.8427000046, 0.7743517756, 0.4847290516, 0.5107879639, 0.1321444362, 0.2521093190, 0.6971111894, 0.9226302505, 0.7618960738, 0.0798677281, 0.9345219731, 0.3526974618, 0.5779649615, 0.6659775376, 0.0080328183, 0.6179481745, 0.3388322592, 0.8871348500, 0.3849443495, 0.5805974007, 0.4485530853, 0.0118454825, 0.1535516083, 0.9892683029, 0.6305456758, 0.8417525887, 0.9201779366, 0.5443179011, 0.3694557250, 0.9480580688, 0.0420885272, 0.3705308735, 0.1857404709, 0.2711791396, 0.3184533417, 0.2894020677, 0.8524381518, 0.1369639933, 0.5524237156, 0.2515565455, 0.2611325383, 0.7106022239, 0.7720850706, 0.5917789340, 0.1294544786, 0.1406515092, 0.4081685841, 0.7773256898, 0.0337970816, 0.2720888555, 0.6040735841, 0.4713420272, 0.2154571265, 0.7050493360, 0.5699684024, 0.8653516769, 0.2943878472, 0.0710595697, 0.7601916790, 0.8260607719, 0.5490139127, 0.2270360142, 0.6353984475, 0.0237506367, 0.1613635123, 0.2657604814, 0.9112974405, 0.3940451145, 0.9857107997, 0.6584201455, 0.2996906042, 0.6385321617, 0.3025711179, 0.5442391634, 0.5316760540, 0.9278558493, 0.2960957289, 0.2758596539, 0.8092618585, 0.7210826278, 0.5532572269, 0.0433825813, 0.4293606579, 0.9231137037, 0.7861453891, 0.0529759154, 0.2881730795, 0.4177611172, 0.0751738325, 0.2110737860, 0.0087767169, 0.9394732714, 0.7669738531, 0.1285874546, 0.0892729312, 0.7701640129, 0.3619799912, 0.1591310948, 0.5716432333, 0.3634774089, 0.5689123273, 0.1703432053, 0.7500917912, 0.8368289471, 0.6899937391, 0.8733949065, 0.3469920754, 0.9645365477, 0.9452517629, 0.0622390397, 0.0313139819, 0.9253467917, 0.5542111993, 0.4027656317, 0.5191525817, 0.3981988430, 0.7461462021, 0.6761778593, 0.2998072505, 0.8195981979, 0.6851982474, 0.0545753241, 0.1639913172, 0.8172791600, 0.7425212264, 0.1970316321, 0.1586989313, 0.3941454589, 0.8775137067, 0.3532845676, 0.1445332468, 0.4015854299, 0.7155395746, 
0.4261780679, 0.7957311273, 0.8265135884, 0.5879834294, 0.7252638340, 0.3942884803, 0.7504889965, 0.5733796358, 0.7747340798, 0.9431585670, 0.5627400875, 0.3371616900, 0.6190663576, 0.5733695626, 0.2214016914, 0.8767938614, 0.2509712279, 0.6909803748, 0.3777657151, 0.6170743704, 0.7373610735, 0.0204360615, 0.7325904369, 0.4920690358, 0.5081653595, 0.9917234182, 0.2093250901, 0.8361138105, 0.7211740017, 0.2606147230, 0.3064637780, 0.1124278903, 0.6320124269, 0.2425052077, 0.4785803258, 0.4747911394, 0.8021139503, 0.3956191838, 0.7217889428, 0.7445480227, 0.1360257119, 0.3709513843, 0.5552678704, 0.2192365974, 0.9431814551, 0.8592399359, 0.7907270789, 0.5545215607, 0.6895139813, 0.1169689223, 0.2043674886, 0.0381150991, 0.7708708644, 0.4759636819, 0.9230924845, 0.6857032776, 0.4432366490, 0.3041133285, 0.7970084548, 0.5629503727, 0.2329168320, 0.2320910394, 0.8098289967, 0.8152811527, 0.9269255996, 0.2628753185, 0.7178934216, 0.1607068628, 0.6057552695, 0.5256694555, 0.5559988022, 0.8001552820, 0.5592993498, 0.5585735440, 0.7596833110, 0.4926379025, 0.8108907342, 0.5142205954, 0.8292154074, 0.9844856262, 0.9281103611, 0.8271671534, 0.8411998153, 0.4101325572, 0.9839829803, 0.1782312542, 0.5126013756, 0.4867194891, 0.9041156173, 0.8752650619, 0.9434064627, 0.5353408456, 0.3405859768, 0.9340458512, 0.1240679324, 0.5371315479, 0.3755141199, 0.2990591526, 0.0670647249, 0.0626592115, 0.7673836946, 0.2539713681, 0.4617587030, 0.9303754568, 0.4884444177, 0.9808034897, 0.7934950590, 0.9362392426, 0.8001930714, 0.8370914459, 0.4767935276, 0.8847136497, 0.8713309765, 0.8301703334, 0.9254899621, 0.5875709057, 0.4544037282, 0.2598260045, 0.7427998781, 0.7183818817, 0.9003841877, 0.0916625410, 0.2609814405, 0.6743535399, 0.7733583450, 0.7338136435, 0.7596724033, 0.7973198891, 0.0015392932, 0.2874146104, 0.1189730167, 0.4800435603, 0.7962353230, 0.4249678552, 0.7483268380, 0.0146148857, 0.6297842860, 0.3471757770, 0.9144366980, 0.8106345534, 0.1789025515, 0.7346886992, 0.1539165080, 0.4280290008, 0.2338476181, 0.3317435384, 0.9998268485, 0.3580373228, 0.9422348738, 0.1251947135, 0.5737128258, 0.6803853512, 0.0485891216, 0.8118965626, 0.7890921235, 0.7665926218, 0.8405004144, 0.3489693701, 0.1429360062, 0.1063490957, 0.5086215734, 0.1312662065, 0.0978318676, 0.4471830130, 0.0830681920, 0.0757851526, 0.1809245348, 0.9280508757, 0.4107315242, 0.5944178104, 0.5625417829, 0.2328256220, 0.9285324812, 0.9903659821, 0.9403946996, 0.5126894712, 0.0232842807, 0.3405880928, 0.6531285644, 0.8213183880, 0.7210904360, 0.4180826247, 0.7917050719, 0.7738851309, 0.1693093032, 0.4396123290, 0.7139748335, 0.8910710216, 0.5668603778, 0.4374921620, 0.8098046780, 0.4076835811, 0.1027061120, 0.5390046835, 0.0044658147, 0.8642644286, 0.8590582609, 0.2715446949, 0.8128718734, 0.7381446362, 0.3621498942, 0.5211849809, 0.6139976382, 0.8567240834, 0.1329502016, 0.2441152930, 0.4219030440, 0.1751736850, 0.6326612234, 0.3929811120, 0.0947103724, 0.1078760102, 0.8769059777, 0.1599343121, 0.6111860275, 0.0368208028, 0.0899466202, 0.9127882719, 0.1146656275, 0.4647151828, 0.3303563893, 0.5797663927, 0.8400436044, 0.2845958769, 0.2181742340, 0.9651557207, 0.1241061762, 0.0102593508, 0.6999664903, 0.8487475514, 0.6001151800, 0.9682601690, 0.6127328873, 0.1502806544, 0.2512893379, 0.3930048048, 0.3448313475, 0.5263126493, 0.7319667935, 0.9264212251, 0.4489789009, 0.0418849625, 0.5219999552, 0.3397078812, 0.4435234964, 0.4758536220, 0.1290920675, 0.1649249196, 0.1736114621, 0.5685442686, 0.3253444433, 0.0540574715, 0.2022368759, 
0.0260062832, 0.9889448285, 0.2064949423, 0.3756456375, 0.8462600112, 0.8166462779, 0.1788506061, 0.6607533097, 0.1638182998, 0.7888727188, 0.3304887116, 0.3085075021, 0.6626392603, 0.2860932350, 0.1577534527, 0.0126363616, 0.1958409399, 0.2475458980, 0.1514713019, 0.5241229534, 0.9845717549, 0.8002693653, 0.3091083765, 0.3348104060, 0.1341333240, 0.3546191454, 0.3800157905, 0.0364337005 };
-const static std::vector<float> if_dynamic_output0{ 0.0444660522, 0.0271649156, 0.0191113371, 0.0014375688, 0.0690929219, 0.0001767588, 0.0030322229, 0.0118752792, 0.0419745520, 0.7816683054 };
-// clang-format on
-
-TEST_F(TestIfDynamicModelLoaded, run_verify)
-{
- NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
- NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
-
- nnfw_tensorinfo ti_output0_expected = {NNFW_TYPE_TENSOR_FLOAT32, 2, {1, 10}};
-
- // Output tensor sizes are inferenced after `nnfw_prepare`
- {
- nnfw_tensorinfo ti;
- NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(_session, 0, &ti));
- ASSERT_TRUE(tensorInfoEqual(ti, ti_output0_expected));
- }
-
- std::vector<float> actual_output0(10);
- set_input_output(_session, if_dynamic_input0, actual_output0);
-
- NNFW_ENSURE_SUCCESS(nnfw_run(_session));
-
- // Check output tensor sizes again
- {
- nnfw_tensorinfo ti;
- NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(_session, 0, &ti));
- ASSERT_TRUE(tensorInfoEqual(ti, ti_output0_expected));
- }
-
- // Output value check
- for (int i = 0; i < actual_output0.size(); ++i)
- ASSERT_NEAR(if_dynamic_output0[i], actual_output0[i], 0.00001);
-}
diff --git a/tests/nnfw_api/src/ModelTestDynamicTensor.test.cc b/tests/nnfw_api/src/ModelTestDynamicTensor.test.cc
new file mode 100644
index 000000000..1ed8f9581
--- /dev/null
+++ b/tests/nnfw_api/src/ModelTestDynamicTensor.test.cc
@@ -0,0 +1,995 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <nnfw_internal.h>
+
+#include "common.h"
+#include "fixtures.h"
+#include "CircleGen.h"
+#include "GenModelTest.h"
+#include "NNPackages.h"
+
+// This macro can be used instead of NNFW_ENSURE_SUCCESS, especially in negative tests.
+// E.g., setInputOutput() is written with this macro, so the following check catches
+// any error raised while setting an input or output:
+//
+// EXPECT_ANY_THROW(setInputOutput(...));
+//
+#define THROW_WHEN_NNFW_ERROR(result) \
+ do \
+ { \
+ if (result != NNFW_STATUS_NO_ERROR) \
+ throw std::runtime_error("returning error on calling nnfw api"); \
+ } while (false)
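+
+// For reference, the raw pattern the macro wraps (a minimal sketch; any nnfw API call
+// returning NNFW_STATUS can be guarded the same way):
+//
+//   NNFW_STATUS status = nnfw_run(session);
+//   THROW_WHEN_NNFW_ERROR(status); // throws std::runtime_error unless NNFW_STATUS_NO_ERROR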
+
+template <class CPP_TYPE> struct nnfw_type;
+
+template <> struct nnfw_type<float>
+{
+ static const NNFW_TYPE dtype = NNFW_TYPE_TENSOR_FLOAT32;
+};
+
+template <> struct nnfw_type<int32_t>
+{
+ static const NNFW_TYPE dtype = NNFW_TYPE_TENSOR_INT32;
+};
+
+// TODO Add more struct nnfw_type for other types when needed
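+//
+// e.g., an int64 specialization would look like the following (a sketch; assumes
+// NNFW_TYPE_TENSOR_INT64 from nnfw.h):
+//
+// template <> struct nnfw_type<int64_t>
+// {
+//   static const NNFW_TYPE dtype = NNFW_TYPE_TENSOR_INT64;
+// };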
+
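+// Binds `input` to session input 0 and `actual_output` to session output 0. Unlike
+// NNFW_ENSURE_SUCCESS, a failure throws (see THROW_WHEN_NNFW_ERROR above), so negative
+// tests can use EXPECT_ANY_THROW(setInputOutput(...)). The overload below does the
+// same for models with two inputs.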
+template <class T_INPUT, class T_OUT>
+void setInputOutput(nnfw_session *session, const std::vector<T_INPUT> &input,
+ std::vector<T_OUT> &actual_output)
+{
+ NNFW_STATUS result;
+ result = nnfw_set_input(session, 0, nnfw_type<T_INPUT>::dtype, input.data(),
+ sizeof(T_INPUT) * input.size());
+ THROW_WHEN_NNFW_ERROR(result);
+
+ result = nnfw_set_output(session, 0, nnfw_type<T_OUT>::dtype, actual_output.data(),
+ sizeof(T_OUT) * actual_output.size());
+ THROW_WHEN_NNFW_ERROR(result);
+}
+
+template <class T_INPUT0, class T_INPUT1, class T_OUT>
+void setInputOutput(nnfw_session *session, const std::vector<T_INPUT0> &input0,
+ const std::vector<T_INPUT1> &input1, std::vector<T_OUT> &actual_output)
+{
+ NNFW_STATUS result;
+ result = nnfw_set_input(session, 0, nnfw_type<T_INPUT0>::dtype, input0.data(),
+ sizeof(T_INPUT0) * input0.size());
+ THROW_WHEN_NNFW_ERROR(result);
+
+ result = nnfw_set_input(session, 1, nnfw_type<T_INPUT1>::dtype, input1.data(),
+ sizeof(T_INPUT1) * input1.size());
+ THROW_WHEN_NNFW_ERROR(result);
+
+ result = nnfw_set_output(session, 0, nnfw_type<T_OUT>::dtype, actual_output.data(),
+ sizeof(T_OUT) * actual_output.size());
+ THROW_WHEN_NNFW_ERROR(result);
+}
+
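+// Checks that the session has exactly one output, that its nnfw_tensorinfo matches
+// `expected_ti`, and that each element of `actual` equals the corresponding element of
+// `expected` (ASSERT_FLOAT_EQ for float outputs, ASSERT_EQ otherwise).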
+template <class T_OUTPUT>
+void verifyOutput(nnfw_session *session, const nnfw_tensorinfo expected_ti,
+ const std::vector<T_OUTPUT> &expected, const std::vector<T_OUTPUT> &actual)
+{
+ uint32_t output_num = -1;
+ nnfw_tensorinfo t_out;
+ NNFW_ENSURE_SUCCESS(nnfw_output_size(session, &output_num));
+ NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(session, 0, &t_out));
+
+ ASSERT_EQ(output_num, 1);
+
+  // nnfw_tensorinfo of output
+  ASSERT_TRUE(tensorInfoEqual(t_out, expected_ti));
+
+  // value of output
+  ASSERT_EQ(expected.size(), actual.size());
+  for (size_t i = 0; i < expected.size(); i++)
+ {
+ bool is_output_float = std::is_same<T_OUTPUT, float>::value;
+ if (is_output_float)
+ ASSERT_FLOAT_EQ(expected[i], actual[i]);
+ else
+ ASSERT_EQ(expected[i], actual[i]);
+ }
+}
+
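+// Typical call order in the dynamic-tensor tests below (a sketch, not a complete test):
+//
+//   nnfw_tensorinfo ti = {NNFW_TYPE_TENSOR_FLOAT32, 2, {1, 3}};
+//   NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(session, 0, &ti)); // resize an input
+//   NNFW_ENSURE_SUCCESS(nnfw_prepare(session)); // some tests resize after prepare instead
+//   setInputOutput(session, input, actual_output);
+//   NNFW_ENSURE_SUCCESS(nnfw_run(session));
+//   verifyOutput(session, expected_ti, expected, actual_output);
+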
+/**
+ * @brief Testing the following model:
+ *
+ * #1 = const(value = [-1.5, -1.0, -0.5, 0.5, 1.0, 1.5], shape=[2, 3])
+ * #2 = placeholder (shape = [2]) <-------- this is an input
+ * #3 = reshape(#1, #2)
+ *
+ * @note Run this test with "cpu" backend
+ */
+auto build_dynamic_Reshape()
+{
+ CircleGen cgen;
+
+ auto f32 = circle::TensorType::TensorType_FLOAT32;
+ auto i32 = circle::TensorType::TensorType_INT32;
+
+  std::vector<float> input_data{-1.5, -1.0, -0.5, 0.5, 1.0, 1.5};
+  uint32_t input_buf = cgen.addBuffer(input_data); // shape = [2, 3]
+ int input = cgen.addTensor({{2, 3}, f32, input_buf});
+ int new_shape = cgen.addTensor({{2}, i32});
+ int out = cgen.addTensor({{}, f32}); // scalar, meaning output shape is unspecified
+
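+  // Pass an empty `new_shape` option below so the output shape can only be determined
+  // at runtime from the second input, which makes `out` a dynamic tensor.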
+ CircleGen::Shape empty_new_shape;
+ cgen.addOperatorReshape({{input, new_shape}, {out}}, &empty_new_shape);
+ cgen.setInputsAndOutputs({new_shape}, {out});
+ auto cbuf = cgen.finish();
+ return cbuf;
+}
+
+TEST_F(GenModelTest, dynamic_reshape_from_2x3_to_3x2)
+{
+ const std::vector<int> new_shape{3, 2};
+ const std::vector<float> expected{-1.5, -1.0, -0.5, 0.5, 1.0, 1.5};
+
+ _context = std::make_unique<GenModelTestContext>(build_dynamic_Reshape());
+ {
+ _context->addTestCase(TestCaseData{}.addInput(new_shape).addOutput(expected));
+ _context->setBackends({"cpu"}); // Currently, dynamic tensor runs on "cpu" only
+ _context->output_sizes(0, sizeof(float) * expected.size());
+ }
+ // GenModelTest::teardown() will do the rest
+ SUCCEED();
+}
+
+/**
+ * @brief Negative test.
+ * Reshape's first input has 6 values, but the test tries to reshape it to [3, 3]
+ */
+TEST_F(GenModelTest, neg_reshape_from_2x3_to_wrong_3x3)
+{
+ const std::vector<int> wrong_shape{3, 3}; // wrong shape input
+ const std::vector<float> expected{0}; // whatever
+
+ _context = std::make_unique<GenModelTestContext>(build_dynamic_Reshape());
+ {
+ _context->addTestCase(TestCaseData{}.addInput(wrong_shape).addOutput(expected).expectFailRun());
+ _context->setBackends({"cpu"}); // Currently, dynamic tensor runs on "cpu" only
+ _context->output_sizes(0, sizeof(float) * expected.size());
+ }
+ // GenModelTest::teardown() will do the rest
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, reshape_multiple_executions)
+{
+ std::vector<float> expected = {-1.5, -1.0, -0.5, 0.5, 1.0, 1.5};
+
+ _context = std::make_unique<GenModelTestContext>(build_dynamic_Reshape());
+ {
+ _context->addTestCase(TestCaseData{}.addInput<int>({3, 2}).addOutput(expected));
+ _context->addTestCase(TestCaseData{}.addInput<int>({1, 6}).addOutput(expected));
+ _context->addTestCase(TestCaseData{}.addInput<int>({6, 1}).addOutput(expected));
+
+ _context->setBackends({"cpu"}); // Currently, dynamic tensor runs on "cpu" only
+ _context->output_sizes(0, sizeof(float) * expected.size());
+ }
+ // GenModelTest::teardown() will do the rest
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_reshape_multiple_executions)
+{
+ std::vector<float> expected = {-1.5, -1.0, -0.5, 0.5, 1.0, 1.5};
+
+  auto add_tcd = [&](const std::vector<int> &new_shape, bool expect_fail_on_run) {
+ TestCaseData tcd;
+ tcd.addInput(new_shape).addOutput(expected);
+ if (expect_fail_on_run)
+ tcd.expectFailRun();
+ _context->addTestCase(tcd);
+ };
+
+ _context = std::make_unique<GenModelTestContext>(build_dynamic_Reshape());
+ {
+ bool EXPECT_FAIL_ON_RUN = true;
+ bool EXPECT_SUCCESS_ON_RUN = !EXPECT_FAIL_ON_RUN;
+
+ add_tcd({3, 2}, EXPECT_SUCCESS_ON_RUN);
+    add_tcd({1, 100}, EXPECT_FAIL_ON_RUN); // the second tcd has a wrong shape
+ add_tcd({6, 1}, EXPECT_SUCCESS_ON_RUN);
+
+ _context->setBackends({"cpu"}); // Currently, dynamic tensor runs on "cpu" only
+ _context->output_sizes(0, sizeof(float) * expected.size());
+ }
+ // GenModelTest::teardown() will do the rest
+ SUCCEED();
+}
+
+//
+// Unknown Dimension Test
+// Trying to set unknown dim to other value before calling nnfw_prepare()
+//
+
+/**
+ * @brief Testing the following model:
+ *
+ * #0 = placeholder([None, None]) # initially, shape is [1, 1]
+ * #1 = placeholder([2, 3])
+ * #2 = concat (#0, #1, axis=0)
+ *
+ * Calling sequence:
+ * - nnfw_set_input_tensorinfo(#0, [1, 3]) # now, [1, 3]
+ * - nnfw_prepare() # this should work
+ * - nnfw_set_input()
+ * - nnfw_run()
+ *
+ * @note Run this test with "cpu" backend
+ */
+auto build_model_buf_Concatenation_unknown_dims()
+{
+  // The specific operations in the model are not important for this test
+ CircleGen cgen;
+ auto f32 = circle::TensorType::TensorType_FLOAT32;
+ int in1 = cgen.addTensor({{1, 1}, f32}); // consider this [None, None]
+ int in2 = cgen.addTensor({{2, 3}, f32});
+ int out = cgen.addTensor({{}, f32}); // scalar, meaning output shape is unspecified
+ cgen.addOperatorConcatenation({{in1, in2}, {out}}, 0, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in1, in2}, {out});
+ auto cbuf = cgen.finish();
+ return cbuf;
+}
+
+TEST(TestDynamicTensor, concat_unknown_dim_input0_to_2x3)
+{
+ nnfw_session *session = nullptr;
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&session));
+  const auto model_buf = build_model_buf_Concatenation_unknown_dims();
+ NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(session, model_buf.buffer(), model_buf.size()));
+
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session, "cpu"));
+
+ const std::vector<float> input0 = {1, 2, 3}; // of shape [1, 3]
+ const std::vector<float> input1 = {4, 5, 6, 7, 8, 9}; // of shape [2, 3]
+
+ const std::vector<float> expected = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+ std::vector<float> actual_output(expected.size());
+
+ // input reshaping to [1, 3]
+ nnfw_tensorinfo ti = {NNFW_TYPE_TENSOR_FLOAT32, 2, {1, 3}};
+ NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(session, 0, &ti));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(session));
+
+ setInputOutput(session, input0, input1, actual_output);
+
+ // Do inference
+ NNFW_STATUS res = nnfw_run(session);
+ NNFW_ENSURE_SUCCESS(res);
+
+ verifyOutput(session, {NNFW_TYPE_TENSOR_FLOAT32, 2, {3, 3}}, expected, actual_output);
+
+  NNFW_ENSURE_SUCCESS(nnfw_close_session(session));
+}
+
+/**
+ * @brief Negative Test: Testing the following model:
+ *
+ * #0 = placeholder([None, None]) # initially, [1, 1]
+ * #1 = placeholder([2, 3])
+ * #2 = concat (#0, #1, axis=0)
+ *
+ * Calling sequence:
+ * - nnfw_set_input_tensorinfo(#0, [3, 1]) # now [3, 1]
+ * - nnfw_prepare() # should fail (shape mismatch)
+ * - nnfw_set_input()
+ * - nnfw_run()
+ *
+ * @note Run this test with "cpu" backend and "linear" executor
+ */
+TEST(TestDynamicTensor, neg_concat_input0_to_wrong_shape)
+{
+ nnfw_session *session = nullptr;
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&session));
+  const auto model_buf = build_model_buf_Concatenation_unknown_dims();
+ NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(session, model_buf.buffer(), model_buf.size()));
+
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session, "cpu"));
+
+ const std::vector<float> input0 = {1, 2, 3}; // of shape [3, 1], wrong shape
+ const std::vector<float> input1 = {4, 5, 6, 7, 8, 9}; // of shape [2, 3]
+
+ std::vector<float> actual_output(100); // whatever size
+
+ // input reshaping to [3, 1]
+ nnfw_tensorinfo ti = {NNFW_TYPE_TENSOR_FLOAT32, 2, {3, 1}};
+ NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(session, 0, &ti));
+
+ ASSERT_EQ(nnfw_prepare(session), NNFW_STATUS_ERROR);
+
+  NNFW_ENSURE_SUCCESS(nnfw_close_session(session));
+}
+
+//
+// test about calling nnfw_set_input_tensorinfo() after compilation
+//
+
+/**
+ * @brief Testing the following model, which has a binary operation:
+ *
+ * #0 = placeholder([])
+ * #1 = placeholder([1, 2, 3])
+ * #2 = add (#0, #1)
+ * #3 = add (#2, #2)
+ *
+ * Calling sequence:
+ * - nnfw_prepare()
+ * - nnfw_set_input_tensorinfo(#0, [2, 2, 3]) // This will make #3 tensor's shape [2, 2, 3]
+ * - nnfw_set_input()
+ * - nnfw_run()
+ *
+ * @note Run this test with "cpu" backend
+ */
+auto build_model_buf_Add_unspecified_rank()
+{
+  // The specific operations in the model are not important for this test
+ CircleGen cgen;
+ auto f32 = circle::TensorType::TensorType_FLOAT32;
+ int in1 = cgen.addTensor({{}, f32}); // scalar, meaning shape is unspecified
+ int in2 = cgen.addTensor({{1, 2, 3}, f32});
+ int op_out = cgen.addTensor({{}, f32}); // unspecified
+ int out = cgen.addTensor({{}, f32}); // unspecified
+ cgen.addOperatorAdd({{in1, in2}, {op_out}}, circle::ActivationFunctionType_NONE);
+ cgen.addOperatorAdd({{op_out, op_out}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in1, in2}, {out});
+ auto cbuf = cgen.finish();
+ return cbuf;
+}
+
+TEST(TestDynamicTensor, set_input_tensorinfo_after_compilation_add)
+{
+ nnfw_session *session = nullptr;
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&session));
+ const auto model_buf = build_model_buf_Add_unspecified_rank();
+ NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(session, model_buf.buffer(), model_buf.size()));
+
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session, "cpu"));
+
+ // input reshaping to [2, 2, 3]
+ nnfw_tensorinfo input0_ti = {NNFW_TYPE_TENSOR_FLOAT32, 3, {2, 2, 3}};
+
+ std::vector<float> input0 = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ std::vector<float> input1 = {0.1, 0.1, 0.1, 0.1, 0.1, 0.1};
+ std::vector<float> actual_output(12);
+ std::vector<float> expected_output = {1.1 * 2, 2.1 * 2, 3.1 * 2, 4.1 * 2, 5.1 * 2, 6.1 * 2,
+ 7.1 * 2, 8.1 * 2, 9.1 * 2, 10.1 * 2, 11.1 * 2, 12.1 * 2};
+
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(session));
+
+ NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(session, 0, &input0_ti));
+
+ setInputOutput(session, input0, input1, actual_output);
+
+ // Do inference
+ NNFW_STATUS res = nnfw_run(session);
+ NNFW_ENSURE_SUCCESS(res);
+
+  verifyOutput(session, {NNFW_TYPE_TENSOR_FLOAT32, 3, {2, 2, 3}}, expected_output, actual_output);
+
+  NNFW_ENSURE_SUCCESS(nnfw_close_session(session));
+}
+
+/**
+ * @brief Testing the following model, which has a unary operation:
+ *
+ * #0 = placeholder(shape = [4, 4])
+ * #1 = neg (#0)
+ *
+ * Calling sequence:
+ * - nnfw_prepare()
+ * - nnfw_set_input_tensorinfo(#0, [20, 50])
+ * - nnfw_set_input()
+ * - nnfw_run()
+ *
+ * @note Run this test with "cpu" backend
+ */
+
+auto build_model_buf_NEG()
+{
+  // The specific operations in the model are not important for this test
+ CircleGen cgen;
+ int in = cgen.addTensor({{4, 4}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{4, 4}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorNeg({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+ auto cbuf = cgen.finish();
+ return cbuf;
+}
+
+TEST(TestDynamicTensor, set_input_tensorinfo_after_compilation_neg)
+{
+ nnfw_session *session = nullptr;
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&session));
+ const auto model_buf = build_model_buf_NEG();
+  NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(session, model_buf.buffer(), model_buf.size()));
+
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session, "cpu"));
+
+ nnfw_tensorinfo input0_ti_original = {NNFW_TYPE_TENSOR_FLOAT32, 2, {4, 4}};
+
+ // input reshaping to [20, 50]
+ nnfw_tensorinfo input0_ti;
+ {
+ input0_ti.dtype = NNFW_TYPE_TENSOR_FLOAT32;
+ input0_ti.rank = 2;
+ input0_ti.dims[0] = 20;
+ input0_ti.dims[1] = 50;
+ }
+
+ std::vector<float> input0(20 * 50);
+ std::vector<float> actual_output(20 * 50);
+ std::vector<float> expected_output(20 * 50);
+
+ for (int i = 0; i < input0.size(); i++)
+ {
+ input0[i] = i * 1.1;
+ expected_output[i] = -1 * input0[i];
+ }
+
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(session));
+
+ // input shape check
+ {
+ nnfw_tensorinfo ti = {};
+ NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(session, 0, &ti));
+ ASSERT_TRUE(tensorInfoEqual(input0_ti_original, ti));
+ }
+
+ NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(session, 0, &input0_ti));
+
+ // input shape check
+ {
+ nnfw_tensorinfo ti = {};
+ NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(session, 0, &ti));
+ ASSERT_TRUE(tensorInfoEqual(input0_ti, ti));
+ }
+
+ setInputOutput(session, input0, actual_output);
+
+ // Do inference
+ NNFW_STATUS res = nnfw_run(session);
+ NNFW_ENSURE_SUCCESS(res);
+
+ // output value check
+ verifyOutput(session, {NNFW_TYPE_TENSOR_FLOAT32, 2, {20, 50}}, expected_output, actual_output);
+
+  NNFW_ENSURE_SUCCESS(nnfw_close_session(session));
+}
+
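+// Fixture that loads the WHILE_DYNAMIC nnpackage; the tests below feed a [1, 28, 28]
+// float input and expect the 10 float outputs listed next.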
+using TestWhileDynamicModelLoaded = ValidationTestModelLoaded<NNPackages::WHILE_DYNAMIC>;
+
+// clang-format off
+const static std::vector<float> while_dynamic_input0{ 0.4325029254, 0.7332934141, 0.2969786823, 0.1540192217, 0.4608841240, 0.1523699313, 0.4334940016, 0.1022945493, 0.6928671598, 0.5891978741, 0.8283287883, 0.7041553259, 0.5243381262, 0.5623597503, 0.3395180404, 0.3212788701, 0.5248492956, 0.2551939189, 0.1338981092, 0.6406514645, 0.7089318633, 0.8164196610, 0.7689018846, 0.3551857173, 0.7668499351, 0.4942102134, 0.7345644236, 0.4689270556, 0.3495515287, 0.0768318549, 0.0868133679, 0.7823525667, 0.0791761801, 0.4397472143, 0.8150953054, 0.5074489713, 0.0895665437, 0.9451501966, 0.1064314246, 0.8803006411, 0.9903403521, 0.1259460151, 0.1889930069, 0.7466737032, 0.0553287826, 0.9712036252, 0.6352610588, 0.6301708817, 0.3079694211, 0.5367568731, 0.4070350230, 0.6815373302, 0.6948529482, 0.6158187985, 0.1485853940, 0.9162485600, 0.3622985184, 0.2672208250, 0.3396688998, 0.4135381579, 0.6450354457, 0.2386536747, 0.7072004080, 0.5289406180, 0.0643024296, 0.1969666779, 0.8667400479, 0.3396836221, 0.5878564715, 0.4551178813, 0.4318033755, 0.4376230836, 0.8211942315, 0.0230764486, 0.9005268812, 0.2147378176, 0.6036583781, 0.7161545157, 0.8246262074, 0.2989832759, 0.5491395593, 0.9779474735, 0.2006554008, 0.8227099776, 0.6018718481, 0.0132929254, 0.2212856710, 0.2032340616, 0.3059777319, 0.9094917178, 0.5409486890, 0.5595687032, 0.2436837852, 0.5649250150, 0.6730466485, 0.4421939552, 0.1432305574, 0.7053307891, 0.6284835935, 0.9216189384, 0.8686438799, 0.8385053873, 0.6248987913, 0.7697140574, 0.9808958173, 0.7571622133, 0.2297872156, 0.4201298952, 0.1305913031, 0.4572514296, 0.3072260618, 0.4668756723, 0.1919649392, 0.2050305754, 0.6062370539, 0.0006580966, 0.6217135191, 0.5123317838, 0.7305839658, 0.0610331446, 0.3614645600, 0.6455501914, 0.2919872701, 0.6446499228, 0.6293424964, 0.6947519779, 0.2680567801, 0.9756787419, 0.6422977448, 0.6911727786, 0.0343145914, 0.4764069021, 0.0876256451, 0.2926266789, 0.0487026349, 0.3558900952, 0.7788275480, 0.8566400409, 0.4791142642, 0.0595066175, 0.9609330297, 0.4075229764, 0.8758037090, 0.3485401869, 0.7945867181, 0.3457054794, 0.3327955306, 0.2870546579, 0.5697714090, 0.6144676208, 0.3251711428, 0.2342026234, 0.4153896868, 0.2149699926, 0.1064170301, 0.7240911722, 0.8196219206, 0.0208647959, 0.3081029952, 0.5742419958, 0.3027088642, 0.5005563498, 0.1707910597, 0.3358575106, 0.2290909439, 0.7788143754, 0.7611069679, 0.3525909781, 0.2308424413, 0.2585839927, 0.5973339677, 0.3728699684, 0.4975571036, 0.0781342834, 0.7119221091, 0.3926881850, 0.5501778126, 0.7364945412, 0.4965503812, 0.8785862923, 0.6024044752, 0.2638861239, 0.9093352556, 0.9069826007, 0.0359279662, 0.4043401778, 0.3457658887, 0.1013033912, 0.1810855120, 0.4946146905, 0.0194541160, 0.5453770161, 0.7965603471, 0.5493819714, 0.2422309667, 0.8376919031, 0.8350337148, 0.1898939908, 0.4576793313, 0.9535705447, 0.1353026628, 0.9474196434, 0.4256035388, 0.0255583692, 0.9593925476, 0.9245427847, 0.9780472517, 0.4356954992, 0.5673046708, 0.7346579432, 0.8614835143, 0.8782553673, 0.3395713866, 0.0013978065, 0.7640301585, 0.2504623234, 0.3626150787, 0.6888222694, 0.9404846430, 0.3519821763, 0.6855628490, 0.2415955663, 0.2107568830, 0.7718742490, 0.3419062793, 0.1280658394, 0.5126360059, 0.1722176671, 0.6543742418, 0.4206473231, 0.2138152719, 0.4514643848, 0.4293326437, 0.0042719250, 0.3195750117, 0.3874749541, 0.6262724400, 0.1620737463, 0.7417458892, 0.8521968126, 0.6405420303, 0.0713626966, 0.0474211276, 0.9068223834, 0.8541609645, 0.4279667437, 0.9738950133, 0.7167884707, 0.6812457442, 0.7938374281, 
0.2077793330, 0.5163270831, 0.8487322927, 0.6320008039, 0.5116547942, 0.0056989277, 0.5253843665, 0.1517033428, 0.9921303988, 0.8305052519, 0.0771176443, 0.4621275961, 0.0299932379, 0.8129007220, 0.0946875364, 0.4544205368, 0.0143135618, 0.6373457313, 0.8202091455, 0.3447127640, 0.8560513258, 0.8079835773, 0.9697201252, 0.1521986276, 0.2269581258, 0.2245485932, 0.3396310210, 0.2649262249, 0.7799206972, 0.4020069242, 0.4444113672, 0.8123176098, 0.6460852027, 0.2041657269, 0.7889582515, 0.6526331902, 0.6626461744, 0.6049868464, 0.6901782155, 0.3364612758, 0.3053490818, 0.1905532777, 0.5362346172, 0.3618801832, 0.3485457003, 0.4509411156, 0.5986957550, 0.7858221531, 0.8822937012, 0.8280826807, 0.5261783004, 0.7312974334, 0.6962512732, 0.5243815780, 0.2492258698, 0.1734466404, 0.2547666430, 0.9950503111, 0.1781345457, 0.5630541444, 0.4552696049, 0.8874762058, 0.5965846777, 0.3575465977, 0.1213323772, 0.2790489793, 0.3157011569, 0.6218565702, 0.0304181967, 0.4112739265, 0.7361903787, 0.6753587723, 0.3667163849, 0.6275368929, 0.4185036719, 0.4791659117, 0.1246187463, 0.6651734114, 0.1778147966, 0.8796271682, 0.3000938296, 0.5996896029, 0.5020698309, 0.1601593345, 0.4467433393, 0.0287379269, 0.9011575580, 0.2722401917, 0.1642841995, 0.9468663335, 0.0238759480, 0.7811399102, 0.2070412934, 0.3746992052, 0.8473496437, 0.3498605192, 0.2693480551, 0.1523104310, 0.9660695791, 0.8762652278, 0.1654927284, 0.8743498921, 0.3143339157, 0.3896550536, 0.7256560922, 0.2408472896, 0.0930071324, 0.3269865215, 0.8070673347, 0.1218842566, 0.9943904281, 0.6901395917, 0.9491872787, 0.3617239892, 0.5459694862, 0.9408421516, 0.5354272127, 0.0377946161, 0.3319100142, 0.9823720455, 0.2373940945, 0.2439561784, 0.0767217800, 0.1102360934, 0.6404867172, 0.7430088520, 0.0165513344, 0.9841650128, 0.0532640740, 0.1635770351, 0.3721100390, 0.0598411299, 0.6548883319, 0.3812481761, 0.8741319180, 0.6431996226, 0.0550124273, 0.2009697258, 0.6922588348, 0.0673767105, 0.3385711610, 0.6945076585, 0.7870846987, 0.3323138356, 0.1601967812, 0.9595350623, 0.6049567461, 0.2068863660, 0.2562771440, 0.1041606516, 0.3444063365, 0.1464221030, 0.8932089210, 0.2040112168, 0.3407483399, 0.3251829743, 0.4777953327, 0.0534981787, 0.3613175154, 0.6707065105, 0.1188806742, 0.8228670359, 0.9907929897, 0.1556126177, 0.5561179519, 0.0124231419, 0.2054836601, 0.5855912566, 0.8455434442, 0.2268345803, 0.1841085702, 0.1096092239, 0.8316007257, 0.5046240687, 0.2195746899, 0.9222528338, 0.3633532226, 0.9383196831, 0.8803531528, 0.5124011636, 0.3909464478, 0.2731699646, 0.1102369502, 0.7489478588, 0.0600390583, 0.9290241599, 0.1041191891, 0.9347958565, 0.5584807396, 0.7331624031, 0.2267376930, 0.2868649662, 0.0016489516, 0.2301262319, 0.5107504129, 0.6500277519, 0.6766125560, 0.2019786686, 0.5890167952, 0.7182423472, 0.6890133023, 0.4442900419, 0.5760958791, 0.1364797056, 0.8246579766, 0.2527448535, 0.5444371700, 0.1561367512, 0.7551656961, 0.7171260715, 0.4264259040, 0.3883202970, 0.9166873693, 0.6557167768, 0.0264711548, 0.0761224255, 0.4693228602, 0.5476956964, 0.6261154413, 0.7666952610, 0.9579501152, 0.2581985295, 0.2322760671, 0.8342292905, 0.8143266439, 0.5771137476, 0.5815665126, 0.9772894382, 0.2359700650, 0.6501487494, 0.7841209769, 0.2793208659, 0.1745450795, 0.9626912475, 0.2373798192, 0.1235965416, 0.4632637799, 0.3763884604, 0.9971673489, 0.3533810079, 0.3203127384, 0.6102763414, 0.3859500289, 0.5929466486, 0.6658803821, 0.4130606949, 0.0352911949, 0.9713683128, 0.7546037436, 0.9780107737, 0.3970599473, 0.0187621433, 0.4941402078, 
0.7670620680, 0.5360869765, 0.9634684920, 0.5996263027, 0.1895584762, 0.1214910895, 0.7381310463, 0.4301493466, 0.7403219938, 0.4817020297, 0.1843791455, 0.6473838091, 0.4138627350, 0.6825908422, 0.4481185675, 0.2030784935, 0.8468620777, 0.8059213758, 0.7525423169, 0.1854387224, 0.9046887755, 0.6654230952, 0.2029620409, 0.7164457440, 0.4172891080, 0.7797588110, 0.4135729969, 0.0026064927, 0.8375009894, 0.8355652690, 0.9187932014, 0.6724888086, 0.0276171323, 0.9106697440, 0.4562708735, 0.3417910039, 0.1569930464, 0.2029796541, 0.5049355626, 0.8143045306, 0.2432538420, 0.1068324223, 0.6258177757, 0.9749278426, 0.5378444791, 0.1657523215, 0.1930697113, 0.4833569825, 0.8000370264, 0.4315882921, 0.7571453452, 0.6069541574, 0.2073590159, 0.8702615499, 0.1951662153, 0.9303797483, 0.9241660833, 0.2795540988, 0.4241578877, 0.2383123934, 0.8627647758, 0.1700671613, 0.9635605216, 0.2514486313, 0.7766968012, 0.7126773596, 0.7009662986, 0.1317531914, 0.1318600327, 0.5509422421, 0.2159194350, 0.7851343751, 0.7231494188, 0.3523120880, 0.4999881089, 0.8202708960, 0.6340972185, 0.9181259274, 0.0057039275, 0.7197939754, 0.3580873907, 0.1026016176, 0.9657412767, 0.1973488480, 0.8099604845, 0.3302915096, 0.7635477781, 0.7097011805, 0.6271768212, 0.6583901644, 0.2334843278, 0.9448583126, 0.7434690595, 0.4068029821, 0.8815746307, 0.6311643124, 0.3891237080, 0.1507531852, 0.5215465426, 0.3248603344, 0.5837653279, 0.6689655185, 0.1362081915, 0.5130022764, 0.8519401550, 0.4397114217, 0.4129846096, 0.8706676960, 0.4183416367, 0.1135022715, 0.3501874208, 0.1142706573, 0.4111732543, 0.3972048163, 0.0740565360, 0.8445752263, 0.5659885406, 0.1107598469, 0.1261267066, 0.3106530905, 0.9623307586, 0.0014953646, 0.0421718284, 0.9182401299, 0.6180395484, 0.7947646379, 0.4402076006, 0.7980208993, 0.6131495237, 0.8885827065, 0.9406354427, 0.4568731785, 0.8838264346, 0.7086120248, 0.2050074339, 0.8598041534, 0.6360205412, 0.6444933414, 0.1086360887, 0.2146544755, 0.4044065177, 0.8566969037, 0.0974318087, 0.9650754929, 0.7885782719, 0.5817304850, 0.0668027699, 0.2600722611, 0.9546993971, 0.2609280050, 0.2063084394, 0.2960519791, 0.8144530654, 0.5386683941, 0.2757037580, 0.3237824142, 0.3469774723, 0.5878881812, 0.8034821153, 0.7495883107, 0.8035441637, 0.6059562564, 0.2713213861, 0.4108335674, 0.5539482832, 0.5046381950, 0.8435614705, 0.3766961098, 0.7583506107, 0.6175935268, 0.3487794399, 0.0058784639, 0.2900554240, 0.9057408571, 0.1079123169, 0.3200630546, 0.7326458693, 0.0237412248, 0.2757625282, 0.8461791873, 0.6101186872, 0.3705151379, 0.6318973899, 0.4013423026, 0.0222425349, 0.0391604938, 0.6966052055, 0.3186582327, 0.3277960122, 0.3301376998, 0.0874366611, 0.3782529831, 0.1412206143, 0.2574128807, 0.3423563242, 0.7656893730, 0.2097123116, 0.8109381199, 0.4845644534, 0.1744513661, 0.3877931535, 0.5369505286, 0.0147142150, 0.2457712293, 0.4901090264, 0.6373463869, 0.2244705260, 0.6722853184, 0.2888159454, 0.5694347620, 0.3042352200, 0.3482132256, 0.5619021654, 0.6760555506, 0.2648956776, 0.9160912037, 0.8973199129, 0.8901007175, 0.8260267973, 0.2438062280, 0.8338996172, 0.7751584649, 0.1436893344, 0.3578631580, 0.8111414909, 0.9454294443, 0.6478928924, 0.0714371502, 0.0711339787, 0.6473786235, 0.0266824700, 0.2442116290, 0.5528301001, 0.2558279037, 0.3684701622, 0.6729193330, 0.8132147193, 0.5830360651, 0.8655517101, 0.0593610443, 0.9748560190, 0.0221947283, 0.6729801893, 0.5001031756, 0.5116565824, 0.2824120522, 0.4552524984, 0.1693765223, 0.1908069402, 0.7663541436, 0.5339511037, 0.0649234429, 0.6125215292, 
0.6771115661, 0.6019635797, 0.6840563416, 0.9653987288, 0.1369341463, 0.8428027630, 0.5227881670, 0.5990189910, 0.0936695337, 0.3645765185, 0.9354769588, 0.6745044589, 0.2816980183, 0.3783183694, 0.7331027389, 0.4139548242, 0.1671119779, 0.6703656316, 0.8604171872, 0.6643752456, 0.7547178268, 0.1386961490, 0.4443438351, 0.3267543018, 0.3348949254, 0.9952459931, 0.4534417391, 0.2089741081 };
+const static std::vector<float> while_dynamic_output0{ 0.0388205424, 0.0426156297, 0.0980401114, 0.0568757951, 0.1230962500, 0.0412184112, 0.0595490113, 0.4391007423, 0.0377574340, 0.0629260018 };
+// clang-format on
+
+TEST_F(TestWhileDynamicModelLoaded, run_verify)
+{
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
+
+ std::vector<float> actual_output0(10);
+
+ nnfw_tensorinfo ti = {NNFW_TYPE_TENSOR_FLOAT32, 3, {1, 28, 28}};
+ NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &ti));
+
+ setInputOutput(_session, while_dynamic_input0, actual_output0);
+
+ NNFW_ENSURE_SUCCESS(nnfw_run(_session));
+
+ // output check
+ verifyOutput(_session, {NNFW_TYPE_TENSOR_FLOAT32, 2, {1, 10}}, while_dynamic_output0,
+ actual_output0);
+}
+
+TEST_F(TestWhileDynamicModelLoaded, neg_run_verify)
+{
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
+
+ nnfw_tensorinfo ti = {NNFW_TYPE_TENSOR_FLOAT32, 3, {1, 28, 28}};
+ NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &ti));
+
+  // Output buffer is too small (an output of 10 or more floats would be sufficient)
+ std::vector<float> actual_output0(9);
+
+ setInputOutput(_session, while_dynamic_input0, actual_output0);
+
+ ASSERT_EQ(nnfw_run(_session), NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE);
+}
+
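+// Fixture that loads the IF_DYNAMIC nnpackage; the expected output tensorinfo is
+// [1, 10] float.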
+using TestIfDynamicModelLoaded = ValidationTestModelLoaded<NNPackages::IF_DYNAMIC>;
+
+// clang-format off
+const static std::vector<float> if_dynamic_input0{ 0.7106545568, 0.2156167328, 0.0837147385, 0.0381200500, 0.8007305861, 0.2976274490, 0.8563324213, 0.7781477571, 0.5745304823, 0.8303883672, 0.0862579569, 0.0544887781, 0.1988027841, 0.2230974138, 0.4716774523, 0.4606758952, 0.4920695722, 0.1058474109, 0.0391142406, 0.9550740719, 0.9775217772, 0.1644495875, 0.6734005809, 0.2771040201, 0.4015675485, 0.9389892220, 0.5739571452, 0.6168602109, 0.4262073934, 0.1955287308, 0.6361171603, 0.3251913190, 0.9311535358, 0.9403554797, 0.2734249830, 0.8866292834, 0.5992837548, 0.2142961770, 0.7889495492, 0.0741494149, 0.1030917764, 0.4724597037, 0.3861218989, 0.8098146915, 0.2832616270, 0.6557519436, 0.5689851642, 0.8294774294, 0.4495503902, 0.5395354629, 0.7472639680, 0.4290334582, 0.6575341225, 0.3844197690, 0.5194811821, 0.9411858320, 0.8186575174, 0.6588338614, 0.5179415941, 0.7074140310, 0.1678132862, 0.7229011655, 0.3164389431, 0.6544682384, 0.7210181952, 0.0454275832, 0.6507202387, 0.4012205899, 0.2719061375, 0.2579342127, 0.1064170823, 0.5994709730, 0.1010676920, 0.3968397975, 0.5670611858, 0.1786351353, 0.9127767086, 0.9268618822, 0.6603804827, 0.3673154712, 0.3415949941, 0.5930755138, 0.3685272932, 0.6884198189, 0.1833280921, 0.3941298127, 0.0632725284, 0.1516269594, 0.2316887528, 0.8105147481, 0.1674028039, 0.2784884572, 0.5205677748, 0.4399658442, 0.6527903080, 0.6785870790, 0.2533956766, 0.0617546029, 0.5094803572, 0.5204600096, 0.0249194298, 0.0450648703, 0.1241398007, 0.3705165386, 0.9986394048, 0.6402000785, 0.4894598126, 0.8702902794, 0.4500190616, 0.8115220070, 0.8781826496, 0.6121248603, 0.9077111483, 0.4646541476, 0.7442384362, 0.5584337115, 0.0265889056, 0.9247944951, 0.5661407709, 0.9730864167, 0.6722183824, 0.9564477801, 0.6998952627, 0.6105464697, 0.8297851086, 0.7167860270, 0.6002981067, 0.4256598651, 0.1964918524, 0.9581518769, 0.3121621907, 0.8813912272, 0.3803862929, 0.8825226426, 0.9783715010, 0.1397246420, 0.6996101737, 0.1947445422, 0.9981691837, 0.9528205395, 0.1440794915, 0.2994889319, 0.9605104923, 0.7394120097, 0.8036665916, 0.1226263046, 0.5607838035, 0.5100311637, 0.9977583289, 0.1812620014, 0.8162402511, 0.6829946637, 0.8054547906, 0.5318715572, 0.2573204339, 0.6401459575, 0.9395645857, 0.0523465686, 0.1189657971, 0.4010948837, 0.5229173303, 0.3700955212, 0.8600971103, 0.2058345824, 0.0952973440, 0.6578513980, 0.8096982241, 0.3292799890, 0.3189097345, 0.2228140533, 0.7665079832, 0.3701375425, 0.7601019740, 0.8501300216, 0.5380855203, 0.7509619594, 0.8447382450, 0.6025870442, 0.6957519054, 0.6805172563, 0.5877657533, 0.3472520709, 0.0291769207, 0.0723123997, 0.4284786880, 0.5264689922, 0.4927068353, 0.7379829884, 0.9378200173, 0.8644418716, 0.8671935797, 0.9434295297, 0.5507473350, 0.0760083497, 0.1079615131, 0.1603826135, 0.2987570167, 0.4970068038, 0.0533443913, 0.7932291627, 0.4054899216, 0.8708239794, 0.8852948546, 0.7709504366, 0.2500700951, 0.7328734398, 0.1770015359, 0.4787373245, 0.6746702790, 0.6232759953, 0.8252257109, 0.5074343681, 0.4582579136, 0.7136889100, 0.1850759387, 0.0999758169, 0.9016878009, 0.0968299136, 0.9786298275, 0.7106815577, 0.5932894945, 0.5901473165, 0.8644450903, 0.8777941465, 0.3545308709, 0.5543619394, 0.4764245450, 0.4866352081, 0.7842248678, 0.8535351157, 0.8261910677, 0.4928103089, 0.4883008599, 0.9132300615, 0.0520589016, 0.0571883246, 0.8107213974, 0.2263001502, 0.4195134640, 0.1585850269, 0.6892622709, 0.9932649732, 0.9146085382, 0.3438154757, 0.3597939610, 0.8383805156, 0.1434784085, 0.1592836231, 0.3735914230, 0.5118701458, 
0.6597173810, 0.5932899714, 0.7643446326, 0.7639417052, 0.7257087231, 0.8367394209, 0.7241969705, 0.2863937616, 0.7383541465, 0.3918549418, 0.8693540096, 0.8002281189, 0.0121407788, 0.3702836633, 0.3193098009, 0.2857846618, 0.3450623155, 0.8419249654, 0.4484305680, 0.0768098459, 0.1011011526, 0.9832069874, 0.2806532979, 0.6486470103, 0.0038275064, 0.5200383663, 0.5825559497, 0.8526763320, 0.2604954541, 0.4765493274, 0.8257845044, 0.9679267406, 0.3583108485, 0.5755933523, 0.6114814878, 0.5805739164, 0.1076851040, 0.0532303862, 0.3102329671, 0.2268214077, 0.3422079682, 0.3890814781, 0.2123251557, 0.6259000301, 0.9530308843, 0.2377676368, 0.4969599247, 0.3911451399, 0.6869695187, 0.4768487513, 0.0319234431, 0.5153809190, 0.7592291832, 0.5699093938, 0.6517769098, 0.1294958293, 0.5191193819, 0.9886645675, 0.2082915604, 0.9330775738, 0.1966033280, 0.7179551721, 0.4047450423, 0.3280299902, 0.7132403255, 0.7453812361, 0.1643252373, 0.0279585645, 0.0323586352, 0.0771650672, 0.8751529455, 0.3228718042, 0.0091584828, 0.2462333292, 0.2639203966, 0.1246995181, 0.7825807929, 0.0825880542, 0.5019466281, 0.5546332598, 0.2470002472, 0.3974646032, 0.3941309452, 0.2988025546, 0.5270965099, 0.0565799475, 0.7965186834, 0.8401004672, 0.8962592483, 0.2836867571, 0.9854408503, 0.1736569554, 0.3543607295, 0.1489263922, 0.0296417754, 0.8644942045, 0.5768237114, 0.5055403709, 0.7033663988, 0.7610059381, 0.7680964470, 0.9276048541, 0.4661210179, 0.1926902831, 0.8331482410, 0.3478438258, 0.4423305690, 0.1226840168, 0.2631755769, 0.7300418615, 0.8501742482, 0.7732837200, 0.1645421237, 0.9328539968, 0.3299001455, 0.1737864316, 0.6760513186, 0.6878529191, 0.8000500202, 0.7643007040, 0.8427000046, 0.7743517756, 0.4847290516, 0.5107879639, 0.1321444362, 0.2521093190, 0.6971111894, 0.9226302505, 0.7618960738, 0.0798677281, 0.9345219731, 0.3526974618, 0.5779649615, 0.6659775376, 0.0080328183, 0.6179481745, 0.3388322592, 0.8871348500, 0.3849443495, 0.5805974007, 0.4485530853, 0.0118454825, 0.1535516083, 0.9892683029, 0.6305456758, 0.8417525887, 0.9201779366, 0.5443179011, 0.3694557250, 0.9480580688, 0.0420885272, 0.3705308735, 0.1857404709, 0.2711791396, 0.3184533417, 0.2894020677, 0.8524381518, 0.1369639933, 0.5524237156, 0.2515565455, 0.2611325383, 0.7106022239, 0.7720850706, 0.5917789340, 0.1294544786, 0.1406515092, 0.4081685841, 0.7773256898, 0.0337970816, 0.2720888555, 0.6040735841, 0.4713420272, 0.2154571265, 0.7050493360, 0.5699684024, 0.8653516769, 0.2943878472, 0.0710595697, 0.7601916790, 0.8260607719, 0.5490139127, 0.2270360142, 0.6353984475, 0.0237506367, 0.1613635123, 0.2657604814, 0.9112974405, 0.3940451145, 0.9857107997, 0.6584201455, 0.2996906042, 0.6385321617, 0.3025711179, 0.5442391634, 0.5316760540, 0.9278558493, 0.2960957289, 0.2758596539, 0.8092618585, 0.7210826278, 0.5532572269, 0.0433825813, 0.4293606579, 0.9231137037, 0.7861453891, 0.0529759154, 0.2881730795, 0.4177611172, 0.0751738325, 0.2110737860, 0.0087767169, 0.9394732714, 0.7669738531, 0.1285874546, 0.0892729312, 0.7701640129, 0.3619799912, 0.1591310948, 0.5716432333, 0.3634774089, 0.5689123273, 0.1703432053, 0.7500917912, 0.8368289471, 0.6899937391, 0.8733949065, 0.3469920754, 0.9645365477, 0.9452517629, 0.0622390397, 0.0313139819, 0.9253467917, 0.5542111993, 0.4027656317, 0.5191525817, 0.3981988430, 0.7461462021, 0.6761778593, 0.2998072505, 0.8195981979, 0.6851982474, 0.0545753241, 0.1639913172, 0.8172791600, 0.7425212264, 0.1970316321, 0.1586989313, 0.3941454589, 0.8775137067, 0.3532845676, 0.1445332468, 0.4015854299, 0.7155395746, 
0.4261780679, 0.7957311273, 0.8265135884, 0.5879834294, 0.7252638340, 0.3942884803, 0.7504889965, 0.5733796358, 0.7747340798, 0.9431585670, 0.5627400875, 0.3371616900, 0.6190663576, 0.5733695626, 0.2214016914, 0.8767938614, 0.2509712279, 0.6909803748, 0.3777657151, 0.6170743704, 0.7373610735, 0.0204360615, 0.7325904369, 0.4920690358, 0.5081653595, 0.9917234182, 0.2093250901, 0.8361138105, 0.7211740017, 0.2606147230, 0.3064637780, 0.1124278903, 0.6320124269, 0.2425052077, 0.4785803258, 0.4747911394, 0.8021139503, 0.3956191838, 0.7217889428, 0.7445480227, 0.1360257119, 0.3709513843, 0.5552678704, 0.2192365974, 0.9431814551, 0.8592399359, 0.7907270789, 0.5545215607, 0.6895139813, 0.1169689223, 0.2043674886, 0.0381150991, 0.7708708644, 0.4759636819, 0.9230924845, 0.6857032776, 0.4432366490, 0.3041133285, 0.7970084548, 0.5629503727, 0.2329168320, 0.2320910394, 0.8098289967, 0.8152811527, 0.9269255996, 0.2628753185, 0.7178934216, 0.1607068628, 0.6057552695, 0.5256694555, 0.5559988022, 0.8001552820, 0.5592993498, 0.5585735440, 0.7596833110, 0.4926379025, 0.8108907342, 0.5142205954, 0.8292154074, 0.9844856262, 0.9281103611, 0.8271671534, 0.8411998153, 0.4101325572, 0.9839829803, 0.1782312542, 0.5126013756, 0.4867194891, 0.9041156173, 0.8752650619, 0.9434064627, 0.5353408456, 0.3405859768, 0.9340458512, 0.1240679324, 0.5371315479, 0.3755141199, 0.2990591526, 0.0670647249, 0.0626592115, 0.7673836946, 0.2539713681, 0.4617587030, 0.9303754568, 0.4884444177, 0.9808034897, 0.7934950590, 0.9362392426, 0.8001930714, 0.8370914459, 0.4767935276, 0.8847136497, 0.8713309765, 0.8301703334, 0.9254899621, 0.5875709057, 0.4544037282, 0.2598260045, 0.7427998781, 0.7183818817, 0.9003841877, 0.0916625410, 0.2609814405, 0.6743535399, 0.7733583450, 0.7338136435, 0.7596724033, 0.7973198891, 0.0015392932, 0.2874146104, 0.1189730167, 0.4800435603, 0.7962353230, 0.4249678552, 0.7483268380, 0.0146148857, 0.6297842860, 0.3471757770, 0.9144366980, 0.8106345534, 0.1789025515, 0.7346886992, 0.1539165080, 0.4280290008, 0.2338476181, 0.3317435384, 0.9998268485, 0.3580373228, 0.9422348738, 0.1251947135, 0.5737128258, 0.6803853512, 0.0485891216, 0.8118965626, 0.7890921235, 0.7665926218, 0.8405004144, 0.3489693701, 0.1429360062, 0.1063490957, 0.5086215734, 0.1312662065, 0.0978318676, 0.4471830130, 0.0830681920, 0.0757851526, 0.1809245348, 0.9280508757, 0.4107315242, 0.5944178104, 0.5625417829, 0.2328256220, 0.9285324812, 0.9903659821, 0.9403946996, 0.5126894712, 0.0232842807, 0.3405880928, 0.6531285644, 0.8213183880, 0.7210904360, 0.4180826247, 0.7917050719, 0.7738851309, 0.1693093032, 0.4396123290, 0.7139748335, 0.8910710216, 0.5668603778, 0.4374921620, 0.8098046780, 0.4076835811, 0.1027061120, 0.5390046835, 0.0044658147, 0.8642644286, 0.8590582609, 0.2715446949, 0.8128718734, 0.7381446362, 0.3621498942, 0.5211849809, 0.6139976382, 0.8567240834, 0.1329502016, 0.2441152930, 0.4219030440, 0.1751736850, 0.6326612234, 0.3929811120, 0.0947103724, 0.1078760102, 0.8769059777, 0.1599343121, 0.6111860275, 0.0368208028, 0.0899466202, 0.9127882719, 0.1146656275, 0.4647151828, 0.3303563893, 0.5797663927, 0.8400436044, 0.2845958769, 0.2181742340, 0.9651557207, 0.1241061762, 0.0102593508, 0.6999664903, 0.8487475514, 0.6001151800, 0.9682601690, 0.6127328873, 0.1502806544, 0.2512893379, 0.3930048048, 0.3448313475, 0.5263126493, 0.7319667935, 0.9264212251, 0.4489789009, 0.0418849625, 0.5219999552, 0.3397078812, 0.4435234964, 0.4758536220, 0.1290920675, 0.1649249196, 0.1736114621, 0.5685442686, 0.3253444433, 0.0540574715, 0.2022368759, 
0.0260062832, 0.9889448285, 0.2064949423, 0.3756456375, 0.8462600112, 0.8166462779, 0.1788506061, 0.6607533097, 0.1638182998, 0.7888727188, 0.3304887116, 0.3085075021, 0.6626392603, 0.2860932350, 0.1577534527, 0.0126363616, 0.1958409399, 0.2475458980, 0.1514713019, 0.5241229534, 0.9845717549, 0.8002693653, 0.3091083765, 0.3348104060, 0.1341333240, 0.3546191454, 0.3800157905, 0.0364337005 };
+const static std::vector<float> if_dynamic_output0{ 0.0444660522, 0.0271649156, 0.0191113371, 0.0014375688, 0.0690929219, 0.0001767588, 0.0030322229, 0.0118752792, 0.0419745520, 0.7816683054 };
+// clang-format on
+
+TEST_F(TestIfDynamicModelLoaded, run_verify)
+{
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
+
+ nnfw_tensorinfo ti_output0_expected = {NNFW_TYPE_TENSOR_FLOAT32, 2, {1, 10}};
+
+ // Output tensor sizes are inferenced after `nnfw_prepare`
+ {
+ nnfw_tensorinfo ti;
+ NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(_session, 0, &ti));
+ ASSERT_TRUE(tensorInfoEqual(ti, ti_output0_expected));
+ }
+
+ std::vector<float> actual_output0(10);
+ setInputOutput(_session, if_dynamic_input0, actual_output0);
+
+ NNFW_ENSURE_SUCCESS(nnfw_run(_session));
+
+ // Check output tensor sizes again
+ {
+ nnfw_tensorinfo ti;
+ NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(_session, 0, &ti));
+ ASSERT_TRUE(tensorInfoEqual(ti, ti_output0_expected));
+ }
+
+ // Output value check
+  for (size_t i = 0; i < actual_output0.size(); ++i)
+ ASSERT_NEAR(if_dynamic_output0[i], actual_output0[i], 0.00001);
+}
+
+class CombinationTest1 : public ::testing::Test
+{
+protected:
+ void SetUp() override
+ {
+ CircleGen cgen;
+
+ // Creating a graph which has dynamic tensors after compilation.
+    // #3 and #4 are dynamic. This model was used to check whether internal dynamic
+    // tensors could cause any side effects.
+    //
+    //    #0 = input 0 of shape [1]
+    //    #1 = input 1 of shape [2]
+    //    #2 = cast(#0, int to float)
+    //    #3 = reshape(const of shape [4], #1)
+ // #4 = add(#2, #3)
+
+ constexpr circle::TensorType CIRCLE_DTYPE = circle::TensorType::TensorType_FLOAT32;
+
+ int cast_in = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32});
+ int cast_out = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorCast({{cast_in}, {cast_out}}, circle::TensorType::TensorType_INT32,
+ circle::TensorType::TensorType_FLOAT32);
+
+ std::vector<float> reshape_in_data{0, 1, 2, 3}; // defining constant tensor
+ uint32_t reshape_in_buf = cgen.addBuffer(reshape_in_data);
+ int reshape_in = cgen.addTensor({{4}, CIRCLE_DTYPE, reshape_in_buf});
+ int reshape_shape_in = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
+ int reshape_out = cgen.addTensor({{}, CIRCLE_DTYPE}); // dynamic tensor of shape {}
+
+ cgen.addOperatorReshape({{reshape_in, reshape_shape_in}, {reshape_out}});
+
+ int out = cgen.addTensor({{}, CIRCLE_DTYPE}); // dynamic tensor of shape {}
+ cgen.addOperatorAdd({{cast_out, reshape_out}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({cast_in, reshape_shape_in}, {out});
+
+ _circle_buffer = cgen.finish();
+ }
+
+ void TearDown() override
+ { // DO NOTHING
+ }
+
+ void setSession(nnfw_session *session) { _session = session; }
+
+ CircleBuffer &getCircleBuffer() { return _circle_buffer; }
+
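+  // Runs the session with input shapes left as-is (the static path) and verifies the
+  // output against `expected_ti` and `expected`.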
+ void run_WITHOUT_set_input_tensorinfo(const std::vector<int32_t> &cast_input,
+ const std::vector<int32_t> &reshape_shape_input,
+ const nnfw_tensorinfo &expected_ti,
+ const std::vector<float> &expected,
+ std::vector<float> &actual)
+ {
+ setInputOutput(_session, cast_input, reshape_shape_input, actual);
+ NNFW_ENSURE_SUCCESS(nnfw_run(_session));
+ verifyOutput(_session, expected_ti, expected, actual);
+ }
+
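+  // Resizes input #0 to shape [new_dim_0] via nnfw_set_input_tensorinfo() before
+  // running, so this run exercises the dynamic-tensor path.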
+ void run_WITH_set_input_tensorinfo(int32_t new_dim_0, const std::vector<int32_t> &cast_input,
+ const std::vector<int32_t> &reshape_shape_input,
+ const nnfw_tensorinfo &expected_ti,
+ const std::vector<float> &expected, std::vector<float> &actual)
+ {
+ nnfw_tensorinfo t_in;
+ t_in.dtype = NNFW_TYPE_TENSOR_INT32;
+ t_in.rank = 1;
+ t_in.dims[0] = new_dim_0;
+ NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &t_in));
+
+ setInputOutput(_session, cast_input, reshape_shape_input, actual);
+ NNFW_ENSURE_SUCCESS(nnfw_run(_session));
+ verifyOutput(_session, expected_ti, expected, actual);
+ }
+
+private:
+ nnfw_session *_session;
+ CircleBuffer _circle_buffer;
+};
+
+// test for https://github.com/Samsung/ONE/issues/4625
+TEST_F(CombinationTest1, combination_of_set_input_tensorinfo_and_nnfw_run)
+{
+ constexpr NNFW_TYPE NNFW_DTYPE = NNFW_TYPE_TENSOR_FLOAT32;
+ std::vector<int32_t> cast_in_buf;
+ std::vector<int32_t> reshape_shape_in_buf;
+ std::vector<float> actual(4), expected(4);
+
+ nnfw_session *session = nullptr;
+ auto &cbuf = getCircleBuffer();
+
+ auto create_prepare_session = [&](const CircleBuffer &cbuf) {
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&session));
+ NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(session, cbuf.buffer(), cbuf.size()));
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session, "cpu"));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(session));
+ };
+
+ // combinations of executions of static and dynamic tensors
+ // 1) no change on the shape of #0 -> change #0 to shape [1] -> no change. use previous shape
+ // 2) no change on the shape of #0 -> change #0 to shape [2] -> no change. use previous shape
+ // 3) no change on the shape of #0 -> change #0 to shape [2] -> change #0 to shape [1]
+
+ // 1) no change on the shape of #0 -> change #0 to shape [1] -> no input change
+ // static dynamic dynamic
+ create_prepare_session(cbuf);
+ {
+ setSession(session);
+
+ // no change on the shape of #0
+ cast_in_buf = {10};
+ reshape_shape_in_buf = {1, 4};
+ expected = {10, 11, 12, 13};
+ run_WITHOUT_set_input_tensorinfo(cast_in_buf, reshape_shape_in_buf,
+ {NNFW_TYPE_TENSOR_FLOAT32, 2, {1, 4}}, expected, actual);
+
+    // change to the default shape [1] of #0; this treats #0 as dynamic
+ int32_t new_dim_0 = 1;
+ cast_in_buf = {10};
+ reshape_shape_in_buf = {1, 4};
+ expected = {10, 11, 12, 13};
+ run_WITH_set_input_tensorinfo(new_dim_0, cast_in_buf, reshape_shape_in_buf,
+ {NNFW_TYPE_TENSOR_FLOAT32, 2, {1, 4}}, expected, actual);
+
+ // no change. Use previous shape
+ run_WITHOUT_set_input_tensorinfo(cast_in_buf, reshape_shape_in_buf,
+ {NNFW_TYPE_TENSOR_FLOAT32, 2, {1, 4}}, expected, actual);
+
+ NNFW_ENSURE_SUCCESS(nnfw_close_session(session));
+ }
+
+  // 2) no change on the shape of #0 -> change #0 to shape [2] -> no change (use previous shape)
+ // static dynamic dynamic
+ create_prepare_session(cbuf);
+ {
+ setSession(session);
+
+ // no change on the shape of #0
+ cast_in_buf = {10};
+ reshape_shape_in_buf = {1, 4};
+ expected = {10, 11, 12, 13};
+ run_WITHOUT_set_input_tensorinfo(cast_in_buf, reshape_shape_in_buf,
+ {NNFW_TYPE_TENSOR_FLOAT32, 2, {1, 4}}, expected, actual);
+
+    // change shape of #0 to [2]; this treats #0 as dynamic
+ int32_t new_dim_0 = 2;
+ cast_in_buf = {10, 20};
+ reshape_shape_in_buf = {2, 2};
+ expected = {10, 21, 12, 23};
+ run_WITH_set_input_tensorinfo(new_dim_0, cast_in_buf, reshape_shape_in_buf,
+ {NNFW_TYPE_TENSOR_FLOAT32, 2, {2, 2}}, expected, actual);
+
+    // set the same shape [2] for #0 again; the result should not change
+ run_WITH_set_input_tensorinfo(new_dim_0, cast_in_buf, reshape_shape_in_buf,
+ {NNFW_TYPE_TENSOR_FLOAT32, 2, {2, 2}}, expected, actual);
+
+ NNFW_ENSURE_SUCCESS(nnfw_close_session(session));
+ }
+
+ // 3) no change on the shape of #0 -> change #0 to shape [2] -> change #0 to shape [1]
+ // static dynamic dynamic
+ create_prepare_session(cbuf);
+ {
+ setSession(session);
+
+ // no change on the shape of #0
+ cast_in_buf = {10};
+ reshape_shape_in_buf = {1, 4};
+ expected = {10, 11, 12, 13};
+ run_WITHOUT_set_input_tensorinfo(cast_in_buf, reshape_shape_in_buf,
+ {NNFW_TYPE_TENSOR_FLOAT32, 2, {1, 4}}, expected, actual);
+
+    // change shape of #0 to [2]; this treats #0 as dynamic
+ int32_t new_dim_0 = 2;
+ cast_in_buf = {10, 20};
+ reshape_shape_in_buf = {2, 2};
+ expected = {10, 21, 12, 23};
+ run_WITH_set_input_tensorinfo(new_dim_0, cast_in_buf, reshape_shape_in_buf,
+ {NNFW_TYPE_TENSOR_FLOAT32, 2, {2, 2}}, expected, actual);
+
+ // change #0 to shape [1]
+ new_dim_0 = 1;
+ cast_in_buf = {100};
+ reshape_shape_in_buf = {1, 4};
+ expected = {100, 101, 102, 103};
+ run_WITH_set_input_tensorinfo(new_dim_0, cast_in_buf, reshape_shape_in_buf,
+ {NNFW_TYPE_TENSOR_FLOAT32, 2, {1, 4}}, expected, actual);
+
+ NNFW_ENSURE_SUCCESS(nnfw_close_session(session));
+ }
+}
+
+TEST_F(CombinationTest1, neg_combination_of_set_input_tensorinfo_and_nnfw_run)
+{
+ nnfw_session *session = nullptr;
+ auto &cbuf = getCircleBuffer();
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&session));
+ NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(session, cbuf.buffer(), cbuf.size()));
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session, "cpu"));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(session));
+
+ setSession(session);
+
+ std::vector<int32_t> cast_in_buf;
+ std::vector<int32_t> reshape_shape_in_buf;
+ std::vector<float> actual(4), expected(4);
+
+ // no change on the shape of #0
+ cast_in_buf = {10};
+ reshape_shape_in_buf = {1, 4};
+ expected = {10, 11, 12, 13};
+ setInputOutput(session, cast_in_buf, reshape_shape_in_buf, actual);
+ NNFW_ENSURE_SUCCESS(nnfw_run(session));
+
+ // change the shape of #0 to [4]
+ cast_in_buf = {10, 20, 30, 40};
+ reshape_shape_in_buf = {1, 4};
+ expected = {10, 21, 32, 43};
+ run_WITH_set_input_tensorinfo(4, cast_in_buf, reshape_shape_in_buf,
+ {NNFW_TYPE_TENSOR_FLOAT32, 2, {1, 4}}, expected, actual);
+ setInputOutput(session, cast_in_buf, reshape_shape_in_buf, actual);
+ NNFW_ENSURE_SUCCESS(nnfw_run(session));
+
+ // run without changing #0, while the caller wrongly assumes it now has shape [1]
+ cast_in_buf = {10};
+ reshape_shape_in_buf = {1, 4};
+ expected = {10, 11, 12, 13};
+ // This should throw an error
+ EXPECT_ANY_THROW(setInputOutput(session, cast_in_buf, reshape_shape_in_buf, actual));
+
+ NNFW_ENSURE_SUCCESS(nnfw_close_session(session));
+}
+
+// Class to test set_input_tensorinfo() against "two" inputs
+class CombinationTest2 : public ::testing::Test
+{
+protected:
+ void SetUp() override
+ {
+ CircleGen cgen;
+
+ // creating a graph with two inputs
+ //
+ // #0 = input 0 of shape [1]
+ // #1 = input 1 of shape [1]
+ // #2 = add(#0, #1)
+
+ int in0 = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32});
+ int in1 = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32});
+
+ cgen.addOperatorAdd({{in0, in1}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in0, in1}, {out});
+
+ _circle_buffer = cgen.finish();
+ }
+
+ void TearDown() override
+ { // DO NOTHING
+ }
+
+ void setSession(nnfw_session *session) { _session = session; }
+
+ CircleBuffer &getCircleBuffer() { return _circle_buffer; }
+
+ void run_WITHOUT_set_input_tensorinfo(const std::vector<int32_t> &in0,
+ const std::vector<int32_t> &in1,
+ const nnfw_tensorinfo &expected_ti,
+ const std::vector<int32_t> &expected,
+ std::vector<int32_t> &actual)
+ {
+ setInputOutput(_session, in0, in1, actual);
+ NNFW_ENSURE_SUCCESS(nnfw_run(_session));
+ verifyOutput(_session, expected_ti, expected, actual);
+ }
+
+ // Pass -1 for t0_new_dim_0 (or t1_new_dim_0) if the shape of tensor 0 (or tensor 1)
+ // does not change from the shape in the model
+ void run_WITH_set_input_tensorinfo(int32_t t0_new_dim_0, int32_t t1_new_dim_0,
+ const std::vector<int32_t> &in0,
+ const std::vector<int32_t> &in1,
+ const nnfw_tensorinfo &expected_ti,
+ const std::vector<int32_t> &expected,
+ std::vector<int32_t> &actual)
+ {
+ if (t0_new_dim_0 >= 0)
+ {
+ nnfw_tensorinfo t_in;
+ t_in.dtype = NNFW_TYPE_TENSOR_INT32;
+ t_in.rank = 1;
+ t_in.dims[0] = t0_new_dim_0;
+ NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &t_in));
+ }
+
+ if (t1_new_dim_0 >= 0)
+ {
+ nnfw_tensorinfo t_in;
+ t_in.dtype = NNFW_TYPE_TENSOR_INT32;
+ t_in.rank = 1;
+ t_in.dims[0] = t1_new_dim_0;
+ NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 1, &t_in));
+ }
+
+ setInputOutput(_session, in0, in1, actual);
+ NNFW_ENSURE_SUCCESS(nnfw_run(_session));
+ verifyOutput(_session, expected_ti, expected, actual);
+ }
+
+private:
+ nnfw_session *_session;
+ CircleBuffer _circle_buffer;
+};
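+
+// Illustrative use of the -1 sentinel described above (a sketch mirroring the
+// tests below): resize input #0 to [2] and leave input #1 unchanged:
+//
+//   run_WITH_set_input_tensorinfo(/*t0_new_dim_0=*/2, /*t1_new_dim_0=*/-1,
+//                                 in0, in1, expected_ti, expected, actual);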
+
+// test for https://github.com/Samsung/ONE/issues/4625
+TEST_F(CombinationTest2, combination_set_input_tensorinfo_for_two_inputs)
+{
+ nnfw_session *session = nullptr;
+
+ // combinations of executions of static and dynamic tensors for "two" inputs (#0, #1)
+ // 0. both input shapes are [1] (input shapes of the model are [1], [1])
+ // 1. change shape of #0 to [2]
+ // 2. change shape of #0 to [1], change shape of #1 to [2]
+ // 3. change shape of #0 to [2], (shape of #1 is still [2])
+ // 4. don't call set_input_tensorinfo (both are still [2] and [2])
+ // 5. change shape of #0 to [1], change shape of #1 to [1]
+ std::vector<int32_t> in0, in1;
+ std::vector<int32_t> actual, expected;
+ nnfw_tensorinfo expected_ti;
+
+ auto &cbuf = getCircleBuffer();
+
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&session));
+ NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(session, cbuf.buffer(), cbuf.size()));
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session, "cpu"));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(session));
+ setSession(session);
+
+ constexpr int32_t NO_CHANGE = -1;
+
+ // 0. both input shapes are [1]
+ in0 = {10};
+ in1 = {100};
+ expected = {110};
+ expected_ti = {NNFW_TYPE_TENSOR_INT32, 1, {1}};
+ actual.resize(1);
+ run_WITHOUT_set_input_tensorinfo(in0, in1, expected_ti, expected, actual);
+
+ // 1. change shape of #0 to [2]
+ int32_t new_dim_0 = 2;
+ int32_t new_dim_1 = NO_CHANGE;
+ in0 = {10, 20};
+ in1 = {100};
+ expected = {110, 120};
+ expected_ti = {NNFW_TYPE_TENSOR_INT32, 1, {2}};
+ actual.resize(2);
+ run_WITH_set_input_tensorinfo(new_dim_0, new_dim_1, in0, in1, expected_ti, expected, actual);
+
+ // 2. change shape of #0 to [1], change shape of #1 to [2]
+ new_dim_0 = 1;
+ new_dim_1 = 2;
+ in0 = {1000};
+ in1 = {10, 20};
+ expected = {1010, 1020};
+ expected_ti = {NNFW_TYPE_TENSOR_INT32, 1, {2}};
+ actual.resize(2);
+ run_WITH_set_input_tensorinfo(new_dim_0, new_dim_1, in0, in1, expected_ti, expected, actual);
+
+ // 3. change shape of #0 to [2] (shape of #1 is still [2])
+ new_dim_0 = 2;
+ new_dim_1 = NO_CHANGE;
+ in0 = {10, 20};
+ in1 = {100, 200};
+ expected = {110, 220};
+ expected_ti = {NNFW_TYPE_TENSOR_INT32, 1, {2}};
+ actual.resize(2);
+ run_WITH_set_input_tensorinfo(new_dim_0, new_dim_1, in0, in1, expected_ti, expected, actual);
+
+ // 4. don't call set_input_tensorinfo (both are still [2] and [2])
+ in0 = {11, 22};
+ in1 = {1000, 2000};
+ expected = {1011, 2022};
+ expected_ti = {NNFW_TYPE_TENSOR_INT32, 1, {2}};
+ actual.resize(2);
+ run_WITHOUT_set_input_tensorinfo(in0, in1, expected_ti, expected, actual);
+
+ // 5. change shape of #0 to [1], change shape of #1 to [1]
+ new_dim_0 = 1;
+ new_dim_1 = 1;
+ in0 = {50};
+ in1 = {500};
+ expected = {550};
+ expected_ti = {NNFW_TYPE_TENSOR_INT32, 1, {1}};
+ actual.resize(1);
+ run_WITH_set_input_tensorinfo(new_dim_0, new_dim_1, in0, in1, expected_ti, expected, actual);
+
+ NNFW_ENSURE_SUCCESS(nnfw_close_session(session));
+}
+
+TEST_F(CombinationTest2, neg_combination_set_input_tensorinfo_for_two_inputs)
+{
+ nnfw_session *session = nullptr;
+
+ // change shape of #1 to [2]
+ // then, do not call nnfw_set_input_tensorinfo for #1
+ std::vector<int32_t> in0, in1;
+ std::vector<int32_t> actual, expected;
+ nnfw_tensorinfo expected_ti;
+
+ auto &cbuf = getCircleBuffer();
+
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&session));
+ NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(session, cbuf.buffer(), cbuf.size()));
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session, "cpu"));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(session));
+ setSession(session);
+
+ constexpr int32_t NO_CHANGE = -1;
+
+ // change shape of #1 to [2]
+ int32_t new_dim_0 = NO_CHANGE;
+ int32_t new_dim_1 = 2;
+ in0 = {10};
+ in1 = {100, 200};
+ expected = {110, 210};
+ expected_ti = {NNFW_TYPE_TENSOR_INT32, 1, {2}};
+ actual.resize(2);
+ run_WITH_set_input_tensorinfo(new_dim_0, new_dim_1, in0, in1, expected_ti, expected, actual);
+
+ // then, do not call nnfw_set_input_tensorinfo for #1, wrongly assuming that
+ // #1 now has shape [1]
+ in0 = {10};
+ in1 = {100};
+ expected = {110}; // wrong
+ expected_ti = {NNFW_TYPE_TENSOR_INT32, 1, {1}}; // wrong
+ actual.resize(1); // wrong
+ EXPECT_ANY_THROW(run_WITHOUT_set_input_tensorinfo(in0, in1, expected_ti, expected, actual));
+
+ NNFW_ENSURE_SUCCESS(nnfw_close_session(session));
+}
diff --git a/tests/nnfw_api/src/ModelTestInputReshaping.cc b/tests/nnfw_api/src/ModelTestInputReshaping.cc
deleted file mode 100644
index bfe347fe7..000000000
--- a/tests/nnfw_api/src/ModelTestInputReshaping.cc
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-#include <nnfw_internal.h>
-
-#include "fixtures.h"
-#include "NNPackages.h"
-#include "common.h"
-
-using TestInputReshapingAddModelLoaded = ValidationTestModelLoaded<NNPackages::INPUT_RESHAPING_ADD>;
-
-/**
- * @brief Testing the following model:
- * #1 = placeholder (shape = [2, 2], dtype=float)
- * #2 = placeholder (shape = [2], dtype=float)
- * #3 = add(#1, #2)
- *
- * @note Run this test with "cpu" backend and "linear" executor
- */
-TEST_F(TestInputReshapingAddModelLoaded, reshaping_2x2_to_4x2)
-{
- NNFW_STATUS res = NNFW_STATUS_ERROR;
-
- NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
- NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "EXECUTOR", "Linear"));
-
- // input and output values
- const std::vector<float> input1 = {0, 1, 2, 3, 4, 5, 6, 7}; // of changed shape [4, 2]
- const std::vector<float> input2 = {-10, -10};
- const std::vector<float> expected = {-10, -9, -8, -7, -6, -5, -4, -3}; // of shape [4, 2]
-
- /*
- testing sequence and what's been done:
- 1. nnfw_set_input_tensorinfo : set input shape to different shape (static inference)
- 2. nnfw_prepare
- 3. nnfw_set_input
- 4. nnfw_run
- */
-
- // input reshaping from [2, 2] to [4, 2]
- nnfw_tensorinfo ti = {NNFW_TYPE_TENSOR_FLOAT32, 2, {4, 2}};
- res = nnfw_set_input_tensorinfo(_session, 0, &ti);
-
- res = nnfw_prepare(_session);
- NNFW_ENSURE_SUCCESS(res);
-
- nnfw_tensorinfo ti_input = {}; // Static inference result will be stored
- nnfw_input_tensorinfo(_session, 0, &ti_input);
- ASSERT_TRUE(tensorInfoEqual(ti, ti_input));
-
- nnfw_tensorinfo ti_output = {}; // Static inference result will be stored
- nnfw_output_tensorinfo(_session, 0, &ti_output);
- ASSERT_TRUE(tensorInfoEqual(ti, ti_output)); // input/output shapes are same with for this model
-
- res = nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, input1.data(),
- sizeof(float) * input1.size());
- NNFW_ENSURE_SUCCESS(res);
- res = nnfw_set_input(_session, 1, NNFW_TYPE_TENSOR_FLOAT32, input2.data(),
- sizeof(float) * input2.size());
- NNFW_ENSURE_SUCCESS(res);
-
- uint64_t output_num_elements = tensorInfoNumElements(ti_output);
- ASSERT_EQ(output_num_elements, expected.size());
- std::vector<float> actual_output(output_num_elements);
- res = nnfw_set_output(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, actual_output.data(),
- sizeof(float) * actual_output.size());
- NNFW_ENSURE_SUCCESS(res);
-
- // Do inference
- res = nnfw_run(_session);
- NNFW_ENSURE_SUCCESS(res);
-
- // compare
- for (int i = 0; i < expected.size(); ++i)
- ASSERT_EQ(expected[i], actual_output[i]);
-}
diff --git a/tests/nnfw_api/src/ModelTestInputReshaping.test.cc b/tests/nnfw_api/src/ModelTestInputReshaping.test.cc
new file mode 100644
index 000000000..f5ce3e062
--- /dev/null
+++ b/tests/nnfw_api/src/ModelTestInputReshaping.test.cc
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <nnfw_internal.h>
+
+#include "fixtures.h"
+#include "common.h"
+#include "CircleGen.h"
+
+/**
+ * @brief Testing the following model:
+ * #1 = placeholder (shape = [2, 2], dtype=float)
+ * #2 = placeholder (shape = [2], dtype=float)
+ * #3 = add(#1, #2)
+ */
+auto build_model_add_input_reshaping()
+{
+ // Build the Add model described above
+ CircleGen cgen;
+ auto f32 = circle::TensorType::TensorType_FLOAT32;
+ int in1 = cgen.addTensor({{2, 2}, f32}); // consider this [None, None]
+ int in2 = cgen.addTensor({{2}, f32});
+ int out = cgen.addTensor({{}, f32}); // scalar, meaning output shape is unspecified
+ cgen.addOperatorAdd({{in1, in2}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in1, in2}, {out});
+ auto cbuf = cgen.finish();
+ return cbuf;
+}
+
+TEST(TestDynamicTensor, input_reshaping)
+{
+ nnfw_session *session = nullptr;
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&session));
+ const auto model_buf = build_model_add_input_reshaping();
+ NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(session, model_buf.buffer(), model_buf.size()));
+
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session, "cpu"));
+
+ // input and output values
+ const std::vector<float> input1 = {0, 1, 2, 3, 4, 5, 6, 7}; // of changed shape [4, 2]
+ const std::vector<float> input2 = {-10, -10};
+ const std::vector<float> expected = {-10, -9, -8, -7, -6, -5, -4, -3}; // of shape [4, 2]
+
+ /*
+ testing sequence and what's been done:
+ 1. nnfw_set_input_tensorinfo : set input shape to different shape (static inference)
+ 2. nnfw_prepare
+ 3. nnfw_set_input
+ 4. nnfw_run
+ */
+
+ // input reshaping from [2, 2] to [4, 2]
+ nnfw_tensorinfo ti = {NNFW_TYPE_TENSOR_FLOAT32, 2, {4, 2}};
+ NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(session, 0, &ti));
+
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(session));
+
+ nnfw_tensorinfo ti_input = {}; // Static inference result will be stored
+ NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(session, 0, &ti_input));
+ ASSERT_TRUE(tensorInfoEqual(ti, ti_input));
+
+ nnfw_tensorinfo ti_output = {}; // Static inference result will be stored
+ NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(session, 0, &ti_output));
+ ASSERT_TRUE(tensorInfoEqual(ti, ti_output)); // input/output shapes are the same for this model
+
+ NNFW_ENSURE_SUCCESS(nnfw_set_input(session, 0, NNFW_TYPE_TENSOR_FLOAT32, input1.data(),
+ sizeof(float) * input1.size()));
+ NNFW_ENSURE_SUCCESS(nnfw_set_input(session, 1, NNFW_TYPE_TENSOR_FLOAT32, input2.data(),
+ sizeof(float) * input2.size()));
+
+ uint64_t output_num_elements = tensorInfoNumElements(ti_output);
+ ASSERT_EQ(output_num_elements, expected.size());
+ std::vector<float> actual_output(output_num_elements);
+ NNFW_ENSURE_SUCCESS(nnfw_set_output(session, 0, NNFW_TYPE_TENSOR_FLOAT32, actual_output.data(),
+ sizeof(float) * actual_output.size()));
+
+ // Do inference
+ NNFW_ENSURE_SUCCESS(nnfw_run(session));
+
+ // compare
+ for (size_t i = 0; i < expected.size(); ++i)
+ ASSERT_EQ(expected[i], actual_output[i]);
+}
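+
+// Note (a hedged summary of the distinction this test relies on): calling
+// nnfw_set_input_tensorinfo() BEFORE nnfw_prepare(), as above, resizes the input
+// statically, so nnfw_input_tensorinfo() reports the new shape right after
+// prepare; calling it AFTER nnfw_prepare() (as the RegressionTests below do)
+// makes the tensor dynamic, and shapes are inferred at nnfw_run() time.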
diff --git a/tests/nnfw_api/src/NNPackages.cc b/tests/nnfw_api/src/NNPackages.cc
index 534973cb0..af8b79933 100644
--- a/tests/nnfw_api/src/NNPackages.cc
+++ b/tests/nnfw_api/src/NNPackages.cc
@@ -25,11 +25,14 @@
// NOTE Must match `enum TestPackages`
const char *TEST_PACKAGE_NAMES[] = {
- // for validation test
- "add", "add_no_manifest", "add_invalid_manifest",
+ // for validation test
+ "add",
+ "add_no_manifest",
+ "add_invalid_manifest",
- // for dynamic tensor test
- "input_reshaping_add", "dynamic_tensor_reshape", "while_dynamic", "if_dynamic",
+ // for dynamic tensor test
+ "while_dynamic",
+ "if_dynamic",
};
NNPackages &NNPackages::get()
@@ -43,11 +46,11 @@ void NNPackages::init(const char *argv0)
char raw_dir[1024];
char cwd[1024];
strncpy(raw_dir, argv0, sizeof(raw_dir) - 1);
- dirname(raw_dir);
- if (raw_dir[0] == '/')
+ char *dir_path = dirname(raw_dir);
+ if (dir_path[0] == '/')
{
// If it is an absolute path, just use it
- _base_path = raw_dir;
+ _base_path = dir_path;
}
else
{
@@ -55,7 +58,7 @@ void NNPackages::init(const char *argv0)
getcwd(cwd, sizeof(cwd));
_base_path = cwd;
_base_path += "/";
- _base_path += raw_dir;
+ _base_path += dir_path;
}
}
@@ -71,7 +74,9 @@ void NNPackages::checkAll()
DIR *dir = opendir(path.c_str());
if (!dir)
{
- std::string msg = "missing nnpackage: " + package_name + ", path: " + path;
+ std::string msg = "missing nnpackage: " + package_name + ", path: " + path +
+ "\nPlease run \'[install_dir]/test/onert-test prepare-model\' to "
+ "download nnpackage";
throw std::runtime_error{msg};
}
closedir(dir);
@@ -93,3 +98,8 @@ std::string NNPackages::getModelAbsolutePath(const char *package_name)
{
return _base_path + "/nnfw_api_gtest_models/" + package_name + "/" + package_name;
}
+
+std::string NNPackages::getModelAbsoluteFilePath(const char *package_name)
+{
+ return _base_path + "/nnfw_api_gtest_models/" + package_name + "/" + package_name + ".tflite";
+}
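+
+// Hypothetical usage (the package name is illustrative):
+//   auto path = NNPackages::get().getModelAbsoluteFilePath("add");
+//   // -> "<base_path>/nnfw_api_gtest_models/add/add.tflite"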
diff --git a/tests/nnfw_api/src/NNPackages.h b/tests/nnfw_api/src/NNPackages.h
index 735fa96a0..ab3d527f3 100644
--- a/tests/nnfw_api/src/NNPackages.h
+++ b/tests/nnfw_api/src/NNPackages.h
@@ -43,8 +43,6 @@ public:
ADD_INVALID_MANIFEST, //< Contains "Add" model but the manifest file is broken JSON
// for dynamic tensor test
- INPUT_RESHAPING_ADD,
- DYNAMIC_TENSOR_RESHAPE,
WHILE_DYNAMIC,
IF_DYNAMIC,
@@ -75,6 +73,14 @@ public:
std::string getModelAbsolutePath(const char *package_name);
/**
+ * @brief Get the absolute path of the model file
+ *
+ * @param package_name Package name
+ * @return std::string The absolute path of the model file
+ */
+ std::string getModelAbsoluteFilePath(const char *package_name);
+
+ /**
* @brief Save the current executable's directory based on argv[0] and CWD
*
* @param argv0 0th command line argument of the current process
diff --git a/tests/nnfw_api/src/RegressionTests.cc b/tests/nnfw_api/src/RegressionTests.cc
deleted file mode 100644
index 05914b839..000000000
--- a/tests/nnfw_api/src/RegressionTests.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "fixtures.h"
-#include "NNPackages.h"
-
-#include <nnfw_internal.h>
-
-#include "CircleGen.h"
-
-TEST_F(RegressionTest, github_1535)
-{
- auto package_path = NNPackages::get().getModelAbsolutePath(NNPackages::ADD);
-
- nnfw_session *session1 = nullptr;
- NNFW_ENSURE_SUCCESS(nnfw_create_session(&session1));
- NNFW_ENSURE_SUCCESS(nnfw_load_model_from_file(session1, package_path.c_str()));
- NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session1, "cpu;acl_cl;acl_neon"));
- NNFW_ENSURE_SUCCESS(nnfw_prepare(session1));
-
- nnfw_session *session2 = nullptr;
- NNFW_ENSURE_SUCCESS(nnfw_create_session(&session2));
- NNFW_ENSURE_SUCCESS(nnfw_load_model_from_file(session2, package_path.c_str()));
- NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session2, "cpu"));
- NNFW_ENSURE_SUCCESS(nnfw_prepare(session2));
-
- NNFW_ENSURE_SUCCESS(nnfw_close_session(session1));
- NNFW_ENSURE_SUCCESS(nnfw_close_session(session2));
-
- SUCCEED();
-}
-
-TEST_F(RegressionTest, neg_github_3826)
-{
- // Model is not important
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
- auto cbuf = cgen.finish();
-
- nnfw_session *session = nullptr;
- NNFW_ENSURE_SUCCESS(nnfw_create_session(&session));
- NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(session, cbuf.buffer(), cbuf.size()));
- // To test when there is no backends loaded for the session
- NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session, "unavailable_backend"));
- ASSERT_EQ(nnfw_prepare(session), NNFW_STATUS_ERROR);
- NNFW_ENSURE_SUCCESS(nnfw_close_session(session));
-}
diff --git a/tests/nnfw_api/src/RegressionTests.test.cc b/tests/nnfw_api/src/RegressionTests.test.cc
new file mode 100644
index 000000000..de233390d
--- /dev/null
+++ b/tests/nnfw_api/src/RegressionTests.test.cc
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "fixtures.h"
+#include "NNPackages.h"
+
+#include <nnfw_internal.h>
+
+#include "CircleGen.h"
+
+TEST_F(RegressionTest, github_1535)
+{
+ auto package_path = NNPackages::get().getModelAbsolutePath(NNPackages::ADD);
+
+ nnfw_session *session1 = nullptr;
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&session1));
+ NNFW_ENSURE_SUCCESS(nnfw_load_model_from_file(session1, package_path.c_str()));
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session1, "cpu;acl_cl;acl_neon"));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(session1));
+
+ nnfw_session *session2 = nullptr;
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&session2));
+ NNFW_ENSURE_SUCCESS(nnfw_load_model_from_file(session2, package_path.c_str()));
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session2, "cpu"));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(session2));
+
+ NNFW_ENSURE_SUCCESS(nnfw_close_session(session1));
+ NNFW_ENSURE_SUCCESS(nnfw_close_session(session2));
+
+ SUCCEED();
+}
+
+TEST_F(RegressionTest, neg_github_3826)
+{
+ // Model is not important
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+ auto cbuf = cgen.finish();
+
+ nnfw_session *session = nullptr;
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&session));
+ NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(session, cbuf.buffer(), cbuf.size()));
+ // To test when there are no backends loaded for the session
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session, "unavailable_backend"));
+ ASSERT_EQ(nnfw_prepare(session), NNFW_STATUS_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_close_session(session));
+}
+
+TEST_F(RegressionTest, github_11748)
+{
+ // At the 1st call, the input tensor is static. From the 2nd call on, it becomes dynamic.
+ // The following model and calling sequence were what the nnstreamer team used in their test case.
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+
+ std::vector<float> rhs_data{2};
+ uint32_t rhs_buf = cgen.addBuffer(rhs_data);
+ int rhs = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, rhs_buf});
+
+ int out = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs}, {out});
+ auto cbuf = cgen.finish();
+
+ nnfw_session *session = nullptr;
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&session));
+ NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(session, cbuf.buffer(), cbuf.size()));
+ // Use the cpu backend for this test
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session, "cpu"));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(session));
+
+ uint32_t input_num = -1;
+ NNFW_ENSURE_SUCCESS(nnfw_input_size(session, &input_num));
+
+ nnfw_tensorinfo t_input;
+ NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(session, 0, &t_input));
+
+ uint32_t output_num = -1;
+ NNFW_ENSURE_SUCCESS(nnfw_output_size(session, &output_num));
+
+ nnfw_tensorinfo t_output;
+ NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(session, 0, &t_output));
+
+ // When new_dim == 1, the input tensor is static. From 2 on, it becomes dynamic.
+ for (int32_t new_dim = 1; new_dim <= 4; new_dim++)
+ {
+ nnfw_tensorinfo t_new_input;
+ t_new_input.dtype = t_input.dtype;
+ t_new_input.rank = 1;
+ t_new_input.dims[0] = new_dim;
+ NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(session, 0, &t_new_input));
+
+ NNFW_ENSURE_SUCCESS(nnfw_input_size(session, &input_num));
+ NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(session, 0, &t_input));
+
+ ASSERT_EQ(input_num, 1);
+ ASSERT_EQ(t_input.rank, t_new_input.rank);
+ ASSERT_EQ(t_input.dims[0], new_dim);
+
+ uint8_t input_buf[new_dim * sizeof(float)];
+ NNFW_ENSURE_SUCCESS(
+ nnfw_set_input(session, 0, t_input.dtype, &input_buf, new_dim * sizeof(float)));
+
+ uint8_t output_buf[new_dim * sizeof(float)];
+ NNFW_ENSURE_SUCCESS(
+ nnfw_set_output(session, 0, t_output.dtype, &output_buf, new_dim * sizeof(float)));
+
+ NNFW_ENSURE_SUCCESS(nnfw_run(session));
+
+ NNFW_ENSURE_SUCCESS(nnfw_output_size(session, &output_num));
+ NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(session, 0, &t_output));
+
+ ASSERT_EQ(output_num, 1);
+ ASSERT_EQ(t_output.rank, t_new_input.rank);
+ ASSERT_EQ(t_output.dims[0], new_dim);
+
+ // Calling these again may look redundant, but the nnstreamer use case does so;
+ // the runtime should still handle it
+ NNFW_ENSURE_SUCCESS(
+ nnfw_set_input(session, 0, t_input.dtype, &input_buf, new_dim * sizeof(float)));
+ NNFW_ENSURE_SUCCESS(
+ nnfw_set_output(session, 0, t_output.dtype, &output_buf, new_dim * sizeof(float)));
+ NNFW_ENSURE_SUCCESS(nnfw_run(session));
+ }
+
+ NNFW_ENSURE_SUCCESS(nnfw_close_session(session));
+}
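+
+// The stack buffers above rely on variable-length arrays, a compiler extension in
+// C++. A sketch of a portable alternative with the same sizes (heap-backed):
+//
+//   std::vector<uint8_t> input_buf(new_dim * sizeof(float));
+//   NNFW_ENSURE_SUCCESS(
+//     nnfw_set_input(session, 0, t_input.dtype, input_buf.data(), input_buf.size()));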
+
+TEST_F(RegressionTest, github_4585)
+{
+ // A single tensor which is an input and an output at the same time
+ CircleGen cgen;
+ int t = cgen.addTensor({{1, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.setInputsAndOutputs({t}, {t});
+ auto cbuf = cgen.finish();
+
+ nnfw_session *session = nullptr;
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&session));
+ NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(session, cbuf.buffer(), cbuf.size()));
+ // Use the cpu backend for this test
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session, "cpu"));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(session));
+
+ // Change input tensorinfo (Make dynamic shape inference happen)
+ nnfw_tensorinfo ti_new = {NNFW_TYPE_TENSOR_FLOAT32, 2, {1, 2}};
+ NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(session, 0, &ti_new));
+
+ std::vector<float> in_buf{1, 1};
+ std::vector<float> out_buf{-1, -1};
+
+ NNFW_ENSURE_SUCCESS(
+ nnfw_set_input(session, 0, ti_new.dtype, in_buf.data(), in_buf.size() * sizeof(float)));
+ NNFW_ENSURE_SUCCESS(
+ nnfw_set_output(session, 0, ti_new.dtype, out_buf.data(), out_buf.size() * sizeof(float)));
+
+ NNFW_ENSURE_SUCCESS(nnfw_run(session));
+
+ ASSERT_EQ(in_buf, out_buf);
+
+ NNFW_ENSURE_SUCCESS(nnfw_close_session(session));
+}
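+
+// Note on the case above (hedged): with a single tensor serving as both input and
+// output, the runtime is expected to copy (or alias) the user buffers so that the
+// output matches the input byte-for-byte, which is what the ASSERT_EQ checks.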
diff --git a/tests/nnfw_api/src/ValidationTestAddModelLoaded.cc b/tests/nnfw_api/src/ValidationTestAddModelLoaded.cc
deleted file mode 100644
index 1d3d4fc93..000000000
--- a/tests/nnfw_api/src/ValidationTestAddModelLoaded.cc
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "fixtures.h"
-#include "NNPackages.h"
-
-using ValidationTestAddModelLoaded = ValidationTestModelLoaded<NNPackages::ADD>;
-
-TEST_F(ValidationTestAddModelLoaded, prepare_001)
-{
- NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
-
- SUCCEED();
-}
-
-TEST_F(ValidationTestAddModelLoaded, set_available_backends_001)
-{
- NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
-
- SUCCEED();
-}
-
-TEST_F(ValidationTestAddModelLoaded, get_input_size)
-{
- uint32_t size = 0;
- NNFW_ENSURE_SUCCESS(nnfw_input_size(_session, &size));
- ASSERT_EQ(size, 1);
-}
-
-TEST_F(ValidationTestAddModelLoaded, get_output_size)
-{
- uint32_t size = 0;
- NNFW_ENSURE_SUCCESS(nnfw_output_size(_session, &size));
- ASSERT_EQ(size, 1);
-}
-
-TEST_F(ValidationTestAddModelLoaded, output_tensorinfo)
-{
- nnfw_tensorinfo tensor_info;
- NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(_session, 0, &tensor_info));
- ASSERT_EQ(tensor_info.rank, 1);
- ASSERT_EQ(tensor_info.dims[0], 1);
-}
-
-TEST_F(ValidationTestAddModelLoaded, input_output_tensorindex)
-{
- uint32_t in_ind = 100;
- NNFW_ENSURE_SUCCESS(nnfw_input_tensorindex(_session, "X_input", &in_ind));
- ASSERT_EQ(in_ind, 0);
-
- uint32_t out_ind = 100;
- NNFW_ENSURE_SUCCESS(nnfw_output_tensorindex(_session, "ADD_TOP", &out_ind));
- ASSERT_EQ(out_ind, 0);
-}
-
-TEST_F(ValidationTestAddModelLoaded, neg_run)
-{
- // nnfw_prepare is not called
- ASSERT_EQ(nnfw_run(_session), NNFW_STATUS_INVALID_STATE);
-}
-
-TEST_F(ValidationTestAddModelLoaded, neg_set_input)
-{
- // nnfw_prepare is not called
- ASSERT_EQ(nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, nullptr, 0),
- NNFW_STATUS_INVALID_STATE);
-}
-
-TEST_F(ValidationTestAddModelLoaded, neg_set_output)
-{
- // nnfw_prepare is not called
- ASSERT_EQ(nnfw_set_output(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, nullptr, 0),
- NNFW_STATUS_INVALID_STATE);
-}
-
-TEST_F(ValidationTestAddModelLoaded, neg_get_input_size)
-{
- ASSERT_EQ(nnfw_input_size(_session, nullptr), NNFW_STATUS_UNEXPECTED_NULL);
-}
-
-TEST_F(ValidationTestAddModelLoaded, neg_get_output_size)
-{
- ASSERT_EQ(nnfw_output_size(_session, nullptr), NNFW_STATUS_UNEXPECTED_NULL);
-}
-
-TEST_F(ValidationTestAddModelLoaded, neg_load_model)
-{
- // load model twice
- ASSERT_EQ(nnfw_load_model_from_file(
- _session, NNPackages::get().getModelAbsolutePath(NNPackages::ADD).c_str()),
- NNFW_STATUS_INVALID_STATE);
-}
-
-TEST_F(ValidationTestAddModelLoaded, neg_output_tensorinfo)
-{
- // tensor_info is null
- ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, nullptr), NNFW_STATUS_UNEXPECTED_NULL);
-}
-
-TEST_F(ValidationTestAddModelLoaded, neg_input_output_tensorindex)
-{
- uint32_t in_ind = 100;
- ASSERT_EQ(nnfw_input_tensorindex(_session, "ADD_TOP", &in_ind), NNFW_STATUS_ERROR);
- ASSERT_EQ(in_ind, 100);
- ASSERT_EQ(nnfw_input_tensorindex(_session, "y_var", &in_ind), NNFW_STATUS_ERROR);
- ASSERT_EQ(in_ind, 100);
-
- uint32_t out_ind = 100;
- ASSERT_EQ(nnfw_output_tensorindex(_session, "X_input", &out_ind), NNFW_STATUS_ERROR);
- ASSERT_EQ(out_ind, 100);
-}
diff --git a/tests/nnfw_api/src/ValidationTestAddModelLoaded.test.cc b/tests/nnfw_api/src/ValidationTestAddModelLoaded.test.cc
new file mode 100644
index 000000000..0b7759374
--- /dev/null
+++ b/tests/nnfw_api/src/ValidationTestAddModelLoaded.test.cc
@@ -0,0 +1,246 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "fixtures.h"
+#include "NNPackages.h"
+
+#include "nnfw_internal.h"
+
+using ValidationTestAddModelLoaded = ValidationTestModelLoaded<NNPackages::ADD>;
+
+TEST_F(ValidationTestAddModelLoaded, prepare_001)
+{
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
+
+ SUCCEED();
+}
+
+TEST_F(ValidationTestAddModelLoaded, set_available_backends_001)
+{
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
+
+ SUCCEED();
+}
+
+TEST_F(ValidationTestAddModelLoaded, get_input_size)
+{
+ uint32_t size = 0;
+ NNFW_ENSURE_SUCCESS(nnfw_input_size(_session, &size));
+ ASSERT_EQ(size, 1);
+}
+
+TEST_F(ValidationTestAddModelLoaded, get_output_size)
+{
+ uint32_t size = 0;
+ NNFW_ENSURE_SUCCESS(nnfw_output_size(_session, &size));
+ ASSERT_EQ(size, 1);
+}
+
+TEST_F(ValidationTestAddModelLoaded, output_tensorinfo)
+{
+ nnfw_tensorinfo tensor_info;
+ NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(_session, 0, &tensor_info));
+ ASSERT_EQ(tensor_info.rank, 1);
+ ASSERT_EQ(tensor_info.dims[0], 1);
+}
+
+TEST_F(ValidationTestAddModelLoaded, input_output_tensorindex)
+{
+ uint32_t in_ind = 100;
+ NNFW_ENSURE_SUCCESS(nnfw_input_tensorindex(_session, "X_input", &in_ind));
+ ASSERT_EQ(in_ind, 0);
+
+ uint32_t out_ind = 100;
+ NNFW_ENSURE_SUCCESS(nnfw_output_tensorindex(_session, "ADD_TOP", &out_ind));
+ ASSERT_EQ(out_ind, 0);
+}
+
+TEST_F(ValidationTestAddModelLoaded, neg_run)
+{
+ // nnfw_prepare is not called
+ ASSERT_EQ(nnfw_run(_session), NNFW_STATUS_INVALID_STATE);
+}
+
+TEST_F(ValidationTestAddModelLoaded, neg_set_input)
+{
+ // nnfw_prepare is not called
+ ASSERT_EQ(nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, nullptr, 0),
+ NNFW_STATUS_INVALID_STATE);
+}
+
+TEST_F(ValidationTestAddModelLoaded, neg_set_output)
+{
+ // nnfw_prepare is not called
+ ASSERT_EQ(nnfw_set_output(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, nullptr, 0),
+ NNFW_STATUS_INVALID_STATE);
+}
+
+TEST_F(ValidationTestAddModelLoaded, neg_get_input_size)
+{
+ ASSERT_EQ(nnfw_input_size(_session, nullptr), NNFW_STATUS_UNEXPECTED_NULL);
+}
+
+TEST_F(ValidationTestAddModelLoaded, neg_get_output_size)
+{
+ ASSERT_EQ(nnfw_output_size(_session, nullptr), NNFW_STATUS_UNEXPECTED_NULL);
+}
+
+TEST_F(ValidationTestAddModelLoaded, neg_load_model)
+{
+ // load model twice
+ ASSERT_EQ(nnfw_load_model_from_file(
+ _session, NNPackages::get().getModelAbsolutePath(NNPackages::ADD).c_str()),
+ NNFW_STATUS_INVALID_STATE);
+}
+
+TEST_F(ValidationTestAddModelLoaded, neg_output_tensorinfo)
+{
+ // tensor_info is null
+ ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, nullptr), NNFW_STATUS_UNEXPECTED_NULL);
+}
+
+TEST_F(ValidationTestAddModelLoaded, neg_input_output_tensorindex)
+{
+ uint32_t in_ind = 100;
+ ASSERT_EQ(nnfw_input_tensorindex(_session, "ADD_TOP", &in_ind), NNFW_STATUS_ERROR);
+ ASSERT_EQ(in_ind, 100);
+ ASSERT_EQ(nnfw_input_tensorindex(_session, "y_var", &in_ind), NNFW_STATUS_ERROR);
+ ASSERT_EQ(in_ind, 100);
+
+ uint32_t out_ind = 100;
+ ASSERT_EQ(nnfw_output_tensorindex(_session, "X_input", &out_ind), NNFW_STATUS_ERROR);
+ ASSERT_EQ(out_ind, 100);
+}
+
+TEST_F(ValidationTestAddModelLoaded, experimental_input_tensorindex)
+{
+ uint32_t ind = 999;
+ NNFW_ENSURE_SUCCESS(nnfw_input_tensorindex(_session, "X_input", &ind));
+ ASSERT_EQ(ind, 0);
+}
+
+TEST_F(ValidationTestAddModelLoaded, neg_experimental_input_tensorindex_name_null)
+{
+ uint32_t ind = 999;
+ ASSERT_EQ(nnfw_input_tensorindex(_session, nullptr, &ind), NNFW_STATUS_UNEXPECTED_NULL);
+ ASSERT_EQ(ind, 999);
+}
+
+TEST_F(ValidationTestAddModelLoaded, neg_experimental_input_tensorindex_index_null)
+{
+ ASSERT_EQ(nnfw_input_tensorindex(_session, "X_input", nullptr), NNFW_STATUS_UNEXPECTED_NULL);
+}
+
+TEST_F(ValidationTestAddModelLoaded, neg_experimental_input_name_too_long)
+{
+ std::string long_name(1024, 'x'); // Too long
+ uint32_t ind = 999;
+ ASSERT_EQ(nnfw_output_tensorindex(_session, long_name.c_str(), &ind), NNFW_STATUS_ERROR);
+ ASSERT_EQ(ind, 999);
+}
+
+TEST_F(ValidationTestAddModelLoaded, neg_experimental_input_no_such_name)
+{
+ uint32_t ind = 999;
+ ASSERT_EQ(nnfw_output_tensorindex(_session, "NO_SUCH_TENSOR_NAME", &ind), NNFW_STATUS_ERROR);
+ ASSERT_EQ(ind, 999);
+}
+
+TEST_F(ValidationTestAddModelLoaded, experimental_output_tensorindex)
+{
+ uint32_t ind = 999;
+ NNFW_ENSURE_SUCCESS(nnfw_output_tensorindex(_session, "ADD_TOP", &ind));
+ ASSERT_EQ(ind, 0);
+}
+
+TEST_F(ValidationTestAddModelLoaded, neg_experimental_output_tensorindex_name_null)
+{
+ uint32_t ind = 999;
+ ASSERT_EQ(nnfw_output_tensorindex(_session, nullptr, &ind), NNFW_STATUS_UNEXPECTED_NULL);
+ ASSERT_EQ(ind, 999);
+}
+
+TEST_F(ValidationTestAddModelLoaded, neg_experimental_output_tensorindex_index_null)
+{
+ ASSERT_EQ(nnfw_output_tensorindex(_session, "ADD_TOP", nullptr), NNFW_STATUS_UNEXPECTED_NULL);
+}
+
+TEST_F(ValidationTestAddModelLoaded, neg_experimental_output_name_too_long)
+{
+ std::string long_name(1024, 'x'); // Too long
+ uint32_t ind = 999;
+ ASSERT_EQ(nnfw_output_tensorindex(_session, long_name.c_str(), &ind), NNFW_STATUS_ERROR);
+ ASSERT_EQ(ind, 999);
+}
+
+TEST_F(ValidationTestAddModelLoaded, neg_experimental_output_no_such_name)
+{
+ uint32_t ind = 999;
+ ASSERT_EQ(nnfw_output_tensorindex(_session, "NO_SUCH_TENSOR_NAME", &ind), NNFW_STATUS_ERROR);
+ ASSERT_EQ(ind, 999);
+}
+
+TEST_F(ValidationTestAddModelLoaded, debug_set_config)
+{
+ // Exercise every valid key at least once
+ NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "TRACE_FILEPATH", ""));
+ NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "GRAPH_DOT_DUMP", "0"));
+ NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "GRAPH_DOT_DUMP", "1"));
+ NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "GRAPH_DOT_DUMP", "2"));
+ NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "EXECUTOR", "Linear"));
+ NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "OP_BACKEND_ALLOPS", "cpu"));
+ NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "USE_SCHEDULER", "0"));
+ NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "USE_SCHEDULER", "1"));
+ NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "PROFILING_MODE", "0"));
+ NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "PROFILING_MODE", "1"));
+ SUCCEED();
+}
+
+TEST_F(ValidationTestAddModelLoaded, neg_debug_set_config)
+{
+ // unexpected null args
+ ASSERT_EQ(nnfw_set_config(_session, nullptr, "1"), NNFW_STATUS_UNEXPECTED_NULL);
+ ASSERT_EQ(nnfw_set_config(_session, "EXECUTOR", nullptr), NNFW_STATUS_UNEXPECTED_NULL);
+
+ // wrong keys
+ ASSERT_EQ(nnfw_set_config(_session, "", "1"), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_set_config(_session, "BAD_KEY", "1"), NNFW_STATUS_ERROR);
+}
+
+TEST_F(ValidationTestAddModelLoaded, debug_get_config)
+{
+ // Exercise every valid key at least once
+ char buf[1024];
+ NNFW_ENSURE_SUCCESS(nnfw_get_config(_session, "EXECUTOR", buf, sizeof(buf)));
+ NNFW_ENSURE_SUCCESS(nnfw_get_config(_session, "BACKENDS", buf, sizeof(buf)));
+ SUCCEED();
+}
+
+TEST_F(ValidationTestAddModelLoaded, neg_debug_get_config)
+{
+ // unexpected null args
+ char buf[1024];
+ ASSERT_EQ(nnfw_get_config(_session, nullptr, buf, sizeof(buf)), NNFW_STATUS_UNEXPECTED_NULL);
+ ASSERT_EQ(nnfw_get_config(_session, "EXECUTOR", nullptr, 0), NNFW_STATUS_UNEXPECTED_NULL);
+
+ // buffer is too small
+ ASSERT_EQ(nnfw_get_config(_session, "EXECUTOR", buf, 1), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_get_config(_session, "BACKENDS", buf, 1), NNFW_STATUS_ERROR);
+
+ // wrong keys
+ ASSERT_EQ(nnfw_get_config(_session, "", buf, sizeof(buf)), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_get_config(_session, "BAD_KEY", buf, sizeof(buf)), NNFW_STATUS_ERROR);
+}
diff --git a/tests/nnfw_api/src/ValidationTestAddSessionPrepared.cc b/tests/nnfw_api/src/ValidationTestAddSessionPrepared.cc
deleted file mode 100644
index f19bb782c..000000000
--- a/tests/nnfw_api/src/ValidationTestAddSessionPrepared.cc
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "fixtures.h"
-#include "NNPackages.h"
-
-using ValidationTestAddSessionPrepared = ValidationTestSessionPrepared<NNPackages::ADD>;
-
-TEST_F(ValidationTestAddSessionPrepared, run)
-{
- SetInOutBuffers();
- _input[0] = 3.0;
- NNFW_ENSURE_SUCCESS(nnfw_run(_session));
- ASSERT_FLOAT_EQ(_output[0], 5.0);
-}
-
-TEST_F(ValidationTestAddSessionPrepared, run_twice)
-{
- SetInOutBuffers();
- _input[0] = 4.0;
- NNFW_ENSURE_SUCCESS(nnfw_run(_session));
- ASSERT_FLOAT_EQ(_output[0], 6.0);
-
- _input[0] = 5.0f;
- NNFW_ENSURE_SUCCESS(nnfw_run(_session));
- ASSERT_FLOAT_EQ(_output[0], 7.0);
-}
-
-TEST_F(ValidationTestAddSessionPrepared, run_async)
-{
- SetInOutBuffers();
- _input[0] = 3.0;
- NNFW_ENSURE_SUCCESS(nnfw_run_async(_session));
- NNFW_ENSURE_SUCCESS(nnfw_await(_session));
- ASSERT_FLOAT_EQ(_output[0], 5.0);
-}
-
-TEST_F(ValidationTestAddSessionPrepared, set_input_001)
-{
- char input[32];
- ASSERT_EQ(nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, input, sizeof(input)),
- NNFW_STATUS_NO_ERROR);
-}
-
-TEST_F(ValidationTestAddSessionPrepared, get_input_size)
-{
- uint32_t size = 0;
- NNFW_ENSURE_SUCCESS(nnfw_input_size(_session, &size));
- ASSERT_EQ(size, 1);
-}
-
-TEST_F(ValidationTestAddSessionPrepared, get_output_size)
-{
- uint32_t size = 0;
- NNFW_ENSURE_SUCCESS(nnfw_output_size(_session, &size));
- ASSERT_EQ(size, 1);
-}
-
-TEST_F(ValidationTestAddSessionPrepared, output_tensorinfo)
-{
- nnfw_tensorinfo tensor_info;
- NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(_session, 0, &tensor_info));
- ASSERT_EQ(tensor_info.rank, 1);
- ASSERT_EQ(tensor_info.dims[0], 1);
-}
-
-TEST_F(ValidationTestAddSessionPrepared, neg_await_without_async_run)
-{
- SetInOutBuffers();
- ASSERT_EQ(nnfw_await(_session), NNFW_STATUS_ERROR);
-}
-
-TEST_F(ValidationTestAddSessionPrepared, neg_await_after_sync_run)
-{
- SetInOutBuffers();
- NNFW_ENSURE_SUCCESS(nnfw_run(_session));
- ASSERT_EQ(nnfw_await(_session), NNFW_STATUS_ERROR);
-}
-
-TEST_F(ValidationTestAddSessionPrepared, neg_await_twice)
-{
- SetInOutBuffers();
- NNFW_ENSURE_SUCCESS(nnfw_run_async(_session));
- NNFW_ENSURE_SUCCESS(nnfw_await(_session));
- ASSERT_EQ(nnfw_await(_session), NNFW_STATUS_ERROR);
-}
-
-TEST_F(ValidationTestAddSessionPrepared, neg_run_during_async_run)
-{
- SetInOutBuffers();
- NNFW_ENSURE_SUCCESS(nnfw_run_async(_session));
- EXPECT_EQ(nnfw_run(_session), NNFW_STATUS_INVALID_STATE);
- NNFW_ENSURE_SUCCESS(nnfw_await(_session));
-}
-
-TEST_F(ValidationTestAddSessionPrepared, neg_set_input_001)
-{
- ASSERT_EQ(nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, nullptr, 1), NNFW_STATUS_ERROR);
-}
-
-TEST_F(ValidationTestAddSessionPrepared, neg_set_input_002)
-{
- char input[1]; // buffer size is too small
- ASSERT_EQ(nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, input, sizeof(input)),
- NNFW_STATUS_ERROR);
-}
-
-TEST_F(ValidationTestAddSessionPrepared, set_output_001)
-{
- char buffer[32];
- ASSERT_EQ(nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, buffer, sizeof(buffer)),
- NNFW_STATUS_NO_ERROR);
-}
-
-TEST_F(ValidationTestAddSessionPrepared, neg_set_output_001)
-{
- ASSERT_EQ(nnfw_set_output(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, nullptr, 1), NNFW_STATUS_ERROR);
-}
-
-TEST_F(ValidationTestAddSessionPrepared, neg_set_output_002)
-{
- char input[1]; // buffer size is too small
- ASSERT_EQ(nnfw_set_output(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, input, sizeof(input)),
- NNFW_STATUS_ERROR);
-}
-
-TEST_F(ValidationTestAddSessionPrepared, neg_get_input_size)
-{
- ASSERT_EQ(nnfw_input_size(_session, nullptr), NNFW_STATUS_UNEXPECTED_NULL);
-}
-
-TEST_F(ValidationTestAddSessionPrepared, neg_get_output_size)
-{
- ASSERT_EQ(nnfw_output_size(_session, nullptr), NNFW_STATUS_UNEXPECTED_NULL);
-}
-
-TEST_F(ValidationTestAddSessionPrepared, neg_load_model)
-{
- // Load model twice
- ASSERT_EQ(nnfw_load_model_from_file(
- _session, NNPackages::get().getModelAbsolutePath(NNPackages::ADD).c_str()),
- NNFW_STATUS_INVALID_STATE);
-}
-
-TEST_F(ValidationTestAddSessionPrepared, neg_prepare)
-{
- // Call Prepare twice
- ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_INVALID_STATE);
-}
-
-// TODO Validation check when "nnfw_run" is called without input & output tensor setting
diff --git a/tests/nnfw_api/src/ValidationTestAddSessionPrepared.test.cc b/tests/nnfw_api/src/ValidationTestAddSessionPrepared.test.cc
new file mode 100644
index 000000000..d668a1cb0
--- /dev/null
+++ b/tests/nnfw_api/src/ValidationTestAddSessionPrepared.test.cc
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "fixtures.h"
+#include "NNPackages.h"
+
+using ValidationTestAddSessionPrepared = ValidationTestSessionPrepared<NNPackages::ADD>;
+
+TEST_F(ValidationTestAddSessionPrepared, run)
+{
+ SetInOutBuffers();
+ _input[0] = 3.0;
+ NNFW_ENSURE_SUCCESS(nnfw_run(_session));
+ ASSERT_FLOAT_EQ(_output[0], 5.0);
+}
+
+TEST_F(ValidationTestAddSessionPrepared, run_twice)
+{
+ SetInOutBuffers();
+ _input[0] = 4.0;
+ NNFW_ENSURE_SUCCESS(nnfw_run(_session));
+ ASSERT_FLOAT_EQ(_output[0], 6.0);
+
+ _input[0] = 5.0f;
+ NNFW_ENSURE_SUCCESS(nnfw_run(_session));
+ ASSERT_FLOAT_EQ(_output[0], 7.0);
+}
+
+TEST_F(ValidationTestAddSessionPrepared, run_many_times_dynamic_input)
+{
+ for (int v = 1; v <= 5; v++) // 5 times with different shapes
+ {
+ nnfw_tensorinfo ti_input = {NNFW_TYPE_TENSOR_FLOAT32, 4, {1, 1, 1, v}};
+ SetInOutBuffersDynamic(&ti_input);
+
+ for (int i = 0; i < v; i++)
+ _input[i] = i * 10.0;
+
+ NNFW_ENSURE_SUCCESS(nnfw_run(_session));
+
+ // Check if the shape inference is correct
+ nnfw_tensorinfo ti_output;
+ ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, &ti_output), NNFW_STATUS_NO_ERROR);
+ EXPECT_EQ(num_elems(&ti_input), num_elems(&ti_output));
+
+ for (int i = 0; i < v; i++)
+ ASSERT_FLOAT_EQ(_output[i], i * 10.0 + 2.0) << "i : " << i;
+ }
+}
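+
+// num_elems() and SetInOutBuffersDynamic() come from the shared test fixtures;
+// num_elems() is assumed here to return the product of all dims of an nnfw_tensorinfo.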
+
+TEST_F(ValidationTestAddSessionPrepared, run_async)
+{
+ SetInOutBuffers();
+ _input[0] = 3.0;
+ NNFW_ENSURE_SUCCESS(nnfw_run_async(_session));
+ NNFW_ENSURE_SUCCESS(nnfw_await(_session));
+ ASSERT_FLOAT_EQ(_output[0], 5.0);
+}
+
+TEST_F(ValidationTestAddSessionPrepared, set_input_001)
+{
+ char input[32];
+ ASSERT_EQ(nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, input, sizeof(input)),
+ NNFW_STATUS_NO_ERROR);
+}
+
+TEST_F(ValidationTestAddSessionPrepared, get_input_size)
+{
+ uint32_t size = 0;
+ NNFW_ENSURE_SUCCESS(nnfw_input_size(_session, &size));
+ ASSERT_EQ(size, 1);
+}
+
+TEST_F(ValidationTestAddSessionPrepared, get_output_size)
+{
+ uint32_t size = 0;
+ NNFW_ENSURE_SUCCESS(nnfw_output_size(_session, &size));
+ ASSERT_EQ(size, 1);
+}
+
+TEST_F(ValidationTestAddSessionPrepared, output_tensorinfo)
+{
+ nnfw_tensorinfo tensor_info;
+ NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(_session, 0, &tensor_info));
+ ASSERT_EQ(tensor_info.rank, 1);
+ ASSERT_EQ(tensor_info.dims[0], 1);
+}
+
+TEST_F(ValidationTestAddSessionPrepared, neg_await_without_async_run)
+{
+ SetInOutBuffers();
+ ASSERT_EQ(nnfw_await(_session), NNFW_STATUS_ERROR);
+}
+
+TEST_F(ValidationTestAddSessionPrepared, neg_await_after_sync_run)
+{
+ SetInOutBuffers();
+ NNFW_ENSURE_SUCCESS(nnfw_run(_session));
+ ASSERT_EQ(nnfw_await(_session), NNFW_STATUS_ERROR);
+}
+
+TEST_F(ValidationTestAddSessionPrepared, neg_await_twice)
+{
+ SetInOutBuffers();
+ NNFW_ENSURE_SUCCESS(nnfw_run_async(_session));
+ NNFW_ENSURE_SUCCESS(nnfw_await(_session));
+ ASSERT_EQ(nnfw_await(_session), NNFW_STATUS_ERROR);
+}
+
+TEST_F(ValidationTestAddSessionPrepared, neg_run_during_async_run)
+{
+ SetInOutBuffers();
+ NNFW_ENSURE_SUCCESS(nnfw_run_async(_session));
+ EXPECT_EQ(nnfw_run(_session), NNFW_STATUS_INVALID_STATE);
+ NNFW_ENSURE_SUCCESS(nnfw_await(_session));
+}
+
+TEST_F(ValidationTestAddSessionPrepared, neg_set_input_001)
+{
+ ASSERT_EQ(nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, nullptr, 1), NNFW_STATUS_ERROR);
+}
+
+TEST_F(ValidationTestAddSessionPrepared, neg_set_input_002)
+{
+ char input[1]; // buffer size is too small
+ ASSERT_EQ(nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, input, sizeof(input)),
+ NNFW_STATUS_ERROR);
+}
+
+TEST_F(ValidationTestAddSessionPrepared, set_output_001)
+{
+ char buffer[32];
+ ASSERT_EQ(nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, buffer, sizeof(buffer)),
+ NNFW_STATUS_NO_ERROR);
+}
+
+TEST_F(ValidationTestAddSessionPrepared, neg_set_output_001)
+{
+ ASSERT_EQ(nnfw_set_output(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, nullptr, 1), NNFW_STATUS_ERROR);
+}
+
+TEST_F(ValidationTestAddSessionPrepared, neg_set_output_002)
+{
+ char input[1]; // buffer size is too small
+ ASSERT_EQ(nnfw_set_output(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, input, sizeof(input)),
+ NNFW_STATUS_ERROR);
+}
+
+TEST_F(ValidationTestAddSessionPrepared, neg_get_input_size)
+{
+ ASSERT_EQ(nnfw_input_size(_session, nullptr), NNFW_STATUS_UNEXPECTED_NULL);
+}
+
+TEST_F(ValidationTestAddSessionPrepared, neg_get_output_size)
+{
+ ASSERT_EQ(nnfw_output_size(_session, nullptr), NNFW_STATUS_UNEXPECTED_NULL);
+}
+
+TEST_F(ValidationTestAddSessionPrepared, neg_load_model)
+{
+ // Load model twice
+ ASSERT_EQ(nnfw_load_model_from_file(
+ _session, NNPackages::get().getModelAbsolutePath(NNPackages::ADD).c_str()),
+ NNFW_STATUS_INVALID_STATE);
+}
+
+TEST_F(ValidationTestAddSessionPrepared, neg_prepare)
+{
+ // Call Prepare twice
+ ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_INVALID_STATE);
+}
+
+TEST_F(ValidationTestAddSessionPrepared, neg_run_without_set_output)
+{
+ uint8_t input[4];
+ NNFW_ENSURE_SUCCESS(nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, input, sizeof(input)));
+ // `nnfw_set_output()` is not called
+ ASSERT_EQ(nnfw_run(_session), NNFW_STATUS_ERROR);
+}
+
+TEST_F(ValidationTestAddSessionPrepared, neg_internal_set_config)
+{
+ // All arguments are valid, but the session state is wrong
+ ASSERT_EQ(nnfw_set_config(_session, "TRACE_FILEPATH", ""), NNFW_STATUS_INVALID_STATE);
+ ASSERT_EQ(nnfw_set_config(_session, "GRAPH_DOT_DUMP", "0"), NNFW_STATUS_INVALID_STATE);
+}
+
+// TODO Validation check when "nnfw_run" is called without input & output tensor setting
diff --git a/tests/nnfw_api/src/ValidationTestFourAddModelsSetInput.cc b/tests/nnfw_api/src/ValidationTestFourAddModelsSetInput.test.cc
index e09402b01..e09402b01 100644
--- a/tests/nnfw_api/src/ValidationTestFourAddModelsSetInput.cc
+++ b/tests/nnfw_api/src/ValidationTestFourAddModelsSetInput.test.cc
diff --git a/tests/nnfw_api/src/ValidationTestMultipleSessions.test.cc b/tests/nnfw_api/src/ValidationTestMultipleSessions.test.cc
new file mode 100644
index 000000000..ef00dc6bd
--- /dev/null
+++ b/tests/nnfw_api/src/ValidationTestMultipleSessions.test.cc
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "fixtures.h"
+#include "one_op_tests/WhileTestModel.h"
+
+TEST_F(ValidationTestTwoSessions, neg_two_sessions_create)
+{
+ ASSERT_EQ(nnfw_create_session(&_session1), NNFW_STATUS_NO_ERROR);
+ ASSERT_EQ(nnfw_create_session(nullptr), NNFW_STATUS_UNEXPECTED_NULL);
+
+ ASSERT_EQ(nnfw_close_session(_session1), NNFW_STATUS_NO_ERROR);
+}
+
+class AveragePoolModel
+{
+public:
+ AveragePoolModel(int N, int H, int W, int C)
+ {
+ CircleGen cgen;
+ int in = cgen.addTensor({{N, H, W, C}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{N, H / 2, W / 2, C}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+ cbuf = cgen.finish();
+ }
+
+ CircleBuffer cbuf;
+};
+
+TEST_F(ValidationTestTwoSessionsCreated, two_sessions_run_simple_AveragePool_model)
+{
+ constexpr int N = 64, H = 64, W = 64, C = 3;
+ AveragePoolModel model(N, H, W, C);
+
+ NNFW_ENSURE_SUCCESS(
+ nnfw_load_circle_from_buffer(_session1, model.cbuf.buffer(), model.cbuf.size()));
+ NNFW_ENSURE_SUCCESS(
+ nnfw_load_circle_from_buffer(_session2, model.cbuf.buffer(), model.cbuf.size()));
+
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session1, "cpu"));
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session2, "cpu"));
+
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(_session1));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(_session2));
+
+ constexpr int input_count = N * H * W * C;
+ constexpr int output_count = N * H / 2 * W / 2 * C;
+
+ std::vector<float> in_buf1(input_count); // any value
+ std::vector<float> out_buf1(output_count);
+
+ NNFW_ENSURE_SUCCESS(nnfw_set_input(_session1, 0, NNFW_TYPE_TENSOR_FLOAT32, in_buf1.data(),
+ in_buf1.size() * sizeof(float)));
+ NNFW_ENSURE_SUCCESS(nnfw_set_output(_session1, 0, NNFW_TYPE_TENSOR_FLOAT32, out_buf1.data(),
+ out_buf1.size() * sizeof(float)));
+
+ std::vector<float> in_buf2(input_count); // any value
+ std::vector<float> out_buf2(output_count);
+
+ NNFW_ENSURE_SUCCESS(nnfw_set_input(_session2, 0, NNFW_TYPE_TENSOR_FLOAT32, in_buf2.data(),
+ in_buf2.size() * sizeof(float)));
+ NNFW_ENSURE_SUCCESS(nnfw_set_output(_session2, 0, NNFW_TYPE_TENSOR_FLOAT32, out_buf2.data(),
+ out_buf2.size() * sizeof(float)));
+
+ NNFW_ENSURE_SUCCESS(nnfw_run_async(_session1));
+ NNFW_ENSURE_SUCCESS(nnfw_run_async(_session2));
+
+ NNFW_ENSURE_SUCCESS(nnfw_await(_session1));
+ NNFW_ENSURE_SUCCESS(nnfw_await(_session2));
+
+ SUCCEED();
+}
+
+TEST_F(ValidationTestTwoSessionsCreated, neg_two_sessions_model_load)
+{
+ constexpr int N = 64, H = 64, W = 64, C = 3;
+ AveragePoolModel model(N, H, W, C);
+
+ NNFW_ENSURE_SUCCESS(
+ nnfw_load_circle_from_buffer(_session1, model.cbuf.buffer(), model.cbuf.size()));
+ ASSERT_EQ(nnfw_load_circle_from_buffer(nullptr, model.cbuf.buffer(), model.cbuf.size()),
+ NNFW_STATUS_UNEXPECTED_NULL);
+}
+
+TEST_F(ValidationTestTwoSessionsCreated, two_sessions_run_simple_While_model)
+{
+ WhileModelLoop10 model;
+
+ NNFW_ENSURE_SUCCESS(
+ nnfw_load_circle_from_buffer(_session1, model.cbuf.buffer(), model.cbuf.size()));
+ NNFW_ENSURE_SUCCESS(
+ nnfw_load_circle_from_buffer(_session2, model.cbuf.buffer(), model.cbuf.size()));
+
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session1, "cpu"));
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session2, "cpu"));
+
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(_session1));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(_session2));
+
+ std::vector<float> in_buf1(model.inputCount()); // any value
+ std::vector<float> out_buf1(model.outputputCount());
+
+ NNFW_ENSURE_SUCCESS(nnfw_set_input(_session1, 0, NNFW_TYPE_TENSOR_FLOAT32, in_buf1.data(),
+ in_buf1.size() * model.sizeOfDType()));
+ NNFW_ENSURE_SUCCESS(nnfw_set_output(_session1, 0, NNFW_TYPE_TENSOR_FLOAT32, out_buf1.data(),
+ out_buf1.size() * model.sizeOfDType()));
+
+ std::vector<float> in_buf2(model.inputCount()); // any value
+ std::vector<float> out_buf2(model.outputputCount());
+
+ NNFW_ENSURE_SUCCESS(nnfw_set_input(_session2, 0, NNFW_TYPE_TENSOR_FLOAT32, in_buf2.data(),
+ in_buf2.size() * model.sizeOfDType()));
+ NNFW_ENSURE_SUCCESS(nnfw_set_output(_session2, 0, NNFW_TYPE_TENSOR_FLOAT32, out_buf2.data(),
+ out_buf2.size() * model.sizeOfDType()));
+
+ NNFW_ENSURE_SUCCESS(nnfw_run_async(_session1));
+ NNFW_ENSURE_SUCCESS(nnfw_run_async(_session2));
+
+ NNFW_ENSURE_SUCCESS(nnfw_await(_session1));
+ NNFW_ENSURE_SUCCESS(nnfw_await(_session2));
+
+ SUCCEED();
+}
+
+// TODO Write two-session-test with large models run by threads
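+// A minimal sketch of such a test (illustrative only, not part of this change):
+// each session could be driven from its own std::thread, e.g.
+//
+//   std::thread t1([&] { EXPECT_EQ(nnfw_run(_session1), NNFW_STATUS_NO_ERROR); });
+//   std::thread t2([&] { EXPECT_EQ(nnfw_run(_session2), NNFW_STATUS_NO_ERROR); });
+//   t1.join();
+//   t2.join();
+//
+// EXPECT_* rather than ASSERT_* because gtest fatal assertions only abort the
+// calling function and do not propagate out of helper threads.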
diff --git a/tests/nnfw_api/src/ValidationTestSessionCreated.cc b/tests/nnfw_api/src/ValidationTestSessionCreated.cc
deleted file mode 100644
index 4ef14f745..000000000
--- a/tests/nnfw_api/src/ValidationTestSessionCreated.cc
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "NNPackages.h"
-#include "fixtures.h"
-
-TEST_F(ValidationTestSessionCreated, load_session_001)
-{
- // Existing model must
- ASSERT_EQ(nnfw_load_model_from_file(
- _session, NNPackages::get().getModelAbsolutePath(NNPackages::ADD).c_str()),
- NNFW_STATUS_NO_ERROR);
-}
-
-TEST_F(ValidationTestSessionCreated, close_and_create_again)
-{
- NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
- NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
-
- SUCCEED();
-}
-
-TEST_F(ValidationTestSessionCreated, neg_load_session_1)
-{
- ASSERT_EQ(nnfw_load_model_from_file(
- _session, NNPackages::get().getModelAbsolutePath("nonexisting_directory").c_str()),
- NNFW_STATUS_ERROR);
-}
-
-TEST_F(ValidationTestSessionCreated, neg_load_session_2)
-{
- ASSERT_EQ(nnfw_load_model_from_file(_session, nullptr), NNFW_STATUS_UNEXPECTED_NULL);
-}
-
-TEST_F(ValidationTestSessionCreated, neg_load_session_3)
-{
- // Too long path
- const std::string long_path(1024, 'x');
- ASSERT_EQ(nnfw_load_model_from_file(
- _session, NNPackages::get().getModelAbsolutePath(long_path.c_str()).c_str()),
- NNFW_STATUS_ERROR);
-}
-
-TEST_F(ValidationTestSessionCreated, neg_load_invalid_package_1)
-{
- ASSERT_EQ(
- nnfw_load_model_from_file(
- _session, NNPackages::get().getModelAbsolutePath(NNPackages::ADD_NO_MANIFEST).c_str()),
- NNFW_STATUS_ERROR);
- ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_INVALID_STATE);
-}
-
-TEST_F(ValidationTestSessionCreated, neg_load_invalid_package_2)
-{
- ASSERT_EQ(nnfw_load_model_from_file(
- _session,
- NNPackages::get().getModelAbsolutePath(NNPackages::ADD_INVALID_MANIFEST).c_str()),
- NNFW_STATUS_ERROR);
- ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_INVALID_STATE);
-}
-
-TEST_F(ValidationTestSessionCreated, neg_prepare_001)
-{
- // nnfw_load_model_from_file was not called
- ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_INVALID_STATE);
-}
-
-TEST_F(ValidationTestSessionCreated, neg_run_001)
-{
- // nnfw_load_model_from_file and nnfw_prepare was not called
- ASSERT_EQ(nnfw_run(_session), NNFW_STATUS_INVALID_STATE);
-}
-
-TEST_F(ValidationTestSessionCreated, neg_set_input_001)
-{
- ASSERT_EQ(nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, nullptr, 0),
- NNFW_STATUS_INVALID_STATE);
-}
-
-TEST_F(ValidationTestSessionCreated, neg_set_output_001)
-{
- ASSERT_EQ(nnfw_set_output(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, nullptr, 0),
- NNFW_STATUS_INVALID_STATE);
-}
-
-TEST_F(ValidationTestSessionCreated, neg_get_input_size)
-{
- uint32_t size = 10000;
- ASSERT_EQ(nnfw_input_size(_session, &size), NNFW_STATUS_INVALID_STATE);
- ASSERT_EQ(size, 10000); // Remain unchanged
-}
-
-TEST_F(ValidationTestSessionCreated, neg_get_output_size)
-{
- uint32_t size = 10000;
- ASSERT_EQ(nnfw_output_size(_session, &size), NNFW_STATUS_INVALID_STATE);
- ASSERT_EQ(size, 10000); // Remain unchanged
-}
-
-TEST_F(ValidationTestSessionCreated, neg_output_tensorinfo)
-{
- nnfw_tensorinfo tensor_info;
- // model is not loaded
- ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, &tensor_info), NNFW_STATUS_INVALID_STATE);
- // model is not loaded and tensor_info is null
- ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, nullptr), NNFW_STATUS_INVALID_STATE);
-}
diff --git a/tests/nnfw_api/src/ValidationTestSessionCreated.test.cc b/tests/nnfw_api/src/ValidationTestSessionCreated.test.cc
new file mode 100644
index 000000000..cb0791933
--- /dev/null
+++ b/tests/nnfw_api/src/ValidationTestSessionCreated.test.cc
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NNPackages.h"
+#include "fixtures.h"
+
+TEST_F(ValidationTestSessionCreated, load_session_001)
+{
+ // Existing model must load successfully
+ ASSERT_EQ(nnfw_load_model_from_file(
+ _session, NNPackages::get().getModelAbsolutePath(NNPackages::ADD).c_str()),
+ NNFW_STATUS_NO_ERROR);
+}
+
+TEST_F(ValidationTestSessionCreated, close_and_create_again)
+{
+ NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
+
+ SUCCEED();
+}
+
+TEST_F(ValidationTestSessionCreated, neg_load_session_1)
+{
+ ASSERT_EQ(nnfw_load_model_from_file(
+ _session, NNPackages::get().getModelAbsolutePath("nonexisting_directory").c_str()),
+ NNFW_STATUS_ERROR);
+}
+
+TEST_F(ValidationTestSessionCreated, neg_load_session_2)
+{
+ ASSERT_EQ(nnfw_load_model_from_file(_session, nullptr), NNFW_STATUS_UNEXPECTED_NULL);
+}
+
+TEST_F(ValidationTestSessionCreated, neg_load_session_3)
+{
+ // Too long path
+ const std::string long_path(1024, 'x');
+ ASSERT_EQ(nnfw_load_model_from_file(
+ _session, NNPackages::get().getModelAbsolutePath(long_path.c_str()).c_str()),
+ NNFW_STATUS_ERROR);
+}
+
+TEST_F(ValidationTestSessionCreated, neg_load_invalid_package_1)
+{
+ ASSERT_EQ(
+ nnfw_load_model_from_file(
+ _session, NNPackages::get().getModelAbsolutePath(NNPackages::ADD_NO_MANIFEST).c_str()),
+ NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_INVALID_STATE);
+}
+
+TEST_F(ValidationTestSessionCreated, neg_load_invalid_package_2)
+{
+ ASSERT_EQ(
+ nnfw_load_model_from_file(
+ _session, NNPackages::get().getModelAbsolutePath(NNPackages::ADD_INVALID_MANIFEST).c_str()),
+ NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_INVALID_STATE);
+}
+
+TEST_F(ValidationTestSessionCreated, neg_prepare_001)
+{
+ // nnfw_load_model_from_file was not called
+ ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_INVALID_STATE);
+}
+
+TEST_F(ValidationTestSessionCreated, neg_run_001)
+{
+ // nnfw_load_model_from_file and nnfw_prepare was not called
+ ASSERT_EQ(nnfw_run(_session), NNFW_STATUS_INVALID_STATE);
+}
+
+TEST_F(ValidationTestSessionCreated, neg_set_input_001)
+{
+ ASSERT_EQ(nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, nullptr, 0),
+ NNFW_STATUS_INVALID_STATE);
+}
+
+TEST_F(ValidationTestSessionCreated, neg_set_output_001)
+{
+ ASSERT_EQ(nnfw_set_output(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, nullptr, 0),
+ NNFW_STATUS_INVALID_STATE);
+}
+
+TEST_F(ValidationTestSessionCreated, neg_get_input_size)
+{
+ uint32_t size = 10000;
+ ASSERT_EQ(nnfw_input_size(_session, &size), NNFW_STATUS_INVALID_STATE);
+ ASSERT_EQ(size, 10000); // Remain unchanged
+}
+
+TEST_F(ValidationTestSessionCreated, neg_get_output_size)
+{
+ uint32_t size = 10000;
+ ASSERT_EQ(nnfw_output_size(_session, &size), NNFW_STATUS_INVALID_STATE);
+ ASSERT_EQ(size, 10000); // Remain unchanged
+}
+
+TEST_F(ValidationTestSessionCreated, neg_output_tensorinfo)
+{
+ nnfw_tensorinfo tensor_info;
+ // model is not loaded
+ ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, &tensor_info), NNFW_STATUS_INVALID_STATE);
+ // model is not loaded and tensor_info is null
+ ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, nullptr), NNFW_STATUS_INVALID_STATE);
+}
+
+TEST_F(ValidationTestSessionCreated, neg_internal_set_config)
+{
+ // All arguments are valid, but the session state is wrong
+ ASSERT_EQ(nnfw_set_config(_session, "TRACE_FILEPATH", ""), NNFW_STATUS_INVALID_STATE);
+ ASSERT_EQ(nnfw_set_config(_session, "GRAPH_DOT_DUMP", "0"), NNFW_STATUS_INVALID_STATE);
+}
diff --git a/tests/nnfw_api/src/ValidationTestSingleSession.cc b/tests/nnfw_api/src/ValidationTestSingleSession.cc
deleted file mode 100644
index c74649203..000000000
--- a/tests/nnfw_api/src/ValidationTestSingleSession.cc
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "NNPackages.h"
-#include "fixtures.h"
-
-TEST_F(ValidationTestSingleSession, create_001)
-{
- NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
- NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
- SUCCEED();
-}
-
-TEST_F(ValidationTestSingleSession, query_info_u32)
-{
- uint32_t val = 0;
- NNFW_ENSURE_SUCCESS(nnfw_query_info_u32(nullptr, NNFW_INFO_ID_VERSION, &val));
-
- SUCCEED();
-}
-
-TEST_F(ValidationTestSingleSession, neg_create_001)
-{
- ASSERT_EQ(nnfw_create_session(nullptr), NNFW_STATUS_UNEXPECTED_NULL);
-}
-
-TEST_F(ValidationTestSingleSession, neg_run_001)
-{
- ASSERT_EQ(nnfw_run(nullptr), NNFW_STATUS_UNEXPECTED_NULL);
-}
-
-TEST_F(ValidationTestSingleSession, neg_set_input_001)
-{
- // Invalid session
- ASSERT_EQ(nnfw_set_input(nullptr, 0, NNFW_TYPE_TENSOR_FLOAT32, nullptr, 0),
- NNFW_STATUS_UNEXPECTED_NULL);
-}
-
-TEST_F(ValidationTestSingleSession, neg_set_input_002)
-{
- char input[32];
- ASSERT_EQ(nnfw_set_input(nullptr, 0, NNFW_TYPE_TENSOR_FLOAT32, input, sizeof(input)),
- NNFW_STATUS_UNEXPECTED_NULL);
-}
-
-TEST_F(ValidationTestSingleSession, neg_set_output_001)
-{
- // Invalid session
- ASSERT_EQ(nnfw_set_output(nullptr, 0, NNFW_TYPE_TENSOR_FLOAT32, nullptr, 0),
- NNFW_STATUS_UNEXPECTED_NULL);
-}
-
-TEST_F(ValidationTestSingleSession, neg_set_output_002)
-{
- char buffer[32];
- ASSERT_EQ(nnfw_set_output(nullptr, 0, NNFW_TYPE_TENSOR_FLOAT32, buffer, sizeof(buffer)),
- NNFW_STATUS_UNEXPECTED_NULL);
-}
-
-TEST_F(ValidationTestSingleSession, neg_get_input_size)
-{
- uint32_t size = 10000;
- ASSERT_EQ(nnfw_input_size(nullptr, &size), NNFW_STATUS_UNEXPECTED_NULL);
- ASSERT_EQ(size, 10000);
-}
-
-TEST_F(ValidationTestSingleSession, neg_get_output_size)
-{
- uint32_t size = 10000;
- ASSERT_EQ(nnfw_output_size(nullptr, &size), NNFW_STATUS_UNEXPECTED_NULL);
- ASSERT_EQ(size, 10000);
-}
-
-TEST_F(ValidationTestSingleSession, neg_load_model)
-{
- // Invalid state
- ASSERT_EQ(nnfw_load_model_from_file(
- nullptr, NNPackages::get().getModelAbsolutePath(NNPackages::ADD).c_str()),
- NNFW_STATUS_UNEXPECTED_NULL);
-}
-
-TEST_F(ValidationTestSingleSession, neg_prepare_001)
-{
- ASSERT_EQ(nnfw_prepare(nullptr), NNFW_STATUS_UNEXPECTED_NULL);
-}
-
-TEST_F(ValidationTestSingleSession, neg_query_info_u32)
-{
- ASSERT_EQ(nnfw_query_info_u32(nullptr, NNFW_INFO_ID_VERSION, nullptr), NNFW_STATUS_ERROR);
-}
-
-TEST_F(ValidationTestSingleSession, neg_output_tensorinfo)
-{
- nnfw_tensorinfo tensor_info;
- ASSERT_EQ(nnfw_output_tensorinfo(nullptr, 0, &tensor_info), NNFW_STATUS_UNEXPECTED_NULL);
- ASSERT_EQ(nnfw_output_tensorinfo(nullptr, 0, nullptr), NNFW_STATUS_UNEXPECTED_NULL);
-}
diff --git a/tests/nnfw_api/src/ValidationTestSingleSession.test.cc b/tests/nnfw_api/src/ValidationTestSingleSession.test.cc
new file mode 100644
index 000000000..852d5cd21
--- /dev/null
+++ b/tests/nnfw_api/src/ValidationTestSingleSession.test.cc
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NNPackages.h"
+#include "fixtures.h"
+
+TEST_F(ValidationTestSingleSession, create_001)
+{
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
+ NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
+
+ SUCCEED();
+}
+
+TEST_F(ValidationTestSingleSession, query_info_u32)
+{
+ uint32_t val = 0;
+ NNFW_ENSURE_SUCCESS(nnfw_query_info_u32(nullptr, NNFW_INFO_ID_VERSION, &val));
+
+ SUCCEED();
+}
+
+TEST_F(ValidationTestSingleSession, neg_create_001)
+{
+ ASSERT_EQ(nnfw_create_session(nullptr), NNFW_STATUS_UNEXPECTED_NULL);
+}
+
+TEST_F(ValidationTestSingleSession, neg_run_001)
+{
+ ASSERT_EQ(nnfw_run(nullptr), NNFW_STATUS_UNEXPECTED_NULL);
+}
+
+TEST_F(ValidationTestSingleSession, neg_set_input_001)
+{
+ // Invalid session
+ ASSERT_EQ(nnfw_set_input(nullptr, 0, NNFW_TYPE_TENSOR_FLOAT32, nullptr, 0),
+ NNFW_STATUS_UNEXPECTED_NULL);
+}
+
+TEST_F(ValidationTestSingleSession, neg_set_input_002)
+{
+ char input[32];
+ ASSERT_EQ(nnfw_set_input(nullptr, 0, NNFW_TYPE_TENSOR_FLOAT32, input, sizeof(input)),
+ NNFW_STATUS_UNEXPECTED_NULL);
+}
+
+TEST_F(ValidationTestSingleSession, neg_set_output_001)
+{
+ // Invalid session
+ ASSERT_EQ(nnfw_set_output(nullptr, 0, NNFW_TYPE_TENSOR_FLOAT32, nullptr, 0),
+ NNFW_STATUS_UNEXPECTED_NULL);
+}
+
+TEST_F(ValidationTestSingleSession, neg_set_output_002)
+{
+ char buffer[32];
+ ASSERT_EQ(nnfw_set_output(nullptr, 0, NNFW_TYPE_TENSOR_FLOAT32, buffer, sizeof(buffer)),
+ NNFW_STATUS_UNEXPECTED_NULL);
+}
+
+TEST_F(ValidationTestSingleSession, neg_get_input_size)
+{
+ uint32_t size = 10000;
+ ASSERT_EQ(nnfw_input_size(nullptr, &size), NNFW_STATUS_UNEXPECTED_NULL);
+ ASSERT_EQ(size, 10000);
+}
+
+TEST_F(ValidationTestSingleSession, neg_get_output_size)
+{
+ uint32_t size = 10000;
+ ASSERT_EQ(nnfw_output_size(nullptr, &size), NNFW_STATUS_UNEXPECTED_NULL);
+ ASSERT_EQ(size, 10000);
+}
+
+TEST_F(ValidationTestSingleSession, neg_load_model)
+{
+ // Invalid state
+ ASSERT_EQ(nnfw_load_model_from_file(
+ nullptr, NNPackages::get().getModelAbsolutePath(NNPackages::ADD).c_str()),
+ NNFW_STATUS_UNEXPECTED_NULL);
+}
+
+TEST_F(ValidationTestSingleSession, neg_prepare_001)
+{
+ ASSERT_EQ(nnfw_prepare(nullptr), NNFW_STATUS_UNEXPECTED_NULL);
+}
+
+TEST_F(ValidationTestSingleSession, neg_query_info_u32)
+{
+ ASSERT_EQ(nnfw_query_info_u32(nullptr, NNFW_INFO_ID_VERSION, nullptr), NNFW_STATUS_ERROR);
+}
+
+TEST_F(ValidationTestSingleSession, neg_output_tensorinfo)
+{
+ nnfw_tensorinfo tensor_info;
+ ASSERT_EQ(nnfw_output_tensorinfo(nullptr, 0, &tensor_info), NNFW_STATUS_UNEXPECTED_NULL);
+ ASSERT_EQ(nnfw_output_tensorinfo(nullptr, 0, nullptr), NNFW_STATUS_UNEXPECTED_NULL);
+}
+
+TEST_F(ValidationTestSingleSession, neg_experimental_input_tensorindex_session_null)
+{
+ uint32_t ind = 999;
+ ASSERT_EQ(nnfw_input_tensorindex(nullptr, "X_input", &ind), NNFW_STATUS_UNEXPECTED_NULL);
+ ASSERT_EQ(ind, 999);
+}
+
+TEST_F(ValidationTestSingleSession, neg_experimental_output_tensorindex_session_null)
+{
+ uint32_t ind = 999;
+ ASSERT_EQ(nnfw_output_tensorindex(nullptr, "ADD_TOP", &ind), NNFW_STATUS_UNEXPECTED_NULL);
+ ASSERT_EQ(ind, 999);
+}
+
+TEST_F(ValidationTestSingleSession, neg_internal_set_config)
+{
+ ASSERT_EQ(nnfw_set_config(nullptr, "TRACE_FILEPATH", ""), NNFW_STATUS_UNEXPECTED_NULL);
+ ASSERT_EQ(nnfw_set_config(nullptr, "GRAPH_DOT_DUMP", "0"), NNFW_STATUS_UNEXPECTED_NULL);
+}
diff --git a/tests/nnfw_api/src/fixtures.h b/tests/nnfw_api/src/fixtures.h
index f273d6553..0e43a2f03 100644
--- a/tests/nnfw_api/src/fixtures.h
+++ b/tests/nnfw_api/src/fixtures.h
@@ -20,8 +20,10 @@
#include <array>
#include <gtest/gtest.h>
#include <nnfw_experimental.h>
+#include <nnfw_internal.h>
#include "NNPackages.h"
+#include "CircleGen.h"
#define NNFW_ENSURE_SUCCESS(EXPR) ASSERT_EQ((EXPR), NNFW_STATUS_NO_ERROR)
@@ -67,6 +69,7 @@ protected:
{
ValidationTestSingleSession::SetUp();
ASSERT_EQ(nnfw_create_session(&_session), NNFW_STATUS_NO_ERROR);
+ ASSERT_NE(_session, nullptr);
}
void TearDown() override
@@ -76,19 +79,43 @@ protected:
}
};
+inline CircleBuffer genAddModel()
+{
+ CircleGen cgen;
+ std::vector<float> rhs_data{2};
+ uint32_t rhs_buf = cgen.addBuffer(rhs_data);
+ int lhs = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, 0, "X_input"});
+ int rhs = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, rhs_buf, "y_var"});
+ int out = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, 0, "ADD_TOP"});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs}, {out});
+ return cgen.finish();
+}
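+// genAddModel() builds a single-operator graph computing ADD_TOP = X_input + 2
+// on one-element float tensors, letting the fixtures below load a model from
+// memory instead of a downloaded nnpackage.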
+
template <int PackageNo> class ValidationTestModelLoaded : public ValidationTestSessionCreated
{
protected:
void SetUp() override
{
ValidationTestSessionCreated::SetUp();
- ASSERT_EQ(nnfw_load_model_from_file(_session,
- NNPackages::get().getModelAbsolutePath(PackageNo).c_str()),
- NNFW_STATUS_NO_ERROR);
- ASSERT_NE(_session, nullptr);
+ if (PackageNo == NNPackages::ADD)
+ {
+ // NOTE The circle buffer must stay alive until the test finishes, so keep it as a member
+ _cbuf = genAddModel();
+ NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(_session, _cbuf.buffer(), _cbuf.size()));
+ }
+ else
+ {
+ // TODO Eventually, remove the tests that rely on downloaded models.
+ NNFW_ENSURE_SUCCESS(nnfw_load_model_from_file(
+ _session, NNPackages::get().getModelAbsolutePath(PackageNo).c_str()));
+ }
}
void TearDown() override { ValidationTestSessionCreated::TearDown(); }
+
+private:
+ CircleBuffer _cbuf; // Used only for models from buffer, unused for models from files
};
template <int PackageNo>
@@ -113,8 +140,8 @@ protected:
EXPECT_EQ(input_elements, 1);
_input.resize(input_elements);
ASSERT_EQ(
- nnfw_set_input(_session, 0, ti_input.dtype, _input.data(), sizeof(float) * input_elements),
- NNFW_STATUS_NO_ERROR);
+ nnfw_set_input(_session, 0, ti_input.dtype, _input.data(), sizeof(float) * input_elements),
+ NNFW_STATUS_NO_ERROR);
nnfw_tensorinfo ti_output;
ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, &ti_output), NNFW_STATUS_NO_ERROR);
@@ -126,6 +153,21 @@ protected:
NNFW_STATUS_NO_ERROR);
}
+ void SetInOutBuffersDynamic(const nnfw_tensorinfo *ti_input)
+ {
+ NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, ti_input));
+ uint64_t input_elements = num_elems(ti_input);
+ _input.resize(input_elements);
+ ASSERT_EQ(
+ nnfw_set_input(_session, 0, ti_input->dtype, _input.data(), sizeof(float) * input_elements),
+ NNFW_STATUS_NO_ERROR);
+
+ _output.resize(40000); // Give sufficient size for the output
+ ASSERT_EQ(
+ nnfw_set_output(_session, 0, ti_input->dtype, _output.data(), sizeof(float) * _output.size()),
+ NNFW_STATUS_NO_ERROR);
+ }
+
protected:
std::vector<float> _input;
std::vector<float> _output;
@@ -140,12 +182,14 @@ protected:
{
ValidationTest::SetUp();
- auto model_path = NNPackages::get().getModelAbsolutePath(NNPackages::ADD);
for (auto &obj : _objects)
{
ASSERT_EQ(nnfw_create_session(&obj.session), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_load_model_from_file(obj.session, model_path.c_str()), NNFW_STATUS_NO_ERROR);
+
+ auto cbuf = genAddModel();
+ NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(obj.session, cbuf.buffer(), cbuf.size()));
ASSERT_EQ(nnfw_prepare(obj.session), NNFW_STATUS_NO_ERROR);
+ _cbufs.push_back(std::move(cbuf)); // Keep the buffer so it can outlive the session
uint32_t num_inputs;
ASSERT_EQ(nnfw_input_size(obj.session, &num_inputs), NNFW_STATUS_NO_ERROR);
@@ -188,6 +232,40 @@ protected:
protected:
std::array<SessionObject, NUM_SESSIONS> _objects;
+ std::vector<CircleBuffer> _cbufs;
+};
+
+class ValidationTestTwoSessions : public ValidationTest
+{
+protected:
+ nnfw_session *_session1 = nullptr;
+ nnfw_session *_session2 = nullptr;
+};
+
+class ValidationTestTwoSessionsCreated : public ValidationTestTwoSessions
+{
+protected:
+ void SetUp() override
+ {
+ ValidationTestTwoSessions::SetUp();
+ ASSERT_EQ(nnfw_create_session(&_session1), NNFW_STATUS_NO_ERROR);
+ ASSERT_EQ(nnfw_create_session(&_session2), NNFW_STATUS_NO_ERROR);
+ ASSERT_NE(_session1, nullptr);
+ ASSERT_NE(_session2, nullptr);
+ }
+
+ void TearDown() override
+ {
+ ASSERT_EQ(nnfw_close_session(_session1), NNFW_STATUS_NO_ERROR);
+ ASSERT_EQ(nnfw_close_session(_session2), NNFW_STATUS_NO_ERROR);
+ ValidationTestTwoSessions::TearDown();
+ }
+};
+
+class ValidationTestPipelineSession : public ValidationTest
+{
+protected:
+ nnfw_session *_session = nullptr;
};
#endif // __NNFW_API_TEST_FIXTURES_H__
diff --git a/tests/nnfw_api/src/main.cc b/tests/nnfw_api/src/main.cc
index 741c0fb47..ff04eb32c 100644
--- a/tests/nnfw_api/src/main.cc
+++ b/tests/nnfw_api/src/main.cc
@@ -31,8 +31,8 @@ int main(int argc, char **argv)
}
catch (std::runtime_error &e)
{
+ std::cerr << "[WARNING] Test models are not loaded, so some tests will fail" << std::endl;
std::cerr << e.what() << std::endl;
- return -1;
}
return RUN_ALL_TESTS();
diff --git a/tests/nnfw_api/src/one_op_tests/Add.cc b/tests/nnfw_api/src/one_op_tests/Add.cc
deleted file mode 100644
index 281d5ded5..000000000
--- a/tests/nnfw_api/src/one_op_tests/Add.cc
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-#include <memory>
-
-TEST_F(GenModelTest, OneOp_Add_VarToConst)
-{
- CircleGen cgen;
- std::vector<float> rhs_data{5, 4, 7, 4};
- uint32_t rhs_buf = cgen.addBuffer(rhs_data);
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32, rhs_buf});
- int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({lhs}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase({{{1, 3, 2, 4}}, {{6, 7, 9, 8}}});
- _context->addTestCase({{{0, 1, 2, 3}}, {{5, 5, 9, 7}}});
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_Add_VarToVar)
-{
- CircleGen cgen;
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({lhs, rhs}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase({{{1, 3, 2, 4}, {5, 4, 7, 4}}, {{6, 7, 9, 8}}});
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Add_InvalidShape)
-{
- CircleGen cgen;
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int rhs = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({lhs, rhs}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->setCompileFail();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Add_InvalidShapeConst)
-{
- CircleGen cgen;
- std::vector<float> rhs_data{5, 4, 0, 7, 4, 0};
- uint32_t rhs_buf = cgen.addBuffer(rhs_data);
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32, rhs_buf});
- int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({lhs, rhs}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->setCompileFail();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Add_OneOperand)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAdd({{in}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->setCompileFail();
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/Add.test.cc b/tests/nnfw_api/src/one_op_tests/Add.test.cc
new file mode 100644
index 000000000..9fc0e86b6
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Add.test.cc
@@ -0,0 +1,301 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
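+// uniformTCD<T> (a helper assumed to be provided by GenModelTest.h) builds a
+// TestCaseData whose input and output buffers all use element type T.
+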
+TEST_F(GenModelTest, OneOp_Add_VarToConst)
+{
+ CircleGen cgen;
+ std::vector<float> rhs_data{5, 4, 7, 4};
+ uint32_t rhs_buf = cgen.addBuffer(rhs_data);
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32, rhs_buf});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{1, 3, 2, 4}}, {{6, 7, 9, 8}}));
+ _context->addTestCase(uniformTCD<float>({{0, 1, 2, 3}}, {{5, 5, 9, 7}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Add_VarToVar)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{1, 3, 2, 4}, {5, 4, 7, 4}}, {{6, 7, 9, 8}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Add_VarToVarUint8)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 1);
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 2);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 4);
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<uint8_t>({{1, 3, 2, 4}, {5, 4, 7, 4}}, {{7, 8, 10, 9}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Add_VarToVarInt8)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 1., 2);
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 2., 3);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.5, -6);
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<int8_t>({{1, 3, 2, 4}, {5, -4, -7, 4}}, {{0, -32, -46, 2}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_BroadcastAdd_VarToVarInt8)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 1., 2);
+ int rhs = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_INT8}, 2., 3);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.5, -6);
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<int8_t>({{1, 3, 2, 4}, {5}}, {{0, 4, 2, 6}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Add_VarToVarSame)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{in, in}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{1, 3, 2, 4}}, {{2, 6, 4, 8}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Add_VarToVarSize0)
+{
+ CircleGen cgen;
+ int a = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
+ int b = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
+ int c = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
+ int m = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{a, b}, {m}}, circle::ActivationFunctionType_NONE);
+ cgen.addOperatorAdd({{m, c}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({a, b, c}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{}, {}, {}}, {{}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_InvalidType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 2);
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_DifferentQuant8Type)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.2, -3);
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 2);
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_INT8});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_InvalidShape)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_InvalidShapeConst)
+{
+ CircleGen cgen;
+ std::vector<float> rhs_data{5, 4, 0, 7, 4, 0};
+ uint32_t rhs_buf = cgen.addBuffer(rhs_data);
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32, rhs_buf});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_OneOperand)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{in}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_ThreeOperands)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{in, in, in}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_NoOutput)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{in}, {}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_InvalidActivation)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}},
+ static_cast<circle::ActivationFunctionType>(128) /* Invalid value */);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{1, 3, 2, 4}, {5, 4, 7, 4}}, {{6, 7, 9, 8}}));
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_VarToVarSize0_InvalidShape)
+{
+ CircleGen cgen;
+ int a = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
+ int b = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
+ int c = cgen.addTensor({{2}, circle::TensorType::TensorType_FLOAT32});
+ int m = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{a, b}, {m}}, circle::ActivationFunctionType_NONE);
+ cgen.addOperatorAdd({{m, c}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({a, b, c}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailCompile();
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_VarToVarInt16)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT16}, 1., 2);
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT16}, 2., 3);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT16}, 0.5, -6);
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ // _context->addTestCase(uniformTCD<int8_t>({{1, 3, 2, 4}, {5, -4, -7, 4}}, {{0, -32, -46, 2}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/AddN.test.cc b/tests/nnfw_api/src/one_op_tests/AddN.test.cc
new file mode 100644
index 000000000..73fa82168
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/AddN.test.cc
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_AddN_1D)
+{
+ CircleGen cgen;
+
+ int in1 = cgen.addTensor({{8}, circle::TensorType::TensorType_FLOAT32});
+ int in2 = cgen.addTensor({{8}, circle::TensorType::TensorType_FLOAT32});
+ int in3 = cgen.addTensor({{8}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{8}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorAddN({{in1, in2, in3}, {out}});
+ cgen.setInputsAndOutputs({in1, in2, in3}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->addTestCase(uniformTCD<float>({{1.2, 2.0, -3.0, 4.5, 10.0, 5.1, -7.0, 12.0},
+ {3.3, 4.1, 3.0, 4.4, 5.0, 4.3, -1.2, 4.0},
+ {-5.2, 3.1, 2.2, -3.7, 5.2, 2.0, -4.3, 5.0}},
+ {{-0.7, 9.2, 2.2, 5.2, 20.2, 11.4, -12.5, 21.0}}));
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_AddN_InvalidType)
+{
+ CircleGen cgen;
+
+ int in1 = cgen.addTensor({{8}, circle::TensorType::TensorType_UINT8});
+ int in2 = cgen.addTensor({{8}, circle::TensorType::TensorType_UINT8});
+ int in3 = cgen.addTensor({{8}, circle::TensorType::TensorType_UINT8});
+ int out = cgen.addTensor({{8}, circle::TensorType::TensorType_UINT8});
+
+ cgen.addOperatorAddN({{in1, in2, in3}, {out}});
+ cgen.setInputsAndOutputs({in1, in2, in3}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_AddN_TypeDiff)
+{
+ CircleGen cgen;
+
+ int in1 = cgen.addTensor({{8}, circle::TensorType::TensorType_FLOAT32});
+ int in2 = cgen.addTensor({{8}, circle::TensorType::TensorType_FLOAT32});
+ int in3 = cgen.addTensor({{8}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{8}, circle::TensorType::TensorType_INT32});
+
+ cgen.addOperatorAddN({{in1, in2, in3}, {out}});
+ cgen.setInputsAndOutputs({in1, in2, in3}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/ArgMinMax.test.cc b/tests/nnfw_api/src/one_op_tests/ArgMinMax.test.cc
new file mode 100644
index 000000000..1321552db
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/ArgMinMax.test.cc
@@ -0,0 +1,256 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+struct ArgMinMaxVariationParam
+{
+ TestCaseData tcd;
+ bool is_argmax = true;
+ circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32;
+ float scale = 0.0f;
+ int64_t zero_point = 0;
+};
+
+class ArgMinMaxVariation : public GenModelTest,
+ public ::testing::WithParamInterface<ArgMinMaxVariationParam>
+{
+};
+
+// Input shape: {1, 2, 2, 1}
+// Reduce axis: 1
+// Output shape: {1, 2, 1}
+// Output type: Int32
+// Test with different input types and values
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, ArgMinMaxVariation,
+ ::testing::Values(
+ // ArgMax, float input
+ ArgMinMaxVariationParam{TestCaseData{}.addInput<float>({1, 4, 2, 3}).addOutput<int32_t>({1, 0}),
+ true},
+ // ArgMax, int32 input
+ ArgMinMaxVariationParam{
+ TestCaseData{}.addInput<int32_t>({1, 4, 2, 3}).addOutput<int32_t>({1, 0}), true,
+ circle::TensorType::TensorType_INT32},
+ // ArgMax, uint8 input
+ ArgMinMaxVariationParam{
+ TestCaseData{}.addInput<uint8_t>({1, 4, 2, 3}).addOutput<int32_t>({1, 0}), true,
+ circle::TensorType::TensorType_UINT8, 1.0, 1},
+ // ArgMax, int8 input
+ ArgMinMaxVariationParam{
+ TestCaseData{}.addInput<int8_t>({1, 4, 2, 3}).addOutput<int32_t>({1, 0}), true,
+ circle::TensorType::TensorType_INT8, 1.0, 1},
+ // ArgMin, float input
+ ArgMinMaxVariationParam{TestCaseData{}.addInput<float>({1, 4, 2, 3}).addOutput<int32_t>({0, 1}),
+ false},
+ // ArgMin, int32 input
+ ArgMinMaxVariationParam{
+ TestCaseData{}.addInput<int32_t>({1, 4, 2, 3}).addOutput<int32_t>({0, 1}), false,
+ circle::TensorType::TensorType_INT32},
+ // ArgMin, uint8 input
+ ArgMinMaxVariationParam{
+ TestCaseData{}.addInput<uint8_t>({1, 4, 2, 3}).addOutput<int32_t>({0, 1}), false,
+ circle::TensorType::TensorType_UINT8, 1.0, 1},
+ // ArgMin, int8 input
+ ArgMinMaxVariationParam{
+ TestCaseData{}.addInput<int8_t>({1, 4, 2, 3}).addOutput<int32_t>({0, 1}), false,
+ circle::TensorType::TensorType_INT8, 1.0, 1}));
+
+TEST_P(ArgMinMaxVariation, Test)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_INT32;
+ std::vector<int32_t> axis_data{1};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({{1, 2, 1}, output_type});
+ param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_type)
+ : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_ArgMax_Int64_AxisToConst)
+{
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_INT64;
+ std::vector<int32_t> axis_data{1};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 1}, output_type});
+ cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(TestCaseData{}.addInput<float>({1, 4, 2, 3}).addOutput<int64_t>({1, 0}));
+ _context->setBackends({"acl_cl", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_ArgMax_AxisToVar)
+{
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_INT32;
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32});
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 1}, output_type});
+ cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
+ cgen.setInputsAndOutputs({in, axis}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(TestCaseData{}
+ .addInput<float>({1, 4, 2, 3})
+ .addInput<int32_t>({-3})
+ .addOutput<int32_t>({1, 0}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_P(ArgMinMaxVariation, neg_InvalidAxis0)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_INT32;
+ std::vector<int32_t> axis_data{4};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({{1, 2, 1}, output_type});
+ param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_type)
+ : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailCompile();
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_P(ArgMinMaxVariation, neg_InvalidAxis1)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_INT32;
+ std::vector<int32_t> axis_data{-3};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int in = cgen.addTensor({{2, 2}, param.input_type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({{2}, output_type});
+ param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_type)
+ : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_ArgMax_InType)
+{
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_INT32;
+ std::vector<int32_t> axis_data{4};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL});
+ int out = cgen.addTensor({{1, 2, 1}, output_type});
+ cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_P(ArgMinMaxVariation, neg_AxisType)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_INT32;
+ std::vector<float> axis_data{4};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, axis_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({{1, 2, 1}, output_type});
+ param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_type)
+ : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_ArgMax_OutType)
+{
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_FLOAT32;
+ std::vector<int32_t> axis_data{4};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 1}, output_type});
+ cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_P(ArgMinMaxVariation, neg_paramType)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_INT32;
+ const auto output_param = circle::TensorType::TensorType_INT64;
+ std::vector<int32_t> axis_data{4};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({{1, 2, 1}, output_type});
+ param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_param)
+ : cgen.addOperatorArgMin({{in, axis}, {out}}, output_param);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/AveragePool2D.cc b/tests/nnfw_api/src/one_op_tests/AveragePool2D.cc
deleted file mode 100644
index 854517e47..000000000
--- a/tests/nnfw_api/src/one_op_tests/AveragePool2D.cc
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-TEST_F(GenModelTest, OneOp_AvgPool2D)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase({{{1, 3, 2, 4}}, {{2.5}}});
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_AvgPool2D)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->setCompileFail();
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/AveragePool2D.test.cc b/tests/nnfw_api/src/one_op_tests/AveragePool2D.test.cc
new file mode 100644
index 000000000..8276ca4c1
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/AveragePool2D.test.cc
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+struct AvgPool2DParam
+{
+ TestCaseData tcd;
+ std::vector<int32_t> input_shape;
+ std::vector<int32_t> output_shape;
+ struct filter_stride
+ {
+ int32_t filter_w;
+ int32_t filter_h;
+ int32_t stride_w;
+ int32_t stride_h;
+ } param = {1, 1, 1, 1};
+ struct data_type
+ {
+ circle::TensorType data_type;
+ float scale;
+ int64_t zero_point;
+ } type = {circle::TensorType::TensorType_FLOAT32, 0.0f, 0};
+ std::vector<std::string> backend = {"acl_cl", "acl_neon", "cpu", "gpu_cl"};
+};
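+// Each AvgPool2DParam bundles a test case with input/output shapes, filter and
+// stride sizes, tensor type (with quantization scale and zero-point) and the
+// backends to run it on; defaults are a 1x1 filter/stride and float32.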
+
+class AveragePool2DVariation : public GenModelTest,
+ public ::testing::WithParamInterface<AvgPool2DParam>
+{
+};
+
+// Test with different input types and values
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, AveragePool2DVariation,
+ ::testing::Values(
+ // float data
+ AvgPool2DParam{
+ uniformTCD<float>({{1, 3, 2, 4}}, {{2.5}}), {1, 2, 2, 1}, {1, 1, 1, 1}, {2, 2, 2, 2}},
+ // float data - large
+ AvgPool2DParam{uniformTCD<float>({std::vector<float>(18 * 36 * 2, 99)}, {{99, 99, 99, 99}}),
+ {1, 18, 36, 2},
+ {1, 1, 2, 2},
+ {18, 18, 18, 18}},
+ // uint8_t data
+ AvgPool2DParam{uniformTCD<uint8_t>({{2, 6, 4, 8}}, {{5}}),
+ {1, 2, 2, 1},
+ {1, 1, 1, 1},
+ {2, 2, 2, 2},
+ {circle::TensorType::TensorType_UINT8, 1.2, 3},
+ {"acl_cl", "acl_neon", "cpu"}},
+ // uint8_t data - large
+ AvgPool2DParam{
+ uniformTCD<uint8_t>({{std::vector<uint8_t>(18 * 36 * 2, 99)}}, {{99, 99, 99, 99}}),
+ {1, 18, 36, 2},
+ {1, 1, 2, 2},
+ {18, 18, 18, 18},
+ {circle::TensorType::TensorType_UINT8, 1.2, 3},
+ {"acl_cl", "acl_neon", "cpu"}},
+ // int8_t data
+ // TODO enable acl-cl, acl-neon backends
+ AvgPool2DParam{uniformTCD<int8_t>({{2, -6, 4, -8}}, {{-2}}),
+ {1, 2, 2, 1},
+ {1, 1, 1, 1},
+ {2, 2, 2, 2},
+ {circle::TensorType::TensorType_INT8, 2.0, -1},
+ {"cpu"}},
+ // int8_t data - large
+ // TODO enable acl-cl, acl-neon backends
+ AvgPool2DParam{
+ uniformTCD<int8_t>({{std::vector<int8_t>(18 * 36 * 2, -99)}}, {{-99, -99, -99, -99}}),
+ {1, 18, 36, 2},
+ {1, 1, 2, 2},
+ {18, 18, 18, 18},
+ {circle::TensorType::TensorType_INT8, 2.0, -1},
+ {"cpu"}}));
+
+TEST_P(AveragePool2DVariation, Test)
+{
+ auto &param = GetParam();
+ CircleGen cgen;
+
+ int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w,
+ param.param.stride_h, param.param.filter_w, param.param.filter_h,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends(param.backend);
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_AvgPool2D_3DInput)
+{
+  // 3D tensors are not supported
+ CircleGen cgen;
+ int in = cgen.addTensor({{2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_AvgPool2D_2DInput)
+{
+  // 2D tensors are not supported
+ CircleGen cgen;
+ int in = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_P(AveragePool2DVariation, neg_InvalidPaddingType)
+{
+ auto &param = GetParam();
+ CircleGen cgen;
+
+ int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ cgen.addOperatorAveragePool2D({{in}, {out}}, static_cast<circle::Padding>(99),
+ param.param.stride_w, param.param.stride_h, param.param.filter_w,
+ param.param.filter_h, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_P(AveragePool2DVariation, neg_InvalidFilterSize_1)
+{
+ auto &param = GetParam();
+ CircleGen cgen;
+
+ int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w,
+ param.param.stride_h, -1, param.param.filter_h,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_P(AveragePool2DVariation, neg_InvalidFilterSize_2)
+{
+ auto &param = GetParam();
+ CircleGen cgen;
+
+ int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w,
+ param.param.stride_h, param.param.filter_w, 0,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_P(AveragePool2DVariation, neg_InvalidStrides_1)
+{
+ auto &param = GetParam();
+ CircleGen cgen;
+
+ int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 0, param.param.stride_h,
+ param.param.filter_w, param.param.filter_h,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_P(AveragePool2DVariation, neg_InvalidStrides_2)
+{
+ auto &param = GetParam();
+ CircleGen cgen;
+
+ int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w, -100,
+ param.param.filter_w, param.param.filter_h,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/BatchToSpaceND.test.cc b/tests/nnfw_api/src/one_op_tests/BatchToSpaceND.test.cc
new file mode 100644
index 000000000..57116acef
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/BatchToSpaceND.test.cc
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
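+// Note: BatchToSpaceND is the inverse of SpaceToBatchND. With block shape
+// {2, 2} it folds groups of four batch entries back into 2x2 spatial tiles,
+// so a {4, 1, 1, 1} input becomes {1, 2, 2, 1}; the optional third input
+// crops the resulting spatial dimensions (see the Crop test below).
+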
+TEST_F(GenModelTest, OneOp_BatchToSpaceND_notCrop_1x1)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{4, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int block = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorBatchToSpaceND({{in, block}, {out}});
+ cgen.setInputsAndOutputs({in, block}, {out});
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(TestCaseData{}
+ .addInput<float>({1, 2, 3, 4})
+ .addInput<int32_t>({2, 2})
+ .addOutput<float>({1, 2, 3, 4}));
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_BatchToSpaceND_notCrop_2x2)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{4, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+ int block = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorBatchToSpaceND({{in, block}, {out}});
+ cgen.setInputsAndOutputs({in, block}, {out});
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}
+ .addInput<float>({1, 3, 9, 11, 2, 4, 10, 12, 5, 7, 13, 15, 6, 8, 14, 16})
+ .addInput<int32_t>({2, 2})
+ .addOutput<float>({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}));
+ _context->setBackends({"cpu"});
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_BatchToSpaceND_Crop)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{8, 1, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{2, 2, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+ int block = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
+ int crop = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorBatchToSpaceND({{in, block, crop}, {out}});
+ cgen.setInputsAndOutputs({in, block, crop}, {out});
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}
+ .addInput<float>(
+ {0, 1, 3, 0, 9, 11, 0, 2, 4, 0, 10, 12, 0, 5, 7, 0, 13, 15, 0, 6, 8, 0, 14, 16})
+ .addInput<int32_t>({2, 2})
+ .addInput<int32_t>({0, 0, 2, 0})
+ .addOutput<float>({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}));
+ _context->setBackends({"cpu"});
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_BatchToSpaceND_DifferentType)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{4, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int block = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorBatchToSpaceND({{in, block}, {out}});
+ cgen.setInputsAndOutputs({in, block}, {out});
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(TestCaseData{}
+ .addInput<float>({1, 2, 3, 4})
+ .addInput<int32_t>({2, 2})
+ .addOutput<int>({1, 2, 3, 4}));
+ _context->expectFailModelLoad();
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Cast.test.cc b/tests/nnfw_api/src/one_op_tests/Cast.test.cc
new file mode 100644
index 000000000..b4cfa6f8f
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Cast.test.cc
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+CircleGen genSimpleCastModel(circle::TensorType from_t, circle::TensorType to_t)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, from_t});
+ int out = cgen.addTensor({{1, 2, 2, 1}, to_t});
+ cgen.addOperatorCast({{in}, {out}}, from_t, to_t);
+ cgen.setInputsAndOutputs({in}, {out});
+ return cgen;
+}
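+
+// Note: the positive tests below reuse this helper to build a one-operator
+// Cast model; the negative tests build their graphs inline so that they can
+// inject invalid input/output operand counts, which this helper cannot
+// express.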
+
+TEST_F(GenModelTest, OneOp_Cast_Int32ToFloat32)
+{
+ CircleGen cgen = genSimpleCastModel(circle::TensorType_INT32, circle::TensorType_FLOAT32);
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}.addInput<int32_t>({1, 2, 3, 4}).addOutput<float>({1, 2, 3, 4}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Cast_Float32ToInt32)
+{
+ CircleGen cgen = genSimpleCastModel(circle::TensorType_FLOAT32, circle::TensorType_INT32);
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}.addInput<float>({1, 2, 3, 4}).addOutput<int32_t>({1, 2, 3, 4}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Cast_BoolToFloat32)
+{
+ CircleGen cgen = genSimpleCastModel(circle::TensorType_BOOL, circle::TensorType_FLOAT32);
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}.addInput<bool>({true, false, true, true}).addOutput<float>({1, 0, 1, 1}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Cast_BoolToUInt8)
+{
+ CircleGen cgen = genSimpleCastModel(circle::TensorType_BOOL, circle::TensorType_UINT8);
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(TestCaseData{}
+ .addInput<bool>({true, false, true, true})
+ .addOutput(std::vector<uint8_t>{1, 0, 1, 1}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Cast_BoolToInt32)
+{
+ CircleGen cgen = genSimpleCastModel(circle::TensorType_BOOL, circle::TensorType_INT32);
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}.addInput<bool>({true, false, true, true}).addOutput<int32_t>({1, 0, 1, 1}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Cast_Uint8ToFloat32)
+{
+ CircleGen cgen = genSimpleCastModel(circle::TensorType_UINT8, circle::TensorType_FLOAT32);
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ // clang-format off
+ _context->addTestCase(
+ TestCaseData{}.addInput<uint8_t>({0, 100, 200, 255})
+ .addOutput<float>({0., 100., 200., 255.}));
+ // clang-format on
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Cast_Int64ToFloat32)
+{
+ CircleGen cgen = genSimpleCastModel(circle::TensorType_INT64, circle::TensorType_FLOAT32);
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(TestCaseData{}
+ .addInput<int64_t>({-12345, 3, 100, 2147483648})
+ .addOutput<float>({-12345., 3., 100., 2147483648.}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Cast_AfterEqual)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int equal_out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorEqual({{lhs, rhs}, {equal_out}});
+ cgen.addOperatorCast({{equal_out}, {out}}, circle::TensorType::TensorType_BOOL,
+ circle::TensorType::TensorType_FLOAT32);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{1, 3, 2, 4}, {2, 3, 1, 4}}, {{0, 1, 0, 1}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Cast_InvalidInputCount0)
+{
+ CircleGen cgen;
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorCast({{}, {out}}, circle::TensorType::TensorType_FLOAT32,
+ circle::TensorType::TensorType_INT32);
+ cgen.setInputsAndOutputs({}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Cast_InvalidInputCount2)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{1, 2, 2, 3}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorCast({{lhs, rhs}, {out}}, circle::TensorType::TensorType_INT32,
+ circle::TensorType::TensorType_FLOAT32);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Cast_InvalidOutputCount0)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorCast({{in}, {}}, circle::TensorType::TensorType_INT32,
+ circle::TensorType::TensorType_FLOAT32);
+ cgen.setInputsAndOutputs({in}, {});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Cast_InvalidOutputCount2)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int out1 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out2 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorCast({{in}, {out1, out2}}, circle::TensorType::TensorType_INT32,
+ circle::TensorType::TensorType_FLOAT32);
+ cgen.setInputsAndOutputs({in}, {out1, out2});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Concat.test.cc b/tests/nnfw_api/src/one_op_tests/Concat.test.cc
new file mode 100644
index 000000000..4f8360353
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Concat.test.cc
@@ -0,0 +1,244 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
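+// Note: in the graph below, the Add result feeds two consumers: it is padded
+// into pad_out and concatenated with rhs along the channel axis into
+// concat_out. As the tensor name suggests, this exercises backends that may
+// implement concatenation by aliasing shared sub-tensors, checking that both
+// outputs still come out correct.
+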
+TEST_F(GenModelTest, OneOp_Concat_ShareSubTensor)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int shared_subtensor = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int concat_out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+ int pad_out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {shared_subtensor}}, circle::ActivationFunctionType_NONE);
+ cgen.addOperatorConcatenation({{rhs, shared_subtensor}, {concat_out}}, 3,
+ circle::ActivationFunctionType_NONE);
+ cgen.addOperatorPad({{shared_subtensor, padding}, {pad_out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {pad_out, concat_out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>(
+ {{1, 3, 2, 4}, {5, 4, 7, 4}},
+ {{0, 0, 0, 0, 0, 6, 7, 0, 0, 9, 8, 0, 0, 0, 0, 0}, {5, 6, 4, 7, 7, 9, 4, 8}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+struct ConcatVariationParam
+{
+ TestCaseData tcd;
+ circle::TensorType type = circle::TensorType::TensorType_FLOAT32;
+ float scale = 0.0f;
+ int64_t zero_point = 0;
+};
+
+class ConcatVariation : public GenModelTest,
+ public ::testing::WithParamInterface<ConcatVariationParam>
+{
+};
+
+// Input shape: {2, 3} / {2, 3}
+// Output shape: {4, 3}
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, ConcatVariation,
+ ::testing::Values(
+ // Float
+ ConcatVariationParam{uniformTCD<float>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
+ {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}})},
+ // Uint8
+ ConcatVariationParam{uniformTCD<uint8_t>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
+ {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}),
+ circle::TensorType::TensorType_UINT8, 1.0f, -2},
+ // Int8
+ ConcatVariationParam{uniformTCD<int8_t>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
+ {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}),
+ circle::TensorType::TensorType_INT8, 1.0f, -2},
+ // Int16
+    // TODO Enable when the nnfw API supports the int16 type
+ // ConcatVariationParam{
+ // uniformTCD<int16_t>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
+ // {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}),
+ // circle::TensorType::TensorType_INT16, 1.0f, 0},
+ // Int32
+ ConcatVariationParam{uniformTCD<int32_t>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
+ {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}),
+ circle::TensorType::TensorType_INT32},
+ // Int64
+ ConcatVariationParam{uniformTCD<int64_t>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
+ {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}),
+ circle::TensorType::TensorType_INT64}));
+
+TEST_P(ConcatVariation, Test)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
+ int input2 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
+ int output = cgen.addTensor({{4, 3}, param.type}, param.scale, param.zero_point);
+ cgen.addOperatorConcatenation({{input1, input2}, {output}}, 0,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({input1, input2}, {output});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Concat_Subtensor_4D)
+{
+ CircleGen cgen;
+ int in1 = cgen.addTensor({{1, 1, 1, 20}, circle::TensorType::TensorType_FLOAT32});
+ int in2 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> axis_data{3};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+
+ int s_out1 = cgen.addTensor({{1, 1, 1, 5}, circle::TensorType::TensorType_FLOAT32});
+ int s_out2 = cgen.addTensor({{1, 1, 1, 5}, circle::TensorType::TensorType_FLOAT32});
+ int s_out3 = cgen.addTensor({{1, 1, 1, 5}, circle::TensorType::TensorType_FLOAT32});
+ int s_out4 = cgen.addTensor({{1, 1, 1, 5}, circle::TensorType::TensorType_FLOAT32});
+
+ int c_out1 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32});
+ int c_out2 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32});
+ int c_out3 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32});
+
+ int a_out1 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32});
+ int a_out2 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32});
+ int a_out3 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32});
+
+ int final_out = cgen.addTensor({{1, 1, 1, 35}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorSplit({{axis, in1}, {s_out1, s_out2, s_out3, s_out4}}, 4);
+
+ cgen.addOperatorConcatenation({{s_out1, s_out2}, {c_out1}}, 3,
+ circle::ActivationFunctionType::ActivationFunctionType_NONE);
+ cgen.addOperatorConcatenation({{s_out1, s_out3}, {c_out2}}, 3,
+ circle::ActivationFunctionType::ActivationFunctionType_NONE);
+ cgen.addOperatorConcatenation({{s_out1, s_out4}, {c_out3}}, 3,
+ circle::ActivationFunctionType::ActivationFunctionType_NONE);
+
+ cgen.addOperatorAdd({{c_out1, in2}, {a_out1}},
+ circle::ActivationFunctionType::ActivationFunctionType_NONE);
+ cgen.addOperatorAdd({{c_out2, in2}, {a_out2}},
+ circle::ActivationFunctionType::ActivationFunctionType_NONE);
+ cgen.addOperatorAdd({{c_out3, in2}, {a_out3}},
+ circle::ActivationFunctionType::ActivationFunctionType_NONE);
+
+ cgen.addOperatorConcatenation({{s_out1, a_out1, a_out2, a_out3}, {final_out}}, 3,
+ circle::ActivationFunctionType::ActivationFunctionType_NONE);
+
+ cgen.setInputsAndOutputs({in1, in2}, {s_out1, s_out2, s_out3, s_out4, c_out1, c_out2, c_out3,
+ a_out1, a_out2, a_out3, final_out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>(
+ {
+ // inputs
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, // in1
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0} // in2
+ },
+ {
+ // outputs
+ {1, 2, 3, 4, 5}, // s_out1
+ {6, 7, 8, 9, 10}, // s_out2
+ {11, 12, 13, 14, 15}, // s_out3
+ {16, 17, 18, 19, 20}, // s_out4
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, // c_out1
+ {1, 2, 3, 4, 5, 11, 12, 13, 14, 15}, // c_out2
+ {1, 2, 3, 4, 5, 16, 17, 18, 19, 20}, // c_out3
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, // a_out1
+ {1, 2, 3, 4, 5, 11, 12, 13, 14, 15}, // a_out2
+ {1, 2, 3, 4, 5, 16, 17, 18, 19, 20}, // a_out3
+ {1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 2, 3,
+ 4, 5, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 16, 17, 18, 19, 20} // final_out
+ }));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_P(ConcatVariation, neg_InvalidAxis)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
+ int input2 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
+ int output = cgen.addTensor({{4, 3}, param.type}, param.scale, param.zero_point);
+ int axis = 2;
+
+ cgen.addOperatorConcatenation({{input1, input2}, {output}}, axis,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({input1, input2}, {output});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_P(ConcatVariation, neg_InvalidRank)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
+ int input2 = cgen.addTensor({{1, 2, 3}, param.type}, param.scale, param.zero_point);
+ int output = cgen.addTensor({{1, 4, 3}, param.type}, param.scale, param.zero_point);
+ int axis = 0;
+
+ cgen.addOperatorConcatenation({{input1, input2}, {output}}, axis,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({input1, input2}, {output});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_P(ConcatVariation, neg_InvalidDimension)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
+ int input2 = cgen.addTensor({{3, 2}, param.type}, param.scale, param.zero_point);
+ int output = cgen.addTensor({{4, 3}, param.type}, param.scale, param.zero_point);
+ int axis = 0;
+
+ cgen.addOperatorConcatenation({{input1, input2}, {output}}, axis,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({input1, input2}, {output});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Conv2D.test.cc b/tests/nnfw_api/src/one_op_tests/Conv2D.test.cc
new file mode 100644
index 000000000..46632f18d
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Conv2D.test.cc
@@ -0,0 +1,329 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_Conv2D)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{2, 3};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE, 1, 1);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>(
+ {{4, 0, -5, 1, 0, 4, -1, 1, -1, -3, 3, -2, -4, 1, -2, 2, 4, -4, 2, 2, 0, 4, -1, -2, 4}},
+ {{47, -4, -25, 9, 10, 10, -13, 11, -14, -26, -12, 26, 20, 40, 1, 3, 11, 4}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "ruy", "xnnpack", "gpu_cl"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Conv2D_Stride)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{2, 3};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 2, 2,
+ circle::ActivationFunctionType_NONE, 1, 1);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>(
+ {{4, 0, -5, 1, 0, 4, -1, 1, -1, -3, 3, -2, -4, 1, -2, 2, 4, -4, 2, 2, 0, 4, -1, -2, 4}},
+ {{22, 27, -10, -2, 5, -8, 7, 3, -14, -26, -10, 18, 4, -13, -28, 9, 14, 1}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "ruy", "xnnpack"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Conv2D_Dilation)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{2, 3};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE, 2, 2);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>(
+ {{4, 0, -5, 1, 0, 4, -1, 1, -1, -3, 3, -2, -4, 1, -2, 2, 4, -4, 2, 2, 0, 4, -1, -2, 4}},
+ {{-52, 7}}));
+ _context->setBackends({"cpu", "ruy", "xnnpack"});
+
+ SUCCEED();
+}
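+
+// Note: with dilation 2, the 3x3 kernel has an effective 5x5 receptive field,
+// so VALID padding over the 5x5 input leaves exactly one output position per
+// channel, matching the {1, 1, 1, 2} output shape above.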
+
+TEST_F(GenModelTest, OneOp_Conv2D_I8)
+{
+ CircleGen cgen;
+ std::vector<int8_t> weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<int32_t> bias_data{0, 2, 4};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0);
+ int weight =
+ cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf}, 0.5, 0);
+ int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0);
+ int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 1.0, 0);
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<int8_t>({{10, 10, 10}}, {{15, 38, 61}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
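+
+// Note: the expected values above can be checked by hand. Input 10 at scale
+// 0.5 dequantizes to 5.0, and the weight rows dequantize to {0.5, 1, 1.5},
+// {2, 2.5, 3} and {3.5, 4, 4.5}. The results are consistent with the bias
+// being applied at scale input_scale * weight_scale = 0.25:
+// ch0 = 5*3 + 0 = 15, ch1 = 5*7.5 + 2*0.25 = 38, ch2 = 5*12 + 4*0.25 = 61
+// (output scale 1.0).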
+
+TEST_F(GenModelTest, OneOp_Conv2D_I8_PerChannel)
+{
+ CircleGen cgen;
+ std::vector<int8_t> weight_data{1, 2, 3, 1, 2, 3, 7, 8, 9};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<int32_t> bias_data{0, 0, 0};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0);
+ std::vector<float> weight_scales = {0.5, 1, 0.5};
+ std::vector<int64_t> weight_zeropoints = {0, 0, 0};
+ int weight = cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf},
+ weight_scales, weight_zeropoints);
+ int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0);
+ int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 1.0, 0);
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<int8_t>({{10, 10, 10}}, {{15, 30, 60}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Conv2D_U8_PerChannel)
+{
+ CircleGen cgen;
+ // weight
+ std::vector<uint8_t> weight_data{2, 6, 2, 1, 2, 3, 2, 3, 4};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> weight_scales = {.5, 1, 2};
+ std::vector<int64_t> weight_zeropoints = {2, 0, 1};
+ int weight = cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_UINT8, weight_buf},
+ weight_scales, weight_zeropoints);
+ // bias
+ std::vector<int32_t> bias_data{4, -8, -4};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1., 0);
+
+ // in and out
+ int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_UINT8}, 2., 1);
+ int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_UINT8}, 4., 2);
+
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<uint8_t>({{5, 3, 7}}, {{5, 11, 24}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
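+
+// Note: input {5, 3, 7} dequantizes (scale 2, zero-point 1) to {8, 4, 12} and
+// the per-channel weight rows dequantize to {0, 2, 0}, {1, 2, 3} and
+// {2, 4, 6}. With the bias taken at scale input_scale * weight_scale[c], the
+// real outputs are {12, 36, 88}, which requantize (scale 4, zero-point 2) to
+// the expected {5, 11, 24}.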
+
+TEST_F(GenModelTest, OneOp_Conv2D_I8_Hybrid_PerChannel)
+{
+ CircleGen cgen;
+ std::vector<int8_t> weight_data{1, 2, 3, 1, 2, 3, 7, 8, 9};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{0, 0, 0};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<float> weight_scales = {0.5, 1, 0.5};
+ std::vector<int64_t> weight_zeropoints = {0, 0, 0};
+ int weight = cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf},
+ weight_scales, weight_zeropoints);
+ int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{5, 5, 5}}, {{15, 30, 60}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Conv2D_Type)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{2, 3};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT16});
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE, 1, 1);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Conv2D_Stride)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{2, 3};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 0, 0,
+ circle::ActivationFunctionType_NONE, 1, 1);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Conv2D_Dilation)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{2, 3};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE, 0, 0);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Conv2D_I8_NonZero_ZeroPoint)
+{
+ CircleGen cgen;
+ std::vector<int8_t> weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<int32_t> bias_data{0, 2, 4};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0);
+ int weight =
+ cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf}, 0.5, 17);
+ int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0);
+ int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 1.0, 0);
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Conv2D_I8_NonZero_ZeroPoints)
+{
+ CircleGen cgen;
+ std::vector<int8_t> weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<int32_t> bias_data{0, 2, 4};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0);
+ std::vector<float> weight_scales = {0.5, 1, 0.5};
+ std::vector<int64_t> weight_zeropoints = {0, 0, 10};
+ int weight = cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf},
+ weight_scales, weight_zeropoints);
+ int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0);
+ int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_FLOAT32}, 1.0, 0);
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Conv2D_I8_Hybrid_PerTensor)
+{
+ CircleGen cgen;
+ std::vector<int8_t> weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{0, 2, 4};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_FLOAT32});
+  // Hybrid quantization does not support per-tensor quantized weights.
+ std::vector<float> weight_scales = {0.5};
+ std::vector<int64_t> weight_zeropoints = {0};
+ int weight = cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf},
+ weight_scales, weight_zeropoints);
+ int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Cos.cc b/tests/nnfw_api/src/one_op_tests/Cos.cc
deleted file mode 100644
index 72bfe3e2f..000000000
--- a/tests/nnfw_api/src/one_op_tests/Cos.cc
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-TEST_F(GenModelTest, OneOp_Cos)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorCos({{in}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- const float pi = 3.141592653589793;
- _context->addTestCase({{{0, pi / 2, pi, 7}}, {{1, 0, -1, 0.75390225434}}});
- _context->setBackends({"cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Cos_TwoOperand)
-{
- CircleGen cgen;
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out1 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out2 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorCos({{lhs, rhs}, {out1, out2}});
- cgen.setInputsAndOutputs({lhs, rhs}, {out1, out2});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"cpu"});
- _context->setCompileFail();
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/Cos.test.cc b/tests/nnfw_api/src/one_op_tests/Cos.test.cc
new file mode 100644
index 000000000..03944746a
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Cos.test.cc
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_Cos)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorCos({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ const float pi = 3.141592653589793;
+ _context->addTestCase(uniformTCD<float>({{0, pi / 2, pi, 7}}, {{1, 0, -1, 0.75390225434}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Cos_TwoOperand)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out1 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out2 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorCos({{lhs, rhs}, {out1, out2}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out1, out2});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/DepthToSpace.test.cc b/tests/nnfw_api/src/one_op_tests/DepthToSpace.test.cc
new file mode 100644
index 000000000..ad2272996
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/DepthToSpace.test.cc
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+struct DepthToSpaceVariationParam
+{
+ TestCaseData tcd;
+ circle::TensorType type = circle::TensorType::TensorType_FLOAT32;
+ float scale = 0.0f;
+ int64_t zero_point = 0;
+};
+
+class DepthToSpaceVariation : public GenModelTest,
+ public ::testing::WithParamInterface<DepthToSpaceVariationParam>
+{
+};
+
+// Input shape: {1, 1, 2, 4}
+// Block size: 2
+// Output shape: {1, 2, 4, 1}
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, DepthToSpaceVariation,
+ ::testing::Values(
+ // Float
+ DepthToSpaceVariationParam{
+ uniformTCD<float>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}})},
+ // Int32
+ DepthToSpaceVariationParam{
+ uniformTCD<int32_t>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}),
+ circle::TensorType::TensorType_INT32},
+ // Int64
+ DepthToSpaceVariationParam{
+ uniformTCD<int64_t>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}),
+ circle::TensorType::TensorType_INT64},
+ // Uint8
+ DepthToSpaceVariationParam{
+ uniformTCD<uint8_t>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}),
+ circle::TensorType::TensorType_UINT8, 1.0f, -2},
+ // Int8
+ DepthToSpaceVariationParam{
+ uniformTCD<int8_t>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}),
+ circle::TensorType::TensorType_INT8, 1.0f, -2}));
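+
+// Note: with block size 2, each 1x1x4 input pixel unfolds into a 2x2x1
+// spatial patch, so the two input pixels {1, 2, 3, 4} and {5, 6, 7, 8} become
+// output rows {1, 2, 5, 6} and {3, 4, 7, 8}, which is the expected data used
+// by every variation above.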
+
+TEST_P(DepthToSpaceVariation, Test)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 1, 2, 4}, param.type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({{1, 2, 4, 1}, param.type}, param.scale, param.zero_point);
+ cgen.addOperatorDepthToSpace({{in}, {out}}, 2);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_P(DepthToSpaceVariation, neg_Blocksize)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 1, 2, 4}, param.type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({{1, 2, 4, 1}, param.type}, param.scale, param.zero_point);
+ cgen.addOperatorDepthToSpace({{in}, {out}}, -2);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.test.cc b/tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.test.cc
new file mode 100644
index 000000000..55f43dcaf
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.test.cc
@@ -0,0 +1,567 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_DepthwiseConv2D)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{1, 2, 3, 4, -9, 10, -11, 12, 5, 6, 7, 8, 13, -14, 15, -16};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{1, 2, 3, 4};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 3, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12}},
+ {{71, -34, 99, -20, 91, -26, 127, -4}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack"});
+
+ SUCCEED();
+}
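+
+// Note: the test above uses a depth multiplier of 2, so each of the 2 input
+// channels produces 2 output channels (weight shape {1, 2, 2, 4}, output
+// depth 4). The "No_Multiplier" tests below use a multiplier of 1.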
+
+TEST_F(GenModelTest, OneOp_DepthwiseConv2D_No_Multiplier)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{0.5f, -0.5f};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{1, 3, 1, 2}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 1, 1, 1,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ uniformTCD<float>({{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}},
+ {{16.5f, 27.5f, 28.5f, 43.5f, 8.5f, 15.5f, 12.5f, 23.5f}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_DepthwiseConv2D_No_Multiplier_RELU6)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{0.5f, -0.5f};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{1, 3, 1, 2}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 1, 1, 1,
+ circle::ActivationFunctionType_RELU6);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}},
+ {{6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_DepthwiseConv2D_3x3)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{0.0f, 1.0f, 1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 1.0f, 1.0f,
+ 1.0f, 1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 1.0f, 0.0f, 1.0f};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{0.0f, 0.0f};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 1, 1, 1,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ uniformTCD<float>({{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}},
+ {{6.0f, 16.0f, 8.0f, 16.0f, 10.0f, 16.0f, 12.0f, 16.0f}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_DepthwiseConv2D_Dilation)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{0, 0, 0, 0};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 4, 4, 2}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2,
+ circle::ActivationFunctionType_NONE, 2, 2);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
+ 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ }},
+ {{13, 14, 0, 0, 0, 0, 11, 12, 5, 6, 0, 0, 0, 0, 3, 4}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_DepthwiseConv2D_Dilation_N_Stride)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{1, 2, 3, 4};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{0, 0, 0, 0};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 6, 6, 1}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 2, 2, 1,
+ circle::ActivationFunctionType_NONE, 3, 3);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
+ 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}},
+ {{4, 0, 3, 0, 0, 0, 2, 0, 1}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack", "gpu_cl"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_DepthwiseConv2D_U8_PerChannel)
+{
+ CircleGen cgen;
+ // weight
+ // clang-format off
+ std::vector<uint8_t> weight_data{2, 1, 2,
+ 6, 2, 3,
+ 2, 3, 4,
+ 4, 4, 5};
+ // clang-format on
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> weight_scales = {.5, 1, 2};
+ std::vector<int64_t> weight_zeropoints = {2, 0, 1};
+ int weight = cgen.addTensor({{1, 2, 2, 3}, circle::TensorType::TensorType_UINT8, weight_buf},
+ weight_scales, weight_zeropoints);
+ // bias
+ std::vector<int32_t> bias_data{4, -8, -4};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1., 0);
+
+ // in and out
+ int in = cgen.addTensor({{1, 2, 2, 3}, circle::TensorType::TensorType_UINT8}, 2., 1);
+ int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_UINT8}, 4., 2);
+
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 1,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
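+  // A sketch of how the expected outputs follow from per-channel affine
+  // quantization (real = scale * (q - zp)); the kernel's exact rounding may differ.
+  // Channel 0: the input dequantizes to {8, 4, 12, 16} (scale 2, zp 1) and the
+  // channel-0 weights to {0, 2, 0, 1} (scale 0.5, zp 2), so acc = 4*2 + 16*1 = 24.
+  // Adding bias 4 at scale in_scale * w_scale = 1 gives 28, which requantizes
+  // with output scale 4, zp 2 to 28/4 + 2 = 9.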
+ // clang-format off
+ _context->addTestCase(uniformTCD<uint8_t>({{5, 5, 5, // NHWC
+ 3, 3, 3,
+ 7, 7, 7,
+ 9, 9, 9}
+ },
+ {{9,
+ 27,
+ 56}
+ }));
+ // clang-format on
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_DepthwiseConv2D_I8_Hybrid_PerChannel)
+{
+ CircleGen cgen;
+ // weight
+ // clang-format off
+ std::vector<int8_t> weight_data{1, 2, 1, 2, -9, 10, -9, 10,
+ 5, 6, 5, 6, 13, -14, 13, -14};
+ // clang-format on
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> weight_scales = {1, 1, 1, 1};
+ std::vector<int64_t> weight_zeropoints = {0, 0, 0, 0};
+ int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_INT8, weight_buf},
+ weight_scales, weight_zeropoints);
+ // bias
+ std::vector<float> bias_data{0, 1, 2, 3};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+
+ // in and out
+ int in = cgen.addTensor({{1, 3, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
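+  // Hybrid execution dequantizes the int8 weights per channel (here scale 1,
+  // zp 0, so they are used as-is) and runs the convolution in float. As a sketch
+  // for the first output channel at the top window:
+  // 0*1 + 2*(-9) + 0*5 + 2*13 + bias 0 = 8, the first expected value below.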
+ // clang-format off
+ _context->addTestCase(uniformTCD<float>({{0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3}},
+ {{8, -7, 20, -1,
+ 8, -7, 20, -1}}));
+ // clang-format on
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_Stride)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{1, 2, 3, 4, -9, 10, -11, 12, 5, 6, 7, 8, 13, -14, 15, -16};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{1, 2, 3, 4};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 3, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 0, 0, 2,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_Dilation)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{0, 0, 0, 0};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 4, 4, 2}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2,
+ circle::ActivationFunctionType_NONE, 0, 0);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_Type)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{1, 2, 3, 4, -9, 10, -11, 12, 5, 6, 7, 8, 13, -14, 15, -16};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{1, 2, 3, 4};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 3, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_UINT8});
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+// Generate a model for negative test cases
+CircleBuffer genNegTestDepthwiseConv2DModel(circle::Padding padding, int stride_w, int stride_h,
+ int depth_multiplier,
+ circle::ActivationFunctionType actfn)
+{
+ CircleGen cgen;
+ uint32_t ker_buf = cgen.addBuffer(std::vector<uint8_t>{0, 1, 2, 3, 0, 1, 2, 3});
+ uint32_t bias_buf = cgen.addBuffer(std::vector<int32_t>{0, 0});
+ int in = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType_UINT8}, 0.5, 0);
+ int ker = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType_UINT8, ker_buf}, 0.5, 0);
+ int bias = cgen.addTensor({{2}, circle::TensorType_INT32, bias_buf}, 0.25, 0);
+ int out = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType_UINT8}, 1, 0);
+ cgen.addOperatorDepthwiseConv2D({{in, ker, bias}, {out}}, padding, stride_w, stride_h,
+ depth_multiplier, actfn, 0, 0);
+ cgen.setInputsAndOutputs({in}, {out});
+ return cgen.finish();
+}
+
+template <typename T> struct DepthwiseConv2DQuantTestParam
+{
+ int stride = 1; // Used for both height and width
+ int input_depth = 1;
+ int depth_multiplier = 1;
+ std::vector<T> ref_output;
+};
+
+template <typename T>
+class DepthwiseConv2DQuantTest
+ : public GenModelTest,
+ public ::testing::WithParamInterface<DepthwiseConv2DQuantTestParam<T>>
+{
+};
+
+using DepthwiseConv2DQuantTestParamU8 = DepthwiseConv2DQuantTestParam<uint8_t>;
+using DepthwiseConv2DQuantTestU8 = DepthwiseConv2DQuantTest<uint8_t>;
+
+// Test with different InputDepth and DepthMultiplier. The values are intended to test optimized CPU
+// kernels.
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, DepthwiseConv2DQuantTestU8,
+ ::testing::Values(
+ // Stride == 1
+ DepthwiseConv2DQuantTestParamU8{1, 8, 1, std::vector<uint8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
+ DepthwiseConv2DQuantTestParamU8{1, 4, 2, std::vector<uint8_t>{0, 0, 2, 3, 0, 2, 6, 9}},
+ DepthwiseConv2DQuantTestParamU8{
+ 1, 2, 8, std::vector<uint8_t>{0, 1, 2, 3, 0, 1, 2, 3, 0, 2, 4, 6, 0, 2, 4, 6}},
+ DepthwiseConv2DQuantTestParamU8{1, 2, 2, std::vector<uint8_t>{0, 1, 4, 6}},
+ DepthwiseConv2DQuantTestParamU8{1, 2, 1, std::vector<uint8_t>{2, 5}},
+ DepthwiseConv2DQuantTestParamU8{1, 1, 2, std::vector<uint8_t>{2, 4}},
+ DepthwiseConv2DQuantTestParamU8{1, 1, 4, std::vector<uint8_t>{0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamU8{1, 4, 1, std::vector<uint8_t>{0, 1, 4, 9}},
+ DepthwiseConv2DQuantTestParamU8{
+ 1, 4, 4, std::vector<uint8_t>{0, 0, 0, 0, 0, 1, 2, 3, 0, 2, 4, 6, 0, 3, 6, 9}},
+ DepthwiseConv2DQuantTestParamU8{1, 12, 1,
+ std::vector<uint8_t>{0, 3, 7, 12, 0, 4, 7, 12, 0, 4, 9, 16}},
+ // Stride == 2
+ DepthwiseConv2DQuantTestParamU8{2, 4, 1, std::vector<uint8_t>{0, 1, 4, 9}},
+ DepthwiseConv2DQuantTestParamU8{2, 2, 1, std::vector<uint8_t>{2, 5}},
+ DepthwiseConv2DQuantTestParamU8{2, 1, 8, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamU8{2, 1, 32, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3,
+ 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2,
+ 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamU8{
+ 2, 1, 20, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamU8{
+ 2, 1, 16, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamU8{2, 8, 1, std::vector<uint8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
+ DepthwiseConv2DQuantTestParamU8{
+ 2, 8, 2, std::vector<uint8_t>{0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8}},
+ DepthwiseConv2DQuantTestParamU8{
+ 2, 16, 1, std::vector<uint8_t>{0, 3, 8, 16, 0, 4, 7, 12, 0, 3, 7, 13, 0, 4, 7, 12}}));
+
+CircleBuffer genDepthwiseConv2DQuantU8Model(int stride, int input_depth, int depth_multiplier)
+{
+ assert(1 <= stride && stride <= 2);
+ assert(1 <= input_depth && input_depth <= 16);
+ assert(1 <= depth_multiplier && depth_multiplier <= 32);
+
+ const int output_depth = input_depth * depth_multiplier;
+ assert(1 <= output_depth && output_depth <= 32);
+
+ CircleGen cgen;
+ uint32_t ker_buf = cgen.addBuffer(std::vector<uint8_t>{
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
+ 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
+ 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3});
+ uint32_t bias_buf = cgen.addBuffer(std::vector<int32_t>(output_depth, 0));
+ int in = cgen.addTensor({{1, 2, 2, input_depth}, circle::TensorType_UINT8}, 0.5, 0);
+ int ker = cgen.addTensor({{1, 2, 2, output_depth}, circle::TensorType_UINT8, ker_buf}, 0.5, 0);
+ int bias = cgen.addTensor({{output_depth}, circle::TensorType_INT32, bias_buf}, 0.25, 0);
+ int out = cgen.addTensor({{1, 1, 1, output_depth}, circle::TensorType_UINT8}, 1, 0);
+ cgen.addOperatorDepthwiseConv2D({{in, ker, bias}, {out}}, circle::Padding::Padding_VALID, stride,
+ stride, depth_multiplier, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+ return cgen.finish();
+}
+
+TEST_P(DepthwiseConv2DQuantTestU8, Test)
+{
+ // Same input is used for all tests but output differs
+ static const std::vector<uint8_t> input64{
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2,
+ 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 3, 5, 8, 8, 5, 3, 2, 1, 2, 3, 4, 5, 4, 3, 2};
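+
+  // Each case feeds the first input_depth * 4 values as a 1x2x2xinput_depth NHWC
+  // input. A worked example (our arithmetic, not the kernel's internal order):
+  // for stride 1, input_depth 1, multiplier 2, the input dequantizes to
+  // {0, 0.5, 1, 1.5} and the two kernel channels to {0, 1, 0, 1} and
+  // {0.5, 1.5, 0.5, 1.5}, giving 2.0 and 3.5, which requantize (scale 1, zp 0)
+  // to the expected {2, 4}.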
+
+ auto &param = GetParam();
+ _context = std::make_unique<GenModelTestContext>(
+ genDepthwiseConv2DQuantU8Model(param.stride, param.input_depth, param.depth_multiplier));
+ std::vector<uint8_t> ref_input(input64.begin(), input64.begin() + param.input_depth * 4);
+ _context->addTestCase(uniformTCD<uint8_t>({ref_input}, {param.ref_output}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+using DepthwiseConv2DQuantTestParamI8 = DepthwiseConv2DQuantTestParam<int8_t>;
+using DepthwiseConv2DQuantTestI8 = DepthwiseConv2DQuantTest<int8_t>;
+
+// Test with different InputDepth and DepthMultiplier. The values are intended to test optimized CPU
+// kernels.
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, DepthwiseConv2DQuantTestI8,
+ ::testing::Values(
+ // Stride == 1
+ DepthwiseConv2DQuantTestParamI8{1, 8, 1, std::vector<int8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
+ DepthwiseConv2DQuantTestParamI8{1, 4, 2, std::vector<int8_t>{0, 0, 2, 3, 0, 2, 6, 9}},
+ DepthwiseConv2DQuantTestParamI8{
+ 1, 2, 8, std::vector<int8_t>{0, 1, 2, 3, 0, 1, 2, 3, 0, 2, 4, 6, 0, 2, 4, 6}},
+ DepthwiseConv2DQuantTestParamI8{1, 2, 2, std::vector<int8_t>{0, 1, 4, 6}},
+ DepthwiseConv2DQuantTestParamI8{1, 2, 1, std::vector<int8_t>{2, 5}},
+ DepthwiseConv2DQuantTestParamI8{1, 1, 2, std::vector<int8_t>{2, 4}},
+ DepthwiseConv2DQuantTestParamI8{1, 1, 4, std::vector<int8_t>{0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamI8{1, 4, 1, std::vector<int8_t>{0, 1, 4, 9}},
+ DepthwiseConv2DQuantTestParamI8{
+ 1, 4, 4, std::vector<int8_t>{0, 0, 0, 0, 0, 1, 2, 3, 0, 2, 4, 6, 0, 3, 6, 9}},
+ DepthwiseConv2DQuantTestParamI8{1, 12, 1,
+ std::vector<int8_t>{0, 3, 7, 12, 0, 4, 7, 12, 0, 4, 9, 16}},
+ // Stride == 2
+ DepthwiseConv2DQuantTestParamI8{2, 4, 1, std::vector<int8_t>{0, 1, 4, 9}},
+ DepthwiseConv2DQuantTestParamI8{2, 2, 1, std::vector<int8_t>{2, 5}},
+ DepthwiseConv2DQuantTestParamI8{2, 1, 8, std::vector<int8_t>{0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamI8{2, 1, 32, std::vector<int8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3,
+ 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2,
+ 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamI8{
+ 2, 1, 20, std::vector<int8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamI8{
+ 2, 1, 16, std::vector<int8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamI8{2, 8, 1, std::vector<int8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
+ DepthwiseConv2DQuantTestParamI8{
+ 2, 8, 2, std::vector<int8_t>{0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8}},
+ DepthwiseConv2DQuantTestParamI8{
+ 2, 16, 1, std::vector<int8_t>{0, 3, 8, 16, 0, 4, 7, 12, 0, 3, 7, 13, 0, 4, 7, 12}}));
+
+CircleBuffer genDepthwiseConv2DQuantI8Model(int stride, int input_depth, int depth_multiplier)
+{
+ assert(1 <= stride && stride <= 2);
+ assert(1 <= input_depth && input_depth <= 16);
+ assert(1 <= depth_multiplier && depth_multiplier <= 32);
+
+ const int output_depth = input_depth * depth_multiplier;
+ assert(1 <= output_depth && output_depth <= 32);
+
+ CircleGen cgen;
+ uint32_t ker_buf = cgen.addBuffer(std::vector<int8_t>{
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
+ 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
+ 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3});
+ uint32_t bias_buf = cgen.addBuffer(std::vector<int32_t>(output_depth, 0));
+ int in = cgen.addTensor({{1, 2, 2, input_depth}, circle::TensorType_INT8}, 0.5, 0);
+ int ker = cgen.addTensor({{1, 2, 2, output_depth}, circle::TensorType_INT8, ker_buf}, 0.5, 0);
+ int bias = cgen.addTensor({{output_depth}, circle::TensorType_INT32, bias_buf}, 0.25, 0);
+ int out = cgen.addTensor({{1, 1, 1, output_depth}, circle::TensorType_INT8}, 1, 0);
+ cgen.addOperatorDepthwiseConv2D({{in, ker, bias}, {out}}, circle::Padding::Padding_VALID, stride,
+ stride, depth_multiplier, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+ return cgen.finish();
+}
+
+TEST_P(DepthwiseConv2DQuantTestI8, Test)
+{
+ // Same input is used for all tests but output differs
+ static const std::vector<int8_t> input64{
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2,
+ 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 3, 5, 8, 8, 5, 3, 2, 1, 2, 3, 4, 5, 4, 3, 2};
+
+ auto &param = GetParam();
+ _context = std::make_unique<GenModelTestContext>(
+ genDepthwiseConv2DQuantI8Model(param.stride, param.input_depth, param.depth_multiplier));
+ std::vector<int8_t> ref_input(input64.begin(), input64.begin() + param.input_depth * 4);
+ _context->addTestCase(uniformTCD<int8_t>({ref_input}, {param.ref_output}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_InvalidPaddingType)
+{
+ _context = std::make_unique<GenModelTestContext>(genNegTestDepthwiseConv2DModel(
+ static_cast<circle::Padding>(99), 1, 1, 1, circle::ActivationFunctionType_NONE));
+ _context->expectFailModelLoad();
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack"});
+
+ SUCCEED();
+}
+
+// TODO: Add other invalid-operation tests like the one above
+
+TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_I8_NonZero_ZeroPoints)
+{
+ CircleGen cgen;
+ std::vector<int8_t> weight_data{1, 2, 3, 4, 5, 6, 7, 8};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<int32_t> bias_data{0, 2};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_INT8}, 0.5, 0);
+ std::vector<float> weight_scales = {0.5, 1};
+ std::vector<int64_t> weight_zeropoints = {0, 10};
+ int weight = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_INT8, weight_buf},
+ weight_scales, weight_zeropoints);
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_INT32, bias_buf});
+ int out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32}, 1.0, 0);
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_I8_Hybrid_PerTensor)
+{
+  // Per-tensor quantized weights are not supported by the hybrid kernel
+ CircleGen cgen;
+ std::vector<int8_t> weight_data{1, 2, 3};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{0, 2, 4};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_FLOAT32});
+ // Hybrid does not support per-tensor.
+ std::vector<float> weight_scales = {0.5};
+ std::vector<int64_t> weight_zeropoints = {0};
+ int weight = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf},
+ weight_scales, weight_zeropoints);
+ int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ /* depth_multiplier */ 1, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+  _context = std::make_unique<GenModelTestContext>(cgen.finish());
+  _context->setBackends({"cpu"});
+  _context->expectFailCompile();
+
+  SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/DetectionPostProcess.test.cc b/tests/nnfw_api/src/one_op_tests/DetectionPostProcess.test.cc
new file mode 100644
index 000000000..188638bbb
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/DetectionPostProcess.test.cc
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+TEST_F(GenModelTest, OneOp_DetectionPostProcess_SingleBox)
+{
+ CircleGen cgen;
+
+ int boxes = cgen.addTensor({{1, 1, 4}, circle::TensorType::TensorType_FLOAT32});
+ int scores = cgen.addTensor({{1, 1, 2}, circle::TensorType::TensorType_FLOAT32});
+ int anchors = cgen.addTensor({{1, 1, 4}, circle::TensorType::TensorType_FLOAT32});
+
+ int box_coors = cgen.addTensor({{1, 1, 4}, circle::TensorType::TensorType_FLOAT32});
+ int box_classes = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+ int box_scores = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+ int num_selected = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorDetectionPostProcess(
+ {{boxes, scores, anchors}, {box_coors, box_classes, box_scores, num_selected}}, 1, 10, 10, 5, 5,
+ 0.8, 0.5, 1, 1, 1);
+ cgen.setInputsAndOutputs({boxes, scores, anchors},
+ {box_coors, box_classes, box_scores, num_selected});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
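+  // A sketch of the usual SSD center-size decoding this test appears to rely on:
+  // a zero box delta keeps the anchor {y, x, h, w} = {0, 0, 1, 1}, and converting
+  // to corners gives {y - h/2, x - w/2, y + h/2, x + w/2} = {-0.5, -0.5, 0.5, 0.5},
+  // the expected coordinates below.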
+ _context->addTestCase(uniformTCD<float>({{0, 0, 0, 0}, {0, 0.9}, {0, 0, 1, 1}},
+ {{-0.5, -0.5, 0.5, 0.5}, {0}, {0.9}, {1}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_DetectionPostProcess_SingleBox_MultiClasses)
+{
+ CircleGen cgen;
+
+ int boxes = cgen.addTensor({{1, 1, 4}, circle::TensorType::TensorType_FLOAT32});
+ int scores = cgen.addTensor({{1, 1, 3}, circle::TensorType::TensorType_FLOAT32});
+ int anchors = cgen.addTensor({{1, 1, 4}, circle::TensorType::TensorType_FLOAT32});
+
+ int box_coors = cgen.addTensor({{1, 1, 4}, circle::TensorType::TensorType_FLOAT32});
+ int box_classes = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+ int box_scores = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+ int num_selected = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorDetectionPostProcess(
+ {{boxes, scores, anchors}, {box_coors, box_classes, box_scores, num_selected}}, 2, 10, 10, 5, 5,
+ 0.8, 0.5, 1, 1, 1);
+ cgen.setInputsAndOutputs({boxes, scores, anchors},
+ {box_coors, box_classes, box_scores, num_selected});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{0, 0, 0, 0}, {0, 0.7, 0.9}, {0, 0, 1, 1}},
+ {{-0.5, -0.5, 0.5, 0.5}, {1}, {0.9}, {1}}));
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Elu.test.cc b/tests/nnfw_api/src/one_op_tests/Elu.test.cc
new file mode 100644
index 000000000..a037070b2
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Elu.test.cc
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_Elu)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorElu({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
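+  // ELU with the default alpha = 1 computes f(x) = x for x >= 0 and exp(x) - 1
+  // otherwise; e.g. f(-2) = e^-2 - 1 ~= -0.864665, matching the values below.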
+ _context->addTestCase(
+ uniformTCD<float>({{0, -6, 2, -4, 3, -2, 10, -0.1}},
+ {{0.0, -0.997521, 2.0, -0.981684, 3.0, -0.864665, 10.0, -0.0951626}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Elu_Type)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_UINT8}, 1.0f, 0);
+ int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorElu({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Equal.test.cc b/tests/nnfw_api/src/one_op_tests/Equal.test.cc
new file mode 100644
index 000000000..da890978e
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Equal.test.cc
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+struct EqualVariationParam
+{
+ TestCaseData tcd;
+ circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32;
+ const std::vector<std::string> backends = {"acl_cl", "acl_neon", "cpu"};
+};
+
+class EqualVariation : public GenModelTest,
+ public ::testing::WithParamInterface<EqualVariationParam>
+{
+};
+
+// Input shape:
+//   Base: {1, 2, 2, 1}
+//   Broadcast: {1} on one of the two inputs
+// Output shape: {1, 2, 2, 1}
+// Input type: any non-quantized type
+// Output type: BOOL
+// Test with different input types and values
+INSTANTIATE_TEST_SUITE_P(GenModelTest, EqualVariation,
+ ::testing::Values(
+ // Float type
+ EqualVariationParam{TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.5, 0.7})
+ .addInput<float>({0.1, 0.2, 0.3, 0.4})
+ .addOutput<bool>({true, false, false, false})},
+ // Float type - broadcast
+ EqualVariationParam{TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.5, 0.7})
+ .addInput<float>({0.3})
+ .addOutput<bool>({false, true, false, false})},
+ // Int32 type
+ EqualVariationParam{TestCaseData{}
+ .addInput<int32_t>({1, 3, 5, 7})
+ .addInput<int32_t>({1, 2, 3, 4})
+ .addOutput<bool>({true, false, false, false}),
+ circle::TensorType::TensorType_INT32},
+ // Int32 type - broadcast
+ EqualVariationParam{TestCaseData{}
+ .addInput<int32_t>({1, 3, 5, 7})
+ .addInput<int32_t>({5})
+ .addOutput<bool>({false, false, true, false}),
+ circle::TensorType::TensorType_INT32},
+ // Int64 type
+ // NYI: acl backend
+ EqualVariationParam{TestCaseData{}
+ .addInput<int64_t>({1, 3, 5, 7})
+ .addInput<int64_t>({1, 2, 3, 4})
+ .addOutput<bool>({true, false, false, false}),
+ circle::TensorType::TensorType_INT64,
+ {"cpu"}},
+ // Int64 type - broadcast
+ // NYI: acl backend
+ EqualVariationParam{TestCaseData{}
+ .addInput<int64_t>({1, 3, 5, 7})
+ .addInput<int64_t>({1})
+ .addOutput<bool>({true, false, false, false}),
+ circle::TensorType::TensorType_INT64,
+ {"cpu"}},
+ // Bool type
+ EqualVariationParam{TestCaseData{}
+ .addInput<bool>({true, true, false, false})
+ .addInput<bool>({true, false, true, false})
+ .addOutput<bool>({true, false, false, true}),
+ circle::TensorType::TensorType_BOOL},
+ // Bool type - broadcast
+ EqualVariationParam{TestCaseData{}
+ .addInput<bool>({true, true, false, false})
+ .addInput<bool>({true})
+ .addOutput<bool>({true, true, false, false}),
+ circle::TensorType::TensorType_BOOL}
+
+ ));
+
+TEST_P(EqualVariation, Test)
+{
+ auto &param = GetParam();
+
+ auto lhs_data = param.tcd.inputs.at(0);
+ auto rhs_data = param.tcd.inputs.at(1);
+
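+  // Which side carries the scalar {1} shape is inferred from the test data:
+  // the input with fewer elements is treated as the broadcast operand.
+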
+ bool broadcast_lhs = false;
+ bool broadcast_rhs = false;
+ if (lhs_data.size() != rhs_data.size())
+ {
+ if (lhs_data.size() < rhs_data.size())
+ broadcast_lhs = true;
+ else
+ broadcast_rhs = true;
+ }
+
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_BOOL;
+
+ int lhs = broadcast_lhs ? cgen.addTensor({{1}, param.input_type})
+ : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+ int rhs = broadcast_rhs ? cgen.addTensor({{1}, param.input_type})
+ : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+ int out = cgen.addTensor({{1, 2, 2, 1}, output_type});
+ cgen.addOperatorEqual({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends(param.backends);
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Equal_DifferentType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL});
+ cgen.addOperatorEqual({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Equal_InvalidType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorEqual({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/ExpandDims.test.cc b/tests/nnfw_api/src/one_op_tests/ExpandDims.test.cc
new file mode 100644
index 000000000..280cf7344
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/ExpandDims.test.cc
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_ExpandDims)
+{
+ CircleGen cgen;
+
+ std::vector<int32_t> axis_data{1};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int in = cgen.addTensor({{1, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int out = cgen.addTensor({{1, 1, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorExpandDims({{in, axis}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
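+  // ExpandDims only inserts a length-1 axis (here axis 1: {1, 4, 1} becomes
+  // {1, 1, 4, 1}); the payload is untouched, so input and output data match.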
+ _context->addTestCase(
+ TestCaseData{}.addInput<float>({0.1, 0.3, 0.5, 0.7}).addOutput<float>({0.1, 0.3, 0.5, 0.7}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_ExpandDims_Int64AxisNeg)
+{
+ CircleGen cgen;
+
+ std::vector<int64_t> axis_data{-1};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int in = cgen.addTensor({{1, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT64, axis_buf});
+ int out = cgen.addTensor({{1, 4, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorExpandDims({{in, axis}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}.addInput<float>({0.1, 0.3, 0.5, 0.7}).addOutput<float>({0.1, 0.3, 0.5, 0.7}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_neg_ExpandDims_Axis)
+{
+ CircleGen cgen;
+
+ std::vector<int32_t> axis_data{4};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int in = cgen.addTensor({{1, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int out = cgen.addTensor({{1, 1, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorExpandDims({{in, axis}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_neg_ExpandDims_AxisNegInput)
+{
+ CircleGen cgen;
+
+ int in = cgen.addTensor({{1, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{1, 1, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorExpandDims({{in, axis}, {out}});
+ cgen.setInputsAndOutputs({in, axis}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.5, 0.7})
+ .addInput<int32_t>({-5})
+ .addOutput<float>({0.1, 0.3, 0.5, 0.7})
+ .expectFailRun());
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Fill.test.cc b/tests/nnfw_api/src/one_op_tests/Fill.test.cc
new file mode 100644
index 000000000..0d34056b3
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Fill.test.cc
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+struct FillVariationParam
+{
+ TestCaseData tcd;
+ const uint8_t *value_data = nullptr;
+ circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32;
+};
+
+class FillVariation : public GenModelTest, public ::testing::WithParamInterface<FillVariationParam>
+{
+};
+
+// The fill value is provided as a constant tensor
+TEST_P(FillVariation, Test)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+
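+  // The raw value is copied by byte size: int64 needs 8 bytes, while float32 and
+  // int32 are both 4 bytes, so sizeof(int32_t) covers the non-int64 cases.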
+ size_t value_size =
+ (param.data_type == circle::TensorType::TensorType_INT64) ? sizeof(int64_t) : sizeof(int32_t);
+ uint32_t value_buf = cgen.addBuffer(param.value_data, value_size);
+
+ int dims = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
+ int value = cgen.addTensor({{1}, param.data_type, value_buf});
+ int out = cgen.addTensor({{2, 3}, param.data_type});
+ cgen.addOperatorFill({{dims, value}, {out}});
+ cgen.setInputsAndOutputs({dims}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+const int32_t test_int32 = 13;
+const int64_t test_int64 = 1052;
+const float test_float = 5.2;
+
+// Test with different value types
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, FillVariation,
+ ::testing::Values(
+ // float value
+ FillVariationParam{
+ TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<float>({5.2, 5.2, 5.2, 5.2, 5.2, 5.2}),
+ reinterpret_cast<const uint8_t *>(&test_float)},
+ // int32 value
+ FillVariationParam{
+ TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<int32_t>({13, 13, 13, 13, 13, 13}),
+ reinterpret_cast<const uint8_t *>(&test_int32), circle::TensorType::TensorType_INT32},
+    // int64 value
+ FillVariationParam{
+ TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<int64_t>({1052, 1052, 1052, 1052, 1052,
+ 1052}),
+ reinterpret_cast<const uint8_t *>(&test_int64), circle::TensorType::TensorType_INT64}));
+
+TEST_F(GenModelTest, OneOp_Fill_Int64_Shape)
+{
+ CircleGen cgen;
+ std::vector<float> value_data{1.3};
+ uint32_t value_buf = cgen.addBuffer(value_data);
+
+ int dims = cgen.addTensor({{2}, circle::TensorType::TensorType_INT64});
+ int value = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, value_buf});
+ int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorFill({{dims, value}, {out}});
+ cgen.setInputsAndOutputs({dims}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}.addInput<int64_t>({2, 3}).addOutput<float>({1.3, 1.3, 1.3, 1.3, 1.3, 1.3}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Fill_Int32_oneoperand)
+{
+ CircleGen cgen;
+
+ int in = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorFill({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<int32_t>({13, 13, 13, 13, 13, 13}));
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Fill_Int64_oneoperand)
+{
+ CircleGen cgen;
+
+ int in = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_INT64});
+ cgen.addOperatorFill({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<int64_t>({13, 13, 13, 13, 13, 13}));
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Fill_Float32_oneoperand)
+{
+ CircleGen cgen;
+
+ int in = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorFill({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<float>({1.3, 1.3, 1.3, 1.3, 1.3, 1.3}));
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Floor.test.cc b/tests/nnfw_api/src/one_op_tests/Floor.test.cc
new file mode 100644
index 000000000..dcb402027
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Floor.test.cc
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+TEST_F(GenModelTest, OneOp_Floor_4D)
+{
+ CircleGen cgen;
+
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorFloor({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
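+  // Floor rounds toward negative infinity: floor(-1.2) = -2, floor(-2.6) = -3.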
+ _context->addTestCase(uniformTCD<float>({{1.2, -1.2, 2.6, -2.6}}, {{1.0, -2.0, 2.0, -3.0}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Floor_InvalidType)
+{
+ CircleGen cgen;
+
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+
+ cgen.addOperatorFloor({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/FloorDiv.test.cc b/tests/nnfw_api/src/one_op_tests/FloorDiv.test.cc
new file mode 100644
index 000000000..edbca8504
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/FloorDiv.test.cc
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+TEST_F(GenModelTest, OneOp_FloorDiv_VarToVar_Float)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorFloorDiv({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
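+  // FloorDiv computes floor(lhs / rhs), rounding toward negative infinity rather
+  // than truncating toward zero: floor(-6.8 / 3.0) = floor(-2.266...) = -3.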
+ _context->addTestCase(
+ uniformTCD<float>({{1.0, 2.0, -6.8, 24.2}, {1.0, 2.0, 3.0, 4.0}}, {{1.0, 1.0, -3.0, 6.0}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_FloorDiv_VarToVar_Float_Broadcast)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorFloorDiv({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{1.0, 2.0, -6.8, 24.2}, {2.0}}, {{0.0, 1.0, -4, 12.0}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_FloorDiv_VarToVar_InvalidDivisor1)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorFloorDiv({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_FloorDiv_Broadcast_InvalidDivisor1)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorFloorDiv({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_FloorDiv_VarToVar_Int)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorFloorDiv({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ uniformTCD<int32_t>({{10, 20, -68, 242}, {1, 2, 3, 4}}, {{10, 10, -23, 60}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_FloorDiv_VarToVar_Int_Broadcast)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int rhs = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorFloorDiv({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<int32_t>({{10, 20, -67, 242}, {2}}, {{5, 10, -34, 121}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_FloorDiv_VarToVar_InvalidDivisor2)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorFloorDiv({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_FloorDiv_Broadcast_InvalidDivisor2)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int rhs = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorFloorDiv({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/FullyConnected.test.cc b/tests/nnfw_api/src/one_op_tests/FullyConnected.test.cc
new file mode 100644
index 000000000..791787f9b
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/FullyConnected.test.cc
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+TEST_F(GenModelTest, OneOp_FullyConnected)
+{
+ CircleGen cgen;
+ // clang-format off
+ std::vector<float> weight_data{ 1, 0, 0, 1,
+ 2, 0, 0, -1,
+ 3, 0, 0, 2,
+ 4, 0, 0, 1,
+ 1, 0, 0, 1,
+ 2, 0, 0, -1,
+ 3, 0, 0, 2,
+ 4, 0, 0, 1,
+ 1, 0, 0, 1,
+ 2, 0, 0, -1,
+ 3, 0, 0, 2,
+ 4, 0, 0, 1,
+ 1, 0, 0, 1,
+ 2, 0, 0, -1,
+ 3, 0, 0, 2,
+ 4, 0, 0, 1 };
+ std::vector<float> bias_data{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 };
+ // clang-format on
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int input = cgen.addTensor({{1, 4}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{16, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{16}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int output = cgen.addTensor({{1, 16}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorFullyConnected({{input, weight, bias}, {output}});
+ cgen.setInputsAndOutputs({input}, {output});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
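+  // output = input * W^T + bias, with W's rows cycling through {1, 0, 0, 1},
+  // {2, 0, 0, -1}, {3, 0, 0, 2}, {4, 0, 0, 1}. For input {1, 3, 2, 1} this yields
+  // 2, 1, 5, 5 repeated; the single bias of 1 on the last row turns its 5 into 6.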
+ _context->addTestCase(
+ uniformTCD<float>({{1, 3, 2, 1}}, {{2, 1, 5, 5, 2, 1, 5, 5, 2, 1, 5, 5, 2, 1, 5, 6}}));
+ _context->setBackends({"cpu", "acl_neon", "xnnpack", "ruy"});
+
+ SUCCEED();
+}
+
+#if defined(__aarch64__)
+TEST_F(GenModelTest, OneOp_FullyConnectedShuffled16x1Float32)
+{
+ CircleGen cgen;
+ // clang-format off
+ std::vector<float> weight_data{ 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, -1, 2, 1, 1, -1, 2, 1, 1, -1, 2, 1, 1, -1, 2, 1 };
+ std::vector<float> bias_data{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 };
+ // clang-format on
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int input = cgen.addTensor({{1, 4}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{16, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{16}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int output = cgen.addTensor({{1, 16}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorFullyConnected({{input, weight, bias}, {output}},
+ circle::FullyConnectedOptionsWeightsFormat_SHUFFLED16x1FLOAT32);
+ cgen.setInputsAndOutputs({input}, {output});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ uniformTCD<float>({{1, 3, 2, 1}}, {{2, 1, 5, 5, 2, 1, 5, 5, 2, 1, 5, 5, 2, 1, 5, 6}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+#endif
+
+// The SHUFFLED16x1FLOAT32 weights format is only supported by the cpu backend on
+// aarch64, so compilation is expected to fail on the other backends.
+TEST_F(GenModelTest, OneOp_neg_FullyConnectedShuffled16x1Float32)
+{
+ CircleGen cgen;
+ // clang-format off
+ std::vector<float> weight_data{ 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, -1, 2, 1, 1, -1, 2, 1, 1, -1, 2, 1, 1, -1, 2, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ std::vector<float> bias_data{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 };
+ // clang-format on
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int input = cgen.addTensor({{1, 4}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{16, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{16}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int output = cgen.addTensor({{1, 16}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorFullyConnected({{input, weight, bias}, {output}},
+ circle::FullyConnectedOptionsWeightsFormat_SHUFFLED16x1FLOAT32);
+ cgen.setInputsAndOutputs({input}, {output});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ auto tc = uniformTCD<float>({{1, 3, 2, 1}}, {{2, 1, 5, 5, 2, 1, 5, 5, 2, 1, 5, 5, 2, 1, 5, 6}});
+ _context->addTestCase(tc);
+ _context->setBackends({"acl_neon", "acl_cl"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_FullyConnected16x1Sparse)
+{
+ CircleGen cgen;
+ // clang-format off
+ std::vector<float> weight_data{ 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
+ 1, -1, 2, 1, 1, -1, 2, 1, 1, -1, 2, 1, 1, -1, 2, 1};
+ std::vector<float> bias_data{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 };
+ // clang-format on
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int input = cgen.addTensor({{1, 4}, circle::TensorType::TensorType_FLOAT32});
+ CircleGen::SparsityParams sp{
+ {0, 1, 2, 3},
+ {0, 1},
+ {{CircleGen::SparseDimensionType::DimensionType_DENSE, 1},
+ {CircleGen::SparseDimensionType::DimensionType_SPARSE_CSR, {0, 2}, {0, 3}},
+ {CircleGen::SparseDimensionType::DimensionType_DENSE, 16},
+ {CircleGen::SparseDimensionType::DimensionType_DENSE, 1}}};
+ int weight = cgen.addTensor({{16, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf}, sp);
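+  // A sketch of the 16x1 block-sparse encoding above, following the TFLite
+  // sparsity format: block map {0, 1} with block dims 16x1 tiles the 16x4 weight
+  // into a 1x4 block grid; dimension 1 is CSR-compressed, and segments {0, 2}
+  // with indices {0, 3} store only block-columns 0 and 3 -- the two nonzero
+  // columns of the dense weight, whose values follow in weight_data.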
+ int bias = cgen.addTensor({{16}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int output = cgen.addTensor({{1, 16}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorFullyConnected({{input, weight, bias}, {output}});
+ cgen.setInputsAndOutputs({input}, {output});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ uniformTCD<float>({{1, 3, 2, 1}}, {{2, 1, 5, 5, 2, 1, 5, 5, 2, 1, 5, 5, 2, 1, 5, 6}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_FullyConnected_OptionalBias)
+{
+ CircleGen cgen;
+ // clang-format off
+ std::vector<float> weight_data{ -1, 4, 0, 3,
+ 1, 4, 0, -1,
+ 3, -1, 0, -1,
+ -1, 3, 4, 4,
+ 4, 0, 4, 0,
+ 4, 1, -1, 1,
+ 2, 2, -2, -1,
+ 4, -1, -2, 3 };
+ // clang-format on
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ int input = cgen.addTensor({{2, 4}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{8, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int output = cgen.addTensor({{2, 8}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorFullyConnected({{input, weight, -1 /* Optional bias */}, {output}});
+ cgen.setInputsAndOutputs({input}, {output});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ uniformTCD<float>({{3, -1, -1, 1, -2, 0, -2, 1}},
+ {{-4, -2, 9, -6, 8, 13, 5, 18, 5, -3, -7, -2, -16, -5, -1, -1}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack", "ruy"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_FullyConnected_NoBias)
+{
+ CircleGen cgen;
+ // clang-format off
+ std::vector<float> weight_data{ -1, 4, 0, 3,
+ 1, 4, 0, -1,
+ 3, -1, 0, -1,
+ -1, 3, 4, 4,
+ 4, 0, 4, 0,
+ 4, 1, -1, 1,
+ 2, 2, -2, -1,
+ 4, -1, -2, 3 };
+ // clang-format on
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ int input = cgen.addTensor({{2, 4}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{8, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int output = cgen.addTensor({{2, 8}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorFullyConnected({{input, weight /* Missing bias */}, {output}});
+ cgen.setInputsAndOutputs({input}, {output});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ uniformTCD<float>({{3, -1, -1, 1, -2, 0, -2, 1}},
+ {{-4, -2, 9, -6, 8, 13, 5, 18, 5, -3, -7, -2, -16, -5, -1, -1}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack", "ruy"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Greater.test.cc b/tests/nnfw_api/src/one_op_tests/Greater.test.cc
new file mode 100644
index 000000000..b63075c0e
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Greater.test.cc
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+struct GreaterVariationParam
+{
+ TestCaseData tcd;
+ circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32;
+ const std::vector<std::string> backends = {"acl_cl", "acl_neon", "cpu"};
+};
+
+class GreaterVariation : public GenModelTest,
+ public ::testing::WithParamInterface<GreaterVariationParam>
+{
+};
+
+// Input shape:
+//   Base: {1, 2, 2, 1}
+//   Broadcast: {1} on one of the two inputs
+// Output shape: {1, 2, 2, 1}
+// Input type: any non-quantized type
+// Output type: BOOL
+// Test with different input types and values
+INSTANTIATE_TEST_SUITE_P(GenModelTest, GreaterVariation,
+ ::testing::Values(
+ // Float type
+ GreaterVariationParam{TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.2, 0.7})
+ .addInput<float>({0.1, 0.2, 0.3, 0.4})
+ .addOutput<bool>({false, true, false, true})},
+ // Float type - broadcast
+ GreaterVariationParam{TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.2, 0.7})
+ .addInput<float>({0.3})
+ .addOutput<bool>({false, false, false, true})},
+ // Int32 type
+ GreaterVariationParam{TestCaseData{}
+ .addInput<int32_t>({1, 3, 2, 7})
+ .addInput<int32_t>({1, 2, 3, 4})
+ .addOutput<bool>({false, true, false, true}),
+ circle::TensorType::TensorType_INT32},
+ // Int32 type - broadcast
+ GreaterVariationParam{TestCaseData{}
+ .addInput<int32_t>({1, 3, 2, 7})
+ .addInput<int32_t>({5})
+ .addOutput<bool>({false, false, false, true}),
+ circle::TensorType::TensorType_INT32},
+ // Int64 type
+ // NYI: acl backend
+ GreaterVariationParam{TestCaseData{}
+ .addInput<int64_t>({1, 3, -2, 7})
+ .addInput<int64_t>({1, 2, 3, 4})
+ .addOutput<bool>({false, true, false, true}),
+ circle::TensorType::TensorType_INT64,
+ {"cpu"}},
+ // Int64 type - broadcast
+ // NYI: acl backend
+ GreaterVariationParam{TestCaseData{}
+ .addInput<int64_t>({1, 3, -2, 7})
+ .addInput<int64_t>({1})
+ .addOutput<bool>({false, true, false, true}),
+ circle::TensorType::TensorType_INT64,
+ {"cpu"}}));
+
+TEST_P(GreaterVariation, Test)
+{
+ auto &param = GetParam();
+
+ auto lhs_data = param.tcd.inputs.at(0);
+ auto rhs_data = param.tcd.inputs.at(1);
+
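+  // The input with fewer elements is declared with shape {1} below, so the
+  // runtime must broadcast it against the {1, 2, 2, 1} operand.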
+ bool broadcast_lhs = false;
+ bool broadcast_rhs = false;
+ if (lhs_data.size() != rhs_data.size())
+ {
+ if (lhs_data.size() < rhs_data.size())
+ broadcast_lhs = true;
+ else
+ broadcast_rhs = true;
+ }
+
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_BOOL;
+
+ int lhs = broadcast_lhs ? cgen.addTensor({{1}, param.input_type})
+ : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+ int rhs = broadcast_rhs ? cgen.addTensor({{1}, param.input_type})
+ : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+ int out = cgen.addTensor({{1, 2, 2, 1}, output_type});
+ cgen.addOperatorGreater({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends(param.backends);
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Greater_DifferentType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL});
+ cgen.addOperatorGreater({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Greater_InvalidType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorGreater({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/GreaterEqual.test.cc b/tests/nnfw_api/src/one_op_tests/GreaterEqual.test.cc
new file mode 100644
index 000000000..f824030e0
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/GreaterEqual.test.cc
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+struct GreaterEqualVariationParam
+{
+ TestCaseData tcd;
+ circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32;
+ const std::vector<std::string> backends = {"acl_cl", "acl_neon", "cpu"};
+};
+
+class GreaterEqualVariation : public GenModelTest,
+ public ::testing::WithParamInterface<GreaterEqualVariationParam>
+{
+};
+
+// Input shape:
+// Base: {1, 2, 2, 1}
+// Broadcast: {1} for one of the two inputs
+// Output shape: {1, 2, 2, 1}
+// Input type: Non-quantized types
+// Output type: BOOL
+// Tests with different input types and values
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, GreaterEqualVariation,
+ ::testing::Values(
+ // Float type
+ GreaterEqualVariationParam{TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.2, 0.7})
+ .addInput<float>({0.1, 0.2, 0.3, 0.4})
+ .addOutput<bool>({true, true, false, true})},
+ // Float type - broadcast
+ GreaterEqualVariationParam{TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.2, 0.7})
+ .addInput<float>({0.3})
+ .addOutput<bool>({false, true, false, true})},
+ // Int32 type
+ GreaterEqualVariationParam{TestCaseData{}
+ .addInput<int32_t>({1, 3, 2, 7})
+ .addInput<int32_t>({1, 2, 3, 4})
+ .addOutput<bool>({true, true, false, true}),
+ circle::TensorType::TensorType_INT32},
+ // Int32 type - broadcast
+ GreaterEqualVariationParam{TestCaseData{}
+ .addInput<int32_t>({1, 3, 2, 7})
+ .addInput<int32_t>({5})
+ .addOutput<bool>({false, false, false, true}),
+ circle::TensorType::TensorType_INT32},
+ // Int64 type
+ // NYI: acl backend
+ GreaterEqualVariationParam{TestCaseData{}
+ .addInput<int64_t>({1, 3, -2, 7})
+ .addInput<int64_t>({1, 2, 3, 4})
+ .addOutput<bool>({true, true, false, true}),
+ circle::TensorType::TensorType_INT64,
+ {"cpu"}},
+ // Int64 type - broadcast
+ // NYI: acl backend
+ GreaterEqualVariationParam{TestCaseData{}
+ .addInput<int64_t>({1, 3, -2, 7})
+ .addInput<int64_t>({1})
+ .addOutput<bool>({true, true, false, true}),
+ circle::TensorType::TensorType_INT64,
+ {"cpu"}}));
+
+TEST_P(GreaterEqualVariation, Test)
+{
+ auto &param = GetParam();
+
+ auto lhs_data = param.tcd.inputs.at(0);
+ auto rhs_data = param.tcd.inputs.at(1);
+
+ bool broadcast_lhs = false;
+ bool broadcast_rhs = false;
+ if (lhs_data.size() != rhs_data.size())
+ {
+ if (lhs_data.size() < rhs_data.size())
+ broadcast_lhs = true;
+ else
+ broadcast_rhs = true;
+ }
+
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_BOOL;
+
+ int lhs = broadcast_lhs ? cgen.addTensor({{1}, param.input_type})
+ : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+ int rhs = broadcast_rhs ? cgen.addTensor({{1}, param.input_type})
+ : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+ int out = cgen.addTensor({{1, 2, 2, 1}, output_type});
+ cgen.addOperatorGreaterEqual({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends(param.backends);
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_GreaterEqual_DifferentType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL});
+ cgen.addOperatorGreaterEqual({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_GreaterEqual_InvalidType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorGreaterEqual({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/If.test.cc b/tests/nnfw_api/src/one_op_tests/If.test.cc
new file mode 100644
index 000000000..543d87980
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/If.test.cc
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+TEST_F(GenModelTest, OneOp_If)
+{
+  // The model is equivalent to the pseudocode below
+ //
+ // function model(x)
+ // {
+ // if (x < 0.0)
+ // return -100.0;
+ // else
+ // return 100.0;
+ // }
+
+ CircleGen cgen;
+
+ // constant buffers
+ std::vector<float> comp_data{0.0};
+ uint32_t comp_buf = cgen.addBuffer(comp_data);
+ std::vector<float> then_data{-100};
+ uint32_t then_buf = cgen.addBuffer(then_data);
+ std::vector<float> else_data{100};
+ uint32_t else_buf = cgen.addBuffer(else_data);
+
+ // primary subgraph
+ {
+ int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int comp = cgen.addTensor({{1}, circle::TensorType_FLOAT32, comp_buf});
+ int cond = cgen.addTensor({{1}, circle::TensorType_BOOL});
+ cgen.addOperatorLess({{x, comp}, {cond}});
+
+ int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
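+    // Subgraph indices: 0 is this primary graph; 1 and 2 are the then/else subgraphs added below.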
+ cgen.addOperatorIf({{cond}, {ret}}, 1, 2);
+
+ cgen.setInputsAndOutputs({x}, {ret});
+ }
+
+ // then subgraph
+ {
+ cgen.nextSubgraph();
+ int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32, then_buf});
+ cgen.setInputsAndOutputs({}, {ret});
+ }
+
+ // else subgraph
+ {
+ cgen.nextSubgraph();
+ int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32, else_buf});
+ cgen.setInputsAndOutputs({}, {ret});
+ }
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{-1.0}}, {{-100.0}}));
+ _context->addTestCase(uniformTCD<float>({{1.0}}, {{100.0}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+class IfWrongSubgraphIndex : public GenModelTest,
+ public ::testing::WithParamInterface<std::pair<int, int>>
+{
+};
+
+TEST_P(IfWrongSubgraphIndex, neg_Test)
+{
+  // At least one of these subgraph indices must be less than 0 or greater than 2
+ int then_subg = GetParam().first;
+ int else_subg = GetParam().second;
+
+  // Model load must fail when the If operation's subgraph index is invalid
+
+ CircleGen cgen;
+
+ // constant buffers
+ std::vector<float> then_data{-100};
+ uint32_t then_buf = cgen.addBuffer(then_data);
+ std::vector<float> else_data{100};
+ uint32_t else_buf = cgen.addBuffer(else_data);
+
+ // primary subgraph
+ {
+ int x = cgen.addTensor({{1}, circle::TensorType_BOOL});
+ int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ cgen.addOperatorIf({{x}, {ret}}, then_subg, else_subg);
+
+ cgen.setInputsAndOutputs({x}, {ret});
+ }
+
+ // then subgraph
+ {
+ cgen.nextSubgraph();
+ int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32, then_buf});
+ cgen.setInputsAndOutputs({}, {ret});
+ }
+
+ // else subgraph
+ {
+ cgen.nextSubgraph();
+ int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32, else_buf});
+ cgen.setInputsAndOutputs({}, {ret});
+ }
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+INSTANTIATE_TEST_SUITE_P(GenModelTest, IfWrongSubgraphIndex,
+ ::testing::Values(std::make_pair(99, 2), std::make_pair(-1, 2),
+ std::make_pair(1, 99), std::make_pair(1, -99),
+ std::make_pair(-99, 99)));
diff --git a/tests/nnfw_api/src/one_op_tests/InstanceNorm.test.cc b/tests/nnfw_api/src/one_op_tests/InstanceNorm.test.cc
new file mode 100644
index 000000000..6569ced21
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/InstanceNorm.test.cc
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_InstanceNorm)
+{
+ CircleGen cgen;
+ uint32_t beta_buf = cgen.addBuffer(std::vector<float>{1});
+ uint32_t gamma_buf = cgen.addBuffer(std::vector<float>{2});
+ int beta = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, beta_buf});
+ int gamma = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, gamma_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorInstanceNorm({{in, beta, gamma}, {out}}, 0, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{1, 1, 1, 1}}, {{2, 2, 2, 2}}));
+ _context->setBackends({"acl_cl", "acl_neon"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_InstanceNorm_InvalidActivation)
+{
+ CircleGen cgen;
+ uint32_t beta_buf = cgen.addBuffer(std::vector<float>{1});
+ uint32_t gamma_buf = cgen.addBuffer(std::vector<float>{2});
+ int beta = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, beta_buf});
+ int gamma = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, gamma_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorInstanceNorm({{in, beta, gamma}, {out}}, 0,
+                               static_cast<circle::ActivationFunctionType>(128) /* Invalid value */);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/L2Normalization.cc b/tests/nnfw_api/src/one_op_tests/L2Normalization.cc
deleted file mode 100644
index 8b4b8f5b6..000000000
--- a/tests/nnfw_api/src/one_op_tests/L2Normalization.cc
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-TEST_F(GenModelTest, OneOp_L2Normalization)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 3}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 2, 3}, circle::TensorType::TensorType_FLOAT32});
-
- cgen.addOperatorL2Normalization({{in}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase({{{0, 3, 4, 0, 5, 12, 0, 8, 15, 0, 7, 24}},
- {{0, 0.6, 0.8, 0, 0.38461539149284363, 0.92307698726654053, 0,
- 0.47058823704719543, 0.88235294818878174, 0, 0.28, 0.96}}});
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/L2Normalization.test.cc b/tests/nnfw_api/src/one_op_tests/L2Normalization.test.cc
new file mode 100644
index 000000000..f825fec5c
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/L2Normalization.test.cc
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_L2Normalization)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 3}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 3}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorL2Normalization({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
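+  // Each 3-element channel vector is divided by its L2 norm,
+  // e.g. {0, 3, 4} has norm 5 and maps to {0, 0.6, 0.8}.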
+ _context->addTestCase(
+ uniformTCD<float>({{0, 3, 4, 0, 5, 12, 0, 8, 15, 0, 7, 24}},
+ {{0, 0.6, 0.8, 0, 0.38461539149284363, 0.92307698726654053, 0,
+ 0.47058823704719543, 0.88235294818878174, 0, 0.28, 0.96}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/LeakyRelu.cc b/tests/nnfw_api/src/one_op_tests/LeakyRelu.cc
deleted file mode 100644
index 9db911734..000000000
--- a/tests/nnfw_api/src/one_op_tests/LeakyRelu.cc
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-TEST_F(GenModelTest, OneOp_LeakyRelu)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorLeakyRelu({{in}, {out}}, 0.5);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase({{{0, 1.0, 3.0, 1.0, -1.0, -2.0f}}, {{0, 1.0, 3.0, 1.0, -0.5, -1.0}}});
- _context->setBackends({"acl_cl", "acl_neon"});
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/LeakyRelu.test.cc b/tests/nnfw_api/src/one_op_tests/LeakyRelu.test.cc
new file mode 100644
index 000000000..cb3af4ee2
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/LeakyRelu.test.cc
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_LeakyRelu)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorLeakyRelu({{in}, {out}}, 0.5);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
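+  // LeakyRelu(x) = x for x >= 0 and alpha * x otherwise; with alpha = 0.5,
+  // -1.0 -> -0.5 and -2.0 -> -1.0.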
+ _context->addTestCase(
+ uniformTCD<float>({{0, 1.0, 3.0, 1.0, -1.0, -2.0f}}, {{0, 1.0, 3.0, 1.0, -0.5, -1.0}}));
+ _context->setBackends({"cpu", "acl_cl", "acl_neon"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_LeakyRelu_InvalidType)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_UINT8});
+ int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorLeakyRelu({{in}, {out}}, 0.5);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu", "acl_cl", "acl_neon"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Less.test.cc b/tests/nnfw_api/src/one_op_tests/Less.test.cc
new file mode 100644
index 000000000..6f76465ae
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Less.test.cc
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+struct LessVariationParam
+{
+ TestCaseData tcd;
+ circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32;
+ const std::vector<std::string> backends = {"acl_cl", "acl_neon", "cpu"};
+};
+
+class LessVariation : public GenModelTest, public ::testing::WithParamInterface<LessVariationParam>
+{
+};
+
+// Input shape:
+// Base: {1, 2, 2, 1}
+// Broadcast: {1} for one of the two inputs
+// Output shape: {1, 2, 2, 1}
+// Input type: Non-quantized types
+// Output type: BOOL
+// Tests with different input types and values
+INSTANTIATE_TEST_SUITE_P(GenModelTest, LessVariation,
+ ::testing::Values(
+ // Float type
+ LessVariationParam{TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.2, 0.7})
+ .addInput<float>({0.1, 0.2, 0.3, 0.4})
+ .addOutput<bool>({false, false, true, false})},
+ // Float type - broadcast
+ LessVariationParam{TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.2, 0.7})
+ .addInput<float>({0.3})
+ .addOutput<bool>({true, false, true, false})},
+ // Int32 type
+ LessVariationParam{TestCaseData{}
+ .addInput<int32_t>({1, 3, 2, 7})
+ .addInput<int32_t>({1, 2, 3, 4})
+ .addOutput<bool>({false, false, true, false}),
+ circle::TensorType::TensorType_INT32},
+ // Int32 type - broadcast
+ LessVariationParam{TestCaseData{}
+ .addInput<int32_t>({1, 3, 2, 7})
+ .addInput<int32_t>({5})
+ .addOutput<bool>({true, true, true, false}),
+ circle::TensorType::TensorType_INT32},
+ // Int64 type
+ // NYI: acl backend
+ LessVariationParam{TestCaseData{}
+ .addInput<int64_t>({1, 3, -2, 7})
+ .addInput<int64_t>({1, 2, 3, 4})
+ .addOutput<bool>({false, false, true, false}),
+ circle::TensorType::TensorType_INT64,
+ {"cpu"}},
+ // Int64 type - broadcast
+ // NYI: acl backend
+ LessVariationParam{TestCaseData{}
+ .addInput<int64_t>({1, 3, -2, 7})
+ .addInput<int64_t>({1})
+ .addOutput<bool>({false, false, true, false}),
+ circle::TensorType::TensorType_INT64,
+ {"cpu"}}));
+
+TEST_P(LessVariation, Test)
+{
+ auto &param = GetParam();
+
+ auto lhs_data = param.tcd.inputs.at(0);
+ auto rhs_data = param.tcd.inputs.at(1);
+
+ bool broadcast_lhs = false;
+ bool broadcast_rhs = false;
+ if (lhs_data.size() != rhs_data.size())
+ {
+ if (lhs_data.size() < rhs_data.size())
+ broadcast_lhs = true;
+ else
+ broadcast_rhs = true;
+ }
+
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_BOOL;
+
+ int lhs = broadcast_lhs ? cgen.addTensor({{1}, param.input_type})
+ : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+ int rhs = broadcast_rhs ? cgen.addTensor({{1}, param.input_type})
+ : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+ int out = cgen.addTensor({{1, 2, 2, 1}, output_type});
+ cgen.addOperatorLess({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends(param.backends);
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Less_DifferentType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL});
+ cgen.addOperatorLess({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Less_InvalidType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorLess({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/LessEqual.test.cc b/tests/nnfw_api/src/one_op_tests/LessEqual.test.cc
new file mode 100644
index 000000000..e0e6d6698
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/LessEqual.test.cc
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+struct LessEqualVariationParam
+{
+ TestCaseData tcd;
+ circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32;
+ const std::vector<std::string> backends = {"acl_cl", "acl_neon", "cpu"};
+};
+
+class LessEqualVariation : public GenModelTest,
+ public ::testing::WithParamInterface<LessEqualVariationParam>
+{
+};
+
+// Input shape:
+// Base: {1, 2, 2, 1}
+// Broadcast: {1} for one of the two inputs
+// Output shape: {1, 2, 2, 1}
+// Input type: Non-quantized types
+// Output type: BOOL
+// Tests with different input types and values
+INSTANTIATE_TEST_SUITE_P(GenModelTest, LessEqualVariation,
+ ::testing::Values(
+ // Float type
+ LessEqualVariationParam{TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.2, 0.7})
+ .addInput<float>({0.1, 0.2, 0.3, 0.4})
+ .addOutput<bool>({true, false, true, false})},
+ // Float type - broadcast
+ LessEqualVariationParam{TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.2, 0.7})
+ .addInput<float>({0.3})
+ .addOutput<bool>({true, true, true, false})},
+ // Int32 type
+ LessEqualVariationParam{TestCaseData{}
+ .addInput<int32_t>({1, 3, 2, 7})
+ .addInput<int32_t>({1, 2, 3, 4})
+ .addOutput<bool>({true, false, true, false}),
+ circle::TensorType::TensorType_INT32},
+ // Int32 type - broadcast
+ LessEqualVariationParam{TestCaseData{}
+ .addInput<int32_t>({1, 3, 2, 7})
+ .addInput<int32_t>({5})
+ .addOutput<bool>({true, true, true, false}),
+ circle::TensorType::TensorType_INT32},
+ // Int64 type
+ // NYI: acl backend
+ LessEqualVariationParam{TestCaseData{}
+ .addInput<int64_t>({1, 3, -2, 7})
+ .addInput<int64_t>({1, 2, 3, 4})
+ .addOutput<bool>({true, false, true, false}),
+ circle::TensorType::TensorType_INT64,
+ {"cpu"}},
+ // Int64 type - broadcast
+ // NYI: acl backend
+ LessEqualVariationParam{TestCaseData{}
+ .addInput<int64_t>({1, 3, -2, 7})
+ .addInput<int64_t>({1})
+ .addOutput<bool>({true, false, true, false}),
+ circle::TensorType::TensorType_INT64,
+ {"cpu"}}));
+
+TEST_P(LessEqualVariation, Test)
+{
+ auto &param = GetParam();
+
+ auto lhs_data = param.tcd.inputs.at(0);
+ auto rhs_data = param.tcd.inputs.at(1);
+
+ bool broadcast_lhs = false;
+ bool broadcast_rhs = false;
+ if (lhs_data.size() != rhs_data.size())
+ {
+ if (lhs_data.size() < rhs_data.size())
+ broadcast_lhs = true;
+ else
+ broadcast_rhs = true;
+ }
+
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_BOOL;
+
+ int lhs = broadcast_lhs ? cgen.addTensor({{1}, param.input_type})
+ : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+ int rhs = broadcast_rhs ? cgen.addTensor({{1}, param.input_type})
+ : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+ int out = cgen.addTensor({{1, 2, 2, 1}, output_type});
+ cgen.addOperatorLessEqual({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends(param.backends);
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_LessEqual_DifferentType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL});
+ cgen.addOperatorLessEqual({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_LessEqual_InvalidType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorLessEqual({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/LogSoftmax.test.cc b/tests/nnfw_api/src/one_op_tests/LogSoftmax.test.cc
new file mode 100644
index 000000000..5834fa53a
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/LogSoftmax.test.cc
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_LogSoftmax)
+{
+ // NOTE For tf lite the params are fixed as:
+ // beta = 1.0, axis = -1
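+  //
+  // Reference: log_softmax(x_i) = x_i - log(sum_j exp(x_j)) along the last
+  // axis, e.g. the input pair (0, -6) maps to about (-0.00248, -6.00248).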
+
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 1, 1, 4, 2}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 1, 1, 4, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorLogSoftmax({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->addTestCase(uniformTCD<float>(
+ {{0, -6, 2, 4, 3, -2, 10, 1}},
+ {{-.00247565, -6.00247, -2.12692, -.126928, -.00671534, -5.00671, -.000123374, -9.00012}}));
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_LogSoftmax_InvalidModel)
+{
+ CircleGen cgen;
+ int out = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorLogSoftmax({{}, {out}}); // No input tensor
+ cgen.setInputsAndOutputs({}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Mean.test.cc b/tests/nnfw_api/src/one_op_tests/Mean.test.cc
new file mode 100644
index 000000000..6293d3837
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Mean.test.cc
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+CircleBuffer genSimpleMeanModel()
+{
+ CircleGen cgen;
+ uint32_t axis_buf = cgen.addBuffer(std::vector<int32_t>{1, 2});
+ int in = cgen.addTensor({{1, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ int axis = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32, axis_buf});
+ int out = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
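+  // Mean over axes {1, 2}; the trailing bool argument of addOperatorMean is presumably the keep_dims flag.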
+ cgen.addOperatorMean({{in, axis}, {out}}, true);
+ cgen.setInputsAndOutputs({in}, {out});
+ return cgen.finish();
+}
+
+TEST_F(GenModelTest, OneOp_Mean)
+{
+ auto model = genSimpleMeanModel();
+ _context = std::make_unique<GenModelTestContext>(std::move(model));
+ _context->addTestCase(uniformTCD<float>({{1, 2, 3, 4, 5, 6, 7, 8, 9}}, {{5}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+CircleBuffer genWrongMeanModel()
+{
+ CircleGen cgen;
+ uint32_t axis_buf = cgen.addBuffer(std::vector<int32_t>{1, 2});
+ int in = cgen.addTensor({{1, 3, 3, 1}, circle::TensorType::TensorType_BOOL});
+ int axis = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32, axis_buf});
+ int out = cgen.addTensor({{1}, circle::TensorType::TensorType_BOOL});
+ cgen.addOperatorMean({{in, axis}, {out}}, true);
+ cgen.setInputsAndOutputs({in}, {out});
+ return cgen.finish();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Mean)
+{
+ auto model = genWrongMeanModel();
+ _context = std::make_unique<GenModelTestContext>(std::move(model));
+ _context->addTestCase(uniformTCD<float>({{1, 2, 3, 4, 5, 6, 7, 8, 9}}, {{5}}));
+ _context->setBackends({"cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Mul.test.cc b/tests/nnfw_api/src/one_op_tests/Mul.test.cc
new file mode 100644
index 000000000..0c7944613
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Mul.test.cc
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+TEST_F(GenModelTest, OneOp_Mul_Uint8_VarVar)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 3);
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 2.0, 1);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.5, 2);
+ cgen.addOperatorMul({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
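+  // Dequantized check for the first element: lhs 1.0 * (3 - 3) = 0,
+  // rhs 2.0 * (5 - 1) = 8, product 0, requantized 0 / 0.5 + 2 = 2.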
+ _context->addTestCase(uniformTCD<uint8_t>({{3, 12, 5, 2}, {5, 4, 7, 0}}, {{2, 110, 50, 6}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Mul_Int8_VarVar)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 1.0, 2);
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 2.0, 3);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.5, -6);
+ cgen.addOperatorMul({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<int8_t>({{1, 3, 2, 4}, {5, -4, -7, 4}}, {{-14, -34, -6, 2}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_MulBroadcast_Uint8_VarVar)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 3);
+ int rhs = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_UINT8}, 2.0, 1);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.5, 2);
+ cgen.addOperatorMul({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<uint8_t>({{3, 12, 5, 4}, {5}}, {{2, 146, 34, 18}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_MulBroadcast_Int8_VarVar)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 1.0, 2);
+ int rhs = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_INT8}, 2.0, 3);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.5, -6);
+ cgen.addOperatorMul({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<int8_t>({{1, 3, 2, 4}, {5}}, {{-14, 2, -6, 10}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Mul_InvalidType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 2);
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorMul({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Mul_InvalidShape)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorMul({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Mul_OneOperand)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorMul({{in}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Mul_ThreeOperands)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorMul({{in, in, in}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Neg.test.cc b/tests/nnfw_api/src/one_op_tests/Neg.test.cc
new file mode 100644
index 000000000..7bc0cc452
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Neg.test.cc
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_Neg_Float32)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorNeg({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{1.1, -2.2, 3.3, -4.4}}, {{-1.1, 2.2, -3.3, 4.4}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Neg_Int32)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorNeg({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<int32_t>({{1, -2, 3, -4}}, {{-1, 2, -3, 4}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Neg_Int64)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT64});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT64});
+ cgen.addOperatorNeg({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<int64_t>({{1, -2, 3, -4}}, {{-1, 2, -3, 4}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Neg_Float32_TwoOperand)
+{
+ CircleGen cgen;
+ int in1 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int in2 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out1 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out2 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
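+  // Neg takes a single input and output, so loading this two-operand model must fail.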
+  cgen.addOperatorNeg({{in1, in2}, {out1, out2}});
+ cgen.setInputsAndOutputs({in1, in2}, {out1, out2});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Neg_Int32_TwoOperand)
+{
+ CircleGen cgen;
+ int in1 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int in2 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int out1 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int out2 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+  cgen.addOperatorNeg({{in1, in2}, {out1, out2}});
+ cgen.setInputsAndOutputs({in1, in2}, {out1, out2});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Neg_Int64_TwoOperand)
+{
+ CircleGen cgen;
+ int in1 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT64});
+ int in2 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT64});
+ int out1 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT64});
+ int out2 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT64});
+  cgen.addOperatorNeg({{in1, in2}, {out1, out2}});
+ cgen.setInputsAndOutputs({in1, in2}, {out1, out2});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/NotEqual.test.cc b/tests/nnfw_api/src/one_op_tests/NotEqual.test.cc
new file mode 100644
index 000000000..6a3fec150
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/NotEqual.test.cc
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+struct NotEqualVariationParam
+{
+ TestCaseData tcd;
+ circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32;
+ const std::vector<std::string> backends = {"acl_cl", "acl_neon", "cpu"};
+};
+
+class NotEqualVariation : public GenModelTest,
+ public ::testing::WithParamInterface<NotEqualVariationParam>
+{
+};
+
+// Input shape:
+// Base: {1, 2, 2, 1}
+// Broadcast: {1} for one of the two inputs
+// Output shape: {1, 2, 2, 1}
+// Input type: Non-quantized types
+// Output type: BOOL
+// Tests with different input types and values
+INSTANTIATE_TEST_SUITE_P(GenModelTest, NotEqualVariation,
+ ::testing::Values(
+ // Float type
+ NotEqualVariationParam{TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.5, 0.7})
+ .addInput<float>({0.1, 0.2, 0.3, 0.4})
+ .addOutput<bool>({false, true, true, true})},
+ // Float type - broadcast
+ NotEqualVariationParam{TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.5, 0.7})
+ .addInput<float>({0.3})
+ .addOutput<bool>({true, false, true, true})},
+ // Int32 type
+ NotEqualVariationParam{TestCaseData{}
+ .addInput<int32_t>({1, 3, 5, 7})
+ .addInput<int32_t>({1, 2, 3, 4})
+ .addOutput<bool>({false, true, true, true}),
+ circle::TensorType::TensorType_INT32},
+ // Int32 type - broadcast
+ NotEqualVariationParam{TestCaseData{}
+ .addInput<int32_t>({1, 3, 5, 7})
+ .addInput<int32_t>({5})
+ .addOutput<bool>({true, true, false, true}),
+ circle::TensorType::TensorType_INT32},
+ // Int64 type
+ // NYI: acl backend
+ NotEqualVariationParam{TestCaseData{}
+ .addInput<int64_t>({1, 3, 5, 7})
+ .addInput<int64_t>({1, 2, 3, 4})
+ .addOutput<bool>({false, true, true, true}),
+ circle::TensorType::TensorType_INT64,
+ {"cpu"}},
+ // Int64 type - broadcast
+ // NYI: acl backend
+ NotEqualVariationParam{TestCaseData{}
+ .addInput<int64_t>({1, 3, 5, 7})
+ .addInput<int64_t>({1})
+ .addOutput<bool>({false, true, true, true}),
+ circle::TensorType::TensorType_INT64,
+ {"cpu"}},
+ // Bool type
+ NotEqualVariationParam{TestCaseData{}
+ .addInput<bool>({false, false, true, true})
+ .addInput<bool>({false, true, false, true})
+ .addOutput<bool>({false, true, true, false}),
+ circle::TensorType::TensorType_BOOL},
+ // Bool type - broadcast
+ NotEqualVariationParam{TestCaseData{}
+ .addInput<bool>({false, false, true, true})
+ .addInput<bool>({false})
+ .addOutput<bool>({false, false, true, true}),
+                                             circle::TensorType::TensorType_BOOL}));
+
+TEST_P(NotEqualVariation, Test)
+{
+ auto &param = GetParam();
+
+ auto lhs_data = param.tcd.inputs.at(0);
+ auto rhs_data = param.tcd.inputs.at(1);
+
+ bool broadcast_lhs = false;
+ bool broadcast_rhs = false;
+ if (lhs_data.size() != rhs_data.size())
+ {
+ if (lhs_data.size() < rhs_data.size())
+ broadcast_lhs = true;
+ else
+ broadcast_rhs = true;
+ }
+
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_BOOL;
+
+ int lhs = broadcast_lhs ? cgen.addTensor({{1}, param.input_type})
+ : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+ int rhs = broadcast_rhs ? cgen.addTensor({{1}, param.input_type})
+ : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+ int out = cgen.addTensor({{1, 2, 2, 1}, output_type});
+ cgen.addOperatorNotEqual({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends(param.backends);
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_NotEqual_DifferentType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL});
+ cgen.addOperatorNotEqual({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_NotEqual_InvalidType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorNotEqual({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/OneHot.test.cc b/tests/nnfw_api/src/one_op_tests/OneHot.test.cc
new file mode 100644
index 000000000..78ad35b40
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/OneHot.test.cc
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+TEST_F(GenModelTest, OneOp_OneHot_OffValueToConst)
+{
+ CircleGen cgen;
+ std::vector<int32_t> depth_data{3};
+ uint32_t depth_buf = cgen.addBuffer(depth_data);
+ std::vector<float> off_value_data{0};
+ uint32_t off_value_buf = cgen.addBuffer(off_value_data);
+ int indices = cgen.addTensor({{1, 2, 2}, circle::TensorType::TensorType_INT32});
+ int depth = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, depth_buf});
+ int on_value = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+ int off_value = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, off_value_buf});
+ int axis = 2;
+ int out = cgen.addTensor({{1, 2, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorOneHot({{indices, depth, on_value, off_value}, {out}}, axis);
+ cgen.setInputsAndOutputs({indices, on_value}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
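+  // With axis = 2, the depth-3 one-hot dimension is inserted at position 2:
+  // indices[0][0] = {1, 2} expands to rows {0, 0}, {1, 0}, {0, 1}.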
+ _context->addTestCase(TestCaseData{}
+ .addInput<int32_t>({1, 2, 0, 2})
+ .addInput<float>({1})
+ .addOutput<float>({0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_OneHot_OffValueToNotZero)
+{
+ CircleGen cgen;
+ std::vector<int32_t> depth_data{3};
+ uint32_t depth_buf = cgen.addBuffer(depth_data);
+ int indices = cgen.addTensor({{1, 2, 2}, circle::TensorType::TensorType_INT32});
+ int depth = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, depth_buf});
+ int on_value = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+ int off_value = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+ int axis = 2;
+ int out = cgen.addTensor({{1, 2, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorOneHot({{indices, depth, on_value, off_value}, {out}}, axis);
+ cgen.setInputsAndOutputs({indices, on_value, off_value}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(TestCaseData{}
+ .addInput<int32_t>({1, 2, 0, 2})
+ .addInput<float>({1})
+ .addInput<float>({-1})
+ .addOutput<float>({-1, -1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_OneHot_IndicesValueToNeg_OffValueToConst)
+{
+ CircleGen cgen;
+ std::vector<int32_t> depth_data{3};
+ uint32_t depth_buf = cgen.addBuffer(depth_data);
+ std::vector<float> off_value_data{0};
+ uint32_t off_value_buf = cgen.addBuffer(off_value_data);
+ int indices = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_INT32});
+ int depth = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, depth_buf});
+ int on_value = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+ int off_value = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, off_value_buf});
+ int axis = 2;
+ int out = cgen.addTensor({{2, 2, 3}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorOneHot({{indices, depth, on_value, off_value}, {out}}, axis);
+ cgen.setInputsAndOutputs({indices, on_value}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(TestCaseData{}
+ .addInput<int32_t>({1, 2, 0, -1})
+ .addInput<float>({1})
+ .addOutput<float>({0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_OneHot_IndicesValueToNeg_OffValueToVar)
+{
+ CircleGen cgen;
+ std::vector<int32_t> depth_data{3};
+ uint32_t depth_buf = cgen.addBuffer(depth_data);
+ int indices = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_INT32});
+ int depth = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, depth_buf});
+ int on_value = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+ int off_value = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+ int axis = 2;
+ int out = cgen.addTensor({{2, 2, 3}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorOneHot({{indices, depth, on_value, off_value}, {out}}, axis);
+ cgen.setInputsAndOutputs({indices, on_value, off_value}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(TestCaseData{}
+ .addInput<int32_t>({1, 2, 0, -1})
+ .addInput<float>({1})
+ .addInput<float>({0})
+ .addOutput<float>({0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_OneHot_OneOperand)
+{
+ CircleGen cgen;
+ int indices = cgen.addTensor({{1, 2, 2}, circle::TensorType::TensorType_INT32});
+ int axis = 2;
+ int out = cgen.addTensor({{1, 2, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorOneHot({{indices}, {out}}, axis);
+ cgen.setInputsAndOutputs({indices}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_OneHot_TwoOperands)
+{
+ CircleGen cgen;
+ std::vector<int32_t> depth_data{3};
+ uint32_t depth_buf = cgen.addBuffer(depth_data);
+ int indices = cgen.addTensor({{1, 2, 2}, circle::TensorType::TensorType_INT32});
+ int depth = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, depth_buf});
+ int axis = 2;
+ int out = cgen.addTensor({{1, 2, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorOneHot({{indices, depth}, {out}}, axis);
+ cgen.setInputsAndOutputs({indices}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_OneHot_ThreeOperands)
+{
+ CircleGen cgen;
+ std::vector<int32_t> depth_data{3};
+ uint32_t depth_buf = cgen.addBuffer(depth_data);
+ int indices = cgen.addTensor({{1, 2, 2}, circle::TensorType::TensorType_INT32});
+ int depth = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, depth_buf});
+ int on_value = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+ int axis = 2;
+ int out = cgen.addTensor({{1, 2, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorOneHot({{indices, depth, on_value}, {out}}, axis);
+ cgen.setInputsAndOutputs({indices, on_value}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
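+// The output rank is the indices rank plus one (3 + 1 = 4 here), so a valid
+// axis is assumed to lie in [-1, 3]; axis = 4 is out of range and should be
+// rejected at compile time.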
+TEST_F(GenModelTest, neg_OneOp_OneHot_InvalidAxis)
+{
+ CircleGen cgen;
+ std::vector<int32_t> depth_data{3};
+ uint32_t depth_buf = cgen.addBuffer(depth_data);
+ int indices = cgen.addTensor({{1, 2, 2}, circle::TensorType::TensorType_INT32});
+ int depth = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, depth_buf});
+ int on_value = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+ int off_value = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+ int axis = 4;
+ int out = cgen.addTensor({{1, 2, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorOneHot({{indices, depth, on_value, off_value}, {out}}, axis);
+ cgen.setInputsAndOutputs({indices, on_value, off_value}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Pad.cc b/tests/nnfw_api/src/one_op_tests/Pad.cc
deleted file mode 100644
index 10fe6c78a..000000000
--- a/tests/nnfw_api/src/one_op_tests/Pad.cc
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-TEST_F(GenModelTest, OneOp_Pad)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
- uint32_t padding_buf = cgen.addBuffer(padding_data);
- int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
- int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
-
- cgen.addOperatorPad({{in, padding}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase({{{1, 2, 3, 4}}, {{0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, 0, 0, 0, 0, 0}}});
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Pad_InvalidPadRank)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- std::vector<int32_t> padding_data{1, 1, 1, 1};
- uint32_t padding_buf = cgen.addBuffer(padding_data);
- int padding = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, padding_buf});
- int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
-
- cgen.addOperatorPad({{in, padding}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->setCompileFail();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Pad_InvalidPadDim0)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- std::vector<int32_t> padding_data{1, 1, 1, 1};
- uint32_t padding_buf = cgen.addBuffer(padding_data);
- int padding = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_INT32, padding_buf});
- int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
-
- cgen.addOperatorPad({{in, padding}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->setCompileFail();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Pad_InvalidPadDim1)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
- std::vector<int32_t> padding_data{1, 1, 1, 1};
- uint32_t padding_buf = cgen.addBuffer(padding_data);
- int padding = cgen.addTensor({{4, 1}, circle::TensorType::TensorType_INT32, padding_buf});
- int out = cgen.addTensor({{2, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
-
- cgen.addOperatorPad({{in, padding}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->setCompileFail();
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/Pad.test.cc b/tests/nnfw_api/src/one_op_tests/Pad.test.cc
new file mode 100644
index 000000000..582bd84bc
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Pad.test.cc
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+// Input shape: {1, 2, 2, 1}
+// Padding: {0, 0, 1, 1, 1, 1, 0, 0}
+// Output shape: {1, 4, 4, 1}
+struct PadParam
+{
+ TestCaseData tcd;
+ circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32;
+ float scale = 0.0f;
+ int64_t zero_point = 0;
+};
+
+class PadVariation : public GenModelTest, public ::testing::WithParamInterface<PadParam>
+{
+};
+
+// Test with different value types
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, PadVariation,
+ ::testing::Values(
+ // float value
+ PadParam{uniformTCD<float>({{1, 2, 3, 4}}, {{0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, 0, 0, 0, 0, 0}})},
+ // uint8 value
+ PadParam{
+ uniformTCD<uint8_t>({{1, 2, 3, 4}}, {{8, 8, 8, 8, 8, 1, 2, 8, 8, 3, 4, 8, 8, 8, 8, 8}}),
+ circle::TensorType::TensorType_UINT8, 1.0, 8},
+ // int8 value
+ PadParam{uniformTCD<int8_t>({{-2, -1, 1, 2}},
+ {{-5, -5, -5, -5, -5, -2, -1, -5, -5, 1, 2, -5, -5, -5, -5, -5}}),
+ circle::TensorType::TensorType_INT8, 1.0, -5}));
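+// Note how the expected borders mirror the quantization parameters: plain Pad
+// fills with real value 0, which quantizes to the zero point (8 in the uint8
+// case, -5 in the int8 case).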
+
+TEST_P(PadVariation, Test)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
+ std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+ int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point);
+
+ cgen.addOperatorPad({{in, padding}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_P(PadVariation, neg_InvalidPadRank)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
+ std::vector<int32_t> padding_data{1, 1, 1, 1};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, padding_buf});
+ int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point);
+
+ cgen.addOperatorPad({{in, padding}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_P(PadVariation, neg_InvalidPadDim0)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
+ std::vector<int32_t> padding_data{1, 1, 1, 1};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+ int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point);
+
+ cgen.addOperatorPad({{in, padding}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_P(PadVariation, neg_InvalidPadDim1)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
+ std::vector<int32_t> padding_data{1, 1, 1, 1};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4, 1}, circle::TensorType::TensorType_INT32, padding_buf});
+ int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point);
+
+ cgen.addOperatorPad({{in, padding}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_P(PadVariation, neg_Type)
+{
+ auto &param = GetParam();
+
+ const circle::TensorType output_type = ((param.data_type == circle::TensorType::TensorType_UINT8)
+ ? circle::TensorType::TensorType_INT8
+ : circle::TensorType::TensorType_UINT8);
+
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
+ std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+ int out = cgen.addTensor({{1, 4, 4, 1}, output_type}, 1.0, 0);
+
+ cgen.addOperatorPad({{in, padding}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
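+// Pad has no explicit pad-value input, so input and output are presumably
+// required to share quantization parameters; the mismatched zero points
+// below (1 vs 3) should fail at model load.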
+TEST_F(GenModelTest, neg_OneOp_Pad_QuantParam)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 1);
+ std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+ int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 3);
+
+ cgen.addOperatorPad({{in, padding}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/PadV2.cc b/tests/nnfw_api/src/one_op_tests/PadV2.cc
deleted file mode 100644
index 9f7ff9c0e..000000000
--- a/tests/nnfw_api/src/one_op_tests/PadV2.cc
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-TEST_F(GenModelTest, OneOp_PadV2)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
- uint32_t padding_buf = cgen.addBuffer(padding_data);
- int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
- std::vector<float> padding_value_data{3.0};
- uint32_t padding_value_buf = cgen.addBuffer(padding_value_data);
- int padding_value =
- cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf});
-
- int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
-
- cgen.addOperatorPadV2({{in, padding, padding_value}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase({{{1, 2, 3, 4}}, {{3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 4, 3, 3, 3, 3, 3}}});
- _context->setBackends({"cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_PadV2_InvalidPadRank)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- std::vector<int32_t> padding_data{1, 1, 1, 1};
- uint32_t padding_buf = cgen.addBuffer(padding_data);
- int padding = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, padding_buf});
- std::vector<float> padding_value_data{3.0};
- uint32_t padding_value_buf = cgen.addBuffer(padding_value_data);
- int padding_value =
- cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf});
-
- int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
-
- cgen.addOperatorPad({{in, padding, padding_value}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->setCompileFail();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_PadV2_InvalidPadDim0)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- std::vector<int32_t> padding_data{1, 1, 1, 1};
- uint32_t padding_buf = cgen.addBuffer(padding_data);
- int padding = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_INT32, padding_buf});
- std::vector<float> padding_value_data{3.0};
- uint32_t padding_value_buf = cgen.addBuffer(padding_value_data);
- int padding_value =
- cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf});
-
- int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
-
- cgen.addOperatorPad({{in, padding, padding_value}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->setCompileFail();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_PadV2_InvalidPadDim1)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
- std::vector<int32_t> padding_data{1, 1, 1, 1};
- uint32_t padding_buf = cgen.addBuffer(padding_data);
- int padding = cgen.addTensor({{4, 1}, circle::TensorType::TensorType_INT32, padding_buf});
- std::vector<float> padding_value_data{3.0};
- uint32_t padding_value_buf = cgen.addBuffer(padding_value_data);
- int padding_value =
- cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf});
-
- int out = cgen.addTensor({{2, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
-
- cgen.addOperatorPad({{in, padding, padding_value}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->setCompileFail();
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/PadV2.test.cc b/tests/nnfw_api/src/one_op_tests/PadV2.test.cc
new file mode 100644
index 000000000..3db2187b2
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/PadV2.test.cc
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_PadV2)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+ std::vector<float> padding_value_data{3.0};
+ uint32_t padding_value_buf = cgen.addBuffer(padding_value_data);
+ int padding_value =
+ cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf});
+
+ int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorPadV2({{in, padding, padding_value}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ uniformTCD<float>({{1, 2, 3, 4}}, {{3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 4, 3, 3, 3, 3, 3}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_PadV2_InvalidPadRank)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> padding_data{1, 1, 1, 1};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, padding_buf});
+ std::vector<float> padding_value_data{3.0};
+ uint32_t padding_value_buf = cgen.addBuffer(padding_value_data);
+ int padding_value =
+ cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf});
+
+ int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorPad({{in, padding, padding_value}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_PadV2_InvalidPadDim0)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> padding_data{1, 1, 1, 1};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+ std::vector<float> padding_value_data{3.0};
+ uint32_t padding_value_buf = cgen.addBuffer(padding_value_data);
+ int padding_value =
+ cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf});
+
+ int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorPad({{in, padding, padding_value}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_PadV2_InvalidPadDim1)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> padding_data{1, 1, 1, 1};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4, 1}, circle::TensorType::TensorType_INT32, padding_buf});
+ std::vector<float> padding_value_data{3.0};
+ uint32_t padding_value_buf = cgen.addBuffer(padding_value_data);
+ int padding_value =
+ cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf});
+
+ int out = cgen.addTensor({{2, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorPad({{in, padding, padding_value}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_PadV2_Type)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> padding_data{1, 1, 1, 1};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+ std::vector<uint8_t> padding_value_data{3};
+ uint32_t padding_value_buf = cgen.addBuffer(padding_value_data);
+ int padding_value =
+ cgen.addTensor({{1}, circle::TensorType::TensorType_UINT8, padding_value_buf}, 1.0, 1);
+
+ int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorPadV2({{in, padding, padding_value}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_PadV2_QuantParam)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 2);
+ std::vector<int32_t> padding_data{1, 1, 1, 1};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+ std::vector<uint8_t> padding_value_data{3};
+ uint32_t padding_value_buf = cgen.addBuffer(padding_value_data);
+ int padding_value =
+ cgen.addTensor({{1}, circle::TensorType::TensorType_UINT8, padding_value_buf}, 1.0, 1);
+
+ int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 1);
+
+ cgen.addOperatorPadV2({{in, padding, padding_value}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Quantize.test.cc b/tests/nnfw_api/src/one_op_tests/Quantize.test.cc
new file mode 100644
index 000000000..5ab4d6297
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Quantize.test.cc
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+CircleGen genSimpleQuantizeModel(circle::TensorType from_t, float input_scale, int input_zeropoint,
+ circle::TensorType to_t, float output_scale, int output_zeropoint)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 4, 4, 1}, from_t}, input_scale, input_zeropoint);
+ int out = cgen.addTensor({{1, 4, 4, 1}, to_t}, output_scale, output_zeropoint);
+ cgen.addOperatorQuantize({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+ return cgen;
+}
+
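+// The expected outputs follow the usual affine requantization,
+//   q_out = round(in_scale * (q_in - in_zp) / out_scale) + out_zp.
+// E.g. in the uint8 -> int8 case, q_in = 48 gives real = 1.0 * (48 - 128)
+// = -80, so q_out = -80 / 2.0 + (-10) = -50, matching the test data.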
+TEST_F(GenModelTest, OneOp_Quantize_Uint8toInt8)
+{
+ CircleGen cgen =
+ genSimpleQuantizeModel(circle::TensorType_UINT8, 1., 128, circle::TensorType_INT8, 2., -10);
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}
+ .addInput<uint8_t>({127, 48, 151, 232, 56, 176, 47, 37, 51, 52, 39, 94, 15, 108, 142, 243})
+ .addOutput<int8_t>(
+ {-10, -50, 2, 42, -46, 14, -50, -55, -48, -48, -54, -27, -66, -20, -3, 48}));
+ _context->setBackends({"cpu"});
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Quantize_Int8toUint8)
+{
+ CircleGen cgen =
+ genSimpleQuantizeModel(circle::TensorType_INT8, 2., -10, circle::TensorType_UINT8, 1., 128);
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}
+ .addInput<int8_t>({-10, -50, 2, 42, -46, 14, -50, -55, -48, -48, -54, -27, -66, -20, -3, 48})
+ .addOutput<uint8_t>({128, 48, 152, 232, 56, 176, 48, 38, 52, 52, 40, 94, 16, 108, 142, 244}));
+ _context->setBackends({"cpu"});
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Quantize_Uint8toInt16)
+{
+ CircleGen cgen =
+ genSimpleQuantizeModel(circle::TensorType_UINT8, 1., 128, circle::TensorType_INT16, 2., -10);
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Quantize_Int8toInt16)
+{
+ CircleGen cgen =
+ genSimpleQuantizeModel(circle::TensorType_INT8, 2., -10, circle::TensorType_INT16, 1., 128);
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Rank.cc b/tests/nnfw_api/src/one_op_tests/Rank.cc
deleted file mode 100644
index ed9d67294..000000000
--- a/tests/nnfw_api/src/one_op_tests/Rank.cc
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-// WORKAROUND Handle int32_t type input/output
-union float_int {
- int32_t i;
- float f;
-};
-
-TEST_F(GenModelTest, OneOp_Rank)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32});
-
- // TODO handle many type in addTestCase
- float_int output_data;
- output_data.i = 4;
-
- cgen.addOperatorRank({{in}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(
- {{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}}, {{output_data.f}}});
- _context->setBackends({"cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_Rank_Int32)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_INT32});
- int out = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32});
-
- // TODO handle many type in addTestCase
- float_int output_data;
- output_data.i = 4;
-
- cgen.addOperatorRank({{in}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(
- {{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}}, {{output_data.f}}});
- _context->setBackends({"cpu"});
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/Rank.test.cc b/tests/nnfw_api/src/one_op_tests/Rank.test.cc
new file mode 100644
index 000000000..60ec1931a
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Rank.test.cc
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+// NOTE The former float/int32_t union workaround is gone; TestCaseData
+// handles typed inputs and outputs directly.
+TEST_F(GenModelTest, OneOp_Rank)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32});
+
+ cgen.addOperatorRank({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}
+ .addInput<float>({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18})
+ .addOutput<int32_t>({4}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Rank_Int32)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32});
+
+ // TODO Handle multiple data types in addTestCase
+ cgen.addOperatorRank({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ uniformTCD<int32_t>({{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}}, {{4}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Rank_OutType)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{1}, circle::TensorType::TensorType_UINT8});
+
+ // TODO Handle multiple data types in addTestCase
+ cgen.addOperatorRank({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Reduce.test.cc b/tests/nnfw_api/src/one_op_tests/Reduce.test.cc
new file mode 100644
index 000000000..13d180aed
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Reduce.test.cc
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+CircleBuffer genSimpleReduceModel(circle::BuiltinOperator op, bool keep_dims)
+{
+ CircleGen cgen;
+ uint32_t axis_buf = cgen.addBuffer(std::vector<int32_t>{0, 1, 2, 3});
+ int in = cgen.addTensor({{2, 1, 1, 3}, circle::TensorType::TensorType_FLOAT32});
+ int axis = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, axis_buf});
+ int out = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorReduce({{in, axis}, {out}}, op, keep_dims);
+ cgen.setInputsAndOutputs({in}, {out});
+ return cgen.finish();
+}
+
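+// Reducing over all four axes {0, 1, 2, 3} collapses the {2, 1, 1, 3} input
+// to a single element, hence the {1} output: REDUCE_MAX of {1, ..., 6} is 6.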
+TEST_F(GenModelTest, OneOp_ReduceMax)
+{
+ auto model = genSimpleReduceModel(circle::BuiltinOperator_REDUCE_MAX, false);
+ _context = std::make_unique<GenModelTestContext>(std::move(model));
+ _context->addTestCase(uniformTCD<float>({{1, 2, 3, 4, 5, 6}}, {{6}}));
+ _context->addTestCase(uniformTCD<float>({{100, 98, 55, 200, 3, 40}}, {{200}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+class ReduceMaxBadIndex : public GenModelTest,
+ public ::testing::WithParamInterface<std::vector<int>>
+{
+};
+
+TEST_P(ReduceMaxBadIndex, neg_Test)
+{
+ CircleGen cgen;
+ // Each axis value must lie in [-rank, rank); the input's rank is 4, so
+ // 4, -5, -88, and 88 are all out of range
+ uint32_t axis_buf = cgen.addBuffer(GetParam());
+ int in = cgen.addTensor({{2, 1, 1, 3}, circle::TensorType::TensorType_FLOAT32});
+ int axis = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, axis_buf});
+ int out = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorReduce({{in, axis}, {out}}, circle::BuiltinOperator_REDUCE_MAX, false);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+INSTANTIATE_TEST_SUITE_P(GenModelTest, ReduceMaxBadIndex,
+ ::testing::Values(std::vector<int32_t>{0, 1, 2, 4},
+ std::vector<int32_t>{0, -5, 2, 3},
+ std::vector<int32_t>{-88, 1, 2, 3},
+ std::vector<int32_t>{0, 1, 88, 3}));
diff --git a/tests/nnfw_api/src/one_op_tests/Relu.test.cc b/tests/nnfw_api/src/one_op_tests/Relu.test.cc
new file mode 100644
index 000000000..28c511270
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Relu.test.cc
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_Relu)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorRelu({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ uniformTCD<float>({{0, 1.0, 3.0, 1.0, -1.0, -2.0f}}, {{0, 1.0, 3.0, 1.0, 0, 0}}));
+ _context->setBackends({"cpu", "gpu_cl"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Relu_InvalidType)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_UINT8});
+ int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorRelu({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu", "gpu_cl"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Relu6.test.cc b/tests/nnfw_api/src/one_op_tests/Relu6.test.cc
new file mode 100644
index 000000000..88b8eba83
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Relu6.test.cc
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
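+// Relu6 clamps each element to [0, 6]: below, 7 and 8 saturate to 6 and the
+// negative inputs to 0.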
+TEST_F(GenModelTest, OneOp_Relu6)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorRelu6({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ uniformTCD<float>({{4, 7.0, 3.0, 8.0, -1.0, -2.0f}}, {{4, 6.0, 3.0, 6.0, 0, 0}}));
+ _context->setBackends({"cpu", "gpu_cl"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Relu6_InvalidType)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_UINT8});
+ int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorRelu6({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu", "gpu_cl"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/ResizeBilinear.test.cc b/tests/nnfw_api/src/one_op_tests/ResizeBilinear.test.cc
new file mode 100644
index 000000000..fe313d4e7
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/ResizeBilinear.test.cc
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+struct ResizeBilinearParam
+{
+ TestCaseData tcd;
+ circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32;
+ float scale = 0.0f;
+ int64_t zero_point = 0;
+};
+
+class ResizeBilinearVariation : public GenModelTest,
+ public ::testing::WithParamInterface<ResizeBilinearParam>
+{
+};
+
+TEST_P(ResizeBilinearVariation, Test)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ std::vector<int32_t> size_data{3, 3};
+ uint32_t size_buf = cgen.addBuffer(size_data);
+ int size = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, size_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({{1, 3, 3, 1}, param.data_type}, param.scale, param.zero_point);
+ cgen.addOperatorResizeBilinear({{in, size}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, ResizeBilinearVariation,
+ ::testing::Values(
+ // float value
+ ResizeBilinearParam{uniformTCD<float>({{1, 1, 2, 2}}, {{1, 1, 1, 1.666666667, 1.666666667,
+ 1.666666667, 2, 2, 2}})},
+ // uint8 value
+ ResizeBilinearParam{uniformTCD<uint8_t>({{3, 6, 9, 12}}, {{3, 5, 6, 7, 9, 10, 9, 11, 12}}),
+ circle::TensorType::TensorType_UINT8, 1.0, 0},
+ // int8 value
+ ResizeBilinearParam{uniformTCD<int8_t>({{-6, -3, 9, 12}}, {{-6, -4, -3, 4, 6, 7, 9, 11, 12}}),
+ circle::TensorType::TensorType_INT8, 1.0, 0}));
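+// For the float case the source coordinate is assumed to follow the default
+// (non-half-pixel) mapping y_src = y_out * in_size / out_size = y_out * 2 / 3,
+// so output row 1 samples y_src = 0.667 between input rows {1, 1} and {2, 2},
+// giving 1 + 0.667 * (2 - 1) = 1.666666667.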
+
+TEST_F(GenModelTest, OneOp_ResizeBilinear_SizeToVar)
+{
+ CircleGen cgen;
+ int size = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorResizeBilinear({{in, size}, {out}});
+ cgen.setInputsAndOutputs({in, size}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ // FIXME Enable a test case; the commented-out case below is not valid
+ //_context->addTestCase(TestCaseData{}.addInput<int32_t>({3, 3}).addInput<float>({1, 1, 2,
+ // 2}).addOutput<float>({1, 1, 1, 1.666666667, 1.666666667, 1.666666667, 2, 2, 2}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_ResizeBilinear_InvalidSizeVal)
+{
+ CircleGen cgen;
+ std::vector<int32_t> size_data{-3, 3};
+ uint32_t size_buf = cgen.addBuffer(size_data);
+ int size = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, size_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorResizeBilinear({{in, size}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/ResizeNearestNeighbor.cc b/tests/nnfw_api/src/one_op_tests/ResizeNearestNeighbor.cc
deleted file mode 100644
index 94f45d4a5..000000000
--- a/tests/nnfw_api/src/one_op_tests/ResizeNearestNeighbor.cc
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-TEST_F(GenModelTest, OneOp_ResizeNearestNeighbor)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
- std::vector<int32_t> size_data{3, 3};
- uint32_t size_buf = cgen.addBuffer(size_data);
- int size = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32, size_buf});
-
- int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
-
- cgen.addOperatorResizeNearestNeighbor({{in, size}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase({{{3, 4, 6, 10, 9, 10, 12, 16}},
- {{3, 4, 3, 4, 6, 10, 3, 4, 3, 4, 6, 10, 9, 10, 9, 10, 12, 16}}});
- _context->setBackends({"acl_cl"});
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/ResizeNearestNeighbor.test.cc b/tests/nnfw_api/src/one_op_tests/ResizeNearestNeighbor.test.cc
new file mode 100644
index 000000000..1dd65844b
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/ResizeNearestNeighbor.test.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_ResizeNearestNeighbor)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> size_data{3, 3};
+ uint32_t size_buf = cgen.addBuffer(size_data);
+ int size = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32, size_buf});
+
+ int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorResizeNearestNeighbor({{in, size}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ uniformTCD<float>({{3, 4, 6, 10, 9, 10, 12, 16}},
+ {{3, 4, 3, 4, 6, 10, 3, 4, 3, 4, 6, 10, 9, 10, 9, 10, 12, 16}}));
+ _context->setBackends({"acl_cl"});
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Reverse.test.cc b/tests/nnfw_api/src/one_op_tests/Reverse.test.cc
new file mode 100644
index 000000000..7b28d7c75
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Reverse.test.cc
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_ReverseV2_3D)
+{
+ CircleGen cgen;
+
+ int in = cgen.addTensor({{4, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> axis_data{1};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int out = cgen.addTensor({{4, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorReverseV2({{in, axis}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "cpu"});
+ _context->addTestCase(uniformTCD<float>(
+ {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}},
+ {{5, 6, 3, 4, 1, 2, 11, 12, 9, 10, 7, 8, 17, 18, 15, 16, 13, 14, 23, 24, 21, 22, 19, 20}}));
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_ReverseV2_1D)
+{
+ CircleGen cgen;
+
+ int in = cgen.addTensor({{4}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> axis_data{0};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int out = cgen.addTensor({{4}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorReverseV2({{in, axis}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "cpu"});
+ _context->addTestCase(uniformTCD<float>({{1, 2, 3, 4}}, {{4, 3, 2, 1}}));
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_ReverseV2_3D_DifferentType)
+{
+ CircleGen cgen;
+
+ int in = cgen.addTensor({{4, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> axis_data{1};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int out = cgen.addTensor({{4, 3, 2}, circle::TensorType::TensorType_INT32});
+
+ cgen.addOperatorReverseV2({{in, axis}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "cpu"});
+ _context->addTestCase(uniformTCD<int>(
+ {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}},
+ {{5, 6, 3, 4, 1, 2, 11, 12, 9, 10, 7, 8, 17, 18, 15, 16, 13, 14, 23, 24, 21, 22, 19, 20}}));
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Select.test.cc b/tests/nnfw_api/src/one_op_tests/Select.test.cc
new file mode 100644
index 000000000..e1d991877
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Select.test.cc
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_Select)
+{
+ CircleGen cgen;
+ std::vector<uint8_t> cond_data{1, 1, 0, 1};
+ uint32_t cond_buf = cgen.addBuffer(cond_data);
+ int cond = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL, cond_buf});
+ int in_true = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int in_false = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorSelect({{cond, in_true, in_false}, {out}});
+ cgen.setInputsAndOutputs({in_true, in_false}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{0, 1, 2, 3}, {4, 5, 6, 7}}, {{0, 1, 6, 3}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
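+// SelectV2 broadcasts all three operands: cond {1, 0} spans dim 1 and in_true
+// {0, 1} spans dim 2, so row 0 takes the broadcast true values {0, 1} while
+// row 1 takes the false values {6, 7}.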
+TEST_F(GenModelTest, OneOp_SelectV2_Broadcast)
+{
+ CircleGen cgen;
+ std::vector<uint8_t> cond_data{1, 0};
+ uint32_t cond_buf = cgen.addBuffer(cond_data);
+ int cond = cgen.addTensor({{1, 2, 1, 1}, circle::TensorType::TensorType_BOOL, cond_buf});
+ int in_true = cgen.addTensor({{1, 1, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int in_false = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorSelectV2({{cond, in_true, in_false}, {out}});
+ cgen.setInputsAndOutputs({in_true, in_false}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{0, 1}, {4, 5, 6, 7}}, {{0, 1, 6, 7}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Select_InputType)
+{
+ CircleGen cgen;
+ std::vector<uint8_t> cond_data{1, 1, 0, 1};
+ uint32_t cond_buf = cgen.addBuffer(cond_data);
+ int cond = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL, cond_buf});
+ int in_true = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int in_false = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorSelect({{cond, in_true, in_false}, {out}});
+ cgen.setInputsAndOutputs({in_true, in_false}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Select_CondType)
+{
+ CircleGen cgen;
+ std::vector<uint8_t> cond_data{1, 1, 0, 1};
+ uint32_t cond_buf = cgen.addBuffer(cond_data);
+ int cond = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8, cond_buf});
+ int in_true = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int in_false = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorSelect({{cond, in_true, in_false}, {out}});
+ cgen.setInputsAndOutputs({in_true, in_false}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Shape.test.cc b/tests/nnfw_api/src/one_op_tests/Shape.test.cc
new file mode 100644
index 000000000..2a73db99a
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Shape.test.cc
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+// NOTE TestCaseData handles typed (e.g. int32_t) inputs and outputs directly,
+// so no workaround is needed here.
+TEST_F(GenModelTest, OneOp_Shape)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32});
+
+ cgen.addOperatorShape({{in}, {out}}, circle::TensorType::TensorType_INT32);
+ cgen.setInputsAndOutputs({in}, {out});
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}
+ .addInput<float>({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18})
+ .addOutput<int32_t>({1, 3, 3, 2}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Shape_Int64)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1}, circle::TensorType::TensorType_INT64});
+
+ // TODO Handle multiple data types in addTestCase
+ cgen.addOperatorShape({{in}, {out}}, circle::TensorType::TensorType_INT64);
+ cgen.setInputsAndOutputs({in}, {out});
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}
+ .addInput<float>({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18})
+ .addOutput<int64_t>({1, 3, 3, 2}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Shape_OutType)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{1}, circle::TensorType::TensorType_UINT8});
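+ // Shape output must be INT32 or INT64; UINT8 must fail model load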
+
+ // TODO Handle multiple types in addTestCase
+ cgen.addOperatorShape({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Slice.test.cc b/tests/nnfw_api/src/one_op_tests/Slice.test.cc
new file mode 100644
index 000000000..8cd9d7037
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Slice.test.cc
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+struct SliceVariationParam
+{
+ std::vector<int32_t> input_shape;
+ std::vector<int32_t> begins;
+ std::vector<int32_t> sizes;
+ TestCaseData tcd;
+
+ circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32;
+ float scale = 0.0f;
+ int64_t zero_point = 0;
+ circle::TensorType begins_type = circle::TensorType::TensorType_INT32;
+};
+
+class SliceVariation : public GenModelTest,
+ public ::testing::WithParamInterface<SliceVariationParam>
+{
+};
+
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, SliceVariation,
+ ::testing::Values(
+ SliceVariationParam{
+ {2, 2, 3, 1},
+ {0, 1, 1, 0},
+ {1, 1, 2, 1},
+ uniformTCD<float>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}})},
+ SliceVariationParam{
+ {2, 2, 3, 1},
+ {0, 1, 1, 0},
+ {1, 1, 2, 1},
+ uniformTCD<uint8_t>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}}),
+ circle::TensorType::TensorType_UINT8,
+ 1,
+ 0},
+ SliceVariationParam{
+ {2, 2, 3, 1},
+ {0, 1, 1, 0},
+ {1, 1, 2, 1},
+ uniformTCD<float>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}}),
+ circle::TensorType::TensorType_FLOAT32,
+ 0,
+ 0,
+ circle::TensorType::TensorType_INT64}));
+
+TEST_P(SliceVariation, Test)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+
+ int in = cgen.addTensor({param.input_shape, param.input_type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({param.sizes, param.input_type}, param.scale, param.zero_point);
+ if (param.begins_type == circle::TensorType::TensorType_INT32)
+ {
+ uint32_t begins_buf = cgen.addBuffer(param.begins);
+ int rank = param.begins.size();
+ int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf});
+
+ uint32_t sizes_buf = cgen.addBuffer(param.sizes);
+ int sizes = cgen.addTensor({{rank}, param.begins_type, sizes_buf});
+
+ cgen.addOperatorSlice({{in, begins, sizes}, {out}});
+ }
+ else if (param.begins_type == circle::TensorType::TensorType_INT64)
+ {
+ std::vector<int64_t> begins_64(param.begins.size());
+ std::vector<int64_t> sizes_64(param.sizes.size());
+ for (int i = 0; i < param.begins.size(); i++)
+ {
+ begins_64[i] = param.begins[i];
+ sizes_64[i] = param.sizes[i];
+ }
+
+ uint32_t begins_buf = cgen.addBuffer(begins_64);
+ int rank = param.begins.size();
+ int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf});
+
+ uint32_t sizes_buf = cgen.addBuffer(sizes_64);
+ int sizes = cgen.addTensor({{rank}, param.begins_type, sizes_buf});
+
+ cgen.addOperatorSlice({{in, begins, sizes}, {out}});
+ }
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+
+ // acl backends don't support int64 yet
+ if (param.begins_type == circle::TensorType::TensorType_INT64)
+ {
+ _context->setBackends({"cpu"});
+ }
+ else
+ {
+ _context->setBackends({"cpu", "acl_cl", "acl_neon"});
+ }
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Slice_Type)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<float> begins_data = {0, 0, 1, 0};
+ uint32_t begins_buf = cgen.addBuffer(begins_data);
+ int begins = cgen.addTensor({{4}, circle::TensorType::TensorType_FLOAT32, begins_buf});
+ std::vector<float> sizes_data = {1, 2, 1, 1};
+ uint32_t sizes_buf = cgen.addBuffer(sizes_data);
+ int sizes = cgen.addTensor({{4}, circle::TensorType::TensorType_FLOAT32, sizes_buf});
+ int out = cgen.addTensor({{1, 2, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorSlice({{in, begins, sizes}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_P(SliceVariation, neg_DiffType)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+
+ int in = cgen.addTensor({param.input_shape, param.input_type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({param.sizes, param.input_type}, param.scale, param.zero_point);
+ if (param.begins_type == circle::TensorType::TensorType_INT32)
+ {
+ uint32_t begins_buf = cgen.addBuffer(param.begins);
+ std::vector<int64_t> sizes_64(param.sizes.size());
+ for (int i = 0; i < param.begins.size(); i++)
+ {
+ sizes_64[i] = param.sizes[i];
+ }
+
+ int rank = param.begins.size();
+ int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf});
+
+ uint32_t sizes_buf = cgen.addBuffer(sizes_64);
+ int sizes = cgen.addTensor({{rank}, circle::TensorType::TensorType_INT64, sizes_buf});
+
+ cgen.addOperatorSlice({{in, begins, sizes}, {out}});
+ }
+ else if (param.begins_type == circle::TensorType::TensorType_INT64)
+ {
+ std::vector<int64_t> begins_64(param.begins.size());
+ for (int i = 0; i < param.begins.size(); i++)
+ {
+ begins_64[i] = param.begins[i];
+ }
+
+ uint32_t begins_buf = cgen.addBuffer(begins_64);
+ int rank = param.begins.size();
+ int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf});
+
+ uint32_t sizes_buf = cgen.addBuffer(param.sizes);
+ int sizes = cgen.addTensor({{rank}, circle::TensorType::TensorType_INT32, sizes_buf});
+
+ cgen.addOperatorSlice({{in, begins, sizes}, {out}});
+ }
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
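Note: the SliceVariation list above is easy to extend. A hypothetical fourth entry (illustrative, not part of this patch) that would exercise quantized uint8 data together with int64 begins/sizes, using only the helpers already shown here:

    SliceVariationParam{
        {2, 2, 3, 1},
        {0, 1, 1, 0},
        {1, 1, 2, 1},
        uniformTCD<uint8_t>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}}),
        circle::TensorType::TensorType_UINT8,
        1,
        0,
        circle::TensorType::TensorType_INT64}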
diff --git a/tests/nnfw_api/src/one_op_tests/Softmax.test.cc b/tests/nnfw_api/src/one_op_tests/Softmax.test.cc
new file mode 100644
index 000000000..1782baf64
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Softmax.test.cc
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+// beta = 0.1
+// input/output shape: {1, 2, 1, 4}
+struct SoftmaxParam
+{
+ TestCaseData tcd;
+ circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32;
+ float input_scale = 0.0f;
+ int64_t input_zero_point = 0;
+};
+
+class SoftmaxVariation : public GenModelTest, public ::testing::WithParamInterface<SoftmaxParam>
+{
+};
+
+// Test with different value type
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, SoftmaxVariation,
+ ::testing::Values(
+ // float value
+ SoftmaxParam{
+ uniformTCD<float>({{0, -6, 2, 4, 3, -2, 10, 1}},
+ {{.23463, .12877, .28658, .35003, .22528, .13664, .45365, .18443}})},
+ // uint8 value
+ SoftmaxParam{
+ uniformTCD<uint8_t>({{10, 4, 12, 14, 13, 8, 20, 11}}, {{60, 33, 73, 90, 58, 35, 116, 47}}),
+ circle::TensorType::TensorType_UINT8, 1.0, 10},
+ // int8 value
+ SoftmaxParam{
+ uniformTCD<int8_t>({{0, -6, 2, 4, 3, -2, 10, 1}}, {{-68, -95, -55, -38, -70, -93, -12, -81}}),
+ circle::TensorType::TensorType_INT8, 1.0, 0}));
+
+TEST_P(SoftmaxVariation, Test)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+
+ // The NNAPI spec and TFLite tests use a fixed output scale and zero-point
+ float out_scale = 0.0;
+ int64_t out_zero_point = 0;
+ if (param.data_type == circle::TensorType::TensorType_UINT8)
+ {
+ out_scale = 1.0f / 256;
+ }
+ else if (param.data_type == circle::TensorType::TensorType_INT8)
+ {
+ out_scale = 1.0f / 256;
+ out_zero_point = -128;
+ }
+
+ int input =
+ cgen.addTensor({{1, 2, 1, 4}, param.data_type}, param.input_scale, param.input_zero_point);
+ int out = cgen.addTensor({{1, 2, 1, 4}, param.data_type}, out_scale, out_zero_point);
+ cgen.addOperatorSoftmax({{input}, {out}}, 0.1);
+ cgen.setInputsAndOutputs({input}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends({"cpu", "acl_neon", "acl_cl"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Softmax_Invalid_Beta)
+{
+ CircleGen cgen;
+ int input = cgen.addTensor({{4, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{4, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorSoftmax({{input}, {out}}, 0.1);
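+ // beta = 0.1 is not supported by the gpu_cl backend, so compilation must fail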
+ cgen.setInputsAndOutputs({input}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{-1., 0., 1., 1.}}, {{-1., -1., -1., -1.}}));
+ _context->setBackends({"gpu_cl"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Softmax)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorSoftmax({{lhs}, {out}}, 1.0);
+ cgen.setInputsAndOutputs({lhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>(
+ {{-1., 0., 1., 1.}},
+ {{0.054064586758613586, 0.14696279168128967, 0.39948627352714539, 0.39948627352714539}}));
+ _context->setBackends({"acl_cl", "cpu", "gpu_cl"});
+
+ SUCCEED();
+}
+
+TEST_P(SoftmaxVariation, neg_Type)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ int input =
+ cgen.addTensor({{1, 2, 1, 4}, param.data_type}, param.input_scale, param.input_zero_point);
+ int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_BOOL});
+ cgen.addOperatorSoftmax({{input}, {out}}, 0.1);
+ cgen.setInputsAndOutputs({input}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
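Note: the quantized expectations above follow the affine scheme q = round(p / scale) + zero_point. A minimal standalone sketch (illustrative; the helper name is not from this patch) reproducing the first expected value of each quantized case:

    #include <cmath>
    #include <cstdint>

    // Quantize a real-valued softmax probability p with the given output params.
    static int64_t quantize(float p, float scale, int64_t zero_point)
    {
      return std::lround(p / scale) + zero_point;
    }

    // With out_scale = 1.0f / 256: quantize(0.23463f, 1.0f / 256, 0) == 60 for the
    // uint8 case, and quantize(0.23463f, 1.0f / 256, -128) == -68 for the int8 case,
    // matching the expected outputs listed in INSTANTIATE_TEST_SUITE_P above.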
diff --git a/tests/nnfw_api/src/one_op_tests/Split.test.cc b/tests/nnfw_api/src/one_op_tests/Split.test.cc
new file mode 100644
index 000000000..7dec8dba5
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Split.test.cc
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_Split)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{2, 4}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> axis_data{1};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+
+ int out1 = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_FLOAT32});
+ int out2 = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorSplit({{axis, in}, {out1, out2}}, 2);
+ cgen.setInputsAndOutputs({in}, {out1, out2});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ uniformTCD<float>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6}, {3, 4, 7, 8}}));
+ _context->setBackends({"cpu", "acl_cl", "acl_neon"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_SplitNonConstAxis)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{2, 4}, circle::TensorType::TensorType_FLOAT32});
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32});
+
+ int out1 = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_FLOAT32});
+ int out2 = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorSplit({{axis, in}, {out1, out2}}, 2);
+ cgen.setInputsAndOutputs({axis, in}, {out1, out2});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(TestCaseData{}
+ .addInput<int32_t>({1})
+ .addInput<float>({1, 2, 3, 4, 5, 6, 7, 8})
+ .addOutput<float>({1, 2, 5, 6})
+ .addOutput<float>({3, 4, 7, 8}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_SplitNegativeSplitNum)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{2, 4}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> axis_data{1};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+
+ int out1 = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_FLOAT32});
+ int out2 = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorSplit({{axis, in}, {out1, out2}}, -3);
+ cgen.setInputsAndOutputs({in}, {out1, out2});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ uniformTCD<float>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6}, {3, 4, 7, 8}}));
+ _context->setBackends({"cpu", "acl_cl", "acl_neon"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Sqrt.test.cc b/tests/nnfw_api/src/one_op_tests/Sqrt.test.cc
new file mode 100644
index 000000000..01f313371
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Sqrt.test.cc
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+CircleGen genSimpleSqrtModel(circle::TensorType type)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, type});
+ int out = cgen.addTensor({{1, 2, 2, 1}, type});
+ cgen.addOperatorSqrt({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+ return cgen;
+}
+
+TEST_F(GenModelTest, OneOp_Sqrt_f32)
+{
+ CircleGen cgen = genSimpleSqrtModel(circle::TensorType::TensorType_FLOAT32);
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}.addInput<float>({1, 4, 9, 16}).addOutput<float>({1, 2, 3, 4}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Sqrt_i32)
+{
+ CircleGen cgen = genSimpleSqrtModel(circle::TensorType::TensorType_INT32);
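+ // Sqrt with INT32 input is not supported on the cpu backend, so compilation must fail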
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(TestCaseData{}.addInput<int>({1, 4, 9, 16}).addOutput<float>({1, 2, 3, 4}));
+ _context->setBackends({"cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Square.test.cc b/tests/nnfw_api/src/one_op_tests/Square.test.cc
new file mode 100644
index 000000000..2ec9bad0d
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Square.test.cc
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+CircleGen genSimpleSquareModel(circle::TensorType type)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, type});
+ int out = cgen.addTensor({{1, 2, 2, 1}, type});
+ cgen.addOperatorSquare({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+ return cgen;
+}
+
+TEST_F(GenModelTest, OneOp_Square_f32)
+{
+ CircleGen cgen = genSimpleSquareModel(circle::TensorType::TensorType_FLOAT32);
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}.addInput<float>({1, 2, 3, 4}).addOutput<float>({1, 4, 9, 16}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Square_i32)
+{
+ CircleGen cgen = genSimpleSquareModel(circle::TensorType::TensorType_INT32);
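+ // Square with INT32 input is not supported on the cpu backend, so compilation must fail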
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(TestCaseData{}.addInput<int>({1, 2, 3, 4}).addOutput<float>({1, 4, 9, 16}));
+ _context->setBackends({"cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/StridedSlice.test.cc b/tests/nnfw_api/src/one_op_tests/StridedSlice.test.cc
new file mode 100644
index 000000000..9c00e6d89
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/StridedSlice.test.cc
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+TEST_F(GenModelTest, OneOp_StridedSlice_LastDim)
+{
+ CircleGen cgen;
+ std::vector<int32_t> begin_data{0, 3};
+ std::vector<int32_t> end_data{0, 6};
+ std::vector<int32_t> strides_data{1, 1};
+ uint32_t begin_buf = cgen.addBuffer(begin_data);
+ uint32_t end_buf = cgen.addBuffer(end_data);
+ uint32_t strides_buf = cgen.addBuffer(strides_data);
+ int input = cgen.addTensor({{1, 6}, circle::TensorType::TensorType_FLOAT32});
+ int begin = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32, begin_buf});
+ int end = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32, end_buf});
+ int strides = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32, strides_buf});
+ int out = cgen.addTensor({{1, 3}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorStridedSlice({{input, begin, end, strides}, {out}}, 1, 1);
+ cgen.setInputsAndOutputs({input}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{1, 2, 3, 4, 5, 6}}, {{4, 5, 6}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_StridedSlice_DifferentType)
+{
+ CircleGen cgen;
+ std::vector<int32_t> begin_data{0, 3};
+ std::vector<int32_t> end_data{0, 6};
+ std::vector<int32_t> strides_data{1, 1};
+ uint32_t begin_buf = cgen.addBuffer(begin_data);
+ uint32_t end_buf = cgen.addBuffer(end_data);
+ uint32_t strides_buf = cgen.addBuffer(strides_data);
+ int input = cgen.addTensor({{1, 6}, circle::TensorType::TensorType_FLOAT32});
+ int begin = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32, begin_buf});
+ int end = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32, end_buf});
+ int strides = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32, strides_buf});
+ int out = cgen.addTensor({{1, 3}, circle::TensorType::TensorType_INT32});
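+ // input is FLOAT32 but output is INT32; this type mismatch must fail model load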
+ cgen.addOperatorStridedSlice({{input, begin, end, strides}, {out}}, 1, 1);
+ cgen.setInputsAndOutputs({input}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{1, 2, 3, 4, 5, 6}}, {{4, 5, 6}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Sub.test.cc b/tests/nnfw_api/src/one_op_tests/Sub.test.cc
new file mode 100644
index 000000000..bb4fecd2d
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Sub.test.cc
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+TEST_F(GenModelTest, OneOp_Sub_Uint8_VarVar)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 3);
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 2.0, 1);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.5, 2);
+ cgen.addOperatorSub({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
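+ // e.g. first element: dequantized lhs (13 - 3) * 1.0 = 10 and rhs (5 - 1) * 2.0 = 8;
+ // the difference 2 requantizes to 2 / 0.5 + 2 = 6, the first expected output below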
+ _context->addTestCase(uniformTCD<uint8_t>({{13, 12, 25, 40}, {5, 4, 7, 0}}, {{6, 8, 22, 80}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Sub_Int8_VarVar)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 1.0, 2);
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 2.0, 3);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.5, -6);
+ cgen.addOperatorSub({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<int8_t>({{1, 3, 2, 4}, {5, -4, -7, 4}}, {{-16, 24, 34, -6}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_SubBroadcast_Uint8_VarVar)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 3);
+ int rhs = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_UINT8}, 2.0, 1);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.5, 2);
+ cgen.addOperatorSub({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<uint8_t>({{13, 12, 25, 40}, {5}}, {{6, 4, 30, 60}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_SubBroadcast_Int8_VarVar)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 1.0, 2);
+ int rhs = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_INT8}, 2.0, 3);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.5, -6);
+ cgen.addOperatorSub({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<int8_t>({{1, 3, 2, 4}, {5}}, {{-16, -12, -14, -10}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Sub_InvalidType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 2);
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorSub({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Sub_InvalidShape)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorSub({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Sub_OneOperand)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorSub({{in}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Sub_ThreeOperands)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorSub({{in, in, in}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Tile.test.cc b/tests/nnfw_api/src/one_op_tests/Tile.test.cc
new file mode 100644
index 000000000..3f193d5e6
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Tile.test.cc
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_Tile_ConstMul)
+{
+ CircleGen cgen;
+ std::vector<int32_t> mul_data{1, 2};
+ uint32_t mul_buf = cgen.addBuffer(mul_data);
+ int in = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
+ int mul = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32, mul_buf});
+ int out = cgen.addTensor({{2, 6}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorTile({{in, mul}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ uniformTCD<float>({{1, 2, 3, 4, 5, 6}}, {{1, 2, 3, 1, 2, 3, 4, 5, 6, 4, 5, 6}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Tile_MulToConst)
+{
+ CircleGen cgen;
+ std::vector<int32_t> multiplies_data{2, 3, 1};
+ uint32_t multiplies_buf = cgen.addBuffer(multiplies_data);
+ int multiplies = cgen.addTensor({{3}, circle::TensorType::TensorType_INT32, multiplies_buf});
+ int in = cgen.addTensor({{1, 2, 3}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{2, 6, 3}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorTile({{in, multiplies}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ uniformTCD<float>({{11, 12, 13, 21, 22, 23}},
+ {{11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23,
+ 11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Tile_MulToVar)
+{
+ CircleGen cgen;
+ int multiplies = cgen.addTensor({{3}, circle::TensorType::TensorType_INT32});
+ int in = cgen.addTensor({{1, 2, 3}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{2, 6, 3}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorTile({{in, multiplies}, {out}});
+ cgen.setInputsAndOutputs({in, multiplies}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}
+ .addInput<float>({11, 12, 13, 21, 22, 23})
+ .addInput<int32_t>({2, 3, 1})
+ .addOutput<float>({11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23,
+ 11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Tile_VarMul)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
+ int mul = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{2, 6}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorTile({{in, mul}, {out}});
+ cgen.setInputsAndOutputs({in, mul}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(TestCaseData{}
+ .addInput<float>({1, 2, 3, 4, 5, 6})
+ .addInput<int32_t>({1, 2})
+ .addOutput<float>({1, 2, 3, 1, 2, 3, 4, 5, 6, 4, 5, 6}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Tile)
+{
+ CircleGen cgen;
+ std::vector<int32_t> mul_data{1, 2, 1, 2};
+ uint32_t mul_buf = cgen.addBuffer(mul_data);
+ int in = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
+ // 2D multiples input is not supported
+ int mul = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_INT32, mul_buf});
+ int out = cgen.addTensor({{2, 6}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorTile({{in, mul}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Tile_InvalidMulSize)
+{
+ CircleGen cgen;
+ std::vector<int32_t> multiplies_data{2, 6};
+ uint32_t multiplies_buf = cgen.addBuffer(multiplies_data);
+ int multiplies = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32, multiplies_buf});
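+ // multiples has 2 elements while the input rank is 3, so compilation must fail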
+ int in = cgen.addTensor({{1, 2, 3}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{2, 6, 3}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorTile({{in, multiplies}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Transpose.test.cc b/tests/nnfw_api/src/one_op_tests/Transpose.test.cc
new file mode 100644
index 000000000..5a92c7303
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Transpose.test.cc
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+TEST_F(GenModelTest, OneOp_Transpose_PermsToConst)
+{
+ CircleGen cgen;
+ std::vector<int32_t> perms_data{2, 0, 1, 3};
+ uint32_t perms_buf = cgen.addBuffer(perms_data);
+ int perms = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, perms_buf});
+ int in = cgen.addTensor({{2, 3, 4, 5}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{2, 3, 4, 5}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorTranspose({{in, perms}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>(
+ {{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
+ 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
+ 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
+ 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
+ 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119}},
+ {{0, 1, 2, 3, 4, 20, 21, 22, 23, 24, 40, 41, 42, 43, 44, 60, 61, 62, 63, 64,
+ 80, 81, 82, 83, 84, 100, 101, 102, 103, 104, 5, 6, 7, 8, 9, 25, 26, 27, 28, 29,
+ 45, 46, 47, 48, 49, 65, 66, 67, 68, 69, 85, 86, 87, 88, 89, 105, 106, 107, 108, 109,
+ 10, 11, 12, 13, 14, 30, 31, 32, 33, 34, 50, 51, 52, 53, 54, 70, 71, 72, 73, 74,
+ 90, 91, 92, 93, 94, 110, 111, 112, 113, 114, 15, 16, 17, 18, 19, 35, 36, 37, 38, 39,
+ 55, 56, 57, 58, 59, 75, 76, 77, 78, 79, 95, 96, 97, 98, 99, 115, 116, 117, 118, 119}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Transpose_PermsToVar)
+{
+ CircleGen cgen;
+ int perms = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32});
+ int in = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 3, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorTranspose({{in, perms}, {out}});
+ cgen.setInputsAndOutputs({in, perms}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
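+ // perms {0, 2, 1, 3} swaps the two middle axes: {1, 2, 3, 1} -> {1, 3, 2, 1}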
+ _context->addTestCase(TestCaseData{}
+ .addInput<float>({1, 2, 3, 4, 5, 6})
+ .addInput<int32_t>({0, 2, 1, 3})
+ .addOutput<float>({1, 4, 2, 5, 3, 6}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Transpose_RegularTranspose)
+{
+ CircleGen cgen;
+ int perms = cgen.addTensor({{0}, circle::TensorType::TensorType_INT32});
+ int in = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 3, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorTranspose({{in, perms}, {out}});
+ cgen.setInputsAndOutputs({in, perms}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
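+ // an empty perms input selects the regular (axis-reversing) transpose: {1, 2, 3, 1} -> {1, 3, 2, 1}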
+ _context->addTestCase(TestCaseData{}
+ .addInput<float>({1, 2, 3, 4, 5, 6})
+ .addInput<int32_t>({})
+ .addOutput<float>({1, 4, 2, 5, 3, 6}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Transpose_InvalidPermsSize)
+{
+ CircleGen cgen;
+ std::vector<int32_t> perms_data{0, 1, 2};
+ uint32_t perms_buf = cgen.addBuffer(perms_data);
+ int perms = cgen.addTensor({{3}, circle::TensorType::TensorType_INT32, perms_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorTranspose({{in, perms}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Transpose_InvalidPermsVal)
+{
+ CircleGen cgen;
+ std::vector<int32_t> perms_data{-3, 3, 1, 2};
+ uint32_t perms_buf = cgen.addBuffer(perms_data);
+ int perms = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, perms_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorTranspose({{in, perms}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Transpose_DuplicatedPermsVal)
+{
+ CircleGen cgen;
+ std::vector<int32_t> perms_data{3, 3, 1, 2};
+ uint32_t perms_buf = cgen.addBuffer(perms_data);
+ int perms = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, perms_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorTranspose({{in, perms}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/While.cc b/tests/nnfw_api/src/one_op_tests/While.cc
deleted file mode 100644
index 1d86e6d6a..000000000
--- a/tests/nnfw_api/src/one_op_tests/While.cc
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-#include <memory>
-
-TEST_F(GenModelTest, OneOp_While)
-{
- // The model looks just like the below pseudocode
- //
- // function model(x)
- // {
- // while (x < 100.0)
- // {
- // x = x + 10.0;
- // }
- // return x
- // }
-
- CircleGen cgen;
- std::vector<float> incr_data{10};
- uint32_t incr_buf = cgen.addBuffer(incr_data);
- std::vector<float> end_data{100};
- uint32_t end_buf = cgen.addBuffer(end_data);
-
- // primary subgraph
- {
- int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- cgen.addOperatorWhile({{x_in}, {x_out}}, 1, 2);
- cgen.setInputsAndOutputs({x_in}, {x_out});
- }
-
- // cond subgraph
- {
- cgen.nextSubgraph();
- int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32, end_buf});
- int result = cgen.addTensor({{1}, circle::TensorType_BOOL});
- cgen.addOperatorLess({{x, end}, {result}});
- cgen.setInputsAndOutputs({x}, {result});
- }
-
- // body subgraph
- {
- cgen.nextSubgraph();
- int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf});
- int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({x_in}, {x_out});
- }
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase({{{0}}, {{100}}});
- _context->addTestCase({{{2}}, {{102}}});
- _context->addTestCase({{{22}}, {{102}}});
- _context->setBackends({"cpu"});
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/While.test.cc b/tests/nnfw_api/src/one_op_tests/While.test.cc
new file mode 100644
index 000000000..5c4da552c
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/While.test.cc
@@ -0,0 +1,270 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+#include "WhileTestModel.h"
+
+#include <memory>
+
+TEST_F(GenModelTest, OneOp_While)
+{
+ WhileModelLoop10 model;
+ _context = std::make_unique<GenModelTestContext>(std::move(model.cbuf));
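+ // x increases by 10 while x < 100; e.g. 22 -> 32 -> ... -> 92 -> 102, while 100 is returned unchanged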
+ _context->addTestCase(uniformTCD<float>({{0}}, {{100}}));
+ _context->addTestCase(uniformTCD<float>({{2}}, {{102}}));
+ _context->addTestCase(uniformTCD<float>({{22}}, {{102}}));
+ _context->addTestCase(uniformTCD<float>({{100}}, {{100}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_While_github_4783)
+{
+ // The model looks just like the below pseudocode
+ //
+ // function model(x, data)
+ // {
+ // // `data` does not do anything but is passed to while's cond and body subgraphs
+ // // to measure copy overhead between subgraphs
+ // while (x < 100.0)
+ // {
+ // x = x + 1.0;
+ // }
+ // return (x, data)
+ // }
+
+ const int kElems = 4;
+ const std::vector<int32_t> shape{kElems};
+
+ CircleGen cgen;
+ uint32_t incr_buf = cgen.addBuffer(std::vector<float>{1});
+ uint32_t incr_data_buf = cgen.addBuffer(std::vector<float>(kElems, 1));
+ uint32_t end_buf = cgen.addBuffer(std::vector<float>{100});
+
+ // primary subgraph
+ {
+ int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int d_in = cgen.addTensor({shape, circle::TensorType_FLOAT32});
+ int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int d_out = cgen.addTensor({shape, circle::TensorType_FLOAT32});
+ cgen.addOperatorWhile({{x_in, d_in}, {x_out, d_out}}, 1, 2);
+ cgen.setInputsAndOutputs({x_in, d_in}, {x_out, d_out});
+ }
+
+ // cond subgraph
+ {
+ cgen.nextSubgraph();
+ int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int d = cgen.addTensor({shape, circle::TensorType_FLOAT32});
+ int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32, end_buf});
+ int result = cgen.addTensor({{1}, circle::TensorType_BOOL});
+ cgen.addOperatorLess({{x, end}, {result}});
+ cgen.setInputsAndOutputs({x, d}, {result});
+ }
+
+ // body subgraph
+ {
+ cgen.nextSubgraph();
+ int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf});
+ int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int d_in = cgen.addTensor({shape, circle::TensorType_FLOAT32});
+ int incr_d = cgen.addTensor({shape, circle::TensorType_FLOAT32, incr_data_buf});
+ int d_out = cgen.addTensor({shape, circle::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE);
+ cgen.addOperatorAdd({{d_in, incr_d}, {d_out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({x_in, d_in}, {x_out, d_out});
+ }
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ std::vector<float> tc_data_in(kElems, 9);
+ std::vector<float> tc_data_out(kElems, 109);
+ _context->addTestCase(uniformTCD<float>({{0}, tc_data_in}, {{100}, tc_data_out}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_While_TwoInputs)
+{
+ // The model looks just like the below pseudocode
+ //
+ // function model(x, end)
+ // {
+ // while (x < end)
+ // {
+ // x = x + 10.0
+ // }
+ // return x
+ // }
+
+ CircleGen cgen;
+ std::vector<float> incr_data{10};
+ uint32_t incr_buf = cgen.addBuffer(incr_data);
+
+ // primary subgraph
+ {
+ int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int end_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int end_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ cgen.addOperatorWhile({{x_in, end_in}, {x_out, end_out}}, 1, 2);
+ cgen.setInputsAndOutputs({x_in, end_in}, {x_out});
+ }
+
+ // cond subgraph
+ {
+ cgen.nextSubgraph();
+ int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int result = cgen.addTensor({{1}, circle::TensorType_BOOL});
+ cgen.addOperatorLess({{x, end}, {result}});
+ cgen.setInputsAndOutputs({x, end}, {result});
+ }
+
+ // body subgraph
+ {
+ cgen.nextSubgraph();
+ int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf});
+ int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({x_in, end}, {x_out, end});
+ }
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{0}, {20}}, {{20}}));
+ _context->addTestCase(uniformTCD<float>({{5}, {30}}, {{35}}));
+ _context->addTestCase(uniformTCD<float>({{20}, {10}}, {{20}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+class WhileWrongSubgraphIndex : public GenModelTest,
+ public ::testing::WithParamInterface<std::pair<int, int>>
+{
+};
+
+TEST_P(WhileWrongSubgraphIndex, neg_Test)
+{
+ // These values must be less than 0 or greater than 2
+ int cond_subg = GetParam().first;
+ int body_subg = GetParam().second;
+
+ // When While operation's subgraph index is invalid
+
+ CircleGen cgen;
+
+ // constant buffers
+ std::vector<float> incr_data{10};
+ uint32_t incr_buf = cgen.addBuffer(incr_data);
+
+ // primary subgraph
+ {
+ int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int end_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int end_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ cgen.addOperatorWhile({{x_in, end_in}, {x_out, end_out}}, cond_subg, body_subg);
+ cgen.setInputsAndOutputs({x_in, end_in}, {x_out});
+ }
+
+ // cond subgraph
+ {
+ cgen.nextSubgraph();
+ int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int result = cgen.addTensor({{1}, circle::TensorType_BOOL});
+ cgen.addOperatorLess({{x, end}, {result}});
+ cgen.setInputsAndOutputs({x, end}, {result});
+ }
+
+ // body subgraph
+ {
+ cgen.nextSubgraph();
+ int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf});
+ int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({x_in, end}, {x_out, end});
+ }
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+INSTANTIATE_TEST_SUITE_P(GenModelTest, WhileWrongSubgraphIndex,
+ ::testing::Values(std::make_pair(99, 2), std::make_pair(-1, 2),
+ std::make_pair(1, 99), std::make_pair(1, -99),
+ std::make_pair(-99, 99)));
+
+// In this test, the output of WHILE and the output of the body subgraph have different data types
+TEST_F(GenModelTest, neg_while_wrong_dtype)
+{
+ CircleGen cgen;
+ std::vector<float> incr_data{10};
+ uint32_t incr_buf = cgen.addBuffer(incr_data);
+ std::vector<float> end_data{100};
+ uint32_t end_buf = cgen.addBuffer(end_data);
+
+ // primary subgraph
+ {
+ int model_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int model_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+
+ cgen.addOperatorWhile({{model_in}, {model_out}}, 1, 2);
+ cgen.setInputsAndOutputs({model_in}, {model_out});
+ }
+
+ // cond subgraph
+ {
+ cgen.nextSubgraph();
+ int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32, end_buf});
+ int result = cgen.addTensor({{1}, circle::TensorType_BOOL});
+ cgen.addOperatorLess({{x, end}, {result}});
+ cgen.setInputsAndOutputs({x}, {result});
+ }
+
+ // body subgraph
+ {
+ cgen.nextSubgraph();
+ int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf});
+ int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int cast_out = cgen.addTensor({{1}, circle::TensorType_INT32});
+ cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE);
+ cgen.addOperatorCast({{x_out}, {cast_out}}, circle::TensorType_FLOAT32,
+ circle::TensorType_INT32);
+ cgen.setInputsAndOutputs({x_in}, {cast_out});
+ // output of this subgraph is INT32 but output of WHILE is FLOAT32
+ }
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ // It would be correct to call `_context->expectFailModelLoad();`, but OperationValidator does not deal
+ // with subgraphs, so this is verified by `_context->expectFailCompile();` as a workaround
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/WhileTestModel.h b/tests/nnfw_api/src/one_op_tests/WhileTestModel.h
new file mode 100644
index 000000000..a1873cc5a
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/WhileTestModel.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_API_TEST_WHILE_TEST_MODEL_H__
+#define __NNFW_API_TEST_WHILE_TEST_MODEL_H__
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+class WhileModelLoop10
+{
+public:
+ WhileModelLoop10()
+ {
+ // The model looks just like the below pseudocode
+ //
+ // function model(x)
+ // {
+ // while (x < 100.0)
+ // {
+ // x = x + 10.0;
+ // }
+ // return x
+ // }
+ CircleGen cgen;
+ std::vector<float> incr_data{10};
+ uint32_t incr_buf = cgen.addBuffer(incr_data);
+ std::vector<float> end_data{100};
+ uint32_t end_buf = cgen.addBuffer(end_data);
+
+ // primary subgraph
+ {
+ int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ cgen.addOperatorWhile({{x_in}, {x_out}}, 1, 2);
+ cgen.setInputsAndOutputs({x_in}, {x_out});
+ }
+
+ // cond subgraph
+ {
+ cgen.nextSubgraph();
+ int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32, end_buf});
+ int result = cgen.addTensor({{1}, circle::TensorType_BOOL});
+ cgen.addOperatorLess({{x, end}, {result}});
+ cgen.setInputsAndOutputs({x}, {result});
+ }
+
+ // body subgraph
+ {
+ cgen.nextSubgraph();
+ int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf});
+ int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({x_in}, {x_out});
+ }
+ cbuf = cgen.finish();
+ }
+
+ int inputCount() { return 1; }
+ int outputCount() { return 1; }
+ int sizeOfDType() { return sizeof(float); }
+
+ CircleBuffer cbuf;
+};
+
+#endif // __NNFW_API_TEST_WHILE_TEST_MODEL_H__
diff --git a/tests/scripts/CMakeLists.txt b/tests/scripts/CMakeLists.txt
index 40e0dfdaa..ec319cab2 100644
--- a/tests/scripts/CMakeLists.txt
+++ b/tests/scripts/CMakeLists.txt
@@ -17,10 +17,6 @@ install(PROGRAMS ${MODEL_TEST_SCRIPT} DESTINATION test/models)
file(GLOB TFLITE_CONFIG_DIR models/tflite)
install(DIRECTORY ${TFLITE_CONFIG_DIR} DESTINATION test/models)
-# Install nnpackage test config
-file(GLOB NNPACKAGE_MODEL_CONFIG_DIR models/nnfw_api_gtest)
-install(DIRECTORY ${NNPACKAGE_MODEL_CONFIG_DIR} DESTINATION test/models)
-
# Install test list
file(GLOB TEST_LIST_DIR list)
install(DIRECTORY ${TEST_LIST_DIR} DESTINATION test)
diff --git a/tests/scripts/README.md b/tests/scripts/README.md
index 7ab9861fa..e6baf26ea 100644
--- a/tests/scripts/README.md
+++ b/tests/scripts/README.md
@@ -70,7 +70,7 @@ $ ./tests/scripts/test-driver.sh \
- input.h5 (input data)
- expected.h5 (expected output data)
-`nnpkg_test` uses `nnpackage_run` internally to run `nnpackage`.
+`nnpkg_test` uses `onert_run` internally to run `nnpackage`.
Then, it compares through `difftool` (either `i5diff` or `h5diff`).
@@ -95,8 +95,8 @@ Options:
(dumped file are always deleted on success) (default=0)
Environment variables:
- nnpackage_run path to nnpackage_run (default=Product/out/bin/nnpackage_run)
- difftool path to i5diff or h5diff (default=h5diff)
+ onert_run path to onert_run (default=Product/out/bin/onert_run)
+ difftool path to i5diff or h5diff (default=h5diff)
Examples:
nnpkg_test.sh Add_000 => run ./Add_000 and check output
diff --git a/tests/scripts/benchmark.sh b/tests/scripts/benchmark.sh
new file mode 100644
index 000000000..923f90c6f
--- /dev/null
+++ b/tests/scripts/benchmark.sh
@@ -0,0 +1,102 @@
+#!/bin/bash
+
+# TODO : Tizen sdb support
+# TODO : multiple backend at once
+#
+# This benchmark tool works as follows:
+# 0. Prepare test-suite
+#
+# On building, set make target to build_test_suite. This will create test-suite.tar.gz under Product/out directory.
+# ```
+# $ make build_test_suite
+# ```
+#
+# 1. Install test-suite into target devices
+# - On android, test-suite should be located on /data/local/tmp/
+# - On Tizen, the nnfw-test package will install test-suite into /opt/usr/nnfw-test/
+#
+# 2. Prepare nnpackage
+#
+# 3. Run benchmark
+#
+# ```
+# $ ./benchmark.sh --backends=cpu --num_runs=5 --nnpackage=/path/to/nnpkg
+#
+# ```
+# 4. Result: trace.json
+# - trace.json is the result file
+
+SCRIPT_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+## If no optional argument is passed, use the Android defaults below
+TEST_ROOT=/data/local/tmp/
+BRIDGE=adb
+BACKENDS=cpu
+NUM_RUNS=3
+
+function Usage()
+{
+ echo "Usage: ./benchamrk.sh --bridge=adb --backends=cpu --num_runs=5 --nnpackge=/path/to/nnpkg"
+ echo ""
+ echo "--bridge : adb or sdb"
+ echo "--nnpackage=<dir> : directory containing nnpackage"
+ echo "--num_runs : number of runs"
+ echo "--backends : backend list"
+}
+
+# Parse command argv
+for i in "$@"
+do
+ case $i in
+ -h|--help|help)
+ Usage
+ exit 1
+ ;;
+ --bridge=*)
+ BRIDGE=${i#*=}
+ ;;
+ --bridge)
+ BRIDGE="$2"
+ shift
+ ;;
+ --num_runs=*)
+ NUM_RUNS=${i#*=}
+ ;;
+ --num_runs)
+ NUM_RUNS="$2"
+ shift
+ ;;
+ --nnpackage=*)
+ NNPKG_PATH=${i#*=}
+ ;;
+ --nnpackage)
+ NNPKG_PATH="$2"
+ shift
+ ;;
+ --backends=*)
+ BACKENDS=${i#*=}
+ ;;
+ esac
+ shift
+done
+
+
+NNPKG_PATH_TARGET=$TEST_ROOT/nnpkg/`basename $NNPKG_PATH`
+
+# 0. Push nnpackage into target
+echo "Pushing nnpackage into ${NNPKG_PATH_TARGET}"
+pushd $NNPKG_PATH/.. > /dev/null
+tar -zcf nnpkg.tar.gz `basename $NNPKG_PATH`
+$BRIDGE push nnpkg.tar.gz $TEST_ROOT
+rm nnpkg.tar.gz
+popd > /dev/null
+$BRIDGE shell mkdir -p $TEST_ROOT/nnpkg
+$BRIDGE shell tar -zxf $TEST_ROOT/nnpkg.tar.gz -C $TEST_ROOT/nnpkg
+$BRIDGE shell rm $TEST_ROOT/nnpkg.tar.gz
+
+# 1. Run
+$BRIDGE shell LD_LIBRARY_PATH=$TEST_ROOT/Product/out/lib TRACE_FILEPATH=$TEST_ROOT/trace.json BACKENDS=$BACKENDS $TEST_ROOT/Product/out/bin/onert_run --nnpackage $NNPKG_PATH_TARGET -r $NUM_RUNS
+
+# 2. Pull result file
+echo "Pulling data from target to trace.json"
+$BRIDGE pull $TEST_ROOT/trace.json
+
+# 3. Clean up
+$BRIDGE shell rm -rf $TEST_ROOT/nnpkg
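
For reference, a minimal end-to-end run of benchmark.sh against an Android device could look like the sketch below (it assumes adb is on PATH, the test suite built via build_test_suite is already unpacked under /data/local/tmp/, and /path/to/Add_000 is a hypothetical nnpackage directory):

    # build the test suite once, then push and unpack it on the device
    make build_test_suite
    adb push Product/out/test-suite.tar.gz /data/local/tmp/
    adb shell tar -zxf /data/local/tmp/test-suite.tar.gz -C /data/local/tmp/

    # benchmark one nnpackage for 5 runs on the cpu backend; trace.json is pulled back
    ./tests/scripts/benchmark.sh --bridge=adb --backends=cpu --num_runs=5 \
        --nnpackage=/path/to/Add_000
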
diff --git a/tests/scripts/benchmark_nnapi.sh b/tests/scripts/benchmark_nnapi.sh
deleted file mode 100755
index af797287f..000000000
--- a/tests/scripts/benchmark_nnapi.sh
+++ /dev/null
@@ -1,245 +0,0 @@
-#!/bin/bash
-#
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-MY_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-
-source $MY_PATH/common.sh
-
-BENCHMARK_DRIVER_BIN=
-BENCHMARK_REPORT_DIR=
-BENCHMARK_MODELS_FILE=
-MODEL_TEST_ROOT_PATH=
-TEST_OP="false"
-BENCHMARK_MODEL_LIST="MODELS/inception_nonslim MODELS/inception_slim MODELS/mobilenet"
-BACKEND_LIST="acl_cl acl_neon cpu" #TODO: accept this list as argument
-EXECUTORS="Linear Parallel" #TODO: accept this list as argument
-
-function Usage()
-{
- echo "Usage: ./$0 --reportdir=. --driverbin=Product/out/bin/tflite_run"
-}
-
-for i in "$@"
-do
- case $i in
- -h|--help|help)
- Usage
- exit 1
- ;;
- --test_op)
- TEST_OP="true"
- ;;
- --driverbin=*)
- BENCHMARK_DRIVER_BIN=${i#*=}
- ;;
- --reportdir=*)
- BENCHMARK_REPORT_DIR=${i#*=}
- BENCHMARK_MODELS_FILE=$BENCHMARK_REPORT_DIR/benchmark_models.txt
- ;;
- --modelfilepath=*)
- TEST_LIST_PATH=${i#*=}
- MODEL_TEST_ROOT_PATH=$TEST_LIST_PATH/tests
- ;;
- esac
- shift
-done
-
-function get_benchmark_op_list()
-{
- local TEST_DIRS="$@"
- local TESTS_TO_RUN=""
-
- if [[ $# -eq 0 ]]; then
- TEST_DIRS="."
- fi
-
- shift $#
-
- pushd $MODEL_TEST_ROOT_PATH > /dev/null
- for DIR in $TEST_DIRS; do
- if [ -d "$DIR" ]; then
- TESTS_FOUND=$(find "$DIR" -type f -name 'config.sh' -exec dirname {} \;| sed 's|^./||' | grep -v '^MODELS/' | sort)
- TESTS_TO_RUN="$TESTS_TO_RUN $TESTS_FOUND"
- fi
- done
- popd > /dev/null
-
- BENCHMARK_MODEL_LIST=$(echo "${TESTS_TO_RUN}")
-}
-
-function profile_for_he_shed()
-{
-
- local REPORT_MODEL_DIR=$1
- local RUN_TEST_SH=$2
- local BENCHMARK_DRIVER_BIN=$3
- local MODEL=$4
- local PROFILING_RUN_CNT=$5
-
- export USE_SCHEDULER=1
- export PROFILING_MODE=1
- export EXECUTOR="Dataflow"
- export ONERT_LOG_ENABLE=1
-
- rm "exec_time.json" 2>/dev/null
- for ((j = 1 ; j <= $PROFILING_RUN_CNT ; j++)); do
- # Save the verbose log of each run
- LOG_FILE=$REPORT_MODEL_DIR/tflite_profiling_$j.txt
-
- print_with_dots "Profiling run #$j out of $PROFILING_RUN_CNT"
-
- $RUN_TEST_SH --driverbin=$BENCHMARK_DRIVER_BIN $MODEL > $LOG_FILE 2>&1
- RET=$?
- if [[ $RET -ne 0 ]]; then
- echo "Profiling $MODEL aborted in run#$j... exit code: $RET"xX
- exit $RET
- fi
- echo "finished"
- # Save the exec_time.json of each run
- cp "exec_time.json" $REPORT_MODEL_DIR/"exec_time_$j.json"
- done
- unset USE_SCHEDULER PROFILING_MODE EXECUTOR ONERT_LOG_ENABLE
-}
-
-function run_with_he_scheduler()
-{
- local REPORT_MODEL_DIR=$1
- local RUN_TEST_SH=$2
- local BENCHMARK_DRIVER_BIN=$3
- local MODEL=$4
- local EXECUTOR=$5
-
- LOG_FILE=$REPORT_MODEL_DIR/tflite_onert_with_he_scheduler_in_$EXECUTOR.txt
- export EXECUTOR=$EXECUTOR
- export GRAPH_DOT_DUMP=1
- export USE_SCHEDULER=1
- export ONERT_LOG_ENABLE=1
-
- print_with_dots "TFLite onert $EXECUTOR with HEScheduler"
-
- RESULT=$(get_result_of_benchmark_test $RUN_TEST_SH $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
- echo "$RESULT ms"
-
- mv "after_lower.dot" $REPORT_MODEL_DIR/"after_lower_$EXECUTOR.dot"
- unset EXECUTOR GRAPH_DOT_DUMP USE_SCHEDULER ONERT_LOG_ENABLE
-}
-
-function run_onert_with_all_config()
-{
- local MODEL=$1
- local REPORT_MODEL_DIR=$2
- local PAUSE_TIME_IN_SEC=$3
- local BENCHMARK_DRIVER_BIN=$4
- local EXECUTORS=$5
- local BACKEND_LIST=$6
-
- export USE_NNAPI=1
-
- # Run profiler BACKEND_CNT+1 times: on each run of the first BACKEND_CNT runs it will
- # collect metrics for one unmeasured backend. On the last run metrics for data transfer
- PROFILING_RUN_CNT=1
- BACKENDS_TO_USE=
- for backend in $BACKEND_LIST; do
- BACKENDS_TO_USE+=$backend';'
- ((++PROFILING_RUN_CNT))
- done
- export BACKENDS=$BACKENDS_TO_USE
- if [ "$TEST_OP" == "false" ]; then
- profile_for_he_shed $REPORT_MODEL_DIR $BENCHMARK_DRIVER_BIN $MODEL $PROFILING_RUN_CNT
- fi
-
- for executor in $EXECUTORS; do
- export EXECUTOR=$executor
- if [ "$TEST_OP" == "false" ]; then
- run_with_he_scheduler $REPORT_MODEL_DIR $BENCHMARK_DRIVER_BIN $MODEL $executor
- fi
- for backend in $BACKEND_LIST; do
- export OP_BACKEND_ALLOPS=$backend
- run_benchmark_and_print "tflite_onert_"$executor"_executor_$backend" "TFLite onert $executor Executor $backend"\
- $MODEL $REPORT_MODEL_DIR 0 $BENCHMARK_DRIVER_BIN
- done
- done
- unset USE_NNAPI EXECUTOR OP_BACKEND_ALLOPS BACKENDS
-}
-
-function run_benchmark_test()
-{
- local LOG_FILE=
- local RESULT_FILE=
- local RESULT=
- local REPORT_MODEL_DIR=
-
- export COUNT=5
- export ONERT_LOG_ENABLE=1
- echo
- echo "============================================"
- echo
- date +'%Y-%m-%d %H:%M:%S %s'
- echo
- local i=0
- for MODEL in $BENCHMARK_MODEL_LIST; do
-
- STATUS="enabled"
- if [ "$TEST_OP" == "true" ]; then
- source $MODEL_TEST_ROOT_PATH/$MODEL/config.sh
- fi
-
- # Skip 'disabled' tests
- if [ $(tr '[:upper:]' '[:lower:]' <<< "$STATUS") == "disabled" ]; then
- continue
- fi
-
- echo "Benchmark test with `basename $BENCHMARK_DRIVER_BIN` & `echo $MODEL`"
- echo $MODEL >> $BENCHMARK_MODELS_FILE
-
- REPORT_MODEL_DIR=$BENCHMARK_REPORT_DIR/$MODEL
- mkdir -p $REPORT_MODEL_DIR
-
- # TFLite+CPU
- unset USE_NNAPI
- run_benchmark_and_print "tflite_cpu" "TFLite CPU" $MODEL $REPORT_MODEL_DIR 0 $BENCHMARK_DRIVER_BIN
-
- # run onert
- if [ "$TEST_OP" == "true" ]; then
- # Operation test don't need to test each scheduler
- run_onert_with_all_config $MODEL $REPORT_MODEL_DIR 0 $BENCHMARK_DRIVER_BIN "Linear" "$BACKEND_LIST"
- else
- run_onert_with_all_config $MODEL $REPORT_MODEL_DIR 0 $BENCHMARK_DRIVER_BIN "$EXECUTORS" "$BACKEND_LIST"
- fi
-
- if [[ $i -ne $(echo $BENCHMARK_MODEL_LIST | wc -w)-1 ]]; then
- echo ""
- fi
- i=$((i+1))
- done
- echo "============================================"
- unset COUNT
-}
-
-if [ ! -e "$BENCHMARK_REPORT_DIR" ]; then
- mkdir -p $BENCHMARK_REPORT_DIR
-fi
-
-if [ "$TEST_OP" == "true" ]; then
- get_benchmark_op_list
-fi
-
-rm -rf $BENCHMARK_MODELS_FILE
-
-echo ""
-# print the result AND append to log file
-run_benchmark_test 2>&1 | tee -a onert_benchmarks.txt
-echo ""
diff --git a/tests/scripts/benchmark_nnpkg.sh b/tests/scripts/benchmark_nnpkg.sh
index 52043f458..a180cd9b1 100755
--- a/tests/scripts/benchmark_nnpkg.sh
+++ b/tests/scripts/benchmark_nnpkg.sh
@@ -1,66 +1,58 @@
#!/bin/bash
-usage()
-{
- echo "$0 <options>"
- echo "Options"
- echo "--nnpackage_run : specific nnpackage_run path"
- echo "--tflite_run : specific tflite_run path"
- echo "--dir : the dir path of models"
- echo "--list : the model list"
- echo "--out : the file name of out results"
- echo "--tv : for tv"
- exit 1
-}
+MY_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+source $MY_PATH/common.sh
-scripts_dir="$( cd "$( dirname "${BASH_SOURCE}" )" && pwd )"
-nnfw_dir="${scripts_dir}/../.."
-nnpackage_run="${nnfw_dir}/Product/out/bin/nnpackage_run"
-tflite_run="${nnfw_dir}/Product/out/bin/tflite_run"
+# Caution: DO NOT USE "pipefail"
+# We should run tests for all nnpackages
+
+onert_run="$INSTALL_PATH/bin/onert_run"
+tflite_run="$INSTALL_PATH/bin/tflite_run"
base_name="$(basename $0)"
base_name="${base_name%.*}"
outfile="${base_name}_result.txt"
dir=""
-list="${scripts_dir}/list/${base_name}_model_list.txt"
+list="$INSTALL_PATH/test/list/benchmark_nnpkg_model_list.txt"
tv_on="false"
+function usage()
+{
+ echo "Usage: ${BASH_SOURCE[0]} [OPTIONS]"
+ echo "Options"
+ echo " --dir=PATH : the dir path of models"
+ echo " --list=FILE : the model list (default: $list)"
+ echo " --out=FILE : the file name of out results (default: $outfile)"
+ echo " --tv : for tv"
+ echo " --help : display this help message and exit"
+ exit 1
+}
+
for i in "$@"
do
-case $i in
- --nnpackage_run=*)
- nnpackage_run="${i#*=}"
- ;;
- --tflite_run=*)
- tflite_run="${i#*=}"
- ;;
- --out=*)
- outfile="${i#*=}"
- ;;
- --dir=*)
- dir="${i#*=}"
- ;;
- --list=*)
- list="${i#*=}"
- ;;
- --tv)
- tv_on="true"
- ;;
- *)
- ;;
-esac
-shift
+ case $i in
+ --out=*)
+ outfile="${i#*=}"
+ ;;
+ --dir=*)
+ dir="${i#*=}"
+ ;;
+ --list=*)
+ list="${i#*=}"
+ ;;
+ --tv)
+ tv_on="true"
+ ;;
+ --help)
+ usage
+ exit 1
+ ;;
+ *)
+ ;;
+ esac
+ shift
done
-if ! [ -f ${nnpackage_run} ]; then
- echo "nnpackage_run file does not exists."
- usage
-fi
-
-if ! [ -f ${tflite_run} ]; then
- echo "tflite_run file does not exists."
- usage
-fi
-
if ! [ -f ${list} ]; then
echo "model list file does not exists."
usage
@@ -95,7 +87,7 @@ done
for i in "${model_lists[@]}"; do
echo "${i} result" | tee -a ${outfile}
- CMD="${nnpackage_run} -r 10 -m 1 -p 1"
+ CMD="${onert_run} -r 10 -m 1 -p 1"
if [ "$tv_on" == "true" ]; then
CMD="${CMD} -g 1"
fi
@@ -128,7 +120,7 @@ for i in "${model_lists[@]}"; do
echo "" >> ${outfile}
- TFLITE_CMD="LD_LIBRARY_PATH=./Product/out/lib THREAD=3 ${tflite_run} -r 10 -m 1 -p 1"
+ TFLITE_CMD="THREAD=3 ${tflite_run} -r 10 -m 1 -p 1"
if [ "$tv_on" == "true" ]; then
TFLITE_CMD="${TFLITE_CMD} -g 1"
fi
@@ -143,4 +135,4 @@ for i in "${model_lists[@]}"; do
sleep 20 # for avoiding cpu overheated
done # ${model_lists}
-${scripts_dir}/merge_result_of_benchmark_nnpkg.py -i . -o . -l ${list}
+python3 $MY_PATH/merge_result_of_benchmark_nnpkg.py -i . -o . -l ${list}
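
A plausible invocation of the updated benchmark_nnpkg.sh (a sketch; the nnpackage directory is hypothetical, and the list defaults to the installed benchmark_nnpkg_model_list.txt):

    ./tests/scripts/benchmark_nnpkg.sh --dir=/path/to/nnpackages --out=benchmark_result.txt
    # per-model timings are appended to the --out file, then merged by
    # merge_result_of_benchmark_nnpkg.py
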
diff --git a/tests/scripts/benchmark_ops.sh b/tests/scripts/benchmark_ops.sh
new file mode 100755
index 000000000..b1b3f69c2
--- /dev/null
+++ b/tests/scripts/benchmark_ops.sh
@@ -0,0 +1,183 @@
+#!/bin/bash
+#
+# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+MY_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+source $MY_PATH/common.sh
+
+# Caution: DO NOT USE "pipefail"
+# We should run tests for all operators
+
+ONERT_DRIVER_BIN=$INSTALL_PATH/bin/onert_run
+TFLITE_DRIVER_BIN=$INSTALL_PATH/bin/tflite_run
+REPORT_DIR=$ROOT_PATH/report
+BENCHMARK_REPORT_DIR=$REPORT_DIR/benchmark_op
+BENCHMARK_MODELS_FILE=$BENCHMARK_REPORT_DIR/benchmark_models.txt
+MODEL_TEST_ROOT_PATH=$INSTALL_PATH/test/models/tflite
+BENCHMARK_MODEL_LIST=
+BACKEND_LIST="acl_cl acl_neon cpu"
+TEST_DIRS="."
+
+function Usage()
+{
+ echo "Usage: ${BASH_SOURCE[0]} [OPTIONS]"
+ echo ""
+ echo "Options:"
+ echo " --backends=STRING Backends to test. (default='$BACKEND_LIST')"
+ echo " --list=FILE List file to test. Test all if list option is not passed"
+}
+
+for i in "$@"
+do
+ case $i in
+ -h|--help|help)
+ Usage
+ exit 1
+ ;;
+ --list=*)
+ TEST_LIST_PATH=${i#*=}
+ TEST_DIRS=$(grep -v '#' $TEST_LIST_PATH | tr '\n' ' ' )
+ ;;
+ --backends=*)
+ BACKEND_LIST=${i#*=}
+ ;;
+ esac
+ shift
+done
+
+function get_benchmark_op_list()
+{
+ local TESTS_TO_RUN=""
+
+ pushd $MODEL_TEST_ROOT_PATH > /dev/null
+ for DIR in $TEST_DIRS; do
+ if [ -d "$DIR" ]; then
+ TESTS_FOUND=$(find "$DIR" -type f -name 'config.sh' -exec dirname {} \;| sed 's|^./||' | grep -v '^MODELS/' | sort)
+ TESTS_TO_RUN="$TESTS_TO_RUN $TESTS_FOUND"
+ fi
+ done
+ popd > /dev/null
+
+ BENCHMARK_MODEL_LIST=$(echo "${TESTS_TO_RUN}")
+}
+
+function run_benchmark_and_print()
+{
+
+ local WRITE_FILE_NAME=$1
+ local MSG=$2
+ local MODEL=$3
+ local REPORT_MODEL_DIR=$4
+ local DRIVER_BIN=$5
+
+ LOG_FILE=$REPORT_MODEL_DIR/$WRITE_FILE_NAME.txt
+ RESULT_FILE=$REPORT_MODEL_DIR/$WRITE_FILE_NAME.result
+ print_with_dots $MSG
+ RESULT=$(get_result_of_benchmark_test $DRIVER_BIN $MODEL $LOG_FILE)
+ echo "$RESULT ms"
+ echo "$MSG $RESULT" > $RESULT_FILE
+}
+
+function run_onert_with_all_config()
+{
+ local MODEL=$1
+ local REPORT_MODEL_DIR=$2
+ local BENCHMARK_DRIVER_BIN=$3
+
+ # Run profiler BACKEND_CNT+1 times: on each run of the first BACKEND_CNT runs it will
+ # collect metrics for one unmeasured backend. On the last run metrics for data transfer
+ PROFILING_RUN_CNT=1
+ BACKENDS_TO_USE=
+ for backend in $BACKEND_LIST; do
+ BACKENDS_TO_USE+=$backend';'
+ ((++PROFILING_RUN_CNT))
+ done
+ export BACKENDS=$BACKENDS_TO_USE
+ export EXECUTOR="Linear"
+ for backend in $BACKEND_LIST; do
+ export OP_BACKEND_ALLOPS=$backend
+ run_benchmark_and_print "onert_$backend" "ONERT-${backend^^}"\
+ $MODEL $REPORT_MODEL_DIR $BENCHMARK_DRIVER_BIN
+ done
+ unset EXECUTOR OP_BACKEND_ALLOPS BACKENDS
+}
+
+function run_benchmark_test()
+{
+ local LOG_FILE=
+ local RESULT_FILE=
+ local RESULT=
+ local REPORT_MODEL_DIR=
+
+ export COUNT=5
+ echo
+ echo "============================================"
+ echo
+ date +'%Y-%m-%d %H:%M:%S %s'
+ echo
+ local i=0
+ for MODEL in $BENCHMARK_MODEL_LIST; do
+
+ STATUS="enabled"
+ source $MODEL_TEST_ROOT_PATH/$MODEL/config.sh
+
+ # Skip 'disabled' tests
+ if [ $(tr '[:upper:]' '[:lower:]' <<< "$STATUS") == "disabled" ]; then
+ continue
+ fi
+
+ echo "Benchmark test `echo $MODEL`"
+ echo $MODEL >> $BENCHMARK_MODELS_FILE
+
+ REPORT_MODEL_DIR=$BENCHMARK_REPORT_DIR/$MODEL
+ mkdir -p $REPORT_MODEL_DIR
+
+ # TFLite+CPU
+ run_benchmark_and_print "tflite_cpu" "TFLite-CPU" $MODEL $REPORT_MODEL_DIR $TFLITE_DRIVER_BIN
+
+ # run onert
+ # Operation tests don't need to test each scheduler
+ run_onert_with_all_config $MODEL $REPORT_MODEL_DIR $ONERT_DRIVER_BIN
+
+ if [[ $i -ne $(echo $BENCHMARK_MODEL_LIST | wc -w)-1 ]]; then
+ echo ""
+ fi
+ i=$((i+1))
+ done
+ echo "============================================"
+ unset COUNT
+}
+
+if [ ! -e "$BENCHMARK_REPORT_DIR" ]; then
+ mkdir -p $BENCHMARK_REPORT_DIR
+fi
+
+get_benchmark_op_list
+
+rm -rf $BENCHMARK_MODELS_FILE
+
+# Model download server setting
+prepare_test_model
+
+echo ""
+# print the result AND append to log file
+run_benchmark_test 2>&1 | tee -a $REPORT_DIR/onert_benchmarks.txt
+echo ""
+
+# Make json file.
+# functions to fill json with benchmark results
+source $MY_PATH/print_to_json.sh
+print_to_json $BENCHMARK_REPORT_DIR $REPORT_DIR "benchmark_op_result.json"
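
A sketch of how benchmark_ops.sh is expected to be driven (the server URL and list file path are assumptions):

    export MODELFILE_SERVER=http://example.com/modelfiles   # hypothetical mirror
    ./tests/scripts/benchmark_ops.sh --backends="acl_cl cpu" \
        --list=Product/out/test/list/benchmark_op_list.txt
    # per-model results land under <repo root>/report/benchmark_op/, and
    # print_to_json merges them into report/benchmark_op_result.json
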
diff --git a/tests/scripts/command/nnpkg-test b/tests/scripts/command/nnpkg-test
index a1176d153..7f8e0f8ff 100644
--- a/tests/scripts/command/nnpkg-test
+++ b/tests/scripts/command/nnpkg-test
@@ -9,9 +9,10 @@ command_exists() {
progname=$(basename "${BASH_SOURCE[0]}")
indir="."
outdir="."
-nnpkg_run=${nnpkg_run:-"nnpackage_run"}
+nnpkg_run=${nnpkg_run:-"onert_run"}
difftool=${difftool:-"h5diff"}
delete_dumped_on_failure=0
+verbose_diff=0
usage() {
echo "Usage: $0 $progname [options] nnpackage_test"
@@ -27,10 +28,11 @@ usage() {
echo " -o set output directory (default=$outdir)"
echo " -d delete dumped file on failure."
echo " (dumped file are always deleted on success) (default=$delete_dumped_on_failure)"
+ echo " -v verbose result diff (default=$verbose_diff)"
echo ""
echo "Environment variables:"
- echo " nnpackage_run path to nnpackage_run (default=nnpackage_run)"
- echo " difftool path to i5diff or h5diff (default=h5diff)"
+ echo " onert_run path to onert_run (default=onert_run)"
+ echo " difftool path to i5diff or h5diff (default=h5diff)"
echo ""
echo "Examples:"
echo " $0 $progname Add_000 => run $indir/Add_000 and check output"
@@ -43,12 +45,13 @@ if [ $# -eq 0 ]; then
exit 1
fi
-while getopts "hdi:o:" OPTION; do
+while getopts "hdi:o:v" OPTION; do
case "${OPTION}" in
h) usage;;
d) delete_dumped_on_failure=1;;
i) indir=$OPTARG;;
o) outdir=$OPTARG;;
+ v) verbose_diff=1;;
?) exit 1;;
esac
done
@@ -110,8 +113,8 @@ echo -n "[Compare] $nnpkg "
test_fail()
{
echo -e "\tFail"
- [ $delete_dumped_on_failure ] && rm "$dumped"
- cat "$dumped.log"
+ [ $delete_dumped_on_failure -eq 1 ] && rm "$dumped"
+ [ $verbose_diff -eq 1 ] && cat "$dumped.log"
rm "$dumped.log"
exit 3
}
@@ -119,7 +122,7 @@ test_fail()
test_pass()
{
echo -e "\tPass"
- cat "$dumped.log"
+ [ $verbose_diff -eq 1 ] && cat "$dumped.log"
rm "$dumped" "$dumped.log"
}
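
With the new -v flag the diff log is printed only on request, for example (a sketch, assuming Add_000 test artifacts live in nnpkg-tcs and onert_run has been built):

    # quiet run: pass/fail only
    nnpkg_run=Product/out/bin/onert_run tests/scripts/command/nnpkg-test -i nnpkg-tcs Add_000
    # verbose run: also print the h5diff log, and delete the dumped output on failure
    nnpkg_run=Product/out/bin/onert_run tests/scripts/command/nnpkg-test -v -d -i nnpkg-tcs Add_000
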
diff --git a/tests/scripts/command/prepare-model b/tests/scripts/command/prepare-model
index 3feb7a799..0a418dc77 100644
--- a/tests/scripts/command/prepare-model
+++ b/tests/scripts/command/prepare-model
@@ -14,19 +14,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-COMMAND_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-INSTALL_DIR="$(dirname $(dirname $COMMAND_DIR))"
-
+MODEL_ROOT_DIR=$DRIVER_PATH/models
MD5_CHECK="on"
-DOWNLOAD_MODEL="all"
function Usage()
{
echo "Usage: $0 $(basename ${BASH_SOURCE[0]}) [OPTIONS]"
echo ""
echo "Options:"
- echo " --ignoremd5 Ignore MD5 check when download model files"
- echo " --model=(all|nnpackage|tflite) Download test model (default=all)"
+ echo " --ignoremd5 Ignore MD5 check when download model files"
+ echo " --cachedir=PATH Set downloaded resouces cache directory (default: $CACHE_PATH)"
+ echo " -h, --help Display this help message and exit"
}
for i in "$@"
@@ -39,8 +37,8 @@ do
--ignoremd5)
MD5_CHECK="off"
;;
- --model=*)
- DOWNLOAD_MODEL=${i#*=}
+ --cachedir=*)
+ CACHE_PATH=${i#*=}
;;
*)
echo "Unknown option: $i"
@@ -50,15 +48,114 @@ do
shift
done
-if [[ $DOWNLOAD_MODEL == "all" ]] || [[ $DOWNLOAD_MODEL == "tflite" ]]; then
- # Download tflite models
- $INSTALL_DIR/test/models/run_test.sh --download=on --run=off --md5=$MD5_CHECK
-fi
+function find_tests()
+{
+ local TEST_DIRS="$@"
+ local TESTS_TO_DOWNLOAD=""
+
+ if [[ $# -eq 0 ]]; then
+ TEST_DIRS="."
+ fi
+
+ shift $#
+
+ pushd $MODEL_ROOT_DIR > /dev/null
+ for DIR in $TEST_DIRS; do
+ if [ -d "$DIR" ]; then
+ TESTS_FOUND=$(find "$DIR" -type f -name 'config.sh' -exec dirname {} \;| sed 's|^./||' | sort)
+ TESTS_TO_DOWNLOAD="$TESTS_TO_DOWNLOAD $TESTS_FOUND"
+ else
+ echo "Test $DIR was not found. This test is not added." 1>&2
+ fi
+ done
+ popd > /dev/null
+
+ echo $TESTS_TO_DOWNLOAD
+}
+
+function need_download()
+{
+ LOCAL_PATH=$1
+ REMOTE_URL=$2
+ if [ ! -e $LOCAL_PATH ]; then
+ return 0;
+ fi
+ # Ignore checking md5 in cache
+ # TODO Use "--md5" option only and remove IGNORE_MD5 environment variable
+ if [ ! -z $IGNORE_MD5 ] && [ "$IGNORE_MD5" == "1" ]; then
+ return 1
+ fi
+ if [ "$MD5_CHECK" = "off" ]; then
+ return 1
+ fi
+
+ LOCAL_HASH=$(md5sum $LOCAL_PATH | awk '{ print $1 }')
+ REMOTE_HASH=$(curl --netrc-optional -kLsS $REMOTE_URL | md5sum | awk '{ print $1 }')
+ # TODO Emit an error when Content-MD5 field was not found. (Server configuration issue)
+ if [ "$LOCAL_HASH" != "$REMOTE_HASH" ]; then
+ echo "Downloaded file is outdated or incomplete."
+ return 0
+ fi
+ return 1
+}
-if [[ $DOWNLOAD_MODEL == "all" ]] || [[ $DOWNLOAD_MODEL == "nnpackage" ]]; then
- # Download nnpackage model
- NNPACKAGE_CONFIG_DIR=$INSTALL_DIR/test/models/nnfw_api_gtest/
- NNPACKAGE_CACHE_DIR=$INSTALL_DIR/unittest_standalone/nnfw_api_gtest_models/
- $INSTALL_DIR/test/models/run_test.sh --download=on --run=off --md5=$MD5_CHECK \
- --configdir=$NNPACKAGE_CONFIG_DIR --cachedir=$NNPACKAGE_CACHE_DIR
+function download_tests()
+{
+ SELECTED_TESTS=$@
+
+ echo ""
+ echo "Downloading tests:"
+ echo "======================"
+ for TEST_NAME in $SELECTED_TESTS; do
+ echo $TEST_NAME
+ done
+ echo "======================"
+
+ if [ ! -e $CACHE_PATH ]; then
+ mkdir -p $CACHE_PATH
+ fi
+
+ i=0
+ for TEST_NAME in $SELECTED_TESTS; do
+ # Test configure initialization
+ ((i++))
+ MODELFILE_URL_BASE=""
+ MODELFILE_NAME=""
+ source $MODEL_ROOT_DIR/$TEST_NAME/config.sh
+
+ MODELFILE=$CACHE_PATH/$MODELFILE_NAME
+ MODELFILE_URL="$MODELFILE_URL_BASE/$MODELFILE_NAME"
+ if [ -n "$MODELFILE_SERVER" ]; then
+ MODELFILE_URL="$MODELFILE_SERVER/$MODELFILE_NAME"
+ fi
+
+ # Download model file
+ # Download unless we have it in cache (Also check md5sum)
+ if need_download "$MODELFILE" "$MODELFILE_URL"; then
+ echo ""
+ echo "Download test file for $TEST_NAME"
+ echo "======================"
+
+ rm -f $MODELFILE # Remove invalid file if exists
+ pushd $CACHE_PATH > /dev/null
+ echo "Download $MODELFILE_URL"
+ curl --netrc-optional -kLOsS $MODELFILE_URL
+ if [ "${MODELFILE_NAME##*.}" == "zip" ]; then
+ unzip -o $MODELFILE_NAME -d ${MODELFILE_NAME%.zip}
+ fi
+ popd > /dev/null
+ fi
+
+ done
+}
+
+# Check MODELFILE_SERVER
+if [[ -z "$MODELFILE_SERVER" ]]; then
+ echo "Fail to download models: Please set MODELFILE_SERVER to download model"
+ exit 1
fi
+echo "Download from $MODELFILE_SERVER"
+
+# Download tflite model
+TESTS_TO_DOWNLOAD=$(find_tests tflite)
+download_tests $TESTS_TO_DOWNLOAD
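
The resulting download flow, sketched (the server URL is hypothetical):

    export MODELFILE_SERVER=http://example.com/modelfiles
    Product/out/test/onert-test prepare-model --cachedir=Product/out/test/cache
    # re-running is cheap: need_download md5sums the cached file against the server
    # copy and skips anything already up to date (--ignoremd5 bypasses the check)
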
diff --git a/tests/scripts/command/unittest b/tests/scripts/command/unittest
index 135ebea7b..9eb66dce4 100644
--- a/tests/scripts/command/unittest
+++ b/tests/scripts/command/unittest
@@ -14,10 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-COMMAND_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-INSTALL_DIR="$(dirname $(dirname $COMMAND_DIR))"
UNITTEST_REPORT_DIR=
-UNITTEST_TEST_DIR=$INSTALL_DIR/unittest
+UNITTEST_TEST_DIR=$INSTALL_PATH/unittest
UNITTEST_RESULT=0
UNITTEST_RUN_ALL=""
diff --git a/tests/scripts/command/verify-tflite b/tests/scripts/command/verify-tflite
index 48863ff12..c621cc432 100644
--- a/tests/scripts/command/verify-tflite
+++ b/tests/scripts/command/verify-tflite
@@ -14,11 +14,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-COMMAND_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-INSTALL_DIR="$(dirname $(dirname $COMMAND_DIR))"
-
-MD5_CHECK="on"
-TFLITE_LOADER="nnapi"
REPORT_DIR="report"
TEST_LIST_FILE=
@@ -27,10 +22,9 @@ function Usage()
echo "Usage: $0 $(basename ${BASH_SOURCE[0]}) [OPTIONS]"
echo ""
echo "Options:"
- echo " --ignoremd5 Ignore MD5 check when download model files"
- echo " --api=(nnapi|loader) TFLite model file loading API (default=$TFLITE_LOADER)"
echo " --reportdir=PATH Path to write report (default=$REPORT_DIR)"
echo " --list=FILE List file to test. Test all if list option is not passed"
+ echo " --cachedir=PATH Set downloaded resouces cache directory (default: $CACHE_PATH)"
}
for i in "$@"
@@ -40,18 +34,15 @@ do
Usage
exit 1
;;
- --ignoremd5)
- MD5_CHECK="off"
- ;;
- --api=*)
- TFLITE_LOADER=${i#*=}
- ;;
--reportdir=*)
REPORT_DIR=${i#*=}
;;
--list=*)
TEST_LIST_FILE=${i#*=}
;;
+ --cachedir=*)
+ CACHE_PATH=${i#*=}
+ ;;
*)
echo "Unknown option: $i"
exit 1
@@ -70,23 +61,13 @@ fi
TEST_RESULT=0
TAP_NAME=verification_test.tap
-TEST_NAME="Verification"
-TEST_DRIVER=
-
-if [[ $TFLITE_LOADER == "nnapi" ]]; then
- TEST_NAME="NNAPI Verification"
- TEST_DRIVER=nnapi_test
-elif [[ $TFLITE_LOADER == "loader" ]]; then
- TEST_NAME="Loader Verification"
- TEST_DRIVER=tflite_loader_test_tool
-else
- Usage
- exit 1
-fi
+TEST_NAME="Loader Verification"
+TEST_DRIVER=tflite_comparator
-$INSTALL_DIR/test/models/run_test.sh --driverbin=$TEST_DRIVER \
+$INSTALL_PATH/test/models/run_test.sh --driverbin=$TEST_DRIVER \
--reportdir=$REPORT_DIR \
--tapname=$TAP_NAME \
+ --cachedir=$CACHE_PATH \
${MODELLIST:-} > $REPORT_DIR/verification_test.log 2>&1
TEST_RESULT=$?
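
Typical usage after this cleanup, now that the driver is fixed to tflite_comparator (paths are assumptions based on the default install layout):

    Product/out/test/onert-test verify-tflite \
        --list=Product/out/test/list/tflite_comparator.armv7l.cpu.list \
        --reportdir=report --cachedir=Product/out/test/cache
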
diff --git a/tests/scripts/common.sh b/tests/scripts/common.sh
index a2a261a9c..b0c44aef3 100755
--- a/tests/scripts/common.sh
+++ b/tests/scripts/common.sh
@@ -15,6 +15,22 @@
# limitations under the License.
MY_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+ROOT_PATH="$(cd ${MY_PATH}/../../ && pwd)"
+
+# Install path on CI
+INSTALL_PATH=$ROOT_PATH/Product/out
+TEST_CACHE_PATH=$INSTALL_PATH/test/cache
+
+function prepare_test_model()
+{
+ # Model download server setting
+ if [[ -z "${MODELFILE_SERVER}" ]]; then
+ echo "Model file server is not set. Try to use default setting."
+ else
+ echo "Model Server: ${MODELFILE_SERVER}"
+ fi
+ $INSTALL_PATH/test/onert-test prepare-model --cachedir=$TEST_CACHE_PATH
+}
function get_result_of_benchmark_test()
{
@@ -23,7 +39,7 @@ function get_result_of_benchmark_test()
local LOG_FILE=$3
local RET=0
- $MY_PATH/models/run_test.sh --driverbin="$DRIVER_BIN -r 5 -w 3" $MODEL > $LOG_FILE 2>&1
+ $INSTALL_PATH/test/models/run_test.sh --driverbin="$DRIVER_BIN -r 5 -w 3" --cachedir=$TEST_CACHE_PATH $MODEL > $LOG_FILE 2>&1
RET=$?
if [[ $RET -ne 0 ]]; then
echo "Testing $MODEL aborted... exit code: $RET"
@@ -34,15 +50,6 @@ function get_result_of_benchmark_test()
echo "$RESULT"
}
-function print_result_of_benchmark_test()
-{
- local NAME=$1
- local RESULT=$2
- local RESULT_FILE=$3
-
- echo "$NAME $RESULT" > $RESULT_FILE
-}
-
function print_with_dots()
{
PRINT_WIDTH=45
@@ -52,23 +59,3 @@ function print_with_dots()
printf '%s' "$MSG"
printf '%*.*s ' 0 $padlength "$pad"
}
-
-
-function run_benchmark_and_print()
-{
- local WRITE_FILE_NAME=$1
- local MSG=$2
- local MODEL=$3
- local REPORT_MODEL_DIR=$4
- local PAUSE_TIME_IN_SEC=$5
- local DRIVER_BIN=$6
- local BENCHMARK_RUN_TEST_SH=$7
-
- LOG_FILE=$REPORT_MODEL_DIR/$WRITE_FILE_NAME.txt
- RESULT_FILE=$REPORT_MODEL_DIR/$WRITE_FILE_NAME.result
- print_with_dots $MSG
- RESULT=$(get_result_of_benchmark_test $DRIVER_BIN $MODEL $LOG_FILE)
- echo "$RESULT ms"
- print_result_of_benchmark_test "$MSG" "$RESULT" $RESULT_FILE
- sleep $PAUSE_TIME_IN_SEC
-}
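
Other test scripts consume these helpers by sourcing common.sh; a minimal sketch:

    #!/bin/bash
    MY_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
    source $MY_PATH/common.sh

    prepare_test_model   # populate $TEST_CACHE_PATH via onert-test prepare-model
    print_with_dots "mobilenet on cpu"
    RESULT=$(get_result_of_benchmark_test $INSTALL_PATH/bin/onert_run MODELS/mobilenet /tmp/bench.log)
    echo "$RESULT ms"
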
diff --git a/tests/scripts/common_android.sh b/tests/scripts/common_android.sh
new file mode 100644
index 000000000..66601aea3
--- /dev/null
+++ b/tests/scripts/common_android.sh
@@ -0,0 +1,76 @@
+#!/system/bin/sh
+#
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+MY_PATH="$( cd "$( dirname "$0" )" && pwd )"
+
+SHELL_CMD=/system/bin/sh
+
+function get_result_of_benchmark_test()
+{
+ local DRIVER_BIN=$1
+ local MODEL=$2
+ local LOG_FILE=$3
+
+ local RET=0
+ $SHELL_CMD $MY_PATH/models/run_test_android.sh --driverbin="$DRIVER_BIN -r 5 -w 3" $MODEL > $LOG_FILE 2>&1
+ RET=$?
+ if [[ $RET -ne 0 ]]; then
+ echo "Testing $MODEL aborted... exit code: $RET"
+ exit $RET
+ fi
+
+ local RESULT=`grep -E '^- MEAN ' $LOG_FILE | awk '{print $4}'`
+ echo "$RESULT"
+}
+
+function print_result_of_benchmark_test()
+{
+ local NAME=$1
+ local RESULT=$2
+ local RESULT_FILE=$3
+
+ echo "$NAME $RESULT" > $RESULT_FILE
+}
+
+function print_with_dots()
+{
+ PRINT_WIDTH=45
+ local MSG="$@"
+ pad="............................................."
+ padlength=$((PRINT_WIDTH- ${#MSG}))
+ printf '%s' "$MSG"
+ printf '%*.*s ' 0 $padlength "$pad"
+}
+
+
+function run_benchmark_and_print()
+{
+ local WRITE_FILE_NAME=$1
+ local MSG=$2
+ local MODEL=$3
+ local REPORT_MODEL_DIR=$4
+ local PAUSE_TIME_IN_SEC=$5
+ local DRIVER_BIN=$6
+ local BENCHMARK_RUN_TEST_SH=$7
+
+ LOG_FILE=$REPORT_MODEL_DIR/$WRITE_FILE_NAME.txt
+ RESULT_FILE=$REPORT_MODEL_DIR/$WRITE_FILE_NAME.result
+ print_with_dots $MSG
+ RESULT=$(get_result_of_benchmark_test $DRIVER_BIN $MODEL $LOG_FILE)
+ echo "$RESULT ms"
+ print_result_of_benchmark_test "$MSG" "$RESULT" $RESULT_FILE
+ sleep $PAUSE_TIME_IN_SEC
+}
diff --git a/tests/scripts/list/frameworktest_list.aarch64.acl_cl.txt b/tests/scripts/list/frameworktest_list.aarch64.acl_cl.txt
deleted file mode 100644
index dd8d3b710..000000000
--- a/tests/scripts/list/frameworktest_list.aarch64.acl_cl.txt
+++ /dev/null
@@ -1,46 +0,0 @@
-MODELS/inception_module
-MODELS/mobilenet
-add
-average_pool_2d
-batch_to_space_nd2
-cast
-concat
-conv_2d
-custom
-depthwise_conv_2d
-div
-embedding_lookup
-exp
-floor
-fullyconnected
-gather
-hashtable_lookup
-l2_normalization
-l2_pool_2d
-max
-max_pool_2d
-mean
-min
-mul
-neg
-pack
-pad
-reduce_max
-reduce_mean
-reduce_sum/float
-relu
-relu6
-reshape
-resize_bilinear
-rsqrt
-slice
-softmax
-space_to_batch_nd2
-space_to_depth
-squeeze
-strided_slice
-sub
-tanh
-topk_v2
-transpose
-transpose_conv
diff --git a/tests/scripts/list/frameworktest_list.aarch64.acl_neon.txt b/tests/scripts/list/frameworktest_list.aarch64.acl_neon.txt
deleted file mode 100644
index b58d39ab7..000000000
--- a/tests/scripts/list/frameworktest_list.aarch64.acl_neon.txt
+++ /dev/null
@@ -1,41 +0,0 @@
-MODELS/inception_module
-MODELS/mobilenet
-add
-average_pool_2d
-cast
-concat
-conv_2d
-depthwise_conv_2d
-div
-embedding_lookup
-floor
-gather
-hashtable_lookup
-l2_normalization
-l2_pool_2d
-logistic
-max
-max_pool_2d
-mean
-min
-mul
-one_hot
-pack
-pad
-reduce_max
-reduce_mean
-relu
-relu6
-reshape
-resize_bilinear
-rsqrt
-slice
-softmax
-space_to_depth
-sqrt
-squeeze
-strided_slice
-sub
-tanh
-transpose
-transpose_conv
diff --git a/tests/scripts/list/frameworktest_list.armv7l.acl_cl.txt b/tests/scripts/list/frameworktest_list.armv7l.acl_cl.txt
deleted file mode 100644
index dd8d3b710..000000000
--- a/tests/scripts/list/frameworktest_list.armv7l.acl_cl.txt
+++ /dev/null
@@ -1,46 +0,0 @@
-MODELS/inception_module
-MODELS/mobilenet
-add
-average_pool_2d
-batch_to_space_nd2
-cast
-concat
-conv_2d
-custom
-depthwise_conv_2d
-div
-embedding_lookup
-exp
-floor
-fullyconnected
-gather
-hashtable_lookup
-l2_normalization
-l2_pool_2d
-max
-max_pool_2d
-mean
-min
-mul
-neg
-pack
-pad
-reduce_max
-reduce_mean
-reduce_sum/float
-relu
-relu6
-reshape
-resize_bilinear
-rsqrt
-slice
-softmax
-space_to_batch_nd2
-space_to_depth
-squeeze
-strided_slice
-sub
-tanh
-topk_v2
-transpose
-transpose_conv
diff --git a/tests/scripts/list/frameworktest_list.armv7l.acl_neon.txt b/tests/scripts/list/frameworktest_list.armv7l.acl_neon.txt
deleted file mode 100644
index 9df071bbb..000000000
--- a/tests/scripts/list/frameworktest_list.armv7l.acl_neon.txt
+++ /dev/null
@@ -1,42 +0,0 @@
-MODELS/inception_module
-MODELS/mobilenet
-add
-average_pool_2d
-cast
-concat
-conv_2d
-depthwise_conv_2d
-div
-embedding_lookup
-floor
-fullyconnected
-gather
-hashtable_lookup
-l2_normalization
-l2_pool_2d
-logistic
-max
-max_pool_2d
-mean
-min
-mul
-one_hot
-pack
-pad
-reduce_max
-reduce_mean
-relu
-relu6
-reshape
-resize_bilinear
-rsqrt
-slice
-softmax
-space_to_depth
-sqrt
-squeeze
-strided_slice
-sub
-tanh
-transpose
-transpose_conv
diff --git a/tests/scripts/list/frameworktest_list.armv7l.cpu.txt b/tests/scripts/list/frameworktest_list.armv7l.cpu.txt
deleted file mode 100644
index cf3297795..000000000
--- a/tests/scripts/list/frameworktest_list.armv7l.cpu.txt
+++ /dev/null
@@ -1,44 +0,0 @@
-MODELS/inception_module
-MODELS/mobilenet
-MODELS/mobilenet_quant8
-abs
-add
-average_pool_2d
-cast
-concat
-conv_2d
-depthwise_conv_2d
-div
-equal
-exp
-fullyconnected
-greater
-greater_equal
-less
-less_equal
-logistic
-max
-max_pool_2d
-mean
-min
-mul
-neg
-not_equal
-one_hot
-pack
-reduce_max
-reduce_sum
-reshape/reshape1
-rsqrt
-select
-shape
-sin
-slice
-strided_slice
-softmax
-squeeze
-sub
-tanh
-tile
-transpose
-zeros_like
diff --git a/tests/scripts/list/frameworktest_list.noarch.interp.txt b/tests/scripts/list/frameworktest_list.noarch.interp.txt
deleted file mode 100644
index 3555ee28e..000000000
--- a/tests/scripts/list/frameworktest_list.noarch.interp.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-MODELS/inception_module
-MODELS/mobilenet
-add
-average_pool_2d
-concat
-conv_2d
-depthwise_conv_2d
-fullyconnected/fc1
-logistic
-max_pool_2d
-pad
-relu
-relu6
-reshape/reshape1
-softmax
-tanh
diff --git a/tests/scripts/list/frameworktest_list.x86_64.cpu.txt b/tests/scripts/list/frameworktest_list.x86_64.cpu.txt
deleted file mode 100644
index 5750ec4de..000000000
--- a/tests/scripts/list/frameworktest_list.x86_64.cpu.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-MODELS/inception_module
-MODELS/mobilenet
-MODELS/mobilenet_quant8
-add
-average_pool_2d
-concat
-conv_2d
-depthwise_conv_2d
-fullyconnected
-logistic
-max_pool_2d
-mean
-reduce_max
-reduce_sum
-reshape/reshape1
-select
-softmax
-squeeze
-tile
-transpose
-zeros_like
diff --git a/tests/scripts/list/nnpkg_test_list.noarch.interp b/tests/scripts/list/nnpkg_test_list.noarch.interp
deleted file mode 100644
index cc4e034ba..000000000
--- a/tests/scripts/list/nnpkg_test_list.noarch.interp
+++ /dev/null
@@ -1,42 +0,0 @@
-Add_000.opt
-#ArgMax_000.opt
-#ArgMax_001.opt
-#ArgMax_002.opt
-#ArgMax_003.opt
-AveragePool2D_000.opt
-#AveragePool2D_U8_000.opt
-Concatenation_000.opt
-Conv2D_000.opt
-Conv2D_001.opt
-Conv2D_002.opt
-Conv2D_004.opt
-DepthwiseConv2D_000.opt
-DepthwiseConv2D_002.opt
-FullyConnected_000.opt
-FullyConnected_001.opt
-FullyConnected_003.opt
-#L2Normalize_U8_000.opt
-Logistic_000.opt
-#Logistic_U8_000.opt
-MaxPool2D_000.opt
-#Mean_000.opt
-#Mean_001.opt
-Mul_000.opt
-#Net_TConv_BN_000.opt
-#Net_UnpackAdd_001.opt
-Pad_000.opt
-Quantization_000.opt
-Reshape_000.opt
-Reshape_001.opt
-Reshape_002.opt
-Softmax_000.opt
-#SpaceToDepth_U8_000.opt
-#Split_000.opt
-#Tanh_U8_000.opt
-#TransposeConv_000.opt
-#TransposeConv_001.opt
-#Transpose_000.opt
-#Unpack_000.opt
-#Unpack_001.opt
-#Unpack_002.opt
-#Unpack_003.opt
diff --git a/tests/scripts/list/tflite_comparator.aarch64.acl_cl.list b/tests/scripts/list/tflite_comparator.aarch64.acl_cl.list
new file mode 100644
index 000000000..d856e97a4
--- /dev/null
+++ b/tests/scripts/list/tflite_comparator.aarch64.acl_cl.list
@@ -0,0 +1,45 @@
+MODELS/inception_module
+MODELS/mobilenet
+add
+average_pool_2d
+batch_to_space_nd2
+cast
+concat
+conv_2d
+depthwise_conv_2d
+div
+embedding_lookup
+exp
+floor
+fullyconnected/fc1
+fullyconnected/matmul2x2
+fullyconnected/weights_as_input
+gather
+hashtable_lookup
+l2_normalization
+max
+max_pool_2d
+mean
+min
+mul
+neg
+pack
+pad
+reduce_max
+reduce_mean
+reduce_sum/float
+relu
+relu6
+reshape
+resize_bilinear
+rsqrt
+slice
+softmax
+space_to_batch_nd2
+space_to_depth
+squeeze
+strided_slice
+sub
+tanh
+transpose
+transpose_conv
diff --git a/tests/scripts/list/tflite_comparator.aarch64.acl_neon.list b/tests/scripts/list/tflite_comparator.aarch64.acl_neon.list
new file mode 100644
index 000000000..9ee9e6aa2
--- /dev/null
+++ b/tests/scripts/list/tflite_comparator.aarch64.acl_neon.list
@@ -0,0 +1,40 @@
+MODELS/inception_module
+MODELS/mobilenet
+add
+average_pool_2d
+cast
+concat
+conv_2d
+depthwise_conv_2d
+div
+embedding_lookup
+floor
+gather
+hashtable_lookup
+l2_normalization
+logistic
+max
+max_pool_2d
+mean
+min
+mul
+one_hot
+pack
+pad
+reduce_max
+reduce_mean
+relu
+relu6
+reshape
+resize_bilinear
+rsqrt
+slice
+softmax
+space_to_depth
+sqrt
+squeeze
+strided_slice
+sub
+tanh
+transpose
+transpose_conv
diff --git a/tests/scripts/list/frameworktest_list.aarch64.cpu.txt b/tests/scripts/list/tflite_comparator.aarch64.cpu.list
index 4b4b7fb24..4b4b7fb24 100644
--- a/tests/scripts/list/frameworktest_list.aarch64.cpu.txt
+++ b/tests/scripts/list/tflite_comparator.aarch64.cpu.list
diff --git a/tests/scripts/list/tflite_comparator.armv7l.acl_cl.list b/tests/scripts/list/tflite_comparator.armv7l.acl_cl.list
new file mode 100644
index 000000000..d856e97a4
--- /dev/null
+++ b/tests/scripts/list/tflite_comparator.armv7l.acl_cl.list
@@ -0,0 +1,45 @@
+MODELS/inception_module
+MODELS/mobilenet
+add
+average_pool_2d
+batch_to_space_nd2
+cast
+concat
+conv_2d
+depthwise_conv_2d
+div
+embedding_lookup
+exp
+floor
+fullyconnected/fc1
+fullyconnected/matmul2x2
+fullyconnected/weights_as_input
+gather
+hashtable_lookup
+l2_normalization
+max
+max_pool_2d
+mean
+min
+mul
+neg
+pack
+pad
+reduce_max
+reduce_mean
+reduce_sum/float
+relu
+relu6
+reshape
+resize_bilinear
+rsqrt
+slice
+softmax
+space_to_batch_nd2
+space_to_depth
+squeeze
+strided_slice
+sub
+tanh
+transpose
+transpose_conv
diff --git a/tests/scripts/list/tflite_comparator.armv7l.acl_neon.list b/tests/scripts/list/tflite_comparator.armv7l.acl_neon.list
new file mode 100644
index 000000000..3c8166b8c
--- /dev/null
+++ b/tests/scripts/list/tflite_comparator.armv7l.acl_neon.list
@@ -0,0 +1,41 @@
+MODELS/inception_module
+MODELS/mobilenet
+add
+average_pool_2d
+cast
+concat
+conv_2d
+depthwise_conv_2d
+div
+embedding_lookup
+floor
+fullyconnected
+gather
+hashtable_lookup
+l2_normalization
+logistic
+max
+max_pool_2d
+mean
+min
+mul
+one_hot
+pack
+pad
+reduce_max
+reduce_mean
+relu
+relu6
+reshape
+resize_bilinear
+rsqrt
+slice
+softmax
+space_to_depth
+sqrt
+squeeze
+strided_slice
+sub
+tanh
+transpose
+transpose_conv
diff --git a/tests/scripts/list/tflite_comparator.armv7l.cpu.list b/tests/scripts/list/tflite_comparator.armv7l.cpu.list
new file mode 100644
index 000000000..aedab6333
--- /dev/null
+++ b/tests/scripts/list/tflite_comparator.armv7l.cpu.list
@@ -0,0 +1,43 @@
+MODELS/inception_module
+MODELS/mobilenet
+abs
+add
+average_pool_2d
+cast
+concat
+conv_2d
+depthwise_conv_2d
+div
+equal
+exp
+fullyconnected
+greater
+greater_equal
+less
+less_equal
+logistic
+max
+max_pool_2d
+mean
+min
+mul
+neg
+not_equal
+one_hot
+pack
+reduce_max
+reduce_sum
+reshape/reshape1
+rsqrt
+select
+shape
+sin
+slice
+strided_slice
+softmax
+squeeze
+sub
+tanh
+tile
+transpose
+zeros_like
diff --git a/tests/scripts/list/tflite_comparator.x86_64.cpu.list b/tests/scripts/list/tflite_comparator.x86_64.cpu.list
new file mode 100644
index 000000000..201a9de96
--- /dev/null
+++ b/tests/scripts/list/tflite_comparator.x86_64.cpu.list
@@ -0,0 +1,20 @@
+MODELS/inception_module
+MODELS/mobilenet
+add
+average_pool_2d
+concat
+conv_2d
+depthwise_conv_2d
+fullyconnected
+logistic
+max_pool_2d
+mean
+reduce_max
+reduce_sum
+reshape/reshape1
+select
+softmax
+squeeze
+tile
+transpose
+zeros_like
diff --git a/tests/scripts/list/tflite_loader_list.aarch64.txt b/tests/scripts/list/tflite_loader_list.aarch64.txt
deleted file mode 100644
index e04d89d3f..000000000
--- a/tests/scripts/list/tflite_loader_list.aarch64.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-MODELS/inception_module
-MODELS/mobilenet
-add
-average_pool_2d
-batch_to_space_nd2
-concat
-conv_2d/convolution1
-depthwise_conv_2d
-div
-exp
-fullyconnected/fc1
-logistic
-max
-max_pool_2d/maxpool1
-mean
-min
-mul
-pack
-pad
-reduce_max
-reduce_sum/float
-relu
-relu6
-reshape/reshape1
-resize_bilinear
-rsqrt
-slice
-softmax
-space_to_batch_nd2
-sqrt
-squeeze
-sub
-tanh
-transpose
-transpose_conv
diff --git a/tests/scripts/list/tflite_loader_list.armv7l.txt b/tests/scripts/list/tflite_loader_list.armv7l.txt
deleted file mode 100644
index e04d89d3f..000000000
--- a/tests/scripts/list/tflite_loader_list.armv7l.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-MODELS/inception_module
-MODELS/mobilenet
-add
-average_pool_2d
-batch_to_space_nd2
-concat
-conv_2d/convolution1
-depthwise_conv_2d
-div
-exp
-fullyconnected/fc1
-logistic
-max
-max_pool_2d/maxpool1
-mean
-min
-mul
-pack
-pad
-reduce_max
-reduce_sum/float
-relu
-relu6
-reshape/reshape1
-resize_bilinear
-rsqrt
-slice
-softmax
-space_to_batch_nd2
-sqrt
-squeeze
-sub
-tanh
-transpose
-transpose_conv
diff --git a/tests/scripts/merge_result_of_benchmark_nnpkg.py b/tests/scripts/merge_result_of_benchmark_nnpkg.py
index 195fd378c..7e69df0eb 100755
--- a/tests/scripts/merge_result_of_benchmark_nnpkg.py
+++ b/tests/scripts/merge_result_of_benchmark_nnpkg.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
#
# Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
#
diff --git a/tests/scripts/models/nnfw_api_gtest/add/config.sh b/tests/scripts/models/nnfw_api_gtest/add/config.sh
deleted file mode 100644
index e6e8677e7..000000000
--- a/tests/scripts/models/nnfw_api_gtest/add/config.sh
+++ /dev/null
@@ -1 +0,0 @@
-MODELFILE_NAME="add.zip"
diff --git a/tests/scripts/models/nnfw_api_gtest/add_invalid_manifest/config.sh b/tests/scripts/models/nnfw_api_gtest/add_invalid_manifest/config.sh
deleted file mode 100644
index 92c903274..000000000
--- a/tests/scripts/models/nnfw_api_gtest/add_invalid_manifest/config.sh
+++ /dev/null
@@ -1 +0,0 @@
-MODELFILE_NAME="add_invalid_manifest.zip"
diff --git a/tests/scripts/models/nnfw_api_gtest/add_no_manifest/config.sh b/tests/scripts/models/nnfw_api_gtest/add_no_manifest/config.sh
deleted file mode 100644
index 0d697a2a0..000000000
--- a/tests/scripts/models/nnfw_api_gtest/add_no_manifest/config.sh
+++ /dev/null
@@ -1 +0,0 @@
-MODELFILE_NAME="add_no_manifest.zip"
diff --git a/tests/scripts/models/nnfw_api_gtest/add_unspecified_rank_inputs/config.sh b/tests/scripts/models/nnfw_api_gtest/add_unspecified_rank_inputs/config.sh
deleted file mode 100644
index 2cc30f915..000000000
--- a/tests/scripts/models/nnfw_api_gtest/add_unspecified_rank_inputs/config.sh
+++ /dev/null
@@ -1 +0,0 @@
-MODELFILE_NAME="add_unspecified_rank_inputs.zip"
diff --git a/tests/scripts/models/nnfw_api_gtest/dynamic_tensor_reshape/config.sh b/tests/scripts/models/nnfw_api_gtest/dynamic_tensor_reshape/config.sh
deleted file mode 100644
index 8496509cb..000000000
--- a/tests/scripts/models/nnfw_api_gtest/dynamic_tensor_reshape/config.sh
+++ /dev/null
@@ -1 +0,0 @@
-MODELFILE_NAME="dynamic_tensor_reshape.zip"
diff --git a/tests/scripts/models/nnfw_api_gtest/if_dynamic/config.sh b/tests/scripts/models/nnfw_api_gtest/if_dynamic/config.sh
deleted file mode 100644
index 3b8506c6f..000000000
--- a/tests/scripts/models/nnfw_api_gtest/if_dynamic/config.sh
+++ /dev/null
@@ -1 +0,0 @@
-MODELFILE_NAME="if_dynamic.zip"
diff --git a/tests/scripts/models/nnfw_api_gtest/input_reshaping_add/config.sh b/tests/scripts/models/nnfw_api_gtest/input_reshaping_add/config.sh
deleted file mode 100644
index 4e5a636ee..000000000
--- a/tests/scripts/models/nnfw_api_gtest/input_reshaping_add/config.sh
+++ /dev/null
@@ -1 +0,0 @@
-MODELFILE_NAME="input_reshaping_add.zip"
diff --git a/tests/scripts/models/nnfw_api_gtest/neg/config.sh b/tests/scripts/models/nnfw_api_gtest/neg/config.sh
deleted file mode 100644
index fdf093686..000000000
--- a/tests/scripts/models/nnfw_api_gtest/neg/config.sh
+++ /dev/null
@@ -1 +0,0 @@
-MODELFILE_NAME="neg.zip"
diff --git a/tests/scripts/models/nnfw_api_gtest/unknown_dim_input_concat/config.sh b/tests/scripts/models/nnfw_api_gtest/unknown_dim_input_concat/config.sh
deleted file mode 100644
index a3cab4787..000000000
--- a/tests/scripts/models/nnfw_api_gtest/unknown_dim_input_concat/config.sh
+++ /dev/null
@@ -1 +0,0 @@
-MODELFILE_NAME="unknown_dim_input_concat.zip"
diff --git a/tests/scripts/models/nnfw_api_gtest/while_dynamic/config.sh b/tests/scripts/models/nnfw_api_gtest/while_dynamic/config.sh
deleted file mode 100644
index ff14d4e05..000000000
--- a/tests/scripts/models/nnfw_api_gtest/while_dynamic/config.sh
+++ /dev/null
@@ -1 +0,0 @@
-MODELFILE_NAME="while_dynamic.zip"
diff --git a/tests/scripts/models/run_test.sh b/tests/scripts/models/run_test.sh
index 32a277859..52ac8cd10 100755
--- a/tests/scripts/models/run_test.sh
+++ b/tests/scripts/models/run_test.sh
@@ -29,52 +29,20 @@ function command_exists() {
function Usage()
{
- echo "Usage: ./$0 --driverbin={such as tflite_run} {tests to test or empty for all of tests}"
- echo "Usage: ./$0 --driverbin=Product/out/bin/tflite_run --reportdir=report --tapname=verification.tap avgpool1 avgpool2"
+ echo "Usage: ${BASH_SOURCE[0]} [OPTIONS] {tests to test or empty for all of tests}"
+ echo "(Ex) : ${BASH_SOURCE[0]} --driverbin=Product/out/bin/onert_run --reportdir=report --tapname=verification.tap avgpool1 avgpool2"
echo ""
- echo "--download - (default=on) Download model files"
- echo "--run - (default=on) Test model files"
- echo "--driverbin - (default=../../Product/out/bin/tflite_run) Runner for runnning model tests"
- echo "--reportdir - (default=report) Directory to place tap files"
- echo "--tapname - (default=framework_test.tap) File name to be written for tap"
- echo "--md5 - (default=on) MD5 check when download model files"
- echo "--configdir - (default=$TEST_ROOT_PATH) Config directory to download and test model"
- echo "--cachedir - (default=$CACHE_ROOT_PATH) Directory to download model"
+ echo "--driverbin : Runner for runnning model tests"
+ echo "--reportdir : (default=$REPORT_DIR) Directory to place tap files"
+ echo "--tapname : (default=$TAP_NAME) File name to be written for tap"
+ echo "--configdir : (default=$TEST_ROOT_PATH) Config directory to download and test model"
+ echo "--cachedir : (default=$CACHE_ROOT_PATH) Directory to download model"
echo ""
}
-function need_download()
-{
- LOCAL_PATH=$1
- REMOTE_URL=$2
- if [ ! -e $LOCAL_PATH ]; then
- return 0;
- fi
- # Ignore checking md5 in cache
- # TODO Use "--md5" option only and remove IGNORE_MD5 environment variable
- if [ ! -z $IGNORE_MD5 ] && [ "$IGNORE_MD5" == "1" ]; then
- return 1
- fi
- if [ "$MD5_CHECK" = "off" ]; then
- return 1
- fi
-
- LOCAL_HASH=$(md5sum $LOCAL_PATH | awk '{ print $1 }')
- REMOTE_HASH=$(curl -ss $REMOTE_URL | md5sum | awk '{ print $1 }')
- # TODO Emit an error when Content-MD5 field was not found. (Server configuration issue)
- if [ "$LOCAL_HASH" != "$REMOTE_HASH" ]; then
- echo "Downloaded file is outdated or incomplete."
- return 0
- fi
- return 1
-}
-
-DRIVER_BIN=""
+DRIVER_BIN="$NNFW_HOME/Product/out/bin/onert_run"
TAP_NAME="framework_test.tap"
TEST_LIST=()
-DOWNLOAD_MODEL="on"
-RUN_TEST="on"
-MD5_CHECK="on"
# Support environment variable setting for mirror server
FIXED_MODELFILE_SERVER="${MODELFILE_SERVER:-}"
@@ -95,15 +63,6 @@ do
--tapname=*)
TAP_NAME=${i#*=}
;;
- --download=*)
- DOWNLOAD_MODE=${i#*=}
- ;;
- --md5=*)
- MD5_CHECK=${i#*=}
- ;;
- --run=*)
- RUN_TEST=${i#*=}
- ;;
--configdir=*)
TEST_ROOT_PATH=${i#*=}
;;
@@ -121,17 +80,13 @@ if [[ ${#TEST_LIST[@]} -eq 0 ]]; then
RUN_DISABLED="false"
fi
-if [ ! -n "$DRIVER_BIN" ]; then
- DRIVER_BIN="$NNFW_HOME/Product/out/bin/tflite_run"
-fi
-
if [ ! -d "$TEST_ROOT_PATH" ]; then
echo "Cannot find config directory for test: please set proper configdir"
exit 1
fi
# Check test driver setting
-if ! command_exists $DRIVER_BIN && [ "$RUN_TEST" = "on" ]; then
+if ! command_exists $DRIVER_BIN ; then
echo "Cannot find test driver" $DRIVER_BIN ": please set proper DRIVER_BIN"
exit 1
fi
@@ -155,7 +110,7 @@ run_tests()
# Test configure initialization
((i++))
STATUS="enabled"
- MODELFILE_SERVER_PATH=""
+ MODELFILE_URL_BASE=""
MODELFILE_NAME=""
source $TEST_ROOT_PATH/$TEST_NAME/config.sh
@@ -168,13 +123,12 @@ run_tests()
continue
fi
- TEST_CACHE_PATH=$CACHE_ROOT_PATH/$TEST_NAME
- MODELFILE=$TEST_CACHE_PATH/$MODELFILE_NAME
+ MODELFILE=$CACHE_ROOT_PATH/$MODELFILE_NAME
# Find model file for downloaded by zip
if [ "${MODELFILE_NAME##*.}" = "zip" ]; then
- pushd $TEST_CACHE_PATH
- MODELFILE=$TEST_CACHE_PATH/$(ls *.tflite)
+ pushd $CACHE_ROOT_PATH
+ MODELFILE=$CACHE_ROOT_PATH/$(ls ${MODELFILE_NAME%.zip}/*.tflite)
popd
fi
@@ -195,57 +149,6 @@ run_tests()
return $TOTAL_RESULT
}
-download_tests()
-{
- SELECTED_TESTS=$@
-
- echo ""
- echo "Downloading tests:"
- echo "======================"
- for TEST_NAME in $SELECTED_TESTS; do
- echo $TEST_NAME
- done
- echo "======================"
-
- i=0
- for TEST_NAME in $SELECTED_TESTS; do
- # Test configure initialization
- ((i++))
- MODELFILE_SERVER_PATH=""
- MODELFILE_NAME=""
- source $TEST_ROOT_PATH/$TEST_NAME/config.sh
-
- TEST_CACHE_PATH=$CACHE_ROOT_PATH/$TEST_NAME
- MODELFILE=$TEST_CACHE_PATH/$MODELFILE_NAME
- MODELFILE_URL="$MODELFILE_SERVER/$MODELFILE_NAME"
- if [ -n "$FIXED_MODELFILE_SERVER" ]; then
- MODELFILE_URL="$FIXED_MODELFILE_SERVER/$MODELFILE_NAME"
- fi
-
- # Download model file
- if [ ! -e $TEST_CACHE_PATH ]; then
- mkdir -p $TEST_CACHE_PATH
- fi
-
- # Download unless we have it in cache (Also check md5sum)
- if need_download "$MODELFILE" "$MODELFILE_URL"; then
- echo ""
- echo "Download test file for $TEST_NAME"
- echo "======================"
-
- rm -f $MODELFILE # Remove invalid file if exists
- pushd $TEST_CACHE_PATH
- wget -nv $MODELFILE_URL
- if [ "${MODELFILE_NAME##*.}" == "zip" ]; then
- unzip -o $MODELFILE_NAME
- fi
- popd
- fi
-
- done
-}
-
-
find_tests()
{
local TEST_DIRS="$@"
@@ -273,12 +176,6 @@ find_tests()
mkdir -p $REPORT_DIR
TESTS_TO_RUN=$(find_tests ${TEST_LIST[@]})
+run_tests $TESTS_TO_RUN
-if [ "$DOWNLOAD_MODEL" = "on" ]; then
- download_tests $TESTS_TO_RUN
-fi
-
-if [ "$RUN_TEST" = "on" ]; then
- run_tests $TESTS_TO_RUN
-fi
exit $?
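
After this change run_test.sh only runs tests; downloading is prepare-model's job. The new two-step flow, sketched:

    # 1) populate the model cache once
    Product/out/test/onert-test prepare-model --cachedir=Product/out/test/cache
    # 2) run selected models against a driver
    tests/scripts/models/run_test.sh --driverbin=Product/out/bin/onert_run \
        --cachedir=Product/out/test/cache --reportdir=report add conv_2d
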
diff --git a/tests/scripts/models/run_test_android.sh b/tests/scripts/models/run_test_android.sh
new file mode 100644
index 000000000..74b5cdd46
--- /dev/null
+++ b/tests/scripts/models/run_test_android.sh
@@ -0,0 +1,186 @@
+#!/system/bin/sh
+#
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+MY_PATH="$( cd "$( dirname "$0" )" && pwd )"
+NNFW_HOME="$(dirname $(dirname $(dirname ${MY_PATH})))"
+
+CACHE_ROOT_PATH=$MY_PATH/"cache"
+TEST_ROOT_PATH=$MY_PATH/"tflite"
+REPORT_DIR="report"
+
+RUN_DISABLED="true"
+
+function Usage()
+{
+ echo "Usage: ./$0 --driverbin={such as tflite_run} {tests to test or empty for all of tests}"
+ echo "Usage: ./$0 --driverbin=Product/out/bin/tflite_run --reportdir=report --tapname=verification.tap avgpool1 avgpool2"
+ echo ""
+ echo "--run - (default=on) Test model files"
+ echo "--driverbin - (default=../../Product/out/bin/tflite_run) Runner for runnning model tests"
+ echo "--reportdir - (default=report) Directory to place tap files"
+ echo "--tapname - (default=framework_test.tap) File name to be written for tap"
+ echo "--configdir - (default=$TEST_ROOT_PATH) Config directory to download and test model"
+ echo "--cachedir - (default=$CACHE_ROOT_PATH) Directory to download model"
+ echo ""
+}
+
+DRIVER_BIN=""
+TAP_NAME="framework_test.tap"
+TEST_LIST=()
+RUN_TEST="on"
+MD5_CHECK="off"
+
+# Support environment variable setting for mirror server
+FIXED_MODELFILE_SERVER="${MODELFILE_SERVER:-}"
+
+for i in "$@"
+do
+ case $i in
+ -h|--help|help)
+ Usage
+ exit 1
+ ;;
+ --driverbin=*)
+ DRIVER_BIN=${i#*=}
+ ;;
+ --reportdir=*)
+ REPORT_DIR=${i#*=}
+ ;;
+ --tapname=*)
+ TAP_NAME=${i#*=}
+ ;;
+ --run=*)
+ RUN_TEST=${i#*=}
+ ;;
+ --configdir=*)
+ TEST_ROOT_PATH=${i#*=}
+ ;;
+ --cachedir=*)
+ CACHE_ROOT_PATH=${i#*=}
+ ;;
+ *)
+ TEST_LIST+=( $i )
+ ;;
+ esac
+ shift
+done
+
+if [[ ${#TEST_LIST[@]} -eq 0 ]]; then
+ RUN_DISABLED="false"
+fi
+
+if [ ! -n "$DRIVER_BIN" ]; then
+ DRIVER_BIN="$NNFW_HOME/Product/out/bin/tflite_run"
+fi
+
+if [ ! -d "$TEST_ROOT_PATH" ]; then
+ echo "Cannot find config directory for test: please set proper configdir"
+ exit 1
+fi
+
+run_tests()
+{
+ echo "1..$#" > $REPORT_DIR/$TAP_NAME
+ SELECTED_TESTS=$@
+
+ echo ""
+ echo "Running tests:"
+ echo "======================"
+ for TEST_NAME in $SELECTED_TESTS; do
+ echo $TEST_NAME
+ done
+ echo "======================"
+
+ TOTAL_RESULT=0 # 0(normal) or 1(abnormal)
+ i=0
+ for TEST_NAME in $SELECTED_TESTS; do
+ # Test configure initialization
+ ((i++))
+ STATUS="enabled"
+ MODELFILE_SERVER_PATH=""
+ MODELFILE_NAME=""
+ source $TEST_ROOT_PATH/$TEST_NAME/config.sh
+
+ LOWER_STATUS="$(echo $STATUS | awk '{print tolower($0)}')"
+ if [ "$LOWER_STATUS" == "disabled" ] && [ "$RUN_DISABLED" == "false" ]; then
+ echo ""
+ echo "Skip $TEST_NAME"
+ echo "======================"
+ echo "ok $i # skip $TEST_NAME" >> $REPORT_DIR/$TAP_NAME
+ continue
+ fi
+
+ MODELFILE=$CACHE_ROOT_PATH/$MODELFILE_NAME
+
+ # Find model file for downloaded by zip
+ if [ "${MODELFILE_NAME##*.}" = "zip" ]; then
+ __PWD=$(pwd)
+ cd $CACHE_ROOT_PATH
+ MODELFILE=$CACHE_ROOT_PATH/$(ls ${MODELFILE_NAME%.zip}/*.tflite)
+ cd $__PWD
+ fi
+
+ echo ""
+ echo "Run $TEST_NAME"
+ echo "======================"
+
+ # Run driver to test framework
+ $DRIVER_BIN $MODELFILE
+
+ if [[ $? -eq 0 ]]; then
+ echo "ok $i - $TEST_NAME" >> $REPORT_DIR/$TAP_NAME
+ else
+ echo "not ok $i - $TEST_NAME" >> $REPORT_DIR/$TAP_NAME
+ TOTAL_RESULT=1
+ fi
+ done
+ return $TOTAL_RESULT
+}
+
+find_tests()
+{
+ local TEST_DIRS="$@"
+ local TESTS_TO_RUN=""
+
+ if [[ $# -eq 0 ]]; then
+ TEST_DIRS="."
+ fi
+
+ shift $#
+
+ __PWD=$(pwd)
+ cd $TEST_ROOT_PATH
+ for DIR in $TEST_DIRS; do
+ if [ -d "$DIR" ]; then
+ TESTS_FOUND=$(find "$DIR" -type f -name 'config.sh' -exec dirname {} \;| sed 's|^./||' | sort)
+ TESTS_TO_RUN="$TESTS_TO_RUN $TESTS_FOUND"
+ else
+ echo "Test $DIR was not found. This test is not added." 1>&2
+ fi
+ done
+ cd $__PWD
+
+ echo $TESTS_TO_RUN
+}
+
+mkdir -p $REPORT_DIR
+TESTS_TO_RUN=$(find_tests ${TEST_LIST[@]})
+
+if [ "$RUN_TEST" = "on" ]; then
+ run_tests $TESTS_TO_RUN
+fi
+
+exit 0
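
On-device usage mirrors run_test.sh, but through the Android shell (a sketch; the on-device layout is an assumption):

    adb shell /system/bin/sh /data/local/tmp/test/models/run_test_android.sh \
        --driverbin=/data/local/tmp/Product/out/bin/tflite_run \
        --cachedir=/data/local/tmp/test/models/cache add conv_2d
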
diff --git a/tests/scripts/models/tflite/MODELS/inception_nonslim/config.sh b/tests/scripts/models/tflite/MODELS/inception_nonslim/config.sh
index 39f5d772d..69c48d029 100755
--- a/tests/scripts/models/tflite/MODELS/inception_nonslim/config.sh
+++ b/tests/scripts/models/tflite/MODELS/inception_nonslim/config.sh
@@ -1,2 +1,2 @@
-MODELFILE_SERVER_PATH="https://storage.googleapis.com/download.tensorflow.org/models/tflite"
+MODELFILE_URL_BASE="https://storage.googleapis.com/download.tensorflow.org/models/tflite"
MODELFILE_NAME="inception_v3_2015_2017_11_10.zip"
diff --git a/tests/scripts/models/tflite/MODELS/inception_slim/config.sh b/tests/scripts/models/tflite/MODELS/inception_slim/config.sh
index 1c0cf3ef5..dc798a555 100755
--- a/tests/scripts/models/tflite/MODELS/inception_slim/config.sh
+++ b/tests/scripts/models/tflite/MODELS/inception_slim/config.sh
@@ -1,2 +1,2 @@
-MODELFILE_SERVER_PATH="https://storage.googleapis.com/download.tensorflow.org/models/tflite"
+MODELFILE_URL_BASE="https://storage.googleapis.com/download.tensorflow.org/models/tflite"
MODELFILE_NAME="inception_v3_slim_2016_android_2017_11_10.zip"
diff --git a/tests/scripts/models/tflite/MODELS/mobilenet/config.sh b/tests/scripts/models/tflite/MODELS/mobilenet/config.sh
index b23d687cd..8ee45f7fb 100755
--- a/tests/scripts/models/tflite/MODELS/mobilenet/config.sh
+++ b/tests/scripts/models/tflite/MODELS/mobilenet/config.sh
@@ -1,2 +1,2 @@
-MODELFILE_SERVER_PATH="https://storage.googleapis.com/download.tensorflow.org/models/tflite"
+MODELFILE_URL_BASE="https://storage.googleapis.com/download.tensorflow.org/models/tflite"
MODELFILE_NAME="mobilenet_v1_0.25_128_float_2017_11_08.zip"
diff --git a/tests/scripts/models/tflite/MODELS/mobilenet_quant8/config.sh b/tests/scripts/models/tflite/MODELS/mobilenet_quant8/config.sh
index 2e304df92..e4700fc85 100755
--- a/tests/scripts/models/tflite/MODELS/mobilenet_quant8/config.sh
+++ b/tests/scripts/models/tflite/MODELS/mobilenet_quant8/config.sh
@@ -1,2 +1,2 @@
-MODELFILE_SERVER_PATH="https://storage.googleapis.com/download.tensorflow.org/models/tflite"
+MODELFILE_URL_BASE="https://storage.googleapis.com/download.tensorflow.org/models/tflite"
MODELFILE_NAME="mobilenet_v1_1.0_224_quant_and_labels.zip"
diff --git a/tests/scripts/models/tflite/custom/squared_difference/config.sh b/tests/scripts/models/tflite/custom/squared_difference/config.sh
deleted file mode 100755
index 745a84447..000000000
--- a/tests/scripts/models/tflite/custom/squared_difference/config.sh
+++ /dev/null
@@ -1 +0,0 @@
-MODELFILE_NAME="custom_squared_diff_test.tflite"
diff --git a/tests/scripts/onert-test b/tests/scripts/onert-test
index 99c107c52..f5ed47419 100644
--- a/tests/scripts/onert-test
+++ b/tests/scripts/onert-test
@@ -16,12 +16,17 @@
[[ "${BASH_SOURCE[0]}" != "${0}" ]] && echo "Please don't source ${BASH_SOURCE[0]}, execute it" && return
+# Common variables used by the test commands
+# DRIVER_PATH: folder with the test driver and related resources
+# INSTALL_PATH: folder where the test package is installed
+# CACHE_PATH: cache folder for downloaded test resources
DRIVER_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
INSTALL_PATH="$(dirname $DRIVER_PATH)"
-COMMAND_PATH=$INSTALL_PATH/test/command
-BIN_PATH=$INSTALL_PATH/bin
+CACHE_PATH=$DRIVER_PATH/cache
-export PATH=$BIN_PATH:$PATH
+export PATH=$INSTALL_PATH/bin:$PATH
+
+COMMAND_PATH=$DRIVER_PATH/command
function Usage()
{
diff --git a/tests/scripts/test-driver.sh b/tests/scripts/test-driver.sh
deleted file mode 100755
index aa97d95b0..000000000
--- a/tests/scripts/test-driver.sh
+++ /dev/null
@@ -1,163 +0,0 @@
-#!/bin/bash
-#
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-set -e
-# NOTE: Supposed that this script would be executed with an artifact path.
-# The artifact path has tests/(test suite) and Product/
-# Reference this PR(https://github.sec.samsung.net/STAR/nnfw/pull/375).
-
-function Usage()
-{
- echo "Usage: ./$0 --artifactpath=. # run all tests"
- echo "Usage: ./$0 --artifactpath=/home/dragon/nnfw --frameworktest --verification --benchmark # run fw test & verfication and benchmark"
- echo ""
- echo "--artifactpath - (default={test-driver.sh's path}/../../) it should contain tests/ and Product/"
- echo ""
- echo "Following options are needed when you want to tests of specific types. If you don't pass any one, unittest and verification will be run"
- echo "--frameworktest - (default=off) run framework test"
- echo "--verification - (default=on) run verification"
- echo "--frameworktest_list_file - filepath of model list for test"
- echo ""
- echo "Following option is only needed when you want to test benchmark."
- echo "--benchmark_onert_op - (default=off) run benchmark per operation on onert"
- echo ""
- echo "etc."
- echo "--framework_driverbin - (default=../../Product/out/bin/tflite_run) runner for runnning framework tests"
- echo "--verification_driverbin - (default=../../Product/out/bin/nnapi_test) runner for runnning verification tests"
- echo ""
- echo "--reportdir - (default=\$ARTIFACT_PATH/report) directory to save report"
- echo ""
-}
-
-TEST_DRIVER_DIR="$( cd "$( dirname "${BASH_SOURCE}" )" && pwd )"
-ARTIFACT_PATH="$TEST_DRIVER_DIR/../../"
-FRAMEWORK_DRIVER_BIN=""
-VERIFICATION_DRIVER_BIN=""
-ALLTEST_ON="true"
-FRAMEWORKTEST_ON="false"
-VERIFICATION_ON="false"
-BENCHMARK_ONERT_OP_ON="false"
-REPORT_DIR=""
-
-for i in "$@"
-do
- case $i in
- -h|--help|help)
- Usage
- exit 1
- ;;
- --artifactpath=*)
- ARTIFACT_PATH=${i#*=}
- ;;
- --framework_driverbin=*)
- FRAMEWORK_DRIVER_BIN=${i#*=}
- ;;
- --verification_driverbin=*)
- VERIFICATION_DRIVER_BIN=${i#*=}
- ;;
- --frameworktest)
- ALLTEST_ON="false"
- FRAMEWORKTEST_ON="true"
- ;;
- --frameworktest_list_file=*)
- FRAMEWORKTEST_LIST_FILE=$PWD/${i#*=}
- if [ ! -e "$FRAMEWORKTEST_LIST_FILE" ]; then
- echo "Pass on with proper frameworktest_list_file"
- exit 1
- fi
- ;;
- --verification)
- ALLTEST_ON="false"
- VERIFICATION_ON="true"
- ;;
- --benchmark_onert_op)
- ALLTEST_ON="false"
- BENCHMARK_ONERT_OP_ON="true"
- ;;
- --reportdir=*)
- REPORT_DIR=${i#*=}
- ;;
- *)
- # Be careful that others params are handled as $ARTIFACT_PATH
- ARTIFACT_PATH="$i"
- ;;
- esac
- shift
-done
-
-ARTIFACT_PATH="$(readlink -f $ARTIFACT_PATH)"
-
-if [ -z "$UNIT_TEST_DIR" ]; then
- UNIT_TEST_DIR=$ARTIFACT_PATH/Product/out/unittest
-fi
-
-if [ -z "$REPORT_DIR" ]; then
- REPORT_DIR=$ARTIFACT_PATH/report
-fi
-
-source $TEST_DRIVER_DIR/common.sh
-
-# Run tflite_run with various tflite models
-if [ "$FRAMEWORKTEST_ON" == "true" ]; then
- if [ -z "$FRAMEWORK_DRIVER_BIN" ]; then
- FRAMEWORK_DRIVER_BIN=$ARTIFACT_PATH/Product/out/bin/tflite_run
- fi
-
- $TEST_DRIVER_DIR/test_framework.sh \
- --driverbin=$FRAMEWORK_DRIVER_BIN \
- --reportdir=$REPORT_DIR \
- --tapname=framework_test.tap \
- --logname=framework_test.log \
- --testname="Frameworktest" \
- --frameworktest_list_file=${FRAMEWORKTEST_LIST_FILE:-}
-fi
-
-# Run nnapi_test with various tflite models
-if [ "$ALLTEST_ON" == "true" ] || [ "$VERIFICATION_ON" == "true" ]; then
- if [ -z "$VERIFICATION_DRIVER_BIN" ]; then
- VERIFICATION_DRIVER_BIN=$ARTIFACT_PATH/Product/out/bin/nnapi_test
- fi
-
- # verification uses the same script as frameworktest does
- $TEST_DRIVER_DIR/test_framework.sh \
- --driverbin=$VERIFICATION_DRIVER_BIN \
- --reportdir=$REPORT_DIR \
- --tapname=verification_test.tap \
- --logname=verification_test.log \
- --testname="Verification" \
- --frameworktest_list_file=${FRAMEWORKTEST_LIST_FILE:-}
-fi
-
-if [ "$BENCHMARK_ONERT_OP_ON" == "true" ]; then
- DRIVER_BIN=$ARTIFACT_PATH/Product/out/bin/tflite_run
-
- $TEST_DRIVER_DIR/benchmark_nnapi.sh \
- --test_op \
- --driverbin=$DRIVER_BIN \
- --reportdir=$REPORT_DIR/benchmark_op \
- --modelfilepath=$ARTIFACT_PATH/tests/scripts/models
-fi
-
-# Make json file. Actually, this process is only needed on CI. That's why it is in test-driver.sh.
-if [ "$BENCHMARK_ONERT_OP_ON" == "true" ]; then
- # functions to fill json with benchmark results
- source $ARTIFACT_PATH/tests/scripts/print_to_json.sh
- if [ "$BENCHMARK_ONERT_OP_ON" == "true" ]; then
- print_to_json $REPORT_DIR/benchmark_op $REPORT_DIR "benchmark_op_result.json"
- else
- print_to_json $REPORT_DIR/benchmark $REPORT_DIR "benchmark_result.json"
- fi
-fi
diff --git a/tests/scripts/test_framework.sh b/tests/scripts/test_framework.sh
deleted file mode 100755
index 6bf9c89c5..000000000
--- a/tests/scripts/test_framework.sh
+++ /dev/null
@@ -1,104 +0,0 @@
-#!/bin/bash
-#
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-MY_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-
-FWTEST_DRIVER_BIN=
-FWTEST_REPORT_DIR=
-FWTEST_TAP_NAME=
-FWTEST_LOG_NAME=
-FWTEST_TEST_NAME=
-
-function Usage()
-{
- echo "Usage Example:"
- echo "./$0 \\"
- echo " --driverbin=Product/out/bin/tflite_run \\ # Test driver path"
- echo " --frameworktest_list_file=tests/scripts/list/frameworktest_list.armv7l.cpu.txt \\"
- echo " --reportdir=report \\ # Directory for the report files will be saved"
- echo " --tapname=framework_test.tap \\ # Tap file name"
- echo " --logname=framework_test.log \\ # Log file name"
- echo " --testname=Frameworktest # Name of the test just a label of tests"
-
- exit 1
-}
-
-for i in "$@"
-do
- case $i in
- -h|--help|help)
- Usage
- ;;
- --driverbin=*)
- FWTEST_DRIVER_BIN=${i#*=}
- ;;
- --reportdir=*)
- FWTEST_REPORT_DIR=${i#*=}
- ;;
- --tapname=*)
- FWTEST_TAP_NAME=${i#*=}
- ;;
- --logname=*)
- FWTEST_LOG_NAME=${i#*=}
- ;;
- --testname=*)
- FWTEST_TEST_NAME=${i#*=}
- ;;
- --frameworktest_list_file=*)
- FRAMEWORKTEST_LIST_FILE=${i#*=}
- ;;
- esac
- shift
-done
-
-[ ! -z "$FWTEST_DRIVER_BIN" ] || Usage
-[ ! -z "$FWTEST_REPORT_DIR" ] || Usage
-[ ! -z "$FWTEST_TAP_NAME" ] || Usage
-[ ! -z "$FWTEST_LOG_NAME" ] || Usage
-[ ! -z "$FWTEST_TEST_NAME" ] || Usage
-
-if [ ! -e "$FWTEST_REPORT_DIR" ]; then
- mkdir -p $FWTEST_REPORT_DIR
-fi
-
-echo ""
-echo "============================================"
-echo "$FWTEST_TEST_NAME with $(basename $FWTEST_DRIVER_BIN) ..."
-
-if [ ! -z "$FRAMEWORKTEST_LIST_FILE" ]; then
- MODELLIST=$(cat "${FRAMEWORKTEST_LIST_FILE}")
-fi
-
-$MY_PATH/models/run_test.sh --driverbin=$FWTEST_DRIVER_BIN \
- --reportdir=$FWTEST_REPORT_DIR \
- --tapname=$FWTEST_TAP_NAME \
- ${MODELLIST:-} \
- > $FWTEST_REPORT_DIR/$FWTEST_LOG_NAME 2>&1
-FWTEST_RESULT=$?
-if [[ $FWTEST_RESULT -ne 0 ]]; then
- echo ""
- cat $FWTEST_REPORT_DIR/$FWTEST_TAP_NAME
- echo ""
- echo "$FWTEST_TEST_NAME failed... exit code: $FWTEST_RESULT"
- echo "============================================"
- echo ""
- exit $FWTEST_RESULT
-fi
-
-echo ""
-cat $FWTEST_REPORT_DIR/$FWTEST_TAP_NAME
-echo "============================================"
-echo ""
diff --git a/tests/scripts/test_scheduler_with_profiling.sh b/tests/scripts/test_scheduler_with_profiling.sh
index b88cae838..6ee289033 100755
--- a/tests/scripts/test_scheduler_with_profiling.sh
+++ b/tests/scripts/test_scheduler_with_profiling.sh
@@ -1,5 +1,7 @@
#!/bin/bash
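+# Fail fast: exit on the first failing command, including failures inside pipelines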
+set -eo pipefail
+
MY_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
source $MY_PATH/common.sh
@@ -7,11 +9,10 @@ BACKEND_CNT=3
# Run profiler BACKEND_CNT+1 times: on each run of the first BACKEND_CNT runs it will
# collect metrics for one unmeasured backend. On the last run metrics for data transfer
PROFILING_RUN_CNT=$((BACKEND_CNT+1))
-TEST_DRIVER_DIR="$( cd "$( dirname "${BASH_SOURCE}" )" && pwd )"
-ARTIFACT_PATH="$TEST_DRIVER_DIR/../.."
-BENCHMARK_DRIVER_BIN=$ARTIFACT_PATH/Product/out/bin/tflite_run
+ARTIFACT_PATH="$MY_PATH/../.."
+BENCHMARK_DRIVER_BIN=$INSTALL_PATH/bin/onert_run
REPORT_DIR=$ARTIFACT_PATH/report
-RUN_TEST_SH=$ARTIFACT_PATH/tests/scripts/models/run_test.sh
+RUN_TEST_SH=$INSTALL_PATH/test/models/run_test.sh
BENCHMARK_MODEL_LIST="MODELS/inception_nonslim MODELS/inception_slim MODELS/mobilenet"
if [ ! -e "$RUN_TEST_SH" ]; then
@@ -19,10 +20,18 @@ if [ ! -e "$RUN_TEST_SH" ]; then
exit 1
fi
-
BENCHMARK_REPORT_DIR=$REPORT_DIR/benchmark
BENCHMARK_MODELS_FILE=$BENCHMARK_REPORT_DIR/benchmark_models.txt
+# Cleanup report files
+rm -rf $BENCHMARK_REPORT_DIR
+rm -f $BENCHMARK_MODELS_FILE
+mkdir -p $BENCHMARK_REPORT_DIR
+touch $BENCHMARK_MODELS_FILE
+
+# Prepare models
+prepare_test_model
+
function run_without_sched()
{
local RESULT_SCH_INT=$1
@@ -39,7 +48,7 @@ function run_without_sched()
RESULT=$(get_result_of_benchmark_test $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
- printf -v RESULT_INT '%d' $RESULT 2>/dev/null
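+ # '%.0f' rounds fractional millisecond results; '%d' would fail on a float string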
+ printf -v RESULT_INT '%.0f' $RESULT
PERCENTAGE=$((100-RESULT_SCH_INT*100/RESULT_INT))
echo "$RESULT ms. Parallel scheduler is $PERCENTAGE% faster"
}
@@ -53,11 +62,10 @@ function run_benchmark_test()
export COUNT=5
echo "============================================"
local i=0
- export USE_NNAPI=1
export BACKENDS="acl_cl;acl_neon;cpu"
# Remove metrics so that profiler can get metrics for operations
# with input&output sizes the same as the model
- rm "exec_time.json" 2>/dev/null
+ rm -f "exec_time.json" 2>/dev/null
for MODEL in $BENCHMARK_MODEL_LIST; do
echo "Benchmark test with `basename $BENCHMARK_DRIVER_BIN` & `echo $MODEL`"
@@ -79,10 +87,10 @@ function run_benchmark_test()
print_with_dots "Profiling run #$j out of $PROFILING_RUN_CNT"
- $RUN_TEST_SH --driverbin=$BENCHMARK_DRIVER_BIN $MODEL > $LOG_FILE 2>&1
+ RESULT=$(get_result_of_benchmark_test $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
RET=$?
if [[ $RET -ne 0 ]]; then
- echo "Profiling $MODEL aborted in run#$j... exit code: $RET"xX
+ echo "Profiling $MODEL aborted in run#$j... exit code: $RET"
exit $RET
fi
echo "finished"
@@ -91,7 +99,6 @@ function run_benchmark_test()
done
unset ONERT_LOG_ENABLE
-
##################################################################################
# Turn off profiling
##################################################################################
@@ -108,7 +115,7 @@ function run_benchmark_test()
RESULT=$(get_result_of_benchmark_test $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
echo "$RESULT ms"
- printf -v RESULT_SCH_INT '%d' $RESULT 2>/dev/null
+ printf -v RESULT_SCH_INT '%.0f' $RESULT
mv "after_lower_subg-0.dot" $REPORT_MODEL_DIR/"after_lower_subg-0_parallel.dot"
@@ -122,7 +129,7 @@ function run_benchmark_test()
RESULT=$(get_result_of_benchmark_test $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
- printf -v RESULT_INT '%d' $RESULT 2>/dev/null
+ printf -v RESULT_INT '%.0f' $RESULT
PERCENTAGE=$((100-RESULT_SCH_INT*100/RESULT_INT))
echo "$RESULT ms. Parallel scheduler is $PERCENTAGE% faster"
@@ -131,7 +138,6 @@ function run_benchmark_test()
mv "exec_time.json" $REPORT_MODEL_DIR
# Save the dot graph
mv "after_lower_subg-0.dot" $REPORT_MODEL_DIR/"after_lower_subg-0_linear.dot"
- unset GRAPH_DOT_DUMP
##################################################################################
# Turn off scheduler
@@ -140,15 +146,31 @@ function run_benchmark_test()
# Run LinearExecutor on acl_cl without scheduler
run_without_sched $RESULT_SCH_INT $REPORT_MODEL_DIR $MODEL "Linear" "acl_cl"
+ mv "after_lower_subg-0.dot" $REPORT_MODEL_DIR/"after_lower_subg-0_linear_acl_cl.dot"
# Run LinearExecutor on acl_neon without scheduler
run_without_sched $RESULT_SCH_INT $REPORT_MODEL_DIR $MODEL "Linear" "acl_neon"
+ mv "after_lower_subg-0.dot" $REPORT_MODEL_DIR/"after_lower_subg-0_linear_acl_neon.dot"
# Run ParallelExecutor on acl_cl without scheduler
run_without_sched $RESULT_SCH_INT $REPORT_MODEL_DIR $MODEL "Parallel" "acl_cl"
+ mv "after_lower_subg-0.dot" $REPORT_MODEL_DIR/"after_lower_subg-0_parallel_acl_cl.dot"
# Run ParallelExecutor on acl_neon without scheduler
run_without_sched $RESULT_SCH_INT $REPORT_MODEL_DIR $MODEL "Parallel" "acl_neon"
+ mv "after_lower_subg-0.dot" $REPORT_MODEL_DIR/"after_lower_subg-0_parallel_acl_neon.dot"
+
+ unset GRAPH_DOT_DUMP
+
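+ # Render the dumped graphs to PNG only when graphviz is available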
+ if command -v dot > /dev/null;
+ then
+ dot -Tpng $REPORT_MODEL_DIR/"after_lower_subg-0_parallel.dot" -o $REPORT_MODEL_DIR/"parallel.png"
+ dot -Tpng $REPORT_MODEL_DIR/"after_lower_subg-0_linear.dot" -o $REPORT_MODEL_DIR/"linear.png"
+ dot -Tpng $REPORT_MODEL_DIR/"after_lower_subg-0_linear_acl_cl.dot" -o $REPORT_MODEL_DIR/"linear_acl_cl.png"
+ dot -Tpng $REPORT_MODEL_DIR/"after_lower_subg-0_linear_acl_neon.dot" -o $REPORT_MODEL_DIR/"linear_acl_neon.png"
+ dot -Tpng $REPORT_MODEL_DIR/"after_lower_subg-0_parallel_acl_cl.dot" -o $REPORT_MODEL_DIR/"paralle_acl_cl.png"
+ dot -Tpng $REPORT_MODEL_DIR/"after_lower_subg-0_parallel_acl_neon.dot" -o $REPORT_MODEL_DIR/"parallel_acl_neon.png"
+ fi
if [[ $i -ne $(echo $BENCHMARK_MODEL_LIST | wc -w)-1 ]]; then
echo ""
@@ -163,8 +185,6 @@ function run_benchmark_test()
unset BACKENDS
echo "============================================"
unset COUNT
- unset USE_NNAPI
-
}
echo ""
diff --git a/tests/scripts/test_scheduler_with_profiling_android.sh b/tests/scripts/test_scheduler_with_profiling_android.sh
new file mode 100644
index 000000000..8c124233a
--- /dev/null
+++ b/tests/scripts/test_scheduler_with_profiling_android.sh
@@ -0,0 +1,230 @@
+#!/system/bin/sh
+#
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# How to run benchmark testing
+#
+# This script is a copy of test_scheduler_with_profiling.sh adapted for Android.
+# Since Android does not provide bash, this script, models/run_test_android.sh,
+# and common_android.sh were modified to run the benchmark under the Android
+# shell. Test models are downloaded into the models folder, but since the
+# Android shell cannot download them either, the user must push the downloaded
+# models to the device manually:
+#
+# 1. To download the test models,
+#    run test_scheduler_with_profiling.sh on an Ubuntu/ARM device
+# 2. The downloaded models will be in the tests/scripts/models/cache folder
+# 3. Build onert for Android
+# 4. Copy the files:
+# adb shell mkdir -p /data/local/tmp/Product/report/benchmark
+# adb push tests /data/local/tmp/.
+# adb push Product/aarch64-android.release/out /data/local/tmp/Product/.
+#
+# 5. Run benchmark inside Android shell
+# export LD_LIBRARY_PATH=/data/local/tmp/Product/out/lib
+# cd /data/local/tmp
+# sh /data/local/tmp/tests/scripts/test_scheduler_with_profiling_android.sh
+#
+
+MY_PATH="$( cd "$( dirname "$0" )" && pwd )"
+
+SHELL_CMD=/system/bin/sh
+
+source $MY_PATH/common_android.sh
+
+BACKEND_CNT=3
+# Run profiler BACKEND_CNT+1 times: on each run of the first BACKEND_CNT runs it will
+# collect metrics for one unmeasured backend. On the last run metrics for data transfer
+PROFILING_RUN_CNT=$((BACKEND_CNT+1))
+TEST_DRIVER_DIR="$( cd "$( dirname "$0" )" && pwd )"
+
+ARTIFACT_PATH="$TEST_DRIVER_DIR/../.."
+BENCHMARK_DRIVER_BIN=$ARTIFACT_PATH/Product/out/bin/tflite_run
+REPORT_DIR=$ARTIFACT_PATH/report
+RUN_TEST_SH=$ARTIFACT_PATH/tests/scripts/models/run_test_android.sh
+BENCHMARK_MODEL_LIST="MODELS/inception_nonslim MODELS/inception_slim MODELS/mobilenet"
+
+if [ ! -e "$RUN_TEST_SH" ]; then
+ echo "Cannot find $RUN_TEST_SH"
+ exit 1
+fi
+
+BENCHMARK_REPORT_DIR=$REPORT_DIR/benchmark
+BENCHMARK_MODELS_FILE=$BENCHMARK_REPORT_DIR/benchmark_models.txt
+
+function run_without_sched()
+{
+ local RESULT_SCH_INT=$1
+ local REPORT_MODEL_DIR=$2
+ local MODEL=$3
+ local EXECUTOR=$4
+ local BACKEND=$5
+
+ # ${EXECUTOR,,} (lowercasing) is a bash-only expansion, so the Android shell
+ # variant keeps the executor name as-is in the log file name
+ LOG_FILE=$REPORT_MODEL_DIR/tflite_${EXECUTOR}_$BACKEND.txt
+ export OP_BACKEND_ALLOPS=$BACKEND
+ export EXECUTOR=$EXECUTOR
+
+ print_with_dots "$EXECUTOR $BACKEND without scheduler"
+
+ RESULT=$(get_result_of_benchmark_test $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
+
+ # printf -v (used in the bash version) is not available here; round via
+ # command substitution instead
+ RESULT_I=$(printf "%.0f" $RESULT)
+ RESULT_INT=$(expr $RESULT_I)
+ PERCENTAGE=$((100 - RESULT_SCH_INT * 100 / RESULT_INT))
+ echo "$RESULT ms. Parallel scheduler is $PERCENTAGE % faster"
+}
+
+function run_benchmark_test()
+{
+ local LOG_FILE=
+ local RESULT=
+ local REPORT_MODEL_DIR=
+
+ export COUNT=5
+ echo "============================================"
+ local i=0
+ export USE_NNAPI=1
+ export BACKENDS="acl_cl;acl_neon;cpu"
+ # Remove metrics so that profiler can get metrics for operations
+ # with input&output sizes the same as the model
+ rm "exec_time.json" 2>/dev/null
+ for MODEL in $BENCHMARK_MODEL_LIST; do
+
+ echo "Benchmark test with `basename $BENCHMARK_DRIVER_BIN` & `echo $MODEL`"
+ echo $MODEL >> $BENCHMARK_MODELS_FILE
+
+ REPORT_MODEL_DIR=$BENCHMARK_REPORT_DIR/scheduler_benchmark/$MODEL
+ mkdir -p $REPORT_MODEL_DIR
+
+##################################################################################
+ # Get metrics by running profiler
+##################################################################################
+ export USE_SCHEDULER=1
+ export PROFILING_MODE=1
+ export EXECUTOR="Dataflow"
+ export ONERT_LOG_ENABLE=1
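+ # With PROFILING_MODE=1 the Dataflow executor collects per-operation timings
+ # into exec_time.json (saved for each run below)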
+ for j in 1 2 3 4; do # 1 to $PROFILING_RUN_CNT
+ # Save the verbose log of each run
+ LOG_FILE=$REPORT_MODEL_DIR/tflite_profiling_$j.txt
+
+ print_with_dots "Profiling run #$j out of $PROFILING_RUN_CNT"
+
+ $SHELL_CMD $RUN_TEST_SH --driverbin=$BENCHMARK_DRIVER_BIN $MODEL > $LOG_FILE 2>&1
+ RET=$?
+ if [[ $RET -ne 0 ]]; then
+ echo "Profiling $MODEL aborted in run#$j... exit code: $RET"
+ exit $RET
+ fi
+ echo "finished"
+ # Save the exec_time.json of each run
+ cp "exec_time.json" $REPORT_MODEL_DIR/"exec_time_$j.json"
+ done
+ unset ONERT_LOG_ENABLE
+
+
+##################################################################################
+ # Turn off profiling
+##################################################################################
+ export PROFILING_MODE=0
+
+##################################################################################
+ # Run ParallelExecutor with scheduler
+##################################################################################
+ LOG_FILE=$REPORT_MODEL_DIR/tflite_parallel_with_scheduler.txt
+ export EXECUTOR="Parallel"
+ export GRAPH_DOT_DUMP=1
+ print_with_dots "Parallel with scheduler"
+
+ RESULT=$(get_result_of_benchmark_test $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
+ echo "$RESULT ms"
+
+ # printf -v is bash-only; round via command substitution instead
+ RESULT_I=$(printf "%.0f" $RESULT)
+ RESULT_SCH_INT=$(expr $RESULT_I)
+
+ mv "after_lower_subg-0.dot" $REPORT_MODEL_DIR/"after_lower_subg-0_parallel.dot"
+
+##################################################################################
+ # Run Linear executor with scheduler
+##################################################################################
+ LOG_FILE=$REPORT_MODEL_DIR/tflite_linear_with_scheduler.txt
+ export EXECUTOR="Linear"
+ export GRAPH_DOT_DUMP=1
+ print_with_dots "Linear with scheduler"
+
+ RESULT=$(get_result_of_benchmark_test $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
+
+ # printf -v is bash-only; round via command substitution instead
+ RESULT_I=$(printf "%.0f" $RESULT)
+ RESULT_INT=$(expr $RESULT_I)
+
+ PERCENTAGE=$((100 - $RESULT_SCH_INT * 100 / $RESULT_INT))
+
+ echo "$RESULT ms. Parallel scheduler is $PERCENTAGE % faster"
+
+ # Remove metrics so that for next model in profiler can get metrics
+ # for operations with input&output sizes the same as the model
+ mv "exec_time.json" $REPORT_MODEL_DIR
+ # Save the dot graph
+ mv "after_lower_subg-0.dot" $REPORT_MODEL_DIR/"after_lower_subg-0_linear.dot"
+ unset GRAPH_DOT_DUMP
+
+##################################################################################
+ # Turn off scheduler
+##################################################################################
+ export USE_SCHEDULER=0
+
+ # Run LinearExecutor on acl_cl without scheduler
+ run_without_sched $RESULT_SCH_INT $REPORT_MODEL_DIR $MODEL "Linear" "acl_cl"
+
+ # Run LinearExecutor on acl_neon without scheduler
+ run_without_sched $RESULT_SCH_INT $REPORT_MODEL_DIR $MODEL "Linear" "acl_neon"
+
+ # Run LinearExecutor on cpu without scheduler
+ # run_without_sched $RESULT_SCH_INT $REPORT_MODEL_DIR $MODEL "Linear" "cpu"
+
+ # Run ParallelExecutor on acl_cl without scheduler
+ run_without_sched $RESULT_SCH_INT $REPORT_MODEL_DIR $MODEL "Parallel" "acl_cl"
+
+ # Run ParallelExecutor on acl_neon without scheduler
+ run_without_sched $RESULT_SCH_INT $REPORT_MODEL_DIR $MODEL "Parallel" "acl_neon"
+
+ # Run ParallelExecutor on cpu without scheduler
+ # run_without_sched $RESULT_SCH_INT $REPORT_MODEL_DIR $MODEL "Parallel" "cpu"
+
+ if [[ $i -ne $(echo $BENCHMARK_MODEL_LIST | wc -w)-1 ]]; then
+ echo ""
+ fi
+ i=$((i+1))
+
+ unset USE_SCHEDULER
+ unset PROFILING_MODE
+ unset EXECUTOR
+ unset OP_BACKEND_ALLOPS
+ done
+ unset BACKENDS
+ echo "============================================"
+ unset COUNT
+ unset USE_NNAPI
+
+}
+
+echo ""
+run_benchmark_test
+echo ""
diff --git a/tests/tools/nnapi_test/CMakeLists.txt b/tests/tools/nnapi_test/CMakeLists.txt
deleted file mode 100644
index eac649b15..000000000
--- a/tests/tools/nnapi_test/CMakeLists.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-if(NOT BUILD_NNAPI_TEST)
- return()
-endif(NOT BUILD_NNAPI_TEST)
-
-list(APPEND SOURCES "src/nnapi_test.cc")
-list(APPEND SOURCES "src/args.cc")
-
-nnfw_find_package(Boost REQUIRED program_options)
-
-add_executable(nnapi_test ${SOURCES})
-target_include_directories(nnapi_test PRIVATE ${Boost_INCLUDE_DIRS})
-target_link_libraries(nnapi_test nnfw_lib_tflite)
-target_link_libraries(nnapi_test ${Boost_PROGRAM_OPTIONS_LIBRARY})
-install(TARGETS nnapi_test DESTINATION bin)
diff --git a/tests/tools/nnapi_test/src/args.cc b/tests/tools/nnapi_test/src/args.cc
deleted file mode 100644
index 420e092c0..000000000
--- a/tests/tools/nnapi_test/src/args.cc
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "args.h"
-
-#include <iostream>
-
-namespace nnapi_test
-{
-
-Args::Args(const int argc, char **argv)
-{
- Initialize();
- try
- {
- Parse(argc, argv);
- }
- catch (const std::exception &e)
- {
- std::cerr << "The argments that cannot be parsed: " << e.what() << '\n';
- print(argv);
- exit(255);
- }
-}
-
-void Args::print(char **argv)
-{
- std::cout << "nnapi_test\n\n";
- std::cout << "Usage: " << argv[0] << " <.tflite> [<options>]\n\n";
- std::cout << _options;
- std::cout << "\n";
-}
-
-void Args::Initialize(void)
-{
- // General options
- po::options_description general("General options", 100);
-
- // clang-format off
- general.add_options()
- ("help,h", "Print available options")
- ("tflite", po::value<std::string>()->required())
- ("seed", po::value<int>()->default_value(0), "The seed of random inputs")
- ("num_runs", po::value<int>()->default_value(2), "The number of runs")
- ;
- // clang-format on
-
- _options.add(general);
- _positional.add("tflite", 1);
- _positional.add("seed", 2);
-}
-
-void Args::Parse(const int argc, char **argv)
-{
- po::variables_map vm;
- po::store(po::command_line_parser(argc, argv).options(_options).positional(_positional).run(),
- vm);
-
- if (vm.count("help"))
- {
- print(argv);
-
- exit(0);
- }
-
- po::notify(vm);
- if (vm.count("tflite"))
- {
- _tflite_filename = vm["tflite"].as<std::string>();
-
- if (_tflite_filename.empty())
- {
- std::cerr << "Please specify tflite file.\n";
- print(argv);
- exit(255);
- }
- else
- {
- if (access(_tflite_filename.c_str(), F_OK) == -1)
- {
- std::cerr << "tflite file not found: " << _tflite_filename << "\n";
- exit(255);
- }
- }
- }
-
- if (vm.count("seed"))
- {
- _seed = vm["seed"].as<int>();
- }
-
- if (vm.count("num_runs"))
- {
- _num_runs = vm["num_runs"].as<int>();
- if (_num_runs < 0)
- {
- std::cerr << "num_runs value must be greater than 0.\n";
- exit(255);
- }
- }
-}
-
-} // end of namespace nnapi_test
diff --git a/tests/tools/nnapi_test/src/args.h b/tests/tools/nnapi_test/src/args.h
deleted file mode 100644
index 486fbefd5..000000000
--- a/tests/tools/nnapi_test/src/args.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNAPI_TEST_ARGS_H__
-#define __NNAPI_TEST_ARGS_H__
-
-#include <boost/program_options.hpp>
-#include <string>
-
-namespace po = boost::program_options;
-
-namespace nnapi_test
-{
-
-class Args
-{
-public:
- Args(const int argc, char **argv);
- void print(char **argv);
-
- const std::string &getTfliteFilename(void) const { return _tflite_filename; }
- const int getSeed(void) const { return _seed; }
- const int getNumRuns(void) const { return _num_runs; }
-
-private:
- void Initialize();
- void Parse(const int argc, char **argv);
-
-private:
- po::positional_options_description _positional;
- po::options_description _options;
-
- std::string _tflite_filename;
- int _seed;
- int _num_runs;
-};
-
-} // end of namespace nnapi_test
-
-#endif // __NNAPI_TEST_ARGS_H__
diff --git a/tests/tools/nnapi_test/src/nnapi_test.cc b/tests/tools/nnapi_test/src/nnapi_test.cc
deleted file mode 100644
index 921d0dc42..000000000
--- a/tests/tools/nnapi_test/src/nnapi_test.cc
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-
-#include "tflite/interp/FlatBufferBuilder.h"
-#include "tflite/RandomTestRunner.h"
-
-#include <iostream>
-#include <stdexcept>
-
-#include "args.h"
-
-using namespace tflite;
-using namespace nnfw::tflite;
-using namespace nnapi_test;
-
-int main(const int argc, char **argv)
-{
- Args args(argc, argv);
-
- const auto filename = args.getTfliteFilename();
-
- StderrReporter error_reporter;
-
- auto model = FlatBufferModel::BuildFromFile(filename.c_str(), &error_reporter);
-
- if (model == nullptr)
- {
- // error_reporter must have shown the error message already
- return 1;
- }
-
- const nnfw::tflite::FlatBufferBuilder builder(*model);
-
- try
- {
- const auto seed = static_cast<uint32_t>(args.getSeed());
- auto runner = nnfw::tflite::RandomTestRunner::make(seed);
- const auto num_runs = static_cast<size_t>(args.getNumRuns());
- runner.compile(builder);
- return runner.run(num_runs);
- }
- catch (const std::exception &e)
- {
- std::cerr << e.what() << std::endl;
- return 1;
- }
-}
diff --git a/tests/tools/nnpackage_run/CMakeLists.txt b/tests/tools/nnpackage_run/CMakeLists.txt
deleted file mode 100644
index ec45db4f6..000000000
--- a/tests/tools/nnpackage_run/CMakeLists.txt
+++ /dev/null
@@ -1,45 +0,0 @@
-if(NOT BUILD_NNPACKAGE_RUN)
- return()
-endif(NOT BUILD_NNPACKAGE_RUN)
-
-if(NOT BUILD_ONERT)
- return()
-endif(NOT BUILD_ONERT)
-
-list(APPEND NNPACKAGE_RUN_SRCS "src/nnpackage_run.cc")
-list(APPEND NNPACKAGE_RUN_SRCS "src/args.cc")
-list(APPEND NNPACKAGE_RUN_SRCS "src/nnfw_util.cc")
-list(APPEND NNPACKAGE_RUN_SRCS "src/randomgen.cc")
-
-nnfw_find_package(Boost REQUIRED program_options)
-nnfw_find_package(Ruy QUIET)
-nnfw_find_package(HDF5 QUIET)
-
-if (HDF5_FOUND)
- list(APPEND NNPACKAGE_RUN_SRCS "src/h5formatter.cc")
-endif()
-
-add_executable(nnpackage_run ${NNPACKAGE_RUN_SRCS})
-
-if (HDF5_FOUND)
- target_compile_definitions(nnpackage_run PRIVATE ONERT_HAVE_HDF5=1)
- target_include_directories(nnpackage_run PRIVATE ${HDF5_INCLUDE_DIRS})
- target_link_libraries(nnpackage_run ${HDF5_CXX_LIBRARIES})
-else()
- message(WARNING "HDF5 NOT found. Install libhdf5-dev or set EXT_HDF5_DIR to support load/dump in nnpackage_run.")
-endif(HDF5_FOUND)
-
-target_include_directories(nnpackage_run PRIVATE src)
-target_include_directories(nnpackage_run PRIVATE ${Boost_INCLUDE_DIRS})
-
-target_link_libraries(nnpackage_run onert_core onert tflite_loader)
-target_link_libraries(nnpackage_run nnfw_lib_tflite jsoncpp)
-target_link_libraries(nnpackage_run nnfw-dev)
-target_link_libraries(nnpackage_run ${Boost_PROGRAM_OPTIONS_LIBRARY})
-target_link_libraries(nnpackage_run nnfw_lib_benchmark)
-if(Ruy_FOUND AND PROFILE_RUY)
- target_link_libraries(nnpackage_run ruy_instrumentation)
- target_link_libraries(nnpackage_run ruy_profiler)
-endif(Ruy_FOUND AND PROFILE_RUY)
-
-install(TARGETS nnpackage_run DESTINATION bin)
diff --git a/tests/tools/nnpackage_run/README.md b/tests/tools/nnpackage_run/README.md
deleted file mode 100644
index 898cc84cf..000000000
--- a/tests/tools/nnpackage_run/README.md
+++ /dev/null
@@ -1,22 +0,0 @@
-# nnpackage_run
-
-`nnpackage_run` is a tool to run `nnpackage`.
-
-It takes `nnpackage` as input. It uses **runtime API** internally.
-
-## Usage
-
-### Simple run
-
-This will run with random input data
-
-```
-$ ./nnpackage_run path_to_nnpackage_directory
-```
-
-Output would look like:
-
-```
-nnfw_prepare takes 425.235 ms
-nnfw_run takes 2.525 ms
-```
diff --git a/tests/tools/nnpackage_run/src/allocation.h b/tests/tools/nnpackage_run/src/allocation.h
deleted file mode 100644
index ea4672f9a..000000000
--- a/tests/tools/nnpackage_run/src/allocation.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNPACKAGE_RUN_ALLOCATION_H__
-#define __NNPACKAGE_RUN_ALLOCATION_H__
-
-#include <cstdlib>
-#include <cstdint>
-
-namespace nnpkg_run
-{
-class Allocation
-{
-public:
- Allocation() : data_(nullptr) {}
- ~Allocation() { free(data_); }
- void *data() const { return data_; }
- void *alloc(uint64_t sz) { return data_ = malloc(sz); }
-private:
- void *data_;
-};
-} // end of namespace
-
-#endif // __NNPACKAGE_RUN_ALLOCATION_H__
diff --git a/tests/tools/nnpackage_run/src/args.cc b/tests/tools/nnpackage_run/src/args.cc
deleted file mode 100644
index 90021bff3..000000000
--- a/tests/tools/nnpackage_run/src/args.cc
+++ /dev/null
@@ -1,291 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "args.h"
-
-#include <functional>
-#include <iostream>
-#include <json/json.h>
-
-namespace
-{
-
-// This function parses a json object and returns as a vector of integers
-// For example,
-// [0, [1, 2, 3, 4], 3, 40, 4, []] in JSON
-// is converted to:
-// {
-// 0 -> [1, 2, 3, 4]
-// 3 -> 40
-// 4 -> []
-// } in std::unordered_map. Note that the value type is still Json::Value.
-std::unordered_map<uint32_t, Json::Value> argArrayToMap(const Json::Value &jsonval)
-{
- if (!jsonval.isArray() || (jsonval.size() % 2 != 0))
- {
- std::cerr << "JSON argument must be an even-sized array in JSON\n";
- exit(1);
- }
-
- std::unordered_map<uint32_t, Json::Value> ret;
- for (uint32_t i = 0; i < jsonval.size(); i += 2)
- {
- if (!jsonval[i].isUInt())
- {
- std::cerr << "Key values(values in even indices) must be unsigned integers\n";
- exit(1);
- }
- uint32_t key = jsonval[i].asUInt();
- Json::Value val = jsonval[i + 1];
- ret[key] = jsonval[i + 1];
- }
- return ret;
-}
-
-// param shape_str is a form of, e.g., "[1, [2, 3], 3, []]" or "h5"
-void handleShapeJsonParam(nnpkg_run::TensorShapeMap &shape_map, const std::string &shape_str)
-{
- Json::Value root;
- Json::Reader reader;
- if (!reader.parse(shape_str, root, false))
- {
- std::cerr << "Invalid JSON format for output_sizes \"" << shape_str << "\"\n";
- exit(1);
- }
-
- auto arg_map = argArrayToMap(root);
- for (auto &pair : arg_map)
- {
- uint32_t key = pair.first;
- Json::Value &shape_json = pair.second;
- if (!shape_json.isArray())
- {
- std::cerr << "All the values must be list: " << shape_str << "\n";
- exit(1);
- }
-
- std::vector<int> shape;
- for (auto &dim_json : shape_json)
- {
- if (!dim_json.isUInt())
- {
- std::cerr << "All the dims should be dim >= 0: " << shape_str << "\n";
- exit(1);
- }
-
- shape.emplace_back(dim_json.asUInt64());
- }
-
- shape_map[key] = shape;
- }
-}
-
-} // namespace
-
-namespace nnpkg_run
-{
-
-Args::Args(const int argc, char **argv)
-{
- Initialize();
- Parse(argc, argv);
-}
-
-void Args::Initialize(void)
-{
- auto process_nnpackage = [&](const std::string &package_filename) {
- _package_filename = package_filename;
-
- std::cerr << "Package Filename " << _package_filename << std::endl;
- if (_package_filename.empty())
- {
- // TODO Print usage instead of the below message
- std::cerr << "Please specify nnpackage file. Run with `--help` for usage."
- << "\n";
-
- exit(1);
- }
- else
- {
- if (access(_package_filename.c_str(), F_OK) == -1)
- {
- std::cerr << "nnpackage not found: " << _package_filename << "\n";
- }
- }
- };
-
- auto process_output_sizes = [&](const std::string &output_sizes_json_str) {
- Json::Value root;
- Json::Reader reader;
- if (!reader.parse(output_sizes_json_str, root, false))
- {
- std::cerr << "Invalid JSON format for output_sizes \"" << output_sizes_json_str << "\"\n";
- exit(1);
- }
-
- auto arg_map = argArrayToMap(root);
- for (auto &pair : arg_map)
- {
- uint32_t key = pair.first;
- Json::Value &val_json = pair.second;
- if (!val_json.isUInt())
- {
- std::cerr << "All the values in `output_sizes` must be unsigned integers\n";
- exit(1);
- }
- uint32_t val = val_json.asUInt();
- _output_sizes[key] = val;
- }
- };
-
- auto process_shape_prepare = [&](const std::string &shape_str) {
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
- if (shape_str == "H5" || shape_str == "h5")
- {
- _when_to_use_h5_shape = WhenToUseH5Shape::PREPARE;
- return;
- }
-#endif
- try
- {
- handleShapeJsonParam(_shape_prepare, shape_str);
- }
- catch (const std::exception &e)
- {
- std::cerr << "error with '--shape_prepare' option: " << shape_str << std::endl;
- exit(1);
- }
- };
-
- auto process_shape_run = [&](const std::string &shape_str) {
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
- if (shape_str == "H5" || shape_str == "h5")
- {
- _when_to_use_h5_shape = WhenToUseH5Shape::RUN;
- return;
- }
-#endif
- try
- {
- handleShapeJsonParam(_shape_run, shape_str);
- }
- catch (const std::exception &e)
- {
- std::cerr << "error with '--shape_run' option: " << shape_str << std::endl;
- exit(1);
- }
- };
-
- // General options
- po::options_description general("General options", 100);
-
- // clang-format off
- general.add_options()
- ("help,h", "Print available options")
- ("version", "Print version and exit immediately")
- ("nnpackage", po::value<std::string>()->required()->notifier(process_nnpackage))
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
- ("dump,d", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _dump_filename = v; }), "Output filename")
- ("load,l", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _load_filename = v; }), "Input filename")
-#endif
- ("output_sizes", po::value<std::string>()->notifier(process_output_sizes),
- "The output buffer size in JSON 1D array\n"
- "If not given, the model's output sizes are used\n"
- "e.g. '[0, 40, 2, 80]' to set 0th tensor to 40 and 2nd tensor to 80.\n")
- ("num_runs,r", po::value<int>()->default_value(1)->notifier([&](const auto &v) { _num_runs = v; }), "The number of runs")
- ("warmup_runs,w", po::value<int>()->default_value(0)->notifier([&](const auto &v) { _warmup_runs = v; }), "The number of warmup runs")
- ("run_delay,t", po::value<int>()->default_value(-1)->notifier([&](const auto &v) { _run_delay = v; }), "Delay time(ms) between runs (as default no delay")
- ("gpumem_poll,g", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _gpumem_poll = v; }), "Check gpu memory polling separately")
- ("mem_poll,m", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _mem_poll = v; }), "Check memory polling")
- ("write_report,p", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _write_report = v; }),
- "Write report\n"
- "{exec}-{nnpkg}-{backend}.csv will be generated.\n"
- "e.g. nnpackage_run-UNIT_Add_000-acl_cl.csv.\n"
- "{nnpkg} name may be changed to realpath if you use symbolic-link.")
- ("shape_prepare", po::value<std::string>()->default_value("[]")->notifier(process_shape_prepare),
- "set shape of specified tensor before compilation (before calling nnfw_prepare()).\n"
- "'h5': read shape(s) from H5 input file. '--load' should also be provided.\n"
- "'[0, [1, 2], 2, []]': set 0th tensor to [1, 2] and 2nd tensor to [].")
- ("shape_run", po::value<std::string>()->default_value("[]")->notifier(process_shape_run),
- "set shape of specified tensor before running (before calling nnfw_run()).\n"
- "'h5': read shape(s) from H5 input file. '--load' should also be provided.\n"
- "'[0, [1, 2], 2, []]': set 0th tensor to [1, 2] and 2nd tensor to [].")
- ("verbose_level,v", po::value<int>()->default_value(0)->notifier([&](const auto &v) { _verbose_level = v; }),
- "Verbose level\n"
- "0: prints the only result. Messages btw run don't print\n"
- "1: prints result and message btw run\n"
- "2: prints all of messages to print\n")
- ;
- // clang-format on
-
- _options.add(general);
- _positional.add("nnpackage", 1);
-}
-
-void Args::Parse(const int argc, char **argv)
-{
- po::variables_map vm;
- po::store(po::command_line_parser(argc, argv).options(_options).positional(_positional).run(),
- vm);
-
- {
- auto conflicting_options = [&](const std::string &o1, const std::string &o2) {
- if ((vm.count(o1) && !vm[o1].defaulted()) && (vm.count(o2) && !vm[o2].defaulted()))
- {
- throw boost::program_options::error(std::string("Two options '") + o1 + "' and '" + o2 +
- "' cannot be given at once.");
- }
- };
- }
-
- if (vm.count("help"))
- {
- std::cout << "nnpackage_run\n\n";
- std::cout << "Usage: " << argv[0] << " path to nnpackage root directory [<options>]\n\n";
- std::cout << _options;
- std::cout << "\n";
-
- exit(0);
- }
-
- if (vm.count("version"))
- {
- _print_version = true;
- return;
- }
-
- try
- {
- po::notify(vm);
- }
- catch (const std::bad_cast &e)
- {
- std::cerr << "Bad cast error - " << e.what() << '\n';
- exit(1);
- }
-
- // This must be run after `notify` as `_warm_up_runs` must have been processed before.
- if (vm.count("mem_poll"))
- {
- // Instead of EXECUTE to avoid overhead, memory polling runs on WARMUP
- if (_mem_poll && _warmup_runs == 0)
- {
- _warmup_runs = 1;
- }
- }
-}
-
-} // end of namespace nnpkg_run
diff --git a/tests/tools/nnpackage_run/src/args.h b/tests/tools/nnpackage_run/src/args.h
deleted file mode 100644
index d2b33fc82..000000000
--- a/tests/tools/nnpackage_run/src/args.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNPACKAGE_RUN_ARGS_H__
-#define __NNPACKAGE_RUN_ARGS_H__
-
-#include <string>
-#include <unordered_map>
-#include <vector>
-#include <boost/program_options.hpp>
-
-#include "types.h"
-
-namespace po = boost::program_options;
-
-namespace nnpkg_run
-{
-
-using TensorShapeMap = std::unordered_map<uint32_t, TensorShape>;
-
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
-enum class WhenToUseH5Shape
-{
- DO_NOT_USE, // don't use shapes in h5 file
- PREPARE, // read shapes in h5 file and set them as inputs' shape before calling nnfw_prepare()
- RUN, // read shapes in h5 file and set them as inputs' shape before calling nnfw_run()
-};
-#endif
-
-class Args
-{
-public:
- Args(const int argc, char **argv);
- void print(void);
-
- const std::string &getPackageFilename(void) const { return _package_filename; }
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
- const std::string &getDumpFilename(void) const { return _dump_filename; }
- const std::string &getLoadFilename(void) const { return _load_filename; }
- WhenToUseH5Shape getWhenToUseH5Shape(void) const { return _when_to_use_h5_shape; }
-#endif
- const int getNumRuns(void) const { return _num_runs; }
- const int getWarmupRuns(void) const { return _warmup_runs; }
- const int getRunDelay(void) const { return _run_delay; }
- std::unordered_map<uint32_t, uint32_t> getOutputSizes(void) const { return _output_sizes; }
- const bool getGpuMemoryPoll(void) const { return _gpumem_poll; }
- const bool getMemoryPoll(void) const { return _mem_poll; }
- const bool getWriteReport(void) const { return _write_report; }
- const bool printVersion(void) const { return _print_version; }
- TensorShapeMap &getShapeMapForPrepare() { return _shape_prepare; }
- TensorShapeMap &getShapeMapForRun() { return _shape_run; }
- const int getVerboseLevel(void) const { return _verbose_level; }
-
-private:
- void Initialize();
- void Parse(const int argc, char **argv);
-
-private:
- po::positional_options_description _positional;
- po::options_description _options;
-
- std::string _package_filename;
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
- std::string _dump_filename;
- std::string _load_filename;
- WhenToUseH5Shape _when_to_use_h5_shape = WhenToUseH5Shape::DO_NOT_USE;
-#endif
- TensorShapeMap _shape_prepare;
- TensorShapeMap _shape_run;
- int _num_runs;
- int _warmup_runs;
- int _run_delay;
- std::unordered_map<uint32_t, uint32_t> _output_sizes;
- bool _gpumem_poll;
- bool _mem_poll;
- bool _write_report;
- bool _print_version = false;
- int _verbose_level;
-};
-
-} // end of namespace nnpkg_run
-
-#endif // __NNPACKAGE_RUN_ARGS_H__
diff --git a/tests/tools/nnpackage_run/src/h5formatter.cc b/tests/tools/nnpackage_run/src/h5formatter.cc
deleted file mode 100644
index 3929c8d90..000000000
--- a/tests/tools/nnpackage_run/src/h5formatter.cc
+++ /dev/null
@@ -1,241 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "h5formatter.h"
-#include "nnfw.h"
-#include "nnfw_util.h"
-
-#include <iostream>
-#include <stdexcept>
-#include <H5Cpp.h>
-
-namespace
-{
-nnpkg_run::TensorShape getShape(H5::DataSet &data_set)
-{
- std::vector<hsize_t> h5_shape; // hsize_t is unsigned long long
- H5::DataSpace data_space = data_set.getSpace();
- int rank = data_space.getSimpleExtentNdims();
- h5_shape.resize(rank);
-
- // read shape info from H5 file
- data_space.getSimpleExtentDims(h5_shape.data(), NULL);
-
- nnpkg_run::TensorShape shape;
- for (auto dim : h5_shape)
- shape.emplace_back(static_cast<int>(dim));
-
- return shape;
-}
-} // namespace
-
-namespace nnpkg_run
-{
-static const char *h5_value_grpname = "value";
-
-std::vector<TensorShape> H5Formatter::readTensorShapes(const std::string &filename)
-{
- uint32_t num_inputs;
- NNPR_ENSURE_STATUS(nnfw_input_size(session_, &num_inputs));
- std::vector<TensorShape> tensor_shapes;
-
- try
- {
- H5::Exception::dontPrint();
-
- H5::H5File file(filename, H5F_ACC_RDONLY);
- H5::Group value_group = file.openGroup(h5_value_grpname);
-
- // Constraints: if there are n data set names, they should be unique and
- // one of [ "0", "1", .. , "n-1" ]
- for (uint32_t i = 0; i < num_inputs; ++i)
- {
- H5::DataSet data_set = value_group.openDataSet(std::to_string(i));
- H5::DataType type = data_set.getDataType();
- auto shape = getShape(data_set);
-
- tensor_shapes.emplace_back(shape);
- }
-
- return tensor_shapes;
- }
- catch (const H5::Exception &e)
- {
- H5::Exception::printErrorStack();
- std::exit(-1);
- }
- catch (const std::exception &e)
- {
- std::cerr << e.what() << std::endl;
- std::exit(-1);
- }
-}
-
-void H5Formatter::loadInputs(const std::string &filename, std::vector<Allocation> &inputs)
-{
- uint32_t num_inputs;
- NNPR_ENSURE_STATUS(nnfw_input_size(session_, &num_inputs));
- try
- {
- // Turn off the automatic error printing.
- H5::Exception::dontPrint();
-
- H5::H5File file(filename, H5F_ACC_RDONLY);
- H5::Group value_group = file.openGroup(h5_value_grpname);
- for (uint32_t i = 0; i < num_inputs; ++i)
- {
- nnfw_tensorinfo ti;
- NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session_, i, &ti));
-
- // TODO Add Assert(nnfw shape, h5 file shape size)
-
- // allocate memory for data
- auto bufsz = bufsize_for(&ti);
- inputs[i].alloc(bufsz);
-
- H5::DataSet data_set = value_group.openDataSet(std::to_string(i));
- H5::DataType type = data_set.getDataType();
- switch (ti.dtype)
- {
- case NNFW_TYPE_TENSOR_FLOAT32:
- if (type == H5::PredType::IEEE_F32BE || type == H5::PredType::IEEE_F32LE)
- data_set.read(inputs[i].data(), H5::PredType::NATIVE_FLOAT);
- else
- throw std::runtime_error("model input type is f32. But h5 data type is different.");
- break;
- case NNFW_TYPE_TENSOR_INT32:
- if (type == H5::PredType::STD_I32BE || type == H5::PredType::STD_I32LE)
- data_set.read(inputs[i].data(), H5::PredType::NATIVE_INT32);
- else
- throw std::runtime_error("model input type is i32. But h5 data type is different.");
- break;
- case NNFW_TYPE_TENSOR_INT64:
- if (type == H5::PredType::STD_I64BE || type == H5::PredType::STD_I64LE)
- data_set.read(inputs[i].data(), H5::PredType::NATIVE_INT64);
- else
- throw std::runtime_error("model input type is i64. But h5 data type is different.");
- break;
- case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
- case NNFW_TYPE_TENSOR_BOOL:
- case NNFW_TYPE_TENSOR_UINT8:
- if (type == H5::PredType::STD_U8BE || type == H5::PredType::STD_U8LE)
- data_set.read(inputs[i].data(), H5::PredType::NATIVE_UINT8);
- else
- throw std::runtime_error(
- "model input type is qasymm8, bool or uint8. But h5 data type is different.");
- break;
- default:
- throw std::runtime_error("nnpkg_run can load f32, i32, qasymm8, bool and uint8.");
- }
- NNPR_ENSURE_STATUS(nnfw_set_input(session_, i, ti.dtype, inputs[i].data(), bufsz));
- NNPR_ENSURE_STATUS(nnfw_set_input_layout(session_, i, NNFW_LAYOUT_CHANNELS_LAST));
- }
- }
- catch (const H5::Exception &e)
- {
- H5::Exception::printErrorStack();
- std::exit(-1);
- }
- catch (const std::exception &e)
- {
- std::cerr << e.what() << std::endl;
- std::exit(-1);
- }
-};
-
-void H5Formatter::dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs)
-{
- uint32_t num_outputs;
- NNPR_ENSURE_STATUS(nnfw_output_size(session_, &num_outputs));
- try
- {
- // Turn off the automatic error printing.
- H5::Exception::dontPrint();
-
- H5::H5File file(filename, H5F_ACC_TRUNC);
- H5::Group value_group = file.createGroup(h5_value_grpname);
- for (uint32_t i = 0; i < num_outputs; i++)
- {
- nnfw_tensorinfo ti;
- NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session_, i, &ti));
- std::vector<hsize_t> dims(ti.rank);
- for (uint32_t j = 0; j < ti.rank; ++j)
- {
- if (ti.dims[j] >= 0)
- dims[j] = static_cast<hsize_t>(ti.dims[j]);
- else
- {
- std::cerr << "Negative dimension in output tensor" << std::endl;
- exit(-1);
- }
- }
- H5::DataSpace data_space(ti.rank, dims.data());
- switch (ti.dtype)
- {
- case NNFW_TYPE_TENSOR_FLOAT32:
- {
- H5::DataSet data_set =
- value_group.createDataSet(std::to_string(i), H5::PredType::IEEE_F32BE, data_space);
- data_set.write(outputs[i].data(), H5::PredType::NATIVE_FLOAT);
- break;
- }
- case NNFW_TYPE_TENSOR_INT32:
- {
- H5::DataSet data_set =
- value_group.createDataSet(std::to_string(i), H5::PredType::STD_I32LE, data_space);
- data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT32);
- break;
- }
- case NNFW_TYPE_TENSOR_INT64:
- {
- H5::DataSet data_set =
- value_group.createDataSet(std::to_string(i), H5::PredType::STD_I64LE, data_space);
- data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT64);
- break;
- }
- case NNFW_TYPE_TENSOR_UINT8:
- case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
- {
- H5::DataSet data_set =
- value_group.createDataSet(std::to_string(i), H5::PredType::STD_U8BE, data_space);
- data_set.write(outputs[i].data(), H5::PredType::NATIVE_UINT8);
- break;
- }
- case NNFW_TYPE_TENSOR_BOOL:
- {
- H5::DataSet data_set =
- value_group.createDataSet(std::to_string(i), H5::PredType::STD_U8LE, data_space);
- data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT8);
- break;
- }
- default:
- throw std::runtime_error("nnpkg_run can dump f32, i32, qasymm8, bool and uint8.");
- }
- }
- }
- catch (const H5::Exception &e)
- {
- H5::Exception::printErrorStack();
- std::exit(-1);
- }
- catch (const std::runtime_error &e)
- {
- std::cerr << "Error during dumpOutputs on nnpackage_run : " << e.what() << std::endl;
- std::exit(-1);
- }
-};
-
-} // end of namespace nnpkg_run
diff --git a/tests/tools/nnpackage_run/src/h5formatter.h b/tests/tools/nnpackage_run/src/h5formatter.h
deleted file mode 100644
index 203ba0e72..000000000
--- a/tests/tools/nnpackage_run/src/h5formatter.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNPACKAGE_RUN_H5FORMATTER_H__
-#define __NNPACKAGE_RUN_H5FORMATTER_H__
-
-#include <string>
-#include <vector>
-
-#include "types.h"
-#include "allocation.h"
-
-struct nnfw_session;
-
-namespace nnpkg_run
-{
-class H5Formatter
-{
-public:
- H5Formatter(nnfw_session *sess) : session_(sess) {}
- std::vector<TensorShape> readTensorShapes(const std::string &filename);
- void loadInputs(const std::string &filename, std::vector<Allocation> &inputs);
- void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs);
-
-private:
- nnfw_session *session_;
-};
-} // end of namespace
-
-#endif // __NNPACKAGE_RUN_H5FORMATTER_H__
diff --git a/tests/tools/nnpackage_run/src/nnfw_util.cc b/tests/tools/nnpackage_run/src/nnfw_util.cc
deleted file mode 100644
index 01e72f99e..000000000
--- a/tests/tools/nnpackage_run/src/nnfw_util.cc
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cassert>
-#include <string>
-#include "nnfw.h"
-
-namespace nnpkg_run
-{
-uint64_t num_elems(const nnfw_tensorinfo *ti)
-{
- uint64_t n = 1;
- for (uint32_t i = 0; i < ti->rank; ++i)
- {
- assert(ti->dims[i] >= 0);
- n *= ti->dims[i];
- }
- return n;
-}
-
-uint64_t bufsize_for(const nnfw_tensorinfo *ti)
-{
- static int elmsize[] = {
- sizeof(float), /* NNFW_TYPE_TENSOR_FLOAT32 */
- sizeof(int), /* NNFW_TYPE_TENSOR_INT32 */
- sizeof(uint8_t), /* NNFW_TYPE_TENSOR_QUANT8_ASYMM */
- sizeof(bool), /* NNFW_TYPE_TENSOR_BOOL = 3 */
- sizeof(uint8_t), /* NNFW_TYPE_TENSOR_UINT8 = 4 */
- sizeof(int64_t), /* NNFW_TYPE_TENSOR_INT64 = 5 */
-
- };
- return elmsize[ti->dtype] * num_elems(ti);
-}
-
-} // end of namespace
diff --git a/tests/tools/nnpackage_run/src/nnfw_util.h b/tests/tools/nnpackage_run/src/nnfw_util.h
deleted file mode 100644
index 6fe547eca..000000000
--- a/tests/tools/nnpackage_run/src/nnfw_util.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNPACKAGE_RUN_NNFW_UTIL_H__
-#define __NNPACKAGE_RUN_NNFW_UTIL_H__
-
-#include "nnfw.h"
-
-#define NNPR_ENSURE_STATUS(a) \
- do \
- { \
- if ((a) != NNFW_STATUS_NO_ERROR) \
- { \
- exit(-1); \
- } \
- } while (0)
-
-namespace nnpkg_run
-{
-uint64_t num_elems(const nnfw_tensorinfo *ti);
-uint64_t bufsize_for(const nnfw_tensorinfo *ti);
-} // end of namespace nnpkg_run
-
-#endif // __NNPACKAGE_UTIL_H__
diff --git a/tests/tools/nnpackage_run/src/nnpackage_run.cc b/tests/tools/nnpackage_run/src/nnpackage_run.cc
deleted file mode 100644
index a78e144d8..000000000
--- a/tests/tools/nnpackage_run/src/nnpackage_run.cc
+++ /dev/null
@@ -1,288 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "allocation.h"
-#include "args.h"
-#include "benchmark.h"
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
-#include "h5formatter.h"
-#endif
-#include "nnfw.h"
-#include "nnfw_util.h"
-#include "nnfw_internal.h"
-#include "randomgen.h"
-#ifdef RUY_PROFILER
-#include "ruy/profiler/profiler.h"
-#endif
-
-#include <cassert>
-#include <chrono>
-#include <cstdlib>
-#include <iostream>
-#include <libgen.h>
-#include <stdexcept>
-#include <unordered_map>
-#include <vector>
-
-static const char *default_backend_cand = "cpu";
-
-void overwriteShapeMap(nnpkg_run::TensorShapeMap &shape_map,
- std::vector<nnpkg_run::TensorShape> shapes)
-{
- for (uint32_t i = 0; i < shapes.size(); i++)
- shape_map[i] = shapes[i];
-}
-
-int main(const int argc, char **argv)
-{
- using namespace nnpkg_run;
-
- try
- {
- Args args(argc, argv);
- auto nnpackage_path = args.getPackageFilename();
- if (args.printVersion())
- {
- uint32_t version;
- NNPR_ENSURE_STATUS(nnfw_query_info_u32(NULL, NNFW_INFO_ID_VERSION, &version));
- std::cout << "nnpkg_run (nnfw runtime: v" << (version >> 24) << "."
- << ((version & 0x0000FF00) >> 8) << "." << (version & 0xFF) << ")" << std::endl;
- exit(0);
- }
-
-#ifdef RUY_PROFILER
- ruy::profiler::ScopeProfile ruy_profile;
-#endif
-
- // TODO Apply verbose level to phases
- const int verbose = args.getVerboseLevel();
- benchmark::Phases phases(
- benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()});
-
- nnfw_session *session = nullptr;
- NNPR_ENSURE_STATUS(nnfw_create_session(&session));
-
- // ModelLoad
- phases.run("MODEL_LOAD", [&](const benchmark::Phase &, uint32_t) {
- NNPR_ENSURE_STATUS(nnfw_load_model_from_file(session, nnpackage_path.c_str()));
- });
-
- char *available_backends = std::getenv("BACKENDS");
- if (available_backends)
- NNPR_ENSURE_STATUS(nnfw_set_available_backends(session, available_backends));
-
- uint32_t num_inputs;
- NNPR_ENSURE_STATUS(nnfw_input_size(session, &num_inputs));
-
- // verify input and output
-
- auto verifyInputTypes = [session]() {
- uint32_t sz;
- NNPR_ENSURE_STATUS(nnfw_input_size(session, &sz));
- for (uint32_t i = 0; i < sz; ++i)
- {
- nnfw_tensorinfo ti;
- NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, i, &ti));
-
- if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_INT64)
- {
- std::cerr << "E: not supported input type" << std::endl;
- exit(-1);
- }
- }
- };
-
- auto verifyOutputTypes = [session]() {
- uint32_t sz;
- NNPR_ENSURE_STATUS(nnfw_output_size(session, &sz));
-
- for (uint32_t i = 0; i < sz; ++i)
- {
- nnfw_tensorinfo ti;
- NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
-
- if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_INT64)
- {
- std::cerr << "E: not supported output type" << std::endl;
- exit(-1);
- }
- }
- };
-
- auto setTensorInfo = [session](const TensorShapeMap &tensor_shape_map) {
- for (auto tensor_shape : tensor_shape_map)
- {
- auto ind = tensor_shape.first;
- auto &shape = tensor_shape.second;
- nnfw_tensorinfo ti;
- // to fill dtype
- NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, ind, &ti));
-
- ti.rank = shape.size();
- for (int i = 0; i < ti.rank; i++)
- ti.dims[i] = shape.at(i);
- NNPR_ENSURE_STATUS(nnfw_set_input_tensorinfo(session, ind, &ti));
- }
- };
-
- verifyInputTypes();
- verifyOutputTypes();
-
-// set input shape before compilation
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
- if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::PREPARE)
- {
- auto shapes = H5Formatter(session).readTensorShapes(args.getLoadFilename());
- overwriteShapeMap(args.getShapeMapForPrepare(), shapes);
- }
-#endif
- setTensorInfo(args.getShapeMapForPrepare());
-
- // prepare execution
-
- // TODO When nnfw_{prepare|run} are failed, can't catch the time
- phases.run("PREPARE", [&](const benchmark::Phase &, uint32_t) {
- NNPR_ENSURE_STATUS(nnfw_prepare(session));
- });
-
-// set input shape after compilation and before execution
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
- if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::RUN)
- {
- auto shapes = H5Formatter(session).readTensorShapes(args.getLoadFilename());
- overwriteShapeMap(args.getShapeMapForRun(), shapes);
- }
-#endif
- setTensorInfo(args.getShapeMapForRun());
-
- // prepare input
- std::vector<Allocation> inputs(num_inputs);
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
- if (!args.getLoadFilename().empty())
- H5Formatter(session).loadInputs(args.getLoadFilename(), inputs);
- else
- RandomGenerator(session).generate(inputs);
-#else
- RandomGenerator(session).generate(inputs);
-#endif
-
- // prepare output
- uint32_t num_outputs = 0;
- NNPR_ENSURE_STATUS(nnfw_output_size(session, &num_outputs));
- std::vector<Allocation> outputs(num_outputs);
- auto output_sizes = args.getOutputSizes();
- for (uint32_t i = 0; i < num_outputs; i++)
- {
- nnfw_tensorinfo ti;
- uint64_t output_size_in_bytes = 0;
- {
- auto found = output_sizes.find(i);
- if (found == output_sizes.end())
- {
- NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
- output_size_in_bytes = bufsize_for(&ti);
- }
- else
- {
- output_size_in_bytes = found->second;
- }
- }
- outputs[i].alloc(output_size_in_bytes);
- NNPR_ENSURE_STATUS(
- nnfw_set_output(session, i, ti.dtype, outputs[i].data(), output_size_in_bytes));
- NNPR_ENSURE_STATUS(nnfw_set_output_layout(session, i, NNFW_LAYOUT_CHANNELS_LAST));
- }
-
- // NOTE: Measuring memory can't avoid taking overhead. Therefore, memory will be measured on the
- // only warmup.
- if (verbose == 0)
- {
- phases.run("WARMUP",
- [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
- args.getWarmupRuns());
- phases.run("EXECUTE",
- [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
- args.getNumRuns(), true);
- }
- else
- {
- phases.run("WARMUP",
- [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
- [&](const benchmark::Phase &phase, uint32_t nth) {
- std::cout << "... "
- << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
- << std::endl;
- },
- args.getWarmupRuns());
- phases.run("EXECUTE",
- [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
- [&](const benchmark::Phase &phase, uint32_t nth) {
- std::cout << "... "
- << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
- << std::endl;
- },
- args.getNumRuns(), true);
- }
-
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
- // dump output tensors
- if (!args.getDumpFilename().empty())
- H5Formatter(session).dumpOutputs(args.getDumpFilename(), outputs);
-#endif
-
- NNPR_ENSURE_STATUS(nnfw_close_session(session));
-
- // TODO Apply verbose level to result
-
- // prepare result
- benchmark::Result result(phases);
-
- // to stdout
- benchmark::printResult(result);
-
- // to csv
- if (args.getWriteReport() == false)
- return 0;
-
- // prepare csv task
- std::string exec_basename;
- std::string nnpkg_basename;
- std::string backend_name = (available_backends) ? available_backends : default_backend_cand;
- {
- char buf[PATH_MAX];
- char *res = realpath(nnpackage_path.c_str(), buf);
- if (res)
- {
- nnpkg_basename = basename(buf);
- }
- else
- {
- std::cerr << "E: during getting realpath from nnpackage_path." << std::endl;
- exit(-1);
- }
- exec_basename = basename(argv[0]);
- }
-
- benchmark::writeResult(result, exec_basename, nnpkg_basename, backend_name);
-
- return 0;
- }
- catch (std::runtime_error &e)
- {
- std::cerr << "E: Fail to run by runtime error:" << e.what() << std::endl;
- exit(-1);
- }
-}
diff --git a/tests/tools/nnpackage_run/src/randomgen.cc b/tests/tools/nnpackage_run/src/randomgen.cc
deleted file mode 100644
index 343242081..000000000
--- a/tests/tools/nnpackage_run/src/randomgen.cc
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "randomgen.h"
-#include "nnfw.h"
-#include "nnfw_util.h"
-#include "misc/RandomGenerator.h"
-
-#include <iostream>
-
-namespace nnpkg_run
-{
-
-template <class T> void randomData(nnfw::misc::RandomGenerator &randgen, void *data, uint64_t size)
-{
- for (uint64_t i = 0; i < size; i++)
- reinterpret_cast<T *>(data)[i] = randgen.generate<T>();
-}
-
-void RandomGenerator::generate(std::vector<Allocation> &inputs)
-{
- // generate random data
- const int seed = 1;
- nnfw::misc::RandomGenerator randgen{seed, 0.0f, 2.0f};
- for (uint32_t i = 0; i < inputs.size(); ++i)
- {
- nnfw_tensorinfo ti;
- NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session_, i, &ti));
- auto input_size_in_bytes = bufsize_for(&ti);
- inputs[i].alloc(input_size_in_bytes);
- switch (ti.dtype)
- {
- case NNFW_TYPE_TENSOR_FLOAT32:
- randomData<float>(randgen, inputs[i].data(), num_elems(&ti));
- break;
- case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
- randomData<uint8_t>(randgen, inputs[i].data(), num_elems(&ti));
- break;
- case NNFW_TYPE_TENSOR_BOOL:
- randomData<bool>(randgen, inputs[i].data(), num_elems(&ti));
- break;
- case NNFW_TYPE_TENSOR_UINT8:
- randomData<uint8_t>(randgen, inputs[i].data(), num_elems(&ti));
- break;
- case NNFW_TYPE_TENSOR_INT32:
- randomData<int32_t>(randgen, inputs[i].data(), num_elems(&ti));
- break;
- case NNFW_TYPE_TENSOR_INT64:
- randomData<int64_t>(randgen, inputs[i].data(), num_elems(&ti));
- break;
- default:
- std::cerr << "Not supported input type" << std::endl;
- std::exit(-1);
- }
- NNPR_ENSURE_STATUS(
- nnfw_set_input(session_, i, ti.dtype, inputs[i].data(), input_size_in_bytes));
- NNPR_ENSURE_STATUS(nnfw_set_input_layout(session_, i, NNFW_LAYOUT_CHANNELS_LAST));
- }
-};
-
-} // end of namespace nnpkg_run
diff --git a/tests/tools/nnpackage_run/src/randomgen.h b/tests/tools/nnpackage_run/src/randomgen.h
deleted file mode 100644
index 9ca51dd11..000000000
--- a/tests/tools/nnpackage_run/src/randomgen.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNPACKAGE_RUN_RANDOMGEN_H__
-#define __NNPACKAGE_RUN_RANDOMGEN_H__
-
-#include <string>
-#include <vector>
-
-#include "allocation.h"
-
-struct nnfw_session;
-
-namespace nnpkg_run
-{
-class RandomGenerator
-{
-public:
- RandomGenerator(nnfw_session *sess) : session_(sess) {}
- void generate(std::vector<Allocation> &inputs);
-
-private:
- nnfw_session *session_;
-};
-} // end of namespace
-
-#endif // __NNPACKAGE_RUN_RANDOMGEN_H__
diff --git a/tests/tools/nnpackage_run/src/types.h b/tests/tools/nnpackage_run/src/types.h
deleted file mode 100644
index 93a7ab230..000000000
--- a/tests/tools/nnpackage_run/src/types.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNPACKAGE_RUN_TYPES_H__
-#define __NNPACKAGE_RUN_TYPES_H__
-
-namespace nnpkg_run
-{
-
-using TensorShape = std::vector<int>;
-
-} // end of namespace nnpkg_run
-
-#endif // __NNPACKAGE_RUN_TYPES_H__
diff --git a/tests/tools/onert_run/CMakeLists.txt b/tests/tools/onert_run/CMakeLists.txt
new file mode 100644
index 000000000..1d536ddc0
--- /dev/null
+++ b/tests/tools/onert_run/CMakeLists.txt
@@ -0,0 +1,45 @@
+if(NOT BUILD_ONERT_RUN)
+ return()
+endif(NOT BUILD_ONERT_RUN)
+
+if(NOT BUILD_ONERT)
+ return()
+endif(NOT BUILD_ONERT)
+
+list(APPEND ONERT_RUN_SRCS "src/onert_run.cc")
+list(APPEND ONERT_RUN_SRCS "src/args.cc")
+list(APPEND ONERT_RUN_SRCS "src/nnfw_util.cc")
+list(APPEND ONERT_RUN_SRCS "src/randomgen.cc")
+list(APPEND ONERT_RUN_SRCS "src/rawformatter.cc")
+
+nnfw_find_package(Boost REQUIRED program_options)
+nnfw_find_package(Ruy QUIET)
+nnfw_find_package(HDF5 QUIET)
+
+if (HDF5_FOUND)
+ list(APPEND ONERT_RUN_SRCS "src/h5formatter.cc")
+endif()
+
+add_executable(onert_run ${ONERT_RUN_SRCS})
+
+if (HDF5_FOUND)
+ target_compile_definitions(onert_run PRIVATE ONERT_HAVE_HDF5=1)
+ target_include_directories(onert_run PRIVATE ${HDF5_INCLUDE_DIRS})
+ target_link_libraries(onert_run ${HDF5_CXX_LIBRARIES})
+else()
+ message(WARNING "HDF5 NOT found. Install libhdf5-dev or set EXT_HDF5_DIR to support load/dump in onert_run.")
+endif(HDF5_FOUND)
+
+target_include_directories(onert_run PRIVATE src)
+target_include_directories(onert_run PRIVATE ${Boost_INCLUDE_DIRS})
+
+target_link_libraries(onert_run nnfw_lib_tflite jsoncpp)
+target_link_libraries(onert_run nnfw-dev)
+target_link_libraries(onert_run ${Boost_PROGRAM_OPTIONS_LIBRARY})
+target_link_libraries(onert_run nnfw_lib_benchmark)
+if(Ruy_FOUND AND PROFILE_RUY)
+ target_link_libraries(onert_run ruy_instrumentation)
+ target_link_libraries(onert_run ruy_profiler)
+endif(Ruy_FOUND AND PROFILE_RUY)
+
+install(TARGETS onert_run DESTINATION bin)
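+
+# Build sketch (an assumption, not part of this change): with the standard nnfw CMake
+# flow, this target is enabled by configuring with -DBUILD_ONERT=ON -DBUILD_ONERT_RUN=ON,
+# matching the guards at the top of this file.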
diff --git a/tests/tools/onert_run/README.md b/tests/tools/onert_run/README.md
new file mode 100644
index 000000000..9dc918ef9
--- /dev/null
+++ b/tests/tools/onert_run/README.md
@@ -0,0 +1,22 @@
+# onert_run
+
+`onert_run` is a tool to run `nnpackage`.
+
+It takes `nnpackage` as input. It uses **runtime API** internally.
+
+## Usage
+
+### Simple run
+
+This will run the model with random input data:
+
+```
+$ ./onert_run path_to_nnpackage_directory
+```
+
+Output would look like:
+
+```
+nnfw_prepare takes 425.235 ms
+nnfw_run takes 2.525 ms
+```
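+
+### Load and dump (HDF5 builds)
+
+When built with HDF5 support, input tensors can be loaded from an H5 file and output
+tensors dumped to one. A hypothetical invocation (the filenames are placeholders; run
+with `--help` for the authoritative option list):
+
+```
+$ ./onert_run path_to_nnpackage_directory --load input.h5 --dump output.h5 --num_runs 10
+```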
diff --git a/tests/tools/onert_run/src/allocation.h b/tests/tools/onert_run/src/allocation.h
new file mode 100644
index 000000000..798bf9d06
--- /dev/null
+++ b/tests/tools/onert_run/src/allocation.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_RUN_ALLOCATION_H__
+#define __ONERT_RUN_ALLOCATION_H__
+
+#include <cstdlib>
+#include <cstdint>
+
+namespace onert_run
+{
+class Allocation
+{
+public:
+ Allocation() : data_(nullptr) {}
+ ~Allocation() { free(data_); }
+ void *data() const { return data_; }
+ void *alloc(uint64_t sz) { return data_ = malloc(sz); }
+
+private:
+ void *data_;
+};
+} // namespace onert_run
+
+#endif // __ONERT_RUN_ALLOCATION_H__
diff --git a/tests/tools/onert_run/src/args.cc b/tests/tools/onert_run/src/args.cc
new file mode 100644
index 000000000..a64d81db5
--- /dev/null
+++ b/tests/tools/onert_run/src/args.cc
@@ -0,0 +1,393 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "args.h"
+
+#include <functional>
+#include <iostream>
+#include <sys/stat.h>
+#include <json/json.h>
+
+namespace
+{
+
+// This function parses a JSON array of alternating keys and values and returns it as a map
+// For example,
+// [0, [1, 2, 3, 4], 3, 40, 4, []] in JSON
+// is converted to:
+// {
+// 0 -> [1, 2, 3, 4]
+// 3 -> 40
+// 4 -> []
+// } in std::unordered_map. Note that the value type is still Json::Value.
+std::unordered_map<uint32_t, Json::Value> argArrayToMap(const Json::Value &jsonval)
+{
+ if (!jsonval.isArray() || (jsonval.size() % 2 != 0))
+ {
+ std::cerr << "JSON argument must be an even-sized array in JSON\n";
+ exit(1);
+ }
+
+ std::unordered_map<uint32_t, Json::Value> ret;
+ for (uint32_t i = 0; i < jsonval.size(); i += 2)
+ {
+ if (!jsonval[i].isUInt())
+ {
+ std::cerr << "Key values(values in even indices) must be unsigned integers\n";
+ exit(1);
+ }
+ uint32_t key = jsonval[i].asUInt();
+    ret[key] = jsonval[i + 1];
+ }
+ return ret;
+}
+
+// param shape_str takes a form like "[1, [2, 3], 3, []]" or "h5"
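+// The former sets tensor 1's shape to [2, 3] and tensor 3's shape to [] (scalar);
+// "h5" defers the shapes to the H5 input file.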
+void handleShapeJsonParam(onert_run::TensorShapeMap &shape_map, const std::string &shape_str)
+{
+ Json::Value root;
+ Json::Reader reader;
+ if (!reader.parse(shape_str, root, false))
+ {
+ std::cerr << "Invalid JSON format for output_sizes \"" << shape_str << "\"\n";
+ exit(1);
+ }
+
+ auto arg_map = argArrayToMap(root);
+ for (auto &pair : arg_map)
+ {
+ uint32_t key = pair.first;
+ Json::Value &shape_json = pair.second;
+ if (!shape_json.isArray())
+ {
+ std::cerr << "All the values must be list: " << shape_str << "\n";
+ exit(1);
+ }
+
+ std::vector<int> shape;
+ for (auto &dim_json : shape_json)
+ {
+ if (!dim_json.isUInt())
+ {
+ std::cerr << "All the dims should be dim >= 0: " << shape_str << "\n";
+ exit(1);
+ }
+
+ shape.emplace_back(dim_json.asUInt64());
+ }
+
+ shape_map[key] = shape;
+ }
+}
+
+void checkModelfile(const std::string &model_filename)
+{
+ if (model_filename.empty())
+ {
+ // TODO Print usage instead of the below message
+ std::cerr << "Please specify model file. Run with `--help` for usage."
+ << "\n";
+
+ exit(1);
+ }
+ else
+ {
+ if (access(model_filename.c_str(), F_OK) == -1)
+ {
+ std::cerr << "Model file not found: " << model_filename << "\n";
+ exit(1);
+ }
+ }
+}
+
+void checkPackage(const std::string &package_filename)
+{
+ if (package_filename.empty())
+ {
+ // TODO Print usage instead of the below message
+ std::cerr << "Please specify nnpackage file. Run with `--help` for usage."
+ << "\n";
+
+ exit(1);
+ }
+ else
+ {
+ if (access(package_filename.c_str(), F_OK) == -1)
+ {
+ std::cerr << "nnpackage not found: " << package_filename << "\n";
+ exit(1);
+ }
+ }
+}
+
+} // namespace
+
+namespace onert_run
+{
+
+Args::Args(const int argc, char **argv)
+{
+ Initialize();
+ Parse(argc, argv);
+}
+
+void Args::Initialize(void)
+{
+ auto process_nnpackage = [&](const std::string &package_filename) {
+ _package_filename = package_filename;
+
+ std::cerr << "Package Filename " << _package_filename << std::endl;
+ checkPackage(package_filename);
+ };
+
+ auto process_modelfile = [&](const std::string &model_filename) {
+ _model_filename = model_filename;
+
+ std::cerr << "Model Filename " << _model_filename << std::endl;
+ checkModelfile(model_filename);
+
+ _use_single_model = true;
+ };
+
+ auto process_path = [&](const std::string &path) {
+ struct stat sb;
+ if (stat(path.c_str(), &sb) == 0)
+ {
+ if (sb.st_mode & S_IFDIR)
+ {
+ _package_filename = path;
+ checkPackage(path);
+ std::cerr << "Package Filename " << path << std::endl;
+ }
+ else
+ {
+ _model_filename = path;
+ checkModelfile(path);
+ std::cerr << "Model Filename " << path << std::endl;
+ _use_single_model = true;
+ }
+ }
+ else
+ {
+ std::cerr << "Cannot find: " << path << "\n";
+ exit(1);
+ }
+ };
+
+ auto process_output_sizes = [&](const std::string &output_sizes_json_str) {
+ Json::Value root;
+ Json::Reader reader;
+ if (!reader.parse(output_sizes_json_str, root, false))
+ {
+ std::cerr << "Invalid JSON format for output_sizes \"" << output_sizes_json_str << "\"\n";
+ exit(1);
+ }
+
+ auto arg_map = argArrayToMap(root);
+ for (auto &pair : arg_map)
+ {
+ uint32_t key = pair.first;
+ Json::Value &val_json = pair.second;
+ if (!val_json.isUInt())
+ {
+ std::cerr << "All the values in `output_sizes` must be unsigned integers\n";
+ exit(1);
+ }
+ uint32_t val = val_json.asUInt();
+ _output_sizes[key] = val;
+ }
+ };
+
+ auto process_shape_prepare = [&](const std::string &shape_str) {
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+ if (shape_str == "H5" || shape_str == "h5")
+ {
+ _when_to_use_h5_shape = WhenToUseH5Shape::PREPARE;
+ return;
+ }
+#endif
+ try
+ {
+ handleShapeJsonParam(_shape_prepare, shape_str);
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << "error with '--shape_prepare' option: " << shape_str << std::endl;
+ exit(1);
+ }
+ };
+
+ auto process_shape_run = [&](const std::string &shape_str) {
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+ if (shape_str == "H5" || shape_str == "h5")
+ {
+ _when_to_use_h5_shape = WhenToUseH5Shape::RUN;
+ return;
+ }
+#endif
+ try
+ {
+ handleShapeJsonParam(_shape_run, shape_str);
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << "error with '--shape_run' option: " << shape_str << std::endl;
+ exit(1);
+ }
+ };
+
+ // General options
+ po::options_description general("General options", 100);
+
+ // clang-format off
+ general.add_options()
+ ("help,h", "Print available options")
+ ("version", "Print version and exit immediately")
+ ("nnpackage", po::value<std::string>()->notifier(process_nnpackage), "NN Package file(directory) name")
+ ("modelfile", po::value<std::string>()->notifier(process_modelfile), "NN Model filename")
+ ("path", po::value<std::string>()->notifier(process_path), "NN Package or NN Modelfile path")
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+ ("dump,d", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _dump_filename = v; }), "Output filename")
+ ("load,l", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _load_filename = v; }), "Input filename")
+#endif
+ ("dump:raw", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _dump_raw_filename = v; }), "Raw Output filename")
+ ("load:raw", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _load_raw_filename = v; }), "Raw Input filename")
+ ("output_sizes", po::value<std::string>()->notifier(process_output_sizes),
+ "The output buffer size in JSON 1D array\n"
+ "If not given, the model's output sizes are used\n"
+ "e.g. '[0, 40, 2, 80]' to set 0th tensor to 40 and 2nd tensor to 80.\n")
+ ("num_runs,r", po::value<int>()->default_value(1)->notifier([&](const auto &v) { _num_runs = v; }), "The number of runs")
+ ("warmup_runs,w", po::value<int>()->default_value(0)->notifier([&](const auto &v) { _warmup_runs = v; }), "The number of warmup runs")
+ ("run_delay,t", po::value<int>()->default_value(-1)->notifier([&](const auto &v) { _run_delay = v; }), "Delay time(us) between runs (as default no delay")
+ ("gpumem_poll,g", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _gpumem_poll = v; }), "Check gpu memory polling separately")
+ ("mem_poll,m", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _mem_poll = v; }), "Check memory polling")
+ ("write_report,p", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _write_report = v; }),
+ "Write report\n"
+ "{exec}-{nnpkg|modelfile}-{backend}.csv will be generated.\n"
+ "e.g. onert_run-UNIT_Add_000-acl_cl.csv.\n"
+ "{nnpkg|modelfile} name may be changed to realpath if you use symbolic-link.")
+ ("shape_prepare", po::value<std::string>()->default_value("[]")->notifier(process_shape_prepare),
+ "Please refer to the description of 'shape_run'")
+ ("shape_run", po::value<std::string>()->default_value("[]")->notifier(process_shape_run),
+ "'--shape_prepare: set shape of tensors before compilation (before calling nnfw_prepare()).\n"
+ "'--shape_run: set shape of tensors before running (before calling nnfw_run()).\n"
+ "Allowed value:.\n"
+ "'[0, [1, 2], 2, []]': set 0th tensor to [1, 2] and 2nd tensor to [] (scalar).\n"
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+ "'h5': read shape(s) from H5 input file. '--load' should also be provided.\n"
+ "if '--load' option is provided but '--shape_prepare' or '--shape_run' is not provided,\n"
+ "'--shape_run h5' will be used by default.\n"
+#endif
+ "For detailed description, please consutl the description of nnfw_set_input_tensorinfo()\n"
+ )
+ ("verbose_level,v", po::value<int>()->default_value(0)->notifier([&](const auto &v) { _verbose_level = v; }),
+ "Verbose level\n"
+ "0: prints the only result. Messages btw run don't print\n"
+ "1: prints result and message btw run\n"
+ "2: prints all of messages to print\n")
+ ("quantize,q", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _quantize = v; }), "Request quantization with type (int8 or int16)")
+ ("qpath", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _quantized_model_path = v; }),
+ "Path to export quantized model.\n"
+ "If it is not set, the quantized model will be exported to the same directory of the original model/package with q8/q16 suffix.")
+ ;
+ // clang-format on
+
+ _options.add(general);
+ _positional.add("path", -1);
+}
+
+void Args::Parse(const int argc, char **argv)
+{
+ po::variables_map vm;
+ po::store(po::command_line_parser(argc, argv).options(_options).positional(_positional).run(),
+ vm);
+
+ if (vm.count("help"))
+ {
+ std::cout << "onert_run\n\n";
+ std::cout << "Usage: " << argv[0] << " path to nnpackage root directory [<options>]\n\n";
+ std::cout << _options;
+ std::cout << "\n";
+
+ exit(0);
+ }
+
+ if (vm.count("version"))
+ {
+ _print_version = true;
+ return;
+ }
+
+ {
+ auto conflicting_options = [&](const std::string &o1, const std::string &o2) {
+ if ((vm.count(o1) && !vm[o1].defaulted()) && (vm.count(o2) && !vm[o2].defaulted()))
+ {
+ throw boost::program_options::error(std::string("Two options '") + o1 + "' and '" + o2 +
+ "' cannot be given at once.");
+ }
+ };
+
+    // Calling, e.g., "onert_run .. --shape_prepare .. --shape_run .." should theoretically
+    // work, but allowing both options on the command line at once would make the usage and
+    // implementation of onert_run too complicated. Therefore, those options are not allowed together.
+ conflicting_options("shape_prepare", "shape_run");
+
+ // Cannot use both single model file and nnpackage at once
+ conflicting_options("modelfile", "nnpackage");
+
+ // Require modelfile, nnpackage, or path
+ if (!vm.count("modelfile") && !vm.count("nnpackage") && !vm.count("path"))
+ throw boost::program_options::error(
+ std::string("Require one of options modelfile, nnpackage, or path."));
+ }
+
+ try
+ {
+ po::notify(vm);
+ }
+ catch (const std::bad_cast &e)
+ {
+ std::cerr << "Bad cast error - " << e.what() << '\n';
+ exit(1);
+ }
+
+  // This must be run after `notify` as `_warmup_runs` must have been processed before.
+ if (vm.count("mem_poll"))
+ {
+    // Memory polling runs during WARMUP (not EXECUTE) to avoid overhead, so ensure at least one warmup run
+ if (_mem_poll && _warmup_runs == 0)
+ {
+ _warmup_runs = 1;
+ }
+ }
+}
+
+bool Args::shapeParamProvided()
+{
+ bool provided = false;
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+ // "--shape_run h5" or "--shape_prepare h5" was provided
+ provided = (getWhenToUseH5Shape() != WhenToUseH5Shape::NOT_PROVIDED);
+#endif
+ // specific shape was provided
+ // e.g., "--shape_run '[0, [10, 1]]'" or "--shape_prepare '[0, [10, 1]]'"
+ provided |= (!getShapeMapForPrepare().empty()) || (!getShapeMapForRun().empty());
+
+ return provided;
+}
+
+} // end of namespace onert_run
diff --git a/tests/tools/onert_run/src/args.h b/tests/tools/onert_run/src/args.h
new file mode 100644
index 000000000..97d9b1af1
--- /dev/null
+++ b/tests/tools/onert_run/src/args.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_RUN_ARGS_H__
+#define __ONERT_RUN_ARGS_H__
+
+#include <string>
+#include <unordered_map>
+#include <vector>
+#include <boost/program_options.hpp>
+
+#include "types.h"
+
+namespace po = boost::program_options;
+
+namespace onert_run
+{
+
+using TensorShapeMap = std::unordered_map<uint32_t, TensorShape>;
+
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+enum class WhenToUseH5Shape
+{
+ NOT_PROVIDED, // Param not provided
+ PREPARE, // read shapes in h5 file and set them as inputs' shape before calling nnfw_prepare()
+ RUN, // read shapes in h5 file and set them as inputs' shape before calling nnfw_run()
+};
+#endif
+
+class Args
+{
+public:
+ Args(const int argc, char **argv);
+ void print(void);
+
+ const std::string &getPackageFilename(void) const { return _package_filename; }
+ const std::string &getModelFilename(void) const { return _model_filename; }
+ const bool useSingleModel(void) const { return _use_single_model; }
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+ const std::string &getDumpFilename(void) const { return _dump_filename; }
+ const std::string &getLoadFilename(void) const { return _load_filename; }
+ WhenToUseH5Shape getWhenToUseH5Shape(void) const { return _when_to_use_h5_shape; }
+#endif
+ const std::string &getDumpRawFilename(void) const { return _dump_raw_filename; }
+ const std::string &getLoadRawFilename(void) const { return _load_raw_filename; }
+ const int getNumRuns(void) const { return _num_runs; }
+ const int getWarmupRuns(void) const { return _warmup_runs; }
+ const int getRunDelay(void) const { return _run_delay; }
+ std::unordered_map<uint32_t, uint32_t> getOutputSizes(void) const { return _output_sizes; }
+ const bool getGpuMemoryPoll(void) const { return _gpumem_poll; }
+ const bool getMemoryPoll(void) const { return _mem_poll; }
+ const bool getWriteReport(void) const { return _write_report; }
+ const bool printVersion(void) const { return _print_version; }
+ TensorShapeMap &getShapeMapForPrepare() { return _shape_prepare; }
+ TensorShapeMap &getShapeMapForRun() { return _shape_run; }
+ /// @brief Return true if "--shape_run" or "--shape_prepare" is provided
+ bool shapeParamProvided();
+ const int getVerboseLevel(void) const { return _verbose_level; }
+ const std::string &getQuantize(void) const { return _quantize; }
+ const std::string &getQuantizedModelPath(void) const { return _quantized_model_path; }
+
+private:
+ void Initialize();
+ void Parse(const int argc, char **argv);
+
+private:
+ po::positional_options_description _positional;
+ po::options_description _options;
+
+ std::string _package_filename;
+ std::string _model_filename;
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+ std::string _dump_filename;
+ std::string _load_filename;
+ WhenToUseH5Shape _when_to_use_h5_shape = WhenToUseH5Shape::NOT_PROVIDED;
+#endif
+ std::string _dump_raw_filename;
+ std::string _load_raw_filename;
+ TensorShapeMap _shape_prepare;
+ TensorShapeMap _shape_run;
+ int _num_runs;
+ int _warmup_runs;
+ int _run_delay;
+ std::unordered_map<uint32_t, uint32_t> _output_sizes;
+ bool _gpumem_poll;
+ bool _mem_poll;
+ bool _write_report;
+ bool _print_version = false;
+ int _verbose_level;
+ bool _use_single_model = false;
+ std::string _quantize;
+ std::string _quantized_model_path;
+};
+
+} // end of namespace onert_run
+
+#endif // __ONERT_RUN_ARGS_H__
diff --git a/tests/tools/onert_run/src/formatter.h b/tests/tools/onert_run/src/formatter.h
new file mode 100644
index 000000000..5b73d2337
--- /dev/null
+++ b/tests/tools/onert_run/src/formatter.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_RUN_FORMATTER_H__
+#define __ONERT_RUN_FORMATTER_H__
+
+#include <string>
+#include <vector>
+
+#include "types.h"
+#include "allocation.h"
+
+struct nnfw_session;
+
+namespace onert_run
+{
+class Formatter
+{
+public:
+ virtual ~Formatter() = default;
+ Formatter(nnfw_session *sess) : session_(sess) {}
+ virtual void loadInputs(const std::string &filename, std::vector<Allocation> &inputs) = 0;
+ virtual void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs) = 0;
+ virtual std::vector<TensorShape> readTensorShapes(const std::string &filename)
+ {
+ return std::vector<TensorShape>();
+  }
+
+protected:
+ nnfw_session *session_;
+};
+} // namespace onert_run
+
+#endif // __ONERT_RUN_FORMATTER_H__
diff --git a/tests/tools/onert_run/src/h5formatter.cc b/tests/tools/onert_run/src/h5formatter.cc
new file mode 100644
index 000000000..5ea6e4c4a
--- /dev/null
+++ b/tests/tools/onert_run/src/h5formatter.cc
@@ -0,0 +1,258 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "h5formatter.h"
+#include "nnfw.h"
+#include "nnfw_util.h"
+
+#include <iostream>
+#include <stdexcept>
+#include <H5Cpp.h>
+
+namespace
+{
+onert_run::TensorShape getShape(H5::DataSet &data_set)
+{
+ std::vector<hsize_t> h5_shape; // hsize_t is unsigned long long
+ H5::DataSpace data_space = data_set.getSpace();
+ int rank = data_space.getSimpleExtentNdims();
+ h5_shape.resize(rank);
+
+ // read shape info from H5 file
+ data_space.getSimpleExtentDims(h5_shape.data(), NULL);
+
+ onert_run::TensorShape shape;
+ for (auto dim : h5_shape)
+ shape.emplace_back(static_cast<int>(dim));
+
+ return shape;
+}
+} // namespace
+
+namespace onert_run
+{
+static const char *h5_value_grpname = "value";
+
+std::vector<TensorShape> H5Formatter::readTensorShapes(const std::string &filename)
+{
+ uint32_t num_inputs;
+ NNPR_ENSURE_STATUS(nnfw_input_size(session_, &num_inputs));
+ std::vector<TensorShape> tensor_shapes;
+
+ try
+ {
+ H5::Exception::dontPrint();
+
+ H5::H5File file(filename, H5F_ACC_RDONLY);
+ H5::Group value_group = file.openGroup(h5_value_grpname);
+
+ // Constraints: if there are n data set names, they should be unique and
+ // one of [ "0", "1", .. , "n-1" ]
+ for (uint32_t i = 0; i < num_inputs; ++i)
+ {
+ H5::DataSet data_set = value_group.openDataSet(std::to_string(i));
+ H5::DataType type = data_set.getDataType();
+ auto shape = getShape(data_set);
+
+ tensor_shapes.emplace_back(shape);
+ }
+
+ return tensor_shapes;
+ }
+ catch (const H5::Exception &e)
+ {
+ H5::Exception::printErrorStack();
+ std::exit(-1);
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << e.what() << std::endl;
+ std::exit(-1);
+ }
+}
+
+void H5Formatter::loadInputs(const std::string &filename, std::vector<Allocation> &inputs)
+{
+ uint32_t num_inputs;
+ NNPR_ENSURE_STATUS(nnfw_input_size(session_, &num_inputs));
+ try
+ {
+ // Turn off the automatic error printing.
+ H5::Exception::dontPrint();
+
+ H5::H5File file(filename, H5F_ACC_RDONLY);
+ H5::Group value_group = file.openGroup(h5_value_grpname);
+ for (uint32_t i = 0; i < num_inputs; ++i)
+ {
+ nnfw_tensorinfo ti;
+ NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session_, i, &ti));
+
+ // TODO Add Assert(nnfw shape, h5 file shape size)
+
+ // allocate memory for data
+ auto bufsz = bufsize_for(&ti);
+ inputs[i].alloc(bufsz);
+
+ H5::DataSet data_set = value_group.openDataSet(std::to_string(i));
+ H5::DataType type = data_set.getDataType();
+ switch (ti.dtype)
+ {
+ case NNFW_TYPE_TENSOR_FLOAT32:
+ if (type == H5::PredType::IEEE_F32BE || type == H5::PredType::IEEE_F32LE)
+ data_set.read(inputs[i].data(), H5::PredType::NATIVE_FLOAT);
+ else
+ throw std::runtime_error("model input type is f32. But h5 data type is different.");
+ break;
+ case NNFW_TYPE_TENSOR_INT32:
+ if (type == H5::PredType::STD_I32BE || type == H5::PredType::STD_I32LE)
+ data_set.read(inputs[i].data(), H5::PredType::NATIVE_INT32);
+ else
+ throw std::runtime_error("model input type is i32. But h5 data type is different.");
+ break;
+ case NNFW_TYPE_TENSOR_INT64:
+ if (type == H5::PredType::STD_I64BE || type == H5::PredType::STD_I64LE)
+ data_set.read(inputs[i].data(), H5::PredType::NATIVE_INT64);
+ else
+ throw std::runtime_error("model input type is i64. But h5 data type is different.");
+ break;
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
+ case NNFW_TYPE_TENSOR_BOOL:
+ case NNFW_TYPE_TENSOR_UINT8:
+ if (type == H5::PredType::STD_U8BE || type == H5::PredType::STD_U8LE)
+ data_set.read(inputs[i].data(), H5::PredType::NATIVE_UINT8);
+ else
+ throw std::runtime_error(
+ "model input type is qasymm8, bool or uint8. But h5 data type is different.");
+ break;
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
+ if (type == H5::PredType::STD_I8BE || type == H5::PredType::STD_I8LE)
+ data_set.read(inputs[i].data(), H5::PredType::NATIVE_INT8);
+ else
+ throw std::runtime_error("model input type is int8. But h5 data type is different.");
+ break;
+ case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED:
+ throw std::runtime_error("NYI for NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED type");
+        default:
+          throw std::runtime_error("onert_run can load f32, i32, i64, qasymm8, qasymm8_signed, bool and uint8.");
+ }
+ NNPR_ENSURE_STATUS(nnfw_set_input(session_, i, ti.dtype, inputs[i].data(), bufsz));
+ NNPR_ENSURE_STATUS(nnfw_set_input_layout(session_, i, NNFW_LAYOUT_CHANNELS_LAST));
+ }
+ }
+ catch (const H5::Exception &e)
+ {
+ H5::Exception::printErrorStack();
+ std::exit(-1);
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << e.what() << std::endl;
+ std::exit(-1);
+ }
+}
+
+void H5Formatter::dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs)
+{
+ uint32_t num_outputs;
+ NNPR_ENSURE_STATUS(nnfw_output_size(session_, &num_outputs));
+ try
+ {
+ // Turn off the automatic error printing.
+ H5::Exception::dontPrint();
+
+ H5::H5File file(filename, H5F_ACC_TRUNC);
+ H5::Group value_group = file.createGroup(h5_value_grpname);
+ for (uint32_t i = 0; i < num_outputs; i++)
+ {
+ nnfw_tensorinfo ti;
+ NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session_, i, &ti));
+ std::vector<hsize_t> dims(ti.rank);
+ for (uint32_t j = 0; j < ti.rank; ++j)
+ {
+ if (ti.dims[j] >= 0)
+ dims[j] = static_cast<hsize_t>(ti.dims[j]);
+ else
+ {
+ std::cerr << "Negative dimension in output tensor" << std::endl;
+ exit(-1);
+ }
+ }
+ H5::DataSpace data_space(ti.rank, dims.data());
+ switch (ti.dtype)
+ {
+ case NNFW_TYPE_TENSOR_FLOAT32:
+ {
+ H5::DataSet data_set =
+ value_group.createDataSet(std::to_string(i), H5::PredType::IEEE_F32BE, data_space);
+ data_set.write(outputs[i].data(), H5::PredType::NATIVE_FLOAT);
+ break;
+ }
+ case NNFW_TYPE_TENSOR_INT32:
+ {
+ H5::DataSet data_set =
+ value_group.createDataSet(std::to_string(i), H5::PredType::STD_I32LE, data_space);
+ data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT32);
+ break;
+ }
+ case NNFW_TYPE_TENSOR_INT64:
+ {
+ H5::DataSet data_set =
+ value_group.createDataSet(std::to_string(i), H5::PredType::STD_I64LE, data_space);
+ data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT64);
+ break;
+ }
+ case NNFW_TYPE_TENSOR_UINT8:
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
+ {
+ H5::DataSet data_set =
+ value_group.createDataSet(std::to_string(i), H5::PredType::STD_U8BE, data_space);
+ data_set.write(outputs[i].data(), H5::PredType::NATIVE_UINT8);
+ break;
+ }
+ case NNFW_TYPE_TENSOR_BOOL:
+ {
+ H5::DataSet data_set =
+ value_group.createDataSet(std::to_string(i), H5::PredType::STD_U8LE, data_space);
+ data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT8);
+ break;
+ }
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
+ {
+ H5::DataSet data_set =
+ value_group.createDataSet(std::to_string(i), H5::PredType::STD_I8LE, data_space);
+ data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT8);
+ break;
+ }
+ case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED:
+ throw std::runtime_error("NYI for NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED type");
+        default:
+          throw std::runtime_error("onert_run can dump f32, i32, i64, qasymm8, qasymm8_signed, bool and uint8.");
+ }
+ }
+ }
+ catch (const H5::Exception &e)
+ {
+ H5::Exception::printErrorStack();
+ std::exit(-1);
+ }
+ catch (const std::runtime_error &e)
+ {
+ std::cerr << "Error during dumpOutputs on onert_run : " << e.what() << std::endl;
+ std::exit(-1);
+ }
+}
+
+} // end of namespace onert_run
diff --git a/tests/tools/onert_run/src/h5formatter.h b/tests/tools/onert_run/src/h5formatter.h
new file mode 100644
index 000000000..7ebb33f2e
--- /dev/null
+++ b/tests/tools/onert_run/src/h5formatter.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_RUN_H5FORMATTER_H__
+#define __ONERT_RUN_H5FORMATTER_H__
+
+#include "allocation.h"
+#include "formatter.h"
+#include "types.h"
+
+#include <string>
+#include <vector>
+
+struct nnfw_session;
+
+namespace onert_run
+{
+class H5Formatter : public Formatter
+{
+public:
+ H5Formatter(nnfw_session *sess) : Formatter(sess) {}
+ std::vector<TensorShape> readTensorShapes(const std::string &filename) override;
+ void loadInputs(const std::string &filename, std::vector<Allocation> &inputs) override;
+ void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs) override;
+};
+} // namespace onert_run
+
+#endif // __ONERT_RUN_H5FORMATTER_H__
diff --git a/tests/tools/onert_run/src/nnfw_util.cc b/tests/tools/onert_run/src/nnfw_util.cc
new file mode 100644
index 000000000..0a21395fd
--- /dev/null
+++ b/tests/tools/onert_run/src/nnfw_util.cc
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cassert>
+#include <string>
+#include "nnfw.h"
+
+namespace onert_run
+{
+uint64_t num_elems(const nnfw_tensorinfo *ti)
+{
+ uint64_t n = 1;
+ for (uint32_t i = 0; i < ti->rank; ++i)
+ {
+ assert(ti->dims[i] >= 0);
+ n *= ti->dims[i];
+ }
+ return n;
+}
+
+uint64_t bufsize_for(const nnfw_tensorinfo *ti)
+{
+ static int elmsize[] = {
+ sizeof(float), /* NNFW_TYPE_TENSOR_FLOAT32 */
+ sizeof(int), /* NNFW_TYPE_TENSOR_INT32 */
+ sizeof(uint8_t), /* NNFW_TYPE_TENSOR_QUANT8_ASYMM */
+ sizeof(bool), /* NNFW_TYPE_TENSOR_BOOL = 3 */
+ sizeof(uint8_t), /* NNFW_TYPE_TENSOR_UINT8 = 4 */
+ sizeof(int64_t), /* NNFW_TYPE_TENSOR_INT64 = 5 */
+ sizeof(int8_t), /* NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED = 6 */
+ sizeof(int16_t), /* NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED = 7 */
+ };
+ return elmsize[ti->dtype] * num_elems(ti);
+}
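+
+// Worked example (illustrative): a FLOAT32 tensor of shape [1, 224, 224, 3] has
+// num_elems() == 1 * 224 * 224 * 3 == 150528 elements, so bufsize_for() returns
+// 150528 * sizeof(float) == 602112 bytes.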
+
+} // namespace onert_run
diff --git a/tests/tools/onert_run/src/nnfw_util.h b/tests/tools/onert_run/src/nnfw_util.h
new file mode 100644
index 000000000..1fcdfdf19
--- /dev/null
+++ b/tests/tools/onert_run/src/nnfw_util.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_RUN_NNFW_UTIL_H__
+#define __ONERT_RUN_NNFW_UTIL_H__
+
+#include "nnfw.h"
+
+#define NNPR_ENSURE_STATUS(a) \
+ do \
+ { \
+ if ((a) != NNFW_STATUS_NO_ERROR) \
+ { \
+ exit(-1); \
+ } \
+ } while (0)
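+
+// Usage sketch: wrap any nnfw C-API call that returns NNFW_STATUS, e.g.
+//   NNPR_ENSURE_STATUS(nnfw_create_session(&session));
+// The process exits with -1 on any status other than NNFW_STATUS_NO_ERROR.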
+
+namespace onert_run
+{
+uint64_t num_elems(const nnfw_tensorinfo *ti);
+uint64_t bufsize_for(const nnfw_tensorinfo *ti);
+} // end of namespace onert_run
+
+#endif // __ONERT_RUN_NNFW_UTIL_H__
diff --git a/tests/tools/onert_run/src/onert_run.cc b/tests/tools/onert_run/src/onert_run.cc
new file mode 100644
index 000000000..0bc64bb2b
--- /dev/null
+++ b/tests/tools/onert_run/src/onert_run.cc
@@ -0,0 +1,390 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "allocation.h"
+#include "args.h"
+#include "benchmark.h"
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+#include "h5formatter.h"
+#endif
+#include "nnfw.h"
+#include "nnfw_util.h"
+#include "nnfw_internal.h"
+#include "nnfw_experimental.h"
+#include "randomgen.h"
+#include "rawformatter.h"
+#ifdef RUY_PROFILER
+#include "ruy/profiler/profiler.h"
+#endif
+
+#include <boost/program_options.hpp>
+#include <cassert>
+#include <chrono>
+#include <cstdlib>
+#include <iostream>
+#include <libgen.h>
+#include <stdexcept>
+#include <unordered_map>
+#include <vector>
+
+static const char *default_backend_cand = "cpu";
+
+void overwriteShapeMap(onert_run::TensorShapeMap &shape_map,
+ std::vector<onert_run::TensorShape> shapes)
+{
+ for (uint32_t i = 0; i < shapes.size(); i++)
+ shape_map[i] = shapes[i];
+}
+
+std::string genQuantizedModelPathFromModelPath(const std::string &model_path, bool is_q16)
+{
+ auto const extension_pos = model_path.find(".circle");
+ if (extension_pos == std::string::npos)
+ {
+ std::cerr << "Input model isn't .circle." << std::endl;
+ exit(-1);
+ }
+ auto const qstring = std::string("_quantized_") + (is_q16 ? "q16" : "q8");
+ return model_path.substr(0, extension_pos) + qstring + ".circle";
+}
+
+std::string genQuantizedModelPathFromPackagePath(const std::string &package_path, bool is_q16)
+{
+ auto package_path_without_slash = package_path;
+ if (package_path_without_slash.back() == '/')
+ package_path_without_slash.pop_back();
+ auto package_name_pos = package_path_without_slash.find_last_of('/');
+ if (package_name_pos == std::string::npos)
+ package_name_pos = 0;
+ else
+ package_name_pos++;
+ auto package_name = package_path_without_slash.substr(package_name_pos);
+ auto const qstring = std::string("_quantized_") + (is_q16 ? "q16" : "q8");
+ return package_path_without_slash + "/" + package_name + qstring + ".circle";
+}
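+
+// Illustrative examples (hypothetical paths):
+//   genQuantizedModelPathFromModelPath("dir/model.circle", false) -> "dir/model_quantized_q8.circle"
+//   genQuantizedModelPathFromPackagePath("dir/pkg/", true)        -> "dir/pkg/pkg_quantized_q16.circle"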
+
+int main(const int argc, char **argv)
+{
+ using namespace onert_run;
+
+ try
+ {
+ Args args(argc, argv);
+ if (args.printVersion())
+ {
+ uint32_t version;
+ NNPR_ENSURE_STATUS(nnfw_query_info_u32(NULL, NNFW_INFO_ID_VERSION, &version));
+ std::cout << "onert_run (nnfw runtime: v" << (version >> 24) << "."
+ << ((version & 0x0000FF00) >> 8) << "." << (version & 0xFF) << ")" << std::endl;
+ exit(0);
+ }
+
+#ifdef RUY_PROFILER
+ ruy::profiler::ScopeProfile ruy_profile;
+#endif
+
+ // TODO Apply verbose level to phases
+ const int verbose = args.getVerboseLevel();
+ benchmark::Phases phases(
+ benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()});
+
+ nnfw_session *session = nullptr;
+ NNPR_ENSURE_STATUS(nnfw_create_session(&session));
+
+ // ModelLoad
+ phases.run("MODEL_LOAD", [&](const benchmark::Phase &, uint32_t) {
+ if (args.useSingleModel())
+ NNPR_ENSURE_STATUS(
+ nnfw_load_model_from_modelfile(session, args.getModelFilename().c_str()));
+ else
+ NNPR_ENSURE_STATUS(nnfw_load_model_from_file(session, args.getPackageFilename().c_str()));
+ });
+
+ // Quantize model
+ auto quantize = args.getQuantize();
+ if (!quantize.empty())
+ {
+ NNFW_QUANTIZE_TYPE quantize_type = NNFW_QUANTIZE_TYPE_NOT_SET;
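+      // Note: "int8" maps to asymmetric uint8 quantization (U8_ASYM) and "int16" to
+      // symmetric int16 quantization (I16_SYM); any other value leaves the type unset.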
+ if (quantize == "int8")
+ quantize_type = NNFW_QUANTIZE_TYPE_U8_ASYM;
+ if (quantize == "int16")
+ quantize_type = NNFW_QUANTIZE_TYPE_I16_SYM;
+ NNPR_ENSURE_STATUS(nnfw_set_quantization_type(session, quantize_type));
+
+ if (args.getQuantizedModelPath() != "")
+ NNPR_ENSURE_STATUS(
+ nnfw_set_quantized_model_path(session, args.getQuantizedModelPath().c_str()));
+ else
+ {
+ if (args.useSingleModel())
+ NNPR_ENSURE_STATUS(nnfw_set_quantized_model_path(
+ session,
+ genQuantizedModelPathFromModelPath(args.getModelFilename(), quantize == "int16")
+ .c_str()));
+ else
+ NNPR_ENSURE_STATUS(nnfw_set_quantized_model_path(
+ session,
+ genQuantizedModelPathFromPackagePath(args.getPackageFilename(), quantize == "int16")
+ .c_str()));
+ }
+
+ NNPR_ENSURE_STATUS(nnfw_quantize(session));
+ }
+
+ char *available_backends = std::getenv("BACKENDS");
+ if (available_backends)
+ NNPR_ENSURE_STATUS(nnfw_set_available_backends(session, available_backends));
+
+ uint32_t num_inputs;
+ NNPR_ENSURE_STATUS(nnfw_input_size(session, &num_inputs));
+
+ // verify input and output
+
+ auto verifyInputTypes = [session]() {
+ uint32_t sz;
+ NNPR_ENSURE_STATUS(nnfw_input_size(session, &sz));
+ for (uint32_t i = 0; i < sz; ++i)
+ {
+ nnfw_tensorinfo ti;
+ NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, i, &ti));
+
+ if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED)
+ {
+ std::cerr << "E: not supported input type" << std::endl;
+ exit(-1);
+ }
+ }
+ };
+
+ auto verifyOutputTypes = [session]() {
+ uint32_t sz;
+ NNPR_ENSURE_STATUS(nnfw_output_size(session, &sz));
+
+ for (uint32_t i = 0; i < sz; ++i)
+ {
+ nnfw_tensorinfo ti;
+ NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
+
+ if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED)
+ {
+ std::cerr << "E: not supported output type" << std::endl;
+ exit(-1);
+ }
+ }
+ };
+
+ auto setTensorInfo = [session](const TensorShapeMap &tensor_shape_map) {
+ for (auto tensor_shape : tensor_shape_map)
+ {
+ auto ind = tensor_shape.first;
+ auto &shape = tensor_shape.second;
+ nnfw_tensorinfo ti;
+ // to fill dtype
+ NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, ind, &ti));
+
+ bool set_input = false;
+ if (ti.rank != shape.size())
+ {
+ set_input = true;
+ }
+ else
+ {
+ for (int i = 0; i < ti.rank; i++)
+ {
+ if (ti.dims[i] != shape.at(i))
+ {
+ set_input = true;
+ break;
+ }
+ }
+ }
+ if (!set_input)
+ continue;
+
+ ti.rank = shape.size();
+ for (int i = 0; i < ti.rank; i++)
+ ti.dims[i] = shape.at(i);
+ NNPR_ENSURE_STATUS(nnfw_set_input_tensorinfo(session, ind, &ti));
+ }
+ };
+
+ verifyInputTypes();
+ verifyOutputTypes();
+
+// set input shape before compilation
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+
+ auto fill_shape_from_h5 = [&session](const std::string &h5_file, TensorShapeMap &shape_map) {
+ assert(!h5_file.empty());
+ auto shapes = H5Formatter(session).readTensorShapes(h5_file);
+ overwriteShapeMap(shape_map, shapes);
+ };
+
+ if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::PREPARE)
+ fill_shape_from_h5(args.getLoadFilename(), args.getShapeMapForPrepare());
+#endif
+ setTensorInfo(args.getShapeMapForPrepare());
+
+ // prepare execution
+
+ // TODO When nnfw_{prepare|run} are failed, can't catch the time
+ phases.run("PREPARE", [&](const benchmark::Phase &, uint32_t) {
+ NNPR_ENSURE_STATUS(nnfw_prepare(session));
+ });
+
+// set input shape after compilation and before execution
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+ if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::RUN ||
+ (!args.getLoadFilename().empty() && !args.shapeParamProvided()))
+ fill_shape_from_h5(args.getLoadFilename(), args.getShapeMapForRun());
+#endif
+ setTensorInfo(args.getShapeMapForRun());
+
+ // prepare input
+ std::vector<Allocation> inputs(num_inputs);
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+ if (!args.getLoadFilename().empty())
+ H5Formatter(session).loadInputs(args.getLoadFilename(), inputs);
+ else if (!args.getLoadRawFilename().empty())
+ RawFormatter(session).loadInputs(args.getLoadRawFilename(), inputs);
+ else
+ RandomGenerator(session).generate(inputs);
+#else
+ if (!args.getLoadRawFilename().empty())
+ RawFormatter(session).loadInputs(args.getLoadRawFilename(), inputs);
+ else
+ RandomGenerator(session).generate(inputs);
+#endif
+
+ // prepare output
+ uint32_t num_outputs = 0;
+ NNPR_ENSURE_STATUS(nnfw_output_size(session, &num_outputs));
+ std::vector<Allocation> outputs(num_outputs);
+ auto output_sizes = args.getOutputSizes();
+ for (uint32_t i = 0; i < num_outputs; i++)
+ {
+ nnfw_tensorinfo ti;
+ // Query tensorinfo unconditionally: ti.dtype is needed by nnfw_set_output below even
+ // when the buffer size is overridden via the output_sizes option.
+ NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
+ uint64_t output_size_in_bytes = 0;
+ {
+ auto found = output_sizes.find(i);
+ if (found == output_sizes.end())
+ output_size_in_bytes = bufsize_for(&ti);
+ else
+ output_size_in_bytes = found->second;
+ }
+ outputs[i].alloc(output_size_in_bytes);
+ NNPR_ENSURE_STATUS(
+ nnfw_set_output(session, i, ti.dtype, outputs[i].data(), output_size_in_bytes));
+ NNPR_ENSURE_STATUS(nnfw_set_output_layout(session, i, NNFW_LAYOUT_CHANNELS_LAST));
+ }
+
+ // NOTE: Measuring memory inevitably adds overhead. Therefore, memory is measured only
+ // during the warmup phase.
+ if (verbose == 0)
+ {
+ phases.run(
+ "WARMUP",
+ [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
+ args.getWarmupRuns());
+ phases.run(
+ "EXECUTE",
+ [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
+ args.getNumRuns(), true);
+ }
+ else
+ {
+ phases.run(
+ "WARMUP",
+ [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
+ [&](const benchmark::Phase &phase, uint32_t nth) {
+ std::cout << "... "
+ << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
+ << std::endl;
+ },
+ args.getWarmupRuns());
+ phases.run(
+ "EXECUTE",
+ [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
+ [&](const benchmark::Phase &phase, uint32_t nth) {
+ std::cout << "... "
+ << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
+ << std::endl;
+ },
+ args.getNumRuns(), true);
+ }
+
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+ // dump output tensors
+ if (!args.getDumpFilename().empty())
+ H5Formatter(session).dumpOutputs(args.getDumpFilename(), outputs);
+#endif
+ if (!args.getDumpRawFilename().empty())
+ RawFormatter(session).dumpOutputs(args.getDumpRawFilename(), outputs);
+
+ NNPR_ENSURE_STATUS(nnfw_close_session(session));
+
+ // TODO Apply verbose level to result
+
+ // prepare result
+ benchmark::Result result(phases);
+
+ // to stdout
+ benchmark::printResult(result);
+
+ // to csv
+ if (args.getWriteReport() == false)
+ return 0;
+
+ // prepare csv task
+ std::string exec_basename;
+ std::string nnpkg_basename;
+ std::string backend_name = (available_backends) ? available_backends : default_backend_cand;
+ {
+ char buf[PATH_MAX];
+ char *res = args.useSingleModel() ? realpath(args.getModelFilename().c_str(), buf)
+ : realpath(args.getPackageFilename().c_str(), buf);
+ if (res)
+ {
+ nnpkg_basename = basename(buf);
+ }
+ else
+ {
+ std::cerr << "E: during getting realpath from nnpackage or model path." << std::endl;
+ exit(-1);
+ }
+ exec_basename = basename(argv[0]);
+ }
+
+ benchmark::writeResult(result, exec_basename, nnpkg_basename, backend_name);
+
+ return 0;
+ }
+ catch (boost::program_options::error &e)
+ {
+ std::cerr << "E: " << e.what() << std::endl;
+ exit(-1);
+ }
+ catch (std::runtime_error &e)
+ {
+ std::cerr << "E: Fail to run by runtime error:" << e.what() << std::endl;
+ exit(-1);
+ }
+}
diff --git a/tests/tools/onert_run/src/randomgen.cc b/tests/tools/onert_run/src/randomgen.cc
new file mode 100644
index 000000000..1a8a5045d
--- /dev/null
+++ b/tests/tools/onert_run/src/randomgen.cc
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "randomgen.h"
+#include "nnfw.h"
+#include "nnfw_util.h"
+#include "misc/RandomGenerator.h"
+
+#include <iostream>
+
+namespace onert_run
+{
+
+template <class T> void randomData(nnfw::misc::RandomGenerator &randgen, void *data, uint64_t size)
+{
+ for (uint64_t i = 0; i < size; i++)
+ reinterpret_cast<T *>(data)[i] = randgen.generate<T>();
+}
+
+void RandomGenerator::generate(std::vector<Allocation> &inputs)
+{
+ // generate random data
+ const int seed = 1;
+ nnfw::misc::RandomGenerator randgen{seed, 0.0f, 2.0f};
+ for (uint32_t i = 0; i < inputs.size(); ++i)
+ {
+ nnfw_tensorinfo ti;
+ NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session_, i, &ti));
+ auto input_size_in_bytes = bufsize_for(&ti);
+ inputs[i].alloc(input_size_in_bytes);
+ switch (ti.dtype)
+ {
+ case NNFW_TYPE_TENSOR_FLOAT32:
+ randomData<float>(randgen, inputs[i].data(), num_elems(&ti));
+ break;
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
+ randomData<uint8_t>(randgen, inputs[i].data(), num_elems(&ti));
+ break;
+ case NNFW_TYPE_TENSOR_BOOL:
+ randomData<bool>(randgen, inputs[i].data(), num_elems(&ti));
+ break;
+ case NNFW_TYPE_TENSOR_UINT8:
+ randomData<uint8_t>(randgen, inputs[i].data(), num_elems(&ti));
+ break;
+ case NNFW_TYPE_TENSOR_INT32:
+ randomData<int32_t>(randgen, inputs[i].data(), num_elems(&ti));
+ break;
+ case NNFW_TYPE_TENSOR_INT64:
+ randomData<int64_t>(randgen, inputs[i].data(), num_elems(&ti));
+ break;
+ case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED:
+ randomData<int16_t>(randgen, inputs[i].data(), num_elems(&ti));
+ break;
+ default:
+ std::cerr << "Not supported input type" << std::endl;
+ std::exit(-1);
+ }
+ NNPR_ENSURE_STATUS(
+ nnfw_set_input(session_, i, ti.dtype, inputs[i].data(), input_size_in_bytes));
+ NNPR_ENSURE_STATUS(nnfw_set_input_layout(session_, i, NNFW_LAYOUT_CHANNELS_LAST));
+ }
+};
+
+} // end of namespace onert_run
diff --git a/tests/tools/onert_run/src/randomgen.h b/tests/tools/onert_run/src/randomgen.h
new file mode 100644
index 000000000..58afb4171
--- /dev/null
+++ b/tests/tools/onert_run/src/randomgen.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_RUN_RANDOMGEN_H__
+#define __ONERT_RUN_RANDOMGEN_H__
+
+#include <string>
+#include <vector>
+
+#include "allocation.h"
+
+struct nnfw_session;
+
+namespace onert_run
+{
+class RandomGenerator
+{
+public:
+ RandomGenerator(nnfw_session *sess) : session_(sess) {}
+ void generate(std::vector<Allocation> &inputs);
+
+private:
+ nnfw_session *session_;
+};
+} // namespace onert_run
+
+#endif // __ONERT_RUN_RANDOMGEN_H__
diff --git a/tests/tools/onert_run/src/rawformatter.cc b/tests/tools/onert_run/src/rawformatter.cc
new file mode 100644
index 000000000..7cfab9904
--- /dev/null
+++ b/tests/tools/onert_run/src/rawformatter.cc
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "rawformatter.h"
+#include "nnfw.h"
+#include "nnfw_util.h"
+
+#include <iostream>
+#include <fstream>
+#include <stdexcept>
+
+namespace onert_run
+{
+void RawFormatter::loadInputs(const std::string &filename, std::vector<Allocation> &inputs)
+{
+ uint32_t num_inputs;
+ NNPR_ENSURE_STATUS(nnfw_input_size(session_, &num_inputs));
+
+ // Support multiple inputs
+ // Option 1: Get comma-separated input file list like --load:raw a,b,c
+ // Option 2: Get prefix --load:raw in
+ // Internally access in.0, in.1, in.2, ... in.{N-1}, where N is determined by
+ // the nnfw info query API.
+ //
+ // Currently Option 2 is implemented.
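+ // e.g. `--load:raw in` reads in.0, in.1, ..., in.{N-1}, where N = nnfw_input_size().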
+ try
+ {
+ for (uint32_t i = 0; i < num_inputs; ++i)
+ {
+ nnfw_tensorinfo ti;
+ NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session_, i, &ti));
+
+ // allocate memory for data
+ auto bufsz = bufsize_for(&ti);
+ inputs[i].alloc(bufsz);
+
+ std::ifstream file(filename + "." + std::to_string(i), std::ios::ate | std::ios::binary);
+ if (!file)
+ throw std::runtime_error("Cannot open input file: " + filename + "." + std::to_string(i));
+ auto filesz = file.tellg();
+ if (bufsz != filesz)
+ {
+ throw std::runtime_error("Input " + std::to_string(i) +
+ " size does not match: " + std::to_string(bufsz) +
+ " expected, but " + std::to_string(filesz) + " provided.");
+ }
+ file.seekg(0, std::ios::beg);
+ file.read(reinterpret_cast<char *>(inputs[i].data()), filesz);
+ file.close();
+
+ NNPR_ENSURE_STATUS(nnfw_set_input(session_, i, ti.dtype, inputs[i].data(), bufsz));
+ NNPR_ENSURE_STATUS(nnfw_set_input_layout(session_, i, NNFW_LAYOUT_CHANNELS_LAST));
+ }
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << e.what() << std::endl;
+ std::exit(-1);
+ }
+};
+
+void RawFormatter::dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs)
+{
+ uint32_t num_outputs;
+ NNPR_ENSURE_STATUS(nnfw_output_size(session_, &num_outputs));
+ try
+ {
+ for (uint32_t i = 0; i < num_outputs; i++)
+ {
+ nnfw_tensorinfo ti;
+ NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session_, i, &ti));
+ auto bufsz = bufsize_for(&ti);
+
+ std::ofstream file(filename + "." + std::to_string(i), std::ios::out | std::ios::binary);
+ file.write(reinterpret_cast<const char *>(outputs[i].data()), bufsz);
+ file.close();
+ std::cerr << filename + "." + std::to_string(i) + " is generated.\n";
+ }
+ }
+ catch (const std::runtime_error &e)
+ {
+ std::cerr << "Error during dumpOutputs on onert_run : " << e.what() << std::endl;
+ std::exit(-1);
+ }
+}
+} // end of namespace onert_run
diff --git a/tests/tools/onert_run/src/rawformatter.h b/tests/tools/onert_run/src/rawformatter.h
new file mode 100644
index 000000000..b6eaab66d
--- /dev/null
+++ b/tests/tools/onert_run/src/rawformatter.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_RUN_RAWFORMATTER_H__
+#define __ONERT_RUN_RAWFORMATTER_H__
+
+#include "allocation.h"
+#include "formatter.h"
+#include "types.h"
+
+#include <string>
+#include <vector>
+
+struct nnfw_session;
+
+namespace onert_run
+{
+class RawFormatter : public Formatter
+{
+public:
+ RawFormatter(nnfw_session *sess) : Formatter(sess) {}
+ void loadInputs(const std::string &filename, std::vector<Allocation> &inputs) override;
+ void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs) override;
+};
+} // namespace onert_run
+
+#endif // __ONERT_RUN_RAWFORMATTER_H__
diff --git a/tests/tools/onert_run/src/types.h b/tests/tools/onert_run/src/types.h
new file mode 100644
index 000000000..563c5e488
--- /dev/null
+++ b/tests/tools/onert_run/src/types.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_RUN_TYPES_H__
+#define __ONERT_RUN_TYPES_H__
+
+namespace onert_run
+{
+
+using TensorShape = std::vector<int>;
+
+} // end of namespace onert_run
+
+#endif // __ONERT_RUN_TYPES_H__
diff --git a/tests/tools/onert_train/CMakeLists.txt b/tests/tools/onert_train/CMakeLists.txt
new file mode 100644
index 000000000..f047b2ad0
--- /dev/null
+++ b/tests/tools/onert_train/CMakeLists.txt
@@ -0,0 +1,60 @@
+if(NOT BUILD_ONERT_TRAIN)
+ return()
+endif(NOT BUILD_ONERT_TRAIN)
+
+if(NOT BUILD_ONERT)
+ return()
+endif(NOT BUILD_ONERT)
+
+list(APPEND ONERT_TRAIN_SRCS "src/onert_train.cc")
+list(APPEND ONERT_TRAIN_SRCS "src/args.cc")
+list(APPEND ONERT_TRAIN_SRCS "src/nnfw_util.cc")
+list(APPEND ONERT_TRAIN_SRCS "src/randomgen.cc")
+list(APPEND ONERT_TRAIN_SRCS "src/rawformatter.cc")
+list(APPEND ONERT_TRAIN_SRCS "src/rawdataloader.cc")
+
+nnfw_find_package(Boost REQUIRED program_options)
+nnfw_find_package(HDF5 QUIET)
+
+if (HDF5_FOUND)
+ list(APPEND ONERT_TRAIN_SRCS "src/h5formatter.cc")
+endif()
+
+add_executable(onert_train ${ONERT_TRAIN_SRCS})
+
+if (HDF5_FOUND)
+ target_compile_definitions(onert_train PRIVATE ONERT_HAVE_HDF5=1)
+ target_include_directories(onert_train PRIVATE ${HDF5_INCLUDE_DIRS})
+ target_link_libraries(onert_train ${HDF5_CXX_LIBRARIES})
+else()
+ message(WARNING "HDF5 NOT found. Install libhdf5-dev or set EXT_HDF5_DIR to support load/dump in onert_train.")
+endif(HDF5_FOUND)
+
+target_include_directories(onert_train PRIVATE src)
+target_include_directories(onert_train PRIVATE ${Boost_INCLUDE_DIRS})
+
+target_link_libraries(onert_train nnfw_lib_tflite jsoncpp)
+target_link_libraries(onert_train nnfw-dev)
+target_link_libraries(onert_train ${Boost_PROGRAM_OPTIONS_LIBRARY})
+target_link_libraries(onert_train nnfw_lib_benchmark)
+
+install(TARGETS onert_train DESTINATION bin)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+# Unit Tests
+set(TEST_ONERT_TRAIN test_onert_train)
+
+file(GLOB_RECURSE ONERT_TRAIN_TEST_SRCS "test/*.cc")
+list(APPEND ONERT_TRAIN_TEST_SRCS "src/rawdataloader.cc")
+list(APPEND ONERT_TRAIN_TEST_SRCS "src/nnfw_util.cc")
+
+add_executable(${TEST_ONERT_TRAIN} ${ONERT_TRAIN_TEST_SRCS})
+
+target_link_libraries(${TEST_ONERT_TRAIN} nnfw-dev)
+target_link_libraries(${TEST_ONERT_TRAIN} gtest gtest_main dl ${LIB_PTHREAD})
+
+add_test(${TEST_ONERT_TRAIN} ${TEST_ONERT_TRAIN})
+install(TARGETS ${TEST_ONERT_TRAIN} DESTINATION unittest)
diff --git a/tests/tools/onert_train/README.md b/tests/tools/onert_train/README.md
new file mode 100644
index 000000000..a201237f6
--- /dev/null
+++ b/tests/tools/onert_train/README.md
@@ -0,0 +1,13 @@
+# onert_train
+
+`onert_train` trains AI models. Given a model together with input data and expected output data, this tool trains the model, then stores the trained model or runs inference with it.
+
+This tool supports the following input model formats:
+- circle
+- nnpackage
+
+## Usage
+
+### Simple train
+
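+A minimal sketch of a training run (file names and values are hypothetical; the option names follow `onert_train --help`):
+
+```
+$ onert_train \
+    --epoch 5 --batch_size 32 --learning_rate 0.001 \
+    --loss 1 --optimizer 1 \
+    --load_input:raw input.raw --load_expected:raw expected.raw \
+    model.circle
+```
+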
+### Simple inference to trained model
diff --git a/tests/tools/onert_train/src/allocation.h b/tests/tools/onert_train/src/allocation.h
new file mode 100644
index 000000000..f5a6aa73b
--- /dev/null
+++ b/tests/tools/onert_train/src/allocation.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_TRAIN_ALLOCATION_H__
+#define __ONERT_TRAIN_ALLOCATION_H__
+
+#include <cstdlib>
+#include <cstdint>
+
+namespace onert_train
+{
+class Allocation
+{
+public:
+ Allocation() : data_(nullptr) {}
+ ~Allocation() { free(data_); }
+ void *data() const { return data_; }
+ void *alloc(uint64_t sz)
+ {
+ if (data_)
+ {
+ free(data_);
+ }
+
+ return data_ = malloc(sz);
+ }
+
+private:
+ void *data_;
+};
+} // namespace onert_train
+
+#endif // __ONERT_TRAIN_ALLOCATION_H__
diff --git a/tests/tools/onert_train/src/args.cc b/tests/tools/onert_train/src/args.cc
new file mode 100644
index 000000000..dbdd384b5
--- /dev/null
+++ b/tests/tools/onert_train/src/args.cc
@@ -0,0 +1,291 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "args.h"
+
+#include <functional>
+#include <iostream>
+#include <sys/stat.h>
+#include <json/json.h>
+
+namespace
+{
+
+// This function parses a JSON array of alternating keys and values and returns it as a map.
+// For example,
+// [0, [1, 2, 3, 4], 3, 40, 4, []] in JSON
+// is converted to:
+// {
+// 0 -> [1, 2, 3, 4]
+// 3 -> 40
+// 4 -> []
+// } in std::unordered_map. Note that the value type is still Json::Value.
+std::unordered_map<uint32_t, Json::Value> argArrayToMap(const Json::Value &jsonval)
+{
+ if (!jsonval.isArray() || (jsonval.size() % 2 != 0))
+ {
+ std::cerr << "JSON argument must be an even-sized array in JSON\n";
+ exit(1);
+ }
+
+ std::unordered_map<uint32_t, Json::Value> ret;
+ for (uint32_t i = 0; i < jsonval.size(); i += 2)
+ {
+ if (!jsonval[i].isUInt())
+ {
+ std::cerr << "Key values(values in even indices) must be unsigned integers\n";
+ exit(1);
+ }
+ uint32_t key = jsonval[i].asUInt();
+ ret[key] = jsonval[i + 1];
+ }
+ return ret;
+}
+
+void checkModelfile(const std::string &model_filename)
+{
+ if (model_filename.empty())
+ {
+ // TODO Print usage instead of the below message
+ std::cerr << "Please specify model file. Run with `--help` for usage."
+ << "\n";
+
+ exit(1);
+ }
+ else
+ {
+ if (access(model_filename.c_str(), F_OK) == -1)
+ {
+ std::cerr << "Model file not found: " << model_filename << "\n";
+ exit(1);
+ }
+ }
+}
+
+void checkPackage(const std::string &package_filename)
+{
+ if (package_filename.empty())
+ {
+ // TODO Print usage instead of the below message
+ std::cerr << "Please specify nnpackage file. Run with `--help` for usage."
+ << "\n";
+
+ exit(1);
+ }
+ else
+ {
+ if (access(package_filename.c_str(), F_OK) == -1)
+ {
+ std::cerr << "nnpackage not found: " << package_filename << "\n";
+ exit(1);
+ }
+ }
+}
+
+} // namespace
+
+namespace onert_train
+{
+
+Args::Args(const int argc, char **argv)
+{
+ Initialize();
+ Parse(argc, argv);
+}
+
+void Args::Initialize(void)
+{
+ auto process_nnpackage = [&](const std::string &package_filename) {
+ _package_filename = package_filename;
+
+ std::cerr << "Package Filename " << _package_filename << std::endl;
+ checkPackage(package_filename);
+ };
+
+ auto process_modelfile = [&](const std::string &model_filename) {
+ _model_filename = model_filename;
+
+ std::cerr << "Model Filename " << _model_filename << std::endl;
+ checkModelfile(model_filename);
+
+ _use_single_model = true;
+ };
+
+ auto process_path = [&](const std::string &path) {
+ struct stat sb;
+ if (stat(path.c_str(), &sb) == 0)
+ {
+ if (sb.st_mode & S_IFDIR)
+ {
+ _package_filename = path;
+ checkPackage(path);
+ std::cerr << "Package Filename " << path << std::endl;
+ }
+ else
+ {
+ _model_filename = path;
+ checkModelfile(path);
+ std::cerr << "Model Filename " << path << std::endl;
+ _use_single_model = true;
+ }
+ }
+ else
+ {
+ std::cerr << "Cannot find: " << path << "\n";
+ exit(1);
+ }
+ };
+
+ auto process_load_raw_inputfile = [&](const std::string &input_filename) {
+ _load_raw_input_filename = input_filename;
+
+ std::cerr << "Model Input Filename " << _load_raw_input_filename << std::endl;
+ checkModelfile(_load_raw_input_filename);
+ };
+
+ auto process_load_raw_expectedfile = [&](const std::string &expected_filename) {
+ _load_raw_expected_filename = expected_filename;
+
+ std::cerr << "Model Expected Filename " << _load_raw_expected_filename << std::endl;
+ checkModelfile(_load_raw_expected_filename);
+ };
+
+ auto process_output_sizes = [&](const std::string &output_sizes_json_str) {
+ Json::Value root;
+ Json::Reader reader;
+ if (!reader.parse(output_sizes_json_str, root, false))
+ {
+ std::cerr << "Invalid JSON format for output_sizes \"" << output_sizes_json_str << "\"\n";
+ exit(1);
+ }
+
+ auto arg_map = argArrayToMap(root);
+ for (auto &pair : arg_map)
+ {
+ uint32_t key = pair.first;
+ Json::Value &val_json = pair.second;
+ if (!val_json.isUInt())
+ {
+ std::cerr << "All the values in `output_sizes` must be unsigned integers\n";
+ exit(1);
+ }
+ uint32_t val = val_json.asUInt();
+ _output_sizes[key] = val;
+ }
+ };
+
+ // General options
+ po::options_description general("General options", 100);
+
+ // clang-format off
+ general.add_options()
+ ("help,h", "Print available options")
+ ("version", "Print version and exit immediately")
+ ("nnpackage", po::value<std::string>()->notifier(process_nnpackage), "NN Package file(directory) name")
+ ("modelfile", po::value<std::string>()->notifier(process_modelfile), "NN Model filename")
+ ("path", po::value<std::string>()->notifier(process_path), "NN Package or NN Modelfile path")
+ ("data_length", po::value<int>()->default_value(-1)->notifier([&](const auto &v) { _data_length = v; }), "Data length number")
+ ("load_input:raw", po::value<std::string>()->notifier(process_load_raw_inputfile),
+ "NN Model Raw Input data file\n"
+ "The datafile must have data for each input number.\n"
+ "If there are 3 inputs, the data of input0 must exist as much as data_length, "
+ "and the data for input1 and input2 must be held sequentially as data_length.\n"
+ )
+ ("load_expected:raw", po::value<std::string>()->notifier(process_load_raw_expectedfile),
+ "NN Model Raw Expected data file\n"
+ "(Same data policy with load_input:raw)\n"
+ )
+ ("mem_poll,m", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _mem_poll = v; }), "Check memory polling")
+ ("epoch", po::value<int>()->default_value(5)->notifier([&](const auto &v) { _epoch = v; }), "Epoch number (default: 5)")
+ ("batch_size", po::value<int>()->default_value(32)->notifier([&](const auto &v) { _batch_size = v; }), "Batch size (default: 32)")
+ ("learning_rate", po::value<float>()->default_value(1.0e-4)->notifier([&](const auto &v) { _learning_rate = v; }), "Learning rate (default: 1.0e-4)")
+ ("loss", po::value<int>()->default_value(0)->notifier([&] (const auto &v) { _loss_type = v; }),
+ "Loss type\n"
+ "0: MEAN_SQUARED_ERROR (default)\n"
+ "1: CATEGORICAL_CROSSENTROPY\n")
+ ("optimizer", po::value<int>()->default_value(0)->notifier([&] (const auto &v) { _optimizer_type = v; }),
+ "Optimizer type\n"
+ "0: SGD (default)\n"
+ "1: Adam\n")
+ ("verbose_level,v", po::value<int>()->default_value(0)->notifier([&](const auto &v) { _verbose_level = v; }),
+ "Verbose level\n"
+ "0: prints the only result. Messages btw run don't print\n"
+ "1: prints result and message btw run\n"
+ "2: prints all of messages to print\n")
+ ("output_sizes", po::value<std::string>()->notifier(process_output_sizes),
+ "The output buffer size in JSON 1D array\n"
+ "If not given, the model's output sizes are used\n"
+ "e.g. '[0, 40, 2, 80]' to set 0th tensor to 40 and 2nd tensor to 80.\n")
+ ;
+ // clang-format on
+
+ _options.add(general);
+ _positional.add("path", -1);
+}
+
+void Args::Parse(const int argc, char **argv)
+{
+ po::variables_map vm;
+ po::store(po::command_line_parser(argc, argv).options(_options).positional(_positional).run(),
+ vm);
+
+ if (vm.count("help"))
+ {
+ std::cout << "onert_train\n\n";
+ std::cout << "Usage: " << argv[0] << "[model path] [<options>]\n\n";
+ std::cout << _options;
+ std::cout << "\n";
+
+ exit(0);
+ }
+
+ if (vm.count("version"))
+ {
+ _print_version = true;
+ return;
+ }
+
+ {
+ auto conflicting_options = [&](const std::string &o1, const std::string &o2) {
+ if ((vm.count(o1) && !vm[o1].defaulted()) && (vm.count(o2) && !vm[o2].defaulted()))
+ {
+ throw boost::program_options::error(std::string("Two options '") + o1 + "' and '" + o2 +
+ "' cannot be given at once.");
+ }
+ };
+
+ // Cannot use both single model file and nnpackage at once
+ conflicting_options("modelfile", "nnpackage");
+
+ // Require modelfile, nnpackage, or path
+ if (!vm.count("modelfile") && !vm.count("nnpackage") && !vm.count("path"))
+ throw boost::program_options::error(
+ std::string("Require one of options modelfile, nnpackage, or path."));
+ }
+
+ try
+ {
+ po::notify(vm);
+ }
+ catch (const std::bad_cast &e)
+ {
+ std::cerr << "Bad cast error - " << e.what() << '\n';
+ exit(1);
+ }
+}
+
+} // end of namespace onert_train
diff --git a/tests/tools/onert_train/src/args.h b/tests/tools/onert_train/src/args.h
new file mode 100644
index 000000000..cbd87e111
--- /dev/null
+++ b/tests/tools/onert_train/src/args.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_TRAIN_ARGS_H__
+#define __ONERT_TRAIN_ARGS_H__
+
+#include <string>
+#include <unordered_map>
+#include <vector>
+#include <boost/program_options.hpp>
+
+#include "types.h"
+
+namespace po = boost::program_options;
+
+namespace onert_train
+{
+
+using TensorShapeMap = std::unordered_map<uint32_t, TensorShape>;
+
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+enum class WhenToUseH5Shape
+{
+ NOT_PROVIDED, // Param not provided
+ PREPARE, // read shapes in h5 file and set them as inputs' shape before calling nnfw_prepare()
+ RUN, // read shapes in h5 file and set them as inputs' shape before calling nnfw_run()
+};
+#endif
+
+class Args
+{
+public:
+ Args(const int argc, char **argv);
+ void print(void);
+
+ const std::string &getPackageFilename(void) const { return _package_filename; }
+ const std::string &getModelFilename(void) const { return _model_filename; }
+ const bool useSingleModel(void) const { return _use_single_model; }
+ const int getDataLength(void) const { return _data_length; }
+ const std::string &getLoadRawInputFilename(void) const { return _load_raw_input_filename; }
+ const std::string &getLoadRawExpectedFilename(void) const { return _load_raw_expected_filename; }
+ const bool getMemoryPoll(void) const { return _mem_poll; }
+ const int getEpoch(void) const { return _epoch; }
+ const int getBatchSize(void) const { return _batch_size; }
+ const float getLearningRate(void) const { return _learning_rate; }
+ const int getLossType(void) const { return _loss_type; }
+ const int getOptimizerType(void) const { return _optimizer_type; }
+ const bool printVersion(void) const { return _print_version; }
+ const int getVerboseLevel(void) const { return _verbose_level; }
+ std::unordered_map<uint32_t, uint32_t> getOutputSizes(void) const { return _output_sizes; }
+
+private:
+ void Initialize();
+ void Parse(const int argc, char **argv);
+
+private:
+ po::positional_options_description _positional;
+ po::options_description _options;
+
+ std::string _package_filename;
+ std::string _model_filename;
+ bool _use_single_model = false;
+ int _data_length;
+ std::string _load_raw_input_filename;
+ std::string _load_raw_expected_filename;
+ bool _mem_poll;
+ int _epoch;
+ int _batch_size;
+ float _learning_rate;
+ int _loss_type;
+ int _optimizer_type;
+ bool _print_version = false;
+ int _verbose_level;
+ std::unordered_map<uint32_t, uint32_t> _output_sizes;
+};
+
+} // end of namespace onert_train
+
+#endif // __ONERT_TRAIN_ARGS_H__
diff --git a/tests/tools/onert_train/src/formatter.h b/tests/tools/onert_train/src/formatter.h
new file mode 100644
index 000000000..6d256804e
--- /dev/null
+++ b/tests/tools/onert_train/src/formatter.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_TRAIN_FORMATTER_H__
+#define __ONERT_TRAIN_FORMATTER_H__
+
+#include <string>
+#include <vector>
+
+#include "types.h"
+#include "allocation.h"
+
+struct nnfw_session;
+
+namespace onert_train
+{
+class Formatter
+{
+public:
+ virtual ~Formatter() = default;
+ Formatter(nnfw_session *sess) : session_(sess) {}
+ virtual void loadInputs(const std::string &filename, std::vector<Allocation> &inputs) = 0;
+ virtual void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs) = 0;
+ virtual std::vector<TensorShape> readTensorShapes(const std::string &filename)
+ {
+ return std::vector<TensorShape>();
+ };
+
+protected:
+ nnfw_session *session_;
+};
+} // namespace onert_train
+
+#endif // __ONERT_TRAIN_FORMATTER_H__
diff --git a/tests/tools/onert_train/src/h5formatter.cc b/tests/tools/onert_train/src/h5formatter.cc
new file mode 100644
index 000000000..12c570b5d
--- /dev/null
+++ b/tests/tools/onert_train/src/h5formatter.cc
@@ -0,0 +1,258 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "h5formatter.h"
+#include "nnfw.h"
+#include "nnfw_util.h"
+
+#include <iostream>
+#include <stdexcept>
+#include <H5Cpp.h>
+
+namespace
+{
+onert_train::TensorShape getShape(H5::DataSet &data_set)
+{
+ std::vector<hsize_t> h5_shape; // hsize_t is unsigned long long
+ H5::DataSpace data_space = data_set.getSpace();
+ int rank = data_space.getSimpleExtentNdims();
+ h5_shape.resize(rank);
+
+ // read shape info from H5 file
+ data_space.getSimpleExtentDims(h5_shape.data(), NULL);
+
+ onert_train::TensorShape shape;
+ for (auto dim : h5_shape)
+ shape.emplace_back(static_cast<int>(dim));
+
+ return shape;
+}
+} // namespace
+
+namespace onert_train
+{
+static const char *h5_value_grpname = "value";
+
+std::vector<TensorShape> H5Formatter::readTensorShapes(const std::string &filename)
+{
+ uint32_t num_inputs;
+ NNPR_ENSURE_STATUS(nnfw_input_size(session_, &num_inputs));
+ std::vector<TensorShape> tensor_shapes;
+
+ try
+ {
+ H5::Exception::dontPrint();
+
+ H5::H5File file(filename, H5F_ACC_RDONLY);
+ H5::Group value_group = file.openGroup(h5_value_grpname);
+
+ // Constraints: if there are n data set names, they should be unique and
+ // one of [ "0", "1", .. , "n-1" ]
+ for (uint32_t i = 0; i < num_inputs; ++i)
+ {
+ H5::DataSet data_set = value_group.openDataSet(std::to_string(i));
+ H5::DataType type = data_set.getDataType();
+ auto shape = getShape(data_set);
+
+ tensor_shapes.emplace_back(shape);
+ }
+
+ return tensor_shapes;
+ }
+ catch (const H5::Exception &e)
+ {
+ H5::Exception::printErrorStack();
+ std::exit(-1);
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << e.what() << std::endl;
+ std::exit(-1);
+ }
+}
+
+void H5Formatter::loadInputs(const std::string &filename, std::vector<Allocation> &inputs)
+{
+ uint32_t num_inputs;
+ NNPR_ENSURE_STATUS(nnfw_input_size(session_, &num_inputs));
+ try
+ {
+ // Turn off the automatic error printing.
+ H5::Exception::dontPrint();
+
+ H5::H5File file(filename, H5F_ACC_RDONLY);
+ H5::Group value_group = file.openGroup(h5_value_grpname);
+ for (uint32_t i = 0; i < num_inputs; ++i)
+ {
+ nnfw_tensorinfo ti;
+ NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session_, i, &ti));
+
+ // TODO Add Assert(nnfw shape, h5 file shape size)
+
+ // allocate memory for data
+ auto bufsz = bufsize_for(&ti);
+ inputs[i].alloc(bufsz);
+
+ H5::DataSet data_set = value_group.openDataSet(std::to_string(i));
+ H5::DataType type = data_set.getDataType();
+ switch (ti.dtype)
+ {
+ case NNFW_TYPE_TENSOR_FLOAT32:
+ if (type == H5::PredType::IEEE_F32BE || type == H5::PredType::IEEE_F32LE)
+ data_set.read(inputs[i].data(), H5::PredType::NATIVE_FLOAT);
+ else
+ throw std::runtime_error("model input type is f32. But h5 data type is different.");
+ break;
+ case NNFW_TYPE_TENSOR_INT32:
+ if (type == H5::PredType::STD_I32BE || type == H5::PredType::STD_I32LE)
+ data_set.read(inputs[i].data(), H5::PredType::NATIVE_INT32);
+ else
+ throw std::runtime_error("model input type is i32. But h5 data type is different.");
+ break;
+ case NNFW_TYPE_TENSOR_INT64:
+ if (type == H5::PredType::STD_I64BE || type == H5::PredType::STD_I64LE)
+ data_set.read(inputs[i].data(), H5::PredType::NATIVE_INT64);
+ else
+ throw std::runtime_error("model input type is i64. But h5 data type is different.");
+ break;
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
+ case NNFW_TYPE_TENSOR_BOOL:
+ case NNFW_TYPE_TENSOR_UINT8:
+ if (type == H5::PredType::STD_U8BE || type == H5::PredType::STD_U8LE)
+ data_set.read(inputs[i].data(), H5::PredType::NATIVE_UINT8);
+ else
+ throw std::runtime_error(
+ "model input type is qasymm8, bool or uint8. But h5 data type is different.");
+ break;
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
+ if (type == H5::PredType::STD_I8BE || type == H5::PredType::STD_I8LE)
+ data_set.read(inputs[i].data(), H5::PredType::NATIVE_INT8);
+ else
+ throw std::runtime_error("model input type is int8. But h5 data type is different.");
+ break;
+ case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED:
+ throw std::runtime_error("NYI for NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED type");
+ default:
+ throw std::runtime_error("onert_run can load f32, i32, qasymm8, bool and uint8.");
+ }
+ NNPR_ENSURE_STATUS(nnfw_set_input(session_, i, ti.dtype, inputs[i].data(), bufsz));
+ NNPR_ENSURE_STATUS(nnfw_set_input_layout(session_, i, NNFW_LAYOUT_CHANNELS_LAST));
+ }
+ }
+ catch (const H5::Exception &e)
+ {
+ H5::Exception::printErrorStack();
+ std::exit(-1);
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << e.what() << std::endl;
+ std::exit(-1);
+ }
+};
+
+void H5Formatter::dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs)
+{
+ uint32_t num_outputs;
+ NNPR_ENSURE_STATUS(nnfw_output_size(session_, &num_outputs));
+ try
+ {
+ // Turn off the automatic error printing.
+ H5::Exception::dontPrint();
+
+ H5::H5File file(filename, H5F_ACC_TRUNC);
+ H5::Group value_group = file.createGroup(h5_value_grpname);
+ for (uint32_t i = 0; i < num_outputs; i++)
+ {
+ nnfw_tensorinfo ti;
+ NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session_, i, &ti));
+ std::vector<hsize_t> dims(ti.rank);
+ for (uint32_t j = 0; j < ti.rank; ++j)
+ {
+ if (ti.dims[j] >= 0)
+ dims[j] = static_cast<hsize_t>(ti.dims[j]);
+ else
+ {
+ std::cerr << "Negative dimension in output tensor" << std::endl;
+ exit(-1);
+ }
+ }
+ H5::DataSpace data_space(ti.rank, dims.data());
+ switch (ti.dtype)
+ {
+ case NNFW_TYPE_TENSOR_FLOAT32:
+ {
+ H5::DataSet data_set =
+ value_group.createDataSet(std::to_string(i), H5::PredType::IEEE_F32BE, data_space);
+ data_set.write(outputs[i].data(), H5::PredType::NATIVE_FLOAT);
+ break;
+ }
+ case NNFW_TYPE_TENSOR_INT32:
+ {
+ H5::DataSet data_set =
+ value_group.createDataSet(std::to_string(i), H5::PredType::STD_I32LE, data_space);
+ data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT32);
+ break;
+ }
+ case NNFW_TYPE_TENSOR_INT64:
+ {
+ H5::DataSet data_set =
+ value_group.createDataSet(std::to_string(i), H5::PredType::STD_I64LE, data_space);
+ data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT64);
+ break;
+ }
+ case NNFW_TYPE_TENSOR_UINT8:
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
+ {
+ H5::DataSet data_set =
+ value_group.createDataSet(std::to_string(i), H5::PredType::STD_U8BE, data_space);
+ data_set.write(outputs[i].data(), H5::PredType::NATIVE_UINT8);
+ break;
+ }
+ case NNFW_TYPE_TENSOR_BOOL:
+ {
+ H5::DataSet data_set =
+ value_group.createDataSet(std::to_string(i), H5::PredType::STD_U8LE, data_space);
+ data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT8);
+ break;
+ }
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
+ {
+ H5::DataSet data_set =
+ value_group.createDataSet(std::to_string(i), H5::PredType::STD_I8LE, data_space);
+ data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT8);
+ break;
+ }
+ case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED:
+ throw std::runtime_error("NYI for NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED type");
+ default:
+ throw std::runtime_error("onert_run can dump f32, i32, qasymm8, bool and uint8.");
+ }
+ }
+ }
+ catch (const H5::Exception &e)
+ {
+ H5::Exception::printErrorStack();
+ std::exit(-1);
+ }
+ catch (const std::runtime_error &e)
+ {
+ std::cerr << "Error during dumpOutputs on onert_run : " << e.what() << std::endl;
+ std::exit(-1);
+ }
+};
+
+} // end of namespace onert_train
diff --git a/tests/tools/onert_train/src/h5formatter.h b/tests/tools/onert_train/src/h5formatter.h
new file mode 100644
index 000000000..21ef16526
--- /dev/null
+++ b/tests/tools/onert_train/src/h5formatter.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_TRAIN_H5FORMATTER_H__
+#define __ONERT_TRAIN_H5FORMATTER_H__
+
+#include "allocation.h"
+#include "formatter.h"
+#include "types.h"
+
+#include <string>
+#include <vector>
+
+struct nnfw_session;
+
+namespace onert_train
+{
+class H5Formatter : public Formatter
+{
+public:
+ H5Formatter(nnfw_session *sess) : Formatter(sess) {}
+ std::vector<TensorShape> readTensorShapes(const std::string &filename) override;
+ void loadInputs(const std::string &filename, std::vector<Allocation> &inputs) override;
+ void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs) override;
+};
+} // namespace onert_train
+
+#endif // __ONERT_TRAIN_H5FORMATTER_H__
diff --git a/tests/tools/onert_train/src/measure.h b/tests/tools/onert_train/src/measure.h
new file mode 100644
index 000000000..f7c8610d0
--- /dev/null
+++ b/tests/tools/onert_train/src/measure.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_TRAIN_MEASURE_H__
+#define __ONERT_TRAIN_MEASURE_H__
+
+#include <algorithm>
+#include <cstdint>
+#include <ctime>
+#include <functional>
+#include <stdexcept>
+#include <vector>
+
+namespace
+{
+uint64_t nowMicros()
+{
+ struct timespec ts;
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ return static_cast<uint64_t>(ts.tv_nsec) / 1e3 + static_cast<uint64_t>(ts.tv_sec) * 1e6;
+}
+} // namespace
+
+namespace onert_train
+{
+
+struct Step
+{
+ uint64_t time; // us
+ // TODO Support memory usage
+};
+
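+// Usage sketch (hypothetical epoch/step counts): call set() once, then wrap each step:
+//   Measure measure;
+//   measure.set(/*epoch=*/10, /*step=*/100);
+//   measure.run(e, s, [&]() { /* one training step */ });
+//   double avg_step_ms = measure.timeMs(e);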
+class Measure
+{
+public:
+ Measure() = default;
+
+ void set(const int epoch, const int step)
+ {
+ _results.clear();
+ _results.resize(epoch);
+ std::for_each(_results.begin(), _results.end(), [step](auto &v) { v.resize(step); });
+ }
+
+ void run(const int epoch, const int step, const std::function<void()> &func)
+ {
+ if (_results.empty() || _results.size() <= epoch || _results[epoch].size() <= step)
+ {
+ throw std::runtime_error("Please set the number of epochs and steps first");
+ }
+
+ _results[epoch][step].time = nowMicros();
+
+ func();
+
+ _results[epoch][step].time = nowMicros() - _results[epoch][step].time;
+ }
+
+ double timeMicros(const int epoch)
+ {
+ if (_results.empty() || _results.size() <= epoch)
+ {
+ throw std::runtime_error("Invalid epoch");
+ }
+
+ double sum = 0u;
+ std::for_each(_results[epoch].begin(), _results[epoch].end(),
+ [&sum](auto &v) { sum += v.time; });
+ return sum / _results[epoch].size();
+ }
+
+ double timeMs(const int epoch) { return timeMicros(epoch) / 1e3; }
+
+private:
+ std::vector<std::vector<Step>> _results;
+};
+
+} // namespace onert_train
+
+#endif // __ONERT_TRAIN_MEASURE_H__
diff --git a/tests/tools/onert_train/src/nnfw_util.cc b/tests/tools/onert_train/src/nnfw_util.cc
new file mode 100644
index 000000000..8dd2aa871
--- /dev/null
+++ b/tests/tools/onert_train/src/nnfw_util.cc
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cassert>
+#include <string>
+#include "nnfw.h"
+
+namespace onert_train
+{
+uint64_t num_elems(const nnfw_tensorinfo *ti)
+{
+ uint64_t n = 1;
+ for (uint32_t i = 0; i < ti->rank; ++i)
+ {
+ assert(ti->dims[i] >= 0);
+ n *= ti->dims[i];
+ }
+ return n;
+}
+
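+// Note: indexing elmsize[] by ti->dtype assumes the NNFW_TYPE_TENSOR_* enum values are
+// contiguous and start at 0, as the inline comments below indicate.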
+uint64_t bufsize_for(const nnfw_tensorinfo *ti)
+{
+ static int elmsize[] = {
+ sizeof(float), /* NNFW_TYPE_TENSOR_FLOAT32 */
+ sizeof(int), /* NNFW_TYPE_TENSOR_INT32 */
+ sizeof(uint8_t), /* NNFW_TYPE_TENSOR_QUANT8_ASYMM */
+ sizeof(bool), /* NNFW_TYPE_TENSOR_BOOL = 3 */
+ sizeof(uint8_t), /* NNFW_TYPE_TENSOR_UINT8 = 4 */
+ sizeof(int64_t), /* NNFW_TYPE_TENSOR_INT64 = 5 */
+ sizeof(int8_t), /* NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED = 6 */
+ sizeof(int16_t), /* NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED = 7 */
+ };
+ return elmsize[ti->dtype] * num_elems(ti);
+}
+
+} // namespace onert_train
diff --git a/tests/tools/onert_train/src/nnfw_util.h b/tests/tools/onert_train/src/nnfw_util.h
new file mode 100644
index 000000000..674e18fb2
--- /dev/null
+++ b/tests/tools/onert_train/src/nnfw_util.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_TRAIN_NNFW_UTIL_H__
+#define __ONERT_TRAIN_NNFW_UTIL_H__
+
+#include "nnfw.h"
+
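+// Exits the process when a wrapped nnfw call returns anything other than
+// NNFW_STATUS_NO_ERROR, e.g. NNPR_ENSURE_STATUS(nnfw_input_size(session, &num_inputs));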
+#define NNPR_ENSURE_STATUS(a) \
+ do \
+ { \
+ if ((a) != NNFW_STATUS_NO_ERROR) \
+ { \
+ exit(-1); \
+ } \
+ } while (0)
+
+namespace onert_train
+{
+uint64_t num_elems(const nnfw_tensorinfo *ti);
+uint64_t bufsize_for(const nnfw_tensorinfo *ti);
+} // end of namespace onert_train
+
+#endif // __ONERT_TRAIN_NNFW_UTIL_H__
diff --git a/tests/tools/onert_train/src/onert_train.cc b/tests/tools/onert_train/src/onert_train.cc
new file mode 100644
index 000000000..678d13fc9
--- /dev/null
+++ b/tests/tools/onert_train/src/onert_train.cc
@@ -0,0 +1,277 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "allocation.h"
+#include "args.h"
+#include "benchmark.h"
+#include "measure.h"
+#include "nnfw.h"
+#include "nnfw_util.h"
+#include "nnfw_internal.h"
+#include "nnfw_experimental.h"
+#include "randomgen.h"
+#include "rawformatter.h"
+#include "rawdataloader.h"
+
+#include <boost/program_options.hpp>
+#include <cassert>
+#include <chrono>
+#include <cstdlib>
+#include <iostream>
+#include <libgen.h>
+#include <stdexcept>
+#include <unordered_map>
+#include <vector>
+
+static const char *default_backend_cand = "train";
+
+int main(const int argc, char **argv)
+{
+ using namespace onert_train;
+
+ try
+ {
+ Args args(argc, argv);
+ if (args.printVersion())
+ {
+ uint32_t version;
+ NNPR_ENSURE_STATUS(nnfw_query_info_u32(NULL, NNFW_INFO_ID_VERSION, &version));
+ std::cout << "onert_train (nnfw runtime: v" << (version >> 24) << "."
+ << ((version & 0x0000FF00) >> 8) << "." << (version & 0xFF) << ")" << std::endl;
+ exit(0);
+ }
+
+ // TODO Apply verbose level to phases
+ const int verbose = args.getVerboseLevel();
+ benchmark::Phases phases(benchmark::PhaseOption{});
+
+ nnfw_session *session = nullptr;
+ NNPR_ENSURE_STATUS(nnfw_create_session(&session));
+
+ // ModelLoad
+ phases.run("MODEL_LOAD", [&](const benchmark::Phase &, uint32_t) {
+ if (args.useSingleModel())
+ NNPR_ENSURE_STATUS(
+ nnfw_load_model_from_modelfile(session, args.getModelFilename().c_str()));
+ else
+ NNPR_ENSURE_STATUS(nnfw_load_model_from_file(session, args.getPackageFilename().c_str()));
+ });
+
+ // Set training backend
+ NNPR_ENSURE_STATUS(nnfw_set_available_backends(session, default_backend_cand));
+
+ uint32_t num_inputs;
+ NNPR_ENSURE_STATUS(nnfw_input_size(session, &num_inputs));
+
+ uint32_t num_expecteds;
+ NNPR_ENSURE_STATUS(nnfw_output_size(session, &num_expecteds));
+
+ // verify input and output
+
+ auto verifyInputTypes = [session]() {
+ uint32_t sz;
+ NNPR_ENSURE_STATUS(nnfw_input_size(session, &sz));
+ for (uint32_t i = 0; i < sz; ++i)
+ {
+ nnfw_tensorinfo ti;
+ NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, i, &ti));
+
+ if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED)
+ {
+ std::cerr << "E: not supported input type" << std::endl;
+ exit(-1);
+ }
+ }
+ };
+
+ auto verifyOutputTypes = [session]() {
+ uint32_t sz;
+ NNPR_ENSURE_STATUS(nnfw_output_size(session, &sz));
+
+ for (uint32_t i = 0; i < sz; ++i)
+ {
+ nnfw_tensorinfo ti;
+ NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
+
+ if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED)
+ {
+ std::cerr << "E: not supported output type" << std::endl;
+ exit(-1);
+ }
+ }
+ };
+
+ verifyInputTypes();
+ verifyOutputTypes();
+
+ auto convertLossType = [](int type) {
+ switch (type)
+ {
+ case 0:
+ return NNFW_TRAIN_LOSS_MEAN_SQUARED_ERROR;
+ case 1:
+ return NNFW_TRAIN_LOSS_CATEGORICAL_CROSSENTROPY;
+ default:
+ std::cerr << "E: not supported loss type" << std::endl;
+ exit(-1);
+ }
+ };
+
+ auto convertOptType = [](int type) {
+ switch (type)
+ {
+ case 0:
+ return NNFW_TRAIN_OPTIMIZER_SGD;
+ case 1:
+ return NNFW_TRAIN_OPTIMIZER_ADAM;
+ default:
+ std::cerr << "E: not supported optimizer type" << std::endl;
+ exit(-1);
+ }
+ };
+
+ // prepare training info
+ nnfw_train_info tri;
+ tri.batch_size = args.getBatchSize();
+ tri.learning_rate = args.getLearningRate();
+ tri.loss = convertLossType(args.getLossType());
+ tri.opt = convertOptType(args.getOptimizerType());
+
+ // prepare execution
+
+ // TODO When nnfw_{prepare|run} are failed, can't catch the time
+ phases.run("PREPARE", [&](const benchmark::Phase &, uint32_t) {
+ NNPR_ENSURE_STATUS(nnfw_train_prepare(session, &tri));
+ });
+
+ // prepare input and expected tensor info lists
+ std::vector<nnfw_tensorinfo> input_infos;
+ std::vector<nnfw_tensorinfo> expected_infos;
+
+ // prepare data buffers
+ std::vector<Allocation> input_data(num_inputs);
+ std::vector<Allocation> expected_data(num_expecteds);
+
+ for (uint32_t i = 0; i < num_inputs; ++i)
+ {
+ nnfw_tensorinfo ti;
+ NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, i, &ti));
+ input_data[i].alloc(bufsize_for(&ti));
+ input_infos.emplace_back(std::move(ti));
+ }
+
+ for (uint32_t i = 0; i < num_expecteds; ++i)
+ {
+ nnfw_tensorinfo ti;
+ NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
+ expected_data[i].alloc(bufsize_for(&ti));
+ expected_infos.emplace_back(std::move(ti));
+ }
+
+ auto data_length = args.getDataLength();
+
+ Generator generator;
+ RawDataLoader rawDataLoader;
+
+ if (!args.getLoadRawInputFilename().empty() && !args.getLoadRawExpectedFilename().empty())
+ {
+ generator =
+ rawDataLoader.loadData(args.getLoadRawInputFilename(), args.getLoadRawExpectedFilename(),
+ input_infos, expected_infos, data_length, tri.batch_size);
+ }
+ else
+ {
+ // TODO Use random generator
+ std::cerr << "E: not supported random input and expected generator" << std::endl;
+ exit(-1);
+ }
+
+ Measure measure;
+ std::vector<float> losses(num_expecteds);
+ phases.run("EXECUTE", [&](const benchmark::Phase &, uint32_t) {
+ const int num_step = data_length / tri.batch_size;
+ const int num_epoch = args.getEpoch();
+ measure.set(num_epoch, num_step);
+ for (uint32_t epoch = 0; epoch < num_epoch; ++epoch)
+ {
+ std::fill(losses.begin(), losses.end(), 0);
+ for (uint32_t n = 0; n < num_step; ++n)
+ {
+ // get batchsize data
+ if (!generator(n, input_data, expected_data))
+ break;
+
+ // prepare input
+ for (uint32_t i = 0; i < num_inputs; ++i)
+ {
+ NNPR_ENSURE_STATUS(
+ nnfw_train_set_input(session, i, input_data[i].data(), &input_infos[i]));
+ }
+
+ // prepare output
+ for (uint32_t i = 0; i < num_expecteds; ++i)
+ {
+ NNPR_ENSURE_STATUS(
+ nnfw_train_set_expected(session, i, expected_data[i].data(), &expected_infos[i]));
+ }
+
+ // train
+ measure.run(epoch, n, [&]() { NNPR_ENSURE_STATUS(nnfw_train(session, true)); });
+
+ // store loss
+ for (uint32_t i = 0; i < num_expecteds; ++i)
+ {
+ float temp = 0.f;
+ NNPR_ENSURE_STATUS(nnfw_train_get_loss(session, i, &temp));
+ losses[i] += temp;
+ }
+ }
+
+ // print loss
+ std::cout << std::fixed;
+ std::cout.precision(3);
+ std::cout << "Epoch " << epoch + 1 << "/" << num_epoch << " - " << measure.timeMs(epoch)
+ << "ms/step - loss: ";
+ std::cout.precision(4);
+ for (uint32_t i = 0; i < num_expecteds; ++i)
+ {
+ std::cout << "[" << i << "] " << losses[i] / num_step;
+ }
+ std::cout /* << "- accuracy: " << accuracy*/ << std::endl;
+ }
+ });
+
+ NNPR_ENSURE_STATUS(nnfw_close_session(session));
+
+ // prepare result
+ benchmark::Result result(phases);
+
+ // to stdout
+ benchmark::printResult(result);
+
+ return 0;
+ }
+ catch (boost::program_options::error &e)
+ {
+ std::cerr << "E: " << e.what() << std::endl;
+ exit(-1);
+ }
+ catch (std::runtime_error &e)
+ {
+ std::cerr << "E: Fail to run by runtime error:" << e.what() << std::endl;
+ exit(-1);
+ }
+}
diff --git a/tests/tools/onert_train/src/randomgen.cc b/tests/tools/onert_train/src/randomgen.cc
new file mode 100644
index 000000000..72599cbb2
--- /dev/null
+++ b/tests/tools/onert_train/src/randomgen.cc
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "randomgen.h"
+#include "nnfw.h"
+#include "nnfw_util.h"
+#include "misc/RandomGenerator.h"
+
+#include <iostream>
+
+namespace onert_train
+{
+
+template <class T> void randomData(nnfw::misc::RandomGenerator &randgen, void *data, uint64_t size)
+{
+ for (uint64_t i = 0; i < size; i++)
+ reinterpret_cast<T *>(data)[i] = randgen.generate<T>();
+}
+
+void RandomGenerator::generate(std::vector<Allocation> &inputs)
+{
+ // generate random data
+ const int seed = 1;
+ nnfw::misc::RandomGenerator randgen{seed, 0.0f, 2.0f};
+ for (uint32_t i = 0; i < inputs.size(); ++i)
+ {
+ nnfw_tensorinfo ti;
+ NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session_, i, &ti));
+ auto input_size_in_bytes = bufsize_for(&ti);
+ inputs[i].alloc(input_size_in_bytes);
+ switch (ti.dtype)
+ {
+ case NNFW_TYPE_TENSOR_FLOAT32:
+ randomData<float>(randgen, inputs[i].data(), num_elems(&ti));
+ break;
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
+ randomData<uint8_t>(randgen, inputs[i].data(), num_elems(&ti));
+ break;
+ case NNFW_TYPE_TENSOR_BOOL:
+ randomData<bool>(randgen, inputs[i].data(), num_elems(&ti));
+ break;
+ case NNFW_TYPE_TENSOR_UINT8:
+ randomData<uint8_t>(randgen, inputs[i].data(), num_elems(&ti));
+ break;
+ case NNFW_TYPE_TENSOR_INT32:
+ randomData<int32_t>(randgen, inputs[i].data(), num_elems(&ti));
+ break;
+ case NNFW_TYPE_TENSOR_INT64:
+ randomData<int64_t>(randgen, inputs[i].data(), num_elems(&ti));
+ break;
+ case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED:
+ randomData<int16_t>(randgen, inputs[i].data(), num_elems(&ti));
+ break;
+ default:
+ std::cerr << "Not supported input type" << std::endl;
+ std::exit(-1);
+ }
+ NNPR_ENSURE_STATUS(
+ nnfw_set_input(session_, i, ti.dtype, inputs[i].data(), input_size_in_bytes));
+ NNPR_ENSURE_STATUS(nnfw_set_input_layout(session_, i, NNFW_LAYOUT_CHANNELS_LAST));
+ }
+}
+
+} // end of namespace onert_train
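Note: `randomData<T>` above fills a type-erased buffer through a typed view, with the dtype switch selecting the instantiation. Below is a standalone sketch of the same pattern, assuming the `{seed, mean, stddev}` reading of the `nnfw::misc::RandomGenerator` constructor; `std::mt19937` plus `std::normal_distribution` stand in for it here, and the `DType` enum is an illustrative subset.

```cpp
#include <cstdint>
#include <iostream>
#include <random>
#include <vector>

enum class DType { FLOAT32, INT32 }; // illustrative subset of NNFW_TYPE_*

// same shape as onert_train's randomData<T>: typed writes into a void buffer
template <class T> void randomData(std::mt19937 &gen, void *data, uint64_t size)
{
  std::normal_distribution<float> dist{0.0f, 2.0f};
  for (uint64_t i = 0; i < size; ++i)
    reinterpret_cast<T *>(data)[i] = static_cast<T>(dist(gen));
}

int main()
{
  std::mt19937 gen{1}; // fixed seed, mirroring the tool's deterministic inputs
  std::vector<uint8_t> buf(4 * sizeof(float)); // type-erased allocation
  const DType dtype = DType::FLOAT32;
  switch (dtype)
  {
    case DType::FLOAT32: randomData<float>(gen, buf.data(), 4); break;
    case DType::INT32: randomData<int32_t>(gen, buf.data(), 4); break;
  }
  std::cout << reinterpret_cast<float *>(buf.data())[0] << std::endl;
  return 0;
}
```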
diff --git a/tests/tools/onert_train/src/randomgen.h b/tests/tools/onert_train/src/randomgen.h
new file mode 100644
index 000000000..410c66d6f
--- /dev/null
+++ b/tests/tools/onert_train/src/randomgen.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_TRAIN_RANDOMGEN_H__
+#define __ONERT_TRAIN_RANDOMGEN_H__
+
+#include <string>
+#include <vector>
+
+#include "allocation.h"
+
+struct nnfw_session;
+
+namespace onert_train
+{
+class RandomGenerator
+{
+public:
+ RandomGenerator(nnfw_session *sess) : session_(sess) {}
+ void generate(std::vector<Allocation> &inputs);
+
+private:
+ nnfw_session *session_;
+};
+} // namespace onert_train
+
+#endif // __ONERT_TRAIN_RANDOMGEN_H__
diff --git a/tests/tools/onert_train/src/rawdataloader.cc b/tests/tools/onert_train/src/rawdataloader.cc
new file mode 100644
index 000000000..a3672a9f3
--- /dev/null
+++ b/tests/tools/onert_train/src/rawdataloader.cc
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "rawdataloader.h"
+#include "nnfw_util.h"
+
+#include <iostream>
+#include <stdexcept>
+#include <numeric>
+
+namespace onert_train
+{
+
+Generator RawDataLoader::loadData(const std::string &input_file, const std::string &expected_file,
+ const std::vector<nnfw_tensorinfo> &input_infos,
+ const std::vector<nnfw_tensorinfo> &expected_infos,
+ const uint32_t data_length, const uint32_t batch_size)
+{
+ std::vector<uint32_t> input_origins(input_infos.size());
+ uint32_t start = 0;
+ for (uint32_t i = 0; i < input_infos.size(); ++i)
+ {
+ input_origins.at(i) = start;
+ start += (bufsize_for(&input_infos[i]) / batch_size * data_length);
+ }
+
+ std::vector<uint32_t> expected_origins(expected_infos.size());
+ start = 0;
+ for (uint32_t i = 0; i < expected_infos.size(); ++i)
+ {
+ expected_origins.at(i) = start;
+ start += (bufsize_for(&expected_infos[i]) / batch_size * data_length);
+ }
+
+  _input_file = std::ifstream(input_file, std::ios::ate | std::ios::binary);
+  _expected_file = std::ifstream(expected_file, std::ios::ate | std::ios::binary);
+
+  // std::ifstream does not throw on open failure by default, so check the
+  // streams explicitly instead of relying on exceptions
+  if (!_input_file.is_open() || !_expected_file.is_open())
+  {
+    std::cerr << "E: failed to open " << input_file << " or " << expected_file << std::endl;
+    std::exit(-1);
+  }
+
+ return [input_origins, expected_origins, &input_infos, &expected_infos,
+ this](uint32_t idx, std::vector<Allocation> &inputs, std::vector<Allocation> &expecteds) {
+ for (uint32_t i = 0; i < input_infos.size(); ++i)
+ {
+ auto bufsz = bufsize_for(&input_infos[i]);
+ _input_file.seekg(input_origins[i] + idx * bufsz, std::ios::beg);
+ _input_file.read(reinterpret_cast<char *>(inputs[i].data()), bufsz);
+ }
+ for (uint32_t i = 0; i < expected_infos.size(); ++i)
+ {
+ auto bufsz = bufsize_for(&expected_infos[i]);
+ _expected_file.seekg(expected_origins[i] + idx * bufsz, std::ios::beg);
+ _expected_file.read(reinterpret_cast<char *>(expecteds[i].data()), bufsz);
+ }
+ return true;
+ };
+}
+
+} // namespace onert_train
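Note: the origin arithmetic above implies a tensor-major raw file layout: all `data_length` samples of tensor 0, then all samples of tensor 1, and so on, where `bufsize_for()` covers a whole batch and `bufsize / batch_size` is the per-sample size. A worked example of those byte offsets, with made-up sizes:

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
  const uint32_t data_length = 100, batch_size = 16;
  // per-batch buffer sizes (bytes) for two hypothetical input tensors
  const std::vector<uint32_t> bufsize = {16 * 32, 16 * 4};

  // region start of each tensor: the file stores every sample of tensor 0,
  // then every sample of tensor 1, ...
  std::vector<uint32_t> origins(bufsize.size());
  uint32_t start = 0;
  for (uint32_t i = 0; i < bufsize.size(); ++i)
  {
    origins[i] = start;
    start += bufsize[i] / batch_size * data_length; // per-sample bytes * samples
  }

  // batch `idx` of tensor `i` begins at origins[i] + idx * bufsize[i]
  const uint32_t idx = 3;
  for (uint32_t i = 0; i < bufsize.size(); ++i)
    std::cout << "tensor " << i << ", batch " << idx << " starts at byte "
              << origins[i] + idx * bufsize[i] << std::endl;
  return 0;
}
```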
diff --git a/tests/tools/onert_train/src/rawdataloader.h b/tests/tools/onert_train/src/rawdataloader.h
new file mode 100644
index 000000000..3fb292770
--- /dev/null
+++ b/tests/tools/onert_train/src/rawdataloader.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_TRAIN_RAWDATALOADER_H__
+#define __ONERT_TRAIN_RAWDATALOADER_H__
+
+#include "allocation.h"
+#include "nnfw.h"
+
+#include <functional>
+#include <string>
+#include <vector>
+#include <fstream>
+
+namespace onert_train
+{
+
+using Generator = std::function<bool(uint32_t, /** index **/
+ std::vector<Allocation> &, /** input **/
+ std::vector<Allocation> & /** expected **/)>;
+
+class RawDataLoader
+{
+public:
+ RawDataLoader() = default;
+ Generator loadData(const std::string &input_file, const std::string &expected_file,
+ const std::vector<nnfw_tensorinfo> &input_infos,
+                     const std::vector<nnfw_tensorinfo> &expected_infos, const uint32_t data_length,
+ const uint32_t batch_size);
+
+private:
+ std::ifstream _input_file;
+ std::ifstream _expected_file;
+};
+
+} // namespace onert_train
+
+#endif // __ONERT_TRAIN_RAWDATALOADER_H__
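Note: `Generator` being a plain `std::function` keeps the driver loop agnostic to where batches come from; the raw-file loader above and the not-yet-wired `RandomGenerator` could both satisfy it. A small sketch of that interchangeability, with toy buffer types standing in for `Allocation`:

```cpp
#include <cstdint>
#include <functional>
#include <iostream>
#include <vector>

using Buf = std::vector<float>; // toy stand-in for onert_train::Allocation
using Generator = std::function<bool(uint32_t, std::vector<Buf> &, std::vector<Buf> &)>;

// any batch source can be boxed this way: file reader, random filler, ...
Generator makeConstantSource(float v)
{
  return [v](uint32_t /*idx*/, std::vector<Buf> &inputs, std::vector<Buf> &) {
    for (auto &b : inputs)
      b.assign(4, v); // pretend every input tensor has 4 elements
    return true;
  };
}

int main()
{
  std::vector<Buf> inputs(1), expecteds(1);
  for (const Generator &g : {makeConstantSource(0.f), makeConstantSource(1.f)})
  {
    g(0, inputs, expecteds);
    std::cout << "first element: " << inputs[0][0] << std::endl;
  }
  return 0;
}
```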
diff --git a/tests/tools/onert_train/src/rawformatter.cc b/tests/tools/onert_train/src/rawformatter.cc
new file mode 100644
index 000000000..a17071684
--- /dev/null
+++ b/tests/tools/onert_train/src/rawformatter.cc
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "rawformatter.h"
+#include "nnfw.h"
+#include "nnfw_util.h"
+
+#include <iostream>
+#include <fstream>
+#include <stdexcept>
+
+namespace onert_train
+{
+void RawFormatter::loadInputs(const std::string &filename, std::vector<Allocation> &inputs)
+{
+ uint32_t num_inputs;
+ NNPR_ENSURE_STATUS(nnfw_input_size(session_, &num_inputs));
+
+  // Support multiple inputs
+  // Option 1: Get comma-separated input file list like --load:raw a,b,c
+  // Option 2: Get prefix --load:raw in
+  //           Internally access in.0, in.1, in.2, ... in.{N-1}, where N is determined
+  //           by the nnfw info query API.
+  //
+  // Currently Option 2 is implemented.
+ try
+ {
+ for (uint32_t i = 0; i < num_inputs; ++i)
+ {
+ nnfw_tensorinfo ti;
+ NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session_, i, &ti));
+
+ // allocate memory for data
+ auto bufsz = bufsize_for(&ti);
+ inputs[i].alloc(bufsz);
+
+ std::ifstream file(filename + "." + std::to_string(i), std::ios::ate | std::ios::binary);
+ auto filesz = file.tellg();
+ if (bufsz != filesz)
+ {
+ throw std::runtime_error("Input " + std::to_string(i) +
+ " size does not match: " + std::to_string(bufsz) +
+ " expected, but " + std::to_string(filesz) + " provided.");
+ }
+ file.seekg(0, std::ios::beg);
+ file.read(reinterpret_cast<char *>(inputs[i].data()), filesz);
+ file.close();
+
+ NNPR_ENSURE_STATUS(nnfw_set_input(session_, i, ti.dtype, inputs[i].data(), bufsz));
+ NNPR_ENSURE_STATUS(nnfw_set_input_layout(session_, i, NNFW_LAYOUT_CHANNELS_LAST));
+ }
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << e.what() << std::endl;
+ std::exit(-1);
+ }
+}
+
+void RawFormatter::dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs)
+{
+ uint32_t num_outputs;
+ NNPR_ENSURE_STATUS(nnfw_output_size(session_, &num_outputs));
+ try
+ {
+ for (uint32_t i = 0; i < num_outputs; i++)
+ {
+ nnfw_tensorinfo ti;
+ NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session_, i, &ti));
+ auto bufsz = bufsize_for(&ti);
+
+ std::ofstream file(filename + "." + std::to_string(i), std::ios::out | std::ios::binary);
+ file.write(reinterpret_cast<const char *>(outputs[i].data()), bufsz);
+ file.close();
+ std::cerr << filename + "." + std::to_string(i) + " is generated.\n";
+ }
+ }
+ catch (const std::runtime_error &e)
+ {
+ std::cerr << "Error during dumpOutputs on onert_run : " << e.what() << std::endl;
+ std::exit(-1);
+ }
+}
+} // end of namespace onert_train
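Note: `loadInputs` above opens each file with `std::ios::ate` so `tellg()` immediately yields the file size, validates it against the tensor's buffer size, then rewinds and reads. The same pattern in isolation; the file name and sizes are throwaway examples:

```cpp
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

int main()
{
  // create a 4-byte example file to read back
  {
    std::ofstream f("example.bin", std::ios::binary);
    const char zeros[4] = {};
    f.write(zeros, sizeof(zeros));
  }

  const std::streamoff bufsz = 4; // what bufsize_for() would report
  std::ifstream file("example.bin", std::ios::ate | std::ios::binary);
  const std::streamoff filesz = file.tellg(); // opened at end => position == size
  if (bufsz != filesz)
    throw std::runtime_error("size mismatch: " + std::to_string(bufsz) + " expected, but " +
                             std::to_string(filesz) + " provided");

  std::vector<char> buf(static_cast<size_t>(bufsz));
  file.seekg(0, std::ios::beg); // rewind before the actual read
  file.read(buf.data(), bufsz);
  std::cout << "read " << file.gcount() << " bytes" << std::endl;
  return 0;
}
```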
diff --git a/tests/tools/onert_train/src/rawformatter.h b/tests/tools/onert_train/src/rawformatter.h
new file mode 100644
index 000000000..90e81b2e9
--- /dev/null
+++ b/tests/tools/onert_train/src/rawformatter.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_TRAIN_RAWFORMATTER_H__
+#define __ONERT_TRAIN_RAWFORMATTER_H__
+
+#include "allocation.h"
+#include "formatter.h"
+#include "types.h"
+
+#include <string>
+#include <vector>
+
+struct nnfw_session;
+
+namespace onert_train
+{
+class RawFormatter : public Formatter
+{
+public:
+ RawFormatter(nnfw_session *sess) : Formatter(sess) {}
+ void loadInputs(const std::string &filename, std::vector<Allocation> &inputs) override;
+ void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs) override;
+};
+} // namespace onert_train
+
+#endif // __ONERT_TRAIN_RAWFORMATTER_H__
diff --git a/tests/tools/onert_train/src/types.h b/tests/tools/onert_train/src/types.h
new file mode 100644
index 000000000..6e2693016
--- /dev/null
+++ b/tests/tools/onert_train/src/types.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_TRAIN_TYPES_H__
+#define __ONERT_TRAIN_TYPES_H__
+
+#include <vector>
+
+namespace onert_train
+{
+
+using TensorShape = std::vector<int>;
+
+} // end of namespace onert_train
+
+#endif // __ONERT_TRAIN_TYPES_H__
diff --git a/tests/tools/onert_train/test/rawdataloader.test.cc b/tests/tools/onert_train/test/rawdataloader.test.cc
new file mode 100644
index 000000000..b2930b37e
--- /dev/null
+++ b/tests/tools/onert_train/test/rawdataloader.test.cc
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <nnfw.h>
+
+#include <gtest/gtest.h>
+#include <algorithm>
+#include <numeric>
+
+#include "../src/rawdataloader.h"
+#include "../src/nnfw_util.h"
+
+namespace
+{
+using namespace onert_train;
+
+class DataFileGenerator
+{
+public:
+ DataFileGenerator(uint32_t data_length)
+ : _data_length{data_length}, _input_file{"input.bin"}, _expected_file{"expected.bin"}
+ {
+ }
+ ~DataFileGenerator()
+ {
+ try
+ {
+ if (std::remove(_input_file.c_str()) != 0)
+ {
+ std::cerr << "Failed to remove " << _input_file << std::endl;
+ }
+ if (std::remove(_expected_file.c_str()) != 0)
+ {
+ std::cerr << "Failed to remove " << _expected_file << std::endl;
+ }
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << "Exception: " << e.what() << std::endl;
+ }
+ }
+
+ template <typename T>
+ const std::string &generateInputData(const std::vector<std::vector<T>> &data)
+ {
+ generateData(_input_file, data);
+ return _input_file;
+ }
+
+ template <typename T>
+ const std::string &generateExpectedData(const std::vector<std::vector<T>> &data)
+ {
+ generateData(_expected_file, data);
+ return _expected_file;
+ }
+
+private:
+ template <typename T>
+ void generateData(const std::string &name, const std::vector<std::vector<T>> &data)
+ {
+ try
+ {
+ std::ofstream file(name, std::ios::binary);
+ for (uint32_t i = 0; i < data.size(); ++i)
+ {
+ for (uint32_t j = 0; j < _data_length; ++j)
+ {
+ for (uint32_t k = 0; k < data[i].size(); ++k)
+ {
+ file.write(reinterpret_cast<const char *>(&data[i][k]), sizeof(data[i][k]));
+ }
+ }
+ }
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << "Exception: " << e.what() << std::endl;
+ }
+ }
+
+private:
+ uint32_t _data_length;
+ std::string _input_file;
+ std::string _expected_file;
+};
+
+class RawDataLoaderTest : public testing::Test
+{
+protected:
+ void SetUp() override { nnfw_create_session(&_session); }
+
+ void TearDown() override { nnfw_close_session(_session); }
+
+ nnfw_session *_session = nullptr;
+};
+
+TEST_F(RawDataLoaderTest, loadDatas_1)
+{
+ const uint32_t data_length = 100;
+ const uint32_t num_input = 1;
+ const uint32_t num_expected = 1;
+ const uint32_t batch_size = 16;
+
+ // Set data tensor info
+ nnfw_tensorinfo in_info = {
+ .dtype = NNFW_TYPE_TENSOR_INT32,
+ .rank = 4,
+ .dims = {batch_size, 2, 2, 2},
+ };
+ std::vector<nnfw_tensorinfo> in_infos{in_info};
+
+ nnfw_tensorinfo expected_info = {
+ .dtype = NNFW_TYPE_TENSOR_INT32,
+ .rank = 4,
+ .dims = {batch_size, 1, 1, 1},
+ };
+ std::vector<nnfw_tensorinfo> expected_infos{expected_info};
+
+ // Generate test data
+ std::vector<std::vector<uint32_t>> in(num_input);
+ for (uint32_t i = 0; i < num_input; ++i)
+ {
+ in[i].resize(num_elems(&in_infos[i]) / batch_size);
+    std::generate(in[i].begin(), in[i].end(), [] {
+ static uint32_t i = 0;
+ return i++;
+ });
+ }
+
+ std::vector<std::vector<uint32_t>> expected(num_expected);
+ for (uint32_t i = 0; i < num_expected; ++i)
+ {
+ expected[i].resize(num_elems(&expected_infos[i]) / batch_size);
+ std::generate(expected[i].begin(), expected[i].end(), [in, i] {
+ auto sum = std::accumulate(in[i].begin(), in[i].end(), 0);
+ return sum;
+ });
+ }
+
+ // Generate test data file
+ DataFileGenerator file_gen(data_length);
+ auto &input_file = file_gen.generateInputData<uint32_t>(in);
+ auto &expected_file = file_gen.generateExpectedData<uint32_t>(expected);
+
+  // Set expected data
+ std::vector<std::vector<uint32_t>> expected_in(num_input);
+ std::vector<std::vector<uint32_t>> expected_ex(num_expected);
+ for (uint32_t i = 0; i < num_input; ++i)
+ {
+ for (uint32_t j = 0; j < batch_size; ++j)
+ {
+ expected_in[i].insert(expected_in[i].end(), in[i].begin(), in[i].end());
+ }
+ }
+ for (uint32_t i = 0; i < num_expected; ++i)
+ {
+ for (uint32_t j = 0; j < batch_size; ++j)
+ {
+ expected_ex[i].insert(expected_ex[i].end(), expected[i].begin(), expected[i].end());
+ }
+ }
+
+  // Load test data
+ RawDataLoader loader;
+ Generator generator =
+ loader.loadData(input_file, expected_file, in_infos, expected_infos, data_length, batch_size);
+
+  // Allocate memory for inputs and expecteds
+ std::vector<Allocation> inputs(num_input);
+ for (uint32_t i = 0; i < num_input; ++i)
+ {
+ inputs[i].alloc(bufsize_for(&in_infos[i]));
+ }
+ std::vector<Allocation> expecteds(num_expected);
+ for (uint32_t i = 0; i < num_expected; ++i)
+ {
+ expecteds[i].alloc(bufsize_for(&expected_infos[i]));
+ }
+
+ uint32_t num_sample = data_length / batch_size;
+ for (uint32_t i = 0; i < num_sample; ++i)
+ {
+    EXPECT_TRUE(generator(i, inputs, expecteds));
+
+ std::vector<std::vector<uint32_t>> gen_in(num_input);
+ for (uint32_t h = 0; h < num_input; ++h)
+ {
+ auto num_elem = num_elems(&in_infos[h]);
+ for (uint32_t k = 0; k < num_elem; ++k)
+ {
+ auto inbufs = reinterpret_cast<uint32_t *>(inputs[h].data()) + k;
+ gen_in[h].emplace_back(*inbufs);
+ }
+ }
+ std::vector<std::vector<uint32_t>> gen_ex(num_expected);
+ for (uint32_t h = 0; h < num_expected; ++h)
+ {
+ auto num_elem = num_elems(&expected_infos[h]);
+ for (uint32_t k = 0; k < num_elem; ++k)
+ {
+ auto exbufs = reinterpret_cast<uint32_t *>(expecteds[h].data()) + k;
+ gen_ex[h].emplace_back(*exbufs);
+ }
+ }
+
+ EXPECT_EQ(gen_in, expected_in);
+ EXPECT_EQ(gen_ex, expected_ex);
+ }
+}
+
+} // namespace
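Note: the round-trip expectation in `loadDatas_1` in one line: `DataFileGenerator` writes the same per-sample pattern `data_length` times, so every batch the loader returns must equal the pattern repeated `batch_size` times. A minimal illustration of that repetition check, with toy values:

```cpp
#include <cassert>
#include <vector>

int main()
{
  const std::vector<int> sample = {1, 2, 3}; // one sample's worth of values
  const int batch_size = 2;

  // what the loader should hand back for any batch index
  std::vector<int> batch;
  for (int j = 0; j < batch_size; ++j)
    batch.insert(batch.end(), sample.begin(), sample.end());

  assert(batch == (std::vector<int>{1, 2, 3, 1, 2, 3}));
  return 0;
}
```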
diff --git a/tests/tools/tflite_benchmark_model/CMakeLists.txt b/tests/tools/tflite_benchmark_model/CMakeLists.txt
deleted file mode 100644
index 017e1da57..000000000
--- a/tests/tools/tflite_benchmark_model/CMakeLists.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-if (NOT BUILD_TFLITE_BENCHMARK_MODEL)
- return()
-endif(NOT BUILD_TFLITE_BENCHMARK_MODEL)
-
-nnfw_find_package(TensorFlowLite EXACT 1.13.1 REQUIRED)
-
-# TODO Remove this target_compile_definitions command, and just check its presence.
-# This change is prerequisites on pre-built tensorflow-lite package support
-target_compile_definitions(tensorflow-lite PUBLIC "TFLITE_PROFILING_ENABLED")
-
-file(GLOB_RECURSE SOURCES "*.cc")
-
-nnas_find_package(TensorFlowSource EXACT 1.13.1 REQUIRED)
-set(TENSORFLOW_LITE_BASE "${TensorFlowSource_DIR}/tensorflow/lite")
-list(APPEND SOURCES "${TENSORFLOW_LITE_BASE}/tools/benchmark/benchmark_main.cc"
- "${TENSORFLOW_LITE_BASE}/tools/benchmark/benchmark_model.cc"
- "${TENSORFLOW_LITE_BASE}/tools/benchmark/benchmark_params.cc"
- "${TENSORFLOW_LITE_BASE}/tools/benchmark/command_line_flags.cc")
-
-add_executable(tflite_benchmark_model ${SOURCES})
-target_compile_definitions(tflite_benchmark_model PUBLIC "TFLITE_PROFILING_ENABLED")
-target_link_libraries(tflite_benchmark_model nnfw_lib_misc nnfw_lib_tflite nnfw_lib_profiling)
-target_link_libraries(tflite_benchmark_model tensorflow-lite ${LIB_PTHREAD} dl)
-install(TARGETS tflite_benchmark_model DESTINATION bin)
diff --git a/tests/tools/tflite_benchmark_model/README.md b/tests/tools/tflite_benchmark_model/README.md
deleted file mode 100644
index a71a2fa1c..000000000
--- a/tests/tools/tflite_benchmark_model/README.md
+++ /dev/null
@@ -1,197 +0,0 @@
-# TFLite Model Benchmark Tool
-
-## Description
-
-A simple C++ binary to benchmark a TFLite model and its individual operators,
-both on desktop machines and on Android. The binary takes a TFLite model,
-generates random inputs and then repeatedly runs the model for specified number
-of runs. Aggregrate latency statistics are reported after running the benchmark.
-
-The instructions below are for running the binary on Desktop and Android,
-for iOS please use the
-[iOS benchmark app](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/tools/benchmark/ios).
-
-## Parameters
-
-The binary takes the following required parameters:
-
-* `graph`: `string` \
- The path to the TFLite model file.
-
-and the following optional parameters:
-
-* `num_threads`: `int` (default=1) \
- The number of threads to use for running TFLite interpreter.
-* `warmup_runs`: `int` (default=1) \
- The number of warmup runs to do before starting the benchmark.
-* `num_runs`: `int` (default=50) \
- The number of runs. Increase this to reduce variance.
-* `run_delay`: `float` (default=-1.0) \
- The delay in seconds between subsequent benchmark runs. Non-positive values
- mean use no delay.
-* `use_nnapi`: `bool` (default=false) \
- Whether to use [Android NNAPI](https://developer.android.com/ndk/guides/neuralnetworks/).
- This API is available on recent Android devices.
-
-## To build/install/run
-
-### On Android:
-
-(0) Refer to https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android to edit the `WORKSPACE` to configure the android NDK/SDK.
-
-(1) Build for your specific platform, e.g.:
-
-```
-bazel build -c opt \
- --config=android_arm \
- --cxxopt='--std=c++11' \
- tensorflow/lite/tools/benchmark:benchmark_model
-```
-
-(2) Connect your phone. Push the binary to your phone with adb push
- (make the directory if required):
-
-```
-adb push bazel-bin/tensorflow/lite/tools/benchmark/benchmark_model /data/local/tmp
-```
-
-(3) Make the binary executable.
-
-```
-adb shell chmod +x /data/local/tmp/benchmark_model
-```
-
-(4) Push the compute graph that you need to test. For example:
-
-```
-adb push mobilenet_quant_v1_224.tflite /data/local/tmp
-```
-
-(5) Run the benchmark. For example:
-
-```
-adb shell /data/local/tmp/benchmark_model \
- --graph=/data/local/tmp/mobilenet_quant_v1_224.tflite \
- --num_threads=4
-```
-
-### On desktop:
-(1) build the binary
-
-```
-bazel build -c opt tensorflow/lite/tools/benchmark:benchmark_model
-```
-
-(2) Run on your compute graph, similar to the Android case but without the need of adb shell.
-For example:
-
-```
-bazel-bin/tensorflow/lite/tools/benchmark/benchmark_model \
- --graph=mobilenet_quant_v1_224.tflite \
- --num_threads=4
-```
-
-The MobileNet graph used as an example here may be downloaded from [here](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip).
-
-
-## Reducing variance between runs on Android.
-
-Most modern Android phones use [ARM big.LITTLE](https://en.wikipedia.org/wiki/ARM_big.LITTLE)
-architecture where some cores are more power hungry but faster than other cores.
-When running benchmarks on these phones there can be significant variance
-between different runs of the benchmark. One way to reduce variance between runs
-is to set the [CPU affinity](https://en.wikipedia.org/wiki/Processor_affinity)
-before running the benchmark. On Android this can be done using the `taskset`
-command.
-E.g. for running the benchmark on big cores on Pixel 2 with a single thread one
-can use the following command:
-
-```
-adb shell taskset f0 /data/local/tmp/benchmark_model \
- --graph=/data/local/tmp/mobilenet_quant_v1_224.tflite \
- --num_threads=1
-```
-
-where `f0` is the affinity mask for big cores on Pixel 2.
-Note: The affinity mask varies with the device.
-
-## Profiling model operators
-The benchmark model binary also allows you to profile operators and give execution times of each operator. To do this,
-compile the binary with a compiler flag that enables profiling to be compiled in. Pass **--copt=-DTFLITE_PROFILING_ENABLED**
-to compile benchmark with profiling support.
-For example, to compile with profiling support on Android, add this flag to the previous command:
-
-```
-bazel build -c opt \
- --config=android_arm \
- --cxxopt='--std=c++11' \
- --copt=-DTFLITE_PROFILING_ENABLED \
- tensorflow/lite/tools/benchmark:benchmark_model
-```
-This compiles TFLite with profiling enabled, now you can run the benchmark binary like before. The binary will produce detailed statistics for each operation similar to those shown below:
-
-```
-
-============================== Run Order ==============================
- [node type] [start] [first] [avg ms] [%] [cdf%] [mem KB] [times called] [Name]
- CONV_2D 0.000 4.269 4.269 0.107% 0.107% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_0/Relu6]
- DEPTHWISE_CONV_2D 4.270 2.150 2.150 0.054% 0.161% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_1_depthwise/Relu6]
- CONV_2D 6.421 6.107 6.107 0.153% 0.314% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_1_pointwise/Relu6]
- DEPTHWISE_CONV_2D 12.528 1.366 1.366 0.034% 0.348% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_2_depthwise/Relu6]
- CONV_2D 13.895 4.195 4.195 0.105% 0.454% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_2_pointwise/Relu6]
- DEPTHWISE_CONV_2D 18.091 1.260 1.260 0.032% 0.485% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_3_depthwise/Relu6]
- CONV_2D 19.352 6.652 6.652 0.167% 0.652% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_3_pointwise/Relu6]
- DEPTHWISE_CONV_2D 26.005 0.698 0.698 0.018% 0.670% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_4_depthwise/Relu6]
- CONV_2D 26.703 3.344 3.344 0.084% 0.754% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_4_pointwise/Relu6]
- DEPTHWISE_CONV_2D 30.047 0.646 0.646 0.016% 0.770% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_5_depthwise/Relu6]
- CONV_2D 30.694 5.800 5.800 0.145% 0.915% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_5_pointwise/Relu6]
- DEPTHWISE_CONV_2D 36.495 0.331 0.331 0.008% 0.924% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_6_depthwise/Relu6]
- CONV_2D 36.826 2.838 2.838 0.071% 0.995% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_6_pointwise/Relu6]
- DEPTHWISE_CONV_2D 39.665 0.439 0.439 0.011% 1.006% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_7_depthwise/Relu6]
- CONV_2D 40.105 5.293 5.293 0.133% 1.139% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_7_pointwise/Relu6]
- DEPTHWISE_CONV_2D 45.399 0.352 0.352 0.009% 1.147% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_8_depthwise/Relu6]
- CONV_2D 45.752 5.322 5.322 0.133% 1.281% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_8_pointwise/Relu6]
- DEPTHWISE_CONV_2D 51.075 0.357 0.357 0.009% 1.290% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_9_depthwise/Relu6]
- CONV_2D 51.432 5.693 5.693 0.143% 1.433% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_9_pointwise/Relu6]
- DEPTHWISE_CONV_2D 57.126 0.366 0.366 0.009% 1.442% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_10_depthwise/Relu6]
- CONV_2D 57.493 5.472 5.472 0.137% 1.579% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_10_pointwise/Relu6]
- DEPTHWISE_CONV_2D 62.966 0.364 0.364 0.009% 1.588% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_11_depthwise/Relu6]
- CONV_2D 63.330 5.404 5.404 0.136% 1.724% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_11_pointwise/Relu6]
- DEPTHWISE_CONV_2D 68.735 0.155 0.155 0.004% 1.728% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_12_depthwise/Relu6]
- CONV_2D 68.891 2.970 2.970 0.074% 1.802% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_12_pointwise/Relu6]
- DEPTHWISE_CONV_2D 71.862 0.206 0.206 0.005% 1.807% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_13_depthwise/Relu6]
- CONV_2D 72.069 5.888 5.888 0.148% 1.955% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_13_pointwise/Relu6]
- AVERAGE_POOL_2D 77.958 0.036 0.036 0.001% 1.956% 0.000 0 [MobilenetV1/Logits/AvgPool_1a/AvgPool]
- CONV_2D 77.994 1.445 1.445 0.036% 1.992% 0.000 0 [MobilenetV1/Logits/Conv2d_1c_1x1/BiasAdd]
- RESHAPE 79.440 0.002 0.002 0.000% 1.992% 0.000 0 [MobilenetV1/Predictions/Reshape]
- SOFTMAX 79.443 0.029 0.029 0.001% 1.993% 0.000 0 [MobilenetV1/Predictions/Softmax]
-
-============================== Top by Computation Time ==============================
- [node type] [start] [first] [avg ms] [%] [cdf%] [mem KB] [times called] [Name]
- CONV_2D 19.352 6.652 6.652 0.167% 0.167% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_3_pointwise/Relu6]
- CONV_2D 6.421 6.107 6.107 0.153% 0.320% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_1_pointwise/Relu6]
- CONV_2D 72.069 5.888 5.888 0.148% 0.468% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_13_pointwise/Relu6]
- CONV_2D 30.694 5.800 5.800 0.145% 0.613% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_5_pointwise/Relu6]
- CONV_2D 51.432 5.693 5.693 0.143% 0.756% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_9_pointwise/Relu6]
- CONV_2D 57.493 5.472 5.472 0.137% 0.893% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_10_pointwise/Relu6]
- CONV_2D 63.330 5.404 5.404 0.136% 1.029% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_11_pointwise/Relu6]
- CONV_2D 45.752 5.322 5.322 0.133% 1.162% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_8_pointwise/Relu6]
- CONV_2D 40.105 5.293 5.293 0.133% 1.295% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_7_pointwise/Relu6]
- CONV_2D 0.000 4.269 4.269 0.107% 1.402% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_0/Relu6]
-
-Number of nodes executed: 31
-============================== Summary by node type ==============================
- [Node type] [count] [avg ms] [avg %] [cdf %] [mem KB] [times called]
- CONV_2D 15 1.406 89.270% 89.270% 0.000 0
- DEPTHWISE_CONV_2D 13 0.169 10.730% 100.000% 0.000 0
- SOFTMAX 1 0.000 0.000% 100.000% 0.000 0
- RESHAPE 1 0.000 0.000% 100.000% 0.000 0
- AVERAGE_POOL_2D 1 0.000 0.000% 100.000% 0.000 0
-
-Timings (microseconds): count=50 first=79449 curr=81350 min=77385 max=88213 avg=79732 std=1929
-Memory (bytes): count=0
-31 nodes observed
-
-
-Average inference timings in us: Warmup: 83235, Init: 38467, no stats: 79760.9
-```
diff --git a/tests/tools/tflite_benchmark_model/benchmark_tflite_model.cc b/tests/tools/tflite_benchmark_model/benchmark_tflite_model.cc
deleted file mode 100644
index 16e85fc07..000000000
--- a/tests/tools/tflite_benchmark_model/benchmark_tflite_model.cc
+++ /dev/null
@@ -1,419 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/lite/tools/benchmark/benchmark_tflite_model.h"
-
-#include <cstdarg>
-#include <cstdlib>
-#include <iostream>
-#include <memory>
-#include <string>
-#include <unordered_set>
-#include <vector>
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/op_resolver.h"
-#include "tensorflow/lite/string_util.h"
-#include "tensorflow/lite/tools/benchmark/logging.h"
-
-#ifdef GEMMLOWP_PROFILING
-#include "gemmlowp/profiling/profiler.h"
-#endif
-
-// For profiling nnapi_delegate
-#include "profiling/profiling.h"
-#include "tflite/ext/nnapi_delegate.h"
-
-namespace {
- nnfw::tflite::NNAPIDelegate nnfw_delegate_;
-}
-
-#ifdef TFLITE_CUSTOM_OPS_HEADER
-void RegisterSelectedOps(::tflite::MutableOpResolver* resolver);
-#endif
-
-namespace tflite {
-namespace benchmark {
-
-void ProfilingListener::SetInterpreter(tflite::Interpreter* interpreter) {
- TFLITE_BENCHMARK_CHECK(interpreter);
- interpreter_ = interpreter;
- interpreter_->SetProfiler(&profiler_);
-}
-
-void ProfilingListener::OnSingleRunStart(RunType run_type) {
- if (run_type == REGULAR) {
- profiler_.Reset();
- profiler_.StartProfiling();
- }
-}
-
-void ProfilingListener::OnBenchmarkEnd(const BenchmarkResults& results) {
- if (has_profiles_) {
- TFLITE_LOG(INFO) << summarizer_.GetOutputString();
- }
-}
-
-void ProfilingListener::OnSingleRunEnd() {
- profiler_.StopProfiling();
- auto profile_events = profiler_.GetProfileEvents();
- has_profiles_ = !profile_events.empty();
- summarizer_.ProcessProfiles(profile_events, *interpreter_);
-}
-
-void GemmlowpProfilingListener::OnBenchmarkStart(
- const BenchmarkParams& params) {
-#ifdef GEMMLOWP_PROFILING
- gemmlowp::RegisterCurrentThreadForProfiling();
- gemmlowp::StartProfiling();
-#endif
-}
-
-void GemmlowpProfilingListener::OnBenchmarkEnd(
- const BenchmarkResults& results) {
-#ifdef GEMMLOWP_PROFILING
- gemmlowp::FinishProfiling();
-#endif
-}
-
-namespace {
-
-std::vector<std::string> Split(const std::string& str, const char delim) {
- std::istringstream input(str);
- std::vector<std::string> results;
- std::string item;
- while (std::getline(input, item, delim)) {
- results.push_back(item);
- }
- return results;
-}
-
-template <typename T>
-bool SplitAndParse(const std::string& str, char delim, std::vector<T>* values) {
- std::istringstream input(str);
- bool first = true;
- while (!input.eof()) {
- if (!first) {
- char c;
- input >> c;
- if (c != delim) {
- return false;
- }
- } else {
- first = false;
- }
- T val;
- input >> val;
- if (!input.eof() && !input.good()) {
- return false;
- }
- values->push_back(val);
- }
- return true;
-}
-
-template <typename T>
-void FillRandomValue(T* ptr, const std::vector<int>& sizes,
- const std::function<T()>& random_func) {
- int num_elements = 1;
- for (int dim : sizes) {
- num_elements *= dim;
- }
- for (int i = 0; i < num_elements; ++i) {
- *ptr++ = random_func();
- }
-}
-
-void FillRandomString(tflite::DynamicBuffer* buffer,
- const std::vector<int>& sizes,
- const std::function<string()>& random_func) {
- int num_elements = 1;
- for (int dim : sizes) {
- num_elements *= dim;
- }
- for (int i = 0; i < num_elements; ++i) {
- auto str = random_func();
- buffer->AddString(str.data(), str.length());
- }
-}
-
-bool PopulateInputLayerInfo(
- const string& names_string, const string& shapes_string,
- std::vector<BenchmarkTfLiteModel::InputLayerInfo>* info) {
- std::vector<std::string> names = Split(names_string, ',');
- std::vector<std::string> shapes = Split(shapes_string, ':');
-
- if (names.size() != shapes.size()) {
- TFLITE_LOG(ERROR) << "The number of items in"
- << " --input_layer_shape (" << shapes_string << ", with "
- << shapes.size() << " items)"
- << " must match the number of items in"
- << " --input_layer (" << names_string << ", with "
- << names.size() << " items)."
- << " For example --input_layer=input1,input2"
- << " --input_layer_shape=1,224,224,4:1,20";
- return false;
- }
-
- for (int i = 0; i < names.size(); ++i) {
- info->push_back(BenchmarkTfLiteModel::InputLayerInfo());
- BenchmarkTfLiteModel::InputLayerInfo& input = info->back();
-
- input.name = names[i];
-
- TFLITE_BENCHMARK_CHECK(SplitAndParse(shapes[i], ',', &input.shape))
- << "Incorrect size string specified: " << shapes[i];
- for (int dim : input.shape) {
- if (dim == -1) {
- TFLITE_LOG(ERROR)
- << "Any unknown sizes in the shapes (-1's) must be replaced"
- << " with the size you want to benchmark with.";
- return false;
- }
- }
- }
-
- return true;
-}
-
-std::vector<int> TfLiteIntArrayToVector(const TfLiteIntArray* int_array) {
- std::vector<int> values;
- values.reserve(int_array->size);
- for (size_t i = 0; i < int_array->size; i++) {
- values.push_back(int_array->data[i]);
- }
- return values;
-}
-
-} // namespace
-
-BenchmarkParams BenchmarkTfLiteModel::DefaultParams() {
- BenchmarkParams default_params = BenchmarkModel::DefaultParams();
- default_params.AddParam("graph", BenchmarkParam::Create<std::string>(""));
- default_params.AddParam("input_layer",
- BenchmarkParam::Create<std::string>(""));
- default_params.AddParam("input_layer_shape",
- BenchmarkParam::Create<std::string>(""));
- default_params.AddParam("use_nnapi", BenchmarkParam::Create<bool>(false));
- return default_params;
-}
-
-BenchmarkTfLiteModel::BenchmarkTfLiteModel()
- : BenchmarkTfLiteModel(DefaultParams()) {}
-
-BenchmarkTfLiteModel::BenchmarkTfLiteModel(BenchmarkParams params)
- : BenchmarkModel(std::move(params)) {
- AddListener(&profiling_listener_);
- AddListener(&gemmlowp_profiling_listener_);
-}
-
-std::vector<Flag> BenchmarkTfLiteModel::GetFlags() {
- std::vector<Flag> flags = BenchmarkTfLiteModel::BenchmarkModel::GetFlags();
- std::vector<Flag> specific_flags = {
- CreateFlag<std::string>("graph", &params_, "graph file name"),
- CreateFlag<std::string>("input_layer", &params_, "input layer names"),
- CreateFlag<std::string>("input_layer_shape", &params_,
- "input layer shape"),
- CreateFlag<bool>("use_nnapi", &params_, "use nnapi api")};
-
- flags.insert(flags.end(), specific_flags.begin(), specific_flags.end());
- return flags;
-}
-
-void BenchmarkTfLiteModel::LogParams() {
- BenchmarkModel::LogParams();
- TFLITE_LOG(INFO) << "Graph: [" << params_.Get<std::string>("graph") << "]";
- TFLITE_LOG(INFO) << "Input layers: ["
- << params_.Get<std::string>("input_layer") << "]";
- TFLITE_LOG(INFO) << "Input shapes: ["
- << params_.Get<std::string>("input_layer_shape") << "]";
- TFLITE_LOG(INFO) << "Use nnapi : [" << params_.Get<bool>("use_nnapi") << "]";
-}
-
-bool BenchmarkTfLiteModel::ValidateParams() {
- if (params_.Get<std::string>("graph").empty()) {
- TFLITE_LOG(ERROR)
- << "Please specify the name of your TF Lite input file with --graph";
- return false;
- }
- return PopulateInputLayerInfo(params_.Get<std::string>("input_layer"),
- params_.Get<std::string>("input_layer_shape"),
- &inputs);
-}
-
-uint64_t BenchmarkTfLiteModel::ComputeInputBytes() {
- TFLITE_BENCHMARK_CHECK(interpreter);
- uint64_t total_input_bytes = 0;
- for (int input : interpreter->inputs()) {
- auto* t = interpreter->tensor(input);
- total_input_bytes += t->bytes;
- }
- return total_input_bytes;
-}
-
-void BenchmarkTfLiteModel::PrepareInputsAndOutputs() {
- auto interpreter_inputs = interpreter->inputs();
- // Set the values of the input tensors.
- for (int j = 0; j < interpreter_inputs.size(); ++j) {
- int i = interpreter_inputs[j];
- TfLiteTensor* t = interpreter->tensor(i);
- std::vector<int> sizes = TfLiteIntArrayToVector(t->dims);
- // TODO(ahentz): below we ignore the O-th dimension (number of batches).
- if (t->type == kTfLiteFloat32) {
- FillRandomValue<float>(
- interpreter->typed_tensor<float>(i),
- std::vector<int>(sizes.begin() + 1, sizes.end()),
- []() { return static_cast<float>(rand()) / RAND_MAX - 0.5f; });
- } else if (t->type == kTfLiteInt32) {
- // TODO(yunluli): This is currently only used for handling embedding input
- // for speech models. Generalize if necessary.
- FillRandomValue<int32_t>(
- interpreter->typed_tensor<int32_t>(i),
- std::vector<int32_t>(sizes.begin() + 1, sizes.end()),
- []() { return static_cast<int32_t>(rand()) % 100; });
- } else if (t->type == kTfLiteUInt8) {
- FillRandomValue<uint8_t>(
- interpreter->typed_tensor<uint8_t>(i),
- std::vector<int>(sizes.begin() + 1, sizes.end()),
- []() { return static_cast<uint8_t>(rand()) % 255; });
- } else if (t->type == kTfLiteInt8) {
- FillRandomValue<int8_t>(
- interpreter->typed_tensor<int8_t>(i),
- std::vector<int>(sizes.begin() + 1, sizes.end()),
- []() { return static_cast<int8_t>(rand()) % 255 - 127; });
- } else if (t->type == kTfLiteString) {
- tflite::DynamicBuffer buffer;
- FillRandomString(&buffer, sizes, []() {
- return "we're have some friends over saturday to hang out in the yard";
- });
- buffer.WriteToTensor(interpreter->tensor(i), /*new_shape=*/nullptr);
- } else {
- TFLITE_LOG(FATAL) << "Don't know how to populate tensor " << t->name
- << " of type " << t->type;
- }
- }
-}
-
-void BenchmarkTfLiteModel::Init() {
- std::string graph = params_.Get<std::string>("graph");
- model = tflite::FlatBufferModel::BuildFromFile(graph.c_str());
- if (!model) {
- TFLITE_LOG(FATAL) << "Failed to mmap model " << graph;
- }
- TFLITE_LOG(INFO) << "Loaded model " << graph;
- model->error_reporter();
- TFLITE_LOG(INFO) << "resolved reporter";
-
-#ifdef TFLITE_CUSTOM_OPS_HEADER
- tflite::MutableOpResolver resolver;
- RegisterSelectedOps(&resolver);
-#else
- nnfw::tflite::BuiltinOpResolver resolver;
-#endif
-
- tflite::InterpreterBuilder(*model, resolver)(&interpreter);
- if (!interpreter) {
- TFLITE_LOG(FATAL) << "Failed to construct interpreter";
- }
- profiling_listener_.SetInterpreter(interpreter.get());
- ::profiling::Context::get().setProfiler(interpreter->GetProfiler());
-
- auto enable_sync = std::getenv("PROFILING_OP_SYNC");
- if (enable_sync && std::strtol(enable_sync, NULL, 0) != 0)
- {
- ::profiling::Context::get().setSync();
- }
-
- const int32_t num_threads = params_.Get<int32_t>("num_threads");
-
- if (num_threads != -1) {
- interpreter->SetNumThreads(num_threads);
- }
-
- bool use_nnapi = params_.Get<bool>("use_nnapi");
-
- interpreter->UseNNAPI(use_nnapi);
- if (use_nnapi) {
- if (nnfw_delegate_.BuildGraph(&(interpreter.get()->primary_subgraph())) != kTfLiteOk) {
- TFLITE_LOG(FATAL) << "Failed to BuildGraph!";
- }
- }
- ApplyDelegates();
-
- auto interpreter_inputs = interpreter->inputs();
-
- if (!inputs.empty()) {
- TFLITE_BENCHMARK_CHECK_EQ(inputs.size(), interpreter_inputs.size())
- << "Inputs mismatch: Model inputs #:" << interpreter_inputs.size()
- << " expected: " << inputs.size();
- }
-
- // TFLITE_BENCHMARK_CHECK that all names and types match
- for (int j = 0; j < inputs.size(); ++j) {
- const InputLayerInfo& input = inputs[j];
- int i = interpreter_inputs[j];
- TfLiteTensor* t = interpreter->tensor(i);
- TFLITE_BENCHMARK_CHECK_EQ(t->name, input.name)
- << "Tensor # " << i << " is named " << t->name << " but flags call it "
- << input.name;
- }
-
- // Resize all non-string tensors.
- for (int j = 0; j < inputs.size(); ++j) {
- const InputLayerInfo& input = inputs[j];
- int i = interpreter_inputs[j];
- TfLiteTensor* t = interpreter->tensor(i);
- if (t->type != kTfLiteString) {
- interpreter->ResizeInputTensor(i, input.shape);
- }
- }
-
- if (interpreter->AllocateTensors() != kTfLiteOk) {
- TFLITE_LOG(FATAL) << "Failed to allocate tensors!";
- }
-}
-
-void BenchmarkTfLiteModel::RunImpl() {
- bool use_nnapi = params_.Get<bool>("use_nnapi");
- if (use_nnapi) {
- if (nnfw_delegate_.Invoke(&interpreter->primary_subgraph()) != kTfLiteOk) {
- TFLITE_LOG(FATAL) << "Failed to invoke!";
- }
- } else {
- if (interpreter->Invoke() != kTfLiteOk) {
- TFLITE_LOG(FATAL) << "Failed to invoke!";
- }
- }
-}
-
-} // namespace benchmark
-} // namespace tflite
diff --git a/tests/tools/tflite_benchmark_model/profile_summarizer.cc b/tests/tools/tflite_benchmark_model/profile_summarizer.cc
deleted file mode 100644
index b547c7095..000000000
--- a/tests/tools/tflite_benchmark_model/profile_summarizer.cc
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/lite/profiling/profile_summarizer.h"
-
-#include <sstream>
-
-#include "tensorflow/lite/schema/schema_generated.h"
-
-namespace tflite {
-namespace profiling {
-namespace {
-
-struct OperatorDetails {
- std::string name;
- std::vector<std::string> inputs;
- std::vector<std::string> outputs;
-};
-
-std::string GetTensorName(const tflite::Interpreter& interpreter,
- int tensor_index) {
- const auto tensor = interpreter.tensor(tensor_index);
- if (tensor == nullptr || tensor->name == nullptr) {
- return "Unknown";
- }
- return tensor->name;
-}
-std::vector<std::string> GetTensorNames(const tflite::Interpreter& interpreter,
- const TfLiteIntArray* tensor_indices) {
- std::vector<std::string> tensors;
- tensors.reserve(tensor_indices->size);
- for (int i = 0; i < tensor_indices->size; i++) {
- tensors.push_back(GetTensorName(interpreter, tensor_indices->data[i]));
- }
- return tensors;
-}
-
-std::string ToString(const std::vector<std::string>& str_vector) {
- std::stringstream stream;
- stream << "[";
- bool first = true;
- for (const auto& s : str_vector) {
- if (!first) {
- stream << ", ";
- } else {
- first = false;
- }
- stream << s;
- }
- stream << "]";
- return stream.str();
-}
-
-OperatorDetails GetOperatorDetails(const tflite::Interpreter& interpreter,
- int node_index) {
- auto node_reg = interpreter.node_and_registration(node_index);
- auto inputs = node_reg->first.inputs;
- auto outputs = node_reg->first.outputs;
- int code = node_reg->second.builtin_code;
- const char* op_name = nullptr;
- if (code == tflite::BuiltinOperator_CUSTOM) {
- const char* custom_name = node_reg->second.custom_name;
- op_name = custom_name ? custom_name : "UnknownCustomOp";
- } else {
- op_name = tflite::EnumNamesBuiltinOperator()[code];
- }
- const char* profiling_string =
- interpreter.OpProfilingString(node_reg->second, &node_reg->first);
- OperatorDetails details;
- details.name = op_name;
- if (profiling_string) {
- details.name += ":" + std::string(profiling_string);
- }
- details.inputs = GetTensorNames(interpreter, inputs);
- details.outputs = GetTensorNames(interpreter, outputs);
- return details;
-}
-
-tensorflow::StatSummarizerOptions GetProfileSummarizerOptions() {
- auto options = tensorflow::StatSummarizerOptions();
- options.show_summary = true;
- options.show_memory = false;
- return options;
-}
-
-} // namespace
-
-ProfileSummarizer::ProfileSummarizer()
- : stats_calculator_(
- new ::tensorflow::StatsCalculator(GetProfileSummarizerOptions())) {}
-
-void ProfileSummarizer::ProcessProfiles(
- const std::vector<const ProfileEvent*>& profile_stats,
- const tflite::Interpreter& interpreter) {
- std::vector<const ProfileEvent*> events;
- std::copy_if(profile_stats.begin(), profile_stats.end(),
- std::back_inserter(events), [](const ProfileEvent* e) {
- return e->event_type ==
- ProfileEvent::EventType::OPERATOR_INVOKE_EVENT &&
- e->end_timestamp_us >= e->begin_timestamp_us;
- });
- // Sort with begin_time.
- std::sort(events.begin(), events.end(),
- [](const ProfileEvent* const& a, const ProfileEvent* const& b) {
- return a->begin_timestamp_us < b->begin_timestamp_us;
- });
- if (events.empty()) {
- return;
- }
-
- int64_t base_start_us = events[0]->begin_timestamp_us;
- int node_num = 0;
- int64_t curr_total_us = 0;
- int prev_op_idx = -1;
- int child_op_no = 1;
- for (auto event : events) {
- auto op_details = GetOperatorDetails(interpreter, event->event_metadata);
- bool from_same_op = (prev_op_idx == event->event_metadata);
- child_op_no = from_same_op ? child_op_no + 1 : 1;
- auto node_name = ToString(op_details.outputs) + "#" + std::to_string(child_op_no);
- int64_t start_us = event->begin_timestamp_us - base_start_us;
- int64_t node_exec_time =
- event->end_timestamp_us - event->begin_timestamp_us;
- stats_calculator_->AddNodeStats(node_name, op_details.name, node_num,
- start_us, node_exec_time, 0 /*memory */);
- curr_total_us += node_exec_time;
- ++node_num;
- prev_op_idx = event->event_metadata;
- }
- stats_calculator_->UpdateRunTotalUs(curr_total_us);
-}
-} // namespace profiling
-} // namespace tflite
diff --git a/tests/tools/tflite_benchmark_model/stats_calculator.cc b/tests/tools/tflite_benchmark_model/stats_calculator.cc
deleted file mode 100644
index 578650701..000000000
--- a/tests/tools/tflite_benchmark_model/stats_calculator.cc
+++ /dev/null
@@ -1,317 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/core/util/stats_calculator.h"
-
-#include <iomanip>
-#include <map>
-#include <queue>
-#include <sstream>
-#include <string>
-#include <algorithm>
-
-namespace tensorflow {
-
-StatsCalculator::StatsCalculator(const StatSummarizerOptions& options)
- : options_(options) {}
-
-std::string StatsCalculator::GetShortSummary() const {
- std::stringstream stream;
- stream << "Timings (microseconds): ";
- run_total_us_.OutputToStream(&stream);
- stream << std::endl;
-
- stream << "Memory (bytes): ";
- memory_.OutputToStream(&stream);
- stream << std::endl;
-
- stream << details_.size() << " nodes observed" << std::endl;
- return stream.str();
-}
-
-std::ostream& InitField(std::ostream& stream, int width) {
- stream << "\t" << std::right << std::setw(width) << std::fixed
- << std::setprecision(3);
- return stream;
-}
-
-std::string StatsCalculator::HeaderString(const std::string& title) const {
- std::stringstream stream;
-
- stream << "============================== " << title
- << " ==============================" << std::endl;
-
- InitField(stream, 24) << "[node type]";
- InitField(stream, 9) << "[start]";
- InitField(stream, 9) << "[first]";
- InitField(stream, 9) << "[avg ms]";
- InitField(stream, 8) << "[%]";
- InitField(stream, 8) << "[cdf%]";
- InitField(stream, 10) << "[mem KB]";
- InitField(stream, 9) << "[times called]";
- stream << "\t"
- << "[Name]";
- return stream.str();
-}
-
-std::string StatsCalculator::ColumnString(const Detail& detail,
- const int64_t cumulative_stat_on_node,
- const Stat<int64_t>& stat) const {
- const double start_ms = detail.start_us.avg() / 1000.0;
- const double first_time_ms = detail.rel_end_us.first() / 1000.0;
- const double avg_time_ms = detail.rel_end_us.avg() / 1000.0;
- const double percentage = detail.rel_end_us.sum() * 100.0 / stat.sum();
- const double cdf_percentage = (cumulative_stat_on_node * 100.0f) / stat.sum();
- const int64_t times_called = detail.times_called / num_runs();
-
- std::stringstream stream;
- InitField(stream, 24) << detail.type;
- InitField(stream, 9) << start_ms;
- InitField(stream, 9) << first_time_ms;
- InitField(stream, 9) << avg_time_ms;
- InitField(stream, 7) << percentage << "%";
- InitField(stream, 7) << cdf_percentage << "%";
- InitField(stream, 10) << detail.mem_used.newest() / 1000.0;
- InitField(stream, 9) << times_called;
- stream << "\t" << detail.name;
-
- return stream.str();
-}
-
-void StatsCalculator::OrderNodesByMetric(
- SortingMetric metric, std::vector<const Detail*>* details) const {
- std::priority_queue<std::pair<std::string, const Detail*>> sorted_list;
- const int num_nodes = details_.size();
-
- for (const auto& det : details_) {
- const Detail* detail = &(det.second);
- std::stringstream stream;
- stream << std::setw(20) << std::right << std::setprecision(10)
- << std::fixed;
-
- switch (metric) {
- case BY_NAME:
- stream << detail->name;
- break;
- case BY_RUN_ORDER:
- stream << num_nodes - detail->run_order;
- break;
- case BY_TIME:
- stream << detail->rel_end_us.avg();
- break;
- case BY_MEMORY:
- stream << detail->mem_used.avg();
- break;
- case BY_TYPE:
- stream << detail->type;
- break;
- default:
- stream << "";
- break;
- }
-
- sorted_list.emplace(stream.str(), detail);
- }
-
- while (!sorted_list.empty()) {
- auto entry = sorted_list.top();
- sorted_list.pop();
- details->push_back(entry.second);
- }
-}
-
-void StatsCalculator::ComputeStatsByType(
- std::map<std::string, int64_t>* node_type_map_count,
- std::map<std::string, int64_t>* node_type_map_time,
- std::map<std::string, int64_t>* node_type_map_memory,
- std::map<std::string, int64_t>* node_type_map_times_called,
- int64_t* accumulated_us) const {
- int64_t run_count = run_total_us_.count();
-
- for (const auto& det : details_) {
- const std::string node_name = det.first;
- const Detail& detail = det.second;
-
- int64_t curr_time_val =
- static_cast<int64_t>(detail.rel_end_us.sum() / run_count);
- *accumulated_us += curr_time_val;
-
- int64_t curr_memory_val = detail.mem_used.newest();
-
- const std::string& node_type = detail.type;
-
- const std::string sharp1("#1");
- bool first = std::mismatch(sharp1.rbegin(), sharp1.rend(), node_name.rbegin()).first == sharp1.rend();
-
- if (first) {
- (*node_type_map_count)[node_type] += 1;
- (*node_type_map_times_called)[node_type] += detail.times_called / run_count;
- }
- (*node_type_map_time)[node_type] += curr_time_val;
- (*node_type_map_memory)[node_type] += curr_memory_val;
- }
-}
-
-std::string StatsCalculator::GetStatsByNodeType() const {
- std::stringstream stream;
-
- stream << "Number of nodes executed: " << details_.size() << std::endl;
-
- stream << "============================== Summary by node type "
- "=============================="
- << std::endl;
-
- std::map<std::string, int64_t> node_type_map_count;
- std::map<std::string, int64_t> node_type_map_time;
- std::map<std::string, int64_t> node_type_map_memory;
- std::map<std::string, int64_t> node_type_map_times_called;
- int64_t accumulated_us = 0;
-
- ComputeStatsByType(&node_type_map_count, &node_type_map_time,
- &node_type_map_memory, &node_type_map_times_called,
- &accumulated_us);
-
- // Sort them.
- std::priority_queue<std::pair<int64_t, std::pair<std::string, int64_t>>>
- timings;
- for (const auto& node_type : node_type_map_time) {
- const int64_t mem_used = node_type_map_memory[node_type.first];
- timings.emplace(node_type.second,
- std::pair<std::string, int64_t>(node_type.first, mem_used));
- }
-
- InitField(stream, 24) << "[Node type]";
- InitField(stream, 9) << "[count]";
- InitField(stream, 10) << "[avg ms]";
- InitField(stream, 11) << "[avg %]";
- InitField(stream, 11) << "[cdf %]";
- InitField(stream, 10) << "[mem KB]";
- InitField(stream, 10) << "[times called]";
- stream << std::endl;
-
- float cdf = 0.0f;
- while (!timings.empty()) {
- auto entry = timings.top();
- timings.pop();
-
- const std::string node_type = entry.second.first;
- const float memory = entry.second.second / 1000.0f;
-
- const int64_t node_type_total_us = entry.first;
- const float time_per_run_ms = node_type_total_us / 1000.0f;
-
- const float percentage =
- ((entry.first / static_cast<float>(accumulated_us)) * 100.0f);
- cdf += percentage;
-
- InitField(stream, 24) << node_type;
- InitField(stream, 9) << node_type_map_count[node_type];
- InitField(stream, 10) << time_per_run_ms;
- InitField(stream, 10) << percentage << "%";
- InitField(stream, 10) << cdf << "%";
- InitField(stream, 10) << memory;
- InitField(stream, 9) << node_type_map_times_called[node_type];
- stream << std::endl;
- }
- stream << std::endl;
- return stream.str();
-}
-
-std::string StatsCalculator::GetStatsByMetric(const std::string& title,
- SortingMetric sorting_metric,
- int num_stats) const {
- std::vector<const Detail*> details;
- OrderNodesByMetric(sorting_metric, &details);
-
- double cumulative_stat_on_node = 0;
-
- std::stringstream stream;
- stream << HeaderString(title) << std::endl;
- int stat_num = 0;
- for (auto detail : details) {
- ++stat_num;
- if (num_stats > 0 && stat_num > num_stats) {
- break;
- }
-
- // TODO(andrewharp): Make this keep track of the particular metric for cdf.
- cumulative_stat_on_node += detail->rel_end_us.sum();
- stream << ColumnString(*detail, cumulative_stat_on_node, run_total_us_)
- << std::endl;
- }
- stream << std::endl;
- return stream.str();
-}
-
-std::string StatsCalculator::GetOutputString() const {
- std::stringstream stream;
- if (options_.show_run_order) {
- stream << GetStatsByMetric("Run Order", BY_RUN_ORDER,
- options_.run_order_limit);
- }
- if (options_.show_time) {
- stream << GetStatsByMetric("Top by Computation Time", BY_TIME,
- options_.time_limit);
- }
- if (options_.show_memory) {
- stream << GetStatsByMetric("Top by Memory Use", BY_MEMORY,
- options_.memory_limit);
- }
- if (options_.show_type) {
- stream << GetStatsByNodeType();
- }
- if (options_.show_summary) {
- stream << GetShortSummary() << std::endl;
- }
- return stream.str();
-}
-
-void StatsCalculator::AddNodeStats(const std::string& name,
- const std::string& type, int64_t run_order,
- int64_t start_us, int64_t rel_end_us,
- int64_t mem_used) {
- Detail* detail = nullptr;
- if (details_.find(name) == details_.end()) {
- details_.insert({name, {}});
- detail = &details_.at(name);
- detail->type = type;
- detail->name = name;
- detail->run_order = run_order;
- } else {
- detail = &details_.at(name);
- }
- detail->start_us.UpdateStat(start_us);
- detail->rel_end_us.UpdateStat(rel_end_us);
- detail->mem_used.UpdateStat(mem_used);
- detail->times_called++;
-}
-
-} // namespace tensorflow
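
For context on the profiler removed above: a minimal sketch of how this StatsCalculator API fits together. The options type name and UpdateRunTotalUs() are assumptions carried over from upstream TensorFlow, not confirmed by this diff:

    tensorflow::StatSummarizerOptions opts; // assumed options type
    opts.show_summary = true;
    tensorflow::StatsCalculator calc(opts);
    // One call per profiled node per run:
    calc.AddNodeStats("conv1", "CONV_2D", /*run_order=*/0,
                      /*start_us=*/10, /*rel_end_us=*/250, /*mem_used=*/4096);
    calc.UpdateRunTotalUs(260);             // assumed run-total updater
    std::cout << calc.GetOutputString();    // sections gated by opts.show_*
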
diff --git a/tests/tools/tflite_comparator/CMakeLists.txt b/tests/tools/tflite_comparator/CMakeLists.txt
new file mode 100644
index 000000000..54e3f61fd
--- /dev/null
+++ b/tests/tools/tflite_comparator/CMakeLists.txt
@@ -0,0 +1,23 @@
+if(NOT BUILD_TFLITE_COMPARATOR_TEST_TOOL)
+ message("skipping tflite comparator tool build")
+ return()
+endif(NOT BUILD_TFLITE_COMPARATOR_TEST_TOOL)
+
+if(NOT BUILD_ONERT)
+ message("skipping tflite comparator tool build: onert is not built")
+ return()
+endif(NOT BUILD_ONERT)
+
+list(APPEND SOURCES "src/tflite_comparator.cc")
+list(APPEND SOURCES "src/args.cc")
+
+nnfw_find_package(Boost REQUIRED program_options system filesystem)
+
+add_executable(tflite_comparator ${SOURCES})
+target_include_directories(tflite_comparator PRIVATE ${Boost_INCLUDE_DIRS})
+
+target_link_libraries(tflite_comparator nnfw-dev)
+target_link_libraries(tflite_comparator nnfw_lib_tflite nnfw_lib_misc)
+target_link_libraries(tflite_comparator ${Boost_PROGRAM_OPTIONS_LIBRARY} ${Boost_SYSTEM_LIBRARY} ${Boost_FILESYSTEM_LIBRARY})
+
+install(TARGETS tflite_comparator DESTINATION bin)
diff --git a/tests/tools/tflite_comparator/src/args.cc b/tests/tools/tflite_comparator/src/args.cc
new file mode 100644
index 000000000..ecab20b17
--- /dev/null
+++ b/tests/tools/tflite_comparator/src/args.cc
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "args.h"
+
+#include <iostream>
+
+#include <boost/filesystem.hpp>
+
+namespace TFLiteRun
+{
+
+Args::Args(const int argc, char **argv) noexcept
+{
+ Initialize();
+ Parse(argc, argv);
+}
+
+void Args::Initialize(void)
+{
+ // General options
+ po::options_description general("General options");
+
+ // clang-format off
+ general.add_options()
+ ("help,h", "Display available options")
+ ("tflite", po::value<std::string>()->default_value("")->required(), "Input tflite model file for serialization")
+ ("data,d", po::value<std::vector<std::string>>()->multitoken()->default_value(std::vector<std::string>{}, ""), "Input data file for model");
+ // clang-format on
+
+ _options.add(general);
+ _positional.add("tflite", 1);
+}
+
+void Args::print(char **argv)
+{
+ std::cout << "tflite_comparator" << std::endl << std::endl;
+ std::cout << "Load tflite model by onert and TFLite, and compare their output" << std::endl;
+ std::cout << "Usage:" << std::endl;
+ std::cout << argv[0] << " --tflite model_file.tflite --data input_data.dat" << std::endl;
+ std::cout << _options;
+ std::cout << std::endl;
+}
+
+void Args::Parse(const int argc, char **argv)
+{
+ po::variables_map vm;
+ po::store(po::command_line_parser(argc, argv).options(_options).positional(_positional).run(),
+ vm);
+ po::notify(vm);
+
+ if (vm.count("help"))
+ {
+ print(argv);
+
+ exit(0);
+ }
+
+ try
+ {
+ if (vm.count("tflite"))
+ {
+ _tflite_filename = vm["tflite"].as<std::string>();
+ }
+
+ if (vm.count("data"))
+ {
+ _data_filenames = vm["data"].as<std::vector<std::string>>();
+ }
+ }
+ catch (const std::bad_cast &e)
+ {
+ std::cerr << e.what() << '\n';
+ print(argv);
+ exit(1);
+ }
+}
+
+} // end of namespace TFLiteRun
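
A minimal usage sketch for the Args wrapper above; the two accessors are the ones main() in tflite_comparator.cc (later in this diff) relies on:

    #include "args.h"

    int main(const int argc, char **argv)
    {
      TFLiteRun::Args args(argc, argv);                   // parses or exits
      const std::string model = args.getTFLiteFilename(); // "" if omitted
      const std::vector<std::string> data = args.getDataFilenames();
      return model.empty() ? 1 : 0;
    }
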
diff --git a/tests/tools/tflite_loader/src/args.h b/tests/tools/tflite_comparator/src/args.h
index 4d0e8ff41..4d0e8ff41 100644
--- a/tests/tools/tflite_loader/src/args.h
+++ b/tests/tools/tflite_comparator/src/args.h
diff --git a/tests/tools/tflite_comparator/src/tflite_comparator.cc b/tests/tools/tflite_comparator/src/tflite_comparator.cc
new file mode 100644
index 000000000..383a4e4de
--- /dev/null
+++ b/tests/tools/tflite_comparator/src/tflite_comparator.cc
@@ -0,0 +1,398 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "args.h"
+
+#include <nnfw_experimental.h>
+#include <nnfw_internal.h>
+
+#include <misc/EnvVar.h>
+#include <misc/fp32.h>
+#include <misc/RandomGenerator.h>
+
+#include <tflite/Assert.h>
+#include <tflite/InterpreterSession.h>
+
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <string>
+
+const int RUN_FAILED = 1;
+
+using namespace nnfw::tflite;
+
+const int FILE_ERROR = 2;
+
+#define NNFW_ASSERT_FAIL(expr, msg) \
+ if ((expr) != NNFW_STATUS_NO_ERROR) \
+ { \
+ std::cerr << msg << std::endl; \
+ exit(-1); \
+ }
+
+// Read raw bytes from the selected file into a pre-sized buffer
+void readData(const std::string &path, std::vector<uint8_t> &dest)
+{
+ std::ifstream in(path);
+ if (!in.good())
+ {
+ std::cerr << "can not open data file " << path << "\n";
+ exit(FILE_ERROR);
+ }
+ in.seekg(0, std::ifstream::end);
+ size_t len = in.tellg();
+ in.seekg(0, std::ifstream::beg);
+
+ assert(dest.size() == len);
+ in.read(reinterpret_cast<char *>(dest.data()), len);
+}
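
Note that readData() asserts the destination already matches the file length, so the caller sizes the buffer from the model's tensor info first; input_byte_size below is a hypothetical stand-in for that computed size:

    std::vector<uint8_t> buf(input_byte_size); // from nnfw_input_tensorinfo
    readData("input0.dat", buf);               // asserts buf.size() == file length
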
+
+template <typename T>
+void randomData(nnfw::misc::RandomGenerator &randgen, std::vector<uint8_t> &dest)
+{
+ size_t elements = dest.size() / sizeof(T);
+ assert(dest.size() % sizeof(T) == 0);
+
+ std::vector<T> vec(elements);
+ for (uint64_t i = 0; i < elements; i++)
+ {
+ vec[i] = randgen.generate<T>();
+ }
+ memcpy(dest.data(), vec.data(), elements * sizeof(T));
+}
+
+void randomBoolData(nnfw::misc::RandomGenerator &randgen, std::vector<uint8_t> &dest)
+{
+ size_t elements = dest.size();
+ std::vector<uint8_t> vec(elements);
+ for (uint64_t i = 0; i < elements; i++)
+ {
+ bool value = randgen.generate<bool>();
+ dest[i] = value ? 1 : 0;
+ }
+}
+
+inline uint64_t num_elems(const nnfw_tensorinfo *ti)
+{
+ uint64_t n = 1;
+ for (uint32_t i = 0; i < ti->rank; ++i)
+ {
+ n *= ti->dims[i];
+ }
+ return n;
+}
+
+inline size_t sizeOfNnfwType(NNFW_TYPE type)
+{
+ switch (type)
+ {
+ case NNFW_TYPE_TENSOR_BOOL:
+ case NNFW_TYPE_TENSOR_UINT8:
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
+ return 1;
+ case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED:
+ return 2;
+ case NNFW_TYPE_TENSOR_FLOAT32:
+ case NNFW_TYPE_TENSOR_INT32:
+ return 4;
+ case NNFW_TYPE_TENSOR_INT64:
+ return 8;
+ default:
+ throw std::runtime_error{"Invalid tensor type"};
+ }
+}
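
A worked example of the per-tensor buffer-size arithmetic main() performs by combining the two helpers above:

    nnfw_tensorinfo ti; // assume rank 4, dims {1, 224, 224, 3}, FLOAT32
    // num_elems(&ti) == 1 * 224 * 224 * 3 == 150528 elements
    // 150528 * sizeOfNnfwType(NNFW_TYPE_TENSOR_FLOAT32) == 150528 * 4 == 602112 bytes
    std::vector<uint8_t> buffer(num_elems(&ti) * sizeOfNnfwType(ti.dtype));
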
+
+template <typename T>
+bool isClose(const T *ref_buf, const std::vector<uint8_t> &act_buf, uint32_t index)
+{
+  // TODO Find a better way to handle quantization error
+ auto tolerance = static_cast<uint64_t>(nnfw::misc::EnvVar("TOLERANCE").asInt(0));
+ bool match = true;
+
+ for (uint32_t e = 0; e < act_buf.size() / sizeof(T); e++)
+ {
+ T ref = ref_buf[e];
+ T act = reinterpret_cast<const T *>(act_buf.data())[e];
+ uint64_t diff = static_cast<uint64_t>(((ref > act) ? (ref - act) : (act - ref)));
+
+ if (ref != act && diff > tolerance)
+ {
+ std::cerr << "Output #" << index << ", Element Index : " << e << ", ref: " << ref
+ << ", act: " << act << " (diff: " << diff << ")" << std::endl;
+ match = false;
+ }
+ }
+
+ return match;
+}
+
+template <>
+bool isClose<float>(const float *ref_buf, const std::vector<uint8_t> &act_buf, uint32_t index)
+{
+ uint32_t tolerance = nnfw::misc::EnvVar("TOLERANCE").asInt(1);
+ bool match = true;
+
+ for (uint32_t e = 0; e < act_buf.size() / sizeof(float); e++)
+ {
+ float ref = ref_buf[e];
+ float act = reinterpret_cast<const float *>(act_buf.data())[e];
+ float diff = std::fabs(ref - act);
+
+ bool match_elem = nnfw::misc::fp32::absolute_epsilon_equal(ref, act)
+ ? true
+ : nnfw::misc::fp32::epsilon_equal(ref, act, tolerance);
+
+ if (!match_elem)
+ {
+ std::cerr << "Output #" << index << ", Element Index : " << e << ", ref: " << ref
+ << ", act: " << act << " (diff: " << diff << ")" << std::endl;
+ match = false;
+ }
+ }
+
+ return match;
+}
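
A usage sketch for the two isClose() overloads above. TOLERANCE is read from the environment, so this assumes it is unset (integer tolerance 0, float ULP tolerance 1):

    int32_t ref[] = {42, -7};
    std::vector<uint8_t> act(sizeof(ref));
    std::memcpy(act.data(), ref, sizeof(ref));         // identical actual buffer
    bool ok = isClose<int32_t>(ref, act, /*index=*/0); // true: every diff == 0
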
+
+bool exact(const uint8_t *ref_buf, const std::vector<uint8_t> &act_buf, uint32_t index)
+{
+ bool match = true;
+ for (uint32_t e = 0; e < act_buf.size() / sizeof(uint8_t); e++)
+ {
+ uint8_t ref_raw = ref_buf[e];
+ bool ref = (ref_raw != 0 ? true : false);
+ uint8_t act_raw = reinterpret_cast<const uint8_t *>(act_buf.data())[e];
+ bool act = (act_raw != 0 ? true : false);
+ if (ref != act)
+ {
+ std::cerr << "Output #" << index << ", Element Index : " << e << ", ref: " << ref
+ << ", act: " << act << std::endl;
+ match = false;
+ }
+ }
+
+ return match;
+}
+
+int main(const int argc, char **argv)
+{
+ TFLiteRun::Args args(argc, argv);
+
+ auto tflite_file = args.getTFLiteFilename();
+ auto data_files = args.getDataFilenames();
+
+ if (tflite_file.empty())
+ {
+ args.print(argv);
+ return RUN_FAILED;
+ }
+
+ std::cout << "[Execution] Stage start!" << std::endl;
+ // Loading
+ nnfw_session *onert_session = nullptr;
+  NNFW_ASSERT_FAIL(nnfw_create_session(&onert_session), "[ ERROR ] Failure during session creation");
+ if (onert_session == nullptr)
+ {
+ std::cerr << "[ ERROR ] Failure to open session" << std::endl;
+ exit(-1);
+ }
+
+ NNFW_ASSERT_FAIL(nnfw_load_model_from_modelfile(onert_session, tflite_file.c_str()),
+ "[ ERROR ] Failure during model load");
+
+ uint32_t num_inputs;
+ uint32_t num_outputs;
+ NNFW_ASSERT_FAIL(nnfw_input_size(onert_session, &num_inputs),
+ "[ ERROR ] Failure during get model inputs");
+ NNFW_ASSERT_FAIL(nnfw_output_size(onert_session, &num_outputs),
+ "[ ERROR ] Failure during get model outputs");
+
+ std::cout << "[Execution] Model is deserialized!" << std::endl;
+
+ // Compile
+  NNFW_ASSERT_FAIL(nnfw_prepare(onert_session), "[ ERROR ] Failure during model prepare");
+
+ std::cout << "[Execution] Model compiled!" << std::endl;
+
+ // Prepare input/output data
+ std::vector<std::vector<uint8_t>> inputs(num_inputs);
+ std::vector<std::vector<uint8_t>> outputs(num_outputs);
+
+ bool generate_data = data_files.empty();
+ bool read_data = data_files.size() == num_inputs;
+ if (!generate_data && !read_data)
+ {
+ std::cerr << "[ ERROR ] "
+ << "Wrong number of input files." << std::endl;
+ exit(1);
+ }
+
+ const int seed = 1; /* TODO Add an option for seed value */
+ nnfw::misc::RandomGenerator randgen{seed, 0.0f, 2.0f};
+
+ for (uint32_t i = 0; i < num_inputs; i++)
+ {
+ nnfw_tensorinfo ti_input;
+ NNFW_ASSERT_FAIL(nnfw_input_tensorinfo(onert_session, i, &ti_input),
+ "[ ERROR ] Failure during get input data info");
+ size_t input_size = num_elems(&ti_input) * sizeOfNnfwType(ti_input.dtype);
+
+ inputs[i].resize(input_size);
+
+ if (generate_data)
+ {
+ switch (ti_input.dtype)
+ {
+ case NNFW_TYPE_TENSOR_BOOL:
+ randomBoolData(randgen, inputs[i]);
+ break;
+ case NNFW_TYPE_TENSOR_UINT8:
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
+ randomData<uint8_t>(randgen, inputs[i]);
+ break;
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
+ randomData<int8_t>(randgen, inputs[i]);
+ break;
+        case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED:
+          randomData<int16_t>(randgen, inputs[i]);
+          break;
+        case NNFW_TYPE_TENSOR_FLOAT32:
+ randomData<float>(randgen, inputs[i]);
+ break;
+ case NNFW_TYPE_TENSOR_INT32:
+ randomData<int32_t>(randgen, inputs[i]);
+ break;
+ case NNFW_TYPE_TENSOR_INT64:
+          randomData<int64_t>(randgen, inputs[i]);
+ break;
+ default:
+ std::cerr << "[ ERROR ] "
+                  << "Unsupported input data type" << std::endl;
+ exit(-1);
+ break;
+ }
+ }
+ else /* read_data */
+ readData(data_files[i], inputs[i]);
+
+ NNFW_ASSERT_FAIL(nnfw_set_input(onert_session, i, ti_input.dtype, inputs[i].data(), input_size),
+ "[ ERROR ] Failure to set input tensor buffer");
+ }
+
+ std::cout << "[Execution] Input data is defined!" << std::endl;
+
+ for (uint32_t i = 0; i < num_outputs; i++)
+ {
+ nnfw_tensorinfo ti_output;
+ NNFW_ASSERT_FAIL(nnfw_output_tensorinfo(onert_session, i, &ti_output),
+ "[ ERROR ] Failure during get output tensor info");
+
+ uint64_t output_elements = num_elems(&ti_output);
+ size_t output_size = output_elements * sizeOfNnfwType(ti_output.dtype);
+ outputs[i].resize(output_size);
+
+ NNFW_ASSERT_FAIL(
+ nnfw_set_output(onert_session, i, ti_output.dtype, outputs[i].data(), output_size),
+ "[ ERROR ] Failure to set output tensor buffer");
+ }
+
+ // Execute
+ NNFW_ASSERT_FAIL(nnfw_run(onert_session), "[Execution] Can't execute");
+
+ std::cout << "[Execution] Done!" << std::endl;
+
+ // Compare with tflite
+ std::cout << "[Comparison] Stage start!" << std::endl;
+ // Read tflite model
+ auto model = TfLiteModelCreateFromFile(tflite_file.c_str());
+ auto options = TfLiteInterpreterOptionsCreate();
+ TfLiteInterpreterOptionsSetNumThreads(options, nnfw::misc::EnvVar("THREAD").asInt(1));
+ auto interpreter = TfLiteInterpreterCreate(model, options);
+
+ auto sess = std::make_shared<nnfw::tflite::InterpreterSession>(interpreter);
+ sess->prepare();
+ // Set input and run
+ for (uint32_t i = 0; i < num_inputs; i++)
+ {
+ auto input_tensor = TfLiteInterpreterGetInputTensor(interpreter, i);
+ memcpy(TfLiteTensorData(input_tensor), inputs[i].data(), inputs[i].size());
+ }
+ if (!sess->run())
+ {
+ std::cout << "[Comparison] TFLite run failed!" << std::endl;
+ assert(0 && "Run failed!");
+ }
+ std::cout << "[Comparison] TFLite run done!" << std::endl;
+
+ bool find_unmatched_output = false;
+
+ for (uint32_t out_idx = 0; out_idx < num_outputs; out_idx++)
+ {
+ nnfw_tensorinfo ti;
+ nnfw_output_tensorinfo(onert_session, out_idx, &ti);
+
+ bool matched = true;
+ // Check output tensor values
+ auto output_tensor = TfLiteInterpreterGetOutputTensor(interpreter, out_idx);
+ auto ref_output = TfLiteTensorData(output_tensor);
+ const auto &output = outputs[out_idx];
+
+ switch (ti.dtype)
+ {
+ case NNFW_TYPE_TENSOR_BOOL:
+ matched = exact(reinterpret_cast<uint8_t *>(ref_output), output, out_idx);
+ break;
+ case NNFW_TYPE_TENSOR_UINT8:
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
+ matched = isClose<uint8_t>(reinterpret_cast<uint8_t *>(ref_output), output, out_idx);
+ break;
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
+ matched = isClose<int8_t>(reinterpret_cast<int8_t *>(ref_output), output, out_idx);
+ break;
+ case NNFW_TYPE_TENSOR_INT32:
+ matched = isClose<int32_t>(reinterpret_cast<int32_t *>(ref_output), output, out_idx);
+ break;
+ case NNFW_TYPE_TENSOR_FLOAT32:
+ matched = isClose<float>(reinterpret_cast<float *>(ref_output), output, out_idx);
+ break;
+ case NNFW_TYPE_TENSOR_INT64:
+ matched = isClose<int64_t>(reinterpret_cast<int64_t *>(ref_output), output, out_idx);
+ break;
+ default:
+ throw std::runtime_error{"Invalid tensor type"};
+ }
+
+ if (!matched)
+ find_unmatched_output = true;
+ }
+
+ // Print results
+ int ret = 0;
+ if (find_unmatched_output)
+ {
+ std::cout << "[Comparison] outputs is not equal!" << std::endl;
+ ret = 1;
+ }
+ else
+ {
+ std::cout << "[Comparison] Outputs is equal!" << std::endl;
+ }
+ std::cout << "[Comparison] Done!" << std::endl;
+
+ nnfw_close_session(onert_session);
+
+ return ret;
+}
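
The onert side of main() above reduces to the standard nnfw C-API session lifecycle; a condensed sketch with error handling omitted (every call below appears in the code above, the buffer sizes are example values):

    nnfw_session *s = nullptr;
    nnfw_create_session(&s);
    nnfw_load_model_from_modelfile(s, "model.tflite");
    nnfw_prepare(s);
    std::vector<float> in(150528), out(1000); // element counts from tensorinfo
    nnfw_set_input(s, 0, NNFW_TYPE_TENSOR_FLOAT32, in.data(), in.size() * sizeof(float));
    nnfw_set_output(s, 0, NNFW_TYPE_TENSOR_FLOAT32, out.data(), out.size() * sizeof(float));
    nnfw_run(s);
    nnfw_close_session(s);
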
diff --git a/tests/tools/tflite_loader/CMakeLists.txt b/tests/tools/tflite_loader/CMakeLists.txt
deleted file mode 100644
index 0fe1c69de..000000000
--- a/tests/tools/tflite_loader/CMakeLists.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-if(NOT BUILD_TFLITE_LOADER_TEST_TOOL)
- message("skipping tflite loader tool build")
- return()
-endif(NOT BUILD_TFLITE_LOADER_TEST_TOOL)
-
-if(NOT BUILD_ONERT)
- message("skipping tflite loader tool build: onert is not built")
- return()
-endif(NOT BUILD_ONERT)
-
-list(APPEND SOURCES "src/tflite_loader.cc")
-list(APPEND SOURCES "src/args.cc")
-
-nnfw_find_package(Boost REQUIRED program_options system filesystem)
-
-add_executable(tflite_loader_test_tool ${SOURCES})
-target_include_directories(tflite_loader_test_tool PRIVATE ${Boost_INCLUDE_DIRS})
-
-target_link_libraries(tflite_loader_test_tool onert_core onert tflite_loader)
-target_link_libraries(tflite_loader_test_tool nnfw_lib_tflite nnfw_lib_misc)
-target_link_libraries(tflite_loader_test_tool ${Boost_PROGRAM_OPTIONS_LIBRARY} ${Boost_SYSTEM_LIBRARY} ${Boost_FILESYSTEM_LIBRARY})
-
-install(TARGETS tflite_loader_test_tool DESTINATION bin)
diff --git a/tests/tools/tflite_loader/src/args.cc b/tests/tools/tflite_loader/src/args.cc
deleted file mode 100644
index e9fb141ca..000000000
--- a/tests/tools/tflite_loader/src/args.cc
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "args.h"
-
-#include <iostream>
-
-#include <boost/filesystem.hpp>
-
-namespace TFLiteRun
-{
-
-Args::Args(const int argc, char **argv) noexcept
-{
- Initialize();
- Parse(argc, argv);
-}
-
-void Args::Initialize(void)
-{
- // General options
- po::options_description general("General options");
-
- // clang-format off
- general.add_options()
- ("help,h", "Display available options")
- ("tflite", po::value<std::string>()->default_value("")->required(), "Input tflite model file for serialization")
- ("data,d", po::value<std::vector<std::string>>()->multitoken()->default_value(std::vector<std::string>{}, ""), "Input data file for model");
- // clang-format on
-
- _options.add(general);
- _positional.add("tflite", 1);
-}
-
-void Args::print(char **argv)
-{
- std::cout << "tflite_loader" << std::endl << std::endl;
- std::cout << "Load tflite model by Loader and TFLite and compare their output" << std::endl;
- std::cout << "Usage:" << std::endl;
- std::cout << argv[0] << " --tflite model_file.tflite --data input_data.dat" << std::endl;
- std::cout << _options;
- std::cout << std::endl;
-}
-
-void Args::Parse(const int argc, char **argv)
-{
- po::variables_map vm;
- po::store(po::command_line_parser(argc, argv).options(_options).positional(_positional).run(),
- vm);
- po::notify(vm);
-
- if (vm.count("help"))
- {
- print(argv);
-
- exit(0);
- }
-
- try
- {
- if (vm.count("tflite"))
- {
- _tflite_filename = vm["tflite"].as<std::string>();
- }
-
- if (vm.count("data"))
- {
- _data_filenames = vm["data"].as<std::vector<std::string>>();
- }
- }
- catch (const std::bad_cast &e)
- {
- std::cerr << e.what() << '\n';
- print(argv);
- exit(1);
- }
-}
-
-} // end of namespace TFLiteRun
diff --git a/tests/tools/tflite_loader/src/tflite_loader.cc b/tests/tools/tflite_loader/src/tflite_loader.cc
deleted file mode 100644
index ce099210b..000000000
--- a/tests/tools/tflite_loader/src/tflite_loader.cc
+++ /dev/null
@@ -1,311 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/ext/kernels/register.h"
-
-#include "args.h"
-#include "tflite/InterpreterSession.h"
-#include "tflite/Assert.h"
-#include "tflite/Diff.h"
-#include "misc/tensor/IndexIterator.h"
-
-#include <iostream>
-#include <fstream>
-
-#include "compiler/Compiler.h"
-#include "exec/Execution.h"
-#include "ir/Graph.h"
-
-#include "tflite_loader.h"
-
-#include <memory>
-
-const int RUN_FAILED = 1;
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-const int FILE_ERROR = 2;
-const float DIFFERENCE_THRESHOLD = 10e-5;
-
-// Read vector of floats from selected file
-std::vector<float> readData(const string &path)
-{
- std::ifstream in(path);
- if (!in.good())
- {
- std::cerr << "can not open data file " << path << "\n";
- exit(FILE_ERROR);
- }
- in.seekg(0, std::ifstream::end);
- size_t len = in.tellg();
- in.seekg(0, std::ifstream::beg);
- assert(len % sizeof(float) == 0);
- size_t size = len / sizeof(float);
- std::vector<float> vec(size);
- for (size_t i = 0; i < size; ++i)
- {
- in.read(reinterpret_cast<char *>(&vec[i]), sizeof(float));
- }
- return vec;
-}
-
-std::vector<float> randomData(nnfw::misc::RandomGenerator &randgen, const uint64_t size)
-{
- std::vector<float> vec(size);
- for (uint64_t i = 0; i < size; i++)
- {
- vec[i] = randgen.generate<float>();
- }
- return vec;
-}
-
-void executeGraph(const std::shared_ptr<onert::ir::Graph> &g,
- const std::vector<std::vector<float>> &inputs,
- std::vector<std::vector<float>> &outputs)
-{
- auto subgs = std::make_shared<onert::ir::Subgraphs>();
- subgs->push(onert::ir::SubgraphIndex{0}, g);
- auto compiler = new onert::compiler::Compiler(subgs);
- std::shared_ptr<onert::exec::ExecutorMap> executors;
- // Compilation
- try
- {
- executors = compiler->compile();
- }
- catch (const std::exception &e)
- {
- std::cerr << "[Execution] Can't compile model" << std::endl;
- std::cerr << e.what() << std::endl;
- exit(-1);
- }
-
- std::cout << "[Execution] Graph compiled!" << std::endl;
-
- auto execution = std::make_shared<onert::exec::Execution>(executors);
-
- // Setting IO
- try
- {
- // Verify input shapes
- auto num_inputs = inputs.size();
- for (size_t i = 0; i < num_inputs; i++)
- {
- auto input_operand_idx = g->getInputs().at(i);
- auto input_shape = g->operands().at(input_operand_idx).shape();
- assert(inputs[i].size() == input_shape.num_elements());
- }
-
- // Set output shapes
- auto num_outputs = g->getOutputs().size();
- outputs.resize(num_outputs);
- for (uint32_t i = 0; i < num_outputs; i++)
- {
- auto output_operand_idx = g->getOutputs().at(i);
- auto output_shape = g->operands().at(output_operand_idx).shape();
- outputs[i].resize(output_shape.num_elements());
- }
-
- for (size_t i = 0; i < num_inputs; i++)
- execution->setInput(onert::ir::IOIndex(i), inputs[i].data(),
- inputs[i].size() * sizeof(float));
- for (uint32_t i = 0; i < num_outputs; i++)
- execution->setOutput(onert::ir::IOIndex(i), outputs[i].data(),
- outputs[i].size() * sizeof(float));
- }
- catch (const std::exception &e)
- {
- std::cerr << "[Execution] Can't set model IO" << std::endl;
- std::cerr << e.what() << '\n';
- exit(-1);
- }
-
- try
- {
- execution->execute();
- }
- catch (const std::exception &e)
- {
- std::cerr << "[Execution] Can't execute" << std::endl;
- std::cerr << e.what() << '\n';
- exit(-1);
- }
-
- std::cout << "[Execution] Done!" << std::endl;
-
- delete compiler;
-}
-
-int main(const int argc, char **argv)
-{
- TFLiteRun::Args args(argc, argv);
-
- auto tflite_file = args.getTFLiteFilename();
- auto data_files = args.getDataFilenames();
-
- if (tflite_file.empty())
- {
- args.print(argv);
- return RUN_FAILED;
- }
-
- std::cout << "[Execution] Stage start!" << std::endl;
- std::shared_ptr<onert::ir::Graph> test_graph;
- // Loading
- try
- {
- test_graph =
- onert::tflite_loader::loadModel(tflite_file.c_str())->at(onert::ir::SubgraphIndex{0});
- }
- catch (std::exception &e)
- {
- std::cerr << "[ ERROR ] "
- << "Failure during model load" << std::endl;
- std::cerr << e.what() << std::endl;
- exit(-1);
- }
-
- // TODO: Support another input/output types
- for (const auto &input_idx : test_graph->getInputs())
- {
- const auto input_type = test_graph->operands().at(input_idx).typeInfo().type();
- assert(input_type == onert::ir::DataType::FLOAT32 && "Only FLOAT32 inputs are supported");
- }
- for (const auto &output_idx : test_graph->getOutputs())
- {
- const auto output_type = test_graph->operands().at(output_idx).typeInfo().type();
- assert(output_type == onert::ir::DataType::FLOAT32 && "Only FLOAT32 outputs are supported");
- }
-
- std::cout << "[Execution] Model is deserialized!" << std::endl;
- auto num_inputs = test_graph->getInputs().size();
- std::vector<std::vector<float>> inputs(num_inputs);
- bool generate_data = data_files.empty();
- bool read_data = data_files.size() == num_inputs;
- if (num_inputs == 0)
- {
- std::cerr << "[ ERROR ] "
- << "No inputs in model => execution is not possible" << std::endl;
- exit(1);
- }
- if (!generate_data && !read_data)
- {
- std::cerr << "[ ERROR ] "
- << "Wrong number of input files." << std::endl;
- exit(1);
- }
-
- const int seed = 1; /* TODO Add an option for seed value */
- nnfw::misc::RandomGenerator randgen{seed, 0.0f, 2.0f};
- try
- {
- for (uint32_t i = 0; i < num_inputs; i++)
- {
- if (generate_data)
- {
- uint64_t sz =
- test_graph->operands().at(test_graph->getInputs().at(i)).shape().num_elements();
- inputs[i] = randomData(randgen, sz);
- }
- else /* read_data */
- inputs[i] = readData(data_files[i]);
- }
- }
- catch (std::exception &e)
- {
- std::cerr << "[ ERROR ] "
- << "Failure during input data generation" << std::endl;
- std::cerr << e.what() << std::endl;
- exit(-1);
- }
-
- std::cout << "[Execution] Input data is defined!" << std::endl;
- std::vector<std::vector<float>> outputs;
- // Run graph
- executeGraph(test_graph, inputs, outputs);
- // Compare with tflite
- std::cout << "[Comparison] Stage start!" << std::endl;
- // Read tflite model
- StderrReporter error_reporter;
- auto model = FlatBufferModel::BuildFromFile(tflite_file.c_str(), &error_reporter);
-
- BuiltinOpResolver resolver;
- InterpreterBuilder builder(*model, resolver);
-
- std::unique_ptr<Interpreter> interpreter;
- try
- {
- TFLITE_ENSURE(builder(&interpreter));
- }
- catch (const std::exception &e)
- {
- std::cerr << e.what() << std::endl;
- exit(FILE_ERROR);
- }
- interpreter->SetNumThreads(2);
-
- auto sess = std::make_shared<nnfw::tflite::InterpreterSession>(interpreter.get());
- sess->prepare();
- // Set input and run
- for (uint32_t i = 0; i < num_inputs; i++)
- {
- auto input_tensor = interpreter->tensor(interpreter->inputs().at(i));
- memcpy(input_tensor->data.f, inputs[i].data(), inputs[i].size() * sizeof(float));
- }
- if (!sess->run())
- {
- std::cout << "[Comparison] TFLite run failed!" << std::endl;
- assert(0 && "Run failed!");
- }
- std::cout << "[Comparison] TFLite run done!" << std::endl;
-
- // Calculate max difference over all outputs
- float max_difference = 0.0f;
- auto num_outputs = test_graph->getOutputs().size();
- for (uint32_t out_idx = 0; out_idx < num_outputs; out_idx++)
- {
- const auto &tflite_output_tensor = interpreter->tensor(interpreter->outputs().at(out_idx));
- const auto &nnfw_output_tensor = outputs[out_idx];
-
- if (nnfw_output_tensor.size() != tflite_output_tensor->bytes / sizeof(float))
- std::cout << "[Comparison] Different size of outputs!" << std::endl;
- // Check max difference
- float *tflite_out_ptr = tflite_output_tensor->data.f;
- for (const auto &nnfw_out : nnfw_output_tensor)
- {
- if (std::abs(nnfw_out - *tflite_out_ptr) > max_difference)
- max_difference = std::abs(nnfw_out - *tflite_out_ptr);
-
- tflite_out_ptr++;
- }
- }
-
- // Print results
- std::cout << "[Comparison] Max difference: " << max_difference << std::endl;
- int ret = 0;
- if (max_difference > DIFFERENCE_THRESHOLD)
- {
- std::cout << "[Comparison] Outputs is not equal!" << std::endl;
- ret = 1;
- }
- else
- {
- std::cout << "[Comparison] Outputs is equal!" << std::endl;
- }
- std::cout << "[Comparison] Done!" << std::endl;
-
- return ret;
-}
diff --git a/tests/tools/tflite_run/CMakeLists.txt b/tests/tools/tflite_run/CMakeLists.txt
index 3f30d3e32..bbe199294 100644
--- a/tests/tools/tflite_run/CMakeLists.txt
+++ b/tests/tools/tflite_run/CMakeLists.txt
@@ -32,4 +32,4 @@ add_executable(tflite_test src/tflite_test.cc)
## Link test executable against gtest & gtest_main
target_link_libraries(tflite_test gtest gtest_main ${LIB_PTHREAD})
## install test binary for packaging
-install(TARGETS tflite_test DESTINATION unittest_standalone)
+install(TARGETS tflite_test DESTINATION unittest)
diff --git a/tests/tools/tflite_run/src/bin_image.cc b/tests/tools/tflite_run/src/bin_image.cc
index 16d4c94f7..fadece045 100644
--- a/tests/tools/tflite_run/src/bin_image.cc
+++ b/tests/tools/tflite_run/src/bin_image.cc
@@ -20,7 +20,7 @@
#include "bin_image.h"
BinImage::BinImage(unsigned int width, unsigned int height, unsigned int channels)
- : _width(width), _height(height), _channels(channels)
+ : _width(width), _height(height), _channels(channels)
{
}
diff --git a/tests/tools/tflite_run/src/tensor_dumper.cc b/tests/tools/tflite_run/src/tensor_dumper.cc
index 4ccd4e11a..86d37de5d 100644
--- a/tests/tools/tflite_run/src/tensor_dumper.cc
+++ b/tests/tools/tflite_run/src/tensor_dumper.cc
@@ -20,7 +20,7 @@
#include <iostream>
#include <cstring>
-#include "tensorflow/lite/interpreter.h"
+#include <tensorflow/lite/c/c_api.h>
namespace TFLiteRun
{
@@ -30,16 +30,31 @@ TensorDumper::TensorDumper()
// DO NOTHING
}
-void TensorDumper::addTensors(tflite::Interpreter &interpreter, const std::vector<int> &indices)
+void TensorDumper::addInputTensors(TfLiteInterpreter &interpreter)
{
- for (const auto &o : indices)
+ auto const input_count = TfLiteInterpreterGetInputTensorCount(&interpreter);
+ for (int32_t idx = 0; idx < input_count; idx++)
{
- const TfLiteTensor *tensor = interpreter.tensor(o);
- int size = tensor->bytes;
+ const TfLiteTensor *tensor = TfLiteInterpreterGetInputTensor(&interpreter, idx);
+ auto size = TfLiteTensorByteSize(tensor);
std::vector<char> buffer;
buffer.resize(size);
- memcpy(buffer.data(), tensor->data.raw, size);
- _tensors.emplace_back(o, std::move(buffer));
+ memcpy(buffer.data(), TfLiteTensorData(tensor), size);
+ _input_tensors.emplace_back(idx, std::move(buffer));
+ }
+}
+
+void TensorDumper::addOutputTensors(TfLiteInterpreter &interpreter)
+{
+ auto const output_count = TfLiteInterpreterGetOutputTensorCount(&interpreter);
+ for (int32_t idx = 0; idx < output_count; idx++)
+ {
+ const TfLiteTensor *tensor = TfLiteInterpreterGetOutputTensor(&interpreter, idx);
+ auto size = TfLiteTensorByteSize(tensor);
+ std::vector<char> buffer;
+ buffer.resize(size);
+ memcpy(buffer.data(), TfLiteTensorData(tensor), size);
+ _output_tensors.emplace_back(idx, std::move(buffer));
}
}
@@ -49,17 +64,30 @@ void TensorDumper::dump(const std::string &filename) const
std::ofstream file(filename, std::ios::out | std::ios::binary);
// Write number of tensors
- uint32_t num_tensors = static_cast<uint32_t>(_tensors.size());
+ uint32_t num_tensors =
+ static_cast<uint32_t>(_input_tensors.size()) + static_cast<uint32_t>(_output_tensors.size());
file.write(reinterpret_cast<const char *>(&num_tensors), sizeof(num_tensors));
- // Write tensor indices
- for (const auto &t : _tensors)
+ // Write input tensor indices
+ for (const auto &t : _input_tensors)
{
file.write(reinterpret_cast<const char *>(&t._index), sizeof(int));
}
- // Write data
- for (const auto &t : _tensors)
+ // Write output tensor indices
+ for (const auto &t : _output_tensors)
+ {
+ file.write(reinterpret_cast<const char *>(&t._index), sizeof(int));
+ }
+
+ // Write input data
+ for (const auto &t : _input_tensors)
+ {
+ file.write(t._data.data(), t._data.size());
+ }
+
+ // Write output data
+ for (const auto &t : _output_tensors)
{
file.write(t._data.data(), t._data.size());
}
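
The resulting dump layout is: a uint32 tensor count, one int index per tensor (inputs first, then outputs), then the raw input buffers followed by the raw output buffers. A hedged sketch of a header reader; payload sizes are not stored in the file, so a real reader recovers them from the model, as TensorLoader does:

    #include <cstdint>
    #include <fstream>
    #include <string>
    #include <vector>

    std::vector<int> readDumpHeader(const std::string &path)
    {
      std::ifstream file(path, std::ios::binary);
      uint32_t num_tensors = 0;
      file.read(reinterpret_cast<char *>(&num_tensors), sizeof(num_tensors));
      std::vector<int> indices(num_tensors);
      file.read(reinterpret_cast<char *>(indices.data()), num_tensors * sizeof(int));
      return indices; // raw tensor bytes follow, inputs first
    }
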
diff --git a/tests/tools/tflite_run/src/tensor_dumper.h b/tests/tools/tflite_run/src/tensor_dumper.h
index 5fdcc54f7..5847c3971 100644
--- a/tests/tools/tflite_run/src/tensor_dumper.h
+++ b/tests/tools/tflite_run/src/tensor_dumper.h
@@ -17,6 +17,8 @@
#ifndef __TFLITE_RUN_TENSOR_DUMPER_H__
#define __TFLITE_RUN_TENSOR_DUMPER_H__
+#include <tensorflow/lite/c/c_api.h>
+
#include <memory>
#include <string>
#include <vector>
@@ -42,11 +44,13 @@ private:
public:
TensorDumper();
- void addTensors(tflite::Interpreter &interpreter, const std::vector<int> &indices);
+ void addInputTensors(TfLiteInterpreter &interpreter);
+ void addOutputTensors(TfLiteInterpreter &interpreter);
void dump(const std::string &filename) const;
private:
- std::vector<Tensor> _tensors;
+ std::vector<Tensor> _input_tensors;
+ std::vector<Tensor> _output_tensors;
};
} // end of namespace TFLiteRun
diff --git a/tests/tools/tflite_run/src/tensor_loader.cc b/tests/tools/tflite_run/src/tensor_loader.cc
index 93d9e2f54..ebd64470d 100644
--- a/tests/tools/tflite_run/src/tensor_loader.cc
+++ b/tests/tools/tflite_run/src/tensor_loader.cc
@@ -18,6 +18,7 @@
#include <assert.h>
+#include <cstring>
#include <fstream>
#include "misc/tensor/Shape.h"
@@ -25,8 +26,8 @@
namespace TFLiteRun
{
-TensorLoader::TensorLoader(tflite::Interpreter &interpreter)
- : _interpreter(interpreter), _raw_data(nullptr)
+TensorLoader::TensorLoader(TfLiteInterpreter &interpreter)
+ : _interpreter(interpreter), _raw_data(nullptr)
{
}
@@ -42,21 +43,20 @@ void TensorLoader::loadDumpedTensors(const std::string &filename)
int tensor_indices_raw[num_tensors];
file.read(reinterpret_cast<char *>(tensor_indices_raw), sizeof(tensor_indices_raw));
- std::vector<int> tensor_indices(tensor_indices_raw, tensor_indices_raw + num_tensors);
_raw_data = std::unique_ptr<float[]>(new float[file_size]);
file.read(reinterpret_cast<char *>(_raw_data.get()), file_size);
file.close();
- size_t read_bytes = loadTensorsFromRawData(tensor_indices);
+ size_t read_bytes = loadInputTensorsFromRawData();
+ read_bytes += loadOutputTensorsFromRawData();
// The file size and total output tensor size must match
assert(file_size ==
sizeof(num_tensors) + sizeof(tensor_indices_raw) + read_bytes * sizeof(float));
}
-void TensorLoader::loadRawTensors(const std::string &filename,
- const std::vector<int> &tensor_indices)
+void TensorLoader::loadRawInputTensors(const std::string &filename)
{
// TODO Handle file open/read error
std::ifstream file(filename, std::ios::ate | std::ios::binary);
@@ -67,41 +67,74 @@ void TensorLoader::loadRawTensors(const std::string &filename,
file.read(reinterpret_cast<char *>(_raw_data.get()), file_size);
file.close();
- size_t read_bytes = loadTensorsFromRawData(tensor_indices);
+ size_t read_bytes = loadInputTensorsFromRawData();
// The file size and total output tensor size must match
assert(file_size == read_bytes * sizeof(float));
}
-size_t TensorLoader::loadTensorsFromRawData(const std::vector<int> &tensor_indices)
+size_t TensorLoader::loadInputTensorsFromRawData()
{
size_t offset = 0;
- for (const auto &o : tensor_indices)
+ auto const input_count = TfLiteInterpreterGetInputTensorCount(&_interpreter);
+ for (auto idx = 0; idx < input_count; idx++)
{
- const TfLiteTensor *tensor = _interpreter.tensor(o);
+ const TfLiteTensor *tensor = TfLiteInterpreterGetInputTensor(&_interpreter, idx);
// Convert tensor shape to `Shape` from `tensor->dims`
- nnfw::misc::tensor::Shape shape(static_cast<size_t>(tensor->dims->size));
- for (int d = 0; d < tensor->dims->size; d++)
+ nnfw::misc::tensor::Shape shape(TfLiteTensorNumDims(tensor));
+ for (int32_t d = 0; d < TfLiteTensorNumDims(tensor); d++)
{
- shape.dim(d) = tensor->dims->data[d];
+ shape.dim(d) = TfLiteTensorDim(tensor, d);
}
float *base = _raw_data.get() + offset;
- assert(tensor->bytes % sizeof(float) == 0);
- offset += (tensor->bytes / sizeof(float));
+ assert(TfLiteTensorByteSize(tensor) % sizeof(float) == 0);
+ offset += (TfLiteTensorByteSize(tensor) / sizeof(float));
- _tensor_map.insert(std::make_pair(o, nnfw::tflite::TensorView<float>(shape, base)));
+ _input_tensor_map.emplace(idx, nnfw::tflite::TensorView<float>(shape, base));
+
+ memcpy(TfLiteTensorData(tensor), reinterpret_cast<const void *>(base),
+ TfLiteTensorByteSize(tensor));
+ }
+
+ return offset;
+}
+
+size_t TensorLoader::loadOutputTensorsFromRawData()
+{
+ size_t offset = 0;
+ auto const output_count = TfLiteInterpreterGetOutputTensorCount(&_interpreter);
+ for (auto idx = 0; idx < output_count; idx++)
+ {
+ const TfLiteTensor *tensor = TfLiteInterpreterGetOutputTensor(&_interpreter, idx);
+
+ // Convert tensor shape to `Shape` from `tensor->dims`
+ nnfw::misc::tensor::Shape shape(TfLiteTensorNumDims(tensor));
+ for (int32_t d = 0; d < TfLiteTensorNumDims(tensor); d++)
+ {
+ shape.dim(d) = TfLiteTensorDim(tensor, d);
+ }
+
+ float *base = _raw_data.get() + offset;
+
+ assert(TfLiteTensorByteSize(tensor) % sizeof(float) == 0);
+ offset += (TfLiteTensorByteSize(tensor) / sizeof(float));
+
+ _output_tensor_map.emplace(idx, nnfw::tflite::TensorView<float>(shape, base));
+
+ memcpy(TfLiteTensorData(tensor), reinterpret_cast<const void *>(base),
+ TfLiteTensorByteSize(tensor));
}
return offset;
}
-const nnfw::tflite::TensorView<float> &TensorLoader::get(int tensor_idx) const
+const nnfw::tflite::TensorView<float> &TensorLoader::getOutput(int tensor_idx) const
{
- auto found = _tensor_map.find(tensor_idx);
- assert(found != _tensor_map.end());
+ auto found = _output_tensor_map.find(tensor_idx);
+ assert(found != _output_tensor_map.end());
return found->second;
}
diff --git a/tests/tools/tflite_run/src/tensor_loader.h b/tests/tools/tflite_run/src/tensor_loader.h
index ef51e0fd4..b9e6b72cb 100644
--- a/tests/tools/tflite_run/src/tensor_loader.h
+++ b/tests/tools/tflite_run/src/tensor_loader.h
@@ -17,13 +17,14 @@
#ifndef __TFLITE_RUN_TENSOR_LOADER_H__
#define __TFLITE_RUN_TENSOR_LOADER_H__
+#include "tflite/TensorView.h"
+
#include <sys/mman.h>
+#include <memory>
#include <string>
#include <unordered_map>
-#include "tflite/TensorView.h"
-
namespace tflite
{
class Interpreter;
@@ -35,17 +36,18 @@ namespace TFLiteRun
class TensorLoader
{
public:
- TensorLoader(tflite::Interpreter &interpreter);
+ TensorLoader(TfLiteInterpreter &interpreter);
void loadDumpedTensors(const std::string &filename);
- void loadRawTensors(const std::string &filename, const std::vector<int> &tensor_indices);
- const nnfw::tflite::TensorView<float> &get(int tensor_idx) const;
- size_t getNums() const { return _tensor_map.size(); }
+ void loadRawInputTensors(const std::string &filename);
+ const nnfw::tflite::TensorView<float> &getOutput(int tensor_idx) const;
private:
- size_t loadTensorsFromRawData(const std::vector<int> &tensor_indices);
- tflite::Interpreter &_interpreter;
+ size_t loadInputTensorsFromRawData();
+ size_t loadOutputTensorsFromRawData();
+ TfLiteInterpreter &_interpreter;
std::unique_ptr<float[]> _raw_data;
- std::unordered_map<int, nnfw::tflite::TensorView<float>> _tensor_map;
+ std::unordered_map<int, nnfw::tflite::TensorView<float>> _input_tensor_map;
+ std::unordered_map<int, nnfw::tflite::TensorView<float>> _output_tensor_map;
};
} // end of namespace TFLiteRun
diff --git a/tests/tools/tflite_run/src/tflite_run.cc b/tests/tools/tflite_run/src/tflite_run.cc
index e72966db5..a1e3d2eb6 100644
--- a/tests/tools/tflite_run/src/tflite_run.cc
+++ b/tests/tools/tflite_run/src/tflite_run.cc
@@ -14,9 +14,6 @@
* limitations under the License.
*/
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-
#include "args.h"
#include "tensor_dumper.h"
#include "tensor_loader.h"
@@ -26,8 +23,8 @@
#include "tflite/Diff.h"
#include "tflite/Assert.h"
#include "tflite/Session.h"
+#include "tflite/RandomInputInitializer.h"
#include "tflite/InterpreterSession.h"
-#include "tflite/NNAPISession.h"
#include "misc/tensor/IndexIterator.h"
#include "misc/tensor/Object.h"
#include "benchmark.h"
@@ -54,31 +51,10 @@ void print_max_idx(float *f, int size)
static const char *default_backend_cand = "tflite_cpu";
-// Verifies whether the model is a flatbuffer file.
-class BMFlatBufferVerifier : public tflite::TfLiteVerifier
-{
-public:
- bool Verify(const char *data, int length, tflite::ErrorReporter *reporter) override
- {
-
- flatbuffers::Verifier verifier(reinterpret_cast<const uint8_t *>(data), length);
- if (!tflite::VerifyModelBuffer(verifier))
- {
- reporter->Report("The model is not a valid Flatbuffer file");
- return false;
- }
- return true;
- }
-};
-
-} // namespace anonymous
+} // namespace
int main(const int argc, char **argv)
{
- const bool use_nnapi = nnfw::misc::EnvVar("USE_NNAPI").asBool(false);
-
- StderrReporter error_reporter;
-
TFLiteRun::Args args(argc, argv);
std::chrono::milliseconds t_model_load(0), t_prepare(0);
@@ -86,33 +62,14 @@ int main(const int argc, char **argv)
// TODO Apply verbose level to phases
const int verbose = args.getVerboseLevel();
benchmark::Phases phases(
- benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()});
+ benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()});
- std::unique_ptr<FlatBufferModel> model;
- std::unique_ptr<Interpreter> interpreter;
- std::unique_ptr<tflite::TfLiteVerifier> verifier{new BMFlatBufferVerifier};
+ TfLiteModel *model = nullptr;
try
{
phases.run("MODEL_LOAD", [&](const benchmark::Phase &, uint32_t) {
- if (args.getModelValidate())
- {
- model = FlatBufferModel::VerifyAndBuildFromFile(args.getTFLiteFilename().c_str(),
- verifier.get(), &error_reporter);
- }
- else
- {
- model = FlatBufferModel::BuildFromFile(args.getTFLiteFilename().c_str(), &error_reporter);
- }
- if (model == nullptr)
- {
- throw std::runtime_error{"Cannot create model"};
- }
-
- BuiltinOpResolver resolver;
- InterpreterBuilder builder(*model, resolver);
- TFLITE_ENSURE(builder(&interpreter))
- interpreter->SetNumThreads(nnfw::misc::EnvVar("THREAD").asInt(-1));
+ model = TfLiteModelCreateFromFile(args.getTFLiteFilename().c_str());
});
}
catch (const std::exception &e)
@@ -121,17 +78,16 @@ int main(const int argc, char **argv)
return 1;
}
- std::shared_ptr<nnfw::tflite::Session> sess;
-
- if (use_nnapi)
- {
- sess = std::make_shared<nnfw::tflite::NNAPISession>(interpreter.get());
- }
- else
+ if (model == nullptr)
{
- sess = std::make_shared<nnfw::tflite::InterpreterSession>(interpreter.get());
+ throw std::runtime_error{"Cannot create model"};
}
+ auto options = TfLiteInterpreterOptionsCreate();
+ TfLiteInterpreterOptionsSetNumThreads(options, nnfw::misc::EnvVar("THREAD").asInt(1));
+
+ TfLiteInterpreter *interpreter = TfLiteInterpreterCreate(model, options);
+ auto sess = std::make_shared<nnfw::tflite::InterpreterSession>(interpreter);
try
{
phases.run("PREPARE", [&](const benchmark::Phase &, uint32_t) { sess->prepare(); });
@@ -144,27 +100,28 @@ int main(const int argc, char **argv)
if (args.getInputShapes().size() != 0)
{
- const int dim_values = args.getInputShapes().size();
- int offset = 0;
+ const auto dim_values = args.getInputShapes().size();
+ int32_t offset = 0;
- for (const auto &id : interpreter->inputs())
+ auto const input_count = TfLiteInterpreterGetInputTensorCount(interpreter);
+ for (int32_t id = 0; id < input_count; id++)
{
- TfLiteTensor *tensor = interpreter->tensor(id);
+ TfLiteTensor *tensor = TfLiteInterpreterGetInputTensor(interpreter, id);
std::vector<int32_t> new_dim;
- new_dim.resize(tensor->dims->size);
+ new_dim.resize(TfLiteTensorNumDims(tensor));
- for (uint32_t axis = 0; axis < tensor->dims->size; axis++, offset++)
+ for (int32_t axis = 0; axis < TfLiteTensorNumDims(tensor); axis++, offset++)
{
new_dim[axis] =
- ((offset < dim_values) ? args.getInputShapes()[offset] : tensor->dims->data[axis]);
+ ((offset < dim_values) ? args.getInputShapes()[offset] : TfLiteTensorDim(tensor, axis));
}
- interpreter->ResizeInputTensor(id, new_dim);
+ TfLiteInterpreterResizeInputTensor(interpreter, id, new_dim.data(), new_dim.size());
if (offset >= dim_values)
break;
}
- interpreter->AllocateTensors();
+ TfLiteInterpreterAllocateTensors(interpreter);
}
TFLiteRun::TensorLoader tensor_loader(*interpreter);
@@ -175,101 +132,31 @@ int main(const int argc, char **argv)
{
if (!args.getInputFilename().empty())
{
- tensor_loader.loadRawTensors(args.getInputFilename(), interpreter->inputs());
+ tensor_loader.loadRawInputTensors(args.getInputFilename());
}
else
{
tensor_loader.loadDumpedTensors(args.getCompareFilename());
}
-
- for (const auto &o : interpreter->inputs())
- {
- const auto &tensor_view = tensor_loader.get(o);
- TfLiteTensor *tensor = interpreter->tensor(o);
-
- memcpy(reinterpret_cast<void *>(tensor->data.f),
- reinterpret_cast<const void *>(tensor_view._base), tensor->bytes);
- }
}
else
{
const int seed = 1; /* TODO Add an option for seed value */
nnfw::misc::RandomGenerator randgen{seed, 0.0f, 2.0f};
- // No input specified. So we fill the input tensors with random values.
- for (const auto &o : interpreter->inputs())
- {
- TfLiteTensor *tensor = interpreter->tensor(o);
- if (tensor->type == kTfLiteInt32)
- {
- // Generate singed 32-bit integer (s32) input
- auto tensor_view = nnfw::tflite::TensorView<int32_t>::make(*interpreter, o);
-
- int32_t value = 0;
-
- nnfw::misc::tensor::iterate(tensor_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- // TODO Generate random values
- // Gather operation: index should be within input coverage.
- tensor_view.at(ind) = value;
- value++;
- };
- }
- else if (tensor->type == kTfLiteUInt8)
- {
- // Generate unsigned 8-bit integer input
- auto tensor_view = nnfw::tflite::TensorView<uint8_t>::make(*interpreter, o);
-
- auto fp = static_cast<uint8_t (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<uint8_t>);
- const nnfw::misc::tensor::Object<uint8_t> data(tensor_view.shape(),
- std::bind(fp, randgen, _1, _2));
-
- nnfw::misc::tensor::iterate(tensor_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- const auto value = data.at(ind);
- tensor_view.at(ind) = value;
- };
- }
- else if (tensor->type == kTfLiteBool)
- {
- // Generate bool input
- auto tensor_view = nnfw::tflite::TensorView<bool>::make(*interpreter, o);
-
- auto fp = static_cast<bool (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<bool>);
- const nnfw::misc::tensor::Object<bool> data(tensor_view.shape(),
- std::bind(fp, randgen, _1, _2));
-
- nnfw::misc::tensor::iterate(tensor_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- const auto value = data.at(ind);
- tensor_view.at(ind) = value;
- };
- }
- else
- {
- assert(tensor->type == kTfLiteFloat32);
-
- const float *end = reinterpret_cast<const float *>(tensor->data.raw_const + tensor->bytes);
- for (float *ptr = tensor->data.f; ptr < end; ptr++)
- {
- *ptr = randgen.generate<float>();
- }
- }
- }
+ RandomInputInitializer initializer{randgen};
+ initializer.run(*interpreter);
}
TFLiteRun::TensorDumper tensor_dumper;
// Must be called before `interpreter->Invoke()`
- tensor_dumper.addTensors(*interpreter, interpreter->inputs());
+ tensor_dumper.addInputTensors(*interpreter);
std::cout << "input tensor indices = [";
- for (const auto &o : interpreter->inputs())
+ auto const input_count = TfLiteInterpreterGetInputTensorCount(interpreter);
+ for (int32_t idx = 0; idx < input_count; idx++)
{
- std::cout << o << ",";
+ std::cout << idx << ",";
}
std::cout << "]" << std::endl;
@@ -277,40 +164,42 @@ int main(const int argc, char **argv)
// only warmup.
if (verbose == 0)
{
- phases.run("WARMUP", [&](const benchmark::Phase &, uint32_t) { sess->run(); },
- args.getWarmupRuns());
- phases.run("EXECUTE", [&](const benchmark::Phase &, uint32_t) { sess->run(); },
- args.getNumRuns(), true);
+ phases.run(
+ "WARMUP", [&](const benchmark::Phase &, uint32_t) { sess->run(); }, args.getWarmupRuns());
+ phases.run(
+ "EXECUTE", [&](const benchmark::Phase &, uint32_t) { sess->run(); }, args.getNumRuns(), true);
}
else
{
- phases.run("WARMUP", [&](const benchmark::Phase &, uint32_t) { sess->run(); },
- [&](const benchmark::Phase &phase, uint32_t nth) {
- std::cout << "... "
- << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
- << std::endl;
- },
- args.getWarmupRuns());
- phases.run("EXECUTE", [&](const benchmark::Phase &, uint32_t) { sess->run(); },
- [&](const benchmark::Phase &phase, uint32_t nth) {
- std::cout << "... "
- << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
- << std::endl;
- },
- args.getNumRuns(), true);
+ phases.run(
+ "WARMUP", [&](const benchmark::Phase &, uint32_t) { sess->run(); },
+ [&](const benchmark::Phase &phase, uint32_t nth) {
+ std::cout << "... "
+ << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
+ << std::endl;
+ },
+ args.getWarmupRuns());
+ phases.run(
+ "EXECUTE", [&](const benchmark::Phase &, uint32_t) { sess->run(); },
+ [&](const benchmark::Phase &phase, uint32_t nth) {
+ std::cout << "... "
+ << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms" << std::endl;
+ },
+ args.getNumRuns(), true);
}
sess->teardown();
// Must be called after `interpreter->Invoke()`
- tensor_dumper.addTensors(*interpreter, interpreter->outputs());
+ tensor_dumper.addOutputTensors(*interpreter);
std::cout << "output tensor indices = [";
- for (const auto &o : interpreter->outputs())
+ auto const output_count = TfLiteInterpreterGetOutputTensorCount(interpreter);
+ for (int32_t idx = 0; idx < output_count; idx++)
{
- std::cout << o << "(";
-
- print_max_idx(interpreter->tensor(o)->data.f, interpreter->tensor(o)->bytes / sizeof(float));
+ auto tensor = TfLiteInterpreterGetOutputTensor(interpreter, idx);
+ print_max_idx(reinterpret_cast<float *>(TfLiteTensorData(tensor)),
+ TfLiteTensorByteSize(tensor) / sizeof(float));
std::cout << "),";
}
@@ -374,12 +263,13 @@ int main(const int argc, char **argv)
TfLiteInterpMatchApp app(comparator);
bool res = true;
- for (const auto &o : interpreter->outputs())
+ for (int32_t idx = 0; idx < output_count; idx++)
{
- auto expected = tensor_loader.get(o);
- auto obtained = nnfw::tflite::TensorView<float>::make(*interpreter, o);
+ auto expected = tensor_loader.getOutput(idx);
+ auto const tensor = TfLiteInterpreterGetOutputTensor(interpreter, idx);
+ auto obtained = nnfw::tflite::TensorView<float>::make(tensor);
- res = res && app.compareSingleTensorView(expected, obtained, o);
+ res = res && app.compareSingleTensorView(expected, obtained, idx);
}
if (!res)
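
For reference, the TFLite C-API flow this file now uses in place of the C++ Interpreter/FlatBufferModel API; a condensed sketch. TfLiteInterpreterInvoke and the *Delete cleanup calls are standard C-API functions assumed here, not shown in the hunks above:

    TfLiteModel *model = TfLiteModelCreateFromFile("model.tflite");
    TfLiteInterpreterOptions *options = TfLiteInterpreterOptionsCreate();
    TfLiteInterpreterOptionsSetNumThreads(options, 1);
    TfLiteInterpreter *interpreter = TfLiteInterpreterCreate(model, options);
    TfLiteInterpreterAllocateTensors(interpreter);
    TfLiteTensor *in = TfLiteInterpreterGetInputTensor(interpreter, 0);
    // fill TfLiteTensorData(in) with TfLiteTensorByteSize(in) bytes, then:
    TfLiteInterpreterInvoke(interpreter);
    const TfLiteTensor *out = TfLiteInterpreterGetOutputTensor(interpreter, 0);
    TfLiteInterpreterDelete(interpreter);
    TfLiteInterpreterOptionsDelete(options);
    TfLiteModelDelete(model);
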
diff --git a/tests/tools/tflite_vanilla_run/CMakeLists.txt b/tests/tools/tflite_vanilla_run/CMakeLists.txt
deleted file mode 100644
index 19e21e923..000000000
--- a/tests/tools/tflite_vanilla_run/CMakeLists.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-if(NOT BUILD_TFLITE_VANILLA_RUN)
- return()
-endif()
-
-if(NOT BUILD_TENSORFLOW_LITE_2_3_0)
- set(BUILD_TENSORFLOW_LITE_2_3_0 ON)
-endif()
-
-nnfw_find_package(TensorFlowLite-2.3.0 REQUIRED)
-nnfw_find_package(Boost REQUIRED)
-
-list(APPEND TFLITE_RUN_SRCS "src/tflite_vanilla_run.cc")
-list(APPEND TFLITE_RUN_SRCS "src/args.cc")
-
-add_executable(tflite_vanilla_run ${TFLITE_RUN_SRCS})
-target_include_directories(tflite_vanilla_run PRIVATE src)
-target_include_directories(tflite_vanilla_run PRIVATE ${Boost_INCLUDE_DIRS})
-
-target_link_libraries(tflite_vanilla_run tensorflow-lite-2.3.0 ${LIB_PTHREAD} dl)
-target_link_libraries(tflite_vanilla_run ${Boost_PROGRAM_OPTIONS_LIBRARY})
-target_link_libraries(tflite_vanilla_run nnfw_lib_benchmark nnfw_lib_misc)
-
-install(TARGETS tflite_vanilla_run DESTINATION bin)
diff --git a/tests/tools/tflite_vanilla_run/src/args.cc b/tests/tools/tflite_vanilla_run/src/args.cc
deleted file mode 100644
index dc9f250e4..000000000
--- a/tests/tools/tflite_vanilla_run/src/args.cc
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "args.h"
-
-#include <iostream>
-
-namespace TFLiteVanillaRun
-{
-
-Args::Args(const int argc, char **argv) noexcept
-{
- try
- {
- Initialize();
- Parse(argc, argv);
- }
- catch (const std::exception &e)
- {
- std::cerr << "error during paring args" << e.what() << '\n';
- exit(1);
- }
-}
-
-void Args::Initialize(void)
-{
- try
- {
- // General options
- po::options_description general("General options");
-
- // clang-format off
- general.add_options()
- ("help,h", "Display available options")
- ("input,i", po::value<std::string>()->default_value(""), "Input filename")
- ("dump,d", po::value<std::string>()->default_value(""), "Output filename")
- ("ishapes", po::value<std::vector<int>>()->multitoken(), "Input shapes")
- ("compare,c", po::value<std::string>()->default_value(""), "filename to be compared with")
- ("tflite", po::value<std::string>()->required())
- ("num_runs,r", po::value<int>()->default_value(1), "The number of runs")
- ("warmup_runs,w", po::value<int>()->default_value(0), "The number of warmup runs")
- ("run_delay,t", po::value<int>()->default_value(-1), "Delay time(ms) between runs (as default no delay")
- ("gpumem_poll,g", po::value<bool>()->default_value(false), "Check gpu memory polling separately")
- ("mem_poll,m", po::value<bool>()->default_value(false), "Check memory polling")
- ("write_report,p", po::value<bool>()->default_value(false), "Write report")
- ("validate", po::value<bool>()->default_value(true), "Validate tflite model")
- ("verbose_level,v", po::value<int>()->default_value(0), "Verbose level\n"
- "0: prints the only result. Messages btw run don't print\n"
- "1: prints result and message btw run\n"
- "2: prints all of messages to print\n")
- ;
- // clang-format on
-
- _options.add(general);
- _positional.add("tflite", 1);
- }
- catch (const std::bad_cast &e)
- {
- std::cerr << "error by bad cast during initialization of boost::program_options" << e.what()
- << '\n';
- exit(1);
- }
-}
-
-void Args::Parse(const int argc, char **argv)
-{
- po::variables_map vm;
- po::store(po::command_line_parser(argc, argv).options(_options).positional(_positional).run(),
- vm);
-
- {
- auto conflicting_options = [&](const std::string &o1, const std::string &o2) {
- if ((vm.count(o1) && !vm[o1].defaulted()) && (vm.count(o2) && !vm[o2].defaulted()))
- {
- throw boost::program_options::error(std::string("Two options '") + o1 + "' and '" + o2 +
- "' cannot be given at once.");
- }
- };
-
- conflicting_options("input", "compare");
- }
-
- if (vm.count("help"))
- {
- std::cout << "tflite_run\n\n";
- std::cout << "Usage: " << argv[0] << " <.tflite> [<options>]\n\n";
- std::cout << _options;
- std::cout << "\n";
-
- exit(0);
- }
-
- po::notify(vm);
-
- if (vm.count("dump"))
- {
- _dump_filename = vm["dump"].as<std::string>();
- }
-
- if (vm.count("compare"))
- {
- _compare_filename = vm["compare"].as<std::string>();
- }
-
- if (vm.count("input"))
- {
- _input_filename = vm["input"].as<std::string>();
-
- if (!_input_filename.empty())
- {
- if (access(_input_filename.c_str(), F_OK) == -1)
- {
- std::cerr << "input image file not found: " << _input_filename << "\n";
- }
- }
- }
-
- if (vm.count("ishapes"))
- {
- _input_shapes.resize(vm["ishapes"].as<std::vector<int>>().size());
- for (auto i = 0; i < _input_shapes.size(); i++)
- {
- _input_shapes[i] = vm["ishapes"].as<std::vector<int>>()[i];
- }
- }
-
- if (vm.count("tflite"))
- {
- _tflite_filename = vm["tflite"].as<std::string>();
-
- if (_tflite_filename.empty())
- {
- // TODO Print usage instead of the below message
- std::cerr << "Please specify tflite file. Run with `--help` for usage."
- << "\n";
-
- exit(1);
- }
- else
- {
- if (access(_tflite_filename.c_str(), F_OK) == -1)
- {
- std::cerr << "tflite file not found: " << _tflite_filename << "\n";
- exit(1);
- }
- }
- }
-
- if (vm.count("num_runs"))
- {
- _num_runs = vm["num_runs"].as<int>();
- }
-
- if (vm.count("warmup_runs"))
- {
- _warmup_runs = vm["warmup_runs"].as<int>();
- }
-
- if (vm.count("run_delay"))
- {
- _run_delay = vm["run_delay"].as<int>();
- }
-
- if (vm.count("gpumem_poll"))
- {
- _gpumem_poll = vm["gpumem_poll"].as<bool>();
- }
-
- if (vm.count("mem_poll"))
- {
- _mem_poll = vm["mem_poll"].as<bool>();
- // Instead of EXECUTE to avoid overhead, memory polling runs on WARMUP
- if (_mem_poll && _warmup_runs == 0)
- {
- _warmup_runs = 1;
- }
- }
-
- if (vm.count("write_report"))
- {
- _write_report = vm["write_report"].as<bool>();
- }
-
- if (vm.count("validate"))
- {
- _tflite_validate = vm["validate"].as<bool>();
- }
-
- if (vm.count("verbose_level"))
- {
- _verbose_level = vm["verbose_level"].as<int>();
- }
-}
-
-} // end of namespace TFLiteVanillaRun
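The deleted args.cc enforced its `--input` / `--compare` conflict with a hand-rolled `conflicting_options` check on top of boost::program_options. For comparison, a minimal sketch of the same rule in Python, where argparse's mutually exclusive groups enforce the conflict directly; the option names simply mirror the deleted tool:

```
import argparse

parser = argparse.ArgumentParser(prog="tflite_vanilla_run")
# argparse rejects --input together with --compare, like conflicting_options().
group = parser.add_mutually_exclusive_group()
group.add_argument("-i", "--input", default="", help="Input filename")
group.add_argument("-c", "--compare", default="", help="Filename to be compared with")
parser.add_argument("tflite", help="Path to the .tflite model")
parser.add_argument("-r", "--num_runs", type=int, default=1, help="The number of runs")

args = parser.parse_args(["model.tflite", "--input", "in.bin"])
print(args.tflite, args.num_runs)
```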
diff --git a/tests/tools/tflite_vanilla_run/src/args.h b/tests/tools/tflite_vanilla_run/src/args.h
deleted file mode 100644
index 3605b651c..000000000
--- a/tests/tools/tflite_vanilla_run/src/args.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __TFLITE_VANILLA_RUN_ARGS_H__
-#define __TFLITE_VANILLA_RUN_ARGS_H__
-
-#include <string>
-#include <boost/program_options.hpp>
-
-namespace po = boost::program_options;
-
-namespace TFLiteVanillaRun
-{
-
-class Args
-{
-public:
- Args(const int argc, char **argv) noexcept;
- void print(void);
-
- const std::string &getTFLiteFilename(void) const { return _tflite_filename; }
- const std::string &getDumpFilename(void) const { return _dump_filename; }
- const std::string &getCompareFilename(void) const { return _compare_filename; }
- const std::string &getInputFilename(void) const { return _input_filename; }
- const std::vector<int> &getInputShapes(void) const { return _input_shapes; }
- const int getNumRuns(void) const { return _num_runs; }
- const int getWarmupRuns(void) const { return _warmup_runs; }
- const int getRunDelay(void) const { return _run_delay; }
- const bool getGpuMemoryPoll(void) const { return _gpumem_poll; }
- const bool getMemoryPoll(void) const { return _mem_poll; }
- const bool getWriteReport(void) const { return _write_report; }
- const bool getModelValidate(void) const { return _tflite_validate; }
- const int getVerboseLevel(void) const { return _verbose_level; }
-
-private:
- void Initialize();
- void Parse(const int argc, char **argv);
-
-private:
- po::positional_options_description _positional;
- po::options_description _options;
-
- std::string _tflite_filename;
- std::string _dump_filename;
- std::string _compare_filename;
- std::string _input_filename;
- std::vector<int> _input_shapes;
- int _num_runs;
- int _warmup_runs;
- int _run_delay;
- bool _gpumem_poll;
- bool _mem_poll;
- bool _write_report;
- bool _tflite_validate;
- int _verbose_level;
-};
-
-} // end of namespace TFLiteVanillaRun
-
-#endif // __TFLITE_VANILLA_RUN_ARGS_H__
diff --git a/tests/tools/tflite_vanilla_run/src/tensor_view.h b/tests/tools/tflite_vanilla_run/src/tensor_view.h
deleted file mode 100644
index ca04a051e..000000000
--- a/tests/tools/tflite_vanilla_run/src/tensor_view.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file TensorView.h
- * @brief This file contains TensorView class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __TFLITE_VANILLA_RUN_TENSOR_VIEW_H__
-#define __TFLITE_VANILLA_RUN_TENSOR_VIEW_H__
-
-#include "tensorflow/lite/interpreter.h"
-
-#include "misc/tensor/Shape.h"
-#include "misc/tensor/Index.h"
-#include "misc/tensor/Reader.h"
-#include "misc/tensor/NonIncreasingStride.h"
-
-namespace TFLiteVanillaRun
-{
-
-/**
- * @brief Class to define TensorView which is inherited from nnfw::misc::tensor::Reader<T> class
- */
-template <typename T> class TensorView final : public nnfw::misc::tensor::Reader<T>
-{
-public:
- /**
- * @brief Construct a TensorView object with base and shape information
- * @param[in] shape The shape of a tensor
- * @param[in] base The base address of a tensor
- */
- TensorView(const nnfw::misc::tensor::Shape &shape, T *base) : _shape{shape}, _base{base}
- {
- // Set 'stride'
- _stride.init(_shape);
- }
-
-public:
- /**
- * @brief Get shape of tensor
- * @return Reference of shape
- */
- const nnfw::misc::tensor::Shape &shape(void) const { return _shape; }
-
-public:
- /**
- * @brief Get value of tensor index
- * @param[in] index The tensor index
- * @return The value at the index
- */
- T at(const nnfw::misc::tensor::Index &index) const override
- {
- const auto offset = _stride.offset(index);
- return *(_base + offset);
- }
-
-public:
- /**
- * @brief Get reference value of tensor index
- * @param[in] index The tensor index
- * @return The reference value at the index
- */
- T &at(const nnfw::misc::tensor::Index &index)
- {
- const auto offset = _stride.offset(index);
- return *(_base + offset);
- }
-
-private:
- nnfw::misc::tensor::Shape _shape; /**< The tensor shape */
-
-public:
- T *_base; /**< The base address of tensor */
- nnfw::misc::tensor::NonIncreasingStride _stride; /**< The NonIncreasingStride object */
-
-public:
- // TODO Introduce Operand ID class
- /**
- * @brief Create TensorView object using given parameters
- * @param[in] interp The TfLite interpreter
- * @param[in] tensor_index The tensor index
- * @return The new TensorView<T> object
- */
- static TensorView<T> make(::tflite::Interpreter &interp, int tensor_index)
- {
- auto tensor_ptr = interp.tensor(tensor_index);
-
- // Set 'shape'
- nnfw::misc::tensor::Shape shape(tensor_ptr->dims->size);
-
- for (uint32_t axis = 0; axis < shape.rank(); ++axis)
- {
- shape.dim(axis) = tensor_ptr->dims->data[axis];
- }
-
- return TensorView<T>(shape, interp.typed_tensor<T>(tensor_index));
- }
-};
-
-} // namespace TFLiteVanillaRun
-
-#endif // __TFLITE_VANILLA_RUN_TENSOR_VIEW_H__
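The deleted TensorView mapped a multi-dimensional index to a flat offset through `NonIncreasingStride`, i.e., ordinary row-major strides. A small sketch of that offset computation, assuming a dense row-major layout; numpy's `ravel_multi_index` is used only to cross-check the result:

```
import numpy as np

def row_major_strides(shape):
    # Stride of the last axis is 1; each earlier axis multiplies
    # the sizes of all axes to its right (non-increasing strides).
    strides = [1] * len(shape)
    for axis in range(len(shape) - 2, -1, -1):
        strides[axis] = strides[axis + 1] * shape[axis + 1]
    return strides

shape = (2, 3, 4)
strides = row_major_strides(shape)                    # [12, 4, 1]
index = (1, 2, 3)
offset = sum(i * s for i, s in zip(index, strides))   # 23
assert offset == np.ravel_multi_index(index, shape)
```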
diff --git a/tests/tools/tflite_vanilla_run/src/tflite_vanilla_run.cc b/tests/tools/tflite_vanilla_run/src/tflite_vanilla_run.cc
deleted file mode 100644
index d44ea60cf..000000000
--- a/tests/tools/tflite_vanilla_run/src/tflite_vanilla_run.cc
+++ /dev/null
@@ -1,276 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/kernels/register.h"
-
-#include "args.h"
-#include "tensor_view.h"
-#include "misc/EnvVar.h"
-#include "misc/RandomGenerator.h"
-#include "misc/tensor/IndexIterator.h"
-#include "misc/tensor/Object.h"
-#include "benchmark.h"
-
-#include <iostream>
-#include <chrono>
-#include <algorithm>
-#include <vector>
-#include <memory>
-
-using namespace std::placeholders; // for _1, _2 ...
-
-#define TFLITE_ENSURE(exp) \
- { \
- const TfLiteStatus status = (exp); \
- \
- if (status != kTfLiteOk) \
- { \
- std::ostringstream ss; \
- ss << #exp << " failed (" << __FILE__ << ":" << __LINE__ << ")"; \
- throw std::runtime_error{ss.str()}; \
- } \
- }
-
-namespace
-{
-
-void print_max_idx(float *f, int size)
-{
- float *p = std::max_element(f, f + size);
- std::cout << "max:" << p - f;
-}
-
-static const char *default_backend_cand = "tflite_cpu";
-
-// Verifies whether the model is a flatbuffer file.
-class BMFlatBufferVerifier : public tflite::TfLiteVerifier
-{
-public:
- bool Verify(const char *data, int length, tflite::ErrorReporter *reporter) override
- {
-
- flatbuffers::Verifier verifier(reinterpret_cast<const uint8_t *>(data), length);
- if (!tflite::VerifyModelBuffer(verifier))
- {
- reporter->Report("The model is not a valid Flatbuffer file");
- return false;
- }
- return true;
- }
-};
-
-} // namespace anonymous
-
-int main(const int argc, char **argv)
-{
- tflite::StderrReporter error_reporter;
-
- TFLiteVanillaRun::Args args(argc, argv);
-
- std::chrono::milliseconds t_model_load(0), t_prepare(0);
-
- // TODO Apply verbose level to phases
- const int verbose = args.getVerboseLevel();
- benchmark::Phases phases(
- benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()});
-
- std::unique_ptr<tflite::FlatBufferModel> model;
- std::unique_ptr<tflite::Interpreter> interpreter;
- std::unique_ptr<tflite::TfLiteVerifier> verifier{new BMFlatBufferVerifier};
-
- try
- {
- phases.run("MODEL_LOAD", [&](const benchmark::Phase &, uint32_t) {
- if (args.getModelValidate())
- {
- model = tflite::FlatBufferModel::VerifyAndBuildFromFile(args.getTFLiteFilename().c_str(),
- verifier.get(), &error_reporter);
- }
- else
- {
- model = tflite::FlatBufferModel::BuildFromFile(args.getTFLiteFilename().c_str(),
- &error_reporter);
- }
- if (model == nullptr)
- {
- throw std::runtime_error{"Cannot create model"};
- }
-
- // Use tflite's resolver, not onert's one
- tflite::ops::builtin::BuiltinOpResolver resolver;
- tflite::InterpreterBuilder builder(*model, resolver);
- TFLITE_ENSURE(builder(&interpreter))
- interpreter->SetNumThreads(nnfw::misc::EnvVar("THREAD").asInt(-1));
- });
- }
- catch (const std::exception &e)
- {
- std::cerr << e.what() << '\n';
- return 1;
- }
-
- const bool use_nnapi = nnfw::misc::EnvVar("USE_NNAPI").asBool(false);
-
- try
- {
- phases.run("PREPARE", [&](const benchmark::Phase &, uint32_t) {
- interpreter->UseNNAPI(use_nnapi);
- interpreter->AllocateTensors();
- });
- }
- catch (const std::exception &e)
- {
- std::cerr << e.what() << '\n';
- return 1;
- }
-
- const int seed = 1; /* TODO Add an option for seed value */
- nnfw::misc::RandomGenerator randgen{seed, 0.0f, 2.0f};
-
- // No input specified. So we fill the input tensors with random values.
- for (const auto &o : interpreter->inputs())
- {
- TfLiteTensor *tensor = interpreter->tensor(o);
- if (tensor->type == kTfLiteInt32)
- {
- // Generate signed 32-bit integer (s32) input
- auto tensor_view = TFLiteVanillaRun::TensorView<int32_t>::make(*interpreter, o);
-
- int32_t value = 0;
-
- nnfw::misc::tensor::iterate(tensor_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- // TODO Generate random values
- // Gather operation: index should be within input coverage.
- tensor_view.at(ind) = value;
- value++;
- };
- }
- else if (tensor->type == kTfLiteUInt8)
- {
- // Generate unsigned 8-bit integer input
- auto tensor_view = TFLiteVanillaRun::TensorView<uint8_t>::make(*interpreter, o);
-
- uint8_t value = 0;
-
- nnfw::misc::tensor::iterate(tensor_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- // TODO Generate random values
- tensor_view.at(ind) = value;
- value = (value + 1) & 0xFF;
- };
- }
- else if (tensor->type == kTfLiteBool)
- {
- // Generate bool input
- auto tensor_view = TFLiteVanillaRun::TensorView<bool>::make(*interpreter, o);
-
- auto fp = static_cast<bool (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<bool>);
- const nnfw::misc::tensor::Object<bool> data(tensor_view.shape(),
- std::bind(fp, randgen, _1, _2));
-
- nnfw::misc::tensor::iterate(tensor_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- const auto value = data.at(ind);
- tensor_view.at(ind) = value;
- };
- }
- else
- {
- assert(tensor->type == kTfLiteFloat32);
-
- const float *end = reinterpret_cast<const float *>(tensor->data.raw_const + tensor->bytes);
- for (float *ptr = tensor->data.f; ptr < end; ptr++)
- {
- *ptr = randgen.generate<float>();
- }
- }
- }
-
- std::cout << "input tensor indices = [";
- for (const auto &o : interpreter->inputs())
- {
- std::cout << o << ",";
- }
- std::cout << "]" << std::endl;
-
- // NOTE: Measuring memory inevitably adds overhead. Therefore, memory is measured only
- // during the warmup runs.
- if (verbose == 0)
- {
- phases.run("WARMUP", [&](const benchmark::Phase &, uint32_t) { interpreter->Invoke(); },
- args.getWarmupRuns());
- phases.run("EXECUTE", [&](const benchmark::Phase &, uint32_t) { interpreter->Invoke(); },
- args.getNumRuns(), true);
- }
- else
- {
- phases.run("WARMUP", [&](const benchmark::Phase &, uint32_t) { interpreter->Invoke(); },
- [&](const benchmark::Phase &phase, uint32_t nth) {
- std::cout << "... "
- << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
- << std::endl;
- },
- args.getWarmupRuns());
- phases.run("EXECUTE", [&](const benchmark::Phase &, uint32_t) { interpreter->Invoke(); },
- [&](const benchmark::Phase &phase, uint32_t nth) {
- std::cout << "... "
- << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
- << std::endl;
- },
- args.getNumRuns(), true);
- }
-
- std::cout << "output tensor indices = [";
- for (const auto &o : interpreter->outputs())
- {
- std::cout << o << "(";
-
- print_max_idx(interpreter->tensor(o)->data.f, interpreter->tensor(o)->bytes / sizeof(float));
-
- std::cout << "),";
- }
- std::cout << "]" << std::endl;
-
- // TODO Apply verbose level to result
-
- // prepare result
- benchmark::Result result(phases);
-
- // to stdout
- benchmark::printResult(result);
-
- if (args.getWriteReport())
- {
- // prepare csv task
- std::string exec_basename;
- std::string model_basename;
- std::string backend_name = default_backend_cand;
- {
- std::vector<char> vpath(args.getTFLiteFilename().begin(), args.getTFLiteFilename().end() + 1);
- model_basename = basename(vpath.data());
- size_t lastindex = model_basename.find_last_of(".");
- model_basename = model_basename.substr(0, lastindex);
- exec_basename = basename(argv[0]);
- }
- benchmark::writeResult(result, exec_basename, model_basename, backend_name);
- }
-
- return 0;
-}
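The deleted driver filled every input tensor according to its element type (an increasing ramp for int32, a wrap-around counter for uint8, random booleans, random floats) before invoking the interpreter. A rough Python analogue using `tf.lite.Interpreter` is sketched below; the model path and the float distribution parameters are assumptions, not values taken from this patch:

```
import numpy as np
import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path="model.tflite")  # placeholder path
interpreter.allocate_tensors()

rng = np.random.default_rng(seed=1)
for detail in interpreter.get_input_details():
    shape = tuple(detail["shape"])
    dtype = detail["dtype"]
    if dtype == np.int32:
        data = np.arange(np.prod(shape), dtype=np.int32).reshape(shape)
    elif dtype == np.uint8:
        data = (np.arange(np.prod(shape)) % 256).astype(np.uint8).reshape(shape)
    elif dtype == np.bool_:
        data = rng.random(shape) > 0.5
    else:  # assume float32, as the deleted code did
        data = rng.normal(0.0, 2.0, size=shape).astype(np.float32)
    interpreter.set_tensor(detail["index"], data)

interpreter.invoke()
```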
diff --git a/tools/cross/aarch64/sources.list.focal b/tools/cross/aarch64/sources.list.focal
new file mode 100644
index 000000000..4de2600c1
--- /dev/null
+++ b/tools/cross/aarch64/sources.list.focal
@@ -0,0 +1,11 @@
+deb http://ports.ubuntu.com/ubuntu-ports/ focal main restricted universe
+deb-src http://ports.ubuntu.com/ubuntu-ports/ focal main restricted universe
+
+deb http://ports.ubuntu.com/ubuntu-ports/ focal-updates main restricted universe
+deb-src http://ports.ubuntu.com/ubuntu-ports/ focal-updates main restricted universe
+
+deb http://ports.ubuntu.com/ubuntu-ports/ focal-backports main restricted
+deb-src http://ports.ubuntu.com/ubuntu-ports/ focal-backports main restricted
+
+deb http://ports.ubuntu.com/ubuntu-ports/ focal-security main restricted universe multiverse
+deb-src http://ports.ubuntu.com/ubuntu-ports/ focal-security main restricted universe multiverse
diff --git a/tools/cross/aarch64/sources.list.jammy b/tools/cross/aarch64/sources.list.jammy
new file mode 100644
index 000000000..6bb045302
--- /dev/null
+++ b/tools/cross/aarch64/sources.list.jammy
@@ -0,0 +1,11 @@
+deb http://ports.ubuntu.com/ubuntu-ports/ jammy main restricted universe
+deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy main restricted universe
+
+deb http://ports.ubuntu.com/ubuntu-ports/ jammy-updates main restricted universe
+deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy-updates main restricted universe
+
+deb http://ports.ubuntu.com/ubuntu-ports/ jammy-backports main restricted
+deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy-backports main restricted
+
+deb http://ports.ubuntu.com/ubuntu-ports/ jammy-security main restricted universe multiverse
+deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy-security main restricted universe multiverse
diff --git a/tools/cross/aarch64/sources.list.trusty b/tools/cross/aarch64/sources.list.trusty
deleted file mode 100644
index 8aa98a259..000000000
--- a/tools/cross/aarch64/sources.list.trusty
+++ /dev/null
@@ -1,11 +0,0 @@
-deb http://ports.ubuntu.com/ubuntu-ports/ trusty main restricted universe
-deb-src http://ports.ubuntu.com/ubuntu-ports/ trusty main restricted universe
-
-deb http://ports.ubuntu.com/ubuntu-ports/ trusty-updates main restricted universe
-deb-src http://ports.ubuntu.com/ubuntu-ports/ trusty-updates main restricted universe
-
-deb http://ports.ubuntu.com/ubuntu-ports/ trusty-backports main restricted
-deb-src http://ports.ubuntu.com/ubuntu-ports/ trusty-backports main restricted
-
-deb http://ports.ubuntu.com/ubuntu-ports/ trusty-security main restricted universe multiverse
-deb-src http://ports.ubuntu.com/ubuntu-ports/ trusty-security main restricted universe multiverse
diff --git a/tools/cross/aarch64/sources.list.xenial b/tools/cross/aarch64/sources.list.xenial
deleted file mode 100644
index 56fbb36a5..000000000
--- a/tools/cross/aarch64/sources.list.xenial
+++ /dev/null
@@ -1,11 +0,0 @@
-deb http://ports.ubuntu.com/ubuntu-ports/ xenial main restricted universe
-deb-src http://ports.ubuntu.com/ubuntu-ports/ xenial main restricted universe
-
-deb http://ports.ubuntu.com/ubuntu-ports/ xenial-updates main restricted universe
-deb-src http://ports.ubuntu.com/ubuntu-ports/ xenial-updates main restricted universe
-
-deb http://ports.ubuntu.com/ubuntu-ports/ xenial-backports main restricted
-deb-src http://ports.ubuntu.com/ubuntu-ports/ xenial-backports main restricted
-
-deb http://ports.ubuntu.com/ubuntu-ports/ xenial-security main restricted universe multiverse
-deb-src http://ports.ubuntu.com/ubuntu-ports/ xenial-security main restricted universe multiverse
diff --git a/tools/cross/arm/sources.list.focal b/tools/cross/arm/sources.list.focal
new file mode 100644
index 000000000..4de2600c1
--- /dev/null
+++ b/tools/cross/arm/sources.list.focal
@@ -0,0 +1,11 @@
+deb http://ports.ubuntu.com/ubuntu-ports/ focal main restricted universe
+deb-src http://ports.ubuntu.com/ubuntu-ports/ focal main restricted universe
+
+deb http://ports.ubuntu.com/ubuntu-ports/ focal-updates main restricted universe
+deb-src http://ports.ubuntu.com/ubuntu-ports/ focal-updates main restricted universe
+
+deb http://ports.ubuntu.com/ubuntu-ports/ focal-backports main restricted
+deb-src http://ports.ubuntu.com/ubuntu-ports/ focal-backports main restricted
+
+deb http://ports.ubuntu.com/ubuntu-ports/ focal-security main restricted universe multiverse
+deb-src http://ports.ubuntu.com/ubuntu-ports/ focal-security main restricted universe multiverse
diff --git a/tools/cross/arm/sources.list.jammy b/tools/cross/arm/sources.list.jammy
new file mode 100644
index 000000000..6bb045302
--- /dev/null
+++ b/tools/cross/arm/sources.list.jammy
@@ -0,0 +1,11 @@
+deb http://ports.ubuntu.com/ubuntu-ports/ jammy main restricted universe
+deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy main restricted universe
+
+deb http://ports.ubuntu.com/ubuntu-ports/ jammy-updates main restricted universe
+deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy-updates main restricted universe
+
+deb http://ports.ubuntu.com/ubuntu-ports/ jammy-backports main restricted
+deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy-backports main restricted
+
+deb http://ports.ubuntu.com/ubuntu-ports/ jammy-security main restricted universe multiverse
+deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy-security main restricted universe multiverse
diff --git a/tools/cross/arm/sources.list.trusty b/tools/cross/arm/sources.list.trusty
deleted file mode 100644
index 8aa98a259..000000000
--- a/tools/cross/arm/sources.list.trusty
+++ /dev/null
@@ -1,11 +0,0 @@
-deb http://ports.ubuntu.com/ubuntu-ports/ trusty main restricted universe
-deb-src http://ports.ubuntu.com/ubuntu-ports/ trusty main restricted universe
-
-deb http://ports.ubuntu.com/ubuntu-ports/ trusty-updates main restricted universe
-deb-src http://ports.ubuntu.com/ubuntu-ports/ trusty-updates main restricted universe
-
-deb http://ports.ubuntu.com/ubuntu-ports/ trusty-backports main restricted
-deb-src http://ports.ubuntu.com/ubuntu-ports/ trusty-backports main restricted
-
-deb http://ports.ubuntu.com/ubuntu-ports/ trusty-security main restricted universe multiverse
-deb-src http://ports.ubuntu.com/ubuntu-ports/ trusty-security main restricted universe multiverse
diff --git a/tools/cross/arm/sources.list.xenial b/tools/cross/arm/sources.list.xenial
deleted file mode 100644
index 56fbb36a5..000000000
--- a/tools/cross/arm/sources.list.xenial
+++ /dev/null
@@ -1,11 +0,0 @@
-deb http://ports.ubuntu.com/ubuntu-ports/ xenial main restricted universe
-deb-src http://ports.ubuntu.com/ubuntu-ports/ xenial main restricted universe
-
-deb http://ports.ubuntu.com/ubuntu-ports/ xenial-updates main restricted universe
-deb-src http://ports.ubuntu.com/ubuntu-ports/ xenial-updates main restricted universe
-
-deb http://ports.ubuntu.com/ubuntu-ports/ xenial-backports main restricted
-deb-src http://ports.ubuntu.com/ubuntu-ports/ xenial-backports main restricted
-
-deb http://ports.ubuntu.com/ubuntu-ports/ xenial-security main restricted universe multiverse
-deb-src http://ports.ubuntu.com/ubuntu-ports/ xenial-security main restricted universe multiverse
diff --git a/tools/cross/armel/tizen-build-rootfs.sh b/tools/cross/armel/tizen-build-rootfs.sh
deleted file mode 100755
index 2d0084d03..000000000
--- a/tools/cross/armel/tizen-build-rootfs.sh
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/usr/bin/env bash
-set -e
-
-__ARM_SOFTFP_CrossDir=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
-
-if [[ -z "$ROOTFS_DIR" ]]; then
- echo "ROOTFS_DIR is not defined."
- exit 1;
-fi
-
-# Clean-up (TODO-Cleanup: We may already delete $ROOTFS_DIR at ./cross/build-rootfs.sh.)
-# hk0110
-if [ -d "$ROOTFS_DIR" ]; then
- umount $ROOTFS_DIR/*
- rm -rf $ROOTFS_DIR
-fi
-
-TIZEN_TMP_DIR=$ROOTFS_DIR/tizen_tmp
-mkdir -p $TIZEN_TMP_DIR
-
-# Download files
-echo ">>Start downloading files"
-VERBOSE=1 $__ARM_SOFTFP_CrossDir/tizen-fetch.sh $TIZEN_TMP_DIR
-echo "<<Finish downloading files"
-
-echo ">>Start constructing Tizen rootfs"
-TIZEN_RPM_FILES=`ls $TIZEN_TMP_DIR/*.rpm`
-cd $ROOTFS_DIR
-for f in $TIZEN_RPM_FILES; do
- rpm2cpio $f | cpio -idm --quiet
-done
-echo "<<Finish constructing Tizen rootfs"
-
-# Cleanup tmp
-rm -rf $TIZEN_TMP_DIR
-
-# Configure Tizen rootfs
-echo ">>Start configuring Tizen rootfs"
-rm ./usr/lib/libunwind.so
-ln -s libunwind.so.8 ./usr/lib/libunwind.so
-ln -sfn asm-arm ./usr/include/asm
-patch -p1 < $__ARM_SOFTFP_CrossDir/tizen.patch
-echo "<<Finish configuring Tizen rootfs"
diff --git a/tools/cross/armel/tizen-fetch.sh b/tools/cross/armel/tizen-fetch.sh
deleted file mode 100755
index 51fbe81b3..000000000
--- a/tools/cross/armel/tizen-fetch.sh
+++ /dev/null
@@ -1,169 +0,0 @@
-#!/usr/bin/env bash
-set -e
-
-if [[ -z "${VERBOSE// }" ]] || [ "$VERBOSE" -ne "$VERBOSE" ] 2>/dev/null; then
- VERBOSE=0
-fi
-
-Log()
-{
- if [ $VERBOSE -ge $1 ]; then
- echo ${@:2}
- fi
-}
-
-Inform()
-{
- Log 1 -e "\x1B[0;34m$@\x1B[m"
-}
-
-Debug()
-{
- Log 2 -e "\x1B[0;32m$@\x1B[m"
-}
-
-Error()
-{
- >&2 Log 0 -e "\x1B[0;31m$@\x1B[m"
-}
-
-Fetch()
-{
- URL=$1
- FILE=$2
- PROGRESS=$3
- if [ $VERBOSE -ge 1 ] && [ $PROGRESS ]; then
- CURL_OPT="--progress-bar"
- else
- CURL_OPT="--silent"
- fi
- curl $CURL_OPT $URL > $FILE
-}
-
-hash curl 2> /dev/null || { Error "'curl' is required. Aborting."; exit 1; }
-hash xmllint 2> /dev/null || { Error "'xmllint' is required. Aborting."; exit 1; }
-hash sha256sum 2> /dev/null || { Error "'sha256sum' is required. Aborting."; exit 1; }
-
-TMPDIR=$1
-if [ ! -d $TMPDIR ]; then
- TMPDIR=./tizen_tmp
- Debug "Create temporary directory : $TMPDIR"
- mkdir -p $TMPDIR
-fi
-
-TIZEN_URL=http://download.tizen.org/releases/daily/tizen
-BUILD_XML=build.xml
-REPOMD_XML=repomd.xml
-PRIMARY_XML=primary.xml
-TARGET_URL="http://__not_initialized"
-
-Xpath_get()
-{
- XPATH_RESULT=''
- XPATH=$1
- XML_FILE=$2
- RESULT=$(xmllint --xpath $XPATH $XML_FILE)
- if [[ -z ${RESULT// } ]]; then
- Error "Can not find target from $XML_FILE"
- Debug "Xpath = $XPATH"
- exit 1
- fi
- XPATH_RESULT=$RESULT
-}
-
-fetch_tizen_pkgs_init()
-{
- TARGET=$1
- PROFILE=$2
- Debug "Initialize TARGET=$TARGET, PROFILE=$PROFILE"
-
- TMP_PKG_DIR=$TMPDIR/tizen_${PROFILE}_pkgs
- if [ -d $TMP_PKG_DIR ]; then rm -rf $TMP_PKG_DIR; fi
- mkdir -p $TMP_PKG_DIR
-
- PKG_URL=$TIZEN_URL/$PROFILE/latest
-
- BUILD_XML_URL=$PKG_URL/$BUILD_XML
- TMP_BUILD=$TMP_PKG_DIR/$BUILD_XML
- TMP_REPOMD=$TMP_PKG_DIR/$REPOMD_XML
- TMP_PRIMARY=$TMP_PKG_DIR/$PRIMARY_XML
- TMP_PRIMARYGZ=${TMP_PRIMARY}.gz
-
- Fetch $BUILD_XML_URL $TMP_BUILD
-
- Debug "fetch $BUILD_XML_URL to $TMP_BUILD"
-
- TARGET_XPATH="//build/buildtargets/buildtarget[@name=\"$TARGET\"]/repo[@type=\"binary\"]/text()"
- Xpath_get $TARGET_XPATH $TMP_BUILD
- TARGET_PATH=$XPATH_RESULT
- TARGET_URL=$PKG_URL/$TARGET_PATH
-
- REPOMD_URL=$TARGET_URL/repodata/repomd.xml
- PRIMARY_XPATH='string(//*[local-name()="data"][@type="primary"]/*[local-name()="location"]/@href)'
-
- Fetch $REPOMD_URL $TMP_REPOMD
-
- Debug "fetch $REPOMD_URL to $TMP_REPOMD"
-
- Xpath_get $PRIMARY_XPATH $TMP_REPOMD
- PRIMARY_XML_PATH=$XPATH_RESULT
- PRIMARY_URL=$TARGET_URL/$PRIMARY_XML_PATH
-
- Fetch $PRIMARY_URL $TMP_PRIMARYGZ
-
- Debug "fetch $PRIMARY_URL to $TMP_PRIMARYGZ"
-
- gunzip $TMP_PRIMARYGZ
-
- Debug "unzip $TMP_PRIMARYGZ to $TMP_PRIMARY"
-}
-
-fetch_tizen_pkgs()
-{
- ARCH=$1
- PACKAGE_XPATH_TPL='string(//*[local-name()="metadata"]/*[local-name()="package"][*[local-name()="name"][text()="_PKG_"]][*[local-name()="arch"][text()="_ARCH_"]]/*[local-name()="location"]/@href)'
-
- PACKAGE_CHECKSUM_XPATH_TPL='string(//*[local-name()="metadata"]/*[local-name()="package"][*[local-name()="name"][text()="_PKG_"]][*[local-name()="arch"][text()="_ARCH_"]]/*[local-name()="checksum"]/text())'
-
- for pkg in ${@:2}
- do
- Inform "Fetching... $pkg"
- XPATH=${PACKAGE_XPATH_TPL/_PKG_/$pkg}
- XPATH=${XPATH/_ARCH_/$ARCH}
- Xpath_get $XPATH $TMP_PRIMARY
- PKG_PATH=$XPATH_RESULT
-
- XPATH=${PACKAGE_CHECKSUM_XPATH_TPL/_PKG_/$pkg}
- XPATH=${XPATH/_ARCH_/$ARCH}
- Xpath_get $XPATH $TMP_PRIMARY
- CHECKSUM=$XPATH_RESULT
-
- PKG_URL=$TARGET_URL/$PKG_PATH
- PKG_FILE=$(basename $PKG_PATH)
- PKG_PATH=$TMPDIR/$PKG_FILE
-
- Debug "Download $PKG_URL to $PKG_PATH"
- Fetch $PKG_URL $PKG_PATH true
-
- echo "$CHECKSUM $PKG_PATH" | sha256sum -c - > /dev/null
- if [ $? -ne 0 ]; then
- Error "Fail to fetch $PKG_URL to $PKG_PATH"
- Debug "Checksum = $CHECKSUM"
- exit 1
- fi
- done
-}
-
-Inform "Initialize arm base"
-fetch_tizen_pkgs_init standard base
-Inform "fetch common packages"
-fetch_tizen_pkgs armv7l glibc glibc-devel
-fetch_tizen_pkgs noarch linux-glibc-devel
-fetch_tizen_pkgs armv7l libgcc libstdc++ libstdc++-devel libunwind libunwind-devel
-
-
-Inform "Initialize standard unified"
-fetch_tizen_pkgs_init standard unified
-Inform "fetch nnfw packages"
-fetch_tizen_pkgs armv7l gmock-devel tensorflow-lite-devel boost-devel libarmcl-devel boost-program-options boost-system boost-filesystem gmock libarmcl
-
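The deleted tizen-fetch.sh located primary.xml inside repomd.xml with an `xmllint --xpath` query (`PRIMARY_XPATH`). A sketch of the same lookup in Python; the namespace URI is the standard createrepo one and the file is assumed to have been downloaded locally, so treat both as assumptions:

```
import xml.etree.ElementTree as ET

# Standard repodata namespace used by createrepo (assumption).
NS = {"repo": "http://linux.duke.edu/metadata/repo"}

root = ET.parse("repomd.xml").getroot()
location = root.find('repo:data[@type="primary"]/repo:location', NS)
print(location.get("href"))  # e.g. repodata/<hash>-primary.xml.gz
```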
diff --git a/tools/cross/armel/tizen.patch b/tools/cross/armel/tizen.patch
deleted file mode 100644
index d223427c9..000000000
--- a/tools/cross/armel/tizen.patch
+++ /dev/null
@@ -1,18 +0,0 @@
-diff -u -r a/usr/lib/libc.so b/usr/lib/libc.so
---- a/usr/lib/libc.so 2016-12-30 23:00:08.284951863 +0900
-+++ b/usr/lib/libc.so 2016-12-30 23:00:32.140951815 +0900
-@@ -2,4 +2,4 @@
- Use the shared library, but some functions are only in
- the static library, so try that secondarily. */
- OUTPUT_FORMAT(elf32-littlearm)
--GROUP ( /lib/libc.so.6 /usr/lib/libc_nonshared.a AS_NEEDED ( /lib/ld-linux.so.3 ) )
-+GROUP ( libc.so.6 libc_nonshared.a AS_NEEDED ( ld-linux.so.3 ) )
-diff -u -r a/usr/lib/libpthread.so b/usr/lib/libpthread.so
---- a/usr/lib/libpthread.so 2016-12-30 23:00:19.408951841 +0900
-+++ b/usr/lib/libpthread.so 2016-12-30 23:00:39.068951801 +0900
-@@ -2,4 +2,4 @@
- Use the shared library, but some functions are only in
- the static library, so try that secondarily. */
- OUTPUT_FORMAT(elf32-littlearm)
--GROUP ( /lib/libpthread.so.0 /usr/lib/libpthread_nonshared.a )
-+GROUP ( libpthread.so.0 libpthread_nonshared.a )
diff --git a/tools/cross/install_rootfs.sh b/tools/cross/install_rootfs.sh
index f6c59b9c5..7af7bf711 100755
--- a/tools/cross/install_rootfs.sh
+++ b/tools/cross/install_rootfs.sh
@@ -1,12 +1,9 @@
#!/usr/bin/env bash
-set -x
-
usage()
{
echo "Usage: $0 [BuildArch] [LinuxCodeName] [--setproxy=IP] [--skipunmount]"
- echo "BuildArch can be: arm(default), aarch64 and armel"
- echo "LinuxCodeName - optional, Code name for Linux, can be: xenial(default), trusty"
- echo " If BuildArch is armel, this can be tizen(default)"
+ echo "BuildArch can be: arm(default), aarch64"
+ echo "LinuxCodeName - optional, Code name for Linux, can be: bionic, focal(default), jammy"
echo "--setproxy=IP - optional, IP is the proxy server IP address or url with portnumber"
echo " default no proxy. Example: --setproxy=127.1.2.3:8080"
echo "--skipunmount - optional, will skip the unmount of rootfs folder."
@@ -19,16 +16,20 @@ __UbuntuRepo="http://ports.ubuntu.com/"
__BuildArch=arm
__QemuArch=armhf
-__LinuxCodeName=xenial
+__LinuxCodeName=focal
__SkipUnmount=0
__IsProxySet=0
__Apt=""
# base development support
+# install cmake to find cmake package configuration for target file system
__UbuntuPackages="build-essential"
+__UbuntuPackages+=" cmake"
# other development supports
-__UbuntuPackages+=" libboost-all-dev ocl-icd-opencl-dev"
+__UbuntuPackages+=" ocl-icd-opencl-dev"
__UbuntuPackages+=" libhdf5-dev"
+__UbuntuPackages+=" libboost-all-dev"
+__UbuntuPackages+=" libglib2.0-dev"
# symlinks fixer
__UbuntuPackages+=" symlinks"
@@ -50,33 +51,15 @@ for i in "$@" ; do
__BuildArch=aarch64
__QemuArch=arm64
;;
- armel)
- __BuildArch=armel
- __Tizen=tizen
- __QemuArch=
- __UbuntuRepo=
- __LinuxCodeName=
- ;;
- tizen)
- if [ "$__BuildArch" != "armel" ]; then
- echo "Tizen rootfs is available only for armel."
- usage;
- exit 1;
- fi
- __Tizen=tizen
- __QemuArch=
- __UbuntuRepo=
- __LinuxCodeName=
- ;;
- trusty)
- __LinuxCodeName=trusty
- ;;
- xenial)
- __LinuxCodeName=xenial
- ;;
bionic)
__LinuxCodeName=bionic
;;
+ focal)
+ __LinuxCodeName=focal
+ ;;
+ jammy)
+ __LinuxCodeName=jammy
+ ;;
--setproxy*)
proxyip="${i#*=}"
__Apt="Acquire::http::proxy \"http://$proxyip/\";\n"
@@ -93,6 +76,8 @@ for i in "$@" ; do
esac
done
+set -x
+
__RootfsDir="$__CrossDir/rootfs/$__BuildArch"
if [[ -n "$ROOTFS_DIR" ]]; then
@@ -106,7 +91,7 @@ if [ -d "$__RootfsDir" ]; then
rm -rf $__RootfsDir
fi
-if [ $__IsProxySet == 1 ] && [ "$__Tizen" != "tizen" ]; then
+if [ $__IsProxySet == 1 ]; then
mkdir -p $__RootfsDir/etc/apt/apt.conf.d
echo -e "$__Apt" >> $__RootfsDir/etc/apt/apt.conf.d/90proxy
fi
@@ -125,8 +110,6 @@ if [[ -n $__LinuxCodeName ]]; then
if [ $__SkipUnmount == 0 ]; then
umount $__RootfsDir/*
fi
-elif [ "$__Tizen" == "tizen" ]; then
- ROOTFS_DIR=$__RootfsDir $__CrossDir/$__BuildArch/tizen-build-rootfs.sh
else
echo "Unsupported target platform."
usage;
diff --git a/tools/generate_datafile/tf_dataset_converter/README.md b/tools/generate_datafile/tf_dataset_converter/README.md
new file mode 100644
index 000000000..3d4612520
--- /dev/null
+++ b/tools/generate_datafile/tf_dataset_converter/README.md
@@ -0,0 +1,66 @@
+# tf dataset converter
+
+## What is tf dataset converter?
+
+_tf dataset converter_ is a tool that converts TensorFlow datasets into the dataset format used by `onert_train`.
+
+## Supported datasets
+- Tensorflow datasets with [ClassLabel feature](https://www.tensorflow.org/datasets/api_docs/python/tfds/features/ClassLabel)
+
+## Prerequisite
+- Python 3.8 (python3.8, python3.8-dev packages)
+- Required Python packages (see `requirements.txt`)
+
+## Usage
+usage: main.py [-h] [-s] [-d Dataset] [-o Dir] [-p Prefix] [-l N] [-t N]
+
+Convert a dataset of tensorflow to onert format
+
+options:
+ -h, --help show this help message and exit
+ -s, --show-datasets show dataset list
+ -d Dataset, --dataset-name Dataset
+ name of dataset to be converted (default: "fashion_mnist")
+ -o Dir, --out-dir Dir
+ relative path of the files to be created (default: "out")
+ -p Prefix, --prefix-name Prefix
+ prefix name of the file to be created (default: "")
+ -l N, --train-length N
+ Number of data for training (default: 1000)
+ -t N, --test-length N
+ Number of data for testing (default: 100)
+
+## Example
+### Install required packages
+```
+$ python3 -m pip install -r requirements.txt
+```
+
+### Show dataset list
+```
+$ python3 main.py --show-datasets
+Dataset list :
+[abstract_reasoning,
+accentdb,
+...
+fashion_mnist,
+...
+robotics:mt_opt_sd]
+```
+
+### Convert dataset to onert format
+```
+$ python3 main.py \
+ --dataset-name fashion_mnist \
+ --prefix-name fashion-mnist \
+ --train-length 2000 \
+ --test-length 200
+```
+```
+$ tree out
+out
+├── fashion-mnist.test.input.200.bin
+├── fashion-mnist.test.output.200.bin
+├── fashion-mnist.train.input.2000.bin
+└── fashion-mnist.train.output.2000.bin
+```
diff --git a/tools/generate_datafile/tf_dataset_converter/argparser.py b/tools/generate_datafile/tf_dataset_converter/argparser.py
new file mode 100644
index 000000000..daa7b5f07
--- /dev/null
+++ b/tools/generate_datafile/tf_dataset_converter/argparser.py
@@ -0,0 +1,54 @@
+'''Parse arguments'''
+
+import argparse
+
+
+def _create_parser():
+ parser = argparse.ArgumentParser(
+ description='Convert a dataset of tensorflow to onert format')
+ parser.add_argument(
+ '-s', '--show-datasets', action='store_true', help='show dataset list')
+ parser.add_argument(
+ '-d',
+ '--dataset-name',
+ type=str,
+ default='fashion_mnist',
+ metavar='Dataset',
+ help='name of dataset to be converted (default: "fashion_mnist")')
+ parser.add_argument(
+ '-o',
+ '--out-dir',
+ type=str,
+ default='out',
+ metavar='Dir',
+ help='relative path of the files to be created (default: "out")')
+ parser.add_argument(
+ '-p',
+ '--prefix-name',
+ type=str,
+ default='',
+ metavar='Prefix',
+ help='prefix name of the file to be created (default: "")')
+ parser.add_argument(
+ '-l',
+ '--train-length',
+ type=int,
+ default=1000,
+ metavar='N',
+ help='Number of data for training (default: 1000)')
+ parser.add_argument(
+ '-t',
+ '--test-length',
+ type=int,
+ default=100,
+ metavar='N',
+ help='Number of data for testing (default: 100)')
+
+ return parser
+
+
+def parse_args():
+ parser = _create_parser()
+ args = parser.parse_args()
+
+ return args
diff --git a/tools/generate_datafile/tf_dataset_converter/datasets.py b/tools/generate_datafile/tf_dataset_converter/datasets.py
new file mode 100644
index 000000000..d63320055
--- /dev/null
+++ b/tools/generate_datafile/tf_dataset_converter/datasets.py
@@ -0,0 +1,80 @@
+'''Deal with the tensorflow dataset.'''
+
+import tensorflow as tf
+import tensorflow_datasets as tfds
+from pathlib import Path
+
+dataset_root_dir = Path(__file__).parent.absolute() / 'data'
+
+
+class DatasetLoader():
+ '''
+ Loader of tensorflow datasets
+ '''
+
+ def load(self, dataset_name):
+ (ds_train, ds_test), ds_info = tfds.load(
+ dataset_name,
+ split=['train', 'test'],
+ data_dir=dataset_root_dir,
+ shuffle_files=True,
+ as_supervised=True,
+ with_info=True,
+ )
+
+ self.ds_info = ds_info
+
+ def _normalize_img(image, label):
+ """Normalizes images: `uint8` -> `float32`."""
+ return tf.cast(image, tf.float32) / 255., label
+
+ self.ds_train = ds_train.map(_normalize_img)
+ self.ds_test = ds_test.map(_normalize_img)
+
+ for images, labels in self.ds_train:
+ print(f'Shape of images : {images.shape}')
+ print(f'Shape of labels: {labels.shape} {labels.dtype}')
+ break
+
+ def get_dataset_names(self):
+ return tfds.list_builders()
+
+ def class_names(self):
+ '''
+ Get class names
+ '''
+ return self.ds_info.features['label'].names
+
+ def num_classes(self):
+ '''
+ Get the number of classes
+ '''
+ return self.ds_info.features['label'].num_classes
+
+ def get_num_train_examples(self):
+ '''
+ Get examples for training
+ '''
+ return self.ds_info.splits['train'].num_examples
+
+ def get_num_test_examples(self):
+ '''
+ Get examples for testing
+ '''
+ return self.ds_info.splits['test'].num_examples
+
+ def prefetched_datasets(self):
+ '''
+ Get prefetched datasets for training.
+
+ Return:
+ Datasets for training and testing.
+ '''
+
+ train_dataset = self.ds_train.cache()
+ train_dataset = train_dataset.shuffle(self.ds_info.splits['train'].num_examples)
+
+ test_dataset = self.ds_test.cache()
+
+ return train_dataset, test_dataset
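A minimal usage sketch of the `DatasetLoader` defined above; the first `load()` call downloads the dataset through tensorflow_datasets, so network access is assumed:

```
from datasets import DatasetLoader

loader = DatasetLoader()
loader.load('fashion_mnist')
print(loader.class_names())    # e.g. ['T-shirt/top', 'Trouser', ...]
print(loader.num_classes())    # 10 for fashion_mnist
ds_train, ds_test = loader.prefetched_datasets()
```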
diff --git a/tools/generate_datafile/tf_dataset_converter/main.py b/tools/generate_datafile/tf_dataset_converter/main.py
new file mode 100644
index 000000000..77e339965
--- /dev/null
+++ b/tools/generate_datafile/tf_dataset_converter/main.py
@@ -0,0 +1,98 @@
+################################################################################
+# Parse arguments
+################################################################################
+
+from argparser import parse_args
+
+# See argparser.py for the argument definitions
+args = parse_args()
+
+################################################################################
+# Load a dataset of tensorflow
+################################################################################
+
+# Disable tensorflow cpp warning log
+import os
+
+FILTERING_WARNING = '2'
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = FILTERING_WARNING
+
+from datasets import DatasetLoader
+from pathlib import Path
+import tensorflow as tf
+import numpy as np
+
+ds_loader = DatasetLoader()
+
+if args.show_datasets:
+ print('Dataset list :')
+ names = ',\n'.join(ds_loader.get_dataset_names())
+ print(f'[{names}]')
+ exit(0)
+
+ds_loader.load(args.dataset_name)
+ds_train, ds_test = ds_loader.prefetched_datasets()
+nums_train_ds = ds_loader.get_num_train_examples()
+nums_test_ds = ds_loader.get_num_test_examples()
+print(f'class names : {ds_loader.class_names()}')
+print(f'train dataset len : {nums_train_ds}')
+print(f'test dataset len : {nums_test_ds}')
+
+################################################################################
+# Convert tensorflow dataset to onert format
+################################################################################
+Path(f'{args.out_dir}').mkdir(parents=True, exist_ok=True)
+prefix_name = f'{args.out_dir}/{args.prefix_name}'
+if args.prefix_name != '':
+ prefix_name += '.'
+
+nums_train = args.train_length
+if (nums_train > nums_train_ds):
+ print(
+ f'Oops! The dataset contains fewer than {nums_train} training examples')
+ exit(1)
+
+nums_test = args.test_length
+if (nums_test > nums_test_ds):
+ print(f'Oops! The dataset contains fewer than {nums_test} test examples')
+ exit(1)
+
+
+def _only_image(image, _):
+ return image
+
+
+def _only_label(_, label):
+ return label
+
+
+def _label_to_array(label):
+ arr = np.zeros(ds_loader.num_classes(), dtype=float)
+ arr[label] = 1.
+ tensor = tf.convert_to_tensor(arr, tf.float32)
+ return tensor
+
+
+file_path_list = [
+ f'{prefix_name}train.input.{nums_train}.bin',
+ f'{prefix_name}test.input.{nums_test}.bin',
+ f'{prefix_name}train.output.{nums_train}.bin',
+ f'{prefix_name}test.output.{nums_test}.bin'
+]
+
+ds_list = [
+ ds_train.take(nums_train).map(_only_image),
+ ds_test.take(nums_test).map(_only_image),
+ [_label_to_array(label) for label in ds_train.take(nums_train).map(_only_label)],
+ [_label_to_array(label) for label in ds_test.take(nums_test).map(_only_label)]
+]
+
+for i in range(4):
+ file_path = file_path_list[i]
+ with open(file_path, 'wb') as f:
+ ds = ds_list[i]
+ for tensor in ds:
+ f.write(tensor.numpy().tobytes())
+
+print('The data files are created!')
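The generated files are raw float32 dumps in native byte order, so they can be read back with numpy. A sketch assuming the fashion_mnist example from the README (28x28x1 images, 10 one-hot classes); adjust the counts and shapes to the dataset actually converted:

```
import numpy as np

images = np.fromfile('out/fashion-mnist.train.input.2000.bin', dtype=np.float32)
images = images.reshape(2000, 28, 28, 1)   # shape assumed from fashion_mnist
labels = np.fromfile('out/fashion-mnist.train.output.2000.bin', dtype=np.float32)
labels = labels.reshape(2000, 10)          # 10 one-hot classes
print(images.shape, labels.shape)
```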
diff --git a/tools/generate_datafile/tf_dataset_converter/requirements.txt b/tools/generate_datafile/tf_dataset_converter/requirements.txt
new file mode 100644
index 000000000..c34025fe6
--- /dev/null
+++ b/tools/generate_datafile/tf_dataset_converter/requirements.txt
@@ -0,0 +1,8 @@
+argparse
+numpy
+pathlib
+# Please upgrade pip before installing these requirements.
+# pip 20.2 and earlier do not have true dependency resolution.
+# Refer to https://pip.pypa.io/en/latest/user_guide/#requirements-files
+tensorflow==2.8.2 # This version specifies the upper bound for protobuf
+tensorflow_datasets==4.7.0
diff --git a/tools/kbenchmark/kernels/acl_cl/Convolution.cpp b/tools/kbenchmark/kernels/acl_cl/Convolution.cpp
index 37d179ac3..31cda05a6 100644
--- a/tools/kbenchmark/kernels/acl_cl/Convolution.cpp
+++ b/tools/kbenchmark/kernels/acl_cl/Convolution.cpp
@@ -230,12 +230,11 @@ inline nonius::benchmark_registry &local_benchmark_registry()
} // namespace
-#define NONIUS_LOCAL_BENCHMARK(name, ...) \
- namespace \
- { \
- static ::nonius::benchmark_registrar \
- NONIUS_DETAIL_UNIQUE_NAME(benchmark_registrar)(local_benchmark_registry(), name, \
- __VA_ARGS__); \
+#define NONIUS_LOCAL_BENCHMARK(name, ...) \
+ namespace \
+ { \
+ static ::nonius::benchmark_registrar \
+ NONIUS_DETAIL_UNIQUE_NAME(benchmark_registrar)(local_benchmark_registry(), name, __VA_ARGS__); \
}
NONIUS_LOCAL_BENCHMARK("CLDirectConvolutionLayer_NCHW", [](nonius::chronometer meter) {
diff --git a/tools/kbenchmark/kernels/acl_cl/TransposeConv.cpp b/tools/kbenchmark/kernels/acl_cl/TransposeConv.cpp
index 8278a61d3..c2ac30540 100644
--- a/tools/kbenchmark/kernels/acl_cl/TransposeConv.cpp
+++ b/tools/kbenchmark/kernels/acl_cl/TransposeConv.cpp
@@ -207,12 +207,11 @@ inline nonius::benchmark_registry &local_benchmark_registry()
} // namespace
-#define NONIUS_LOCAL_BENCHMARK(name, ...) \
- namespace \
- { \
- static ::nonius::benchmark_registrar \
- NONIUS_DETAIL_UNIQUE_NAME(benchmark_registrar)(local_benchmark_registry(), name, \
- __VA_ARGS__); \
+#define NONIUS_LOCAL_BENCHMARK(name, ...) \
+ namespace \
+ { \
+ static ::nonius::benchmark_registrar \
+ NONIUS_DETAIL_UNIQUE_NAME(benchmark_registrar)(local_benchmark_registry(), name, __VA_ARGS__); \
}
NONIUS_LOCAL_BENCHMARK("CLDeconvolutionLayer_NCHW", [](nonius::chronometer meter) {
diff --git a/tools/kbenchmark/kernels/acl_neon/Convolution.cpp b/tools/kbenchmark/kernels/acl_neon/Convolution.cpp
index 2d19cb21e..1656186c6 100644
--- a/tools/kbenchmark/kernels/acl_neon/Convolution.cpp
+++ b/tools/kbenchmark/kernels/acl_neon/Convolution.cpp
@@ -223,12 +223,11 @@ inline nonius::benchmark_registry &local_benchmark_registry()
} // namespace
-#define NONIUS_LOCAL_BENCHMARK(name, ...) \
- namespace \
- { \
- static ::nonius::benchmark_registrar \
- NONIUS_DETAIL_UNIQUE_NAME(benchmark_registrar)(local_benchmark_registry(), name, \
- __VA_ARGS__); \
+#define NONIUS_LOCAL_BENCHMARK(name, ...) \
+ namespace \
+ { \
+ static ::nonius::benchmark_registrar \
+ NONIUS_DETAIL_UNIQUE_NAME(benchmark_registrar)(local_benchmark_registry(), name, __VA_ARGS__); \
}
NONIUS_LOCAL_BENCHMARK("NEDirectConvolutionLayer_NCHW", [](nonius::chronometer meter) {
diff --git a/tools/kbenchmark/kernels/acl_neon/TransposeConv.cpp b/tools/kbenchmark/kernels/acl_neon/TransposeConv.cpp
index 0878499e4..892547d89 100644
--- a/tools/kbenchmark/kernels/acl_neon/TransposeConv.cpp
+++ b/tools/kbenchmark/kernels/acl_neon/TransposeConv.cpp
@@ -199,12 +199,11 @@ inline nonius::benchmark_registry &local_benchmark_registry()
} // namespace
-#define NONIUS_LOCAL_BENCHMARK(name, ...) \
- namespace \
- { \
- static ::nonius::benchmark_registrar \
- NONIUS_DETAIL_UNIQUE_NAME(benchmark_registrar)(local_benchmark_registry(), name, \
- __VA_ARGS__); \
+#define NONIUS_LOCAL_BENCHMARK(name, ...) \
+ namespace \
+ { \
+ static ::nonius::benchmark_registrar \
+ NONIUS_DETAIL_UNIQUE_NAME(benchmark_registrar)(local_benchmark_registry(), name, __VA_ARGS__); \
}
NONIUS_LOCAL_BENCHMARK("NEDeconvolutionLayer_NCHW", [](nonius::chronometer meter) {
diff --git a/tools/kernel_report/kernel_report.py b/tools/kernel_report/kernel_report.py
index b8a601eec..8940e8845 100755
--- a/tools/kernel_report/kernel_report.py
+++ b/tools/kernel_report/kernel_report.py
@@ -14,8 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import os
import argparse
+from os.path import dirname, realpath, join
class Backend:
@@ -28,11 +28,8 @@ class Backend:
class KernelReporter(object):
def __init__(self, args):
- # TODO: Remove os defendency - '/'
- if args.base[0] != '/':
- self.onertBase = os.getcwd() + '/' + args.base
- else:
- self.onertBase = args.base
+ root_path = dirname(dirname(dirname(realpath(__file__))))
+ self.onertBase = join(root_path, "runtime", "onert")
if args.md5:
self.printMD5 = True
else:
@@ -181,7 +178,6 @@ if __name__ == '__main__':
default='cpu,acl_cl,acl_neon',
help="backend list to report (use comma)")
arg_parser.add_argument("--md5", action='store_true', help="Print for md5")
- arg_parser.add_argument("base", type=str, help="onert base directory")
args = arg_parser.parse_args()
report = KernelReporter(args)
diff --git a/tools/model_partition_tool/Graph.py b/tools/model_partition_tool/Graph.py
new file mode 100644
index 000000000..96caf3889
--- /dev/null
+++ b/tools/model_partition_tool/Graph.py
@@ -0,0 +1,457 @@
+#! /usr/bin/python
+import graph_analysis
+import json
+import logging
+import runtime_stats
+import os
+import sys
+import numpy as np
+from queue import LifoQueue
+
+
+class ModelInfo:
+ def __init__(self, modelfile, vertex_weights):
+ self._model_dir = os.path.dirname(modelfile)
+ self._dag = graph_analysis.generate_dag(modelfile)
+ self._ops = graph_analysis.get_model_ops(modelfile)
+ self._tensors = graph_analysis.get_model_tensors(modelfile)
+ self._vertex_weights = vertex_weights
+ """Return Directed Acyclic Graph (DAG)
+ """
+
+ def get_dag(self):
+ return self._dag
+ """Return list of model operations
+ """
+
+ def get_ops(self):
+ return self._ops
+ """Return list of model tensors
+ """
+
+ def get_tensors(self):
+ return self._tensors
+ """Return vertex weights representing execution times
+ of model operations
+ """
+
+ def get_vertex_weights(self):
+ return self._vertex_weights
+ """Return size (bytes) of tensor connecting operation indexes n1 and n2
+ """
+
+ def get_tensor_size(self, n1, n2):
+ tensor_id = set(self._ops[n1]['outputs']).intersection(
+ set(self._ops[n2]['inputs']))
+ assert (len(tensor_id) == 1)
+ idx = tensor_id.pop()
+ tensor = self._tensors[idx]['shape']
+ return np.prod(tensor) * tensor.itemsize
+
+ def get_model_path(self):
+ return self._model_dir
+
+
+class GraphPartition:
+ def __init__(self, K):
+ self._K = K
+ self._indx = np.zeros(K, dtype=int)
+ self._session_weights = np.zeros(K, dtype=int)
+ self._session_ids = []
+ logging.basicConfig(level=logging.DEBUG)
+ self._logger = logging.getLogger("Minmax")
+
+ def set_dbglevel(self, dbglevel):
+ logging.basicConfig(level=dbglevel)
+ self._logger = logging.getLogger("Minmax")
+ self._logger.setLevel(dbglevel)
+ """Generates a session graph out of the provided dag (Directed Acyclic Graph)
+ Each dag node is associated with a session id, stored under attribute _session_ids.
+ """
+
+ def generate_session_graph(self):
+ def get_session_ids(i, j):
+ cnt = 0
+ idx_i = -1
+ idx_j = -1
+ for idx in range(self._K):
+ if i in self._session_ids[idx]:
+ idx_i = idx
+ cnt += 1
+ if j in self._session_ids[idx]:
+ idx_j = idx
+ cnt += 1
+ if cnt == 2:
+ break
+ return (idx_i, idx_j)
+
+ dag = self._modelObj.get_dag()
+ n = dag.shape[0]
+ self._session_graph = np.zeros((self._K, self._K), dtype=int)
+ for i in range(n - 1):
+ for j in range(i + 1, n):
+ if dag[i][j] == 1:
+ idx1, idx2 = get_session_ids(i, j)
+ if idx1 == -1 or idx2 == -1:
+ self._logger.debug("Something wrong with session ids")
+ self._logger.debug(self._session_ids)
+ self._logger.debug("(%d, %d)", i, j)
+ sys.exit(-1)
+ if idx1 != idx2:
+ self._session_graph[idx1][idx2] = 1
+
+ for i in range(self._K - 1):
+ for j in range(i + 1, self._K):
+ if self._session_graph[i][j] == 1 and self._session_graph[j][i] == 1:
+ self._logger.error("Session graph has cycles (%d, %d)", i, j)
+ self._logger.error("Session %d: %s", i, self._session_ids[i])
+ self._logger.error("Session %d: %s", j, self._session_ids[j])
+ sys.exit(-1)
+ """Generate an initial partition of the topological ordering T, with the
+ help of provided vertex weights. This method will update _session_weights, that is,
+ the cumulative sum of vertex weights within a session/partition
+ """
+
+ def initial_partition(self, modelObj, T):
+ self._modelObj = modelObj
+ self._logger.debug("Topological order: %s", T)
+ vwgt = modelObj.get_vertex_weights()
+ sorted_vwgt = np.array([vwgt[i] for i in T])
+ self._logger.debug("sorted weights: %s", sorted_vwgt)
+ sum1 = 0
+ c_sorted_vw = []
+ for s in sorted_vwgt:
+ c_sorted_vw.append(sum1 + s)
+ sum1 += s
+
+ pivot = np.zeros(self._K - 1)
+ self._logger.debug("Cumulative sum weights: %s", c_sorted_vw)
+ for i in range(1, self._K):
+ pivot[i - 1] = round(i * c_sorted_vw[-1] / self._K)
+
+ for i in range(self._K - 1):
+ self._indx[i + 1] = np.argmin(abs(c_sorted_vw - pivot[i]))
+
+ sum_weights = []
+ for i in range(self._K):
+ if i == self._K - 1:
+ self._session_ids.append(np.array(T[self._indx[i]:]))
+ self._session_weights[i] = np.sum(sorted_vwgt[self._indx[i]:])
+ else:
+ self._session_ids.append(np.array(T[self._indx[i]:self._indx[i + 1]]))
+ self._session_weights[i] = np.sum(
+ sorted_vwgt[self._indx[i]:self._indx[i + 1]])
+ self.generate_session_graph()
+ """Print a summary that includes session graph, paritition info comprising node ids and their
+ cumulative vertex weights
+ """
+
+ def summarize(self, T):
+ self._logger.info(
+ "Session Graph:\n%s",
+ np.array2string(
+ self._session_graph, formatter={'int': lambda x: '{:>3}'.format(x)}))
+ for i in range(self._K):
+ self._logger.info("Partition %d : %s, sum weight = %s", i,
+ self._session_ids[i].tolist(), self._session_weights[i])
+ """Move nodes from session1 to session2 until the maximum of the two cumulative session weights are exceeded.
+ The parameters to this method include the session ids, list of vertex weights (vwgt), and the directed adjacency matrix (dag).
+ At the end of the move, the session ids per session, and the session weights are updated. As node movement may also affect the session
+ graph, the session graph is updated as well.
+ """
+
+ def move_nodes(self, session1, session2):
+ dag = self._modelObj.get_dag()
+ vwgt = self._modelObj.get_vertex_weights()
+
+ def session_edges(s1, s2, dag, forward_direction):
+ sdict = {}
+ if forward_direction == True:
+ for k in s1:
+ tmp_s = set(np.where(dag[k, :] == 1)[0]).difference(set(s1))
+ if len(tmp_s) > 0:
+ sdict[k] = list(tmp_s)
+ else:
+ for k in s2:
+ tmp_s = set(np.where(dag[k, :] == 1)[0]).intersection(set(s1))
+ if len(tmp_s) > 0:
+ for key in tmp_s:
+ sdict[key] = k
+ return sdict
+
+ move_success = False
+ if self._session_graph[session1][session2] == 1:
+ forward_direction = True
+ elif self._session_graph[session2][session1] == 1:
+ forward_direction = False
+ else:
+ self._logger.warning("Cannot move nodes between non-neighboring partitions")
+ return move_success
+
+ maxval = max(self._session_weights[session1], self._session_weights[session2])
+ improvement = True
+
+ marked = {}
+ while improvement == True:
+ s1 = self._session_ids[session1]
+ s2 = self._session_ids[session2]
+ sdict = session_edges(s1, s2, dag, forward_direction)
+
+ found_node = False
+ rnd_perm = np.random.permutation(list(sdict))
+ cnt = 0
+ while found_node == False and cnt < len(sdict):
+ rnd_key = rnd_perm[cnt]
+ marked[rnd_key] = True
+ found_node = True
+ if forward_direction == True:
+ for k in range(session2):
+ if len(
+ set(np.where(dag[rnd_key, :] == 1)[0]).intersection(
+ set(self._session_ids[k]))) > 0:
+ found_node = False
+ cnt += 1
+ break
+ else:
+ for k in range(session2 + 1, self._K):
+ if len(
+ set(np.where(dag[:, rnd_key] == 1)[0]).intersection(
+ set(self._session_ids[k]))) > 0:
+ found_node = False
+ cnt += 1
+ break
+ if found_node == True:
+ new_maxval = max(self._session_weights[session1] - vwgt[rnd_key],
+ self._session_weights[session2] + vwgt[rnd_key])
+ if new_maxval < maxval:
+ self._logger.info("[old maxval] %s --> %s [new maxval], id: %s",
+ maxval, new_maxval, rnd_key)
+ self._logger.debug("edges : %s", (sdict[rnd_key]))
+ if type(sdict[rnd_key]) is list:
+ rnd_val = np.random.choice(sdict[rnd_key])
+ else:
+ rnd_val = sdict[rnd_key]
+ if forward_direction == True:
+ if np.where(s2 == rnd_val)[0].size > 0:
+ s2 = np.insert(s2, np.where(s2 == rnd_val)[0], rnd_key)
+ else:
+ s2 = np.insert(s2, 0, rnd_key)
+ else:
+ if np.where(s2 == sdict[rnd_key])[0].size > 0:
+ s2 = np.insert(s2,
+ np.where(s2 == sdict[rnd_key])[0] + 1,
+ rnd_key)
+ else:
+ s2 = np.insert(s2, len(s2), rnd_key)
+ s1 = np.delete(s1, np.where(s1 == rnd_key))
+ del self._session_ids[session1]
+ self._session_ids.insert(session1, s1)
+ del self._session_ids[session2]
+ self._session_ids.insert(session2, s2)
+ self._session_weights[session1] -= vwgt[rnd_key]
+ self._session_weights[session2] += vwgt[rnd_key]
+ maxval = new_maxval
+ self.generate_session_graph()
+ move_success = True
+ else:
+ self._logger.warning("Move rejected, max value is greater")
+ improvement = False
+ else:
+ self._logger.warning(
+ "Candidate %d cannot be moved, as it violates acyclic constraint",
+ rnd_key)
+ improvement = False
+ return move_success
+ """Method to get the session with the maximum session weight, or cumulative exection time. This
+ session is then searched for its neighboring sessions. The neighbors are then ranked in increasing order
+ of their execution times, so that session moves can be performed in that order.
+ """
+
+ def get_bottleneck_info(self):
+ maxval = 0
+ ret_id = -1
+ for i in range(self._K):
+ if maxval < self._session_weights[i]:
+ maxval = self._session_weights[i]
+ ret_id = i
+ neighbor_dict = {}
+
+ for i in range(self._K):
+ if self._session_graph[ret_id][i] == 1 or self._session_graph[i][ret_id] == 1:
+ neighbor_dict[i] = self._session_weights[i]
+ sorted_neighbor_list = sorted(neighbor_dict.items(), key=lambda item: item[1])
+ self._logger.info("Bottleneck id --> %d, sorted neighbors --> %s", ret_id,
+ sorted_neighbor_list)
+ return ret_id, sorted_neighbor_list
+ """Get the cost and the partition id associated with the maximum value.
+ """
+
+ def get_maxPartitionCost(self):
+ dag = self._modelObj.get_dag()
+ maxval = 0
+ indx = -1
+ for i in range(self._K):
+ if self._session_weights[i] > maxval:
+ maxval = self._session_weights[i]
+ indx = i
+
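+ # Helper: count the edges crossing from session1 to session2 and accumulate the
+ # size of the tensors transferred across the cut. A reverse edge means the
+ # session graph is inconsistent, so abort.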
+ def check_edges(dag, session1, session2):
+ e_cnt = 0
+ memory_overhead = 0
+ for s1 in self._session_ids[session1]:
+ for s2 in self._session_ids[session2]:
+ if dag[s1][s2] == 1:
+ e_cnt += 1
+ memory_overhead += self._modelObj.get_tensor_size(s1, s2)
+ elif dag[s2][s1] == 1:
+ self._logger.error("%d (session %d) connects to %d (session %d)",
+ s2, session2, s1, session1)
+ self._logger.error(self._session_graph)
+ sys.exit(-1)
+
+ assert (e_cnt > 0)
+ return e_cnt, memory_overhead
+
+ edge_cut = 0
+ total_memory_overhead = 0
+ for i in range(self._K - 1):
+ for j in range(i + 1, self._K):
+ if self._session_graph[i][j] == 1:
+ e_cnt, memory_overhead = check_edges(dag, i, j)
+ edge_cut += e_cnt
+ total_memory_overhead += memory_overhead
+ return indx, maxval, edge_cut, total_memory_overhead
+ """Get partition information.
+ """
+
+ def get_partitions(self):
+ return self._session_ids, self._session_weights, self._session_graph
+
+
+class GraphTopology:
+ def __init__(self, tflite_file, trace_file):
+ vertex_weights = runtime_stats.get_runtime_per_operation(trace_file)
+ self._modelObj = ModelInfo(tflite_file, vertex_weights)
+ self._dag = graph_analysis.generate_dag(tflite_file)
+ self._T = []
+ self._vwgt = np.array(vertex_weights)
+ logging.basicConfig(level=logging.INFO)
+ self._Graphlogger = logging.getLogger("Topology")
+
+ def set_dbglevel(self, dbglevel):
+ logging.basicConfig(level=dbglevel)
+ self._Graphlogger.setLevel(dbglevel)
+ """Perform Topological sort using the method outlined in https://arxiv.org/abs/1704.00705
+ """
+
+ def topological_sort(self):
+ del self._T
+ degree_matrix = np.copy(self._dag)
+ n = self._dag.shape[0]
+ S = []
+ T = LifoQueue(maxsize=n)
+ marked = {}
+
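+ # Randomized Kahn-style algorithm: repeatedly pick a random zero-in-degree node,
+ # append it to the order T, and remove its outgoing edges from the degree matrix.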
+ while T.qsize() < n:
+ indegree = np.sum(degree_matrix, axis=0)
+ candidates, = np.where(indegree == 0)
+ for i in candidates:
+ if i not in marked:
+ S.append(i)
+ np.random.seed()
+ random_pos = int(np.random.rand() * len(S))
+ random_node = S[random_pos]
+ marked[random_node] = True
+ T.put(random_node)
+ neighbors, = np.where(self._dag[random_node, :] == 1)
+ for i in neighbors:
+ degree_matrix[random_node][i] = 0
+ del S
+ S = []
+
+ self._T = list(T.queue)
+ """Create a partition instance and perform an initial split over the cumulative sum weights
+ """
+
+ def partition_graph(self, K):
+ self._partition = GraphPartition(K)
+ self._partition.initial_partition(self._modelObj, self._T)
+ """Move nodes between sessions id1 and id2
+ """
+
+ def partition_move(self, id1, id2):
+ return self._partition.move_nodes(id1, id2)
+ """Summarize partition information
+ """
+
+ def partition_summary(self):
+ self._partition.summarize(self._T)
+ """Optimize for minmax partition. At each iteration, find the bottlenecked partition, and shuffle nodes out of it
+ to its neighbor with the smallest weight. If the neighbor session cannot accomodate any more nodes (because the minmax criterion is violated),
+ then select the next neighbor with the smallest weight. Repeat iterations until no further improvement is possible.
+ """
+
+ def partition_minmax(self, oneshot=False):
+ improvement = True
+ while improvement == True:
+ improvement = False
+ bottleneck_id, neighbor_list = self._partition.get_bottleneck_info()
+ for neighbor, wgt in neighbor_list:
+ self._Graphlogger.debug("====Moving from session %d to session %d",
+ bottleneck_id, neighbor)
+ ret_success = self.partition_move(bottleneck_id, neighbor)
+ if ret_success == True:
+ improvement = True
+ self._Graphlogger.debug(
+ "====Successful move from session %d to session %d",
+ bottleneck_id, neighbor)
+ break
+ self._Graphlogger.debug("====Failed move from session %d to session %d",
+ bottleneck_id, neighbor)
+ if oneshot == True:
+ self.partition_summary()
+
+ return self._partition.get_maxPartitionCost()
+ """Perform MinMax partitioning over multiple runs, and pick the best solution.
+ """
+
+ def partition_minmax_multiple(self, K=3, nruns=100):
+ minval = np.inf
+ session_ids = []
+ session_weights = np.zeros(K, dtype=int)
+ edge_cut_best = 0
+ memory_overhead_best = 0
+ for run in range(nruns):
+ self._Graphlogger.debug("****Starting run %d", run)
+ self.topological_sort()
+ self.partition_graph(K)
+ indx, maxval, edge_cut, memory_overhead = self.partition_minmax()
+ if maxval < minval:
+ minval = maxval
+ edge_cut_best = edge_cut
+ memory_overhead_best = memory_overhead
+ session_ids, session_weights, session_graph = self._partition.get_partitions(
+ )
+ self._Graphlogger.debug("****Finished run %d", run)
+
+ self._Graphlogger.info("Done.. printing results")
+ self._Graphlogger.info("Session ids: ")
+ for i in range(K):
+ self._Graphlogger.info("Partition %d : %s, sum weight = %s", i,
+ session_ids[i].tolist(), session_weights[i])
+ self._Graphlogger.info(
+ "Session Graph:\n%s",
+ np.array2string(
+ session_graph, formatter={'int': lambda x: '{:>3}'.format(x)}))
+ self._Graphlogger.info("Edge cut: %d", edge_cut_best)
+ self._Graphlogger.info("Memory overhead (bytes): %d", memory_overhead_best)
+ output_data = {}
+ partition_map = np.zeros(self._dag.shape[0], dtype=int)
+ with open("".join([self._modelObj.get_model_path(), "/parition_map.json"]),
+ "w") as ofile:
+ for i in range(K):
+ for op_idx in session_ids[i]:
+ partition_map[op_idx] = i
+ output_data['partition_map'] = partition_map.tolist()
+ output_data['num_partitions'] = K
+ json.dump(output_data, ofile)
diff --git a/tools/model_partition_tool/README.md b/tools/model_partition_tool/README.md
new file mode 100644
index 000000000..b0b12793f
--- /dev/null
+++ b/tools/model_partition_tool/README.md
@@ -0,0 +1,138 @@
+# Heuristic Graph Partitioning
+
+This folder contains the necessary scripts to perform heuristic-based graph partitioning of machine learning models.
+
+The main contents of this folder are as follows:
+
+- [Python Files](#python-scripts)
+- [How to Run Partitioning Algorithm?](#how-to-partition-tflite-model)
+- [Example Script](#example-script-to-generate-partition-map)
+
+
+## Python Scripts
+The Python scripts (**python3**) require the TensorFlow 2.x package in order to retrieve TFLite model operations. Additionally, please ensure that the Python `numpy` package is installed beforehand. The scripts also import the `queue`, `json` and `argparse` modules, all of which are part of the standard library and should be available by default. If not, please install the missing package via `pip install <package>` or `sudo apt install python3-<package>`.
+
+`Graph.py` is the main script that processes the model graph topology and implements the partitioning algorithm. Correspondingly, there are two classes within, namely `GraphTopology` and `GraphPartition`. `GraphTopology` has a container `GraphPartition` object within.
+
+`graph_analysis.py` is a helper module for translating TFLite models to graph data structures. `graph_analysis.py` is imported inside
+`Graph.py`.
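+
+For reference, a minimal sketch of how these helpers can be driven directly (both functions are added in this diff with these signatures; the file names are illustrative):
+```
+import graph_analysis
+import runtime_stats
+
+# Per-operation execution times, ordered by operation index
+vertex_weights = runtime_stats.get_runtime_per_operation('inceptionV3.chrome.json')
+# Directed acyclic graph (0/1 adjacency matrix) of the TFLite model
+dag = graph_analysis.generate_dag('inceptionV3.tflite')
+```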
+
+
+## How To Partition TFLite Model?
+To partition a TFLite model, simply `import Graph` at the outset. There are two ways to run the partitioning algorithm: a quick run that reports only the final result, and a detailed run that exposes each intermediate step.
+
+### Quick Run For Final Result
+To get the partitioning result quickly, follow the steps below:
+
+1. Create a `GraphTopology` object as shown below:
+```
+In [70]: g = Graph.GraphTopology('inceptionV3.tflite', 'inceptionV3.chrome.json')
+```
+**Note**: Here, the argument `inceptionV3.chrome.json` is a single-execution trace of operation execution times, and is obtained using the Chrome Trace profiler.
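+
+For reference, `runtime_stats.py` (added in this diff) expects the trace to contain per-operation Begin/End event pairs. A minimal, hypothetical excerpt, written here as a Python literal (the field names follow the parsing code; the values are made up):
+```
+trace = {"traceEvents": [
+ {"name": "$0 Conv2D", "ph": "B", "ts": "1000"},
+ {"name": "$0 Conv2D", "ph": "E", "ts": "1450"}]}  # duration: 450
+```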
+
+2. Run the **MinMax** partitioning algorithm over the topology. Specify the number of partitions (K) and the number of topological orderings (nruns) to evaluate before settling for the best result.
+```
+In [71]: g.partition_minmax_multiple(K=4, nruns=10)
+
+INFO:Topology:Session ids:
+INFO:Topology:Partition 0 : [0, 1, 2, 3, 4, 5, 6, 13, 7, 10, 14, 11, 12, 8, 9, 15, 22, 16, 23, 19, 17, 20, 21], sum weight = 292393
+INFO:Topology:Partition 1 : [18, 24, 26, 27, 28, 25, 29, 30, 31, 32, 33, 34, 38, 35, 36, 37, 39, 49, 44, 41, 45, 42, 50, 46, 43, 40, 47, 48, 51, 53, 56, 52], sum weight = 293959
+INFO:Topology:Partition 2 : [61, 57, 58, 54, 55, 62, 59, 60, 63, 73, 74, 65, 64, 68, 66, 69, 67, 70, 71, 72, 75, 76, 80, 77, 78, 79, 81, 82], sum weight = 290835
+INFO:Topology:Partition 3 : [83, 84, 85, 86, 87, 90, 94, 91, 88, 89, 92, 93, 95, 96, 101, 97, 106, 98, 102, 103, 104, 99, 105, 107, 100, 108, 109, 110, 111, 114, 119, 120, 112, 115, 116, 117, 113, 118, 121, 122, 123, 124, 125], sum weight = 293819
+INFO:Topology:Session Graph:
+[[ 0 1 0 0]
+ [ 0 0 1 0]
+ [ 0 0 0 1]
+ [ 0 0 0 0]]
+INFO:Topology:Edge cut: 12
+INFO:Topology:Memory overhead (bytes): 4366144
+
+In [72]:
+```
+
+### Detailed View
+For a detailed breakdown of the runtime steps, execute the function calls shown below:
+
+1. Create a `GraphTopology` object:
+```
+In [70]: g = Graph.GraphTopology('inceptionV3.tflite', 'inceptionV3.chrome.json')
+```
+
+2. Perform a topological sort
+```
+In [73]: g.topological_sort()
+```
+
+
+3. Partition the graph into K sub-graphs, using the topological order obtained above
+```
+In [74]: g.partition_graph(K=4)
+```
+
+4. View the execution time of each partition
+```
+In [75]: g.partition_summary()
+INFO:Minmax:Session Graph:
+[[ 0 1 0 0]
+ [ 0 0 1 0]
+ [ 0 0 0 1]
+ [ 0 0 0 0]]
+INFO:Minmax:Partition 0 : [0, 1, 2, 3, 4, 5, 6, 13, 8, 7, 14, 9, 10, 11, 12, 15, 22, 23, 17, 16, 18, 19], sum weight = 276635
+INFO:Minmax:Partition 1 : [20, 21, 24, 26, 28, 27, 29, 25, 30, 31, 32, 33, 38, 35, 36, 34, 37, 39, 40, 41, 44, 45, 46, 42, 43, 49, 50, 47, 48, 51, 52, 61], sum weight = 299334
+INFO:Minmax:Partition 2 : [56, 53, 54, 57, 58, 55, 59, 60, 62, 63, 73, 65, 66, 67, 68, 69, 74, 70, 64, 71, 72, 75, 80, 81, 77, 76, 78, 82, 85], sum weight = 291593
+INFO:Minmax:Partition 3 : [83, 86, 84, 79, 87, 94, 90, 91, 88, 92, 89, 93, 95, 106, 107, 96, 97, 101, 102, 98, 104, 103, 99, 100, 105, 108, 114, 109, 119, 120, 110, 112, 111, 113, 115, 117, 116, 118, 121, 122, 123, 124, 125], sum weight = 303444
+```
+
+5. Run a *OneShot* version of the partitioning algorithm
+```
+In [90]: indx, minmax, edge_cnt, memory_overhead = g.partition_minmax(oneshot=True)
+INFO:Minmax:Bottleneck id --> 3, sorted neighbors --> [(2, 291593)]
+DEBUG:Topology:====Moving from session 3 to session 2
+INFO:Minmax:[old maxval] 303444 --> 300754 [new maxval], id: 86
+WARNING:Minmax:Candidate 87 cannot be moved, as it violates acyclic constraint
+WARNING:Minmax:Move rejected, max value is greater
+DEBUG:Topology:====Successful move from session 3 to session 2
+INFO:Minmax:Bottleneck id --> 2, sorted neighbors --> [(3, 294283), (1, 299334)]
+DEBUG:Topology:====Moving from session 2 to session 3
+WARNING:Minmax:Move rejected, max value is greater
+DEBUG:Topology:====Failed move from session 2 to session 3
+DEBUG:Topology:====Moving from session 2 to session 1
+WARNING:Minmax:Move rejected, max value is greater
+DEBUG:Topology:====Failed move from session 2 to session 1
+INFO:Minmax:Session Graph:
+[[ 0 1 0 0]
+ [ 0 0 1 0]
+ [ 0 0 0 1]
+ [ 0 0 0 0]]
+INFO:Minmax:Partition 0 : [0, 1, 2, 3, 4, 5, 6, 13, 8, 7, 14, 9, 10, 11, 12, 15, 22, 23, 17, 16, 18, 19], sum weight = 276635
+INFO:Minmax:Partition 1 : [20, 21, 24, 26, 28, 27, 29, 25, 30, 31, 32, 33, 38, 35, 36, 34, 37, 39, 40, 41, 44, 45, 46, 42, 43, 49, 50, 47, 48, 51, 52, 61], sum weight = 299334
+INFO:Minmax:Partition 2 : [56, 53, 54, 57, 58, 55, 59, 60, 62, 63, 73, 65, 66, 67, 68, 69, 74, 70, 64, 71, 72, 75, 80, 81, 77, 76, 78, 82, 85, 86], sum weight = 300754
+INFO:Minmax:Partition 3 : [83, 84, 79, 87, 94, 90, 91, 88, 92, 89, 93, 95, 106, 107, 96, 97, 101, 102, 98, 104, 103, 99, 100, 105, 108, 114, 109, 119, 120, 110, 112, 111, 113, 115, 117, 116, 118, 121, 122, 123, 124, 125], sum weight = 294283
+```
+
+**Note**: Please set the debug level in the script accordingly, for example, `g.set_dbglevel(logging.DEBUG)`.
+
+## Example Script To Generate Partition Map
+An example script `test_partition.py` is added to the folder. Please run `python3 test_partition.py --help` for details. The script takes the TFLite model file and the trace JSON as arguments, and creates a `partition_map.json` at the same location as the TFLite file. A sample output from running `test_partition.py` is shown below:
+
+```
+$ python3 test_partition.py /tmp/nnpackage/inception_v3/inception_v3.tflite /tmp/inceptionV3.chrome.json --num_parts=4
+
+...
+...
+INFO:Topology:Partition 0 : [0, 1, 2, 3, 4, 5, 6, 8, 13, 7, 9, 14, 10, 11, 12, 15, 19, 17, 16, 20, 21, 22, 23], sum weight = 292393
+INFO:Topology:Partition 1 : [18, 24, 28, 31, 32, 29, 30, 25, 26, 27, 33, 35, 34, 38, 36, 37, 39, 49, 40, 44, 45, 41, 50, 42, 43, 46, 47, 48, 51, 52, 56, 57], sum weight = 296611
+INFO:Topology:Partition 2 : [53, 61, 54, 58, 59, 60, 62, 55, 63, 68, 65, 73, 66, 64, 69, 67, 74, 70, 71, 72, 75, 80, 76, 81, 85, 82, 83, 77, 86], sum weight = 286608
+INFO:Topology:Partition 3 : [78, 79, 84, 87, 94, 90, 91, 88, 89, 92, 93, 95, 96, 106, 101, 102, 104, 107, 103, 97, 99, 105, 98, 100, 108, 114, 119, 120, 115, 117, 110, 116, 118, 112, 109, 111, 113, 121, 122, 123, 124, 125], sum weight = 295394
+INFO:Topology:Session Graph:
+[[ 0 1 0 0]
+ [ 0 0 1 0]
+ [ 0 0 0 1]
+ [ 0 0 0 0]]
+INFO:Topology:Edge cut: 12
+INFO:Topology:Memory overhead (bytes): 4403136
+
+$ cat /tmp/nnpackage/inception_v3/partition_map.json
+{"partition_map": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 2, 2, 2, 2, 3, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3], "num_partitions": 4}
+$
+```
diff --git a/tools/model_partition_tool/graph_analysis.py b/tools/model_partition_tool/graph_analysis.py
new file mode 100644
index 000000000..18229b385
--- /dev/null
+++ b/tools/model_partition_tool/graph_analysis.py
@@ -0,0 +1,104 @@
+#!/usr/bin/python3
+
+import sys
+import numpy as np
+import tensorflow as tf
+"""Return list of operations from TFLite model
+"""
+
+
+def get_model_ops(tflite_file):
+ intr = tf.lite.Interpreter(tflite_file)
+ intr.allocate_tensors()
+ ops = intr._get_ops_details()
+ return ops
+
+
+"""Return list of tensors from TFLite model
+"""
+
+
+def get_model_tensors(tflite_file):
+ intr = tf.lite.Interpreter(tflite_file)
+ intr.allocate_tensors()
+ tensors = intr.get_tensor_details()
+ return tensors
+
+
+"""Generate binary adjacency matrix from a tflite model. The adjacency matrix is symmetric and
+undirected.
+"""
+
+
+def generate_adj_matrix(tflite_file):
+ intr = tf.lite.Interpreter(tflite_file)
+ intr.allocate_tensors()
+ ops = intr._get_ops_details()
+ adj_mat = np.zeros((len(ops), len(ops)), dtype=int)
+ for i in range(len(ops) - 1):
+ for j in range(i + 1, len(ops)):
+ if i != j:
+ if len(set(ops[i]['outputs']).intersection(set(ops[j]['inputs']))) > 0:
+ adj_mat[i][j] = 1
+ adj_mat[j][i] = 1
+ if len(set(ops[i]['inputs']).intersection(set(ops[j]['outputs']))) > 0:
+ adj_mat[i][j] = 1
+ adj_mat[j][i] = 1
+ return adj_mat
+
+
+"""Generate directed acyclic graph (DAG) from a tflite model.
+"""
+
+
+def generate_dag(tflite_file):
+ intr = tf.lite.Interpreter(tflite_file)
+ intr.allocate_tensors()
+ ops = intr._get_ops_details()
+ adj_mat = np.zeros((len(ops), len(ops)), dtype=int)
+ for i in range(len(ops) - 1):
+ for j in range(i + 1, len(ops)):
+ if i != j:
+ if len(set(ops[i]['outputs']).intersection(set(ops[j]['inputs']))) > 0:
+ adj_mat[i][j] = 1
+ if len(set(ops[i]['inputs']).intersection(set(ops[j]['outputs']))) > 0:
+ adj_mat[j][i] = 1
+ return adj_mat
+
+
+"""Generate Compressed Sparse Row format (CSR) of a adjacency matrix. Details on CSR are given at
+https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_row_(CSR,_CRS_or_Yale_format).
+"""
+
+
+def get_csr(adj_matrix):
+ row_ptr = []
+ col_ind = []
+ assert (adj_matrix.shape[0] == adj_matrix.shape[1])
+ n = adj_matrix.shape[0]
+ cnt = 0
+ for i in range(n):
+ # Record where row i starts in col_ind. Every row gets an entry, including
+ # rows without edges, so that row_ptr has the n + 1 entries CSR requires.
+ row_ptr.append(cnt)
+ for j in range(n):
+ if adj_matrix[i][j] == 1:
+ col_ind.append(j)
+ cnt += 1
+ row_ptr.append(cnt)
+ return row_ptr, col_ind
+
+
+"""Perform basic spectral clustering given a tflite model. The graph in this case is symmetric, undirected with
+unit weight per edge. Therefore, the spectral clustering is performed on a binary (0-1) adjacency matrix derived
+from the tflite model.
+"""
+
+
+def spectral_cluster(tflite_file):
+ adj_matrix = generate_adj_matrix(tflite_file)
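+ # Unnormalized graph Laplacian L = D - A; the eigenvector of the second-smallest
+ # eigenvalue (the Fiedler vector) yields a spectral bi-partition of the graph.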
+ L = np.diag(np.sum(adj_matrix, axis=0)) - adj_matrix
+ e_val, e_vec = np.linalg.eig(L)
+ vecs = e_vec[:, np.argsort(e_val)]
+ return vecs.T[1]
diff --git a/tools/model_partition_tool/runtime_stats.py b/tools/model_partition_tool/runtime_stats.py
new file mode 100644
index 000000000..c8d9b83d7
--- /dev/null
+++ b/tools/model_partition_tool/runtime_stats.py
@@ -0,0 +1,30 @@
+#!/usr/bin/python3
+import json
+from queue import LifoQueue
+
+
+def get_runtime_per_operation(trace_file):
+ with open(trace_file) as ifile:
+ data = json.load(ifile)
+ traceEvents = data['traceEvents']
+ time_val = {}
+ stack = LifoQueue(maxsize=1000)
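+ # Chrome trace events arrive as Begin ("B")/End ("E") pairs; a stack pairs each
+ # "E" with its matching "B" so that the per-operation duration is ts(E) - ts(B).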
+ for t in traceEvents:
+ if t == {}:
+ continue
+ if (t["name"].lower() != "graph" and "permute" not in t["name"].lower()) and \
+ ("subg" not in t["name"].lower() and "permute" not in t["name"].lower()):
+ if t["ph"] == "B":
+ stack.put((t["name"], int(t["ts"])))
+ elif t["ph"] == "E":
+ opname, st_time = stack.get()
+ assert (opname == t["name"])
+ if "$" in t["name"]:
+ time_val[int(
+ t["name"].split(" ")[0].lstrip('$'))] = int(t["ts"]) - st_time
+ else:
+ time_val[int(
+ t["name"].split(" ")[0].lstrip('@'))] = int(t["ts"]) - st_time
+
+ time_idx = [y for x, y in (sorted(time_val.items(), key=lambda item: item[0]))]
+ return time_idx
diff --git a/tools/model_partition_tool/test_partition.py b/tools/model_partition_tool/test_partition.py
new file mode 100644
index 000000000..553e54c55
--- /dev/null
+++ b/tools/model_partition_tool/test_partition.py
@@ -0,0 +1,21 @@
+#!/usr/bin/python3
+
+import argparse
+import os
+import Graph
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(
+ "test_partition.py", description="Example code to partition models")
+ parser.add_argument("modelfile", type=str, help="TFLite file with path")
+ parser.add_argument("tracefile", type=str, help="Chrome trace file with path")
+ parser.add_argument("--num_parts", type=int, default=2, help="Number of partitions")
+ parser.add_argument(
+ "--num_runs", type=int, default=10, help="Number of runs (topological orderings)")
+
+ # Parse arguments
+ args = parser.parse_args()
+
+ # Partition
+ g = Graph.GraphTopology(args.modelfile, args.tracefile)
+ g.partition_minmax_multiple(K=args.num_parts, nruns=args.num_runs)
diff --git a/tools/nnapi_quickcheck/CMakeLists.txt b/tools/nnapi_quickcheck/CMakeLists.txt
deleted file mode 100644
index c88155aa1..000000000
--- a/tools/nnapi_quickcheck/CMakeLists.txt
+++ /dev/null
@@ -1,82 +0,0 @@
-if(NOT BUILD_NNAPI_QUICKCHECK)
- return()
-endif(NOT BUILD_NNAPI_QUICKCHECK)
-
-file(GLOB_RECURSE NNAPI_QUICKCHECK_LIB_SOURCES "lib/*.cpp")
-file(GLOB_RECURSE NNAPI_QUICKCHECK_LIB_TESTS "lib/*.test.cpp")
-list(REMOVE_ITEM NNAPI_QUICKCHECK_LIB_SOURCES ${NNAPI_QUICKCHECK_LIB_TESTS})
-
-add_library(nnapi_quickcheck_common ${NNAPI_QUICKCHECK_LIB_SOURCES})
-target_include_directories(nnapi_quickcheck_common PUBLIC "inc")
-target_link_libraries(nnapi_quickcheck_common nnfw_lib_misc)
-target_link_libraries(nnapi_quickcheck_common nnfw_lib_tflite)
-
-add_executable(nnapi_quickcheck_lib_env_test "lib/env.test.cpp")
-target_link_libraries(nnapi_quickcheck_lib_env_test nnapi_quickcheck_common)
-
-function(add_nnapi_quickcheck NAME)
- add_executable(nnapi_quickcheck_${NAME} "tests/${NAME}.cpp")
- nnfw_find_package(GTest)
- target_link_libraries(nnapi_quickcheck_${NAME} gtest gtest_main pthread)
- target_link_libraries(nnapi_quickcheck_${NAME} nnapi_quickcheck_common)
-endfunction(add_nnapi_quickcheck)
-
-add_nnapi_quickcheck(add_1)
-add_nnapi_quickcheck(add_2)
-add_nnapi_quickcheck(add_3)
-add_nnapi_quickcheck(add_4)
-add_nnapi_quickcheck(add_5)
-add_nnapi_quickcheck(add_6)
-add_nnapi_quickcheck(add_7)
-add_nnapi_quickcheck(add_8)
-add_nnapi_quickcheck(add_9)
-add_nnapi_quickcheck(add_quan_1)
-add_nnapi_quickcheck(div_1)
-add_nnapi_quickcheck(div_2)
-add_nnapi_quickcheck(sub_1)
-add_nnapi_quickcheck(sub_2)
-add_nnapi_quickcheck(sub_3)
-add_nnapi_quickcheck(sub_4)
-add_nnapi_quickcheck(sub_5)
-add_nnapi_quickcheck(sub_6)
-add_nnapi_quickcheck(mul_1)
-add_nnapi_quickcheck(mul_2)
-add_nnapi_quickcheck(mul_quan_1)
-add_nnapi_quickcheck(relu_1)
-add_nnapi_quickcheck(relu_quan_1)
-add_nnapi_quickcheck(relu_2)
-add_nnapi_quickcheck(relu_3)
-add_nnapi_quickcheck(relu6_1)
-add_nnapi_quickcheck(relu6_quan_1)
-add_nnapi_quickcheck(relu1_1)
-add_nnapi_quickcheck(conv_1)
-add_nnapi_quickcheck(conv_quan_1)
-add_nnapi_quickcheck(dconv_1)
-add_nnapi_quickcheck(dconv_quan_1)
-add_nnapi_quickcheck(max_pool_1)
-add_nnapi_quickcheck(max_pool_quan_1)
-add_nnapi_quickcheck(avg_pool_1)
-add_nnapi_quickcheck(avg_pool_quan_1)
-add_nnapi_quickcheck(concat_1)
-add_nnapi_quickcheck(concat_quan_1)
-add_nnapi_quickcheck(reshape_1)
-add_nnapi_quickcheck(reshape_quan_1)
-add_nnapi_quickcheck(fully_connected_1)
-add_nnapi_quickcheck(fully_connected_quan_1)
-add_nnapi_quickcheck(softmax_1)
-add_nnapi_quickcheck(softmax_2)
-add_nnapi_quickcheck(softmax_quan_1)
-add_nnapi_quickcheck(resize_bilinear_1)
-add_nnapi_quickcheck(topk_v2_1)
-add_nnapi_quickcheck(cast_1)
-add_nnapi_quickcheck(cast_q_to_f_1)
-add_nnapi_quickcheck(cast_2)
-add_nnapi_quickcheck(gather_1)
-add_nnapi_quickcheck(gather_2)
-add_nnapi_quickcheck(dequantize_1)
-add_nnapi_quickcheck(tanh_1)
-add_nnapi_quickcheck(logistic_quan_1)
-add_nnapi_quickcheck(split_1)
-add_nnapi_quickcheck(split_2)
-add_nnapi_quickcheck(split_3)
-add_nnapi_quickcheck(split_4)
diff --git a/tools/nnapi_quickcheck/inc/env.h b/tools/nnapi_quickcheck/inc/env.h
deleted file mode 100644
index c2efcebc9..000000000
--- a/tools/nnapi_quickcheck/inc/env.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ENV_UTILS_H__
-#define __ENV_UTILS_H__
-
-#include <string>
-
-#include <cstdint>
-
-class IntVar
-{
-public:
- IntVar(const std::string &name, int32_t value);
-
-public:
- int32_t operator()(void) const { return _value; }
-
-private:
- int32_t _value;
-};
-
-class FloatVar
-{
-public:
- FloatVar(const std::string &name, float value);
-
-public:
- float operator()(void) const { return _value; }
-
-private:
- float _value;
-};
-
-class StrVar
-{
-public:
- StrVar(const std::string &name, const std::string &value);
-
-public:
- const std::string &operator()(void) const { return _value; }
-
-private:
- std::string _value;
-};
-
-#endif // __ENV_UTILS_H__
diff --git a/tools/nnapi_quickcheck/inc/memory.h b/tools/nnapi_quickcheck/inc/memory.h
deleted file mode 100644
index 3f1bca8a4..000000000
--- a/tools/nnapi_quickcheck/inc/memory.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __MEMORY_H__
-#define __MEMORY_H__
-
-#include <cstdlib>
-
-template <typename T> inline T *make_alloc(void)
-{
- auto ptr = malloc(sizeof(T));
-
- if (ptr == nullptr)
- {
- throw std::bad_alloc{};
- }
-
- return reinterpret_cast<T *>(ptr);
-}
-
-#endif // __MEMORY_H__
diff --git a/tools/nnapi_quickcheck/lib/env.cpp b/tools/nnapi_quickcheck/lib/env.cpp
deleted file mode 100644
index 005e876c2..000000000
--- a/tools/nnapi_quickcheck/lib/env.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "env.h"
-
-#include "misc/environment.h"
-
-//
-// Integer variable
-//
-IntVar::IntVar(const std::string &name, int32_t value) : _value{value}
-{
- nnfw::misc::env::IntAccessor{name}.access(_value);
-}
-
-//
-// Float variable
-//
-FloatVar::FloatVar(const std::string &name, float value) : _value{value}
-{
- nnfw::misc::env::FloatAccessor{name}.access(_value);
-}
-
-//
-// String variable
-//
-#include <cstdlib>
-
-StrVar::StrVar(const std::string &name, const std::string &value) : _value{value}
-{
- auto env = std::getenv(name.c_str());
-
- if (env)
- {
- _value = std::string{env};
- }
-}
diff --git a/tools/nnapi_quickcheck/lib/env.test.cpp b/tools/nnapi_quickcheck/lib/env.test.cpp
deleted file mode 100644
index dd9ac8be5..000000000
--- a/tools/nnapi_quickcheck/lib/env.test.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "env.h"
-
-#include <string>
-
-#include <cstdlib>
-#include <cassert>
-
-inline void ensure(int err) { assert(err == 0); }
-
-int main(int argc, char **argv)
-{
- const std::string key{"TEST"};
- const int num{3};
-
- const auto str = std::to_string(num);
-
- ensure(unsetenv(key.c_str()));
- ensure(setenv(key.c_str(), str.c_str(), 0));
-
- int value = 0;
-
- assert(value != num);
-
- IntVar buffer(key, value);
-
- assert(buffer() == num);
-
- return 0;
-}
diff --git a/tools/nnapi_quickcheck/tests/add_1.cpp b/tools/nnapi_quickcheck/tests/add_1.cpp
deleted file mode 100644
index f5363f918..000000000
--- a/tools/nnapi_quickcheck/tests/add_1.cpp
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_add_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "add_1.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_N = LEFT_N_Value();
- const int32_t LEFT_C = LEFT_C_Value();
- const int32_t LEFT_H = LEFT_H_Value();
- const int32_t LEFT_W = LEFT_W_Value();
-
- const int32_t RIGHT_N = RIGHT_N_Value();
- const int32_t RIGHT_C = RIGHT_C_Value();
- const int32_t RIGHT_H = RIGHT_H_Value();
- const int32_t RIGHT_W = RIGHT_W_Value();
-
- const int32_t OFM_N = std::max(LEFT_N, RIGHT_N);
- const int32_t OFM_C = std::max(LEFT_C, RIGHT_C);
- const int32_t OFM_H = std::max(LEFT_H, RIGHT_H);
- const int32_t OFM_W = std::max(LEFT_W, RIGHT_W);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_N);
- PRINT_VALUE(LEFT_C);
- PRINT_VALUE(LEFT_H);
- PRINT_VALUE(LEFT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT_N);
- PRINT_VALUE(RIGHT_C);
- PRINT_VALUE(RIGHT_H);
- PRINT_VALUE(RIGHT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */,
- {LEFT_N, LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization);
-
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */,
- {RIGHT_N, RIGHT_H, RIGHT_W, RIGHT_C} /* dims */,
- quantization);
-
- // Add Convolution Node
- //
- // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free
- // So, param should be allocated with malloc
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Add and store the result into Tensor #0
- // - Read Left from Tensor #1
- // - Read Left from Tensor #2,
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_ADD, 1));
-
- interp.SetInputs({1, 2});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/add_1.lst b/tools/nnapi_quickcheck/tests/add_1.lst
deleted file mode 100644
index fa17caebb..000000000
--- a/tools/nnapi_quickcheck/tests/add_1.lst
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_N, 1)
-INT_VALUE(LEFT_C, 3)
-INT_VALUE(LEFT_H, 16)
-INT_VALUE(LEFT_W, 16)
-
-INT_VALUE(RIGHT_N, 1)
-INT_VALUE(RIGHT_C, 3)
-INT_VALUE(RIGHT_H, 16)
-INT_VALUE(RIGHT_W, 16)
diff --git a/tools/nnapi_quickcheck/tests/add_2.cpp b/tools/nnapi_quickcheck/tests/add_2.cpp
deleted file mode 100644
index fe4d12f5d..000000000
--- a/tools/nnapi_quickcheck/tests/add_2.cpp
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_add_2, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "add_2.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_N = LEFT_N_Value();
- const int32_t LEFT_C = LEFT_C_Value();
- const int32_t LEFT_H = LEFT_H_Value();
- const int32_t LEFT_W = LEFT_W_Value();
-
- const int32_t RIGHT_N = RIGHT_N_Value();
- const int32_t RIGHT_C = RIGHT_C_Value();
- const int32_t RIGHT_H = RIGHT_H_Value();
- const int32_t RIGHT_W = RIGHT_W_Value();
-
- const int32_t OFM_N = std::max(LEFT_N, RIGHT_N);
- const int32_t OFM_C = std::max(LEFT_C, RIGHT_C);
- const int32_t OFM_H = std::max(LEFT_H, RIGHT_H);
- const int32_t OFM_W = std::max(LEFT_W, RIGHT_W);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_N);
- PRINT_VALUE(LEFT_C);
- PRINT_VALUE(LEFT_H);
- PRINT_VALUE(LEFT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT_N);
- PRINT_VALUE(RIGHT_C);
- PRINT_VALUE(RIGHT_H);
- PRINT_VALUE(RIGHT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- // Configure left data
- const uint32_t left_size = LEFT_N * LEFT_C * LEFT_H * LEFT_W;
- float left_data[left_size] = {
- 0.0f,
- };
-
- // Fill left data with random data
- {
- std::normal_distribution<float> left_dist(-1.0f, +1.0f);
-
- for (uint32_t off = 0; off < left_size; ++off)
- {
- left_data[off++] = left_dist(random);
- }
- }
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadOnly(1, kTfLiteFloat32 /* type */, "left" /* name */,
- {LEFT_N, LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization,
- reinterpret_cast<const char *>(left_data),
- left_size * sizeof(float));
-
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */,
- {RIGHT_N, RIGHT_H, RIGHT_W, RIGHT_C} /* dims */,
- quantization);
-
- // Add Convolution Node
- //
- // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free
- // So, param should be allocated with malloc
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Add and store the result into Tensor #0
- // - Read LHS from Tensor #1
- // - Read RHS from Tensor #2,
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_ADD, 1));
-
- interp.SetInputs({2});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/add_2.lst b/tools/nnapi_quickcheck/tests/add_2.lst
deleted file mode 100644
index fa17caebb..000000000
--- a/tools/nnapi_quickcheck/tests/add_2.lst
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_N, 1)
-INT_VALUE(LEFT_C, 3)
-INT_VALUE(LEFT_H, 16)
-INT_VALUE(LEFT_W, 16)
-
-INT_VALUE(RIGHT_N, 1)
-INT_VALUE(RIGHT_C, 3)
-INT_VALUE(RIGHT_H, 16)
-INT_VALUE(RIGHT_W, 16)
diff --git a/tools/nnapi_quickcheck/tests/add_3.cpp b/tools/nnapi_quickcheck/tests/add_3.cpp
deleted file mode 100644
index ce409ccbc..000000000
--- a/tools/nnapi_quickcheck/tests/add_3.cpp
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/TensorShapeUtils.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_add_3, simple_test)
-{
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
-#define STR_VALUE(NAME, VALUE) StrVar NAME##_Value(#NAME, VALUE);
-#include "add_3.lst"
-#undef STR_VALUE
-
- const auto LHS_SHAPE = nnfw::misc::tensor::Shape::from(LHS_SHAPE_Value());
- const auto RHS_SHAPE = nnfw::misc::tensor::Shape::from(RHS_SHAPE_Value());
- const auto OUT_SHAPE = nnfw::tflite::broadcast(LHS_SHAPE, RHS_SHAPE);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LHS_SHAPE);
- PRINT_VALUE(RHS_SHAPE);
- PRINT_VALUE(OUT_SHAPE);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- using nnfw::tflite::as_dims;
-
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization;
-
- quantization.scale = 1;
- quantization.zero_point = 0;
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- as_dims(OUT_SHAPE), quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */,
- as_dims(LHS_SHAPE), quantization);
-
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */,
- as_dims(RHS_SHAPE), quantization);
-
- // Add Convolution Node
- //
- // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free
- // So, param should be allocated with malloc
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Add and store the result into Tensor #0
- // - Read Left from Tensor #1
- // - Read Left from Tensor #2,
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_ADD, 1));
-
- interp.SetInputs({1, 2});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = 0;
- param.tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(param.verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(param.tolerance);
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/add_3.lst b/tools/nnapi_quickcheck/tests/add_3.lst
deleted file mode 100644
index 1981db4e1..000000000
--- a/tools/nnapi_quickcheck/tests/add_3.lst
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef STR_VALUE
-#error "STR_VALUE should be defined"
-#endif // STR_VALUE
-
-STR_VALUE(LHS_SHAPE, "1,3,16,16")
-STR_VALUE(RHS_SHAPE, "1,3,16,16")
diff --git a/tools/nnapi_quickcheck/tests/add_4.cpp b/tools/nnapi_quickcheck/tests/add_4.cpp
deleted file mode 100644
index b1231dd3f..000000000
--- a/tools/nnapi_quickcheck/tests/add_4.cpp
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_add_4, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "add_4.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_N = LEFT_N_Value();
- const int32_t LEFT_C = LEFT_C_Value();
- const int32_t LEFT_H = LEFT_H_Value();
- const int32_t LEFT_W = LEFT_W_Value();
-
- const int32_t RIGHT_N = RIGHT_N_Value();
- const int32_t RIGHT_C = RIGHT_C_Value();
- const int32_t RIGHT_H = RIGHT_H_Value();
- const int32_t RIGHT_W = RIGHT_W_Value();
-
- const int32_t OFM_N = std::max(LEFT_N, RIGHT_N);
- const int32_t OFM_C = std::max(LEFT_C, RIGHT_C);
- const int32_t OFM_H = std::max(LEFT_H, RIGHT_H);
- const int32_t OFM_W = std::max(LEFT_W, RIGHT_W);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_N);
- PRINT_VALUE(LEFT_C);
- PRINT_VALUE(LEFT_H);
- PRINT_VALUE(LEFT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT_N);
- PRINT_VALUE(RIGHT_C);
- PRINT_VALUE(RIGHT_H);
- PRINT_VALUE(RIGHT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */,
- {LEFT_N, LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization);
-
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */,
- {RIGHT_N, RIGHT_H, RIGHT_W, RIGHT_C} /* dims */,
- quantization);
-
- // Add Convolution Node
- //
- // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free
- // So, param should be allocated with malloc
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Add and store the result into Tensor #0
- // - Read Left from Tensor #1
- // - Read Left from Tensor #2,
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_ADD, 1));
-
- interp.SetInputs({1, 2});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/add_4.lst b/tools/nnapi_quickcheck/tests/add_4.lst
deleted file mode 100644
index 6b289007f..000000000
--- a/tools/nnapi_quickcheck/tests/add_4.lst
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_N, 1)
-INT_VALUE(LEFT_C, 2)
-INT_VALUE(LEFT_H, 16)
-INT_VALUE(LEFT_W, 8)
-
-INT_VALUE(RIGHT_N, 1)
-INT_VALUE(RIGHT_C, 2)
-INT_VALUE(RIGHT_H, 1)
-INT_VALUE(RIGHT_W, 8)
diff --git a/tools/nnapi_quickcheck/tests/add_5.cpp b/tools/nnapi_quickcheck/tests/add_5.cpp
deleted file mode 100644
index f900153c1..000000000
--- a/tools/nnapi_quickcheck/tests/add_5.cpp
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_add_5, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "add_5.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_N = LEFT_N_Value();
- const int32_t LEFT_C = LEFT_C_Value();
- const int32_t LEFT_H = LEFT_H_Value();
- const int32_t LEFT_W = LEFT_W_Value();
-
- const int32_t RIGHT = RIGHT_Value();
-
- const int32_t OFM_N = LEFT_N;
- const int32_t OFM_C = LEFT_C;
- const int32_t OFM_H = LEFT_H;
- const int32_t OFM_W = LEFT_W;
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_N);
- PRINT_VALUE(LEFT_C);
- PRINT_VALUE(LEFT_H);
- PRINT_VALUE(LEFT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */,
- {LEFT_N, LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization);
-
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */,
- {RIGHT} /* dims */, quantization);
-
- // Add Convolution Node
- //
- // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free
- // So, param should be allocated with malloc
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Add and store the result into Tensor #0
- // - Read Left from Tensor #1
- // - Read Left from Tensor #2,
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_ADD, 1));
-
- interp.SetInputs({1, 2});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/add_5.lst b/tools/nnapi_quickcheck/tests/add_5.lst
deleted file mode 100644
index eb316b6ad..000000000
--- a/tools/nnapi_quickcheck/tests/add_5.lst
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_N, 1)
-INT_VALUE(LEFT_C, 3)
-INT_VALUE(LEFT_H, 8)
-INT_VALUE(LEFT_W, 16)
-
-INT_VALUE(RIGHT, 1)
diff --git a/tools/nnapi_quickcheck/tests/add_6.cpp b/tools/nnapi_quickcheck/tests/add_6.cpp
deleted file mode 100644
index 83b87ef32..000000000
--- a/tools/nnapi_quickcheck/tests/add_6.cpp
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_add_6, simple_test)
-{
- int verbose = 1;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "add_6.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_H = LEFT_H_Value();
- const int32_t LEFT_W = LEFT_W_Value();
-
- const int32_t RIGHT = RIGHT_Value();
-
- const int32_t OFM_H = LEFT_H;
- const int32_t OFM_W = LEFT_W;
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_H);
- PRINT_VALUE(LEFT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_H, OFM_W} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */,
- {LEFT_H, LEFT_W} /* dims */, quantization);
-
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */,
- {RIGHT} /* dims */, quantization);
-
- // Add ADD Node
- //
- // NOTE AddNodeWithParameters takes the ownership of param and deallocates it with free
- // So, param should be allocated with malloc
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Add and store the result into Tensor #0
- // - Read Left from Tensor #1
- // - Read Right from Tensor #2
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_ADD, 1));
-
- interp.SetInputs({1, 2});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/add_6.lst b/tools/nnapi_quickcheck/tests/add_6.lst
deleted file mode 100644
index 75db4c8d0..000000000
--- a/tools/nnapi_quickcheck/tests/add_6.lst
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_H, 8)
-INT_VALUE(LEFT_W, 2)
-
-INT_VALUE(RIGHT, 1)
diff --git a/tools/nnapi_quickcheck/tests/add_7.cpp b/tools/nnapi_quickcheck/tests/add_7.cpp
deleted file mode 100644
index 732320f4a..000000000
--- a/tools/nnapi_quickcheck/tests/add_7.cpp
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_add_7, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "add_7.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_C = LEFT_C_Value();
- const int32_t LEFT_H = LEFT_H_Value();
- const int32_t LEFT_W = LEFT_W_Value();
-
- const int32_t RIGHT_C = RIGHT_C_Value();
- const int32_t RIGHT_H = RIGHT_H_Value();
- const int32_t RIGHT_W = RIGHT_W_Value();
-
- const int32_t OFM_C = LEFT_C;
- const int32_t OFM_H = LEFT_H;
- const int32_t OFM_W = LEFT_W;
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_C);
- PRINT_VALUE(LEFT_H);
- PRINT_VALUE(LEFT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT_C);
- PRINT_VALUE(RIGHT_H);
- PRINT_VALUE(RIGHT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */,
- {LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization);
-
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */,
- {RIGHT_H, RIGHT_W, RIGHT_C} /* dims */, quantization);
-
- // Add ADD Node
- //
- // NOTE AddNodeWithParameters takes the ownership of param and deallocates it with free
- // So, param should be allocated with malloc
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Add and store the result into Tensor #0
- // - Read Left from Tensor #1
- // - Read Right from Tensor #2
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_ADD, 1));
-
- interp.SetInputs({1, 2});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/add_7.lst b/tools/nnapi_quickcheck/tests/add_7.lst
deleted file mode 100644
index 1dc8b6147..000000000
--- a/tools/nnapi_quickcheck/tests/add_7.lst
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_C, 3)
-INT_VALUE(LEFT_H, 8)
-INT_VALUE(LEFT_W, 16)
-
-INT_VALUE(RIGHT_C, 3)
-INT_VALUE(RIGHT_H, 8)
-INT_VALUE(RIGHT_W, 1)
diff --git a/tools/nnapi_quickcheck/tests/add_8.cpp b/tools/nnapi_quickcheck/tests/add_8.cpp
deleted file mode 100644
index d89e977d5..000000000
--- a/tools/nnapi_quickcheck/tests/add_8.cpp
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_add_8, simple_test)
-{
- int verbose = 1;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "add_8.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_N = LEFT_N_Value();
- const int32_t LEFT_C = LEFT_C_Value();
- const int32_t LEFT_H = LEFT_H_Value();
- const int32_t LEFT_W = LEFT_W_Value();
-
- const int32_t RIGHT_N = RIGHT_N_Value();
- const int32_t RIGHT_C = RIGHT_C_Value();
- const int32_t RIGHT_H = RIGHT_H_Value();
- const int32_t RIGHT_W = RIGHT_W_Value();
-
- const int32_t OFM_N = std::max(LEFT_N, RIGHT_N);
- const int32_t OFM_C = std::max(LEFT_C, RIGHT_C);
- const int32_t OFM_H = std::max(LEFT_H, RIGHT_H);
- const int32_t OFM_W = std::max(LEFT_W, RIGHT_W);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_N);
- PRINT_VALUE(LEFT_C);
- PRINT_VALUE(LEFT_H);
- PRINT_VALUE(LEFT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT_N);
- PRINT_VALUE(RIGHT_C);
- PRINT_VALUE(RIGHT_H);
- PRINT_VALUE(RIGHT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- // Configure input data for both operands
- const uint32_t left_size = LEFT_N * LEFT_C * LEFT_H * LEFT_W;
- const uint32_t right_size = RIGHT_N * RIGHT_C * RIGHT_H * RIGHT_W;
- // NOTE left_size/right_size are runtime values, so these are variable-length
- // arrays (a GCC extension) that cannot take an initializer; both buffers are
- // fully overwritten below
- float left_data[left_size];
- float right_data[right_size];
-
- // Fill the left operand with a constant (10) and the right with 1, 2, 3, ...
- // NOTE left_dist below is declared but never used
- {
- std::normal_distribution<float> left_dist(-1.0f, +1.0f);
- int value = 10;
- for (uint32_t off = 0; off < left_size; ++off)
- {
- left_data[off] = value;
- std::cout << left_data[off] << std::endl;
- }
- value = 1;
- for (uint32_t off = 0; off < right_size; ++off)
- {
- right_data[off] = value++;
- std::cout << right_data[off] << std::endl;
- }
- }
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadOnly(1, kTfLiteFloat32 /* type */, "left" /* name */,
- {LEFT_N, LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization,
- reinterpret_cast<const char *>(left_data),
- left_size * sizeof(float));
-
- // Configure input(s)
- interp.SetTensorParametersReadOnly(
- 2, kTfLiteFloat32 /* type */, "right" /* name */, {RIGHT_C} /* dims */, quantization,
- //{RIGHT_W, RIGHT_C} /* dims */, quantization,
- reinterpret_cast<const char *>(right_data), right_size * sizeof(float));
-
- // Add ADD Node
- //
- // NOTE AddNodeWithParameters takes the ownership of param and deallocates it with free
- // So, param should be allocated with malloc
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Add and store the result into Tensor #0
- // - Read LHS from Tensor #1
- // - Read RHS from Tensor #2
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_ADD, 1));
-
- interp.SetInputs({});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
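
Unlike the earlier add tests, add_8 (and add_9 below) hands both operands to the interpreter with SetTensorParametersReadOnly and registers no runtime inputs (SetInputs({})). The tests rely on the interpreter referencing the caller's buffer rather than copying it, so the byte count must be exact and the buffer must outlive the graph. A small sketch of that bookkeeping, with names invented for illustration:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Hypothetical holder for a constant operand fed via SetTensorParametersReadOnly.
    // The storage must stay alive for as long as the interpreter may run, because
    // the read-only API is handed a raw pointer rather than a copy.
    struct ConstOperand
    {
      std::vector<float> storage;

      const char *bytes() const { return reinterpret_cast<const char *>(storage.data()); }
      std::size_t byte_size() const { return storage.size() * sizeof(float); }
    };

    // e.g. add_8's left operand: 1 x 3 x 2 x 4 elements, all set to 10.0f
    // ConstOperand left{std::vector<float>(1 * 3 * 2 * 4, 10.0f)};
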
diff --git a/tools/nnapi_quickcheck/tests/add_8.lst b/tools/nnapi_quickcheck/tests/add_8.lst
deleted file mode 100644
index 3119c7f65..000000000
--- a/tools/nnapi_quickcheck/tests/add_8.lst
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_N, 1)
-INT_VALUE(LEFT_H, 3)
-INT_VALUE(LEFT_W, 2)
-INT_VALUE(LEFT_C, 4)
-
-INT_VALUE(RIGHT_N, 1)
-INT_VALUE(RIGHT_H, 1)
-INT_VALUE(RIGHT_W, 1)
-INT_VALUE(RIGHT_C, 4)
diff --git a/tools/nnapi_quickcheck/tests/add_9.cpp b/tools/nnapi_quickcheck/tests/add_9.cpp
deleted file mode 100644
index fd4e1f9c1..000000000
--- a/tools/nnapi_quickcheck/tests/add_9.cpp
+++ /dev/null
@@ -1,187 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_add_9, simple_test)
-{
- int verbose = 1;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "add_9.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_N = LEFT_N_Value();
- const int32_t LEFT_C = LEFT_C_Value();
- const int32_t LEFT_H = LEFT_H_Value();
- const int32_t LEFT_W = LEFT_W_Value();
-
- const int32_t RIGHT_N = RIGHT_N_Value();
- const int32_t RIGHT_C = RIGHT_C_Value();
- const int32_t RIGHT_H = RIGHT_H_Value();
- const int32_t RIGHT_W = RIGHT_W_Value();
-
- const int32_t OFM_N = std::max(LEFT_N, RIGHT_N);
- const int32_t OFM_C = std::max(LEFT_C, RIGHT_C);
- const int32_t OFM_H = std::max(LEFT_H, RIGHT_H);
- const int32_t OFM_W = std::max(LEFT_W, RIGHT_W);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_N);
- PRINT_VALUE(LEFT_H);
- PRINT_VALUE(LEFT_W);
- PRINT_VALUE(LEFT_C);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT_N);
- PRINT_VALUE(RIGHT_H);
- PRINT_VALUE(RIGHT_W);
- PRINT_VALUE(RIGHT_C);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
- PRINT_VALUE(OFM_C);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- // Configure input data for both operands
- const uint32_t left_size = LEFT_N * LEFT_C * LEFT_H * LEFT_W;
- const uint32_t right_size = RIGHT_N * RIGHT_C * RIGHT_H * RIGHT_W;
- // NOTE left_size/right_size are runtime values, so these are variable-length
- // arrays (a GCC extension) that cannot take an initializer; both buffers are
- // fully overwritten below
- float left_data[left_size];
- float right_data[right_size];
-
- // Fill the left operand with a constant (10.0f) and the right with 1, 2, 3, ...
- // NOTE left_dist below is declared but never used
- {
- std::normal_distribution<float> left_dist(-1.0f, +1.0f);
- float value = 10.0f;
- for (uint32_t off = 0; off < left_size; ++off)
- {
- left_data[off] = value;
- }
- value = 1.0f;
- for (uint32_t off = 0; off < right_size; ++off)
- {
- right_data[off] = value++;
- }
- }
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadOnly(
- 1, kTfLiteFloat32 /* type */, "left" /* name */, {LEFT_W, LEFT_C} /* dims */, quantization,
- reinterpret_cast<const char *>(left_data), left_size * sizeof(float));
-
- // Configure input(s)
- interp.SetTensorParametersReadOnly(2, kTfLiteFloat32 /* type */, "right" /* name */,
- {RIGHT_N, RIGHT_H, RIGHT_W, RIGHT_C} /* dims */,
- quantization, reinterpret_cast<const char *>(right_data),
- right_size * sizeof(float));
-
- // Add ADD Node
- //
- // NOTE AddNodeWithParameters takes the ownership of param and deallocates it with free
- // So, param should be allocated with malloc
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Add and store the result into Tensor #0
- // - Read LHS from Tensor #1
- // - Read RHS from Tensor #2
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_ADD, 1));
-
- interp.SetInputs({});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/add_9.lst b/tools/nnapi_quickcheck/tests/add_9.lst
deleted file mode 100644
index 52a1f1acc..000000000
--- a/tools/nnapi_quickcheck/tests/add_9.lst
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_N, 1)
-INT_VALUE(LEFT_H, 1)
-INT_VALUE(LEFT_W, 3)
-INT_VALUE(LEFT_C, 4)
-
-INT_VALUE(RIGHT_N, 1)
-INT_VALUE(RIGHT_H, 2)
-INT_VALUE(RIGHT_W, 3)
-INT_VALUE(RIGHT_C, 4)
diff --git a/tools/nnapi_quickcheck/tests/add_quan_1.cpp b/tools/nnapi_quickcheck/tests/add_quan_1.cpp
deleted file mode 100644
index e3d85122f..000000000
--- a/tools/nnapi_quickcheck/tests/add_quan_1.cpp
+++ /dev/null
@@ -1,162 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_add_quan_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "add_quan_1.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_N = LEFT_N_Value();
- const int32_t LEFT_C = LEFT_C_Value();
- const int32_t LEFT_H = LEFT_H_Value();
- const int32_t LEFT_W = LEFT_W_Value();
-
- const int32_t RIGHT_N = RIGHT_N_Value();
- const int32_t RIGHT_C = RIGHT_C_Value();
- const int32_t RIGHT_H = RIGHT_H_Value();
- const int32_t RIGHT_W = RIGHT_W_Value();
-
- const int32_t OFM_N = std::max(LEFT_N, RIGHT_N);
- const int32_t OFM_C = std::max(LEFT_C, RIGHT_C);
- const int32_t OFM_H = std::max(LEFT_H, RIGHT_H);
- const int32_t OFM_W = std::max(LEFT_W, RIGHT_W);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_N);
- PRINT_VALUE(LEFT_C);
- PRINT_VALUE(LEFT_H);
- PRINT_VALUE(LEFT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT_N);
- PRINT_VALUE(RIGHT_C);
- PRINT_VALUE(RIGHT_H);
- PRINT_VALUE(RIGHT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization;
- quantization.zero_point = 0;
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- quantization.scale = 2.0f;
- interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input(s)
- quantization.scale = 1.0f;
- interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "left" /* name */,
- {LEFT_N, LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization);
-
- interp.SetTensorParametersReadWrite(2, kTfLiteUInt8 /* type */, "right" /* name */,
- {RIGHT_N, RIGHT_H, RIGHT_W, RIGHT_C} /* dims */,
- quantization);
-
- // Add ADD Node
- //
- // NOTE AddNodeWithParameters takes the ownership of param and deallocates it with free
- // So, param should be allocated with malloc
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Add and store the result into Tensor #0
- // - Read Left from Tensor #1
- // - Read Right from Tensor #2
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_ADD, 1));
-
- interp.SetInputs({1, 2});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
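
add_quan_1 runs the same graph on kTfLiteUInt8 tensors, with input scale 1.0 and output scale 2.0 (zero point 0 everywhere). The context.h comment all these tests quote, real_value = scale * (quantized_value - zero_point), in runnable form, together with the inverse rounding/clamping a uint8 quantizer needs:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // real_value = scale * (quantized_value - zero_point)
    float dequantize(uint8_t q, float scale, int32_t zero_point)
    {
      return scale * static_cast<float>(static_cast<int32_t>(q) - zero_point);
    }

    // Inverse mapping: round to nearest, then clamp to the uint8 range
    uint8_t quantize(float real, float scale, int32_t zero_point)
    {
      const int32_t q = static_cast<int32_t>(std::lround(real / scale)) + zero_point;
      return static_cast<uint8_t>(std::min(255, std::max(0, q)));
    }

    // With the test's output scale 2.0 and zero point 0: quantize(4.0f, 2.0f, 0) == 2
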
diff --git a/tools/nnapi_quickcheck/tests/add_quan_1.lst b/tools/nnapi_quickcheck/tests/add_quan_1.lst
deleted file mode 100644
index fa17caebb..000000000
--- a/tools/nnapi_quickcheck/tests/add_quan_1.lst
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_N, 1)
-INT_VALUE(LEFT_C, 3)
-INT_VALUE(LEFT_H, 16)
-INT_VALUE(LEFT_W, 16)
-
-INT_VALUE(RIGHT_N, 1)
-INT_VALUE(RIGHT_C, 3)
-INT_VALUE(RIGHT_H, 16)
-INT_VALUE(RIGHT_W, 16)
diff --git a/tools/nnapi_quickcheck/tests/avg_pool_1.cpp b/tools/nnapi_quickcheck/tests/avg_pool_1.cpp
deleted file mode 100644
index 052c68915..000000000
--- a/tools/nnapi_quickcheck/tests/avg_pool_1.cpp
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <iostream>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_avg_pool_1, simple_test)
-{
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Set random test parameters
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "avg_pool_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t KER_H = KER_H_Value();
- const int32_t KER_W = KER_W_Value();
-
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = (IFM_H - KER_H) + 1;
- const int32_t OFM_W = (IFM_W - KER_W) + 1;
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(KER_H);
- PRINT_VALUE(KER_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- quantization.scale = 1;
- quantization.zero_point = 0;
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(2);
-
- // Configure OFM
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {1 /*N*/, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure IFM
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // Add Average Pooling Node
- //
- // NOTE AddNodeWithParameters takes the ownership of param and deallocates it with free
- // So, param should be allocated with malloc
- auto param = make_alloc<TfLitePoolParams>();
-
- param->padding = kTfLitePaddingValid;
- param->stride_width = 1;
- param->stride_height = 1;
- param->filter_width = KER_W;
- param->filter_height = KER_H;
- param->activation = kTfLiteActNone;
-
- // Run Average Pooling and store its result into Tensor #0
- // - Read IFM from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_AVERAGE_POOL_2D, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
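
The OFM_H/OFM_W arithmetic above is the VALID-padding rule for stride 1: a K-wide window fits I - K + 1 times along an I-wide axis. A sketch of the general form, which the test specializes with stride_width = stride_height = 1:

    #include <cstdint>

    // VALID padding: the window must lie fully inside the input
    int32_t valid_out_extent(int32_t in, int32_t ker, int32_t stride)
    {
      return (in - ker) / stride + 1;
    }

    // avg_pool_1.lst: a 3x4 IFM pooled with a 3x4 kernel at stride 1 -> 1x1 output
    // valid_out_extent(3, 3, 1) == 1 and valid_out_extent(4, 4, 1) == 1
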
diff --git a/tools/nnapi_quickcheck/tests/avg_pool_1.lst b/tools/nnapi_quickcheck/tests/avg_pool_1.lst
deleted file mode 100644
index 02d86d470..000000000
--- a/tools/nnapi_quickcheck/tests/avg_pool_1.lst
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_C, 2)
-INT_VALUE(IFM_H, 3)
-INT_VALUE(IFM_W, 4)
-
-INT_VALUE(KER_H, 3)
-INT_VALUE(KER_W, 4)
diff --git a/tools/nnapi_quickcheck/tests/avg_pool_quan_1.cpp b/tools/nnapi_quickcheck/tests/avg_pool_quan_1.cpp
deleted file mode 100644
index 86f35f76d..000000000
--- a/tools/nnapi_quickcheck/tests/avg_pool_quan_1.cpp
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <iostream>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_avg_pool_quan_1, simple_test)
-{
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Set random test parameters
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "avg_pool_quan_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t KER_H = KER_H_Value();
- const int32_t KER_W = KER_W_Value();
-
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = (IFM_H - KER_H) + 1;
- const int32_t OFM_W = (IFM_W - KER_W) + 1;
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(KER_H);
- PRINT_VALUE(KER_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization;
- quantization.scale = 1.0f;
- quantization.zero_point = 0;
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(2);
-
- // Configure OFM
- interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */,
- {1 /*N*/, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure IFM
- interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "input" /* name */,
- {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // Add Average Pooling Node
- //
- // NOTE AddNodeWithParameters takes the ownership of param and deallocates it with free
- // So, param should be allocated with malloc
- auto param = make_alloc<TfLitePoolParams>();
-
- param->padding = kTfLitePaddingValid;
- param->stride_width = 1;
- param->stride_height = 1;
- param->filter_width = KER_W;
- param->filter_height = KER_H;
- param->activation = kTfLiteActNone;
-
- // Run Average Pooling and store its result into Tensor #0
- // - Read IFM from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_AVERAGE_POOL_2D, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/avg_pool_quan_1.lst b/tools/nnapi_quickcheck/tests/avg_pool_quan_1.lst
deleted file mode 100644
index 02d86d470..000000000
--- a/tools/nnapi_quickcheck/tests/avg_pool_quan_1.lst
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_C, 2)
-INT_VALUE(IFM_H, 3)
-INT_VALUE(IFM_W, 4)
-
-INT_VALUE(KER_H, 3)
-INT_VALUE(KER_W, 4)
diff --git a/tools/nnapi_quickcheck/tests/cast_1.cpp b/tools/nnapi_quickcheck/tests/cast_1.cpp
deleted file mode 100644
index 788cd575f..000000000
--- a/tools/nnapi_quickcheck/tests/cast_1.cpp
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_cast_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "cast_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_N = IFM_N_Value();
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t OFM_N = IFM_N;
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_N);
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- TfLiteQuantizationParams quantization;
-
- quantization.scale = 1;
- quantization.zero_point = 0;
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(2);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input
- interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "input" /* name */,
- {IFM_N, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // Add Cast Node
- // Run CAST and store the result into Tensor #0
- // - Read input from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr,
- BuiltinOpResolver().FindOp(BuiltinOperator_CAST, 1));
-
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
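
cast_1 converts a uint8 input tensor to a float32 output. CAST is a plain value conversion: unlike DEQUANTIZE it does not apply the tensor's scale and zero point, which is why the test can leave scale = 1 and zero_point = 0. A sketch of the element-wise behaviour being checked:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Element-wise value conversion: uint8 200 becomes 200.0f, regardless of any
    // quantization parameters attached to the tensor.
    std::vector<float> cast_u8_to_f32(const std::vector<uint8_t> &in)
    {
      std::vector<float> out(in.size());
      for (std::size_t i = 0; i < in.size(); ++i)
        out[i] = static_cast<float>(in[i]);
      return out;
    }
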
diff --git a/tools/nnapi_quickcheck/tests/cast_1.lst b/tools/nnapi_quickcheck/tests/cast_1.lst
deleted file mode 100644
index a0077cb95..000000000
--- a/tools/nnapi_quickcheck/tests/cast_1.lst
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_N, 1)
-INT_VALUE(IFM_C, 3)
-INT_VALUE(IFM_H, 320)
-INT_VALUE(IFM_W, 320)
diff --git a/tools/nnapi_quickcheck/tests/cast_2.cpp b/tools/nnapi_quickcheck/tests/cast_2.cpp
deleted file mode 100644
index a9e99ee2b..000000000
--- a/tools/nnapi_quickcheck/tests/cast_2.cpp
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_cast_2, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "cast_2.lst"
-#undef INT_VALUE
-
- const int32_t IFM_N = IFM_N_Value();
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t OFM_N = IFM_N;
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_N);
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(2);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input
- interp.SetTensorParametersReadWrite(1, kTfLiteInt32 /* type */, "input" /* name */,
- {IFM_N, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // Add Cast Node
- // Run CAST and store the result into Tensor #0
- // - Read input from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr,
- BuiltinOpResolver().FindOp(BuiltinOperator_CAST, 1));
-
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/cast_2.lst b/tools/nnapi_quickcheck/tests/cast_2.lst
deleted file mode 100644
index a0077cb95..000000000
--- a/tools/nnapi_quickcheck/tests/cast_2.lst
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_N, 1)
-INT_VALUE(IFM_C, 3)
-INT_VALUE(IFM_H, 320)
-INT_VALUE(IFM_W, 320)
diff --git a/tools/nnapi_quickcheck/tests/cast_q_to_f_1.cpp b/tools/nnapi_quickcheck/tests/cast_q_to_f_1.cpp
deleted file mode 100644
index 4af6c772f..000000000
--- a/tools/nnapi_quickcheck/tests/cast_q_to_f_1.cpp
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_cast_q_to_f_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "cast_q_to_f_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_N = IFM_N_Value();
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t OFM_N = IFM_N;
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_N);
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- TfLiteQuantizationParams quantization;
-
- quantization.scale = 1;
- quantization.zero_point = 0;
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(2);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {IFM_N, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // Add Cast Node
- // Run CAST and store the result into Tensor #0
- // - Read input from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr,
- BuiltinOpResolver().FindOp(BuiltinOperator_CAST, 1));
-
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/cast_q_to_f_1.lst b/tools/nnapi_quickcheck/tests/cast_q_to_f_1.lst
deleted file mode 100644
index a0077cb95..000000000
--- a/tools/nnapi_quickcheck/tests/cast_q_to_f_1.lst
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_N, 1)
-INT_VALUE(IFM_C, 3)
-INT_VALUE(IFM_H, 320)
-INT_VALUE(IFM_W, 320)
diff --git a/tools/nnapi_quickcheck/tests/concat_1.cpp b/tools/nnapi_quickcheck/tests/concat_1.cpp
deleted file mode 100644
index d2cb1aada..000000000
--- a/tools/nnapi_quickcheck/tests/concat_1.cpp
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_concat_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "concat_1.lst"
-#undef INT_VALUE
-
- // TODO Allow users to set concat axis!
- const int32_t CONCAT_COUNT = CONCAT_COUNT_Value();
-
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- int32_t OFM_C = 0;
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(CONCAT_COUNT);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- // Randomize IFM depth
- std::default_random_engine generator(SEED);
- std::uniform_int_distribution<int> distribution(1, 8);
-
- std::vector<int32_t> depths;
-
- for (int32_t n = 0; n < CONCAT_COUNT; ++n)
- {
- const auto depth = distribution(generator);
-
- OFM_C += depth;
- depths.emplace_back(depth);
- }
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(depths.size() + 1);
-
- // Configure OFM
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {1 /*N*/, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure IFM(s)
- std::vector<int> ifm_indexes;
-
- for (uint32_t n = 0; n < depths.size(); ++n)
- {
- const auto ifm_index = 1 + n;
- const auto IFM_C = depths.at(n);
-
- interp.SetTensorParametersReadWrite(ifm_index, kTfLiteFloat32 /* type */, "input" /* name */,
- {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- ifm_indexes.emplace_back(ifm_index);
- }
-
- // Add Concat Node
- //
- // NOTE AddNodeWithParameters takes the ownership of param and deallocates it with free
- // So, param should be allocated with malloc
- auto param = make_alloc<TfLiteConcatenationParams>();
-
- param->activation = kTfLiteActNone;
- param->axis = 3;
-
- // Run Concatenation and store its result into Tensor #0
- // - Read IFMs from Tensors #1 ~ #CONCAT_COUNT
- interp.AddNodeWithParameters(ifm_indexes, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_CONCATENATION, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs(ifm_indexes);
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
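
concat_1 samples a random depth for each input and accumulates OFM_C as it goes: along the concatenation axis (here axis 3, the channel axis) the output extent is the sum of the input extents, while every other extent must match across inputs. The same bookkeeping in isolation:

    #include <cstdint>
    #include <numeric>
    #include <vector>

    // Output extent along the concatenation axis
    int32_t concat_axis_extent(const std::vector<int32_t> &depths)
    {
      return std::accumulate(depths.begin(), depths.end(), 0);
    }

    // e.g. CONCAT_COUNT = 3 inputs with sampled depths {2, 5, 7} -> OFM_C == 14
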
diff --git a/tools/nnapi_quickcheck/tests/concat_1.lst b/tools/nnapi_quickcheck/tests/concat_1.lst
deleted file mode 100644
index db70d4c8b..000000000
--- a/tools/nnapi_quickcheck/tests/concat_1.lst
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(CONCAT_COUNT, 3)
-
-INT_VALUE(IFM_H, 3)
-INT_VALUE(IFM_W, 4)
diff --git a/tools/nnapi_quickcheck/tests/concat_quan_1.cpp b/tools/nnapi_quickcheck/tests/concat_quan_1.cpp
deleted file mode 100644
index f861ac857..000000000
--- a/tools/nnapi_quickcheck/tests/concat_quan_1.cpp
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_concat_quan_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "concat_quan_1.lst"
-#undef INT_VALUE
-
- // TODO Allow users to set concat axis!
- const int32_t CONCAT_COUNT = CONCAT_COUNT_Value();
-
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- int32_t OFM_C = 0;
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(CONCAT_COUNT);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- // Randomize IFM depth
- std::default_random_engine generator(SEED);
- std::uniform_int_distribution<int> distribution(1, 8);
-
- std::vector<int32_t> depths;
-
- for (int32_t n = 0; n < CONCAT_COUNT; ++n)
- {
- const auto depth = distribution(generator);
-
- OFM_C += depth;
- depths.emplace_back(depth);
- }
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization;
- quantization.scale = 1.0f;
- quantization.zero_point = 0;
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(depths.size() + 1);
-
- // Configure OFM
- interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */,
- {1 /*N*/, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure IFM(s)
- std::vector<int> ifm_indexes;
-
- for (uint32_t n = 0; n < depths.size(); ++n)
- {
- const auto ifm_index = 1 + n;
- const auto IFM_C = depths.at(n);
-
- interp.SetTensorParametersReadWrite(ifm_index, kTfLiteUInt8 /* type */, "input" /* name */,
- {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- ifm_indexes.emplace_back(ifm_index);
- }
-
- // Add Concat Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free()
- // So, param should be allocated with malloc()
- auto param = make_alloc<TfLiteConcatenationParams>();
-
- param->activation = kTfLiteActNone;
- param->axis = 3;
-
- // Run Concatenation and store its result into Tensor #0
- // - Read IFMs from Tensor #1 ~ #CONCAT_COUNT
- interp.AddNodeWithParameters(ifm_indexes, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_CONCATENATION, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs(ifm_indexes);
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/concat_quan_1.lst b/tools/nnapi_quickcheck/tests/concat_quan_1.lst
deleted file mode 100644
index db70d4c8b..000000000
--- a/tools/nnapi_quickcheck/tests/concat_quan_1.lst
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(CONCAT_COUNT, 3)
-
-INT_VALUE(IFM_H, 3)
-INT_VALUE(IFM_W, 4)
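All of the quantized quickcheck tests lean on the asymmetric quantization mapping quoted from context.h, real_value = scale * (quantized_value - zero_point); with scale 1.0 and zero point 0, as configured above, the mapping is an identity up to rounding and clamping. A small sketch of the round trip (helper names are illustrative, not part of the test code):

#include <algorithm>
#include <cmath>
#include <cstdint>

// real_value = scale * (quantized_value - zero_point)
float dequantize(uint8_t q, float scale, int32_t zero_point)
{
  return scale * (static_cast<int32_t>(q) - zero_point);
}

uint8_t quantize(float real, float scale, int32_t zero_point)
{
  const int32_t q = static_cast<int32_t>(std::round(real / scale)) + zero_point;
  return static_cast<uint8_t>(std::min(255, std::max(0, q)));
}

// With scale = 1.0f and zero_point = 0: quantize(37.2f, 1.0f, 0) == 37,
// and dequantize(37, 1.0f, 0) == 37.0f.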
diff --git a/tools/nnapi_quickcheck/tests/conv_1.cpp b/tools/nnapi_quickcheck/tests/conv_1.cpp
deleted file mode 100644
index b5b145ccb..000000000
--- a/tools/nnapi_quickcheck/tests/conv_1.cpp
+++ /dev/null
@@ -1,207 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_conv_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "conv_1.lst"
-#undef INT_VALUE
-
- const int32_t STRIDE_H = STRIDE_H_Value();
- const int32_t STRIDE_W = STRIDE_W_Value();
-
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t KER_N = KER_N_Value();
- const int32_t KER_C = IFM_C_Value();
- const int32_t KER_H = KER_H_Value();
- const int32_t KER_W = KER_W_Value();
-
- const int32_t OFM_C = KER_N;
- const int32_t OFM_H = (IFM_H - KER_H) / STRIDE_H + 1;
- const int32_t OFM_W = (IFM_W - KER_W) / STRIDE_W + 1;
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(STRIDE_H);
- PRINT_VALUE(STRIDE_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(KER_N);
- PRINT_VALUE(KER_C);
- PRINT_VALUE(KER_H);
- PRINT_VALUE(KER_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- // Configure Kernel Data
- const uint32_t kernel_size = KER_N * KER_C * KER_H * KER_W;
- float kernel_data[kernel_size] = {
- 0.0f,
- };
-
- // Fill kernel data with random data
- {
- std::normal_distribution<float> kernel_dist(-1.0f, +1.0f);
-
- for (uint32_t off = 0; off < kernel_size; ++off)
- {
- kernel_data[off] = kernel_dist(random);
- }
- }
-
- // Configure Bias Data
- const auto bias_size = KER_N;
- float bias_data[bias_size] = {
- 0.0f,
- };
-
- // Fill bias data with random data
- {
- std::normal_distribution<float> bias_dist(-1.0f, +1.0f);
-
- for (uint32_t off = 0; off < bias_size; ++off)
- {
- bias_data[off] = bias_dist(random);
- }
- }
-
- // Assumption on this example
- assert(IFM_C == KER_C);
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(5);
-
- // Configure OFM
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {1 /*N*/, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure IFM
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // NOTE kernel_data & bias_data should live longer than interpreter!
- interp.SetTensorParametersReadOnly(
- 2, kTfLiteFloat32 /* type */, "filter" /* name */, {KER_N, KER_H, KER_W, KER_C} /* dims */,
- quantization, reinterpret_cast<const char *>(kernel_data), kernel_size * sizeof(float));
-
- interp.SetTensorParametersReadOnly(
- 3, kTfLiteFloat32 /* type */, "bias" /* name */, {bias_size} /* dims */, quantization,
- reinterpret_cast<const char *>(bias_data), bias_size * sizeof(float));
-
- // Add Convolution Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free()
- // So, param should be allocated with malloc()
- auto param = make_alloc<TfLiteConvParams>();
-
- param->padding = kTfLitePaddingValid;
- param->stride_width = STRIDE_W;
- param->stride_height = STRIDE_H;
- param->activation = kTfLiteActRelu;
-
- // Run Convolution and store its result into Tensor #0
- // - Read IFM from Tensor #1
- // - Read Filter from Tensor #2,
- // - Read Bias from Tensor #3
- interp.AddNodeWithParameters({1, 2, 3}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_CONV_2D, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/conv_1.lst b/tools/nnapi_quickcheck/tests/conv_1.lst
deleted file mode 100644
index c01fc90ee..000000000
--- a/tools/nnapi_quickcheck/tests/conv_1.lst
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_C, 2)
-INT_VALUE(IFM_H, 3)
-INT_VALUE(IFM_W, 4)
-
-INT_VALUE(KER_N, 1)
-INT_VALUE(KER_H, 3)
-INT_VALUE(KER_W, 4)
-
-INT_VALUE(STRIDE_H, 1)
-INT_VALUE(STRIDE_W, 1)
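conv_1.cpp computes its output extent with the VALID-padding formula OFM = (IFM - KER) / STRIDE + 1; with the defaults above the kernel covers the whole input and the output collapses to 1x1. A quick check of that arithmetic:

#include <cassert>

int main()
{
  const int IFM_H = 3, IFM_W = 4;   // from conv_1.lst
  const int KER_H = 3, KER_W = 4;   // kernel covers the whole input
  const int STRIDE_H = 1, STRIDE_W = 1;

  const int OFM_H = (IFM_H - KER_H) / STRIDE_H + 1; // (3 - 3) / 1 + 1 == 1
  const int OFM_W = (IFM_W - KER_W) / STRIDE_W + 1; // (4 - 4) / 1 + 1 == 1

  assert(OFM_H == 1 && OFM_W == 1);
  return 0;
}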
diff --git a/tools/nnapi_quickcheck/tests/conv_quan_1.cpp b/tools/nnapi_quickcheck/tests/conv_quan_1.cpp
deleted file mode 100644
index 28245477b..000000000
--- a/tools/nnapi_quickcheck/tests/conv_quan_1.cpp
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_conv_quan_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "conv_quan_1.lst"
-#undef INT_VALUE
-
- const int32_t STRIDE_H = STRIDE_H_Value();
- const int32_t STRIDE_W = STRIDE_W_Value();
-
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t KER_N = KER_N_Value();
- const int32_t KER_C = IFM_C_Value();
- const int32_t KER_H = KER_H_Value();
- const int32_t KER_W = KER_W_Value();
-
- const int32_t OFM_C = KER_N;
- const int32_t OFM_H = (IFM_H - KER_H) / STRIDE_H + 1;
- const int32_t OFM_W = (IFM_W - KER_W) / STRIDE_W + 1;
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(STRIDE_H);
- PRINT_VALUE(STRIDE_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(KER_N);
- PRINT_VALUE(KER_C);
- PRINT_VALUE(KER_H);
- PRINT_VALUE(KER_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- // Configure Kernel Data
- const uint32_t kernel_size = KER_N * KER_C * KER_H * KER_W;
- float kernel_data[kernel_size] = {
- 0.0f,
- };
-
- // Fill kernel data with random data
- {
- std::normal_distribution<float> kernel_dist(-1.0f, +1.0f);
-
- for (uint32_t off = 0; off < kernel_size; ++off)
- {
- kernel_data[off] = kernel_dist(random);
- }
- }
-
- // Configure Bias Data
- const auto bias_size = KER_N;
- int32_t bias_data[bias_size] = {
- 0,
- };
-
- // Fill bias data with random data
- {
- std::normal_distribution<float> bias_dist(-1.0f, +1.0f);
-
- for (uint32_t off = 0; off < bias_size; ++off)
- {
- bias_data[off] = static_cast<int32_t>(bias_dist(random));
- }
- }
-
- // Assumption on this example
- assert(IFM_C == KER_C);
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- TfLiteQuantizationParams quantization;
- quantization.zero_point = 0;
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(5);
-
- // Configure OFM
- float max_scale = (KER_C * KER_H * KER_W) *
- std::numeric_limits<uint8_t>::max(); // * IFM_scale(1.0f) * kernel_scale(1.0f)
- quantization.scale = max_scale;
- interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */,
- {1 /*N*/, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure IFM
- quantization.scale = 1.0f;
- interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "input" /* name */,
- {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // NOTE kernel_data & bias_data should live longer than interpreter!
- interp.SetTensorParametersReadOnly(
- 2, kTfLiteUInt8 /* type */, "filter" /* name */, {KER_N, KER_H, KER_W, KER_C} /* dims */,
- quantization, reinterpret_cast<const char *>(kernel_data), kernel_size * sizeof(uint8_t));
-
- quantization.scale *= quantization.scale;
- interp.SetTensorParametersReadOnly(
- 3, kTfLiteInt32 /* type */, "bias" /* name */, {bias_size} /* dims */, quantization,
- reinterpret_cast<const char *>(bias_data), bias_size * sizeof(int32_t));
-
- // Add Convolution Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free()
- // So, param should be allocated with malloc()
- auto param = make_alloc<TfLiteConvParams>();
-
- param->padding = kTfLitePaddingValid;
- param->stride_width = STRIDE_W;
- param->stride_height = STRIDE_H;
- param->activation = kTfLiteActRelu;
-
- // Run Convolution and store its result into Tensor #0
- // - Read IFM from Tensor #1
- // - Read Filter from Tensor #2,
- // - Read Bias from Tensor #3
- interp.AddNodeWithParameters({1, 2, 3}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_CONV_2D, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/conv_quan_1.lst b/tools/nnapi_quickcheck/tests/conv_quan_1.lst
deleted file mode 100644
index c01fc90ee..000000000
--- a/tools/nnapi_quickcheck/tests/conv_quan_1.lst
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_C, 2)
-INT_VALUE(IFM_H, 3)
-INT_VALUE(IFM_W, 4)
-
-INT_VALUE(KER_N, 1)
-INT_VALUE(KER_H, 3)
-INT_VALUE(KER_W, 4)
-
-INT_VALUE(STRIDE_H, 1)
-INT_VALUE(STRIDE_W, 1)
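The max_scale assigned to the output tensor in conv_quan_1.cpp reads as a worst-case bound on the convolution accumulator: each output element sums KER_C * KER_H * KER_W products, each at most 255 when the input and kernel scales are 1.0f as in the test. A sketch under that assumption:

#include <cstdint>
#include <limits>

// Worst-case magnitude of one convolution accumulator, assuming an input
// scale and a kernel scale of 1.0f (as conv_quan_1.cpp sets them).
float worst_case_output_scale(int32_t ker_c, int32_t ker_h, int32_t ker_w)
{
  const int32_t accum_len = ker_c * ker_h * ker_w; // products summed per output element
  return accum_len * static_cast<float>(std::numeric_limits<uint8_t>::max());
}

// With conv_quan_1.lst defaults: worst_case_output_scale(2, 3, 4) == 24 * 255 == 6120.0f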
diff --git a/tools/nnapi_quickcheck/tests/dconv_1.cpp b/tools/nnapi_quickcheck/tests/dconv_1.cpp
deleted file mode 100644
index 36ec7a943..000000000
--- a/tools/nnapi_quickcheck/tests/dconv_1.cpp
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_dconv_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "dconv_1.lst"
-#undef INT_VALUE
-
- const int32_t STRIDE_H = STRIDE_H_Value();
- const int32_t STRIDE_W = STRIDE_W_Value();
-
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t KER_C = KER_C_Value();
- const int32_t KER_H = KER_H_Value();
- const int32_t KER_W = KER_W_Value();
-
- const int32_t OFM_C = KER_C;
- const int32_t OFM_H = (IFM_H - KER_H) / STRIDE_H + 1;
- const int32_t OFM_W = (IFM_W - KER_W) / STRIDE_W + 1;
-
- const int32_t MULTIPLIER = MULTIPLIER_Value();
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(KER_C);
- PRINT_VALUE(KER_H);
- PRINT_VALUE(KER_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(STRIDE_H);
- PRINT_VALUE(STRIDE_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(MULTIPLIER);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- assert(MULTIPLIER * IFM_C == KER_C);
-
- // Configure Kernel Data
- const uint32_t kernel_size = KER_C * KER_H * KER_W;
- float kernel_data[kernel_size] = {
- 0.0f,
- };
-
- // Fill kernel data with random data
- {
- std::normal_distribution<float> kernel_dist(-1.0f, +1.0f);
-
- for (uint32_t off = 0; off < kernel_size; ++off)
- {
- kernel_data[off] = kernel_dist(random);
- }
- }
-
- // Configure Bias Data
- const auto bias_size = KER_C;
- float bias_data[bias_size] = {
- 0.0f,
- };
-
- // Fill bias data with random data
- {
- std::normal_distribution<float> bias_dist(-1.0f, +1.0f);
-
- for (uint32_t off = 0; off < bias_size; ++off)
- {
- bias_data[off] = bias_dist(random);
- }
- }
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(4);
-
- // Configure OFM
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {1 /*N*/, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure IFM
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // NOTE kernel_data & bias_data should live longer than interpreter!
- interp.SetTensorParametersReadOnly(
- 2, kTfLiteFloat32 /* type */, "filter" /* name */, {1, KER_H, KER_W, KER_C} /* dims */,
- quantization, reinterpret_cast<const char *>(kernel_data), kernel_size * sizeof(float));
-
- interp.SetTensorParametersReadOnly(
- 3, kTfLiteFloat32 /* type */, "bias" /* name */, {bias_size} /* dims */, quantization,
- reinterpret_cast<const char *>(bias_data), bias_size * sizeof(float));
-
- // Add Depthwise Convolution Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free()
- // So, param should be allocated with malloc()
- auto param = make_alloc<TfLiteDepthwiseConvParams>();
-
- param->padding = kTfLitePaddingValid;
- param->stride_width = STRIDE_W;
- param->stride_height = STRIDE_H;
- param->depth_multiplier = MULTIPLIER;
- param->activation = kTfLiteActRelu;
-
- // Run Depthwise Convolution and store its result into Tensor #0
- // - Read IFM from Tensor #1
- // - Read Filter from Tensor #2,
- // - Read Bias from Tensor #3
- interp.AddNodeWithParameters({1, 2, 3}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_DEPTHWISE_CONV_2D, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/dconv_1.lst b/tools/nnapi_quickcheck/tests/dconv_1.lst
deleted file mode 100644
index da851ae2d..000000000
--- a/tools/nnapi_quickcheck/tests/dconv_1.lst
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_C, 2)
-INT_VALUE(IFM_H, 3)
-INT_VALUE(IFM_W, 4)
-
-INT_VALUE(KER_C, 2)
-INT_VALUE(KER_H, 3)
-INT_VALUE(KER_W, 4)
-
-INT_VALUE(MULTIPLIER, 1)
-
-INT_VALUE(STRIDE_H, 1)
-INT_VALUE(STRIDE_W, 1)
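dconv_1.cpp asserts the depthwise invariant MULTIPLIER * IFM_C == KER_C and lays the filter out as {1, KER_H, KER_W, KER_C}: each input channel feeds MULTIPLIER output channels. The channel mapping, in miniature:

// For DEPTHWISE_CONV_2D, output channel c is computed from input channel
// c / MULTIPLIER. With IFM_C = 2 and MULTIPLIER = 1 (the defaults above),
// KER_C = OFM_C = 2 and the mapping is the identity.
int input_channel_for(int out_channel, int multiplier) { return out_channel / multiplier; }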
diff --git a/tools/nnapi_quickcheck/tests/dconv_quan_1.cpp b/tools/nnapi_quickcheck/tests/dconv_quan_1.cpp
deleted file mode 100644
index 8305ad140..000000000
--- a/tools/nnapi_quickcheck/tests/dconv_quan_1.cpp
+++ /dev/null
@@ -1,209 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_dconv_quan_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "dconv_quan_1.lst"
-#undef INT_VALUE
-
- const int32_t STRIDE_H = STRIDE_H_Value();
- const int32_t STRIDE_W = STRIDE_W_Value();
-
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t KER_C = KER_C_Value();
- const int32_t KER_H = KER_H_Value();
- const int32_t KER_W = KER_W_Value();
-
- const int32_t OFM_C = KER_C;
- const int32_t OFM_H = (IFM_H - KER_H) / STRIDE_H + 1;
- const int32_t OFM_W = (IFM_W - KER_W) / STRIDE_W + 1;
-
- const int32_t MULTIPLIER = MULTIPLIER_Value();
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(KER_C);
- PRINT_VALUE(KER_H);
- PRINT_VALUE(KER_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(STRIDE_H);
- PRINT_VALUE(STRIDE_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(MULTIPLIER);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- assert(MULTIPLIER * IFM_C == KER_C);
-
- // Configure Kernel Data
- const uint32_t kernel_size = KER_C * KER_H * KER_W;
- float kernel_data[kernel_size] = {
- 0.0f,
- };
-
- // Fill kernel data with random data
- {
- std::normal_distribution<float> kernel_dist(-1.0f, +1.0f);
-
- for (uint32_t off = 0; off < kernel_size; ++off)
- {
- kernel_data[off] = kernel_dist(random);
- }
- }
-
- // Configure Bias Data
- const auto bias_size = KER_C;
- int32_t bias_data[bias_size] = {
- 0,
- };
-
- // Fill bias data with random data
- {
- std::normal_distribution<float> bias_dist(-1.0f, +1.0f);
-
- for (uint32_t off = 0; off < bias_size; ++off)
- {
- bias_data[off] = static_cast<int32_t>(bias_dist(random));
- }
- }
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- TfLiteQuantizationParams quantization;
- quantization.zero_point = 0;
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(4);
-
- // Configure OFM
- float max_scale = (1 * KER_C * KER_H * KER_W) *
- std::numeric_limits<uint8_t>::max(); // * IFM_scale(1.0f) * kernel_scale(1.0f)
- quantization.scale = max_scale;
- interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */,
- {1 /*N*/, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure IFM
- quantization.scale = 1.0f;
- interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "input" /* name */,
- {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // NOTE kernel_data & bias_data should live longer than interpreter!
- interp.SetTensorParametersReadOnly(
- 2, kTfLiteUInt8 /* type */, "filter" /* name */, {1, KER_H, KER_W, KER_C} /* dims */,
- quantization, reinterpret_cast<const char *>(kernel_data), kernel_size * sizeof(uint8_t));
-
- quantization.scale *= quantization.scale;
- interp.SetTensorParametersReadOnly(
- 3, kTfLiteInt32 /* type */, "bias" /* name */, {bias_size} /* dims */, quantization,
- reinterpret_cast<const char *>(bias_data), bias_size * sizeof(int32_t));
-
- // Add Depthwise Convolution Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free()
- // So, param should be allocated with malloc()
- auto param = make_alloc<TfLiteDepthwiseConvParams>();
-
- param->padding = kTfLitePaddingValid;
- param->stride_width = STRIDE_W;
- param->stride_height = STRIDE_H;
- param->depth_multiplier = MULTIPLIER;
- param->activation = kTfLiteActRelu;
-
- // Run Depthwise Convolution and store its result into Tensor #0
- // - Read IFM from Tensor #1
- // - Read Filter from Tensor #2,
- // - Read Bias from Tensor #3
- interp.AddNodeWithParameters({1, 2, 3}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_DEPTHWISE_CONV_2D, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/dconv_quan_1.lst b/tools/nnapi_quickcheck/tests/dconv_quan_1.lst
deleted file mode 100644
index da851ae2d..000000000
--- a/tools/nnapi_quickcheck/tests/dconv_quan_1.lst
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_C, 2)
-INT_VALUE(IFM_H, 3)
-INT_VALUE(IFM_W, 4)
-
-INT_VALUE(KER_C, 2)
-INT_VALUE(KER_H, 3)
-INT_VALUE(KER_W, 4)
-
-INT_VALUE(MULTIPLIER, 1)
-
-INT_VALUE(STRIDE_H, 1)
-INT_VALUE(STRIDE_W, 1)
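The quantization.scale *= quantization.scale line in the quantized tests, applied just before the bias tensor, matches the usual TF Lite convention that the int32 bias carries the product of the input and filter scales; since both are 1.0f here, the bias scale is also 1.0f. As a sketch:

// TF Lite convention for quantized conv-like ops: the int32 bias shares the
// accumulator's scale, i.e. the product of the input and filter scales.
float bias_scale(float input_scale, float filter_scale)
{
  return input_scale * filter_scale; // 1.0f * 1.0f == 1.0f in these tests
}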
diff --git a/tools/nnapi_quickcheck/tests/dequantize_1.cpp b/tools/nnapi_quickcheck/tests/dequantize_1.cpp
deleted file mode 100644
index e725fa220..000000000
--- a/tools/nnapi_quickcheck/tests/dequantize_1.cpp
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_dequantize_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "dequantize_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_N = IFM_N_Value();
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t OFM_N = IFM_N;
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_N);
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- TfLiteQuantizationParams quantization;
-
- quantization.scale = 1;
- quantization.zero_point = 0;
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(2);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input
- interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "input" /* name */,
- {IFM_N, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // Add DEQUANTIZE Node
- // Run DEQUANTIZE and store the result into Tensor #0
- // - Read input from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr,
- BuiltinOpResolver().FindOp(BuiltinOperator_DEQUANTIZE, 1));
-
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/dequantize_1.lst b/tools/nnapi_quickcheck/tests/dequantize_1.lst
deleted file mode 100644
index a0077cb95..000000000
--- a/tools/nnapi_quickcheck/tests/dequantize_1.lst
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_N, 1)
-INT_VALUE(IFM_C, 3)
-INT_VALUE(IFM_H, 320)
-INT_VALUE(IFM_W, 320)
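With scale 1 and zero point 0, the DEQUANTIZE node in dequantize_1.cpp is a pure uint8-to-float widening over an identically shaped 1x320x320x3 buffer. Element-wise, the kernel reduces to something like:

#include <cstddef>
#include <cstdint>
#include <vector>

// Element-wise DEQUANTIZE with scale = 1.0f and zero_point = 0, as configured
// in dequantize_1.cpp: a uint8 -> float widening over an equally shaped buffer.
std::vector<float> dequantize(const std::vector<uint8_t> &input)
{
  std::vector<float> output(input.size());
  for (std::size_t i = 0; i < input.size(); ++i)
    output[i] = 1.0f * (input[i] - 0);
  return output;
}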
diff --git a/tools/nnapi_quickcheck/tests/div_1.cpp b/tools/nnapi_quickcheck/tests/div_1.cpp
deleted file mode 100644
index 26dfbbe53..000000000
--- a/tools/nnapi_quickcheck/tests/div_1.cpp
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_div_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "div_1.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_N = LEFT_N_Value();
- const int32_t LEFT_C = LEFT_C_Value();
- const int32_t LEFT_H = LEFT_H_Value();
- const int32_t LEFT_W = LEFT_W_Value();
-
- const int32_t RIGHT_N = RIGHT_N_Value();
- const int32_t RIGHT_C = RIGHT_C_Value();
- const int32_t RIGHT_H = RIGHT_H_Value();
- const int32_t RIGHT_W = RIGHT_W_Value();
-
- const int32_t OFM_N = std::max(LEFT_N, RIGHT_N);
- const int32_t OFM_C = std::max(LEFT_C, RIGHT_C);
- const int32_t OFM_H = std::max(LEFT_H, RIGHT_H);
- const int32_t OFM_W = std::max(LEFT_W, RIGHT_W);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_N);
- PRINT_VALUE(LEFT_C);
- PRINT_VALUE(LEFT_H);
- PRINT_VALUE(LEFT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT_N);
- PRINT_VALUE(RIGHT_C);
- PRINT_VALUE(RIGHT_H);
- PRINT_VALUE(RIGHT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */,
- {LEFT_N, LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization);
-
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */,
- {RIGHT_N, RIGHT_H, RIGHT_W, RIGHT_C} /* dims */,
- quantization);
-
- // Add Division Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free()
- // So, param should be allocated with malloc()
- auto param = make_alloc<TfLiteDivParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Div and store the result into Tensor #0
- // - Read Left from Tensor #1
- // - Read Right from Tensor #2,
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_DIV, 1));
-
- interp.SetInputs({1, 2});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/div_1.lst b/tools/nnapi_quickcheck/tests/div_1.lst
deleted file mode 100644
index fa17caebb..000000000
--- a/tools/nnapi_quickcheck/tests/div_1.lst
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_N, 1)
-INT_VALUE(LEFT_C, 3)
-INT_VALUE(LEFT_H, 16)
-INT_VALUE(LEFT_W, 16)
-
-INT_VALUE(RIGHT_N, 1)
-INT_VALUE(RIGHT_C, 3)
-INT_VALUE(RIGHT_H, 16)
-INT_VALUE(RIGHT_W, 16)
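div_1.cpp derives every output dimension as max(LEFT, RIGHT); that coincides with elementwise broadcasting only when the two extents match or one of them is 1, and with the identical 1x3x16x16 shapes above the output is simply 1x3x16x16. The per-axis rule, roughly:

#include <algorithm>
#include <stdexcept>

// Broadcast one axis: the two extents must match, or one of them must be 1.
int broadcast_dim(int lhs, int rhs)
{
  if (lhs != rhs && lhs != 1 && rhs != 1)
    throw std::invalid_argument{"shapes are not broadcastable"};
  return std::max(lhs, rhs);
}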
diff --git a/tools/nnapi_quickcheck/tests/div_2.cpp b/tools/nnapi_quickcheck/tests/div_2.cpp
deleted file mode 100644
index df4efa4ff..000000000
--- a/tools/nnapi_quickcheck/tests/div_2.cpp
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_div_2, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "div_2.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_N = LEFT_N_Value();
- const int32_t LEFT_C = LEFT_C_Value();
- const int32_t LEFT_H = LEFT_H_Value();
- const int32_t LEFT_W = LEFT_W_Value();
-
- const int32_t RIGHT = RIGHT_Value();
-
- const int32_t OFM_N = LEFT_N;
- const int32_t OFM_C = LEFT_C;
- const int32_t OFM_H = LEFT_H;
- const int32_t OFM_W = LEFT_W;
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_N);
- PRINT_VALUE(LEFT_C);
- PRINT_VALUE(LEFT_H);
- PRINT_VALUE(LEFT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */,
- {LEFT_N, LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization);
-
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */,
- {RIGHT} /* dims */, quantization);
-
- // Add Division Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free()
- // So, param should be allocated with malloc()
- auto param = make_alloc<TfLiteDivParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Div and store the result into Tensor #0
- // - Read Left from Tensor #1
- // - Read Right from Tensor #2,
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_DIV, 1));
-
- interp.SetInputs({1, 2});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/div_2.lst b/tools/nnapi_quickcheck/tests/div_2.lst
deleted file mode 100644
index cd36ac199..000000000
--- a/tools/nnapi_quickcheck/tests/div_2.lst
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_N, 1)
-INT_VALUE(LEFT_C, 3)
-INT_VALUE(LEFT_H, 16)
-INT_VALUE(LEFT_W, 16)
-
-INT_VALUE(RIGHT, 1)
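div_2.cpp exercises the degenerate broadcast: the right operand is a single-element rank-1 tensor ({RIGHT} = {1}), so every element of the left operand is divided by the same scalar. In effect:

#include <cstddef>

// Broadcasting a single-element right operand over the whole left operand.
void div_scalar(float *out, const float *left, std::size_t len, float right)
{
  for (std::size_t i = 0; i < len; ++i)
    out[i] = left[i] / right;
}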
diff --git a/tools/nnapi_quickcheck/tests/fully_connected_1.cpp b/tools/nnapi_quickcheck/tests/fully_connected_1.cpp
deleted file mode 100644
index 43cd0a470..000000000
--- a/tools/nnapi_quickcheck/tests/fully_connected_1.cpp
+++ /dev/null
@@ -1,187 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-template <typename T> T *make_malloc(void) { return reinterpret_cast<T *>(malloc(sizeof(T))); }
-
-TEST(NNAPI_Quickcheck_fully_connected_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "conv_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t KER_H = KER_H_Value();
- const int32_t KER_W = IFM_C_Value() * IFM_H_Value() * IFM_W_Value();
-
- const int32_t OUT_LEN = KER_H;
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(KER_H);
- PRINT_VALUE(KER_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OUT_LEN);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- // Configure Kernel Data
- const uint32_t kernel_size = KER_H * KER_W;
- float kernel_data[kernel_size] = {
- 0.0f,
- };
-
- // Fill kernel data with random data
- {
- std::normal_distribution<float> kernel_dist(-1.0f, +1.0f);
-
- for (uint32_t off = 0; off < kernel_size; ++off)
- {
- kernel_data[off] = kernel_dist(random);
- }
- }
-
- // Configure Bias Data
- const auto bias_size = KER_H;
- float bias_data[bias_size] = {
- 0.0f,
- };
-
- // Fill bias data with random data
- {
- std::normal_distribution<float> bias_dist(-1.0f, +1.0f);
-
- for (uint32_t off = 0; off < bias_size; ++off)
- {
- bias_data[off] = bias_dist(random);
- }
- }
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(4);
-
- // Configure OFM
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {1 /*N*/, KER_H} /* dims */, quantization);
-
- // Configure IFM
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // NOTE kernel_data & bias_data should live longer than interpreter!
- interp.SetTensorParametersReadOnly(
- 2, kTfLiteFloat32 /* type */, "filter" /* name */, {KER_H, KER_W} /* dims */, quantization,
- reinterpret_cast<const char *>(kernel_data), kernel_size * sizeof(float));
-
- interp.SetTensorParametersReadOnly(
- 3, kTfLiteFloat32 /* type */, "bias" /* name */, {bias_size} /* dims */, quantization,
- reinterpret_cast<const char *>(bias_data), bias_size * sizeof(float));
-
- // Add Fully Connected Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free()
- // So, param should be allocated with malloc()
- auto param = make_malloc<TfLiteFullyConnectedParams>();
-
- param->activation = kTfLiteActRelu;
-
- // Run Fully Connected and store its result into Tensor #0
- // - Read IFM from Tensor #1
- // - Read Filter from Tensor #2,
- // - Read Bias from Tensor #3
- interp.AddNodeWithParameters({1, 2, 3}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_FULLY_CONNECTED, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/fully_connected_1.lst b/tools/nnapi_quickcheck/tests/fully_connected_1.lst
deleted file mode 100644
index 22acb9f7f..000000000
--- a/tools/nnapi_quickcheck/tests/fully_connected_1.lst
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_C, 2)
-INT_VALUE(IFM_H, 3)
-INT_VALUE(IFM_W, 4)
-
-INT_VALUE(KER_H, 1)
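fully_connected_1.cpp sets KER_W = IFM_C * IFM_H * IFM_W, flattening the whole 1xHxWxC input into one row vector and multiplying it by a KER_H x KER_W weight matrix to produce KER_H outputs. The core computation, activation aside, looks like:

#include <cstddef>
#include <vector>

// output[r] = bias[r] + sum_k input[k] * weight[r][k], where the NHWC input
// is flattened to length KER_W = IFM_C * IFM_H * IFM_W.
std::vector<float> fully_connected(const std::vector<float> &input,               // [KER_W]
                                   const std::vector<std::vector<float>> &weight, // [KER_H][KER_W]
                                   const std::vector<float> &bias)                // [KER_H]
{
  std::vector<float> output(bias);
  for (std::size_t r = 0; r < weight.size(); ++r)
    for (std::size_t k = 0; k < weight[r].size(); ++k)
      output[r] += input[k] * weight[r][k];
  return output;
}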
diff --git a/tools/nnapi_quickcheck/tests/fully_connected_quan_1.cpp b/tools/nnapi_quickcheck/tests/fully_connected_quan_1.cpp
deleted file mode 100644
index 2c6883546..000000000
--- a/tools/nnapi_quickcheck/tests/fully_connected_quan_1.cpp
+++ /dev/null
@@ -1,189 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-template <typename T> T *make_malloc(void) { return reinterpret_cast<T *>(malloc(sizeof(T))); }
-
-TEST(NNAPI_Quickcheck_fully_connected_quan_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "fully_connected_quan_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t KER_H = KER_H_Value();
- const int32_t KER_W = IFM_C_Value() * IFM_H_Value() * IFM_W_Value();
-
- const int32_t OUT_LEN = KER_H;
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(KER_H);
- PRINT_VALUE(KER_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OUT_LEN);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- // Configure Kernel Data
- const uint32_t kernel_size = KER_H * KER_W;
- float kernel_data[kernel_size] = {
- 0.0f,
- };
-
- // Fill kernel data with random data
- {
- std::normal_distribution<float> kernel_dist(-1.0f, +1.0f);
-
- for (uint32_t off = 0; off < kernel_size; ++off)
- {
- kernel_data[off] = kernel_dist(random);
- }
- }
-
- // Configure Bias Data
- const auto bias_size = KER_H;
- int32_t bias_data[bias_size] = {
- 0,
- };
-
- // Fill bias data with random data
- {
- std::normal_distribution<float> bias_dist(-1.0f, +1.0f);
-
- for (uint32_t off = 0; off < bias_size; ++off)
- {
- bias_data[off] = static_cast<int32_t>(bias_dist(random));
- }
- }
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
- quantization.scale = FLOAT_NEAREST_TO_1;
- quantization.zero_point = 0;
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(4);
-
- // Configure OFM
- interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */,
- {1 /*N*/, KER_H} /* dims */, quantization);
-
- // Configure IFM
- interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "input" /* name */,
- {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // NOTE kernel_data & bias_data should live longer than interpreter!
- interp.SetTensorParametersReadOnly(
- 2, kTfLiteUInt8 /* type */, "filter" /* name */, {KER_H, KER_W} /* dims */, quantization,
- reinterpret_cast<const char *>(kernel_data), kernel_size * sizeof(uint8_t));
-
- interp.SetTensorParametersReadOnly(
- 3, kTfLiteInt32 /* type */, "bias" /* name */, {bias_size} /* dims */, quantization,
- reinterpret_cast<const char *>(bias_data), bias_size * sizeof(int32_t));
-
- // Add Fully Connected Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free()
- // So, param should be allocated with malloc()
- auto param = make_malloc<TfLiteFullyConnectedParams>();
-
- param->activation = kTfLiteActRelu;
-
- // Run Fully Connected and store its result into Tensor #0
- // - Read IFM from Tensor #1
- // - Read Filter from Tensor #2,
- // - Read Bias from Tensor #3
- interp.AddNodeWithParameters({1, 2, 3}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_FULLY_CONNECTED, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/fully_connected_quan_1.lst b/tools/nnapi_quickcheck/tests/fully_connected_quan_1.lst
deleted file mode 100644
index 22acb9f7f..000000000
--- a/tools/nnapi_quickcheck/tests/fully_connected_quan_1.lst
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_C, 2)
-INT_VALUE(IFM_H, 3)
-INT_VALUE(IFM_W, 4)
-
-INT_VALUE(KER_H, 1)
diff --git a/tools/nnapi_quickcheck/tests/gather_1.cpp b/tools/nnapi_quickcheck/tests/gather_1.cpp
deleted file mode 100644
index 4ab164ea1..000000000
--- a/tools/nnapi_quickcheck/tests/gather_1.cpp
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <iostream>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_gather_1, simple_test)
-{
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Set random test parameters
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "gather_1.lst"
-#undef INT_VALUE
-
- const int32_t INPUT_DATA = INPUT_DATA_Value();
- const int32_t INDEX_DATA = INDEX_DATA_Value();
-
- const int32_t OUTPUT_DATA = INDEX_DATA;
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(INPUT_DATA);
- PRINT_VALUE(INDEX_DATA);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OUTPUT_DATA);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization;
-
- quantization.scale = 1;
- quantization.zero_point = 0;
-
- // On an AddTensors(N) call, the T/F Lite interpreter creates N tensors whose indices are in [0, N)
- interp.AddTensors(3);
-
- // Configure INPUT_DATA
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "input" /* name */,
- {INPUT_DATA} /* dims */, quantization);
-
- // Configure INDEX_DATA
- interp.SetTensorParametersReadWrite(1, kTfLiteInt32 /* type */, "index" /* name */,
- {INDEX_DATA} /* dims */, quantization);
-
- // Configure OUTPUT_DATA
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "output_data" /* name */,
- {OUTPUT_DATA} /* dims */, quantization);
-
- auto *param = reinterpret_cast<TfLiteGatherParams *>(malloc(sizeof(TfLiteGatherParams)));
-
- param->axis = 0;
-
- // Add GATHER Node
- // Run GATHER and store its result into Tensor #2
- // - Read input data and index_data from Tensor #0 and #1, respectively
- interp.AddNodeWithParameters({0, 1}, {2}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_GATHER, 1));
-
- // Set Tensor #0 and #1 as Input, and Tensor #2 as Output
- interp.SetInputs({0, 1});
- interp.SetOutputs({2});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/gather_1.lst b/tools/nnapi_quickcheck/tests/gather_1.lst
deleted file mode 100644
index 923a05677..000000000
--- a/tools/nnapi_quickcheck/tests/gather_1.lst
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(INPUT_DATA, 8192)
-INT_VALUE(INDEX_DATA, 300)
diff --git a/tools/nnapi_quickcheck/tests/gather_2.cpp b/tools/nnapi_quickcheck/tests/gather_2.cpp
deleted file mode 100644
index ac9ec8b3b..000000000
--- a/tools/nnapi_quickcheck/tests/gather_2.cpp
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <iostream>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_gather_2, simple_test)
-{
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Set random test parameters
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "gather_2.lst"
-#undef INT_VALUE
-
- const int32_t INPUT_DATA_H = INPUT_DATA_H_Value();
- const int32_t INPUT_DATA_W = INPUT_DATA_W_Value();
- const int32_t INDEX_DATA = INDEX_DATA_Value();
-
- const int32_t OUTPUT_DATA_H = INPUT_DATA_H;
- const int32_t OUTPUT_DATA_W = INDEX_DATA;
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(INPUT_DATA_H);
- PRINT_VALUE(INPUT_DATA_W);
- PRINT_VALUE(INDEX_DATA);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OUTPUT_DATA_H);
- PRINT_VALUE(OUTPUT_DATA_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization;
-
- quantization.scale = 1;
- quantization.zero_point = 0;
-
- // On an AddTensors(N) call, the T/F Lite interpreter creates N tensors whose indices are in [0, N)
- interp.AddTensors(3);
-
- // Configure INPUT_DATA
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "input" /* name */,
- {INPUT_DATA_H, INPUT_DATA_W} /* dims */, quantization);
-
- // Configure INDEX_DATA
- interp.SetTensorParametersReadWrite(1, kTfLiteInt32 /* type */, "index" /* name */,
- {INDEX_DATA} /* dims */, quantization);
-
- // Configure OUTPUT_DATA
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "output_data" /* name */,
- {OUTPUT_DATA_H, OUTPUT_DATA_W} /* dims */, quantization);
-
- auto *param = reinterpret_cast<TfLiteGatherParams *>(malloc(sizeof(TfLiteGatherParams)));
-
- param->axis = 0;
-
- // Add GATHER Node
- // Run GATHER and store its result into Tensor #2
- // - Read input data and index_data from Tensor #0 and #1, respectively
- interp.AddNodeWithParameters({0, 1}, {2}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_GATHER, 1));
-
- // Set Tensor #0 and #1 as Input, and Tensor #2 as Output
- interp.SetInputs({0, 1});
- interp.SetOutputs({2});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/gather_2.lst b/tools/nnapi_quickcheck/tests/gather_2.lst
deleted file mode 100644
index 5bf6bd33a..000000000
--- a/tools/nnapi_quickcheck/tests/gather_2.lst
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(INPUT_DATA_H, 128192)
-INT_VALUE(INPUT_DATA_W, 4)
-INT_VALUE(INDEX_DATA, 300)
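With axis = 0, these GATHER tests select whole rows: input {INPUT_DATA_H, INPUT_DATA_W} indexed by {INDEX_DATA} naturally produces {INDEX_DATA, INPUT_DATA_W}. The dims registered for the output tensor are effectively placeholders, since TF Lite recomputes output shapes in the op's Prepare step. A reference sketch of the row gather itself:

#include <cstdint>
#include <vector>

// Row gather (axis 0) over a {rows, cols} matrix, mirroring what the GATHER
// node computes for the shapes in gather_2.lst.
std::vector<float> gather_rows(const std::vector<float> &input, int32_t cols,
                               const std::vector<int32_t> &indices)
{
  std::vector<float> output;
  output.reserve(indices.size() * cols);
  for (int32_t idx : indices)
    output.insert(output.end(), input.begin() + idx * cols, input.begin() + (idx + 1) * cols);
  return output;
}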
diff --git a/tools/nnapi_quickcheck/tests/logistic_quan_1.cpp b/tools/nnapi_quickcheck/tests/logistic_quan_1.cpp
deleted file mode 100644
index 0b0a69029..000000000
--- a/tools/nnapi_quickcheck/tests/logistic_quan_1.cpp
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_logistic_quan_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "logistic_quan_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_N = IFM_N_Value();
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t OFM_N = IFM_N;
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_N);
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- TfLiteQuantizationParams in_quantization;
- in_quantization.scale = 0.5f;
- in_quantization.zero_point = 0;
-
- TfLiteQuantizationParams out_quantization;
- out_quantization.scale = 1.f / 256;
- out_quantization.zero_point = 0;
-
- // On an AddTensors(N) call, the T/F Lite interpreter creates N tensors whose indices are in [0, N)
- interp.AddTensors(2);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, out_quantization);
-
- // Configure input
- interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "input" /* name */,
- {IFM_N, IFM_H, IFM_W, IFM_C} /* dims */, in_quantization);
-
- // Add Logistic Node
- // Run Logistic and store the result into Tensor #0
- // - Read input from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr,
- BuiltinOpResolver().FindOp(BuiltinOperator_LOGISTIC, 1));
-
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
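The context.h formula quoted in these tests is easy to sanity-check against the logistic test's output quantization (scale = 1/256, zero_point = 0), which maps the uint8 codes onto [0, 255/256], a natural fit for the logistic function's (0, 1) range:

#include <cstdio>

int main()
{
  const float scale = 1.0f / 256;
  const int zero_point = 0;
  for (int q : {0, 128, 255})
  {
    // real_value = scale * (quantized_value - zero_point)
    std::printf("q=%3d -> real=%f\n", q, scale * (q - zero_point));
  }
  // Prints 0.000000, 0.500000, and 0.996094: the codes span [0, 255/256].
  return 0;
}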
diff --git a/tools/nnapi_quickcheck/tests/logistic_quan_1.lst b/tools/nnapi_quickcheck/tests/logistic_quan_1.lst
deleted file mode 100644
index 9b3d8ebcf..000000000
--- a/tools/nnapi_quickcheck/tests/logistic_quan_1.lst
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_N, 1)
-INT_VALUE(IFM_C, 1)
-INT_VALUE(IFM_H, 2)
-INT_VALUE(IFM_W, 2)
diff --git a/tools/nnapi_quickcheck/tests/max_pool_1.cpp b/tools/nnapi_quickcheck/tests/max_pool_1.cpp
deleted file mode 100644
index 62f985d65..000000000
--- a/tools/nnapi_quickcheck/tests/max_pool_1.cpp
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <iostream>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_max_pool_1, simple_test)
-{
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Set random test parameters
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "max_pool_1.lst"
-#undef INT_VALUE
-
- const TfLitePadding PADDING_TYPE = static_cast<TfLitePadding>(PADDING_TYPE_Value());
-
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t KER_H = KER_H_Value();
- const int32_t KER_W = KER_W_Value();
-
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = OFM_H_Value();
- const int32_t OFM_W = OFM_W_Value();
-
- assert((OFM_H >= (IFM_H - KER_H)));
- assert((OFM_W >= (IFM_W - KER_W)));
- assert((kTfLitePaddingSame == PADDING_TYPE) || (kTfLitePaddingValid == PADDING_TYPE));
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(PADDING_TYPE);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(KER_H);
- PRINT_VALUE(KER_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On an AddTensors(N) call, the T/F Lite interpreter creates N tensors whose indices are in [0, N)
- interp.AddTensors(2);
-
- // Configure OFM
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {1 /*N*/, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure IFM
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // Add Max Pooling Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free(),
- // so param must be allocated with malloc
- auto param = make_alloc<TfLitePoolParams>();
-
- param->padding = PADDING_TYPE;
- param->stride_width = 1;
- param->stride_height = 1;
- param->filter_width = KER_W;
- param->filter_height = KER_H;
- param->activation = kTfLiteActNone;
-
- // Run Max Pooling and store its result into Tensor #0
- // - Read IFM from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_MAX_POOL_2D, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
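The NOTE about AddNodeWithParameters explains the allocator choice: TF Lite releases builtin params with free(), so pairing the allocation with new would be undefined behavior. memory.h is not shown in this section, so the following make_alloc is only a plausible reconstruction of that helper:

#include <cstdlib>
#include <new>

// Allocates with malloc because the interpreter deallocates with free().
template <typename T> T *make_alloc(void)
{
  auto *ptr = reinterpret_cast<T *>(std::malloc(sizeof(T)));
  if (ptr == nullptr)
    throw std::bad_alloc{};
  return ptr;
}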
diff --git a/tools/nnapi_quickcheck/tests/max_pool_1.lst b/tools/nnapi_quickcheck/tests/max_pool_1.lst
deleted file mode 100644
index 4b5c1304e..000000000
--- a/tools/nnapi_quickcheck/tests/max_pool_1.lst
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_C, 2)
-INT_VALUE(IFM_H, 3)
-INT_VALUE(IFM_W, 4)
-
-INT_VALUE(KER_N, 1)
-INT_VALUE(KER_H, 3)
-INT_VALUE(KER_W, 4)
-
-INT_VALUE(OFM_H, 1)
-INT_VALUE(OFM_W, 1)
-
-// Default is kTfLitePaddingValid (= 2)
-INT_VALUE(PADDING_TYPE, 2)
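With these defaults (a 3x4 kernel over a 3x4 input, VALID padding, stride 1) the pool degenerates to a global max, which is why OFM_H and OFM_W are both 1. The standard identity, checked at compile time:

// VALID-padding output extent for stride-1 pooling.
constexpr int valid_out(int in, int ker) { return in - ker + 1; }

static_assert(valid_out(3, 3) == 1, "OFM_H for the max_pool_1 defaults");
static_assert(valid_out(4, 4) == 1, "OFM_W for the max_pool_1 defaults");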
diff --git a/tools/nnapi_quickcheck/tests/max_pool_quan_1.cpp b/tools/nnapi_quickcheck/tests/max_pool_quan_1.cpp
deleted file mode 100644
index 2c05a7d22..000000000
--- a/tools/nnapi_quickcheck/tests/max_pool_quan_1.cpp
+++ /dev/null
@@ -1,158 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <iostream>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_max_pool_quan_1, simple_test)
-{
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Set random test parameters
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "max_pool_quan_1.lst"
-#undef INT_VALUE
-
- const TfLitePadding PADDING_TYPE = static_cast<TfLitePadding>(PADDING_TYPE_Value());
-
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t KER_H = KER_H_Value();
- const int32_t KER_W = KER_W_Value();
-
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = OFM_H_Value();
- const int32_t OFM_W = OFM_W_Value();
-
- assert((OFM_H >= (IFM_H - KER_H)));
- assert((OFM_W >= (IFM_W - KER_W)));
- assert((kTfLitePaddingSame == PADDING_TYPE) || (kTfLitePaddingValid == PADDING_TYPE));
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(PADDING_TYPE);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(KER_H);
- PRINT_VALUE(KER_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization;
- quantization.scale = 1.0f;
- quantization.zero_point = 0;
-
- // On an AddTensors(N) call, the T/F Lite interpreter creates N tensors whose indices are in [0, N)
- interp.AddTensors(2);
-
- // Configure OFM
- interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */,
- {1 /*N*/, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure IFM
- interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "input" /* name */,
- {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // Add Max Pooling Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free(),
- // so param must be allocated with malloc
- auto param = make_alloc<TfLitePoolParams>();
-
- param->padding = PADDING_TYPE;
- param->stride_width = 1;
- param->stride_height = 1;
- param->filter_width = KER_W;
- param->filter_height = KER_H;
- param->activation = kTfLiteActNone;
-
- // Run Max Pooling and store its result into Tensor #0
- // - Read IFM from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_MAX_POOL_2D, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/max_pool_quan_1.lst b/tools/nnapi_quickcheck/tests/max_pool_quan_1.lst
deleted file mode 100644
index 4b5c1304e..000000000
--- a/tools/nnapi_quickcheck/tests/max_pool_quan_1.lst
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_C, 2)
-INT_VALUE(IFM_H, 3)
-INT_VALUE(IFM_W, 4)
-
-INT_VALUE(KER_N, 1)
-INT_VALUE(KER_H, 3)
-INT_VALUE(KER_W, 4)
-
-INT_VALUE(OFM_H, 1)
-INT_VALUE(OFM_W, 1)
-
-// Default is kTfLitePaddingValid (= 2)
-INT_VALUE(PADDING_TYPE, 2)
diff --git a/tools/nnapi_quickcheck/tests/mul_1.cpp b/tools/nnapi_quickcheck/tests/mul_1.cpp
deleted file mode 100644
index 57ab71350..000000000
--- a/tools/nnapi_quickcheck/tests/mul_1.cpp
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_mul_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "mul_1.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_1D = LEFT_1D_Value();
- const int32_t LEFT_2D = LEFT_2D_Value();
- const int32_t LEFT_3D = LEFT_3D_Value();
-
- const int32_t RIGHT_W = RIGHT_W_Value();
-
- const int32_t OFM_1D = LEFT_1D_Value();
- const int32_t OFM_2D = LEFT_2D_Value();
- const int32_t OFM_3D = LEFT_3D_Value();
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_1D);
- PRINT_VALUE(LEFT_2D);
- PRINT_VALUE(LEFT_3D);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_1D);
- PRINT_VALUE(OFM_2D);
- PRINT_VALUE(OFM_3D);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization;
-
- quantization.scale = 1;
- quantization.zero_point = 0;
-
- // On an AddTensors(N) call, the T/F Lite interpreter creates N tensors whose indices are in [0, N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_1D, OFM_2D, OFM_3D} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */,
- {LEFT_1D, LEFT_2D, LEFT_3D} /* dims */, quantization);
-
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */,
- {RIGHT_W} /* dims */, quantization);
-
- // Add MUL Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free(),
- // so param must be allocated with malloc
- auto param = make_alloc<TfLiteMulParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run MUL and store the result into Tensor #0
- // - Read Left from Tensor #1
- // - Read Right from Tensor #2
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_MUL, 1));
-
- interp.SetInputs({1, 2});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
- param.tensor_logging = 1;
- param.log_path = "report/tensor_mul_1.log";
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/mul_1.lst b/tools/nnapi_quickcheck/tests/mul_1.lst
deleted file mode 100644
index 1d42159de..000000000
--- a/tools/nnapi_quickcheck/tests/mul_1.lst
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-// (3, 1, 4)
-INT_VALUE(LEFT_1D, 3)
-INT_VALUE(LEFT_2D, 1)
-INT_VALUE(LEFT_3D, 4)
-
-INT_VALUE(RIGHT_W, 4)
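mul_1 multiplies a {3, 1, 4} left tensor by a {4} right vector, relying on trailing-dimension broadcasting: the vector is reused at every position of the leading dims. A reference sketch for exactly these shapes:

#include <array>

// Broadcast multiply for left {3, 1, 4} and right {4}, per mul_1.lst.
std::array<float, 3 * 1 * 4> broadcast_mul(const std::array<float, 3 * 1 * 4> &left,
                                           const std::array<float, 4> &right)
{
  std::array<float, 3 * 1 * 4> out{};
  for (int i = 0; i < 3 * 1; ++i) // flattened leading dims
    for (int w = 0; w < 4; ++w)   // innermost dim matches the right vector
      out[i * 4 + w] = left[i * 4 + w] * right[w];
  return out;
}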
diff --git a/tools/nnapi_quickcheck/tests/mul_2.cpp b/tools/nnapi_quickcheck/tests/mul_2.cpp
deleted file mode 100644
index a692616e0..000000000
--- a/tools/nnapi_quickcheck/tests/mul_2.cpp
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_mul_2, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "mul_2.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_D1 = LEFT_D1_Value();
- const int32_t LEFT_D2 = LEFT_D2_Value();
- const int32_t LEFT_D3 = LEFT_D3_Value();
-
- const int32_t RIGHT_D1 = RIGHT_D1_Value();
-
- const int32_t OFM_D1 = LEFT_D1;
- const int32_t OFM_D2 = LEFT_D2;
- const int32_t OFM_D3 = LEFT_D3;
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_D1);
- PRINT_VALUE(LEFT_D2);
- PRINT_VALUE(LEFT_D3);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT_D1);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_D1);
- PRINT_VALUE(OFM_D2);
- PRINT_VALUE(OFM_D3);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization;
-
- quantization.scale = 1;
- quantization.zero_point = 0;
-
- // On an AddTensors(N) call, the T/F Lite interpreter creates N tensors whose indices are in [0, N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_D1, OFM_D2, OFM_D3} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */,
- {LEFT_D1, LEFT_D2, LEFT_D3} /* dims */, quantization);
-
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */,
- {RIGHT_D1} /* dims */, quantization);
-
- // Add MUL Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free(),
- // so param must be allocated with malloc
- auto param = make_alloc<TfLiteMulParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run MUL and store the result into Tensor #0
- // - Read Left from Tensor #1
- // - Read Right from Tensor #2
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_MUL, 1));
-
- interp.SetInputs({1, 2});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/mul_2.lst b/tools/nnapi_quickcheck/tests/mul_2.lst
deleted file mode 100644
index da53e7eee..000000000
--- a/tools/nnapi_quickcheck/tests/mul_2.lst
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_D1, 5)
-INT_VALUE(LEFT_D2, 3)
-INT_VALUE(LEFT_D3, 12)
-
-INT_VALUE(RIGHT_D1, 12)
diff --git a/tools/nnapi_quickcheck/tests/mul_quan_1.cpp b/tools/nnapi_quickcheck/tests/mul_quan_1.cpp
deleted file mode 100644
index 5f0061e8d..000000000
--- a/tools/nnapi_quickcheck/tests/mul_quan_1.cpp
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_mul_quan_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "mul_1.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_1D = LEFT_1D_Value();
- const int32_t LEFT_2D = LEFT_2D_Value();
- const int32_t LEFT_3D = LEFT_3D_Value();
-
- const int32_t RIGHT_W = RIGHT_W_Value();
-
- const int32_t OFM_1D = LEFT_1D_Value();
- const int32_t OFM_2D = LEFT_2D_Value();
- const int32_t OFM_3D = LEFT_3D_Value();
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_1D);
- PRINT_VALUE(LEFT_2D);
- PRINT_VALUE(LEFT_3D);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_1D);
- PRINT_VALUE(OFM_2D);
- PRINT_VALUE(OFM_3D);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization;
- quantization.zero_point = 0;
-
- // On an AddTensors(N) call, the T/F Lite interpreter creates N tensors whose indices are in [0, N)
- interp.AddTensors(3);
-
- // Configure output
- // Output scale = uint8 max (255) * input1_scale (1.0f) * input2_scale (1.0f)
- float max_scale = std::numeric_limits<uint8_t>::max();
- quantization.scale = max_scale;
- interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */,
- {OFM_1D, OFM_2D, OFM_3D} /* dims */, quantization);
-
- // Configure input(s)
- quantization.scale = 1.0f;
- interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "left" /* name */,
- {LEFT_1D, LEFT_2D, LEFT_3D} /* dims */, quantization);
-
- interp.SetTensorParametersReadWrite(2, kTfLiteUInt8 /* type */, "right" /* name */,
- {RIGHT_W} /* dims */, quantization);
-
- // Add MUL Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free(),
- // so param must be allocated with malloc
- auto param = make_alloc<TfLiteMulParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run MUL and store the result into Tensor #0
- // - Read Left from Tensor #1
- // - Read Right from Tensor #2
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_MUL, 1));
-
- interp.SetInputs({1, 2});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
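The output scale chosen in mul_quan_1 follows from the quantized MUL identity q_out = (s1 * s2 / s_out) * q1 * q2 (all zero points are 0 here): picking s_out = 255 * s1 * s2 keeps q_out within uint8 range even for worst-case inputs.

#include <cassert>

int main()
{
  const float s1 = 1.0f, s2 = 1.0f;     // input scales used by the test
  const float s_out = 255.0f * s1 * s2; // the test's max_scale
  const int q1 = 255, q2 = 255;         // worst-case quantized inputs
  const float q_out = (s1 * s2 / s_out) * q1 * q2;
  assert(q_out <= 255.0f); // 255 * 255 / 255 = 255: no saturation
  return 0;
}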
diff --git a/tools/nnapi_quickcheck/tests/mul_quan_1.lst b/tools/nnapi_quickcheck/tests/mul_quan_1.lst
deleted file mode 100644
index d850f375a..000000000
--- a/tools/nnapi_quickcheck/tests/mul_quan_1.lst
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-// (300, 1, 4)
-INT_VALUE(LEFT_1D, 300)
-INT_VALUE(LEFT_2D, 1)
-INT_VALUE(LEFT_3D, 4)
-
-INT_VALUE(RIGHT_W, 4)
diff --git a/tools/nnapi_quickcheck/tests/relu1_1.cpp b/tools/nnapi_quickcheck/tests/relu1_1.cpp
deleted file mode 100644
index 25e71dc55..000000000
--- a/tools/nnapi_quickcheck/tests/relu1_1.cpp
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-#include "misc/feature/Shape.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <random>
-#include <iostream>
-#include <cassert>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-int main(int argc, char **argv)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "relu1_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On an AddTensors(N) call, the T/F Lite interpreter creates N tensors whose indices are in [0, N)
- interp.AddTensors(2);
-
- // Configure Output Tensor
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_H, OFM_W} /* dims */, quantization);
-
- // Configure Input Tensor
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {IFM_H, IFM_W} /* dims */, quantization);
-
- // Add ReLU1 (RELU_N1_TO_1) Node
- // Run ReLU1 and store its result into Tensor #0
- // - Read IFM from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr,
- BuiltinOpResolver().FindOp(BuiltinOperator_RELU_N1_TO_1, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- return RandomTestRunner{SEED, param}.run(builder);
-}
diff --git a/tools/nnapi_quickcheck/tests/relu1_1.lst b/tools/nnapi_quickcheck/tests/relu1_1.lst
deleted file mode 100644
index 4f61845a7..000000000
--- a/tools/nnapi_quickcheck/tests/relu1_1.lst
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_H, 16)
-INT_VALUE(IFM_W, 16)
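The relu* family here (RELU_N1_TO_1 above, RELU6 and RELU below) differs only in its clamp bounds; for reference:

#include <algorithm>

float relu(float x) { return std::max(0.0f, x); }                   // RELU: [0, inf)
float relu1(float x) { return std::min(1.0f, std::max(-1.0f, x)); } // RELU_N1_TO_1: [-1, 1]
float relu6(float x) { return std::min(6.0f, std::max(0.0f, x)); }  // RELU6: [0, 6]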
diff --git a/tools/nnapi_quickcheck/tests/relu6_1.cpp b/tools/nnapi_quickcheck/tests/relu6_1.cpp
deleted file mode 100644
index 43e8383f3..000000000
--- a/tools/nnapi_quickcheck/tests/relu6_1.cpp
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-#include "misc/feature/Shape.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <random>
-#include <iostream>
-#include <cassert>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_relu6_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "relu6_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On an AddTensors(N) call, the T/F Lite interpreter creates N tensors whose indices are in [0, N)
- interp.AddTensors(2);
-
- // Configure Output Tensor
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_H, OFM_W} /* dims */, quantization);
-
- // Configure Input Tensor
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {IFM_H, IFM_W} /* dims */, quantization);
-
- // Add ReLU6 Node
- // Run ReLU6 and store its result into Tensor #0
- // - Read IFM from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr,
- BuiltinOpResolver().FindOp(BuiltinOperator_RELU6, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/relu6_1.lst b/tools/nnapi_quickcheck/tests/relu6_1.lst
deleted file mode 100644
index 4f61845a7..000000000
--- a/tools/nnapi_quickcheck/tests/relu6_1.lst
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_H, 16)
-INT_VALUE(IFM_W, 16)
diff --git a/tools/nnapi_quickcheck/tests/relu6_quan_1.cpp b/tools/nnapi_quickcheck/tests/relu6_quan_1.cpp
deleted file mode 100644
index 8356442ce..000000000
--- a/tools/nnapi_quickcheck/tests/relu6_quan_1.cpp
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-#include "misc/feature/Shape.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <random>
-#include <iostream>
-#include <cassert>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-int main(int argc, char **argv)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "relu6_quan_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization;
- quantization.scale = 1.0f;
- quantization.zero_point = 0;
-
- // On an AddTensors(N) call, the T/F Lite interpreter creates N tensors whose indices are in [0, N)
- interp.AddTensors(2);
-
- // Configure Output Tensor
- interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */,
- {OFM_H, OFM_W} /* dims */, quantization);
-
- // Configure Input Tensor
- interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "input" /* name */,
- {IFM_H, IFM_W} /* dims */, quantization);
-
- // Add ReLU6 Node
- // Run ReLU6 and store its result into Tensor #0
- // - Read IFM from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr,
- BuiltinOpResolver().FindOp(BuiltinOperator_RELU6, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- return RandomTestRunner{SEED, param}.run(builder);
-}
diff --git a/tools/nnapi_quickcheck/tests/relu6_quan_1.lst b/tools/nnapi_quickcheck/tests/relu6_quan_1.lst
deleted file mode 100644
index 4f61845a7..000000000
--- a/tools/nnapi_quickcheck/tests/relu6_quan_1.lst
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_H, 16)
-INT_VALUE(IFM_W, 16)
diff --git a/tools/nnapi_quickcheck/tests/relu_1.cpp b/tools/nnapi_quickcheck/tests/relu_1.cpp
deleted file mode 100644
index decd0ddfb..000000000
--- a/tools/nnapi_quickcheck/tests/relu_1.cpp
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-#include "misc/feature/Shape.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <random>
-#include <iostream>
-#include <cassert>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_relu_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "relu_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On an AddTensors(N) call, the T/F Lite interpreter creates N tensors whose indices are in [0, N)
- interp.AddTensors(2);
-
- // Configure Output Tensor
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_H, OFM_W} /* dims */, quantization);
-
- // Configure Input Tensor
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {IFM_H, IFM_W} /* dims */, quantization);
-
- // Add ReLU Node
- // Run ReLU and store its result into Tensor #0
- // - Read IFM from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr,
- BuiltinOpResolver().FindOp(BuiltinOperator_RELU, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/relu_1.lst b/tools/nnapi_quickcheck/tests/relu_1.lst
deleted file mode 100644
index 4f61845a7..000000000
--- a/tools/nnapi_quickcheck/tests/relu_1.lst
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_H, 16)
-INT_VALUE(IFM_W, 16)
diff --git a/tools/nnapi_quickcheck/tests/relu_2.cpp b/tools/nnapi_quickcheck/tests/relu_2.cpp
deleted file mode 100644
index ccb9f06c4..000000000
--- a/tools/nnapi_quickcheck/tests/relu_2.cpp
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-#include "misc/feature/Shape.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <random>
-#include <iostream>
-#include <cassert>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_relu_2, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "relu_2.lst"
-#undef INT_VALUE
-
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(2);
-
- // Configure Output Tensor
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure Input Tensor
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // Add ReLU Node
- // Run ReLU and store its result into Tensor #0
- // - Read IFM from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr,
- BuiltinOpResolver().FindOp(BuiltinOperator_RELU, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/relu_2.lst b/tools/nnapi_quickcheck/tests/relu_2.lst
deleted file mode 100644
index 343bff819..000000000
--- a/tools/nnapi_quickcheck/tests/relu_2.lst
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_H, 16)
-INT_VALUE(IFM_W, 16)
-INT_VALUE(IFM_C, 3)
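
Each of these tests seeds its generator from the wall clock but lets a SEED environment variable pin the run, which is what makes a failing random test reproducible. The same pattern in isolation (a standalone sketch using only the standard library):

    #include <chrono>
    #include <cstdio>
    #include <cstdlib>
    #include <random>

    int main()
    {
      // Wall-clock seed by default; the SEED environment variable pins a run.
      int seed = std::chrono::system_clock::now().time_since_epoch().count();
      if (const char *env = std::getenv("SEED"))
        seed = std::atoi(env);

      std::minstd_rand random(seed);
      std::printf("SEED=%d first draw=%lu\n", seed, static_cast<unsigned long>(random()));
    }
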
diff --git a/tools/nnapi_quickcheck/tests/relu_3.cpp b/tools/nnapi_quickcheck/tests/relu_3.cpp
deleted file mode 100644
index 59a856041..000000000
--- a/tools/nnapi_quickcheck/tests/relu_3.cpp
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-#include "misc/feature/Shape.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <random>
-#include <iostream>
-#include <cassert>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_relu_3, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "relu_3.lst"
-#undef INT_VALUE
-
- const int32_t IFM_N = IFM_N_Value();
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_N);
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- const int32_t OFM_N = IFM_N;
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(2);
-
- // Configure Output Tensor
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure Input Tensor
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {IFM_N, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // Add ReLU Node
- // Run ReLU and store its result into Tensor #0
- // - Read IFM from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr,
- BuiltinOpResolver().FindOp(BuiltinOperator_RELU, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/relu_3.lst b/tools/nnapi_quickcheck/tests/relu_3.lst
deleted file mode 100644
index a3a405c10..000000000
--- a/tools/nnapi_quickcheck/tests/relu_3.lst
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_H, 16)
-INT_VALUE(IFM_W, 16)
-INT_VALUE(IFM_C, 3)
-INT_VALUE(IFM_N, 1)
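
Across relu_1/2/3 only the tensor rank changes (2D, 3D, 4D); the operator under test is the same elementwise max(0, x) regardless of layout. A reference implementation of what the random runner checks NNAPI against (a sketch, not the nnfw comparator itself):

    #include <algorithm>
    #include <cassert>
    #include <vector>

    // Elementwise max(0, x) over the whole feature map, independent of rank.
    std::vector<float> relu(const std::vector<float> &ifm)
    {
      std::vector<float> ofm(ifm.size());
      std::transform(ifm.begin(), ifm.end(), ofm.begin(),
                     [](float x) { return std::max(0.0f, x); });
      return ofm;
    }

    int main()
    {
      const auto ofm = relu({-1.5f, 0.0f, 2.5f});
      assert(ofm[0] == 0.0f && ofm[1] == 0.0f && ofm[2] == 2.5f);
    }
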
diff --git a/tools/nnapi_quickcheck/tests/relu_quan_1.cpp b/tools/nnapi_quickcheck/tests/relu_quan_1.cpp
deleted file mode 100644
index 303080ef5..000000000
--- a/tools/nnapi_quickcheck/tests/relu_quan_1.cpp
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-#include "misc/feature/Shape.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <random>
-#include <iostream>
-#include <cassert>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-int main(int argc, char **argv)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "relu_quan_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization;
- quantization.scale = 1.0f;
- quantization.zero_point = 0;
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(2);
-
- // Configure Output Tensor
- interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */,
- {OFM_H, OFM_W} /* dims */, quantization);
-
- // Configure Input Tensor
- interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "input" /* name */,
- {IFM_H, IFM_W} /* dims */, quantization);
-
- // Add ReLU Node
- // Run ReLU and store its result into Tensor #0
- // - Read IFM from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr,
- BuiltinOpResolver().FindOp(BuiltinOperator_RELU, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- return RandomTestRunner{SEED, param}.run(builder);
-}
diff --git a/tools/nnapi_quickcheck/tests/relu_quan_1.lst b/tools/nnapi_quickcheck/tests/relu_quan_1.lst
deleted file mode 100644
index 4f61845a7..000000000
--- a/tools/nnapi_quickcheck/tests/relu_quan_1.lst
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_H, 16)
-INT_VALUE(IFM_W, 16)
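
relu_quan_1 is the uint8 variant: with scale = 1.0 and zero_point = 0, dequantization is the identity, so quantized ReLU degenerates to a pass-through of the uint8 codes. The asymmetric scheme quoted from context.h throughout these files, as standalone code (quantize uses a simplified round-half-up that is only meant for non-negative reals):

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    // real_value = scale * (quantized_value - zero_point)
    float dequantize(uint8_t q, float scale, int32_t zero_point)
    {
      return scale * (static_cast<int32_t>(q) - zero_point);
    }

    uint8_t quantize(float r, float scale, int32_t zero_point)
    {
      const int32_t q = static_cast<int32_t>(r / scale + 0.5f) + zero_point;
      return static_cast<uint8_t>(std::min(255, std::max(0, q)));
    }

    int main()
    {
      // With the parameters relu_quan_1 uses (scale=1, zero_point=0),
      // a round trip is exact for integers in [0, 255].
      std::printf("%f\n", dequantize(quantize(42.0f, 1.0f, 0), 1.0f, 0));
    }
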
diff --git a/tools/nnapi_quickcheck/tests/reshape_1.cpp b/tools/nnapi_quickcheck/tests/reshape_1.cpp
deleted file mode 100644
index 54cfce2f7..000000000
--- a/tools/nnapi_quickcheck/tests/reshape_1.cpp
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <iostream>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_reshape_1, simple_test)
-{
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Set random test parameters
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "max_pool_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t OUT_L = IFM_C * IFM_H * IFM_W;
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OUT_L);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- const int32_t dims[2] = {1, OUT_L};
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- // A: This may be necessary, because quantization values (scale, zero_point) of TENSOR_INT32 and
- // TENSOR_QUANT8_ASYMM are passed on to the runtime.
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure OFM
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {1 /*N*/, OUT_L} /* dims */, quantization);
-
- // Configure IFM
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // Configure Shape
- interp.SetTensorParametersReadOnly(2, kTfLiteInt32 /* type */, "shape" /* name */,
- {2} /* dims */, quantization,
- reinterpret_cast<const char *>(dims), 2 * sizeof(int32_t));
-
- // Add Reshape Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free(),
- // so param must be allocated with malloc
- auto param = make_alloc<TfLiteReshapeParams>();
-
- param->num_dimensions = 2;
- param->shape[0] = 1;
- param->shape[1] = OUT_L;
-
- // Run Reshape and store its result into Tensor #0
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_RESHAPE, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/reshape_1.lst b/tools/nnapi_quickcheck/tests/reshape_1.lst
deleted file mode 100644
index fcaaff016..000000000
--- a/tools/nnapi_quickcheck/tests/reshape_1.lst
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_C, 2)
-INT_VALUE(IFM_H, 4)
-INT_VALUE(IFM_W, 8)
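
The NOTE above is the reason for make_alloc: AddNodeWithParameters releases the builtin-params struct with free(), so allocating it with new would pair the wrong allocator and deallocator. A plausible shape for the helper from the deleted memory.h (an assumption; only its call sites survive in this diff):

    #include <cstdlib>
    #include <new>

    // Assumed shape of make_alloc<T>: malloc-backed, so the interpreter's
    // free() is the matching deallocator.
    template <typename T> T *make_alloc()
    {
      auto *ptr = static_cast<T *>(std::malloc(sizeof(T)));
      if (ptr == nullptr)
        throw std::bad_alloc{};
      return ptr;
    }

    struct Params
    {
      int num_dimensions;
    };

    int main()
    {
      auto *p = make_alloc<Params>();
      p->num_dimensions = 2;
      std::free(p); // in the tests, AddNodeWithParameters does this instead
    }
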
diff --git a/tools/nnapi_quickcheck/tests/reshape_quan_1.cpp b/tools/nnapi_quickcheck/tests/reshape_quan_1.cpp
deleted file mode 100644
index 8eb0bf387..000000000
--- a/tools/nnapi_quickcheck/tests/reshape_quan_1.cpp
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <iostream>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_reshape_quan_1, simple_test)
-{
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Set random test parameters
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "reshape_quan_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t OUT_L = IFM_C * IFM_H * IFM_W;
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OUT_L);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- const int32_t dims[2] = {1, OUT_L};
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- // A: This may be necessary, because quantization values (scale, zero_point) of TENSOR_INT32 and
- // TENSOR_QUANT8_ASYMM are passed on to the runtime.
- TfLiteQuantizationParams quantization;
- quantization.scale = 1.0f;
- quantization.zero_point = 0;
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure OFM
- interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */,
- {1 /*N*/, OUT_L} /* dims */, quantization);
-
- // Configure IFM
- interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "input" /* name */,
- {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // Configure Shape
- interp.SetTensorParametersReadOnly(2, kTfLiteInt32 /* type */, "shape" /* name */,
- {2} /* dims */, quantization,
- reinterpret_cast<const char *>(dims), 2 * sizeof(int32_t));
-
- // Add Reshape Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free(),
- // so param must be allocated with malloc
- auto param = make_alloc<TfLiteReshapeParams>();
-
- param->num_dimensions = 2;
- param->shape[0] = 1;
- param->shape[1] = OUT_L;
-
- // Run Reshape and store its result into Tensor #0
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_RESHAPE, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/reshape_quan_1.lst b/tools/nnapi_quickcheck/tests/reshape_quan_1.lst
deleted file mode 100644
index fcaaff016..000000000
--- a/tools/nnapi_quickcheck/tests/reshape_quan_1.lst
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_C, 2)
-INT_VALUE(IFM_H, 4)
-INT_VALUE(IFM_W, 8)
diff --git a/tools/nnapi_quickcheck/tests/resize_bilinear_1.cpp b/tools/nnapi_quickcheck/tests/resize_bilinear_1.cpp
deleted file mode 100644
index 5b2d7b634..000000000
--- a/tools/nnapi_quickcheck/tests/resize_bilinear_1.cpp
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <iostream>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_resize_bilinear_1, simple_test)
-{
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Set random test parameters
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "resize_bilinear_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = OFM_H_Value();
- const int32_t OFM_W = OFM_W_Value();
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- int32_t size_data[2] = {OFM_H, OFM_W};
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- // A: This may be necessary, because quantization values (scale, zero_point) of TENSOR_INT32 and
- // TENSOR_QUANT8_ASYMM are passed on to the runtime.
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure OFM
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {1 /*N*/, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure IFM
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // Configure Size
- interp.SetTensorParametersReadOnly(
- 2, kTfLiteInt32 /* type */, "size" /* name */, {2} /* dims */, quantization,
- reinterpret_cast<const char *>(size_data), 2 * sizeof(int32_t));
-
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free(),
- // so param must be allocated with malloc
- auto param = make_alloc<TfLiteResizeBilinearParams>();
-
- // align_corners=false: corner pixels of input and output are not pinned together
- param->align_corners = false;
-
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_RESIZE_BILINEAR, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/resize_bilinear_1.lst b/tools/nnapi_quickcheck/tests/resize_bilinear_1.lst
deleted file mode 100644
index cc3dbd5cc..000000000
--- a/tools/nnapi_quickcheck/tests/resize_bilinear_1.lst
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_C, 2)
-INT_VALUE(IFM_H, 3)
-INT_VALUE(IFM_W, 4)
-
-INT_VALUE(OFM_H, 30)
-INT_VALUE(OFM_W, 40)
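
The align_corners flag the test leaves false controls how destination pixels map back to source coordinates in RESIZE_BILINEAR. A standalone sketch of the two mappings, using the test's 3-to-30 height upscale:

    #include <cstdio>

    // Source coordinate sampled for destination index `dst`.
    float src_coord(int dst, int in_size, int out_size, bool align_corners)
    {
      if (align_corners && out_size > 1)
        return dst * static_cast<float>(in_size - 1) / (out_size - 1);
      return dst * static_cast<float>(in_size) / out_size;
    }

    int main()
    {
      // IFM_H=3 -> OFM_H=30 as in resize_bilinear_1.lst: the last output
      // row samples source row 2.0 with corners aligned, 2.9 without.
      std::printf("aligned=%.2f plain=%.2f\n", src_coord(29, 3, 30, true),
                  src_coord(29, 3, 30, false));
    }
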
diff --git a/tools/nnapi_quickcheck/tests/softmax_1.cpp b/tools/nnapi_quickcheck/tests/softmax_1.cpp
deleted file mode 100644
index 71424755a..000000000
--- a/tools/nnapi_quickcheck/tests/softmax_1.cpp
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-#include "misc/feature/Shape.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <random>
-#include <iostream>
-#include <cassert>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_softmax_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "softmax_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_C = 1;
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- const nnfw::misc::feature::Shape ifm_shape{IFM_C, IFM_H, IFM_W};
-
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(2);
-
- // Configure Output Tensor
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {1, IFM_H * IFM_W} /* dims */, quantization);
-
- // Configure Input Tensor
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {1, IFM_H * IFM_W} /* batch_size, input_size */,
- quantization);
-
- // Add Softmax Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free(),
- // so param must be allocated with malloc
- auto param = make_alloc<TfLiteSoftmaxParams>();
-
- param->beta = 1.0;
-
- // Run Softmax and store its result into Tensor #0
- // - Read IFM from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_SOFTMAX, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/softmax_1.lst b/tools/nnapi_quickcheck/tests/softmax_1.lst
deleted file mode 100644
index 1ef9da075..000000000
--- a/tools/nnapi_quickcheck/tests/softmax_1.lst
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_H, 2)
-INT_VALUE(IFM_W, 2)
diff --git a/tools/nnapi_quickcheck/tests/softmax_2.cpp b/tools/nnapi_quickcheck/tests/softmax_2.cpp
deleted file mode 100644
index df1ff2731..000000000
--- a/tools/nnapi_quickcheck/tests/softmax_2.cpp
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-#include "misc/feature/Shape.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <random>
-#include <iostream>
-#include <cassert>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_softmax_2, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define FLOAT_VALUE(NAME, VALUE) FloatVar NAME##_Value(#NAME, VALUE);
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "softmax_2.lst"
-#undef INT_VALUE
-#undef FLOAT_VALUE
-
- const int32_t IFM_C = 1;
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
- const float BETA = BETA_Value();
-
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_VALUE(BETA);
- PRINT_NEWLINE();
-
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- const nnfw::misc::feature::Shape ifm_shape{IFM_C, IFM_H, IFM_W};
-
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(2);
-
- // Configure Output Tensor
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {1, IFM_H * IFM_W} /* dims */, quantization);
-
- // Configure Input Tensor
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {1, IFM_H * IFM_W} /* batch_size, input_size */,
- quantization);
-
- // Add Softmax Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free(),
- // so param must be allocated with malloc
- auto param = make_alloc<TfLiteSoftmaxParams>();
-
- param->beta = BETA;
-
- // Run Softmax and store its result into Tensor #0
- // - Read IFM from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_SOFTMAX, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/softmax_2.lst b/tools/nnapi_quickcheck/tests/softmax_2.lst
deleted file mode 100644
index 1c381bf49..000000000
--- a/tools/nnapi_quickcheck/tests/softmax_2.lst
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-#ifndef FLOAT_VALUE
-#error "FLOAT_VALUE should be defined"
-#endif // FLOAT_VALUE
-
-INT_VALUE(IFM_H, 2)
-INT_VALUE(IFM_W, 2)
-FLOAT_VALUE(BETA, 0.1)
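
softmax_2 is the only variant that exercises a non-default beta (0.1, a flattening temperature). Reference semantics of the parameter set on TfLiteSoftmaxParams, as standalone code (a sketch with the usual max-subtraction for numerical stability, not the TFLite kernel):

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // softmax(x)_i = exp(beta * x_i) / sum_j exp(beta * x_j)
    std::vector<float> softmax(const std::vector<float> &x, float beta)
    {
      const float max_x = *std::max_element(x.begin(), x.end());

      std::vector<float> y(x.size());
      float sum = 0.0f;
      for (std::size_t i = 0; i < x.size(); ++i)
        sum += (y[i] = std::exp(beta * (x[i] - max_x))); // max-shift for stability
      for (float &v : y)
        v /= sum;
      return y;
    }

    int main()
    {
      for (float v : softmax({1.0f, 2.0f, 3.0f, 4.0f}, 0.1f)) // BETA from softmax_2.lst
        std::printf("%f ", v);
      std::printf("\n");
    }
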
diff --git a/tools/nnapi_quickcheck/tests/softmax_quan_1.cpp b/tools/nnapi_quickcheck/tests/softmax_quan_1.cpp
deleted file mode 100644
index 5d38f7727..000000000
--- a/tools/nnapi_quickcheck/tests/softmax_quan_1.cpp
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-#include "misc/feature/Shape.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <random>
-#include <iostream>
-#include <cassert>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_softmax_quan_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "softmax_quan_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_C = 1;
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- const nnfw::misc::feature::Shape ifm_shape{IFM_C, IFM_H, IFM_W};
-
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization;
- quantization.scale = 1.0f / 256;
- quantization.zero_point = 0;
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(2);
-
- // Configure Output Tensor
- interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */,
- {1, IFM_H * IFM_W} /* dims */, quantization);
-
- // Configure Input Tensor
- interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "input" /* name */,
- {1, IFM_H * IFM_W} /* batch_size, input_size */,
- quantization);
-
- // Add Softmax Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free(),
- // so param must be allocated with malloc
- auto param = make_alloc<TfLiteSoftmaxParams>();
-
- param->beta = 1.0;
-
- // Run Softmax and store its result into Tensor #0
- // - Read IFM from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_SOFTMAX, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/softmax_quan_1.lst b/tools/nnapi_quickcheck/tests/softmax_quan_1.lst
deleted file mode 100644
index 1ef9da075..000000000
--- a/tools/nnapi_quickcheck/tests/softmax_quan_1.lst
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_H, 2)
-INT_VALUE(IFM_W, 2)
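
The scale = 1.0f / 256 with zero_point = 0 above is not arbitrary: softmax outputs lie in [0, 1), so 256 uniform steps of 1/256 tile the output range exactly, giving real = q / 256:

    #include <cstdio>
    #include <initializer_list>

    int main()
    {
      // Softmax outputs lie in [0, 1); with zero_point = 0 each uint8
      // code q maps to q / 256.
      const float scale = 1.0f / 256;
      for (int q : {0, 128, 255})
        std::printf("q=%3d -> real=%f\n", q, scale * q);
    }
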
diff --git a/tools/nnapi_quickcheck/tests/split_1.cpp b/tools/nnapi_quickcheck/tests/split_1.cpp
deleted file mode 100644
index 95a7aa842..000000000
--- a/tools/nnapi_quickcheck/tests/split_1.cpp
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-#include "misc/feature/Shape.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <random>
-#include <iostream>
-#include <cassert>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_split_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "split_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_N = IFM_N_Value();
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
- const int32_t NUM_SPLIT = NUM_SPLIT_Value();
- const int32_t AXIS = AXIS_Value();
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_N);
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_VALUE(NUM_SPLIT);
- PRINT_VALUE(AXIS);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- const int32_t OFM_N = IFM_N;
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
- const int32_t axis[1] = {AXIS};
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(NUM_SPLIT + 2);
-
- // Configure Input Tensor(s)
- interp.SetTensorParametersReadOnly(0, kTfLiteInt32 /* type */, "axis" /* name */,
- {1} /* dims */, quantization,
- reinterpret_cast<const char *>(axis), 1 * sizeof(int32_t));
-
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {IFM_N, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // Configure Output Tensor
- std::vector<int> ofm_indexes;
-
- for (int32_t n = 0; n < NUM_SPLIT; ++n)
- {
- const auto ofm_index = 2 + n;
-
- interp.SetTensorParametersReadWrite(ofm_index, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- ofm_indexes.emplace_back(ofm_index);
- }
-
- auto *param = reinterpret_cast<TfLiteSplitParams *>(malloc(sizeof(TfLiteSplitParams)));
-
- param->num_splits = NUM_SPLIT;
-
- // Add SPLIT Node
- // Run SPLIT and store its results into Tensors #2 ~ #(NUM_SPLIT+1)
- // - Read axis and IFM from Tensor #0 and #1, respectively
- interp.AddNodeWithParameters({0, 1}, ofm_indexes, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_SPLIT, 1));
-
- // Set Tensor #1 as Input #0, and Tensors #2 ~ #(NUM_SPLIT+1) as Outputs #0 ~ #(NUM_SPLIT-1)
- interp.SetInputs({1});
- interp.SetOutputs(ofm_indexes);
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/split_1.lst b/tools/nnapi_quickcheck/tests/split_1.lst
deleted file mode 100644
index 823bf24fa..000000000
--- a/tools/nnapi_quickcheck/tests/split_1.lst
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_N, 1)
-INT_VALUE(IFM_C, 1)
-INT_VALUE(IFM_H, 5)
-INT_VALUE(IFM_W, 30)
-INT_VALUE(NUM_SPLIT, 5)
-INT_VALUE(AXIS, 1)
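
Note that the four split tests declare every output with the full input shape, presumably relying on the kernel's prepare step to resize them; the rule SPLIT itself enforces is that the axis dimension divides evenly by num_splits, with every other dimension preserved. As standalone code (a sketch of the rule, not the TFLite prepare logic):

    #include <cassert>
    #include <cstdio>
    #include <vector>

    // Each SPLIT output keeps every dimension except the split axis,
    // which must divide evenly by num_splits.
    std::vector<int> split_output_shape(std::vector<int> in, int axis, int num_splits)
    {
      assert(in[axis] % num_splits == 0);
      in[axis] /= num_splits;
      return in;
    }

    int main()
    {
      // split_1.lst: NHWC {1, 5, 30, 1}, NUM_SPLIT=5, AXIS=1 -> {1, 1, 30, 1}
      const auto out = split_output_shape({1, 5, 30, 1}, 1, 5);
      std::printf("{%d, %d, %d, %d}\n", out[0], out[1], out[2], out[3]);
    }
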
diff --git a/tools/nnapi_quickcheck/tests/split_2.cpp b/tools/nnapi_quickcheck/tests/split_2.cpp
deleted file mode 100644
index eb06ea0f2..000000000
--- a/tools/nnapi_quickcheck/tests/split_2.cpp
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-#include "misc/feature/Shape.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <random>
-#include <iostream>
-#include <cassert>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_split_2, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "split_2.lst"
-#undef INT_VALUE
-
- const int32_t IFM_N = IFM_N_Value();
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
- const int32_t NUM_SPLIT = NUM_SPLIT_Value();
- const int32_t AXIS = AXIS_Value();
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_N);
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_VALUE(NUM_SPLIT);
- PRINT_VALUE(AXIS);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- const int32_t OFM_N = IFM_N;
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
- const int32_t axis[1] = {AXIS};
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(NUM_SPLIT + 2);
-
- // Configure Input Tensor(s)
- interp.SetTensorParametersReadOnly(0, kTfLiteInt32 /* type */, "axis" /* name */,
- {1} /* dims */, quantization,
- reinterpret_cast<const char *>(axis), 1 * sizeof(int32_t));
-
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {IFM_N, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // Configure Output Tensor
- std::vector<int> ofm_indexes;
-
- for (int32_t n = 0; n < NUM_SPLIT; ++n)
- {
- const auto ofm_index = 2 + n;
-
- interp.SetTensorParametersReadWrite(ofm_index, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- ofm_indexes.emplace_back(ofm_index);
- }
-
- auto *param = reinterpret_cast<TfLiteSplitParams *>(malloc(sizeof(TfLiteSplitParams)));
-
- param->num_splits = NUM_SPLIT;
-
- // Add SPLIT Node
- // Run SPLIT and store its results into Tensors #2 ~ #(NUM_SPLIT+1)
- // - Read axis and IFM from Tensor #0 and #1, respectively
- interp.AddNodeWithParameters({0, 1}, ofm_indexes, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_SPLIT, 1));
-
- // Set Tensor #1 as Input #0, and Tensors #2 ~ #(NUM_SPLIT+1) as Outputs #0 ~ #(NUM_SPLIT-1)
- interp.SetInputs({1});
- interp.SetOutputs(ofm_indexes);
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/split_2.lst b/tools/nnapi_quickcheck/tests/split_2.lst
deleted file mode 100644
index ebfbab2d5..000000000
--- a/tools/nnapi_quickcheck/tests/split_2.lst
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_N, 1)
-INT_VALUE(IFM_C, 1)
-INT_VALUE(IFM_H, 5)
-INT_VALUE(IFM_W, 30)
-INT_VALUE(NUM_SPLIT, 3)
-INT_VALUE(AXIS, 2)
diff --git a/tools/nnapi_quickcheck/tests/split_3.cpp b/tools/nnapi_quickcheck/tests/split_3.cpp
deleted file mode 100644
index e3beb5bd2..000000000
--- a/tools/nnapi_quickcheck/tests/split_3.cpp
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-#include "misc/feature/Shape.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <random>
-#include <iostream>
-#include <cassert>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_split_3, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "split_3.lst"
-#undef INT_VALUE
-
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
- const int32_t NUM_SPLIT = NUM_SPLIT_Value();
- const int32_t AXIS = AXIS_Value();
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_VALUE(NUM_SPLIT);
- PRINT_VALUE(AXIS);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
- const int32_t axis[1] = {AXIS};
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(NUM_SPLIT + 2);
-
- // Configure Input Tensor(s)
- interp.SetTensorParametersReadOnly(0, kTfLiteInt32 /* type */, "axis" /* name */,
- {1} /* dims */, quantization,
- reinterpret_cast<const char *>(axis), 1 * sizeof(int32_t));
-
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {IFM_H, IFM_W} /* dims */, quantization);
-
- // Configure Output Tensor
- std::vector<int> ofm_indexes;
-
- for (int32_t n = 0; n < NUM_SPLIT; ++n)
- {
- const auto ofm_index = 2 + n;
-
- interp.SetTensorParametersReadWrite(ofm_index, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_H, OFM_W} /* dims */, quantization);
-
- ofm_indexes.emplace_back(ofm_index);
- }
-
- auto *param = reinterpret_cast<TfLiteSplitParams *>(malloc(sizeof(TfLiteSplitParams)));
-
- param->num_splits = NUM_SPLIT;
-
- // Add SPLIT Node
- // Run SPLIT and store its results into Tensors #2 ~ #(NUM_SPLIT+1)
- // - Read axis and IFM from Tensor #0 and #1, respectively
- interp.AddNodeWithParameters({0, 1}, ofm_indexes, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_SPLIT, 1));
-
- // Set Tensor #1 as Input #0, and Tensors #2 ~ #(NUM_SPLIT+1) as Outputs #0 ~ #(NUM_SPLIT-1)
- interp.SetInputs({1});
- interp.SetOutputs(ofm_indexes);
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/split_3.lst b/tools/nnapi_quickcheck/tests/split_3.lst
deleted file mode 100644
index 300bb02b7..000000000
--- a/tools/nnapi_quickcheck/tests/split_3.lst
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_H, 5)
-INT_VALUE(IFM_W, 30)
-INT_VALUE(NUM_SPLIT, 3)
-INT_VALUE(AXIS, 1)
diff --git a/tools/nnapi_quickcheck/tests/split_4.cpp b/tools/nnapi_quickcheck/tests/split_4.cpp
deleted file mode 100644
index e098973d2..000000000
--- a/tools/nnapi_quickcheck/tests/split_4.cpp
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-#include "misc/feature/Shape.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <random>
-#include <iostream>
-#include <cassert>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_split_4, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "split_4.lst"
-#undef INT_VALUE
-
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
- const int32_t NUM_SPLIT = NUM_SPLIT_Value();
- const int32_t AXIS = AXIS_Value();
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_VALUE(NUM_SPLIT);
- PRINT_VALUE(AXIS);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
- const int32_t axis[1] = {AXIS};
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(NUM_SPLIT + 2);
-
- // Configure Input Tensor(s)
- interp.SetTensorParametersReadOnly(0, kTfLiteInt32 /* type */, "axis" /* name */,
- {1} /* dims */, quantization,
- reinterpret_cast<const char *>(axis), 1 * sizeof(int32_t));
-
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {IFM_H, IFM_W} /* dims */, quantization);
-
- // Configure Output Tensor
- std::vector<int> ofm_indexes;
-
- for (int32_t n = 0; n < NUM_SPLIT; ++n)
- {
- const auto ofm_index = 2 + n;
-
- interp.SetTensorParametersReadWrite(ofm_index, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_H, OFM_W} /* dims */, quantization);
-
- ofm_indexes.emplace_back(ofm_index);
- }
-
- auto *param = reinterpret_cast<TfLiteSplitParams *>(malloc(sizeof(TfLiteSplitParams)));
-
- param->num_splits = NUM_SPLIT;
-
- // Add SPLIT Node
- // Run SPLIT and store its result into Tensor #0
- // - Read axis and IFM from Tensor #0 and #1, respectively
- interp.AddNodeWithParameters({0, 1}, ofm_indexes, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_SPLIT, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #2 ~ #NUM_SPLIT+1 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs(ofm_indexes);
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/split_4.lst b/tools/nnapi_quickcheck/tests/split_4.lst
deleted file mode 100644
index 5b2882828..000000000
--- a/tools/nnapi_quickcheck/tests/split_4.lst
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_H, 5)
-INT_VALUE(IFM_W, 30)
-INT_VALUE(NUM_SPLIT, 5)
-INT_VALUE(AXIS, 0)
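For reference, the deleted `split_4` quickcheck drove TFLite's SPLIT kernel with the defaults above (a 5x30 input split into 5 pieces along axis 0) and diffed the result against NNAPI. A minimal NumPy sketch of the reference behavior (shapes taken from `split_4.lst`; everything else is illustrative):

```python
import numpy as np

# Defaults from the deleted split_4.lst: 5x30 input, 5 splits along axis 0
IFM_H, IFM_W, NUM_SPLIT, AXIS = 5, 30, 5, 0

ifm = np.random.rand(IFM_H, IFM_W).astype(np.float32)
pieces = np.split(ifm, NUM_SPLIT, axis=AXIS)

# SPLIT yields NUM_SPLIT equal slices; each output keeps the non-split dims
assert len(pieces) == NUM_SPLIT
assert all(p.shape == (IFM_H // NUM_SPLIT, IFM_W) for p in pieces)
```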
diff --git a/tools/nnapi_quickcheck/tests/sub_1.cpp b/tools/nnapi_quickcheck/tests/sub_1.cpp
deleted file mode 100644
index 8bc4208c5..000000000
--- a/tools/nnapi_quickcheck/tests/sub_1.cpp
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_sub_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "sub_1.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_N = LEFT_N_Value();
- const int32_t LEFT_C = LEFT_C_Value();
- const int32_t LEFT_H = LEFT_H_Value();
- const int32_t LEFT_W = LEFT_W_Value();
-
- const int32_t RIGHT_N = RIGHT_N_Value();
- const int32_t RIGHT_C = RIGHT_C_Value();
- const int32_t RIGHT_H = RIGHT_H_Value();
- const int32_t RIGHT_W = RIGHT_W_Value();
-
- const int32_t OFM_N = std::max(LEFT_N, RIGHT_N);
- const int32_t OFM_C = std::max(LEFT_C, RIGHT_C);
- const int32_t OFM_H = std::max(LEFT_H, RIGHT_H);
- const int32_t OFM_W = std::max(LEFT_W, RIGHT_W);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_N);
- PRINT_VALUE(LEFT_C);
- PRINT_VALUE(LEFT_H);
- PRINT_VALUE(LEFT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT_N);
- PRINT_VALUE(RIGHT_C);
- PRINT_VALUE(RIGHT_H);
- PRINT_VALUE(RIGHT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */,
- {LEFT_N, LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization);
-
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */,
- {RIGHT_N, RIGHT_H, RIGHT_W, RIGHT_C} /* dims */,
- quantization);
-
- // Add Subtraction Node
- //
- // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free
- // So, param should be allocated with malloc
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Sub and store the result into Tensor #0
- // - Read Left from Tensor #1
- // - Read Right from Tensor #2,
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_SUB, 1));
-
- interp.SetInputs({1, 2});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/sub_1.lst b/tools/nnapi_quickcheck/tests/sub_1.lst
deleted file mode 100644
index fa17caebb..000000000
--- a/tools/nnapi_quickcheck/tests/sub_1.lst
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_N, 1)
-INT_VALUE(LEFT_C, 3)
-INT_VALUE(LEFT_H, 16)
-INT_VALUE(LEFT_W, 16)
-
-INT_VALUE(RIGHT_N, 1)
-INT_VALUE(RIGHT_C, 3)
-INT_VALUE(RIGHT_H, 16)
-INT_VALUE(RIGHT_W, 16)
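All of the deleted `sub_*` tests compute the output shape as the per-dimension maximum of the two operands, which is exactly NumPy-style broadcasting. A small sketch of that rule (shapes illustrative; `sub_1` itself uses two identical 1x16x16x3 tensors, while `sub_4`/`sub_5` exercise mismatched dims):

```python
import numpy as np

left = np.random.rand(1, 16, 16, 3).astype(np.float32)   # NHWC
right = np.random.rand(1, 1, 16, 3).astype(np.float32)   # H dim broadcasts

ofm = left - right  # broadcasting stretches the size-1 H dim of `right`
assert ofm.shape == tuple(max(l, r) for l, r in zip(left.shape, right.shape))
```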
diff --git a/tools/nnapi_quickcheck/tests/sub_2.cpp b/tools/nnapi_quickcheck/tests/sub_2.cpp
deleted file mode 100644
index 423e105f2..000000000
--- a/tools/nnapi_quickcheck/tests/sub_2.cpp
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_sub_2, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "sub_2.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_N = LEFT_N_Value();
- const int32_t LEFT_C = LEFT_C_Value();
- const int32_t LEFT_H = LEFT_H_Value();
- const int32_t LEFT_W = LEFT_W_Value();
-
- const int32_t RIGHT = RIGHT_Value();
-
- const int32_t OFM_N = LEFT_N;
- const int32_t OFM_C = LEFT_C;
- const int32_t OFM_H = LEFT_H;
- const int32_t OFM_W = LEFT_W;
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_N);
- PRINT_VALUE(LEFT_C);
- PRINT_VALUE(LEFT_H);
- PRINT_VALUE(LEFT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */,
- {LEFT_N, LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization);
-
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */,
- {RIGHT} /* dims */, quantization);
-
- // Add Subtraction Node
- //
- // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free
- // So, param should be allocated with malloc
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Sub and store the result into Tensor #0
- // - Read Left from Tensor #1
- // - Read Right from Tensor #2,
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_SUB, 1));
-
- interp.SetInputs({1, 2});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/sub_2.lst b/tools/nnapi_quickcheck/tests/sub_2.lst
deleted file mode 100644
index cd36ac199..000000000
--- a/tools/nnapi_quickcheck/tests/sub_2.lst
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_N, 1)
-INT_VALUE(LEFT_C, 3)
-INT_VALUE(LEFT_H, 16)
-INT_VALUE(LEFT_W, 16)
-
-INT_VALUE(RIGHT, 1)
diff --git a/tools/nnapi_quickcheck/tests/sub_3.cpp b/tools/nnapi_quickcheck/tests/sub_3.cpp
deleted file mode 100644
index 7bb6ab4c0..000000000
--- a/tools/nnapi_quickcheck/tests/sub_3.cpp
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_sub_3, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "sub_3.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_H = LEFT_H_Value();
- const int32_t LEFT_W = LEFT_W_Value();
-
- const int32_t RIGHT = RIGHT_Value();
-
- const int32_t OFM_H = LEFT_H;
- const int32_t OFM_W = LEFT_W;
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_H);
- PRINT_VALUE(LEFT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_H, OFM_W} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */,
- {LEFT_H, LEFT_W} /* dims */, quantization);
-
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */,
- {RIGHT, LEFT_W} /* dims */, quantization);
-
- // Add Subtraction Node
- //
- // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free
- // So, param should be allocated with malloc
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Sub and store the result into Tensor #0
- // - Read Left from Tensor #1
- // - Read Right from Tensor #2,
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_SUB, 1));
-
- interp.SetInputs({1, 2});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/sub_3.lst b/tools/nnapi_quickcheck/tests/sub_3.lst
deleted file mode 100644
index c56875048..000000000
--- a/tools/nnapi_quickcheck/tests/sub_3.lst
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_H, 8)
-INT_VALUE(LEFT_W, 16)
-
-INT_VALUE(RIGHT, 1)
diff --git a/tools/nnapi_quickcheck/tests/sub_4.cpp b/tools/nnapi_quickcheck/tests/sub_4.cpp
deleted file mode 100644
index 7fc857746..000000000
--- a/tools/nnapi_quickcheck/tests/sub_4.cpp
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_sub_4, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "sub_1.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_C = LEFT_C_Value();
- const int32_t LEFT_H = LEFT_H_Value();
- const int32_t LEFT_W = LEFT_W_Value();
-
- const int32_t RIGHT_C = RIGHT_C_Value();
- const int32_t RIGHT_H = RIGHT_H_Value();
- const int32_t RIGHT_W = RIGHT_W_Value();
-
- const int32_t OFM_C = std::max(LEFT_C, RIGHT_C);
- const int32_t OFM_H = std::max(LEFT_H, RIGHT_H);
- const int32_t OFM_W = std::max(LEFT_W, RIGHT_W);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_C);
- PRINT_VALUE(LEFT_H);
- PRINT_VALUE(LEFT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT_C);
- PRINT_VALUE(RIGHT_H);
- PRINT_VALUE(RIGHT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */,
- {LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization);
-
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */,
- {RIGHT_H, RIGHT_W, RIGHT_C} /* dims */, quantization);
-
- // Add Subtraction Node
- //
- // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free
- // So, param should be allocated with malloc
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Sub and store the result into Tensor #0
- // - Read Left from Tensor #1
- // - Read Right from Tensor #2,
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_SUB, 1));
-
- interp.SetInputs({1, 2});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/sub_4.lst b/tools/nnapi_quickcheck/tests/sub_4.lst
deleted file mode 100644
index ce6128f83..000000000
--- a/tools/nnapi_quickcheck/tests/sub_4.lst
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_C, 3)
-INT_VALUE(LEFT_H, 8)
-INT_VALUE(LEFT_W, 16)
-
-INT_VALUE(RIGHT_C, 3)
-INT_VALUE(RIGHT_H, 1)
-INT_VALUE(RIGHT_W, 16)
diff --git a/tools/nnapi_quickcheck/tests/sub_5.cpp b/tools/nnapi_quickcheck/tests/sub_5.cpp
deleted file mode 100644
index 19f95b616..000000000
--- a/tools/nnapi_quickcheck/tests/sub_5.cpp
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_sub_5, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "sub_5.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_N = LEFT_N_Value();
- const int32_t LEFT_H = LEFT_H_Value();
- const int32_t LEFT_W = LEFT_W_Value();
- const int32_t LEFT_C = LEFT_C_Value();
-
- const int32_t RIGHT_N = RIGHT_N_Value();
- const int32_t RIGHT_H = RIGHT_H_Value();
- const int32_t RIGHT_W = RIGHT_W_Value();
- const int32_t RIGHT_C = RIGHT_C_Value();
-
- const int32_t OFM_N = std::max(LEFT_N, RIGHT_N);
- const int32_t OFM_H = std::max(LEFT_H, RIGHT_H);
- const int32_t OFM_W = std::max(LEFT_W, RIGHT_W);
- const int32_t OFM_C = std::max(LEFT_C, RIGHT_C);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_N);
- PRINT_VALUE(LEFT_H);
- PRINT_VALUE(LEFT_W);
- PRINT_VALUE(LEFT_C);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT_N);
- PRINT_VALUE(RIGHT_H);
- PRINT_VALUE(RIGHT_W);
- PRINT_VALUE(RIGHT_C);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
- PRINT_VALUE(OFM_C);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- // Configure left data
- const uint32_t left_size = LEFT_N * LEFT_C * LEFT_H * LEFT_W;
- const uint32_t right_size = RIGHT_N * RIGHT_C * RIGHT_H * RIGHT_W;
- float left_data[left_size] = {
- 0.0f,
- };
- float right_data[right_size] = {
- 0.0f,
- };
-
- // Fill left data with random data
- {
- std::normal_distribution<float> left_dist(-1.0f, +1.0f);
- float value = 10.0f;
- for (uint32_t off = 0; off < left_size; ++off)
- {
- left_data[off] = value;
- }
- value = 1.0f;
- for (uint32_t off = 0; off < right_size; ++off)
- {
- right_data[off] = value++;
- }
- }
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadOnly(1, kTfLiteFloat32 /* type */, "left" /* name */,
- {LEFT_N, LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization,
- reinterpret_cast<const char *>(left_data),
- left_size * sizeof(float));
-
- // Configure input(s)
- interp.SetTensorParametersReadOnly(2, kTfLiteFloat32 /* type */, "right" /* name */,
- {RIGHT_W, RIGHT_C} /* dims: test with other shapes */,
- quantization, reinterpret_cast<const char *>(right_data),
- right_size * sizeof(float));
-
- // Add Subtraction Node
- //
- // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free
- // So, param should be allocated with malloc
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Sub and store the result into Tensor #0
- // - Read Left from Tensor #1
- // - Read Right from Tensor #2,
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_SUB, 1));
-
- interp.SetInputs({});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/sub_5.lst b/tools/nnapi_quickcheck/tests/sub_5.lst
deleted file mode 100644
index 0327e6b73..000000000
--- a/tools/nnapi_quickcheck/tests/sub_5.lst
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_N, 1)
-INT_VALUE(LEFT_H, 2)
-INT_VALUE(LEFT_W, 3)
-INT_VALUE(LEFT_C, 4)
-
-INT_VALUE(RIGHT_N, 1)
-INT_VALUE(RIGHT_H, 1)
-INT_VALUE(RIGHT_W, 3)
-INT_VALUE(RIGHT_C, 4)
diff --git a/tools/nnapi_quickcheck/tests/sub_6.cpp b/tools/nnapi_quickcheck/tests/sub_6.cpp
deleted file mode 100644
index 66b167eb3..000000000
--- a/tools/nnapi_quickcheck/tests/sub_6.cpp
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_sub_6, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "sub_6.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_N = LEFT_N_Value();
- const int32_t LEFT_H = LEFT_H_Value();
- const int32_t LEFT_W = LEFT_W_Value();
- const int32_t LEFT_C = LEFT_C_Value();
-
- const int32_t RIGHT_N = RIGHT_N_Value();
- const int32_t RIGHT_H = RIGHT_H_Value();
- const int32_t RIGHT_W = RIGHT_W_Value();
- const int32_t RIGHT_C = RIGHT_C_Value();
-
- const int32_t OFM_N = std::max(LEFT_N, RIGHT_N);
- const int32_t OFM_H = std::max(LEFT_H, RIGHT_H);
- const int32_t OFM_W = std::max(LEFT_W, RIGHT_W);
- const int32_t OFM_C = std::max(LEFT_C, RIGHT_C);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_N);
- PRINT_VALUE(LEFT_H);
- PRINT_VALUE(LEFT_W);
- PRINT_VALUE(LEFT_C);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT_N);
- PRINT_VALUE(RIGHT_H);
- PRINT_VALUE(RIGHT_W);
- PRINT_VALUE(RIGHT_C);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
- PRINT_VALUE(OFM_C);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- // Configure left data
- const uint32_t left_size = LEFT_N * LEFT_C * LEFT_H * LEFT_W;
- const uint32_t right_size = RIGHT_N * RIGHT_C * RIGHT_H * RIGHT_W;
- float left_data[left_size] = {
- 0.0f,
- };
- float right_data[right_size] = {
- 0.0f,
- };
-
- // Fill left data with random data
- {
- std::normal_distribution<float> left_dist(-1.0f, +1.0f);
- float value = 10.0f;
- for (uint32_t off = 0; off < left_size; ++off)
- {
- left_data[off] = value;
- }
- value = 1.0f;
- for (uint32_t off = 0; off < right_size; ++off)
- {
- right_data[off] = value++;
- }
- }
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadOnly(1, kTfLiteFloat32 /* type */, "left" /* name */,
- {LEFT_W, LEFT_C} /* dims: test with other shapes */,
- quantization, reinterpret_cast<const char *>(left_data),
- left_size * sizeof(float));
-
- // Configure input(s)
- interp.SetTensorParametersReadOnly(2, kTfLiteFloat32 /* type */, "right" /* name */,
- {RIGHT_N, RIGHT_H, RIGHT_W, RIGHT_C} /* dims */,
- quantization, reinterpret_cast<const char *>(right_data),
- right_size * sizeof(float));
-
- // Add Subtraction Node
- //
- // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free
- // So, param should be allocated with malloc
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Sub and store the result into Tensor #0
- // - Read Left from Tensor #1
- // - Read Right from Tensor #2,
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_SUB, 1));
-
- interp.SetInputs({});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/sub_6.lst b/tools/nnapi_quickcheck/tests/sub_6.lst
deleted file mode 100644
index 52a1f1acc..000000000
--- a/tools/nnapi_quickcheck/tests/sub_6.lst
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_N, 1)
-INT_VALUE(LEFT_H, 1)
-INT_VALUE(LEFT_W, 3)
-INT_VALUE(LEFT_C, 4)
-
-INT_VALUE(RIGHT_N, 1)
-INT_VALUE(RIGHT_H, 2)
-INT_VALUE(RIGHT_W, 3)
-INT_VALUE(RIGHT_C, 4)
diff --git a/tools/nnapi_quickcheck/tests/tanh_1.cpp b/tools/nnapi_quickcheck/tests/tanh_1.cpp
deleted file mode 100644
index 7dd92613c..000000000
--- a/tools/nnapi_quickcheck/tests/tanh_1.cpp
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_tanh_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "tanh_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_N = IFM_N_Value();
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t OFM_N = IFM_N;
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_N);
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(2);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {IFM_N, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // Add Tanh Node
- // Run Tanh and store the result into Tensor #0
- // - Read input from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr,
- BuiltinOpResolver().FindOp(BuiltinOperator_TANH, 1));
-
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/tanh_1.lst b/tools/nnapi_quickcheck/tests/tanh_1.lst
deleted file mode 100644
index a0077cb95..000000000
--- a/tools/nnapi_quickcheck/tests/tanh_1.lst
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_N, 1)
-INT_VALUE(IFM_C, 3)
-INT_VALUE(IFM_H, 320)
-INT_VALUE(IFM_W, 320)
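The deleted `tanh_1` test is the simplest of the batch: a single TANH node applied elementwise to a 1x320x320x3 tensor, so the output shape equals the input shape. The NumPy equivalent is a one-liner:

```python
import numpy as np

ifm = np.random.rand(1, 320, 320, 3).astype(np.float32)  # NHWC, from tanh_1.lst
ofm = np.tanh(ifm)  # elementwise; shape is preserved
assert ofm.shape == ifm.shape
```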
diff --git a/tools/nnapi_quickcheck/tests/topk_v2_1.cpp b/tools/nnapi_quickcheck/tests/topk_v2_1.cpp
deleted file mode 100644
index c47af57cc..000000000
--- a/tools/nnapi_quickcheck/tests/topk_v2_1.cpp
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <iostream>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_topk_v2_1, simple_test)
-{
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Set random test parameters
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "topk_v2_1.lst"
-#undef INT_VALUE
-
- const int32_t INPUT_DATA = INPUT_DATA_Value();
- const int32_t K = K_Value();
-
- const int32_t OUTPUT_VALUES = K;
- const int32_t OUTPUT_INDICES = K;
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(INPUT_DATA);
- PRINT_VALUE(K);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OUTPUT_VALUES);
- PRINT_VALUE(OUTPUT_INDICES);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- // Fill the K data
- int32_t k_data[1] = {K};
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- // A: This may be necessary, because quantization values(scale, zero_point) of TENSOR_INT32 and
- // TENSOR_QUANT8_ASYMM are passed on to the runtime.
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(4);
-
- // Configure INPUT_DATA
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "input" /* name */,
- {INPUT_DATA} /* dims */, quantization);
-
- // Configure K
- interp.SetTensorParametersReadOnly(1, kTfLiteInt32 /* type */, "k" /* name */, {1} /* dims */,
- quantization, reinterpret_cast<const char *>(k_data),
- sizeof(k_data));
-
- // Configure OUTPUT_VALUES
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "output_values" /* name */,
- {OUTPUT_VALUES} /* dims */, quantization);
-
- // Configure OUTPUT_INDICES
- interp.SetTensorParametersReadWrite(3, kTfLiteInt32 /* type */, "output_indices" /* name */,
- {OUTPUT_INDICES} /* dims */, quantization);
-
- // Add TopK_V2 Node
- // Run TopK_V2 and store its result into Tensor #2 and #3
- // - Read input data and K from Tensor #0 and #1, respectively
- interp.AddNodeWithParameters({0, 1}, {2, 3}, nullptr, 0, nullptr,
- BuiltinOpResolver().FindOp(BuiltinOperator_TOPK_V2, 1));
-
- // Set Tensor #0 as Input, and Tensor #2 and #3 as Output
- interp.SetInputs({0});
- interp.SetOutputs({2, 3});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/topk_v2_1.lst b/tools/nnapi_quickcheck/tests/topk_v2_1.lst
deleted file mode 100644
index a40ee3c57..000000000
--- a/tools/nnapi_quickcheck/tests/topk_v2_1.lst
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(INPUT_DATA, 8192)
-INT_VALUE(K, 16)
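The deleted `topk_v2_1` test fed 8192 random values and a constant K=16 into TOPK_V2, which produces two outputs: the K largest values and their indices. A NumPy sketch of that contract:

```python
import numpy as np

INPUT_DATA, K = 8192, 16  # defaults from the deleted topk_v2_1.lst

data = np.random.rand(INPUT_DATA).astype(np.float32)
indices = np.argsort(data)[::-1][:K]  # indices of the K largest values
values = data[indices]                # values sorted in descending order

assert values.shape == (K,) and indices.shape == (K,)
```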
diff --git a/tools/nnpackage_tool/gen_golden/gen_golden.py b/tools/nnpackage_tool/gen_golden/gen_golden.py
index 125a69cac..d555419a6 100755
--- a/tools/nnpackage_tool/gen_golden/gen_golden.py
+++ b/tools/nnpackage_tool/gen_golden/gen_golden.py
@@ -91,9 +91,12 @@ if __name__ == '__main__':
if this_dtype == tf.uint8:
input_values.append(
np.random.randint(0, 255, this_shape).astype(np.uint8))
+ elif this_dtype == tf.int8:
+ input_values.append(
+ np.random.randint(-127, 127, this_shape).astype(np.int8))
elif this_dtype == tf.float32:
input_values.append(
- np.random.random_sample(this_shape).astype(np.float32))
+ (10 * np.random.random_sample(this_shape) - 5).astype(np.float32))
elif this_dtype == tf.bool:
# generate random integer from [0, 2)
input_values.append(
@@ -134,9 +137,12 @@ if __name__ == '__main__':
if this_dtype == np.uint8:
input_values.append(
np.random.randint(0, 255, this_shape).astype(np.uint8))
+ elif this_dtype == np.int8:
+ input_values.append(
+ np.random.randint(-127, 127, this_shape).astype(np.int8))
elif this_dtype == np.float32:
input_values.append(
- np.random.random_sample(this_shape).astype(np.float32))
+ (10 * np.random.random_sample(this_shape) - 5).astype(np.float32))
elif this_dtype == np.bool_:
# generate random integer from [0, 2)
input_values.append(
@@ -158,10 +164,11 @@ if __name__ == '__main__':
# dump input and output in h5
import h5py
- supported_dtypes = ("float32", "uint8", "bool", "int32", "int64")
+ supported_dtypes = ("float32", "uint8", "int8", "bool", "int32", "int64")
h5dtypes = {
"float32": ">f4",
"uint8": "u1",
+ "int8": "i1",
"bool": "u1",
"int32": "int32",
"int64": "int64"
diff --git a/tools/nnpackage_tool/model2nnpkg/README.md b/tools/nnpackage_tool/model2nnpkg/README.md
index 9d4676e23..4c70547a4 100644
--- a/tools/nnpackage_tool/model2nnpkg/README.md
+++ b/tools/nnpackage_tool/model2nnpkg/README.md
@@ -1,23 +1,59 @@
# model2nnpkg
-`model2nnpkg` is a tool to convert model (either `tflite` or `circle`) to `nnpackage`.
+`model2nnpkg` is a tool that converts model files (e.g. `tflite`, `circle`, or `tvn`) to an `nnpackage`.
It takes `modelfile` as input and generates `nnpackage`.
+## Prerequisite
+
+Python 3.5 or greater
+
## Usage
```
-Usage: model2nnpkg.sh [options] modelfile
-Convert modelfile (either tflite or circle) to nnpackage.
+usage: model2nnpkg.py [options]
+ Examples:
+ model2nnpkg.py -m add.tflite => create nnpkg "add" in current directory
+ model2nnpkg.py -o out -m add.tflite => create nnpkg "add" in out/
+ model2nnpkg.py -o out -p addpkg -m add.tflite => create nnpkg "addpkg" in out/
+ model2nnpkg.py -c add.cfg -m add.tflite => create nnpkg "add" with add.cfg
+ model2nnpkg.py -o out -p addpkg -m a1.tflite a2.tflite -i a1.json a2.json
+ => create nnpkg "addpkg" with models a1.tflite and a2.tflite in out/
+
+
+Convert model files (tflite, circle or tvn) to nnpkg.
+
+options:
+ -h, --help show this help message and exit
+ -o output_directory, --outdir output_directory
+ set nnpkg output directory
+ -p nnpkg_name, --nnpkg-name nnpkg_name
+ set nnpkg output name (default=[1st modelfile name])
+ -c conf [conf ...], --config conf [conf ...]
+ provide configuration files
+ -m model [model ...], --models model [model ...]
+ provide model files
+ -i io_info [io_info ...], --io-info io_info [io_info ...]
+ provide io info
+```
+
+## Usage (To be deprecated)
+```
+Usage: model2nnpkg.sh [options]
+Convert modelfile (tflite, circle or tvn) to nnpackage.
Options:
-h show this help
-o set nnpackage output directory (default=.)
- -p set nnpackage output name (default=[modelfile name])
+ -p set nnpackage output name (default=[1st modelfile name])
+ -c provide configuration files
+ -m provide model files
Examples:
- model2nnpkg.sh add.tflite => create nnpackage 'add' in ./
- model2nnpkg.sh -o out add.tflite => create nnpackage 'add' in out/
- model2nnpkg.sh -o out -p addpkg add.tflite => create nnpackage 'addpkg' in out/
+ model2nnpkg.sh -m add.tflite => create nnpackage 'add' in ./
+ model2nnpkg.sh -o out -m add.tflite => create nnpackage 'add' in out/
+ model2nnpkg.sh -o out -p addpkg -m add.tflite => create nnpackage 'addpkg' in out/
+ model2nnpkg.sh -c add.cfg -m add.tflite => create nnpackage 'add' with add.cfg
+ model2nnpkg.sh -o out -p addpkg -m a1.tflite a2.tflite => create nnpackage 'addpkg' with models a1.tflite and a2.tflite in out/
```
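The `-i/--io-info` files referenced above are JSON documents; model2nnpkg.py reads the keys shown below. A hypothetical `a1.json` illustrating the expected structure (all index values are made up; `-1` marks an original input/output that this model does not expose):

```python
# Hypothetical io-info payload, mirroring the keys model2nnpkg.py accesses
io_info = {
    "org-model-io": {
        "inputs": {"new-indices": [0]},    # where each original input landed
        "outputs": {"new-indices": [-1]},  # -1: not produced by this model
    },
    "new-model-io": {
        "inputs": {"new-indices": [0], "org-indices": [0]},
        "outputs": {"new-indices": [1], "org-indices": [7]},
    },
}
```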
diff --git a/tools/nnpackage_tool/model2nnpkg/model2nnpkg.py b/tools/nnpackage_tool/model2nnpkg/model2nnpkg.py
new file mode 100755
index 000000000..0f3862091
--- /dev/null
+++ b/tools/nnpackage_tool/model2nnpkg/model2nnpkg.py
@@ -0,0 +1,287 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import json
+import os
+import shutil
+import sys
+
+
+def _is_json(myjson):
+ try:
+ json.load(myjson)
+ except ValueError:
+ return False
+ return True
+
+
+def _verify_args(args):
+ if args.config and len(args.config) != len(args.models):
+ raise Exception(
+ 'error: when config files are provided, the number of config files must match the number of model files\n'
+ +
+ "Please provide a config file for each model file, or don't provide any config files."
+ )
+
+ for i in range(len(args.models)):
+ model_path = args.models[i]
+ if not os.path.isfile(model_path):
+ raise Exception(f'error: {model_path} does not exist.')
+
+ modelfile = os.path.basename(model_path)
+ if len(modelfile.split('.')) == 1:
+ raise Exception(
+ 'error: model file does not have an extension.\n' +
+ "Please provide an extension so that this tool can identify what type of model you use."
+ )
+
+ if args.config:
+ config_path = args.config[i]  # keep the full path; basename would break the existence check
+ if not os.path.isfile(config_path):
+ raise Exception(f'error: {config_path} does not exist.')
+
+ # Check each json file
+ for io_info_path in (args.io_info or []):
+ with open(io_info_path, "r") as io_json:
+ if not _is_json(io_json):
+ raise Exception(
+ f'error: io info file {io_info_path} is not a JSON file.\n' +
+ "Please provide a valid JSON file so that this tool can identify the inputs/outputs of the model."
+ )
+
+ # Check size of indices of original model
+ size_inputs = 0
+ size_outputs = 0
+ for model_index, io_info_path in enumerate(args.io_info or []):
+ with open(io_info_path, "r") as io_json:
+ model_io = json.load(io_json)
+ if model_index == 0:
+ size_inputs = len(model_io["org-model-io"]["inputs"]["new-indices"])
+ size_outputs = len(model_io["org-model-io"]["outputs"]["new-indices"])
+ else:
+ if size_inputs != len(model_io["org-model-io"]["inputs"]["new-indices"]):
+ raise Exception(
+ 'error: Invalid size of input indices.\n' +
+ f"The size of the original model's inputs in io info file {io_info_path} is different from the previous files."
+ )
+ if size_outputs != len(
+ model_io["org-model-io"]["outputs"]["new-indices"]):
+ raise Exception(
+ 'error: Invalid size of output indices.\n' +
+ f"The size of the original model's outputs in io info file {io_info_path} is different from the previous files."
+ )
+
+
+def _get_args():
+ parser = argparse.ArgumentParser(
+ description='Convert model files (tflite, circle or tvn) to nnpkg.',
+ usage=''' %(prog)s [options]
+ Examples:
+ %(prog)s -m add.tflite => create nnpkg "add" in current directory
+ %(prog)s -o out -m add.tflite => create nnpkg "add" in out/
+ %(prog)s -o out -p addpkg -m add.tflite => create nnpkg "addpkg" in out/
+ %(prog)s -c add.cfg -m add.tflite => create nnpkg "add" with add.cfg
+ %(prog)s -o out -p addpkg -m a1.tflite a2.tflite -i a1.json a2.json
+ => create nnpkg "addpkg" with models a1.tflite and a2.tflite in out/
+ ''')
+ parser.add_argument(
+ '-o',
+ '--outdir',
+ type=str,
+ default=os.getcwd(),
+ metavar='output_directory',
+ help='set nnpkg output directory')
+ parser.add_argument(
+ '-p',
+ '--nnpkg-name',
+ type=str,
+ metavar='nnpkg_name',
+ help='set nnpkg output name (default=[1st modelfile name])')
+ parser.add_argument(
+ '-c',
+ '--config',
+ type=str,
+ nargs='+',
+ default='',
+ metavar='conf',
+ help='provide configuration files')
+ parser.add_argument(
+ '-m',
+ '--models',
+ type=str,
+ nargs='+',
+ metavar='model',
+ help='provide model files')
+ parser.add_argument(
+ '-i', '--io-info', type=str, nargs='+', metavar='io_info', help='provide io info')
+
+ args = parser.parse_args()
+
+ _verify_args(args)
+
+ if not args.nnpkg_name:
+ first_model_name = os.path.basename(args.models[0]).rsplit('.', 1)[0]
+ args.nnpkg_name = first_model_name
+
+ args.prog = parser.prog
+
+ return args
+
+
+def _get_org_model_input_size(json_path):
+ with open(json_path, "r") as io_json:
+ model_io = json.load(io_json)
+ return len(model_io["org-model-io"]["inputs"]["new-indices"])
+
+
+def _get_org_model_output_size(json_path):
+ with open(json_path, "r") as io_json:
+ model_io = json.load(io_json)
+ return len(model_io["org-model-io"]["outputs"]["new-indices"])
+
+
+def _generate_io_conn_info(io_info_files):
+ ret = {}
+
+ if io_info_files is None:
+ return ret
+
+ pkg_inputs = list(range(_get_org_model_input_size(io_info_files[0])))
+ pkg_outputs = list(range(_get_org_model_output_size(io_info_files[0])))
+
+ org_model_io = []
+ new_model_io = {"inputs": [], "outputs": []}
+ for model_pos, io_info_path in enumerate(io_info_files):
+ with open(io_info_path, "r") as io_json:
+ model_io = json.load(io_json)
+
+ org_model_io.append(model_io["org-model-io"])
+ new_model_io["inputs"].append(model_io["new-model-io"]["inputs"])
+ new_model_io["outputs"].append(model_io["new-model-io"]["outputs"])
+
+ for model_pos in range(len(org_model_io)):
+ # Set pkg-inputs
+ for org_model_input_pos, new_input_index in enumerate(
+ org_model_io[model_pos]["inputs"]["new-indices"]):
+ if new_input_index != -1:
+ for new_model_input_pos, input_index in enumerate(
+ new_model_io["inputs"][model_pos]["new-indices"]):
+ if new_input_index == input_index:
+ pkg_inputs[
+ org_model_input_pos] = f'{model_pos}:0:{new_model_input_pos}'
+ break
+
+ if isinstance(pkg_inputs[org_model_input_pos], int):
+ # still the integer placeholder from range(): no matching new-model index found
+ raise Exception(
+ 'error: Wrong io information\n' +
+ f"The input index {new_input_index} exists in org-model-io, but not in new-model-io\n"
+ + f"Please check {io_info_files[model_pos]}")
+
+ # Set pkg-outputs
+ for org_model_output_pos, new_output_index in enumerate(
+ org_model_io[model_pos]["outputs"]["new-indices"]):
+ if new_output_index != -1:
+ for new_model_output_pos, output_index in enumerate(
+ new_model_io["outputs"][model_pos]["new-indices"]):
+ if new_output_index == output_index:
+ pkg_outputs[
+ org_model_output_pos] = f'{model_pos}:0:{new_model_output_pos}'
+ break
+
+ if isinstance(pkg_outputs[org_model_output_pos], int):
+ # still the integer placeholder from range(): no matching new-model index found
+ raise Exception(
+ 'error: Wrong io information\n' +
+ f"The output index {new_output_index} exists in org-model-io, but not in new-model-io\n"
+ + f"Please check {io_info_files[model_pos]}")
+
+ ret["pkg-inputs"] = pkg_inputs
+ ret["pkg-outputs"] = pkg_outputs
+
+ model_connect = {}
+ for input_model_pos, inputs in enumerate(new_model_io["inputs"]):
+ for output_model_pos, outputs in enumerate(new_model_io["outputs"]):
+ if input_model_pos == output_model_pos:
+ continue
+
+ for input_index_pos, org_input_index in enumerate(inputs["org-indices"]):
+ for output_index_pos, org_output_index in enumerate(
+ outputs["org-indices"]):
+ if org_input_index == org_output_index:
+ edge_to = f'{input_model_pos}:0:{input_index_pos}'
+ edge_from = f'{output_model_pos}:0:{output_index_pos}'
+
+ if edge_from not in model_connect:
+ model_connect[edge_from] = [edge_to]
+ else:
+ model_connect[edge_from].append(edge_to)
+
+ ret["model-connect"] = [{
+ "from": edge_from,
+ "to": edge_to
+ } for edge_from, edge_to in model_connect.items()]
+
+ return ret
+
+
+def _generate_manifest(args):
+ config_list = [""]
+ if args.config:
+ config_list = [os.path.basename(e) for e in args.config]
+ models_list = [os.path.basename(e) for e in args.models]
+ types_list = [os.path.basename(e).rsplit('.', 1)[1] for e in args.models]
+ io_conn_info = _generate_io_conn_info(args.io_info)
+
+ manifest = {}
+ manifest["major-version"] = "1"
+ manifest["minor-version"] = "2"
+ manifest["patch-version"] = "0"
+ manifest["configs"] = config_list
+ manifest["models"] = models_list
+ manifest["model-types"] = types_list
+ manifest = {**manifest, **io_conn_info} # Requires python 3.5 or greater
+
+ return manifest
+
+
+def main():
+ try:
+ # parse arguments
+ args = _get_args()
+
+ print(f'{args.prog}: Generating nnpkg {args.nnpkg_name} in {args.outdir}')
+ # mkdir nnpkg directory
+ nnpkg_path = os.path.join(args.outdir, args.nnpkg_name)
+ os.makedirs(os.path.join(nnpkg_path, 'metadata'), exist_ok=True)
+
+ # dump manifest file
+ manifest = _generate_manifest(args)
+ manifest_path = os.path.join(nnpkg_path, 'metadata', 'MANIFEST')
+ with open(manifest_path, "w") as json_file:
+ json_file.write(f'{json.dumps(manifest, indent=2)}\n')
+
+ # copy models and configurations
+ for i in range(len(args.models)):
+ shutil.copy2(args.models[i], nnpkg_path)
+ if args.config:
+ shutil.copy2(args.config[i], os.path.join(nnpkg_path, 'metadata'))
+ except Exception as e:
+ print(e)
+ sys.exit(1)
+
+
+if __name__ == "__main__":
+ main()
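For a concrete picture of what the new script writes to `metadata/MANIFEST`, here is a sketch of `_generate_manifest`'s output for a hypothetical two-model package (`model2nnpkg.py -p addpkg -m a1.tvn a2.tvn -i a1.json a2.json`); the `model:subgraph:index` strings and the edge are illustrative:

```python
import json

manifest = {
    "major-version": "1",
    "minor-version": "2",
    "patch-version": "0",
    "configs": [""],                 # default when no -c is given
    "models": ["a1.tvn", "a2.tvn"],
    "model-types": ["tvn", "tvn"],
    "pkg-inputs": ["0:0:0"],         # "model:subgraph:io-position"
    "pkg-outputs": ["1:0:0"],
    "model-connect": [{"from": "0:0:0", "to": ["1:0:0"]}],
}
print(json.dumps(manifest, indent=2))
```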
diff --git a/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh b/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh
index 26f6c70e8..4625c49b7 100755
--- a/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh
+++ b/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh
@@ -5,72 +5,151 @@ set -eu
progname=$(basename "${BASH_SOURCE[0]}")
outdir="."
name=""
+configs_src=()
+models_src=()
+configs_str=""
+models_str=""
+types_str=""
usage() {
- echo "Usage: $progname [options] modelfile"
- echo "Convert modelfile (either tflite or circle) to nnpackage."
+ echo "Usage: $progname [options]"
+ echo "Convert modelfile (tflite, circle or tvn) to nnpackage."
echo ""
echo "Options:"
echo " -h show this help"
echo " -o set nnpackage output directory (default=$outdir)"
- echo " -p set nnpackage output name (default=[modelfile name])"
+ echo " -p set nnpackage output name (default=[1st modelfile name])"
+ echo " -c provide configuration files"
+ echo " -m provide model files"
+ echo ""
+ echo " (Will be deprecated: if there is one remain parameter, that is model file)"
echo ""
echo "Examples:"
- echo " $progname add.tflite => create nnpackage 'add' in $outdir/"
- echo " $progname -o out add.tflite => create nnpackage 'add' in out/"
- echo " $progname -o out -p addpkg add.tflite => create nnpackage 'addpkg' in out/"
+ echo " $progname -m add.tflite => create nnpackage 'add' in $outdir/"
+ echo " $progname -o out -m add.tflite => create nnpackage 'add' in out/"
+ echo " $progname -o out -p addpkg -m add.tflite => create nnpackage 'addpkg' in out/"
+ echo " $progname -c add.cfg -m add.tflite => create nnpackage 'add' with add.cfg"
+ echo " $progname -o out -p addpkg -m a1.tflite a2.tflite => create nnpackage 'addpkg' with models a1.tflite and a2.tflite in out/"
+ echo ""
+ echo "(Will be deprecated: if there is one remain parameter, that is model file)"
exit 1
}
if [ $# -eq 0 ]; then
- echo "For help, type $progname -h"
+ >&2 echo "For help, type $progname -h"
exit 1
fi
-while getopts "ho:p:" OPTION; do
-case "${OPTION}" in
+while getopts "ho:p:c:m:" OPTION; do
+ case "${OPTION}" in
h) usage;;
o) outdir=$OPTARG;;
p) name=$OPTARG;;
+ c)
+ configs_src=($OPTARG)
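+ # getopts consumes only the first value after -c; keep pulling positional
+ # parameters until the next option (or end of args) so that
+ # "-c a.cfg b.cfg" collects every config file.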
+ until [[ $OPTIND -gt $# ]] || [[ $(eval "echo \${$OPTIND}") =~ ^-.* ]] || [ -z "$(eval "echo \${$OPTIND}")" ]; do
+ if [[ $OPTIND -eq $# ]] && [[ ${#models_src[@]} -eq 0 ]]; then
+ # Backward compatibility (to be deprecated)
+ # The last remaining parameter is treated as the model file if option "-m" is not given
+ models_src=($(eval "echo \${$OPTIND}"))
+ else
+ configs_src+=($(eval "echo \${$OPTIND}"))
+ fi
+ OPTIND=$((OPTIND + 1))
+ done
+ ;;
+ m)
+ models_src=($OPTARG)
+ until [[ $OPTIND -gt $# ]] || [[ $(eval "echo \${$OPTIND}") =~ ^-.* ]] || [ -z "$(eval "echo \${$OPTIND}")" ]; do
+ models_src+=($(eval "echo \${$OPTIND}"))
+ OPTIND=$((OPTIND + 1))
+ done
+ ;;
?) exit 1;;
-esac
+ esac
done
shift $((OPTIND-1))
-if [ $# -ne 1 ]; then
- echo "error: wrong argument (no argument or too many arguments)."
- echo "For help, type $progname -h"
- exit 1
+# Backward compatibility (to be deprecated)
+# The last remaining parameter is treated as the model file if option "-m" is not given
+if [ $# -eq 1 ] && [ ${#models_src[@]} -eq 0 ]; then
+ models_src=($1)
+ shift 1
fi
-modelfile=$(basename "$1")
-
-if [[ "$modelfile" != *.* ]]; then
- echo "error: modelfile does not have extension."
- echo "Please provide extension so that $progname can identify what type of model you use."
+if [ $# -ne 0 ]; then
+ >&2 echo "error: wrong argument (too many arguments)."
+ >&2 echo "For help, type $progname -h"
exit 1
fi
-if [ ! -e $1 ]; then
- echo "error: "$1" does not exist."
+if [[ ${#configs_src[@]} -ne 0 ]] && [[ ${#configs_src[@]} -ne ${#models_src[@]} ]]; then
+ >&2 echo "error: when config file is provided, # of config file should be same with modelfile"
+ >&2 echo "Please provide config file for each model file, or don't provide config file."
exit 1
fi
+delim=""
+for modelpath in "${models_src[@]}"
+do
+ modelfile=$(basename "$modelpath")
+
+ if [[ "$modelfile" != *.* ]]; then
+ >&2 echo "error: modelfile does not have extension."
+ >&2 echo "Please provide extension so that $progname can identify what type of model you use."
+ exit 1
+ fi
+
+ if [ ! -e "$modelpath" ]; then
+ >&2 echo "error: $modelpath does not exist."
+ exit 1
+ fi
+
+ models_str="$models_str$delim\"$modelfile\""
+ types_str="$types_str$delim\"${modelfile##*.}\""
+ delim=", "
+done
+
+delim=""
+for configpath in "${configs_src[@]}"
+do
+ configfile=$(basename "$configpath")
+
+ if [ ! -e "$configpath" ]; then
+ >&2 echo "error: $configpath does not exist."
+ exit 1
+ fi
+
+ configs_str="$configs_str$delim\"$configfile\""
+ delim=", "
+done
+
if [ -z "$name" ]; then
- name=${modelfile%.*}
+ first_modelfile=$(basename "${models_src[0]}")
+ name=${first_modelfile%.*}
fi
-extension=${modelfile##*.}
-echo "Generating nnpackage "$name" in "$outdir""
+echo "$progname: Generating nnpackage $name in $outdir"
mkdir -p "$outdir"/"$name"/metadata
+
cat > "$outdir"/"$name"/metadata/MANIFEST <<-EOF
{
"major-version" : "1",
- "minor-version" : "0",
+ "minor-version" : "2",
"patch-version" : "0",
- "models" : [ "$modelfile" ],
- "model-types" : [ "$extension" ]
+ "configs" : [ $configs_str ],
+ "models" : [ $models_str ],
+ "model-types" : [ $types_str ]
}
EOF
-cp "$1" "$outdir"/"$name"
+
+for modelpath in "${models_src[@]}"
+do
+ cp "$modelpath" "$outdir"/"$name"
+done
+
+for configpath in "${configs_src[@]}"
+do
+ cp "$configpath" "$outdir/$name/metadata"
+done
diff --git a/tools/nnpackage_tool/nncc-tc-to-nnpkg-tc/README.md b/tools/nnpackage_tool/nncc-tc-to-nnpkg-tc/README.md
index 9e5ae2938..d35a381d4 100644
--- a/tools/nnpackage_tool/nncc-tc-to-nnpkg-tc/README.md
+++ b/tools/nnpackage_tool/nncc-tc-to-nnpkg-tc/README.md
@@ -1,8 +1,12 @@
# nncc-tc-to-nnpkg-tc
-`model2nnpkg` is a tool to convert model (either `tflite` or `circle`) to `nnpackage`.
+`nncc-tc-to-nnpkg-tc` is a tool to convert nncc testcase to nnpackage testcase.
-It takes `modelfile` as input and generates `nnpackage`.
+It takes `nncc-tc` as input and generates `nnpkg-tc`.
+
+## Prerequisite
+
+Python 3.5 or greater (required by the internally used model2nnpkg)
## Usage
diff --git a/tools/nnpackage_tool/nncc-tc-to-nnpkg-tc/nncc-tc-to-nnpkg-tc.sh b/tools/nnpackage_tool/nncc-tc-to-nnpkg-tc/nncc-tc-to-nnpkg-tc.sh
index bbc5b3e6c..4edb4ceae 100755
--- a/tools/nnpackage_tool/nncc-tc-to-nnpkg-tc/nncc-tc-to-nnpkg-tc.sh
+++ b/tools/nnpackage_tool/nncc-tc-to-nnpkg-tc/nncc-tc-to-nnpkg-tc.sh
@@ -4,7 +4,7 @@ set -eu
progname=$(basename "${BASH_SOURCE[0]}")
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-model2nnpkg=${model2nnpkg:-"$script_dir"/../model2nnpkg/model2nnpkg.sh}
+model2nnpkg=${model2nnpkg:-"$script_dir"/../model2nnpkg/model2nnpkg.py}
# Need to install nncc package & set path to tf2nnpkg
tf2nnpkg=$(which tf2nnpkg)
@@ -78,7 +78,7 @@ if [[ "$model_type" == "pb" ]]; then
$tf2nnpkg --info "$indir/$tcname".info --graphdef "$indir/$tcname"."$model_type" \
"$tf_intf_version" -o "$outdir"
else
- $model2nnpkg -o "$outdir" "$indir/$tcname"."$model_type"
+ $model2nnpkg -o "$outdir" -m "$indir/$tcname"."$model_type"
fi
extensions="
diff --git a/tools/nnpackage_tool/qnf/qnf.md b/tools/nnpackage_tool/qnf/qnf.md
new file mode 100644
index 000000000..ee3a88d5c
--- /dev/null
+++ b/tools/nnpackage_tool/qnf/qnf.md
@@ -0,0 +1,35 @@
+# qnf
+
+`qnf` is a tool to convert model input/output data between quantized and float formats.
+
+It gets quantization parameters from the input circle file.
+
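+The conversion follows the usual affine quantization scheme (shown here as a sketch; the actual `scale` and `zero_point` come from the circle file's quantization parameters):
+
+```
+q = round(x / scale + zero_point)   # quantize: f32 -> q8u
+x = (q - zero_point) * scale        # dequantize: q8u -> f32
+```
+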
+## Prerequisite
+
+```
+$ pip install -r requirements.txt
+```
+
+## Usage
+
+```
+$ ./qnf.py -h
+$ python tools/nnpackage_tool/qnf/qnf.py -h
+usage: qnf.py [-h] [-o OUT_PATH] [-q | -d] h5 circle
+
+positional arguments:
+ h5 path to h5 file either input or output to model
+ circle path to quantized circle model
+
+optional arguments:
+ -h, --help show this help message and exit
+ -o OUT_PATH, --output OUT_PATH
+ output file
+ -q, --quantize quantize f32 to q8u using circle input's qparam
+ (default: false)
+ -d, --dequantize dequantize q8u to f32 using circle output's qparam
+ (default: false)
+
+Examples:
+ qnf.py -q input.h5 0c/0.circle => generated quantized input as input_.h5
+ qnf.py -d output.h5 0c/0.circle => generated dequantized output as output_.h5
+ qnf.py -o out/out.h5 -d output.h5 0c/0.circle => generated dequantized output in out/out.h5
+```
diff --git a/tools/nnpackage_tool/qnf/qnf.py b/tools/nnpackage_tool/qnf/qnf.py
new file mode 100644
index 000000000..0e0277125
--- /dev/null
+++ b/tools/nnpackage_tool/qnf/qnf.py
@@ -0,0 +1,147 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+from circle_schema import circle
+import h5py
+import numpy as np
+import os
+import sys
+
+h5dtypes = {
+ "float32": ">f4",
+ "uint8": "u1",
+ "int8": "i1",
+ "bool": "u1",
+ "int32": "int32",
+ "int64": "int64"
+}
+
+
+def quantize(h5_path, circle_path, h5_out_path):
+ with open(circle_path, 'rb') as f:
+ graph = circle.Model.GetRootAsModel(f.read(), 0).Subgraphs(0)
+ input_tensor = graph.Tensors(graph.Inputs(0))
+ input_names = [input_tensor.Name()]
+
+ with h5py.File(h5_path, 'r') as hf:
+ dset = hf['/value/0']
+ arr = np.array(dset)
+
+ if not np.issubdtype(arr.dtype,
+ np.float32) or input_tensor.Type() != circle.TensorType.UINT8:
+ print("Not f32 to q8u")
+ sys.exit(-1)
+
+ # copied from python-tools/examples/pytorch_tutorial/main.py
+ dtype = 'uint8'
+
+ def _quantize_input0(data):
+ qparam = graph.Tensors(graph.Inputs(0)).Quantization()
+ rescaled_data = data / qparam.ScaleAsNumpy()[0] + qparam.ZeroPointAsNumpy()[0]
+ return np.round(rescaled_data).clip(np.iinfo(dtype).min,
+ np.iinfo(dtype).max).astype(dtype)
+
+ qarr = _quantize_input0(arr)
+
+ ensure_output_dir(h5_out_path)
+ with h5py.File(h5_out_path, 'w') as hf:
+ name_grp = hf.create_group("name")
+ val_grp = hf.create_group("value")
+ idx = 0
+ val_grp.create_dataset(str(idx), data=qarr, dtype=h5dtypes[dtype])
+ name_grp.attrs[str(idx)] = input_names[0]
+
+
+def dequantize(h5_path, circle_path, h5_out_path):
+ with open(circle_path, 'rb') as f:
+ graph = circle.Model.GetRootAsModel(f.read(), 0).Subgraphs(0)
+ output_tensor = graph.Tensors(graph.Outputs(0))
+ output_names = [output_tensor.Name()]
+
+ with h5py.File(h5_path, 'r') as hf:
+ dset = hf['/value/0']
+ arr = np.array(dset)
+ if not np.issubdtype(arr.dtype,
+ np.uint8) or output_tensor.Type() != circle.TensorType.UINT8:
+ print("Not q8u to f32")
+ sys.exit(-1)
+
+ # copied from python-tools/examples/pytorch_tutorial/main.py
+ def _dequantize_output0(data):
+ qparam = graph.Tensors(graph.Outputs(0)).Quantization()
+ return (data.astype(np.float32) -
+ qparam.ZeroPointAsNumpy()[0]) * qparam.ScaleAsNumpy()[0]
+
+ qarr = _dequantize_output0(arr)
+
+ ensure_output_dir(h5_out_path)
+ with h5py.File(h5_out_path, 'w') as hf:
+ name_grp = hf.create_group("name")
+ val_grp = hf.create_group("value")
+ idx = 0
+ val_grp.create_dataset(str(idx), data=qarr, dtype='>f4')
+ name_grp.attrs[str(idx)] = output_names[0]
+
+
+def makeArgParser():
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ 'h5', type=str, help='path to h5 file either input or output to model')
+ parser.add_argument('circle', type=str, help='path to quantized circle model')
+ parser.add_argument(
+ '-o', '--output', action='store', dest="out_path", help="output file")
+ group = parser.add_mutually_exclusive_group()
+ group.add_argument(
+ '-q',
+ '--quantize',
+ action='store_true',
+ help="quantize f32 to q8u using circle input's qparam (default: false)")
+ group.add_argument(
+ '-d',
+ '--dequantize',
+ action='store_true',
+ help="dequantize q8u to f32 using circle output's qparam (default: false)")
+ return parser
+
+
+def parseArgs(parser):
+ args = parser.parse_args()
+ return args
+
+
+def ensure_output_dir(out_path):
+ if os.path.dirname(out_path):
+ os.makedirs(os.path.dirname(out_path), exist_ok=True)
+
+
+if __name__ == '__main__':
+ parser = makeArgParser()
+ args = parseArgs(parser)
+
+ h5_path, circle_path = args.h5, args.circle
+
+ if args.out_path:
+ out_path = args.out_path
+ else:
+ h5_name, ext = os.path.splitext(h5_path)
+ out_path = h5_name + '_' + ext
+
+ if args.quantize:
+ quantize(h5_path, circle_path, out_path)
+
+ if args.dequantize:
+ dequantize(h5_path, circle_path, out_path)
diff --git a/tools/nnpackage_tool/qnf/requirements.txt b/tools/nnpackage_tool/qnf/requirements.txt
new file mode 100644
index 000000000..b018b6ee8
--- /dev/null
+++ b/tools/nnpackage_tool/qnf/requirements.txt
@@ -0,0 +1,3 @@
+numpy>=1.18.5
+circle-schema>=0.4.0.dev0
+h5py>=2.10.0
diff --git a/tools/nnpackage_tool/sth2nnpkgtc/pb2nnpkgtc.sh b/tools/nnpackage_tool/sth2nnpkgtc/pb2nnpkgtc.sh
index 56bff45cc..bc7b0b974 100755
--- a/tools/nnpackage_tool/sth2nnpkgtc/pb2nnpkgtc.sh
+++ b/tools/nnpackage_tool/sth2nnpkgtc/pb2nnpkgtc.sh
@@ -73,7 +73,7 @@ tools/nnpackage_tool/tflite2circle/tflitejson2circlejson.py $name.$suffix.json.f
${flatc} -o ./ -b ${circle_schema} $name.$suffix.json.fused.datalayout
mv $name.$suffix.json.fused.circle $name.$suffix.circle
tools/nnpackage_tool/gen_golden/gen_golden.py $name.$suffix.pb
-tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh -o ${outdir} $name.$suffix.circle
+tools/nnpackage_tool/model2nnpkg/model2nnpkg.py -o ${outdir} -m $name.$suffix.circle
mkdir -p ${outdir}/$name.$suffix/metadata/tc
mv {input,expected}.h5 ${outdir}/$name.$suffix/metadata/tc/
mv $name.$suffix.{pb,tflite} ${outdir}/$name.$suffix/
diff --git a/tools/nnpackage_tool/sth2nnpkgtc/tflite2nnpkgtc.md b/tools/nnpackage_tool/sth2nnpkgtc/tflite2nnpkgtc.md
index 9f28ebacb..f42526623 100644
--- a/tools/nnpackage_tool/sth2nnpkgtc/tflite2nnpkgtc.md
+++ b/tools/nnpackage_tool/sth2nnpkgtc/tflite2nnpkgtc.md
@@ -41,6 +41,6 @@ nnpkg-tcs/cast
└── input.h5
# @ target
-# run nnpkg with nnpackage_run and compare with h5diff
+# run nnpkg with onert_run and compare with h5diff
$ onert/test/onert-test nnpkg-test -i nnpkg-tcs cast
```
diff --git a/tools/nnpackage_tool/sth2nnpkgtc/tflite2nnpkgtc.sh b/tools/nnpackage_tool/sth2nnpkgtc/tflite2nnpkgtc.sh
index 8aea53c4c..1f0569b74 100755
--- a/tools/nnpackage_tool/sth2nnpkgtc/tflite2nnpkgtc.sh
+++ b/tools/nnpackage_tool/sth2nnpkgtc/tflite2nnpkgtc.sh
@@ -49,7 +49,7 @@ tflite_basename=$(basename "$1")
name=${tflite_basename%.*}
tools/nnpackage_tool/gen_golden/gen_golden.py $1
-tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh -o ${outdir} $1
+tools/nnpackage_tool/model2nnpkg/model2nnpkg.py -o ${outdir} -m $1
mkdir -p ${outdir}/$name/metadata/tc
mv {input,expected}.h5 ${outdir}/$name/metadata/tc/
cp $1 ${outdir}/$name/
diff --git a/tools/nnpackage_tool/tflite2circle/README.md b/tools/nnpackage_tool/tflite2circle/README.md
deleted file mode 100644
index 94ef5068c..000000000
--- a/tools/nnpackage_tool/tflite2circle/README.md
+++ /dev/null
@@ -1,28 +0,0 @@
-# tflite2circle
-
-`tflite2circle` is a tool to convert tflite into circle.
-
-## Usage
-
-```
-Usage: tflite2circle.sh [options] tflite
-Convert tflite to circle
-
-Returns
- 0 success
- non-zero failure
-
-Options:
- -h show this help
- -o set output directory (default=.)
-
-Environment variables:
- flatc path to flatc
- (default=./build/externals/FLATBUFFERS/build/flatc)
- tflite_schema path to schema.fbs
- (default=./externals/TENSORFLOW-1.12/tensorflow/contrib/lite/schema/schema.fbs)
-
-Examples:
- tflite2circle.sh Add_000.tflite => convert Add_000.tflite into Add_000.circle
- tflite2circle.sh -o my/circles Add_000 => convert Add_000.tflite into my/circles/Add_000.circle
-```
diff --git a/tools/nnpackage_tool/tflite2circle/fuse_instance_norm.js b/tools/nnpackage_tool/tflite2circle/fuse_instance_norm.js
deleted file mode 100644
index ef1633d3c..000000000
--- a/tools/nnpackage_tool/tflite2circle/fuse_instance_norm.js
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-'use strict'
-
-// read json and parse
-const fs = require('fs')
-let inputfile = "./03_2k.json"
-if (process.argv.length == 3)
- inputfile = process.argv[2]
-let raw = fs.readFileSync(inputfile)
-let model = JSON.parse(raw)
-
-// 0. prepare shortcut variables with object destructuring
-const { operators, tensors } = model.subgraphs[0]
-
-//--------------------------------------------------------------------------
-// 0. construct infra
-
-// List : opcode index (number) => op name (string)
-let opcodeIdxToOpName = []
-for (const opcode of model.operator_codes) {
- opcodeIdxToOpName.push(opcode.builtin_code)
-}
-
-// List: tensor index (number) => producing operator's index (number)
-// assume there is only one op that produces given output tensor.
-let defOp = []
-for (let i = 0; i < operators.length; ++i) {
- let op = operators[i]
- if (op.outputs.length !== 1) {
- console.debug("Assumption failed. Multiple output operator exists.")
- process.exit(-1);
- }
- defOp[op.outputs[0]] = i
-}
-
-// List: tensor index (number) => consuming operator indices (list of number)
-// Note that there may be multiple consumer ops for a given tensor index
-let useOps = []
-for (let i = 0; i < operators.length; ++i) {
- let op = operators[i]
- for (let inTensorIdx of op.inputs) {
- if (useOps[inTensorIdx])
- useOps[inTensorIdx].push(i)
- else
- useOps[inTensorIdx] = [ i ]
- }
-}
-
-// return operator that defines the given tensor index
-function getDefOp(iTensor) {
- return defOp[iTensor] === undefined ? undefined : operators[defOp[iTensor]]
-}
-
-function getUseOps(iTensor) {
- if (useOps[iTensor] === undefined)
- return undefined
- let ret = []
- for (let i of useOps[iTensor])
- ret.push(operators[i])
- return ret
-}
-
-function opeq(op, str) {
- return op === undefined ? undefined : opcodeIdxToOpName[op.opcode_index] === str
-}
-
-function hasUndefined() {
- for (let arg of arguments)
- if (arg === undefined)
- return true
- return false
-}
-
-//--------------------------------------------------------------------------
-// find SquaredDifference as starting point
-let squaredDifferenceIdxList = []
-for (let i = 0; i < operators.length; ++i) {
- if (opeq(operators[i], "SQUARED_DIFFERENCE"))
- squaredDifferenceIdxList.push(i)
-}
-
-let instanceNormList = [ ]
-for (let idx of squaredDifferenceIdxList) {
- const sqd1 = operators[idx]
- const findMean0AndInstanceNormInputTensor = function(sqd1) {
- let mean0, iInstanceNormInputTensor
- for (let i = 0; i < sqd1.inputs.length; ++i) {
- let op = getDefOp(sqd1.inputs[i])
- if (opeq(op, "MEAN")) {
- mean0 = op
- // let's check one of inputs are instance_norm
- // the other input is axis of mean operator.
- for (let j = 0; j < mean0.inputs.length; ++j) {
- // 1 - i means the other input of squared_difference.
- if (mean0.inputs[j] === sqd1.inputs[1 - i]) {
- iInstanceNormInputTensor = mean0.inputs[j]
- }
- if (!hasUndefined(iInstanceNormInputTensor)) break // found instance_norm
- }
- }
- if (!hasUndefined(mean0, iInstanceNormInputTensor)) break
- }
- return [mean0, iInstanceNormInputTensor]
- }
- const [mean0, iInstanceNormInputTensor] = findMean0AndInstanceNormInputTensor(sqd1)
- if (hasUndefined(mean0, iInstanceNormInputTensor)) continue
-
- const findConsumer = function(op, expectedOp) {
- let ops = getUseOps(op.outputs[0])
- if (ops === undefined || ops.length !== 1 || !opeq(ops[0], expectedOp))
- return undefined
- return ops[0]
- }
- const mean2 = findConsumer(sqd1, "MEAN")
- if (hasUndefined(mean2)) continue
-
- const add3 = findConsumer(mean2, "ADD")
- if (hasUndefined(add3)) continue
-
- const isScalar = function(tsr) { return tsr.shape.length === 0 }
- const is1D = function(tsr) { return tsr.shape.length === 1 }
- const isFloat32 = function(tsr) { return tsr.type === "FLOAT32" }
- const asFloat32 = function(arr) { return new Float32Array(new Uint8Array(arr).buffer)[0]; }
- const getFloatScalarValueFromInputsOf = function(op) {
- for (let i of op.inputs) {
- if (isScalar(tensors[i]) && isFloat32(tensors[i])) {
- let buf = model.buffers[tensors[i].buffer]
- if (buf.data && buf.data.length === 4)
- return asFloat32(buf.data)
- }
- }
- return undefined
- }
- const epsilon = getFloatScalarValueFromInputsOf(add3)
- if (hasUndefined(epsilon)) continue
-
- const rsqrt4 = findConsumer(add3, "RSQRT")
- if (hasUndefined(rsqrt4)) continue
-
- const mul5 = findConsumer(rsqrt4, "MUL")
- if (hasUndefined(mul5)) continue
-
- const getFloat1DTensorIdxFromInputsOf = function(op) {
- for (let i of op.inputs) {
- if (is1D(tensors[i]) && isFloat32(tensors[i]))
- return i
- }
- return undefined
- }
- const iGamma = getFloat1DTensorIdxFromInputsOf(mul5)
- if (hasUndefined(iGamma)) continue
-
- let mul6, mul7
- for (let i of useOps[mul5.outputs[0]]) {
- const op = operators[i]
- if (opcodeIdxToOpName[op.opcode_index] !== "MUL")
- break;
- const otherInput = op.inputs[0] === mul5.outputs[0] ? op.inputs[1] : op.inputs[0]
- if (otherInput === iInstanceNormInputTensor)
- mul6 = op
- else if (otherInput === mean0.outputs[0])
- mul7 = op
- }
- if (hasUndefined(mul6, mul7)) continue
-
- const sub8 = findConsumer(mul7, "SUB")
- if (hasUndefined(sub8)) continue
-
- const iBeta = getFloat1DTensorIdxFromInputsOf(sub8)
- if (hasUndefined(iBeta)) continue
-
- const add9 = findConsumer(sub8, "ADD")
- if (hasUndefined(add9)) continue
-
- const add9_2 = findConsumer(mul6, "ADD")
- if (hasUndefined(add9_2)) continue
-
- if (add9 !== add9_2)
- continue
-
- const getActivation = function(op) {
- return op.builtin_options.fused_activation_function
- }
- const activation = getActivation(add9)
- if (hasUndefined(activation)) continue
-
- //--------------------------------------------------------------------------
- // convert to instance norm
- let instanceNormOpcodeIdx = model.operator_codes.findIndex(o => { return o.builtin_code === "INSTANCE_NORM" })
- opcodeIdxToOpName.indexOf('INSTANCE_NORM')
- if (instanceNormOpcodeIdx === -1) {
- model.operator_codes.push( { "builtin_code": "INSTANCE_NORM", "version": 1 } )
- instanceNormOpcodeIdx = model.operator_codes.length - 1;
- }
- // construct instance norm operator
- let instanceNorm = {
- "opcode_index": instanceNormOpcodeIdx,
- "inputs": [ iInstanceNormInputTensor, iGamma, iBeta ],
- "outputs": [ add9.outputs[0] ],
- "builtin_options": { "epsilon": epsilon, "fused_activation_function": activation },
- "builtin_options_type": "InstanceNormOptions",
- "custom_options_format": "FLEXBUFFERS",
- "mutating_variable_inputs": [],
- }
- // add instance norm after removing 0~9 nodes
- instanceNormList.push(instanceNorm)
-} // end of sqd1
-let adjust = 0
-for (let i = 0; i < squaredDifferenceIdxList.length; ++i) {
- let idx = squaredDifferenceIdxList[i] + adjust
- operators.splice(idx - 1, 10, instanceNormList[i])
- adjust += -9
-}
-let raw_fused = JSON.stringify(model)
-fs.writeFileSync(inputfile+".fused", raw_fused);
diff --git a/tools/nnpackage_tool/tflite2circle/tflite2circle.sh b/tools/nnpackage_tool/tflite2circle/tflite2circle.sh
deleted file mode 100755
index 409c05832..000000000
--- a/tools/nnpackage_tool/tflite2circle/tflite2circle.sh
+++ /dev/null
@@ -1,79 +0,0 @@
-#!/bin/bash
-
-set -u
-
-progname=$(basename "${BASH_SOURCE[0]}")
-script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-nnfw_root="$( cd "${script_dir%*/*/*/*}" && pwd )"
-outdir="."
-flatc=${flatc:-"$nnfw_root/build/externals/FLATBUFFERS/build/flatc"}
-tflite_schema=${tflite_schema:-"$nnfw_root/externals/TENSORFLOW-1.13.1/tensorflow/lite/schema/schema.fbs"}
-circle_schema=${circle_schema:-"$nnfw_root/nnpackage/schema/circle_schema.fbs"}
-
-if ! [ -x "$flatc" ]; then
- echo "Please make sure `flatc` is in path."
- exit 2
-fi
-
-if ! { [ -e "$tflite_schema" ] && [ -e "$circle_schema" ]; }; then
- echo "Please make sure that the `*.fbs` paths are set properly."
- exit 3
-fi
-
-usage() {
- echo "Usage: $progname [options] tflite"
- echo "Convert tflite to circle"
- echo ""
- echo "Returns"
- echo " 0 success"
- echo " non-zero failure"
- echo ""
- echo "Options:"
- echo " -h show this help"
- echo " -o set output directory (default=$outdir)"
- echo ""
- echo "Environment variables:"
- echo " flatc path to flatc"
- echo " (default=./build/externals/FLATBUFFERS/build/flatc)"
- echo " tflite_schema path to tflite schema (i.e. schema.fbs)"
- echo " (default=./externals/TENSORFLOW-1.12/tensorflow/contrib/lite/schema/schema.fbs)"
- echo " circle_schema path to circle schema"
- echo " (default=./nnpackage/schema/circle_schema.fbs)"
- echo ""
- echo "Examples:"
- echo " $progname Add_000.tflite => convert Add_000.tflite into Add_000.circle"
- echo " $progname -o my/circles Add_000 => convert Add_000.tflite into my/circles/Add_000.circle"
- exit 1
-}
-
-if [ $# -eq 0 ]; then
- echo "For help, type $progname -h"
- exit 1
-fi
-
-while getopts "ho:" OPTION; do
-case "${OPTION}" in
- h) usage;;
- o) outdir=$OPTARG;;
- ?) exit 1;;
-esac
-done
-
-shift $((OPTIND-1))
-
-if [ $# -ne 1 ]; then
- echo "error: wrong argument (no argument or too many arguments)."
- echo "For help, type $progname -h"
- exit 1
-fi
-
-tflite_base=$(basename "$1")
-name=${tflite_base%.*}
-
-# convert
-
-mkdir -p "${outdir}"
-${flatc} -o ${outdir} --strict-json -t ${tflite_schema} -- $1
-${script_dir}/tflitejson2circlejson.py "${outdir}/${name}.json" > "${outdir}/${name}.circle"
-${flatc} -o ${outdir} -b ${circle_schema} "${outdir}/${name}.circle"
-rm -f ${outdir}/${name}.json
diff --git a/tools/nnpackage_tool/tflite2circle/tflitejson2circlejson.py b/tools/nnpackage_tool/tflite2circle/tflitejson2circlejson.py
deleted file mode 100755
index 272463fb0..000000000
--- a/tools/nnpackage_tool/tflite2circle/tflitejson2circlejson.py
+++ /dev/null
@@ -1,41 +0,0 @@
-#!/usr/bin/python3
-
-# Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import json
-import os
-import sys
-from collections import OrderedDict
-
-
-def usage():
- script = os.path.basename(os.path.basename(__file__))
- print("Usage: {} path_to_tflite_in_json".format(script))
- sys.exit(-1)
-
-
-if __name__ == '__main__':
- if len(sys.argv) != 2:
- usage()
-
- json_path = sys.argv[1]
- with open(json_path, "r") as f:
- try:
- json_dict = json.load(f, object_pairs_hook=OrderedDict)
- json_dict["version"] = 0
- print(json.dumps(json_dict, indent=2))
- except KeyError:
- print("subgraphs attribute does not exist.")
- sys.exit(-2)
diff --git a/tools/pareto_profiler/README.md b/tools/pareto_profiler/README.md
new file mode 100644
index 000000000..5a0e86ebd
--- /dev/null
+++ b/tools/pareto_profiler/README.md
@@ -0,0 +1,95 @@
+This folder contains the necessary scripts to perform a pareto front estimation for machine learning models. Currently, the scripts support target devices running Tizen, as well as `Odroid-XU4`.
+
+The contents of the folder can be categorized into the following groups:
+
+- [Generator scripts to map decision variables to `onert_run` parameters](#mapping-decision-to-parameters)
+- [Estimator scripts to compute pareto front](#pareto-estimation)
+
+The following subsections describe the role of each script in detail.
+
+## Mapping Decision to Parameters
+The generator script `gen_oplist.py` is located under the `generator` folder, and encodes `nnpackage` backend assignments as large integers. Effectively, it maps each suitable backend assignment to an integer value. For example, a graph with only three operations and two backends will have an integer representation in the range `[0, 7]`. Thus a value `0` might imply all operations run on `cpu`, while `7` might imply that all operations run on the `acl_cl` backend. As will be described below, the integer representation of `nnpackage` parameters serves as a convenient decision space for pareto estimation.
+
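+For intuition, here is a minimal sketch of such an encoding (illustrative only, not part of the tool; the backend and operation names are assumed), with two backends and three operations:
+
+```
+backends = ["cpu", "acl_cl"]           # base b = 2
+ops = ["Conv2D", "Pool2D", "Reshape"]  # three operations -> ids 0..7
+
+def decode(n, b, n_ops):
+    # digit i of n in base b selects the backend for operation i
+    return [backends[(n // b**i) % b] for i in range(n_ops)]
+
+print(decode(0, 2, 3))  # ['cpu', 'cpu', 'cpu']
+print(decode(7, 2, 3))  # ['acl_cl', 'acl_cl', 'acl_cl']
+```
+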
+Setting up parameters for `onert_run` requires knowledge of model-specific operations. To this end, the `gen_oplist.py` script generates, for each model, an `oplist` of unique operations. If an exhaustive mapping of backends to operation sequences is preferred, then `gen_oplist.py` also generates a so-called `opmap` list for uniquely observed `<operation name, data size>` pairs.
+
+`gen_oplist.py` is run on the development environment (read: *Desktop PC*) as shown below:
+```
+python3 gen_oplist.py <tflite model> <target>
+```
+
+The list of model operations and their mapping to graph node indexes are stored in an *oplist.json* file, and transferred to the target device. For further details about usage, type `python3 gen_oplist.py --help`.
+
+## Pareto Estimation
+Scripts under the `estimator` folder fall into two categories: [exhaustive, brute-force profiling](#exhaustive-profiling) and an [on-device version of pareto estimation](#on-device-pareto-estimation). These are described in detail below.
+
+## Exhaustive Profiling
+For the sake of testing several pareto estimation algorithms *offline* on common lookup data, the `estimator` folder includes a `brute_force_profiler.py` that records all solutions in the decision *or* assignment space. `brute_force_profiler.py` is typically run on the target device, with the following syntax:
+
+```
+python brute_force_profiler.py <model> <target> <run_folder> [--dumpfile=<filename>]
+```
+For details, type `python brute_force_profiler.py --help`. Below is an example of the dump generated by the brute-force profiler:
+
+```
+{"oplist": ["Pool2D", "BinaryArithmetic", "DepthwiseConv2D", "Conv2D", "Reshape"],
+ "solutions": [
+ {"memory": 56388, "id": 0, "time": 72.525},
+ {"memory": 63624, "id": 1, "time": 86.532},
+ {"memory": 64320, "id": 2, "time": 69.352},
+ {"memory": 65376, "id": 3, "time": 76.436},
+ {"memory": 73016, "id": 4, "time": 69.634},
+ {"memory": 73492, "id": 5, "time": 47.013},
+ {"memory": 74488, "id": 6, "time": 95.01},
+ {"memory": 74844, "id": 7, "time": 111.329},
+ {"memory": 393324, "id": 8, "time": 98.956},
+ {"memory": 395088, "id": 9, "time": 103.24},
+ {"memory": 396180, "id": 10, "time": 68.107},
+ {"memory": 395932, "id": 11, "time": 86.109},
+ {"memory": 402468, "id": 12, "time": 25.477},
+ {"memory": 402800, "id": 13, "time": 25.42},
+ {"memory": 403904, "id": 14, "time": 9.168},
+ {"memory": 404476, "id": 15, "time": 7.801},
+....
+ {"memory": 403940, "id": 30, "time": 9.145},
+ {"memory": 403568, "id": 31, "time": 8.034}]}
+```
+
+**Note**: At present, the pareto estimation algorithms run on-device; an *offline* mode will be supported in the near future.
+
+## On Device Pareto Estimation
+Currently the `estimator` folder includes only `random_sampler.py`; in the future, it will feature a set of pareto estimation algorithms. Regardless of the algorithm, the following steps must be carried out in sequence:
+
+1. Generate the oplist using `gen_oplist.py`, and transfer the JSON file to the target device. This step is performed on the development environment
+
+2. Copy the contents of the `estimator` folder to the target (*scp* for odroid, *sdb push* for tizen), at a preferred location
+
+3. On the target device, run the pareto-estimation algorithm. The following example shows how to run `random_sampler.py` (see `python random_sampler.py --help` for details)
+```
+python random_sampler.py /root/img_model/mobilenetv2/ /opt/usr/nnfw-test/Product/out/bin --mode=name --dumpfile=/tmp/mobilenetv2_opname_profile.json --iterations=20
+```
+After profiling, the results can be viewed under the filename provided by the `--dumpfile` argument. Below is an illustrative example of the same model that was brute-forced above:
+
+```
+{"configs": {
+ "4": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=cpu OP_BACKEND_DepthwiseConv2D=cpu OP_BACKEND_Reshape=acl_cl OP_BACKEND_Conv2D=cpu OP_BACKEND_BinaryArithmetic=cpu ",
+ "10": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=cpu OP_BACKEND_DepthwiseConv2D=acl_cl OP_BACKEND_Reshape=cpu OP_BACKEND_Conv2D=acl_cl OP_BACKEND_BinaryArithmetic=cpu ",
+ "14": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=cpu OP_BACKEND_DepthwiseConv2D=acl_cl OP_BACKEND_Reshape=acl_cl OP_BACKEND_Conv2D=acl_cl OP_BACKEND_BinaryArithmetic=cpu ",
+ "16": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=cpu OP_BACKEND_DepthwiseConv2D=cpu OP_BACKEND_Reshape=cpu OP_BACKEND_Conv2D=cpu OP_BACKEND_BinaryArithmetic=acl_cl ",
+ "20": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=cpu OP_BACKEND_DepthwiseConv2D=cpu OP_BACKEND_Reshape=acl_cl OP_BACKEND_Conv2D=cpu OP_BACKEND_BinaryArithmetic=acl_cl ",
+ "21": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=acl_cl OP_BACKEND_DepthwiseConv2D=cpu OP_BACKEND_Reshape=acl_cl OP_BACKEND_Conv2D=cpu OP_BACKEND_BinaryArithmetic=acl_cl ",
+ "31": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=acl_cl OP_BACKEND_DepthwiseConv2D=acl_cl OP_BACKEND_Reshape=acl_cl OP_BACKEND_Conv2D=acl_cl OP_BACKEND_BinaryArithmetic=acl_cl "},
+ "oplist": ["Pool2D", "DepthwiseConv2D", "Reshape", "Conv2D", "BinaryArithmetic"],
+ "solutions": [
+ {"exec_time": 76.138, "max_rss": 62712, "id": 4},
+ {"exec_time": 72.719, "max_rss": 65272, "id": 16},
+ {"exec_time": 22.409, "max_rss": 403120, "id": 14},
+ {"exec_time": 28.138, "max_rss": 403064, "id": 10},
+ {"exec_time": 70.656, "max_rss": 65536, "id": 20},
+ {"exec_time": 68.805, "max_rss": 66076, "id": 21},
+ {"exec_time": 8.201, "max_rss": 404656, "id": 31}], "mode": "name"}
+```
+**Note**: The pareto-estimation algorithms require the Python `numpy` package, so make sure to install it beforehand.
+
diff --git a/tools/pareto_profiler/estimator/Hlps.py b/tools/pareto_profiler/estimator/Hlps.py
new file mode 100644
index 000000000..ba0925d6f
--- /dev/null
+++ b/tools/pareto_profiler/estimator/Hlps.py
@@ -0,0 +1,257 @@
+#! /usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import sys
+import queue
+import utils
+import signal
+from pareto import ParetoData
+
+
+class Hlps:
+ """
+ Initialize Runner and Pareto data structure
+ """
+
+ def __init__(self, runner, num_backends, num_samples):
+ self._runner = runner
+ self._num_backends = num_backends
+ self._num_samples = num_samples
+ self._marked = {}
+ self._extended_search = False
+ self._iteration = 0
+ self._pareto_obj = ParetoData()
+
+ """
+ Method to generate new samples from a given sample v_vec.
+ The new samples bear a hamming distance hd from the provided sample.
+ """
+
+ def gen_hamming(self, v_vec, hd=1, nsamples=None):
+ if nsamples is None:
+ nsamples = self._num_backends - 1
+ ret = np.zeros((nsamples, len(v_vec)), dtype=int)
+ v = v_vec
+ marked = np.full(len(v), False, dtype=bool)
+ cnt = 0
+
+ for r in range(nsamples):
+ ret[r] = v
+ rnd_pos = np.random.permutation(range(len(v)))
+ for i in range(hd):
+ pos = rnd_pos[i]
+ marked[pos] = True
+ for r in range(nsamples):
+ ret[r][pos] = (v[pos] - r - 1) % self._num_backends
+
+ return ret
+
+ """
+ Method to generate all samples from a given sample v_vec, that
+ have a hamming distance of one with respect to it.
+ """
+
+ def gen_hamming_one(self, v_vec, invert=False):
+ ret = np.zeros(((self._num_backends - 1) * len(v_vec), len(v_vec)), dtype=int)
+ if invert == False:
+ v = v_vec
+ else:
+ v = [1 - x for x in v_vec]
+ for nb in range(1, self._num_backends):
+ c = 0
+ for r in range((nb - 1) * len(v), nb * len(v)):
+ ret[r] = v
+ ret[r][c] = (v[c] - nb) % self._num_backends
+ c += 1
+ return ret
+
+ """
+ Enable profiling over extended search space
+ """
+
+ def enable_extended_search(self):
+ self._extended_search = True
+ for key in self._pareto_obj.get_pareto_keys():
+ config = self._pareto_obj.get_config(key)
+ extended_val = self._runner.get_extended_solution(config)
+ self._pareto_obj.set_config(key, extended_val)
+ self._iteration = 0
+
+ """
+ HLPS algorithm implementation provided here.
+ Description: Starting with a random sample, fill up a sampling
+ queue with hamming neighbors. Fetch samples from queue,
+ each time checking for pareto optimality. Pareto-optimal samples
+ are then explored/exploited to generate new samples that are added to the queue.
+ The algorithm phase terminates when the queue is empty.
+ Repeat this phase in a multi-shot invocation for better results.
+ """
+
+ def hlps_routine(self, config_ids):
+ # Initialize
+ solution_q = queue.Queue()
+ visited = {}
+ nbits = self._runner.get_nbits(self._extended_search)
+ is_extended = self._runner.get_mode_extended()
+ nsolutions = self._num_backends**nbits
+
+ stop_insert = False
+
+ cnt = 0
+ q_add_cnt = 0
+ round_cnt = 0
+
+ def extended_solution(s):
+ return self._runner.get_extended_solution(s)
+
+ def mark_solution(s):
+ if is_extended == True and self._extended_search == False:
+ self._marked[extended_solution(s)] = True
+ else:
+ self._marked[s] = True
+
+ def is_marked(s):
+ if is_extended == True and self._extended_search == False:
+ return (extended_solution(s) in self._marked)
+ else:
+ return (s in self._marked)
+
+ def visit_solution(s):
+ if is_extended == True and self._extended_search == False:
+ visited[extended_solution(s)] = True
+ else:
+ visited[s] = True
+
+ def is_visited(s):
+ if is_extended == True and self._extended_search == False:
+ return (extended_solution(s) in visited)
+ else:
+ return (s in visited)
+
+ def sigint_handler(signum, frame):
+ print("Round cnt = ", round_cnt)
+
+ signal.signal(signal.SIGINT, sigint_handler)
+ if len(config_ids) > 0:
+ for solution in config_ids:
+ if is_extended == True and self._extended_search == True and self._iteration == 0:
+ s = extended_solution(solution)
+ else:
+ s = solution
+ s_vec = utils.int_to_vec(s, self._num_backends, nbits)
+
+ candidate = self.gen_hamming_one(s_vec)
+ for hd in range((self._num_backends - 1) * nbits):
+ candidate_int = int(''.join(str(x) for x in reversed(candidate[hd])),
+ self._num_backends)
+ if is_marked(candidate_int) == False:
+ solution_q.put(candidate_int)
+ mark_solution(candidate_int)
+ q_add_cnt += 1
+ else:
+ start_seed = int(np.random.rand() * (nsolutions))
+ solution_q.put(start_seed)
+ q_add_cnt += 1
+
+ self._iteration += 1
+ # Main routine
+ while not solution_q.empty():
+ s = solution_q.get()
+ mark_solution(s)
+ stop_insert = False
+ if (round_cnt % 100 == 0):
+ print("sample count = ", round_cnt)
+ if self._extended_search == True:
+ print("Queue size is ", solution_q.qsize())
+
+ if is_extended == True and self._extended_search == False:
+ time_val, memory_val = self._runner.profile_by_opname(s)
+ elif is_extended == True:
+ time_val, memory_val = self._runner.profile_by_opindex(s)
+ else:
+ time_val, memory_val = self._runner.profile_by_opname(s)
+ round_cnt += 1
+
+ utils.progressbar(round_cnt, nsolutions, prefix="% samples computed. : ")
+ self._pareto_obj.update_pareto_solutions(
+ s, time_val, memory_val, explore_flag=True)
+
+ for key in self._pareto_obj.get_pareto_keys():
+ pareto_sample = self._pareto_obj.get_config(key)
+ explore_sample = self._pareto_obj.get_exploration(key)
+
+ if is_visited(pareto_sample):
+ continue
+ visit_solution(pareto_sample)
+ s_vec = utils.int_to_vec(pareto_sample, self._num_backends, nbits)
+
+ if explore_sample == True:
+ # Explore solutions over a larger range
+ for hd in range(1, nbits + 1):
+ if stop_insert is True:
+ break
+
+ candidate = self.gen_hamming(s_vec, hd=hd)
+ for i in range(self._num_backends - 1):
+ if stop_insert is True:
+ break
+ candidate_int = int(
+ ''.join(str(x) for x in reversed(candidate[i])),
+ self._num_backends)
+ try:
+ if is_marked(candidate_int) == False:
+ solution_q.put(candidate_int)
+ q_add_cnt += 1
+ except IndexError:
+ print("candidate[i] = ", candidate[i],
+ ', candidate_int = ', candidate_int)
+ sys.exit(-1)
+ if (q_add_cnt >= self._num_samples):
+ print("Queue full in explore")
+ stop_insert = True
+ else:
+ # Exploit solutions within immediate neighborhood
+ candidate = self.gen_hamming_one(s_vec)
+
+ for j in range((self._num_backends - 1) * nbits):
+ if stop_insert is True:
+ break
+ candidate_int = int(
+ ''.join(str(x) for x in reversed(candidate[j])),
+ self._num_backends)
+ if is_marked(candidate_int) == False:
+ solution_q.put(candidate_int)
+ q_add_cnt += 1
+ if (q_add_cnt >= self._num_samples):
+ print("Queue full in exploit")
+ stop_insert = True
+ self._pareto_obj.set_exploration(key)
+
+ pfront = set([
+ self._pareto_obj.get_config(key)
+ for key in self._pareto_obj.get_pareto_keys()
+ ])
+ return pfront, q_add_cnt
+
+ """
+ Method to dump results from HLPS
+ """
+
+ def dump_results(self, dumpdata):
+ dumpdata = self._pareto_obj.dump_pareto_solutions(dumpdata)
+ dumpdata = self._runner.dump_config(dumpdata)
+ return dumpdata
diff --git a/tools/pareto_profiler/estimator/brute_force_profiler.py b/tools/pareto_profiler/estimator/brute_force_profiler.py
new file mode 100644
index 000000000..f30a798ef
--- /dev/null
+++ b/tools/pareto_profiler/estimator/brute_force_profiler.py
@@ -0,0 +1,71 @@
+#! /usr/bin/python
+import argparse
+import json
+import sys
+from profile_args import ProfileArgs
+from runner import Runner
+from utils import progressbar
+
+if __name__ == "__main__":
+ parser = ProfileArgs(
+ prog="brute_force_profiler.py", description="Profiles onert_run using oplist")
+ # Parse arguments
+ args = parser.parse_args()
+ modelfile = args.model
+ mode = args.mode
+ n_backends = args.backends
+ dumpfile = args.dumpfile
+
+ # Initialize a runner for given model and target
+ runner = Runner(args.model, args.run_folder, args.backends, args.mode)
+ nruns = runner.get_solution_spacelen()
+ profile_results = {}
+ profile_results['solutions'] = []
+ chk_ptr = 0
+
+ # Profile each backend setting, record execution time and peak memory
+ for r in range(nruns):
+ if (r % 100) == 0:
+ # Checkpointing results, in case the runs take too long
+ if chk_ptr > 0:
+ with open("/tmp/solutions.json") as ifile:
+ tmp_results = json.load(ifile)
+
+ with open("/tmp/solutions.json", "w") as ofile:
+ json.dump(tmp_results + profile_results['solutions'][chk_ptr:], ofile)
+ else:
+ with open("/tmp/solutions.json", "w") as ofile:
+ json.dump(profile_results['solutions'], ofile)
+ chk_ptr = r
+
+ if args.mode == "name":
+ exec_time, max_rss = runner.profile_by_opname(r)
+ elif args.mode == "index":
+ exec_time, max_rss = runner.profile_by_opindex(r)
+ else:
+ print("Invalid mode ", mode)
+ sys.exit(-1)
+
+ profile_results['solutions'].append({
+ "time": exec_time,
+ "memory": max_rss,
+ "id": r
+ })
+ progressbar(r, nruns, prefix="% samples computed. : ")
+ progressbar(nruns, nruns, prefix="% samples computed. : ")
+
+ oplist, opmap, opname_by_indx = runner.get_opconfig()
+
+ if args.mode == "index":
+ profile_results['oplist'] = oplist
+ profile_results['opmap'] = opmap
+ profile_results['opname_by_indx'] = opname_by_indx
+ elif args.mode == "name":
+ profile_results['oplist'] = oplist
+ else:
+ print("Invalid mode ", mode)
+ sys.exit(-1)
+
+ with open(dumpfile, "w") as ofile:
+ json.dump(profile_results, ofile)
+ print "\nDone.."
diff --git a/tools/pareto_profiler/estimator/hlps_sampler.py b/tools/pareto_profiler/estimator/hlps_sampler.py
new file mode 100644
index 000000000..a4c1e4fd8
--- /dev/null
+++ b/tools/pareto_profiler/estimator/hlps_sampler.py
@@ -0,0 +1,99 @@
+#! /usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import utils
+import sys
+import json
+import time
+from Hlps import Hlps
+from profile_args import ProfileArgs
+from runner import Runner
+
+
+def hlps_profiler(modelfile,
+ run_folder,
+ num_backends=2,
+ mode="name",
+ nruns=3,
+ num_samples=2000,
+ dumpfile=None):
+ runner = Runner(modelfile, run_folder, num_backends, mode=mode)
+ hlps = Hlps(runner, num_backends=num_backends, num_samples=num_samples)
+
+ config_set = set()
+ sample_cnt = 0
+ total_reject_list = []
+
+ for r in range(nruns):
+ config_set, sample_cnt_iter = hlps.hlps_routine(config_set)
+ sample_cnt += sample_cnt_iter
+
+ # Add the index mode search here.
+ print("Starting search over extended space")
+ print("\n")
+ if mode == "index":
+ hlps.enable_extended_search()
+ for r in range(nruns):
+ config_set, sample_cnt_iter = hlps.hlps_routine(config_set)
+ sample_cnt += sample_cnt_iter
+
+ # Export results to json file
+ # Dump profiler results
+ dumpdata = {}
+ dumpdata['mode'] = mode
+ dumpdata['sample_cnt'] = sample_cnt
+ dumpdata = hlps.dump_results(dumpdata)
+ with open(dumpfile, "w") as ofile:
+ json.dump(dumpdata, ofile)
+
+
+if __name__ == "__main__":
+ t_start = time.time()
+ parser = ProfileArgs(
+ "hlps_on_device.py",
+ description="On-Device Optimizing Profiler for TensorFlowLite Models")
+ parser.add_argument(
+ '--iterations',
+ type=int,
+ default=3,
+ help='Number of iterations, less than 10 should be enough')
+ parser.add_argument(
+ '--samples', type=int, default=2000, help='Number of samples per iteration')
+ parser.add_argument(
+ '--offline',
+ action='store_true',
+ help='Set to run over profiled data')
+ parser.add_argument('--profiled_data', type=str, help='Profile file with path')
+
+ args = parser.parse_args()
+
+ hlps_profiler(
+ args.model,
+ args.run_folder,
+ num_backends=args.backends,
+ mode=args.mode,
+ nruns=args.iterations,
+ num_samples=args.samples,
+ dumpfile=args.dumpfile)
+ t_end = time.time()
+ with open(args.dumpfile, "r") as ifile:
+ dumpdata = json.load(ifile)
+ dumpdata['profiling time'] = (t_end - t_start)
+ with open(args.dumpfile, "w") as ofile:
+ json.dump(dumpdata, ofile)
+ print("done.., profiling time = ", (t_end - t_start), " seconds")
diff --git a/tools/pareto_profiler/estimator/pareto.py b/tools/pareto_profiler/estimator/pareto.py
new file mode 100644
index 000000000..9c62eb358
--- /dev/null
+++ b/tools/pareto_profiler/estimator/pareto.py
@@ -0,0 +1,84 @@
+#! /usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+class ParetoData:
+ def __init__(self):
+ self._pareto_solutions = {}
+ self._configs = {}
+ self._cnt = 0
+ self._explore = {}
+
+ def add_pareto_entry(self,
+ sample,
+ exec_time,
+ max_rss,
+ key,
+ explore_flag,
+ check_one_hop=True):
+ self._pareto_solutions[key] = [exec_time, max_rss]
+ self._configs[key] = sample
+ if explore_flag == True and check_one_hop == True:
+ self._explore[key] = False
+ elif explore_flag == True and check_one_hop == False:
+ self._explore[key] = True
+
+ def update_pareto_solutions(self, sample, exec_time, max_rss, explore_flag=False):
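+ # Keep the candidate only if no stored entry dominates it (strictly
+ # better in both time and memory); any stored entry the candidate
+ # dominates is overwritten in place.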
+ new_item = True
+ if self._pareto_solutions:
+ for key in list(self._pareto_solutions):
+ if self._pareto_solutions[key][0] < exec_time and self._pareto_solutions[key][1] < max_rss:
+ new_item = False
+ break
+ elif self._pareto_solutions[key][0] > exec_time and self._pareto_solutions[key][1] > max_rss:
+ self.add_pareto_entry(sample, exec_time, max_rss, key, explore_flag,
+ True)
+ new_item = False
+
+ if new_item is True:
+ self.add_pareto_entry(sample, exec_time, max_rss, self._cnt, explore_flag,
+ False)
+ self._cnt += 1
+
+ def dump_pareto_solutions(self, dumpdata):
+ marked = {}
+ pareto_results = []
+ for i in range(self._cnt):
+ if self._configs[i] not in marked:
+ marked[self._configs[i]] = True
+ pareto_results.append({
+ "id": self._configs[i],
+ "exec_time": self._pareto_solutions[i][0],
+ "max_rss": self._pareto_solutions[i][1]
+ })
+ dumpdata.update({"solutions": pareto_results})
+
+ return dumpdata
+
+ def get_pareto_keys(self):
+ return self._configs.keys()
+
+ def get_config(self, key):
+ return self._configs[key]
+
+ def get_exploration(self, key):
+ return self._explore[key]
+
+ def set_exploration(self, key):
+ self._explore[key] = True
+
+ def set_config(self, key, extended_value):
+ self._configs[key] = extended_value
diff --git a/tools/pareto_profiler/estimator/profile_args.py b/tools/pareto_profiler/estimator/profile_args.py
new file mode 100644
index 000000000..4690d127f
--- /dev/null
+++ b/tools/pareto_profiler/estimator/profile_args.py
@@ -0,0 +1,37 @@
+#! /usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+
+
+class ProfileArgs(argparse.ArgumentParser):
+ def __init__(self, *args, **kwargs):
+ super(ProfileArgs, self).__init__(*args, **kwargs)
+ self.add_argument(
+ 'model', type=str, default=None, help='nnpackage name with path')
+ self.add_argument('run_folder', type=str, help="path to onert_run executable")
+ self.add_argument(
+ '--mode',
+ type=str.lower,
+ choices=["index", "name"],
+ default="name",
+ help='Profile by operation index or name')
+ self.add_argument('--backends', type=int, default=2, help='Number of backends')
+ self.add_argument(
+ '--dumpfile',
+ type=str.lower,
+ default="/tmp/final_result.json",
+ help='JSON Dumpfile name with path')
diff --git a/tools/pareto_profiler/estimator/random_sampler.py b/tools/pareto_profiler/estimator/random_sampler.py
new file mode 100644
index 000000000..7646ea62c
--- /dev/null
+++ b/tools/pareto_profiler/estimator/random_sampler.py
@@ -0,0 +1,60 @@
+#! /usr/bin/python
+import argparse
+import json
+import numpy as np
+import sys
+import subprocess
+import time
+from pareto import ParetoData
+from profile_args import ProfileArgs
+from runner import Runner
+from utils import progressbar
+
+if __name__ == "__main__":
+ t_start = time.time()
+ parser = ProfileArgs("random_sampler.py", description="Random sampler")
+ parser.add_argument(
+ '--iterations', type=int, default=100, help='Number of iterations')
+
+ # Parse arguments
+ args = parser.parse_args()
+ dumpfile = args.dumpfile
+ iterations = args.iterations
+
+ # Initialize a runner and Pareto data structure obj
+ runner = Runner(args.model, args.run_folder, args.backends, args.mode)
+ pareto_obj = ParetoData()
+ # Initialize variables for random sampler
+ n_assignments = runner.get_solution_spacelen()
+ n_iterations = min(iterations, n_assignments)
+ chk_ptr = 0
+ marked_samples = {}
+
+ # Profile at random over solution space
+ for r in range(n_iterations):
+ random_sample = int(np.random.rand() * n_assignments)
+ while random_sample in marked_samples:
+ random_sample = int(np.random.rand() * n_assignments)
+ marked_samples[random_sample] = True
+ if args.mode == "name":
+ exec_time, max_rss = runner.profile_by_opname(random_sample)
+ elif args.mode == "index":
+ exec_time, max_rss = runner.profile_by_opindex(random_sample)
+ else:
+ print("Invalid mode ", mode)
+ sys.exit(-1)
+
+ pareto_obj.update_pareto_solutions(random_sample, exec_time, max_rss)
+ progressbar(r, n_assignments, prefix="% samples computed. : ")
+ progressbar(r + 1, n_assignments, prefix="% samples computed. : ")
+
+ # Dump profiler results
+ dumpdata = {}
+ dumpdata['mode'] = args.mode
+ dumpdata = pareto_obj.dump_pareto_solutions(dumpdata)
+ dumpdata = runner.dump_config(dumpdata)
+ with open(dumpfile, "w") as ofile:
+ json.dump(dumpdata, ofile)
+ t_end = time.time()
+ print("\n")
+ print("done.., profiling time = ", (t_end - t_start), " seconds")
diff --git a/tools/pareto_profiler/estimator/runner.py b/tools/pareto_profiler/estimator/runner.py
new file mode 100644
index 000000000..05532e3df
--- /dev/null
+++ b/tools/pareto_profiler/estimator/runner.py
@@ -0,0 +1,148 @@
+#! /usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import numpy as np
+from utils import fetch_config_by_name
+from utils import fetch_config_by_indx
+from utils import generate_vars
+from utils import generate_vars_for_indx
+from utils import exec_shell
+from utils import import_configs
+from utils import int_to_vec
+import sys
+
+
+class Mapper:
+ def __init__(self, opmap, oplist, opname_by_index):
+ self._opmap = opmap
+ self._oplist = oplist
+ self._opname_by_indx = opname_by_index
+
+ def get_oplist(self):
+ return self._oplist
+
+ def get_opmap(self):
+ return self._opmap
+
+ def get_opname_by_indx(self):
+ return self._opname_by_indx
+
+ def get_indices(self, value):
+ indx_list = []
+ for i in range(len(self._opname_by_indx)):
+ if self._opname_by_indx[i] == value:
+ indx_list.append(i)
+ return indx_list
+
+ def map_to_extended_space(self, n, backends):
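+ # Expand a per-opname assignment into the per-<opname, data size> space:
+ # every graph index that shares an operation's name inherits that
+ # operation's backend digit.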
+ n_vec = int_to_vec(n, backends, len(self._oplist))
+ extended_vec = np.zeros(max(self._opmap) + 1, dtype=int)
+ cnt = 0
+
+ for allocation in n_vec:
+ extended_pos = list(
+ set([self._opmap[i] for i in self.get_indices(self._oplist[cnt])]))
+ try:
+ extended_vec[extended_pos] = allocation
+ except IndexError:
+ print("extended_vec size = ", extended_vec.size, ", extended_pos = ",
+ extended_pos)
+ cnt += 1
+ extended_n = int(''.join(str(i) for i in extended_vec[::-1]), backends)
+ return extended_n
+
+
+class Runner:
+ def __init__(self, model, run_folder, num_backends, mode):
+ self._model = model
+ self._run_folder = run_folder
+ self._mode = mode
+ oplist, opmap, opname_by_index = import_configs(mode)
+ self._mapper = Mapper(opmap, oplist, opname_by_index)
+ self._nbackends = num_backends
+ self._extended_map = {}
+
+ def get_solution_spacelen(self):
+ if self._mode == "name":
+ return self._nbackends**len(self._mapper.get_oplist())
+ elif self._mode == "index":
+ return self._nbackends**max(self._mapper.get_opmap())
+ else:
+ print("Unknown mode ", mode, ", exiting profiler")
+ sys.exit(-1)
+
+ def get_nbits(self, extended_search_mode):
+ if self._mode == "index" and extended_search_mode == True:
+ return max(self._mapper.get_opmap())
+ else:
+ return len(self._mapper.get_oplist())
+
+ def get_mode_extended(self):
+ return (self._mode == "index")
+
+ def get_extended_solution(self, s):
+ if s in self._extended_map:
+ return self._extended_map[s]
+
+ extended_value = self._mapper.map_to_extended_space(s, self._nbackends)
+ self._extended_map[s] = extended_value
+ return extended_value
+
+ def run_inference(self, solution):
+ cmd_str = [
+ ". /tmp/envvars.sh && " + self._run_folder + "/onert_run -w1 -r1 -m1 -l " +
+ self._model + "/metadata/tc/input.h5 " + self._model + " 2> /dev/null"
+ ]
+ res = exec_shell(cmd_str, newline_split=True)
+ try:
+ exec_time = float(res[4].split(' ')[-2])
+ max_rss = int(res[13].split(' ')[-2])
+ except IndexError:
+ print("got index error at config ", solution)
+ print("result: ", res)
+ print("####")
+ sys.exit(-1)
+ return (exec_time, max_rss)
+
+ def profile_by_opname(self, solution):
+ generate_vars(self._mapper.get_oplist(), solution, self._nbackends)
+ return self.run_inference(solution)
+
+ def profile_by_opindex(self, solution):
+ generate_vars_for_indx(self._mapper.get_opmap(), solution, self._nbackends)
+ return self.run_inference(solution)
+
+ def get_opconfig(self):
+ return self._mapper.get_oplist(), self._mapper.get_opmap(
+ ), self._mapper.get_opname_by_indx()
+
+ def dump_config(self, dumpdata):
+ if self._mode == "name":
+ dumpdata.update({'oplist': self._mapper.get_oplist()})
+ elif self._mode == "index":
+ dumpdata.update({'oplist': self._mapper.get_opmap()})
+
+ configs = {}
+ for solution in dumpdata['solutions']:
+ if self._mode == "name":
+ configs[int(solution["id"])] = fetch_config_by_name(
+ dumpdata['oplist'], solution["id"], self._nbackends)
+ elif self._mode == "index":
+ configs[int(solution["id"])] = fetch_config_by_indx(
+ dumpdata['oplist'], solution["id"], self._nbackends)
+ dumpdata.update({'configs': configs})
+ return dumpdata
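The extended-space mapping in `Mapper.map_to_extended_space` is easier to follow with concrete data. Below is a minimal, self-contained sketch (hypothetical `oplist`, `opmap`, and `opname_by_indx` values, two backends) of how one digit per unique op name is broadcast to every `<op name, data size>` slot that shares the name:

```python
import numpy as np

# Hypothetical data mirroring the structures produced by gen_oplist.py.
oplist = ["Conv2D", "Reduce"]                    # unique onert op names
opname_by_indx = ["Conv2D", "Conv2D", "Reduce"]  # op name per graph node
opmap = [0, 1, 2]                                # graph node -> extended slot

solution = 0b01  # least-significant digit first: Conv2D -> 1, Reduce -> 0
digits = [(solution // 2**i) % 2 for i in range(len(oplist))]

extended = np.zeros(max(opmap) + 1, dtype=int)
for cnt, allocation in enumerate(digits):
    # Every extended slot whose node carries this op name inherits the digit.
    slots = {opmap[i] for i, name in enumerate(opname_by_indx)
             if name == oplist[cnt]}
    extended[list(slots)] = allocation

extended_n = int(''.join(str(d) for d in extended[::-1]), 2)
print(bin(extended_n))  # 0b11: both Conv2D slots inherit backend 1
```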
diff --git a/tools/pareto_profiler/estimator/utils.py b/tools/pareto_profiler/estimator/utils.py
new file mode 100644
index 000000000..9278674e3
--- /dev/null
+++ b/tools/pareto_profiler/estimator/utils.py
@@ -0,0 +1,201 @@
+#! /usr/bin/python
+import subprocess
+import numpy as np
+import sys
+import os
+import json
+"""
+ General executor for bash-like shell. Supports multiline results.
+"""
+
+
+def exec_shell(command_str, newline_split=False):
+ result = subprocess.Popen(command_str, shell=True, stdout=subprocess.PIPE)
+ out, err = result.communicate()
+ if (newline_split):
+ res = out.decode("utf-8").split('\n')
+ res = res[:-1]
+ return res
+ else:
+ return out.decode("utf-8").split("\n")[0]
+
+
+"""
+ Given a number and its base, return its symbol-wise vector representation
+"""
+
+
+def int_to_vec(n, b, n_operations):
+ number_arr = np.zeros(n_operations, dtype=int)
+ i = n_operations - 1
+ while (n != 0):
+ number_arr[i] = n % b
+ n = n // b
+ i -= 1
+
+ return number_arr[::-1]
+
+
+"""
+ Generate onert backend mapping for each graph node, given the encoded information in the parameters.
+ The details of the parameters are as follows:
+ 1. oplist - a vector that maps each graph node to a unique <operation name, data size> id
+ that was generated by an earlier script (gen_oplist.py)
+ 2. number - the encoded backend assignment, typically a very long integer
+ 3. base_value - for practical purposes, this is equivalent to the number of backends
+"""
+
+
+def generate_vars_for_indx(oplist, number, base_value):
+ ofile = open('/tmp/envvars.sh', 'w')
+ backend_map = {0: "=cpu", 1: "=acl_cl", 2: "=acl_neon"}
+
+ if (base_value == 2):
+ ofile.write("export BACKENDS=\"acl_cl;cpu\"")
+ elif (base_value == 3):
+ ofile.write("export BACKENDS=\"acl_cl;acl_neon;cpu\"")
+ ofile.write("\n")
+ number_arr = int_to_vec(number, base_value, len(oplist))
+ cnt = 0
+ op_backend_map_str = "export OP_BACKEND_MAP=\""
+ for cnt in range(len(oplist)):
+ backend_str = backend_map[int(number_arr[oplist[cnt]])]
+ op_backend_map_str += ''.join([str(cnt), backend_str])
+
+ if (cnt < (len(oplist) - 1)):
+ op_backend_map_str += ";"
+ else:
+ op_backend_map_str += "\""
+ ofile.write(op_backend_map_str)
+ ofile.write("\n")
+ ofile.close()
+
+
+"""
+ Print onert backend mapping for each graph node, given the encoded information in the parameters.
+ The details of the parameters are as follows:
+ 1. oplist - a vector that maps each graph node to a unique <operation name, data size> id
+ that was generated by an earlier script (gen_oplist.py)
+ 2. number - the encoded backend assignment, typically a very long integer
+ 3. base_value - for practical purposes, this is equivalent to the number of backends
+"""
+
+
+def fetch_config_by_indx(oplist, number, base_value):
+ var_str = ""
+ backend_map = {0: "=cpu", 1: "=acl_cl", 2: "=acl_neon"}
+
+ if (base_value == 2):
+ var_str += "BACKENDS=\"acl_cl;cpu\""
+ elif (base_value == 3):
+ var_str += "BACKENDS=\"acl_cl;acl_neon;cpu\""
+ var_str += " "
+ number_arr = int_to_vec(number, base_value, len(oplist))
+ cnt = 0
+ var_str += "OP_BACKEND_MAP=\""
+ op_backend_map_str = ""
+ for cnt in range(len(oplist)):
+ backend_str = backend_map[int(number_arr[oplist[cnt]])]
+ op_backend_map_str += ''.join([str(cnt), backend_str])
+
+ if (cnt < (len(oplist) - 1)):
+ op_backend_map_str += ";"
+ else:
+ op_backend_map_str += "\""
+ var_str += op_backend_map_str
+ return var_str
+
+
+"""
+ Generate onert backend mapping for each graph operation name, given the encoded information in the parameters.
+ The details of the parameters are as follows:
+ 1. oplist - a vector that maps each graph node to a unique operation name.
+ The list is generated by an earlier script (gen_oplist.py)
+ 2. number - the encoded backend assignment, typically a long integer
+ 3. base_value - for practical purposes, this is equivalent to the number of backends
+"""
+
+
+def generate_vars(oplist, number, base_value):
+ ofile = open('/tmp/envvars.sh', 'w')
+ backend_map = {0: "=cpu", 1: "=acl_cl", 2: "=acl_neon"}
+ if (base_value == 2):
+ ofile.write("export BACKENDS=\"acl_cl;cpu\"")
+ elif (base_value == 3):
+ ofile.write("export BACKENDS=\"acl_cl;acl_neon;cpu\"")
+ ofile.write("\n")
+ number_str = int_to_vec(number, base_value, len(oplist))
+
+ cnt = 0
+ for n in number_str:
+ op_backend_map_str = ''.join(
+ ["export OP_BACKEND_", oplist[cnt], backend_map[int(n)]])
+ ofile.write(op_backend_map_str)
+ ofile.write("\n")
+ cnt += 1
+ ofile.close()
+
+
+"""
+ Print onert backend mapping for each graph operation name, given the encoded information in the parameters.
+ The details of the parameters are as follows:
+ 1. oplist - a vector that maps each graph node to a unique operation name.
+ The list is generated by an earlier script (gen_oplist.py)
+ 2. number - the encoded backend assignment, typically a long integer
+ 3. base_value - for practical purposes, this is equivalent to the number of backends
+"""
+
+
+def fetch_config_by_name(oplist, number, base_value):
+ var_str = ""
+ backend_map = {0: "=cpu", 1: "=acl_cl", 2: "=acl_neon"}
+ if (base_value == 2):
+ var_str += "BACKENDS=\"acl_cl;cpu\""
+ elif (base_value == 3):
+ var_str += "BACKENDS=\"acl_cl;acl_neon;cpu\""
+ var_str += " "
+
+ number_str = int_to_vec(number, base_value, len(oplist))
+
+ cnt = 0
+ for n in number_str:
+ var_str += ''.join(["OP_BACKEND_", oplist[cnt], backend_map[int(n)]])
+ var_str += " "
+ cnt += 1
+ return var_str
+
+
+"""
+ Import the operation list, map, and related information for profiling. Note: this information should have been
+ dumped to /tmp/oplist.json by the gen_oplist.py script.
+"""
+
+
+def import_configs(mode):
+ if not os.path.isfile('/tmp/oplist.json'):
+ print("No oplist")
+ sys.exit(-1)
+ with open('/tmp/oplist.json', 'r') as ifile:
+ data = json.load(ifile)
+ oplist = data['oplist']
+ if mode == "name":
+ return oplist, None, None
+ elif mode == "index":
+ opmap = data['opmap']
+ opname_by_indx = data['opname_by_indx']
+ return oplist, opmap, opname_by_indx
+
+ print("mode is incorrect")
+ sys.exit(-1)
+
+
+"""
+ Generic Progress bar display
+"""
+
+
+def progressbar(current_cnt, max_cnt, prefix="", file=sys.stdout):
+ x = int(current_cnt * 100.0 / max_cnt)
+ file.write("%s[%s%s] %i/%i\r" % (prefix, "#" * x, "." * (100 - x), x, 100))
+ file.flush()
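To make the base-b solution encoding above concrete: each candidate configuration is one integer whose i-th digit (in base `base_value`, least-significant digit first) selects the backend of the i-th op. A small worked example with hypothetical ops and two backends:

```python
# Worked example of the encoding consumed by generate_vars (hypothetical ops).
backend_map = {0: "=cpu", 1: "=acl_cl"}
oplist = ["Conv2D", "Pool2D", "Reduce"]

n, b = 6, 2          # 6 == 0b110
digits = []
for _ in oplist:     # least-significant digit first
    digits.append(n % b)
    n //= b          # digits == [0, 1, 1]

for op, d in zip(oplist, digits):
    print("export OP_BACKEND_" + op + backend_map[d])
# export OP_BACKEND_Conv2D=cpu
# export OP_BACKEND_Pool2D=acl_cl
# export OP_BACKEND_Reduce=acl_cl
```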
diff --git a/tools/pareto_profiler/generator/gen_oplist.py b/tools/pareto_profiler/generator/gen_oplist.py
new file mode 100644
index 000000000..5511937d2
--- /dev/null
+++ b/tools/pareto_profiler/generator/gen_oplist.py
@@ -0,0 +1,165 @@
+#! /usr/bin/python
+import argparse
+import tensorflow as tf
+import sys
+sys.path.append("../estimator")
+import subprocess
+import os
+import json
+from functools import reduce
+from utils import exec_shell
+"""
+ Generates, from a tflite model, the list of unique onert operation names used in the model
+"""
+
+
+def generate_oplist_by_name(tflite_file):
+ with open("operations_map.json") as ifile:
+ data = json.load(ifile)
+ op_dict = data['op_dict']
+
+ intr = tf.lite.Interpreter(tflite_file)
+ intr.allocate_tensors()
+ tf_opset = set(op['op_name'] for op in intr._get_ops_details())
+ try:
+ onert_ops = set([op_dict[op] for op in tf_opset])
+ except KeyError:
+ print("Invalid mapping, check your tensorflow ops for new/unknown mappings: ",
+ tf_opset)
+ sys.exit(-1)
+ return onert_ops
+
+
+"""
+ Returns the total data size for the model graph node (inputs + outputs)
+ Params:
+ op: operation instance (obtained from _get_ops_details())
+ tsr: tensor instance (obtained from get_tensor_details())
+"""
+
+
+def get_op_data_size(op, tsr):
+ data_size = 0
+ for idx in op['inputs']:
+ if tsr[idx]['shape'].size > 0:
+ data_size += reduce(lambda x, y: x * y,
+ tsr[idx]['shape']) * tsr[idx]['shape'].dtype.itemsize
+
+ for idx in op['outputs']:
+ if tsr[idx]['shape'].size > 0:
+ data_size += reduce(lambda x, y: x * y,
+ tsr[idx]['shape']) * tsr[idx]['shape'].dtype.itemsize
+ return data_size
+
+
+"""
+ Generates, from a tflite model, the following outputs:
+ 1. opmap - a symbol/bit index mapping from every graph operation to a unique <operation name, data size> index identifier. This mapping
+ will be used later when profiling the model at runtime.
+
+ 2. oplist - a list of unique onert operation names used in the model
+
+ 3. opname_by_index - a list of onert operation names, indexed by their topological order in the model
+"""
+
+
+def generate_oplist_by_name_size(tflite_file):
+ intr = tf.lite.Interpreter(tflite_file)
+ intr.allocate_tensors()
+ ops = intr._get_ops_details()
+ tsr = intr.get_tensor_details()
+
+ opset = set()
+ oplist = set()
+ indx = []
+ opname_by_indx = []
+ # Fetch tensorflow operation mapping to onert kernels
+ with open("operations_map.json") as ifile:
+ data = json.load(ifile)
+ op_dict = data['op_dict']
+
+ # Fetch all unique operation names and <operation name, tensordata size> pairs
+ for op in ops:
+ opset.add((op['op_name'], get_op_data_size(op, tsr)))
+ oplist.add(op_dict[op['op_name']])
+ indx.append(op['index'])
+ opname_by_indx = [op_dict[ops[i]['op_name']] for i in indx]
+
+ # Create a 'm' bit/symbol map indexed by <opname, tensordata size> values
+ inv_opset_map = {}
+ i = 0
+ for op in opset:
+ inv_opset_map[op] = i
+ i += 1
+
+ # Map 'n' operation symbol space to 'm' <opname, tensordata size> space
+ op_map = []
+ for op in ops:
+ data_size = get_op_data_size(op, tsr)
+ op_map.append(inv_opset_map[(op['op_name'], data_size)])
+
+ return op_map, oplist, opname_by_indx
+
+
+"""
+Script to generate oplist, given the following details:
+1. Modelfile
+2. target device type
+3. Additional information, such as authentication for file transfer
+
+Info: python gen_oplist.py --help
+"""
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(
+ description='''gen_oplist: Generates oplist and uploads it to the target''',
+ epilog="""Success.""")
+ parser.add_argument(
+ '--auth', type=str, default=None, help='authentication: <user@host>')
+ parser.add_argument(
+ '--mode',
+ type=str.lower,
+ choices=["index", "name"],
+ default="name",
+ help='Profile by operation index or name')
+ parser.add_argument('model', type=str, default=None, help='tflite name with path')
+ parser.add_argument(
+ 'target',
+ type=str.lower,
+ choices=['tizen', 'odroid'],
+ default="odroid",
+ help='target name')
+
+ # Parse arguments
+ args = parser.parse_args()
+ modelfile = args.model
+ target = args.target
+ mode = args.mode
+ if target == "odroid":
+ auth_str = args.auth
+ if auth_str is None:
+ print("Need valid authentication")
+ sys.exit(-1)
+
+ # Generate oplist
+ if mode == "name":
+ opset = generate_oplist_by_name(modelfile)
+ print(opset)
+ with open('/tmp/oplist.json', 'w') as opfile:
+ data = {}
+ data['oplist'] = list(opset)
+ json.dump(data, opfile)
+ elif mode == "index":
+ data = {}
+ opmap, oplist, opname_by_indx = generate_oplist_by_name_size(modelfile)
+ data['opmap'] = opmap
+ data['oplist'] = list(oplist)
+ data['opname_by_indx'] = opname_by_indx
+ with open('/tmp/oplist.json', 'w') as opfile:
+ json.dump(data, opfile)
+ # Upload oplist to target
+ if target == "tizen":
+ exec_shell("sdb push /tmp/oplist.json /tmp/oplist.json")
+ elif target == "odroid":
+ print("auth_str = ", auth_str)
+ exec_shell("scp /tmp/oplist.json " + auth_str + ":/tmp/oplist.json")
+ print("done...")
diff --git a/tools/pareto_profiler/generator/operations_map.json b/tools/pareto_profiler/generator/operations_map.json
new file mode 100644
index 000000000..c35547ed9
--- /dev/null
+++ b/tools/pareto_profiler/generator/operations_map.json
@@ -0,0 +1,36 @@
+{ "op_dict": {
+ "SUM":"Reduce",
+ "ADD":"BinaryArithmetic",
+ "SUB":"BinaryArithmetic",
+ "DIV":"BinaryArithmetic",
+ "MUL":"BinaryArithmetic",
+ "REDUCE_MAX": "Reduce",
+ "REDUCE_MIN": "Reduce",
+ "CONV_2D": "Conv2D",
+ "PACK":"Pack",
+ "SOFTMAX":"Softmax",
+ "CONCATENATION":"Concat",
+ "EXP":"ElementwiseUnary",
+ "RESHAPE":"Reshape",
+ "SPLIT_V":"SplitV",
+ "ARG_MAX": "ArgMax",
+ "BATCH_TO_SPACE_ND":"BatchToSpaceND",
+ "DEPTHWISE_CONV_2D":"DepthwiseConv2D",
+ "LOGISTIC":"ElementwiseActivation",
+ "MEAN":"Reduce",
+ "RELU6":"ElementwiseActivation",
+ "RELU":"ElementwiseActivation",
+ "RESIZE_BILINEAR":"ResizeBilinear",
+ "REVERSE_V2":"Reverse",
+ "SPACE_TO_BATCH_ND":"SpaceToBatchND",
+ "AVERAGE_POOL_2D": "Pool2D",
+ "MAX_POOL_2D": "Pool2D",
+ "GATHER": "Gather",
+ "CAST": "ElementwiseUnary",
+ "FULLY_CONNECTED": "FullyConnected",
+ "PAD": "Pad",
+ "SLICE" : "Slice",
+ "STRIDED_SLICE": "StridedSlice",
+ "TRANSPOSE": "Transpose",
+ "UNPACK": "Unpack"
+}}
diff --git a/tools/release_tool/git_release.sh b/tools/release_tool/git_release.sh
index adba7df2f..00bf6bb70 100755
--- a/tools/release_tool/git_release.sh
+++ b/tools/release_tool/git_release.sh
@@ -5,7 +5,7 @@
getopt --test > /dev/null
if [ $? -ne 4 ]; then
echo "[ERROR] Your system doesn't have enhanced getopt"
- echo 2
+ exit 2
fi
function Usage()
@@ -25,6 +25,7 @@ function Usage()
echo "--repo_owner Owner of the repository"
echo "--repo_name The name of the repository"
echo "--asset Path of release asset"
+ echo "--asset_url URL from which release asset is downloaded"
echo ""
echo "[EXAMPLE]"
echo "$ ./git_release.sh --tag 1.9.0 --commitish release/1.9.0 --token 0de25f1ca5d1d758fe877b18c06 \\"
@@ -34,7 +35,8 @@ function Usage()
echo "$ ./git_release.sh --tag v1.1 --commitish c024e85d0ce6cb1ed2fbc66f1a9c1c2814da7575 \\"
echo " --token 0de25f1ca5d1d758fe877b18c06 --repo_owner Samsung --repo_name ONE \\"
echo " --release_name \"Release Automation\" --release_note /home/mhs4670go/ONE/release_doc \\"
- echo " --host_name github.sec.company.net --draft"
+ echo " --host_name github.sec.company.net --draft \\"
+ echo " --asset_url \"http://one.server.com/artifacts/ONE-compiler.tar.gz\""
echo ""
echo "[REFERENCE]"
echo "https://developer.github.com/v3/repos/releases/#create-a-release"
@@ -53,7 +55,8 @@ token:,\
host_name:,\
repo_owner:,\
repo_name:,\
-asset:"
+asset:,\
+asset_url:"
OPTS=$(getopt --options "$SHORT_OPTS" --longoptions "$LONG_OPTS" --name "$0" -- "$@")
@@ -71,6 +74,7 @@ unset REPO_OWNER
unset REPO_NAME
IS_DRAFT=false
ASSET_PATHS=()
+ASSET_URLS=()
while true ; do
case "$1" in
@@ -118,6 +122,10 @@ while true ; do
ASSET_PATHS+=("$2")
shift 2
;;
+ --asset_url )
+ ASSET_URLS+=("$2")
+ shift 2
+ ;;
-- )
shift
break
@@ -146,6 +154,12 @@ if [ -z ${USER_TOKEN} ]; then
exit 0
fi
+ASSETS_FROM_URL=()
+# Get asset name from url
+for ASSET_URL in "${ASSET_URLS[@]}"; do
+ ASSETS_FROM_URL+=($(basename "${ASSET_URL}"))
+done
+
# Print variables and set default value
DEFAULT_RELEASE_NAME="ONE Release ${TAG_NAME}"
DEFAULT_HOST_NAME="api.github.com"
@@ -162,6 +176,7 @@ echo "HOST_NAME : ${HOST_NAME:=${DEFAULT_HOST_NAME}}"
echo "REPO_OWNER : ${REPO_OWNER:=${DEFAULT_REPO_OWNER}}"
echo "REPO_NAME : ${REPO_NAME:=${DEFAULT_REPO_NAME}}"
echo "ASSETS : ${ASSET_PATHS[@]}"
+echo "ASSETS_FROM_URL : ${ASSETS_FROM_URL[@]}"
echo "==========================================================="
function generate_release_data()
@@ -183,7 +198,7 @@ RELEASE_URL=$(curl -s --request GET --header "Authorization: token ${USER_TOKEN}
https://${HOST_NAME}/repos/${REPO_OWNER}/${REPO_NAME}/releases/tags/${TAG_NAME} | \
jq -r '.url')
-if [ $RELEASE_URL != null ]; then
+if [ "$RELEASE_URL" != null ]; then
echo "[ERROR] The tag name you specified already exists."
exit 2
fi
@@ -197,10 +212,30 @@ jq -r '.upload_url')
UPLOAD_URL=$(echo ${UPLOAD_URL} | cut -d "{" -f 1)?name=
-# Upload the assets
+# Download assets from url
+TMPDIR=$(mktemp -d)
+pushd $TMPDIR
+for ASSET_URL in "${ASSET_URLS[@]}"; do
+ wget "$ASSET_URL"
+done
+popd
+
+# Upload the assets from url
+for ASSET_NAME in "${ASSETS_FROM_URL[@]}"; do
+ ASSET_PATH="${TMPDIR}/${ASSET_NAME}"
+ curl -s --request POST --header "Authorization: token ${USER_TOKEN}" \
+ --header "Content-Type: $(file -b --mime-type ${ASSET_PATH})" \
+ --data-binary @${ASSET_PATH} \
+ ${UPLOAD_URL}${ASSET_NAME} > /dev/null
+done
+
+rm -rf ${TMPDIR}
+
+# Upload the assets from local
for ASSET_PATH in "${ASSET_PATHS[@]}"; do
+ ASSET_BASENAME=$(basename ${ASSET_PATH})
curl -s --request POST --header "Authorization: token ${USER_TOKEN}" \
--header "Content-Type: $(file -b --mime-type ${ASSET_PATH})" \
--data-binary @${ASSET_PATH} \
- ${UPLOAD_URL}${ASSET_PATH} > /dev/null
+ ${UPLOAD_URL}${ASSET_BASENAME} > /dev/null
done
diff --git a/tools/release_tool/onert_version.sh b/tools/release_tool/onert_version.sh
index eafe96e3d..374a58acf 100755
--- a/tools/release_tool/onert_version.sh
+++ b/tools/release_tool/onert_version.sh
@@ -5,6 +5,7 @@ set -eu
progname=$(basename "${BASH_SOURCE[0]}")
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
nnfw_root="$( cd "${script_dir%*/*/*}" && pwd )"
+nightly=1
usage() {
echo "Usage: $progname version"
@@ -12,7 +13,8 @@ usage() {
echo ""
echo "Options:"
echo " -h show this help"
- echo " -s set onert version"
+ echo " -n show current onert version with nightly suffix"
+ echo " -s set onert version"
echo ""
echo "Examples:"
echo " $progname => show current onert version"
@@ -22,7 +24,13 @@ usage() {
show_version() {
version_line=$(cat ${nnfw_root}/packaging/nnfw.spec | grep "Version:")
- echo ${version_line#"Version:"}
+ current_version=${version_line#"Version:"}
+
+ if [ $nightly -eq 0 ]; then
+ echo $current_version~$(date -u "+%y%m%d%H")
+ else
+ echo $current_version
+ fi
exit 0
}
@@ -43,9 +51,10 @@ if [ $# -eq 0 ]; then
show_version
fi
-while getopts "hs:" OPTION; do
+while getopts "hns:" OPTION; do
case "${OPTION}" in
h) usage;;
+ n) nightly=0; show_version;;
s) set_version "$OPTARG";;
?) exit 1;;
esac
diff --git a/tools/stab/README.md b/tools/stab/README.md
new file mode 100644
index 000000000..c52ba4183
--- /dev/null
+++ b/tools/stab/README.md
@@ -0,0 +1,54 @@
+# Stab - Static Backend Scheduler
+
+`Stab` is a tool that schedules a backend for each operation using profiled data
+
+An nnpackage with the backend configuration will be created at `./tools/stab/nnpkg_sched`
+
+Supported backends : `cpu`, `ruy`, and `xnnpack`
+- Other backends will be supported when `stab` can measure and use permutation time between backends
+
+## Scheduling Process
+
+1. Upload ONE runtime and nnpackage to remote device
+ - Use `/tmp/ONE` folder on remote device
+1. Profile execution time of each backend on remote device
+1. Get profile result from remote device
+ - Profile result is saved at `./tools/stab/traces` on host
+1. Schedule a backend for each operation to get the fastest inference time
+ - Use fastest backend for each operation
+1. Generate nnpackage with backend configuration
+ - Generated at `./tools/stab/nnpkg_sched`
+
+## Prerequisite
+
+- Install Python>=3. Tested on Python 3.6.9 and 3.7.5
+- Register SSH keys to use ssh commands without entering a password
+ ```bash
+ ssh-keygen -t rsa
+ ssh-copy-id -i ~/.ssh/id_rsa.pub remote_user@remote_ip
+ ```
+
+## Usage
+
+```
+Usage: python3 ./tools/stab/stab.py --nnpackage nnpackage_dir --ip <IP>
+Runs the nnpackage on a remote device and creates an nnpackage with scheduled backends
+
+required arguments:
+ --nnpackage NNPACKAGE
+ nnpackage folder to profile
+ --ip IP IP address of remote client
+
+optional arguments:
+ -h, --help show this help message and exit
+ -n NUM_THREADS, --num_threads NUM_THREADS
+ Number of threads used by ONE runtime
+ -u USER, --user USER User of remote client
+ -v, --verbose Print verbose message
+ --no-profile Disable profiling
+
+Examples:
+ python3 ./tools/stab/stab.py --nnpackage ../nnpkg_tst/inception --ip 1.1.1.1 => Profile on remote device 1.1.1.1 with current user
+ python3 ./tools/stab/stab.py --nnpackage ../nnpkg_tst/inception --ip 1.1.1.1 -n 4 => Profile on remote device 1.1.1.1 using 4 threads for ONE runtime
+ python3 ./tools/stab/stab.py --nnpackage ../nnpkg_tst/inception --ip 1.1.1.1 --user odroid => Profile on remote device 1.1.1.1 with user odroid
+```
diff --git a/tools/stab/backend_profiler.py b/tools/stab/backend_profiler.py
new file mode 100644
index 000000000..c9d71332d
--- /dev/null
+++ b/tools/stab/backend_profiler.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from op_list_parser import OpListParser
+from remote import RemoteSSH
+
+
+class BackendProfiler():
+ """
+ Run ONE runtime on a remote device to create a trace file containing per-operation execution times
+
+ TODO : Support Android device profiling
+ """
+
+ def __init__(self, user, ip, nnpackage_dir, num_threads):
+ self.remote_ssh = RemoteSSH(user, ip, nnpackage_dir, num_threads)
+ self.backend_op_list = OpListParser().parse()
+ self.backend_list = ["cpu"]
+ self.backend_list.extend([backend for backend in self.backend_op_list])
+
+ def sync(self):
+ logging.info("Upload ONE runtime and nnpackage to remote device")
+ self.remote_ssh.sync_binary()
+
+ def profile(self):
+ for backend in self.backend_list:
+ logging.info(f"Profiling {backend} backend")
+ self.remote_ssh.profile_backend(backend, self.backend_op_list)
+ self.remote_ssh.sync_trace(backend)
diff --git a/tools/stab/backend_scheduler.py b/tools/stab/backend_scheduler.py
new file mode 100644
index 000000000..e18a1556f
--- /dev/null
+++ b/tools/stab/backend_scheduler.py
@@ -0,0 +1,156 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json, logging
+from pathlib import Path
+from op_list_parser import OpListParser
+from nnpkg_helper import NnpkgHelper
+
+
+class BackendScheduler:
+ """
+ Read profiled data and select the proper backend for each operation
+ The scheduled nnpackage is saved at ./tools/stab/nnpkg_sched
+
+ TODO : Use permutation time for better scheduling
+ """
+
+ def __init__(self, nnpkg_dir, num_threads):
+ self.nnpkg_dir = Path(nnpkg_dir).resolve()
+ self.num_threads = num_threads
+ self.root_path = Path(__file__).parents[2]
+ self.nnpkg_helper = NnpkgHelper()
+
+ def read_traces(self, backend_list):
+ op_time = {}
+ inference_time = {}
+ for backend in backend_list:
+ try:
+ # Trace file is located at ./tools/stab/traces
+ trace_path = Path(
+ __file__
+ ).parent / 'traces' / f"{self.nnpkg_dir.name}_{backend}_{self.num_threads}"
+ logging.debug(f"Trace path : {trace_path}")
+ with open(trace_path) as f:
+ data = json.load(f)
+ execution_data = data['Execution_Data']
+ for entry in execution_data:
+ if entry == "memory":
+ continue
+ elif entry == "runtime":
+ inference_time[backend] = execution_data['runtime']['Graph'][
+ 'Avg_Time']
+ continue
+ op_backend = entry
+ backend_data = execution_data[op_backend]
+ for op in backend_data:
+ op_index = int(op.split(' ')[2][1:])
+ op_type = op.split(' ')[-1]
+ time = int(backend_data[op]["Avg_Time"])
+ if op_index not in op_time.keys():
+ op_time[op_index] = {op_backend: time}
+ op_time[op_index].update({"type": op_type})
+ else:
+ op_time[op_index].update({op_backend: time})
+ except IOError as e:
+ logging.warning(e)
+ return op_time, inference_time
+
+ def schedule(self):
+ backend_op_list = OpListParser().parse()
+ backend_list = ["cpu"]
+ backend_list.extend([backend for backend in backend_op_list])
+
+ op_time, backend_infer_time = self.read_traces(backend_list)
+
+ backend_mapping = {}
+
+ target_ops = set()
+ for _, v in backend_op_list.items():
+ target_ops.update(v)
+
+ # Find fastest backend for each operation
+ for op_index, value in sorted(op_time.items()):
+ op_type = value['type']
+ if op_type not in target_ops:
+ continue
+
+ logging.debug(f"----- Operation {op_index} -----")
+ op_infer_time = 0
+ for backend in backend_list:
+ if backend not in value:
+ continue
+ backend_time = value[backend]
+
+ logging.debug(f"{backend}[{backend_time}]")
+ if op_infer_time == 0 or backend_time < op_infer_time:
+ op_infer_time = backend_time
+ backend_mapping[op_index] = backend
+
+ # Use the backend with the fastest whole-graph inference time as the default
+ default_backend = min(backend_infer_time, key=backend_infer_time.get)
+
+ # Create OP_BACKEND_MAP string
+ backend_conf = ""
+ for op_index, backend in sorted(backend_mapping.items()):
+ if backend != default_backend:
+ backend_conf += "{}={};".format(op_index, backend)
+
+ # Estimate the expected inference time per backend and for the schedule
+ logging.info("-------- Expected inference time ---------")
+ inference_time = {}
+ for backend in backend_list:
+ inference_time[backend] = 0
+ for op_index, value in sorted(op_time.items()):
+ if backend in value:
+ inference_time[backend] += value[backend]
+ else:
+ inference_time[backend] += value["cpu"]
+
+ schedule_time = 0
+ for op_index, value in sorted(op_time.items()):
+ op_type = value['type']
+ if op_type not in target_ops:
+ schedule_time += value["cpu"]
+ continue
+ else:
+ op_backend = backend_mapping[op_index]
+ schedule_time += value[op_backend]
+ if (default_backend != op_backend):
+ logging.debug("[{}] {} -> {} : {:.2f} ms decrease".format(
+ op_index, default_backend, op_backend,
+ (value[default_backend] - value[op_backend]) / 1000))
+
+ for backend in backend_list:
+ logging.info(f"{backend} backend : {inference_time[backend]/1000:.2f} ms")
+ logging.info(f"Backend scheduling : {schedule_time / 1000:.2f} ms")
+
+ logging.info("-------- Backend Scheduling --------")
+ cmd = []
+ cmd += [f"OP_BACKEND_MAP={backend_conf}"]
+ for target_backend, op_list in backend_op_list.items():
+ if default_backend == target_backend:
+ for op in op_list:
+ cmd += [f"OP_BACKEND_{op}={default_backend}"]
+ cmd += [f"BACKENDS={';'.join(backend_list)}"]
+ cmd += [f"RUY_THREADS={self.num_threads}"]
+ cmd += [f"XNNPACK_THREADS={self.num_threads}"]
+ logging.info(' '.join(cmd))
+
+ # Create nnpackage with backend mapping
+ dst_dir = Path(__file__).parent / 'nnpkg_sched' / self.nnpkg_dir.name
+ self.nnpkg_helper.copy(self.nnpkg_dir, dst_dir)
+ self.nnpkg_helper.add_config(dst_dir, cmd)
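With hypothetical profile results where `ruy` has the fastest whole-graph time and operation 7 alone runs faster on `xnnpack` (4 threads), the configuration assembled above and written into `config.cfg` would look roughly like:

```
OP_BACKEND_MAP=7=xnnpack;
OP_BACKEND_Conv2D=ruy
OP_BACKEND_FullyConnected=ruy
BACKENDS=cpu;ruy;xnnpack
RUY_THREADS=4
XNNPACK_THREADS=4
```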
diff --git a/tools/stab/nnpkg_helper.py b/tools/stab/nnpkg_helper.py
new file mode 100644
index 000000000..7e68760ff
--- /dev/null
+++ b/tools/stab/nnpkg_helper.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json, logging
+from distutils.dir_util import copy_tree
+from pathlib import Path
+
+
+class NnpkgHelper:
+ """
+ Helper class for nnpackage
+ """
+
+ def __init__(self):
+ self.config_name = 'config.cfg'
+
+ def copy(self, src, dst):
+ copy_tree(str(src), str(dst))
+
+ def add_config(self, src, configs):
+ manifest_path = Path(src).resolve() / 'metadata' / 'MANIFEST'
+ config_path = Path(src).resolve() / 'metadata' / self.config_name
+
+ try:
+ # Read MANIFEST file
+ with open(manifest_path, 'r') as manifest_file:
+ data = json.load(manifest_file)
+
+ # Add configs to MANIFEST file
+ with open(manifest_path, 'w') as manifest_file:
+ data['configs'] = [self.config_name]
+ json.dump(data, manifest_file, indent=2)
+
+ # Write config.cfg file
+ with open(config_path, 'w') as config_file:
+ config_file.write('\n'.join(configs))
+
+ logging.info(f"Scheduled nnpackage is saved at {src}")
+
+ except IOError as e:
+ logging.warning(e)
+ except Exception as e:
+ logging.warning(f"Failed to add config: {e}")
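`add_config` both writes `config.cfg` and registers it in the nnpackage MANIFEST. After this step, a MANIFEST would look roughly like the sketch below (keys other than `configs` are hypothetical placeholders):

```json
{
  "models": ["model.tflite"],
  "model-types": ["tflite"],
  "configs": ["config.cfg"]
}
```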
diff --git a/tools/stab/op_list.txt b/tools/stab/op_list.txt
new file mode 100644
index 000000000..7c5565655
--- /dev/null
+++ b/tools/stab/op_list.txt
@@ -0,0 +1,2 @@
+ruy:Conv2D,FullyConnected
+xnnpack:Conv2D,DepthwiseConv2D,FullyConnected
diff --git a/tools/stab/op_list_parser.py b/tools/stab/op_list_parser.py
new file mode 100644
index 000000000..d9fba508b
--- /dev/null
+++ b/tools/stab/op_list_parser.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from pathlib import Path
+
+
+class OpListParser():
+ """
+ Read op_list.txt to create supported operation list for each backend
+
+ TODO : Read the supported tensor type for each operation (FP32 or INT8)
+ """
+
+ def __init__(self):
+ self.file_name = "op_list.txt"
+ self.op_list_file = Path(__file__).parent / self.file_name
+
+ def parse(self):
+ backend_op_list = {}
+ with open(self.op_list_file, 'r') as f:
+ lines = f.readlines()
+ for line in lines:
+ line = line.rstrip()
+ backend, _, op_list_str = line.partition(':')
+ op_list = op_list_str.split(',')
+ backend_op_list[backend] = op_list
+ return backend_op_list
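Given the two-line `op_list.txt` above, `OpListParser().parse()` returns a backend-to-operations dictionary; a quick check:

```python
from op_list_parser import OpListParser

# op_list.txt resolves relative to the module file, so the working
# directory does not matter.
backend_op_list = OpListParser().parse()
print(backend_op_list)
# {'ruy': ['Conv2D', 'FullyConnected'],
#  'xnnpack': ['Conv2D', 'DepthwiseConv2D', 'FullyConnected']}
```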
diff --git a/tools/stab/remote.py b/tools/stab/remote.py
new file mode 100644
index 000000000..6e7e353da
--- /dev/null
+++ b/tools/stab/remote.py
@@ -0,0 +1,102 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import subprocess, logging
+from pathlib import Path
+
+
+class RemoteSSH():
+ """
+ Execute commands on a remote device using SSH
+
+ TODO : Use an SSH library instead of direct ssh calls
+ """
+
+ def __init__(self, user, ip, nnpkg_dir, num_threads):
+ self.base_dir = Path('/tmp/ONE')
+ self.trace_dir = 'traces'
+ self.host = f"{user}@{ip}" if user != None else ip
+ self.nnpkg_dir = Path(nnpkg_dir).resolve()
+ self.nnpkg_name = self.nnpkg_dir.name
+ self.root_path = Path(__file__).resolve().parents[2]
+ self.num_threads = num_threads
+
+ def sync_binary(self):
+ bin_dir = self.root_path / 'Product/armv7l-linux.release/out'
+ if (not bin_dir.is_dir()):
+ logging.warn(f"Build dir [{bin_dir}] is not exist")
+ exit()
+ elif (not self.nnpkg_dir.is_dir()):
+ logging.warn(f"nnpackage dir [{self.nnpkg_dir}] is not exist")
+ exit()
+ else:
+ # Create temporary folder
+ subprocess.call(
+ ["ssh", f"{self.host}", "mkdir", "-p", self.base_dir / self.trace_dir])
+ # Sync ONE runtime
+ subprocess.call([
+ "rsync", "-az", "--exclude", "test-suite.tar.gz", bin_dir,
+ self.remote(self.base_dir)
+ ])
+ # Sync target nnpackage
+ subprocess.call(["rsync", "-az", self.nnpkg_dir, self.remote(self.base_dir)])
+
+ def sync_trace(self, backend):
+ remote_trace_path = self.remote_trace_path(backend)
+ local_trace_path = self.local_trace_path(backend)
+ local_trace_path.parent.mkdir(parents=True, exist_ok=True)
+ logging.debug(f"Remote trace path : {self.remote(remote_trace_path)}")
+ logging.debug(f"Local trace path : {local_trace_path}")
+ # Sync trace file
+ subprocess.call(
+ ["rsync", "-az",
+ self.remote(remote_trace_path), local_trace_path])
+
+ def profile_backend(self, backend, backend_op_list):
+ nnpkg_run_path = self.base_dir / 'out/bin/onert_run'
+ nnpkg_path = self.base_dir / self.nnpkg_dir.name
+
+ cmd = ["ssh", f"{self.host}"]
+ cmd += [f"TRACE_FILEPATH={self.remote_trace_path(backend)}"]
+ for target_backend, op_list in backend_op_list.items():
+ if backend == target_backend:
+ for op in op_list:
+ cmd += [f"OP_BACKEND_{op}={backend}"]
+ cmd += [f"XNNPACK_THREADS={self.num_threads}"]
+ cmd += [f"RUY_THREADS={self.num_threads}"]
+ cmd += [f"BACKENDS=\'{';'.join(['cpu', backend])}\'"]
+ cmd += [f"{nnpkg_run_path}"]
+ cmd += [f"--nnpackage"]
+ cmd += [f"{nnpkg_path}"]
+ cmd += [f"-w5 -r50"]
+ logging.debug(f"SSH command : {' '.join(cmd)}")
+ subprocess.call(cmd)
+
+ def remote(self, path):
+ return f"{self.host}:{path}"
+
+ # TODO Create class for path generation
+ def trace_name(self, backend):
+ return f"{self.nnpkg_name}_{backend}_{self.num_threads}"
+
+ def remote_trace_path(self, backend):
+ return self.base_dir / self.trace_dir / self.trace_name(backend)
+
+ def local_trace_path(self, backend):
+ return Path(__file__).parent / self.trace_dir / self.trace_name(backend)
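For a concrete sense of `profile_backend`, with hypothetical arguments (user `odroid`, IP `1.1.1.1`, nnpackage `inception`, backend `ruy`, 4 threads) the assembled ssh command is roughly:

```
ssh odroid@1.1.1.1 TRACE_FILEPATH=/tmp/ONE/traces/inception_ruy_4 \
    OP_BACKEND_Conv2D=ruy OP_BACKEND_FullyConnected=ruy \
    XNNPACK_THREADS=4 RUY_THREADS=4 BACKENDS='cpu;ruy' \
    /tmp/ONE/out/bin/onert_run --nnpackage /tmp/ONE/inception -w5 -r50
```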
diff --git a/tools/stab/stab.py b/tools/stab/stab.py
new file mode 100644
index 000000000..7a069df5d
--- /dev/null
+++ b/tools/stab/stab.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse, logging, sys
+from backend_profiler import BackendProfiler
+from backend_scheduler import BackendScheduler
+
+
+def main(args):
+ if args.profile:
+ backend_profiler = BackendProfiler(args.user, args.ip, args.nnpackage,
+ args.num_threads)
+ backend_profiler.sync()
+ backend_profiler.profile()
+ backend_scheduler = BackendScheduler(args.nnpackage, args.num_threads)
+ backend_scheduler.schedule()
+
+
+if __name__ == "__main__":
+ arg_parser = argparse.ArgumentParser(add_help=False)
+ required = arg_parser.add_argument_group('required arguments')
+ optional = arg_parser.add_argument_group('optional arguments')
+
+ # Add back help
+ optional.add_argument(
+ '-h',
+ '--help',
+ action='help',
+ default=argparse.SUPPRESS,
+ help='show this help message and exit')
+ required.add_argument(
+ "--nnpackage", type=str, required=True, help="nnpackage folder to profile")
+ required.add_argument(
+ "--ip", type=str, required=True, help="IP address of remote client")
+ optional.add_argument(
+ "-n",
+ "--num_threads",
+ type=int,
+ default=1,
+ help="Number of threads used by one runtime")
+ optional.add_argument("-u", "--user", type=str, help="User of remote client")
+ optional.add_argument(
+ "-v",
+ "--verbose",
+ action='store_const',
+ dest="verbose_level",
+ default=logging.INFO,
+ const=logging.DEBUG,
+ help="Print verbose message")
+ optional.add_argument(
+ "--no-profile", dest='profile', action='store_false', help="Disable profiling")
+ optional.set_defaults(profile=True)
+ args = arg_parser.parse_args()
+
+ logging.basicConfig(
+ stream=sys.stdout,
+ level=args.verbose_level,
+ format="[%(levelname).5s] %(message)s")
+
+ main(args)
diff --git a/tools/tflite_accuracy/src/tflite_accuracy.cc b/tools/tflite_accuracy/src/tflite_accuracy.cc
index a532890a9..66c19a868 100644
--- a/tools/tflite_accuracy/src/tflite_accuracy.cc
+++ b/tools/tflite_accuracy/src/tflite_accuracy.cc
@@ -60,7 +60,7 @@ template <typename... Args> void Print(const char *fmt, Args... args)
template <typename DataType> struct BaseLabelData
{
explicit BaseLabelData(int label = -1, DataType confidence = 0)
- : label(label), confidence(confidence)
+ : label(label), confidence(confidence)
{
}
@@ -116,8 +116,8 @@ public:
Runner(std::unique_ptr<tflite::Interpreter> interpreter,
std::unique_ptr<tflite::FlatBufferModel> model,
std::unique_ptr<::nnfw::tflite::NNAPIDelegate> delegate, unsigned img_size)
- : interpreter(std::move(interpreter)), model(std::move(model)), delegate(std::move(delegate)),
- interrupted(false), kInputSize(1 * img_size * img_size * 3 * sizeof(DataType))
+ : interpreter(std::move(interpreter)), model(std::move(model)), delegate(std::move(delegate)),
+ interrupted(false), kInputSize(1 * img_size * img_size * 3 * sizeof(DataType))
{
inference_times.reserve(500);
top1.reserve(500);
@@ -308,7 +308,7 @@ public:
FloatRunner(std::unique_ptr<tflite::Interpreter> interpreter,
std::unique_ptr<tflite::FlatBufferModel> model,
std::unique_ptr<::nnfw::tflite::NNAPIDelegate> delegate, unsigned img_size)
- : Runner<float>(std::move(interpreter), std::move(model), std::move(delegate), img_size)
+ : Runner<float>(std::move(interpreter), std::move(model), std::move(delegate), img_size)
{
}
@@ -333,7 +333,7 @@ public:
QuantizedRunner(std::unique_ptr<tflite::Interpreter> interpreter,
std::unique_ptr<tflite::FlatBufferModel> model,
std::unique_ptr<::nnfw::tflite::NNAPIDelegate> delegate, unsigned img_size)
- : Runner<uint8_t>(std::move(interpreter), std::move(model), std::move(delegate), img_size)
+ : Runner<uint8_t>(std::move(interpreter), std::move(model), std::move(delegate), img_size)
{
}
@@ -411,12 +411,12 @@ std::unique_ptr<BaseRunner> MakeRunner(const std::string &model_path, unsigned i
if (interpreter->tensor(input_index)->type == kTfLiteFloat32)
{
return std::unique_ptr<FloatRunner>(
- new FloatRunner(std::move(interpreter), std::move(model), std::move(delegate), img_size));
+ new FloatRunner(std::move(interpreter), std::move(model), std::move(delegate), img_size));
}
else if (interpreter->tensor(input_index)->type == kTfLiteUInt8)
{
- return std::unique_ptr<QuantizedRunner>(new QuantizedRunner(
- std::move(interpreter), std::move(model), std::move(delegate), img_size));
+ return std::unique_ptr<QuantizedRunner>(
+ new QuantizedRunner(std::move(interpreter), std::move(model), std::move(delegate), img_size));
}
throw std::invalid_argument("data type of model's input tensor is not supported.");
}
@@ -424,13 +424,13 @@ std::unique_ptr<BaseRunner> MakeRunner(const std::string &model_path, unsigned i
Target GetTarget(const std::string &str)
{
static const std::map<std::string, Target> target_names{
- {"tflite-cpu", Target::TfLiteCpu},
- {"tflite-delegate", Target::TfLiteDelegate},
- {"nnfw-delegate", Target::NnfwDelegate}};
+ {"tflite-cpu", Target::TfLiteCpu},
+ {"tflite-delegate", Target::TfLiteDelegate},
+ {"nnfw-delegate", Target::NnfwDelegate}};
if (target_names.find(str) == target_names.end())
{
throw std::invalid_argument(
- str + ": invalid target. Run with --help for a list of available targets.");
+ str + ": invalid target. Run with --help for a list of available targets.");
}
return target_names.at(str);
}
@@ -451,19 +451,22 @@ void HandleSigInt(int)
}
}
-int main(int argc, char *argv[]) try
+int main(int argc, char *argv[])
+try
{
namespace po = boost::program_options;
po::options_description desc("Run a model on multiple binary images and print"
" statistics");
- desc.add_options()("help", "print this message and quit")(
- "model", po::value<std::string>()->default_value(kDefaultModelFile), "tflite file")(
- "input", po::value<std::string>()->default_value(kDefaultImagesDir),
- "directory with input images")("offset", po::value<int>()->default_value(1), "labels offset")(
- "target", po::value<std::string>()->default_value("nnfw-delegate"),
- "how the model will be run (available targets: tflite-cpu, "
- "tflite-delegate, nnfw-delegate)")("imgsize", po::value<unsigned>()->default_value(224),
- "the width and height of the image");
+ // clang-format off
+ desc.add_options()
+ ("help", "print this message and quit")
+ ("model", po::value<std::string>()->default_value(kDefaultModelFile), "tflite file")
+ ("input", po::value<std::string>()->default_value(kDefaultImagesDir), "directory with input images")
+ ("offset", po::value<int>()->default_value(1), "labels offset")
+ ("target", po::value<std::string>()->default_value("nnfw-delegate"),
+ "how the model will be run (available targets: tflite-cpu, tflite-delegate, nnfw-delegate)")
+ ("imgsize", po::value<unsigned>()->default_value(224), "the width and height of the image");
+ // clang-format on
po::variables_map vm;
po::store(po::parse_command_line(argc, argv, desc), vm);
if (vm.count("help"))
diff --git a/tools/tflitefile_tool/README.md b/tools/tflitefile_tool/README.md
index 9253fd1cc..200efb4cc 100644
--- a/tools/tflitefile_tool/README.md
+++ b/tools/tflitefile_tool/README.md
@@ -88,6 +88,55 @@ opcodelist.txt test.tflite -g 1
Above selects operator index 11, 12, 13 in subgraph 1
+### Generating separate models for a multi-model package from a model file using the model generator
+
+To build a multi-model package from one model, the separate models and the input/output information of each model are required.
+So run the model generator with the option `--store-io-info`
+
+#### How to use
+
+```
+./select_operator.py <base model file> <opcode list txt file> <output file name> --store-io-info <output json file name>
+```
+
+#### Example
+
+This example splits one model into two separate models.
+
+```
+$ cat 0-26.txt
+0-26
+
+$ cat 27-30.txt
+27-30
+
+$ ./tools/tflitefile_tool/select_operator.py mobilenet_v1_1.0_224.tflite 0-26.txt m1.tflite --store-io-info m1.json
+Input tensor(s): [81]
+Output tensor(s): [44]
+Append subgraphs, orginal index : 0 , new index : 0
+
+$ ./tools/tflitefile_tool/select_operator.py mobilenet_v1_1.0_224.tflite 27-30.txt m2.tflite --store-io-info m2.json
+Input tensor(s): [6]
+Output tensor(s): [7]
+Append subgraphs, orginal index : 0 , new index : 0
+
+$ cat m1.json
+{"org-model-io": {"inputs": {"new-indices": [81]}, "outputs": {"new-indices": [-1]}}, "new-model-io": {"inputs": {"org-indices": [88], "new-indices": [81]}, "outputs": {"org-indices": [50], "new-indices": [44]}}}
+
+$ cat m2.json
+{"org-model-io": {"inputs": {"new-indices": [-1]}, "outputs": {"new-indices": [7]}}, "new-model-io": {"inputs": {"org-indices": [50], "new-indices": [6]}, "outputs": {"org-indices": [87], "new-indices": [7]}}}
+
+```
+The meaning of `m1.json` above is as follows:
+- original model has 1 input and 1 output
+ - The only input is located at tensors[81] in the new model.
+ - The only output has new-index -1, which means it is not present in the new model.
+- new-model has 1 input and 1 output
+ - The only input was located at tensors[88] in the org model, and is located at tensors[81] in the new model.
+ - The only output was located at tensors[50] in the org model, and is located at tensors[44] in the new model.
+
+With the model files and input/output information files generated above, you can use `model2nnpkg.py` to create an nnpkg for multi-model.
+
## Collaboration between model parser and model generator
1. Get information about the base model using the model parser
diff --git a/tools/tflitefile_tool/config_saver.py b/tools/tflitefile_tool/config_saver.py
deleted file mode 100755
index abf2c0ca2..000000000
--- a/tools/tflitefile_tool/config_saver.py
+++ /dev/null
@@ -1,130 +0,0 @@
-#!/usr/bin/python
-
-# Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from operator_wrapping import Operator
-from tensor_printer import TensorPrinter
-from option_printer import OptionPrinter
-
-
-class ConfigSaver(object):
- def __init__(self, file_name, operator):
- self.file_name = file_name
- self.operator = operator
- # Set self.verbose to 1 level to print more information
- self.verbose = 1
- self.op_idx = operator.operator_idx
- self.op_name = operator.opcode_str
-
- self.f = open(file_name, 'at')
-
- def __del__(self):
- self.f.close()
-
- def SaveInfo(self):
- self.f.write("[{}]\n".format(self.op_idx))
- if (self.op_name == 'CONV_2D'):
- self.SaveConv2DInputs()
- else:
- self.SaveInputs()
-
- self.SaveOutputs()
-
- self.SaveAttributes()
-
- self.f.write('\n')
-
- def SaveConv2DInputs(self):
- if (len(self.operator.inputs) != 3):
- raise AssertionError('Conv2D input count should be 3')
-
- inputs = self.operator.inputs[0]
- weights = self.operator.inputs[1]
- bias = self.operator.inputs[2]
-
- self.f.write("input: {}\n".format(
- TensorPrinter(self.verbose, inputs).GetShapeString()))
- self.f.write("input_type: {}\n".format(inputs.type_name))
- self.f.write("weights: {}\n".format(
- TensorPrinter(self.verbose, weights).GetShapeString()))
- self.f.write("weights_type: {}\n".format(weights.type_name))
- self.f.write("bias: {}\n".format(
- TensorPrinter(self.verbose, bias).GetShapeString()))
- self.f.write("bias_type: {}\n".format(bias.type_name))
-
- def SaveInputs(self):
- total = len(self.operator.inputs)
- self.f.write("input_counts: {}\n".format(total))
- for idx in range(total):
- tensor = self.operator.inputs[idx]
- input_shape_str = TensorPrinter(self.verbose, tensor).GetShapeString()
- self.f.write("input{}: {}\n".format(idx, input_shape_str))
- self.f.write("input{}_type: {}\n".format(idx, tensor.type_name))
-
- def SaveOutputs(self):
- total = len(self.operator.outputs)
- self.f.write("output_counts: {}\n".format(total))
- for idx in range(total):
- tensor = self.operator.outputs[idx]
- output_shape_str = TensorPrinter(self.verbose, tensor).GetShapeString()
- self.f.write("output{}: {}\n".format(idx, output_shape_str))
- self.f.write("output{}_type: {}\n".format(idx, tensor.type_name))
-
- def SaveFilter(self):
- self.f.write("filter_w: {}\n".format(self.operator.options.FilterWidth()))
- self.f.write("filter_h: {}\n".format(self.operator.options.FilterHeight()))
-
- def SaveStride(self):
- self.f.write("stride_w: {}\n".format(self.operator.options.StrideW()))
- self.f.write("stride_h: {}\n".format(self.operator.options.StrideH()))
-
- def SaveDilation(self):
- self.f.write("dilation_w: {}\n".format(self.operator.options.DilationWFactor()))
- self.f.write("dilation_h: {}\n".format(self.operator.options.DilationHFactor()))
-
- def SavePadding(self):
- if self.operator.options.Padding() == 0:
- self.f.write("padding: SAME\n")
- elif self.operator.options.Padding() == 1:
- self.f.write("padding: VALID\n")
-
- def SaveFusedAct(self):
- if self.operator.fused_activation is not "NONE":
- self.f.write("fused_act: {}\n".format(self.operator.fused_activation))
-
- def SaveAttributes(self):
- # operator option
- # Some operations does not have option. In such case no option is printed
- option_str = OptionPrinter(self.verbose, self.op_name,
- self.operator.options).GetOptionString()
- if self.op_name == 'AVERAGE_POOL_2D' or self.op_name == 'MAX_POOL_2D':
- self.SaveFilter()
- self.SaveStride()
- self.SavePadding()
- elif self.op_name == 'CONV_2D':
- self.SaveStride()
- self.SaveDilation()
- self.SavePadding()
- elif self.op_name == 'TRANSPOSE_CONV':
- self.SaveStride()
- self.SavePadding()
- elif self.op_name == 'DEPTHWISE_CONV_2D':
- self.SaveStride()
- self.SaveDilation()
- self.SavePadding()
- self.f.write("depthmultiplier: {}\n".format(
- self.operator.options.DepthMultiplier()))
-
- self.SaveFusedAct()
diff --git a/tools/tflitefile_tool/graph_stats.py b/tools/tflitefile_tool/graph_stats.py
deleted file mode 100755
index 85acaefa6..000000000
--- a/tools/tflitefile_tool/graph_stats.py
+++ /dev/null
@@ -1,84 +0,0 @@
-#!/usr/bin/python
-
-# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-class GraphStats():
- def __init__(self):
- from collections import Counter
- from collections import defaultdict
- self.op_counts = Counter()
- self.filled_memory = 0
- self.total_memory = 0
-
- def accumulate_op_count(self, op_str, count):
- self.op_counts[op_str] += count
-
- def accumulate_filled_memory(self, size):
- self.filled_memory += size
-
- def accumulate_total_memory(self, size):
- self.total_memory += size
-
- def __iadd__(self, other):
- self.op_counts += other.op_counts
- self.filled_memory += other.filled_memory
- self.total_memory += other.total_memory
- return self
-
-
-def PrintGraphStats(stats, verbose):
- print("Number of all operator types: {0}".format(len(stats.op_counts)))
-
- # Print op type stats
- for op_name in sorted(stats.op_counts.keys()):
- occur = stats.op_counts[op_name]
- optype_info_str = "\t{:38}: {:4}".format(op_name, occur)
-
- print(optype_info_str)
-
- summary_str = "{0:46}: {1:4}".format("Number of all operators",
- sum(stats.op_counts.values()))
- print(summary_str)
- print('')
-
- # Print memory stats
- from tensor_printer import ConvertBytesToHuman
- print("Expected TOTAL memory: {0}".format(ConvertBytesToHuman(stats.total_memory)))
- print("Expected FILLED memory: {0}".format(ConvertBytesToHuman(stats.filled_memory)))
- print('')
-
-
-def CalcGraphStats(op_parser):
- stats = GraphStats()
-
- for type_str, oper_list in op_parser.operators_per_type.items():
- # number of occurrence of this operator type
- occur = len(oper_list)
- stats.accumulate_op_count(type_str, occur)
-
- # this operator type can be computed?
- can_compute = oper_list[0].operation.can_compute
-
- total_memory = 0
- filled_memory = 0 # only memory for constant
- for tensor in op_parser.GetAllTensors():
- if tensor.tf_buffer.DataLength() != 0:
- filled_memory += tensor.memory_size
- total_memory += tensor.memory_size
- stats.accumulate_filled_memory(filled_memory)
- stats.accumulate_total_memory(total_memory)
-
- return stats
diff --git a/tools/tflitefile_tool/ir/README.md b/tools/tflitefile_tool/ir/README.md
new file mode 100644
index 000000000..2625dfb91
--- /dev/null
+++ b/tools/tflitefile_tool/ir/README.md
@@ -0,0 +1,5 @@
+# IR
+
+A model has one or more subgraphs. A subgraph has operators and tensors.
+
+The parser uses these IRs as its data.
diff --git a/tools/tflitefile_tool/ir/__init__.py b/tools/tflitefile_tool/ir/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/tflitefile_tool/ir/__init__.py
diff --git a/tools/tflitefile_tool/ir/graph_stats.py b/tools/tflitefile_tool/ir/graph_stats.py
new file mode 100755
index 000000000..5aebdbeaa
--- /dev/null
+++ b/tools/tflitefile_tool/ir/graph_stats.py
@@ -0,0 +1,59 @@
+#!/usr/bin/python
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+class GraphStats():
+ def __init__(self):
+ from collections import Counter
+ self.op_counts = Counter()
+ self.filled_memory = 0
+ self.total_memory = 0
+
+ def accumulate_op_count(self, op_str, count):
+ self.op_counts[op_str] += count
+
+ def accumulate_filled_memory(self, size):
+ self.filled_memory += size
+
+ def accumulate_total_memory(self, size):
+ self.total_memory += size
+
+ def __iadd__(self, other):
+ self.op_counts += other.op_counts
+ self.filled_memory += other.filled_memory
+ self.total_memory += other.total_memory
+ return self
+
+
+def CalcGraphStats(subg):
+ stats = GraphStats()
+
+ for type_str, oper_list in subg.optypes_map.items():
+ # number of occurrence of this operator type
+ occur = len(oper_list)
+ stats.accumulate_op_count(type_str, occur)
+
+ total_memory = 0
+ filled_memory = 0 # only memory for constant
+ for index, tensor in subg.tensors_map.items():
+ if tensor.buffer is not None:
+ filled_memory += tensor.memory_size
+ total_memory += tensor.memory_size
+ stats.accumulate_filled_memory(filled_memory)
+ stats.accumulate_total_memory(total_memory)
+
+ return stats
diff --git a/tools/tflitefile_tool/ir/operator.py b/tools/tflitefile_tool/ir/operator.py
new file mode 100644
index 000000000..0601e6119
--- /dev/null
+++ b/tools/tflitefile_tool/ir/operator.py
@@ -0,0 +1,108 @@
+#!/usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+NOTE
+- This class is a wrapper around a native class.
+- Use this class only as an interface.
+"""
+
+
+class Operator(object):
+ def __init__(self):
+ self._index = -1
+ self._inputs = []
+ self._outputs = []
+ self._op_name = ""
+ self._activation = ""
+ self._options = ""
+
+ '''index'''
+
+ @property
+ def index(self):
+ '''operator's int type index'''
+ return self._index
+
+ @index.setter
+ def index(self, value):
+ if not isinstance(value, int):
+ raise TypeError("must be set to an integer")
+ self._index = value
+
+ '''inputs'''
+
+ @property
+ def inputs(self):
+        '''Operator's input tensors as a list of Tensors'''
+ return self._inputs
+
+ @inputs.setter
+ def inputs(self, value):
+ if not isinstance(value, list):
+ raise TypeError("must be set to a list")
+ self._inputs = value
+
+ '''outputs'''
+
+ @property
+ def outputs(self):
+        '''Operator's output tensors as a list of Tensors'''
+ return self._outputs
+
+ @outputs.setter
+ def outputs(self, value):
+ if not isinstance(value, list):
+ raise TypeError("must be set to a list")
+ self._outputs = value
+
+ '''op_name'''
+
+ @property
+ def op_name(self):
+ '''Operator's name str'''
+ return self._op_name
+
+ @op_name.setter
+ def op_name(self, value):
+ if not isinstance(value, str):
+ raise TypeError("must be set to a str")
+ self._op_name = value
+
+    '''activation'''
+
+    @property
+    def activation(self):
+        '''Operator's activation str'''
+        return self._activation
+
+    @activation.setter
+    def activation(self, value):
+        if not isinstance(value, str):
+            raise TypeError("must be set to a str")
+        self._activation = value
+
+ '''options'''
+
+ @property
+ def options(self):
+ '''Operator's options str'''
+ return self._options
+
+ @options.setter
+ def options(self, value):
+ if not isinstance(value, str):
+ raise TypeError("must be set to a str")
+ self._options = value
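The setters validate types, so a wrong assignment fails fast. A minimal sketch (the values are illustrative):

    from ir.operator import Operator

    op = Operator()
    op.index = 2
    op.op_name = "SOFTMAX"
    op.activation = "RELU"

    try:
        op.index = "2"  # setter rejects non-int values
    except TypeError as err:
        print(err)  # must be set to an integer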
diff --git a/tools/tflitefile_tool/ir/subgraph.py b/tools/tflitefile_tool/ir/subgraph.py
new file mode 100644
index 000000000..e68713480
--- /dev/null
+++ b/tools/tflitefile_tool/ir/subgraph.py
@@ -0,0 +1,170 @@
+#!/usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from collections.abc import MutableMapping
+
+
+class OpTypesMap(MutableMapping):
+    '''Maps an optype string to the list of Operators of that type.'''
+ def __init__(self, *args, **kwargs):
+ self.store = dict()
+ self.update(dict(*args, **kwargs))
+
+ def __getitem__(self, key):
+ return self.store[self._keytransform(key)]
+
+ def __setitem__(self, key, value):
+ k = self._keytransform(key)
+        if k not in self.store:
+ self.store[k] = []
+ self.store[k].append(value)
+
+ def __delitem__(self, key):
+ del self.store[self._keytransform(key)]
+
+ def __iter__(self):
+ return iter(self.store)
+
+ def __len__(self):
+ return len(self.store)
+
+ def _keytransform(self, key):
+ if not isinstance(key, str):
+            raise TypeError("key must be a str")
+ return key
+
+
+"""
+NOTE
+- This class expresses a wrapping class for a native class.
+- Just use this class as an interface.
+"""
+
+
+class Subgraph(object):
+ def __init__(self):
+ self._index = -1
+ self._inputs = []
+ self._outputs = []
+ self._subg_name = ""
+ self._model_name = ""
+ self._tensors_map = {}
+ self._operators_map = {}
+ self._optypes_map = OpTypesMap()
+
+ '''index'''
+
+ @property
+ def index(self):
+ '''Subgraph's int type index'''
+ return self._index
+
+ @index.setter
+ def index(self, value):
+ if not isinstance(value, int):
+ raise TypeError("must be set to an integer")
+ self._index = value
+
+ '''inputs'''
+
+ @property
+ def inputs(self):
+        '''Subgraph's input tensors as a list of Tensors'''
+ return self._inputs
+
+ @inputs.setter
+ def inputs(self, value):
+ if not isinstance(value, list):
+ raise TypeError("must be set to a list")
+ self._inputs = value
+
+ '''outputs'''
+
+ @property
+ def outputs(self):
+        '''Subgraph's output tensors as a list of Tensors'''
+ return self._outputs
+
+ @outputs.setter
+ def outputs(self, value):
+ if not isinstance(value, list):
+ raise TypeError("must be set to a list")
+ self._outputs = value
+
+ '''subg_name'''
+
+ @property
+ def subg_name(self):
+ '''Subgraph's name str'''
+ return self._subg_name
+
+ @subg_name.setter
+ def subg_name(self, value):
+ if not isinstance(value, str):
+ raise TypeError("must be set to a str")
+ self._subg_name = value
+
+ '''model_name'''
+
+ @property
+ def model_name(self):
+ '''Model name str'''
+ return self._model_name
+
+ @model_name.setter
+ def model_name(self, value):
+ if not isinstance(value, str):
+ raise TypeError("must be set to a str")
+ self._model_name = value
+
+ '''tensors_map'''
+
+ @property
+ def tensors_map(self):
+        '''All tensors in the Subgraph (key: index, value: Tensor)'''
+ return self._tensors_map
+
+ @tensors_map.setter
+ def tensors_map(self, value):
+ if not isinstance(value, dict):
+ raise TypeError("must be set to a dict")
+ self._tensors_map = value
+
+ '''operators_map'''
+
+ @property
+ def operators_map(self):
+        '''Subgraph's operators (key: index, value: Operator)'''
+ return self._operators_map
+
+ @operators_map.setter
+ def operators_map(self, value):
+ if not isinstance(value, dict):
+ raise TypeError("must be set to a dict")
+ self._operators_map = value
+
+ '''optypes_map'''
+
+ @property
+ def optypes_map(self):
+        '''Subgraph's operators per type (key: optype, value: list of Operators)'''
+ return self._optypes_map
+
+ @optypes_map.setter
+ def optypes_map(self, value):
+ if not isinstance(value, OpTypesMap):
+ raise TypeError("must be set to a OpTypesMap")
+ self._optypes_map = value
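Note that `OpTypesMap.__setitem__` appends rather than overwrites, so repeated assignment to one key builds up the per-type list. A small sketch (strings stand in for Operator objects here):

    from ir.subgraph import OpTypesMap

    m = OpTypesMap()
    m["CONV_2D"] = "op0"  # appends to the list stored under the key
    m["CONV_2D"] = "op1"
    print(m["CONV_2D"])  # ['op0', 'op1']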
diff --git a/tools/tflitefile_tool/ir/tensor.py b/tools/tflitefile_tool/ir/tensor.py
new file mode 100644
index 000000000..f0f35a74b
--- /dev/null
+++ b/tools/tflitefile_tool/ir/tensor.py
@@ -0,0 +1,120 @@
+#!/usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+NOTE
+- This class expresses a wrapping class for a native class.
+- Just use this class as an interface.
+"""
+
+
+class Tensor(object):
+ def __init__(self):
+ self._index = -1
+ self._tensor_name = ""
+ self._buffer = None
+ self._buffer_index = -1
+ self._type_name = ""
+ self._shape = []
+ self._memory_size = -1
+
+ '''index'''
+
+ @property
+ def index(self):
+ '''Tensor's int type index'''
+ return self._index
+
+ @index.setter
+ def index(self, value):
+ if not isinstance(value, int):
+ raise TypeError("must be set to an integer")
+ self._index = value
+
+ '''tensor_name'''
+
+ @property
+ def tensor_name(self):
+ '''Tensor's name str'''
+ return self._tensor_name
+
+ @tensor_name.setter
+ def tensor_name(self, value):
+ if not isinstance(value, str):
+ raise TypeError("must be set to a str")
+ self._tensor_name = value
+
+ '''buffer'''
+
+ @property
+ def buffer(self):
+ '''Tensor's buffer as a numpy instance type'''
+ return self._buffer
+
+ @buffer.setter
+ def buffer(self, value):
+ self._buffer = value
+
+ '''buffer_index'''
+
+ @property
+ def buffer_index(self):
+ '''Tensor's int type buffer index'''
+ return self._buffer_index
+
+ @buffer_index.setter
+ def buffer_index(self, value):
+ if not isinstance(value, int):
+ raise TypeError("must be set to an integer")
+ self._buffer_index = value
+
+ '''type_name'''
+
+ @property
+ def type_name(self):
+ '''Tensor's type name str'''
+ return self._type_name
+
+ @type_name.setter
+ def type_name(self, value):
+ if not isinstance(value, str):
+ raise TypeError("must be set to a str")
+ self._type_name = value
+
+ '''shape'''
+
+ @property
+ def shape(self):
+ '''Tensor's shape as a list'''
+ return self._shape
+
+ @shape.setter
+ def shape(self, value):
+ if not isinstance(value, list):
+ raise TypeError("must be set to a list")
+ self._shape = value
+
+ '''memory_size'''
+
+ @property
+ def memory_size(self):
+ '''Tensor's memory size as int type'''
+ return self._memory_size
+
+ @memory_size.setter
+ def memory_size(self, value):
+ if not isinstance(value, int):
+ raise TypeError("must be set to an integer")
+ self._memory_size = value
diff --git a/tools/tflitefile_tool/model_parser.py b/tools/tflitefile_tool/model_parser.py
index cd66bf500..76c43acfc 100755
--- a/tools/tflitefile_tool/model_parser.py
+++ b/tools/tflitefile_tool/model_parser.py
@@ -1,6 +1,6 @@
-#!/usr/bin/python
+#!/usr/bin/env python
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -13,29 +13,26 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+'''
+Why is this file named `model_parser.py`, the same as `parser/model_parser.py`?
+- Until now, users have invoked the tool via the path `tools/tflitefile_tool/model_parser.py`.
+- Rename it to a proper name like `main.py` once the revision work is done.
+'''
-import os
-import sys
-import numpy
-import flatbuffers
-import tflite.Model
-import tflite.SubGraph
import argparse
-import graph_stats
-from operator_parser import OperatorParser
-from subgraph_printer import SubgraphPrinter
-from model_saver import ModelSaver
+from parser.model_parser import ModelParser
+from printer.subgraph_printer import SubgraphPrinter
+from saver.model_saver import ModelSaver
-class TFLiteModelFileParser(object):
+class MainOption(object):
def __init__(self, args):
- # Read flatbuffer file descriptor using argument
- self.tflite_file = args.input_file
+ self.model_file = args.input_file
- # Set print level (0 ~ 1)
+ # Set print level (0 ~ 2)
self.print_level = args.verbose
- if (args.verbose > 1):
- self.print_level = 1
+ if (args.verbose > 2):
+ self.print_level = 2
if (args.verbose < 0):
self.print_level = 0
@@ -66,54 +63,24 @@ class TFLiteModelFileParser(object):
if self.save == True:
self.save_prefix = args.prefix
- def PrintModel(self, model_name, op_parser):
- printer = SubgraphPrinter(self.print_level, op_parser, model_name)
- if self.print_all_tensor == False:
- printer.SetPrintSpecificTensors(self.print_tensor_index)
+def PrintSubgraph(option, subg):
+ printer = SubgraphPrinter(option.print_level, subg)
- if self.print_all_operator == False:
- printer.SetPrintSpecificOperators(self.print_operator_index)
+    if not option.print_all_tensor:
+ printer.SetPrintSpecificTensors(option.print_tensor_index)
- printer.PrintInfo()
+    if not option.print_all_operator:
+ printer.SetPrintSpecificOperators(option.print_operator_index)
- def SaveModel(self, model_name, op_parser):
- saver = ModelSaver(model_name, op_parser)
+ printer.PrintInfo()
- if self.save_config == True:
- saver.SaveConfigInfo(self.save_prefix)
- def main(self):
- # Generate Model: top structure of tflite model file
- buf = self.tflite_file.read()
- buf = bytearray(buf)
- tf_model = tflite.Model.Model.GetRootAsModel(buf, 0)
+def SaveSubgraph(option, subg):
+ saver = ModelSaver(subg)
- stats = graph_stats.GraphStats()
- # Model file can have many models
- for subgraph_index in range(tf_model.SubgraphsLength()):
- tf_subgraph = tf_model.Subgraphs(subgraph_index)
- model_name = "#{0} {1}".format(subgraph_index, tf_subgraph.Name())
- # 0th subgraph is main subgraph
- if (subgraph_index == 0):
- model_name += " (MAIN)"
-
- # Parse Operators
- op_parser = OperatorParser(tf_model, tf_subgraph)
- op_parser.Parse()
-
- stats += graph_stats.CalcGraphStats(op_parser)
-
- if self.save == False:
- # print all of operators or requested objects
- self.PrintModel(model_name, op_parser)
- else:
- # save all of operators in this model
- self.SaveModel(model_name, op_parser)
-
- print('==== Model Stats ({} Subgraphs) ===='.format(tf_model.SubgraphsLength()))
- print('')
- graph_stats.PrintGraphStats(stats, self.print_level)
+    if option.save_config:
+ saver.SaveConfigInfo(option.save_prefix)
if __name__ == '__main__':
@@ -138,6 +105,14 @@ if __name__ == '__main__':
arg_parser.add_argument(
'-p', '--prefix', help="file prefix to be saved (with -c/--config option)")
args = arg_parser.parse_args()
+ option = MainOption(args)
+
+ subg_list = ModelParser(option.model_file).Parse()
- # Call main function
- TFLiteModelFileParser(args).main()
+ for subg in subg_list:
+        if not option.save:
+            # print all operators, or only the requested objects
+            PrintSubgraph(option, subg)
+        else:
+            # save all operators in this model
+ SaveSubgraph(option, subg)
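The same flow as `__main__` above, reduced to a minimal sketch; `model.tflite` is a placeholder path, verbose level 1 is an arbitrary choice, and it assumes the script is run from `tools/tflitefile_tool` so the packages resolve:

    from parser.model_parser import ModelParser
    from printer.subgraph_printer import SubgraphPrinter

    with open("model.tflite", "rb") as f:
        for subg in ModelParser(f).Parse():
            SubgraphPrinter(1, subg).PrintInfo()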
diff --git a/tools/tflitefile_tool/model_saver.py b/tools/tflitefile_tool/model_saver.py
deleted file mode 100755
index 15037a1fe..000000000
--- a/tools/tflitefile_tool/model_saver.py
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/usr/bin/python
-
-# Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from config_saver import ConfigSaver
-
-
-class ModelSaver(object):
- def __init__(self, model_name, op_parser):
- self.model_name = model_name
- self.op_parser = op_parser
-
- def SaveConfigInfo(self, prefix):
- print("Save model configuration file")
- for type_str, oper_list in self.op_parser.operators_per_type.items():
- if prefix:
- file_name = "{}_{}_{}.config".format(prefix, self.model_name, type_str)
- else:
- file_name = "{}_{}.config".format(self.model_name, type_str)
- print("{} file is generated".format(file_name))
- with open(file_name, 'wt') as f:
- f.write("# {}, Total count: {}\n\n".format(type_str, len(oper_list)))
- for operator in oper_list:
- ConfigSaver(file_name, operator).SaveInfo()
diff --git a/tools/tflitefile_tool/operation.py b/tools/tflitefile_tool/operation.py
deleted file mode 100755
index 6aa752772..000000000
--- a/tools/tflitefile_tool/operation.py
+++ /dev/null
@@ -1,209 +0,0 @@
-#!/usr/bin/python
-
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import tflite.Conv2DOptions
-import tflite.Pool2DOptions
-import tflite.BuiltinOptions
-import tflite.Tensor
-from tensor_wrapping import Tensor
-import math
-
-
-# NOTICE
-# - an internal class. do not import outside this file.
-# - REF: https://stackoverflow.com/questions/551038/private-implementation-class-in-python
-class _OperationComputeMethod(object):
- '''
- NOTE: How to count operations of convolution(and also pooling)?
-
- If we know operations of output's one element, we can calculate total output's operations.
- For example, consider output Shape[3,3]
- [ e11 e12 e13 ]
- [ e21 e22 e23 ]
- [ e31 e32 e33 ]
- If we know operations for calculation of e11, we can know total operations of output(e11, e12, ... e33)
- by operations of e11 * 9(total number of elements)
-
- So we only need to know how to calculate operations of e11.
- For this, just think how to conv operation to the output's element
- If input_channel is 1, we can only think of kernel_size(kernel_w and kernel_h).
- For example, consider input Shape[3,3] and kernel Shape[2,2]
- [ i11 i12 i13 ] [ k11 k12 ] [ o11 o12 o13 ]
- [ i21 i22 i23 ] * [ k21 k22 ] = [ o21 o22 o23 ]
- [ i31 i32 i33 ] [ o31 o32 o33 ]
-
- Conv operation: for o11, i11 * k11 + i21 * k21 + i12 * k12 + i22 * k22 = o11
- On above conv operation, mul operations are done at 4 times(== kernel_w * kernel_h)
- and add operations are dont at 3 times(== kernel_w * kernel_h - 1)
- and also, bias will be done and it will be counted on add operations.
-
- Anyway, we can calculate total operations on this way. This can apply to the way of pooling.
- '''
-
- def ComputeOperationForConv2D(self, tf_operator, inputs, outputs):
- assert (
- tf_operator.BuiltinOptionsType() == tflite.BuiltinOptions.BuiltinOptions()
- .Conv2DOptions)
-
- # NOTE: Assume that conv2d operator always take 3 tensors as inputs
- # and both width and height are the same.
- # operator_inputs[]: [input_tensor, weight_tensor, bias_tensor]
- # operator_outputs[]: [output_tensor]
- # tflite's tensor shape: [N,H,W,C]
- input_tensor = inputs[0].tf_tensor
- weight_tensor = inputs[1].tf_tensor
- output_tensor = outputs[0].tf_tensor
-
- # kernel_ops = (kernel_w * kernel_h * input_channel * 2(multiply and add))
- kernel_ops = (
- weight_tensor.Shape(2) * weight_tensor.Shape(1) * input_tensor.Shape(3))
-
- # total ops
- # = batch_size * output_channel * output_width * output_height * kernel_ops
- total_ops = (output_tensor.Shape(0) * output_tensor.Shape(3) *
- output_tensor.Shape(2) * output_tensor.Shape(1))
-
- add_instr_num = (total_ops * (kernel_ops + 1)) # bias
- mul_instr_num = (total_ops * (kernel_ops))
- nonlinear_instr_num = 0
- return (add_instr_num, mul_instr_num, nonlinear_instr_num)
-
- # NOTE: Reference the comment 'NOTE' of ComputeOperationForConv2D
-
- def ComputeOperationForPooling(self, tf_operator, inputs, outputs):
- assert (
- tf_operator.BuiltinOptionsType() == tflite.BuiltinOptions.BuiltinOptions()
- .Pool2DOptions)
-
- dummy_input_tensor = inputs[0].tf_tensor
- output_tensor = outputs[0].tf_tensor
-
- pool2d_options = tflite.Pool2DOptions.Pool2DOptions()
- pool2d_options.Init(tf_operator.BuiltinOptions().Bytes,
- tf_operator.BuiltinOptions().Pos)
-
- # kernel_ops = kernel_w * kernel_h
- kernel_ops = (pool2d_options.FilterWidth() * pool2d_options.FilterHeight())
-
- # total ops
- # = batch_size * output_channel * output_width * output_height *
- # kernel_ops(kernel_w * kernel_h)
- total_ops = (output_tensor.Shape(0) * output_tensor.Shape(3) *
- output_tensor.Shape(2) * output_tensor.Shape(1))
-
- add_instr_num = (total_ops * kernel_ops - 1)
- mul_instr_num = (total_ops * kernel_ops)
- nonlinear_instr_num = 0
- return (add_instr_num, mul_instr_num, nonlinear_instr_num)
-
- def ComputeOperationForSoftmax(self, tf_operator, inputs, outputs):
- assert (
- tf_operator.BuiltinOptionsType() == tflite.BuiltinOptions.BuiltinOptions()
- .SoftmaxOptions)
-
- input_tensor = inputs[0].tf_tensor
-
- dummy_batch_size = input_tensor.Shape(0)
- input_dim = input_tensor.Shape(1)
-
- # Softmax(x_i) = exp(x_i) / sum of exp(x)
- add_instr_num = input_dim - 1 # sum of exp(x)
- mul_instr_num = input_dim # /
- nonlinear_instr_num = input_dim + input_dim # sum of exp(x) and exp(x_i)
- return (add_instr_num, mul_instr_num, nonlinear_instr_num)
-
- def ComputeOperationForFullyConnected(self, tf_operator, inputs, outputs):
- assert (
- tf_operator.BuiltinOptionsType() == tflite.BuiltinOptions.BuiltinOptions()
- .FullyConnectedOptions)
-
- # NOTE: Assume that fully_connected operator always take 3 tensors as inputs
- # and its X tensor's shape is [1, 1, 1, input_dim] with
- # its output Y [1, output_dim]
- input_tensor = inputs[0].tf_tensor
- output_tensor = outputs[0].tf_tensor
-
- # ops_per_element
- # = input_dim(multiplication) + input_dim-1(addition) + 1(bias)
- # total_ops
- # = ops_per_elem * output_dim
-
- add_instr_num = (input_tensor.Shape(3) * output_tensor.Shape(1))
- mul_instr_num = (input_tensor.Shape(3) * output_tensor.Shape(1))
- nonlinear_instr_num = 0
- return (add_instr_num, mul_instr_num, nonlinear_instr_num)
-
- def ComputeOperationForNothing(self, tf_operator, inputs, outputs):
- add_instr_num = 0
- mul_instr_num = 0
- nonlinear_instr_num = 0
- return (add_instr_num, mul_instr_num, nonlinear_instr_num)
-
- def NYI_ComputeOperation(self, tf_operator, inputs, outputs):
- pass
-
- operation_to_method_map = {
- # Inceptionv3
- "CONV_2D": ComputeOperationForConv2D,
- "AVERAGE_POOL_2D": ComputeOperationForPooling,
- "MAX_POOL_2D": ComputeOperationForPooling,
- "SOFTMAX": ComputeOperationForSoftmax,
- "FULLY_CONNECTED": ComputeOperationForFullyConnected,
- "CONCATENATION": ComputeOperationForNothing,
- # Extension
- "TOPK_V2": NYI_ComputeOperation,
- "SUB": NYI_ComputeOperation,
- "STRIDED_SLICE": NYI_ComputeOperation,
- "RESHAPE": NYI_ComputeOperation,
- "GATHER": NYI_ComputeOperation,
- "RESIZE_BILINEAR": NYI_ComputeOperation,
- "CAST": NYI_ComputeOperation,
- "ADD": NYI_ComputeOperation,
- "MUL": NYI_ComputeOperation,
- "DIV": NYI_ComputeOperation,
- "CUSTOM(TensorFlowMax)": NYI_ComputeOperation,
- "CUSTOM": NYI_ComputeOperation,
- }
-
-
-class Operation(object):
- def __init__(self, tf_operator, operator_str, inputs, outputs):
- self.tf_operator = tf_operator
- self.operator_str = operator_str
- self.inputs = inputs
- self.outputs = outputs
- self.add_instr_num = 0
- self.mul_instr_num = 0
- self.nonlinear_instr_num = 0
- self.can_compute = True
- self.Compute()
-
- def Compute(self):
- comp_map = _OperationComputeMethod().operation_to_method_map
- if not self.operator_str in comp_map.keys():
- self.can_compute = False
- return
-
- method = comp_map[self.operator_str]
- if method.__name__ == _OperationComputeMethod().NYI_ComputeOperation.__name__:
- self.can_compute = False
- return
-
- self.add_instr_num, self.mul_instr_num, self.nonlinear_instr_num = method(
- _OperationComputeMethod(), self.tf_operator, self.inputs, self.outputs)
-
- def TotalInstrNum(self):
- return (self.add_instr_num + self.mul_instr_num + self.nonlinear_instr_num)
diff --git a/tools/tflitefile_tool/operator_parser.py b/tools/tflitefile_tool/operator_parser.py
deleted file mode 100755
index 2c230c275..000000000
--- a/tools/tflitefile_tool/operator_parser.py
+++ /dev/null
@@ -1,97 +0,0 @@
-#!/usr/bin/python
-
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import tflite.Model
-import tflite.SubGraph
-import tflite.Operator
-import tflite.OperatorCode
-import tflite.BuiltinOperator
-from operator_wrapping import Operator, EnumStrMaps
-from tensor_wrapping import Tensor, SetTensorTypeStr
-from operation import Operation
-
-
-class OperatorParser(object):
- def __init__(self, tf_model, tf_subgraph):
- self.tf_model = tf_model
- self.tf_subgraph = tf_subgraph
- self.operators_in_list = list()
- self.operators_per_type = dict()
- # Tensor type string table
- SetTensorTypeStr()
-
- def Parse(self):
- for operator_idx in range(self.tf_subgraph.OperatorsLength()):
- tf_operator = self.tf_subgraph.Operators(operator_idx)
- opcode_str = self.GetOpcodeStr(tf_operator)
- input_tensors = self.GetInputTensors(tf_operator)
- output_tensors = self.GetOutputTensors(tf_operator)
-
- op = Operator(operator_idx, tf_operator, input_tensors, output_tensors,
- opcode_str)
- self.AppendOperator(op)
-
- def GetOpcodeStr(self, tf_operator):
- opcode_list_idx = tf_operator.OpcodeIndex()
- opcode_id = self.tf_model.OperatorCodes(opcode_list_idx).BuiltinCode()
- opcode_str = EnumStrMaps.BuiltinOpcode[opcode_id]
- if opcode_id == 32:
- # Custom operator
- custom_operator = self.tf_model.OperatorCodes(tf_operator.OpcodeIndex())
- custom_op_name = custom_operator.CustomCode().decode('utf-8')
- opcode_str = opcode_str + "(" + custom_op_name + ")"
- return opcode_str
-
- def GetInputTensors(self, tf_operator):
- operator_inputs = tf_operator.InputsAsNumpy()
- return self.GetTensors(operator_inputs)
-
- def GetOutputTensors(self, tf_operator):
- operator_outputs = tf_operator.OutputsAsNumpy()
- return self.GetTensors(operator_outputs)
-
- def GetTensors(self, tf_tensors_index):
- return_list = list()
- for tensor_idx in tf_tensors_index:
- # in case of optional input, tensor_idx == -1
- if (tensor_idx < 0):
- return_list.append(Tensor(tensor_idx, None, None))
- continue
- tf_tensor = self.tf_subgraph.Tensors(tensor_idx)
- buffer_idx = tf_tensor.Buffer()
- tf_buffer = self.tf_model.Buffers(buffer_idx)
- return_list.append(Tensor(tensor_idx, tf_tensor, tf_buffer))
- return return_list
-
- def GetAllTensors(self):
- return_list = list()
- for tensor_idx in range(self.tf_subgraph.TensorsLength()):
- if (tensor_idx < 0):
- return_list.append(Tensor(tensor_idx, 0, 0))
- continue
- tf_tensor = self.tf_subgraph.Tensors(tensor_idx)
- buffer_idx = tf_tensor.Buffer()
- tf_buffer = self.tf_model.Buffers(buffer_idx)
- return_list.append(Tensor(tensor_idx, tf_tensor, tf_buffer))
- return return_list
-
- def AppendOperator(self, operator):
- self.operators_in_list.append(operator)
-
- opcode_str = operator.opcode_str
- if opcode_str not in self.operators_per_type:
- self.operators_per_type[opcode_str] = list()
- self.operators_per_type[opcode_str].append(operator)
diff --git a/tools/tflitefile_tool/operator_printer.py b/tools/tflitefile_tool/operator_printer.py
deleted file mode 100755
index e7c553394..000000000
--- a/tools/tflitefile_tool/operator_printer.py
+++ /dev/null
@@ -1,59 +0,0 @@
-#!/usr/bin/python
-
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from operator_wrapping import Operator
-from tensor_printer import TensorPrinter
-from option_printer import OptionPrinter
-
-
-def GetStrTensorIndex(tensors):
- return_string = "["
- for idx in range(len(tensors)):
- if idx != 0:
- return_string += ", "
- return_string += str(tensors[idx].tensor_idx)
- return_string += "]"
- return return_string
-
-
-class OperatorPrinter(object):
- def __init__(self, verbose, operator):
- self.verbose = verbose
- self.operator = operator
-
- def PrintInfo(self):
- if (self.verbose < 1):
- return
-
- op_str = "Operator {0}: {1}".format(self.operator.operator_idx,
- self.operator.opcode_str)
-
- print(op_str)
- print("\tFused Activation: " + self.operator.fused_activation)
- self.PrintTensors()
-
- def PrintTensors(self):
- print("\tInput Tensors" + GetStrTensorIndex(self.operator.inputs))
- for tensor in self.operator.inputs:
- TensorPrinter(self.verbose, tensor).PrintInfo("\t\t")
- print("\tOutput Tensors" + GetStrTensorIndex(self.operator.outputs))
- for tensor in self.operator.outputs:
- TensorPrinter(self.verbose, tensor).PrintInfo("\t\t")
-
- # operator option
- # Some operations does not have option. In such case no option is printed
- OptionPrinter(self.verbose, self.operator.opcode_str,
- self.operator.options).PrintInfo("\t")
diff --git a/tools/tflitefile_tool/operator_wrapping.py b/tools/tflitefile_tool/operator_wrapping.py
deleted file mode 100755
index 64bad1f08..000000000
--- a/tools/tflitefile_tool/operator_wrapping.py
+++ /dev/null
@@ -1,115 +0,0 @@
-#!/usr/bin/python
-
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import tflite.Operator
-import tflite.OperatorCode
-import tflite.BuiltinOperator
-import tflite.ActivationFunctionType
-from operation import Operation
-
-
-# Match enum value integer to name string
-# Assumption 1: enum value is defined by old style (can be used on python 2)
-# Assumption 2: when class define enum value, only constant value is defined and methods are not defined
-# Assumption 3: only integer value is set by constant definition
-def BuildEnumClassStrMap(obj):
- ret = {}
- for fieldName in dir(obj):
- if (not fieldName.startswith('_')):
- fieldValue = getattr(obj, fieldName)
- if (isinstance(fieldValue, (int))):
- ret[fieldValue] = fieldName
- return ret
-
-
-class EnumStrMaps():
- BuiltinOpcode = BuildEnumClassStrMap(tflite.BuiltinOperator.BuiltinOperator())
- ActivationFunctionType = BuildEnumClassStrMap(
- tflite.ActivationFunctionType.ActivationFunctionType())
- BuiltinOptions = BuildEnumClassStrMap(tflite.BuiltinOptions.BuiltinOptions())
-
-
-def GetAttribute(o, *args):
- import functools
- return functools.reduce(getattr, args, o)
-
-
-def BuildBuiltinOptionGen():
- bo_gen = {}
- for val_enum in EnumStrMaps.BuiltinOptions:
- val_str = EnumStrMaps.BuiltinOptions[val_enum]
- try:
- # Dynamically import Builtin Option classes
- # 0 (NONE) is the only exception that does not have no corresponding flatbuffer-generated class
- module = __import__("tflite." + val_str)
- bo_gen[val_enum] = GetAttribute(module, val_str, val_str)
- except ImportError as e:
- assert val_enum == 0 and val_str == "NONE"
- return bo_gen
-
-
-class OptionLoader:
- builtinOptionGen = BuildBuiltinOptionGen()
-
- @staticmethod
- def GetBuiltinOptions(options_type, options_table):
- if (options_table == None) and (options_type != 0):
- print(
- "Bad flatbuffer file: undefined builtin option table with defined option type"
- )
- exit(1)
- options = OptionLoader.builtinOptionGen[options_type]()
- options.Init(options_table.Bytes, options_table.Pos)
- return options
-
-
-class Operator(object):
- def __init__(self, operator_idx, tf_operator, input_tensors, output_tensors,
- opcode_str):
- self.operator_idx = operator_idx
- self.tf_operator = tf_operator
- self.inputs = input_tensors
- self.outputs = output_tensors
- self.opcode_str = opcode_str
- self.operation = Operation(self.tf_operator, self.opcode_str, self.inputs,
- self.outputs)
- self.fused_activation = "NONE"
- self.SetupBuiltinOption()
- self.SetupFusedActivation()
-
- def SetupBuiltinOption(self):
- try:
- self.options = OptionLoader.GetBuiltinOptions(
- self.tf_operator.BuiltinOptionsType(), self.tf_operator.BuiltinOptions())
- except KeyError:
- self.options = 0
- return
-
- def SetupFusedActivation(self):
- # FIXME: workaround for ops such as custom
- try:
- options = OptionLoader.GetBuiltinOptions(
- self.tf_operator.BuiltinOptionsType(), self.tf_operator.BuiltinOptions())
- except KeyError:
- return
-
- # fused activation function
- try:
- activation_code = options.FusedActivationFunction()
- self.fused_activation = EnumStrMaps.ActivationFunctionType[activation_code]
- except AttributeError:
- # This operator does not support FusedActivationFunction
- pass
diff --git a/tools/tflitefile_tool/option_printer.py b/tools/tflitefile_tool/option_printer.py
deleted file mode 100755
index 15265adf2..000000000
--- a/tools/tflitefile_tool/option_printer.py
+++ /dev/null
@@ -1,67 +0,0 @@
-#!/usr/bin/python
-
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-class OptionPrinter(object):
- def __init__(self, verbose, op_name, options):
- self.verbose = verbose
- self.op_name = op_name
- self.options = options
-
- def GetPadding(self):
- if self.options.Padding() == 0:
- return "SAME"
- elif self.options.Padding() == 1:
- return "VALID"
- else:
- return "** wrong padding value **"
-
- def PrintInfo(self, tab=""):
- if (self.verbose < 1):
- pass
- if (self.options == 0):
- return
-
- option_str = self.GetOptionString()
- if option_str:
- print("{}Options".format(tab))
- print("{}\t{}".format(tab, option_str))
-
- def GetOptionString(self):
- if (self.op_name == "AVERAGE_POOL_2D" or self.op_name == "MAX_POOL_2D"):
- return "{}, {}, {}".format(
- "Filter W:H = {}:{}".format(self.options.FilterWidth(),
- self.options.FilterHeight()),
- "Stride W:H = {}:{}".format(self.options.StrideW(),
- self.options.StrideH()),
- "Padding = {}".format(self.GetPadding()))
- elif (self.op_name == "CONV_2D"):
- return "{}, {}, {}".format(
- "Stride W:H = {}:{}".format(self.options.StrideW(),
- self.options.StrideH()),
- "Dilation W:H = {}:{}".format(self.options.DilationWFactor(),
- self.options.DilationHFactor()),
- "Padding = {}".format(self.GetPadding()))
- elif (self.op_name == "DEPTHWISE_CONV_2D"):
- # yapf: disable
- return "{}, {}, {}, {}".format(
- "Stride W:H = {}:{}".format(self.options.StrideW(),
- self.options.StrideH()),
- "Dilation W:H = {}:{}".format(self.options.DilationWFactor(),
- self.options.DilationHFactor()),
- "Padding = {}".format(self.GetPadding()),
- "DepthMultiplier = {}".format(self.options.DepthMultiplier()))
- # yapf: enable
diff --git a/tools/tflitefile_tool/parser/__init__.py b/tools/tflitefile_tool/parser/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/tflitefile_tool/parser/__init__.py
diff --git a/tools/tflitefile_tool/parser/model_parser.py b/tools/tflitefile_tool/parser/model_parser.py
new file mode 100755
index 000000000..68cd31a23
--- /dev/null
+++ b/tools/tflitefile_tool/parser/model_parser.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from parser.tflite.tflite_parser import TFLiteParser
+
+
+class ModelParser(object):
+ def __init__(self, model_file):
+ self.parser = None
+ # model_file: _io.BufferedReader
+ if model_file.name.endswith("tflite"):
+ self.parser = TFLiteParser(model_file)
+        # TODO: Add more parsers
+
+ def Parse(self):
+ if self.parser is None:
+ raise NotImplementedError
+ return self.parser.Parse()
diff --git a/tools/tflitefile_tool/parser/tflite/tflite_enum_str_maps.py b/tools/tflitefile_tool/parser/tflite/tflite_enum_str_maps.py
new file mode 100644
index 000000000..6a3a2054f
--- /dev/null
+++ b/tools/tflitefile_tool/parser/tflite/tflite_enum_str_maps.py
@@ -0,0 +1,40 @@
+#!/usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import tflite.BuiltinOperator
+import tflite.ActivationFunctionType
+import tflite.BuiltinOptions
+
+
+# Match enum value integer to name string
+# Assumption 1: enum value is defined by old style (can be used on python 2)
+# Assumption 2: when class define enum value, only constant value is defined and methods are not defined
+# Assumption 3: only integer value is set by constant definition
+def BuildEnumClassStrMap(obj):
+ ret = {}
+ for fieldName in dir(obj):
+ if (not fieldName.startswith('_')):
+ fieldValue = getattr(obj, fieldName)
+ if (isinstance(fieldValue, (int))):
+ ret[fieldValue] = fieldName
+ return ret
+
+
+class EnumStrMaps():
+ BuiltinOpcode = BuildEnumClassStrMap(tflite.BuiltinOperator.BuiltinOperator())
+ ActivationFunctionType = BuildEnumClassStrMap(
+ tflite.ActivationFunctionType.ActivationFunctionType())
+ BuiltinOptions = BuildEnumClassStrMap(tflite.BuiltinOptions.BuiltinOptions())
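`BuildEnumClassStrMap` works on any old-style enum class whose public attributes are plain ints. A toy example (the `Color` class is made up; importing the module requires the generated `tflite` package on the path):

    from parser.tflite.tflite_enum_str_maps import BuildEnumClassStrMap

    class Color(object):
        RED = 0
        GREEN = 1

    print(BuildEnumClassStrMap(Color()))  # maps 0 -> 'RED', 1 -> 'GREEN'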
diff --git a/tools/tflitefile_tool/parser/tflite/tflite_operator.py b/tools/tflitefile_tool/parser/tflite/tflite_operator.py
new file mode 100755
index 000000000..211007e1c
--- /dev/null
+++ b/tools/tflitefile_tool/parser/tflite/tflite_operator.py
@@ -0,0 +1,63 @@
+#!/usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ir.operator import Operator
+from .tflite_enum_str_maps import EnumStrMaps
+from .tflite_option import OptionLoader, GetStringOptions
+
+
+class TFLiteOperator(Operator):
+ def __init__(self, operator_idx, tf_operator, input_tensors, output_tensors,
+ opcode_str):
+ super(TFLiteOperator, self).__init__()
+
+ self.index = operator_idx
+ self.inputs = input_tensors
+ self.outputs = output_tensors
+ self.op_name = opcode_str
+ self.activation = "NONE"
+ self.options = ""
+
+ self.tf_operator = tf_operator
+ self.tf_options = None
+ self.SetupBuiltinOption()
+ self.SetupFusedActivation()
+
+ def SetupBuiltinOption(self):
+ # FIXME: workaround for ops such as custom
+ try:
+ self.tf_options = OptionLoader.GetBuiltinOptions(
+ self.tf_operator.BuiltinOptionsType(), self.tf_operator.BuiltinOptions())
+            if self.tf_options is None:
+ return
+
+ option_str = GetStringOptions(self.op_name, self.tf_options)
+ if option_str is None:
+ return
+
+ self.options = option_str
+ except KeyError:
+ return
+
+ def SetupFusedActivation(self):
+        if self.tf_options is None:
+ return
+ try:
+ activation_code = self.tf_options.FusedActivationFunction()
+ self.activation = EnumStrMaps.ActivationFunctionType[activation_code]
+ except AttributeError:
+ # This operator does not support FusedActivationFunction
+ pass
diff --git a/tools/tflitefile_tool/parser/tflite/tflite_option.py b/tools/tflitefile_tool/parser/tflite/tflite_option.py
new file mode 100644
index 000000000..b85fbae90
--- /dev/null
+++ b/tools/tflitefile_tool/parser/tflite/tflite_option.py
@@ -0,0 +1,96 @@
+#!/usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .tflite_enum_str_maps import EnumStrMaps
+
+
+def GetAttribute(o, *args):
+ import functools
+ return functools.reduce(getattr, args, o)
+
+
+def BuildBuiltinOptionGen():
+ bo_gen = {}
+ for val_enum in EnumStrMaps.BuiltinOptions:
+ val_str = EnumStrMaps.BuiltinOptions[val_enum]
+ try:
+ # Dynamically import Builtin Option classes
+            # 0 (NONE) is the only value that has no corresponding flatbuffer-generated class
+ module = __import__("tflite." + val_str)
+ bo_gen[val_enum] = GetAttribute(module, val_str, val_str)
+        except ImportError:
+ assert val_enum == 0 and val_str == "NONE"
+ return bo_gen
+
+
+class OptionLoader:
+ builtinOptionGen = BuildBuiltinOptionGen()
+
+ @staticmethod
+ def GetBuiltinOptions(options_type, options_table):
+        if (options_table is None) and (options_type != 0):
+ print(
+ "Bad flatbuffer file: undefined builtin option table with defined option type"
+ )
+ exit(1)
+ options = OptionLoader.builtinOptionGen[options_type]()
+ options.Init(options_table.Bytes, options_table.Pos)
+ return options
+
+
+def GetStringPadding(options):
+ if options.Padding() == 0:
+ return "SAME"
+ elif options.Padding() == 1:
+ return "VALID"
+ else:
+ return "** wrong padding value **"
+
+
+def GetStringOptions(op_name, options):
+ if (op_name == "AVERAGE_POOL_2D" or op_name == "MAX_POOL_2D"):
+ return "{}, {}, {}".format(
+ "Filter W:H = {}:{}".format(options.FilterWidth(), options.FilterHeight()),
+ "Stride W:H = {}:{}".format(options.StrideW(),
+ options.StrideH()), "Padding = {}".format(
+ GetStringPadding(options)))
+ elif (op_name == "CONV_2D"):
+ return "{}, {}, {}".format(
+ "Stride W:H = {}:{}".format(options.StrideW(), options.StrideH()),
+ "Dilation W:H = {}:{}".format(options.DilationWFactor(),
+ options.DilationHFactor()),
+ "Padding = {}".format(GetStringPadding(options)))
+ elif (op_name == "DEPTHWISE_CONV_2D"):
+ # yapf: disable
+ return "{}, {}, {}, {}".format(
+ "Stride W:H = {}:{}".format(options.StrideW(),
+ options.StrideH()),
+ "Dilation W:H = {}:{}".format(options.DilationWFactor(),
+ options.DilationHFactor()),
+ "Padding = {}".format(GetStringPadding(options)),
+ "DepthMultiplier = {}".format(options.DepthMultiplier()))
+ # yapf: enable
+ elif (op_name == "STRIDED_SLICE"):
+ # yapf: disable
+ return "{}, {}, {}, {}, {}".format(
+ "begin_mask({})".format(options.BeginMask()),
+ "end_mask({})".format(options.EndMask()),
+ "ellipsis_mask({})".format(options.EllipsisMask()),
+ "new_axis_mask({})".format(options.NewAxisMask()),
+ "shrink_axis_mask({})".format(options.ShrinkAxisMask()))
+ # yapf: enable
+ else:
+ return None
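A sketch of the string `GetStringOptions` builds for a pooling operator; `FakePoolOptions` is a hand-written stub standing in for a flatbuffers `Pool2DOptions` object:

    from parser.tflite.tflite_option import GetStringOptions

    class FakePoolOptions(object):
        def FilterWidth(self):
            return 2

        def FilterHeight(self):
            return 2

        def StrideW(self):
            return 1

        def StrideH(self):
            return 1

        def Padding(self):
            return 0  # 0 maps to "SAME"

    print(GetStringOptions("MAX_POOL_2D", FakePoolOptions()))
    # Filter W:H = 2:2, Stride W:H = 1:1, Padding = SAME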
diff --git a/tools/tflitefile_tool/parser/tflite/tflite_parser.py b/tools/tflitefile_tool/parser/tflite/tflite_parser.py
new file mode 100755
index 000000000..6a8f2b8ab
--- /dev/null
+++ b/tools/tflitefile_tool/parser/tflite/tflite_parser.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import tflite.Model
+from .tflite_subgraph import TFLiteSubgraph
+from .tflite_operator import TFLiteOperator, EnumStrMaps
+from .tflite_tensor import TFLiteTensor, SetTensorTypeStr
+
+
+def HasOptionalTensor(tf_subgraph):
+ for operator_idx in range(tf_subgraph.OperatorsLength()):
+ tf_operator = tf_subgraph.Operators(operator_idx)
+ if -1 in tf_operator.InputsAsNumpy():
+ return True
+        if -1 in tf_operator.OutputsAsNumpy():
+ return True
+
+ return False
+
+
+class TFLiteSubgraphParser(object):
+ def __init__(self, tf_model, subgraph_index):
+ self.tf_model = tf_model
+ self.tf_subgraph = tf_model.Subgraphs(subgraph_index)
+ self.subg = TFLiteSubgraph(subgraph_index, self.tf_subgraph)
+
+ # Tensor type string table
+ SetTensorTypeStr()
+
+ def Parse(self):
+ if HasOptionalTensor(self.tf_subgraph):
+            # Optional input/output tensors are indicated by index -1; register a placeholder
+ self.subg.tensors_map[-1] = TFLiteTensor(-1, None, None)
+
+ # tensors
+ for tensor_idx in range(self.tf_subgraph.TensorsLength()):
+ tf_tensor = self.tf_subgraph.Tensors(tensor_idx)
+ buffer_idx = tf_tensor.Buffer()
+ tf_buffer = self.tf_model.Buffers(buffer_idx)
+ t = TFLiteTensor(tensor_idx, tf_tensor, tf_buffer)
+ self.subg.tensors_map[tensor_idx] = t
+
+ # operators
+ for operator_idx in range(self.tf_subgraph.OperatorsLength()):
+ tf_operator = self.tf_subgraph.Operators(operator_idx)
+ op_name = self.GetOpcodeStr(tf_operator)
+ input_tensors = self.GetTensors(tf_operator.InputsAsNumpy())
+ output_tensors = self.GetTensors(tf_operator.OutputsAsNumpy())
+
+ op = TFLiteOperator(operator_idx, tf_operator, input_tensors, output_tensors,
+ op_name)
+ self.subg.operators_map[op.index] = op
+ self.subg.optypes_map[op.op_name] = op
+
+ self.subg.inputs = self.GetTensors(self.tf_subgraph.InputsAsNumpy())
+ self.subg.outputs = self.GetTensors(self.tf_subgraph.OutputsAsNumpy())
+
+ return self.subg
+
+ def GetOpcodeStr(self, tf_operator):
+ opcode_list_idx = tf_operator.OpcodeIndex()
+ opcode_id = self.tf_model.OperatorCodes(opcode_list_idx).BuiltinCode()
+ opcode_str = EnumStrMaps.BuiltinOpcode[opcode_id]
+ if opcode_id == 32:
+ # Custom operator
+ custom_operator = self.tf_model.OperatorCodes(tf_operator.OpcodeIndex())
+ custom_op_name = custom_operator.CustomCode().decode('utf-8')
+ opcode_str = opcode_str + "(" + custom_op_name + ")"
+ return opcode_str
+
+ def GetTensors(self, tf_tensors_index):
+ assert len(self.subg.tensors_map.keys()) > 0
+
+ return_list = []
+ for tensor_idx in tf_tensors_index:
+ return_list.append(self.subg.tensors_map[tensor_idx])
+ return return_list
+
+
+class TFLiteParser(object):
+ def __init__(self, model_file):
+ self.model_file = model_file
+
+ def Parse(self):
+ # Generate Model: top structure of tflite model file
+ buf = self.model_file.read()
+ buf = bytearray(buf)
+ tf_model = tflite.Model.Model.GetRootAsModel(buf, 0)
+
+ # Model file can have many models
+ subg_list = []
+ for subgraph_index in range(tf_model.SubgraphsLength()):
+ # Parse Subgraphs
+ subg_parser = TFLiteSubgraphParser(tf_model, subgraph_index)
+ subg = subg_parser.Parse()
+ subg_list.append(subg)
+
+ return subg_list
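End to end, the parser turns a flatbuffer file into a list of `Subgraph` IR objects. A minimal sketch with a placeholder path, assuming the generated `tflite` package is importable:

    from parser.tflite.tflite_parser import TFLiteParser

    with open("model.tflite", "rb") as f:  # placeholder path
        subg_list = TFLiteParser(f).Parse()

    for subg in subg_list:
        print(subg.model_name, len(subg.operators_map), "operators")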
diff --git a/tools/tflitefile_tool/parser/tflite/tflite_subgraph.py b/tools/tflitefile_tool/parser/tflite/tflite_subgraph.py
new file mode 100755
index 000000000..0c6338ec6
--- /dev/null
+++ b/tools/tflitefile_tool/parser/tflite/tflite_subgraph.py
@@ -0,0 +1,30 @@
+#!/usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ir.subgraph import Subgraph
+
+
+class TFLiteSubgraph(Subgraph):
+ def __init__(self, subg_idx, tf_subgraph):
+ super(TFLiteSubgraph, self).__init__()
+ self.tf_subgraph = tf_subgraph
+
+ self.index = subg_idx
+ if tf_subgraph.Name() is not None:
+            self.subg_name = tf_subgraph.Name().decode('utf-8')
+ self.model_name = "#{0} {1}".format(subg_idx, self.subg_name)
+ if (subg_idx == 0): # 0th subgraph is main subgraph
+ self.model_name += " (MAIN)"
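How `model_name` comes out for the main subgraph; `FakeTFSubgraph` is a stub for the flatbuffers object (which returns its name as bytes):

    from parser.tflite.tflite_subgraph import TFLiteSubgraph

    class FakeTFSubgraph(object):
        def Name(self):
            return b"main"

    print(TFLiteSubgraph(0, FakeTFSubgraph()).model_name)  # #0 main (MAIN)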
diff --git a/tools/tflitefile_tool/parser/tflite/tflite_tensor.py b/tools/tflitefile_tool/parser/tflite/tflite_tensor.py
new file mode 100755
index 000000000..5eb35e63e
--- /dev/null
+++ b/tools/tflitefile_tool/parser/tflite/tflite_tensor.py
@@ -0,0 +1,124 @@
+#!/usr/bin/python
+
+# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import tflite.Tensor
+import tflite.TensorType
+from ir.tensor import Tensor
+
+TensorTypeList = {}
+
+
+def SetTensorTypeStr():
+ tensorTypeObj = tflite.TensorType.TensorType()
+
+ for fieldName in dir(tensorTypeObj):
+ if (not fieldName.startswith('_')):
+ fieldValue = getattr(tensorTypeObj, fieldName)
+ if (isinstance(fieldValue, (int))):
+ TensorTypeList[fieldValue] = fieldName
+
+
+TYPES_SIZE = {
+ 'BOOL': 1,
+ 'COMPLEX64': 8,
+ 'FLOAT16': 2,
+ 'FLOAT32': 4,
+ 'INT16': 2,
+ 'INT32': 4,
+ 'INT64': 8,
+ 'UINT8': 1,
+ 'NONE': 0,
+}
+
+
+def GetTypeSize(type_name):
+ try:
+ return TYPES_SIZE[type_name]
+
+    except KeyError:
+ return 0
+
+
+TYPE_TO_NPTYPE = {
+ 'BOOL': np.bool_,
+ 'COMPLEX64': np.cdouble,
+ 'FLOAT16': np.float16,
+ 'FLOAT32': np.float32,
+ 'INT16': np.int16,
+ 'INT32': np.int32,
+ 'INT64': np.int64,
+ 'UINT8': np.uint8,
+}
+
+
+def ConvertProperNPArrayType(np_arr, np_shape, type_name):
+ try:
+ return np_arr.view(TYPE_TO_NPTYPE[type_name]).reshape(np_shape)
+    except KeyError:
+ return np_arr.view().reshape(np_shape)
+
+
+class TFLiteTensor(Tensor):
+ def __init__(self, tensor_idx, tf_tensor, tf_buffer):
+ super(TFLiteTensor, self).__init__()
+ self.tf_tensor = tf_tensor
+ self.tf_buffer = tf_buffer
+
+ self.index = int(tensor_idx)
+ self.tensor = tf_tensor
+
+ # optional input
+ if self.index == -1:
+ self.type_name = "NONE"
+ # general input
+ else:
+ assert tf_tensor is not None
+ assert tf_buffer is not None
+ self.tensor_name = str(tf_tensor.Name())
+ self.type_name = TensorTypeList[tf_tensor.Type()]
+ self.buffer_index = tf_tensor.Buffer()
+ if (tf_buffer.DataLength() > 0):
+ self.buffer = ConvertProperNPArrayType(tf_buffer.DataAsNumpy(),
+ tf_tensor.ShapeAsNumpy(),
+ self.type_name)
+
+ # shape: Empty list([]) will mean Scalar
+ for shape_idx in range(tf_tensor.ShapeLength()):
+ # when shape signature is -1, that means unknown dim
+ if tf_tensor.ShapeSignature(shape_idx) != -1:
+ self.shape.append(int(tf_tensor.Shape(shape_idx)))
+ else:
+ self.shape.append(-1)
+
+ self.memory_size = self.GetMemorySize()
+
+ def GetMemorySize(self):
+ type_size = GetTypeSize(self.type_name)
+ if type_size == 0:
+ return 0
+
+ # memory size in bytes
+ size = int(type_size)
+ shape_length = len(self.shape)
+ if shape_length == 0:
+ return size
+
+ for shape_idx in range(shape_length):
+ shape_size = int(self.shape[shape_idx])
+ size *= shape_size
+
+ return size
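`ConvertProperNPArrayType` reinterprets the raw buffer bytes rather than casting values. A sketch assuming a little-endian host:

    import numpy as np
    from parser.tflite.tflite_tensor import ConvertProperNPArrayType

    raw = np.array([0, 0, 128, 63], dtype=np.uint8)  # the four bytes of float32 1.0
    print(ConvertProperNPArrayType(raw, (1, ), 'FLOAT32'))  # [1.]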
diff --git a/tools/tflitefile_tool/printer/__init__.py b/tools/tflitefile_tool/printer/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/tflitefile_tool/printer/__init__.py
diff --git a/tools/tflitefile_tool/printer/string_builder.py b/tools/tflitefile_tool/printer/string_builder.py
new file mode 100644
index 000000000..d7654205a
--- /dev/null
+++ b/tools/tflitefile_tool/printer/string_builder.py
@@ -0,0 +1,175 @@
+#!/usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+
+UNIT_SYMBOLS = ['B', 'K', 'M', 'G', 'T']
+CHAR_SYMBOLS = {'operator': '#', 'tensor': '%', 'buffer': '&'}
+
+
+def ConvertBytesToHuman(n):
+ n = int(n)
+ if n < 0:
+        return "0B"
+
+ format_str = "%(val)3.1f%(symb)s"
+ prefix = {}
+ for i, s in enumerate(UNIT_SYMBOLS[1:]):
+ prefix[s] = 1 << (i + 1) * 10
+
+ for symbol in reversed(UNIT_SYMBOLS[1:]):
+ if n >= prefix[symbol]:
+ v = float(n) / prefix[symbol]
+ return format_str % dict(symb=symbol, val=v)
+
+ return format_str % dict(symb=UNIT_SYMBOLS[0], val=n)
+
+
+def GetStringTensorIndex(tensors):
+ return_string = []
+ return_string.append("[")
+ for idx in range(len(tensors)):
+ if idx != 0:
+ return_string.append(", ")
+ return_string.append(CHAR_SYMBOLS['tensor'] + str(tensors[idx].index))
+ return_string.append("]")
+ return "".join(return_string)
+
+
+def GetStringShape(tensor):
+ shape_len = len(tensor.shape)
+ if shape_len == 0:
+ return "Scalar"
+ return_string = []
+ return_string.append("[")
+ for shape_idx in range(shape_len):
+ if (shape_idx != 0):
+ return_string.append(", ")
+ return_string.append(str(tensor.shape[shape_idx]))
+ return_string.append("]")
+ return "".join(return_string)
+
+
+def GetStringTensor(tensor):
+ info = ""
+ if tensor.index < 0:
+ info = "{:5} : {}".format(CHAR_SYMBOLS['tensor'] + str(tensor.index),
+ "(OPTIONAL)")
+ else:
+ shape_str = GetStringShape(tensor)
+ type_name = tensor.type_name
+            tensor_name = tensor.tensor_name
+ memory_size = ConvertBytesToHuman(tensor.memory_size)
+
+ buffer = ["("]
+ if tensor.buffer is not None:
+ buffer.append(
+ "{:5}: ".format(CHAR_SYMBOLS['buffer'] + str(tensor.buffer_index)))
+            # if the buffer is large, print only an abbreviated placeholder
+ if tensor.buffer.size > 4:
+ buffer.append("".join(['[' for _ in range(tensor.buffer.ndim)]))
+ buffer.append(" ... ")
+ buffer.append("".join([']' for _ in range(tensor.buffer.ndim)]))
+ else:
+ buffer.append(
+ np.array2string(
+ tensor.buffer,
+ precision=3,
+ separator=', ',
+ threshold=4,
+ edgeitems=2))
+ else:
+ buffer.append("Empty")
+ buffer.append(")")
+ buffer_str = "".join(buffer)
+
+ info = "{:5} : buffer {:25} | {:7} | Memory {:6} | Shape {} ({})".format(
+ CHAR_SYMBOLS['tensor'] + str(tensor.index), buffer_str, type_name,
+            memory_size, shape_str, tensor_name)
+ return info
+
+
+def GetStringBuffer(tensor):
+ buffer = []
+ buffer.append("Buffer {:5}".format(CHAR_SYMBOLS['buffer'] + str(tensor.buffer_index)))
+ buffer.append("\n")
+ buffer.append(np.array2string(tensor.buffer, separator=', '))
+ return "".join(buffer)
+
+
+class StringBuilder(object):
+ def __init__(self, spacious_str=" "):
+ self.spacious_str = spacious_str
+
+ def GraphStats(self, stats):
+ results = []
+
+ results.append("{:38}: {:4}".format("Number of all operator types",
+ len(stats.op_counts)))
+
+ # op type stats
+ for op_name in sorted(stats.op_counts.keys()):
+ occur = stats.op_counts[op_name]
+ optype_info_str = "{:38}: {:4}".format(self.spacious_str + op_name, occur)
+ results.append(optype_info_str)
+
+ summary_str = "{0:38}: {1:4}".format("Number of all operators",
+ sum(stats.op_counts.values()))
+ results.append(summary_str)
+ results.append('')
+
+ # memory stats
+ results.append("Expected TOTAL memory: {}".format(
+ ConvertBytesToHuman(stats.total_memory)))
+ results.append("Expected FILLED memory: {}".format(
+ ConvertBytesToHuman(stats.filled_memory)))
+
+ return "\n".join(results)
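+
+    # Illustrative GraphStats output for a hypothetical model with one ADD and one SUB:
+    #   Number of all operator types          :    2
+    #     ADD                                 :    1
+    #     SUB                                 :    1
+    #   Number of all operators               :    2
+    #
+    #   Expected TOTAL memory: 1.0K
+    #   Expected FILLED memory: 512.0B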
+
+ def Operator(self, operator):
+ results = []
+ results.append("{} {}".format(CHAR_SYMBOLS['operator'] + str(operator.index),
+ operator.op_name))
+ results.append("{}Fused Activation: {}".format(self.spacious_str,
+ operator.activation))
+ results.append("{}Input Tensors{}".format(self.spacious_str,
+ GetStringTensorIndex(operator.inputs)))
+ for tensor in operator.inputs:
+ results.append(self.Tensor(tensor, self.spacious_str + self.spacious_str))
+ results.append("{}Output Tensors{}".format(self.spacious_str,
+ GetStringTensorIndex(
+ operator.outputs)))
+ for tensor in operator.outputs:
+ results.append(self.Tensor(tensor, self.spacious_str + self.spacious_str))
+ # operator option
+        # Some operators do not have options. In such cases, no option is printed.
+        if operator.options is not None and operator.options != "":
+ results.append(self.Option(operator.options, self.spacious_str))
+ return "\n".join(results)
+
+ def Tensor(self, tensor, depth_str=""):
+ results = []
+ results.append("{}{}".format(depth_str, GetStringTensor(tensor)))
+ return "".join(results)
+
+ def Option(self, options_str, depth_str=""):
+ results = []
+ results.append("{}Options".format(depth_str))
+ results.append("{}{}{}".format(depth_str, self.spacious_str, options_str))
+ return "\n".join(results)
+
+ def Buffer(self, tensor, depth_str=""):
+ return "{}{}".format(depth_str, GetStringBuffer(tensor))
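+
+
+# Minimal usage sketch (assuming an `operator` object built by this tool's IR):
+#   print(StringBuilder().Operator(operator))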
diff --git a/tools/tflitefile_tool/printer/subgraph_printer.py b/tools/tflitefile_tool/printer/subgraph_printer.py
new file mode 100755
index 000000000..51d8453ae
--- /dev/null
+++ b/tools/tflitefile_tool/printer/subgraph_printer.py
@@ -0,0 +1,106 @@
+#!/usr/bin/python
+
+# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ir import graph_stats
+from .string_builder import StringBuilder
+
+
+class SubgraphPrinter(object):
+ def __init__(self, verbose, subg, spacious_str=" "):
+ self.verbose = verbose
+ self.subg = subg
+ self.spacious_str = spacious_str
+ self.print_all_tensor = True
+ self.print_tensor_index_list = None
+ self.print_all_operator = True
+ self.print_operator_index_list = None
+
+ def SetPrintSpecificTensors(self, tensor_indices):
+ if len(tensor_indices) != 0:
+ self.print_all_tensor = False
+ self.print_tensor_index_list = tensor_indices
+
+ def SetPrintSpecificOperators(self, operator_indices):
+ if len(operator_indices) != 0:
+ self.print_all_operator = False
+ self.print_operator_index_list = operator_indices
+
+ def PrintInfo(self):
+        if self.print_all_tensor and self.print_all_operator:
+ print("[" + self.subg.model_name + "]")
+ print('')
+ if self.verbose > 0:
+ self.PrintModelInfo()
+ print('')
+ self.PrintOperators()
+ if self.verbose == 2:
+ self.PrintBuffers()
+ self.PrintGraphStats()
+
+ if self.verbose == 0:
+ return
+
+        if not self.print_all_tensor:
+ print('')
+ self.PrintSpecificTensors(self.print_tensor_index_list)
+ print('')
+
+        if not self.print_all_operator:
+ print('')
+ self.PrintSpecificOperators(self.print_operator_index_list)
+ print('')
+
+ def PrintModelInfo(self):
+ model_inputs = []
+ for t in self.subg.inputs:
+ model_inputs.append(t.index)
+ model_outputs = []
+ for t in self.subg.outputs:
+ model_outputs.append(t.index)
+ print(self.subg.model_name + " input tensors: " + str(model_inputs))
+ self.PrintSpecificTensors(model_inputs, " ")
+ print(self.subg.model_name + " output tensors: " + str(model_outputs))
+ self.PrintSpecificTensors(model_outputs, " ")
+
+ def PrintOperators(self):
+ for index, operator in self.subg.operators_map.items():
+ info = StringBuilder(self.spacious_str).Operator(operator)
+ print(info)
+ print()
+
+ def PrintSpecificTensors(self, print_tensor_index_list, depth_str=""):
+ for index in print_tensor_index_list:
+ tensor = self.subg.tensors_map[index]
+ info = StringBuilder(self.spacious_str).Tensor(tensor, depth_str)
+ print(info)
+
+ def PrintSpecificOperators(self, print_operator_index_list):
+ for index in print_operator_index_list:
+ operator = self.subg.operators_map[index]
+ info = StringBuilder(self.spacious_str).Operator(operator)
+ print(info)
+
+ def PrintGraphStats(self):
+ stats = graph_stats.CalcGraphStats(self.subg)
+ info = StringBuilder(self.spacious_str).GraphStats(stats)
+ print(info)
+
+ def PrintBuffers(self):
+ for index, tensor in self.subg.tensors_map.items():
+ if tensor.buffer is not None:
+ info = StringBuilder(self.spacious_str).Buffer(tensor)
+ print(info)
+ print()
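+
+
+# Minimal usage sketch (assuming `subg` was produced by a parser in this tool):
+#   SubgraphPrinter(verbose=1, subg=subg).PrintInfo()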
diff --git a/tools/tflitefile_tool/requirements.txt b/tools/tflitefile_tool/requirements.txt
index 9b4366ae5..2c75ce04d 100644
--- a/tools/tflitefile_tool/requirements.txt
+++ b/tools/tflitefile_tool/requirements.txt
@@ -1,2 +1,2 @@
-flatbuffers>=1.12
+flatbuffers==1.12
numpy
diff --git a/tools/tflitefile_tool/saver/__init__.py b/tools/tflitefile_tool/saver/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/tflitefile_tool/saver/__init__.py
diff --git a/tools/tflitefile_tool/saver/config_saver.py b/tools/tflitefile_tool/saver/config_saver.py
new file mode 100755
index 000000000..fa359693f
--- /dev/null
+++ b/tools/tflitefile_tool/saver/config_saver.py
@@ -0,0 +1,122 @@
+#!/usr/bin/python
+
+# Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from printer.string_builder import GetStringShape
+
+
+# TODO: Minimize the number of `write` calls by using `StringBuilder`
+class ConfigSaver(object):
+ def __init__(self, file_name, operator):
+ self.file_name = file_name
+ self.operator = operator
+        # Set self.verbose to 1 to print more information
+ self.verbose = 1
+ self.op_idx = operator.index
+ self.op_name = operator.op_name
+ self.options = operator.tf_options
+
+ self.f = open(file_name, 'at')
+
+ def __del__(self):
+ self.f.close()
+
+ def SaveInfo(self):
+ self.f.write("[{}]\n".format(self.op_idx))
+ if (self.op_name == 'CONV_2D'):
+ self.SaveConv2DInputs()
+ else:
+ self.SaveInputs()
+
+ self.SaveOutputs()
+
+ self.SaveAttributes()
+
+ self.f.write('\n')
+
+ def SaveConv2DInputs(self):
+ if (len(self.operator.inputs) != 3):
+ raise AssertionError('Conv2D input count should be 3')
+
+        input_tensor = self.operator.inputs[0]
+        weight = self.operator.inputs[1]
+        bias = self.operator.inputs[2]
+
+        self.f.write("input: {}\n".format(GetStringShape(input_tensor)))
+        self.f.write("input_type: {}\n".format(input_tensor.type_name))
+ self.f.write("weights: {}\n".format(GetStringShape(weight)))
+ self.f.write("weights_type: {}\n".format(weight.type_name))
+ self.f.write("bias: {}\n".format(GetStringShape(bias)))
+ self.f.write("bias_type: {}\n".format(bias.type_name))
+
+ def SaveInputs(self):
+ total = len(self.operator.inputs)
+ self.f.write("input_counts: {}\n".format(total))
+ for idx in range(total):
+ tensor = self.operator.inputs[idx]
+ input_shape_str = GetStringShape(tensor)
+ self.f.write("input{}: {}\n".format(idx, input_shape_str))
+ self.f.write("input{}_type: {}\n".format(idx, tensor.type_name))
+
+ def SaveOutputs(self):
+ total = len(self.operator.outputs)
+ self.f.write("output_counts: {}\n".format(total))
+ for idx in range(total):
+ tensor = self.operator.outputs[idx]
+ output_shape_str = GetStringShape(tensor)
+ self.f.write("output{}: {}\n".format(idx, output_shape_str))
+ self.f.write("output{}_type: {}\n".format(idx, tensor.type_name))
+
+ def SaveFilter(self):
+ self.f.write("filter_w: {}\n".format(self.options.FilterWidth()))
+ self.f.write("filter_h: {}\n".format(self.options.FilterHeight()))
+
+ def SaveStride(self):
+ self.f.write("stride_w: {}\n".format(self.options.StrideW()))
+ self.f.write("stride_h: {}\n".format(self.options.StrideH()))
+
+ def SaveDilation(self):
+ self.f.write("dilation_w: {}\n".format(self.options.DilationWFactor()))
+ self.f.write("dilation_h: {}\n".format(self.options.DilationHFactor()))
+
+ def SavePadding(self):
+ if self.options.Padding() == 0:
+ self.f.write("padding: SAME\n")
+ elif self.options.Padding() == 1:
+ self.f.write("padding: VALID\n")
+
+ def SaveFusedAct(self):
+        if self.operator.activation != "NONE":
+ self.f.write("fused_act: {}\n".format(self.operator.activation))
+
+ def SaveAttributes(self):
+ if self.op_name == 'AVERAGE_POOL_2D' or self.op_name == 'MAX_POOL_2D':
+ self.SaveFilter()
+ self.SaveStride()
+ self.SavePadding()
+ elif self.op_name == 'CONV_2D':
+ self.SaveStride()
+ self.SaveDilation()
+ self.SavePadding()
+ elif self.op_name == 'TRANSPOSE_CONV':
+ self.SaveStride()
+ self.SavePadding()
+ elif self.op_name == 'DEPTHWISE_CONV_2D':
+ self.SaveStride()
+ self.SaveDilation()
+ self.SavePadding()
+ self.f.write("depthmultiplier: {}\n".format(self.options.DepthMultiplier()))
+
+ self.SaveFusedAct()
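+
+    # Illustrative .config section for a hypothetical CONV_2D operator:
+    #   [0]
+    #   input: [1, 224, 224, 3]
+    #   input_type: FLOAT32
+    #   weights: [32, 3, 3, 3]
+    #   weights_type: FLOAT32
+    #   bias: [32]
+    #   bias_type: FLOAT32
+    #   output_counts: 1
+    #   output0: [1, 112, 112, 32]
+    #   output0_type: FLOAT32
+    #   stride_w: 2
+    #   stride_h: 2
+    #   dilation_w: 1
+    #   dilation_h: 1
+    #   padding: SAME
+    #   fused_act: RELU6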
diff --git a/tools/tflitefile_tool/saver/model_saver.py b/tools/tflitefile_tool/saver/model_saver.py
new file mode 100755
index 000000000..117ec76df
--- /dev/null
+++ b/tools/tflitefile_tool/saver/model_saver.py
@@ -0,0 +1,36 @@
+#!/usr/bin/python
+
+# Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .config_saver import ConfigSaver
+
+
+class ModelSaver(object):
+ def __init__(self, subg):
+ self.model_name = subg.model_name
+ self.subg = subg.subg
+
+ def SaveConfigInfo(self, prefix):
+ print("Save model configuration file")
+ for type_str, oper_list in self.subg.optypes_map.items():
+ if prefix:
+ file_name = "{}_{}_{}.config".format(prefix, self.model_name, type_str)
+ else:
+ file_name = "{}_{}.config".format(self.model_name, type_str)
+ print("{} file is generated".format(file_name))
+ with open(file_name, 'wt') as f:
+ f.write("# {}, Total count: {}\n\n".format(type_str, len(oper_list)))
+ for operator in oper_list:
+ ConfigSaver(file_name, operator).SaveInfo()
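+
+
+# Minimal usage sketch (assuming `subg` comes from the tflite parser):
+#   ModelSaver(subg).SaveConfigInfo(prefix="sample")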
diff --git a/tools/tflitefile_tool/select_operator.py b/tools/tflitefile_tool/select_operator.py
index 863edea57..5ec2b8b2f 100755
--- a/tools/tflitefile_tool/select_operator.py
+++ b/tools/tflitefile_tool/select_operator.py
@@ -22,6 +22,20 @@ import tflite.Model
import tflite.SubGraph
import tflite.BuiltinOptions
import argparse
+import pkg_resources
+import json
+
+
+# On flatbuffers 2.0, EndVector no longer takes a length argument.
+# But flatbuffers below 2.0 (e.g. 1.12) still requires one.
+# We need this workaround until we drop support for flatbuffers 1.12.
+# Reference: https://github.com/google/flatbuffers/issues/6858
+def EndVector(builder, length):
+    flat_version = pkg_resources.get_distribution('flatbuffers').version
+    if pkg_resources.parse_version(flat_version) < pkg_resources.parse_version("2.0"):
+        return builder.EndVector(length)
+    else:
+        return builder.EndVector()
# Assume we use only main model in model file
@@ -135,7 +149,7 @@ def GenerateOperatorCodes(new_builder, sample_model, used_opcodes_dic,
for operator_code_idx in reversed(range(new_operator_code_num)):
new_builder.PrependUOffsetTRelative(new_operator_code_list[operator_code_idx])
- return new_builder.EndVector(new_operator_code_num)
+ return EndVector(new_builder, new_operator_code_num)
def GenerateQuantization(new_builder, selected_quantization):
@@ -146,7 +160,7 @@ def GenerateQuantization(new_builder, selected_quantization):
new_builder, min_num)
for min_idx in reversed(range(min_num)):
new_builder.PrependFloat32(selected_quantization.Min(min_idx))
- new_min = new_builder.EndVector(min_num)
+ new_min = EndVector(new_builder, min_num)
# Create max vector
max_num = selected_quantization.MaxLength()
@@ -155,7 +169,7 @@ def GenerateQuantization(new_builder, selected_quantization):
new_builder, max_num)
for max_idx in reversed(range(max_num)):
new_builder.PrependFloat32(selected_quantization.Max(max_idx))
- new_max = new_builder.EndVector(max_num)
+ new_max = EndVector(new_builder, max_num)
# Create scale vector
scale_num = selected_quantization.ScaleLength()
@@ -164,7 +178,7 @@ def GenerateQuantization(new_builder, selected_quantization):
new_builder, scale_num)
for scale_idx in reversed(range(scale_num)):
new_builder.PrependFloat32(selected_quantization.Scale(scale_idx))
- new_scale = new_builder.EndVector(scale_num)
+ new_scale = EndVector(new_builder, scale_num)
# Create zero_point vector
zeropoint_num = selected_quantization.ZeroPointLength()
@@ -173,7 +187,7 @@ def GenerateQuantization(new_builder, selected_quantization):
new_builder, zeropoint_num)
for zeropoint_idx in reversed(range(zeropoint_num)):
new_builder.PrependInt64(selected_quantization.ZeroPoint(zeropoint_idx))
- new_zeropoint = new_builder.EndVector(zeropoint_num)
+ new_zeropoint = EndVector(new_builder, zeropoint_num)
# Create quantization
tflite.QuantizationParameters.QuantizationParametersStart(new_builder)
@@ -188,6 +202,11 @@ def GenerateQuantization(new_builder, selected_quantization):
tflite.QuantizationParameters.QuantizationParametersAddZeroPoint(
new_builder, new_zeropoint)
+ quantized_dimension = selected_quantization.QuantizedDimension()
+ if quantized_dimension != 0:
+ tflite.QuantizationParameters.QuantizationParametersAddQuantizedDimension(
+ new_builder, quantized_dimension)
+
return tflite.QuantizationParameters.QuantizationParametersEnd(new_builder)
@@ -199,7 +218,7 @@ def GenerateTensor(new_builder, selected_tensor, used_buffers_dic):
if shape_num != 0:
for shape_idx in reversed(range(shape_num)):
new_builder.PrependInt32(selected_tensor.Shape(shape_idx))
- new_shape = new_builder.EndVector(shape_num)
+ new_shape = EndVector(new_builder, shape_num)
# Create tensor_type
tensor_type = selected_tensor.Type()
@@ -218,15 +237,26 @@ def GenerateTensor(new_builder, selected_tensor, used_buffers_dic):
if quantization != None:
new_quantization = GenerateQuantization(new_builder, quantization)
+ # Create IsVariable
+ is_variable = selected_tensor.IsVariable()
+
+ # Create Sparsity
+ sparsity = selected_tensor.Sparsity()
+
# Create tensor
tflite.Tensor.TensorStart(new_builder)
tflite.Tensor.TensorAddShape(new_builder, new_shape)
tflite.Tensor.TensorAddType(new_builder, tensor_type)
- tflite.Tensor.TensorAddBuffer(new_builder, new_buffer_idx)
+ if (new_buffer_idx != 0):
+ tflite.Tensor.TensorAddBuffer(new_builder, new_buffer_idx)
if name_string != "":
tflite.Tensor.TensorAddName(new_builder, new_name)
if quantization != None:
tflite.Tensor.TensorAddQuantization(new_builder, new_quantization)
+ tflite.Tensor.TensorAddIsVariable(new_builder, is_variable)
+
+ if sparsity != None:
+ tflite.Tensor.TensorAddSparsity(new_builder, sparsity)
return tflite.Tensor.TensorEnd(new_builder)
@@ -252,7 +282,7 @@ def GenerateTensors(new_builder, selected_subgraph, used_tensors_dic, used_buffe
for new_tensor in reversed(new_tensor_list):
new_builder.PrependUOffsetTRelative(new_tensor)
- return new_builder.EndVector(new_tensor_num)
+ return EndVector(new_builder, new_tensor_num)
def GenerateBuiltinOption(new_builder, selected_builtin_option, builtin_option_type,
@@ -458,7 +488,7 @@ def GenerateBuiltinOption(new_builder, selected_builtin_option, builtin_option_t
for new_shape_idx in reversed(range(shape_num)):
new_shape_val = reshape_option.NewShape(new_shape_idx)
new_builder.PrependInt32(new_shape_val)
- new_shape = new_builder.EndVector(shape_num)
+ new_shape = EndVector(new_builder, shape_num)
tflite.ReshapeOptions.ReshapeOptionsStart(new_builder)
if shape_num != 0:
@@ -597,7 +627,7 @@ def GenerateBuiltinOption(new_builder, selected_builtin_option, builtin_option_t
for squeeze_dims_idx in reversed(range(squeeze_dims_num)):
squeeze_dims_val = squeeze_option.SqueezeDims(squeeze_dims_idx)
new_builder.PrependInt32(squeeze_dims_val)
- new_squeeze_dims = new_builder.EndVector(squeeze_dims_num)
+ new_squeeze_dims = EndVector(new_builder, squeeze_dims_num)
tflite.SqueezeOptions.SqueezeOptionsStart(new_builder)
if squeeze_dims_num != 0:
@@ -871,7 +901,6 @@ def GenerateBuiltinOption(new_builder, selected_builtin_option, builtin_option_t
# FillOptions: not supported
# BidirectionalSequenceLSTMOptions: not supported
# BidirectionalSequenceRNNOptions: not supported
- # UnidirectionalSequenceLSTMOptions: not supported
# FloorModOptions: not supported
# RangeOptions: not supported
# ResizeNearestNeighborOptions: not supported
@@ -901,6 +930,31 @@ def GenerateBuiltinOption(new_builder, selected_builtin_option, builtin_option_t
tflite.SquaredDifferenceOptions.SquaredDifferenceOptionsStart(new_builder)
return tflite.SquaredDifferenceOptions.SquaredDifferenceOptionsEnd(new_builder)
+ # UnidirectionalSequenceLSTMOptions
+ import tflite.UnidirectionalSequenceLSTMOptions
+ if builtin_option_type == tflite.BuiltinOptions.BuiltinOptions(
+ ).UnidirectionalSequenceLSTMOptions:
+
+ unidirectional_sequence_lstm_option = tflite.UnidirectionalSequenceLSTMOptions.UnidirectionalSequenceLSTMOptions(
+ )
+ unidirectional_sequence_lstm_option.Init(selected_builtin_option.Bytes,
+ selected_builtin_option.Pos)
+
+ tflite.UnidirectionalSequenceLSTMOptions.UnidirectionalSequenceLSTMOptionsStart(
+ new_builder)
+ tflite.UnidirectionalSequenceLSTMOptions.UnidirectionalSequenceLSTMOptionsAddFusedActivationFunction(
+ new_builder, unidirectional_sequence_lstm_option.FusedActivationFunction())
+ tflite.UnidirectionalSequenceLSTMOptions.UnidirectionalSequenceLSTMOptionsAddCellClip(
+ new_builder, unidirectional_sequence_lstm_option.CellClip())
+ tflite.UnidirectionalSequenceLSTMOptions.UnidirectionalSequenceLSTMOptionsAddProjClip(
+ new_builder, unidirectional_sequence_lstm_option.ProjClip())
+ tflite.UnidirectionalSequenceLSTMOptions.UnidirectionalSequenceLSTMOptionsAddTimeMajor(
+ new_builder, unidirectional_sequence_lstm_option.TimeMajor())
+ tflite.UnidirectionalSequenceLSTMOptions.UnidirectionalSequenceLSTMOptionsAddAsymmetricQuantizeInputs(
+ new_builder, unidirectional_sequence_lstm_option.AsymmetricQuantizeInputs())
+ return tflite.UnidirectionalSequenceLSTMOptions.UnidirectionalSequenceLSTMOptionsEnd(
+ new_builder)
+
# MirrorPadOptions: not supported
# AbsOptions: not supported
# SplitVOptions: not supported
@@ -957,7 +1011,7 @@ def GenerateOperator(new_builder, selected_operator, used_tensors_dic, used_opco
else:
new_input_tensor_idx = used_tensors_dic[input_tensor_idx]
new_builder.PrependInt32(new_input_tensor_idx)
- new_input = new_builder.EndVector(input_num)
+ new_input = EndVector(new_builder, input_num)
# create output_vector
output_num = selected_operator.OutputsLength()
@@ -967,7 +1021,7 @@ def GenerateOperator(new_builder, selected_operator, used_tensors_dic, used_opco
output_tensor_idx = selected_operator.Outputs(output_idx)
new_output_tensor_idx = used_tensors_dic[output_tensor_idx]
new_builder.PrependInt32(new_output_tensor_idx)
- new_output = new_builder.EndVector(output_num)
+ new_output = EndVector(new_builder, output_num)
# Create builtin_option
builtin_option_type = selected_operator.BuiltinOptionsType()
@@ -982,7 +1036,7 @@ def GenerateOperator(new_builder, selected_operator, used_tensors_dic, used_opco
tflite.Operator.OperatorStartCustomOptionsVector(new_builder, custom_option_num)
for custom_option_idx in reversed(range(custom_option_num)):
new_builder.PrependUint8(selected_operator.CustomOptions(custom_option_idx))
- new_custom_option = new_builder.EndVector(custom_option_num)
+ new_custom_option = EndVector(new_builder, custom_option_num)
# Create custum option type
custom_option_type = selected_operator.CustomOptionsFormat()
@@ -1027,7 +1081,7 @@ def GenerateOperators(new_builder, selected_subgraph, operator_list, used_tensor
for new_operator in reversed(new_operator_list):
new_builder.PrependUOffsetTRelative(new_operator)
- return new_builder.EndVector(new_operator_num)
+ return EndVector(new_builder, new_operator_num)
def GenerateSubgraph(new_builder, selected_subgraph, operator_list, new_input_tensor,
@@ -1045,7 +1099,7 @@ def GenerateSubgraph(new_builder, selected_subgraph, operator_list, new_input_te
for input_tensor_idx in reversed(new_input_tensor):
new_input_tensor_idx = used_tensors_dic[input_tensor_idx]
new_builder.PrependInt32(new_input_tensor_idx)
- new_inputs = new_builder.EndVector(new_input_tensor_num)
+ new_inputs = EndVector(new_builder, new_input_tensor_num)
# Create output vector for subgraph table
new_output_tensor_num = len(new_output_tensor)
@@ -1054,7 +1108,7 @@ def GenerateSubgraph(new_builder, selected_subgraph, operator_list, new_input_te
for output_tensor_idx in reversed(new_output_tensor):
new_output_tensor_idx = used_tensors_dic[output_tensor_idx]
new_builder.PrependInt32(new_output_tensor_idx)
- new_outputs = new_builder.EndVector(new_output_tensor_num)
+ new_outputs = EndVector(new_builder, new_output_tensor_num)
# Operators
operators = GenerateOperators(new_builder, selected_subgraph, operator_list,
@@ -1089,10 +1143,10 @@ def GenerateSubgraphs(args, new_builder, sample_model, operator_list, new_input_
# The selected subgraph will be primary subgraph of the model to be created newly
selected_subgraph = sample_model.Subgraphs(args.subgraph)
- # k: old subg index, v: new subg index
+    # k: original subg index, v: new subg index
# new subg index is sequential in used_subgraphs_dic
for k, v in used_subgraphs_dic.items():
- print("Append subgraphs, old index : ", k, ", new index : ", v)
+        print("Append subgraphs, original index : ", k, ", new index : ", v)
if k == args.subgraph:
assert v == 0
new_subgraph = GenerateSubgraph(new_builder, selected_subgraph, operator_list,
@@ -1121,7 +1175,7 @@ def GenerateSubgraphs(args, new_builder, sample_model, operator_list, new_input_
for subgraph_idx in reversed(range(new_subgraph_num)):
new_builder.PrependUOffsetTRelative(new_subgraph_list[subgraph_idx])
- return new_builder.EndVector(new_subgraph_num)
+ return EndVector(new_builder, new_subgraph_num)
def GenerateBuffers(new_builder, sample_model, used_buffers_dic):
@@ -1141,7 +1195,7 @@ def GenerateBuffers(new_builder, sample_model, used_buffers_dic):
tflite.Buffer.BufferStartDataVector(new_builder, buffer_length)
for buffer_data_idx in reversed(range(buffer_length)):
new_builder.PrependUint8(buffer.Data(buffer_data_idx))
- new_buffer = new_builder.EndVector(buffer_length)
+ new_buffer = EndVector(new_builder, buffer_length)
new_buffer_data_list[buffer_idx] = new_buffer
# Create tables of buffer
@@ -1165,7 +1219,7 @@ def GenerateBuffers(new_builder, sample_model, used_buffers_dic):
for new_buffer_idx in reversed(range(new_buffer_num)):
new_builder.PrependUOffsetTRelative(new_buffer_list[new_buffer_idx])
- return new_builder.EndVector(new_buffer_num)
+ return EndVector(new_builder, new_buffer_num)
def GenerateModel(args, new_builder, sample_model, operator_list, new_input_tensors,
@@ -1200,6 +1254,55 @@ def GenerateModel(args, new_builder, sample_model, operator_list, new_input_tens
return tflite.Model.ModelEnd(new_builder)
+def StoreIOInfo(path, used_tensors, org_inputs, org_outputs, new_inputs, new_outputs):
+ ioinfo = {}
+
+ # For inputs and outputs of org model
+ ioinfo["org-model-io"] = {
+ "inputs": {
+ "new-indices": []
+ },
+ "outputs": {
+ "new-indices": []
+ }
+ }
+ for input_tensor_idx in org_inputs:
+ if input_tensor_idx in used_tensors:
+ ioinfo["org-model-io"]["inputs"]["new-indices"].append(
+ used_tensors[input_tensor_idx])
+ else:
+ ioinfo["org-model-io"]["inputs"]["new-indices"].append(-1)
+ for output_tensor_idx in org_outputs:
+ if output_tensor_idx in used_tensors:
+ ioinfo["org-model-io"]["outputs"]["new-indices"].append(
+ used_tensors[output_tensor_idx])
+ else:
+ ioinfo["org-model-io"]["outputs"]["new-indices"].append(-1)
+
+ # For inputs and outputs of new model
+ ioinfo["new-model-io"] = {
+ "inputs": {
+ "org-indices": [],
+ "new-indices": []
+ },
+ "outputs": {
+ "org-indices": [],
+ "new-indices": []
+ }
+ }
+ for input_tensor_idx in new_inputs:
+ ioinfo["new-model-io"]["inputs"]["org-indices"].append(int(input_tensor_idx))
+ ioinfo["new-model-io"]["inputs"]["new-indices"].append(
+ used_tensors[input_tensor_idx])
+ for output_tensor_idx in new_outputs:
+ ioinfo["new-model-io"]["outputs"]["org-indices"].append(int(output_tensor_idx))
+ ioinfo["new-model-io"]["outputs"]["new-indices"].append(
+ used_tensors[output_tensor_idx])
+
+ with open(path, "w") as json_file:
+ json_file.write(f'{json.dumps(ioinfo, indent=2)}\n')
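+
+# Illustrative JSON written by StoreIOInfo (all indices are hypothetical):
+# {
+#   "org-model-io": {"inputs": {"new-indices": [0]}, "outputs": {"new-indices": [-1]}},
+#   "new-model-io": {"inputs": {"org-indices": [1], "new-indices": [0]},
+#                    "outputs": {"org-indices": [2], "new-indices": [1]}}
+# }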
+
+
def main(args):
input_model_file = args.input_model
oplist_file = args.opcode_list
@@ -1267,7 +1370,8 @@ def main(args):
for used_tensor in used_tensors:
# key and value is same in prepare phase
buf_idx = (sample_subgraph.Tensors(used_tensor)).Buffer()
- used_buffers.append(buf_idx)
+ if buf_idx != 0:
+ used_buffers.append(buf_idx)
# Append buffers of tensors of child subgraphs
for subgraph_idx in used_subgraphs_list:
@@ -1275,8 +1379,10 @@ def main(args):
continue
for tensor_idx in range(sample_model.Subgraphs(subgraph_idx).TensorsLength()):
tensor = sample_model.Subgraphs(subgraph_idx).Tensors(tensor_idx)
- used_buffers.append(tensor.Buffer())
+ if tensor.Buffer() != 0:
+ used_buffers.append(tensor.Buffer())
+ used_buffers = list(set(used_buffers))
used_buffers.sort()
# Assign new index for operator
@@ -1296,6 +1402,8 @@ def main(args):
# Assign new index for buffer
used_buffers_dic = {}
+    # A tensor has an empty buffer if its buffer index is 0.
+ used_buffers_dic[0] = 0
for new_buffer_idx in range(len(used_buffers)):
sample_buffer_idx = used_buffers[new_buffer_idx]
used_buffers_dic[sample_buffer_idx] = new_buffer_idx
@@ -1315,7 +1423,7 @@ def main(args):
if input_tensor_idx in new_input_tensors:
matched_buffer_idx = sample_subgraph.Tensors(input_tensor_idx).Buffer()
matched_buffer = sample_model.Buffers(matched_buffer_idx)
- if matched_buffer.DataLength() != 0:
+ if matched_buffer_idx == 0 or matched_buffer.DataLength() != 0:
new_input_tensors.remove(input_tensor_idx)
for output_idx in range(operator.OutputsLength()):
@@ -1351,6 +1459,12 @@ def main(args):
output_model_file.write(new_buf)
+ org_inputs = sample_subgraph.InputsAsNumpy()
+ org_outputs = sample_subgraph.OutputsAsNumpy()
+ if args.store_io_info != "":
+ StoreIOInfo(args.store_io_info, used_tensors_dic, org_inputs, org_outputs,
+ new_input_tensors, new_output_tensors)
+
if __name__ == '__main__':
# Define argument and read
@@ -1367,6 +1481,13 @@ if __name__ == '__main__':
"output_model", type=argparse.FileType('wb'), help="output tflite model file")
arg_parser.add_argument(
'-g', '--subgraph', type=int, default=0, help="subgraph to use (default: 0)")
+ arg_parser.add_argument(
+ '-s',
+ '--store-io-info',
+ type=str,
+ required=False,
+ default="",
+ help="Path to io information to be stored")
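+
+    # Example invocation (file names are hypothetical):
+    #   ./select_operator.py input.tflite opcode_list.txt output.tflite -s io_info.json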
# TODO
# Select multiple subgraph
diff --git a/tools/tflitefile_tool/subgraph_printer.py b/tools/tflitefile_tool/subgraph_printer.py
deleted file mode 100755
index cce7ff53b..000000000
--- a/tools/tflitefile_tool/subgraph_printer.py
+++ /dev/null
@@ -1,90 +0,0 @@
-#!/usr/bin/python
-
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from operator_printer import OperatorPrinter
-from tensor_printer import TensorPrinter
-import graph_stats
-
-
-class SubgraphPrinter(object):
- def __init__(self, verbose, op_parser, model_name):
- self.verbose = verbose
- self.op_parser = op_parser
- self.model_name = model_name
- self.print_all_tensor = True
- self.print_tensor_index_list = None
- self.print_all_operator = True
- self.print_operator_index_list = None
-
- def SetPrintSpecificTensors(self, tensor_indices):
- if len(tensor_indices) != 0:
- self.print_all_tensor = False
- self.print_tensor_index_list = tensor_indices
-
- def SetPrintSpecificOperators(self, operator_indices):
- if len(operator_indices) != 0:
- self.print_all_operator = False
- self.print_operator_index_list = operator_indices
-
- def PrintInfo(self):
- if self.print_all_tensor == True and self.print_all_operator == True:
- self.PrintModelInfo()
- self.PrintAllOperatorsInList()
- graph_stats.PrintGraphStats(
- graph_stats.CalcGraphStats(self.op_parser), self.verbose)
-
- if self.print_all_tensor == False:
- print('')
- self.PrintSpecificTensors(self.print_tensor_index_list)
- print('')
-
- if self.print_all_operator == False:
- print('')
- self.PrintSpecificOperators(self.print_operator_index_list)
- print('')
-
- def PrintModelInfo(self):
- print("[" + self.model_name + "]\n")
- if self.verbose > 0:
- model_inputs = self.op_parser.tf_subgraph.InputsAsNumpy()
- model_outputs = self.op_parser.tf_subgraph.OutputsAsNumpy()
- print(self.model_name + " input tensors: " + str(model_inputs))
- self.PrintSpecificTensors(model_inputs, "\t")
- print(self.model_name + " output tensors: " + str(model_outputs))
- self.PrintSpecificTensors(model_outputs, "\t")
- print('')
-
- def PrintAllOperatorsInList(self):
- if (self.verbose < 1):
- return
-
- for operator in self.op_parser.operators_in_list:
- printer = OperatorPrinter(self.verbose, operator)
- printer.PrintInfo()
- print('')
-
- print('')
-
- def PrintSpecificTensors(self, print_tensor_index_list, depth_str=""):
- for tensor in self.op_parser.GetTensors(print_tensor_index_list):
- printer = TensorPrinter(self.verbose, tensor)
- printer.PrintInfo(depth_str)
-
- def PrintSpecificOperators(self, print_operator_index_list):
- for operator in self.op_parser.operators_in_list:
- if operator.operator_idx in print_operator_index_list:
- printer = OperatorPrinter(self.verbose, operator)
- printer.PrintInfo()
diff --git a/tools/tflitefile_tool/tensor_printer.py b/tools/tflitefile_tool/tensor_printer.py
deleted file mode 100755
index 7c2904346..000000000
--- a/tools/tflitefile_tool/tensor_printer.py
+++ /dev/null
@@ -1,84 +0,0 @@
-#!/usr/bin/python
-
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from tensor_wrapping import Tensor
-
-SYMBOLS = ['B', 'K', 'M', 'G', 'T']
-
-
-def ConvertBytesToHuman(n):
- n = int(n)
- if n < 0:
- return 0
-
- format_str = "%(val)3.1f%(symb)s"
- prefix = {}
- for i, s in enumerate(SYMBOLS[1:]):
- prefix[s] = 1 << (i + 1) * 10
-
- for symbol in reversed(SYMBOLS[1:]):
- if n >= prefix[symbol]:
- v = float(n) / prefix[symbol]
- return format_str % dict(symb=symbol, val=v)
-
- return format_str % dict(symb=SYMBOLS[0], val=n)
-
-
-class TensorPrinter(object):
- def __init__(self, verbose, tensor):
- self.verbose = verbose
- self.tensor = tensor
-
- def PrintInfo(self, depth_str=""):
- if (self.verbose < 1):
- pass
-
- print_str = ""
- if self.tensor.tensor_idx < 0:
- print_str = "Tensor {0:4}".format(self.tensor.tensor_idx)
- else:
- buffer_idx = self.tensor.tf_tensor.Buffer()
- isEmpty = "Filled"
- if (self.tensor.tf_buffer.DataLength() == 0):
- isEmpty = " Empty"
- shape_str = self.GetShapeString()
- type_name = self.tensor.type_name
-
- shape_name = ""
- if self.tensor.tf_tensor.Name() != 0:
- shape_name = self.tensor.tf_tensor.Name()
-
- memory_size = ConvertBytesToHuman(self.tensor.memory_size)
-
- print_str = "Tensor {0:4} : buffer {1:4} | {2} | {3:7} | Memory {4:6} | Shape {5} ({6})".format(
- self.tensor.tensor_idx, buffer_idx, isEmpty, type_name, memory_size,
- shape_str, shape_name)
- print(depth_str + print_str)
-
- def GetShapeString(self):
- if self.tensor.tf_tensor.ShapeLength() == 0:
- return "Scalar"
- return_string = "["
- for shape_idx in range(self.tensor.tf_tensor.ShapeLength()):
- if (shape_idx != 0):
- return_string += ", "
- # when shape signature is -1, that means unknown dim
- if self.tensor.tf_tensor.ShapeSignature(shape_idx) != -1:
- return_string += str(self.tensor.tf_tensor.Shape(shape_idx))
- else:
- return_string += "-1"
- return_string += "]"
- return return_string
diff --git a/tools/tflitefile_tool/tensor_wrapping.py b/tools/tflitefile_tool/tensor_wrapping.py
deleted file mode 100755
index 2a6dcaceb..000000000
--- a/tools/tflitefile_tool/tensor_wrapping.py
+++ /dev/null
@@ -1,82 +0,0 @@
-#!/usr/bin/python
-
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import tflite.Tensor
-import tflite.TensorType
-
-TensorTypeList = {}
-
-
-def SetTensorTypeStr():
- tensorTypeObj = tflite.TensorType.TensorType()
-
- for fieldName in dir(tensorTypeObj):
- if (not fieldName.startswith('_')):
- fieldValue = getattr(tensorTypeObj, fieldName)
- if (isinstance(fieldValue, (int))):
- TensorTypeList[fieldValue] = fieldName
-
-
-TYPES = {
- 'BOOL': 1,
- 'COMPLEX64': 8,
- 'FLOAT16': 2,
- 'FLOAT32': 4,
- 'INT16': 2,
- 'INT32': 4,
- 'INT64': 8,
- 'UINT8': 1
-}
-
-
-def GetTypeSize(type_name):
- try:
- return TYPES[type_name]
-
- except KeyError as error:
- return 0
-
-
-class Tensor(object):
- def __init__(self, tensor_idx, tf_tensor, tf_buffer):
- self.tensor_idx = tensor_idx
- self.tf_tensor = tf_tensor
- self.tf_buffer = tf_buffer
-
- # optional input
- if (self.tf_tensor != None):
- self.type_name = TensorTypeList[self.tf_tensor.Type()]
- else:
- self.type_name = None
-
- self.memory_size = self.GetMemorySize()
-
- def GetMemorySize(self):
- type_size = GetTypeSize(self.type_name)
- if type_size == 0:
- return 0
-
- # memory size in bytes
- size = int(type_size)
- shape_length = self.tf_tensor.ShapeLength()
- if shape_length == 0:
- return size
-
- for shape_idx in range(shape_length):
- shape_size = int(self.tf_tensor.Shape(shape_idx))
- size *= shape_size
-
- return size
diff --git a/tools/tflitefile_tool/tests/README.md b/tools/tflitefile_tool/tests/README.md
new file mode 100644
index 000000000..0d1d70786
--- /dev/null
+++ b/tools/tflitefile_tool/tests/README.md
@@ -0,0 +1,36 @@
+# How to test
+
+## Prepare
+
+There is `add.tflite` in `ONE/nnpackage/examples/v1.0.0/add`.
+
+```
+ONE$ find ./nnpackage -name "add.tflite"
+./nnpackage/examples/v1.0.0/add/add.tflite
+```
+
+## Test
+
+```
+ONE/tools/tflitefile_tool$ python -m unittest discover
+
+----------------------------------------------------------------------
+Ran 1 test in 0.000s
+
+OK
+```
+
+OR
+
+```
+ONE/tools/tflitefile_tool$ python ./tests/main.py
+
+----------------------------------------------------------------------
+Ran 1 test in 0.000s
+
+OK
+```
+
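+To run a single test module, for example:
+
+```
+ONE/tools/tflitefile_tool$ python -m unittest tests.test_tensor
+```
+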
+## Reference
+
+https://docs.python.org/3.6/library/unittest.html
diff --git a/tools/tflitefile_tool/tests/__init__.py b/tools/tflitefile_tool/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/tflitefile_tool/tests/__init__.py
diff --git a/tools/tflitefile_tool/tests/main.py b/tools/tflitefile_tool/tests/main.py
new file mode 100644
index 000000000..b9c7104bb
--- /dev/null
+++ b/tools/tflitefile_tool/tests/main.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+if __name__ == '__main__':
+ loader = unittest.TestLoader()
+ tests = loader.discover('.')
+ testRunner = unittest.runner.TextTestRunner()
+ testRunner.run(tests)
diff --git a/tools/tflitefile_tool/tests/test_operator.py b/tools/tflitefile_tool/tests/test_operator.py
new file mode 100644
index 000000000..7d6fbe859
--- /dev/null
+++ b/tools/tflitefile_tool/tests/test_operator.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+from ir.tensor import Tensor
+from ir.operator import Operator
+
+
+# Test only the getters/setters
+class OperatorTestCase(unittest.TestCase):
+ def setUp(self):
+ pass
+
+ def tearDown(self):
+ pass
+
+ def test_index(self):
+ op = Operator()
+ op.index = 1000
+ self.assertEqual(op.index, 1000)
+
+ def test_inputs(self):
+ op = Operator()
+ t0 = Tensor()
+ t0.index = 0
+ t1 = Tensor()
+ t1.index = 1
+ op.inputs = [t0, t1]
+ self.assertEqual(len(op.inputs), 2)
+ self.assertEqual(op.inputs[0], t0)
+ self.assertEqual(op.inputs[1], t1)
+
+ def test_outputs(self):
+ op = Operator()
+ t0 = Tensor()
+ t0.index = 0
+ t1 = Tensor()
+ t1.index = 1
+ op.outputs = [t0, t1]
+ self.assertEqual(len(op.outputs), 2)
+ self.assertEqual(op.outputs[0], t0)
+ self.assertEqual(op.outputs[1], t1)
+
+ def test_op_name(self):
+ op = Operator()
+ op.op_name = "ADD"
+ self.assertEqual(op.op_name, "ADD")
+
+ def test_activation(self):
+ op = Operator()
+ op.activation = "Tanh"
+ self.assertEqual(op.activation, "Tanh")
+
+ def test_options(self):
+ op = Operator()
+ op.options = "Options ..."
+ self.assertEqual(op.options, "Options ...")
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tools/tflitefile_tool/tests/test_setup.py b/tools/tflitefile_tool/tests/test_setup.py
new file mode 100644
index 000000000..f38a2d66a
--- /dev/null
+++ b/tools/tflitefile_tool/tests/test_setup.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os.path
+import unittest
+
+# Python has no const variables, but treat these as constants.
+# That is, DO NOT MODIFY these variables.
+THIS_FILE_DIR = os.path.dirname(os.path.abspath(__file__))
+TEST_MODEL_DIR = os.path.join(THIS_FILE_DIR, "../../../nnpackage/examples/v1.0.0/add")
+TEST_MODEL_PATH = os.path.join(TEST_MODEL_DIR, "add.tflite")
+
+
+def Exist_TEST_MODEL_DIR(dir):
+ return os.path.exists(dir) and os.path.isdir(dir)
+
+
+def Exist_TEST_MODEL_FILE(file):
+ return os.path.exists(file) and os.path.isfile(file)
+
+
+class Setup(unittest.TestCase):
+ def test_Exist_TEST_MODEL_DIR(self):
+ model_dir = TEST_MODEL_DIR
+ self.assertTrue(Exist_TEST_MODEL_DIR(model_dir))
+
+ def test_Exist_TEST_MODEL_FILE(self):
+ model_file = TEST_MODEL_PATH
+ self.assertTrue(Exist_TEST_MODEL_FILE(model_file))
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tools/tflitefile_tool/tests/test_string_builder.py b/tools/tflitefile_tool/tests/test_string_builder.py
new file mode 100644
index 000000000..97a580967
--- /dev/null
+++ b/tools/tflitefile_tool/tests/test_string_builder.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+from printer.string_builder import *
+
+
+class StringBuilderTestCase(unittest.TestCase):
+ def setUp(self):
+ pass
+
+ def tearDown(self):
+ pass
+
+ def test_ConvertBytesToHuman(self):
+ SYMBOLS = ['B', 'K', 'M', 'G', 'T']
+ format_str = "%(val)3.1f%(symb)s"
+
+ bytes = -1
+ self.assertEqual(ConvertBytesToHuman(bytes), 0)
+
+ bytes = 1
+ self.assertEqual(
+ ConvertBytesToHuman(bytes), format_str % dict(symb=SYMBOLS[0], val=(bytes)))
+
+ bytes = 1024
+ self.assertEqual(
+ ConvertBytesToHuman(bytes),
+ format_str % dict(symb=SYMBOLS[1], val=(bytes / 1024)))
+
+ bytes = 1024**2
+ self.assertEqual(
+ ConvertBytesToHuman(bytes),
+ format_str % dict(symb=SYMBOLS[2], val=(bytes / (1024**2))))
+
+ bytes = 1024**3
+ self.assertEqual(
+ ConvertBytesToHuman(bytes),
+ format_str % dict(symb=SYMBOLS[3], val=(bytes / (1024**3))))
+
+ bytes = 1024**4
+ self.assertEqual(
+ ConvertBytesToHuman(bytes),
+ format_str % dict(symb=SYMBOLS[4], val=(bytes / (1024**4))))
+
+ # TODO: More tests
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tools/tflitefile_tool/tests/test_subgraph.py b/tools/tflitefile_tool/tests/test_subgraph.py
new file mode 100644
index 000000000..7930ed03c
--- /dev/null
+++ b/tools/tflitefile_tool/tests/test_subgraph.py
@@ -0,0 +1,120 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+from ir.subgraph import Subgraph
+from ir.operator import Operator
+from ir.tensor import Tensor
+
+
+# Test only the getters/setters
+class SubgraphTestCase(unittest.TestCase):
+ def setUp(self):
+ pass
+
+ def tearDown(self):
+ pass
+
+ def test_index(self):
+ subg = Subgraph()
+ subg.index = 1000
+ self.assertEqual(subg.index, 1000)
+
+ def test_inputs(self):
+ subg = Subgraph()
+ t0 = Tensor()
+ t0.index = 0
+ t1 = Tensor()
+ t1.index = 1
+ subg.inputs = [t0, t1]
+ self.assertEqual(len(subg.inputs), 2)
+ self.assertEqual(subg.inputs[0], t0)
+ self.assertEqual(subg.inputs[0].index, 0)
+ self.assertEqual(subg.inputs[1], t1)
+ self.assertEqual(subg.inputs[1].index, 1)
+
+ def test_outputs(self):
+ subg = Subgraph()
+ t0 = Tensor()
+ t0.index = 0
+ t1 = Tensor()
+ t1.index = 1
+ subg.outputs = [t0, t1]
+ self.assertEqual(len(subg.outputs), 2)
+ self.assertEqual(subg.outputs[0], t0)
+ self.assertEqual(subg.outputs[0].index, 0)
+ self.assertEqual(subg.outputs[1], t1)
+ self.assertEqual(subg.outputs[1].index, 1)
+
+ def test_subg_name(self):
+ subg = Subgraph()
+ subg.subg_name = "SUBGRAPH_0"
+ self.assertEqual(subg.subg_name, "SUBGRAPH_0")
+
+ def test_model_name(self):
+ subg = Subgraph()
+ subg.model_name = "SUBGRAPH_0"
+ self.assertEqual(subg.model_name, "SUBGRAPH_0")
+
+ def test_tensors_map(self):
+ subg = Subgraph()
+ t0 = Tensor()
+ t0.index = 0
+ t1 = Tensor()
+ t1.index = 1
+ subg.tensors_map[t0.index] = t0
+ subg.tensors_map[t1.index] = t1
+ self.assertEqual(len(subg.tensors_map.keys()), 2)
+ self.assertEqual(subg.tensors_map[t0.index], t0)
+ self.assertEqual(subg.tensors_map[t1.index], t1)
+
+ def test_operators_map(self):
+ subg = Subgraph()
+ op0 = Operator()
+ op0.index = 0
+ op0.op_name = "ADD"
+ op1 = Operator()
+ op1.index = 1
+ op1.op_name = "SUB"
+ subg.operators_map[op0.index] = op0
+ subg.operators_map[op1.index] = op1
+ self.assertEqual(len(subg.operators_map.keys()), 2)
+ self.assertEqual(subg.operators_map[op0.index], op0)
+ self.assertEqual(subg.operators_map[op1.index], op1)
+
+ def test_optypes_map(self):
+ subg = Subgraph()
+ op0 = Operator()
+ op0.index = 0
+ op0.op_name = "ADD"
+ op1 = Operator()
+ op1.index = 1
+ op1.op_name = "SUB"
+ op2 = Operator()
+ op2.index = 2
+ op2.op_name = "SUB"
+
+ subg.optypes_map[op0.op_name] = op0
+ subg.optypes_map[op1.op_name] = op1
+ subg.optypes_map[op2.op_name] = op2
+
+ self.assertEqual(len(subg.optypes_map.keys()), 2)
+ self.assertEqual(len(subg.optypes_map[op0.op_name]), 1)
+ self.assertEqual(len(subg.optypes_map[op2.op_name]), 2)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tools/tflitefile_tool/tests/test_tensor.py b/tools/tflitefile_tool/tests/test_tensor.py
new file mode 100644
index 000000000..200f49557
--- /dev/null
+++ b/tools/tflitefile_tool/tests/test_tensor.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+from ir.tensor import Tensor
+
+
+# Test only the getters/setters
+class TensorTestCase(unittest.TestCase):
+ def setUp(self):
+ pass
+
+ def tearDown(self):
+ pass
+
+ def test_index(self):
+ t = Tensor()
+ t.index = 1000
+ self.assertEqual(t.index, 1000)
+
+ def test_tensor_name(self):
+ t = Tensor()
+ t.tensor_name = "input"
+ self.assertEqual(t.tensor_name, "input")
+
+ def test_buffer(self):
+ t = Tensor()
+ o = object()
+ t.buffer = o
+ self.assertEqual(t.buffer, o)
+
+ def test_buffer_index(self):
+ t = Tensor()
+ t.buffer_index = 1000
+ self.assertEqual(t.buffer_index, 1000)
+
+ def test_type_name(self):
+ t = Tensor()
+ t.type_name = "FLOAT32"
+ self.assertEqual(t.type_name, "FLOAT32")
+
+ def test_shape(self):
+ t = Tensor()
+ t.shape = [1, 2, 3, 4]
+ self.assertEqual(t.shape, [1, 2, 3, 4])
+
+ def test_memory_size(self):
+ t = Tensor()
+ t.memory_size = 1000
+ self.assertEqual(t.memory_size, 1000)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tools/tflitefile_tool/tests/test_tflite_parser.py b/tools/tflitefile_tool/tests/test_tflite_parser.py
new file mode 100644
index 000000000..dd1447a8a
--- /dev/null
+++ b/tools/tflitefile_tool/tests/test_tflite_parser.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import tflite.Model
+from parser.tflite.tflite_parser import TFLiteParser, TFLiteSubgraphParser
+from .test_setup import TEST_MODEL_PATH
+
+
+class TFLiteSubgraphParserTestCase(unittest.TestCase):
+ def setUp(self):
+ self.model_file = open(TEST_MODEL_PATH, 'rb')
+
+ def tearDown(self):
+ self.model_file.close()
+
+ def test_Parse(self):
+ buf = bytearray(self.model_file.read())
+ tf_model = tflite.Model.Model.GetRootAsModel(buf, 0)
+ for subgraph_index in range(tf_model.SubgraphsLength()):
+ tf_subgraph = tf_model.Subgraphs(subgraph_index)
+ subg_parser = TFLiteSubgraphParser(tf_model, subgraph_index)
+ subg = subg_parser.Parse()
+ self.assertEqual(subg.index, subgraph_index)
+ self.assertEqual(len(subg.inputs), tf_subgraph.InputsLength())
+ self.assertEqual(len(subg.outputs), tf_subgraph.OutputsLength())
+        # if there are optional tensors, this assertion could be wrong
+ self.assertEqual(len(subg.tensors_map.keys()), tf_subgraph.TensorsLength())
+ self.assertEqual(
+ len(subg.operators_map.keys()), tf_subgraph.OperatorsLength())
+        # because the model at TEST_MODEL_PATH has a single op (ADD)
+ self.assertEqual(len(subg.optypes_map.keys()), tf_subgraph.OperatorsLength())
+
+
+class TFLiteParserTestCase(unittest.TestCase):
+ def setUp(self):
+ self.model_file = open(TEST_MODEL_PATH, 'rb')
+ self.parser = TFLiteParser(self.model_file)
+
+ def tearDown(self):
+ self.model_file.close()
+
+ def test_Parse(self):
+ subg_list = self.parser.Parse()
+ self.assertIsNotNone(subg_list)
+ self.assertEqual(len(subg_list), 1)
+
+
+if __name__ == '__main__':
+ unittest.main()